From bde5c9925315ab45494e2839e482429fb626bf58 Mon Sep 17 00:00:00 2001
From: Nathan Miller <nathan_miller23@berkeley.edu>
Date: Wed, 16 Sep 2020 02:15:34 -0500
Subject: [PATCH 01/38] updated submodules

---
 .gitignore    | 7 +++++++
 overcooked_ai | 2 +-
 2 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/.gitignore b/.gitignore
index fbf22604..67adabf6 100644
--- a/.gitignore
+++ b/.gitignore
@@ -106,6 +106,13 @@ venv.bak/
 # mypy
 .mypy_cache/
 
+# VSCode
+**/.vscode/
+
+# CHAI specific
+**/data_dir.py
+**/slack.json
+
 # Other
 .DS_Store
 *.key
diff --git a/overcooked_ai b/overcooked_ai
index 6eaceb0a..321b390f 160000
--- a/overcooked_ai
+++ b/overcooked_ai
@@ -1 +1 @@
-Subproject commit 6eaceb0a9a2501f1b9fccbf4c7016d6662ed1108
+Subproject commit 321b390f0f2aa8310c87029bd22f3b26cbc351c8

From a1c70b2f44bbbdddc79b14678ceaf0367ee099f6 Mon Sep 17 00:00:00 2001
From: micah <mdc@berkeley.edu>
Date: Wed, 4 Nov 2020 11:13:18 -0800
Subject: [PATCH 02/38] Fixed overcooked commit pointer based on issue #14

---
 overcooked_ai | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/overcooked_ai b/overcooked_ai
index 321b390f..6eaceb0a 160000
--- a/overcooked_ai
+++ b/overcooked_ai
@@ -1 +1 @@
-Subproject commit 321b390f0f2aa8310c87029bd22f3b26cbc351c8
+Subproject commit 6eaceb0a9a2501f1b9fccbf4c7016d6662ed1108

From 56073fbe2fa7beb04ace087d592d58cb91fd086a Mon Sep 17 00:00:00 2001
From: micah <mdc@berkeley.edu>
Date: Tue, 1 Feb 2022 01:17:18 +0100
Subject: [PATCH 03/38] Fixed test issue

---
 human_aware_rl/rllib/rllib.py | 3 +--
 overcooked_ai                 | 2 +-
 2 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/human_aware_rl/rllib/rllib.py b/human_aware_rl/rllib/rllib.py
index e93a07b0..8e1bfdea 100644
--- a/human_aware_rl/rllib/rllib.py
+++ b/human_aware_rl/rllib/rllib.py
@@ -193,8 +193,7 @@ def _get_featurize_fn(self, agent_id):
     def _get_obs(self, state):
         ob_p0 = self._get_featurize_fn(self.curr_agents[0])(state)[0]
         ob_p1 = self._get_featurize_fn(self.curr_agents[1])(state)[1]
-
-        return ob_p0, ob_p1
+        return ob_p0.astype(np.float32), ob_p1.astype(np.float32)
 
     def _populate_agents(self):
         # Always include at least one ppo agent (i.e. bc_sp not supported for simplicity)
diff --git a/overcooked_ai b/overcooked_ai
index cc8aebbe..7e774a1a 160000
--- a/overcooked_ai
+++ b/overcooked_ai
@@ -1 +1 @@
-Subproject commit cc8aebbe5bb2b43262c4a104a74a7d7a48517f50
+Subproject commit 7e774a1aa29c28b7b69dc0a8903822ac2c6b4f23

From 6dce6d5ac83a6856e08d0fa7205e316650e27e04 Mon Sep 17 00:00:00 2001
From: Alexander Lichtenstein <alex@digitalmaker.io>
Date: Wed, 1 Jun 2022 17:19:50 +0200
Subject: [PATCH 04/38] package versions corrections

---
 install.sh                                  |   6 +
 setup_corrections/setup_baselines.py        |  63 ++++++++
 setup_corrections/setup_main.py             |  21 +++
 setup_corrections/setup_overcooked.py       |  16 +++
 setup_corrections/setup_stable_baselines.py | 150 ++++++++++++++++++++
 5 files changed, 256 insertions(+)
 create mode 100644 setup_corrections/setup_baselines.py
 create mode 100644 setup_corrections/setup_main.py
 create mode 100644 setup_corrections/setup_overcooked.py
 create mode 100644 setup_corrections/setup_stable_baselines.py

diff --git a/install.sh b/install.sh
index 6aacfcda..ee695242 100755
--- a/install.sh
+++ b/install.sh
@@ -1,4 +1,10 @@
 #!/bin/sh
+
+cp setup_corrections/setup_baselines.py baselines/setup.py
+cp setup_corrections/setup_stable_baselines.py stable_baselines/setup.py
+cp setup_corrections/setup_main.py setup.py
+cp setup_corrections/setup_overcooked.py overcooked_ai/setup.py
+
 cd baselines
 python setup.py develop
 cd ..
diff --git a/setup_corrections/setup_baselines.py b/setup_corrections/setup_baselines.py
new file mode 100644
index 00000000..ee3ebdf0
--- /dev/null
+++ b/setup_corrections/setup_baselines.py
@@ -0,0 +1,63 @@
+import re
+from setuptools import setup, find_packages
+import sys
+
+if sys.version_info.major != 3:
+    print('This Python is only compatible with Python 3, but you are running '
+          'Python {}. The installation will likely fail.'.format(sys.version_info.major))
+
+
+extras = {
+    'test': [
+        'filelock',
+        'pytest',
+        'pytest-forked',
+        'atari-py'
+    ],
+    'bullet': [
+        'pybullet',
+    ],
+    'mpi': [
+        'mpi4py'
+    ]
+}
+
+all_deps = []
+for group_name in extras:
+    all_deps += extras[group_name]
+
+extras['all'] = all_deps
+
+setup(name='baselines',
+      packages=[package for package in find_packages()
+                if package.startswith('baselines')],
+      install_requires=[
+          'gym==0.17.2',
+          'scipy==1.5.0',
+          'tqdm',
+          'joblib',
+          'dill',
+          'progressbar2',
+          'cloudpickle',
+          'click',
+          'opencv-python'
+      ],
+      extras_require=extras,
+      description='OpenAI baselines: high quality implementations of reinforcement learning algorithms',
+      author='OpenAI',
+      url='https://github.com/openai/baselines',
+      author_email='gym@openai.com',
+      version='0.1.5')
+
+
+# ensure there is some tensorflow build with version above 1.4
+import pkg_resources
+tf_pkg = None
+for tf_pkg_name in ['tensorflow', 'tensorflow-gpu', 'tf-nightly', 'tf-nightly-gpu']:
+    try:
+        tf_pkg = pkg_resources.get_distribution(tf_pkg_name)
+    except pkg_resources.DistributionNotFound:
+        pass
+assert tf_pkg is not None, 'TensorFlow needed, of version above 1.4'
+from distutils.version import LooseVersion
+assert LooseVersion(re.sub(r'-?rc\d+$', '', tf_pkg.version)) >= LooseVersion('1.4.0')
diff --git a/setup_corrections/setup_main.py b/setup_corrections/setup_main.py
new file mode 100644
index 00000000..e4709084
--- /dev/null
+++ b/setup_corrections/setup_main.py
@@ -0,0 +1,21 @@
+#!/usr/bin/env python
+
+from setuptools import setup, find_packages
+
+setup(name='human_aware_rl',
+      version='0.0.1',
+      description='This package has shared components.',
+      author='Micah Carroll',
+      author_email='micah.d.carroll@berkeley.edu',
+      packages=find_packages(),
+      install_requires=[
+        'GitPython',
+        'memory_profiler',
+        'sacred==0.7.4',
+        'pymongo',
+        'numpy==1.15.1',
+        'matplotlib==3.0.3',
+        'seaborn==0.9.0',
+        'pygame==1.9.5'
+      ],
+    )
\ No newline at end of file
diff --git a/setup_corrections/setup_overcooked.py b/setup_corrections/setup_overcooked.py
new file mode 100644
index 00000000..0eed6848
--- /dev/null
+++ b/setup_corrections/setup_overcooked.py
@@ -0,0 +1,16 @@
+#!/usr/bin/env python
+
+from setuptools import setup, find_packages
+
+setup(name='overcooked_ai',
+      version='0.0.1',
+      description='Cooperative multi-agent environment based on Overcooked',
+      author='Micah Carroll',
+      author_email='micah.d.carroll@berkeley.edu',
+      packages=find_packages(),
+      install_requires=[
+        'numpy==1.18.5',
+        'tqdm',
+        'gym==0.17.2'
+      ]
+    )
\ No newline at end of file
diff --git a/setup_corrections/setup_stable_baselines.py b/setup_corrections/setup_stable_baselines.py
new file mode 100644
index 00000000..eaca23bd
--- /dev/null
+++ b/setup_corrections/setup_stable_baselines.py
@@ -0,0 +1,150 @@
+import sys
+import subprocess
+from setuptools import setup, find_packages
+from distutils.version import LooseVersion
+
+if sys.version_info.major != 3:
+    print('This Python is only compatible with Python 3, but you are running '
+          'Python {}. The installation will likely fail.'.format(sys.version_info.major))
+
+# Check tensorflow installation to avoid
+# breaking pre-installed tf gpu
+install_tf, tf_gpu = False, False
+try:
+    import tensorflow as tf
+    if tf.__version__ < LooseVersion('1.5.0'):
+        install_tf = True
+        # check if a gpu version is needed
+        tf_gpu = tf.test.is_gpu_available()
+except ImportError:
+    install_tf = True
+    # Check if a nvidia gpu is present
+    for command in ['nvidia-smi', '/usr/bin/nvidia-smi', 'nvidia-smi.exe']:
+        try:
+            if subprocess.call([command]) == 0:
+                tf_gpu = True
+                break
+        except IOError:  # command does not exist / is not executable
+            pass
+
+tf_dependency = []
+if install_tf:
+    tf_dependency = ['tensorflow-gpu>=1.5.0'] if tf_gpu else ['tensorflow>=1.5.0']
+    if tf_gpu:
+        print("A GPU was detected, tensorflow-gpu will be installed")
+
+
+long_description = """
+[![Build Status](https://travis-ci.com/hill-a/stable-baselines.svg?branch=master)](https://travis-ci.com/hill-a/stable-baselines) [![Documentation Status](https://readthedocs.org/projects/stable-baselines/badge/?version=master)](https://stable-baselines.readthedocs.io/en/master/?badge=master) [![Codacy Badge](https://api.codacy.com/project/badge/Grade/3bcb4cd6d76a4270acb16b5fe6dd9efa)](https://www.codacy.com/app/baselines_janitors/stable-baselines?utm_source=github.com&amp;utm_medium=referral&amp;utm_content=hill-a/stable-baselines&amp;utm_campaign=Badge_Grade) [![Codacy Badge](https://api.codacy.com/project/badge/Coverage/3bcb4cd6d76a4270acb16b5fe6dd9efa)](https://www.codacy.com/app/baselines_janitors/stable-baselines?utm_source=github.com&utm_medium=referral&utm_content=hill-a/stable-baselines&utm_campaign=Badge_Coverage)
+
+# Stable Baselines
+
+Stable Baselines is a set of improved implementations of reinforcement learning algorithms based on OpenAI [Baselines](https://github.com/openai/baselines/).
+
+These algorithms will make it easier for the research community and industry to replicate, refine, and identify new ideas, and will create good baselines to build projects on top of. We expect these tools will be used as a base around which new ideas can be added, and as a tool for comparing a new approach against existing ones. We also hope that the simplicity of these tools will allow beginners to experiment with a more advanced toolset, without being buried in implementation details.
+
+## Main differences with OpenAI Baselines
+This toolset is a fork of OpenAI Baselines, with a major structural refactoring, and code cleanups:
+
+-   Unified structure for all algorithms
+-   PEP8 compliant (unified code style)
+-   Documented functions and classes
+-   More tests & more code coverage
+
+## Links
+
+Repository:
+https://github.com/hill-a/stable-baselines
+
+Medium article:
+https://medium.com/@araffin/df87c4b2fc82
+
+Documentation:
+https://stable-baselines.readthedocs.io/en/master/
+
+RL Baselines Zoo:
+https://github.com/araffin/rl-baselines-zoo
+
+## Quick example
+
+Most of the library tries to follow a sklearn-like syntax for the Reinforcement Learning algorithms using Gym.
+
+Here is a quick example of how to train and run PPO2 on a cartpole environment:
+
+```python
+import gym
+
+from stable_baselines.common.policies import MlpPolicy
+from stable_baselines.common.vec_env import DummyVecEnv
+from stable_baselines import PPO2
+
+env = gym.make('CartPole-v1')
+env = DummyVecEnv([lambda: env])  # The algorithms require a vectorized environment to run
+
+model = PPO2(MlpPolicy, env, verbose=1)
+model.learn(total_timesteps=10000)
+
+obs = env.reset()
+for i in range(1000):
+    action, _states = model.predict(obs)
+    obs, rewards, dones, info = env.step(action)
+    env.render()
+```
+
+Or just train a model with a one liner if [the environment is registered in Gym](https://github.com/openai/gym/wiki/Environments) and if [the policy is registered](https://stable-baselines.readthedocs.io/en/master/guide/custom_policy.html):
+
+```python
+from stable_baselines import PPO2
+
+model = PPO2('MlpPolicy', 'CartPole-v1').learn(10000)
+```
+
+"""
+
+setup(name='stable_baselines',
+      packages=[package for package in find_packages()
+                if package.startswith('stable_baselines')],
+      install_requires=[
+          'gym[atari,classic_control]>=0.10.9',
+          'scipy==1.5.0',
+          'tqdm',
+          'joblib',
+          'zmq',
+          'dill',
+          'mpi4py',
+          'cloudpickle>=0.5.5',
+          'click',
+          'opencv-python',
+          'numpy==1.18.5',
+          'pandas==1.0.5',
+          'matplotlib',
+          'seaborn',
+          'glob2'
+      ] + tf_dependency,
+      extras_require={
+        'tests': [
+            'pytest==3.5.1',
+            'pytest-cov'
+        ],
+        'docs': [
+            'sphinx',
+            'sphinx-autobuild',
+            'sphinx-rtd-theme'
+        ]
+      },
+      description='A fork of OpenAI Baselines, implementations of reinforcement learning algorithms.',
+      author='Ashley Hill',
+      url='https://github.com/hill-a/stable-baselines',
+      author_email='ashley.hill@u-psud.fr',
+      keywords="reinforcement-learning-algorithms reinforcement-learning machine-learning "
+               "gym openai baselines toolbox python data-science",
+      license="MIT",
+      long_description=long_description,
+      long_description_content_type='text/markdown',
+      version="2.5.1a0",
+      )
+
+# python setup.py sdist
+# python setup.py bdist_wheel
+# twine upload --repository-url https://test.pypi.org/legacy/ dist/*
+# twine upload dist/*

From 75f17b137a2b0b7e12ffc0191560a1195991e1a3 Mon Sep 17 00:00:00 2001
From: Alexander Lichtenstein <alex@digitalmaker.io>
Date: Wed, 1 Jun 2022 17:20:36 +0200
Subject: [PATCH 05/38] package versions corrections

---
 install.sh | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/install.sh b/install.sh
index ee695242..cc7ddfc2 100755
--- a/install.sh
+++ b/install.sh
@@ -1,7 +1,7 @@
 #!/bin/sh
 
 cp setup_corrections/setup_baselines.py baselines/setup.py
-cp setup_corrections/setup_stable_baselines.py stable_baselines/setup.py
+cp setup_corrections/setup_stable_baselines.py stable-baselines/setup.py
 cp setup_corrections/setup_main.py setup.py
 cp setup_corrections/setup_overcooked.py overcooked_ai/setup.py
 
@@ -21,4 +21,4 @@ cd tfjs-converter
 yarn
 cd ..
 
-python setup.py develop
\ No newline at end of file
+python setup.py develop

From 43671214bcce7edeef5fd5ae29fd758b3a5799cd Mon Sep 17 00:00:00 2001
From: micah <mdc@berkeley.edu>
Date: Fri, 10 Jun 2022 16:08:12 -0700
Subject: [PATCH 06/38] Updated overcooked pointer

---
 overcooked_ai | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/overcooked_ai b/overcooked_ai
index 7e774a1a..57696371 160000
--- a/overcooked_ai
+++ b/overcooked_ai
@@ -1 +1 @@
-Subproject commit 7e774a1aa29c28b7b69dc0a8903822ac2c6b4f23
+Subproject commit 57696371ecccc6c25f1b8dc86b1cd1d71f3bda2e

From a413fb5daffdc7d4799a0fa29abb1c9c643e489d Mon Sep 17 00:00:00 2001
From: micah <mdc@berkeley.edu>
Date: Fri, 10 Jun 2022 17:29:25 -0700
Subject: [PATCH 07/38] Fixed testing issue

---
 overcooked_ai | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/overcooked_ai b/overcooked_ai
index 57696371..a778db7d 160000
--- a/overcooked_ai
+++ b/overcooked_ai
@@ -1 +1 @@
-Subproject commit 57696371ecccc6c25f1b8dc86b1cd1d71f3bda2e
+Subproject commit a778db7dfcfe2225e84209f8fa382450baf45ed9

From 343904f032f3d28374f16afa9499084dcc1c7348 Mon Sep 17 00:00:00 2001
From: Alexander Lichtenstein
 <49325191+alexlichtenstein@users.noreply.github.com>
Date: Thu, 4 Aug 2022 21:23:32 +0200
Subject: [PATCH 08/38] changes to the install script and readme

---
 human_aware_rl/ppo/ppo_rllib_client.py           |  4 ++++
 .../ppo/ppo_rllib_from_params_client.py          | 16 +++++++++++++---
 human_aware_rl/rllib/rllib.py                    |  7 ++++---
 install.sh                                       | 12 ++++++++++++
 4 files changed, 33 insertions(+), 6 deletions(-)

diff --git a/human_aware_rl/ppo/ppo_rllib_client.py b/human_aware_rl/ppo/ppo_rllib_client.py
index 2972391c..593e9540 100644
--- a/human_aware_rl/ppo/ppo_rllib_client.py
+++ b/human_aware_rl/ppo/ppo_rllib_client.py
@@ -57,6 +57,9 @@ def _env_creator(env_config):
 
 @ex.config
 def my_config():
+    ### Resume checkpoint_path ###
+    resume_checkpoint_path = None
+
     ### Model params ###
 
     # Whether dense reward should come from potential function or not
@@ -317,6 +320,7 @@ def my_config():
         "seeds" : seeds,
         "results_dir" : results_dir,
         "ray_params" : ray_params,
+        "resume_checkpoint_path": resume_checkpoint_path,
         "verbose" : verbose
     }
 
diff --git a/human_aware_rl/ppo/ppo_rllib_from_params_client.py b/human_aware_rl/ppo/ppo_rllib_from_params_client.py
index 0b8552a2..4da4c673 100644
--- a/human_aware_rl/ppo/ppo_rllib_from_params_client.py
+++ b/human_aware_rl/ppo/ppo_rllib_from_params_client.py
@@ -32,7 +32,7 @@
 from ray.rllib.models import ModelCatalog
 from ray.rllib.agents.ppo.ppo import PPOTrainer
 from human_aware_rl.ppo.ppo_rllib import RllibPPOModel, RllibLSTMPPOModel
-from human_aware_rl.rllib.rllib import OvercookedMultiAgent, save_trainer, gen_trainer_from_params
+from human_aware_rl.rllib.rllib import OvercookedMultiAgent, save_trainer, gen_trainer_from_params, load_trainer
 from human_aware_rl.imitation.behavior_cloning_tf2 import BehaviorCloningPolicy, BC_SAVE_DIR
 
 
@@ -83,6 +83,9 @@ def naive_params_schedule_fn(outside_information):
 
 @ex_fp.config
 def my_config():
+    ### Resume checkpoint_path ###
+    resume_checkpoint_path = None
+
     ### Model params ###
 
     # whether to use recurrence in ppo model
@@ -368,6 +371,7 @@ def my_config():
         "temp_dir" : temp_dir,
         "results_dir" : results_dir,
         "ray_params" : ray_params,
+        "resume_checkpoint_path": resume_checkpoint_path,
         "verbose" : verbose
     }
 
@@ -377,8 +381,14 @@ def _env_creater(env_config):
 
 
 def run(params):
-    # Retrieve the tune.Trainable object that is used for the experiment
-    trainer = gen_trainer_from_params(params)
+
+    # Check if any resume checkpoint given
+    saved_path = params["resume_checkpoint_path"]
+    if saved_path:
+        trainer = load_trainer(save_path=saved_path, true_num_workers=True)
+    else:
+        # Retrieve the tune.Trainable object that is used for the experiment
+        trainer = gen_trainer_from_params(params)
 
     # Object to store training results in
     result = {}
diff --git a/human_aware_rl/rllib/rllib.py b/human_aware_rl/rllib/rllib.py
index 8e1bfdea..8a0d2a23 100644
--- a/human_aware_rl/rllib/rllib.py
+++ b/human_aware_rl/rllib/rllib.py
@@ -622,7 +622,7 @@ def save_trainer(trainer, params, path=None):
         dill.dump(config, f)
     return save_path
 
-def load_trainer(save_path):
+def load_trainer(save_path, true_num_workers=False):
     """
     Returns a ray compatible trainer object that was previously saved at `save_path` by a call to `save_trainer`
     Note that `save_path` is the full path to the checkpoint FILE, not the checkpoint directory
@@ -633,8 +633,9 @@ def load_trainer(save_path):
         # We use dill (instead of pickle) here because we must deserialize functions
         config = dill.load(f)
     
-    # Override this param to lower overhead in trainer creation
-    config['training_params']['num_workers'] = 0
+    if not true_num_workers:
+        # Override this param to lower overhead in trainer creation
+        config['training_params']['num_workers'] = 0
 
     # Get un-trained trainer object with proper config
     trainer = gen_trainer_from_params(config)
diff --git a/install.sh b/install.sh
index d5c276b4..2cd021bc 100755
--- a/install.sh
+++ b/install.sh
@@ -1,4 +1,16 @@
 #!/bin/sh
+
+# Install git-lfs for OSX
+if [[ "$OSTYPE" =~ ^darwin ]]; then
+  if command -v brew
+  then
+    brew install git-lfs
+    git lfs install
+  else
+    echo "Please install brew and run the install script again"
+  fi
+fi
+
 cd overcooked_ai
 pip install -e .
 cd ..

From b2336ff0ca3b25e725efd79643fc09b00b400e4d Mon Sep 17 00:00:00 2001
From: Micah Carroll <mdc@berkeley.edu>
Date: Thu, 4 Aug 2022 22:56:23 +0100
Subject: [PATCH 09/38] Added PR template

---
 .github/pull_request_template.md | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)
 create mode 100644 .github/pull_request_template.md

diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md
new file mode 100644
index 00000000..5c34820e
--- /dev/null
+++ b/.github/pull_request_template.md
@@ -0,0 +1,19 @@
+# Description
+
+Please include a summary of the change and which issue is fixed. Please also include relevant motivation and context. List any dependencies that are required for this change.
+
+Fixes # (issue)
+
+## Type of change
+
+- [ ] Bug fix (non-breaking change which fixes an issue)
+- [ ] New feature (non-breaking change which adds functionality)
+- [ ] Breaking change (fix or feature that would cause existing functionality to not work as expected)
+- [ ] This change requires a documentation update
+
+# Checklist:
+
+- [ ] I have commented my code, particularly in hard-to-understand areas
+- [ ] I have made corresponding changes to the documentation
+- [ ] My changes generate no new warnings
+- [ ] I have added tests that prove my fix is effective or that my feature works

From a70392909bc8e59ec4ce58278a821c471990a5c0 Mon Sep 17 00:00:00 2001
From: Alexander Lichtenstein
 <49325191+alexlichtenstein@users.noreply.github.com>
Date: Mon, 8 Aug 2022 19:09:55 +0200
Subject: [PATCH 10/38] fix to install file git lfs/brew

---
 install.sh | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/install.sh b/install.sh
index 2cd021bc..d8fed619 100755
--- a/install.sh
+++ b/install.sh
@@ -2,12 +2,15 @@
 
 # Install git-lfs for OSX
 if [[ "$OSTYPE" =~ ^darwin ]]; then
-  if command -v brew
-  then
-    brew install git-lfs
-    git lfs install
+  if command -v git-lfs >/dev/null 2>&1; then  # probe the git-lfs binary; 'hash git lfs' also looked up a nonexistent 'lfs' command and always failed
+        git lfs install
   else
-    echo "Please install brew and run the install script again"
+    if command -v brew; then
+        brew install git-lfs
+        git lfs install
+    else
+        echo "Please install brew and run the install script again"
+    fi
   fi
 fi
 

From 67f5dd6863e449f2a57703d0d9470c2a5404b7d1 Mon Sep 17 00:00:00 2001
From: Alexander Lichtenstein
 <49325191+alexlichtenstein@users.noreply.github.com>
Date: Wed, 10 Aug 2022 19:59:47 +0200
Subject: [PATCH 11/38] add additional documentation to the load_trainer method

---
 human_aware_rl/rllib/rllib.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/human_aware_rl/rllib/rllib.py b/human_aware_rl/rllib/rllib.py
index 8a0d2a23..222e820a 100644
--- a/human_aware_rl/rllib/rllib.py
+++ b/human_aware_rl/rllib/rllib.py
@@ -626,6 +626,9 @@ def load_trainer(save_path, true_num_workers=False):
     """
     Returns a ray compatible trainer object that was previously saved at `save_path` by a call to `save_trainer`
     Note that `save_path` is the full path to the checkpoint FILE, not the checkpoint directory
+    If `true_num_workers` is True, the trainer keeps the number of remote workers stored in the saved
+    configuration; by default (False) `num_workers` is overridden to 0, so only the local worker is used
+    (see the ray library Training APIs)
     """
     # Read in params used to create trainer
     config_path = os.path.join(os.path.dirname(save_path), "config.pkl")

From a4a5cd19f013e366df80cb0bdcae7658663d8a42 Mon Sep 17 00:00:00 2001
From: Alexander Lichtenstein
 <49325191+alexlichtenstein@users.noreply.github.com>
Date: Wed, 24 Aug 2022 01:02:46 +0200
Subject: [PATCH 12/38] adding trained model and test on resume functionality

---
 human_aware_rl/ppo/ppo_rllib_test.py          |  61 ++-
 .../cramped_room/checkpoint-500               | Bin 0 -> 185376 bytes
 .../cramped_room/checkpoint-500.tune_metadata | Bin 0 -> 214 bytes
 .../trained_example/cramped_room/config.pkl   | Bin 0 -> 2544 bytes
 .../trained_example/cramped_room/progress.csv |   3 +
 .../trained_example/cramped_room/result.json  | 500 ++++++++++++++++++
 6 files changed, 549 insertions(+), 15 deletions(-)
 create mode 100644 human_aware_rl/ppo/trained_example/cramped_room/checkpoint-500
 create mode 100644 human_aware_rl/ppo/trained_example/cramped_room/checkpoint-500.tune_metadata
 create mode 100644 human_aware_rl/ppo/trained_example/cramped_room/config.pkl
 create mode 100644 human_aware_rl/ppo/trained_example/cramped_room/progress.csv
 create mode 100644 human_aware_rl/ppo/trained_example/cramped_room/result.json

diff --git a/human_aware_rl/ppo/ppo_rllib_test.py b/human_aware_rl/ppo/ppo_rllib_test.py
index b86fc597..bf46d7dd 100644
--- a/human_aware_rl/ppo/ppo_rllib_test.py
+++ b/human_aware_rl/ppo/ppo_rllib_test.py
@@ -10,6 +10,7 @@
 from overcooked_ai_py.agents.benchmarking import AgentEvaluator
 import tensorflow as tf
 import numpy as np
+import json
 
 # Note: using the same seed across architectures can still result in differing values
 def set_global_seed(seed):
@@ -35,7 +36,7 @@ def __init__(self, test_name, compute_pickle, strict, min_performance):
         self.compute_pickle = compute_pickle
         self.strict = strict
         self.min_performance = min_performance
-    
+
     def setUp(self):
         set_global_seed(0)
 
@@ -57,13 +58,13 @@ def setUp(self):
 
     def tearDown(self):
         # Write results of this test to disk for future reproducibility tests
-        # Note: This causes unit tests to have a side effect (generally frowned upon) and only works because 
+        # Note: This causes unit tests to have a side effect (generally frowned upon) and only works because
         #   unittest is single threaded. If tests were run concurrently this could result in a race condition!
         if self.compute_pickle:
             with open(PPO_EXPECTED_DATA_PATH, 'wb') as f:
                 pickle.dump(self.expected, f)
-        
-        # Cleanup 
+
+        # Cleanup
         shutil.rmtree(self.temp_results_dir)
         shutil.rmtree(self.temp_model_dir)
         ray.shutdown()
@@ -144,7 +145,7 @@ def test_ppo_sp_no_phi(self):
 
         if self.compute_pickle:
             self.expected['test_ppo_sp_no_phi'] = results
-        
+
         # Reproducibility test
         if self.strict:
             self.assertDictEqual(results, self.expected['test_ppo_sp_no_phi'])
@@ -249,35 +250,63 @@ def test_ppo_fp_sp_yes_phi(self):
     def test_ppo_bc(self):
         # Train bc model
         model_dir = self.temp_model_dir
-        params_to_override = { 
+        params_to_override = {
             "layouts" : ['inverse_marshmallow_experiment'],
             "data_path" : None,
             "epochs" : 10
         }
         bc_params = get_bc_params(**params_to_override)
         train_bc_model(model_dir, bc_params)
-    
+
         # Train rllib model
-        config_updates = { 
-            "results_dir" : self.temp_results_dir, 
-            "bc_schedule" : [(0.0, 0.0), (8e3, 1.0)], 
-            "num_training_iters" : 20, 
-            "bc_model_dir" : model_dir, 
+        config_updates = {
+            "results_dir" : self.temp_results_dir,
+            "bc_schedule" : [(0.0, 0.0), (8e3, 1.0)],
+            "num_training_iters" : 20,
+            "bc_model_dir" : model_dir,
             "evaluation_interval" : 5,
             "verbose" : False
         }
         results = ex.run(config_updates=config_updates, options={'--loglevel': 'ERROR'}).result
-    
+
         # Sanity check
         self.assertGreaterEqual(results['average_total_reward'], self.min_performance)
-    
+
         if self.compute_pickle:
             self.expected['test_ppo_bc'] = results
-    
+
         # Reproducibility test
         if self.strict:
             self.assertDictEqual(results, self.expected['test_ppo_bc'])
 
+    def test_resume_functionality(self):
+        load_path = os.path.join(os.path.abspath('.'), 'trained_example/cramped_room/checkpoint-500')
+        # Load and train an agent for another iteration
+        results = ex_fp.run(
+            config_updates={
+                "results_dir": self.temp_results_dir,
+                "num_workers": 1,
+                "num_training_iters": 1,
+                "resume_checkpoint_path": load_path,
+                "verbose": False
+            },
+            options={'--loglevel': 'ERROR'}
+        ).result
+
+        #Test that the rewards from 1 additional iteration are not too different from the original model
+        #performance
+
+        threshold = 0.1
+
+        with open('trained_example/cramped_room/result.json') as f:
+            j = json.loads(f.readlines()[-1])
+            #Test total reward
+            self.assertAlmostEqual(j['episode_reward_mean'], results['average_total_reward'],
+                                   delta=threshold * j['episode_reward_mean'])
+            #Test sparse reward
+            self.assertAlmostEqual(j['custom_metrics']['sparse_reward_mean'], results['average_sparse_reward'],
+                                   delta=threshold * j['custom_metrics']['sparse_reward_mean'])
+
 def _clear_pickle():
     # Write an empty dictionary to our static "expected" results location
     with open(PPO_EXPECTED_DATA_PATH, 'wb') as f:
@@ -302,6 +331,8 @@ def _clear_pickle():
     suite.addTest(TestPPORllib('test_ppo_fp_sp_no_phi', **args))
     suite.addTest(TestPPORllib('test_ppo_fp_sp_yes_phi', **args))
     suite.addTest(TestPPORllib('test_ppo_bc', **args))
+    suite.addTest(TestPPORllib('test_resume_functionality', **args))
+
     success = unittest.TextTestRunner(verbosity=2).run(suite).wasSuccessful()
     sys.exit(not success)
         
diff --git a/human_aware_rl/ppo/trained_example/cramped_room/checkpoint-500 b/human_aware_rl/ppo/trained_example/cramped_room/checkpoint-500
new file mode 100644
index 0000000000000000000000000000000000000000..03752ea8362dd953385d305f6e2a046b697fac91
GIT binary patch
literal 185376
zcmXVXd0bB47j=V1Nt%mDDN&Lks^{z@8qE<Zg(#JzIYg97GomO-lT^x(sp&cU2nk<P
zrXop%id0B4zrFA8eeUP;{BiES=gxbtz1F%>;=6)G44frJL`1d)1aICL93(pL{$nwb
zsQ-;i4UKzk@(bM<91<kDD@e>)d}tsrFd#_WBY1tdaj>7?rVYknp_}|djQ<xVyClH&
z|HaD&4V$zlNFu~pVrYwy(Dk7kgCutaN%;)TD{bJc_P=SSdIb1yUAM`9Q|PAkep83`
z?!VD5NXElIY)fFcu}47gM&m7Eenabmhju2rE_mZmct~h)m`A8=knI1WWc@w=w>D__
zLXo{ea#o@nf=0v#$uAEYX|Pb#V4=j&qPb8bG)Uq9$9sl`2X2%N8s#kZe@|^@kYZv&
ze2~)eAZ3Gv;?AN&V<bpr$&w`t{tf;9-`_%!&>+<fiSs)Dh=}wIxZ-Wq8T8U3!ghw=
z!Kw{yg4nJkloij%XM+7OtKvPZvKbCvH^<;G(PUQGCdNOfxuD6GQFQqHddM?1WAWN~
zL}zwZjrgSPAl<Z%8;VZEsAVI${!c}EH?Gy<;5>c4;F6Yw>#rEpKU55Q4{{*nUO$fc
zpvtd)k_J~(Y5ui!0(`g}0p=UV;c6RO9w{P@U%SskZFvPucb-Tq9TG^GSvp%Ly%|RB
zC=rCeHigLTPPFstXxL^IgbIG;WPQvQIQQO)PARfwo36fqnR*8i#^y5x`&jyXSqfLO
z&gS=)tl$>W1-S9#B(9`%m24YpKp%FW1()RpeC{+E-kq;Wm)WnRA1<E2-i$g_W5aoC
zj|9_tnnZ5Js`HeAv-FCHF`e}4Gu}Sz&PbOdc1h`?<u41a`CV*|>V!KqsYM0qd+j+}
z*IX00<ufe*w1}B+XoYOMSbF(f4rNPcvVX?1#819}uhdTCI?ihRR;V<69Pk+YR95ms
zQ~cQbwivvjmBvRkaHe?THU7D@8^ppG+6B(!Ym}zIrey~N<_QOAV}?77F0|s7GIG4u
z`UF-wO3_zaW@D-L6s+B8!F{g}g9NE&y!^wIw|7OMsAC~7pL&&~d$q7K9>e?Z9pTOa
z(O{aIhzb(AJl69dmPHm(J?r^E-J69$+!r-U7qMY_yI{dUCdfR_29XEeMB?c}n&@!?
z2e+AWI~Q~Qh<=2JpMK%rC$p(&c{oIm9KoBGx#EtsU-9R^9(Z{FD6~1tKzZjs+<#n`
z-~E<`W9OUTzda4`$MYQUv3|UyIFg;({SRy+6G1)ZFj(D6#p7{TVf2<#czwr}7v~Fw
zs#db}<wy;5l}HwzPpIP8t<3qD(*>Md@Z<sEVRZP%L)`I)8XeIuKj+KKcl=C~J`D{u
z<TJ|8fXc>7qWP-{KFfCE=2jQla#EGrK31epE{~@&bJoJW&8K0G*I&pkT}KU9ou-=`
zQqg{X6Vd5x0d2+I)F?)jJGvy2xA`{w^nx!4o3Ai~$z?3OR~^57?juuAoF(Duv&f?K
z>(sn#I;}Qzfy#^$Xc|=yEnX*35Y>)HuPme^$7t{;hKIl;W(l8QPbscV#~O)UFc{N}
z=N{Fv2)iGcFw9jLpIb*p3!dTC9WnU2%9;Lq6eVzIQAH};gJ~BNP|oow6VqKlo3BUV
z%Sr-QcRBGNuaNXfCef$qS~N#ljqaMYlVn5&V04QUtnNxdTlW~~-kXowo>|aq<wvHb
z*}+aBg&7abL2sBm)b)<U=-41A%|D9&SOF#sZxXCs`-!a39)VDngw0(NxIHw~Li?Ny
z1dP`vyZ%<N-q=KpO#ekPN@kP$_f1Ie!$YXNM;o&w_QQaM76t}w#C7IoV0z^=`?>ca
zizpU{&vFR@$$Mo;CtSre<xRM!ARDD+%s}(TGoq?@g>A7v2})89&_6N(6B<Xs;xoQj
zxLh9{9vl%|Xj?%%&F3<?C%#~=n1Br`2jJAS(}MYTY{2>cHk9}=8gh>M5!YHvEH4`c
z{k<trA|eACJ_IeEWi!J~s;pva3C7R1z!N*>Gi?nkn5<=wS(_Dcm|Zoq7E#3>+X2=y
z-V3|K=7GYPyTXIl7vomlsc<o755A53MAS!3CFuitAX3B3$*k3E)j~%Y=05?<6wVM!
zFK@IQbSG`OUSOHDQ$V^4iSrR(Xz2DJt>?Wl$NK{FTTn?(FF8(B{YQgQB9aMa&&kiz
z^Wn*01Ce%}hwi1?WcTG55Ge_&mayN8M#IvHUn&4oo&^rGOUZ=GtDttnQ9)mvH*3w<
zj)A#J!WE0ZkUkAA*gv_4xyj97%Gutqf3zK@XFX(#65>hunmEC*MP)?cy$z|Ayd*Sn
z+X1E0fw<c$idpI&7dq4x3F7SaP<mhj)RvsE7`9ChvM#r?{yEX0YZs0pcdphb%n1Rb
z<;gIRC5^-CFSEXX>2U4*U+lVT#@FXn!beMYp~UJq_*V29e9Pan;i?<?7MF6IRhJJZ
zJbd89TV<?zb{1{cP2yz>B=}{^i}10o4-@Z5^O!p)h(KJ0@3Yn-5n~Ud)mBrgKBt;(
z2#bNL<;y|(=?)Z$`da0-F962ekAY4tWAv;nCV{`A!9sf)nxBxtpT{Rd?P0XYlsAWk
z@!?o?^1aZZ$`tZms*<t2=UJ=ya_H<(Wy4pwVpK<+#WR03q8=FtI@|AqQe!%Fepw8^
zCIPfe{U$iEM;eo=9$>My7oPB41k(DqVA{qya;8{^pNsH<w`1?%wWATx{!s?bdPza;
zbY<F_up6~Ty@TwZ!|AV$*(`jA6J5FC4GBA_%HxiIWZloEf{5RRs?%y4;O?(L+};s}
z?O$Us@5?`7pJNn+dMbjCn>d@ieH2QkwKKDQlB}d&6&g-#Lbns5IB&u$mOn}vTmD5r
zn`;UNS9rl)o9S?&J_w_N1}#>;--L>N!*Ihn3N>P{*`_Dy<m<Q!QaDrx+f*h&gywqI
z@S+Mg?^VHOnbMkvU5lBH>J)q{>4>&-f>}b0E=He-z>fwK;be3;zS{qdT)bxvIASes
zk@!!rs$e1eE0}`!cU{6u6I?+lS4esrbTKJxE`NG#HGTH^9Q$?uFk4`J2G)Fi4|>xC
zT)6m>uzmSPj67#YV=||5Zm!4Ao+`xsGkhUA_B5R9Jxtwt9Ps0VDo7Sdpcw&qJW~Gw
zuCPAEd(?LGf0>2oSguY*FKf^St9Tf3SBtLG7zb%(_gL_K%D>fV(x4T#T>GLw%^lH?
zYrk(J-EMhw=3p!zbx9i!ZjT555_^8Sw+k=zkH#xS7rD}`Sni(SOFeGY!_HP+^3dxC
z^qH&hJzASEFl`_A9>OewBVo)$sKw*j47tMGw{W#92y{0Vpn~ccnC)rGue=}0=NH?;
ztmw1C;;}b}a`(`2+*ZZeov)!V{vA&L=FG-xNTTw<5z_FvQV=*wf|S?C<D<J3>~Cfg
zM5U()e;3REY3=c#aKILnzeTdRDogxZ76sZb@`aXTE|KzC>!DwNfFzuqjG3x>%%N40
z&AuIld4kQt&O*l4s*Z%8FWt%4>!FxuIS6zA3Q72*RPc0G0<A7}P>Az_$k8WZdiyF&
zDlCENUuOxgNan&1&C}2jUqois&g9w^8Q?rV9V;IhQ5}yKGV!Jp95FbJ#&;G#+RK^n
zdwevG+EmQ$Tr2>!a1kn98ABqo_T$>aZg6A%NoKV5H1p|MPXBYQBF986@Qu=A)_Q+G
z1TAl6GVhB}HSQ<=NSeo-cFPg3JZIkWbU8?DdPYP|)#>^->+rFfCZ8iXNt7-e27A~9
z;R~O`wa-qtdGH#uUmXS}BQN4a$JY?%G@Q;ab$|!Iy+Gw>nMKe2LoiKpCVX&Dfc)--
z_-5h)^j{>(>pD{5>_|71dLPK9w&~F?pT5J1c2h9TJ_R!%hRc0$q!;FOV!(lgyuri{
z>caPeZK*$%32~&kc@LoWX@AYt9S?BjM@`tgy#`#KrE&Qc!}#yduW^K`KRITxlP)^z
z&5K*q>BrJCXsFJHS5~v}Ytu!%(iX)WTot+Xl~b^q6wxVL?5X4P(Ohd+1l63>#M*r%
z`R18>i1OADYPekvNB(i)lf6n<o0ckgk};64k;pWZ($PQB5l;9N(`g&N!@!8AEF)~_
z{{I=l*XWHQxyMcMxrrGKB9mEONt?iV(oPzEf`feGf3U1-4bQx+%q0h1=@I)ZGF9v?
zJSn_Q8~rEH?#ctO<;p(#t<Q<8nkP{Wuj{CM><ea>t-#&7R*<P=N#(wMgt(WsJZ6^z
z4c{@Ijv|*y#_!8GUE~q6;v48`^$u@Vj_2Q-GH~^R12pS?FG>bKVO^cdXxY?71b!>A
zE+`Ls3dh3L@4xUz$rAE%@_BwLC!J^9ynyMie#0LF8S2`z7r(bp!}=?+U_b2`o|$kI
zYW7UR@+*&pk|RubphF@A91@{wfx{ti97mHALJ;5o2XuA?qP@*Ws8)&xTQ^;J`mhUZ
zbj10Ew9BCK_?+PH&P4Fst<CE`8>7ksX*&GcbI{i5hOaaox(pA3rqOIJd3}PwBzQ4s
zzF8#5N(}~qttUDz&LWH7+=n;a#@PIOGnSiv#Yta9NV|(MA34{M@jG#_B~OeeJuSD`
z9r7I`F75}hqGHxD;yw&3RiV~>Uo6H>7vav<i`dQ_d#?Vb5nAu&gVLl1d{!Avi>lOd
z%;GX=mR^9x(rVmoMH4LXm4>f=w!Br*nNNP!#g;0M<8@v}v^{wf_Dg)m{N$^29T`D4
zPW?fi?tTE<<^F@RMrEp)-vJBitH3BM3bswp1LKpCe6eX55yp4Ij~mUPxK5r6T{196
zN0gpAw;b)F#mTXv^C+7ofgww)K%w?C7)zc2wORr(&yIoX{x(S1t8Y;rsLJPeCNu4l
z3bJopCwn@OK|Ys!#Bq&kFfrQ$+CF~72Z<|T>~sm}?--9=YjjDl%_H(8>MW%1S;cyG
zuEV1kgxf#<A?mXh!uWEIM{;|GlQ*UaN_7NSkds?|IYb?L&TPS*&GFcKTp8yN^_ds1
zdNbJrKuo5bC)u8Ra76S;Gzp!IEve51%jFx1?%m7erqmkTVIzvp`V^-bWq{~IEi_O6
zfWD3ixM1c@7I#riXrbCcJlhnZa`txSo%H~#q?O6_RXX7EeH%DD^21vRhfo_^nbNW}
zIDhsC7XGdvu^P@~@O%<E=@EyHf2^V1<38Cs{Ul6DvB$M{9YCf%2&0Fopzp0R&~Nv^
z?;XcL&D{}{UZtX3?P%;C!f3J|l_2n7sqk)A0$3hSWKr!Z0*8lAD5o0$+goH{|MCmW
zqCb{3+!+|c?><n!#TZj(*RZ>PUXbilK6tah5=T_aV%9kie6+KPnT2<=&W)qt{S;9w
zpSunMi|(*5e`Z2dl3|UB(G0?Vt-&QTmty|`JB*rEE(p|`38wi{n7ZX4vA8oA_L%KJ
zucgvh-qtJ3blF0lEtp6K`quz4I?ZO58-h>kQSwHh4`N<Mke9GR5GY+qqBK7e<$2$T
zgN-@Uc2yS`4DMz#jcZ7^??g=bZ!}m|9~LNF+9S+soJ*GE%z=4r@>p)M9JP<n66RTM
zgIL!$%)~EM&~X2wu(mA`M8eEaVWtWU`<6jO^wtUgc^#q0S4&e%?LxA(t_t^_`H9}6
z?g>W9OYp3^SfsP6u&MP2+0E`lhr2jFcRY^{-8HCIC`W2`+=QG;4!^f<GuMcC2O&2M
zKr?U@yq$Itd(GX!XWjslAL>in{!V6@drjHW+)#A>u^IYiEo1$&Z;+H*rff`S1m+Fw
z5Gb7O6;zE$BP2qd<z=20w8=dsYgF`6u6`cw{uO|MR`S3W+QBV{x1|5wa@HfU6zT?^
zlDku5VQpq2njR6srWZNzD}OA9yi9iO!cQ!Yp9WD+y)FLsy<|swO}OPKcOEpq0g6Uw
zlO?fkv_jkg>{o=rd$BO#-i%D>l`2B_B31q+c^xkMAr8NDP7!NAS7zP#2d|i_Vb`!E
zR7+CF^n`erHL0Fi)|CoGB7B+gDPtH`Wej~Icfl}MA5i}Bp7lR=Muq(=FlqH3I3AjS
z#A7?Krd*;vn1DV>uSinYHeBVh7wW6Ov#1EPD0$dO2EtwNitAkVe*Sx=K6{VwdH8j~
z`PIqnNzYdnod1MD@>Ei0RbJDGqsXJ?+j#J51_r#e!FdPD7^(j&@asN^E<-)3u7MMn
zOq_wuMb0FDs2}ksuh>Y@ckD(ugWFU0psdGa-28Jq7(05Q)QL~1fBhzI88G7eRwu#U
zU6Nqpkj9^OL~zZM`{|7%KS1~B5qf&?1yP&R4)R^Yh`MtI%(<V-LbqQ>rSM~@HrRrr
zKFJHdFU=;R8lL=Z+!rj1b)nDfGlk(^D+KA^3c=^_B=%{VF>Xwgf+rsjL0QXbKB4gx
zZ(Z^R3xwlnX_qNi&AcMq+M-H77|o|gP11#=Z8`n7M2r3!o6PTuU*RV^JopHj&Cf(W
zBdxOv@LPX1Zl8G0;#|)<?j%tTuhvfGnz_oD_Fe|h{(MNhwcdcvqkNi}k_72-=iuP?
z0_Zxc4QzN56jwWdTAU1;99E!V%Q`T5^(UNaQ3@WiOSou`A#G9<p-Wmc_+MRT{8%{>
zGg&N|C!+;9cUIupv396EVXM${K`fMPyGrb}0j4ikhWg}a>@zPTb#dO<^4}(uwQgsj
zz3bSec_NVayi)+@qtJbO3~nF&gguSigm;^kG2;tXK<@s-^qQsUwD}gKzC>`@s6++$
zPmu9wI4oZC1ugwIlV>MGVe(2Ta<!uaZ}yqOv}Y4wv(Ioyd?-h6ui1>ZR)1uYW1}JY
z?nqcQQ5#baoq<Qcf7ZNebcet)A>PR@fvLTX`2N-zK)+5wN`NBItoHzM_pvBAo=oI~
z>F|2?RgnDXzz-ih%w*(lLW6$*`1{1rT!RsGcTEHse_lZK67;ymks|yO`h#2xa;9$z
z_foYHL%2|FER9KQ!@wa9X4AfT;F;G)Dq{bGlcq-8;N9w)N;Mf?C2PvRoXNzY+{$i!
z>cq7j)mT1xE}tRO$JY0xgUye{c(W)B<F~ZI-Ba4U=imjLr*RcJ<QJ2%OJAdokuR*a
zmw{zD|KZJ*NpMN_Hs2I5jf#4y;e=8b;fq!+9G^b`H#VBmebZl{$xa8JTzZ;y^7$|}
zq!HL=5$>|(B3SLOg^M+Z(J9OWMDITaS?{fM_-HRGvs;Wh2L$uUc40#Ei~u<CV;0xB
zBV=RL^Z4~|oGF-PL+N%37Ru6mZNoEI=eH9r#$`ZKku?6xmZlnsFR^)69N4)eVD~?5
zn!IETZyv(%WVak&+ItCJxk%BZN?)3{b|h73OeK2B8A98qbLf>LW>EiHkqqydL*+zg
z@Z?D+s35b4>DwM+O_u)LDzg-eWoqa)@hp60qKjd<4{*85ei{+>1Nn#BxGc38hsE@f
zwj<YxlieWv@Xy9xD<wL2M>7<3$?<Q+R(SvHDms0z4WFHT4&PPpk>30(C^ca?^*JEV
zXP<GxhD13U5-|vN3gWaYy^1okXxb`h#$%On!X=6ld~)9=F6ncSmAue_fnrHo-E&?b
zDwLuwi_c?{$utsEql5bv3YpBSm!RSziD@@3L$7xc>v^rs3qA_q#6eM7^KC6;J{E(B
z?M8f-<|LH1IRILN#lq`9){&Lh`eEsg5me@AD3<A~(9S+ldX*V*|3F>3KQS0D&uPJG
z$!@Toc%I}m|A6p$+mWl-P|>WL#HIAF!1VfR9Obwh|Fz%5$N9zFtndrIF_1!sYDM}i
z;|Xl2uO#`lN3g1SHP4UUi>a#<AtUh`-kux_L9tKa-T82y!gujiN1Gt`l@U=C`wzpG
zh;zFWmE@~V7EzZW@M)C}Z_*gWa>VvQrRZ#6cA7lGSBpnx)j-^jUN{<Bif5CKvBkQp
z`QLGcWV(kTWGPyM;gVFmmAnj}auE#Vqj*Mp3oN~4hdu_*aBO@E`7~(?Z){THY>qj{
zD|1Qa?<6J?;lZ^g|0Ly^q9_eo<g{B1{0)u5mg>tmx3LnGrE>6b=uRGc&V)N^`eLt9
zGM}9l!>tzl2bF_S=;QVpB<||KrPX8EgW02@ZB80i-3ev;o`>P+;5LEl+B66jG+^4;
z3<&w+N){&@kRIQ7_T8oef1iGipNIm9G}s1@jwRxQwXcO)D@p`Eln!Flye!h!av0a{
z=_l3P0X`W$Bq~KY>~P{@eA_w?4G+GD*~QDr#VxsHQ}=Ax8YW_4S|knAa<0O8<N0Jp
zg+2>f^MUx3o*Jr01lCQ;COPwz@${b(QnVukeQh(br*<WK<P`!BMHjG_qT6B8E*}<=
z)4_%ph%%3m+w4?UA|8=TL2c<x<kk>Nx}g0elq@M@lh@0`zF2eosQHIUD#~K#A8p+F
zDwll?Dq<lkb+Pnr1B#y6id)2n_^G|0S<UF5?9H4u7G;+sC^dP{+O{|gpM6an;@qxS
zw98ndxt=_pe!3Ton-^luhw+eVS10J4;!Vh}ad>8rEcUCvBjv&YQelyZYv;~~3gJw!
zye=iQo)w5s;vbL{S3exMS|E&&jfP!%Ga)(TAUOeX>_vY9Ry%Enuw_SuT|OCv&rQIc
zWywVEj2C2{pM&vpdzs6(3g#$$j^3uV7G=k*Sz+-62vnI$UJOXUGOtV~a;*sRN4P-9
zDSM2nlVsyR$D(xQTBseqK-gDzS)kMA1ksv0Xy#tT#%`N|Zgv6SHva<APFgLL{0L}x
zKnfo@t_0=ANQR#_pvb8RfpSI$krO>n`c}mYMW#8DVbi+En*arDu&fX|iCt$CBvvtl
z#Wz3^&a)SbJRzV)jAtu;0ol_3V9K5p7#^R+R30mG{-};DXewvH#Fs0nEP@5ce&I7^
zDV~ZR*mGRKKV1@~2Ucj&Gaqxo-Hk)X?MtAhR0`VYDCl35LRO?HfMv&t8p8#~_<qJC
zW_Hh?+?7*@Bi+$3es?5VxDv>7n<Z4#c7nEfr9$^gd9u1|4=l+Lfxmf|m^QZs6N^3)
zb<O}p8UvuH|2c~a+(1HC%Omr=k6Vo1VC%+MezJ2t=!!M7$b?fM%d$cC4HFLNJc1R$
zyM?z`{9tEyEAa1XJBe~cGBl0X#S*jc@G7SoWN$tp>n{eAfvFF0+}AvTzGEfcet8Hz
zr(R=)t=oyozbWucZXw+NQNk>L#0qb$3qT#^Nsw1pO&*O}4yi|ekqw_tGXJVTwCI<^
z&#z*zKfHq6{izLpaYNie^DZcfvLHvVspF#QXIZ-E5WoJ)15Y2f!iq`{eCBA+zQx<I
z`shk>VMdPdQfNJya@?6MHWx$XGIfy4kAhJ(Bj90@DF3Xk0MAz~BCg3#(cNASm;QGE
zm$HvUKWGtt4&8*UiQ{n9tZ0Z0TnkgqPQhin3<Wb6D8M4E6QHWR5h^W&V3%XWBX~SG
zTpvqen;nfj_>S4_-T`NmBZ$A^biThyk-B-ugXd8dYUz9sJZl~^wqBpy|7cA$*cl=d
zJ_bfU(Gw(l|7M$>>oWK4lPq?)ETt#qhH_qRK3%=r0}I^V!05?hur67TA2Cn{nTa3J
zZPsRHAbC;v$5D~Lwu<E;dt-Q4^~aizbu+Kz&ppahT(tO=mq6R$5WmOm>2ak9T&r?L
zsQ*NZ)>J<fm`^{;y}V-ak!uRw?L7#S-ekg*wPUD8=M4V!Mh&UccjB97#Nb5ryRdER
zV~~&0#=7cmJg7bwPn@x$(*{;xSL9jL9pV8}Lkw8(9Z~*LLY0{f0<Dh^<gZ(%fkLh{
zop*Q~xiu@4I9G+@@81W=tY9tZ?3{s_1!I}DzaHD(?!*fBM?y;COmuD!CbfsALwH{T
zG6e^Co3#-QY+Xr}Z8*HrTaJ|md(oz6HCksUF#2W*Mh&+iUp&)Ty0aY=eMrY6PCKzs
zX*q6wI2XD<za+vp8Boj*<JC7D%?r*DErBX+9K8_(&nbbt`))D^uETuo6J(xt7D$Ni
zfw{5QP;}gLu-dZ+3KU$?an>0@PNpgz4h@A@)mm)9bXC42@GrjX6UF`>Jzn~J4BxI{
z4Nr}#aOSo^SaP@;PVHL_k9Y6KkJ?K7!NdsoGNzRnn`IKOb=vfaz6qDL48;J;nOLTE
zl^hNogf&YI`L1#h#&ul)oq!ocbfP$G{$)b1S{K6l;fvAeo-(amxP*EMlEM39AkKV$
z0nXWeCx_CWv)!6%WNnQO&H0%pa9Hn2XRp-abE7RFd%h+A`~!(`rvxk}A~>c`mq#0n
z9=dOa0NREq-#(seuK9tvIi~!@-yFQ`Qir0pD{!FP0b|T7g{>Q(lKaJT@W9Du&@5Jk
zifS#;YV`$sgttI8Z8QDqdxeePcZVGs5aU^|%0XRRlbTJkgn)l)a5?2J9D7)Zd;Zwr
z6Q$!Qx`WW_^f}Bdq=z`$UuRaQjxwj^;?z#h2qosYL7S{6dME6qVJ_ZyCPad2z9!sA
z)(#`5Nb&H~+4$pi9hv#A0Lzvq5dG1FXEj}g(`+(GJr$uh{QUWdfjqvbe=;BO#h9<`
z*~|TwePZJZyWq^D9b{ry23x(dikR&mN$U))`RZF&VcJ>+d@38m-2~F~XR<Z7%#Y#u
zUygva=`cLAV*?Z%cBCGUoS^fn9-TAMoGvfFi2ow=!Cp?3rq1x;{s-pb)4b6n<k@KQ
zeEA97JS?3mJ@BU$CJCG-CG&$ux>W7LXexR~k8-bL=sWKeIem8lTqqg|ZKrPu?QgdW
z<2Fyh{Y4EpDJl^^uUW%W;^*<#9rD!LISJm+lBb6&VgQ9TAo;2umKSD_(DwIWT=9wg
zGZth1$K~1j!cb^gtV!#Yd!aE;i%)d*#1fAu7;;2~JLJgm<fFd)d7lJ7=$!|vR;8ib
z_EY%l!#J9AMU=J~n?id{8nFGR;GDNROsPBq?`76<$4&p(-ur5JTrHjW_zm$Ccpg6I
zzejiY4j&JEL(%jWaz|5?D{e~_$R%syrE9ODMddY4nDhhR&3gfV2LjNpyhAW9uo}Yh
z(rO}J-9jxfAzqIW;US;&`48_?G-&q-y0hXYw2ztsQ=Z&`Jgv1PX;UV>X?qQxuD9Yx
z1?%zMtPew)<37xG72%j<NIu4Hrz3w&r3P{rV8?zH_MpxKq{?Mcqss`E>rUcVzJx=&
zh#~!N`)9$;&~Y48lCkSoAr|!<gs9JwD0<$W+&bTkbCsoFaX}AQT}X#lo7Qldn2q#9
z))<;Pv9r2&U_HtYU5oEqJBiz$CUBUx7W_N(;h?(}N*xHt`w981@uE2xdA%mm_r^2N
z&3oXM$$!K$z*%@Aa2Bq;Z3ha1qk=E)`K-34%3{LKaiA@$3KND;Wkrk6u?_Fc&|zP>
zpu|U&xj(aqd8y0btmI;B-gt&-ha6%4F$sb!X)o3zrBx&4w?i;`w<dmV{=&9j(<ip)
zH?gj>HKc6hZp_T;WjfR4@SJpi^~;mmc%a||v8n%0Fm~ZhyxP1N|K+5k>D(69H+mS0
z-Dw6AfjT&1QxYx~+yVW$CNMELnuVK{!tObHnbOxZycgpPwx{&LPS03Sux~PCtd)Sm
zGgGl3LmgYg%kfQcv|!Z6BLr@`<GS-EFlBW#n<?E%iU-tT=cCv7J^3N14`Ggm>_ov6
zn@!+-LL48Bcg4E$J|?pMUbWd1eNw+o0WP(~z}<-}L1a&c1xJb^if)30LLcZmmqBhF
zhyZ1kxe%x|1HUwmhKldUnf*{bNSC`wd^D<w{J2<lG5#MZpOa7GZXCcR>vyw#B@}YZ
zm%<(gC!yt3du%>30#2uGA}$xdk)xeOpg8I<E^zqH)KgC)eHKT8Jp720cr-5D_(<@s
z=OP5xTI12n9t4uMF}t{xs9`#fIhVST-|PJ0?e-K%{ZYgEFFYd&tBsg1orumhUc^(&
z4W4`&51)-q@n-D`jM_V|#=bKai@KcAdAbz2;41_D?FHobvIf@I8xOf7?NEA-4r@Mw
zEJ|t|i4BSXAC;XX?QtBo8VrZjJt|li<qM79#US?KE!Jr?17}&fqF>EsSnsQV+DSD`
zpNd1&pYt^-VUt;ENDRzcID}bK7o)-L`^4UNH4JO)BNn%>vigNekjm$ghJ7!|sSPRE
zSJXt(RcztGmt<6*jifGbH70}{CG}ByXzy-A>eP`9HfzHz^DQLZAPV(Ij0gFATl~6n
zCraOqXUbiiOqicT%2)gmXjRMMd-oxoPU|+Sy>r20Y(qVpI(!yxA6WrTCHj0@O9Jfu
z{s01Jb;16<5<J6MgI+7$$laT*X#0OtVek0>e%LUG>ytoUAAFjOcpFHXY*x`pM-8B-
zUxVJ>_aC0MX(um~bYOW!FMIRrJD%R0Oj;JIqR$HgeXi%(<kS+TIl~^J6nbk?e`{gm
zw|F4?l33KXvBWx54Mg&K$mHa$XmSdLQL8J34JW2!@XyoC@V+lD*svKzp4XY1<nI+a
z2hM;gHfykcn;qmez7gErv4;e%Uq@~!&LyD-?vb|V>dd}pB=F*7DF16n{F?kA>ZGYf
zmzN4^Rq0^#O;z|b<RM6zm<V@c-jmJ;r^wnJuHYN$4x*1E(O9FLO}-w3+Y+Mi(!$Ls
zIcWm!7defF8D{WA^9Pn)?1y`6)nP&WcZ-*6uHh|aL&@A}eDkt!ns;a?xQ<rgYj>OQ
z!S;h(u>LWK{ksh%Gk<|fW&$+Umyr%fRo>RP1XjMdjBSFc+@iCMEdOs99u(K7=fc0B
z!phl5!zw}K;9mHnyo|e#>xcbUT7l)<<%1DQ@HFWVPQN^gzOui}@7>+cBi<duE6W0D
z-+EP=ZzN6+G&fP7isdx3;1%di%0-Vg&5+a|kJAJDYQ{K7(~DgW6kgb}vaFd9aOXJm
z)N0Tn<%JJivI4$5hVP1$q02t}g7Gg_@Ei4mLPbqa7PxL7H_w^RSAW^feV4hC>>x3k
zwQ38W9Vkh?QaV6GZ6!bYbvC!rJdVFVZeZzCZP=L9RZKc43~KGx2v--sVMUrlo)U>_
z^VV&O5F7A;t?e>|LE#%_9~nxd#u=d4oMblLGmh0ZXbX3o%VQ;LlcBOB3H#d?!n3`Z
zgqu!<gc=~h8a1pse>q7WHX!u#&?C0>hslXz5$v2PK}UQa!#BQZz)yKH==7f~RcjrE
z>+<5D>a-61<CH}Vt@Uxf^lV5AxX<?Ke-K9X>%!7k$;`tw9h_ToND#j#$Uikm9B14h
zm9;m>Y&#vky`Y`l2n*#|b~C{#Z6>~qPK8i#!cx~)!UoMBu=}kDtvvY_(vD4}^4>+b
za%K?@99N~~3+n{jRtuf-{y|Ik4wyg16gMUAMRW5-kojDXZaC@9Wyv|={Raz4$LCON
z66WH!TQ+p@lsw`y?>k28#n3J1go1`MOb|UufGxg@=mxQ8Z0jL^`1mdwtj^!WN`*Zb
z+^B$)5|ZGeQZD%aejqH^Y|Q0)mw}qSD_{K6mKV3p6VAT&2OcKoV31fSuAr;=+w`B<
zs}T$G_>;`vQ;SpniSy*kt^9^_0ad<qjoz-=%WvKO3B#9+;xgM9y?nQXMgOZ{7j-ge
z;G<mPk$94iFLR;mbIu9V=MHB@H#>;$r}ro}CVFVjqdXwEAM_e7qjBgcE{Hn>_aBMU
zDF*~#e$j>2*Ofv}`v(|3Y(Ae~v`o->`!d?Bt!006ykU#?BD!%&AiexeN#L)oi^2po
zSaIw(q_=P6KJ1JzMSmC%IJO*(#)weobvnGHpom=fQ8L6Su7r_--S9+bGTj#`gq8ch
zz?GX-T-9x;Z&R7U10r9+=LuT;)3ZFlkS090%APx`N8ql3uedpM3O#+US<o=18!9U}
zL|(dyv+t|&@+*@3!=M4qxpx>2?vUj($DicQ);G{VW-={bCPA%zHR;b|!6eIJC0_M*
z<I@-AQnRDiG1pm!UpX_Hju))qR-aP1Af^lzE{bqJxqWc9U=Q#3wGzKQImb_)IEl_p
zL)@{aFIB2kf*5N}Tvsc{vrQKYzm|-J^=hy9;&;(>)<H8`aAP&s-M*dcTKn-UR}=7H
zN+!6SQ>TBTQ|XcrQ`&H#3^Zki@Lje(5B6No4KaXh|FMLX1Rh4EAw9&?YBr5rGlt(X
zSE8?e*;1|ZPXyD<#tCJ86{y;*V)91IjBnH*s<$4spcdNA-rHF4!R|N)x)+Fg+&>VF
zItb@S{=r>~(qPGX37#7JahC4IE|8jC$%%g+x35=%*H)8gMr#XPa$JWa(}gr>*Gljk
zAwmz1G>1Di(KK!dZ%3#m(#-V+P<?kZp6kq_=VY&gPE;2wvv|tq?C8Xh4W6jl)Py6z
zR?sFB#f^S#Wj1DIaIIz*81<~>DjRO#wTg*?fs#bN=l})3!tvbw-7B(wRR&I)e;mvb
zZ^GE+cC;{z(s$c6QMsDnjN7wVqD(Ec8|Y#2+t+yi(p4g%GZE@VtN5G{2i7;Fm5MdQ
zU`cNPk#g9E@`EQ?M8$HJeb)+4JB&eV76suFu4wYTm|UfSSXwj_B>PlgXT^NXYS@W=
zj@O0S`tigBZwh7eV%W@*$+)Ib0;66GVeEY}Y_Kf`cFda(0l(+sj-Lwz&30c{a;_AX
zH|`g1nJG^WU%o8}Gx|(+4S5W`&yK-=HJ{k!xAAc7*gTL67U0+)F?hw#9PeDaC@?&?
zjftd8U{gEfuxQE1n%rqkjDDLckSTr6d|pM8j#OEkqaBU1#x9^Pw}M#iiy=`ht%BM&
zj5rKN!lAZd=#(9W@?KNG{idlvae5ql=$V3>%f_M6fnm%xx0<!otiZs_&xy9JH&mQ8
zhdIi@Sc~yBc^{;q!p{ez+m#{q$rHArQ39N=C=hv1C%CYnkp+JDW=2W9Br1KrMYM}4
z<`oVHg+-OZsDxk4Iy45}Is-^HjfC3z=9<yV640ek3N8O#VjsU6<CCMKu&r^K!2a_V
zwtUGc!K$taY=r9_cIQ_dJ|m++Deo+Cj@t`g7rY`9UMkcW-6_YfE&?#}DPqD@EwoL!
zOY}B;AQ3|x<5`_nvOguAO_RFHa#G*0snM>W^}v+{xxHaJ*MG8j`EW*lCBu@GILPi0
z66aPcl6u7x-LsK>RUY!gl@<sh9_kHwH`Ih8J|{@Px*@;o*fi2GHVV|oE0aU}q(HLg
zF%#K$qPjw$0msJd#eqh1eCGa%5sl^W?7umv+#v>*Q^E!NQ#A0>tQ5Taej5xr9VbVQ
zN`P!@1yk^iBYAPV;BD2AxAN3=Hg1hLp4S#bli&}+-{Y<l!o<OCzy|zY%R_3B8;Q6n
z4r8qMGHr)d0_R0x;BY>ctWGn4R0Rd{yTg^a2aAwk{T8---VkQbl!UyHEud{*BPhOV
zj017SY{9q~Ja=a!W@cU`a;>xR!p{VV=vD*uekuH2`<>_?lZM*%-NFqYmqNL^4k|Cb
zPO@!Zu=S~<@Kow$825KLL>~Tt9n*SH;b$+ZAAZX=9n|FyAJ#&tW4ds4l?Pw(X@lUL
z<3yg5rOu^fcjBvVPq-l(MM74+1>IZ&swojgBRAwjc#u6U9XExJZ8xLciUw5PFIng}
zqL$sS*?_Ae^O@V2<K$Ur2uV_Rf;U>fS*qC?;_@PNsJ0Bpo)Srn_PxO}U8fTJxc%g}
z_AYRH>w?>T?TLxQZV)lON5;!-M!9|^=x!y@9XE|BUr>drOatgQb%Q$fW-{>D8z-DO
z$)cWG5s?PF>T=sz5a$pLE5^MfJELRKSkDzCwW?TG%qSet84E|2jYj7eA|TS`Ba{~(
zjujiF;c1~fri^gI!H-+nuIkCCU7S0lB+bR#)*-FNZY6Gr<XAe>oP+l*mbP;gENF1y
zZYrvD_{JBg<*|a_(rsfQ=?v$Qsr*6IkY1X)54yXpgv%m;G#HPfMC}<|S3eK#df(ub
zsW%PMb)jtmBdO^997t`c2bq}7f)a;Bvdt<07514yg#jmH1Dznu(gW+0bBTB2OzNK@
z%XPKZ!MFuoNSqQN_ZLTLr=4(D;TqJR>VpSsROtc!4F1-o(QyA1KFqZUd?mV3?S}!D
zPEw;bTlc}AA9~cS@eU4{3+NZearARtqp+-bG?o64iJ!EbsqKvzzF+Gq)b(6qQbWFO
zw+APAV2v6t6jkL-4cTN-WeRP%H-=lOgbls_9jH0;IJ;$k2Df>v!H#bd7<OPUrYye6
zM4qY&(hvJUUXL0W^zCHs*S6r8rRQ0ds0}W-I1;XSB;eh9I_yQU3#tdEv7jt7%p4;i
z&Wl?}%C<Yq;K@6dr?-TNBvo0Yr!Byv?>3@yXAneHtO8P)NiN8I!1AZdFxD~(nwLHq
zsyU;$apx=^wXBRZ{gI|xmVl2nPQinx3S`A64~ScOk!;>7N)=9Xv{c*!wT2ZIYfi@D
z;QY&YSMd;xJu8cjhwI?n$hAzz&b(%ObvD@`)qoGxGRTPiDA4=15w6>ehYMSmlf=16
zu=?RT{v_}TlbwD`khv%e4qTIi`Rld8-6a?99H4mKk_q1K_QSlxro4CVZn|ncvdGG@
zpy+Q3qbg?cpwMihUarg|q{VRj%6%YRP=&D?({P5K7N1a(3;lnlVB4EzAXecFQ^QxH
zwZ~X0JLVy-UFD4_od?mobURlcr3i=gFA$a0f8mLC3ixa=p;MZg*o|j*(8|+^6vex-
z9mC(T87qvqaLssrG$tRnMY_W!X?tot>NBX1>t?r922kg+0*!FFi5t&O<w6}Vekbuc
zQFBSg*9rBkFl-%v@a!ltOgH1lU=N)%wFgvARY3pqljN;(951_dm%jUJO~1V^C%yL`
zL(H~exF}{b7FntAMdKc0d(%03|L8Z2JC(w_CwlSPqOa82`Y4I~jVw<YAi#eM*KJJT
zXa0`h7q~a=Ic`eF-=9N{zg>ra^tAbszwzwS8WVEQy9!3W*}*3KOr>#sad0@i7@wX`
z<B@h}@Q%*_37)cwf6F(d1qG2bsO=9tx&4S_N0{-Zx(^_|@CwV=SpmYvYz)63#d;){
zu;xquAi<=OO)d6i`O#9?Fw79dE{`Sg)d#TI#(=MoQ|If>3CWyQ#ZXn{O;pCzpzq&g
zp_}<09{$pq_3xO6Zx&n!`}U!}P5&fhhIfGB;21hHF9HrJtl<yc7VxoxRow5TA}!jw
z1Xta;#`L3PVgJNn{&&qZu*kkHj5W}}NgK`ihp|nV7V#BDN@wwh``-zZO>@9}-BNn`
zvo|kze~pL~nzDU=r}8gR&iMS_0vMMq3E5x{&*onT;o@@W3>4>69?J4m2XR>36b4`a
z^a$Q9kHW~e=V+47Ic)BpKo`aKf$hy)+Sw~kkKY_mW8;lczfOZca516>SKbr6un*;f
zikV1c1l&{Kg;w0~p;j*^aZQbAB<6)QrcaXVlvq)1u@qc$vsrNV>tB>B83l*;b)bUl
zY_Jk+!Ywu;Ao<@d80$2ZT~~Le>Ec_tbU`-XwBCX?FW*OEWKwYAw%zRgo1q@6`6b4W
zpFth1bLq_fpTg@yd9`-o5;XIR;Wrk@0-U;zw~v1(E4e<;?5_ZiiQ_PCmjxKRWZ~0O
zb~N<aHKw%a17v=S7Y42BB8JM_XyFUwdu)oJJ3at5J~ZKvcC6)LW4>X>>#dOAlt?P=
zwdhgB7Z5Zp1hYei!==l!AmzqAFk765ZFX0ghnqWm{8hu|Y_&wm#8e!;&=I}Q>9Yqv
z)i8a{WN>!Af^i?-5Q~y1+?DN(a%bvcrdB8$5KCl_!&8}|_Xsv~bTq8_)keHSzOeNs
zkJyb*UT8Qzf^D9y3%`ty*E|{FgHJ~7tZ`gC4;+_d;Vru^iwsx{3nXs~!_#t!(n?Qq
z$?pz1{UjYul47B0rY)L<ZNZb8F62Moomh7DA6qGRiX3+DCM)t^3hZSLTS$v9VWJfy
z*{}~o-u*A`;Pg9`$-a6-c3kWv4x1Ol?`0{-e(!{7pN77dYd_D*znlasyLi-L0<17Q
zODtrM!aAwLxGOdr+|9<pt=YZg&|oh6@j3%EFN+hCQ8PeMSr&9`_d?ytoiP1dGz>QW
zWW+QYViq`~^5nfZxNLy<zes|L0Zojxc}_a_co83ySoY=RR7js?kA0Pc!p7H&p>swY
z$xI7_-iiKLcp?Hsc7LnREMGz_7kgppkye)Zb|&d8O&|g*4~&);2RZ$3#LOuceflF{
z?L8qI_V5bPEb;)Ow5w!Ik_E1{T?YjZo#Bb;QRX>m4_>L*gEC`vF{0QM)T?ZWyyH^r
zys;G0*KNm!vms=9ohP_I^v6zbX*Bpa27TU7fiIpN%rkc(njDRWmfdyazxzpGZ0`nd
z&MpS)?=h?`!&zWi7$wYou@|N5_M&alEa=2d>=ysdO5`2TZQC`{P;!A}4e3`!Poh|Y
zvNDaW@Z#R21ewo0G&*<_vFisk%ABMF;tQc$Z3;|_PKO2Fdtpq$V$O|j!p6w!a4hT+
zo^rp5;<psJblyyQrZyMD3ghu-jvYX!IvJLKfS6qSDV(>c03I!!0Cx*w!Nf_4wU*n0
zg_t_zO?WSqeY72;GO|dgsU^(Ji5$|0dzp6ZG~x2*M{LX0HMk+i11ukV2~0ANkkez5
ziG9yy@;k;H>Mm)p!tzA8pXdc`kBtNqOuZrU>^T1Fp9MQHI2t>0-jF4FPW;$xd5ei^
zlJu&hj>X#{EoP<vBtB;4H{v!g0hJRp1Zx(nF^fe`Fl?g&{hBqTT}jvi-WTIDRE_zk
z-32fZHslq}P~p?s6zItdF3|Q`l+{l1B`KG>$s(Kcr0VVzR4BX13S`H_TQ^b2mfFf9
zgyO7HA_?=h0T_iGXYU_dKm)reu-HEZN)C-7D_)Jp+KymB;D1@d(`RPmERP_}Zb&BD
zjVoA#>jmL+OMRSOHjUg_HV-teD6`A@Z!o7z4e}GD$@unG=K1s;#;u)=FQ(^W&9xO6
zf7Xj-n#B<V;c0xcWG#7+G#UT=$;VQ&Jb~$6Rq}VKDp<}cLDPv}$gO`f&{JX(#@=5{
zHeFNT27kuUjTS3luOO8>D<zZKD-B?1ZcO#1!(d;)d^q=eD3?BZ4$2oD$>BpULH)fX
z_4n5V@kR^K^pt?aO_z}$jO7cr8&KJ-33Q~|Z|w5Rh3;2&>}`yv#YBmv)JSX_6dOG!
zU%hIPL~2ltle4+efCgRQoDbEGqv>9;^%(cChM&3=&h_;c(8sYMxWoM<$SN4|55KPQ
zkil~FoYBE{+?_=4w)=A3wKhykFq<~tHRYsmb<N53qEsH|;QSFQX^%-D-*x8zAr%+7
z|L!#?f5U=nOx}Ty>aF>%@7dgN<1sLkyu=67{V`r3hR27XY4s2W71ozf!~M#*S1tt>
z_GiMHr*DMY>pwuYX8>sSio@H`Vp69Pfnu$uSUExk-Fmbjp>`##hGg*B(@vKB(}DhX
zUXTLD*u7SYwYzv=vmW4+r?y~OrzwbSvSgC4hl7dXG}0i_B3R_Vj||F0LW51J@P&>l
zZthiJ<1F%UVWlIacF17&wI~?rH^eh_R>2cCo)#U9WcSMNVG!R>#4fFdX=Yk5ujM{O
z``m(vfyej^L&>z3`CMV>+qo|*tnt~jHrO6%$^V7VWqDJN;OoL%ynbjTHfv}=XuC81
z8@&hjX4q3%KY4!PnH%qVB#QeT-Qc9}L^yxho+vwSqUU2CF=MNJ5H0v*QEb+aIZ#OC
z8q45!i#Wgje313T%hQA7eNn_v795`pqu=`vLYZ3(j*sGa@9h~(>uF?F*Bynm`fuQF
zxj&SJSkwKN^jX@7bnu)mLkIUO(WG((7I0xCH(L6@qF~Q4Y^j+B!@VZ(@|G5Xwbx-_
zvWir4v@&EF?&IC(Uk~ZPI<%;}ntgq_7aT4n33b0X@iz?%sqLJPc<jGyR$Q?g4=?o?
zdUj^8M>D1pIS-)2e5Lr$xBxh1D@$eWYw)ytZoE8W8B{4B!3~MBFmr4lF}!u0yzVbz
zPw68FT=fs1ZZkr${4uyNIgPJN8%qy&P^dg#2NV8#3GT~csqR-<K1Q~VNrxn|BP*&Q
zZ{<H$R<Vw57Zc?ZOXO*X;!U`z@<vc_MvX6d=uB<KHo$X*0$9D=8;^}X%Pf_4!ULZ|
zw3?BGKd%3SW2e*LxqCLK_D$!Z^K*HH8pp*$UfD-mgcv;i4s6o6$22<Y(fhp!o<9}L
zJI)-y;bqsDMpG)ZPM^U_-6a8kKZ8jjQQWgpo199`Aa1X|lW@6_RH|JMg&D8FrdX8U
z{yGgjgAH)T?*qJA#S|wdcM4axbU?lIM4I$e6)IH2(X=fb3qxIbjMjd>Uqy_*Owgit
zPJR@8Bfy=j;^{rzQjGI4$L2q>_)Zt-DA%FB<6|3I>>k01zAg{?H5$`G<4N-681|s-
zGCb*s<>$6ZQm30<wA*|u7)G2xb$2Pg@A6YTVk|{>%sB>i*YtQ%`8d*a@15}c_19pl
z{et*89OEA5gG7Fe$p4|}yyK~U-!M)nNw!pEWt36&I?w${T1Z1Gl1inh2&F<Qn`|N_
zWR*xuDxBwj$ZBXOX-7-jSG$bg=l7R?yk4Bw=bZE0&vjqd`y#48>^Qgb%Q$4J#BG&W
z0g)r#L9g-*?)0U<e7<ZKH+XO@7xgiLK53spF73SuA(n&SKUIO5r^eGH)zx?-P7?B4
z>Zq65Pjqq~$6bw>gFn6v!S<kg^xsQ+rubcqGuB&A?>KxUhSwLfHDmcZ$j{^0P$|u6
zom7AwR>qu*iW;`}#>2Mzb(r{mB=<1TnyFMKK!|4+Tur}+1`QXf)xFJZ%?cOHv@+)c
z&W6G`{ux@zcF~`2+`(MQmFh*0Vh<K)fNY)=k%%+pT0i;2f>|+Ocl?<^C2Ktym9d_y
zKc!Lsv3&wg59H{JOBP)5PhC{XJ4L2c=do{tLP&MLk0vo{+$`Z5c+#i~2TjxYjOY$*
z?fgnD0`G$Q*F9+Q$QZ9^DZ}LOk<fOvjfDHUQnk#_^xHXcK}}f!%>CqBSA5h7{bQ9d
z-gplN)aQ{r%~4P=+5pP*v>;lti}nbv5vwW5!d)vjL9&+*DZUhizukhs+q0NXK43^)
z&Bo%PUzud#qC9Ym=tZUQ#r21tmC`)tT%7)N9cFe_Q{z3Q80oZ;iVwDti)%)J->>tS
zet9hn&$%YhzZi>_f3^!pPfWm*=ak`eZWTQfEsC923h3@tmZ+)rn2<SUL^|1n4)b%w
z{p5W(7<HXQ&l~}z%O+x<R7bu3r``0*+9n8j_fPO%?K-s+GllD+K@g*0kDgcO;h_%-
zye1U}j{eU`$K-Qlq_U@A<&kT6(SHK$-=&2j*^h;`pA@liG4C_HSwdcleipV=PR7^c
z%ZSTb8+6>_MIvpKNlB$Vil2=I*^zUw@|qIfs$T}T>m#vzWEl2VTLJeg8Vhf^;GtWh
z<i^VaqO?U4#;=S;@A#L(k=u=-cGouYL3}G}&DnudT#t}Dec#Al%jIYv#(PA@3={97
za^aHk7T|JsJf*vQXyV>+bhdL88R0hrBegEq2MspR#b5$O-Nr(lc5&J$|F*vI@--T{
zNt)Pv-;Od*g3&MDk9Mq70KJ9tQ8PPUaQsa_b#z}27H;9NH!%v9iibddLKMV4iKJ~_
zEwt>4I-R&76dTpj>MicI(L3IKWUE^++V9~r6i!xP|0Dt;eKlcMbqHqMT@2B~;rJsg
ziW_I;CKMJH;d1p4*w;}4u}gAc@k?=*Yh8jOD`nU%X=`pqOFn-3bqJ5i7%^Fo8SG%M
zHOsk~gU=m@aCCY(R<^Wa_VFC3{vpi;{3psL8~((W+A#DRTu$bmB3NbihWPLBfQm9-
z^zbvLg=tFQ5v2lhB_D)kH+i4eEd?4Y8;b+{YZIB~SO0py5!6l!hgeZhw0UAh8V%P&
zPt$bJ+4zNgs{Twx`1|8eD_&O?eL)OEtI3J#DA4$#!uE^^rUm8#@+|Tgs6XCC9)Dg8
z2BH4oQB;B#HKgH}&ks^7XTX_eiF2Q2E(=yEchU>*EAhc4mHP94CvXc_wUPk-WYoz}
z;6`~I#px3+;^Fx-c&&9aSa`+LbpE?Hd-DTY^lvqcXt+yF=I#djaxZ~)whfH8(ZxG0
zEkeUv+PHUk0zN*agPPn9I$KJM7A0E>x=tI>L!GCIkFx<>uvEqyRW0PDAeTgLF(yVQ
zKa=3xDdg9G>R>r~J)ai}f%qHh7}>Z@;4gZGsuVb*f{PTblQAUqcV18}O?kml$7r0k
zP8ToTxP+ame8wU(MUXKoiZ0rniLsv^kRtacx_<vS`gOHETzozntafJ#Y^vljzGD?a
zw*yP#cS>344EA&G(Ad4nEO?I<H*L%~R8V>YgQ7BUUc85_e7*uk*GO}MjjM(B*HgHR
z^X2sxW(}z9J(3GwdY5jL^CPRY-{FGVHdx*=hLc&T#B0vcxVl20YcR0^Va+nEy^_un
z!&;&8ODZn*oy4};et}?&;f@7}&<*PjF!Rns%=Fb8EKs-Nra1250#*1Oj%qGvxN<dT
z*_Z`IX36a9L3uK5)^={e#fi*QX9{=gdmJoV{RC8^_n?HK52yR=5Bns)g8pb$Cshib
zOyPqx^zLuPbA#8}m>6Yl|4Lbu+Fj1Zy4`}qco=usmSeV|1beRP$Qi6$M%)gh;gMyM
zEb_<+?s%RP_fjQ^<+UeLO_#&O`)Z&docH~n$QT0-_md&$s2GUM>9?!VIYka0DWdf)
zBVkWvGG+*cWZ>&exc|`ugO&^kCOvjWd$F^^z4HuUKzb`p**q2If8jj<3%iIT&raAp
zW;BX_cSflt&rp4%2JBlC54}A+S76iz!3ixPT!|~8CqH?^_bDGit9K1f7-<cwKl#Gn
zo;1SQ@q2+SfRQ)5=(SdkcD@Nl*F--O;G2(AUiagul)Jp{o`~~HbueRbBpY2Gg30r9
z;Mu=vbnUx8uzg+#y-^p=Ub&A2TiMSzaq3Hao)=wvDaH`A19q^(?|%^dBgT10+hAd>
z8soYWFzt&SG`Z@+{%Q%D(UT6sraE*e+zW!*X#Q^<LElTasm^*+vVW%`Q~G)f5~qy<
zzs>cqch_EQik->L@Jm9nC;;_#Kd7Izh5x+#`z8F8SWS1VwZ??wr-W6C+Nd3H5U-s(
zj~`XdaYLslTfeVGFv%NmiM0taWqb_h#$WR9up8?hKLh5J9)@p=JXm<#EY>!Z3R0c-
z@lwhVHa%#<quUjU@xn-!w{IV)j44E&4+F5Hpq);eZUmwU_0+;B0*7|FvSoZ0^1rV#
zT&IyTd*zi2<|)za{=Or)(sdJi@JS5)UEYJ=7F}+=b0l*vK8`<1+?iPLa<*~&Ahq6p
zi5>|_V+THGvA^<SO#Qt!o7XQ!pFU*hDHua^tCYAIoheLHeiEnM(1PD1THx<{W#+v8
zC;5D82?`0}-b!k7amxEpg1NAj$?Mpa)AF3+zDTO$pC$}WsG_6pS<?S<5!+41vejO{
zA$L_gywj6s=k@JaZT|=^*y}zx-`++<3UA?TQA2ii>2uI8DPq4*yRa*kr*Jsll*>B#
z5L{ebxWUEi$%h5`(9G`$PKelX?ryrA|0+!`cgJSjuql>%yIPwI%bms@5Pd@Btv(2)
zrE_?HlmwoOP8HVX26M$hQ@F=7mN3`#J+$3O2uA~|Nyf`{AknuLJ}f<o&rNQF@8(H@
zZk-b7ntO+utnLQ)EFE@FtqLyfGvW5GECIpNTrA_?$J)*N;q-<8GT8MP6)o0-*ESVa
zR8j}~{x;DqEuz@FelhjwNn+5wi#o15fpsMkEJe}=J`C-|4&@l3SdF1=w`2mfJ+=^B
zPvoFv-gH!UIzT67k41s20%SjzV{^~HM6&EUdY5QXgRyyd_=7UHPW}tN*=j=U1+9V+
zA<B%}hx2;5E_M~hu{5oR;G9x~mxoHpiy;ZFVbcyssQZF%)yHtfokxWMy*eb`L!DDl
zR{?Taf-6ak$49(BS!w%HGQl^EyJfwBTo2j;itmj%vlk~J{Pj9?cWr|1i!*U{2cOZs
zpwEd9w9|~+OV}QjI$@$t6g^P>5%pGxa-A0(!TRSp8frX&Md$_Ns*WN-L-Rz)t^5e)
z{ZUZd;e!Krd4^4p678APj*1ue(0W5T7Mqlc;g<2j$JQ}qz)BroxrD$r>$NmAW;On`
zu_nVMSE<T+K08C|A!bD+L`7(l^Jybs+O%qbOf}*^_X2F+cLsXKAHtr$kr-srOUaBH
z@N*0&3uh_O*U~w->LUS_k;mYIYBO#7b%m~7a2(5AVu`z*w?Oln1p1CWjTQ|ZLZdhK
z5R$fy9{6^UtbJW5xCM`C*UR>L>@2T;Rx*`bJz|Q-i=-j>x(q%DYM|W`=jqJRb~q!X
znCvk2Cg-{=K&rqVrPd5n!xf$Mpmqjc>+_=1zmCEGGDc&CaSnV+^uY&Li%}!r0cVK&
zLWa&}JQ~$R9B;0Iy46!?&)9WveO)D-oK%7b|D_7D%T~eeV1M*(E9UR<5vaVO2*pzi
zanrXEc=fCpt+>YTbxZ@`&e;rNdBzt>)Hb|ok%;-}H%Q~bWSa7Q5@lH~u+~HkMgHc|
z5ba(X`c@vjFU%Bn1ihpp-p=%8raf70@|`BVbmPBY_n=E@7@WPX4ObgXQ9ZAj%+%eF
zvlNp-y+szECQpHZ8;LmWPO$B}k#VT9SP!nTEE@MNuU_k_HF<eS6lcvmYqRWE2>o3p
zje@D7IHUXsUX~jL6Wi*j^XV~Aq<BSGB*xztiz<cNYq#Qf+liR^r3h9gHbdy~1emUt
zAPnp(q`#+s5UiF}gMXRhVeZ}|xM-ucpwV|R{t%gi;{2@6%WIG!^-U;x$`^;O_0j%G
z3h>vx1C<!hWEwMy_tWv7&x2hw?N==KT)7oTjaEf*ot@~W_=Ki7r{jidai$+O5lL4B
z9rJuU6Ca28X^%bK4R^6Q;TlaaX`yNJFTjqsr!eN_B^<FV2D4k+h?IN=DtE2HT`2}!
zo@peWUj3LXoiBrL7Q~@NYaAGyUWWtk;?Uk>jbL`S5pCJpPPTbJqAl-KLC)I~<&OU)
zKc*z1TuCx{F6n@a*N4+09d|+Fs!IZ&2X;{PEuQvPjE9s-x){-~0ynZ_Vfk|lOg-_B
z&|`TZQv47)^aaABvpcC<S{W?V<_Fa62Ke_G?=ub17tT-D;UwOl$E8!B(QADZS%Ozt
z{S3o?T4h@<6fKCR13T~1#jm2F)c6GvS;^~3UN-E*shwm@Hlvc3vEZBFizQ#@LTbTM
z966)|SKjT#Uawr@uv`!00u=F1QH?NiAReph^!V5Cl!(OV3F!Na^le=MEiseADJ=)c
z(j#uPk!%y1WC|c}vNRe|clzCMCrEd`B3qOC>96;;I6ZaPZm-lq+f_YDXcjn^D0a@q
z_&h~&<B%)4+rev&=K1u3YXGkCiX)9v-r;S%O|<U)R$Tp=(S{0ZxNO;pYAxDC_iiuM
z?7l)_%|(HjzmQB6xC`@7#M5u1<LJ9RGqC+yCY;}=g|jTKV~9~F=)Rwi6*gO7MHUsh
zH>g0jd>ZyT3F*$8-?07o2)N5@0J`5lV#O~f^4Pu&wmehk&mxBYqmYyuH-emqE|`3j
zf!)U|vH46rot{$1mNgs~j*uEaLEtWkui(g5PhX+-)@N``w;Q^LCU9HsXRuCDV|G#f
zB6|`<IlH-ELDA(4)R>>g&3}}b(ib^y-%dx=yQjf2@+Wgr8jG0*M!;Q_EnNGWFJQp?
ztAliBbNk){z8J8@LhTJG=5q}uy#0&Icz@;GRi*4tXe4_pmJU_U4{^bgYhdbC0cPie
z*tQi(T=#1+CgR^lX77;X>K9Q~XYd7SXEVI`cA7PXy{9rV$!Ox8MxZAEU4%zTV#7b7
z_Fz1?oEt|Mr*A}$CSNr2-VU1Yzfs5Jt#Id;9As2HA)J9McHNyu9{-7@@2<sQTI+IH
z*=~(~LCdJ4cO}Wlv>}bxb;*>h0^!iTGt}Q?1U_0{2D_OLuD?DNdtZ9d_sU<vd4dfL
zFRX-UnF;7#&`QUBUqYfT{v_!=ThVp!18sj9icgQ-rz?_uA*P`c<t=~HgU^OhuErU9
zJPwcx+v)|wug;;C{T}!b_W-89;W)4^fXGGnz)Saz;L^zvSf;MQMwp(Xhr&hi<>_Qh
zO#g${rqjv0#nHI@;S9ECTRB;?_5xALQezh^_Tsk<;k;G9h_!wFfH(Z7<JOz^=`58v
zxTrRb35(Lnr7^kqq9UE@LNdKu<Vga*-yrupfpocNl6%t|V0&mE3?5Gclb&AkZc83X
zZ;OY?7xmFQ+?=)OPsal%!-%3+Bs9jj^1hH}J`cBu`&NIJ&oFcfrk>s}<envwCh4(s
z+X{7VZsr9RT6`Dfmfxd2laG>`VOKU;|2GDv&c=&@tt9Qj71-nVgr+T0rT-Nz<+*W+
zZ0d1UGGsK8b@Xh8PYY|%B%jYrj`D&n(%Rh0nq)k<Tb8?bN11#*XUrXmwBlM%ujYJD
z-GH5si?O!;8~R)&Y^3P{Hvay8`XGNUZrSI@PF-FJ1B&Nx!p&0Ha&!!L#^ZwEgq|yx
zlP$x%9J5*Z5`VgT+C(h8_Y7AViQxMe#~>_pHb>ewaE@ElxPuOEVB@92ofwjXP4Y|G
z^Z6>A!R9&;|6UJEZ>F=VR#i@5Gm@Ev9b|U9OYocG8B{o)LpMwb63DA+a~{UN=$*NN
z9W6eGav!9))Qs75dbT_q7yV6UEn3M<w@N})n-}nP{CM_i(F|a1o;=I19UKRhxggsW
zFnrz*zBXwy^=lkEuqp)?o+<#@-e<5gD4rSCr88H-6gDz8jNRH9!}9t=*o-9yh~HX2
zJaS&Z*1Q(Qtob!U+SUe-^Hfl2pEJE0p9ue<HA?ioz?WwtaJtDjF2`HIbv5$7<K@5M
zM@<?0?rOoAV-)bO2f@yb5dsq>6;@X$LPs6vee7+Qq3F#4JfNb9<@knNA;z4ER4|wa
z-=PvecH$ygeGE~NM3=${Jfl@0nDuTDMjxss-(=nGs;!mLO>{i_T`DF}?h?U@{}Rzu
z(~<Od4Z*(N<0yZrfpE>o$ic88SaIYO9ElHySrg<*%DPver<KICHuL<@F9*<OdIjzo
zxl!O6umcJ>eK_^>9%#KyrrYKpK-(K;Q25!1?7N;zza+&0tBqv8Mij%Az}2u%_8b^V
zSHMt>E2gaAc^k`GVN;<sM6XO|DdGD-BTbntk(<JHY_{bVJ}M;-luPl&18r9H_Z`TD
z$a61;%><L+Ci*^jiw>o+kf=J3$vsTLbiLWE@!%9t{aGnkbMq}Y^7`cD539J=k}f!F
zcnVG!*U~%5o1wO*fO_qf1pBA}NV%U%YA46i+W0E+*H%c|C#s;`h)7@$Khf$-QBb7n
zB}n-ki(TovKyr#7K$9O9ZITgAQEnm88BSpLX)<KrRE0adj#xkWH&r~jAL^16K)xsg
zZ(s=8oH2t81$j`tEJsqE%hB4w4#S$$U|F0KeWe#H{5lvzWb2pXzlbNqvwa~P{k>mM
zOCF*1(M$Amvm`t%xhvEj-i(1^4EL>jg{z&M>xB-6I4)2PR*bwve`cDIbqi8JYAwM}
zVuy&M<5^f<5Jiv8Jx#q7Ps7qL2dKf;U2w{h*EbAO$(qa}P(9^M?>^Wjyg&38jI~8@
zZqGaFv~we<AF+huogQd>v4VC;<<UOJ$;7}!9-1ztQ0KMY_-XeL6>JYeA9)d~^m`;Y
z51L{pNP>dLSjfMdNtSMkr2bqIzPut&$Is^ZEFY(X-uEL^r&142)JKq@=Vz&O>Uq+2
z>n8699IT&mIEjWfucCKdi%8B2XDAcT!;8n&Xn#=|naF$1_P53gOFuZlwXg~b9WycD
zkU4&fFvs)eOCdme8IEZ^N_S~pqR-{FqZH2@ZGHQPL~LAxf>EZZJZln4>$}2_ms+SD
z@(wHJ%VGcHGdzPK34%+aq5VQWl`U|9k1o=Jz9UUwUKEFaJ166Qog2c$H5zok&qnm|
zN`YZ}Wze12X%~G+6@!ujF>CG-G+*;k=%}TI<(JB_FYz~Mw%sJX4`k?f=N;6ddl^nw
zUV^HF8!_{$8ybb%;Y(|6p?~pr^4L6y7RDN2mD&v=edQ{NoMT=exm=&@T{=u7)wz0F
zvIxBb15sr0q}q|!cnwCxm!I2ZQ8P+LC})?9c0Z-C<@{~>PkKIPY}iV?mN<dP)n_&$
zIk)Q$&HX7HF<J%ux_M4gnkXfAmqW+j4cH|#AT^)2f&Wk&jmVIL*Evme>$2s%&Zb6Q
zNXcQ4Zxa1}m_sFn9T0hC6q^@uiCjGA$W7}=z>fyeT<Ogxu=4F6K_5RCYt+k=t7~|c
zT51*a?C!&;!@y^UZ)4G8Q#h|J!O~8KfUf#CY}lGdbJy*_*M6ql`P~~aVs<ZtwB=*-
z&ptAcX$igK1W0|=X|0wai9V+SuUBjZpDZyvS!#$X^AoV~Td`ed?=I}~pT=iV9ALa*
zI{9ll7kn>>qKoEbx~^|Oc>iVs?@wn1;zJ@Zz~4Q4<?fQ%<;D<cxYKUyTUjt#>kZ;B
zf06i0j2?23gYh9%WUhEJBrQ$Aq6tkz(>GplHP05$UMnRvA`7XJbvVooQ-w1JN21GB
z7g{rp=j|B8)2>Npd0nHKmU#t1DAmIv_kSeTVkSmB%tC`j8==m;4CD*!Sg6lWc=NIm
ze8v~xaz#EP*)a{2g<G-j%Ny9*GoKz)bg7p+vI@^QPlqj<?U*8R05q<bBiEq846U5m
zi46hp;^!&Sa3BcOM=7z&vSVb|FLCxVbR+ZN&l9JXdbT>c7G4PZaRaHqD+TK8p<pH?
z#~nla4dTo!dKWj<X$og{$e(?<vWz=CaV8N9z6am#c%kIvfAIN@Hy5&fKR4SM$-mJa
zuwm3jYWH4&4ZN8R({{e*RNdxc$Mwx@yj3gApCQ4H9!h}wHGb^)=H)E4Lzx`Uy#u=K
z{QS|u=NV>&;9s9M%$@U)&WK%u3pPt|N`LmVtl2tDWa(VI_Tme4^xVfM&CzwkXH`l3
zw@9is)`8CHHODqXM~KN9g|Hxz*A2HoON#-Oem9=xyoyD~*g&4KBu`@Nq^ObKV>06A
zb!z`~n24BkT331T4CL4VOnH`waz}Z_RYsq%V4Wt`*e!y$&pYX!g$E%^XE~m*OcmUZ
z>>+16HsfM-CESmP&~17JRE6Hbv+gU1#I7lHaa<D4GcSgR16f2uazE<X$Wza8O;mSE
zHJ1HQXL9SB(fZ$QyftM#zS~}B>oX}E;^p~_XamFR>ms<H+M^(ITM}$pbC0Ag6p(TA
zXR{+iiFi@{y3khEi5mG+^7G7j*n9sFjvSo864utE^ieOi`;I0Q+?)(SuU11+^CDvR
z+>kBNUkz=%rv7fs5ttaQfNX;t6X`A@iMqbfETzFoJvs!kE1lWmzl(5Px*=LGm;jB5
z6sL)#qSLrv<VH#nyt<{tO&eH86H?P)+;)z=_`Du;y63@!+;Lp2`F2oIpUUdCY)37p
zMW{4^(#qUxD1EaQb<?}(u;v&VlX_41x&1wCE0L$wC%mBTR}h9bEn**QwYX)cgYaC9
zI=j>K5DVU1!QX{znR=@S?A#`Ua~ij>W%A`{HEA+zovX(k<N4O1a|gks@)XOddqddk
zL~hx$$Fy#qE$5>!L=1l0GpCDh@Zot^xaDHTQZ#}{v+He?^%G?@BpkST_b}=EXN<5l
zgAy+tFmPVY4cvMJ!gg`);M+UMJ*Dt^<0ZJK&`P3I7Q&h@yfQkoRXDZCl#8^eqjO#3
z!NjGDo3VZbO8!@e4OMHw_TMUQ&g1`3v*r|B3=%<Qn`v-j{d#yUF2+`LK8M6@Kd`Ya
z25znHzza2YY`yw8W}EX2H~qOswjDCUe(`c5xlod83iM?jSEs<6@@?R1QU(_CV_4#y
zIQF5f3(CIy!^!TeK{+mqGqsgw+Lu+?DEV8c_d9{9Egj9Iul`2jIS3~M?hAVra){|;
ze~in>!6dKKm~weO_wUy`$TuoLX^}K0_3Iip!q0?D_PGrS`_7Tf<nQoeM+2zdXy81{
z%~{NnMEF-x#_7*@<bS7%JbX*oJGlukIaQZ?yGp0t=$$)!N-DwK4nIk==WK54@GYEv
zB!Ofns<M~5*<^nEarn~JC~T^pz=U2!Ov<%@<Tvca+G3mfHSadD)SWpvv7@7Iq|YV#
z>B@evzmf{EzdgB>TPJaJh%<4WQ2<qv2#2qvQt_i&_$-PEfAs7iDovO14Dabh4Lz>!
z!zi{oW-O;zEzNaRbmEZaL@tQWAv?kX&aomBC#OdVr8gbNl_JZbaN;wPnf3@BwSt*s
z(l2tF&kLE~+`}rqXA0!f4B@s#Jq~{!qI)hXv&OTxps6sNxfm`aQMXg5(*{qNe@C7>
zywXl^!t^zYNn0?P;Kx|?PaluhJQkj<xrfHQr)>K6LsaVfJ5=9!TG)IqiY=@=%H7oN
zh2<)r!M|t_9;J6<+M1_0d<Xe`TOl?+Xte9OO~B>BKhgtNsY^jH{@qnVErQ+o?5;Z$
zDg7XF1C~@o+S~R{#U{+yu>@*GGUy1^G4S$hhahOQ36$p<pvgE}@>(I1o-N(Y=lW{|
z9(w-p(swWEEYZcSCldvys!!3tOjFqS-$$w+mx3xW;;hWk9)h;!fz94Xva4?n$|&Cy
z4vd*BSa$0%*_!rIaDJ)-40xW0j$So*@o5gq-hWEBPf~%KnzzZj(hD>s#gSP5NCU5@
z(I8UjM^^i)V%LKoMDG``-!-fvBYY-9dU+uBUe6~dB#%>z)5^TI7mV}HiJ?v8ZuB5&
z<U;ruoUN}8_Cw`%{xaLBBNq<89Rk6A`vzg-e_I58)kbvj>jYt<XdrG5y+*4{V~M2t
z2y{FfM6}!Od46J--7V2`bob?1*hQC+gVVm!(IW(?C7FOh*K0{+SQJrfj3b-l*5Q|C
zWyly;PD8B){OsCHmR|w1*S}p~DAYom8&habdoTu{a;CGDHbKEfSJb*zC*0y0Nj^l9
zddIwY96VG-H&i9#IuU8IJ8`q%n^=ln&TteR-Wi33O7(c_fr8zn%aTx;zYDViiBMlN
z3NNci!K>-xu+{bku?)+`jN1!IpwBftDsvj<xF_MJ715w<>4#;9X2G?YPH;kxe>Rm%
zp)>Oj3F!GPm^iZl1(p_AY$negN}2`R&936;<1SzyG9LC@U#5Yg%6QG#9|B|);eKVT
z!0NL<n!*)3d;3_j@sA96FVGP@6$$5m9wm)h<LNl-QSdUkRN&GrhiY{t#NKNm>|79w
zFGIqGN$n$%?hvQFZEu8$@@izf`vASZG6X%_M?lx~Fi3LjrcZmW6aSO;*dO+RdhL^^
zUG~v%Ftv$Z^$Nl+W8S+lYZeT|$bopz7HY53LW}fNu-d1RT5NCz69a3yZDIz!SSAH2
zBc>y!iD6%(E}hVAgMVFl-*2iCOswU=`=b)gUv!;hEdbQdy$v>3GDt{cHfC*E3=b#%
z#B=YbfmOi?*nRChbYIJdjnVIfvj1ebXT#E*WXxCmSCJ39%A2w0HL%k1r${zga}qNf
zgkL`J|Ih3;Jd=75CQLNs?%SUQt&pF>r#EkqhL$zxL4C0?Yzag}4Uw+o>%@Z#hT5@q
zm^k}}P-od@j2uo81X-HFx~4eXT^ol3>OtUd&`p=sgkVsa1r)j*rH3-&`8>QCguD-e
z2;~a$_|bguFx&+q6(fn}9aTY%TR2{I7==sjSc2cc46;Hc4$rBnVeJ|b;yoq~rj%Nv
z%UVOY+BXBMU+;&I)xq@0^}V<+E*hU!{i9W**HTR_p1*Fa02gzm;MON+3|gKo@Vqw}
zLo1KK&*nVT(yv6@_GyCCiN{beF%$0Gx&n2T<G3wLuff{N0r+-c7>XJXfsNWe)?{CZ
z3wqwr+^^bL;O&Pt4gs9q?LJ%?P>COOzLUVxpLBOtKS_PIgyV85!TYo{6Z^KGIXo6;
z68|m3g?|Z<;8Cn8JrNYYl%Q;vE4CKr;kPjdAnQURSiawjeY+Vf9lS26;h&HEtFdVC
z%Y*rNxe<YvEO*so68-LP!Bpg}xKo}-xQs_T*vqCIftPVT&i%wQm6yJQt$`Yx)Ak>N
z>1Dq_Ip2yKRbj!5UcZA~NBimPrRLngS_h&b&F5_I9^~qG)S;Z5H!dq%MWmjXg2=-S
zF#pd+I;&>@w~SC@i_|!_>)JJPu{jYp+a~bm^9K0cfi(N<I(qBg2GF-K#`=iykl{KN
zYt|+ZkIhR+;n6C3cGPC*QhP2uEAf%|rwd6{@i$WYVkyY|O`&oI(=pWB4ZZ(H!;J-Z
zh?&Q6nv&Q+^fILJ((~JP4=U0jgWnauO3+80Lk-0G%|W^#s+>6U9U4~;{RcN!jie6q
zO>tUHDt!Kt50_DgmPe++heUJc@ApBlVn~);NU5dY#x~<6epVGMe@w!)rHJ#0J78u#
z7bH*aXCME5z-xOd_-+qpw($Kb&TqqMShk{?=3U@fL{7KS)~Qc$S6_#-I`IHbH*Upo
zWzVtmXdMoR^V#1PTkdD~0B8@$u$k+|vVgwNc<}|#47agiN!ML4;z%c`SFPb3hv%W^
z&n!XihXT;+*aZP4<2bj7Pc%YL8#EN0u|<{V<?onnw``~rl=3ZT@MS6J_WuMvA*+NR
z#|{zih34#)h6)>18i9VZ#kpLa9iY^94*I0_@LXbTRyAM1$}?K&=gK@X`_ncwYknb+
z-D<}fgjEZ6S3km8&CBp*gAS)~<}N5)5aoOyE3wh*yqM)`cP4rujMFl-;Uw}zxkTq)
z{I#c>Xy~dj4^4`j{-vXu*JL<tc%Q-(KQ^Ui3!k0ciW@Y9cz&TdXQ-k9YZK<-sn-_V
z>1i^y(yeaHfM@)Fkq%)PHjPX1nT0i3$<!yH9lz*wa~o_6xeK=g@wsFs-ux|t>ZAGG
z;?hIxMdoSlfZBR)o&P2r7o<V{{Zs)j$7<Z8Dg!f?1(4;T8`vwJZLYIfj$OFA1dsRh
zLIcmaUSeSmBP)yHYn~Qc&1ZEM6({4ona$AkXdcTCxq@5s-QlcOg>d`>Wfr>IgdO%$
z#BO;Drl9p2HI|O&w&WP&qktGrHuVL1Js-;zua<!~SEgca_IVnRkP4HNb-BK59hekc
z1pUJj-21mbAbIp*xL$u<c&+&WT@Y7<19At^p;Cjh*>oM}&yGU}$GM!>ha|YV*9$GX
zUXX#)65N^3`YdX%Bq#pFf-K>)!^_gsak5(v3AOXUY2gN-_eP4y9T<(-7m}c(!GsxH
zlxO>jo!M}8rjYAVX4PZ7-~<%IzC3p>M)iwusvin`J{_klXbwJEeGe`^Z~_IbeVjp}
zJiQQR%N2TfGtXN(5c;c*Ufm1We7b-;FAQJ;_1nBJUWq&9TT6<2y6KHKE^Kl3S$115
z3%}*u1;_&~#9M=T_GxlgqQkLE!G*i09z)i+K19t8>Vi+b9-Kz*db(hOHv4%$mFMT*
z<U4PcgT09k`$JT?ACG0Z#!!A&Jaaa3CVL^)T8S(BCBv;>eiGIW)d&(QM7e-AYbG^n
zAwBZ55&flp<8Fy@+^a|OOeEzVHvALk{Fgd$>rOqOO&`QSt8W=^u1Eusmt7d+Gl7%h
z6p6gwO|sZ(1it!n5&nyq4t3uYIJx?949-krx9!~on+EPe&k{#4NT0xzY%UTBiwaEX
z9}D{(cfqZ`qqP6kP84`#(XGqfV3lV%1iY@Hhm&}0o1z2^cnwkW_uo+`<eA{y6>WUJ
zViN3JS1j1uAVLGZKhQVDlBi_;9G?1b$1|=2bj$BaXi$+ybyi%YqJ;<W#g`mhVsJo^
z$Fsj)-MvkEA05XL_6&7K7}2<oyGg<{BYZkCiJtP6r7I5WqtlB|^p5+mAl_xVpm5P4
zp<e$&7<zk?j=v*|`IC(VC!J>!HqQ!kM(>8fX-Re2t4Bldcnv(a<ta&cn-2yXk}+_@
zJRIe45HBw}NT)R;86mqJ4c%6Q+QdfkMR7BkQC2}Fx!1ugA9J#8gdH(7TL#_|`ysXd
zCwb7O0Rt}xG0M!ur@52pUs-wRQg$MJi}|@HyOLIoUjdEY>H^0pgY+KRh0%f;=#w4_
z->jUm@AXJ{`RcrI`Bp<HG)x4gQXR-hE)kynR6_1Wn?TT|$E4=GIeL2V-oB4t>`Wr|
zlD)bsG1ev=%r37-E-ePc^ygvcB_{|GGoZEpRkXiU9ZpDx(L}jqbhwcKT@Ovj?~w<I
zpPeLlsXX90V-|dmjS*P9yGz2Io>APTi+)l9eA`q@8)Nr?!7(#PjCwDOEeN2E=90pU
z#ox&0h^6p6oX>bhC}Q0}2p;TcqMg4Rs7G8d@m8CNS!*r9F+GTg2WZ1fvsj{*WDGh9
z{&-e32<)VF@sLX>)@r?@jn?B~shbZ(E?7&l_Gp3TU@oz$bs@W>_>7x$E9Nbn%vN%I
z_ICMZ98Kjo(;g+xN@F`#-K)aHnWJe-X$O~(eTrm^t;GJ6ZcLvVN%wn9=GOJ^g2ER!
z!M5!kqURg>AkdSUpIOZ%j>&~W8&@nEZB9h4jTWBHu|`MzAkuPU7udA#0Ke9)&?3`E
zrj1X4BE#$C_snp7v&$Ht4$K7+Ro=^bTov*MIgoN3rYF|%*`8nv&~E3MB=ZgmvfFtM
zsdhHmv1c+`>OLV+&5fwG_z2ymxdz`gXQ8XiU7BcfoIY>rhu2qQ>~iKF!y9v&>D-zE
zIPvWTS((Xm-UfEyuM{D*Jold7T_6hKA~Nit8^twGj*#tzvRLxw3%pe>#-7_X@Ol)V
zVV)p^-t(P=8?MFj+VEH``fnvnd>{#$o=5rY#s}*7pBr(!x|A3x?<Y3*;)v3IAJBZ3
zE{t^1p&1P`i1z|_y6x#o6zR>fIUbro$Hzqw5>!t#AN~{cJ$p$<9yAaPol^%DTV?p-
z+)h6FPXXZx6Y{AzTWI7X$F%>b!_)W6aFId)Ei&^bw`$VJ`9+O_e$$77X@*;1b3r0i
znK%i~r_O;|UZ-VRne_YeDAJ&-M_Thfk@Xq{RBDe4kiz4b_^1&FBASU+mIAk%_gR|u
zNpK%ZY&nk$(m3D!HuOK1=j!#$IrIKHl>In@J5+It(maZu4ue!mbrEJ=@W<5>c6@p-
zgheSju?f54(Xu}TRGBZmeq$K~<-CMRx>nqt7knRA*d}bcZ^{-dF5(V$Jc6+^wve}@
z#JRvzN5K4C0=cjD4))$w;2w7$<Gr967~VCBbDn;fHV3u}ovsG4(QcD5SL!VZa(9Q%
zpVq<~-&gReLy<dLWQN`qleq^s*Kl(exv}rX6*%}Ag}c5U=UMgAf~v_+F+bFj?KO{P
zch4rV@Sz^Gt+y5gue3nvs1)`qe6-LGwVCTt5!ULX$Z}VTv&wlJxHd3l8$gQ$7V_ux
z2phPOxf++`exn(KJ0Na<5}rubfS~s2B%)>?yxs#;c8Mf5DR@JXt$}c)=?sW;*-!e{
zEP%xi9@BAmy>Q-qeY}4;9A6KgrX8}+$svgtBL1M6y6kd-Bbm}5pLd1t)er}5r3^G^
z+lGUgsrWYU2z}tw2qe0QbSx~P!Qb12y19+yl@?(C@moUI|0MB5SSp0MZGnweU&$Am
z3CE7*)1Jh%`V(41G;UKlDrZ#EHJe`3%TvVIy59}Z<#!lvZQ|L1eD6xew*BOfzatmA
zxQrstybf7WNuuXyLj3^)7B9a7(=<e=O80vrCc6e)%N}5V&RDi6NSm4;yoEEnnglZj
z%VEctSa7oO7y6h@h3y?jq2OEuTs-uNWS{2q;Wn1guA{}~b7f?k^mp9pb{j6CE~^T;
zPPTWa<1O_f*dpeGK8O5h%Y-#T?$jx8_B+G(WDL=V-3-!pRD<$wE#`4o4t=&b(1leg
zuxMloot3cyA}T$Y)e0qWkkB9_%JoUXPbJQ&tOaLZeF*6f3^<7&FCh4t6kNEp2ac7-
zQ`d-S`XpHg1AAQ|SHg+~rHXNXCYkWrs~QkE{X)OhOSqqZ%F$(?1KqgwI6e1@*F>)i
zxIH6F*wXZmbjv&u*!ku$Y~p>!M<+Y6E4TQrKeOX}ey$OJj;_GOmJ}wo#+c1$&%<{M
z?&FyEJhQuV3v7=V#WTu^>Eqf5ux*AV+gLr5d!kc<D?VwElA0)3u}L3q>hb<>i$*x(
z`9k<OBpRx!a&V5!TVkzo00)yF()e>0tc%yjm+!8?@5XVQM};JJZQf2+6kLR=bGPBs
zFlDw}dlQpcXUt9aUMf60sLSU0#!{05Q&GxHmNP1?pmISeVE<w-%X@9W<gKMpHpZG0
zTh%WJ@zRCIIbJZcG?2-p?#D+-N4emgPGIBy1R7^PCrdt9bK>h%@UE{9^L%6kdFpMD
z{w^E!qA294`~rKIE;y_z3StgLkZ@27&kkyG?=Bsr{6K|cw1*+*%U*JN!8Z2!vj{E-
znv9<Up5t=+OLZB}@u+Ej4;ChTfTv<+n7N{mN^YwHtxG3_l^Lh`&bTG)lj{-Tna3Qt
zQu!WVO&o%er$%v?)+S@nQ#~%Fvl72(ZziWSUGb^UJUsum0_Uinfq{8a-1w1-R3>JH
zVDz0a+{;``ey{Kv*DU%<`sDO6#%vDPwl|1Vm{&xvgs#9<Wxs_RR#d{w)4#}z_6ejx
z)f25F@1b{A51wyOz?NV2u=2oKRPsxJM>~I0{bRi5<-JJoQ#T)KHk4w_r(?MC<57Bc
zXE_+Z(%{_oy}+j46yZ*+taI$vVh8WsfLv91FnkvVntUJF@AUoT(WNHgsg_c*b(AId
z&}bbSRvRWuT};@>bylEWP=N*4BhhFLV*iXf7=1#EojQCT@{6X@rzysqP1k(7*xw4s
z8XYDkX9rQ=m0`q~*N~%Lger>~1Wqw-G`i3Z4BHjaM|lNm?)^g*3+KYTVpG`P9*yM_
z&Ee0iljzq}%<FRNC@C2WIw|UCdwo1sS7nmk4IT8<?Et~u?rh=vp35{>X9T+R_R&9C
zKq`9A(T}3zgsoFsg`Ok*smkyRfdk*4xAbD4AV;klTgDdB^9D1irN9#Z7>AK$y;*o9
zPZ8xNmD3UYym;f>N1<)Y@A{b|*V4@~&Umyn1yfaKz<)|5sJQhi9xXkKiLE(!a?(CJ
zr!NR*FAJvXGlu9ri)Ol9KM+q0&Oz!wAEygFa0=5GuIki)$=b)s!=C?0Yvdoo*{9>A
zKvm#FHqh7PBdjrEG$Z~pZ88#LtA==PtXVq^eqszH<EdTY(F}NN(nh~4oFPUZ%`p7-
zCM-*lp}oaA^yQ_cf(SdL_6z0-{gj`Pan7+&*ewmxp5fH-M=RAE6obG*73}nuz`g^c
zNUZA=JTW?!#Ks>YX8b%LQ@#VA{&1&PveZ$;O9EYXt%gsX&X9O=8{e0hO(cI!C0}+q
zqw`05Xk9OXblV6FRc|EH(n2!fxdQ4;NT#YW8}LL(I8nd29IGGN!rAN=VZJoa-Mc=I
z-fNVCpjiP#vs+9!-qeVcPW9%yyi~F3Xfx^Bs09b#0MOPI)c)9X;nhk7c&s!D?PK+Y
zPoLeP8?D?yZTodXbk30qzr9Eono^fnTR}R)2s1Xzk!j*Nw7Nu_?}>RTIK+22YCeBR
zQX;m)pQswh6BK}e_idXDy+zV0&6d4NBR>`jATc<X&R)I^>ms&t!6F;+?~fr=HZ6k7
zyuP_aBOSeS=VQ*K`>5S>g6|5<KwCdc$i2Cf(=90G-rulgVYm4l%;iZ?CL_T!5JYf1
zm{4<XDKxqe2kYkVMi~ux(6K2eMTHZgF?6@!s^em`Z?qCht6ZVpBdo}s0U2<)d7o^w
z(FPH-%DQ{&^)UXq3EVL?z~3hy3L|9>+Iea8kX^Y}n4YBqJ95KtT*?x*%+C<RuO;EV
zBujQ<*L_UAX~#tzO9i>|er)`A3EdluAXK!JJPB;WMejAiI?0MX{v>7BpX?yu4Ut%K
z`WBqK`A$GLj^ozsy$c6ESi!oq8ZeS4a3g0GG|FuN8=l`(yJ7?Rez1V#nXN!>ejQZ|
z3`djxv*c`nDJl9D23|QM(5rAN*1L>>$R(Bnr6aQ-G50;4*Srs1_vm2PKSqjqu2o9z
zFzuP6290xu>>|=)=&>8l*l(YS<Adcu>~uFdQ(}n5#Z!oJsE5GzYb80*7DpFv5fwyc
zRnsis`Q7f)DB-P-3k-aqE#@OF4xEECHT)s}l^Xok=^)Mi`nY*^op2yV6uzBp#esu6
z$m!FQgr$>WnZ|4d?oZ?jR=@WtPFt9SuOBYJ=Y!3dou5whi|-+5WWkq~E*!Sa#Y}Ta
zt~>7o%3rul)1H;$p+$Sp<n27NdY&=&?C(MNoUj>UYt2#U(S+_3j?xcvPIIo|0c=&M
z1Bf-e#J2UOoKkWXTmC$k6YD5o?DGR|lbI_ftaV^pI|JE-ai;hpOpOhN+`+w0JmXj}
zpLm^cxBK_<8ovFv4Y{mcOvT_fOU{?!8m^sY555J$yx;@uO|d2$U+xY!KD>Z{@#naa
zfAz@aH<7IIjYBP(EQ2Gi+t@2rY0RBEj>Z1fWOwt;n8MNhBq%tF*%^F>@QUxaX8uiT
zdutZ`c=!}O*>nu%@cd0Jjab&vH52PYc!rZp3{lds#UkZCn$nm{T==`Vi}OihQ>ITs
z`Q1ia`xv+p#^IqjAJX$h5q5oDiE1@~ixrH)<iTkYl06stC6#dvjY78FlxH=pCB0XU
z(<#Prr0z&M-o8<U|3b{+oOK$#=zIrux-Nvxb~lC3*5~2FACu9H=?UEC3h-ffvfb?1
zQkq_>jWf0!B6n{V!{5)f_|rE9^3U^ZjKi%UeEuK3{CEQ1k=Tx+R2Ki(oyUjvEwI%g
zjyb32!stEapp-q14M!H>e;X|X{rQT#cQcmVA^bk3GZQS%d2-{c)0xCbcP7jCx>RrZ
zPV622gFg;=*poULd?h70NZ|8&+AndDIe=oXB76B$oQ<s-5~x3o6aIU$o0a|2<ZR|9
z!QOGj7<#q{t{!jzZOaIJRQ((+R*hj(Ha4KtT~G2(a}%m?$#8O+6VvC+`1Afel`G95
zfusNz<W{2kZfUaqvo`fi_P|%J#kjafmis89!7k7J2Brz(Y+j%r8zcOJgDMX&qLc3_
z5w~V$JLlHV=JQx}tCn-w_J+)qiojs#JgnBOu76~*9-g1-!3pjMpm<Ckf8RKX@r`?l
z=w^;<G%;pj`N4<-qgX(YD|?pGLnQhQxoW-N<VKzq8<{A}td{=;@3k_pGIJy$JR_id
z&j;A}vk)~scd}J6>p3~?QW~roPfAzy;`wh8oI_n2H{r};m@Ix38*?6!as7p``jRNi
zE6aurVd1=fTZ5LjpMg%!4CWMQ##L_i=VCroFxTW9++TheE4S;wU&*6zKq{o(U!fBp
zujOaKZ8<nY^#+}#+>Yvj9XOcLMIZQk;GHdnv~TtS;ppivap%-_y!XS3X-+%F+CE8e
zZq?h_*^$|FQ#Q{So79RrOPX-hyQA!#&R<A<eFblxs)L@nM_Er{Jnp!}aDGlaz_zLQ
z>AEqO=k}0}pDYJc^5gLj?@jNPI!)#DqnVrj9`tc^5FAtPr`0#R@YsW1$et=e&R;49
zg`Q&4t(5@WVI!QG7>lExdO)&DA2=KuhNGX`;PZAx@LWBcZd@CR_on*_v)_HBnI=0}
zYefc`vHK!bj|;{ngJ1Y=r7?mpsteHZ_68I;F2uAc&fKzZU2yAIGOEhe(x^9zFfTKJ
zF0V?({iP>RXXX^zI^r8R7G1?^?NsjmV{`C*m4GrYjId-xC(a%%!T$U99IWhaVAkJ$
z=>4O~By~pPtsNpz(qYd6M-`!)+k9^OwiCGN?r5g_S`j@~o`Sb~Wa!%?9M>bR#aPDy
zUYF-RnX1Dezb+CdD<5N9cF%=y<_<Dyz0kNQAOAhI$E=n{n3;c>9J#8@)$@6m?x}UQ
zyZYr}<>i}bp>`kp9wp&h={2m<lfincV!TZJ1ZJV~EPC^H;>i0S_s<!JQBMQ#z)eNS
z^4y23Ke|Dd<5R)OF)84D-LYP4#SECO-a^{LE1Ca+dpPo~AG<W>J`qz5#O)sAfp=*V
z`a24IU9BN$=UKeFt_1JpO5p4xhv=zm3kel{N;2svh^o*dH@c=_&8t)_wU>vPq4Ch%
zlZRsaM#76SeV7)TOWn%P;J3C)I^%+?P-Th}YCLm+YadS0W??%nS@eY1oQ*;;?@{oo
zb|WOk3{mTImFU=Pj*=0Fq0}aoln)*g6h<z^eX`k5ynBFHH65Xr>6-+9Ru2mc9rEmw
zZFgZ$njYr)j-fj?yAYo*qv6}ZN5sbMCk9U*3psmy39Uff=Q;_ihcDu&b)SXFpKDO_
z_H^j46=MEg7c^i05biJ&+#%xt5${uoCqHw2UOWZ%2OHunJ$2YW<`mCdiNGg=LDc?$
zDAkldPrDw5@b48x7-w8Zmz(^eB4NX}nv>#5%JcnTSv3bkppX0>RYoT48jFjK19?8O
zw!m@rXk1vd4yC4!fb7F{R9(g$8=v};1=FK(t=DGsPfwv;`@RZ2&o9D;*b;P7zlfLi
z=a2`_mcq*NO!CEZ95Vh69CTqeWQQ-L*-iWCy2v;hw%&kPJQxqp$~l3NBY(E+{!ZVX
z%z*DHd>?32DNO(UQ{cWclvHQVhd@OIn4;kf4F@tH%<L%4xbvK}k0lWN*9B;C3^6N-
z#1k9D!Nz3~9n13)+onk(ahinNb=3Ke=I^96dm}w5y9{>xJBHE1d~7<A1;>|kQ)#DW
zxOXfKRysZt%v>UY_tyr&j|vwkxa7p12CT*5JRPQ;zZrjIF-)%)(l>cCx$|<fKsR@u
z;H0I1Ikec~Wakv3q)PFRZxMRiE+R`OiE-QCT0^398T&pUq_SoHpxUA-n0&VwkLQ+?
z0@q+}v1z{`f2fsy+pLIIA@OMPU%4QXpT{)!^8Cqz-a!44)U0%a?;pp(+s5~F%u_vV
zJ1mNiU%jU5cqZsVo`>EqI}NPAn)Bz$emthS15LXA@4xd!PtnETH)sYTKJV?Ue%cA7
zO-*3-(+RNl{yQ2sF9-H;@wn2Zl_Yjc!DsT2Y;=&uFb7F;<=b2As~O81w${KFt2^Xr
z@h86TQiT}~InejlXOf`MQ@G9444-FPpuyd-uqI#<obz{J@A!VODXIoAZMreO3EPTR
zbIh>zR}<-5G@3Zyj=>I}1^A?W6V5i-1{t5mkfJaX;maS#Nl>jJeRX*sh6<9%ycBtK
zeQ1hhcU7Q^4Upgeqv*WDvHspTE?Y8^B%whiAua1U_d!N7qCqO5($J7fC8g{wqoq=a
zq)3v4=iDcWG}KqSRMMbGODbvoKEHom&*gIA`8?;`_xt^NEj1uEUB^Yb)8~L~f0S?+
zT@4x4W+KI?vur`rF!1+q!}(!fMWwr{#HlYEL`Dzg@ypK1_)85%G7Ih5*F&>#*zyo2
z=dc#579`-N1A;<a>l^-!{>n@axr>}KBXM)pe9?ao^U+OJ6TFr#gEb8*ROBkKednlS
zsfH%M+o}tO_vPW#u@(H-meaU?Bq6>-Lt$u;EB#dB%!Bl``Q9NRq}tL3vhsw_IH%$K
zsNfIHjZ))R?nFSJnKRwjGMaDRBE@(2pM_^3lZ3BV<B2&s+^r&mUuoRRJ3lJ%?1FRT
zM!;)+2UK8}>tjq_mdLN3?T4Y!oVN<udoPDNK0@A#%l$hc`m!b!y%t1bsj?$)lzoa-
zcfXL@&^GkF7!CK8wqdl24u8_U7?#}B;_iAUsrtMf*thj3B+2zNhuiBgaixgw&)mU7
zzXGkSaHKnN7x#Lz9}G@<fY~i$?o(n&rwixd(}Takv)BnVelgPU-`Z?W@F<#Rn*!7K
zx1yGuKYKhegGg*8BK4k!B=dTc&{Yp5RpolZejtp|w=1A$j&KK9AaLtVri<Ra{2`9%
z+5sM~RuUhV7o<0(fIJpn!{%7A!qEW`BOgQNZj8bDNEx))=SY&|<^VUgKsOz4(FxWm
zmT$X3E_k_#1Je)T&Om28_9j=fKmHI&acV@ZX<;~fLIs3%48(6^<1j>UqCJYs!Rs;u
zL=XOsW9ok{iPN>3L36<p=G=6ibr-6LhRj)p-P)1(W9mjByV{2T)mlN<hrKYy)Dc@$
z)ajaG6ZxBe`LHl(9Nab4wz?_(Pds4iQE?g(I&4Me^zR*0I%ZxWdN|*MLW}Vb+Y^E6
zOKqsFlLYtrwwdZrsKEF53LjhTg62sdh`Q-IxVwE1*}C;2o4Cb+bpM`;`v*Fqr)VGc
zEXjd3%@@piZ68aD6``J;1`C%S&i@^F4DR8w_%%iFpaoCHrD~tx#U)8@J4uC_Y!@-Z
z?UOK%Z^mlDg{L@pDQ(}~iv5c(!>n7s@v^{G*e^~e9Ru|Eh|Lqgc*6`j@4p(1zu-i}
zmCxX$#6rkqA0aZO77lF+Cd;R%<H}-Y^(%<LzWz?kJFu7=%1)=*HyBzAuhII&A;+>3
zb{+l+@7J_rK+a?^UY!Othd#q?>-B82=@lzd9}b-+*YNk3vv@8u1}=L^(A78pk?_BR
zaHy6vZ@G~GwoBvbnByODZVsW(@4v$Z?@q&tOUIz@j5Zx6?9&faG@{8j1OAm1<K6WO
z=*Y;s7#-DE^X;K8mlXCISA#P6{yCX6cjrx3ddh$YzM0D9(t2TUb0}?E*h)r!`T!e>
zyt(r5G(LtVFiF1&{K>H2@a-jF(vA~6f22QMO>}s*hZO$?aiS7!V>ah*3=XkKhT(79
zNaT(Ybdupcq80TVMX4U#RA7xZZz<$Pfj*?zLIrhirPhp9y=u88WeDAAI+A`!0xEyX
z0bZ>BjIDO={IsV5U!o;Ap46mioA*FEQ1%>2jLia@)k;`D)D=&e3Cx?-ZRFwSDApRS
zK^yvNz*=B;)g{e>GvPOJ=j%+k;j{yOhg?N7-Ab4!xLQ0%{KSXHCtzo>2@TK@vU$R6
zU}`qu#wN1#`np4qB^?XVPJh5N<0#N4H_6ohywG505xW&wL*CmT#@coLuygo()_sK#
zu|yrd*{VaIB_>0(h78EtO0t*RL=dq<hHu?I2r70>fCg2;Z~Af<<b8W2+LzY`yihDI
zZ8AaitKXUJ$+d7}gC7lxY8PEDq`YG3Ofu5*IZt?#g6kg60R>^#rQB48T_1+iAKIBP
zt>Xb4BO~x9KLpEktDxmw16aNvz(u!S6Y_m3vszFE%A1m5okt&N*dGzxwYk{gQVT7Q
zgx>t8^APrNB+aeMMqB?kux>>Lo=rT9>30JmjSDWC@7vH`>k_^?8%U(G0x?!J1Mq1*
zTAjZuII@-aj=O5G!eTG^QznheH%Fta!BlWP;m2Oy*@9u~JuqVaODh{qKL{VO5nIz-
zKv#``rsxj2uf7?#H3%7ZA!8R*I19>7CyOKUR4_rQn$&bnf#;iCvFTbOs9vyw4eCnR
zs;UcnCN2b$vkO-Z6LQ^O@2~>LCF1(+Cs|bNLG1K<EEb34vxu;vsOj*U<uAU@(zo?7
zk@G2hDAkAuiq-$`ixOO_7X)vKr!XhcVcWE%$-%)xL|$TlX6`=?A3Lhxb9ryLd*hR6
zp!^V=SX+VJd1GLPuRX;3t;JtWw)j+Q4O|zPVJjCc1}D>vtfcr5IqtMwboj_Nq;>zX
zF+&8d_)cTASWzMxbap1H4_SbIvrbvHxL1pwT6_{u@^2@-rk8QS3=NUv9-+t6JQEV~
zDv9^pbaMZ*m{b*?BR*TA*r*yUabZjh%ldC4kr+K(a0Mq~VU55WA1WnEI53bz>5moe
z{aW}ELNH0v4})9Yku^cxtSwz0RE2C%+j1Ak{eF`;)~B&}>0P3ChPnbTDhOjo>SDM|
z33G{}=(c19E;rr^v&*enS(h^0%n`am<(jOtYc5VpQ9%3X2h|hK*Q4W&<LtTm7xJ!B
zTKs5RI~zA~nb0G=h#Re+vPm=mpjHjE$5)X@zk`VBg-&+lbrVEQH^aeIABbYwX*97*
zCv%_OhIWSu<lfxp@N$;~torbeoPN~7#!AHF(-Xg$@di_plc)o4a%ZsJ243t+VIVlR
z?*Ug2?`m1saA=d*FOHa4#!|Y*W8&^e{CGE$z50BD_3w^C>wasr*<mDF_UJKtvugt8
z&QOQEGZ9#ptsroz)S>Ua0hTRshN|5$7(58T-zW+szy6=!CS*kS>R|KlZBW)@L{bh<
zg<oN+DDh`%O~T_<@GUJIo6UvZviWovWj%rE?UF)Mx`^e?b;jB&n&N4nwIRaWm`LBg
zLJsd63c=mxX!AmggjPE+4W|^e$&P?ga{F1)qnk|1Xe8F%kcOM(wWzWG3}iK|1o7QL
zJnP0uP+7f;hm|{1w(z6)y7yCJv}ppLetiKvv2w<|`k~x*aRc#QCPfvij>8^-Yb0SJ
z!O3WS+)}7a*S}NX=T>K9$KVlEC^2DJcRCtu@+Ok5>NqXX7evB&y}w{7?CW|<UgZsd
z^of7T-fJ>A+9(Qb0>0Pyyo`a>wyTME<1UOC_Od41<PDkpY6wmXp9*z{qM*g7m3=N*
z#bmc@LXGghHOP1}8?<*85WQ1m<0eNOb>X~NVWSGlj^oUxY^Z3^1`p<cf1+rdwleGr
znFEg+Bd|%)7>fd$h{W`1HBJThSWf;5>~U}an+}1|TP+D?LnUCEXc$)J89<1HJw{~4
zv54ad#9iqV^sLy91{X7!gsxuoo?m;}u3diop65gGER4dGtNE;Mq8``2br=q8Ed-w_
zE<Al+0A^R|Gj>#tT7UcxHhEXz+OR>wu2_l2zZYZuV>ABQOiJ7@Ux2?7#(-n9Aumcj
zOQol0@*L@7bbXu)_d3~znrrsKZmp^G(EWN6|2>kM&1O_IunG-6&mgOTbDL-V=!k-Y
zK<Gs5nqtN08QXKOQZLwLj@+Uni5T>la^1_JkRj|mTbmMj{E-2W+QPx>Pb<lNAIej0
zzreJDlhnaIjc@8R<6SrZSv@q5g}}F2FivhCYnY!13zwW@W)rmN$&9x+@x)gUFC9q9
z<Okxk;r{gY>n+e!u1HPnt>A~sbo_qa0-If{MJ-Y*!8h7W*p-H$zSIilH6{^0AGs)w
z{UbQJe<k2Mr(vWyvO<);$reg2Q*eHSA$n;35!csh!?gi3@SL&|&R4d9QRjBDeIo*4
zzQY#Cn(Ro5Hm5+4#zVI9w=nB(kcNf`fA&%6Q<=y)!?TyYxMoZlk@fLG*Y|ty=%6aL
zNw0zh@-4uO*5L8Yg{bgAxU-JcLp%NRtomd)9)7(N2Y4R9CkuCj(a$JcqO$^YUdn>k
zrAHVtcR<acHhEZo$BVbEU&r-y26DxUm9+3rIgEI_jQ?3)0S>VW__0dxG4>?zJ|UO7
z`tA%q4My{@n&ossVgY$8cpl5noA9sFS|D>{2-Qq>;YD{sxp{pN+;382jXf(wpQIwu
zbmS{2b~6RJ_-pv^{&4QL!I%xJ+|9+2+QNHpIW)h%f}$<6sQ=79vc5cuX?@;8>*y^O
z@-ULm+EEW}%NKM1b7$aN>{3`jU%?|K3BEUU0PaXshb1R1sK?NQ@Mdok{^mR2YwQD-
zQo%$4K8LtWcs@>l2YmMFqjW`)4jpvV2QS?Z<XT<bY=!RNOViJI2zOgIdY=U1(X=t#
z<#8=ayBqLRbQJjg*}`?c=F($xb@{KQCfv0=3{E6`KyPOkPD(F}p8N}={`qzo`$Lnj
z|2Ko)66tV-9|fWb=L>n~nicryT@qINZV{LX+lX9RAiC@`qhPk3NH>hf@lsddfnyRM
z&T_H%PCABoor0y3vuNJxMR@VdUT}Nv1nl2gF8wK*Mhm^I#>36Hr28=QAG4f-(CG-Y
zE#@0!_Hc)cd0bcZ0C*%`1h3MOnD%}Yuj-n}<*KLRivyB$ci}EROKu~tUffcnXsJyz
z7EKqOjjhF+xp7?a<9U=gI*A8tcjU9DbwH(TIGUf-rGphMxcxFQUJg{GPK%V`>5u=g
z_4_pJ@w-49%dbLdn!2#3@8l;*KbR*jr^?;4`L178IJ$H^UHEAM&Azn&1{}HvzYNpi
z-8P_jg>&bltvL5w2-lM8#7^NZr($4CS9)B82%8|-|K>8;9u--AWnd9KKPd~$_qOv9
zlak1uH(T+ytDN}O*d81{$cQeSR>fT`r|{;UB|`UGjf=l1a`$zQAY-{FT~sfC;uRFZ
zH`@`u2>yhNTHj!FfY8^|cjIome1#m}5dKxy2W8hKz@4Lwu;r-%x1F2?9x8r3U41G3
zQWfSW(tjb&aRH9Gw~uW5{Q%zmaG^(ZKQO2B8)3kN0rX~_8Jv}<$C>X&)9;fnh*KR8
zfzj+8a46~*NJo0nl6VJN8D>u_#$O`0ofF8|gld$L6*@gL`Y|O`ns!~=f^Hkmv9k&X
z;nG<#4yzkUKb<xrqg@V){vKX}dv<T(PYgBr-j#!JUjGwJyH&s?hFD;|tO~`B-x%3y
zOx1oohCdenv5qg*EI4fy+6c_k=XcDhk53y@`e_1sc_ZQ57Ckay{6~~GiiI!x6@?w7
zSd{JJ0|SeS*heADc};T`sVvonK~3(0k8(O{?OldXAMM8egbBDvb_j%S8VsX6CzH?d
z8-(*?3+X$(kBl-l5yjlsff$8m=09sKWJ?wkr%y8&F;`%7ss*3v-|-@ooOdiN#~r0!
zIisX<G_gtPVXM_5aC@dDcm`#lzP=@fAI*j-C$*qOz5rght|IfCKQZI=7AOvGwUSC&
zB%FT_iXN{!j0Y<(SxxU9FOt@c5bYm05DZ-m@tMH*_bBcYOWgKh#X;IQO6efE@E{I*
z^%bBls+L`|6_{~R^))fJZV>TdB$&>eNj}8x#A~agAXHbHEK-dYI?B@_JupjLuuB1|
zZXO`PNnU8CybNYpFUP<~@n~S~#`=#t3asrg2sjx5<>K?qLKu{I-)d#)bN?d}Pu8<n
zzQe(L+fMjsB?Tp6tKjyFbfy&*11sa>nZ1oHv;-G3-|Z^k-*QpZ_0|kZ>}1(o-ABws
zU>>gRBiNK=k9Q0lacSsl;{L-HyT`Oxt%}Y9f9Xm#r>2m7?Uu(g7ax+sP-|rGPu4tc
z9Kxo@rC{~_EVg!@8SD(0LT*(S<Gc$`$%g&21?G$`*=!Wf-gh_Ptl<M-w(dUI8?%oM
zHl+e9aRl>GRRgtq#w1|Zak1F5fGC@7h0>F$WShMjYUpN)YK6N<(3dZ4+B#)0`f4ds
zO$>qHLH2OaMHwQE%W>nETJfMK3Cu3WPuOdv!oHtcpdTEI#}{hAsK5*2wn1_vAY5Qq
zyXlLSeUYUI?~{tlk6A(D6PEJf4*C7{6X_jR%igu_V&r2VsWJD$E<J(sceqe2-DU(g
zcKu;77cQ`xs@F{55eXT7fr%}+P$RM@SgCIDCC6MRqBunG!lWAF_oSJ4O%w_R-ic`X
zEJGYJeKoV$U&^wwCz7oFiDXQ)DRe(;WVJb|;#V3*g2&;YC|LWtNJ3y}JU1}J&^8J5
z-zH7|nX3rhqO;`0se`cOlN(j?@54Hmxiox(G=Ki85`3p;gSsezDVp5EJl|z#_T&O&
zT=|cTF}nu#AKIbzx-0nnPQ{%z3-Llk7uIWrgVC2-+%^6X8FI;ux6~wq_i25m?Z1RO
z?@)oBD_VG9|12z13Sn1fD`E5ctq?I%Rn#M0#57%;V3d5Wcyiht^dHnomfnmPcFJ93
zzFZu9zPW{X_zI3NOIvI%ACJ=pOhccvwb0gZT3j1fU$cC`Mm$$4a3zlk{+qu+!oD~R
zw*K-43Adrdc~l%44h_N28{UzQd^5au^e3wde$J-&ig1SPXI5JhSz~kfwpHlr#l&Rm
zC`|3zBxK<8NWrQK=Dp|*Sy`CK*i2hCr(qkGAAQGmuG67|B?T7HnUm=EZ87Z}uFIDP
z3m&(^^Khg&9;>zgq5IrP+>h!I(ST1dp>H-lFv1ybB`hYg0fTv>buw6(yvNk6@zfwE
zSzt%Zpg-l>@#^onqDkKKk!B~cwE8oc>?KKm*%<Nusiu7L_I%JAEhbMc?Zg{xqlxI-
zK*&F@!^^w;=-Dm*u&3%0H*l2~9Xu`ri~M@PZhnB+{qQw%YWimSs!R#rEBfQ=bAh<e
z&w@%u7r`9A@pQN1YcxF<g+g;0t1mu8&)gD>e`AA71H{~+c?e%3p39%Fc@C0KTbYvJ
z;QM8^9Fs4d;a;CdasAs)XwbHm=k)6GBd@1ZSMAZ%Za^|M{Zzs9qdoaE4L4Ghdx^8|
z1a{kSFSCafQlvQ)yyXY6(}j;%mxnM%%?l;FWRh^<P7iEZ=?Cx3457ENnr;5D18YxA
zVBx{Z_%6a6q&loHOl>aIJ_y3sgJW^0k+I+sHb;pC$1RD_J$OE+oGcn!!F~pwXWq8I
z*qHDZV$jseHteXwjj}NqBk-{tPQD;XhNW=-kdRyZJd?GD9L3Hny<+WAxv*SeB;RUh
z1dm22llf<&$)2|VFsH-~7wc+~k)IQY;=P+h${`smi(<j@vH|$)(Bhly2QwNwm0v5@
z;k}M$F-?4zJQ(yCXBUhRbE9SC`p)(2?jT8QJ*EeD6k}20mLW#VPDa;3a!@;EH^{#@
z0{#yF*h}lXY{#=$xND$7zXUyp*CE@GYjlgx8El6abGP!@7vy+Z{}v&WWraun6u}AM
zXE?yLFvLv<;Kg8GZrw%R6c)m*sC=Q@G#veHDIR|B1qTQ3frooPGxR)++bjFv#kh-*
zuQV6N-Mmel-@j!O&neN5^-?@MZW}gRjOJQ1<RG}x7B648iV?j%IJ-In=f57rCzbCO
zJWtX*=5!SP=rqK-PaS0OkJsWGH!bP6Z@Gda)&*pi)PqNNCfE3HKB&BQge%ROXd%q=
zuC+YHx!-mH@J*cDI)lqq3;Ct_Iy~x8A=+Iy$h)^2;fJmI{M`q@{|4C8ccmiU`gH~`
z)XpX8MHlgR!8iOKcpJA=O3+n5vcdJD20Yr}M3>k-0;d}-U{er+=yC%dxenvwF9p*@
zHv9N{`VHkk5<gYEg1`lPpzY8S=$L9K-a1VUN-oR+WBZlx>)><Zm#vJYmll!erf6(W
zz9qPtltrZh<N3V<K;Jt{fnWbx`p?~%_l4~xc?l~yzuN=rgN5Ivz*e!$*@T*>qA_OD
z1x%b}h8r$U<+AxQv^T&Oju?k?HF6U?6Lt8~R2>p^WgB>Je+Tl1WO(s@U84MHA%7tr
zD*9z-N`GFpqc#VUiTnx%JKN$}k7*q033rNVg?3!}`8pWWYKQ!}JM2HKPJ6$7#Oxm@
z1mECs*3^}X@67|?lqipUUN6s=EIx=k@)}U(^im={LI(DDHo@VecgXjS62i7^fiJbD
zbV=i0?if&oRVy=S$>aO%^HgQt=zaqgT}JWuMLR+Jk_Oe$p2CMTYg4T;E_9s2Ag*UU
zk{Sw`m3ar;`1-R>tbdL)eLwaX&f41qK3~<jx7ulXLsA00lRE&0o`>hgb?jD76YQLl
zMz3CuLCycvaUEQQU;Q(<!|hezmQj!Y%r`T!tB?^Yu;AgDv0#$66Mi%&lGVErBeZnr
zvI$1?;3+ApTXTg(d`;uNZ3=LA!w1MqmF72TnUIyWguE%Ih~wI6H2Z}o-|<3`Pd+Eb
z6BvlL?dpOm`W}L>8&bVVmvFt%S0X*p^iuIPys<8ptC%NQ>4$FSm3s$)*uD`52!0%%
zDhX|#8=+~zSSnTU6sD1_I3^kehHEKVG(VrUoH&QUQ&q4#`Z%f=*je>O?}Za!hj!KJ
zO!;&?nvK{Aaek@joO761`b~rH+b$80A1Q+GOOCw~H?uXy>tW;%DUdo8f=zjXn=1A;
z3A>+yP8&U8tl%=~m~IM%$5Zk2gQ1|+UPj`astCSXZ`I%vfhz}igU4EVxbx-&-q|w{
z-R1V<=)VG6t>cV%eOLqOXwzo>9mmN5rKQ+ont~m*gCV$gC)}8sZH1@WNz{L8<mccs
z%-V9N=+E6;G_Xw|>t;M*gRHJt?Ko2;);n`ss73<rx7dO*W&-1H?jF(K5trDN9XBz$
zQx*2CTnFtx53pq6PHXgC1+HDw#1+Zj<kyzTa4>B%xZVweh<|?2=Dq{Qj9!3SJLS-+
zFOSp~G>DgVuY`|n(oopf!vb{WS>RK_Uvb0<yDxT;I=4D@JU^2?{^!TW>~%n!X%37O
zlrj5bUBtRZ$R=J@1K*An5Ii_Sw0fBfu-9`j{hm6g*4`1hZ`8!La}uC;Tj05TNMsT=
z#G>TPA5x?)umT5nS?OkHv(=BHLB1!3<jz?nemrG0F`XgIG}XsjSzO$V>9v<d7vkix
zEcQFeeCH1nQzNlxTa?h%i4fI)FvE58=Cbtl&E)uzN;Y@80cN&GplZrxQixjUvA0ev
zF{@YPzk!L!_&229;RrdP=?3##yNO189D10IV~<^QvAs?fv%mYp_dVLET%ZCWBSM)*
z`WIA4`T^c^CHS;qYL<WdUFmVxhe%7}xZ{<})LwcB>Q%e)bd@q}?bP7)y_sZc-FnOj
z9LrUPNI;Ks1P)LC&CCX7p_|bavcv5U9uUq9)ylCX^4cOk>GTj@dn3!LJ}DAwvXkKY
zR#}0wu%AiHXtdH8<&K&&v|+x$FbJM3oQYb_kz<emQ&ghBBBX_!TA+avyLGH|{k1^b
zi{iD0IP#`d3(NEtpyreu5|%vy)8b-rwz@h7o<51yQGzqAQIZcmb)Jp;y^N+wCqTk{
zS4g&XKpFiwwpj83{=;mnGI<Mjb?@+f{a_d_JlooP3q(!LpW#!23Z~;GkXhKz7B4&w
zE#?h)qbn0jO`BoR$yQR6vl6N#H-f6pPmyVfGx)!#5LI>VAdeg4Af_Oa)Wk1>!4sS?
z^splv75z)3rDzM@DQmEQuNJO8;>XH*CE0J6M5x-X0j3kW$;^qd=(OG#-}M}2A-B%3
z7)LWmTWW+8<F>-e=VmYq$KYH0&+L4a7CYZ$$3{;shsyce+0^k>Vz0lOh}M!tY_h!y
z<n7XiwrmwB{B&E~*R9BBbOVeHvd1MZv7)uE!%@-%Nt!_mD|Rp@^OV*w!^^+He7y@k
z*8YOsPqbiRK@<)jrAW2^4WZgSudoP@ar48nxcl08{?$fc36+k59CcqRHF^bmxGD^~
z_2$68wTYHTItJ5}6J=P+RcXVT2l#w~B7N~%iXZ-C$3w4ca;48oT(PmoDx~)Xmdf<t
z&-1A;Y@Grxwy(xYDN87?Jw|`{%;!0s0&}z`+ba3N1Rgj+hE{GbrqcrpkX?!ruMmsL
zi|*0fM*k|6R?LK_%Lu<@xrqlvxAQ#7=ghicCjT`=jrUb9qHmO9FhjVf%<fHP&1c8*
zKb|V^X>S+dLLTC{n+`2J_yu=;8b<qtj$~o|J#Z5IV#nb&Yg^v{9b2^d!1U=f=iq%<
zpf>_P?K=S)F}?Wo{v<52k|$=u+%iCZBCK>sWbSIoct70*o%NA@uId-X%(G^ny$6s`
zO-J@de>2Eet%t%oL&ywX0X9*QB7@C0Nra^Wk**iA2c2udrl+{3RW%-@U3vxXEMZ9-
zdRfN!1q7d{<D0Ox82vdFSNwd5Gkztrn-P*&IQ5g&zXEAqzPttw=~}|+1p@Q^y(>5d
zq@jv>Hy#{v3S?gQke99q3c2S=!;&d5^z2WuCR+t()7q^br1*o_Q;kl{7tZ?LJ*YFf
znMCd$2a}ql_}5EvbeYs)sMa^4N;SgWWt6~M`#6wyN2TM!C#RwA^naoSGvxX1={xAr
z7Jahz>~Gf7H-*bSUx*XLcfpAzvJLG6Fzt5>E0hT*k>Y*gNm?;*v{Z_R>U>9oyl;Y!
zJ{8)IOhY|Ib?$5KCrVX!2dAdrV)Yl5;PKIuIvyCyl}6lQJ$kbAN`@!CC`*Hjrz7Bk
zVWa57Ya#2^sz5(oF&9Y$EyQSf6Z-aR8hT2+#04{DKr!$PybM^v;jT3cs8^$#VGenu
zP>TOj>R8hkd%oN>18c5X(D7H_gGPQZt`E8n)j?8pUGOb9b0!ooZA*pA{=YG6$zbj@
zYP!J0z7CEmuf(Y%WayV3W4bGG6#A+uQNK^eYTTb3XTCSivMH_y>39n#exfc)G_HR?
zjCQT$t6c>LUBFzJ934qlU-qWbs~@0w%w;_3w@G|-*Z}Thw-jc(%J7ThBe~10@%S)i
zt@yK7A$;=L&Rd^-2e*YXeCGXuH1fOz4N~eS!>V&(S<gx$wQ4Li)7gUqjK<Q|I)Ozn
z<T#Ez`G}Wa8bcFD%ko<9x4f$1I$OBMlq+Bo)lrn^$K#jc#vfXst~mxB-k9?hN5=D2
z$JcX(_iIT&{R2E@vK*9*>cBl_7*o)hMn7a<L9epUIP=s?NY$Io1w$M2k)Oij1l~{A
z`y7<3TFHE+(-}{dr*BT*hx5;zXqjR${8;}TO-4lXk~x)_^RfZbY^uZo4=3=47gNcL
zRTiTC?f2P=3*)$E%3G^tazkNa|5`Bk@D5CSPl0CYHat7EABW$Mf<@;P`GeGRI4^NG
zzqFL{(<5Z*1hvI9C_$cf-1Q;*0)~RQh6(POt4fz;cq6nZ@$cLJ0~0-eESa2#yDWaN
zzO$z=dz~pg<t9gyKhD7HrcxNQMndSO2eCEhR?s6?9C+!iIvD?aBwcVh1Eh@a;g)ad
zXjJu#q<`NhIx;z(X8oB67F$;kp8!wNVpR>Qg$wwT^D<OX!5B-0`N^7DM__n(EYiE-
z<epywJe_VX9%MD28y@L`tLFD%{h5I@cgSV9x6?~3$NmE3JHXmaui?FhG?lB7<X8Rt
zAUW|S&NekAb59uYsd8>)cG4=8NxT4uL%lIiqXq-(p2ClX|A=b;IC}KPWMO774=RWL
zMNg$`puTbgliI6+DHlJo2|tC;cRN*ZmwJr8Mf33IrW$nnvJ;#-9oV$blfbBaFSZQ|
z$4wWTakl?-EHPP$p;F0Yn_C`UpEDUuBs}5fOAV34pKapENFSuVuf-8jrc5g?4J3X|
zLCcAUN!Q%68rCAilKL5@?I^%aJDr*B^maDkvKOlu7KQPc2m3P>u;aOb=&i>v-1c{5
z&E`d3@HVfFJ&y0gZ+^eYr*LiZ&UrJtTry0Yl+rF9@3;y#^{#}sNn0^f=MH|)Jd8Ky
zyk~NQQpC4Scc9$IgY3`XNMMRWPfGbSXt}pzSIa^2x;q)1<oDs)Nrm{{^$dO)>JP0J
z6U2w-JtL!iQ^iwHyWp3l87ynrE%sD9+R8U15uDa)LdcFcZ1Sc-kh=Av*i*>M%6WyO
zS(SH<osJW$_!9_OLLrM>4hN@$ee8+(4m=QU1(LRpSwc?)iMcZ$Jf=jT#M;K{Ympzx
zv$kZ&^6Y0JchgB(*8qHcXf;SUy|t9MalfYdcCPr*TN~_K7Ku&Izp-oPu6THZ0?zlS
zCr+79NYT!3M1S^nSo9|eLVCWE4xN#ZyK+1!J3p2Qc^u-O<|F=YEwBPtET~E8n*yVT
z>4CxP@$laaa~!uPhF#6|#@|Px$ZMhFq*1bmInJ00!e>1h_D>SC9&7>o-UU!vF9TEl
zK4X0zJ>u2L`lP(Cf)V9tJU-$evpD!#a8+kn1&o$s`>sY2*O=X~NM8wZ-U_^M`2=u$
z>4X95F~nwgDEvr{!)&Y5?0iH#epJwg?@yy3_}gjtU82Af3@m8V(+618Uk_1OfkQ2I
zc~f{Nu1gX!n9><=RkxAIcI?G78&sk4=oO(KB)DMmLh(Y711S2G(&%@JG;FmjU!!FM
zk+tXOMOA_Ic%c+458THlQ7IW}?~fPlG?0vVL#h<4A*<lA;2Kzh1GkSwjkk$VcHk``
zN7q3|!8$Z@P==2EdQdsn1$=&-WyC8Cu6eo>Z=I*iU!)>D(}v&-of*g)zLP!o$AX{r
zYy@FmdSb^Wx?fe6@*&62CF3B&F(3|=$;IC8V)4)KC%`%HIVha_B5=i4!SDE2WV@6!
zRDlHxs>s5i51FXEz?YVkuY#s6l5CdHQ{4Vh727qBv2{6vY1pwp@Vzw>jHX31djTnC
zvnR)D-H`J{{oiq>yxR>f_XtdR{UEfttsp9Bn*#P~lbF+mMXYA>M6`+BCdNrYD1B`P
zl($weZZ-kldFe0-IVa*VvYgy^?I0ejyG0e_)nScUDr)75arqmDug_{>a@-SA+1**=
zcGm{AEl%Mw#WE6J?IWsi6LO(Lhl9ie5uW`Uir0^okq-4Etn*)^$S&6(adRmdTwNwU
z*`Ow#b!aO*Ss~4=*4@Kj^D4#8>yr4O)aUTe#2P=#-xE3+SJBPG9!i3u_<_IS0^4C0
zcUTw+)p1#vmEnc|CS8V@{d1`EfeMgme}uRGOW=0aGI%!U4|5G4L%uDwq$TZB_(6Sb
znm1((ZT(O{{`SekJjau4(jz%Ei;?B=ulDlc|J}tko5T6`1uFFSzN7e{2B`6hR_5_3
z0;t7Pc=NKEyT6`9N83NfcK1E7Vr>NZno%z{*qX@=Ge=UHYYTYcJWp!Z<_i~7X5-gJ
z3urMOjGb=|!gZs$Jg;s7H*_3JXZD%GgALAnLu3}0I^Qb_IyaSnSse_b1V5gl{F?lv
zUbJeHj>WXBbUt}xED4y9Lv%7`k&yZXG#qe|sf`{)4M$dBi_r0|{bV6p^hD?y6}%!6
z6BnUV$^vE+6A3ETk?3D^lvN0sSc$EBt#W;WA*E3PQZ_V_fN%3ynf(y#x-a<UN@F47
z#W9g?lO~k?8&13g_f3TNJMpu;SS&f2P0F-3vIAjarussUm+PNK{absPpOu>EmE8b3
zHee)CKc<acv-QXnVdh_O*&KTQzJ-BtS8!vv6Be7E10t6N(_Rh`54vyy#;#mNMqcb^
zin{%x3^!+damEkQe-^REQ$n7iHkZckRU>M`&T{2vO?u_-dLn;+9&O%j01;bX;q{U{
zs0prQ0iFZ-+j0s1Ltb#}GzLQNsWvnpJ&tekk%!miam4YQ9DP*t38oBG;sJI0$n@1|
z9G=cb9q)-kwq`CL?$A_|`*neMM{YfKobqGK68bRsUpM|#K8FKU4#ID*txVC>8ka7S
z7rX!RVa`=kVZ-xmpwA8QNvJ-&xTOhK*9BN@Gs%Q)H`cQ3Th}dT4v>UBGlVSdflT(W
zXE8lkaGsqERVDAgW)NlPa)_lKxc!+1v`c7Gr31GB^(Ju%-5tC%r9fb}48%uL3qjs4
z8B6vo;wD6oYJ7Q(%er6VqS!8wE0g6<qk5T6jv`l?<3Mlb#h_$$6i5w+nohmZbnx^R
zJX}7Q&N_C8K2yv`OQCzaWaLXU7@Lg#nxpuNeogxD)qZ-v{u<=9b&7@`S%;CrbKbo8
z9WEg%aQ>nccPa6Kq?vcwlidltGa!hRhnyulcCMwh8&8r8HO)kOk|k}eDd%KnAKJdl
zgx|MvaKy3*TsQj?a}17wme6=^y{87$wpGK;D1Ba+D2;PAdGcf4HQ?>)N1tAlp%->m
z;ak_a1kRqr$!jyYU49MxS2BV&K9}dYza;spIXQ&QX(!%F)2OCRJ{#wjgxgko(${+>
z>DM{i;px3GoUXcplI|+})L9j}s`v`=wA10gi<dyjuW#bx&DK;>=ye?}$rZZlh<{4t
z_>|O9Xgp&Y4{e@G!jxP@Z_f*k+A$_Pt5^jtznKo!wkdE@Hy+J(p0PvgH{!0i+qnDk
z9#DiNP*+(gxOJb9h@X8ZqyB`v^tA$+mkt>C3h_UobCG8&Nkg`7<tzMspgzb{bjqOv
z!i1gOM3WHyl~14^TefnGQ@~GudxU!G|Cp4qHgues4~m74Am&{UYFl0w4H$ETjL9vA
zGN+uXEWvrU^*|lU-iqR0%bt>-H4X56xGWXv#-d%B3jZ$bv1<lfL)YzfU^^p~v_4ei
zKiw_((E@K$e5eISh93umW4BOY;vlLu)g5W03}19i19ZkoQS19kqN}DtRw8E_qq8Tm
z-!Xplpym#~qRbMkb#j^H;=6Fe@en%S8_f^a>GFTq7cu`nOLQ6VlHAd<hS4?;$?V4O
z;!(5I=yS*c|3oj;opA$CMOpKiG4EO0*?0&@j3TM?bC^&4V_b+zbgC}{CNCzRM&^Lj
ziW-(5Xo-POyTwQEhYNG-PAGCeii%;y@Rm1_3%P~pIyVLv$0We;E>~2XybLNX7K>E0
zm3T<_9$0EE10^yWQC)Q<xV!-L9+V1Gmv?|d`DPfTx>5Ay^luT}nIqo*>!o<I`5oe;
zeI2tkPmyEOkCWVIc1%7d8-8D2f-8e+*{C6=m@ahJXa33uSp|`Jq-qxA(2t@6QN?Ja
zWDVX|-SPd6JzyDQgioKS;;Mrt<W;-3sM7f~c`(Kvx9IF40z8e#PL78MgE~lFaFM9r
zXgu^aA~8IkBnpk3ir(u5K8m*|^a}f#M=#y6b+gc!%JV0e9*3ZrsVvCyeOCLf*+P?_
z6D)kO9(&t%VdJ0qR%Lyc*<u<6etV8v)_AOkO#+K%)R1^~#C#F9WjUf_PBN6`#<H!}
zk*sWAIP5f-4^iED>~u{67$hu)D3w(xao?S3=YL?9HA-j^XiL_q9V5?5HbcbT_L>L{
zLvg*yV|Mjk3mf>jgqgk=#SXR{C9j98lk`AuGP~abi;Z59G4vrD^L)SPr-2K0^o)X~
zYJbF+;#a{R!!mYF@+6Kwe~?WsS7Xx^R>5YAVwC=)4QHlYw;F6|AU5_{!UB5}(S5xF
z{77_wCf~8}Tm1qKvyT>sx>Sh^UWSv3BMuM{c?xPm``L%*MP!p&yWkH@5m~nheGhX(
zwq7xttQFaih=KYjJZD)}%N#OMqJecy)WrG6r?bV4QC60NtKs$;8!Y!*3a+P9Vacx3
zxZf^P+`KgkPM*z&#IqVa{9-Qc<>@u~#kW!Br3JrYJd39sb;S7EcqSCh*sGoE(CkYV
zYV)V~Xud3Ve;LEig`dDmWua>yavg`yxgnPG*avH_C)W(kkLRkpi$&>?y`t6LA;e?h
z3}`c5M0WJg!B@-O;gS@=cLNrY7WRehH0&U?La*uF1T8FAj)vhI!!Y212dk3V0qPHf
zz@~qqDE;Om67g^<+Vox*N0g8n|DCHrdW|9Q_x8Lla5~yZ%n|-(_nG^X^XUHVCYxUA
zAdZoF2QF{)=s0y@U*miL$BT<_W!f!t5xh<w-tpqa#;1sVPX%b|SCg#ErhG%LVA=~2
z;Q+t)Fudsm9zK}>hF|WJP78t(kuQnlGgsW-6^(Zt7QsL}!QT{-Njy&J5a~0X*s9gf
zu8kKmuTKR&Sd12iI(3qezz-z2X9JkI#sa(Q463OWB=`7Gyk=L-JX$V^_Gx>fYEKLC
zc@#^|+>;RJr#&PZb?GEaRvz5fRfvLTU1Se~-(Yd@3B0P5Nb*;fGPOD@*x8gw&bQBm
z!-4I@VzvnmIQmLtT2^PZZoVAcs?ot}@d<WS?=O)Sy=CJ{TZw9KBN@G?M0{(XCO_=A
ziFZ8QC|+MPl_)LPhfW5QdCQ@j1W(*#KUE9y>C<au>c*?^MtqOFu-}KvyBBkFsZ?ei
z|3uU#X~A2BZgRqfr>H5t34WgspbwsnAu3az;+^Uip;uOp8m0Yg#g{mKV9_Mrm@kJJ
z!uNJa5MhO_8GU><pIY41V&)1h<b(x-;P6q@pf?jFlBS6C#tFN1vjD!(As4!ij-)62
zYI&g5e-NNFjLvBlIBWxSsC?sb_)FdSY%lXmRH6fCe(c8}k9FA5y^U)O?M1@`bzbk>
z3y#*4sIKi>rY}2!p9?<0{!mjs>X$2d`J)_zSC-<!f=--iG?P+YX{erd2n(BQ$oVyU
z;nVWL^j}vswcMV<)!#LcQD2slYmo}jmUmXveM^#sE*XKn5h0jsTrZZNyp%+At|Q(@
zg?o-z8XA)XI=0Ie+`HTvrBjNCe5VwveXooDYb-_YE@_aV4L<nHxRa!-Z-?A_OPNIF
zN6}~nFBYYC85aib!IJGv$k#Noe=EIV)?G6gY9r)yZC(hoy}2m0_6^~~bs01@vBsP;
z<iYQ0SPAZ;xMQQ3ta~K8u9XW_H*R2*#2RQVrX>2vaJJlZIW~kSSOxpe<nx5yXF=#$
z(3W=PQ`L^6LUb<1Ohq_mHV+(!u7N(6q4aprBeC4K5X{?H1{wLUz{GnBznpm=LQdpj
z^N>sEd%PK+D@t(NzeDJtOM3inUo;fo?-E5CiEvJVLd~_sugUWMqvZCHA>65WFq|#8
zP0B4EVtVrv9CRugc3tH)Z4yl^-&YGZtx?DG)n2?Q1F`8pd!f@4N8Sp1UQOj)C^~RW
zq!ye2P1kPWBHv4Rr)v&)4z3j4kJaX0iO(Td`7P+_#qocqhSMqu1NtB?9cq_7hF|%Y
zU}1YPn;$G>3Z{O5YnPp=)=edPux%vIU7ARKJ)6wK90cD}?FHC6&6PW{i!giA4cv8f
zG$a(f#-;mi^M`T9Jfm5f&loH`3oBl-!XX`4mHiNn7kK0TW3$mPD22<Ej-zhId-3p@
z8u)(a7`G{}wi<rZo)^s@O7-#w(3SbRoJKC??oAEoA})d>uAJQy`plh9V!@YI1h3A>
zas5}BXtrVvNNv0!nv%SgCvNXVi;>45dx!zukh>WmCkM&}Hnq)`3Gkq@4y1N2;coe9
zFv4U$pKWpw)Es|8kVzMAt&yf0SE9k$vz&O2I{?G_eZg0&0)I<*!Ba06FwEEwW-ko*
zvq3YdWJa54A1T8NS0>T+24(u>aK30*S_^Ef4#wO76Dq~qV0v;I&Q>s>T{l{A7`kzX
z5u<SV$pkDih=FBWHWH7*Y*gPD1|ef6)7R&R@Z3=)aOt5V-U*w=4Xz80wB14;vHv_T
z3z~-uro4pAh8*(4GKTdJSA~wmacq3+2yS{$hrfLgPL?nJf=XY_S?iMbLJwgQYrNAS
za1-<K>R@dccex#s(;6Up`e|6YT7kBx=0Ss&9c+1*NxGkyp>%0E@g0~<g7ZY8e};k=
zWS!8#w|hWl`mbTvFBtG)7leI#n;I?k{!vvFdJcDf?h`NmauSvpI?=_QSJ;4&%WzHh
z7+&z3!)=@S=zcbi_rBeU3j>6%UdTkQ@I;5c)$)XXpAmTP-(Y%w!423oej`eDEMxCA
zgUGU@x6tpn9p=72gX@=mgrIW~H2dXR@#tSl{DrqX%=mJQ{LFXahi@##-dDSL;pG~n
zTfTz5WCkpie=g+A$AO36XC|7Xz+`__3VRDRzA?y9U<SJ3#}nmvUM`6zXMVuQ!}D>x
zZVQ^PjzZ(2BW&E#NRU+=K(peM_=Yq8ME0w+(P7_8yxE@w0&Euh?aa_(bTmr*%Mx8Q
z2|y345U9HAMQ(T3vC(HH<E9W3FzB!(DZbh`c<xT@v{k|2uVDg$&K4^MjDS7IGFhMR
zP?DSJi38VZfzRqwq<8gJY&zOX+KRiGGDMJRhy1X|)(O&<8HtoChGF4`DE4d1Jo0Xa
zhPd)g4&=Vt!!Bv8g5xf`tC#k_tijSg5&xk8k!KT7GW8;<U(tl)dl;MZqaM|UY-Y)?
zmC4QS2qKZ-1=3?FApw%Ov@V;Jtf^!4%~~?KH2~vg#$$tF5BVZ<3iitukevQIqEV3-
zS%ARnws`RfB5L$SNn^F3Qtb);=bc4-13HOgVKWqJBw%W~z_uw-ggG`5&^Xah#4akM
zcHK2rG;kGsT(y@CFiyY{M|Cv$yN@aRT0(=D2Fl9{``DJ}Y}EIqq^m^C&UH*eYa?~c
zI+_fVr39x&awv(qG!@!XN0Oq?PEhG}gV`6!;F=0!eASiAx|4UnSfzSW(lj3ZtQ>Hs
zp)Y#xKZ^~SX?Q|kR8xE8HCDBb$BKH6>LVlYd{q;>VYr_e-xua1_e*P*Jn_LfhZA9N
z$vb9nBpR*DV$knM9Ba87f$Npth;K{FgVk&!44Ze4R4kp2?VXR=t-Tu|NaqyRMrEM(
z>r%36m=cTMV1qlxI$`#g6u3=lh_z`k`!+cMQ|mWCo#o@2mn#k6153oi;}=71Mj}ki
z3J2SL`l9?FtBHAsEmZCQObR8;&_HJ{=5~A~_rn*XiPbSC`%M<tcgP86=MLh1e;tGw
z*+Yn{CjMS5#Z*_Mi&d{D!g8A=baFKo_6OlG*+2m!I-AJdnHO2g?O2w5*a+17;vv;i
z73HralVuHpheGB!S(NUMZCf^pANTzf+X#Dzcb#%9eUUOrE=a^7DG35k-IP#8CUiq7
zR1O%8mK_ne#=sRm3BH+v744+_;wZdU8wfik#Uy){BHmYt#v><YA+xh*pZ*9uzL(2!
z{@rufS&OW0lq-CR`-GFOhC@?v0wkVn0!zO@!DSyIZj9FF4*e6je(Y`Ze&~d1S4M#H
z0z-7$rbrf)a`N+VGMtZ}%N>3>ihf+RqJ2Yu)_6;u1MP9oiTbu}xT$XioPLqY{H=0{
zvb5m7I_3rmIR+RiT}(a;^LUH&JHq~49%M}hz@t7xEDIV<zRVJK=$;kT5}O^ZEb7C_
z{BjLOkB`K0UZybWu9~=ei855^bPMMSCE>mnhC68vyBakKTQm~cSfyy}T^Rs{R&k)E
zuo0hy`-8;UAgcrwUm{^;PloSXgKD>wu&VhO;l4IFLRRqb+Bl&6j<+P}Nd*25SO!TR
ziJ<-PIT<7D_`lDY1>F+D{?`cb&Zu&H{`nX5Nh14^nuW)94`VJlhZtY)!}WKa5RZ3D
z#viH4JS}1t&p&aHbiUEy4zZ0yH&F~(<0Ww4);%I$(Kxz)tsZRtn?^QHeQWt|!FOgO
z{eV2&_npkV7LCmr-%;Ip7+(3B03)JQVdkqOyfWXE1<EetD#5`_ZetQ8z1Qc>N$>aw
zyChuq&ye=Lx5kP;KXJoQYd&CfHg6c$3A@{+c)*T*d`NK)|CTI^?}G1%{$_gMYJ&{+
zxGMqsz=zMccLR+*#?tPM3xS2+vC3?oME}O@pjEe=xz>-N{K&BknsoCQn`Pw2cb=QT
zCEaXk!n$pI`lW7ox3&dNu2P|qUi(n-*HKtg^O2RjhyV{!q)2~|JX2J2MuW+KU)lrl
zyRjW)ud+euu$zKIQ%owqD1wBeN{#(Q5hFH&|5<-9>VK;x64!Dq_xG<w*XuGcZU1TE
zjuDIPdF!ynYZw-<)5EA$B3S(EHY5!l&I@ZtvU6jPAo<WntYvcvwnd`+s)aahTt2g0
znU2E-uOKroG%^k8O)O)pA-f}GkB@dA$NBYfq~_>7_F~t@n$D9ExWix*&W)W3sTanu
z+eSU4I#>eov=r%U!HMB=b12O}IEcO{8N|3GpL*Rk!4#QPb~$thdu}hqUtI5n`<C{&
zdRIfux`6`w^-3iP(5Xk};zIGeE<Cq4<);P5w$IHl7Ba}tyz7f59scu`_{{r_*q*V9
z_}(c5J3C3f=18d6cBQZjoi4)`ZD_{iFbfp5-GQ15IyAtq2Ip@4jvJ0lW6vho!-2S8
z<mLF&HBwo-aftJ2fope&4rwlCzjh_V!WBzEoM%Y165d$aUe4l!k`GYR1!pnyTs}_m
z*h}M855gj;F|^wM9Qv7z<{R4thweGT|IYJ4H`8#gyS{>EJXubsN@P;q_9AqiU(DX9
z9fGx93#h|CEo!kX2o}xz3l`VXxZJR(*mgjl`?-H*D~noj*!NRHZq$Q@=4U|vsxo?@
z^C>8~yP#~$5mCE-0f<8OQohlM4oOJG(x(Zc8C|Ey^@QPkK<idU!VK8F{5&H2U5~bY
z*X3JzJdUl=0QVL1=+t_7ZqOMA(!~S#?+-du*3+KZ&Nu?6W@^#tHx2mhbLFJ9e+ZZH
zJIp;4w(vm~37G5t6aR##2pJLsI2*ncKdszD-;8b)SWy1_;9O5~Zh0Ym*c*s1t^`qw
zdKErhwFtK>72>&zAJI}no35#j=D#|0>Cq=X-19^lj%~fj?pa*|7Znr!<eLIF-)w`)
zN>gB1?{n7uHUdJg1H99kLsK>eQX`iYTwJz-j?exHf!i+$*^^r|^x0xQB4Z|Pve`;U
z9?3yXt+!Bo>pj*@7UQxDtH|I&G1}j`0zJ24=;g7a>C;qaR8C&SKL%L9!oE+UFPr1}
z#^-gYtXhjQ?Irw@sh!YwNTYA!_CS50J}8*D(*1$ss8jnUb|UaBR=*s_<R*>a-Ogjc
ze!UF0*dn+A25uzJ8l>oM^)Pn*!))qX_lRlrSA*+G6Mi&OpGQ5|1Ur}eGL?D$eE+b+
z81Lc<*OKD-gVF;aZcBuN_dA&0GHJYgE{#7kjmIOCWMO}!4_{;02<uIbfXmSN@IR)`
zJSxZUi`$h-k_M7Sif9lep?c0f4U!}@3KdCw6OHD%d8R=U4T>a*k|aH6ADNOVi3ph!
zg~*usy?^go?^^Hjx3%u&&fVE(e?Hgs{^xCM)4PW!dX}O8^1m>qDwC%^RN=2j1ybE)
z6Phkl59^jk;p{E(5dF56?DfqMk4;pkbcP01HmZVR>x*DyJ_B73n3IqS1!^)=7pD9W
zSOXOtsY47OFylBZmDR?@kNx;^zoTH^D0qGPt5I%9Ge7!Pj(Exr0@v|panQyB2wFRV
z?$p{xejIy57j3nsiw_D6-~Y7v?@ixHhg>O%HuB?(5~3kos!L?jkc#tP8AE9#Me^qw
z@lRVXt{-X*r4w`@_lGrGW<CjRFP~!#e=@{9Yi449d<0bWnUkzxG0=I?5Q}~eCrJ@*
zME08C;C|P@svg^shOhBt?Po@07q4e(o|d><seny!+=KQToA7g2mEi9yXH}wV*uH5T
zp7XTFRSE0J(Z#7?Ba$b+7izKJY#!8?2u#Xkf7o=q)jGUws5pK`8g{=Mh1wV2i0?;C
zN5e<Ta9g#WSPD6`ezg&#K1K~jCr*cqqBc>+zlWj~t5dM{g&$Nk-VuG-KbkQmPj+Fu
z0fZ*c!H-@&V#8E5mht<OC~&(78*b=ARCq-_^RfpC11XV0aWSbf6*?A0_ON!RA=Ye<
z1BucDXQd}yA?4f#HkM|yD4F;5Z!~A(-F;p{-b4qgwmRU+S`}1kOTm=%;VALSlt@@O
zh{GrE7bktTha(}<SP>Y^w4<uX{GUR1z(I}OO;dy4i*4au=tf9ubAY7>4&%-R=S2TG
zT(tVHd!kr$ayL70<`ipwT~6F2hd@WxC3aieR&-H9=qdd>gO9PDCD-34j~cAtUaK^=
zKR<|S2?tm~VJ+FzF$%mSkKp9TFWFj^dR$xL!O8``WImR|Rn2*DaK>&-J9Si~)i2C1
zIsW)SbcB@ec+VV#-zDM~hIrHQHjxk*SnK=dWBC4okP|Y8m>DZGb3cI(WO#?|`yfK-
zZn7?K$gmC@cUpAE<1jwXjKM+_Gv`uyxa4RDANNWT|8W`OXKRq`>h(eG`eYD2567Tp
z15_9Hi*qDLg5{=B;AFA^46_1Rpx-9;F<qa$vp7xu^-7?cI0AQlhy&d<d&F+PC58M}
zH5<1imbH}s7GIWHflb#g6XW^>oS`6z!o&<|w_X#)fn!kD=?KXPDG|MC9LrjUJty0(
z%9#I=R+0b40I`H&Vco?0aS*lD5*-86nEYiaY-y_%HKuehCN&gIo~^>BZap%2+GwQ1
zfPQ#69xrrt5Dk@V^ck80u7iHT%(iyC(D@kd9(f7}l!acVn=&1-ZaCD&SE0)Ar{Xup
z)o|N*B>y7xH%(&1D4&)M8POB?x4X&$r$z+|+W)bbgO#{IxZlWZ4#9@2hAiM&3M}}u
z0iQ)}gw5ewu()3mO>Q{CGute2q;CVWjhQWu%2*;w6Yh3D;ULtVwo&w=xrA*OyMW=P
zV8UkKB9`CN+4J#!xZu4TbIIOEB!(%0#E}C;J#iybUNw+yc6!Yw-q!|&PazN`Ib9?h
zkN^>KahTh6m=u+MAvM=+Sl9krLTBh8dA5#=ikuFK*Zyf|ab|Cr;_B((Zd1pc79q*a
zawivGs^EVkrBPx_5_$93o&{zMhp){^;Ir)yhA5BZ(Z8cayL6nzd17B&x>bwnOO&#Q
z`V;7}g)Xq8F#vKlZNORG!aezrm&{u30ygZ<hZEoKA@HNHcz7lIJ94+R^Y%&bE_@tZ
zl6Zy%i<R;Ej-k{oco(k|oAKDMx)3vJHgT}OCfwTy4v<ensLdipZsc7KE9Q0Mu_L*B
z-;4KnxjYNU>5Qizx<+ySJQvNcFXB>P;`#jCaC~8zhXaQ@^Qzvr;2G4)y<gwQE~kT_
z)~U}2y2OEulQFFpmtc5zDYtfrr8T!pao52Fp~L(Nl=8fI<I``fdGrOeyebVCBFsHU
zTG_+n4$v^ghOV5X$>aAu7Og5?MwhNT3PiJ+jq#XBqRwVopNO3W4NVJ4nnop)Sb5p%
z!W|I=1x<x}rX@^H>Al#gVF2qlk0yx^W6&x7KZx=OCAQ9jGu37#vo)F|&id2Mem~v<
zx?jAA{(BkNJWn5TmN;NVu!%6c`Q!F6gIGp`1Xulj*m_N60#2CmSnN?9%}j~FpS8}z
z&mB|wl5^c4%IwCS7D>!#%5g?RQdsc6%V_gyB}h!y<lV1>;H_*byq{LXTnyHujY|iK
z->m}k{A*#5cO8=|>4jO|Ry=CqHJG<Yf(~zTrhyhaNpEo<%nTL2y`LN*(ZrgLk2)p3
z*`bRsLpz{8H4Em4+=Uq}!6^I56Cyua<I3G}P#!2j4=>c<fzr{Ot+Z!aEgn$9XXDDd
zzPzA31D-URaMzPd!96^iU$0lDW_GUp@83%#!haS<{vFG&+fAU`?B_5yy<({SF`Azy
zHvDP#BzjI@g@+q>L7BY)&+40q&1(-Zw}mxi<?B&6D!Urb=v08`fE0)<x`IP)NK^CU
zbExBSvG|XPGT)P51}=-ec&3n}TNdBSjAdM*)>)okJ3E|j&q-t|E_a~j#w#m@`T}&`
z(nHpDKZ6Kk4|-?+S}rT?$o2IEM|jLMs;VpG2q)`8p_d(AHsxez{VX21)1B_CkfP#_
zAX=Xp!&hpL#j$!<sBc2HIIip}?ce4|%_Au_l2}3Q-o0i=a?Gi%^nKiHpCa~~n!`Vi
z>W0ounLNnDi+_#W%O?vxiug6zfa}b-V}u@+cC*5~ge6q-R3<!;o=2w48%x|WCHUF!
zX~KJo<4aGq3VZHPP?@Ah)dD8Kqh%?iqR@|qE&Bp(I}=drXeh3@+YRsMDxf$-gOr^(
zi<_rBVFL$R34ZA9;2+<Ha^~8&MY)7!3SK_vi;L;A!96I-uZ2@ptLS?d;l2Ns;|*G^
zVE*O=-V50X%~EUm*6KEVrQZbM3my3NXF@j6@D|u#G2o9b&f*9EY0wovXVLa2v)H!i
z9Uvnx>+6mMGZwv`Ke3xYuB{pcxt?+0*bxOOKelrJf!=Us))kN#@`BuSK7p|v$MMF4
z4Dkiw9{AL>>m=6i5!0Emg_-M@V8q+)kYTRK2kPvBz|rk&_MA$=B{+tZD!1XlUz~ne
zXu;Pl4W_RLN%KeB<7wUVGq^f-0RLk(l@_!rbM4wu^oQJ9Ix!}Wzt$Xyn#q3z)~^=d
zcJ@E`dnAwFT)CHq6tzIo`*--LP6vISTMAu<9NhLwl3ES(6|FqwL{IIt;C(lI;hf$^
zK4Z!aanPuCYlWEC%&9|@1_)=cx8L`YV!@#(6{E(hYKmasT6q}Y6H3hgXy9jqtMKP?
z8!S`5gSHd(Y3RIU?mAbIe|+f4ORq%Zzj6~;m$ZqyZyw9Kb60@r?soLF(&mBc<AlA7
zEM;T=k>~@#c)sE(WK1*UCVLyW!3SBUJ2?p~j`@I8Y8EG#Pm!)CiI_5F9tIx$jS0&#
zIKu&SmDdCIEs<e-`h0AbErW03Xf&9p$i7ZlM^?t3U?Gyv$QOf0DAPZ@-q%#%az`J4
zWv|T9R%Z(iwUUC~C}}b8+6xNv^D!vNPDH-U##$k>p|5p}d~X)++S=?{*O7F*yGmB*
zsXk@?HDNg5Ni!B0*@NYVS>Wp@i%RYzn9a_cB+1Z}WF;3ciw}8h`{itGm>Wp~jTK?>
zz)hkFHdEoy#eQ~MN|9AM3Ei>OG?bmX7HUTY5$}NY*u1e4i<Vm9p?N{LQPTk@=L}&Q
zqZ`EV>L%f)3WPdO#7S;8Fl+AxsFa=q3j>dWX!AI5E1E{c(XC`_<{I3$^bq+M*&>Sc
z${;hg{$)}NBth9z52NHXAurbhT4q;?I;Wo!m>);$+h#^%6aOZ@pA(Dz4c_8+z6oTc
z-5VBVvbVnINx$IcFb3NV*`nUfs;Jz&4I2AwLF!C0=GW>$et|rcuei)YW#hnh8KCm-
z;W*!N64Wi8hx-pJVSDBrHabh#b;hhi#SLXF+r<QbzP3QDU4-|FGg+YWc8EHB*gERH
z8r<-?$js7ziZ&e(xFa(j5vM7>Xx@F0H63ato_BT;p>u-)G!95^Fw|v#W4aE~qV;>n
zLDB9ZL_IWu<ysZ7OAcKm>P3XO^VlkOZ;d8;1`w#|HDV1umqlii2N7#<#;Dv&*14I3
zu;1`8A$Fs|JVgqlqGpI}r=GNSn6ig7>Nd0NktIxGxJkYLbz`_Q%pYcIR>HpmN$X$A
zhuEN%{p6y)DV4Q&g#T>R=%cDR5PSYCJJqUARh^W@I$sca0>{z}&quibS_4jWjKXQp
zB5_HfKa8z92<3g<IISj?_R<WjJ0%5vs-`?wQkn!+A(7~jv<fdx5l5XqD&iW+@UwId
z+NKuOXYHB+${SXJ|K>5o)zca2!8Rse=nS6cb;0+T;6)n{0Ub}SlcXaJ#It7>gbwh>
zzlM&G(Ih4xo%V?u96Vs)M?)H}x0BqsKOb+uR^%bKjL3XFb#8I73EH!TT>J!qySPmZ
zN_8tSn54n-51)wYAsvYQ(hENVo``1lO4AtQk9gR+3WXFJuD*Bzvu=cvs|Gt*v)v*H
z`F0gnx7y&dx`*W5hi>Mp)XSb7iGj3J2bghhG-UXnAhtiI3v*7jIDWD-zPUFA6!wc)
z*LR^uG`~<3ldcRU&jgm@(Ji3qJ_4lzW@ExUp<h+`m>EkXqeSo-abf>(uoxDP&7Yj%
zuAd_;r$J<YUp>0k$KpA2B{<|>MIxQ=khFsz#h06tp<(eQ(id8d?nRC4gv|+YLT0?k
z(yWg?&zDE@l+jpw!3;#bRj|hBIa&O^0W<41;}Q2p<{+CPocDjB-{v+rvqhTMJuE=g
ze*^gAzR7rg(F^#uXb!o)@DWLioW#9Nl|=sQ%Ehby{)ByAYjE*xHPHBGOE>Qn6YaHy
zpcFn2BE_FsLDM$WifIPha7)neRiIxynsL>tR;ahR!*4b&<ma09^AA=TyxPHrOJ5sG
zAH)yh7QLCgYElm<=Pl(mdQrS@y$ViE4W@g7htpSw%J|p}Q+iC(leSog@c5RmjQHw7
zp;IZuK6;PZMXxbOMVZHVsM8hA)okFn-8^Yi5^lJ@9VRPm;x~GCVOLlgdAwd=qlH}r
z-J1dYMC}81PbY#Iosq#~p_06%CWrhdbf^baPvfQ@#;|ko1io9zN1W685+(}YT)CHi
zyfg4R`4T3~+Z_%Vy0Damx0Ms!U0P^6(@)%Ms)OyQ0S{lvLB2!*@pwKG({f`$Vf1b0
z`d*(r`|qmQ_1{s}b)cJtp0q*Vpfw<2TU%QcsRK9m-6o>xvZ&ttSM0&Q<Ni<Mn4OOT
zpPRJ=q!TMKHBTCkUtCI*=9Q7j`(xn@>t@q!hQj`jQFuXiG9=31hN!!vxkKV!@DSeJ
z@5gV+bc;evi5pIpUr34_OY0y;!kR41?YDkWqX>7m?_&~GqiEVb2Uz*y6vS1G;|=|U
z`f9}s3;<5dTN==L&I`<P+rap<&BS%n6mYE`hi_#le|h>7(7X@$bI)@y=%}>5oM=vO
z4jBx8@~=VR1zCEkdIocHl&1xW?Py-`@NC%IBnWyim-h?*?}n&95N}qA#Mcju@Wiko
zsQtPeb_;IHnDK95u<1zRA$5_RxMKrbzNGTao6m`qU!~MHR1Sa__3gMLCKT;*u98u4
z6@)vr!i^^;baa5b;1OL9E~Pi|zy4(G$q~5eweR8fO)s)M<s(rubKnLqGkFGHLivZD
z=)PO<%q`l*#$|+)y3YFo55pd&Hhg62b8djbZ^2`>!;`B!)#K^FObVIa{Exy|oT-ul
zqii#wlljtq*+aPf)j)i{Vl@l-IfbYDdhid~pWtp~omJmaYw*$;N!Ld$f~VKt;d<+A
zY&)_U_C21=y-U{POD*A9ylkN|(VHGGub~g8PT(uY6rkC_bTI!q6Vji}q`$3|`0E{c
z;;>8q#GC&*QWswpy8h@$cv7Rw_jM;jW!*J)aPIQ@$@9je$7nBZSoH??YhGjyYOOf>
z>?rDL8wrg8s<hS93AWDE<ugML<IhqFYOrc0oxWu(cdPZJmR~Qztd9vadB7CDY0+Z-
zN8=D2>X4%elO*Xg^S2l@>9C*&TTEOhifDR7FQ(U&V`)Gsys<Z6uV&qW>KU^|LG$D3
z#5-DiYsoO$cXl?~AC?D2!(R62!vMT8)<Bp)$I{$Qf3fu2X&m5YNMvU(;X6+}CetJr
zi!RS=VNF+;V=FxleZtPT!e}mUJe~kgz8po<hbOVx;52FWxQoQ(0T!!|g2C(5=$RGO
zM9cjYX;FFt<I*m{udIpeQvOktJT;x`jnJkK2i?Q1^$jHD<PdI=a1~B%L0Gmg8`4&-
z7J7iu7<NisG+yX^l}GCH5ut<eMcYlx+&&rdD^1Z??-~}rKacb0q`+z=5zl%eLBHS5
zWg}Z(vQx_r6ZzRM@XDNnnAU5=A)*>2ge?5#l;`9^+HcU9cNiTtJ_uR=_u|=~m(#(U
z2%eXj$KSk?r!TFuKzrjA*i@SXW2|L(i}_(_tA9!EZJ<#1qE~#dMvc2JJBaq%+MvO5
zD0MmM3RPX!RC>BRpVTv$4tIKm!6KnQulhk8EhF5Sm;57vq7syt<H4U_G2{1spT~)n
zQ@|`R7iR35Nv#wX!v~uaFld>;l<$s*E&JEg7@=P&|8_94`%l=#<I>nTa|Rf-XEUQB
zPk{%biAs42=%o4#2W(3Mc?f2kN8Z85FWrfgnLdWBQ=yG#4#NJZVl327V^g>1u=oXu
z<mn8-A0E1g#a`$YKNt8*cDqy9+JPEy>f;!ci`ak$7v)$@-UDH8V2M&h85VvTR_|;r
z4Tjs+f|7iO*m#x-*?C5p>BUX2Uo+GXt+$(y3;p-;OtlMpx-|y-zz0YdqIJOvaqG(j
zRyW3ugv~R-IcxU8hWRI0;*dF*-qc8Tc-#=nk1YVx0aNPx>VL2^?<{b8{v233=P15U
z{(*z%2B1&6JGM=8!zSV0(X}gt^(&2s>Hr(m-FO+)8>Zm&JZUE5)<bee-epQI$>=Y!
z06fnM^Ng^wywEityF6Dw$h@uCaxa5eZrDKNAd?;Pqu8Fh3T?~tMY)mNNYHKrxRkJ+
zHB{dtQ6<sVs!cnf+V?AS7S4vccIsmH4Z^M<JDW{BEh+S;6G6MSiPW@?#W>?Bm}XT;
zw%&|@6+*_{Dep0J-EoEt?Oc!bGv`3Vr#tnE-ezbvsfw6bRkN(U1EDD3q<F3RZPIkk
z6xJTECs8%Sp>*~#SQ8$CttxS-dMFG^Ov}g<Qzhs#&_-L?abmgbSmIw1BvzO71>@1$
zpe)?8#SL2z6&J$L-Twtsi_?Xv7ur~n+a6N2?~2Iw`zPYBR4%?F=Y{q=6QJ|fJ5o7X
z4U5e0GDDYAA&)Quw~eK6Zs&QnYA$ElQXA;e8xka}u?%XyYzDg=9p+c?h8&(D^nhFo
zxlVQo3Cw;>9=aC5?6J>4&wVhD9;3^xu6!iMllL%{nNl=V*bV>LuTEti1hF%-qL7?_
z2`<C_uy!FU9W-VKwkWM5kLn4&6Y_==n~pJ6eO2&pG!l1Q-wF-4KZuS^jzebnhp5ih
z1J{z<q%2;8yb~EgfwTu!)M}yxX%Ib0n2V-D55P%a{jDBuf`Po7?D#H;(I=JoOSvIf
zHTora^!hT6UndQfWF5%<6;Yok3nW?AVC^;@9(23%tjm*;UE0DF+74oMP%<3uT!^(X
zju>q_fnKyThll%&*_XL}Y}puLF1#EjYE%dkIz9>LtMQ4cuXP7~lUx?J=K_0kWF?q9
zRfQx4ZD!#iLcf~=ug6i2)EJHe?e)%3Go_O3oDq)!vdi#e;Tdvef)@Ta&ja778$i**
zNYU17o@n;Fk?fAt!Ht7-@ygt6*1R=}Bve+QrSEW<lOe*t>u-~#_o~R>8+qcT^SX)k
z$v6=9^q~0Z3pp!fJO{i{hl`7TiyVi(MZI)CQD561vc<L+T}{&KKfm{8U-J`5bFmbB
z-Xev|1;*)f$Kzn68V-d68gc(Ih7Qd_&gIE5GUk{q23*a8;{5mIh3OF3IrAHP)Spib
zWL~l#`V}y**MzsX9wTAjE-}Lt_xc5M#?nJ0{pmW@$pSjWR`g`?7#iz;m2FXv=7ar<
znaz#QxGQTmJ)3?KMjrdiQdEp+rfC=X{@svH`W}Y{{y)Jl9%0yXb3Wnm4DP6T2{+c~
zuuYb0sKsF!zI;<Tx7r`eTc;_~(NYey{&f~TF(C)7WH?BTb4R~)1z1`;lfFpU26uN9
z(s6o&XshiYqB2y0*?TWJn>aIrJnObWuYh1WYw$Mu<mm<)?j<;Xugqe1(&y0iLpN}(
z{AUQ)%kbAe18SOhjySxoWGmH$_ta;?d#g9|&=otdimxDYJ`12-em5(MNP;)7>lo|Z
z4E)71)VJv)TVs;h$i^7>si21+Yu6Fei^BZfJR5VD`9M{El2}4L!K$QL33AsalMan@
zZ2RZq%t>b(Sk6_&q{?)*-`ftmb<VTh;dZD}+eZc(?7>0BAHdJ843mX#w*QX?oG$RT
zBPUITr9U|<5ijEIRtq7{@&?vhABUg;U&MBr8dTQy8fqI$(NnPl@puk}((lP&KI$+Y
zUND^RP<ABYKc!&}y~Q4B>7i;<4?FIZVjXuo0k117;tpv9)1lfJrd$s*KiKhGQ*^K_
zZW0ff*<=+^6GbO9d|?j+{p$O<7ua;WF`Tby1(k<C@WdSj%>JVfyUsr%@@KxV>iLHu
zIjs@e7Ul@=fWfZo+sOw#cWSUkgC8^Kf}s17ylu@%61DULNmRTAOWP8`s=x~lhSfs0
zTQ5Wi^T=xNb+EHEpEo|Rha^6nTZ0JJ58HytHd<)us15(coy3F(cTid~nNQByE$~bJ
zuzB}5t64DzOh4_R4L|jHW9&dsa53O{V;jlU*et$U^a4M=ol3VYJ3z+E9TPiltH9fZ
zYU1m&a_ByR&3(sVA_=rSjV}+KBBsK3ZQoTL*ng;ly?^A-k0;Lp(*v<WC!<v~L_ZJZ
zgpNYMA5D7r<V_lo(8!d+wCMNdNAMzi248Y{H{JMiIDe35Bi!#uq2K8V*!@+LYxk~0
zw{|%mHhKuP__zyi&l8-5p=MknSdyNR+(nO#7)Z0eE`-t*y1cJ>C#(#)hogon^7xbi
z{Nwg^@g3hG{L`#RoVqoMRwo(JRX?*?PqZ%gy0?lQQdQyCht`wd<=eqKwcpy2TF}U7
zpf?2;=<`qZbh^`VzUK5PY`P_nTkmIxrj3|P6RKljnUxL5KTw1jKNRu&psU1mc)xIt
z+FSSEJPZ6$ZAg#lY=w_QhC%ZsE!tcyFp=uNvH8;H`O@MtOuDjzA98ua>`$4Ar2KC2
zt9lifm*hk{T_xb3(<-*+fDJ9l*v4<%(1!E5`rtIvh-n>qhQanK)Gy*26v)f+fe|J3
zH5>EcMo|VtIfde+Cq~?G_z5=U;TdcQeoiL3Ys0MjChW-f4p3VVNjJ<aAam`8vcgaV
z+xk+tD)9Q!{&S;EHm!tzs==o-b0Nsx5KpVFgnlh!y1h!+A6dlHvWha%dZp>yE9xSc
zZ+*)PzKn+tODD0ecfUdT*bi&NNl)=BXyb&CZ7{BHA>E{=!nb(ziBJFd3Kdlm@M0+8
z_A{4Z+}~(!k{wPz<m`ddst1IbZ4CE5YfCT2)_|1YH|+kSL;KquU|M?#O1fLXcF|b=
zL&i$fH(11f-mMh%M2z5=Y=sNW?9i)nBDXKmp%z0wlfyo9X`-_N4bvD-NBEzm#_fa7
zP1?Hz=DhC4=e90HdKM>31=gqR1VT=(6I?bXyTpf-%J|h|@8I*HmpHUy5Z#$G7S;E?
z#7iE5|2KD{e9VS;)<|-zMKZ)l>N?UbaU$v8CNSodGo~5OIJ@gs8us*T7tLMrTBIy}
z9KC0)g`rsn<jj>O*#6lP#`XS(E)Orksfd}((8eEj1cx!HKRd~rKnFP6S&#Eu{aH}^
z0#+LSfmzw7fx`-O(HiNk_%CHNY*jJ9x{E8A%9(D`*Ji}VR&8PxPMhk(#*Kx~za7MT
z_8uUmCcebZekv%|siMEHJ<dF-3|m?RCZS?7p2_wDqvHFb;3Wb}Vf8GMp_hs8C)Yvb
zwrm(KDktKdlkl(c2vVt1Mf9u9*{ke)w%sy|RGv(LqjnG3^RGLwJ!FgE<?CdpHwygn
zT{|#v!W8ia$tAd}u9%TA3Fwk%ivD+;#IsA)Q1w(8s4tSm3m(zfkt2e;D9vn3LdeBb
zNi140PrS}qLU6a*g3}8tc=5m$)vgGvyt}<bAzBV*501t4J5u0_;H9vRdc{88F~VJw
zmf~<9Z8V%33=NU_<oDYSQe=8kn8S+6LrXm&Z>LMh?bRT$uR&~fH-~Ke{f4anZvseI
z`-pv9W?+RzG|1^r$IdceCNE?ngZkCrP1;iuwIie6@Om)G9y0{4&31-ehU+m&__^KL
z|CA`WykmDPz2K&c!16i0gL%J5t)FJN7g{5X@#UyBcuak$==cFe*btozojXk6NM<$u
zdYTM9PbR?Ct>4)fuMtA8`f~kV`>#aec>&Z{sDalhLp*mc4dzrU6KCUAajpDH`cPUQ
zZqCT2Q~X`H-pqF*6)nNb^JO7~UB3$NW|tE_WfNY1CS=S%9snnm`TXkb2_Uy}3T1l)
zCf9r&>fx+O)dM8yuRGKD%ZCa4>Bd^r-Ma%-U%PNRuz_*Q1WbDA$U3e>LFm1HmUhAu
zRfl%6dNpUfARdepTPs;;$3}d*U>L5rrh`S#cayT5Q>_0?7inLc1j-k;GqcEGwr{!M
zi@f}mY;8;-j!%3AZYvi(YfurbnRgElCp^Q+=A+Sc<U@FH{vLZ<*o9VBad0zqApPE1
z4A6WGq77FF49yrUzU+*)zFN$nNt4_6e!|1TT;n-B2Ui6Moy^Zhq-$rBNZx-7`z_8R
z(#O<koQ&Xb4w{C$yoKGOjvUdK2nWMe_t~@SK_Uf#0Te#RglYTDfQr}zfn`4g+bbi9
z*Vh=F*lB|8ul?Z4sC8(YH;mbNPsb;-mm^tmo>|ZY7#9`@&Yz}%@ewIhZPSEFtCiT_
z3A4~B|CDH(LQws?z<6Q*egPL}&Bk-~8&NW#2!kYlu{rTk@N{xI`!+TK)TCasjaQ>s
zw4cCzUf+l2xoR-$lM~uJEE5gi@dL+-qs1~0wPF80ck2@`y-7xNIP3eK%8CXCK)<UD
zy<NEoy?ozdFds+*p9vmVdk%*ho(l{Z3rJN5o~#!qcC$s${bS9uos)RdqF<P*C*spJ
z`f;h!1AL|Q6gs!J!GE!?qMYzLe82l5+;Wp5svD>A-$EYz`2IaY*2I?c2`2PI#3Trf
zp90@z<O#b%MNYz2)2CuB_Et)gKTm2Scau+$-ZX1|ij=e9w<dh&Pz~5W*Mi@zO@rH7
ziTs3u6B(E_5T0D}=1*sQ15rjEpW726u(wTV@?~}EY^Ft*j?|?m1mD;sE`x(je&MnW
z75Lw^W-@ennJ8?xH_OtGB`KbAT=#PsU-PS;e0qEco)#H!a~4F0-ygtb>N?rSKj%SG
za5FfVd9i0VHxi4Z!rjKd52R|A2Fb3Z@afBBl#Y&pmTex`a_FdV3OUYh{GAIDtpV1K
zvc9;;Z6r!uj~AO5+$XllpG7G{;(;HUiJk5iDCMPx;paDj5*Kp!OU8@;ZoLbiURB}v
zSyEz)X?Ym=Vjq$1b7oUUHetZ0!6dp%n#1n|=A@ejHW4TBu+Xol6}}%<<AvSfwh|aK
z;UM0>^^mM-9)U{)(6Oh$<khR(2_@cj_~+XaI8;+XB2Q}bUbj5Rm>7g^v$Al`FCRYe
z(qKHjZWiDA_Y3wZM?=7mXm+NcrQSQR2W#VRgW|;?{%@`<ZN4^!I31VckzbKft#_>d
z^fH)kc@?aU10Z>-9sczb@j#(7EiJv1^bf7VllC=el#@rkwM~Q6_--68+zZuM$be~F
zvS@-xaQhrR45!Z~!pM|gII(C0o=wez3+t1igmsge{6k>3^Et-OHHCX!;XJ}`D89bg
z!`i}AAwu9H*1EjLl3$B(&}|3)K1`9ghTp?n`E>D%N`c`kXNwQyZh&Qg3?CY@h$l9T
zf=spDVCrCj8G09C``-aP%V7un7;J<3^R)Q)kwfT!n!E6yp)Z{)aF7z;D2aPNA7HIb
z#ay(!2F{-!#f^G8aDL|s^xCwZtJX{MeM`!4tGpIB`4b5%$E{(Cmo(|NvJvzpD`kp)
zv2aVug>KuM2CMhQz^Zy#YH=<aCiLs`+dZ)m`sJ9IXSNAkkJYRvC5+yvyD2*RWC2aO
zGMIPo9>U-MmZ2Z+2Jrl!sl0sjOFWbGfHltNFrYJ1K+>E<)^myPnxaZu4lct!uNqvp
zXC?GzCSqlgF2A2?OoL><;*BeEJR&WP?-TB(VWS_92;4@U9aiC{L2t1<O<*8S%jd!K
z60p}MjfT3*(Mz6t$@WE(WbCd7ATjedgq}Z7(k(;i)8<ih!HEKHJVO=l#+l*CeP-N!
zqB1RAHVJlZGNAX{55m^AmH6r7N%m-SDF(gXgMRny;YP6+1oYSAubdG=Kgk{{N;Cv7
z-ch{h)kW^9ydo#<<G9b+b}al)3pM51U}490v1VT=n1vl+b6yLc9hqQWH~6O2=NT*U
z%U27QXg7>1$ET9*jt}u?-84SxnhlQVU&D=U6VUUJ1T5+PK)SVGLay$1$oV!HZ@kyQ
zALk~E$Eo*#*~*Xb@5Bh!R6doxsXGLlrKUjXE}`4DC<$horhwh_z2x89!>~nJlgoTf
z2WJB#T4LpmJyMdOV6zi0&3lan+<^}K?*vSG@`Ln#mf|+{>h#3AE9Clf2Y5bXKDixz
zf!zKzUSu&n7hXXo)=m+AMpP>CPx}-$N$3;oTDseMibpIyG%dsP-*xH1x)b=~*ApQt
zwu+o}I|YuLD@c{eB?!LlBz}2kCYzr31}m<7#QVd?)3W?flDlCDA0F@z=QBMXzs#N9
zT^LGPvn(Itw+b}hUc=wYsq|8m9Nd*W3hA%BSWnM4_Os0&n`b7$-w8vYZrCQ)a$*gW
ze*KzAtxFIXC$ZRNW(FyIF)H_NB2hhk*5{%XF~j6BagL9JFOCMFpcOB8Jpu)OObLsr
zbVbAcElm0Pb+TH8ky5K0>}0YcT#Yh>4XJVBrBfnt+^gyMYsv*8$~C~oHKxSPG9SrK
zGc3(KjT-VB;Qn+ojL>*be(DQckdqsj>FFbwJz+LGC#4{qnf)-*zym+GXyO)~g{U_+
zLoEIMJ$a@3n}yy?V%HR|krB~<nS}Xm(T1f1!SL=F^onR_TDtk5zCB&MVs8#wuN(<m
zuDgI!Zmjs8;IenQegyrm*R#-FXTZBG8{2<J!KD#b*~b20(DYd&E{G^)k$q=K$p@j4
zv+$s`|6>)g`VkNK`>33)weW;&J2^O5HWvDYY{A?w{cNp9EHT?Nl(bel!|L`3kWhbD
zFEwoz2E7!xhBc4tRq0*k9~BQBKPm{R${{;KqVUTH6P7#HjuaK|7AtQtASH8CSVxP%
zNek2#_0%civ)fjpjl0JYvi3NM+gm|uH_e2pV<+O9P#<>Vhrk41qyar!7Q$xH2#7NI
zL*A_`BQ^WKll{9>p~gdC{wT(f)(bkY>#s6ySBQfx&pk2t;e&d~*u5lZz!406F$D}=
zMYw5i94gw3MswR(T$o)8k58<`zQb~`HfJQ<pAgDIeV&ljqkoXm2C;a)^*mFJ$siN=
zDMLVH6S-`yi06!6vDibSP)=R}Bmx@G7Ae<=e@>r{CTrHgptGa!&Gs^KQ%>lCA5ai|
zDX;^}n)#rvBn2e4hv+U@2ihvGn7cre{k~U0w57CRv~VY9n=B)Kx8V@E(-e(WX*=*s
zo^Y-gI=n#x63IukQ=;}ofiPMr0AGAk!`ACDnDTl6dN#je`73kC>;MD&wq!Bdnr(yr
zuOG!33AW5N!;vk>cu$h<-DdHPk(e!4PufT6fP#7f8T$J&d)Kj>DF52Rf>IWeLkspn
z-vN2iu%`#HI7u0lLz*y=DN-|)0X*ci99hxahN9Yda#&;s#vLJWBQYIZ%%Z5rKr43E
z2;u#Q82EC}6xE+w5H^S7-;NYqd8`Twg8qp+iVE1D-}~{K!eLes)XX9+I+?Ogvq+-n
zU;Ud|mzezN%j{u!5_m?e1OGXbiNpqbBuUaBaeV`e+&2jP_iRJ8ZPRgTE<vYd6Irw2
z9Pk$n5}kGk!!OfkL3vvtD;kl-j6+PYaLEJqcd4**x2+*1{63QyH$|Ku6oE}m=gBDR
zNvLe2%4)!wr7Us>r<{J0^2G=xRE2CoQ$EXgZeVfB9+0-Knv_~}v!4Ycq370H=8#)V
z6wi(VT}wYS`)0@VW+>AKAHvDn7~w4WZY128wu7B`t48<tsPczJg-q?UBQ7|wlWp3z
zlODO70A1k=>xJ(KB%2P#!-6-#XOafa30W*MPpcy;-@^p1O*Q*0ZA4BK_mW9&`_S*d
zY3#C-4QMG|VKGa!p!`J?_SsaTms=rCEt<)<yJzrA3ajyFstZjG7doXK($u1AJU?(b
z6*u>I;>FkcuyVXKJ#gtHe!f%z#nn+T>+Kd==r$EPMx^sW?|Rt$;fj2ca0b07u|hOf
zKNceYJphMmmiSF2OUUk<(yyA0{DF8KkDE6V<_-yBH<O0I$T5k`y>l|#`{^v&R_y~p
z^~JRUpTp&IR{USnG;;X5HP=`-pZZ135ZLX51P<L<tXbv;UCZTg?IIO+ElnO{GYF>r
ztR%kriBR$(lu1N6S)DF+#KLwD2zev?`z|RW^|hK1a9I|AczL7Lh%LBFY7=@6EhRoe
zU+c=?1!xg87jMKHvupm9>|U2IhV^h5-zQ|HTWlb_ND>UpbFp>ca-yGn1iv=EKs);f
z7`L~L4f&ld%IVt9T=lGYv!XO4HQdMQ^Xm9yus<4T8i@0BKcMR(!IvogTlC*~O&B4}
zW&_QaqWSoXFmd<_SXq7$;<NNg_=X%DdO45GtvHYVNt5`F%lSetznZMStHyNlW`aYV
z6Rt2R#MkmbKl&}CzAuVVGO!(X={<l8bx**u+X;5M%X4)rLuxu$Ox_tA(%G#gSpF=N
z%2t_?vJpclOH!oaLXY3GYbPr^Do<}eP-S|eWc(`@jxUj8M5eO&@ZF?~P4^hWbw*t$
z3tXr2zRn()XX=1@`L?3gCI^1>pBH@ll!)FRyGcu44!h;`30teLQpvTGxYsKqVwvHB
zbHfgR>an4;V&yb0*%isxJh_VnwQt1XZ)50^b2c#YZYuxdbH(Z#8Az=+ALOax=P>5X
zZ~F9G4*pbi#6@H&_|_Iv<2)O2m_z_w{Io&vkPIW!tzSZ<Wh2v!Izu+i7jhDnndp&v
z1y%b0<8P;R!JTW(^$!vcz=st(F(%T4E8P-Y=0|N|pj8Lf-&u}2#G4npoCRw141#S3
z^5N%i@>?nc=upE!w7^Fej=g$|zmIBgRZyayZ83Nnu5r8X0B35fnA_@1xSzZcf1Gfo
zzZz6{c(*F9FTDWiMeXd2YbSKvv1T4O8>r1-3+PQ;LDfBk%)|Y6Q2po-^|a8YUI&$^
z&z`}23G9KpkMyW@^HWlKG8TM0g6Vlt4SJ<z;fRI9*c~A!vc>E%UIJ@!Ytjj{oqC0+
zdH-RFm*0_$<we}%q!wNAX)>L?KNpwuToXKUoA?{|JUZjdU&zv#&R%=nU<<3R!-)ul
zxcNDBV_+t_9Tw3COXuJ@yLfnWO_^(+DaGfhiP*HG2kg2}<KI&=_@=jyczeqdYWM9d
ztdVyHg(Pp{XtReT+*ah*X3wOu<NlIqYR7Rg*W$mfR`6NZ47gfmEM%NifW7i+Y{R|)
zezo2b>a-K#$G0PRMy>{q?k~Uv=K1xV$-kgldkJ<fGlH<t5;E!SV;p_z37PV<9HokG
zK*`^s{QO2+I2|{cPpzDc9Cc~%$3FbH@(NWG`ai?M#!+MO7TRfYmNarp{@~Ui>Sp0b
zd(T|Qs+_NoX(|N<>onlx>*2KhS1kUN98JPI-1x=!A>`^OO*%VV7q>~9^A%az5Gdrx
zmPieQ@tYpw)y=VRx%L|*s21YPcS-p2`WM(gIh023^q}rjWoc?i2rS&+4Mo=y;6R}&
zH9we4)n$J}iryz2SMdkz3!}JqzXkQZr$g62w5F9e?vpmTmpE;JK5Prkq@zQPsrULc
zI_1)EJQ6+}U6=nA%^tY{|9+VPX?r&UDM|o|r%_<RtZ;qZ|GW=fWNzvk<}`Z{N^DqK
z-|{>b8#GUfV-&sNVBbiz(8yx@h2FwMV+(X#|CpKW+bL=ZJR(w!pA3sOUuGwz9*7pr
z-3v3M(r{(PLm1>6jMIxxVB4a>plp;xGPI9~4hAek=TT2s)i@oDQkh0R|9MS}JA+}#
zi*)?vmM-MOqOgDWDl*PH8V_{Kb3OIHqLZUO;*X!p$uqOZBDts#0t<(tlfND*+%yAf
za(A=l7Bl=-mI`un2cyZ6Rm{J*N4(%%C5d=87}K6PFtgG>qOCtiG4A@5wM-ItI-{SE
zi|HxIQ<Pwl)O_?;+=>>OW0-{RYU{cm%}lOr6}%f=O6o3a5c5`nVRlnOY^!{`e%IA;
z*lzKacy1X1^=}j5?q6@{cl0D(dP478$^)MzZ6Xy*s)z-5#T~oG3c0Zqp$|A2t{(8f
z?Y1JgbX@3E1uTP_)uwo@BbH@UE5Z7=%VECYhiTmSf!P*5tT!u&AzySNLHB~Mc;cfq
z*!fZk;@+NQEe<{8jZ;5sFTO`wgC;>zQV?q>epK(!HiET!Mxc6$4ry<UMaQaST+Iiv
zv6qbDuh4}n`q9f;L%dK&Z8CHBKZ}YvUr6VYuk1(nTUK?`7mxJjv!<F7jH?-nHXHWA
z`&bn)%%97$h1~1px+c8Yq>ul6@)`M@$`-6DWDAEL$E&j#TV3GA{8J6ZxjU)|Nz#FY
zLrUOk6bW}_hOjcT5s-gA44ZPSplHs0qI}#3Ew+@CwGT$4XIVOFHqK?^&c{KxRsy7r
zABHUri-@Pp2eRpE1ln%@EZ#Wbo!HWG4Afi6f`8aUcJ0O==DPEw*gPf}t5qX0e9l`@
z%9&2)`F%S(`&i)I2>HW&`zxeqXRUbKaeM6YzQkITC$j8+lR>vUUmR4j4$8X(*Nf3x
z78T+H?J0kWg#R|HhSqhkLN^f}v{=9$;|>VW+J$@X^x*!Co2>ZJU2JRj!acrfv|V6D
zWmd_eq>w?`d9fIVJ13&alac5hv!4E4Vv6b71vv8EMK)mAApZ92Tt0|;LGS`GoDGV@
zY3rhJ{IZMq?{YZKwX?_YvM8pmmq8?k`-r|pJYwp<0*JX)GM?2@!k}^A*ygq8*v8i<
ziC$O=UPvE}?XF2s==O|BtZ6yhC?5^FkC(9Zjw8X=Xc_d&nquUS7_j{>7UXT5Af`nD
zGPKUJ^&}OCl7)~kEDb&>Qz56iQA8%}A^vmph=gQ|^=cCUC)r@AToa9}-$g?9*=6iW
ztt&YF9zlYf-!oylfa>BEXghkCxFygQ`jfVR?YL;`X6prTwXzDr^HbT6lbx6+Z;hkk
z6|iscVZqD#9k=Bv;^eumd_Q#IN$W9i<H!ur|D4;Ce3s&PbxT^QH=YK^y+otWcHrfj
z&ZGvV;igm*SUM<*?QI$cE3Hnm5}jn6->(Tj-A}Vy(x>>-lS}BJd!x9N;5c}^ei1i}
zz6#QN7vQzJUPw7)%I*8~C`wE49|s?!L}xf8m`{gVD}VA-$%EfNK<Gu1&ptak@()8A
zN$z-Otn;Y_UB#oKQb%Pf4joS8#Ywbdbu$dzS;u`#me4!XZ6M~K8gJh0&kgg|!tZI8
z)T{Zv^}moeIQ92#`ml36jq{M>x2H=%N=GK$=~GSvPK*NMlpm1U;lpmPegGEM|JcFu
z5vU;{Lfvoc!7|N~nCV<5HA4o#M&&PTZPhFm{^J?3y>UdeyJaell}W^RZL#F-G@<9b
za}wm1Mv(_M9I&YDkJzi<Q1BwhVdtB>%thrtCa>;_4nkH+`idI98DGc7>2-m<WC?Ei
z8zO#|p^vFwPQusMTg1zM`s13>|A{hB2>!yw9++L&$Xpy8!0wEQxAS296ILu1!EBLC
z`b~Ce?`n8DznEy>ZNQpaRiw7~5AnIW32XJ7;duFE5;0<JgnGb&;Sv%O;o)JX%fdp}
zFIgG7a_!2ct4#w|E?pC$5hNcpc(z1*gl5o?#0V|-2yMgJlCw49BXojPlcxuIj5Wd+
z?_}`q^Ii25?Tt}eC5fHB)q)Q7%J?qa2Md4hVGnk_Vlm%kM7Htst>@$)XOG7oVT}d*
zS%Ah*@%;~W?3t1s-nu%o{;h5eo3?zHc#NtsCSQ2KHmI26DpDwl7`ZlLRKNn2|Nj}y
zOjj+B3|+oDLihhYpWgqTZ?t>F7~%O0XAcnmXw24F8=)^erhTWkghc50pR9AFJU#z@
z8Z$3zh9<S?^sLrv*7jr#_2?Z()y+!;(^wIj7tVuqk$$vG*b1x2_|Or(jX2}LP=VC!
zjxRS2<5T3aApY_**n6T3gUr3@yYMoh*;ywLNEK<V*&sCa-#`-8=aSK-0&U6d0~EaV
z!=c`1VBVW~q<h?d<Va={ei^h1wk%SE(n(2>GGB|IJ7|pd!f%8yzjRQPe-G2-jw4)k
zCnk@@SiZ3e4s4A>9c=?{|2>Tj7E&BCK37FGAF@DdKrv`eNoC%nbg0+;MRdNd9bR91
z7!P>4!lyn5GW)<erX^UK2WHChcdCl?^ueXLb;lvhJo%Ar5*a|ploV>%rAk#}heNjX
zK{Qv~ggw76zzC};(4abjSIgdnIpz}Vz<n)Yb19H~(&WH)fgx>=Jtz)zQNYfv22}dU
zI39e(pO?s*!Qg4bY0CQbpjaqugv)!O(%?8Y4^R~iNHc>=+zMLMqlJ@i6uQ~lpl5V3
zyz!L)W!p}C``iR?y}5wyPCC4Q1MnM(#c*f^!~VldV2f`liyA3d8H@L$weKI$Uo6W_
z(_hv<j=5;9Y8fC}(`5t$&98#R1uwW_ph4M6X&y8r290wx`HHt~u=?glvG;NWP5F4}
z$M2BTBtwIr8o|-{`G~s{VQQ2qy*0HKUfdH}?KaCWdy5*gc3D8DNc@98O+OeK>_rW)
zv3UGbAndw$oa~CW#=SoKaCylB*!yh??fv%_zHZ4OY6<y7=bQ#T<jJ8xZZ<z1dkQr2
z#Pz?%jG@lo9w2w>z@xev{IWt7uJxEh?PTPr^jj+~I(-zcW(=n8=EouIWg5$SJ{8LK
zE7*?3KUs#$5UlY{Kzo~bav<X)gddQEvlUsGa;+EV-p<6++vkGGoeWrWONLL&)n@JE
z)`Gsa4xK1n!mL*Npw;vS7@(hyA*xcerspkQD$RuN^NztCLxje2k8z9Va-7im15ZD>
zk0*SN2-Eg_y72uQ(X7r(AkwL0zvdi47q$St4Q>J59&Kb36IpbpDt)$WCDj);2wK-J
zz^@P{uGLE7sf|yVq-7qgygLut<ieTHxu5uS#b(jJ^-^?bpI}w*t|2=L(&5dcjqvr1
zHPpRPr>{bnpi=Hda9mJLPHUHea&sy(|2B_q%)TUg*V4$Qz72vYUDM&Rc_C>0P=G@j
zhskb>p**GVJ9@8iL$znU@K2xwY}oP^LjUun*6W0Ko1*}l1J1$NdtdSBXHOPfGFTwh
z7lVa*F9v0uN9R2y;={AQ;dQMm#PDMVpLog=XFf9|OExS(rL$x3mqQpo<=+bPZl{Bp
ziI@!w$;ZL+74XY(JQhhFhGF^k+}*)RfZjE;=Eaw=Hh2IZRB{UxqgDB<yw&{J!gRP_
zFG0UP*awC|WB95ECOq20oh44X2lfgBshjF67<c*}3tvAN&V>7N#l_3PZJsGzDsc!e
zl;6c}&t0IOISX!I-%U0j-$~j(*wS?+rgZpoeL!NsbNs?d)lyj+d+ija>%@T1x4WYJ
zt^KgbZ6X$zbiwOxb4*gO0H3uIG&%pSwfvmvJpM==Bp&eQF(*1vEy@`mw<xgQjs#q>
z^)Ou%;LQmD_3`m%M}{oLhuZ&P<K8py_uv@X_up8aZ0cX{;$bA}d$9;TJ_wV`0~xMx
z#)^B+PJxgAF2Snwb7=56iWPq_rn3V|*nkasd|pv7dj0DZFV7JEKKnrq9+9JWc5AS>
zE!$97Zo;55BLNP|@|fXo(6&^XYmR(W{}Pk&=gE2&_`Dhl3I@}JgFjGzVHM_^7Qvv8
zGH{}l@+_()*xbF*{<0F4o4JUzC(6)J+ia28IUPsr?Lv(q@-ST|0Q`1)Vd?V$bYJES
z=3cXfCkW(~7k+d3Z2ba!nA?p3YovK#<RM&C^9&EX*$NY^l5kDYA7-oAg>PKSL{mp?
zBYRrcQp03xwB*iQCPI?uj;FZcyAJ&+5MWLobHVs|vg}!CH7lHSi>)mv7h#Vvzx|~S
z9W#r7eU_qb8#aKMc9)=vO$8~xLX`A$!drVLz&5)x<igSC@Nc&Q9cEF6I-^zin!Wbq
ze%dH%R#!?Y1S^j8%Z>PVTq)6@)8K`}eh8dd4gubGL@PJy)BoPTXJxtvS?2hIF#q8g
zdU&G@H;K3|?)UzNSCXHj_%X*{aaoWm7zcu#C(u|F>{IjBkuOtKX|>SqaVfK=k7J(T
z;mqB1(xV^5aHo*yFtVcO4~60Nh37>HO|Kz1-47qm`HmLr#*=@k!Z%_gq3#2g;Xt#^
z)bh(=R+qjOhq~nBJI8Y{Fyar?Tev~6_cUBMMKBF&3WWAwMKHQ>2?Y0#W}S_5ASO|d
zZ@4gmE>@P|hjQ)sw&G~4_Rk|+X!S4q;>J;EPZ%yMhR?^t@lMG?NO@X<ZN(Fyb<kg2
z-;n|QWg#59l*E;X4WVX+#iUPo8F7@-uf;aVj1zJGl~9tm=rnN=zSS8m_rx-uE9q4m
zW1j31L55}jABxU9uEzfj<L$Kf9!eVxrBdg)KN5;GWJE$%vV{~45^W7-v}H6z%dYd>
zpM<7{@l_~fBqJguiGI)T|6Zqa>O7xu-PiTLW~0pCR#xrcAPmfELW{e(!130g+<Gdx
z??0I~I{VNOS1FkDpDN5L5r?_TY1p^+C|uav!p{%ije&avh?PSW`y+2HnZ-H&_2)Uz
z?SAJNC(Av=-PMzNNd;hiY!QrwjWc<VMM(BiE!vu^M*DisGp|!~;NVCLtDU4+GlTCy
zh35~k4?g~7gk~zDxBdxYc}W4f?;Qc&+c4hEbE`4m&z_e!yMkT)jY|cE^{{G*dNg5^
z0kM6$5mZ_o=+nALnB8nnpT6z_??8^U`AnK7_DiuB6DqLfz+q^$kt8bN;du4iU0#%{
zEZjB?g417=QL_6oX!I^2Dpqgt(ebHt=>l6!6NrPZN2)Y?Z4z^)_YR~ua7}uTgyG;B
zDfUlB7W6fT;M+S9po~^Dcf1KYkMKeFhX5Y9t%$#G?8Qkxt*LBH0Mu;Kp(Wfc-y-%d
zk_bz7XmK;&3dQO4d>iuk+Gfo6sAivP1d_-tD$uWbAE*613ey+#0{`4P%pGWA8sg65
zVXH{I_;4k=O0)~rlBJ1uH;)}8SMl2&e;i4A3(7n4Vfeyqs=nqD6kCqMeg`ueH#da6
zEf!Cl=T<`5D;6vKh0yBSdUC=_g$6rKWt`;48G)KiJUsM?*?EIY(N`vsje`?NW!*$v
zn<GwRA2@*7=qS&<vKa60pMhqg9~=$ibjg~s&CJu}2dtHb9n~CgAQPr|!W4x+Se^b5
z3;&psrk}^);sPuBOJoZki?ZSz$%xJgbD2Q53-tD2ERGi6#;;@&?3*8re_vL>g<Dqa
z#zX~7Qqv&4yWZnu!%<jVcNquLHp2Xn2`pxqlgxY@aM~)3wLKxYcHKDZr@00CKb3<^
ze`3w54Qc4H;5;Mr%a-gc832PIFMP1-AG2UnIPrY)2^(uKR!^+H1?fJ4aGs+(%YBz2
zlUB_q8!c-1Y3VcY^+r3ICYlZ{V(N^m<{|jLaVmVgP{!CD_zSy})?$Z1Dhg@uLvQbs
z%;vE-%-L<68{tnT+%#VVW0AWkgpR{|UJ^%a-;F6@O?;s_Id~vKmqr(O;-00=*jXd*
z*!c7wJh_+1Z?oj48(U}Koc+KgYTm#*hAJc+rbE+pMUYS$VHU6MU`JBwVZqZl5VEd@
zF@aq8<1au@1nCe_#}POkxrDr~ks`vquNn1wcNxFYJghP`rJ-Z8jE<!x8|v4{JJA1t
znSujwb#y0VxuBhqwn-vh$E85EssJpb7t`g|a-_b|A0KngcA2-LL2lTDepN0<nV4HR
z>M(&!eCS5g4s9h>&m72*fd|a6PeIG@hu|xA3sv3L!biyqa3o!p+_4oPyB5`e`O}9?
zP`oK_t<yvYUn#mE>j<`Q<Qm1A{$m~=y^MdQU5L|LNqQ|P1s;sP!G#H;G~en8v<Y!Z
zW8E6|r<EGrATWZ@xxM0(d^YOrnMCY&NYi6+f&`zI!qCs<yh>j!;`!PQ{Eo=di5DbM
zZ*UM+SG|GoA{`o3H3IkjoAHd~APQ&5W5<^1q}N}Q=w}wz#4>hpc|4YBRc-(?V}EM3
zlsn;aO+eyJ`OJCmeOS0A7E6nk<EML~Siek??A#pA?DdLfcLgxaaiKMEyZ$>yM%cr<
zqI}#wO`IAm=)qo55wg1B8w{`I9A`UD;Gf%EOX-*yZC+pvsZsk-EGz?#a!$5g*9mX7
zvk^OWQ#h!NS(2HdAJKe;3S}2QhtSjF(3muV^aN5|+kG1D*Lh)hRuXDUi%|ZvNK8;p
zMuSJEfjRdL$6uc0&x^eR)&`~|`KS;z&`XB(9zy)?Cx1}~R<a-Dzrqc_EzJ2@pJ1R;
zmTp}y0#8>ovWipAV(g<H=JWYvx>n&LxXZG<-fN|>=A%Cr&d<e2?z88Y7PC>$6=-a~
zBqn_<Wt~H|(1B^~@L9W;Bg~${d98Oahf6B26Y+z`mMu`e>OoDo4!3VxBx8P21vH&%
z!&J9d=rQmN4qi~872g$EBX51Gmv@RetFnMG-mMSjGeqd@1$EHu@*Qneg-Fd8F{nwM
zjcSEFnx&!xmp9!<bHyNxf4_t5XwPTMj>qA=qd=!9&2ofg8c@3;1Cpx5h@+!D{YT%h
zqGz;-#n)Nvxn=K}ul{!QPgVmQN$tXH<xZG!t&1m`y&S(N_+w>NE7N|Jb50g?;M|9T
z_+zCK)MW`$Q&T;1q}~terR=Dy%2aB(pc%*Fm*A|u!zi>v7pqN&*zdY-_*?A|CZusK
zP1>1^>alXXOI*qH^YKuB<|Hg?$->u51gW3xRfvsz!V}==Htd8E&XFNZ^VOwM+VdSN
z65<U_|26W}_?+U+X5o9+EEJym1~#6wfm1L4u~mjKydy?G(AfGv$K!8{p-;Vty=DCc
z1Mg1+>*!qAE_emMN&g3zzvkeA#zvlY?F9V2JPy5UBG9<NkUU#%OYca@P(g`#^z_I8
z=;ofM3q4N3$)8?WE7Su!LIN;kU=0Ylf92GwcxVh;2a9fL(d5|qP{gIQHLbdE=f5CY
zlP*C|DF0?O6V0eiqA)RYGXz<oP#BODru2y>J*}!pn}i76sb<CcZh8jabS}UP-Su!=
zNsu@z{KIRnj<ZVUuDr~ChUA&v#E+|g@eXAyflZ>VydC>j!($aEs<mSl<`~SRYb<9m
z&dhA+NLb07c>NfsyQ`AV?NX#fRfq07B1I+)pXP;Fv_aOqTo{`*$mX})!>O|JsOB$4
zBkn9`c*hv}esdQNPW2`drcbe1L6rWK_C(186X^kAGa~AK0K#@%ft$;sNQX%VHYXp0
z0#kdi`1ygqt;`S)xT?T*V#=4ecL`&h|1nbxc4F0wU)VNNmt5l-Ay<1ZfV2=%nsCR6
z>{9c=`0RL&cJ0kRm@31}8@_-ODmZ#`cNSamHXh0oo?z*2?s;;JhU8`m;v)DBdu6<!
zZn6~l)+<9V#K_{2tXrsOILzF8u1+6Tx1x9Td-$yX9*-BQQek&_rjzGfZPc9#6XG@L
zrh*pCZ_k0t2X3Rl&l<d&eH1^>R)MIzLFU`nH25#DkX`dioC>~fz**!n67Zp?*ABD0
z2JV8%EGLpCkxXSiK7<$fYB+YPrY3aDZm8+ts6$cWM51ml=tgj<!DeTy-|j>*+pMYR
zl}j)~@-4cR-(YU(hT~zkG<IvF6v<?Eu|2hG@%cpq*pXs|iMtG8nbJSrrMG-&JGq_4
zubYdp%ln|hX#gHsbmLLg$84XYA2B{8PV~19VyBieeXRBwBe}MCbF(D)a9<w94i)l_
z_^hLr*PQX{)^Co7UnPU=g*7mJ)-85)V=mOrG$WJaLwN<QFVJFs4l5cFgUsj{6Rg+B
z45#rNJ%pyvQI1k5;jkXPGuPv}{s!DXt&1JFo`VPN9C%LU%gMLOqj+iTJ-Uh=;;RMK
zLBqKix=*DUFYfUm0v>7*E%_dVhC<oN?swUor}wcdeI7PlG^c)!smLze0mUNE*x`M;
zxbxOx^h~{vuHmihh4;Id*xn_$;JYan%jZCl_f&du#!+VW8!M`mwt+py*x<K$zcEpL
z7`}aeh4xkFVb+cuY*^I@x47i>>-k)wYMwbU%pHV?I&nw8cLMC4_|te@x*ydSJJUnA
zli}J_MQ{t*hK_&qV3%7W9zW}gB44=wwU`n)wO)cm+q+Vp!wcv%xrN(rbU~HmdF<H#
zm5rP!Os@~gKxexgQ8tMr^P+A-(45&IoyjepHR3$MIXqBo4@3W#qwE6}Cp;)zi?zki
zSx@SMVpF*(jaL?Xf}F(PPmb`_j4$Es6^m%k?;M_}rvd3Is7J|5!dU)98bX#_!)!K?
zwOxLYx?H=7S7-xPj27b3N0Z2e;2`p@`U?yVltM}GLbmqLD5F#CO8=8R$1ly+CCgNe
zX}x|e<ilaKIbDk@PJTkU33>cB!|UiE8HwX>hjChFI9qJ811#kquv;x-VMNUvFNk`<
zcLg(8qcDhW>m=z8=|UKN_JOqs&%&hjVI<v8n{T(^0~qBguuWnaF#oPSq`#15h=B}Q
zlJpk>b7#|MyJujkXD0Xz-o;n%r_g;y4JfqvGi<sTMYn6V;<!*WO*&&qv$G88<EaJc
zv9E;D8-K?*-VMd=t_%cyI}Z=ml;Gm_Cgz`^Jkk|MI8yo<*u8xW{{D@{;9o6Fa`Hc@
zX*Yv&c1C2@NGphJe+zzdmOyZVAkF{eOvdBvQK;60B=zxF_seH6!e$0X)|yHR9*WR@
zUk7r@xeW!%J;;M5?#^d45pLQ^Q7xW4Ro7h1)0S3(ofAIuvvT*q<|YDG^Bm#n0zoo+
z=_#D?ss=I#B~dM04Q-=^*>g88;g+OTL}|AdLilW4(j7|9%RItg)>CN2$3z@DTLuTz
zTVR3EMCNdk6%B1_hAq#6aq)|O)<&%qr5{P~YNt~OTdhRtL@DZZUJJfm3uYvD#Nyd|
zS?s*6Lg2+{!>5&7=?n`yy073OdoQO8>leI4{!B4qA9)9Lgl@pP>9eV@8w<wIFXQZ2
zay632tVmf?4Xb$SGn9$O;!A~lFmI#+vhv=eo-@VFSWB8%8Vr2lE%@f;Fn{BG4g4aj
z10Ty@;4YIg__ua8)$Eidr~e94+q;jM?VXc>ck3NfZ~Pk8N7t~kHYekM`=5bB`E=;|
zElA_I_ucP&g9#0kp{FJ-;Vn~@A@Ucm!Yc(M5|^O~e+N2v)1N1>Zf(Fd6aHcIvYtR*
z>1=wl)&q;gFN4v)c|>xo4OeAo^Zk7Y7+kQTNmE`!oW%?(7tX@aiz?Q|>N+ylb}^C0
zr5L%{8D%fuz>(D<xFV+?ps^4(8ybU7TMTGFyN_*Yf7#;`HDUg$NG#DTgpvLf5KZIb
z9dQFXVxIwX^WtIaqG}X(1^Uv+1&!A7ap%lH+-t8!Z@-z1bM$^NzIE=rioSn1_edn2
z8LvuJ4@(iz!ATG)=ZU*iZBX%Y5>(7jW_+rQt0Vl?m>W~-FvIjRx@*5dQ)^XfRX)zf
zId-wD*JbnGCM#1%fm=APCr0NgKjz=~Q^rt}QA{XEfq6D&_^+)JPH5yXHI}6?Wtk=$
zvS%*}EKo&JUL7MRyn_C;n*b@1TwAl@MJNf9qzB|BY0v9NIB`b^6r6L$&)J4F#9Eq=
zMqx75?H;=&Jc#Ky6$_8N)#&|qg5dK*fF=~a#=eOrSe)7k<2`=ZeXkXdUE*A5X72%V
z6p)nnvu%bBcwEww)IZ)uwTI^5#u-0A>0kjg+N4vHes?A#PnlF*k)l!_-*MIUS%hBJ
zp_O~}=m*U-s9fa7#Liv^`QOKwxrr~)`ua&+beSV?JQgK0YQ$-afHQt<_JI9f>*2a=
zHNU+08@er=#<SJA0l|jNaQeR}nEf#k-brl3*CQ^F>Nku^FL`uL#~AJ!;U<$mHqzDK
z!`W@qtnsLCC4aZyDfqERn)IK$kL9NGX!q|JjF!`<YA+kWW1Az*R&<Bl;c<BLC=g=q
zmO)jkFRii2#ua;Pu$!+(6LY^{{TD4J@8K_e#T$l*;ClR(*w5Z95d+|<5cTO#aN{m*
zx>{5dA9_`TZTU_%pl1+wnw{Y04bBj1lmK6ZCs4DybD^{POHJ3E1g1uQ8mlj_Mc*6h
zk>vg>F!4<tZa8>~@Bh{VVf9noy?;JDz4!}mNFcj-eJ1W!y93KO?QFY)80N?K;IMTA
z2u-=lJA6%?&Odh$EuTwb<7&>mdn%m<{qn~%tA673{421!`3Q&|KZ}R+d~tH#Q?~s#
zq4Se8Fg@)GM6B!pmDgsp`u1eXk}tTMyOVv5&gP$du?&QJb~DH0hj~Rm<>9XGIMgo+
zWJUH*V7y<d6N4%X*!j!=j%{_O3CEto?CSroU7x|^1|P+3yI7w^JK(^YsWk1k3=Lgg
z4QqTQXxz;xh-E#nL30K%pBIF>1&C+T-^0_UN|apeh{sl1!oLmQLGcT>UVRhhS%?op
z=I%$3E1U>p^B<tj{$A$rSTO7OU@O_;<_>K#pBTBd&+tvi5bI*{k?o8%0l(e<!NO6f
zu~y&DYU_W*&;K1~5*nuALd%Kh>S03@w{k6BKFKIYSXS)CF2=6v9-Ok8PEMH(qRm@j
z8W!`4JvP;d*VLp;=a%f^TGB3a#3TXkj<Aq?^=;=xIgdi;BXhcgqpZG~w-@WqbTPrw
z8>p3BA**ixlOOY;1r&>YfFGQPzm4Y++dq3SeMdCAGsXl=;t%1mI3=pPJ^{pB|HDgx
zPnnAO@f;c06RVGTQ#CnP5>!+SVqTiCYLyc%IBiIjeWl6q<zYB$@fb>94TYz>gK$rM
z4ND(4;ln`<x=~q;oGod<kd!};8-iNEu*(HoZ|#SO-S60@J^5gtFH1d^4M5_MDd_L%
zho2+9yi7ZD;$k)%9jArCzSWKJYW`vf367>!OE;sg6GzM(EWrB9FR<n59BOsW7<^V=
zgO4(5aK!N$^vn*$&AHaRmny?B^_>GRev>kt@#i@I?%SQH6d4U$-c&(ZfIXC?&!h2P
zEPHMXqA*7W6giv=MH{qh)cP#w$D|&nQ&JS{V<xea6hcvX0UxF>w!yL)ACdT4;{7Y_
zsQmK|7Wb%<O{+NK<k>qg`))QmH)QgD?oVWoUKL^1bKF$JKi^=!(`Hycjcb1$?uMm(
zdKfHKgLAAWQRCk^c(Jkrb1t?(1~=<?DmWj?*OlXmutIoJS%E*+9R)vuD{%H@J=^8+
z4p(ly#r`~R3R@0o@_hQ@=$6(0*t1vENmzI{qoyoCovq54^UqE&4<e%J2QJkXI@OqB
z=TaQt{lK@!J~2*VPhhaA3pezYg6M`?5@lmb4$PWH&KZ@%GU@N^Ija|ZaXDFfTzVP!
z&iW6|)TiRZ1_4Mczsd9@81tT0rqZZ|hWKso1xWkS&%Ue>VH%2-5-XKADALHe>H5m~
z1Lp;4$&Y``<$WRSnWA=#TXh_Cx&~2pEFNQrxjU!20Lij3f$YWyXcTq?=B8@X#{11&
z(sB)Ltrw<GeRuJzkBno4!2$Mopf%>AC^`4t7%NV^VpnGD!}iC*)UsB9^dByVo5hoG
z=VKs>RZ-MYB82e03VGv-DQN9V!7nEfE!w{^&;P8Xug4mZdJLd|&M~|Vlh83-j*5(3
z;0@n$VqdWBAnF)^FKQ0(ldf}A*GZqz&P)bB$a*qOd8hDekUr*ltJ4pod-)m*g1~#)
zJ~-tY2^)VW^DemQ<G~l!@WyEy?pON4pZ+%xx))u9gn~1ee(e{(%={Viom+2q>#LJJ
zYogKhwgas1Pr<*;R`R9y172FRqbBO0CkpvWqT8$Ipl;-aczOr}|4BkoOf|@^xW$GZ
z)h5C#!*GV>Vcfjx3H#h!j%dD`3MP{pm`$I!W{$x@ZlZq#ye@MZm*Gwrv2Fsp6S3@V
zmlGsSXAz{X+ypINg7oSk9n#$qTrE73hYhkq%*&u>?1_F?*tJTCG#Z_Ue)lW*Pu>T{
z_;<k42Z7J6ouitHWUlvp_$Mk!n(vK*VZ}BkY~ceO`W(xQ?&sR$$JF6@o(SDjq)&@o
zufR$fbH?S|WNI{j3jQeS1#d337JU0AdpIx-?u`Cq`|ak#CDS6z;=N@|%D3RYnH{_p
zON_9IbH3+tG=}d-l!;-~d*(iO&z~M$3zrwC@FhzxU}C)mT@zLhn_{Za&U`cG-SwfN
z8BI(<lqg;Gb22?99uKjtw#4VSF<<6d7<_sD06qu-A<H*0Wq-0@+U+G&MMVNUnL!Nm
z7N?gZ2+ykX3`&12L4&ZPSg>p!J2$-#H#3`H;>trP&Q7L1Ssm<Q&oXA*Zwl4%@MNC$
ziLv(=tY;ebX_FWeDPr6N?6gBWnVz;0tZPuD>hEkxP_!nM(9px^k@@g;MLl%x+=y3I
zy!am$Boo;mm(gvtBOa8TNm`~@k}H2zQ9;8KAHIvnhR57oYicJ>cvy$LfXR?CZiElL
zpI~Ep3fpt*1@nAk5^FhH32~i%tcD<qOt%uMOq~Qup|k1V!<m?NU55lX3(>}bRahAO
z7dvj<Kr!iQ)Mru)2zEZ=PdEL6J`GYNH&Plrs{62{CmJ7BEM_8fIj7G|hBno^!{o7N
z@MB4K%^HprboW;R_B<eveJz?E(cX*cgZpqxls#{j!YHF1<itF1uV7YmDNwPOZ$Z+j
zigR>v1Vh*3xNA{2`|!sfE=_Dm@5${3U7fv9@s-n`#cQx?RXKjabd-ot#P5p~=&a8#
z!Bple99C|Fk#{|~*xL%fb6SzW=M%gZ_ez+2RUDF)g)wT1CJD0m%s!0Eg}kjt;9;}^
z%}?YpVOM_vzu_~x|6~gIFXfR)-*;F&Eta1%><89f2Dsa12gY%SbDw#0;a;9Ljz|6A
zoRJR@%H%<RKo$c%l{szZGt70Yg4V(J;4(7IeB#_(KUXR<X`|7g=5mZT<D4Y<`H&%P
zRdFcvBo0&eZ{X(!#6pP8bm&l7gaNYcz$-WbzjaISzs_n@n!1z!dd?-@d=d)@cgNVY
z-8P_jb{&zaS6~v`N7zScN}zevk2%JriFDr>pdS&ZcC{Sc;z${`JxfN>WFh>k`V?a1
zTNpXznK=0BI5J8nVQEh*4sDj9i;SjY-`@Z{QiC`%K$xzX`5F|y?WdPYs#r~Kwwvpg
z!nk(!v;E<=@H?GC-;fXfPM0Q%e=^t&Tsk?-)f@HC|HDZ&kxXG+DevZpEq!JE8go{^
z1-l)4*wyKl2y$F=>0<8Q61IZVbb7(_eGCSrap}|&E!Oe<1z0q^865QoQL9B1oy+z>
z&MBzLEaiUYwFT|FUJcXFtt2w#Gsx+&ai(W?J;wfi=jib;8$&EcaOVvL_H=_dbxO6S
zA)KctH0~EqJ)jFRj;w=i+&#@}zd3!Dod+&TuJmhBCsR_a15Kar!%bTmCj9G7jM09|
zh^)GelEjZq`@RjT6fVJB`FAku^d*RYJj@uUn-PJsCM;Q`Nq_%$6+FAQ5gYSv_#QdV
z{9d^e_@%L!nV>}K|1&11FPV~D;Q(~GYD>;(yn%<@jC;f33@B`$PF>SOVRcqKZ~W9A
zkQh{l6%vzSE0<o~a9s=j^lC97`qhkwqA+XXDot+}9^n#VzTkiOAYD1R%W;mMFo{_*
zg?^ebk>+}dGCxLr*_44&N2w=;Y?y``O}_G9&7+$Cc-QS8Gl@^~V6R>Wq<@cRm+1FG
zjQV0M6gUnKuKx!yw|2o=hYui@qJtNDTXAFI5?E9*m+D&^vB@i&SeN)BP^%Gugz@#v
ze2%u#T-ykq>W#3e*@tnJai(t4TuYB;AydN9i9kIC&xA~++Z1%Eve*q~`m<A*A0WZ)
z8}FF9eV>52@c_i@Q}_*v(eP)<Z02xw4P=Zi0+}x*>`jetU_Yoss<*pB*r}_~HdG7m
z$u|08NS2xx4<plG#iiPK)V03{uK6%jZS);qS>AxE*>B(-jJgi2*$-I$;3ls8pvzY%
z_=n4HHe+_T0K28zikH!*$Y{p-k_7>qn855u7^{|q`yT&bZ*Uv9$6ifL2^6H7yL_Ph
zY7%;yszSrTZa8$s5|wu)VA>{iyzoMkc<h`^&Zspo9xH{YnAKl2*((Lt**v)OMwsre
zaAFL0O{DtwuH(|3B4m#CM6};FgrfIuJE~7Sg-#OE?9lRN=Acjv`aFFG9UEq&a#An$
za(Z-oR0x!OI?w!YsA8i&NYm4y@9|ehG3Y$H3Xe`df}gIwkgrw<s^8==c*{$!4W|>8
z({tFfD+Ix~y^a;PGKXWg$8b%W3EiYHi~bfn1eYHNF*nT5V1`~J#<W#nu&60-mPs!*
zioJpUlN!Wi)_0WFJ<Yi`3t|2JF8tFkO%*lP!?h$q;_e|pBP*BF6kZEv2uG2AlXSGX
zu$Kv3dXr1C9VFSa2SAl000qDM3Ge20fML@_LT2`2`J_&qE>{AJvZWboT{%2|ayqrQ
z(7@blgbI~DXO}vKGvc-qI8>Q|QT|%QY&aBs_FL1qnsbo;P=V@izXNYucQUpE(QHP#
zE{^`tpucrDl7>(9Oh;Nf>hR3S-^82vuhgD*eU&$rcZp>EP>EWsREM(~av=Jj7P%rl
z4VT25)0qRV@TIk%6HH2B=IuM^<f#Wf^$+=9M*=WtdNtnHaimtW;;`;+1pYVLz*N;E
zoY-qdQs%6pP5Odda%MIy|Gk3wz;wXlPtKtH*pC`)ZF8Ld^&&^^Kj$FZCPsz!hl9>%
zdm3Qbid$ZN$8JquGH+%M9t(FR)Jzq_zsq1>?lJJHS`78=8BEJ6Tk7v}gSEMzgt7`}
z@sc@5*f+fneTM?^gXSLGI~c$&UG^42S|-4!6}nWru?!5W{n-L53G)1OIG$TlfWPl#
zu_w#3U`w(X`CG7r&Kh&W1m-*}^IeHcoEBrQ?IB#=b_+FIgsFFBJO9U|omf|w0P-Y_
zZ3#1_Rf$zNr^p5r;_rdgX-_t}Yc5@`Foy_kIe>T8OM!_<5i2p#f$ZwN%D3J5iJu^9
z0y7IY(=hiwP_6$3!E+Q~$6j-4d8>wXVL2^jg(tnO@RZHS{|kFnt;zdkcOZ237-nDP
z8V<F8z?oHicxLkhgt+r?mC-#^lPku9K5syxCV;*6QGo;m#-r(w6Ztj%ktZ8Ik1i=J
z$AINGz*lfRsS<mI51hZ_ddvltcJG?enPs^C_cWfI-C3wAe-H8}U0~L59_!xr0Z8jc
zY+X_Yw*FJd$%%S&b--shEGbRXrbvRb(NQ>V^A1Z+K7!19m)O<IH$m@AVN$6_ICuSD
z2v~X=HhuS{dwWJ<%QtRDZ*qwD?ATw(Ru08wll|efe-o#*i=ae-EPWCxOW!qU!NJB!
zWbc%%thW$1!*KhI?(fsd0x1i)Q&Y<%`EQ48?Jv-$q0BY0ABHLm9mYM^h`tSz<CmOp
zrH>U|Ve;GWV7Rgcr{ydl(OS81UcL@%>*VQktzde_PM*|PDuQ+GMMhRpm%I=QhnWF3
zxcb(4v^geEkA&U@x%G!w@0+jLMT;lW+DArI_vIjCFl`FGZE}v$u_@*Mml1`-ONyB=
ziAn60-IM8kSwan3zN2JZH6yX@9V0l>h)&IlkbiX?w{DNdS+Z|%M@%|qKKJ6B>E6`E
z#EO-Cvy^<jv<mtg<N3i~@8CwB5^<?m0nK7wteK}Bb*wUF{e5Ckz;HR)xqTL%(0q#T
z4U_pAIo9-&{TEob=@blOJ8zF%4Qo_%7nc7rB16qrfrfB|3V9FeSa}Awtumsy?)La~
z^?hc+wKE(Y>ktXglccpzrOCmUPB<Yq6csnDr1C;K*k&h1^drnj+rP~)YJLHYI8V;0
z1;uz%^)sUH2N3CrqRW@80Ks1j@$j4k8*5U@Wl>Wyur3T1)~3J(?172mhM4$KmxTS=
z!~XMG#{T<YNzVT2zzs>u@yz83<fQXbG7c7G?x#8UtS*N!AJ0awU^^nJd=xTeHZbcC
zZ-?3|YnU6=Gf0Bm12~a1jcT9W!Yqw@&Rcr)C75JPqM>JOh<=U|YD7qK&zchXReF=1
zvg096t}vm``__THdp6s2!xbh32H>t=6QJeL2j-Wb0WGV10GCr=Kx^zpj<j<DWioZ)
zYt}{9E6fD{_8n!53gsbYcQeetaSI$X%kfH#Ii5{Xh7XU_@Pw%VJ?XucX}+JxDDP%r
z<_!i`-4x`{a|p)Orkx<Q>?F=DnNA0`nh`U>ZrIIHIn~4*X<3K??Up#mrS0<}@ZAjR
z6etNv`AJZvSBRUr1lL0)CEDHL0byKPVA47{D*a{xaZePW$(;A{#>K7lP}x;B-oF%H
zpNWS~na$W1ZiMcB6>vyIhPWRd1O1@I<cw_!W1VS>QCd<s{wA2MsrdoTY!VY^%Qbwy
z8-tl!!r9%YuYuu%atJm1jV>nVVP9+!zU@$;KYhLM)@K2FM`a)L#77=>>!`3U=gP50
zNSXRYM?pAu##&K0i8YeScC6x38E33qLAm=QPPqFTii79UX)(oM>pX=nKO#(o4yu!F
zZNDLQRFT@9m4T%<eb}cJZZK^;hUc!TOs94`(>>iPOdP(#J#hy?;O1m%VsjnJIz-6T
zCR@h0Ta^B)9%Y6-`uL*5;!wKgGKLB1(`}11c>`O*9D%;a;4)74?0Cb3eOIS>uam&U
zvyORcqDhw?mZ2Zs3KPxA;<za4F7h0;>3!~RlD8=soTL6va%p0_qJQA&XM#*hM+<gH
zpJpokG&oAy6!Ny*gDxzY%j&x%qUC4=rkFV3zx|DP@xQz1wm6T?ebNYi5-Jo+W$CXb
z15!58fHrMVqB1J^99>C^%5r;gsN_`a&(xtg+a}VegHKVba}NC;$s;LJD~aSU3QlL!
zF>69CZdKL6f$JyW9sSKN7!f6MTlI*I=NZV(T?YDZj7dW5QQWg*5ciHLktS0s&QDW}
zi!UvOw1s01#r525Kb3108!yC*!s29XPYQf0=b@&XBH8vtksb(3Wv2~Gkde<H865*{
z@;fUZ6ha2U*T;q`kXzuZl8!D}2@oDHNZBH{YTIsK-t*!O^vcRi+{rreF1_jHg@1M?
zwZ5-Wk!vyB`QHd$4Q+w!<>_4OTs{B(!4cHwG}~sGb}$_|fbt5`>`ntNArwD@inTZq
zZTltobB-(Y-aE-|n>d5bHB1J7we76U$z_m`Ifusme1pbyJZQZw!0nx;#7v?MWs1k}
zk>mB6T(-{9a=!xIe#C_?ezJ{g@qPymvN90x%ZyUTSEw~bm?V8wp`9GqLqewuJhwlq
zE-xvE_g?#<lpcljSCyRin2-K}17P{+C(O}NC0dj2@`Y{nN%Qkt;H(se&jy^>q@S%2
z9I%=F&FR_uvTVrPHVY`*@e!mI=fjVt`%K=?oA5;Y4rV>l<MR{7LB00`#4q&6Lw!l;
zs3S$13}kU!M}gXQn2@&EJ~Yyh<GnpNooaVZqPA~#!|xh~ys(@^BrgcjWQF4}dwCRY
zeAfe2Q)|I=jVPJAGmfm3$bu|!aq>+t9>47igO}%2QHArlM+~W;R?vL#SXjlM8*K%z
z+>62E+bnukNSz%UP^W!A_TohDcmBRvf;{8>sI93$rE8|s<16fFkj7pb;robj(7DcD
z5G{h8VPEjo8VUm-PB%Gx#eFxwqD}uh)ShTeH|E=d)UXe`iKAjC&sU&6Li6B59v`&L
zqQJ@KICiT>!JoHL?92O}_$xGny&IZ<!|H!w!8T+5(e>eQ-Fi0p@k@jr`Sgvw;_;1n
zws;0PoR-ISW$r}V>3Zajo*reyrcmEgx^z>=U1)#Xh1Pq_`C>+@Fg<7l%)La=`JWYA
zsbYmn4mzxN=L*{K@;5#<W|^w5`4GH1i+Na;ikccS_+3+wbgaICM*<ole{~A%Yh3~X
z)`h5}p-Dyp7nAHy6vp?yWYnk5rL7iT;J5!V>R;g~I3GAK-i!!5cJCxO@=Ea7!hEJL
zM*y#79%APOt%F_dhV*xcGYUVeMRQ#v_-o?KubdiBcINHIV~=~-<?5=;^XsZ?`0=l}
z>j2<2Nl7AOz7VW;xWiRHC75G+474*VvG~?g)LS6|<9%<KH-~l-nG`X){qGy5?*>Od
z-zmko{WhRcWl^kyXEr`uD@$g-U%{kZ)S~hB>70*Tr^bBIVX`~vD@rdq3F67#Y`1C=
z(-*V|F3iZ_Klo}-Lm$Mzk6cUkk6Z_Cbj;EbM-p)9mg#h&Zz%76!4>Rl^@rz0
z$FT9`F<iG$mK^3?05^$vjw<=ZaZ4x#;{rQU8FUZhx6C13y#{psis$HVHv{|k>f)0*
z)2V`B1$Y!lkW>LXDiZJkGGtGp)%A0%(&TTLWtN4Ttw*6=VkLYPPQj~gUNGsv6XZ&7
zNbt>FP-~w6f0e|cJpMa6_4e40-I3u>6TQl6?SIZxn=T-I%LJH~o(|NOyoPxq(O7Q0
z6Gdzu!ub$?Do}9*^2<v3CCi*yg}vi!@YwO167Jl*Cp;2NzPOV6Wzpyze7fe}ts>TQ
zmm~Z#KM3<rM$iq-i(r{g0!&i>?HKJCP42XKF<mWr5L%^1lK*sJXPYY2e^iF`-0ZOt
zhFIg`^_akca<6daXVoqTI8b0f<yI<#U9mPQ#!bOP&y>hYpN}x_zZgdLuQYs~Q-r23
zMwuHk^1<_>2d!U!j7eA(g)=wHQ<3`D)rC3NtJ5~y(dQkHur!F^iX}p9dGk@Ma59AA
zl}^;;;{_a1L^N5rnC!TJnfZS6DO<7mE7S1K7^g6!V7s-Psjsl$*Yx%BF5OPXmXZ@#
zlq5uL-Gv}$uNXPnG>o@&&1va{i(EtTOPIfWJr(R>$eRU4{4stmTK~;~??MB3PBn*_
zcv~69(*=me?P78(JOETP0iMj&=J{^@$9BeiM#&IS(&AFh{9LF-X1sfdI=$av|E3i7
zcvlMh@7vj$r6qwF{7rxuj!neW*)nvjmoVu&UV`-2I&itVika}M4y%GDkhu=$!P<_a
z1akV2^YU7V8QMZGC;E`0<jM5<pd#H6agix>n@E}+x$jVrH9YcTVdLFC45e253IEB`
z(n^ZAo9}S#Zu4QmMiU5HB1kkBQTSQ61ww|GLiD^Y$BWggz}RSnku(~Db=p-Z{hUj`
zPCUYN%@C(&-Zk?SBBW_P_dQ6OFGd!h=x5tE$}(4ivuYYNR9UyEMNm@Sh~mckz~jPm
zh%B`Skpu2<c*SJ$qO+QH+PH~pfiCCv);BnmTMj*=f^=?AG=GR|Gut|HiT`K*A^1_#
zh~~$N@tRROIu7OI9d9kt5_=lXZziN~avSzUiIU?xUZRxfOJtW1GHrnyQP{r`P8xDq
zu622E?us-H^whEY^|yh)QzpdO7I7`CRxmeL7uSY;1i?1MBMxfdQm0ELGPJ44$1nJ)
zuLzcUM}VG@GQGJ(4dhSXWj3nU*0dI#$1m^W;KK?<>h!G;#V$?-GwwP1cgcjGvEVoF
zR=*4#PT2;*2kx>JDRIbL8iv7@rKl!)kBN|v#LwH0p}yr#B6uK>&HK2W{PC8dgF!Rc
zDV39{)x;>UX(#}NlM5mCmIN&}e8YKXWr$pu5b?TXMg=&p7Vm{T6K*R{T1*Yl&2<!w
zCrqI`qUC7i&}y9PFNn70d$1tY0`y0n@r?6vHb3AeY;ACd%0M-mrFfY6_r4WRXAeTf
zp><?&+kCouPaCIScZ2gG8)8)u1qOPT*a=p${LXd9;m>6ud}wkT%2wZH?_D&-jj5W{
zT+so;%-2!z+$&HUl8SM5+sN!&?;zvx6uRrC2|N*sWv{Qh3eyZWvp;8@MvM1JHP&+?
z=%q3}?5)+Nc5bIZVp9XZ@XT)Z#K##}-_OURSNqu8ObwRhnL^&IpLl;{I_+AT&$hPc
zV5vqdwj5DLC*7Br7ch$o6-twH>(gNVw|yvN<;0jq#Di~*B1de_U`$g_BRAe>MT>fH
zTBj<>9lL`3!~ZzqSqx+GLmordmh-ZuHaHH;7D3w+d9rlfWPDL3NH>|BVvfHRLF*7N
zwET4%qklNk4DFAwV(kLV(sZOB4S4jF(oc}ND1y^+-I>*yRqUI?9!zHcVe;dcI(<9#
zidEbB5iP%;f`<1S=z)S3Fux&6uUp$9b6^Vnny>;3&T!sr^D^EVV|iM!_aq~mTmZ+_
zFTiK3Yhaf-jBX;FU*{Z9w*#sq{p&D3EEh$=i5ledsxwT>+H$6|at`+0m_fvp<Euvs
z_R)e2Q+(^LNGJ1CG5vrOIxf_p&jKSc=jk_Gj%&%vN3-bO1sg!{mNsdZ_rsDCvUE|D
zHAYkk;*#P*rs&T@>^mvU=PMU74jXpBO+hi<PZ@oby}AK{9}hy2l?!{%{uxKF{{iAl
z6&U)Y9Vh*lz^hAW=D~B0=38?Scti2{dy*_2|8f#a!??T2)_tJIy3=i^mV&CXDt(f2
z5d5?i!OV%H?_Q~dUe=a|O_0X@&+6f6*fy@I<qSrvXw$#;J5eT}iE)UJM0v;i=sou}
zxZZw;rDfM4dO!o5)TH63*jh-}T}XT)$3XR2CmWNd2fK&QvF4G6K#FEi=XJ@frjtCZ
ztUS$|>F7nZV*|0GI|@F8$#D6~9*91@0h-T-;tUmlrnf6V&5WT!FMFUT_#xiQ;=!Td
zanyh30AH0niCL#8>93nbjl|RllORk6v&}%_UI%kZ_z|S;sDPXhYihzi=V;o?(O9bv
zyh}ysct<ey_Kd)y=b!nSmXh$L!-Z6CSx+skR>2m{bI{pz8+^XL!#lkR;PRlF^Cw4h
zzHJ5a@Ph{F@@IKbe-puCu!y-Exq<m@YC&7ys?h-XMX2)q2R`!SG*|;|8gMh6Q9FE;
zu{Dq2=61Wdxkn0nSy6;ke4CH!7Hwi;K75DT)0fz9XMwoQoIw0G$`J>-U$AxGBj$5!
zIzK>x0qG^PP~y*8y6Eg_+<e@fUb}J>FWBm{8ne}C`IYmq?Y%Ny-Z}~Vl|SR&C%L$k
zYa}gdkfL**D}t6|D_fs&kNNP(ki3nHLiOo(wB?E{wFw>vsmq?s_5Q0k`r;-d7wSp)
zZl^HAULH<z>udi)b0+onTDI0!italW&t-2vfv~nSyc};p!qMoao&CttpK^3U#{!s~
z@`ZV<qfJ%z48Va^&CIDaF({ch%FOexg?I9LG|f_!xNQ=lKLQ?u?#r(=7#+kljH$s{
zm9^L_kOvhqaU}0}H;%2bfe?*Adb4*8Y+J(MB-;_xOI!lIw}CY*n@mHF_CoIMG2mo3
zCT>YCDrw!v&&Q{c)jmf+(ES4bdVHQu5wB#lc4lI7`xo4NL6Ax;dBiAAaN<Zm3iO}H
zL&jC>3-(8Aq5kFn;H14f1ah8rms!fhXR#CLE*7IT<NvVvxgJ%0P|IH0w-rzAGiL^d
z^}y@KGd8|7({b)H#E$+mxI5%3Bpu(y9@hfoM;!*OsHx<OH=_AZU6S|gD08jxD=U7j
z7M%sAunTXMf|CV9`CV#MqIn7q6;A>;I0defZ*U|SXrf69Yq6&Uq`&6EzmuCVz*LXU
z+FZa+%yy$PH-w46{SWZ>Lp5u9;sBgr6EWze6uGwh9SV)Gppz1eFM3ZP@2NP+J==rZ
zrkl}UlBQHp{0u4t-GT2#T^Knb7`^u&fMZ7$s8-u0s1sG7r<S)sga1`%xh+PO!`t9l
zl08{IumV+j7Sj*q_AoThoC$0wLJQf|aOwRA=3iPkv+d$bJo8J8-VYmQ|NOoW`*o7w
zRaGyOXq$$HsV5<r&4KYLmm#`fF7ypbP=!1zdTMG06RWNP3%!i#{)-Nfeo7IOFFb{!
zJkE#ie+fM67SS#@b&}Chz$mcF<ngb3_AE!`*;2Tgkv5)&r@!82jdn=U)m{hLtjzE5
z8lN)jbJpO};25_1#6dLG@n-%#pG;hRPQj;|L#VMK25q?(?n^sn(LS#2pjoE`9H!e5
zKNmAR&&^wBT3Hb$Q~<iBm7&I`!;p7*8|m}OhG655xT>TLL-(@a+Bt!4)p!l>uIYhj
zYBJBdxt*OBZAS8#7a;5S51a}+u(<FMTa~?z_(!(D{7W9BZqqtE82E;<)v3c+jxJU|
zAOTcg1N;|A(5vnoWzm_VpZ0uWT36enb@XD8Uz!6(``_bU?lt+rbzEk%7)SbcL08)l
z{%TL-C~4Jra%B)>nnP&rQw<vV^8U@9T^F#R=^jSRL+A`KB7dsvnOj>w;O?|mhb3B#
zC^2OM^K^nW{!}{+?b&<T$0ujAW45}q_~l>rb88FJoqCF?YO<qeQ|F;vr7y`*H6VXi
z#lW}5B3N<bAtM}~iaU2+WY@HXvZ>|<<WGwmY@ee67e8u{CP9DN@*)zeCN?upAxaSO
z)f{y0wqwG-I*iMkOzj>T6Io72n!Q1U*}C;Eb3(QQ&sDx>{`^{wGsfRDB<Va#M<k%#
zS~;R0^q3!OFU)TWp3AnL@5JF&FZQ)!1l;RchG%S?!Ssy|T_E=nmK>BP`SQjPAb0|e
zil4(0lQi&QQn*>kHOJIfo|phxINzg@5zddqk%w|rP^lDM=DcU0tS`n#%8BUxCIuQd
z2V>&XOg2R06^PhOAnUhJ#K=cHYOiksy;{v!pH<;Fr??vfKPN)gcVV(_PY66Id&{in
zW{~ciWB5MhrufL-4Xf6+poQ`;%nwhbOHKRv29s^5-VF~L|3`pqJSa;iw1lvSedL+b
zB0(&Zu7LHQUC9drHP(Fd4JNZel*;%h0q^-nZ2xIUy{r4+uI(YFmz#_HoOmCHX4f%u
zK7GZCELD1Lw*r>NWTNYqD2$W%$)-&5hf)1xh;I(0skMZapI!s;-=)ctbUU(a&O;!-
zCR5im7uu6o2gfrFz~#vvTsPQ?7ApqW{(EC^v}hZwI#>*M#cwc<(GEEFO^s?bA3~??
zj?~UP4uVQ=VHB4g<9!%oeebBy75NwN<~A>S^-&Inbi|^y!4RnXu4L!hFNfVz*Ylrd
z9f9ROYIJeuc68v@^`$d<p?iuEV=-t$ri&k<2lvJ^4bu(L;bs|r*_FYcpPJ0puU_lW
zYGepX|Kw<vX`X|h;wETrk!Gqli?bUIf?>_ReITsW3YWG@!nrRKY5Ln#98yVv$@~21
zDKio1=FW8HkLMzuV92tTZxGSl&gf3<VWJ&4l4-;q@+2Xd9&0&|E?f?2k%}2sU2J8)
zsGo<m;STiuRAs^%tCK|27&z}@2Qqqx*exdC_&V*K@ag_Mc4GK<T(IOB94LB%gEz83
z$1wxea9W^Iu?ejY2!P+&{m6df+DFbrpv~ulIMr2<P8DruZrBQf9wPw0V2LBW&tT;E
z78<vHG0oU;0*YR<VCv)!;o=^&;gcsGO1O-5)hFO=zXQdyf1ui-5*2+_C@1<r>XwVl
z&BgkRiyX4n3$~$+$~88%U7B8Xj=_Nj0iJR75%zb=Fs^kq1S^Tvpte96=WgA|c*O|N
z8JlH@c0)YQe|Z(G&V6D=6|Z9Xy>ZYgR0pBbd3=|sePkq`Yrz@jV_<wHv^Hm>gxnL@
z<*}P?cG0It`)@<f)DNssy*ZuZZ%8AYeaQZc6G+^;dyFG_%(of|XMS@zwmqvvNKMO1
zW|CSHKkT|biTmnE7ykW$N>d+VQq@#?^kO&Ieyu>)1I9pCB%zDlMttXx=J@N82zlFO
z!0a{=g5ugt++aI^e+$l{{e<JNa+W$Rijkx48S5NED@L%sT8d1)`UM-OMRC5FMIc??
z%3uDXm;JKMo4@GAIlOQG96or8Q$2xdwmkL}9Y4DU-iUE~tw0HEi5r5ZywybfZx+)g
zvjVriSO|t;v+#UW4K9gjgZ&bc5Xd$1yz7{Xn{tHMn{oHx+Cg2isLhzhAK8wJ=XW5!
zktfMT$#6VOiDW%+2Q$M89JA1(V_xORN?qZ<DccG^)a>cd9~JE5h;qm6%prKnL2TK7
z3A;+TOdFIslui}JuKia)d+QCz47>%~3|z?e!=BJNVnY_HhLTl=*-&kuK~f@4G8v%*
znAdGh|8!o4$jDV#^Xfj%o%@tI7b_2&Ke~g=-}mhOL#Hv>!Go8{Y4M@+V(4(iCwLVQ
zgHy5((&iBXGQW8yWjHN#e6Jb1DE%3CC(p;$XPmxdvYGUs^#REn-X!B+0&DxA8S+fI
z<_?n^_`{b+HvFi=e}^nc!jaePU)gQ2?W7!ag<{4ovV|YiYzgg|CAdQVI;#s>^v<~t
zm@#93eXn#AQ;LuBEh4O`)sA+~<mF7RdT_+Kd249W#ZEN4BY*)V4^YtO9A?D<1eO_6
z#XZsZK)eEj^j6~Zs6mhyx&nRv<!owz2BT*&1TUr=lSM80`1j61aOM1wtU?<$t}B2g
z>PzVxopIPM=fV~U`Z3B~Dzs(49u#=RprH5;40=?~EP8kcuMB<Um(SkJ7OVdP-`-@j
ztqp_D^20DDrV4!>N!Tm+8BHR`aCJo`!VXR98_J!HT^r!sta)_hx6K^wZv#}0C`07=
zBrI)~#D}&?@UtcZbz&wFJ;9S`Se(dIOvz$5=-*`{)Gj%Gtqf()yL~~iO=9%S3<?=;
z;nb~gBi-ba3i}HiQQEi#$EBND{<@P4Pi-0z;Yfdz_WXf{?00y2WF2^#Szzn#iNs#9
z02M}uP~X&_sE@V7kjFK4*{~+ODm%(k5W5R@d*yJtm=h5>5skXPGI&j60<^b%CK);*
z4{Fu-AZn}_HosS5=Cr2a!fi3w`nrNYy4--e`QPCGF)`x5(>e&>Ul)LSQwHOIxF2e3
zoe*+z5Tjec?Ev@Q2j*c|m@&*Q%Yqkg&f}fgFCm0H#13~mjyj(TEoT>St=#j;wq5VR
zM?)1B7VpQNms<GXyA_p4yu^6+nbXB)6RH1m<o{8c3x`TyVX{8AH+S}7!euL}ay1{m
zaPz6y4|B1tL<Ow>-o~HRTqFE4Ir3bSkM9D7No?hDo{H5GIGneGcI$ev#kclSn;p;5
znit9nO#i@_{BMLgk`s&mi>|`$o?74!EXTZsqG+~03J;H1Q>o39h|aoF6mR#Y0t-_x
z>YEp~S@4+yQo3YF&IRkodGPSA0<oJ`0a6j(Bz0v8{C;^2_CHQ$F1<>Jt*@Pk-G)QV
zRF_b8ZL1NuepUka6NO-$Jew*S$nt}1+;DZP0eBgU(dY5=VC3j^_;4Z#cJ7{w>Ff1L
zlT-ltP`H9z^WBR)!}A!DR>O$)Xj5OOeT@3?RQA`bZ#bCa$*@wYHTGLN@yaDch0<D>
zQzuBDtxDqu`CMZaZi&L(|8e!^e>r|(+jc6=N`p!?r&LG@)wPagu7rqW_-0mS$vmdH
zXjW)aX+j!F({&z;kWeY4loSyWB~(gy&inbi_Ycqe7s%DsI?r_+`@U_(K16)M6lO7G
zj2b(OaqY^DETe2G+G;L?u0_32^4yExZMu@?G{SPDlh8eF9x+auh~XEqL0?)F^_J`=
z+Ruz|<icP)`;YHtMp{G1{A4(#xI%C{PmP`Euwr`s6JS+~BKTixMps*o$H|Uj>EDjP
z%mF#JZ*~lxATcQ4Bf$>x_l88D5Zv&s2{%V333tyIp*k&<sJ>@_oX@Ez;_>O2Vo-;h
zrT4*%q^;0;RDj>*DHl*X4r0!fq09bC=-YRbL<J>)o!3)=O=>8bSzW-zOSeO|V-c>V
z;?S_t0CO7e!m*J{V8#MvL3W%2xSXnhiPJslryZj7@t7&F)AKuqN*;oKeqJI^N=gUx
z|B@KT7(VY%&7CXQ&1813fhQi1$vFpecBwdDxS+5E{_#wuNe{nauxKi%Rz3hv3BZq~
z3bcqn`|PK+z^r@Lw0pt@)RrxWWw?+%xxJSw=o-aV$SKmrBfXHD%jb)`fd%&81DoCn
zLap(#w7#JaRkTc5u!RB9n&|@z_c=4F(+=8YKcIgNKWjaGj7;HYxQEL{!SJjKyOsMN
zjx#c#E3G%t8yBx*RKOyb6{O8rS(f#SAvbzMw3S?R8v)d72K&>E#PV}0s%_@K4;5$l
zJotGmTg7*vjmtPaZQe955&)sblI-`WC^VXygMMCR5c08w)BW&^*k?!J7%MBL{`Mgz
zD{qB225n%t={#4GI>a5@>W{0>`Qxv=B}8h|AZhjRg>Lr<qG3LpSsZJDQDz>@#drgl
zh4ER>>OrvhuO0394qku0D_Op<5mkHb*}(Eq^vS6$5Ef}pA9fysiO+K}L+2jGY}(3H
ze;cAyiYqk6%VYDo5HQ>w3aImqTkG;0q?47nx8@R{H+np~ye|d&?2Mr=#zf$#yqYch
zZxQ|T-)zXV=!3_{-N5UX2SogN3Byw);HkknHs#7I?&qGHWOgub3#)tyH^t1*Y0OWY
zlw}BQH41Er{R*}bK62$lhM;IVPM9K{LduywxaWFf%egW%ol(Rcagm@zhVPO^_zV8(
z{KXixJ3K$YP0-fm#Qw@XCy(E^ai@NX^EtaOxcEyFS+S#}?E8iilpTx!mw)5gjrD0<
zLwAB;z*ZjCc%Fex+EUzHbKXd*XA0BTy%*fAFT+~XC9uj(6wG?Vad2+{CRF8dvwcKq
zY5rBBfrl8})<oGME#cSaQ*nvK7A$LPfT#9{h0$N-*eWYAM(RS@(rLSFE)q#rVr#}z
z5{m`tHrMfH{BrK)(gj$Wy@$PM?T0(oArLcvu~6i@G01C9rcJJ$phCJJuDwcN8FC$V
z*R6-0(@TiSh0VMfCK-FYF2J<{dGh(>d@$R&Sy1Zvh<tjNgK-(juym<Dn{1>;ztvc?
z!W^EF^3fkLD;YgP@4;8OsqD)@E!n<89ZWvO<MA3l>iR1YSE$}9+okV=;cwq^cimK2
zkgFtqmAy-p!qvdD${cF%=7FC}6KVT$0DF}cSm*nlVBa@}w$G8}bWga#&4T5uz&RKj
z>y;rZToy%>w21U3A#_$Pp~GjR@aKVC%q)+9xutC!Z=4itduR`f3l6inD{qn$?$Yq{
z*k>>o55i}LS%g#7Mg71Y?!xnT_`^V(u4tRiW>v3cAD#9>-#BAN%y{0~s^{d5$^zW%
zQU}w5RoJHq;aE2K4*xyPf;r(=Q7D~AzCJ1k@0ePF)yd~jccmU0_5gH*hhXyiWmtTu
z5w}$Y!snjzIMUXfNIvSsOVj13R=x?>&GTReM|0eorf9HwBEXcqG2DB-M>sIwo@V~u
z&osF<(7s|y-D>8t32AOHF+K=Z{pkXM^coCUA;pp&aim-E8oX(KN{oNyplVwqx3MP;
zRg0Ek?=gRNZdDyBhUN22inFMnycx>xMnKECN>p`m#K2=K(EU&v7M{w)`ES;6m0L==
z$>}9{$-<TYdo#f5KK^~W>K;5;t%CZM*GTV?Cc)d)CEV@Y<<PfxBlr4$GNCt4unsSE
z>QX-+D?_TG@b@jyzOjTA<|U9Tx$)fn#~bNzTRAA7sE0cy1JFFKj{Hw9<Jhau5SEHy
zab^q=zyAc>3#ZUacLTw_Hd^2)xq?isQepn{#33@5aao~?uzpi5SE%H|661L@Z0ZI0
z`R6DRy}S%U$7|3S{{HQtnFfD{8}Y*BC{83}Ey{1mAP;JcVWQ1z!KtQn-1KWMoH~;Q
z7LFO9#P367R=p-B+Eq9|=Pgc;Hf0h4&Qv#NBdm=PV_o4dAz0@ur)8E3?5ZN|Fn$E#
zKeee%-d61RR)rfqWSHVM8&20J9?}fnal<WVtb<b0NL6|p%<Q~RZdJVDgu-SrDDje8
z9(WM8hGk*jOMNQ#lJ7&zkK!s4^=Qv)W7-nnLiQHy#AClkuuyp;JkZYbD)#4+6%{LR
z--?eUTQi8eJ8~9t&nU$kr#I07H)r^jHI0pC5-jf;pF6F|!uO-B;mkM{@VoyOBx0Y!
zT8Ymv;Oa_JC)u#^5>bfVlFZdhhQ3XFg+F}#1PZ1PxLY|n_^{?5Zc%;$Uz}Vyf7#La
z;8Z?j2_BQdAZwPpccCC{R2YUxXW#&zrR%)I@f<;UCcY*brdvqRGT%dj%8?70%bpd~
zV45}N+>RpNGQ-5z?h;B{@z%tY*EoiM-nC}##|eCAXvSh~I#@5x1uj<sf433rv1&3t
zy?h>}tapN{k%nMF-wycioD&=FD24pL{larb$A!jiFW{g)KerN3<&J&uMeS@Ecx<%-
zj%~ONj}NHR@zz`5;XyrC6X3`O?54xZr-39vT!|H07*mJY9;`00pJ>iL4ShI@{&K(1
z?Hggj7V1{QGLv_B)KrKAjn6@8>mPFEb0C=ei3zS;aHc(j@x-`nG`^kJ#C=#f5zlNM
zNjJiKn|Y^4L)NM3>{hfGmFA74Q%alo?v^}V5xX1LYb3#`V|o1NWJYV`PSY!m72wse
z6hfWPvu%fSVd5eSHb+OEtt#k)GK15!c<d-z(sBbUhi+i#MFCcJoade54s?F^L>7NE
z33hF|iVynsa!Q3_?8r=4lB?WLI;Wh$gpgc3b7~wbTRfTt4O+6e7m>I_%b9IG^aSfa
zctW^+1XS<ShcG8Q8|D;3)Yg1Ni*1FlPqCJWrs>00pA7E56R{+Gr6Kz>u>q&XL^6eY
zr*Qt!i`)d$rJP>NO(^btgz}Bz>`u}EymOL-cV9MO(Va`ErfA4&I>)2J`F4I+a|lKh
zy~iiRfr8Fu%Rn>iFtM98g-UDAf*&6zvtrLI)Yvsg=qPH2g*^r=uQwEj!o|6V1-i`P
zksEHb7zq`g>P$V=8u=h4)0{kkrdLS|H{Io}v9*~X`lC~Lpj@1Io(iFU6_+t4M;^Lw
z*~6CViA?5x5Cl74<N`8AWBGh0G?;Z9i@r?7w8v^R?axH&vv(sd(37Cw6v{b=3o>W`
zqiE7UL$I0M#Yv932n!d<v$rSgnB1*!!POtC3_~df92v>>hE$ZkEVzvMKL${G6oFF$
z0ryjTIY>ocfKF{onBmulH3tW=b%qPEc>h?qa%Mb8iT98e!x)lwe;YKcbLF<K)qqSP
z-<NzhhQGru;|iDNVUKGvCv2U;<hPe#!b3Owcu9fPxm&|RqgQAbF3sk88laNy20UyO
zB9QiT$BA%(EB>AUy8@%oe*PS8d){lfbK(;9G~C7HZqI_iwjum`VIF(XABVk5*3c<;
zDp7M&C8PvZbNjbG;6{G~roiV9udn$<tb$TuUv)b=pN++%e*bu;mC*XO$u!!~|CRfz
zT#C)>dN6xRBe|vb8mIfzlWE~U@N!BJckpH?Y}NjVS~k1jtf4$xG$CG~vhjsr%mYpQ
zmEgqr1jW)bHrwIZ7(Qzt7L6hNY`CK!2;Lmw*?l8t5Yyu$VNua_@G^GfEF7&N{KHi|
zxXg?2PZr|8;{cg^x6DST;}Oa!FJRAnBG~>7TC`tv57rd=3)3tnqx`&W<ofSZ>{$Fk
z=$Kj!ts+@?CC`}7aY!pWn{))T6C)sQX#=#2X`%C*dh9Zkpel(xgYw-cq4WqdHg0P%
zxBRm>l>Vc*Ps)cAY1<>Hy_5nQMpO2~Hxk=ko70+gNBEvk6S2J}1EwMsc*nz0(3@-k
za~GxK)F)%uVOLE$K(jz}?s05U-VEUl%i-Ye3D~u!zAW<HRIt?w=e`&%0oj3ZY}T#4
z_-yuk*c{z}t!6EP4+bLa;q*@s=PUzZJ&v?JJ_)`)*n{ES-WYW%1Nz?A;$;s#s>bK1
zD~&Yq^S3QD+*pxr*_?^iV?)Rtn>FOVP7oNy>d})uVN5oQ&q7Jb^E;~toacmto`#!O
zEBwEt;Qz}pHu_&q@WlV&1RF2+H2MEH#-{(5V=S9*z<Vasxr#S>f=5@^U_p`#%6pk%
z_epUigO*(7&qTp?v#)q!X$Q}BHs!Wwo<#52OF+wCjLYSdGl_G<utQt{O`fQ7GV#Yt
zOI$<*_PY}CXZ}AfF3ksfl9JJL(m~J3|38kg`Tuf^r~L2FXZAlo-_+%v=Kn9p*y4XV
z#`_x$S(C(1>khUajnyV{@3|zw5j_{ie}r7g+SzQ|j|uSFVl-SCy2qKX%zz?k-WuZn
zgj3oy9*3*#Nw}v3l~L4S-`iycqg~9Q+c+8*>=dWlR#d_K_dHYS&`SLLco}zC><TEB
z$<u-n@~qR^pXhE|ibqb)6D(BLM@P9QaG)X*KW~?ytCW^8c}oI(3J6uxg^+$|G+ULu
zi-lP6oMO{3Y#x!qP5rWxYcKF310MIe^{)%jI?WRd+Pk@hll29?Wf|OnycUg}`k6#7
z7p24F`JBY8=djD7l^go%i{UTBh4#-CD4SW&CCt-id+ynC=B{=$=7lB3pBH7)m&!1%
zE)Mm#XO>CMH^K_-D*!%$Sd)1V73yVigs~*HmP{qj7v<sQ2o6`wKf>xC1(p4`_$5Yl
zg>n-f#q-bBi;&!|LdCeN<ai2ChoAodI`a|*!D{_rkTV^RO^txDOGPLvn9UlVr=zfR
zB#l;{3tJ~@v!BbGNJP{Ro{FA_4x*tX--TxyBwxkXR`c1p@&>Z+RzBSFwqi@Ko`-};
zFQMDX>)0W>j_ZxkgydWDbjt!)7;SnV9+|1K`2+IoYEC@rdHfK(Nv_6>1XWtU=pGs^
z|Aw(u`>;T_oLemWgZ#~Xh(2+BxOL`A@|M}rOV;Dq)zmJsq<uHB>)DSQ2X=Bkpg|w>
zDM0WgImp=ZiNssIgPBh5oEpywS(h@N?wP}9@h<kEy32d4cq7S-clncDd0rUo%X5JJ
z<>>C|mvG~_Q=G+{R-7)wGcE?EkTpkzpe8no&h!h$qTtV@M<|NstD4B?2%!Hhh{Rhb
zKH>E4RG2XDInl2^0(TCqg_Hg_g(Ir=gZ#SPthlp<9KUKvKfR8{F0)1S=Ic)My8H$_
zM~`M>7fymF{s3PyRQP*98jPPZi)lALLc{q_d7f_uC)T9`2WL*A*PH!Nk4<E6*92jP
za2{0s*@}Prb=cp$sdPz17?kC_fVl6II17vI+{?Up?)35)Hd8^3`5km-ucF*(;^jnP
z>AHRh@csu9*^8N&<2e{77R@Az2eC#&jf(v4z{1;JWZfY{6kRcmh1u4^zwsYHW?CNf
z&E0}O%P(^uN=^uz=z9o#Fa`W(%HmhiqxiH-5yyQpq2r4}06&kz(8^Tk**XZv)nmw(
zd$G{#DND1YIT(6h1wF@`u!l}y@8&3?g}6B;7|YO<f~$hnT~+Y+x-R`_8iLc`sj!P`
z<tXvR67&S$NZy9;MDmvu-F@RddX%Mtjb9op4GBe=TRGsZa2RCHuY@J3ZtUstZ6Ibl
z6T9QmIiHEcXl7^({%O;>w(&27%6Ao*!@n_Xm0vUoIWGz0d?&+!qb-okbF-%uOo53h
z>#(|u&pK~B#WkrJVA$J>HYxWdk;F|FBqpfPzV6GUL`{pDW2}(M6Q=IBiYy+}#|63t
zl5Zn+!HuqZ3|*m&cO)#B;X1;^xL9<VBZsnPlnu`cCo>d35pT@mu8RA^j*B~~*?eQB
z$?rFul?EW?_jlMgw;g(aeItnlJ4o7<43HSF%|Z`0z^^;j<l~&Zw0HA3T6FFtw?s7n
z#ZPGpE4GIclc~O(^??Mmst}<b{36FNQe0qbD31po*5D6*zPg^f4NO!Ihk7*FB~O29
zFlP@o->4wdI)G{I?1$J{t5K%=Vrie(SkCuKBpgkOWbY5JgzGb<;hcUl4vx0q-t9@k
zij9|XpU(qmd!2$#yM!40VLOxf+AZvOvJ->n$iM*o%t;%)1Dz#xXtrsE&5xIF1s3m4
zLyfTt&AS>0QyX6pxsVxj#Ksub!ruee7-ZnL`!_fl!#-TS%8~6nB93P@<8WHiHSWYM
zSFZDuAHH^t#NX;~Q1M(F*O_EYTPL)l+C)uucrValHv`<y-)q0NTT+EpdMLBzJUq(!
zfp6A{VZq}%EPeJK>&w4k)yXuTS*O4jnx??79gDGG*C+VCCyzV3c{Ah}R$*?NKFj~r
z#u-US(urC(adyEh80mMG`)L_X9$Z-rm6O$J7)=7Jdtziq{Sdy}lnt`tjyN+~l%-yM
zjqi7R6D8+Dfrh#S_jRv6`+Tv6Yu|bq?>x1IFAqo1)J={oN9e-_Hb{W$qc@~?<t)}P
zoDJsovQVBmfq8zG;$FOcC#>4c-@^kJ<2a?;@biopj8c*$YddDM88zZ`tqah?=6iU`
zW+59|G#8A2JJ72iw{iW)bp-PkJp*;oR`8C$2<ieuoH4^5rYzyvg46e4vY8jyY~i0t
zYWp}F={YEeK>N8{B*$<poN|5+Z*^_ZEJcqd%uwZaWHj-4w3QH^D-ZU6Bv@^BJKUK1
z0f(laVGoaOhjhhEf%!`lbT!L?z`G)N_uCE5R%r;J<{qxH9|ZMZAK~*Pci0shk9H%E
zp=e|{+*7!UE+tQK^-rGvId~KD+l*Lk?<qlq`eX1gmZ#G-Ojy?F1Y)hsP+`nXjLlBO
z$7Tme+Sxx4w@|Na$(QY<;`@2-{A>sC!N1%AksvtdA;FZ|>d1%1E%;$D1>Z|8f$fpr
z!k;E;f~+9^UCL`dEL!#k%>sMj-~2a%A3a{s9%jrW=1(Q6N@k4hYR4zMLGZ>0#`n2+
zj<uNz`&M`o>TZ+?R;TYHX_?Jr=(R4LaAFo6r!$L*2>5<R`XMx&*Msw)enC?g5w`bN
zG#ROsj?Y$5+)w-9!Nvz<QOznr!krRQDj82+i`c<Y=@Jm=R|)OB9^uC-O<Z7g41ZkY
z(02R@)NSy^$wTHOWP~*<J2sZi335OtdJxAr)f2B~AF$h31v$T3NRiDdcsOr8+goN2
zJ1b2&M-?~xyT1=o$Em`JyPvqoacKw@J*48~I()u18&1sCX4{Ns3Mz9l_{{hnyg6?_
zH@Igc{Ibl1D06>)rWApL`?Z*hZV5QaJw@NKzBcWBPAvGg8reuh`0k_$O~?sAozZg4
zd36FAyzxQ!Q}s7I<KIg%hf48U`80OXDUvi4jij^V6zKAK**NA=Ak1_*gE7Y~+2v8~
zxOPVXETO)TW%LEszxSjwOeE=*&M{=kFohQ{Jn6e1tr%AoOa{X4!PE$Q6oX2b`$dJO
z>GHV{Z)y6E&qV&1vWlfHbOJr+xm?%HjZ}BP8rjfh$TT*X<DmZou35H`csD2D-@SJr
z`S4G03-)Ffi3t#9KLmE?!r;N~E2xm1h?XbM;-7}+IHoFtn_Mx2t#LU`4sPznt=r9b
zAHWv$GRuJI%c*F>8>BkcZ6qt~Oi*r=G<_dZLKYpB!s3)ykgb@7qkpTip!Gu-dMp>s
zn>(@T)Em-$@F~VA&x7cUKCWxMEuC{_JXK27q`uy6Wm6C9QmwKI&_2=3eN}X!NAEs{
zRrY-M-0CB2$PuCAc_y_-O$seBP@&h}^LFIoOL)M|4?^UZ;FJ;P;BvGwvpYMA{sjpp
zzCITe_nyW;iwVTMTaQ**|KRq`_2nd@Ux7<)8=lNC;T-@0IML(^h`oJ<{#JQdCtg9U
z#^#e>UnFVx`RnkuWDv{xHTmkF9(^Eb!=#GefnR<C4pf`L@RpzCmNDgisCVG0?rgmI
zPy;kyjbXa6-Nf0~5;mFYz@`Q97?~l2H*vY>rL>&IdJW^pR0oXITFQE@OTprnbJ_R(
z^I5ByGsmNiaKoYx;51FZGLj|meeZs#Vz=@7qop`J;t03fvIN#I$iu&?Bbdm_G8k{l
zz^5<+F5ozLYxPCwAa6%@crK@UQhRZPA#ZfsCjmiemh6t*R@%G3fqKpN!8<#g(V_GS
zggC3v+rC`_uZ@=>%%lj^4#&ghg{?T7)q}-{pD@zGh|2KBrh)qLEdOpOc$|)gp-Un(
zp=6LaWxc@Mz=hcN<BFi5GglB5ydI67y}+k65t!n%pB|Pui#bPE<7>}ETvkmm?9x_}
zfAA7)J!+4o2~+Wn^JT0ySk5)6hY<IPGOTmWL70=al-^!d3#QQrG2(@e;L5F9;f&oD
zm|ma@;(z<$(iSoLa>ZtLRCgSG_vIn}Gf@DWB0XmFM-+}^=E8q99DbiMgQmRD!nT+n
z*uQNJnvDw<+#BDGPd<Mqsz!SF&Ow9Km8SEorb>`<Z^sK}{JCc^hn;se!`g!rSO`C}
z(SKLOJrftBr@ALm2c9j!=kHm{OuqA;H-rl2>zMcPIvgi*kKg4C;FDBkb~4_TC6xK_
zOfEH=Sf@j66-=p?eI)E}OT<e?9iVjS9a#MKB@G^zv2eyhCTca7soycfG7)<=KOmoc
z9Qp;z<W|$!IyPYMqkvj+a?DR;fV{oA87j?2Fy-YO9{ZyO^5su(o%3Xj;-%YP50501
zUaAtq+CG>pt51jg8!+X~Bd##75sfay351zuB*c2MplWI`d8@<sma4KLaLZhXHkge+
zhpgDk`5FBA#(P7mXVAu|Equ=@l6+p@$~kOMV8RQzP*-fmhMk__pY`U@UswqT^UrXe
zi$v)nzf*#ev+^vnnK$u;NwJ*44q`A;k2ESba0YS@p>|^oID|9^#{Sd*-x0r|b=VkQ
zx~*Y^=7Yz^Y(cinC$6=`5!i$t*f7f)j%m;14AYF+k{L3vL+Lu?ACQ4#{I(=QZZ~_T
z6Ap%sW^CuiDDH97LoR3MWIAm~mY$>UAg^FATW%W2XC8%^Ti*$_H&23n`!%dQ9m}~~
z7$DtYvmw{67nde@k>9z^f~yMep?_-t=;)Zkh1g)Q+NZ<K?B`9QcK`6Js*vQx{l>eu
zOsIbSA6%vpiG|Onu+f`82r?f=fp<t1H%mDW#~NQkcV9ysULuZi?z&i*)C*#qCR=XD
z+scjBVd{o1o0}sq;atN7@Iby7U&~LyEgvS(_+2x=<hBPW^)|wz*$>g}y_`TfnB#6N
z<9!k9bzt$|<q+(^_bh}v@LGW~egCG7D|*85{rW*N%S@3i8216}P4~04CUsm~^e0jw
zJc1|Ie}h@QTWD3g4y@$U5WeIt*WIjw&kcC{+tfQ8u^fesToz{%*NVEU_d|NtL}ouD
z7;WkwgJYyNm&JG5+#mA!^CQT$d#(j-y+HE1)DMjFU%-g>WyJ8vci5m=Or-eVzbk*Y
z-{&x%z0eQg&e@J(rG7Q=I9?4i4|S2uIR=pIR*P?a^iU8Vfz!{H3lo3HG8y}++_srs
ztZ0o7;=mkuvE~TN?r_ID<_@HHXAg`q-biGYSYc0;1$<iPOTsk=@vnmxEu8HPYj(fl
z9{yZ__w6cRUivruXycFU?I^nU5U?ZH6Tv$5HYh|*pb&dUpmZsQ1O#hx^X?QwU@f5H
z)Fent&%_5jr}*)|UDS2VJBSOP%l=jgakH!|n^-ax<2?k>|J<J#4VB}>k0v<Ap%yo)
zaS(ZX0^IxQ2@f=8fQG<Wc)u@)1b%tTXU;!>zDW{f->E>&ECXgUQJ=jH4}l%#=Y_YP
z%A@R0{(WM>NUEnXlVuimVuk8;aIm+*5&y}N{ePp$EAvp4iqGWss;kgi>1zVBM?W#_
zP$uNuSp};OFQZFuiQ%u|Oi0`Q2tB;|$m)MD;ll<wei!`%YlD~3_B0uIX<LP77pv1+
z<K=kz&LB)Ut;psJU%=<qwXCIHnmSG%<Zc=HaWB7$vVVPi&L-A`t@pFS@jP2S)8iqU
z1*}GS@owSBQ)+DE&h^~ml5)=Tkq#|eITl-&X5*C|Z?I{}Sva{Y4Pt#2se_{uE#vdY
zE)hoPcB+WGGWR+cR}v+3S~mu7PreD;7xTOyzg&#j6%RL8e-v)e_(aye>gP;H>2L+N
zKXJ=OC&HrYYg~cC7tk@O<t^e}5Lgt4;YU9Tuh&(Qzh<Q{b*45`Pdfzh!bdpejv;wy
znoB&NoJP5blkmH|5lkoSC!><e5S&I+$(esqJ!lzT_~Os5EZYp;zn<X+C3CFw7){=u
znt@gMmh{oOFOW6&G={97!BuY(WqTLK!D5wVn0m(t?g*>NlI0@U5!FierB*}ZjV`EL
z9SUK}2Wewy9Bw`A#2&0XLtY)8LA}j8Va|+mxUN?MPfFxrtgb0Ju{ssM7LJ91Z$7-&
z$Bl)o5b&JN2q=@DFTC(|BE43bQx>Fu6Lto~VXOKK{6I!=XFp5QTMcz&b#^TrnJdAj
zM}C1nqaNX@|M;zm_eXd<Wf~V8>dzgy?#%rAW^>J45SnFavl~HfIMpJ6OA?c&E_{Z3
z(dy^8a7zp=UaZeV<)&~e-xh()LZ08&E6H9u%dntvqsTG7&*axS0P&tvQ0j9o@jX3=
zmQj-Q^T0>w*wTQak~hc{o~^j!v=!See;TujC27vAt9a2~1vDgv;g-T2^iCFI_s^ZC
zI(=i=0k<aZcHb?5Z<Z|4`ICVMyO*-+jB(t;i=mh_y#l8mRmEjmWiU6;8q*!V<E^jS
zAT!|}PSBjfStk;>p`QijS#BU$F^k1znZSj(y_7bX(;&DBZw4*sCV6WdyMGEU=XX27
zhYg{Yx`UiMU|{4l`fL`5Z|l!;+hgC8eEE-pG`j}0_t}B(9d`4Xq^sQ6l_O~SP!%>9
z1|gfP!?rfN;jgq6(ED~Ky&V)t*6Hs>%hK)m#@+|V?Qp^;wU6P+yv3wNWCu;RkfBAd
z2$TKE`>gn^jF!&}o1U3AV77;Mcl^i!-+&G*yYn4CDda)NTXhIhHeq>{pEyJJXM+3s
zZ(*ib6LbwVL)Wku&eCgv_Vv>2!#P7-)U85e`<KxhHb%5;PYb-B*^2FZd6p%oj<4cZ
z;<X*}%s0-GS>2e+>YF^+(sofU`;i@V{HF(wYjQy~L4*b@6<~S9O1{$&4KhUxJF6FR
zUcVPp#~?pAJ4cjdhdtxYKA(#<{p!Gq$3nGC0huD?dwD$DEhj$^-W(nT(FhxuC$vKE
z6KX_TbREg5c4E)0Z=pu%C*kr%vslm*8Fo!I0^bFl!&gJ<V1AUJmurorj;pRhoc?W?
z6?G4XT+C?K>}oh(ug0*bnKQZ=hS_J=!$ogd*16q~_DJxY>AD)Se|jhuXKR36$uM+p
zL;O}hkLeHo#Gxe;Y`^GB7#w4dQkPaUgD`b2`+S8^Jx`kTt0*z~<z|etZwB#&E@ZOj
z5}Y|44!htXeiK~>f2Q7s70Kgpb?q;>o*96{uNSbT^8F-PMTc3Zcfk8?BDnqVV^~}r
z4KI>{@T>ndI3cErDP9tMJ~RTF?hN3kL0jG=>w{a5w}HtuRmM}~*+O!W(@+zqm%{Et
z+9WY>neY|L9Ny#j<aTiLIfcR7*0VroJ)-vA2LlySpncYP&L(I+xJ&%Qn{o;8xqc-*
z{UcU*TIoM(c1oT#mixgI(Ey=M{2ux=w+++|F2@vYHRj{w2c>E=1k=i|a0|+uAm(5V
zB+l``oTvyWnsymYA|vojVFfD2DX`Y}=fQkY2SiO7&8F355}ywlWYVMG*cV=c|E+yq
zCS`pCJ9j8SW8qZ#W>h9wVv<Z+UL@nH-b)ZV)Qf?8LqV+SF{Y0H2t!W80_W`^B>3KX
zn0-`{+5H#J6h<EA-n6xYsgyaq1EVs-YcXW>uVZ|#jsJ`zu7RA+W8qqRN0#LBhbvw?
zf=0{4VA9Jlrns__6stdl2U|nA))nt@uA7Bm!-E5OLUIxG3!QLw4{v3j*IAbE%N&QY
z$8c&xmUPmYkr4hxge~dRhe?r{IBrxPY!jc#TH2&Zvesj8+JBOByV#AT*Aq~%A_ko+
zX2E&he0<JhE%z#qqR;mmP|>)Pm20#Ku6-85VYx>TcT$r5-Q15w`6Fmx<z}w+Oc)-X
zIg`D(W=M-x-9@=9J2vgpW)OL_3Kx$p=k$uo@ZT|A$SSBKKWZ+3(XSsw>Lz76o(i-o
z_#(=`?I&UC%b3rv>9l@@C$83#Wo2>h;B-uy8cj%o@h|=(cLy5bfcpV*dN=Pns=o*`
zz4}1Qj?YDWJi;Vyo#MF}8gOs9HNh9fcxIObz1cd1>n`2p(p)6z_tz@8a&iojyIu;%
zd;;*B6mM9!h=db(pWj?gCQtwKgo5~Lfk?C_tocEpd%7BKaoxt`d~=}U!gdZdE`wm^
zGA!M%Nm_?|u_IU(cV|}PhSK?1pmzsSszvC9^WGS}=AA&hvl)}(Cg5ffeR%QjD*lV)
z`GUrcu<U{y&&zEmpZ?Velx`Z5aRut&Gc5>{%4_gfvmws{lc!han$q#F`-Mw$yHWk2
z30#U_gm2#FarC1)y*}9jCSCSNzuN^^(yPlpw0wuz*UGuKhyGyVl9NPdlO~-rSr%`<
z`3)5fiZD4rp62nh-Q|&$;IZK`Hh%BHb@j^3@>_?X+x9xT_iuu^@++v_yfe6heBg!~
zm-E@1iEIJxXSbtYkfs-Yuzl-i>>7w+B^L^zuILcVtWl!}q@~#Dw?%khi#IG5Or;Oc
z&1R;yYxvnjA{_nxj9WCGcUFCh;x4y8L7mO%c;)^%IyPn{uA1jSum6^YMH_mt>oP)n
z+)6HEawFFhrvVf4|B&_>v*}Z#QWUQbfi30&rl?ng=2mj7uVx-wJL)%@M&x0rlPGi4
zuORKV8%bet7lw|vqV`iV@ZH#3@I&*0P`RrTPx$cPKNlIA)@VVmiAmuVGZ*?c>mXL|
z{EWK}@O?5zO;qXq#+?<NM9VA!VU3dl`#QQ28q}^p+37rx2vNiX+iZm%Yn|xV2)@&}
zkN2&F@P}ycZ*J~^2-H`*$O$(F!!MKDP+1v;S+BncHuM{DwhIiA_6FmL=erP}grH?x
z8VOu!NAg2%W8)MvaC~!}+w<TeiuLgCu9qzsCiY`Bt>He-nTe;y7=g$p3p!6(r))&@
zdGs2{z@o!i?5Uazbx6O7gNG|2@6aC@TN{CIySFo$dwbErz?z%uqDZfd&}KDSQgrri
z23}JhV+DU#d#IYqg@=SwQTG#6RCX?l+~mhSKV?GEJC=V|s)GEm3EoOhL%&C=ETDrE
zM)d6E{T4|qVb*%|iywnhDt~ba-?c1$H;6i2kx))_=&P%~==${sz8lB)yU*TW3vESM
zuVxkbqwLIc11wp?h5^ja?#9a!Etr`12IHlwU}AeCj(9o`&-ffiRo<Gc8nG06o5BRr
zliq`tZ4LNt=UM3asc=GK1}I4kk)r`+f?bstxy^j;TWW6=kLWO^*VXO|M+clGLvj`1
z{#2C~JSf7iC5`aZM1`7YNaCrk7K~v!Y~?gJl79XqjIHy8&Uz8y-bIs`O7?i}AV{(2
z`+0}^-bSpFe~4Y4YVcRWfF^`llc0e*azZo`78!R6V<L;O!GQ1T%sCEMbvp5CS2gdZ
zTn<((zevi*IcTYwjy>wz*@x;abVB<URNve!h@bZm+IMMy?F>pbJ^|ROa1f0}Hn7W`
zitLW-Gcy0W4?O-)4?pv)oW8mUp4n1B#-~UzHUb6L+A1K{yb_Q6{3RUP-wUUA$fMK~
zQ`)kqL?BX@g5Q2fp_1SMclyC<Zn2{_ljP@gau<^E89NP0-QBQzloJ2$QUVhEKC38v
z7ap+G;u(yZs1$z_=1aJ79x}bC__G2==uD=TzZTF{sn^NAYg@VI8#i$qzu#J<e}yMc
zG?CHUrJ23QOgw3qgYljtq4eZvpiW-gM?N>+pgN9{lyR&kejdAa{vNjT?_o{)t+-?Q
zDOj2-%i^B}gF%2HlD&yIX^sLT2QtyMb3dl0l%Vs^bnZ`OjbM*{8$LBnM{93>w!F>|
zJ}<wC!I_^yJpL|b{gI^Sj|5^<jUl@GSTOA)36S^a3MuO1bGsjM%aT1LX%V?>v$G%t
z7p;|pb?en=igpRvYY`wwUiS;ewW`p?<vT!9>>UJk^DZGZJ?6G72A+N2!XRl1^ZjH8
z+ei>5=FGzlhRZ?n#y6-)oB;WUy>R+&J^H=!ASdcwPpsdmv7)h*z2f^zU+xpG9>3v<
z>1pu$!oJeSkG-+Fobq$_iNXz)j#%mU9=C7JL#=O8%*WUmUrc(-Wj%UH%wBp6j;Y8q
z1C?9YUU(i0e7nkEk|pFc3dyeMdb~KH8+s;6F<ZXRne}fJPRi@UeJQ1IXhAz!u`UC;
zK1So@Z7X2)BEH|imNHpaU6dX%5yU@l#>vTcOwwo^&nkTd!FMc~nZXEldzlZXal4jh
zdUBvUNsWD>)%fkq8MyW>1Rfj|2W2KlGva?@$Si$)a<YcoF?BkdyuK1&9aO@vW8_%X
zpL(u2ya>jwkY!_ODO$y9b8gENSXGNWJ@IlK8kD~x)|OqE7Lg5cM^fPHYJJE|RS_5+
z_D0uqRklYM%JqCGfFZ5Xxaxa^U@5l`|9Hydxib$jDQE*MI?3NzuKa>86V#wfJ`?LN
zG(pt0CwyMM1>~k1QH4{ksPRCZz`0Sh&5X|pXNAxwuaY4^Ta_IgD@!Ylq98w0lHD6?
z#1tk35kHR4@)+Iclow54H4@g?c5Mg7+TMrLQj_VJb{p2B@PcbHXu<<c571RL5qve%
zxCFH`SjV5qGKN<%+dK$_$;xOlTZ`rj`Mu0r6BZEW$1_xJqEnMPtF@D1W{WjptJyk`
zJiZgd>#HIDOB5~{^MvbFt^;Z+!{r~;hpJIu@ZWC>s+yxg4O>%CuyZ<-F@A`5%Ga<a
zg%L2Wayc%3mWpLP3b4>K9wRiqgQfT}sNU;An)fO}&n8Q5hsaZW*_HrPR~|vNvr5pP
zz88DqmGJ5cP5Ax%I;3s*iK&wY1#i=RNu$eJbhGUOdBX@u{;wSUJ5JO2dzI;d-7gU~
zJi`lVT8ygbG9$Ays3~Q@ynjbQkdHN%XWPL|9X=D*@(e_s22sgM5+ge`ai#nTZflky
z*~Zz!`M;tp@R&M!i6-IhiLz|uCQq`_=rJrgk_tL)57D*7h?Tn%`eyPo9NTdP8b>AI
zul!25@<Rb$Jhc+OY6wJ0o_(Ou%+Gw!)}vtKS$sawg<(b2SaM<&ip}3n<@cI%TBVIB
z>U9T>d2D7*{<_pwF-g$BaxB{(<VAf4cXN~SY|FTp#&G7AJkPlvhVe(_@puQrsr!x5
zYIh2DwqAzZQ_1Ky;v2Zm{)juYO(8CgXMPrY5@Xfru>VFPPXF^1OeMO=Y4d*UtJ9;?
zUTkFrYYxGc>p`4!{Z~9Q>`vA`9nUUo(q^85P?-DOmv~o*;3tupuynW;KHEIQ+|)_T
zb+jV;c=HX~-o6jk>KaU<pJ%b8AXKL+poha~E_6{Z&Pzx_@oOS<+@chrUj)B3FOMZQ
zW|qJovSi=K`+_;llo==@jEs^4FZ;((bR+;ZPNYIz)HP6lt_a3+6&c2Gxc%o7c(yD-
zc<+TjjLvO>J~{plku-<zwDT@*@(Ma`x8w9Q2mHsbL3Y-CSn+%sTyJdV91kK|ZP8*=
zUx<Qmw?FsQ{xEYKugQwa^6|~K*ZAKk-ZdE`!fEqfA=!}UxMrk~1l}zM<qL=Km2N-q
z*C{q`c{~YIZh-V6aagpnllUCU<4iJt<CY`hH0b$nvP4RnY1SS9qrC^ADasjHbTqn5
z_b0PH{sVaj72$ymJb!n_dwiWyE^NxEfi8C$rqUcow3=rN_ZEJ`Jp&uq*V*B)?qoBu
zKeiaV4mabP7E_v>YlQW6{JBwQMa%6sa|=GL$7gNHFz9|B`#(p)&Zrg}PrWUunh-}^
zwx7dExv@~JX$z1t3^_MvL3z0bEA7*OpbJqjd!Pk2jr}J46mXd+tvrPJ-ZO#j9tLH;
zk6?I87vA2R%)TBe!{SPwg&I1SnGt2Cxb_9V54tVfu_O_<$$P=G(PC_x?kgl$g5YqD
zB}5Ju;}Lf+7(Y}^<c^M|4L9WJ-@${B`ppd^bTS2t*KcF3*TtFM;x4j1i|1l%NTZG3
z2$Z%ErT-KPVCmY?aO=Ajb@P!1KC?yE-uHpR*#YFbNjr2*D1;Hb+oo2;5_8n;@VfeG
zXk2Ci*X8eFagaFc?eNFhm&UMa%lB~owHS?(=l`!khj8bDTypQj5Lds@nbuwsklP+7
zVf4}huEI?e#fS4@Ro6&xeX*ZrL@Z?n$!<`v@E-pAb4+l#N({<6havUgAlR$Sq78i&
zP%zdP{x)ypwm;BhE8<gN@s~lqTRDn0I9Fiv)$c^QJ(k;+osC*=o}%;P23+^*BC$+W
zU>%n)KzD8ixUSB^Kqp2-%_4;T)z=`?av|PY@C3DE<1p8acXd28AP@MST4weH{B4(y
zF4rGGL5>-&Y*`HT>-e1NK396txdgRr?dZ7{cb;|n8lL<af>%#HIOXx}WO$oAl~z5&
zea)Rjhf1``HlrkX;5L@+4>n~VFJ9#KG@Jum5x}+ie}c5$MKE9y1IOn6AtOR#U`*Rg
z_#NFYpcCFfpx9Zs7;_E{2Or^Id0#jgdz!e6UCS2zXNhZfK0`S#C0OzD7CI!i2o-1Y
zti7W*;hx)F{GL}X@M-IV(DRdM?PyO7*k_DV4i9nF6g7I+>jMt=7|_@SNVG-XqP>|r
z9@a?&|0h*s+W~&JX+YrO{WB1GwV%^+@8(EOI||Jxw3NEzh*nkMJ<UMKJUp3}M1Mw;
z`Gyev^*B8GnZYwU{$sza&cieL5NxDRu*!1<C^<>taS0h*wf`~hN_`Jvh0D2F-_+UV
z9wkWruE(<f_LKgIc2J16#BZM(u)(>9Om`bazqjwimm6%!lm$1Tf2S!wk1W9APJc9a
zJjJPId?h+zk72CMC@K>B0GG8Ug5}^<D1O}tGy0UcE?q0e$xovC-ZQCFjtGj3^TL=4
zMOqO*87>A8dM?TgC8kwz1F~hfILnl!Wyn(F-Wkm2m=hP0sl_RO(PZ7>Z@3tl_hi~s
zS$a@D89#5T!CIbqofKEZHQOP1m=3f&FcfY)(x!)R+Cpkn8}2WxBkq?&Q0in94&I8z
zbDIb#eIyuhlm8n$yabzOuA}FN_d&E@B{~L-!rU?eXdf}fp(uT{dN7V1mgC(l+t#qZ
z5$6P*PmP)DBPr%~RUOP0xRGz#F)Yzl71gC}*_?18??+Z<N5dCD{`pEAWj+9w3jJXD
zj%P}X_LKNFU3QV*|Jv+c#LZe;h_jwgLHl(h*-xhebe}B>y5sMH|KDoRUOxn<Y%k-;
zgQ3v%a|4M(N9eK*151nbpkO<SROhI%^pz8t)v@WM@W52ge|II<EL});jMAZ{TTFRp
z)=m7>Ujo5fW(wt+E!Y^BTIgM)2M2z9!d|^fEJ(NkXQ#Q)+jq}kUc+r%V-(AY>3zoW
z$CU*x$(u>dxDe*NE0(zZy9P_+X9x~-e&Oz%HKg~%&hxn*SJ)soo6JxDgD1{>6DH-p
z!1J3{u*IuyV%C%0FyZL}<lf!p{;4Jl%V+1}!M!3NGk*jX+GldQGZJ9<)=hNSC&TmX
zHnUff53#X#9^WVB8P&fHV0STp&*%A~C&Qm}3x>3yF8DUs`uqVEcNd(yOO|m{?nBJq
zx8&Jn2TD)z+1`<Ru>7$J*d3e2eS_yXFPi7vz59rh)(OEzr_h?L{R7Ge6NK8u|KQU@
z2^zEf3Kw(29i`>I;+3baO!I4*@Mk<?DL*GYBkqYu9aDsn5|@B@Tmrv0YWVM#C?jbj
z@SL+DY_oM@X(=M?nY=Ey=BPic(M^NV!hASxuz>9{E639*qMX+8QTW~I0DA6`BF~&M
zp~*p<x__<1Pj|MFZM<JuILjM%+VO1c*s;tsmw(s%CW0F^MA;giy)Ri54)=eD5i%Ue
zjU37M!S(OL%Q6uf__|r3^R}`~)#V6u%&fvayL4$<Kp!-Ir~{9oX4G|FhbJTJ(0SfY
z6sG40zT6pxj-8=&q+bV&GqI#kjCj^lLK?T1&wVV*SjfCR`$%l`GP?P=Ih*t1AEpd@
z3%`oq#Nst3bc?bWy|K=k$p%QW@RgVGX-7YP3aaBW&tK-0`HtVl+77P!>lPR)%R$}S
zK2UjCf{WdBA0^Y@kR$PV2p!*G#D{A9wZ=p+v0a0uUUDIAeE&jq<9D1M8;lySC(un>
zBxuhZ9eOT)7XF;G8}vl4a0)wW!7eL=B;Jsq|8_*e<4b=8e_s2+1}#UFIJXV=@iU|^
zmWs5iR)y-9+yc|5G90WMTULJX5axM{vL#<jadWI0crCq#ZE+MO5_8~tb{IC;9Yo9F
zWNz~^F)V5d#<;Pecp_mmV^8&j)?MPX$y<u)dDRO7j)^mq44xIctppH4xSonQp07HD
zj%Je7o6pL>eCvQ8vd`i1zIV`W_Mh;FkuTfK=S~ky+*P(jIgW^3iiBGt)3N1i5sufL
z%_c`w3%4Iug&PeD)ZqAhSS2!@OBRf#*^<|xs4NSGht8Ae1G6!2nCHqqx{RY7av)On
zGIF|lEGAQgUMhG;o|@`&vKBIoTj7iC+s3ey3v`*1lnB%K9*$Q7|AVDlV*~+~W^5t4
zuv5a{xcNXHIw}rv(bJV#)h2QJRZ<joskA|aW&v5xpB0ac_1TW7#Z39k2x`E4Z+3+B
z<92GwGK1&X9Qu&P_f0HWt<7DO>Rp9LZiH}Q9ape*wFlJ~O4H|=Cvd~kO7uudg8F;1
zxViqaKzS_x?`c(`^`y(VWzR~Mcj*Ihe9*wf9eoG6#WwiP#1RvHIYC3lUGVVv4fjUN
zQmz2F`7e_2#~TB@m=%sr4yBx<tTXMOqRqbkvZBG-tMFj-QFs=c0baUyv8;U;=<PU*
zuhZY-zfLDi{1*cjhDP9>wIAHO(@}gg&sDhl8b>I6z|nP^V3K+e6s&F}`}!`FWt!hc
zugMiinywMI^S9yoif_W#3m3w7-B!>uZ^Da7!O;6uiq;<y#SKL-;gE_w`**t>E(vGD
z=`H`zk)OMMexOMsC%nd`YJpr~feNy~F8r&16MKFLp~@i_tBVrhVqiPB=w~Xjp-$MK
zb4HLc&zH2So<`qe-@x;NDLwgs_tl^_Y>o;nduQ5;T@lO35k5OodsUvgo>OPD=kWWp
z-L>#rqneD8pfqRb6zYZZJiLiQoE6Rc4SC<~YQ<vYJ}$>6RGK;V6+vI}UT$-CJU;PQ
z#;Pukg+og(kyrOdvfEZ#R3l_OlQ6I4isp2}^1Y%gzhn$O{b?H3q%VO>{H~C^^a0fH
zhV+fPY$!7rLLOfSD@R${)MP;?6wYPGey`^~pA$g(PX`$BQWE23Vo}?B8ui=Qiwop{
z??zK%f;!+NpTd>KEM!Lv=8$>358~?^6AZ9SWMS@ms9B~nEO>Jh_Pti3$+IP>a(yeF
zkR1msJs6HWw}wjTT~snklr9h52nNrKu{XeitZ<Z}hR5XS_uBRF@5dC_r2Gb6yFUZH
zy)U6<va0oP+(~M%D+cd9@y9O>Q<>&FKfIQC7Q#=QfNqnWWhuv=Vp9rXZ~1epqez5n
ziZEdIJY(;vLq9|mOQA`^T55V<g(|l4?(}D`Fluxt3G@F3CHBqe!+*D=<1_Ho8bzl6
zOPbYcM8miR4K_mmmspmPg?TkztmnQuzIfQojSCUN;3^-kcOB2X5;up}kJiC}aT<6O
zdZ3R&5{e~!5F{U62Qi<{!1l3i_@-kF`<XF=CjJfQ?#hS3lK;-Y&F=}Qb>Eo9yhy{$
zI%^!Mo6q-x{o$0xRGfY{l6>3fZ{vI*2*dBEV_U2P@~mkXb{DnDbiWH1XYQwy)Yfy}
zo;+)<$_C;C9)X6@R$R^JGY)?C;Z`-R;Z90xF_A}i@x?qRJf7RnjX9AG|1~9Imh=Ed
ze=g={11i{@RBw}UZ#iD+i6jfn6@<6u=i!W0TNp7W3ZFP=;gYZ|I7)0Ht8@0|^IyjB
zT{aPivl6U~r%K@oa0kJ{sY127ILPlm$-0VHGD|srWq7`p8>8LI)yWy6-JbQ#%jOKZ
zd{v!2so9T#@-3vXI*85DR|5B~novAtn3HH!042r87`5jyNIxD;&Gf`*ftebOxHFoq
z9vg`DzH>>etvI!<olgoIYVh?`e^i_u1s}HL!>gn&91=W(z>Nt434cBI<!3$YNRdKp
zXaTF>P}qLY8^@SUhvAlQ!gcFJvEk`bW{6g_MN*nQ%32K(%42EyO3EIL-N-chlHswM
zGn1L}iuefqN&ie+V&7*~W?>!(TKNWa?EbY_&$A@=J>^|CZ3jrI&0In6)=+HT@tpUv
zRl(r}YHYE`AV*aP@YN>1=dpe&Q}mcar#}n<iD@(0((iKcIxPaOyyJHve3rI<PA}2=
zYEBOwjS)oss^k8)KjR`Z4OvXN1ntz@#C-T1nD&h_%v_j<9$k$P_4p|qd6FTh(A8u%
zu|qg?<PRB~B|`)M0my7VWOM$QHti%WU}W`J5L|o#y%%-E6xs7A5--7|7H6aO!A;yR
z)xDg@?nUgq^%HcsQ2`tO&S$1(63pVoU7phvi07{yW*0Y4V9kZkamZ2^{?3vHhXNg*
zv8Ri$Y7cj7<$w5pxH|8DF5mF|o7rS1BO_UvMR?ukWt1dINh&ELQc7>Mv}Ki2h)_yO
zLc_@Ry3b3bB}GX^nGGr}?WDf<=ll46e);|f_k;U&U*~n4$MHN~y;CRG(lp52W&iAN
z3BSkFvQhAG-53@IHj)1(2~qcEInwL3nnYgrz^z(Rz~+p?K4B{|{nsv7yniE{ED)w8
zpFJV}p9jwIdknKex%;BVGUk=vPpmCih7aOCg2%84oXa>5YmK>Fc^t>nSP3YllgKs>
zoa1eEKZrxus^NT=EZr^Uhz~bh#++r!oQ5!qk=i{0qJ7-BexZ%<Fs_c>_jw1rZ%AUb
zQuN@`{%U+SAWM(>$kW=jgRuChEM8mCh^^N3Ox2oK7~UyC9Ju#JOIMtU&j>^Z^$biO
z%ZK?Qo5`CrYiir>LoN-(pq9fysLfXa+qa7J)CG=XP@qU&J>i3iR0iDJmcq8*Uq-%a
zWW)VVAu^iy1q2rOLGyv-jP@iO&{%f>LSy6LPSst8DBj|j7=uia$~8PY=!MHumf@kh
zWf1CGjqA4PgP8gNjyd~*Y&DNQ+I9-8c0Xq3Ck{aM`z*}YFogoec6?AikDh4VN|o9w
zz#-^32A=C+zP^d$rAGC^Z#50NOIx1CN}Yj2PZxq;+7_%hCP0INdN4?l^MrdhV@s1F
z=_!!GxFYWU?ViZZ-&%B9kRzS;Oo{HRT!!O&Gr(@LDY^eP8WwNdPi-^P_(gjr<Nbve
zC_UdAv)ep*YPMfM&hIKuiu<N4xb&$0>8LS0+X=8PNdSTiIezv)A|$TTphf2c;M5Ci
zx++DS80u%zZqFRNFB(cO2X29voKJ#1y9MOgbXc=A8DIZ<%+5OuAZ9uYy+(&1(pZ#!
zxZldYI2sOb_E&&riyVwR=)*N(d+^qse7KbQ5*|%{!@hBmp+PS`u}WSo*ehAYJUBm!
zYI{!OkJviq&srst-n5_gXNXV{_5~CN$KYhaO!%@-4PJc628jt$FxXHB%9jOE`Z?#L
zKD`JOWYeL3>wFT#Z$~z(1KyvPrAj)X@V=-UmL=u!1ix;<E1Op_^9sd@zQ;=}jx2(T
zLLKrrVFSE*c9QiT7{#FSzo4S{86WUV8CQkPthjJ1yU}MamM^!W?mx>oJ>(Sz2U-#@
z1%0}yT!Hy9kjU}Qo7h`xWl(RUF4N*O%(exlqk-TW8mgj-Tl<q?+nT9VTv!`B_eC*p
zMTF_!G9Pfh{s+ttE+#jIBtU0*CR=?m8J(kB;L^QaZ00RVBF{>Zn1rclH2oM|UlK!Z
zJH_IBwK?`L(;aztL;s+j)J=S{qMMmNu!QxkDP@G*!a>IOBgeBh!$rwrRQ+EE=M~Wb
zzr(<|E}TyfXs6=~iPijb#;Fh=BnAdCVRS^tnSP%j%k@ApxI|uz{o1vUNn61&mK_sd
z^8EuiH}naZ?`gsC-zG$$Rul7;pRmezIL^=oSGwpk!kz1H@nF|ASoyq-b?J?ULLD8(
zg0z9z1|At)9E1OU-@%$Ae=+5R2CN+Og%4{)XmN=&HGd;au4fIRqR|)T;NO#g8S>=R
z_4n+m$^g*4y_+tJmZF<D{XrjY@qde#!Mj{#nz!o%LyeZ;>|4Gx@S6oq>3z(%^qz<x
z<%e)>kva`;FUBXu|KaMuGw^<(3|(f~l8zB63WExGX3Ax@<qpTENmizFy<Onqz;tR@
zJ(U(Zm4kc!H@2(Ail+Rnhd@CG+PSO|P88UK&ANBsr4$LH)A;O%GN54@ArPbAfnBbS
zbXiR$yK-wHJZRs@D9&kxU$1?@yzUdEMv9aFw9^<p7V-6zV5~Yag)~bqz==BhLE=w1
zxf)F9LqlXYnjD5Bcl3$WG;b<j8HY0S8gaiv7Bm@tWF5DxBa7Z1MYs8#nEkUE=I49h
zJF}-OH9ZNNxqF$^=ku`W)gTJBoP^a!(;4Mv6{@u18e-l<l)veW(|Y@0hr<J=ieooC
z2^(NS&fElp{2SoE?k&t^&x7AN0cZ?p;j0+-vm1^Rs&uLdkJb#s-OX1a`VnRCDrK`v
z3+M5@6}WqWuNBFU<`^taYjIHMJJbZs1B0dtc=UnG#wWyZ48Lfk72V(<=1zCod((FF
zPq4Qyk=JhUi+PqR!w3(1Gfr=~j#+MYDadbUTcc*uJ<JGfT9LvgAIV{)?n%={ir(1j
zolfJDHloMO6-1oNUtUqsBr|69!g+}lSU0gABs*0x_fj<M2#A3Gt0gEuF^%!EQe-9k
zU&1@@x%ek<B1)D#gFr3^Eh2TBJ?xUnOUhM6pY2ycZomXR!aT`K{TTjP2SK{VC=(MU
z($U^|5f(1~j`Twpx*zI<Yud?dF44uRAYqE*Z<&iPzG0Md5Vl2+vj43-h~I=0@c!f>
zG+eZw=WpDG9qkL*xh4W+OXy!_oF5Ntha=35k7i1TO4;ZeN@V-D7Vd62i`GmKA`P~u
z@Zf?b_NSU2K6dtGx9zV4Y0C`|e$fmMeOpL+5~NA)2_D|vTZ`F;F5ysvC(N(j1e!PR
zV#I-qxMe$w55M-|919<u9B~1LYg^dL!XCVulgH@>)i`Lq7<U)n0X^AaCbIlKwEXUZ
zqlu&Jm#p=`bbNwiJ$Kl1$L5f2c19%D;34Z`5<s3meT;oe-{RTg4{Z9AdzgCWBI_?U
ziHrvZq1vfGOyg&UJRO=yj~}dotZY8&U7L&J?Yo(vj=Ok_DFr27GRNJ?1kuFj^$S+7
z#F~mYobh1|ms{y&zm==dedhD%$LiheqN&{ZVf6qGd9rYn^T-Fju!F$kV)RqWAE*gk
z%M>s9icuVEkvXNxAFTCY59`0f{(@o*Y7pZ7N0un~T9{)p*5URy6X^HUlla%=1ddf5
zM*a>9s(wt1cAF_N!`1u2w_gB;Di1Ih?_J@8#7_Fc%$qq`)dT;nkt3=@7HItQ1ypuB
z)2&LE*xWI0FH?7#IT@YLTx>hU`0j2;X-7AbIa3baaGp?uv2RR`T`auyF|%hs2Z7~|
zZkTvih<0>sr8fP6@NsxLIrg%?zQ9|W<BgiY&*N7ZP1{DcDx(S(6zqZ$mq%>LZs5f`
zP3QQ5dURG5gWrUHqU{+2s`pJFHIvrifeEKDVg5;2ICC|ZW4Vl(g|g5h9}oY<cd}EX
zFQW$MH|}jWps9nEup#6CJpUR8)s8p8uzw|_ItkT3U15TJw4(Kb7ohOQX)vE@j-S4*
zVs!Yy)K2jQPMROcu6kik8iy`o_J{-3KU@I&&L77)Du;2?_*vL57KbJK_OqRpRgB-9
z6OfzdPn#y3XHKougQLe|=zm++;glM2vgCdip5hn-e>?PWeAyKAPi=;$EthardIfWD
z#YxPWKZ`onCgAP^f^<`S8?M(s1r-K6A#B}h*!E14nkp|NBf8GabDtatp0C2OYX#{m
zxrx;6Xcm4gRHDa)Pr_Vo<|y)OhD_yFX73&q+?MSOp9VZh?)O{R>Glhcrtrv&v=TT~
z_zwj+*7RUyHFPB}f~J5kXr|E&1DZhA71$Bcs66V~dj$eYcC%d~UZm)%3CL-W^M_oo
zF}m_ecrExMnrLL9fMFS~Xm&%ZO^dL~_zUjq6QQ2pSKyAZ9Da$O6AJw7#t+|P7^_lG
z<Z-j|VbFloK0Bg)Se3diZn59SxPvOE!M`ln!B{SoW(8kez)7MROv|x};P69)?BhID
zwYe3zqMGw;Dej}1QgP^g;xPR7=PCegE5JM72ed;d`kcMXx|$1<mhwVqZ}<eJVXv93
zERS+b3I@HNpzN+B99Na2RS8Sk_91O{tR)F@7FD5sh6mBzaSFBvadUUSKYKUV3|HT-
zg%=AyLGreA$`4<`zPR!nV%#UvU61Raty~_SzT-TY((<s%Oo2+dWMBesCoGPXAZrg7
zA)F4P>*pBL*mK!5{<k*!x+nv#I*8HDPjABZ72;%k+F^Y9Vk+s7SH{y!4fwbW;iuzQ
zcx8{*V3fHXU4O%!wz+T&?0k-e==u>e@4teT?^H>(_FsF=hm+ArYZzzU<MzrahQ!=*
zA*nA7$JxTy;nC<%wmStGyE#*Em;XcX@>mAxPUDPy>sB&$y^)pKvznH#mqtyyGKhTK
z!*;L!z#Lm^fen9E(IqE<xERi0S6AfW+Af4~Hw8Mgq!98_SZ-(8!30&OVVHv~cxzo}
ztsecwd><|oxMw1b@lN2mUl1lYv;L!z8TV0kT#eGb67)>fN9bQ?jxUxaLR8#g{55|7
zZ%h6IznDezhs#;K67PaH)26U_8fNsIffy+e9DxPe8I1Lhe!lPIr8r^50IWRa2Z~x&
zOs8Qc$Oz2B{9W!;^@1$3R5OjYW3D{&t^P5t{~H4vzuB_(YpqDzZy);KH&JS|*&i~#
zT97g{Vs;BoqtuyWi`Sh5%j<8zcJ>4`D&fv8^Uv%#LsL5TK%Q(keG(dT8oAs<DX!(u
z#=ljzC~?7qpYk;YYZq=u*-lOB=Er%IS0}Tf+0*fLOdL$Ouz~qiIf<_EO2uzC+nHEz
zF>u;=g?^Rmhh<#uU;TPGt#ICp1|pi^sPmpR^v)!oOHQG3b2YroO<>n9RUneeG3@Uo
z9*Meq3<7-P>C#*AcBx$UFy`@MDDJ<(_{Pnl!B?`d5st9e7js_Vx-Hn5bPOLW1jDtZ
zt2l;O2~yoptd`haCM75as`CrcXwGxA2Q{3%UyT~t`QYu*?XY8y5<E5eg@(2btliS-
zByLQGXF0H+W1Ht=(!y=<<k}rbo<0?S>%C%L?>WaT<gzd;%Q*i`jUs!!_90)k&x19*
z%IU5d@=#&vLBD=WVXu54B>8$b^StUio7&=zO#*Jrz;j#h%-IA?S`=<9>*j6rF{E{p
zlGOO0C@tf@!N+#Rp~l@?TsLR}bB){I_ipq7&PYuKJX)}3_aio`W)IjIY(|fd`qY8*
zHWsKJp;P+3LETZ6&T7oYvazYOOkRg<42fmc_V>Zu2|W7ZkvC&0*~DdC)9_wPF?Uwn
zwtrs2b=G;EhtH2Kad5m4?FDQ}==wIiE$0fqH@^bWO%?3-^J&a|OsyZ;u!Ue2mqC_X
zgKg*X(E7R~_3kg=_Pqxfj~Ppd{0>K^rD{8Suv?2-+e~H4wAXOmH_h;{{4STD)q})Y
zSJ)#1pU`905XN1d2hlHeN%gn_+1{v1Pt8^#T{1$DBCAA-!Vi!lv2N6CT!{SMDMYV_
zht8*uF(wPH!tRO=TvMR}q7fQU80<jn1`SD>#3A_jdMf6Ady0{NyFo~0J$&jY#UYN5
z_C6>VRzK`y*5`<l*Nu9#V#+MISdEy_GlyC#NKvx<JiEha5>0*TOgq&Rp;!7Y{{3AE
zoA%f-XRGp|D_<J^EHbAf<9orG)9deE&tjziJILEBEQ9+v4wYDA0oE1TvsYs0VV=oJ
zMqjZBiiM(>kMYgWuQ!Wij+{gb{~YLEdYP}gt`GNIAK^SNOMr0a^Nf;d><NyOKW$L~
zEUM0j$=hy0{tj_sd+7@(wU)u1&6<Rm{lQYziDbbH52z_^uj|nH2>bULLhe#C&f9Vt
z594lVxY>boPS2#ZCW>IvHVpM4yFv1n5KWghCF5Jea958YlM%R=oRC%_LQ`*H#nf&T
zw?Bs)Rf~D;+qUtV1cbmO^%Xpr`xe8VC*#spiZnlb6kiLSWzM*`<IC>%s4f|cJc%xh
z*D;}$_1o&FCT(F3j68%TzLSaXHFMU?SAg`a%tX<^44P|eL6eLIpwdN^z*{+D-Q103
zeJ5~x?HGSgf+dYst_OqX9PD(BBf6+M)3l!pFv#B<ha1~rJ}XJ^q$m~LsLec9-2u?#
z$sDL}z}7cz>>bG{eEQoTKj>N$hwqm_@T(%)9?O8?g>8^=r5~<{b1XrlS|~p~lY|8o
zfW#FZy|mDZzKh_rkegq?H)I}jljFOMZTJHH<3(tBX&an<zk%-fA`AV22S8`N4T{8i
z;h{`*vQcOfJ`7op3~^u*yIts|M{8*M33vQpWK4gV>}7Y}oCqRuuIww07yI<Y4H&)#
zq`0LR*WIYZmFd~I=+`Bb&=<gsS`X1Xp0Y-F{-P+C^VzU+2S!g~ap*`j$IFs}v#^=d
z8Llz``A@Nu1~Wm;g79#%E4`T_M!($_hq>SXA<2Bk#srPw#olhVs<8#9+cd(uG({Nw
zD~~@f_24GeN;diUSl!(AAtq8T0`&hHkqyHOsOHrPbVcAhwm7eySv*}IjQb>Mj?5Z*
ze%^odLQMiZ@HC_IRE_DSuKR34uQaU_?88t`H!5{zG8z3g1ZK{OV76C;vEA2!+Zvo<
z=fx;GY4<1|WS5ck(G8&dDFoscb3L>XvgA_fM4B??9^=<|2ZVfd=oBews+l^M^tlR=
zAJ>mCE|QBer{p!hG4w{)w|($M<{u^w3)5#~9_+)zoVHij!z-Sihc*UHaHvuSS6^<%
zyIH>>re7CKUBBRyNO6$!7(<uq=BO*yjE^~<xbadkI{wIrD71Zm)^uHJqjQa8$|p1J
z(VbANb-KPy_7^P3Z^E#tx%|k@Q|XBATUh<`J-X$^!%KB(YLXJoO7GRRS1uBvdCO9m
z-EO+{s^4__`G*K<Zn^?-qz`=jl<Ch!$QWMIrf1Sp_@N<AeByY(e%J7Mydts+7rlId
z&nLe{1KkAFk($Ju*;>f@jxNVnRthu<Cosq6EW~Sr5!mxX5O#BX7+;QGvphKv_Y`;V
zy8dyTFb7Gd&u}9vpbFHhR+mg^kw)#zK&-QE#ax97FuF4a7MpD7+t@dtGjyJxn-gvS
z)Zi$O=Rbo=jxHyjs}v#Om_FWVmZY5{s#IIs7p7enp~rpBGs$@qiB4$)#~s&(3!f_R
zRJJ=o<`snOUPIoCrh!Un8b($wqd#U=@S@hrli{2cZ2LY8GdDG}Rg?7Sk3S}K%Wiq%
zW+qN5MJ$=j><YZ4u^COR)?%031mZng9pCMu_+QK%*f{$>_T8<++WSo`|Jq4#R^Gu(
zt8HZ-TdC7YuE$_S&@nh=?8y3k+fDY&+lWRF6v$+gk8pP2CCua)POg_vLq~uPuJP4`
zXI(yc-$5OZWjcVV!6z_~%;NQa@MPa!<5;z>m+<HXV<MU3fgH7h6~14@gdXEV<QXBN
zJH+MUd%mzQw_L!Akv_-|K8<6`C((ZO%i!PVgFC-y(ypVvTzB_rSfqLakLdzd&empX
zKj*S=jO!8DAj~E=tC0DDCG7M+e_=)4bN17_F^ni)f{7fvO7utqwGnNGXax^?A}^nP
z>3a`<)SO4jq(s~!EJZWA=Ahmt7j}Dd2;Cu9%g#@9r72Mq9@jraquVy%p??;1vm9A3
zL1j`{RAj&8@>;N!7oZa52bgI_vEV=90Dp;12I>l|#F21GI^)=Uwz+p9yE=CP1p0np
zmctCXC`N!xvA@KwJuF-QI$4q|Qyb+uIqzqU&%b3nw`AaP87XS>YYp{TIDy_YN37Vk
z9Zy)uz=$Pfv(=76)Um7R^lcH^?Yaq>mg(S_HxB}TweY`$UB>e|H*rDaM#v4+A{mt`
z&{Lj8(mJZ(9mg2XOW6Vs-kMPJNp|e>*~^&9h2ODpz6O^6OUJ+7T41<M9AibZV9%j^
zFwl4lc8mbs9%Mla{>{b{*VJg$M_H0Vgy@i87?r;uMgsRYGYcxl@Tu%7s<g5S$73&n
z_ex33>sw0C9iIe|t@cDJyO>$hdkzP~8!)BuD;nn?wOjlA1rr*pOD?A@q{oDVG1Nnz
z*jm+L()XJ%w$+Zz@8Lrs#?Yn!A#$H%wI#U};mj?3w6?N_g`8jZQCK<D=t|&x2X)q8
zYAe+EPbXSaYC&#s27Siq0mgsz(9uE;_T97Rr+IPgfigRc`nL&gSk53vS9L+r%p}%N
z;vkagFJQtcbsCl}3$K>y(GyX}Ahr1&^KhmJswP;|FG^Jq)|QJOGk!389%q6`K7(!n
z<{-G9GR+(7nByt2>{0&#)VGL-19u#$y*%ehce{#<k9k8})<g)jxz5;HPKC=_GQ?Y@
z5K~=tqiG=5YqnB}y05i^s{51Z%mQC3-FyYF7WA>5U_>e+?tuT**Gw4Z<NW3nFiY(P
zkHg=Y5-&;GJFLh4yKDmC?U!)zov-X^tJC;#|3;?UY!(~3Bp(}+^=SL84fJ1FFe$D}
zXUQ%XYUCI|2XFtv#rh_czC4WOHF+FMfHF6^JLRt#SFr0p6Yz1@gq;cod>?fNKT&HK
zoTJEG>=GyUt;JcXvkd;8z7HRGEF)}A3v6>rVST^KlI-i*_HJRC7;;IIsN`|_q-+e#
zI%G$@&XzH*D}yO*`^e^372&hbVR-c=3maC7visf4VfWJyu<TVc6YQuD-m<INZ@J%4
zyE&E~H+jr<tO>w%Zv*K>E<2#?R|Sswg0M<?jGJL!W9;wWOrOVL_Ot7KOyPXJJDPcL
z_wQ#I=n{e1pREy=OroP7l|bDu2bVci<D&s7a@?HD<;Nandhd;~4^s~_IvQbgY}gJb
zNEgE%w_nVPdG6%xS##=WtqY_3z0q-#F4OqUntT~?XR@!~gvWB<@!pjZ=;m_Q+}@B}
ztQ<l$j@x3EcmtTjden3F4B#ofv>!dc2Isxchn&HQRKso-v3JzR4elB2m2OkI{=yKT
z-AVA#`-WcX*I9|Gc1-B}Yo9-0jefcl*^szUxacB5$>M#GTAzhEo$9puc`Ali1aQ39
zW9&|{h1qb}ipkiS$NpQ|0eer2z$3GCj40P(Pm@Oc8W)PfSJ$vJEL)fyhog9o$-<CO
zS!$u0%k@jX0o4c9xOCVJlOiV(v-9uRnp4_z$@F>H@t$IBmlA{~>(DdhrS+DhWlXxK
z5vUn=f`WS|41U(Yzn>-Oh2k|(z!QhOEFtQb+Q_CInL#B^?5{5=JBzqmiJ*rUJuxE_
zFKCZr((|`WGk3RI_WBi=)(cR<mE4}8;v*Basg99~lETiMJDAWi59MPTINkgfxcW;G
zgHVE!E4$eb4;85Qyf}9L>3&vlLMzt?_Yb?GCsA{0U;0x18Jp#<iI+uW$+`|@vWoN2
zPP^QM$-?HOuwe!f{1l7tR+{o3?8;-U`xNLd2{qg>b{mFD#b~9l0J+g|na#N443oR(
zkig|9$gR&?ac068HcSzstHkZ-rjd(~5v59YFyDD(>1r-pC&=r`l1BOM_p!m`ExgNE
z1uw<8{>{gCLHzX=$TyXx^|D+iP>>@irb&_S_NrLZ*99G}+rU)48GnxSvd=F>!lOr}
z=)`GulMCZu@uKrs8#jeK|Jr~ePWm)Cuo=8`UNgOSCei8Re^|MNZ_zbK1!YD>h*9D(
zPSd!AW9qHAX=nf|sw2_j(_6gdX+~zuo<b(BQp4fnfn2}R8!(RWLFre2u=lC}Q563U
z7ySf@!{~VkRZ)SJ9!t5tNDDY1HWRb12D8CI&v0t;YYcpCN%e$gQqdf32+Dp7A-`tR
z@w;O1D(fQp3`_tiO#>RxzZyS_6+;x4&+Tt>!Nu2PNmK413fshST_Jzj{T;5LeNUM_
zk9feWUpA8-s7(gmfFo?AUzz#b{&MI}BICB>0@!}wLH^ZnI>h~MDsOhezw?Qh);)&K
z3Vmz|cTU%+?j|kaE~MV_45OT5K;OPGqaV04&f@$HOz$utPlgh>-|BhPvOxHC6{y5c
zE}tT~9v;Q#z-f*Zc|Aa%R^Qx5Gd~YAH%buya*V+VTOOl|hXh(ioPc*xpYc#M*E>aQ
zVD;V{cFvJ%=GKXQaB#&&e5=<96+3I$LgUlW^c~p{>$zAKF^Bn5Rt3WUVxVx*Om2T*
zLdOO#<Mij5>`%^1E^DAfmhx88-^P*n$ij*Uh$u38rP8F=WC_;?Ax}G&cQG%XC2>28
zcsem;IhzpJkGefF5R~IVbfOQldo!`#aPI(nu>TqqevHJ$mCAIhQ5p&^HD?$1NmHMH
zWh~$4DYNkNZ|J$LNQ7!lVfVLDs6VX2?%NT8LW>?_c=`@p@*ozfTr;rKHX5w|n+_Xt
zcq|cDqXE$(T>d!;Q!eX~O}`ZAD^Ypk@@py_knv%wr6v<U^%?xqxH$fG5(kOCeGmlB
z7~0c;hFKkqmaaV6bmaydA4;!p%2B4$uZ@@;I!TQ9mi=VgK2dONnvZYKCcqtw=j{GD
zl&xCd2kM&}aOg!P{L#;4?bb??Fk5kQ<G(RnlKKUA{u6?Ph<QYPKd0yX6=7~^sE`}8
zm2lpKP0Z7lbQW`*aQ=la%!cQC(7Mo>w5#h8`<N88^OA+Bt2Pqh<9E@2RTFp}Hsbh{
zud#yjKb*C40fRp$;DyBinjA31&kq(b313yYJ8vxtnaqJ|vrPPS;)(r&_+nf=TEO+W
zL}0|S{mi`i-w>`I0h2gW`fhz1xjB%8n=5YM*p1uRy=f(t2;7e*LI)AAsnegIRcO0v
z2`+F@XPiWgNo&6hjXm^*>CavP$`PH+Mafy{_vR}rHn)hGzw<H^RCE{gbIw4Gz9bR2
zU_v)M3c^-y52W>LGRL-@PX2C>B})ZlFfvhsSTb>#mE(*}BIQueN$_mdO5(Fxo@R<q
zBC{sSvz`kQ_=zjTs3@l`T3iyKUfMfYeVb|QjgQ4p@j;bJa!jO`GCg?nf(uzZcpc5f
zcF+Yz*WiX=J4D?|<h(e>D54+;8fnQ;zj(a<X^B7a%Ie2k&hKEljSNX{6C;ad!|-{4
zDzQ8$gKFx=bbpI95&n6J|D0<(xR&`6&*m(LOjS#}@)L(>fW-x@th>!FDwqs6wmf0;
zHl2s^t;>kRqDFSFp*0m&SU_$+l0zq_QZQU?j(7Gq;{vBPsN|jFcNqQR$IL&-xEd;8
zPwOQ-u;M1O=vN55UjcOT%}~5->xvT^O|f(PVfyO6H>_XsoF~6W6Q6fB)l1JPz>}+g
zU`_QNxK{4T+!fa#<+>?g$N4~(&dk6Zu0uf4H=PU_In&5ITliG44E{)&(aZVS=wi}}
z2Ya9M%M^^@$)jX^_unCW6VB;MJ3iy0*SU2U-7B!#ri}OfYXQ4u%~X;ms7<<qN8#7I
zYY-CQ$mDryGPY+c*s+DLaOS<q5TG`J-ky5^x6Mms{<fLWD+Lxre3~4Y>o1MXr#m@@
zdK&8-&tZ<%HKDK4Y388l9{T>JG<|kY53)l7Y5&Eu@IyohVv0}0-|)9E&#eKDd}!t8
zzZnPD<Q1^xkPJKjq6N{mJjEJ_J_gSL?zg+n3B$baz~Y??v2YL9|Fte3kFPW$=L(Ei
z#l<RM<Ms`X537;L8CxP}{(l@}OY{HBF}D6+PO#1YaDr!fMcV#9j<MbU<rp``IKxKQ
zGwhA+Hn8B~QD#rMAoj|b;6_#tUy~?oKR=smS};HfaZ_gJ_$U4+A0c{e?^D+MtOC~j
zoQ;)3CHCq|uJHyQ)v+R`Q&2*qkDa))hkdnO0Ie&Z@>73{vXS;%B6&f}C;mUma&X9|
z%_0AbF=qbv@!)?Rk9b8o{LkZMx;rLDI{pvASRhkCAkI9LnwOu2vKblhw8)07nvGC8
zCP>HhuVBCWY?}Vg4!-UC4J*=>F*!JtZfgI?s7*4UTI?-mLrXO@d9{JGL@7KNnnFuQ
zB#GM3iELWC4EdfsnOvVi;Y7`RstY{UE6j+_*r`G94w}(*pEbx72P2YO%Fs<|8uWbs
z6kM+;NrX`eX0%TwtDgn&e$2MUQ)QFr#*CAU&9oJ?E@l{BCumcd=4rI}*c~WM+DM<D
zDuHEMvQ)xhHF<yG8smH5CFUBO#k3#$@VZMHx}*w_1=U>JUyV6#R!(KLQyu8SVj*IE
zY7gUc#2aUkRwmxR2fmU;q;KaGRGGXKa;E%&t{39;NoWTgyFHIRe#Dq;4V=WaOIh$c
zq-K!domNyPER2Lq8|Po<nsd*kEy9*{Pw+(AF<fT4h8j4(hT2QX>=SP}o}a>3C_g6%
z0oIC4`aw5xA#nnp=$nZ0h78=^lnYU=z4&Ud1I4!{qJaNak{c{cYu!|7w&pCd_K7@o
zm)OgtPrt&p$(iigonego=qL2yQjD@?^T^)GIp|b>7_+(CfwE{0^E|y6WWRO8g=7IT
zkIuxf&FA_4BPt-WPK~U%>WEW<4VX_}7a;13HT{*nn(WB7BY8IGP%^-Z*7@h7aNsU{
zIJAXI80gUbp4ue4$BtP4YO3dFodzFK6S`lb7iu>A!we%nifr%4?Cs;MM$#y6-irmK
zsXYVj+>23TPC6sLq>TM=cmn<RjxyZ~%Gouk`j|D%9i65h1j(&u!87b5tJQu6ju>$c
zl;-VJ+OnP}Y`UNQDa38U{M&H?&z<-`e2qD}TzYQZe>BrZnO-P4h+Q({IQ3&C=8FTh
zK0B3jfSHoizdn59%imFOuO?Zg`vXk)jd)`|L!$2QWPdN3&d&DrB@bXZU3N$qB)LXZ
zq4g9NygLv7?JB5GN;!fpQ_u06bK4=^%87dD%%$H%CWFU&6S^Wzj+jQo!s8|GbiJS)
z`8o41;08IGvpp6LcB#_ehY6ZIIF0IV(F`kc0_JjxoxtxTh!|Q9TXmN5MVnjUUu7UI
zX_uic$+p;V_b3xE@|0&$`-kawHzTHwCt&U1Uo6r)O4~Q<k!_FU>HPt3xE}e2J&IG|
z*`^yfw_1|=_T7e6GcGgM7qkigIv-7j&51x)4UP*dkazx?H0+KDH3|KI!Xj%)K$jo(
zmC4XKts}6dN16VfoPi=PjP0ox0Z?KijjMLmfiw5@?hamst9oSVZ8ZjSo-Ki*MU}86
z<S_V*Z-n-$ZA`$1R+P9gpVjt^#l97~yt$?2SRefm@;{ZL&YNK9?v^Gl3K3Mk{1Kiw
zQp5l5K9e-}<iXTUlElAsHd;%&lK{pOlXTVKVCpxx^(6&Hj{ET&To&NY_Ef&}8V^ER
zmFe+YO7w5_6-aQ=Buq~qY)$=%pyN$59i!0aye3uNwHV#qKf~Gk4_U>>3&`nJd+d|$
z>!U+}I%%G93?j-8k^JRm^uI2KD(#J>%?7bd(4v#@Ppb*UH`HP26dO3yu$w5kucFPN
z<FJHta8O<~Y>8GTnWxQZ;<@|y?8|bdX691TQgZ=+&s#;xtM`!iS$FK!bMp9?pGuMy
zh8yTlNma&h>u0!a*2U;g@Ps_|2oj68m|#_Ndgpo%<5v6y^0Zivc6%MX7AJ8jjVg3M
z5)Suuim>}-4VDWi5Cg+xD7>}}(=I#Hx@GyuFJDMTeworwCzaUAbDMY)`(~36mvOL+
z7sg1&8$a-u(@o56CVOl;@wjP5gYFf>w1#+g^0gj5JHH>dyyQ|dTcXIG+F^9Oa@F?V
zJ3;tQI+-R1=|an<Mn-P;W%gH;II+9^9QE(jKuhUbRI@)yECnU#`$HQTO_@?Sx*`ZQ
z9vpyY>yt?IsWrq#bQNv%Yi6hNe{cjPZac~7&~PbDNZ<1nE{6Yr2^RxE&!!MsAOB*m
zSep`o3^fq*&R{z$j^hWDuek8jBAoTgnp0;l-~m@Tc16?#+BtVQt2oxjlD}8EZM`dT
zDpY1~230a~cl4O$l{{Kz<w#_wde|Q^RHD`fi>Oj$I=-}ihv#g~iOX+8mV0Vy@^Lz`
zIco-UsRL=h;zONuZSec*6r3|$gCQaPDD-e0Gh<6TYmj%0+R8}Nz~UgTxl@W-CO1P7
ziDp`7jxn3xx8cbBU5ur56>L&GP1{Y*6Q<0N98VA=KTB5PnNy1)syv0UZ0%u>Y-1VU
z^h3<#JQK2_YZAPww&J!!KE&os7{mvwK-HRys1$J@<a?IU&>S_oWquq~+9uNGKVO+A
zXFmgj#<=>P3Hhct4}aJPWAz*%+AAc%`>kVa=k;2Yt~ww?rtUH%&*D~)wS}+nkB~dM
zckTxN-*e#1{S35;IuF6VTvF!Xd}<i-6xUr21z(Gou&zRa=ss=1h39ght7ALPN>d{)
z(lM~Ka0AYz5_rs28&VhQvYT6r*srr^5aZdpRC$jLUdZ{6T{G2&Zj00=T9J~ZFx~>Y
zG|!>u=`QH6i$<qS2XWPhJpL7{_v|63_xLnsBRvyw2j}O%ho6it>B_$fgRZ%xIa-d^
zPkRIhJya+!eiZM@Y$v6Wu4Kn>Ez|zpnv^aur9n2D<Xp%)`p%`7U4BH4?pke(H9G|<
zlnkJ`fDko0GK$6Gwj_B+E<f{aKK9>;12?}tOlhn#ZIVr3kLp)2FPSNX3qzvA^d!2W
zcL{m*XCY~S<pnOTdCX7e{}_#p%}}|)mZ*;8umbVLAQ@2u4X5>K{6Gf3PNou{jSpj$
zMmYAg^us>w0T>%U!1q~l6|e2kN8Kw4^w$k%IBRD`#947N>=(?~W!;5Wp}X+7t_(Tt
zV~6R{94YLh6nHg?(I$UO(x@s;?%!EM<<0d-LvtRkXpn{2h0}@ITWh-bgEVn?wTI`L
z>ccf<g_0FV3h1$mLy&!43!T3!fNf1M4)yc{$*lywavMBLK8T~)4a|}$ld)zd=Neu;
z3`wU>qP(&!)A4U1Py0_CwA$)nc#|m2nr22e+h*e$=~48kY6H@DlB1n{XY;uB7&+B>
z93>|YosuV!Pv>>%lnCy|r7RAUqN~B3bJ6C{4x~3_*OOfvPLlqmBJ`lC1T{}EX6&w7
zkl$(ydHL}rrxq`Ou={Hv{qF!Px+9%RSEo?s{t_C`XQ+UxBo#h<h8=z8iAF||^pcMl
zI{%qO4gdJjinXRR>k~y^lVc=+suSfWGGy1JT^zYcmF~<k2P=Dq_wmVh6yZSim!4gO
zb^h|SqR50kVN4hmoxODaK^x+JPKFHm8PcJx@kpj9L1vo(E}c0S1{W&Rp66k7RA4S^
z8eEJHXR}aOVIc|T&I5z9`|xN03o_HAL86q9qBYh`!5jkR>N>>2s|AL(u&_*~A6?R2
z=+{k=OeLJdurn(`Ou~|WpuVtknkdbyGNZ2^{DcqHlwDW3l^U7vrg7>qs5T{<MlSXt
z+S7KS?X>{>*=t6BS<J*&T(k1TpGHKbX$buGh><UYJ-Dx0nP|ni;fAkM(ez_){a9Nh
zqyHioHtzieGqS?zh36W?qQ8MzkTL`H+&HI(-FX~`b);wWGO#%*PacGGL=ibtRyR+O
z9>8kQyQ2np=Oj-5<3X-1j3brq7BoISg{_EKOw?;yQFzKkqA{`<yE+U>vSApQEBg?m
z?jr2x$So(%c#=Ei+cEg8855CH1{z8$$zGX5^tzTHO>>^f*zQ`!w_LlH+GYf!*tA%*
zPr3nad*0YJ-Be&7n%83JmI%`H*oHI;>Y@3a{b;6e4em^9$KWSUI7@dLn9Q+c*HrHS
zStV;a;weX9o*4OU(1Wd;!^xO^1wCbQ1Cr+b!K0GB(CGUFuKu1w>=Wvt<=`MsLawYn
zQ9Kqj9IY_vhdRvGYT;Uu&)`qvbv$`(1)5sa%FK<oCW8W^e5w7xq#=9<Gt2cG<Dn^r
zM}$R5MUp-~dhbH6{#?f_wiTioPkQ0fHwETy!8le0mVrzBL-=)g5)BXOKwD#OW7V;n
zF$-D+-C2rsdZ0Od|7k9>%Swr?UbqHUexF3Xm@Fqkg97xt_a6Mf&DfIX*3srzhFH4z
z@I;^6(FgMS#53h9-U{RtH`&QFbmu)*Fj0%tmRGO?8->tSjw6WsmP3Nge0s9Gf@{S&
zMjHp$lIl4vjC8vMyz}A_H?3Rv>#;H^_#9IAS=WY28t`anaXPLI5U0P2-Z7gdFeG<x
zJw7OqB)VJpAXb|~_6Qs$`ZGVG%#8-dEo>_J{Ne|6YJ5Vk+F4{^R1EJWKERW6CV<{O
z8>llMK^Kl-JF>@|c@}JmkAFLoXNzUXnrnyPh?O2OJ3E;~v|Xk}{Ojzv6xWE&EW)<=
zMIdp|7>DK6Xoy${tP(UN7kT>7rj-Z0%MVcJfD;@@6r@K(8Zi5NDDF_W#rB^TBnEtC
z)?%qRq#lqX+sCbmwy+{P{N!eRKDRXvFJhcHida^FGY*#*pu>+SvV8mln4A;g+JwsN
ze>$m=^b%!!m#RhF&PkI4%Cl(S(g&=qlPP^6ph8oVW)PjOQatrHk%nv8@J{n)vu{n7
zkQ`<|jahyQuKp~<%oiQ_vd4m+N*5)At`o3BD+g3(2BFA=L~40(E$U|`!P@WuydJDV
zcN|P%s!PStAgu&t^j?9%^g)pMc^fR3j6nbELUupbcD7`-D?J~600&}3`3FkEp&+P}
zy)r?OYHZ$1$HQybFQa_?mcN|tdi0$2(cDGSZ_Feg{il)AfC89)qkwgg>%|woZ(zy6
zX~ZSfiC)Q@KssM7h4C}-%of*GNDE5X7sUpslb6Ow|2@Mjh+yEK&VBINx{S8JbRpZ0
zcO$uLidlQr>7|BnGzb)--5-L;ulKj$I+qY(_kRO-{i#&Fr4*RSea!T>mE_FxFr0W<
zk|J*_tHujq9vsMnv^P4Wxp6UlJn<ux7cZy5Lm#kgXFUe{Yhy<85&Qu^V9w>;oGYjc
ztCmM%-Hb_a(o=@l$x-t^jLf3~TPR%KlR{JEzrseNbF9;CQ5dZ|LWVE-!{eiV$O_IN
zCpNec<IzF%`IpPYMv9S>d-74}uK`5ZTwr%j{{YppE%sz@D%rlo2wvtq!*H);#GE)f
zGV&E2c3R?-gM#$rQ)A+CPm9Exd;ovJLfp6f5g6O3bFEs=wExgJ4z`|QWV?}-Y~2R-
zT3kQQba`6%NuF-g*C#n!FEYiOZK;m<3hH300G2{5%K!J9{oXCas<Ik%_OZQq{d*a<
zXnkP)?uiktUAfG|I6HRMU?6U&pAN>)4>M|ajY)rv44w_Wg_jFD8I#kgXek(uSer)6
zUUMz8srgX%y#X$)vVy`&H@b#vD!0C~4B{TC!jw>ddeL<=-MKait&W7kuInmf_rEE~
zzcQE6xB86YHc5P=Gt$Jh4XA+rYfS1oM1$ftf>iVk{5m0-qx;B_3uOuHPnU8mt(-_E
z-Dza)7d=9_{fId;IT3T=GjqgdG2`#qi;G+nk#BH{4jH&YAh*SMX0(PbU9^Id0Bg$2
zvZdqR7eOGT9Uho00kgVzEGp3>_nx$&<VO)kzxEgWIitxoZ~g;^?wkR>(<HKGumqpf
z>JfzrrJ%ShnwGl><B?HCuFxTcC$UwNp4s&dP7Qldm1!d6{9q?O9-aWM&vNjCp%j@I
zV@ZOhUq*{lM`^Ii5!!NO3jGxQ2kJ!f*(Ig3$-`M^X?M^>Hl|XNKHJlTdO?3_aPkva
z>hu$(R+bWWOEBf2$uPcSCePwX83$nG64D{VDEIa<E%Rx>h{|WUCx9W3rn#~|rHr9%
z`vg`QHBeHVq02TM1;y!VSR7i*jI`Qt@6Qiz<GYfs`gZ_diAIvM@!>>mk}PF8ky*Jy
z3nh8lG^;QPo@%Ydt$IDEH<6DaOJ3kL2Ps_Q<czH)v&f~mKsdDY0$z-eCzB#XXvkVo
zT5hw7l{S|l2fm*p1p~s^QKQIhU9us^&zW9oF5+!4mH?$`{;+Gq1X`aYO=fG9k%?_a
z^o8|Y2o5kHQz8{`lG_E=KR+2eQ+$bo;70a{&>5nv-2=^$|54V|p6DG^AQu<Tg_GC0
zq-ePXm9*Z^-c4?TK%cKL0q=nGn-y^Pmpzuef5G80<mmFdZY0qzgDiY(0r%PzSibr~
zxVrZrM}b)eN)NK|*)nk^abF@&a+M!hwMvD%M?S*>KNb$F407NPB{H9}ge7lgQJKUz
zkP`Zb$FFO_quyS|zbOIp`&8)frPJy6xUI~!1(^`mIE+hr6zMb0#r&~Kk=*UFU|Jqu
z!v(i&Alxe#r#WhqzWaud6j01R#+>JUwi9Iwe|Qn$DMJ|c_y-CFe1MZeszhNcMH9{?
zb8#pIjJh<KK_yut$2C*qW_8+H&yk})>5^J%OuMa=@Mdl&n=Eb*A9$x(#T{C(@L~*G
z?kG*F6ogod9vKo>Crvu<1)++UBE8-EgBkC4Bx>$<U{_>>%2t!eP9b@kz*&gz{SkpT
z6C+utl{w6niZ`ehkVJN88*^l|X8!P(dd6d-HZ7Zcn$?p&fNRa4@zo^1vq$$ez&wSE
z5Xs$5F2~h@X~0virP7_4Z+{AAiwvl%oiJ5RUjUJ!qgeAk20pgrV`+*t3by7k_C%g$
z2v{>BqKBBcqU~hnih5Lf{|mC;p2E{~8xd<xLCG265H>rT?g(<FT8tyHy3h=__jC8>
z>JVzWDT-}ye}zj!t?91zQ?RPUk3_tbXV%Bwh1*vzu(e<OXqCiLD*4EQ_N~@q7W?L*
z+oI{Ha{4I#c1)nJ>)T*{@GE$}yMTGl8Z#kF?($3?oaEeff^7Y0Ht)vucyyO(#hi^t
zFmHDRQxK~|&2Lx`$&eH}rJKdHtT5U$x}QCGekOT-u>s#&=d*M~9rR{IK*sQ6_#~bN
z=eiSc%e@dX&)kT%6q*5j_7UZ6&F#NVt$<{6O=`6%nYfq-5c`&KTw>vclRqbsdoPQb
zoA-Cn(DT#L*C!Ds3R}ahWfl0uT7=%{=zy(aUc^#e7U!he;DH6d@KBo*eHt@~XjQbK
zQ{QLKu`mL=&Kok02lf-G$;0edb$23Duo{lNOQqB5d`NDaIladz`YY@@QP%Jl6DD|=
zC`50fZzj}%ZLBmCBke^WjBJ2&Y(0+8_9k*WCFr@%3Z_NblKL)YC^cwfGS(T<y^=g!
zb3~4~`ai+$<7=pmga>3U?q?N??nCpWgFuHKFcFWYG0F;a;1oxUt=FAOvyRr+KfHE?
zn%<d8=fsqN!9yOYSQbi#dSzL?VsVl}Mxf11n1oC9L5XlC=vf58XLASEPJIggbhM`q
zQIC*p;W0l;F7jH7S5ONH9qN;r4m*RsLa{&so)~O}>Fgye^))1slcOMgni8=X?`0p(
z7)Dbgpts$nL0`ERyn}T3E;7?F{O~rswo;EcegBSX=tqiTF5&SJW4hj!I}?5s!++YZ
z;O&KM7?rStkkM|~d{&Gm1pi^3)RqE>?&EbnxCQ3_{bl#YHb7WjHk+n;98ERjiI9*2
z@k%#_GJ8*0V0eU1wB%AiL-ACz_&NACy~5eHg4A0{oz8G^!&K#BC>5x|6@Aj=W0W!T
zBE23i{7xh}F4i<erXHPgTX5p~46J_;%XB^Y4m(Oj$lk9~WNiHp)C;=^ySY}ZKmSBI
zmyRS3O1y+YsY@{Nzn74;=QLHy5+$p`7f_wMZBVEYOdcM}hQSpJ;Qo_KU=s1PKJRxN
zrmoUr3#?MmeaBUd@xI6=y|$q>`L`I`o%1;ItqW=5NIcWF*s&s4MCm@B4i!I9!}Q8L
z!l%pKsI%HP=F!72R@_yLxV@SM67!A7>P=DP-|#*nd*(UL2{8w@UV=oeDTT-56=+f`
z1?%sn!0{-=)wMkETo(Z4e}1Bmn;S6`d<dP3CFvmLL%nGO6l$*^MgeEpejO+J<?&%0
z+*<}#hhE_QyhYS3V>|O>zYHldO=7om-wT&-|6wvp5S@1pxTH}H_wAC$$%E@aNj-o$
zk!V1q8_%NE6?LR(ck#q)9(izkHa>P(N`zx~(Y22+GiMyN=!q5Q@X%l@l37OByV!)h
zU6ux)(?8>x3Bf?bI7jW10cKk;rzd;Qq4tB@ASq&u?JV_%+n%49tFo*3Qu#_Wftxqq
ztNNl3lZI9F1-Qu-<JzfpaE^21x2JiqgAKB1@n-<OO;M(98f~28;0gMFGU5dn&n5l^
z&U8Vv6zD(Y{>GVY{7uTyWc9%)Dm!HaUcHQk_IZYE%F{6j-la*d*^gpW&R#lly9Lc-
z7t#EHZ_Hj-#PY|nbP{urc-DB4wvifAZ*mLgow283LmwHV&64z|o&&g_(8D+9##nL9
zW>z!WoCKbAr&@-JAbPbEc%KU)<ySFw>^cJ#PBY1WJ2`FZ+Z@LCpDsOiteE#vDGmoh
z=Fmml>>kbOD+Bc((3-cOk)5o`%~Mz4Vf<uj+?-7^za)}b^Fyh`nL+mV@CTT^$%fm4
zSAaP8y=__fk@>gn7#8zKAUV7Zrt;6A)xBpJ*^^iAimBk~tO+IuY~jq`QnXH<hB<}{
zz~bXf=-VMocH}kVkgOQ->j|e$6?HiNei#xhb<r<hiA?_I#+n#PquUz=qEc2363xyu
zJV}YfIef(Opwl$+S2rF^nu1<hJo0u_koRpc9_ISy;P=rQNcs00JpO%yN#Er`Tz?|%
zyXOt37pajg`#jmChohV}t45EFHL@l97QnET4=hYnB%550GZwvl{DT)|aDvBSnB-*x
zdJaD@>zX=ITQdrc{l~bL$ba}E{{}|LW}{?eJRDvfNYBekll7{T@Yf+8TydI6o=BA9
zYPUBKWX!N#VurLybCmr!oqJE8pJXm~{D6i#$|Nv*JGZI*1RgJMbChXC^5D~TwEpn{
z;8O$t&fimD>EMkgxp__c*%LVGa1j;X7-3_pxc#lz1Xw<oYmJ#>2K|SoQIo!_oNMd?
z<h(QBG+H;bO&sL?s-8y;rOV;pjAq7fYzYzDYD?s#XV7^EO7WI~2AQ6m3H8pk@a|eT
zTs86Kzj>$$>JI?4<Nk0&NHv@@G@H8T9E8_#y|}Mm7FtK@;A8hyR&eH8@ay7ecVqte
z)no=`^!{)&Y7*pL*awY@P9&}F8^mw3BR+e@AeVbj7i?ZbBCizU*|n0qsJSW3cTYbm
z;M-^)U3eO6kE@d=l`O0^SWik{J7PoCM0)=<LD#3{=rx=GYddO*Zb3Pyt(Kskp-spO
zDZ%>F)}(b}GfJ*bpz$x|ad?Uy2yIg%Z;jSd(*6;?cBG@n`$q8CvzYM0zN6^PBG@|1
z7Qb&1CDNQr1au;qSE{Y3ucQnDHZrhFISFg;wnNZ<IhvWc5cMv(VOu~l`*2<-&N0cN
zhlj(_T-ub*+V&J*Y}KX2mPgg4PeSU8Pk3hHJ@C@VWMlRxl6gMc$gbp-jOTX&(oh}%
za!Jb6$5N97{i?(N!_ay7W7$S=JlUCLB{Cu_WQ*rMS3)AGD59hkr8God?T}=PqEbTA
zQc6Yf+~;U0O)c%AL0Y0zwDjJ8fsfB~KhJ$#=lp)(ueP%a@aKG)X0RnozN*EhUsIvE
zHws~+{zF_M|6H_Waw>l*+>TO~m%+|7WmeF25%d&`QBFY?=b0PT#|^H6aWfBy>{3pN
z)N>OsrOOaBrrr^6T!nZiCmVXygUO&yAa4#HO`@bwh}|cToA(g!Z<xze!xC^vBam8@
zD&7mf4;sS$;BMh(STitJY+)A5Wb_F5E6bR|-Bs9>rOnO0yO2()59g%<htiwCP$4xp
zfdcmAL7C5Vyy8BFHBHlC@2VA9riUcUaVo_~^YQ%U^vN`7{~n<kJQdCU<iezFY1CD(
z#XdKkMhh2Z=6B=(WSW>zS70Vhtl7vqmX4!oTT+>I`(3fbg%4beag1Ol$pS-xFn6gu
zl-{p)CU0kHN@B%O`M{Xg>{f=V@>7Dr#Q~>0GiH*lacog~3~(hCki0a7uTrpuDU*(a
z>w$B4uy+!UAMDNo;@8pm*9Smz>UGp9I?124xPUvxmq6KkZ+Ngal$YIdK&0dHyiWg7
zHdYk5((fIsX>X+l+u0w)TsEy^I<w}px8A~DT5AuO3MSeG%d=QoT)gPpL0h^V`xt_5
zE3#>yoAKt*A2=m1RqUX#A4X)_(`i3x8qzPz2A0gHh9^h4=#Wd|uf@W>u2hMdK32fy
zAWM<mevU;Z+w)0_WidlIBaV8X!PS>E@TSAP(Y$CT)U^Yu<SCNj=7(5b@)t@MG{fGZ
z`?yOVO9TqgA>`OMd{p=p_ZJMn4FXN1JFEhaYq+!8<D*IH%nbhSzo$YH$C-8XtU{5;
zSLhRH_qJa*aXsfH*j>T?;dW{lv^eNvYx70XEX^U9I_)B#+oZ{*bl8)5nmSX-HlT?g
zU+`}(v_Q<Rm!emH7eO*Fg^O0G!NtaQ&dc#B_(%y>2B#p1F-d_@tInZst{>|=D8ZCh
z%%sK|!Gf&6mAf8ei<aIUkau7|SCw!P?M9f=^95_+KUYoWZ=y?86~^Edc7nUUVvIlu
zGlc>dOFC5k9-Eg=#n#L<aPnxdXkG4YEZ?HTPK}UY5dty7we=*kYmjBzB6|2pcVH!|
zwWyl>3PL0QV(L(7W)q}BM@qw3T*GLlYkUk%KgV;`e+E%P&JVn_N&&|<J;&?sb?Bi^
z3;(IAhnIS##rECI!0sFV{ERpw_|t2|+<d>`vd};E!`GdM{uM4n7w^L5-cE2xOTdgN
zfB5)M&D=fdD_lZS0+YP56>1mjvb_R@CQ{fZ#j5S$E-dro6-^9TUU??uAG(dh%T7Re
z>}&j2?n>)!CX(h_SC|&@4E>cKp}c_xEgg~uJB3|vYquxL2_&f{%3*Br9dq`@?j0O?
zL0}`i3*ReMK!WCOO3|}qGXmsjwwPmU3s2%w{~5fZh5~z{7eou6?T0lrBiLOVRXV1$
z1dm%@#N{EW7%0*s^ZnQOq?n@^m@o;Go=jujH=lDeE_q<0yf&%oN->q+(oFPBNY*O)
z3Eww!Y|S`@1^gVc3bvur2ZPWlQ=6B&YYe+B<zVq@S+W_ciCNMj);PaXboX{Y)(&}u
z$2@ak!d6QfH1oE2ugq0=b4-)%>dZr#9W~hb!x<(mnm~*0?I*L%)tEk2iCgI>5Nk~Q
z@Lts;oNjE$GD}ME^vz8a=6DOlI!%!1s!bWb=W(si05;N1nWFAp;cge%(m^K0uGano
z<Id;sxib#WI}O9tUb+~r+>Cav-ZW+M0Q$UZ6Z}+8MCr;Dy6)5q1#c%ow2KB)8qtsU
zEC%7vUB|(z@EwLryW{L*V@R&!1HbA@Dy&$d&NRf@V$+;Ly!xjU)+Wp(6N$^BcaC{5
z<swI0JBG6}+mi)y<3>0+(2HKq?5uNhYQS5*)!c{E&%|-o%J|J!npOQ$6koh|jWd}2
z9{2heGT(8&{O5H(q+2dzGM1e}=xV_~&ZoJ;z9G2kZ3~R2^L)fv6`U4!0#92ECbjrI
z^lR@jP?a0R#*Hb)wyB5VQs!-b%E|$FjQ?97In4np_J-3P7aMrCbpy;$-$`>nY{wh<
zKlr}Sk|eKF23>8}!SUN%Qm9m5_4f9ZdG;gfCTLNqkpw>SK8+@4^TF~E4-Mh7_z6CG
zWcJMz9$e35flmW~o3Q~}xJo$sL5@`))nd~Bnc`f%X>9V2SeC5u6dR7&P(j=wSKT?A
zDLuCUMN1#!k})Y{C)j8`3p0ci=Qlnzr~~JhCb2W~;_%x9L!pW7hbL6lk}%f=mRIlk
z=94x&tsh7O(@){5bUQqhz7ER<nS*lQSo%I&g%(MD$5Wz2?uq#=m}+XzTpD*HthQkT
zEGr@SQ!00J^9i_h)|AF?>;)~)<y5>*M9XRtSmxR+<{wuNSxe?{>;I`^c&I?x`@V{v
zc`c@nv7?yuYYUcRRYKDJKfz1U6^y6XbGA+YG2YsfZ8uHB78xPYw>*%gObZg-bD2U3
zK8n<KYBTmdoD7oAdepo72pY?8gWYFDIAQiE>XFc+L31{N?EN5c;I89FNQ96x^XToY
z&CsImjhsNNI<}yRv!C#qJ9Tje`|-FD4FfxH)xL)?Jy7W3&oIJOZAO?mZyDr8CJ5=|
zO#b~Lb&@yVi3@yN`FYW^U7pP<;eM^Sinp>y;CE$rcK=~DO!zSo3fkL2UMCd$kL7||
zhYl<};XsT1!r<8s6K;O77Rl!Ia+<dsaCpT9e5HRMGvf^?GiemFiLhWps<lY!-xuil
zH;z>NUUIx{Hf%W)2VEP!z<~9qxgK$fU}0U2Y~5Now<DR2SYHT!0(sF^Xb=tDn~Uvg
z0jPL7!gZ_HbmkN(%T^rS$TsX73O&Q}NOQ?GaLlwNy&6?)a7A3AJdS<*{S0%nzryor
zHf-|fQk1hX5ziXrK!;M2uwcLoaB(*07p{7Qx%(B^v|bAqa5ox9Ez0J+zg44T_7l7_
zDN8JGeu~%eihx@?XE5F9tw=o=#oASpq;@$Q)P|3Q2Mzvs-zrxm^?oO4jGIQrPmP%7
zjpfYskvu30^KEl~cU)D~DZVqR3w6#c$F-N*Kq1!}UdV+r%Uz0Wh}m9I)J<84N;YH#
z9|zNd@_iVzbuWs4ji8p<*;uP#Ln<*cv~Hy-I}#$nmReSF5gOU>Dg3Xf{F^6^Tr2$U
z#p|QnRt+*p`py5lTY#%S{U^Q<(hS`P4sbd7&$<1J(_m4!3I<l_Q-aAViv7StRDl|Q
zUE(e7$+l$|c8|pUw{+Ov&*|*$ds!~2vWqwOS%rQ-MOgRHf!*9w0Nm9gP|Xw3fInK~
zo$vxpKbCW=Dm&2WuDj5Jp2rMwPxC*=snP-QV|aQ(hFq03S&zyT)Xjd1PnCzWZKr=?
z@!fOitzpM}#j?!SH;pm{(&hS<^F{h!UcvTP1KEzQO}xU%mH4Oo4j7~fHtf6U5T_?e
z+e*|a;^%nbuD1ulK98aYjo}*Y(&%I82ul2KH+p0iV6&PNE~%`<mh77_A>=lu8E#^Q
z<}J`T&VeE))j(1&z^{PmAooNB(^wZ~MoZJLkvS}D=pdRVP%ZCo*JH;DJz(q0GuYL)
zmkrS8xC6_2ajO4u&`aIQM?}Ps-Iho2@mLW1H*F7IFXLdL^JTDEJe&2*DuXSb?C7}l
z8lE$H$xlt0&o6Vj37%^@;d4MfcZ=J`v~nBy`oLB^@}~km48Dh6u~HE8>LSR`zYRq@
zx3J}-ZsAd{ap?WUmV2Vsj%R~rWAFGOxG;S(dRPiZ4XYbm;@D{Lyr;lI$FHNb+(g*u
z<H6fC#Di#V5Ihc_i<8UDN#|@k*bU1<)jMC&>-Ie;Up0vR*fa=6xSFtI&?g$z-UsD}
z^hoKHkQ4io&Cip{g4M&sxc%HL7PtH!9Gg3U$xe8WYZrBJKUWvStT(GDZ+#Ih^Hbs!
z6&~~JFM6~2j`7^*^cb9KVkR`C|A2GuA>8y)h9na0xwM(9P-PEeQlmEF>)W>Q?Ylhv
z{<IU1e|gM}8$N*~Bz}s^XXem=4M!kS;{j}_>3|IXleoJvmQBAs5+#h!!)V2`T+IGg
zcm}59DIpKzmv|2nzinV!Hh2)XXE-w+a2!lF5(~E$$o^6aQ1aT2N#i9@xA$S~ism%T
zO3Z>?cr4mB2ZDPB)9$vjn2{+53y0R?4Uc{FXo@ZSP%{d~{wTsZ=A)SWc)_-n_lD1R
z-N<?eIkH1z(s1(QA>4;RQ#iNmjf<XwC0UPI0`qt&(%bS1*F{$21mhX>-{l}YsJH>W
zyb~z%*m976JBpr9sDm5NjZspo7lOul&|}^O)E@beCTPG9A459$-&+h;)@P@LXYs!G
zV~{=lf}d!y73XQDQ{QHHdMEC{iuW(@TDd^qlSl&hjZ<lDpE13Vl?CZFv&Cw&kGlGu
z?GPDFm<L|#Oh|jG15-1aMf<#Of*RZeg?M8ozRSUc>Zh)v4gEOqz6zW3P#w~hG<mI}
z-L5^f9}K27)kox&;NlH()XFdA+c#$7oNc3dou#FGP~<wA_xLiOeMO6Pw{EAAGoFi-
zPP_)0n6YF&{SBH`O=2UZT$yZ<4D@vm1^@a2R8YIg67KJ$%i~NyMN`Z5!|EUWoEM?&
z93Cete@*su*aptU@CJ_SA76jkLWd=;R3nA8hsFIG10nm!7znvjKwEmQLUB|QKi6LP
zJWhTQ8W-n4yJ96ho@BuF-V*LHyOu+K`U*I7ZVW6sV#^@rAzJA^fblkU*z>1|yX&gY
z9#~akBY9x_${OT9InnA}S5cE!gQW*$SjLTNE~CsI<IW^t)cvLS{*hq*+MUh*E}DoR
zhuY%EFKsY#WCiqEo&d+&p&;IO6R&@s1OB^*@ay9iVfzHe46RK$8^MNg-~I~*L=UB3
zy|OGbIE*&=sW55JBe?YK1$f#k&yQ~X%;U>kE@Jy<cyfLpdW<$kSn?9?X_&DuU#Ic&
zElT+lsl~XX`x*b<@E|z;31Ihi$FVzKYH{DDVS?G7L(X3b&Q7)vQu(IbN{`XPdj)7<
zV;PogsY9(b`QZC|6ZL6}xc9r0v2UIN4cRb)w5Ry7edc~7wLB5`Mkk{Cmz7-o*d%<b
z5y;;DZo>MO0Pufc$Z4oJiB&y3=(d6``80jtJ9-CDq04oYZYk%u#ecYTw@`E$I}r6Q
zx5I+Eix{k5fNs9yNGIk6w`h(M&Kv&8r9onwK<SrcQQp?<X^Rq^(=X?%wrt`qo)-%i
z_C`E;$B{p#zmdKSEdldO3#i2*pUd==bv>G|#v(_Jq6|kFN`0+JXWm<}WpC|pn`tE<
zFC`0|GV*Lpsu8=}VnC|)b0~7U4Q1?AVr_lnDehtxNS%HRN$(_J!7gPXl5>jl4sV9r
z@m9<-$DZ9E*uf>!Aof+ZTC8Cd#qLi2i~O@L(Z@Vzt}QKvT`Q5}W_EAFZw_Nv>*{A<
zQ?v>cUjBs2B1_yMm`ArZ2t=iZM7DKx85{BKJlyCsr$Hr~*oIblmOQV5OL@72HLp&`
z(iJyhpRzq0XPwC;s&zS)oibGFKp+ufPcEPASX1l*mc25M9-Y@_qr+Zv@s@o!O)$B>
zJN5&-guQdL^*fy3xRKY(I*d1zV(7sJOUyVePgy$q;ZRAtXkT^_rgoNM3_l2>cBkTG
z2My-9FN6YpT=As-G=5pXCY|d4gkx=nQO1QJdghrx=Z{&_aOVTywlo%V8XT!xEJ00c
z6)?eGml}n<^pXJ+px?Uz=Up2B4|`SFhVhwvtU($o`Q5_}?*zNy)v?UlXbFU0AI+9~
z?t{5Eg`9`A6Ft1Nf$6oFV!rPRoF!<ucdS#OSG!{HYWr}!bK*(efw)~bLw`J!otq9*
zDz{<%oHkUS_z3ofjbx3<!fr-qAKzK-C=z8~!S||J%&k%%hs;$Y#};>bB>jOOJy)C6
z_iB*zG$#sJs=#k(T_WU;_Cme-Ni4p83p*D+hXqI1v$*?tV6{||N?j+hW$6LzjbS2{
zeounL&VGJq>j7?2<qCW<VjX&TJ2RV!7EC=Pg>~#!VAHpKLgnhQylk(Cia9SFd36*u
z4=CYp%6gMjlqL4N-4)+*9M0Xlp2^~>?}BC181}k$Eepg{mhi%brT460_Z468s=33c
zyP^q{thCubtwpr2If(5vc#Ca1p)7ew4|H^;vFbiYoYyf^$l$252fmw8uk$l+*LR7%
z?^7fh*IQuJ?<cmu>&A{WtFf73KT$4*gjT5peSDJ1JOk}0?|BaDteEP0B|i}o&GN8l
z^bK^a`it|9D$(?hmTdbnffCtXk1JzGvxM1+?ALS!_B87dv@My8wkN*AIaPCZt8Eh)
zwbb)<I&0}g`feB--HumlKjGmLMU30xiEDx^aoWg(pz7elB84VtanWWtZFCVA|Gs=S
zv!WOcZ~WmFI&tLhros+BwdZ`cWRueLP=R!L8rR?XgWf&gc(^3ZYK8k<nB4$)zUwa@
z)ym?+suok$M&W(ewFmat3}81pw&U2trS$RJQ1*7A1=z|*!H7vM$lbb&E+J3Aq~-+w
z?L#$=eDMwAe)-b2-)d~+#09Wp&Rg`3Ne7i*!FVNq9Q-yO%>Gng$9<|iq$y?LopOCv
zVb%`om9%+rycYADA<(k4jHqF!G~Fz%fOUg5VN00~iNBlBll*8l`fnAK=#=2*u%q~P
z=MZ+i)d~ICNV>3Jk7n#D;+<COu|I>pbN@aEvW`OvR5wbVSVb(VIxrd&t;5o%T2aoO
zk8rg_ho$Yf!bf`xW`eWw>`-Kts3T!NAN5d$dA>RYK28PV`gJ^JN1Xt_f+(>=A)~W>
zFZf4^L)hcW=kR#uDVV+IIG1(R4{vGeQi^*a&VFe@A52d{Rkkry?X<;Ve~m#_QwvVq
zKF;6F7)ARtf~hbslr8a3=dQ_na%FJ_u+Srs1zxVe+u4eAN!X#c3HwOHvDR$wNNJk&
zU?aep+i+u{U=a^4<vfemV@JtMC|*<u-QVVMYxpkQJ;0DH&2z!-^FR~!33sz_KQ^u+
zocG<B0{`VWGvCG%u28j?|L|iM-oF;W6oWFb+TN0S1_hzK#|UUJ@nlDz=hqL;s6fBr
zGjQ-m0~Wo05iSpxWPkbuqJQ-`-XUQlQ|OGs=HMcxecFUxH@$-Lce`N1-J5*-W=XP~
z`3{^!fpmSC1lx5imS)NMu})!!ls4Xu83yOU#wp=+K=TRTUn>TG!B#f+XAV9;kC^2W
zkG8Xo$nk9^Eb6kP>fnQD<JN~78~enkcfUoNut~6y$-s!wtEuatBx!sZOn0)~n3;10
zHzA6nx~oT_XGjLRRy*PLLGGk4`<DOlt4lOVYBs*N7($CmCP0i}8k#sdi!(f{gNl+C
zZ2d%87{9|^wEo9Qe$g($qFvA+(E4I%YTf|$cW8XQ-jgbPcIiCdKVTFq-PZ&II?QNe
z`ZZXrw-MI%q++>1Mp3d!fuC1x*=tQr%#_-KO`bA%A)yqmNGP$X>n?+yp0F$HRz!Gf
zLZyyEU+ADB4DC1vEB?em=viqx(0URcE)n6Ful=BYq!rWESJQ&@f9Ti!fSWB=$AGv1
zHfq~x==<;&$}JSw1rKj(A8o_3+>uRscZpAQuEhl}K0@R|M>ziTAv8RCg_A4O$l7R;
zIPBP9+UvC!D~8>`{BgN7A$$zXnY4+8U$@6=d0)_rD~0@wV5at=hQHw+0-dYw<I$KL
z-foy9bGA>xo&Oz&<5FKiMzRcVeUha2vApZD!F>WPd=gHbsY=quN!Yn99boDw^tEe*
z37XlQPo?m?u&5h{I!z_`l8iGw&co815;V4W8aoxWfOP*z!zul(urS=4FP?WE`Zi=?
z_xxh;{G0>d77l>P@xc6sS7A(xCaIV$#jR!nAtUA*9!-`L@@Y;?cdr?3S)Y#28(xCL
z%uviT$*G^QSD(E_2lN~)yaPA1a+lIq^LgJF(WAeo>f-{uY5OXHfU!;jjx>g_b3%qQ
zI?su<JU+qOt&RfSwfdk?vz7XHUxDsx9XN8s2PltIV-l8e{5fkA7Jtc@WL)mR8i&ik
zEh>N`>d$$Xnd3;J{w7LD55(L8DJ%?5#D$6+&5ByhJx<PKhpg{HZ<`FcyNYOjk~SOY
zbB#N7JrnD0hM@HQ3Y2`=0{fCyiv2T`Y0pMo)~<PoOBKi#W!IyG-E=7N1G-_!SOZ+F
zRSJ89q;NudAvJ71i7VxUD7Sbep1F|>sh?JW+BSFg?~6IR`rZf@Np!&8*`qLV?qT?|
zQ<mb#D^h{tSN>3$0jt)|rp3x@u%`cr_?_BD@OT+SgP9U7xBrYuKHuQ;xaGJ)AVx(T
z975j5mcW+SDb(U4$LyL^DRibARSnMIXFXbukNS4u$ecSk^xkn)-6zY2D(2zjrOkXs
z{6Mr241+)F<(TT86S!NcQxsd&jjwZ**~5>)+<`-`G`Z#>{C!!C8<m6D*9a9FA@z&P
zr&<tOZJ={TJXk4w!FM`(6tYQ@K1l9j_frGe%{e~oU}-tpPt;)+zqjLm(gZ!evp`ON
zGgOUDVR^HsGy5`S+W#|)|M&Pcyt}SJ9+ypF?pSH2Dd*2AC~jms-#Ww7`*v(i@e|y9
z;V9m}c?%VVUdyMF7|@Pd4?j8z@zQe{ys@PSv(8$=>!DBJ_C*ki=JQyMNC6z@%HZPE
zCtMkv#P46^*{LQg<m&CXGkcez=h#%(v}^@SxcUTd{+kW=stwrT5GOYIpAR#&Pi6Lg
zjr=)Il2sU;$4ToCvK=1^!FRhll?wSk{S!fy`Fkf@d0;PGSmngesrra}RytFC${bcx
zpU55i(<eS5DnYBlGawiC3mbJ;!}@6>Sdi%`aopU4qWsLKXt+y{c=h*KQ}0Au6GKQM
z?jFuoJ_pZK?FDk>54g8z6h1dop-EDMv3&JPX0$mJeX$Xr{L;o<xw)8=Pz%9t+OhGP
zE4A*w4M*>s;SQe`$PJQ5p{?}=rrtRQIuB&<W04AZ&yr@hI5CfgbC`l$C_X!%%-*P~
z(b3n1kecYo+Um+U+1nDlx!*;2(6*5I)C%;#-_o>PbtFp|WKJ`iBOzQ$i5=Hg#m8SK
zvmY(-;3)HlOW0tD8{Fc>mCD+<e|k5Rp4A|y^tHIGW=MVRwgUXrAPA4r=c3fZOs=bX
z19E>3i>CIMxY&2=Q_rP&aQwnr9HgEHHP3G2SL-_%|0A8v&=fLQ$1`9^vl5Ne8b=;p
zD)>85f-~+?<3HCIV^aKBAwx2rjq=o_?N8hIYn^rQ?ne#04h`jw`VVLBy<W6_+$-o)
zH-|L?6sWRlA^3Q1z*w6i{NobLnjhrwt)ZjYnnz>UqoETic<ci_&OYL9uQRB=#uPLb
z4q#PR_d?S6JKT{q5%6%J1=Eyt<E2OIu<)O2S>=p5q<L&QeR=0V&37+|;$Av|e2Who
zVK9jGdi3xUWo%%6w+lDMQkTA7(xeh6#DZ2gAwwqg=oXpLx^4Thro0W4Cnd1I-g>yK
z(-9Jn3DiE-jgYtLpO6RjhYGz;c=?|el@xD;mQ3W0D$e4^=S?EXa#OY{ei(JGEaUwx
zHsQ4mLSOM-1lxaO5Whcb7q7QW1hIqL_`hoe)7ZAP{PmRvqUZDLaL;r(rlLI=Z6vEP
zq$mxYx21C5!#qIh;#JYg$km+Y8UrwE8o@S9JA+3jEui^_+F_MmJv1Jx;CICLg03)U
z_xP^N{-q2gV-HD`+TX;T>vv=A(~okNGK-jI>ot+I!eQ9H?gB(VOQ9``6iF_=3FEJZ
zu`q$C>+x<Lzu4K4Ex4<V)kYj^NI68|cZZY0+eDZv>@;dG2eJlDbqtw$1OoblpxjxY
z&kyjx?<XV4>&gh2Ff@Yo^v=W7ojgqOv0$}(R>8~PJb&idV79zMUu-kx7S=_N!F6g|
zSge;FEX^;3zcC0ebJW;YkpUbjQ6fQH%wA>8fZ=kw0(Dn`CG5Q;jyzP0A;z=0mAP&-
zY{?C7^L!gz*R9C5xjcl~$@(C<`6#pb@Edjsw7Fyci<w7=k)WJThS}%sq2$*K_<qll
zbfz5S8ou2}jU~#U_)UW4=G4JU<!{{dQlW^oXb?RX?BumG1-qGCA5=Ae;D-MUrpzb%
zdEU01r60AU`D26G;8kjrepZe)R1akrybB>YdNTX2rA(i{PN!qHinxbLZjA4F22W#x
z2)o{MP4kjr(dT^EidJ2A=;BbeH=&U$`#p)hdRxuk{d@<@rcVH!>2ty2>sQDuS%}t?
z@<df*8^mGj%Rqgm(DRB}!|YEz!^LkOL*OVSI@!DjE--{oZzM?ZzzF8Jd?X(>Y$EQd
zGh(G(pSg&mAxuG-1rHtd1`f7zY)Q)&?t9fUFpm5I#lrJGtkwiAS39w{XBVSe`Un2t
zn8y%1?kY|%Ps5u}pP})pR-9TC0$R(Gn8SA~7Fn|e&W?|v&FTVKFF%^RS`9F?eF>C}
z6sXs>*3h-ynT}{l;qo+XD6%;RYj%yGfxg3qb6E%H^j49^4xf#Ne<bmUTNb>D6-+_a
zTPP;)2|jZ_1)}@YC@CftkL!-2sKn{C=fw?JIdmt@ex8Kaf`+q(nI@<@Ae6UiDF$0r
zcj8QynM_bR{un<KHw$@jvp=@b_s$Gzj(xy;GbW36zs`j7Jp-v_Sp#Mp_(IBv^IWF6
zCR8#RI$#*bM#ap9oX~JGxUvTIP8SHosA_O)@}??_*-$<GIPbVO9|P5bS&pv~g)jMn
zE$@j}T~~u8Ue|e@$5sp?v?({GN<7fnAJQKDMn~6r%>0om*q^e6ZnY}c=rn{?u1-Z!
z#A0}<q6l6v72jUehp#>>*|r7e@bo+tP<)%o9TlFJXFG3#>6tM0v856G%0@AtFo6-!
z_l+}oQjS)s#%#V}0DJXg0RHFSh`Xco8P@yL&f<K~+r)E2NrFy(;^0I;CM-ETPt;aW
zkG$1qe&L26yxP$eAxp6yN%R_$gWvK_{|@m6l0zus$4F+Oqrm>VZO_>pUN36@mc;^%
zKf+Y61K2+E4MtTi=iE;J0(DtM%3i)7C#gl_Z`Yx$+(e*&`mD#;1sMoTg~pYB=7XD)
z>z7_j#6i8TlrQ1Of}3KwriY^-G%AMqvlAfU5G{HPvsuH_q0DBID?9xlmQ9&DgYpK=
zfX|zS9z*dgrgU%$`B625X+Oo;)v|2XWP9P>y$e>@rm_7wyU-%I0c)T4azBf`VQ2AY
zh<CgX&*d^8agzh=6}n>eGv>4XZB@8@TNIP~B~Wfvtl5CK>0p*{j1{_g(x8#6sCHtu
z=w?TaVB}iD<Q>hZ_sl?k*aAIa*KRD#L|;N(@N4*SVjxZ7uky?0mc#1HTG+BX4R(K(
zg&|HAxT#Z@dP{D>*<~n}>z{}Yp~pbFI~4~#kQZot;kejFpB`3B;-t=5&~~4{{Ld>U
z6ce0GTXr7?idUfQDa9~zd=lNR>Jx3e(+LrpUQ{*mHoTb@MXOef#rrw`U|Z5W(zi+C
zH@vxm8?PupZe9eYNgfu3T+E|sSCa98oicV_m*o%ZTarSWKq<T@%?e7?Sj4nkC=>R~
zEATml-4qC%2ZfB2t}1s!U5&QsT!x2Vp1`RcS#ai?Hy(7=$CMUZ%6aw_#tXj{wXvg^
zt!@a|-xcN<+w@t6WH{a%90>iHx~wMRBAV!_LC$6^hG`b8^x7mQu`8HnKU|Nqas+x=
zxdXGEHU+NSD+7MPC)j_}g>L+u$2<~yF(GycJ7oNdr+{Ye>arv(RtTa~+ZrKxqXpab
zS1`Gc3FMwT9>WpAlGxpT2e0mVgH@aC+14jZSg=+ArIjyYxg$#;a)%+4zA8z>2I`ST
z?`mrBJ`at;d|iEL4;aMv0SjM*A2Ku9&d4DsaqcKYosgr({LS#U=?z{u;|rp#uIx!@
zIY~}*rQ>Ih;p+L*g|qbwU_aY1Wb_g~(x(rHD9FI0C~4T!eHj+}F6M8^RO9P?rEFtB
z6hHn?0W9BBR<~HGonN>31AnhYlO<|#G=6Os#|haBd^#I~N{v~r)F4V0@^AF&9~zYu
zf#cHO_)NYHhlRa{vOn&;fn_TOb(i6{M{+o;?jOh|H@YgMJwTOZhV*Wb3R@m+2e0?~
zq4dNM+S8}TI}The*iV09{@+$um~w-k9Fzu?&W5y6_`Mxj7{sKkK7dDaH80Z=D;Vh=
zgzwKN!TONI;*3;iaLWo@@lFcIR$oE2ExYiq>0X%Jwv_F&`^g1s&ca(YOIg?Ha@g?l
zDi{WEtS4&=%sIbSpa+j(hg(8m+5Xk6e|`j5^sCb12gYEzZ3~+orbGsRH1YeYo4ih&
z747;jf}5r7Me0FPEJnhTKlJ67Xye>q`n=eb{H)se9D#Xp)|O*_QL23IeFDW-j_h#G
zOjg&U38Qt2vFLRxd@=N)c{Yk5GJWd0Yh^p;eVoJ9cLu<s9ie2jFkck5z>K+=O$G%=
zF<%l`jEgR>6=r@4w7W5x{XJsI<|!C3!#n3;_%Wd`my<$^PfOD7DT)C5?U_l^7o2Fl
z4K-H3<M~o^lHD|dRStax`d$<Hducy;d?!OgtTL$UiVAVEZO|Y55EXYF#pl9v>$$T}
z;L`+iGpu?*fu->~&Gx|N)En5j*%_|;%VUM!3L(pBi|?kK#gp3Be5jT!$3@!kgFjl~
z$IJ0haxDV_&bbI@02wejqRUkMzM<rFOFDJcjVWw1q{5{#>~fr7={O=#cn4d<rs{!M
zb;p4#>JFk4W(KtQj2Q593N{tn(x`$+a80=nYYZ)U-(!dIj4&Tv=d+D>z5h*A`sok%
zFw=>>i<`_Ib`PV2OLpM7y%{(*S9l*h&4V8re(*zS3p=@P1bf{zjGk_ug~2lhQlLRI
z?tAzFt<^Jl6bE8_Pb{cg-@xz1W;A++D!Im4&>FiskiYqZV+(}N%~?Vvg*X~JViXK3
zwWp8`!cNC|05yAkf|&=@n0r7rJkaqW3tuffFS-w_RI0G3R1qFNxQ1_TFJ#)*$|M~v
z!A^{PfnoV_w4m!Xev}?W|LnS8i5sJpRyRd*-_OGK?0ax%)g3ICOl0*9TVZ~YNTA6&
z3nsR?pwKh{Y;SL(^K%*LOt)dW-CfWdVa>};y@03Y$gr4!`#FdAQtag}<VG8-K*BjY
zy7_r2T3;!Jn%9l^x}}I?Wh$(D;wQmWtjF@3dGu7d4`)($;#r|nGr?4!R>dfjI|$uP
zh3x{B^s@N+n`!*om`gBqoiy1eG=uduH|qMD1%4O3fRl=4Cq$E&!uwcuOUEBW6ig^e
zClQpQ27^pZHU4(o0)dfIFyAHxeYWa=LtPKBOhbHrQiU?!E3iXV_h6x31&*<ti%&NS
zro4ZbV4+(AD3(gFkG`dl{y>^0_HTpeAV>68FXToG9HKw_4&vqj#=4A!EYm0}v@p}8
z)9yo<{QGvi7h*svZxtwii7~B+9sxO%$1#_o3Ia2xg<p+nc-Z0!T+J*ZzrH$wYchtJ
z-Bn?CK8#~8&9(XWb&7oC$>HpXLogmsIfr^rMuKnqM99nthK{8(Ai-)3J4a^_9Oja0
zt2%^e)^gF=KcUq0E=-#2Ae{ShAm>UO8VJPBA3==}*Eot5W<J7<<HlsLej9)7?LQb1
zZ~*KBe_@k!GM`bj2+B^&Gu_SRT%p|xa5Acerv)C&WPv0z-SC`i{P+_jSGW>I+TkMS
z9cVgx9jg&K_x?k4X@1*CHm%Hy)?FJ4H_i^^l|IX|+g|>{y|)42_<I<6{SFQ}@sIna
z_XU^Kwt-a5CZLdCp!=U-7Ay1;pS#in^XFEHXZ%<W-<waP!69FE^^O}J4zXlo>%~yu
zf1IDW(OjUSNy3?gZD2I~FP7`>;_ZAV&?V1sNM0br+@b`s*~@Tzp+A##I%vbfBk!Sq
zR3g5%m_%C!s<OKy+tGVvIoGcHf~y-Ubl>0Pp-GiBJ=h=2e$IQ%nv7DI{0;>+{D~4(
zrM=`NW|x8G4t0V4C&@;(3MP(yjUc!Gl1qBJuybl1!OV2*nDO*stTxnzt!nb3>jhWP
z%c}xS<ZOt)<;Ifc?15_2PSh+E?&CFsM4Oj5;z_;T+*${7W?F6oE9`&56OlAI-O%Oo
zq6)CDVK=nu3M`0WGQi2HLgelr;?3%d7+gNZk7>9qbT@{xk^@e3KFp17RD6SRy0_Ss
z=pUdqYdZzCeS-hKDl(>Xm-(F$jHzyG(EZd~agVG9Gf6Dq<$vl?2Uvm0twWGgBaQET
zg#5RVF)t}ThDOyg{D~cJu+Cy2EzC=IxolHN?MH+=_TWBzF-@1P*{y^MzCX}@yen*v
zipB5W$Ivk+DfZ>14*OUx&oYY3glvvF<+SaC`mI-Cz13!X+C7y*jrZaW#SC1Yorb%^
zI-yajwk~gMF6z$oLHE}uxGWh1N^KHG`Pl>)gluX@%rX}C;3q6>Rpv`QBjL)P<GgIl
zWt6#plwNI-BJa2MX!p;W1uylW?jA|uY`kB*vP+o7X70s_X(!Q4;NhJ8a2qS*T-b>C
zAmSS)aIY?<!4R9{yed5qW}?n)l$8vL+}6P3cj`<mItHr0)WOu#SU86WB*m9Hv{Ex2
z7cVuU3nMvDe#Y}3P3P00DT_r5C7z?I{X4ijTN#|c>Z1FKW8BEOUm-<m3%zh}g+20n
zLDox?W|=2)Z+3)Hm@#A8Aw$`=j+3GnNlHS!YYcmNz=Sla&*0a^86<w^Pg6}Uqj9)0
zl#I_u?f)#O{JJSj-L6Rof1B|AVP~;wP7ZGtcmsw@M}k|!RGfUO5JLG3u36<D{C%$n
zJz=(#yV8U;R>@HO2R-z;y$xR(JmXHJ{=-P+dcJjMDwk4F35u$B(f9jZ?xfg+tu1ha
zOF}j_&pDocCH)gRKmTx5U<?jjtx6@<Ie0MVF*h-<8WKExaM;6iaqvwBaUbL;<%SDI
zUeCrk;}qD*I6qcdl`R-#Y?#ESAiOrGUod1^uqi%+h@1q2mF*7k#ZwRX*<Wlh;qnMJ
zEh`FEE)Id?slD8lu3`vVBTXk`hf`*SKkNRM!2LFKqRm+c@zllf;(He@;L3l!SaQ3T
zPj%?Uh~cd$Cr8k1V@_%(uRw3)aGb0^kkyCVz;riD!ID=36EZ%)rSWE1G5#~Gx+v_=
z=l<bK2cPBKjut}AEoD}JbUvxYO5qS|U#`s~MLhG?Vti*Zg7#>h#7CJNnGVcEr}Js9
z-$UlJaMv>K+=r9gE|D&)@EQT3Pn=j}gPC}zs0vk1PXYh$KIrs0mZmOygM%AJbE8)m
z;MEvKz9Cv2{~LP@3ig*mReu$XeRPfQwH=AI!R11PqY9$}Cd0Zb`7ofm0zS>PC#Nq4
zB8P7u@si9v2ynEZRC{Uef!cMrelrQB_Kbm?hce7AC5?L<VM~VAN)%*w3*VV(3fb}$
z^!Gc0N|(>VZ-LFQ_-ioh%}*10#+=Y`X~wm22F(739o0x};G}|UfS-^;sheKoQO*iV
z)s;!IHW|Y*7*ldd$BhpOa^CI0;Zm0Dg7yHm`ivuP?)ZgA+qKa3fg<Y=`g$F9v20YV
zG(1e$i7RDo1pB2ETlvonY&_2MKP!w#$=`v3mv%tnf`cp!BcNxgG^V=$g!`*hDPnOk
zMCn=4ey^*1qW)K$-N2Y$v=ZKP4o6%4VK7rci{#QrvziNHct5NkR*eWm&*xGgEA^P~
zNXg(%Zn=jy>gJ;7!$%-WJOE2fC*a~6H8@D<6YQ-H<;DkHh3f)?%PG?Y4y-#Rbj{pp
zWVHp29~VY0`s%Fl=OgZ=auzuD>BHS~PD0jsJFclWqA;i35P!&(%~lGbnTgBEqpA|T
z&R^thS26KtDOJ(byPNpSUeoEkS0KE&FBYwjJ%Q=T^XOXQCD6Joi{<)a@zC)#_)<H9
zN&Pk9gii$q$x5>5LAm(Qv>X3G79MwwVyk}S2zRepZ1dk*<PCnK@}F|_RP6v=ivjq&
z{~Y&wW+?=WK8$mZ2nOyPTZ-Q<#_%I`Xc+ku7Oj^MI>0)VY&4O4$1Nf&TUS2f&R{5%
z{>pa-565XIzqmNOw_%CZ>P*^q5J<bWgQBm4sDE(_r#HD41Ndg(@2FE_(hl(-IYs<7
zbG-1}YM@nX3A`GgN^^%>;t5Ahdi=IsJm>m(zS&>cCwB?-{Wu3|Py5JMYS%$=*CLoa
zD?{|_&<awVI*4UUY-I~~I^oPKI;j5Go`%1f!Y%igVkb_Qb5pkW2y-4`x8yt+wk9ux
zf1N)taDo)I-!x<vcNSBVydo8D3S_SWmqS6|c*5C7IB}Lgo6uVhf0kwmbl^E;JK2h+
z$$b))UPuJNN5#Ese*lFJUtm_<V3Ha!1BV0$Q}CNh`0`VR=<nT+(7uA_TUUOE5&FxR
zv&~N--#eCWjgz7E`sVDQ_e-dXya>(5525oXSKO&ziw_IFKwsow_GwHUth)3T&umx@
zO-1|gPH`d4neD{2t<Z-b$}YITPn+IJJcGeGo2b`J4$tas!rAHqQ)bF#?&@|2aA?{?
z`D?9EN7!|b?bo4wTKY8O-C5XXY)b2wn_)WZ#tWB!LD~yT*8H&pcN9H=bGAdt{@p<s
zyZ<XpwsE5L3sP+06hByA@skgCH=_UgLnuIQ0JBw4f}g5dP&qstuAjE0Unj%S$D<q*
zRFv_Uhp<L;-3}hd>Y$SLa-wBx;r6bz`0Ge1U$erOA9w#UCeL%GVM7>pM6~mrTTXJ5
zGIOzW@HtGM{v9Tpe1;(tl_5<36V_Sf0w?UfUPVu(?_;DnwJGIXv%Mk<;41lYE)^c_
zoWtzeOi6jz9QNhiO5A6DmXB4`!@cQgwBqa(lCx5xl`cOZYW_m@Ds%uv8tcN9ORh|=
z{1+_I=)?707jWUBB1|7=MgbS^;M?(<bTxe&b(xJpT4#xqPh?<uj{zH8TL>~^g#X>W
z5*B&M(`>(=xUAZrHI11^T}{?t@OB^g*%;AUVNQ~f(<eUaoP-JozQBX80-4#W3k7pH
z%YHwY?S5dvW_{Jd+Ws84keEj)rVDY7U^hH<W-xh9iRbqlnbG0f<MES$0;*S9v3(D%
zsizVkzD%CYlgk%b>xiMb%AB8Z-4hkZn8B%vDcsgy=kfOg9-eAs0=ziRy{<s`_B0Dd
zO6FkX@c^t9tYF#|(!w)8k(Q6K5E=jVV8#;>w_a9<vAZUcY<975E)dZHUvvB{S%o)^
z)k!<-8s}8bM2au{X`fsR3~AATbTuKXYjTczB|isyza4>^R%vKY-Hgi}2jJQhJGfu+
z?QkmOdYz)yU>Nl!5(e)OX6X;?*}>nBV1M6I$l97n%TCw9^KH8LCP;@|q7Oh%M<9Qr
zXA<9T*^Y2-A^n_Vj&9XGP_ROSHI=C`-({{eRp}MqHRA$uw<YNEtMd>m7`Nu_{0!=<
zLf1~p11|Xf<>l{RfHlou0L;g-EB$|AvqBKp{AW9MaDEKKx`f?|JGDqAz<bvrbi{lJ
z?Vh1tf3flku221p(cM|_?0Pu={q`Id*02f9Zk2Nr1q!a;$X)ebmx-Ho`y$tUNP%T`
zs?v{70sKqlVk}RXOa)%4)V+2!b%rOAKQrVb?%hI#8x2rq8B2}ZttlXXDvUodg`J%e
zN&AZQXz+FeUR<~f#dd4a`uJY7wynZ73fExxS0_ja8jfWX3czt!9JL;7;J(M4$B9a6
zc=baBY+mOEwdXiiZE7Ks&rPN6v-LSg%fW2TN=+K@O^?28+XIT77W78#5hf+tk)w_T
zPL#~2_Jsx*9}qy^pM|sWU^fbU;R%;tDo}vXMT_@*%#D%i=XXw%X6aWZkd%(D$SyB{
zt#0|l-%*?nQSQrGNbf=JnyDHKJv$u#dnJPLu~|&Us)_qzJc0^k$FSeSh5pr9M#I*}
za-Zt7(D!u^o=ECKr?GG0%uEmVtWk@ubqJaN@d{L1nv1pHC54=gFHKS?rP`k9c-3b(
z#)XRc1EEHg{Gk;8{*%NrN}7<>>5KXm+SHTe07FB6KxWEB2(KE2Du>+B`H~mAEIE&c
z-<M@U)}^>J-wA#_xWXOC5oYnJ)3D9-AMD>W45m3I3wH2&{BP-GwqW%>tSc;_NJ{|w
zk{%?lB-r+RBC$5X+&;DeB*H!MLF-s7@s6QcHXWEBzEiMANK(+=W9-JRhr+Hl6r;Bo
zu;Jk^cnw!2cEI8+>=_`9>AB_9R5_CUnfC}9JT>sHfgkI*8Uw}CE^uZyZgB-OywF&<
zQ;*SV<sM)4qxUGqmJIITj!nDHmG(Tv9p>>&;+6yZ##vL1pFMx!`*r#`>pW~7Jb()t
zIGoPk74u*AAK_NMs)9g?8?dYySly7Rbne+DxL2CbAC+(e<Fz&r5!C|O+k}jCL>9<(
z`mixNnv6=#+2~|DR;=iVJCe;{uIDiJ&R_;JmC<BnqKn+%qw?stpdI(j8_l+s&tSRB
zW}?FcISLGTCon#~pm&iuY>78#wb6#0#Ih~i?^Pq{it9)GR#V0;ZT${2cMc@e&Ln&=
zDG0hZ>X77zJa}{X13U@Xh*{3fe38;V-2O&`I^t&1@X<-)LFSIEY5i8%5i5!LOOHZ|
z&vbzqTS|6EA}~+w0la%G$p(J^g;}q+qwZIE+Ux!j{tEZ^#gZRjVc7xR<;n_DD0+sL
zlFn#<aVLn{I5y1Cj!$}T%C2l^1eg5%;-*|J%6hVy4=)>wOOtP6nRF5n6Ygtk2o&sR
zV%Nh`RNLo9T2*;CRCyVzk(a^kar?pJkvth6i=yRk=J7KhTZtE~HKSD#3bf^XCmz1B
z1Fsxb<BNRFAU66gh6RqGuKsvx9VkWN*QT)ax@n|d^BNsDOrXk-H8{d+G&>WbM(!iu
z<IyZ0A7|`g-SdW0bJrOho7je7O0OWdvIZighO--64&oT~ILJ~4Qhu&NZ?<PZ)%7%L
z@|UEYe^;`CY89xTRe&!A7V7bf@@!^MHOIIA$Lb=a*qj%^G<l3RH+f3}D=2g)>q37x
zy>S*7^JE!Ic(@u$w!Xlx-?TBw-i5y|zKDLS$FV(5c`Se9J8URegb(zJ_{arL5UszL
zGwcUC5}nR%-?9=Nx1I$Xjg>Grd_6l|nn(HRceo9H*D+wOI;~BbO4G81Tzv0J@S7k!
zzri~Avuq8>84q9<8K#(c-ycE>c3{wFfkU@^6CUqA4TXRA@TWBDasDm|ays$@LC1)l
z6WM}hM=uV&W5kYs-UspDv*7)QV)U9AhgW7u(vGyzaBjpc_<{X=e&sXh{gsIucHf0_
zeQ!DUxM!&LLy@U!dEr=XK{_@26*p1V7r6P)A*3^$MZ9%q2QGRt&1WCrV0So;^}EP_
zy8I4`h5l!Qvm))ks!F?_`$LRcBs2}UhbvF4#T%WCV7D}nC5Fr=U5)t|>Sqr!4VU={
z1sBeBwLZ(fGKWoAID$?6Y>#c0K3q?%2PSO#$Z77Cf%`^0roNpZ9?|=Y4-L(OWhcCG
z=Yj+1u_>OSx7y(wk954YLI%1**3%k+iS%Xe5Inr*2JZ(&tcb3{WA#Mt)_n&mN{PmI
z^OHp$D-5YuQ=OhK6gb?cRG4q`e!AEd#8Q1F$glI3Xspng?Xdg<9Rj~erF=UMh;e7T
z=X3xURRvXi2Cg4?55H*n(W<`Fe6VB$>#gX7vtO5sCraMoy2WPb*>@ganLGli{u%gF
zdK}v%*u<tS9SWsewqxe=P4(iHy6oW4F5Dv{<Z3@Xz{7V;sBqy`95gbUoe(Y_!`>#M
zQ_*oaRXiIKOw5@377cnh#*nu+G7$QGY3Sq>4Vjl7LFUHIY|<~``!b~h{A1l{ch*I;
zT%ZA?4@aZ-t4@I*xf5z)g)UvBCw;y#4WhS4v!!aj@O8>=eEv6pMz{~7-__?)&nyLV
zpS<DxUPQAYiqYu&@f9jr`mu~V7IeL_5+?Ugfy6(JY*^)FI5I_^cApRBE)>}?Pfg+P
zwJWot<vY=RZ6Rn466}&A=2FPrSHREjfR`JJ>XkN0kiX;y+?JqAdfLx0tIUE`9=i%T
z&K2;cycCK?yTFz-fgARBKldn>X9<t@vQMAyqix(Z-n`3!=?^htxF-b;9CW5Po>FYs
znj?JVjxy|-qeXSy?r=cJH0`sQ3ZD<!xkf0}aW9J%SY?(q{WxyPPknKr)@0pNaotNj
zmXWaqx9`zry8dBUcVIW{NOoaIYS!YKh)q~K&zcM~4ahm78oxX5<C=sH^{O^c!H1Lv
z@4PDnv+-p#oN3LJs?T$G1Z#ZRbs4esVN;s7bu+xI&cgEBIgn)#g(vHt;)IjZ;G-G`
z7K$~{EU{4R*OJe#T%3p&OEWnG{ZkOVUW-j|G2{(eRA@td0aTxmp!ACZ9>7z`A-ghW
zXBC67+<k29&4&&b0>26w+WuaH-I|z1d95Eos%<XD%NAfsI$)sc1MKt5hR0vO;IVD9
z*z~sJc=?+Y9o3%#LCG6oV!fC@a^IWuhvbSLkrj0WK1A(|P@0>s%I<G@gmUv*A<xa0
zY1b~|-#8B<53O0)(;Q4aU#j@8$<4Sh_Zha8aU>gc9$gQnp<41*Xz5m<xxJR$#Y!=R
zFNsC7Y;WrRZwcOV$wtT7Qz`gp8s$zGn5YYbvG&wH5EXCX&h9Kc`^ruD{yZJV61+FS
zEBV3HG*p<m`J`ad=RTa;JPi}t{=wiZ6}BKu%*)1x(T1<SR9Mvuv&PgSyZ9X2dnZ#%
z)nuaH9MG<prXbB1n2@E0c|%&Dc*ha$)k_t&?CThEtIvn!o5oQ#PIS#{)kb}lAAHun
zN?7rDJOq3U0^jf$_*QL6MLlg`6r#k+g+1M^_8iQ(=|n1n!l*6q2|xMOu=?NEFN20k
z8Xn9r<BgNuSig_3N7kwZXbz-3Khto2{!cC`D2c9>kE4rz;cRo|Z0=I)R$LLhjP3oO
zsWb7$>W%uhAtXhKq6|^Sj7^5;>^ljG=9C7dQ5q;klakEJ5Rsus8B%;Dsqmb=l@cjQ
zk_IZ3CMgX{H2lu*UF%)z{R>#f^BniR_vdq6wNoiKbRextsuu;BOY<_PbXciF8(uC<
z<GiM4f>`ntzHZlHUw3z+!iyYAbakMmUF9$^S)Y^&CxXl(T}1ao<hry2k`wjVG|fN!
z?36szF0f|?_ENOc$AVsK+lwRW_JW1bH@T+RAa<y$$AscvsMquihT08adO`<qwBsBW
zuVaC_JI9i&a7Vw}5<`(Izw#x%H88pH7~Btjg1!kOaNH|KkE`A3n_nZAjhA9g+v~(i
z5uf;Jcb&n|_7Hx}uR+CLj*j0;#PT^gq}eG!g*%U8O|=o5IA}Y|@&xXLY!5c?97;{;
z54rj{IT|o7m>F@4*`u;R_OIwZzw?I-d~i<3fwki?s>7IkE=S|vCJC|;STYmF_u&Th
z*=&TBn|RqmVdrt7hy^_3h&;b?c4L?0#F!KuGiNbrxqQchN<;YeN0sB@ICVy(i+(UU
za&{fU+WAX-dgDBb)Eh>7FX+Ro{$UtrB|^5+6cV{IvGTJxY<Q5t_6PKG;}@NTy2m4E
z*}`y~qI?ak`B0kPKaSab96=QqYvFda0hGxk^Hq0&3TFwshMXQ;vU(%EJQGKqBjRk8
zG{fL`#0{?HObLpPj-gV^Al5j46Yh^p;a9HO42>&-VOeY(HyM|3%hGtXiXFyg-H~U1
z_dG+*R6Azhewba~y%X2_tb(&Y7+Y2u!^As^K`h*{B~#uA8S0f(5UxUduE+2Rd5SQj
zb0%Nhm5oEz9l;K@cX;a21Tydb0`Gt1@&T2Z;9q5lT5qQa8Df1lZkZ#kGv9y`qCa8*
z1Ptw4(%CDGNIqAu2V7e6_-Ky+`ZA;&J@yK=z|^4>YY__*BPW53`%TeanntaIwhQmw
zgY0m%57R%N0)dUmAb#42(Sh5^3lC#Vj|rVJk7g<b^J#s+N%%d`jG1@FVb+f|+_399
z_^7Uh=hrL4hk?ROquGHOHy^>fDp6#D%jsVV!?Vivl<EEojPDJnt!b^Cf21&roA1Vq
zr5%{7+X!mwO-4)oGz4=&W?&x8eH|9gx84%wFVj}C`)hLP&L20LBi{$hxB+DGd>+(k
zzU1uv^oaSdW3y-4u(NtzIClIRa@x3r4b4aa>v$z*bS)hA+c`2-UW%Q2=z{u7wNPS@
z3|n3mjDK72bNP;G%z22wd-6EQ=@grR^XU!v<-q`Uvc8<pE;nN~Uk-!$^w~`Ewgof&
zF%#9y+C=9QzH)u{hjTtpci}*R*%Gie69%rCM_tRL7%p~U-QGF4^ANyTvPOpwwygPH
z98SFN#d$tIfonoSVZD|q<C5FZ=~p<Peu85fxicu_tOhAZ9bgR`yZGlQN#jED`Nf69
z>FUcw*y3BsbzG0boX}XTT&_q`kA0c!z88=XQHcu{Skj=b&*hu%e1qlz&a{706An_p
zf*)GiaNvLAvD$w;xe2}Cr^`J+)4>Pa15^0y^pjwI;ugLW#o!EMJ2pqyX(wm)g2J0;
zXn1rl6$rf+t<59Y%k&{!iGB@d)i9nVhh4{I3P<r_-Fpz<*+R9K_1GMNrPcpE8s?dP
z61X;!^k&EzZ1S*Ws#n@!j6ovykDE+moTDIa@iWeSZ8v|;BobEZKEN<HEAi^4G${NY
zjB5@b5g%VEF!hA9mY%7=^A~&zf8?XN&zpt!{k|Bu$wDEkB^xFvjUb=cNKo7L5<iRo
zpopJ{PMw!wT<T%0U!})y2@IgDkyRk+sti#lq%fhWTl^tPiRnL>E!fEIVRJ|`E{xia
zHp6nnLLmsw#u&2o%jMbF&ttH3?QirKcFU<f^QijWTilnng1eXZ0K}_%Kvios*S$Fb
z8!}y4!MrPYMD{A2aJU2$);xsLM{WFkPk;9JO*yYCwHYOsKE%^G$)FQDhKAj^&Slnx
zF;8oC_T-!{Ho5-6O}hrO15Mo^kuS?PJh7t>OOJxtj=?l#<uh*6U^O=Kyc@<wjV0|t
zQmkaW5({kCp-<nR!Nloa;4JKyt_q*a#P8!kYjqBO{A9*5w%UmlI-Y{`^hgH#odo7$
z1f3Q#5+_n_ls}ZzrkHWlS>W|_xTU4finB8?U{N1uvZMl)Moq>!PrWd3>^V5|WfDrL
zO{ZHXo8Z5_*TlbyVnC}+k-?Q$m>MSV_>jlY2nS~RF&a&5Y?$#IN0!qdaPdPQgT>!4
z2%239lMCh9g2czT;lm;tyJa>l-<5_<pQV`lE=&H_U3pmABK!_TFSvE?`B2?t15LY9
zpxgNXYKmGQH+wyW*GW;x=aI~I#dEA1<$#;6d9laB+;!-)4|rm^64UIy#Q*zv5RAs_
zL-VF|)LJx)4XrkX(*c7aw9T5jblRX`vM#v^=Lt81+q}A)7B1>_XJ)sBUe&=={5Ld9
z@DlVu+(K(Al&OL9y`B&kFU^v!rje?FI#XMA7sHg}VOeejr``S!_Blr}`;|}mLcbU`
zbDBTaRqsR3Rnl-TJAqBjyUclBS4FF>nv_|kPh(PA;OZMI_Fm!z^0G#F{9_<~N=f6K
z&!5AZbLB8{!e*AM;7=2#iCDLr43o9ig^Ia@Xh!Z@YVdgj)>GUm%-K-r0ISorEe2E`
zmW^i@?Plw1E<<(hL-<hg9@<{XGNT8r*z|EW1-dMwL&rkEY+orR7W{<CFXC{YVJ4by
zN`<L?2Wi6RF!uh_KsG-nLGbEqhx5~$F~>St0FOn3o0B<({uBE6m)iJ?=f*Sp0S&OK
zLXPQ%-$S|4FL-?yM*R}0?D=gTr^y~iyAO3(+3*PKXCOCsn*lu#9031JYB;UaOQ=w}
z6(qJfk^RsPlrR>d<Off*@Og(YOgMuo3%&NkhhUBM4C-pT$JJNsFtc$YHte}6-ApaV
zf;1&=_LBt2Uswafw&Xy}o)>Uk?K>x50bGM+34HA@gV;c0{*mo^&`)$>I|6iQ(b`e$
z`>kb6XN?mz&$OlkYu3_$9m8PiZYAu#<O7zI#?z$m8|V__#9rUBrRkkvY<Fiktvqo7
zWw>piRBz0(^RDB_X<GOoB^S5um1W9fR<l`Yx!9on1~&}92F@8v+0qeDQS~SXGbi1^
ze*(v-^}|Rey{{er7;3_)jFC9eLYB$hb_2}~LU(z3JzUY2plv_);+s%!adN~dW|~z<
zzR|+Y$8!MtTBuJ8Ia9Ixw?eGaJWl#C)nL0vi{!gk3Ih`>ysekajfmeu0h?ox-yh8u
zOv=WdbAokUIQQ+EFo!-bF^7!0C)}T;?If9Y23^1J<UWoN(bd{ED1Gu8@$+`k8-)ru
zv||DFglj<LJ{_3iIs^}k-1v7whEqt7+s?gT0Kuk5&?%@MRToXB%XjBfZLkafpH_wF
z)T?!57`c%<`p{dH=WqbW_y$2mP?0!im^>-Bed23b1s-Yk;NZaqGC3#Y8+M%(W;3}^
zI$2;J)|B!-f{k5teizlcD8oI!Xz@&@PK!RD#_Y5j+?30U<FD6><ZB!FGtSSUX{s8|
z`CA27{;i=3i!(4YG7tW%?Bd3!3jNK_A&}dVgs*gJc#rvod?+^qj{G-<eO{!;qAr@V
z#fRM~{mWg>=FxEo+{WV_a);^lN$5Fl1dAvRp-TrJ;K6^7ISIQ{xcK65uBFXboSL_V
zHu+5@tue!aj2~cKv;{n>nL%zRD?xGEDmKh@E$N^4W*&QZFj*x-S7WWo_@OjQ9bF}4
z@AqO#qy@7&l1qW7#=>+BXS!o3Lr*715-(Wp1LJb3ufUjEs+O=jO4I55#DTPZ?-z(2
zCd1afR>3SqX<B)6JEv+Yhj1^7GA3KnOu<O}RN)?oizo1_d^5O<ORwVl*<J8Fd@2|7
zs|)gSr!x7xg>-l6A?7CR2bQ=U##NW6QS{(g3^~<+8)Y*o3pFXeE(XP=9ikF}r4SN$
z1z%+5vu|<tLFf1(&@|czZR0GUUnPkKN(y{;NelKeYdktvZKBiHby#Pk;08D<M+T<l
zc<W*tY<&~Ota=jJx~pb*eb5w6uVWOe_xTE^_nKghYzR%#3F5+vyqVpTUKpUGO_E)Y
zxW$4x<nDtzctQA{>N`x~{7y|J&(9Ch??=4o-<lC@;Ql!<pB0Mqe!s$ztIe6t3{94P
za54_P_7avnKS4?H-h%TYlNp=-gx7~>fX|j#DB)VT|FYMyGN)!RId;u<`_OVQnxTU}
zVIQ$l<rBu6DRU1?tHJ+tHg=n9aFtuG^PxBnPW}leg{M*+_y}z7W5=Mp=nK5DTt|&p
zTOn}C2>y@i653U4hj#AE(W_R4&8SzQ585-Bp`<f}UeAEvg%))Fr7GJZ9zY{58$s#d
zF-&(_7)q-Q7C*L5#SJd4BF*;WnB<y3jlFl^!DUmjef^7je`%mFE1Cm;r)_|My(&=t
zZ4Zj7^5Njs$y^ewgy*e7Z(AV`4m?Nn)IW$;`_ec^Lti)`Wkbw%EScX-fbuePe7i~u
ziYg6RhT1TCZeRr17lAc(4F7FX2Z__0*>Ib^?DM9DY|+HU^vBYf691{t_q&J0Hmbj&
zzWXNbE<K292hYRl*SlcTZC&>7496CytFxu^9oWQ=duV>42MN6Z+7Yyrf^+p~Wppqd
zPwj@GYjxQuLm|`p)Q~NU>%aq!%1mw37PcUEG--*7#K~Uk`7^Pd@MxqFWoK;QAx@Vj
zn;Wu+`}Jt!mSWiY$$~DLr?L5BXNsw>N1JMO{Mg}6c9$Pw+lyrGj9gd6nN|rFY@<$b
zq)tC1SFzi@|L~lUO)mH=&94h+fL@9w*~Ra%VVe}i6@;Sgo=X@uONM>@zLswFYto9K
zDiEd5W-$pWl#}a87bidFJ&QMRXWbWpxY3l_ZFbP8(thr9hci7HwwOw~4#UAW<}74m
z3^!Bq0^Be&XKHK0psv0e_DsKs!&<MRY_%SV<b!d-sQH{&V-US7-h(0Us%$R3-GQg|
zw9xUXJe5x`;*VT8&&v*)0kV#cA{l=>(XRqOS|4IVJq5*Z;%*%*+R}xUmE~Y%vjiWg
zD~R@KJ;86K^?b?KY4B|SIKFw~M4B2uma=b7!x?)F`IgpL^cOEjc`Yf3E8E3+>W5H_
zTqBfBNx<m+yI^^;BdfFV<fL2`$vm3yEw9g}zZncKz8nCv#Y;&4j|M3Uvxkp+eqyJc
zkL~XOMYjBf;CHfdC&SAlV4Sua-AX%%{o*Vp`HrK`!x8wgU$}t_&whQ>CU!(amTkA5
zM8*?s>Er}EPO0x5`Yb+yvn`Hb+=Vpk96yrX-jfZkqgCne-9+??SEQ|$zOdUoiT}Oo
z9fps-i|RH?w0Ke|nGBYrt>=G$N#}HKxU}Gfsa;HSV%O6=xvP+*_ZB`|a#Rv_8a8wV
z6I-Ii!iSsC#QWOJ&UzuuJ`#;foOIaFSHkyoS_bz{#hblv9t`ugzXG==d&rLv@~?YF
z<AnChHom(~aeWEBwzuT|Lj^eraGLlL{bNJ9XCe0m-d8B}yzfI3Wo<UjI|FLG3gF1S
z;cQ}yK2&@(L9e7Oc=C`VRgV4y4YE3vT~<&rLe`ayo1Mu0KtpzNs|E9Se~DKn%o9hI
z4P^s#)Tn;gV^L&}9X%NtOj<&>=K9#<816fU>{F!J*;gy!LU{=c`}`e`uQ-qNR11uD
z1>-d788mz45?Xp@E6%NP;6BBU<Qz5A;bn_HbL}___vR?G9PKx7J~tFP%O^27{2f;8
zc7mfrl<B^^I?cU(oO3vvh5l!v#FPKIFtD$Mo|0*>3p}84oZw(s)`6S9DzVR1^I_Ps
z`~0m{V=+%!1<TL(qm}#@7&<$R8&=T|ug{HT?h`t=%Pw^waX1ej6#nG4)x3Z&at-iS
zX&GfdJ`X4lw0wjrtyi=peJP>4-WtF!a6ZMYJarFeeD%Q{51--bk5g&&p(0pQ9L)x{
ze?Xi08E`h!knQm1&{x8ej+V*NaF&4cW+kxcdORAmZ)S0Q!ECeOn`p_=f=#2IS8SRh
zFeTi#F>z}>6mp$>eA#gN9`+r+1qHKWsbsKy{0{rS)j<BiXBA=g0-M<JEruPCL0r@f
zf5Hf7ZydnJj!uKVS0A9}%XTn3HjJISx|I5F3?-k&P}Gf`gAYWKY`8)V7ARTLT)#g&
z>*YZ=Uf?&43ZdEoo5kJ_HK<)U&rCDZA<LpT*gL8Ls<VY&@yDU;sjf9@UoghiLJq#5
zq76d(-NbQseZY3|P#V5)I`_u=BRt4Y;=CKKLga`~cz(7DZL+%$ZNL6O_$5s$7DeE_
z>wodde;SO<J}oM~_8N<~e#K-%BX)A3E3TB{xoNnLRn1z!bOy!4-$_z*dVVa9iub3)
zw=OVXv<vw8Zswm2DiC-V#UiIAl5ljVFQ=&34^BZsSMcXoh>jdZksd1KQ?m~aHYt*)
zFk{GG`dTox@4|&uJE-dBESj&q8Xcck!E}L<-7j>gx>hZv0=H<KE~Jj!OI+BU3I4oy
zN)10eV*opKLyweB6yTi$4`KZeeR9fr%iG<tW%o@a*r}2B?An8~SZH+^XQ~OF)59mJ
zez8AmsyTyMhf5*vm*5<X`GYCtwVZlVEU&dS2pV$Up?BIbZpRR3N>EnElo!6VQz8|L
z_RnMAg<kGLk79TkH;rwvS^&=7P4Lchw7?6uwXwMN77eFw#%D$%8uQ^lKC}Efe|N1W
zxt@9pFY^zBbEzC1khsU&I<3Xu+C%9<XIe$_fJeA{#$r}_uaNb;XcB+a)~3a_$8iYX
zfb+^7`4nFpc0DtcSHHT3buF65+NBKGfw$4PpjMtAckvT0bGX1wbeqH8j&mTTBPIB}
zDo<1^<fV_NFBjM|o&4&B#`Jyj3FIa_l4On{OB=M9SIbprUdOy?d~YbPcuR2Ojr@zn
zI*ROIS{Yb51=Dq{eh3=%4S&oPa#dA9u)V>NwJA1nnKE_UI5WW&<^L7u*sHQ9*CHtR
zv^pgIaAvoz7ee17A-9$z#}bO8Kz1ggak3e0D0Cq@n8FQ}?ZZvzLq4nD!dSTwmj1RM
zn}xVV&2N9u6xfkB5?9mL%Tp<J#1G6EH-Pm&4+E*aMUeStDD^jA0#z(ww%7iH+709Q
zFx3a}v$zPq`7LB0s)9x9r2g{HN5x|IX+Qe*<T+0{p7`m_Q}{kkPJE~9p{Q+qKTODy
z2OHM|Og485_Kz3bCqmA=?!;G6mW<;+{auQ!FBerPXPUCFeyuR=suIQaRAJzp07_Y3
z2JJn+V8=`gdJ$g(iGpjyLPFRBPJD=~vQ0pdBKfG@TiBW9Ls)6Tb>2nkFPHgU6(;lx
zZZ)-^P;)*8|7%+cXIIJ)jt;?R@73vz!eP8%ZHx8q9NGEyE0`{IfjdKoVYc>Dd}MJH
zR~B}l^s5klqq75(HLSwN**X-);>h9Pbaa$(L&IbB`03{?`Vu>WF0T$_vn{W4&ej}L
z+hYWG=T8@#d0&CT%@gR>;yv`^z;<}Kynuci@8GN-n6rb`n_1)eHKbUU3QwLzLbcG(
zeki-0ERz&zpm+*Rs-J}FqoUx`!EkzHH=5o1ro%Er^w^$oSBP(_$BNnBurhr&HE-bP
z{gzhHyD7&^6n&}X+aMe$+sAv`c){TLM`(c(2Ua;<+~4v}?(*kA3<<tSYEfT6@y%)8
zL2(e7NDW|TC!D3Rb@#d9hXNtfBn}M*bwOof3bcNC!$sG967El>Xr{dn=Jze(8d|q;
zhT`vB)~!y?Dfcc6U-N{oPVGUJMW1me9%D{I&!l&X7t7ptl=nL#%Z`~!kXqIv<~7C?
z^Mq`8kjyN$$tDUrjzoeK6@c-_L9k$<7JKYDknRsaZt$vwG$?N*y!t$k7L0D=W&Xso
zZ69putLsuoh;d-kst)2ze^Zc|aSrFMl@c#7FX2_EJmRlU*ugd>{;YVf9Yui-VNe}&
z6Etr)a{=zF=~wq%2)ZA^zPB4uZbAsC#{R*prz0R^$PRQ5Znmv|C-ji(6xjheIkH_h
zlwH|ki`^sCV8_k3LO$x9@I7gVs-plQdxU&fvXF_3UPI9?&dhSE0o5=3!S1e|Mn}iq
zKrp<^f7Kqq8V%loe!Mb8HYmeD^LE((TZ?7qzT&6YtVHXBJTG_8h*Vw`LVBhiOHvw7
zc9~HyxB3rnbHR|^oVHqIYc&h)j{QWZfZM#<tGDR1!Wo-OTS5J2Cikzf4fd-?qd0FC
zj&D-|-{3?_y}S@b8+F)+8S^<q>2G|?%S)UUH-Tc0)#7c(_uR@pbw;i~Ir)JbF()kx
z-nMfXIZ2z&^~<u&xp%k~^G8uquPod0XDCExeCB>{Y=o~SS=h2niMxD6oo&0fjHQh$
z!JM#juxqv8$;!~BX5VKxb4V*!zUl~m%)0=+^OnHRUpF!0=vcPT&=F<@<w5PwbYQ=&
z>DbE&#6PP9?Ze4nU;GJv?{s8KUe3m;cV3~BgfUzUY3GMmZsk55>j0V8{rKX`M9^QG
zf#>8+@l{`#sH5oxTFeZfH8ulTTe?2x?o1ce{HG6o4k=()rp0=s6&M@R%hfh@R=7GJ
z0DG0;u)k##H#kg#6&g&!&~GAEceNAt>Ph08QI9}>`W$AtO_PZX0&#w49p1bhiGwFE
zhnEZUam&yLP+~ucA29I`X!y@T=~24u^6+=?G^31ruBXa4!Qk#`*uYzvXy91?Zgd{q
z%KzGF!72t_gX;qu!7S68$(JQS;gTSbniL~C;}OT*F06uvu}!$WVL1(v8Og%N3fx<_
zBV5EHQ})+B560b|gwsj{Ze&6Y+IU-#^|xzqC?kTTXAfsT##{40?JJ<<J!3aEZN__o
znZ5qB4IQ&xNKOVLsrODQ)=U&SJ}HAB-tG-nk5l9ZG6}Xgbr6k_xeO+rGuW!wJ@oDL
z6*zqV4sI>4g`0y$Gw(V3sH<lQy*#v^vnhCi+;MpfSeK7W0`yrFifF{SUvTq;0n=F@
zMte*4(F$8rQuPR7IcW}T1wRR1nqEiF!;<Ed_(Q^h3AB1b6<S4PK>eI-uEOLLHt)Lv
z#dl>P*nKAZs*=Zf+MU4iMMmPExk|Wn=Ue{h<{6~gszyg_^PyHJ16@Spsc6GeaUGut
zL%I*pzm^$9*Cvug&pY5-h1t{7bk5<mGJP7W#Ol?QE2I?>S2g?x+A1?xpMM+Pkk?>k
zLN_L1$b7Ntz;C!d?gLludIkT<O-0&oL{?W*anI==aP5*bT)5^SPIzF<C#vbge^)s4
zKDwQUUo|-YmJFHa*Wn}WZp^weil6m68wz|U<Lsb=cqulD6fbJSuh%o+LY@a)Hat!r
zQhf+!9N_<)x-ODE5e*7L7vWe)Bp2{jfknt^(*<R3YMy(Vf?SU9L3_e*`wI=GwX_+G
zrJlet;pa8in$x4Lcd)mM_}Qk1K)cYJNyTfh-_s?@<j+sk=}l+R+Ye%_ryW{6iX*GH
z8nj31Jr>oDX2at~v9jX5m_20-l+H1t>bo0Q*7rpGR_{)qKD_3=+!*g>sKaJ?+R+yo
zM=H9U#V(Iaqm37}aP*EP+}T5KQ1{CpF0lGP{MuV8FhoP)O7ACr>Yu+{T~;slm%oJ#
zU$VK-&0FzW+Z*U#V9)9n|AZq>2JHOMG|>nl*Z1;W0&0(Oz>~**qsCDyUN=zK_biWr
z+MFS*W$Hc{suGWNH?{EHnGh6S{>*SkGW`7euA=jNJr>;S0GCi1P|O~UNkuAb^A|%}
znKKQ11`mZixA&mbGzK!|!?<=`C-`vX6htRJ2eZ|~Sl_dk5aXpxCBn{ZpT<qpUOE9L
zjjTh9m1&{?!8PMmHiPw_9><hyR$#qvFNVH%fS^}0<ho|QO~DW^%0G1npC>e<*Q5m8
zc>bH%WBUkHiylT2GJ3dfk_yiEv!mL*!||0%BHqaB0OJ*H@O|24*qt(%eH!G;=M1QV
zl)^!vUX1*%4tuE0JOnM~li0<UG1Sl|Mwtyp?7wS`kdti2*$r($%MrD<H$D7u!y^;s
zdiE3iIeraQ?gTOi-41x~r9_tR>p3s|1<=s-1JX`C;{5&?Q2Wo1+>#f2@uuc)yx&=Z
zJKD$M?G#UTa{nhB*i;0wO`Wiz*Nsw6TeFLmTI{Tbz;2b}Xr;jP)y@q<b*-PMuwV*3
z*3w~ZCih_Ri1+x>XSsO4$xpnhZq8bNoEOcy*^3)jjv+^#{irv=RG4{tv+gMzOBQ^Y
z2f~3K>V84XN$Z%ySp}&0Wrjy$m7qOBnG4;<;hd6rRGC<cMcdU_ZB8|K702RvZZdqI
z8-#6X)v$Plz3u0wU2J}79v<?&i{J0Y;-bi7U~lw>Ygn2j+#N?S%krJfLNb75961Ha
z!#d#6os~@fODrCbeg<WyU*o)>xhzUfo@67}Gp9&<@t_?eae`qn*V|`~2js5cZgnkK
zF3fE^?T1oI>`1o2*MJ%yWq`)%6#jtaYI@NA690@i4QWx2P`cchC0~!k%%{iT*u5gi
zl`6*M_@;7~O9^82XL+bw`4Loh`%~d%3;N?2#$C_m*o~<z;LtUVN_!-kd3p@5pFSRf
zeuvVh*RN6f$Z@uQrV<5U5x$(!1+y<Eaea2*u|A*xmn~|AO{JN<zpz8>_;mn2{F0)V
zDem-c?MAw4@PeOeEy-%vbm5Ij88mrM1r`b4i+O(~!Mt9Wy9us7?wC58`#P1|>1Rcy
zmBp|$<1REf3c1AIe!N>>h<lcs@~^ipW`1%>7^!B0KLT}d&8?a2{?_BTR$~iX?7q*1
zh1}u_qP6hidIPR{=WsUk9tTR_{aI>FG86>c!F4lhJXXCGzRlHT=?atJgnI-EoO-PH
zoC>z*o(X}>g)HA&pQ$cP72R@_Vy2;`9G#tH3l^invsQ~r<0|>|%Y=S|TcE(pf%V`1
z4g&)>fPRNLTV|XOsrv+vY{3aw{#g^8w{B<Y=cTDqS0A=%JcqLx#;kYvWIokT=sIi7
zW3uh`{1d-;)*|>|S~w%-{H+L0YQxw(fdzQ<O|_^|*bnO2M$+e=Fqrl|n0aov#|=NL
z2vJizVIVlr?#M4fZ>61|knO`PpG~9pMZR!G^%;EY5Zrbi+O+YnNXU-*K*8-fY)XU<
z%Q!oUWG|b-#KtYceNzE6+}6{LNdkjv$}Kq1mP!HtouH-Pt>Bo6h_Tc%@#DEC*(iM}
zs@w7x4rjgMr_FwZbBm-PYvUnQc{&Uu`>$|U-SY7KaVxS4&jf`B?d3c7n~LAGKM{SY
zHloM=u^8~a7>8eQLfg|StgTWP`d8b~;<?$NS9F})lDLR&6%*DS9ZjJNiT&sK4sX`X
z<SK10a#uW4;c?vxQ0V`FW94!nWVSjRlJ*25ciqA_*(2B!>$!Yw#UR`;p#)VP9v4fx
zNr}d98w0;>Bx%W`Vw|OM9!IQQj#`<P7^Hd_e=FQZ<HJ3m_{fe$>K&#Jl@gd86a{<k
zTe69QZ}RV@)BOGdXY%7V(Y!DE?B0rpSSxV(WhbTycU(V;?jA#l8Y5wYss`I}s2C<z
zKF5+%7qHjK1>UAC#_ZY^Y(T#$^_~d95Z$kEd$BTgeV-+`m&cOn$~mycz8F?UT*Wmz
z{wu3JXTwQcGGLYg!Kjs_1EJ$SV_8lMwidKOw)Ai+SjEAm!~IwvITMtA4#xH0Etpb-
zH3r^uVL^rfYg!*-#2pFoDE$HCm_3`N^(!;uVr9%fn!w+Ng?QlaSlGARm+VLF#^SYd
zwCMf`c%6`oo#Fb#Dmt;s(2xckR$(PWzk<o}QrP)=yY2ji4!Az32|j5bMvcXzD5J{(
zkIdGg0Oc!i@6`nS@0bVMZEeZ6v`)k+B4d2jp@%&?CSZf$qk9=~8BR7>F@5V?xH?UW
zef#PLQ#&>3kg)8$VYC?3-_1j<;~Ur+BMYW8L6_x5N5h?Oqw)62r7R}?IMmyl0m#jR
z=jNx;>QanoKzbrqUE@Kum1p_PS)0h{?helDtRd~!8ZS1OkjHLVjTiV*nIPY@hSn&C
z)831ZF+Wii^n=6b?<#!?32Nf}UQ4ng=?`(C;5Ky|y8%02&!mj)NumK^9ys1inGH-{
zM|S#UaBAyNw&-&kW|&HIJ7blZ{(MasX7>RNwm#s3j_1P@-9B!zel(vyxr*~VFc<^l
zg2m1~LPkQELH@q%&A0CodP}A{6dh*HnWzuLg@gLBXZi(bae2p|m}rP=uG@k2S4Fn$
zaX1CLyWyba3z*rXE`ExK9)8g8fun9`xJAEibM0$K;?i9S_{}MhD*lFIbHy2WJLWeo
zIUNGmR%hX;_k+;aOyJASt`aX~39vvh40oC~V)fHAU~bXJJ(#G<vernm9okJ$xlot|
z>7Eg@oDV`)<1?1ahVuo<OIcc~&<hi~Iif0aa`hh#FIBQ2^<XE$*F^D!m$w1O8j$V6
z?SzlDSm*JJ;&27v9QV8i>3OOocXoxqwo#-<etF#Ekp|RcV?pB-?_m43R(@^lAo{sf
z@Uktor?;o_IZcU=XtvD)C;ND^GX1llDL<2KY97Sq-aW~;j!P7gfigE@{a~8auP2(5
zeg+?{TfiPx9O54y7v3>WH(~JjE^gG6B+hAJCuqBDqyQnit#jreuJ5ZCKHVxbz`%fQ
z=~@r+&!334zMI7M2!4APX+!QotO^B`+QPp3UO2u;8n=iq@cG?oFgGh1-)hfetp|c>
zPWpCqJS)k%K2&j&f6K5{)i1b$#~r*@{UY$me*{ZaUgP>VHmu^4IT-|~fZh`xE^eH_
zw*8ult1*P%<)X{fm2zRJgFAXxIf1tBeyCl3TcmJC3o=u3`Mq-lZe;ut*6;BF@*YRB
z8&Ruip@ud2&b$Zb3SQ!-aq6TtY%TAIhuP1M7x|b>23r^Y2FYE2AU9cX2`*tw&%PU^
zo+-02mY?BZpe=hRbcb#x_rr(;FYeXC4|ukEI17|N#6BA((mc~hHuK#?e7JBHzLxO9
zH=gA*^hW@1;+YL|Eqk%hunh8s?!pZ^AH<gWUr<_iF!g-+0me#>Y@VnHK30rkwHtrH
zCuMyqnspbK8Rdfd&BM4gZv>NhsDz>{C5&IZf!sP)aL?0si(A)S<m11}vRyWP@OoK7
zId2q0Z-1<3iLWcLIJyr#DhIO|<-xS&{azR;?3x-vzk&SA3=nco0{1-+t|q?%@n&JZ
z))GgSQzD^n><&2ZC(O22o8icTMZB6d6BsLx;o>SM+B)5cO&NZRs}%=Bs=PM+k-Y@(
zo{gYQCz`p3qkB+hGx4VsWGVN(38g;$4d<H|!ar407EmtuZb}qb)4W-NzwQs}9SFqa
zbAr3};c)hLP9kS0vZ0G+f`{168md?6^Pc-p!S81XwZAWOAzD79DP;N68`VHz{w5f*
z%$$sV9)?l-$I|beTKIn7CT4N%Cx&}`<V{`+oa01mnq%Q5%x#4`89#xReJ#ObI^Q|J
zHHKW-sTd5jIl#X7t3vSpW$Z|m2U7<_(Zw6Z&=M=nRy&^*ua+lpn<Yn;_mrr!d=ZTL
zYfO!L?;-rZ>8O8cE13T}$lQK5!}y0|(W?0&C$p)Ow+R-`lao&Ieda0LvGQyzObh4#
z$W3Fu>Wf%+Y5_`Z+<-5FpMjCD9kiN0<)`#(k#s->4osUtRi#C6@zhAR>5C%QcS4P^
zYjG5qA@pFMIq=*ZZB{gTDnC!9jr(x@6gHiE&*^XBIS=zrF65br?OHU2rbkynQuqb2
zD5AU~h_!>8+eFs?Z!?tlc$2(B2-pl!r3KGS+4WEn%UAKD;$CC2DbK;z*Q~K_lQl&j
z+m8$6!a4VkLzv^h0kEohCV9$4lV08v(eV^-`dw^BKMUJ2qG}WbX?Am$wdB~pE_<x_
zt1a+g98u0S6Kw+~;@$mSxHI+u=HH*pSNFcbnPZjdOVUSl{<#_t*B0TT6i+gJYQvuF
z)S?~c#<(<LIhjOVtf&d~M76%jFs*Md^jtE5%Hl!{3J3tDq+r@SJRGJ)1u&(gC2(m*
zywLetgEi?pn30mJz*}5|9}Dt0wUm)GOnX1y>u<_E8RsEhCMC=At_~!bC2EwN`xCS`
z4P@53Dx%S+HE8A*b>_bPBV6yvrhVUD!J8lLSgtn<oa^?YxuF+YW#{vKhx^1XMq(W5
z{tr)hMZl~q5gS}v$A!(Y$3H>cocE%tGVfbH*!F8Sdp30znmshbt&XR0yS6>ux7X#I
zDud{g+#;;|9!6Ha%c-PF65FDd;g>Ws{3Q5zPuJ@Sj_}*)c%%|N+ey^7PYiLhC0X3_
z&s_HhMYNhB$G$8!#S#-ImL>5-@LB3W{}v?}DrCrB#vr&15L|K60!LtN4)L~1n6f~R
z?hP0WrK9t>jz&G0d`{>k8qQ^t4?PCyC8sMa{`u0>?w7zMhLDj*KRB<LNkhlX<Moar
zthe2VpT9am$*o9IovzR7dk28x`a39|sY<Ggf#P&e!Lp`fyq{l|=+|y(%q^3kY>Vem
z*&T~R&&!kD$cvzR&y};4I0)^T%kZ3VCRY^tg;^5yIK6QawzMZ=T-ItdX!wj56=sll
z`~-fHlOKsn&QL_+2G*8yk{{otLW^$S0JDYtusCr$zdY+Zw0&EN3nwc8PyL);Mie|8
z<<2_8D<G0@1PRXsbRTvCjBXESD^3oi+dCBqH=5Chb`!>{w6RmZSNY~vC3N;3OEYc7
zT!xi4wXOWZhi;dl))h<1##)7)oGI*JI8$L}3QX2(FpFg=xa-UdKJ&?Y&^>qyxA#or
z_VL+hq$vEK4g1U02w58Y7Zu!4-+5#pa3T`CLg49=zu;u<PoD}C*=&7lcIvo@J#Bu1
zT0&+q`GNsm+t$H5#t8YrY0KD~6VEX$v6SEXbQY`(l4Q4iB)FG>|F}&%Ply{j%-H6!
zE8xxM8s25w37Ee8KK4oE@*3%KEG@W^yT17=jx0Cf-VF7#_3#khcbApvS>jfD?QBg7
zE!wo~dodi@x?6acKj2;qyIZ|I(bT`T6P&Yv=0_tqolAl7mycuhNCy~oV=L9vnz5U*
zb5NBNJ_ic_!0cWKOU#jEy~3XK@9YumuF!uhS~?YjSP+ewuD~~cEf5&|dSv{}n$5F~
zV*U#Z*@PfFs()R;@!nP7o*U0bzbZlV6UFQkFT;)>96>QRCy}+_sc=))XP7SR4EGe;
z#NTd!pm*QETH`!C`*;kTD!r+pdKlZ1nF6ks!oH!`f@(|B=_q93MwemaF-ML+pC<HA
zJ>%HJuxWT@oeDkqsen2KNiezT7JB8Mq4|&Ma8juzdVGD17w0!Z>)uPkZodhSp9_ZH
z<0SCL(H!W$agV!oJ%n~2PQ%Q>#`H$ukyJ+xhUt)vR@xh3)FGksCAfsATsnl3ogsAQ
ztvNo`&c;}4X<GaKFLo&$!lKVQ?ARthoVay9AD3yxHh4wCL-mV-m*h0A4|ZZZEC1qJ
zpA&TF^(;<oKZ(v%_TsIlFSrt6hnEtZ#F<<RM}sIYSP`3vSuR8An(QFaNa1hfKc@iw
z8kLy4PZ1yb?H=UmRdahLX=D7J7|72Px+4?h`Lrh`wC}zv$trCGDd}jw*l`66p0tqs
zzhyzsxAE-m_8DyJazXa=$O}&mc@BEp&d`CU9wg3H75JB__)5q@ENIlnWs}3$!{9MC
zDn|qttE?SW)Xieme^$}R83v@a-iob?m!q@q%vkF5H!!bUPRJ_QVtj8V{NjAbzV9h+
z^%=)?K5KznJD%ebDiIlEXrkY-FevYS%p0w;q3gEiA%Z&tMl;hy)|N-;kFF8kGI<Pb
zC-!rrBkkZ<Bl2@UN7Ce`Ml5*gOjes^m|v<Q`}t@osc-uLOJvLhHqU%03<0*o=RD{R
ze+nm0WiqQA3tm&*ls^8*2M?DAAknjcDeiiW)(YCp=b+GWsJhOV2DD+(hQs`2&Wdf?
z(#Sv9mxT9YZ{u)<z>1X9MmXOqocZjsWCNrg@IA*%g^ZO4EEdk^)kWj!sPQ<_t}n^x
z{PGJZO_ayvci(ZOehiuKPJqhqd#Ka!F1%H~hIeaID?;{f0Jj0F;8DyAyt$zP9osHr
zzvE=6yI~6-WK&>N`$>Mn;=yFe4WwbtIWXb9E=o-2VfRrnhAhyeAG2q2ht&JQZSNs^
z`RgD&tMFioqEJr9J%q(CdMlQ9*CL&9E10dk3yn7Tgw}8j=lVo2w>d8X_CH09o5zJ&
z{Xk0o6bTQiEyQsKO<2BbH9Wf$4XNp8LHej2u5|8zkW5+po%s}o77Ogx+genjzKpi_
z*4ZAv*T=u{U5Mv>a<Jo?(EWUIMI_r^jn2>H`PHBY^It~O^V?H!sD=j<d`d8UTO=+O
z_Q^@VrZ5x3c91<X9yJ<9;R7uo(|4*c<E9AfP}rTdzQ<shi(KROV0^yRf!S#6rkt1i
zP-EL__V(HiFrE7vS^~!K1CPyO3&sNd^HrgW-axQA`4o#D-RBK<4x}9_F4VwJqC0(I
z?CVtrIy_8_%WZ~ZzTjnYtW<^F`59#<jR#=YzI-_OL;&_CttE3|M|@7Fm~TH^44OjD
zD^r_8pRVuX#&uaN$NT{1==|k!8Vevi{vR$qI|H+yS;OR%Eu6FTT_HQWiKdUfjNJ)Q
z{J-suxZSrAK73n&GLO9|!#f+kWtoHd*E~*XaSq)V7^`4n#D0|R#d(55pglZ`OD$bN
zMT_RJ{77#qu3>chqp+XrbD|{CROWJfCpC43<I$GEl-*Sc;ay2EuWuv#@i+<dp8nyC
zm-$lfLE;sRv{;qN4i^3HH2O{R_p@KKL{UOQV%xT@=Ihq`?pSZWX5;D|e$%}Nc`LX|
zg!?&o4~g`1T;S(q=ql-I7w$L1TP50FLQ+CPd+>R_^p=z8z;`jfeZfPUhzKSMbTj~d
z@dEDB{sp*ZOEU2PP_$v>HkioyaOXxp7wexLgjrjDSNz?IT<ga`h$zv)j3Lb;Q^TWT
zw+Ab^Rj&qsZs0h)&&s)g!g+k~&R9_E_2lf`W%+sW8oW%>P?Uc2lh2f1Pj>BYoT5ns
zmlyj<H1GF8-gxH`{z$4moZq@%RJ0)<&xR(5?RCp|JL6$^TO^NS$zj~Z<bg0o;waC@
z4uHFZl(~?UKz1f7m@9jBpHrUz+{;WGPI9LLTef7Xkef~9yIY2e(wE4f!h4R(Q7Pc$
z70z*K@jtlhrix-eXMew$YnCYg-)HA(X})>=cHi||{9OL``MLi8=QnGC-|YW+ey(;A
zeslik3ED|Wyu4V0aeH2JENKUO{S<^0h#Gs6XoNd+df@Pv1WtYG81hxDhRdUWqWoKV
z(y&XyHOgHOHdmiq*9mPOg*`O)wIO?TD2_x)BUx<uE^vte_Wf}o_MsJ~YHdU_r4;Um
zu#wyIzylxI$bdx%!KS2C=sM8@_ir7>8>O9aVniS|`pYtZ^ANVN<tk^pMTKs!PiFE7
zYhmRlO>xuUIJW84>N3~I#Vmi|Tllm|2KCGBnZ&0?xR>h1e0D_8<O^YB6db{KzSRKP
z&0@iPWJa$Co<`Sgh3vntHRvsj_6AF&Qo+wvY(%+0V><R93^FtauaHFXiR$ZE6M0ZD
z8o2S@g*vo6w+=gs55mkvg)BN}8hu<hnf(hr2|-&Y@qusFun|p<#hzQwGgtF%Y~3MQ
z$~ZBJe1Dend$esiPI&X!M2;iViPynxp)1Rm`~qcD8*t@mW16Xu#1F9R7uW3{NTG|q
z<8bqHxN<}njyoO;ga7IxxoEOeZAP@BU@`Rn(#N>mxx(ftoSBC&W*%N2g<1DgC>v19
zDcH@%D0Kt&=CKs6lu%^D^+vMfj$qFJN(y`SW->-~zT(^qFT&?<vc%3Au$yD7*xivg
zps1NgO~*zIyQoY@zU~9XWWj>FaweOVn~opbf8qG%L!860Bf=C!Br;j|6o$MwMnOG7
z)8<$rbJ}o;rmuTI+k}?JpM-~SvU~!`^=7i;Z^lwaNhw!9J0CmzuVB7I2y^o+g3;24
zSjUz{G&kIkWVTA<p{7Ru=e`KKaM_s2f0SWw&FpFP5Lxoo(`Sy$&M+1C?;sY8ZMTbr
zjZ<VWTpsL0Y_kS;+5HMUKJA4G<t6C&U7F>;Q)llwTd{TdK@=xG<Z9hw;B31g^SqD-
zmPh09+qpRLUK2C6qVW=a_5yZcjy8%%h+&m`D?b=fH01mpC{#)0_ct2jnSn=1x;KVh
z`y2pEl)s{Cq!kT0tVa5s?cfqpCEoPYj%}3Hr6K+sSXA0j=9j#H&1sInJvDAD;g1Cy
zCrour&yArgLYmThb|dSvG2z?)hEZ4XFzU(|%+Kw;w*6Pf(BSWV(33l!HkX95ElVXy
z^vRf&-ATiM_-3wKYA#!FK$=x&B*BKqer)oBgJ_qdNYR%LF~2-x%1}yXOO<$LSu~R!
z*C}GN+b7eZMrAg=UW&bo+QA}t5gGkbXLh!S=<U0S^o3W4<oklfG-Cic2@E{&m4w>|
zD?m_s(S)eKkTW&{*EOBPFLj=5)$KCOG+0JITpD2AB6Bu+PBc{gr_J^*wq)CH%2A+1
z7+Zg{1xGbUF(rK`c5YNUeom8S4PE2eG^caeli9@Wlpf7|H%?*)RvUtK_GMVVa4Y$x
z2$Z%2cQVk-qRkhQuvYI3x%{o<4{WStGlj(4f<dl$w8@$^ZrMrNz3K4erxU!GRmhzf
zHG$p!D75*UwhG2>bvQgj4P*Dd0oSO5tas09HhjccT>P*UkK8U4#~q1c^#v`g;A9eW
z^OGU9)*75*dLMfZ30qFhmGq)+Im@pb$clAN3r42H>}ly(m?SjbDyz%TL+v<rwC!WE
zX^b7Xe;SGdQ>dd{1C6hSlr@z;=0}Ic)58)C;iNN+S$S^f{5QHXKh;cu$f3*Tgw7Q|
z5zc&N)8skZRx{>wR)$?sn80fL)tK1{S5DVFh}0VEu(9?oY~JM$KUNK-*4d3%arP}t
z+t$vt=L<BXWrY}VTY-9C9-;3o55x|M%b{w@NS5Vz3=hcKGQB60LE-N>id=YyJzDk-
zeqKJoeR?v5c+I7xwUgr}^bCNu7ytM#<JQoJ5p8_F%r5r0JBtm^34_6{vl$<+!-9P?
znEh`vb|yefN%t(ctd=nHAFvNPljo84Qe##eRtvk+e{mk`LfMeav24htb1=AB6E3(0
z)Alk?G7dOFhegKld(<c<xxpNji?;IBPI@d*G8Y{}4?t*WHyjE$O^fcRuv)D=9KFhb
zHLKO&JNIe0=Y<O|j!a=wekOC34L(e}OohUiO=OWXLYa1a4G!L_$7Y$>Gee6fTuSY7
zvQCg;o+;y)_J9^nHu*Nwlq-fVcUM^3pvLMhRq%t|PT|7lPEk_cIy#{<mvzilq8)FQ
zsC?-N@%n`V5z5#KmmAL{k1=xWwXoSfxBoK?^?ZP9rumYycO$xtJq6{w0u}qNV@q1@
z!)P_(|J{*U?D^5<v?632y_ofkyHj?C^XhLFoz)DdmhWMlrT#!}jE*bsv*Z_VBwh{M
zw0^_-N>k?F+6{yFc)D447mx0p4rhEm;kkR^_@L$j#_7(YJ)3t^O13$jyCfy{4|V24
ztj@xZj?0ky!5h0QEXlJs65rl(WxJvzaYbJrNdMY_`AJ`3$jCbW96JaB^QLl2YDyGo
z(~jQ9l9+qaI?Q7Om|LVId+=LCH;2{XoHy<KsgAF>{L}+X?Cgh?IUMZ^F65*p+6bxe
zN(kOpjZL~5WVJ__Q0GcA*T7?Fb$J87`ba02bzumVFPCNq?)b1DqlYo4E!~j&a0JU&
zOoW*G)m)Q82iL6=&)Gh(=cjJGizB)>L-oXm@W~;De1?x<PEVR~x|$SKD!c}Xy{>@5
zR<Fga7{b*$Z1l$OXI)`!P-p)O=M4CT)4vv>TV5rO6i)FL+vf<4;SjM!;1I^2a%Jm>
z%d&X_NqFTbdt4lEMSXE|@lsPPn^={Jy6HR)&z!<8YQMznK^1US%K;Y)r%k6dZ{TeA
z9hf~f2r@<`qsOISsJuQ#*dF<y)RtndwE4E!==~5nT;ssPYTTgqVJCPzwPz<*Hp7WW
z%IyB$<?P$O;k@GA7pVPhF2-<U#8x5Z%w<mpw<1iJO+Vv8b$>RJizt+>+OZU`otQ__
zpS_vFVq-AxRAwcTVIXanz$wZpV29#ZI3+ZTKFM*oE~tlVO$de9-FNv%!Yw9RF_@*k
zHi543DZ&(_9kd$UsC(2UwDP$OHgh+y-Krw?aV{{oRkF+@un?!3tf0XsqcPa$82|3{
zT*^^;!ev#LqEhc&QHt3pwrSOMe3cx;F6?;=_ss{h*Bf@guoHvW5yuZ)qiPY_M{mOf
zwJ55)xC{=Tjo|Q23c~VcnAnsGa#@=>Rf%F4dTJJ2nH3Ih8Ftj9x`R}H4?v&gjre-!
zK5zqD<_6{EGj%5k35224{Ch8D2iC)*##Hhu-pOR*WB3mqsw~7l4P}BV;eO;8=##ai
z3mYO?O}-Rs70ydbF6H30k7L>V23?VRtt@F;tJ8hy-&pP=$(}u%Ng`zp`j%os<yqkn
z?O@CH?bN0pk3!khveg3FNeQp6n#f|O-omgODs+93CtChnB~TDDxteZUgemnHw*CQU
zdFTaJFBQnqt={~jGBv_oe}zrLZ06H*gf&$Q%@^-_cy5x8iF1XvLiJ?$PrDy7_Joo}
z_b}4DZpBsx-RC54N>aK0eERQ54138Lps8#sZGO6hQz(eT$sa`QxrRXXd+o@gREOZ@
z_)%={XFZacxdL=Y4g&YNb)s=WSK-;pfwcK#1go-&<&HX?hbKE?*u71^FgErHjQls8
zlDhUWr{i^S!E^@eX|x6L;52MhOM_|+VX8cNKdV(bkN3n!nBKof;(4XVF(E$=*1Rvr
zjr!-g7v7U8ZB;kQN8EuG#g3Tq-vptJH4HSji$DRkkXyEp3b5(K27&Hjr}-E+T@X=n
zpja@>_oJ=xd4V$jmAf*1B;9YH&NgkT1QX>L7^Pdp?g?Al2Oj$DWays%$JLvEQ~gGN
z|7MCx(I66$WK7C*UVDoMQ4vvODvcUV6{QHtTv9ZVqEs3+kn`HRL4%@s?yb2r&l+g(
zeSN;`{@g#@|A4ieb=GyRYwzdt@qil3iI|&cCq(9&)9^1vux-x|>i_yJRbJI4Sz8w9
z?wQDbJ1fajO$oZr9?c)U;?VqH0?vQ80dL*DMn#f+>q7N#{N|{Hsv0u!fx~Wz0(t;H
zer%yP=Ld5^oGmKO?v9R6gV0K2O|5HBf9`eiITQ`(4W}MhVnzObTDT^Z_jk^PzstPI
zbe%c$SgFm+{WhSL#XRv$S1<O;*@g2qZ^y%0X~GA~u{idaln^%aM-5Xi+4=$A=(<`P
zZQmpd^51S8l)g!@9k?HwM^3@x-42rCkMr>FUzr#^{gnLk%S=3OJ&6;Vjj+R>cl4sm
zEYAHTH5E?hQ^J|v{G|8*Sv>tIn%#IyM^toh-DXcbQ@$15mDO4OMj1T1bdHV()k4Yn
z?tFdM8Y*!61N+t{!-V^-a5>`?S^d<Jh?ilq*5|GAXC;@(^HM8`mWCMc@d|jvs=@+e
zGoIcxP_m#YV5NBu{&rAf?Z_G2+-Abs>K1rp+GBD$J{fKLz5)w{l@fu~4kn!2$*RNE
zDAWt^)O97$Shy9tehe2b&d-$hQgPvg=u*jYc^UeITq2!evpGU<CI9->4o6j(;*`QM
zDPgIse~X#;A~*xfe7?gQTXU(6ppA<b{S;kv2l3ponix_d8BvbBgY#-kg>zQ&pPT<^
zWBD=IctbK|&CtNsZ@cNqhF%<TcPzh5Gh_G0L3rI@HTTe;#TkBQ!0MGge7ohu-R|9k
zeT_kQWoACk{L>8<UHt>Xwh{{SU(DmrTGENJDfDc4UtGH(52x-N&t|J@XkF|eRPQOm
z`tUr8u|6*kFrUanGP}~U;oDux@^;ayC$^BJSs?_j8;>2nZ^OVx^Z06B2^|R82cbLC
z$V9FoC9<7ygY97|w6bJh%N-o_cou9lU&WK`UGe?d$sDvQ8qZw%NWIl-_(OgkXsHBY
ztWhGLKXMSJKO9PZHY8y8=NaT<rpK!CHj$$)3VQ+);dItWoKyByHj?LX*2X$W8sQHS
zWqWCe_C>*0^NR39-w%czdkzCS9fkVIyYNnjJ2DIXjcgLNKz=k)6rAoqhqKpI(QWPt
zx}W4L7&oX=WPLW=elm^Mr%4o#UzaG@`z|>C-Xb%&po0_Ft8uvcBUpT37yfoT&YYEv
zUGJTs+jb`*>&{8B=C(U~9jq1Zt#TB<9{weigpcB%xxXmTZk=$uO`Vl5`$*fljnKTQ
zH|WJbg9q8);Bt)-X1y+lb<d{o<jg(fE~QMD1z4evWhMsRn1z}xMwo4A2Ok!D$ad?G
zm#B3Q;m;%+eAIO?jd?H-N33*XryE*SRFTWi;!l$Qn{b?bp;d_XxhG%lSt;N2C<9%K
z{2BC}IDO|2+81VBTUH~Pat-rv;@1OExT;oO^k^))S{#FJuJ^$<eIk#PN$sf3eaLK6
z20#6;1D;P#p&M<-h@><?W2Fvf9MctDWcs+NunX5e3`O@9F7$An7CusHC8aOt1pnTC
z=<)IvYzsU9zecRVXZwA4cf?IlNYci?vuDx%2gUq%lpC9NY7yr@91YLB6(PR#6$vBz
z3rkbFVr0o}Qo1umnvy$l-p0YW?Vc)pi(JMH)9ZztF=@OvzXKc&SuI=sZW_c}{0IFc
z3+z$N0x>+?3L9rAaF1RgXygujdDa-Iy{p7s61~`T`BMm2Xr?_<((F>4f=uthVz$|{
zf|u7;iUZT4@n&oT*!Lya{&a<CbyKfaS#=5zz7&H`m$<U~KM{PTNq@On2R>BS3E$Mu
zqZ^rjV2j5qVQc1Z$i1C|ljk={WchO1y7(^Lkt{{R;|@IbKpojfM{{k(4eI+y8MQQH
z@ng|3`t1H$&|mVBB9Ev;RcZ*GTr&z!?6!yX9~xlM5>4(OyoI{rV9c_YHjm?tQO|B7
zFJ1US%vZ4CG1D}$o2?4hbvQ_gezU~d%R8mSNsD-ev~crV0}LPaMm*)QgSy>*FATY@
z&28;j(v+-_tNXs8^{WDeM(LgW>}|5VO!~XCh71-9R_4=RiL!Y%BEn_IsAT9EDWx5@
zA16ICxoG>w8Fwp<M-6F;eeTu(bhtbL7jMbs4*}_@d21uP%nze^XS(CTXStAn-vzJO
zRR|elN8^sH9oVb?a|j(_4p~#@@#2yI>|gkl;(O>|@z2gY=UE{O+1jKe(Z_op?u3K%
z{-Zv|$<)KklH=0+xSOdRz5FbZ(Ib*!?iW$WX`U^tt5_;uQxwdRgK{}3PimOUdtloe
zYj&+NL9-A!Sp{`tm+kvxXKG|{uK5xqw?CqNT^@kR@m#E$ln-9Zd$C4GL(~X<OR8^{
z@}xhC>_4{!rWF>5DG7(+y);i7qrR60+`Ir2Iv*4AIb9gK;VC@(3iu<^lYd6$utB6`
z-_sg^L)O2c>;hK~E$@SMMT2NwmxXZXRW>zrPiL2J`C`J28ezoxK(5UjkM2<k*qAnj
zhlls(-TQoSyp|c~Ug*ZAiuxRpbqZvAir{9~U&8u>-n6D5lBIwyUou%k3Mo=b;CvV+
zD%=N~y;JyorwqC{Plx3RwV)N`f~S|x<<ePWxx>(9j049CaeGh0u#j(1Jv$RGTi%cj
zuy(@%L+ZfbS~e-TAdb&eX8Cm$SmWClucSucLCH)scl;Q>|8_368&=WF^nYT*t}dAL
z7<ux#ci`$;F0^ItrNnEiS#kP%&^+=3>}N~&wvfd_gh7tzux9`|iRo}CryPt&4Fj9I
zTKG>K%NMM}VBNw99Q)r3NKE=6)=N!VuP96IlW!r}dyC2aq#KXO9>7L@{KdKvp?IrH
zAVxY1wL>>-7p!w<bF<WLi@RHjlPgFFI^35R-)ex;j;qB3-_7AslMJj+o1sR^KpcBt
zO5`t`%lAu{a=*9BDZXeekn(=&6S9ZezlR9vKNn+(eJ?f&N(a+RrQ$`&BGC2Ra~g8R
z6jfZ5xwhI7ow~<}PlpK<`BnpFJv##zVjN@>zGQ-Obr|bdhrnqmJ-n>wGc3ON7`SpM
zgq_|AHiK5en|+(;$KwsOz)TrNRT*Hvj(dcU5#w=o##7q3s8EQTI2yH5bTIS5RCp4v
zjaoJlye~0<Ri_L5H0i9ccy}9B#23=u3o`b<eFN%Z08>Bpz?x~#sJ8cB8Z)UInC6d!
zt@J|hUXg?Qr8BwGHdrWH5W&$+0eH!=OtkA{O@?w04*%_pLURu`tGGism&f6WtK(tK
z4QKK5%<C{;YFqYH90Ge5_@nAzU;Gwq!V!zju=V?3UZeGnnq%{2pHmFj=cF2|o{kZ}
zDo7M9YeP;9mPuRmHNu~`N96YH9`*2_j<VKvT64AoMvmA6Rbf_CgA(amcNo0ytd4&J
z^RQ|9E|{CQo*Wie6YdXTpEf=I>6Jk%PukL)*HhqDl?I-w)TD#u|G|}qG3YY0ghqb~
zq6KZPVz;L!X{*w6veozmQHNB7Ub7_Yl2ZjwyfKS6&v4+x!%p;Vd=UCw&?41+4q&%2
zf}QqMOPjs-!oVd}khCQh>R&0tQLoK>^werRKF|?+g-imza-OCSw&kX-qeyFUE1cBQ
z$MUc+Zp-Y3KSL|zPo?C>Lcf`OM6l;=5tDG)f09(Z=PSu(jda~(D*uZf&cy{j5Na}#
zr*2Tki*r@z<;GAlukFL<a$mvcms!GAQyr;kQ$oic>0tL!d3a`Cv6wj56{0Ga)0@kG
z#ZEFMJZKSx1J4KY^5tIW-Qln_16eHpd~zKGWCr24t8c-&Z99BjG@c(`TT5Tmy*YaL
zA{;3!fw+?oh07OhFjnzBExU0{P?b{LRd+<HR_Vq`#;W+HZvstQHIZRMG4y)f;(Q@q
z1yx0DjQ#S99;$l_kAh<<??6xfdf$R4tRF>>Ovga7l=d&`d=bhHKcmb;yHGphG`-Tw
z<JRuO`Q#ZTc6xdfTzvcRix*PLCAv3X$u@@Re)V*-`zUD^D#IFMBltA%xe#TvOPFY8
z%6nF{xcnL#!1_zBL*;E{eEswyDb<w-0sU`7#}0wi7TW{gkMKpqR8_8@+f2!pKDcYC
zA*$qc=3!c~@{3iqVxo2of47(@?(RJcy<{;MHOz`{^lOG$YcGq>*0&0`w}s&m-L-g6
z+8kC^w}H{lwQ|o(ari)E1qOCofphPyf*(#^SX6A!v*(TGZ(60GKk7bwza0%)l0mrP
ziVv@^SCy#C?a(kdly{#<gfP`aely4q^YZ<~@t4Qoib2;Q>{1F|Y}CMKk?Z08pN<mk
z#DZNPW^nFnSG*`S7S^BKEiV7EoZo+aExRJo`Yv^wz`0uzg!*A#xTp_EO_2?f^>mf6
zlse;D$5z4Z%`N(_kVbVoj|-(KenP-vsd=!`9xa>FK*vdq15J;}y{@UigiQum*)1NH
z%`TR2OU*%X?KHgmAQtPTgwmNkrMR|zF_>3p(D}+ebZ6*1++X8FPcOH?{pq{tb+Zl{
zelz7%DY<iG#WPy^@tMrtW(PP~h1I6$1f$ZSbotMIdN4J@kcKQZq_2;Cd00VDyn3P*
zX0+I#)rkOZe~={E9V=+<5{W#&Wv#eN@it9;Uxk*-qNqG(Ij`uw8XL6J@cXzFie7L~
zEM8Rywoe;G%@YeL@#QyAA5=?QOom~5VFNY0s^il5o}_KMg06?(mgr=97)eK9%4Rii
zn>mNytk%Vw-toBVy#+q6Ek@OuEp&RoDNsB$3HFz1)4I=H_(mT+4oMq`jk|`kYX3Df
z+{h544yt3^oTG3%bRk^6e~eVi6M5VVUG|OMPA<`psN*_!y#GUnZS#BZ@4`^_{PvT~
zo;8BX{IBAtL-lY#^(=(AKcSt{JaF-y=i-lsZ8T=$1k}iULjJdhbBpx0y4xlZIXA2F
zH??#0-^MOHyF(;vbTP$!Pgjd|>o%Z~L|pu$ID=nhI&il>Tjif#Si<Ak(*4x-1<bWg
z<q>CI3e}0%sP_sByj1&McqrX}Kkj={`)&U>GFfzyM!(3Qx*iAFeDng?XY9@^{&f<*
zC5`4U+RGuyVFn)255hOn_p5`!C#be<hV;*F*z{m68*bYTzkY5Z;nfdPwvWQ~^7U{*
z?17d^-MHnRGtN3t0&@cHg1pN>d{|S(hj-}XQE>#`s4bGd!#k<(htrUq|5EV2VI@xY
zwL#yPJB7VJv{A6nBb5@sfrtCBnx_%}mwjKX>|=)+V}s#c{AzyWm?U5QML^-yZWj~(
zm3*L52Tw^9^4YhRIyGu0f#n!ad^!3B6|Kpp^LzVY=IDVO`80<*->m}wmaaHSYO>FG
zEfHLQug6YOyIUp27mrxH6;AHoN<G5r<!2Xdr#t;((C(W9D>lSHijgt4WP0Fj#av;8
ztA~^--XNT9b{C924^Z<2ZJht%E&X;agVmPz>B-bD)M4`<VbrcJ?73tKKTcL8?V5Ga
z?(8e{>GeTau4%)OhT#%@`7b?x(G8c^<dDk2!Q4`8kJr36*9<?sUTm{!0d5SzJ5mE<
z%iY~@ODq;*=lh_~y-;@OFp%aQUJEfN=h1`wC~P?~8GE%_3l*ESv8Gj@r=<=@W0kLh
z`@Ds6-OEnm)(yMaRjW$YB{dLbqGV>WP{P@#WAWv*!8oAr3%YZ90(N{8jdu=e^BVP+
z!W>5voH2A5?w>YNY6oOOuw>W1E$#Pa59)<C%pC#W4#ceWy>Rz`i^xB;369x!=E_0H
zE4Hq}pVEHvPjLvN-zsS*V~O1#8}q25<!t9ZUA{2#p0IVp8vIcEMChfG!e+4o&b1y5
zRo6T6oWR2n7&Mq8A8r-~O$!tZRwuGyCr`TG{TBJnl=i&W?YUFIL19a-4{8*R!h$9b
zeDb^tAGP0unF~(9GxZNr$~^<Tb|&+e)Ai7_pQya;AN>2;6}QGEah=;7%Dc20lvT$=
z#48UvCpDOLd-kM&JA24ZYG#?w^_CSsbQQZ`PhQ(W6-y57<3lUvz;~xjq@Xd9|NcBo
zuk3eFu<a&j?&!e>Uk0O3RTZ7y`I-EaYIvxWir7~9)MZ#{e>#`@hn)J3B%gI->9pT4
zj@agpMHM>iVzFLIEag*)*q84IJco}rvbpH9GIF>Hwka61`+_6%YQG{Ms;rSLFOO;S
zZn==Yaw$Ag{sGCI3*g-!O>CW+%+`i)h15$o>1>)cUw_aM3tnq-l&@rGklJ6JN1ue=
zv-JfoHRG*AllX740%YlL!O8Lqu+J3nXO|-R%Cwr=AIiDhk|NQy{FUg-#~#%7$B}Qj
z`fxzhcF|#-5=P;BXg?5!mV45ub7BP@*wO|&`8lnza^ay8rE$8^N#RYIGb|}hr`pHb
zJUR6Ug&HdG()gkH{@*CvU)B-qS9)Roqa3(B1bL%0Z_WA~h$+8r!2VScv}n_OY&qYR
zd#4|wtUfiAb$cLcZYhB<SO^`IE9lqe58xeMh9RNWm}6(mjRC><g@$w7;LFtF)1QOK
zW}u2n0;Ojhg|(d@!jt12m_8)%=<H4SzI7rip6!P<cf)zyj~u*r>4@m?#+ZAqvg5Su
z2)vcv0-a}GfxzW=MIG^+Y{yz{)^O>LqsR8;q~-JI^OkM$z^-;YUr!aa$J>&&=YH`_
z$`*PuXDoJhkKs9!Iz#ldB8bWK1;f~7@}7B7e74*JO_DO;^mz+3s%eBF?sGZ(Xbpv~
zb3uDEZ^%=S3{kiLgMbGEd0oOPEUm~VKOJLKlFX5D=!D85x?Jn+$E~q3Sbguki$%*5
z`Vwsq20m%LWn>QSxG&jI!n?@y%)8>lu(i0m&V~*AHSj-}%zF02=(U9#23-lpm1gJU
zR&U(Ve!U4+X?~$ovwKJszUzXkZ%;J-l>!evthwWQ>Hf4sYAU^Lf`b~G6yX=cTkL<6
zcgPx4sUC-?Rp!FzKS~tpW&*DjlxTp*CGzTekW@c-aE6XMPs$&TSDxfUzdj0>cSR9B
zhu1?{o0-%gAmC!Dz1yU~FG~Nwf=gZ)D81XprM#kD?-YcjAv?vPHOIhQuRCRG6CC~+
zg@d9ebL8S`nEz!9#5e1JqD>qwJCV<~P1+zfr7sqGo}r-KuVKcv3Di11R^HI#zUcVy
z4k<Vf=C1FHXsTHZ-pjRuF7YYUBJC1O>!i0$gaQ_>Euo<J(=c$?8}N4QC)?rd!Kt~<
z>@d}kyvQH7*SA5RVY<BOa4H+W%n=sIN1(cE3hyw8;hiofT;JS4YoEpAFLcCL_R{&d
z+KhcFJ-~HZtwcr-r5!7}qeIzsatMrIebs1Q>)cNF&r}Q3y?gNP$5}Y;`v?TX6x>qf
zMRqrziV3aZd^>I{)1VWe95S2i$MnJcUwT|>^oIT|)<wT}jifwb8I{{>^ODcDaL6kM
zw-j|^=eU)qFl&=Is`U}P&8xtn+zod13_+8!b|H2DJz?>Z8C2*w9NXLVNTZ~fj$Tm0
z?-kylpl!yhE&L!OPZvU)Y*|J13TWItOlH!q?x)`~imvO#A(HXL|F<KpyJLu%`2xkC
z4#Lr+pTcn+eKxwXnCA`d%WwY{aQnbT!Vb@G6!NG)_L;bcbmvQH@CGH$|MDKTJM172
zjXZvybX|n#)o3yyA8P6YS+}=3JJ@d*b0s_5n`~2Dvb8^y=Z@gK0as)n^8%&4S|{qW
zKT(XmwF{^F{-f=~eaNkOEq*m{<kaxtT$eS7H(7?@rm_M&S2tT2`$v`LXh(>J()_&0
zw}N&TeTSzrRCv{y)!f^$75ZHqj5DP@`pP^L?zvo>{(WwMgdIVck=d7DdG?YBX6Ipz
zXQPM?dGx1p5zqgV0E36Fq$$m{bP(-1#e5{mcUVyHRv#W<;RG)(hHwuZ51d=|7VN6-
z!7{D>*gvThJ@<5lgQB&#p=kt{{;}qgVIKVSmJhaMhtp>rXI|i%BN_!J;dIw5!KXM{
zFwlPw@5c#{a7_`%etIjJbT$a)p2;kzPQ_5S2{<^yiSA2u?Bun<*z=Vl&%CmRcTIao
zf6ko|>l2Sa!MX?Z{gM_&zB^9m9|fZIwR_M~?oZBd#_`nv6Fix_2`sm`;DrNYctW-V
z`Kx*0C7n9qc>?mm+2grvz#KmPV*y*|DY5=;PYzl+S00di8?N5k2Jb9n`15c_7<Y9I
zB`Qk$lQLT{nlK9gn~IzoUqyW_b@@YK7<T&l8=f0Z!0opjaKzo)&`V)Fe|)}=O2w{R
zYaWB$kB-9B_ksA@_Z%&}E&Wzd9-mm<3pei7=9AJsX41~CIPh@+%7-gp?3Gf{?^J{E
zv1t+xnQFlcJN*XJAL@MMs5-_LSfU2{LAM@bc~9|rS)k<_{G45c{jVIrzS0h7#^?DQ
z5!e%)Dl*yhQ#OrrO5xa`O?Ys=m1OKv<Pl-+tfu%3iyDuC)#t5fkmSp~`zGLTv)5?a
zU25Ni8sYX|J27tS2yoeDDbWm*`S|+&0uJ=%UMA&mX0AK`40tGd&)dP`5f_+KUx#<v
zB@1EWYAU;ZReml+%4wADhO-ZYuu?%;)Ze6uYxWdFYoFs{<Js3xRXq?Av-+Zs^Jyq7
ze=Q6Dk%Fyp9q{nbaX3w}AGR9$&?MWgc-;R1czgaQ-k+6CmKKv~gYgz&bn;4`?X{eD
zO}ZrRN=l^@rx%j>r!;7KmW)G}25|pU5i-{2;=5l*;l-2!`rY@5Fl<yCoUQg0jHiXd
zc&9*~+f|ba?iUJ|GBX9!Z>B=pXEi?VqANciwwV9ZX`zv_wYXvKS+blMNCU@hp_CV9
z?Dahj<(XNvRNsexE_(?TW~ppFS|Y3uE5aL7kAmL8Ch_aAO#J@umu%A90Q}{?8@^j;
z^8QM7+!g#43hY&Qa)LKJYg-Mu{)12{F&39in}cFpZ)h*=g&S&}`B&3H*$0dHQe#ku
ze_zUkLX$wQ6Ss<22dHrm%`~#@mkpmnefjIW)3{<$I`7-*EYz>dMrXs0uI-B=a6!{F
zUg2>NN~H5+%{DJQ^1dU_IJyT9uZ-v678~)kOB%GTJw^{pvsg(bRZ!bBmI`-xV}q(O
z|5cjKjUC<i#kRwuRlU?OyR0JY(~gly2Y2Ak4;`|f;lzg`+hObad2HP3#MTEixM=GS
z*mbsr=7n7Yg|;-dkZSav{{iO@Jc>D%Tk%2UbIMUw;f8iso@HHeVzcm;{4x$e<((At
z+#CyUF3#hgrgcKUyuCv0v1Ba0*DMddqrf>QFNoh8#^R6A^JJ)A$X9emN`!}vqW0uj
zJY{zjBvfvtm8bgQ!e71d%b$-FRF^7VH0?-5hB|oXQaeRGdF1#kZ5G8HA4+P!_0Y)6
zfcNFCqv-ItG<$S!9{;-pY%Is&>(;Rt_<R|vFNwvbiwbP!WG~IRGH~2_Z}H{VcVbeB
z4#yRi%01`zB{joYcy3QD9_&3D&-R%^-4vX`ME4V!kGJO1gdVW&VuskOlOGSuv_<U-
zdvT&~s&K*dBQ$(X!GEo4q%2$}uaM6)-EKdfjmlu_tKGr!-Z{wL){|Rpj?ky1mEyOo
zdTu<^AG(|wPKK%v;O)WneBg#IEVeVlnR7~L{uCRqoN_>D7(WGnC9R}Am?PWlnJ@S*
zpTToiF5y)d_R-R^d`dGL2`vjX@y{(?&OEE(qBmhECb--nm-u=7;BLK`zFL*D6inHo
zyP3;V$M3YwNG?1~nac}rb>kU-QX%0`Jdc}D3U#W$Isa-zW1Cr&zgm%t3kC8Sd<yd3
z`s420&eV8D2S4ad<S910!L7d$uRSY;EDXkAgRi+*STvchom$8PqW9oUuY4Y)yFjx0
zC39@NAJ3aIP0$$DRrax36E}62)BB;<Xl#2rzq@V>=hkbp8~%n5d-U+GdnL&CeTR{I
zU8rbpKmOA)fQOvg4*^Tncv5!<^m8j?7j;uMbWov-%f@5z7&lzBKuYlErO36z6u4Gb
zMVbYDqr*uzNPmcwBi*^2HKXh3OSL7RbD02{`7Chf86f`Z0>u)^tm4gec(`vGss7kb
zRe!(3gW<1W{E>&kj(dUh{Oc8RNNJ<L(>BnerN(g6VIpfEuxE>7TG%aJ9e+G}M;#;r
z%Z8ABG%7<B7GZx39-vJZtG2<y4!*deW+40R?f^o4f9zIm!D=0+3Og+qalyMkP_=Ck
zPw)L6)K-sUp;HrR>N;?zcqhEuW`P0UXJU+6JIx<E6dzmu6xN>~Al;Lyg*S8<W?R05
zX(osz5f$L)8HaN|M&P=s>F}SyD5)V6!19rTaQ}50+f-i_N+(<;8^w4EJRZb<k9%<3
zyTN$ZsTQ7DU84)LW{BmrnP|G&AEV#gCO@-WxcejoH%NEaRcB`Ke;W6w;iVaS{+h&z
zO>@a-<4ECT_YzJoR~JW$jkIFYXe#Te#|!RBv#tDJG%R90?`ZFcUkv(+otie&(XDyh
zd(cJtA-&U-d#~qldU{mz{)6nZlpWc8%>u)stZ?L-EKnH|!Xp-SK!u}6;BZkb{XF7G
zi&U<Or!SnMfh&x;_FX+ZdmM>9&OV~hpnr70XfDVTE>r81>#}u5hVb)NHm@xxfJOZ?
zfKx57p>>T=x8%3%MV>w$tDHw!q3$%i(Tz|4*I8;CnPJQmIW?|7L{nnBpsIHf=j6sh
zrffBDm1Yh@q<yS*4{bg^%L%OPqv-Yfmvl1qKG~i<NjaT|V%ijC4w|_JO8Pd-M@B!R
zhlQOXD0>Eb{r2Ir9S8EK552%dV=52079`X>?N9e!ouK)ThH#j|N4VFVfVB4+BtFn!
zhr|LwA*Y%;o%%@k8f@`g_-H!fxrM{e<zVYWIih7XRj0dRxb<OtFkw1JMr-i87HMB`
za5`QR?AW1eK3`ioh`%m9N`>aG*uQBtKi6G=GWSk6W~4;O8NV70*VTh#&t%f?{#Ne4
zAdd?Tcc2%ckX3V1NK8(~_-~Fp;Hn`SnysN%1rvGS=1$mY;VarSay?H8x4?@dVmWA(
z7H({~M|u*8c=J@Lk>FcSQMTz2meET756i_T-3!p+;Y{vVO3*I92D*l0NH*61N2J@M
ztSuAU>KwSkn@w<i!a^vTsEyMO45#8y8*DkNBG$Ze#N?69baudaZazAX$E54y_?ht-
z@8Ai0PFwOnm2=>;lw==-9Ml~#n8Vzq{P6}Wnm9wET?NS4Y@?~La6&H_;*kyA-v`jw
zx9a$)XB7YTO66gBHNckVVX;Dnu-YR8&6HJn*}aL>90913TS)4PsT6e0K{T)YPM^Ox
zpysK+wEOW-%F&bZ4ujK$n9>sZ5aP=-H>`z?)d@K8YQ3=ED2>$mj^clN;<>Eao@>md
zmUD!ZEiK8!h`IyNVSWy_*^Yo|kDd6I)FfH!v`-A_pv<HDO6Q2OWV=rfV$Gnjd^N!z
z=e&C=vnxM>{~p-k;^#xy{n=;1ukx;NQdvf2_RneGSx?wJ`Zf$1b&@Vw4&jIg7WBKK
zE8gDX4*MRL2}Z4B;LWxrcsgu5ZJB#tD3ARFAv>mHY4lq1-8%?6R~F%4M~T+j>!0&~
zQjTGK56S#8XE3(CZV@MZ%z#b}=B%`>C%)b-nO#m4fo-&k_`C8eY3K%^)npwWwP+))
zJ>!cr|5{?Fi>+Y$WgshR?4c)Wi?Qp{i=bWN&ud={z|k9nsSaKFO<5Pr>C_vaclwW>
z1UtjX5NQr}qL{G51{^UU5<@;62aCqtqGj+Bbd5~IrhOLZaOr~>U!w`>f7E4ve%tf)
zwF^bBTqEvcVnEXmOZy(bZFr*kDjc)(Lan8_IP$b2#rUeQp6)7?Pktn(t?VH9nXIE#
zNA#%Y3}qbocCaw<&Lv^W(mD9pR~hZaS(yKyG8lF@;i&N@((Zd6Y!IaXzw$23-Wy3i
z6>|C(^cvL1Xj81~cj|9m4gR->p;m?hZcy>Yru9zf6&{M(D<lif!|&AmbSo_Fpu$^w
z9-vK%HlVpggRKTgSr2a|zM!0nt!w8~OYi4!cynirvdN%E?~$x`$DQxjro;4eazVCL
z5x>huvAih+lXMPJ|N5R-uh<PPJ9)FJ*Drc@U3yE$xPi^XSK{@AQd)KLBz>Miv^}v-
zY-qX#YNMsMqop+!@BRV{<3@44!Wo*o;2+KUy^_w^NG41p1Bsl!g(g}%@`VR)C@07R
zKD?JqTWJmuCmw>jo4+BvxDU^l=zsIK4WWZ~1t_gd;@YGUyiw_tC{OIk-Iwf!@#TY9
zU(1*`sVw4*gR7x)%NsIj9|rAtO^|!V9bF?@MV%yIMd2I-B>of{4cuANI7aX-x+iuW
zG6Z+ubihk9ywUx6o7mf^jO?oy!mzekEQL~pvfmE0?8Iu}meee^t_kMP3!`LXlVV_1
zm1NU;+!eR~>x!En?L@s(Ik@XfCsaFfSF%wZhW(~BFf%`t!?pyX-u0Q}7B-eNj;XRz
zw=sA!xdB#pxeIzhYcSyM4tjf=T{Qk)=fiVW(|_A5;Z)F8d@@3pT|&*_UziOQ<tBmd
z@naBcmInj%mXp;I1GrJ+jqh(hrtl?Af_aDyxes;V^t=W9F=UnK_FyRQkD7>)AttQV
zpvIAjujs~XExG>3j;M6e5@!?*#JmGz+0l3q|M6~zcAE@du_lYfgN9f)w-g%EG}+c5
zm+$Ti!uH13a60?0IQv6^s51NlS-Tiw^|7^hR-prXs8qs|juY|Gc0E#@GMb0<x(3&C
z6ggwfj}uRhdWw-nf9O<p99q?zVpGUW{CrdTy{c1(CyP7qn~%MyZ1zi_^4aLpGy$^T
z>9gOOo1{eR_=D9@u6BPbcpQqtu$iaDgpeNiNO>Er=@yA&K5Ov#=qOyXaw+=NWPqEC
zL<Y38rW-{GlzJvvWWRF3xpXQvI)!to-!fiyCmnvw+a-%zYK7)&l2~O`GGuk^fp1@Y
z5cekZpgX}Ih$^)C+X#K!n6poCoU>DisaN4tMRm@NS78HpC3LaQ;q)mW&98j8^O73b
z(v%mnNuv^kiko%trg0{o{56{=D{PkA+v=mn#!|40_du)XQ*o5WQ2ctjBae7Imix`<
zfCKE@VZvW?q3G5>Sg8x*n0bD5_rWx-{OC;Ct2VODv)*`r@N)=1j_4piBYf1@z@6Nz
z_&1-HpF7qpUe!HA`i&`k#$v0O^XfUpiz$-D6~O&tE?=1T8Y-nXeYRgQxV)JPEA35r
z{J&iwh^pe1+5?>PuwAsAN_1mnAWxmY0|xwf1M@R~(^Z#k(1iCPzuyv2{?bgNHI>-n
zyd}COJ)rSR6|mXy4WyiE2K`zMT)n%R9?$KEeH2sRk7hqSveA)ySYH<ld~I=xdLZw&
zSH>I%UCuIy!0xK=$>dr$e3H5eXww-gyJ}tAqvv0`I>(ANv&%rgd<Aq1Tmb5SHq#!Z
zc-XF9KxR?3u!F8bd%6o3FLvUB2QF-G`iT}UNCBlgAL(>MA52)<0lUmoM+=LW@OSky
zN?T@u*;<F`_zFv$;<JYe=C+YujTxyn4?%yek(eH=L3S0>@M!Q=GVK<IK9^Hq|F}BX
zAGMqEUiF3B+gH>4L<3CeZzV3<afOamjKnPOWK@vmN|nPaDDLh}SfA(z-S^vKRHPGE
zl~zOM(-@wiv<LEQo9OTs12l?ELA7)rtQuJ>wmeLRok_~%tvUhotVe;`eO>gHdKGv3
z3_x+C2k(O>Xq#OJK@SqJ`Aq|DbXqD}4(KEEUg5-d^D7|M!HRtJ^zl{X6R2pE$R{hW
zh-O`zg{%-S9MwG)--kuOv9Jy}Lh1lP`AB|Kt<M+k*t69W={+|l10tO*$SSxW=iJQ1
z=zdH1oc2HRY4OBm;WeQ4sRIW6egzr#!YHE6fO{FLgZ3F`Hvct&@_q+!6B|qS`Jue*
zco~eF@=nZ%$wT8V^<oF_7vhAZ@z~es5k)oS!>vK{aNXo0a-Uv8cCcI61TpfDBTaF^
zs3C|W9Ps+ytF&c`x_DTkH}vR`N7V)0_?i9;>OJ?o+#32~zmOqZG_51I-h2kzXPHsh
z#}Zs>e4Ym27l;a!T9#9_XxOG2DIe0GH5T+|%NtwK)~#B4d#Yo@i9&Ea(o0bOktxLt
z6Y-Hr5}kad$>Z7%itQe6VRE|`Cj0{U_v?zN_H~myso=YdUDzL(*U|vJ-%fQ=b<d+d
zZIdY2H3{2xDza**HqOp=<1v@g@xeeXI(9IRYV9L&P0$%uX^xbC+cpDRCY$gAvK31G
zZqatlV-z=3oBj;Shw{yG`h3(1L;Iefif8LNNt&nWl%=59s0&;F{-V|eYlxru^Xylv
z1wDr@P;e+4|IEmsO-oZqjM)jhCDN6~00%y$YsEc_L(yU4Hu5x(%tiW2X!`guWEPbO
zy$4DL&y3r2tMDZKS@9h{`|gy;S>C9>X%UZjevFoS<&sI@Dv5ZNfzfuJ{Pmaz*XV0N
z)QCl#BkiZ1U#_6w*^{wi-C)?gE}c6#597MxzHFoH&Cu@wG$d!!lCL_DI7^57?dpS5
zYP|XBB5QsZ7cHD<7|H7f%HePQXXrP^iC*~ihK#XOg{gUeBnkL<{C@YNU^%EOTDrY;
z!Eqlc=R<(tax;o`cORscHy_Z(fUW$oemf}~n7}%9g~C56pQwAFP&D1W1>5?M;L}^z
zqr;d9SX<L3KR)|5XqnhUi$oDU{9&EgTRJ!PYM;g#67eo(ZBOh~zY1lp^H`xU4qvZG
z!=Yahe@V8B@&40Y+zoo6S7<jpHGZSe-`F0*MmXWgC;GU%_d43#VoqJRb?2YOx^%r`
zFRV1^hI_pYu(ACX1P|-PPda_4Jz82~<JB@4d2yE5YuYYwc0K`#d*gZ7nF(l_eV2@H
z{Q|c<8}9k;vZysj8!T-d@zAPqTsQKvaI<_FZC=@3qN=EH(4(>3_pULX*fty;w6_VH
zJ(_9e<|%wfveL}o9tD!^jaTJo!lB{qqJ>%(IrdqD%Oq;&jr(cB@@4vLJH-i3ERCe#
z#eZNb7{dT6q{AaU#Dwd&W!vqCiXj=s(CvT?UV4yBr8_6_CS9OgW?_(R)?doT4CapS
zYGpg#YGbeIXGrV!SloHCK{mBe+Q$rELuYF`vMjq@th4vQuo5pU^Ht-yCc`13+!5(@
zIk(sq@p-4tn5YRTdON}@iAE8*ca6N^8`Hx&J+5+@j?emdaj0CKXFY!ho9d+RtLT95
zK5vDmi{g2Qy<`XNby*12%HnTxoPnFyf^d2fSASi^dOeKg7aOm^p?w!cb9DvoP?=4S
zZTj%u$uVH{zyQmwLwVO7b=KW?Q(Tpz$6sj|E%#L8m5*(4$JBJ%?iR&Ae6?BKyq%iu
zzl$H;BIQO0A5g;ET$bm(hkd(4<o=s|aZ#5^(Bsr68sk5Y&l_9tv~!C51bu4Lvv1Rm
z)7MDD?2Gi~@Dq21hH`uLXx{(hf-p6;3r;(K9vZiWW1o^qSTH|}!r~pdwX>P@`|T!k
z*It4*Vgj-2vXv0{^oKYK6ma7@D{h(8hrd*eV~31g+%K<K#LE}uHrLlt%q}zhzN?RL
zMcUWYy7gxFnNlzD-4O1*`42p#G%U{7<NX85h3<_vK~*yuv|PuF%jR^1gRhU^4Beq<
z@!xS+_sxT20*^ua#(LTxnhFOq!|^1I=7p=zP_HakzL+?S|89`(BGsN4)o90A$A6Qi
z!e{8wrh_{dr@^rD8nJe%5{BHJjQ!^<7jq62qIzIAjvOV?f_b!PbJ3UW7slb){O6F?
z$B9>GO~ccRpTQ@|;JIm<Ts~^>Mre_CxNaGv@#ct+@ZnPy*j)N4y8o?&Kl63qsGAu-
zO&~f~ypC10!zFUE6X1+FG`saT$Tg2s_mDBHmtw?a)4qc8${I>L7>u8;UJ+(@ZzAW^
zld^etd~oK9j{J^~(z~)fFf`biyV<+pboF}_9&kdoGiNCd`}|FqH&6vzzw{wLk3h7a
zgsf3h1W$t&LdCr_zIHR7M+Q4UVOcR4p4JrBpV=UIni=uFDG%V8S_U5UETtahyP)q0
zX$ITmfVp~S_^?9&7pzc${8JV5ex4V8tvBLbx86~p)n@+mz>;TZ5WOntAsN*d3JnI$
zv`wcs8y)Z?zk539ty#<!|3Yw_@j&ciWy7zJ@4&*vQRujG5Y3v*AXh687uS!%6pyal
zJn8_vwhZTu$;rIb=ZSn+pbCANqs>j}=D1Y(2eoq|x(rT$5{I>vQR9c}bEK~7Djnfh
z)HYbMz#l$jeFBI4y(hx{zQyD!0X7#80Qr*5bkQ*%TyMr;=PsXN&P#LLyfqE)&FRaP
z;w|W+S_bA{9z#LF7;Krd1U(a?Fk7t$PP=*l6H2;Cq{{KUaJn@r+#inLAE{zY{(D&(
z_u;^<<JjJ4I{mmkR6IYsn-FE-htWQ9(c>cmwf`T2!2i!*@cmzYfZzY{119)I`~M&Q
zLcss>7j(KU#rsxA<a?xK>4ekkL91#Jym>7#1UqySs)j#-h5h|#_VPaB&RweVYc{jQ
z_X~bGU->09EEg4nepH5N@HrRmNzDxXs_x=Soqgi+@-NU`Cr^0rvRT-E##E*^!B@1j
z0Z9CmRXf<l1n7Ti&=aT2KOdbWykBHVF~6V7zAM|&mB>`FC}*wgnp2Lf*~wK%to{sc
zK|95gvRa{P`j*;&zABV^&jBWFF&6_U8m`^Z5o*S&ka6m9F=F)+VXya8*frp$@bRV-
zOrd14hithJ75xBSzJDnG*iz?m`@R+EnVEz9{AQ>soIp8lqXo@|7$JIMT=b;>m%lLh
zfB6f6|NHp`{m<t&*)KZy|MC|?{+GW{FhII%Imcj?i-OecP7ynPo+Pd;FqJaY(vCiG
z1~#wJ=WyMxuwdX=Jk{TyFB}@nO-r8A`!FrOA3m6m4_pZgm-x_M>s;KheJ(onSEBw`
zx?+h<E^X2@<oyv6QFuzR(5vhn9IfxhZTF-d>?>DJEgg++vc>S+D4gedxbe9912F1)
zz37xJk>S+~VAby=Tw#14!c2`YImZn>pV!L<-%EnziRqYrM*;6`?j-lQ{*at1P6#g)
z^5F$4QrDZ->@ax|=zhtds|)uDQ<4q~-d5wGeR(pDR$3=0T~Wp;W65-UQHw*p6r>Zu
zkvtEW3l#y9-E>D0J36j`fhh^RZ{d2B?|BXw|4KX*mqImO*~Ddcgo|1=puJGKYfUr7
z%$$+zmo*-jsqes-)Fu@CaOYF=v~f=QII;XcU3f}IU}23FJ9xZ=Jip^~e02&>|FRrb
z{QE;)hu##d_ZXqy@JFB%m(DuNkATY*O`h{~7oAXCBffYxgtj>Pz(Qp=jw~~$>BUk<
z^O8H;#IJ>_^h#=Ri07k6)nIXAJ=t8T6r3Z0s@`6Ki4%OVQK=Jd9Gi@bgD!z#!W?Kh
zr^~xt7mIS!OZ0oVH?W5tU$7`elUyB+ERcbF>m=d$PdohhE(mvTGvb(h9qE42LjCF@
z#E}z{(CUmGMAb!M^Zqvwt15$6F?T6wyED8UY0KMMJwW)}EZEjNkWv3oQgXQ}8{~gg
z*jjQ$9yrn#UzHl-0lAdvDIWnVz4r>AZk~XCn`TkZMH21s;RmwNy$8#kdrIc7RXk$G
z0=8YgUAQzl8e?O%IXHiq)aUiV?ZPxZA5lepegg-&e4?4#g0c9P0`5GVgfq9A(Ic;p
zqC6-B*XKWgk$1u+`l=D{K41rilO9up%`ULhTOiyET7iS@^62-D{&;+bZ*6#{1-Gx9
zi{YzIi1X7!v1w*+R<k<>9>agi9RFp*%Y$;+Z-;TTPG1EA<85K?nIJkj&l`<C8Of{%
z{D!?tR#CG3bXb@df^+&RQ-iyna9d?D8a^(>LB%<^z#tQj?L8_4nD^u>GnIr)iG1`r
zV81v@+y^eF{zCN-iTb4he8luWI5qPWyit>O&$d5gtIkUK)mV{E&94UqvqRLLm5NJJ
z6}dY2fy{VhGTnVSR*cnIh95TML&r6CF!s|_>e8nXVz*Djz}WXdnKvoR{2djH&_-)4
zl+IUgKJ;9P8hQ=j#^MTkF~0*^3ZXcfW^h(b1KEwxheIa4ang&9RCmKpBK6n96s-c`
zhu<6VzS<v%+VFy6i;bXKFCBhc7?VZTDY~Voj9aUE;x2aq*4|jfCB5=wwXr5-IV};7
zde%U2a4b0Go5IJF&%na+tPuV;0!r@d^Ia>+Hd@ym%zfi|inUC>qE#|%J<7y&XI0tL
z-4`AEUKaQD^Cj6k1@!J|hwVL>>Q{B;sNvJm=x{W&+9qK`eX2ZQ=P%gaKLU^BsNv&E
zg%jBt&7%LvMzZ<tC>)rQf|ZkZ(t(mfp`VgF_SKQPFXuB*sf#yux@~~hoU>ry*j2Q7
zONc}?GiSxdZ4`FM5bJ-f#SuH6NHd*|JU{s&>{q@Hj_<BexPB*moHSk@w=M)v^vl4A
z-}-#@ts7_e>&%ONu8XVRYV%5oGB@k}20p)GD*jH?5FU>zrsm*s@bvmg`|s?KuL<Y~
z6R!8;g`v~H_x@ZVx!E1ekM_W<_I&a1v?t>Ax?qS6_T*JB`{0PCAL2`S9$D#}Ct>C(
zvdp+f^S=v}{@V{WnCA&A$bdVYS%q=Zxtyh0LzaC9;kIyR9<^aVOvsrd8{=9`YK1Nw
zKF*gW-MSz&woU?n^@TX*>Ikw)?T?~!m6-i1g@P=rVbAP|c<W~rx(4l{3u|3j|5OiL
ze0&NYxtcB9NNX0RZcT@R8e4X1OhnU+T-vd%JLZ-5Mz1k(5*>aPinFI-c;FXUAE=40
zp|^!7<;7^PJ<sKuqrg$hlJWO=iS#~66Fy6GsTb*5+*cx8HBvgimaJ9{O}%lWmK)UW
zn}7?I-$~TtbTNCNKSzDvg1^5PVq;S$-t(?Xa62a59o>6NJ#PozzbKdX1uo>P%|$Sy
ze>SdC8;>0e&%?Rvo1t@RCk$EGm+kzey;YD8HRP9p!^1~na;rbiE(ydHMS1X}b&$L^
zQWN9kmMk;M6I>VN;UM!(^2R_jH1EF#n!Z=jpx<vr+kZNk;QAS6?Rf!d!Nc(IS66J<
zFp<PQWn`41!!m3UN5B3HizM=Ht7CWYzOxc$Kxh6U-!4y^*@e36pCA}hAap*F0dp2f
zy=>=xIOC}wf2-XkKQYUl2ToVV$C`iWN99g>AMJ^Uq;uG!_gyL%uEN?*4z#Gd2M+t<
zMF~BsVXd)0Ug~AR4T1f!*Jo!=3RPe`^J79`lLHSd2*8##+oAK2!T43WZLA;ji)e%$
zZ>!u(cF$L%x9>+dHOd3W`E(|kw-FaO^}@!|BHChl71|6mIq>jKiMVwN__{5>EObPx
zlHRpiC;V8!Vg{SskY*4&61dHr#914hxiL}df=qZ$anlU3*QT#Dafmm#h-)D^@TIg2
z*W=2nhmuKiDjcjZ6Ec75@$rm%Y$0Z#;&BsH@1!H)=l)4M&!@7}FT3NJ3ko=I#vwYe
zd^y?=*+G|=RZ5veWzsK+;Sn0Wa9O-GZvz9IJywQOKkM*DiC!2})dRbZHD{;po5gsK
zVpy&$b>nMRg6cPd0d;DaT)vF&e3{6n2d0U$#FN6(xw>o-J&@;r0v@zr2Q_x86wU{9
zpkwx_Lcfg{xa`;v?y+ewR<D|g8)e;io7zO4!7^Uz5k$R(=OoY86y6+pCr*5M32IjO
zz^0D_Kxsssc)3p;Pu-}4hrSD#{BIn4YpY;Q)Eqb>@q^L}mhnEE0Dn96;%v86%&<gs
zzX*INT!F_ZzlVoPo|2VG4L3atrMUl;cvwRkcir}whCY$<46pn1As|rfuSTzWmvDa%
zQ>k~^gD1N@0k=(^@$X?DtlyLh^HZ0h?8a9*JSrZ(v`k_ZDX*t^qB~5zby5tqUP$9U
zsZxHz6R~B<4f&;h8nk_{7Kcms`17YL=unh{^p?(+C7cSy`8{1Yb&ey0ay$7SdkgiK
zocZdpGjMlEG-OKOXWylzV*Hm^P>$}xsGL6RTC@_4rX`B2HcB(G)m`D-KQkUXN$TEr
zYo)mZ50J*CN+|0#2*scMBwo&J2$`FSTkr3HVZI%xLxq>9r22>y6}36S#1k(ygmQS4
zJJ^hJ!aX@EG%UG-<HE<|y|_R0cz!4jOP#}$>uo@B(itIsrVajW{|UnnRzYrpE6#D9
zi)Xb9`T5lhj;+w*y|z~&cv~uGFCKzRwL-D#S~^epaT&5ZFCvp}z2MB<TX20-2Yz|L
zfVJWmL+lkFFt6`IDbpnrh<zn(ZG0j>cyu7Esp(Kb9bnfVsrX;Av#{}Af3`Yp!NX^S
z;oyV{C{|U5+&gBx|9dP=mwE}0nvMy!xsgKR7&}aeieQD-I{HYx2udqNC+DMNnAiq!
zgR<~`+#YJRzDd18dvW9Q3Ygf{7>||T626Vl#NStP=<s=S;d%5F7An{C<~uLw!~HtA
zv^@pip8?jr;(>R<F2dq1PMjY<7}Kqt+2mJeekJ9rvOl_E^-VKgac>shl=>qDNx2kM
za-BL|I|;9<-oX%w^|7Rb5~=x5hq28Qs7<nbK6rZ(gx>A2J6ea|Spi%$wg8WaSFplZ
zq~w#wMdt{KUOS)@t$G_M&E1r^ZpmF}xRQ@1QR~GjH50yhxk_}_Ho+$v<9M1+N1UgW
z&bLx6$>jTSc_O@p#l!8nL&kX0IW1YGRr~R_UjM;uQ!ARf=sGntS@M=NE4(+gE6Hc~
zzzdVh=)uyV;)~>V@zw2dII68Lrx&>6yr-_L{z2O5j6F&Q1}=2pYAr_nbcS++0Erv)
z86@orl*b+x52o%CmcE%!)1<vcH05F2k*RFfD6szO48D`To`!4MqU9)SJZRv<O2efc
zXCHgEYp4J^brpsvoD=#bOLyQE6U5L)ecWz1OrHB@GgLe7gUdJ1(iyid*fy*m+Wztq
zzB)_1l&dk2b2l5V^xPqtP11PU>}7Ccc`%OnSt2?gdM1_+YoTYWm(bT+0XTWjC@!}f
z#9JqO^X(a-)THGhsx`)7bJPTylinsK3?DC9UpBziP48&<|JT)<M`Iav|NoL9^N>mr
z8B)m*N^xI%BWVy7X^>PhRYF2(L{yYn(m;wzBhiTKY@*U2qEeAYQc8wOrRjG)-_P&)
z=UM)=*1g>4vaWs3KJWc{Re7>s5_bIU^S61kr7F1es}FV<&4B9;O0a*wEt=6f>TG`j
z19ExxGg*!-m3Fe3%dFspHRC0JZN{=cYpL5~2rk)i6J{;h$sf|vMa3Y-ZYc;XwDJlF
zYxh9^nhP*?+Gx7_ES*0UvAk?s{SPn>Fu{0<Qa0mP3txf$V7*x|ZjSrTTMjkAt9sGQ
zZNr|@*K_YNqmYRL7p0_BGj9*$vox?_Mi{;*T!|r<t#IAD-~6H#i7@)%A5Ov2h7FTf
zWzxUAan6JtI4pT2T6m3O?FY_@$~V7fvxc^^z)46Bw<S@d^Q3U=%(LCUXX53DGx_M(
zOV|dnFwUgUk)mf6^P{51(wW_vY-XVp9uYqb;vW^Mf6pG4Ev|~Et;}$%x;*~*^}b}%
zn1N`C^Khl)8&DAXvnSjV(5++`9Cxf`tFPC=IO#^VwQm|rweY~)m>ZnRyFv(mFM_$F
z4~n+#QOD!+yjiVDA;`?QE_$&jfVAAtgP)ip+LjOGI(NKfy^#`BvgH7;PKbFMqXg4{
zHd`sbi-KN`WJMY}IB~QIE1sCe5`}r=5znTxG1sSJ;NdUqqUs%{pOc6aVy18!%Rce_
ze{M4M$V2Se;_Wzg={d~1I2ebzOTxTiK5U|qC%e=%8m%&x<Lhm4AU#f=Du+n1q{vXR
z-Fug1&CJDw6lH8&BOy@aa|Jth9J_K#M7Dy>uxRRXno@M0>6||R^PdLbn)oVcwXlPD
zQ)3pRHHChuePDe9%S8VB>;-ye1kJw>xJj6sttzy|d+as%t<_`RMLt~9<XudAcM!Ys
z?IbQdYeKm;ecYH$sqpUcVsPKG43_mg<D_@S;I)0DxW<k)IQ@1X^;JznzunsOR?G-x
z`jTj+Kn#6psV|szCt}ZAS9~T)gO=VjW|W*++Ogc36}l&|2~Mj@tq+9r+u9iSsHGob
z=bwSe2b5vC^E7JcJ_2?@-7vxEEgPQ1Gwr*kq*Im+-u7~|wR9@@4-dn3;rT{hi)T6;
z&A5JHM(oQo9~Klj0;l-b!6xHTAetq@H{i~fYue-0X~H{?^WX&a4L%QvM%TC9;Bs~n
zHRW-1Vb&oya@~kY$$n+FrTNS*c_#$CeGIs2A~w}Vb4Q2I!-CY0Fs5q|KK~Sn4^Ks4
z*cB~ETCa^WUU!2^s|wQ&{s7Z|7*f8SG3^VGD%&|kg$6F(&+ZHFyYAK5OyivvHHw*A
z`_yS6e4B(5^o-%k<2`Il)D8;UEsx{>1oE+MwiF*KMx7H)=|Sx>awuxxRl^2ShWcez
zD)W-vXv~1VG+Ffdx*6r8OZm@+XIR;@ckpK860DkIj<P#uV!K%+GktoTJ9K9sJ2=r2
zZ`@19?ftRL^$>>>Qxc%+i6vGK%YnbXQ)#xrU~(95imHc;!T-WVP@LDumQ~(l>w-Et
znE~I~Cksm)sU(I^I`!#08&1!CqS&LhbUt*|czE>B7dx-nVyE49y3+g^{u!<y&nFUP
zbpuQU>(Wm4BE1M=)}MlcJsgA`&&2eQAb6*@m;0<T7|jn|ViM=Zu@4fdI6~$Ge9_p!
z#>kyz<(B!l>hC9R`Sjf|UjHoPj!mQZrccndS;*8cjK`*@O6dO7NHG1%VEB?)(r74R
zehS%)8BBz>vq$(zriWSZsjJwv!;=#ESQ;NV5FhN&K*crb;5}6r<%@&q_>`gSxkWx%
zxO%Z{fvfa<)*$-F-{(DqgLjH=5BqRWjY3{VU`>1mi^(bk<|jeB+g79JcrT1|)SwF=
z%rGt5fZ2}@V?+BVv*vr(xtLEuFyzfHdhjkC($>Y$Z0BSQYDvKHg`=UT&W&X)HD!Ze
zM#GFztMKLD-MGlm0*nutLvr0ENbOq3d>)jcMY}DGn|g`PTz`y-`VFjGx<jyWW%9wA
z57@wje17rk3oLu=9%ve_1obZqF#c8#7kzR@nR>M;HfP=g(Yd{}ENc=>&gRhK+iX%8
zt3|E+d%m7_GT%*SnEQtaVJ-Gz_S+O%Aw7#<uIfzlBBSt@kXPPZW{d+xBk0TD?<{bW
zKN!<My3M|pjy<~z8&WTDolo8HyJ<Z8YBNR@>cO)#u~~R}ax3o`uLhaIEJ=#ckDpl>
zPq_jmTG`E&U;JL^qg^(Gnh;&^wk%=h(-&du3SG1d-Gf12<_P5IW$@!cBII~0W666V
zpC-)smh`No$I1?PWkCj<s5a$qEKz1R)*RzzJO+3y{PZV&W!lQ|m>W3{tMx|WN~J68
zyWcD3GRXsDv{TDomhOTtOMWn$+)1e4h$s<X#ciHq4bT1kSy1(JRylbWwod9}c7sCj
zpMN-Z)$f8=#lCQ|O_)F2aF?CgGJ=2ez>m(RNaN=r_t}pyM|8<u#|^nQ4ju|wSfvZ6
z*l(BboPM$;8t+cwTZCurSGkdCM+oMrqyD7UA5EVsS3pVrI8fR-9~Rjx!X<G#*vDsH
zxcr+R+a&J{fC)^NTSq5Gy<tPQsFTT&d^qzg8J2B}A^D-R*=NUDc=XUBauoC8x|V0M
zEo&vIFK-5px#P!tQ~J5@u6F2h*n(QzeaP;0I8CZtg}QwU(Xe(5H4EqVrZqe8ovk=t
z`}TniYoAT66Ap8&bM<L<=1#WboC-$BS+lthhEZ{F6D&OFjhp9%Vw3F%jJM9?igM~f
z`R8SBYIP+0H9?m)B@c!7^RwXokrS-55-H)!Z<f`4kr!IIBxiWEw7uyHDoG)`xYCX{
zP3mN7RpB({d=kG>S}?qBxWjgx@uBnD9o)7V^_-(!I8N*d!wx+g^xUmN;x851Kfic7
zHggGzpKf6rL$<RO7ptLAFmF%vOcu;Y(kxVS7H+aE=O?c<!8Z<KHr9%%wDm<gM4y(V
zCqLZ<6NL<Q$sVI4E;fQOxR^!y`{TlYVU)bpiVAM;qG`uBk}m$>8oaCV%MD3v^a>zn
z#ZIm*v4-&}3(>PY2Q0SzWtmU%x%{nBaIV{r3z(aWX)7;M{lc-BT`Z3l_ATJOZXcWO
z^^(bV&!i_G4DdyP6zT8%A(|o>YIk3nL4WjKv(u+9P|spb+P6m&O<RuPv{4PL{&5D}
zSK9%>0hhq=ks(&-&SZ9b6RD+b0L6-ZfS&>Z`kL)~sPPzyi-Q`itJ>b+S#b||u;dCP
zSqx|Uk`nOA1Q&WTbv|{P3b~;nCM=!#W8XzJ%3XQ@o*xfkbB2t97W;ULmoMao^=@UC
z-rZzcXD8DF(;T`Q+6=`%glE}UjjfS$G2Ppc$}G*<f@llul8OLR$w^FX%o`>toOgru
zv!QivIfX1W#tn;v`E<GCxWeWz3lOp(8aMCp{ypa*x~Gbb{PVJGFmn<5oe7v=)XiT{
zjKbb)waka#Vg01)J2+VHFZCFv2J;leXj#4|{t5of^<{R!!$cjv_>3?^WM+e5pQKQE
zcRI^`I|H?+K7gw949>SPi)&Y$U21Y9kCoJF(`}nLnlRi2W!DOs)Ydl89%TTZ4_ngG
zuj}E_A#3_;ybC?_#3^Qq2m^0f)8$1{SkfBC?Y6xkusdwoyk(OqHk`wQ!7J%Nk%*6t
zI|uT?8`%X9EtF|o2{X#`ILqXN{5jhoIwQR8S|T@N(y7tll-<a(I`Sazrx|%wdthpA
z3}$a|g8lEDD5)zD)ujh>b2i&hgp3~8OccYm&2rRz?JU=&5<yFh2ePm2+1z8n{C6ru
z0<+#pP_aY_x4%7#?X5Q=rF3c5KF$H}J&fmlb#L&){>bC;S?^$kk`XO>?IPIaji}Mb
zloooq(s84MWkX^N$<h7}+`C!B{$|_YsuL^O6n;Ct`g8<lI*r7fqJ9>1Ns4AwZ{hze
zDdJN$1)=}6Veoai8HstOVPLrnIq&((zCBuwg)==VG;TI^uaPe;d~*$Ii-TZy{6Ms>
zH^IGLzshWqN0R8}T-?N^f!_=X`m?|uQzr{NS>?gxF5Sh?8DL3L%WpxrP8?jb-bdfZ
zv~sVWUS@4u+(7fs2>L1X@Km1(ww{+msl#asDHfaYu&bKiHhc-&zDlsB{0nA!TT^NN
zscY<T+7SAzd=&ScH$pdI?yl%dFtu2w)5iH0cxI?IhEB)?jnkKe++`eOwEMt9jU4vX
zIEKtb48*dF*%O6PjPyJCLn<}Uz4IE3lQ_uET8pt;Kh?-g+7b_(j-=<O&hsjt2h*P-
zS87;%gvPXSr0sH>H57D~RV~VAb(%ZbRlQzT5$^?~&M47niV)72OYv@tKI>0%!!y<c
z*iN->kc-?-@weB3b>1R4X!3^jMODH&r9RjxbDdwM?m@~jkD1l3-B54%n|pq%8(vg@
zU{&vjiUurH!ROy*($N__H#Ecu><lBZNqYz1d}tiMTa;D0Pc|G6<P64NU261O$jaQa
zF~EOwx6z`8Xc{vAGIyqGC(QZM4uS5s*jI@`)OtUbYPOs5<=I2fJ!=8J-8cgsj9S=^
z&}00hD+=V;PyuHz7zr5VH()(no{GnN;F1nc{4IS1X4l+juX2n*YULib+R_L|bgE+1
zYy*sRZse;^9$^W;3fWIvd3x198C#lGQ%D8JsuWfTeX>(zaY~0YPu+#2x<G2!KAW_-
zDO8YB0B>F|z-OI{DD1s8TX!%YO*5*n>qiLg*qX`W1eT^%_AU$@#BiTpJID<lg7EVK
zTU#1~UH50<$sc{}^Q%N`aLDH^w$+h(l`*fgTA$l~N*yMbzGOcydeVzQ^HAejx5)qg
zc<O({*w)J%SZY%NAF;B8xi;rAqs4>i!Wmhpj=jMKyt!HS_?jX^Z(z~YwvgBtDp&}<
zv#FKo7_&f~c0a#E_qcVK(cz8v9wejml22^HwP)~IJ|CPm$)f!XU)u1c2$J2eK;_3v
z<fwfBzpVel?Yj|)^&dnOtzw9c(nr|FFN$mv#e>UbFZMd4021d#;D$E_>`9IsWh8{-
z8~Igb?nC7`*;8lA?xfu0dZx`K--<kHk#%JIn>XR88>7m`*@lAquM;HOD2Lm-hER|A
zeq3|i8BgwtA@id-B$Ca+h4#jz<<<?O7hPq4C)=ZKVJ{ooW{#`MM&k&{E#QD0<}91c
zyykzkR<%38256pw!cu8Gd`ALb)dW+3`cRb1olg>4Z$!s*qHx`$k08~ZiMktXv1?K<
zOFui7)(mi<%ksBaQ}cK_C+p10vfL?Nur6IW>VQcbl0g6dedrNbI4_a|aI8Tz{26kC
zL-}>sMyjYkPz(E?d$7K>o0y)sHFdr^%%3++=f>K;Wj8dPam}OzuKl$;^QnEu9}XHo
z%@eFx@X>gH@22EB;})koql#@lHH}UvS!3+%C9vrGBwF#Ri}P5UK-2#T|J%C%n9o32
z8gVt8dZ&8BnN#uTY~e<meDA^b=}x%mMFZ?9sfFjOm08EVR@S>_EXp<Kv*`BcyyQ|d
z@=@<&o*fhUKOtSP?9c*KG<w4&jqT){HlKyz+TM`!p_R=a5{}6V)i5^XCdkeYVIPgA
z(8_;C__fXohX?qu#4~@nW4Y_dT6Ze_D;b3!C63Txry6LrYG%ismI&D)XS(sjm`?MN
zxY<yK0w*`YL+4efQ$3&Q-&rS`)T>D~n<i3S+cr3Or-#|?&xXHt&)L(4GjPBt8R9Os
zajl_}B==wr+<NqoTW(Oqe5O2N-^_M1x9WJPcK*fx{j&_m$Zeyo&r(RzBo(|`PNB&Z
zU3@y$kv`9tz)rzzJHPfMwQtuVaf+oX(_m))RhsoAs!-~o8fIwP!)8U!!gFr!cvN5v
zOuaLUu5)7aa)czEuaSrP8b>B2xrkL1EXB$)As2G$9`shsW!;LsP@kR3rN^oBD^W?b
z{#826bMoO|96k+NH|FA4he-0Puw-!^Rowc$Yq+50g-kI*0$hvFkxa)jT6l32+<Y8L
zu^)@el6_Y|-u^jU@&-GafO5F*w-YZ_C&6+qhmrn7TXdLcN$V6ZLwd+@HuvKs92n3G
z-1-FkW^<l?8z?YC2R9b1BkVg=ZUX(#qqi}`aA(&I=Kfh4T|@I=RQ+_SzbKp$4@yFB
z>uz{0c9fny3dG93^ir0V%96@*$ihGiN32soZ*5cBoqvF(ju7Y0%4fh%$9fhZQA}?`
zOW@}6jj-};Ci9*7+q!pV8oOoohFvMphoX2#TAsWOQg_&bLFql_94=(Jy00?-`$kkJ
zIgVO4$x@~C61KkR49k6RfEy_ti<Zy=L4#|#TZ*kvv7m*`xI2orkE~?VzmJ7^V#QFg
zFGIA<{WROJa+0-l?c~=!Y2wIvEH$kXLl!p?LmU}9vHlm#e$Vs8=|WZ^qKk`Ob&1XL
z9F9lrZo=n>I;{8HZNWMf5B`aA=wqfp$}uwp8}CPU3>;{{&efP8{gd}NXUX^;Es|_4
zU?)W_Fu?vVT&{lv6AY>ZzTS44Uow-;v$?_Dk|<|N7meuKNNr{!*p^-PM4?B!2fG;Z
z7H*ve8v5%qyE}vvM%?eSt`QH|+~Y&>VNnH6GVNtc+t<;-0~6W#fZJdj6(_LJw&JpY
z!Fcx6YJ7kDXjx2!G|subnI7L=#jl8)O9!+KNU2!?Qzq;n9qoCncDXF5{Z_+!<F)9^
z!sB>Z^&EIuxqy;^76r~oVxPVVM&E!;W-1yDt@q{-TPyHXc{A$2Q3`s`_u#~}3k0*X
zEw~26-~)&0#AMv5#VL@MJy?UU6-ML5d*jgL(LwrByAIO6)iUP%k|lrDf*&EO6zwvP
zJvlrVRx6iq+D;J^q`ig~et*Q`eIB6U97%Gy+|Tw&OJi+H6jfMrEbFoXj$g3=({lFF
zD}5)rU^k8XT|W!Qt+~c0ww<DX{cg-;@gXSen2Q>Jj#1A1Xng+T0epD57J_wNGp~1h
zDM7uHH5cy3e_MC5O>g{hSeqe=6}+>mS+Ejw%F=M)yup|^dm^T5Hn5sWGr&+!j-K4~
z;qouoQD>4X>07SC@a`qFE-9Qv9R)OWQ>2AICZWg9-Pq$lk*Uc_(TWjLbRp3IZ}~st
z(`hyN8|`K_j|RiP#dB%(R2?$j@>mpAs6oE3+F_t}0hMd!a+BvOVDn;qEYPcF1FP*w
z>v}k%f(N}2=6^TN7*5YJj3{)RA%xo;fp1!fiX%IiS;Yl7HqQ^W4`kxb1_c<Waun8k
z4kFi$SD-&)J3b7a4BQ4sRG%i|rwiuW8#*pHKP`<F`SiirOHB~3d>=*$_Ib&KF7C>i
z``3=ho5SB5@gk>oN47d^JpCEG8I)Rk*h*=661Tp`J|*3SSNaAdChWrvKF`EH&8O`1
z{^2xf^BFkHDPm^MOp3le5;yJ)MYHGT6ym%R_dbgOoh&6hbD>)FjF-hC(>?e(R+z=?
zkRn&(Z0@Le1|IF$&!s(l#%i0SSeX9k(uPhQ_GFGC^y{a<vUh{{U&8rb2od4XAYE8}
z`y<rGn(~Vz??7(-NYe8Pg+_Hv4EiAn+awRrzFXH=c++4i4o_#gF&p9i&8b5Dn!s|}
zDuI8O&0b12v8`vPVRuI)GYKe$$2u0|oZ(BR#v9S6RM?{y)<a>79;!OdVmDqTk;{q*
zYCosTY}S2*`ug+WJw=nUW)39L3`;zju84u-mavM}SyW!L2mcwCvm5tzk?T)4I<^&P
zu*NrLutI~L#|r$Dm^l8=u0~kaZ^RB1onk{%HCc^)K6q=`i0pf1q5RVyz9G|uDJ^j1
zjyK+inQqtN>eP2o5x4+*xODDr)mCgB=!IXbLRj$5Zs`6LgzGN+gh2gr5SN<Is%A=o
z5NZNxD>aha5W^OQeJnfGDX`5g-Ppki76Ri~4o&Y)LtCj)SSOakNlzF_70IWBdmLkm
zJuDc3&Ba+ltON#~+Q`QBO(&aqda!Mez<`bZ0NZ^Aqh8Sm*5>QOg$#KG0WAVE-_I6b
zhmT{+KF+}vn8|ioiOBq>V6DDsiIbGY&?bC6{wNW6wYSaidGTDq7C8d0mu|x*xesjJ
zizrdYg7d5<JPDl5=HXBmQ?PaLU>iSe#kT`xF|bdsY*h6`%;rwh4L2vUQy&Q?PD-rW
z(ikq}=;P)bZR&RnMh&5-c28Ch&l?tlihmr|-L>QsW-LJI1Urg#oX1Ygl0&J&YbbSG
z6XIuG;(RR*!GvZpvJ}_ETjsrBwV{d26POcnPevkHZo$4~u~-u(OF4h8@OBGLMbhSZ
z?7;~K*kv~odrcGB8C6L#?QmteS~9$%MgXRdRK|!xEppXaf>+<Y10(H)0(;4d>Agu}
zNg6|NgswQgOPa}4SINR>-{%lyXibA2?gHfzM_Ja8N1zcXu-a`DaB{3a)>&8c8{Q6s
zgHjXu=iNMO3Yy9mZ(NS~$M-@`?OZJFl?U5hLO<V3kM^vd4k<?~;i2~~k&j%LXrYuX
zS$^dxSwcaWt6j>?O0;8^^Urdl8_q+9a86YilVUwpxDPuhFw6grX=kp}>anh-8nhP*
zdycLrY}q3<HfRS!zbR`$OKLLhjSR==N0Vsjro$}fwiK>;IUb)riDFwX%HZk4g4MRA
zgKz4|hy0>%{3~fKrlo$H^=IBk$=RBa`_YfI%QvvT8R6L9YlV)+6DjJ~NNU&7q|LVw
z`6UzJp^%X{Z1bD9&@Y2d`A8@hzGLpCQdkxl#(Z%$rM)Wvf0co#sWpsr=lD{2Ulg4#
zTE*?MKT(#>XW<Pu9U9lV0*m|S(fA{Aa6hz|nRnUoxdYpoq_WWGT4B$PIXMY7UarIW
zYlFFO8y~>n<MOQYtB`9}5Ij%uDVVQ39CfZZ<Bq_1N}n9VFS9Pj`h%X}+rADOv@C3%
zKR!!N{)6!NH3m5ff_q5WgI#|6mT5XXfaj}Z@RHwX9DmIdBi;$Uzt{s%YnO&ah4-n4
zt$+_+x4GRj=cC<TD{3!S$Bs907(X(IrFZ1u_v}Mh?RA$wb|W4hfH_ukJ7HPMS*TpU
z4?1LP*<L?^<s&M@Z^in=SM8$Yd5W}k<VdvtWktHlE9q|$f&ZZ;I9>Y<yP3R}4onpW
z>T?#d`9VvOYuw4Z4Y>hdt<vF!n+@IY&%u*E_UI>}M6xwT{Iu&kSea83OP|~he=8^8
zUYR>wVWR?#=g;AA?-oAFc?W=DEgP`L8BA6WL(LVZ(JyTx+P`RKIq_A@*-YRlrWXpX
zhdmeteq}E;ebH`-KYlLrq3*$7S?^a3)N0s>Hy2DMzUwu+^zjzEw55`z@7PRsSAWA5
zzj)LfJs!#~tRvFsg+eDCY_^s}_o-@}`NQk{27V|cC?8>m-tWZCA*JkXM6qaF5~9<b
zx9q2vB0Z{IkEI>)oZp(0oK)OOCVe>zB^q5(K5-SDGKypqqC&}|brzO}8-RO=4O|a6
z1Ls?Bb0g-Z@k$z-KsHVl|IUxcg2i8%v5qjJB&LOmGW+1^+F4Z7ZiaQIjd0Ww5x!7X
z#n1m1<E>Al@L5|IXk~b!d6*fDxj&aezY4w8AOB!)U$JPn^=x{5)gF6nMYv*q01UT0
z0lF$GtW{uY%j(af(}A+M4yACgrwYVZ`&0jL6?|#A8QuhpqzIiM^x8+2&+=Kz#<c0-
zu<br*mmbN>ZLtGMKXZ~;SPSKLVbqdW39E#;nX{@u`+RmXt3eLn+u^}l?#-vWIjyX6
zWf!yhoD8)!#_ZBBJ)Hl^4P#`1Hcvjt$zDB;ty>G>lcoxLn3};1KdPgO<RlC=Jqf-&
zL+MVQ8P-qJriHG{`Qf*ma8(!Mc25|E1L6SX=jihpUlZA=Ya`Ko_90Mq86f;GYSH2;
zjL9_B!Z9svY?<<cg?UL*#<BY>-nD~eJkFrtHOC+=%t+|lp9KH4U2NN;Q{dtt%p^Hp
z<cepH!b+XtSf!%}gH0#k>}yjgb;2Cz@KD3|YvjrKff8E~;*GQ9_F$5)8@7~=!kg`p
z9BkbL>EkMSYspMlXV}WNd&G;hJG61rAn~&6ORdl++ZT_h=u_f)arAkX#p|nTK~KLH
zEu4Ou)!N$8)RDs7UuzP6=_W{_yIlHdRTeCh!$OXj(#UhesJmq!H_ynIt#=5gf?r$N
zzw?Vp{j9LL8$5u#UPr-FKR<R)@FHyfahUmds8GW90l0pV2tAEF@tD0Nv1NJ`;bzFc
z6~#b>kzls_QNuD$?}j;%5%^MdHwLCE;QP)5p~o|ejZ_t*cRiXg^1ddWj?u(>nRciW
z76mu<e`9#x5f5a>uo#P(H0kOzH20l?YjX{;ca)kayvLc`7bJ4$U8CTcRRX%4KgK>P
z1~B(Wqu8F@gP5Zph2ENuWMi=mwWr9?5@CP#xK|7>SrxJK^)J9rGnZYzR0d&X@7S0P
zYV>SS084x~R$vCSi|S_&CqttHqVgGYDAl}>(_a||MglBKVOqIJ_L(9*i`1gkuY!d=
zw+{9E8O9fdWbn1q50~||db80lCSX8cJU+^@;1wPVOqFt7i1Zftvm?A&qV=V+Wx|@6
zS*sz9f5L6A@xw1?&*Px;8Qg)ATLSy+Bpbf^0z0zgB?~q4r_0m*s9bS9`@L}=E6ovl
zA;Ov1<={}<_TGt?+@eNLt!CrdZ<;vJDUM<qIv}EW3}k=W2E$%eu!OV#tnuHElkC=D
zf~_<EsiBZvSbH5>e_m&Qcnun#T*tPZFs2iEj<|2VKgMLn;uqaa@ceE@e$RjNqXdRm
z__B7EKGhB%Bp=7L1umGmkV7+0i(EYAvDo(*%q$uW)`O0f$iMx~it^jojyJl(jMW%k
zbX*CN?<@eL1C6ZyX9^oqk%Vh&XQOjzAnSTB4~8W&7%~0`%PSd*Q`LgeWUx1-6w6}9
zP~rZ`CKk3tjTg8VKSA<wFR*duv@LHUF0M32t8WSvrSONnHq*zxRSlw9^|k!I4h6a)
zS1a;<wU=UoBEZb78VuC0u}6Vb@Xsp%3+C$K(obR7|HB$L#l@D^M$6Df`$gE<s864j
z3@FgUm^|KfiL?xDuxPUcy&iXwX`~LqZ{h+W^WJR25<|8jcr#SSOvEokM&ZiGi|}`7
z9Y1H50d9Af#whhcFln#?eAx1vRgRiZ$@CK@u6zlzl(b2%SC}hGDCEwr5gdxSxv>B4
zNUEzJ0C#-ENL6(kUjM<-7KP1dX{`v_{qOnlHZhEwH5}Vjbf|ojGo^i$WdrvpklSHn
zQc;#9H_b{u|9dKE*nVS96<68H!B%+mKs%eS)Wgn;O=B**A3(Nt0VwP0vGRR}bbjC*
z{P)=g$Gy*DkCiwYnEZ>$3p^rrN*{k2%fQe_GVJr5A=n|zJ1E#+0JHUxpub=*Tls7U
z)z6)RC&QfB%lo^?^Vt!&ZW>Hgokqf4^;Nhp?BAaGhLA_eE?hGuopt%Ir@X_v!NBS=
zM7$AWPlv_9g|5jMnQKi;R_}+fg&eoxi#TrBlLE<?W8sic*Y>X52)Z|ILFF4kbn{}I
zd&+@pJd_IJIp&!A`xbk*EsdQRuoZhoK4O6>e$Xk30t)wHWyd^mNbzv`sX7kLPb#88
z<8XR4Zz{U@1#smBKJ@UFJ%~?r;KqH@#(SIF!Lv%AhN|VUW8Tl1-1<*tC(mxCC7xDR
z&&J21O|t{4zBfdL1B=M!pDa!EYbw)iDuj_s-t*(?R4BauIBYp9_(tUSG6_k6c{PmD
zg^D;FUiOeB-JHUXC3(U48i5D&H5ncRJ^|0(;i$2AKdKnSuvh=2q1i_gJI7`)t#|ME
zuP!TafzciuEBFKQU<oV}^|HEI&*12AXI}Jh2M(;U#pr*gpwP3Ezbu@~$MAE>f6zzR
za_Ky)-Ef6D2^{L+L($ex;sWsSgUwVe?7{Q;JSbN_0VR#@!3i%T?x4HSuL{1-l#cA6
zhJS+~IKGH)I=q96x2@x9Po7~a_U7#7kA<v5BZlh#dhrHDI<PWuJv|+JLxclWu=@`{
z*;rjln;K83`3f#)>*Bth%4B}c+D313CET*ppfO{-&^7QA9K61t#$Gy3Pv-@|i`%2A
zc7HY-dF>$nU6PH{?^QsTQw8cTm_{`X#<bW<U^{yXY|rCDmhEmusayJU&~vb4=T~h9
z^|!_>%+^u3>z88Adw=qCQ^aZ7pP3lcu><ejSEc9g-(X*&q>X#?3QFj6LXD?7=yS~%
zzLFJ<(iuykgMRRFj)PG7<!H3245USC8@NYzO1Z0cB6zTC5H@)G&|XU+E3{|}DqkE=
z^Y%s2q0kDhna@Dzje@{)t4XS%3Iv04>Bfw`MDI`Hn6+=PUuq5=vl@w#-rwQDT4_?8
ze-KUIW@0HPxQzDN<CKmlP#8OdS9o%Z%l_vC)h9x@7O72aac2^#+3hF8WT6+h^926v
zkU`bm1K@Mo8d@pT5*h7QxO+8__Vs+lu@#S*Lz+2SU$%v^6K7!m5*76AABf#DMP!os
zjxFl9BKQ78$PP0j#Vz`@qTnT4tyBRw6oh%$eS648S{>A6oS<Lm4JJMo_5{Y#?2%$I
z%onXivyEx&!B%g|oTf=`;agec;;m3OeLwBpb!W=`rhL)n6ApCkx-jGEehto*ZDi${
zR_N#Pf$iEo3GE7f=+SmX_EW|Nl0t0h;*2P4?ORYLvHK2p{_Zen8?=mlx>vw7C)#7D
zTRLV{o`=ONgJIwg2|Vv}33Rgz>AH#v9u~z|Z@KTr@BWp*l?23M_;^)zI97?a-BqL1
zq%Llw{}Biu;>S9qX4AvAN#qwHc>YRNn2x~FcpaCF=OS&0Ke-1M(n&lelEL~x6PTx;
zJ+@Tnlh3BH@X2v+*_q{c;Jb}4Eoe$YrMGq5TnP*Q%7-XgzFbG>vs=*^S4G^tW<Mki
zF%w)BDtOoQI68(bqTkav*l>9p7}g10uEAM&Of&-z)r*n2syKh=r!tL~`^+r=DAKJF
zr&ves3T#h##rJ6;a~Ipq>OTm+ybI+lCFB~n@Z1#G_T3rPPd@?^jKH>-a!5V?74py#
zV^;={mBizccclj*>uMzLGjJj%91muf{zYK)qCR%VWja@NOW5Pw5bm*TlUZwnE7&A8
zuro{TL7^oIrOuc@^Aio~7<-;i?Gt00Hb?V8Te^6)lT|Qeh79VaM`EhMcTr)eDlMIw
z$n`vp65Z2phnsktOCKzFBF@jo#Ko;l+3y<{Ewqd}iW1?tm9=p9Wr=H#JK(B4#;AAi
zJ1bcmiAO8mm)u(10sg9p_C<%G^WO>#(wYDZ*V@zFGv?6sXC7vRj;1k%`odb2g7(hY
z)Hi7rKg)XxJ}R6i__)Tyn75Af{k<DD4?PQKK40JrZ+YP6NviO`>lb5%W9jn~WjxrN
z!MFfznsi|%7R9@=)$W6+_SH5loNY-*)(+s*22V$?+t(p{=SJp#_z$d;-OF-V7We3E
zDoiXHL<z^cL4DpycrShi8e5C`1c9&R)w_~rH!Y`kM;76q&U~gQZ-Zq5Z|p{Q2zT^*
zHNWgpGsZfuBJ0JM;6u{{TpzF<@`U_)+khaJiZK*!?M)_whvDQsqjABI62U~ifNO0T
zf#a2ADBec{hGp3Er=+V`w%$yXtGmh#eJboZ-f3dTm6DQ%ucL$j^bU5%!W14`RkH5;
za=6|=oC*#Fg58B5T&<9EE&po+23y@qclDm;w5<IwGqDgH4jtfgF4@6}=aJx6IfzUq
zUtls*BXCAgK1)-H!AX0r!m%Hx*xV17xVVZu$ZrpY9|B9!(O8pY&0EWIA_kGnv_x+E
zKA|Tq+;49GyPe)N-x1B&Xg~@!$=Gx2Hr_G01SLDhpvT)4Fk*dIneMA=*tlsFKE1sK
zN3@BNO>P&vBz_KTj%U+(<0zOmO%e3%)X?3?i2JMXiCM=v2=_Sg>~>x^+a$WlC-!c{
z-r~c|`i2q>e7_e`nxfHaLL%|JIp=zG1k^5m%a3Z2rD1Z~WE|%tc%25}SVt|KdsGio
zf7bCso@X(=Dcf;yk0O05RbYyjMkA@$p@q9G$q5;wj6>7WJERZ~-5rAa%MY_aF-~Nc
z_K|&zZV@uA)=XT@jy>EW!ds^=vYwSYXw5TslGYqg2}d;9n1A>9Ap4b=Ki&`)UULKe
zesh{AW{z7`=Fzne$FaYMfD5c-%S-Y=LA?W3$QD?yEKS7D+WqXTr!fDMTMKU+T}fk1
zFs61Z<ClyGxYTx4G@v0G-R_1`u)Yn7^l!0#VNP_=4u5*%U4YAs_JZYl6}<S?l0DsB
z%<FyCqF1^n=)%7VoM~+{v>dvH7k5vgti-9<Xs!t5;%`~aS#L}ea`i4^Z&{v35e@pN
zMg5=WU}5HKD4uF2xC<Y`gv(MeE`A!QztRJ%--7cqi=(xxcfw1fh4ggEI`pb^!U+n&
z<heAS=U-{S&|Ts*#v+>cQ?<fBZF0=6cN4Y-$J5dlH7w0jqR-_ssV&->?7rkOod+Vy
z|0iU52Os3h+s2~_W{Ik$rGy@T44jEIWHv*Uak`LAzxZ8+%#ArxcIQFd>Hr&Pxr5CZ
z_(GUhT}7^6tYD$oZ#K9}no^9;v-zzB_(#eTvfN^+V2TDc-!;Q2u6xT$XIf(6rf4`P
zb&_>gW-;F}4dA?29rkQ?hrbEC*<!DaWcz0Uy%pxlUKqS**NPJ%&j`@mi^Il}Ff?iB
zMQ@@{)9OdMY-sWt>~0Cbgh$8VPQE6+eZkm5A%8c%P4F5;XNny5%!TXOZ{gg0Rg!yF
z0NVR}!9m)BrrZ?v_!B>t4tc7IJxfe5N>VtdzDKnEFc%fKnd4LA3|93;5}sNm)B47*
zFy_%>_-s6cI@LF`f1%1a_1IoAm@}5X<(gp3u5FMsGmKTEKC9l^1(`AtWY%8JcWgb%
zZylS*_Ui59qCOUKs>ds#WWyMo7d`;@z4jpUrLo+jv7PL}IZaq};Sn=D9*_UdR<X|M
z<1lRh1Pq(D2Ib8xaO+fC+G1{pvJ0MqbaN71ZM($AeESLO4Uf{Xi>@dYrAo?^meG4J
zEAnr?%cj-Uagq6}Xw(TcoEOqu>a;|LGVKj8%bvG>>7Ky`8yE7Shg=A84tOaJ#ub)F
zxn%?6d8f+<S*f}S>hAGo<z=22kem)fVtp~YNfMupi)WYnb=mzG1xR&X3RO$z^Y?-#
zQ1Js(%oFmB=^F#cu;w?X<oOrwY|NmP+3wVk`hiK>J_J))bGmg*i%GsZ!qVggw(;eg
z{1>xG3OVA*vRkK6+g)u6dvF-rGu*I8M-vTS8*&|=UU8n;JE`Z#FYcoFF{m#81upT6
zXsWysyb3iG?$DgztW5z9xYo!v`gX(hL}zTT9?LpTCJJ?FJq=K^p>f=38qxihJH1<Q
zGQJpwWe-<Sth6j$8b1ZMZc>1m+uDH3RKsnLqiFvHo~tqmA)k*MIJ?XW`2I-+)Ldfl
zMeRlSQ2K^z7Pv2ZgMF#4X*<9DMFEbmX=cq9zgc+fVB9reDJFP5WAY!esmxOXpXmKz
z726iWx&29^`V}5no+Cn4hP1zT2fZAx$4}K1982wLcvCwHz9c2ExO-vHlpn<;dV^VN
zK@OYn<q3Q)*e%Ro^h4c^S@d9{Geo`Cp&>rknCv1E9GJ3>=@?~_RP=A|hNKupggZdr
z_-_zVKa^De<g*uZ8km^M0SH{^$<OfL!JoFWqYnRh*cvB=CzX1cf4m&mKjagCdugj^
zOa6XlNR<$AOcrH#>f@rH%G@^>HDN9@jU0jmpL554EXsS$#<b3)S=oaqW>FFTsH|cB
zOSS0r!UX(NE`h)NFEQ6gX$TTUe5GC{-JLrQC(QT)bNV*Xt}zNIduK1Olmr~5&hsk<
zl<_vVO<37UX|VXRpJv4kpdO1m>}jkQ=|3`J_Kh3q{>k+$Yg~iyH_Q?I#AWPJaWFQu
z=TOz(<+y5fS6OT9F_4DCr45Y&gLIlIg?NUAEc;*ng1PDc<1Z}#FF#<#fA|4jt|2S`
zfBu5^fB6dq2Zf1d%z|Ojx*&E=5kyX7A=c>`H|OMFI5w&g)|n|m>-i#1GFj-9N|f<q
zUi~b+nyA5OfgH2dEEL`QvA^v3nZH&?zj}h=y+RnaN)?Ra8@b2}TRD@b*4&@li{ZeT
zv!xI2FXNjdS6QD~(NmUsHj_Km^sY>AZ7^)tUc~YPEz24o+Q82%N^JU8buj7Q499|#
zK>Kw#Y~NA94G^0OuY(4{>w|g_;~4~&72CKw<8rv4m)+pg;%yLEqz609Ea3a1$NXp&
zJE3dP#nruZg2Go4V6fVQdvV7J4FA688m3JFy^*%?WXTu)fvOER-{2n?;u99)`@j4J
zi~sT${QkRs{{LCORjwhc|Bt^QwJT)J|64&RF|o@NlIYj4G_1)uNx#njV5!fQnQCsX
zz=YX}H5(FfzIv%h;oujRpLPLWSex*J?u@|g$}QJarf0D)ujZoOu)XxBQeX^frZClz
zlkl)}9sLlvt9K72W1J>~4?{<DMzV>dI47Sgmf1krneuGT*BKaV=f^#L<%FIS7oca%
zM%txqfv4NoVD{*IYQORWawMF&W#12A<Mtg;@?H~L*Zcy#4Y8<}c7>X6<smbij4sE-
zkvp*#EzKw3osOHJQei6iu?6l_mn(&A&cx?++bFb7hSg13NQVxM<C4q>9xl2r)XV)O
zTX7DLzGwogEHQez`aQI*JVG0+pA#qWXceMLG2de!{QY9j6%XG^c8BL+%CR^^E*D*f
z2<I(XNBpU=h5T>3(&z~eRM3+^S09f<*Tf|JRD6v$=n1guDyl|YC-^4BwqU@rZ2FcJ
zPwV;z;lG*bY>3J=+_i5QKCrHZ`HNGiU%Q#!?5$_%WsE_RB`Rw@M3;biY`l4#I&Pk&
z$1i1R`>~q>D{e04x(70i-g<UN$R$|G#?Y>n!$~$x3Ceuic-^^*Sg8A(Z+krs=c)%_
zUg3DUJ+y(f*r?#;nq9Pd;Bid<ZcOJBcaWy18;j6Pp+%$RAg=l}tdOuqmj(CPxS@y9
zcrH(-KYvoG${ptL>M4IHNMLV7$fKoqJpFLX!q%}#!aSZArQQg@uXCL7wv!BOv2=j#
z_l57+DM_RKGto!$K3jU?1q{!~;D<Jx=C?$paQl}MRb-7oU-?m(vZ;XWj;Y70jl$ou
zp_tlJ?!rjnT|TYOg$}$QO0gvi@XhQ*{32%tGo_E>^p<ZFVChfwM^@2fb6q%+B+iPa
z#!*pSG-tovf(Dw!!PTr{dM<qn)~6M+s6ngf?5`3^R8>Y#>$Rve>;Qc@_)l=^D51ZB
z8GehHLB(QmM9w?0a%><NbjxCByE9JPdWfcI#&BC)&Jmg>p+n4F1`g>|Q=W$6Bf_x0
zE|s?jZ~T{E$hQ`x@*nsAg7{10QLCh*Oc~a&qQG=4Nz!KWc4M*p_&K_fBkY$X_fc_P
z1KvKfnEMW;bUQ<t?){M$98`0-xKs0JOl2f>L`l-B@9Qc5v=xpHkE460TiMLw0hqTy
zh20#z8RFWtaQ*rSvi_SzZ>5J}?{^6-IIxH<Idh!$UQowp)hFqBc0cIsjbSTwcH>lb
zln$Lgfb+)ffWpfI$Z57U&YpP-lkyYk?{G787Th&fx}h}ShbgIz_ruwP+xhhu_OVCX
z#LJIv{KyREW?*Ks244N)iu*f*uwjc1UX<5B)o2^imd&CW?dRFkdO13zqKc`d)l4)0
zGAWxq2gy%Y;9cNV3M=>vab`O?i_7}l(db0lHGDXv4t@r~iC;Ojbw%*jbu?5rr{L`K
z580*9^YOIqIVuzK-cG$SwARTJau+WkS>e4fR6PeF@DJQPmWcC4TQW!g2SBZMSm61T
zf0F?8%VIswDi}i}FQih=PF=j(8jY5Yhf)27J2&ZrB+U?N@bu6M`ZfC>o7(+Dbi>ph
zf-;-I(b$8+UB405Z-AYbTu{0~@H`k5<1e*LS|NTGL#JhvXYNueTzr5sg7<NceWYpX
z9uDpA=E2aC09rU;0RGO{f<8CLvVCoeY-otkZoBJ6CZt9b-pT#;7=~|NSF%U9bI@wo
zK}<QQkKZ&Uu<zJr+%a8}zTTLFSJUF@(cw#+``!!~te=PC6KCP%!U^Die=#NR+Rv7Y
z)bW?=c>GmR#3s0o;velBj+=%XV2Sx*Tr<Fgx{}hs@KXh&{@3i!`xDq!Z$;{Nd`QAO
zlP(UpLf7&vXuSPTmN_{ZMrFrf&vG~TTRa>;aoVKu;wYT+iKdW%u#mO?TYoM7ul@%9
zw|)fur+x&xhOGNf{gB!j5+YO&d-c|dH$|R3Cft3s9^Nk}goB6E`Qu9*%0kwMg@pcZ
zQJVPQq7?Su4-Nm%hi-5U+4z4&X~3?Ki2s)L#P!OypRSIz^)gqnec(61_No3j+xP#J
zZBtfB+b%gb##Xyh+IEfmcN;!^if!*#Wt+H7H8#<0A5psXRr#T=6r0C;9+zK=^(t5V
zrEOa@aH8$H?b5cF;)dHElAmoeV(k^SI%kmWSEcxJ@z!1CMlQuRPcpPCp025}`PkP`
zUX;*e^Xj>EMWWOJo5Y&WHn-nfR#erU69OLcw(YZwY%gld+BS|HPHV!&Z5JmGxBdL)
zlZ{k_fvwBUD>mEC-m^KGzsaVwYM`yoTMJv)gOavKZx5@;ml|n1WBL-CY0ATGi+_mQ
z%G_wMsrV{x`#?0ncIc;Co4!UP+g$I@<u<pcRyfy*+lFik3)%d?4`t&2J(Ml~UB9jW
zS-)+rA>046egk%fME>WYNW6b7X&Vw1781QOJT%-SM0~M~K(-2A7q&WZ^=9AoAp<=_
sB*r?5X<QX^6g%Z5=DS!y_;uKNuhl`m>z9Xyd4>6gNNx*}3OD)x00Wh#x&QzG

literal 0
HcmV?d00001

diff --git a/human_aware_rl/ppo/trained_example/cramped_room/checkpoint-500.tune_metadata b/human_aware_rl/ppo/trained_example/cramped_room/checkpoint-500.tune_metadata
new file mode 100644
index 0000000000000000000000000000000000000000..1b1a3cedee628b3a79ddc3581036927903274e2a
GIT binary patch
literal 214
zcmXwzJrBV^7{`x#7i?yusVREJBDN-y?zdf@y62Kh&|dEdgT(5aSbY~C#RqVt*5Cj2
z=3O-zvmPM?&p8%qj|a<@&=odvY#xDJ1VE{rrQt-%B}ytILB%wkg%E|h%G%DREEZr@
zcF<n(uCRea)-@T6x#2dmu+y~-c>rOZQYvXZUca7sJS2qmor1YCnZSRhk=$0lYX(jz
hI2bO}*6QY+b)C6@3qN6DzOyY>cO>u^KGp4`{{b=NKEnV2

literal 0
HcmV?d00001

diff --git a/human_aware_rl/ppo/trained_example/cramped_room/config.pkl b/human_aware_rl/ppo/trained_example/cramped_room/config.pkl
new file mode 100644
index 0000000000000000000000000000000000000000..b2a1fc0f5da7e00f65b2cbf789760ee444439b7c
GIT binary patch
literal 2544
zcmZuzOLyBu6qb{gw(k2)A5hAxDp1=Ev^<OBI<6BGJH|;`wbTq_X>5t;bw`p@hl&=?
zp-on>g%vEj>7Fe+w)_LwvH;G39SiP^<TS}4=R}e2ow;-Gcfb2h?vE#+sQMm7QC#0-
z4hblyZV0&q##DVLn&XfWCyZPeU(*lbcXP2pre~(6YE3d*xmjz^!!~VPKZKt7nOn6j
zuI<{SzB}7=ZDw{sEPqNnq3_RZ)w$;NQJ5VVW*6G4wb@xBV!_UalGC@LX=<W93%k~0
zxi8|>_H3^|N;RLrZdIP*#V%R)`4ZzH?9mkc0B)75NtZ^3MM8Up!QO^4A=3@}CUG&+
z2H(EM<Qe@q?&7}V_;Eyfoc3MjMZ{rVKeAwd<C~lWOB^Kv*b(D!K-;O0;Vy^6!FsvX
zZT#}rqkm;a4kw{uF*9};9BRt)`kb1?aO?mM*ULYCD}KJyx8v?UbzKTa>g7KkA6fae
z{#buXgrsiQq;M3ix7B}s`uetnV9+C>ML9Nx4n?uYR`tEO7|o6!hQ#n$uLsBL<&&3x
z`~7tFg6!jxli8oHmw(QV&EqDKN(v>MsFxpQ9?^MGV?T&PIH{#=2pKcsl(t(xj9xaN
zPD~@)_lWHw3it`9HB&z(d|t%;V3;2G1c7Hd@T}T_GpeSn=fFDfoLa9On!Att&^(!<
z$C$)~(LUmPc1<GZ_<iidfI0BIWW|k2y9qDEvYE^q*xdJIuQCe1n7~V_E=qLGtiljp
zPT*Wh7KBc8*rP6kbN~!we!xt^ecy%iGAw7y0_k7_-CjxHRaHNNVOz76YOPUgE|8ff
zY0WK=cI^hdmJP$y%=|QI%~Z9;matdaQ*dE4Cv#}q>?&y6C2(>VK3W7H3zcTICW96;
zGK)htIZEgdLpd)(Zt`5zlsJ~vBQ&E?nMmMuRo{UdET7vez6WmzLW>D?k!ho4mlC+F
zrZW;z9uZuN9hJQf;mtKU0L#>l2*oK`Mozr7Ci!Rhv0wt}`_u{H?GC&nmyXOPasuA%
zz<c^WJaA+UY-1@7;7SMHm!*W*5aRlhSZSd0Dpn!{#*LlGCP)b!$gIhe2~<@5C_1rK
zl$-0_>QN)|Ib5xm|3oMLNPM>}8lh0F-y5wG;0VbJEE}fQBs5*)359DNxP}z2BZV~u
zxjU_{Z^dsG%MHjYL_zR|!Aw(5te6(EfrlSh=VD1}44##uB`~W>9EHtyYy$@facme?
z>{5?Vtd9}y6l~W{n^EX8iw<m`<2qkTV$_?+U(Jlw)Yg)b@{p<zhBkb!XZK+)ftD&$
zNQK|DIee(MvHap<h@w*LGE`EB4U1F05sSeJi*-Aij=ROp`E30R0>6mI@YoBBChJjb
z8nh)7!^k%*8b-DO^Q+d`HEgSpEh%MYRx&KC%3cfcSyy_YvLP%s{?18?Y>=J|yXy(u
zNQ+^?NUL>0ZE>MU9Ya=ya1(`0G}yGm0OucWrNV-FA+iF1kD9W9nO~CNc6RNsI|1Xi
z6d0))9bLmnt=1gbs2k)OQlAlT`AUfldRIz?QmJ&YR3a?I_X+$E?_O%Zgzrnvx*a}F
zL81djQ04d#2;P)pHaQCs#877ViGB(#TU*5GkYuZX$}qx`fzwUEP$dhE7)y0=y2MRy
zn_$YNjUiUi5H?`)&fK(2P_Va^g4p8%{vsP4yPI!|=O{)V`trVsm>bB9Ehz%U$PJ1j
zDf|nzEK3NJ!5I~E`l(^sj#I!5j+0NVjPzp95IT^DCH4RO2IiaxN7ZsTjbqxlR;?a0
zbLN<%xVa>kQ|{)zAG@pkkegS)ZK6^N(VjzO4c}j4W<#h+MTiM_UxEJ+0{tW|*i7z%
ztT257Se{J0#}UMAjQB$cC5p0|bhIMNdjPS#CD%a`S3Vw92!=3_fFKQ?l)tQ9s%}%p
z_T=Es`STGB+puB~(v59Q(IX^Jh>QR9;ET1~Y)0@|8}25MB=EUxD<RbL{7j$;fqMzu
V4<Eo6-B_|6EAILsgD+R(!hZ?BFf9N8

literal 0
HcmV?d00001

diff --git a/human_aware_rl/ppo/trained_example/cramped_room/progress.csv b/human_aware_rl/ppo/trained_example/cramped_room/progress.csv
new file mode 100644
index 00000000..4592a9c8
--- /dev/null
+++ b/human_aware_rl/ppo/trained_example/cramped_room/progress.csv
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3c8e4335a85e9b0dab4e306c3cc0f7abfd151f09c2a5229e97a88d2c1036d4b9
+size 1787766
diff --git a/human_aware_rl/ppo/trained_example/cramped_room/result.json b/human_aware_rl/ppo/trained_example/cramped_room/result.json
new file mode 100644
index 00000000..f37fb6d8
--- /dev/null
+++ b/human_aware_rl/ppo/trained_example/cramped_room/result.json
@@ -0,0 +1,500 @@
+{"episode_reward_max": 25.0, "episode_reward_min": 0.0, "episode_reward_mean": 8.65625, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 14.0}, "policy_reward_mean": {"ppo": 4.328125}, "custom_metrics": {"sparse_reward_mean": 0.0, "sparse_reward_min": 0, "sparse_reward_max": 0, "shaped_reward_mean": 8.65625, "shaped_reward_min": 0, "shaped_reward_max": 25, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 5.46875, "onion_pickup_agent_0_min": 1, "onion_pickup_agent_0_max": 12, "onion_pickup_agent_1_mean": 6.34375, "onion_pickup_agent_1_min": 0, "onion_pickup_agent_1_max": 12, "useful_onion_pickup_agent_0_mean": 3.78125, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 10, "useful_onion_pickup_agent_1_mean": 4.75, "useful_onion_pickup_agent_1_min": 0, "useful_onion_pickup_agent_1_max": 12, "onion_drop_agent_0_mean": 3.84375, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 9, "onion_drop_agent_1_mean": 5.0, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 11, "useful_onion_drop_agent_0_mean": 1.28125, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 1.28125, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 1.03125, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 3, "potting_onion_agent_1_mean": 0.84375, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 3, "dish_pickup_agent_0_mean": 2.96875, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 2.5625, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 0.15625, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 1, "useful_dish_pickup_agent_1_mean": 0.125, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 1, "dish_drop_agent_0_mean": 2.5, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 8, "dish_drop_agent_1_mean": 2.125, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 1.03125, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 7, "useful_dish_drop_agent_1_mean": 0.9375, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 4, "soup_pickup_agent_0_mean": 0.4375, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 4, "soup_pickup_agent_1_mean": 0.28125, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 2, "soup_delivery_agent_0_mean": 0.125, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 1, "soup_delivery_agent_1_mean": 0.0625, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 1, "soup_drop_agent_0_mean": 0.3125, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 4, "soup_drop_agent_1_mean": 0.125, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 
1.03125, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 3, "optimal_onion_potting_agent_1_mean": 0.84375, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 3, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 1.03125, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 3, "viable_onion_potting_agent_1_mean": 0.84375, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 3, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [3.0, 6.0, 8.0, 6.0, 3.0, 9.0, 9.0, 9.0, 17.0, 25.0, 8.0, 6.0, 6.0, 3.0, 3.0, 17.0, 14.0, 17.0, 6.0, 3.0, 14.0, 11.0, 9.0, 3.0, 8.0, 11.0, 11.0, 14.0, 0.0, 3.0, 9.0, 6.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [3.0, 0.0, 0.0, 6.0, 0.0, 8.0, 3.0, 3.0, 3.0, 0.0, 3.0, 6.0, 6.0, 3.0, 0.0, 9.0, 14.0, 3.0, 14.0, 11.0, 3.0, 5.0, 3.0, 3.0, 0.0, 6.0, 3.0, 0.0, 0.0, 3.0, 14.0, 3.0, 14.0, 0.0, 11.0, 6.0, 6.0, 0.0, 3.0, 0.0, 9.0, 5.0, 3.0, 8.0, 3.0, 6.0, 3.0, 0.0, 3.0, 5.0, 3.0, 8.0, 8.0, 3.0, 11.0, 3.0, 0.0, 0.0, 0.0, 3.0, 3.0, 6.0, 0.0, 6.0]}, "sampler_perf": {"mean_env_wait_ms": 0.45519290086990427, "mean_processing_ms": 0.16432746798999048, "mean_inference_ms": 0.7360864547945244}, "off_policy_estimator": {}, "info": {"num_steps_trained": 24000, "num_steps_sampled": 12800, "sample_time_ms": 17464.662, "load_time_ms": 141.686, "grad_time_ms": 6292.799, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.20000000298023224, "cur_lr": 0.0010000000474974513, "total_loss": -0.3615521490573883, "policy_loss": -0.003929345868527889, "vf_loss": 0.7481115460395813, "vf_explained_var": 0.002059757709503174, "kl": 0.00042938394472002983, "entropy": 1.7889174222946167, "entropy_coeff": 0.20000000298023224, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 12800, "episodes_total": 32, "training_iteration": 1, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_19-13-21", "timestamp": 1660241601, "time_this_iter_s": 23.966287851333618, "time_total_s": 23.966287851333618, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": 
-1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, 
"evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 
0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 23.966287851333618, "timesteps_since_restore": 12800, "iterations_since_restore": 1, "perf": {"cpu_util_percent": 43.42857142857144, "ram_util_percent": 57.03714285714286}}
+{"episode_reward_max": 25.0, "episode_reward_min": 0.0, "episode_reward_mean": 9.140625, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 14.0}, "policy_reward_mean": {"ppo": 4.5703125}, "custom_metrics": {"sparse_reward_mean": 0.0, "sparse_reward_min": 0, "sparse_reward_max": 0, "shaped_reward_mean": 9.140625, "shaped_reward_min": 0, "shaped_reward_max": 25, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 5.734375, "onion_pickup_agent_0_min": 0, "onion_pickup_agent_0_max": 12, "onion_pickup_agent_1_mean": 6.328125, "onion_pickup_agent_1_min": 0, "onion_pickup_agent_1_max": 14, "useful_onion_pickup_agent_0_mean": 3.65625, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 10, "useful_onion_pickup_agent_1_mean": 4.46875, "useful_onion_pickup_agent_1_min": 0, "useful_onion_pickup_agent_1_max": 12, "onion_drop_agent_0_mean": 4.109375, "onion_drop_agent_0_min": 
0, "onion_drop_agent_0_max": 9, "onion_drop_agent_1_mean": 4.90625, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 12, "useful_onion_drop_agent_0_mean": 1.65625, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 9, "useful_onion_drop_agent_1_mean": 1.515625, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 7, "potting_onion_agent_0_mean": 1.03125, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 3, "potting_onion_agent_1_mean": 0.90625, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 3, "dish_pickup_agent_0_mean": 2.90625, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 2.4375, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 0.15625, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 1, "useful_dish_pickup_agent_1_mean": 0.171875, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 1, "dish_drop_agent_0_mean": 2.4375, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 9, "dish_drop_agent_1_mean": 2.03125, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.953125, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 7, "useful_dish_drop_agent_1_mean": 0.859375, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 7, "soup_pickup_agent_0_mean": 0.53125, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 0.375, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 5, "soup_delivery_agent_0_mean": 0.171875, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 1, "soup_delivery_agent_1_mean": 0.046875, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 1, "soup_drop_agent_0_mean": 0.34375, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 6, "soup_drop_agent_1_mean": 0.21875, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 4, 
"optimal_onion_potting_agent_0_mean": 1.03125, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 3, "optimal_onion_potting_agent_1_mean": 0.90625, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 3, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 1.03125, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 3, "viable_onion_potting_agent_1_mean": 0.90625, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 3, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, 
"useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [0.0, 6.0, 11.0, 20.0, 3.0, 6.0, 14.0, 6.0, 9.0, 3.0, 3.0, 6.0, 9.0, 12.0, 12.0, 19.0, 8.0, 12.0, 8.0, 14.0, 3.0, 3.0, 17.0, 3.0, 20.0, 11.0, 3.0, 14.0, 22.0, 3.0, 14.0, 14.0, 3.0, 6.0, 8.0, 6.0, 3.0, 9.0, 9.0, 9.0, 17.0, 25.0, 8.0, 6.0, 6.0, 3.0, 3.0, 17.0, 14.0, 17.0, 6.0, 3.0, 14.0, 11.0, 9.0, 3.0, 8.0, 11.0, 11.0, 14.0, 0.0, 3.0, 9.0, 6.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [0.0, 0.0, 3.0, 3.0, 3.0, 8.0, 6.0, 14.0, 0.0, 3.0, 0.0, 6.0, 11.0, 3.0, 0.0, 6.0, 3.0, 6.0, 3.0, 0.0, 0.0, 3.0, 3.0, 3.0, 0.0, 9.0, 12.0, 0.0, 3.0, 9.0, 14.0, 5.0, 8.0, 0.0, 6.0, 6.0, 3.0, 5.0, 11.0, 3.0, 0.0, 3.0, 3.0, 0.0, 14.0, 3.0, 3.0, 0.0, 14.0, 6.0, 6.0, 5.0, 0.0, 3.0, 0.0, 14.0, 13.0, 9.0, 0.0, 3.0, 11.0, 3.0, 11.0, 3.0, 3.0, 0.0, 0.0, 6.0, 0.0, 8.0, 3.0, 3.0, 3.0, 0.0, 3.0, 6.0, 6.0, 3.0, 0.0, 9.0, 14.0, 3.0, 14.0, 11.0, 3.0, 5.0, 3.0, 3.0, 0.0, 6.0, 3.0, 0.0, 0.0, 3.0, 14.0, 3.0, 14.0, 0.0, 11.0, 6.0, 6.0, 0.0, 3.0, 0.0, 9.0, 5.0, 3.0, 8.0, 3.0, 6.0, 3.0, 0.0, 3.0, 5.0, 3.0, 8.0, 8.0, 3.0, 11.0, 3.0, 0.0, 0.0, 0.0, 3.0, 3.0, 6.0, 0.0, 6.0]}, "sampler_perf": {"mean_env_wait_ms": 0.45293878883424954, "mean_processing_ms": 0.1625872490925028, "mean_inference_ms": 0.7353549498376587}, "off_policy_estimator": {}, "info": {"num_steps_trained": 48000, "num_steps_sampled": 25600, "sample_time_ms": 17329.738, "load_time_ms": 90.796, "grad_time_ms": 6126.382, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.10000000149011612, "cur_lr": 0.0010000000474974513, "total_loss": -0.35112297534942627, "policy_loss": -0.008805765770375729, "vf_loss": 
0.7840461730957031, "vf_explained_var": -0.002521991729736328, "kl": 0.00048407851136289537, "entropy": 1.7883315086364746, "entropy_coeff": 0.19148799777030945, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 25600, "episodes_total": 64, "training_iteration": 2, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_19-13-45", "timestamp": 1660241625, "time_this_iter_s": 23.218619108200073, "time_total_s": 47.18490695953369, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": 
"cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, 
"output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 47.18490695953369, "timesteps_since_restore": 25600, "iterations_since_restore": 2, "perf": {"cpu_util_percent": 37.300000000000004, "ram_util_percent": 57.44117647058823}}
+{"episode_reward_max": 25.0, "episode_reward_min": 0.0, "episode_reward_mean": 9.052083333333334, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 16.0}, "policy_reward_mean": {"ppo": 4.526041666666667}, "custom_metrics": {"sparse_reward_mean": 0.0, "sparse_reward_min": 0, "sparse_reward_max": 0, "shaped_reward_mean": 9.052083333333334, "shaped_reward_min": 0, "shaped_reward_max": 25, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 5.791666666666667, "onion_pickup_agent_0_min": 0, "onion_pickup_agent_0_max": 13, "onion_pickup_agent_1_mean": 6.104166666666667, "onion_pickup_agent_1_min": 0, "onion_pickup_agent_1_max": 14, "useful_onion_pickup_agent_0_mean": 3.8854166666666665, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 11, "useful_onion_pickup_agent_1_mean": 4.260416666666667, "useful_onion_pickup_agent_1_min": 0, "useful_onion_pickup_agent_1_max": 12, 
"onion_drop_agent_0_mean": 4.208333333333333, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 11, "onion_drop_agent_1_mean": 4.697916666666667, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 12, "useful_onion_drop_agent_0_mean": 1.5208333333333333, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 9, "useful_onion_drop_agent_1_mean": 1.5104166666666667, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 7, "potting_onion_agent_0_mean": 1.0416666666666667, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 3, "potting_onion_agent_1_mean": 0.875, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 3, "dish_pickup_agent_0_mean": 2.8229166666666665, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 2.46875, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 0.125, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 1, "useful_dish_pickup_agent_1_mean": 0.17708333333333334, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 1, "dish_drop_agent_0_mean": 2.3333333333333335, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 9, "dish_drop_agent_1_mean": 2.0416666666666665, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.9895833333333334, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 7, "useful_dish_drop_agent_1_mean": 0.8125, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 7, "soup_pickup_agent_0_mean": 0.4479166666666667, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 0.3854166666666667, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 5, "soup_delivery_agent_0_mean": 0.14583333333333334, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 1, "soup_delivery_agent_1_mean": 0.07291666666666667, "soup_delivery_agent_1_min": 0, 
"soup_delivery_agent_1_max": 1, "soup_drop_agent_0_mean": 0.2708333333333333, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 6, "soup_drop_agent_1_mean": 0.20833333333333334, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 4, "optimal_onion_potting_agent_0_mean": 1.0416666666666667, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 3, "optimal_onion_potting_agent_1_mean": 0.875, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 3, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 1.0416666666666667, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 3, "viable_onion_potting_agent_1_mean": 0.875, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 3, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, 
"useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [9.0, 17.0, 9.0, 8.0, 19.0, 3.0, 9.0, 14.0, 11.0, 22.0, 6.0, 3.0, 3.0, 6.0, 9.0, 3.0, 11.0, 11.0, 3.0, 3.0, 11.0, 9.0, 9.0, 3.0, 11.0, 6.0, 11.0, 6.0, 16.0, 8.0, 6.0, 9.0, 3.0, 6.0, 8.0, 6.0, 3.0, 9.0, 9.0, 9.0, 17.0, 25.0, 8.0, 6.0, 6.0, 3.0, 3.0, 17.0, 14.0, 17.0, 6.0, 3.0, 14.0, 11.0, 9.0, 3.0, 8.0, 11.0, 11.0, 14.0, 0.0, 3.0, 9.0, 6.0, 0.0, 6.0, 11.0, 20.0, 3.0, 6.0, 14.0, 6.0, 9.0, 3.0, 3.0, 6.0, 9.0, 12.0, 12.0, 19.0, 8.0, 12.0, 8.0, 14.0, 3.0, 3.0, 17.0, 3.0, 20.0, 11.0, 3.0, 14.0, 22.0, 3.0, 14.0, 14.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [6.0, 3.0, 3.0, 14.0, 3.0, 6.0, 3.0, 5.0, 3.0, 16.0, 3.0, 0.0, 6.0, 3.0, 5.0, 9.0, 3.0, 8.0, 14.0, 8.0, 0.0, 6.0, 3.0, 0.0, 0.0, 3.0, 0.0, 6.0, 9.0, 0.0, 0.0, 3.0, 8.0, 3.0, 0.0, 11.0, 3.0, 0.0, 0.0, 3.0, 8.0, 3.0, 9.0, 0.0, 9.0, 0.0, 3.0, 0.0, 3.0, 8.0, 3.0, 3.0, 11.0, 0.0, 3.0, 3.0, 8.0, 8.0, 5.0, 3.0, 6.0, 0.0, 3.0, 6.0, 3.0, 0.0, 0.0, 6.0, 0.0, 8.0, 3.0, 3.0, 3.0, 0.0, 3.0, 6.0, 6.0, 3.0, 0.0, 9.0, 14.0, 3.0, 14.0, 11.0, 3.0, 5.0, 3.0, 3.0, 0.0, 6.0, 3.0, 0.0, 0.0, 3.0, 14.0, 3.0, 14.0, 0.0, 11.0, 6.0, 6.0, 0.0, 3.0, 0.0, 9.0, 5.0, 3.0, 8.0, 3.0, 6.0, 3.0, 0.0, 3.0, 5.0, 3.0, 8.0, 
8.0, 3.0, 11.0, 3.0, 0.0, 0.0, 0.0, 3.0, 3.0, 6.0, 0.0, 6.0, 0.0, 0.0, 3.0, 3.0, 3.0, 8.0, 6.0, 14.0, 0.0, 3.0, 0.0, 6.0, 11.0, 3.0, 0.0, 6.0, 3.0, 6.0, 3.0, 0.0, 0.0, 3.0, 3.0, 3.0, 0.0, 9.0, 12.0, 0.0, 3.0, 9.0, 14.0, 5.0, 8.0, 0.0, 6.0, 6.0, 3.0, 5.0, 11.0, 3.0, 0.0, 3.0, 3.0, 0.0, 14.0, 3.0, 3.0, 0.0, 14.0, 6.0, 6.0, 5.0, 0.0, 3.0, 0.0, 14.0, 13.0, 9.0, 0.0, 3.0, 11.0, 3.0, 11.0, 3.0]}, "sampler_perf": {"mean_env_wait_ms": 0.45189034776232173, "mean_processing_ms": 0.1615299805648739, "mean_inference_ms": 0.7382304408765018}, "off_policy_estimator": {}, "info": {"num_steps_trained": 72000, "num_steps_sampled": 38400, "sample_time_ms": 17410.737, "load_time_ms": 73.556, "grad_time_ms": 6485.631, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.05000000074505806, "cur_lr": 0.0010000000474974513, "total_loss": -0.3324081599712372, "policy_loss": -0.005397057626396418, "vf_loss": 0.7086341977119446, "vf_explained_var": -0.000792384147644043, "kl": 0.0004734609683509916, "entropy": 1.7876968383789062, "entropy_coeff": 0.18297599256038666, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 38400, "episodes_total": 96, "training_iteration": 3, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_19-14-10", "timestamp": 1660241650, "time_this_iter_s": 24.84310221672058, "time_total_s": 72.02800917625427, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": 
null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, 
"inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 72.02800917625427, "timesteps_since_restore": 38400, "iterations_since_restore": 3, "perf": {"cpu_util_percent": 38.59428571428571, 
"ram_util_percent": 57.505714285714284}}
+{"episode_reward_max": 24.0, "episode_reward_min": 0.0, "episode_reward_mean": 9.55, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 18.0}, "policy_reward_mean": {"ppo": 4.775}, "custom_metrics": {"sparse_reward_mean": 0.0, "sparse_reward_min": 0, "sparse_reward_max": 0, "shaped_reward_mean": 9.55, "shaped_reward_min": 0, "shaped_reward_max": 24, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 5.79, "onion_pickup_agent_0_min": 0, "onion_pickup_agent_0_max": 13, "onion_pickup_agent_1_mean": 5.74, "onion_pickup_agent_1_min": 0, "onion_pickup_agent_1_max": 14, "useful_onion_pickup_agent_0_mean": 3.83, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 11, "useful_onion_pickup_agent_1_mean": 3.88, "useful_onion_pickup_agent_1_min": 0, "useful_onion_pickup_agent_1_max": 11, "onion_drop_agent_0_mean": 4.15, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 
11, "onion_drop_agent_1_mean": 4.35, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 12, "useful_onion_drop_agent_0_mean": 1.51, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 9, "useful_onion_drop_agent_1_mean": 1.58, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 7, "potting_onion_agent_0_mean": 1.09, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 4, "potting_onion_agent_1_mean": 0.85, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 3, "dish_pickup_agent_0_mean": 2.64, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 2.36, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 0.12, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 1, "useful_dish_pickup_agent_1_mean": 0.19, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 1, "dish_drop_agent_0_mean": 2.17, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 9, "dish_drop_agent_1_mean": 1.84, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.94, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 6, "useful_dish_drop_agent_1_mean": 0.68, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 7, "soup_pickup_agent_0_mean": 0.36, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 0.53, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 5, "soup_delivery_agent_0_mean": 0.12, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 1, "soup_delivery_agent_1_mean": 0.13, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 1, "soup_drop_agent_0_mean": 0.17, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 6, "soup_drop_agent_1_mean": 0.27, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 4, "optimal_onion_potting_agent_0_mean": 1.09, "optimal_onion_potting_agent_0_min": 0, 
"optimal_onion_potting_agent_0_max": 4, "optimal_onion_potting_agent_1_mean": 0.85, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 3, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 1.09, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 4, "viable_onion_potting_agent_1_mean": 0.85, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 3, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [8.0, 
6.0, 8.0, 8.0, 6.0, 3.0, 24.0, 6.0, 6.0, 6.0, 19.0, 11.0, 14.0, 8.0, 9.0, 3.0, 8.0, 14.0, 14.0, 9.0, 17.0, 3.0, 19.0, 19.0, 17.0, 12.0, 6.0, 14.0, 11.0, 14.0, 14.0, 9.0, 0.0, 3.0, 9.0, 6.0, 0.0, 6.0, 11.0, 20.0, 3.0, 6.0, 14.0, 6.0, 9.0, 3.0, 3.0, 6.0, 9.0, 12.0, 12.0, 19.0, 8.0, 12.0, 8.0, 14.0, 3.0, 3.0, 17.0, 3.0, 20.0, 11.0, 3.0, 14.0, 22.0, 3.0, 14.0, 14.0, 9.0, 17.0, 9.0, 8.0, 19.0, 3.0, 9.0, 14.0, 11.0, 22.0, 6.0, 3.0, 3.0, 6.0, 9.0, 3.0, 11.0, 11.0, 3.0, 3.0, 11.0, 9.0, 9.0, 3.0, 11.0, 6.0, 11.0, 6.0, 16.0, 8.0, 6.0, 9.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [5.0, 3.0, 3.0, 3.0, 3.0, 5.0, 8.0, 0.0, 0.0, 6.0, 0.0, 3.0, 6.0, 18.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 11.0, 8.0, 3.0, 8.0, 6.0, 8.0, 8.0, 0.0, 6.0, 3.0, 0.0, 3.0, 0.0, 8.0, 14.0, 0.0, 9.0, 5.0, 9.0, 0.0, 6.0, 11.0, 3.0, 0.0, 11.0, 8.0, 6.0, 13.0, 12.0, 5.0, 6.0, 6.0, 6.0, 0.0, 3.0, 11.0, 3.0, 8.0, 6.0, 8.0, 5.0, 9.0, 6.0, 3.0, 0.0, 0.0, 0.0, 3.0, 3.0, 6.0, 0.0, 6.0, 0.0, 0.0, 3.0, 3.0, 3.0, 8.0, 6.0, 14.0, 0.0, 3.0, 0.0, 6.0, 11.0, 3.0, 0.0, 6.0, 3.0, 6.0, 3.0, 0.0, 0.0, 3.0, 3.0, 3.0, 0.0, 9.0, 12.0, 0.0, 3.0, 9.0, 14.0, 5.0, 8.0, 0.0, 6.0, 6.0, 3.0, 5.0, 11.0, 3.0, 0.0, 3.0, 3.0, 0.0, 14.0, 3.0, 3.0, 0.0, 14.0, 6.0, 6.0, 5.0, 0.0, 3.0, 0.0, 14.0, 13.0, 9.0, 0.0, 3.0, 11.0, 3.0, 11.0, 3.0, 6.0, 3.0, 3.0, 14.0, 3.0, 6.0, 3.0, 5.0, 3.0, 16.0, 3.0, 0.0, 6.0, 3.0, 5.0, 9.0, 3.0, 8.0, 14.0, 8.0, 0.0, 6.0, 3.0, 0.0, 0.0, 3.0, 0.0, 6.0, 9.0, 0.0, 0.0, 3.0, 8.0, 3.0, 0.0, 11.0, 3.0, 0.0, 0.0, 3.0, 8.0, 3.0, 9.0, 
0.0, 9.0, 0.0, 3.0, 0.0, 3.0, 8.0, 3.0, 3.0, 11.0, 0.0, 3.0, 3.0, 8.0, 8.0, 5.0, 3.0, 6.0, 0.0, 3.0, 6.0]}, "sampler_perf": {"mean_env_wait_ms": 0.45063714229107177, "mean_processing_ms": 0.15983031380236057, "mean_inference_ms": 0.739669952549497}, "off_policy_estimator": {}, "info": {"num_steps_trained": 96000, "num_steps_sampled": 51200, "sample_time_ms": 17369.146, "load_time_ms": 64.562, "grad_time_ms": 6646.22, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.02500000037252903, "cur_lr": 0.0010000000474974513, "total_loss": -0.3157036006450653, "policy_loss": -0.004088650923222303, "vf_loss": 0.8062646985054016, "vf_explained_var": 0.0032039880752563477, "kl": 0.0005627681966871023, "entropy": 1.7866708040237427, "entropy_coeff": 0.17446400225162506, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 51200, "episodes_total": 128, "training_iteration": 4, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_19-14-35", "timestamp": 1660241675, "time_this_iter_s": 24.43727397918701, "time_total_s": 96.46528315544128, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": 
{"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, 
"collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 96.46528315544128, "timesteps_since_restore": 51200, "iterations_since_restore": 4, "perf": {"cpu_util_percent": 39.84571428571428, "ram_util_percent": 57.64285714285714}}
+{"episode_reward_max": 54.0, "episode_reward_min": 3.0, "episode_reward_mean": 10.84, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 28.0}, "policy_reward_mean": {"ppo": 5.42}, "custom_metrics": {"sparse_reward_mean": 0.2, "sparse_reward_min": 0, "sparse_reward_max": 20, "shaped_reward_mean": 10.44, "shaped_reward_min": 3, "shaped_reward_max": 25, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 5.52, "onion_pickup_agent_0_min": 0, "onion_pickup_agent_0_max": 13, "onion_pickup_agent_1_mean": 5.69, "onion_pickup_agent_1_min": 0, "onion_pickup_agent_1_max": 14, "useful_onion_pickup_agent_0_mean": 3.68, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 11, "useful_onion_pickup_agent_1_mean": 3.83, "useful_onion_pickup_agent_1_min": 0, "useful_onion_pickup_agent_1_max": 11, "onion_drop_agent_0_mean": 3.91, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 
11, "onion_drop_agent_1_mean": 4.23, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 12, "useful_onion_drop_agent_0_mean": 1.44, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 1.6, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 7, "potting_onion_agent_0_mean": 1.05, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 4, "potting_onion_agent_1_mean": 0.94, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 3, "dish_pickup_agent_0_mean": 2.7, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 2.33, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 0.15, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 1, "useful_dish_pickup_agent_1_mean": 0.19, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 2, "dish_drop_agent_0_mean": 2.18, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 1.77, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 7, "useful_dish_drop_agent_0_mean": 0.92, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 4, "useful_dish_drop_agent_1_mean": 0.63, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 4, "soup_pickup_agent_0_mean": 0.43, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 0.51, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 4, "soup_delivery_agent_0_mean": 0.14, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 1, "soup_delivery_agent_1_mean": 0.16, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 1, "soup_drop_agent_0_mean": 0.18, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 6, "soup_drop_agent_1_mean": 0.22, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 3, "optimal_onion_potting_agent_0_mean": 1.05, "optimal_onion_potting_agent_0_min": 0, 
"optimal_onion_potting_agent_0_max": 4, "optimal_onion_potting_agent_1_mean": 0.94, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 3, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 1.05, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 4, "viable_onion_potting_agent_1_mean": 0.94, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 3, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": 
[16.0, 9.0, 8.0, 14.0, 9.0, 9.0, 6.0, 11.0, 19.0, 9.0, 12.0, 8.0, 17.0, 3.0, 11.0, 16.0, 11.0, 6.0, 22.0, 6.0, 3.0, 14.0, 11.0, 20.0, 25.0, 54.0, 14.0, 16.0, 3.0, 14.0, 3.0, 3.0, 22.0, 3.0, 14.0, 14.0, 9.0, 17.0, 9.0, 8.0, 19.0, 3.0, 9.0, 14.0, 11.0, 22.0, 6.0, 3.0, 3.0, 6.0, 9.0, 3.0, 11.0, 11.0, 3.0, 3.0, 11.0, 9.0, 9.0, 3.0, 11.0, 6.0, 11.0, 6.0, 16.0, 8.0, 6.0, 9.0, 8.0, 6.0, 8.0, 8.0, 6.0, 3.0, 24.0, 6.0, 6.0, 6.0, 19.0, 11.0, 14.0, 8.0, 9.0, 3.0, 8.0, 14.0, 14.0, 9.0, 17.0, 3.0, 19.0, 19.0, 17.0, 12.0, 6.0, 14.0, 11.0, 14.0, 14.0, 9.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [11.0, 5.0, 6.0, 3.0, 0.0, 8.0, 8.0, 6.0, 6.0, 3.0, 0.0, 9.0, 0.0, 6.0, 5.0, 6.0, 11.0, 8.0, 0.0, 9.0, 12.0, 0.0, 8.0, 0.0, 5.0, 12.0, 0.0, 3.0, 6.0, 5.0, 5.0, 11.0, 5.0, 6.0, 3.0, 3.0, 0.0, 22.0, 3.0, 3.0, 3.0, 0.0, 11.0, 3.0, 5.0, 6.0, 14.0, 6.0, 16.0, 9.0, 26.0, 28.0, 11.0, 3.0, 8.0, 8.0, 3.0, 0.0, 0.0, 14.0, 0.0, 3.0, 3.0, 0.0, 13.0, 9.0, 0.0, 3.0, 11.0, 3.0, 11.0, 3.0, 6.0, 3.0, 3.0, 14.0, 3.0, 6.0, 3.0, 5.0, 3.0, 16.0, 3.0, 0.0, 6.0, 3.0, 5.0, 9.0, 3.0, 8.0, 14.0, 8.0, 0.0, 6.0, 3.0, 0.0, 0.0, 3.0, 0.0, 6.0, 9.0, 0.0, 0.0, 3.0, 8.0, 3.0, 0.0, 11.0, 3.0, 0.0, 0.0, 3.0, 8.0, 3.0, 9.0, 0.0, 9.0, 0.0, 3.0, 0.0, 3.0, 8.0, 3.0, 3.0, 11.0, 0.0, 3.0, 3.0, 8.0, 8.0, 5.0, 3.0, 6.0, 0.0, 3.0, 6.0, 5.0, 3.0, 3.0, 3.0, 3.0, 5.0, 8.0, 0.0, 0.0, 6.0, 0.0, 3.0, 6.0, 18.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 11.0, 8.0, 3.0, 8.0, 6.0, 8.0, 8.0, 0.0, 6.0, 3.0, 0.0, 3.0, 0.0, 8.0, 14.0, 0.0, 9.0, 5.0, 9.0, 0.0, 
6.0, 11.0, 3.0, 0.0, 11.0, 8.0, 6.0, 13.0, 12.0, 5.0, 6.0, 6.0, 6.0, 0.0, 3.0, 11.0, 3.0, 8.0, 6.0, 8.0, 5.0, 9.0, 6.0, 3.0]}, "sampler_perf": {"mean_env_wait_ms": 0.45066530826567375, "mean_processing_ms": 0.15893004682590756, "mean_inference_ms": 0.7420671329840245}, "off_policy_estimator": {}, "info": {"num_steps_trained": 120000, "num_steps_sampled": 64000, "sample_time_ms": 17388.622, "load_time_ms": 59.359, "grad_time_ms": 6772.673, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.012500000186264515, "cur_lr": 0.0010000000474974513, "total_loss": -0.30251750349998474, "policy_loss": -0.006208862643688917, "vf_loss": 1.4635206460952759, "vf_explained_var": 0.0046030678786337376, "kl": 0.0005594257963821292, "entropy": 1.7864326238632202, "entropy_coeff": 0.16595199704170227, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 64000, "episodes_total": 160, "training_iteration": 5, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_19-14-59", "timestamp": 1660241699, "time_this_iter_s": 24.809880018234253, "time_total_s": 121.27516317367554, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, 
"no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, 
"compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 121.27516317367554, "timesteps_since_restore": 64000, "iterations_since_restore": 5, "perf": {"cpu_util_percent": 39.71142857142857, "ram_util_percent": 57.60285714285714}}
+{"episode_reward_max": 68.0, "episode_reward_min": 0.0, "episode_reward_mean": 12.39, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 37.0}, "policy_reward_mean": {"ppo": 6.195}, "custom_metrics": {"sparse_reward_mean": 0.4, "sparse_reward_min": 0, "sparse_reward_max": 20, "shaped_reward_mean": 11.59, "shaped_reward_min": 0, "shaped_reward_max": 28, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 5.22, "onion_pickup_agent_0_min": 0, "onion_pickup_agent_0_max": 10, "onion_pickup_agent_1_mean": 5.75, "onion_pickup_agent_1_min": 0, "onion_pickup_agent_1_max": 15, "useful_onion_pickup_agent_0_mean": 3.39, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 9, "useful_onion_pickup_agent_1_mean": 3.9, "useful_onion_pickup_agent_1_min": 0, "useful_onion_pickup_agent_1_max": 11, "onion_drop_agent_0_mean": 3.53, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 
8, "onion_drop_agent_1_mean": 4.24, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 12, "useful_onion_drop_agent_0_mean": 1.43, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 1.59, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 8, "potting_onion_agent_0_mean": 1.1, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 4, "potting_onion_agent_1_mean": 0.99, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 3, "dish_pickup_agent_0_mean": 2.59, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 2.07, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 0.21, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 1, "useful_dish_pickup_agent_1_mean": 0.23, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 2, "dish_drop_agent_0_mean": 2.04, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 1.5, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 7, "useful_dish_drop_agent_0_mean": 0.74, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 4, "useful_dish_drop_agent_1_mean": 0.49, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 4, "soup_pickup_agent_0_mean": 0.51, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 3, "soup_pickup_agent_1_mean": 0.55, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 4, "soup_delivery_agent_0_mean": 0.17, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 1, "soup_delivery_agent_1_mean": 0.22, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 2, "soup_drop_agent_0_mean": 0.23, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 3, "soup_drop_agent_1_mean": 0.2, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 3, "optimal_onion_potting_agent_0_mean": 1.1, "optimal_onion_potting_agent_0_min": 0, 
"optimal_onion_potting_agent_0_max": 4, "optimal_onion_potting_agent_1_mean": 0.99, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 3, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 1.1, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 4, "viable_onion_potting_agent_1_mean": 0.99, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 3, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [14.0, 
6.0, 20.0, 17.0, 14.0, 11.0, 6.0, 8.0, 6.0, 0.0, 11.0, 3.0, 22.0, 11.0, 19.0, 9.0, 20.0, 23.0, 6.0, 11.0, 11.0, 22.0, 19.0, 11.0, 11.0, 17.0, 14.0, 6.0, 68.0, 20.0, 9.0, 8.0, 16.0, 8.0, 6.0, 9.0, 8.0, 6.0, 8.0, 8.0, 6.0, 3.0, 24.0, 6.0, 6.0, 6.0, 19.0, 11.0, 14.0, 8.0, 9.0, 3.0, 8.0, 14.0, 14.0, 9.0, 17.0, 3.0, 19.0, 19.0, 17.0, 12.0, 6.0, 14.0, 11.0, 14.0, 14.0, 9.0, 16.0, 9.0, 8.0, 14.0, 9.0, 9.0, 6.0, 11.0, 19.0, 9.0, 12.0, 8.0, 17.0, 3.0, 11.0, 16.0, 11.0, 6.0, 22.0, 6.0, 3.0, 14.0, 11.0, 20.0, 25.0, 54.0, 14.0, 16.0, 3.0, 14.0, 3.0, 3.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [8.0, 6.0, 6.0, 0.0, 14.0, 6.0, 14.0, 3.0, 14.0, 0.0, 8.0, 3.0, 0.0, 6.0, 8.0, 0.0, 0.0, 6.0, 0.0, 0.0, 0.0, 11.0, 3.0, 0.0, 8.0, 14.0, 3.0, 8.0, 6.0, 13.0, 9.0, 0.0, 17.0, 3.0, 12.0, 11.0, 0.0, 6.0, 11.0, 0.0, 8.0, 3.0, 6.0, 16.0, 9.0, 10.0, 5.0, 6.0, 5.0, 6.0, 9.0, 8.0, 3.0, 11.0, 6.0, 0.0, 31.0, 37.0, 11.0, 9.0, 0.0, 9.0, 0.0, 8.0, 8.0, 8.0, 5.0, 3.0, 6.0, 0.0, 3.0, 6.0, 5.0, 3.0, 3.0, 3.0, 3.0, 5.0, 8.0, 0.0, 0.0, 6.0, 0.0, 3.0, 6.0, 18.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 11.0, 8.0, 3.0, 8.0, 6.0, 8.0, 8.0, 0.0, 6.0, 3.0, 0.0, 3.0, 0.0, 8.0, 14.0, 0.0, 9.0, 5.0, 9.0, 0.0, 6.0, 11.0, 3.0, 0.0, 11.0, 8.0, 6.0, 13.0, 12.0, 5.0, 6.0, 6.0, 6.0, 0.0, 3.0, 11.0, 3.0, 8.0, 6.0, 8.0, 5.0, 9.0, 6.0, 3.0, 11.0, 5.0, 6.0, 3.0, 0.0, 8.0, 8.0, 6.0, 6.0, 3.0, 0.0, 9.0, 0.0, 6.0, 5.0, 6.0, 11.0, 8.0, 0.0, 9.0, 12.0, 0.0, 8.0, 0.0, 5.0, 12.0, 0.0, 3.0, 6.0, 5.0, 5.0, 11.0, 5.0, 6.0, 3.0, 3.0, 0.0, 22.0, 3.0, 
3.0, 3.0, 0.0, 11.0, 3.0, 5.0, 6.0, 14.0, 6.0, 16.0, 9.0, 26.0, 28.0, 11.0, 3.0, 8.0, 8.0, 3.0, 0.0, 0.0, 14.0, 0.0, 3.0, 3.0, 0.0]}, "sampler_perf": {"mean_env_wait_ms": 0.4527646689746759, "mean_processing_ms": 0.15894443082147025, "mean_inference_ms": 0.7459920431247151}, "off_policy_estimator": {}, "info": {"num_steps_trained": 144000, "num_steps_sampled": 76800, "sample_time_ms": 17631.781, "load_time_ms": 55.878, "grad_time_ms": 7055.375, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0062500000931322575, "cur_lr": 0.0010000000474974513, "total_loss": -0.285332590341568, "policy_loss": -0.004330330062657595, "vf_loss": 1.753544807434082, "vf_explained_var": 0.007292529102414846, "kl": 0.0005500561674125493, "entropy": 1.7859567403793335, "entropy_coeff": 0.15744000673294067, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 76800, "episodes_total": 192, "training_iteration": 6, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_19-15-27", "timestamp": 1660241727, "time_this_iter_s": 27.381940841674805, "time_total_s": 148.65710401535034, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, 
"no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, 
"compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 148.65710401535034, "timesteps_since_restore": 76800, "iterations_since_restore": 6, "perf": {"cpu_util_percent": 46.235897435897435, "ram_util_percent": 57.91025641025641}}
+{"episode_reward_max": 68.0, "episode_reward_min": 0.0, "episode_reward_mean": 13.96, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 37.0}, "policy_reward_mean": {"ppo": 6.98}, "custom_metrics": {"sparse_reward_mean": 0.8, "sparse_reward_min": 0, "sparse_reward_max": 20, "shaped_reward_mean": 12.36, "shaped_reward_min": 0, "shaped_reward_max": 28, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 5.26, "onion_pickup_agent_0_min": 0, "onion_pickup_agent_0_max": 12, "onion_pickup_agent_1_mean": 5.86, "onion_pickup_agent_1_min": 1, "onion_pickup_agent_1_max": 15, "useful_onion_pickup_agent_0_mean": 3.13, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 8, "useful_onion_pickup_agent_1_mean": 3.81, "useful_onion_pickup_agent_1_min": 0, "useful_onion_pickup_agent_1_max": 9, "onion_drop_agent_0_mean": 3.52, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 9, 
"onion_drop_agent_1_mean": 4.23, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 12, "useful_onion_drop_agent_0_mean": 1.74, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 7, "useful_onion_drop_agent_1_mean": 1.72, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 8, "potting_onion_agent_0_mean": 1.13, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 4, "potting_onion_agent_1_mean": 1.16, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 3, "dish_pickup_agent_0_mean": 2.81, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 2.17, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 0.22, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 1, "useful_dish_pickup_agent_1_mean": 0.26, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 2, "dish_drop_agent_0_mean": 2.26, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 8, "dish_drop_agent_1_mean": 1.63, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.85, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 7, "useful_dish_drop_agent_1_mean": 0.61, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 6, "soup_pickup_agent_0_mean": 0.6, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 3, "soup_pickup_agent_1_mean": 0.59, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 0.23, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 1, "soup_delivery_agent_1_mean": 0.17, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 2, "soup_drop_agent_0_mean": 0.31, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 3, "soup_drop_agent_1_mean": 0.3, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 7, "optimal_onion_potting_agent_0_mean": 1.13, "optimal_onion_potting_agent_0_min": 0, 
"optimal_onion_potting_agent_0_max": 4, "optimal_onion_potting_agent_1_mean": 1.16, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 3, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 1.13, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 4, "viable_onion_potting_agent_1_mean": 1.16, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 3, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [6.0, 
14.0, 19.0, 6.0, 6.0, 17.0, 14.0, 3.0, 25.0, 11.0, 14.0, 9.0, 17.0, 14.0, 17.0, 14.0, 25.0, 17.0, 6.0, 19.0, 14.0, 3.0, 6.0, 3.0, 6.0, 63.0, 9.0, 28.0, 14.0, 8.0, 57.0, 9.0, 11.0, 14.0, 14.0, 9.0, 16.0, 9.0, 8.0, 14.0, 9.0, 9.0, 6.0, 11.0, 19.0, 9.0, 12.0, 8.0, 17.0, 3.0, 11.0, 16.0, 11.0, 6.0, 22.0, 6.0, 3.0, 14.0, 11.0, 20.0, 25.0, 54.0, 14.0, 16.0, 3.0, 14.0, 3.0, 3.0, 14.0, 6.0, 20.0, 17.0, 14.0, 11.0, 6.0, 8.0, 6.0, 0.0, 11.0, 3.0, 22.0, 11.0, 19.0, 9.0, 20.0, 23.0, 6.0, 11.0, 11.0, 22.0, 19.0, 11.0, 11.0, 17.0, 14.0, 6.0, 68.0, 20.0, 9.0, 8.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [3.0, 3.0, 6.0, 8.0, 10.0, 9.0, 0.0, 6.0, 3.0, 3.0, 9.0, 8.0, 6.0, 8.0, 0.0, 3.0, 17.0, 8.0, 8.0, 3.0, 5.0, 9.0, 6.0, 3.0, 3.0, 14.0, 11.0, 3.0, 14.0, 3.0, 6.0, 8.0, 9.0, 16.0, 6.0, 11.0, 0.0, 6.0, 14.0, 5.0, 6.0, 8.0, 3.0, 0.0, 6.0, 0.0, 0.0, 3.0, 3.0, 3.0, 29.0, 34.0, 6.0, 3.0, 6.0, 22.0, 8.0, 6.0, 5.0, 3.0, 31.0, 26.0, 3.0, 6.0, 3.0, 8.0, 6.0, 8.0, 5.0, 9.0, 6.0, 3.0, 11.0, 5.0, 6.0, 3.0, 0.0, 8.0, 8.0, 6.0, 6.0, 3.0, 0.0, 9.0, 0.0, 6.0, 5.0, 6.0, 11.0, 8.0, 0.0, 9.0, 12.0, 0.0, 8.0, 0.0, 5.0, 12.0, 0.0, 3.0, 6.0, 5.0, 5.0, 11.0, 5.0, 6.0, 3.0, 3.0, 0.0, 22.0, 3.0, 3.0, 3.0, 0.0, 11.0, 3.0, 5.0, 6.0, 14.0, 6.0, 16.0, 9.0, 26.0, 28.0, 11.0, 3.0, 8.0, 8.0, 3.0, 0.0, 0.0, 14.0, 0.0, 3.0, 3.0, 0.0, 8.0, 6.0, 6.0, 0.0, 14.0, 6.0, 14.0, 3.0, 14.0, 0.0, 8.0, 3.0, 0.0, 6.0, 8.0, 0.0, 0.0, 6.0, 0.0, 0.0, 0.0, 11.0, 3.0, 0.0, 8.0, 14.0, 3.0, 8.0, 6.0, 13.0, 9.0, 0.0, 17.0, 3.0, 12.0, 11.0, 0.0, 
6.0, 11.0, 0.0, 8.0, 3.0, 6.0, 16.0, 9.0, 10.0, 5.0, 6.0, 5.0, 6.0, 9.0, 8.0, 3.0, 11.0, 6.0, 0.0, 31.0, 37.0, 11.0, 9.0, 0.0, 9.0, 0.0, 8.0]}, "sampler_perf": {"mean_env_wait_ms": 7.041783650517479, "mean_processing_ms": 0.16106120541031635, "mean_inference_ms": 2.816008339885107}, "off_policy_estimator": {}, "info": {"num_steps_trained": 168000, "num_steps_sampled": 89600, "sample_time_ms": 363821.155, "load_time_ms": 54.883, "grad_time_ms": 7100.098, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0031250000465661287, "cur_lr": 0.0010000000474974513, "total_loss": -0.26905307173728943, "policy_loss": -0.0034452469553798437, "vf_loss": 2.160554885864258, "vf_explained_var": 0.012243330478668213, "kl": 0.0006163662183098495, "entropy": 1.784928321838379, "entropy_coeff": 0.14892800152301788, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 89600, "episodes_total": 224, "training_iteration": 7, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_19-56-15", "timestamp": 1660244175, "time_this_iter_s": 2448.401287794113, "time_total_s": 2597.0583918094635, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, 
"no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, 
"compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 2597.0583918094635, "timesteps_since_restore": 89600, "iterations_since_restore": 7, "perf": {"cpu_util_percent": 53.55, "ram_util_percent": 58.647826086956535}}
+{"episode_reward_max": 68.0, "episode_reward_min": 0.0, "episode_reward_mean": 14.41, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 37.0}, "policy_reward_mean": {"ppo": 7.205}, "custom_metrics": {"sparse_reward_mean": 1.0, "sparse_reward_min": 0, "sparse_reward_max": 20, "shaped_reward_mean": 12.41, "shaped_reward_min": 0, "shaped_reward_max": 33, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 5.3, "onion_pickup_agent_0_min": 1, "onion_pickup_agent_0_max": 12, "onion_pickup_agent_1_mean": 5.32, "onion_pickup_agent_1_min": 1, "onion_pickup_agent_1_max": 15, "useful_onion_pickup_agent_0_mean": 3.3, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 10, "useful_onion_pickup_agent_1_mean": 3.44, "useful_onion_pickup_agent_1_min": 0, "useful_onion_pickup_agent_1_max": 9, "onion_drop_agent_0_mean": 3.5, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 9, 
"onion_drop_agent_1_mean": 3.73, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 12, "useful_onion_drop_agent_0_mean": 1.61, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 7, "useful_onion_drop_agent_1_mean": 1.59, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 8, "potting_onion_agent_0_mean": 1.2, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 4, "potting_onion_agent_1_mean": 1.11, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 3, "dish_pickup_agent_0_mean": 2.73, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 2.53, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 0.16, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 1, "useful_dish_pickup_agent_1_mean": 0.25, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 2, "dish_drop_agent_0_mean": 2.19, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 8, "dish_drop_agent_1_mean": 1.89, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 9, "useful_dish_drop_agent_0_mean": 0.94, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 7, "useful_dish_drop_agent_1_mean": 0.81, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 7, "soup_pickup_agent_0_mean": 0.61, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 3, "soup_pickup_agent_1_mean": 0.74, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 0.2, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 1, "soup_delivery_agent_1_mean": 0.19, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 2, "soup_drop_agent_0_mean": 0.4, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 3, "soup_drop_agent_1_mean": 0.41, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 7, "optimal_onion_potting_agent_0_mean": 1.2, "optimal_onion_potting_agent_0_min": 0, 
"optimal_onion_potting_agent_0_max": 4, "optimal_onion_potting_agent_1_mean": 1.11, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 3, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 1.2, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 4, "viable_onion_potting_agent_1_mean": 1.11, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 3, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [11.0, 
9.0, 17.0, 8.0, 14.0, 19.0, 6.0, 33.0, 11.0, 6.0, 11.0, 14.0, 16.0, 3.0, 17.0, 6.0, 16.0, 11.0, 3.0, 57.0, 16.0, 8.0, 17.0, 9.0, 57.0, 11.0, 0.0, 11.0, 11.0, 11.0, 22.0, 11.0, 3.0, 14.0, 3.0, 3.0, 14.0, 6.0, 20.0, 17.0, 14.0, 11.0, 6.0, 8.0, 6.0, 0.0, 11.0, 3.0, 22.0, 11.0, 19.0, 9.0, 20.0, 23.0, 6.0, 11.0, 11.0, 22.0, 19.0, 11.0, 11.0, 17.0, 14.0, 6.0, 68.0, 20.0, 9.0, 8.0, 6.0, 14.0, 19.0, 6.0, 6.0, 17.0, 14.0, 3.0, 25.0, 11.0, 14.0, 9.0, 17.0, 14.0, 17.0, 14.0, 25.0, 17.0, 6.0, 19.0, 14.0, 3.0, 6.0, 3.0, 6.0, 63.0, 9.0, 28.0, 14.0, 8.0, 57.0, 9.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [3.0, 8.0, 6.0, 3.0, 11.0, 6.0, 0.0, 8.0, 0.0, 14.0, 3.0, 16.0, 3.0, 3.0, 19.0, 14.0, 3.0, 8.0, 3.0, 3.0, 8.0, 3.0, 11.0, 3.0, 3.0, 13.0, 0.0, 3.0, 8.0, 9.0, 3.0, 3.0, 6.0, 10.0, 3.0, 8.0, 0.0, 3.0, 23.0, 34.0, 8.0, 8.0, 8.0, 0.0, 3.0, 14.0, 0.0, 9.0, 28.0, 29.0, 6.0, 5.0, 0.0, 0.0, 6.0, 5.0, 6.0, 5.0, 3.0, 8.0, 14.0, 8.0, 3.0, 8.0, 3.0, 0.0, 0.0, 14.0, 0.0, 3.0, 3.0, 0.0, 8.0, 6.0, 6.0, 0.0, 14.0, 6.0, 14.0, 3.0, 14.0, 0.0, 8.0, 3.0, 0.0, 6.0, 8.0, 0.0, 0.0, 6.0, 0.0, 0.0, 0.0, 11.0, 3.0, 0.0, 8.0, 14.0, 3.0, 8.0, 6.0, 13.0, 9.0, 0.0, 17.0, 3.0, 12.0, 11.0, 0.0, 6.0, 11.0, 0.0, 8.0, 3.0, 6.0, 16.0, 9.0, 10.0, 5.0, 6.0, 5.0, 6.0, 9.0, 8.0, 3.0, 11.0, 6.0, 0.0, 31.0, 37.0, 11.0, 9.0, 0.0, 9.0, 0.0, 8.0, 3.0, 3.0, 6.0, 8.0, 10.0, 9.0, 0.0, 6.0, 3.0, 3.0, 9.0, 8.0, 6.0, 8.0, 0.0, 3.0, 17.0, 8.0, 8.0, 3.0, 5.0, 9.0, 6.0, 3.0, 3.0, 14.0, 11.0, 3.0, 14.0, 3.0, 6.0, 8.0, 9.0, 16.0, 6.0, 11.0, 
0.0, 6.0, 14.0, 5.0, 6.0, 8.0, 3.0, 0.0, 6.0, 0.0, 0.0, 3.0, 3.0, 3.0, 29.0, 34.0, 6.0, 3.0, 6.0, 22.0, 8.0, 6.0, 5.0, 3.0, 31.0, 26.0, 3.0, 6.0]}, "sampler_perf": {"mean_env_wait_ms": 12.808520770186506, "mean_processing_ms": 0.16344551542853641, "mean_inference_ms": 4.631216998135988}, "off_policy_estimator": {}, "info": {"num_steps_trained": 192000, "num_steps_sampled": 102400, "sample_time_ms": 320761.187, "load_time_ms": 52.674, "grad_time_ms": 7271.518, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0015625000232830644, "cur_lr": 0.0010000000474974513, "total_loss": -0.25552046298980713, "policy_loss": -0.005265455227345228, "vf_loss": 1.9171754121780396, "vf_explained_var": 0.015465259552001953, "kl": 0.0006017824052833021, "entropy": 1.7836121320724487, "entropy_coeff": 0.1404159963130951, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 102400, "episodes_total": 256, "training_iteration": 8, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_19-56-43", "timestamp": 1660244203, "time_this_iter_s": 27.877708196640015, "time_total_s": 2624.9361000061035, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": 
false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 
8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 2624.9361000061035, "timesteps_since_restore": 102400, "iterations_since_restore": 8, "perf": {"cpu_util_percent": 42.6075, "ram_util_percent": 58.39000000000001}}
+{"episode_reward_max": 68.0, "episode_reward_min": 0.0, "episode_reward_mean": 16.18, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 37.0}, "policy_reward_mean": {"ppo": 8.09}, "custom_metrics": {"sparse_reward_mean": 1.2, "sparse_reward_min": 0, "sparse_reward_max": 20, "shaped_reward_mean": 13.78, "shaped_reward_min": 0, "shaped_reward_max": 36, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 5.25, "onion_pickup_agent_0_min": 1, "onion_pickup_agent_0_max": 12, "onion_pickup_agent_1_mean": 5.27, "onion_pickup_agent_1_min": 1, "onion_pickup_agent_1_max": 14, "useful_onion_pickup_agent_0_mean": 3.31, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 10, "useful_onion_pickup_agent_1_mean": 3.46, "useful_onion_pickup_agent_1_min": 0, "useful_onion_pickup_agent_1_max": 10, "onion_drop_agent_0_mean": 3.5, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 
9, "onion_drop_agent_1_mean": 3.52, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 11, "useful_onion_drop_agent_0_mean": 1.57, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 7, "useful_onion_drop_agent_1_mean": 1.48, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 7, "potting_onion_agent_0_mean": 1.21, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 3, "potting_onion_agent_1_mean": 1.33, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 3, "dish_pickup_agent_0_mean": 3.05, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 2.8, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 0.19, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 2, "useful_dish_pickup_agent_1_mean": 0.28, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 2, "dish_drop_agent_0_mean": 2.4, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 8, "dish_drop_agent_1_mean": 2.07, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 9, "useful_dish_drop_agent_0_mean": 1.07, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 7, "useful_dish_drop_agent_1_mean": 0.92, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 7, "soup_pickup_agent_0_mean": 0.81, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 5, "soup_pickup_agent_1_mean": 0.8, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 0.22, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 1, "soup_delivery_agent_1_mean": 0.17, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 2, "soup_drop_agent_0_mean": 0.54, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 5, "soup_drop_agent_1_mean": 0.49, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 7, "optimal_onion_potting_agent_0_mean": 1.21, "optimal_onion_potting_agent_0_min": 0, 
"optimal_onion_potting_agent_0_max": 3, "optimal_onion_potting_agent_1_mean": 1.33, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 3, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 1.21, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 3, "viable_onion_potting_agent_1_mean": 1.33, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 3, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [6.0, 
28.0, 3.0, 14.0, 19.0, 57.0, 9.0, 11.0, 9.0, 17.0, 14.0, 14.0, 23.0, 25.0, 11.0, 28.0, 14.0, 14.0, 17.0, 12.0, 3.0, 16.0, 14.0, 22.0, 36.0, 17.0, 12.0, 22.0, 14.0, 16.0, 14.0, 17.0, 68.0, 20.0, 9.0, 8.0, 6.0, 14.0, 19.0, 6.0, 6.0, 17.0, 14.0, 3.0, 25.0, 11.0, 14.0, 9.0, 17.0, 14.0, 17.0, 14.0, 25.0, 17.0, 6.0, 19.0, 14.0, 3.0, 6.0, 3.0, 6.0, 63.0, 9.0, 28.0, 14.0, 8.0, 57.0, 9.0, 11.0, 9.0, 17.0, 8.0, 14.0, 19.0, 6.0, 33.0, 11.0, 6.0, 11.0, 14.0, 16.0, 3.0, 17.0, 6.0, 16.0, 11.0, 3.0, 57.0, 16.0, 8.0, 17.0, 9.0, 57.0, 11.0, 0.0, 11.0, 11.0, 11.0, 22.0, 11.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [0.0, 6.0, 22.0, 6.0, 3.0, 0.0, 8.0, 6.0, 10.0, 9.0, 23.0, 34.0, 3.0, 6.0, 11.0, 0.0, 6.0, 3.0, 11.0, 6.0, 11.0, 3.0, 5.0, 9.0, 17.0, 6.0, 9.0, 16.0, 8.0, 3.0, 6.0, 22.0, 3.0, 11.0, 14.0, 0.0, 6.0, 11.0, 6.0, 6.0, 0.0, 3.0, 5.0, 11.0, 8.0, 6.0, 6.0, 16.0, 11.0, 25.0, 8.0, 9.0, 3.0, 9.0, 16.0, 6.0, 8.0, 6.0, 16.0, 0.0, 11.0, 3.0, 6.0, 11.0, 31.0, 37.0, 11.0, 9.0, 0.0, 9.0, 0.0, 8.0, 3.0, 3.0, 6.0, 8.0, 10.0, 9.0, 0.0, 6.0, 3.0, 3.0, 9.0, 8.0, 6.0, 8.0, 0.0, 3.0, 17.0, 8.0, 8.0, 3.0, 5.0, 9.0, 6.0, 3.0, 3.0, 14.0, 11.0, 3.0, 14.0, 3.0, 6.0, 8.0, 9.0, 16.0, 6.0, 11.0, 0.0, 6.0, 14.0, 5.0, 6.0, 8.0, 3.0, 0.0, 6.0, 0.0, 0.0, 3.0, 3.0, 3.0, 29.0, 34.0, 6.0, 3.0, 6.0, 22.0, 8.0, 6.0, 5.0, 3.0, 31.0, 26.0, 3.0, 6.0, 3.0, 8.0, 6.0, 3.0, 11.0, 6.0, 0.0, 8.0, 0.0, 14.0, 3.0, 16.0, 3.0, 3.0, 19.0, 14.0, 3.0, 8.0, 3.0, 3.0, 8.0, 3.0, 11.0, 3.0, 3.0, 13.0, 0.0, 3.0, 8.0, 9.0, 3.0, 3.0, 6.0, 
10.0, 3.0, 8.0, 0.0, 3.0, 23.0, 34.0, 8.0, 8.0, 8.0, 0.0, 3.0, 14.0, 0.0, 9.0, 28.0, 29.0, 6.0, 5.0, 0.0, 0.0, 6.0, 5.0, 6.0, 5.0, 3.0, 8.0, 14.0, 8.0, 3.0, 8.0]}, "sampler_perf": {"mean_env_wait_ms": 17.933374555696833, "mean_processing_ms": 0.16517403131021888, "mean_inference_ms": 6.240884020190002}, "off_policy_estimator": {}, "info": {"num_steps_trained": 216000, "num_steps_sampled": 115200, "sample_time_ms": 287052.402, "load_time_ms": 50.896, "grad_time_ms": 7408.088, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0007812500116415322, "cur_lr": 0.0010000000474974513, "total_loss": -0.24049125611782074, "policy_loss": -0.005544388201087713, "vf_loss": 1.8025983572006226, "vf_explained_var": 0.016161540523171425, "kl": 0.0006836934480816126, "entropy": 1.7825666666030884, "entropy_coeff": 0.1319040060043335, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 115200, "episodes_total": 288, "training_iteration": 9, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_19-57-09", "timestamp": 1660244229, "time_this_iter_s": 25.946558237075806, "time_total_s": 2650.8826582431793, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, 
"soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, 
"inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 2650.8826582431793, "timesteps_since_restore": 115200, "iterations_since_restore": 9, "perf": {"cpu_util_percent": 38.36216216216216, "ram_util_percent": 57.93513513513512}}
+{"episode_reward_max": 57.0, "episode_reward_min": 0.0, "episode_reward_mean": 15.66, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 34.0}, "policy_reward_mean": {"ppo": 7.83}, "custom_metrics": {"sparse_reward_mean": 1.0, "sparse_reward_min": 0, "sparse_reward_max": 20, "shaped_reward_mean": 13.66, "shaped_reward_min": 0, "shaped_reward_max": 36, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 4.85, "onion_pickup_agent_0_min": 0, "onion_pickup_agent_0_max": 10, "onion_pickup_agent_1_mean": 5.36, "onion_pickup_agent_1_min": 1, "onion_pickup_agent_1_max": 14, "useful_onion_pickup_agent_0_mean": 3.23, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 10, "useful_onion_pickup_agent_1_mean": 3.74, "useful_onion_pickup_agent_1_min": 1, "useful_onion_pickup_agent_1_max": 10, "onion_drop_agent_0_mean": 3.26, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 
9, "onion_drop_agent_1_mean": 3.47, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 11, "useful_onion_drop_agent_0_mean": 1.31, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 7, "useful_onion_drop_agent_1_mean": 1.36, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 9, "potting_onion_agent_0_mean": 1.1, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 3, "potting_onion_agent_1_mean": 1.43, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 4, "dish_pickup_agent_0_mean": 3.36, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 2.81, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 0.21, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 2, "useful_dish_pickup_agent_1_mean": 0.23, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 2, "dish_drop_agent_0_mean": 2.57, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 10, "dish_drop_agent_1_mean": 2.14, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 9, "useful_dish_drop_agent_0_mean": 1.14, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 8, "useful_dish_drop_agent_1_mean": 0.93, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 7, "soup_pickup_agent_0_mean": 0.89, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 5, "soup_pickup_agent_1_mean": 0.62, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 5, "soup_delivery_agent_0_mean": 0.25, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 2, "soup_delivery_agent_1_mean": 0.15, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 2, "soup_drop_agent_0_mean": 0.55, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 5, "soup_drop_agent_1_mean": 0.33, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 4, "optimal_onion_potting_agent_0_mean": 1.1, "optimal_onion_potting_agent_0_min": 0, 
"optimal_onion_potting_agent_0_max": 3, "optimal_onion_potting_agent_1_mean": 1.43, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 4, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 1.1, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 3, "viable_onion_potting_agent_1_mean": 1.43, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 4, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [14.0, 
19.0, 9.0, 6.0, 9.0, 17.0, 6.0, 6.0, 20.0, 20.0, 17.0, 14.0, 11.0, 9.0, 57.0, 16.0, 17.0, 14.0, 11.0, 8.0, 11.0, 16.0, 14.0, 25.0, 22.0, 3.0, 12.0, 9.0, 9.0, 17.0, 17.0, 3.0, 14.0, 8.0, 57.0, 9.0, 11.0, 9.0, 17.0, 8.0, 14.0, 19.0, 6.0, 33.0, 11.0, 6.0, 11.0, 14.0, 16.0, 3.0, 17.0, 6.0, 16.0, 11.0, 3.0, 57.0, 16.0, 8.0, 17.0, 9.0, 57.0, 11.0, 0.0, 11.0, 11.0, 11.0, 22.0, 11.0, 6.0, 28.0, 3.0, 14.0, 19.0, 57.0, 9.0, 11.0, 9.0, 17.0, 14.0, 14.0, 23.0, 25.0, 11.0, 28.0, 14.0, 14.0, 17.0, 12.0, 3.0, 16.0, 14.0, 22.0, 36.0, 17.0, 12.0, 22.0, 14.0, 16.0, 14.0, 17.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [8.0, 6.0, 10.0, 9.0, 3.0, 6.0, 0.0, 6.0, 3.0, 6.0, 8.0, 9.0, 6.0, 0.0, 3.0, 3.0, 14.0, 6.0, 9.0, 11.0, 14.0, 3.0, 3.0, 11.0, 11.0, 0.0, 3.0, 6.0, 23.0, 34.0, 11.0, 5.0, 14.0, 3.0, 8.0, 6.0, 8.0, 3.0, 0.0, 8.0, 8.0, 3.0, 10.0, 6.0, 6.0, 8.0, 13.0, 12.0, 8.0, 14.0, 0.0, 3.0, 6.0, 6.0, 0.0, 9.0, 3.0, 6.0, 9.0, 8.0, 8.0, 9.0, 0.0, 3.0, 8.0, 6.0, 5.0, 3.0, 31.0, 26.0, 3.0, 6.0, 3.0, 8.0, 6.0, 3.0, 11.0, 6.0, 0.0, 8.0, 0.0, 14.0, 3.0, 16.0, 3.0, 3.0, 19.0, 14.0, 3.0, 8.0, 3.0, 3.0, 8.0, 3.0, 11.0, 3.0, 3.0, 13.0, 0.0, 3.0, 8.0, 9.0, 3.0, 3.0, 6.0, 10.0, 3.0, 8.0, 0.0, 3.0, 23.0, 34.0, 8.0, 8.0, 8.0, 0.0, 3.0, 14.0, 0.0, 9.0, 28.0, 29.0, 6.0, 5.0, 0.0, 0.0, 6.0, 5.0, 6.0, 5.0, 3.0, 8.0, 14.0, 8.0, 3.0, 8.0, 0.0, 6.0, 22.0, 6.0, 3.0, 0.0, 8.0, 6.0, 10.0, 9.0, 23.0, 34.0, 3.0, 6.0, 11.0, 0.0, 6.0, 3.0, 11.0, 6.0, 11.0, 3.0, 5.0, 9.0, 17.0, 6.0, 9.0, 16.0, 8.0, 3.0, 6.0, 22.0, 3.0, 
11.0, 14.0, 0.0, 6.0, 11.0, 6.0, 6.0, 0.0, 3.0, 5.0, 11.0, 8.0, 6.0, 6.0, 16.0, 11.0, 25.0, 8.0, 9.0, 3.0, 9.0, 16.0, 6.0, 8.0, 6.0, 16.0, 0.0, 11.0, 3.0, 6.0, 11.0]}, "sampler_perf": {"mean_env_wait_ms": 16.781937376069852, "mean_processing_ms": 0.16502285382446033, "mean_inference_ms": 5.880940450206554}, "off_policy_estimator": {}, "info": {"num_steps_trained": 240000, "num_steps_sampled": 128000, "sample_time_ms": 260081.675, "load_time_ms": 49.456, "grad_time_ms": 7564.799, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0003906250058207661, "cur_lr": 0.0010000000474974513, "total_loss": -0.22780847549438477, "policy_loss": -0.00820181891322136, "vf_loss": 1.5030304193496704, "vf_explained_var": 0.01960124634206295, "kl": 0.0007011755951680243, "entropy": 1.7809678316116333, "entropy_coeff": 0.1233920007944107, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 128000, "episodes_total": 320, "training_iteration": 10, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_19-57-36", "timestamp": 1660244256, "time_this_iter_s": 26.38225793838501, "time_total_s": 2677.2649161815643, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": 
null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, 
"inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 2677.2649161815643, "timesteps_since_restore": 128000, "iterations_since_restore": 10, "perf": {"cpu_util_percent": 35.91621621621621, "ram_util_percent": 58.01891891891893}}
+{"episode_reward_max": 65.0, "episode_reward_min": 3.0, "episode_reward_mean": 16.94, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 37.0}, "policy_reward_mean": {"ppo": 8.47}, "custom_metrics": {"sparse_reward_mean": 1.0, "sparse_reward_min": 0, "sparse_reward_max": 20, "shaped_reward_mean": 14.94, "shaped_reward_min": 3, "shaped_reward_max": 36, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 4.74, "onion_pickup_agent_0_min": 0, "onion_pickup_agent_0_max": 10, "onion_pickup_agent_1_mean": 5.61, "onion_pickup_agent_1_min": 1, "onion_pickup_agent_1_max": 12, "useful_onion_pickup_agent_0_mean": 3.22, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 9, "useful_onion_pickup_agent_1_mean": 3.89, "useful_onion_pickup_agent_1_min": 1, "useful_onion_pickup_agent_1_max": 10, "onion_drop_agent_0_mean": 3.13, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 
9, "onion_drop_agent_1_mean": 3.57, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 11, "useful_onion_drop_agent_0_mean": 1.21, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 7, "useful_onion_drop_agent_1_mean": 1.46, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 9, "potting_onion_agent_0_mean": 1.18, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 4, "potting_onion_agent_1_mean": 1.59, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 4, "dish_pickup_agent_0_mean": 3.46, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 2.72, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 0.23, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 2, "useful_dish_pickup_agent_1_mean": 0.28, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 2, "dish_drop_agent_0_mean": 2.6, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 10, "dish_drop_agent_1_mean": 1.98, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 9, "useful_dish_drop_agent_0_mean": 1.11, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 8, "useful_dish_drop_agent_1_mean": 0.85, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 6, "soup_pickup_agent_0_mean": 1.08, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 6, "soup_pickup_agent_1_mean": 0.69, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 5, "soup_delivery_agent_0_mean": 0.29, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 2, "soup_delivery_agent_1_mean": 0.19, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 2, "soup_drop_agent_0_mean": 0.65, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 5, "soup_drop_agent_1_mean": 0.4, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 4, "optimal_onion_potting_agent_0_mean": 1.18, "optimal_onion_potting_agent_0_min": 0, 
"optimal_onion_potting_agent_0_max": 4, "optimal_onion_potting_agent_1_mean": 1.59, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 4, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 1.18, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 4, "viable_onion_potting_agent_1_mean": 1.59, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 4, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": 
[17.0, 14.0, 20.0, 25.0, 20.0, 14.0, 11.0, 9.0, 3.0, 17.0, 65.0, 11.0, 9.0, 23.0, 65.0, 11.0, 8.0, 25.0, 11.0, 28.0, 19.0, 20.0, 17.0, 9.0, 11.0, 60.0, 17.0, 3.0, 27.0, 25.0, 11.0, 8.0, 11.0, 11.0, 22.0, 11.0, 6.0, 28.0, 3.0, 14.0, 19.0, 57.0, 9.0, 11.0, 9.0, 17.0, 14.0, 14.0, 23.0, 25.0, 11.0, 28.0, 14.0, 14.0, 17.0, 12.0, 3.0, 16.0, 14.0, 22.0, 36.0, 17.0, 12.0, 22.0, 14.0, 16.0, 14.0, 17.0, 14.0, 19.0, 9.0, 6.0, 9.0, 17.0, 6.0, 6.0, 20.0, 20.0, 17.0, 14.0, 11.0, 9.0, 57.0, 16.0, 17.0, 14.0, 11.0, 8.0, 11.0, 16.0, 14.0, 25.0, 22.0, 3.0, 12.0, 9.0, 9.0, 17.0, 17.0, 3.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [9.0, 8.0, 8.0, 6.0, 14.0, 6.0, 6.0, 19.0, 6.0, 14.0, 0.0, 14.0, 3.0, 8.0, 3.0, 6.0, 0.0, 3.0, 9.0, 8.0, 28.0, 37.0, 5.0, 6.0, 3.0, 6.0, 14.0, 9.0, 31.0, 34.0, 5.0, 6.0, 0.0, 8.0, 11.0, 14.0, 5.0, 6.0, 9.0, 19.0, 8.0, 11.0, 12.0, 8.0, 3.0, 14.0, 0.0, 9.0, 0.0, 11.0, 26.0, 34.0, 8.0, 9.0, 0.0, 3.0, 13.0, 14.0, 14.0, 11.0, 11.0, 0.0, 8.0, 0.0, 6.0, 5.0, 3.0, 8.0, 14.0, 8.0, 3.0, 8.0, 0.0, 6.0, 22.0, 6.0, 3.0, 0.0, 8.0, 6.0, 10.0, 9.0, 23.0, 34.0, 3.0, 6.0, 11.0, 0.0, 6.0, 3.0, 11.0, 6.0, 11.0, 3.0, 5.0, 9.0, 17.0, 6.0, 9.0, 16.0, 8.0, 3.0, 6.0, 22.0, 3.0, 11.0, 14.0, 0.0, 6.0, 11.0, 6.0, 6.0, 0.0, 3.0, 5.0, 11.0, 8.0, 6.0, 6.0, 16.0, 11.0, 25.0, 8.0, 9.0, 3.0, 9.0, 16.0, 6.0, 8.0, 6.0, 16.0, 0.0, 11.0, 3.0, 6.0, 11.0, 8.0, 6.0, 10.0, 9.0, 3.0, 6.0, 0.0, 6.0, 3.0, 6.0, 8.0, 9.0, 6.0, 0.0, 3.0, 3.0, 14.0, 6.0, 9.0, 11.0, 14.0, 3.0, 3.0, 11.0, 11.0, 0.0, 3.0, 6.0, 23.0, 
34.0, 11.0, 5.0, 14.0, 3.0, 8.0, 6.0, 8.0, 3.0, 0.0, 8.0, 8.0, 3.0, 10.0, 6.0, 6.0, 8.0, 13.0, 12.0, 8.0, 14.0, 0.0, 3.0, 6.0, 6.0, 0.0, 9.0, 3.0, 6.0, 9.0, 8.0, 8.0, 9.0, 0.0, 3.0]}, "sampler_perf": {"mean_env_wait_ms": 15.10732110386456, "mean_processing_ms": 0.16432950718550896, "mean_inference_ms": 5.354497783846054}, "off_policy_estimator": {}, "info": {"num_steps_trained": 264000, "num_steps_sampled": 140800, "sample_time_ms": 260129.157, "load_time_ms": 39.286, "grad_time_ms": 7759.155, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.00019531250291038305, "cur_lr": 0.0010000000474974513, "total_loss": -0.21097473800182343, "policy_loss": -0.006903436034917831, "vf_loss": 2.839796781539917, "vf_explained_var": 0.029899099841713905, "kl": 0.0006908049690537155, "entropy": 1.7788597345352173, "entropy_coeff": 0.11488000303506851, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 140800, "episodes_total": 352, "training_iteration": 11, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_19-58-02", "timestamp": 1660244282, "time_this_iter_s": 26.244181156158447, "time_total_s": 2703.509097337723, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 
0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": 
{"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 2703.509097337723, "timesteps_since_restore": 140800, "iterations_since_restore": 11, "perf": {"cpu_util_percent": 36.42631578947368, "ram_util_percent": 57.83157894736843}}
+{"episode_reward_max": 65.0, "episode_reward_min": 3.0, "episode_reward_mean": 18.34, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 40.0}, "policy_reward_mean": {"ppo": 9.17}, "custom_metrics": {"sparse_reward_mean": 1.4, "sparse_reward_min": 0, "sparse_reward_max": 20, "shaped_reward_mean": 15.54, "shaped_reward_min": 3, "shaped_reward_max": 31, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 4.67, "onion_pickup_agent_0_min": 0, "onion_pickup_agent_0_max": 10, "onion_pickup_agent_1_mean": 5.71, "onion_pickup_agent_1_min": 0, "onion_pickup_agent_1_max": 12, "useful_onion_pickup_agent_0_mean": 3.21, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 9, "useful_onion_pickup_agent_1_mean": 4.04, "useful_onion_pickup_agent_1_min": 0, "useful_onion_pickup_agent_1_max": 9, "onion_drop_agent_0_mean": 3.08, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 9, 
"onion_drop_agent_1_mean": 3.55, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 11, "useful_onion_drop_agent_0_mean": 1.2, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 7, "useful_onion_drop_agent_1_mean": 1.4, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 9, "potting_onion_agent_0_mean": 1.18, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 4, "potting_onion_agent_1_mean": 1.74, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 6, "dish_pickup_agent_0_mean": 3.55, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 2.98, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 0.18, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 1, "useful_dish_pickup_agent_1_mean": 0.28, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 2, "dish_drop_agent_0_mean": 2.66, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 12, "dish_drop_agent_1_mean": 2.19, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 9, "useful_dish_drop_agent_0_mean": 1.2, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 8, "useful_dish_drop_agent_1_mean": 0.92, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 6, "soup_pickup_agent_0_mean": 1.05, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 6, "soup_pickup_agent_1_mean": 0.76, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 5, "soup_delivery_agent_0_mean": 0.29, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 2, "soup_delivery_agent_1_mean": 0.22, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 1, "soup_drop_agent_0_mean": 0.61, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 4, "soup_drop_agent_1_mean": 0.42, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 4, "optimal_onion_potting_agent_0_mean": 1.18, "optimal_onion_potting_agent_0_min": 0, 
"optimal_onion_potting_agent_0_max": 4, "optimal_onion_potting_agent_1_mean": 1.74, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 6, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 1.18, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 4, "viable_onion_potting_agent_1_mean": 1.74, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 6, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": 
[20.0, 11.0, 60.0, 25.0, 22.0, 65.0, 12.0, 11.0, 23.0, 3.0, 30.0, 17.0, 19.0, 31.0, 28.0, 19.0, 14.0, 25.0, 25.0, 22.0, 20.0, 11.0, 11.0, 57.0, 17.0, 6.0, 17.0, 11.0, 11.0, 11.0, 14.0, 14.0, 14.0, 16.0, 14.0, 17.0, 14.0, 19.0, 9.0, 6.0, 9.0, 17.0, 6.0, 6.0, 20.0, 20.0, 17.0, 14.0, 11.0, 9.0, 57.0, 16.0, 17.0, 14.0, 11.0, 8.0, 11.0, 16.0, 14.0, 25.0, 22.0, 3.0, 12.0, 9.0, 9.0, 17.0, 17.0, 3.0, 17.0, 14.0, 20.0, 25.0, 20.0, 14.0, 11.0, 9.0, 3.0, 17.0, 65.0, 11.0, 9.0, 23.0, 65.0, 11.0, 8.0, 25.0, 11.0, 28.0, 19.0, 20.0, 17.0, 9.0, 11.0, 60.0, 17.0, 3.0, 27.0, 25.0, 11.0, 8.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [11.0, 9.0, 0.0, 11.0, 20.0, 40.0, 12.0, 13.0, 11.0, 11.0, 34.0, 31.0, 3.0, 9.0, 5.0, 6.0, 0.0, 23.0, 3.0, 0.0, 16.0, 14.0, 6.0, 11.0, 13.0, 6.0, 17.0, 14.0, 14.0, 14.0, 6.0, 13.0, 5.0, 9.0, 16.0, 9.0, 16.0, 9.0, 5.0, 17.0, 8.0, 12.0, 3.0, 8.0, 0.0, 11.0, 26.0, 31.0, 0.0, 17.0, 6.0, 0.0, 3.0, 14.0, 8.0, 3.0, 6.0, 5.0, 8.0, 3.0, 3.0, 11.0, 11.0, 3.0, 8.0, 6.0, 16.0, 0.0, 11.0, 3.0, 6.0, 11.0, 8.0, 6.0, 10.0, 9.0, 3.0, 6.0, 0.0, 6.0, 3.0, 6.0, 8.0, 9.0, 6.0, 0.0, 3.0, 3.0, 14.0, 6.0, 9.0, 11.0, 14.0, 3.0, 3.0, 11.0, 11.0, 0.0, 3.0, 6.0, 23.0, 34.0, 11.0, 5.0, 14.0, 3.0, 8.0, 6.0, 8.0, 3.0, 0.0, 8.0, 8.0, 3.0, 10.0, 6.0, 6.0, 8.0, 13.0, 12.0, 8.0, 14.0, 0.0, 3.0, 6.0, 6.0, 0.0, 9.0, 3.0, 6.0, 9.0, 8.0, 8.0, 9.0, 0.0, 3.0, 9.0, 8.0, 8.0, 6.0, 14.0, 6.0, 6.0, 19.0, 6.0, 14.0, 0.0, 14.0, 3.0, 8.0, 3.0, 6.0, 0.0, 3.0, 9.0, 8.0, 28.0, 37.0, 5.0, 6.0, 3.0, 6.0, 14.0, 9.0, 
31.0, 34.0, 5.0, 6.0, 0.0, 8.0, 11.0, 14.0, 5.0, 6.0, 9.0, 19.0, 8.0, 11.0, 12.0, 8.0, 3.0, 14.0, 0.0, 9.0, 0.0, 11.0, 26.0, 34.0, 8.0, 9.0, 0.0, 3.0, 13.0, 14.0, 14.0, 11.0, 11.0, 0.0, 8.0, 0.0]}, "sampler_perf": {"mean_env_wait_ms": 13.746840147909538, "mean_processing_ms": 0.16393515170904638, "mean_inference_ms": 4.926997257535761}, "off_policy_estimator": {}, "info": {"num_steps_trained": 288000, "num_steps_sampled": 153600, "sample_time_ms": 260207.009, "load_time_ms": 38.842, "grad_time_ms": 8068.402, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 9.765625145519152e-05, "cur_lr": 0.0010000000474974513, "total_loss": -0.19599168002605438, "policy_loss": -0.007250078488141298, "vf_loss": 2.85541033744812, "vf_explained_var": 0.045025069266557693, "kl": 0.0006896388367749751, "entropy": 1.7771064043045044, "entropy_coeff": 0.10636799782514572, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 153600, "episodes_total": 384, "training_iteration": 12, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_19-58-29", "timestamp": 1660244309, "time_this_iter_s": 27.08998394012451, "time_total_s": 2730.5990812778473, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": 
{}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": 
{"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 2730.5990812778473, "timesteps_since_restore": 153600, "iterations_since_restore": 12, "perf": {"cpu_util_percent": 37.42368421052632, "ram_util_percent": 58.20789473684212}}
+{"episode_reward_max": 76.0, "episode_reward_min": 3.0, "episode_reward_mean": 21.2, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 45.0}, "policy_reward_mean": {"ppo": 10.6}, "custom_metrics": {"sparse_reward_mean": 1.8, "sparse_reward_min": 0, "sparse_reward_max": 20, "shaped_reward_mean": 17.6, "shaped_reward_min": 3, "shaped_reward_max": 37, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 4.63, "onion_pickup_agent_0_min": 0, "onion_pickup_agent_0_max": 10, "onion_pickup_agent_1_mean": 5.81, "onion_pickup_agent_1_min": 0, "onion_pickup_agent_1_max": 12, "useful_onion_pickup_agent_0_mean": 3.24, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 9, "useful_onion_pickup_agent_1_mean": 4.08, "useful_onion_pickup_agent_1_min": 0, "useful_onion_pickup_agent_1_max": 9, "onion_drop_agent_0_mean": 2.93, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 8, 
"onion_drop_agent_1_mean": 3.44, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 9, "useful_onion_drop_agent_0_mean": 1.14, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 7, "useful_onion_drop_agent_1_mean": 1.43, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 7, "potting_onion_agent_0_mean": 1.33, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 4, "potting_onion_agent_1_mean": 1.98, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 6, "dish_pickup_agent_0_mean": 3.66, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 3.21, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 0.21, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 2, "useful_dish_pickup_agent_1_mean": 0.28, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 2, "dish_drop_agent_0_mean": 2.73, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 12, "dish_drop_agent_1_mean": 2.31, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 9, "useful_dish_drop_agent_0_mean": 1.24, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 8, "useful_dish_drop_agent_1_mean": 0.92, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 6, "soup_pickup_agent_0_mean": 1.19, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 11, "soup_pickup_agent_1_mean": 0.96, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 0.27, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 1, "soup_delivery_agent_1_mean": 0.31, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 2, "soup_drop_agent_0_mean": 0.74, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 11, "soup_drop_agent_1_mean": 0.54, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 7, "optimal_onion_potting_agent_0_mean": 1.33, "optimal_onion_potting_agent_0_min": 0, 
"optimal_onion_potting_agent_0_max": 4, "optimal_onion_potting_agent_1_mean": 1.98, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 6, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 1.33, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 4, "viable_onion_potting_agent_1_mean": 1.98, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 6, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": 
[76.0, 30.0, 25.0, 9.0, 28.0, 17.0, 68.0, 6.0, 3.0, 22.0, 17.0, 37.0, 14.0, 34.0, 12.0, 17.0, 27.0, 11.0, 25.0, 11.0, 17.0, 17.0, 25.0, 11.0, 12.0, 3.0, 19.0, 22.0, 60.0, 25.0, 25.0, 34.0, 9.0, 17.0, 17.0, 3.0, 17.0, 14.0, 20.0, 25.0, 20.0, 14.0, 11.0, 9.0, 3.0, 17.0, 65.0, 11.0, 9.0, 23.0, 65.0, 11.0, 8.0, 25.0, 11.0, 28.0, 19.0, 20.0, 17.0, 9.0, 11.0, 60.0, 17.0, 3.0, 27.0, 25.0, 11.0, 8.0, 20.0, 11.0, 60.0, 25.0, 22.0, 65.0, 12.0, 11.0, 23.0, 3.0, 30.0, 17.0, 19.0, 31.0, 28.0, 19.0, 14.0, 25.0, 25.0, 22.0, 20.0, 11.0, 11.0, 57.0, 17.0, 6.0, 17.0, 11.0, 11.0, 11.0, 14.0, 14.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [31.0, 45.0, 19.0, 11.0, 5.0, 20.0, 6.0, 3.0, 6.0, 22.0, 6.0, 11.0, 28.0, 40.0, 6.0, 0.0, 3.0, 0.0, 11.0, 11.0, 8.0, 9.0, 20.0, 17.0, 5.0, 9.0, 20.0, 14.0, 9.0, 3.0, 5.0, 12.0, 14.0, 13.0, 3.0, 8.0, 13.0, 12.0, 11.0, 0.0, 5.0, 12.0, 9.0, 8.0, 17.0, 8.0, 0.0, 11.0, 9.0, 3.0, 3.0, 0.0, 6.0, 13.0, 8.0, 14.0, 28.0, 32.0, 19.0, 6.0, 8.0, 17.0, 19.0, 15.0, 3.0, 6.0, 9.0, 8.0, 8.0, 9.0, 0.0, 3.0, 9.0, 8.0, 8.0, 6.0, 14.0, 6.0, 6.0, 19.0, 6.0, 14.0, 0.0, 14.0, 3.0, 8.0, 3.0, 6.0, 0.0, 3.0, 9.0, 8.0, 28.0, 37.0, 5.0, 6.0, 3.0, 6.0, 14.0, 9.0, 31.0, 34.0, 5.0, 6.0, 0.0, 8.0, 11.0, 14.0, 5.0, 6.0, 9.0, 19.0, 8.0, 11.0, 12.0, 8.0, 3.0, 14.0, 0.0, 9.0, 0.0, 11.0, 26.0, 34.0, 8.0, 9.0, 0.0, 3.0, 13.0, 14.0, 14.0, 11.0, 11.0, 0.0, 8.0, 0.0, 11.0, 9.0, 0.0, 11.0, 20.0, 40.0, 12.0, 13.0, 11.0, 11.0, 34.0, 31.0, 3.0, 9.0, 5.0, 6.0, 0.0, 23.0, 3.0, 0.0, 16.0, 14.0, 6.0, 11.0, 
13.0, 6.0, 17.0, 14.0, 14.0, 14.0, 6.0, 13.0, 5.0, 9.0, 16.0, 9.0, 16.0, 9.0, 5.0, 17.0, 8.0, 12.0, 3.0, 8.0, 0.0, 11.0, 26.0, 31.0, 0.0, 17.0, 6.0, 0.0, 3.0, 14.0, 8.0, 3.0, 6.0, 5.0, 8.0, 3.0, 3.0, 11.0, 11.0, 3.0]}, "sampler_perf": {"mean_env_wait_ms": 12.621089528193751, "mean_processing_ms": 0.1639912702671728, "mean_inference_ms": 4.5967771099672925}, "off_policy_estimator": {}, "info": {"num_steps_trained": 312000, "num_steps_sampled": 166400, "sample_time_ms": 261554.79, "load_time_ms": 38.918, "grad_time_ms": 8201.071, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 4.882812572759576e-05, "cur_lr": 0.0010000000474974513, "total_loss": -0.18183590471744537, "policy_loss": -0.00839205738157034, "vf_loss": 3.3925907611846924, "vf_explained_var": 0.04012133553624153, "kl": 0.0007842599879950285, "entropy": 1.775907039642334, "entropy_coeff": 0.09785600006580353, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 166400, "episodes_total": 416, "training_iteration": 13, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_19-59-09", "timestamp": 1660244349, "time_this_iter_s": 39.64977407455444, "time_total_s": 2770.2488553524017, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": 
null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, 
"local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 2770.2488553524017, "timesteps_since_restore": 166400, "iterations_since_restore": 13, "perf": {"cpu_util_percent": 43.457142857142856, "ram_util_percent": 59.38095238095238}}
+{"episode_reward_max": 76.0, "episode_reward_min": 3.0, "episode_reward_mean": 22.65, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 45.0}, "policy_reward_mean": {"ppo": 11.325}, "custom_metrics": {"sparse_reward_mean": 1.8, "sparse_reward_min": 0, "sparse_reward_max": 20, "shaped_reward_mean": 19.05, "shaped_reward_min": 3, "shaped_reward_max": 44, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 4.74, "onion_pickup_agent_0_min": 0, "onion_pickup_agent_0_max": 10, "onion_pickup_agent_1_mean": 5.54, "onion_pickup_agent_1_min": 0, "onion_pickup_agent_1_max": 12, "useful_onion_pickup_agent_0_mean": 3.3, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 9, "useful_onion_pickup_agent_1_mean": 3.91, "useful_onion_pickup_agent_1_min": 0, "useful_onion_pickup_agent_1_max": 9, "onion_drop_agent_0_mean": 2.85, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 
8, "onion_drop_agent_1_mean": 3.15, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 9, "useful_onion_drop_agent_0_mean": 1.14, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 7, "useful_onion_drop_agent_1_mean": 1.32, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 7, "potting_onion_agent_0_mean": 1.51, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 6, "potting_onion_agent_1_mean": 1.98, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 6, "dish_pickup_agent_0_mean": 3.54, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 3.23, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 0.27, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 2, "useful_dish_pickup_agent_1_mean": 0.29, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 3, "dish_drop_agent_0_mean": 2.55, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 12, "dish_drop_agent_1_mean": 2.29, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 1.07, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 10, "useful_dish_drop_agent_1_mean": 0.88, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 6, "soup_pickup_agent_0_mean": 1.17, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 11, "soup_pickup_agent_1_mean": 1.08, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 0.27, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 1, "soup_delivery_agent_1_mean": 0.34, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 2, "soup_drop_agent_0_mean": 0.71, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 11, "soup_drop_agent_1_mean": 0.6, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 7, "optimal_onion_potting_agent_0_mean": 1.51, "optimal_onion_potting_agent_0_min": 0, 
"optimal_onion_potting_agent_0_max": 6, "optimal_onion_potting_agent_1_mean": 1.98, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 6, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 1.51, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 6, "viable_onion_potting_agent_1_mean": 1.98, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 6, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": 
[19.0, 28.0, 14.0, 17.0, 17.0, 19.0, 17.0, 22.0, 63.0, 66.0, 22.0, 14.0, 9.0, 22.0, 14.0, 22.0, 20.0, 71.0, 25.0, 23.0, 3.0, 22.0, 22.0, 16.0, 20.0, 9.0, 24.0, 44.0, 12.0, 17.0, 20.0, 20.0, 27.0, 25.0, 11.0, 8.0, 20.0, 11.0, 60.0, 25.0, 22.0, 65.0, 12.0, 11.0, 23.0, 3.0, 30.0, 17.0, 19.0, 31.0, 28.0, 19.0, 14.0, 25.0, 25.0, 22.0, 20.0, 11.0, 11.0, 57.0, 17.0, 6.0, 17.0, 11.0, 11.0, 11.0, 14.0, 14.0, 76.0, 30.0, 25.0, 9.0, 28.0, 17.0, 68.0, 6.0, 3.0, 22.0, 17.0, 37.0, 14.0, 34.0, 12.0, 17.0, 27.0, 11.0, 25.0, 11.0, 17.0, 17.0, 25.0, 11.0, 12.0, 3.0, 19.0, 22.0, 60.0, 25.0, 25.0, 34.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [8.0, 11.0, 17.0, 11.0, 3.0, 11.0, 5.0, 12.0, 3.0, 14.0, 3.0, 16.0, 11.0, 6.0, 14.0, 8.0, 31.0, 32.0, 29.0, 37.0, 11.0, 11.0, 5.0, 9.0, 9.0, 0.0, 11.0, 11.0, 8.0, 6.0, 8.0, 14.0, 11.0, 9.0, 41.0, 30.0, 11.0, 14.0, 9.0, 14.0, 0.0, 3.0, 14.0, 8.0, 14.0, 8.0, 8.0, 8.0, 14.0, 6.0, 3.0, 6.0, 3.0, 21.0, 14.0, 30.0, 3.0, 9.0, 11.0, 6.0, 12.0, 8.0, 9.0, 11.0, 13.0, 14.0, 14.0, 11.0, 11.0, 0.0, 8.0, 0.0, 11.0, 9.0, 0.0, 11.0, 20.0, 40.0, 12.0, 13.0, 11.0, 11.0, 34.0, 31.0, 3.0, 9.0, 5.0, 6.0, 0.0, 23.0, 3.0, 0.0, 16.0, 14.0, 6.0, 11.0, 13.0, 6.0, 17.0, 14.0, 14.0, 14.0, 6.0, 13.0, 5.0, 9.0, 16.0, 9.0, 16.0, 9.0, 5.0, 17.0, 8.0, 12.0, 3.0, 8.0, 0.0, 11.0, 26.0, 31.0, 0.0, 17.0, 6.0, 0.0, 3.0, 14.0, 8.0, 3.0, 6.0, 5.0, 8.0, 3.0, 3.0, 11.0, 11.0, 3.0, 31.0, 45.0, 19.0, 11.0, 5.0, 20.0, 6.0, 3.0, 6.0, 22.0, 6.0, 11.0, 28.0, 40.0, 6.0, 0.0, 3.0, 0.0, 11.0, 11.0, 8.0, 
9.0, 20.0, 17.0, 5.0, 9.0, 20.0, 14.0, 9.0, 3.0, 5.0, 12.0, 14.0, 13.0, 3.0, 8.0, 13.0, 12.0, 11.0, 0.0, 5.0, 12.0, 9.0, 8.0, 17.0, 8.0, 0.0, 11.0, 9.0, 3.0, 3.0, 0.0, 6.0, 13.0, 8.0, 14.0, 28.0, 32.0, 19.0, 6.0, 8.0, 17.0, 19.0, 15.0]}, "sampler_perf": {"mean_env_wait_ms": 11.674808393625103, "mean_processing_ms": 0.16471740447247565, "mean_inference_ms": 4.325696825802514}, "off_policy_estimator": {}, "info": {"num_steps_trained": 336000, "num_steps_sampled": 179200, "sample_time_ms": 262067.662, "load_time_ms": 39.631, "grad_time_ms": 8508.383, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 2.441406286379788e-05, "cur_lr": 0.0010000000474974513, "total_loss": -0.169602632522583, "policy_loss": -0.011421400122344494, "vf_loss": 3.2296648025512695, "vf_explained_var": 0.07911600917577744, "kl": 0.0008258241578005254, "entropy": 1.7740892171859741, "entropy_coeff": 0.08934400230646133, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 179200, "episodes_total": 448, "training_iteration": 14, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_19-59-41", "timestamp": 1660244381, "time_this_iter_s": 32.64548587799072, "time_total_s": 2802.8943412303925, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, 
"custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, 
"local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 2802.8943412303925, "timesteps_since_restore": 179200, "iterations_since_restore": 14, "perf": {"cpu_util_percent": 44.92765957446808, "ram_util_percent": 58.32340425531916}}
+{"episode_reward_max": 84.0, "episode_reward_min": 3.0, "episode_reward_mean": 23.09, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 51.0}, "policy_reward_mean": {"ppo": 11.545}, "custom_metrics": {"sparse_reward_mean": 1.8, "sparse_reward_min": 0, "sparse_reward_max": 20, "shaped_reward_mean": 19.49, "shaped_reward_min": 3, "shaped_reward_max": 44, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 4.62, "onion_pickup_agent_0_min": 0, "onion_pickup_agent_0_max": 11, "onion_pickup_agent_1_mean": 5.9, "onion_pickup_agent_1_min": 0, "onion_pickup_agent_1_max": 14, "useful_onion_pickup_agent_0_mean": 2.94, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 9, "useful_onion_pickup_agent_1_mean": 4.0, "useful_onion_pickup_agent_1_min": 0, "useful_onion_pickup_agent_1_max": 9, "onion_drop_agent_0_mean": 2.68, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 8, 
"onion_drop_agent_1_mean": 3.41, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 9, "useful_onion_drop_agent_0_mean": 1.34, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 7, "useful_onion_drop_agent_1_mean": 1.61, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 7, "potting_onion_agent_0_mean": 1.53, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 6, "potting_onion_agent_1_mean": 2.07, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 6, "dish_pickup_agent_0_mean": 3.42, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 3.01, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 0.35, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 2, "useful_dish_pickup_agent_1_mean": 0.28, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 3, "dish_drop_agent_0_mean": 2.47, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 10, "dish_drop_agent_1_mean": 2.09, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.91, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 10, "useful_dish_drop_agent_1_mean": 0.75, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 6, "soup_pickup_agent_0_mean": 1.13, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 11, "soup_pickup_agent_1_mean": 1.03, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 0.28, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 2, "soup_delivery_agent_1_mean": 0.31, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 2, "soup_drop_agent_0_mean": 0.69, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 11, "soup_drop_agent_1_mean": 0.59, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 7, "optimal_onion_potting_agent_0_mean": 1.53, "optimal_onion_potting_agent_0_min": 0, 
"optimal_onion_potting_agent_0_max": 6, "optimal_onion_potting_agent_1_mean": 2.07, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 6, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 1.53, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 6, "viable_onion_potting_agent_1_mean": 2.07, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 6, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": 
[17.0, 84.0, 14.0, 9.0, 9.0, 34.0, 22.0, 9.0, 23.0, 20.0, 9.0, 36.0, 14.0, 22.0, 11.0, 20.0, 25.0, 17.0, 6.0, 9.0, 30.0, 60.0, 9.0, 25.0, 22.0, 14.0, 23.0, 28.0, 19.0, 6.0, 76.0, 25.0, 11.0, 11.0, 14.0, 14.0, 76.0, 30.0, 25.0, 9.0, 28.0, 17.0, 68.0, 6.0, 3.0, 22.0, 17.0, 37.0, 14.0, 34.0, 12.0, 17.0, 27.0, 11.0, 25.0, 11.0, 17.0, 17.0, 25.0, 11.0, 12.0, 3.0, 19.0, 22.0, 60.0, 25.0, 25.0, 34.0, 19.0, 28.0, 14.0, 17.0, 17.0, 19.0, 17.0, 22.0, 63.0, 66.0, 22.0, 14.0, 9.0, 22.0, 14.0, 22.0, 20.0, 71.0, 25.0, 23.0, 3.0, 22.0, 22.0, 16.0, 20.0, 9.0, 24.0, 44.0, 12.0, 17.0, 20.0, 20.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [14.0, 3.0, 33.0, 51.0, 3.0, 11.0, 3.0, 6.0, 0.0, 9.0, 14.0, 20.0, 9.0, 13.0, 0.0, 9.0, 17.0, 6.0, 14.0, 6.0, 3.0, 6.0, 27.0, 9.0, 8.0, 6.0, 3.0, 19.0, 3.0, 8.0, 8.0, 12.0, 11.0, 14.0, 8.0, 9.0, 3.0, 3.0, 6.0, 3.0, 13.0, 17.0, 26.0, 34.0, 6.0, 3.0, 14.0, 11.0, 11.0, 11.0, 3.0, 11.0, 9.0, 14.0, 13.0, 15.0, 8.0, 11.0, 3.0, 3.0, 38.0, 38.0, 17.0, 8.0, 6.0, 5.0, 8.0, 3.0, 3.0, 11.0, 11.0, 3.0, 31.0, 45.0, 19.0, 11.0, 5.0, 20.0, 6.0, 3.0, 6.0, 22.0, 6.0, 11.0, 28.0, 40.0, 6.0, 0.0, 3.0, 0.0, 11.0, 11.0, 8.0, 9.0, 20.0, 17.0, 5.0, 9.0, 20.0, 14.0, 9.0, 3.0, 5.0, 12.0, 14.0, 13.0, 3.0, 8.0, 13.0, 12.0, 11.0, 0.0, 5.0, 12.0, 9.0, 8.0, 17.0, 8.0, 0.0, 11.0, 9.0, 3.0, 3.0, 0.0, 6.0, 13.0, 8.0, 14.0, 28.0, 32.0, 19.0, 6.0, 8.0, 17.0, 19.0, 15.0, 8.0, 11.0, 17.0, 11.0, 3.0, 11.0, 5.0, 12.0, 3.0, 14.0, 3.0, 16.0, 11.0, 6.0, 14.0, 8.0, 31.0, 32.0, 29.0, 37.0, 11.0, 11.0, 
5.0, 9.0, 9.0, 0.0, 11.0, 11.0, 8.0, 6.0, 8.0, 14.0, 11.0, 9.0, 41.0, 30.0, 11.0, 14.0, 9.0, 14.0, 0.0, 3.0, 14.0, 8.0, 14.0, 8.0, 8.0, 8.0, 14.0, 6.0, 3.0, 6.0, 3.0, 21.0, 14.0, 30.0, 3.0, 9.0, 11.0, 6.0, 12.0, 8.0, 9.0, 11.0]}, "sampler_perf": {"mean_env_wait_ms": 10.867209759257396, "mean_processing_ms": 0.16544513042105083, "mean_inference_ms": 4.098041869605518}, "off_policy_estimator": {}, "info": {"num_steps_trained": 360000, "num_steps_sampled": 192000, "sample_time_ms": 262263.341, "load_time_ms": 39.79, "grad_time_ms": 8700.33, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 1.220703143189894e-05, "cur_lr": 0.0010000000474974513, "total_loss": -0.14661313593387604, "policy_loss": -0.0037220455706119537, "vf_loss": 3.146031618118286, "vf_explained_var": 0.09564539045095444, "kl": 0.0008609917131252587, "entropy": 1.7716461420059204, "entropy_coeff": 0.08083199709653854, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 192000, "episodes_total": 480, "training_iteration": 15, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_20-00-10", "timestamp": 1660244410, "time_this_iter_s": 28.69369125366211, "time_total_s": 2831.5880324840546, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, 
"custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, 
"local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 2831.5880324840546, "timesteps_since_restore": 192000, "iterations_since_restore": 15, "perf": {"cpu_util_percent": 32.489999999999995, "ram_util_percent": 57.802499999999995}}
+{"episode_reward_max": 84.0, "episode_reward_min": 3.0, "episode_reward_mean": 25.88, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 51.0}, "policy_reward_mean": {"ppo": 12.94}, "custom_metrics": {"sparse_reward_mean": 2.6, "sparse_reward_min": 0, "sparse_reward_max": 20, "shaped_reward_mean": 20.68, "shaped_reward_min": 3, "shaped_reward_max": 44, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 5.11, "onion_pickup_agent_0_min": 0, "onion_pickup_agent_0_max": 11, "onion_pickup_agent_1_mean": 6.01, "onion_pickup_agent_1_min": 0, "onion_pickup_agent_1_max": 14, "useful_onion_pickup_agent_0_mean": 3.28, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 9, "useful_onion_pickup_agent_1_mean": 4.06, "useful_onion_pickup_agent_1_min": 0, "useful_onion_pickup_agent_1_max": 10, "onion_drop_agent_0_mean": 2.99, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 
7, "onion_drop_agent_1_mean": 3.3, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 9, "useful_onion_drop_agent_0_mean": 1.49, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 1.6, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 7, "potting_onion_agent_0_mean": 1.61, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 6, "potting_onion_agent_1_mean": 2.25, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 6, "dish_pickup_agent_0_mean": 3.28, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 2.95, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 0.36, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 2, "useful_dish_pickup_agent_1_mean": 0.34, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 3, "dish_drop_agent_0_mean": 2.41, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 10, "dish_drop_agent_1_mean": 1.95, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.95, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 10, "useful_dish_drop_agent_1_mean": 0.65, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 5, "soup_pickup_agent_0_mean": 0.98, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 4, "soup_pickup_agent_1_mean": 1.13, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 6, "soup_delivery_agent_0_mean": 0.29, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 2, "soup_delivery_agent_1_mean": 0.37, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 3, "soup_drop_agent_0_mean": 0.58, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 4, "soup_drop_agent_1_mean": 0.62, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 4, "optimal_onion_potting_agent_0_mean": 1.61, "optimal_onion_potting_agent_0_min": 0, 
"optimal_onion_potting_agent_0_max": 6, "optimal_onion_potting_agent_1_mean": 2.25, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 6, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 1.61, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 6, "viable_onion_potting_agent_1_mean": 2.25, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 6, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": 
[17.0, 26.0, 42.0, 36.0, 11.0, 17.0, 9.0, 12.0, 20.0, 30.0, 22.0, 60.0, 23.0, 66.0, 36.0, 28.0, 79.0, 22.0, 31.0, 68.0, 28.0, 20.0, 54.0, 9.0, 14.0, 17.0, 66.0, 20.0, 14.0, 11.0, 17.0, 19.0, 60.0, 25.0, 25.0, 34.0, 19.0, 28.0, 14.0, 17.0, 17.0, 19.0, 17.0, 22.0, 63.0, 66.0, 22.0, 14.0, 9.0, 22.0, 14.0, 22.0, 20.0, 71.0, 25.0, 23.0, 3.0, 22.0, 22.0, 16.0, 20.0, 9.0, 24.0, 44.0, 12.0, 17.0, 20.0, 20.0, 17.0, 84.0, 14.0, 9.0, 9.0, 34.0, 22.0, 9.0, 23.0, 20.0, 9.0, 36.0, 14.0, 22.0, 11.0, 20.0, 25.0, 17.0, 6.0, 9.0, 30.0, 60.0, 9.0, 25.0, 22.0, 14.0, 23.0, 28.0, 19.0, 6.0, 76.0, 25.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [12.0, 5.0, 20.0, 6.0, 14.0, 28.0, 19.0, 17.0, 3.0, 8.0, 3.0, 14.0, 3.0, 6.0, 6.0, 6.0, 3.0, 17.0, 17.0, 13.0, 9.0, 13.0, 26.0, 34.0, 14.0, 9.0, 37.0, 29.0, 8.0, 28.0, 12.0, 16.0, 29.0, 50.0, 3.0, 19.0, 8.0, 23.0, 33.0, 35.0, 5.0, 23.0, 14.0, 6.0, 31.0, 23.0, 9.0, 0.0, 3.0, 11.0, 3.0, 14.0, 26.0, 40.0, 3.0, 17.0, 6.0, 8.0, 5.0, 6.0, 9.0, 8.0, 8.0, 11.0, 28.0, 32.0, 19.0, 6.0, 8.0, 17.0, 19.0, 15.0, 8.0, 11.0, 17.0, 11.0, 3.0, 11.0, 5.0, 12.0, 3.0, 14.0, 3.0, 16.0, 11.0, 6.0, 14.0, 8.0, 31.0, 32.0, 29.0, 37.0, 11.0, 11.0, 5.0, 9.0, 9.0, 0.0, 11.0, 11.0, 8.0, 6.0, 8.0, 14.0, 11.0, 9.0, 41.0, 30.0, 11.0, 14.0, 9.0, 14.0, 0.0, 3.0, 14.0, 8.0, 14.0, 8.0, 8.0, 8.0, 14.0, 6.0, 3.0, 6.0, 3.0, 21.0, 14.0, 30.0, 3.0, 9.0, 11.0, 6.0, 12.0, 8.0, 9.0, 11.0, 14.0, 3.0, 33.0, 51.0, 3.0, 11.0, 3.0, 6.0, 0.0, 9.0, 14.0, 20.0, 9.0, 13.0, 0.0, 9.0, 17.0, 6.0, 14.0, 6.0, 3.0, 
6.0, 27.0, 9.0, 8.0, 6.0, 3.0, 19.0, 3.0, 8.0, 8.0, 12.0, 11.0, 14.0, 8.0, 9.0, 3.0, 3.0, 6.0, 3.0, 13.0, 17.0, 26.0, 34.0, 6.0, 3.0, 14.0, 11.0, 11.0, 11.0, 3.0, 11.0, 9.0, 14.0, 13.0, 15.0, 8.0, 11.0, 3.0, 3.0, 38.0, 38.0, 17.0, 8.0]}, "sampler_perf": {"mean_env_wait_ms": 10.168319641902713, "mean_processing_ms": 0.16598079643560795, "mean_inference_ms": 3.884933201233045}, "off_policy_estimator": {}, "info": {"num_steps_trained": 384000, "num_steps_sampled": 204800, "sample_time_ms": 262331.542, "load_time_ms": 40.118, "grad_time_ms": 8762.055, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 6.10351571594947e-06, "cur_lr": 0.0010000000474974513, "total_loss": -0.13645179569721222, "policy_loss": -0.00917948316782713, "vf_loss": 5.045528888702393, "vf_explained_var": 0.08776132017374039, "kl": 0.0009270868613384664, "entropy": 1.7668260335922241, "entropy_coeff": 0.07231999933719635, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 204800, "episodes_total": 512, "training_iteration": 16, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_20-00-39", "timestamp": 1660244439, "time_this_iter_s": 28.684066772460938, "time_total_s": 2860.2720992565155, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, 
"custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, 
"local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 2860.2720992565155, "timesteps_since_restore": 204800, "iterations_since_restore": 16, "perf": {"cpu_util_percent": 36.62439024390244, "ram_util_percent": 57.81951219512195}}
+{"episode_reward_max": 84.0, "episode_reward_min": 6.0, "episode_reward_mean": 27.86, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 51.0}, "policy_reward_mean": {"ppo": 13.93}, "custom_metrics": {"sparse_reward_mean": 3.4, "sparse_reward_min": 0, "sparse_reward_max": 20, "shaped_reward_mean": 21.06, "shaped_reward_min": 6, "shaped_reward_max": 44, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 5.37, "onion_pickup_agent_0_min": 1, "onion_pickup_agent_0_max": 11, "onion_pickup_agent_1_mean": 6.13, "onion_pickup_agent_1_min": 1, "onion_pickup_agent_1_max": 14, "useful_onion_pickup_agent_0_mean": 3.43, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 9, "useful_onion_pickup_agent_1_mean": 4.09, "useful_onion_pickup_agent_1_min": 1, "useful_onion_pickup_agent_1_max": 10, "onion_drop_agent_0_mean": 3.1, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 
7, "onion_drop_agent_1_mean": 3.28, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 9, "useful_onion_drop_agent_0_mean": 1.62, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 1.64, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 7, "potting_onion_agent_0_mean": 1.71, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 5, "potting_onion_agent_1_mean": 2.35, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 6, "dish_pickup_agent_0_mean": 3.08, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 2.84, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 0.38, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 2, "useful_dish_pickup_agent_1_mean": 0.33, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 2, "dish_drop_agent_0_mean": 2.3, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 1.85, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.78, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 5, "useful_dish_drop_agent_1_mean": 0.57, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 5, "soup_pickup_agent_0_mean": 0.98, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 4, "soup_pickup_agent_1_mean": 1.08, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 6, "soup_delivery_agent_0_mean": 0.35, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 2, "soup_delivery_agent_1_mean": 0.36, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 3, "soup_drop_agent_0_mean": 0.57, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 3, "soup_drop_agent_1_mean": 0.61, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 4, "optimal_onion_potting_agent_0_mean": 1.71, "optimal_onion_potting_agent_0_min": 0, 
"optimal_onion_potting_agent_0_max": 5, "optimal_onion_potting_agent_1_mean": 2.35, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 6, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 1.71, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 5, "viable_onion_potting_agent_1_mean": 2.35, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 6, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": 
[17.0, 28.0, 34.0, 20.0, 57.0, 14.0, 71.0, 12.0, 12.0, 66.0, 28.0, 63.0, 17.0, 28.0, 23.0, 9.0, 20.0, 22.0, 71.0, 26.0, 27.0, 20.0, 20.0, 9.0, 19.0, 20.0, 76.0, 19.0, 68.0, 68.0, 17.0, 25.0, 12.0, 17.0, 20.0, 20.0, 17.0, 84.0, 14.0, 9.0, 9.0, 34.0, 22.0, 9.0, 23.0, 20.0, 9.0, 36.0, 14.0, 22.0, 11.0, 20.0, 25.0, 17.0, 6.0, 9.0, 30.0, 60.0, 9.0, 25.0, 22.0, 14.0, 23.0, 28.0, 19.0, 6.0, 76.0, 25.0, 17.0, 26.0, 42.0, 36.0, 11.0, 17.0, 9.0, 12.0, 20.0, 30.0, 22.0, 60.0, 23.0, 66.0, 36.0, 28.0, 79.0, 22.0, 31.0, 68.0, 28.0, 20.0, 54.0, 9.0, 14.0, 17.0, 66.0, 20.0, 14.0, 11.0, 17.0, 19.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [6.0, 11.0, 19.0, 9.0, 17.0, 17.0, 3.0, 17.0, 26.0, 31.0, 8.0, 6.0, 38.0, 33.0, 6.0, 6.0, 3.0, 9.0, 34.0, 32.0, 17.0, 11.0, 31.0, 32.0, 6.0, 11.0, 16.0, 12.0, 12.0, 11.0, 3.0, 6.0, 3.0, 17.0, 10.0, 12.0, 36.0, 35.0, 11.0, 15.0, 13.0, 14.0, 3.0, 17.0, 14.0, 6.0, 6.0, 3.0, 6.0, 13.0, 12.0, 8.0, 40.0, 36.0, 8.0, 11.0, 40.0, 28.0, 34.0, 34.0, 3.0, 14.0, 9.0, 16.0, 3.0, 9.0, 11.0, 6.0, 12.0, 8.0, 9.0, 11.0, 14.0, 3.0, 33.0, 51.0, 3.0, 11.0, 3.0, 6.0, 0.0, 9.0, 14.0, 20.0, 9.0, 13.0, 0.0, 9.0, 17.0, 6.0, 14.0, 6.0, 3.0, 6.0, 27.0, 9.0, 8.0, 6.0, 3.0, 19.0, 3.0, 8.0, 8.0, 12.0, 11.0, 14.0, 8.0, 9.0, 3.0, 3.0, 6.0, 3.0, 13.0, 17.0, 26.0, 34.0, 6.0, 3.0, 14.0, 11.0, 11.0, 11.0, 3.0, 11.0, 9.0, 14.0, 13.0, 15.0, 8.0, 11.0, 3.0, 3.0, 38.0, 38.0, 17.0, 8.0, 12.0, 5.0, 20.0, 6.0, 14.0, 28.0, 19.0, 17.0, 3.0, 8.0, 3.0, 14.0, 3.0, 6.0, 6.0, 6.0, 3.0, 17.0, 17.0, 13.0, 
9.0, 13.0, 26.0, 34.0, 14.0, 9.0, 37.0, 29.0, 8.0, 28.0, 12.0, 16.0, 29.0, 50.0, 3.0, 19.0, 8.0, 23.0, 33.0, 35.0, 5.0, 23.0, 14.0, 6.0, 31.0, 23.0, 9.0, 0.0, 3.0, 11.0, 3.0, 14.0, 26.0, 40.0, 3.0, 17.0, 6.0, 8.0, 5.0, 6.0, 9.0, 8.0, 8.0, 11.0]}, "sampler_perf": {"mean_env_wait_ms": 9.556971747293405, "mean_processing_ms": 0.16611483871912305, "mean_inference_ms": 3.69458132267773}, "off_policy_estimator": {}, "info": {"num_steps_trained": 408000, "num_steps_sampled": 217600, "sample_time_ms": 20254.523, "load_time_ms": 38.861, "grad_time_ms": 8931.015, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 3.051757857974735e-06, "cur_lr": 0.0010000000474974513, "total_loss": -0.12081533670425415, "policy_loss": -0.008616355247795582, "vf_loss": 5.614309310913086, "vf_explained_var": 0.13559557497501373, "kl": 0.0008749772678129375, "entropy": 1.76718270778656, "entropy_coeff": 0.06380800157785416, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 217600, "episodes_total": 544, "training_iteration": 17, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_20-01-08", "timestamp": 1660244468, "time_this_iter_s": 29.310136079788208, "time_total_s": 2889.5822353363037, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, 
"custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, 
"local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 2889.5822353363037, "timesteps_since_restore": 217600, "iterations_since_restore": 17, "perf": {"cpu_util_percent": 39.28333333333334, "ram_util_percent": 57.69761904761903}}
+{"episode_reward_max": 82.0, "episode_reward_min": 6.0, "episode_reward_mean": 31.16, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 50.0}, "policy_reward_mean": {"ppo": 15.58}, "custom_metrics": {"sparse_reward_mean": 4.0, "sparse_reward_min": 0, "sparse_reward_max": 20, "shaped_reward_mean": 23.16, "shaped_reward_min": 6, "shaped_reward_max": 47, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 5.73, "onion_pickup_agent_0_min": 0, "onion_pickup_agent_0_max": 11, "onion_pickup_agent_1_mean": 5.83, "onion_pickup_agent_1_min": 1, "onion_pickup_agent_1_max": 13, "useful_onion_pickup_agent_0_mean": 3.76, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 9, "useful_onion_pickup_agent_1_mean": 3.82, "useful_onion_pickup_agent_1_min": 1, "useful_onion_pickup_agent_1_max": 10, "onion_drop_agent_0_mean": 3.1, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 
7, "onion_drop_agent_1_mean": 3.06, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 9, "useful_onion_drop_agent_0_mean": 1.52, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 1.57, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 6, "potting_onion_agent_0_mean": 2.08, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 5, "potting_onion_agent_1_mean": 2.25, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 6, "dish_pickup_agent_0_mean": 3.02, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 2.94, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 0.34, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 2, "useful_dish_pickup_agent_1_mean": 0.45, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 2, "dish_drop_agent_0_mean": 2.19, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 9, "dish_drop_agent_1_mean": 1.79, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.77, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 5, "useful_dish_drop_agent_1_mean": 0.54, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 5, "soup_pickup_agent_0_mean": 1.08, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 6, "soup_pickup_agent_1_mean": 1.32, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 6, "soup_delivery_agent_0_mean": 0.47, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 3, "soup_delivery_agent_1_mean": 0.49, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 3, "soup_drop_agent_0_mean": 0.57, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 6, "soup_drop_agent_1_mean": 0.73, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 4, "optimal_onion_potting_agent_0_mean": 2.08, "optimal_onion_potting_agent_0_min": 0, 
"optimal_onion_potting_agent_0_max": 5, "optimal_onion_potting_agent_1_mean": 2.25, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 6, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 2.08, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 5, "viable_onion_potting_agent_1_mean": 2.25, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 6, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": 
[76.0, 44.0, 28.0, 12.0, 36.0, 33.0, 9.0, 24.0, 28.0, 25.0, 31.0, 68.0, 17.0, 47.0, 23.0, 12.0, 17.0, 22.0, 28.0, 23.0, 20.0, 37.0, 14.0, 8.0, 28.0, 22.0, 39.0, 82.0, 71.0, 57.0, 30.0, 9.0, 19.0, 6.0, 76.0, 25.0, 17.0, 26.0, 42.0, 36.0, 11.0, 17.0, 9.0, 12.0, 20.0, 30.0, 22.0, 60.0, 23.0, 66.0, 36.0, 28.0, 79.0, 22.0, 31.0, 68.0, 28.0, 20.0, 54.0, 9.0, 14.0, 17.0, 66.0, 20.0, 14.0, 11.0, 17.0, 19.0, 17.0, 28.0, 34.0, 20.0, 57.0, 14.0, 71.0, 12.0, 12.0, 66.0, 28.0, 63.0, 17.0, 28.0, 23.0, 9.0, 20.0, 22.0, 71.0, 26.0, 27.0, 20.0, 20.0, 9.0, 19.0, 20.0, 76.0, 19.0, 68.0, 68.0, 17.0, 25.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [34.0, 42.0, 18.0, 26.0, 11.0, 17.0, 9.0, 3.0, 17.0, 19.0, 19.0, 14.0, 3.0, 6.0, 16.0, 8.0, 16.0, 12.0, 12.0, 13.0, 20.0, 11.0, 37.0, 31.0, 9.0, 8.0, 28.0, 19.0, 9.0, 14.0, 3.0, 9.0, 11.0, 6.0, 8.0, 14.0, 11.0, 17.0, 9.0, 14.0, 9.0, 11.0, 20.0, 17.0, 9.0, 5.0, 5.0, 3.0, 16.0, 12.0, 12.0, 10.0, 12.0, 27.0, 38.0, 44.0, 35.0, 36.0, 29.0, 28.0, 11.0, 19.0, 0.0, 9.0, 8.0, 11.0, 3.0, 3.0, 38.0, 38.0, 17.0, 8.0, 12.0, 5.0, 20.0, 6.0, 14.0, 28.0, 19.0, 17.0, 3.0, 8.0, 3.0, 14.0, 3.0, 6.0, 6.0, 6.0, 3.0, 17.0, 17.0, 13.0, 9.0, 13.0, 26.0, 34.0, 14.0, 9.0, 37.0, 29.0, 8.0, 28.0, 12.0, 16.0, 29.0, 50.0, 3.0, 19.0, 8.0, 23.0, 33.0, 35.0, 5.0, 23.0, 14.0, 6.0, 31.0, 23.0, 9.0, 0.0, 3.0, 11.0, 3.0, 14.0, 26.0, 40.0, 3.0, 17.0, 6.0, 8.0, 5.0, 6.0, 9.0, 8.0, 8.0, 11.0, 6.0, 11.0, 19.0, 9.0, 17.0, 17.0, 3.0, 17.0, 26.0, 31.0, 8.0, 6.0, 38.0, 33.0, 6.0, 6.0, 3.0, 9.0, 
34.0, 32.0, 17.0, 11.0, 31.0, 32.0, 6.0, 11.0, 16.0, 12.0, 12.0, 11.0, 3.0, 6.0, 3.0, 17.0, 10.0, 12.0, 36.0, 35.0, 11.0, 15.0, 13.0, 14.0, 3.0, 17.0, 14.0, 6.0, 6.0, 3.0, 6.0, 13.0, 12.0, 8.0, 40.0, 36.0, 8.0, 11.0, 40.0, 28.0, 34.0, 34.0, 3.0, 14.0, 9.0, 16.0]}, "sampler_perf": {"mean_env_wait_ms": 9.01879810548929, "mean_processing_ms": 0.16630796767792247, "mean_inference_ms": 3.5295458802458652}, "off_policy_estimator": {}, "info": {"num_steps_trained": 432000, "num_steps_sampled": 230400, "sample_time_ms": 20513.179, "load_time_ms": 38.872, "grad_time_ms": 8952.524, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 1.5258789289873675e-06, "cur_lr": 0.0010000000474974513, "total_loss": -0.10658890753984451, "policy_loss": -0.009598230011761189, "vf_loss": 4.846475601196289, "vf_explained_var": 0.11691506952047348, "kl": 0.0009377954411320388, "entropy": 1.762791633605957, "entropy_coeff": 0.055296000093221664, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 230400, "episodes_total": 576, "training_iteration": 18, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_20-01-39", "timestamp": 1660244499, "time_this_iter_s": 30.67889380455017, "time_total_s": 2920.261129140854, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, 
"custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 2920.261129140854, "timesteps_since_restore": 230400, "iterations_since_restore": 18, "perf": {"cpu_util_percent": 40.46744186046512, "ram_util_percent": 57.767441860465105}}
+{"episode_reward_max": 88.0, "episode_reward_min": 8.0, "episode_reward_mean": 32.95, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 53.0}, "policy_reward_mean": {"ppo": 16.475}, "custom_metrics": {"sparse_reward_mean": 3.8, "sparse_reward_min": 0, "sparse_reward_max": 20, "shaped_reward_mean": 25.35, "shaped_reward_min": 8, "shaped_reward_max": 48, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 5.56, "onion_pickup_agent_0_min": 0, "onion_pickup_agent_0_max": 11, "onion_pickup_agent_1_mean": 5.61, "onion_pickup_agent_1_min": 1, "onion_pickup_agent_1_max": 13, "useful_onion_pickup_agent_0_mean": 3.65, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 8, "useful_onion_pickup_agent_1_mean": 3.81, "useful_onion_pickup_agent_1_min": 1, "useful_onion_pickup_agent_1_max": 9, "onion_drop_agent_0_mean": 2.8, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 
7, "onion_drop_agent_1_mean": 2.85, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 9, "useful_onion_drop_agent_0_mean": 1.39, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 1.41, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 6, "potting_onion_agent_0_mean": 2.27, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 6, "potting_onion_agent_1_mean": 2.23, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 6, "dish_pickup_agent_0_mean": 2.97, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 2.91, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 7, "useful_dish_pickup_agent_0_mean": 0.41, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 2, "useful_dish_pickup_agent_1_mean": 0.54, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 3, "dish_drop_agent_0_mean": 2.0, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 9, "dish_drop_agent_1_mean": 1.61, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 6, "useful_dish_drop_agent_0_mean": 0.61, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.49, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 4, "soup_pickup_agent_0_mean": 1.22, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 6, "soup_pickup_agent_1_mean": 1.52, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 6, "soup_delivery_agent_0_mean": 0.6, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 3, "soup_delivery_agent_1_mean": 0.51, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 2, "soup_drop_agent_0_mean": 0.55, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 6, "soup_drop_agent_1_mean": 0.94, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 5, "optimal_onion_potting_agent_0_mean": 2.27, "optimal_onion_potting_agent_0_min": 0, 
"optimal_onion_potting_agent_0_max": 6, "optimal_onion_potting_agent_1_mean": 2.23, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 6, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 2.27, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 6, "viable_onion_potting_agent_1_mean": 2.23, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 6, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": 
[34.0, 42.0, 88.0, 33.0, 17.0, 30.0, 76.0, 85.0, 33.0, 31.0, 68.0, 12.0, 16.0, 79.0, 68.0, 19.0, 33.0, 19.0, 25.0, 31.0, 36.0, 44.0, 39.0, 11.0, 22.0, 42.0, 31.0, 25.0, 20.0, 31.0, 31.0, 17.0, 14.0, 11.0, 17.0, 19.0, 17.0, 28.0, 34.0, 20.0, 57.0, 14.0, 71.0, 12.0, 12.0, 66.0, 28.0, 63.0, 17.0, 28.0, 23.0, 9.0, 20.0, 22.0, 71.0, 26.0, 27.0, 20.0, 20.0, 9.0, 19.0, 20.0, 76.0, 19.0, 68.0, 68.0, 17.0, 25.0, 76.0, 44.0, 28.0, 12.0, 36.0, 33.0, 9.0, 24.0, 28.0, 25.0, 31.0, 68.0, 17.0, 47.0, 23.0, 12.0, 17.0, 22.0, 28.0, 23.0, 20.0, 37.0, 14.0, 8.0, 28.0, 22.0, 39.0, 82.0, 71.0, 57.0, 30.0, 9.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [11.0, 23.0, 25.0, 17.0, 45.0, 43.0, 22.0, 11.0, 5.0, 12.0, 17.0, 13.0, 48.0, 28.0, 32.0, 53.0, 22.0, 11.0, 18.0, 13.0, 29.0, 39.0, 6.0, 6.0, 3.0, 13.0, 37.0, 42.0, 32.0, 36.0, 11.0, 8.0, 13.0, 20.0, 11.0, 8.0, 8.0, 17.0, 14.0, 17.0, 19.0, 17.0, 14.0, 30.0, 14.0, 25.0, 5.0, 6.0, 6.0, 16.0, 9.0, 33.0, 17.0, 14.0, 11.0, 14.0, 6.0, 14.0, 12.0, 19.0, 11.0, 20.0, 14.0, 3.0, 6.0, 8.0, 5.0, 6.0, 9.0, 8.0, 8.0, 11.0, 6.0, 11.0, 19.0, 9.0, 17.0, 17.0, 3.0, 17.0, 26.0, 31.0, 8.0, 6.0, 38.0, 33.0, 6.0, 6.0, 3.0, 9.0, 34.0, 32.0, 17.0, 11.0, 31.0, 32.0, 6.0, 11.0, 16.0, 12.0, 12.0, 11.0, 3.0, 6.0, 3.0, 17.0, 10.0, 12.0, 36.0, 35.0, 11.0, 15.0, 13.0, 14.0, 3.0, 17.0, 14.0, 6.0, 6.0, 3.0, 6.0, 13.0, 12.0, 8.0, 40.0, 36.0, 8.0, 11.0, 40.0, 28.0, 34.0, 34.0, 3.0, 14.0, 9.0, 16.0, 34.0, 42.0, 18.0, 26.0, 11.0, 17.0, 9.0, 3.0, 17.0, 19.0, 19.0, 14.0, 3.0, 6.0, 16.0, 
8.0, 16.0, 12.0, 12.0, 13.0, 20.0, 11.0, 37.0, 31.0, 9.0, 8.0, 28.0, 19.0, 9.0, 14.0, 3.0, 9.0, 11.0, 6.0, 8.0, 14.0, 11.0, 17.0, 9.0, 14.0, 9.0, 11.0, 20.0, 17.0, 9.0, 5.0, 5.0, 3.0, 16.0, 12.0, 12.0, 10.0, 12.0, 27.0, 38.0, 44.0, 35.0, 36.0, 29.0, 28.0, 11.0, 19.0, 0.0, 9.0]}, "sampler_perf": {"mean_env_wait_ms": 8.541158675961446, "mean_processing_ms": 0.1664622179417048, "mean_inference_ms": 3.3849019348875076}, "off_policy_estimator": {}, "info": {"num_steps_trained": 456000, "num_steps_sampled": 243200, "sample_time_ms": 20842.585, "load_time_ms": 38.976, "grad_time_ms": 8953.309, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 7.629394644936838e-07, "cur_lr": 0.0010000000474974513, "total_loss": -0.09107109159231186, "policy_loss": -0.009291496127843857, "vf_loss": 5.607062816619873, "vf_explained_var": 0.08896120637655258, "kl": 0.0008400729275308549, "entropy": 1.7600102424621582, "entropy_coeff": 0.04678399860858917, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 243200, "episodes_total": 608, "training_iteration": 19, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_20-02-08", "timestamp": 1660244528, "time_this_iter_s": 29.248838186264038, "time_total_s": 2949.509967327118, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, 
"custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, 
"device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 2949.509967327118, "timesteps_since_restore": 243200, "iterations_since_restore": 19, "perf": {"cpu_util_percent": 33.43571428571428, "ram_util_percent": 57.790476190476205}}
+{"episode_reward_max": 88.0, "episode_reward_min": 8.0, "episode_reward_mean": 37.1, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 56.0}, "policy_reward_mean": {"ppo": 18.55}, "custom_metrics": {"sparse_reward_mean": 4.8, "sparse_reward_min": 0, "sparse_reward_max": 20, "shaped_reward_mean": 27.5, "shaped_reward_min": 8, "shaped_reward_max": 48, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 5.54, "onion_pickup_agent_0_min": 0, "onion_pickup_agent_0_max": 11, "onion_pickup_agent_1_mean": 5.78, "onion_pickup_agent_1_min": 1, "onion_pickup_agent_1_max": 11, "useful_onion_pickup_agent_0_mean": 3.75, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 9, "useful_onion_pickup_agent_1_mean": 3.94, "useful_onion_pickup_agent_1_min": 0, "useful_onion_pickup_agent_1_max": 9, "onion_drop_agent_0_mean": 2.61, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 7, 
"onion_drop_agent_1_mean": 2.83, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 9, "useful_onion_drop_agent_0_mean": 1.36, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 1.41, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 7, "potting_onion_agent_0_mean": 2.45, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 6, "potting_onion_agent_1_mean": 2.42, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 7, "dish_pickup_agent_0_mean": 3.16, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 3.13, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 7, "useful_dish_pickup_agent_0_mean": 0.43, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 2, "useful_dish_pickup_agent_1_mean": 0.65, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 3, "dish_drop_agent_0_mean": 2.09, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 9, "dish_drop_agent_1_mean": 1.82, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 6, "useful_dish_drop_agent_0_mean": 0.72, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 5, "useful_dish_drop_agent_1_mean": 0.49, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 5, "soup_pickup_agent_0_mean": 1.31, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 6, "soup_pickup_agent_1_mean": 1.65, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 6, "soup_delivery_agent_0_mean": 0.61, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 3, "soup_delivery_agent_1_mean": 0.59, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 2, "soup_drop_agent_0_mean": 0.62, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 6, "soup_drop_agent_1_mean": 1.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 5, "optimal_onion_potting_agent_0_mean": 2.45, "optimal_onion_potting_agent_0_min": 0, 
"optimal_onion_potting_agent_0_max": 6, "optimal_onion_potting_agent_1_mean": 2.42, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 7, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 2.45, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 6, "viable_onion_potting_agent_1_mean": 2.42, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 7, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": 
[23.0, 74.0, 17.0, 14.0, 9.0, 76.0, 63.0, 88.0, 66.0, 37.0, 37.0, 37.0, 68.0, 77.0, 28.0, 28.0, 68.0, 25.0, 60.0, 20.0, 22.0, 36.0, 23.0, 82.0, 12.0, 9.0, 23.0, 19.0, 42.0, 79.0, 42.0, 20.0, 68.0, 68.0, 17.0, 25.0, 76.0, 44.0, 28.0, 12.0, 36.0, 33.0, 9.0, 24.0, 28.0, 25.0, 31.0, 68.0, 17.0, 47.0, 23.0, 12.0, 17.0, 22.0, 28.0, 23.0, 20.0, 37.0, 14.0, 8.0, 28.0, 22.0, 39.0, 82.0, 71.0, 57.0, 30.0, 9.0, 34.0, 42.0, 88.0, 33.0, 17.0, 30.0, 76.0, 85.0, 33.0, 31.0, 68.0, 12.0, 16.0, 79.0, 68.0, 19.0, 33.0, 19.0, 25.0, 31.0, 36.0, 44.0, 39.0, 11.0, 22.0, 42.0, 31.0, 25.0, 20.0, 31.0, 31.0, 17.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [11.0, 12.0, 43.0, 31.0, 11.0, 6.0, 14.0, 0.0, 9.0, 0.0, 45.0, 31.0, 28.0, 35.0, 32.0, 56.0, 38.0, 28.0, 14.0, 23.0, 8.0, 29.0, 14.0, 23.0, 34.0, 34.0, 34.0, 43.0, 14.0, 14.0, 14.0, 14.0, 30.0, 38.0, 14.0, 11.0, 26.0, 34.0, 6.0, 14.0, 8.0, 14.0, 17.0, 19.0, 17.0, 6.0, 39.0, 43.0, 6.0, 6.0, 3.0, 6.0, 11.0, 12.0, 13.0, 6.0, 17.0, 25.0, 37.0, 42.0, 22.0, 20.0, 6.0, 14.0, 40.0, 28.0, 34.0, 34.0, 3.0, 14.0, 9.0, 16.0, 34.0, 42.0, 18.0, 26.0, 11.0, 17.0, 9.0, 3.0, 17.0, 19.0, 19.0, 14.0, 3.0, 6.0, 16.0, 8.0, 16.0, 12.0, 12.0, 13.0, 20.0, 11.0, 37.0, 31.0, 9.0, 8.0, 28.0, 19.0, 9.0, 14.0, 3.0, 9.0, 11.0, 6.0, 8.0, 14.0, 11.0, 17.0, 9.0, 14.0, 9.0, 11.0, 20.0, 17.0, 9.0, 5.0, 5.0, 3.0, 16.0, 12.0, 12.0, 10.0, 12.0, 27.0, 38.0, 44.0, 35.0, 36.0, 29.0, 28.0, 11.0, 19.0, 0.0, 9.0, 11.0, 23.0, 25.0, 17.0, 45.0, 43.0, 22.0, 11.0, 5.0, 12.0, 17.0, 13.0, 48.0, 
28.0, 32.0, 53.0, 22.0, 11.0, 18.0, 13.0, 29.0, 39.0, 6.0, 6.0, 3.0, 13.0, 37.0, 42.0, 32.0, 36.0, 11.0, 8.0, 13.0, 20.0, 11.0, 8.0, 8.0, 17.0, 14.0, 17.0, 19.0, 17.0, 14.0, 30.0, 14.0, 25.0, 5.0, 6.0, 6.0, 16.0, 9.0, 33.0, 17.0, 14.0, 11.0, 14.0, 6.0, 14.0, 12.0, 19.0, 11.0, 20.0, 14.0, 3.0]}, "sampler_perf": {"mean_env_wait_ms": 8.114151695990262, "mean_processing_ms": 0.1664756703889973, "mean_inference_ms": 3.2561915126083236}, "off_policy_estimator": {}, "info": {"num_steps_trained": 480000, "num_steps_sampled": 256000, "sample_time_ms": 21118.245, "load_time_ms": 39.16, "grad_time_ms": 8931.236, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 3.814697322468419e-07, "cur_lr": 0.0010000000474974513, "total_loss": -0.07289835065603256, "policy_loss": -0.006338973995298147, "vf_loss": 7.939427852630615, "vf_explained_var": 0.1275780349969864, "kl": 0.000996587099507451, "entropy": 1.7598587274551392, "entropy_coeff": 0.03827200084924698, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 256000, "episodes_total": 640, "training_iteration": 20, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_20-02-37", "timestamp": 1660244557, "time_this_iter_s": 28.921189069747925, "time_total_s": 2978.431156396866, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, 
"custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, 
"device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 2978.431156396866, "timesteps_since_restore": 256000, "iterations_since_restore": 20, "perf": {"cpu_util_percent": 30.78048780487805, "ram_util_percent": 57.77073170731706}}
+{"episode_reward_max": 116.0, "episode_reward_min": 9.0, "episode_reward_mean": 39.04, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 68.0}, "policy_reward_mean": {"ppo": 19.52}, "custom_metrics": {"sparse_reward_mean": 5.8, "sparse_reward_min": 0, "sparse_reward_max": 40, "shaped_reward_mean": 27.44, "shaped_reward_min": 9, "shaped_reward_max": 48, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 5.18, "onion_pickup_agent_0_min": 1, "onion_pickup_agent_0_max": 11, "onion_pickup_agent_1_mean": 5.61, "onion_pickup_agent_1_min": 1, "onion_pickup_agent_1_max": 11, "useful_onion_pickup_agent_0_mean": 3.52, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 9, "useful_onion_pickup_agent_1_mean": 3.9, "useful_onion_pickup_agent_1_min": 0, "useful_onion_pickup_agent_1_max": 9, "onion_drop_agent_0_mean": 2.43, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 
7, "onion_drop_agent_1_mean": 2.52, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 8, "useful_onion_drop_agent_0_mean": 1.34, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 1.3, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 7, "potting_onion_agent_0_mean": 2.3, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 6, "potting_onion_agent_1_mean": 2.63, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 7, "dish_pickup_agent_0_mean": 3.32, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 3.21, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 0.46, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 2, "useful_dish_pickup_agent_1_mean": 0.54, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 3, "dish_drop_agent_0_mean": 2.19, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 1.95, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 6, "useful_dish_drop_agent_0_mean": 0.87, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 5, "useful_dish_drop_agent_1_mean": 0.61, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 5, "soup_pickup_agent_0_mean": 1.36, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 6, "soup_pickup_agent_1_mean": 1.49, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 6, "soup_delivery_agent_0_mean": 0.58, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 2, "soup_delivery_agent_1_mean": 0.57, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 2, "soup_drop_agent_0_mean": 0.69, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 5, "soup_drop_agent_1_mean": 0.85, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 5, "optimal_onion_potting_agent_0_mean": 2.3, "optimal_onion_potting_agent_0_min": 0, 
"optimal_onion_potting_agent_0_max": 6, "optimal_onion_potting_agent_1_mean": 2.63, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 7, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 2.3, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 6, "viable_onion_potting_agent_1_mean": 2.63, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 7, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [25.0, 
9.0, 17.0, 27.0, 14.0, 19.0, 60.0, 36.0, 25.0, 17.0, 16.0, 28.0, 34.0, 45.0, 23.0, 36.0, 22.0, 22.0, 63.0, 12.0, 68.0, 25.0, 63.0, 25.0, 11.0, 57.0, 76.0, 30.0, 116.0, 45.0, 79.0, 80.0, 71.0, 57.0, 30.0, 9.0, 34.0, 42.0, 88.0, 33.0, 17.0, 30.0, 76.0, 85.0, 33.0, 31.0, 68.0, 12.0, 16.0, 79.0, 68.0, 19.0, 33.0, 19.0, 25.0, 31.0, 36.0, 44.0, 39.0, 11.0, 22.0, 42.0, 31.0, 25.0, 20.0, 31.0, 31.0, 17.0, 23.0, 74.0, 17.0, 14.0, 9.0, 76.0, 63.0, 88.0, 66.0, 37.0, 37.0, 37.0, 68.0, 77.0, 28.0, 28.0, 68.0, 25.0, 60.0, 20.0, 22.0, 36.0, 23.0, 82.0, 12.0, 9.0, 23.0, 19.0, 42.0, 79.0, 42.0, 20.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [12.0, 13.0, 3.0, 6.0, 6.0, 11.0, 10.0, 17.0, 11.0, 3.0, 6.0, 13.0, 26.0, 34.0, 17.0, 19.0, 3.0, 22.0, 8.0, 9.0, 8.0, 8.0, 19.0, 9.0, 28.0, 6.0, 25.0, 20.0, 14.0, 9.0, 9.0, 27.0, 16.0, 6.0, 11.0, 11.0, 31.0, 32.0, 6.0, 6.0, 33.0, 35.0, 13.0, 12.0, 29.0, 34.0, 6.0, 19.0, 3.0, 8.0, 26.0, 31.0, 34.0, 42.0, 10.0, 20.0, 68.0, 48.0, 22.0, 23.0, 48.0, 31.0, 45.0, 35.0, 35.0, 36.0, 29.0, 28.0, 11.0, 19.0, 0.0, 9.0, 11.0, 23.0, 25.0, 17.0, 45.0, 43.0, 22.0, 11.0, 5.0, 12.0, 17.0, 13.0, 48.0, 28.0, 32.0, 53.0, 22.0, 11.0, 18.0, 13.0, 29.0, 39.0, 6.0, 6.0, 3.0, 13.0, 37.0, 42.0, 32.0, 36.0, 11.0, 8.0, 13.0, 20.0, 11.0, 8.0, 8.0, 17.0, 14.0, 17.0, 19.0, 17.0, 14.0, 30.0, 14.0, 25.0, 5.0, 6.0, 6.0, 16.0, 9.0, 33.0, 17.0, 14.0, 11.0, 14.0, 6.0, 14.0, 12.0, 19.0, 11.0, 20.0, 14.0, 3.0, 11.0, 12.0, 43.0, 31.0, 11.0, 6.0, 14.0, 0.0, 9.0, 0.0, 45.0, 31.0, 28.0, 35.0, 
32.0, 56.0, 38.0, 28.0, 14.0, 23.0, 8.0, 29.0, 14.0, 23.0, 34.0, 34.0, 34.0, 43.0, 14.0, 14.0, 14.0, 14.0, 30.0, 38.0, 14.0, 11.0, 26.0, 34.0, 6.0, 14.0, 8.0, 14.0, 17.0, 19.0, 17.0, 6.0, 39.0, 43.0, 6.0, 6.0, 3.0, 6.0, 11.0, 12.0, 13.0, 6.0, 17.0, 25.0, 37.0, 42.0, 22.0, 20.0, 6.0, 14.0]}, "sampler_perf": {"mean_env_wait_ms": 7.729743796447544, "mean_processing_ms": 0.1663097758329898, "mean_inference_ms": 3.1401875416297957}, "off_policy_estimator": {}, "info": {"num_steps_trained": 504000, "num_steps_sampled": 268800, "sample_time_ms": 21421.608, "load_time_ms": 38.85, "grad_time_ms": 8956.305, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 1.9073486612342094e-07, "cur_lr": 0.0010000000474974513, "total_loss": -0.05712709203362465, "policy_loss": -0.005733281373977661, "vf_loss": 7.644298553466797, "vf_explained_var": 0.10351377725601196, "kl": 0.0011409734142944217, "entropy": 1.7526286840438843, "entropy_coeff": 0.029759999364614487, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 268800, "episodes_total": 672, "training_iteration": 21, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_20-03-07", "timestamp": 1660244587, "time_this_iter_s": 29.522944927215576, "time_total_s": 3007.9541013240814, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, 
"custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, 
"device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 3007.9541013240814, "timesteps_since_restore": 268800, "iterations_since_restore": 21, "perf": {"cpu_util_percent": 34.18809523809524, "ram_util_percent": 57.730952380952374}}
+{"episode_reward_max": 116.0, "episode_reward_min": 9.0, "episode_reward_mean": 37.47, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 68.0}, "policy_reward_mean": {"ppo": 18.735}, "custom_metrics": {"sparse_reward_mean": 5.4, "sparse_reward_min": 0, "sparse_reward_max": 40, "shaped_reward_mean": 26.67, "shaped_reward_min": 9, "shaped_reward_max": 50, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 5.24, "onion_pickup_agent_0_min": 1, "onion_pickup_agent_0_max": 12, "onion_pickup_agent_1_mean": 5.51, "onion_pickup_agent_1_min": 1, "onion_pickup_agent_1_max": 11, "useful_onion_pickup_agent_0_mean": 3.51, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 9, "useful_onion_pickup_agent_1_mean": 3.59, "useful_onion_pickup_agent_1_min": 0, "useful_onion_pickup_agent_1_max": 9, "onion_drop_agent_0_mean": 2.46, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 
7, "onion_drop_agent_1_mean": 2.54, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 8, "useful_onion_drop_agent_0_mean": 1.5, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 1.49, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 7, "potting_onion_agent_0_mean": 2.32, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 6, "potting_onion_agent_1_mean": 2.55, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 7, "dish_pickup_agent_0_mean": 3.18, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 3.39, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 0.45, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 2, "useful_dish_pickup_agent_1_mean": 0.52, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 3, "dish_drop_agent_0_mean": 2.05, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 2.22, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 6, "useful_dish_drop_agent_0_mean": 0.81, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 5, "useful_dish_drop_agent_1_mean": 0.7, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 5, "soup_pickup_agent_0_mean": 1.35, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 6, "soup_pickup_agent_1_mean": 1.35, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 6, "soup_delivery_agent_0_mean": 0.52, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 2, "soup_delivery_agent_1_mean": 0.55, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 2, "soup_drop_agent_0_mean": 0.73, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 5, "soup_drop_agent_1_mean": 0.71, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 6, "optimal_onion_potting_agent_0_mean": 2.32, "optimal_onion_potting_agent_0_min": 0, 
"optimal_onion_potting_agent_0_max": 6, "optimal_onion_potting_agent_1_mean": 2.55, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 7, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 2.32, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 6, "viable_onion_potting_agent_1_mean": 2.55, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 7, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": 
[73.0, 25.0, 31.0, 34.0, 16.0, 9.0, 23.0, 39.0, 35.0, 28.0, 79.0, 36.0, 57.0, 25.0, 28.0, 12.0, 17.0, 25.0, 25.0, 71.0, 44.0, 62.0, 22.0, 17.0, 23.0, 25.0, 37.0, 66.0, 50.0, 23.0, 12.0, 30.0, 20.0, 31.0, 31.0, 17.0, 23.0, 74.0, 17.0, 14.0, 9.0, 76.0, 63.0, 88.0, 66.0, 37.0, 37.0, 37.0, 68.0, 77.0, 28.0, 28.0, 68.0, 25.0, 60.0, 20.0, 22.0, 36.0, 23.0, 82.0, 12.0, 9.0, 23.0, 19.0, 42.0, 79.0, 42.0, 20.0, 25.0, 9.0, 17.0, 27.0, 14.0, 19.0, 60.0, 36.0, 25.0, 17.0, 16.0, 28.0, 34.0, 45.0, 23.0, 36.0, 22.0, 22.0, 63.0, 12.0, 68.0, 25.0, 63.0, 25.0, 11.0, 57.0, 76.0, 30.0, 116.0, 45.0, 79.0, 80.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [41.0, 32.0, 17.0, 8.0, 10.0, 21.0, 21.0, 13.0, 6.0, 10.0, 6.0, 3.0, 9.0, 14.0, 25.0, 14.0, 27.0, 8.0, 5.0, 23.0, 34.0, 45.0, 25.0, 11.0, 23.0, 34.0, 14.0, 11.0, 14.0, 14.0, 6.0, 6.0, 3.0, 14.0, 14.0, 11.0, 11.0, 14.0, 34.0, 37.0, 28.0, 16.0, 26.0, 36.0, 13.0, 9.0, 6.0, 11.0, 9.0, 14.0, 20.0, 5.0, 23.0, 14.0, 32.0, 34.0, 19.0, 31.0, 11.0, 12.0, 6.0, 6.0, 9.0, 21.0, 6.0, 14.0, 12.0, 19.0, 11.0, 20.0, 14.0, 3.0, 11.0, 12.0, 43.0, 31.0, 11.0, 6.0, 14.0, 0.0, 9.0, 0.0, 45.0, 31.0, 28.0, 35.0, 32.0, 56.0, 38.0, 28.0, 14.0, 23.0, 8.0, 29.0, 14.0, 23.0, 34.0, 34.0, 34.0, 43.0, 14.0, 14.0, 14.0, 14.0, 30.0, 38.0, 14.0, 11.0, 26.0, 34.0, 6.0, 14.0, 8.0, 14.0, 17.0, 19.0, 17.0, 6.0, 39.0, 43.0, 6.0, 6.0, 3.0, 6.0, 11.0, 12.0, 13.0, 6.0, 17.0, 25.0, 37.0, 42.0, 22.0, 20.0, 6.0, 14.0, 12.0, 13.0, 3.0, 6.0, 6.0, 11.0, 10.0, 17.0, 11.0, 3.0, 6.0, 13.0, 26.0, 
34.0, 17.0, 19.0, 3.0, 22.0, 8.0, 9.0, 8.0, 8.0, 19.0, 9.0, 28.0, 6.0, 25.0, 20.0, 14.0, 9.0, 9.0, 27.0, 16.0, 6.0, 11.0, 11.0, 31.0, 32.0, 6.0, 6.0, 33.0, 35.0, 13.0, 12.0, 29.0, 34.0, 6.0, 19.0, 3.0, 8.0, 26.0, 31.0, 34.0, 42.0, 10.0, 20.0, 68.0, 48.0, 22.0, 23.0, 48.0, 31.0, 45.0, 35.0]}, "sampler_perf": {"mean_env_wait_ms": 7.382168276478265, "mean_processing_ms": 0.16610498275202595, "mean_inference_ms": 3.034581035402087}, "off_policy_estimator": {}, "info": {"num_steps_trained": 528000, "num_steps_sampled": 281600, "sample_time_ms": 21629.558, "load_time_ms": 39.186, "grad_time_ms": 8958.867, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 9.536743306171047e-08, "cur_lr": 0.0010000000474974513, "total_loss": -0.045350998640060425, "policy_loss": -0.008628163486719131, "vf_loss": 5.3433098793029785, "vf_explained_var": 0.18131445348262787, "kl": 0.0009087324724532664, "entropy": 1.7534428834915161, "entropy_coeff": 0.021247999742627144, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 281600, "episodes_total": 704, "training_iteration": 22, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_20-03-36", "timestamp": 1660244616, "time_this_iter_s": 29.197812795639038, "time_total_s": 3037.1519141197205, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": 
null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, 
"device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 3037.1519141197205, "timesteps_since_restore": 281600, "iterations_since_restore": 22, "perf": {"cpu_util_percent": 36.32142857142857, "ram_util_percent": 57.66904761904762}}
+{"episode_reward_max": 128.0, "episode_reward_min": 9.0, "episode_reward_mean": 40.29, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 68.0}, "policy_reward_mean": {"ppo": 20.145}, "custom_metrics": {"sparse_reward_mean": 6.0, "sparse_reward_min": 0, "sparse_reward_max": 40, "shaped_reward_mean": 28.29, "shaped_reward_min": 9, "shaped_reward_max": 53, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 5.24, "onion_pickup_agent_0_min": 1, "onion_pickup_agent_0_max": 12, "onion_pickup_agent_1_mean": 5.54, "onion_pickup_agent_1_min": 1, "onion_pickup_agent_1_max": 13, "useful_onion_pickup_agent_0_mean": 3.44, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 10, "useful_onion_pickup_agent_1_mean": 3.7, "useful_onion_pickup_agent_1_min": 0, "useful_onion_pickup_agent_1_max": 11, "onion_drop_agent_0_mean": 2.36, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 2.43, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 8, "useful_onion_drop_agent_0_mean": 1.43, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 1.42, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 7, "potting_onion_agent_0_mean": 2.43, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 6, "potting_onion_agent_1_mean": 2.7, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 8, "dish_pickup_agent_0_mean": 3.2, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 3.37, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 0.44, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 2, "useful_dish_pickup_agent_1_mean": 0.51, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 3, "dish_drop_agent_0_mean": 2.02, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 2.07, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 6, "useful_dish_drop_agent_0_mean": 0.72, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 4, "useful_dish_drop_agent_1_mean": 0.77, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 4, "soup_pickup_agent_0_mean": 1.49, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 6, "soup_pickup_agent_1_mean": 1.41, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 6, "soup_delivery_agent_0_mean": 0.55, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 2, "soup_delivery_agent_1_mean": 0.61, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 3, "soup_drop_agent_0_mean": 0.82, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 5, "soup_drop_agent_1_mean": 0.67, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 6, "optimal_onion_potting_agent_0_mean": 2.43, "optimal_onion_potting_agent_0_min": 
0, "optimal_onion_potting_agent_0_max": 6, "optimal_onion_potting_agent_1_mean": 2.7, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 8, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 2.43, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 6, "viable_onion_potting_agent_1_mean": 2.7, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 8, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": 
[63.0, 9.0, 12.0, 125.0, 82.0, 68.0, 47.0, 24.0, 28.0, 30.0, 33.0, 20.0, 47.0, 128.0, 22.0, 37.0, 43.0, 74.0, 80.0, 53.0, 62.0, 76.0, 36.0, 31.0, 39.0, 20.0, 28.0, 9.0, 34.0, 17.0, 60.0, 85.0, 42.0, 79.0, 42.0, 20.0, 25.0, 9.0, 17.0, 27.0, 14.0, 19.0, 60.0, 36.0, 25.0, 17.0, 16.0, 28.0, 34.0, 45.0, 23.0, 36.0, 22.0, 22.0, 63.0, 12.0, 68.0, 25.0, 63.0, 25.0, 11.0, 57.0, 76.0, 30.0, 116.0, 45.0, 79.0, 80.0, 73.0, 25.0, 31.0, 34.0, 16.0, 9.0, 23.0, 39.0, 35.0, 28.0, 79.0, 36.0, 57.0, 25.0, 28.0, 12.0, 17.0, 25.0, 25.0, 71.0, 44.0, 62.0, 22.0, 17.0, 23.0, 25.0, 37.0, 66.0, 50.0, 23.0, 12.0, 30.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [29.0, 34.0, 3.0, 6.0, 6.0, 6.0, 58.0, 67.0, 48.0, 34.0, 32.0, 36.0, 8.0, 39.0, 9.0, 15.0, 13.0, 15.0, 19.0, 11.0, 14.0, 19.0, 8.0, 12.0, 28.0, 19.0, 61.0, 67.0, 10.0, 12.0, 11.0, 26.0, 14.0, 29.0, 40.0, 34.0, 38.0, 42.0, 17.0, 36.0, 26.0, 36.0, 39.0, 37.0, 14.0, 22.0, 22.0, 9.0, 22.0, 17.0, 3.0, 17.0, 8.0, 20.0, 9.0, 0.0, 17.0, 17.0, 11.0, 6.0, 31.0, 29.0, 48.0, 37.0, 17.0, 25.0, 37.0, 42.0, 22.0, 20.0, 6.0, 14.0, 12.0, 13.0, 3.0, 6.0, 6.0, 11.0, 10.0, 17.0, 11.0, 3.0, 6.0, 13.0, 26.0, 34.0, 17.0, 19.0, 3.0, 22.0, 8.0, 9.0, 8.0, 8.0, 19.0, 9.0, 28.0, 6.0, 25.0, 20.0, 14.0, 9.0, 9.0, 27.0, 16.0, 6.0, 11.0, 11.0, 31.0, 32.0, 6.0, 6.0, 33.0, 35.0, 13.0, 12.0, 29.0, 34.0, 6.0, 19.0, 3.0, 8.0, 26.0, 31.0, 34.0, 42.0, 10.0, 20.0, 68.0, 48.0, 22.0, 23.0, 48.0, 31.0, 45.0, 35.0, 41.0, 32.0, 17.0, 8.0, 10.0, 21.0, 21.0, 13.0, 6.0, 10.0, 6.0, 3.0, 9.0, 
14.0, 25.0, 14.0, 27.0, 8.0, 5.0, 23.0, 34.0, 45.0, 25.0, 11.0, 23.0, 34.0, 14.0, 11.0, 14.0, 14.0, 6.0, 6.0, 3.0, 14.0, 14.0, 11.0, 11.0, 14.0, 34.0, 37.0, 28.0, 16.0, 26.0, 36.0, 13.0, 9.0, 6.0, 11.0, 9.0, 14.0, 20.0, 5.0, 23.0, 14.0, 32.0, 34.0, 19.0, 31.0, 11.0, 12.0, 6.0, 6.0, 9.0, 21.0]}, "sampler_perf": {"mean_env_wait_ms": 7.066502405705404, "mean_processing_ms": 0.16594670413037083, "mean_inference_ms": 2.938769748646424}, "off_policy_estimator": {}, "info": {"num_steps_trained": 552000, "num_steps_sampled": 294400, "sample_time_ms": 20554.7, "load_time_ms": 39.212, "grad_time_ms": 9005.588, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 4.7683716530855236e-08, "cur_lr": 0.0010000000474974513, "total_loss": -0.025955183431506157, "policy_loss": -0.0047044227831065655, "vf_loss": 9.28939437866211, "vf_explained_var": 0.20618398487567902, "kl": 0.0012655678438022733, "entropy": 1.741496205329895, "entropy_coeff": 0.012736000120639801, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 294400, "episodes_total": 736, "training_iteration": 23, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_20-04-05", "timestamp": 1660244645, "time_this_iter_s": 29.364897966384888, "time_total_s": 3066.5168120861053, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": 
null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, 
"device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 3066.5168120861053, "timesteps_since_restore": 294400, "iterations_since_restore": 23, "perf": {"cpu_util_percent": 34.0, "ram_util_percent": 57.70487804878048}}
+{"episode_reward_max": 128.0, "episode_reward_min": 9.0, "episode_reward_mean": 43.02, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 68.0}, "policy_reward_mean": {"ppo": 21.51}, "custom_metrics": {"sparse_reward_mean": 6.8, "sparse_reward_min": 0, "sparse_reward_max": 40, "shaped_reward_mean": 29.42, "shaped_reward_min": 9, "shaped_reward_max": 53, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 5.56, "onion_pickup_agent_0_min": 1, "onion_pickup_agent_0_max": 15, "onion_pickup_agent_1_mean": 5.35, "onion_pickup_agent_1_min": 1, "onion_pickup_agent_1_max": 13, "useful_onion_pickup_agent_0_mean": 3.71, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 10, "useful_onion_pickup_agent_1_mean": 3.74, "useful_onion_pickup_agent_1_min": 0, "useful_onion_pickup_agent_1_max": 11, "onion_drop_agent_0_mean": 2.43, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 9, "onion_drop_agent_1_mean": 2.31, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 7, "useful_onion_drop_agent_0_mean": 1.41, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 1.22, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 6, "potting_onion_agent_0_mean": 2.68, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 6, "potting_onion_agent_1_mean": 2.66, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 8, "dish_pickup_agent_0_mean": 3.29, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 3.32, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 0.52, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 2, "useful_dish_pickup_agent_1_mean": 0.48, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 2, "dish_drop_agent_0_mean": 2.0, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 9, "dish_drop_agent_1_mean": 1.94, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 6, "useful_dish_drop_agent_0_mean": 0.76, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 7, "useful_dish_drop_agent_1_mean": 0.7, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 5, "soup_pickup_agent_0_mean": 1.46, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 5, "soup_pickup_agent_1_mean": 1.47, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 6, "soup_delivery_agent_0_mean": 0.6, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 2, "soup_delivery_agent_1_mean": 0.71, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 3, "soup_drop_agent_0_mean": 0.76, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 4, "soup_drop_agent_1_mean": 0.66, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 6, "optimal_onion_potting_agent_0_mean": 2.68, "optimal_onion_potting_agent_0_min": 
0, "optimal_onion_potting_agent_0_max": 6, "optimal_onion_potting_agent_1_mean": 2.66, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 8, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 2.68, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 6, "viable_onion_potting_agent_1_mean": 2.66, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 8, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": 
[9.0, 42.0, 31.0, 28.0, 34.0, 41.0, 14.0, 20.0, 31.0, 41.0, 79.0, 30.0, 74.0, 28.0, 9.0, 22.0, 39.0, 65.0, 63.0, 66.0, 68.0, 22.0, 76.0, 47.0, 63.0, 79.0, 79.0, 74.0, 25.0, 20.0, 17.0, 25.0, 116.0, 45.0, 79.0, 80.0, 73.0, 25.0, 31.0, 34.0, 16.0, 9.0, 23.0, 39.0, 35.0, 28.0, 79.0, 36.0, 57.0, 25.0, 28.0, 12.0, 17.0, 25.0, 25.0, 71.0, 44.0, 62.0, 22.0, 17.0, 23.0, 25.0, 37.0, 66.0, 50.0, 23.0, 12.0, 30.0, 63.0, 9.0, 12.0, 125.0, 82.0, 68.0, 47.0, 24.0, 28.0, 30.0, 33.0, 20.0, 47.0, 128.0, 22.0, 37.0, 43.0, 74.0, 80.0, 53.0, 62.0, 76.0, 36.0, 31.0, 39.0, 20.0, 28.0, 9.0, 34.0, 17.0, 60.0, 85.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [6.0, 3.0, 22.0, 20.0, 19.0, 12.0, 14.0, 14.0, 29.0, 5.0, 25.0, 16.0, 14.0, 0.0, 9.0, 11.0, 16.0, 15.0, 14.0, 27.0, 37.0, 42.0, 15.0, 15.0, 42.0, 32.0, 16.0, 12.0, 6.0, 3.0, 19.0, 3.0, 28.0, 11.0, 28.0, 37.0, 31.0, 32.0, 35.0, 31.0, 26.0, 42.0, 5.0, 17.0, 34.0, 42.0, 31.0, 16.0, 28.0, 35.0, 35.0, 44.0, 40.0, 39.0, 38.0, 36.0, 8.0, 17.0, 12.0, 8.0, 8.0, 9.0, 9.0, 16.0, 68.0, 48.0, 22.0, 23.0, 48.0, 31.0, 45.0, 35.0, 41.0, 32.0, 17.0, 8.0, 10.0, 21.0, 21.0, 13.0, 6.0, 10.0, 6.0, 3.0, 9.0, 14.0, 25.0, 14.0, 27.0, 8.0, 5.0, 23.0, 34.0, 45.0, 25.0, 11.0, 23.0, 34.0, 14.0, 11.0, 14.0, 14.0, 6.0, 6.0, 3.0, 14.0, 14.0, 11.0, 11.0, 14.0, 34.0, 37.0, 28.0, 16.0, 26.0, 36.0, 13.0, 9.0, 6.0, 11.0, 9.0, 14.0, 20.0, 5.0, 23.0, 14.0, 32.0, 34.0, 19.0, 31.0, 11.0, 12.0, 6.0, 6.0, 9.0, 21.0, 29.0, 34.0, 3.0, 6.0, 6.0, 6.0, 58.0, 67.0, 48.0, 34.0, 32.0, 36.0, 
8.0, 39.0, 9.0, 15.0, 13.0, 15.0, 19.0, 11.0, 14.0, 19.0, 8.0, 12.0, 28.0, 19.0, 61.0, 67.0, 10.0, 12.0, 11.0, 26.0, 14.0, 29.0, 40.0, 34.0, 38.0, 42.0, 17.0, 36.0, 26.0, 36.0, 39.0, 37.0, 14.0, 22.0, 22.0, 9.0, 22.0, 17.0, 3.0, 17.0, 8.0, 20.0, 9.0, 0.0, 17.0, 17.0, 11.0, 6.0, 31.0, 29.0, 48.0, 37.0]}, "sampler_perf": {"mean_env_wait_ms": 6.778628865492887, "mean_processing_ms": 0.1658515378049688, "mean_inference_ms": 2.8523193064237637}, "off_policy_estimator": {}, "info": {"num_steps_trained": 576000, "num_steps_sampled": 307200, "sample_time_ms": 20518.226, "load_time_ms": 38.592, "grad_time_ms": 9000.858, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 2.3841858265427618e-08, "cur_lr": 0.0010000000474974513, "total_loss": -0.01208301167935133, "policy_loss": -0.0054773432202637196, "vf_loss": 7.661229610443115, "vf_explained_var": 0.24070757627487183, "kl": 0.001010378822684288, "entropy": 1.7452141046524048, "entropy_coeff": 0.004224000032991171, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 307200, "episodes_total": 768, "training_iteration": 24, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_20-04-37", "timestamp": 1660244677, "time_this_iter_s": 32.22774386405945, "time_total_s": 3098.744555950165, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, 
"custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, 
"log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 3098.744555950165, "timesteps_since_restore": 307200, "iterations_since_restore": 24, "perf": {"cpu_util_percent": 33.75869565217391, "ram_util_percent": 57.7217391304348}}
+{"episode_reward_max": 128.0, "episode_reward_min": 9.0, "episode_reward_mean": 45.42, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 67.0}, "policy_reward_mean": {"ppo": 22.71}, "custom_metrics": {"sparse_reward_mean": 7.4, "sparse_reward_min": 0, "sparse_reward_max": 40, "shaped_reward_mean": 30.62, "shaped_reward_min": 9, "shaped_reward_max": 56, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 5.3, "onion_pickup_agent_0_min": 1, "onion_pickup_agent_0_max": 15, "onion_pickup_agent_1_mean": 5.66, "onion_pickup_agent_1_min": 1, "onion_pickup_agent_1_max": 13, "useful_onion_pickup_agent_0_mean": 3.55, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 10, "useful_onion_pickup_agent_1_mean": 4.23, "useful_onion_pickup_agent_1_min": 0, "useful_onion_pickup_agent_1_max": 11, "onion_drop_agent_0_mean": 2.27, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 
9, "onion_drop_agent_1_mean": 2.26, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 1.28, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 1.07, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 2.55, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 6, "potting_onion_agent_1_mean": 2.99, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 8, "dish_pickup_agent_0_mean": 3.67, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 3.36, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 0.62, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 3, "useful_dish_pickup_agent_1_mean": 0.48, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 3, "dish_drop_agent_0_mean": 2.35, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 9, "dish_drop_agent_1_mean": 1.95, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.91, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 7, "useful_dish_drop_agent_1_mean": 0.77, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 7, "soup_pickup_agent_0_mean": 1.57, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 5, "soup_pickup_agent_1_mean": 1.59, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 6, "soup_delivery_agent_0_mean": 0.63, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 2, "soup_delivery_agent_1_mean": 0.79, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 3, "soup_drop_agent_0_mean": 0.87, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 4, "soup_drop_agent_1_mean": 0.68, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 6, "optimal_onion_potting_agent_0_mean": 2.55, "optimal_onion_potting_agent_0_min": 0, 
"optimal_onion_potting_agent_0_max": 6, "optimal_onion_potting_agent_1_mean": 2.99, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 8, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 2.55, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 6, "viable_onion_potting_agent_1_mean": 2.99, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 8, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": 
[38.0, 33.0, 12.0, 96.0, 77.0, 31.0, 16.0, 48.0, 28.0, 53.0, 20.0, 28.0, 20.0, 74.0, 12.0, 79.0, 17.0, 91.0, 76.0, 31.0, 9.0, 23.0, 84.0, 68.0, 125.0, 82.0, 31.0, 33.0, 17.0, 42.0, 79.0, 71.0, 50.0, 23.0, 12.0, 30.0, 63.0, 9.0, 12.0, 125.0, 82.0, 68.0, 47.0, 24.0, 28.0, 30.0, 33.0, 20.0, 47.0, 128.0, 22.0, 37.0, 43.0, 74.0, 80.0, 53.0, 62.0, 76.0, 36.0, 31.0, 39.0, 20.0, 28.0, 9.0, 34.0, 17.0, 60.0, 85.0, 9.0, 42.0, 31.0, 28.0, 34.0, 41.0, 14.0, 20.0, 31.0, 41.0, 79.0, 30.0, 74.0, 28.0, 9.0, 22.0, 39.0, 65.0, 63.0, 66.0, 68.0, 22.0, 76.0, 47.0, 63.0, 79.0, 79.0, 74.0, 25.0, 20.0, 17.0, 25.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [25.0, 13.0, 8.0, 25.0, 6.0, 6.0, 59.0, 37.0, 34.0, 43.0, 20.0, 11.0, 0.0, 16.0, 20.0, 28.0, 16.0, 12.0, 30.0, 23.0, 8.0, 12.0, 14.0, 14.0, 11.0, 9.0, 39.0, 35.0, 6.0, 6.0, 37.0, 42.0, 14.0, 3.0, 45.0, 46.0, 42.0, 34.0, 20.0, 11.0, 3.0, 6.0, 12.0, 11.0, 33.0, 51.0, 40.0, 28.0, 59.0, 66.0, 40.0, 42.0, 17.0, 14.0, 14.0, 19.0, 5.0, 12.0, 6.0, 36.0, 34.0, 45.0, 29.0, 42.0, 19.0, 31.0, 11.0, 12.0, 6.0, 6.0, 9.0, 21.0, 29.0, 34.0, 3.0, 6.0, 6.0, 6.0, 58.0, 67.0, 48.0, 34.0, 32.0, 36.0, 8.0, 39.0, 9.0, 15.0, 13.0, 15.0, 19.0, 11.0, 14.0, 19.0, 8.0, 12.0, 28.0, 19.0, 61.0, 67.0, 10.0, 12.0, 11.0, 26.0, 14.0, 29.0, 40.0, 34.0, 38.0, 42.0, 17.0, 36.0, 26.0, 36.0, 39.0, 37.0, 14.0, 22.0, 22.0, 9.0, 22.0, 17.0, 3.0, 17.0, 8.0, 20.0, 9.0, 0.0, 17.0, 17.0, 11.0, 6.0, 31.0, 29.0, 48.0, 37.0, 6.0, 3.0, 22.0, 20.0, 19.0, 12.0, 14.0, 14.0, 29.0, 5.0, 25.0, 16.0, 
14.0, 0.0, 9.0, 11.0, 16.0, 15.0, 14.0, 27.0, 37.0, 42.0, 15.0, 15.0, 42.0, 32.0, 16.0, 12.0, 6.0, 3.0, 19.0, 3.0, 28.0, 11.0, 28.0, 37.0, 31.0, 32.0, 35.0, 31.0, 26.0, 42.0, 5.0, 17.0, 34.0, 42.0, 31.0, 16.0, 28.0, 35.0, 35.0, 44.0, 40.0, 39.0, 38.0, 36.0, 8.0, 17.0, 12.0, 8.0, 8.0, 9.0, 9.0, 16.0]}, "sampler_perf": {"mean_env_wait_ms": 6.515320221213355, "mean_processing_ms": 0.1659420994573044, "mean_inference_ms": 2.777987966122339}, "off_policy_estimator": {}, "info": {"num_steps_trained": 600000, "num_steps_sampled": 320000, "sample_time_ms": 21126.015, "load_time_ms": 38.663, "grad_time_ms": 9096.932, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 1.1920929132713809e-08, "cur_lr": 0.0010000000474974513, "total_loss": -0.00892395805567503, "policy_loss": -0.008948341012001038, "vf_loss": 8.925480842590332, "vf_explained_var": 0.24435751140117645, "kl": 0.0012184166116639972, "entropy": 1.7363275289535522, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 320000, "episodes_total": 800, "training_iteration": 25, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_20-05-13", "timestamp": 1660244713, "time_this_iter_s": 35.73040580749512, "time_total_s": 3134.47496175766, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, 
"custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, 
"log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 3134.47496175766, "timesteps_since_restore": 320000, "iterations_since_restore": 25, "perf": {"cpu_util_percent": 36.32, "ram_util_percent": 57.904}}
+{"episode_reward_max": 142.0, "episode_reward_min": 9.0, "episode_reward_mean": 46.93, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 79.0}, "policy_reward_mean": {"ppo": 23.465}, "custom_metrics": {"sparse_reward_mean": 7.8, "sparse_reward_min": 0, "sparse_reward_max": 40, "shaped_reward_mean": 31.33, "shaped_reward_min": 9, "shaped_reward_max": 65, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 5.5, "onion_pickup_agent_0_min": 1, "onion_pickup_agent_0_max": 15, "onion_pickup_agent_1_mean": 5.81, "onion_pickup_agent_1_min": 1, "onion_pickup_agent_1_max": 13, "useful_onion_pickup_agent_0_mean": 3.81, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 10, "useful_onion_pickup_agent_1_mean": 4.34, "useful_onion_pickup_agent_1_min": 0, "useful_onion_pickup_agent_1_max": 9, "onion_drop_agent_0_mean": 2.31, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 
9, "onion_drop_agent_1_mean": 2.39, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 10, "useful_onion_drop_agent_0_mean": 1.3, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 1.18, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 10, "potting_onion_agent_0_mean": 2.7, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 7, "potting_onion_agent_1_mean": 3.0, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 7, "dish_pickup_agent_0_mean": 3.76, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 3.26, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 0.67, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 3, "useful_dish_pickup_agent_1_mean": 0.49, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 3, "dish_drop_agent_0_mean": 2.44, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 9, "dish_drop_agent_1_mean": 1.85, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.96, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 7, "useful_dish_drop_agent_1_mean": 0.73, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 7, "soup_pickup_agent_0_mean": 1.52, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 5, "soup_pickup_agent_1_mean": 1.63, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 6, "soup_delivery_agent_0_mean": 0.68, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 2, "soup_delivery_agent_1_mean": 0.77, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 3, "soup_drop_agent_0_mean": 0.78, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 4, "soup_drop_agent_1_mean": 0.76, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 5, "optimal_onion_potting_agent_0_mean": 2.7, "optimal_onion_potting_agent_0_min": 0, 
"optimal_onion_potting_agent_0_max": 7, "optimal_onion_potting_agent_1_mean": 3.0, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 7, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 2.7, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 7, "viable_onion_potting_agent_1_mean": 3.0, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 7, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [9.0, 
142.0, 120.0, 26.0, 59.0, 71.0, 122.0, 44.0, 53.0, 12.0, 45.0, 47.0, 36.0, 83.0, 36.0, 74.0, 31.0, 9.0, 31.0, 9.0, 105.0, 25.0, 79.0, 33.0, 74.0, 41.0, 63.0, 31.0, 9.0, 20.0, 22.0, 31.0, 34.0, 17.0, 60.0, 85.0, 9.0, 42.0, 31.0, 28.0, 34.0, 41.0, 14.0, 20.0, 31.0, 41.0, 79.0, 30.0, 74.0, 28.0, 9.0, 22.0, 39.0, 65.0, 63.0, 66.0, 68.0, 22.0, 76.0, 47.0, 63.0, 79.0, 79.0, 74.0, 25.0, 20.0, 17.0, 25.0, 38.0, 33.0, 12.0, 96.0, 77.0, 31.0, 16.0, 48.0, 28.0, 53.0, 20.0, 28.0, 20.0, 74.0, 12.0, 79.0, 17.0, 91.0, 76.0, 31.0, 9.0, 23.0, 84.0, 68.0, 125.0, 82.0, 31.0, 33.0, 17.0, 42.0, 79.0, 71.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [0.0, 9.0, 63.0, 79.0, 63.0, 57.0, 9.0, 17.0, 36.0, 23.0, 37.0, 34.0, 60.0, 62.0, 36.0, 8.0, 28.0, 25.0, 9.0, 3.0, 14.0, 31.0, 21.0, 26.0, 16.0, 20.0, 48.0, 35.0, 17.0, 19.0, 37.0, 37.0, 3.0, 28.0, 3.0, 6.0, 9.0, 22.0, 3.0, 6.0, 51.0, 54.0, 14.0, 11.0, 28.0, 51.0, 11.0, 22.0, 34.0, 40.0, 19.0, 22.0, 37.0, 26.0, 17.0, 14.0, 0.0, 9.0, 8.0, 12.0, 11.0, 11.0, 17.0, 14.0, 17.0, 17.0, 11.0, 6.0, 31.0, 29.0, 48.0, 37.0, 6.0, 3.0, 22.0, 20.0, 19.0, 12.0, 14.0, 14.0, 29.0, 5.0, 25.0, 16.0, 14.0, 0.0, 9.0, 11.0, 16.0, 15.0, 14.0, 27.0, 37.0, 42.0, 15.0, 15.0, 42.0, 32.0, 16.0, 12.0, 6.0, 3.0, 19.0, 3.0, 28.0, 11.0, 28.0, 37.0, 31.0, 32.0, 35.0, 31.0, 26.0, 42.0, 5.0, 17.0, 34.0, 42.0, 31.0, 16.0, 28.0, 35.0, 35.0, 44.0, 40.0, 39.0, 38.0, 36.0, 8.0, 17.0, 12.0, 8.0, 8.0, 9.0, 9.0, 16.0, 25.0, 13.0, 8.0, 25.0, 6.0, 6.0, 59.0, 37.0, 34.0, 43.0, 20.0, 11.0, 0.0, 
16.0, 20.0, 28.0, 16.0, 12.0, 30.0, 23.0, 8.0, 12.0, 14.0, 14.0, 11.0, 9.0, 39.0, 35.0, 6.0, 6.0, 37.0, 42.0, 14.0, 3.0, 45.0, 46.0, 42.0, 34.0, 20.0, 11.0, 3.0, 6.0, 12.0, 11.0, 33.0, 51.0, 40.0, 28.0, 59.0, 66.0, 40.0, 42.0, 17.0, 14.0, 14.0, 19.0, 5.0, 12.0, 6.0, 36.0, 34.0, 45.0, 29.0, 42.0]}, "sampler_perf": {"mean_env_wait_ms": 6.273535039263677, "mean_processing_ms": 0.16612280496799353, "mean_inference_ms": 2.713084381170351}, "off_policy_estimator": {}, "info": {"num_steps_trained": 624000, "num_steps_sampled": 332800, "sample_time_ms": 21568.501, "load_time_ms": 38.757, "grad_time_ms": 9275.576, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 5.9604645663569045e-09, "cur_lr": 0.0010000000474974513, "total_loss": -0.011042184196412563, "policy_loss": -0.01108124852180481, "vf_loss": 9.051116943359375, "vf_explained_var": 0.3293954133987427, "kl": 0.0011855209013447165, "entropy": 1.7320860624313354, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 332800, "episodes_total": 832, "training_iteration": 26, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_20-05-48", "timestamp": 1660244748, "time_this_iter_s": 34.898388147354126, "time_total_s": 3169.373349905014, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": 
null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, 
"device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 3169.373349905014, "timesteps_since_restore": 332800, "iterations_since_restore": 26, "perf": {"cpu_util_percent": 38.21224489795919, "ram_util_percent": 57.97551020408163}}
+{"episode_reward_max": 142.0, "episode_reward_min": 9.0, "episode_reward_mean": 48.57, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 79.0}, "policy_reward_mean": {"ppo": 24.285}, "custom_metrics": {"sparse_reward_mean": 8.2, "sparse_reward_min": 0, "sparse_reward_max": 40, "shaped_reward_mean": 32.17, "shaped_reward_min": 9, "shaped_reward_max": 73, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 5.38, "onion_pickup_agent_0_min": 1, "onion_pickup_agent_0_max": 13, "onion_pickup_agent_1_mean": 5.95, "onion_pickup_agent_1_min": 2, "onion_pickup_agent_1_max": 13, "useful_onion_pickup_agent_0_mean": 3.8, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 10, "useful_onion_pickup_agent_1_mean": 4.51, "useful_onion_pickup_agent_1_min": 0, "useful_onion_pickup_agent_1_max": 10, "onion_drop_agent_0_mean": 2.24, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 9, "onion_drop_agent_1_mean": 2.33, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 10, "useful_onion_drop_agent_0_mean": 1.28, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 1.16, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 10, "potting_onion_agent_0_mean": 2.69, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 7, "potting_onion_agent_1_mean": 3.14, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 7, "dish_pickup_agent_0_mean": 3.9, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 3.46, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 0.66, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 3, "useful_dish_pickup_agent_1_mean": 0.5, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 3, "dish_drop_agent_0_mean": 2.58, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 9, "dish_drop_agent_1_mean": 2.02, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.94, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 7, "useful_dish_drop_agent_1_mean": 0.83, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 7, "soup_pickup_agent_0_mean": 1.54, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 6, "soup_pickup_agent_1_mean": 1.75, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 6, "soup_delivery_agent_0_mean": 0.7, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 4, "soup_delivery_agent_1_mean": 0.77, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 3, "soup_drop_agent_0_mean": 0.75, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 5, "soup_drop_agent_1_mean": 0.87, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 5, "optimal_onion_potting_agent_0_mean": 2.69, 
"optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 7, "optimal_onion_potting_agent_1_mean": 3.14, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 7, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 2.69, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 7, "viable_onion_potting_agent_1_mean": 3.14, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 7, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 
0}, "hist_stats": {"episode_reward": [44.0, 12.0, 63.0, 25.0, 17.0, 49.0, 9.0, 79.0, 119.0, 43.0, 12.0, 130.0, 79.0, 53.0, 22.0, 24.0, 36.0, 17.0, 82.0, 42.0, 65.0, 88.0, 23.0, 80.0, 66.0, 44.0, 113.0, 9.0, 66.0, 76.0, 9.0, 38.0, 25.0, 20.0, 17.0, 25.0, 38.0, 33.0, 12.0, 96.0, 77.0, 31.0, 16.0, 48.0, 28.0, 53.0, 20.0, 28.0, 20.0, 74.0, 12.0, 79.0, 17.0, 91.0, 76.0, 31.0, 9.0, 23.0, 84.0, 68.0, 125.0, 82.0, 31.0, 33.0, 17.0, 42.0, 79.0, 71.0, 9.0, 142.0, 120.0, 26.0, 59.0, 71.0, 122.0, 44.0, 53.0, 12.0, 45.0, 47.0, 36.0, 83.0, 36.0, 74.0, 31.0, 9.0, 31.0, 9.0, 105.0, 25.0, 79.0, 33.0, 74.0, 41.0, 63.0, 31.0, 9.0, 20.0, 22.0, 31.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [23.0, 21.0, 9.0, 3.0, 43.0, 20.0, 9.0, 16.0, 12.0, 5.0, 18.0, 31.0, 6.0, 3.0, 34.0, 45.0, 60.0, 59.0, 14.0, 29.0, 6.0, 6.0, 65.0, 65.0, 42.0, 37.0, 19.0, 34.0, 9.0, 13.0, 8.0, 16.0, 19.0, 17.0, 5.0, 12.0, 34.0, 48.0, 17.0, 25.0, 34.0, 31.0, 57.0, 31.0, 17.0, 6.0, 32.0, 48.0, 29.0, 37.0, 19.0, 25.0, 70.0, 43.0, 0.0, 9.0, 34.0, 32.0, 44.0, 32.0, 3.0, 6.0, 22.0, 16.0, 8.0, 17.0, 12.0, 8.0, 8.0, 9.0, 9.0, 16.0, 25.0, 13.0, 8.0, 25.0, 6.0, 6.0, 59.0, 37.0, 34.0, 43.0, 20.0, 11.0, 0.0, 16.0, 20.0, 28.0, 16.0, 12.0, 30.0, 23.0, 8.0, 12.0, 14.0, 14.0, 11.0, 9.0, 39.0, 35.0, 6.0, 6.0, 37.0, 42.0, 14.0, 3.0, 45.0, 46.0, 42.0, 34.0, 20.0, 11.0, 3.0, 6.0, 12.0, 11.0, 33.0, 51.0, 40.0, 28.0, 59.0, 66.0, 40.0, 42.0, 17.0, 14.0, 14.0, 19.0, 5.0, 12.0, 6.0, 36.0, 34.0, 45.0, 29.0, 42.0, 0.0, 9.0, 63.0, 79.0, 63.0, 57.0, 
9.0, 17.0, 36.0, 23.0, 37.0, 34.0, 60.0, 62.0, 36.0, 8.0, 28.0, 25.0, 9.0, 3.0, 14.0, 31.0, 21.0, 26.0, 16.0, 20.0, 48.0, 35.0, 17.0, 19.0, 37.0, 37.0, 3.0, 28.0, 3.0, 6.0, 9.0, 22.0, 3.0, 6.0, 51.0, 54.0, 14.0, 11.0, 28.0, 51.0, 11.0, 22.0, 34.0, 40.0, 19.0, 22.0, 37.0, 26.0, 17.0, 14.0, 0.0, 9.0, 8.0, 12.0, 11.0, 11.0, 17.0, 14.0]}, "sampler_perf": {"mean_env_wait_ms": 6.050716001931983, "mean_processing_ms": 0.1663751803875143, "mean_inference_ms": 2.655688107582492}, "off_policy_estimator": {}, "info": {"num_steps_trained": 648000, "num_steps_sampled": 345600, "sample_time_ms": 22006.752, "load_time_ms": 38.851, "grad_time_ms": 9447.322, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 2.9802322831784522e-09, "cur_lr": 0.0010000000474974513, "total_loss": -0.010166086256504059, "policy_loss": -0.010217566043138504, "vf_loss": 9.166760444641113, "vf_explained_var": 0.3867878019809723, "kl": 0.001088446588255465, "entropy": 1.7303863763809204, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 345600, "episodes_total": 864, "training_iteration": 27, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_20-06-24", "timestamp": 1660244784, "time_this_iter_s": 35.4101459980011, "time_total_s": 3204.783495903015, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, 
"zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": 
true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 3204.783495903015, "timesteps_since_restore": 345600, "iterations_since_restore": 27, "perf": {"cpu_util_percent": 38.552, "ram_util_percent": 58.32}}
+{"episode_reward_max": 145.0, "episode_reward_min": 9.0, "episode_reward_mean": 55.94, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 79.0}, "policy_reward_mean": {"ppo": 27.97}, "custom_metrics": {"sparse_reward_mean": 10.0, "sparse_reward_min": 0, "sparse_reward_max": 40, "shaped_reward_mean": 35.94, "shaped_reward_min": 9, "shaped_reward_max": 73, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 5.38, "onion_pickup_agent_0_min": 1, "onion_pickup_agent_0_max": 13, "onion_pickup_agent_1_mean": 6.64, "onion_pickup_agent_1_min": 1, "onion_pickup_agent_1_max": 15, "useful_onion_pickup_agent_0_mean": 3.83, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 10, "useful_onion_pickup_agent_1_mean": 5.04, "useful_onion_pickup_agent_1_min": 0, "useful_onion_pickup_agent_1_max": 11, "onion_drop_agent_0_mean": 2.21, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 9, "onion_drop_agent_1_mean": 2.47, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 10, "useful_onion_drop_agent_0_mean": 1.2, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 1.24, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 10, "potting_onion_agent_0_mean": 2.73, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 7, "potting_onion_agent_1_mean": 3.69, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 8, "dish_pickup_agent_0_mean": 3.85, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 3.25, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 0.71, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 4, "useful_dish_pickup_agent_1_mean": 0.55, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 3, "dish_drop_agent_0_mean": 2.31, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 1.73, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 7, "useful_dish_drop_agent_0_mean": 0.77, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 5, "useful_dish_drop_agent_1_mean": 0.58, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 5, "soup_pickup_agent_0_mean": 1.93, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 6, "soup_pickup_agent_1_mean": 1.87, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 6, "soup_delivery_agent_0_mean": 0.87, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 4, "soup_delivery_agent_1_mean": 0.87, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 3, "soup_drop_agent_0_mean": 0.97, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 6, "soup_drop_agent_1_mean": 0.89, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 5, "optimal_onion_potting_agent_0_mean": 2.73, 
"optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 7, "optimal_onion_potting_agent_1_mean": 3.69, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 8, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 2.73, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 7, "viable_onion_potting_agent_1_mean": 3.69, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 8, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 
0}, "hist_stats": {"episode_reward": [93.0, 93.0, 74.0, 133.0, 106.0, 85.0, 23.0, 47.0, 93.0, 23.0, 47.0, 74.0, 33.0, 98.0, 96.0, 17.0, 49.0, 110.0, 84.0, 145.0, 34.0, 99.0, 48.0, 55.0, 71.0, 71.0, 65.0, 46.0, 44.0, 12.0, 20.0, 71.0, 17.0, 42.0, 79.0, 71.0, 9.0, 142.0, 120.0, 26.0, 59.0, 71.0, 122.0, 44.0, 53.0, 12.0, 45.0, 47.0, 36.0, 83.0, 36.0, 74.0, 31.0, 9.0, 31.0, 9.0, 105.0, 25.0, 79.0, 33.0, 74.0, 41.0, 63.0, 31.0, 9.0, 20.0, 22.0, 31.0, 44.0, 12.0, 63.0, 25.0, 17.0, 49.0, 9.0, 79.0, 119.0, 43.0, 12.0, 130.0, 79.0, 53.0, 22.0, 24.0, 36.0, 17.0, 82.0, 42.0, 65.0, 88.0, 23.0, 80.0, 66.0, 44.0, 113.0, 9.0, 66.0, 76.0, 9.0, 38.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [47.0, 46.0, 53.0, 40.0, 37.0, 37.0, 59.0, 74.0, 55.0, 51.0, 40.0, 45.0, 3.0, 20.0, 22.0, 25.0, 53.0, 40.0, 14.0, 9.0, 24.0, 23.0, 36.0, 38.0, 21.0, 12.0, 37.0, 61.0, 44.0, 52.0, 6.0, 11.0, 29.0, 20.0, 48.0, 62.0, 38.0, 46.0, 76.0, 69.0, 6.0, 28.0, 47.0, 52.0, 17.0, 31.0, 16.0, 39.0, 40.0, 31.0, 35.0, 36.0, 32.0, 33.0, 21.0, 25.0, 23.0, 21.0, 6.0, 6.0, 8.0, 12.0, 33.0, 38.0, 5.0, 12.0, 6.0, 36.0, 34.0, 45.0, 29.0, 42.0, 0.0, 9.0, 63.0, 79.0, 63.0, 57.0, 9.0, 17.0, 36.0, 23.0, 37.0, 34.0, 60.0, 62.0, 36.0, 8.0, 28.0, 25.0, 9.0, 3.0, 14.0, 31.0, 21.0, 26.0, 16.0, 20.0, 48.0, 35.0, 17.0, 19.0, 37.0, 37.0, 3.0, 28.0, 3.0, 6.0, 9.0, 22.0, 3.0, 6.0, 51.0, 54.0, 14.0, 11.0, 28.0, 51.0, 11.0, 22.0, 34.0, 40.0, 19.0, 22.0, 37.0, 26.0, 17.0, 14.0, 0.0, 9.0, 8.0, 12.0, 11.0, 11.0, 17.0, 14.0, 23.0, 21.0, 9.0, 3.0, 
43.0, 20.0, 9.0, 16.0, 12.0, 5.0, 18.0, 31.0, 6.0, 3.0, 34.0, 45.0, 60.0, 59.0, 14.0, 29.0, 6.0, 6.0, 65.0, 65.0, 42.0, 37.0, 19.0, 34.0, 9.0, 13.0, 8.0, 16.0, 19.0, 17.0, 5.0, 12.0, 34.0, 48.0, 17.0, 25.0, 34.0, 31.0, 57.0, 31.0, 17.0, 6.0, 32.0, 48.0, 29.0, 37.0, 19.0, 25.0, 70.0, 43.0, 0.0, 9.0, 34.0, 32.0, 44.0, 32.0, 3.0, 6.0, 22.0, 16.0]}, "sampler_perf": {"mean_env_wait_ms": 5.8446114836959895, "mean_processing_ms": 0.16658688353388335, "mean_inference_ms": 2.6012841728705705}, "off_policy_estimator": {}, "info": {"num_steps_trained": 672000, "num_steps_sampled": 358400, "sample_time_ms": 22190.055, "load_time_ms": 38.857, "grad_time_ms": 9598.179, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 1.4901161415892261e-09, "cur_lr": 0.0010000000474974513, "total_loss": -0.008647923357784748, "policy_loss": -0.00907482486218214, "vf_loss": 12.8626708984375, "vf_explained_var": 0.32375723123550415, "kl": 0.0009376012603752315, "entropy": 1.7187572717666626, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 358400, "episodes_total": 896, "training_iteration": 28, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_20-06-58", "timestamp": 1660244818, "time_this_iter_s": 34.0201780796051, "time_total_s": 3238.8036739826202, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, 
"grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": 
{"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 3238.8036739826202, "timesteps_since_restore": 358400, "iterations_since_restore": 28, "perf": {"cpu_util_percent": 38.32708333333333, "ram_util_percent": 57.17499999999999}}
+{"episode_reward_max": 145.0, "episode_reward_min": 9.0, "episode_reward_mean": 55.35, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 76.0}, "policy_reward_mean": {"ppo": 27.675}, "custom_metrics": {"sparse_reward_mean": 9.6, "sparse_reward_min": 0, "sparse_reward_max": 40, "shaped_reward_mean": 36.15, "shaped_reward_min": 9, "shaped_reward_max": 73, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 5.38, "onion_pickup_agent_0_min": 1, "onion_pickup_agent_0_max": 11, "onion_pickup_agent_1_mean": 6.69, "onion_pickup_agent_1_min": 1, "onion_pickup_agent_1_max": 15, "useful_onion_pickup_agent_0_mean": 3.84, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 11, "useful_onion_pickup_agent_1_mean": 4.96, "useful_onion_pickup_agent_1_min": 0, "useful_onion_pickup_agent_1_max": 11, "onion_drop_agent_0_mean": 2.14, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 7, "onion_drop_agent_1_mean": 2.59, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 9, "useful_onion_drop_agent_0_mean": 1.07, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 1.39, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 8, "potting_onion_agent_0_mean": 2.75, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 7, "potting_onion_agent_1_mean": 3.61, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 8, "dish_pickup_agent_0_mean": 3.54, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 3.31, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 0.66, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 4, "useful_dish_pickup_agent_1_mean": 0.63, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 3, "dish_drop_agent_0_mean": 1.93, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 1.84, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 7, "useful_dish_drop_agent_0_mean": 0.65, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 5, "useful_dish_drop_agent_1_mean": 0.58, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 5, "soup_pickup_agent_0_mean": 2.06, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 6, "soup_pickup_agent_1_mean": 1.82, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 0.92, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 4, "soup_delivery_agent_1_mean": 0.89, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 3, "soup_drop_agent_0_mean": 1.03, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 6, "soup_drop_agent_1_mean": 0.83, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 6, "optimal_onion_potting_agent_0_mean": 2.75, 
"optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 7, "optimal_onion_potting_agent_1_mean": 3.61, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 8, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 2.75, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 7, "viable_onion_potting_agent_1_mean": 3.61, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 8, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 
0}, "hist_stats": {"episode_reward": [39.0, 96.0, 81.0, 77.0, 9.0, 34.0, 47.0, 20.0, 74.0, 44.0, 120.0, 34.0, 22.0, 42.0, 76.0, 66.0, 38.0, 44.0, 9.0, 101.0, 37.0, 39.0, 34.0, 36.0, 53.0, 9.0, 66.0, 80.0, 95.0, 34.0, 50.0, 54.0, 9.0, 20.0, 22.0, 31.0, 44.0, 12.0, 63.0, 25.0, 17.0, 49.0, 9.0, 79.0, 119.0, 43.0, 12.0, 130.0, 79.0, 53.0, 22.0, 24.0, 36.0, 17.0, 82.0, 42.0, 65.0, 88.0, 23.0, 80.0, 66.0, 44.0, 113.0, 9.0, 66.0, 76.0, 9.0, 38.0, 93.0, 93.0, 74.0, 133.0, 106.0, 85.0, 23.0, 47.0, 93.0, 23.0, 47.0, 74.0, 33.0, 98.0, 96.0, 17.0, 49.0, 110.0, 84.0, 145.0, 34.0, 99.0, 48.0, 55.0, 71.0, 71.0, 65.0, 46.0, 44.0, 12.0, 20.0, 71.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [15.0, 24.0, 32.0, 64.0, 38.0, 43.0, 39.0, 38.0, 0.0, 9.0, 14.0, 20.0, 18.0, 29.0, 14.0, 6.0, 40.0, 34.0, 22.0, 22.0, 62.0, 58.0, 15.0, 19.0, 10.0, 12.0, 25.0, 17.0, 34.0, 42.0, 37.0, 29.0, 22.0, 16.0, 24.0, 20.0, 0.0, 9.0, 45.0, 56.0, 20.0, 17.0, 30.0, 9.0, 12.0, 22.0, 14.0, 22.0, 27.0, 26.0, 0.0, 9.0, 26.0, 40.0, 34.0, 46.0, 50.0, 45.0, 18.0, 16.0, 28.0, 22.0, 28.0, 26.0, 0.0, 9.0, 8.0, 12.0, 11.0, 11.0, 17.0, 14.0, 23.0, 21.0, 9.0, 3.0, 43.0, 20.0, 9.0, 16.0, 12.0, 5.0, 18.0, 31.0, 6.0, 3.0, 34.0, 45.0, 60.0, 59.0, 14.0, 29.0, 6.0, 6.0, 65.0, 65.0, 42.0, 37.0, 19.0, 34.0, 9.0, 13.0, 8.0, 16.0, 19.0, 17.0, 5.0, 12.0, 34.0, 48.0, 17.0, 25.0, 34.0, 31.0, 57.0, 31.0, 17.0, 6.0, 32.0, 48.0, 29.0, 37.0, 19.0, 25.0, 70.0, 43.0, 0.0, 9.0, 34.0, 32.0, 44.0, 32.0, 3.0, 6.0, 22.0, 16.0, 47.0, 46.0, 53.0, 40.0, 
37.0, 37.0, 59.0, 74.0, 55.0, 51.0, 40.0, 45.0, 3.0, 20.0, 22.0, 25.0, 53.0, 40.0, 14.0, 9.0, 24.0, 23.0, 36.0, 38.0, 21.0, 12.0, 37.0, 61.0, 44.0, 52.0, 6.0, 11.0, 29.0, 20.0, 48.0, 62.0, 38.0, 46.0, 76.0, 69.0, 6.0, 28.0, 47.0, 52.0, 17.0, 31.0, 16.0, 39.0, 40.0, 31.0, 35.0, 36.0, 32.0, 33.0, 21.0, 25.0, 23.0, 21.0, 6.0, 6.0, 8.0, 12.0, 33.0, 38.0]}, "sampler_perf": {"mean_env_wait_ms": 5.653214857264523, "mean_processing_ms": 0.16674718188605944, "mean_inference_ms": 2.549555614199102}, "off_policy_estimator": {}, "info": {"num_steps_trained": 696000, "num_steps_sampled": 371200, "sample_time_ms": 22379.228, "load_time_ms": 38.745, "grad_time_ms": 9750.752, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 7.450580707946131e-10, "cur_lr": 0.0010000000474974513, "total_loss": -0.007846680469810963, "policy_loss": -0.007850968278944492, "vf_loss": 8.63664722442627, "vf_explained_var": 0.4092896282672882, "kl": 0.001057352521456778, "entropy": 1.7187713384628296, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 371200, "episodes_total": 928, "training_iteration": 29, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_20-07-30", "timestamp": 1660244850, "time_this_iter_s": 32.66524410247803, "time_total_s": 3271.4689180850983, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, 
"grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": 
{"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 3271.4689180850983, "timesteps_since_restore": 371200, "iterations_since_restore": 29, "perf": {"cpu_util_percent": 40.74130434782609, "ram_util_percent": 58.79130434782609}}
+{"episode_reward_max": 179.0, "episode_reward_min": 9.0, "episode_reward_mean": 58.6, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 92.0}, "policy_reward_mean": {"ppo": 29.3}, "custom_metrics": {"sparse_reward_mean": 10.2, "sparse_reward_min": 0, "sparse_reward_max": 60, "shaped_reward_mean": 38.2, "shaped_reward_min": 9, "shaped_reward_max": 70, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 5.55, "onion_pickup_agent_0_min": 1, "onion_pickup_agent_0_max": 11, "onion_pickup_agent_1_mean": 7.06, "onion_pickup_agent_1_min": 1, "onion_pickup_agent_1_max": 16, "useful_onion_pickup_agent_0_mean": 4.05, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 11, "useful_onion_pickup_agent_1_mean": 5.26, "useful_onion_pickup_agent_1_min": 1, "useful_onion_pickup_agent_1_max": 11, "onion_drop_agent_0_mean": 2.2, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 
7, "onion_drop_agent_1_mean": 2.71, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 9, "useful_onion_drop_agent_0_mean": 1.03, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 1.4, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 8, "potting_onion_agent_0_mean": 2.83, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 8, "potting_onion_agent_1_mean": 3.86, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 8, "dish_pickup_agent_0_mean": 3.66, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 3.29, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 0.74, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 4, "useful_dish_pickup_agent_1_mean": 0.67, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 3, "dish_drop_agent_0_mean": 1.98, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 1.72, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 6, "useful_dish_drop_agent_0_mean": 0.67, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 5, "useful_dish_drop_agent_1_mean": 0.45, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 5, "soup_pickup_agent_0_mean": 2.26, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 1.9, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 1.07, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 4, "soup_delivery_agent_1_mean": 0.92, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 4, "soup_drop_agent_0_mean": 1.1, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 6, "soup_drop_agent_1_mean": 0.9, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 6, "optimal_onion_potting_agent_0_mean": 2.83, "optimal_onion_potting_agent_0_min": 0, 
"optimal_onion_potting_agent_0_max": 8, "optimal_onion_potting_agent_1_mean": 3.86, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 8, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 2.83, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 8, "viable_onion_potting_agent_1_mean": 3.86, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 8, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": 
[90.0, 48.0, 179.0, 39.0, 60.0, 91.0, 39.0, 122.0, 20.0, 34.0, 42.0, 23.0, 50.0, 12.0, 20.0, 71.0, 66.0, 35.0, 44.0, 73.0, 64.0, 17.0, 131.0, 38.0, 41.0, 36.0, 105.0, 53.0, 39.0, 71.0, 20.0, 79.0, 66.0, 76.0, 9.0, 38.0, 93.0, 93.0, 74.0, 133.0, 106.0, 85.0, 23.0, 47.0, 93.0, 23.0, 47.0, 74.0, 33.0, 98.0, 96.0, 17.0, 49.0, 110.0, 84.0, 145.0, 34.0, 99.0, 48.0, 55.0, 71.0, 71.0, 65.0, 46.0, 44.0, 12.0, 20.0, 71.0, 39.0, 96.0, 81.0, 77.0, 9.0, 34.0, 47.0, 20.0, 74.0, 44.0, 120.0, 34.0, 22.0, 42.0, 76.0, 66.0, 38.0, 44.0, 9.0, 101.0, 37.0, 39.0, 34.0, 36.0, 53.0, 9.0, 66.0, 80.0, 95.0, 34.0, 50.0, 54.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [39.0, 51.0, 20.0, 28.0, 87.0, 92.0, 25.0, 14.0, 29.0, 31.0, 45.0, 46.0, 25.0, 14.0, 65.0, 57.0, 11.0, 9.0, 22.0, 12.0, 14.0, 28.0, 6.0, 17.0, 30.0, 20.0, 6.0, 6.0, 12.0, 8.0, 23.0, 48.0, 29.0, 37.0, 13.0, 22.0, 20.0, 24.0, 36.0, 37.0, 31.0, 33.0, 11.0, 6.0, 68.0, 63.0, 11.0, 27.0, 24.0, 17.0, 20.0, 16.0, 64.0, 41.0, 31.0, 22.0, 12.0, 27.0, 26.0, 45.0, 11.0, 9.0, 33.0, 46.0, 34.0, 32.0, 44.0, 32.0, 3.0, 6.0, 22.0, 16.0, 47.0, 46.0, 53.0, 40.0, 37.0, 37.0, 59.0, 74.0, 55.0, 51.0, 40.0, 45.0, 3.0, 20.0, 22.0, 25.0, 53.0, 40.0, 14.0, 9.0, 24.0, 23.0, 36.0, 38.0, 21.0, 12.0, 37.0, 61.0, 44.0, 52.0, 6.0, 11.0, 29.0, 20.0, 48.0, 62.0, 38.0, 46.0, 76.0, 69.0, 6.0, 28.0, 47.0, 52.0, 17.0, 31.0, 16.0, 39.0, 40.0, 31.0, 35.0, 36.0, 32.0, 33.0, 21.0, 25.0, 23.0, 21.0, 6.0, 6.0, 8.0, 12.0, 33.0, 38.0, 15.0, 24.0, 32.0, 64.0, 38.0, 43.0, 39.0, 38.0, 
0.0, 9.0, 14.0, 20.0, 18.0, 29.0, 14.0, 6.0, 40.0, 34.0, 22.0, 22.0, 62.0, 58.0, 15.0, 19.0, 10.0, 12.0, 25.0, 17.0, 34.0, 42.0, 37.0, 29.0, 22.0, 16.0, 24.0, 20.0, 0.0, 9.0, 45.0, 56.0, 20.0, 17.0, 30.0, 9.0, 12.0, 22.0, 14.0, 22.0, 27.0, 26.0, 0.0, 9.0, 26.0, 40.0, 34.0, 46.0, 50.0, 45.0, 18.0, 16.0, 28.0, 22.0, 28.0, 26.0]}, "sampler_perf": {"mean_env_wait_ms": 5.475175554008815, "mean_processing_ms": 0.16692031317355585, "mean_inference_ms": 2.4996312531417773}, "off_policy_estimator": {}, "info": {"num_steps_trained": 720000, "num_steps_sampled": 384000, "sample_time_ms": 22626.486, "load_time_ms": 38.637, "grad_time_ms": 9834.774, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 3.7252903539730653e-10, "cur_lr": 0.0010000000474974513, "total_loss": -0.010750534944236279, "policy_loss": -0.01101712416857481, "vf_loss": 11.21933650970459, "vf_explained_var": 0.33813270926475525, "kl": 0.0012414826778694987, "entropy": 1.7106833457946777, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 384000, "episodes_total": 960, "training_iteration": 30, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_20-08-02", "timestamp": 1660244882, "time_this_iter_s": 32.23107981681824, "time_total_s": 3303.6999979019165, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, 
"zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": 
true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 3303.6999979019165, "timesteps_since_restore": 384000, "iterations_since_restore": 30, "perf": {"cpu_util_percent": 45.34130434782608, "ram_util_percent": 56.88478260869565}}
+{"episode_reward_max": 182.0, "episode_reward_min": 9.0, "episode_reward_mean": 57.61, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 96.0}, "policy_reward_mean": {"ppo": 28.805}, "custom_metrics": {"sparse_reward_mean": 10.4, "sparse_reward_min": 0, "sparse_reward_max": 60, "shaped_reward_mean": 36.81, "shaped_reward_min": 9, "shaped_reward_max": 65, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 5.93, "onion_pickup_agent_0_min": 1, "onion_pickup_agent_0_max": 11, "onion_pickup_agent_1_mean": 6.72, "onion_pickup_agent_1_min": 0, "onion_pickup_agent_1_max": 16, "useful_onion_pickup_agent_0_mean": 4.36, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 11, "useful_onion_pickup_agent_1_mean": 4.83, "useful_onion_pickup_agent_1_min": 0, "useful_onion_pickup_agent_1_max": 10, "onion_drop_agent_0_mean": 2.38, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 7, "onion_drop_agent_1_mean": 2.59, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 10, "useful_onion_drop_agent_0_mean": 1.14, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 1.46, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 8, "potting_onion_agent_0_mean": 2.99, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 8, "potting_onion_agent_1_mean": 3.62, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 7, "dish_pickup_agent_0_mean": 3.62, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 3.3, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 0.67, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 4, "useful_dish_pickup_agent_1_mean": 0.64, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 3, "dish_drop_agent_0_mean": 2.04, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 8, "dish_drop_agent_1_mean": 1.8, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.68, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 5, "useful_dish_drop_agent_1_mean": 0.5, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 4, "soup_pickup_agent_0_mean": 2.05, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 1.73, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 1.09, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 4, "soup_delivery_agent_1_mean": 0.82, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 4, "soup_drop_agent_0_mean": 0.86, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 6, "soup_drop_agent_1_mean": 0.85, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 6, "optimal_onion_potting_agent_0_mean": 2.99, 
"optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 8, "optimal_onion_potting_agent_1_mean": 3.62, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 7, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 2.99, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 8, "viable_onion_potting_agent_1_mean": 3.62, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 7, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 
0}, "hist_stats": {"episode_reward": [34.0, 182.0, 133.0, 66.0, 23.0, 42.0, 53.0, 125.0, 22.0, 12.0, 87.0, 98.0, 9.0, 46.0, 30.0, 12.0, 37.0, 99.0, 63.0, 85.0, 77.0, 42.0, 133.0, 173.0, 66.0, 35.0, 39.0, 88.0, 34.0, 52.0, 82.0, 23.0, 44.0, 12.0, 20.0, 71.0, 39.0, 96.0, 81.0, 77.0, 9.0, 34.0, 47.0, 20.0, 74.0, 44.0, 120.0, 34.0, 22.0, 42.0, 76.0, 66.0, 38.0, 44.0, 9.0, 101.0, 37.0, 39.0, 34.0, 36.0, 53.0, 9.0, 66.0, 80.0, 95.0, 34.0, 50.0, 54.0, 90.0, 48.0, 179.0, 39.0, 60.0, 91.0, 39.0, 122.0, 20.0, 34.0, 42.0, 23.0, 50.0, 12.0, 20.0, 71.0, 66.0, 35.0, 44.0, 73.0, 64.0, 17.0, 131.0, 38.0, 41.0, 36.0, 105.0, 53.0, 39.0, 71.0, 20.0, 79.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [23.0, 11.0, 86.0, 96.0, 68.0, 65.0, 37.0, 29.0, 17.0, 6.0, 6.0, 36.0, 22.0, 31.0, 62.0, 63.0, 8.0, 14.0, 3.0, 9.0, 41.0, 46.0, 55.0, 43.0, 0.0, 9.0, 28.0, 18.0, 19.0, 11.0, 9.0, 3.0, 23.0, 14.0, 59.0, 40.0, 31.0, 32.0, 45.0, 40.0, 39.0, 38.0, 19.0, 23.0, 67.0, 66.0, 85.0, 88.0, 34.0, 32.0, 20.0, 15.0, 22.0, 17.0, 37.0, 51.0, 20.0, 14.0, 27.0, 25.0, 43.0, 39.0, 11.0, 12.0, 23.0, 21.0, 6.0, 6.0, 8.0, 12.0, 33.0, 38.0, 15.0, 24.0, 32.0, 64.0, 38.0, 43.0, 39.0, 38.0, 0.0, 9.0, 14.0, 20.0, 18.0, 29.0, 14.0, 6.0, 40.0, 34.0, 22.0, 22.0, 62.0, 58.0, 15.0, 19.0, 10.0, 12.0, 25.0, 17.0, 34.0, 42.0, 37.0, 29.0, 22.0, 16.0, 24.0, 20.0, 0.0, 9.0, 45.0, 56.0, 20.0, 17.0, 30.0, 9.0, 12.0, 22.0, 14.0, 22.0, 27.0, 26.0, 0.0, 9.0, 26.0, 40.0, 34.0, 46.0, 50.0, 45.0, 18.0, 16.0, 28.0, 22.0, 28.0, 26.0, 39.0, 51.0, 
20.0, 28.0, 87.0, 92.0, 25.0, 14.0, 29.0, 31.0, 45.0, 46.0, 25.0, 14.0, 65.0, 57.0, 11.0, 9.0, 22.0, 12.0, 14.0, 28.0, 6.0, 17.0, 30.0, 20.0, 6.0, 6.0, 12.0, 8.0, 23.0, 48.0, 29.0, 37.0, 13.0, 22.0, 20.0, 24.0, 36.0, 37.0, 31.0, 33.0, 11.0, 6.0, 68.0, 63.0, 11.0, 27.0, 24.0, 17.0, 20.0, 16.0, 64.0, 41.0, 31.0, 22.0, 12.0, 27.0, 26.0, 45.0, 11.0, 9.0, 33.0, 46.0]}, "sampler_perf": {"mean_env_wait_ms": 5.308917467290777, "mean_processing_ms": 0.16704433507360725, "mean_inference_ms": 2.4512479594099825}, "off_policy_estimator": {}, "info": {"num_steps_trained": 744000, "num_steps_sampled": 396800, "sample_time_ms": 22677.978, "load_time_ms": 38.542, "grad_time_ms": 9985.158, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 1.8626451769865326e-10, "cur_lr": 0.0010000000474974513, "total_loss": -0.00831019040197134, "policy_loss": -0.008908797055482864, "vf_loss": 14.524895668029785, "vf_explained_var": 0.35295844078063965, "kl": 0.0011723049683496356, "entropy": 1.7077676057815552, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 396800, "episodes_total": 992, "training_iteration": 31, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_20-08-34", "timestamp": 1660244914, "time_this_iter_s": 31.540908813476562, "time_total_s": 3335.240906715393, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": 
true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 3335.240906715393, "timesteps_since_restore": 396800, "iterations_since_restore": 31, "perf": {"cpu_util_percent": 42.184090909090905, "ram_util_percent": 56.9090909090909}}
+{"episode_reward_max": 187.0, "episode_reward_min": 9.0, "episode_reward_mean": 63.31, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 101.0}, "policy_reward_mean": {"ppo": 31.655}, "custom_metrics": {"sparse_reward_mean": 12.0, "sparse_reward_min": 0, "sparse_reward_max": 60, "shaped_reward_mean": 39.31, "shaped_reward_min": 9, "shaped_reward_max": 77, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 6.12, "onion_pickup_agent_0_min": 2, "onion_pickup_agent_0_max": 15, "onion_pickup_agent_1_mean": 6.89, "onion_pickup_agent_1_min": 0, "onion_pickup_agent_1_max": 16, "useful_onion_pickup_agent_0_mean": 4.48, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 11, "useful_onion_pickup_agent_1_mean": 5.08, "useful_onion_pickup_agent_1_min": 0, "useful_onion_pickup_agent_1_max": 12, "onion_drop_agent_0_mean": 2.57, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 10, "onion_drop_agent_1_mean": 2.41, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 10, "useful_onion_drop_agent_0_mean": 1.38, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 1.33, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 6, "potting_onion_agent_0_mean": 3.08, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 8, "potting_onion_agent_1_mean": 3.95, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 9, "dish_pickup_agent_0_mean": 4.02, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 3.61, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 0.77, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 4, "useful_dish_pickup_agent_1_mean": 0.62, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 3, "dish_drop_agent_0_mean": 2.27, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 8, "dish_drop_agent_1_mean": 1.98, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.76, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 5, "useful_dish_drop_agent_1_mean": 0.57, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 4, "soup_pickup_agent_0_mean": 2.24, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 1.81, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 1.26, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 4, "soup_delivery_agent_1_mean": 0.91, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 4, "soup_drop_agent_0_mean": 0.9, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 5, "soup_drop_agent_1_mean": 0.86, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 5, "optimal_onion_potting_agent_0_mean": 3.08, 
"optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 8, "optimal_onion_potting_agent_1_mean": 3.95, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 9, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 3.08, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 8, "viable_onion_potting_agent_1_mean": 3.95, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 9, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 
0}, "hist_stats": {"episode_reward": [87.0, 34.0, 187.0, 74.0, 128.0, 47.0, 23.0, 157.0, 12.0, 85.0, 20.0, 42.0, 20.0, 92.0, 82.0, 44.0, 9.0, 87.0, 20.0, 98.0, 31.0, 42.0, 83.0, 58.0, 45.0, 96.0, 81.0, 93.0, 36.0, 69.0, 88.0, 74.0, 95.0, 34.0, 50.0, 54.0, 90.0, 48.0, 179.0, 39.0, 60.0, 91.0, 39.0, 122.0, 20.0, 34.0, 42.0, 23.0, 50.0, 12.0, 20.0, 71.0, 66.0, 35.0, 44.0, 73.0, 64.0, 17.0, 131.0, 38.0, 41.0, 36.0, 105.0, 53.0, 39.0, 71.0, 20.0, 79.0, 34.0, 182.0, 133.0, 66.0, 23.0, 42.0, 53.0, 125.0, 22.0, 12.0, 87.0, 98.0, 9.0, 46.0, 30.0, 12.0, 37.0, 99.0, 63.0, 85.0, 77.0, 42.0, 133.0, 173.0, 66.0, 35.0, 39.0, 88.0, 34.0, 52.0, 82.0, 23.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [50.0, 37.0, 17.0, 17.0, 86.0, 101.0, 37.0, 37.0, 55.0, 73.0, 24.0, 23.0, 17.0, 6.0, 79.0, 78.0, 6.0, 6.0, 37.0, 48.0, 5.0, 15.0, 13.0, 29.0, 9.0, 11.0, 45.0, 47.0, 39.0, 43.0, 13.0, 31.0, 6.0, 3.0, 42.0, 45.0, 14.0, 6.0, 58.0, 40.0, 16.0, 15.0, 11.0, 31.0, 40.0, 43.0, 14.0, 44.0, 17.0, 28.0, 45.0, 51.0, 44.0, 37.0, 49.0, 44.0, 14.0, 22.0, 35.0, 34.0, 36.0, 52.0, 48.0, 26.0, 50.0, 45.0, 18.0, 16.0, 28.0, 22.0, 28.0, 26.0, 39.0, 51.0, 20.0, 28.0, 87.0, 92.0, 25.0, 14.0, 29.0, 31.0, 45.0, 46.0, 25.0, 14.0, 65.0, 57.0, 11.0, 9.0, 22.0, 12.0, 14.0, 28.0, 6.0, 17.0, 30.0, 20.0, 6.0, 6.0, 12.0, 8.0, 23.0, 48.0, 29.0, 37.0, 13.0, 22.0, 20.0, 24.0, 36.0, 37.0, 31.0, 33.0, 11.0, 6.0, 68.0, 63.0, 11.0, 27.0, 24.0, 17.0, 20.0, 16.0, 64.0, 41.0, 31.0, 22.0, 12.0, 27.0, 26.0, 45.0, 11.0, 9.0, 33.0, 46.0, 23.0, 
11.0, 86.0, 96.0, 68.0, 65.0, 37.0, 29.0, 17.0, 6.0, 6.0, 36.0, 22.0, 31.0, 62.0, 63.0, 8.0, 14.0, 3.0, 9.0, 41.0, 46.0, 55.0, 43.0, 0.0, 9.0, 28.0, 18.0, 19.0, 11.0, 9.0, 3.0, 23.0, 14.0, 59.0, 40.0, 31.0, 32.0, 45.0, 40.0, 39.0, 38.0, 19.0, 23.0, 67.0, 66.0, 85.0, 88.0, 34.0, 32.0, 20.0, 15.0, 22.0, 17.0, 37.0, 51.0, 20.0, 14.0, 27.0, 25.0, 43.0, 39.0, 11.0, 12.0]}, "sampler_perf": {"mean_env_wait_ms": 5.15345566138169, "mean_processing_ms": 0.16717489002220728, "mean_inference_ms": 2.4061553716842257}, "off_policy_estimator": {}, "info": {"num_steps_trained": 768000, "num_steps_sampled": 409600, "sample_time_ms": 22972.316, "load_time_ms": 38.916, "grad_time_ms": 10035.28, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 9.313225884932663e-11, "cur_lr": 0.0010000000474974513, "total_loss": -0.007412114646285772, "policy_loss": -0.007903209887444973, "vf_loss": 13.404266357421875, "vf_explained_var": 0.34650716185569763, "kl": 0.0011789536802098155, "entropy": 1.6986547708511353, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 409600, "episodes_total": 1024, "training_iteration": 32, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_20-09-07", "timestamp": 1660244947, "time_this_iter_s": 32.6441330909729, "time_total_s": 3367.885039806366, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": 
true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 3367.885039806366, "timesteps_since_restore": 409600, "iterations_since_restore": 32, "perf": {"cpu_util_percent": 40.13191489361702, "ram_util_percent": 57.20638297872342}}
+{"episode_reward_max": 187.0, "episode_reward_min": 9.0, "episode_reward_mean": 66.33, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 101.0}, "policy_reward_mean": {"ppo": 33.165}, "custom_metrics": {"sparse_reward_mean": 13.6, "sparse_reward_min": 0, "sparse_reward_max": 60, "shaped_reward_mean": 39.13, "shaped_reward_min": 9, "shaped_reward_max": 77, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 6.01, "onion_pickup_agent_0_min": 0, "onion_pickup_agent_0_max": 15, "onion_pickup_agent_1_mean": 6.99, "onion_pickup_agent_1_min": 0, "onion_pickup_agent_1_max": 13, "useful_onion_pickup_agent_0_mean": 4.25, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 11, "useful_onion_pickup_agent_1_mean": 5.15, "useful_onion_pickup_agent_1_min": 0, "useful_onion_pickup_agent_1_max": 12, "onion_drop_agent_0_mean": 2.61, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 10, "onion_drop_agent_1_mean": 2.39, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 10, "useful_onion_drop_agent_0_mean": 1.47, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 1.51, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 8, "potting_onion_agent_0_mean": 2.97, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 7, "potting_onion_agent_1_mean": 4.08, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 9, "dish_pickup_agent_0_mean": 3.85, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 3.66, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 0.71, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 3, "useful_dish_pickup_agent_1_mean": 0.6, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 3, "dish_drop_agent_0_mean": 2.07, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 8, "dish_drop_agent_1_mean": 2.06, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.65, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 5, "useful_dish_drop_agent_1_mean": 0.67, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 5, "soup_pickup_agent_0_mean": 2.1, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 1.91, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 1.16, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 4, "soup_delivery_agent_1_mean": 0.96, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 3, "soup_drop_agent_0_mean": 0.87, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 4, "soup_drop_agent_1_mean": 0.91, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 5, "optimal_onion_potting_agent_0_mean": 2.97, 
"optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 7, "optimal_onion_potting_agent_1_mean": 4.08, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 9, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 2.97, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 7, "viable_onion_potting_agent_1_mean": 4.08, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 9, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 
0}, "hist_stats": {"episode_reward": [9.0, 76.0, 98.0, 45.0, 41.0, 127.0, 12.0, 46.0, 70.0, 144.0, 71.0, 117.0, 111.0, 9.0, 54.0, 40.0, 79.0, 14.0, 62.0, 63.0, 106.0, 20.0, 27.0, 136.0, 90.0, 34.0, 52.0, 94.0, 117.0, 90.0, 39.0, 85.0, 39.0, 71.0, 20.0, 79.0, 34.0, 182.0, 133.0, 66.0, 23.0, 42.0, 53.0, 125.0, 22.0, 12.0, 87.0, 98.0, 9.0, 46.0, 30.0, 12.0, 37.0, 99.0, 63.0, 85.0, 77.0, 42.0, 133.0, 173.0, 66.0, 35.0, 39.0, 88.0, 34.0, 52.0, 82.0, 23.0, 87.0, 34.0, 187.0, 74.0, 128.0, 47.0, 23.0, 157.0, 12.0, 85.0, 20.0, 42.0, 20.0, 92.0, 82.0, 44.0, 9.0, 87.0, 20.0, 98.0, 31.0, 42.0, 83.0, 58.0, 45.0, 96.0, 81.0, 93.0, 36.0, 69.0, 88.0, 74.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [6.0, 3.0, 41.0, 35.0, 51.0, 47.0, 23.0, 22.0, 19.0, 22.0, 57.0, 70.0, 6.0, 6.0, 21.0, 25.0, 31.0, 39.0, 73.0, 71.0, 34.0, 37.0, 55.0, 62.0, 51.0, 60.0, 6.0, 3.0, 20.0, 34.0, 20.0, 20.0, 29.0, 50.0, 11.0, 3.0, 25.0, 37.0, 23.0, 40.0, 52.0, 54.0, 8.0, 12.0, 18.0, 9.0, 66.0, 70.0, 41.0, 49.0, 12.0, 22.0, 25.0, 27.0, 46.0, 48.0, 66.0, 51.0, 42.0, 48.0, 19.0, 20.0, 48.0, 37.0, 12.0, 27.0, 26.0, 45.0, 11.0, 9.0, 33.0, 46.0, 23.0, 11.0, 86.0, 96.0, 68.0, 65.0, 37.0, 29.0, 17.0, 6.0, 6.0, 36.0, 22.0, 31.0, 62.0, 63.0, 8.0, 14.0, 3.0, 9.0, 41.0, 46.0, 55.0, 43.0, 0.0, 9.0, 28.0, 18.0, 19.0, 11.0, 9.0, 3.0, 23.0, 14.0, 59.0, 40.0, 31.0, 32.0, 45.0, 40.0, 39.0, 38.0, 19.0, 23.0, 67.0, 66.0, 85.0, 88.0, 34.0, 32.0, 20.0, 15.0, 22.0, 17.0, 37.0, 51.0, 20.0, 14.0, 27.0, 25.0, 43.0, 39.0, 11.0, 12.0, 50.0, 
37.0, 17.0, 17.0, 86.0, 101.0, 37.0, 37.0, 55.0, 73.0, 24.0, 23.0, 17.0, 6.0, 79.0, 78.0, 6.0, 6.0, 37.0, 48.0, 5.0, 15.0, 13.0, 29.0, 9.0, 11.0, 45.0, 47.0, 39.0, 43.0, 13.0, 31.0, 6.0, 3.0, 42.0, 45.0, 14.0, 6.0, 58.0, 40.0, 16.0, 15.0, 11.0, 31.0, 40.0, 43.0, 14.0, 44.0, 17.0, 28.0, 45.0, 51.0, 44.0, 37.0, 49.0, 44.0, 14.0, 22.0, 35.0, 34.0, 36.0, 52.0, 48.0, 26.0]}, "sampler_perf": {"mean_env_wait_ms": 5.0079354762159145, "mean_processing_ms": 0.1673845002022688, "mean_inference_ms": 2.3652145638679087}, "off_policy_estimator": {}, "info": {"num_steps_trained": 792000, "num_steps_sampled": 422400, "sample_time_ms": 23410.855, "load_time_ms": 38.911, "grad_time_ms": 10160.504, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 4.6566129424663316e-11, "cur_lr": 0.0010000000474974513, "total_loss": -0.01017048116773367, "policy_loss": -0.010584059171378613, "vf_loss": 12.619880676269531, "vf_explained_var": 0.45027461647987366, "kl": 0.001254777773283422, "entropy": 1.6968183517456055, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 422400, "episodes_total": 1056, "training_iteration": 33, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_20-09-42", "timestamp": 1660244982, "time_this_iter_s": 35.00341510772705, "time_total_s": 3402.888454914093, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, 
"framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, 
"inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 3402.888454914093, "timesteps_since_restore": 422400, "iterations_since_restore": 33, "perf": {"cpu_util_percent": 42.62857142857143, 
"ram_util_percent": 58.25510204081633}}
+{"episode_reward_max": 187.0, "episode_reward_min": 9.0, "episode_reward_mean": 68.79, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 3.0}, "policy_reward_max": {"ppo": 101.0}, "policy_reward_mean": {"ppo": 34.395}, "custom_metrics": {"sparse_reward_mean": 13.8, "sparse_reward_min": 0, "sparse_reward_max": 60, "shaped_reward_mean": 41.19, "shaped_reward_min": 9, "shaped_reward_max": 77, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 5.84, "onion_pickup_agent_0_min": 0, "onion_pickup_agent_0_max": 15, "onion_pickup_agent_1_mean": 7.2, "onion_pickup_agent_1_min": 2, "onion_pickup_agent_1_max": 14, "useful_onion_pickup_agent_0_mean": 4.04, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 11, "useful_onion_pickup_agent_1_mean": 5.36, "useful_onion_pickup_agent_1_min": 1, "useful_onion_pickup_agent_1_max": 12, "onion_drop_agent_0_mean": 2.45, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 10, "onion_drop_agent_1_mean": 2.53, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 8, "useful_onion_drop_agent_0_mean": 1.36, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 1.62, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 8, "potting_onion_agent_0_mean": 3.01, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 7, "potting_onion_agent_1_mean": 4.16, "potting_onion_agent_1_min": 1, "potting_onion_agent_1_max": 9, "dish_pickup_agent_0_mean": 3.86, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 3.65, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 0.85, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 3, "useful_dish_pickup_agent_1_mean": 0.66, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 4, "dish_drop_agent_0_mean": 2.06, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 10, "dish_drop_agent_1_mean": 1.86, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.61, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 5, "useful_dish_drop_agent_1_mean": 0.7, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 5, "soup_pickup_agent_0_mean": 2.17, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 2.21, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 6, "soup_delivery_agent_0_mean": 1.04, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 4, "soup_delivery_agent_1_mean": 1.17, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 3, "soup_drop_agent_0_mean": 1.04, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 6, "soup_drop_agent_1_mean": 0.96, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 5, "optimal_onion_potting_agent_0_mean": 3.01, 
"optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 7, "optimal_onion_potting_agent_1_mean": 4.16, "optimal_onion_potting_agent_1_min": 1, "optimal_onion_potting_agent_1_max": 9, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 3.01, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 7, "viable_onion_potting_agent_1_mean": 4.16, "viable_onion_potting_agent_1_min": 1, "viable_onion_potting_agent_1_max": 9, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 
0}, "hist_stats": {"episode_reward": [90.0, 79.0, 36.0, 58.0, 49.0, 76.0, 79.0, 98.0, 136.0, 87.0, 50.0, 145.0, 28.0, 23.0, 110.0, 19.0, 134.0, 14.0, 12.0, 58.0, 77.0, 38.0, 99.0, 80.0, 142.0, 42.0, 124.0, 74.0, 93.0, 23.0, 103.0, 90.0, 34.0, 52.0, 82.0, 23.0, 87.0, 34.0, 187.0, 74.0, 128.0, 47.0, 23.0, 157.0, 12.0, 85.0, 20.0, 42.0, 20.0, 92.0, 82.0, 44.0, 9.0, 87.0, 20.0, 98.0, 31.0, 42.0, 83.0, 58.0, 45.0, 96.0, 81.0, 93.0, 36.0, 69.0, 88.0, 74.0, 9.0, 76.0, 98.0, 45.0, 41.0, 127.0, 12.0, 46.0, 70.0, 144.0, 71.0, 117.0, 111.0, 9.0, 54.0, 40.0, 79.0, 14.0, 62.0, 63.0, 106.0, 20.0, 27.0, 136.0, 90.0, 34.0, 52.0, 94.0, 117.0, 90.0, 39.0, 85.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [42.0, 48.0, 41.0, 38.0, 12.0, 24.0, 26.0, 32.0, 24.0, 25.0, 35.0, 41.0, 36.0, 43.0, 44.0, 54.0, 74.0, 62.0, 45.0, 42.0, 28.0, 22.0, 65.0, 80.0, 14.0, 14.0, 6.0, 17.0, 40.0, 70.0, 6.0, 13.0, 70.0, 64.0, 11.0, 3.0, 3.0, 9.0, 36.0, 22.0, 39.0, 38.0, 19.0, 19.0, 54.0, 45.0, 35.0, 45.0, 63.0, 79.0, 16.0, 26.0, 63.0, 61.0, 34.0, 40.0, 48.0, 45.0, 12.0, 11.0, 49.0, 54.0, 41.0, 49.0, 20.0, 14.0, 27.0, 25.0, 43.0, 39.0, 11.0, 12.0, 50.0, 37.0, 17.0, 17.0, 86.0, 101.0, 37.0, 37.0, 55.0, 73.0, 24.0, 23.0, 17.0, 6.0, 79.0, 78.0, 6.0, 6.0, 37.0, 48.0, 5.0, 15.0, 13.0, 29.0, 9.0, 11.0, 45.0, 47.0, 39.0, 43.0, 13.0, 31.0, 6.0, 3.0, 42.0, 45.0, 14.0, 6.0, 58.0, 40.0, 16.0, 15.0, 11.0, 31.0, 40.0, 43.0, 14.0, 44.0, 17.0, 28.0, 45.0, 51.0, 44.0, 37.0, 49.0, 44.0, 14.0, 22.0, 35.0, 34.0, 36.0, 52.0, 48.0, 26.0, 
6.0, 3.0, 41.0, 35.0, 51.0, 47.0, 23.0, 22.0, 19.0, 22.0, 57.0, 70.0, 6.0, 6.0, 21.0, 25.0, 31.0, 39.0, 73.0, 71.0, 34.0, 37.0, 55.0, 62.0, 51.0, 60.0, 6.0, 3.0, 20.0, 34.0, 20.0, 20.0, 29.0, 50.0, 11.0, 3.0, 25.0, 37.0, 23.0, 40.0, 52.0, 54.0, 8.0, 12.0, 18.0, 9.0, 66.0, 70.0, 41.0, 49.0, 12.0, 22.0, 25.0, 27.0, 46.0, 48.0, 66.0, 51.0, 42.0, 48.0, 19.0, 20.0, 48.0, 37.0]}, "sampler_perf": {"mean_env_wait_ms": 4.8713540125875445, "mean_processing_ms": 0.16758394883061775, "mean_inference_ms": 2.326528288901312}, "off_policy_estimator": {}, "info": {"num_steps_trained": 816000, "num_steps_sampled": 435200, "sample_time_ms": 23336.79, "load_time_ms": 39.064, "grad_time_ms": 10020.897, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 2.3283064712331658e-11, "cur_lr": 0.0010000000474974513, "total_loss": -0.007203067187219858, "policy_loss": -0.007930143736302853, "vf_loss": 15.71717357635498, "vf_explained_var": 0.34764334559440613, "kl": 0.0010395334102213383, "entropy": 1.6892824172973633, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 435200, "episodes_total": 1088, "training_iteration": 34, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_20-10-12", "timestamp": 1660245012, "time_this_iter_s": 30.092119216918945, "time_total_s": 3432.980574131012, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, 
"framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, 
"inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 3432.980574131012, "timesteps_since_restore": 435200, "iterations_since_restore": 34, "perf": {"cpu_util_percent": 41.03023255813954, 
"ram_util_percent": 57.66976744186048}}
+{"episode_reward_max": 146.0, "episode_reward_min": 9.0, "episode_reward_mean": 73.78, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 3.0}, "policy_reward_max": {"ppo": 80.0}, "policy_reward_mean": {"ppo": 36.89}, "custom_metrics": {"sparse_reward_mean": 15.0, "sparse_reward_min": 0, "sparse_reward_max": 40, "shaped_reward_mean": 43.78, "shaped_reward_min": 9, "shaped_reward_max": 89, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 5.77, "onion_pickup_agent_0_min": 0, "onion_pickup_agent_0_max": 13, "onion_pickup_agent_1_mean": 7.47, "onion_pickup_agent_1_min": 2, "onion_pickup_agent_1_max": 15, "useful_onion_pickup_agent_0_mean": 4.16, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 12, "useful_onion_pickup_agent_1_mean": 5.66, "useful_onion_pickup_agent_1_min": 1, "useful_onion_pickup_agent_1_max": 12, "onion_drop_agent_0_mean": 2.29, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 9, "onion_drop_agent_1_mean": 2.62, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 8, "useful_onion_drop_agent_0_mean": 1.2, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 7, "useful_onion_drop_agent_1_mean": 1.57, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 8, "potting_onion_agent_0_mean": 3.1, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 7, "potting_onion_agent_1_mean": 4.4, "potting_onion_agent_1_min": 1, "potting_onion_agent_1_max": 9, "dish_pickup_agent_0_mean": 4.23, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 13, "dish_pickup_agent_1_mean": 3.69, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 15, "useful_dish_pickup_agent_0_mean": 0.95, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 3, "useful_dish_pickup_agent_1_mean": 0.66, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 4, "dish_drop_agent_0_mean": 2.28, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 10, "dish_drop_agent_1_mean": 1.81, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 12, "useful_dish_drop_agent_0_mean": 0.62, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 7, "useful_dish_drop_agent_1_mean": 0.69, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 5, "soup_pickup_agent_0_mean": 2.44, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 12, "soup_pickup_agent_1_mean": 2.28, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 6, "soup_delivery_agent_0_mean": 1.16, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 4, "soup_delivery_agent_1_mean": 1.32, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 4, "soup_drop_agent_0_mean": 1.16, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 9, "soup_drop_agent_1_mean": 0.86, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 5, "optimal_onion_potting_agent_0_mean": 3.1, 
"optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 7, "optimal_onion_potting_agent_1_mean": 4.4, "optimal_onion_potting_agent_1_min": 1, "optimal_onion_potting_agent_1_max": 9, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 3.1, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 7, "viable_onion_potting_agent_1_mean": 4.4, "viable_onion_potting_agent_1_min": 1, "viable_onion_potting_agent_1_max": 9, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, 
"hist_stats": {"episode_reward": [42.0, 125.0, 20.0, 87.0, 46.0, 53.0, 93.0, 74.0, 121.0, 84.0, 139.0, 75.0, 57.0, 48.0, 146.0, 44.0, 61.0, 90.0, 20.0, 95.0, 31.0, 125.0, 145.0, 98.0, 129.0, 68.0, 66.0, 48.0, 105.0, 87.0, 47.0, 98.0, 36.0, 69.0, 88.0, 74.0, 9.0, 76.0, 98.0, 45.0, 41.0, 127.0, 12.0, 46.0, 70.0, 144.0, 71.0, 117.0, 111.0, 9.0, 54.0, 40.0, 79.0, 14.0, 62.0, 63.0, 106.0, 20.0, 27.0, 136.0, 90.0, 34.0, 52.0, 94.0, 117.0, 90.0, 39.0, 85.0, 90.0, 79.0, 36.0, 58.0, 49.0, 76.0, 79.0, 98.0, 136.0, 87.0, 50.0, 145.0, 28.0, 23.0, 110.0, 19.0, 134.0, 14.0, 12.0, 58.0, 77.0, 38.0, 99.0, 80.0, 142.0, 42.0, 124.0, 74.0, 93.0, 23.0, 103.0, 90.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [19.0, 23.0, 67.0, 58.0, 11.0, 9.0, 45.0, 42.0, 16.0, 30.0, 13.0, 40.0, 35.0, 58.0, 37.0, 37.0, 62.0, 59.0, 33.0, 51.0, 72.0, 67.0, 25.0, 50.0, 28.0, 29.0, 27.0, 21.0, 73.0, 73.0, 14.0, 30.0, 32.0, 29.0, 48.0, 42.0, 14.0, 6.0, 50.0, 45.0, 11.0, 20.0, 71.0, 54.0, 73.0, 72.0, 56.0, 42.0, 64.0, 65.0, 31.0, 37.0, 26.0, 40.0, 20.0, 28.0, 54.0, 51.0, 41.0, 46.0, 15.0, 32.0, 53.0, 45.0, 14.0, 22.0, 35.0, 34.0, 36.0, 52.0, 48.0, 26.0, 6.0, 3.0, 41.0, 35.0, 51.0, 47.0, 23.0, 22.0, 19.0, 22.0, 57.0, 70.0, 6.0, 6.0, 21.0, 25.0, 31.0, 39.0, 73.0, 71.0, 34.0, 37.0, 55.0, 62.0, 51.0, 60.0, 6.0, 3.0, 20.0, 34.0, 20.0, 20.0, 29.0, 50.0, 11.0, 3.0, 25.0, 37.0, 23.0, 40.0, 52.0, 54.0, 8.0, 12.0, 18.0, 9.0, 66.0, 70.0, 41.0, 49.0, 12.0, 22.0, 25.0, 27.0, 46.0, 48.0, 66.0, 51.0, 42.0, 48.0, 19.0, 20.0, 48.0, 
37.0, 42.0, 48.0, 41.0, 38.0, 12.0, 24.0, 26.0, 32.0, 24.0, 25.0, 35.0, 41.0, 36.0, 43.0, 44.0, 54.0, 74.0, 62.0, 45.0, 42.0, 28.0, 22.0, 65.0, 80.0, 14.0, 14.0, 6.0, 17.0, 40.0, 70.0, 6.0, 13.0, 70.0, 64.0, 11.0, 3.0, 3.0, 9.0, 36.0, 22.0, 39.0, 38.0, 19.0, 19.0, 54.0, 45.0, 35.0, 45.0, 63.0, 79.0, 16.0, 26.0, 63.0, 61.0, 34.0, 40.0, 48.0, 45.0, 12.0, 11.0, 49.0, 54.0, 41.0, 49.0]}, "sampler_perf": {"mean_env_wait_ms": 4.742888771715567, "mean_processing_ms": 0.4291925216501232, "mean_inference_ms": 2.2890734350045245}, "off_policy_estimator": {}, "info": {"num_steps_trained": 840000, "num_steps_sampled": 448000, "sample_time_ms": 59523.327, "load_time_ms": 38.502, "grad_time_ms": 106209.033, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 1.1641532356165829e-11, "cur_lr": 0.0010000000474974513, "total_loss": -0.007091447710990906, "policy_loss": -0.007865053601562977, "vf_loss": 16.12926483154297, "vf_explained_var": 0.35502591729164124, "kl": 0.0012615231098607183, "entropy": 1.6786518096923828, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 448000, "episodes_total": 1120, "training_iteration": 35, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_20-32-51", "timestamp": 1660246371, "time_this_iter_s": 1359.4666819572449, "time_total_s": 4792.447256088257, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": 
null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, 
"inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 4792.447256088257, "timesteps_since_restore": 448000, "iterations_since_restore": 35, "perf": {"cpu_util_percent": 73.38606557377048, 
"ram_util_percent": 58.19344262295081}}
+{"episode_reward_max": 195.0, "episode_reward_min": 9.0, "episode_reward_mean": 78.31, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 3.0}, "policy_reward_max": {"ppo": 98.0}, "policy_reward_mean": {"ppo": 39.155}, "custom_metrics": {"sparse_reward_mean": 16.0, "sparse_reward_min": 0, "sparse_reward_max": 60, "shaped_reward_mean": 46.31, "shaped_reward_min": 9, "shaped_reward_max": 89, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 5.99, "onion_pickup_agent_0_min": 0, "onion_pickup_agent_0_max": 13, "onion_pickup_agent_1_mean": 7.5, "onion_pickup_agent_1_min": 1, "onion_pickup_agent_1_max": 15, "useful_onion_pickup_agent_0_mean": 4.36, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 12, "useful_onion_pickup_agent_1_mean": 5.74, "useful_onion_pickup_agent_1_min": 1, "useful_onion_pickup_agent_1_max": 12, "onion_drop_agent_0_mean": 2.21, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 8, "onion_drop_agent_1_mean": 2.51, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 7, "useful_onion_drop_agent_0_mean": 1.25, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 7, "useful_onion_drop_agent_1_mean": 1.32, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 6, "potting_onion_agent_0_mean": 3.39, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 7, "potting_onion_agent_1_mean": 4.55, "potting_onion_agent_1_min": 1, "potting_onion_agent_1_max": 10, "dish_pickup_agent_0_mean": 4.39, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 13, "dish_pickup_agent_1_mean": 3.86, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 15, "useful_dish_pickup_agent_0_mean": 0.99, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 3, "useful_dish_pickup_agent_1_mean": 0.79, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 4, "dish_drop_agent_0_mean": 2.33, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 10, "dish_drop_agent_1_mean": 1.93, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 12, "useful_dish_drop_agent_0_mean": 0.6, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 7, "useful_dish_drop_agent_1_mean": 0.65, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 4, "soup_pickup_agent_0_mean": 2.62, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 12, "soup_pickup_agent_1_mean": 2.33, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 6, "soup_delivery_agent_0_mean": 1.3, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 4, "soup_delivery_agent_1_mean": 1.4, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 4, "soup_drop_agent_0_mean": 1.2, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 9, "soup_drop_agent_1_mean": 0.83, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 5, "optimal_onion_potting_agent_0_mean": 3.39, 
"optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 7, "optimal_onion_potting_agent_1_mean": 4.55, "optimal_onion_potting_agent_1_min": 1, "optimal_onion_potting_agent_1_max": 10, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 3.39, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 7, "viable_onion_potting_agent_1_mean": 4.55, "viable_onion_potting_agent_1_min": 1, "viable_onion_potting_agent_1_max": 10, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 
0}, "hist_stats": {"episode_reward": [31.0, 142.0, 23.0, 96.0, 64.0, 98.0, 65.0, 112.0, 36.0, 23.0, 42.0, 113.0, 33.0, 41.0, 98.0, 148.0, 130.0, 119.0, 39.0, 88.0, 42.0, 142.0, 105.0, 120.0, 179.0, 195.0, 12.0, 12.0, 56.0, 60.0, 9.0, 94.0, 117.0, 90.0, 39.0, 85.0, 90.0, 79.0, 36.0, 58.0, 49.0, 76.0, 79.0, 98.0, 136.0, 87.0, 50.0, 145.0, 28.0, 23.0, 110.0, 19.0, 134.0, 14.0, 12.0, 58.0, 77.0, 38.0, 99.0, 80.0, 142.0, 42.0, 124.0, 74.0, 93.0, 23.0, 103.0, 90.0, 42.0, 125.0, 20.0, 87.0, 46.0, 53.0, 93.0, 74.0, 121.0, 84.0, 139.0, 75.0, 57.0, 48.0, 146.0, 44.0, 61.0, 90.0, 20.0, 95.0, 31.0, 125.0, 145.0, 98.0, 129.0, 68.0, 66.0, 48.0, 105.0, 87.0, 47.0, 98.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [15.0, 16.0, 76.0, 66.0, 9.0, 14.0, 45.0, 51.0, 33.0, 31.0, 43.0, 55.0, 34.0, 31.0, 60.0, 52.0, 10.0, 26.0, 3.0, 20.0, 22.0, 20.0, 59.0, 54.0, 17.0, 16.0, 22.0, 19.0, 46.0, 52.0, 74.0, 74.0, 59.0, 71.0, 54.0, 65.0, 20.0, 19.0, 47.0, 41.0, 26.0, 16.0, 73.0, 69.0, 49.0, 56.0, 59.0, 61.0, 81.0, 98.0, 98.0, 97.0, 3.0, 9.0, 6.0, 6.0, 30.0, 26.0, 32.0, 28.0, 6.0, 3.0, 46.0, 48.0, 66.0, 51.0, 42.0, 48.0, 19.0, 20.0, 48.0, 37.0, 42.0, 48.0, 41.0, 38.0, 12.0, 24.0, 26.0, 32.0, 24.0, 25.0, 35.0, 41.0, 36.0, 43.0, 44.0, 54.0, 74.0, 62.0, 45.0, 42.0, 28.0, 22.0, 65.0, 80.0, 14.0, 14.0, 6.0, 17.0, 40.0, 70.0, 6.0, 13.0, 70.0, 64.0, 11.0, 3.0, 3.0, 9.0, 36.0, 22.0, 39.0, 38.0, 19.0, 19.0, 54.0, 45.0, 35.0, 45.0, 63.0, 79.0, 16.0, 26.0, 63.0, 61.0, 34.0, 40.0, 48.0, 45.0, 12.0, 11.0, 49.0, 54.0, 
41.0, 49.0, 19.0, 23.0, 67.0, 58.0, 11.0, 9.0, 45.0, 42.0, 16.0, 30.0, 13.0, 40.0, 35.0, 58.0, 37.0, 37.0, 62.0, 59.0, 33.0, 51.0, 72.0, 67.0, 25.0, 50.0, 28.0, 29.0, 27.0, 21.0, 73.0, 73.0, 14.0, 30.0, 32.0, 29.0, 48.0, 42.0, 14.0, 6.0, 50.0, 45.0, 11.0, 20.0, 71.0, 54.0, 73.0, 72.0, 56.0, 42.0, 64.0, 65.0, 31.0, 37.0, 26.0, 40.0, 20.0, 28.0, 54.0, 51.0, 41.0, 46.0, 15.0, 32.0, 53.0, 45.0]}, "sampler_perf": {"mean_env_wait_ms": 4.6429756749225914, "mean_processing_ms": 0.6884190143076668, "mean_inference_ms": 3.1849506897639785}, "off_policy_estimator": {}, "info": {"num_steps_trained": 864000, "num_steps_sampled": 460800, "sample_time_ms": 197454.884, "load_time_ms": 38.154, "grad_time_ms": 142553.757, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 5.8207661780829145e-12, "cur_lr": 0.0010000000474974513, "total_loss": -0.009596621617674828, "policy_loss": -0.010532871820032597, "vf_loss": 17.772741317749023, "vf_explained_var": 0.41850244998931885, "kl": 0.0012102305190637708, "entropy": 1.6820656061172485, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 460800, "episodes_total": 1152, "training_iteration": 36, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-02-29", "timestamp": 1660248149, "time_this_iter_s": 1777.6666460037231, "time_total_s": 6570.11390209198, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, 
"state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, 
"inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 6570.11390209198, "timesteps_since_restore": 460800, "iterations_since_restore": 36, "perf": {"cpu_util_percent": 79.74032921810701, 
"ram_util_percent": 58.72098765432099}}
+{"episode_reward_max": 195.0, "episode_reward_min": 9.0, "episode_reward_mean": 82.27, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 3.0}, "policy_reward_max": {"ppo": 98.0}, "policy_reward_mean": {"ppo": 41.135}, "custom_metrics": {"sparse_reward_mean": 16.0, "sparse_reward_min": 0, "sparse_reward_max": 60, "shaped_reward_mean": 50.27, "shaped_reward_min": 9, "shaped_reward_max": 89, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 6.38, "onion_pickup_agent_0_min": 0, "onion_pickup_agent_0_max": 13, "onion_pickup_agent_1_mean": 7.68, "onion_pickup_agent_1_min": 1, "onion_pickup_agent_1_max": 15, "useful_onion_pickup_agent_0_mean": 4.88, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 12, "useful_onion_pickup_agent_1_mean": 6.05, "useful_onion_pickup_agent_1_min": 1, "useful_onion_pickup_agent_1_max": 12, "onion_drop_agent_0_mean": 2.31, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 8, "onion_drop_agent_1_mean": 2.3, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 7, "useful_onion_drop_agent_0_mean": 1.31, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 7, "useful_onion_drop_agent_1_mean": 1.17, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 3.66, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 11, "potting_onion_agent_1_mean": 4.96, "potting_onion_agent_1_min": 1, "potting_onion_agent_1_max": 10, "dish_pickup_agent_0_mean": 4.49, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 13, "dish_pickup_agent_1_mean": 4.05, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 15, "useful_dish_pickup_agent_0_mean": 1.05, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 4, "useful_dish_pickup_agent_1_mean": 0.82, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 4, "dish_drop_agent_0_mean": 2.17, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 10, "dish_drop_agent_1_mean": 2.05, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 12, "useful_dish_drop_agent_0_mean": 0.62, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 7, "useful_dish_drop_agent_1_mean": 0.64, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 4, "soup_pickup_agent_0_mean": 2.82, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 12, "soup_pickup_agent_1_mean": 2.31, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 1.57, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 5, "soup_delivery_agent_1_mean": 1.45, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 4, "soup_drop_agent_0_mean": 1.13, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 9, "soup_drop_agent_1_mean": 0.77, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 6, "optimal_onion_potting_agent_0_mean": 3.66, 
"optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 11, "optimal_onion_potting_agent_1_mean": 4.96, "optimal_onion_potting_agent_1_min": 1, "optimal_onion_potting_agent_1_max": 10, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 3.66, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 11, "viable_onion_potting_agent_1_mean": 4.96, "viable_onion_potting_agent_1_min": 1, "viable_onion_potting_agent_1_max": 10, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [118.0, 107.0, 31.0, 150.0, 71.0, 58.0, 88.0, 100.0, 155.0, 98.0, 42.0, 110.0, 38.0, 94.0, 61.0, 77.0, 59.0, 66.0, 65.0, 36.0, 130.0, 67.0, 112.0, 72.0, 164.0, 45.0, 111.0, 149.0, 37.0, 82.0, 93.0, 98.0, 93.0, 23.0, 103.0, 90.0, 42.0, 125.0, 20.0, 87.0, 46.0, 53.0, 93.0, 74.0, 121.0, 84.0, 139.0, 75.0, 57.0, 48.0, 146.0, 44.0, 61.0, 90.0, 20.0, 95.0, 31.0, 125.0, 145.0, 98.0, 129.0, 68.0, 66.0, 48.0, 105.0, 87.0, 47.0, 98.0, 31.0, 142.0, 23.0, 96.0, 64.0, 98.0, 65.0, 112.0, 36.0, 23.0, 42.0, 113.0, 33.0, 41.0, 98.0, 148.0, 130.0, 119.0, 39.0, 88.0, 42.0, 142.0, 105.0, 120.0, 179.0, 195.0, 12.0, 12.0, 56.0, 60.0, 9.0, 94.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [61.0, 57.0, 48.0, 59.0, 16.0, 15.0, 60.0, 90.0, 26.0, 45.0, 40.0, 18.0, 45.0, 43.0, 63.0, 37.0, 78.0, 77.0, 51.0, 47.0, 11.0, 31.0, 60.0, 50.0, 30.0, 8.0, 48.0, 46.0, 37.0, 24.0, 37.0, 40.0, 25.0, 34.0, 29.0, 37.0, 39.0, 26.0, 19.0, 17.0, 62.0, 68.0, 44.0, 23.0, 61.0, 51.0, 39.0, 33.0, 79.0, 85.0, 17.0, 28.0, 56.0, 55.0, 73.0, 76.0, 12.0, 25.0, 26.0, 56.0, 55.0, 38.0, 48.0, 50.0, 48.0, 45.0, 12.0, 11.0, 49.0, 54.0, 41.0, 49.0, 19.0, 23.0, 67.0, 58.0, 11.0, 9.0, 45.0, 42.0, 16.0, 30.0, 13.0, 40.0, 35.0, 58.0, 37.0, 37.0, 62.0, 59.0, 33.0, 51.0, 72.0, 67.0, 25.0, 50.0, 28.0, 29.0, 27.0, 21.0, 73.0, 73.0, 14.0, 30.0, 32.0, 29.0, 48.0, 42.0, 14.0, 6.0, 50.0, 45.0, 11.0, 20.0, 71.0, 54.0, 73.0, 72.0, 56.0, 42.0, 64.0, 65.0, 31.0, 37.0, 26.0, 
40.0, 20.0, 28.0, 54.0, 51.0, 41.0, 46.0, 15.0, 32.0, 53.0, 45.0, 15.0, 16.0, 76.0, 66.0, 9.0, 14.0, 45.0, 51.0, 33.0, 31.0, 43.0, 55.0, 34.0, 31.0, 60.0, 52.0, 10.0, 26.0, 3.0, 20.0, 22.0, 20.0, 59.0, 54.0, 17.0, 16.0, 22.0, 19.0, 46.0, 52.0, 74.0, 74.0, 59.0, 71.0, 54.0, 65.0, 20.0, 19.0, 47.0, 41.0, 26.0, 16.0, 73.0, 69.0, 49.0, 56.0, 59.0, 61.0, 81.0, 98.0, 98.0, 97.0, 3.0, 9.0, 6.0, 6.0, 30.0, 26.0, 32.0, 28.0, 6.0, 3.0, 46.0, 48.0]}, "sampler_perf": {"mean_env_wait_ms": 4.550001067823368, "mean_processing_ms": 0.9407599736993785, "mean_inference_ms": 4.060064536997679}, "off_policy_estimator": {}, "info": {"num_steps_trained": 888000, "num_steps_sampled": 473600, "sample_time_ms": 197652.347, "load_time_ms": 38.247, "grad_time_ms": 142451.276, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 2.9103830890414573e-12, "cur_lr": 0.0010000000474974513, "total_loss": -0.00908196996897459, "policy_loss": -0.009920346550643444, "vf_loss": 16.691673278808594, "vf_explained_var": 0.3790724277496338, "kl": 0.0013888808898627758, "entropy": 1.661569595336914, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 473600, "episodes_total": 1184, "training_iteration": 37, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-03-05", "timestamp": 1660248185, "time_this_iter_s": 36.35908007621765, "time_total_s": 6606.472982168198, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 
256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, 
"tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 6606.472982168198, "timesteps_since_restore": 473600, "iterations_since_restore": 
37, "perf": {"cpu_util_percent": 52.89999999999999, "ram_util_percent": 59.76923076923076}}
+{"episode_reward_max": 195.0, "episode_reward_min": 9.0, "episode_reward_mean": 82.44, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 3.0}, "policy_reward_max": {"ppo": 98.0}, "policy_reward_mean": {"ppo": 41.22}, "custom_metrics": {"sparse_reward_mean": 15.6, "sparse_reward_min": 0, "sparse_reward_max": 60, "shaped_reward_mean": 51.24, "shaped_reward_min": 9, "shaped_reward_max": 84, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 6.6, "onion_pickup_agent_0_min": 0, "onion_pickup_agent_0_max": 13, "onion_pickup_agent_1_mean": 7.44, "onion_pickup_agent_1_min": 1, "onion_pickup_agent_1_max": 13, "useful_onion_pickup_agent_0_mean": 4.92, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 11, "useful_onion_pickup_agent_1_mean": 5.88, "useful_onion_pickup_agent_1_min": 1, "useful_onion_pickup_agent_1_max": 12, "onion_drop_agent_0_mean": 2.4, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 
8, "onion_drop_agent_1_mean": 2.21, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 7, "useful_onion_drop_agent_0_mean": 1.45, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 7, "useful_onion_drop_agent_1_mean": 1.21, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 6, "potting_onion_agent_0_mean": 3.85, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 11, "potting_onion_agent_1_mean": 4.87, "potting_onion_agent_1_min": 1, "potting_onion_agent_1_max": 10, "dish_pickup_agent_0_mean": 4.4, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 4.09, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 12, "useful_dish_pickup_agent_0_mean": 1.1, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 4, "useful_dish_pickup_agent_1_mean": 0.91, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 3, "dish_drop_agent_0_mean": 2.04, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 2.02, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 9, "useful_dish_drop_agent_0_mean": 0.52, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 4, "useful_dish_drop_agent_1_mean": 0.59, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 4, "soup_pickup_agent_0_mean": 2.75, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 10, "soup_pickup_agent_1_mean": 2.47, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 1.51, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 5, "soup_delivery_agent_1_mean": 1.42, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 4, "soup_drop_agent_0_mean": 1.09, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 9, "soup_drop_agent_1_mean": 0.97, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 6, "optimal_onion_potting_agent_0_mean": 3.85, "optimal_onion_potting_agent_0_min": 0, 
"optimal_onion_potting_agent_0_max": 11, "optimal_onion_potting_agent_1_mean": 4.87, "optimal_onion_potting_agent_1_min": 1, "optimal_onion_potting_agent_1_max": 10, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 3.85, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 11, "viable_onion_potting_agent_1_mean": 4.87, "viable_onion_potting_agent_1_min": 1, "viable_onion_potting_agent_1_max": 10, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": 
[155.0, 107.0, 66.0, 64.0, 59.0, 130.0, 80.0, 101.0, 98.0, 109.0, 71.0, 68.0, 84.0, 79.0, 57.0, 20.0, 37.0, 82.0, 48.0, 64.0, 84.0, 113.0, 101.0, 62.0, 133.0, 71.0, 23.0, 109.0, 125.0, 23.0, 58.0, 75.0, 105.0, 87.0, 47.0, 98.0, 31.0, 142.0, 23.0, 96.0, 64.0, 98.0, 65.0, 112.0, 36.0, 23.0, 42.0, 113.0, 33.0, 41.0, 98.0, 148.0, 130.0, 119.0, 39.0, 88.0, 42.0, 142.0, 105.0, 120.0, 179.0, 195.0, 12.0, 12.0, 56.0, 60.0, 9.0, 94.0, 118.0, 107.0, 31.0, 150.0, 71.0, 58.0, 88.0, 100.0, 155.0, 98.0, 42.0, 110.0, 38.0, 94.0, 61.0, 77.0, 59.0, 66.0, 65.0, 36.0, 130.0, 67.0, 112.0, 72.0, 164.0, 45.0, 111.0, 149.0, 37.0, 82.0, 93.0, 98.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [82.0, 73.0, 64.0, 43.0, 37.0, 29.0, 30.0, 34.0, 28.0, 31.0, 62.0, 68.0, 37.0, 43.0, 50.0, 51.0, 44.0, 54.0, 50.0, 59.0, 42.0, 29.0, 29.0, 39.0, 42.0, 42.0, 34.0, 45.0, 37.0, 20.0, 8.0, 12.0, 20.0, 17.0, 42.0, 40.0, 22.0, 26.0, 38.0, 26.0, 34.0, 50.0, 54.0, 59.0, 55.0, 46.0, 40.0, 22.0, 60.0, 73.0, 37.0, 34.0, 11.0, 12.0, 51.0, 58.0, 56.0, 69.0, 17.0, 6.0, 28.0, 30.0, 39.0, 36.0, 54.0, 51.0, 41.0, 46.0, 15.0, 32.0, 53.0, 45.0, 15.0, 16.0, 76.0, 66.0, 9.0, 14.0, 45.0, 51.0, 33.0, 31.0, 43.0, 55.0, 34.0, 31.0, 60.0, 52.0, 10.0, 26.0, 3.0, 20.0, 22.0, 20.0, 59.0, 54.0, 17.0, 16.0, 22.0, 19.0, 46.0, 52.0, 74.0, 74.0, 59.0, 71.0, 54.0, 65.0, 20.0, 19.0, 47.0, 41.0, 26.0, 16.0, 73.0, 69.0, 49.0, 56.0, 59.0, 61.0, 81.0, 98.0, 98.0, 97.0, 3.0, 9.0, 6.0, 6.0, 30.0, 26.0, 32.0, 28.0, 6.0, 3.0, 46.0, 48.0, 61.0, 57.0, 
48.0, 59.0, 16.0, 15.0, 60.0, 90.0, 26.0, 45.0, 40.0, 18.0, 45.0, 43.0, 63.0, 37.0, 78.0, 77.0, 51.0, 47.0, 11.0, 31.0, 60.0, 50.0, 30.0, 8.0, 48.0, 46.0, 37.0, 24.0, 37.0, 40.0, 25.0, 34.0, 29.0, 37.0, 39.0, 26.0, 19.0, 17.0, 62.0, 68.0, 44.0, 23.0, 61.0, 51.0, 39.0, 33.0, 79.0, 85.0, 17.0, 28.0, 56.0, 55.0, 73.0, 76.0, 12.0, 25.0, 26.0, 56.0, 55.0, 38.0, 48.0, 50.0]}, "sampler_perf": {"mean_env_wait_ms": 4.462790236915632, "mean_processing_ms": 0.9577209626577212, "mean_inference_ms": 4.91290526345304}, "off_policy_estimator": {}, "info": {"num_steps_trained": 912000, "num_steps_sampled": 486400, "sample_time_ms": 197395.402, "load_time_ms": 38.358, "grad_time_ms": 142364.304, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 1.4551915445207286e-12, "cur_lr": 0.0010000000474974513, "total_loss": -0.0073294141329824924, "policy_loss": -0.007997877895832062, "vf_loss": 15.018708229064941, "vf_explained_var": 0.4496181905269623, "kl": 0.0011589183704927564, "entropy": 1.666812539100647, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 486400, "episodes_total": 1216, "training_iteration": 38, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-03-36", "timestamp": 1660248216, "time_this_iter_s": 30.582061052322388, "time_total_s": 6637.05504322052, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, 
"framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, 
"inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 6637.05504322052, "timesteps_since_restore": 486400, "iterations_since_restore": 38, "perf": {"cpu_util_percent": 42.890697674418604, 
"ram_util_percent": 58.16976744186046}}
+{"episode_reward_max": 164.0, "episode_reward_min": 9.0, "episode_reward_mean": 80.91, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 3.0}, "policy_reward_max": {"ppo": 90.0}, "policy_reward_mean": {"ppo": 40.455}, "custom_metrics": {"sparse_reward_mean": 14.0, "sparse_reward_min": 0, "sparse_reward_max": 40, "shaped_reward_mean": 52.91, "shaped_reward_min": 9, "shaped_reward_max": 87, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 6.33, "onion_pickup_agent_0_min": 1, "onion_pickup_agent_0_max": 13, "onion_pickup_agent_1_mean": 7.33, "onion_pickup_agent_1_min": 1, "onion_pickup_agent_1_max": 14, "useful_onion_pickup_agent_0_mean": 4.82, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 11, "useful_onion_pickup_agent_1_mean": 5.89, "useful_onion_pickup_agent_1_min": 0, "useful_onion_pickup_agent_1_max": 12, "onion_drop_agent_0_mean": 2.18, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 7, "onion_drop_agent_1_mean": 1.99, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 7, "useful_onion_drop_agent_0_mean": 1.27, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 1.12, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 6, "potting_onion_agent_0_mean": 3.84, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 11, "potting_onion_agent_1_mean": 4.96, "potting_onion_agent_1_min": 1, "potting_onion_agent_1_max": 11, "dish_pickup_agent_0_mean": 4.6, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 4.1, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 12, "useful_dish_pickup_agent_0_mean": 1.2, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 4, "useful_dish_pickup_agent_1_mean": 0.92, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 3, "dish_drop_agent_0_mean": 2.13, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 2.0, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 9, "useful_dish_drop_agent_0_mean": 0.6, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 4, "useful_dish_drop_agent_1_mean": 0.66, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 7, "soup_pickup_agent_0_mean": 2.88, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 2.55, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 1.56, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 5, "soup_delivery_agent_1_mean": 1.41, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 4, "soup_drop_agent_0_mean": 1.1, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 7, "soup_drop_agent_1_mean": 1.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 6, "optimal_onion_potting_agent_0_mean": 3.84, "optimal_onion_potting_agent_0_min": 
0, "optimal_onion_potting_agent_0_max": 11, "optimal_onion_potting_agent_1_mean": 4.96, "optimal_onion_potting_agent_1_min": 1, "optimal_onion_potting_agent_1_max": 11, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 3.84, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 11, "viable_onion_potting_agent_1_mean": 4.96, "viable_onion_potting_agent_1_min": 1, "viable_onion_potting_agent_1_max": 11, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": 
{"episode_reward": [95.0, 36.0, 158.0, 96.0, 95.0, 45.0, 66.0, 127.0, 84.0, 147.0, 34.0, 84.0, 96.0, 118.0, 42.0, 112.0, 112.0, 11.0, 28.0, 66.0, 42.0, 39.0, 85.0, 137.0, 96.0, 31.0, 92.0, 60.0, 66.0, 72.0, 56.0, 104.0, 56.0, 60.0, 9.0, 94.0, 118.0, 107.0, 31.0, 150.0, 71.0, 58.0, 88.0, 100.0, 155.0, 98.0, 42.0, 110.0, 38.0, 94.0, 61.0, 77.0, 59.0, 66.0, 65.0, 36.0, 130.0, 67.0, 112.0, 72.0, 164.0, 45.0, 111.0, 149.0, 37.0, 82.0, 93.0, 98.0, 155.0, 107.0, 66.0, 64.0, 59.0, 130.0, 80.0, 101.0, 98.0, 109.0, 71.0, 68.0, 84.0, 79.0, 57.0, 20.0, 37.0, 82.0, 48.0, 64.0, 84.0, 113.0, 101.0, 62.0, 133.0, 71.0, 23.0, 109.0, 125.0, 23.0, 58.0, 75.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [50.0, 45.0, 6.0, 30.0, 84.0, 74.0, 46.0, 50.0, 45.0, 50.0, 20.0, 25.0, 33.0, 33.0, 67.0, 60.0, 42.0, 42.0, 66.0, 81.0, 14.0, 20.0, 37.0, 47.0, 50.0, 46.0, 62.0, 56.0, 17.0, 25.0, 41.0, 71.0, 45.0, 67.0, 8.0, 3.0, 16.0, 12.0, 29.0, 37.0, 25.0, 17.0, 28.0, 11.0, 52.0, 33.0, 65.0, 72.0, 47.0, 49.0, 22.0, 9.0, 31.0, 61.0, 30.0, 30.0, 34.0, 32.0, 28.0, 44.0, 22.0, 34.0, 51.0, 53.0, 30.0, 26.0, 32.0, 28.0, 6.0, 3.0, 46.0, 48.0, 61.0, 57.0, 48.0, 59.0, 16.0, 15.0, 60.0, 90.0, 26.0, 45.0, 40.0, 18.0, 45.0, 43.0, 63.0, 37.0, 78.0, 77.0, 51.0, 47.0, 11.0, 31.0, 60.0, 50.0, 30.0, 8.0, 48.0, 46.0, 37.0, 24.0, 37.0, 40.0, 25.0, 34.0, 29.0, 37.0, 39.0, 26.0, 19.0, 17.0, 62.0, 68.0, 44.0, 23.0, 61.0, 51.0, 39.0, 33.0, 79.0, 85.0, 17.0, 28.0, 56.0, 55.0, 73.0, 76.0, 12.0, 25.0, 26.0, 56.0, 55.0, 38.0, 48.0, 
50.0, 82.0, 73.0, 64.0, 43.0, 37.0, 29.0, 30.0, 34.0, 28.0, 31.0, 62.0, 68.0, 37.0, 43.0, 50.0, 51.0, 44.0, 54.0, 50.0, 59.0, 42.0, 29.0, 29.0, 39.0, 42.0, 42.0, 34.0, 45.0, 37.0, 20.0, 8.0, 12.0, 20.0, 17.0, 42.0, 40.0, 22.0, 26.0, 38.0, 26.0, 34.0, 50.0, 54.0, 59.0, 55.0, 46.0, 40.0, 22.0, 60.0, 73.0, 37.0, 34.0, 11.0, 12.0, 51.0, 58.0, 56.0, 69.0, 17.0, 6.0, 28.0, 30.0, 39.0, 36.0]}, "sampler_perf": {"mean_env_wait_ms": 4.361961744597006, "mean_processing_ms": 0.9376235930507917, "mean_inference_ms": 4.9270347290029886}, "off_policy_estimator": {}, "info": {"num_steps_trained": 936000, "num_steps_sampled": 499200, "sample_time_ms": 197058.326, "load_time_ms": 38.236, "grad_time_ms": 142247.976, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 7.275957722603643e-13, "cur_lr": 0.0010000000474974513, "total_loss": -0.009562704712152481, "policy_loss": -0.010270781815052032, "vf_loss": 15.400076866149902, "vf_explained_var": 0.39905285835266113, "kl": 0.0014264689525589347, "entropy": 1.6638473272323608, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 499200, "episodes_total": 1248, "training_iteration": 39, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-04-04", "timestamp": 1660248244, "time_this_iter_s": 28.12965416908264, "time_total_s": 6665.184697389603, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, 
"state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, 
"inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 6665.184697389603, "timesteps_since_restore": 499200, "iterations_since_restore": 39, "perf": {"cpu_util_percent": 32.9825, 
"ram_util_percent": 58.30499999999999}}
+{"episode_reward_max": 193.0, "episode_reward_min": 9.0, "episode_reward_mean": 81.89, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 3.0}, "policy_reward_max": {"ppo": 98.0}, "policy_reward_mean": {"ppo": 40.945}, "custom_metrics": {"sparse_reward_mean": 14.4, "sparse_reward_min": 0, "sparse_reward_max": 60, "shaped_reward_mean": 53.09, "shaped_reward_min": 9, "shaped_reward_max": 87, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 6.34, "onion_pickup_agent_0_min": 1, "onion_pickup_agent_0_max": 13, "onion_pickup_agent_1_mean": 7.25, "onion_pickup_agent_1_min": 1, "onion_pickup_agent_1_max": 14, "useful_onion_pickup_agent_0_mean": 4.88, "useful_onion_pickup_agent_0_min": 1, "useful_onion_pickup_agent_0_max": 11, "useful_onion_pickup_agent_1_mean": 5.78, "useful_onion_pickup_agent_1_min": 0, "useful_onion_pickup_agent_1_max": 14, "onion_drop_agent_0_mean": 2.18, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 7, "onion_drop_agent_1_mean": 1.95, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 10, "useful_onion_drop_agent_0_mean": 1.23, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 1.1, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 9, "potting_onion_agent_0_mean": 3.9, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 10, "potting_onion_agent_1_mean": 4.92, "potting_onion_agent_1_min": 1, "potting_onion_agent_1_max": 12, "dish_pickup_agent_0_mean": 4.72, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 4.2, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 12, "useful_dish_pickup_agent_0_mean": 1.07, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 4, "useful_dish_pickup_agent_1_mean": 0.94, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 3, "dish_drop_agent_0_mean": 2.29, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 1.98, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 9, "useful_dish_drop_agent_0_mean": 0.64, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 4, "useful_dish_drop_agent_1_mean": 0.67, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 7, "soup_pickup_agent_0_mean": 2.82, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 2.74, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 1.41, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 5, "soup_delivery_agent_1_mean": 1.51, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 4, "soup_drop_agent_0_mean": 1.14, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 7, "soup_drop_agent_1_mean": 1.08, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 5, "optimal_onion_potting_agent_0_mean": 3.9, 
"optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 10, "optimal_onion_potting_agent_1_mean": 4.92, "optimal_onion_potting_agent_1_min": 1, "optimal_onion_potting_agent_1_max": 12, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 3.9, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 10, "viable_onion_potting_agent_1_mean": 4.92, "viable_onion_potting_agent_1_min": 1, "viable_onion_potting_agent_1_max": 12, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 
0}, "hist_stats": {"episode_reward": [63.0, 122.0, 107.0, 193.0, 107.0, 44.0, 20.0, 144.0, 80.0, 53.0, 156.0, 100.0, 55.0, 74.0, 89.0, 9.0, 69.0, 52.0, 50.0, 96.0, 115.0, 58.0, 87.0, 144.0, 92.0, 20.0, 63.0, 81.0, 115.0, 84.0, 99.0, 150.0, 37.0, 82.0, 93.0, 98.0, 155.0, 107.0, 66.0, 64.0, 59.0, 130.0, 80.0, 101.0, 98.0, 109.0, 71.0, 68.0, 84.0, 79.0, 57.0, 20.0, 37.0, 82.0, 48.0, 64.0, 84.0, 113.0, 101.0, 62.0, 133.0, 71.0, 23.0, 109.0, 125.0, 23.0, 58.0, 75.0, 95.0, 36.0, 158.0, 96.0, 95.0, 45.0, 66.0, 127.0, 84.0, 147.0, 34.0, 84.0, 96.0, 118.0, 42.0, 112.0, 112.0, 11.0, 28.0, 66.0, 42.0, 39.0, 85.0, 137.0, 96.0, 31.0, 92.0, 60.0, 66.0, 72.0, 56.0, 104.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [31.0, 32.0, 55.0, 67.0, 50.0, 57.0, 95.0, 98.0, 52.0, 55.0, 14.0, 30.0, 6.0, 14.0, 78.0, 66.0, 40.0, 40.0, 20.0, 33.0, 75.0, 81.0, 47.0, 53.0, 27.0, 28.0, 39.0, 35.0, 49.0, 40.0, 6.0, 3.0, 41.0, 28.0, 27.0, 25.0, 30.0, 20.0, 54.0, 42.0, 59.0, 56.0, 27.0, 31.0, 37.0, 50.0, 76.0, 68.0, 37.0, 55.0, 5.0, 15.0, 24.0, 39.0, 43.0, 38.0, 56.0, 59.0, 32.0, 52.0, 50.0, 49.0, 70.0, 80.0, 12.0, 25.0, 26.0, 56.0, 55.0, 38.0, 48.0, 50.0, 82.0, 73.0, 64.0, 43.0, 37.0, 29.0, 30.0, 34.0, 28.0, 31.0, 62.0, 68.0, 37.0, 43.0, 50.0, 51.0, 44.0, 54.0, 50.0, 59.0, 42.0, 29.0, 29.0, 39.0, 42.0, 42.0, 34.0, 45.0, 37.0, 20.0, 8.0, 12.0, 20.0, 17.0, 42.0, 40.0, 22.0, 26.0, 38.0, 26.0, 34.0, 50.0, 54.0, 59.0, 55.0, 46.0, 40.0, 22.0, 60.0, 73.0, 37.0, 34.0, 11.0, 12.0, 51.0, 58.0, 56.0, 69.0, 17.0, 6.0, 
28.0, 30.0, 39.0, 36.0, 50.0, 45.0, 6.0, 30.0, 84.0, 74.0, 46.0, 50.0, 45.0, 50.0, 20.0, 25.0, 33.0, 33.0, 67.0, 60.0, 42.0, 42.0, 66.0, 81.0, 14.0, 20.0, 37.0, 47.0, 50.0, 46.0, 62.0, 56.0, 17.0, 25.0, 41.0, 71.0, 45.0, 67.0, 8.0, 3.0, 16.0, 12.0, 29.0, 37.0, 25.0, 17.0, 28.0, 11.0, 52.0, 33.0, 65.0, 72.0, 47.0, 49.0, 22.0, 9.0, 31.0, 61.0, 30.0, 30.0, 34.0, 32.0, 28.0, 44.0, 22.0, 34.0, 51.0, 53.0]}, "sampler_perf": {"mean_env_wait_ms": 4.262822214946647, "mean_processing_ms": 0.9177406233023881, "mean_inference_ms": 4.823257056931315}, "off_policy_estimator": {}, "info": {"num_steps_trained": 960000, "num_steps_sampled": 512000, "sample_time_ms": 196701.65, "load_time_ms": 38.048, "grad_time_ms": 142153.928, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 3.6379788613018216e-13, "cur_lr": 0.0010000000474974513, "total_loss": -0.008069280534982681, "policy_loss": -0.008976585231721401, "vf_loss": 17.312698364257812, "vf_explained_var": 0.4009813070297241, "kl": 0.0012740670936182141, "entropy": 1.647910237312317, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 512000, "episodes_total": 1280, "training_iteration": 40, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-04-32", "timestamp": 1660248272, "time_this_iter_s": 27.727252960205078, "time_total_s": 6692.911950349808, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": 
false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": 
{"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 6692.911950349808, "timesteps_since_restore": 512000, "iterations_since_restore": 40, "perf": 
{"cpu_util_percent": 32.13, "ram_util_percent": 58.30499999999999}}
+{"episode_reward_max": 213.0, "episode_reward_min": 6.0, "episode_reward_mean": 85.54, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 3.0}, "policy_reward_max": {"ppo": 113.0}, "policy_reward_mean": {"ppo": 42.77}, "custom_metrics": {"sparse_reward_mean": 14.8, "sparse_reward_min": 0, "sparse_reward_max": 60, "shaped_reward_mean": 55.94, "shaped_reward_min": 6, "shaped_reward_max": 101, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 6.53, "onion_pickup_agent_0_min": 1, "onion_pickup_agent_0_max": 13, "onion_pickup_agent_1_mean": 7.11, "onion_pickup_agent_1_min": 1, "onion_pickup_agent_1_max": 14, "useful_onion_pickup_agent_0_mean": 5.17, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 11, "useful_onion_pickup_agent_1_mean": 5.71, "useful_onion_pickup_agent_1_min": 0, "useful_onion_pickup_agent_1_max": 14, "onion_drop_agent_0_mean": 1.85, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 7, "onion_drop_agent_1_mean": 1.9, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 10, "useful_onion_drop_agent_0_mean": 1.01, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 1.09, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 9, "potting_onion_agent_0_mean": 4.34, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 10, "potting_onion_agent_1_mean": 4.79, "potting_onion_agent_1_min": 1, "potting_onion_agent_1_max": 12, "dish_pickup_agent_0_mean": 4.45, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 4.39, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 14, "useful_dish_pickup_agent_0_mean": 1.11, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 4, "useful_dish_pickup_agent_1_mean": 1.14, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 5, "dish_drop_agent_0_mean": 1.99, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 1.99, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 11, "useful_dish_drop_agent_0_mean": 0.55, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.54, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 7, "soup_pickup_agent_0_mean": 2.81, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 3.11, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 1.45, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 5, "soup_delivery_agent_1_mean": 1.72, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 4, "soup_drop_agent_0_mean": 1.15, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 6, "soup_drop_agent_1_mean": 1.23, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 5, "optimal_onion_potting_agent_0_mean": 4.34, 
"optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 10, "optimal_onion_potting_agent_1_mean": 4.79, "optimal_onion_potting_agent_1_min": 1, "optimal_onion_potting_agent_1_max": 12, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 4.34, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 10, "viable_onion_potting_agent_1_mean": 4.79, "viable_onion_potting_agent_1_min": 1, "viable_onion_potting_agent_1_max": 12, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [104.0, 141.0, 53.0, 70.0, 96.0, 127.0, 58.0, 31.0, 6.0, 52.0, 141.0, 55.0, 64.0, 88.0, 148.0, 69.0, 187.0, 50.0, 124.0, 107.0, 93.0, 99.0, 116.0, 72.0, 89.0, 42.0, 146.0, 213.0, 118.0, 82.0, 81.0, 28.0, 125.0, 23.0, 58.0, 75.0, 95.0, 36.0, 158.0, 96.0, 95.0, 45.0, 66.0, 127.0, 84.0, 147.0, 34.0, 84.0, 96.0, 118.0, 42.0, 112.0, 112.0, 11.0, 28.0, 66.0, 42.0, 39.0, 85.0, 137.0, 96.0, 31.0, 92.0, 60.0, 66.0, 72.0, 56.0, 104.0, 63.0, 122.0, 107.0, 193.0, 107.0, 44.0, 20.0, 144.0, 80.0, 53.0, 156.0, 100.0, 55.0, 74.0, 89.0, 9.0, 69.0, 52.0, 50.0, 96.0, 115.0, 58.0, 87.0, 144.0, 92.0, 20.0, 63.0, 81.0, 115.0, 84.0, 99.0, 150.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [48.0, 56.0, 68.0, 73.0, 25.0, 28.0, 42.0, 28.0, 53.0, 43.0, 58.0, 69.0, 34.0, 24.0, 19.0, 12.0, 3.0, 3.0, 23.0, 29.0, 73.0, 68.0, 21.0, 34.0, 42.0, 22.0, 35.0, 53.0, 68.0, 80.0, 41.0, 28.0, 99.0, 88.0, 30.0, 20.0, 60.0, 64.0, 59.0, 48.0, 48.0, 45.0, 53.0, 46.0, 57.0, 59.0, 37.0, 35.0, 44.0, 45.0, 20.0, 22.0, 76.0, 70.0, 100.0, 113.0, 57.0, 61.0, 48.0, 34.0, 31.0, 50.0, 3.0, 25.0, 56.0, 69.0, 17.0, 6.0, 28.0, 30.0, 39.0, 36.0, 50.0, 45.0, 6.0, 30.0, 84.0, 74.0, 46.0, 50.0, 45.0, 50.0, 20.0, 25.0, 33.0, 33.0, 67.0, 60.0, 42.0, 42.0, 66.0, 81.0, 14.0, 20.0, 37.0, 47.0, 50.0, 46.0, 62.0, 56.0, 17.0, 25.0, 41.0, 71.0, 45.0, 67.0, 8.0, 3.0, 16.0, 12.0, 29.0, 37.0, 25.0, 17.0, 28.0, 11.0, 52.0, 33.0, 65.0, 72.0, 47.0, 49.0, 22.0, 9.0, 31.0, 61.0, 
30.0, 30.0, 34.0, 32.0, 28.0, 44.0, 22.0, 34.0, 51.0, 53.0, 31.0, 32.0, 55.0, 67.0, 50.0, 57.0, 95.0, 98.0, 52.0, 55.0, 14.0, 30.0, 6.0, 14.0, 78.0, 66.0, 40.0, 40.0, 20.0, 33.0, 75.0, 81.0, 47.0, 53.0, 27.0, 28.0, 39.0, 35.0, 49.0, 40.0, 6.0, 3.0, 41.0, 28.0, 27.0, 25.0, 30.0, 20.0, 54.0, 42.0, 59.0, 56.0, 27.0, 31.0, 37.0, 50.0, 76.0, 68.0, 37.0, 55.0, 5.0, 15.0, 24.0, 39.0, 43.0, 38.0, 56.0, 59.0, 32.0, 52.0, 50.0, 49.0, 70.0, 80.0]}, "sampler_perf": {"mean_env_wait_ms": 4.168357407076552, "mean_processing_ms": 0.8988004870947216, "mean_inference_ms": 4.723239868801954}, "off_policy_estimator": {}, "info": {"num_steps_trained": 984000, "num_steps_sampled": 524800, "sample_time_ms": 196459.456, "load_time_ms": 38.195, "grad_time_ms": 142037.515, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 1.8189894306509108e-13, "cur_lr": 0.0010000000474974513, "total_loss": -0.008718971163034439, "policy_loss": -0.009683111682534218, "vf_loss": 17.845956802368164, "vf_explained_var": 0.43686649203300476, "kl": 0.0014183915918692946, "entropy": 1.6409085988998413, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 524800, "episodes_total": 1312, "training_iteration": 41, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-05-00", "timestamp": 1660248300, "time_this_iter_s": 27.954697370529175, "time_total_s": 6720.866647720337, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 
256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, 
"tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 6720.866647720337, "timesteps_since_restore": 524800, "iterations_since_restore": 
41, "perf": {"cpu_util_percent": 35.58461538461538, "ram_util_percent": 58.16923076923076}}
+{"episode_reward_max": 213.0, "episode_reward_min": 6.0, "episode_reward_mean": 92.07, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 3.0}, "policy_reward_max": {"ppo": 116.0}, "policy_reward_mean": {"ppo": 46.035}, "custom_metrics": {"sparse_reward_mean": 16.8, "sparse_reward_min": 0, "sparse_reward_max": 60, "shaped_reward_mean": 58.47, "shaped_reward_min": 6, "shaped_reward_max": 101, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 7.0, "onion_pickup_agent_0_min": 2, "onion_pickup_agent_0_max": 12, "onion_pickup_agent_1_mean": 7.36, "onion_pickup_agent_1_min": 1, "onion_pickup_agent_1_max": 14, "useful_onion_pickup_agent_0_mean": 5.54, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 11, "useful_onion_pickup_agent_1_mean": 5.75, "useful_onion_pickup_agent_1_min": 1, "useful_onion_pickup_agent_1_max": 14, "onion_drop_agent_0_mean": 1.99, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 7, "onion_drop_agent_1_mean": 2.0, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 10, "useful_onion_drop_agent_0_mean": 1.2, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 7, "useful_onion_drop_agent_1_mean": 1.22, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 9, "potting_onion_agent_0_mean": 4.6, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 10, "potting_onion_agent_1_mean": 4.96, "potting_onion_agent_1_min": 1, "potting_onion_agent_1_max": 12, "dish_pickup_agent_0_mean": 4.36, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 4.4, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 14, "useful_dish_pickup_agent_0_mean": 1.2, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 5, "useful_dish_pickup_agent_1_mean": 1.18, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 5, "dish_drop_agent_0_mean": 1.9, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 1.9, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 11, "useful_dish_drop_agent_0_mean": 0.64, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 5, "useful_dish_drop_agent_1_mean": 0.48, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 2.87, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 12, "soup_pickup_agent_1_mean": 3.27, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 1.58, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 4, "soup_delivery_agent_1_mean": 1.83, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 4, "soup_drop_agent_0_mean": 1.14, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 7, "soup_drop_agent_1_mean": 1.25, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 5, "optimal_onion_potting_agent_0_mean": 4.6, "optimal_onion_potting_agent_0_min": 
0, "optimal_onion_potting_agent_0_max": 10, "optimal_onion_potting_agent_1_mean": 4.96, "optimal_onion_potting_agent_1_min": 1, "optimal_onion_potting_agent_1_max": 12, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 4.6, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 10, "viable_onion_potting_agent_1_mean": 4.96, "viable_onion_potting_agent_1_min": 1, "viable_onion_potting_agent_1_max": 12, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": 
[73.0, 64.0, 96.0, 101.0, 66.0, 168.0, 146.0, 144.0, 87.0, 61.0, 95.0, 58.0, 122.0, 91.0, 58.0, 138.0, 78.0, 58.0, 58.0, 36.0, 110.0, 76.0, 99.0, 169.0, 167.0, 201.0, 55.0, 104.0, 212.0, 31.0, 115.0, 31.0, 66.0, 72.0, 56.0, 104.0, 63.0, 122.0, 107.0, 193.0, 107.0, 44.0, 20.0, 144.0, 80.0, 53.0, 156.0, 100.0, 55.0, 74.0, 89.0, 9.0, 69.0, 52.0, 50.0, 96.0, 115.0, 58.0, 87.0, 144.0, 92.0, 20.0, 63.0, 81.0, 115.0, 84.0, 99.0, 150.0, 104.0, 141.0, 53.0, 70.0, 96.0, 127.0, 58.0, 31.0, 6.0, 52.0, 141.0, 55.0, 64.0, 88.0, 148.0, 69.0, 187.0, 50.0, 124.0, 107.0, 93.0, 99.0, 116.0, 72.0, 89.0, 42.0, 146.0, 213.0, 118.0, 82.0, 81.0, 28.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [41.0, 32.0, 30.0, 34.0, 43.0, 53.0, 48.0, 53.0, 29.0, 37.0, 91.0, 77.0, 76.0, 70.0, 68.0, 76.0, 51.0, 36.0, 28.0, 33.0, 54.0, 41.0, 19.0, 39.0, 62.0, 60.0, 48.0, 43.0, 30.0, 28.0, 57.0, 81.0, 42.0, 36.0, 30.0, 28.0, 34.0, 24.0, 15.0, 21.0, 57.0, 53.0, 49.0, 27.0, 50.0, 49.0, 76.0, 93.0, 83.0, 84.0, 85.0, 116.0, 35.0, 20.0, 41.0, 63.0, 105.0, 107.0, 17.0, 14.0, 56.0, 59.0, 8.0, 23.0, 34.0, 32.0, 28.0, 44.0, 22.0, 34.0, 51.0, 53.0, 31.0, 32.0, 55.0, 67.0, 50.0, 57.0, 95.0, 98.0, 52.0, 55.0, 14.0, 30.0, 6.0, 14.0, 78.0, 66.0, 40.0, 40.0, 20.0, 33.0, 75.0, 81.0, 47.0, 53.0, 27.0, 28.0, 39.0, 35.0, 49.0, 40.0, 6.0, 3.0, 41.0, 28.0, 27.0, 25.0, 30.0, 20.0, 54.0, 42.0, 59.0, 56.0, 27.0, 31.0, 37.0, 50.0, 76.0, 68.0, 37.0, 55.0, 5.0, 15.0, 24.0, 39.0, 43.0, 38.0, 56.0, 59.0, 32.0, 52.0, 50.0, 49.0, 70.0, 80.0, 48.0, 
56.0, 68.0, 73.0, 25.0, 28.0, 42.0, 28.0, 53.0, 43.0, 58.0, 69.0, 34.0, 24.0, 19.0, 12.0, 3.0, 3.0, 23.0, 29.0, 73.0, 68.0, 21.0, 34.0, 42.0, 22.0, 35.0, 53.0, 68.0, 80.0, 41.0, 28.0, 99.0, 88.0, 30.0, 20.0, 60.0, 64.0, 59.0, 48.0, 48.0, 45.0, 53.0, 46.0, 57.0, 59.0, 37.0, 35.0, 44.0, 45.0, 20.0, 22.0, 76.0, 70.0, 100.0, 113.0, 57.0, 61.0, 48.0, 34.0, 31.0, 50.0, 3.0, 25.0]}, "sampler_perf": {"mean_env_wait_ms": 4.078369518030482, "mean_processing_ms": 0.8807722404539655, "mean_inference_ms": 4.6286338379623215}, "off_policy_estimator": {}, "info": {"num_steps_trained": 1008000, "num_steps_sampled": 537600, "sample_time_ms": 196166.577, "load_time_ms": 37.661, "grad_time_ms": 141994.493, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 9.094947153254554e-14, "cur_lr": 0.0010000000474974513, "total_loss": -0.00832280796021223, "policy_loss": -0.009453889913856983, "vf_loss": 19.490577697753906, "vf_explained_var": 0.44570884108543396, "kl": 0.0015499308938160539, "entropy": 1.6359552145004272, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 537600, "episodes_total": 1344, "training_iteration": 42, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-05-29", "timestamp": 1660248329, "time_this_iter_s": 29.278310775756836, "time_total_s": 6750.144958496094, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, 
"framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, 
"inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 6750.144958496094, "timesteps_since_restore": 537600, "iterations_since_restore": 42, "perf": {"cpu_util_percent": 35.96428571428572, 
"ram_util_percent": 58.190476190476204}}
+{"episode_reward_max": 213.0, "episode_reward_min": 6.0, "episode_reward_mean": 92.39, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 3.0}, "policy_reward_max": {"ppo": 116.0}, "policy_reward_mean": {"ppo": 46.195}, "custom_metrics": {"sparse_reward_mean": 17.2, "sparse_reward_min": 0, "sparse_reward_max": 60, "shaped_reward_mean": 57.99, "shaped_reward_min": 6, "shaped_reward_max": 101, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 6.96, "onion_pickup_agent_0_min": 0, "onion_pickup_agent_0_max": 14, "onion_pickup_agent_1_mean": 7.36, "onion_pickup_agent_1_min": 1, "onion_pickup_agent_1_max": 13, "useful_onion_pickup_agent_0_mean": 5.46, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 11, "useful_onion_pickup_agent_1_mean": 5.68, "useful_onion_pickup_agent_1_min": 1, "useful_onion_pickup_agent_1_max": 11, "onion_drop_agent_0_mean": 1.94, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 7, "onion_drop_agent_1_mean": 2.05, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 9, "useful_onion_drop_agent_0_mean": 1.13, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 7, "useful_onion_drop_agent_1_mean": 1.31, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 8, "potting_onion_agent_0_mean": 4.56, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 10, "potting_onion_agent_1_mean": 4.96, "potting_onion_agent_1_min": 1, "potting_onion_agent_1_max": 11, "dish_pickup_agent_0_mean": 4.25, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 4.38, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 14, "useful_dish_pickup_agent_0_mean": 1.21, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 5, "useful_dish_pickup_agent_1_mean": 1.2, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 5, "dish_drop_agent_0_mean": 1.88, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 1.87, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 11, "useful_dish_drop_agent_0_mean": 0.64, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 5, "useful_dish_drop_agent_1_mean": 0.47, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 2.99, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 12, "soup_pickup_agent_1_mean": 3.11, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 1.63, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 4, "soup_delivery_agent_1_mean": 1.73, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 4, "soup_drop_agent_0_mean": 1.25, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 7, "soup_drop_agent_1_mean": 1.19, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 5, "optimal_onion_potting_agent_0_mean": 4.56, 
"optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 10, "optimal_onion_potting_agent_1_mean": 4.96, "optimal_onion_potting_agent_1_min": 1, "optimal_onion_potting_agent_1_max": 11, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 4.56, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 10, "viable_onion_potting_agent_1_mean": 4.96, "viable_onion_potting_agent_1_min": 1, "viable_onion_potting_agent_1_max": 11, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [45.0, 93.0, 46.0, 61.0, 123.0, 57.0, 42.0, 23.0, 31.0, 128.0, 52.0, 85.0, 146.0, 137.0, 25.0, 112.0, 102.0, 119.0, 28.0, 84.0, 68.0, 101.0, 144.0, 92.0, 88.0, 58.0, 98.0, 109.0, 139.0, 91.0, 48.0, 98.0, 115.0, 84.0, 99.0, 150.0, 104.0, 141.0, 53.0, 70.0, 96.0, 127.0, 58.0, 31.0, 6.0, 52.0, 141.0, 55.0, 64.0, 88.0, 148.0, 69.0, 187.0, 50.0, 124.0, 107.0, 93.0, 99.0, 116.0, 72.0, 89.0, 42.0, 146.0, 213.0, 118.0, 82.0, 81.0, 28.0, 73.0, 64.0, 96.0, 101.0, 66.0, 168.0, 146.0, 144.0, 87.0, 61.0, 95.0, 58.0, 122.0, 91.0, 58.0, 138.0, 78.0, 58.0, 58.0, 36.0, 110.0, 76.0, 99.0, 169.0, 167.0, 201.0, 55.0, 104.0, 212.0, 31.0, 115.0, 31.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [31.0, 14.0, 37.0, 56.0, 21.0, 25.0, 41.0, 20.0, 65.0, 58.0, 19.0, 38.0, 16.0, 26.0, 9.0, 14.0, 14.0, 17.0, 49.0, 79.0, 22.0, 30.0, 43.0, 42.0, 72.0, 74.0, 63.0, 74.0, 19.0, 6.0, 56.0, 56.0, 55.0, 47.0, 55.0, 64.0, 11.0, 17.0, 31.0, 53.0, 37.0, 31.0, 53.0, 48.0, 66.0, 78.0, 53.0, 39.0, 42.0, 46.0, 19.0, 39.0, 52.0, 46.0, 53.0, 56.0, 65.0, 74.0, 42.0, 49.0, 19.0, 29.0, 50.0, 48.0, 56.0, 59.0, 32.0, 52.0, 50.0, 49.0, 70.0, 80.0, 48.0, 56.0, 68.0, 73.0, 25.0, 28.0, 42.0, 28.0, 53.0, 43.0, 58.0, 69.0, 34.0, 24.0, 19.0, 12.0, 3.0, 3.0, 23.0, 29.0, 73.0, 68.0, 21.0, 34.0, 42.0, 22.0, 35.0, 53.0, 68.0, 80.0, 41.0, 28.0, 99.0, 88.0, 30.0, 20.0, 60.0, 64.0, 59.0, 48.0, 48.0, 45.0, 53.0, 46.0, 57.0, 59.0, 37.0, 35.0, 44.0, 45.0, 20.0, 22.0, 
76.0, 70.0, 100.0, 113.0, 57.0, 61.0, 48.0, 34.0, 31.0, 50.0, 3.0, 25.0, 41.0, 32.0, 30.0, 34.0, 43.0, 53.0, 48.0, 53.0, 29.0, 37.0, 91.0, 77.0, 76.0, 70.0, 68.0, 76.0, 51.0, 36.0, 28.0, 33.0, 54.0, 41.0, 19.0, 39.0, 62.0, 60.0, 48.0, 43.0, 30.0, 28.0, 57.0, 81.0, 42.0, 36.0, 30.0, 28.0, 34.0, 24.0, 15.0, 21.0, 57.0, 53.0, 49.0, 27.0, 50.0, 49.0, 76.0, 93.0, 83.0, 84.0, 85.0, 116.0, 35.0, 20.0, 41.0, 63.0, 105.0, 107.0, 17.0, 14.0, 56.0, 59.0, 8.0, 23.0]}, "sampler_perf": {"mean_env_wait_ms": 3.992670298084334, "mean_processing_ms": 0.8636158543743789, "mean_inference_ms": 4.538596678243932}, "off_policy_estimator": {}, "info": {"num_steps_trained": 1032000, "num_steps_sampled": 550400, "sample_time_ms": 195606.597, "load_time_ms": 37.682, "grad_time_ms": 141791.558, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 4.547473576627277e-14, "cur_lr": 0.0010000000474974513, "total_loss": -0.009549283422529697, "policy_loss": -0.010450693778693676, "vf_loss": 17.197433471679688, "vf_explained_var": 0.4546402394771576, "kl": 0.00132859090808779, "entropy": 1.6366652250289917, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 550400, "episodes_total": 1376, "training_iteration": 43, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-05-57", "timestamp": 1660248357, "time_this_iter_s": 27.376117944717407, "time_total_s": 6777.521076440811, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, 
"lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": 
true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 6777.521076440811, "timesteps_since_restore": 550400, 
"iterations_since_restore": 43, "perf": {"cpu_util_percent": 34.52051282051282, "ram_util_percent": 58.123076923076916}}
+{"episode_reward_max": 212.0, "episode_reward_min": 9.0, "episode_reward_mean": 89.44, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 116.0}, "policy_reward_mean": {"ppo": 44.72}, "custom_metrics": {"sparse_reward_mean": 17.0, "sparse_reward_min": 0, "sparse_reward_max": 60, "shaped_reward_mean": 55.44, "shaped_reward_min": 9, "shaped_reward_max": 92, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 6.54, "onion_pickup_agent_0_min": 0, "onion_pickup_agent_0_max": 14, "onion_pickup_agent_1_mean": 7.64, "onion_pickup_agent_1_min": 1, "onion_pickup_agent_1_max": 13, "useful_onion_pickup_agent_0_mean": 5.03, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 11, "useful_onion_pickup_agent_1_mean": 6.0, "useful_onion_pickup_agent_1_min": 1, "useful_onion_pickup_agent_1_max": 12, "onion_drop_agent_0_mean": 1.95, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 7, "onion_drop_agent_1_mean": 2.08, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 9, "useful_onion_drop_agent_0_mean": 1.2, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 7, "useful_onion_drop_agent_1_mean": 1.25, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 8, "potting_onion_agent_0_mean": 4.16, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 10, "potting_onion_agent_1_mean": 5.24, "potting_onion_agent_1_min": 1, "potting_onion_agent_1_max": 11, "dish_pickup_agent_0_mean": 4.56, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 4.45, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 14, "useful_dish_pickup_agent_0_mean": 1.09, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 5, "useful_dish_pickup_agent_1_mean": 0.89, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 5, "dish_drop_agent_0_mean": 2.26, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 2.04, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 11, "useful_dish_drop_agent_0_mean": 0.82, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 5, "useful_dish_drop_agent_1_mean": 0.69, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 6, "soup_pickup_agent_0_mean": 3.0, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 12, "soup_pickup_agent_1_mean": 2.76, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 1.68, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 4, "soup_delivery_agent_1_mean": 1.64, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 4, "soup_drop_agent_0_mean": 1.22, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 7, "soup_drop_agent_1_mean": 0.96, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 4, "optimal_onion_potting_agent_0_mean": 4.16, 
"optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 10, "optimal_onion_potting_agent_1_mean": 5.24, "optimal_onion_potting_agent_1_min": 1, "optimal_onion_potting_agent_1_max": 11, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 4.16, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 10, "viable_onion_potting_agent_1_mean": 5.24, "viable_onion_potting_agent_1_min": 1, "viable_onion_potting_agent_1_max": 11, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [147.0, 136.0, 50.0, 114.0, 28.0, 49.0, 161.0, 95.0, 135.0, 98.0, 121.0, 165.0, 12.0, 110.0, 104.0, 84.0, 103.0, 9.0, 66.0, 101.0, 31.0, 42.0, 34.0, 50.0, 90.0, 101.0, 33.0, 191.0, 47.0, 61.0, 144.0, 82.0, 118.0, 82.0, 81.0, 28.0, 73.0, 64.0, 96.0, 101.0, 66.0, 168.0, 146.0, 144.0, 87.0, 61.0, 95.0, 58.0, 122.0, 91.0, 58.0, 138.0, 78.0, 58.0, 58.0, 36.0, 110.0, 76.0, 99.0, 169.0, 167.0, 201.0, 55.0, 104.0, 212.0, 31.0, 115.0, 31.0, 45.0, 93.0, 46.0, 61.0, 123.0, 57.0, 42.0, 23.0, 31.0, 128.0, 52.0, 85.0, 146.0, 137.0, 25.0, 112.0, 102.0, 119.0, 28.0, 84.0, 68.0, 101.0, 144.0, 92.0, 88.0, 58.0, 98.0, 109.0, 139.0, 91.0, 48.0, 98.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [76.0, 71.0, 62.0, 74.0, 27.0, 23.0, 61.0, 53.0, 12.0, 16.0, 18.0, 31.0, 69.0, 92.0, 44.0, 51.0, 81.0, 54.0, 40.0, 58.0, 63.0, 58.0, 85.0, 80.0, 3.0, 9.0, 54.0, 56.0, 50.0, 54.0, 42.0, 42.0, 42.0, 61.0, 0.0, 9.0, 35.0, 31.0, 53.0, 48.0, 22.0, 9.0, 20.0, 22.0, 20.0, 14.0, 14.0, 36.0, 43.0, 47.0, 53.0, 48.0, 13.0, 20.0, 85.0, 106.0, 16.0, 31.0, 26.0, 35.0, 80.0, 64.0, 34.0, 48.0, 57.0, 61.0, 48.0, 34.0, 31.0, 50.0, 3.0, 25.0, 41.0, 32.0, 30.0, 34.0, 43.0, 53.0, 48.0, 53.0, 29.0, 37.0, 91.0, 77.0, 76.0, 70.0, 68.0, 76.0, 51.0, 36.0, 28.0, 33.0, 54.0, 41.0, 19.0, 39.0, 62.0, 60.0, 48.0, 43.0, 30.0, 28.0, 57.0, 81.0, 42.0, 36.0, 30.0, 28.0, 34.0, 24.0, 15.0, 21.0, 57.0, 53.0, 49.0, 27.0, 50.0, 49.0, 76.0, 93.0, 83.0, 84.0, 85.0, 116.0, 
35.0, 20.0, 41.0, 63.0, 105.0, 107.0, 17.0, 14.0, 56.0, 59.0, 8.0, 23.0, 31.0, 14.0, 37.0, 56.0, 21.0, 25.0, 41.0, 20.0, 65.0, 58.0, 19.0, 38.0, 16.0, 26.0, 9.0, 14.0, 14.0, 17.0, 49.0, 79.0, 22.0, 30.0, 43.0, 42.0, 72.0, 74.0, 63.0, 74.0, 19.0, 6.0, 56.0, 56.0, 55.0, 47.0, 55.0, 64.0, 11.0, 17.0, 31.0, 53.0, 37.0, 31.0, 53.0, 48.0, 66.0, 78.0, 53.0, 39.0, 42.0, 46.0, 19.0, 39.0, 52.0, 46.0, 53.0, 56.0, 65.0, 74.0, 42.0, 49.0, 19.0, 29.0, 50.0, 48.0]}, "sampler_perf": {"mean_env_wait_ms": 3.9108563485299497, "mean_processing_ms": 0.8472286288222008, "mean_inference_ms": 4.453100666428265}, "off_policy_estimator": {}, "info": {"num_steps_trained": 1056000, "num_steps_sampled": 563200, "sample_time_ms": 195418.359, "load_time_ms": 37.483, "grad_time_ms": 141705.307, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 2.2737367883136385e-14, "cur_lr": 0.0010000000474974513, "total_loss": -0.010453901253640652, "policy_loss": -0.011599976569414139, "vf_loss": 19.665088653564453, "vf_explained_var": 0.43753400444984436, "kl": 0.0012759790988638997, "entropy": 1.640870451927185, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 563200, "episodes_total": 1408, "training_iteration": 44, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-06-24", "timestamp": 1660248384, "time_this_iter_s": 27.344013929367065, "time_total_s": 6804.865090370178, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, 
"lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": 
true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 6804.865090370178, "timesteps_since_restore": 563200, 
"iterations_since_restore": 44, "perf": {"cpu_util_percent": 32.94102564102564, "ram_util_percent": 58.05128205128204}}
+{"episode_reward_max": 239.0, "episode_reward_min": 9.0, "episode_reward_mean": 90.38, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 127.0}, "policy_reward_mean": {"ppo": 45.19}, "custom_metrics": {"sparse_reward_mean": 19.0, "sparse_reward_min": 0, "sparse_reward_max": 80, "shaped_reward_mean": 52.38, "shaped_reward_min": 9, "shaped_reward_max": 92, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 6.55, "onion_pickup_agent_0_min": 0, "onion_pickup_agent_0_max": 14, "onion_pickup_agent_1_mean": 7.59, "onion_pickup_agent_1_min": 3, "onion_pickup_agent_1_max": 13, "useful_onion_pickup_agent_0_mean": 5.0, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 11, "useful_onion_pickup_agent_1_mean": 5.95, "useful_onion_pickup_agent_1_min": 2, "useful_onion_pickup_agent_1_max": 12, "onion_drop_agent_0_mean": 2.05, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 2.21, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 9, "useful_onion_drop_agent_0_mean": 1.25, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 1.3, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 8, "potting_onion_agent_0_mean": 4.08, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 10, "potting_onion_agent_1_mean": 5.02, "potting_onion_agent_1_min": 1, "potting_onion_agent_1_max": 10, "dish_pickup_agent_0_mean": 4.4, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 4.28, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 12, "useful_dish_pickup_agent_0_mean": 1.02, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 4, "useful_dish_pickup_agent_1_mean": 0.74, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 4, "dish_drop_agent_0_mean": 2.19, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 2.02, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.74, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 4, "useful_dish_drop_agent_1_mean": 0.79, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 6, "soup_pickup_agent_0_mean": 2.8, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 2.43, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 1.55, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 4, "soup_delivery_agent_1_mean": 1.53, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 4, "soup_drop_agent_0_mean": 1.16, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 6, "soup_drop_agent_1_mean": 0.8, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 4, "optimal_onion_potting_agent_0_mean": 4.08, 
"optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 10, "optimal_onion_potting_agent_1_mean": 5.02, "optimal_onion_potting_agent_1_min": 1, "optimal_onion_potting_agent_1_max": 10, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 4.08, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 10, "viable_onion_potting_agent_1_mean": 5.02, "viable_onion_potting_agent_1_min": 1, "viable_onion_potting_agent_1_max": 10, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [239.0, 130.0, 113.0, 17.0, 130.0, 162.0, 144.0, 153.0, 152.0, 55.0, 105.0, 53.0, 182.0, 77.0, 161.0, 71.0, 31.0, 95.0, 93.0, 92.0, 25.0, 77.0, 12.0, 47.0, 77.0, 88.0, 193.0, 58.0, 115.0, 63.0, 74.0, 98.0, 212.0, 31.0, 115.0, 31.0, 45.0, 93.0, 46.0, 61.0, 123.0, 57.0, 42.0, 23.0, 31.0, 128.0, 52.0, 85.0, 146.0, 137.0, 25.0, 112.0, 102.0, 119.0, 28.0, 84.0, 68.0, 101.0, 144.0, 92.0, 88.0, 58.0, 98.0, 109.0, 139.0, 91.0, 48.0, 98.0, 147.0, 136.0, 50.0, 114.0, 28.0, 49.0, 161.0, 95.0, 135.0, 98.0, 121.0, 165.0, 12.0, 110.0, 104.0, 84.0, 103.0, 9.0, 66.0, 101.0, 31.0, 42.0, 34.0, 50.0, 90.0, 101.0, 33.0, 191.0, 47.0, 61.0, 144.0, 82.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [127.0, 112.0, 68.0, 62.0, 46.0, 67.0, 5.0, 12.0, 62.0, 68.0, 79.0, 83.0, 71.0, 73.0, 84.0, 69.0, 83.0, 69.0, 30.0, 25.0, 51.0, 54.0, 23.0, 30.0, 91.0, 91.0, 40.0, 37.0, 71.0, 90.0, 23.0, 48.0, 17.0, 14.0, 48.0, 47.0, 50.0, 43.0, 46.0, 46.0, 11.0, 14.0, 29.0, 48.0, 3.0, 9.0, 30.0, 17.0, 40.0, 37.0, 56.0, 32.0, 97.0, 96.0, 33.0, 25.0, 67.0, 48.0, 34.0, 29.0, 37.0, 37.0, 54.0, 44.0, 105.0, 107.0, 17.0, 14.0, 56.0, 59.0, 8.0, 23.0, 31.0, 14.0, 37.0, 56.0, 21.0, 25.0, 41.0, 20.0, 65.0, 58.0, 19.0, 38.0, 16.0, 26.0, 9.0, 14.0, 14.0, 17.0, 49.0, 79.0, 22.0, 30.0, 43.0, 42.0, 72.0, 74.0, 63.0, 74.0, 19.0, 6.0, 56.0, 56.0, 55.0, 47.0, 55.0, 64.0, 11.0, 17.0, 31.0, 53.0, 37.0, 31.0, 53.0, 48.0, 66.0, 78.0, 53.0, 39.0, 42.0, 46.0, 19.0, 
39.0, 52.0, 46.0, 53.0, 56.0, 65.0, 74.0, 42.0, 49.0, 19.0, 29.0, 50.0, 48.0, 76.0, 71.0, 62.0, 74.0, 27.0, 23.0, 61.0, 53.0, 12.0, 16.0, 18.0, 31.0, 69.0, 92.0, 44.0, 51.0, 81.0, 54.0, 40.0, 58.0, 63.0, 58.0, 85.0, 80.0, 3.0, 9.0, 54.0, 56.0, 50.0, 54.0, 42.0, 42.0, 42.0, 61.0, 0.0, 9.0, 35.0, 31.0, 53.0, 48.0, 22.0, 9.0, 20.0, 22.0, 20.0, 14.0, 14.0, 36.0, 43.0, 47.0, 53.0, 48.0, 13.0, 20.0, 85.0, 106.0, 16.0, 31.0, 26.0, 35.0, 80.0, 64.0, 34.0, 48.0]}, "sampler_perf": {"mean_env_wait_ms": 3.83276732749063, "mean_processing_ms": 0.8315868647990772, "mean_inference_ms": 4.371610853440936}, "off_policy_estimator": {}, "info": {"num_steps_trained": 1080000, "num_steps_sampled": 576000, "sample_time_ms": 158697.168, "load_time_ms": 37.561, "grad_time_ms": 45441.027, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 1.1368683941568192e-14, "cur_lr": 0.0010000000474974513, "total_loss": -0.007090561557561159, "policy_loss": -0.008278795517981052, "vf_loss": 20.059175491333008, "vf_explained_var": 0.4839383065700531, "kl": 0.0014106096932664514, "entropy": 1.63534414768219, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 576000, "episodes_total": 1440, "training_iteration": 45, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-06-54", "timestamp": 1660248414, "time_this_iter_s": 29.613693952560425, "time_total_s": 6834.478784322739, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, 
"lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": 
true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 6834.478784322739, "timesteps_since_restore": 576000, 
"iterations_since_restore": 45, "perf": {"cpu_util_percent": 32.607142857142854, "ram_util_percent": 58.099999999999994}}
+{"episode_reward_max": 239.0, "episode_reward_min": 9.0, "episode_reward_mean": 94.76, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 127.0}, "policy_reward_mean": {"ppo": 47.38}, "custom_metrics": {"sparse_reward_mean": 20.8, "sparse_reward_min": 0, "sparse_reward_max": 80, "shaped_reward_mean": 53.16, "shaped_reward_min": 9, "shaped_reward_max": 87, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 6.72, "onion_pickup_agent_0_min": 0, "onion_pickup_agent_0_max": 14, "onion_pickup_agent_1_mean": 7.12, "onion_pickup_agent_1_min": 2, "onion_pickup_agent_1_max": 13, "useful_onion_pickup_agent_0_mean": 5.09, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 12, "useful_onion_pickup_agent_1_mean": 5.56, "useful_onion_pickup_agent_1_min": 1, "useful_onion_pickup_agent_1_max": 12, "onion_drop_agent_0_mean": 1.94, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 1.98, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 1.27, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 1.21, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 4.37, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 10, "potting_onion_agent_1_mean": 4.82, "potting_onion_agent_1_min": 1, "potting_onion_agent_1_max": 10, "dish_pickup_agent_0_mean": 4.27, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 4.46, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 12, "useful_dish_pickup_agent_0_mean": 1.02, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 4, "useful_dish_pickup_agent_1_mean": 0.91, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 5, "dish_drop_agent_0_mean": 2.16, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 2.09, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.77, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 4, "useful_dish_drop_agent_1_mean": 0.78, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 6, "soup_pickup_agent_0_mean": 2.53, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 2.52, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 1.43, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 4, "soup_delivery_agent_1_mean": 1.71, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 4, "soup_drop_agent_0_mean": 0.99, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 6, "soup_drop_agent_1_mean": 0.73, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 4, "optimal_onion_potting_agent_0_mean": 4.37, 
"optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 10, "optimal_onion_potting_agent_1_mean": 4.82, "optimal_onion_potting_agent_1_min": 1, "optimal_onion_potting_agent_1_max": 10, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 4.37, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 10, "viable_onion_potting_agent_1_mean": 4.82, "viable_onion_potting_agent_1_min": 1, "viable_onion_potting_agent_1_max": 10, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [164.0, 92.0, 107.0, 36.0, 41.0, 99.0, 107.0, 73.0, 42.0, 204.0, 99.0, 139.0, 45.0, 93.0, 104.0, 93.0, 198.0, 148.0, 74.0, 66.0, 152.0, 50.0, 122.0, 112.0, 39.0, 87.0, 122.0, 34.0, 115.0, 84.0, 144.0, 39.0, 139.0, 91.0, 48.0, 98.0, 147.0, 136.0, 50.0, 114.0, 28.0, 49.0, 161.0, 95.0, 135.0, 98.0, 121.0, 165.0, 12.0, 110.0, 104.0, 84.0, 103.0, 9.0, 66.0, 101.0, 31.0, 42.0, 34.0, 50.0, 90.0, 101.0, 33.0, 191.0, 47.0, 61.0, 144.0, 82.0, 239.0, 130.0, 113.0, 17.0, 130.0, 162.0, 144.0, 153.0, 152.0, 55.0, 105.0, 53.0, 182.0, 77.0, 161.0, 71.0, 31.0, 95.0, 93.0, 92.0, 25.0, 77.0, 12.0, 47.0, 77.0, 88.0, 193.0, 58.0, 115.0, 63.0, 74.0, 98.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [91.0, 73.0, 32.0, 60.0, 45.0, 62.0, 22.0, 14.0, 14.0, 27.0, 53.0, 46.0, 59.0, 48.0, 37.0, 36.0, 17.0, 25.0, 92.0, 112.0, 42.0, 57.0, 65.0, 74.0, 17.0, 28.0, 45.0, 48.0, 43.0, 61.0, 43.0, 50.0, 97.0, 101.0, 72.0, 76.0, 38.0, 36.0, 32.0, 34.0, 81.0, 71.0, 27.0, 23.0, 60.0, 62.0, 59.0, 53.0, 27.0, 12.0, 37.0, 50.0, 58.0, 64.0, 12.0, 22.0, 51.0, 64.0, 45.0, 39.0, 74.0, 70.0, 25.0, 14.0, 65.0, 74.0, 42.0, 49.0, 19.0, 29.0, 50.0, 48.0, 76.0, 71.0, 62.0, 74.0, 27.0, 23.0, 61.0, 53.0, 12.0, 16.0, 18.0, 31.0, 69.0, 92.0, 44.0, 51.0, 81.0, 54.0, 40.0, 58.0, 63.0, 58.0, 85.0, 80.0, 3.0, 9.0, 54.0, 56.0, 50.0, 54.0, 42.0, 42.0, 42.0, 61.0, 0.0, 9.0, 35.0, 31.0, 53.0, 48.0, 22.0, 9.0, 20.0, 22.0, 20.0, 14.0, 14.0, 36.0, 43.0, 47.0, 53.0, 
48.0, 13.0, 20.0, 85.0, 106.0, 16.0, 31.0, 26.0, 35.0, 80.0, 64.0, 34.0, 48.0, 127.0, 112.0, 68.0, 62.0, 46.0, 67.0, 5.0, 12.0, 62.0, 68.0, 79.0, 83.0, 71.0, 73.0, 84.0, 69.0, 83.0, 69.0, 30.0, 25.0, 51.0, 54.0, 23.0, 30.0, 91.0, 91.0, 40.0, 37.0, 71.0, 90.0, 23.0, 48.0, 17.0, 14.0, 48.0, 47.0, 50.0, 43.0, 46.0, 46.0, 11.0, 14.0, 29.0, 48.0, 3.0, 9.0, 30.0, 17.0, 40.0, 37.0, 56.0, 32.0, 97.0, 96.0, 33.0, 25.0, 67.0, 48.0, 34.0, 29.0, 37.0, 37.0, 54.0, 44.0]}, "sampler_perf": {"mean_env_wait_ms": 3.75819263232233, "mean_processing_ms": 0.8166563749373907, "mean_inference_ms": 4.294229000839884}, "off_policy_estimator": {}, "info": {"num_steps_trained": 1104000, "num_steps_sampled": 588800, "sample_time_ms": 20385.134, "load_time_ms": 37.538, "grad_time_ms": 8901.075, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 5.684341970784096e-15, "cur_lr": 0.0010000000474974513, "total_loss": -0.00807119719684124, "policy_loss": -0.009133302606642246, "vf_loss": 18.76689338684082, "vf_explained_var": 0.5084854960441589, "kl": 0.0014663866022601724, "entropy": 1.6291638612747192, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 588800, "episodes_total": 1472, "training_iteration": 46, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-07-23", "timestamp": 1660248443, "time_this_iter_s": 29.134671926498413, "time_total_s": 6863.613456249237, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, 
"lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": 
true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 6863.613456249237, "timesteps_since_restore": 588800, 
"iterations_since_restore": 46, "perf": {"cpu_util_percent": 34.358536585365854, "ram_util_percent": 58.190243902439015}}
+{"episode_reward_max": 245.0, "episode_reward_min": 9.0, "episode_reward_mean": 94.0, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 135.0}, "policy_reward_mean": {"ppo": 47.0}, "custom_metrics": {"sparse_reward_mean": 21.0, "sparse_reward_min": 0, "sparse_reward_max": 80, "shaped_reward_mean": 52.0, "shaped_reward_min": 9, "shaped_reward_max": 90, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 6.57, "onion_pickup_agent_0_min": 0, "onion_pickup_agent_0_max": 14, "onion_pickup_agent_1_mean": 6.59, "onion_pickup_agent_1_min": 1, "onion_pickup_agent_1_max": 13, "useful_onion_pickup_agent_0_mean": 5.07, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 12, "useful_onion_pickup_agent_1_mean": 5.13, "useful_onion_pickup_agent_1_min": 1, "useful_onion_pickup_agent_1_max": 11, "onion_drop_agent_0_mean": 1.78, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 
8, "onion_drop_agent_1_mean": 1.65, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 1.21, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 1.06, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 4.41, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 10, "potting_onion_agent_1_mean": 4.63, "potting_onion_agent_1_min": 1, "potting_onion_agent_1_max": 10, "dish_pickup_agent_0_mean": 4.29, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 4.49, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 12, "useful_dish_pickup_agent_0_mean": 1.0, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 4, "useful_dish_pickup_agent_1_mean": 0.91, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 5, "dish_drop_agent_0_mean": 2.25, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 8, "dish_drop_agent_1_mean": 2.09, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.84, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 6, "useful_dish_drop_agent_1_mean": 0.78, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 7, "soup_pickup_agent_0_mean": 2.32, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 2.43, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 1.31, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 4, "soup_delivery_agent_1_mean": 1.65, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 4, "soup_drop_agent_0_mean": 0.89, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 5, "soup_drop_agent_1_mean": 0.71, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 4, "optimal_onion_potting_agent_0_mean": 4.41, "optimal_onion_potting_agent_0_min": 0, 
"optimal_onion_potting_agent_0_max": 10, "optimal_onion_potting_agent_1_mean": 4.63, "optimal_onion_potting_agent_1_min": 1, "optimal_onion_potting_agent_1_max": 10, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 4.41, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 10, "viable_onion_potting_agent_1_mean": 4.63, "viable_onion_potting_agent_1_min": 1, "viable_onion_potting_agent_1_max": 10, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": 
[245.0, 133.0, 23.0, 133.0, 37.0, 55.0, 22.0, 55.0, 69.0, 87.0, 68.0, 71.0, 9.0, 9.0, 51.0, 88.0, 136.0, 125.0, 118.0, 139.0, 95.0, 130.0, 67.0, 79.0, 38.0, 82.0, 20.0, 98.0, 190.0, 31.0, 153.0, 104.0, 47.0, 61.0, 144.0, 82.0, 239.0, 130.0, 113.0, 17.0, 130.0, 162.0, 144.0, 153.0, 152.0, 55.0, 105.0, 53.0, 182.0, 77.0, 161.0, 71.0, 31.0, 95.0, 93.0, 92.0, 25.0, 77.0, 12.0, 47.0, 77.0, 88.0, 193.0, 58.0, 115.0, 63.0, 74.0, 98.0, 164.0, 92.0, 107.0, 36.0, 41.0, 99.0, 107.0, 73.0, 42.0, 204.0, 99.0, 139.0, 45.0, 93.0, 104.0, 93.0, 198.0, 148.0, 74.0, 66.0, 152.0, 50.0, 122.0, 112.0, 39.0, 87.0, 122.0, 34.0, 115.0, 84.0, 144.0, 39.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [110.0, 135.0, 59.0, 74.0, 11.0, 12.0, 68.0, 65.0, 22.0, 15.0, 34.0, 21.0, 11.0, 11.0, 23.0, 32.0, 25.0, 44.0, 46.0, 41.0, 42.0, 26.0, 37.0, 34.0, 0.0, 9.0, 0.0, 9.0, 24.0, 27.0, 43.0, 45.0, 66.0, 70.0, 64.0, 61.0, 54.0, 64.0, 63.0, 76.0, 56.0, 39.0, 59.0, 71.0, 30.0, 37.0, 36.0, 43.0, 16.0, 22.0, 40.0, 42.0, 3.0, 17.0, 45.0, 53.0, 91.0, 99.0, 19.0, 12.0, 78.0, 75.0, 59.0, 45.0, 16.0, 31.0, 26.0, 35.0, 80.0, 64.0, 34.0, 48.0, 127.0, 112.0, 68.0, 62.0, 46.0, 67.0, 5.0, 12.0, 62.0, 68.0, 79.0, 83.0, 71.0, 73.0, 84.0, 69.0, 83.0, 69.0, 30.0, 25.0, 51.0, 54.0, 23.0, 30.0, 91.0, 91.0, 40.0, 37.0, 71.0, 90.0, 23.0, 48.0, 17.0, 14.0, 48.0, 47.0, 50.0, 43.0, 46.0, 46.0, 11.0, 14.0, 29.0, 48.0, 3.0, 9.0, 30.0, 17.0, 40.0, 37.0, 56.0, 32.0, 97.0, 96.0, 33.0, 25.0, 67.0, 48.0, 34.0, 29.0, 37.0, 37.0, 54.0, 44.0, 91.0, 
73.0, 32.0, 60.0, 45.0, 62.0, 22.0, 14.0, 14.0, 27.0, 53.0, 46.0, 59.0, 48.0, 37.0, 36.0, 17.0, 25.0, 92.0, 112.0, 42.0, 57.0, 65.0, 74.0, 17.0, 28.0, 45.0, 48.0, 43.0, 61.0, 43.0, 50.0, 97.0, 101.0, 72.0, 76.0, 38.0, 36.0, 32.0, 34.0, 81.0, 71.0, 27.0, 23.0, 60.0, 62.0, 59.0, 53.0, 27.0, 12.0, 37.0, 50.0, 58.0, 64.0, 12.0, 22.0, 51.0, 64.0, 45.0, 39.0, 74.0, 70.0, 25.0, 14.0]}, "sampler_perf": {"mean_env_wait_ms": 3.6869910389254943, "mean_processing_ms": 0.8024023118044294, "mean_inference_ms": 4.221576344650746}, "off_policy_estimator": {}, "info": {"num_steps_trained": 1128000, "num_steps_sampled": 601600, "sample_time_ms": 19943.652, "load_time_ms": 37.337, "grad_time_ms": 8965.172, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 2.842170985392048e-15, "cur_lr": 0.0010000000474974513, "total_loss": -0.007924961857497692, "policy_loss": -0.009038448333740234, "vf_loss": 19.34569549560547, "vf_explained_var": 0.504978597164154, "kl": 0.001396413892507553, "entropy": 1.6421631574630737, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 601600, "episodes_total": 1504, "training_iteration": 47, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-07-55", "timestamp": 1660248475, "time_this_iter_s": 32.583869218826294, "time_total_s": 6896.197325468063, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, 
"framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, 
"inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 6896.197325468063, "timesteps_since_restore": 601600, "iterations_since_restore": 47, "perf": {"cpu_util_percent": 34.10869565217391, 
"ram_util_percent": 58.16956521739129}}
+{"episode_reward_max": 245.0, "episode_reward_min": 9.0, "episode_reward_mean": 101.62, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 135.0}, "policy_reward_mean": {"ppo": 50.81}, "custom_metrics": {"sparse_reward_mean": 22.8, "sparse_reward_min": 0, "sparse_reward_max": 80, "shaped_reward_mean": 56.02, "shaped_reward_min": 9, "shaped_reward_max": 98, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 6.76, "onion_pickup_agent_0_min": 0, "onion_pickup_agent_0_max": 15, "onion_pickup_agent_1_mean": 6.77, "onion_pickup_agent_1_min": 1, "onion_pickup_agent_1_max": 14, "useful_onion_pickup_agent_0_mean": 5.56, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 15, "useful_onion_pickup_agent_1_mean": 5.52, "useful_onion_pickup_agent_1_min": 1, "useful_onion_pickup_agent_1_max": 14, "onion_drop_agent_0_mean": 1.59, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 8, "onion_drop_agent_1_mean": 1.47, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.89, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 0.87, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 4.79, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 11, "potting_onion_agent_1_mean": 4.97, "potting_onion_agent_1_min": 1, "potting_onion_agent_1_max": 11, "dish_pickup_agent_0_mean": 4.5, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 4.66, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 1.03, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 4, "useful_dish_pickup_agent_1_mean": 1.0, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 5, "dish_drop_agent_0_mean": 2.4, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 8, "dish_drop_agent_1_mean": 2.12, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.83, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 6, "useful_dish_drop_agent_1_mean": 0.64, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 7, "soup_pickup_agent_0_mean": 2.39, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 2.68, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 1.42, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 3, "soup_delivery_agent_1_mean": 1.81, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 4, "soup_drop_agent_0_mean": 0.82, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 5, "soup_drop_agent_1_mean": 0.79, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 4, "optimal_onion_potting_agent_0_mean": 4.79, 
"optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 11, "optimal_onion_potting_agent_1_mean": 4.97, "optimal_onion_potting_agent_1_min": 1, "optimal_onion_potting_agent_1_max": 11, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 4.79, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 11, "viable_onion_potting_agent_1_mean": 4.97, "viable_onion_potting_agent_1_min": 1, "viable_onion_potting_agent_1_max": 11, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [173.0, 93.0, 109.0, 218.0, 177.0, 155.0, 136.0, 150.0, 96.0, 191.0, 93.0, 98.0, 150.0, 110.0, 76.0, 107.0, 121.0, 158.0, 139.0, 159.0, 38.0, 148.0, 47.0, 117.0, 141.0, 9.0, 141.0, 87.0, 148.0, 144.0, 93.0, 106.0, 115.0, 63.0, 74.0, 98.0, 164.0, 92.0, 107.0, 36.0, 41.0, 99.0, 107.0, 73.0, 42.0, 204.0, 99.0, 139.0, 45.0, 93.0, 104.0, 93.0, 198.0, 148.0, 74.0, 66.0, 152.0, 50.0, 122.0, 112.0, 39.0, 87.0, 122.0, 34.0, 115.0, 84.0, 144.0, 39.0, 245.0, 133.0, 23.0, 133.0, 37.0, 55.0, 22.0, 55.0, 69.0, 87.0, 68.0, 71.0, 9.0, 9.0, 51.0, 88.0, 136.0, 125.0, 118.0, 139.0, 95.0, 130.0, 67.0, 79.0, 38.0, 82.0, 20.0, 98.0, 190.0, 31.0, 153.0, 104.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [88.0, 85.0, 50.0, 43.0, 57.0, 52.0, 94.0, 124.0, 74.0, 103.0, 85.0, 70.0, 76.0, 60.0, 64.0, 86.0, 53.0, 43.0, 89.0, 102.0, 49.0, 44.0, 52.0, 46.0, 84.0, 66.0, 51.0, 59.0, 40.0, 36.0, 53.0, 54.0, 65.0, 56.0, 74.0, 84.0, 68.0, 71.0, 81.0, 78.0, 11.0, 27.0, 67.0, 81.0, 19.0, 28.0, 51.0, 66.0, 73.0, 68.0, 6.0, 3.0, 71.0, 70.0, 36.0, 51.0, 68.0, 80.0, 74.0, 70.0, 42.0, 51.0, 58.0, 48.0, 67.0, 48.0, 34.0, 29.0, 37.0, 37.0, 54.0, 44.0, 91.0, 73.0, 32.0, 60.0, 45.0, 62.0, 22.0, 14.0, 14.0, 27.0, 53.0, 46.0, 59.0, 48.0, 37.0, 36.0, 17.0, 25.0, 92.0, 112.0, 42.0, 57.0, 65.0, 74.0, 17.0, 28.0, 45.0, 48.0, 43.0, 61.0, 43.0, 50.0, 97.0, 101.0, 72.0, 76.0, 38.0, 36.0, 32.0, 34.0, 81.0, 71.0, 27.0, 23.0, 60.0, 62.0, 59.0, 53.0, 27.0, 12.0, 
37.0, 50.0, 58.0, 64.0, 12.0, 22.0, 51.0, 64.0, 45.0, 39.0, 74.0, 70.0, 25.0, 14.0, 110.0, 135.0, 59.0, 74.0, 11.0, 12.0, 68.0, 65.0, 22.0, 15.0, 34.0, 21.0, 11.0, 11.0, 23.0, 32.0, 25.0, 44.0, 46.0, 41.0, 42.0, 26.0, 37.0, 34.0, 0.0, 9.0, 0.0, 9.0, 24.0, 27.0, 43.0, 45.0, 66.0, 70.0, 64.0, 61.0, 54.0, 64.0, 63.0, 76.0, 56.0, 39.0, 59.0, 71.0, 30.0, 37.0, 36.0, 43.0, 16.0, 22.0, 40.0, 42.0, 3.0, 17.0, 45.0, 53.0, 91.0, 99.0, 19.0, 12.0, 78.0, 75.0, 59.0, 45.0]}, "sampler_perf": {"mean_env_wait_ms": 3.619074133252118, "mean_processing_ms": 0.7888363298173107, "mean_inference_ms": 4.154787775271583}, "off_policy_estimator": {}, "info": {"num_steps_trained": 1152000, "num_steps_sampled": 614400, "sample_time_ms": 20418.706, "load_time_ms": 37.151, "grad_time_ms": 9069.585, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 1.421085492696024e-15, "cur_lr": 0.0010000000474974513, "total_loss": -0.01129829604178667, "policy_loss": -0.012764283455908298, "vf_loss": 22.684043884277344, "vf_explained_var": 0.5366321206092834, "kl": 0.0014537613606080413, "entropy": 1.6048468351364136, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 614400, "episodes_total": 1536, "training_iteration": 48, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-08-32", "timestamp": 1660248512, "time_this_iter_s": 36.37463116645813, "time_total_s": 6932.5719566345215, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, 
"lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": 
true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 6932.5719566345215, "timesteps_since_restore": 614400, 
"iterations_since_restore": 48, "perf": {"cpu_util_percent": 36.49999999999999, "ram_util_percent": 58.29423076923076}}
+{"episode_reward_max": 245.0, "episode_reward_min": 9.0, "episode_reward_mean": 109.78, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 135.0}, "policy_reward_mean": {"ppo": 54.89}, "custom_metrics": {"sparse_reward_mean": 25.6, "sparse_reward_min": 0, "sparse_reward_max": 80, "shaped_reward_mean": 58.58, "shaped_reward_min": 9, "shaped_reward_max": 101, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 6.59, "onion_pickup_agent_0_min": 0, "onion_pickup_agent_0_max": 17, "onion_pickup_agent_1_mean": 7.48, "onion_pickup_agent_1_min": 1, "onion_pickup_agent_1_max": 14, "useful_onion_pickup_agent_0_mean": 5.59, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 15, "useful_onion_pickup_agent_1_mean": 6.35, "useful_onion_pickup_agent_1_min": 1, "useful_onion_pickup_agent_1_max": 14, "onion_drop_agent_0_mean": 1.62, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 8, "onion_drop_agent_1_mean": 1.5, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.82, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 0.8, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 4.63, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 11, "potting_onion_agent_1_mean": 5.66, "potting_onion_agent_1_min": 1, "potting_onion_agent_1_max": 11, "dish_pickup_agent_0_mean": 4.66, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 4.45, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 1.17, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 5, "useful_dish_pickup_agent_1_mean": 0.9, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 5, "dish_drop_agent_0_mean": 2.3, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 8, "dish_drop_agent_1_mean": 2.0, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.74, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 6, "useful_dish_drop_agent_1_mean": 0.69, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 7, "soup_pickup_agent_0_mean": 2.6, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 2.67, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 1.72, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 4, "soup_delivery_agent_1_mean": 1.71, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 4, "soup_drop_agent_0_mean": 0.77, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 4, "soup_drop_agent_1_mean": 0.88, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 5, "optimal_onion_potting_agent_0_mean": 4.63, "optimal_onion_potting_agent_0_min": 
0, "optimal_onion_potting_agent_0_max": 11, "optimal_onion_potting_agent_1_mean": 5.66, "optimal_onion_potting_agent_1_min": 1, "optimal_onion_potting_agent_1_max": 11, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 4.63, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 11, "viable_onion_potting_agent_1_mean": 5.66, "viable_onion_potting_agent_1_min": 1, "viable_onion_potting_agent_1_max": 11, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": 
{"episode_reward": [101.0, 110.0, 157.0, 167.0, 84.0, 71.0, 128.0, 42.0, 162.0, 184.0, 88.0, 108.0, 204.0, 95.0, 179.0, 155.0, 147.0, 121.0, 207.0, 76.0, 113.0, 39.0, 134.0, 75.0, 31.0, 9.0, 181.0, 162.0, 204.0, 187.0, 20.0, 167.0, 115.0, 84.0, 144.0, 39.0, 245.0, 133.0, 23.0, 133.0, 37.0, 55.0, 22.0, 55.0, 69.0, 87.0, 68.0, 71.0, 9.0, 9.0, 51.0, 88.0, 136.0, 125.0, 118.0, 139.0, 95.0, 130.0, 67.0, 79.0, 38.0, 82.0, 20.0, 98.0, 190.0, 31.0, 153.0, 104.0, 173.0, 93.0, 109.0, 218.0, 177.0, 155.0, 136.0, 150.0, 96.0, 191.0, 93.0, 98.0, 150.0, 110.0, 76.0, 107.0, 121.0, 158.0, 139.0, 159.0, 38.0, 148.0, 47.0, 117.0, 141.0, 9.0, 141.0, 87.0, 148.0, 144.0, 93.0, 106.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [54.0, 47.0, 50.0, 60.0, 71.0, 86.0, 76.0, 91.0, 46.0, 38.0, 30.0, 41.0, 65.0, 63.0, 24.0, 18.0, 75.0, 87.0, 85.0, 99.0, 42.0, 46.0, 53.0, 55.0, 105.0, 99.0, 50.0, 45.0, 96.0, 83.0, 76.0, 79.0, 69.0, 78.0, 67.0, 54.0, 99.0, 108.0, 30.0, 46.0, 53.0, 60.0, 28.0, 11.0, 57.0, 77.0, 22.0, 53.0, 20.0, 11.0, 3.0, 6.0, 98.0, 83.0, 92.0, 70.0, 93.0, 111.0, 85.0, 102.0, 17.0, 3.0, 82.0, 85.0, 51.0, 64.0, 45.0, 39.0, 74.0, 70.0, 25.0, 14.0, 110.0, 135.0, 59.0, 74.0, 11.0, 12.0, 68.0, 65.0, 22.0, 15.0, 34.0, 21.0, 11.0, 11.0, 23.0, 32.0, 25.0, 44.0, 46.0, 41.0, 42.0, 26.0, 37.0, 34.0, 0.0, 9.0, 0.0, 9.0, 24.0, 27.0, 43.0, 45.0, 66.0, 70.0, 64.0, 61.0, 54.0, 64.0, 63.0, 76.0, 56.0, 39.0, 59.0, 71.0, 30.0, 37.0, 36.0, 43.0, 16.0, 22.0, 40.0, 42.0, 3.0, 17.0, 45.0, 53.0, 91.0, 99.0, 19.0, 
12.0, 78.0, 75.0, 59.0, 45.0, 88.0, 85.0, 50.0, 43.0, 57.0, 52.0, 94.0, 124.0, 74.0, 103.0, 85.0, 70.0, 76.0, 60.0, 64.0, 86.0, 53.0, 43.0, 89.0, 102.0, 49.0, 44.0, 52.0, 46.0, 84.0, 66.0, 51.0, 59.0, 40.0, 36.0, 53.0, 54.0, 65.0, 56.0, 74.0, 84.0, 68.0, 71.0, 81.0, 78.0, 11.0, 27.0, 67.0, 81.0, 19.0, 28.0, 51.0, 66.0, 73.0, 68.0, 6.0, 3.0, 71.0, 70.0, 36.0, 51.0, 68.0, 80.0, 74.0, 70.0, 42.0, 51.0, 58.0, 48.0]}, "sampler_perf": {"mean_env_wait_ms": 3.5541442522194506, "mean_processing_ms": 0.7759022948286116, "mean_inference_ms": 4.093132668646798}, "off_policy_estimator": {}, "info": {"num_steps_trained": 1176000, "num_steps_sampled": 627200, "sample_time_ms": 20998.642, "load_time_ms": 37.333, "grad_time_ms": 9227.616, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 7.10542746348012e-16, "cur_lr": 0.0010000000474974513, "total_loss": -0.0032997550442814827, "policy_loss": -0.004892440978437662, "vf_loss": 23.954416275024414, "vf_explained_var": 0.525080680847168, "kl": 0.0015437895199283957, "entropy": 1.6054998636245728, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 627200, "episodes_total": 1568, "training_iteration": 49, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-09-07", "timestamp": 1660248547, "time_this_iter_s": 35.51046395301819, "time_total_s": 6968.08242058754, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, 
"lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, 
"tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 6968.08242058754, "timesteps_since_restore": 627200, "iterations_since_restore": 
49, "perf": {"cpu_util_percent": 33.118, "ram_util_percent": 57.68999999999998}}
+{"episode_reward_max": 238.0, "episode_reward_min": 9.0, "episode_reward_mean": 123.05, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 3.0}, "policy_reward_max": {"ppo": 124.0}, "policy_reward_mean": {"ppo": 61.525}, "custom_metrics": {"sparse_reward_mean": 29.6, "sparse_reward_min": 0, "sparse_reward_max": 80, "shaped_reward_mean": 63.85, "shaped_reward_min": 9, "shaped_reward_max": 101, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 7.32, "onion_pickup_agent_0_min": 0, "onion_pickup_agent_0_max": 17, "onion_pickup_agent_1_mean": 8.04, "onion_pickup_agent_1_min": 3, "onion_pickup_agent_1_max": 15, "useful_onion_pickup_agent_0_mean": 6.33, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 15, "useful_onion_pickup_agent_1_mean": 6.84, "useful_onion_pickup_agent_1_min": 1, "useful_onion_pickup_agent_1_max": 14, "onion_drop_agent_0_mean": 1.88, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 8, "onion_drop_agent_1_mean": 1.54, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.89, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 0.82, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 5.07, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 11, "potting_onion_agent_1_mean": 6.16, "potting_onion_agent_1_min": 1, "potting_onion_agent_1_max": 11, "dish_pickup_agent_0_mean": 4.73, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 14, "dish_pickup_agent_1_mean": 4.48, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 1.33, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 5, "useful_dish_pickup_agent_1_mean": 0.94, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 4, "dish_drop_agent_0_mean": 2.13, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 8, "dish_drop_agent_1_mean": 2.01, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.6, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 6, "useful_dish_drop_agent_1_mean": 0.64, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 7, "soup_pickup_agent_0_mean": 2.93, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 2.89, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 2.03, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 5, "soup_delivery_agent_1_mean": 1.84, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.77, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 5, "soup_drop_agent_1_mean": 0.96, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 5, "optimal_onion_potting_agent_0_mean": 5.07, 
"optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 11, "optimal_onion_potting_agent_1_mean": 6.16, "optimal_onion_potting_agent_1_min": 1, "optimal_onion_potting_agent_1_max": 11, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 5.07, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 11, "viable_onion_potting_agent_1_mean": 6.16, "viable_onion_potting_agent_1_min": 1, "viable_onion_potting_agent_1_max": 11, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [127.0, 147.0, 128.0, 210.0, 179.0, 107.0, 141.0, 75.0, 107.0, 175.0, 129.0, 153.0, 88.0, 98.0, 71.0, 105.0, 238.0, 110.0, 164.0, 68.0, 118.0, 107.0, 65.0, 171.0, 209.0, 50.0, 164.0, 99.0, 88.0, 12.0, 122.0, 166.0, 190.0, 31.0, 153.0, 104.0, 173.0, 93.0, 109.0, 218.0, 177.0, 155.0, 136.0, 150.0, 96.0, 191.0, 93.0, 98.0, 150.0, 110.0, 76.0, 107.0, 121.0, 158.0, 139.0, 159.0, 38.0, 148.0, 47.0, 117.0, 141.0, 9.0, 141.0, 87.0, 148.0, 144.0, 93.0, 106.0, 101.0, 110.0, 157.0, 167.0, 84.0, 71.0, 128.0, 42.0, 162.0, 184.0, 88.0, 108.0, 204.0, 95.0, 179.0, 155.0, 147.0, 121.0, 207.0, 76.0, 113.0, 39.0, 134.0, 75.0, 31.0, 9.0, 181.0, 162.0, 204.0, 187.0, 20.0, 167.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [48.0, 79.0, 74.0, 73.0, 68.0, 60.0, 102.0, 108.0, 87.0, 92.0, 51.0, 56.0, 64.0, 77.0, 41.0, 34.0, 67.0, 40.0, 89.0, 86.0, 68.0, 61.0, 79.0, 74.0, 43.0, 45.0, 49.0, 49.0, 35.0, 36.0, 56.0, 49.0, 122.0, 116.0, 48.0, 62.0, 90.0, 74.0, 34.0, 34.0, 64.0, 54.0, 51.0, 56.0, 34.0, 31.0, 91.0, 80.0, 108.0, 101.0, 25.0, 25.0, 80.0, 84.0, 57.0, 42.0, 42.0, 46.0, 3.0, 9.0, 54.0, 68.0, 88.0, 78.0, 91.0, 99.0, 19.0, 12.0, 78.0, 75.0, 59.0, 45.0, 88.0, 85.0, 50.0, 43.0, 57.0, 52.0, 94.0, 124.0, 74.0, 103.0, 85.0, 70.0, 76.0, 60.0, 64.0, 86.0, 53.0, 43.0, 89.0, 102.0, 49.0, 44.0, 52.0, 46.0, 84.0, 66.0, 51.0, 59.0, 40.0, 36.0, 53.0, 54.0, 65.0, 56.0, 74.0, 84.0, 68.0, 71.0, 81.0, 78.0, 11.0, 27.0, 67.0, 81.0, 19.0, 
28.0, 51.0, 66.0, 73.0, 68.0, 6.0, 3.0, 71.0, 70.0, 36.0, 51.0, 68.0, 80.0, 74.0, 70.0, 42.0, 51.0, 58.0, 48.0, 54.0, 47.0, 50.0, 60.0, 71.0, 86.0, 76.0, 91.0, 46.0, 38.0, 30.0, 41.0, 65.0, 63.0, 24.0, 18.0, 75.0, 87.0, 85.0, 99.0, 42.0, 46.0, 53.0, 55.0, 105.0, 99.0, 50.0, 45.0, 96.0, 83.0, 76.0, 79.0, 69.0, 78.0, 67.0, 54.0, 99.0, 108.0, 30.0, 46.0, 53.0, 60.0, 28.0, 11.0, 57.0, 77.0, 22.0, 53.0, 20.0, 11.0, 3.0, 6.0, 98.0, 83.0, 92.0, 70.0, 93.0, 111.0, 85.0, 102.0, 17.0, 3.0, 82.0, 85.0]}, "sampler_perf": {"mean_env_wait_ms": 3.491898537786064, "mean_processing_ms": 0.7635432106455172, "mean_inference_ms": 4.03419160595968}, "off_policy_estimator": {}, "info": {"num_steps_trained": 1200000, "num_steps_sampled": 640000, "sample_time_ms": 21322.647, "load_time_ms": 37.352, "grad_time_ms": 9260.248, "update_time_ms": 0.003, "learner": {"ppo": {"cur_kl_coeff": 3.55271373174006e-16, "cur_lr": 0.0010000000474974513, "total_loss": -0.005630036350339651, "policy_loss": -0.007203007582575083, "vf_loss": 23.694684982299805, "vf_explained_var": 0.5489806532859802, "kl": 0.0013687704922631383, "entropy": 1.592978835105896, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 640000, "episodes_total": 1600, "training_iteration": 50, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-09-39", "timestamp": 1660248579, "time_this_iter_s": 31.292397022247314, "time_total_s": 6999.374817609787, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, 
"use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, 
"observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 6999.374817609787, 
"timesteps_since_restore": 640000, "iterations_since_restore": 50, "perf": {"cpu_util_percent": 38.30227272727273, "ram_util_percent": 57.75909090909093}}
+{"episode_reward_max": 238.0, "episode_reward_min": 9.0, "episode_reward_mean": 125.47, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 3.0}, "policy_reward_max": {"ppo": 122.0}, "policy_reward_mean": {"ppo": 62.735}, "custom_metrics": {"sparse_reward_mean": 29.2, "sparse_reward_min": 0, "sparse_reward_max": 80, "shaped_reward_mean": 67.07, "shaped_reward_min": 9, "shaped_reward_max": 115, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 7.57, "onion_pickup_agent_0_min": 0, "onion_pickup_agent_0_max": 18, "onion_pickup_agent_1_mean": 8.17, "onion_pickup_agent_1_min": 3, "onion_pickup_agent_1_max": 15, "useful_onion_pickup_agent_0_mean": 6.51, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 15, "useful_onion_pickup_agent_1_mean": 6.96, "useful_onion_pickup_agent_1_min": 1, "useful_onion_pickup_agent_1_max": 13, "onion_drop_agent_0_mean": 1.87, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 8, "onion_drop_agent_1_mean": 1.56, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 1.0, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 0.89, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 6, "potting_onion_agent_0_mean": 5.35, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 11, "potting_onion_agent_1_mean": 6.28, "potting_onion_agent_1_min": 1, "potting_onion_agent_1_max": 11, "dish_pickup_agent_0_mean": 4.84, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 14, "dish_pickup_agent_1_mean": 4.48, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 1.41, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 5, "useful_dish_pickup_agent_1_mean": 1.05, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 4, "dish_drop_agent_0_mean": 1.97, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 8, "dish_drop_agent_1_mean": 1.93, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 7, "useful_dish_drop_agent_0_mean": 0.58, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.75, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 6, "soup_pickup_agent_0_mean": 3.1, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 2.95, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 2.2, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 5, "soup_delivery_agent_1_mean": 1.96, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.79, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 5, "soup_drop_agent_1_mean": 0.85, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 5, "optimal_onion_potting_agent_0_mean": 5.35, 
"optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 11, "optimal_onion_potting_agent_1_mean": 6.28, "optimal_onion_potting_agent_1_min": 1, "optimal_onion_potting_agent_1_max": 11, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 5.35, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 11, "viable_onion_potting_agent_1_mean": 6.28, "viable_onion_potting_agent_1_min": 1, "viable_onion_potting_agent_1_max": 11, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [98.0, 63.0, 63.0, 192.0, 93.0, 178.0, 196.0, 53.0, 109.0, 156.0, 136.0, 166.0, 163.0, 175.0, 118.0, 115.0, 137.0, 115.0, 110.0, 155.0, 132.0, 79.0, 158.0, 213.0, 104.0, 153.0, 70.0, 195.0, 153.0, 85.0, 101.0, 123.0, 148.0, 144.0, 93.0, 106.0, 101.0, 110.0, 157.0, 167.0, 84.0, 71.0, 128.0, 42.0, 162.0, 184.0, 88.0, 108.0, 204.0, 95.0, 179.0, 155.0, 147.0, 121.0, 207.0, 76.0, 113.0, 39.0, 134.0, 75.0, 31.0, 9.0, 181.0, 162.0, 204.0, 187.0, 20.0, 167.0, 127.0, 147.0, 128.0, 210.0, 179.0, 107.0, 141.0, 75.0, 107.0, 175.0, 129.0, 153.0, 88.0, 98.0, 71.0, 105.0, 238.0, 110.0, 164.0, 68.0, 118.0, 107.0, 65.0, 171.0, 209.0, 50.0, 164.0, 99.0, 88.0, 12.0, 122.0, 166.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [36.0, 62.0, 38.0, 25.0, 35.0, 28.0, 94.0, 98.0, 41.0, 52.0, 86.0, 92.0, 106.0, 90.0, 24.0, 29.0, 63.0, 46.0, 63.0, 93.0, 73.0, 63.0, 85.0, 81.0, 90.0, 73.0, 71.0, 104.0, 70.0, 48.0, 56.0, 59.0, 71.0, 66.0, 56.0, 59.0, 54.0, 56.0, 73.0, 82.0, 65.0, 67.0, 44.0, 35.0, 71.0, 87.0, 108.0, 105.0, 58.0, 46.0, 88.0, 65.0, 35.0, 35.0, 107.0, 88.0, 70.0, 83.0, 48.0, 37.0, 59.0, 42.0, 63.0, 60.0, 68.0, 80.0, 74.0, 70.0, 42.0, 51.0, 58.0, 48.0, 54.0, 47.0, 50.0, 60.0, 71.0, 86.0, 76.0, 91.0, 46.0, 38.0, 30.0, 41.0, 65.0, 63.0, 24.0, 18.0, 75.0, 87.0, 85.0, 99.0, 42.0, 46.0, 53.0, 55.0, 105.0, 99.0, 50.0, 45.0, 96.0, 83.0, 76.0, 79.0, 69.0, 78.0, 67.0, 54.0, 99.0, 108.0, 30.0, 46.0, 53.0, 60.0, 28.0, 11.0, 57.0, 
77.0, 22.0, 53.0, 20.0, 11.0, 3.0, 6.0, 98.0, 83.0, 92.0, 70.0, 93.0, 111.0, 85.0, 102.0, 17.0, 3.0, 82.0, 85.0, 48.0, 79.0, 74.0, 73.0, 68.0, 60.0, 102.0, 108.0, 87.0, 92.0, 51.0, 56.0, 64.0, 77.0, 41.0, 34.0, 67.0, 40.0, 89.0, 86.0, 68.0, 61.0, 79.0, 74.0, 43.0, 45.0, 49.0, 49.0, 35.0, 36.0, 56.0, 49.0, 122.0, 116.0, 48.0, 62.0, 90.0, 74.0, 34.0, 34.0, 64.0, 54.0, 51.0, 56.0, 34.0, 31.0, 91.0, 80.0, 108.0, 101.0, 25.0, 25.0, 80.0, 84.0, 57.0, 42.0, 42.0, 46.0, 3.0, 9.0, 54.0, 68.0, 88.0, 78.0]}, "sampler_perf": {"mean_env_wait_ms": 3.4320001935473794, "mean_processing_ms": 0.7516646134052514, "mean_inference_ms": 3.975737374961292}, "off_policy_estimator": {}, "info": {"num_steps_trained": 1224000, "num_steps_sampled": 652800, "sample_time_ms": 21552.858, "load_time_ms": 37.216, "grad_time_ms": 9227.975, "update_time_ms": 0.003, "learner": {"ppo": {"cur_kl_coeff": 1.77635686587003e-16, "cur_lr": 0.0010000000474974513, "total_loss": -0.0032480310183018446, "policy_loss": -0.004836531355977058, "vf_loss": 23.794113159179688, "vf_explained_var": 0.5322676301002502, "kl": 0.0011860225349664688, "entropy": 1.5818275213241577, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 652800, "episodes_total": 1632, "training_iteration": 51, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-10-09", "timestamp": 1660248609, "time_this_iter_s": 29.93578290939331, "time_total_s": 7029.31060051918, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, 
"use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, 
"observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 7029.31060051918, 
"timesteps_since_restore": 652800, "iterations_since_restore": 51, "perf": {"cpu_util_percent": 36.09523809523809, "ram_util_percent": 57.82142857142857}}
+{"episode_reward_max": 264.0, "episode_reward_min": 12.0, "episode_reward_mean": 128.24, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 3.0}, "policy_reward_max": {"ppo": 139.0}, "policy_reward_mean": {"ppo": 64.12}, "custom_metrics": {"sparse_reward_mean": 30.2, "sparse_reward_min": 0, "sparse_reward_max": 80, "shaped_reward_mean": 67.84, "shaped_reward_min": 12, "shaped_reward_max": 115, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 7.73, "onion_pickup_agent_0_min": 1, "onion_pickup_agent_0_max": 18, "onion_pickup_agent_1_mean": 7.95, "onion_pickup_agent_1_min": 2, "onion_pickup_agent_1_max": 16, "useful_onion_pickup_agent_0_mean": 6.6, "useful_onion_pickup_agent_0_min": 1, "useful_onion_pickup_agent_0_max": 15, "useful_onion_pickup_agent_1_mean": 6.65, "useful_onion_pickup_agent_1_min": 1, "useful_onion_pickup_agent_1_max": 13, "onion_drop_agent_0_mean": 1.75, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 7, "onion_drop_agent_1_mean": 1.61, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 7, "useful_onion_drop_agent_0_mean": 0.97, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.97, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 6, "potting_onion_agent_0_mean": 5.65, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 11, "potting_onion_agent_1_mean": 5.99, "potting_onion_agent_1_min": 1, "potting_onion_agent_1_max": 11, "dish_pickup_agent_0_mean": 4.73, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 14, "dish_pickup_agent_1_mean": 4.52, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 1.44, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 5, "useful_dish_pickup_agent_1_mean": 1.15, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 5, "dish_drop_agent_0_mean": 1.82, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 8, "dish_drop_agent_1_mean": 1.97, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 7, "useful_dish_drop_agent_0_mean": 0.57, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 4, "useful_dish_drop_agent_1_mean": 0.7, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 6, "soup_pickup_agent_0_mean": 3.06, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 3.08, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 2.14, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 5, "soup_delivery_agent_1_mean": 2.04, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.78, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 5, "soup_drop_agent_1_mean": 0.88, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 4, "optimal_onion_potting_agent_0_mean": 5.65, 
"optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 11, "optimal_onion_potting_agent_1_mean": 5.99, "optimal_onion_potting_agent_1_min": 1, "optimal_onion_potting_agent_1_max": 11, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 5.65, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 11, "viable_onion_potting_agent_1_mean": 5.99, "viable_onion_potting_agent_1_min": 1, "viable_onion_potting_agent_1_max": 11, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [141.0, 138.0, 143.0, 211.0, 127.0, 20.0, 65.0, 150.0, 93.0, 153.0, 47.0, 144.0, 264.0, 148.0, 59.0, 121.0, 39.0, 204.0, 187.0, 208.0, 121.0, 124.0, 14.0, 95.0, 81.0, 107.0, 198.0, 114.0, 104.0, 152.0, 238.0, 88.0, 204.0, 187.0, 20.0, 167.0, 127.0, 147.0, 128.0, 210.0, 179.0, 107.0, 141.0, 75.0, 107.0, 175.0, 129.0, 153.0, 88.0, 98.0, 71.0, 105.0, 238.0, 110.0, 164.0, 68.0, 118.0, 107.0, 65.0, 171.0, 209.0, 50.0, 164.0, 99.0, 88.0, 12.0, 122.0, 166.0, 98.0, 63.0, 63.0, 192.0, 93.0, 178.0, 196.0, 53.0, 109.0, 156.0, 136.0, 166.0, 163.0, 175.0, 118.0, 115.0, 137.0, 115.0, 110.0, 155.0, 132.0, 79.0, 158.0, 213.0, 104.0, 153.0, 70.0, 195.0, 153.0, 85.0, 101.0, 123.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [65.0, 76.0, 57.0, 81.0, 92.0, 51.0, 115.0, 96.0, 62.0, 65.0, 9.0, 11.0, 36.0, 29.0, 65.0, 85.0, 54.0, 39.0, 88.0, 65.0, 33.0, 14.0, 65.0, 79.0, 125.0, 139.0, 77.0, 71.0, 33.0, 26.0, 65.0, 56.0, 20.0, 19.0, 91.0, 113.0, 98.0, 89.0, 110.0, 98.0, 53.0, 68.0, 57.0, 67.0, 8.0, 6.0, 56.0, 39.0, 47.0, 34.0, 46.0, 61.0, 93.0, 105.0, 55.0, 59.0, 64.0, 40.0, 73.0, 79.0, 125.0, 113.0, 40.0, 48.0, 93.0, 111.0, 85.0, 102.0, 17.0, 3.0, 82.0, 85.0, 48.0, 79.0, 74.0, 73.0, 68.0, 60.0, 102.0, 108.0, 87.0, 92.0, 51.0, 56.0, 64.0, 77.0, 41.0, 34.0, 67.0, 40.0, 89.0, 86.0, 68.0, 61.0, 79.0, 74.0, 43.0, 45.0, 49.0, 49.0, 35.0, 36.0, 56.0, 49.0, 122.0, 116.0, 48.0, 62.0, 90.0, 74.0, 34.0, 34.0, 64.0, 54.0, 51.0, 56.0, 
34.0, 31.0, 91.0, 80.0, 108.0, 101.0, 25.0, 25.0, 80.0, 84.0, 57.0, 42.0, 42.0, 46.0, 3.0, 9.0, 54.0, 68.0, 88.0, 78.0, 36.0, 62.0, 38.0, 25.0, 35.0, 28.0, 94.0, 98.0, 41.0, 52.0, 86.0, 92.0, 106.0, 90.0, 24.0, 29.0, 63.0, 46.0, 63.0, 93.0, 73.0, 63.0, 85.0, 81.0, 90.0, 73.0, 71.0, 104.0, 70.0, 48.0, 56.0, 59.0, 71.0, 66.0, 56.0, 59.0, 54.0, 56.0, 73.0, 82.0, 65.0, 67.0, 44.0, 35.0, 71.0, 87.0, 108.0, 105.0, 58.0, 46.0, 88.0, 65.0, 35.0, 35.0, 107.0, 88.0, 70.0, 83.0, 48.0, 37.0, 59.0, 42.0, 63.0, 60.0]}, "sampler_perf": {"mean_env_wait_ms": 3.3744175644507366, "mean_processing_ms": 0.7402432912819328, "mean_inference_ms": 3.918069988577329}, "off_policy_estimator": {}, "info": {"num_steps_trained": 1248000, "num_steps_sampled": 665600, "sample_time_ms": 21746.558, "load_time_ms": 37.515, "grad_time_ms": 9170.716, "update_time_ms": 0.003, "learner": {"ppo": {"cur_kl_coeff": 8.88178432935015e-17, "cur_lr": 0.0010000000474974513, "total_loss": -0.00774852791801095, "policy_loss": -0.009392179548740387, "vf_loss": 24.351181030273438, "vf_explained_var": 0.5798514485359192, "kl": 0.0016348478384315968, "entropy": 1.5829213857650757, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 665600, "episodes_total": 1664, "training_iteration": 52, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-10-39", "timestamp": 1660248639, "time_this_iter_s": 30.649518966674805, "time_total_s": 7059.960119485855, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": 
true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, 
"observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 7059.960119485855, 
"timesteps_since_restore": 665600, "iterations_since_restore": 52, "perf": {"cpu_util_percent": 41.01818181818182, "ram_util_percent": 57.649999999999984}}
+{"episode_reward_max": 293.0, "episode_reward_min": 12.0, "episode_reward_mean": 128.76, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 3.0}, "policy_reward_max": {"ppo": 154.0}, "policy_reward_mean": {"ppo": 64.38}, "custom_metrics": {"sparse_reward_mean": 30.0, "sparse_reward_min": 0, "sparse_reward_max": 100, "shaped_reward_mean": 68.76, "shaped_reward_min": 12, "shaped_reward_max": 115, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 7.92, "onion_pickup_agent_0_min": 1, "onion_pickup_agent_0_max": 18, "onion_pickup_agent_1_mean": 7.71, "onion_pickup_agent_1_min": 2, "onion_pickup_agent_1_max": 16, "useful_onion_pickup_agent_0_mean": 6.68, "useful_onion_pickup_agent_0_min": 1, "useful_onion_pickup_agent_0_max": 13, "useful_onion_pickup_agent_1_mean": 6.5, "useful_onion_pickup_agent_1_min": 1, "useful_onion_pickup_agent_1_max": 13, "onion_drop_agent_0_mean": 1.73, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 7, "onion_drop_agent_1_mean": 1.57, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 7, "useful_onion_drop_agent_0_mean": 1.01, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.92, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 6, "potting_onion_agent_0_mean": 5.85, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 11, "potting_onion_agent_1_mean": 5.81, "potting_onion_agent_1_min": 1, "potting_onion_agent_1_max": 11, "dish_pickup_agent_0_mean": 4.75, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 14, "dish_pickup_agent_1_mean": 4.45, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 1.45, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 5, "useful_dish_pickup_agent_1_mean": 1.26, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 5, "dish_drop_agent_0_mean": 1.75, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 8, "dish_drop_agent_1_mean": 1.8, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 7, "useful_dish_drop_agent_0_mean": 0.64, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 4, "useful_dish_drop_agent_1_mean": 0.72, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 6, "soup_pickup_agent_0_mean": 3.16, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 3.06, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 2.19, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 5, "soup_delivery_agent_1_mean": 2.04, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 5, "soup_drop_agent_0_mean": 0.85, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 5, "soup_drop_agent_1_mean": 0.87, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 4, "optimal_onion_potting_agent_0_mean": 5.85, 
"optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 11, "optimal_onion_potting_agent_1_mean": 5.81, "optimal_onion_potting_agent_1_min": 1, "optimal_onion_potting_agent_1_max": 11, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 5.85, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 11, "viable_onion_potting_agent_1_mean": 5.81, "viable_onion_potting_agent_1_min": 1, "viable_onion_potting_agent_1_max": 11, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [207.0, 153.0, 195.0, 101.0, 90.0, 210.0, 225.0, 106.0, 121.0, 75.0, 60.0, 170.0, 23.0, 130.0, 147.0, 136.0, 39.0, 133.0, 158.0, 101.0, 66.0, 116.0, 50.0, 167.0, 146.0, 293.0, 36.0, 109.0, 245.0, 121.0, 207.0, 97.0, 88.0, 12.0, 122.0, 166.0, 98.0, 63.0, 63.0, 192.0, 93.0, 178.0, 196.0, 53.0, 109.0, 156.0, 136.0, 166.0, 163.0, 175.0, 118.0, 115.0, 137.0, 115.0, 110.0, 155.0, 132.0, 79.0, 158.0, 213.0, 104.0, 153.0, 70.0, 195.0, 153.0, 85.0, 101.0, 123.0, 141.0, 138.0, 143.0, 211.0, 127.0, 20.0, 65.0, 150.0, 93.0, 153.0, 47.0, 144.0, 264.0, 148.0, 59.0, 121.0, 39.0, 204.0, 187.0, 208.0, 121.0, 124.0, 14.0, 95.0, 81.0, 107.0, 198.0, 114.0, 104.0, 152.0, 238.0, 88.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [105.0, 102.0, 68.0, 85.0, 96.0, 99.0, 53.0, 48.0, 38.0, 52.0, 106.0, 104.0, 113.0, 112.0, 53.0, 53.0, 59.0, 62.0, 47.0, 28.0, 22.0, 38.0, 90.0, 80.0, 9.0, 14.0, 70.0, 60.0, 82.0, 65.0, 70.0, 66.0, 19.0, 20.0, 64.0, 69.0, 96.0, 62.0, 62.0, 39.0, 31.0, 35.0, 70.0, 46.0, 27.0, 23.0, 82.0, 85.0, 71.0, 75.0, 154.0, 139.0, 16.0, 20.0, 56.0, 53.0, 126.0, 119.0, 62.0, 59.0, 103.0, 104.0, 45.0, 52.0, 42.0, 46.0, 3.0, 9.0, 54.0, 68.0, 88.0, 78.0, 36.0, 62.0, 38.0, 25.0, 35.0, 28.0, 94.0, 98.0, 41.0, 52.0, 86.0, 92.0, 106.0, 90.0, 24.0, 29.0, 63.0, 46.0, 63.0, 93.0, 73.0, 63.0, 85.0, 81.0, 90.0, 73.0, 71.0, 104.0, 70.0, 48.0, 56.0, 59.0, 71.0, 66.0, 56.0, 59.0, 54.0, 56.0, 73.0, 82.0, 65.0, 67.0, 44.0, 35.0, 
71.0, 87.0, 108.0, 105.0, 58.0, 46.0, 88.0, 65.0, 35.0, 35.0, 107.0, 88.0, 70.0, 83.0, 48.0, 37.0, 59.0, 42.0, 63.0, 60.0, 65.0, 76.0, 57.0, 81.0, 92.0, 51.0, 115.0, 96.0, 62.0, 65.0, 9.0, 11.0, 36.0, 29.0, 65.0, 85.0, 54.0, 39.0, 88.0, 65.0, 33.0, 14.0, 65.0, 79.0, 125.0, 139.0, 77.0, 71.0, 33.0, 26.0, 65.0, 56.0, 20.0, 19.0, 91.0, 113.0, 98.0, 89.0, 110.0, 98.0, 53.0, 68.0, 57.0, 67.0, 8.0, 6.0, 56.0, 39.0, 47.0, 34.0, 46.0, 61.0, 93.0, 105.0, 55.0, 59.0, 64.0, 40.0, 73.0, 79.0, 125.0, 113.0, 40.0, 48.0]}, "sampler_perf": {"mean_env_wait_ms": 3.319165284336134, "mean_processing_ms": 0.7292810578162132, "mean_inference_ms": 3.8629630777095305}, "off_policy_estimator": {}, "info": {"num_steps_trained": 1272000, "num_steps_sampled": 678400, "sample_time_ms": 22200.855, "load_time_ms": 37.251, "grad_time_ms": 9256.826, "update_time_ms": 0.003, "learner": {"ppo": {"cur_kl_coeff": 4.440892164675075e-17, "cur_lr": 0.0010000000474974513, "total_loss": -0.006719778757542372, "policy_loss": -0.008363676257431507, "vf_loss": 24.369796752929688, "vf_explained_var": 0.5794721245765686, "kl": 0.0014526437735185027, "entropy": 1.586159348487854, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 678400, "episodes_total": 1696, "training_iteration": 53, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-11-12", "timestamp": 1660248672, "time_this_iter_s": 32.775245904922485, "time_total_s": 7092.735365390778, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, 
"vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": 
false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 
7092.735365390778, "timesteps_since_restore": 678400, "iterations_since_restore": 53, "perf": {"cpu_util_percent": 38.126086956521746, "ram_util_percent": 57.626086956521746}}
+{"episode_reward_max": 296.0, "episode_reward_min": 14.0, "episode_reward_mean": 132.59, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 3.0}, "policy_reward_max": {"ppo": 154.0}, "policy_reward_mean": {"ppo": 66.295}, "custom_metrics": {"sparse_reward_mean": 31.4, "sparse_reward_min": 0, "sparse_reward_max": 100, "shaped_reward_mean": 69.79, "shaped_reward_min": 14, "shaped_reward_max": 110, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 8.04, "onion_pickup_agent_0_min": 1, "onion_pickup_agent_0_max": 18, "onion_pickup_agent_1_mean": 7.88, "onion_pickup_agent_1_min": 2, "onion_pickup_agent_1_max": 16, "useful_onion_pickup_agent_0_mean": 6.88, "useful_onion_pickup_agent_0_min": 1, "useful_onion_pickup_agent_0_max": 13, "useful_onion_pickup_agent_1_mean": 6.68, "useful_onion_pickup_agent_1_min": 1, "useful_onion_pickup_agent_1_max": 15, "onion_drop_agent_0_mean": 1.73, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 7, "onion_drop_agent_1_mean": 1.59, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 7, "useful_onion_drop_agent_0_mean": 0.93, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 1.0, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 6, "potting_onion_agent_0_mean": 5.95, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 11, "potting_onion_agent_1_mean": 5.92, "potting_onion_agent_1_min": 1, "potting_onion_agent_1_max": 13, "dish_pickup_agent_0_mean": 4.44, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 4.47, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 1.4, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 5, "useful_dish_pickup_agent_1_mean": 1.36, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 5, "dish_drop_agent_0_mean": 1.57, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 1.73, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 7, "useful_dish_drop_agent_0_mean": 0.57, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 4, "useful_dish_drop_agent_1_mean": 0.61, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 5, "soup_pickup_agent_0_mean": 3.12, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 3.22, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 2.14, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 5, "soup_delivery_agent_1_mean": 2.14, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 5, "soup_drop_agent_0_mean": 0.84, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 5, "soup_drop_agent_1_mean": 0.97, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 7, "optimal_onion_potting_agent_0_mean": 5.95, 
"optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 11, "optimal_onion_potting_agent_1_mean": 5.92, "optimal_onion_potting_agent_1_min": 1, "optimal_onion_potting_agent_1_max": 13, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 5.95, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 11, "viable_onion_potting_agent_1_mean": 5.92, "viable_onion_potting_agent_1_min": 1, "viable_onion_potting_agent_1_max": 13, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [205.0, 61.0, 126.0, 104.0, 179.0, 296.0, 72.0, 126.0, 172.0, 17.0, 155.0, 207.0, 227.0, 101.0, 110.0, 101.0, 207.0, 77.0, 155.0, 159.0, 155.0, 158.0, 150.0, 96.0, 81.0, 125.0, 155.0, 42.0, 270.0, 155.0, 147.0, 75.0, 153.0, 85.0, 101.0, 123.0, 141.0, 138.0, 143.0, 211.0, 127.0, 20.0, 65.0, 150.0, 93.0, 153.0, 47.0, 144.0, 264.0, 148.0, 59.0, 121.0, 39.0, 204.0, 187.0, 208.0, 121.0, 124.0, 14.0, 95.0, 81.0, 107.0, 198.0, 114.0, 104.0, 152.0, 238.0, 88.0, 207.0, 153.0, 195.0, 101.0, 90.0, 210.0, 225.0, 106.0, 121.0, 75.0, 60.0, 170.0, 23.0, 130.0, 147.0, 136.0, 39.0, 133.0, 158.0, 101.0, 66.0, 116.0, 50.0, 167.0, 146.0, 293.0, 36.0, 109.0, 245.0, 121.0, 207.0, 97.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [105.0, 100.0, 28.0, 33.0, 65.0, 61.0, 51.0, 53.0, 92.0, 87.0, 143.0, 153.0, 32.0, 40.0, 69.0, 57.0, 75.0, 97.0, 3.0, 14.0, 79.0, 76.0, 107.0, 100.0, 102.0, 125.0, 37.0, 64.0, 49.0, 61.0, 42.0, 59.0, 104.0, 103.0, 43.0, 34.0, 68.0, 87.0, 77.0, 82.0, 64.0, 91.0, 74.0, 84.0, 84.0, 66.0, 50.0, 46.0, 42.0, 39.0, 69.0, 56.0, 86.0, 69.0, 23.0, 19.0, 138.0, 132.0, 65.0, 90.0, 74.0, 73.0, 33.0, 42.0, 70.0, 83.0, 48.0, 37.0, 59.0, 42.0, 63.0, 60.0, 65.0, 76.0, 57.0, 81.0, 92.0, 51.0, 115.0, 96.0, 62.0, 65.0, 9.0, 11.0, 36.0, 29.0, 65.0, 85.0, 54.0, 39.0, 88.0, 65.0, 33.0, 14.0, 65.0, 79.0, 125.0, 139.0, 77.0, 71.0, 33.0, 26.0, 65.0, 56.0, 20.0, 19.0, 91.0, 113.0, 98.0, 89.0, 110.0, 98.0, 53.0, 68.0, 57.0, 
67.0, 8.0, 6.0, 56.0, 39.0, 47.0, 34.0, 46.0, 61.0, 93.0, 105.0, 55.0, 59.0, 64.0, 40.0, 73.0, 79.0, 125.0, 113.0, 40.0, 48.0, 105.0, 102.0, 68.0, 85.0, 96.0, 99.0, 53.0, 48.0, 38.0, 52.0, 106.0, 104.0, 113.0, 112.0, 53.0, 53.0, 59.0, 62.0, 47.0, 28.0, 22.0, 38.0, 90.0, 80.0, 9.0, 14.0, 70.0, 60.0, 82.0, 65.0, 70.0, 66.0, 19.0, 20.0, 64.0, 69.0, 96.0, 62.0, 62.0, 39.0, 31.0, 35.0, 70.0, 46.0, 27.0, 23.0, 82.0, 85.0, 71.0, 75.0, 154.0, 139.0, 16.0, 20.0, 56.0, 53.0, 126.0, 119.0, 62.0, 59.0, 103.0, 104.0, 45.0, 52.0]}, "sampler_perf": {"mean_env_wait_ms": 3.2660665746708606, "mean_processing_ms": 0.718736935276724, "mean_inference_ms": 3.8108664134221066}, "off_policy_estimator": {}, "info": {"num_steps_trained": 1296000, "num_steps_sampled": 691200, "sample_time_ms": 22619.672, "load_time_ms": 37.409, "grad_time_ms": 9296.582, "update_time_ms": 0.003, "learner": {"ppo": {"cur_kl_coeff": 2.2204460823375376e-17, "cur_lr": 0.0010000000474974513, "total_loss": -0.006839328911155462, "policy_loss": -0.008672266267240047, "vf_loss": 26.157081604003906, "vf_explained_var": 0.5799071192741394, "kl": 0.0013235282385721803, "entropy": 1.5655454397201538, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 691200, "episodes_total": 1728, "training_iteration": 54, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-11-44", "timestamp": 1660248704, "time_this_iter_s": 31.93130087852478, "time_total_s": 7124.666666269302, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, 
"vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": 
false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 
7124.666666269302, "timesteps_since_restore": 691200, "iterations_since_restore": 54, "perf": {"cpu_util_percent": 38.36222222222223, "ram_util_percent": 57.54666666666667}}
+{"episode_reward_max": 301.0, "episode_reward_min": 17.0, "episode_reward_mean": 138.1, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 3.0}, "policy_reward_max": {"ppo": 155.0}, "policy_reward_mean": {"ppo": 69.05}, "custom_metrics": {"sparse_reward_mean": 33.6, "sparse_reward_min": 0, "sparse_reward_max": 100, "shaped_reward_mean": 70.9, "shaped_reward_min": 17, "shaped_reward_max": 113, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 8.15, "onion_pickup_agent_0_min": 3, "onion_pickup_agent_0_max": 15, "onion_pickup_agent_1_mean": 8.01, "onion_pickup_agent_1_min": 3, "onion_pickup_agent_1_max": 16, "useful_onion_pickup_agent_0_mean": 6.92, "useful_onion_pickup_agent_0_min": 1, "useful_onion_pickup_agent_0_max": 13, "useful_onion_pickup_agent_1_mean": 6.88, "useful_onion_pickup_agent_1_min": 1, "useful_onion_pickup_agent_1_max": 15, "onion_drop_agent_0_mean": 1.72, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 7, "onion_drop_agent_1_mean": 1.45, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.97, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 0.92, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 6.05, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 12, "potting_onion_agent_1_mean": 6.19, "potting_onion_agent_1_min": 1, "potting_onion_agent_1_max": 13, "dish_pickup_agent_0_mean": 4.59, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 4.72, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 1.4, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 5, "useful_dish_pickup_agent_1_mean": 1.26, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 5, "dish_drop_agent_0_mean": 1.79, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 9, "dish_drop_agent_1_mean": 1.83, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 7, "useful_dish_drop_agent_0_mean": 0.58, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 4, "useful_dish_drop_agent_1_mean": 0.66, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 5, "soup_pickup_agent_0_mean": 3.16, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 3.24, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 2.13, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 5, "soup_delivery_agent_1_mean": 2.25, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 5, "soup_drop_agent_0_mean": 0.93, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 7, "soup_drop_agent_1_mean": 0.89, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 7, "optimal_onion_potting_agent_0_mean": 6.05, 
"optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 12, "optimal_onion_potting_agent_1_mean": 6.19, "optimal_onion_potting_agent_1_min": 1, "optimal_onion_potting_agent_1_max": 13, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 6.05, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 12, "viable_onion_potting_agent_1_mean": 6.19, "viable_onion_potting_agent_1_min": 1, "viable_onion_potting_agent_1_max": 13, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [144.0, 88.0, 37.0, 77.0, 106.0, 117.0, 253.0, 98.0, 179.0, 104.0, 139.0, 152.0, 64.0, 74.0, 273.0, 67.0, 150.0, 163.0, 78.0, 68.0, 141.0, 187.0, 39.0, 158.0, 202.0, 301.0, 153.0, 247.0, 199.0, 163.0, 115.0, 193.0, 104.0, 152.0, 238.0, 88.0, 207.0, 153.0, 195.0, 101.0, 90.0, 210.0, 225.0, 106.0, 121.0, 75.0, 60.0, 170.0, 23.0, 130.0, 147.0, 136.0, 39.0, 133.0, 158.0, 101.0, 66.0, 116.0, 50.0, 167.0, 146.0, 293.0, 36.0, 109.0, 245.0, 121.0, 207.0, 97.0, 205.0, 61.0, 126.0, 104.0, 179.0, 296.0, 72.0, 126.0, 172.0, 17.0, 155.0, 207.0, 227.0, 101.0, 110.0, 101.0, 207.0, 77.0, 155.0, 159.0, 155.0, 158.0, 150.0, 96.0, 81.0, 125.0, 155.0, 42.0, 270.0, 155.0, 147.0, 75.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [65.0, 79.0, 41.0, 47.0, 14.0, 23.0, 26.0, 51.0, 61.0, 45.0, 55.0, 62.0, 125.0, 128.0, 53.0, 45.0, 81.0, 98.0, 50.0, 54.0, 68.0, 71.0, 80.0, 72.0, 33.0, 31.0, 29.0, 45.0, 142.0, 131.0, 39.0, 28.0, 62.0, 88.0, 88.0, 75.0, 27.0, 51.0, 39.0, 29.0, 71.0, 70.0, 82.0, 105.0, 20.0, 19.0, 66.0, 92.0, 97.0, 105.0, 146.0, 155.0, 71.0, 82.0, 128.0, 119.0, 113.0, 86.0, 94.0, 69.0, 62.0, 53.0, 107.0, 86.0, 64.0, 40.0, 73.0, 79.0, 125.0, 113.0, 40.0, 48.0, 105.0, 102.0, 68.0, 85.0, 96.0, 99.0, 53.0, 48.0, 38.0, 52.0, 106.0, 104.0, 113.0, 112.0, 53.0, 53.0, 59.0, 62.0, 47.0, 28.0, 22.0, 38.0, 90.0, 80.0, 9.0, 14.0, 70.0, 60.0, 82.0, 65.0, 70.0, 66.0, 19.0, 20.0, 64.0, 69.0, 96.0, 62.0, 62.0, 39.0, 31.0, 35.0, 
70.0, 46.0, 27.0, 23.0, 82.0, 85.0, 71.0, 75.0, 154.0, 139.0, 16.0, 20.0, 56.0, 53.0, 126.0, 119.0, 62.0, 59.0, 103.0, 104.0, 45.0, 52.0, 105.0, 100.0, 28.0, 33.0, 65.0, 61.0, 51.0, 53.0, 92.0, 87.0, 143.0, 153.0, 32.0, 40.0, 69.0, 57.0, 75.0, 97.0, 3.0, 14.0, 79.0, 76.0, 107.0, 100.0, 102.0, 125.0, 37.0, 64.0, 49.0, 61.0, 42.0, 59.0, 104.0, 103.0, 43.0, 34.0, 68.0, 87.0, 77.0, 82.0, 64.0, 91.0, 74.0, 84.0, 84.0, 66.0, 50.0, 46.0, 42.0, 39.0, 69.0, 56.0, 86.0, 69.0, 23.0, 19.0, 138.0, 132.0, 65.0, 90.0, 74.0, 73.0, 33.0, 42.0]}, "sampler_perf": {"mean_env_wait_ms": 3.214861984478974, "mean_processing_ms": 0.7085517018126862, "mean_inference_ms": 3.760458478559153}, "off_policy_estimator": {}, "info": {"num_steps_trained": 1320000, "num_steps_sampled": 704000, "sample_time_ms": 22712.594, "load_time_ms": 37.614, "grad_time_ms": 9158.532, "update_time_ms": 0.003, "learner": {"ppo": {"cur_kl_coeff": 1.1102230411687688e-17, "cur_lr": 0.0010000000474974513, "total_loss": -0.007639638613909483, "policy_loss": -0.00974108837544918, "vf_loss": 28.871795654296875, "vf_explained_var": 0.5772756934165955, "kl": 0.0015572212869301438, "entropy": 1.5714462995529175, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 704000, "episodes_total": 1760, "training_iteration": 55, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-12-13", "timestamp": 1660248733, "time_this_iter_s": 29.164530992507935, "time_total_s": 7153.83119726181, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": 
false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, 
"sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, 
"time_since_restore": 7153.83119726181, "timesteps_since_restore": 704000, "iterations_since_restore": 55, "perf": {"cpu_util_percent": 37.4609756097561, "ram_util_percent": 57.50487804878048}}
+{"episode_reward_max": 301.0, "episode_reward_min": 12.0, "episode_reward_mean": 149.67, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 3.0}, "policy_reward_max": {"ppo": 161.0}, "policy_reward_mean": {"ppo": 74.835}, "custom_metrics": {"sparse_reward_mean": 37.8, "sparse_reward_min": 0, "sparse_reward_max": 100, "shaped_reward_mean": 74.07, "shaped_reward_min": 12, "shaped_reward_max": 118, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 8.1, "onion_pickup_agent_0_min": 3, "onion_pickup_agent_0_max": 15, "onion_pickup_agent_1_mean": 8.78, "onion_pickup_agent_1_min": 2, "onion_pickup_agent_1_max": 18, "useful_onion_pickup_agent_0_mean": 6.98, "useful_onion_pickup_agent_0_min": 1, "useful_onion_pickup_agent_0_max": 13, "useful_onion_pickup_agent_1_mean": 7.57, "useful_onion_pickup_agent_1_min": 1, "useful_onion_pickup_agent_1_max": 17, "onion_drop_agent_0_mean": 1.78, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 7, "onion_drop_agent_1_mean": 1.56, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 1.05, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 1.01, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 5.98, "potting_onion_agent_0_min": 1, "potting_onion_agent_0_max": 12, "potting_onion_agent_1_mean": 6.87, "potting_onion_agent_1_min": 1, "potting_onion_agent_1_max": 15, "dish_pickup_agent_0_mean": 4.43, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 4.85, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 1.44, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 5, "useful_dish_pickup_agent_1_mean": 1.5, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 5, "dish_drop_agent_0_mean": 1.66, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 9, "dish_drop_agent_1_mean": 1.84, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.52, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 4, "useful_dish_drop_agent_1_mean": 0.59, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 4, "soup_pickup_agent_0_mean": 3.1, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 3.33, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 2.16, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 6, "soup_delivery_agent_1_mean": 2.34, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 5, "soup_drop_agent_0_mean": 0.8, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 7, "soup_drop_agent_1_mean": 0.89, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 7, "optimal_onion_potting_agent_0_mean": 5.98, 
"optimal_onion_potting_agent_0_min": 1, "optimal_onion_potting_agent_0_max": 12, "optimal_onion_potting_agent_1_mean": 6.87, "optimal_onion_potting_agent_1_min": 1, "optimal_onion_potting_agent_1_max": 15, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 5.98, "viable_onion_potting_agent_0_min": 1, "viable_onion_potting_agent_0_max": 12, "viable_onion_potting_agent_1_mean": 6.87, "viable_onion_potting_agent_1_min": 1, "viable_onion_potting_agent_1_max": 15, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [203.0, 136.0, 112.0, 259.0, 164.0, 150.0, 167.0, 93.0, 178.0, 227.0, 236.0, 250.0, 218.0, 169.0, 145.0, 159.0, 41.0, 12.0, 144.0, 131.0, 298.0, 47.0, 201.0, 213.0, 176.0, 155.0, 238.0, 115.0, 219.0, 82.0, 216.0, 148.0, 245.0, 121.0, 207.0, 97.0, 205.0, 61.0, 126.0, 104.0, 179.0, 296.0, 72.0, 126.0, 172.0, 17.0, 155.0, 207.0, 227.0, 101.0, 110.0, 101.0, 207.0, 77.0, 155.0, 159.0, 155.0, 158.0, 150.0, 96.0, 81.0, 125.0, 155.0, 42.0, 270.0, 155.0, 147.0, 75.0, 144.0, 88.0, 37.0, 77.0, 106.0, 117.0, 253.0, 98.0, 179.0, 104.0, 139.0, 152.0, 64.0, 74.0, 273.0, 67.0, 150.0, 163.0, 78.0, 68.0, 141.0, 187.0, 39.0, 158.0, 202.0, 301.0, 153.0, 247.0, 199.0, 163.0, 115.0, 193.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [103.0, 100.0, 70.0, 66.0, 69.0, 43.0, 134.0, 125.0, 81.0, 83.0, 62.0, 88.0, 71.0, 96.0, 39.0, 54.0, 89.0, 89.0, 104.0, 123.0, 124.0, 112.0, 119.0, 131.0, 111.0, 107.0, 73.0, 96.0, 74.0, 71.0, 60.0, 99.0, 19.0, 22.0, 6.0, 6.0, 77.0, 67.0, 66.0, 65.0, 161.0, 137.0, 9.0, 38.0, 92.0, 109.0, 106.0, 107.0, 79.0, 97.0, 74.0, 81.0, 107.0, 131.0, 56.0, 59.0, 106.0, 113.0, 40.0, 42.0, 111.0, 105.0, 55.0, 93.0, 126.0, 119.0, 62.0, 59.0, 103.0, 104.0, 45.0, 52.0, 105.0, 100.0, 28.0, 33.0, 65.0, 61.0, 51.0, 53.0, 92.0, 87.0, 143.0, 153.0, 32.0, 40.0, 69.0, 57.0, 75.0, 97.0, 3.0, 14.0, 79.0, 76.0, 107.0, 100.0, 102.0, 125.0, 37.0, 64.0, 49.0, 61.0, 42.0, 59.0, 104.0, 103.0, 43.0, 34.0, 68.0, 87.0, 77.0, 
82.0, 64.0, 91.0, 74.0, 84.0, 84.0, 66.0, 50.0, 46.0, 42.0, 39.0, 69.0, 56.0, 86.0, 69.0, 23.0, 19.0, 138.0, 132.0, 65.0, 90.0, 74.0, 73.0, 33.0, 42.0, 65.0, 79.0, 41.0, 47.0, 14.0, 23.0, 26.0, 51.0, 61.0, 45.0, 55.0, 62.0, 125.0, 128.0, 53.0, 45.0, 81.0, 98.0, 50.0, 54.0, 68.0, 71.0, 80.0, 72.0, 33.0, 31.0, 29.0, 45.0, 142.0, 131.0, 39.0, 28.0, 62.0, 88.0, 88.0, 75.0, 27.0, 51.0, 39.0, 29.0, 71.0, 70.0, 82.0, 105.0, 20.0, 19.0, 66.0, 92.0, 97.0, 105.0, 146.0, 155.0, 71.0, 82.0, 128.0, 119.0, 113.0, 86.0, 94.0, 69.0, 62.0, 53.0, 107.0, 86.0]}, "sampler_perf": {"mean_env_wait_ms": 3.16541739890218, "mean_processing_ms": 0.6987189186943766, "mean_inference_ms": 3.711599248384498}, "off_policy_estimator": {}, "info": {"num_steps_trained": 1344000, "num_steps_sampled": 716800, "sample_time_ms": 22951.468, "load_time_ms": 37.343, "grad_time_ms": 9065.669, "update_time_ms": 0.003, "learner": {"ppo": {"cur_kl_coeff": 5.551115205843844e-18, "cur_lr": 0.0010000000474974513, "total_loss": -0.004821139387786388, "policy_loss": -0.006896324921399355, "vf_loss": 28.4981746673584, "vf_explained_var": 0.6428199410438538, "kl": 0.0015486044576391578, "entropy": 1.5492569208145142, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 716800, "episodes_total": 1792, "training_iteration": 56, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-12-44", "timestamp": 1660248764, "time_this_iter_s": 30.594375133514404, "time_total_s": 7184.425572395325, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, 
"no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", 
"use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": 
false}, "time_since_restore": 7184.425572395325, "timesteps_since_restore": 716800, "iterations_since_restore": 56, "perf": {"cpu_util_percent": 38.95348837209303, "ram_util_percent": 57.599999999999994}}
+{"episode_reward_max": 305.0, "episode_reward_min": 12.0, "episode_reward_mean": 157.25, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 6.0}, "policy_reward_max": {"ppo": 161.0}, "policy_reward_mean": {"ppo": 78.625}, "custom_metrics": {"sparse_reward_mean": 41.6, "sparse_reward_min": 0, "sparse_reward_max": 100, "shaped_reward_mean": 74.05, "shaped_reward_min": 12, "shaped_reward_max": 118, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 7.82, "onion_pickup_agent_0_min": 3, "onion_pickup_agent_0_max": 14, "onion_pickup_agent_1_mean": 9.13, "onion_pickup_agent_1_min": 2, "onion_pickup_agent_1_max": 18, "useful_onion_pickup_agent_0_mean": 6.75, "useful_onion_pickup_agent_0_min": 1, "useful_onion_pickup_agent_0_max": 13, "useful_onion_pickup_agent_1_mean": 8.03, "useful_onion_pickup_agent_1_min": 1, "useful_onion_pickup_agent_1_max": 17, "onion_drop_agent_0_mean": 1.62, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 1.51, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 1.0, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 0.96, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 5.8, "potting_onion_agent_0_min": 1, "potting_onion_agent_0_max": 12, "potting_onion_agent_1_mean": 7.28, "potting_onion_agent_1_min": 1, "potting_onion_agent_1_max": 15, "dish_pickup_agent_0_mean": 4.56, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 4.7, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 1.49, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 5, "useful_dish_pickup_agent_1_mean": 1.38, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 4, "dish_drop_agent_0_mean": 1.83, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 9, "dish_drop_agent_1_mean": 1.75, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.51, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 4, "useful_dish_drop_agent_1_mean": 0.56, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 3.06, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 3.3, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 2.2, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 6, "soup_delivery_agent_1_mean": 2.25, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 5, "soup_drop_agent_0_mean": 0.78, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 7, "soup_drop_agent_1_mean": 0.91, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 5, "optimal_onion_potting_agent_0_mean": 5.8, "optimal_onion_potting_agent_0_min": 
1, "optimal_onion_potting_agent_0_max": 12, "optimal_onion_potting_agent_1_mean": 7.28, "optimal_onion_potting_agent_1_min": 1, "optimal_onion_potting_agent_1_max": 15, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 5.8, "viable_onion_potting_agent_0_min": 1, "viable_onion_potting_agent_0_max": 12, "viable_onion_potting_agent_1_mean": 7.28, "viable_onion_potting_agent_1_min": 1, "viable_onion_potting_agent_1_max": 15, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": 
[201.0, 66.0, 180.0, 121.0, 247.0, 190.0, 66.0, 139.0, 139.0, 205.0, 215.0, 267.0, 62.0, 213.0, 155.0, 181.0, 58.0, 174.0, 305.0, 207.0, 155.0, 178.0, 107.0, 166.0, 213.0, 99.0, 152.0, 246.0, 167.0, 110.0, 155.0, 108.0, 270.0, 155.0, 147.0, 75.0, 144.0, 88.0, 37.0, 77.0, 106.0, 117.0, 253.0, 98.0, 179.0, 104.0, 139.0, 152.0, 64.0, 74.0, 273.0, 67.0, 150.0, 163.0, 78.0, 68.0, 141.0, 187.0, 39.0, 158.0, 202.0, 301.0, 153.0, 247.0, 199.0, 163.0, 115.0, 193.0, 203.0, 136.0, 112.0, 259.0, 164.0, 150.0, 167.0, 93.0, 178.0, 227.0, 236.0, 250.0, 218.0, 169.0, 145.0, 159.0, 41.0, 12.0, 144.0, 131.0, 298.0, 47.0, 201.0, 213.0, 176.0, 155.0, 238.0, 115.0, 219.0, 82.0, 216.0, 148.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [95.0, 106.0, 37.0, 29.0, 94.0, 86.0, 62.0, 59.0, 113.0, 134.0, 76.0, 114.0, 26.0, 40.0, 57.0, 82.0, 57.0, 82.0, 99.0, 106.0, 100.0, 115.0, 121.0, 146.0, 25.0, 37.0, 108.0, 105.0, 76.0, 79.0, 95.0, 86.0, 22.0, 36.0, 85.0, 89.0, 148.0, 157.0, 103.0, 104.0, 71.0, 84.0, 95.0, 83.0, 56.0, 51.0, 85.0, 81.0, 111.0, 102.0, 45.0, 54.0, 70.0, 82.0, 105.0, 141.0, 76.0, 91.0, 65.0, 45.0, 68.0, 87.0, 64.0, 44.0, 138.0, 132.0, 65.0, 90.0, 74.0, 73.0, 33.0, 42.0, 65.0, 79.0, 41.0, 47.0, 14.0, 23.0, 26.0, 51.0, 61.0, 45.0, 55.0, 62.0, 125.0, 128.0, 53.0, 45.0, 81.0, 98.0, 50.0, 54.0, 68.0, 71.0, 80.0, 72.0, 33.0, 31.0, 29.0, 45.0, 142.0, 131.0, 39.0, 28.0, 62.0, 88.0, 88.0, 75.0, 27.0, 51.0, 39.0, 29.0, 71.0, 70.0, 82.0, 105.0, 20.0, 19.0, 66.0, 92.0, 97.0, 105.0, 146.0, 155.0, 
71.0, 82.0, 128.0, 119.0, 113.0, 86.0, 94.0, 69.0, 62.0, 53.0, 107.0, 86.0, 103.0, 100.0, 70.0, 66.0, 69.0, 43.0, 134.0, 125.0, 81.0, 83.0, 62.0, 88.0, 71.0, 96.0, 39.0, 54.0, 89.0, 89.0, 104.0, 123.0, 124.0, 112.0, 119.0, 131.0, 111.0, 107.0, 73.0, 96.0, 74.0, 71.0, 60.0, 99.0, 19.0, 22.0, 6.0, 6.0, 77.0, 67.0, 66.0, 65.0, 161.0, 137.0, 9.0, 38.0, 92.0, 109.0, 106.0, 107.0, 79.0, 97.0, 74.0, 81.0, 107.0, 131.0, 56.0, 59.0, 106.0, 113.0, 40.0, 42.0, 111.0, 105.0, 55.0, 93.0]}, "sampler_perf": {"mean_env_wait_ms": 3.1176592492930246, "mean_processing_ms": 0.6892173986565695, "mean_inference_ms": 3.663860513787198}, "off_policy_estimator": {}, "info": {"num_steps_trained": 1368000, "num_steps_sampled": 729600, "sample_time_ms": 22935.784, "load_time_ms": 37.435, "grad_time_ms": 8913.484, "update_time_ms": 0.003, "learner": {"ppo": {"cur_kl_coeff": 2.775557602921922e-18, "cur_lr": 0.0010000000474974513, "total_loss": -0.004853060003370047, "policy_loss": -0.0074228327721357346, "vf_loss": 33.384822845458984, "vf_explained_var": 0.6208257079124451, "kl": 0.0016279626870527864, "entropy": 1.5374183654785156, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 729600, "episodes_total": 1824, "training_iteration": 57, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-13-15", "timestamp": 1660248795, "time_this_iter_s": 30.907179594039917, "time_total_s": 7215.332751989365, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, 
"max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", 
"synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 7215.332751989365, "timesteps_since_restore": 729600, 
"iterations_since_restore": 57, "perf": {"cpu_util_percent": 39.13636363636363, "ram_util_percent": 57.62499999999999}}
+{"episode_reward_max": 305.0, "episode_reward_min": 12.0, "episode_reward_mean": 166.72, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 6.0}, "policy_reward_max": {"ppo": 161.0}, "policy_reward_mean": {"ppo": 83.36}, "custom_metrics": {"sparse_reward_mean": 44.8, "sparse_reward_min": 0, "sparse_reward_max": 100, "shaped_reward_mean": 77.12, "shaped_reward_min": 12, "shaped_reward_max": 118, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 7.95, "onion_pickup_agent_0_min": 3, "onion_pickup_agent_0_max": 15, "onion_pickup_agent_1_mean": 9.49, "onion_pickup_agent_1_min": 2, "onion_pickup_agent_1_max": 18, "useful_onion_pickup_agent_0_mean": 7.04, "useful_onion_pickup_agent_0_min": 2, "useful_onion_pickup_agent_0_max": 15, "useful_onion_pickup_agent_1_mean": 8.32, "useful_onion_pickup_agent_1_min": 1, "useful_onion_pickup_agent_1_max": 17, "onion_drop_agent_0_mean": 1.51, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 1.54, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.99, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, "useful_onion_drop_agent_1_mean": 0.99, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 6.02, "potting_onion_agent_0_min": 2, "potting_onion_agent_0_max": 15, "potting_onion_agent_1_mean": 7.65, "potting_onion_agent_1_min": 1, "potting_onion_agent_1_max": 15, "dish_pickup_agent_0_mean": 4.56, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 4.61, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 1.54, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 5, "useful_dish_pickup_agent_1_mean": 1.48, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 5, "dish_drop_agent_0_mean": 1.63, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 1.71, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.54, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 5, "useful_dish_drop_agent_1_mean": 0.52, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 4, "soup_pickup_agent_0_mean": 3.17, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 3.16, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 2.43, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 6, "soup_delivery_agent_1_mean": 2.24, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 5, "soup_drop_agent_0_mean": 0.65, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 5, "soup_drop_agent_1_mean": 0.75, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 5, "optimal_onion_potting_agent_0_mean": 6.02, 
"optimal_onion_potting_agent_0_min": 2, "optimal_onion_potting_agent_0_max": 15, "optimal_onion_potting_agent_1_mean": 7.65, "optimal_onion_potting_agent_1_min": 1, "optimal_onion_potting_agent_1_max": 15, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 6.02, "viable_onion_potting_agent_0_min": 2, "viable_onion_potting_agent_0_max": 15, "viable_onion_potting_agent_1_mean": 7.65, "viable_onion_potting_agent_1_min": 1, "viable_onion_potting_agent_1_max": 15, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [120.0, 115.0, 175.0, 270.0, 118.0, 90.0, 239.0, 267.0, 206.0, 239.0, 256.0, 67.0, 87.0, 93.0, 43.0, 230.0, 164.0, 193.0, 148.0, 222.0, 158.0, 138.0, 144.0, 171.0, 181.0, 241.0, 239.0, 93.0, 167.0, 152.0, 164.0, 263.0, 199.0, 163.0, 115.0, 193.0, 203.0, 136.0, 112.0, 259.0, 164.0, 150.0, 167.0, 93.0, 178.0, 227.0, 236.0, 250.0, 218.0, 169.0, 145.0, 159.0, 41.0, 12.0, 144.0, 131.0, 298.0, 47.0, 201.0, 213.0, 176.0, 155.0, 238.0, 115.0, 219.0, 82.0, 216.0, 148.0, 201.0, 66.0, 180.0, 121.0, 247.0, 190.0, 66.0, 139.0, 139.0, 205.0, 215.0, 267.0, 62.0, 213.0, 155.0, 181.0, 58.0, 174.0, 305.0, 207.0, 155.0, 178.0, 107.0, 166.0, 213.0, 99.0, 152.0, 246.0, 167.0, 110.0, 155.0, 108.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [65.0, 55.0, 50.0, 65.0, 83.0, 92.0, 134.0, 136.0, 59.0, 59.0, 44.0, 46.0, 107.0, 132.0, 129.0, 138.0, 93.0, 113.0, 119.0, 120.0, 122.0, 134.0, 36.0, 31.0, 36.0, 51.0, 45.0, 48.0, 25.0, 18.0, 113.0, 117.0, 72.0, 92.0, 90.0, 103.0, 82.0, 66.0, 123.0, 99.0, 82.0, 76.0, 52.0, 86.0, 72.0, 72.0, 84.0, 87.0, 93.0, 88.0, 130.0, 111.0, 138.0, 101.0, 45.0, 48.0, 79.0, 88.0, 66.0, 86.0, 79.0, 85.0, 138.0, 125.0, 113.0, 86.0, 94.0, 69.0, 62.0, 53.0, 107.0, 86.0, 103.0, 100.0, 70.0, 66.0, 69.0, 43.0, 134.0, 125.0, 81.0, 83.0, 62.0, 88.0, 71.0, 96.0, 39.0, 54.0, 89.0, 89.0, 104.0, 123.0, 124.0, 112.0, 119.0, 131.0, 111.0, 107.0, 73.0, 96.0, 74.0, 71.0, 60.0, 99.0, 19.0, 22.0, 6.0, 6.0, 77.0, 67.0, 
66.0, 65.0, 161.0, 137.0, 9.0, 38.0, 92.0, 109.0, 106.0, 107.0, 79.0, 97.0, 74.0, 81.0, 107.0, 131.0, 56.0, 59.0, 106.0, 113.0, 40.0, 42.0, 111.0, 105.0, 55.0, 93.0, 95.0, 106.0, 37.0, 29.0, 94.0, 86.0, 62.0, 59.0, 113.0, 134.0, 76.0, 114.0, 26.0, 40.0, 57.0, 82.0, 57.0, 82.0, 99.0, 106.0, 100.0, 115.0, 121.0, 146.0, 25.0, 37.0, 108.0, 105.0, 76.0, 79.0, 95.0, 86.0, 22.0, 36.0, 85.0, 89.0, 148.0, 157.0, 103.0, 104.0, 71.0, 84.0, 95.0, 83.0, 56.0, 51.0, 85.0, 81.0, 111.0, 102.0, 45.0, 54.0, 70.0, 82.0, 105.0, 141.0, 76.0, 91.0, 65.0, 45.0, 68.0, 87.0, 64.0, 44.0]}, "sampler_perf": {"mean_env_wait_ms": 3.0715992457802104, "mean_processing_ms": 0.6800724399450163, "mean_inference_ms": 3.6184030024872835}, "off_policy_estimator": {}, "info": {"num_steps_trained": 1392000, "num_steps_sampled": 742400, "sample_time_ms": 22627.432, "load_time_ms": 37.419, "grad_time_ms": 8703.731, "update_time_ms": 0.003, "learner": {"ppo": {"cur_kl_coeff": 1.387778801460961e-18, "cur_lr": 0.0010000000474974513, "total_loss": -0.004274281207472086, "policy_loss": -0.006773001980036497, "vf_loss": 32.668846130371094, "vf_explained_var": 0.6147891879081726, "kl": 0.0016452163690701127, "entropy": 1.5363364219665527, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 742400, "episodes_total": 1856, "training_iteration": 58, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-13-46", "timestamp": 1660248826, "time_this_iter_s": 31.19256901741028, "time_total_s": 7246.525321006775, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], 
"free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 
0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, 
"_fake_gpus": false}, "time_since_restore": 7246.525321006775, "timesteps_since_restore": 742400, "iterations_since_restore": 58, "perf": {"cpu_util_percent": 37.85, "ram_util_percent": 57.636363636363626}}
+{"episode_reward_max": 347.0, "episode_reward_min": 9.0, "episode_reward_mean": 173.36, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 3.0}, "policy_reward_max": {"ppo": 177.0}, "policy_reward_mean": {"ppo": 86.68}, "custom_metrics": {"sparse_reward_mean": 48.4, "sparse_reward_min": 0, "sparse_reward_max": 120, "shaped_reward_mean": 76.56, "shaped_reward_min": 9, "shaped_reward_max": 110, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 8.08, "onion_pickup_agent_0_min": 1, "onion_pickup_agent_0_max": 18, "onion_pickup_agent_1_mean": 9.37, "onion_pickup_agent_1_min": 4, "onion_pickup_agent_1_max": 18, "useful_onion_pickup_agent_0_mean": 7.26, "useful_onion_pickup_agent_0_min": 1, "useful_onion_pickup_agent_0_max": 15, "useful_onion_pickup_agent_1_mean": 8.3, "useful_onion_pickup_agent_1_min": 1, "useful_onion_pickup_agent_1_max": 17, "onion_drop_agent_0_mean": 1.39, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 1.49, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.82, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, "useful_onion_drop_agent_1_mean": 0.89, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 6.31, "potting_onion_agent_0_min": 1, "potting_onion_agent_0_max": 15, "potting_onion_agent_1_mean": 7.56, "potting_onion_agent_1_min": 1, "potting_onion_agent_1_max": 15, "dish_pickup_agent_0_mean": 4.77, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 4.59, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 1.36, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 4, "useful_dish_pickup_agent_1_mean": 1.44, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 5, "dish_drop_agent_0_mean": 1.85, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 1.71, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 6, "useful_dish_drop_agent_0_mean": 0.56, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 5, "useful_dish_drop_agent_1_mean": 0.55, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 4, "soup_pickup_agent_0_mean": 3.05, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 6, "soup_pickup_agent_1_mean": 3.11, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 2.34, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 5, "soup_delivery_agent_1_mean": 2.34, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 5, "soup_drop_agent_0_mean": 0.64, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 5, "soup_drop_agent_1_mean": 0.64, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 5, "optimal_onion_potting_agent_0_mean": 6.31, 
"optimal_onion_potting_agent_0_min": 1, "optimal_onion_potting_agent_0_max": 15, "optimal_onion_potting_agent_1_mean": 7.56, "optimal_onion_potting_agent_1_min": 1, "optimal_onion_potting_agent_1_max": 15, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 6.31, "viable_onion_potting_agent_0_min": 1, "viable_onion_potting_agent_0_max": 15, "viable_onion_potting_agent_1_mean": 7.56, "viable_onion_potting_agent_1_min": 1, "viable_onion_potting_agent_1_max": 15, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [195.0, 147.0, 179.0, 305.0, 302.0, 199.0, 156.0, 201.0, 202.0, 53.0, 144.0, 131.0, 215.0, 347.0, 204.0, 241.0, 209.0, 112.0, 85.0, 238.0, 207.0, 158.0, 193.0, 185.0, 244.0, 185.0, 194.0, 193.0, 170.0, 9.0, 253.0, 115.0, 219.0, 82.0, 216.0, 148.0, 201.0, 66.0, 180.0, 121.0, 247.0, 190.0, 66.0, 139.0, 139.0, 205.0, 215.0, 267.0, 62.0, 213.0, 155.0, 181.0, 58.0, 174.0, 305.0, 207.0, 155.0, 178.0, 107.0, 166.0, 213.0, 99.0, 152.0, 246.0, 167.0, 110.0, 155.0, 108.0, 120.0, 115.0, 175.0, 270.0, 118.0, 90.0, 239.0, 267.0, 206.0, 239.0, 256.0, 67.0, 87.0, 93.0, 43.0, 230.0, 164.0, 193.0, 148.0, 222.0, 158.0, 138.0, 144.0, 171.0, 181.0, 241.0, 239.0, 93.0, 167.0, 152.0, 164.0, 263.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [87.0, 108.0, 90.0, 57.0, 94.0, 85.0, 140.0, 165.0, 160.0, 142.0, 93.0, 106.0, 81.0, 75.0, 93.0, 108.0, 97.0, 105.0, 25.0, 28.0, 69.0, 75.0, 63.0, 68.0, 127.0, 88.0, 170.0, 177.0, 97.0, 107.0, 122.0, 119.0, 103.0, 106.0, 58.0, 54.0, 40.0, 45.0, 110.0, 128.0, 105.0, 102.0, 66.0, 92.0, 83.0, 110.0, 94.0, 91.0, 119.0, 125.0, 104.0, 81.0, 102.0, 92.0, 91.0, 102.0, 83.0, 87.0, 6.0, 3.0, 123.0, 130.0, 64.0, 51.0, 106.0, 113.0, 40.0, 42.0, 111.0, 105.0, 55.0, 93.0, 95.0, 106.0, 37.0, 29.0, 94.0, 86.0, 62.0, 59.0, 113.0, 134.0, 76.0, 114.0, 26.0, 40.0, 57.0, 82.0, 57.0, 82.0, 99.0, 106.0, 100.0, 115.0, 121.0, 146.0, 25.0, 37.0, 108.0, 105.0, 76.0, 79.0, 95.0, 86.0, 22.0, 36.0, 85.0, 89.0, 148.0, 
157.0, 103.0, 104.0, 71.0, 84.0, 95.0, 83.0, 56.0, 51.0, 85.0, 81.0, 111.0, 102.0, 45.0, 54.0, 70.0, 82.0, 105.0, 141.0, 76.0, 91.0, 65.0, 45.0, 68.0, 87.0, 64.0, 44.0, 65.0, 55.0, 50.0, 65.0, 83.0, 92.0, 134.0, 136.0, 59.0, 59.0, 44.0, 46.0, 107.0, 132.0, 129.0, 138.0, 93.0, 113.0, 119.0, 120.0, 122.0, 134.0, 36.0, 31.0, 36.0, 51.0, 45.0, 48.0, 25.0, 18.0, 113.0, 117.0, 72.0, 92.0, 90.0, 103.0, 82.0, 66.0, 123.0, 99.0, 82.0, 76.0, 52.0, 86.0, 72.0, 72.0, 84.0, 87.0, 93.0, 88.0, 130.0, 111.0, 138.0, 101.0, 45.0, 48.0, 79.0, 88.0, 66.0, 86.0, 79.0, 85.0, 138.0, 125.0]}, "sampler_perf": {"mean_env_wait_ms": 3.027099137658147, "mean_processing_ms": 0.6712374159631969, "mean_inference_ms": 3.574879048855646}, "off_policy_estimator": {}, "info": {"num_steps_trained": 1416000, "num_steps_sampled": 755200, "sample_time_ms": 22439.192, "load_time_ms": 37.393, "grad_time_ms": 8622.999, "update_time_ms": 0.003, "learner": {"ppo": {"cur_kl_coeff": 6.938894007304805e-19, "cur_lr": 0.0010000000474974513, "total_loss": -0.005061946343630552, "policy_loss": -0.0077269431203603745, "vf_loss": 34.30827713012695, "vf_explained_var": 0.6426100730895996, "kl": 0.0014075502986088395, "entropy": 1.5316654443740845, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 755200, "episodes_total": 1888, "training_iteration": 59, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-14-19", "timestamp": 1660248859, "time_this_iter_s": 32.82003712654114, "time_total_s": 7279.345358133316, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], 
"free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 
0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, 
"_fake_gpus": false}, "time_since_restore": 7279.345358133316, "timesteps_since_restore": 755200, "iterations_since_restore": 59, "perf": {"cpu_util_percent": 38.134782608695645, "ram_util_percent": 57.654347826086926}}
+{"episode_reward_max": 347.0, "episode_reward_min": 9.0, "episode_reward_mean": 183.93, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 3.0}, "policy_reward_max": {"ppo": 177.0}, "policy_reward_mean": {"ppo": 91.965}, "custom_metrics": {"sparse_reward_mean": 52.8, "sparse_reward_min": 0, "sparse_reward_max": 120, "shaped_reward_mean": 78.33, "shaped_reward_min": 9, "shaped_reward_max": 124, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 8.49, "onion_pickup_agent_0_min": 1, "onion_pickup_agent_0_max": 18, "onion_pickup_agent_1_mean": 9.58, "onion_pickup_agent_1_min": 4, "onion_pickup_agent_1_max": 17, "useful_onion_pickup_agent_0_mean": 7.62, "useful_onion_pickup_agent_0_min": 1, "useful_onion_pickup_agent_0_max": 15, "useful_onion_pickup_agent_1_mean": 8.41, "useful_onion_pickup_agent_1_min": 1, "useful_onion_pickup_agent_1_max": 16, "onion_drop_agent_0_mean": 1.48, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 1.65, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.88, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, "useful_onion_drop_agent_1_mean": 0.99, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 6.64, "potting_onion_agent_0_min": 1, "potting_onion_agent_0_max": 15, "potting_onion_agent_1_mean": 7.61, "potting_onion_agent_1_min": 1, "potting_onion_agent_1_max": 14, "dish_pickup_agent_0_mean": 4.84, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 4.61, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 1.44, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 6, "useful_dish_pickup_agent_1_mean": 1.42, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 5, "dish_drop_agent_0_mean": 1.76, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 1.81, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 6, "useful_dish_drop_agent_0_mean": 0.59, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 5, "useful_dish_drop_agent_1_mean": 0.56, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 4, "soup_pickup_agent_0_mean": 3.14, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 6, "soup_pickup_agent_1_mean": 2.85, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 2.47, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 6, "soup_delivery_agent_1_mean": 2.33, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 5, "soup_drop_agent_0_mean": 0.56, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 3, "soup_drop_agent_1_mean": 0.4, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 3, "optimal_onion_potting_agent_0_mean": 6.64, 
"optimal_onion_potting_agent_0_min": 1, "optimal_onion_potting_agent_0_max": 15, "optimal_onion_potting_agent_1_mean": 7.61, "optimal_onion_potting_agent_1_min": 1, "optimal_onion_potting_agent_1_max": 14, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 6.64, "viable_onion_potting_agent_0_min": 1, "viable_onion_potting_agent_0_max": 15, "viable_onion_potting_agent_1_mean": 7.61, "viable_onion_potting_agent_1_min": 1, "viable_onion_potting_agent_1_max": 14, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [167.0, 264.0, 176.0, 241.0, 333.0, 82.0, 213.0, 145.0, 167.0, 188.0, 259.0, 307.0, 74.0, 213.0, 299.0, 284.0, 193.0, 136.0, 161.0, 188.0, 238.0, 79.0, 193.0, 241.0, 133.0, 172.0, 238.0, 168.0, 256.0, 213.0, 196.0, 212.0, 167.0, 110.0, 155.0, 108.0, 120.0, 115.0, 175.0, 270.0, 118.0, 90.0, 239.0, 267.0, 206.0, 239.0, 256.0, 67.0, 87.0, 93.0, 43.0, 230.0, 164.0, 193.0, 148.0, 222.0, 158.0, 138.0, 144.0, 171.0, 181.0, 241.0, 239.0, 93.0, 167.0, 152.0, 164.0, 263.0, 195.0, 147.0, 179.0, 305.0, 302.0, 199.0, 156.0, 201.0, 202.0, 53.0, 144.0, 131.0, 215.0, 347.0, 204.0, 241.0, 209.0, 112.0, 85.0, 238.0, 207.0, 158.0, 193.0, 185.0, 244.0, 185.0, 194.0, 193.0, 170.0, 9.0, 253.0, 115.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [75.0, 92.0, 131.0, 133.0, 79.0, 97.0, 128.0, 113.0, 160.0, 173.0, 35.0, 47.0, 111.0, 102.0, 74.0, 71.0, 78.0, 89.0, 83.0, 105.0, 133.0, 126.0, 161.0, 146.0, 37.0, 37.0, 94.0, 119.0, 155.0, 144.0, 142.0, 142.0, 86.0, 107.0, 65.0, 71.0, 79.0, 82.0, 91.0, 97.0, 129.0, 109.0, 34.0, 45.0, 103.0, 90.0, 128.0, 113.0, 73.0, 60.0, 88.0, 84.0, 120.0, 118.0, 86.0, 82.0, 128.0, 128.0, 113.0, 100.0, 108.0, 88.0, 95.0, 117.0, 76.0, 91.0, 65.0, 45.0, 68.0, 87.0, 64.0, 44.0, 65.0, 55.0, 50.0, 65.0, 83.0, 92.0, 134.0, 136.0, 59.0, 59.0, 44.0, 46.0, 107.0, 132.0, 129.0, 138.0, 93.0, 113.0, 119.0, 120.0, 122.0, 134.0, 36.0, 31.0, 36.0, 51.0, 45.0, 48.0, 25.0, 18.0, 113.0, 117.0, 72.0, 92.0, 90.0, 
103.0, 82.0, 66.0, 123.0, 99.0, 82.0, 76.0, 52.0, 86.0, 72.0, 72.0, 84.0, 87.0, 93.0, 88.0, 130.0, 111.0, 138.0, 101.0, 45.0, 48.0, 79.0, 88.0, 66.0, 86.0, 79.0, 85.0, 138.0, 125.0, 87.0, 108.0, 90.0, 57.0, 94.0, 85.0, 140.0, 165.0, 160.0, 142.0, 93.0, 106.0, 81.0, 75.0, 93.0, 108.0, 97.0, 105.0, 25.0, 28.0, 69.0, 75.0, 63.0, 68.0, 127.0, 88.0, 170.0, 177.0, 97.0, 107.0, 122.0, 119.0, 103.0, 106.0, 58.0, 54.0, 40.0, 45.0, 110.0, 128.0, 105.0, 102.0, 66.0, 92.0, 83.0, 110.0, 94.0, 91.0, 119.0, 125.0, 104.0, 81.0, 102.0, 92.0, 91.0, 102.0, 83.0, 87.0, 6.0, 3.0, 123.0, 130.0, 64.0, 51.0]}, "sampler_perf": {"mean_env_wait_ms": 2.9840674048083304, "mean_processing_ms": 0.6627022470508764, "mean_inference_ms": 3.532781736604636}, "off_policy_estimator": {}, "info": {"num_steps_trained": 1440000, "num_steps_sampled": 768000, "sample_time_ms": 22393.025, "load_time_ms": 37.431, "grad_time_ms": 8732.011, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 3.4694470036524025e-19, "cur_lr": 0.0010000000474974513, "total_loss": -0.0032793853897601366, "policy_loss": -0.0061605386435985565, "vf_loss": 36.42392349243164, "vf_explained_var": 0.6542922854423523, "kl": 0.0015746770659461617, "entropy": 1.5224775075912476, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 768000, "episodes_total": 1920, "training_iteration": 60, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-14-51", "timestamp": 1660248891, "time_this_iter_s": 31.919984817504883, "time_total_s": 7311.265342950821, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", 
"fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function 
get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, 
"kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 7311.265342950821, "timesteps_since_restore": 768000, "iterations_since_restore": 60, "perf": {"cpu_util_percent": 38.15, "ram_util_percent": 57.70217391304345}}
+{"episode_reward_max": 347.0, "episode_reward_min": 9.0, "episode_reward_mean": 199.95, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 3.0}, "policy_reward_max": {"ppo": 185.0}, "policy_reward_mean": {"ppo": 99.975}, "custom_metrics": {"sparse_reward_mean": 59.6, "sparse_reward_min": 0, "sparse_reward_max": 120, "shaped_reward_mean": 80.75, "shaped_reward_min": 9, "shaped_reward_max": 124, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 8.59, "onion_pickup_agent_0_min": 1, "onion_pickup_agent_0_max": 18, "onion_pickup_agent_1_mean": 9.82, "onion_pickup_agent_1_min": 4, "onion_pickup_agent_1_max": 17, "useful_onion_pickup_agent_0_mean": 7.86, "useful_onion_pickup_agent_0_min": 1, "useful_onion_pickup_agent_0_max": 15, "useful_onion_pickup_agent_1_mean": 8.95, "useful_onion_pickup_agent_1_min": 1, "useful_onion_pickup_agent_1_max": 16, "onion_drop_agent_0_mean": 1.32, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 1.42, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.68, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, "useful_onion_drop_agent_1_mean": 0.78, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 6.94, "potting_onion_agent_0_min": 1, "potting_onion_agent_0_max": 14, "potting_onion_agent_1_mean": 8.02, "potting_onion_agent_1_min": 1, "potting_onion_agent_1_max": 15, "dish_pickup_agent_0_mean": 5.14, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 4.61, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 1.45, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 6, "useful_dish_pickup_agent_1_mean": 1.24, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 5, "dish_drop_agent_0_mean": 1.86, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 1.81, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 9, "useful_dish_drop_agent_0_mean": 0.61, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.59, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 3.19, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 6, "soup_pickup_agent_1_mean": 2.78, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 6, "soup_delivery_agent_0_mean": 2.59, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 6, "soup_delivery_agent_1_mean": 2.44, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 5, "soup_drop_agent_0_mean": 0.48, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 3, "soup_drop_agent_1_mean": 0.28, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 3, "optimal_onion_potting_agent_0_mean": 6.94, 
"optimal_onion_potting_agent_0_min": 1, "optimal_onion_potting_agent_0_max": 14, "optimal_onion_potting_agent_1_mean": 8.02, "optimal_onion_potting_agent_1_min": 1, "optimal_onion_potting_agent_1_max": 15, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 6.94, "viable_onion_potting_agent_0_min": 1, "viable_onion_potting_agent_0_max": 14, "viable_onion_potting_agent_1_mean": 8.02, "viable_onion_potting_agent_1_min": 1, "viable_onion_potting_agent_1_max": 15, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [255.0, 247.0, 262.0, 209.0, 147.0, 316.0, 267.0, 155.0, 275.0, 139.0, 256.0, 341.0, 209.0, 204.0, 250.0, 164.0, 86.0, 301.0, 247.0, 249.0, 244.0, 136.0, 123.0, 185.0, 167.0, 99.0, 153.0, 264.0, 210.0, 298.0, 150.0, 241.0, 167.0, 152.0, 164.0, 263.0, 195.0, 147.0, 179.0, 305.0, 302.0, 199.0, 156.0, 201.0, 202.0, 53.0, 144.0, 131.0, 215.0, 347.0, 204.0, 241.0, 209.0, 112.0, 85.0, 238.0, 207.0, 158.0, 193.0, 185.0, 244.0, 185.0, 194.0, 193.0, 170.0, 9.0, 253.0, 115.0, 167.0, 264.0, 176.0, 241.0, 333.0, 82.0, 213.0, 145.0, 167.0, 188.0, 259.0, 307.0, 74.0, 213.0, 299.0, 284.0, 193.0, 136.0, 161.0, 188.0, 238.0, 79.0, 193.0, 241.0, 133.0, 172.0, 238.0, 168.0, 256.0, 213.0, 196.0, 212.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [115.0, 140.0, 119.0, 128.0, 140.0, 122.0, 100.0, 109.0, 68.0, 79.0, 153.0, 163.0, 127.0, 140.0, 69.0, 86.0, 150.0, 125.0, 78.0, 61.0, 138.0, 118.0, 185.0, 156.0, 118.0, 91.0, 101.0, 103.0, 120.0, 130.0, 91.0, 73.0, 53.0, 33.0, 141.0, 160.0, 135.0, 112.0, 116.0, 133.0, 108.0, 136.0, 63.0, 73.0, 66.0, 57.0, 97.0, 88.0, 87.0, 80.0, 36.0, 63.0, 84.0, 69.0, 127.0, 137.0, 114.0, 96.0, 136.0, 162.0, 76.0, 74.0, 128.0, 113.0, 79.0, 88.0, 66.0, 86.0, 79.0, 85.0, 138.0, 125.0, 87.0, 108.0, 90.0, 57.0, 94.0, 85.0, 140.0, 165.0, 160.0, 142.0, 93.0, 106.0, 81.0, 75.0, 93.0, 108.0, 97.0, 105.0, 25.0, 28.0, 69.0, 75.0, 63.0, 68.0, 127.0, 88.0, 170.0, 177.0, 97.0, 107.0, 122.0, 119.0, 103.0, 
106.0, 58.0, 54.0, 40.0, 45.0, 110.0, 128.0, 105.0, 102.0, 66.0, 92.0, 83.0, 110.0, 94.0, 91.0, 119.0, 125.0, 104.0, 81.0, 102.0, 92.0, 91.0, 102.0, 83.0, 87.0, 6.0, 3.0, 123.0, 130.0, 64.0, 51.0, 75.0, 92.0, 131.0, 133.0, 79.0, 97.0, 128.0, 113.0, 160.0, 173.0, 35.0, 47.0, 111.0, 102.0, 74.0, 71.0, 78.0, 89.0, 83.0, 105.0, 133.0, 126.0, 161.0, 146.0, 37.0, 37.0, 94.0, 119.0, 155.0, 144.0, 142.0, 142.0, 86.0, 107.0, 65.0, 71.0, 79.0, 82.0, 91.0, 97.0, 129.0, 109.0, 34.0, 45.0, 103.0, 90.0, 128.0, 113.0, 73.0, 60.0, 88.0, 84.0, 120.0, 118.0, 86.0, 82.0, 128.0, 128.0, 113.0, 100.0, 108.0, 88.0, 95.0, 117.0]}, "sampler_perf": {"mean_env_wait_ms": 2.942382408677298, "mean_processing_ms": 0.6544447985694, "mean_inference_ms": 3.4920936282643287}, "off_policy_estimator": {}, "info": {"num_steps_trained": 1464000, "num_steps_sampled": 780800, "sample_time_ms": 22508.979, "load_time_ms": 37.523, "grad_time_ms": 8924.852, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 1.7347235018262012e-19, "cur_lr": 0.0010000000474974513, "total_loss": -0.0013272188371047378, "policy_loss": -0.004394210409373045, "vf_loss": 38.1645622253418, "vf_explained_var": 0.6507807374000549, "kl": 0.002042042789980769, "entropy": 1.4989361763000488, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 780800, "episodes_total": 1952, "training_iteration": 61, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-15-24", "timestamp": 1660248924, "time_this_iter_s": 33.02385997772217, "time_total_s": 7344.289202928543, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": 
"tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function 
get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, 
"kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 7344.289202928543, "timesteps_since_restore": 780800, "iterations_since_restore": 61, "perf": {"cpu_util_percent": 36.91521739130434, "ram_util_percent": 57.791304347826106}}
+{"episode_reward_max": 341.0, "episode_reward_min": 9.0, "episode_reward_mean": 206.12, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 3.0}, "policy_reward_max": {"ppo": 185.0}, "policy_reward_mean": {"ppo": 103.06}, "custom_metrics": {"sparse_reward_mean": 60.8, "sparse_reward_min": 0, "sparse_reward_max": 120, "shaped_reward_mean": 84.52, "shaped_reward_min": 9, "shaped_reward_max": 130, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 8.75, "onion_pickup_agent_0_min": 2, "onion_pickup_agent_0_max": 16, "onion_pickup_agent_1_mean": 10.13, "onion_pickup_agent_1_min": 4, "onion_pickup_agent_1_max": 17, "useful_onion_pickup_agent_0_mean": 8.0, "useful_onion_pickup_agent_0_min": 2, "useful_onion_pickup_agent_0_max": 16, "useful_onion_pickup_agent_1_mean": 9.26, "useful_onion_pickup_agent_1_min": 1, "useful_onion_pickup_agent_1_max": 16, "onion_drop_agent_0_mean": 1.19, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 1.38, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.63, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.8, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 7.2, "potting_onion_agent_0_min": 2, "potting_onion_agent_0_max": 14, "potting_onion_agent_1_mean": 8.37, "potting_onion_agent_1_min": 1, "potting_onion_agent_1_max": 15, "dish_pickup_agent_0_mean": 5.08, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 4.7, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 1.62, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 6, "useful_dish_pickup_agent_1_mean": 1.3, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 6, "dish_drop_agent_0_mean": 1.76, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 1.76, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 9, "useful_dish_drop_agent_0_mean": 0.61, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.54, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 3.3, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 2.86, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 2.76, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 6, "soup_delivery_agent_1_mean": 2.61, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.44, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 3, "soup_drop_agent_1_mean": 0.2, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 7.2, "optimal_onion_potting_agent_0_min": 2, 
"optimal_onion_potting_agent_0_max": 14, "optimal_onion_potting_agent_1_mean": 8.37, "optimal_onion_potting_agent_1_min": 1, "optimal_onion_potting_agent_1_max": 15, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 7.2, "viable_onion_potting_agent_0_min": 2, "viable_onion_potting_agent_0_max": 14, "viable_onion_potting_agent_1_mean": 8.37, "viable_onion_potting_agent_1_min": 1, "viable_onion_potting_agent_1_max": 15, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": 
[176.0, 235.0, 158.0, 301.0, 201.0, 221.0, 176.0, 223.0, 169.0, 313.0, 196.0, 215.0, 118.0, 166.0, 290.0, 81.0, 145.0, 196.0, 161.0, 310.0, 256.0, 298.0, 244.0, 110.0, 319.0, 179.0, 152.0, 207.0, 301.0, 127.0, 307.0, 236.0, 170.0, 9.0, 253.0, 115.0, 167.0, 264.0, 176.0, 241.0, 333.0, 82.0, 213.0, 145.0, 167.0, 188.0, 259.0, 307.0, 74.0, 213.0, 299.0, 284.0, 193.0, 136.0, 161.0, 188.0, 238.0, 79.0, 193.0, 241.0, 133.0, 172.0, 238.0, 168.0, 256.0, 213.0, 196.0, 212.0, 255.0, 247.0, 262.0, 209.0, 147.0, 316.0, 267.0, 155.0, 275.0, 139.0, 256.0, 341.0, 209.0, 204.0, 250.0, 164.0, 86.0, 301.0, 247.0, 249.0, 244.0, 136.0, 123.0, 185.0, 167.0, 99.0, 153.0, 264.0, 210.0, 298.0, 150.0, 241.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [83.0, 93.0, 110.0, 125.0, 80.0, 78.0, 156.0, 145.0, 100.0, 101.0, 106.0, 115.0, 82.0, 94.0, 107.0, 116.0, 99.0, 70.0, 161.0, 152.0, 102.0, 94.0, 115.0, 100.0, 58.0, 60.0, 84.0, 82.0, 150.0, 140.0, 47.0, 34.0, 71.0, 74.0, 99.0, 97.0, 72.0, 89.0, 139.0, 171.0, 139.0, 117.0, 152.0, 146.0, 119.0, 125.0, 53.0, 57.0, 144.0, 175.0, 94.0, 85.0, 67.0, 85.0, 100.0, 107.0, 133.0, 168.0, 50.0, 77.0, 165.0, 142.0, 102.0, 134.0, 83.0, 87.0, 6.0, 3.0, 123.0, 130.0, 64.0, 51.0, 75.0, 92.0, 131.0, 133.0, 79.0, 97.0, 128.0, 113.0, 160.0, 173.0, 35.0, 47.0, 111.0, 102.0, 74.0, 71.0, 78.0, 89.0, 83.0, 105.0, 133.0, 126.0, 161.0, 146.0, 37.0, 37.0, 94.0, 119.0, 155.0, 144.0, 142.0, 142.0, 86.0, 107.0, 65.0, 71.0, 79.0, 82.0, 91.0, 97.0, 129.0, 109.0, 34.0, 45.0, 103.0, 
90.0, 128.0, 113.0, 73.0, 60.0, 88.0, 84.0, 120.0, 118.0, 86.0, 82.0, 128.0, 128.0, 113.0, 100.0, 108.0, 88.0, 95.0, 117.0, 115.0, 140.0, 119.0, 128.0, 140.0, 122.0, 100.0, 109.0, 68.0, 79.0, 153.0, 163.0, 127.0, 140.0, 69.0, 86.0, 150.0, 125.0, 78.0, 61.0, 138.0, 118.0, 185.0, 156.0, 118.0, 91.0, 101.0, 103.0, 120.0, 130.0, 91.0, 73.0, 53.0, 33.0, 141.0, 160.0, 135.0, 112.0, 116.0, 133.0, 108.0, 136.0, 63.0, 73.0, 66.0, 57.0, 97.0, 88.0, 87.0, 80.0, 36.0, 63.0, 84.0, 69.0, 127.0, 137.0, 114.0, 96.0, 136.0, 162.0, 76.0, 74.0, 128.0, 113.0]}, "sampler_perf": {"mean_env_wait_ms": 2.902001918125836, "mean_processing_ms": 0.6464539836010921, "mean_inference_ms": 3.4531930074110506}, "off_policy_estimator": {}, "info": {"num_steps_trained": 1488000, "num_steps_sampled": 793600, "sample_time_ms": 22714.251, "load_time_ms": 37.548, "grad_time_ms": 9167.09, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 8.673617509131006e-20, "cur_lr": 0.0010000000474974513, "total_loss": -0.0036004248540848494, "policy_loss": -0.006528293248265982, "vf_loss": 36.78936767578125, "vf_explained_var": 0.6745734810829163, "kl": 0.0014449331210926175, "entropy": 1.5021357536315918, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 793600, "episodes_total": 1984, "training_iteration": 62, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-15-59", "timestamp": 1660248959, "time_this_iter_s": 35.12303113937378, "time_total_s": 7379.412234067917, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, 
"no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", 
"use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": 
false}, "time_since_restore": 7379.412234067917, "timesteps_since_restore": 793600, "iterations_since_restore": 62, "perf": {"cpu_util_percent": 35.66, "ram_util_percent": 57.732}}
+{"episode_reward_max": 368.0, "episode_reward_min": 9.0, "episode_reward_mean": 220.18, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 3.0}, "policy_reward_max": {"ppo": 186.0}, "policy_reward_mean": {"ppo": 110.09}, "custom_metrics": {"sparse_reward_mean": 65.8, "sparse_reward_min": 0, "sparse_reward_max": 120, "shaped_reward_mean": 88.58, "shaped_reward_min": 9, "shaped_reward_max": 130, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 8.92, "onion_pickup_agent_0_min": 2, "onion_pickup_agent_0_max": 16, "onion_pickup_agent_1_mean": 10.43, "onion_pickup_agent_1_min": 4, "onion_pickup_agent_1_max": 16, "useful_onion_pickup_agent_0_mean": 8.21, "useful_onion_pickup_agent_0_min": 2, "useful_onion_pickup_agent_0_max": 16, "useful_onion_pickup_agent_1_mean": 9.74, "useful_onion_pickup_agent_1_min": 3, "useful_onion_pickup_agent_1_max": 16, "onion_drop_agent_0_mean": 1.13, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 1.21, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.6, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, "useful_onion_drop_agent_1_mean": 0.7, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 7.47, "potting_onion_agent_0_min": 2, "potting_onion_agent_0_max": 14, "potting_onion_agent_1_mean": 8.83, "potting_onion_agent_1_min": 1, "potting_onion_agent_1_max": 15, "dish_pickup_agent_0_mean": 5.07, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 4.8, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 1.74, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 6, "useful_dish_pickup_agent_1_mean": 1.42, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 6, "dish_drop_agent_0_mean": 1.75, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 1.61, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 9, "useful_dish_drop_agent_0_mean": 0.6, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.49, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 3.29, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 3.14, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 2.73, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 5, "soup_delivery_agent_1_mean": 2.87, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.48, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 3, "soup_drop_agent_1_mean": 0.21, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 7.47, "optimal_onion_potting_agent_0_min": 
2, "optimal_onion_potting_agent_0_max": 14, "optimal_onion_potting_agent_1_mean": 8.83, "optimal_onion_potting_agent_1_min": 1, "optimal_onion_potting_agent_1_max": 15, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 7.47, "viable_onion_potting_agent_0_min": 2, "viable_onion_potting_agent_0_max": 14, "viable_onion_potting_agent_1_mean": 8.83, "viable_onion_potting_agent_1_min": 1, "viable_onion_potting_agent_1_max": 15, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": 
{"episode_reward": [316.0, 212.0, 368.0, 304.0, 253.0, 351.0, 178.0, 9.0, 122.0, 253.0, 164.0, 253.0, 213.0, 250.0, 179.0, 219.0, 330.0, 169.0, 241.0, 207.0, 319.0, 241.0, 264.0, 284.0, 307.0, 139.0, 201.0, 267.0, 71.0, 267.0, 241.0, 313.0, 256.0, 213.0, 196.0, 212.0, 255.0, 247.0, 262.0, 209.0, 147.0, 316.0, 267.0, 155.0, 275.0, 139.0, 256.0, 341.0, 209.0, 204.0, 250.0, 164.0, 86.0, 301.0, 247.0, 249.0, 244.0, 136.0, 123.0, 185.0, 167.0, 99.0, 153.0, 264.0, 210.0, 298.0, 150.0, 241.0, 176.0, 235.0, 158.0, 301.0, 201.0, 221.0, 176.0, 223.0, 169.0, 313.0, 196.0, 215.0, 118.0, 166.0, 290.0, 81.0, 145.0, 196.0, 161.0, 310.0, 256.0, 298.0, 244.0, 110.0, 319.0, 179.0, 152.0, 207.0, 301.0, 127.0, 307.0, 236.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [149.0, 167.0, 107.0, 105.0, 182.0, 186.0, 140.0, 164.0, 124.0, 129.0, 185.0, 166.0, 85.0, 93.0, 6.0, 3.0, 52.0, 70.0, 111.0, 142.0, 55.0, 109.0, 116.0, 137.0, 97.0, 116.0, 129.0, 121.0, 80.0, 99.0, 120.0, 99.0, 166.0, 164.0, 80.0, 89.0, 108.0, 133.0, 98.0, 109.0, 170.0, 149.0, 124.0, 117.0, 125.0, 139.0, 135.0, 149.0, 151.0, 156.0, 71.0, 68.0, 97.0, 104.0, 128.0, 139.0, 34.0, 37.0, 122.0, 145.0, 131.0, 110.0, 163.0, 150.0, 128.0, 128.0, 113.0, 100.0, 108.0, 88.0, 95.0, 117.0, 115.0, 140.0, 119.0, 128.0, 140.0, 122.0, 100.0, 109.0, 68.0, 79.0, 153.0, 163.0, 127.0, 140.0, 69.0, 86.0, 150.0, 125.0, 78.0, 61.0, 138.0, 118.0, 185.0, 156.0, 118.0, 91.0, 101.0, 103.0, 120.0, 130.0, 91.0, 73.0, 53.0, 33.0, 141.0, 160.0, 135.0, 112.0, 
116.0, 133.0, 108.0, 136.0, 63.0, 73.0, 66.0, 57.0, 97.0, 88.0, 87.0, 80.0, 36.0, 63.0, 84.0, 69.0, 127.0, 137.0, 114.0, 96.0, 136.0, 162.0, 76.0, 74.0, 128.0, 113.0, 83.0, 93.0, 110.0, 125.0, 80.0, 78.0, 156.0, 145.0, 100.0, 101.0, 106.0, 115.0, 82.0, 94.0, 107.0, 116.0, 99.0, 70.0, 161.0, 152.0, 102.0, 94.0, 115.0, 100.0, 58.0, 60.0, 84.0, 82.0, 150.0, 140.0, 47.0, 34.0, 71.0, 74.0, 99.0, 97.0, 72.0, 89.0, 139.0, 171.0, 139.0, 117.0, 152.0, 146.0, 119.0, 125.0, 53.0, 57.0, 144.0, 175.0, 94.0, 85.0, 67.0, 85.0, 100.0, 107.0, 133.0, 168.0, 50.0, 77.0, 165.0, 142.0, 102.0, 134.0]}, "sampler_perf": {"mean_env_wait_ms": 2.8628696517904486, "mean_processing_ms": 0.6387085923696654, "mean_inference_ms": 3.4155991200139666}, "off_policy_estimator": {}, "info": {"num_steps_trained": 1512000, "num_steps_sampled": 806400, "sample_time_ms": 22536.552, "load_time_ms": 37.579, "grad_time_ms": 9310.446, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 4.336808754565503e-20, "cur_lr": 0.0010000000474974513, "total_loss": -0.0014393635792657733, "policy_loss": -0.0051459651440382, "vf_loss": 44.491573333740234, "vf_explained_var": 0.6412068009376526, "kl": 0.001486484077759087, "entropy": 1.485115885734558, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 806400, "episodes_total": 2016, "training_iteration": 63, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-16-31", "timestamp": 1660248991, "time_this_iter_s": 32.43239998817444, "time_total_s": 7411.844634056091, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": 
[256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate 
at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": 
false, "_fake_gpus": false}, "time_since_restore": 7411.844634056091, "timesteps_since_restore": 806400, "iterations_since_restore": 63, "perf": {"cpu_util_percent": 36.01739130434783, "ram_util_percent": 57.59347826086956}}
+{"episode_reward_max": 368.0, "episode_reward_min": 9.0, "episode_reward_mean": 232.66, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 3.0}, "policy_reward_max": {"ppo": 190.0}, "policy_reward_mean": {"ppo": 116.33}, "custom_metrics": {"sparse_reward_mean": 70.6, "sparse_reward_min": 0, "sparse_reward_max": 120, "shaped_reward_mean": 91.46, "shaped_reward_min": 9, "shaped_reward_max": 130, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 9.53, "onion_pickup_agent_0_min": 3, "onion_pickup_agent_0_max": 17, "onion_pickup_agent_1_mean": 10.35, "onion_pickup_agent_1_min": 4, "onion_pickup_agent_1_max": 16, "useful_onion_pickup_agent_0_mean": 8.77, "useful_onion_pickup_agent_0_min": 2, "useful_onion_pickup_agent_0_max": 16, "useful_onion_pickup_agent_1_mean": 9.57, "useful_onion_pickup_agent_1_min": 2, "useful_onion_pickup_agent_1_max": 16, "onion_drop_agent_0_mean": 1.11, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 1.27, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.64, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, "useful_onion_drop_agent_1_mean": 0.72, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 8.08, "potting_onion_agent_0_min": 2, "potting_onion_agent_0_max": 15, "potting_onion_agent_1_mean": 8.75, "potting_onion_agent_1_min": 1, "potting_onion_agent_1_max": 15, "dish_pickup_agent_0_mean": 4.7, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.19, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 1.59, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 6, "useful_dish_pickup_agent_1_mean": 1.7, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 6, "dish_drop_agent_0_mean": 1.57, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 1.68, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 7, "useful_dish_drop_agent_0_mean": 0.51, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.48, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 3.22, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 3.44, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 2.67, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 5, "soup_delivery_agent_1_mean": 3.04, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.42, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 4, "soup_drop_agent_1_mean": 0.32, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 3, "optimal_onion_potting_agent_0_mean": 8.08, 
"optimal_onion_potting_agent_0_min": 2, "optimal_onion_potting_agent_0_max": 15, "optimal_onion_potting_agent_1_mean": 8.75, "optimal_onion_potting_agent_1_min": 1, "optimal_onion_potting_agent_1_max": 15, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 8.08, "viable_onion_potting_agent_0_min": 2, "viable_onion_potting_agent_0_max": 15, "viable_onion_potting_agent_1_mean": 8.75, "viable_onion_potting_agent_1_min": 1, "viable_onion_potting_agent_1_max": 15, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [55.0, 264.0, 127.0, 313.0, 163.0, 287.0, 298.0, 216.0, 341.0, 287.0, 235.0, 298.0, 204.0, 295.0, 225.0, 218.0, 287.0, 215.0, 313.0, 255.0, 367.0, 239.0, 210.0, 367.0, 258.0, 252.0, 353.0, 128.0, 178.0, 284.0, 270.0, 273.0, 210.0, 298.0, 150.0, 241.0, 176.0, 235.0, 158.0, 301.0, 201.0, 221.0, 176.0, 223.0, 169.0, 313.0, 196.0, 215.0, 118.0, 166.0, 290.0, 81.0, 145.0, 196.0, 161.0, 310.0, 256.0, 298.0, 244.0, 110.0, 319.0, 179.0, 152.0, 207.0, 301.0, 127.0, 307.0, 236.0, 316.0, 212.0, 368.0, 304.0, 253.0, 351.0, 178.0, 9.0, 122.0, 253.0, 164.0, 253.0, 213.0, 250.0, 179.0, 219.0, 330.0, 169.0, 241.0, 207.0, 319.0, 241.0, 264.0, 284.0, 307.0, 139.0, 201.0, 267.0, 71.0, 267.0, 241.0, 313.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [26.0, 29.0, 134.0, 130.0, 63.0, 64.0, 144.0, 169.0, 76.0, 87.0, 142.0, 145.0, 131.0, 167.0, 113.0, 103.0, 152.0, 189.0, 161.0, 126.0, 111.0, 124.0, 164.0, 134.0, 105.0, 99.0, 133.0, 162.0, 119.0, 106.0, 110.0, 108.0, 153.0, 134.0, 100.0, 115.0, 147.0, 166.0, 129.0, 126.0, 190.0, 177.0, 122.0, 117.0, 107.0, 103.0, 189.0, 178.0, 118.0, 140.0, 117.0, 135.0, 188.0, 165.0, 60.0, 68.0, 86.0, 92.0, 143.0, 141.0, 128.0, 142.0, 131.0, 142.0, 114.0, 96.0, 136.0, 162.0, 76.0, 74.0, 128.0, 113.0, 83.0, 93.0, 110.0, 125.0, 80.0, 78.0, 156.0, 145.0, 100.0, 101.0, 106.0, 115.0, 82.0, 94.0, 107.0, 116.0, 99.0, 70.0, 161.0, 152.0, 102.0, 94.0, 115.0, 100.0, 58.0, 60.0, 84.0, 82.0, 150.0, 
140.0, 47.0, 34.0, 71.0, 74.0, 99.0, 97.0, 72.0, 89.0, 139.0, 171.0, 139.0, 117.0, 152.0, 146.0, 119.0, 125.0, 53.0, 57.0, 144.0, 175.0, 94.0, 85.0, 67.0, 85.0, 100.0, 107.0, 133.0, 168.0, 50.0, 77.0, 165.0, 142.0, 102.0, 134.0, 149.0, 167.0, 107.0, 105.0, 182.0, 186.0, 140.0, 164.0, 124.0, 129.0, 185.0, 166.0, 85.0, 93.0, 6.0, 3.0, 52.0, 70.0, 111.0, 142.0, 55.0, 109.0, 116.0, 137.0, 97.0, 116.0, 129.0, 121.0, 80.0, 99.0, 120.0, 99.0, 166.0, 164.0, 80.0, 89.0, 108.0, 133.0, 98.0, 109.0, 170.0, 149.0, 124.0, 117.0, 125.0, 139.0, 135.0, 149.0, 151.0, 156.0, 71.0, 68.0, 97.0, 104.0, 128.0, 139.0, 34.0, 37.0, 122.0, 145.0, 131.0, 110.0, 163.0, 150.0]}, "sampler_perf": {"mean_env_wait_ms": 2.824862261482626, "mean_processing_ms": 0.6311660417813414, "mean_inference_ms": 3.3776913519982266}, "off_policy_estimator": {}, "info": {"num_steps_trained": 1536000, "num_steps_sampled": 819200, "sample_time_ms": 21992.331, "load_time_ms": 37.617, "grad_time_ms": 9517.458, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 2.1684043772827515e-20, "cur_lr": 0.0010000000474974513, "total_loss": -0.0046628438867628574, "policy_loss": -0.008121621794998646, "vf_loss": 41.953346252441406, "vf_explained_var": 0.6826162934303284, "kl": 0.0015492010861635208, "entropy": 1.4731155633926392, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 819200, "episodes_total": 2048, "training_iteration": 64, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-17-00", "timestamp": 1660249020, "time_this_iter_s": 28.55878710746765, "time_total_s": 7440.403421163559, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, 
"conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, 
"custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, 
"vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 7440.403421163559, "timesteps_since_restore": 819200, "iterations_since_restore": 64, "perf": {"cpu_util_percent": 38.065, "ram_util_percent": 57.504999999999995}}
+{"episode_reward_max": 390.0, "episode_reward_min": 9.0, "episode_reward_mean": 243.23, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 3.0}, "policy_reward_max": {"ppo": 201.0}, "policy_reward_mean": {"ppo": 121.615}, "custom_metrics": {"sparse_reward_mean": 74.8, "sparse_reward_min": 0, "sparse_reward_max": 140, "shaped_reward_mean": 93.63, "shaped_reward_min": 9, "shaped_reward_max": 130, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 9.88, "onion_pickup_agent_0_min": 3, "onion_pickup_agent_0_max": 17, "onion_pickup_agent_1_mean": 10.52, "onion_pickup_agent_1_min": 4, "onion_pickup_agent_1_max": 17, "useful_onion_pickup_agent_0_mean": 9.11, "useful_onion_pickup_agent_0_min": 2, "useful_onion_pickup_agent_0_max": 16, "useful_onion_pickup_agent_1_mean": 9.77, "useful_onion_pickup_agent_1_min": 2, "useful_onion_pickup_agent_1_max": 16, "onion_drop_agent_0_mean": 1.13, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 1.31, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.62, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, "useful_onion_drop_agent_1_mean": 0.73, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 8.38, "potting_onion_agent_0_min": 2, "potting_onion_agent_0_max": 15, "potting_onion_agent_1_mean": 8.85, "potting_onion_agent_1_min": 1, "potting_onion_agent_1_max": 15, "dish_pickup_agent_0_mean": 4.62, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.14, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 1.54, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 6, "useful_dish_pickup_agent_1_mean": 1.89, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 6, "dish_drop_agent_0_mean": 1.45, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 1.6, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 7, "useful_dish_drop_agent_0_mean": 0.52, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.49, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 3.24, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 3.55, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 6, "soup_delivery_agent_0_mean": 2.71, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 5, "soup_delivery_agent_1_mean": 3.06, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.41, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 4, "soup_drop_agent_1_mean": 0.36, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 3, "optimal_onion_potting_agent_0_mean": 8.38, 
"optimal_onion_potting_agent_0_min": 2, "optimal_onion_potting_agent_0_max": 15, "optimal_onion_potting_agent_1_mean": 8.85, "optimal_onion_potting_agent_1_min": 1, "optimal_onion_potting_agent_1_max": 15, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 8.38, "viable_onion_potting_agent_0_min": 2, "viable_onion_potting_agent_0_max": 15, "viable_onion_potting_agent_1_mean": 8.85, "viable_onion_potting_agent_1_min": 1, "viable_onion_potting_agent_1_max": 15, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [207.0, 187.0, 252.0, 141.0, 266.0, 299.0, 182.0, 344.0, 352.0, 232.0, 324.0, 260.0, 182.0, 336.0, 170.0, 193.0, 239.0, 255.0, 212.0, 295.0, 267.0, 166.0, 142.0, 65.0, 163.0, 330.0, 361.0, 390.0, 290.0, 259.0, 159.0, 252.0, 301.0, 127.0, 307.0, 236.0, 316.0, 212.0, 368.0, 304.0, 253.0, 351.0, 178.0, 9.0, 122.0, 253.0, 164.0, 253.0, 213.0, 250.0, 179.0, 219.0, 330.0, 169.0, 241.0, 207.0, 319.0, 241.0, 264.0, 284.0, 307.0, 139.0, 201.0, 267.0, 71.0, 267.0, 241.0, 313.0, 55.0, 264.0, 127.0, 313.0, 163.0, 287.0, 298.0, 216.0, 341.0, 287.0, 235.0, 298.0, 204.0, 295.0, 225.0, 218.0, 287.0, 215.0, 313.0, 255.0, 367.0, 239.0, 210.0, 367.0, 258.0, 252.0, 353.0, 128.0, 178.0, 284.0, 270.0, 273.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [99.0, 108.0, 93.0, 94.0, 123.0, 129.0, 76.0, 65.0, 128.0, 138.0, 144.0, 155.0, 95.0, 87.0, 161.0, 183.0, 182.0, 170.0, 124.0, 108.0, 170.0, 154.0, 119.0, 141.0, 89.0, 93.0, 165.0, 171.0, 97.0, 73.0, 100.0, 93.0, 134.0, 105.0, 127.0, 128.0, 103.0, 109.0, 155.0, 140.0, 118.0, 149.0, 82.0, 84.0, 61.0, 81.0, 32.0, 33.0, 89.0, 74.0, 155.0, 175.0, 168.0, 193.0, 201.0, 189.0, 137.0, 153.0, 121.0, 138.0, 84.0, 75.0, 121.0, 131.0, 133.0, 168.0, 50.0, 77.0, 165.0, 142.0, 102.0, 134.0, 149.0, 167.0, 107.0, 105.0, 182.0, 186.0, 140.0, 164.0, 124.0, 129.0, 185.0, 166.0, 85.0, 93.0, 6.0, 3.0, 52.0, 70.0, 111.0, 142.0, 55.0, 109.0, 116.0, 137.0, 97.0, 116.0, 129.0, 121.0, 80.0, 99.0, 120.0, 
99.0, 166.0, 164.0, 80.0, 89.0, 108.0, 133.0, 98.0, 109.0, 170.0, 149.0, 124.0, 117.0, 125.0, 139.0, 135.0, 149.0, 151.0, 156.0, 71.0, 68.0, 97.0, 104.0, 128.0, 139.0, 34.0, 37.0, 122.0, 145.0, 131.0, 110.0, 163.0, 150.0, 26.0, 29.0, 134.0, 130.0, 63.0, 64.0, 144.0, 169.0, 76.0, 87.0, 142.0, 145.0, 131.0, 167.0, 113.0, 103.0, 152.0, 189.0, 161.0, 126.0, 111.0, 124.0, 164.0, 134.0, 105.0, 99.0, 133.0, 162.0, 119.0, 106.0, 110.0, 108.0, 153.0, 134.0, 100.0, 115.0, 147.0, 166.0, 129.0, 126.0, 190.0, 177.0, 122.0, 117.0, 107.0, 103.0, 189.0, 178.0, 118.0, 140.0, 117.0, 135.0, 188.0, 165.0, 60.0, 68.0, 86.0, 92.0, 143.0, 141.0, 128.0, 142.0, 131.0, 142.0]}, "sampler_perf": {"mean_env_wait_ms": 2.787957702502074, "mean_processing_ms": 0.623833190465171, "mean_inference_ms": 3.33934185787935}, "off_policy_estimator": {}, "info": {"num_steps_trained": 1560000, "num_steps_sampled": 832000, "sample_time_ms": 21829.313, "load_time_ms": 37.489, "grad_time_ms": 9503.726, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 1.0842021886413758e-20, "cur_lr": 0.0010000000474974513, "total_loss": -0.0026782825589179993, "policy_loss": -0.0062830038368701935, "vf_loss": 43.38319778442383, "vf_explained_var": 0.6999297738075256, "kl": 0.0015296392375603318, "entropy": 1.4671941995620728, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 832000, "episodes_total": 2080, "training_iteration": 65, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-17-27", "timestamp": 1660249047, "time_this_iter_s": 27.39732599258423, "time_total_s": 7467.800747156143, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, 
"conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, 
"custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, 
"vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 7467.800747156143, "timesteps_since_restore": 832000, "iterations_since_restore": 65, "perf": {"cpu_util_percent": 36.88717948717949, "ram_util_percent": 57.59230769230768}}
+{"episode_reward_max": 402.0, "episode_reward_min": 55.0, "episode_reward_mean": 253.37, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 26.0}, "policy_reward_max": {"ppo": 201.0}, "policy_reward_mean": {"ppo": 126.685}, "custom_metrics": {"sparse_reward_mean": 79.0, "sparse_reward_min": 0, "sparse_reward_max": 140, "shaped_reward_mean": 95.37, "shaped_reward_min": 31, "shaped_reward_max": 133, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 10.36, "onion_pickup_agent_0_min": 3, "onion_pickup_agent_0_max": 17, "onion_pickup_agent_1_mean": 10.55, "onion_pickup_agent_1_min": 4, "onion_pickup_agent_1_max": 18, "useful_onion_pickup_agent_0_mean": 9.6, "useful_onion_pickup_agent_0_min": 3, "useful_onion_pickup_agent_0_max": 16, "useful_onion_pickup_agent_1_mean": 9.72, "useful_onion_pickup_agent_1_min": 2, "useful_onion_pickup_agent_1_max": 18, "onion_drop_agent_0_mean": 1.15, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 1.33, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 7, "useful_onion_drop_agent_0_mean": 0.56, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.73, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 8.84, "potting_onion_agent_0_min": 2, "potting_onion_agent_0_max": 15, "potting_onion_agent_1_mean": 8.85, "potting_onion_agent_1_min": 2, "potting_onion_agent_1_max": 17, "dish_pickup_agent_0_mean": 4.65, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.1, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 1.47, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 5, "useful_dish_pickup_agent_1_mean": 1.93, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 6, "dish_drop_agent_0_mean": 1.27, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 1.72, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 7, "useful_dish_drop_agent_0_mean": 0.47, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.55, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 4, "soup_pickup_agent_0_mean": 3.55, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 3.36, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 6, "soup_delivery_agent_0_mean": 3.02, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 6, "soup_delivery_agent_1_mean": 2.86, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.4, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 4, "soup_drop_agent_1_mean": 0.4, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 4, "optimal_onion_potting_agent_0_mean": 8.84, 
"optimal_onion_potting_agent_0_min": 2, "optimal_onion_potting_agent_0_max": 15, "optimal_onion_potting_agent_1_mean": 8.85, "optimal_onion_potting_agent_1_min": 2, "optimal_onion_potting_agent_1_max": 17, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 8.84, "viable_onion_potting_agent_0_min": 2, "viable_onion_potting_agent_0_max": 15, "viable_onion_potting_agent_1_mean": 8.85, "viable_onion_potting_agent_1_min": 2, "viable_onion_potting_agent_1_max": 17, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [282.0, 319.0, 247.0, 267.0, 244.0, 256.0, 305.0, 402.0, 352.0, 210.0, 139.0, 185.0, 293.0, 324.0, 164.0, 244.0, 345.0, 293.0, 333.0, 339.0, 150.0, 206.0, 344.0, 132.0, 204.0, 319.0, 258.0, 307.0, 250.0, 282.0, 298.0, 305.0, 71.0, 267.0, 241.0, 313.0, 55.0, 264.0, 127.0, 313.0, 163.0, 287.0, 298.0, 216.0, 341.0, 287.0, 235.0, 298.0, 204.0, 295.0, 225.0, 218.0, 287.0, 215.0, 313.0, 255.0, 367.0, 239.0, 210.0, 367.0, 258.0, 252.0, 353.0, 128.0, 178.0, 284.0, 270.0, 273.0, 207.0, 187.0, 252.0, 141.0, 266.0, 299.0, 182.0, 344.0, 352.0, 232.0, 324.0, 260.0, 182.0, 336.0, 170.0, 193.0, 239.0, 255.0, 212.0, 295.0, 267.0, 166.0, 142.0, 65.0, 163.0, 330.0, 361.0, 390.0, 290.0, 259.0, 159.0, 252.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [151.0, 131.0, 158.0, 161.0, 122.0, 125.0, 134.0, 133.0, 112.0, 132.0, 130.0, 126.0, 153.0, 152.0, 201.0, 201.0, 173.0, 179.0, 99.0, 111.0, 72.0, 67.0, 97.0, 88.0, 152.0, 141.0, 168.0, 156.0, 85.0, 79.0, 125.0, 119.0, 168.0, 177.0, 139.0, 154.0, 165.0, 168.0, 182.0, 157.0, 82.0, 68.0, 89.0, 117.0, 170.0, 174.0, 73.0, 59.0, 112.0, 92.0, 136.0, 183.0, 119.0, 139.0, 162.0, 145.0, 119.0, 131.0, 150.0, 132.0, 167.0, 131.0, 158.0, 147.0, 34.0, 37.0, 122.0, 145.0, 131.0, 110.0, 163.0, 150.0, 26.0, 29.0, 134.0, 130.0, 63.0, 64.0, 144.0, 169.0, 76.0, 87.0, 142.0, 145.0, 131.0, 167.0, 113.0, 103.0, 152.0, 189.0, 161.0, 126.0, 111.0, 124.0, 164.0, 134.0, 105.0, 99.0, 133.0, 162.0, 
119.0, 106.0, 110.0, 108.0, 153.0, 134.0, 100.0, 115.0, 147.0, 166.0, 129.0, 126.0, 190.0, 177.0, 122.0, 117.0, 107.0, 103.0, 189.0, 178.0, 118.0, 140.0, 117.0, 135.0, 188.0, 165.0, 60.0, 68.0, 86.0, 92.0, 143.0, 141.0, 128.0, 142.0, 131.0, 142.0, 99.0, 108.0, 93.0, 94.0, 123.0, 129.0, 76.0, 65.0, 128.0, 138.0, 144.0, 155.0, 95.0, 87.0, 161.0, 183.0, 182.0, 170.0, 124.0, 108.0, 170.0, 154.0, 119.0, 141.0, 89.0, 93.0, 165.0, 171.0, 97.0, 73.0, 100.0, 93.0, 134.0, 105.0, 127.0, 128.0, 103.0, 109.0, 155.0, 140.0, 118.0, 149.0, 82.0, 84.0, 61.0, 81.0, 32.0, 33.0, 89.0, 74.0, 155.0, 175.0, 168.0, 193.0, 201.0, 189.0, 137.0, 153.0, 121.0, 138.0, 84.0, 75.0, 121.0, 131.0]}, "sampler_perf": {"mean_env_wait_ms": 2.7521510850720086, "mean_processing_ms": 0.6167185711534096, "mean_inference_ms": 3.3015786839102956}, "off_policy_estimator": {}, "info": {"num_steps_trained": 1584000, "num_steps_sampled": 844800, "sample_time_ms": 21629.389, "load_time_ms": 37.73, "grad_time_ms": 9476.984, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 5.421010943206879e-21, "cur_lr": 0.0010000000474974513, "total_loss": -0.005265243351459503, "policy_loss": -0.009142073802649975, "vf_loss": 46.01101303100586, "vf_explained_var": 0.713275671005249, "kl": 0.001622045412659645, "entropy": 1.4485527276992798, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 844800, "episodes_total": 2112, "training_iteration": 66, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-17-56", "timestamp": 1660249076, "time_this_iter_s": 28.3277370929718, "time_total_s": 7496.128484249115, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": 
{"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, 
"evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 
0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 7496.128484249115, "timesteps_since_restore": 844800, "iterations_since_restore": 66, "perf": {"cpu_util_percent": 32.035000000000004, "ram_util_percent": 57.5875}}
+{"episode_reward_max": 421.0, "episode_reward_min": 23.0, "episode_reward_mean": 258.42, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 9.0}, "policy_reward_max": {"ppo": 211.0}, "policy_reward_mean": {"ppo": 129.21}, "custom_metrics": {"sparse_reward_mean": 81.6, "sparse_reward_min": 0, "sparse_reward_max": 140, "shaped_reward_mean": 95.22, "shaped_reward_min": 20, "shaped_reward_max": 144, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 10.07, "onion_pickup_agent_0_min": 3, "onion_pickup_agent_0_max": 17, "onion_pickup_agent_1_mean": 10.87, "onion_pickup_agent_1_min": 3, "onion_pickup_agent_1_max": 19, "useful_onion_pickup_agent_0_mean": 9.37, "useful_onion_pickup_agent_0_min": 2, "useful_onion_pickup_agent_0_max": 16, "useful_onion_pickup_agent_1_mean": 10.02, "useful_onion_pickup_agent_1_min": 2, "useful_onion_pickup_agent_1_max": 18, "onion_drop_agent_0_mean": 1.06, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 1.37, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 7, "useful_onion_drop_agent_0_mean": 0.53, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.75, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 8.65, "potting_onion_agent_0_min": 2, "potting_onion_agent_0_max": 16, "potting_onion_agent_1_mean": 9.14, "potting_onion_agent_1_min": 1, "potting_onion_agent_1_max": 17, "dish_pickup_agent_0_mean": 4.97, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 4.79, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 1.54, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 6, "useful_dish_pickup_agent_1_mean": 1.81, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 6, "dish_drop_agent_0_mean": 1.5, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 1.54, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 6, "useful_dish_drop_agent_0_mean": 0.57, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 5, "useful_dish_drop_agent_1_mean": 0.52, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 4, "soup_pickup_agent_0_mean": 3.59, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 3.24, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 6, "soup_delivery_agent_0_mean": 3.05, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 6, "soup_delivery_agent_1_mean": 2.87, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.45, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 4, "soup_drop_agent_1_mean": 0.29, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 4, "optimal_onion_potting_agent_0_mean": 8.65, 
"optimal_onion_potting_agent_0_min": 2, "optimal_onion_potting_agent_0_max": 16, "optimal_onion_potting_agent_1_mean": 9.14, "optimal_onion_potting_agent_1_min": 1, "optimal_onion_potting_agent_1_max": 17, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 8.65, "viable_onion_potting_agent_0_min": 2, "viable_onion_potting_agent_0_max": 16, "viable_onion_potting_agent_1_mean": 9.14, "viable_onion_potting_agent_1_min": 1, "viable_onion_potting_agent_1_max": 17, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [218.0, 187.0, 60.0, 267.0, 71.0, 244.0, 238.0, 379.0, 373.0, 258.0, 339.0, 198.0, 330.0, 309.0, 358.0, 267.0, 293.0, 298.0, 264.0, 356.0, 309.0, 253.0, 236.0, 421.0, 287.0, 296.0, 384.0, 254.0, 264.0, 208.0, 23.0, 225.0, 178.0, 284.0, 270.0, 273.0, 207.0, 187.0, 252.0, 141.0, 266.0, 299.0, 182.0, 344.0, 352.0, 232.0, 324.0, 260.0, 182.0, 336.0, 170.0, 193.0, 239.0, 255.0, 212.0, 295.0, 267.0, 166.0, 142.0, 65.0, 163.0, 330.0, 361.0, 390.0, 290.0, 259.0, 159.0, 252.0, 282.0, 319.0, 247.0, 267.0, 244.0, 256.0, 305.0, 402.0, 352.0, 210.0, 139.0, 185.0, 293.0, 324.0, 164.0, 244.0, 345.0, 293.0, 333.0, 339.0, 150.0, 206.0, 344.0, 132.0, 204.0, 319.0, 258.0, 307.0, 250.0, 282.0, 298.0, 305.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [114.0, 104.0, 93.0, 94.0, 29.0, 31.0, 151.0, 116.0, 34.0, 37.0, 113.0, 131.0, 114.0, 124.0, 193.0, 186.0, 188.0, 185.0, 132.0, 126.0, 159.0, 180.0, 96.0, 102.0, 157.0, 173.0, 158.0, 151.0, 180.0, 178.0, 136.0, 131.0, 153.0, 140.0, 146.0, 152.0, 127.0, 137.0, 186.0, 170.0, 151.0, 158.0, 107.0, 146.0, 108.0, 128.0, 211.0, 210.0, 144.0, 143.0, 145.0, 151.0, 204.0, 180.0, 124.0, 130.0, 133.0, 131.0, 99.0, 109.0, 9.0, 14.0, 109.0, 116.0, 86.0, 92.0, 143.0, 141.0, 128.0, 142.0, 131.0, 142.0, 99.0, 108.0, 93.0, 94.0, 123.0, 129.0, 76.0, 65.0, 128.0, 138.0, 144.0, 155.0, 95.0, 87.0, 161.0, 183.0, 182.0, 170.0, 124.0, 108.0, 170.0, 154.0, 119.0, 141.0, 89.0, 93.0, 165.0, 171.0, 97.0, 
73.0, 100.0, 93.0, 134.0, 105.0, 127.0, 128.0, 103.0, 109.0, 155.0, 140.0, 118.0, 149.0, 82.0, 84.0, 61.0, 81.0, 32.0, 33.0, 89.0, 74.0, 155.0, 175.0, 168.0, 193.0, 201.0, 189.0, 137.0, 153.0, 121.0, 138.0, 84.0, 75.0, 121.0, 131.0, 151.0, 131.0, 158.0, 161.0, 122.0, 125.0, 134.0, 133.0, 112.0, 132.0, 130.0, 126.0, 153.0, 152.0, 201.0, 201.0, 173.0, 179.0, 99.0, 111.0, 72.0, 67.0, 97.0, 88.0, 152.0, 141.0, 168.0, 156.0, 85.0, 79.0, 125.0, 119.0, 168.0, 177.0, 139.0, 154.0, 165.0, 168.0, 182.0, 157.0, 82.0, 68.0, 89.0, 117.0, 170.0, 174.0, 73.0, 59.0, 112.0, 92.0, 136.0, 183.0, 119.0, 139.0, 162.0, 145.0, 119.0, 131.0, 150.0, 132.0, 167.0, 131.0, 158.0, 147.0]}, "sampler_perf": {"mean_env_wait_ms": 2.717456296666147, "mean_processing_ms": 0.6098308335159816, "mean_inference_ms": 3.265231126103296}, "off_policy_estimator": {}, "info": {"num_steps_trained": 1608000, "num_steps_sampled": 857600, "sample_time_ms": 21370.114, "load_time_ms": 37.697, "grad_time_ms": 9313.497, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 2.7105054716034394e-21, "cur_lr": 0.0010000000474974513, "total_loss": -0.0019875967409461737, "policy_loss": -0.006022992078214884, "vf_loss": 47.62739562988281, "vf_explained_var": 0.6981029510498047, "kl": 0.0015933552058413625, "entropy": 1.4546891450881958, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 857600, "episodes_total": 2144, "training_iteration": 67, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-18-22", "timestamp": 1660249102, "time_this_iter_s": 26.67682385444641, "time_total_s": 7522.805308103561, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": 
{"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, 
"evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 
0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 7522.805308103561, "timesteps_since_restore": 857600, "iterations_since_restore": 67, "perf": {"cpu_util_percent": 34.505405405405405, "ram_util_percent": 57.59189189189188}}
+{"episode_reward_max": 421.0, "episode_reward_min": 23.0, "episode_reward_mean": 275.0, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 9.0}, "policy_reward_max": {"ppo": 211.0}, "policy_reward_mean": {"ppo": 137.5}, "custom_metrics": {"sparse_reward_mean": 88.8, "sparse_reward_min": 0, "sparse_reward_max": 140, "shaped_reward_mean": 97.4, "shaped_reward_min": 20, "shaped_reward_max": 144, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 10.21, "onion_pickup_agent_0_min": 3, "onion_pickup_agent_0_max": 17, "onion_pickup_agent_1_mean": 11.29, "onion_pickup_agent_1_min": 3, "onion_pickup_agent_1_max": 19, "useful_onion_pickup_agent_0_mean": 9.56, "useful_onion_pickup_agent_0_min": 2, "useful_onion_pickup_agent_0_max": 16, "useful_onion_pickup_agent_1_mean": 10.48, "useful_onion_pickup_agent_1_min": 2, "useful_onion_pickup_agent_1_max": 18, "onion_drop_agent_0_mean": 1.11, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 1.28, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 7, "useful_onion_drop_agent_0_mean": 0.53, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.74, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 8.75, "potting_onion_agent_0_min": 3, "potting_onion_agent_0_max": 16, "potting_onion_agent_1_mean": 9.63, "potting_onion_agent_1_min": 1, "potting_onion_agent_1_max": 17, "dish_pickup_agent_0_mean": 5.05, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 4.81, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 1.67, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 6, "useful_dish_pickup_agent_1_mean": 1.75, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 5, "dish_drop_agent_0_mean": 1.5, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 1.62, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.54, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 5, "useful_dish_drop_agent_1_mean": 0.52, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 4, "soup_pickup_agent_0_mean": 3.73, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 3.19, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 3.17, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 6, "soup_delivery_agent_1_mean": 2.87, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.46, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 5, "soup_drop_agent_1_mean": 0.29, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 4, "optimal_onion_potting_agent_0_mean": 8.75, 
"optimal_onion_potting_agent_0_min": 3, "optimal_onion_potting_agent_0_max": 16, "optimal_onion_potting_agent_1_mean": 9.63, "optimal_onion_potting_agent_1_min": 1, "optimal_onion_potting_agent_1_max": 17, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 8.75, "viable_onion_potting_agent_0_min": 3, "viable_onion_potting_agent_0_max": 16, "viable_onion_potting_agent_1_mean": 9.63, "viable_onion_potting_agent_1_min": 1, "viable_onion_potting_agent_1_max": 17, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [330.0, 248.0, 387.0, 301.0, 298.0, 190.0, 359.0, 353.0, 296.0, 365.0, 293.0, 364.0, 236.0, 325.0, 321.0, 259.0, 303.0, 255.0, 273.0, 256.0, 261.0, 290.0, 307.0, 298.0, 359.0, 287.0, 261.0, 249.0, 255.0, 307.0, 176.0, 413.0, 290.0, 259.0, 159.0, 252.0, 282.0, 319.0, 247.0, 267.0, 244.0, 256.0, 305.0, 402.0, 352.0, 210.0, 139.0, 185.0, 293.0, 324.0, 164.0, 244.0, 345.0, 293.0, 333.0, 339.0, 150.0, 206.0, 344.0, 132.0, 204.0, 319.0, 258.0, 307.0, 250.0, 282.0, 298.0, 305.0, 218.0, 187.0, 60.0, 267.0, 71.0, 244.0, 238.0, 379.0, 373.0, 258.0, 339.0, 198.0, 330.0, 309.0, 358.0, 267.0, 293.0, 298.0, 264.0, 356.0, 309.0, 253.0, 236.0, 421.0, 287.0, 296.0, 384.0, 254.0, 264.0, 208.0, 23.0, 225.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [151.0, 179.0, 118.0, 130.0, 193.0, 194.0, 162.0, 139.0, 139.0, 159.0, 105.0, 85.0, 180.0, 179.0, 172.0, 181.0, 159.0, 137.0, 168.0, 197.0, 137.0, 156.0, 180.0, 184.0, 114.0, 122.0, 171.0, 154.0, 173.0, 148.0, 135.0, 124.0, 149.0, 154.0, 124.0, 131.0, 148.0, 125.0, 111.0, 145.0, 139.0, 122.0, 144.0, 146.0, 153.0, 154.0, 152.0, 146.0, 182.0, 177.0, 156.0, 131.0, 109.0, 152.0, 121.0, 128.0, 130.0, 125.0, 140.0, 167.0, 85.0, 91.0, 207.0, 206.0, 137.0, 153.0, 121.0, 138.0, 84.0, 75.0, 121.0, 131.0, 151.0, 131.0, 158.0, 161.0, 122.0, 125.0, 134.0, 133.0, 112.0, 132.0, 130.0, 126.0, 153.0, 152.0, 201.0, 201.0, 173.0, 179.0, 99.0, 111.0, 72.0, 67.0, 97.0, 88.0, 152.0, 141.0, 168.0, 
156.0, 85.0, 79.0, 125.0, 119.0, 168.0, 177.0, 139.0, 154.0, 165.0, 168.0, 182.0, 157.0, 82.0, 68.0, 89.0, 117.0, 170.0, 174.0, 73.0, 59.0, 112.0, 92.0, 136.0, 183.0, 119.0, 139.0, 162.0, 145.0, 119.0, 131.0, 150.0, 132.0, 167.0, 131.0, 158.0, 147.0, 114.0, 104.0, 93.0, 94.0, 29.0, 31.0, 151.0, 116.0, 34.0, 37.0, 113.0, 131.0, 114.0, 124.0, 193.0, 186.0, 188.0, 185.0, 132.0, 126.0, 159.0, 180.0, 96.0, 102.0, 157.0, 173.0, 158.0, 151.0, 180.0, 178.0, 136.0, 131.0, 153.0, 140.0, 146.0, 152.0, 127.0, 137.0, 186.0, 170.0, 151.0, 158.0, 107.0, 146.0, 108.0, 128.0, 211.0, 210.0, 144.0, 143.0, 145.0, 151.0, 204.0, 180.0, 124.0, 130.0, 133.0, 131.0, 99.0, 109.0, 9.0, 14.0, 109.0, 116.0]}, "sampler_perf": {"mean_env_wait_ms": 2.683811918728699, "mean_processing_ms": 0.6031493203010873, "mean_inference_ms": 3.2301262692347574}, "off_policy_estimator": {}, "info": {"num_steps_trained": 1632000, "num_steps_sampled": 870400, "sample_time_ms": 21057.05, "load_time_ms": 37.776, "grad_time_ms": 9180.759, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 1.3552527358017197e-21, "cur_lr": 0.0010000000474974513, "total_loss": -0.0049219937063753605, "policy_loss": -0.009040978737175465, "vf_loss": 48.340152740478516, "vf_explained_var": 0.6955335140228271, "kl": 0.0016705109737813473, "entropy": 1.430059552192688, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 870400, "episodes_total": 2176, "training_iteration": 68, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-18-49", "timestamp": 1660249129, "time_this_iter_s": 26.73872995376587, "time_total_s": 7549.544038057327, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, 
"model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, 
"evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 
0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 7549.544038057327, "timesteps_since_restore": 870400, "iterations_since_restore": 68, "perf": {"cpu_util_percent": 34.623684210526314, "ram_util_percent": 57.58947368421052}}
+{"episode_reward_max": 462.0, "episode_reward_min": 23.0, "episode_reward_mean": 287.98, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 9.0}, "policy_reward_max": {"ppo": 236.0}, "policy_reward_mean": {"ppo": 143.99}, "custom_metrics": {"sparse_reward_mean": 94.8, "sparse_reward_min": 0, "sparse_reward_max": 160, "shaped_reward_mean": 98.38, "shaped_reward_min": 20, "shaped_reward_max": 144, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 10.38, "onion_pickup_agent_0_min": 3, "onion_pickup_agent_0_max": 17, "onion_pickup_agent_1_mean": 11.18, "onion_pickup_agent_1_min": 3, "onion_pickup_agent_1_max": 19, "useful_onion_pickup_agent_0_mean": 9.77, "useful_onion_pickup_agent_0_min": 1, "useful_onion_pickup_agent_0_max": 16, "useful_onion_pickup_agent_1_mean": 10.46, "useful_onion_pickup_agent_1_min": 2, "useful_onion_pickup_agent_1_max": 18, "onion_drop_agent_0_mean": 1.15, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 1.09, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.55, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.59, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 8.86, "potting_onion_agent_0_min": 1, "potting_onion_agent_0_max": 16, "potting_onion_agent_1_mean": 9.74, "potting_onion_agent_1_min": 1, "potting_onion_agent_1_max": 16, "dish_pickup_agent_0_mean": 4.92, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 4.78, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 1.84, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 6, "useful_dish_pickup_agent_1_mean": 1.72, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 6, "dish_drop_agent_0_mean": 1.46, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 1.49, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.49, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 5, "useful_dish_drop_agent_1_mean": 0.49, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 4, "soup_pickup_agent_0_mean": 3.6, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 3.24, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 3.11, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 6, "soup_delivery_agent_1_mean": 2.95, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.41, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 5, "soup_drop_agent_1_mean": 0.24, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 3, "optimal_onion_potting_agent_0_mean": 8.86, 
"optimal_onion_potting_agent_0_min": 1, "optimal_onion_potting_agent_0_max": 16, "optimal_onion_potting_agent_1_mean": 9.74, "optimal_onion_potting_agent_1_min": 1, "optimal_onion_potting_agent_1_max": 16, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 8.86, "viable_onion_potting_agent_0_min": 1, "viable_onion_potting_agent_0_max": 16, "viable_onion_potting_agent_1_mean": 9.74, "viable_onion_potting_agent_1_min": 1, "viable_onion_potting_agent_1_max": 16, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [242.0, 301.0, 234.0, 344.0, 450.0, 312.0, 239.0, 344.0, 359.0, 253.0, 316.0, 255.0, 356.0, 171.0, 396.0, 398.0, 276.0, 402.0, 339.0, 338.0, 353.0, 462.0, 296.0, 237.0, 293.0, 298.0, 231.0, 250.0, 347.0, 63.0, 356.0, 210.0, 250.0, 282.0, 298.0, 305.0, 218.0, 187.0, 60.0, 267.0, 71.0, 244.0, 238.0, 379.0, 373.0, 258.0, 339.0, 198.0, 330.0, 309.0, 358.0, 267.0, 293.0, 298.0, 264.0, 356.0, 309.0, 253.0, 236.0, 421.0, 287.0, 296.0, 384.0, 254.0, 264.0, 208.0, 23.0, 225.0, 330.0, 248.0, 387.0, 301.0, 298.0, 190.0, 359.0, 353.0, 296.0, 365.0, 293.0, 364.0, 236.0, 325.0, 321.0, 259.0, 303.0, 255.0, 273.0, 256.0, 261.0, 290.0, 307.0, 298.0, 359.0, 287.0, 261.0, 249.0, 255.0, 307.0, 176.0, 413.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [122.0, 120.0, 142.0, 159.0, 117.0, 117.0, 178.0, 166.0, 214.0, 236.0, 150.0, 162.0, 115.0, 124.0, 174.0, 170.0, 187.0, 172.0, 114.0, 139.0, 169.0, 147.0, 122.0, 133.0, 172.0, 184.0, 83.0, 88.0, 186.0, 210.0, 190.0, 208.0, 137.0, 139.0, 193.0, 209.0, 182.0, 157.0, 177.0, 161.0, 193.0, 160.0, 236.0, 226.0, 147.0, 149.0, 125.0, 112.0, 138.0, 155.0, 144.0, 154.0, 111.0, 120.0, 136.0, 114.0, 170.0, 177.0, 23.0, 40.0, 176.0, 180.0, 108.0, 102.0, 119.0, 131.0, 150.0, 132.0, 167.0, 131.0, 158.0, 147.0, 114.0, 104.0, 93.0, 94.0, 29.0, 31.0, 151.0, 116.0, 34.0, 37.0, 113.0, 131.0, 114.0, 124.0, 193.0, 186.0, 188.0, 185.0, 132.0, 126.0, 159.0, 180.0, 96.0, 102.0, 157.0, 173.0, 158.0, 
151.0, 180.0, 178.0, 136.0, 131.0, 153.0, 140.0, 146.0, 152.0, 127.0, 137.0, 186.0, 170.0, 151.0, 158.0, 107.0, 146.0, 108.0, 128.0, 211.0, 210.0, 144.0, 143.0, 145.0, 151.0, 204.0, 180.0, 124.0, 130.0, 133.0, 131.0, 99.0, 109.0, 9.0, 14.0, 109.0, 116.0, 151.0, 179.0, 118.0, 130.0, 193.0, 194.0, 162.0, 139.0, 139.0, 159.0, 105.0, 85.0, 180.0, 179.0, 172.0, 181.0, 159.0, 137.0, 168.0, 197.0, 137.0, 156.0, 180.0, 184.0, 114.0, 122.0, 171.0, 154.0, 173.0, 148.0, 135.0, 124.0, 149.0, 154.0, 124.0, 131.0, 148.0, 125.0, 111.0, 145.0, 139.0, 122.0, 144.0, 146.0, 153.0, 154.0, 152.0, 146.0, 182.0, 177.0, 156.0, 131.0, 109.0, 152.0, 121.0, 128.0, 130.0, 125.0, 140.0, 167.0, 85.0, 91.0, 207.0, 206.0]}, "sampler_perf": {"mean_env_wait_ms": 2.6511088970025942, "mean_processing_ms": 0.5966469783379228, "mean_inference_ms": 3.1956784822649578}, "off_policy_estimator": {}, "info": {"num_steps_trained": 1656000, "num_steps_sampled": 883200, "sample_time_ms": 20666.066, "load_time_ms": 37.699, "grad_time_ms": 8951.146, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 6.776263679008599e-22, "cur_lr": 0.0010000000474974513, "total_loss": -0.007915745489299297, "policy_loss": -0.011840385384857655, "vf_loss": 46.363162994384766, "vf_explained_var": 0.7722532153129578, "kl": 0.0015700907679274678, "entropy": 1.423343300819397, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 883200, "episodes_total": 2208, "training_iteration": 69, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-19-16", "timestamp": 1660249156, "time_this_iter_s": 26.615740060806274, "time_total_s": 7576.1597781181335, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 7576.1597781181335, "timesteps_since_restore": 883200, "iterations_since_restore": 69, "perf": {"cpu_util_percent": 33.539473684210535, "ram_util_percent": 57.605263157894726}}
+{"episode_reward_max": 462.0, "episode_reward_min": 23.0, "episode_reward_mean": 296.87, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 9.0}, "policy_reward_max": {"ppo": 236.0}, "policy_reward_mean": {"ppo": 148.435}, "custom_metrics": {"sparse_reward_mean": 98.6, "sparse_reward_min": 0, "sparse_reward_max": 160, "shaped_reward_mean": 99.67, "shaped_reward_min": 23, "shaped_reward_max": 142, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 10.41, "onion_pickup_agent_0_min": 3, "onion_pickup_agent_0_max": 19, "onion_pickup_agent_1_mean": 11.35, "onion_pickup_agent_1_min": 5, "onion_pickup_agent_1_max": 19, "useful_onion_pickup_agent_0_mean": 9.87, "useful_onion_pickup_agent_0_min": 1, "useful_onion_pickup_agent_0_max": 19, "useful_onion_pickup_agent_1_mean": 10.64, "useful_onion_pickup_agent_1_min": 2, "useful_onion_pickup_agent_1_max": 17, "onion_drop_agent_0_mean": 1.16, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 1.02, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.57, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.56, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 8.93, "potting_onion_agent_0_min": 1, "potting_onion_agent_0_max": 17, "potting_onion_agent_1_mean": 10.01, "potting_onion_agent_1_min": 2, "potting_onion_agent_1_max": 15, "dish_pickup_agent_0_mean": 4.87, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 4.72, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 1.92, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 6, "useful_dish_pickup_agent_1_mean": 1.78, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 6, "dish_drop_agent_0_mean": 1.33, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 1.52, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 9, "useful_dish_drop_agent_0_mean": 0.4, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.48, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 4, "soup_pickup_agent_0_mean": 3.68, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 3.17, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 3.2, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 5, "soup_delivery_agent_1_mean": 2.83, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.39, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 5, "soup_drop_agent_1_mean": 0.27, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 3, "optimal_onion_potting_agent_0_mean": 8.93, 
"optimal_onion_potting_agent_0_min": 1, "optimal_onion_potting_agent_0_max": 17, "optimal_onion_potting_agent_1_mean": 10.01, "optimal_onion_potting_agent_1_min": 2, "optimal_onion_potting_agent_1_max": 15, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 8.93, "viable_onion_potting_agent_0_min": 1, "viable_onion_potting_agent_0_max": 17, "viable_onion_potting_agent_1_mean": 10.01, "viable_onion_potting_agent_1_min": 2, "viable_onion_potting_agent_1_max": 15, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [249.0, 68.0, 319.0, 402.0, 327.0, 293.0, 350.0, 396.0, 396.0, 247.0, 387.0, 134.0, 387.0, 301.0, 356.0, 365.0, 308.0, 344.0, 348.0, 333.0, 345.0, 359.0, 259.0, 307.0, 330.0, 269.0, 350.0, 247.0, 356.0, 302.0, 269.0, 68.0, 264.0, 208.0, 23.0, 225.0, 330.0, 248.0, 387.0, 301.0, 298.0, 190.0, 359.0, 353.0, 296.0, 365.0, 293.0, 364.0, 236.0, 325.0, 321.0, 259.0, 303.0, 255.0, 273.0, 256.0, 261.0, 290.0, 307.0, 298.0, 359.0, 287.0, 261.0, 249.0, 255.0, 307.0, 176.0, 413.0, 242.0, 301.0, 234.0, 344.0, 450.0, 312.0, 239.0, 344.0, 359.0, 253.0, 316.0, 255.0, 356.0, 171.0, 396.0, 398.0, 276.0, 402.0, 339.0, 338.0, 353.0, 462.0, 296.0, 237.0, 293.0, 298.0, 231.0, 250.0, 347.0, 63.0, 356.0, 210.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [129.0, 120.0, 28.0, 40.0, 165.0, 154.0, 191.0, 211.0, 161.0, 166.0, 152.0, 141.0, 179.0, 171.0, 222.0, 174.0, 205.0, 191.0, 131.0, 116.0, 188.0, 199.0, 60.0, 74.0, 212.0, 175.0, 147.0, 154.0, 170.0, 186.0, 185.0, 180.0, 146.0, 162.0, 167.0, 177.0, 177.0, 171.0, 153.0, 180.0, 169.0, 176.0, 189.0, 170.0, 133.0, 126.0, 139.0, 168.0, 176.0, 154.0, 142.0, 127.0, 183.0, 167.0, 124.0, 123.0, 176.0, 180.0, 142.0, 160.0, 133.0, 136.0, 28.0, 40.0, 133.0, 131.0, 99.0, 109.0, 9.0, 14.0, 109.0, 116.0, 151.0, 179.0, 118.0, 130.0, 193.0, 194.0, 162.0, 139.0, 139.0, 159.0, 105.0, 85.0, 180.0, 179.0, 172.0, 181.0, 159.0, 137.0, 168.0, 197.0, 137.0, 156.0, 180.0, 184.0, 114.0, 122.0, 171.0, 
154.0, 173.0, 148.0, 135.0, 124.0, 149.0, 154.0, 124.0, 131.0, 148.0, 125.0, 111.0, 145.0, 139.0, 122.0, 144.0, 146.0, 153.0, 154.0, 152.0, 146.0, 182.0, 177.0, 156.0, 131.0, 109.0, 152.0, 121.0, 128.0, 130.0, 125.0, 140.0, 167.0, 85.0, 91.0, 207.0, 206.0, 122.0, 120.0, 142.0, 159.0, 117.0, 117.0, 178.0, 166.0, 214.0, 236.0, 150.0, 162.0, 115.0, 124.0, 174.0, 170.0, 187.0, 172.0, 114.0, 139.0, 169.0, 147.0, 122.0, 133.0, 172.0, 184.0, 83.0, 88.0, 186.0, 210.0, 190.0, 208.0, 137.0, 139.0, 193.0, 209.0, 182.0, 157.0, 177.0, 161.0, 193.0, 160.0, 236.0, 226.0, 147.0, 149.0, 125.0, 112.0, 138.0, 155.0, 144.0, 154.0, 111.0, 120.0, 136.0, 114.0, 170.0, 177.0, 23.0, 40.0, 176.0, 180.0, 108.0, 102.0]}, "sampler_perf": {"mean_env_wait_ms": 2.619364465918477, "mean_processing_ms": 0.59033913982099, "mean_inference_ms": 3.1628679481393043}, "off_policy_estimator": {}, "info": {"num_steps_trained": 1680000, "num_steps_sampled": 896000, "sample_time_ms": 20622.52, "load_time_ms": 37.691, "grad_time_ms": 8717.912, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 3.3881318395042993e-22, "cur_lr": 0.0010000000474974513, "total_loss": 0.0007909121923148632, "policy_loss": -0.0035155529621988535, "vf_loss": 50.137577056884766, "vf_explained_var": 0.7450786232948303, "kl": 0.0021507267374545336, "entropy": 1.4145766496658325, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 896000, "episodes_total": 2240, "training_iteration": 70, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-19-45", "timestamp": 1660249185, "time_this_iter_s": 29.150850772857666, "time_total_s": 7605.310628890991, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 7605.310628890991, "timesteps_since_restore": 896000, "iterations_since_restore": 70, "perf": {"cpu_util_percent": 33.670731707317074, "ram_util_percent": 57.6219512195122}}
+{"episode_reward_max": 462.0, "episode_reward_min": 63.0, "episode_reward_mean": 310.07, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 23.0}, "policy_reward_max": {"ppo": 241.0}, "policy_reward_mean": {"ppo": 155.035}, "custom_metrics": {"sparse_reward_mean": 103.8, "sparse_reward_min": 20, "sparse_reward_max": 160, "shaped_reward_mean": 102.47, "shaped_reward_min": 23, "shaped_reward_max": 142, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 10.6, "onion_pickup_agent_0_min": 3, "onion_pickup_agent_0_max": 19, "onion_pickup_agent_1_mean": 11.29, "onion_pickup_agent_1_min": 3, "onion_pickup_agent_1_max": 19, "useful_onion_pickup_agent_0_mean": 10.1, "useful_onion_pickup_agent_0_min": 1, "useful_onion_pickup_agent_0_max": 19, "useful_onion_pickup_agent_1_mean": 10.61, "useful_onion_pickup_agent_1_min": 3, "useful_onion_pickup_agent_1_max": 17, "onion_drop_agent_0_mean": 1.0, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 1.0, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.53, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.51, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 9.29, "potting_onion_agent_0_min": 1, "potting_onion_agent_0_max": 17, "potting_onion_agent_1_mean": 9.98, "potting_onion_agent_1_min": 3, "potting_onion_agent_1_max": 15, "dish_pickup_agent_0_mean": 4.87, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 4.56, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 2.21, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 6, "useful_dish_pickup_agent_1_mean": 1.96, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 6, "dish_drop_agent_0_mean": 1.23, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 1.34, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 9, "useful_dish_drop_agent_0_mean": 0.34, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.4, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 3.65, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 3.17, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 3.23, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 6, "soup_delivery_agent_1_mean": 2.85, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.33, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 5, "soup_drop_agent_1_mean": 0.23, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 9.29, 
"optimal_onion_potting_agent_0_min": 1, "optimal_onion_potting_agent_0_max": 17, "optimal_onion_potting_agent_1_mean": 9.98, "optimal_onion_potting_agent_1_min": 3, "optimal_onion_potting_agent_1_max": 15, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 9.29, "viable_onion_potting_agent_0_min": 1, "viable_onion_potting_agent_0_max": 17, "viable_onion_potting_agent_1_mean": 9.98, "viable_onion_potting_agent_1_min": 3, "viable_onion_potting_agent_1_max": 15, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [367.0, 350.0, 230.0, 359.0, 362.0, 348.0, 408.0, 313.0, 345.0, 393.0, 399.0, 324.0, 293.0, 296.0, 396.0, 159.0, 236.0, 264.0, 408.0, 456.0, 270.0, 304.0, 356.0, 327.0, 213.0, 275.0, 350.0, 284.0, 390.0, 237.0, 402.0, 250.0, 255.0, 307.0, 176.0, 413.0, 242.0, 301.0, 234.0, 344.0, 450.0, 312.0, 239.0, 344.0, 359.0, 253.0, 316.0, 255.0, 356.0, 171.0, 396.0, 398.0, 276.0, 402.0, 339.0, 338.0, 353.0, 462.0, 296.0, 237.0, 293.0, 298.0, 231.0, 250.0, 347.0, 63.0, 356.0, 210.0, 249.0, 68.0, 319.0, 402.0, 327.0, 293.0, 350.0, 396.0, 396.0, 247.0, 387.0, 134.0, 387.0, 301.0, 356.0, 365.0, 308.0, 344.0, 348.0, 333.0, 345.0, 359.0, 259.0, 307.0, 330.0, 269.0, 350.0, 247.0, 356.0, 302.0, 269.0, 68.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [195.0, 172.0, 171.0, 179.0, 123.0, 107.0, 183.0, 176.0, 184.0, 178.0, 178.0, 170.0, 211.0, 197.0, 154.0, 159.0, 174.0, 171.0, 204.0, 189.0, 205.0, 194.0, 167.0, 157.0, 156.0, 137.0, 150.0, 146.0, 206.0, 190.0, 88.0, 71.0, 121.0, 115.0, 125.0, 139.0, 205.0, 203.0, 241.0, 215.0, 135.0, 135.0, 138.0, 166.0, 182.0, 174.0, 162.0, 165.0, 119.0, 94.0, 128.0, 147.0, 177.0, 173.0, 148.0, 136.0, 201.0, 189.0, 106.0, 131.0, 195.0, 207.0, 123.0, 127.0, 130.0, 125.0, 140.0, 167.0, 85.0, 91.0, 207.0, 206.0, 122.0, 120.0, 142.0, 159.0, 117.0, 117.0, 178.0, 166.0, 214.0, 236.0, 150.0, 162.0, 115.0, 124.0, 174.0, 170.0, 187.0, 172.0, 114.0, 139.0, 169.0, 147.0, 122.0, 133.0, 172.0, 184.0, 
83.0, 88.0, 186.0, 210.0, 190.0, 208.0, 137.0, 139.0, 193.0, 209.0, 182.0, 157.0, 177.0, 161.0, 193.0, 160.0, 236.0, 226.0, 147.0, 149.0, 125.0, 112.0, 138.0, 155.0, 144.0, 154.0, 111.0, 120.0, 136.0, 114.0, 170.0, 177.0, 23.0, 40.0, 176.0, 180.0, 108.0, 102.0, 129.0, 120.0, 28.0, 40.0, 165.0, 154.0, 191.0, 211.0, 161.0, 166.0, 152.0, 141.0, 179.0, 171.0, 222.0, 174.0, 205.0, 191.0, 131.0, 116.0, 188.0, 199.0, 60.0, 74.0, 212.0, 175.0, 147.0, 154.0, 170.0, 186.0, 185.0, 180.0, 146.0, 162.0, 167.0, 177.0, 177.0, 171.0, 153.0, 180.0, 169.0, 176.0, 189.0, 170.0, 133.0, 126.0, 139.0, 168.0, 176.0, 154.0, 142.0, 127.0, 183.0, 167.0, 124.0, 123.0, 176.0, 180.0, 142.0, 160.0, 133.0, 136.0, 28.0, 40.0]}, "sampler_perf": {"mean_env_wait_ms": 2.5885045396886737, "mean_processing_ms": 0.584211440514898, "mean_inference_ms": 3.1312910646882246}, "off_policy_estimator": {}, "info": {"num_steps_trained": 1704000, "num_steps_sampled": 908800, "sample_time_ms": 20412.625, "load_time_ms": 37.645, "grad_time_ms": 8491.272, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 1.6940659197521496e-22, "cur_lr": 0.0010000000474974513, "total_loss": 9.037616109708324e-05, "policy_loss": -0.004211378749459982, "vf_loss": 49.97343826293945, "vf_explained_var": 0.7645077705383301, "kl": 0.0018662656657397747, "entropy": 1.391157627105713, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 908800, "episodes_total": 2272, "training_iteration": 71, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-20-14", "timestamp": 1660249214, "time_this_iter_s": 28.656519889831543, "time_total_s": 7633.967148780823, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 7633.967148780823, "timesteps_since_restore": 908800, "iterations_since_restore": 71, "perf": {"cpu_util_percent": 34.982926829268294, "ram_util_percent": 57.707317073170735}}
+{"episode_reward_max": 456.0, "episode_reward_min": 63.0, "episode_reward_mean": 316.27, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 23.0}, "policy_reward_max": {"ppo": 241.0}, "policy_reward_mean": {"ppo": 158.135}, "custom_metrics": {"sparse_reward_mean": 105.8, "sparse_reward_min": 20, "sparse_reward_max": 160, "shaped_reward_mean": 104.67, "shaped_reward_min": 23, "shaped_reward_max": 142, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 10.74, "onion_pickup_agent_0_min": 3, "onion_pickup_agent_0_max": 19, "onion_pickup_agent_1_mean": 11.52, "onion_pickup_agent_1_min": 3, "onion_pickup_agent_1_max": 19, "useful_onion_pickup_agent_0_mean": 10.21, "useful_onion_pickup_agent_0_min": 1, "useful_onion_pickup_agent_0_max": 19, "useful_onion_pickup_agent_1_mean": 10.81, "useful_onion_pickup_agent_1_min": 3, "useful_onion_pickup_agent_1_max": 17, "onion_drop_agent_0_mean": 1.0, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 1.05, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.52, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.64, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 9.4, "potting_onion_agent_0_min": 1, "potting_onion_agent_0_max": 17, "potting_onion_agent_1_mean": 10.18, "potting_onion_agent_1_min": 3, "potting_onion_agent_1_max": 17, "dish_pickup_agent_0_mean": 4.86, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 4.41, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 2.42, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 6, "useful_dish_pickup_agent_1_mean": 2.04, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 6, "dish_drop_agent_0_mean": 1.18, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 1.17, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 9, "useful_dish_drop_agent_0_mean": 0.35, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.36, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 3.75, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 3.22, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 3.28, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 6, "soup_delivery_agent_1_mean": 2.84, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.38, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 5, "soup_drop_agent_1_mean": 0.29, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 4, "optimal_onion_potting_agent_0_mean": 9.4, 
"optimal_onion_potting_agent_0_min": 1, "optimal_onion_potting_agent_0_max": 17, "optimal_onion_potting_agent_1_mean": 10.18, "optimal_onion_potting_agent_1_min": 3, "optimal_onion_potting_agent_1_max": 17, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 9.4, "viable_onion_potting_agent_0_min": 1, "viable_onion_potting_agent_0_max": 17, "viable_onion_potting_agent_1_mean": 10.18, "viable_onion_potting_agent_1_min": 3, "viable_onion_potting_agent_1_max": 17, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [408.0, 316.0, 234.0, 245.0, 373.0, 336.0, 450.0, 416.0, 413.0, 356.0, 247.0, 410.0, 182.0, 347.0, 233.0, 316.0, 284.0, 316.0, 242.0, 296.0, 396.0, 142.0, 324.0, 351.0, 359.0, 361.0, 399.0, 353.0, 344.0, 275.0, 410.0, 382.0, 347.0, 63.0, 356.0, 210.0, 249.0, 68.0, 319.0, 402.0, 327.0, 293.0, 350.0, 396.0, 396.0, 247.0, 387.0, 134.0, 387.0, 301.0, 356.0, 365.0, 308.0, 344.0, 348.0, 333.0, 345.0, 359.0, 259.0, 307.0, 330.0, 269.0, 350.0, 247.0, 356.0, 302.0, 269.0, 68.0, 367.0, 350.0, 230.0, 359.0, 362.0, 348.0, 408.0, 313.0, 345.0, 393.0, 399.0, 324.0, 293.0, 296.0, 396.0, 159.0, 236.0, 264.0, 408.0, 456.0, 270.0, 304.0, 356.0, 327.0, 213.0, 275.0, 350.0, 284.0, 390.0, 237.0, 402.0, 250.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [199.0, 209.0, 151.0, 165.0, 95.0, 139.0, 128.0, 117.0, 175.0, 198.0, 156.0, 180.0, 239.0, 211.0, 208.0, 208.0, 210.0, 203.0, 174.0, 182.0, 123.0, 124.0, 204.0, 206.0, 88.0, 94.0, 178.0, 169.0, 116.0, 117.0, 151.0, 165.0, 149.0, 135.0, 163.0, 153.0, 144.0, 98.0, 151.0, 145.0, 179.0, 217.0, 62.0, 80.0, 156.0, 168.0, 184.0, 167.0, 165.0, 194.0, 182.0, 179.0, 207.0, 192.0, 179.0, 174.0, 178.0, 166.0, 141.0, 134.0, 219.0, 191.0, 188.0, 194.0, 170.0, 177.0, 23.0, 40.0, 176.0, 180.0, 108.0, 102.0, 129.0, 120.0, 28.0, 40.0, 165.0, 154.0, 191.0, 211.0, 161.0, 166.0, 152.0, 141.0, 179.0, 171.0, 222.0, 174.0, 205.0, 191.0, 131.0, 116.0, 188.0, 199.0, 60.0, 74.0, 212.0, 175.0, 147.0, 
154.0, 170.0, 186.0, 185.0, 180.0, 146.0, 162.0, 167.0, 177.0, 177.0, 171.0, 153.0, 180.0, 169.0, 176.0, 189.0, 170.0, 133.0, 126.0, 139.0, 168.0, 176.0, 154.0, 142.0, 127.0, 183.0, 167.0, 124.0, 123.0, 176.0, 180.0, 142.0, 160.0, 133.0, 136.0, 28.0, 40.0, 195.0, 172.0, 171.0, 179.0, 123.0, 107.0, 183.0, 176.0, 184.0, 178.0, 178.0, 170.0, 211.0, 197.0, 154.0, 159.0, 174.0, 171.0, 204.0, 189.0, 205.0, 194.0, 167.0, 157.0, 156.0, 137.0, 150.0, 146.0, 206.0, 190.0, 88.0, 71.0, 121.0, 115.0, 125.0, 139.0, 205.0, 203.0, 241.0, 215.0, 135.0, 135.0, 138.0, 166.0, 182.0, 174.0, 162.0, 165.0, 119.0, 94.0, 128.0, 147.0, 177.0, 173.0, 148.0, 136.0, 201.0, 189.0, 106.0, 131.0, 195.0, 207.0, 123.0, 127.0]}, "sampler_perf": {"mean_env_wait_ms": 2.558557313555292, "mean_processing_ms": 0.5782709476223633, "mean_inference_ms": 3.1013750793848702}, "off_policy_estimator": {}, "info": {"num_steps_trained": 1728000, "num_steps_sampled": 921600, "sample_time_ms": 20167.129, "load_time_ms": 37.249, "grad_time_ms": 8246.669, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 8.470329598760748e-23, "cur_lr": 0.0010000000474974513, "total_loss": -0.0024793706834316254, "policy_loss": -0.007412927225232124, "vf_loss": 56.26578903198242, "vf_explained_var": 0.7433841228485107, "kl": 0.0019004354253411293, "entropy": 1.3860511779785156, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 921600, "episodes_total": 2304, "training_iteration": 72, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-20-44", "timestamp": 1660249244, "time_this_iter_s": 30.219820022583008, "time_total_s": 7664.186968803406, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 7664.186968803406, "timesteps_since_restore": 921600, "iterations_since_restore": 72, "perf": {"cpu_util_percent": 35.07380952380952, "ram_util_percent": 57.70714285714284}}
+{"episode_reward_max": 465.0, "episode_reward_min": 68.0, "episode_reward_mean": 332.46, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 28.0}, "policy_reward_max": {"ppo": 241.0}, "policy_reward_mean": {"ppo": 166.23}, "custom_metrics": {"sparse_reward_mean": 111.6, "sparse_reward_min": 20, "sparse_reward_max": 160, "shaped_reward_mean": 109.26, "shaped_reward_min": 28, "shaped_reward_max": 145, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 10.81, "onion_pickup_agent_0_min": 0, "onion_pickup_agent_0_max": 19, "onion_pickup_agent_1_mean": 12.13, "onion_pickup_agent_1_min": 3, "onion_pickup_agent_1_max": 19, "useful_onion_pickup_agent_0_mean": 10.31, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 19, "useful_onion_pickup_agent_1_mean": 11.46, "useful_onion_pickup_agent_1_min": 3, "useful_onion_pickup_agent_1_max": 19, "onion_drop_agent_0_mean": 0.91, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 1.09, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.46, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, "useful_onion_drop_agent_1_mean": 0.7, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 9.53, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 17, "potting_onion_agent_1_mean": 10.79, "potting_onion_agent_1_min": 3, "potting_onion_agent_1_max": 18, "dish_pickup_agent_0_mean": 4.84, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 4.44, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 2.6, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 2.3, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 6, "dish_drop_agent_0_mean": 1.18, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.95, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.36, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.29, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 4, "soup_pickup_agent_0_mean": 3.63, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 3.46, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 3.27, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 6, "soup_delivery_agent_1_mean": 3.07, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.28, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 3, "soup_drop_agent_1_mean": 0.31, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 4, "optimal_onion_potting_agent_0_mean": 9.53, 
"optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 17, "optimal_onion_potting_agent_1_mean": 10.79, "optimal_onion_potting_agent_1_min": 3, "optimal_onion_potting_agent_1_max": 18, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 9.53, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 17, "viable_onion_potting_agent_1_mean": 10.79, "viable_onion_potting_agent_1_min": 3, "viable_onion_potting_agent_1_max": 18, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [373.0, 465.0, 251.0, 348.0, 399.0, 284.0, 344.0, 459.0, 416.0, 410.0, 416.0, 358.0, 422.0, 402.0, 405.0, 422.0, 247.0, 296.0, 407.0, 246.0, 120.0, 310.0, 413.0, 338.0, 353.0, 405.0, 416.0, 365.0, 236.0, 287.0, 350.0, 408.0, 356.0, 302.0, 269.0, 68.0, 367.0, 350.0, 230.0, 359.0, 362.0, 348.0, 408.0, 313.0, 345.0, 393.0, 399.0, 324.0, 293.0, 296.0, 396.0, 159.0, 236.0, 264.0, 408.0, 456.0, 270.0, 304.0, 356.0, 327.0, 213.0, 275.0, 350.0, 284.0, 390.0, 237.0, 402.0, 250.0, 408.0, 316.0, 234.0, 245.0, 373.0, 336.0, 450.0, 416.0, 413.0, 356.0, 247.0, 410.0, 182.0, 347.0, 233.0, 316.0, 284.0, 316.0, 242.0, 296.0, 396.0, 142.0, 324.0, 351.0, 359.0, 361.0, 399.0, 353.0, 344.0, 275.0, 410.0, 382.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [170.0, 203.0, 226.0, 239.0, 131.0, 120.0, 162.0, 186.0, 205.0, 194.0, 132.0, 152.0, 166.0, 178.0, 235.0, 224.0, 205.0, 211.0, 188.0, 222.0, 197.0, 219.0, 177.0, 181.0, 212.0, 210.0, 196.0, 206.0, 213.0, 192.0, 199.0, 223.0, 111.0, 136.0, 147.0, 149.0, 193.0, 214.0, 121.0, 125.0, 55.0, 65.0, 165.0, 145.0, 203.0, 210.0, 170.0, 168.0, 161.0, 192.0, 191.0, 214.0, 213.0, 203.0, 178.0, 187.0, 98.0, 138.0, 150.0, 137.0, 176.0, 174.0, 202.0, 206.0, 176.0, 180.0, 142.0, 160.0, 133.0, 136.0, 28.0, 40.0, 195.0, 172.0, 171.0, 179.0, 123.0, 107.0, 183.0, 176.0, 184.0, 178.0, 178.0, 170.0, 211.0, 197.0, 154.0, 159.0, 174.0, 171.0, 204.0, 189.0, 205.0, 194.0, 167.0, 157.0, 156.0, 137.0, 
150.0, 146.0, 206.0, 190.0, 88.0, 71.0, 121.0, 115.0, 125.0, 139.0, 205.0, 203.0, 241.0, 215.0, 135.0, 135.0, 138.0, 166.0, 182.0, 174.0, 162.0, 165.0, 119.0, 94.0, 128.0, 147.0, 177.0, 173.0, 148.0, 136.0, 201.0, 189.0, 106.0, 131.0, 195.0, 207.0, 123.0, 127.0, 199.0, 209.0, 151.0, 165.0, 95.0, 139.0, 128.0, 117.0, 175.0, 198.0, 156.0, 180.0, 239.0, 211.0, 208.0, 208.0, 210.0, 203.0, 174.0, 182.0, 123.0, 124.0, 204.0, 206.0, 88.0, 94.0, 178.0, 169.0, 116.0, 117.0, 151.0, 165.0, 149.0, 135.0, 163.0, 153.0, 144.0, 98.0, 151.0, 145.0, 179.0, 217.0, 62.0, 80.0, 156.0, 168.0, 184.0, 167.0, 165.0, 194.0, 182.0, 179.0, 207.0, 192.0, 179.0, 174.0, 178.0, 166.0, 141.0, 134.0, 219.0, 191.0, 188.0, 194.0]}, "sampler_perf": {"mean_env_wait_ms": 2.5294421557916076, "mean_processing_ms": 0.5725031041756122, "mean_inference_ms": 3.0723080495869532}, "off_policy_estimator": {}, "info": {"num_steps_trained": 1752000, "num_steps_sampled": 934400, "sample_time_ms": 20097.062, "load_time_ms": 37.14, "grad_time_ms": 8126.211, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 4.235164799380374e-23, "cur_lr": 0.0010000000474974513, "total_loss": -0.0012375875376164913, "policy_loss": -0.006141460034996271, "vf_loss": 55.8723258972168, "vf_explained_var": 0.7437755465507507, "kl": 0.0014161770232021809, "entropy": 1.3667305707931519, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 934400, "episodes_total": 2336, "training_iteration": 73, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-21-14", "timestamp": 1660249274, "time_this_iter_s": 30.526150941848755, "time_total_s": 7694.7131197452545, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 7694.7131197452545, "timesteps_since_restore": 934400, "iterations_since_restore": 73, "perf": {"cpu_util_percent": 36.46279069767442, "ram_util_percent": 57.75116279069769}}
+{"episode_reward_max": 465.0, "episode_reward_min": 120.0, "episode_reward_mean": 346.79, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 55.0}, "policy_reward_max": {"ppo": 239.0}, "policy_reward_mean": {"ppo": 173.395}, "custom_metrics": {"sparse_reward_mean": 117.2, "sparse_reward_min": 40, "sparse_reward_max": 160, "shaped_reward_mean": 112.39, "shaped_reward_min": 40, "shaped_reward_max": 147, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 11.26, "onion_pickup_agent_0_min": 0, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 12.36, "onion_pickup_agent_1_min": 4, "onion_pickup_agent_1_max": 19, "useful_onion_pickup_agent_0_mean": 10.7, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 19, "useful_onion_pickup_agent_1_mean": 11.66, "useful_onion_pickup_agent_1_min": 4, "useful_onion_pickup_agent_1_max": 19, "onion_drop_agent_0_mean": 1.07, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 1.04, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.6, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.69, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 9.87, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 17, "potting_onion_agent_1_mean": 11.09, "potting_onion_agent_1_min": 4, "potting_onion_agent_1_max": 18, "dish_pickup_agent_0_mean": 4.89, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 4.77, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 2.49, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 2.43, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 6, "dish_drop_agent_0_mean": 1.19, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 1.07, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.37, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.39, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 4, "soup_pickup_agent_0_mean": 3.72, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 3.58, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 6, "soup_delivery_agent_0_mean": 3.36, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 3.29, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.29, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 3, "soup_drop_agent_1_mean": 0.24, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 4, "optimal_onion_potting_agent_0_mean": 9.87, 
"optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 17, "optimal_onion_potting_agent_1_mean": 11.09, "optimal_onion_potting_agent_1_min": 4, "optimal_onion_potting_agent_1_max": 18, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 9.87, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 17, "viable_onion_potting_agent_1_mean": 11.09, "viable_onion_potting_agent_1_min": 4, "viable_onion_potting_agent_1_max": 18, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [358.0, 413.0, 455.0, 395.0, 402.0, 341.0, 290.0, 427.0, 413.0, 253.0, 355.0, 365.0, 316.0, 458.0, 196.0, 459.0, 398.0, 359.0, 407.0, 322.0, 410.0, 416.0, 390.0, 301.0, 416.0, 279.0, 301.0, 215.0, 359.0, 395.0, 310.0, 339.0, 390.0, 237.0, 402.0, 250.0, 408.0, 316.0, 234.0, 245.0, 373.0, 336.0, 450.0, 416.0, 413.0, 356.0, 247.0, 410.0, 182.0, 347.0, 233.0, 316.0, 284.0, 316.0, 242.0, 296.0, 396.0, 142.0, 324.0, 351.0, 359.0, 361.0, 399.0, 353.0, 344.0, 275.0, 410.0, 382.0, 373.0, 465.0, 251.0, 348.0, 399.0, 284.0, 344.0, 459.0, 416.0, 410.0, 416.0, 358.0, 422.0, 402.0, 405.0, 422.0, 247.0, 296.0, 407.0, 246.0, 120.0, 310.0, 413.0, 338.0, 353.0, 405.0, 416.0, 365.0, 236.0, 287.0, 350.0, 408.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [187.0, 171.0, 213.0, 200.0, 222.0, 233.0, 208.0, 187.0, 203.0, 199.0, 173.0, 168.0, 128.0, 162.0, 217.0, 210.0, 211.0, 202.0, 119.0, 134.0, 180.0, 175.0, 169.0, 196.0, 160.0, 156.0, 235.0, 223.0, 107.0, 89.0, 229.0, 230.0, 188.0, 210.0, 179.0, 180.0, 199.0, 208.0, 157.0, 165.0, 225.0, 185.0, 208.0, 208.0, 187.0, 203.0, 157.0, 144.0, 204.0, 212.0, 151.0, 128.0, 142.0, 159.0, 108.0, 107.0, 182.0, 177.0, 195.0, 200.0, 144.0, 166.0, 171.0, 168.0, 201.0, 189.0, 106.0, 131.0, 195.0, 207.0, 123.0, 127.0, 199.0, 209.0, 151.0, 165.0, 95.0, 139.0, 128.0, 117.0, 175.0, 198.0, 156.0, 180.0, 239.0, 211.0, 208.0, 208.0, 210.0, 203.0, 174.0, 182.0, 123.0, 124.0, 204.0, 206.0, 88.0, 
94.0, 178.0, 169.0, 116.0, 117.0, 151.0, 165.0, 149.0, 135.0, 163.0, 153.0, 144.0, 98.0, 151.0, 145.0, 179.0, 217.0, 62.0, 80.0, 156.0, 168.0, 184.0, 167.0, 165.0, 194.0, 182.0, 179.0, 207.0, 192.0, 179.0, 174.0, 178.0, 166.0, 141.0, 134.0, 219.0, 191.0, 188.0, 194.0, 170.0, 203.0, 226.0, 239.0, 131.0, 120.0, 162.0, 186.0, 205.0, 194.0, 132.0, 152.0, 166.0, 178.0, 235.0, 224.0, 205.0, 211.0, 188.0, 222.0, 197.0, 219.0, 177.0, 181.0, 212.0, 210.0, 196.0, 206.0, 213.0, 192.0, 199.0, 223.0, 111.0, 136.0, 147.0, 149.0, 193.0, 214.0, 121.0, 125.0, 55.0, 65.0, 165.0, 145.0, 203.0, 210.0, 170.0, 168.0, 161.0, 192.0, 191.0, 214.0, 213.0, 203.0, 178.0, 187.0, 98.0, 138.0, 150.0, 137.0, 176.0, 174.0, 202.0, 206.0]}, "sampler_perf": {"mean_env_wait_ms": 2.5011550162167318, "mean_processing_ms": 0.5669031638789668, "mean_inference_ms": 3.044470138401616}, "off_policy_estimator": {}, "info": {"num_steps_trained": 1776000, "num_steps_sampled": 947200, "sample_time_ms": 20475.505, "load_time_ms": 36.886, "grad_time_ms": 8011.391, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 2.117582399690187e-23, "cur_lr": 0.0010000000474974513, "total_loss": -0.002805360360071063, "policy_loss": -0.007409963756799698, "vf_loss": 52.88139724731445, "vf_explained_var": 0.7572636008262634, "kl": 0.0014988663606345654, "entropy": 1.3671082258224487, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 947200, "episodes_total": 2368, "training_iteration": 74, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-21-46", "timestamp": 1660249306, "time_this_iter_s": 31.191842079162598, "time_total_s": 7725.904961824417, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 7725.904961824417, "timesteps_since_restore": 947200, "iterations_since_restore": 74, "perf": {"cpu_util_percent": 34.40666666666667, "ram_util_percent": 57.844444444444456}}
+{"episode_reward_max": 465.0, "episode_reward_min": 120.0, "episode_reward_mean": 364.08, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 55.0}, "policy_reward_max": {"ppo": 239.0}, "policy_reward_mean": {"ppo": 182.04}, "custom_metrics": {"sparse_reward_mean": 124.0, "sparse_reward_min": 40, "sparse_reward_max": 160, "shaped_reward_mean": 116.08, "shaped_reward_min": 40, "shaped_reward_max": 147, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 11.3, "onion_pickup_agent_0_min": 0, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 12.93, "onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 19, "useful_onion_pickup_agent_0_mean": 10.86, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 19, "useful_onion_pickup_agent_1_mean": 12.23, "useful_onion_pickup_agent_1_min": 6, "useful_onion_pickup_agent_1_max": 19, "onion_drop_agent_0_mean": 0.93, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 0.98, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.55, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.57, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 10.09, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 17, "potting_onion_agent_1_mean": 11.68, "potting_onion_agent_1_min": 4, "potting_onion_agent_1_max": 18, "dish_pickup_agent_0_mean": 5.25, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 4.86, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 2.56, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 2.33, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 6, "dish_drop_agent_0_mean": 1.29, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 1.16, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.38, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.42, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 4, "soup_pickup_agent_0_mean": 3.93, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 3.55, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 6, "soup_delivery_agent_0_mean": 3.62, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 3.36, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.21, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.16, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 4, "optimal_onion_potting_agent_0_mean": 10.09, 
"optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 17, "optimal_onion_potting_agent_1_mean": 11.68, "optimal_onion_potting_agent_1_min": 4, "optimal_onion_potting_agent_1_max": 18, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 10.09, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 17, "viable_onion_potting_agent_1_mean": 11.68, "viable_onion_potting_agent_1_min": 4, "viable_onion_potting_agent_1_max": 18, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [438.0, 390.0, 364.0, 396.0, 413.0, 456.0, 346.0, 453.0, 332.0, 413.0, 251.0, 379.0, 384.0, 464.0, 384.0, 416.0, 401.0, 345.0, 339.0, 344.0, 355.0, 398.0, 419.0, 387.0, 302.0, 299.0, 453.0, 230.0, 362.0, 399.0, 405.0, 396.0, 344.0, 275.0, 410.0, 382.0, 373.0, 465.0, 251.0, 348.0, 399.0, 284.0, 344.0, 459.0, 416.0, 410.0, 416.0, 358.0, 422.0, 402.0, 405.0, 422.0, 247.0, 296.0, 407.0, 246.0, 120.0, 310.0, 413.0, 338.0, 353.0, 405.0, 416.0, 365.0, 236.0, 287.0, 350.0, 408.0, 358.0, 413.0, 455.0, 395.0, 402.0, 341.0, 290.0, 427.0, 413.0, 253.0, 355.0, 365.0, 316.0, 458.0, 196.0, 459.0, 398.0, 359.0, 407.0, 322.0, 410.0, 416.0, 390.0, 301.0, 416.0, 279.0, 301.0, 215.0, 359.0, 395.0, 310.0, 339.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [216.0, 222.0, 208.0, 182.0, 173.0, 191.0, 206.0, 190.0, 213.0, 200.0, 219.0, 237.0, 169.0, 177.0, 239.0, 214.0, 169.0, 163.0, 209.0, 204.0, 133.0, 118.0, 191.0, 188.0, 176.0, 208.0, 233.0, 231.0, 204.0, 180.0, 211.0, 205.0, 196.0, 205.0, 179.0, 166.0, 156.0, 183.0, 167.0, 177.0, 174.0, 181.0, 192.0, 206.0, 201.0, 218.0, 171.0, 216.0, 149.0, 153.0, 153.0, 146.0, 236.0, 217.0, 111.0, 119.0, 183.0, 179.0, 207.0, 192.0, 197.0, 208.0, 202.0, 194.0, 178.0, 166.0, 141.0, 134.0, 219.0, 191.0, 188.0, 194.0, 170.0, 203.0, 226.0, 239.0, 131.0, 120.0, 162.0, 186.0, 205.0, 194.0, 132.0, 152.0, 166.0, 178.0, 235.0, 224.0, 205.0, 211.0, 188.0, 222.0, 197.0, 219.0, 177.0, 181.0, 212.0, 
210.0, 196.0, 206.0, 213.0, 192.0, 199.0, 223.0, 111.0, 136.0, 147.0, 149.0, 193.0, 214.0, 121.0, 125.0, 55.0, 65.0, 165.0, 145.0, 203.0, 210.0, 170.0, 168.0, 161.0, 192.0, 191.0, 214.0, 213.0, 203.0, 178.0, 187.0, 98.0, 138.0, 150.0, 137.0, 176.0, 174.0, 202.0, 206.0, 187.0, 171.0, 213.0, 200.0, 222.0, 233.0, 208.0, 187.0, 203.0, 199.0, 173.0, 168.0, 128.0, 162.0, 217.0, 210.0, 211.0, 202.0, 119.0, 134.0, 180.0, 175.0, 169.0, 196.0, 160.0, 156.0, 235.0, 223.0, 107.0, 89.0, 229.0, 230.0, 188.0, 210.0, 179.0, 180.0, 199.0, 208.0, 157.0, 165.0, 225.0, 185.0, 208.0, 208.0, 187.0, 203.0, 157.0, 144.0, 204.0, 212.0, 151.0, 128.0, 142.0, 159.0, 108.0, 107.0, 182.0, 177.0, 195.0, 200.0, 144.0, 166.0, 171.0, 168.0]}, "sampler_perf": {"mean_env_wait_ms": 2.4736327447914648, "mean_processing_ms": 0.5614637333952731, "mean_inference_ms": 3.0177932889211685}, "off_policy_estimator": {}, "info": {"num_steps_trained": 1800000, "num_steps_sampled": 960000, "sample_time_ms": 20800.663, "load_time_ms": 36.895, "grad_time_ms": 8168.473, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 1.0587911998450935e-23, "cur_lr": 0.0010000000474974513, "total_loss": -0.0010099885985255241, "policy_loss": -0.006211612839251757, "vf_loss": 58.7685546875, "vf_explained_var": 0.7208888530731201, "kl": 0.0020332669373601675, "entropy": 1.3504695892333984, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 960000, "episodes_total": 2400, "training_iteration": 75, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-22-18", "timestamp": 1660249338, "time_this_iter_s": 32.21927499771118, "time_total_s": 7758.124236822128, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 7758.124236822128, "timesteps_since_restore": 960000, "iterations_since_restore": 75, "perf": {"cpu_util_percent": 32.595555555555556, "ram_util_percent": 57.83555555555553}}
+{"episode_reward_max": 507.0, "episode_reward_min": 196.0, "episode_reward_mean": 369.07, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 89.0}, "policy_reward_max": {"ppo": 258.0}, "policy_reward_mean": {"ppo": 184.535}, "custom_metrics": {"sparse_reward_mean": 125.8, "sparse_reward_min": 60, "sparse_reward_max": 180, "shaped_reward_mean": 117.47, "shaped_reward_min": 70, "shaped_reward_max": 156, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 11.42, "onion_pickup_agent_0_min": 0, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 13.2, "onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 19, "useful_onion_pickup_agent_0_mean": 10.97, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 18, "useful_onion_pickup_agent_1_mean": 12.42, "useful_onion_pickup_agent_1_min": 6, "useful_onion_pickup_agent_1_max": 19, "onion_drop_agent_0_mean": 0.9, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 1.11, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.54, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.62, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 10.26, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 17, "potting_onion_agent_1_mean": 11.75, "potting_onion_agent_1_min": 6, "potting_onion_agent_1_max": 19, "dish_pickup_agent_0_mean": 5.46, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 4.85, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 2.64, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 2.24, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 6, "dish_drop_agent_0_mean": 1.38, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 1.15, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 6, "useful_dish_drop_agent_0_mean": 0.45, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.4, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 4, "soup_pickup_agent_0_mean": 4.05, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 3.53, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 3.78, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 3.35, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.18, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.15, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 3, "optimal_onion_potting_agent_0_mean": 10.26, 
"optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 17, "optimal_onion_potting_agent_1_mean": 11.75, "optimal_onion_potting_agent_1_min": 6, "optimal_onion_potting_agent_1_max": 19, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 10.26, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 17, "viable_onion_potting_agent_1_mean": 11.75, "viable_onion_potting_agent_1_min": 6, "viable_onion_potting_agent_1_max": 19, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [272.0, 313.0, 408.0, 444.0, 304.0, 362.0, 304.0, 450.0, 408.0, 370.0, 476.0, 293.0, 453.0, 241.0, 410.0, 453.0, 405.0, 350.0, 350.0, 507.0, 427.0, 393.0, 267.0, 407.0, 419.0, 303.0, 410.0, 378.0, 350.0, 345.0, 376.0, 352.0, 236.0, 287.0, 350.0, 408.0, 358.0, 413.0, 455.0, 395.0, 402.0, 341.0, 290.0, 427.0, 413.0, 253.0, 355.0, 365.0, 316.0, 458.0, 196.0, 459.0, 398.0, 359.0, 407.0, 322.0, 410.0, 416.0, 390.0, 301.0, 416.0, 279.0, 301.0, 215.0, 359.0, 395.0, 310.0, 339.0, 438.0, 390.0, 364.0, 396.0, 413.0, 456.0, 346.0, 453.0, 332.0, 413.0, 251.0, 379.0, 384.0, 464.0, 384.0, 416.0, 401.0, 345.0, 339.0, 344.0, 355.0, 398.0, 419.0, 387.0, 302.0, 299.0, 453.0, 230.0, 362.0, 399.0, 405.0, 396.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [136.0, 136.0, 156.0, 157.0, 194.0, 214.0, 212.0, 232.0, 153.0, 151.0, 177.0, 185.0, 158.0, 146.0, 219.0, 231.0, 205.0, 203.0, 198.0, 172.0, 250.0, 226.0, 135.0, 158.0, 244.0, 209.0, 125.0, 116.0, 194.0, 216.0, 229.0, 224.0, 201.0, 204.0, 160.0, 190.0, 157.0, 193.0, 258.0, 249.0, 208.0, 219.0, 196.0, 197.0, 138.0, 129.0, 192.0, 215.0, 211.0, 208.0, 140.0, 163.0, 206.0, 204.0, 189.0, 189.0, 171.0, 179.0, 177.0, 168.0, 206.0, 170.0, 173.0, 179.0, 98.0, 138.0, 150.0, 137.0, 176.0, 174.0, 202.0, 206.0, 187.0, 171.0, 213.0, 200.0, 222.0, 233.0, 208.0, 187.0, 203.0, 199.0, 173.0, 168.0, 128.0, 162.0, 217.0, 210.0, 211.0, 202.0, 119.0, 134.0, 180.0, 175.0, 169.0, 196.0, 160.0, 
156.0, 235.0, 223.0, 107.0, 89.0, 229.0, 230.0, 188.0, 210.0, 179.0, 180.0, 199.0, 208.0, 157.0, 165.0, 225.0, 185.0, 208.0, 208.0, 187.0, 203.0, 157.0, 144.0, 204.0, 212.0, 151.0, 128.0, 142.0, 159.0, 108.0, 107.0, 182.0, 177.0, 195.0, 200.0, 144.0, 166.0, 171.0, 168.0, 216.0, 222.0, 208.0, 182.0, 173.0, 191.0, 206.0, 190.0, 213.0, 200.0, 219.0, 237.0, 169.0, 177.0, 239.0, 214.0, 169.0, 163.0, 209.0, 204.0, 133.0, 118.0, 191.0, 188.0, 176.0, 208.0, 233.0, 231.0, 204.0, 180.0, 211.0, 205.0, 196.0, 205.0, 179.0, 166.0, 156.0, 183.0, 167.0, 177.0, 174.0, 181.0, 192.0, 206.0, 201.0, 218.0, 171.0, 216.0, 149.0, 153.0, 153.0, 146.0, 236.0, 217.0, 111.0, 119.0, 183.0, 179.0, 207.0, 192.0, 197.0, 208.0, 202.0, 194.0]}, "sampler_perf": {"mean_env_wait_ms": 2.4468834694970774, "mean_processing_ms": 0.5561781773126093, "mean_inference_ms": 2.992510104410383}, "off_policy_estimator": {}, "info": {"num_steps_trained": 1824000, "num_steps_sampled": 972800, "sample_time_ms": 21085.438, "load_time_ms": 36.907, "grad_time_ms": 8381.058, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 5.293955999225468e-24, "cur_lr": 0.0010000000474974513, "total_loss": -0.000600266270339489, "policy_loss": -0.005276266019791365, "vf_loss": 53.540836334228516, "vf_explained_var": 0.7716453671455383, "kl": 0.0016209534369409084, "entropy": 1.3561688661575317, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 972800, "episodes_total": 2432, "training_iteration": 76, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-22-51", "timestamp": 1660249371, "time_this_iter_s": 33.30055785179138, "time_total_s": 7791.42479467392, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 7791.42479467392, "timesteps_since_restore": 972800, "iterations_since_restore": 76, "perf": {"cpu_util_percent": 33.693617021276594, "ram_util_percent": 57.704255319148906}}
+{"episode_reward_max": 510.0, "episode_reward_min": 208.0, "episode_reward_mean": 377.48, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 104.0}, "policy_reward_max": {"ppo": 274.0}, "policy_reward_mean": {"ppo": 188.74}, "custom_metrics": {"sparse_reward_mean": 129.0, "sparse_reward_min": 60, "sparse_reward_max": 180, "shaped_reward_mean": 119.48, "shaped_reward_min": 70, "shaped_reward_max": 156, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 11.62, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 18, "onion_pickup_agent_1_mean": 13.48, "onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 19, "useful_onion_pickup_agent_0_mean": 11.01, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 17, "useful_onion_pickup_agent_1_mean": 12.72, "useful_onion_pickup_agent_1_min": 6, "useful_onion_pickup_agent_1_max": 19, "onion_drop_agent_0_mean": 1.0, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 1.1, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.62, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, "useful_onion_drop_agent_1_mean": 0.63, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 10.34, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 17, "potting_onion_agent_1_mean": 12.03, "potting_onion_agent_1_min": 6, "potting_onion_agent_1_max": 19, "dish_pickup_agent_0_mean": 5.39, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 4.71, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 2.78, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 2.31, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 6, "dish_drop_agent_0_mean": 1.29, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 1.0, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.4, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.29, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.04, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 3.6, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 3.78, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 3.39, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.19, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 4, "soup_drop_agent_1_mean": 0.18, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 3, "optimal_onion_potting_agent_0_mean": 10.34, 
"optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 17, "optimal_onion_potting_agent_1_mean": 12.03, "optimal_onion_potting_agent_1_min": 6, "optimal_onion_potting_agent_1_max": 19, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 10.34, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 17, "viable_onion_potting_agent_1_mean": 12.03, "viable_onion_potting_agent_1_min": 6, "viable_onion_potting_agent_1_max": 19, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [376.0, 305.0, 302.0, 355.0, 447.0, 413.0, 364.0, 453.0, 459.0, 393.0, 368.0, 399.0, 404.0, 402.0, 373.0, 399.0, 381.0, 395.0, 367.0, 404.0, 208.0, 459.0, 287.0, 510.0, 347.0, 324.0, 410.0, 453.0, 404.0, 408.0, 319.0, 344.0, 359.0, 395.0, 310.0, 339.0, 438.0, 390.0, 364.0, 396.0, 413.0, 456.0, 346.0, 453.0, 332.0, 413.0, 251.0, 379.0, 384.0, 464.0, 384.0, 416.0, 401.0, 345.0, 339.0, 344.0, 355.0, 398.0, 419.0, 387.0, 302.0, 299.0, 453.0, 230.0, 362.0, 399.0, 405.0, 396.0, 272.0, 313.0, 408.0, 444.0, 304.0, 362.0, 304.0, 450.0, 408.0, 370.0, 476.0, 293.0, 453.0, 241.0, 410.0, 453.0, 405.0, 350.0, 350.0, 507.0, 427.0, 393.0, 267.0, 407.0, 419.0, 303.0, 410.0, 378.0, 350.0, 345.0, 376.0, 352.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [191.0, 185.0, 153.0, 152.0, 132.0, 170.0, 182.0, 173.0, 226.0, 221.0, 212.0, 201.0, 181.0, 183.0, 213.0, 240.0, 238.0, 221.0, 191.0, 202.0, 174.0, 194.0, 194.0, 205.0, 208.0, 196.0, 208.0, 194.0, 195.0, 178.0, 205.0, 194.0, 189.0, 192.0, 221.0, 174.0, 189.0, 178.0, 205.0, 199.0, 104.0, 104.0, 224.0, 235.0, 131.0, 156.0, 236.0, 274.0, 180.0, 167.0, 151.0, 173.0, 199.0, 211.0, 222.0, 231.0, 211.0, 193.0, 194.0, 214.0, 162.0, 157.0, 172.0, 172.0, 182.0, 177.0, 195.0, 200.0, 144.0, 166.0, 171.0, 168.0, 216.0, 222.0, 208.0, 182.0, 173.0, 191.0, 206.0, 190.0, 213.0, 200.0, 219.0, 237.0, 169.0, 177.0, 239.0, 214.0, 169.0, 163.0, 209.0, 204.0, 133.0, 118.0, 191.0, 188.0, 176.0, 
208.0, 233.0, 231.0, 204.0, 180.0, 211.0, 205.0, 196.0, 205.0, 179.0, 166.0, 156.0, 183.0, 167.0, 177.0, 174.0, 181.0, 192.0, 206.0, 201.0, 218.0, 171.0, 216.0, 149.0, 153.0, 153.0, 146.0, 236.0, 217.0, 111.0, 119.0, 183.0, 179.0, 207.0, 192.0, 197.0, 208.0, 202.0, 194.0, 136.0, 136.0, 156.0, 157.0, 194.0, 214.0, 212.0, 232.0, 153.0, 151.0, 177.0, 185.0, 158.0, 146.0, 219.0, 231.0, 205.0, 203.0, 198.0, 172.0, 250.0, 226.0, 135.0, 158.0, 244.0, 209.0, 125.0, 116.0, 194.0, 216.0, 229.0, 224.0, 201.0, 204.0, 160.0, 190.0, 157.0, 193.0, 258.0, 249.0, 208.0, 219.0, 196.0, 197.0, 138.0, 129.0, 192.0, 215.0, 211.0, 208.0, 140.0, 163.0, 206.0, 204.0, 189.0, 189.0, 171.0, 179.0, 177.0, 168.0, 206.0, 170.0, 173.0, 179.0]}, "sampler_perf": {"mean_env_wait_ms": 2.4209020546417674, "mean_processing_ms": 0.551048674766232, "mean_inference_ms": 2.9680431709223565}, "off_policy_estimator": {}, "info": {"num_steps_trained": 1848000, "num_steps_sampled": 985600, "sample_time_ms": 21397.355, "load_time_ms": 37.117, "grad_time_ms": 8669.98, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 2.646977999612734e-24, "cur_lr": 0.0010000000474974513, "total_loss": -0.0010796785354614258, "policy_loss": -0.006340180989354849, "vf_loss": 59.34244918823242, "vf_explained_var": 0.7488496899604797, "kl": 0.0016171737806871533, "entropy": 1.3474963903427124, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 985600, "episodes_total": 2464, "training_iteration": 77, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-23-24", "timestamp": 1660249404, "time_this_iter_s": 32.688453912734985, "time_total_s": 7824.113248586655, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 
1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": 
false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, 
"entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 7824.113248586655, "timesteps_since_restore": 985600, "iterations_since_restore": 77, "perf": {"cpu_util_percent": 40.12173913043479, "ram_util_percent": 58.68478260869566}}
+{"episode_reward_max": 525.0, "episode_reward_min": 194.0, "episode_reward_mean": 380.13, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 94.0}, "policy_reward_max": {"ppo": 274.0}, "policy_reward_mean": {"ppo": 190.065}, "custom_metrics": {"sparse_reward_mean": 129.4, "sparse_reward_min": 60, "sparse_reward_max": 180, "shaped_reward_mean": 121.33, "shaped_reward_min": 74, "shaped_reward_max": 165, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 12.11, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 18, "onion_pickup_agent_1_mean": 13.42, "onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 19, "useful_onion_pickup_agent_0_mean": 11.42, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 17, "useful_onion_pickup_agent_1_mean": 12.66, "useful_onion_pickup_agent_1_min": 6, "useful_onion_pickup_agent_1_max": 19, "onion_drop_agent_0_mean": 1.12, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 1.24, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.73, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, "useful_onion_drop_agent_1_mean": 0.78, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 10.7, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 17, "potting_onion_agent_1_mean": 11.85, "potting_onion_agent_1_min": 6, "potting_onion_agent_1_max": 19, "dish_pickup_agent_0_mean": 5.24, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 4.71, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 2.94, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 2.57, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 6, "dish_drop_agent_0_mean": 1.19, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.92, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.34, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.26, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 3.93, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 3.73, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 3.73, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 3.45, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.16, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 4, "soup_drop_agent_1_mean": 0.23, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 3, "optimal_onion_potting_agent_0_mean": 10.7, 
"optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 17, "optimal_onion_potting_agent_1_mean": 11.85, "optimal_onion_potting_agent_1_min": 6, "optimal_onion_potting_agent_1_max": 19, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 10.7, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 17, "viable_onion_potting_agent_1_mean": 11.85, "viable_onion_potting_agent_1_min": 6, "viable_onion_potting_agent_1_max": 19, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [416.0, 411.0, 419.0, 358.0, 367.0, 413.0, 427.0, 456.0, 450.0, 461.0, 461.0, 301.0, 405.0, 447.0, 293.0, 285.0, 399.0, 430.0, 422.0, 362.0, 416.0, 525.0, 194.0, 365.0, 407.0, 294.0, 239.0, 304.0, 296.0, 356.0, 387.0, 453.0, 362.0, 399.0, 405.0, 396.0, 272.0, 313.0, 408.0, 444.0, 304.0, 362.0, 304.0, 450.0, 408.0, 370.0, 476.0, 293.0, 453.0, 241.0, 410.0, 453.0, 405.0, 350.0, 350.0, 507.0, 427.0, 393.0, 267.0, 407.0, 419.0, 303.0, 410.0, 378.0, 350.0, 345.0, 376.0, 352.0, 376.0, 305.0, 302.0, 355.0, 447.0, 413.0, 364.0, 453.0, 459.0, 393.0, 368.0, 399.0, 404.0, 402.0, 373.0, 399.0, 381.0, 395.0, 367.0, 404.0, 208.0, 459.0, 287.0, 510.0, 347.0, 324.0, 410.0, 453.0, 404.0, 408.0, 319.0, 344.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [205.0, 211.0, 205.0, 206.0, 200.0, 219.0, 176.0, 182.0, 191.0, 176.0, 204.0, 209.0, 219.0, 208.0, 225.0, 231.0, 219.0, 231.0, 221.0, 240.0, 231.0, 230.0, 152.0, 149.0, 205.0, 200.0, 234.0, 213.0, 143.0, 150.0, 143.0, 142.0, 202.0, 197.0, 220.0, 210.0, 200.0, 222.0, 185.0, 177.0, 215.0, 201.0, 254.0, 271.0, 100.0, 94.0, 186.0, 179.0, 205.0, 202.0, 151.0, 143.0, 108.0, 131.0, 161.0, 143.0, 143.0, 153.0, 171.0, 185.0, 201.0, 186.0, 227.0, 226.0, 183.0, 179.0, 207.0, 192.0, 197.0, 208.0, 202.0, 194.0, 136.0, 136.0, 156.0, 157.0, 194.0, 214.0, 212.0, 232.0, 153.0, 151.0, 177.0, 185.0, 158.0, 146.0, 219.0, 231.0, 205.0, 203.0, 198.0, 172.0, 250.0, 226.0, 135.0, 158.0, 244.0, 
209.0, 125.0, 116.0, 194.0, 216.0, 229.0, 224.0, 201.0, 204.0, 160.0, 190.0, 157.0, 193.0, 258.0, 249.0, 208.0, 219.0, 196.0, 197.0, 138.0, 129.0, 192.0, 215.0, 211.0, 208.0, 140.0, 163.0, 206.0, 204.0, 189.0, 189.0, 171.0, 179.0, 177.0, 168.0, 206.0, 170.0, 173.0, 179.0, 191.0, 185.0, 153.0, 152.0, 132.0, 170.0, 182.0, 173.0, 226.0, 221.0, 212.0, 201.0, 181.0, 183.0, 213.0, 240.0, 238.0, 221.0, 191.0, 202.0, 174.0, 194.0, 194.0, 205.0, 208.0, 196.0, 208.0, 194.0, 195.0, 178.0, 205.0, 194.0, 189.0, 192.0, 221.0, 174.0, 189.0, 178.0, 205.0, 199.0, 104.0, 104.0, 224.0, 235.0, 131.0, 156.0, 236.0, 274.0, 180.0, 167.0, 151.0, 173.0, 199.0, 211.0, 222.0, 231.0, 211.0, 193.0, 194.0, 214.0, 162.0, 157.0, 172.0, 172.0]}, "sampler_perf": {"mean_env_wait_ms": 2.3955189497497584, "mean_processing_ms": 0.5460179273755849, "mean_inference_ms": 2.943733434085924}, "off_policy_estimator": {}, "info": {"num_steps_trained": 1872000, "num_steps_sampled": 998400, "sample_time_ms": 21499.137, "load_time_ms": 37.019, "grad_time_ms": 8919.956, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 1.323488999806367e-24, "cur_lr": 0.0010000000474974513, "total_loss": 0.0033416959922760725, "policy_loss": -0.003114718245342374, "vf_loss": 71.20785522460938, "vf_explained_var": 0.7243476510047913, "kl": 0.001916095265187323, "entropy": 1.3287501335144043, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 998400, "episodes_total": 2496, "training_iteration": 78, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-23-54", "timestamp": 1660249434, "time_this_iter_s": 30.256299018859863, "time_total_s": 7854.3695476055145, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 
1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": 
false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, 
"entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 7854.3695476055145, "timesteps_since_restore": 998400, "iterations_since_restore": 78, "perf": {"cpu_util_percent": 32.448837209302326, "ram_util_percent": 58.1279069767442}}
+{"episode_reward_max": 525.0, "episode_reward_min": 194.0, "episode_reward_mean": 380.19, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 94.0}, "policy_reward_max": {"ppo": 274.0}, "policy_reward_mean": {"ppo": 190.095}, "custom_metrics": {"sparse_reward_mean": 130.0, "sparse_reward_min": 60, "sparse_reward_max": 180, "shaped_reward_mean": 120.19, "shaped_reward_min": 74, "shaped_reward_max": 165, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 12.34, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 18, "onion_pickup_agent_1_mean": 13.71, "onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 11.45, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 16, "useful_onion_pickup_agent_1_mean": 12.72, "useful_onion_pickup_agent_1_min": 6, "useful_onion_pickup_agent_1_max": 20, "onion_drop_agent_0_mean": 1.47, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 1.4, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.97, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.84, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 10.48, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 16, "potting_onion_agent_1_mean": 11.9, "potting_onion_agent_1_min": 6, "potting_onion_agent_1_max": 18, "dish_pickup_agent_0_mean": 5.1, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 4.51, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 2.92, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 2.63, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 6, "dish_drop_agent_0_mean": 1.12, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 0.84, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.22, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.22, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 3.88, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 3.67, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 3.69, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 3.36, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 5, "soup_drop_agent_0_mean": 0.16, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 4, "soup_drop_agent_1_mean": 0.26, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 3, "optimal_onion_potting_agent_0_mean": 10.48, 
"optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 16, "optimal_onion_potting_agent_1_mean": 11.9, "optimal_onion_potting_agent_1_min": 6, "optimal_onion_potting_agent_1_max": 18, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 10.48, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 16, "viable_onion_potting_agent_1_mean": 11.9, "viable_onion_potting_agent_1_min": 6, "viable_onion_potting_agent_1_max": 18, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [408.0, 398.0, 294.0, 405.0, 291.0, 450.0, 350.0, 419.0, 465.0, 462.0, 312.0, 465.0, 370.0, 416.0, 510.0, 290.0, 237.0, 376.0, 404.0, 407.0, 344.0, 450.0, 462.0, 351.0, 405.0, 333.0, 344.0, 237.0, 288.0, 465.0, 384.0, 353.0, 350.0, 345.0, 376.0, 352.0, 376.0, 305.0, 302.0, 355.0, 447.0, 413.0, 364.0, 453.0, 459.0, 393.0, 368.0, 399.0, 404.0, 402.0, 373.0, 399.0, 381.0, 395.0, 367.0, 404.0, 208.0, 459.0, 287.0, 510.0, 347.0, 324.0, 410.0, 453.0, 404.0, 408.0, 319.0, 344.0, 416.0, 411.0, 419.0, 358.0, 367.0, 413.0, 427.0, 456.0, 450.0, 461.0, 461.0, 301.0, 405.0, 447.0, 293.0, 285.0, 399.0, 430.0, 422.0, 362.0, 416.0, 525.0, 194.0, 365.0, 407.0, 294.0, 239.0, 304.0, 296.0, 356.0, 387.0, 453.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [199.0, 209.0, 194.0, 204.0, 159.0, 135.0, 208.0, 197.0, 136.0, 155.0, 228.0, 222.0, 162.0, 188.0, 209.0, 210.0, 231.0, 234.0, 217.0, 245.0, 140.0, 172.0, 236.0, 229.0, 184.0, 186.0, 226.0, 190.0, 267.0, 243.0, 150.0, 140.0, 117.0, 120.0, 187.0, 189.0, 197.0, 207.0, 203.0, 204.0, 164.0, 180.0, 221.0, 229.0, 227.0, 235.0, 171.0, 180.0, 199.0, 206.0, 159.0, 174.0, 159.0, 185.0, 119.0, 118.0, 140.0, 148.0, 229.0, 236.0, 180.0, 204.0, 175.0, 178.0, 171.0, 179.0, 177.0, 168.0, 206.0, 170.0, 173.0, 179.0, 191.0, 185.0, 153.0, 152.0, 132.0, 170.0, 182.0, 173.0, 226.0, 221.0, 212.0, 201.0, 181.0, 183.0, 213.0, 240.0, 238.0, 221.0, 191.0, 202.0, 174.0, 194.0, 194.0, 205.0, 208.0, 
196.0, 208.0, 194.0, 195.0, 178.0, 205.0, 194.0, 189.0, 192.0, 221.0, 174.0, 189.0, 178.0, 205.0, 199.0, 104.0, 104.0, 224.0, 235.0, 131.0, 156.0, 236.0, 274.0, 180.0, 167.0, 151.0, 173.0, 199.0, 211.0, 222.0, 231.0, 211.0, 193.0, 194.0, 214.0, 162.0, 157.0, 172.0, 172.0, 205.0, 211.0, 205.0, 206.0, 200.0, 219.0, 176.0, 182.0, 191.0, 176.0, 204.0, 209.0, 219.0, 208.0, 225.0, 231.0, 219.0, 231.0, 221.0, 240.0, 231.0, 230.0, 152.0, 149.0, 205.0, 200.0, 234.0, 213.0, 143.0, 150.0, 143.0, 142.0, 202.0, 197.0, 220.0, 210.0, 200.0, 222.0, 185.0, 177.0, 215.0, 201.0, 254.0, 271.0, 100.0, 94.0, 186.0, 179.0, 205.0, 202.0, 151.0, 143.0, 108.0, 131.0, 161.0, 143.0, 143.0, 153.0, 171.0, 185.0, 201.0, 186.0, 227.0, 226.0]}, "sampler_perf": {"mean_env_wait_ms": 2.3706807134794996, "mean_processing_ms": 0.5410767279997776, "mean_inference_ms": 2.9190331024760856}, "off_policy_estimator": {}, "info": {"num_steps_trained": 1896000, "num_steps_sampled": 1011200, "sample_time_ms": 21546.342, "load_time_ms": 37.021, "grad_time_ms": 9020.101, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 6.617444999031835e-25, "cur_lr": 0.0010000000474974513, "total_loss": -0.0019535624887794256, "policy_loss": -0.0075730024836957455, "vf_loss": 62.825687408447266, "vf_explained_var": 0.7674410939216614, "kl": 0.001638473360799253, "entropy": 1.3262617588043213, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1011200, "episodes_total": 2528, "training_iteration": 79, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-24-22", "timestamp": 1660249462, "time_this_iter_s": 28.0881450176239, "time_total_s": 7882.457692623138, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", 
"num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, 
"in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, 
"entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 7882.457692623138, "timesteps_since_restore": 1011200, "iterations_since_restore": 79, "perf": {"cpu_util_percent": 32.120000000000005, "ram_util_percent": 58.1375}}
+{"episode_reward_max": 525.0, "episode_reward_min": 9.0, "episode_reward_mean": 390.28, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 3.0}, "policy_reward_max": {"ppo": 271.0}, "policy_reward_mean": {"ppo": 195.14}, "custom_metrics": {"sparse_reward_mean": 134.0, "sparse_reward_min": 0, "sparse_reward_max": 180, "shaped_reward_mean": 122.28, "shaped_reward_min": 9, "shaped_reward_max": 165, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 12.84, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 18, "onion_pickup_agent_1_mean": 13.96, "onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 11.85, "useful_onion_pickup_agent_0_min": 2, "useful_onion_pickup_agent_0_max": 17, "useful_onion_pickup_agent_1_mean": 12.82, "useful_onion_pickup_agent_1_min": 2, "useful_onion_pickup_agent_1_max": 20, "onion_drop_agent_0_mean": 1.6, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 1.51, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 1.11, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.91, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 10.79, "potting_onion_agent_0_min": 1, "potting_onion_agent_0_max": 16, "potting_onion_agent_1_mean": 11.99, "potting_onion_agent_1_min": 2, "potting_onion_agent_1_max": 17, "dish_pickup_agent_0_mean": 4.93, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 4.67, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 2.89, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 2.84, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 6, "dish_drop_agent_0_mean": 1.09, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 0.87, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.24, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.22, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 3.83, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 3.83, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 3.64, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 3.5, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.14, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 3, "soup_drop_agent_1_mean": 0.27, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 5, "optimal_onion_potting_agent_0_mean": 10.79, 
"optimal_onion_potting_agent_0_min": 1, "optimal_onion_potting_agent_0_max": 16, "optimal_onion_potting_agent_1_mean": 11.99, "optimal_onion_potting_agent_1_min": 2, "optimal_onion_potting_agent_1_max": 17, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 10.79, "viable_onion_potting_agent_0_min": 1, "viable_onion_potting_agent_0_max": 16, "viable_onion_potting_agent_1_mean": 11.99, "viable_onion_potting_agent_1_min": 2, "viable_onion_potting_agent_1_max": 17, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [453.0, 516.0, 456.0, 467.0, 470.0, 410.0, 9.0, 464.0, 390.0, 330.0, 456.0, 516.0, 297.0, 330.0, 450.0, 465.0, 288.0, 413.0, 393.0, 410.0, 455.0, 456.0, 402.0, 455.0, 367.0, 516.0, 441.0, 459.0, 438.0, 408.0, 399.0, 410.0, 404.0, 408.0, 319.0, 344.0, 416.0, 411.0, 419.0, 358.0, 367.0, 413.0, 427.0, 456.0, 450.0, 461.0, 461.0, 301.0, 405.0, 447.0, 293.0, 285.0, 399.0, 430.0, 422.0, 362.0, 416.0, 525.0, 194.0, 365.0, 407.0, 294.0, 239.0, 304.0, 296.0, 356.0, 387.0, 453.0, 408.0, 398.0, 294.0, 405.0, 291.0, 450.0, 350.0, 419.0, 465.0, 462.0, 312.0, 465.0, 370.0, 416.0, 510.0, 290.0, 237.0, 376.0, 404.0, 407.0, 344.0, 450.0, 462.0, 351.0, 405.0, 333.0, 344.0, 237.0, 288.0, 465.0, 384.0, 353.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [222.0, 231.0, 269.0, 247.0, 214.0, 242.0, 239.0, 228.0, 228.0, 242.0, 198.0, 212.0, 3.0, 6.0, 232.0, 232.0, 188.0, 202.0, 163.0, 167.0, 234.0, 222.0, 253.0, 263.0, 162.0, 135.0, 159.0, 171.0, 221.0, 229.0, 230.0, 235.0, 145.0, 143.0, 202.0, 211.0, 201.0, 192.0, 191.0, 219.0, 225.0, 230.0, 231.0, 225.0, 190.0, 212.0, 219.0, 236.0, 179.0, 188.0, 270.0, 246.0, 227.0, 214.0, 226.0, 233.0, 207.0, 231.0, 198.0, 210.0, 196.0, 203.0, 203.0, 207.0, 211.0, 193.0, 194.0, 214.0, 162.0, 157.0, 172.0, 172.0, 205.0, 211.0, 205.0, 206.0, 200.0, 219.0, 176.0, 182.0, 191.0, 176.0, 204.0, 209.0, 219.0, 208.0, 225.0, 231.0, 219.0, 231.0, 221.0, 240.0, 231.0, 230.0, 152.0, 149.0, 205.0, 200.0, 
234.0, 213.0, 143.0, 150.0, 143.0, 142.0, 202.0, 197.0, 220.0, 210.0, 200.0, 222.0, 185.0, 177.0, 215.0, 201.0, 254.0, 271.0, 100.0, 94.0, 186.0, 179.0, 205.0, 202.0, 151.0, 143.0, 108.0, 131.0, 161.0, 143.0, 143.0, 153.0, 171.0, 185.0, 201.0, 186.0, 227.0, 226.0, 199.0, 209.0, 194.0, 204.0, 159.0, 135.0, 208.0, 197.0, 136.0, 155.0, 228.0, 222.0, 162.0, 188.0, 209.0, 210.0, 231.0, 234.0, 217.0, 245.0, 140.0, 172.0, 236.0, 229.0, 184.0, 186.0, 226.0, 190.0, 267.0, 243.0, 150.0, 140.0, 117.0, 120.0, 187.0, 189.0, 197.0, 207.0, 203.0, 204.0, 164.0, 180.0, 221.0, 229.0, 227.0, 235.0, 171.0, 180.0, 199.0, 206.0, 159.0, 174.0, 159.0, 185.0, 119.0, 118.0, 140.0, 148.0, 229.0, 236.0, 180.0, 204.0, 175.0, 178.0]}, "sampler_perf": {"mean_env_wait_ms": 2.3463084552547957, "mean_processing_ms": 0.5362072894726034, "mean_inference_ms": 2.894108730963018}, "off_policy_estimator": {}, "info": {"num_steps_trained": 1920000, "num_steps_sampled": 1024000, "sample_time_ms": 21336.358, "load_time_ms": 36.943, "grad_time_ms": 8894.576, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 3.3087224995159173e-25, "cur_lr": 0.0010000000474974513, "total_loss": -0.0009382636635564268, "policy_loss": -0.007344415877014399, "vf_loss": 70.56519317626953, "vf_explained_var": 0.7276310324668884, "kl": 0.001774398609995842, "entropy": 1.3007346391677856, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1024000, "episodes_total": 2560, "training_iteration": 80, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-24-48", "timestamp": 1660249488, "time_this_iter_s": 25.79700207710266, "time_total_s": 7908.254694700241, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 7908.254694700241, "timesteps_since_restore": 1024000, "iterations_since_restore": 80, "perf": {"cpu_util_percent": 33.88055555555556, "ram_util_percent": 58.030555555555566}}
+{"episode_reward_max": 522.0, "episode_reward_min": 9.0, "episode_reward_mean": 403.96, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 3.0}, "policy_reward_max": {"ppo": 270.0}, "policy_reward_mean": {"ppo": 201.98}, "custom_metrics": {"sparse_reward_mean": 139.0, "sparse_reward_min": 0, "sparse_reward_max": 180, "shaped_reward_mean": 125.96, "shaped_reward_min": 9, "shaped_reward_max": 162, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 13.08, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 14.26, "onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 12.09, "useful_onion_pickup_agent_0_min": 2, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 13.06, "useful_onion_pickup_agent_1_min": 2, "useful_onion_pickup_agent_1_max": 20, "onion_drop_agent_0_mean": 1.53, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 1.5, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.96, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.9, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 11.08, "potting_onion_agent_0_min": 1, "potting_onion_agent_0_max": 16, "potting_onion_agent_1_mean": 12.3, "potting_onion_agent_1_min": 2, "potting_onion_agent_1_max": 17, "dish_pickup_agent_0_mean": 5.04, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 4.64, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 3.08, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 2.91, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 6, "dish_drop_agent_0_mean": 1.09, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 0.72, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.3, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.23, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 3.96, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 3.91, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 3.72, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 3.62, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.16, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 3, "soup_drop_agent_1_mean": 0.23, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 5, "optimal_onion_potting_agent_0_mean": 11.08, 
"optimal_onion_potting_agent_0_min": 1, "optimal_onion_potting_agent_0_max": 16, "optimal_onion_potting_agent_1_mean": 12.3, "optimal_onion_potting_agent_1_min": 2, "optimal_onion_potting_agent_1_max": 17, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 11.08, "viable_onion_potting_agent_0_min": 1, "viable_onion_potting_agent_0_max": 16, "viable_onion_potting_agent_1_mean": 12.3, "viable_onion_potting_agent_1_min": 2, "viable_onion_potting_agent_1_max": 17, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [459.0, 342.0, 513.0, 519.0, 410.0, 390.0, 424.0, 458.0, 362.0, 373.0, 399.0, 358.0, 456.0, 421.0, 459.0, 467.0, 419.0, 359.0, 327.0, 419.0, 393.0, 430.0, 447.0, 447.0, 522.0, 398.0, 405.0, 507.0, 468.0, 465.0, 410.0, 344.0, 296.0, 356.0, 387.0, 453.0, 408.0, 398.0, 294.0, 405.0, 291.0, 450.0, 350.0, 419.0, 465.0, 462.0, 312.0, 465.0, 370.0, 416.0, 510.0, 290.0, 237.0, 376.0, 404.0, 407.0, 344.0, 450.0, 462.0, 351.0, 405.0, 333.0, 344.0, 237.0, 288.0, 465.0, 384.0, 353.0, 453.0, 516.0, 456.0, 467.0, 470.0, 410.0, 9.0, 464.0, 390.0, 330.0, 456.0, 516.0, 297.0, 330.0, 450.0, 465.0, 288.0, 413.0, 393.0, 410.0, 455.0, 456.0, 402.0, 455.0, 367.0, 516.0, 441.0, 459.0, 438.0, 408.0, 399.0, 410.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [235.0, 224.0, 165.0, 177.0, 253.0, 260.0, 262.0, 257.0, 197.0, 213.0, 191.0, 199.0, 199.0, 225.0, 240.0, 218.0, 181.0, 181.0, 193.0, 180.0, 207.0, 192.0, 190.0, 168.0, 231.0, 225.0, 216.0, 205.0, 225.0, 234.0, 230.0, 237.0, 206.0, 213.0, 180.0, 179.0, 155.0, 172.0, 208.0, 211.0, 202.0, 191.0, 219.0, 211.0, 214.0, 233.0, 212.0, 235.0, 265.0, 257.0, 204.0, 194.0, 205.0, 200.0, 257.0, 250.0, 239.0, 229.0, 249.0, 216.0, 211.0, 199.0, 162.0, 182.0, 143.0, 153.0, 171.0, 185.0, 201.0, 186.0, 227.0, 226.0, 199.0, 209.0, 194.0, 204.0, 159.0, 135.0, 208.0, 197.0, 136.0, 155.0, 228.0, 222.0, 162.0, 188.0, 209.0, 210.0, 231.0, 234.0, 217.0, 245.0, 140.0, 172.0, 236.0, 229.0, 184.0, 
186.0, 226.0, 190.0, 267.0, 243.0, 150.0, 140.0, 117.0, 120.0, 187.0, 189.0, 197.0, 207.0, 203.0, 204.0, 164.0, 180.0, 221.0, 229.0, 227.0, 235.0, 171.0, 180.0, 199.0, 206.0, 159.0, 174.0, 159.0, 185.0, 119.0, 118.0, 140.0, 148.0, 229.0, 236.0, 180.0, 204.0, 175.0, 178.0, 222.0, 231.0, 269.0, 247.0, 214.0, 242.0, 239.0, 228.0, 228.0, 242.0, 198.0, 212.0, 3.0, 6.0, 232.0, 232.0, 188.0, 202.0, 163.0, 167.0, 234.0, 222.0, 253.0, 263.0, 162.0, 135.0, 159.0, 171.0, 221.0, 229.0, 230.0, 235.0, 145.0, 143.0, 202.0, 211.0, 201.0, 192.0, 191.0, 219.0, 225.0, 230.0, 231.0, 225.0, 190.0, 212.0, 219.0, 236.0, 179.0, 188.0, 270.0, 246.0, 227.0, 214.0, 226.0, 233.0, 207.0, 231.0, 198.0, 210.0, 196.0, 203.0, 203.0, 207.0]}, "sampler_perf": {"mean_env_wait_ms": 2.3225268766730203, "mean_processing_ms": 0.5314526800460904, "mean_inference_ms": 2.8694240871343926}, "off_policy_estimator": {}, "info": {"num_steps_trained": 1944000, "num_steps_sampled": 1036800, "sample_time_ms": 21260.609, "load_time_ms": 36.811, "grad_time_ms": 8748.654, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 1.6543612497579586e-25, "cur_lr": 0.0010000000474974513, "total_loss": -0.0011773156002163887, "policy_loss": -0.006681745406240225, "vf_loss": 61.517730712890625, "vf_explained_var": 0.7553827166557312, "kl": 0.0021572383120656013, "entropy": 1.2946891784667969, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1036800, "episodes_total": 2592, "training_iteration": 81, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-25-15", "timestamp": 1660249515, "time_this_iter_s": 26.438808917999268, "time_total_s": 7934.69350361824, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 
1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": 
false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, 
"entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 7934.69350361824, "timesteps_since_restore": 1036800, "iterations_since_restore": 81, "perf": {"cpu_util_percent": 30.592105263157894, "ram_util_percent": 58.057894736842115}}
+{"episode_reward_max": 522.0, "episode_reward_min": 9.0, "episode_reward_mean": 412.5, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 3.0}, "policy_reward_max": {"ppo": 270.0}, "policy_reward_mean": {"ppo": 206.25}, "custom_metrics": {"sparse_reward_mean": 142.0, "sparse_reward_min": 0, "sparse_reward_max": 180, "shaped_reward_mean": 128.5, "shaped_reward_min": 9, "shaped_reward_max": 162, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 13.49, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 14.04, "onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 20, "useful_onion_pickup_agent_0_mean": 12.53, "useful_onion_pickup_agent_0_min": 2, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 13.01, "useful_onion_pickup_agent_1_min": 2, "useful_onion_pickup_agent_1_max": 18, "onion_drop_agent_0_mean": 1.49, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 7, "onion_drop_agent_1_mean": 1.34, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.9, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, "useful_onion_drop_agent_1_mean": 0.78, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 11.56, "potting_onion_agent_0_min": 1, "potting_onion_agent_0_max": 17, "potting_onion_agent_1_mean": 12.31, "potting_onion_agent_1_min": 2, "potting_onion_agent_1_max": 17, "dish_pickup_agent_0_mean": 5.19, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 4.95, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 3.02, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 2.86, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 6, "dish_drop_agent_0_mean": 1.11, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 0.93, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.31, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.3, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 4.12, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.06, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 3.82, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 3.76, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.21, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 3, "soup_drop_agent_1_mean": 0.19, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 5, "optimal_onion_potting_agent_0_mean": 11.56, 
"optimal_onion_potting_agent_0_min": 1, "optimal_onion_potting_agent_0_max": 17, "optimal_onion_potting_agent_1_mean": 12.31, "optimal_onion_potting_agent_1_min": 2, "optimal_onion_potting_agent_1_max": 17, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 11.56, "viable_onion_potting_agent_0_min": 1, "viable_onion_potting_agent_0_max": 17, "viable_onion_potting_agent_1_mean": 12.31, "viable_onion_potting_agent_1_min": 2, "viable_onion_potting_agent_1_max": 17, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [316.0, 235.0, 462.0, 501.0, 449.0, 411.0, 357.0, 401.0, 462.0, 408.0, 393.0, 410.0, 347.0, 401.0, 393.0, 444.0, 363.0, 470.0, 470.0, 413.0, 347.0, 459.0, 516.0, 456.0, 476.0, 398.0, 353.0, 401.0, 421.0, 413.0, 300.0, 355.0, 288.0, 465.0, 384.0, 353.0, 453.0, 516.0, 456.0, 467.0, 470.0, 410.0, 9.0, 464.0, 390.0, 330.0, 456.0, 516.0, 297.0, 330.0, 450.0, 465.0, 288.0, 413.0, 393.0, 410.0, 455.0, 456.0, 402.0, 455.0, 367.0, 516.0, 441.0, 459.0, 438.0, 408.0, 399.0, 410.0, 459.0, 342.0, 513.0, 519.0, 410.0, 390.0, 424.0, 458.0, 362.0, 373.0, 399.0, 358.0, 456.0, 421.0, 459.0, 467.0, 419.0, 359.0, 327.0, 419.0, 393.0, 430.0, 447.0, 447.0, 522.0, 398.0, 405.0, 507.0, 468.0, 465.0, 410.0, 344.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [165.0, 151.0, 116.0, 119.0, 239.0, 223.0, 252.0, 249.0, 229.0, 220.0, 204.0, 207.0, 190.0, 167.0, 210.0, 191.0, 228.0, 234.0, 200.0, 208.0, 214.0, 179.0, 203.0, 207.0, 179.0, 168.0, 176.0, 225.0, 185.0, 208.0, 216.0, 228.0, 160.0, 203.0, 233.0, 237.0, 228.0, 242.0, 204.0, 209.0, 186.0, 161.0, 218.0, 241.0, 251.0, 265.0, 230.0, 226.0, 234.0, 242.0, 185.0, 213.0, 186.0, 167.0, 208.0, 193.0, 225.0, 196.0, 218.0, 195.0, 152.0, 148.0, 188.0, 167.0, 140.0, 148.0, 229.0, 236.0, 180.0, 204.0, 175.0, 178.0, 222.0, 231.0, 269.0, 247.0, 214.0, 242.0, 239.0, 228.0, 228.0, 242.0, 198.0, 212.0, 3.0, 6.0, 232.0, 232.0, 188.0, 202.0, 163.0, 167.0, 234.0, 222.0, 253.0, 263.0, 162.0, 135.0, 
159.0, 171.0, 221.0, 229.0, 230.0, 235.0, 145.0, 143.0, 202.0, 211.0, 201.0, 192.0, 191.0, 219.0, 225.0, 230.0, 231.0, 225.0, 190.0, 212.0, 219.0, 236.0, 179.0, 188.0, 270.0, 246.0, 227.0, 214.0, 226.0, 233.0, 207.0, 231.0, 198.0, 210.0, 196.0, 203.0, 203.0, 207.0, 235.0, 224.0, 165.0, 177.0, 253.0, 260.0, 262.0, 257.0, 197.0, 213.0, 191.0, 199.0, 199.0, 225.0, 240.0, 218.0, 181.0, 181.0, 193.0, 180.0, 207.0, 192.0, 190.0, 168.0, 231.0, 225.0, 216.0, 205.0, 225.0, 234.0, 230.0, 237.0, 206.0, 213.0, 180.0, 179.0, 155.0, 172.0, 208.0, 211.0, 202.0, 191.0, 219.0, 211.0, 214.0, 233.0, 212.0, 235.0, 265.0, 257.0, 204.0, 194.0, 205.0, 200.0, 257.0, 250.0, 239.0, 229.0, 249.0, 216.0, 211.0, 199.0, 162.0, 182.0]}, "sampler_perf": {"mean_env_wait_ms": 2.299333204784243, "mean_processing_ms": 0.5268100926342811, "mean_inference_ms": 2.8452761548654255}, "off_policy_estimator": {}, "info": {"num_steps_trained": 1968000, "num_steps_sampled": 1049600, "sample_time_ms": 21056.327, "load_time_ms": 36.751, "grad_time_ms": 8586.844, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 8.271806248789793e-26, "cur_lr": 0.0010000000474974513, "total_loss": -0.0015234133461490273, "policy_loss": -0.007057014852762222, "vf_loss": 61.880123138427734, "vf_explained_var": 0.7578676342964172, "kl": 0.002027077367529273, "entropy": 1.3088246583938599, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1049600, "episodes_total": 2624, "training_iteration": 82, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-25-41", "timestamp": 1660249541, "time_this_iter_s": 26.555142879486084, "time_total_s": 7961.248646497726, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 7961.248646497726, "timesteps_since_restore": 1049600, "iterations_since_restore": 82, "perf": {"cpu_util_percent": 33.34324324324324, "ram_util_percent": 58.07837837837838}}
+{"episode_reward_max": 522.0, "episode_reward_min": 177.0, "episode_reward_mean": 411.12, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 83.0}, "policy_reward_max": {"ppo": 266.0}, "policy_reward_mean": {"ppo": 205.56}, "custom_metrics": {"sparse_reward_mean": 141.6, "sparse_reward_min": 60, "sparse_reward_max": 180, "shaped_reward_mean": 127.92, "shaped_reward_min": 57, "shaped_reward_max": 162, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 13.28, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 14.01, "onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 19, "useful_onion_pickup_agent_0_mean": 12.41, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 13.19, "useful_onion_pickup_agent_1_min": 6, "useful_onion_pickup_agent_1_max": 18, "onion_drop_agent_0_mean": 1.37, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 7, "onion_drop_agent_1_mean": 1.26, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.75, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, "useful_onion_drop_agent_1_mean": 0.72, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 11.55, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 17, "potting_onion_agent_1_mean": 12.4, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 18, "dish_pickup_agent_0_mean": 5.34, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 4.86, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 2.89, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 2.65, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 6, "dish_drop_agent_0_mean": 1.13, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 0.89, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.37, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.32, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 4.2, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 3.94, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 3.91, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 3.73, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.2, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.11, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 11.55, 
"optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 17, "optimal_onion_potting_agent_1_mean": 12.4, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 18, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 11.55, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 17, "viable_onion_potting_agent_1_mean": 12.4, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 18, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [350.0, 419.0, 422.0, 465.0, 177.0, 450.0, 415.0, 344.0, 282.0, 467.0, 407.0, 404.0, 516.0, 456.0, 396.0, 453.0, 458.0, 418.0, 295.0, 353.0, 406.0, 351.0, 516.0, 519.0, 444.0, 412.0, 444.0, 467.0, 287.0, 406.0, 237.0, 450.0, 438.0, 408.0, 399.0, 410.0, 459.0, 342.0, 513.0, 519.0, 410.0, 390.0, 424.0, 458.0, 362.0, 373.0, 399.0, 358.0, 456.0, 421.0, 459.0, 467.0, 419.0, 359.0, 327.0, 419.0, 393.0, 430.0, 447.0, 447.0, 522.0, 398.0, 405.0, 507.0, 468.0, 465.0, 410.0, 344.0, 316.0, 235.0, 462.0, 501.0, 449.0, 411.0, 357.0, 401.0, 462.0, 408.0, 393.0, 410.0, 347.0, 401.0, 393.0, 444.0, 363.0, 470.0, 470.0, 413.0, 347.0, 459.0, 516.0, 456.0, 476.0, 398.0, 353.0, 401.0, 421.0, 413.0, 300.0, 355.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [177.0, 173.0, 202.0, 217.0, 200.0, 222.0, 240.0, 225.0, 94.0, 83.0, 227.0, 223.0, 205.0, 210.0, 171.0, 173.0, 139.0, 143.0, 239.0, 228.0, 194.0, 213.0, 189.0, 215.0, 250.0, 266.0, 230.0, 226.0, 213.0, 183.0, 230.0, 223.0, 237.0, 221.0, 205.0, 213.0, 150.0, 145.0, 159.0, 194.0, 206.0, 200.0, 179.0, 172.0, 251.0, 265.0, 255.0, 264.0, 210.0, 234.0, 199.0, 213.0, 233.0, 211.0, 233.0, 234.0, 143.0, 144.0, 212.0, 194.0, 120.0, 117.0, 226.0, 224.0, 207.0, 231.0, 198.0, 210.0, 196.0, 203.0, 203.0, 207.0, 235.0, 224.0, 165.0, 177.0, 253.0, 260.0, 262.0, 257.0, 197.0, 213.0, 191.0, 199.0, 199.0, 225.0, 240.0, 218.0, 181.0, 181.0, 193.0, 180.0, 207.0, 192.0, 190.0, 168.0, 231.0, 
225.0, 216.0, 205.0, 225.0, 234.0, 230.0, 237.0, 206.0, 213.0, 180.0, 179.0, 155.0, 172.0, 208.0, 211.0, 202.0, 191.0, 219.0, 211.0, 214.0, 233.0, 212.0, 235.0, 265.0, 257.0, 204.0, 194.0, 205.0, 200.0, 257.0, 250.0, 239.0, 229.0, 249.0, 216.0, 211.0, 199.0, 162.0, 182.0, 165.0, 151.0, 116.0, 119.0, 239.0, 223.0, 252.0, 249.0, 229.0, 220.0, 204.0, 207.0, 190.0, 167.0, 210.0, 191.0, 228.0, 234.0, 200.0, 208.0, 214.0, 179.0, 203.0, 207.0, 179.0, 168.0, 176.0, 225.0, 185.0, 208.0, 216.0, 228.0, 160.0, 203.0, 233.0, 237.0, 228.0, 242.0, 204.0, 209.0, 186.0, 161.0, 218.0, 241.0, 251.0, 265.0, 230.0, 226.0, 234.0, 242.0, 185.0, 213.0, 186.0, 167.0, 208.0, 193.0, 225.0, 196.0, 218.0, 195.0, 152.0, 148.0, 188.0, 167.0]}, "sampler_perf": {"mean_env_wait_ms": 2.2767232665412354, "mean_processing_ms": 0.5222866661980672, "mean_inference_ms": 2.821797669244603}, "off_policy_estimator": {}, "info": {"num_steps_trained": 1992000, "num_steps_sampled": 1062400, "sample_time_ms": 20905.085, "load_time_ms": 36.646, "grad_time_ms": 8463.059, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 4.1359031243948966e-26, "cur_lr": 0.0010000000474974513, "total_loss": -0.00031348783522844315, "policy_loss": -0.006104966159909964, "vf_loss": 64.42190551757812, "vf_explained_var": 0.7651865482330322, "kl": 0.0017986185848712921, "entropy": 1.3014076948165894, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1062400, "episodes_total": 2656, "training_iteration": 83, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-26-09", "timestamp": 1660249569, "time_this_iter_s": 27.776076078414917, "time_total_s": 7989.024722576141, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", 
"num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, 
"in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, 
"entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 7989.024722576141, "timesteps_since_restore": 1062400, "iterations_since_restore": 83, "perf": {"cpu_util_percent": 33.82000000000001, "ram_util_percent": 58.82000000000001}}
+{"episode_reward_max": 573.0, "episode_reward_min": 126.0, "episode_reward_mean": 408.86, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 63.0}, "policy_reward_max": {"ppo": 293.0}, "policy_reward_mean": {"ppo": 204.43}, "custom_metrics": {"sparse_reward_mean": 141.0, "sparse_reward_min": 40, "sparse_reward_max": 200, "shaped_reward_mean": 126.86, "shaped_reward_min": 46, "shaped_reward_max": 173, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 13.31, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 13.9, "onion_pickup_agent_1_min": 4, "onion_pickup_agent_1_max": 20, "useful_onion_pickup_agent_0_mean": 12.53, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 19, "useful_onion_pickup_agent_1_mean": 13.04, "useful_onion_pickup_agent_1_min": 4, "useful_onion_pickup_agent_1_max": 19, "onion_drop_agent_0_mean": 1.42, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 7, "onion_drop_agent_1_mean": 1.24, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.74, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, "useful_onion_drop_agent_1_mean": 0.72, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 11.51, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 12.31, "potting_onion_agent_1_min": 4, "potting_onion_agent_1_max": 18, "dish_pickup_agent_0_mean": 5.4, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 4.91, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 2.89, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 2.51, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 6, "dish_drop_agent_0_mean": 1.07, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 1.09, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.35, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.37, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 4.3, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 3.74, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.07, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 3.53, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.15, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.12, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 11.51, 
"optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 12.31, "optimal_onion_potting_agent_1_min": 4, "optimal_onion_potting_agent_1_max": 18, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 11.51, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 12.31, "viable_onion_potting_agent_1_min": 4, "viable_onion_potting_agent_1_max": 18, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [126.0, 424.0, 458.0, 418.0, 404.0, 465.0, 228.0, 353.0, 465.0, 350.0, 375.0, 441.0, 422.0, 418.0, 467.0, 441.0, 339.0, 370.0, 404.0, 467.0, 453.0, 458.0, 465.0, 345.0, 476.0, 459.0, 516.0, 573.0, 467.0, 459.0, 393.0, 413.0, 468.0, 465.0, 410.0, 344.0, 316.0, 235.0, 462.0, 501.0, 449.0, 411.0, 357.0, 401.0, 462.0, 408.0, 393.0, 410.0, 347.0, 401.0, 393.0, 444.0, 363.0, 470.0, 470.0, 413.0, 347.0, 459.0, 516.0, 456.0, 476.0, 398.0, 353.0, 401.0, 421.0, 413.0, 300.0, 355.0, 350.0, 419.0, 422.0, 465.0, 177.0, 450.0, 415.0, 344.0, 282.0, 467.0, 407.0, 404.0, 516.0, 456.0, 396.0, 453.0, 458.0, 418.0, 295.0, 353.0, 406.0, 351.0, 516.0, 519.0, 444.0, 412.0, 444.0, 467.0, 287.0, 406.0, 237.0, 450.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [63.0, 63.0, 224.0, 200.0, 224.0, 234.0, 210.0, 208.0, 202.0, 202.0, 240.0, 225.0, 117.0, 111.0, 173.0, 180.0, 223.0, 242.0, 178.0, 172.0, 181.0, 194.0, 219.0, 222.0, 222.0, 200.0, 207.0, 211.0, 230.0, 237.0, 232.0, 209.0, 165.0, 174.0, 185.0, 185.0, 204.0, 200.0, 230.0, 237.0, 238.0, 215.0, 256.0, 202.0, 236.0, 229.0, 194.0, 151.0, 226.0, 250.0, 231.0, 228.0, 252.0, 264.0, 293.0, 280.0, 238.0, 229.0, 222.0, 237.0, 207.0, 186.0, 212.0, 201.0, 239.0, 229.0, 249.0, 216.0, 211.0, 199.0, 162.0, 182.0, 165.0, 151.0, 116.0, 119.0, 239.0, 223.0, 252.0, 249.0, 229.0, 220.0, 204.0, 207.0, 190.0, 167.0, 210.0, 191.0, 228.0, 234.0, 200.0, 208.0, 214.0, 179.0, 203.0, 207.0, 179.0, 
168.0, 176.0, 225.0, 185.0, 208.0, 216.0, 228.0, 160.0, 203.0, 233.0, 237.0, 228.0, 242.0, 204.0, 209.0, 186.0, 161.0, 218.0, 241.0, 251.0, 265.0, 230.0, 226.0, 234.0, 242.0, 185.0, 213.0, 186.0, 167.0, 208.0, 193.0, 225.0, 196.0, 218.0, 195.0, 152.0, 148.0, 188.0, 167.0, 177.0, 173.0, 202.0, 217.0, 200.0, 222.0, 240.0, 225.0, 94.0, 83.0, 227.0, 223.0, 205.0, 210.0, 171.0, 173.0, 139.0, 143.0, 239.0, 228.0, 194.0, 213.0, 189.0, 215.0, 250.0, 266.0, 230.0, 226.0, 213.0, 183.0, 230.0, 223.0, 237.0, 221.0, 205.0, 213.0, 150.0, 145.0, 159.0, 194.0, 206.0, 200.0, 179.0, 172.0, 251.0, 265.0, 255.0, 264.0, 210.0, 234.0, 199.0, 213.0, 233.0, 211.0, 233.0, 234.0, 143.0, 144.0, 212.0, 194.0, 120.0, 117.0, 226.0, 224.0]}, "sampler_perf": {"mean_env_wait_ms": 2.254677940793998, "mean_processing_ms": 0.5178804318399313, "mean_inference_ms": 2.7992231247898705}, "off_policy_estimator": {}, "info": {"num_steps_trained": 2016000, "num_steps_sampled": 1075200, "sample_time_ms": 20798.01, "load_time_ms": 36.647, "grad_time_ms": 8403.274, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 2.0679515621974483e-26, "cur_lr": 0.0010000000474974513, "total_loss": 0.001430995762348175, "policy_loss": -0.004144120961427689, "vf_loss": 62.17998123168945, "vf_explained_var": 0.801994264125824, "kl": 0.0024192428681999445, "entropy": 1.2857705354690552, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1075200, "episodes_total": 2688, "training_iteration": 84, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-26-39", "timestamp": 1660249599, "time_this_iter_s": 29.525622129440308, "time_total_s": 8018.550344705582, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 8018.550344705582, "timesteps_since_restore": 1075200, "iterations_since_restore": 84, "perf": {"cpu_util_percent": 28.77560975609756, "ram_util_percent": 58.31951219512194}}
+{"episode_reward_max": 573.0, "episode_reward_min": 126.0, "episode_reward_mean": 415.33, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 63.0}, "policy_reward_max": {"ppo": 293.0}, "policy_reward_mean": {"ppo": 207.665}, "custom_metrics": {"sparse_reward_mean": 143.2, "sparse_reward_min": 40, "sparse_reward_max": 200, "shaped_reward_mean": 128.93, "shaped_reward_min": 46, "shaped_reward_max": 173, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 13.57, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 14.2, "onion_pickup_agent_1_min": 4, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 12.84, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 19, "useful_onion_pickup_agent_1_mean": 13.3, "useful_onion_pickup_agent_1_min": 4, "useful_onion_pickup_agent_1_max": 20, "onion_drop_agent_0_mean": 1.45, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 1.38, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.73, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.82, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 11.79, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 12.45, "potting_onion_agent_1_min": 4, "potting_onion_agent_1_max": 18, "dish_pickup_agent_0_mean": 5.5, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.09, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 2.83, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 6, "useful_dish_pickup_agent_1_mean": 2.59, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 6, "dish_drop_agent_0_mean": 1.23, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 1.02, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.4, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.32, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.27, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 3.95, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 3.99, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 6, "soup_delivery_agent_1_mean": 3.72, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.17, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.15, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 11.79, 
"optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 12.45, "optimal_onion_potting_agent_1_min": 4, "optimal_onion_potting_agent_1_max": 18, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 11.79, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 12.45, "viable_onion_potting_agent_1_min": 4, "viable_onion_potting_agent_1_max": 18, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [516.0, 287.0, 462.0, 456.0, 449.0, 402.0, 272.0, 452.0, 407.0, 430.0, 450.0, 464.0, 465.0, 461.0, 407.0, 458.0, 404.0, 456.0, 310.0, 470.0, 352.0, 458.0, 376.0, 444.0, 501.0, 495.0, 458.0, 464.0, 447.0, 473.0, 516.0, 384.0, 421.0, 413.0, 300.0, 355.0, 350.0, 419.0, 422.0, 465.0, 177.0, 450.0, 415.0, 344.0, 282.0, 467.0, 407.0, 404.0, 516.0, 456.0, 396.0, 453.0, 458.0, 418.0, 295.0, 353.0, 406.0, 351.0, 516.0, 519.0, 444.0, 412.0, 444.0, 467.0, 287.0, 406.0, 237.0, 450.0, 126.0, 424.0, 458.0, 418.0, 404.0, 465.0, 228.0, 353.0, 465.0, 350.0, 375.0, 441.0, 422.0, 418.0, 467.0, 441.0, 339.0, 370.0, 404.0, 467.0, 453.0, 458.0, 465.0, 345.0, 476.0, 459.0, 516.0, 573.0, 467.0, 459.0, 393.0, 413.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [256.0, 260.0, 150.0, 137.0, 229.0, 233.0, 230.0, 226.0, 203.0, 246.0, 189.0, 213.0, 146.0, 126.0, 210.0, 242.0, 200.0, 207.0, 221.0, 209.0, 225.0, 225.0, 224.0, 240.0, 231.0, 234.0, 242.0, 219.0, 208.0, 199.0, 220.0, 238.0, 193.0, 211.0, 231.0, 225.0, 140.0, 170.0, 230.0, 240.0, 178.0, 174.0, 224.0, 234.0, 180.0, 196.0, 228.0, 216.0, 252.0, 249.0, 239.0, 256.0, 223.0, 235.0, 242.0, 222.0, 225.0, 222.0, 226.0, 247.0, 249.0, 267.0, 197.0, 187.0, 225.0, 196.0, 218.0, 195.0, 152.0, 148.0, 188.0, 167.0, 177.0, 173.0, 202.0, 217.0, 200.0, 222.0, 240.0, 225.0, 94.0, 83.0, 227.0, 223.0, 205.0, 210.0, 171.0, 173.0, 139.0, 143.0, 239.0, 228.0, 194.0, 213.0, 189.0, 215.0, 250.0, 
266.0, 230.0, 226.0, 213.0, 183.0, 230.0, 223.0, 237.0, 221.0, 205.0, 213.0, 150.0, 145.0, 159.0, 194.0, 206.0, 200.0, 179.0, 172.0, 251.0, 265.0, 255.0, 264.0, 210.0, 234.0, 199.0, 213.0, 233.0, 211.0, 233.0, 234.0, 143.0, 144.0, 212.0, 194.0, 120.0, 117.0, 226.0, 224.0, 63.0, 63.0, 224.0, 200.0, 224.0, 234.0, 210.0, 208.0, 202.0, 202.0, 240.0, 225.0, 117.0, 111.0, 173.0, 180.0, 223.0, 242.0, 178.0, 172.0, 181.0, 194.0, 219.0, 222.0, 222.0, 200.0, 207.0, 211.0, 230.0, 237.0, 232.0, 209.0, 165.0, 174.0, 185.0, 185.0, 204.0, 200.0, 230.0, 237.0, 238.0, 215.0, 256.0, 202.0, 236.0, 229.0, 194.0, 151.0, 226.0, 250.0, 231.0, 228.0, 252.0, 264.0, 293.0, 280.0, 238.0, 229.0, 222.0, 237.0, 207.0, 186.0, 212.0, 201.0]}, "sampler_perf": {"mean_env_wait_ms": 2.2331698554816866, "mean_processing_ms": 0.5135892745725562, "mean_inference_ms": 2.7773374770155983}, "off_policy_estimator": {}, "info": {"num_steps_trained": 2040000, "num_steps_sampled": 1088000, "sample_time_ms": 20542.192, "load_time_ms": 36.513, "grad_time_ms": 8356.672, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 1.0339757810987241e-26, "cur_lr": 0.0010000000474974513, "total_loss": 9.005811443785205e-05, "policy_loss": -0.005502933170646429, "vf_loss": 62.30662536621094, "vf_explained_var": 0.7652042508125305, "kl": 0.0015233332524076104, "entropy": 1.275335431098938, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1088000, "episodes_total": 2720, "training_iteration": 85, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-27-08", "timestamp": 1660249628, "time_this_iter_s": 29.196897983551025, "time_total_s": 8047.747242689133, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 
1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": 
false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, 
"entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 8047.747242689133, "timesteps_since_restore": 1088000, "iterations_since_restore": 85, "perf": {"cpu_util_percent": 31.616666666666664, "ram_util_percent": 58.38809523809524}}
+{"episode_reward_max": 573.0, "episode_reward_min": 126.0, "episode_reward_mean": 429.06, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 63.0}, "policy_reward_max": {"ppo": 293.0}, "policy_reward_mean": {"ppo": 214.53}, "custom_metrics": {"sparse_reward_mean": 148.0, "sparse_reward_min": 40, "sparse_reward_max": 200, "shaped_reward_mean": 133.06, "shaped_reward_min": 46, "shaped_reward_max": 173, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.29, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 14.59, "onion_pickup_agent_1_min": 4, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 13.5, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 13.69, "useful_onion_pickup_agent_1_min": 4, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 1.8, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 8, "onion_drop_agent_1_mean": 1.38, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.89, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 0.77, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 12.11, "potting_onion_agent_0_min": 3, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 12.83, "potting_onion_agent_1_min": 4, "potting_onion_agent_1_max": 18, "dish_pickup_agent_0_mean": 5.44, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.2, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 3.0, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 6, "useful_dish_pickup_agent_1_mean": 2.83, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 1.11, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 1.03, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.33, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.36, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 4.32, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.0, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 4.12, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 6, "soup_delivery_agent_1_mean": 3.82, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.12, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.13, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 12.11, 
"optimal_onion_potting_agent_0_min": 3, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 12.83, "optimal_onion_potting_agent_1_min": 4, "optimal_onion_potting_agent_1_max": 18, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 12.11, "viable_onion_potting_agent_0_min": 3, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 12.83, "viable_onion_potting_agent_1_min": 4, "viable_onion_potting_agent_1_max": 18, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [452.0, 461.0, 501.0, 241.0, 462.0, 459.0, 519.0, 413.0, 405.0, 302.0, 465.0, 519.0, 498.0, 570.0, 465.0, 444.0, 398.0, 479.0, 419.0, 516.0, 419.0, 522.0, 419.0, 516.0, 470.0, 470.0, 519.0, 418.0, 504.0, 444.0, 355.0, 324.0, 287.0, 406.0, 237.0, 450.0, 126.0, 424.0, 458.0, 418.0, 404.0, 465.0, 228.0, 353.0, 465.0, 350.0, 375.0, 441.0, 422.0, 418.0, 467.0, 441.0, 339.0, 370.0, 404.0, 467.0, 453.0, 458.0, 465.0, 345.0, 476.0, 459.0, 516.0, 573.0, 467.0, 459.0, 393.0, 413.0, 516.0, 287.0, 462.0, 456.0, 449.0, 402.0, 272.0, 452.0, 407.0, 430.0, 450.0, 464.0, 465.0, 461.0, 407.0, 458.0, 404.0, 456.0, 310.0, 470.0, 352.0, 458.0, 376.0, 444.0, 501.0, 495.0, 458.0, 464.0, 447.0, 473.0, 516.0, 384.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [230.0, 222.0, 235.0, 226.0, 254.0, 247.0, 114.0, 127.0, 234.0, 228.0, 215.0, 244.0, 256.0, 263.0, 208.0, 205.0, 208.0, 197.0, 151.0, 151.0, 239.0, 226.0, 254.0, 265.0, 248.0, 250.0, 279.0, 291.0, 226.0, 239.0, 214.0, 230.0, 200.0, 198.0, 239.0, 240.0, 201.0, 218.0, 250.0, 266.0, 212.0, 207.0, 265.0, 257.0, 208.0, 211.0, 264.0, 252.0, 234.0, 236.0, 238.0, 232.0, 262.0, 257.0, 204.0, 214.0, 240.0, 264.0, 238.0, 206.0, 185.0, 170.0, 157.0, 167.0, 143.0, 144.0, 212.0, 194.0, 120.0, 117.0, 226.0, 224.0, 63.0, 63.0, 224.0, 200.0, 224.0, 234.0, 210.0, 208.0, 202.0, 202.0, 240.0, 225.0, 117.0, 111.0, 173.0, 180.0, 223.0, 242.0, 178.0, 172.0, 181.0, 194.0, 219.0, 222.0, 222.0, 
200.0, 207.0, 211.0, 230.0, 237.0, 232.0, 209.0, 165.0, 174.0, 185.0, 185.0, 204.0, 200.0, 230.0, 237.0, 238.0, 215.0, 256.0, 202.0, 236.0, 229.0, 194.0, 151.0, 226.0, 250.0, 231.0, 228.0, 252.0, 264.0, 293.0, 280.0, 238.0, 229.0, 222.0, 237.0, 207.0, 186.0, 212.0, 201.0, 256.0, 260.0, 150.0, 137.0, 229.0, 233.0, 230.0, 226.0, 203.0, 246.0, 189.0, 213.0, 146.0, 126.0, 210.0, 242.0, 200.0, 207.0, 221.0, 209.0, 225.0, 225.0, 224.0, 240.0, 231.0, 234.0, 242.0, 219.0, 208.0, 199.0, 220.0, 238.0, 193.0, 211.0, 231.0, 225.0, 140.0, 170.0, 230.0, 240.0, 178.0, 174.0, 224.0, 234.0, 180.0, 196.0, 228.0, 216.0, 252.0, 249.0, 239.0, 256.0, 223.0, 235.0, 242.0, 222.0, 225.0, 222.0, 226.0, 247.0, 249.0, 267.0, 197.0, 187.0]}, "sampler_perf": {"mean_env_wait_ms": 2.2121816621028128, "mean_processing_ms": 0.509404154557512, "mean_inference_ms": 2.7561899846531217}, "off_policy_estimator": {}, "info": {"num_steps_trained": 2064000, "num_steps_sampled": 1100800, "sample_time_ms": 20247.187, "load_time_ms": 36.147, "grad_time_ms": 8204.595, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 5.169878905493621e-27, "cur_lr": 0.0010000000474974513, "total_loss": 0.0034205808769911528, "policy_loss": -0.0028535639867186546, "vf_loss": 69.0377197265625, "vf_explained_var": 0.760657787322998, "kl": 0.002043861197307706, "entropy": 1.2592506408691406, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1100800, "episodes_total": 2752, "training_iteration": 86, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-27-37", "timestamp": 1660249657, "time_this_iter_s": 28.82673192024231, "time_total_s": 8076.573974609375, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 
1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": 
false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, 
"entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 8076.573974609375, "timesteps_since_restore": 1100800, "iterations_since_restore": 86, "perf": {"cpu_util_percent": 33.515, "ram_util_percent": 58.42750000000001}}
+{"episode_reward_max": 570.0, "episode_reward_min": 142.0, "episode_reward_mean": 435.79, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 70.0}, "policy_reward_max": {"ppo": 291.0}, "policy_reward_mean": {"ppo": 217.895}, "custom_metrics": {"sparse_reward_mean": 150.0, "sparse_reward_min": 40, "sparse_reward_max": 200, "shaped_reward_mean": 135.79, "shaped_reward_min": 62, "shaped_reward_max": 170, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.42, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 15.29, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 13.49, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 14.24, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 1.9, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 8, "onion_drop_agent_1_mean": 1.69, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 7, "useful_onion_drop_agent_0_mean": 0.97, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 0.92, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 12.14, "potting_onion_agent_0_min": 3, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 13.18, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 18, "dish_pickup_agent_0_mean": 5.34, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.22, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 3.1, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 6, "useful_dish_pickup_agent_1_mean": 3.06, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 1.08, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 0.9, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.3, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.33, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 4.27, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.21, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 4.05, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 6, "soup_delivery_agent_1_mean": 3.99, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.13, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.17, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 3, "optimal_onion_potting_agent_0_mean": 12.14, 
"optimal_onion_potting_agent_0_min": 3, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 13.18, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 18, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 12.14, "viable_onion_potting_agent_0_min": 3, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 13.18, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 18, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [461.0, 404.0, 464.0, 393.0, 467.0, 459.0, 419.0, 142.0, 419.0, 456.0, 481.0, 507.0, 407.0, 470.0, 513.0, 467.0, 459.0, 419.0, 473.0, 462.0, 316.0, 294.0, 416.0, 513.0, 465.0, 465.0, 424.0, 401.0, 470.0, 419.0, 367.0, 341.0, 467.0, 459.0, 393.0, 413.0, 516.0, 287.0, 462.0, 456.0, 449.0, 402.0, 272.0, 452.0, 407.0, 430.0, 450.0, 464.0, 465.0, 461.0, 407.0, 458.0, 404.0, 456.0, 310.0, 470.0, 352.0, 458.0, 376.0, 444.0, 501.0, 495.0, 458.0, 464.0, 447.0, 473.0, 516.0, 384.0, 452.0, 461.0, 501.0, 241.0, 462.0, 459.0, 519.0, 413.0, 405.0, 302.0, 465.0, 519.0, 498.0, 570.0, 465.0, 444.0, 398.0, 479.0, 419.0, 516.0, 419.0, 522.0, 419.0, 516.0, 470.0, 470.0, 519.0, 418.0, 504.0, 444.0, 355.0, 324.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [227.0, 234.0, 210.0, 194.0, 236.0, 228.0, 186.0, 207.0, 242.0, 225.0, 226.0, 233.0, 218.0, 201.0, 70.0, 72.0, 202.0, 217.0, 222.0, 234.0, 234.0, 247.0, 235.0, 272.0, 197.0, 210.0, 228.0, 242.0, 245.0, 268.0, 247.0, 220.0, 231.0, 228.0, 211.0, 208.0, 230.0, 243.0, 224.0, 238.0, 162.0, 154.0, 139.0, 155.0, 186.0, 230.0, 265.0, 248.0, 236.0, 229.0, 236.0, 229.0, 223.0, 201.0, 191.0, 210.0, 237.0, 233.0, 207.0, 212.0, 187.0, 180.0, 175.0, 166.0, 238.0, 229.0, 222.0, 237.0, 207.0, 186.0, 212.0, 201.0, 256.0, 260.0, 150.0, 137.0, 229.0, 233.0, 230.0, 226.0, 203.0, 246.0, 189.0, 213.0, 146.0, 126.0, 210.0, 242.0, 200.0, 207.0, 221.0, 209.0, 225.0, 225.0, 224.0, 240.0, 231.0, 
234.0, 242.0, 219.0, 208.0, 199.0, 220.0, 238.0, 193.0, 211.0, 231.0, 225.0, 140.0, 170.0, 230.0, 240.0, 178.0, 174.0, 224.0, 234.0, 180.0, 196.0, 228.0, 216.0, 252.0, 249.0, 239.0, 256.0, 223.0, 235.0, 242.0, 222.0, 225.0, 222.0, 226.0, 247.0, 249.0, 267.0, 197.0, 187.0, 230.0, 222.0, 235.0, 226.0, 254.0, 247.0, 114.0, 127.0, 234.0, 228.0, 215.0, 244.0, 256.0, 263.0, 208.0, 205.0, 208.0, 197.0, 151.0, 151.0, 239.0, 226.0, 254.0, 265.0, 248.0, 250.0, 279.0, 291.0, 226.0, 239.0, 214.0, 230.0, 200.0, 198.0, 239.0, 240.0, 201.0, 218.0, 250.0, 266.0, 212.0, 207.0, 265.0, 257.0, 208.0, 211.0, 264.0, 252.0, 234.0, 236.0, 238.0, 232.0, 262.0, 257.0, 204.0, 214.0, 240.0, 264.0, 238.0, 206.0, 185.0, 170.0, 157.0, 167.0]}, "sampler_perf": {"mean_env_wait_ms": 2.191700194557133, "mean_processing_ms": 0.5053220551327391, "mean_inference_ms": 2.7357250964199444}, "off_policy_estimator": {}, "info": {"num_steps_trained": 2088000, "num_steps_sampled": 1113600, "sample_time_ms": 20142.29, "load_time_ms": 35.752, "grad_time_ms": 8101.845, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 2.5849394527468104e-27, "cur_lr": 0.0010000000474974513, "total_loss": 0.0009223390952683985, "policy_loss": -0.005599660333245993, "vf_loss": 71.4854736328125, "vf_explained_var": 0.7612900733947754, "kl": 0.002260145964100957, "entropy": 1.2530813217163086, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1113600, "episodes_total": 2784, "training_iteration": 87, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-28-07", "timestamp": 1660249687, "time_this_iter_s": 30.60737180709839, "time_total_s": 8107.181346416473, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 
1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": 
false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, 
"entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 8107.181346416473, "timesteps_since_restore": 1113600, "iterations_since_restore": 87, "perf": {"cpu_util_percent": 30.168181818181814, "ram_util_percent": 58.34545454545453}}
+{"episode_reward_max": 570.0, "episode_reward_min": 142.0, "episode_reward_mean": 443.49, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 70.0}, "policy_reward_max": {"ppo": 291.0}, "policy_reward_mean": {"ppo": 221.745}, "custom_metrics": {"sparse_reward_mean": 152.4, "sparse_reward_min": 40, "sparse_reward_max": 200, "shaped_reward_mean": 138.69, "shaped_reward_min": 62, "shaped_reward_max": 170, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.81, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 15.03, "onion_pickup_agent_1_min": 5, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 13.9, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 14.16, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 1.91, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 8, "onion_drop_agent_1_mean": 1.58, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 7, "useful_onion_drop_agent_0_mean": 0.94, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 0.86, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 12.54, "potting_onion_agent_0_min": 3, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 13.09, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 18, "dish_pickup_agent_0_mean": 5.22, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.33, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 12, "useful_dish_pickup_agent_0_mean": 3.42, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 6, "useful_dish_pickup_agent_1_mean": 3.23, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.72, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 1.07, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.15, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.46, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 4.38, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 4.13, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 4.23, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 6, "soup_delivery_agent_1_mean": 3.97, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.09, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.12, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 3, "optimal_onion_potting_agent_0_mean": 12.54, 
"optimal_onion_potting_agent_0_min": 3, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 13.09, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 18, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 12.54, "viable_onion_potting_agent_0_min": 3, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 13.09, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 18, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [302.0, 366.0, 313.0, 504.0, 522.0, 530.0, 473.0, 468.0, 447.0, 403.0, 522.0, 519.0, 461.0, 516.0, 401.0, 516.0, 459.0, 464.0, 467.0, 373.0, 458.0, 525.0, 513.0, 273.0, 415.0, 461.0, 467.0, 473.0, 522.0, 406.0, 522.0, 467.0, 447.0, 473.0, 516.0, 384.0, 452.0, 461.0, 501.0, 241.0, 462.0, 459.0, 519.0, 413.0, 405.0, 302.0, 465.0, 519.0, 498.0, 570.0, 465.0, 444.0, 398.0, 479.0, 419.0, 516.0, 419.0, 522.0, 419.0, 516.0, 470.0, 470.0, 519.0, 418.0, 504.0, 444.0, 355.0, 324.0, 461.0, 404.0, 464.0, 393.0, 467.0, 459.0, 419.0, 142.0, 419.0, 456.0, 481.0, 507.0, 407.0, 470.0, 513.0, 467.0, 459.0, 419.0, 473.0, 462.0, 316.0, 294.0, 416.0, 513.0, 465.0, 465.0, 424.0, 401.0, 470.0, 419.0, 367.0, 341.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [151.0, 151.0, 185.0, 181.0, 153.0, 160.0, 262.0, 242.0, 265.0, 257.0, 268.0, 262.0, 253.0, 220.0, 244.0, 224.0, 221.0, 226.0, 192.0, 211.0, 265.0, 257.0, 249.0, 270.0, 237.0, 224.0, 264.0, 252.0, 215.0, 186.0, 259.0, 257.0, 245.0, 214.0, 238.0, 226.0, 229.0, 238.0, 184.0, 189.0, 230.0, 228.0, 262.0, 263.0, 265.0, 248.0, 143.0, 130.0, 208.0, 207.0, 228.0, 233.0, 241.0, 226.0, 232.0, 241.0, 265.0, 257.0, 207.0, 199.0, 255.0, 267.0, 244.0, 223.0, 225.0, 222.0, 226.0, 247.0, 249.0, 267.0, 197.0, 187.0, 230.0, 222.0, 235.0, 226.0, 254.0, 247.0, 114.0, 127.0, 234.0, 228.0, 215.0, 244.0, 256.0, 263.0, 208.0, 205.0, 208.0, 197.0, 151.0, 151.0, 239.0, 226.0, 254.0, 265.0, 248.0, 
250.0, 279.0, 291.0, 226.0, 239.0, 214.0, 230.0, 200.0, 198.0, 239.0, 240.0, 201.0, 218.0, 250.0, 266.0, 212.0, 207.0, 265.0, 257.0, 208.0, 211.0, 264.0, 252.0, 234.0, 236.0, 238.0, 232.0, 262.0, 257.0, 204.0, 214.0, 240.0, 264.0, 238.0, 206.0, 185.0, 170.0, 157.0, 167.0, 227.0, 234.0, 210.0, 194.0, 236.0, 228.0, 186.0, 207.0, 242.0, 225.0, 226.0, 233.0, 218.0, 201.0, 70.0, 72.0, 202.0, 217.0, 222.0, 234.0, 234.0, 247.0, 235.0, 272.0, 197.0, 210.0, 228.0, 242.0, 245.0, 268.0, 247.0, 220.0, 231.0, 228.0, 211.0, 208.0, 230.0, 243.0, 224.0, 238.0, 162.0, 154.0, 139.0, 155.0, 186.0, 230.0, 265.0, 248.0, 236.0, 229.0, 236.0, 229.0, 223.0, 201.0, 191.0, 210.0, 237.0, 233.0, 207.0, 212.0, 187.0, 180.0, 175.0, 166.0]}, "sampler_perf": {"mean_env_wait_ms": 2.1716872563286627, "mean_processing_ms": 0.5013361308454948, "mean_inference_ms": 2.7160497473266743}, "off_policy_estimator": {}, "info": {"num_steps_trained": 2112000, "num_steps_sampled": 1126400, "sample_time_ms": 20200.821, "load_time_ms": 36.161, "grad_time_ms": 8143.064, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 1.2924697263734052e-27, "cur_lr": 0.0010000000474974513, "total_loss": 0.004522919189184904, "policy_loss": -0.0018036967376247048, "vf_loss": 69.45938110351562, "vf_explained_var": 0.7786126732826233, "kl": 0.001827276311814785, "entropy": 1.2386289834976196, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1126400, "episodes_total": 2816, "training_iteration": 88, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-28-39", "timestamp": 1660249719, "time_this_iter_s": 31.2521071434021, "time_total_s": 8138.4334535598755, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 
1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": 
false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, 
"entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 8138.4334535598755, "timesteps_since_restore": 1126400, "iterations_since_restore": 88, "perf": {"cpu_util_percent": 29.57045454545455, "ram_util_percent": 58.37954545454544}}
+{"episode_reward_max": 530.0, "episode_reward_min": 142.0, "episode_reward_mean": 443.95, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 70.0}, "policy_reward_max": {"ppo": 276.0}, "policy_reward_mean": {"ppo": 221.975}, "custom_metrics": {"sparse_reward_mean": 152.6, "sparse_reward_min": 40, "sparse_reward_max": 180, "shaped_reward_mean": 138.75, "shaped_reward_min": 62, "shaped_reward_max": 170, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.54, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 15.22, "onion_pickup_agent_1_min": 5, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 13.68, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 14.28, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 20, "onion_drop_agent_0_mean": 1.7, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 7, "onion_drop_agent_1_mean": 1.74, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 10, "useful_onion_drop_agent_0_mean": 0.81, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.97, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 9, "potting_onion_agent_0_mean": 12.52, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 18, "potting_onion_agent_1_mean": 13.12, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 19, "dish_pickup_agent_0_mean": 5.51, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.31, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 12, "useful_dish_pickup_agent_0_mean": 3.55, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 3.11, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.86, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 1.12, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.22, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.48, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 4.46, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 4.08, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 4.28, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 6, "soup_delivery_agent_1_mean": 3.87, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.12, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.17, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 3, "optimal_onion_potting_agent_0_mean": 12.52, 
"optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 18, "optimal_onion_potting_agent_1_mean": 13.12, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 19, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 12.52, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 18, "viable_onion_potting_agent_1_mean": 13.12, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 19, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [473.0, 468.0, 447.0, 473.0, 522.0, 513.0, 470.0, 464.0, 352.0, 504.0, 476.0, 398.0, 358.0, 410.0, 519.0, 405.0, 465.0, 416.0, 406.0, 525.0, 393.0, 459.0, 456.0, 408.0, 465.0, 438.0, 476.0, 462.0, 478.0, 479.0, 507.0, 522.0, 504.0, 444.0, 355.0, 324.0, 461.0, 404.0, 464.0, 393.0, 467.0, 459.0, 419.0, 142.0, 419.0, 456.0, 481.0, 507.0, 407.0, 470.0, 513.0, 467.0, 459.0, 419.0, 473.0, 462.0, 316.0, 294.0, 416.0, 513.0, 465.0, 465.0, 424.0, 401.0, 470.0, 419.0, 367.0, 341.0, 302.0, 366.0, 313.0, 504.0, 522.0, 530.0, 473.0, 468.0, 447.0, 403.0, 522.0, 519.0, 461.0, 516.0, 401.0, 516.0, 459.0, 464.0, 467.0, 373.0, 458.0, 525.0, 513.0, 273.0, 415.0, 461.0, 467.0, 473.0, 522.0, 406.0, 522.0, 467.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [238.0, 235.0, 234.0, 234.0, 226.0, 221.0, 236.0, 237.0, 257.0, 265.0, 240.0, 273.0, 236.0, 234.0, 248.0, 216.0, 178.0, 174.0, 250.0, 254.0, 231.0, 245.0, 210.0, 188.0, 181.0, 177.0, 202.0, 208.0, 243.0, 276.0, 199.0, 206.0, 244.0, 221.0, 208.0, 208.0, 193.0, 213.0, 259.0, 266.0, 210.0, 183.0, 229.0, 230.0, 234.0, 222.0, 212.0, 196.0, 234.0, 231.0, 224.0, 214.0, 232.0, 244.0, 248.0, 214.0, 241.0, 237.0, 238.0, 241.0, 252.0, 255.0, 255.0, 267.0, 240.0, 264.0, 238.0, 206.0, 185.0, 170.0, 157.0, 167.0, 227.0, 234.0, 210.0, 194.0, 236.0, 228.0, 186.0, 207.0, 242.0, 225.0, 226.0, 233.0, 218.0, 201.0, 70.0, 72.0, 202.0, 217.0, 222.0, 234.0, 234.0, 247.0, 235.0, 272.0, 197.0, 
210.0, 228.0, 242.0, 245.0, 268.0, 247.0, 220.0, 231.0, 228.0, 211.0, 208.0, 230.0, 243.0, 224.0, 238.0, 162.0, 154.0, 139.0, 155.0, 186.0, 230.0, 265.0, 248.0, 236.0, 229.0, 236.0, 229.0, 223.0, 201.0, 191.0, 210.0, 237.0, 233.0, 207.0, 212.0, 187.0, 180.0, 175.0, 166.0, 151.0, 151.0, 185.0, 181.0, 153.0, 160.0, 262.0, 242.0, 265.0, 257.0, 268.0, 262.0, 253.0, 220.0, 244.0, 224.0, 221.0, 226.0, 192.0, 211.0, 265.0, 257.0, 249.0, 270.0, 237.0, 224.0, 264.0, 252.0, 215.0, 186.0, 259.0, 257.0, 245.0, 214.0, 238.0, 226.0, 229.0, 238.0, 184.0, 189.0, 230.0, 228.0, 262.0, 263.0, 265.0, 248.0, 143.0, 130.0, 208.0, 207.0, 228.0, 233.0, 241.0, 226.0, 232.0, 241.0, 265.0, 257.0, 207.0, 199.0, 255.0, 267.0, 244.0, 223.0]}, "sampler_perf": {"mean_env_wait_ms": 2.1521722183648633, "mean_processing_ms": 0.4974515268120716, "mean_inference_ms": 2.697593198173253}, "off_policy_estimator": {}, "info": {"num_steps_trained": 2136000, "num_steps_sampled": 1139200, "sample_time_ms": 20564.193, "load_time_ms": 36.199, "grad_time_ms": 8238.148, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 6.462348631867026e-28, "cur_lr": 0.0010000000474974513, "total_loss": 0.0003657890483736992, "policy_loss": -0.005678663495928049, "vf_loss": 66.65350341796875, "vf_explained_var": 0.7769116759300232, "kl": 0.0020363712683320045, "entropy": 1.2417923212051392, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1139200, "episodes_total": 2848, "training_iteration": 89, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-29-11", "timestamp": 1660249751, "time_this_iter_s": 32.67408323287964, "time_total_s": 8171.107536792755, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 
1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": 
false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, 
"entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 8171.107536792755, "timesteps_since_restore": 1139200, "iterations_since_restore": 89, "perf": {"cpu_util_percent": 27.073913043478264, "ram_util_percent": 58.2586956521739}}
+{"episode_reward_max": 570.0, "episode_reward_min": 273.0, "episode_reward_mean": 452.2, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 130.0}, "policy_reward_max": {"ppo": 287.0}, "policy_reward_mean": {"ppo": 226.1}, "custom_metrics": {"sparse_reward_mean": 156.0, "sparse_reward_min": 100, "sparse_reward_max": 200, "shaped_reward_mean": 140.2, "shaped_reward_min": 73, "shaped_reward_max": 170, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.85, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 15.29, "onion_pickup_agent_1_min": 5, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 13.98, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 14.39, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 20, "onion_drop_agent_0_mean": 1.79, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 7, "onion_drop_agent_1_mean": 1.63, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 10, "useful_onion_drop_agent_0_mean": 0.82, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 0.98, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 9, "potting_onion_agent_0_mean": 12.77, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 17, "potting_onion_agent_1_mean": 13.33, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 19, "dish_pickup_agent_0_mean": 5.71, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.36, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 12, "useful_dish_pickup_agent_0_mean": 3.61, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 2.89, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.85, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 1.33, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.19, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.47, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 4.72, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 3.91, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 6, "soup_delivery_agent_0_mean": 4.53, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 6, "soup_delivery_agent_1_mean": 3.73, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.15, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.12, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 12.77, 
"optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 17, "optimal_onion_potting_agent_1_mean": 13.33, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 19, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 12.77, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 17, "viable_onion_potting_agent_1_mean": 13.33, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 19, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [398.0, 419.0, 462.0, 419.0, 339.0, 519.0, 467.0, 522.0, 450.0, 449.0, 444.0, 495.0, 453.0, 438.0, 462.0, 476.0, 456.0, 413.0, 403.0, 430.0, 459.0, 476.0, 476.0, 458.0, 347.0, 424.0, 476.0, 570.0, 401.0, 516.0, 455.0, 516.0, 470.0, 419.0, 367.0, 341.0, 302.0, 366.0, 313.0, 504.0, 522.0, 530.0, 473.0, 468.0, 447.0, 403.0, 522.0, 519.0, 461.0, 516.0, 401.0, 516.0, 459.0, 464.0, 467.0, 373.0, 458.0, 525.0, 513.0, 273.0, 415.0, 461.0, 467.0, 473.0, 522.0, 406.0, 522.0, 467.0, 473.0, 468.0, 447.0, 473.0, 522.0, 513.0, 470.0, 464.0, 352.0, 504.0, 476.0, 398.0, 358.0, 410.0, 519.0, 405.0, 465.0, 416.0, 406.0, 525.0, 393.0, 459.0, 456.0, 408.0, 465.0, 438.0, 476.0, 462.0, 478.0, 479.0, 507.0, 522.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [207.0, 191.0, 217.0, 202.0, 231.0, 231.0, 211.0, 208.0, 176.0, 163.0, 260.0, 259.0, 234.0, 233.0, 262.0, 260.0, 238.0, 212.0, 230.0, 219.0, 232.0, 212.0, 261.0, 234.0, 219.0, 234.0, 226.0, 212.0, 237.0, 225.0, 247.0, 229.0, 236.0, 220.0, 200.0, 213.0, 200.0, 203.0, 202.0, 228.0, 225.0, 234.0, 236.0, 240.0, 228.0, 248.0, 242.0, 216.0, 177.0, 170.0, 217.0, 207.0, 231.0, 245.0, 287.0, 283.0, 201.0, 200.0, 264.0, 252.0, 233.0, 222.0, 260.0, 256.0, 237.0, 233.0, 207.0, 212.0, 187.0, 180.0, 175.0, 166.0, 151.0, 151.0, 185.0, 181.0, 153.0, 160.0, 262.0, 242.0, 265.0, 257.0, 268.0, 262.0, 253.0, 220.0, 244.0, 224.0, 221.0, 226.0, 192.0, 211.0, 265.0, 257.0, 249.0, 270.0, 237.0, 
224.0, 264.0, 252.0, 215.0, 186.0, 259.0, 257.0, 245.0, 214.0, 238.0, 226.0, 229.0, 238.0, 184.0, 189.0, 230.0, 228.0, 262.0, 263.0, 265.0, 248.0, 143.0, 130.0, 208.0, 207.0, 228.0, 233.0, 241.0, 226.0, 232.0, 241.0, 265.0, 257.0, 207.0, 199.0, 255.0, 267.0, 244.0, 223.0, 238.0, 235.0, 234.0, 234.0, 226.0, 221.0, 236.0, 237.0, 257.0, 265.0, 240.0, 273.0, 236.0, 234.0, 248.0, 216.0, 178.0, 174.0, 250.0, 254.0, 231.0, 245.0, 210.0, 188.0, 181.0, 177.0, 202.0, 208.0, 243.0, 276.0, 199.0, 206.0, 244.0, 221.0, 208.0, 208.0, 193.0, 213.0, 259.0, 266.0, 210.0, 183.0, 229.0, 230.0, 234.0, 222.0, 212.0, 196.0, 234.0, 231.0, 224.0, 214.0, 232.0, 244.0, 248.0, 214.0, 241.0, 237.0, 238.0, 241.0, 252.0, 255.0, 255.0, 267.0]}, "sampler_perf": {"mean_env_wait_ms": 2.133109953902478, "mean_processing_ms": 0.4936599987837839, "mean_inference_ms": 2.6799343629485026}, "off_policy_estimator": {}, "info": {"num_steps_trained": 2160000, "num_steps_sampled": 1152000, "sample_time_ms": 20913.282, "load_time_ms": 36.475, "grad_time_ms": 8548.769, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 3.231174315933513e-28, "cur_lr": 0.0010000000474974513, "total_loss": 0.007687473203986883, "policy_loss": 0.0007542042876593769, "vf_loss": 75.50032806396484, "vf_explained_var": 0.7544476985931396, "kl": 0.0026988324243575335, "entropy": 1.2335320711135864, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1152000, "episodes_total": 2880, "training_iteration": 90, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-29-44", "timestamp": 1660249784, "time_this_iter_s": 32.394510984420776, "time_total_s": 8203.502047777176, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 
1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": 
false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, 
"entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 8203.502047777176, "timesteps_since_restore": 1152000, "iterations_since_restore": 90, "perf": {"cpu_util_percent": 31.20434782608696, "ram_util_percent": 58.23260869565217}}
+{"episode_reward_max": 570.0, "episode_reward_min": 339.0, "episode_reward_mean": 459.06, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 163.0}, "policy_reward_max": {"ppo": 287.0}, "policy_reward_mean": {"ppo": 229.53}, "custom_metrics": {"sparse_reward_mean": 158.4, "sparse_reward_min": 120, "sparse_reward_max": 200, "shaped_reward_mean": 142.26, "shaped_reward_min": 99, "shaped_reward_max": 170, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.9, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 15.54, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 14.05, "useful_onion_pickup_agent_0_min": 9, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 14.67, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 20, "onion_drop_agent_0_mean": 1.63, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 8, "onion_drop_agent_1_mean": 1.67, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 10, "useful_onion_drop_agent_0_mean": 0.7, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 0.94, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 9, "potting_onion_agent_0_mean": 12.97, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 17, "potting_onion_agent_1_mean": 13.54, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 19, "dish_pickup_agent_0_mean": 5.78, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.22, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 3.67, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 2.99, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.98, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 1.07, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 6, "useful_dish_drop_agent_0_mean": 0.2, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.3, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 4.65, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.07, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 4.46, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 3.84, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.13, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.17, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 5, "optimal_onion_potting_agent_0_mean": 12.97, 
"optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 17, "optimal_onion_potting_agent_1_mean": 13.54, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 19, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 12.97, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 17, "viable_onion_potting_agent_1_mean": 13.54, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 19, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [510.0, 411.0, 418.0, 522.0, 465.0, 519.0, 465.0, 408.0, 473.0, 525.0, 522.0, 481.0, 447.0, 527.0, 476.0, 527.0, 449.0, 396.0, 453.0, 453.0, 470.0, 395.0, 481.0, 516.0, 516.0, 449.0, 516.0, 373.0, 416.0, 407.0, 392.0, 516.0, 522.0, 406.0, 522.0, 467.0, 473.0, 468.0, 447.0, 473.0, 522.0, 513.0, 470.0, 464.0, 352.0, 504.0, 476.0, 398.0, 358.0, 410.0, 519.0, 405.0, 465.0, 416.0, 406.0, 525.0, 393.0, 459.0, 456.0, 408.0, 465.0, 438.0, 476.0, 462.0, 478.0, 479.0, 507.0, 522.0, 398.0, 419.0, 462.0, 419.0, 339.0, 519.0, 467.0, 522.0, 450.0, 449.0, 444.0, 495.0, 453.0, 438.0, 462.0, 476.0, 456.0, 413.0, 403.0, 430.0, 459.0, 476.0, 476.0, 458.0, 347.0, 424.0, 476.0, 570.0, 401.0, 516.0, 455.0, 516.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [265.0, 245.0, 210.0, 201.0, 218.0, 200.0, 267.0, 255.0, 240.0, 225.0, 275.0, 244.0, 218.0, 247.0, 202.0, 206.0, 252.0, 221.0, 257.0, 268.0, 262.0, 260.0, 237.0, 244.0, 217.0, 230.0, 265.0, 262.0, 249.0, 227.0, 259.0, 268.0, 227.0, 222.0, 192.0, 204.0, 233.0, 220.0, 222.0, 231.0, 225.0, 245.0, 195.0, 200.0, 239.0, 242.0, 254.0, 262.0, 259.0, 257.0, 237.0, 212.0, 267.0, 249.0, 184.0, 189.0, 211.0, 205.0, 198.0, 209.0, 202.0, 190.0, 249.0, 267.0, 265.0, 257.0, 207.0, 199.0, 255.0, 267.0, 244.0, 223.0, 238.0, 235.0, 234.0, 234.0, 226.0, 221.0, 236.0, 237.0, 257.0, 265.0, 240.0, 273.0, 236.0, 234.0, 248.0, 216.0, 178.0, 174.0, 250.0, 254.0, 231.0, 245.0, 210.0, 188.0, 181.0, 
177.0, 202.0, 208.0, 243.0, 276.0, 199.0, 206.0, 244.0, 221.0, 208.0, 208.0, 193.0, 213.0, 259.0, 266.0, 210.0, 183.0, 229.0, 230.0, 234.0, 222.0, 212.0, 196.0, 234.0, 231.0, 224.0, 214.0, 232.0, 244.0, 248.0, 214.0, 241.0, 237.0, 238.0, 241.0, 252.0, 255.0, 255.0, 267.0, 207.0, 191.0, 217.0, 202.0, 231.0, 231.0, 211.0, 208.0, 176.0, 163.0, 260.0, 259.0, 234.0, 233.0, 262.0, 260.0, 238.0, 212.0, 230.0, 219.0, 232.0, 212.0, 261.0, 234.0, 219.0, 234.0, 226.0, 212.0, 237.0, 225.0, 247.0, 229.0, 236.0, 220.0, 200.0, 213.0, 200.0, 203.0, 202.0, 228.0, 225.0, 234.0, 236.0, 240.0, 228.0, 248.0, 242.0, 216.0, 177.0, 170.0, 217.0, 207.0, 231.0, 245.0, 287.0, 283.0, 201.0, 200.0, 264.0, 252.0, 233.0, 222.0, 260.0, 256.0]}, "sampler_perf": {"mean_env_wait_ms": 2.114456633491147, "mean_processing_ms": 0.4899465714962644, "mean_inference_ms": 2.662192402592387}, "off_policy_estimator": {}, "info": {"num_steps_trained": 2184000, "num_steps_sampled": 1164800, "sample_time_ms": 20895.618, "load_time_ms": 36.529, "grad_time_ms": 8787.695, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 1.6155871579667565e-28, "cur_lr": 0.0010000000474974513, "total_loss": 0.003199361963197589, "policy_loss": -0.003974525723606348, "vf_loss": 77.90489959716797, "vf_explained_var": 0.7496511936187744, "kl": 0.001869131694547832, "entropy": 1.2332016229629517, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1164800, "episodes_total": 2912, "training_iteration": 91, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-30-12", "timestamp": 1660249812, "time_this_iter_s": 28.652703046798706, "time_total_s": 8232.154750823975, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 
1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": 
false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, 
"entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 8232.154750823975, "timesteps_since_restore": 1164800, "iterations_since_restore": 91, "perf": {"cpu_util_percent": 34.4075, "ram_util_percent": 58.25750000000001}}
+{"episode_reward_max": 570.0, "episode_reward_min": 296.0, "episode_reward_mean": 465.12, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 133.0}, "policy_reward_max": {"ppo": 287.0}, "policy_reward_mean": {"ppo": 232.56}, "custom_metrics": {"sparse_reward_mean": 160.6, "sparse_reward_min": 100, "sparse_reward_max": 200, "shaped_reward_mean": 143.92, "shaped_reward_min": 96, "shaped_reward_max": 173, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.61, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 16.05, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 13.75, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 15.24, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 1.6, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 8, "onion_drop_agent_1_mean": 1.6, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.71, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 0.81, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 6, "potting_onion_agent_0_mean": 12.71, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 17, "potting_onion_agent_1_mean": 14.12, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 19, "dish_pickup_agent_0_mean": 5.98, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 13, "dish_pickup_agent_1_mean": 5.19, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 3.72, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 2.99, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 1.1, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 8, "dish_drop_agent_1_mean": 1.02, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 6, "useful_dish_drop_agent_0_mean": 0.21, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 4, "useful_dish_drop_agent_1_mean": 0.31, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 4.77, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.1, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 4.5, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 3.86, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.16, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.18, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 5, "optimal_onion_potting_agent_0_mean": 12.71, 
"optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 17, "optimal_onion_potting_agent_1_mean": 14.12, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 19, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 12.71, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 17, "viable_onion_potting_agent_1_mean": 14.12, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 19, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [513.0, 453.0, 516.0, 533.0, 296.0, 453.0, 501.0, 422.0, 465.0, 395.0, 530.0, 473.0, 519.0, 458.0, 404.0, 487.0, 449.0, 516.0, 516.0, 510.0, 504.0, 516.0, 516.0, 473.0, 516.0, 519.0, 470.0, 387.0, 522.0, 453.0, 421.0, 438.0, 478.0, 479.0, 507.0, 522.0, 398.0, 419.0, 462.0, 419.0, 339.0, 519.0, 467.0, 522.0, 450.0, 449.0, 444.0, 495.0, 453.0, 438.0, 462.0, 476.0, 456.0, 413.0, 403.0, 430.0, 459.0, 476.0, 476.0, 458.0, 347.0, 424.0, 476.0, 570.0, 401.0, 516.0, 455.0, 516.0, 510.0, 411.0, 418.0, 522.0, 465.0, 519.0, 465.0, 408.0, 473.0, 525.0, 522.0, 481.0, 447.0, 527.0, 476.0, 527.0, 449.0, 396.0, 453.0, 453.0, 470.0, 395.0, 481.0, 516.0, 516.0, 449.0, 516.0, 373.0, 416.0, 407.0, 392.0, 516.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [257.0, 256.0, 218.0, 235.0, 254.0, 262.0, 273.0, 260.0, 133.0, 163.0, 219.0, 234.0, 235.0, 266.0, 207.0, 215.0, 239.0, 226.0, 195.0, 200.0, 269.0, 261.0, 230.0, 243.0, 263.0, 256.0, 229.0, 229.0, 202.0, 202.0, 247.0, 240.0, 225.0, 224.0, 248.0, 268.0, 250.0, 266.0, 252.0, 258.0, 249.0, 255.0, 272.0, 244.0, 254.0, 262.0, 232.0, 241.0, 260.0, 256.0, 250.0, 269.0, 233.0, 237.0, 189.0, 198.0, 265.0, 257.0, 233.0, 220.0, 209.0, 212.0, 218.0, 220.0, 241.0, 237.0, 238.0, 241.0, 252.0, 255.0, 255.0, 267.0, 207.0, 191.0, 217.0, 202.0, 231.0, 231.0, 211.0, 208.0, 176.0, 163.0, 260.0, 259.0, 234.0, 233.0, 262.0, 260.0, 238.0, 212.0, 230.0, 219.0, 232.0, 212.0, 261.0, 234.0, 219.0, 
234.0, 226.0, 212.0, 237.0, 225.0, 247.0, 229.0, 236.0, 220.0, 200.0, 213.0, 200.0, 203.0, 202.0, 228.0, 225.0, 234.0, 236.0, 240.0, 228.0, 248.0, 242.0, 216.0, 177.0, 170.0, 217.0, 207.0, 231.0, 245.0, 287.0, 283.0, 201.0, 200.0, 264.0, 252.0, 233.0, 222.0, 260.0, 256.0, 265.0, 245.0, 210.0, 201.0, 218.0, 200.0, 267.0, 255.0, 240.0, 225.0, 275.0, 244.0, 218.0, 247.0, 202.0, 206.0, 252.0, 221.0, 257.0, 268.0, 262.0, 260.0, 237.0, 244.0, 217.0, 230.0, 265.0, 262.0, 249.0, 227.0, 259.0, 268.0, 227.0, 222.0, 192.0, 204.0, 233.0, 220.0, 222.0, 231.0, 225.0, 245.0, 195.0, 200.0, 239.0, 242.0, 254.0, 262.0, 259.0, 257.0, 237.0, 212.0, 267.0, 249.0, 184.0, 189.0, 211.0, 205.0, 198.0, 209.0, 202.0, 190.0, 249.0, 267.0]}, "sampler_perf": {"mean_env_wait_ms": 2.0961781849266816, "mean_processing_ms": 0.48630585862921505, "mean_inference_ms": 2.6442580305818364}, "off_policy_estimator": {}, "info": {"num_steps_trained": 2208000, "num_steps_sampled": 1177600, "sample_time_ms": 21020.172, "load_time_ms": 36.643, "grad_time_ms": 9041.599, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 8.077935789833782e-29, "cur_lr": 0.0010000000474974513, "total_loss": 0.009082547388970852, "policy_loss": 0.0023276470601558685, "vf_loss": 73.66332244873047, "vf_explained_var": 0.7556483149528503, "kl": 0.003871823428198695, "entropy": 1.2228628396987915, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1177600, "episodes_total": 2944, "training_iteration": 92, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-30-43", "timestamp": 1660249843, "time_this_iter_s": 30.34039807319641, "time_total_s": 8262.495148897171, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 
1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": 
false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, 
"entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 8262.495148897171, "timesteps_since_restore": 1177600, "iterations_since_restore": 92, "perf": {"cpu_util_percent": 30.85116279069767, "ram_util_percent": 58.283720930232555}}
+{"episode_reward_max": 573.0, "episode_reward_min": 230.0, "episode_reward_mean": 467.31, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 106.0}, "policy_reward_max": {"ppo": 293.0}, "policy_reward_mean": {"ppo": 233.655}, "custom_metrics": {"sparse_reward_mean": 161.6, "sparse_reward_min": 80, "sparse_reward_max": 200, "shaped_reward_mean": 144.11, "shaped_reward_min": 70, "shaped_reward_max": 173, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.51, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 15.93, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 13.74, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 15.27, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 1.56, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 8, "onion_drop_agent_1_mean": 1.45, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.65, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.68, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 12.66, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 18, "potting_onion_agent_1_mean": 14.2, "potting_onion_agent_1_min": 6, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.0, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 13, "dish_pickup_agent_1_mean": 5.26, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 3.67, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 3.04, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 1.21, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 8, "dish_drop_agent_1_mean": 0.96, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.3, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 4, "useful_dish_drop_agent_1_mean": 0.35, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 4.62, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.23, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 4.37, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.05, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.12, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.14, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 5, "optimal_onion_potting_agent_0_mean": 12.66, 
"optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 18, "optimal_onion_potting_agent_1_mean": 14.2, "optimal_onion_potting_agent_1_min": 6, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 12.66, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 18, "viable_onion_potting_agent_1_mean": 14.2, "viable_onion_potting_agent_1_min": 6, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [444.0, 410.0, 481.0, 411.0, 456.0, 461.0, 230.0, 370.0, 398.0, 473.0, 513.0, 507.0, 461.0, 522.0, 573.0, 495.0, 516.0, 390.0, 501.0, 519.0, 525.0, 524.0, 479.0, 447.0, 467.0, 464.0, 450.0, 392.0, 522.0, 416.0, 478.0, 510.0, 401.0, 516.0, 455.0, 516.0, 510.0, 411.0, 418.0, 522.0, 465.0, 519.0, 465.0, 408.0, 473.0, 525.0, 522.0, 481.0, 447.0, 527.0, 476.0, 527.0, 449.0, 396.0, 453.0, 453.0, 470.0, 395.0, 481.0, 516.0, 516.0, 449.0, 516.0, 373.0, 416.0, 407.0, 392.0, 516.0, 513.0, 453.0, 516.0, 533.0, 296.0, 453.0, 501.0, 422.0, 465.0, 395.0, 530.0, 473.0, 519.0, 458.0, 404.0, 487.0, 449.0, 516.0, 516.0, 510.0, 504.0, 516.0, 516.0, 473.0, 516.0, 519.0, 470.0, 387.0, 522.0, 453.0, 421.0, 438.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [224.0, 220.0, 205.0, 205.0, 229.0, 252.0, 210.0, 201.0, 236.0, 220.0, 223.0, 238.0, 124.0, 106.0, 174.0, 196.0, 191.0, 207.0, 241.0, 232.0, 260.0, 253.0, 268.0, 239.0, 227.0, 234.0, 262.0, 260.0, 293.0, 280.0, 250.0, 245.0, 249.0, 267.0, 195.0, 195.0, 255.0, 246.0, 261.0, 258.0, 254.0, 271.0, 262.0, 262.0, 247.0, 232.0, 201.0, 246.0, 230.0, 237.0, 223.0, 241.0, 221.0, 229.0, 197.0, 195.0, 254.0, 268.0, 211.0, 205.0, 239.0, 239.0, 259.0, 251.0, 201.0, 200.0, 264.0, 252.0, 233.0, 222.0, 260.0, 256.0, 265.0, 245.0, 210.0, 201.0, 218.0, 200.0, 267.0, 255.0, 240.0, 225.0, 275.0, 244.0, 218.0, 247.0, 202.0, 206.0, 252.0, 221.0, 257.0, 268.0, 262.0, 260.0, 237.0, 244.0, 217.0, 
230.0, 265.0, 262.0, 249.0, 227.0, 259.0, 268.0, 227.0, 222.0, 192.0, 204.0, 233.0, 220.0, 222.0, 231.0, 225.0, 245.0, 195.0, 200.0, 239.0, 242.0, 254.0, 262.0, 259.0, 257.0, 237.0, 212.0, 267.0, 249.0, 184.0, 189.0, 211.0, 205.0, 198.0, 209.0, 202.0, 190.0, 249.0, 267.0, 257.0, 256.0, 218.0, 235.0, 254.0, 262.0, 273.0, 260.0, 133.0, 163.0, 219.0, 234.0, 235.0, 266.0, 207.0, 215.0, 239.0, 226.0, 195.0, 200.0, 269.0, 261.0, 230.0, 243.0, 263.0, 256.0, 229.0, 229.0, 202.0, 202.0, 247.0, 240.0, 225.0, 224.0, 248.0, 268.0, 250.0, 266.0, 252.0, 258.0, 249.0, 255.0, 272.0, 244.0, 254.0, 262.0, 232.0, 241.0, 260.0, 256.0, 250.0, 269.0, 233.0, 237.0, 189.0, 198.0, 265.0, 257.0, 233.0, 220.0, 209.0, 212.0, 218.0, 220.0]}, "sampler_perf": {"mean_env_wait_ms": 2.0782697759005244, "mean_processing_ms": 0.48273945160484155, "mean_inference_ms": 2.6262237371699033}, "off_policy_estimator": {}, "info": {"num_steps_trained": 2232000, "num_steps_sampled": 1190400, "sample_time_ms": 21149.002, "load_time_ms": 36.704, "grad_time_ms": 9227.888, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 4.038967894916891e-29, "cur_lr": 0.0010000000474974513, "total_loss": 0.0010848678648471832, "policy_loss": -0.005190685391426086, "vf_loss": 68.93277740478516, "vf_explained_var": 0.7608636021614075, "kl": 0.0020486123394221067, "entropy": 1.2354419231414795, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1190400, "episodes_total": 2976, "training_iteration": 93, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-31-14", "timestamp": 1660249874, "time_this_iter_s": 30.926449298858643, "time_total_s": 8293.42159819603, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", 
"num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, 
"in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, 
"entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 8293.42159819603, "timesteps_since_restore": 1190400, "iterations_since_restore": 93, "perf": {"cpu_util_percent": 31.343181818181815, "ram_util_percent": 58.222727272727276}}
+{"episode_reward_max": 573.0, "episode_reward_min": 63.0, "episode_reward_mean": 462.26, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 29.0}, "policy_reward_max": {"ppo": 293.0}, "policy_reward_mean": {"ppo": 231.13}, "custom_metrics": {"sparse_reward_mean": 160.2, "sparse_reward_min": 20, "sparse_reward_max": 200, "shaped_reward_mean": 141.86, "shaped_reward_min": 23, "shaped_reward_max": 173, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.41, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 15.74, "onion_pickup_agent_1_min": 5, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 13.57, "useful_onion_pickup_agent_0_min": 4, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 15.05, "useful_onion_pickup_agent_1_min": 3, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 1.71, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 7, "onion_drop_agent_1_mean": 1.32, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.82, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 0.6, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 12.39, "potting_onion_agent_0_min": 3, "potting_onion_agent_0_max": 18, "potting_onion_agent_1_mean": 14.12, "potting_onion_agent_1_min": 2, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.86, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 13, "dish_pickup_agent_1_mean": 5.5, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 12, "useful_dish_pickup_agent_0_mean": 3.43, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 3.08, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 1.27, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 8, "dish_drop_agent_1_mean": 1.12, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.39, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 4, "useful_dish_drop_agent_1_mean": 0.41, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 4.52, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.31, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 4.22, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.11, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.19, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 3, "soup_drop_agent_1_mean": 0.17, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 5, "optimal_onion_potting_agent_0_mean": 12.39, 
"optimal_onion_potting_agent_0_min": 3, "optimal_onion_potting_agent_0_max": 18, "optimal_onion_potting_agent_1_mean": 14.12, "optimal_onion_potting_agent_1_min": 2, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 12.39, "viable_onion_potting_agent_0_min": 3, "viable_onion_potting_agent_0_max": 18, "viable_onion_potting_agent_1_mean": 14.12, "viable_onion_potting_agent_1_min": 2, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [482.0, 525.0, 507.0, 516.0, 478.0, 519.0, 516.0, 410.0, 513.0, 295.0, 401.0, 462.0, 501.0, 519.0, 63.0, 456.0, 381.0, 510.0, 464.0, 461.0, 473.0, 407.0, 570.0, 444.0, 444.0, 384.0, 467.0, 428.0, 456.0, 462.0, 513.0, 519.0, 416.0, 407.0, 392.0, 516.0, 513.0, 453.0, 516.0, 533.0, 296.0, 453.0, 501.0, 422.0, 465.0, 395.0, 530.0, 473.0, 519.0, 458.0, 404.0, 487.0, 449.0, 516.0, 516.0, 510.0, 504.0, 516.0, 516.0, 473.0, 516.0, 519.0, 470.0, 387.0, 522.0, 453.0, 421.0, 438.0, 444.0, 410.0, 481.0, 411.0, 456.0, 461.0, 230.0, 370.0, 398.0, 473.0, 513.0, 507.0, 461.0, 522.0, 573.0, 495.0, 516.0, 390.0, 501.0, 519.0, 525.0, 524.0, 479.0, 447.0, 467.0, 464.0, 450.0, 392.0, 522.0, 416.0, 478.0, 510.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [250.0, 232.0, 259.0, 266.0, 244.0, 263.0, 261.0, 255.0, 253.0, 225.0, 259.0, 260.0, 251.0, 265.0, 210.0, 200.0, 248.0, 265.0, 140.0, 155.0, 194.0, 207.0, 222.0, 240.0, 252.0, 249.0, 259.0, 260.0, 29.0, 34.0, 206.0, 250.0, 184.0, 197.0, 256.0, 254.0, 239.0, 225.0, 231.0, 230.0, 225.0, 248.0, 204.0, 203.0, 288.0, 282.0, 215.0, 229.0, 226.0, 218.0, 174.0, 210.0, 231.0, 236.0, 222.0, 206.0, 235.0, 221.0, 218.0, 244.0, 251.0, 262.0, 271.0, 248.0, 211.0, 205.0, 198.0, 209.0, 202.0, 190.0, 249.0, 267.0, 257.0, 256.0, 218.0, 235.0, 254.0, 262.0, 273.0, 260.0, 133.0, 163.0, 219.0, 234.0, 235.0, 266.0, 207.0, 215.0, 239.0, 226.0, 195.0, 200.0, 269.0, 261.0, 230.0, 243.0, 263.0, 
256.0, 229.0, 229.0, 202.0, 202.0, 247.0, 240.0, 225.0, 224.0, 248.0, 268.0, 250.0, 266.0, 252.0, 258.0, 249.0, 255.0, 272.0, 244.0, 254.0, 262.0, 232.0, 241.0, 260.0, 256.0, 250.0, 269.0, 233.0, 237.0, 189.0, 198.0, 265.0, 257.0, 233.0, 220.0, 209.0, 212.0, 218.0, 220.0, 224.0, 220.0, 205.0, 205.0, 229.0, 252.0, 210.0, 201.0, 236.0, 220.0, 223.0, 238.0, 124.0, 106.0, 174.0, 196.0, 191.0, 207.0, 241.0, 232.0, 260.0, 253.0, 268.0, 239.0, 227.0, 234.0, 262.0, 260.0, 293.0, 280.0, 250.0, 245.0, 249.0, 267.0, 195.0, 195.0, 255.0, 246.0, 261.0, 258.0, 254.0, 271.0, 262.0, 262.0, 247.0, 232.0, 201.0, 246.0, 230.0, 237.0, 223.0, 241.0, 221.0, 229.0, 197.0, 195.0, 254.0, 268.0, 211.0, 205.0, 239.0, 239.0, 259.0, 251.0]}, "sampler_perf": {"mean_env_wait_ms": 2.060765834369586, "mean_processing_ms": 0.47925634757705055, "mean_inference_ms": 2.608906134199901}, "off_policy_estimator": {}, "info": {"num_steps_trained": 2256000, "num_steps_sampled": 1203200, "sample_time_ms": 21186.356, "load_time_ms": 36.604, "grad_time_ms": 9318.639, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 2.0194839474584456e-29, "cur_lr": 0.0010000000474974513, "total_loss": 0.00026301448815502226, "policy_loss": -0.005903394427150488, "vf_loss": 67.8399658203125, "vf_explained_var": 0.7958834171295166, "kl": 0.0017271721735596657, "entropy": 1.2351765632629395, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1203200, "episodes_total": 3008, "training_iteration": 94, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-31-44", "timestamp": 1660249904, "time_this_iter_s": 30.80340886116028, "time_total_s": 8324.22500705719, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 
1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": 
false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, 
"entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 8324.22500705719, "timesteps_since_restore": 1203200, "iterations_since_restore": 94, "perf": {"cpu_util_percent": 33.47727272727272, "ram_util_percent": 58.21363636363637}}
+{"episode_reward_max": 573.0, "episode_reward_min": 63.0, "episode_reward_mean": 461.93, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 29.0}, "policy_reward_max": {"ppo": 293.0}, "policy_reward_mean": {"ppo": 230.965}, "custom_metrics": {"sparse_reward_mean": 160.2, "sparse_reward_min": 20, "sparse_reward_max": 200, "shaped_reward_mean": 141.53, "shaped_reward_min": 23, "shaped_reward_max": 173, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.54, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 15.69, "onion_pickup_agent_1_min": 5, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 13.8, "useful_onion_pickup_agent_0_min": 4, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 14.97, "useful_onion_pickup_agent_1_min": 3, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 1.8, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 8, "onion_drop_agent_1_mean": 1.18, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.82, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 0.6, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 12.44, "potting_onion_agent_0_min": 3, "potting_onion_agent_0_max": 18, "potting_onion_agent_1_mean": 14.16, "potting_onion_agent_1_min": 2, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.82, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.45, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 12, "useful_dish_pickup_agent_0_mean": 3.46, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 2.95, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 1.17, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 1.22, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.36, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.4, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 4.56, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.12, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.31, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.02, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.17, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 3, "soup_drop_agent_1_mean": 0.08, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 12.44, 
"optimal_onion_potting_agent_0_min": 3, "optimal_onion_potting_agent_0_max": 18, "optimal_onion_potting_agent_1_mean": 14.16, "optimal_onion_potting_agent_1_min": 2, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 12.44, "viable_onion_potting_agent_0_min": 3, "viable_onion_potting_agent_0_max": 18, "viable_onion_potting_agent_1_mean": 14.16, "viable_onion_potting_agent_1_min": 2, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [441.0, 453.0, 479.0, 405.0, 504.0, 456.0, 525.0, 519.0, 390.0, 379.0, 465.0, 522.0, 530.0, 359.0, 473.0, 453.0, 524.0, 441.0, 407.0, 525.0, 530.0, 453.0, 464.0, 525.0, 422.0, 492.0, 465.0, 398.0, 519.0, 513.0, 464.0, 513.0, 522.0, 453.0, 421.0, 438.0, 444.0, 410.0, 481.0, 411.0, 456.0, 461.0, 230.0, 370.0, 398.0, 473.0, 513.0, 507.0, 461.0, 522.0, 573.0, 495.0, 516.0, 390.0, 501.0, 519.0, 525.0, 524.0, 479.0, 447.0, 467.0, 464.0, 450.0, 392.0, 522.0, 416.0, 478.0, 510.0, 482.0, 525.0, 507.0, 516.0, 478.0, 519.0, 516.0, 410.0, 513.0, 295.0, 401.0, 462.0, 501.0, 519.0, 63.0, 456.0, 381.0, 510.0, 464.0, 461.0, 473.0, 407.0, 570.0, 444.0, 444.0, 384.0, 467.0, 428.0, 456.0, 462.0, 513.0, 519.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [213.0, 228.0, 227.0, 226.0, 246.0, 233.0, 199.0, 206.0, 253.0, 251.0, 230.0, 226.0, 265.0, 260.0, 258.0, 261.0, 193.0, 197.0, 197.0, 182.0, 228.0, 237.0, 249.0, 273.0, 264.0, 266.0, 183.0, 176.0, 242.0, 231.0, 205.0, 248.0, 262.0, 262.0, 234.0, 207.0, 192.0, 215.0, 264.0, 261.0, 270.0, 260.0, 232.0, 221.0, 236.0, 228.0, 266.0, 259.0, 195.0, 227.0, 257.0, 235.0, 230.0, 235.0, 212.0, 186.0, 264.0, 255.0, 250.0, 263.0, 229.0, 235.0, 253.0, 260.0, 265.0, 257.0, 233.0, 220.0, 209.0, 212.0, 218.0, 220.0, 224.0, 220.0, 205.0, 205.0, 229.0, 252.0, 210.0, 201.0, 236.0, 220.0, 223.0, 238.0, 124.0, 106.0, 174.0, 196.0, 191.0, 207.0, 241.0, 232.0, 260.0, 253.0, 268.0, 239.0, 227.0, 
234.0, 262.0, 260.0, 293.0, 280.0, 250.0, 245.0, 249.0, 267.0, 195.0, 195.0, 255.0, 246.0, 261.0, 258.0, 254.0, 271.0, 262.0, 262.0, 247.0, 232.0, 201.0, 246.0, 230.0, 237.0, 223.0, 241.0, 221.0, 229.0, 197.0, 195.0, 254.0, 268.0, 211.0, 205.0, 239.0, 239.0, 259.0, 251.0, 250.0, 232.0, 259.0, 266.0, 244.0, 263.0, 261.0, 255.0, 253.0, 225.0, 259.0, 260.0, 251.0, 265.0, 210.0, 200.0, 248.0, 265.0, 140.0, 155.0, 194.0, 207.0, 222.0, 240.0, 252.0, 249.0, 259.0, 260.0, 29.0, 34.0, 206.0, 250.0, 184.0, 197.0, 256.0, 254.0, 239.0, 225.0, 231.0, 230.0, 225.0, 248.0, 204.0, 203.0, 288.0, 282.0, 215.0, 229.0, 226.0, 218.0, 174.0, 210.0, 231.0, 236.0, 222.0, 206.0, 235.0, 221.0, 218.0, 244.0, 251.0, 262.0, 271.0, 248.0]}, "sampler_perf": {"mean_env_wait_ms": 2.043636062637732, "mean_processing_ms": 0.47584888992539853, "mean_inference_ms": 2.592145322205639}, "off_policy_estimator": {}, "info": {"num_steps_trained": 2280000, "num_steps_sampled": 1216000, "sample_time_ms": 21306.082, "load_time_ms": 36.597, "grad_time_ms": 9453.053, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 1.0097419737292228e-29, "cur_lr": 0.0010000000474974513, "total_loss": 0.001733560231514275, "policy_loss": -0.004548916593194008, "vf_loss": 68.90572357177734, "vf_explained_var": 0.7648184895515442, "kl": 0.0019422214245423675, "entropy": 1.216185212135315, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1216000, "episodes_total": 3040, "training_iteration": 95, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-32-16", "timestamp": 1660249936, "time_this_iter_s": 31.733500242233276, "time_total_s": 8355.958507299423, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 
1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": 
false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, 
"entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 8355.958507299423, "timesteps_since_restore": 1216000, "iterations_since_restore": 95, "perf": {"cpu_util_percent": 30.170454545454547, "ram_util_percent": 58.22272727272727}}
+{"episode_reward_max": 576.0, "episode_reward_min": 63.0, "episode_reward_mean": 464.77, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 29.0}, "policy_reward_max": {"ppo": 288.0}, "policy_reward_mean": {"ppo": 232.385}, "custom_metrics": {"sparse_reward_mean": 161.0, "sparse_reward_min": 20, "sparse_reward_max": 200, "shaped_reward_mean": 142.77, "shaped_reward_min": 23, "shaped_reward_max": 176, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.82, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 15.62, "onion_pickup_agent_1_min": 5, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 14.01, "useful_onion_pickup_agent_0_min": 4, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 14.82, "useful_onion_pickup_agent_1_min": 3, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 1.67, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 8, "onion_drop_agent_1_mean": 1.25, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 7, "useful_onion_drop_agent_0_mean": 0.84, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 0.65, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 12.83, "potting_onion_agent_0_min": 3, "potting_onion_agent_0_max": 18, "potting_onion_agent_1_mean": 13.99, "potting_onion_agent_1_min": 2, "potting_onion_agent_1_max": 19, "dish_pickup_agent_0_mean": 5.67, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.39, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 12, "useful_dish_pickup_agent_0_mean": 3.41, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 3.11, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 6, "dish_drop_agent_0_mean": 1.06, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 1.11, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.32, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.34, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.54, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.18, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.32, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.09, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.14, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 3, "soup_drop_agent_1_mean": 0.08, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 12.83, 
"optimal_onion_potting_agent_0_min": 3, "optimal_onion_potting_agent_0_max": 18, "optimal_onion_potting_agent_1_mean": 13.99, "optimal_onion_potting_agent_1_min": 2, "optimal_onion_potting_agent_1_max": 19, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 12.83, "viable_onion_potting_agent_0_min": 3, "viable_onion_potting_agent_0_max": 18, "viable_onion_potting_agent_1_mean": 13.99, "viable_onion_potting_agent_1_min": 2, "viable_onion_potting_agent_1_max": 19, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [447.0, 513.0, 487.0, 413.0, 348.0, 492.0, 421.0, 470.0, 476.0, 447.0, 504.0, 412.0, 470.0, 519.0, 516.0, 533.0, 404.0, 441.0, 447.0, 576.0, 459.0, 510.0, 408.0, 510.0, 329.0, 450.0, 510.0, 516.0, 525.0, 510.0, 450.0, 484.0, 522.0, 416.0, 478.0, 510.0, 482.0, 525.0, 507.0, 516.0, 478.0, 519.0, 516.0, 410.0, 513.0, 295.0, 401.0, 462.0, 501.0, 519.0, 63.0, 456.0, 381.0, 510.0, 464.0, 461.0, 473.0, 407.0, 570.0, 444.0, 444.0, 384.0, 467.0, 428.0, 456.0, 462.0, 513.0, 519.0, 441.0, 453.0, 479.0, 405.0, 504.0, 456.0, 525.0, 519.0, 390.0, 379.0, 465.0, 522.0, 530.0, 359.0, 473.0, 453.0, 524.0, 441.0, 407.0, 525.0, 530.0, 453.0, 464.0, 525.0, 422.0, 492.0, 465.0, 398.0, 519.0, 513.0, 464.0, 513.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [217.0, 230.0, 252.0, 261.0, 246.0, 241.0, 204.0, 209.0, 166.0, 182.0, 240.0, 252.0, 210.0, 211.0, 230.0, 240.0, 228.0, 248.0, 232.0, 215.0, 258.0, 246.0, 224.0, 188.0, 228.0, 242.0, 265.0, 254.0, 249.0, 267.0, 267.0, 266.0, 202.0, 202.0, 235.0, 206.0, 212.0, 235.0, 288.0, 288.0, 225.0, 234.0, 248.0, 262.0, 219.0, 189.0, 257.0, 253.0, 177.0, 152.0, 229.0, 221.0, 259.0, 251.0, 254.0, 262.0, 261.0, 264.0, 267.0, 243.0, 221.0, 229.0, 244.0, 240.0, 254.0, 268.0, 211.0, 205.0, 239.0, 239.0, 259.0, 251.0, 250.0, 232.0, 259.0, 266.0, 244.0, 263.0, 261.0, 255.0, 253.0, 225.0, 259.0, 260.0, 251.0, 265.0, 210.0, 200.0, 248.0, 265.0, 140.0, 155.0, 194.0, 207.0, 222.0, 240.0, 252.0, 
249.0, 259.0, 260.0, 29.0, 34.0, 206.0, 250.0, 184.0, 197.0, 256.0, 254.0, 239.0, 225.0, 231.0, 230.0, 225.0, 248.0, 204.0, 203.0, 288.0, 282.0, 215.0, 229.0, 226.0, 218.0, 174.0, 210.0, 231.0, 236.0, 222.0, 206.0, 235.0, 221.0, 218.0, 244.0, 251.0, 262.0, 271.0, 248.0, 213.0, 228.0, 227.0, 226.0, 246.0, 233.0, 199.0, 206.0, 253.0, 251.0, 230.0, 226.0, 265.0, 260.0, 258.0, 261.0, 193.0, 197.0, 197.0, 182.0, 228.0, 237.0, 249.0, 273.0, 264.0, 266.0, 183.0, 176.0, 242.0, 231.0, 205.0, 248.0, 262.0, 262.0, 234.0, 207.0, 192.0, 215.0, 264.0, 261.0, 270.0, 260.0, 232.0, 221.0, 236.0, 228.0, 266.0, 259.0, 195.0, 227.0, 257.0, 235.0, 230.0, 235.0, 212.0, 186.0, 264.0, 255.0, 250.0, 263.0, 229.0, 235.0, 253.0, 260.0]}, "sampler_perf": {"mean_env_wait_ms": 2.0268528055210124, "mean_processing_ms": 0.47250865851188434, "mean_inference_ms": 2.5756730471163247}, "off_policy_estimator": {}, "info": {"num_steps_trained": 2304000, "num_steps_sampled": 1228800, "sample_time_ms": 21320.865, "load_time_ms": 36.696, "grad_time_ms": 9593.729, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 5.048709868646114e-30, "cur_lr": 0.0010000000474974513, "total_loss": -0.0013039499754086137, "policy_loss": -0.007722912821918726, "vf_loss": 70.26915740966797, "vf_explained_var": 0.7757861018180847, "kl": 0.001609964296221733, "entropy": 1.2159069776535034, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1228800, "episodes_total": 3072, "training_iteration": 96, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-32-47", "timestamp": 1660249967, "time_this_iter_s": 30.381797075271606, "time_total_s": 8386.340304374695, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", 
"num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, 
"in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, 
"entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 8386.340304374695, "timesteps_since_restore": 1228800, "iterations_since_restore": 96, "perf": {"cpu_util_percent": 31.758139534883718, "ram_util_percent": 58.16046511627907}}
+{"episode_reward_max": 579.0, "episode_reward_min": 128.0, "episode_reward_mean": 470.53, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 63.0}, "policy_reward_max": {"ppo": 294.0}, "policy_reward_mean": {"ppo": 235.265}, "custom_metrics": {"sparse_reward_mean": 163.2, "sparse_reward_min": 40, "sparse_reward_max": 200, "shaped_reward_mean": 144.13, "shaped_reward_min": 48, "shaped_reward_max": 179, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.93, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 15.66, "onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 14.25, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 14.85, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 1.47, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 8, "onion_drop_agent_1_mean": 1.27, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 7, "useful_onion_drop_agent_0_mean": 0.72, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.73, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 13.11, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 14.05, "potting_onion_agent_1_min": 6, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.73, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.26, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 3.43, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 3.12, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 1.08, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 1.01, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.32, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.32, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.55, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.16, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 4.4, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.08, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.09, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.04, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 13.11, 
"optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 14.05, "optimal_onion_potting_agent_1_min": 6, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 13.11, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 14.05, "viable_onion_potting_agent_1_min": 6, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [525.0, 522.0, 522.0, 462.0, 446.0, 519.0, 573.0, 352.0, 473.0, 519.0, 507.0, 524.0, 519.0, 467.0, 513.0, 522.0, 504.0, 519.0, 456.0, 453.0, 168.0, 516.0, 476.0, 470.0, 507.0, 570.0, 516.0, 444.0, 128.0, 510.0, 579.0, 317.0, 456.0, 462.0, 513.0, 519.0, 441.0, 453.0, 479.0, 405.0, 504.0, 456.0, 525.0, 519.0, 390.0, 379.0, 465.0, 522.0, 530.0, 359.0, 473.0, 453.0, 524.0, 441.0, 407.0, 525.0, 530.0, 453.0, 464.0, 525.0, 422.0, 492.0, 465.0, 398.0, 519.0, 513.0, 464.0, 513.0, 447.0, 513.0, 487.0, 413.0, 348.0, 492.0, 421.0, 470.0, 476.0, 447.0, 504.0, 412.0, 470.0, 519.0, 516.0, 533.0, 404.0, 441.0, 447.0, 576.0, 459.0, 510.0, 408.0, 510.0, 329.0, 450.0, 510.0, 516.0, 525.0, 510.0, 450.0, 484.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [257.0, 268.0, 252.0, 270.0, 258.0, 264.0, 238.0, 224.0, 227.0, 219.0, 246.0, 273.0, 285.0, 288.0, 168.0, 184.0, 240.0, 233.0, 266.0, 253.0, 258.0, 249.0, 249.0, 275.0, 259.0, 260.0, 240.0, 227.0, 246.0, 267.0, 267.0, 255.0, 256.0, 248.0, 269.0, 250.0, 225.0, 231.0, 225.0, 228.0, 85.0, 83.0, 265.0, 251.0, 232.0, 244.0, 222.0, 248.0, 253.0, 254.0, 286.0, 284.0, 256.0, 260.0, 223.0, 221.0, 65.0, 63.0, 256.0, 254.0, 294.0, 285.0, 162.0, 155.0, 235.0, 221.0, 218.0, 244.0, 251.0, 262.0, 271.0, 248.0, 213.0, 228.0, 227.0, 226.0, 246.0, 233.0, 199.0, 206.0, 253.0, 251.0, 230.0, 226.0, 265.0, 260.0, 258.0, 261.0, 193.0, 197.0, 197.0, 182.0, 228.0, 237.0, 249.0, 273.0, 264.0, 
266.0, 183.0, 176.0, 242.0, 231.0, 205.0, 248.0, 262.0, 262.0, 234.0, 207.0, 192.0, 215.0, 264.0, 261.0, 270.0, 260.0, 232.0, 221.0, 236.0, 228.0, 266.0, 259.0, 195.0, 227.0, 257.0, 235.0, 230.0, 235.0, 212.0, 186.0, 264.0, 255.0, 250.0, 263.0, 229.0, 235.0, 253.0, 260.0, 217.0, 230.0, 252.0, 261.0, 246.0, 241.0, 204.0, 209.0, 166.0, 182.0, 240.0, 252.0, 210.0, 211.0, 230.0, 240.0, 228.0, 248.0, 232.0, 215.0, 258.0, 246.0, 224.0, 188.0, 228.0, 242.0, 265.0, 254.0, 249.0, 267.0, 267.0, 266.0, 202.0, 202.0, 235.0, 206.0, 212.0, 235.0, 288.0, 288.0, 225.0, 234.0, 248.0, 262.0, 219.0, 189.0, 257.0, 253.0, 177.0, 152.0, 229.0, 221.0, 259.0, 251.0, 254.0, 262.0, 261.0, 264.0, 267.0, 243.0, 221.0, 229.0, 244.0, 240.0]}, "sampler_perf": {"mean_env_wait_ms": 2.0104424851575393, "mean_processing_ms": 0.4692484587452608, "mean_inference_ms": 2.5596152283112645}, "off_policy_estimator": {}, "info": {"num_steps_trained": 2328000, "num_steps_sampled": 1241600, "sample_time_ms": 21335.822, "load_time_ms": 37.307, "grad_time_ms": 9680.502, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 2.524354934323057e-30, "cur_lr": 0.0010000000474974513, "total_loss": 0.001703931367956102, "policy_loss": -0.005316242575645447, "vf_loss": 76.20516204833984, "vf_explained_var": 0.7805307507514954, "kl": 0.002101513324305415, "entropy": 1.2007073163986206, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1241600, "episodes_total": 3104, "training_iteration": 97, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-33-18", "timestamp": 1660249998, "time_this_iter_s": 31.63303232192993, "time_total_s": 8417.973336696625, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 
1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": 
false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, 
"entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 8417.973336696625, "timesteps_since_restore": 1241600, "iterations_since_restore": 97, "perf": {"cpu_util_percent": 34.47777777777778, "ram_util_percent": 58.24}}
+{"episode_reward_max": 579.0, "episode_reward_min": 128.0, "episode_reward_mean": 478.77, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 63.0}, "policy_reward_max": {"ppo": 294.0}, "policy_reward_mean": {"ppo": 239.385}, "custom_metrics": {"sparse_reward_mean": 166.0, "sparse_reward_min": 40, "sparse_reward_max": 200, "shaped_reward_mean": 146.77, "shaped_reward_min": 48, "shaped_reward_max": 179, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.02, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 15.94, "onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 14.22, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 15.14, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 1.52, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 7, "onion_drop_agent_1_mean": 1.29, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 7, "useful_onion_drop_agent_0_mean": 0.7, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.74, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 13.12, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 14.32, "potting_onion_agent_1_min": 6, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.67, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.2, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 3.63, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 3.32, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.99, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 0.89, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.3, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.29, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.59, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.25, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 4.44, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.13, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.08, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.05, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 13.12, 
"optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 14.32, "optimal_onion_potting_agent_1_min": 6, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 13.12, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 14.32, "viable_onion_potting_agent_1_min": 6, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [473.0, 522.0, 510.0, 525.0, 513.0, 525.0, 459.0, 410.0, 504.0, 522.0, 522.0, 507.0, 237.0, 527.0, 525.0, 472.0, 501.0, 459.0, 450.0, 530.0, 519.0, 525.0, 530.0, 579.0, 522.0, 519.0, 479.0, 573.0, 530.0, 513.0, 344.0, 447.0, 519.0, 513.0, 464.0, 513.0, 447.0, 513.0, 487.0, 413.0, 348.0, 492.0, 421.0, 470.0, 476.0, 447.0, 504.0, 412.0, 470.0, 519.0, 516.0, 533.0, 404.0, 441.0, 447.0, 576.0, 459.0, 510.0, 408.0, 510.0, 329.0, 450.0, 510.0, 516.0, 525.0, 510.0, 450.0, 484.0, 525.0, 522.0, 522.0, 462.0, 446.0, 519.0, 573.0, 352.0, 473.0, 519.0, 507.0, 524.0, 519.0, 467.0, 513.0, 522.0, 504.0, 519.0, 456.0, 453.0, 168.0, 516.0, 476.0, 470.0, 507.0, 570.0, 516.0, 444.0, 128.0, 510.0, 579.0, 317.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [232.0, 241.0, 262.0, 260.0, 252.0, 258.0, 255.0, 270.0, 260.0, 253.0, 259.0, 266.0, 227.0, 232.0, 195.0, 215.0, 254.0, 250.0, 258.0, 264.0, 254.0, 268.0, 248.0, 259.0, 117.0, 120.0, 261.0, 266.0, 259.0, 266.0, 227.0, 245.0, 248.0, 253.0, 232.0, 227.0, 233.0, 217.0, 267.0, 263.0, 265.0, 254.0, 264.0, 261.0, 265.0, 265.0, 291.0, 288.0, 262.0, 260.0, 259.0, 260.0, 244.0, 235.0, 290.0, 283.0, 255.0, 275.0, 253.0, 260.0, 175.0, 169.0, 217.0, 230.0, 264.0, 255.0, 250.0, 263.0, 229.0, 235.0, 253.0, 260.0, 217.0, 230.0, 252.0, 261.0, 246.0, 241.0, 204.0, 209.0, 166.0, 182.0, 240.0, 252.0, 210.0, 211.0, 230.0, 240.0, 228.0, 248.0, 232.0, 215.0, 258.0, 246.0, 224.0, 188.0, 228.0, 
242.0, 265.0, 254.0, 249.0, 267.0, 267.0, 266.0, 202.0, 202.0, 235.0, 206.0, 212.0, 235.0, 288.0, 288.0, 225.0, 234.0, 248.0, 262.0, 219.0, 189.0, 257.0, 253.0, 177.0, 152.0, 229.0, 221.0, 259.0, 251.0, 254.0, 262.0, 261.0, 264.0, 267.0, 243.0, 221.0, 229.0, 244.0, 240.0, 257.0, 268.0, 252.0, 270.0, 258.0, 264.0, 238.0, 224.0, 227.0, 219.0, 246.0, 273.0, 285.0, 288.0, 168.0, 184.0, 240.0, 233.0, 266.0, 253.0, 258.0, 249.0, 249.0, 275.0, 259.0, 260.0, 240.0, 227.0, 246.0, 267.0, 267.0, 255.0, 256.0, 248.0, 269.0, 250.0, 225.0, 231.0, 225.0, 228.0, 85.0, 83.0, 265.0, 251.0, 232.0, 244.0, 222.0, 248.0, 253.0, 254.0, 286.0, 284.0, 256.0, 260.0, 223.0, 221.0, 65.0, 63.0, 256.0, 254.0, 294.0, 285.0, 162.0, 155.0]}, "sampler_perf": {"mean_env_wait_ms": 1.9943851070346994, "mean_processing_ms": 0.4660638204377501, "mean_inference_ms": 2.5438091293770433}, "off_policy_estimator": {}, "info": {"num_steps_trained": 2352000, "num_steps_sampled": 1254400, "sample_time_ms": 21313.715, "load_time_ms": 36.778, "grad_time_ms": 9499.638, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 1.2621774671615285e-30, "cur_lr": 0.0010000000474974513, "total_loss": 0.0019398670410737395, "policy_loss": -0.005300960969179869, "vf_loss": 78.3524398803711, "vf_explained_var": 0.7676915526390076, "kl": 0.0015995064750313759, "entropy": 1.188806414604187, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1254400, "episodes_total": 3136, "training_iteration": 98, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-33-47", "timestamp": 1660250027, "time_this_iter_s": 29.219672203063965, "time_total_s": 8447.193008899689, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 8447.193008899689, "timesteps_since_restore": 1254400, "iterations_since_restore": 98, "perf": {"cpu_util_percent": 33.91190476190476, "ram_util_percent": 58.29285714285714}}
+{"episode_reward_max": 579.0, "episode_reward_min": 128.0, "episode_reward_mean": 485.45, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 63.0}, "policy_reward_max": {"ppo": 294.0}, "policy_reward_mean": {"ppo": 242.725}, "custom_metrics": {"sparse_reward_mean": 168.4, "sparse_reward_min": 40, "sparse_reward_max": 200, "shaped_reward_mean": 148.65, "shaped_reward_min": 48, "shaped_reward_max": 179, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.54, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 15.76, "onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 14.85, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 15.04, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 1.67, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 7, "onion_drop_agent_1_mean": 1.33, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.69, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.74, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 13.5, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 14.11, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.43, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.34, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 3.61, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 3.63, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.9, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.76, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.3, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.25, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.41, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 4.54, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.28, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.37, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.08, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.07, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 13.5, 
"optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 14.11, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 13.5, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 14.11, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [458.0, 473.0, 516.0, 522.0, 524.0, 570.0, 453.0, 476.0, 458.0, 522.0, 576.0, 498.0, 519.0, 406.0, 516.0, 533.0, 450.0, 473.0, 525.0, 525.0, 464.0, 447.0, 441.0, 479.0, 467.0, 504.0, 507.0, 513.0, 519.0, 406.0, 467.0, 498.0, 525.0, 510.0, 450.0, 484.0, 525.0, 522.0, 522.0, 462.0, 446.0, 519.0, 573.0, 352.0, 473.0, 519.0, 507.0, 524.0, 519.0, 467.0, 513.0, 522.0, 504.0, 519.0, 456.0, 453.0, 168.0, 516.0, 476.0, 470.0, 507.0, 570.0, 516.0, 444.0, 128.0, 510.0, 579.0, 317.0, 473.0, 522.0, 510.0, 525.0, 513.0, 525.0, 459.0, 410.0, 504.0, 522.0, 522.0, 507.0, 237.0, 527.0, 525.0, 472.0, 501.0, 459.0, 450.0, 530.0, 519.0, 525.0, 530.0, 579.0, 522.0, 519.0, 479.0, 573.0, 530.0, 513.0, 344.0, 447.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [217.0, 241.0, 237.0, 236.0, 268.0, 248.0, 250.0, 272.0, 256.0, 268.0, 285.0, 285.0, 233.0, 220.0, 231.0, 245.0, 227.0, 231.0, 263.0, 259.0, 288.0, 288.0, 245.0, 253.0, 257.0, 262.0, 197.0, 209.0, 256.0, 260.0, 251.0, 282.0, 246.0, 204.0, 232.0, 241.0, 261.0, 264.0, 263.0, 262.0, 223.0, 241.0, 221.0, 226.0, 224.0, 217.0, 241.0, 238.0, 233.0, 234.0, 253.0, 251.0, 259.0, 248.0, 256.0, 257.0, 254.0, 265.0, 185.0, 221.0, 229.0, 238.0, 234.0, 264.0, 261.0, 264.0, 267.0, 243.0, 221.0, 229.0, 244.0, 240.0, 257.0, 268.0, 252.0, 270.0, 258.0, 264.0, 238.0, 224.0, 227.0, 219.0, 246.0, 273.0, 285.0, 288.0, 168.0, 184.0, 240.0, 233.0, 266.0, 253.0, 258.0, 249.0, 249.0, 275.0, 259.0, 
260.0, 240.0, 227.0, 246.0, 267.0, 267.0, 255.0, 256.0, 248.0, 269.0, 250.0, 225.0, 231.0, 225.0, 228.0, 85.0, 83.0, 265.0, 251.0, 232.0, 244.0, 222.0, 248.0, 253.0, 254.0, 286.0, 284.0, 256.0, 260.0, 223.0, 221.0, 65.0, 63.0, 256.0, 254.0, 294.0, 285.0, 162.0, 155.0, 232.0, 241.0, 262.0, 260.0, 252.0, 258.0, 255.0, 270.0, 260.0, 253.0, 259.0, 266.0, 227.0, 232.0, 195.0, 215.0, 254.0, 250.0, 258.0, 264.0, 254.0, 268.0, 248.0, 259.0, 117.0, 120.0, 261.0, 266.0, 259.0, 266.0, 227.0, 245.0, 248.0, 253.0, 232.0, 227.0, 233.0, 217.0, 267.0, 263.0, 265.0, 254.0, 264.0, 261.0, 265.0, 265.0, 291.0, 288.0, 262.0, 260.0, 259.0, 260.0, 244.0, 235.0, 290.0, 283.0, 255.0, 275.0, 253.0, 260.0, 175.0, 169.0, 217.0, 230.0]}, "sampler_perf": {"mean_env_wait_ms": 1.978664058751599, "mean_processing_ms": 0.4629441310230788, "mean_inference_ms": 2.5282732021268366}, "off_policy_estimator": {}, "info": {"num_steps_trained": 2376000, "num_steps_sampled": 1267200, "sample_time_ms": 21031.143, "load_time_ms": 36.859, "grad_time_ms": 9358.331, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 6.3108873358076425e-31, "cur_lr": 0.0010000000474974513, "total_loss": 0.00523378886282444, "policy_loss": -0.0017726494697853923, "vf_loss": 76.06880950927734, "vf_explained_var": 0.753373920917511, "kl": 0.001648509525693953, "entropy": 1.2008789777755737, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1267200, "episodes_total": 3168, "training_iteration": 99, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-34-16", "timestamp": 1660250056, "time_this_iter_s": 28.433568000793457, "time_total_s": 8475.626576900482, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 8475.626576900482, "timesteps_since_restore": 1267200, "iterations_since_restore": 99, "perf": {"cpu_util_percent": 33.795, "ram_util_percent": 58.2625}}
+{"episode_reward_max": 582.0, "episode_reward_min": 128.0, "episode_reward_mean": 480.94, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 63.0}, "policy_reward_max": {"ppo": 298.0}, "policy_reward_mean": {"ppo": 240.47}, "custom_metrics": {"sparse_reward_mean": 166.8, "sparse_reward_min": 40, "sparse_reward_max": 200, "shaped_reward_mean": 147.34, "shaped_reward_min": 48, "shaped_reward_max": 182, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.37, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 15.81, "onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 14.59, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 15.1, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 1.73, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 7, "onion_drop_agent_1_mean": 1.32, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.69, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.7, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 13.32, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 14.21, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 20, "dish_pickup_agent_0_mean": 5.5, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.56, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 3.5, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 3.45, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.96, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.92, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 6, "useful_dish_drop_agent_0_mean": 0.27, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.32, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 4.36, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 4.58, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.23, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.39, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.07, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.12, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 5, "optimal_onion_potting_agent_0_mean": 13.32, 
"optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 14.21, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 20, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 13.32, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 14.21, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 20, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [525.0, 504.0, 524.0, 513.0, 347.0, 522.0, 355.0, 336.0, 533.0, 459.0, 522.0, 384.0, 576.0, 567.0, 177.0, 582.0, 467.0, 470.0, 519.0, 533.0, 398.0, 510.0, 513.0, 441.0, 398.0, 516.0, 409.0, 507.0, 525.0, 516.0, 530.0, 404.0, 128.0, 510.0, 579.0, 317.0, 473.0, 522.0, 510.0, 525.0, 513.0, 525.0, 459.0, 410.0, 504.0, 522.0, 522.0, 507.0, 237.0, 527.0, 525.0, 472.0, 501.0, 459.0, 450.0, 530.0, 519.0, 525.0, 530.0, 579.0, 522.0, 519.0, 479.0, 573.0, 530.0, 513.0, 344.0, 447.0, 458.0, 473.0, 516.0, 522.0, 524.0, 570.0, 453.0, 476.0, 458.0, 522.0, 576.0, 498.0, 519.0, 406.0, 516.0, 533.0, 450.0, 473.0, 525.0, 525.0, 464.0, 447.0, 441.0, 479.0, 467.0, 504.0, 507.0, 513.0, 519.0, 406.0, 467.0, 498.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [252.0, 273.0, 265.0, 239.0, 251.0, 273.0, 254.0, 259.0, 188.0, 159.0, 260.0, 262.0, 168.0, 187.0, 151.0, 185.0, 275.0, 258.0, 239.0, 220.0, 264.0, 258.0, 187.0, 197.0, 288.0, 288.0, 270.0, 297.0, 89.0, 88.0, 284.0, 298.0, 241.0, 226.0, 241.0, 229.0, 259.0, 260.0, 254.0, 279.0, 194.0, 204.0, 259.0, 251.0, 256.0, 257.0, 227.0, 214.0, 186.0, 212.0, 240.0, 276.0, 193.0, 216.0, 242.0, 265.0, 259.0, 266.0, 262.0, 254.0, 276.0, 254.0, 205.0, 199.0, 65.0, 63.0, 256.0, 254.0, 294.0, 285.0, 162.0, 155.0, 232.0, 241.0, 262.0, 260.0, 252.0, 258.0, 255.0, 270.0, 260.0, 253.0, 259.0, 266.0, 227.0, 232.0, 195.0, 215.0, 254.0, 250.0, 258.0, 264.0, 254.0, 268.0, 248.0, 259.0, 117.0, 
120.0, 261.0, 266.0, 259.0, 266.0, 227.0, 245.0, 248.0, 253.0, 232.0, 227.0, 233.0, 217.0, 267.0, 263.0, 265.0, 254.0, 264.0, 261.0, 265.0, 265.0, 291.0, 288.0, 262.0, 260.0, 259.0, 260.0, 244.0, 235.0, 290.0, 283.0, 255.0, 275.0, 253.0, 260.0, 175.0, 169.0, 217.0, 230.0, 217.0, 241.0, 237.0, 236.0, 268.0, 248.0, 250.0, 272.0, 256.0, 268.0, 285.0, 285.0, 233.0, 220.0, 231.0, 245.0, 227.0, 231.0, 263.0, 259.0, 288.0, 288.0, 245.0, 253.0, 257.0, 262.0, 197.0, 209.0, 256.0, 260.0, 251.0, 282.0, 246.0, 204.0, 232.0, 241.0, 261.0, 264.0, 263.0, 262.0, 223.0, 241.0, 221.0, 226.0, 224.0, 217.0, 241.0, 238.0, 233.0, 234.0, 253.0, 251.0, 259.0, 248.0, 256.0, 257.0, 254.0, 265.0, 185.0, 221.0, 229.0, 238.0, 234.0, 264.0]}, "sampler_perf": {"mean_env_wait_ms": 1.9632342892537746, "mean_processing_ms": 0.4598755283783044, "mean_inference_ms": 2.5128753018749035}, "off_policy_estimator": {}, "info": {"num_steps_trained": 2400000, "num_steps_sampled": 1280000, "sample_time_ms": 20819.102, "load_time_ms": 36.792, "grad_time_ms": 9258.115, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 3.1554436679038213e-31, "cur_lr": 0.0010000000474974513, "total_loss": -0.0015751657774671912, "policy_loss": -0.008594638668000698, "vf_loss": 76.2179183959961, "vf_explained_var": 0.7723303437232971, "kl": 0.002320505678653717, "entropy": 1.2046717405319214, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1280000, "episodes_total": 3200, "training_iteration": 100, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-34-45", "timestamp": 1660250085, "time_this_iter_s": 29.270292043685913, "time_total_s": 8504.896868944168, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", 
"num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, 
"in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, 
"entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 8504.896868944168, "timesteps_since_restore": 1280000, "iterations_since_restore": 100, "perf": {"cpu_util_percent": 32.380487804878044, "ram_util_percent": 58.27560975609755}}
+{"episode_reward_max": 582.0, "episode_reward_min": 177.0, "episode_reward_mean": 476.37, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 86.0}, "policy_reward_max": {"ppo": 298.0}, "policy_reward_mean": {"ppo": 238.185}, "custom_metrics": {"sparse_reward_mean": 165.2, "sparse_reward_min": 60, "sparse_reward_max": 200, "shaped_reward_mean": 145.97, "shaped_reward_min": 57, "shaped_reward_max": 182, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.43, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 15.61, "onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 14.55, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 14.93, "useful_onion_pickup_agent_1_min": 6, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 1.79, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 9, "onion_drop_agent_1_mean": 1.36, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.83, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 0.7, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 13.36, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 13.91, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.54, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 13, "dish_pickup_agent_1_mean": 5.72, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 3.34, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 3.53, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 1.11, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 1.04, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 6, "useful_dish_drop_agent_0_mean": 0.3, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.31, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 4.26, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 4.58, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.16, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.37, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.06, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.13, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 5, "optimal_onion_potting_agent_0_mean": 13.36, 
"optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 13.91, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 13.36, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 13.91, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [447.0, 450.0, 522.0, 513.0, 519.0, 351.0, 573.0, 425.0, 473.0, 530.0, 479.0, 404.0, 470.0, 522.0, 525.0, 516.0, 197.0, 458.0, 519.0, 464.0, 180.0, 299.0, 519.0, 527.0, 506.0, 516.0, 522.0, 473.0, 570.0, 504.0, 530.0, 513.0, 530.0, 513.0, 344.0, 447.0, 458.0, 473.0, 516.0, 522.0, 524.0, 570.0, 453.0, 476.0, 458.0, 522.0, 576.0, 498.0, 519.0, 406.0, 516.0, 533.0, 450.0, 473.0, 525.0, 525.0, 464.0, 447.0, 441.0, 479.0, 467.0, 504.0, 507.0, 513.0, 519.0, 406.0, 467.0, 498.0, 525.0, 504.0, 524.0, 513.0, 347.0, 522.0, 355.0, 336.0, 533.0, 459.0, 522.0, 384.0, 576.0, 567.0, 177.0, 582.0, 467.0, 470.0, 519.0, 533.0, 398.0, 510.0, 513.0, 441.0, 398.0, 516.0, 409.0, 507.0, 525.0, 516.0, 530.0, 404.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [225.0, 222.0, 232.0, 218.0, 254.0, 268.0, 254.0, 259.0, 258.0, 261.0, 180.0, 171.0, 288.0, 285.0, 221.0, 204.0, 256.0, 217.0, 269.0, 261.0, 236.0, 243.0, 197.0, 207.0, 242.0, 228.0, 260.0, 262.0, 262.0, 263.0, 270.0, 246.0, 102.0, 95.0, 217.0, 241.0, 268.0, 251.0, 217.0, 247.0, 86.0, 94.0, 143.0, 156.0, 262.0, 257.0, 259.0, 268.0, 261.0, 245.0, 241.0, 275.0, 260.0, 262.0, 220.0, 253.0, 285.0, 285.0, 240.0, 264.0, 272.0, 258.0, 244.0, 269.0, 255.0, 275.0, 253.0, 260.0, 175.0, 169.0, 217.0, 230.0, 217.0, 241.0, 237.0, 236.0, 268.0, 248.0, 250.0, 272.0, 256.0, 268.0, 285.0, 285.0, 233.0, 220.0, 231.0, 245.0, 227.0, 231.0, 263.0, 259.0, 288.0, 288.0, 245.0, 253.0, 257.0, 
262.0, 197.0, 209.0, 256.0, 260.0, 251.0, 282.0, 246.0, 204.0, 232.0, 241.0, 261.0, 264.0, 263.0, 262.0, 223.0, 241.0, 221.0, 226.0, 224.0, 217.0, 241.0, 238.0, 233.0, 234.0, 253.0, 251.0, 259.0, 248.0, 256.0, 257.0, 254.0, 265.0, 185.0, 221.0, 229.0, 238.0, 234.0, 264.0, 252.0, 273.0, 265.0, 239.0, 251.0, 273.0, 254.0, 259.0, 188.0, 159.0, 260.0, 262.0, 168.0, 187.0, 151.0, 185.0, 275.0, 258.0, 239.0, 220.0, 264.0, 258.0, 187.0, 197.0, 288.0, 288.0, 270.0, 297.0, 89.0, 88.0, 284.0, 298.0, 241.0, 226.0, 241.0, 229.0, 259.0, 260.0, 254.0, 279.0, 194.0, 204.0, 259.0, 251.0, 256.0, 257.0, 227.0, 214.0, 186.0, 212.0, 240.0, 276.0, 193.0, 216.0, 242.0, 265.0, 259.0, 266.0, 262.0, 254.0, 276.0, 254.0, 205.0, 199.0]}, "sampler_perf": {"mean_env_wait_ms": 1.9480966486925166, "mean_processing_ms": 0.45685958478274097, "mean_inference_ms": 2.497748516500124}, "off_policy_estimator": {}, "info": {"num_steps_trained": 2424000, "num_steps_sampled": 1292800, "sample_time_ms": 20962.686, "load_time_ms": 36.873, "grad_time_ms": 9154.171, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 1.5777218339519106e-31, "cur_lr": 0.0010000000474974513, "total_loss": 0.00011571295181056485, "policy_loss": -0.006676681339740753, "vf_loss": 73.92855834960938, "vf_explained_var": 0.7952176928520203, "kl": 0.0016933353617787361, "entropy": 1.2009243965148926, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1292800, "episodes_total": 3232, "training_iteration": 101, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-35-15", "timestamp": 1660250115, "time_this_iter_s": 29.051042795181274, "time_total_s": 8533.94791173935, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", 
"num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, 
"in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, 
"entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 8533.94791173935, "timesteps_since_restore": 1292800, "iterations_since_restore": 101, "perf": {"cpu_util_percent": 29.43571428571429, "ram_util_percent": 58.27142857142857}}
+{"episode_reward_max": 582.0, "episode_reward_min": 177.0, "episode_reward_mean": 479.09, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 86.0}, "policy_reward_max": {"ppo": 298.0}, "policy_reward_mean": {"ppo": 239.545}, "custom_metrics": {"sparse_reward_mean": 166.0, "sparse_reward_min": 60, "sparse_reward_max": 200, "shaped_reward_mean": 147.09, "shaped_reward_min": 57, "shaped_reward_max": 182, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.17, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 15.98, "onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 14.22, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 15.32, "useful_onion_pickup_agent_1_min": 6, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 1.87, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 10, "onion_drop_agent_1_mean": 1.25, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.88, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 0.56, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 13.04, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 14.4, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.68, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 13, "dish_pickup_agent_1_mean": 5.72, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 3.51, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 3.53, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 1.1, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 1.12, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 6, "useful_dish_drop_agent_0_mean": 0.26, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.32, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 4.4, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 4.45, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.29, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.25, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.06, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.13, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 5, "optimal_onion_potting_agent_0_mean": 13.04, 
"optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 14.4, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 13.04, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 14.4, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [530.0, 487.0, 513.0, 570.0, 513.0, 570.0, 522.0, 476.0, 446.0, 453.0, 459.0, 522.0, 504.0, 468.0, 516.0, 479.0, 464.0, 522.0, 516.0, 467.0, 444.0, 401.0, 522.0, 510.0, 530.0, 513.0, 482.0, 507.0, 516.0, 516.0, 513.0, 470.0, 519.0, 406.0, 467.0, 498.0, 525.0, 504.0, 524.0, 513.0, 347.0, 522.0, 355.0, 336.0, 533.0, 459.0, 522.0, 384.0, 576.0, 567.0, 177.0, 582.0, 467.0, 470.0, 519.0, 533.0, 398.0, 510.0, 513.0, 441.0, 398.0, 516.0, 409.0, 507.0, 525.0, 516.0, 530.0, 404.0, 447.0, 450.0, 522.0, 513.0, 519.0, 351.0, 573.0, 425.0, 473.0, 530.0, 479.0, 404.0, 470.0, 522.0, 525.0, 516.0, 197.0, 458.0, 519.0, 464.0, 180.0, 299.0, 519.0, 527.0, 506.0, 516.0, 522.0, 473.0, 570.0, 504.0, 530.0, 513.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [261.0, 269.0, 234.0, 253.0, 261.0, 252.0, 282.0, 288.0, 251.0, 262.0, 287.0, 283.0, 265.0, 257.0, 243.0, 233.0, 212.0, 234.0, 244.0, 209.0, 226.0, 233.0, 254.0, 268.0, 247.0, 257.0, 236.0, 232.0, 265.0, 251.0, 240.0, 239.0, 226.0, 238.0, 259.0, 263.0, 237.0, 279.0, 233.0, 234.0, 213.0, 231.0, 196.0, 205.0, 246.0, 276.0, 251.0, 259.0, 272.0, 258.0, 256.0, 257.0, 226.0, 256.0, 244.0, 263.0, 267.0, 249.0, 260.0, 256.0, 251.0, 262.0, 233.0, 237.0, 254.0, 265.0, 185.0, 221.0, 229.0, 238.0, 234.0, 264.0, 252.0, 273.0, 265.0, 239.0, 251.0, 273.0, 254.0, 259.0, 188.0, 159.0, 260.0, 262.0, 168.0, 187.0, 151.0, 185.0, 275.0, 258.0, 239.0, 220.0, 264.0, 258.0, 187.0, 197.0, 288.0, 
288.0, 270.0, 297.0, 89.0, 88.0, 284.0, 298.0, 241.0, 226.0, 241.0, 229.0, 259.0, 260.0, 254.0, 279.0, 194.0, 204.0, 259.0, 251.0, 256.0, 257.0, 227.0, 214.0, 186.0, 212.0, 240.0, 276.0, 193.0, 216.0, 242.0, 265.0, 259.0, 266.0, 262.0, 254.0, 276.0, 254.0, 205.0, 199.0, 225.0, 222.0, 232.0, 218.0, 254.0, 268.0, 254.0, 259.0, 258.0, 261.0, 180.0, 171.0, 288.0, 285.0, 221.0, 204.0, 256.0, 217.0, 269.0, 261.0, 236.0, 243.0, 197.0, 207.0, 242.0, 228.0, 260.0, 262.0, 262.0, 263.0, 270.0, 246.0, 102.0, 95.0, 217.0, 241.0, 268.0, 251.0, 217.0, 247.0, 86.0, 94.0, 143.0, 156.0, 262.0, 257.0, 259.0, 268.0, 261.0, 245.0, 241.0, 275.0, 260.0, 262.0, 220.0, 253.0, 285.0, 285.0, 240.0, 264.0, 272.0, 258.0, 244.0, 269.0]}, "sampler_perf": {"mean_env_wait_ms": 1.9332518889221049, "mean_processing_ms": 0.4539033753742656, "mean_inference_ms": 2.483090315311066}, "off_policy_estimator": {}, "info": {"num_steps_trained": 2448000, "num_steps_sampled": 1305600, "sample_time_ms": 21004.371, "load_time_ms": 37.153, "grad_time_ms": 9035.152, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 7.888609169759553e-32, "cur_lr": 0.0010000000474974513, "total_loss": 0.00245770625770092, "policy_loss": -0.003976076375693083, "vf_loss": 70.27108764648438, "vf_explained_var": 0.7766384482383728, "kl": 0.001931712031364441, "entropy": 1.1866337060928345, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1305600, "episodes_total": 3264, "training_iteration": 102, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-35-45", "timestamp": 1660250145, "time_this_iter_s": 29.56961703300476, "time_total_s": 8563.517528772354, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 8563.517528772354, "timesteps_since_restore": 1305600, "iterations_since_restore": 102, "perf": {"cpu_util_percent": 29.607142857142858, "ram_util_percent": 58.27380952380951}}
+{"episode_reward_max": 579.0, "episode_reward_min": 180.0, "episode_reward_mean": 488.16, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 86.0}, "policy_reward_max": {"ppo": 296.0}, "policy_reward_mean": {"ppo": 244.08}, "custom_metrics": {"sparse_reward_mean": 169.2, "sparse_reward_min": 60, "sparse_reward_max": 200, "shaped_reward_mean": 149.76, "shaped_reward_min": 60, "shaped_reward_max": 179, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.36, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 16.19, "onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 14.44, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 15.55, "useful_onion_pickup_agent_1_min": 6, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 1.71, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 10, "onion_drop_agent_1_mean": 1.22, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.85, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 0.56, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 13.33, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 14.56, "potting_onion_agent_1_min": 6, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.68, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 13, "dish_pickup_agent_1_mean": 5.48, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 3.73, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 3.6, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 1.02, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 0.96, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.2, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.21, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.54, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 4.38, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.43, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.22, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.07, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.1, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 5, "optimal_onion_potting_agent_0_mean": 13.33, 
"optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 14.56, "optimal_onion_potting_agent_1_min": 6, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 13.33, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 14.56, "viable_onion_potting_agent_1_min": 6, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [519.0, 525.0, 396.0, 513.0, 479.0, 455.0, 525.0, 518.0, 519.0, 350.0, 579.0, 516.0, 519.0, 504.0, 455.0, 510.0, 384.0, 570.0, 498.0, 473.0, 519.0, 576.0, 516.0, 573.0, 465.0, 510.0, 533.0, 504.0, 525.0, 450.0, 482.0, 444.0, 525.0, 516.0, 530.0, 404.0, 447.0, 450.0, 522.0, 513.0, 519.0, 351.0, 573.0, 425.0, 473.0, 530.0, 479.0, 404.0, 470.0, 522.0, 525.0, 516.0, 197.0, 458.0, 519.0, 464.0, 180.0, 299.0, 519.0, 527.0, 506.0, 516.0, 522.0, 473.0, 570.0, 504.0, 530.0, 513.0, 530.0, 487.0, 513.0, 570.0, 513.0, 570.0, 522.0, 476.0, 446.0, 453.0, 459.0, 522.0, 504.0, 468.0, 516.0, 479.0, 464.0, 522.0, 516.0, 467.0, 444.0, 401.0, 522.0, 510.0, 530.0, 513.0, 482.0, 507.0, 516.0, 516.0, 513.0, 470.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [249.0, 270.0, 262.0, 263.0, 196.0, 200.0, 259.0, 254.0, 234.0, 245.0, 226.0, 229.0, 260.0, 265.0, 267.0, 251.0, 251.0, 268.0, 176.0, 174.0, 285.0, 294.0, 259.0, 257.0, 262.0, 257.0, 260.0, 244.0, 219.0, 236.0, 251.0, 259.0, 179.0, 205.0, 277.0, 293.0, 250.0, 248.0, 244.0, 229.0, 256.0, 263.0, 280.0, 296.0, 254.0, 262.0, 290.0, 283.0, 242.0, 223.0, 264.0, 246.0, 260.0, 273.0, 245.0, 259.0, 267.0, 258.0, 222.0, 228.0, 236.0, 246.0, 240.0, 204.0, 259.0, 266.0, 262.0, 254.0, 276.0, 254.0, 205.0, 199.0, 225.0, 222.0, 232.0, 218.0, 254.0, 268.0, 254.0, 259.0, 258.0, 261.0, 180.0, 171.0, 288.0, 285.0, 221.0, 204.0, 256.0, 217.0, 269.0, 261.0, 236.0, 243.0, 197.0, 207.0, 242.0, 
228.0, 260.0, 262.0, 262.0, 263.0, 270.0, 246.0, 102.0, 95.0, 217.0, 241.0, 268.0, 251.0, 217.0, 247.0, 86.0, 94.0, 143.0, 156.0, 262.0, 257.0, 259.0, 268.0, 261.0, 245.0, 241.0, 275.0, 260.0, 262.0, 220.0, 253.0, 285.0, 285.0, 240.0, 264.0, 272.0, 258.0, 244.0, 269.0, 261.0, 269.0, 234.0, 253.0, 261.0, 252.0, 282.0, 288.0, 251.0, 262.0, 287.0, 283.0, 265.0, 257.0, 243.0, 233.0, 212.0, 234.0, 244.0, 209.0, 226.0, 233.0, 254.0, 268.0, 247.0, 257.0, 236.0, 232.0, 265.0, 251.0, 240.0, 239.0, 226.0, 238.0, 259.0, 263.0, 237.0, 279.0, 233.0, 234.0, 213.0, 231.0, 196.0, 205.0, 246.0, 276.0, 251.0, 259.0, 272.0, 258.0, 256.0, 257.0, 226.0, 256.0, 244.0, 263.0, 267.0, 249.0, 260.0, 256.0, 251.0, 262.0, 233.0, 237.0]}, "sampler_perf": {"mean_env_wait_ms": 1.9186906528346697, "mean_processing_ms": 0.45100660003140314, "mean_inference_ms": 2.4688879649561444}, "off_policy_estimator": {}, "info": {"num_steps_trained": 2472000, "num_steps_sampled": 1318400, "sample_time_ms": 21037.799, "load_time_ms": 37.038, "grad_time_ms": 8875.986, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 3.9443045848797766e-32, "cur_lr": 0.0010000000474974513, "total_loss": 0.0011967640602961183, "policy_loss": -0.0056260935962200165, "vf_loss": 74.17142486572266, "vf_explained_var": 0.7644608616828918, "kl": 0.0018772757612168789, "entropy": 1.188565731048584, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1318400, "episodes_total": 3296, "training_iteration": 103, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-36-14", "timestamp": 1660250174, "time_this_iter_s": 29.66763973236084, "time_total_s": 8593.185168504715, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", 
"num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, 
"in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, 
"entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 8593.185168504715, "timesteps_since_restore": 1318400, "iterations_since_restore": 103, "perf": {"cpu_util_percent": 31.43809523809524, "ram_util_percent": 58.23095238095237}}
+{"episode_reward_max": 579.0, "episode_reward_min": 350.0, "episode_reward_mean": 503.97, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 174.0}, "policy_reward_max": {"ppo": 298.0}, "policy_reward_mean": {"ppo": 251.985}, "custom_metrics": {"sparse_reward_mean": 175.4, "sparse_reward_min": 120, "sparse_reward_max": 200, "shaped_reward_mean": 153.17, "shaped_reward_min": 104, "shaped_reward_max": 179, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.41, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 16.39, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 14.67, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 15.84, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 1.53, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 10, "onion_drop_agent_1_mean": 1.01, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.76, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 0.51, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 13.58, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 14.95, "potting_onion_agent_1_min": 6, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.84, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.4, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 3.79, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 3.62, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.96, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.85, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.21, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.24, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.71, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 4.41, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.62, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.34, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.05, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.04, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 13.58, 
"optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 14.95, "optimal_onion_potting_agent_1_min": 6, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 13.58, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 14.95, "viable_onion_potting_agent_1_min": 6, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [573.0, 573.0, 513.0, 519.0, 393.0, 564.0, 570.0, 570.0, 513.0, 513.0, 522.0, 492.0, 519.0, 413.0, 570.0, 482.0, 570.0, 510.0, 513.0, 462.0, 522.0, 498.0, 570.0, 498.0, 518.0, 510.0, 522.0, 504.0, 524.0, 536.0, 447.0, 452.0, 570.0, 504.0, 530.0, 513.0, 530.0, 487.0, 513.0, 570.0, 513.0, 570.0, 522.0, 476.0, 446.0, 453.0, 459.0, 522.0, 504.0, 468.0, 516.0, 479.0, 464.0, 522.0, 516.0, 467.0, 444.0, 401.0, 522.0, 510.0, 530.0, 513.0, 482.0, 507.0, 516.0, 516.0, 513.0, 470.0, 519.0, 525.0, 396.0, 513.0, 479.0, 455.0, 525.0, 518.0, 519.0, 350.0, 579.0, 516.0, 519.0, 504.0, 455.0, 510.0, 384.0, 570.0, 498.0, 473.0, 519.0, 576.0, 516.0, 573.0, 465.0, 510.0, 533.0, 504.0, 525.0, 450.0, 482.0, 444.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [293.0, 280.0, 293.0, 280.0, 255.0, 258.0, 259.0, 260.0, 194.0, 199.0, 272.0, 292.0, 295.0, 275.0, 278.0, 292.0, 250.0, 263.0, 265.0, 248.0, 264.0, 258.0, 246.0, 246.0, 259.0, 260.0, 217.0, 196.0, 276.0, 294.0, 249.0, 233.0, 272.0, 298.0, 243.0, 267.0, 259.0, 254.0, 237.0, 225.0, 249.0, 273.0, 255.0, 243.0, 293.0, 277.0, 250.0, 248.0, 248.0, 270.0, 259.0, 251.0, 260.0, 262.0, 251.0, 253.0, 269.0, 255.0, 281.0, 255.0, 227.0, 220.0, 227.0, 225.0, 285.0, 285.0, 240.0, 264.0, 272.0, 258.0, 244.0, 269.0, 261.0, 269.0, 234.0, 253.0, 261.0, 252.0, 282.0, 288.0, 251.0, 262.0, 287.0, 283.0, 265.0, 257.0, 243.0, 233.0, 212.0, 234.0, 244.0, 209.0, 226.0, 233.0, 254.0, 268.0, 247.0, 
257.0, 236.0, 232.0, 265.0, 251.0, 240.0, 239.0, 226.0, 238.0, 259.0, 263.0, 237.0, 279.0, 233.0, 234.0, 213.0, 231.0, 196.0, 205.0, 246.0, 276.0, 251.0, 259.0, 272.0, 258.0, 256.0, 257.0, 226.0, 256.0, 244.0, 263.0, 267.0, 249.0, 260.0, 256.0, 251.0, 262.0, 233.0, 237.0, 249.0, 270.0, 262.0, 263.0, 196.0, 200.0, 259.0, 254.0, 234.0, 245.0, 226.0, 229.0, 260.0, 265.0, 267.0, 251.0, 251.0, 268.0, 176.0, 174.0, 285.0, 294.0, 259.0, 257.0, 262.0, 257.0, 260.0, 244.0, 219.0, 236.0, 251.0, 259.0, 179.0, 205.0, 277.0, 293.0, 250.0, 248.0, 244.0, 229.0, 256.0, 263.0, 280.0, 296.0, 254.0, 262.0, 290.0, 283.0, 242.0, 223.0, 264.0, 246.0, 260.0, 273.0, 245.0, 259.0, 267.0, 258.0, 222.0, 228.0, 236.0, 246.0, 240.0, 204.0]}, "sampler_perf": {"mean_env_wait_ms": 1.9044104614216666, "mean_processing_ms": 0.44816608164827715, "mean_inference_ms": 2.4550939840364316}, "off_policy_estimator": {}, "info": {"num_steps_trained": 2496000, "num_steps_sampled": 1331200, "sample_time_ms": 21059.641, "load_time_ms": 36.972, "grad_time_ms": 8872.978, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 1.9721522924398883e-32, "cur_lr": 0.0010000000474974513, "total_loss": 0.002359784208238125, "policy_loss": -0.004356598015874624, "vf_loss": 73.04959106445312, "vf_explained_var": 0.7670376896858215, "kl": 0.0017897128127515316, "entropy": 1.1771515607833862, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1331200, "episodes_total": 3328, "training_iteration": 104, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-36-45", "timestamp": 1660250205, "time_this_iter_s": 30.991883993148804, "time_total_s": 8624.177052497864, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", 
"num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, 
"in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, 
"entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 8624.177052497864, "timesteps_since_restore": 1331200, "iterations_since_restore": 104, "perf": {"cpu_util_percent": 35.49545454545455, "ram_util_percent": 58.338636363636354}}
+{"episode_reward_max": 579.0, "episode_reward_min": 350.0, "episode_reward_mean": 506.67, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 174.0}, "policy_reward_max": {"ppo": 298.0}, "policy_reward_mean": {"ppo": 253.335}, "custom_metrics": {"sparse_reward_mean": 176.4, "sparse_reward_min": 120, "sparse_reward_max": 200, "shaped_reward_mean": 153.87, "shaped_reward_min": 104, "shaped_reward_max": 179, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.18, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 16.08, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 14.67, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 15.52, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 1.13, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 0.93, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.56, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, "useful_onion_drop_agent_1_mean": 0.47, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 13.78, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 14.76, "potting_onion_agent_1_min": 6, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.8, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.3, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 3.93, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 3.67, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.9, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.76, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.18, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.19, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.73, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 4.38, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.64, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.31, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.03, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.04, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 13.78, 
"optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 14.76, "optimal_onion_potting_agent_1_min": 6, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 13.78, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 14.76, "viable_onion_potting_agent_1_min": 6, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [467.0, 525.0, 527.0, 464.0, 516.0, 459.0, 516.0, 570.0, 519.0, 522.0, 416.0, 504.0, 507.0, 513.0, 458.0, 489.0, 522.0, 519.0, 510.0, 519.0, 525.0, 576.0, 476.0, 522.0, 513.0, 530.0, 530.0, 459.0, 516.0, 498.0, 530.0, 576.0, 516.0, 516.0, 513.0, 470.0, 519.0, 525.0, 396.0, 513.0, 479.0, 455.0, 525.0, 518.0, 519.0, 350.0, 579.0, 516.0, 519.0, 504.0, 455.0, 510.0, 384.0, 570.0, 498.0, 473.0, 519.0, 576.0, 516.0, 573.0, 465.0, 510.0, 533.0, 504.0, 525.0, 450.0, 482.0, 444.0, 573.0, 573.0, 513.0, 519.0, 393.0, 564.0, 570.0, 570.0, 513.0, 513.0, 522.0, 492.0, 519.0, 413.0, 570.0, 482.0, 570.0, 510.0, 513.0, 462.0, 522.0, 498.0, 570.0, 498.0, 518.0, 510.0, 522.0, 504.0, 524.0, 536.0, 447.0, 452.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [237.0, 230.0, 259.0, 266.0, 271.0, 256.0, 211.0, 253.0, 256.0, 260.0, 232.0, 227.0, 267.0, 249.0, 280.0, 290.0, 262.0, 257.0, 259.0, 263.0, 210.0, 206.0, 247.0, 257.0, 254.0, 253.0, 252.0, 261.0, 231.0, 227.0, 240.0, 249.0, 273.0, 249.0, 259.0, 260.0, 253.0, 257.0, 270.0, 249.0, 255.0, 270.0, 284.0, 292.0, 247.0, 229.0, 265.0, 257.0, 245.0, 268.0, 260.0, 270.0, 273.0, 257.0, 214.0, 245.0, 260.0, 256.0, 251.0, 247.0, 262.0, 268.0, 291.0, 285.0, 267.0, 249.0, 260.0, 256.0, 251.0, 262.0, 233.0, 237.0, 249.0, 270.0, 262.0, 263.0, 196.0, 200.0, 259.0, 254.0, 234.0, 245.0, 226.0, 229.0, 260.0, 265.0, 267.0, 251.0, 251.0, 268.0, 176.0, 174.0, 285.0, 294.0, 259.0, 257.0, 262.0, 
257.0, 260.0, 244.0, 219.0, 236.0, 251.0, 259.0, 179.0, 205.0, 277.0, 293.0, 250.0, 248.0, 244.0, 229.0, 256.0, 263.0, 280.0, 296.0, 254.0, 262.0, 290.0, 283.0, 242.0, 223.0, 264.0, 246.0, 260.0, 273.0, 245.0, 259.0, 267.0, 258.0, 222.0, 228.0, 236.0, 246.0, 240.0, 204.0, 293.0, 280.0, 293.0, 280.0, 255.0, 258.0, 259.0, 260.0, 194.0, 199.0, 272.0, 292.0, 295.0, 275.0, 278.0, 292.0, 250.0, 263.0, 265.0, 248.0, 264.0, 258.0, 246.0, 246.0, 259.0, 260.0, 217.0, 196.0, 276.0, 294.0, 249.0, 233.0, 272.0, 298.0, 243.0, 267.0, 259.0, 254.0, 237.0, 225.0, 249.0, 273.0, 255.0, 243.0, 293.0, 277.0, 250.0, 248.0, 248.0, 270.0, 259.0, 251.0, 260.0, 262.0, 251.0, 253.0, 269.0, 255.0, 281.0, 255.0, 227.0, 220.0, 227.0, 225.0]}, "sampler_perf": {"mean_env_wait_ms": 1.890394604549175, "mean_processing_ms": 0.4453764179105544, "mean_inference_ms": 2.441389523770417}, "off_policy_estimator": {}, "info": {"num_steps_trained": 2520000, "num_steps_sampled": 1344000, "sample_time_ms": 20963.704, "load_time_ms": 37.317, "grad_time_ms": 8768.329, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 9.860761462199441e-33, "cur_lr": 0.0010000000474974513, "total_loss": 0.002573954639956355, "policy_loss": -0.004460552707314491, "vf_loss": 76.1985855102539, "vf_explained_var": 0.7691051959991455, "kl": 0.002485529985278845, "entropy": 1.1707229614257812, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1344000, "episodes_total": 3360, "training_iteration": 105, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-37-15", "timestamp": 1660250235, "time_this_iter_s": 29.730670928955078, "time_total_s": 8653.907723426819, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 
1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": 
false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, 
"entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 8653.907723426819, "timesteps_since_restore": 1344000, "iterations_since_restore": 105, "perf": {"cpu_util_percent": 34.530952380952385, "ram_util_percent": 58.22619047619047}}
+{"episode_reward_max": 576.0, "episode_reward_min": 390.0, "episode_reward_mean": 508.94, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 191.0}, "policy_reward_max": {"ppo": 298.0}, "policy_reward_mean": {"ppo": 254.47}, "custom_metrics": {"sparse_reward_mean": 177.0, "sparse_reward_min": 120, "sparse_reward_max": 200, "shaped_reward_mean": 154.94, "shaped_reward_min": 110, "shaped_reward_max": 176, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.12, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 16.03, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 14.67, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 15.45, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.97, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 0.95, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.46, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, "useful_onion_drop_agent_1_mean": 0.54, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 13.88, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 14.73, "potting_onion_agent_1_min": 6, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.86, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.17, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 4.11, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 3.71, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.79, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.7, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.15, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.19, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.88, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 4.27, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.76, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.19, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.03, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.05, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 13.88, 
"optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 14.73, "optimal_onion_potting_agent_1_min": 6, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 13.88, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 14.73, "viable_onion_potting_agent_1_min": 6, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [462.0, 533.0, 546.0, 518.0, 522.0, 513.0, 492.0, 501.0, 525.0, 522.0, 467.0, 390.0, 522.0, 527.0, 473.0, 525.0, 519.0, 519.0, 468.0, 504.0, 564.0, 470.0, 513.0, 516.0, 522.0, 421.0, 525.0, 573.0, 525.0, 527.0, 525.0, 516.0, 525.0, 450.0, 482.0, 444.0, 573.0, 573.0, 513.0, 519.0, 393.0, 564.0, 570.0, 570.0, 513.0, 513.0, 522.0, 492.0, 519.0, 413.0, 570.0, 482.0, 570.0, 510.0, 513.0, 462.0, 522.0, 498.0, 570.0, 498.0, 518.0, 510.0, 522.0, 504.0, 524.0, 536.0, 447.0, 452.0, 467.0, 525.0, 527.0, 464.0, 516.0, 459.0, 516.0, 570.0, 519.0, 522.0, 416.0, 504.0, 507.0, 513.0, 458.0, 489.0, 522.0, 519.0, 510.0, 519.0, 525.0, 576.0, 476.0, 522.0, 513.0, 530.0, 530.0, 459.0, 516.0, 498.0, 530.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [238.0, 224.0, 270.0, 263.0, 275.0, 271.0, 267.0, 251.0, 254.0, 268.0, 245.0, 268.0, 256.0, 236.0, 270.0, 231.0, 273.0, 252.0, 269.0, 253.0, 231.0, 236.0, 191.0, 199.0, 267.0, 255.0, 272.0, 255.0, 241.0, 232.0, 260.0, 265.0, 266.0, 253.0, 259.0, 260.0, 238.0, 230.0, 260.0, 244.0, 281.0, 283.0, 231.0, 239.0, 259.0, 254.0, 254.0, 262.0, 268.0, 254.0, 210.0, 211.0, 263.0, 262.0, 285.0, 288.0, 259.0, 266.0, 248.0, 279.0, 268.0, 257.0, 264.0, 252.0, 267.0, 258.0, 222.0, 228.0, 236.0, 246.0, 240.0, 204.0, 293.0, 280.0, 293.0, 280.0, 255.0, 258.0, 259.0, 260.0, 194.0, 199.0, 272.0, 292.0, 295.0, 275.0, 278.0, 292.0, 250.0, 263.0, 265.0, 248.0, 264.0, 258.0, 246.0, 246.0, 259.0, 
260.0, 217.0, 196.0, 276.0, 294.0, 249.0, 233.0, 272.0, 298.0, 243.0, 267.0, 259.0, 254.0, 237.0, 225.0, 249.0, 273.0, 255.0, 243.0, 293.0, 277.0, 250.0, 248.0, 248.0, 270.0, 259.0, 251.0, 260.0, 262.0, 251.0, 253.0, 269.0, 255.0, 281.0, 255.0, 227.0, 220.0, 227.0, 225.0, 237.0, 230.0, 259.0, 266.0, 271.0, 256.0, 211.0, 253.0, 256.0, 260.0, 232.0, 227.0, 267.0, 249.0, 280.0, 290.0, 262.0, 257.0, 259.0, 263.0, 210.0, 206.0, 247.0, 257.0, 254.0, 253.0, 252.0, 261.0, 231.0, 227.0, 240.0, 249.0, 273.0, 249.0, 259.0, 260.0, 253.0, 257.0, 270.0, 249.0, 255.0, 270.0, 284.0, 292.0, 247.0, 229.0, 265.0, 257.0, 245.0, 268.0, 260.0, 270.0, 273.0, 257.0, 214.0, 245.0, 260.0, 256.0, 251.0, 247.0, 262.0, 268.0, 291.0, 285.0]}, "sampler_perf": {"mean_env_wait_ms": 1.8766403613441696, "mean_processing_ms": 0.44263891296008706, "mean_inference_ms": 2.427842279334728}, "off_policy_estimator": {}, "info": {"num_steps_trained": 2544000, "num_steps_sampled": 1356800, "sample_time_ms": 20984.654, "load_time_ms": 37.36, "grad_time_ms": 8798.762, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 4.930380731099721e-33, "cur_lr": 0.0010000000474974513, "total_loss": 0.0012900071451440454, "policy_loss": -0.00579724321141839, "vf_loss": 76.69783782958984, "vf_explained_var": 0.7642709612846375, "kl": 0.0020595567766577005, "entropy": 1.1650750637054443, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1356800, "episodes_total": 3392, "training_iteration": 106, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-37-46", "timestamp": 1660250266, "time_this_iter_s": 30.89556574821472, "time_total_s": 8684.803289175034, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 
1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": 
false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, 
"entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 8684.803289175034, "timesteps_since_restore": 1356800, "iterations_since_restore": 106, "perf": {"cpu_util_percent": 35.19772727272727, "ram_util_percent": 58.284090909090885}}
+{"episode_reward_max": 576.0, "episode_reward_min": 390.0, "episode_reward_mean": 509.21, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 182.0}, "policy_reward_max": {"ppo": 293.0}, "policy_reward_mean": {"ppo": 254.605}, "custom_metrics": {"sparse_reward_mean": 176.8, "sparse_reward_min": 140, "sparse_reward_max": 200, "shaped_reward_mean": 155.61, "shaped_reward_min": 110, "shaped_reward_max": 176, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.26, "onion_pickup_agent_0_min": 10, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 16.32, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 14.78, "useful_onion_pickup_agent_0_min": 9, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 15.59, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 1.0, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 7, "onion_drop_agent_1_mean": 1.17, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.47, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 7, "useful_onion_drop_agent_1_mean": 0.62, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 13.97, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 14.83, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 20, "dish_pickup_agent_0_mean": 5.65, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.16, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 12, "useful_dish_pickup_agent_0_mean": 4.17, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 3.7, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.62, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.7, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.09, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.17, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.9, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 4.24, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 4.76, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.15, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.03, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.04, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 13.97, 
"optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 14.83, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 20, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 13.97, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 14.83, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 20, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [525.0, 576.0, 468.0, 510.0, 522.0, 510.0, 497.0, 453.0, 462.0, 519.0, 530.0, 522.0, 530.0, 527.0, 561.0, 525.0, 507.0, 522.0, 462.0, 501.0, 522.0, 522.0, 522.0, 530.0, 525.0, 516.0, 522.0, 522.0, 515.0, 573.0, 522.0, 404.0, 524.0, 536.0, 447.0, 452.0, 467.0, 525.0, 527.0, 464.0, 516.0, 459.0, 516.0, 570.0, 519.0, 522.0, 416.0, 504.0, 507.0, 513.0, 458.0, 489.0, 522.0, 519.0, 510.0, 519.0, 525.0, 576.0, 476.0, 522.0, 513.0, 530.0, 530.0, 459.0, 516.0, 498.0, 530.0, 576.0, 462.0, 533.0, 546.0, 518.0, 522.0, 513.0, 492.0, 501.0, 525.0, 522.0, 467.0, 390.0, 522.0, 527.0, 473.0, 525.0, 519.0, 519.0, 468.0, 504.0, 564.0, 470.0, 513.0, 516.0, 522.0, 421.0, 525.0, 573.0, 525.0, 527.0, 525.0, 516.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [263.0, 262.0, 290.0, 286.0, 236.0, 232.0, 242.0, 268.0, 268.0, 254.0, 271.0, 239.0, 248.0, 249.0, 214.0, 239.0, 230.0, 232.0, 258.0, 261.0, 264.0, 266.0, 267.0, 255.0, 273.0, 257.0, 251.0, 276.0, 293.0, 268.0, 264.0, 261.0, 244.0, 263.0, 267.0, 255.0, 239.0, 223.0, 244.0, 257.0, 273.0, 249.0, 256.0, 266.0, 262.0, 260.0, 252.0, 278.0, 262.0, 263.0, 263.0, 253.0, 251.0, 271.0, 254.0, 268.0, 270.0, 245.0, 288.0, 285.0, 262.0, 260.0, 222.0, 182.0, 269.0, 255.0, 281.0, 255.0, 227.0, 220.0, 227.0, 225.0, 237.0, 230.0, 259.0, 266.0, 271.0, 256.0, 211.0, 253.0, 256.0, 260.0, 232.0, 227.0, 267.0, 249.0, 280.0, 290.0, 262.0, 257.0, 259.0, 263.0, 210.0, 206.0, 247.0, 257.0, 254.0, 
253.0, 252.0, 261.0, 231.0, 227.0, 240.0, 249.0, 273.0, 249.0, 259.0, 260.0, 253.0, 257.0, 270.0, 249.0, 255.0, 270.0, 284.0, 292.0, 247.0, 229.0, 265.0, 257.0, 245.0, 268.0, 260.0, 270.0, 273.0, 257.0, 214.0, 245.0, 260.0, 256.0, 251.0, 247.0, 262.0, 268.0, 291.0, 285.0, 238.0, 224.0, 270.0, 263.0, 275.0, 271.0, 267.0, 251.0, 254.0, 268.0, 245.0, 268.0, 256.0, 236.0, 270.0, 231.0, 273.0, 252.0, 269.0, 253.0, 231.0, 236.0, 191.0, 199.0, 267.0, 255.0, 272.0, 255.0, 241.0, 232.0, 260.0, 265.0, 266.0, 253.0, 259.0, 260.0, 238.0, 230.0, 260.0, 244.0, 281.0, 283.0, 231.0, 239.0, 259.0, 254.0, 254.0, 262.0, 268.0, 254.0, 210.0, 211.0, 263.0, 262.0, 285.0, 288.0, 259.0, 266.0, 248.0, 279.0, 268.0, 257.0, 264.0, 252.0]}, "sampler_perf": {"mean_env_wait_ms": 1.863128513341346, "mean_processing_ms": 0.43994753351318544, "mean_inference_ms": 2.414282806471956}, "off_policy_estimator": {}, "info": {"num_steps_trained": 2568000, "num_steps_sampled": 1369600, "sample_time_ms": 20842.684, "load_time_ms": 37.196, "grad_time_ms": 8837.48, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 2.4651903655498604e-33, "cur_lr": 0.0010000000474974513, "total_loss": 0.0005029598250985146, "policy_loss": -0.0072616818360984325, "vf_loss": 83.45578002929688, "vf_explained_var": 0.7516160011291504, "kl": 0.0016769097419455647, "entropy": 1.1618729829788208, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1369600, "episodes_total": 3424, "training_iteration": 107, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-38-17", "timestamp": 1660250297, "time_this_iter_s": 30.596415996551514, "time_total_s": 8715.399705171585, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", 
"num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, 
"in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, 
"entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 8715.399705171585, "timesteps_since_restore": 1369600, "iterations_since_restore": 107, "perf": {"cpu_util_percent": 35.45581395348837, "ram_util_percent": 58.237209302325574}}
+{"episode_reward_max": 582.0, "episode_reward_min": 390.0, "episode_reward_mean": 513.81, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 182.0}, "policy_reward_max": {"ppo": 297.0}, "policy_reward_mean": {"ppo": 256.905}, "custom_metrics": {"sparse_reward_mean": 178.2, "sparse_reward_min": 140, "sparse_reward_max": 200, "shaped_reward_mean": 157.41, "shaped_reward_min": 110, "shaped_reward_max": 182, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.26, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 16.75, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 14.79, "useful_onion_pickup_agent_0_min": 9, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 15.94, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 1.0, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 7, "onion_drop_agent_1_mean": 1.23, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.44, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 7, "useful_onion_drop_agent_1_mean": 0.6, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 13.95, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 15.15, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 20, "dish_pickup_agent_0_mean": 5.64, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.19, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 12, "useful_dish_pickup_agent_0_mean": 4.38, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 3.74, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.56, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.71, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.07, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.18, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.93, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 4.25, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 4.81, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.18, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.04, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 13.95, 
"optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 15.15, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 20, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 13.95, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 15.15, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 20, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [495.0, 525.0, 579.0, 513.0, 465.0, 519.0, 525.0, 525.0, 503.0, 522.0, 522.0, 533.0, 522.0, 468.0, 522.0, 525.0, 582.0, 573.0, 527.0, 519.0, 519.0, 570.0, 501.0, 519.0, 510.0, 467.0, 522.0, 522.0, 527.0, 533.0, 473.0, 465.0, 516.0, 498.0, 530.0, 576.0, 462.0, 533.0, 546.0, 518.0, 522.0, 513.0, 492.0, 501.0, 525.0, 522.0, 467.0, 390.0, 522.0, 527.0, 473.0, 525.0, 519.0, 519.0, 468.0, 504.0, 564.0, 470.0, 513.0, 516.0, 522.0, 421.0, 525.0, 573.0, 525.0, 527.0, 525.0, 516.0, 525.0, 576.0, 468.0, 510.0, 522.0, 510.0, 497.0, 453.0, 462.0, 519.0, 530.0, 522.0, 530.0, 527.0, 561.0, 525.0, 507.0, 522.0, 462.0, 501.0, 522.0, 522.0, 522.0, 530.0, 525.0, 516.0, 522.0, 522.0, 515.0, 573.0, 522.0, 404.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [243.0, 252.0, 252.0, 273.0, 293.0, 286.0, 267.0, 246.0, 232.0, 233.0, 260.0, 259.0, 264.0, 261.0, 272.0, 253.0, 250.0, 253.0, 266.0, 256.0, 272.0, 250.0, 269.0, 264.0, 261.0, 261.0, 223.0, 245.0, 249.0, 273.0, 252.0, 273.0, 285.0, 297.0, 287.0, 286.0, 268.0, 259.0, 267.0, 252.0, 260.0, 259.0, 280.0, 290.0, 239.0, 262.0, 267.0, 252.0, 263.0, 247.0, 246.0, 221.0, 249.0, 273.0, 257.0, 265.0, 262.0, 265.0, 264.0, 269.0, 234.0, 239.0, 229.0, 236.0, 260.0, 256.0, 251.0, 247.0, 262.0, 268.0, 291.0, 285.0, 238.0, 224.0, 270.0, 263.0, 275.0, 271.0, 267.0, 251.0, 254.0, 268.0, 245.0, 268.0, 256.0, 236.0, 270.0, 231.0, 273.0, 252.0, 269.0, 253.0, 231.0, 236.0, 191.0, 199.0, 267.0, 
255.0, 272.0, 255.0, 241.0, 232.0, 260.0, 265.0, 266.0, 253.0, 259.0, 260.0, 238.0, 230.0, 260.0, 244.0, 281.0, 283.0, 231.0, 239.0, 259.0, 254.0, 254.0, 262.0, 268.0, 254.0, 210.0, 211.0, 263.0, 262.0, 285.0, 288.0, 259.0, 266.0, 248.0, 279.0, 268.0, 257.0, 264.0, 252.0, 263.0, 262.0, 290.0, 286.0, 236.0, 232.0, 242.0, 268.0, 268.0, 254.0, 271.0, 239.0, 248.0, 249.0, 214.0, 239.0, 230.0, 232.0, 258.0, 261.0, 264.0, 266.0, 267.0, 255.0, 273.0, 257.0, 251.0, 276.0, 293.0, 268.0, 264.0, 261.0, 244.0, 263.0, 267.0, 255.0, 239.0, 223.0, 244.0, 257.0, 273.0, 249.0, 256.0, 266.0, 262.0, 260.0, 252.0, 278.0, 262.0, 263.0, 263.0, 253.0, 251.0, 271.0, 254.0, 268.0, 270.0, 245.0, 288.0, 285.0, 262.0, 260.0, 222.0, 182.0]}, "sampler_perf": {"mean_env_wait_ms": 1.8498555104022234, "mean_processing_ms": 0.43730168549860937, "mean_inference_ms": 2.400809449110995}, "off_policy_estimator": {}, "info": {"num_steps_trained": 2592000, "num_steps_sampled": 1382400, "sample_time_ms": 20702.738, "load_time_ms": 37.249, "grad_time_ms": 9000.45, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 1.2325951827749302e-33, "cur_lr": 0.0010000000474974513, "total_loss": 0.004501763265579939, "policy_loss": -0.002659810474142432, "vf_loss": 77.439453125, "vf_explained_var": 0.7766797542572021, "kl": 0.002080060075968504, "entropy": 1.1647237539291382, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1382400, "episodes_total": 3456, "training_iteration": 108, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-38-46", "timestamp": 1660250326, "time_this_iter_s": 29.44796586036682, "time_total_s": 8744.847671031952, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 8744.847671031952, "timesteps_since_restore": 1382400, "iterations_since_restore": 108, "perf": {"cpu_util_percent": 35.6452380952381, "ram_util_percent": 58.221428571428575}}
+{"episode_reward_max": 582.0, "episode_reward_min": 404.0, "episode_reward_mean": 517.15, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 182.0}, "policy_reward_max": {"ppo": 297.0}, "policy_reward_mean": {"ppo": 258.575}, "custom_metrics": {"sparse_reward_mean": 179.2, "sparse_reward_min": 140, "sparse_reward_max": 200, "shaped_reward_mean": 158.75, "shaped_reward_min": 121, "shaped_reward_max": 182, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.45, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 16.7, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 14.93, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 15.97, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 1.08, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 7, "onion_drop_agent_1_mean": 1.14, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.5, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 7, "useful_onion_drop_agent_1_mean": 0.45, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 14.06, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 15.24, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.65, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.26, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 12, "useful_dish_pickup_agent_0_mean": 4.4, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 3.9, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.61, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 0.68, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.12, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.14, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.88, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.36, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.75, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.26, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.04, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 14.06, 
"optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 15.24, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.06, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 15.24, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [498.0, 525.0, 519.0, 522.0, 522.0, 516.0, 452.0, 530.0, 530.0, 522.0, 519.0, 473.0, 453.0, 570.0, 522.0, 441.0, 576.0, 476.0, 516.0, 507.0, 573.0, 530.0, 519.0, 579.0, 525.0, 473.0, 510.0, 567.0, 576.0, 516.0, 525.0, 524.0, 525.0, 527.0, 525.0, 516.0, 525.0, 576.0, 468.0, 510.0, 522.0, 510.0, 497.0, 453.0, 462.0, 519.0, 530.0, 522.0, 530.0, 527.0, 561.0, 525.0, 507.0, 522.0, 462.0, 501.0, 522.0, 522.0, 522.0, 530.0, 525.0, 516.0, 522.0, 522.0, 515.0, 573.0, 522.0, 404.0, 495.0, 525.0, 579.0, 513.0, 465.0, 519.0, 525.0, 525.0, 503.0, 522.0, 522.0, 533.0, 522.0, 468.0, 522.0, 525.0, 582.0, 573.0, 527.0, 519.0, 519.0, 570.0, 501.0, 519.0, 510.0, 467.0, 522.0, 522.0, 527.0, 533.0, 473.0, 465.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [246.0, 252.0, 261.0, 264.0, 269.0, 250.0, 258.0, 264.0, 264.0, 258.0, 272.0, 244.0, 239.0, 213.0, 263.0, 267.0, 263.0, 267.0, 268.0, 254.0, 267.0, 252.0, 240.0, 233.0, 221.0, 232.0, 284.0, 286.0, 264.0, 258.0, 225.0, 216.0, 292.0, 284.0, 245.0, 231.0, 245.0, 271.0, 258.0, 249.0, 290.0, 283.0, 266.0, 264.0, 251.0, 268.0, 296.0, 283.0, 263.0, 262.0, 219.0, 254.0, 264.0, 246.0, 291.0, 276.0, 282.0, 294.0, 248.0, 268.0, 252.0, 273.0, 265.0, 259.0, 259.0, 266.0, 248.0, 279.0, 268.0, 257.0, 264.0, 252.0, 263.0, 262.0, 290.0, 286.0, 236.0, 232.0, 242.0, 268.0, 268.0, 254.0, 271.0, 239.0, 248.0, 249.0, 214.0, 239.0, 230.0, 232.0, 258.0, 261.0, 264.0, 266.0, 267.0, 255.0, 273.0, 
257.0, 251.0, 276.0, 293.0, 268.0, 264.0, 261.0, 244.0, 263.0, 267.0, 255.0, 239.0, 223.0, 244.0, 257.0, 273.0, 249.0, 256.0, 266.0, 262.0, 260.0, 252.0, 278.0, 262.0, 263.0, 263.0, 253.0, 251.0, 271.0, 254.0, 268.0, 270.0, 245.0, 288.0, 285.0, 262.0, 260.0, 222.0, 182.0, 243.0, 252.0, 252.0, 273.0, 293.0, 286.0, 267.0, 246.0, 232.0, 233.0, 260.0, 259.0, 264.0, 261.0, 272.0, 253.0, 250.0, 253.0, 266.0, 256.0, 272.0, 250.0, 269.0, 264.0, 261.0, 261.0, 223.0, 245.0, 249.0, 273.0, 252.0, 273.0, 285.0, 297.0, 287.0, 286.0, 268.0, 259.0, 267.0, 252.0, 260.0, 259.0, 280.0, 290.0, 239.0, 262.0, 267.0, 252.0, 263.0, 247.0, 246.0, 221.0, 249.0, 273.0, 257.0, 265.0, 262.0, 265.0, 264.0, 269.0, 234.0, 239.0, 229.0, 236.0]}, "sampler_perf": {"mean_env_wait_ms": 1.8368123627881316, "mean_processing_ms": 0.4346999202219803, "mean_inference_ms": 2.3873597355001146}, "off_policy_estimator": {}, "info": {"num_steps_trained": 2616000, "num_steps_sampled": 1395200, "sample_time_ms": 20622.092, "load_time_ms": 37.297, "grad_time_ms": 9168.951, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 6.162975913874651e-34, "cur_lr": 0.0010000000474974513, "total_loss": -0.0004981299280188978, "policy_loss": -0.007735797669738531, "vf_loss": 78.24005889892578, "vf_explained_var": 0.7600134015083313, "kl": 0.0021366437431424856, "entropy": 1.1726828813552856, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1395200, "episodes_total": 3488, "training_iteration": 109, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-39-16", "timestamp": 1660250356, "time_this_iter_s": 29.312750816345215, "time_total_s": 8774.160421848297, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", 
"num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, 
"in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, 
"entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 8774.160421848297, "timesteps_since_restore": 1395200, "iterations_since_restore": 109, "perf": {"cpu_util_percent": 34.91219512195122, "ram_util_percent": 58.29999999999999}}
+{"episode_reward_max": 582.0, "episode_reward_min": 390.0, "episode_reward_mean": 513.77, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 182.0}, "policy_reward_max": {"ppo": 298.0}, "policy_reward_mean": {"ppo": 256.885}, "custom_metrics": {"sparse_reward_mean": 177.8, "sparse_reward_min": 120, "sparse_reward_max": 200, "shaped_reward_mean": 158.17, "shaped_reward_min": 116, "shaped_reward_max": 182, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.38, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 16.47, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 14.76, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 15.68, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 1.07, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 7, "onion_drop_agent_1_mean": 1.06, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.53, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 0.52, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 6, "potting_onion_agent_0_mean": 14.01, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 15.08, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.76, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.25, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.52, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 3.83, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.67, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 0.69, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.13, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.12, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.89, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.33, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.78, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.22, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.04, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 14.01, 
"optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 15.08, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.01, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 15.08, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [396.0, 516.0, 522.0, 465.0, 390.0, 522.0, 530.0, 570.0, 487.0, 519.0, 525.0, 527.0, 507.0, 522.0, 473.0, 519.0, 519.0, 573.0, 519.0, 422.0, 495.0, 525.0, 519.0, 473.0, 522.0, 530.0, 522.0, 510.0, 498.0, 510.0, 516.0, 522.0, 515.0, 573.0, 522.0, 404.0, 495.0, 525.0, 579.0, 513.0, 465.0, 519.0, 525.0, 525.0, 503.0, 522.0, 522.0, 533.0, 522.0, 468.0, 522.0, 525.0, 582.0, 573.0, 527.0, 519.0, 519.0, 570.0, 501.0, 519.0, 510.0, 467.0, 522.0, 522.0, 527.0, 533.0, 473.0, 465.0, 498.0, 525.0, 519.0, 522.0, 522.0, 516.0, 452.0, 530.0, 530.0, 522.0, 519.0, 473.0, 453.0, 570.0, 522.0, 441.0, 576.0, 476.0, 516.0, 507.0, 573.0, 530.0, 519.0, 579.0, 525.0, 473.0, 510.0, 567.0, 576.0, 516.0, 525.0, 524.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [203.0, 193.0, 260.0, 256.0, 268.0, 254.0, 236.0, 229.0, 203.0, 187.0, 261.0, 261.0, 271.0, 259.0, 272.0, 298.0, 236.0, 251.0, 260.0, 259.0, 269.0, 256.0, 258.0, 269.0, 256.0, 251.0, 270.0, 252.0, 252.0, 221.0, 270.0, 249.0, 256.0, 263.0, 285.0, 288.0, 270.0, 249.0, 203.0, 219.0, 254.0, 241.0, 250.0, 275.0, 273.0, 246.0, 236.0, 237.0, 250.0, 272.0, 270.0, 260.0, 262.0, 260.0, 243.0, 267.0, 243.0, 255.0, 266.0, 244.0, 269.0, 247.0, 249.0, 273.0, 270.0, 245.0, 288.0, 285.0, 262.0, 260.0, 222.0, 182.0, 243.0, 252.0, 252.0, 273.0, 293.0, 286.0, 267.0, 246.0, 232.0, 233.0, 260.0, 259.0, 264.0, 261.0, 272.0, 253.0, 250.0, 253.0, 266.0, 256.0, 272.0, 250.0, 269.0, 264.0, 261.0, 
261.0, 223.0, 245.0, 249.0, 273.0, 252.0, 273.0, 285.0, 297.0, 287.0, 286.0, 268.0, 259.0, 267.0, 252.0, 260.0, 259.0, 280.0, 290.0, 239.0, 262.0, 267.0, 252.0, 263.0, 247.0, 246.0, 221.0, 249.0, 273.0, 257.0, 265.0, 262.0, 265.0, 264.0, 269.0, 234.0, 239.0, 229.0, 236.0, 246.0, 252.0, 261.0, 264.0, 269.0, 250.0, 258.0, 264.0, 264.0, 258.0, 272.0, 244.0, 239.0, 213.0, 263.0, 267.0, 263.0, 267.0, 268.0, 254.0, 267.0, 252.0, 240.0, 233.0, 221.0, 232.0, 284.0, 286.0, 264.0, 258.0, 225.0, 216.0, 292.0, 284.0, 245.0, 231.0, 245.0, 271.0, 258.0, 249.0, 290.0, 283.0, 266.0, 264.0, 251.0, 268.0, 296.0, 283.0, 263.0, 262.0, 219.0, 254.0, 264.0, 246.0, 291.0, 276.0, 282.0, 294.0, 248.0, 268.0, 252.0, 273.0, 265.0, 259.0]}, "sampler_perf": {"mean_env_wait_ms": 1.8240069603143507, "mean_processing_ms": 0.43214623654293904, "mean_inference_ms": 2.374213479802633}, "off_policy_estimator": {}, "info": {"num_steps_trained": 2640000, "num_steps_sampled": 1408000, "sample_time_ms": 20611.05, "load_time_ms": 37.227, "grad_time_ms": 9292.4, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 3.0814879569373254e-34, "cur_lr": 0.0010000000474974513, "total_loss": 0.006001986563205719, "policy_loss": -0.0016462085768580437, "vf_loss": 82.35639953613281, "vf_explained_var": 0.7567899823188782, "kl": 0.0018497154815122485, "entropy": 1.174903154373169, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1408000, "episodes_total": 3520, "training_iteration": 110, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-39-46", "timestamp": 1660250386, "time_this_iter_s": 30.394602060317993, "time_total_s": 8804.555023908615, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 
1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": 
false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, 
"entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 8804.555023908615, "timesteps_since_restore": 1408000, "iterations_since_restore": 110, "perf": {"cpu_util_percent": 34.141860465116274, "ram_util_percent": 58.19302325581395}}
+{"episode_reward_max": 579.0, "episode_reward_min": 390.0, "episode_reward_mean": 511.01, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 187.0}, "policy_reward_max": {"ppo": 299.0}, "policy_reward_mean": {"ppo": 255.505}, "custom_metrics": {"sparse_reward_mean": 177.0, "sparse_reward_min": 120, "sparse_reward_max": 200, "shaped_reward_mean": 157.01, "shaped_reward_min": 116, "shaped_reward_max": 179, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.38, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 16.44, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 14.74, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 15.65, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 1.11, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 7, "onion_drop_agent_1_mean": 1.08, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.53, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 0.59, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 6, "potting_onion_agent_0_mean": 13.96, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 15.02, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.87, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.18, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.37, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 3.77, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.83, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 0.67, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.24, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.12, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.85, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.33, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.75, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.22, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.04, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 13.96, 
"optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 15.02, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 13.96, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 15.02, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [576.0, 507.0, 513.0, 504.0, 462.0, 522.0, 498.0, 576.0, 516.0, 519.0, 519.0, 564.0, 519.0, 561.0, 401.0, 522.0, 519.0, 570.0, 501.0, 484.0, 519.0, 522.0, 507.0, 413.0, 516.0, 573.0, 516.0, 522.0, 522.0, 422.0, 465.0, 482.0, 527.0, 533.0, 473.0, 465.0, 498.0, 525.0, 519.0, 522.0, 522.0, 516.0, 452.0, 530.0, 530.0, 522.0, 519.0, 473.0, 453.0, 570.0, 522.0, 441.0, 576.0, 476.0, 516.0, 507.0, 573.0, 530.0, 519.0, 579.0, 525.0, 473.0, 510.0, 567.0, 576.0, 516.0, 525.0, 524.0, 396.0, 516.0, 522.0, 465.0, 390.0, 522.0, 530.0, 570.0, 487.0, 519.0, 525.0, 527.0, 507.0, 522.0, 473.0, 519.0, 519.0, 573.0, 519.0, 422.0, 495.0, 525.0, 519.0, 473.0, 522.0, 530.0, 522.0, 510.0, 498.0, 510.0, 516.0, 522.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [289.0, 287.0, 250.0, 257.0, 251.0, 262.0, 242.0, 262.0, 240.0, 222.0, 250.0, 272.0, 244.0, 254.0, 283.0, 293.0, 256.0, 260.0, 261.0, 258.0, 263.0, 256.0, 265.0, 299.0, 261.0, 258.0, 276.0, 285.0, 212.0, 189.0, 259.0, 263.0, 262.0, 257.0, 282.0, 288.0, 259.0, 242.0, 250.0, 234.0, 257.0, 262.0, 259.0, 263.0, 258.0, 249.0, 203.0, 210.0, 268.0, 248.0, 298.0, 275.0, 265.0, 251.0, 260.0, 262.0, 267.0, 255.0, 203.0, 219.0, 230.0, 235.0, 247.0, 235.0, 262.0, 265.0, 264.0, 269.0, 234.0, 239.0, 229.0, 236.0, 246.0, 252.0, 261.0, 264.0, 269.0, 250.0, 258.0, 264.0, 264.0, 258.0, 272.0, 244.0, 239.0, 213.0, 263.0, 267.0, 263.0, 267.0, 268.0, 254.0, 267.0, 252.0, 240.0, 233.0, 221.0, 
232.0, 284.0, 286.0, 264.0, 258.0, 225.0, 216.0, 292.0, 284.0, 245.0, 231.0, 245.0, 271.0, 258.0, 249.0, 290.0, 283.0, 266.0, 264.0, 251.0, 268.0, 296.0, 283.0, 263.0, 262.0, 219.0, 254.0, 264.0, 246.0, 291.0, 276.0, 282.0, 294.0, 248.0, 268.0, 252.0, 273.0, 265.0, 259.0, 203.0, 193.0, 260.0, 256.0, 268.0, 254.0, 236.0, 229.0, 203.0, 187.0, 261.0, 261.0, 271.0, 259.0, 272.0, 298.0, 236.0, 251.0, 260.0, 259.0, 269.0, 256.0, 258.0, 269.0, 256.0, 251.0, 270.0, 252.0, 252.0, 221.0, 270.0, 249.0, 256.0, 263.0, 285.0, 288.0, 270.0, 249.0, 203.0, 219.0, 254.0, 241.0, 250.0, 275.0, 273.0, 246.0, 236.0, 237.0, 250.0, 272.0, 270.0, 260.0, 262.0, 260.0, 243.0, 267.0, 243.0, 255.0, 266.0, 244.0, 269.0, 247.0, 249.0, 273.0]}, "sampler_perf": {"mean_env_wait_ms": 1.8114390530893243, "mean_processing_ms": 0.4296416576287852, "mean_inference_ms": 2.361419613419488}, "off_policy_estimator": {}, "info": {"num_steps_trained": 2664000, "num_steps_sampled": 1420800, "sample_time_ms": 20548.052, "load_time_ms": 37.151, "grad_time_ms": 9411.24, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 1.5407439784686627e-34, "cur_lr": 0.0010000000474974513, "total_loss": 0.0037929911632090807, "policy_loss": -0.003666130593046546, "vf_loss": 80.49629974365234, "vf_explained_var": 0.7547799944877625, "kl": 0.002083237050101161, "entropy": 1.1810179948806763, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1420800, "episodes_total": 3552, "training_iteration": 111, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-40-16", "timestamp": 1660250416, "time_this_iter_s": 29.606700897216797, "time_total_s": 8834.161724805832, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", 
"num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, 
"in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, 
"entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 8834.161724805832, "timesteps_since_restore": 1420800, "iterations_since_restore": 111, "perf": {"cpu_util_percent": 34.73809523809524, "ram_util_percent": 58.2547619047619}}
+{"episode_reward_max": 576.0, "episode_reward_min": 390.0, "episode_reward_mean": 510.8, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 187.0}, "policy_reward_max": {"ppo": 299.0}, "policy_reward_mean": {"ppo": 255.4}, "custom_metrics": {"sparse_reward_mean": 177.0, "sparse_reward_min": 120, "sparse_reward_max": 200, "shaped_reward_mean": 156.8, "shaped_reward_min": 115, "shaped_reward_max": 176, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.15, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 16.34, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 14.57, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 15.58, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 1.0, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 7, "onion_drop_agent_1_mean": 1.02, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.52, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 0.57, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 6, "potting_onion_agent_0_mean": 13.89, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 15.0, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.96, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.25, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.41, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 3.8, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.97, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.67, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.31, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.13, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.78, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.37, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.73, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.28, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.04, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 13.89, 
"optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 15.0, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 13.89, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 15.0, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [507.0, 527.0, 513.0, 516.0, 479.0, 516.0, 441.0, 573.0, 522.0, 516.0, 513.0, 395.0, 525.0, 519.0, 521.0, 522.0, 522.0, 525.0, 530.0, 525.0, 462.0, 522.0, 441.0, 530.0, 573.0, 525.0, 527.0, 525.0, 522.0, 513.0, 570.0, 525.0, 576.0, 516.0, 525.0, 524.0, 396.0, 516.0, 522.0, 465.0, 390.0, 522.0, 530.0, 570.0, 487.0, 519.0, 525.0, 527.0, 507.0, 522.0, 473.0, 519.0, 519.0, 573.0, 519.0, 422.0, 495.0, 525.0, 519.0, 473.0, 522.0, 530.0, 522.0, 510.0, 498.0, 510.0, 516.0, 522.0, 576.0, 507.0, 513.0, 504.0, 462.0, 522.0, 498.0, 576.0, 516.0, 519.0, 519.0, 564.0, 519.0, 561.0, 401.0, 522.0, 519.0, 570.0, 501.0, 484.0, 519.0, 522.0, 507.0, 413.0, 516.0, 573.0, 516.0, 522.0, 522.0, 422.0, 465.0, 482.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [262.0, 245.0, 263.0, 264.0, 258.0, 255.0, 262.0, 254.0, 235.0, 244.0, 256.0, 260.0, 221.0, 220.0, 293.0, 280.0, 256.0, 266.0, 273.0, 243.0, 262.0, 251.0, 195.0, 200.0, 272.0, 253.0, 253.0, 266.0, 258.0, 263.0, 263.0, 259.0, 262.0, 260.0, 262.0, 263.0, 255.0, 275.0, 252.0, 273.0, 226.0, 236.0, 274.0, 248.0, 233.0, 208.0, 270.0, 260.0, 288.0, 285.0, 249.0, 276.0, 265.0, 262.0, 259.0, 266.0, 261.0, 261.0, 254.0, 259.0, 277.0, 293.0, 255.0, 270.0, 282.0, 294.0, 248.0, 268.0, 252.0, 273.0, 265.0, 259.0, 203.0, 193.0, 260.0, 256.0, 268.0, 254.0, 236.0, 229.0, 203.0, 187.0, 261.0, 261.0, 271.0, 259.0, 272.0, 298.0, 236.0, 251.0, 260.0, 259.0, 269.0, 256.0, 258.0, 269.0, 256.0, 
251.0, 270.0, 252.0, 252.0, 221.0, 270.0, 249.0, 256.0, 263.0, 285.0, 288.0, 270.0, 249.0, 203.0, 219.0, 254.0, 241.0, 250.0, 275.0, 273.0, 246.0, 236.0, 237.0, 250.0, 272.0, 270.0, 260.0, 262.0, 260.0, 243.0, 267.0, 243.0, 255.0, 266.0, 244.0, 269.0, 247.0, 249.0, 273.0, 289.0, 287.0, 250.0, 257.0, 251.0, 262.0, 242.0, 262.0, 240.0, 222.0, 250.0, 272.0, 244.0, 254.0, 283.0, 293.0, 256.0, 260.0, 261.0, 258.0, 263.0, 256.0, 265.0, 299.0, 261.0, 258.0, 276.0, 285.0, 212.0, 189.0, 259.0, 263.0, 262.0, 257.0, 282.0, 288.0, 259.0, 242.0, 250.0, 234.0, 257.0, 262.0, 259.0, 263.0, 258.0, 249.0, 203.0, 210.0, 268.0, 248.0, 298.0, 275.0, 265.0, 251.0, 260.0, 262.0, 267.0, 255.0, 203.0, 219.0, 230.0, 235.0, 247.0, 235.0]}, "sampler_perf": {"mean_env_wait_ms": 1.7991027412203113, "mean_processing_ms": 0.4271859275092463, "mean_inference_ms": 2.3490714276117277}, "off_policy_estimator": {}, "info": {"num_steps_trained": 2688000, "num_steps_sampled": 1433600, "sample_time_ms": 20492.657, "load_time_ms": 37.09, "grad_time_ms": 9588.563, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 7.703719892343314e-35, "cur_lr": 0.0010000000474974513, "total_loss": 0.0006233404856175184, "policy_loss": -0.00705617293715477, "vf_loss": 82.67167663574219, "vf_explained_var": 0.7619187235832214, "kl": 0.0019442345947027206, "entropy": 1.175291895866394, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1433600, "episodes_total": 3584, "training_iteration": 112, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-40-46", "timestamp": 1660250446, "time_this_iter_s": 30.788507223129272, "time_total_s": 8864.950232028961, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 
1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": 
false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, 
"entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 8864.950232028961, "timesteps_since_restore": 1433600, "iterations_since_restore": 112, "perf": {"cpu_util_percent": 33.402325581395345, "ram_util_percent": 58.2720930232558}}
+{"episode_reward_max": 576.0, "episode_reward_min": 174.0, "episode_reward_mean": 509.8, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 85.0}, "policy_reward_max": {"ppo": 305.0}, "policy_reward_mean": {"ppo": 254.9}, "custom_metrics": {"sparse_reward_mean": 177.0, "sparse_reward_min": 60, "sparse_reward_max": 200, "shaped_reward_mean": 155.8, "shaped_reward_min": 54, "shaped_reward_max": 176, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.12, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 16.21, "onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 14.57, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 15.54, "useful_onion_pickup_agent_1_min": 6, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.93, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.99, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.47, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.49, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 13.92, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 14.89, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.15, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.18, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.34, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 3.7, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 1.19, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 0.66, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.35, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.14, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.78, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.32, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 4.7, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.28, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.06, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 13.92, 
"optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 14.89, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 13.92, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 14.89, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [570.0, 525.0, 361.0, 525.0, 524.0, 465.0, 461.0, 525.0, 174.0, 524.0, 570.0, 522.0, 519.0, 522.0, 444.0, 570.0, 570.0, 525.0, 519.0, 525.0, 516.0, 479.0, 522.0, 525.0, 513.0, 516.0, 570.0, 462.0, 573.0, 530.0, 492.0, 522.0, 498.0, 510.0, 516.0, 522.0, 576.0, 507.0, 513.0, 504.0, 462.0, 522.0, 498.0, 576.0, 516.0, 519.0, 519.0, 564.0, 519.0, 561.0, 401.0, 522.0, 519.0, 570.0, 501.0, 484.0, 519.0, 522.0, 507.0, 413.0, 516.0, 573.0, 516.0, 522.0, 522.0, 422.0, 465.0, 482.0, 507.0, 527.0, 513.0, 516.0, 479.0, 516.0, 441.0, 573.0, 522.0, 516.0, 513.0, 395.0, 525.0, 519.0, 521.0, 522.0, 522.0, 525.0, 530.0, 525.0, 462.0, 522.0, 441.0, 530.0, 573.0, 525.0, 527.0, 525.0, 522.0, 513.0, 570.0, 525.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [288.0, 282.0, 263.0, 262.0, 187.0, 174.0, 246.0, 279.0, 256.0, 268.0, 230.0, 235.0, 234.0, 227.0, 265.0, 260.0, 89.0, 85.0, 262.0, 262.0, 288.0, 282.0, 262.0, 260.0, 258.0, 261.0, 257.0, 265.0, 224.0, 220.0, 287.0, 283.0, 305.0, 265.0, 264.0, 261.0, 259.0, 260.0, 267.0, 258.0, 259.0, 257.0, 236.0, 243.0, 278.0, 244.0, 262.0, 263.0, 248.0, 265.0, 258.0, 258.0, 285.0, 285.0, 237.0, 225.0, 286.0, 287.0, 272.0, 258.0, 248.0, 244.0, 267.0, 255.0, 243.0, 255.0, 266.0, 244.0, 269.0, 247.0, 249.0, 273.0, 289.0, 287.0, 250.0, 257.0, 251.0, 262.0, 242.0, 262.0, 240.0, 222.0, 250.0, 272.0, 244.0, 254.0, 283.0, 293.0, 256.0, 260.0, 261.0, 258.0, 263.0, 256.0, 265.0, 299.0, 261.0, 
258.0, 276.0, 285.0, 212.0, 189.0, 259.0, 263.0, 262.0, 257.0, 282.0, 288.0, 259.0, 242.0, 250.0, 234.0, 257.0, 262.0, 259.0, 263.0, 258.0, 249.0, 203.0, 210.0, 268.0, 248.0, 298.0, 275.0, 265.0, 251.0, 260.0, 262.0, 267.0, 255.0, 203.0, 219.0, 230.0, 235.0, 247.0, 235.0, 262.0, 245.0, 263.0, 264.0, 258.0, 255.0, 262.0, 254.0, 235.0, 244.0, 256.0, 260.0, 221.0, 220.0, 293.0, 280.0, 256.0, 266.0, 273.0, 243.0, 262.0, 251.0, 195.0, 200.0, 272.0, 253.0, 253.0, 266.0, 258.0, 263.0, 263.0, 259.0, 262.0, 260.0, 262.0, 263.0, 255.0, 275.0, 252.0, 273.0, 226.0, 236.0, 274.0, 248.0, 233.0, 208.0, 270.0, 260.0, 288.0, 285.0, 249.0, 276.0, 265.0, 262.0, 259.0, 266.0, 261.0, 261.0, 254.0, 259.0, 277.0, 293.0, 255.0, 270.0]}, "sampler_perf": {"mean_env_wait_ms": 1.7869849940199092, "mean_processing_ms": 0.4247741951637306, "mean_inference_ms": 2.3369101082045303}, "off_policy_estimator": {}, "info": {"num_steps_trained": 2712000, "num_steps_sampled": 1446400, "sample_time_ms": 20390.667, "load_time_ms": 37.347, "grad_time_ms": 9843.106, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 3.851859946171657e-35, "cur_lr": 0.0010000000474974513, "total_loss": 0.0046621630899608135, "policy_loss": -0.0027472442016005516, "vf_loss": 80.03414916992188, "vf_explained_var": 0.7775616645812988, "kl": 0.0022294942755252123, "entropy": 1.1880191564559937, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1446400, "episodes_total": 3616, "training_iteration": 113, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-41-18", "timestamp": 1660250478, "time_this_iter_s": 31.195298194885254, "time_total_s": 8896.145530223846, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", 
"num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, 
"in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, 
"entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 8896.145530223846, "timesteps_since_restore": 1446400, "iterations_since_restore": 113, "perf": {"cpu_util_percent": 34.73181818181818, "ram_util_percent": 58.20681818181818}}
+{"episode_reward_max": 576.0, "episode_reward_min": 174.0, "episode_reward_mean": 507.9, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 85.0}, "policy_reward_max": {"ppo": 305.0}, "policy_reward_mean": {"ppo": 253.95}, "custom_metrics": {"sparse_reward_mean": 175.8, "sparse_reward_min": 60, "sparse_reward_max": 200, "shaped_reward_mean": 156.3, "shaped_reward_min": 54, "shaped_reward_max": 176, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.0, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 16.27, "onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 14.44, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 15.7, "useful_onion_pickup_agent_1_min": 6, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 1.01, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.94, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.51, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.38, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 13.77, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 14.97, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.04, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.09, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.44, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 3.77, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 1.08, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 0.6, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.25, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.1, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 4.86, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.31, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.72, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.26, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.1, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 13.77, 
"optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 14.97, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 13.77, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 14.97, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [519.0, 479.0, 516.0, 449.0, 507.0, 573.0, 504.0, 492.0, 424.0, 536.0, 573.0, 478.0, 576.0, 522.0, 522.0, 519.0, 567.0, 401.0, 490.0, 522.0, 513.0, 522.0, 245.0, 521.0, 533.0, 570.0, 573.0, 516.0, 464.0, 576.0, 573.0, 522.0, 522.0, 422.0, 465.0, 482.0, 507.0, 527.0, 513.0, 516.0, 479.0, 516.0, 441.0, 573.0, 522.0, 516.0, 513.0, 395.0, 525.0, 519.0, 521.0, 522.0, 522.0, 525.0, 530.0, 525.0, 462.0, 522.0, 441.0, 530.0, 573.0, 525.0, 527.0, 525.0, 522.0, 513.0, 570.0, 525.0, 570.0, 525.0, 361.0, 525.0, 524.0, 465.0, 461.0, 525.0, 174.0, 524.0, 570.0, 522.0, 519.0, 522.0, 444.0, 570.0, 570.0, 525.0, 519.0, 525.0, 516.0, 479.0, 522.0, 525.0, 513.0, 516.0, 570.0, 462.0, 573.0, 530.0, 492.0, 522.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [254.0, 265.0, 241.0, 238.0, 257.0, 259.0, 219.0, 230.0, 263.0, 244.0, 289.0, 284.0, 241.0, 263.0, 247.0, 245.0, 214.0, 210.0, 263.0, 273.0, 272.0, 301.0, 230.0, 248.0, 291.0, 285.0, 275.0, 247.0, 260.0, 262.0, 259.0, 260.0, 279.0, 288.0, 209.0, 192.0, 244.0, 246.0, 254.0, 268.0, 241.0, 272.0, 267.0, 255.0, 122.0, 123.0, 264.0, 257.0, 270.0, 263.0, 303.0, 267.0, 287.0, 286.0, 249.0, 267.0, 234.0, 230.0, 294.0, 282.0, 292.0, 281.0, 256.0, 266.0, 267.0, 255.0, 203.0, 219.0, 230.0, 235.0, 247.0, 235.0, 262.0, 245.0, 263.0, 264.0, 258.0, 255.0, 262.0, 254.0, 235.0, 244.0, 256.0, 260.0, 221.0, 220.0, 293.0, 280.0, 256.0, 266.0, 273.0, 243.0, 262.0, 251.0, 195.0, 200.0, 272.0, 
253.0, 253.0, 266.0, 258.0, 263.0, 263.0, 259.0, 262.0, 260.0, 262.0, 263.0, 255.0, 275.0, 252.0, 273.0, 226.0, 236.0, 274.0, 248.0, 233.0, 208.0, 270.0, 260.0, 288.0, 285.0, 249.0, 276.0, 265.0, 262.0, 259.0, 266.0, 261.0, 261.0, 254.0, 259.0, 277.0, 293.0, 255.0, 270.0, 288.0, 282.0, 263.0, 262.0, 187.0, 174.0, 246.0, 279.0, 256.0, 268.0, 230.0, 235.0, 234.0, 227.0, 265.0, 260.0, 89.0, 85.0, 262.0, 262.0, 288.0, 282.0, 262.0, 260.0, 258.0, 261.0, 257.0, 265.0, 224.0, 220.0, 287.0, 283.0, 305.0, 265.0, 264.0, 261.0, 259.0, 260.0, 267.0, 258.0, 259.0, 257.0, 236.0, 243.0, 278.0, 244.0, 262.0, 263.0, 248.0, 265.0, 258.0, 258.0, 285.0, 285.0, 237.0, 225.0, 286.0, 287.0, 272.0, 258.0, 248.0, 244.0, 267.0, 255.0]}, "sampler_perf": {"mean_env_wait_ms": 1.775081398673085, "mean_processing_ms": 0.4224038975736352, "mean_inference_ms": 2.3249390744039835}, "off_policy_estimator": {}, "info": {"num_steps_trained": 2736000, "num_steps_sampled": 1459200, "sample_time_ms": 20266.804, "load_time_ms": 37.384, "grad_time_ms": 9843.514, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 1.9259299730858284e-35, "cur_lr": 0.0010000000474974513, "total_loss": -0.0005095542292110622, "policy_loss": -0.007585855200886726, "vf_loss": 76.6465072631836, "vf_explained_var": 0.7633175849914551, "kl": 0.0019467826932668686, "entropy": 1.1766948699951172, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1459200, "episodes_total": 3648, "training_iteration": 114, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-41-47", "timestamp": 1660250507, "time_this_iter_s": 29.75877094268799, "time_total_s": 8925.904301166534, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 
1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": 
false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, 
"entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 8925.904301166534, "timesteps_since_restore": 1459200, "iterations_since_restore": 114, "perf": {"cpu_util_percent": 34.199999999999996, "ram_util_percent": 58.228571428571435}}
+{"episode_reward_max": 576.0, "episode_reward_min": 174.0, "episode_reward_mean": 512.69, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 85.0}, "policy_reward_max": {"ppo": 305.0}, "policy_reward_mean": {"ppo": 256.345}, "custom_metrics": {"sparse_reward_mean": 177.8, "sparse_reward_min": 60, "sparse_reward_max": 200, "shaped_reward_mean": 157.09, "shaped_reward_min": 54, "shaped_reward_max": 176, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.25, "onion_pickup_agent_0_min": 10, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 16.43, "onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 14.59, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 15.87, "useful_onion_pickup_agent_1_min": 6, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 1.07, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.95, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.56, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.43, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 13.9, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 15.09, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 20, "dish_pickup_agent_0_mean": 6.0, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.11, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.37, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 3.72, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 1.03, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 0.61, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.18, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.13, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.88, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.36, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.71, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.33, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.1, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 13.9, 
"optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 15.09, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 20, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 13.9, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 15.09, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 20, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [522.0, 516.0, 467.0, 530.0, 507.0, 513.0, 519.0, 558.0, 504.0, 522.0, 501.0, 525.0, 525.0, 524.0, 519.0, 513.0, 513.0, 525.0, 516.0, 567.0, 510.0, 521.0, 513.0, 522.0, 518.0, 576.0, 530.0, 527.0, 459.0, 525.0, 570.0, 525.0, 522.0, 513.0, 570.0, 525.0, 570.0, 525.0, 361.0, 525.0, 524.0, 465.0, 461.0, 525.0, 174.0, 524.0, 570.0, 522.0, 519.0, 522.0, 444.0, 570.0, 570.0, 525.0, 519.0, 525.0, 516.0, 479.0, 522.0, 525.0, 513.0, 516.0, 570.0, 462.0, 573.0, 530.0, 492.0, 522.0, 519.0, 479.0, 516.0, 449.0, 507.0, 573.0, 504.0, 492.0, 424.0, 536.0, 573.0, 478.0, 576.0, 522.0, 522.0, 519.0, 567.0, 401.0, 490.0, 522.0, 513.0, 522.0, 245.0, 521.0, 533.0, 570.0, 573.0, 516.0, 464.0, 576.0, 573.0, 522.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [262.0, 260.0, 251.0, 265.0, 239.0, 228.0, 266.0, 264.0, 255.0, 252.0, 253.0, 260.0, 254.0, 265.0, 285.0, 273.0, 244.0, 260.0, 270.0, 252.0, 247.0, 254.0, 270.0, 255.0, 260.0, 265.0, 265.0, 259.0, 258.0, 261.0, 264.0, 249.0, 268.0, 245.0, 268.0, 257.0, 251.0, 265.0, 276.0, 291.0, 253.0, 257.0, 263.0, 258.0, 264.0, 249.0, 264.0, 258.0, 264.0, 254.0, 288.0, 288.0, 266.0, 264.0, 262.0, 265.0, 241.0, 218.0, 258.0, 267.0, 279.0, 291.0, 249.0, 276.0, 261.0, 261.0, 254.0, 259.0, 277.0, 293.0, 255.0, 270.0, 288.0, 282.0, 263.0, 262.0, 187.0, 174.0, 246.0, 279.0, 256.0, 268.0, 230.0, 235.0, 234.0, 227.0, 265.0, 260.0, 89.0, 85.0, 262.0, 262.0, 288.0, 282.0, 262.0, 260.0, 258.0, 
261.0, 257.0, 265.0, 224.0, 220.0, 287.0, 283.0, 305.0, 265.0, 264.0, 261.0, 259.0, 260.0, 267.0, 258.0, 259.0, 257.0, 236.0, 243.0, 278.0, 244.0, 262.0, 263.0, 248.0, 265.0, 258.0, 258.0, 285.0, 285.0, 237.0, 225.0, 286.0, 287.0, 272.0, 258.0, 248.0, 244.0, 267.0, 255.0, 254.0, 265.0, 241.0, 238.0, 257.0, 259.0, 219.0, 230.0, 263.0, 244.0, 289.0, 284.0, 241.0, 263.0, 247.0, 245.0, 214.0, 210.0, 263.0, 273.0, 272.0, 301.0, 230.0, 248.0, 291.0, 285.0, 275.0, 247.0, 260.0, 262.0, 259.0, 260.0, 279.0, 288.0, 209.0, 192.0, 244.0, 246.0, 254.0, 268.0, 241.0, 272.0, 267.0, 255.0, 122.0, 123.0, 264.0, 257.0, 270.0, 263.0, 303.0, 267.0, 287.0, 286.0, 249.0, 267.0, 234.0, 230.0, 294.0, 282.0, 292.0, 281.0, 256.0, 266.0]}, "sampler_perf": {"mean_env_wait_ms": 1.7633797791154415, "mean_processing_ms": 0.42007059281459463, "mean_inference_ms": 2.3128736955847145}, "off_policy_estimator": {}, "info": {"num_steps_trained": 2760000, "num_steps_sampled": 1472000, "sample_time_ms": 20145.685, "load_time_ms": 37.355, "grad_time_ms": 9802.708, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 9.629649865429142e-36, "cur_lr": 0.0010000000474974513, "total_loss": -0.001834428054280579, "policy_loss": -0.008199676871299744, "vf_loss": 69.51961517333984, "vf_explained_var": 0.7745820879936218, "kl": 0.002087961183860898, "entropy": 1.1734023094177246, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1472000, "episodes_total": 3680, "training_iteration": 115, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-42-15", "timestamp": 1660250535, "time_this_iter_s": 28.112826824188232, "time_total_s": 8954.017127990723, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", 
"num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, 
"in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, 
"entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 8954.017127990723, "timesteps_since_restore": 1472000, "iterations_since_restore": 115, "perf": {"cpu_util_percent": 35.5225, "ram_util_percent": 58.23499999999999}}
+{"episode_reward_max": 582.0, "episode_reward_min": 180.0, "episode_reward_mean": 512.87, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 75.0}, "policy_reward_max": {"ppo": 303.0}, "policy_reward_mean": {"ppo": 256.435}, "custom_metrics": {"sparse_reward_mean": 178.2, "sparse_reward_min": 60, "sparse_reward_max": 200, "shaped_reward_mean": 156.47, "shaped_reward_min": 60, "shaped_reward_max": 182, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.4, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 16.32, "onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 14.79, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 15.8, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 1.06, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.94, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.47, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, "useful_onion_drop_agent_1_mean": 0.4, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 14.04, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 14.97, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.86, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.34, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 4.18, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 3.65, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.95, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 0.76, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 6, "useful_dish_drop_agent_0_mean": 0.24, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.15, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.8, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.44, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.65, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.4, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.09, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.04, 
"optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 14.97, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.04, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 14.97, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [573.0, 504.0, 522.0, 339.0, 465.0, 416.0, 516.0, 476.0, 573.0, 519.0, 530.0, 180.0, 570.0, 570.0, 470.0, 573.0, 456.0, 573.0, 513.0, 501.0, 567.0, 579.0, 519.0, 518.0, 582.0, 519.0, 564.0, 504.0, 582.0, 461.0, 456.0, 501.0, 573.0, 530.0, 492.0, 522.0, 519.0, 479.0, 516.0, 449.0, 507.0, 573.0, 504.0, 492.0, 424.0, 536.0, 573.0, 478.0, 576.0, 522.0, 522.0, 519.0, 567.0, 401.0, 490.0, 522.0, 513.0, 522.0, 245.0, 521.0, 533.0, 570.0, 573.0, 516.0, 464.0, 576.0, 573.0, 522.0, 522.0, 516.0, 467.0, 530.0, 507.0, 513.0, 519.0, 558.0, 504.0, 522.0, 501.0, 525.0, 525.0, 524.0, 519.0, 513.0, 513.0, 525.0, 516.0, 567.0, 510.0, 521.0, 513.0, 522.0, 518.0, 576.0, 530.0, 527.0, 459.0, 525.0, 570.0, 525.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [283.0, 290.0, 254.0, 250.0, 251.0, 271.0, 161.0, 178.0, 228.0, 237.0, 211.0, 205.0, 248.0, 268.0, 241.0, 235.0, 300.0, 273.0, 271.0, 248.0, 269.0, 261.0, 105.0, 75.0, 287.0, 283.0, 289.0, 281.0, 248.0, 222.0, 283.0, 290.0, 216.0, 240.0, 289.0, 284.0, 261.0, 252.0, 248.0, 253.0, 266.0, 301.0, 284.0, 295.0, 268.0, 251.0, 245.0, 273.0, 291.0, 291.0, 257.0, 262.0, 277.0, 287.0, 258.0, 246.0, 287.0, 295.0, 234.0, 227.0, 233.0, 223.0, 248.0, 253.0, 286.0, 287.0, 272.0, 258.0, 248.0, 244.0, 267.0, 255.0, 254.0, 265.0, 241.0, 238.0, 257.0, 259.0, 219.0, 230.0, 263.0, 244.0, 289.0, 284.0, 241.0, 263.0, 247.0, 245.0, 214.0, 210.0, 263.0, 273.0, 272.0, 301.0, 230.0, 248.0, 291.0, 
285.0, 275.0, 247.0, 260.0, 262.0, 259.0, 260.0, 279.0, 288.0, 209.0, 192.0, 244.0, 246.0, 254.0, 268.0, 241.0, 272.0, 267.0, 255.0, 122.0, 123.0, 264.0, 257.0, 270.0, 263.0, 303.0, 267.0, 287.0, 286.0, 249.0, 267.0, 234.0, 230.0, 294.0, 282.0, 292.0, 281.0, 256.0, 266.0, 262.0, 260.0, 251.0, 265.0, 239.0, 228.0, 266.0, 264.0, 255.0, 252.0, 253.0, 260.0, 254.0, 265.0, 285.0, 273.0, 244.0, 260.0, 270.0, 252.0, 247.0, 254.0, 270.0, 255.0, 260.0, 265.0, 265.0, 259.0, 258.0, 261.0, 264.0, 249.0, 268.0, 245.0, 268.0, 257.0, 251.0, 265.0, 276.0, 291.0, 253.0, 257.0, 263.0, 258.0, 264.0, 249.0, 264.0, 258.0, 264.0, 254.0, 288.0, 288.0, 266.0, 264.0, 262.0, 265.0, 241.0, 218.0, 258.0, 267.0, 279.0, 291.0, 249.0, 276.0]}, "sampler_perf": {"mean_env_wait_ms": 1.7518867216658078, "mean_processing_ms": 0.4177743622386954, "mean_inference_ms": 2.300854372548392}, "off_policy_estimator": {}, "info": {"num_steps_trained": 2784000, "num_steps_sampled": 1484800, "sample_time_ms": 20021.401, "load_time_ms": 37.489, "grad_time_ms": 9776.788, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 4.814824932714571e-36, "cur_lr": 0.0010000000474974513, "total_loss": 0.00206244015134871, "policy_loss": -0.005156705155968666, "vf_loss": 78.12344360351562, "vf_explained_var": 0.7715883851051331, "kl": 0.0026745833456516266, "entropy": 1.1864006519317627, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1484800, "episodes_total": 3712, "training_iteration": 116, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-42-45", "timestamp": 1660250565, "time_this_iter_s": 29.3955659866333, "time_total_s": 8983.412693977356, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 
1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": 
false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, 
"entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 8983.412693977356, "timesteps_since_restore": 1484800, "iterations_since_restore": 116, "perf": {"cpu_util_percent": 35.66904761904762, "ram_util_percent": 58.25}}
+{"episode_reward_max": 582.0, "episode_reward_min": 180.0, "episode_reward_mean": 513.52, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 75.0}, "policy_reward_max": {"ppo": 301.0}, "policy_reward_mean": {"ppo": 256.76}, "custom_metrics": {"sparse_reward_mean": 178.4, "sparse_reward_min": 60, "sparse_reward_max": 200, "shaped_reward_mean": 156.72, "shaped_reward_min": 60, "shaped_reward_max": 182, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.57, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 16.34, "onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 14.93, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 15.72, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 1.13, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 1.0, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.55, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.54, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 14.11, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 15.0, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.81, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.48, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 4.12, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 3.76, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.96, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 0.79, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 6, "useful_dish_drop_agent_0_mean": 0.19, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.19, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.67, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.52, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.53, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.49, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.07, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.11, 
"optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 15.0, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.11, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 15.0, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [519.0, 530.0, 519.0, 455.0, 222.0, 524.0, 519.0, 525.0, 519.0, 576.0, 467.0, 522.0, 579.0, 570.0, 522.0, 512.0, 510.0, 495.0, 522.0, 573.0, 522.0, 573.0, 525.0, 576.0, 482.0, 522.0, 484.0, 525.0, 456.0, 461.0, 516.0, 522.0, 464.0, 576.0, 573.0, 522.0, 522.0, 516.0, 467.0, 530.0, 507.0, 513.0, 519.0, 558.0, 504.0, 522.0, 501.0, 525.0, 525.0, 524.0, 519.0, 513.0, 513.0, 525.0, 516.0, 567.0, 510.0, 521.0, 513.0, 522.0, 518.0, 576.0, 530.0, 527.0, 459.0, 525.0, 570.0, 525.0, 573.0, 504.0, 522.0, 339.0, 465.0, 416.0, 516.0, 476.0, 573.0, 519.0, 530.0, 180.0, 570.0, 570.0, 470.0, 573.0, 456.0, 573.0, 513.0, 501.0, 567.0, 579.0, 519.0, 518.0, 582.0, 519.0, 564.0, 504.0, 582.0, 461.0, 456.0, 501.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [254.0, 265.0, 273.0, 257.0, 254.0, 265.0, 220.0, 235.0, 105.0, 117.0, 259.0, 265.0, 265.0, 254.0, 260.0, 265.0, 261.0, 258.0, 278.0, 298.0, 227.0, 240.0, 253.0, 269.0, 286.0, 293.0, 290.0, 280.0, 266.0, 256.0, 252.0, 260.0, 252.0, 258.0, 237.0, 258.0, 264.0, 258.0, 277.0, 296.0, 268.0, 254.0, 285.0, 288.0, 270.0, 255.0, 279.0, 297.0, 231.0, 251.0, 257.0, 265.0, 237.0, 247.0, 263.0, 262.0, 221.0, 235.0, 234.0, 227.0, 260.0, 256.0, 262.0, 260.0, 234.0, 230.0, 294.0, 282.0, 292.0, 281.0, 256.0, 266.0, 262.0, 260.0, 251.0, 265.0, 239.0, 228.0, 266.0, 264.0, 255.0, 252.0, 253.0, 260.0, 254.0, 265.0, 285.0, 273.0, 244.0, 260.0, 270.0, 252.0, 247.0, 254.0, 270.0, 255.0, 260.0, 
265.0, 265.0, 259.0, 258.0, 261.0, 264.0, 249.0, 268.0, 245.0, 268.0, 257.0, 251.0, 265.0, 276.0, 291.0, 253.0, 257.0, 263.0, 258.0, 264.0, 249.0, 264.0, 258.0, 264.0, 254.0, 288.0, 288.0, 266.0, 264.0, 262.0, 265.0, 241.0, 218.0, 258.0, 267.0, 279.0, 291.0, 249.0, 276.0, 283.0, 290.0, 254.0, 250.0, 251.0, 271.0, 161.0, 178.0, 228.0, 237.0, 211.0, 205.0, 248.0, 268.0, 241.0, 235.0, 300.0, 273.0, 271.0, 248.0, 269.0, 261.0, 105.0, 75.0, 287.0, 283.0, 289.0, 281.0, 248.0, 222.0, 283.0, 290.0, 216.0, 240.0, 289.0, 284.0, 261.0, 252.0, 248.0, 253.0, 266.0, 301.0, 284.0, 295.0, 268.0, 251.0, 245.0, 273.0, 291.0, 291.0, 257.0, 262.0, 277.0, 287.0, 258.0, 246.0, 287.0, 295.0, 234.0, 227.0, 233.0, 223.0, 248.0, 253.0]}, "sampler_perf": {"mean_env_wait_ms": 1.7406015862798738, "mean_processing_ms": 0.4155246745297136, "mean_inference_ms": 2.2890629120226706}, "off_policy_estimator": {}, "info": {"num_steps_trained": 2808000, "num_steps_sampled": 1497600, "sample_time_ms": 20042.808, "load_time_ms": 37.283, "grad_time_ms": 9721.955, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 2.4074124663572855e-36, "cur_lr": 0.0010000000474974513, "total_loss": 0.00018632395949680358, "policy_loss": -0.007065422832965851, "vf_loss": 78.37664794921875, "vf_explained_var": 0.7705362439155579, "kl": 0.0016083299415186048, "entropy": 1.171847939491272, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1497600, "episodes_total": 3744, "training_iteration": 117, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-43-15", "timestamp": 1660250595, "time_this_iter_s": 30.259077787399292, "time_total_s": 9013.671771764755, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", 
"num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, 
"in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, 
"entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 9013.671771764755, "timesteps_since_restore": 1497600, "iterations_since_restore": 117, "perf": {"cpu_util_percent": 36.71627906976744, "ram_util_percent": 58.1906976744186}}
+{"episode_reward_max": 582.0, "episode_reward_min": 180.0, "episode_reward_mean": 512.86, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 75.0}, "policy_reward_max": {"ppo": 301.0}, "policy_reward_mean": {"ppo": 256.43}, "custom_metrics": {"sparse_reward_mean": 178.0, "sparse_reward_min": 60, "sparse_reward_max": 200, "shaped_reward_mean": 156.86, "shaped_reward_min": 60, "shaped_reward_max": 182, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.66, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 16.32, "onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 14.99, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 15.63, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 1.18, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 0.98, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.6, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.55, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 14.1, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 15.01, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.63, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.57, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 4.07, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 3.89, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.88, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 0.76, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 6, "useful_dish_drop_agent_0_mean": 0.16, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.16, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.58, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.59, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.45, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.53, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.05, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.1, 
"optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 15.01, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.1, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 15.01, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [530.0, 516.0, 510.0, 525.0, 570.0, 468.0, 495.0, 525.0, 519.0, 519.0, 522.0, 579.0, 579.0, 501.0, 527.0, 530.0, 525.0, 573.0, 525.0, 522.0, 522.0, 527.0, 444.0, 515.0, 462.0, 533.0, 525.0, 519.0, 519.0, 527.0, 504.0, 515.0, 459.0, 525.0, 570.0, 525.0, 573.0, 504.0, 522.0, 339.0, 465.0, 416.0, 516.0, 476.0, 573.0, 519.0, 530.0, 180.0, 570.0, 570.0, 470.0, 573.0, 456.0, 573.0, 513.0, 501.0, 567.0, 579.0, 519.0, 518.0, 582.0, 519.0, 564.0, 504.0, 582.0, 461.0, 456.0, 501.0, 519.0, 530.0, 519.0, 455.0, 222.0, 524.0, 519.0, 525.0, 519.0, 576.0, 467.0, 522.0, 579.0, 570.0, 522.0, 512.0, 510.0, 495.0, 522.0, 573.0, 522.0, 573.0, 525.0, 576.0, 482.0, 522.0, 484.0, 525.0, 456.0, 461.0, 516.0, 522.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [263.0, 267.0, 265.0, 251.0, 249.0, 261.0, 265.0, 260.0, 283.0, 287.0, 227.0, 241.0, 239.0, 256.0, 260.0, 265.0, 256.0, 263.0, 269.0, 250.0, 263.0, 259.0, 282.0, 297.0, 288.0, 291.0, 249.0, 252.0, 273.0, 254.0, 275.0, 255.0, 276.0, 249.0, 281.0, 292.0, 256.0, 269.0, 267.0, 255.0, 253.0, 269.0, 265.0, 262.0, 220.0, 224.0, 258.0, 257.0, 218.0, 244.0, 263.0, 270.0, 262.0, 263.0, 251.0, 268.0, 267.0, 252.0, 261.0, 266.0, 253.0, 251.0, 251.0, 264.0, 241.0, 218.0, 258.0, 267.0, 279.0, 291.0, 249.0, 276.0, 283.0, 290.0, 254.0, 250.0, 251.0, 271.0, 161.0, 178.0, 228.0, 237.0, 211.0, 205.0, 248.0, 268.0, 241.0, 235.0, 300.0, 273.0, 271.0, 248.0, 269.0, 261.0, 105.0, 75.0, 287.0, 
283.0, 289.0, 281.0, 248.0, 222.0, 283.0, 290.0, 216.0, 240.0, 289.0, 284.0, 261.0, 252.0, 248.0, 253.0, 266.0, 301.0, 284.0, 295.0, 268.0, 251.0, 245.0, 273.0, 291.0, 291.0, 257.0, 262.0, 277.0, 287.0, 258.0, 246.0, 287.0, 295.0, 234.0, 227.0, 233.0, 223.0, 248.0, 253.0, 254.0, 265.0, 273.0, 257.0, 254.0, 265.0, 220.0, 235.0, 105.0, 117.0, 259.0, 265.0, 265.0, 254.0, 260.0, 265.0, 261.0, 258.0, 278.0, 298.0, 227.0, 240.0, 253.0, 269.0, 286.0, 293.0, 290.0, 280.0, 266.0, 256.0, 252.0, 260.0, 252.0, 258.0, 237.0, 258.0, 264.0, 258.0, 277.0, 296.0, 268.0, 254.0, 285.0, 288.0, 270.0, 255.0, 279.0, 297.0, 231.0, 251.0, 257.0, 265.0, 237.0, 247.0, 263.0, 262.0, 221.0, 235.0, 234.0, 227.0, 260.0, 256.0, 262.0, 260.0]}, "sampler_perf": {"mean_env_wait_ms": 1.7295179891011523, "mean_processing_ms": 0.4133177002223833, "mean_inference_ms": 2.2776485040760637}, "off_policy_estimator": {}, "info": {"num_steps_trained": 2832000, "num_steps_sampled": 1510400, "sample_time_ms": 20089.612, "load_time_ms": 37.29, "grad_time_ms": 9589.898, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 1.2037062331786428e-36, "cur_lr": 0.0010000000474974513, "total_loss": 0.0031413733959198, "policy_loss": -0.00419240677729249, "vf_loss": 79.22246551513672, "vf_explained_var": 0.7654686570167542, "kl": 0.0017640552250668406, "entropy": 1.1769217252731323, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1510400, "episodes_total": 3776, "training_iteration": 118, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-43-44", "timestamp": 1660250624, "time_this_iter_s": 28.596869230270386, "time_total_s": 9042.268640995026, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 
1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": 
false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, 
"entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 9042.268640995026, "timesteps_since_restore": 1510400, "iterations_since_restore": 118, "perf": {"cpu_util_percent": 34.160000000000004, "ram_util_percent": 58.245000000000005}}
+{"episode_reward_max": 582.0, "episode_reward_min": 222.0, "episode_reward_mean": 517.81, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 105.0}, "policy_reward_max": {"ppo": 303.0}, "policy_reward_mean": {"ppo": 258.905}, "custom_metrics": {"sparse_reward_mean": 179.4, "sparse_reward_min": 80, "sparse_reward_max": 200, "shaped_reward_mean": 159.01, "shaped_reward_min": 62, "shaped_reward_max": 182, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.57, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 16.89, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 14.83, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 16.07, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 1.34, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 1.04, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.64, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.6, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 13.8, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 15.55, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.76, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.27, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 4.4, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 3.87, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.77, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.61, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.1, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.12, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.84, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.46, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.7, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.36, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.06, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 13.8, 
"optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 15.55, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 13.8, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 15.55, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [570.0, 573.0, 570.0, 525.0, 507.0, 558.0, 522.0, 465.0, 525.0, 522.0, 519.0, 507.0, 519.0, 525.0, 482.0, 576.0, 522.0, 484.0, 530.0, 507.0, 516.0, 462.0, 530.0, 525.0, 561.0, 519.0, 573.0, 530.0, 519.0, 464.0, 533.0, 525.0, 582.0, 461.0, 456.0, 501.0, 519.0, 530.0, 519.0, 455.0, 222.0, 524.0, 519.0, 525.0, 519.0, 576.0, 467.0, 522.0, 579.0, 570.0, 522.0, 512.0, 510.0, 495.0, 522.0, 573.0, 522.0, 573.0, 525.0, 576.0, 482.0, 522.0, 484.0, 525.0, 456.0, 461.0, 516.0, 522.0, 530.0, 516.0, 510.0, 525.0, 570.0, 468.0, 495.0, 525.0, 519.0, 519.0, 522.0, 579.0, 579.0, 501.0, 527.0, 530.0, 525.0, 573.0, 525.0, 522.0, 522.0, 527.0, 444.0, 515.0, 462.0, 533.0, 525.0, 519.0, 519.0, 527.0, 504.0, 515.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [276.0, 294.0, 293.0, 280.0, 284.0, 286.0, 275.0, 250.0, 246.0, 261.0, 279.0, 279.0, 281.0, 241.0, 223.0, 242.0, 255.0, 270.0, 258.0, 264.0, 268.0, 251.0, 244.0, 263.0, 256.0, 263.0, 255.0, 270.0, 253.0, 229.0, 282.0, 294.0, 258.0, 264.0, 238.0, 246.0, 264.0, 266.0, 259.0, 248.0, 263.0, 253.0, 232.0, 230.0, 270.0, 260.0, 256.0, 269.0, 276.0, 285.0, 261.0, 258.0, 303.0, 270.0, 270.0, 260.0, 254.0, 265.0, 231.0, 233.0, 262.0, 271.0, 260.0, 265.0, 287.0, 295.0, 234.0, 227.0, 233.0, 223.0, 248.0, 253.0, 254.0, 265.0, 273.0, 257.0, 254.0, 265.0, 220.0, 235.0, 105.0, 117.0, 259.0, 265.0, 265.0, 254.0, 260.0, 265.0, 261.0, 258.0, 278.0, 298.0, 227.0, 240.0, 253.0, 269.0, 286.0, 
293.0, 290.0, 280.0, 266.0, 256.0, 252.0, 260.0, 252.0, 258.0, 237.0, 258.0, 264.0, 258.0, 277.0, 296.0, 268.0, 254.0, 285.0, 288.0, 270.0, 255.0, 279.0, 297.0, 231.0, 251.0, 257.0, 265.0, 237.0, 247.0, 263.0, 262.0, 221.0, 235.0, 234.0, 227.0, 260.0, 256.0, 262.0, 260.0, 263.0, 267.0, 265.0, 251.0, 249.0, 261.0, 265.0, 260.0, 283.0, 287.0, 227.0, 241.0, 239.0, 256.0, 260.0, 265.0, 256.0, 263.0, 269.0, 250.0, 263.0, 259.0, 282.0, 297.0, 288.0, 291.0, 249.0, 252.0, 273.0, 254.0, 275.0, 255.0, 276.0, 249.0, 281.0, 292.0, 256.0, 269.0, 267.0, 255.0, 253.0, 269.0, 265.0, 262.0, 220.0, 224.0, 258.0, 257.0, 218.0, 244.0, 263.0, 270.0, 262.0, 263.0, 251.0, 268.0, 267.0, 252.0, 261.0, 266.0, 253.0, 251.0, 251.0, 264.0]}, "sampler_perf": {"mean_env_wait_ms": 1.7186253925564765, "mean_processing_ms": 0.4111520953158576, "mean_inference_ms": 2.2666653584244876}, "off_policy_estimator": {}, "info": {"num_steps_trained": 2856000, "num_steps_sampled": 1523200, "sample_time_ms": 20187.204, "load_time_ms": 37.338, "grad_time_ms": 9575.103, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 6.018531165893214e-37, "cur_lr": 0.0010000000474974513, "total_loss": 0.002520867856219411, "policy_loss": -0.0053411815315485, "vf_loss": 84.43938446044922, "vf_explained_var": 0.7439851760864258, "kl": 0.0023064902052283287, "entropy": 1.1637717485427856, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1523200, "episodes_total": 3808, "training_iteration": 119, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-44-14", "timestamp": 1660250654, "time_this_iter_s": 30.140514850616455, "time_total_s": 9072.409155845642, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 
1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": 
false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, 
"entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 9072.409155845642, "timesteps_since_restore": 1523200, "iterations_since_restore": 119, "perf": {"cpu_util_percent": 32.737209302325574, "ram_util_percent": 58.15116279069766}}
+{"episode_reward_max": 582.0, "episode_reward_min": 398.0, "episode_reward_mean": 521.43, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 191.0}, "policy_reward_max": {"ppo": 303.0}, "policy_reward_mean": {"ppo": 260.715}, "custom_metrics": {"sparse_reward_mean": 180.8, "sparse_reward_min": 140, "sparse_reward_max": 200, "shaped_reward_mean": 159.83, "shaped_reward_min": 118, "shaped_reward_max": 182, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.57, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 17.01, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 14.86, "useful_onion_pickup_agent_0_min": 9, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 16.2, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 1.29, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 1.05, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.62, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.6, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 13.87, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 15.65, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.0, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.07, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 4.5, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 3.74, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.78, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.59, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.16, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.13, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.07, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.31, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 4.95, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.19, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.04, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 13.87, 
"optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 15.65, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 13.87, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 15.65, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [510.0, 576.0, 507.0, 527.0, 570.0, 510.0, 501.0, 522.0, 570.0, 516.0, 467.0, 518.0, 507.0, 579.0, 530.0, 570.0, 398.0, 582.0, 573.0, 433.0, 530.0, 570.0, 516.0, 524.0, 399.0, 570.0, 533.0, 573.0, 522.0, 525.0, 501.0, 522.0, 456.0, 461.0, 516.0, 522.0, 530.0, 516.0, 510.0, 525.0, 570.0, 468.0, 495.0, 525.0, 519.0, 519.0, 522.0, 579.0, 579.0, 501.0, 527.0, 530.0, 525.0, 573.0, 525.0, 522.0, 522.0, 527.0, 444.0, 515.0, 462.0, 533.0, 525.0, 519.0, 519.0, 527.0, 504.0, 515.0, 570.0, 573.0, 570.0, 525.0, 507.0, 558.0, 522.0, 465.0, 525.0, 522.0, 519.0, 507.0, 519.0, 525.0, 482.0, 576.0, 522.0, 484.0, 530.0, 507.0, 516.0, 462.0, 530.0, 525.0, 561.0, 519.0, 573.0, 530.0, 519.0, 464.0, 533.0, 525.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [249.0, 261.0, 286.0, 290.0, 251.0, 256.0, 280.0, 247.0, 287.0, 283.0, 259.0, 251.0, 249.0, 252.0, 257.0, 265.0, 286.0, 284.0, 256.0, 260.0, 236.0, 231.0, 265.0, 253.0, 262.0, 245.0, 280.0, 299.0, 276.0, 254.0, 301.0, 269.0, 205.0, 193.0, 291.0, 291.0, 292.0, 281.0, 216.0, 217.0, 283.0, 247.0, 292.0, 278.0, 272.0, 244.0, 255.0, 269.0, 208.0, 191.0, 274.0, 296.0, 271.0, 262.0, 278.0, 295.0, 252.0, 270.0, 263.0, 262.0, 255.0, 246.0, 249.0, 273.0, 221.0, 235.0, 234.0, 227.0, 260.0, 256.0, 262.0, 260.0, 263.0, 267.0, 265.0, 251.0, 249.0, 261.0, 265.0, 260.0, 283.0, 287.0, 227.0, 241.0, 239.0, 256.0, 260.0, 265.0, 256.0, 263.0, 269.0, 250.0, 263.0, 259.0, 282.0, 297.0, 288.0, 
291.0, 249.0, 252.0, 273.0, 254.0, 275.0, 255.0, 276.0, 249.0, 281.0, 292.0, 256.0, 269.0, 267.0, 255.0, 253.0, 269.0, 265.0, 262.0, 220.0, 224.0, 258.0, 257.0, 218.0, 244.0, 263.0, 270.0, 262.0, 263.0, 251.0, 268.0, 267.0, 252.0, 261.0, 266.0, 253.0, 251.0, 251.0, 264.0, 276.0, 294.0, 293.0, 280.0, 284.0, 286.0, 275.0, 250.0, 246.0, 261.0, 279.0, 279.0, 281.0, 241.0, 223.0, 242.0, 255.0, 270.0, 258.0, 264.0, 268.0, 251.0, 244.0, 263.0, 256.0, 263.0, 255.0, 270.0, 253.0, 229.0, 282.0, 294.0, 258.0, 264.0, 238.0, 246.0, 264.0, 266.0, 259.0, 248.0, 263.0, 253.0, 232.0, 230.0, 270.0, 260.0, 256.0, 269.0, 276.0, 285.0, 261.0, 258.0, 303.0, 270.0, 270.0, 260.0, 254.0, 265.0, 231.0, 233.0, 262.0, 271.0, 260.0, 265.0]}, "sampler_perf": {"mean_env_wait_ms": 1.7079238505690706, "mean_processing_ms": 0.40902061791269845, "mean_inference_ms": 2.2560343134520804}, "off_policy_estimator": {}, "info": {"num_steps_trained": 2880000, "num_steps_sampled": 1536000, "sample_time_ms": 20247.355, "load_time_ms": 37.42, "grad_time_ms": 9479.74, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 3.009265582946607e-37, "cur_lr": 0.0010000000474974513, "total_loss": 0.0019442923367023468, "policy_loss": -0.005335395690053701, "vf_loss": 78.6177749633789, "vf_explained_var": 0.7651795744895935, "kl": 0.0017719753086566925, "entropy": 1.164175033569336, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1536000, "episodes_total": 3840, "training_iteration": 120, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-44-44", "timestamp": 1660250684, "time_this_iter_s": 30.042346954345703, "time_total_s": 9102.451502799988, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 
1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": 
false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, 
"entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 9102.451502799988, "timesteps_since_restore": 1536000, "iterations_since_restore": 120, "perf": {"cpu_util_percent": 33.52142857142857, "ram_util_percent": 58.276190476190465}}
+{"episode_reward_max": 582.0, "episode_reward_min": 365.0, "episode_reward_mean": 523.61, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 180.0}, "policy_reward_max": {"ppo": 303.0}, "policy_reward_mean": {"ppo": 261.805}, "custom_metrics": {"sparse_reward_mean": 181.6, "sparse_reward_min": 120, "sparse_reward_max": 200, "shaped_reward_mean": 160.41, "shaped_reward_min": 118, "shaped_reward_max": 182, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.47, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 17.16, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 14.88, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 16.39, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 1.17, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 1.14, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.52, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.64, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 13.93, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 15.7, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 6.21, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 13, "dish_pickup_agent_1_mean": 5.02, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.61, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 3.63, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.93, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 0.61, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.25, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.12, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.13, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.28, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.06, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.16, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.03, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 13.93, 
"optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 15.7, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 13.93, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 15.7, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [564.0, 579.0, 507.0, 530.0, 525.0, 365.0, 518.0, 522.0, 501.0, 525.0, 515.0, 576.0, 582.0, 530.0, 507.0, 522.0, 530.0, 522.0, 456.0, 504.0, 582.0, 579.0, 519.0, 533.0, 573.0, 525.0, 498.0, 567.0, 573.0, 450.0, 479.0, 522.0, 519.0, 527.0, 504.0, 515.0, 570.0, 573.0, 570.0, 525.0, 507.0, 558.0, 522.0, 465.0, 525.0, 522.0, 519.0, 507.0, 519.0, 525.0, 482.0, 576.0, 522.0, 484.0, 530.0, 507.0, 516.0, 462.0, 530.0, 525.0, 561.0, 519.0, 573.0, 530.0, 519.0, 464.0, 533.0, 525.0, 510.0, 576.0, 507.0, 527.0, 570.0, 510.0, 501.0, 522.0, 570.0, 516.0, 467.0, 518.0, 507.0, 579.0, 530.0, 570.0, 398.0, 582.0, 573.0, 433.0, 530.0, 570.0, 516.0, 524.0, 399.0, 570.0, 533.0, 573.0, 522.0, 525.0, 501.0, 522.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [292.0, 272.0, 290.0, 289.0, 251.0, 256.0, 252.0, 278.0, 263.0, 262.0, 185.0, 180.0, 260.0, 258.0, 254.0, 268.0, 247.0, 254.0, 265.0, 260.0, 262.0, 253.0, 290.0, 286.0, 296.0, 286.0, 257.0, 273.0, 255.0, 252.0, 265.0, 257.0, 265.0, 265.0, 274.0, 248.0, 218.0, 238.0, 260.0, 244.0, 291.0, 291.0, 293.0, 286.0, 260.0, 259.0, 270.0, 263.0, 285.0, 288.0, 279.0, 246.0, 252.0, 246.0, 275.0, 292.0, 287.0, 286.0, 218.0, 232.0, 248.0, 231.0, 260.0, 262.0, 267.0, 252.0, 261.0, 266.0, 253.0, 251.0, 251.0, 264.0, 276.0, 294.0, 293.0, 280.0, 284.0, 286.0, 275.0, 250.0, 246.0, 261.0, 279.0, 279.0, 281.0, 241.0, 223.0, 242.0, 255.0, 270.0, 258.0, 264.0, 268.0, 251.0, 244.0, 263.0, 256.0, 
263.0, 255.0, 270.0, 253.0, 229.0, 282.0, 294.0, 258.0, 264.0, 238.0, 246.0, 264.0, 266.0, 259.0, 248.0, 263.0, 253.0, 232.0, 230.0, 270.0, 260.0, 256.0, 269.0, 276.0, 285.0, 261.0, 258.0, 303.0, 270.0, 270.0, 260.0, 254.0, 265.0, 231.0, 233.0, 262.0, 271.0, 260.0, 265.0, 249.0, 261.0, 286.0, 290.0, 251.0, 256.0, 280.0, 247.0, 287.0, 283.0, 259.0, 251.0, 249.0, 252.0, 257.0, 265.0, 286.0, 284.0, 256.0, 260.0, 236.0, 231.0, 265.0, 253.0, 262.0, 245.0, 280.0, 299.0, 276.0, 254.0, 301.0, 269.0, 205.0, 193.0, 291.0, 291.0, 292.0, 281.0, 216.0, 217.0, 283.0, 247.0, 292.0, 278.0, 272.0, 244.0, 255.0, 269.0, 208.0, 191.0, 274.0, 296.0, 271.0, 262.0, 278.0, 295.0, 252.0, 270.0, 263.0, 262.0, 255.0, 246.0, 249.0, 273.0]}, "sampler_perf": {"mean_env_wait_ms": 1.6974210688663596, "mean_processing_ms": 0.40692675131976414, "mean_inference_ms": 2.2458724940047134}, "off_policy_estimator": {}, "info": {"num_steps_trained": 2904000, "num_steps_sampled": 1548800, "sample_time_ms": 20389.537, "load_time_ms": 37.524, "grad_time_ms": 9428.918, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 1.5046327914733034e-37, "cur_lr": 0.0010000000474974513, "total_loss": 0.0008685672655701637, "policy_loss": -0.007173554971814156, "vf_loss": 86.26753997802734, "vf_explained_var": 0.7487472891807556, "kl": 0.001581608667038381, "entropy": 1.1692520380020142, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1548800, "episodes_total": 3872, "training_iteration": 121, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-45-15", "timestamp": 1660250715, "time_this_iter_s": 30.52119469642639, "time_total_s": 9132.972697496414, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", 
"num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, 
"in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, 
"entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 9132.972697496414, "timesteps_since_restore": 1548800, "iterations_since_restore": 121, "perf": {"cpu_util_percent": 33.260465116279065, "ram_util_percent": 58.28837209302325}}
+{"episode_reward_max": 582.0, "episode_reward_min": 123.0, "episode_reward_mean": 521.78, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 58.0}, "policy_reward_max": {"ppo": 302.0}, "policy_reward_mean": {"ppo": 260.89}, "custom_metrics": {"sparse_reward_mean": 180.8, "sparse_reward_min": 40, "sparse_reward_max": 200, "shaped_reward_mean": 160.18, "shaped_reward_min": 43, "shaped_reward_max": 182, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.75, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 16.91, "onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 15.02, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 16.03, "useful_onion_pickup_agent_1_min": 3, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 1.32, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 8, "onion_drop_agent_1_mean": 1.14, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 9, "useful_onion_drop_agent_0_mean": 0.7, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.67, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 9, "potting_onion_agent_0_mean": 14.12, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 15.46, "potting_onion_agent_1_min": 3, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 6.01, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 13, "dish_pickup_agent_1_mean": 5.11, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.55, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 3.78, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.83, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 0.66, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.21, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.17, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.0, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.35, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.94, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.25, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.03, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.04, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 14.12, 
"optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 15.46, "optimal_onion_potting_agent_1_min": 3, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.12, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 15.46, "viable_onion_potting_agent_1_min": 3, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [576.0, 522.0, 522.0, 482.0, 522.0, 579.0, 573.0, 527.0, 530.0, 522.0, 123.0, 519.0, 570.0, 525.0, 470.0, 522.0, 573.0, 492.0, 570.0, 576.0, 465.0, 411.0, 525.0, 525.0, 579.0, 510.0, 516.0, 573.0, 525.0, 536.0, 573.0, 573.0, 519.0, 464.0, 533.0, 525.0, 510.0, 576.0, 507.0, 527.0, 570.0, 510.0, 501.0, 522.0, 570.0, 516.0, 467.0, 518.0, 507.0, 579.0, 530.0, 570.0, 398.0, 582.0, 573.0, 433.0, 530.0, 570.0, 516.0, 524.0, 399.0, 570.0, 533.0, 573.0, 522.0, 525.0, 501.0, 522.0, 564.0, 579.0, 507.0, 530.0, 525.0, 365.0, 518.0, 522.0, 501.0, 525.0, 515.0, 576.0, 582.0, 530.0, 507.0, 522.0, 530.0, 522.0, 456.0, 504.0, 582.0, 579.0, 519.0, 533.0, 573.0, 525.0, 498.0, 567.0, 573.0, 450.0, 479.0, 522.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [285.0, 291.0, 267.0, 255.0, 272.0, 250.0, 246.0, 236.0, 273.0, 249.0, 283.0, 296.0, 298.0, 275.0, 264.0, 263.0, 266.0, 264.0, 255.0, 267.0, 58.0, 65.0, 249.0, 270.0, 298.0, 272.0, 261.0, 264.0, 245.0, 225.0, 252.0, 270.0, 276.0, 297.0, 243.0, 249.0, 273.0, 297.0, 292.0, 284.0, 231.0, 234.0, 201.0, 210.0, 269.0, 256.0, 262.0, 263.0, 287.0, 292.0, 265.0, 245.0, 248.0, 268.0, 292.0, 281.0, 259.0, 266.0, 273.0, 263.0, 271.0, 302.0, 295.0, 278.0, 254.0, 265.0, 231.0, 233.0, 262.0, 271.0, 260.0, 265.0, 249.0, 261.0, 286.0, 290.0, 251.0, 256.0, 280.0, 247.0, 287.0, 283.0, 259.0, 251.0, 249.0, 252.0, 257.0, 265.0, 286.0, 284.0, 256.0, 260.0, 236.0, 231.0, 265.0, 253.0, 262.0, 
245.0, 280.0, 299.0, 276.0, 254.0, 301.0, 269.0, 205.0, 193.0, 291.0, 291.0, 292.0, 281.0, 216.0, 217.0, 283.0, 247.0, 292.0, 278.0, 272.0, 244.0, 255.0, 269.0, 208.0, 191.0, 274.0, 296.0, 271.0, 262.0, 278.0, 295.0, 252.0, 270.0, 263.0, 262.0, 255.0, 246.0, 249.0, 273.0, 292.0, 272.0, 290.0, 289.0, 251.0, 256.0, 252.0, 278.0, 263.0, 262.0, 185.0, 180.0, 260.0, 258.0, 254.0, 268.0, 247.0, 254.0, 265.0, 260.0, 262.0, 253.0, 290.0, 286.0, 296.0, 286.0, 257.0, 273.0, 255.0, 252.0, 265.0, 257.0, 265.0, 265.0, 274.0, 248.0, 218.0, 238.0, 260.0, 244.0, 291.0, 291.0, 293.0, 286.0, 260.0, 259.0, 270.0, 263.0, 285.0, 288.0, 279.0, 246.0, 252.0, 246.0, 275.0, 292.0, 287.0, 286.0, 218.0, 232.0, 248.0, 231.0, 260.0, 262.0]}, "sampler_perf": {"mean_env_wait_ms": 1.6870995124483497, "mean_processing_ms": 0.4048687165306798, "mean_inference_ms": 2.2359032456104564}, "off_policy_estimator": {}, "info": {"num_steps_trained": 2928000, "num_steps_sampled": 1561600, "sample_time_ms": 20397.642, "load_time_ms": 37.125, "grad_time_ms": 9297.193, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 7.523163957366517e-38, "cur_lr": 0.0010000000474974513, "total_loss": 0.0016971243312582374, "policy_loss": -0.006050370167940855, "vf_loss": 83.2496109008789, "vf_explained_var": 0.7647652626037598, "kl": 0.0023221501614898443, "entropy": 1.154932975769043, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1561600, "episodes_total": 3904, "training_iteration": 122, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-45-44", "timestamp": 1660250744, "time_this_iter_s": 29.548327922821045, "time_total_s": 9162.521025419235, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", 
"num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, 
"in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, 
"entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 9162.521025419235, "timesteps_since_restore": 1561600, "iterations_since_restore": 122, "perf": {"cpu_util_percent": 34.20476190476191, "ram_util_percent": 58.335714285714296}}
+{"episode_reward_max": 582.0, "episode_reward_min": 123.0, "episode_reward_mean": 527.77, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 58.0}, "policy_reward_max": {"ppo": 305.0}, "policy_reward_mean": {"ppo": 263.885}, "custom_metrics": {"sparse_reward_mean": 182.6, "sparse_reward_min": 40, "sparse_reward_max": 200, "shaped_reward_mean": 162.57, "shaped_reward_min": 43, "shaped_reward_max": 182, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.77, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 16.84, "onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 14.97, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 16.03, "useful_onion_pickup_agent_1_min": 3, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 1.21, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 8, "onion_drop_agent_1_mean": 0.97, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 9, "useful_onion_drop_agent_0_mean": 0.64, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.53, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 9, "potting_onion_agent_0_mean": 14.24, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 15.6, "potting_onion_agent_1_min": 3, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 5.86, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 13, "dish_pickup_agent_1_mean": 5.15, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.63, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.07, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.73, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 0.55, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.14, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.12, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.98, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.48, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.91, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.38, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.04, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.06, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 14.24, 
"optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 15.6, "optimal_onion_potting_agent_1_min": 3, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.24, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 15.6, "viable_onion_potting_agent_1_min": 3, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [536.0, 573.0, 564.0, 532.0, 527.0, 579.0, 573.0, 519.0, 530.0, 576.0, 576.0, 576.0, 573.0, 576.0, 476.0, 516.0, 501.0, 522.0, 498.0, 576.0, 527.0, 482.0, 519.0, 576.0, 533.0, 570.0, 522.0, 527.0, 530.0, 536.0, 527.0, 573.0, 522.0, 525.0, 501.0, 522.0, 564.0, 579.0, 507.0, 530.0, 525.0, 365.0, 518.0, 522.0, 501.0, 525.0, 515.0, 576.0, 582.0, 530.0, 507.0, 522.0, 530.0, 522.0, 456.0, 504.0, 582.0, 579.0, 519.0, 533.0, 573.0, 525.0, 498.0, 567.0, 573.0, 450.0, 479.0, 522.0, 576.0, 522.0, 522.0, 482.0, 522.0, 579.0, 573.0, 527.0, 530.0, 522.0, 123.0, 519.0, 570.0, 525.0, 470.0, 522.0, 573.0, 492.0, 570.0, 576.0, 465.0, 411.0, 525.0, 525.0, 579.0, 510.0, 516.0, 573.0, 525.0, 536.0, 573.0, 573.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [278.0, 258.0, 292.0, 281.0, 269.0, 295.0, 260.0, 272.0, 253.0, 274.0, 305.0, 274.0, 287.0, 286.0, 256.0, 263.0, 262.0, 268.0, 287.0, 289.0, 281.0, 295.0, 285.0, 291.0, 278.0, 295.0, 293.0, 283.0, 248.0, 228.0, 251.0, 265.0, 249.0, 252.0, 268.0, 254.0, 251.0, 247.0, 293.0, 283.0, 270.0, 257.0, 235.0, 247.0, 258.0, 261.0, 294.0, 282.0, 265.0, 268.0, 285.0, 285.0, 271.0, 251.0, 259.0, 268.0, 257.0, 273.0, 270.0, 266.0, 259.0, 268.0, 285.0, 288.0, 252.0, 270.0, 263.0, 262.0, 255.0, 246.0, 249.0, 273.0, 292.0, 272.0, 290.0, 289.0, 251.0, 256.0, 252.0, 278.0, 263.0, 262.0, 185.0, 180.0, 260.0, 258.0, 254.0, 268.0, 247.0, 254.0, 265.0, 260.0, 262.0, 253.0, 290.0, 286.0, 296.0, 
286.0, 257.0, 273.0, 255.0, 252.0, 265.0, 257.0, 265.0, 265.0, 274.0, 248.0, 218.0, 238.0, 260.0, 244.0, 291.0, 291.0, 293.0, 286.0, 260.0, 259.0, 270.0, 263.0, 285.0, 288.0, 279.0, 246.0, 252.0, 246.0, 275.0, 292.0, 287.0, 286.0, 218.0, 232.0, 248.0, 231.0, 260.0, 262.0, 285.0, 291.0, 267.0, 255.0, 272.0, 250.0, 246.0, 236.0, 273.0, 249.0, 283.0, 296.0, 298.0, 275.0, 264.0, 263.0, 266.0, 264.0, 255.0, 267.0, 58.0, 65.0, 249.0, 270.0, 298.0, 272.0, 261.0, 264.0, 245.0, 225.0, 252.0, 270.0, 276.0, 297.0, 243.0, 249.0, 273.0, 297.0, 292.0, 284.0, 231.0, 234.0, 201.0, 210.0, 269.0, 256.0, 262.0, 263.0, 287.0, 292.0, 265.0, 245.0, 248.0, 268.0, 292.0, 281.0, 259.0, 266.0, 273.0, 263.0, 271.0, 302.0, 295.0, 278.0]}, "sampler_perf": {"mean_env_wait_ms": 1.676944844024091, "mean_processing_ms": 0.4028482080008365, "mean_inference_ms": 2.2261191245028336}, "off_policy_estimator": {}, "info": {"num_steps_trained": 2952000, "num_steps_sampled": 1574400, "sample_time_ms": 20489.857, "load_time_ms": 37.388, "grad_time_ms": 9147.038, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 3.7615819786832586e-38, "cur_lr": 0.0010000000474974513, "total_loss": 0.004776147659868002, "policy_loss": -0.0032110288739204407, "vf_loss": 85.63726806640625, "vf_explained_var": 0.7386021614074707, "kl": 0.0019908936228603125, "entropy": 1.1530929803848267, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1574400, "episodes_total": 3936, "training_iteration": 123, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-46-15", "timestamp": 1660250775, "time_this_iter_s": 30.61848020553589, "time_total_s": 9193.139505624771, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 
1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": 
false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, 
"entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 9193.139505624771, "timesteps_since_restore": 1574400, "iterations_since_restore": 123, "perf": {"cpu_util_percent": 33.09302325581395, "ram_util_percent": 58.90232558139535}}
+{"episode_reward_max": 582.0, "episode_reward_min": 123.0, "episode_reward_mean": 532.17, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 58.0}, "policy_reward_max": {"ppo": 306.0}, "policy_reward_mean": {"ppo": 266.085}, "custom_metrics": {"sparse_reward_mean": 184.2, "sparse_reward_min": 40, "sparse_reward_max": 200, "shaped_reward_mean": 163.77, "shaped_reward_min": 43, "shaped_reward_max": 182, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.6, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 16.9, "onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 14.81, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 16.11, "useful_onion_pickup_agent_1_min": 3, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 1.06, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 8, "onion_drop_agent_1_mean": 0.84, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 9, "useful_onion_drop_agent_0_mean": 0.59, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.53, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 9, "potting_onion_agent_0_mean": 14.18, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 15.78, "potting_onion_agent_1_min": 3, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.98, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 13, "dish_pickup_agent_1_mean": 5.01, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 4.81, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.07, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.69, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 0.48, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.15, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.11, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.09, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.42, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.03, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.33, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.03, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.07, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 14.18, 
"optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 15.78, "optimal_onion_potting_agent_1_min": 3, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.18, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 15.78, "viable_onion_potting_agent_1_min": 3, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [525.0, 501.0, 519.0, 573.0, 579.0, 527.0, 567.0, 519.0, 579.0, 522.0, 519.0, 524.0, 570.0, 519.0, 573.0, 521.0, 576.0, 507.0, 576.0, 573.0, 455.0, 533.0, 525.0, 582.0, 579.0, 576.0, 516.0, 510.0, 522.0, 513.0, 513.0, 573.0, 573.0, 450.0, 479.0, 522.0, 576.0, 522.0, 522.0, 482.0, 522.0, 579.0, 573.0, 527.0, 530.0, 522.0, 123.0, 519.0, 570.0, 525.0, 470.0, 522.0, 573.0, 492.0, 570.0, 576.0, 465.0, 411.0, 525.0, 525.0, 579.0, 510.0, 516.0, 573.0, 525.0, 536.0, 573.0, 573.0, 536.0, 573.0, 564.0, 532.0, 527.0, 579.0, 573.0, 519.0, 530.0, 576.0, 576.0, 576.0, 573.0, 576.0, 476.0, 516.0, 501.0, 522.0, 498.0, 576.0, 527.0, 482.0, 519.0, 576.0, 533.0, 570.0, 522.0, 527.0, 530.0, 536.0, 527.0, 573.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [270.0, 255.0, 250.0, 251.0, 260.0, 259.0, 289.0, 284.0, 296.0, 283.0, 279.0, 248.0, 288.0, 279.0, 256.0, 263.0, 298.0, 281.0, 264.0, 258.0, 267.0, 252.0, 260.0, 264.0, 272.0, 298.0, 253.0, 266.0, 289.0, 284.0, 271.0, 250.0, 297.0, 279.0, 243.0, 264.0, 290.0, 286.0, 288.0, 285.0, 219.0, 236.0, 270.0, 263.0, 280.0, 245.0, 293.0, 289.0, 294.0, 285.0, 270.0, 306.0, 255.0, 261.0, 263.0, 247.0, 251.0, 271.0, 255.0, 258.0, 267.0, 246.0, 284.0, 289.0, 287.0, 286.0, 218.0, 232.0, 248.0, 231.0, 260.0, 262.0, 285.0, 291.0, 267.0, 255.0, 272.0, 250.0, 246.0, 236.0, 273.0, 249.0, 283.0, 296.0, 298.0, 275.0, 264.0, 263.0, 266.0, 264.0, 255.0, 267.0, 58.0, 65.0, 249.0, 270.0, 298.0, 
272.0, 261.0, 264.0, 245.0, 225.0, 252.0, 270.0, 276.0, 297.0, 243.0, 249.0, 273.0, 297.0, 292.0, 284.0, 231.0, 234.0, 201.0, 210.0, 269.0, 256.0, 262.0, 263.0, 287.0, 292.0, 265.0, 245.0, 248.0, 268.0, 292.0, 281.0, 259.0, 266.0, 273.0, 263.0, 271.0, 302.0, 295.0, 278.0, 278.0, 258.0, 292.0, 281.0, 269.0, 295.0, 260.0, 272.0, 253.0, 274.0, 305.0, 274.0, 287.0, 286.0, 256.0, 263.0, 262.0, 268.0, 287.0, 289.0, 281.0, 295.0, 285.0, 291.0, 278.0, 295.0, 293.0, 283.0, 248.0, 228.0, 251.0, 265.0, 249.0, 252.0, 268.0, 254.0, 251.0, 247.0, 293.0, 283.0, 270.0, 257.0, 235.0, 247.0, 258.0, 261.0, 294.0, 282.0, 265.0, 268.0, 285.0, 285.0, 271.0, 251.0, 259.0, 268.0, 257.0, 273.0, 270.0, 266.0, 259.0, 268.0, 285.0, 288.0]}, "sampler_perf": {"mean_env_wait_ms": 1.666936267539323, "mean_processing_ms": 0.40085873681350664, "mean_inference_ms": 2.216239678267129}, "off_policy_estimator": {}, "info": {"num_steps_trained": 2976000, "num_steps_sampled": 1587200, "sample_time_ms": 20498.249, "load_time_ms": 37.366, "grad_time_ms": 8983.735, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 1.8807909893416293e-38, "cur_lr": 0.0010000000474974513, "total_loss": 0.001341886818408966, "policy_loss": -0.006108943372964859, "vf_loss": 80.26326751708984, "vf_explained_var": 0.763457715511322, "kl": 0.0015635616146028042, "entropy": 1.1509909629821777, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1587200, "episodes_total": 3968, "training_iteration": 124, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-46-43", "timestamp": 1660250803, "time_this_iter_s": 28.20863699913025, "time_total_s": 9221.348142623901, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 
1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": 
false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, 
"entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 9221.348142623901, "timesteps_since_restore": 1587200, "iterations_since_restore": 124, "perf": {"cpu_util_percent": 33.417500000000004, "ram_util_percent": 58.46}}
+{"episode_reward_max": 582.0, "episode_reward_min": 450.0, "episode_reward_mean": 544.24, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 212.0}, "policy_reward_max": {"ppo": 306.0}, "policy_reward_mean": {"ppo": 272.12}, "custom_metrics": {"sparse_reward_mean": 188.6, "sparse_reward_min": 160, "sparse_reward_max": 200, "shaped_reward_mean": 167.04, "shaped_reward_min": 130, "shaped_reward_max": 182, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.41, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 17.16, "onion_pickup_agent_1_min": 12, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 14.82, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 16.58, "useful_onion_pickup_agent_1_min": 12, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.78, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 0.59, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.42, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.37, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 14.29, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 16.25, "potting_onion_agent_1_min": 11, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.09, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.06, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 4.94, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.1, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.64, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.48, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.1, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.11, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.31, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.41, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.23, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.33, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.03, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.06, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.29, 
"optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 16.25, "optimal_onion_potting_agent_1_min": 11, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.29, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 16.25, "viable_onion_potting_agent_1_min": 11, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 533.0, 573.0, 579.0, 564.0, 579.0, 579.0, 570.0, 567.0, 524.0, 573.0, 573.0, 516.0, 525.0, 522.0, 450.0, 576.0, 513.0, 476.0, 573.0, 576.0, 576.0, 579.0, 573.0, 522.0, 519.0, 579.0, 539.0, 570.0, 573.0, 507.0, 573.0, 525.0, 536.0, 573.0, 573.0, 536.0, 573.0, 564.0, 532.0, 527.0, 579.0, 573.0, 519.0, 530.0, 576.0, 576.0, 576.0, 573.0, 576.0, 476.0, 516.0, 501.0, 522.0, 498.0, 576.0, 527.0, 482.0, 519.0, 576.0, 533.0, 570.0, 522.0, 527.0, 530.0, 536.0, 527.0, 573.0, 525.0, 501.0, 519.0, 573.0, 579.0, 527.0, 567.0, 519.0, 579.0, 522.0, 519.0, 524.0, 570.0, 519.0, 573.0, 521.0, 576.0, 507.0, 576.0, 573.0, 455.0, 533.0, 525.0, 582.0, 579.0, 576.0, 516.0, 510.0, 522.0, 513.0, 513.0, 573.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [284.0, 295.0, 270.0, 263.0, 304.0, 269.0, 285.0, 294.0, 286.0, 278.0, 279.0, 300.0, 290.0, 289.0, 284.0, 286.0, 297.0, 270.0, 281.0, 243.0, 283.0, 290.0, 288.0, 285.0, 261.0, 255.0, 257.0, 268.0, 258.0, 264.0, 212.0, 238.0, 292.0, 284.0, 259.0, 254.0, 239.0, 237.0, 290.0, 283.0, 279.0, 297.0, 288.0, 288.0, 280.0, 299.0, 289.0, 284.0, 259.0, 263.0, 274.0, 245.0, 299.0, 280.0, 261.0, 278.0, 295.0, 275.0, 285.0, 288.0, 240.0, 267.0, 288.0, 285.0, 259.0, 266.0, 273.0, 263.0, 271.0, 302.0, 295.0, 278.0, 278.0, 258.0, 292.0, 281.0, 269.0, 295.0, 260.0, 272.0, 253.0, 274.0, 305.0, 274.0, 287.0, 286.0, 256.0, 263.0, 262.0, 268.0, 287.0, 289.0, 281.0, 295.0, 285.0, 291.0, 278.0, 
295.0, 293.0, 283.0, 248.0, 228.0, 251.0, 265.0, 249.0, 252.0, 268.0, 254.0, 251.0, 247.0, 293.0, 283.0, 270.0, 257.0, 235.0, 247.0, 258.0, 261.0, 294.0, 282.0, 265.0, 268.0, 285.0, 285.0, 271.0, 251.0, 259.0, 268.0, 257.0, 273.0, 270.0, 266.0, 259.0, 268.0, 285.0, 288.0, 270.0, 255.0, 250.0, 251.0, 260.0, 259.0, 289.0, 284.0, 296.0, 283.0, 279.0, 248.0, 288.0, 279.0, 256.0, 263.0, 298.0, 281.0, 264.0, 258.0, 267.0, 252.0, 260.0, 264.0, 272.0, 298.0, 253.0, 266.0, 289.0, 284.0, 271.0, 250.0, 297.0, 279.0, 243.0, 264.0, 290.0, 286.0, 288.0, 285.0, 219.0, 236.0, 270.0, 263.0, 280.0, 245.0, 293.0, 289.0, 294.0, 285.0, 270.0, 306.0, 255.0, 261.0, 263.0, 247.0, 251.0, 271.0, 255.0, 258.0, 267.0, 246.0, 284.0, 289.0]}, "sampler_perf": {"mean_env_wait_ms": 1.6570861935641006, "mean_processing_ms": 0.3989029759372638, "mean_inference_ms": 2.206484585731059}, "off_policy_estimator": {}, "info": {"num_steps_trained": 3000000, "num_steps_sampled": 1600000, "sample_time_ms": 20649.468, "load_time_ms": 37.298, "grad_time_ms": 8980.547, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.001723404973745346, "policy_loss": -0.005563261453062296, "vf_loss": 78.65084075927734, "vf_explained_var": 0.7562505602836609, "kl": 0.00201344583183527, "entropy": 1.1568351984024048, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1600000, "episodes_total": 4000, "training_iteration": 125, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-47-13", "timestamp": 1660250833, "time_this_iter_s": 29.59022808074951, "time_total_s": 9250.93837070465, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 9250.93837070465, "timesteps_since_restore": 1600000, "iterations_since_restore": 125, "perf": {"cpu_util_percent": 29.699999999999996, "ram_util_percent": 58.414285714285725}}
+{"episode_reward_max": 627.0, "episode_reward_min": 288.0, "episode_reward_mean": 541.01, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 136.0}, "policy_reward_max": {"ppo": 316.0}, "policy_reward_mean": {"ppo": 270.505}, "custom_metrics": {"sparse_reward_mean": 187.6, "sparse_reward_min": 100, "sparse_reward_max": 220, "shaped_reward_mean": 165.81, "shaped_reward_min": 88, "shaped_reward_max": 187, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.39, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 17.05, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 14.86, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 16.42, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.73, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 0.65, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.41, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.46, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 14.28, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 16.03, "potting_onion_agent_1_min": 4, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.99, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.13, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 4.86, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.15, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.64, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.54, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.12, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.13, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.16, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.46, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.09, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.38, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.05, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.28, 
"optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 16.03, "optimal_onion_potting_agent_1_min": 4, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.28, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 16.03, "viable_onion_potting_agent_1_min": 4, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [470.0, 522.0, 519.0, 570.0, 522.0, 522.0, 522.0, 627.0, 461.0, 484.0, 573.0, 573.0, 288.0, 561.0, 513.0, 576.0, 573.0, 576.0, 522.0, 525.0, 576.0, 519.0, 510.0, 576.0, 576.0, 522.0, 533.0, 530.0, 527.0, 579.0, 570.0, 522.0, 530.0, 536.0, 527.0, 573.0, 525.0, 501.0, 519.0, 573.0, 579.0, 527.0, 567.0, 519.0, 579.0, 522.0, 519.0, 524.0, 570.0, 519.0, 573.0, 521.0, 576.0, 507.0, 576.0, 573.0, 455.0, 533.0, 525.0, 582.0, 579.0, 576.0, 516.0, 510.0, 522.0, 513.0, 513.0, 573.0, 579.0, 533.0, 573.0, 579.0, 564.0, 579.0, 579.0, 570.0, 567.0, 524.0, 573.0, 573.0, 516.0, 525.0, 522.0, 450.0, 576.0, 513.0, 476.0, 573.0, 576.0, 576.0, 579.0, 573.0, 522.0, 519.0, 579.0, 539.0, 570.0, 573.0, 507.0, 573.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [234.0, 236.0, 268.0, 254.0, 259.0, 260.0, 285.0, 285.0, 260.0, 262.0, 261.0, 261.0, 253.0, 269.0, 316.0, 311.0, 216.0, 245.0, 240.0, 244.0, 301.0, 272.0, 287.0, 286.0, 136.0, 152.0, 282.0, 279.0, 255.0, 258.0, 282.0, 294.0, 284.0, 289.0, 275.0, 301.0, 249.0, 273.0, 274.0, 251.0, 299.0, 277.0, 271.0, 248.0, 258.0, 252.0, 285.0, 291.0, 282.0, 294.0, 252.0, 270.0, 256.0, 277.0, 265.0, 265.0, 248.0, 279.0, 296.0, 283.0, 277.0, 293.0, 278.0, 244.0, 257.0, 273.0, 270.0, 266.0, 259.0, 268.0, 285.0, 288.0, 270.0, 255.0, 250.0, 251.0, 260.0, 259.0, 289.0, 284.0, 296.0, 283.0, 279.0, 248.0, 288.0, 279.0, 256.0, 263.0, 298.0, 281.0, 264.0, 258.0, 267.0, 252.0, 260.0, 264.0, 272.0, 
298.0, 253.0, 266.0, 289.0, 284.0, 271.0, 250.0, 297.0, 279.0, 243.0, 264.0, 290.0, 286.0, 288.0, 285.0, 219.0, 236.0, 270.0, 263.0, 280.0, 245.0, 293.0, 289.0, 294.0, 285.0, 270.0, 306.0, 255.0, 261.0, 263.0, 247.0, 251.0, 271.0, 255.0, 258.0, 267.0, 246.0, 284.0, 289.0, 284.0, 295.0, 270.0, 263.0, 304.0, 269.0, 285.0, 294.0, 286.0, 278.0, 279.0, 300.0, 290.0, 289.0, 284.0, 286.0, 297.0, 270.0, 281.0, 243.0, 283.0, 290.0, 288.0, 285.0, 261.0, 255.0, 257.0, 268.0, 258.0, 264.0, 212.0, 238.0, 292.0, 284.0, 259.0, 254.0, 239.0, 237.0, 290.0, 283.0, 279.0, 297.0, 288.0, 288.0, 280.0, 299.0, 289.0, 284.0, 259.0, 263.0, 274.0, 245.0, 299.0, 280.0, 261.0, 278.0, 295.0, 275.0, 285.0, 288.0, 240.0, 267.0, 288.0, 285.0]}, "sampler_perf": {"mean_env_wait_ms": 1.6473988294860027, "mean_processing_ms": 0.3969770866429698, "mean_inference_ms": 2.1969066690858874}, "off_policy_estimator": {}, "info": {"num_steps_trained": 3024000, "num_steps_sampled": 1612800, "sample_time_ms": 20818.492, "load_time_ms": 37.263, "grad_time_ms": 8921.308, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.004004280548542738, "policy_loss": -0.004071192815899849, "vf_loss": 86.5199966430664, "vf_explained_var": 0.7602561116218567, "kl": 0.0020587241742759943, "entropy": 1.153051495552063, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1612800, "episodes_total": 4032, "training_iteration": 126, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-47-43", "timestamp": 1660250863, "time_this_iter_s": 30.492609977722168, "time_total_s": 9281.430980682373, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 9281.430980682373, "timesteps_since_restore": 1612800, "iterations_since_restore": 126, "perf": {"cpu_util_percent": 30.204651162790697, "ram_util_percent": 58.4372093023256}}
+{"episode_reward_max": 630.0, "episode_reward_min": 288.0, "episode_reward_mean": 546.14, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 136.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 273.07}, "custom_metrics": {"sparse_reward_mean": 190.0, "sparse_reward_min": 100, "sparse_reward_max": 220, "shaped_reward_mean": 166.14, "shaped_reward_min": 88, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.74, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 17.0, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 15.13, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 16.38, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.83, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 0.67, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.43, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.45, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 14.54, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 15.95, "potting_onion_agent_1_min": 4, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.88, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.4, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.55, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 4.29, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.74, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 0.56, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.11, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.14, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.97, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.72, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 4.9, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.64, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.05, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.54, 
"optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 15.95, "optimal_onion_potting_agent_1_min": 4, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.54, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 15.95, "viable_onion_potting_agent_1_min": 4, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [576.0, 570.0, 576.0, 573.0, 510.0, 573.0, 570.0, 527.0, 543.0, 570.0, 546.0, 525.0, 573.0, 579.0, 573.0, 495.0, 576.0, 579.0, 576.0, 522.0, 579.0, 519.0, 506.0, 576.0, 579.0, 579.0, 498.0, 567.0, 576.0, 504.0, 630.0, 579.0, 522.0, 513.0, 513.0, 573.0, 579.0, 533.0, 573.0, 579.0, 564.0, 579.0, 579.0, 570.0, 567.0, 524.0, 573.0, 573.0, 516.0, 525.0, 522.0, 450.0, 576.0, 513.0, 476.0, 573.0, 576.0, 576.0, 579.0, 573.0, 522.0, 519.0, 579.0, 539.0, 570.0, 573.0, 507.0, 573.0, 470.0, 522.0, 519.0, 570.0, 522.0, 522.0, 522.0, 627.0, 461.0, 484.0, 573.0, 573.0, 288.0, 561.0, 513.0, 576.0, 573.0, 576.0, 522.0, 525.0, 576.0, 519.0, 510.0, 576.0, 576.0, 522.0, 533.0, 530.0, 527.0, 579.0, 570.0, 522.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [286.0, 290.0, 274.0, 296.0, 290.0, 286.0, 283.0, 290.0, 264.0, 246.0, 290.0, 283.0, 277.0, 293.0, 248.0, 279.0, 273.0, 270.0, 279.0, 291.0, 272.0, 274.0, 273.0, 252.0, 289.0, 284.0, 290.0, 289.0, 282.0, 291.0, 239.0, 256.0, 287.0, 289.0, 284.0, 295.0, 281.0, 295.0, 262.0, 260.0, 299.0, 280.0, 244.0, 275.0, 253.0, 253.0, 287.0, 289.0, 283.0, 296.0, 280.0, 299.0, 253.0, 245.0, 291.0, 276.0, 291.0, 285.0, 244.0, 260.0, 303.0, 327.0, 274.0, 305.0, 251.0, 271.0, 255.0, 258.0, 267.0, 246.0, 284.0, 289.0, 284.0, 295.0, 270.0, 263.0, 304.0, 269.0, 285.0, 294.0, 286.0, 278.0, 279.0, 300.0, 290.0, 289.0, 284.0, 286.0, 297.0, 270.0, 281.0, 243.0, 283.0, 290.0, 288.0, 285.0, 261.0, 
255.0, 257.0, 268.0, 258.0, 264.0, 212.0, 238.0, 292.0, 284.0, 259.0, 254.0, 239.0, 237.0, 290.0, 283.0, 279.0, 297.0, 288.0, 288.0, 280.0, 299.0, 289.0, 284.0, 259.0, 263.0, 274.0, 245.0, 299.0, 280.0, 261.0, 278.0, 295.0, 275.0, 285.0, 288.0, 240.0, 267.0, 288.0, 285.0, 234.0, 236.0, 268.0, 254.0, 259.0, 260.0, 285.0, 285.0, 260.0, 262.0, 261.0, 261.0, 253.0, 269.0, 316.0, 311.0, 216.0, 245.0, 240.0, 244.0, 301.0, 272.0, 287.0, 286.0, 136.0, 152.0, 282.0, 279.0, 255.0, 258.0, 282.0, 294.0, 284.0, 289.0, 275.0, 301.0, 249.0, 273.0, 274.0, 251.0, 299.0, 277.0, 271.0, 248.0, 258.0, 252.0, 285.0, 291.0, 282.0, 294.0, 252.0, 270.0, 256.0, 277.0, 265.0, 265.0, 248.0, 279.0, 296.0, 283.0, 277.0, 293.0, 278.0, 244.0]}, "sampler_perf": {"mean_env_wait_ms": 1.637864403277332, "mean_processing_ms": 0.39508190605522825, "mean_inference_ms": 2.1874563334987878}, "off_policy_estimator": {}, "info": {"num_steps_trained": 3048000, "num_steps_sampled": 1625600, "sample_time_ms": 20804.416, "load_time_ms": 37.185, "grad_time_ms": 8880.278, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.00042137285345233977, "policy_loss": -0.007074173539876938, "vf_loss": 72.28662872314453, "vf_explained_var": 0.7638903260231018, "kl": 0.0020576624665409327, "entropy": 1.1517143249511719, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1625600, "episodes_total": 4064, "training_iteration": 127, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-48-13", "timestamp": 1660250893, "time_this_iter_s": 29.709146738052368, "time_total_s": 9311.140127420425, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 9311.140127420425, "timesteps_since_restore": 1625600, "iterations_since_restore": 127, "perf": {"cpu_util_percent": 33.61904761904762, "ram_util_percent": 58.37380952380953}}
+{"episode_reward_max": 630.0, "episode_reward_min": 288.0, "episode_reward_mean": 546.4, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 136.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 273.2}, "custom_metrics": {"sparse_reward_mean": 190.0, "sparse_reward_min": 100, "sparse_reward_max": 220, "shaped_reward_mean": 166.4, "shaped_reward_min": 88, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.49, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 17.0, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 14.98, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 16.33, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.68, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 0.71, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.33, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, "useful_onion_drop_agent_1_mean": 0.42, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 14.44, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 15.96, "potting_onion_agent_1_min": 4, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.97, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.45, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.72, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.23, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.7, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 0.65, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.1, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.16, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.09, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 10, "soup_pickup_agent_1_mean": 4.64, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 4.99, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.57, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.04, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.04, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.44, 
"optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 15.96, "optimal_onion_potting_agent_1_min": 4, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.44, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 15.96, "viable_onion_potting_agent_1_min": 4, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [573.0, 576.0, 567.0, 567.0, 558.0, 530.0, 573.0, 582.0, 582.0, 521.0, 576.0, 510.0, 579.0, 579.0, 522.0, 573.0, 573.0, 576.0, 473.0, 419.0, 539.0, 573.0, 576.0, 570.0, 573.0, 484.0, 582.0, 576.0, 533.0, 396.0, 573.0, 570.0, 570.0, 573.0, 507.0, 573.0, 470.0, 522.0, 519.0, 570.0, 522.0, 522.0, 522.0, 627.0, 461.0, 484.0, 573.0, 573.0, 288.0, 561.0, 513.0, 576.0, 573.0, 576.0, 522.0, 525.0, 576.0, 519.0, 510.0, 576.0, 576.0, 522.0, 533.0, 530.0, 527.0, 579.0, 570.0, 522.0, 576.0, 570.0, 576.0, 573.0, 510.0, 573.0, 570.0, 527.0, 543.0, 570.0, 546.0, 525.0, 573.0, 579.0, 573.0, 495.0, 576.0, 579.0, 576.0, 522.0, 579.0, 519.0, 506.0, 576.0, 579.0, 579.0, 498.0, 567.0, 576.0, 504.0, 630.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [291.0, 282.0, 285.0, 291.0, 281.0, 286.0, 282.0, 285.0, 289.0, 269.0, 264.0, 266.0, 303.0, 270.0, 307.0, 275.0, 285.0, 297.0, 253.0, 268.0, 304.0, 272.0, 231.0, 279.0, 299.0, 280.0, 289.0, 290.0, 271.0, 251.0, 300.0, 273.0, 288.0, 285.0, 290.0, 286.0, 238.0, 235.0, 212.0, 207.0, 265.0, 274.0, 288.0, 285.0, 304.0, 272.0, 277.0, 293.0, 285.0, 288.0, 261.0, 223.0, 298.0, 284.0, 289.0, 287.0, 281.0, 252.0, 196.0, 200.0, 285.0, 288.0, 280.0, 290.0, 295.0, 275.0, 285.0, 288.0, 240.0, 267.0, 288.0, 285.0, 234.0, 236.0, 268.0, 254.0, 259.0, 260.0, 285.0, 285.0, 260.0, 262.0, 261.0, 261.0, 253.0, 269.0, 316.0, 311.0, 216.0, 245.0, 240.0, 244.0, 301.0, 272.0, 287.0, 286.0, 136.0, 
152.0, 282.0, 279.0, 255.0, 258.0, 282.0, 294.0, 284.0, 289.0, 275.0, 301.0, 249.0, 273.0, 274.0, 251.0, 299.0, 277.0, 271.0, 248.0, 258.0, 252.0, 285.0, 291.0, 282.0, 294.0, 252.0, 270.0, 256.0, 277.0, 265.0, 265.0, 248.0, 279.0, 296.0, 283.0, 277.0, 293.0, 278.0, 244.0, 286.0, 290.0, 274.0, 296.0, 290.0, 286.0, 283.0, 290.0, 264.0, 246.0, 290.0, 283.0, 277.0, 293.0, 248.0, 279.0, 273.0, 270.0, 279.0, 291.0, 272.0, 274.0, 273.0, 252.0, 289.0, 284.0, 290.0, 289.0, 282.0, 291.0, 239.0, 256.0, 287.0, 289.0, 284.0, 295.0, 281.0, 295.0, 262.0, 260.0, 299.0, 280.0, 244.0, 275.0, 253.0, 253.0, 287.0, 289.0, 283.0, 296.0, 280.0, 299.0, 253.0, 245.0, 291.0, 276.0, 291.0, 285.0, 244.0, 260.0, 303.0, 327.0, 274.0, 305.0]}, "sampler_perf": {"mean_env_wait_ms": 1.6284902770333296, "mean_processing_ms": 0.39321693792454526, "mean_inference_ms": 2.178397267354796}, "off_policy_estimator": {}, "info": {"num_steps_trained": 3072000, "num_steps_sampled": 1638400, "sample_time_ms": 20978.899, "load_time_ms": 37.247, "grad_time_ms": 8964.602, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0014722892083227634, "policy_loss": -0.0057091922499239445, "vf_loss": 77.60167694091797, "vf_explained_var": 0.7587153315544128, "kl": 0.0015954332193359733, "entropy": 1.1573811769485474, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1638400, "episodes_total": 4096, "training_iteration": 128, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-48-44", "timestamp": 1660250924, "time_this_iter_s": 31.189378023147583, "time_total_s": 9342.329505443573, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 9342.329505443573, "timesteps_since_restore": 1638400, "iterations_since_restore": 128, "perf": {"cpu_util_percent": 28.313636363636366, "ram_util_percent": 58.377272727272725}}
+{"episode_reward_max": 630.0, "episode_reward_min": 396.0, "episode_reward_mean": 548.71, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 196.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 274.355}, "custom_metrics": {"sparse_reward_mean": 191.0, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 166.71, "shaped_reward_min": 116, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.57, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 17.06, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 15.11, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 16.38, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.77, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 0.72, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.33, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, "useful_onion_drop_agent_1_mean": 0.37, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 14.46, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 16.05, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.02, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.75, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 4.61, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.2, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.77, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 0.86, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 7, "useful_dish_drop_agent_0_mean": 0.14, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.19, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.17, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 11, "soup_pickup_agent_1_mean": 4.7, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.02, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.61, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.1, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 5, "soup_drop_agent_1_mean": 0.04, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.46, 
"optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 16.05, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.46, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 16.05, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [561.0, 510.0, 525.0, 576.0, 582.0, 573.0, 564.0, 516.0, 501.0, 567.0, 524.0, 570.0, 573.0, 530.0, 501.0, 444.0, 576.0, 476.0, 576.0, 507.0, 573.0, 519.0, 570.0, 530.0, 573.0, 512.0, 527.0, 570.0, 570.0, 487.0, 530.0, 582.0, 527.0, 579.0, 570.0, 522.0, 576.0, 570.0, 576.0, 573.0, 510.0, 573.0, 570.0, 527.0, 543.0, 570.0, 546.0, 525.0, 573.0, 579.0, 573.0, 495.0, 576.0, 579.0, 576.0, 522.0, 579.0, 519.0, 506.0, 576.0, 579.0, 579.0, 498.0, 567.0, 576.0, 504.0, 630.0, 579.0, 573.0, 576.0, 567.0, 567.0, 558.0, 530.0, 573.0, 582.0, 582.0, 521.0, 576.0, 510.0, 579.0, 579.0, 522.0, 573.0, 573.0, 576.0, 473.0, 419.0, 539.0, 573.0, 576.0, 570.0, 573.0, 484.0, 582.0, 576.0, 533.0, 396.0, 573.0, 570.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [281.0, 280.0, 253.0, 257.0, 267.0, 258.0, 296.0, 280.0, 283.0, 299.0, 275.0, 298.0, 287.0, 277.0, 267.0, 249.0, 247.0, 254.0, 284.0, 283.0, 267.0, 257.0, 279.0, 291.0, 287.0, 286.0, 272.0, 258.0, 234.0, 267.0, 218.0, 226.0, 275.0, 301.0, 242.0, 234.0, 289.0, 287.0, 240.0, 267.0, 274.0, 299.0, 263.0, 256.0, 270.0, 300.0, 268.0, 262.0, 274.0, 299.0, 255.0, 257.0, 271.0, 256.0, 286.0, 284.0, 269.0, 301.0, 232.0, 255.0, 270.0, 260.0, 296.0, 286.0, 248.0, 279.0, 296.0, 283.0, 277.0, 293.0, 278.0, 244.0, 286.0, 290.0, 274.0, 296.0, 290.0, 286.0, 283.0, 290.0, 264.0, 246.0, 290.0, 283.0, 277.0, 293.0, 248.0, 279.0, 273.0, 270.0, 279.0, 291.0, 272.0, 274.0, 273.0, 252.0, 289.0, 
284.0, 290.0, 289.0, 282.0, 291.0, 239.0, 256.0, 287.0, 289.0, 284.0, 295.0, 281.0, 295.0, 262.0, 260.0, 299.0, 280.0, 244.0, 275.0, 253.0, 253.0, 287.0, 289.0, 283.0, 296.0, 280.0, 299.0, 253.0, 245.0, 291.0, 276.0, 291.0, 285.0, 244.0, 260.0, 303.0, 327.0, 274.0, 305.0, 291.0, 282.0, 285.0, 291.0, 281.0, 286.0, 282.0, 285.0, 289.0, 269.0, 264.0, 266.0, 303.0, 270.0, 307.0, 275.0, 285.0, 297.0, 253.0, 268.0, 304.0, 272.0, 231.0, 279.0, 299.0, 280.0, 289.0, 290.0, 271.0, 251.0, 300.0, 273.0, 288.0, 285.0, 290.0, 286.0, 238.0, 235.0, 212.0, 207.0, 265.0, 274.0, 288.0, 285.0, 304.0, 272.0, 277.0, 293.0, 285.0, 288.0, 261.0, 223.0, 298.0, 284.0, 289.0, 287.0, 281.0, 252.0, 196.0, 200.0, 285.0, 288.0, 280.0, 290.0]}, "sampler_perf": {"mean_env_wait_ms": 1.6192641289400398, "mean_processing_ms": 0.39137958658922545, "mean_inference_ms": 2.169420054882037}, "off_policy_estimator": {}, "info": {"num_steps_trained": 3096000, "num_steps_sampled": 1651200, "sample_time_ms": 21000.262, "load_time_ms": 37.071, "grad_time_ms": 9066.202, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.002854668302461505, "policy_loss": -0.004409888293594122, "vf_loss": 78.45098114013672, "vf_explained_var": 0.7681138515472412, "kl": 0.0020372606813907623, "entropy": 1.1610809564590454, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1651200, "episodes_total": 4128, "training_iteration": 129, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-49-15", "timestamp": 1660250955, "time_this_iter_s": 31.373005151748657, "time_total_s": 9373.702510595322, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 9373.702510595322, "timesteps_since_restore": 1651200, "iterations_since_restore": 129, "perf": {"cpu_util_percent": 30.084444444444443, "ram_util_percent": 58.27111111111109}}
+{"episode_reward_max": 630.0, "episode_reward_min": 396.0, "episode_reward_mean": 544.49, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 196.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 272.245}, "custom_metrics": {"sparse_reward_mean": 189.2, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 166.09, "shaped_reward_min": 116, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.51, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 17.16, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 15.0, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 16.38, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.85, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 7, "onion_drop_agent_1_mean": 0.79, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.37, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, "useful_onion_drop_agent_1_mean": 0.39, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 14.37, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 16.02, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.07, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.62, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 4.72, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.12, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.75, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.83, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 7, "useful_dish_drop_agent_0_mean": 0.16, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.21, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.21, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 11, "soup_pickup_agent_1_mean": 4.59, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.05, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.52, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.11, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 5, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.37, 
"optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 16.02, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.37, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 16.02, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 576.0, 567.0, 516.0, 522.0, 573.0, 510.0, 479.0, 524.0, 501.0, 512.0, 573.0, 522.0, 582.0, 579.0, 576.0, 576.0, 513.0, 570.0, 456.0, 567.0, 539.0, 567.0, 570.0, 576.0, 522.0, 519.0, 525.0, 570.0, 579.0, 453.0, 515.0, 576.0, 504.0, 630.0, 579.0, 573.0, 576.0, 567.0, 567.0, 558.0, 530.0, 573.0, 582.0, 582.0, 521.0, 576.0, 510.0, 579.0, 579.0, 522.0, 573.0, 573.0, 576.0, 473.0, 419.0, 539.0, 573.0, 576.0, 570.0, 573.0, 484.0, 582.0, 576.0, 533.0, 396.0, 573.0, 570.0, 561.0, 510.0, 525.0, 576.0, 582.0, 573.0, 564.0, 516.0, 501.0, 567.0, 524.0, 570.0, 573.0, 530.0, 501.0, 444.0, 576.0, 476.0, 576.0, 507.0, 573.0, 519.0, 570.0, 530.0, 573.0, 512.0, 527.0, 570.0, 570.0, 487.0, 530.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [294.0, 288.0, 288.0, 288.0, 299.0, 268.0, 272.0, 244.0, 252.0, 270.0, 282.0, 291.0, 256.0, 254.0, 250.0, 229.0, 266.0, 258.0, 236.0, 265.0, 238.0, 274.0, 267.0, 306.0, 262.0, 260.0, 288.0, 294.0, 291.0, 288.0, 295.0, 281.0, 277.0, 299.0, 272.0, 241.0, 285.0, 285.0, 226.0, 230.0, 281.0, 286.0, 265.0, 274.0, 287.0, 280.0, 290.0, 280.0, 293.0, 283.0, 256.0, 266.0, 253.0, 266.0, 266.0, 259.0, 287.0, 283.0, 280.0, 299.0, 236.0, 217.0, 268.0, 247.0, 291.0, 285.0, 244.0, 260.0, 303.0, 327.0, 274.0, 305.0, 291.0, 282.0, 285.0, 291.0, 281.0, 286.0, 282.0, 285.0, 289.0, 269.0, 264.0, 266.0, 303.0, 270.0, 307.0, 275.0, 285.0, 297.0, 253.0, 268.0, 304.0, 272.0, 231.0, 279.0, 299.0, 
280.0, 289.0, 290.0, 271.0, 251.0, 300.0, 273.0, 288.0, 285.0, 290.0, 286.0, 238.0, 235.0, 212.0, 207.0, 265.0, 274.0, 288.0, 285.0, 304.0, 272.0, 277.0, 293.0, 285.0, 288.0, 261.0, 223.0, 298.0, 284.0, 289.0, 287.0, 281.0, 252.0, 196.0, 200.0, 285.0, 288.0, 280.0, 290.0, 281.0, 280.0, 253.0, 257.0, 267.0, 258.0, 296.0, 280.0, 283.0, 299.0, 275.0, 298.0, 287.0, 277.0, 267.0, 249.0, 247.0, 254.0, 284.0, 283.0, 267.0, 257.0, 279.0, 291.0, 287.0, 286.0, 272.0, 258.0, 234.0, 267.0, 218.0, 226.0, 275.0, 301.0, 242.0, 234.0, 289.0, 287.0, 240.0, 267.0, 274.0, 299.0, 263.0, 256.0, 270.0, 300.0, 268.0, 262.0, 274.0, 299.0, 255.0, 257.0, 271.0, 256.0, 286.0, 284.0, 269.0, 301.0, 232.0, 255.0, 270.0, 260.0, 296.0, 286.0]}, "sampler_perf": {"mean_env_wait_ms": 1.6101955338743283, "mean_processing_ms": 0.3895722397312522, "mean_inference_ms": 2.1607416335063014}, "off_policy_estimator": {}, "info": {"num_steps_trained": 3120000, "num_steps_sampled": 1664000, "sample_time_ms": 21017.48, "load_time_ms": 36.902, "grad_time_ms": 9228.931, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.00922582671046257, "policy_loss": 0.0016869133105501533, "vf_loss": 81.20984649658203, "vf_explained_var": 0.7594642043113708, "kl": 0.003354247659444809, "entropy": 1.164129376411438, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1664000, "episodes_total": 4160, "training_iteration": 130, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-49-47", "timestamp": 1660250987, "time_this_iter_s": 31.841378211975098, "time_total_s": 9405.543888807297, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 9405.543888807297, "timesteps_since_restore": 1664000, "iterations_since_restore": 130, "perf": {"cpu_util_percent": 32.67111111111111, "ram_util_percent": 58.35999999999998}}
+{"episode_reward_max": 582.0, "episode_reward_min": 396.0, "episode_reward_mean": 538.49, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 191.0}, "policy_reward_max": {"ppo": 306.0}, "policy_reward_mean": {"ppo": 269.245}, "custom_metrics": {"sparse_reward_mean": 187.2, "sparse_reward_min": 140, "sparse_reward_max": 200, "shaped_reward_mean": 164.09, "shaped_reward_min": 116, "shaped_reward_max": 182, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.79, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 16.75, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 15.29, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 15.98, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.94, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 7, "onion_drop_agent_1_mean": 0.81, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.39, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, "useful_onion_drop_agent_1_mean": 0.41, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 14.59, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 15.61, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.01, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.72, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 4.4, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.08, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.85, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.86, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 7, "useful_dish_drop_agent_0_mean": 0.17, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.21, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.04, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 11, "soup_pickup_agent_1_mean": 4.67, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.92, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.58, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.08, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 5, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.59, 
"optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 15.61, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.59, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 15.61, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [573.0, 570.0, 516.0, 573.0, 570.0, 473.0, 573.0, 521.0, 570.0, 530.0, 582.0, 525.0, 579.0, 533.0, 564.0, 582.0, 461.0, 573.0, 524.0, 576.0, 516.0, 573.0, 573.0, 396.0, 576.0, 507.0, 467.0, 549.0, 527.0, 522.0, 418.0, 579.0, 533.0, 396.0, 573.0, 570.0, 561.0, 510.0, 525.0, 576.0, 582.0, 573.0, 564.0, 516.0, 501.0, 567.0, 524.0, 570.0, 573.0, 530.0, 501.0, 444.0, 576.0, 476.0, 576.0, 507.0, 573.0, 519.0, 570.0, 530.0, 573.0, 512.0, 527.0, 570.0, 570.0, 487.0, 530.0, 582.0, 582.0, 576.0, 567.0, 516.0, 522.0, 573.0, 510.0, 479.0, 524.0, 501.0, 512.0, 573.0, 522.0, 582.0, 579.0, 576.0, 576.0, 513.0, 570.0, 456.0, 567.0, 539.0, 567.0, 570.0, 576.0, 522.0, 519.0, 525.0, 570.0, 579.0, 453.0, 515.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [279.0, 294.0, 280.0, 290.0, 253.0, 263.0, 288.0, 285.0, 298.0, 272.0, 235.0, 238.0, 282.0, 291.0, 265.0, 256.0, 293.0, 277.0, 264.0, 266.0, 291.0, 291.0, 271.0, 254.0, 287.0, 292.0, 274.0, 259.0, 286.0, 278.0, 287.0, 295.0, 215.0, 246.0, 288.0, 285.0, 268.0, 256.0, 288.0, 288.0, 276.0, 240.0, 292.0, 281.0, 300.0, 273.0, 191.0, 205.0, 290.0, 286.0, 250.0, 257.0, 241.0, 226.0, 274.0, 275.0, 272.0, 255.0, 262.0, 260.0, 207.0, 211.0, 284.0, 295.0, 281.0, 252.0, 196.0, 200.0, 285.0, 288.0, 280.0, 290.0, 281.0, 280.0, 253.0, 257.0, 267.0, 258.0, 296.0, 280.0, 283.0, 299.0, 275.0, 298.0, 287.0, 277.0, 267.0, 249.0, 247.0, 254.0, 284.0, 283.0, 267.0, 257.0, 279.0, 291.0, 287.0, 
286.0, 272.0, 258.0, 234.0, 267.0, 218.0, 226.0, 275.0, 301.0, 242.0, 234.0, 289.0, 287.0, 240.0, 267.0, 274.0, 299.0, 263.0, 256.0, 270.0, 300.0, 268.0, 262.0, 274.0, 299.0, 255.0, 257.0, 271.0, 256.0, 286.0, 284.0, 269.0, 301.0, 232.0, 255.0, 270.0, 260.0, 296.0, 286.0, 294.0, 288.0, 288.0, 288.0, 299.0, 268.0, 272.0, 244.0, 252.0, 270.0, 282.0, 291.0, 256.0, 254.0, 250.0, 229.0, 266.0, 258.0, 236.0, 265.0, 238.0, 274.0, 267.0, 306.0, 262.0, 260.0, 288.0, 294.0, 291.0, 288.0, 295.0, 281.0, 277.0, 299.0, 272.0, 241.0, 285.0, 285.0, 226.0, 230.0, 281.0, 286.0, 265.0, 274.0, 287.0, 280.0, 290.0, 280.0, 293.0, 283.0, 256.0, 266.0, 253.0, 266.0, 266.0, 259.0, 287.0, 283.0, 280.0, 299.0, 236.0, 217.0, 268.0, 247.0]}, "sampler_perf": {"mean_env_wait_ms": 1.601265947983415, "mean_processing_ms": 0.38779407949911077, "mean_inference_ms": 2.1521646972676964}, "off_policy_estimator": {}, "info": {"num_steps_trained": 3144000, "num_steps_sampled": 1676800, "sample_time_ms": 21002.089, "load_time_ms": 36.986, "grad_time_ms": 9370.108, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.003206493565812707, "policy_loss": -0.00454886956140399, "vf_loss": 83.29342651367188, "vf_explained_var": 0.7723144888877869, "kl": 0.0017231384990736842, "entropy": 1.1479605436325073, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1676800, "episodes_total": 4192, "training_iteration": 131, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-50-19", "timestamp": 1660251019, "time_this_iter_s": 31.782477855682373, "time_total_s": 9437.32636666298, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 9437.32636666298, "timesteps_since_restore": 1676800, "iterations_since_restore": 131, "perf": {"cpu_util_percent": 30.406818181818174, "ram_util_percent": 58.26818181818181}}
+{"episode_reward_max": 587.0, "episode_reward_min": 396.0, "episode_reward_mean": 544.22, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 191.0}, "policy_reward_max": {"ppo": 306.0}, "policy_reward_mean": {"ppo": 272.11}, "custom_metrics": {"sparse_reward_mean": 189.0, "sparse_reward_min": 140, "sparse_reward_max": 200, "shaped_reward_mean": 166.22, "shaped_reward_min": 116, "shaped_reward_max": 187, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.9, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 16.98, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 15.5, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 16.29, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.95, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 7, "onion_drop_agent_1_mean": 0.82, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.38, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, "useful_onion_drop_agent_1_mean": 0.4, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 14.73, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 15.85, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.04, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.46, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.49, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.17, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.8, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.63, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.1, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.15, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.08, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.68, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.96, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.61, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.05, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.73, 
"optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 15.85, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.73, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 15.85, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [570.0, 504.0, 573.0, 561.0, 576.0, 582.0, 530.0, 498.0, 573.0, 573.0, 579.0, 579.0, 533.0, 522.0, 579.0, 576.0, 539.0, 519.0, 573.0, 573.0, 576.0, 573.0, 573.0, 573.0, 573.0, 495.0, 587.0, 570.0, 524.0, 579.0, 509.0, 527.0, 570.0, 487.0, 530.0, 582.0, 582.0, 576.0, 567.0, 516.0, 522.0, 573.0, 510.0, 479.0, 524.0, 501.0, 512.0, 573.0, 522.0, 582.0, 579.0, 576.0, 576.0, 513.0, 570.0, 456.0, 567.0, 539.0, 567.0, 570.0, 576.0, 522.0, 519.0, 525.0, 570.0, 579.0, 453.0, 515.0, 573.0, 570.0, 516.0, 573.0, 570.0, 473.0, 573.0, 521.0, 570.0, 530.0, 582.0, 525.0, 579.0, 533.0, 564.0, 582.0, 461.0, 573.0, 524.0, 576.0, 516.0, 573.0, 573.0, 396.0, 576.0, 507.0, 467.0, 549.0, 527.0, 522.0, 418.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [292.0, 278.0, 232.0, 272.0, 287.0, 286.0, 289.0, 272.0, 282.0, 294.0, 286.0, 296.0, 268.0, 262.0, 253.0, 245.0, 290.0, 283.0, 278.0, 295.0, 273.0, 306.0, 290.0, 289.0, 260.0, 273.0, 267.0, 255.0, 285.0, 294.0, 293.0, 283.0, 272.0, 267.0, 248.0, 271.0, 287.0, 286.0, 288.0, 285.0, 291.0, 285.0, 289.0, 284.0, 274.0, 299.0, 282.0, 291.0, 296.0, 277.0, 250.0, 245.0, 288.0, 299.0, 289.0, 281.0, 260.0, 264.0, 283.0, 296.0, 246.0, 263.0, 267.0, 260.0, 269.0, 301.0, 232.0, 255.0, 270.0, 260.0, 296.0, 286.0, 294.0, 288.0, 288.0, 288.0, 299.0, 268.0, 272.0, 244.0, 252.0, 270.0, 282.0, 291.0, 256.0, 254.0, 250.0, 229.0, 266.0, 258.0, 236.0, 265.0, 238.0, 274.0, 267.0, 306.0, 262.0, 
260.0, 288.0, 294.0, 291.0, 288.0, 295.0, 281.0, 277.0, 299.0, 272.0, 241.0, 285.0, 285.0, 226.0, 230.0, 281.0, 286.0, 265.0, 274.0, 287.0, 280.0, 290.0, 280.0, 293.0, 283.0, 256.0, 266.0, 253.0, 266.0, 266.0, 259.0, 287.0, 283.0, 280.0, 299.0, 236.0, 217.0, 268.0, 247.0, 279.0, 294.0, 280.0, 290.0, 253.0, 263.0, 288.0, 285.0, 298.0, 272.0, 235.0, 238.0, 282.0, 291.0, 265.0, 256.0, 293.0, 277.0, 264.0, 266.0, 291.0, 291.0, 271.0, 254.0, 287.0, 292.0, 274.0, 259.0, 286.0, 278.0, 287.0, 295.0, 215.0, 246.0, 288.0, 285.0, 268.0, 256.0, 288.0, 288.0, 276.0, 240.0, 292.0, 281.0, 300.0, 273.0, 191.0, 205.0, 290.0, 286.0, 250.0, 257.0, 241.0, 226.0, 274.0, 275.0, 272.0, 255.0, 262.0, 260.0, 207.0, 211.0, 284.0, 295.0]}, "sampler_perf": {"mean_env_wait_ms": 1.5924734608627003, "mean_processing_ms": 0.38604375166496974, "mean_inference_ms": 2.1438413106785164}, "off_policy_estimator": {}, "info": {"num_steps_trained": 3168000, "num_steps_sampled": 1689600, "sample_time_ms": 21092.562, "load_time_ms": 37.447, "grad_time_ms": 9515.71, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0031676997896283865, "policy_loss": -0.004229032900184393, "vf_loss": 79.702880859375, "vf_explained_var": 0.7654879093170166, "kl": 0.0019305540481582284, "entropy": 1.1470965147018433, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1689600, "episodes_total": 4224, "training_iteration": 132, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-50-51", "timestamp": 1660251051, "time_this_iter_s": 31.913390159606934, "time_total_s": 9469.239756822586, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 9469.239756822586, "timesteps_since_restore": 1689600, "iterations_since_restore": 132, "perf": {"cpu_util_percent": 33.193478260869554, "ram_util_percent": 58.276086956521716}}
+{"episode_reward_max": 587.0, "episode_reward_min": 123.0, "episode_reward_mean": 540.32, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 60.0}, "policy_reward_max": {"ppo": 306.0}, "policy_reward_mean": {"ppo": 270.16}, "custom_metrics": {"sparse_reward_mean": 187.6, "sparse_reward_min": 40, "sparse_reward_max": 200, "shaped_reward_mean": 165.12, "shaped_reward_min": 43, "shaped_reward_max": 187, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.9, "onion_pickup_agent_0_min": 5, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 16.8, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 15.56, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 16.01, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.83, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 0.89, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 9, "useful_onion_drop_agent_0_mean": 0.35, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.41, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 9, "potting_onion_agent_0_mean": 14.81, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 15.58, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.98, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 5.42, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.46, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 4.19, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.76, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 0.68, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.08, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.15, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.08, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.65, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.93, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.54, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.08, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.08, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 3, "optimal_onion_potting_agent_0_mean": 14.81, 
"optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 15.58, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.81, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 15.58, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [396.0, 576.0, 519.0, 461.0, 522.0, 570.0, 576.0, 501.0, 570.0, 536.0, 579.0, 576.0, 522.0, 573.0, 573.0, 530.0, 519.0, 573.0, 525.0, 576.0, 582.0, 576.0, 570.0, 573.0, 522.0, 419.0, 123.0, 582.0, 576.0, 522.0, 579.0, 576.0, 570.0, 579.0, 453.0, 515.0, 573.0, 570.0, 516.0, 573.0, 570.0, 473.0, 573.0, 521.0, 570.0, 530.0, 582.0, 525.0, 579.0, 533.0, 564.0, 582.0, 461.0, 573.0, 524.0, 576.0, 516.0, 573.0, 573.0, 396.0, 576.0, 507.0, 467.0, 549.0, 527.0, 522.0, 418.0, 579.0, 570.0, 504.0, 573.0, 561.0, 576.0, 582.0, 530.0, 498.0, 573.0, 573.0, 579.0, 579.0, 533.0, 522.0, 579.0, 576.0, 539.0, 519.0, 573.0, 573.0, 576.0, 573.0, 573.0, 573.0, 573.0, 495.0, 587.0, 570.0, 524.0, 579.0, 509.0, 527.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [203.0, 193.0, 298.0, 278.0, 254.0, 265.0, 231.0, 230.0, 259.0, 263.0, 283.0, 287.0, 287.0, 289.0, 244.0, 257.0, 282.0, 288.0, 271.0, 265.0, 287.0, 292.0, 281.0, 295.0, 263.0, 259.0, 293.0, 280.0, 281.0, 292.0, 273.0, 257.0, 252.0, 267.0, 290.0, 283.0, 265.0, 260.0, 296.0, 280.0, 297.0, 285.0, 298.0, 278.0, 298.0, 272.0, 298.0, 275.0, 256.0, 266.0, 215.0, 204.0, 60.0, 63.0, 287.0, 295.0, 281.0, 295.0, 256.0, 266.0, 287.0, 292.0, 288.0, 288.0, 287.0, 283.0, 280.0, 299.0, 236.0, 217.0, 268.0, 247.0, 279.0, 294.0, 280.0, 290.0, 253.0, 263.0, 288.0, 285.0, 298.0, 272.0, 235.0, 238.0, 282.0, 291.0, 265.0, 256.0, 293.0, 277.0, 264.0, 266.0, 291.0, 291.0, 271.0, 254.0, 287.0, 
292.0, 274.0, 259.0, 286.0, 278.0, 287.0, 295.0, 215.0, 246.0, 288.0, 285.0, 268.0, 256.0, 288.0, 288.0, 276.0, 240.0, 292.0, 281.0, 300.0, 273.0, 191.0, 205.0, 290.0, 286.0, 250.0, 257.0, 241.0, 226.0, 274.0, 275.0, 272.0, 255.0, 262.0, 260.0, 207.0, 211.0, 284.0, 295.0, 292.0, 278.0, 232.0, 272.0, 287.0, 286.0, 289.0, 272.0, 282.0, 294.0, 286.0, 296.0, 268.0, 262.0, 253.0, 245.0, 290.0, 283.0, 278.0, 295.0, 273.0, 306.0, 290.0, 289.0, 260.0, 273.0, 267.0, 255.0, 285.0, 294.0, 293.0, 283.0, 272.0, 267.0, 248.0, 271.0, 287.0, 286.0, 288.0, 285.0, 291.0, 285.0, 289.0, 284.0, 274.0, 299.0, 282.0, 291.0, 296.0, 277.0, 250.0, 245.0, 288.0, 299.0, 289.0, 281.0, 260.0, 264.0, 283.0, 296.0, 246.0, 263.0, 267.0, 260.0]}, "sampler_perf": {"mean_env_wait_ms": 1.583806400101539, "mean_processing_ms": 0.3843212568870559, "mean_inference_ms": 2.1355512648653474}, "off_policy_estimator": {}, "info": {"num_steps_trained": 3192000, "num_steps_sampled": 1702400, "sample_time_ms": 21045.575, "load_time_ms": 37.092, "grad_time_ms": 9547.761, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.004417246673256159, "policy_loss": -0.0036684710066765547, "vf_loss": 86.54926300048828, "vf_explained_var": 0.7708062529563904, "kl": 0.0019647751469165087, "entropy": 1.1384211778640747, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1702400, "episodes_total": 4256, "training_iteration": 133, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-51-22", "timestamp": 1660251082, "time_this_iter_s": 30.465492963790894, "time_total_s": 9499.705249786377, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 9499.705249786377, "timesteps_since_restore": 1702400, "iterations_since_restore": 133, "perf": {"cpu_util_percent": 27.94883720930233, "ram_util_percent": 58.35813953488371}}
+{"episode_reward_max": 587.0, "episode_reward_min": 123.0, "episode_reward_mean": 547.7, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 60.0}, "policy_reward_max": {"ppo": 306.0}, "policy_reward_mean": {"ppo": 273.85}, "custom_metrics": {"sparse_reward_mean": 189.8, "sparse_reward_min": 40, "sparse_reward_max": 200, "shaped_reward_mean": 168.1, "shaped_reward_min": 43, "shaped_reward_max": 187, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.85, "onion_pickup_agent_0_min": 5, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 17.27, "onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 15.49, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 16.46, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.86, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 0.85, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 9, "useful_onion_drop_agent_0_mean": 0.35, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.42, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 9, "potting_onion_agent_0_mean": 14.74, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 16.03, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.0, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 5.16, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.74, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.44, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.69, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 0.43, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.06, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.09, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.12, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.66, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 4.95, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.58, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.09, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.08, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 3, "optimal_onion_potting_agent_0_mean": 14.74, 
"optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 16.03, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.74, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 16.03, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [519.0, 582.0, 582.0, 579.0, 579.0, 507.0, 576.0, 522.0, 516.0, 579.0, 582.0, 573.0, 576.0, 579.0, 530.0, 582.0, 522.0, 579.0, 576.0, 525.0, 519.0, 539.0, 570.0, 567.0, 570.0, 576.0, 579.0, 582.0, 573.0, 579.0, 582.0, 579.0, 527.0, 522.0, 418.0, 579.0, 570.0, 504.0, 573.0, 561.0, 576.0, 582.0, 530.0, 498.0, 573.0, 573.0, 579.0, 579.0, 533.0, 522.0, 579.0, 576.0, 539.0, 519.0, 573.0, 573.0, 576.0, 573.0, 573.0, 573.0, 573.0, 495.0, 587.0, 570.0, 524.0, 579.0, 509.0, 527.0, 396.0, 576.0, 519.0, 461.0, 522.0, 570.0, 576.0, 501.0, 570.0, 536.0, 579.0, 576.0, 522.0, 573.0, 573.0, 530.0, 519.0, 573.0, 525.0, 576.0, 582.0, 576.0, 570.0, 573.0, 522.0, 419.0, 123.0, 582.0, 576.0, 522.0, 579.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [256.0, 263.0, 277.0, 305.0, 296.0, 286.0, 288.0, 291.0, 306.0, 273.0, 258.0, 249.0, 277.0, 299.0, 278.0, 244.0, 245.0, 271.0, 279.0, 300.0, 291.0, 291.0, 288.0, 285.0, 290.0, 286.0, 286.0, 293.0, 270.0, 260.0, 293.0, 289.0, 263.0, 259.0, 297.0, 282.0, 282.0, 294.0, 257.0, 268.0, 264.0, 255.0, 270.0, 269.0, 280.0, 290.0, 263.0, 304.0, 285.0, 285.0, 290.0, 286.0, 301.0, 278.0, 296.0, 286.0, 296.0, 277.0, 284.0, 295.0, 280.0, 302.0, 289.0, 290.0, 272.0, 255.0, 262.0, 260.0, 207.0, 211.0, 284.0, 295.0, 292.0, 278.0, 232.0, 272.0, 287.0, 286.0, 289.0, 272.0, 282.0, 294.0, 286.0, 296.0, 268.0, 262.0, 253.0, 245.0, 290.0, 283.0, 278.0, 295.0, 273.0, 306.0, 290.0, 289.0, 260.0, 
273.0, 267.0, 255.0, 285.0, 294.0, 293.0, 283.0, 272.0, 267.0, 248.0, 271.0, 287.0, 286.0, 288.0, 285.0, 291.0, 285.0, 289.0, 284.0, 274.0, 299.0, 282.0, 291.0, 296.0, 277.0, 250.0, 245.0, 288.0, 299.0, 289.0, 281.0, 260.0, 264.0, 283.0, 296.0, 246.0, 263.0, 267.0, 260.0, 203.0, 193.0, 298.0, 278.0, 254.0, 265.0, 231.0, 230.0, 259.0, 263.0, 283.0, 287.0, 287.0, 289.0, 244.0, 257.0, 282.0, 288.0, 271.0, 265.0, 287.0, 292.0, 281.0, 295.0, 263.0, 259.0, 293.0, 280.0, 281.0, 292.0, 273.0, 257.0, 252.0, 267.0, 290.0, 283.0, 265.0, 260.0, 296.0, 280.0, 297.0, 285.0, 298.0, 278.0, 298.0, 272.0, 298.0, 275.0, 256.0, 266.0, 215.0, 204.0, 60.0, 63.0, 287.0, 295.0, 281.0, 295.0, 256.0, 266.0, 287.0, 292.0, 288.0, 288.0]}, "sampler_perf": {"mean_env_wait_ms": 1.5752673474082683, "mean_processing_ms": 0.382627028657334, "mean_inference_ms": 2.127290316097115}, "off_policy_estimator": {}, "info": {"num_steps_trained": 3216000, "num_steps_sampled": 1715200, "sample_time_ms": 21118.905, "load_time_ms": 37.128, "grad_time_ms": 9783.423, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.003744603367522359, "policy_loss": -0.004061851184815168, "vf_loss": 83.74505615234375, "vf_explained_var": 0.7541170120239258, "kl": 0.001809759414754808, "entropy": 1.1361082792282104, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1715200, "episodes_total": 4288, "training_iteration": 134, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-51-53", "timestamp": 1660251113, "time_this_iter_s": 31.301603078842163, "time_total_s": 9531.00685286522, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 9531.00685286522, "timesteps_since_restore": 1715200, "iterations_since_restore": 134, "perf": {"cpu_util_percent": 35.08181818181818, "ram_util_percent": 58.252272727272725}}
+{"episode_reward_max": 582.0, "episode_reward_min": 123.0, "episode_reward_mean": 546.98, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 60.0}, "policy_reward_max": {"ppo": 306.0}, "policy_reward_mean": {"ppo": 273.49}, "custom_metrics": {"sparse_reward_mean": 189.4, "sparse_reward_min": 40, "sparse_reward_max": 200, "shaped_reward_mean": 168.18, "shaped_reward_min": 43, "shaped_reward_max": 182, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.7, "onion_pickup_agent_0_min": 5, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 17.28, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 15.23, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 16.45, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.75, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 0.82, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 9, "useful_onion_drop_agent_0_mean": 0.32, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.4, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 9, "potting_onion_agent_0_mean": 14.66, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 16.09, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.94, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 5.17, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.91, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.4, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.62, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 0.47, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.07, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.06, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.12, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.59, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.01, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.49, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.07, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.08, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 3, "optimal_onion_potting_agent_0_mean": 14.66, 
"optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 16.09, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.66, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 16.09, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 561.0, 533.0, 582.0, 573.0, 533.0, 581.0, 459.0, 570.0, 522.0, 579.0, 570.0, 573.0, 582.0, 576.0, 582.0, 579.0, 576.0, 576.0, 567.0, 582.0, 579.0, 564.0, 582.0, 570.0, 228.0, 479.0, 573.0, 516.0, 576.0, 582.0, 522.0, 524.0, 579.0, 509.0, 527.0, 396.0, 576.0, 519.0, 461.0, 522.0, 570.0, 576.0, 501.0, 570.0, 536.0, 579.0, 576.0, 522.0, 573.0, 573.0, 530.0, 519.0, 573.0, 525.0, 576.0, 582.0, 576.0, 570.0, 573.0, 522.0, 419.0, 123.0, 582.0, 576.0, 522.0, 579.0, 576.0, 519.0, 582.0, 582.0, 579.0, 579.0, 507.0, 576.0, 522.0, 516.0, 579.0, 582.0, 573.0, 576.0, 579.0, 530.0, 582.0, 522.0, 579.0, 576.0, 525.0, 519.0, 539.0, 570.0, 567.0, 570.0, 576.0, 579.0, 582.0, 573.0, 579.0, 582.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [301.0, 278.0, 274.0, 287.0, 266.0, 267.0, 284.0, 298.0, 292.0, 281.0, 273.0, 260.0, 288.0, 293.0, 233.0, 226.0, 287.0, 283.0, 253.0, 269.0, 290.0, 289.0, 287.0, 283.0, 279.0, 294.0, 289.0, 293.0, 297.0, 279.0, 291.0, 291.0, 277.0, 302.0, 299.0, 277.0, 299.0, 277.0, 265.0, 302.0, 290.0, 292.0, 288.0, 291.0, 289.0, 275.0, 285.0, 297.0, 277.0, 293.0, 114.0, 114.0, 248.0, 231.0, 285.0, 288.0, 256.0, 260.0, 292.0, 284.0, 300.0, 282.0, 253.0, 269.0, 260.0, 264.0, 283.0, 296.0, 246.0, 263.0, 267.0, 260.0, 203.0, 193.0, 298.0, 278.0, 254.0, 265.0, 231.0, 230.0, 259.0, 263.0, 283.0, 287.0, 287.0, 289.0, 244.0, 257.0, 282.0, 288.0, 271.0, 265.0, 287.0, 292.0, 281.0, 295.0, 263.0, 
259.0, 293.0, 280.0, 281.0, 292.0, 273.0, 257.0, 252.0, 267.0, 290.0, 283.0, 265.0, 260.0, 296.0, 280.0, 297.0, 285.0, 298.0, 278.0, 298.0, 272.0, 298.0, 275.0, 256.0, 266.0, 215.0, 204.0, 60.0, 63.0, 287.0, 295.0, 281.0, 295.0, 256.0, 266.0, 287.0, 292.0, 288.0, 288.0, 256.0, 263.0, 277.0, 305.0, 296.0, 286.0, 288.0, 291.0, 306.0, 273.0, 258.0, 249.0, 277.0, 299.0, 278.0, 244.0, 245.0, 271.0, 279.0, 300.0, 291.0, 291.0, 288.0, 285.0, 290.0, 286.0, 286.0, 293.0, 270.0, 260.0, 293.0, 289.0, 263.0, 259.0, 297.0, 282.0, 282.0, 294.0, 257.0, 268.0, 264.0, 255.0, 270.0, 269.0, 280.0, 290.0, 263.0, 304.0, 285.0, 285.0, 290.0, 286.0, 301.0, 278.0, 296.0, 286.0, 296.0, 277.0, 284.0, 295.0, 280.0, 302.0, 289.0, 290.0]}, "sampler_perf": {"mean_env_wait_ms": 1.5668312042090953, "mean_processing_ms": 0.38095382690757534, "mean_inference_ms": 2.118642826517617}, "off_policy_estimator": {}, "info": {"num_steps_trained": 3240000, "num_steps_sampled": 1728000, "sample_time_ms": 20924.032, "load_time_ms": 37.391, "grad_time_ms": 9937.539, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0035265153273940086, "policy_loss": -0.004777689930051565, "vf_loss": 88.75411224365234, "vf_explained_var": 0.7641527056694031, "kl": 0.002029512310400605, "entropy": 1.1424118280410767, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1728000, "episodes_total": 4320, "training_iteration": 135, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-52-22", "timestamp": 1660251142, "time_this_iter_s": 29.18880271911621, "time_total_s": 9560.195655584335, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 9560.195655584335, "timesteps_since_restore": 1728000, "iterations_since_restore": 135, "perf": {"cpu_util_percent": 35.34146341463415, "ram_util_percent": 58.2390243902439}}
+{"episode_reward_max": 582.0, "episode_reward_min": 228.0, "episode_reward_mean": 550.69, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 114.0}, "policy_reward_max": {"ppo": 309.0}, "policy_reward_mean": {"ppo": 275.345}, "custom_metrics": {"sparse_reward_mean": 190.6, "sparse_reward_min": 80, "sparse_reward_max": 200, "shaped_reward_mean": 169.49, "shaped_reward_min": 68, "shaped_reward_max": 182, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.86, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 17.31, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 15.28, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 16.55, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.85, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 0.78, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.35, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.44, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 14.71, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 16.22, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.7, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.29, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.94, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.51, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.49, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.43, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.05, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.05, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.01, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.68, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 4.96, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.63, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.03, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.71, 
"optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 16.22, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.71, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 16.22, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [570.0, 579.0, 510.0, 582.0, 573.0, 573.0, 470.0, 579.0, 530.0, 567.0, 573.0, 533.0, 582.0, 461.0, 496.0, 510.0, 513.0, 579.0, 530.0, 570.0, 525.0, 525.0, 573.0, 294.0, 576.0, 539.0, 522.0, 498.0, 573.0, 576.0, 579.0, 570.0, 576.0, 522.0, 579.0, 576.0, 519.0, 582.0, 582.0, 579.0, 579.0, 507.0, 576.0, 522.0, 516.0, 579.0, 582.0, 573.0, 576.0, 579.0, 530.0, 582.0, 522.0, 579.0, 576.0, 525.0, 519.0, 539.0, 570.0, 567.0, 570.0, 576.0, 579.0, 582.0, 573.0, 579.0, 582.0, 579.0, 579.0, 561.0, 533.0, 582.0, 573.0, 533.0, 581.0, 459.0, 570.0, 522.0, 579.0, 570.0, 573.0, 582.0, 576.0, 582.0, 579.0, 576.0, 576.0, 567.0, 582.0, 579.0, 564.0, 582.0, 570.0, 228.0, 479.0, 573.0, 516.0, 576.0, 582.0, 522.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [291.0, 279.0, 270.0, 309.0, 255.0, 255.0, 282.0, 300.0, 275.0, 298.0, 293.0, 280.0, 239.0, 231.0, 293.0, 286.0, 260.0, 270.0, 287.0, 280.0, 273.0, 300.0, 275.0, 258.0, 285.0, 297.0, 229.0, 232.0, 252.0, 244.0, 270.0, 240.0, 241.0, 272.0, 288.0, 291.0, 249.0, 281.0, 287.0, 283.0, 257.0, 268.0, 258.0, 267.0, 279.0, 294.0, 142.0, 152.0, 285.0, 291.0, 281.0, 258.0, 262.0, 260.0, 261.0, 237.0, 292.0, 281.0, 287.0, 289.0, 285.0, 294.0, 279.0, 291.0, 281.0, 295.0, 256.0, 266.0, 287.0, 292.0, 288.0, 288.0, 256.0, 263.0, 277.0, 305.0, 296.0, 286.0, 288.0, 291.0, 306.0, 273.0, 258.0, 249.0, 277.0, 299.0, 278.0, 244.0, 245.0, 271.0, 279.0, 300.0, 291.0, 291.0, 288.0, 285.0, 290.0, 
286.0, 286.0, 293.0, 270.0, 260.0, 293.0, 289.0, 263.0, 259.0, 297.0, 282.0, 282.0, 294.0, 257.0, 268.0, 264.0, 255.0, 270.0, 269.0, 280.0, 290.0, 263.0, 304.0, 285.0, 285.0, 290.0, 286.0, 301.0, 278.0, 296.0, 286.0, 296.0, 277.0, 284.0, 295.0, 280.0, 302.0, 289.0, 290.0, 301.0, 278.0, 274.0, 287.0, 266.0, 267.0, 284.0, 298.0, 292.0, 281.0, 273.0, 260.0, 288.0, 293.0, 233.0, 226.0, 287.0, 283.0, 253.0, 269.0, 290.0, 289.0, 287.0, 283.0, 279.0, 294.0, 289.0, 293.0, 297.0, 279.0, 291.0, 291.0, 277.0, 302.0, 299.0, 277.0, 299.0, 277.0, 265.0, 302.0, 290.0, 292.0, 288.0, 291.0, 289.0, 275.0, 285.0, 297.0, 277.0, 293.0, 114.0, 114.0, 248.0, 231.0, 285.0, 288.0, 256.0, 260.0, 292.0, 284.0, 300.0, 282.0, 253.0, 269.0]}, "sampler_perf": {"mean_env_wait_ms": 1.5585142836902617, "mean_processing_ms": 0.37930323172658476, "mean_inference_ms": 2.109956620242284}, "off_policy_estimator": {}, "info": {"num_steps_trained": 3264000, "num_steps_sampled": 1740800, "sample_time_ms": 20787.416, "load_time_ms": 37.15, "grad_time_ms": 10003.018, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.003316950984299183, "policy_loss": -0.004835940897464752, "vf_loss": 87.2677993774414, "vf_explained_var": 0.7657222151756287, "kl": 0.0019325317116454244, "entropy": 1.1477751731872559, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1740800, "episodes_total": 4352, "training_iteration": 136, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-52-52", "timestamp": 1660251172, "time_this_iter_s": 29.77871298789978, "time_total_s": 9589.974368572235, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 9589.974368572235, "timesteps_since_restore": 1740800, "iterations_since_restore": 136, "perf": {"cpu_util_percent": 32.416666666666664, "ram_util_percent": 58.35476190476191}}
+{"episode_reward_max": 630.0, "episode_reward_min": 228.0, "episode_reward_mean": 546.21, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 114.0}, "policy_reward_max": {"ppo": 316.0}, "policy_reward_mean": {"ppo": 273.105}, "custom_metrics": {"sparse_reward_mean": 189.2, "sparse_reward_min": 80, "sparse_reward_max": 220, "shaped_reward_mean": 167.81, "shaped_reward_min": 68, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.06, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 17.14, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 28, "useful_onion_pickup_agent_0_mean": 15.48, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 16.26, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.89, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 0.95, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 7, "useful_onion_drop_agent_0_mean": 0.37, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.48, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 14.82, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 15.88, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.67, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.37, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.85, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.32, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.51, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.55, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.06, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.08, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.99, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.67, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.93, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.61, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.03, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.82, 
"optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 15.88, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.82, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 15.88, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [525.0, 576.0, 479.0, 576.0, 570.0, 342.0, 470.0, 573.0, 510.0, 518.0, 570.0, 582.0, 539.0, 570.0, 510.0, 587.0, 579.0, 516.0, 516.0, 582.0, 576.0, 576.0, 570.0, 576.0, 519.0, 570.0, 581.0, 630.0, 573.0, 522.0, 573.0, 516.0, 573.0, 579.0, 582.0, 579.0, 579.0, 561.0, 533.0, 582.0, 573.0, 533.0, 581.0, 459.0, 570.0, 522.0, 579.0, 570.0, 573.0, 582.0, 576.0, 582.0, 579.0, 576.0, 576.0, 567.0, 582.0, 579.0, 564.0, 582.0, 570.0, 228.0, 479.0, 573.0, 516.0, 576.0, 582.0, 522.0, 570.0, 579.0, 510.0, 582.0, 573.0, 573.0, 470.0, 579.0, 530.0, 567.0, 573.0, 533.0, 582.0, 461.0, 496.0, 510.0, 513.0, 579.0, 530.0, 570.0, 525.0, 525.0, 573.0, 294.0, 576.0, 539.0, 522.0, 498.0, 573.0, 576.0, 579.0, 570.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [274.0, 251.0, 298.0, 278.0, 254.0, 225.0, 297.0, 279.0, 286.0, 284.0, 166.0, 176.0, 221.0, 249.0, 296.0, 277.0, 259.0, 251.0, 259.0, 259.0, 298.0, 272.0, 283.0, 299.0, 271.0, 268.0, 293.0, 277.0, 264.0, 246.0, 285.0, 302.0, 291.0, 288.0, 286.0, 230.0, 249.0, 267.0, 284.0, 298.0, 293.0, 283.0, 288.0, 288.0, 283.0, 287.0, 285.0, 291.0, 257.0, 262.0, 286.0, 284.0, 291.0, 290.0, 316.0, 314.0, 277.0, 296.0, 262.0, 260.0, 283.0, 290.0, 264.0, 252.0, 296.0, 277.0, 284.0, 295.0, 280.0, 302.0, 289.0, 290.0, 301.0, 278.0, 274.0, 287.0, 266.0, 267.0, 284.0, 298.0, 292.0, 281.0, 273.0, 260.0, 288.0, 293.0, 233.0, 226.0, 287.0, 283.0, 253.0, 269.0, 290.0, 289.0, 287.0, 283.0, 279.0, 
294.0, 289.0, 293.0, 297.0, 279.0, 291.0, 291.0, 277.0, 302.0, 299.0, 277.0, 299.0, 277.0, 265.0, 302.0, 290.0, 292.0, 288.0, 291.0, 289.0, 275.0, 285.0, 297.0, 277.0, 293.0, 114.0, 114.0, 248.0, 231.0, 285.0, 288.0, 256.0, 260.0, 292.0, 284.0, 300.0, 282.0, 253.0, 269.0, 291.0, 279.0, 270.0, 309.0, 255.0, 255.0, 282.0, 300.0, 275.0, 298.0, 293.0, 280.0, 239.0, 231.0, 293.0, 286.0, 260.0, 270.0, 287.0, 280.0, 273.0, 300.0, 275.0, 258.0, 285.0, 297.0, 229.0, 232.0, 252.0, 244.0, 270.0, 240.0, 241.0, 272.0, 288.0, 291.0, 249.0, 281.0, 287.0, 283.0, 257.0, 268.0, 258.0, 267.0, 279.0, 294.0, 142.0, 152.0, 285.0, 291.0, 281.0, 258.0, 262.0, 260.0, 261.0, 237.0, 292.0, 281.0, 287.0, 289.0, 285.0, 294.0, 279.0, 291.0]}, "sampler_perf": {"mean_env_wait_ms": 1.5503391624395824, "mean_processing_ms": 0.37768347850915746, "mean_inference_ms": 2.101391542622976}, "off_policy_estimator": {}, "info": {"num_steps_trained": 3288000, "num_steps_sampled": 1753600, "sample_time_ms": 20872.885, "load_time_ms": 37.097, "grad_time_ms": 9999.872, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0012538364389911294, "policy_loss": -0.005918627139180899, "vf_loss": 77.47673797607422, "vf_explained_var": 0.7781977653503418, "kl": 0.0019029680406674743, "entropy": 1.1504276990890503, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1753600, "episodes_total": 4384, "training_iteration": 137, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-53-22", "timestamp": 1660251202, "time_this_iter_s": 30.53275179862976, "time_total_s": 9620.507120370865, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 9620.507120370865, "timesteps_since_restore": 1753600, "iterations_since_restore": 137, "perf": {"cpu_util_percent": 31.753488372093024, "ram_util_percent": 58.406976744186025}}
+{"episode_reward_max": 630.0, "episode_reward_min": 294.0, "episode_reward_mean": 546.11, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 142.0}, "policy_reward_max": {"ppo": 317.0}, "policy_reward_mean": {"ppo": 273.055}, "custom_metrics": {"sparse_reward_mean": 189.0, "sparse_reward_min": 100, "sparse_reward_max": 220, "shaped_reward_mean": 168.11, "shaped_reward_min": 94, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.28, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 17.22, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 28, "useful_onion_pickup_agent_0_mean": 15.76, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 16.22, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 1.01, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 1.04, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 7, "useful_onion_drop_agent_0_mean": 0.4, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.6, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 14.95, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 15.87, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.67, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.37, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 4.72, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.38, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.53, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.49, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.05, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.12, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 4.94, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.77, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.84, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.7, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.05, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.95, 
"optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 15.87, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.95, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 15.87, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [630.0, 459.0, 573.0, 579.0, 527.0, 519.0, 579.0, 525.0, 516.0, 579.0, 579.0, 579.0, 579.0, 573.0, 449.0, 579.0, 579.0, 576.0, 441.0, 576.0, 533.0, 516.0, 582.0, 516.0, 579.0, 579.0, 567.0, 587.0, 527.0, 579.0, 573.0, 579.0, 516.0, 576.0, 582.0, 522.0, 570.0, 579.0, 510.0, 582.0, 573.0, 573.0, 470.0, 579.0, 530.0, 567.0, 573.0, 533.0, 582.0, 461.0, 496.0, 510.0, 513.0, 579.0, 530.0, 570.0, 525.0, 525.0, 573.0, 294.0, 576.0, 539.0, 522.0, 498.0, 573.0, 576.0, 579.0, 570.0, 525.0, 576.0, 479.0, 576.0, 570.0, 342.0, 470.0, 573.0, 510.0, 518.0, 570.0, 582.0, 539.0, 570.0, 510.0, 587.0, 579.0, 516.0, 516.0, 582.0, 576.0, 576.0, 570.0, 576.0, 519.0, 570.0, 581.0, 630.0, 573.0, 522.0, 573.0, 516.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [313.0, 317.0, 230.0, 229.0, 295.0, 278.0, 295.0, 284.0, 275.0, 252.0, 266.0, 253.0, 285.0, 294.0, 260.0, 265.0, 261.0, 255.0, 285.0, 294.0, 272.0, 307.0, 284.0, 295.0, 273.0, 306.0, 290.0, 283.0, 228.0, 221.0, 296.0, 283.0, 279.0, 300.0, 299.0, 277.0, 226.0, 215.0, 280.0, 296.0, 262.0, 271.0, 261.0, 255.0, 288.0, 294.0, 267.0, 249.0, 289.0, 290.0, 281.0, 298.0, 279.0, 288.0, 286.0, 301.0, 256.0, 271.0, 280.0, 299.0, 275.0, 298.0, 283.0, 296.0, 256.0, 260.0, 292.0, 284.0, 300.0, 282.0, 253.0, 269.0, 291.0, 279.0, 270.0, 309.0, 255.0, 255.0, 282.0, 300.0, 275.0, 298.0, 293.0, 280.0, 239.0, 231.0, 293.0, 286.0, 260.0, 270.0, 287.0, 280.0, 273.0, 300.0, 275.0, 258.0, 285.0, 
297.0, 229.0, 232.0, 252.0, 244.0, 270.0, 240.0, 241.0, 272.0, 288.0, 291.0, 249.0, 281.0, 287.0, 283.0, 257.0, 268.0, 258.0, 267.0, 279.0, 294.0, 142.0, 152.0, 285.0, 291.0, 281.0, 258.0, 262.0, 260.0, 261.0, 237.0, 292.0, 281.0, 287.0, 289.0, 285.0, 294.0, 279.0, 291.0, 274.0, 251.0, 298.0, 278.0, 254.0, 225.0, 297.0, 279.0, 286.0, 284.0, 166.0, 176.0, 221.0, 249.0, 296.0, 277.0, 259.0, 251.0, 259.0, 259.0, 298.0, 272.0, 283.0, 299.0, 271.0, 268.0, 293.0, 277.0, 264.0, 246.0, 285.0, 302.0, 291.0, 288.0, 286.0, 230.0, 249.0, 267.0, 284.0, 298.0, 293.0, 283.0, 288.0, 288.0, 283.0, 287.0, 285.0, 291.0, 257.0, 262.0, 286.0, 284.0, 291.0, 290.0, 316.0, 314.0, 277.0, 296.0, 262.0, 260.0, 283.0, 290.0, 264.0, 252.0]}, "sampler_perf": {"mean_env_wait_ms": 1.5422943636827329, "mean_processing_ms": 0.376087334740523, "mean_inference_ms": 2.0930739405664296}, "off_policy_estimator": {}, "info": {"num_steps_trained": 3312000, "num_steps_sampled": 1766400, "sample_time_ms": 20654.49, "load_time_ms": 37.188, "grad_time_ms": 9982.936, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0023388864938169718, "policy_loss": -0.0055216290056705475, "vf_loss": 84.28978729248047, "vf_explained_var": 0.7621362209320068, "kl": 0.0017433507600799203, "entropy": 1.136921763420105, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1766400, "episodes_total": 4416, "training_iteration": 138, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-53-51", "timestamp": 1660251231, "time_this_iter_s": 28.834796905517578, "time_total_s": 9649.341917276382, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 9649.341917276382, "timesteps_since_restore": 1766400, "iterations_since_restore": 138, "perf": {"cpu_util_percent": 31.565853658536582, "ram_util_percent": 58.34634146341463}}
+{"episode_reward_max": 630.0, "episode_reward_min": 342.0, "episode_reward_mean": 553.65, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 166.0}, "policy_reward_max": {"ppo": 323.0}, "policy_reward_mean": {"ppo": 276.825}, "custom_metrics": {"sparse_reward_mean": 191.8, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 170.05, "shaped_reward_min": 102, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.36, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 17.1, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 28, "useful_onion_pickup_agent_0_mean": 15.92, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 16.18, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.85, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.93, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 7, "useful_onion_drop_agent_0_mean": 0.38, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.44, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 15.22, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 15.88, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.69, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.45, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.73, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.57, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.52, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.49, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.06, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.1, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 4.95, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.86, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.86, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.8, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.04, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.22, 
"optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 15.88, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.22, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 15.88, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [576.0, 345.0, 465.0, 558.0, 570.0, 573.0, 582.0, 573.0, 576.0, 573.0, 573.0, 573.0, 582.0, 579.0, 582.0, 582.0, 567.0, 582.0, 576.0, 579.0, 587.0, 519.0, 570.0, 567.0, 579.0, 582.0, 533.0, 522.0, 587.0, 504.0, 630.0, 536.0, 573.0, 576.0, 579.0, 570.0, 525.0, 576.0, 479.0, 576.0, 570.0, 342.0, 470.0, 573.0, 510.0, 518.0, 570.0, 582.0, 539.0, 570.0, 510.0, 587.0, 579.0, 516.0, 516.0, 582.0, 576.0, 576.0, 570.0, 576.0, 519.0, 570.0, 581.0, 630.0, 573.0, 522.0, 573.0, 516.0, 630.0, 459.0, 573.0, 579.0, 527.0, 519.0, 579.0, 525.0, 516.0, 579.0, 579.0, 579.0, 579.0, 573.0, 449.0, 579.0, 579.0, 576.0, 441.0, 576.0, 533.0, 516.0, 582.0, 516.0, 579.0, 579.0, 567.0, 587.0, 527.0, 579.0, 573.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [276.0, 300.0, 177.0, 168.0, 229.0, 236.0, 275.0, 283.0, 271.0, 299.0, 292.0, 281.0, 286.0, 296.0, 287.0, 286.0, 283.0, 293.0, 293.0, 280.0, 280.0, 293.0, 296.0, 277.0, 293.0, 289.0, 272.0, 307.0, 294.0, 288.0, 298.0, 284.0, 272.0, 295.0, 293.0, 289.0, 287.0, 289.0, 287.0, 292.0, 291.0, 296.0, 257.0, 262.0, 273.0, 297.0, 281.0, 286.0, 298.0, 281.0, 282.0, 300.0, 273.0, 260.0, 272.0, 250.0, 295.0, 292.0, 252.0, 252.0, 307.0, 323.0, 267.0, 269.0, 292.0, 281.0, 287.0, 289.0, 285.0, 294.0, 279.0, 291.0, 274.0, 251.0, 298.0, 278.0, 254.0, 225.0, 297.0, 279.0, 286.0, 284.0, 166.0, 176.0, 221.0, 249.0, 296.0, 277.0, 259.0, 251.0, 259.0, 259.0, 298.0, 272.0, 283.0, 299.0, 271.0, 
268.0, 293.0, 277.0, 264.0, 246.0, 285.0, 302.0, 291.0, 288.0, 286.0, 230.0, 249.0, 267.0, 284.0, 298.0, 293.0, 283.0, 288.0, 288.0, 283.0, 287.0, 285.0, 291.0, 257.0, 262.0, 286.0, 284.0, 291.0, 290.0, 316.0, 314.0, 277.0, 296.0, 262.0, 260.0, 283.0, 290.0, 264.0, 252.0, 313.0, 317.0, 230.0, 229.0, 295.0, 278.0, 295.0, 284.0, 275.0, 252.0, 266.0, 253.0, 285.0, 294.0, 260.0, 265.0, 261.0, 255.0, 285.0, 294.0, 272.0, 307.0, 284.0, 295.0, 273.0, 306.0, 290.0, 283.0, 228.0, 221.0, 296.0, 283.0, 279.0, 300.0, 299.0, 277.0, 226.0, 215.0, 280.0, 296.0, 262.0, 271.0, 261.0, 255.0, 288.0, 294.0, 267.0, 249.0, 289.0, 290.0, 281.0, 298.0, 279.0, 288.0, 286.0, 301.0, 256.0, 271.0, 280.0, 299.0, 275.0, 298.0, 283.0, 296.0]}, "sampler_perf": {"mean_env_wait_ms": 1.534348979817542, "mean_processing_ms": 0.37451084317148714, "mean_inference_ms": 2.0846763834338202}, "off_policy_estimator": {}, "info": {"num_steps_trained": 3336000, "num_steps_sampled": 1779200, "sample_time_ms": 20417.117, "load_time_ms": 37.045, "grad_time_ms": 9852.38, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.006708970759063959, "policy_loss": -0.0014362437650561333, "vf_loss": 87.18399810791016, "vf_explained_var": 0.7458827495574951, "kl": 0.0019282657885923982, "entropy": 1.1463767290115356, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1779200, "episodes_total": 4448, "training_iteration": 139, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-54-19", "timestamp": 1660251259, "time_this_iter_s": 27.688152074813843, "time_total_s": 9677.030069351196, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 9677.030069351196, "timesteps_since_restore": 1779200, "iterations_since_restore": 139, "perf": {"cpu_util_percent": 30.3025641025641, "ram_util_percent": 58.341025641025624}}
+{"episode_reward_max": 630.0, "episode_reward_min": 345.0, "episode_reward_mean": 556.32, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 168.0}, "policy_reward_max": {"ppo": 323.0}, "policy_reward_mean": {"ppo": 278.16}, "custom_metrics": {"sparse_reward_mean": 192.8, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 170.72, "shaped_reward_min": 105, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.18, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 17.21, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 15.77, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 16.37, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.72, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.8, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.31, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.38, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 15.19, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 16.1, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.56, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.53, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.62, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.68, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.47, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.49, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.06, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.1, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 4.91, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.92, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.85, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.84, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 15.19, 
"optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 16.1, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.19, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 16.1, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [530.0, 567.0, 530.0, 573.0, 576.0, 521.0, 570.0, 579.0, 501.0, 573.0, 522.0, 579.0, 630.0, 576.0, 570.0, 576.0, 579.0, 522.0, 576.0, 525.0, 576.0, 579.0, 567.0, 579.0, 576.0, 579.0, 582.0, 576.0, 504.0, 579.0, 573.0, 408.0, 573.0, 522.0, 573.0, 516.0, 630.0, 459.0, 573.0, 579.0, 527.0, 519.0, 579.0, 525.0, 516.0, 579.0, 579.0, 579.0, 579.0, 573.0, 449.0, 579.0, 579.0, 576.0, 441.0, 576.0, 533.0, 516.0, 582.0, 516.0, 579.0, 579.0, 567.0, 587.0, 527.0, 579.0, 573.0, 579.0, 576.0, 345.0, 465.0, 558.0, 570.0, 573.0, 582.0, 573.0, 576.0, 573.0, 573.0, 573.0, 582.0, 579.0, 582.0, 582.0, 567.0, 582.0, 576.0, 579.0, 587.0, 519.0, 570.0, 567.0, 579.0, 582.0, 533.0, 522.0, 587.0, 504.0, 630.0, 536.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [269.0, 261.0, 280.0, 287.0, 249.0, 281.0, 293.0, 280.0, 288.0, 288.0, 251.0, 270.0, 280.0, 290.0, 277.0, 302.0, 242.0, 259.0, 295.0, 278.0, 258.0, 264.0, 285.0, 294.0, 320.0, 310.0, 284.0, 292.0, 287.0, 283.0, 296.0, 280.0, 285.0, 294.0, 257.0, 265.0, 297.0, 279.0, 262.0, 263.0, 288.0, 288.0, 298.0, 281.0, 293.0, 274.0, 301.0, 278.0, 293.0, 283.0, 298.0, 281.0, 291.0, 291.0, 266.0, 310.0, 236.0, 268.0, 285.0, 294.0, 290.0, 283.0, 205.0, 203.0, 277.0, 296.0, 262.0, 260.0, 283.0, 290.0, 264.0, 252.0, 313.0, 317.0, 230.0, 229.0, 295.0, 278.0, 295.0, 284.0, 275.0, 252.0, 266.0, 253.0, 285.0, 294.0, 260.0, 265.0, 261.0, 255.0, 285.0, 294.0, 272.0, 307.0, 284.0, 295.0, 273.0, 
306.0, 290.0, 283.0, 228.0, 221.0, 296.0, 283.0, 279.0, 300.0, 299.0, 277.0, 226.0, 215.0, 280.0, 296.0, 262.0, 271.0, 261.0, 255.0, 288.0, 294.0, 267.0, 249.0, 289.0, 290.0, 281.0, 298.0, 279.0, 288.0, 286.0, 301.0, 256.0, 271.0, 280.0, 299.0, 275.0, 298.0, 283.0, 296.0, 276.0, 300.0, 177.0, 168.0, 229.0, 236.0, 275.0, 283.0, 271.0, 299.0, 292.0, 281.0, 286.0, 296.0, 287.0, 286.0, 283.0, 293.0, 293.0, 280.0, 280.0, 293.0, 296.0, 277.0, 293.0, 289.0, 272.0, 307.0, 294.0, 288.0, 298.0, 284.0, 272.0, 295.0, 293.0, 289.0, 287.0, 289.0, 287.0, 292.0, 291.0, 296.0, 257.0, 262.0, 273.0, 297.0, 281.0, 286.0, 298.0, 281.0, 282.0, 300.0, 273.0, 260.0, 272.0, 250.0, 295.0, 292.0, 252.0, 252.0, 307.0, 323.0, 267.0, 269.0]}, "sampler_perf": {"mean_env_wait_ms": 1.5264986068464634, "mean_processing_ms": 0.37294987476110114, "mean_inference_ms": 2.0764572301568647}, "off_policy_estimator": {}, "info": {"num_steps_trained": 3360000, "num_steps_sampled": 1792000, "sample_time_ms": 20405.098, "load_time_ms": 37.005, "grad_time_ms": 9490.101, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0018680343637242913, "policy_loss": -0.005901841446757317, "vf_loss": 83.4326400756836, "vf_explained_var": 0.7634987831115723, "kl": 0.002031019888818264, "entropy": 1.1467581987380981, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1792000, "episodes_total": 4480, "training_iteration": 140, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-54-47", "timestamp": 1660251287, "time_this_iter_s": 28.096507787704468, "time_total_s": 9705.1265771389, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 9705.1265771389, "timesteps_since_restore": 1792000, "iterations_since_restore": 140, "perf": {"cpu_util_percent": 30.9875, "ram_util_percent": 58.3925}}
+{"episode_reward_max": 630.0, "episode_reward_min": 345.0, "episode_reward_mean": 560.21, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 168.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 280.105}, "custom_metrics": {"sparse_reward_mean": 194.2, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 171.81, "shaped_reward_min": 105, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.15, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 17.17, "onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 15.73, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 16.41, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.63, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.64, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.26, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.28, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 15.21, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 16.23, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.67, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.53, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.75, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.68, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.46, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.55, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.09, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.11, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.02, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.86, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.98, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.77, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 15.21, 
"optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 16.23, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.21, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 16.23, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [573.0, 522.0, 504.0, 573.0, 579.0, 530.0, 582.0, 576.0, 579.0, 579.0, 570.0, 525.0, 573.0, 582.0, 630.0, 558.0, 576.0, 576.0, 576.0, 533.0, 582.0, 582.0, 530.0, 530.0, 630.0, 576.0, 516.0, 476.0, 579.0, 582.0, 573.0, 576.0, 527.0, 579.0, 573.0, 579.0, 576.0, 345.0, 465.0, 558.0, 570.0, 573.0, 582.0, 573.0, 576.0, 573.0, 573.0, 573.0, 582.0, 579.0, 582.0, 582.0, 567.0, 582.0, 576.0, 579.0, 587.0, 519.0, 570.0, 567.0, 579.0, 582.0, 533.0, 522.0, 587.0, 504.0, 630.0, 536.0, 530.0, 567.0, 530.0, 573.0, 576.0, 521.0, 570.0, 579.0, 501.0, 573.0, 522.0, 579.0, 630.0, 576.0, 570.0, 576.0, 579.0, 522.0, 576.0, 525.0, 576.0, 579.0, 567.0, 579.0, 576.0, 579.0, 582.0, 576.0, 504.0, 579.0, 573.0, 408.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [293.0, 280.0, 262.0, 260.0, 260.0, 244.0, 277.0, 296.0, 296.0, 283.0, 264.0, 266.0, 301.0, 281.0, 285.0, 291.0, 289.0, 290.0, 291.0, 288.0, 287.0, 283.0, 263.0, 262.0, 292.0, 281.0, 301.0, 281.0, 303.0, 327.0, 272.0, 286.0, 296.0, 280.0, 282.0, 294.0, 282.0, 294.0, 268.0, 265.0, 298.0, 284.0, 298.0, 284.0, 255.0, 275.0, 253.0, 277.0, 311.0, 319.0, 288.0, 288.0, 250.0, 266.0, 238.0, 238.0, 292.0, 287.0, 309.0, 273.0, 286.0, 287.0, 289.0, 287.0, 256.0, 271.0, 280.0, 299.0, 275.0, 298.0, 283.0, 296.0, 276.0, 300.0, 177.0, 168.0, 229.0, 236.0, 275.0, 283.0, 271.0, 299.0, 292.0, 281.0, 286.0, 296.0, 287.0, 286.0, 283.0, 293.0, 293.0, 280.0, 280.0, 293.0, 296.0, 277.0, 293.0, 
289.0, 272.0, 307.0, 294.0, 288.0, 298.0, 284.0, 272.0, 295.0, 293.0, 289.0, 287.0, 289.0, 287.0, 292.0, 291.0, 296.0, 257.0, 262.0, 273.0, 297.0, 281.0, 286.0, 298.0, 281.0, 282.0, 300.0, 273.0, 260.0, 272.0, 250.0, 295.0, 292.0, 252.0, 252.0, 307.0, 323.0, 267.0, 269.0, 269.0, 261.0, 280.0, 287.0, 249.0, 281.0, 293.0, 280.0, 288.0, 288.0, 251.0, 270.0, 280.0, 290.0, 277.0, 302.0, 242.0, 259.0, 295.0, 278.0, 258.0, 264.0, 285.0, 294.0, 320.0, 310.0, 284.0, 292.0, 287.0, 283.0, 296.0, 280.0, 285.0, 294.0, 257.0, 265.0, 297.0, 279.0, 262.0, 263.0, 288.0, 288.0, 298.0, 281.0, 293.0, 274.0, 301.0, 278.0, 293.0, 283.0, 298.0, 281.0, 291.0, 291.0, 266.0, 310.0, 236.0, 268.0, 285.0, 294.0, 290.0, 283.0, 205.0, 203.0]}, "sampler_perf": {"mean_env_wait_ms": 1.518751324463327, "mean_processing_ms": 0.37140910407762817, "mean_inference_ms": 2.0683253134575508}, "off_policy_estimator": {}, "info": {"num_steps_trained": 3384000, "num_steps_sampled": 1804800, "sample_time_ms": 20217.154, "load_time_ms": 36.826, "grad_time_ms": 9453.413, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.004852355923503637, "policy_loss": -0.003466278314590454, "vf_loss": 88.89630126953125, "vf_explained_var": 0.7491546273231506, "kl": 0.0020531185436993837, "entropy": 1.1419917345046997, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1804800, "episodes_total": 4512, "training_iteration": 141, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-55-17", "timestamp": 1660251317, "time_this_iter_s": 29.532893180847168, "time_total_s": 9734.659470319748, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 9734.659470319748, "timesteps_since_restore": 1804800, "iterations_since_restore": 141, "perf": {"cpu_util_percent": 29.842857142857145, "ram_util_percent": 58.3642857142857}}
+{"episode_reward_max": 630.0, "episode_reward_min": 123.0, "episode_reward_mean": 554.68, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 60.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 277.34}, "custom_metrics": {"sparse_reward_mean": 192.0, "sparse_reward_min": 40, "sparse_reward_max": 220, "shaped_reward_mean": 170.68, "shaped_reward_min": 43, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.13, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 17.18, "onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 15.63, "useful_onion_pickup_agent_0_min": 4, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 16.41, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.74, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 0.63, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.33, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.32, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 15.06, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 16.23, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.71, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.42, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.82, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.5, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.46, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.59, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.1, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.12, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.15, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.74, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.06, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.59, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.06, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 3, "soup_drop_agent_1_mean": 0.07, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 15.06, 
"optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 16.23, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.06, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 16.23, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [522.0, 455.0, 582.0, 123.0, 476.0, 527.0, 579.0, 582.0, 567.0, 579.0, 579.0, 579.0, 579.0, 481.0, 576.0, 587.0, 582.0, 576.0, 579.0, 570.0, 579.0, 582.0, 573.0, 579.0, 579.0, 413.0, 579.0, 570.0, 465.0, 579.0, 579.0, 573.0, 587.0, 504.0, 630.0, 536.0, 530.0, 567.0, 530.0, 573.0, 576.0, 521.0, 570.0, 579.0, 501.0, 573.0, 522.0, 579.0, 630.0, 576.0, 570.0, 576.0, 579.0, 522.0, 576.0, 525.0, 576.0, 579.0, 567.0, 579.0, 576.0, 579.0, 582.0, 576.0, 504.0, 579.0, 573.0, 408.0, 573.0, 522.0, 504.0, 573.0, 579.0, 530.0, 582.0, 576.0, 579.0, 579.0, 570.0, 525.0, 573.0, 582.0, 630.0, 558.0, 576.0, 576.0, 576.0, 533.0, 582.0, 582.0, 530.0, 530.0, 630.0, 576.0, 516.0, 476.0, 579.0, 582.0, 573.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [248.0, 274.0, 248.0, 207.0, 280.0, 302.0, 60.0, 63.0, 249.0, 227.0, 275.0, 252.0, 284.0, 295.0, 297.0, 285.0, 282.0, 285.0, 301.0, 278.0, 291.0, 288.0, 278.0, 301.0, 286.0, 293.0, 236.0, 245.0, 277.0, 299.0, 294.0, 293.0, 293.0, 289.0, 279.0, 297.0, 286.0, 293.0, 283.0, 287.0, 288.0, 291.0, 273.0, 309.0, 287.0, 286.0, 301.0, 278.0, 294.0, 285.0, 211.0, 202.0, 295.0, 284.0, 285.0, 285.0, 233.0, 232.0, 292.0, 287.0, 289.0, 290.0, 288.0, 285.0, 295.0, 292.0, 252.0, 252.0, 307.0, 323.0, 267.0, 269.0, 269.0, 261.0, 280.0, 287.0, 249.0, 281.0, 293.0, 280.0, 288.0, 288.0, 251.0, 270.0, 280.0, 290.0, 277.0, 302.0, 242.0, 259.0, 295.0, 278.0, 258.0, 264.0, 285.0, 294.0, 320.0, 
310.0, 284.0, 292.0, 287.0, 283.0, 296.0, 280.0, 285.0, 294.0, 257.0, 265.0, 297.0, 279.0, 262.0, 263.0, 288.0, 288.0, 298.0, 281.0, 293.0, 274.0, 301.0, 278.0, 293.0, 283.0, 298.0, 281.0, 291.0, 291.0, 266.0, 310.0, 236.0, 268.0, 285.0, 294.0, 290.0, 283.0, 205.0, 203.0, 293.0, 280.0, 262.0, 260.0, 260.0, 244.0, 277.0, 296.0, 296.0, 283.0, 264.0, 266.0, 301.0, 281.0, 285.0, 291.0, 289.0, 290.0, 291.0, 288.0, 287.0, 283.0, 263.0, 262.0, 292.0, 281.0, 301.0, 281.0, 303.0, 327.0, 272.0, 286.0, 296.0, 280.0, 282.0, 294.0, 282.0, 294.0, 268.0, 265.0, 298.0, 284.0, 298.0, 284.0, 255.0, 275.0, 253.0, 277.0, 311.0, 319.0, 288.0, 288.0, 250.0, 266.0, 238.0, 238.0, 292.0, 287.0, 309.0, 273.0, 286.0, 287.0, 289.0, 287.0]}, "sampler_perf": {"mean_env_wait_ms": 1.5111310718572366, "mean_processing_ms": 0.36989117514475767, "mean_inference_ms": 2.0605337474583503}, "off_policy_estimator": {}, "info": {"num_steps_trained": 3408000, "num_steps_sampled": 1817600, "sample_time_ms": 20052.563, "load_time_ms": 36.402, "grad_time_ms": 9487.641, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.00461258739233017, "policy_loss": -0.0034613541793078184, "vf_loss": 86.5114974975586, "vf_explained_var": 0.770569384098053, "kl": 0.0022539596538990736, "entropy": 1.154403805732727, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1817600, "episodes_total": 4544, "training_iteration": 142, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-55-47", "timestamp": 1660251347, "time_this_iter_s": 30.608631134033203, "time_total_s": 9765.268101453781, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 9765.268101453781, "timesteps_since_restore": 1817600, "iterations_since_restore": 142, "perf": {"cpu_util_percent": 30.76046511627907, "ram_util_percent": 58.4186046511628}}
+{"episode_reward_max": 633.0, "episode_reward_min": 123.0, "episode_reward_mean": 553.69, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 60.0}, "policy_reward_max": {"ppo": 332.0}, "policy_reward_mean": {"ppo": 276.845}, "custom_metrics": {"sparse_reward_mean": 191.6, "sparse_reward_min": 40, "sparse_reward_max": 220, "shaped_reward_mean": 170.49, "shaped_reward_min": 43, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.23, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 16.87, "onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 15.68, "useful_onion_pickup_agent_0_min": 4, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 16.13, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.76, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 0.56, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.37, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.29, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 15.14, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 16.01, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.6, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.61, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.73, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.7, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.52, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 0.59, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.11, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.07, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.98, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.88, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 4.88, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.74, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.06, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 3, "soup_drop_agent_1_mean": 0.06, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 15.14, 
"optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 16.01, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.14, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 16.01, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 533.0, 570.0, 633.0, 582.0, 579.0, 573.0, 582.0, 123.0, 573.0, 579.0, 576.0, 579.0, 582.0, 576.0, 524.0, 573.0, 584.0, 573.0, 582.0, 579.0, 518.0, 573.0, 573.0, 564.0, 576.0, 573.0, 567.0, 633.0, 527.0, 579.0, 630.0, 504.0, 579.0, 573.0, 408.0, 573.0, 522.0, 504.0, 573.0, 579.0, 530.0, 582.0, 576.0, 579.0, 579.0, 570.0, 525.0, 573.0, 582.0, 630.0, 558.0, 576.0, 576.0, 576.0, 533.0, 582.0, 582.0, 530.0, 530.0, 630.0, 576.0, 516.0, 476.0, 579.0, 582.0, 573.0, 576.0, 522.0, 455.0, 582.0, 123.0, 476.0, 527.0, 579.0, 582.0, 567.0, 579.0, 579.0, 579.0, 579.0, 481.0, 576.0, 587.0, 582.0, 576.0, 579.0, 570.0, 579.0, 582.0, 573.0, 579.0, 579.0, 413.0, 579.0, 570.0, 465.0, 579.0, 579.0, 573.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [281.0, 298.0, 259.0, 274.0, 285.0, 285.0, 302.0, 331.0, 280.0, 302.0, 281.0, 298.0, 286.0, 287.0, 286.0, 296.0, 60.0, 63.0, 288.0, 285.0, 295.0, 284.0, 275.0, 301.0, 280.0, 299.0, 303.0, 279.0, 289.0, 287.0, 275.0, 249.0, 292.0, 281.0, 291.0, 293.0, 292.0, 281.0, 291.0, 291.0, 288.0, 291.0, 235.0, 283.0, 293.0, 280.0, 284.0, 289.0, 276.0, 288.0, 284.0, 292.0, 294.0, 279.0, 278.0, 289.0, 301.0, 332.0, 260.0, 267.0, 278.0, 301.0, 314.0, 316.0, 236.0, 268.0, 285.0, 294.0, 290.0, 283.0, 205.0, 203.0, 293.0, 280.0, 262.0, 260.0, 260.0, 244.0, 277.0, 296.0, 296.0, 283.0, 264.0, 266.0, 301.0, 281.0, 285.0, 291.0, 289.0, 290.0, 291.0, 288.0, 287.0, 283.0, 263.0, 262.0, 292.0, 
281.0, 301.0, 281.0, 303.0, 327.0, 272.0, 286.0, 296.0, 280.0, 282.0, 294.0, 282.0, 294.0, 268.0, 265.0, 298.0, 284.0, 298.0, 284.0, 255.0, 275.0, 253.0, 277.0, 311.0, 319.0, 288.0, 288.0, 250.0, 266.0, 238.0, 238.0, 292.0, 287.0, 309.0, 273.0, 286.0, 287.0, 289.0, 287.0, 248.0, 274.0, 248.0, 207.0, 280.0, 302.0, 60.0, 63.0, 249.0, 227.0, 275.0, 252.0, 284.0, 295.0, 297.0, 285.0, 282.0, 285.0, 301.0, 278.0, 291.0, 288.0, 278.0, 301.0, 286.0, 293.0, 236.0, 245.0, 277.0, 299.0, 294.0, 293.0, 293.0, 289.0, 279.0, 297.0, 286.0, 293.0, 283.0, 287.0, 288.0, 291.0, 273.0, 309.0, 287.0, 286.0, 301.0, 278.0, 294.0, 285.0, 211.0, 202.0, 295.0, 284.0, 285.0, 285.0, 233.0, 232.0, 292.0, 287.0, 289.0, 290.0, 288.0, 285.0]}, "sampler_perf": {"mean_env_wait_ms": 1.5036560426122845, "mean_processing_ms": 0.36840756428267724, "mean_inference_ms": 2.053955603003225}, "off_policy_estimator": {}, "info": {"num_steps_trained": 3432000, "num_steps_sampled": 1830400, "sample_time_ms": 20726.741, "load_time_ms": 36.338, "grad_time_ms": 9326.632, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.004940376617014408, "policy_loss": -0.002967018634080887, "vf_loss": 84.7812271118164, "vf_explained_var": 0.7767437100410461, "kl": 0.0015952900284901261, "entropy": 1.1414709091186523, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1830400, "episodes_total": 4576, "training_iteration": 143, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-56-23", "timestamp": 1660251383, "time_this_iter_s": 35.59740996360779, "time_total_s": 9800.865511417389, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 9800.865511417389, "timesteps_since_restore": 1830400, "iterations_since_restore": 143, "perf": {"cpu_util_percent": 28.452, "ram_util_percent": 58.38199999999999}}
+{"episode_reward_max": 636.0, "episode_reward_min": 123.0, "episode_reward_mean": 555.8, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 60.0}, "policy_reward_max": {"ppo": 332.0}, "policy_reward_mean": {"ppo": 277.9}, "custom_metrics": {"sparse_reward_mean": 192.2, "sparse_reward_min": 40, "sparse_reward_max": 220, "shaped_reward_mean": 171.4, "shaped_reward_min": 43, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.55, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 16.67, "onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.01, "useful_onion_pickup_agent_0_min": 4, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 15.97, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.76, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 0.59, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.36, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.27, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 15.5, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 15.79, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.54, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.61, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.68, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.83, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.58, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 0.47, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.11, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.05, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.9, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 5.01, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 4.79, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.85, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.05, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 3, "soup_drop_agent_1_mean": 0.08, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 15.5, 
"optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 15.79, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.5, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 15.79, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 576.0, 570.0, 582.0, 579.0, 582.0, 533.0, 495.0, 573.0, 533.0, 582.0, 582.0, 582.0, 522.0, 518.0, 524.0, 587.0, 579.0, 579.0, 579.0, 582.0, 579.0, 636.0, 510.0, 419.0, 570.0, 576.0, 579.0, 576.0, 573.0, 570.0, 587.0, 579.0, 582.0, 573.0, 576.0, 522.0, 455.0, 582.0, 123.0, 476.0, 527.0, 579.0, 582.0, 567.0, 579.0, 579.0, 579.0, 579.0, 481.0, 576.0, 587.0, 582.0, 576.0, 579.0, 570.0, 579.0, 582.0, 573.0, 579.0, 579.0, 413.0, 579.0, 570.0, 465.0, 579.0, 579.0, 573.0, 579.0, 533.0, 570.0, 633.0, 582.0, 579.0, 573.0, 582.0, 123.0, 573.0, 579.0, 576.0, 579.0, 582.0, 576.0, 524.0, 573.0, 584.0, 573.0, 582.0, 579.0, 518.0, 573.0, 573.0, 564.0, 576.0, 573.0, 567.0, 633.0, 527.0, 579.0, 630.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [283.0, 296.0, 282.0, 294.0, 273.0, 297.0, 286.0, 296.0, 296.0, 283.0, 280.0, 302.0, 268.0, 265.0, 235.0, 260.0, 279.0, 294.0, 266.0, 267.0, 299.0, 283.0, 292.0, 290.0, 290.0, 292.0, 263.0, 259.0, 256.0, 262.0, 261.0, 263.0, 314.0, 273.0, 298.0, 281.0, 288.0, 291.0, 297.0, 282.0, 289.0, 293.0, 277.0, 302.0, 320.0, 316.0, 261.0, 249.0, 211.0, 208.0, 299.0, 271.0, 283.0, 293.0, 280.0, 299.0, 296.0, 280.0, 288.0, 285.0, 301.0, 269.0, 278.0, 309.0, 292.0, 287.0, 309.0, 273.0, 286.0, 287.0, 289.0, 287.0, 248.0, 274.0, 248.0, 207.0, 280.0, 302.0, 60.0, 63.0, 249.0, 227.0, 275.0, 252.0, 284.0, 295.0, 297.0, 285.0, 282.0, 285.0, 301.0, 278.0, 291.0, 288.0, 278.0, 301.0, 286.0, 
293.0, 236.0, 245.0, 277.0, 299.0, 294.0, 293.0, 293.0, 289.0, 279.0, 297.0, 286.0, 293.0, 283.0, 287.0, 288.0, 291.0, 273.0, 309.0, 287.0, 286.0, 301.0, 278.0, 294.0, 285.0, 211.0, 202.0, 295.0, 284.0, 285.0, 285.0, 233.0, 232.0, 292.0, 287.0, 289.0, 290.0, 288.0, 285.0, 281.0, 298.0, 259.0, 274.0, 285.0, 285.0, 302.0, 331.0, 280.0, 302.0, 281.0, 298.0, 286.0, 287.0, 286.0, 296.0, 60.0, 63.0, 288.0, 285.0, 295.0, 284.0, 275.0, 301.0, 280.0, 299.0, 303.0, 279.0, 289.0, 287.0, 275.0, 249.0, 292.0, 281.0, 291.0, 293.0, 292.0, 281.0, 291.0, 291.0, 288.0, 291.0, 235.0, 283.0, 293.0, 280.0, 284.0, 289.0, 276.0, 288.0, 284.0, 292.0, 294.0, 279.0, 278.0, 289.0, 301.0, 332.0, 260.0, 267.0, 278.0, 301.0, 314.0, 316.0]}, "sampler_perf": {"mean_env_wait_ms": 1.4962976590401342, "mean_processing_ms": 0.3669482669309259, "mean_inference_ms": 2.047611703358859}, "off_policy_estimator": {}, "info": {"num_steps_trained": 3456000, "num_steps_sampled": 1843200, "sample_time_ms": 20697.837, "load_time_ms": 36.511, "grad_time_ms": 9260.983, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.007136467844247818, "policy_loss": -0.0009602725622244179, "vf_loss": 86.6334457397461, "vf_explained_var": 0.7632217407226562, "kl": 0.0016821371391415596, "entropy": 1.1331907510757446, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1843200, "episodes_total": 4608, "training_iteration": 144, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-56-53", "timestamp": 1660251413, "time_this_iter_s": 30.354671239852905, "time_total_s": 9831.220182657242, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 9831.220182657242, "timesteps_since_restore": 1843200, "iterations_since_restore": 144, "perf": {"cpu_util_percent": 32.744186046511636, "ram_util_percent": 58.41860465116278}}
+{"episode_reward_max": 636.0, "episode_reward_min": 123.0, "episode_reward_mean": 559.84, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 60.0}, "policy_reward_max": {"ppo": 332.0}, "policy_reward_mean": {"ppo": 279.92}, "custom_metrics": {"sparse_reward_mean": 194.0, "sparse_reward_min": 40, "sparse_reward_max": 220, "shaped_reward_mean": 171.84, "shaped_reward_min": 43, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.68, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 16.68, "onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.06, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 16.03, "useful_onion_pickup_agent_1_min": 6, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.75, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.6, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.39, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.27, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 15.61, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 15.8, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.43, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.8, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.52, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.95, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.57, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 0.54, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.11, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.1, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.78, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 5.1, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 4.69, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 5.02, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.04, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.61, 
"optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 15.8, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.61, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 15.8, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 576.0, 501.0, 582.0, 573.0, 576.0, 582.0, 570.0, 530.0, 579.0, 234.0, 576.0, 567.0, 630.0, 573.0, 582.0, 530.0, 573.0, 579.0, 576.0, 570.0, 576.0, 579.0, 522.0, 570.0, 522.0, 552.0, 576.0, 576.0, 579.0, 579.0, 579.0, 465.0, 579.0, 579.0, 573.0, 579.0, 533.0, 570.0, 633.0, 582.0, 579.0, 573.0, 582.0, 123.0, 573.0, 579.0, 576.0, 579.0, 582.0, 576.0, 524.0, 573.0, 584.0, 573.0, 582.0, 579.0, 518.0, 573.0, 573.0, 564.0, 576.0, 573.0, 567.0, 633.0, 527.0, 579.0, 630.0, 579.0, 576.0, 570.0, 582.0, 579.0, 582.0, 533.0, 495.0, 573.0, 533.0, 582.0, 582.0, 582.0, 522.0, 518.0, 524.0, 587.0, 579.0, 579.0, 579.0, 582.0, 579.0, 636.0, 510.0, 419.0, 570.0, 576.0, 579.0, 576.0, 573.0, 570.0, 587.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [273.0, 306.0, 290.0, 286.0, 255.0, 246.0, 283.0, 299.0, 280.0, 293.0, 297.0, 279.0, 292.0, 290.0, 280.0, 290.0, 277.0, 253.0, 283.0, 296.0, 112.0, 122.0, 283.0, 293.0, 285.0, 282.0, 316.0, 314.0, 292.0, 281.0, 282.0, 300.0, 271.0, 259.0, 280.0, 293.0, 279.0, 300.0, 291.0, 285.0, 272.0, 298.0, 294.0, 282.0, 289.0, 290.0, 259.0, 263.0, 284.0, 286.0, 252.0, 270.0, 285.0, 267.0, 298.0, 278.0, 277.0, 299.0, 283.0, 296.0, 282.0, 297.0, 276.0, 303.0, 233.0, 232.0, 292.0, 287.0, 289.0, 290.0, 288.0, 285.0, 281.0, 298.0, 259.0, 274.0, 285.0, 285.0, 302.0, 331.0, 280.0, 302.0, 281.0, 298.0, 286.0, 287.0, 286.0, 296.0, 60.0, 63.0, 288.0, 285.0, 295.0, 284.0, 275.0, 301.0, 280.0, 
299.0, 303.0, 279.0, 289.0, 287.0, 275.0, 249.0, 292.0, 281.0, 291.0, 293.0, 292.0, 281.0, 291.0, 291.0, 288.0, 291.0, 235.0, 283.0, 293.0, 280.0, 284.0, 289.0, 276.0, 288.0, 284.0, 292.0, 294.0, 279.0, 278.0, 289.0, 301.0, 332.0, 260.0, 267.0, 278.0, 301.0, 314.0, 316.0, 283.0, 296.0, 282.0, 294.0, 273.0, 297.0, 286.0, 296.0, 296.0, 283.0, 280.0, 302.0, 268.0, 265.0, 235.0, 260.0, 279.0, 294.0, 266.0, 267.0, 299.0, 283.0, 292.0, 290.0, 290.0, 292.0, 263.0, 259.0, 256.0, 262.0, 261.0, 263.0, 314.0, 273.0, 298.0, 281.0, 288.0, 291.0, 297.0, 282.0, 289.0, 293.0, 277.0, 302.0, 320.0, 316.0, 261.0, 249.0, 211.0, 208.0, 299.0, 271.0, 283.0, 293.0, 280.0, 299.0, 296.0, 280.0, 288.0, 285.0, 301.0, 269.0, 278.0, 309.0]}, "sampler_perf": {"mean_env_wait_ms": 1.489071157531513, "mean_processing_ms": 0.36552392885765655, "mean_inference_ms": 2.041836182574639}, "off_policy_estimator": {}, "info": {"num_steps_trained": 3480000, "num_steps_sampled": 1856000, "sample_time_ms": 21121.64, "load_time_ms": 36.034, "grad_time_ms": 9412.787, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.004381807986646891, "policy_loss": -0.0034314494114369154, "vf_loss": 83.83314514160156, "vf_explained_var": 0.7805802226066589, "kl": 0.0022449749521911144, "entropy": 1.1401251554489136, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1856000, "episodes_total": 4640, "training_iteration": 145, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-57-28", "timestamp": 1660251448, "time_this_iter_s": 34.93755006790161, "time_total_s": 9866.157732725143, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 9866.157732725143, "timesteps_since_restore": 1856000, "iterations_since_restore": 145, "perf": {"cpu_util_percent": 34.077999999999996, "ram_util_percent": 58.46000000000001}}
+{"episode_reward_max": 636.0, "episode_reward_min": 234.0, "episode_reward_mean": 565.79, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 112.0}, "policy_reward_max": {"ppo": 332.0}, "policy_reward_mean": {"ppo": 282.895}, "custom_metrics": {"sparse_reward_mean": 195.8, "sparse_reward_min": 80, "sparse_reward_max": 220, "shaped_reward_mean": 174.19, "shaped_reward_min": 74, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.04, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 16.64, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.49, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 16.05, "useful_onion_pickup_agent_1_min": 6, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.67, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.62, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.32, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.24, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 16.05, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 15.75, "potting_onion_agent_1_min": 6, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.44, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.83, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.68, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 5.05, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.48, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.53, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.08, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.12, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.78, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 5.17, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 4.7, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 5.1, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.04, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.05, 
"optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 15.75, "optimal_onion_potting_agent_1_min": 6, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.05, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 15.75, "viable_onion_potting_agent_1_min": 6, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 579.0, 579.0, 582.0, 579.0, 582.0, 579.0, 582.0, 522.0, 522.0, 579.0, 627.0, 582.0, 525.0, 627.0, 576.0, 579.0, 579.0, 530.0, 582.0, 582.0, 579.0, 582.0, 582.0, 519.0, 575.0, 576.0, 579.0, 582.0, 576.0, 587.0, 579.0, 633.0, 527.0, 579.0, 630.0, 579.0, 576.0, 570.0, 582.0, 579.0, 582.0, 533.0, 495.0, 573.0, 533.0, 582.0, 582.0, 582.0, 522.0, 518.0, 524.0, 587.0, 579.0, 579.0, 579.0, 582.0, 579.0, 636.0, 510.0, 419.0, 570.0, 576.0, 579.0, 576.0, 573.0, 570.0, 587.0, 579.0, 576.0, 501.0, 582.0, 573.0, 576.0, 582.0, 570.0, 530.0, 579.0, 234.0, 576.0, 567.0, 630.0, 573.0, 582.0, 530.0, 573.0, 579.0, 576.0, 570.0, 576.0, 579.0, 522.0, 570.0, 522.0, 552.0, 576.0, 576.0, 579.0, 579.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [291.0, 288.0, 296.0, 283.0, 274.0, 305.0, 286.0, 296.0, 293.0, 286.0, 285.0, 297.0, 296.0, 283.0, 280.0, 302.0, 269.0, 253.0, 273.0, 249.0, 281.0, 298.0, 321.0, 306.0, 283.0, 299.0, 249.0, 276.0, 320.0, 307.0, 293.0, 283.0, 299.0, 280.0, 283.0, 296.0, 270.0, 260.0, 296.0, 286.0, 297.0, 285.0, 294.0, 285.0, 289.0, 293.0, 279.0, 303.0, 258.0, 261.0, 296.0, 279.0, 293.0, 283.0, 292.0, 287.0, 279.0, 303.0, 292.0, 284.0, 287.0, 300.0, 291.0, 288.0, 301.0, 332.0, 260.0, 267.0, 278.0, 301.0, 314.0, 316.0, 283.0, 296.0, 282.0, 294.0, 273.0, 297.0, 286.0, 296.0, 296.0, 283.0, 280.0, 302.0, 268.0, 265.0, 235.0, 260.0, 279.0, 294.0, 266.0, 267.0, 299.0, 283.0, 292.0, 290.0, 290.0, 
292.0, 263.0, 259.0, 256.0, 262.0, 261.0, 263.0, 314.0, 273.0, 298.0, 281.0, 288.0, 291.0, 297.0, 282.0, 289.0, 293.0, 277.0, 302.0, 320.0, 316.0, 261.0, 249.0, 211.0, 208.0, 299.0, 271.0, 283.0, 293.0, 280.0, 299.0, 296.0, 280.0, 288.0, 285.0, 301.0, 269.0, 278.0, 309.0, 273.0, 306.0, 290.0, 286.0, 255.0, 246.0, 283.0, 299.0, 280.0, 293.0, 297.0, 279.0, 292.0, 290.0, 280.0, 290.0, 277.0, 253.0, 283.0, 296.0, 112.0, 122.0, 283.0, 293.0, 285.0, 282.0, 316.0, 314.0, 292.0, 281.0, 282.0, 300.0, 271.0, 259.0, 280.0, 293.0, 279.0, 300.0, 291.0, 285.0, 272.0, 298.0, 294.0, 282.0, 289.0, 290.0, 259.0, 263.0, 284.0, 286.0, 252.0, 270.0, 285.0, 267.0, 298.0, 278.0, 277.0, 299.0, 283.0, 296.0, 282.0, 297.0, 276.0, 303.0]}, "sampler_perf": {"mean_env_wait_ms": 1.4819236352558276, "mean_processing_ms": 0.3641093342877964, "mean_inference_ms": 2.0353369545862554}, "off_policy_estimator": {}, "info": {"num_steps_trained": 3504000, "num_steps_sampled": 1868800, "sample_time_ms": 21304.847, "load_time_ms": 36.48, "grad_time_ms": 9579.852, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.003786050481721759, "policy_loss": -0.004469693172723055, "vf_loss": 88.19132232666016, "vf_explained_var": 0.7629249095916748, "kl": 0.0019031836418434978, "entropy": 1.1267800331115723, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1868800, "episodes_total": 4672, "training_iteration": 146, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-58-02", "timestamp": 1660251482, "time_this_iter_s": 33.29244089126587, "time_total_s": 9899.45017361641, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 9899.45017361641, "timesteps_since_restore": 1868800, "iterations_since_restore": 146, "perf": {"cpu_util_percent": 34.12765957446809, "ram_util_percent": 58.40212765957448}}
+{"episode_reward_max": 630.0, "episode_reward_min": 234.0, "episode_reward_mean": 569.9, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 112.0}, "policy_reward_max": {"ppo": 321.0}, "policy_reward_mean": {"ppo": 284.95}, "custom_metrics": {"sparse_reward_mean": 197.2, "sparse_reward_min": 80, "sparse_reward_max": 220, "shaped_reward_mean": 175.5, "shaped_reward_min": 74, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.1, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 16.88, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.51, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 16.15, "useful_onion_pickup_agent_1_min": 6, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.78, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.62, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.33, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.28, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 16.03, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 15.93, "potting_onion_agent_1_min": 6, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.44, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.76, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 4.89, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.05, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.36, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.46, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.05, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.12, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.85, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 5.17, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 4.78, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.09, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.04, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 16.03, 
"optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 15.93, "optimal_onion_potting_agent_1_min": 6, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.03, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 15.93, "viable_onion_potting_agent_1_min": 6, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [627.0, 579.0, 579.0, 582.0, 582.0, 579.0, 582.0, 582.0, 576.0, 579.0, 587.0, 581.0, 573.0, 579.0, 576.0, 579.0, 576.0, 582.0, 576.0, 536.0, 579.0, 587.0, 510.0, 582.0, 582.0, 579.0, 582.0, 570.0, 573.0, 579.0, 576.0, 576.0, 576.0, 573.0, 570.0, 587.0, 579.0, 576.0, 501.0, 582.0, 573.0, 576.0, 582.0, 570.0, 530.0, 579.0, 234.0, 576.0, 567.0, 630.0, 573.0, 582.0, 530.0, 573.0, 579.0, 576.0, 570.0, 576.0, 579.0, 522.0, 570.0, 522.0, 552.0, 576.0, 576.0, 579.0, 579.0, 579.0, 579.0, 579.0, 579.0, 582.0, 579.0, 582.0, 579.0, 582.0, 522.0, 522.0, 579.0, 627.0, 582.0, 525.0, 627.0, 576.0, 579.0, 579.0, 530.0, 582.0, 582.0, 579.0, 582.0, 582.0, 519.0, 575.0, 576.0, 579.0, 582.0, 576.0, 587.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [312.0, 315.0, 280.0, 299.0, 291.0, 288.0, 293.0, 289.0, 285.0, 297.0, 288.0, 291.0, 283.0, 299.0, 288.0, 294.0, 294.0, 282.0, 288.0, 291.0, 304.0, 283.0, 282.0, 299.0, 270.0, 303.0, 289.0, 290.0, 278.0, 298.0, 289.0, 290.0, 288.0, 288.0, 278.0, 304.0, 296.0, 280.0, 279.0, 257.0, 297.0, 282.0, 288.0, 299.0, 240.0, 270.0, 302.0, 280.0, 293.0, 289.0, 286.0, 293.0, 301.0, 281.0, 285.0, 285.0, 296.0, 277.0, 280.0, 299.0, 288.0, 288.0, 290.0, 286.0, 296.0, 280.0, 288.0, 285.0, 301.0, 269.0, 278.0, 309.0, 273.0, 306.0, 290.0, 286.0, 255.0, 246.0, 283.0, 299.0, 280.0, 293.0, 297.0, 279.0, 292.0, 290.0, 280.0, 290.0, 277.0, 253.0, 283.0, 296.0, 112.0, 122.0, 283.0, 293.0, 285.0, 
282.0, 316.0, 314.0, 292.0, 281.0, 282.0, 300.0, 271.0, 259.0, 280.0, 293.0, 279.0, 300.0, 291.0, 285.0, 272.0, 298.0, 294.0, 282.0, 289.0, 290.0, 259.0, 263.0, 284.0, 286.0, 252.0, 270.0, 285.0, 267.0, 298.0, 278.0, 277.0, 299.0, 283.0, 296.0, 282.0, 297.0, 276.0, 303.0, 291.0, 288.0, 296.0, 283.0, 274.0, 305.0, 286.0, 296.0, 293.0, 286.0, 285.0, 297.0, 296.0, 283.0, 280.0, 302.0, 269.0, 253.0, 273.0, 249.0, 281.0, 298.0, 321.0, 306.0, 283.0, 299.0, 249.0, 276.0, 320.0, 307.0, 293.0, 283.0, 299.0, 280.0, 283.0, 296.0, 270.0, 260.0, 296.0, 286.0, 297.0, 285.0, 294.0, 285.0, 289.0, 293.0, 279.0, 303.0, 258.0, 261.0, 296.0, 279.0, 293.0, 283.0, 292.0, 287.0, 279.0, 303.0, 292.0, 284.0, 287.0, 300.0, 291.0, 288.0]}, "sampler_perf": {"mean_env_wait_ms": 1.4748545298111457, "mean_processing_ms": 0.362707314353448, "mean_inference_ms": 2.028542543914196}, "off_policy_estimator": {}, "info": {"num_steps_trained": 3528000, "num_steps_sampled": 1881600, "sample_time_ms": 21110.787, "load_time_ms": 36.619, "grad_time_ms": 9537.52, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.004449079744517803, "policy_loss": -0.0040388829074800014, "vf_loss": 90.50656127929688, "vf_explained_var": 0.7546594142913818, "kl": 0.0021286073606461287, "entropy": 1.1253728866577148, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1881600, "episodes_total": 4704, "training_iteration": 147, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-58-30", "timestamp": 1660251510, "time_this_iter_s": 28.167391061782837, "time_total_s": 9927.617564678192, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 9927.617564678192, "timesteps_since_restore": 1881600, "iterations_since_restore": 147, "perf": {"cpu_util_percent": 35.05, "ram_util_percent": 58.895}}
+{"episode_reward_max": 630.0, "episode_reward_min": 419.0, "episode_reward_mean": 573.52, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 206.0}, "policy_reward_max": {"ppo": 328.0}, "policy_reward_mean": {"ppo": 286.76}, "custom_metrics": {"sparse_reward_mean": 198.2, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 177.12, "shaped_reward_min": 139, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.82, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 17.32, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.36, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 16.36, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.73, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.63, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.24, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.31, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 15.89, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 16.37, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.86, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.57, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.19, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.89, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.46, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.45, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.05, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.09, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.11, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.96, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.07, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.85, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.05, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 15.89, 
"optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 16.37, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.89, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 16.37, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 582.0, 570.0, 579.0, 419.0, 567.0, 579.0, 579.0, 582.0, 579.0, 519.0, 582.0, 510.0, 582.0, 579.0, 582.0, 573.0, 630.0, 579.0, 579.0, 579.0, 624.0, 518.0, 579.0, 519.0, 579.0, 579.0, 573.0, 581.0, 576.0, 587.0, 576.0, 576.0, 579.0, 579.0, 579.0, 579.0, 579.0, 579.0, 582.0, 579.0, 582.0, 579.0, 582.0, 522.0, 522.0, 579.0, 627.0, 582.0, 525.0, 627.0, 576.0, 579.0, 579.0, 530.0, 582.0, 582.0, 579.0, 582.0, 582.0, 519.0, 575.0, 576.0, 579.0, 582.0, 576.0, 587.0, 579.0, 627.0, 579.0, 579.0, 582.0, 582.0, 579.0, 582.0, 582.0, 576.0, 579.0, 587.0, 581.0, 573.0, 579.0, 576.0, 579.0, 576.0, 582.0, 576.0, 536.0, 579.0, 587.0, 510.0, 582.0, 582.0, 579.0, 582.0, 570.0, 573.0, 579.0, 576.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [300.0, 282.0, 289.0, 293.0, 290.0, 280.0, 291.0, 288.0, 206.0, 213.0, 283.0, 284.0, 292.0, 287.0, 291.0, 288.0, 284.0, 298.0, 289.0, 290.0, 265.0, 254.0, 283.0, 299.0, 259.0, 251.0, 299.0, 283.0, 290.0, 289.0, 285.0, 297.0, 280.0, 293.0, 328.0, 302.0, 286.0, 293.0, 296.0, 283.0, 293.0, 286.0, 320.0, 304.0, 282.0, 236.0, 306.0, 273.0, 262.0, 257.0, 287.0, 292.0, 295.0, 284.0, 293.0, 280.0, 292.0, 289.0, 291.0, 285.0, 303.0, 284.0, 275.0, 301.0, 277.0, 299.0, 283.0, 296.0, 282.0, 297.0, 276.0, 303.0, 291.0, 288.0, 296.0, 283.0, 274.0, 305.0, 286.0, 296.0, 293.0, 286.0, 285.0, 297.0, 296.0, 283.0, 280.0, 302.0, 269.0, 253.0, 273.0, 249.0, 281.0, 298.0, 321.0, 306.0, 283.0, 
299.0, 249.0, 276.0, 320.0, 307.0, 293.0, 283.0, 299.0, 280.0, 283.0, 296.0, 270.0, 260.0, 296.0, 286.0, 297.0, 285.0, 294.0, 285.0, 289.0, 293.0, 279.0, 303.0, 258.0, 261.0, 296.0, 279.0, 293.0, 283.0, 292.0, 287.0, 279.0, 303.0, 292.0, 284.0, 287.0, 300.0, 291.0, 288.0, 312.0, 315.0, 280.0, 299.0, 291.0, 288.0, 293.0, 289.0, 285.0, 297.0, 288.0, 291.0, 283.0, 299.0, 288.0, 294.0, 294.0, 282.0, 288.0, 291.0, 304.0, 283.0, 282.0, 299.0, 270.0, 303.0, 289.0, 290.0, 278.0, 298.0, 289.0, 290.0, 288.0, 288.0, 278.0, 304.0, 296.0, 280.0, 279.0, 257.0, 297.0, 282.0, 288.0, 299.0, 240.0, 270.0, 302.0, 280.0, 293.0, 289.0, 286.0, 293.0, 301.0, 281.0, 285.0, 285.0, 296.0, 277.0, 280.0, 299.0, 288.0, 288.0, 290.0, 286.0]}, "sampler_perf": {"mean_env_wait_ms": 1.4678650225470864, "mean_processing_ms": 0.36131377085229416, "mean_inference_ms": 2.021530568417585}, "off_policy_estimator": {}, "info": {"num_steps_trained": 3552000, "num_steps_sampled": 1894400, "sample_time_ms": 21206.603, "load_time_ms": 36.564, "grad_time_ms": 9559.344, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.005005656275898218, "policy_loss": -0.0032577281817793846, "vf_loss": 88.27960205078125, "vf_explained_var": 0.7724118232727051, "kl": 0.001774882897734642, "entropy": 1.1291688680648804, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1894400, "episodes_total": 4736, "training_iteration": 148, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-59-00", "timestamp": 1660251540, "time_this_iter_s": 30.007760047912598, "time_total_s": 9957.625324726105, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 9957.625324726105, "timesteps_since_restore": 1894400, "iterations_since_restore": 148, "perf": {"cpu_util_percent": 34.11190476190476, "ram_util_percent": 58.61190476190477}}
+{"episode_reward_max": 630.0, "episode_reward_min": 180.0, "episode_reward_mean": 568.89, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 86.0}, "policy_reward_max": {"ppo": 328.0}, "policy_reward_mean": {"ppo": 284.445}, "custom_metrics": {"sparse_reward_mean": 196.8, "sparse_reward_min": 60, "sparse_reward_max": 220, "shaped_reward_mean": 175.29, "shaped_reward_min": 60, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.47, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 17.64, "onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.01, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 16.62, "useful_onion_pickup_agent_1_min": 6, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.74, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.79, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.27, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.42, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 15.5, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 16.55, "potting_onion_agent_1_min": 6, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.0, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.54, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 5.1, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.68, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.6, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.55, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 6, "useful_dish_drop_agent_0_mean": 0.09, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.12, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.19, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.82, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.16, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.7, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.05, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 15.5, 
"optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 16.55, "optimal_onion_potting_agent_1_min": 6, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.5, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 16.55, "viable_onion_potting_agent_1_min": 6, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 579.0, 579.0, 582.0, 576.0, 582.0, 582.0, 462.0, 573.0, 564.0, 582.0, 579.0, 573.0, 180.0, 582.0, 579.0, 564.0, 579.0, 564.0, 579.0, 530.0, 579.0, 576.0, 579.0, 630.0, 582.0, 579.0, 582.0, 579.0, 560.0, 524.0, 576.0, 582.0, 576.0, 587.0, 579.0, 627.0, 579.0, 579.0, 582.0, 582.0, 579.0, 582.0, 582.0, 576.0, 579.0, 587.0, 581.0, 573.0, 579.0, 576.0, 579.0, 576.0, 582.0, 576.0, 536.0, 579.0, 587.0, 510.0, 582.0, 582.0, 579.0, 582.0, 570.0, 573.0, 579.0, 576.0, 576.0, 582.0, 582.0, 570.0, 579.0, 419.0, 567.0, 579.0, 579.0, 582.0, 579.0, 519.0, 582.0, 510.0, 582.0, 579.0, 582.0, 573.0, 630.0, 579.0, 579.0, 579.0, 624.0, 518.0, 579.0, 519.0, 579.0, 579.0, 573.0, 581.0, 576.0, 587.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [287.0, 292.0, 298.0, 281.0, 280.0, 299.0, 291.0, 291.0, 284.0, 292.0, 288.0, 294.0, 295.0, 287.0, 230.0, 232.0, 280.0, 293.0, 267.0, 297.0, 296.0, 286.0, 291.0, 288.0, 305.0, 268.0, 86.0, 94.0, 296.0, 286.0, 288.0, 291.0, 285.0, 279.0, 281.0, 298.0, 277.0, 287.0, 293.0, 286.0, 276.0, 254.0, 274.0, 305.0, 285.0, 291.0, 299.0, 280.0, 312.0, 318.0, 288.0, 294.0, 293.0, 286.0, 294.0, 288.0, 273.0, 306.0, 282.0, 278.0, 256.0, 268.0, 280.0, 296.0, 279.0, 303.0, 292.0, 284.0, 287.0, 300.0, 291.0, 288.0, 312.0, 315.0, 280.0, 299.0, 291.0, 288.0, 293.0, 289.0, 285.0, 297.0, 288.0, 291.0, 283.0, 299.0, 288.0, 294.0, 294.0, 282.0, 288.0, 291.0, 304.0, 283.0, 282.0, 299.0, 270.0, 
303.0, 289.0, 290.0, 278.0, 298.0, 289.0, 290.0, 288.0, 288.0, 278.0, 304.0, 296.0, 280.0, 279.0, 257.0, 297.0, 282.0, 288.0, 299.0, 240.0, 270.0, 302.0, 280.0, 293.0, 289.0, 286.0, 293.0, 301.0, 281.0, 285.0, 285.0, 296.0, 277.0, 280.0, 299.0, 288.0, 288.0, 290.0, 286.0, 300.0, 282.0, 289.0, 293.0, 290.0, 280.0, 291.0, 288.0, 206.0, 213.0, 283.0, 284.0, 292.0, 287.0, 291.0, 288.0, 284.0, 298.0, 289.0, 290.0, 265.0, 254.0, 283.0, 299.0, 259.0, 251.0, 299.0, 283.0, 290.0, 289.0, 285.0, 297.0, 280.0, 293.0, 328.0, 302.0, 286.0, 293.0, 296.0, 283.0, 293.0, 286.0, 320.0, 304.0, 282.0, 236.0, 306.0, 273.0, 262.0, 257.0, 287.0, 292.0, 295.0, 284.0, 293.0, 280.0, 292.0, 289.0, 291.0, 285.0, 303.0, 284.0, 275.0, 301.0]}, "sampler_perf": {"mean_env_wait_ms": 1.4609623899935302, "mean_processing_ms": 0.3599358567345953, "mean_inference_ms": 2.0144329368432397}, "off_policy_estimator": {}, "info": {"num_steps_trained": 3576000, "num_steps_sampled": 1907200, "sample_time_ms": 21431.224, "load_time_ms": 36.685, "grad_time_ms": 9588.21, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.004353505093604326, "policy_loss": -0.003862809156998992, "vf_loss": 87.85071563720703, "vf_explained_var": 0.7780687212944031, "kl": 0.002437218790873885, "entropy": 1.137519359588623, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1907200, "episodes_total": 4768, "training_iteration": 149, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-59-30", "timestamp": 1660251570, "time_this_iter_s": 30.225661993026733, "time_total_s": 9987.850986719131, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 9987.850986719131, "timesteps_since_restore": 1907200, "iterations_since_restore": 149, "perf": {"cpu_util_percent": 31.69069767441861, "ram_util_percent": 58.481395348837225}}
+{"episode_reward_max": 630.0, "episode_reward_min": 180.0, "episode_reward_mean": 565.93, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 86.0}, "policy_reward_max": {"ppo": 328.0}, "policy_reward_mean": {"ppo": 282.965}, "custom_metrics": {"sparse_reward_mean": 196.0, "sparse_reward_min": 60, "sparse_reward_max": 220, "shaped_reward_mean": 173.93, "shaped_reward_min": 60, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.19, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 17.63, "onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 15.86, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 16.68, "useful_onion_pickup_agent_1_min": 6, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.67, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.79, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.3, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.39, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 15.26, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 16.57, "potting_onion_agent_1_min": 6, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.08, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 5.44, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 5.06, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.57, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.68, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.53, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 6, "useful_dish_drop_agent_0_mean": 0.11, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.13, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.26, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.68, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.21, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.61, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.26, 
"optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 16.57, "optimal_onion_potting_agent_1_min": 6, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.26, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 16.57, "viable_onion_potting_agent_1_min": 6, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [567.0, 587.0, 573.0, 579.0, 522.0, 573.0, 579.0, 582.0, 570.0, 525.0, 579.0, 567.0, 576.0, 570.0, 576.0, 582.0, 582.0, 587.0, 576.0, 530.0, 630.0, 582.0, 579.0, 456.0, 579.0, 581.0, 573.0, 576.0, 516.0, 579.0, 582.0, 576.0, 573.0, 579.0, 576.0, 576.0, 582.0, 582.0, 570.0, 579.0, 419.0, 567.0, 579.0, 579.0, 582.0, 579.0, 519.0, 582.0, 510.0, 582.0, 579.0, 582.0, 573.0, 630.0, 579.0, 579.0, 579.0, 624.0, 518.0, 579.0, 519.0, 579.0, 579.0, 573.0, 581.0, 576.0, 587.0, 576.0, 579.0, 579.0, 579.0, 582.0, 576.0, 582.0, 582.0, 462.0, 573.0, 564.0, 582.0, 579.0, 573.0, 180.0, 582.0, 579.0, 564.0, 579.0, 564.0, 579.0, 530.0, 579.0, 576.0, 579.0, 630.0, 582.0, 579.0, 582.0, 579.0, 560.0, 524.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [292.0, 275.0, 291.0, 296.0, 296.0, 277.0, 301.0, 278.0, 269.0, 253.0, 288.0, 285.0, 293.0, 286.0, 283.0, 299.0, 275.0, 295.0, 260.0, 265.0, 292.0, 287.0, 278.0, 289.0, 281.0, 295.0, 272.0, 298.0, 292.0, 284.0, 288.0, 294.0, 285.0, 297.0, 292.0, 295.0, 291.0, 285.0, 276.0, 254.0, 319.0, 311.0, 308.0, 274.0, 277.0, 302.0, 225.0, 231.0, 295.0, 284.0, 285.0, 296.0, 294.0, 279.0, 277.0, 299.0, 249.0, 267.0, 290.0, 289.0, 276.0, 306.0, 282.0, 294.0, 296.0, 277.0, 280.0, 299.0, 288.0, 288.0, 290.0, 286.0, 300.0, 282.0, 289.0, 293.0, 290.0, 280.0, 291.0, 288.0, 206.0, 213.0, 283.0, 284.0, 292.0, 287.0, 291.0, 288.0, 284.0, 298.0, 289.0, 290.0, 265.0, 254.0, 283.0, 299.0, 259.0, 
251.0, 299.0, 283.0, 290.0, 289.0, 285.0, 297.0, 280.0, 293.0, 328.0, 302.0, 286.0, 293.0, 296.0, 283.0, 293.0, 286.0, 320.0, 304.0, 282.0, 236.0, 306.0, 273.0, 262.0, 257.0, 287.0, 292.0, 295.0, 284.0, 293.0, 280.0, 292.0, 289.0, 291.0, 285.0, 303.0, 284.0, 275.0, 301.0, 287.0, 292.0, 298.0, 281.0, 280.0, 299.0, 291.0, 291.0, 284.0, 292.0, 288.0, 294.0, 295.0, 287.0, 230.0, 232.0, 280.0, 293.0, 267.0, 297.0, 296.0, 286.0, 291.0, 288.0, 305.0, 268.0, 86.0, 94.0, 296.0, 286.0, 288.0, 291.0, 285.0, 279.0, 281.0, 298.0, 277.0, 287.0, 293.0, 286.0, 276.0, 254.0, 274.0, 305.0, 285.0, 291.0, 299.0, 280.0, 312.0, 318.0, 288.0, 294.0, 293.0, 286.0, 294.0, 288.0, 273.0, 306.0, 282.0, 278.0, 256.0, 268.0, 280.0, 296.0]}, "sampler_perf": {"mean_env_wait_ms": 1.4541632476370934, "mean_processing_ms": 0.35857867420290873, "mean_inference_ms": 2.0077131328660243}, "off_policy_estimator": {}, "info": {"num_steps_trained": 3600000, "num_steps_sampled": 1920000, "sample_time_ms": 21437.009, "load_time_ms": 37.013, "grad_time_ms": 9883.239, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.004526351112872362, "policy_loss": -0.0033871959894895554, "vf_loss": 84.81644439697266, "vf_explained_var": 0.7658727169036865, "kl": 0.002766131656244397, "entropy": 1.1361898183822632, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1920000, "episodes_total": 4800, "training_iteration": 150, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-00-01", "timestamp": 1660251601, "time_this_iter_s": 31.108325004577637, "time_total_s": 10018.95931172371, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 10018.95931172371, "timesteps_since_restore": 1920000, "iterations_since_restore": 150, "perf": {"cpu_util_percent": 34.15227272727273, "ram_util_percent": 58.540909090909096}}
+{"episode_reward_max": 630.0, "episode_reward_min": 180.0, "episode_reward_mean": 565.47, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 86.0}, "policy_reward_max": {"ppo": 319.0}, "policy_reward_mean": {"ppo": 282.735}, "custom_metrics": {"sparse_reward_mean": 195.8, "sparse_reward_min": 60, "sparse_reward_max": 220, "shaped_reward_mean": 173.87, "shaped_reward_min": 60, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.15, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 17.46, "onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 15.82, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 16.73, "useful_onion_pickup_agent_1_min": 6, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.58, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.74, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.22, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.27, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 15.28, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 16.45, "potting_onion_agent_1_min": 6, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.86, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 5.47, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 4.98, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.73, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.57, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.46, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 6, "useful_dish_drop_agent_0_mean": 0.13, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.12, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.14, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.79, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.09, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.74, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.28, 
"optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 16.45, "optimal_onion_potting_agent_1_min": 6, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.28, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 16.45, "viable_onion_potting_agent_1_min": 6, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 570.0, 516.0, 425.0, 570.0, 584.0, 570.0, 573.0, 579.0, 579.0, 576.0, 579.0, 567.0, 579.0, 573.0, 579.0, 582.0, 576.0, 579.0, 576.0, 579.0, 582.0, 576.0, 582.0, 579.0, 464.0, 582.0, 582.0, 570.0, 579.0, 573.0, 579.0, 581.0, 576.0, 587.0, 576.0, 579.0, 579.0, 579.0, 582.0, 576.0, 582.0, 582.0, 462.0, 573.0, 564.0, 582.0, 579.0, 573.0, 180.0, 582.0, 579.0, 564.0, 579.0, 564.0, 579.0, 530.0, 579.0, 576.0, 579.0, 630.0, 582.0, 579.0, 582.0, 579.0, 560.0, 524.0, 576.0, 567.0, 587.0, 573.0, 579.0, 522.0, 573.0, 579.0, 582.0, 570.0, 525.0, 579.0, 567.0, 576.0, 570.0, 576.0, 582.0, 582.0, 587.0, 576.0, 530.0, 630.0, 582.0, 579.0, 456.0, 579.0, 581.0, 573.0, 576.0, 516.0, 579.0, 582.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [285.0, 297.0, 295.0, 275.0, 268.0, 248.0, 220.0, 205.0, 285.0, 285.0, 301.0, 283.0, 285.0, 285.0, 283.0, 290.0, 282.0, 297.0, 299.0, 280.0, 268.0, 308.0, 288.0, 291.0, 270.0, 297.0, 294.0, 285.0, 282.0, 291.0, 280.0, 299.0, 289.0, 293.0, 290.0, 286.0, 292.0, 287.0, 288.0, 288.0, 300.0, 279.0, 302.0, 280.0, 286.0, 290.0, 286.0, 296.0, 283.0, 296.0, 232.0, 232.0, 285.0, 297.0, 302.0, 280.0, 278.0, 292.0, 291.0, 288.0, 292.0, 281.0, 299.0, 280.0, 292.0, 289.0, 291.0, 285.0, 303.0, 284.0, 275.0, 301.0, 287.0, 292.0, 298.0, 281.0, 280.0, 299.0, 291.0, 291.0, 284.0, 292.0, 288.0, 294.0, 295.0, 287.0, 230.0, 232.0, 280.0, 293.0, 267.0, 297.0, 296.0, 286.0, 291.0, 288.0, 305.0, 
268.0, 86.0, 94.0, 296.0, 286.0, 288.0, 291.0, 285.0, 279.0, 281.0, 298.0, 277.0, 287.0, 293.0, 286.0, 276.0, 254.0, 274.0, 305.0, 285.0, 291.0, 299.0, 280.0, 312.0, 318.0, 288.0, 294.0, 293.0, 286.0, 294.0, 288.0, 273.0, 306.0, 282.0, 278.0, 256.0, 268.0, 280.0, 296.0, 292.0, 275.0, 291.0, 296.0, 296.0, 277.0, 301.0, 278.0, 269.0, 253.0, 288.0, 285.0, 293.0, 286.0, 283.0, 299.0, 275.0, 295.0, 260.0, 265.0, 292.0, 287.0, 278.0, 289.0, 281.0, 295.0, 272.0, 298.0, 292.0, 284.0, 288.0, 294.0, 285.0, 297.0, 292.0, 295.0, 291.0, 285.0, 276.0, 254.0, 319.0, 311.0, 308.0, 274.0, 277.0, 302.0, 225.0, 231.0, 295.0, 284.0, 285.0, 296.0, 294.0, 279.0, 277.0, 299.0, 249.0, 267.0, 290.0, 289.0, 276.0, 306.0, 282.0, 294.0]}, "sampler_perf": {"mean_env_wait_ms": 1.4474584223491644, "mean_processing_ms": 0.3572431217885297, "mean_inference_ms": 2.0013634824095012}, "off_policy_estimator": {}, "info": {"num_steps_trained": 3624000, "num_steps_sampled": 1932800, "sample_time_ms": 21691.813, "load_time_ms": 37.111, "grad_time_ms": 9921.937, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0037641674280166626, "policy_loss": -0.004076274111866951, "vf_loss": 84.02509307861328, "vf_explained_var": 0.7596387267112732, "kl": 0.001788324792869389, "entropy": 1.124145746231079, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1932800, "episodes_total": 4832, "training_iteration": 151, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-00-34", "timestamp": 1660251634, "time_this_iter_s": 32.471389293670654, "time_total_s": 10051.43070101738, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 10051.43070101738, "timesteps_since_restore": 1932800, "iterations_since_restore": 151, "perf": {"cpu_util_percent": 34.02391304347826, "ram_util_percent": 58.56739130434782}}
+{"episode_reward_max": 630.0, "episode_reward_min": 402.0, "episode_reward_mean": 568.07, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 189.0}, "policy_reward_max": {"ppo": 321.0}, "policy_reward_mean": {"ppo": 284.035}, "custom_metrics": {"sparse_reward_mean": 196.4, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 175.27, "shaped_reward_min": 122, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.39, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 17.35, "onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 16.12, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 16.7, "useful_onion_pickup_agent_1_min": 11, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.59, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.67, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.2, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.23, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.51, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 16.4, "potting_onion_agent_1_min": 11, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.59, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 5.56, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.97, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.91, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.37, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.4, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.07, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.08, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.05, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.96, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 4.97, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.89, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.03, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.51, 
"optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 16.4, "optimal_onion_potting_agent_1_min": 11, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.51, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 16.4, "viable_onion_potting_agent_1_min": 11, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 579.0, 579.0, 582.0, 576.0, 576.0, 582.0, 587.0, 402.0, 564.0, 582.0, 533.0, 525.0, 579.0, 576.0, 587.0, 579.0, 539.0, 582.0, 582.0, 582.0, 533.0, 570.0, 584.0, 579.0, 579.0, 582.0, 630.0, 579.0, 587.0, 582.0, 579.0, 579.0, 560.0, 524.0, 576.0, 567.0, 587.0, 573.0, 579.0, 522.0, 573.0, 579.0, 582.0, 570.0, 525.0, 579.0, 567.0, 576.0, 570.0, 576.0, 582.0, 582.0, 587.0, 576.0, 530.0, 630.0, 582.0, 579.0, 456.0, 579.0, 581.0, 573.0, 576.0, 516.0, 579.0, 582.0, 576.0, 582.0, 570.0, 516.0, 425.0, 570.0, 584.0, 570.0, 573.0, 579.0, 579.0, 576.0, 579.0, 567.0, 579.0, 573.0, 579.0, 582.0, 576.0, 579.0, 576.0, 579.0, 582.0, 576.0, 582.0, 579.0, 464.0, 582.0, 582.0, 570.0, 579.0, 573.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [283.0, 296.0, 293.0, 286.0, 282.0, 297.0, 276.0, 306.0, 285.0, 291.0, 285.0, 291.0, 291.0, 291.0, 289.0, 298.0, 189.0, 213.0, 290.0, 274.0, 293.0, 289.0, 272.0, 261.0, 262.0, 263.0, 293.0, 286.0, 275.0, 301.0, 294.0, 293.0, 280.0, 299.0, 264.0, 275.0, 281.0, 301.0, 299.0, 283.0, 281.0, 301.0, 263.0, 270.0, 276.0, 294.0, 301.0, 283.0, 296.0, 283.0, 295.0, 284.0, 288.0, 294.0, 321.0, 309.0, 288.0, 291.0, 286.0, 301.0, 286.0, 296.0, 304.0, 275.0, 273.0, 306.0, 282.0, 278.0, 256.0, 268.0, 280.0, 296.0, 292.0, 275.0, 291.0, 296.0, 296.0, 277.0, 301.0, 278.0, 269.0, 253.0, 288.0, 285.0, 293.0, 286.0, 283.0, 299.0, 275.0, 295.0, 260.0, 265.0, 292.0, 287.0, 278.0, 289.0, 281.0, 
295.0, 272.0, 298.0, 292.0, 284.0, 288.0, 294.0, 285.0, 297.0, 292.0, 295.0, 291.0, 285.0, 276.0, 254.0, 319.0, 311.0, 308.0, 274.0, 277.0, 302.0, 225.0, 231.0, 295.0, 284.0, 285.0, 296.0, 294.0, 279.0, 277.0, 299.0, 249.0, 267.0, 290.0, 289.0, 276.0, 306.0, 282.0, 294.0, 285.0, 297.0, 295.0, 275.0, 268.0, 248.0, 220.0, 205.0, 285.0, 285.0, 301.0, 283.0, 285.0, 285.0, 283.0, 290.0, 282.0, 297.0, 299.0, 280.0, 268.0, 308.0, 288.0, 291.0, 270.0, 297.0, 294.0, 285.0, 282.0, 291.0, 280.0, 299.0, 289.0, 293.0, 290.0, 286.0, 292.0, 287.0, 288.0, 288.0, 300.0, 279.0, 302.0, 280.0, 286.0, 290.0, 286.0, 296.0, 283.0, 296.0, 232.0, 232.0, 285.0, 297.0, 302.0, 280.0, 278.0, 292.0, 291.0, 288.0, 292.0, 281.0, 299.0, 280.0]}, "sampler_perf": {"mean_env_wait_ms": 1.4408300434942674, "mean_processing_ms": 0.35592594931553234, "mean_inference_ms": 1.9949418939108405}, "off_policy_estimator": {}, "info": {"num_steps_trained": 3648000, "num_steps_sampled": 1945600, "sample_time_ms": 21661.173, "load_time_ms": 37.278, "grad_time_ms": 9874.5, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.006686341017484665, "policy_loss": -0.0018611648119986057, "vf_loss": 91.119873046875, "vf_explained_var": 0.7503556609153748, "kl": 0.002358483849093318, "entropy": 1.128965973854065, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1945600, "episodes_total": 4864, "training_iteration": 152, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-01-03", "timestamp": 1660251663, "time_this_iter_s": 29.826536893844604, "time_total_s": 10081.257237911224, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 10081.257237911224, "timesteps_since_restore": 1945600, "iterations_since_restore": 152, "perf": {"cpu_util_percent": 34.71904761904763, "ram_util_percent": 58.37619047619047}}
+{"episode_reward_max": 630.0, "episode_reward_min": 402.0, "episode_reward_mean": 569.92, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 189.0}, "policy_reward_max": {"ppo": 321.0}, "policy_reward_mean": {"ppo": 284.96}, "custom_metrics": {"sparse_reward_mean": 196.8, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 176.32, "shaped_reward_min": 122, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.59, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 17.27, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 16.3, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 16.7, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.57, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.63, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.19, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.21, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.76, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 16.38, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.49, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.77, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 4.93, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.07, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.35, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.46, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.05, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.08, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.94, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 5.05, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 4.88, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 5.0, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.76, 
"optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 16.38, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.76, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 16.38, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 582.0, 582.0, 510.0, 579.0, 579.0, 587.0, 576.0, 582.0, 579.0, 573.0, 582.0, 573.0, 582.0, 579.0, 576.0, 522.0, 582.0, 587.0, 579.0, 576.0, 512.0, 579.0, 582.0, 587.0, 576.0, 573.0, 582.0, 582.0, 582.0, 582.0, 579.0, 516.0, 579.0, 582.0, 576.0, 582.0, 570.0, 516.0, 425.0, 570.0, 584.0, 570.0, 573.0, 579.0, 579.0, 576.0, 579.0, 567.0, 579.0, 573.0, 579.0, 582.0, 576.0, 579.0, 576.0, 579.0, 582.0, 576.0, 582.0, 579.0, 464.0, 582.0, 582.0, 570.0, 579.0, 573.0, 579.0, 579.0, 579.0, 579.0, 582.0, 576.0, 576.0, 582.0, 587.0, 402.0, 564.0, 582.0, 533.0, 525.0, 579.0, 576.0, 587.0, 579.0, 539.0, 582.0, 582.0, 582.0, 533.0, 570.0, 584.0, 579.0, 579.0, 582.0, 630.0, 579.0, 587.0, 582.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [289.0, 290.0, 293.0, 289.0, 285.0, 297.0, 244.0, 266.0, 290.0, 289.0, 282.0, 297.0, 309.0, 278.0, 287.0, 289.0, 293.0, 289.0, 273.0, 306.0, 294.0, 279.0, 289.0, 293.0, 280.0, 293.0, 279.0, 303.0, 281.0, 298.0, 291.0, 285.0, 262.0, 260.0, 286.0, 296.0, 293.0, 294.0, 288.0, 291.0, 283.0, 293.0, 262.0, 250.0, 285.0, 294.0, 280.0, 302.0, 296.0, 291.0, 285.0, 291.0, 290.0, 283.0, 288.0, 294.0, 291.0, 291.0, 285.0, 297.0, 288.0, 294.0, 298.0, 281.0, 249.0, 267.0, 290.0, 289.0, 276.0, 306.0, 282.0, 294.0, 285.0, 297.0, 295.0, 275.0, 268.0, 248.0, 220.0, 205.0, 285.0, 285.0, 301.0, 283.0, 285.0, 285.0, 283.0, 290.0, 282.0, 297.0, 299.0, 280.0, 268.0, 308.0, 288.0, 291.0, 270.0, 
297.0, 294.0, 285.0, 282.0, 291.0, 280.0, 299.0, 289.0, 293.0, 290.0, 286.0, 292.0, 287.0, 288.0, 288.0, 300.0, 279.0, 302.0, 280.0, 286.0, 290.0, 286.0, 296.0, 283.0, 296.0, 232.0, 232.0, 285.0, 297.0, 302.0, 280.0, 278.0, 292.0, 291.0, 288.0, 292.0, 281.0, 299.0, 280.0, 283.0, 296.0, 293.0, 286.0, 282.0, 297.0, 276.0, 306.0, 285.0, 291.0, 285.0, 291.0, 291.0, 291.0, 289.0, 298.0, 189.0, 213.0, 290.0, 274.0, 293.0, 289.0, 272.0, 261.0, 262.0, 263.0, 293.0, 286.0, 275.0, 301.0, 294.0, 293.0, 280.0, 299.0, 264.0, 275.0, 281.0, 301.0, 299.0, 283.0, 281.0, 301.0, 263.0, 270.0, 276.0, 294.0, 301.0, 283.0, 296.0, 283.0, 295.0, 284.0, 288.0, 294.0, 321.0, 309.0, 288.0, 291.0, 286.0, 301.0, 286.0, 296.0, 304.0, 275.0]}, "sampler_perf": {"mean_env_wait_ms": 1.4342721716334546, "mean_processing_ms": 0.3546223616494384, "mean_inference_ms": 1.9881963342076971}, "off_policy_estimator": {}, "info": {"num_steps_trained": 3672000, "num_steps_sampled": 1958400, "sample_time_ms": 20770.751, "load_time_ms": 37.491, "grad_time_ms": 9746.757, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0023278051521629095, "policy_loss": -0.0060347276739776134, "vf_loss": 89.3071060180664, "vf_explained_var": 0.7670709490776062, "kl": 0.0017067408189177513, "entropy": 1.1363595724105835, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1958400, "episodes_total": 4896, "training_iteration": 153, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-01-29", "timestamp": 1660251689, "time_this_iter_s": 25.417139053344727, "time_total_s": 10106.67437696457, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 10106.67437696457, "timesteps_since_restore": 1958400, "iterations_since_restore": 153, "perf": {"cpu_util_percent": 33.84166666666667, "ram_util_percent": 58.383333333333326}}
+{"episode_reward_max": 630.0, "episode_reward_min": 402.0, "episode_reward_mean": 571.56, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 189.0}, "policy_reward_max": {"ppo": 321.0}, "policy_reward_mean": {"ppo": 285.78}, "custom_metrics": {"sparse_reward_mean": 197.4, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 176.76, "shaped_reward_min": 122, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.4, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 17.56, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 16.14, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 16.91, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.58, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.69, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.23, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.26, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 15.58, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 16.6, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.69, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.66, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 12, "useful_dish_pickup_agent_0_mean": 5.1, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.94, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.38, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.5, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 6, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.1, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 5, "soup_pickup_agent_0_mean": 5.09, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.95, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.02, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.88, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.58, 
"optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 16.6, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.58, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 16.6, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [570.0, 582.0, 576.0, 576.0, 579.0, 582.0, 579.0, 573.0, 576.0, 582.0, 579.0, 582.0, 579.0, 522.0, 576.0, 581.0, 510.0, 527.0, 584.0, 576.0, 573.0, 582.0, 576.0, 522.0, 576.0, 561.0, 579.0, 579.0, 579.0, 579.0, 581.0, 579.0, 570.0, 579.0, 573.0, 579.0, 579.0, 579.0, 579.0, 582.0, 576.0, 576.0, 582.0, 587.0, 402.0, 564.0, 582.0, 533.0, 525.0, 579.0, 576.0, 587.0, 579.0, 539.0, 582.0, 582.0, 582.0, 533.0, 570.0, 584.0, 579.0, 579.0, 582.0, 630.0, 579.0, 587.0, 582.0, 579.0, 579.0, 582.0, 582.0, 510.0, 579.0, 579.0, 587.0, 576.0, 582.0, 579.0, 573.0, 582.0, 573.0, 582.0, 579.0, 576.0, 522.0, 582.0, 587.0, 579.0, 576.0, 512.0, 579.0, 582.0, 587.0, 576.0, 573.0, 582.0, 582.0, 582.0, 582.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [292.0, 278.0, 273.0, 309.0, 290.0, 286.0, 298.0, 278.0, 282.0, 297.0, 285.0, 297.0, 293.0, 286.0, 283.0, 290.0, 294.0, 282.0, 298.0, 284.0, 294.0, 285.0, 284.0, 298.0, 285.0, 294.0, 280.0, 242.0, 292.0, 284.0, 303.0, 278.0, 262.0, 248.0, 281.0, 246.0, 290.0, 294.0, 299.0, 277.0, 282.0, 291.0, 290.0, 292.0, 282.0, 294.0, 270.0, 252.0, 277.0, 299.0, 283.0, 278.0, 280.0, 299.0, 279.0, 300.0, 282.0, 297.0, 278.0, 301.0, 297.0, 284.0, 291.0, 288.0, 278.0, 292.0, 291.0, 288.0, 292.0, 281.0, 299.0, 280.0, 283.0, 296.0, 293.0, 286.0, 282.0, 297.0, 276.0, 306.0, 285.0, 291.0, 285.0, 291.0, 291.0, 291.0, 289.0, 298.0, 189.0, 213.0, 290.0, 274.0, 293.0, 289.0, 272.0, 261.0, 262.0, 
263.0, 293.0, 286.0, 275.0, 301.0, 294.0, 293.0, 280.0, 299.0, 264.0, 275.0, 281.0, 301.0, 299.0, 283.0, 281.0, 301.0, 263.0, 270.0, 276.0, 294.0, 301.0, 283.0, 296.0, 283.0, 295.0, 284.0, 288.0, 294.0, 321.0, 309.0, 288.0, 291.0, 286.0, 301.0, 286.0, 296.0, 304.0, 275.0, 289.0, 290.0, 293.0, 289.0, 285.0, 297.0, 244.0, 266.0, 290.0, 289.0, 282.0, 297.0, 309.0, 278.0, 287.0, 289.0, 293.0, 289.0, 273.0, 306.0, 294.0, 279.0, 289.0, 293.0, 280.0, 293.0, 279.0, 303.0, 281.0, 298.0, 291.0, 285.0, 262.0, 260.0, 286.0, 296.0, 293.0, 294.0, 288.0, 291.0, 283.0, 293.0, 262.0, 250.0, 285.0, 294.0, 280.0, 302.0, 296.0, 291.0, 285.0, 291.0, 290.0, 283.0, 288.0, 294.0, 291.0, 291.0, 285.0, 297.0, 288.0, 294.0, 298.0, 281.0]}, "sampler_perf": {"mean_env_wait_ms": 1.4277901540826492, "mean_processing_ms": 0.3533349618976105, "mean_inference_ms": 1.9812328755781439}, "off_policy_estimator": {}, "info": {"num_steps_trained": 3696000, "num_steps_sampled": 1971200, "sample_time_ms": 20717.538, "load_time_ms": 37.296, "grad_time_ms": 9558.366, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0045716362074017525, "policy_loss": -0.004014193546026945, "vf_loss": 91.47116088867188, "vf_explained_var": 0.753397524356842, "kl": 0.001791521324776113, "entropy": 1.1225804090499878, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1971200, "episodes_total": 4928, "training_iteration": 154, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-01-57", "timestamp": 1660251717, "time_this_iter_s": 27.938206911087036, "time_total_s": 10134.612583875656, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 10134.612583875656, "timesteps_since_restore": 1971200, "iterations_since_restore": 154, "perf": {"cpu_util_percent": 33.69230769230769, "ram_util_percent": 58.38717948717951}}
+{"episode_reward_max": 627.0, "episode_reward_min": 510.0, "episode_reward_mean": 573.23, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 242.0}, "policy_reward_max": {"ppo": 321.0}, "policy_reward_mean": {"ppo": 286.615}, "custom_metrics": {"sparse_reward_mean": 198.2, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 176.83, "shaped_reward_min": 150, "shaped_reward_max": 187, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.36, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 17.54, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 16.06, "useful_onion_pickup_agent_0_min": 9, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 16.93, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.61, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.61, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.22, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.23, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 15.53, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 16.7, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.76, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.62, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 12, "useful_dish_pickup_agent_0_mean": 5.16, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.82, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.34, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.53, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 6, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.1, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 5, "soup_pickup_agent_0_mean": 5.21, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.84, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.15, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.78, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.53, 
"optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 16.7, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.53, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 16.7, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [573.0, 576.0, 582.0, 582.0, 579.0, 576.0, 579.0, 567.0, 576.0, 579.0, 579.0, 579.0, 579.0, 579.0, 570.0, 579.0, 576.0, 627.0, 587.0, 582.0, 512.0, 564.0, 582.0, 579.0, 582.0, 527.0, 570.0, 579.0, 525.0, 582.0, 582.0, 587.0, 579.0, 587.0, 582.0, 579.0, 579.0, 582.0, 582.0, 510.0, 579.0, 579.0, 587.0, 576.0, 582.0, 579.0, 573.0, 582.0, 573.0, 582.0, 579.0, 576.0, 522.0, 582.0, 587.0, 579.0, 576.0, 512.0, 579.0, 582.0, 587.0, 576.0, 573.0, 582.0, 582.0, 582.0, 582.0, 579.0, 570.0, 582.0, 576.0, 576.0, 579.0, 582.0, 579.0, 573.0, 576.0, 582.0, 579.0, 582.0, 579.0, 522.0, 576.0, 581.0, 510.0, 527.0, 584.0, 576.0, 573.0, 582.0, 576.0, 522.0, 576.0, 561.0, 579.0, 579.0, 579.0, 579.0, 581.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [296.0, 277.0, 298.0, 278.0, 287.0, 295.0, 287.0, 295.0, 295.0, 284.0, 290.0, 286.0, 285.0, 294.0, 286.0, 281.0, 293.0, 283.0, 298.0, 281.0, 288.0, 291.0, 296.0, 283.0, 293.0, 286.0, 288.0, 291.0, 287.0, 283.0, 281.0, 298.0, 294.0, 282.0, 306.0, 321.0, 301.0, 286.0, 288.0, 294.0, 260.0, 252.0, 285.0, 279.0, 288.0, 294.0, 284.0, 295.0, 281.0, 301.0, 256.0, 271.0, 290.0, 280.0, 293.0, 286.0, 260.0, 265.0, 283.0, 299.0, 289.0, 293.0, 294.0, 293.0, 288.0, 291.0, 286.0, 301.0, 286.0, 296.0, 304.0, 275.0, 289.0, 290.0, 293.0, 289.0, 285.0, 297.0, 244.0, 266.0, 290.0, 289.0, 282.0, 297.0, 309.0, 278.0, 287.0, 289.0, 293.0, 289.0, 273.0, 306.0, 294.0, 279.0, 289.0, 293.0, 280.0, 
293.0, 279.0, 303.0, 281.0, 298.0, 291.0, 285.0, 262.0, 260.0, 286.0, 296.0, 293.0, 294.0, 288.0, 291.0, 283.0, 293.0, 262.0, 250.0, 285.0, 294.0, 280.0, 302.0, 296.0, 291.0, 285.0, 291.0, 290.0, 283.0, 288.0, 294.0, 291.0, 291.0, 285.0, 297.0, 288.0, 294.0, 298.0, 281.0, 292.0, 278.0, 273.0, 309.0, 290.0, 286.0, 298.0, 278.0, 282.0, 297.0, 285.0, 297.0, 293.0, 286.0, 283.0, 290.0, 294.0, 282.0, 298.0, 284.0, 294.0, 285.0, 284.0, 298.0, 285.0, 294.0, 280.0, 242.0, 292.0, 284.0, 303.0, 278.0, 262.0, 248.0, 281.0, 246.0, 290.0, 294.0, 299.0, 277.0, 282.0, 291.0, 290.0, 292.0, 282.0, 294.0, 270.0, 252.0, 277.0, 299.0, 283.0, 278.0, 280.0, 299.0, 279.0, 300.0, 282.0, 297.0, 278.0, 301.0, 297.0, 284.0, 291.0, 288.0]}, "sampler_perf": {"mean_env_wait_ms": 1.4213938109933664, "mean_processing_ms": 0.35206327984916896, "mean_inference_ms": 1.974322466189253}, "off_policy_estimator": {}, "info": {"num_steps_trained": 3720000, "num_steps_sampled": 1984000, "sample_time_ms": 20399.728, "load_time_ms": 37.504, "grad_time_ms": 9171.651, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0077364686876535416, "policy_loss": -0.000512867234647274, "vf_loss": 88.10808563232422, "vf_explained_var": 0.7624195218086243, "kl": 0.0021189304534345865, "entropy": 1.1229437589645386, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1984000, "episodes_total": 4960, "training_iteration": 155, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-02-25", "timestamp": 1660251745, "time_this_iter_s": 27.89369297027588, "time_total_s": 10162.506276845932, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 10162.506276845932, "timesteps_since_restore": 1984000, "iterations_since_restore": 155, "perf": {"cpu_util_percent": 34.404999999999994, "ram_util_percent": 58.395}}
+{"episode_reward_max": 627.0, "episode_reward_min": 462.0, "episode_reward_mean": 570.6, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 226.0}, "policy_reward_max": {"ppo": 321.0}, "policy_reward_mean": {"ppo": 285.3}, "custom_metrics": {"sparse_reward_mean": 197.6, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 175.4, "shaped_reward_min": 142, "shaped_reward_max": 187, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.04, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 17.7, "onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 15.71, "useful_onion_pickup_agent_0_min": 9, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 16.95, "useful_onion_pickup_agent_1_min": 11, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.58, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.55, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.2, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.21, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 15.17, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 16.87, "potting_onion_agent_1_min": 11, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 6.02, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.54, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 12, "useful_dish_pickup_agent_0_mean": 5.14, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.62, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.44, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.64, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 6, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.12, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 5, "soup_pickup_agent_0_mean": 5.35, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.67, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.3, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.61, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.03, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.17, 
"optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 16.87, "optimal_onion_potting_agent_1_min": 11, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.17, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 16.87, "viable_onion_potting_agent_1_min": 11, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [587.0, 462.0, 578.0, 579.0, 519.0, 579.0, 582.0, 567.0, 576.0, 573.0, 573.0, 579.0, 579.0, 576.0, 579.0, 564.0, 519.0, 567.0, 579.0, 570.0, 582.0, 573.0, 579.0, 527.0, 582.0, 576.0, 573.0, 570.0, 515.0, 576.0, 582.0, 579.0, 582.0, 582.0, 582.0, 579.0, 570.0, 582.0, 576.0, 576.0, 579.0, 582.0, 579.0, 573.0, 576.0, 582.0, 579.0, 582.0, 579.0, 522.0, 576.0, 581.0, 510.0, 527.0, 584.0, 576.0, 573.0, 582.0, 576.0, 522.0, 576.0, 561.0, 579.0, 579.0, 579.0, 579.0, 581.0, 579.0, 573.0, 576.0, 582.0, 582.0, 579.0, 576.0, 579.0, 567.0, 576.0, 579.0, 579.0, 579.0, 579.0, 579.0, 570.0, 579.0, 576.0, 627.0, 587.0, 582.0, 512.0, 564.0, 582.0, 579.0, 582.0, 527.0, 570.0, 579.0, 525.0, 582.0, 582.0, 587.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [293.0, 294.0, 236.0, 226.0, 295.0, 283.0, 287.0, 292.0, 268.0, 251.0, 295.0, 284.0, 285.0, 297.0, 281.0, 286.0, 293.0, 283.0, 289.0, 284.0, 285.0, 288.0, 290.0, 289.0, 280.0, 299.0, 286.0, 290.0, 295.0, 284.0, 285.0, 279.0, 264.0, 255.0, 279.0, 288.0, 295.0, 284.0, 273.0, 297.0, 286.0, 296.0, 290.0, 283.0, 284.0, 295.0, 249.0, 278.0, 288.0, 294.0, 277.0, 299.0, 282.0, 291.0, 290.0, 280.0, 260.0, 255.0, 278.0, 298.0, 280.0, 302.0, 294.0, 285.0, 291.0, 291.0, 285.0, 297.0, 288.0, 294.0, 298.0, 281.0, 292.0, 278.0, 273.0, 309.0, 290.0, 286.0, 298.0, 278.0, 282.0, 297.0, 285.0, 297.0, 293.0, 286.0, 283.0, 290.0, 294.0, 282.0, 298.0, 284.0, 294.0, 285.0, 284.0, 298.0, 285.0, 
294.0, 280.0, 242.0, 292.0, 284.0, 303.0, 278.0, 262.0, 248.0, 281.0, 246.0, 290.0, 294.0, 299.0, 277.0, 282.0, 291.0, 290.0, 292.0, 282.0, 294.0, 270.0, 252.0, 277.0, 299.0, 283.0, 278.0, 280.0, 299.0, 279.0, 300.0, 282.0, 297.0, 278.0, 301.0, 297.0, 284.0, 291.0, 288.0, 296.0, 277.0, 298.0, 278.0, 287.0, 295.0, 287.0, 295.0, 295.0, 284.0, 290.0, 286.0, 285.0, 294.0, 286.0, 281.0, 293.0, 283.0, 298.0, 281.0, 288.0, 291.0, 296.0, 283.0, 293.0, 286.0, 288.0, 291.0, 287.0, 283.0, 281.0, 298.0, 294.0, 282.0, 306.0, 321.0, 301.0, 286.0, 288.0, 294.0, 260.0, 252.0, 285.0, 279.0, 288.0, 294.0, 284.0, 295.0, 281.0, 301.0, 256.0, 271.0, 290.0, 280.0, 293.0, 286.0, 260.0, 265.0, 283.0, 299.0, 289.0, 293.0, 294.0, 293.0]}, "sampler_perf": {"mean_env_wait_ms": 1.4150911429857618, "mean_processing_ms": 0.3508138897349896, "mean_inference_ms": 1.9677135371948458}, "off_policy_estimator": {}, "info": {"num_steps_trained": 3744000, "num_steps_sampled": 1996800, "sample_time_ms": 20238.789, "load_time_ms": 37.088, "grad_time_ms": 8942.988, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.004439468961209059, "policy_loss": -0.0041490718722343445, "vf_loss": 91.5320053100586, "vf_explained_var": 0.7567749619483948, "kl": 0.001588103943504393, "entropy": 1.1293169260025024, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1996800, "episodes_total": 4992, "training_iteration": 156, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-02-54", "timestamp": 1660251774, "time_this_iter_s": 29.386072158813477, "time_total_s": 10191.892349004745, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 10191.892349004745, "timesteps_since_restore": 1996800, "iterations_since_restore": 156, "perf": {"cpu_util_percent": 34.03658536585366, "ram_util_percent": 58.353658536585364}}
+{"episode_reward_max": 630.0, "episode_reward_min": 462.0, "episode_reward_mean": 569.64, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 226.0}, "policy_reward_max": {"ppo": 321.0}, "policy_reward_mean": {"ppo": 284.82}, "custom_metrics": {"sparse_reward_mean": 197.4, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 174.84, "shaped_reward_min": 141, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.09, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 17.54, "onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 15.8, "useful_onion_pickup_agent_0_min": 9, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 16.79, "useful_onion_pickup_agent_1_min": 11, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.54, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.38, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.12, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.14, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.2, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 16.86, "potting_onion_agent_1_min": 11, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.98, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.78, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 12, "useful_dish_pickup_agent_0_mean": 4.91, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.61, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.52, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.76, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.04, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.11, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.25, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.78, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.21, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.69, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.03, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.2, 
"optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 16.86, "optimal_onion_potting_agent_1_min": 11, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.2, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 16.86, "viable_onion_potting_agent_1_min": 11, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 533.0, 564.0, 630.0, 579.0, 587.0, 579.0, 593.0, 582.0, 582.0, 584.0, 582.0, 573.0, 513.0, 570.0, 587.0, 507.0, 579.0, 564.0, 516.0, 579.0, 579.0, 576.0, 579.0, 579.0, 576.0, 579.0, 530.0, 573.0, 549.0, 501.0, 582.0, 579.0, 579.0, 581.0, 579.0, 573.0, 576.0, 582.0, 582.0, 579.0, 576.0, 579.0, 567.0, 576.0, 579.0, 579.0, 579.0, 579.0, 579.0, 570.0, 579.0, 576.0, 627.0, 587.0, 582.0, 512.0, 564.0, 582.0, 579.0, 582.0, 527.0, 570.0, 579.0, 525.0, 582.0, 582.0, 587.0, 587.0, 462.0, 578.0, 579.0, 519.0, 579.0, 582.0, 567.0, 576.0, 573.0, 573.0, 579.0, 579.0, 576.0, 579.0, 564.0, 519.0, 567.0, 579.0, 570.0, 582.0, 573.0, 579.0, 527.0, 582.0, 576.0, 573.0, 570.0, 515.0, 576.0, 582.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [299.0, 283.0, 271.0, 262.0, 284.0, 280.0, 309.0, 321.0, 294.0, 285.0, 291.0, 296.0, 283.0, 296.0, 289.0, 304.0, 294.0, 288.0, 286.0, 296.0, 295.0, 289.0, 283.0, 299.0, 280.0, 293.0, 263.0, 250.0, 274.0, 296.0, 293.0, 294.0, 256.0, 251.0, 295.0, 284.0, 285.0, 279.0, 245.0, 271.0, 288.0, 291.0, 284.0, 295.0, 284.0, 292.0, 289.0, 290.0, 293.0, 286.0, 276.0, 300.0, 299.0, 280.0, 253.0, 277.0, 278.0, 295.0, 270.0, 279.0, 264.0, 237.0, 286.0, 296.0, 282.0, 297.0, 278.0, 301.0, 297.0, 284.0, 291.0, 288.0, 296.0, 277.0, 298.0, 278.0, 287.0, 295.0, 287.0, 295.0, 295.0, 284.0, 290.0, 286.0, 285.0, 294.0, 286.0, 281.0, 293.0, 283.0, 298.0, 281.0, 288.0, 291.0, 296.0, 283.0, 293.0, 
286.0, 288.0, 291.0, 287.0, 283.0, 281.0, 298.0, 294.0, 282.0, 306.0, 321.0, 301.0, 286.0, 288.0, 294.0, 260.0, 252.0, 285.0, 279.0, 288.0, 294.0, 284.0, 295.0, 281.0, 301.0, 256.0, 271.0, 290.0, 280.0, 293.0, 286.0, 260.0, 265.0, 283.0, 299.0, 289.0, 293.0, 294.0, 293.0, 293.0, 294.0, 236.0, 226.0, 295.0, 283.0, 287.0, 292.0, 268.0, 251.0, 295.0, 284.0, 285.0, 297.0, 281.0, 286.0, 293.0, 283.0, 289.0, 284.0, 285.0, 288.0, 290.0, 289.0, 280.0, 299.0, 286.0, 290.0, 295.0, 284.0, 285.0, 279.0, 264.0, 255.0, 279.0, 288.0, 295.0, 284.0, 273.0, 297.0, 286.0, 296.0, 290.0, 283.0, 284.0, 295.0, 249.0, 278.0, 288.0, 294.0, 277.0, 299.0, 282.0, 291.0, 290.0, 280.0, 260.0, 255.0, 278.0, 298.0, 280.0, 302.0, 294.0, 285.0]}, "sampler_perf": {"mean_env_wait_ms": 1.4088928730851003, "mean_processing_ms": 0.34958857715576797, "mean_inference_ms": 1.9613751884714845}, "off_policy_estimator": {}, "info": {"num_steps_trained": 3768000, "num_steps_sampled": 2009600, "sample_time_ms": 20457.246, "load_time_ms": 36.907, "grad_time_ms": 8867.46, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.003602199489250779, "policy_loss": -0.004857169929891825, "vf_loss": 90.2380599975586, "vf_explained_var": 0.7651500105857849, "kl": 0.0019707006867974997, "entropy": 1.1288973093032837, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2009600, "episodes_total": 5024, "training_iteration": 157, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-03-24", "timestamp": 1660251804, "time_this_iter_s": 29.596789121627808, "time_total_s": 10221.489138126373, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 10221.489138126373, "timesteps_since_restore": 2009600, "iterations_since_restore": 157, "perf": {"cpu_util_percent": 35.76428571428571, "ram_util_percent": 58.59285714285714}}
+{"episode_reward_max": 630.0, "episode_reward_min": 185.0, "episode_reward_mean": 563.17, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 89.0}, "policy_reward_max": {"ppo": 321.0}, "policy_reward_mean": {"ppo": 281.585}, "custom_metrics": {"sparse_reward_mean": 195.2, "sparse_reward_min": 60, "sparse_reward_max": 220, "shaped_reward_mean": 172.77, "shaped_reward_min": 65, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.97, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 17.41, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 15.66, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 16.69, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.52, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.42, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.2, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.16, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.12, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 16.65, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 6.07, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.8, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.77, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.5, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.66, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.85, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.07, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.14, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.2, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.74, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.17, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.67, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.12, 
"optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 16.65, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.12, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 16.65, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [576.0, 573.0, 495.0, 579.0, 579.0, 582.0, 570.0, 576.0, 567.0, 579.0, 582.0, 582.0, 579.0, 530.0, 573.0, 536.0, 533.0, 579.0, 185.0, 576.0, 582.0, 582.0, 544.0, 579.0, 576.0, 552.0, 570.0, 573.0, 630.0, 579.0, 495.0, 579.0, 525.0, 582.0, 582.0, 587.0, 587.0, 462.0, 578.0, 579.0, 519.0, 579.0, 582.0, 567.0, 576.0, 573.0, 573.0, 579.0, 579.0, 576.0, 579.0, 564.0, 519.0, 567.0, 579.0, 570.0, 582.0, 573.0, 579.0, 527.0, 582.0, 576.0, 573.0, 570.0, 515.0, 576.0, 582.0, 579.0, 582.0, 533.0, 564.0, 630.0, 579.0, 587.0, 579.0, 593.0, 582.0, 582.0, 584.0, 582.0, 573.0, 513.0, 570.0, 587.0, 507.0, 579.0, 564.0, 516.0, 579.0, 579.0, 576.0, 579.0, 579.0, 576.0, 579.0, 530.0, 573.0, 549.0, 501.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [276.0, 300.0, 280.0, 293.0, 248.0, 247.0, 295.0, 284.0, 282.0, 297.0, 278.0, 304.0, 276.0, 294.0, 288.0, 288.0, 299.0, 268.0, 299.0, 280.0, 285.0, 297.0, 294.0, 288.0, 291.0, 288.0, 277.0, 253.0, 297.0, 276.0, 273.0, 263.0, 275.0, 258.0, 288.0, 291.0, 89.0, 96.0, 301.0, 275.0, 290.0, 292.0, 288.0, 294.0, 276.0, 268.0, 282.0, 297.0, 290.0, 286.0, 281.0, 271.0, 293.0, 277.0, 275.0, 298.0, 316.0, 314.0, 286.0, 293.0, 256.0, 239.0, 287.0, 292.0, 260.0, 265.0, 283.0, 299.0, 289.0, 293.0, 294.0, 293.0, 293.0, 294.0, 236.0, 226.0, 295.0, 283.0, 287.0, 292.0, 268.0, 251.0, 295.0, 284.0, 285.0, 297.0, 281.0, 286.0, 293.0, 283.0, 289.0, 284.0, 285.0, 288.0, 290.0, 289.0, 280.0, 
299.0, 286.0, 290.0, 295.0, 284.0, 285.0, 279.0, 264.0, 255.0, 279.0, 288.0, 295.0, 284.0, 273.0, 297.0, 286.0, 296.0, 290.0, 283.0, 284.0, 295.0, 249.0, 278.0, 288.0, 294.0, 277.0, 299.0, 282.0, 291.0, 290.0, 280.0, 260.0, 255.0, 278.0, 298.0, 280.0, 302.0, 294.0, 285.0, 299.0, 283.0, 271.0, 262.0, 284.0, 280.0, 309.0, 321.0, 294.0, 285.0, 291.0, 296.0, 283.0, 296.0, 289.0, 304.0, 294.0, 288.0, 286.0, 296.0, 295.0, 289.0, 283.0, 299.0, 280.0, 293.0, 263.0, 250.0, 274.0, 296.0, 293.0, 294.0, 256.0, 251.0, 295.0, 284.0, 285.0, 279.0, 245.0, 271.0, 288.0, 291.0, 284.0, 295.0, 284.0, 292.0, 289.0, 290.0, 293.0, 286.0, 276.0, 300.0, 299.0, 280.0, 253.0, 277.0, 278.0, 295.0, 270.0, 279.0, 264.0, 237.0, 286.0, 296.0]}, "sampler_perf": {"mean_env_wait_ms": 1.4027851249897816, "mean_processing_ms": 0.3483803806446401, "mean_inference_ms": 1.9551724322503146}, "off_policy_estimator": {}, "info": {"num_steps_trained": 3792000, "num_steps_sampled": 2022400, "sample_time_ms": 20413.446, "load_time_ms": 37.245, "grad_time_ms": 8954.782, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.005850760731846094, "policy_loss": -0.002336603356525302, "vf_loss": 87.48675537109375, "vf_explained_var": 0.7656591534614563, "kl": 0.0021419422701001167, "entropy": 1.1226191520690918, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2022400, "episodes_total": 5056, "training_iteration": 158, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-03-54", "timestamp": 1660251834, "time_this_iter_s": 30.44686508178711, "time_total_s": 10251.93600320816, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 10251.93600320816, "timesteps_since_restore": 2022400, "iterations_since_restore": 158, "perf": {"cpu_util_percent": 37.25348837209302, "ram_util_percent": 58.44883720930233}}
+{"episode_reward_max": 630.0, "episode_reward_min": 185.0, "episode_reward_mean": 564.23, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 89.0}, "policy_reward_max": {"ppo": 326.0}, "policy_reward_mean": {"ppo": 282.115}, "custom_metrics": {"sparse_reward_mean": 195.6, "sparse_reward_min": 60, "sparse_reward_max": 220, "shaped_reward_mean": 173.03, "shaped_reward_min": 65, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.88, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 26, "onion_pickup_agent_1_mean": 17.41, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 15.6, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 16.77, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.49, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.4, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.25, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.18, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.08, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 16.67, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 6.19, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.59, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.95, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.36, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.65, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.68, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.09, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.16, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.31, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.66, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.28, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.59, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.08, 
"optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 16.67, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.08, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 16.67, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [587.0, 582.0, 573.0, 561.0, 584.0, 489.0, 573.0, 512.0, 582.0, 582.0, 576.0, 539.0, 570.0, 519.0, 627.0, 579.0, 518.0, 516.0, 576.0, 582.0, 582.0, 587.0, 582.0, 582.0, 576.0, 582.0, 573.0, 579.0, 579.0, 582.0, 570.0, 630.0, 515.0, 576.0, 582.0, 579.0, 582.0, 533.0, 564.0, 630.0, 579.0, 587.0, 579.0, 593.0, 582.0, 582.0, 584.0, 582.0, 573.0, 513.0, 570.0, 587.0, 507.0, 579.0, 564.0, 516.0, 579.0, 579.0, 576.0, 579.0, 579.0, 576.0, 579.0, 530.0, 573.0, 549.0, 501.0, 582.0, 576.0, 573.0, 495.0, 579.0, 579.0, 582.0, 570.0, 576.0, 567.0, 579.0, 582.0, 582.0, 579.0, 530.0, 573.0, 536.0, 533.0, 579.0, 185.0, 576.0, 582.0, 582.0, 544.0, 579.0, 576.0, 552.0, 570.0, 573.0, 630.0, 579.0, 495.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [309.0, 278.0, 294.0, 288.0, 287.0, 286.0, 297.0, 264.0, 291.0, 293.0, 240.0, 249.0, 292.0, 281.0, 262.0, 250.0, 297.0, 285.0, 285.0, 297.0, 279.0, 297.0, 262.0, 277.0, 287.0, 283.0, 266.0, 253.0, 321.0, 306.0, 286.0, 293.0, 272.0, 246.0, 274.0, 242.0, 282.0, 294.0, 284.0, 298.0, 288.0, 294.0, 285.0, 302.0, 295.0, 287.0, 288.0, 294.0, 279.0, 297.0, 289.0, 293.0, 277.0, 296.0, 286.0, 293.0, 304.0, 275.0, 290.0, 292.0, 294.0, 276.0, 326.0, 304.0, 260.0, 255.0, 278.0, 298.0, 280.0, 302.0, 294.0, 285.0, 299.0, 283.0, 271.0, 262.0, 284.0, 280.0, 309.0, 321.0, 294.0, 285.0, 291.0, 296.0, 283.0, 296.0, 289.0, 304.0, 294.0, 288.0, 286.0, 296.0, 295.0, 289.0, 283.0, 299.0, 280.0, 
293.0, 263.0, 250.0, 274.0, 296.0, 293.0, 294.0, 256.0, 251.0, 295.0, 284.0, 285.0, 279.0, 245.0, 271.0, 288.0, 291.0, 284.0, 295.0, 284.0, 292.0, 289.0, 290.0, 293.0, 286.0, 276.0, 300.0, 299.0, 280.0, 253.0, 277.0, 278.0, 295.0, 270.0, 279.0, 264.0, 237.0, 286.0, 296.0, 276.0, 300.0, 280.0, 293.0, 248.0, 247.0, 295.0, 284.0, 282.0, 297.0, 278.0, 304.0, 276.0, 294.0, 288.0, 288.0, 299.0, 268.0, 299.0, 280.0, 285.0, 297.0, 294.0, 288.0, 291.0, 288.0, 277.0, 253.0, 297.0, 276.0, 273.0, 263.0, 275.0, 258.0, 288.0, 291.0, 89.0, 96.0, 301.0, 275.0, 290.0, 292.0, 288.0, 294.0, 276.0, 268.0, 282.0, 297.0, 290.0, 286.0, 281.0, 271.0, 293.0, 277.0, 275.0, 298.0, 316.0, 314.0, 286.0, 293.0, 256.0, 239.0, 287.0, 292.0]}, "sampler_perf": {"mean_env_wait_ms": 1.3967794054069242, "mean_processing_ms": 0.3471925853842631, "mean_inference_ms": 1.9496805791927851}, "off_policy_estimator": {}, "info": {"num_steps_trained": 3816000, "num_steps_sampled": 2035200, "sample_time_ms": 20765.512, "load_time_ms": 37.179, "grad_time_ms": 8845.352, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.005116061773151159, "policy_loss": -0.0030946088954806328, "vf_loss": 87.75751495361328, "vf_explained_var": 0.7570715546607971, "kl": 0.0022622861433774233, "entropy": 1.1301772594451904, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2035200, "episodes_total": 5088, "training_iteration": 159, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-04-27", "timestamp": 1660251867, "time_this_iter_s": 32.650943994522095, "time_total_s": 10284.586947202682, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 10284.586947202682, "timesteps_since_restore": 2035200, "iterations_since_restore": 159, "perf": {"cpu_util_percent": 33.56739130434783, "ram_util_percent": 58.49565217391306}}
+{"episode_reward_max": 630.0, "episode_reward_min": 185.0, "episode_reward_mean": 562.92, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 89.0}, "policy_reward_max": {"ppo": 326.0}, "policy_reward_mean": {"ppo": 281.46}, "custom_metrics": {"sparse_reward_mean": 195.0, "sparse_reward_min": 60, "sparse_reward_max": 220, "shaped_reward_mean": 172.92, "shaped_reward_min": 65, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.87, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 26, "onion_pickup_agent_1_mean": 17.29, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 15.57, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 16.7, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.54, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.39, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.31, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.17, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.07, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 16.62, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.11, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 13, "dish_pickup_agent_1_mean": 5.42, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.05, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.4, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.54, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.51, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.08, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.14, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.29, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.61, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.27, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.58, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.07, 
"optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 16.62, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.07, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 16.62, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [237.0, 573.0, 590.0, 573.0, 582.0, 576.0, 579.0, 576.0, 587.0, 582.0, 582.0, 579.0, 576.0, 579.0, 521.0, 582.0, 570.0, 576.0, 582.0, 582.0, 582.0, 582.0, 579.0, 579.0, 582.0, 579.0, 513.0, 582.0, 582.0, 582.0, 576.0, 582.0, 573.0, 549.0, 501.0, 582.0, 576.0, 573.0, 495.0, 579.0, 579.0, 582.0, 570.0, 576.0, 567.0, 579.0, 582.0, 582.0, 579.0, 530.0, 573.0, 536.0, 533.0, 579.0, 185.0, 576.0, 582.0, 582.0, 544.0, 579.0, 576.0, 552.0, 570.0, 573.0, 630.0, 579.0, 495.0, 579.0, 587.0, 582.0, 573.0, 561.0, 584.0, 489.0, 573.0, 512.0, 582.0, 582.0, 576.0, 539.0, 570.0, 519.0, 627.0, 579.0, 518.0, 516.0, 576.0, 582.0, 582.0, 587.0, 582.0, 582.0, 576.0, 582.0, 573.0, 579.0, 579.0, 582.0, 570.0, 630.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [122.0, 115.0, 280.0, 293.0, 291.0, 299.0, 293.0, 280.0, 291.0, 291.0, 291.0, 285.0, 292.0, 287.0, 287.0, 289.0, 285.0, 302.0, 299.0, 283.0, 285.0, 297.0, 283.0, 296.0, 289.0, 287.0, 291.0, 288.0, 269.0, 252.0, 287.0, 295.0, 269.0, 301.0, 282.0, 294.0, 293.0, 289.0, 293.0, 289.0, 293.0, 289.0, 289.0, 293.0, 291.0, 288.0, 281.0, 298.0, 275.0, 307.0, 294.0, 285.0, 256.0, 257.0, 295.0, 287.0, 280.0, 302.0, 285.0, 297.0, 291.0, 285.0, 302.0, 280.0, 278.0, 295.0, 270.0, 279.0, 264.0, 237.0, 286.0, 296.0, 276.0, 300.0, 280.0, 293.0, 248.0, 247.0, 295.0, 284.0, 282.0, 297.0, 278.0, 304.0, 276.0, 294.0, 288.0, 288.0, 299.0, 268.0, 299.0, 280.0, 285.0, 297.0, 294.0, 288.0, 291.0, 
288.0, 277.0, 253.0, 297.0, 276.0, 273.0, 263.0, 275.0, 258.0, 288.0, 291.0, 89.0, 96.0, 301.0, 275.0, 290.0, 292.0, 288.0, 294.0, 276.0, 268.0, 282.0, 297.0, 290.0, 286.0, 281.0, 271.0, 293.0, 277.0, 275.0, 298.0, 316.0, 314.0, 286.0, 293.0, 256.0, 239.0, 287.0, 292.0, 309.0, 278.0, 294.0, 288.0, 287.0, 286.0, 297.0, 264.0, 291.0, 293.0, 240.0, 249.0, 292.0, 281.0, 262.0, 250.0, 297.0, 285.0, 285.0, 297.0, 279.0, 297.0, 262.0, 277.0, 287.0, 283.0, 266.0, 253.0, 321.0, 306.0, 286.0, 293.0, 272.0, 246.0, 274.0, 242.0, 282.0, 294.0, 284.0, 298.0, 288.0, 294.0, 285.0, 302.0, 295.0, 287.0, 288.0, 294.0, 279.0, 297.0, 289.0, 293.0, 277.0, 296.0, 286.0, 293.0, 304.0, 275.0, 290.0, 292.0, 294.0, 276.0, 326.0, 304.0]}, "sampler_perf": {"mean_env_wait_ms": 1.3908344800853478, "mean_processing_ms": 0.3460119024024818, "mean_inference_ms": 1.9441766821864475}, "off_policy_estimator": {}, "info": {"num_steps_trained": 3840000, "num_steps_sampled": 2048000, "sample_time_ms": 20695.053, "load_time_ms": 36.846, "grad_time_ms": 8667.722, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0033892595674842596, "policy_loss": -0.004953155294060707, "vf_loss": 89.03093719482422, "vf_explained_var": 0.7680574059486389, "kl": 0.0018749010050669312, "entropy": 1.1213653087615967, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2048000, "episodes_total": 5120, "training_iteration": 160, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-04-56", "timestamp": 1660251896, "time_this_iter_s": 28.625488996505737, "time_total_s": 10313.212436199188, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 10313.212436199188, "timesteps_since_restore": 2048000, "iterations_since_restore": 160, "perf": {"cpu_util_percent": 34.958536585365856, "ram_util_percent": 58.548780487804876}}
+{"episode_reward_max": 630.0, "episode_reward_min": 237.0, "episode_reward_mean": 567.29, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 115.0}, "policy_reward_max": {"ppo": 331.0}, "policy_reward_mean": {"ppo": 283.645}, "custom_metrics": {"sparse_reward_mean": 196.4, "sparse_reward_min": 80, "sparse_reward_max": 220, "shaped_reward_mean": 174.49, "shaped_reward_min": 77, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.94, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 26, "onion_pickup_agent_1_mean": 17.55, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 15.56, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 16.93, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 25, "onion_drop_agent_0_mean": 0.61, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.48, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.34, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.2, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.03, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 16.83, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 6.22, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 13, "dish_pickup_agent_1_mean": 5.34, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.23, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.49, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.57, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.4, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 7, "useful_dish_drop_agent_0_mean": 0.1, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.11, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.35, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.61, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.29, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.58, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.03, 
"optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 16.83, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.03, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 16.83, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [567.0, 567.0, 582.0, 582.0, 579.0, 576.0, 587.0, 504.0, 579.0, 522.0, 516.0, 579.0, 579.0, 584.0, 587.0, 525.0, 630.0, 582.0, 408.0, 582.0, 530.0, 630.0, 570.0, 579.0, 582.0, 576.0, 582.0, 573.0, 561.0, 582.0, 576.0, 573.0, 630.0, 579.0, 495.0, 579.0, 587.0, 582.0, 573.0, 561.0, 584.0, 489.0, 573.0, 512.0, 582.0, 582.0, 576.0, 539.0, 570.0, 519.0, 627.0, 579.0, 518.0, 516.0, 576.0, 582.0, 582.0, 587.0, 582.0, 582.0, 576.0, 582.0, 573.0, 579.0, 579.0, 582.0, 570.0, 630.0, 237.0, 573.0, 590.0, 573.0, 582.0, 576.0, 579.0, 576.0, 587.0, 582.0, 582.0, 579.0, 576.0, 579.0, 521.0, 582.0, 570.0, 576.0, 582.0, 582.0, 582.0, 582.0, 579.0, 579.0, 582.0, 579.0, 513.0, 582.0, 582.0, 582.0, 576.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [278.0, 289.0, 277.0, 290.0, 291.0, 291.0, 279.0, 303.0, 295.0, 284.0, 294.0, 282.0, 295.0, 292.0, 257.0, 247.0, 289.0, 290.0, 250.0, 272.0, 249.0, 267.0, 290.0, 289.0, 302.0, 277.0, 299.0, 285.0, 304.0, 283.0, 267.0, 258.0, 299.0, 331.0, 285.0, 297.0, 205.0, 203.0, 291.0, 291.0, 269.0, 261.0, 313.0, 317.0, 288.0, 282.0, 289.0, 290.0, 288.0, 294.0, 293.0, 283.0, 298.0, 284.0, 288.0, 285.0, 284.0, 277.0, 296.0, 286.0, 283.0, 293.0, 291.0, 282.0, 316.0, 314.0, 286.0, 293.0, 256.0, 239.0, 287.0, 292.0, 309.0, 278.0, 294.0, 288.0, 287.0, 286.0, 297.0, 264.0, 291.0, 293.0, 240.0, 249.0, 292.0, 281.0, 262.0, 250.0, 297.0, 285.0, 285.0, 297.0, 279.0, 297.0, 262.0, 277.0, 287.0, 
283.0, 266.0, 253.0, 321.0, 306.0, 286.0, 293.0, 272.0, 246.0, 274.0, 242.0, 282.0, 294.0, 284.0, 298.0, 288.0, 294.0, 285.0, 302.0, 295.0, 287.0, 288.0, 294.0, 279.0, 297.0, 289.0, 293.0, 277.0, 296.0, 286.0, 293.0, 304.0, 275.0, 290.0, 292.0, 294.0, 276.0, 326.0, 304.0, 122.0, 115.0, 280.0, 293.0, 291.0, 299.0, 293.0, 280.0, 291.0, 291.0, 291.0, 285.0, 292.0, 287.0, 287.0, 289.0, 285.0, 302.0, 299.0, 283.0, 285.0, 297.0, 283.0, 296.0, 289.0, 287.0, 291.0, 288.0, 269.0, 252.0, 287.0, 295.0, 269.0, 301.0, 282.0, 294.0, 293.0, 289.0, 293.0, 289.0, 293.0, 289.0, 289.0, 293.0, 291.0, 288.0, 281.0, 298.0, 275.0, 307.0, 294.0, 285.0, 256.0, 257.0, 295.0, 287.0, 280.0, 302.0, 285.0, 297.0, 291.0, 285.0, 302.0, 280.0]}, "sampler_perf": {"mean_env_wait_ms": 1.3849427098912996, "mean_processing_ms": 0.3448405344667729, "mean_inference_ms": 1.9385368397952782}, "off_policy_estimator": {}, "info": {"num_steps_trained": 3864000, "num_steps_sampled": 2060800, "sample_time_ms": 20359.83, "load_time_ms": 36.714, "grad_time_ms": 8574.991, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.003171335905790329, "policy_loss": -0.005784957204014063, "vf_loss": 95.20501708984375, "vf_explained_var": 0.7632928490638733, "kl": 0.001863123499788344, "entropy": 1.1284128427505493, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2060800, "episodes_total": 5152, "training_iteration": 161, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-05-24", "timestamp": 1660251924, "time_this_iter_s": 28.188406705856323, "time_total_s": 10341.400842905045, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 10341.400842905045, "timesteps_since_restore": 2060800, "iterations_since_restore": 161, "perf": {"cpu_util_percent": 36.3875, "ram_util_percent": 58.585}}
+{"episode_reward_max": 630.0, "episode_reward_min": 237.0, "episode_reward_mean": 568.36, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 115.0}, "policy_reward_max": {"ppo": 331.0}, "policy_reward_mean": {"ppo": 284.18}, "custom_metrics": {"sparse_reward_mean": 196.6, "sparse_reward_min": 80, "sparse_reward_max": 220, "shaped_reward_mean": 175.16, "shaped_reward_min": 77, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.88, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 17.91, "onion_pickup_agent_1_min": 12, "onion_pickup_agent_1_max": 27, "useful_onion_pickup_agent_0_mean": 15.43, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 17.21, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 25, "onion_drop_agent_0_mean": 0.7, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.57, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.36, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.26, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 14.85, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.1, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 6.3, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 13, "dish_pickup_agent_1_mean": 5.37, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.32, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.5, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.62, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.55, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 7, "useful_dish_drop_agent_0_mean": 0.09, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.12, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.42, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.56, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.35, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.52, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.85, 
"optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.1, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.85, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.1, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [576.0, 579.0, 582.0, 587.0, 582.0, 573.0, 576.0, 579.0, 573.0, 627.0, 558.0, 570.0, 579.0, 579.0, 573.0, 581.0, 582.0, 539.0, 582.0, 579.0, 582.0, 579.0, 570.0, 567.0, 522.0, 587.0, 564.0, 584.0, 507.0, 493.0, 570.0, 579.0, 579.0, 582.0, 570.0, 630.0, 237.0, 573.0, 590.0, 573.0, 582.0, 576.0, 579.0, 576.0, 587.0, 582.0, 582.0, 579.0, 576.0, 579.0, 521.0, 582.0, 570.0, 576.0, 582.0, 582.0, 582.0, 582.0, 579.0, 579.0, 582.0, 579.0, 513.0, 582.0, 582.0, 582.0, 576.0, 582.0, 567.0, 567.0, 582.0, 582.0, 579.0, 576.0, 587.0, 504.0, 579.0, 522.0, 516.0, 579.0, 579.0, 584.0, 587.0, 525.0, 630.0, 582.0, 408.0, 582.0, 530.0, 630.0, 570.0, 579.0, 582.0, 576.0, 582.0, 573.0, 561.0, 582.0, 576.0, 573.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [291.0, 285.0, 291.0, 288.0, 296.0, 286.0, 293.0, 294.0, 291.0, 291.0, 287.0, 286.0, 282.0, 294.0, 290.0, 289.0, 283.0, 290.0, 313.0, 314.0, 289.0, 269.0, 284.0, 286.0, 295.0, 284.0, 283.0, 296.0, 284.0, 289.0, 284.0, 297.0, 283.0, 299.0, 268.0, 271.0, 302.0, 280.0, 294.0, 285.0, 297.0, 285.0, 293.0, 286.0, 282.0, 288.0, 302.0, 265.0, 252.0, 270.0, 291.0, 296.0, 284.0, 280.0, 285.0, 299.0, 249.0, 258.0, 250.0, 243.0, 279.0, 291.0, 280.0, 299.0, 304.0, 275.0, 290.0, 292.0, 294.0, 276.0, 326.0, 304.0, 122.0, 115.0, 280.0, 293.0, 291.0, 299.0, 293.0, 280.0, 291.0, 291.0, 291.0, 285.0, 292.0, 287.0, 287.0, 289.0, 285.0, 302.0, 299.0, 283.0, 285.0, 297.0, 283.0, 296.0, 289.0, 
287.0, 291.0, 288.0, 269.0, 252.0, 287.0, 295.0, 269.0, 301.0, 282.0, 294.0, 293.0, 289.0, 293.0, 289.0, 293.0, 289.0, 289.0, 293.0, 291.0, 288.0, 281.0, 298.0, 275.0, 307.0, 294.0, 285.0, 256.0, 257.0, 295.0, 287.0, 280.0, 302.0, 285.0, 297.0, 291.0, 285.0, 302.0, 280.0, 278.0, 289.0, 277.0, 290.0, 291.0, 291.0, 279.0, 303.0, 295.0, 284.0, 294.0, 282.0, 295.0, 292.0, 257.0, 247.0, 289.0, 290.0, 250.0, 272.0, 249.0, 267.0, 290.0, 289.0, 302.0, 277.0, 299.0, 285.0, 304.0, 283.0, 267.0, 258.0, 299.0, 331.0, 285.0, 297.0, 205.0, 203.0, 291.0, 291.0, 269.0, 261.0, 313.0, 317.0, 288.0, 282.0, 289.0, 290.0, 288.0, 294.0, 293.0, 283.0, 298.0, 284.0, 288.0, 285.0, 284.0, 277.0, 296.0, 286.0, 283.0, 293.0, 291.0, 282.0]}, "sampler_perf": {"mean_env_wait_ms": 1.3791010078296764, "mean_processing_ms": 0.34367607505559905, "mean_inference_ms": 1.932410583140313}, "off_policy_estimator": {}, "info": {"num_steps_trained": 3888000, "num_steps_sampled": 2073600, "sample_time_ms": 20389.495, "load_time_ms": 36.868, "grad_time_ms": 8516.869, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.007552561815828085, "policy_loss": -0.0015357719967141747, "vf_loss": 96.43359375, "vf_explained_var": 0.7504541277885437, "kl": 0.0026693844702094793, "entropy": 1.110058307647705, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2073600, "episodes_total": 5184, "training_iteration": 162, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-05-53", "timestamp": 1660251953, "time_this_iter_s": 29.546289205551147, "time_total_s": 10370.947132110596, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 10370.947132110596, "timesteps_since_restore": 2073600, "iterations_since_restore": 162, "perf": {"cpu_util_percent": 34.892682926829266, "ram_util_percent": 58.55365853658536}}
+{"episode_reward_max": 630.0, "episode_reward_min": 345.0, "episode_reward_mean": 568.79, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 172.0}, "policy_reward_max": {"ppo": 331.0}, "policy_reward_mean": {"ppo": 284.395}, "custom_metrics": {"sparse_reward_mean": 197.0, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 174.79, "shaped_reward_min": 105, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.0, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 17.92, "onion_pickup_agent_1_min": 12, "onion_pickup_agent_1_max": 27, "useful_onion_pickup_agent_0_mean": 15.5, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 17.25, "useful_onion_pickup_agent_1_min": 11, "useful_onion_pickup_agent_1_max": 25, "onion_drop_agent_0_mean": 0.74, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.59, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.41, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.26, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 14.93, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.01, "potting_onion_agent_1_min": 12, "potting_onion_agent_1_max": 25, "dish_pickup_agent_0_mean": 6.21, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 13, "dish_pickup_agent_1_mean": 5.51, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.11, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.58, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.69, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.6, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 7, "useful_dish_drop_agent_0_mean": 0.09, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.12, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.3, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.69, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.23, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.65, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.93, 
"optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.01, "optimal_onion_potting_agent_1_min": 12, "optimal_onion_potting_agent_1_max": 25, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.93, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.01, "viable_onion_potting_agent_1_min": 12, "viable_onion_potting_agent_1_max": 25, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 579.0, 582.0, 470.0, 564.0, 576.0, 576.0, 576.0, 582.0, 587.0, 579.0, 582.0, 579.0, 345.0, 570.0, 576.0, 582.0, 576.0, 576.0, 579.0, 576.0, 567.0, 567.0, 582.0, 579.0, 564.0, 570.0, 579.0, 582.0, 579.0, 576.0, 627.0, 582.0, 582.0, 576.0, 582.0, 567.0, 567.0, 582.0, 582.0, 579.0, 576.0, 587.0, 504.0, 579.0, 522.0, 516.0, 579.0, 579.0, 584.0, 587.0, 525.0, 630.0, 582.0, 408.0, 582.0, 530.0, 630.0, 570.0, 579.0, 582.0, 576.0, 582.0, 573.0, 561.0, 582.0, 576.0, 573.0, 576.0, 579.0, 582.0, 587.0, 582.0, 573.0, 576.0, 579.0, 573.0, 627.0, 558.0, 570.0, 579.0, 579.0, 573.0, 581.0, 582.0, 539.0, 582.0, 579.0, 582.0, 579.0, 570.0, 567.0, 522.0, 587.0, 564.0, 584.0, 507.0, 493.0, 570.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [281.0, 301.0, 285.0, 294.0, 279.0, 303.0, 227.0, 243.0, 263.0, 301.0, 280.0, 296.0, 276.0, 300.0, 283.0, 293.0, 286.0, 296.0, 301.0, 286.0, 288.0, 291.0, 288.0, 294.0, 302.0, 277.0, 173.0, 172.0, 289.0, 281.0, 285.0, 291.0, 306.0, 276.0, 280.0, 296.0, 290.0, 286.0, 293.0, 286.0, 283.0, 293.0, 280.0, 287.0, 289.0, 278.0, 298.0, 284.0, 285.0, 294.0, 280.0, 284.0, 282.0, 288.0, 281.0, 298.0, 295.0, 287.0, 282.0, 297.0, 285.0, 291.0, 306.0, 321.0, 280.0, 302.0, 285.0, 297.0, 291.0, 285.0, 302.0, 280.0, 278.0, 289.0, 277.0, 290.0, 291.0, 291.0, 279.0, 303.0, 295.0, 284.0, 294.0, 282.0, 295.0, 292.0, 257.0, 247.0, 289.0, 290.0, 250.0, 272.0, 249.0, 267.0, 290.0, 289.0, 302.0, 
277.0, 299.0, 285.0, 304.0, 283.0, 267.0, 258.0, 299.0, 331.0, 285.0, 297.0, 205.0, 203.0, 291.0, 291.0, 269.0, 261.0, 313.0, 317.0, 288.0, 282.0, 289.0, 290.0, 288.0, 294.0, 293.0, 283.0, 298.0, 284.0, 288.0, 285.0, 284.0, 277.0, 296.0, 286.0, 283.0, 293.0, 291.0, 282.0, 291.0, 285.0, 291.0, 288.0, 296.0, 286.0, 293.0, 294.0, 291.0, 291.0, 287.0, 286.0, 282.0, 294.0, 290.0, 289.0, 283.0, 290.0, 313.0, 314.0, 289.0, 269.0, 284.0, 286.0, 295.0, 284.0, 283.0, 296.0, 284.0, 289.0, 284.0, 297.0, 283.0, 299.0, 268.0, 271.0, 302.0, 280.0, 294.0, 285.0, 297.0, 285.0, 293.0, 286.0, 282.0, 288.0, 302.0, 265.0, 252.0, 270.0, 291.0, 296.0, 284.0, 280.0, 285.0, 299.0, 249.0, 258.0, 250.0, 243.0, 279.0, 291.0, 280.0, 299.0]}, "sampler_perf": {"mean_env_wait_ms": 1.3733429258954066, "mean_processing_ms": 0.3425317863430243, "mean_inference_ms": 1.9266299653449164}, "off_policy_estimator": {}, "info": {"num_steps_trained": 3912000, "num_steps_sampled": 2086400, "sample_time_ms": 20824.632, "load_time_ms": 36.596, "grad_time_ms": 8758.37, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.004362883511930704, "policy_loss": -0.003907353617250919, "vf_loss": 88.24420166015625, "vf_explained_var": 0.7741295695304871, "kl": 0.002105970401316881, "entropy": 1.1083542108535767, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2086400, "episodes_total": 5216, "training_iteration": 163, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-06-26", "timestamp": 1660251986, "time_this_iter_s": 32.18382000923157, "time_total_s": 10403.130952119827, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 10403.130952119827, "timesteps_since_restore": 2086400, "iterations_since_restore": 163, "perf": {"cpu_util_percent": 33.87608695652174, "ram_util_percent": 58.582608695652176}}
+{"episode_reward_max": 630.0, "episode_reward_min": 336.0, "episode_reward_mean": 565.98, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 162.0}, "policy_reward_max": {"ppo": 321.0}, "policy_reward_mean": {"ppo": 282.99}, "custom_metrics": {"sparse_reward_mean": 196.0, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 173.98, "shaped_reward_min": 96, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.87, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 17.89, "onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 27, "useful_onion_pickup_agent_0_mean": 15.41, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 17.2, "useful_onion_pickup_agent_1_min": 11, "useful_onion_pickup_agent_1_max": 25, "onion_drop_agent_0_mean": 0.72, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.58, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.36, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.25, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 14.83, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.0, "potting_onion_agent_1_min": 11, "potting_onion_agent_1_max": 25, "dish_pickup_agent_0_mean": 6.22, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 13, "dish_pickup_agent_1_mean": 5.46, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.12, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.46, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.65, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.63, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.08, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.15, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.31, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.64, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.26, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.6, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.83, 
"optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.0, "optimal_onion_potting_agent_1_min": 11, "optimal_onion_potting_agent_1_max": 25, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.83, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.0, "viable_onion_potting_agent_1_min": 11, "viable_onion_potting_agent_1_max": 25, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 587.0, 501.0, 579.0, 573.0, 558.0, 582.0, 525.0, 579.0, 430.0, 576.0, 530.0, 519.0, 579.0, 587.0, 582.0, 336.0, 570.0, 465.0, 582.0, 582.0, 582.0, 573.0, 582.0, 576.0, 582.0, 582.0, 627.0, 587.0, 630.0, 579.0, 579.0, 561.0, 582.0, 576.0, 573.0, 576.0, 579.0, 582.0, 587.0, 582.0, 573.0, 576.0, 579.0, 573.0, 627.0, 558.0, 570.0, 579.0, 579.0, 573.0, 581.0, 582.0, 539.0, 582.0, 579.0, 582.0, 579.0, 570.0, 567.0, 522.0, 587.0, 564.0, 584.0, 507.0, 493.0, 570.0, 579.0, 582.0, 579.0, 582.0, 470.0, 564.0, 576.0, 576.0, 576.0, 582.0, 587.0, 579.0, 582.0, 579.0, 345.0, 570.0, 576.0, 582.0, 576.0, 576.0, 579.0, 576.0, 567.0, 567.0, 582.0, 579.0, 564.0, 570.0, 579.0, 582.0, 579.0, 576.0, 627.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [287.0, 292.0, 295.0, 292.0, 255.0, 246.0, 279.0, 300.0, 283.0, 290.0, 285.0, 273.0, 301.0, 281.0, 264.0, 261.0, 285.0, 294.0, 218.0, 212.0, 286.0, 290.0, 263.0, 267.0, 272.0, 247.0, 300.0, 279.0, 304.0, 283.0, 301.0, 281.0, 162.0, 174.0, 279.0, 291.0, 231.0, 234.0, 276.0, 306.0, 286.0, 296.0, 281.0, 301.0, 288.0, 285.0, 290.0, 292.0, 291.0, 285.0, 298.0, 284.0, 296.0, 286.0, 311.0, 316.0, 290.0, 297.0, 319.0, 311.0, 289.0, 290.0, 293.0, 286.0, 284.0, 277.0, 296.0, 286.0, 283.0, 293.0, 291.0, 282.0, 291.0, 285.0, 291.0, 288.0, 296.0, 286.0, 293.0, 294.0, 291.0, 291.0, 287.0, 286.0, 282.0, 294.0, 290.0, 289.0, 283.0, 290.0, 313.0, 314.0, 289.0, 269.0, 284.0, 286.0, 295.0, 
284.0, 283.0, 296.0, 284.0, 289.0, 284.0, 297.0, 283.0, 299.0, 268.0, 271.0, 302.0, 280.0, 294.0, 285.0, 297.0, 285.0, 293.0, 286.0, 282.0, 288.0, 302.0, 265.0, 252.0, 270.0, 291.0, 296.0, 284.0, 280.0, 285.0, 299.0, 249.0, 258.0, 250.0, 243.0, 279.0, 291.0, 280.0, 299.0, 281.0, 301.0, 285.0, 294.0, 279.0, 303.0, 227.0, 243.0, 263.0, 301.0, 280.0, 296.0, 276.0, 300.0, 283.0, 293.0, 286.0, 296.0, 301.0, 286.0, 288.0, 291.0, 288.0, 294.0, 302.0, 277.0, 173.0, 172.0, 289.0, 281.0, 285.0, 291.0, 306.0, 276.0, 280.0, 296.0, 290.0, 286.0, 293.0, 286.0, 283.0, 293.0, 280.0, 287.0, 289.0, 278.0, 298.0, 284.0, 285.0, 294.0, 280.0, 284.0, 282.0, 288.0, 281.0, 298.0, 295.0, 287.0, 282.0, 297.0, 285.0, 291.0, 306.0, 321.0]}, "sampler_perf": {"mean_env_wait_ms": 1.3676747484724832, "mean_processing_ms": 0.341408599904806, "mean_inference_ms": 1.9212242933819834}, "off_policy_estimator": {}, "info": {"num_steps_trained": 3936000, "num_steps_sampled": 2099200, "sample_time_ms": 20928.906, "load_time_ms": 37.45, "grad_time_ms": 9167.09, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0035528007429093122, "policy_loss": -0.005154869984835386, "vf_loss": 92.63870239257812, "vf_explained_var": 0.7672746181488037, "kl": 0.0020837958436459303, "entropy": 1.1124038696289062, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2099200, "episodes_total": 5248, "training_iteration": 164, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-06-59", "timestamp": 1660252019, "time_this_iter_s": 33.07875204086304, "time_total_s": 10436.20970416069, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 10436.20970416069, "timesteps_since_restore": 2099200, "iterations_since_restore": 164, "perf": {"cpu_util_percent": 35.41276595744681, "ram_util_percent": 58.536170212765946}}
+{"episode_reward_max": 633.0, "episode_reward_min": 336.0, "episode_reward_mean": 565.62, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 162.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 282.81}, "custom_metrics": {"sparse_reward_mean": 195.8, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 174.02, "shaped_reward_min": 96, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.68, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 17.95, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 15.35, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 17.25, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.63, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.59, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.29, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.23, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 14.78, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 17.06, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 25, "dish_pickup_agent_0_mean": 6.3, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 13, "dish_pickup_agent_1_mean": 5.36, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.28, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.37, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.64, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.6, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.08, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.16, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.36, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.55, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.32, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.52, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.78, 
"optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 17.06, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 25, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.78, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 17.06, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 25, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [576.0, 579.0, 576.0, 573.0, 587.0, 573.0, 633.0, 630.0, 630.0, 573.0, 582.0, 582.0, 582.0, 516.0, 579.0, 582.0, 627.0, 576.0, 416.0, 582.0, 582.0, 579.0, 558.0, 582.0, 576.0, 576.0, 525.0, 579.0, 579.0, 513.0, 582.0, 582.0, 507.0, 493.0, 570.0, 579.0, 582.0, 579.0, 582.0, 470.0, 564.0, 576.0, 576.0, 576.0, 582.0, 587.0, 579.0, 582.0, 579.0, 345.0, 570.0, 576.0, 582.0, 576.0, 576.0, 579.0, 576.0, 567.0, 567.0, 582.0, 579.0, 564.0, 570.0, 579.0, 582.0, 579.0, 576.0, 627.0, 579.0, 587.0, 501.0, 579.0, 573.0, 558.0, 582.0, 525.0, 579.0, 430.0, 576.0, 530.0, 519.0, 579.0, 587.0, 582.0, 336.0, 570.0, 465.0, 582.0, 582.0, 582.0, 573.0, 582.0, 576.0, 582.0, 582.0, 627.0, 587.0, 630.0, 579.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [286.0, 290.0, 283.0, 296.0, 295.0, 281.0, 305.0, 268.0, 297.0, 290.0, 296.0, 277.0, 314.0, 319.0, 319.0, 311.0, 308.0, 322.0, 291.0, 282.0, 296.0, 286.0, 297.0, 285.0, 285.0, 297.0, 266.0, 250.0, 295.0, 284.0, 291.0, 291.0, 324.0, 303.0, 298.0, 278.0, 213.0, 203.0, 287.0, 295.0, 296.0, 286.0, 283.0, 296.0, 279.0, 279.0, 285.0, 297.0, 282.0, 294.0, 285.0, 291.0, 274.0, 251.0, 293.0, 286.0, 289.0, 290.0, 267.0, 246.0, 283.0, 299.0, 299.0, 283.0, 249.0, 258.0, 250.0, 243.0, 279.0, 291.0, 280.0, 299.0, 281.0, 301.0, 285.0, 294.0, 279.0, 303.0, 227.0, 243.0, 263.0, 301.0, 280.0, 296.0, 276.0, 300.0, 283.0, 293.0, 286.0, 296.0, 301.0, 286.0, 288.0, 291.0, 288.0, 294.0, 302.0, 
277.0, 173.0, 172.0, 289.0, 281.0, 285.0, 291.0, 306.0, 276.0, 280.0, 296.0, 290.0, 286.0, 293.0, 286.0, 283.0, 293.0, 280.0, 287.0, 289.0, 278.0, 298.0, 284.0, 285.0, 294.0, 280.0, 284.0, 282.0, 288.0, 281.0, 298.0, 295.0, 287.0, 282.0, 297.0, 285.0, 291.0, 306.0, 321.0, 287.0, 292.0, 295.0, 292.0, 255.0, 246.0, 279.0, 300.0, 283.0, 290.0, 285.0, 273.0, 301.0, 281.0, 264.0, 261.0, 285.0, 294.0, 218.0, 212.0, 286.0, 290.0, 263.0, 267.0, 272.0, 247.0, 300.0, 279.0, 304.0, 283.0, 301.0, 281.0, 162.0, 174.0, 279.0, 291.0, 231.0, 234.0, 276.0, 306.0, 286.0, 296.0, 281.0, 301.0, 288.0, 285.0, 290.0, 292.0, 291.0, 285.0, 298.0, 284.0, 296.0, 286.0, 311.0, 316.0, 290.0, 297.0, 319.0, 311.0, 289.0, 290.0, 293.0, 286.0]}, "sampler_perf": {"mean_env_wait_ms": 1.3620936512767547, "mean_processing_ms": 0.34030575499793914, "mean_inference_ms": 1.9161448666876886}, "off_policy_estimator": {}, "info": {"num_steps_trained": 3960000, "num_steps_sampled": 2112000, "sample_time_ms": 21120.168, "load_time_ms": 37.608, "grad_time_ms": 9339.122, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0019619378726929426, "policy_loss": -0.006335819140076637, "vf_loss": 88.54428100585938, "vf_explained_var": 0.7676218152046204, "kl": 0.0017338074976578355, "entropy": 1.1133431196212769, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2112000, "episodes_total": 5280, "training_iteration": 165, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-07-30", "timestamp": 1660252050, "time_this_iter_s": 31.535957098007202, "time_total_s": 10467.745661258698, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 10467.745661258698, "timesteps_since_restore": 2112000, "iterations_since_restore": 165, "perf": {"cpu_util_percent": 34.425000000000004, "ram_util_percent": 58.65909090909092}}
+{"episode_reward_max": 633.0, "episode_reward_min": 336.0, "episode_reward_mean": 569.62, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 162.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 284.81}, "custom_metrics": {"sparse_reward_mean": 197.2, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 175.22, "shaped_reward_min": 96, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.54, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 18.19, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 15.24, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 17.44, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.58, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.61, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.24, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.24, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 14.67, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 17.3, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 6.43, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 13, "dish_pickup_agent_1_mean": 5.11, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.57, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.25, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.56, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.52, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.06, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.12, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.59, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.38, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.56, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.34, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.67, 
"optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 17.3, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.67, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 17.3, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [573.0, 567.0, 536.0, 579.0, 582.0, 582.0, 567.0, 587.0, 522.0, 582.0, 579.0, 578.0, 570.0, 582.0, 582.0, 582.0, 582.0, 582.0, 582.0, 570.0, 573.0, 573.0, 564.0, 576.0, 576.0, 573.0, 570.0, 579.0, 573.0, 579.0, 570.0, 579.0, 582.0, 579.0, 576.0, 627.0, 579.0, 587.0, 501.0, 579.0, 573.0, 558.0, 582.0, 525.0, 579.0, 430.0, 576.0, 530.0, 519.0, 579.0, 587.0, 582.0, 336.0, 570.0, 465.0, 582.0, 582.0, 582.0, 573.0, 582.0, 576.0, 582.0, 582.0, 627.0, 587.0, 630.0, 579.0, 579.0, 576.0, 579.0, 576.0, 573.0, 587.0, 573.0, 633.0, 630.0, 630.0, 573.0, 582.0, 582.0, 582.0, 516.0, 579.0, 582.0, 627.0, 576.0, 416.0, 582.0, 582.0, 579.0, 558.0, 582.0, 576.0, 576.0, 525.0, 579.0, 579.0, 513.0, 582.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [284.0, 289.0, 284.0, 283.0, 261.0, 275.0, 289.0, 290.0, 283.0, 299.0, 299.0, 283.0, 275.0, 292.0, 298.0, 289.0, 252.0, 270.0, 293.0, 289.0, 303.0, 276.0, 290.0, 288.0, 293.0, 277.0, 278.0, 304.0, 285.0, 297.0, 301.0, 281.0, 297.0, 285.0, 281.0, 301.0, 283.0, 299.0, 283.0, 287.0, 292.0, 281.0, 287.0, 286.0, 273.0, 291.0, 296.0, 280.0, 281.0, 295.0, 290.0, 283.0, 296.0, 274.0, 297.0, 282.0, 297.0, 276.0, 303.0, 276.0, 292.0, 278.0, 284.0, 295.0, 295.0, 287.0, 282.0, 297.0, 285.0, 291.0, 306.0, 321.0, 287.0, 292.0, 295.0, 292.0, 255.0, 246.0, 279.0, 300.0, 283.0, 290.0, 285.0, 273.0, 301.0, 281.0, 264.0, 261.0, 285.0, 294.0, 218.0, 212.0, 286.0, 290.0, 263.0, 267.0, 272.0, 
247.0, 300.0, 279.0, 304.0, 283.0, 301.0, 281.0, 162.0, 174.0, 279.0, 291.0, 231.0, 234.0, 276.0, 306.0, 286.0, 296.0, 281.0, 301.0, 288.0, 285.0, 290.0, 292.0, 291.0, 285.0, 298.0, 284.0, 296.0, 286.0, 311.0, 316.0, 290.0, 297.0, 319.0, 311.0, 289.0, 290.0, 293.0, 286.0, 286.0, 290.0, 283.0, 296.0, 295.0, 281.0, 305.0, 268.0, 297.0, 290.0, 296.0, 277.0, 314.0, 319.0, 319.0, 311.0, 308.0, 322.0, 291.0, 282.0, 296.0, 286.0, 297.0, 285.0, 285.0, 297.0, 266.0, 250.0, 295.0, 284.0, 291.0, 291.0, 324.0, 303.0, 298.0, 278.0, 213.0, 203.0, 287.0, 295.0, 296.0, 286.0, 283.0, 296.0, 279.0, 279.0, 285.0, 297.0, 282.0, 294.0, 285.0, 291.0, 274.0, 251.0, 293.0, 286.0, 289.0, 290.0, 267.0, 246.0, 283.0, 299.0, 299.0, 283.0]}, "sampler_perf": {"mean_env_wait_ms": 1.3565665278792982, "mean_processing_ms": 0.33921520895760066, "mean_inference_ms": 1.9109392629722073}, "off_policy_estimator": {}, "info": {"num_steps_trained": 3984000, "num_steps_sampled": 2124800, "sample_time_ms": 21230.051, "load_time_ms": 38.109, "grad_time_ms": 9623.189, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.006461843382567167, "policy_loss": -0.0018003573641180992, "vf_loss": 88.1545181274414, "vf_explained_var": 0.7546200752258301, "kl": 0.00197615590877831, "entropy": 1.106500267982483, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2124800, "episodes_total": 5312, "training_iteration": 166, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-08-04", "timestamp": 1660252084, "time_this_iter_s": 33.33124303817749, "time_total_s": 10501.076904296875, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 10501.076904296875, "timesteps_since_restore": 2124800, "iterations_since_restore": 166, "perf": {"cpu_util_percent": 33.48510638297872, "ram_util_percent": 58.49574468085109}}
+{"episode_reward_max": 633.0, "episode_reward_min": 416.0, "episode_reward_mean": 574.79, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 203.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 287.395}, "custom_metrics": {"sparse_reward_mean": 198.8, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 177.19, "shaped_reward_min": 136, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.54, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 18.23, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 15.32, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 17.6, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.41, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.51, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.17, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.22, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 14.86, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 17.42, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 6.5, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 13, "dish_pickup_agent_1_mean": 5.02, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.78, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.32, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.53, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.49, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.06, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.09, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.71, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.3, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.7, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.25, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.86, 
"optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 17.42, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.86, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 17.42, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 582.0, 576.0, 582.0, 573.0, 576.0, 576.0, 576.0, 582.0, 579.0, 579.0, 582.0, 582.0, 582.0, 579.0, 579.0, 582.0, 530.0, 579.0, 582.0, 579.0, 579.0, 525.0, 582.0, 579.0, 582.0, 582.0, 576.0, 582.0, 582.0, 522.0, 579.0, 587.0, 630.0, 579.0, 579.0, 576.0, 579.0, 576.0, 573.0, 587.0, 573.0, 633.0, 630.0, 630.0, 573.0, 582.0, 582.0, 582.0, 516.0, 579.0, 582.0, 627.0, 576.0, 416.0, 582.0, 582.0, 579.0, 558.0, 582.0, 576.0, 576.0, 525.0, 579.0, 579.0, 513.0, 582.0, 582.0, 573.0, 567.0, 536.0, 579.0, 582.0, 582.0, 567.0, 587.0, 522.0, 582.0, 579.0, 578.0, 570.0, 582.0, 582.0, 582.0, 582.0, 582.0, 582.0, 570.0, 573.0, 573.0, 564.0, 576.0, 576.0, 573.0, 570.0, 579.0, 573.0, 579.0, 570.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [283.0, 296.0, 301.0, 281.0, 293.0, 283.0, 294.0, 288.0, 288.0, 285.0, 292.0, 284.0, 282.0, 294.0, 291.0, 285.0, 293.0, 289.0, 292.0, 287.0, 288.0, 291.0, 301.0, 281.0, 295.0, 287.0, 291.0, 291.0, 297.0, 282.0, 294.0, 285.0, 293.0, 289.0, 269.0, 261.0, 279.0, 300.0, 298.0, 284.0, 270.0, 309.0, 288.0, 291.0, 263.0, 262.0, 293.0, 289.0, 303.0, 276.0, 286.0, 296.0, 301.0, 281.0, 286.0, 290.0, 296.0, 286.0, 300.0, 282.0, 272.0, 250.0, 303.0, 276.0, 290.0, 297.0, 319.0, 311.0, 289.0, 290.0, 293.0, 286.0, 286.0, 290.0, 283.0, 296.0, 295.0, 281.0, 305.0, 268.0, 297.0, 290.0, 296.0, 277.0, 314.0, 319.0, 319.0, 311.0, 308.0, 322.0, 291.0, 282.0, 296.0, 286.0, 297.0, 285.0, 285.0, 
297.0, 266.0, 250.0, 295.0, 284.0, 291.0, 291.0, 324.0, 303.0, 298.0, 278.0, 213.0, 203.0, 287.0, 295.0, 296.0, 286.0, 283.0, 296.0, 279.0, 279.0, 285.0, 297.0, 282.0, 294.0, 285.0, 291.0, 274.0, 251.0, 293.0, 286.0, 289.0, 290.0, 267.0, 246.0, 283.0, 299.0, 299.0, 283.0, 284.0, 289.0, 284.0, 283.0, 261.0, 275.0, 289.0, 290.0, 283.0, 299.0, 299.0, 283.0, 275.0, 292.0, 298.0, 289.0, 252.0, 270.0, 293.0, 289.0, 303.0, 276.0, 290.0, 288.0, 293.0, 277.0, 278.0, 304.0, 285.0, 297.0, 301.0, 281.0, 297.0, 285.0, 281.0, 301.0, 283.0, 299.0, 283.0, 287.0, 292.0, 281.0, 287.0, 286.0, 273.0, 291.0, 296.0, 280.0, 281.0, 295.0, 290.0, 283.0, 296.0, 274.0, 297.0, 282.0, 297.0, 276.0, 303.0, 276.0, 292.0, 278.0, 284.0, 295.0]}, "sampler_perf": {"mean_env_wait_ms": 1.351083451607247, "mean_processing_ms": 0.3381304952990823, "mean_inference_ms": 1.9054854328203157}, "off_policy_estimator": {}, "info": {"num_steps_trained": 4008000, "num_steps_sampled": 2137600, "sample_time_ms": 21012.721, "load_time_ms": 38.367, "grad_time_ms": 10014.737, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.006168690975755453, "policy_loss": -0.002181840827688575, "vf_loss": 88.96065521240234, "vf_explained_var": 0.762434184551239, "kl": 0.0017693521222099662, "entropy": 1.0910512208938599, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2137600, "episodes_total": 5344, "training_iteration": 167, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-08-35", "timestamp": 1660252115, "time_this_iter_s": 31.34629511833191, "time_total_s": 10532.423199415207, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 10532.423199415207, "timesteps_since_restore": 2137600, "iterations_since_restore": 167, "perf": {"cpu_util_percent": 33.334090909090904, "ram_util_percent": 58.479545454545466}}
+{"episode_reward_max": 633.0, "episode_reward_min": 452.0, "episode_reward_mean": 571.81, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 223.0}, "policy_reward_max": {"ppo": 326.0}, "policy_reward_mean": {"ppo": 285.905}, "custom_metrics": {"sparse_reward_mean": 197.8, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 176.21, "shaped_reward_min": 132, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.71, "onion_pickup_agent_0_min": 10, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 17.73, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 15.4, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 17.13, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.41, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.35, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.21, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.14, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.01, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 17.11, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.25, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.09, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.59, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.41, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.48, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.4, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.05, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.54, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.43, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.53, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.39, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.01, 
"optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 17.11, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.01, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 17.11, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 570.0, 525.0, 582.0, 579.0, 452.0, 579.0, 567.0, 530.0, 576.0, 582.0, 582.0, 525.0, 576.0, 627.0, 573.0, 573.0, 527.0, 576.0, 579.0, 564.0, 633.0, 582.0, 567.0, 536.0, 579.0, 579.0, 579.0, 579.0, 576.0, 576.0, 576.0, 579.0, 513.0, 582.0, 582.0, 573.0, 567.0, 536.0, 579.0, 582.0, 582.0, 567.0, 587.0, 522.0, 582.0, 579.0, 578.0, 570.0, 582.0, 582.0, 582.0, 582.0, 582.0, 582.0, 570.0, 573.0, 573.0, 564.0, 576.0, 576.0, 573.0, 570.0, 579.0, 573.0, 579.0, 570.0, 579.0, 579.0, 582.0, 576.0, 582.0, 573.0, 576.0, 576.0, 576.0, 582.0, 579.0, 579.0, 582.0, 582.0, 582.0, 579.0, 579.0, 582.0, 530.0, 579.0, 582.0, 579.0, 579.0, 525.0, 582.0, 579.0, 582.0, 582.0, 576.0, 582.0, 582.0, 522.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [283.0, 299.0, 292.0, 278.0, 255.0, 270.0, 287.0, 295.0, 292.0, 287.0, 223.0, 229.0, 274.0, 305.0, 272.0, 295.0, 272.0, 258.0, 280.0, 296.0, 291.0, 291.0, 288.0, 294.0, 269.0, 256.0, 294.0, 282.0, 312.0, 315.0, 291.0, 282.0, 302.0, 271.0, 261.0, 266.0, 281.0, 295.0, 298.0, 281.0, 280.0, 284.0, 326.0, 307.0, 296.0, 286.0, 288.0, 279.0, 267.0, 269.0, 298.0, 281.0, 288.0, 291.0, 291.0, 288.0, 294.0, 285.0, 275.0, 301.0, 294.0, 282.0, 303.0, 273.0, 289.0, 290.0, 267.0, 246.0, 283.0, 299.0, 299.0, 283.0, 284.0, 289.0, 284.0, 283.0, 261.0, 275.0, 289.0, 290.0, 283.0, 299.0, 299.0, 283.0, 275.0, 292.0, 298.0, 289.0, 252.0, 270.0, 293.0, 289.0, 303.0, 276.0, 290.0, 288.0, 293.0, 
277.0, 278.0, 304.0, 285.0, 297.0, 301.0, 281.0, 297.0, 285.0, 281.0, 301.0, 283.0, 299.0, 283.0, 287.0, 292.0, 281.0, 287.0, 286.0, 273.0, 291.0, 296.0, 280.0, 281.0, 295.0, 290.0, 283.0, 296.0, 274.0, 297.0, 282.0, 297.0, 276.0, 303.0, 276.0, 292.0, 278.0, 284.0, 295.0, 283.0, 296.0, 301.0, 281.0, 293.0, 283.0, 294.0, 288.0, 288.0, 285.0, 292.0, 284.0, 282.0, 294.0, 291.0, 285.0, 293.0, 289.0, 292.0, 287.0, 288.0, 291.0, 301.0, 281.0, 295.0, 287.0, 291.0, 291.0, 297.0, 282.0, 294.0, 285.0, 293.0, 289.0, 269.0, 261.0, 279.0, 300.0, 298.0, 284.0, 270.0, 309.0, 288.0, 291.0, 263.0, 262.0, 293.0, 289.0, 303.0, 276.0, 286.0, 296.0, 301.0, 281.0, 286.0, 290.0, 296.0, 286.0, 300.0, 282.0, 272.0, 250.0, 303.0, 276.0]}, "sampler_perf": {"mean_env_wait_ms": 1.3456449831196562, "mean_processing_ms": 0.3370529700799381, "mean_inference_ms": 1.8997987412977424}, "off_policy_estimator": {}, "info": {"num_steps_trained": 4032000, "num_steps_sampled": 2150400, "sample_time_ms": 20935.463, "load_time_ms": 38.009, "grad_time_ms": 9985.412, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0029837340116500854, "policy_loss": -0.005580740049481392, "vf_loss": 91.1910629272461, "vf_explained_var": 0.7490768432617188, "kl": 0.0017398769268766046, "entropy": 1.1092572212219238, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2150400, "episodes_total": 5376, "training_iteration": 168, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-09-04", "timestamp": 1660252144, "time_this_iter_s": 29.37734818458557, "time_total_s": 10561.800547599792, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 10561.800547599792, "timesteps_since_restore": 2150400, "iterations_since_restore": 168, "perf": {"cpu_util_percent": 32.31428571428572, "ram_util_percent": 58.38571428571428}}
+{"episode_reward_max": 633.0, "episode_reward_min": 452.0, "episode_reward_mean": 570.71, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 223.0}, "policy_reward_max": {"ppo": 326.0}, "policy_reward_mean": {"ppo": 285.355}, "custom_metrics": {"sparse_reward_mean": 197.4, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 175.91, "shaped_reward_min": 132, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.72, "onion_pickup_agent_0_min": 10, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 17.74, "onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 15.45, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 17.19, "useful_onion_pickup_agent_1_min": 11, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.44, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.3, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.17, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.11, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.94, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.14, "potting_onion_agent_1_min": 11, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 6.07, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.09, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.47, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.52, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.41, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.35, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.04, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.5, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.5, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.42, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.48, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.06, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 5, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.94, 
"optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.14, "optimal_onion_potting_agent_1_min": 11, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.94, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.14, "viable_onion_potting_agent_1_min": 11, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 579.0, 522.0, 576.0, 576.0, 582.0, 533.0, 536.0, 573.0, 570.0, 576.0, 579.0, 582.0, 576.0, 516.0, 579.0, 579.0, 579.0, 504.0, 582.0, 558.0, 579.0, 579.0, 576.0, 582.0, 582.0, 573.0, 573.0, 582.0, 576.0, 579.0, 579.0, 573.0, 579.0, 570.0, 579.0, 579.0, 582.0, 576.0, 582.0, 573.0, 576.0, 576.0, 576.0, 582.0, 579.0, 579.0, 582.0, 582.0, 582.0, 579.0, 579.0, 582.0, 530.0, 579.0, 582.0, 579.0, 579.0, 525.0, 582.0, 579.0, 582.0, 582.0, 576.0, 582.0, 582.0, 522.0, 579.0, 582.0, 570.0, 525.0, 582.0, 579.0, 452.0, 579.0, 567.0, 530.0, 576.0, 582.0, 582.0, 525.0, 576.0, 627.0, 573.0, 573.0, 527.0, 576.0, 579.0, 564.0, 633.0, 582.0, 567.0, 536.0, 579.0, 579.0, 579.0, 579.0, 576.0, 576.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [291.0, 288.0, 280.0, 299.0, 262.0, 260.0, 283.0, 293.0, 284.0, 292.0, 293.0, 289.0, 249.0, 284.0, 273.0, 263.0, 274.0, 299.0, 288.0, 282.0, 297.0, 279.0, 296.0, 283.0, 273.0, 309.0, 285.0, 291.0, 251.0, 265.0, 285.0, 294.0, 283.0, 296.0, 280.0, 299.0, 249.0, 255.0, 296.0, 286.0, 284.0, 274.0, 293.0, 286.0, 287.0, 292.0, 292.0, 284.0, 283.0, 299.0, 289.0, 293.0, 275.0, 298.0, 290.0, 283.0, 285.0, 297.0, 290.0, 286.0, 289.0, 290.0, 286.0, 293.0, 297.0, 276.0, 303.0, 276.0, 292.0, 278.0, 284.0, 295.0, 283.0, 296.0, 301.0, 281.0, 293.0, 283.0, 294.0, 288.0, 288.0, 285.0, 292.0, 284.0, 282.0, 294.0, 291.0, 285.0, 293.0, 289.0, 292.0, 287.0, 288.0, 291.0, 301.0, 281.0, 295.0, 
287.0, 291.0, 291.0, 297.0, 282.0, 294.0, 285.0, 293.0, 289.0, 269.0, 261.0, 279.0, 300.0, 298.0, 284.0, 270.0, 309.0, 288.0, 291.0, 263.0, 262.0, 293.0, 289.0, 303.0, 276.0, 286.0, 296.0, 301.0, 281.0, 286.0, 290.0, 296.0, 286.0, 300.0, 282.0, 272.0, 250.0, 303.0, 276.0, 283.0, 299.0, 292.0, 278.0, 255.0, 270.0, 287.0, 295.0, 292.0, 287.0, 223.0, 229.0, 274.0, 305.0, 272.0, 295.0, 272.0, 258.0, 280.0, 296.0, 291.0, 291.0, 288.0, 294.0, 269.0, 256.0, 294.0, 282.0, 312.0, 315.0, 291.0, 282.0, 302.0, 271.0, 261.0, 266.0, 281.0, 295.0, 298.0, 281.0, 280.0, 284.0, 326.0, 307.0, 296.0, 286.0, 288.0, 279.0, 267.0, 269.0, 298.0, 281.0, 288.0, 291.0, 291.0, 288.0, 294.0, 285.0, 275.0, 301.0, 294.0, 282.0, 303.0, 273.0]}, "sampler_perf": {"mean_env_wait_ms": 1.3402643108658208, "mean_processing_ms": 0.3359831351985412, "mean_inference_ms": 1.8939334033513233}, "off_policy_estimator": {}, "info": {"num_steps_trained": 4056000, "num_steps_sampled": 2163200, "sample_time_ms": 20463.428, "load_time_ms": 38.087, "grad_time_ms": 10025.502, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.005885738879442215, "policy_loss": -0.001977432519197464, "vf_loss": 84.17040252685547, "vf_explained_var": 0.7570996880531311, "kl": 0.0022582625970244408, "entropy": 1.1077399253845215, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2163200, "episodes_total": 5408, "training_iteration": 169, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-09-33", "timestamp": 1660252173, "time_this_iter_s": 28.335352182388306, "time_total_s": 10590.13589978218, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 10590.13589978218, "timesteps_since_restore": 2163200, "iterations_since_restore": 169, "perf": {"cpu_util_percent": 29.0625, "ram_util_percent": 58.379999999999995}}
+{"episode_reward_max": 633.0, "episode_reward_min": 422.0, "episode_reward_mean": 569.21, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 209.0}, "policy_reward_max": {"ppo": 326.0}, "policy_reward_mean": {"ppo": 284.605}, "custom_metrics": {"sparse_reward_mean": 196.8, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 175.61, "shaped_reward_min": 132, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.25, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 17.33, "onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.01, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 16.65, "useful_onion_pickup_agent_1_min": 11, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.51, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.4, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.17, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.17, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.4, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 16.63, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.65, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.4, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.02, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.87, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.39, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.31, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.14, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.89, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.03, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.87, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.06, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 5, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.4, 
"optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 16.63, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.4, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 16.63, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 576.0, 536.0, 561.0, 576.0, 570.0, 587.0, 582.0, 573.0, 582.0, 576.0, 579.0, 576.0, 573.0, 582.0, 579.0, 579.0, 582.0, 576.0, 579.0, 630.0, 536.0, 582.0, 587.0, 579.0, 582.0, 422.0, 587.0, 579.0, 576.0, 536.0, 573.0, 582.0, 582.0, 522.0, 579.0, 582.0, 570.0, 525.0, 582.0, 579.0, 452.0, 579.0, 567.0, 530.0, 576.0, 582.0, 582.0, 525.0, 576.0, 627.0, 573.0, 573.0, 527.0, 576.0, 579.0, 564.0, 633.0, 582.0, 567.0, 536.0, 579.0, 579.0, 579.0, 579.0, 576.0, 576.0, 576.0, 579.0, 579.0, 522.0, 576.0, 576.0, 582.0, 533.0, 536.0, 573.0, 570.0, 576.0, 579.0, 582.0, 576.0, 516.0, 579.0, 579.0, 579.0, 504.0, 582.0, 558.0, 579.0, 579.0, 576.0, 582.0, 582.0, 573.0, 573.0, 582.0, 576.0, 579.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [293.0, 286.0, 293.0, 283.0, 250.0, 286.0, 275.0, 286.0, 282.0, 294.0, 276.0, 294.0, 296.0, 291.0, 283.0, 299.0, 280.0, 293.0, 286.0, 296.0, 295.0, 281.0, 288.0, 291.0, 293.0, 283.0, 292.0, 281.0, 292.0, 290.0, 281.0, 298.0, 299.0, 280.0, 293.0, 289.0, 286.0, 290.0, 293.0, 286.0, 311.0, 319.0, 254.0, 282.0, 279.0, 303.0, 296.0, 291.0, 278.0, 301.0, 294.0, 288.0, 209.0, 213.0, 282.0, 305.0, 287.0, 292.0, 291.0, 285.0, 258.0, 278.0, 278.0, 295.0, 296.0, 286.0, 300.0, 282.0, 272.0, 250.0, 303.0, 276.0, 283.0, 299.0, 292.0, 278.0, 255.0, 270.0, 287.0, 295.0, 292.0, 287.0, 223.0, 229.0, 274.0, 305.0, 272.0, 295.0, 272.0, 258.0, 280.0, 296.0, 291.0, 291.0, 288.0, 294.0, 269.0, 
256.0, 294.0, 282.0, 312.0, 315.0, 291.0, 282.0, 302.0, 271.0, 261.0, 266.0, 281.0, 295.0, 298.0, 281.0, 280.0, 284.0, 326.0, 307.0, 296.0, 286.0, 288.0, 279.0, 267.0, 269.0, 298.0, 281.0, 288.0, 291.0, 291.0, 288.0, 294.0, 285.0, 275.0, 301.0, 294.0, 282.0, 303.0, 273.0, 291.0, 288.0, 280.0, 299.0, 262.0, 260.0, 283.0, 293.0, 284.0, 292.0, 293.0, 289.0, 249.0, 284.0, 273.0, 263.0, 274.0, 299.0, 288.0, 282.0, 297.0, 279.0, 296.0, 283.0, 273.0, 309.0, 285.0, 291.0, 251.0, 265.0, 285.0, 294.0, 283.0, 296.0, 280.0, 299.0, 249.0, 255.0, 296.0, 286.0, 284.0, 274.0, 293.0, 286.0, 287.0, 292.0, 292.0, 284.0, 283.0, 299.0, 289.0, 293.0, 275.0, 298.0, 290.0, 283.0, 285.0, 297.0, 290.0, 286.0, 289.0, 290.0, 286.0, 293.0]}, "sampler_perf": {"mean_env_wait_ms": 1.334950887862658, "mean_processing_ms": 0.3349244818889894, "mean_inference_ms": 1.8881606558059565}, "off_policy_estimator": {}, "info": {"num_steps_trained": 4080000, "num_steps_sampled": 2176000, "sample_time_ms": 20370.044, "load_time_ms": 38.206, "grad_time_ms": 10005.991, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.00501619465649128, "policy_loss": -0.0036706894170492887, "vf_loss": 92.4554214477539, "vf_explained_var": 0.7515974044799805, "kl": 0.0018303836695849895, "entropy": 1.1173133850097656, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2176000, "episodes_total": 5440, "training_iteration": 170, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-10-00", "timestamp": 1660252200, "time_this_iter_s": 27.505138874053955, "time_total_s": 10617.641038656235, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 10617.641038656235, "timesteps_since_restore": 2176000, "iterations_since_restore": 170, "perf": {"cpu_util_percent": 30.13076923076923, "ram_util_percent": 58.446153846153834}}
+{"episode_reward_max": 630.0, "episode_reward_min": 422.0, "episode_reward_mean": 571.29, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 209.0}, "policy_reward_max": {"ppo": 319.0}, "policy_reward_mean": {"ppo": 285.645}, "custom_metrics": {"sparse_reward_mean": 197.2, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 176.89, "shaped_reward_min": 141, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.63, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 17.26, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.44, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 16.6, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.58, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 0.46, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.11, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.2, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.74, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 16.52, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.52, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.5, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 4.97, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 5.05, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.35, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.29, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.06, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 5.02, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 5.05, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 4.89, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 5.03, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.06, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 5, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.74, 
"optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 16.52, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.74, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 16.52, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [587.0, 579.0, 573.0, 587.0, 579.0, 576.0, 582.0, 587.0, 582.0, 582.0, 579.0, 579.0, 536.0, 582.0, 579.0, 576.0, 582.0, 579.0, 582.0, 582.0, 573.0, 501.0, 576.0, 522.0, 582.0, 582.0, 579.0, 576.0, 590.0, 539.0, 582.0, 582.0, 579.0, 576.0, 576.0, 576.0, 579.0, 579.0, 522.0, 576.0, 576.0, 582.0, 533.0, 536.0, 573.0, 570.0, 576.0, 579.0, 582.0, 576.0, 516.0, 579.0, 579.0, 579.0, 504.0, 582.0, 558.0, 579.0, 579.0, 576.0, 582.0, 582.0, 573.0, 573.0, 582.0, 576.0, 579.0, 579.0, 579.0, 576.0, 536.0, 561.0, 576.0, 570.0, 587.0, 582.0, 573.0, 582.0, 576.0, 579.0, 576.0, 573.0, 582.0, 579.0, 579.0, 582.0, 576.0, 579.0, 630.0, 536.0, 582.0, 587.0, 579.0, 582.0, 422.0, 587.0, 579.0, 576.0, 536.0, 573.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [300.0, 287.0, 303.0, 276.0, 280.0, 293.0, 294.0, 293.0, 295.0, 284.0, 305.0, 271.0, 293.0, 289.0, 292.0, 295.0, 287.0, 295.0, 283.0, 299.0, 296.0, 283.0, 288.0, 291.0, 271.0, 265.0, 295.0, 287.0, 288.0, 291.0, 278.0, 298.0, 286.0, 296.0, 289.0, 290.0, 287.0, 295.0, 297.0, 285.0, 279.0, 294.0, 269.0, 232.0, 287.0, 289.0, 254.0, 268.0, 291.0, 291.0, 276.0, 306.0, 295.0, 284.0, 278.0, 298.0, 293.0, 297.0, 271.0, 268.0, 299.0, 283.0, 290.0, 292.0, 294.0, 285.0, 275.0, 301.0, 294.0, 282.0, 303.0, 273.0, 291.0, 288.0, 280.0, 299.0, 262.0, 260.0, 283.0, 293.0, 284.0, 292.0, 293.0, 289.0, 249.0, 284.0, 273.0, 263.0, 274.0, 299.0, 288.0, 282.0, 297.0, 279.0, 296.0, 283.0, 273.0, 
309.0, 285.0, 291.0, 251.0, 265.0, 285.0, 294.0, 283.0, 296.0, 280.0, 299.0, 249.0, 255.0, 296.0, 286.0, 284.0, 274.0, 293.0, 286.0, 287.0, 292.0, 292.0, 284.0, 283.0, 299.0, 289.0, 293.0, 275.0, 298.0, 290.0, 283.0, 285.0, 297.0, 290.0, 286.0, 289.0, 290.0, 286.0, 293.0, 293.0, 286.0, 293.0, 283.0, 250.0, 286.0, 275.0, 286.0, 282.0, 294.0, 276.0, 294.0, 296.0, 291.0, 283.0, 299.0, 280.0, 293.0, 286.0, 296.0, 295.0, 281.0, 288.0, 291.0, 293.0, 283.0, 292.0, 281.0, 292.0, 290.0, 281.0, 298.0, 299.0, 280.0, 293.0, 289.0, 286.0, 290.0, 293.0, 286.0, 311.0, 319.0, 254.0, 282.0, 279.0, 303.0, 296.0, 291.0, 278.0, 301.0, 294.0, 288.0, 209.0, 213.0, 282.0, 305.0, 287.0, 292.0, 291.0, 285.0, 258.0, 278.0, 278.0, 295.0]}, "sampler_perf": {"mean_env_wait_ms": 1.3296971139486637, "mean_processing_ms": 0.333876638718542, "mean_inference_ms": 1.8823863210387035}, "off_policy_estimator": {}, "info": {"num_steps_trained": 4104000, "num_steps_sampled": 2188800, "sample_time_ms": 20376.689, "load_time_ms": 38.395, "grad_time_ms": 9898.441, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.001497833989560604, "policy_loss": -0.006948364432901144, "vf_loss": 90.00249481201172, "vf_explained_var": 0.7635095119476318, "kl": 0.0017910072347149253, "entropy": 1.1081151962280273, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2188800, "episodes_total": 5472, "training_iteration": 171, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-10-27", "timestamp": 1660252227, "time_this_iter_s": 27.183032989501953, "time_total_s": 10644.824071645737, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 10644.824071645737, "timesteps_since_restore": 2188800, "iterations_since_restore": 171, "perf": {"cpu_util_percent": 32.57105263157895, "ram_util_percent": 58.3842105263158}}
+{"episode_reward_max": 630.0, "episode_reward_min": 422.0, "episode_reward_mean": 573.69, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 209.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 286.845}, "custom_metrics": {"sparse_reward_mean": 197.8, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 178.09, "shaped_reward_min": 141, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.79, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 17.25, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.54, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 16.64, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.63, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 0.48, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.13, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.2, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.86, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 16.51, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.61, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.45, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 5.16, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 5.05, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.32, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.28, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.07, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 5.09, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.98, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.01, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.96, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.86, 
"optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 16.51, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.86, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 16.51, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 579.0, 576.0, 582.0, 582.0, 630.0, 573.0, 579.0, 530.0, 576.0, 627.0, 533.0, 530.0, 579.0, 579.0, 579.0, 525.0, 630.0, 519.0, 530.0, 582.0, 627.0, 576.0, 587.0, 582.0, 582.0, 579.0, 582.0, 582.0, 582.0, 567.0, 579.0, 582.0, 576.0, 579.0, 579.0, 579.0, 576.0, 536.0, 561.0, 576.0, 570.0, 587.0, 582.0, 573.0, 582.0, 576.0, 579.0, 576.0, 573.0, 582.0, 579.0, 579.0, 582.0, 576.0, 579.0, 630.0, 536.0, 582.0, 587.0, 579.0, 582.0, 422.0, 587.0, 579.0, 576.0, 536.0, 573.0, 587.0, 579.0, 573.0, 587.0, 579.0, 576.0, 582.0, 587.0, 582.0, 582.0, 579.0, 579.0, 536.0, 582.0, 579.0, 576.0, 582.0, 579.0, 582.0, 582.0, 573.0, 501.0, 576.0, 522.0, 582.0, 582.0, 579.0, 576.0, 590.0, 539.0, 582.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [278.0, 304.0, 286.0, 293.0, 290.0, 286.0, 296.0, 286.0, 289.0, 293.0, 316.0, 314.0, 298.0, 275.0, 286.0, 293.0, 261.0, 269.0, 278.0, 298.0, 308.0, 319.0, 265.0, 268.0, 272.0, 258.0, 283.0, 296.0, 294.0, 285.0, 292.0, 287.0, 263.0, 262.0, 305.0, 325.0, 280.0, 239.0, 272.0, 258.0, 306.0, 276.0, 314.0, 313.0, 288.0, 288.0, 296.0, 291.0, 292.0, 290.0, 289.0, 293.0, 275.0, 304.0, 294.0, 288.0, 305.0, 277.0, 297.0, 285.0, 288.0, 279.0, 293.0, 286.0, 285.0, 297.0, 290.0, 286.0, 289.0, 290.0, 286.0, 293.0, 293.0, 286.0, 293.0, 283.0, 250.0, 286.0, 275.0, 286.0, 282.0, 294.0, 276.0, 294.0, 296.0, 291.0, 283.0, 299.0, 280.0, 293.0, 286.0, 296.0, 295.0, 281.0, 288.0, 291.0, 293.0, 
283.0, 292.0, 281.0, 292.0, 290.0, 281.0, 298.0, 299.0, 280.0, 293.0, 289.0, 286.0, 290.0, 293.0, 286.0, 311.0, 319.0, 254.0, 282.0, 279.0, 303.0, 296.0, 291.0, 278.0, 301.0, 294.0, 288.0, 209.0, 213.0, 282.0, 305.0, 287.0, 292.0, 291.0, 285.0, 258.0, 278.0, 278.0, 295.0, 300.0, 287.0, 303.0, 276.0, 280.0, 293.0, 294.0, 293.0, 295.0, 284.0, 305.0, 271.0, 293.0, 289.0, 292.0, 295.0, 287.0, 295.0, 283.0, 299.0, 296.0, 283.0, 288.0, 291.0, 271.0, 265.0, 295.0, 287.0, 288.0, 291.0, 278.0, 298.0, 286.0, 296.0, 289.0, 290.0, 287.0, 295.0, 297.0, 285.0, 279.0, 294.0, 269.0, 232.0, 287.0, 289.0, 254.0, 268.0, 291.0, 291.0, 276.0, 306.0, 295.0, 284.0, 278.0, 298.0, 293.0, 297.0, 271.0, 268.0, 299.0, 283.0, 290.0, 292.0]}, "sampler_perf": {"mean_env_wait_ms": 1.3245227944112767, "mean_processing_ms": 0.33284763340426393, "mean_inference_ms": 1.876875864691374}, "off_policy_estimator": {}, "info": {"num_steps_trained": 4128000, "num_steps_sampled": 2201600, "sample_time_ms": 20481.131, "load_time_ms": 38.136, "grad_time_ms": 9697.559, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0045767915435135365, "policy_loss": -0.0035035184118896723, "vf_loss": 86.42507934570312, "vf_explained_var": 0.7563931345939636, "kl": 0.002320564817637205, "entropy": 1.1244043111801147, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2201600, "episodes_total": 5504, "training_iteration": 172, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-10-56", "timestamp": 1660252256, "time_this_iter_s": 28.577091932296753, "time_total_s": 10673.401163578033, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 10673.401163578033, "timesteps_since_restore": 2201600, "iterations_since_restore": 172, "perf": {"cpu_util_percent": 35.19024390243903, "ram_util_percent": 58.548780487804876}}
+{"episode_reward_max": 630.0, "episode_reward_min": 496.0, "episode_reward_mean": 574.03, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 232.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 287.015}, "custom_metrics": {"sparse_reward_mean": 198.2, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 177.63, "shaped_reward_min": 138, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.68, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 17.48, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 16.35, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 16.83, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.68, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 0.53, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.14, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.19, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.69, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 16.68, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.78, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.34, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 5.28, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.76, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.35, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.31, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.06, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 5.24, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.85, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.17, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.8, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 15.69, 
"optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 16.68, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.69, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 16.68, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 582.0, 579.0, 579.0, 582.0, 498.0, 579.0, 579.0, 576.0, 561.0, 587.0, 570.0, 552.0, 587.0, 576.0, 579.0, 582.0, 576.0, 587.0, 576.0, 579.0, 579.0, 579.0, 582.0, 573.0, 570.0, 496.0, 579.0, 630.0, 576.0, 567.0, 582.0, 579.0, 576.0, 536.0, 573.0, 587.0, 579.0, 573.0, 587.0, 579.0, 576.0, 582.0, 587.0, 582.0, 582.0, 579.0, 579.0, 536.0, 582.0, 579.0, 576.0, 582.0, 579.0, 582.0, 582.0, 573.0, 501.0, 576.0, 522.0, 582.0, 582.0, 579.0, 576.0, 590.0, 539.0, 582.0, 582.0, 582.0, 579.0, 576.0, 582.0, 582.0, 630.0, 573.0, 579.0, 530.0, 576.0, 627.0, 533.0, 530.0, 579.0, 579.0, 579.0, 525.0, 630.0, 519.0, 530.0, 582.0, 627.0, 576.0, 587.0, 582.0, 582.0, 579.0, 582.0, 582.0, 582.0, 567.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [286.0, 293.0, 290.0, 292.0, 292.0, 287.0, 282.0, 297.0, 291.0, 291.0, 241.0, 257.0, 289.0, 290.0, 290.0, 289.0, 280.0, 296.0, 275.0, 286.0, 301.0, 286.0, 281.0, 289.0, 271.0, 281.0, 302.0, 285.0, 276.0, 300.0, 295.0, 284.0, 292.0, 290.0, 291.0, 285.0, 295.0, 292.0, 299.0, 277.0, 289.0, 290.0, 293.0, 286.0, 284.0, 295.0, 290.0, 292.0, 285.0, 288.0, 291.0, 279.0, 253.0, 243.0, 297.0, 282.0, 318.0, 312.0, 283.0, 293.0, 291.0, 276.0, 286.0, 296.0, 287.0, 292.0, 291.0, 285.0, 258.0, 278.0, 278.0, 295.0, 300.0, 287.0, 303.0, 276.0, 280.0, 293.0, 294.0, 293.0, 295.0, 284.0, 305.0, 271.0, 293.0, 289.0, 292.0, 295.0, 287.0, 295.0, 283.0, 299.0, 296.0, 283.0, 288.0, 291.0, 271.0, 
265.0, 295.0, 287.0, 288.0, 291.0, 278.0, 298.0, 286.0, 296.0, 289.0, 290.0, 287.0, 295.0, 297.0, 285.0, 279.0, 294.0, 269.0, 232.0, 287.0, 289.0, 254.0, 268.0, 291.0, 291.0, 276.0, 306.0, 295.0, 284.0, 278.0, 298.0, 293.0, 297.0, 271.0, 268.0, 299.0, 283.0, 290.0, 292.0, 278.0, 304.0, 286.0, 293.0, 290.0, 286.0, 296.0, 286.0, 289.0, 293.0, 316.0, 314.0, 298.0, 275.0, 286.0, 293.0, 261.0, 269.0, 278.0, 298.0, 308.0, 319.0, 265.0, 268.0, 272.0, 258.0, 283.0, 296.0, 294.0, 285.0, 292.0, 287.0, 263.0, 262.0, 305.0, 325.0, 280.0, 239.0, 272.0, 258.0, 306.0, 276.0, 314.0, 313.0, 288.0, 288.0, 296.0, 291.0, 292.0, 290.0, 289.0, 293.0, 275.0, 304.0, 294.0, 288.0, 305.0, 277.0, 297.0, 285.0, 288.0, 279.0, 293.0, 286.0]}, "sampler_perf": {"mean_env_wait_ms": 1.3194261704658106, "mean_processing_ms": 0.3318357537689677, "mean_inference_ms": 1.8715822466645085}, "off_policy_estimator": {}, "info": {"num_steps_trained": 4152000, "num_steps_sampled": 2214400, "sample_time_ms": 20271.412, "load_time_ms": 38.227, "grad_time_ms": 9546.44, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0062603577971458435, "policy_loss": -0.0018654250307008624, "vf_loss": 86.83306121826172, "vf_explained_var": 0.7576972842216492, "kl": 0.0021647585090249777, "entropy": 1.1150306463241577, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2214400, "episodes_total": 5536, "training_iteration": 173, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-11-25", "timestamp": 1660252285, "time_this_iter_s": 28.57458209991455, "time_total_s": 10701.975745677948, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 10701.975745677948, "timesteps_since_restore": 2214400, "iterations_since_restore": 173, "perf": {"cpu_util_percent": 30.26, "ram_util_percent": 58.657500000000006}}
+{"episode_reward_max": 633.0, "episode_reward_min": 496.0, "episode_reward_mean": 576.58, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 239.0}, "policy_reward_max": {"ppo": 329.0}, "policy_reward_mean": {"ppo": 288.29}, "custom_metrics": {"sparse_reward_mean": 199.4, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 177.78, "shaped_reward_min": 138, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.4, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 17.65, "onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 16.13, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 17.01, "useful_onion_pickup_agent_1_min": 11, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.57, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.53, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.16, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.15, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.53, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 16.86, "potting_onion_agent_1_min": 11, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.94, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.26, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.38, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.64, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.33, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.33, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.04, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.05, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.38, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.74, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.32, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.7, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 15.53, 
"optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 16.86, "optimal_onion_potting_agent_1_min": 11, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.53, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 16.86, "viable_onion_potting_agent_1_min": 11, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [567.0, 579.0, 627.0, 573.0, 633.0, 582.0, 567.0, 530.0, 573.0, 564.0, 582.0, 573.0, 579.0, 582.0, 579.0, 558.0, 582.0, 582.0, 582.0, 576.0, 573.0, 582.0, 627.0, 630.0, 582.0, 582.0, 582.0, 573.0, 536.0, 582.0, 579.0, 582.0, 590.0, 539.0, 582.0, 582.0, 582.0, 579.0, 576.0, 582.0, 582.0, 630.0, 573.0, 579.0, 530.0, 576.0, 627.0, 533.0, 530.0, 579.0, 579.0, 579.0, 525.0, 630.0, 519.0, 530.0, 582.0, 627.0, 576.0, 587.0, 582.0, 582.0, 579.0, 582.0, 582.0, 582.0, 567.0, 579.0, 579.0, 582.0, 579.0, 579.0, 582.0, 498.0, 579.0, 579.0, 576.0, 561.0, 587.0, 570.0, 552.0, 587.0, 576.0, 579.0, 582.0, 576.0, 587.0, 576.0, 579.0, 579.0, 579.0, 582.0, 573.0, 570.0, 496.0, 579.0, 630.0, 576.0, 567.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [282.0, 285.0, 290.0, 289.0, 298.0, 329.0, 270.0, 303.0, 308.0, 325.0, 301.0, 281.0, 279.0, 288.0, 268.0, 262.0, 296.0, 277.0, 286.0, 278.0, 289.0, 293.0, 285.0, 288.0, 294.0, 285.0, 301.0, 281.0, 295.0, 284.0, 281.0, 277.0, 291.0, 291.0, 308.0, 274.0, 286.0, 296.0, 300.0, 276.0, 284.0, 289.0, 289.0, 293.0, 316.0, 311.0, 321.0, 309.0, 286.0, 296.0, 297.0, 285.0, 298.0, 284.0, 283.0, 290.0, 270.0, 266.0, 285.0, 297.0, 292.0, 287.0, 293.0, 289.0, 293.0, 297.0, 271.0, 268.0, 299.0, 283.0, 290.0, 292.0, 278.0, 304.0, 286.0, 293.0, 290.0, 286.0, 296.0, 286.0, 289.0, 293.0, 316.0, 314.0, 298.0, 275.0, 286.0, 293.0, 261.0, 269.0, 278.0, 298.0, 308.0, 319.0, 265.0, 268.0, 272.0, 
258.0, 283.0, 296.0, 294.0, 285.0, 292.0, 287.0, 263.0, 262.0, 305.0, 325.0, 280.0, 239.0, 272.0, 258.0, 306.0, 276.0, 314.0, 313.0, 288.0, 288.0, 296.0, 291.0, 292.0, 290.0, 289.0, 293.0, 275.0, 304.0, 294.0, 288.0, 305.0, 277.0, 297.0, 285.0, 288.0, 279.0, 293.0, 286.0, 286.0, 293.0, 290.0, 292.0, 292.0, 287.0, 282.0, 297.0, 291.0, 291.0, 241.0, 257.0, 289.0, 290.0, 290.0, 289.0, 280.0, 296.0, 275.0, 286.0, 301.0, 286.0, 281.0, 289.0, 271.0, 281.0, 302.0, 285.0, 276.0, 300.0, 295.0, 284.0, 292.0, 290.0, 291.0, 285.0, 295.0, 292.0, 299.0, 277.0, 289.0, 290.0, 293.0, 286.0, 284.0, 295.0, 290.0, 292.0, 285.0, 288.0, 291.0, 279.0, 253.0, 243.0, 297.0, 282.0, 318.0, 312.0, 283.0, 293.0, 291.0, 276.0, 286.0, 296.0]}, "sampler_perf": {"mean_env_wait_ms": 1.3143927753552085, "mean_processing_ms": 0.33083733112110686, "mean_inference_ms": 1.8663998879774686}, "off_policy_estimator": {}, "info": {"num_steps_trained": 4176000, "num_steps_sampled": 2227200, "sample_time_ms": 20085.918, "load_time_ms": 37.439, "grad_time_ms": 9177.291, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0001482805237174034, "policy_loss": -0.00769606651738286, "vf_loss": 81.13143920898438, "vf_explained_var": 0.764965295791626, "kl": 0.0018476974219083786, "entropy": 1.1307072639465332, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2227200, "episodes_total": 5568, "training_iteration": 174, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-11-52", "timestamp": 1660252312, "time_this_iter_s": 27.522704124450684, "time_total_s": 10729.498449802399, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 10729.498449802399, "timesteps_since_restore": 2227200, "iterations_since_restore": 174, "perf": {"cpu_util_percent": 34.294871794871796, "ram_util_percent": 58.587179487179476}}
+{"episode_reward_max": 633.0, "episode_reward_min": 393.0, "episode_reward_mean": 573.02, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 196.0}, "policy_reward_max": {"ppo": 329.0}, "policy_reward_mean": {"ppo": 286.51}, "custom_metrics": {"sparse_reward_mean": 198.6, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 175.82, "shaped_reward_min": 113, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.37, "onion_pickup_agent_0_min": 10, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 17.56, "onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 16.07, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 16.82, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.62, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 0.59, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.22, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.24, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.43, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 16.68, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.99, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.37, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.2, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.58, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.48, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 0.47, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.07, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.08, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.3, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.74, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.26, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.69, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 15.43, 
"optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 16.68, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.43, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 16.68, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [507.0, 579.0, 582.0, 576.0, 582.0, 393.0, 465.0, 579.0, 582.0, 570.0, 627.0, 579.0, 573.0, 576.0, 579.0, 504.0, 579.0, 579.0, 576.0, 527.0, 579.0, 519.0, 579.0, 587.0, 576.0, 633.0, 579.0, 576.0, 582.0, 579.0, 524.0, 627.0, 582.0, 582.0, 567.0, 579.0, 579.0, 582.0, 579.0, 579.0, 582.0, 498.0, 579.0, 579.0, 576.0, 561.0, 587.0, 570.0, 552.0, 587.0, 576.0, 579.0, 582.0, 576.0, 587.0, 576.0, 579.0, 579.0, 579.0, 582.0, 573.0, 570.0, 496.0, 579.0, 630.0, 576.0, 567.0, 582.0, 567.0, 579.0, 627.0, 573.0, 633.0, 582.0, 567.0, 530.0, 573.0, 564.0, 582.0, 573.0, 579.0, 582.0, 579.0, 558.0, 582.0, 582.0, 582.0, 576.0, 573.0, 582.0, 627.0, 630.0, 582.0, 582.0, 582.0, 573.0, 536.0, 582.0, 579.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [239.0, 268.0, 286.0, 293.0, 283.0, 299.0, 284.0, 292.0, 297.0, 285.0, 197.0, 196.0, 229.0, 236.0, 294.0, 285.0, 285.0, 297.0, 289.0, 281.0, 317.0, 310.0, 301.0, 278.0, 278.0, 295.0, 282.0, 294.0, 284.0, 295.0, 262.0, 242.0, 297.0, 282.0, 290.0, 289.0, 295.0, 281.0, 272.0, 255.0, 288.0, 291.0, 260.0, 259.0, 287.0, 292.0, 294.0, 293.0, 286.0, 290.0, 309.0, 324.0, 285.0, 294.0, 285.0, 291.0, 297.0, 285.0, 288.0, 291.0, 270.0, 254.0, 305.0, 322.0, 305.0, 277.0, 297.0, 285.0, 288.0, 279.0, 293.0, 286.0, 286.0, 293.0, 290.0, 292.0, 292.0, 287.0, 282.0, 297.0, 291.0, 291.0, 241.0, 257.0, 289.0, 290.0, 290.0, 289.0, 280.0, 296.0, 275.0, 286.0, 301.0, 286.0, 281.0, 289.0, 271.0, 
281.0, 302.0, 285.0, 276.0, 300.0, 295.0, 284.0, 292.0, 290.0, 291.0, 285.0, 295.0, 292.0, 299.0, 277.0, 289.0, 290.0, 293.0, 286.0, 284.0, 295.0, 290.0, 292.0, 285.0, 288.0, 291.0, 279.0, 253.0, 243.0, 297.0, 282.0, 318.0, 312.0, 283.0, 293.0, 291.0, 276.0, 286.0, 296.0, 282.0, 285.0, 290.0, 289.0, 298.0, 329.0, 270.0, 303.0, 308.0, 325.0, 301.0, 281.0, 279.0, 288.0, 268.0, 262.0, 296.0, 277.0, 286.0, 278.0, 289.0, 293.0, 285.0, 288.0, 294.0, 285.0, 301.0, 281.0, 295.0, 284.0, 281.0, 277.0, 291.0, 291.0, 308.0, 274.0, 286.0, 296.0, 300.0, 276.0, 284.0, 289.0, 289.0, 293.0, 316.0, 311.0, 321.0, 309.0, 286.0, 296.0, 297.0, 285.0, 298.0, 284.0, 283.0, 290.0, 270.0, 266.0, 285.0, 297.0, 292.0, 287.0, 293.0, 289.0]}, "sampler_perf": {"mean_env_wait_ms": 1.3094165014431445, "mean_processing_ms": 0.3298474823059415, "mean_inference_ms": 1.8613034748518011}, "off_policy_estimator": {}, "info": {"num_steps_trained": 4200000, "num_steps_sampled": 2240000, "sample_time_ms": 20009.535, "load_time_ms": 37.081, "grad_time_ms": 9049.935, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0033403884153813124, "policy_loss": -0.004778089467436075, "vf_loss": 86.8664779663086, "vf_explained_var": 0.7622640132904053, "kl": 0.0018111681565642357, "entropy": 1.1363428831100464, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2240000, "episodes_total": 5600, "training_iteration": 175, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-12-22", "timestamp": 1660252342, "time_this_iter_s": 29.488188982009888, "time_total_s": 10758.986638784409, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 10758.986638784409, "timesteps_since_restore": 2240000, "iterations_since_restore": 175, "perf": {"cpu_util_percent": 31.97380952380952, "ram_util_percent": 58.61666666666667}}
+{"episode_reward_max": 633.0, "episode_reward_min": 393.0, "episode_reward_mean": 573.67, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 196.0}, "policy_reward_max": {"ppo": 329.0}, "policy_reward_mean": {"ppo": 286.835}, "custom_metrics": {"sparse_reward_mean": 198.6, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 176.47, "shaped_reward_min": 113, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.58, "onion_pickup_agent_0_min": 10, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 17.4, "onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 16.24, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 16.7, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.68, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 0.58, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.26, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.29, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 15.57, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 16.55, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.82, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.52, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.17, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.85, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.39, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 0.48, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.06, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.07, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.2, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.82, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.18, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.77, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.57, 
"optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 16.55, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.57, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 16.55, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 582.0, 582.0, 627.0, 582.0, 582.0, 587.0, 579.0, 579.0, 582.0, 579.0, 579.0, 582.0, 576.0, 582.0, 579.0, 582.0, 579.0, 573.0, 576.0, 579.0, 530.0, 576.0, 579.0, 576.0, 525.0, 576.0, 519.0, 530.0, 576.0, 579.0, 582.0, 630.0, 576.0, 567.0, 582.0, 567.0, 579.0, 627.0, 573.0, 633.0, 582.0, 567.0, 530.0, 573.0, 564.0, 582.0, 573.0, 579.0, 582.0, 579.0, 558.0, 582.0, 582.0, 582.0, 576.0, 573.0, 582.0, 627.0, 630.0, 582.0, 582.0, 582.0, 573.0, 536.0, 582.0, 579.0, 582.0, 507.0, 579.0, 582.0, 576.0, 582.0, 393.0, 465.0, 579.0, 582.0, 570.0, 627.0, 579.0, 573.0, 576.0, 579.0, 504.0, 579.0, 579.0, 576.0, 527.0, 579.0, 519.0, 579.0, 587.0, 576.0, 633.0, 579.0, 576.0, 582.0, 579.0, 524.0, 627.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [280.0, 302.0, 293.0, 289.0, 294.0, 288.0, 310.0, 317.0, 294.0, 288.0, 294.0, 288.0, 276.0, 311.0, 282.0, 297.0, 291.0, 288.0, 296.0, 286.0, 280.0, 299.0, 298.0, 281.0, 296.0, 286.0, 277.0, 299.0, 303.0, 279.0, 291.0, 288.0, 283.0, 299.0, 280.0, 299.0, 295.0, 278.0, 293.0, 283.0, 296.0, 283.0, 266.0, 264.0, 277.0, 299.0, 283.0, 296.0, 291.0, 285.0, 264.0, 261.0, 286.0, 290.0, 267.0, 252.0, 272.0, 258.0, 278.0, 298.0, 290.0, 289.0, 291.0, 291.0, 318.0, 312.0, 283.0, 293.0, 291.0, 276.0, 286.0, 296.0, 282.0, 285.0, 290.0, 289.0, 298.0, 329.0, 270.0, 303.0, 308.0, 325.0, 301.0, 281.0, 279.0, 288.0, 268.0, 262.0, 296.0, 277.0, 286.0, 278.0, 289.0, 293.0, 285.0, 288.0, 294.0, 
285.0, 301.0, 281.0, 295.0, 284.0, 281.0, 277.0, 291.0, 291.0, 308.0, 274.0, 286.0, 296.0, 300.0, 276.0, 284.0, 289.0, 289.0, 293.0, 316.0, 311.0, 321.0, 309.0, 286.0, 296.0, 297.0, 285.0, 298.0, 284.0, 283.0, 290.0, 270.0, 266.0, 285.0, 297.0, 292.0, 287.0, 293.0, 289.0, 239.0, 268.0, 286.0, 293.0, 283.0, 299.0, 284.0, 292.0, 297.0, 285.0, 197.0, 196.0, 229.0, 236.0, 294.0, 285.0, 285.0, 297.0, 289.0, 281.0, 317.0, 310.0, 301.0, 278.0, 278.0, 295.0, 282.0, 294.0, 284.0, 295.0, 262.0, 242.0, 297.0, 282.0, 290.0, 289.0, 295.0, 281.0, 272.0, 255.0, 288.0, 291.0, 260.0, 259.0, 287.0, 292.0, 294.0, 293.0, 286.0, 290.0, 309.0, 324.0, 285.0, 294.0, 285.0, 291.0, 297.0, 285.0, 288.0, 291.0, 270.0, 254.0, 305.0, 322.0]}, "sampler_perf": {"mean_env_wait_ms": 1.304511305303639, "mean_processing_ms": 0.3288745301367266, "mean_inference_ms": 1.8566250484766516}, "off_policy_estimator": {}, "info": {"num_steps_trained": 4224000, "num_steps_sampled": 2252800, "sample_time_ms": 20239.261, "load_time_ms": 36.617, "grad_time_ms": 8693.262, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0060581061989068985, "policy_loss": -0.0023995088413357735, "vf_loss": 90.20238494873047, "vf_explained_var": 0.7652048468589783, "kl": 0.0019277030369266868, "entropy": 1.1252202987670898, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2252800, "episodes_total": 5632, "training_iteration": 176, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-12-54", "timestamp": 1660252374, "time_this_iter_s": 32.0580530166626, "time_total_s": 10791.044691801071, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 10791.044691801071, "timesteps_since_restore": 2252800, "iterations_since_restore": 176, "perf": {"cpu_util_percent": 31.34666666666667, "ram_util_percent": 58.57333333333334}}
+{"episode_reward_max": 633.0, "episode_reward_min": 393.0, "episode_reward_mean": 571.4, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 196.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 285.7}, "custom_metrics": {"sparse_reward_mean": 197.6, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 176.2, "shaped_reward_min": 113, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.68, "onion_pickup_agent_0_min": 10, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 17.35, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 16.28, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 16.61, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.67, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 0.62, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.25, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.36, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 15.59, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 16.44, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.84, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.47, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.24, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.83, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.43, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 0.43, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.05, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.05, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.2, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.79, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.18, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.72, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.59, 
"optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 16.44, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.59, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 16.44, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [630.0, 582.0, 525.0, 627.0, 570.0, 504.0, 579.0, 582.0, 579.0, 576.0, 570.0, 579.0, 579.0, 576.0, 582.0, 519.0, 573.0, 579.0, 573.0, 582.0, 579.0, 582.0, 582.0, 587.0, 519.0, 573.0, 579.0, 633.0, 590.0, 579.0, 573.0, 587.0, 536.0, 582.0, 579.0, 582.0, 507.0, 579.0, 582.0, 576.0, 582.0, 393.0, 465.0, 579.0, 582.0, 570.0, 627.0, 579.0, 573.0, 576.0, 579.0, 504.0, 579.0, 579.0, 576.0, 527.0, 579.0, 519.0, 579.0, 587.0, 576.0, 633.0, 579.0, 576.0, 582.0, 579.0, 524.0, 627.0, 582.0, 582.0, 582.0, 627.0, 582.0, 582.0, 587.0, 579.0, 579.0, 582.0, 579.0, 579.0, 582.0, 576.0, 582.0, 579.0, 582.0, 579.0, 573.0, 576.0, 579.0, 530.0, 576.0, 579.0, 576.0, 525.0, 576.0, 519.0, 530.0, 576.0, 579.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [312.0, 318.0, 290.0, 292.0, 272.0, 253.0, 314.0, 313.0, 275.0, 295.0, 261.0, 243.0, 298.0, 281.0, 291.0, 291.0, 299.0, 280.0, 294.0, 282.0, 287.0, 283.0, 288.0, 291.0, 288.0, 291.0, 288.0, 288.0, 302.0, 280.0, 267.0, 252.0, 286.0, 287.0, 288.0, 291.0, 285.0, 288.0, 283.0, 299.0, 290.0, 289.0, 298.0, 284.0, 289.0, 293.0, 296.0, 291.0, 276.0, 243.0, 300.0, 273.0, 286.0, 293.0, 309.0, 324.0, 302.0, 288.0, 292.0, 287.0, 289.0, 284.0, 292.0, 295.0, 270.0, 266.0, 285.0, 297.0, 292.0, 287.0, 293.0, 289.0, 239.0, 268.0, 286.0, 293.0, 283.0, 299.0, 284.0, 292.0, 297.0, 285.0, 197.0, 196.0, 229.0, 236.0, 294.0, 285.0, 285.0, 297.0, 289.0, 281.0, 317.0, 310.0, 301.0, 278.0, 278.0, 
295.0, 282.0, 294.0, 284.0, 295.0, 262.0, 242.0, 297.0, 282.0, 290.0, 289.0, 295.0, 281.0, 272.0, 255.0, 288.0, 291.0, 260.0, 259.0, 287.0, 292.0, 294.0, 293.0, 286.0, 290.0, 309.0, 324.0, 285.0, 294.0, 285.0, 291.0, 297.0, 285.0, 288.0, 291.0, 270.0, 254.0, 305.0, 322.0, 280.0, 302.0, 293.0, 289.0, 294.0, 288.0, 310.0, 317.0, 294.0, 288.0, 294.0, 288.0, 276.0, 311.0, 282.0, 297.0, 291.0, 288.0, 296.0, 286.0, 280.0, 299.0, 298.0, 281.0, 296.0, 286.0, 277.0, 299.0, 303.0, 279.0, 291.0, 288.0, 283.0, 299.0, 280.0, 299.0, 295.0, 278.0, 293.0, 283.0, 296.0, 283.0, 266.0, 264.0, 277.0, 299.0, 283.0, 296.0, 291.0, 285.0, 264.0, 261.0, 286.0, 290.0, 267.0, 252.0, 272.0, 258.0, 278.0, 298.0, 290.0, 289.0, 291.0, 291.0]}, "sampler_perf": {"mean_env_wait_ms": 1.2996756400527443, "mean_processing_ms": 0.3279171800323935, "mean_inference_ms": 1.852052219769451}, "off_policy_estimator": {}, "info": {"num_steps_trained": 4248000, "num_steps_sampled": 2265600, "sample_time_ms": 20324.791, "load_time_ms": 36.647, "grad_time_ms": 8435.096, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0008986306493170559, "policy_loss": -0.007334645837545395, "vf_loss": 87.94988250732422, "vf_explained_var": 0.7740858197212219, "kl": 0.001811654889024794, "entropy": 1.123410940170288, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2265600, "episodes_total": 5664, "training_iteration": 177, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-13-23", "timestamp": 1660252403, "time_this_iter_s": 29.61364197731018, "time_total_s": 10820.658333778381, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 10820.658333778381, "timesteps_since_restore": 2265600, "iterations_since_restore": 177, "perf": {"cpu_util_percent": 35.069047619047616, "ram_util_percent": 58.67619047619048}}
+{"episode_reward_max": 633.0, "episode_reward_min": 504.0, "episode_reward_mean": 576.0, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 243.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 288.0}, "custom_metrics": {"sparse_reward_mean": 199.0, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 178.0, "shaped_reward_min": 144, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.81, "onion_pickup_agent_0_min": 10, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 17.32, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 16.41, "useful_onion_pickup_agent_0_min": 9, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 16.63, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.63, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.52, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.27, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, "useful_onion_drop_agent_1_mean": 0.25, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 15.75, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 16.54, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.85, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.43, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.33, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.93, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.41, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.27, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.04, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.22, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.87, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.19, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.76, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.75, 
"optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 16.54, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.75, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 16.54, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 570.0, 519.0, 579.0, 582.0, 627.0, 576.0, 582.0, 579.0, 573.0, 582.0, 582.0, 527.0, 582.0, 579.0, 587.0, 630.0, 582.0, 576.0, 579.0, 570.0, 582.0, 582.0, 579.0, 525.0, 579.0, 576.0, 587.0, 584.0, 582.0, 573.0, 587.0, 582.0, 579.0, 524.0, 627.0, 582.0, 582.0, 582.0, 627.0, 582.0, 582.0, 587.0, 579.0, 579.0, 582.0, 579.0, 579.0, 582.0, 576.0, 582.0, 579.0, 582.0, 579.0, 573.0, 576.0, 579.0, 530.0, 576.0, 579.0, 576.0, 525.0, 576.0, 519.0, 530.0, 576.0, 579.0, 582.0, 630.0, 582.0, 525.0, 627.0, 570.0, 504.0, 579.0, 582.0, 579.0, 576.0, 570.0, 579.0, 579.0, 576.0, 582.0, 519.0, 573.0, 579.0, 573.0, 582.0, 579.0, 582.0, 582.0, 587.0, 519.0, 573.0, 579.0, 633.0, 590.0, 579.0, 573.0, 587.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [289.0, 293.0, 281.0, 289.0, 270.0, 249.0, 292.0, 287.0, 286.0, 296.0, 305.0, 322.0, 289.0, 287.0, 295.0, 287.0, 300.0, 279.0, 290.0, 283.0, 285.0, 297.0, 289.0, 293.0, 254.0, 273.0, 294.0, 288.0, 291.0, 288.0, 301.0, 286.0, 314.0, 316.0, 295.0, 287.0, 289.0, 287.0, 277.0, 302.0, 292.0, 278.0, 278.0, 304.0, 288.0, 294.0, 291.0, 288.0, 259.0, 266.0, 275.0, 304.0, 299.0, 277.0, 296.0, 291.0, 290.0, 294.0, 288.0, 294.0, 288.0, 285.0, 290.0, 297.0, 297.0, 285.0, 288.0, 291.0, 270.0, 254.0, 305.0, 322.0, 280.0, 302.0, 293.0, 289.0, 294.0, 288.0, 310.0, 317.0, 294.0, 288.0, 294.0, 288.0, 276.0, 311.0, 282.0, 297.0, 291.0, 288.0, 296.0, 286.0, 280.0, 299.0, 298.0, 281.0, 296.0, 
286.0, 277.0, 299.0, 303.0, 279.0, 291.0, 288.0, 283.0, 299.0, 280.0, 299.0, 295.0, 278.0, 293.0, 283.0, 296.0, 283.0, 266.0, 264.0, 277.0, 299.0, 283.0, 296.0, 291.0, 285.0, 264.0, 261.0, 286.0, 290.0, 267.0, 252.0, 272.0, 258.0, 278.0, 298.0, 290.0, 289.0, 291.0, 291.0, 312.0, 318.0, 290.0, 292.0, 272.0, 253.0, 314.0, 313.0, 275.0, 295.0, 261.0, 243.0, 298.0, 281.0, 291.0, 291.0, 299.0, 280.0, 294.0, 282.0, 287.0, 283.0, 288.0, 291.0, 288.0, 291.0, 288.0, 288.0, 302.0, 280.0, 267.0, 252.0, 286.0, 287.0, 288.0, 291.0, 285.0, 288.0, 283.0, 299.0, 290.0, 289.0, 298.0, 284.0, 289.0, 293.0, 296.0, 291.0, 276.0, 243.0, 300.0, 273.0, 286.0, 293.0, 309.0, 324.0, 302.0, 288.0, 292.0, 287.0, 289.0, 284.0, 292.0, 295.0]}, "sampler_perf": {"mean_env_wait_ms": 1.2948917928576587, "mean_processing_ms": 0.3269695972321587, "mean_inference_ms": 1.8475779345693215}, "off_policy_estimator": {}, "info": {"num_steps_trained": 4272000, "num_steps_sampled": 2278400, "sample_time_ms": 20480.726, "load_time_ms": 37.228, "grad_time_ms": 8437.297, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.004881067667156458, "policy_loss": -0.003187847323715687, "vf_loss": 86.31526947021484, "vf_explained_var": 0.7646486163139343, "kl": 0.0018008003244176507, "entropy": 1.125217080116272, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2278400, "episodes_total": 5696, "training_iteration": 178, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-13-54", "timestamp": 1660252434, "time_this_iter_s": 30.965723037719727, "time_total_s": 10851.624056816101, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 10851.624056816101, "timesteps_since_restore": 2278400, "iterations_since_restore": 178, "perf": {"cpu_util_percent": 34.43636363636364, "ram_util_percent": 58.54318181818183}}
+{"episode_reward_max": 633.0, "episode_reward_min": 504.0, "episode_reward_mean": 577.98, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 243.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 288.99}, "custom_metrics": {"sparse_reward_mean": 199.8, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 178.38, "shaped_reward_min": 144, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.7, "onion_pickup_agent_0_min": 10, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 17.35, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 16.38, "useful_onion_pickup_agent_0_min": 9, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 16.65, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.57, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.48, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.28, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.25, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 15.73, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 16.62, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.95, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.4, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.38, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.88, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.48, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.28, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.06, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.28, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.84, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.26, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.74, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.73, 
"optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 16.62, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.73, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 16.62, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [587.0, 561.0, 579.0, 582.0, 627.0, 527.0, 579.0, 576.0, 582.0, 582.0, 582.0, 579.0, 579.0, 579.0, 582.0, 579.0, 561.0, 576.0, 587.0, 630.0, 582.0, 627.0, 579.0, 582.0, 579.0, 582.0, 582.0, 576.0, 582.0, 579.0, 579.0, 576.0, 530.0, 576.0, 579.0, 582.0, 630.0, 582.0, 525.0, 627.0, 570.0, 504.0, 579.0, 582.0, 579.0, 576.0, 570.0, 579.0, 579.0, 576.0, 582.0, 519.0, 573.0, 579.0, 573.0, 582.0, 579.0, 582.0, 582.0, 587.0, 519.0, 573.0, 579.0, 633.0, 590.0, 579.0, 573.0, 587.0, 582.0, 570.0, 519.0, 579.0, 582.0, 627.0, 576.0, 582.0, 579.0, 573.0, 582.0, 582.0, 527.0, 582.0, 579.0, 587.0, 630.0, 582.0, 576.0, 579.0, 570.0, 582.0, 582.0, 579.0, 525.0, 579.0, 576.0, 587.0, 584.0, 582.0, 573.0, 587.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [291.0, 296.0, 273.0, 288.0, 294.0, 285.0, 285.0, 297.0, 318.0, 309.0, 264.0, 263.0, 287.0, 292.0, 280.0, 296.0, 288.0, 294.0, 293.0, 289.0, 288.0, 294.0, 300.0, 279.0, 294.0, 285.0, 288.0, 291.0, 294.0, 288.0, 278.0, 301.0, 279.0, 282.0, 290.0, 286.0, 302.0, 285.0, 320.0, 310.0, 297.0, 285.0, 315.0, 312.0, 295.0, 284.0, 304.0, 278.0, 278.0, 301.0, 289.0, 293.0, 290.0, 292.0, 275.0, 301.0, 293.0, 289.0, 289.0, 290.0, 290.0, 289.0, 294.0, 282.0, 272.0, 258.0, 278.0, 298.0, 290.0, 289.0, 291.0, 291.0, 312.0, 318.0, 290.0, 292.0, 272.0, 253.0, 314.0, 313.0, 275.0, 295.0, 261.0, 243.0, 298.0, 281.0, 291.0, 291.0, 299.0, 280.0, 294.0, 282.0, 287.0, 283.0, 288.0, 291.0, 288.0, 
291.0, 288.0, 288.0, 302.0, 280.0, 267.0, 252.0, 286.0, 287.0, 288.0, 291.0, 285.0, 288.0, 283.0, 299.0, 290.0, 289.0, 298.0, 284.0, 289.0, 293.0, 296.0, 291.0, 276.0, 243.0, 300.0, 273.0, 286.0, 293.0, 309.0, 324.0, 302.0, 288.0, 292.0, 287.0, 289.0, 284.0, 292.0, 295.0, 289.0, 293.0, 281.0, 289.0, 270.0, 249.0, 292.0, 287.0, 286.0, 296.0, 305.0, 322.0, 289.0, 287.0, 295.0, 287.0, 300.0, 279.0, 290.0, 283.0, 285.0, 297.0, 289.0, 293.0, 254.0, 273.0, 294.0, 288.0, 291.0, 288.0, 301.0, 286.0, 314.0, 316.0, 295.0, 287.0, 289.0, 287.0, 277.0, 302.0, 292.0, 278.0, 278.0, 304.0, 288.0, 294.0, 291.0, 288.0, 259.0, 266.0, 275.0, 304.0, 299.0, 277.0, 296.0, 291.0, 290.0, 294.0, 288.0, 294.0, 288.0, 285.0, 290.0, 297.0]}, "sampler_perf": {"mean_env_wait_ms": 1.2901493032290514, "mean_processing_ms": 0.3260293499521716, "mean_inference_ms": 1.842905806043276}, "off_policy_estimator": {}, "info": {"num_steps_trained": 4296000, "num_steps_sampled": 2291200, "sample_time_ms": 20658.749, "load_time_ms": 37.127, "grad_time_ms": 8627.523, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.005556942895054817, "policy_loss": -0.0025492331478744745, "vf_loss": 86.67485809326172, "vf_explained_var": 0.7664775848388672, "kl": 0.0018904004245996475, "entropy": 1.1226133108139038, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2291200, "episodes_total": 5728, "training_iteration": 179, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-14-26", "timestamp": 1660252466, "time_this_iter_s": 32.01629400253296, "time_total_s": 10883.640350818634, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 10883.640350818634, "timesteps_since_restore": 2291200, "iterations_since_restore": 179, "perf": {"cpu_util_percent": 31.479999999999997, "ram_util_percent": 58.526666666666664}}
+{"episode_reward_max": 630.0, "episode_reward_min": 516.0, "episode_reward_mean": 578.14, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 249.0}, "policy_reward_max": {"ppo": 326.0}, "policy_reward_mean": {"ppo": 289.07}, "custom_metrics": {"sparse_reward_mean": 199.8, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 178.54, "shaped_reward_min": 156, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.79, "onion_pickup_agent_0_min": 10, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 17.34, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.42, "useful_onion_pickup_agent_0_min": 9, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 16.69, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.62, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.47, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.3, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.17, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.82, "potting_onion_agent_0_min": 9, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 16.62, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.88, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.52, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.28, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.91, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.5, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.38, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.06, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.04, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.21, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.93, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.17, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.84, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.82, 
"optimal_onion_potting_agent_0_min": 9, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 16.62, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.82, "viable_onion_potting_agent_0_min": 9, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 16.62, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 576.0, 567.0, 530.0, 627.0, 582.0, 579.0, 587.0, 579.0, 579.0, 579.0, 579.0, 582.0, 582.0, 576.0, 576.0, 516.0, 579.0, 587.0, 582.0, 570.0, 570.0, 519.0, 573.0, 582.0, 576.0, 579.0, 579.0, 576.0, 530.0, 582.0, 624.0, 590.0, 579.0, 573.0, 587.0, 582.0, 570.0, 519.0, 579.0, 582.0, 627.0, 576.0, 582.0, 579.0, 573.0, 582.0, 582.0, 527.0, 582.0, 579.0, 587.0, 630.0, 582.0, 576.0, 579.0, 570.0, 582.0, 582.0, 579.0, 525.0, 579.0, 576.0, 587.0, 584.0, 582.0, 573.0, 587.0, 587.0, 561.0, 579.0, 582.0, 627.0, 527.0, 579.0, 576.0, 582.0, 582.0, 582.0, 579.0, 579.0, 579.0, 582.0, 579.0, 561.0, 576.0, 587.0, 630.0, 582.0, 627.0, 579.0, 582.0, 579.0, 582.0, 582.0, 576.0, 582.0, 579.0, 579.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [296.0, 283.0, 289.0, 287.0, 288.0, 279.0, 270.0, 260.0, 309.0, 318.0, 298.0, 284.0, 276.0, 303.0, 291.0, 296.0, 298.0, 281.0, 283.0, 296.0, 284.0, 295.0, 277.0, 302.0, 297.0, 285.0, 294.0, 288.0, 293.0, 283.0, 284.0, 292.0, 263.0, 253.0, 290.0, 289.0, 302.0, 285.0, 297.0, 285.0, 291.0, 279.0, 294.0, 276.0, 261.0, 258.0, 282.0, 291.0, 293.0, 289.0, 293.0, 283.0, 280.0, 299.0, 282.0, 297.0, 298.0, 278.0, 269.0, 261.0, 285.0, 297.0, 298.0, 326.0, 302.0, 288.0, 292.0, 287.0, 289.0, 284.0, 292.0, 295.0, 289.0, 293.0, 281.0, 289.0, 270.0, 249.0, 292.0, 287.0, 286.0, 296.0, 305.0, 322.0, 289.0, 287.0, 295.0, 287.0, 300.0, 279.0, 290.0, 283.0, 285.0, 297.0, 289.0, 293.0, 254.0, 
273.0, 294.0, 288.0, 291.0, 288.0, 301.0, 286.0, 314.0, 316.0, 295.0, 287.0, 289.0, 287.0, 277.0, 302.0, 292.0, 278.0, 278.0, 304.0, 288.0, 294.0, 291.0, 288.0, 259.0, 266.0, 275.0, 304.0, 299.0, 277.0, 296.0, 291.0, 290.0, 294.0, 288.0, 294.0, 288.0, 285.0, 290.0, 297.0, 291.0, 296.0, 273.0, 288.0, 294.0, 285.0, 285.0, 297.0, 318.0, 309.0, 264.0, 263.0, 287.0, 292.0, 280.0, 296.0, 288.0, 294.0, 293.0, 289.0, 288.0, 294.0, 300.0, 279.0, 294.0, 285.0, 288.0, 291.0, 294.0, 288.0, 278.0, 301.0, 279.0, 282.0, 290.0, 286.0, 302.0, 285.0, 320.0, 310.0, 297.0, 285.0, 315.0, 312.0, 295.0, 284.0, 304.0, 278.0, 278.0, 301.0, 289.0, 293.0, 290.0, 292.0, 275.0, 301.0, 293.0, 289.0, 289.0, 290.0, 290.0, 289.0, 294.0, 282.0]}, "sampler_perf": {"mean_env_wait_ms": 1.2854556266765977, "mean_processing_ms": 0.3250985864852822, "mean_inference_ms": 1.838409331377913}, "off_policy_estimator": {}, "info": {"num_steps_trained": 4320000, "num_steps_sampled": 2304000, "sample_time_ms": 20831.137, "load_time_ms": 37.325, "grad_time_ms": 8787.85, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.003588956082239747, "policy_loss": -0.004645919892936945, "vf_loss": 88.00481414794922, "vf_explained_var": 0.7581232190132141, "kl": 0.0017625847831368446, "entropy": 1.131211280822754, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2304000, "episodes_total": 5760, "training_iteration": 180, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-14-57", "timestamp": 1660252497, "time_this_iter_s": 30.824997186660767, "time_total_s": 10914.465348005295, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 10914.465348005295, "timesteps_since_restore": 2304000, "iterations_since_restore": 180, "perf": {"cpu_util_percent": 31.02954545454545, "ram_util_percent": 58.488636363636374}}
+{"episode_reward_max": 630.0, "episode_reward_min": 501.0, "episode_reward_mean": 575.78, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 246.0}, "policy_reward_max": {"ppo": 326.0}, "policy_reward_mean": {"ppo": 287.89}, "custom_metrics": {"sparse_reward_mean": 199.2, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 177.38, "shaped_reward_min": 141, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.23, "onion_pickup_agent_0_min": 10, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 17.73, "onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 15.91, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 17.04, "useful_onion_pickup_agent_1_min": 11, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.5, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.55, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.2, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.2, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 15.39, "potting_onion_agent_0_min": 9, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 16.93, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 6.11, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.34, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.35, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.59, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.52, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.43, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.07, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.05, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.39, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.74, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.35, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.65, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.39, 
"optimal_onion_potting_agent_0_min": 9, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 16.93, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.39, "viable_onion_potting_agent_0_min": 9, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 16.93, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [521.0, 576.0, 573.0, 539.0, 579.0, 582.0, 536.0, 630.0, 579.0, 582.0, 582.0, 579.0, 576.0, 582.0, 558.0, 579.0, 552.0, 576.0, 579.0, 587.0, 579.0, 579.0, 587.0, 567.0, 582.0, 576.0, 501.0, 504.0, 573.0, 587.0, 587.0, 579.0, 584.0, 582.0, 573.0, 587.0, 587.0, 561.0, 579.0, 582.0, 627.0, 527.0, 579.0, 576.0, 582.0, 582.0, 582.0, 579.0, 579.0, 579.0, 582.0, 579.0, 561.0, 576.0, 587.0, 630.0, 582.0, 627.0, 579.0, 582.0, 579.0, 582.0, 582.0, 576.0, 582.0, 579.0, 579.0, 576.0, 579.0, 576.0, 567.0, 530.0, 627.0, 582.0, 579.0, 587.0, 579.0, 579.0, 579.0, 579.0, 582.0, 582.0, 576.0, 576.0, 516.0, 579.0, 587.0, 582.0, 570.0, 570.0, 519.0, 573.0, 582.0, 576.0, 579.0, 579.0, 576.0, 530.0, 582.0, 624.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [267.0, 254.0, 283.0, 293.0, 285.0, 288.0, 275.0, 264.0, 288.0, 291.0, 304.0, 278.0, 267.0, 269.0, 323.0, 307.0, 298.0, 281.0, 303.0, 279.0, 297.0, 285.0, 280.0, 299.0, 277.0, 299.0, 286.0, 296.0, 286.0, 272.0, 308.0, 271.0, 275.0, 277.0, 292.0, 284.0, 276.0, 303.0, 286.0, 301.0, 291.0, 288.0, 301.0, 278.0, 292.0, 295.0, 291.0, 276.0, 293.0, 289.0, 293.0, 283.0, 254.0, 247.0, 246.0, 258.0, 281.0, 292.0, 286.0, 301.0, 295.0, 292.0, 282.0, 297.0, 290.0, 294.0, 288.0, 294.0, 288.0, 285.0, 290.0, 297.0, 291.0, 296.0, 273.0, 288.0, 294.0, 285.0, 285.0, 297.0, 318.0, 309.0, 264.0, 263.0, 287.0, 292.0, 280.0, 296.0, 288.0, 294.0, 293.0, 289.0, 288.0, 294.0, 300.0, 279.0, 294.0, 
285.0, 288.0, 291.0, 294.0, 288.0, 278.0, 301.0, 279.0, 282.0, 290.0, 286.0, 302.0, 285.0, 320.0, 310.0, 297.0, 285.0, 315.0, 312.0, 295.0, 284.0, 304.0, 278.0, 278.0, 301.0, 289.0, 293.0, 290.0, 292.0, 275.0, 301.0, 293.0, 289.0, 289.0, 290.0, 290.0, 289.0, 294.0, 282.0, 296.0, 283.0, 289.0, 287.0, 288.0, 279.0, 270.0, 260.0, 309.0, 318.0, 298.0, 284.0, 276.0, 303.0, 291.0, 296.0, 298.0, 281.0, 283.0, 296.0, 284.0, 295.0, 277.0, 302.0, 297.0, 285.0, 294.0, 288.0, 293.0, 283.0, 284.0, 292.0, 263.0, 253.0, 290.0, 289.0, 302.0, 285.0, 297.0, 285.0, 291.0, 279.0, 294.0, 276.0, 261.0, 258.0, 282.0, 291.0, 293.0, 289.0, 293.0, 283.0, 280.0, 299.0, 282.0, 297.0, 298.0, 278.0, 269.0, 261.0, 285.0, 297.0, 298.0, 326.0]}, "sampler_perf": {"mean_env_wait_ms": 1.280816050662255, "mean_processing_ms": 0.3241780547884837, "mean_inference_ms": 1.833999332912814}, "off_policy_estimator": {}, "info": {"num_steps_trained": 4344000, "num_steps_sampled": 2316800, "sample_time_ms": 21054.918, "load_time_ms": 37.113, "grad_time_ms": 8943.853, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0029653788078576326, "policy_loss": -0.005187256261706352, "vf_loss": 87.18419647216797, "vf_explained_var": 0.7553746104240417, "kl": 0.0017378958873450756, "entropy": 1.13156259059906, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2316800, "episodes_total": 5792, "training_iteration": 181, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-15-28", "timestamp": 1660252528, "time_this_iter_s": 30.97549271583557, "time_total_s": 10945.44084072113, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 10945.44084072113, "timesteps_since_restore": 2316800, "iterations_since_restore": 181, "perf": {"cpu_util_percent": 34.48409090909092, "ram_util_percent": 58.6068181818182}}
+{"episode_reward_max": 630.0, "episode_reward_min": 465.0, "episode_reward_mean": 573.12, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 227.0}, "policy_reward_max": {"ppo": 326.0}, "policy_reward_mean": {"ppo": 286.56}, "custom_metrics": {"sparse_reward_mean": 198.2, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 176.72, "shaped_reward_min": 141, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.09, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 17.86, "onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 28, "useful_onion_pickup_agent_0_mean": 15.71, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 17.03, "useful_onion_pickup_agent_1_min": 11, "useful_onion_pickup_agent_1_max": 27, "onion_drop_agent_0_mean": 0.52, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.58, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.22, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.2, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 15.22, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 16.99, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 26, "dish_pickup_agent_0_mean": 6.14, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.35, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.35, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.53, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.5, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.48, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.06, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.06, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.43, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 10, "soup_pickup_agent_1_mean": 4.66, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.36, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.6, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.22, 
"optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 16.99, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 26, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.22, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 16.99, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 26, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [576.0, 530.0, 530.0, 627.0, 582.0, 579.0, 538.0, 576.0, 582.0, 579.0, 519.0, 582.0, 582.0, 582.0, 630.0, 579.0, 579.0, 576.0, 567.0, 576.0, 465.0, 587.0, 582.0, 587.0, 570.0, 582.0, 579.0, 573.0, 582.0, 584.0, 627.0, 576.0, 582.0, 579.0, 579.0, 576.0, 579.0, 576.0, 567.0, 530.0, 627.0, 582.0, 579.0, 587.0, 579.0, 579.0, 579.0, 579.0, 582.0, 582.0, 576.0, 576.0, 516.0, 579.0, 587.0, 582.0, 570.0, 570.0, 519.0, 573.0, 582.0, 576.0, 579.0, 579.0, 576.0, 530.0, 582.0, 624.0, 521.0, 576.0, 573.0, 539.0, 579.0, 582.0, 536.0, 630.0, 579.0, 582.0, 582.0, 579.0, 576.0, 582.0, 558.0, 579.0, 552.0, 576.0, 579.0, 587.0, 579.0, 579.0, 587.0, 567.0, 582.0, 576.0, 501.0, 504.0, 573.0, 587.0, 587.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [285.0, 291.0, 272.0, 258.0, 270.0, 260.0, 313.0, 314.0, 281.0, 301.0, 285.0, 294.0, 263.0, 275.0, 280.0, 296.0, 292.0, 290.0, 294.0, 285.0, 270.0, 249.0, 296.0, 286.0, 286.0, 296.0, 282.0, 300.0, 318.0, 312.0, 295.0, 284.0, 291.0, 288.0, 288.0, 288.0, 291.0, 276.0, 278.0, 298.0, 227.0, 238.0, 308.0, 279.0, 298.0, 284.0, 313.0, 274.0, 284.0, 286.0, 285.0, 297.0, 270.0, 309.0, 282.0, 291.0, 284.0, 298.0, 295.0, 289.0, 313.0, 314.0, 285.0, 291.0, 293.0, 289.0, 289.0, 290.0, 290.0, 289.0, 294.0, 282.0, 296.0, 283.0, 289.0, 287.0, 288.0, 279.0, 270.0, 260.0, 309.0, 318.0, 298.0, 284.0, 276.0, 303.0, 291.0, 296.0, 298.0, 281.0, 283.0, 296.0, 284.0, 295.0, 277.0, 302.0, 297.0, 
285.0, 294.0, 288.0, 293.0, 283.0, 284.0, 292.0, 263.0, 253.0, 290.0, 289.0, 302.0, 285.0, 297.0, 285.0, 291.0, 279.0, 294.0, 276.0, 261.0, 258.0, 282.0, 291.0, 293.0, 289.0, 293.0, 283.0, 280.0, 299.0, 282.0, 297.0, 298.0, 278.0, 269.0, 261.0, 285.0, 297.0, 298.0, 326.0, 267.0, 254.0, 283.0, 293.0, 285.0, 288.0, 275.0, 264.0, 288.0, 291.0, 304.0, 278.0, 267.0, 269.0, 323.0, 307.0, 298.0, 281.0, 303.0, 279.0, 297.0, 285.0, 280.0, 299.0, 277.0, 299.0, 286.0, 296.0, 286.0, 272.0, 308.0, 271.0, 275.0, 277.0, 292.0, 284.0, 276.0, 303.0, 286.0, 301.0, 291.0, 288.0, 301.0, 278.0, 292.0, 295.0, 291.0, 276.0, 293.0, 289.0, 293.0, 283.0, 254.0, 247.0, 246.0, 258.0, 281.0, 292.0, 286.0, 301.0, 295.0, 292.0, 282.0, 297.0]}, "sampler_perf": {"mean_env_wait_ms": 1.2762174298521924, "mean_processing_ms": 0.3232647451131093, "mean_inference_ms": 1.8295321417191508}, "off_policy_estimator": {}, "info": {"num_steps_trained": 4368000, "num_steps_sampled": 2329600, "sample_time_ms": 20991.973, "load_time_ms": 37.186, "grad_time_ms": 9226.22, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.002576154889538884, "policy_loss": -0.005821262951940298, "vf_loss": 89.62581634521484, "vf_explained_var": 0.7608991265296936, "kl": 0.002179400995373726, "entropy": 1.1303036212921143, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2329600, "episodes_total": 5824, "training_iteration": 182, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-15-59", "timestamp": 1660252559, "time_this_iter_s": 30.775686264038086, "time_total_s": 10976.216526985168, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 10976.216526985168, "timesteps_since_restore": 2329600, "iterations_since_restore": 182, "perf": {"cpu_util_percent": 31.2, "ram_util_percent": 58.5}}
+{"episode_reward_max": 633.0, "episode_reward_min": 465.0, "episode_reward_mean": 575.61, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 227.0}, "policy_reward_max": {"ppo": 326.0}, "policy_reward_mean": {"ppo": 287.805}, "custom_metrics": {"sparse_reward_mean": 199.0, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 177.61, "shaped_reward_min": 141, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.48, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 18.45, "onion_pickup_agent_1_min": 12, "onion_pickup_agent_1_max": 28, "useful_onion_pickup_agent_0_mean": 15.18, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 17.58, "useful_onion_pickup_agent_1_min": 12, "useful_onion_pickup_agent_1_max": 27, "onion_drop_agent_0_mean": 0.42, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.59, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.2, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.2, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 14.72, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.58, "potting_onion_agent_1_min": 12, "potting_onion_agent_1_max": 26, "dish_pickup_agent_0_mean": 6.31, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.01, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.6, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.42, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.44, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.33, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.07, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.66, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 10, "soup_pickup_agent_1_mean": 4.47, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.61, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.39, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.72, 
"optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.58, "optimal_onion_potting_agent_1_min": 12, "optimal_onion_potting_agent_1_max": 26, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.72, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.58, "viable_onion_potting_agent_1_min": 12, "viable_onion_potting_agent_1_max": 26, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 582.0, 587.0, 627.0, 579.0, 576.0, 582.0, 576.0, 573.0, 576.0, 633.0, 525.0, 576.0, 579.0, 579.0, 582.0, 579.0, 536.0, 576.0, 627.0, 579.0, 576.0, 582.0, 582.0, 579.0, 573.0, 587.0, 576.0, 576.0, 630.0, 582.0, 582.0, 576.0, 530.0, 582.0, 624.0, 521.0, 576.0, 573.0, 539.0, 579.0, 582.0, 536.0, 630.0, 579.0, 582.0, 582.0, 579.0, 576.0, 582.0, 558.0, 579.0, 552.0, 576.0, 579.0, 587.0, 579.0, 579.0, 587.0, 567.0, 582.0, 576.0, 501.0, 504.0, 573.0, 587.0, 587.0, 579.0, 576.0, 530.0, 530.0, 627.0, 582.0, 579.0, 538.0, 576.0, 582.0, 579.0, 519.0, 582.0, 582.0, 582.0, 630.0, 579.0, 579.0, 576.0, 567.0, 576.0, 465.0, 587.0, 582.0, 587.0, 570.0, 582.0, 579.0, 573.0, 582.0, 584.0, 627.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [293.0, 289.0, 299.0, 283.0, 290.0, 297.0, 313.0, 314.0, 292.0, 287.0, 284.0, 292.0, 291.0, 291.0, 291.0, 285.0, 291.0, 282.0, 272.0, 304.0, 311.0, 322.0, 255.0, 270.0, 283.0, 293.0, 289.0, 290.0, 294.0, 285.0, 284.0, 298.0, 297.0, 282.0, 261.0, 275.0, 288.0, 288.0, 305.0, 322.0, 294.0, 285.0, 290.0, 286.0, 290.0, 292.0, 295.0, 287.0, 300.0, 279.0, 293.0, 280.0, 293.0, 294.0, 293.0, 283.0, 296.0, 280.0, 321.0, 309.0, 305.0, 277.0, 288.0, 294.0, 298.0, 278.0, 269.0, 261.0, 285.0, 297.0, 298.0, 326.0, 267.0, 254.0, 283.0, 293.0, 285.0, 288.0, 275.0, 264.0, 288.0, 291.0, 304.0, 278.0, 267.0, 269.0, 323.0, 307.0, 298.0, 281.0, 303.0, 279.0, 297.0, 285.0, 280.0, 299.0, 277.0, 
299.0, 286.0, 296.0, 286.0, 272.0, 308.0, 271.0, 275.0, 277.0, 292.0, 284.0, 276.0, 303.0, 286.0, 301.0, 291.0, 288.0, 301.0, 278.0, 292.0, 295.0, 291.0, 276.0, 293.0, 289.0, 293.0, 283.0, 254.0, 247.0, 246.0, 258.0, 281.0, 292.0, 286.0, 301.0, 295.0, 292.0, 282.0, 297.0, 285.0, 291.0, 272.0, 258.0, 270.0, 260.0, 313.0, 314.0, 281.0, 301.0, 285.0, 294.0, 263.0, 275.0, 280.0, 296.0, 292.0, 290.0, 294.0, 285.0, 270.0, 249.0, 296.0, 286.0, 286.0, 296.0, 282.0, 300.0, 318.0, 312.0, 295.0, 284.0, 291.0, 288.0, 288.0, 288.0, 291.0, 276.0, 278.0, 298.0, 227.0, 238.0, 308.0, 279.0, 298.0, 284.0, 313.0, 274.0, 284.0, 286.0, 285.0, 297.0, 270.0, 309.0, 282.0, 291.0, 284.0, 298.0, 295.0, 289.0, 313.0, 314.0, 285.0, 291.0]}, "sampler_perf": {"mean_env_wait_ms": 1.2716706020385649, "mean_processing_ms": 0.32236021091809197, "mean_inference_ms": 1.8252783373393515}, "off_policy_estimator": {}, "info": {"num_steps_trained": 4392000, "num_steps_sampled": 2342400, "sample_time_ms": 21166.824, "load_time_ms": 37.427, "grad_time_ms": 9604.389, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.001810177811421454, "policy_loss": -0.006374426186084747, "vf_loss": 87.48321533203125, "vf_explained_var": 0.7590639591217041, "kl": 0.00198071519844234, "entropy": 1.1274290084838867, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2342400, "episodes_total": 5856, "training_iteration": 183, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-16-33", "timestamp": 1660252593, "time_this_iter_s": 34.10594201087952, "time_total_s": 11010.322468996048, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 11010.322468996048, "timesteps_since_restore": 2342400, "iterations_since_restore": 183, "perf": {"cpu_util_percent": 32.16041666666667, "ram_util_percent": 58.54791666666666}}
+{"episode_reward_max": 636.0, "episode_reward_min": 465.0, "episode_reward_mean": 581.25, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 227.0}, "policy_reward_max": {"ppo": 322.0}, "policy_reward_mean": {"ppo": 290.625}, "custom_metrics": {"sparse_reward_mean": 200.8, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 179.65, "shaped_reward_min": 145, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.68, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 18.48, "onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 28, "useful_onion_pickup_agent_0_mean": 15.43, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 17.67, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 27, "onion_drop_agent_0_mean": 0.43, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.45, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.18, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.14, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 14.93, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 17.68, "potting_onion_agent_1_min": 11, "potting_onion_agent_1_max": 26, "dish_pickup_agent_0_mean": 6.26, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.03, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.72, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.57, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.37, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.32, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.06, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.65, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 10, "soup_pickup_agent_1_mean": 4.54, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.6, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.48, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.93, 
"optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 17.68, "optimal_onion_potting_agent_1_min": 11, "optimal_onion_potting_agent_1_max": 26, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.93, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 17.68, "viable_onion_potting_agent_1_min": 11, "viable_onion_potting_agent_1_max": 26, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [573.0, 579.0, 630.0, 579.0, 636.0, 582.0, 579.0, 587.0, 582.0, 582.0, 630.0, 576.0, 582.0, 573.0, 630.0, 582.0, 570.0, 576.0, 582.0, 582.0, 582.0, 579.0, 582.0, 576.0, 587.0, 579.0, 582.0, 630.0, 590.0, 579.0, 561.0, 579.0, 573.0, 587.0, 587.0, 579.0, 576.0, 530.0, 530.0, 627.0, 582.0, 579.0, 538.0, 576.0, 582.0, 579.0, 519.0, 582.0, 582.0, 582.0, 630.0, 579.0, 579.0, 576.0, 567.0, 576.0, 465.0, 587.0, 582.0, 587.0, 570.0, 582.0, 579.0, 573.0, 582.0, 584.0, 627.0, 576.0, 582.0, 582.0, 587.0, 627.0, 579.0, 576.0, 582.0, 576.0, 573.0, 576.0, 633.0, 525.0, 576.0, 579.0, 579.0, 582.0, 579.0, 536.0, 576.0, 627.0, 579.0, 576.0, 582.0, 582.0, 579.0, 573.0, 587.0, 576.0, 576.0, 630.0, 582.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [288.0, 285.0, 288.0, 291.0, 318.0, 312.0, 289.0, 290.0, 316.0, 320.0, 296.0, 286.0, 290.0, 289.0, 290.0, 297.0, 286.0, 296.0, 293.0, 289.0, 314.0, 316.0, 301.0, 275.0, 293.0, 289.0, 278.0, 295.0, 318.0, 312.0, 286.0, 296.0, 276.0, 294.0, 298.0, 278.0, 295.0, 287.0, 296.0, 286.0, 293.0, 289.0, 292.0, 287.0, 296.0, 286.0, 282.0, 294.0, 307.0, 280.0, 288.0, 291.0, 290.0, 292.0, 316.0, 314.0, 298.0, 292.0, 293.0, 286.0, 278.0, 283.0, 299.0, 280.0, 281.0, 292.0, 286.0, 301.0, 295.0, 292.0, 282.0, 297.0, 285.0, 291.0, 272.0, 258.0, 270.0, 260.0, 313.0, 314.0, 281.0, 301.0, 285.0, 294.0, 263.0, 275.0, 280.0, 296.0, 292.0, 290.0, 294.0, 285.0, 270.0, 249.0, 296.0, 286.0, 286.0, 
296.0, 282.0, 300.0, 318.0, 312.0, 295.0, 284.0, 291.0, 288.0, 288.0, 288.0, 291.0, 276.0, 278.0, 298.0, 227.0, 238.0, 308.0, 279.0, 298.0, 284.0, 313.0, 274.0, 284.0, 286.0, 285.0, 297.0, 270.0, 309.0, 282.0, 291.0, 284.0, 298.0, 295.0, 289.0, 313.0, 314.0, 285.0, 291.0, 293.0, 289.0, 299.0, 283.0, 290.0, 297.0, 313.0, 314.0, 292.0, 287.0, 284.0, 292.0, 291.0, 291.0, 291.0, 285.0, 291.0, 282.0, 272.0, 304.0, 311.0, 322.0, 255.0, 270.0, 283.0, 293.0, 289.0, 290.0, 294.0, 285.0, 284.0, 298.0, 297.0, 282.0, 261.0, 275.0, 288.0, 288.0, 305.0, 322.0, 294.0, 285.0, 290.0, 286.0, 290.0, 292.0, 295.0, 287.0, 300.0, 279.0, 293.0, 280.0, 293.0, 294.0, 293.0, 283.0, 296.0, 280.0, 321.0, 309.0, 305.0, 277.0, 288.0, 294.0]}, "sampler_perf": {"mean_env_wait_ms": 1.26716887743788, "mean_processing_ms": 0.3214640615198408, "mean_inference_ms": 1.8211055910807965}, "off_policy_estimator": {}, "info": {"num_steps_trained": 4416000, "num_steps_sampled": 2355200, "sample_time_ms": 21369.68, "load_time_ms": 37.437, "grad_time_ms": 9828.237, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0049454327672719955, "policy_loss": -0.002732283202931285, "vf_loss": 82.44231414794922, "vf_explained_var": 0.771254301071167, "kl": 0.0019334623357281089, "entropy": 1.1330214738845825, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2355200, "episodes_total": 5888, "training_iteration": 184, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-17-05", "timestamp": 1660252625, "time_this_iter_s": 31.787577867507935, "time_total_s": 11042.110046863556, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 11042.110046863556, "timesteps_since_restore": 2355200, "iterations_since_restore": 184, "perf": {"cpu_util_percent": 30.99777777777778, "ram_util_percent": 58.44666666666665}}
+{"episode_reward_max": 636.0, "episode_reward_min": 194.0, "episode_reward_mean": 578.19, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 93.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 289.095}, "custom_metrics": {"sparse_reward_mean": 199.8, "sparse_reward_min": 60, "sparse_reward_max": 220, "shaped_reward_mean": 178.59, "shaped_reward_min": 74, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.6, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 18.39, "onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 28, "useful_onion_pickup_agent_0_mean": 15.44, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 17.57, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 27, "onion_drop_agent_0_mean": 0.38, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.48, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.08, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.15, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 14.92, "potting_onion_agent_0_min": 3, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 17.61, "potting_onion_agent_1_min": 11, "potting_onion_agent_1_max": 26, "dish_pickup_agent_0_mean": 6.32, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 5.05, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.55, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.55, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.52, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.3, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.09, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.51, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.63, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.48, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.56, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.92, 
"optimal_onion_potting_agent_0_min": 3, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 17.61, "optimal_onion_potting_agent_1_min": 11, "optimal_onion_potting_agent_1_max": 26, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.92, "viable_onion_potting_agent_0_min": 3, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 17.61, "viable_onion_potting_agent_1_min": 11, "viable_onion_potting_agent_1_max": 26, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 582.0, 584.0, 576.0, 573.0, 579.0, 515.0, 576.0, 582.0, 576.0, 582.0, 587.0, 522.0, 573.0, 582.0, 519.0, 576.0, 582.0, 518.0, 570.0, 633.0, 582.0, 194.0, 582.0, 630.0, 587.0, 576.0, 579.0, 527.0, 576.0, 630.0, 587.0, 582.0, 584.0, 627.0, 576.0, 582.0, 582.0, 587.0, 627.0, 579.0, 576.0, 582.0, 576.0, 573.0, 576.0, 633.0, 525.0, 576.0, 579.0, 579.0, 582.0, 579.0, 536.0, 576.0, 627.0, 579.0, 576.0, 582.0, 582.0, 579.0, 573.0, 587.0, 576.0, 576.0, 630.0, 582.0, 582.0, 573.0, 579.0, 630.0, 579.0, 636.0, 582.0, 579.0, 587.0, 582.0, 582.0, 630.0, 576.0, 582.0, 573.0, 630.0, 582.0, 570.0, 576.0, 582.0, 582.0, 582.0, 579.0, 582.0, 576.0, 587.0, 579.0, 582.0, 630.0, 590.0, 579.0, 561.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [298.0, 281.0, 286.0, 296.0, 283.0, 301.0, 286.0, 290.0, 282.0, 291.0, 292.0, 287.0, 260.0, 255.0, 275.0, 301.0, 294.0, 288.0, 280.0, 296.0, 291.0, 291.0, 288.0, 299.0, 259.0, 263.0, 272.0, 301.0, 303.0, 279.0, 261.0, 258.0, 278.0, 298.0, 286.0, 296.0, 268.0, 250.0, 287.0, 283.0, 313.0, 320.0, 283.0, 299.0, 93.0, 101.0, 287.0, 295.0, 315.0, 315.0, 300.0, 287.0, 279.0, 297.0, 288.0, 291.0, 262.0, 265.0, 277.0, 299.0, 306.0, 324.0, 285.0, 302.0, 284.0, 298.0, 295.0, 289.0, 313.0, 314.0, 285.0, 291.0, 293.0, 289.0, 299.0, 283.0, 290.0, 297.0, 313.0, 314.0, 292.0, 287.0, 284.0, 292.0, 291.0, 291.0, 291.0, 285.0, 291.0, 282.0, 272.0, 304.0, 311.0, 322.0, 255.0, 270.0, 283.0, 
293.0, 289.0, 290.0, 294.0, 285.0, 284.0, 298.0, 297.0, 282.0, 261.0, 275.0, 288.0, 288.0, 305.0, 322.0, 294.0, 285.0, 290.0, 286.0, 290.0, 292.0, 295.0, 287.0, 300.0, 279.0, 293.0, 280.0, 293.0, 294.0, 293.0, 283.0, 296.0, 280.0, 321.0, 309.0, 305.0, 277.0, 288.0, 294.0, 288.0, 285.0, 288.0, 291.0, 318.0, 312.0, 289.0, 290.0, 316.0, 320.0, 296.0, 286.0, 290.0, 289.0, 290.0, 297.0, 286.0, 296.0, 293.0, 289.0, 314.0, 316.0, 301.0, 275.0, 293.0, 289.0, 278.0, 295.0, 318.0, 312.0, 286.0, 296.0, 276.0, 294.0, 298.0, 278.0, 295.0, 287.0, 296.0, 286.0, 293.0, 289.0, 292.0, 287.0, 296.0, 286.0, 282.0, 294.0, 307.0, 280.0, 288.0, 291.0, 290.0, 292.0, 316.0, 314.0, 298.0, 292.0, 293.0, 286.0, 278.0, 283.0, 299.0, 280.0]}, "sampler_perf": {"mean_env_wait_ms": 1.2627159762487399, "mean_processing_ms": 0.320576860334056, "mean_inference_ms": 1.8169666521005257}, "off_policy_estimator": {}, "info": {"num_steps_trained": 4440000, "num_steps_sampled": 2368000, "sample_time_ms": 21316.526, "load_time_ms": 37.828, "grad_time_ms": 10200.15, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.004557406529784203, "policy_loss": -0.004057899583131075, "vf_loss": 91.82827758789062, "vf_explained_var": 0.7658367156982422, "kl": 0.001969862962141633, "entropy": 1.135046362876892, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2368000, "episodes_total": 5920, "training_iteration": 185, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-17-38", "timestamp": 1660252658, "time_this_iter_s": 32.679043769836426, "time_total_s": 11074.789090633392, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 11074.789090633392, "timesteps_since_restore": 2368000, "iterations_since_restore": 185, "perf": {"cpu_util_percent": 33.50425531914893, "ram_util_percent": 58.438297872340435}}
+{"episode_reward_max": 636.0, "episode_reward_min": 194.0, "episode_reward_mean": 576.21, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 93.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 288.105}, "custom_metrics": {"sparse_reward_mean": 199.2, "sparse_reward_min": 60, "sparse_reward_max": 220, "shaped_reward_mean": 177.81, "shaped_reward_min": 74, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.66, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 18.58, "onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 28, "useful_onion_pickup_agent_0_mean": 15.42, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 17.81, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 27, "onion_drop_agent_0_mean": 0.58, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.61, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.16, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.21, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.75, "potting_onion_agent_0_min": 3, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 17.65, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 6.44, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 5.01, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.52, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.45, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.56, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.31, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.08, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.58, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.56, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.51, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.49, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.75, 
"optimal_onion_potting_agent_0_min": 3, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 17.65, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.75, "viable_onion_potting_agent_0_min": 3, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 17.65, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [570.0, 627.0, 527.0, 582.0, 624.0, 581.0, 576.0, 576.0, 584.0, 576.0, 587.0, 527.0, 582.0, 576.0, 498.0, 579.0, 582.0, 518.0, 630.0, 582.0, 576.0, 587.0, 582.0, 582.0, 582.0, 627.0, 582.0, 468.0, 627.0, 576.0, 582.0, 582.0, 576.0, 630.0, 582.0, 582.0, 573.0, 579.0, 630.0, 579.0, 636.0, 582.0, 579.0, 587.0, 582.0, 582.0, 630.0, 576.0, 582.0, 573.0, 630.0, 582.0, 570.0, 576.0, 582.0, 582.0, 582.0, 579.0, 582.0, 576.0, 587.0, 579.0, 582.0, 630.0, 590.0, 579.0, 561.0, 579.0, 579.0, 582.0, 584.0, 576.0, 573.0, 579.0, 515.0, 576.0, 582.0, 576.0, 582.0, 587.0, 522.0, 573.0, 582.0, 519.0, 576.0, 582.0, 518.0, 570.0, 633.0, 582.0, 194.0, 582.0, 630.0, 587.0, 576.0, 579.0, 527.0, 576.0, 630.0, 587.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [286.0, 284.0, 310.0, 317.0, 277.0, 250.0, 281.0, 301.0, 316.0, 308.0, 289.0, 292.0, 288.0, 288.0, 283.0, 293.0, 286.0, 298.0, 285.0, 291.0, 302.0, 285.0, 270.0, 257.0, 298.0, 284.0, 277.0, 299.0, 255.0, 243.0, 287.0, 292.0, 293.0, 289.0, 250.0, 268.0, 316.0, 314.0, 290.0, 292.0, 305.0, 271.0, 278.0, 309.0, 300.0, 282.0, 294.0, 288.0, 284.0, 298.0, 313.0, 314.0, 297.0, 285.0, 227.0, 241.0, 310.0, 317.0, 289.0, 287.0, 294.0, 288.0, 293.0, 289.0, 296.0, 280.0, 321.0, 309.0, 305.0, 277.0, 288.0, 294.0, 288.0, 285.0, 288.0, 291.0, 318.0, 312.0, 289.0, 290.0, 316.0, 320.0, 296.0, 286.0, 290.0, 289.0, 290.0, 297.0, 286.0, 296.0, 293.0, 289.0, 314.0, 316.0, 301.0, 275.0, 293.0, 
289.0, 278.0, 295.0, 318.0, 312.0, 286.0, 296.0, 276.0, 294.0, 298.0, 278.0, 295.0, 287.0, 296.0, 286.0, 293.0, 289.0, 292.0, 287.0, 296.0, 286.0, 282.0, 294.0, 307.0, 280.0, 288.0, 291.0, 290.0, 292.0, 316.0, 314.0, 298.0, 292.0, 293.0, 286.0, 278.0, 283.0, 299.0, 280.0, 298.0, 281.0, 286.0, 296.0, 283.0, 301.0, 286.0, 290.0, 282.0, 291.0, 292.0, 287.0, 260.0, 255.0, 275.0, 301.0, 294.0, 288.0, 280.0, 296.0, 291.0, 291.0, 288.0, 299.0, 259.0, 263.0, 272.0, 301.0, 303.0, 279.0, 261.0, 258.0, 278.0, 298.0, 286.0, 296.0, 268.0, 250.0, 287.0, 283.0, 313.0, 320.0, 283.0, 299.0, 93.0, 101.0, 287.0, 295.0, 315.0, 315.0, 300.0, 287.0, 279.0, 297.0, 288.0, 291.0, 262.0, 265.0, 277.0, 299.0, 306.0, 324.0, 285.0, 302.0]}, "sampler_perf": {"mean_env_wait_ms": 1.258340305403844, "mean_processing_ms": 0.31970797918049665, "mean_inference_ms": 1.8136714986418816}, "off_policy_estimator": {}, "info": {"num_steps_trained": 4464000, "num_steps_sampled": 2380800, "sample_time_ms": 21817.869, "load_time_ms": 37.793, "grad_time_ms": 10598.659, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.005312865134328604, "policy_loss": -0.0029478278011083603, "vf_loss": 88.26638793945312, "vf_explained_var": 0.762065589427948, "kl": 0.0017753179417923093, "entropy": 1.1319037675857544, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2380800, "episodes_total": 5952, "training_iteration": 186, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-18-19", "timestamp": 1660252699, "time_this_iter_s": 41.059054136276245, "time_total_s": 11115.848144769669, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 11115.848144769669, "timesteps_since_restore": 2380800, "iterations_since_restore": 186, "perf": {"cpu_util_percent": 27.889655172413793, "ram_util_percent": 58.474137931034484}}
+{"episode_reward_max": 633.0, "episode_reward_min": 194.0, "episode_reward_mean": 572.75, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 93.0}, "policy_reward_max": {"ppo": 326.0}, "policy_reward_mean": {"ppo": 286.375}, "custom_metrics": {"sparse_reward_mean": 198.0, "sparse_reward_min": 60, "sparse_reward_max": 220, "shaped_reward_mean": 176.75, "shaped_reward_min": 74, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.75, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 18.23, "onion_pickup_agent_1_min": 12, "onion_pickup_agent_1_max": 28, "useful_onion_pickup_agent_0_mean": 15.43, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 17.46, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 27, "onion_drop_agent_0_mean": 0.6, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.51, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.25, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.23, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.81, "potting_onion_agent_0_min": 3, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.41, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 6.17, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 5.16, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.27, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.56, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.49, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.32, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.07, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.43, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.69, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.35, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.6, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.81, 
"optimal_onion_potting_agent_0_min": 3, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.41, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.81, "viable_onion_potting_agent_0_min": 3, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.41, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [576.0, 582.0, 510.0, 579.0, 587.0, 582.0, 579.0, 627.0, 576.0, 576.0, 530.0, 579.0, 587.0, 582.0, 587.0, 461.0, 576.0, 582.0, 573.0, 630.0, 587.0, 582.0, 582.0, 627.0, 579.0, 573.0, 582.0, 630.0, 579.0, 576.0, 579.0, 576.0, 590.0, 579.0, 561.0, 579.0, 579.0, 582.0, 584.0, 576.0, 573.0, 579.0, 515.0, 576.0, 582.0, 576.0, 582.0, 587.0, 522.0, 573.0, 582.0, 519.0, 576.0, 582.0, 518.0, 570.0, 633.0, 582.0, 194.0, 582.0, 630.0, 587.0, 576.0, 579.0, 527.0, 576.0, 630.0, 587.0, 570.0, 627.0, 527.0, 582.0, 624.0, 581.0, 576.0, 576.0, 584.0, 576.0, 587.0, 527.0, 582.0, 576.0, 498.0, 579.0, 582.0, 518.0, 630.0, 582.0, 576.0, 587.0, 582.0, 582.0, 582.0, 627.0, 582.0, 468.0, 627.0, 576.0, 582.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [283.0, 293.0, 287.0, 295.0, 258.0, 252.0, 291.0, 288.0, 296.0, 291.0, 298.0, 284.0, 285.0, 294.0, 315.0, 312.0, 289.0, 287.0, 285.0, 291.0, 270.0, 260.0, 285.0, 294.0, 282.0, 305.0, 297.0, 285.0, 301.0, 286.0, 226.0, 235.0, 292.0, 284.0, 287.0, 295.0, 285.0, 288.0, 308.0, 322.0, 287.0, 300.0, 295.0, 287.0, 292.0, 290.0, 304.0, 323.0, 286.0, 293.0, 289.0, 284.0, 284.0, 298.0, 326.0, 304.0, 297.0, 282.0, 285.0, 291.0, 281.0, 298.0, 285.0, 291.0, 298.0, 292.0, 293.0, 286.0, 278.0, 283.0, 299.0, 280.0, 298.0, 281.0, 286.0, 296.0, 283.0, 301.0, 286.0, 290.0, 282.0, 291.0, 292.0, 287.0, 260.0, 255.0, 275.0, 301.0, 294.0, 288.0, 280.0, 296.0, 291.0, 291.0, 288.0, 299.0, 259.0, 
263.0, 272.0, 301.0, 303.0, 279.0, 261.0, 258.0, 278.0, 298.0, 286.0, 296.0, 268.0, 250.0, 287.0, 283.0, 313.0, 320.0, 283.0, 299.0, 93.0, 101.0, 287.0, 295.0, 315.0, 315.0, 300.0, 287.0, 279.0, 297.0, 288.0, 291.0, 262.0, 265.0, 277.0, 299.0, 306.0, 324.0, 285.0, 302.0, 286.0, 284.0, 310.0, 317.0, 277.0, 250.0, 281.0, 301.0, 316.0, 308.0, 289.0, 292.0, 288.0, 288.0, 283.0, 293.0, 286.0, 298.0, 285.0, 291.0, 302.0, 285.0, 270.0, 257.0, 298.0, 284.0, 277.0, 299.0, 255.0, 243.0, 287.0, 292.0, 293.0, 289.0, 250.0, 268.0, 316.0, 314.0, 290.0, 292.0, 305.0, 271.0, 278.0, 309.0, 300.0, 282.0, 294.0, 288.0, 284.0, 298.0, 313.0, 314.0, 297.0, 285.0, 227.0, 241.0, 310.0, 317.0, 289.0, 287.0, 294.0, 288.0, 293.0, 289.0]}, "sampler_perf": {"mean_env_wait_ms": 1.2540029262486032, "mean_processing_ms": 0.31884364369781465, "mean_inference_ms": 1.8101035219779944}, "off_policy_estimator": {}, "info": {"num_steps_trained": 4488000, "num_steps_sampled": 2393600, "sample_time_ms": 21725.743, "load_time_ms": 37.383, "grad_time_ms": 10590.941, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.005064256023615599, "policy_loss": -0.0036211840342730284, "vf_loss": 92.49484252929688, "vf_explained_var": 0.7542417645454407, "kl": 0.001856558839790523, "entropy": 1.1280810832977295, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2393600, "episodes_total": 5984, "training_iteration": 187, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-18-47", "timestamp": 1660252727, "time_this_iter_s": 28.611520051956177, "time_total_s": 11144.459664821625, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 11144.459664821625, "timesteps_since_restore": 2393600, "iterations_since_restore": 187, "perf": {"cpu_util_percent": 32.7725, "ram_util_percent": 58.567499999999995}}
+{"episode_reward_max": 636.0, "episode_reward_min": 461.0, "episode_reward_mean": 580.3, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 226.0}, "policy_reward_max": {"ppo": 337.0}, "policy_reward_mean": {"ppo": 290.15}, "custom_metrics": {"sparse_reward_mean": 200.6, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 179.1, "shaped_reward_min": 138, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.13, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 18.34, "onion_pickup_agent_1_min": 12, "onion_pickup_agent_1_max": 28, "useful_onion_pickup_agent_0_mean": 15.74, "useful_onion_pickup_agent_0_min": 9, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 17.64, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 27, "onion_drop_agent_0_mean": 0.69, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.55, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.31, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.23, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.08, "potting_onion_agent_0_min": 9, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 17.48, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 6.14, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.32, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.39, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.7, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.44, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.36, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.04, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.46, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.77, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.38, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.67, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.08, 
"optimal_onion_potting_agent_0_min": 9, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 17.48, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.08, "viable_onion_potting_agent_0_min": 9, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 17.48, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [576.0, 570.0, 582.0, 570.0, 579.0, 576.0, 593.0, 587.0, 582.0, 582.0, 579.0, 576.0, 633.0, 636.0, 636.0, 587.0, 579.0, 582.0, 582.0, 519.0, 582.0, 570.0, 633.0, 579.0, 587.0, 581.0, 579.0, 582.0, 582.0, 630.0, 582.0, 567.0, 527.0, 576.0, 630.0, 587.0, 570.0, 627.0, 527.0, 582.0, 624.0, 581.0, 576.0, 576.0, 584.0, 576.0, 587.0, 527.0, 582.0, 576.0, 498.0, 579.0, 582.0, 518.0, 630.0, 582.0, 576.0, 587.0, 582.0, 582.0, 582.0, 627.0, 582.0, 468.0, 627.0, 576.0, 582.0, 582.0, 576.0, 582.0, 510.0, 579.0, 587.0, 582.0, 579.0, 627.0, 576.0, 576.0, 530.0, 579.0, 587.0, 582.0, 587.0, 461.0, 576.0, 582.0, 573.0, 630.0, 587.0, 582.0, 582.0, 627.0, 579.0, 573.0, 582.0, 630.0, 579.0, 576.0, 579.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [295.0, 281.0, 294.0, 276.0, 279.0, 303.0, 289.0, 281.0, 298.0, 281.0, 278.0, 298.0, 295.0, 298.0, 292.0, 295.0, 286.0, 296.0, 297.0, 285.0, 289.0, 290.0, 275.0, 301.0, 325.0, 308.0, 317.0, 319.0, 310.0, 326.0, 291.0, 296.0, 290.0, 289.0, 288.0, 294.0, 301.0, 281.0, 259.0, 260.0, 292.0, 290.0, 279.0, 291.0, 296.0, 337.0, 283.0, 296.0, 289.0, 298.0, 295.0, 286.0, 292.0, 287.0, 281.0, 301.0, 306.0, 276.0, 308.0, 322.0, 288.0, 294.0, 290.0, 277.0, 262.0, 265.0, 277.0, 299.0, 306.0, 324.0, 285.0, 302.0, 286.0, 284.0, 310.0, 317.0, 277.0, 250.0, 281.0, 301.0, 316.0, 308.0, 289.0, 292.0, 288.0, 288.0, 283.0, 293.0, 286.0, 298.0, 285.0, 291.0, 302.0, 285.0, 270.0, 257.0, 298.0, 
284.0, 277.0, 299.0, 255.0, 243.0, 287.0, 292.0, 293.0, 289.0, 250.0, 268.0, 316.0, 314.0, 290.0, 292.0, 305.0, 271.0, 278.0, 309.0, 300.0, 282.0, 294.0, 288.0, 284.0, 298.0, 313.0, 314.0, 297.0, 285.0, 227.0, 241.0, 310.0, 317.0, 289.0, 287.0, 294.0, 288.0, 293.0, 289.0, 283.0, 293.0, 287.0, 295.0, 258.0, 252.0, 291.0, 288.0, 296.0, 291.0, 298.0, 284.0, 285.0, 294.0, 315.0, 312.0, 289.0, 287.0, 285.0, 291.0, 270.0, 260.0, 285.0, 294.0, 282.0, 305.0, 297.0, 285.0, 301.0, 286.0, 226.0, 235.0, 292.0, 284.0, 287.0, 295.0, 285.0, 288.0, 308.0, 322.0, 287.0, 300.0, 295.0, 287.0, 292.0, 290.0, 304.0, 323.0, 286.0, 293.0, 289.0, 284.0, 284.0, 298.0, 326.0, 304.0, 297.0, 282.0, 285.0, 291.0, 281.0, 298.0, 285.0, 291.0]}, "sampler_perf": {"mean_env_wait_ms": 1.2497077213230872, "mean_processing_ms": 0.3179872495448108, "mean_inference_ms": 1.8063396156782892}, "off_policy_estimator": {}, "info": {"num_steps_trained": 4512000, "num_steps_sampled": 2406400, "sample_time_ms": 21491.638, "load_time_ms": 36.743, "grad_time_ms": 10320.581, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.005896018352359533, "policy_loss": -0.002354246797040105, "vf_loss": 88.0772933959961, "vf_explained_var": 0.767683744430542, "kl": 0.0020883409306406975, "entropy": 1.1149283647537231, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2406400, "episodes_total": 6016, "training_iteration": 188, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-19-13", "timestamp": 1660252753, "time_this_iter_s": 25.91284203529358, "time_total_s": 11170.372506856918, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 11170.372506856918, "timesteps_since_restore": 2406400, "iterations_since_restore": 188, "perf": {"cpu_util_percent": 33.778378378378385, "ram_util_percent": 58.56486486486485}}
+{"episode_reward_max": 636.0, "episode_reward_min": 458.0, "episode_reward_mean": 582.29, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 226.0}, "policy_reward_max": {"ppo": 337.0}, "policy_reward_mean": {"ppo": 291.145}, "custom_metrics": {"sparse_reward_mean": 201.2, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 179.89, "shaped_reward_min": 138, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.36, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 18.03, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 16.0, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 25, "useful_onion_pickup_agent_1_mean": 17.36, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 25, "onion_drop_agent_0_mean": 0.62, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 0.48, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.22, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.2, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.4, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 17.32, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 5.97, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.45, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 5.36, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.8, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.33, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.38, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.04, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.4, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.86, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.33, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.76, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.4, 
"optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 17.32, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.4, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 17.32, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 458.0, 587.0, 579.0, 516.0, 582.0, 579.0, 590.0, 587.0, 582.0, 630.0, 533.0, 579.0, 630.0, 582.0, 630.0, 576.0, 582.0, 582.0, 587.0, 582.0, 582.0, 576.0, 584.0, 567.0, 582.0, 584.0, 576.0, 564.0, 582.0, 627.0, 633.0, 627.0, 576.0, 582.0, 582.0, 576.0, 582.0, 510.0, 579.0, 587.0, 582.0, 579.0, 627.0, 576.0, 576.0, 530.0, 579.0, 587.0, 582.0, 587.0, 461.0, 576.0, 582.0, 573.0, 630.0, 587.0, 582.0, 582.0, 627.0, 579.0, 573.0, 582.0, 630.0, 579.0, 576.0, 579.0, 576.0, 576.0, 570.0, 582.0, 570.0, 579.0, 576.0, 593.0, 587.0, 582.0, 582.0, 579.0, 576.0, 633.0, 636.0, 636.0, 587.0, 579.0, 582.0, 582.0, 519.0, 582.0, 570.0, 633.0, 579.0, 587.0, 581.0, 579.0, 582.0, 582.0, 630.0, 582.0, 567.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [287.0, 292.0, 226.0, 232.0, 298.0, 289.0, 290.0, 289.0, 249.0, 267.0, 297.0, 285.0, 303.0, 276.0, 301.0, 289.0, 283.0, 304.0, 293.0, 289.0, 319.0, 311.0, 268.0, 265.0, 283.0, 296.0, 322.0, 308.0, 288.0, 294.0, 311.0, 319.0, 286.0, 290.0, 283.0, 299.0, 283.0, 299.0, 295.0, 292.0, 286.0, 296.0, 278.0, 304.0, 289.0, 287.0, 284.0, 300.0, 290.0, 277.0, 283.0, 299.0, 309.0, 275.0, 282.0, 294.0, 285.0, 279.0, 296.0, 286.0, 318.0, 309.0, 319.0, 314.0, 310.0, 317.0, 289.0, 287.0, 294.0, 288.0, 293.0, 289.0, 283.0, 293.0, 287.0, 295.0, 258.0, 252.0, 291.0, 288.0, 296.0, 291.0, 298.0, 284.0, 285.0, 294.0, 315.0, 312.0, 289.0, 287.0, 285.0, 291.0, 270.0, 260.0, 285.0, 294.0, 282.0, 
305.0, 297.0, 285.0, 301.0, 286.0, 226.0, 235.0, 292.0, 284.0, 287.0, 295.0, 285.0, 288.0, 308.0, 322.0, 287.0, 300.0, 295.0, 287.0, 292.0, 290.0, 304.0, 323.0, 286.0, 293.0, 289.0, 284.0, 284.0, 298.0, 326.0, 304.0, 297.0, 282.0, 285.0, 291.0, 281.0, 298.0, 285.0, 291.0, 295.0, 281.0, 294.0, 276.0, 279.0, 303.0, 289.0, 281.0, 298.0, 281.0, 278.0, 298.0, 295.0, 298.0, 292.0, 295.0, 286.0, 296.0, 297.0, 285.0, 289.0, 290.0, 275.0, 301.0, 325.0, 308.0, 317.0, 319.0, 310.0, 326.0, 291.0, 296.0, 290.0, 289.0, 288.0, 294.0, 301.0, 281.0, 259.0, 260.0, 292.0, 290.0, 279.0, 291.0, 296.0, 337.0, 283.0, 296.0, 289.0, 298.0, 295.0, 286.0, 292.0, 287.0, 281.0, 301.0, 306.0, 276.0, 308.0, 322.0, 288.0, 294.0, 290.0, 277.0]}, "sampler_perf": {"mean_env_wait_ms": 1.245424462248642, "mean_processing_ms": 0.3171301897785842, "mean_inference_ms": 1.801636761317335}, "off_policy_estimator": {}, "info": {"num_steps_trained": 4536000, "num_steps_sampled": 2419200, "sample_time_ms": 21345.619, "load_time_ms": 36.86, "grad_time_ms": 10234.708, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0035058397334069014, "policy_loss": -0.0047208876349031925, "vf_loss": 87.84651947021484, "vf_explained_var": 0.7590529918670654, "kl": 0.0018027568003162742, "entropy": 1.1158560514450073, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2419200, "episodes_total": 6048, "training_iteration": 189, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-19-43", "timestamp": 1660252783, "time_this_iter_s": 29.704707860946655, "time_total_s": 11200.077214717865, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 11200.077214717865, "timesteps_since_restore": 2419200, "iterations_since_restore": 189, "perf": {"cpu_util_percent": 33.13571428571428, "ram_util_percent": 58.52142857142859}}
+{"episode_reward_max": 636.0, "episode_reward_min": 458.0, "episode_reward_mean": 581.49, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 226.0}, "policy_reward_max": {"ppo": 337.0}, "policy_reward_mean": {"ppo": 290.745}, "custom_metrics": {"sparse_reward_mean": 200.8, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 179.89, "shaped_reward_min": 138, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.59, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 17.76, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 16.32, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 25, "useful_onion_pickup_agent_1_mean": 17.06, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 25, "onion_drop_agent_0_mean": 0.61, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 0.52, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.16, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.19, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.65, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 17.04, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 5.93, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.5, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 5.37, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.87, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.3, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.39, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.04, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.37, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.87, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.31, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.76, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.65, 
"optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 17.04, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.65, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 17.04, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 579.0, 579.0, 582.0, 582.0, 582.0, 582.0, 579.0, 582.0, 521.0, 582.0, 579.0, 582.0, 522.0, 582.0, 527.0, 576.0, 522.0, 582.0, 582.0, 627.0, 576.0, 587.0, 627.0, 582.0, 579.0, 587.0, 573.0, 584.0, 630.0, 576.0, 579.0, 579.0, 576.0, 579.0, 576.0, 576.0, 570.0, 582.0, 570.0, 579.0, 576.0, 593.0, 587.0, 582.0, 582.0, 579.0, 576.0, 633.0, 636.0, 636.0, 587.0, 579.0, 582.0, 582.0, 519.0, 582.0, 570.0, 633.0, 579.0, 587.0, 581.0, 579.0, 582.0, 582.0, 630.0, 582.0, 567.0, 579.0, 458.0, 587.0, 579.0, 516.0, 582.0, 579.0, 590.0, 587.0, 582.0, 630.0, 533.0, 579.0, 630.0, 582.0, 630.0, 576.0, 582.0, 582.0, 587.0, 582.0, 582.0, 576.0, 584.0, 567.0, 582.0, 584.0, 576.0, 564.0, 582.0, 627.0, 633.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [290.0, 289.0, 288.0, 291.0, 291.0, 288.0, 294.0, 288.0, 294.0, 288.0, 286.0, 296.0, 297.0, 285.0, 282.0, 297.0, 293.0, 289.0, 270.0, 251.0, 295.0, 287.0, 288.0, 291.0, 285.0, 297.0, 263.0, 259.0, 285.0, 297.0, 275.0, 252.0, 290.0, 286.0, 279.0, 243.0, 294.0, 288.0, 290.0, 292.0, 311.0, 316.0, 289.0, 287.0, 295.0, 292.0, 314.0, 313.0, 291.0, 291.0, 288.0, 291.0, 304.0, 283.0, 292.0, 281.0, 290.0, 294.0, 320.0, 310.0, 286.0, 290.0, 283.0, 296.0, 297.0, 282.0, 285.0, 291.0, 281.0, 298.0, 285.0, 291.0, 295.0, 281.0, 294.0, 276.0, 279.0, 303.0, 289.0, 281.0, 298.0, 281.0, 278.0, 298.0, 295.0, 298.0, 292.0, 295.0, 286.0, 296.0, 297.0, 285.0, 289.0, 290.0, 275.0, 301.0, 325.0, 
308.0, 317.0, 319.0, 310.0, 326.0, 291.0, 296.0, 290.0, 289.0, 288.0, 294.0, 301.0, 281.0, 259.0, 260.0, 292.0, 290.0, 279.0, 291.0, 296.0, 337.0, 283.0, 296.0, 289.0, 298.0, 295.0, 286.0, 292.0, 287.0, 281.0, 301.0, 306.0, 276.0, 308.0, 322.0, 288.0, 294.0, 290.0, 277.0, 287.0, 292.0, 226.0, 232.0, 298.0, 289.0, 290.0, 289.0, 249.0, 267.0, 297.0, 285.0, 303.0, 276.0, 301.0, 289.0, 283.0, 304.0, 293.0, 289.0, 319.0, 311.0, 268.0, 265.0, 283.0, 296.0, 322.0, 308.0, 288.0, 294.0, 311.0, 319.0, 286.0, 290.0, 283.0, 299.0, 283.0, 299.0, 295.0, 292.0, 286.0, 296.0, 278.0, 304.0, 289.0, 287.0, 284.0, 300.0, 290.0, 277.0, 283.0, 299.0, 309.0, 275.0, 282.0, 294.0, 285.0, 279.0, 296.0, 286.0, 318.0, 309.0, 319.0, 314.0]}, "sampler_perf": {"mean_env_wait_ms": 1.2411902050042325, "mean_processing_ms": 0.3162859757027142, "mean_inference_ms": 1.7969666432132458}, "off_policy_estimator": {}, "info": {"num_steps_trained": 4560000, "num_steps_sampled": 2432000, "sample_time_ms": 21237.531, "load_time_ms": 36.84, "grad_time_ms": 10068.85, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.006991778966039419, "policy_loss": -0.0012481998419389129, "vf_loss": 87.9997329711914, "vf_explained_var": 0.7513763904571533, "kl": 0.0021018313709646463, "entropy": 1.119996428489685, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2432000, "episodes_total": 6080, "training_iteration": 190, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-20-11", "timestamp": 1660252811, "time_this_iter_s": 28.08810520172119, "time_total_s": 11228.165319919586, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 11228.165319919586, "timesteps_since_restore": 2432000, "iterations_since_restore": 190, "perf": {"cpu_util_percent": 33.77, "ram_util_percent": 58.345000000000006}}
+{"episode_reward_max": 636.0, "episode_reward_min": 441.0, "episode_reward_mean": 580.12, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 220.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 290.06}, "custom_metrics": {"sparse_reward_mean": 200.6, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 178.92, "shaped_reward_min": 121, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.13, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 18.02, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 15.84, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 25, "useful_onion_pickup_agent_1_mean": 17.29, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.49, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 0.62, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.15, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.21, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 15.37, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 17.17, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 6.21, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.33, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 12, "useful_dish_pickup_agent_0_mean": 5.5, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.6, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.4, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.43, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.06, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.57, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.65, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.48, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.57, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.37, 
"optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 17.17, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.37, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 17.17, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 579.0, 441.0, 582.0, 636.0, 582.0, 576.0, 582.0, 579.0, 621.0, 567.0, 579.0, 576.0, 567.0, 579.0, 582.0, 587.0, 573.0, 522.0, 581.0, 633.0, 627.0, 579.0, 587.0, 573.0, 579.0, 627.0, 587.0, 579.0, 582.0, 627.0, 522.0, 582.0, 630.0, 582.0, 567.0, 579.0, 458.0, 587.0, 579.0, 516.0, 582.0, 579.0, 590.0, 587.0, 582.0, 630.0, 533.0, 579.0, 630.0, 582.0, 630.0, 576.0, 582.0, 582.0, 587.0, 582.0, 582.0, 576.0, 584.0, 567.0, 582.0, 584.0, 576.0, 564.0, 582.0, 627.0, 633.0, 579.0, 579.0, 579.0, 582.0, 582.0, 582.0, 582.0, 579.0, 582.0, 521.0, 582.0, 579.0, 582.0, 522.0, 582.0, 527.0, 576.0, 522.0, 582.0, 582.0, 627.0, 576.0, 587.0, 627.0, 582.0, 579.0, 587.0, 573.0, 584.0, 630.0, 576.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [292.0, 287.0, 295.0, 284.0, 220.0, 221.0, 286.0, 296.0, 320.0, 316.0, 294.0, 288.0, 298.0, 278.0, 293.0, 289.0, 288.0, 291.0, 319.0, 302.0, 292.0, 275.0, 296.0, 283.0, 289.0, 287.0, 298.0, 269.0, 289.0, 290.0, 288.0, 294.0, 287.0, 300.0, 290.0, 283.0, 256.0, 266.0, 299.0, 282.0, 321.0, 312.0, 311.0, 316.0, 296.0, 283.0, 303.0, 284.0, 282.0, 291.0, 286.0, 293.0, 321.0, 306.0, 295.0, 292.0, 285.0, 294.0, 285.0, 297.0, 303.0, 324.0, 261.0, 261.0, 306.0, 276.0, 308.0, 322.0, 288.0, 294.0, 290.0, 277.0, 287.0, 292.0, 226.0, 232.0, 298.0, 289.0, 290.0, 289.0, 249.0, 267.0, 297.0, 285.0, 303.0, 276.0, 301.0, 289.0, 283.0, 304.0, 293.0, 289.0, 319.0, 311.0, 268.0, 265.0, 283.0, 
296.0, 322.0, 308.0, 288.0, 294.0, 311.0, 319.0, 286.0, 290.0, 283.0, 299.0, 283.0, 299.0, 295.0, 292.0, 286.0, 296.0, 278.0, 304.0, 289.0, 287.0, 284.0, 300.0, 290.0, 277.0, 283.0, 299.0, 309.0, 275.0, 282.0, 294.0, 285.0, 279.0, 296.0, 286.0, 318.0, 309.0, 319.0, 314.0, 290.0, 289.0, 288.0, 291.0, 291.0, 288.0, 294.0, 288.0, 294.0, 288.0, 286.0, 296.0, 297.0, 285.0, 282.0, 297.0, 293.0, 289.0, 270.0, 251.0, 295.0, 287.0, 288.0, 291.0, 285.0, 297.0, 263.0, 259.0, 285.0, 297.0, 275.0, 252.0, 290.0, 286.0, 279.0, 243.0, 294.0, 288.0, 290.0, 292.0, 311.0, 316.0, 289.0, 287.0, 295.0, 292.0, 314.0, 313.0, 291.0, 291.0, 288.0, 291.0, 304.0, 283.0, 292.0, 281.0, 290.0, 294.0, 320.0, 310.0, 286.0, 290.0, 283.0, 296.0]}, "sampler_perf": {"mean_env_wait_ms": 1.237012566934364, "mean_processing_ms": 0.3154539706719903, "mean_inference_ms": 1.7926176479402052}, "off_policy_estimator": {}, "info": {"num_steps_trained": 4584000, "num_steps_sampled": 2444800, "sample_time_ms": 21212.772, "load_time_ms": 37.115, "grad_time_ms": 9943.576, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.002480272436514497, "policy_loss": -0.005884131882339716, "vf_loss": 89.30957794189453, "vf_explained_var": 0.7648332118988037, "kl": 0.0016885297372937202, "entropy": 1.1330945491790771, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2444800, "episodes_total": 6112, "training_iteration": 191, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-20-41", "timestamp": 1660252841, "time_this_iter_s": 29.47701120376587, "time_total_s": 11257.642331123352, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 11257.642331123352, "timesteps_since_restore": 2444800, "iterations_since_restore": 191, "perf": {"cpu_util_percent": 32.38536585365854, "ram_util_percent": 58.368292682926814}}
+{"episode_reward_max": 636.0, "episode_reward_min": 441.0, "episode_reward_mean": 583.23, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 220.0}, "policy_reward_max": {"ppo": 331.0}, "policy_reward_mean": {"ppo": 291.615}, "custom_metrics": {"sparse_reward_mean": 201.8, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 179.63, "shaped_reward_min": 121, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.94, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 18.16, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 15.69, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 25, "useful_onion_pickup_agent_1_mean": 17.48, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.36, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.59, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.13, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.2, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 15.33, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 17.3, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 6.3, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.19, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 12, "useful_dish_pickup_agent_0_mean": 5.67, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.51, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.37, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.42, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.05, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.73, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.53, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.64, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.46, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.33, 
"optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 17.3, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.33, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 17.3, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 579.0, 587.0, 582.0, 630.0, 584.0, 579.0, 587.0, 584.0, 630.0, 522.0, 573.0, 630.0, 582.0, 570.0, 627.0, 627.0, 573.0, 579.0, 582.0, 582.0, 582.0, 633.0, 582.0, 576.0, 630.0, 582.0, 573.0, 579.0, 544.0, 579.0, 627.0, 564.0, 582.0, 627.0, 633.0, 579.0, 579.0, 579.0, 582.0, 582.0, 582.0, 582.0, 579.0, 582.0, 521.0, 582.0, 579.0, 582.0, 522.0, 582.0, 527.0, 576.0, 522.0, 582.0, 582.0, 627.0, 576.0, 587.0, 627.0, 582.0, 579.0, 587.0, 573.0, 584.0, 630.0, 576.0, 579.0, 579.0, 579.0, 441.0, 582.0, 636.0, 582.0, 576.0, 582.0, 579.0, 621.0, 567.0, 579.0, 576.0, 567.0, 579.0, 582.0, 587.0, 573.0, 522.0, 581.0, 633.0, 627.0, 579.0, 587.0, 573.0, 579.0, 627.0, 587.0, 579.0, 582.0, 627.0, 522.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [291.0, 288.0, 288.0, 291.0, 289.0, 298.0, 296.0, 286.0, 311.0, 319.0, 301.0, 283.0, 298.0, 281.0, 295.0, 292.0, 295.0, 289.0, 311.0, 319.0, 260.0, 262.0, 289.0, 284.0, 331.0, 299.0, 296.0, 286.0, 288.0, 282.0, 328.0, 299.0, 315.0, 312.0, 289.0, 284.0, 290.0, 289.0, 284.0, 298.0, 300.0, 282.0, 299.0, 283.0, 318.0, 315.0, 291.0, 291.0, 285.0, 291.0, 317.0, 313.0, 281.0, 301.0, 290.0, 283.0, 290.0, 289.0, 269.0, 275.0, 291.0, 288.0, 316.0, 311.0, 285.0, 279.0, 296.0, 286.0, 318.0, 309.0, 319.0, 314.0, 290.0, 289.0, 288.0, 291.0, 291.0, 288.0, 294.0, 288.0, 294.0, 288.0, 286.0, 296.0, 297.0, 285.0, 282.0, 297.0, 293.0, 289.0, 270.0, 251.0, 295.0, 287.0, 288.0, 291.0, 285.0, 
297.0, 263.0, 259.0, 285.0, 297.0, 275.0, 252.0, 290.0, 286.0, 279.0, 243.0, 294.0, 288.0, 290.0, 292.0, 311.0, 316.0, 289.0, 287.0, 295.0, 292.0, 314.0, 313.0, 291.0, 291.0, 288.0, 291.0, 304.0, 283.0, 292.0, 281.0, 290.0, 294.0, 320.0, 310.0, 286.0, 290.0, 283.0, 296.0, 292.0, 287.0, 295.0, 284.0, 220.0, 221.0, 286.0, 296.0, 320.0, 316.0, 294.0, 288.0, 298.0, 278.0, 293.0, 289.0, 288.0, 291.0, 319.0, 302.0, 292.0, 275.0, 296.0, 283.0, 289.0, 287.0, 298.0, 269.0, 289.0, 290.0, 288.0, 294.0, 287.0, 300.0, 290.0, 283.0, 256.0, 266.0, 299.0, 282.0, 321.0, 312.0, 311.0, 316.0, 296.0, 283.0, 303.0, 284.0, 282.0, 291.0, 286.0, 293.0, 321.0, 306.0, 295.0, 292.0, 285.0, 294.0, 285.0, 297.0, 303.0, 324.0, 261.0, 261.0]}, "sampler_perf": {"mean_env_wait_ms": 1.2328827841512373, "mean_processing_ms": 0.3146313961274523, "mean_inference_ms": 1.788385259276164}, "off_policy_estimator": {}, "info": {"num_steps_trained": 4608000, "num_steps_sampled": 2457600, "sample_time_ms": 21198.373, "load_time_ms": 37.126, "grad_time_ms": 9801.163, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.001665265765041113, "policy_loss": -0.006540585309267044, "vf_loss": 87.64542388916016, "vf_explained_var": 0.7604849338531494, "kl": 0.0022042018827050924, "entropy": 1.1173783540725708, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2457600, "episodes_total": 6144, "training_iteration": 192, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-21-10", "timestamp": 1660252870, "time_this_iter_s": 29.205125331878662, "time_total_s": 11286.84745645523, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 11286.84745645523, "timesteps_since_restore": 2457600, "iterations_since_restore": 192, "perf": {"cpu_util_percent": 31.859523809523814, "ram_util_percent": 58.37619047619048}}
+{"episode_reward_max": 636.0, "episode_reward_min": 441.0, "episode_reward_mean": 586.11, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 220.0}, "policy_reward_max": {"ppo": 331.0}, "policy_reward_mean": {"ppo": 293.055}, "custom_metrics": {"sparse_reward_mean": 202.8, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 180.51, "shaped_reward_min": 121, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.32, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 18.17, "onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 15.98, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 17.5, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.5, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.64, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.15, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.17, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 15.55, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 17.24, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 6.14, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.48, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 12, "useful_dish_pickup_agent_0_mean": 5.49, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.69, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 10, "dish_drop_agent_0_mean": 0.44, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.49, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.04, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.57, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.76, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.48, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.66, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.55, 
"optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 17.24, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.55, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 17.24, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [587.0, 587.0, 582.0, 636.0, 579.0, 570.0, 513.0, 579.0, 582.0, 587.0, 579.0, 582.0, 564.0, 633.0, 587.0, 630.0, 627.0, 579.0, 636.0, 582.0, 587.0, 584.0, 582.0, 582.0, 627.0, 582.0, 579.0, 579.0, 579.0, 587.0, 579.0, 567.0, 584.0, 630.0, 576.0, 579.0, 579.0, 579.0, 441.0, 582.0, 636.0, 582.0, 576.0, 582.0, 579.0, 621.0, 567.0, 579.0, 576.0, 567.0, 579.0, 582.0, 587.0, 573.0, 522.0, 581.0, 633.0, 627.0, 579.0, 587.0, 573.0, 579.0, 627.0, 587.0, 579.0, 582.0, 627.0, 522.0, 579.0, 579.0, 587.0, 582.0, 630.0, 584.0, 579.0, 587.0, 584.0, 630.0, 522.0, 573.0, 630.0, 582.0, 570.0, 627.0, 627.0, 573.0, 579.0, 582.0, 582.0, 582.0, 633.0, 582.0, 576.0, 630.0, 582.0, 573.0, 579.0, 544.0, 579.0, 627.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [291.0, 296.0, 292.0, 295.0, 274.0, 308.0, 312.0, 324.0, 304.0, 275.0, 287.0, 283.0, 261.0, 252.0, 277.0, 302.0, 298.0, 284.0, 291.0, 296.0, 278.0, 301.0, 285.0, 297.0, 285.0, 279.0, 323.0, 310.0, 278.0, 309.0, 308.0, 322.0, 311.0, 316.0, 288.0, 291.0, 314.0, 322.0, 286.0, 296.0, 294.0, 293.0, 286.0, 298.0, 293.0, 289.0, 294.0, 288.0, 312.0, 315.0, 287.0, 295.0, 295.0, 284.0, 293.0, 286.0, 296.0, 283.0, 298.0, 289.0, 284.0, 295.0, 288.0, 279.0, 290.0, 294.0, 320.0, 310.0, 286.0, 290.0, 283.0, 296.0, 292.0, 287.0, 295.0, 284.0, 220.0, 221.0, 286.0, 296.0, 320.0, 316.0, 294.0, 288.0, 298.0, 278.0, 293.0, 289.0, 288.0, 291.0, 319.0, 302.0, 292.0, 275.0, 296.0, 283.0, 289.0, 
287.0, 298.0, 269.0, 289.0, 290.0, 288.0, 294.0, 287.0, 300.0, 290.0, 283.0, 256.0, 266.0, 299.0, 282.0, 321.0, 312.0, 311.0, 316.0, 296.0, 283.0, 303.0, 284.0, 282.0, 291.0, 286.0, 293.0, 321.0, 306.0, 295.0, 292.0, 285.0, 294.0, 285.0, 297.0, 303.0, 324.0, 261.0, 261.0, 291.0, 288.0, 288.0, 291.0, 289.0, 298.0, 296.0, 286.0, 311.0, 319.0, 301.0, 283.0, 298.0, 281.0, 295.0, 292.0, 295.0, 289.0, 311.0, 319.0, 260.0, 262.0, 289.0, 284.0, 331.0, 299.0, 296.0, 286.0, 288.0, 282.0, 328.0, 299.0, 315.0, 312.0, 289.0, 284.0, 290.0, 289.0, 284.0, 298.0, 300.0, 282.0, 299.0, 283.0, 318.0, 315.0, 291.0, 291.0, 285.0, 291.0, 317.0, 313.0, 281.0, 301.0, 290.0, 283.0, 290.0, 289.0, 269.0, 275.0, 291.0, 288.0, 316.0, 311.0]}, "sampler_perf": {"mean_env_wait_ms": 1.2287946124429, "mean_processing_ms": 0.3138148366539807, "mean_inference_ms": 1.7841696712783897}, "off_policy_estimator": {}, "info": {"num_steps_trained": 4632000, "num_steps_sampled": 2470400, "sample_time_ms": 20938.204, "load_time_ms": 37.066, "grad_time_ms": 9514.831, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.005852494388818741, "policy_loss": -0.0018877206603065133, "vf_loss": 83.014892578125, "vf_explained_var": 0.7724275588989258, "kl": 0.0019637763034552336, "entropy": 1.1225537061691284, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2470400, "episodes_total": 6176, "training_iteration": 193, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-21-39", "timestamp": 1660252899, "time_this_iter_s": 28.640799045562744, "time_total_s": 11315.488255500793, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 11315.488255500793, "timesteps_since_restore": 2470400, "iterations_since_restore": 193, "perf": {"cpu_util_percent": 32.46, "ram_util_percent": 58.379999999999995}}
+{"episode_reward_max": 636.0, "episode_reward_min": 513.0, "episode_reward_mean": 587.29, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 252.0}, "policy_reward_max": {"ppo": 331.0}, "policy_reward_mean": {"ppo": 293.645}, "custom_metrics": {"sparse_reward_mean": 202.8, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 181.69, "shaped_reward_min": 153, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.56, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 18.12, "onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.21, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 17.44, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.57, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.52, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.18, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.09, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.71, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 17.3, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.99, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.64, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 5.45, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.87, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 10, "dish_drop_agent_0_mean": 0.33, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.53, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.04, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.43, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.92, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.34, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.8, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.71, 
"optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 17.3, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.71, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 17.3, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [633.0, 576.0, 582.0, 579.0, 636.0, 576.0, 582.0, 590.0, 584.0, 582.0, 573.0, 582.0, 525.0, 573.0, 587.0, 624.0, 576.0, 579.0, 582.0, 636.0, 582.0, 579.0, 581.0, 576.0, 587.0, 582.0, 582.0, 582.0, 582.0, 590.0, 582.0, 587.0, 579.0, 582.0, 627.0, 522.0, 579.0, 579.0, 587.0, 582.0, 630.0, 584.0, 579.0, 587.0, 584.0, 630.0, 522.0, 573.0, 630.0, 582.0, 570.0, 627.0, 627.0, 573.0, 579.0, 582.0, 582.0, 582.0, 633.0, 582.0, 576.0, 630.0, 582.0, 573.0, 579.0, 544.0, 579.0, 627.0, 587.0, 587.0, 582.0, 636.0, 579.0, 570.0, 513.0, 579.0, 582.0, 587.0, 579.0, 582.0, 564.0, 633.0, 587.0, 630.0, 627.0, 579.0, 636.0, 582.0, 587.0, 584.0, 582.0, 582.0, 627.0, 582.0, 579.0, 579.0, 579.0, 587.0, 579.0, 567.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [322.0, 311.0, 295.0, 281.0, 288.0, 294.0, 303.0, 276.0, 317.0, 319.0, 288.0, 288.0, 290.0, 292.0, 291.0, 299.0, 292.0, 292.0, 290.0, 292.0, 284.0, 289.0, 293.0, 289.0, 260.0, 265.0, 272.0, 301.0, 286.0, 301.0, 314.0, 310.0, 280.0, 296.0, 275.0, 304.0, 281.0, 301.0, 316.0, 320.0, 292.0, 290.0, 300.0, 279.0, 297.0, 284.0, 285.0, 291.0, 294.0, 293.0, 291.0, 291.0, 293.0, 289.0, 287.0, 295.0, 292.0, 290.0, 301.0, 289.0, 286.0, 296.0, 294.0, 293.0, 285.0, 294.0, 285.0, 297.0, 303.0, 324.0, 261.0, 261.0, 291.0, 288.0, 288.0, 291.0, 289.0, 298.0, 296.0, 286.0, 311.0, 319.0, 301.0, 283.0, 298.0, 281.0, 295.0, 292.0, 295.0, 289.0, 311.0, 319.0, 260.0, 262.0, 289.0, 284.0, 331.0, 
299.0, 296.0, 286.0, 288.0, 282.0, 328.0, 299.0, 315.0, 312.0, 289.0, 284.0, 290.0, 289.0, 284.0, 298.0, 300.0, 282.0, 299.0, 283.0, 318.0, 315.0, 291.0, 291.0, 285.0, 291.0, 317.0, 313.0, 281.0, 301.0, 290.0, 283.0, 290.0, 289.0, 269.0, 275.0, 291.0, 288.0, 316.0, 311.0, 291.0, 296.0, 292.0, 295.0, 274.0, 308.0, 312.0, 324.0, 304.0, 275.0, 287.0, 283.0, 261.0, 252.0, 277.0, 302.0, 298.0, 284.0, 291.0, 296.0, 278.0, 301.0, 285.0, 297.0, 285.0, 279.0, 323.0, 310.0, 278.0, 309.0, 308.0, 322.0, 311.0, 316.0, 288.0, 291.0, 314.0, 322.0, 286.0, 296.0, 294.0, 293.0, 286.0, 298.0, 293.0, 289.0, 294.0, 288.0, 312.0, 315.0, 287.0, 295.0, 295.0, 284.0, 293.0, 286.0, 296.0, 283.0, 298.0, 289.0, 284.0, 295.0, 288.0, 279.0]}, "sampler_perf": {"mean_env_wait_ms": 1.2247614188456033, "mean_processing_ms": 0.3130098403023273, "mean_inference_ms": 1.7801660461855682}, "off_policy_estimator": {}, "info": {"num_steps_trained": 4656000, "num_steps_sampled": 2483200, "sample_time_ms": 21025.315, "load_time_ms": 37.175, "grad_time_ms": 9483.469, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.005179767496883869, "policy_loss": -0.003016052069142461, "vf_loss": 87.5873031616211, "vf_explained_var": 0.7668092250823975, "kl": 0.0019739444833248854, "entropy": 1.125815987586975, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2483200, "episodes_total": 6208, "training_iteration": 194, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-22-11", "timestamp": 1660252931, "time_this_iter_s": 32.347792863845825, "time_total_s": 11347.83604836464, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 11347.83604836464, "timesteps_since_restore": 2483200, "iterations_since_restore": 194, "perf": {"cpu_util_percent": 33.310869565217395, "ram_util_percent": 58.36521739130432}}
+{"episode_reward_max": 636.0, "episode_reward_min": 460.0, "episode_reward_mean": 584.17, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 229.0}, "policy_reward_max": {"ppo": 326.0}, "policy_reward_mean": {"ppo": 292.085}, "custom_metrics": {"sparse_reward_mean": 201.6, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 180.97, "shaped_reward_min": 140, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.83, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 18.02, "onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.33, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 17.24, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.69, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.57, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.24, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.15, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 15.83, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 17.18, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.94, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.74, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 5.31, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.82, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 10, "dish_drop_agent_0_mean": 0.34, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.59, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.05, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.37, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.96, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.27, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.83, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.83, 
"optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 17.18, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.83, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 17.18, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [573.0, 582.0, 579.0, 579.0, 579.0, 536.0, 636.0, 576.0, 579.0, 582.0, 590.0, 587.0, 512.0, 582.0, 576.0, 567.0, 579.0, 579.0, 576.0, 579.0, 582.0, 460.0, 582.0, 633.0, 582.0, 573.0, 630.0, 582.0, 576.0, 587.0, 579.0, 630.0, 579.0, 544.0, 579.0, 627.0, 587.0, 587.0, 582.0, 636.0, 579.0, 570.0, 513.0, 579.0, 582.0, 587.0, 579.0, 582.0, 564.0, 633.0, 587.0, 630.0, 627.0, 579.0, 636.0, 582.0, 587.0, 584.0, 582.0, 582.0, 627.0, 582.0, 579.0, 579.0, 579.0, 587.0, 579.0, 567.0, 633.0, 576.0, 582.0, 579.0, 636.0, 576.0, 582.0, 590.0, 584.0, 582.0, 573.0, 582.0, 525.0, 573.0, 587.0, 624.0, 576.0, 579.0, 582.0, 636.0, 582.0, 579.0, 581.0, 576.0, 587.0, 582.0, 582.0, 582.0, 582.0, 590.0, 582.0, 587.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [296.0, 277.0, 290.0, 292.0, 286.0, 293.0, 288.0, 291.0, 285.0, 294.0, 266.0, 270.0, 326.0, 310.0, 290.0, 286.0, 293.0, 286.0, 291.0, 291.0, 303.0, 287.0, 295.0, 292.0, 247.0, 265.0, 283.0, 299.0, 298.0, 278.0, 289.0, 278.0, 283.0, 296.0, 293.0, 286.0, 301.0, 275.0, 287.0, 292.0, 291.0, 291.0, 229.0, 231.0, 281.0, 301.0, 313.0, 320.0, 290.0, 292.0, 281.0, 292.0, 322.0, 308.0, 301.0, 281.0, 288.0, 288.0, 306.0, 281.0, 290.0, 289.0, 322.0, 308.0, 290.0, 289.0, 269.0, 275.0, 291.0, 288.0, 316.0, 311.0, 291.0, 296.0, 292.0, 295.0, 274.0, 308.0, 312.0, 324.0, 304.0, 275.0, 287.0, 283.0, 261.0, 252.0, 277.0, 302.0, 298.0, 284.0, 291.0, 296.0, 278.0, 301.0, 285.0, 297.0, 285.0, 
279.0, 323.0, 310.0, 278.0, 309.0, 308.0, 322.0, 311.0, 316.0, 288.0, 291.0, 314.0, 322.0, 286.0, 296.0, 294.0, 293.0, 286.0, 298.0, 293.0, 289.0, 294.0, 288.0, 312.0, 315.0, 287.0, 295.0, 295.0, 284.0, 293.0, 286.0, 296.0, 283.0, 298.0, 289.0, 284.0, 295.0, 288.0, 279.0, 322.0, 311.0, 295.0, 281.0, 288.0, 294.0, 303.0, 276.0, 317.0, 319.0, 288.0, 288.0, 290.0, 292.0, 291.0, 299.0, 292.0, 292.0, 290.0, 292.0, 284.0, 289.0, 293.0, 289.0, 260.0, 265.0, 272.0, 301.0, 286.0, 301.0, 314.0, 310.0, 280.0, 296.0, 275.0, 304.0, 281.0, 301.0, 316.0, 320.0, 292.0, 290.0, 300.0, 279.0, 297.0, 284.0, 285.0, 291.0, 294.0, 293.0, 291.0, 291.0, 293.0, 289.0, 287.0, 295.0, 292.0, 290.0, 301.0, 289.0, 286.0, 296.0, 294.0, 293.0]}, "sampler_perf": {"mean_env_wait_ms": 1.2207711535428367, "mean_processing_ms": 0.31221340698687855, "mean_inference_ms": 1.7762098630677763}, "off_policy_estimator": {}, "info": {"num_steps_trained": 4680000, "num_steps_sampled": 2496000, "sample_time_ms": 21023.805, "load_time_ms": 36.814, "grad_time_ms": 9071.865, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.004175250884145498, "policy_loss": -0.004759882111102343, "vf_loss": 95.0163803100586, "vf_explained_var": 0.7534318566322327, "kl": 0.0021568441297858953, "entropy": 1.1329950094223022, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2496000, "episodes_total": 6240, "training_iteration": 195, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-22-40", "timestamp": 1660252960, "time_this_iter_s": 28.54381275177002, "time_total_s": 11376.37986111641, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 11376.37986111641, "timesteps_since_restore": 2496000, "iterations_since_restore": 195, "perf": {"cpu_util_percent": 32.82000000000001, "ram_util_percent": 58.44}}
+{"episode_reward_max": 636.0, "episode_reward_min": 460.0, "episode_reward_mean": 583.03, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 229.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 291.515}, "custom_metrics": {"sparse_reward_mean": 201.4, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 180.23, "shaped_reward_min": 140, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.29, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 18.47, "onion_pickup_agent_1_min": 12, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 15.81, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 17.61, "useful_onion_pickup_agent_1_min": 11, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.6, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.6, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.26, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.18, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 15.38, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 17.52, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 6.17, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.5, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.41, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.65, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.42, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.56, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.06, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.49, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.79, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.41, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.68, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.38, 
"optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 17.52, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.38, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 17.52, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 519.0, 510.0, 584.0, 587.0, 630.0, 587.0, 582.0, 587.0, 579.0, 573.0, 579.0, 633.0, 579.0, 587.0, 633.0, 579.0, 582.0, 579.0, 570.0, 630.0, 630.0, 579.0, 573.0, 582.0, 573.0, 579.0, 567.0, 582.0, 573.0, 630.0, 582.0, 579.0, 587.0, 579.0, 567.0, 633.0, 576.0, 582.0, 579.0, 636.0, 576.0, 582.0, 590.0, 584.0, 582.0, 573.0, 582.0, 525.0, 573.0, 587.0, 624.0, 576.0, 579.0, 582.0, 636.0, 582.0, 579.0, 581.0, 576.0, 587.0, 582.0, 582.0, 582.0, 582.0, 590.0, 582.0, 587.0, 573.0, 582.0, 579.0, 579.0, 579.0, 536.0, 636.0, 576.0, 579.0, 582.0, 590.0, 587.0, 512.0, 582.0, 576.0, 567.0, 579.0, 579.0, 576.0, 579.0, 582.0, 460.0, 582.0, 633.0, 582.0, 573.0, 630.0, 582.0, 576.0, 587.0, 579.0, 630.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [298.0, 281.0, 257.0, 262.0, 242.0, 268.0, 289.0, 295.0, 298.0, 289.0, 313.0, 317.0, 285.0, 302.0, 286.0, 296.0, 285.0, 302.0, 283.0, 296.0, 289.0, 284.0, 296.0, 283.0, 314.0, 319.0, 285.0, 294.0, 293.0, 294.0, 306.0, 327.0, 287.0, 292.0, 298.0, 284.0, 298.0, 281.0, 281.0, 289.0, 316.0, 314.0, 311.0, 319.0, 293.0, 286.0, 282.0, 291.0, 292.0, 290.0, 288.0, 285.0, 285.0, 294.0, 274.0, 293.0, 293.0, 289.0, 277.0, 296.0, 321.0, 309.0, 288.0, 294.0, 296.0, 283.0, 298.0, 289.0, 284.0, 295.0, 288.0, 279.0, 322.0, 311.0, 295.0, 281.0, 288.0, 294.0, 303.0, 276.0, 317.0, 319.0, 288.0, 288.0, 290.0, 292.0, 291.0, 299.0, 292.0, 292.0, 290.0, 292.0, 284.0, 289.0, 293.0, 289.0, 260.0, 
265.0, 272.0, 301.0, 286.0, 301.0, 314.0, 310.0, 280.0, 296.0, 275.0, 304.0, 281.0, 301.0, 316.0, 320.0, 292.0, 290.0, 300.0, 279.0, 297.0, 284.0, 285.0, 291.0, 294.0, 293.0, 291.0, 291.0, 293.0, 289.0, 287.0, 295.0, 292.0, 290.0, 301.0, 289.0, 286.0, 296.0, 294.0, 293.0, 296.0, 277.0, 290.0, 292.0, 286.0, 293.0, 288.0, 291.0, 285.0, 294.0, 266.0, 270.0, 326.0, 310.0, 290.0, 286.0, 293.0, 286.0, 291.0, 291.0, 303.0, 287.0, 295.0, 292.0, 247.0, 265.0, 283.0, 299.0, 298.0, 278.0, 289.0, 278.0, 283.0, 296.0, 293.0, 286.0, 301.0, 275.0, 287.0, 292.0, 291.0, 291.0, 229.0, 231.0, 281.0, 301.0, 313.0, 320.0, 290.0, 292.0, 281.0, 292.0, 322.0, 308.0, 301.0, 281.0, 288.0, 288.0, 306.0, 281.0, 290.0, 289.0, 322.0, 308.0]}, "sampler_perf": {"mean_env_wait_ms": 1.2168243946409252, "mean_processing_ms": 0.31142476273286085, "mean_inference_ms": 1.7722672001307933}, "off_policy_estimator": {}, "info": {"num_steps_trained": 4704000, "num_steps_sampled": 2508800, "sample_time_ms": 20149.8, "load_time_ms": 36.819, "grad_time_ms": 8683.766, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.005854760762304068, "policy_loss": -0.0029166024178266525, "vf_loss": 93.378173828125, "vf_explained_var": 0.7535201907157898, "kl": 0.00207762373611331, "entropy": 1.1329069137573242, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2508800, "episodes_total": 6272, "training_iteration": 196, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-23-08", "timestamp": 1660252988, "time_this_iter_s": 28.434014320373535, "time_total_s": 11404.813875436783, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 11404.813875436783, "timesteps_since_restore": 2508800, "iterations_since_restore": 196, "perf": {"cpu_util_percent": 34.197500000000005, "ram_util_percent": 58.575}}
+{"episode_reward_max": 636.0, "episode_reward_min": 460.0, "episode_reward_mean": 581.72, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 229.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 290.86}, "custom_metrics": {"sparse_reward_mean": 201.0, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 179.72, "shaped_reward_min": 140, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.38, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 18.2, "onion_pickup_agent_1_min": 12, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 15.95, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 17.32, "useful_onion_pickup_agent_1_min": 11, "useful_onion_pickup_agent_1_max": 25, "onion_drop_agent_0_mean": 0.6, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.64, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.24, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.24, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 15.52, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 17.23, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 6.05, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.52, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.31, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.73, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.43, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.51, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.06, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.42, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.87, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.34, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.74, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 15.52, 
"optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 17.23, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.52, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 17.23, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [587.0, 587.0, 587.0, 570.0, 579.0, 564.0, 582.0, 627.0, 539.0, 582.0, 630.0, 582.0, 579.0, 587.0, 573.0, 579.0, 579.0, 633.0, 582.0, 584.0, 582.0, 582.0, 582.0, 587.0, 579.0, 582.0, 579.0, 627.0, 576.0, 582.0, 516.0, 503.0, 582.0, 590.0, 582.0, 587.0, 573.0, 582.0, 579.0, 579.0, 579.0, 536.0, 636.0, 576.0, 579.0, 582.0, 590.0, 587.0, 512.0, 582.0, 576.0, 567.0, 579.0, 579.0, 576.0, 579.0, 582.0, 460.0, 582.0, 633.0, 582.0, 573.0, 630.0, 582.0, 576.0, 587.0, 579.0, 630.0, 579.0, 519.0, 510.0, 584.0, 587.0, 630.0, 587.0, 582.0, 587.0, 579.0, 573.0, 579.0, 633.0, 579.0, 587.0, 633.0, 579.0, 582.0, 579.0, 570.0, 630.0, 630.0, 579.0, 573.0, 582.0, 573.0, 579.0, 567.0, 582.0, 573.0, 630.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [296.0, 291.0, 297.0, 290.0, 287.0, 300.0, 289.0, 281.0, 290.0, 289.0, 273.0, 291.0, 291.0, 291.0, 327.0, 300.0, 272.0, 267.0, 283.0, 299.0, 319.0, 311.0, 292.0, 290.0, 289.0, 290.0, 294.0, 293.0, 288.0, 285.0, 290.0, 289.0, 276.0, 303.0, 310.0, 323.0, 286.0, 296.0, 299.0, 285.0, 295.0, 287.0, 281.0, 301.0, 288.0, 294.0, 289.0, 298.0, 293.0, 286.0, 288.0, 294.0, 281.0, 298.0, 321.0, 306.0, 296.0, 280.0, 293.0, 289.0, 250.0, 266.0, 256.0, 247.0, 292.0, 290.0, 301.0, 289.0, 286.0, 296.0, 294.0, 293.0, 296.0, 277.0, 290.0, 292.0, 286.0, 293.0, 288.0, 291.0, 285.0, 294.0, 266.0, 270.0, 326.0, 310.0, 290.0, 286.0, 293.0, 286.0, 291.0, 291.0, 303.0, 287.0, 295.0, 292.0, 247.0, 
265.0, 283.0, 299.0, 298.0, 278.0, 289.0, 278.0, 283.0, 296.0, 293.0, 286.0, 301.0, 275.0, 287.0, 292.0, 291.0, 291.0, 229.0, 231.0, 281.0, 301.0, 313.0, 320.0, 290.0, 292.0, 281.0, 292.0, 322.0, 308.0, 301.0, 281.0, 288.0, 288.0, 306.0, 281.0, 290.0, 289.0, 322.0, 308.0, 298.0, 281.0, 257.0, 262.0, 242.0, 268.0, 289.0, 295.0, 298.0, 289.0, 313.0, 317.0, 285.0, 302.0, 286.0, 296.0, 285.0, 302.0, 283.0, 296.0, 289.0, 284.0, 296.0, 283.0, 314.0, 319.0, 285.0, 294.0, 293.0, 294.0, 306.0, 327.0, 287.0, 292.0, 298.0, 284.0, 298.0, 281.0, 281.0, 289.0, 316.0, 314.0, 311.0, 319.0, 293.0, 286.0, 282.0, 291.0, 292.0, 290.0, 288.0, 285.0, 285.0, 294.0, 274.0, 293.0, 293.0, 289.0, 277.0, 296.0, 321.0, 309.0, 288.0, 294.0]}, "sampler_perf": {"mean_env_wait_ms": 1.2129132177848776, "mean_processing_ms": 0.31064427839869574, "mean_inference_ms": 1.768252985066466}, "off_policy_estimator": {}, "info": {"num_steps_trained": 4728000, "num_steps_sampled": 2521600, "sample_time_ms": 20346.302, "load_time_ms": 37.457, "grad_time_ms": 8681.875, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.004015960264950991, "policy_loss": -0.004478786140680313, "vf_loss": 90.65137481689453, "vf_explained_var": 0.7576496601104736, "kl": 0.0018428467446938157, "entropy": 1.140787959098816, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2521600, "episodes_total": 6304, "training_iteration": 197, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-23-39", "timestamp": 1660253019, "time_this_iter_s": 30.565216064453125, "time_total_s": 11435.379091501236, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 11435.379091501236, "timesteps_since_restore": 2521600, "iterations_since_restore": 197, "perf": {"cpu_util_percent": 35.77272727272727, "ram_util_percent": 59.17272727272728}}
+{"episode_reward_max": 633.0, "episode_reward_min": 498.0, "episode_reward_mean": 580.88, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 242.0}, "policy_reward_max": {"ppo": 331.0}, "policy_reward_mean": {"ppo": 290.44}, "custom_metrics": {"sparse_reward_mean": 200.8, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 179.28, "shaped_reward_min": 138, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.22, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 18.24, "onion_pickup_agent_1_min": 12, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 15.92, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 17.42, "useful_onion_pickup_agent_1_min": 11, "useful_onion_pickup_agent_1_max": 25, "onion_drop_agent_0_mean": 0.51, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.7, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.2, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.24, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.43, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 17.19, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.94, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.52, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.32, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.77, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.37, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.54, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.06, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.41, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.83, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.35, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.73, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 15.43, 
"optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 17.19, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.43, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 17.19, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [630.0, 498.0, 587.0, 573.0, 576.0, 587.0, 582.0, 587.0, 525.0, 576.0, 533.0, 630.0, 576.0, 573.0, 582.0, 582.0, 579.0, 582.0, 527.0, 573.0, 576.0, 582.0, 579.0, 579.0, 582.0, 579.0, 576.0, 579.0, 579.0, 579.0, 579.0, 582.0, 576.0, 587.0, 579.0, 630.0, 579.0, 519.0, 510.0, 584.0, 587.0, 630.0, 587.0, 582.0, 587.0, 579.0, 573.0, 579.0, 633.0, 579.0, 587.0, 633.0, 579.0, 582.0, 579.0, 570.0, 630.0, 630.0, 579.0, 573.0, 582.0, 573.0, 579.0, 567.0, 582.0, 573.0, 630.0, 582.0, 587.0, 587.0, 587.0, 570.0, 579.0, 564.0, 582.0, 627.0, 539.0, 582.0, 630.0, 582.0, 579.0, 587.0, 573.0, 579.0, 579.0, 633.0, 582.0, 584.0, 582.0, 582.0, 582.0, 587.0, 579.0, 582.0, 579.0, 627.0, 576.0, 582.0, 516.0, 503.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [309.0, 321.0, 253.0, 245.0, 287.0, 300.0, 281.0, 292.0, 288.0, 288.0, 294.0, 293.0, 285.0, 297.0, 288.0, 299.0, 266.0, 259.0, 283.0, 293.0, 265.0, 268.0, 299.0, 331.0, 293.0, 283.0, 293.0, 280.0, 286.0, 296.0, 285.0, 297.0, 299.0, 280.0, 303.0, 279.0, 267.0, 260.0, 293.0, 280.0, 282.0, 294.0, 299.0, 283.0, 295.0, 284.0, 286.0, 293.0, 298.0, 284.0, 293.0, 286.0, 292.0, 284.0, 287.0, 292.0, 293.0, 286.0, 288.0, 291.0, 294.0, 285.0, 298.0, 284.0, 288.0, 288.0, 306.0, 281.0, 290.0, 289.0, 322.0, 308.0, 298.0, 281.0, 257.0, 262.0, 242.0, 268.0, 289.0, 295.0, 298.0, 289.0, 313.0, 317.0, 285.0, 302.0, 286.0, 296.0, 285.0, 302.0, 283.0, 296.0, 289.0, 284.0, 296.0, 283.0, 314.0, 
319.0, 285.0, 294.0, 293.0, 294.0, 306.0, 327.0, 287.0, 292.0, 298.0, 284.0, 298.0, 281.0, 281.0, 289.0, 316.0, 314.0, 311.0, 319.0, 293.0, 286.0, 282.0, 291.0, 292.0, 290.0, 288.0, 285.0, 285.0, 294.0, 274.0, 293.0, 293.0, 289.0, 277.0, 296.0, 321.0, 309.0, 288.0, 294.0, 296.0, 291.0, 297.0, 290.0, 287.0, 300.0, 289.0, 281.0, 290.0, 289.0, 273.0, 291.0, 291.0, 291.0, 327.0, 300.0, 272.0, 267.0, 283.0, 299.0, 319.0, 311.0, 292.0, 290.0, 289.0, 290.0, 294.0, 293.0, 288.0, 285.0, 290.0, 289.0, 276.0, 303.0, 310.0, 323.0, 286.0, 296.0, 299.0, 285.0, 295.0, 287.0, 281.0, 301.0, 288.0, 294.0, 289.0, 298.0, 293.0, 286.0, 288.0, 294.0, 281.0, 298.0, 321.0, 306.0, 296.0, 280.0, 293.0, 289.0, 250.0, 266.0, 256.0, 247.0]}, "sampler_perf": {"mean_env_wait_ms": 1.2090434460421848, "mean_processing_ms": 0.3098731145088783, "mean_inference_ms": 1.7643973758778697}, "off_policy_estimator": {}, "info": {"num_steps_trained": 4752000, "num_steps_sampled": 2534400, "sample_time_ms": 20635.573, "load_time_ms": 37.478, "grad_time_ms": 8930.84, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.008007452823221684, "policy_loss": -0.00045695496373809874, "vf_loss": 90.38675689697266, "vf_explained_var": 0.7534659504890442, "kl": 0.0025916944723576307, "entropy": 1.1485199928283691, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2534400, "episodes_total": 6336, "training_iteration": 198, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-24-10", "timestamp": 1660253050, "time_this_iter_s": 31.295607089996338, "time_total_s": 11466.674698591232, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 11466.674698591232, "timesteps_since_restore": 2534400, "iterations_since_restore": 198, "perf": {"cpu_util_percent": 30.265909090909087, "ram_util_percent": 58.70227272727273}}
+{"episode_reward_max": 633.0, "episode_reward_min": 498.0, "episode_reward_mean": 579.56, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 245.0}, "policy_reward_max": {"ppo": 331.0}, "policy_reward_mean": {"ppo": 289.78}, "custom_metrics": {"sparse_reward_mean": 200.2, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 179.16, "shaped_reward_min": 138, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.35, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 18.22, "onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 16.02, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 17.45, "useful_onion_pickup_agent_1_min": 11, "useful_onion_pickup_agent_1_max": 25, "onion_drop_agent_0_mean": 0.6, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.77, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.25, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.3, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 15.48, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 17.14, "potting_onion_agent_1_min": 11, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.99, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.46, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.39, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.76, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.39, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.53, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.05, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.43, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.77, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.37, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.7, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 15.48, 
"optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 17.14, "optimal_onion_potting_agent_1_min": 11, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.48, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 17.14, "viable_onion_potting_agent_1_min": 11, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 582.0, 576.0, 576.0, 582.0, 627.0, 582.0, 630.0, 579.0, 579.0, 576.0, 516.0, 579.0, 590.0, 573.0, 582.0, 587.0, 579.0, 576.0, 576.0, 582.0, 536.0, 582.0, 582.0, 633.0, 582.0, 582.0, 582.0, 579.0, 564.0, 579.0, 582.0, 582.0, 573.0, 630.0, 582.0, 587.0, 587.0, 587.0, 570.0, 579.0, 564.0, 582.0, 627.0, 539.0, 582.0, 630.0, 582.0, 579.0, 587.0, 573.0, 579.0, 579.0, 633.0, 582.0, 584.0, 582.0, 582.0, 582.0, 587.0, 579.0, 582.0, 579.0, 627.0, 576.0, 582.0, 516.0, 503.0, 630.0, 498.0, 587.0, 573.0, 576.0, 587.0, 582.0, 587.0, 525.0, 576.0, 533.0, 630.0, 576.0, 573.0, 582.0, 582.0, 579.0, 582.0, 527.0, 573.0, 576.0, 582.0, 579.0, 579.0, 582.0, 579.0, 576.0, 579.0, 579.0, 579.0, 579.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [301.0, 278.0, 285.0, 297.0, 287.0, 289.0, 284.0, 292.0, 290.0, 292.0, 316.0, 311.0, 280.0, 302.0, 314.0, 316.0, 296.0, 283.0, 293.0, 286.0, 283.0, 293.0, 265.0, 251.0, 288.0, 291.0, 296.0, 294.0, 287.0, 286.0, 291.0, 291.0, 294.0, 293.0, 283.0, 296.0, 298.0, 278.0, 290.0, 286.0, 293.0, 289.0, 266.0, 270.0, 298.0, 284.0, 295.0, 287.0, 318.0, 315.0, 296.0, 286.0, 280.0, 302.0, 288.0, 294.0, 293.0, 286.0, 287.0, 277.0, 280.0, 299.0, 296.0, 286.0, 293.0, 289.0, 277.0, 296.0, 321.0, 309.0, 288.0, 294.0, 296.0, 291.0, 297.0, 290.0, 287.0, 300.0, 289.0, 281.0, 290.0, 289.0, 273.0, 291.0, 291.0, 291.0, 327.0, 300.0, 272.0, 267.0, 283.0, 299.0, 319.0, 311.0, 292.0, 290.0, 289.0, 
290.0, 294.0, 293.0, 288.0, 285.0, 290.0, 289.0, 276.0, 303.0, 310.0, 323.0, 286.0, 296.0, 299.0, 285.0, 295.0, 287.0, 281.0, 301.0, 288.0, 294.0, 289.0, 298.0, 293.0, 286.0, 288.0, 294.0, 281.0, 298.0, 321.0, 306.0, 296.0, 280.0, 293.0, 289.0, 250.0, 266.0, 256.0, 247.0, 309.0, 321.0, 253.0, 245.0, 287.0, 300.0, 281.0, 292.0, 288.0, 288.0, 294.0, 293.0, 285.0, 297.0, 288.0, 299.0, 266.0, 259.0, 283.0, 293.0, 265.0, 268.0, 299.0, 331.0, 293.0, 283.0, 293.0, 280.0, 286.0, 296.0, 285.0, 297.0, 299.0, 280.0, 303.0, 279.0, 267.0, 260.0, 293.0, 280.0, 282.0, 294.0, 299.0, 283.0, 295.0, 284.0, 286.0, 293.0, 298.0, 284.0, 293.0, 286.0, 292.0, 284.0, 287.0, 292.0, 293.0, 286.0, 288.0, 291.0, 294.0, 285.0, 298.0, 284.0]}, "sampler_perf": {"mean_env_wait_ms": 1.2052169809302002, "mean_processing_ms": 0.3091122486533884, "mean_inference_ms": 1.7607399677301792}, "off_policy_estimator": {}, "info": {"num_steps_trained": 4776000, "num_steps_sampled": 2547200, "sample_time_ms": 20764.539, "load_time_ms": 37.37, "grad_time_ms": 8925.603, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.002967844484373927, "policy_loss": -0.005320979747921228, "vf_loss": 88.59487915039062, "vf_explained_var": 0.7679054141044617, "kl": 0.0019444593926891685, "entropy": 1.141340732574463, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2547200, "episodes_total": 6368, "training_iteration": 199, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-24-41", "timestamp": 1660253081, "time_this_iter_s": 30.933609008789062, "time_total_s": 11497.608307600021, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 11497.608307600021, "timesteps_since_restore": 2547200, "iterations_since_restore": 199, "perf": {"cpu_util_percent": 31.409090909090903, "ram_util_percent": 58.724999999999994}}
+{"episode_reward_max": 633.0, "episode_reward_min": 368.0, "episode_reward_mean": 574.48, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 179.0}, "policy_reward_max": {"ppo": 331.0}, "policy_reward_mean": {"ppo": 287.24}, "custom_metrics": {"sparse_reward_mean": 198.6, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 177.28, "shaped_reward_min": 128, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.35, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 17.95, "onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 15.96, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 17.14, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.54, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.7, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.25, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.31, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 15.48, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 16.93, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.05, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.43, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 5.24, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.61, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.54, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.48, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.06, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.38, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.76, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.28, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.7, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.03, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.48, 
"optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 16.93, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.48, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 16.93, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [630.0, 576.0, 587.0, 579.0, 579.0, 582.0, 576.0, 489.0, 530.0, 579.0, 570.0, 630.0, 582.0, 587.0, 587.0, 567.0, 587.0, 368.0, 579.0, 498.0, 587.0, 573.0, 582.0, 579.0, 633.0, 587.0, 525.0, 579.0, 567.0, 633.0, 582.0, 582.0, 576.0, 582.0, 516.0, 503.0, 630.0, 498.0, 587.0, 573.0, 576.0, 587.0, 582.0, 587.0, 525.0, 576.0, 533.0, 630.0, 576.0, 573.0, 582.0, 582.0, 579.0, 582.0, 527.0, 573.0, 576.0, 582.0, 579.0, 579.0, 582.0, 579.0, 576.0, 579.0, 579.0, 579.0, 579.0, 582.0, 579.0, 582.0, 576.0, 576.0, 582.0, 627.0, 582.0, 630.0, 579.0, 579.0, 576.0, 516.0, 579.0, 590.0, 573.0, 582.0, 587.0, 579.0, 576.0, 576.0, 582.0, 536.0, 582.0, 582.0, 633.0, 582.0, 582.0, 582.0, 579.0, 564.0, 579.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [316.0, 314.0, 279.0, 297.0, 309.0, 278.0, 286.0, 293.0, 285.0, 294.0, 295.0, 287.0, 284.0, 292.0, 249.0, 240.0, 264.0, 266.0, 273.0, 306.0, 281.0, 289.0, 316.0, 314.0, 283.0, 299.0, 292.0, 295.0, 285.0, 302.0, 289.0, 278.0, 299.0, 288.0, 189.0, 179.0, 293.0, 286.0, 253.0, 245.0, 296.0, 291.0, 291.0, 282.0, 293.0, 289.0, 290.0, 289.0, 322.0, 311.0, 293.0, 294.0, 256.0, 269.0, 280.0, 299.0, 284.0, 283.0, 316.0, 317.0, 291.0, 291.0, 291.0, 291.0, 296.0, 280.0, 293.0, 289.0, 250.0, 266.0, 256.0, 247.0, 309.0, 321.0, 253.0, 245.0, 287.0, 300.0, 281.0, 292.0, 288.0, 288.0, 294.0, 293.0, 285.0, 297.0, 288.0, 299.0, 266.0, 259.0, 283.0, 293.0, 265.0, 268.0, 299.0, 331.0, 293.0, 
283.0, 293.0, 280.0, 286.0, 296.0, 285.0, 297.0, 299.0, 280.0, 303.0, 279.0, 267.0, 260.0, 293.0, 280.0, 282.0, 294.0, 299.0, 283.0, 295.0, 284.0, 286.0, 293.0, 298.0, 284.0, 293.0, 286.0, 292.0, 284.0, 287.0, 292.0, 293.0, 286.0, 288.0, 291.0, 294.0, 285.0, 298.0, 284.0, 301.0, 278.0, 285.0, 297.0, 287.0, 289.0, 284.0, 292.0, 290.0, 292.0, 316.0, 311.0, 280.0, 302.0, 314.0, 316.0, 296.0, 283.0, 293.0, 286.0, 283.0, 293.0, 265.0, 251.0, 288.0, 291.0, 296.0, 294.0, 287.0, 286.0, 291.0, 291.0, 294.0, 293.0, 283.0, 296.0, 298.0, 278.0, 290.0, 286.0, 293.0, 289.0, 266.0, 270.0, 298.0, 284.0, 295.0, 287.0, 318.0, 315.0, 296.0, 286.0, 280.0, 302.0, 288.0, 294.0, 293.0, 286.0, 287.0, 277.0, 280.0, 299.0, 296.0, 286.0]}, "sampler_perf": {"mean_env_wait_ms": 1.2014273105628746, "mean_processing_ms": 0.30835462294201915, "mean_inference_ms": 1.7571376095037237}, "off_policy_estimator": {}, "info": {"num_steps_trained": 4800000, "num_steps_sampled": 2560000, "sample_time_ms": 20842.97, "load_time_ms": 37.195, "grad_time_ms": 9109.166, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.005231037735939026, "policy_loss": -0.0033684810623526573, "vf_loss": 91.692626953125, "vf_explained_var": 0.7593931555747986, "kl": 0.002331085503101349, "entropy": 1.1394835710525513, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2560000, "episodes_total": 6400, "training_iteration": 200, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-25-12", "timestamp": 1660253112, "time_this_iter_s": 30.703901290893555, "time_total_s": 11528.312208890915, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 11528.312208890915, "timesteps_since_restore": 2560000, "iterations_since_restore": 200, "perf": {"cpu_util_percent": 32.688372093023254, "ram_util_percent": 58.67906976744187}}
+{"episode_reward_max": 633.0, "episode_reward_min": 299.0, "episode_reward_mean": 576.94, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 139.0}, "policy_reward_max": {"ppo": 326.0}, "policy_reward_mean": {"ppo": 288.47}, "custom_metrics": {"sparse_reward_mean": 199.6, "sparse_reward_min": 100, "sparse_reward_max": 220, "shaped_reward_mean": 177.74, "shaped_reward_min": 99, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.58, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 17.98, "onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 16.07, "useful_onion_pickup_agent_0_min": 4, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 17.25, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.66, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.75, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.32, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, "useful_onion_drop_agent_1_mean": 0.35, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 15.63, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 16.87, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.08, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.59, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 5.2, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.68, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.65, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.52, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.04, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.05, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 5.26, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.9, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.16, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.86, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.03, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.63, 
"optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 16.87, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.63, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 16.87, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [576.0, 587.0, 627.0, 579.0, 627.0, 579.0, 630.0, 630.0, 630.0, 579.0, 579.0, 299.0, 633.0, 582.0, 582.0, 522.0, 630.0, 582.0, 627.0, 567.0, 564.0, 582.0, 555.0, 582.0, 579.0, 576.0, 579.0, 533.0, 576.0, 576.0, 582.0, 582.0, 579.0, 579.0, 579.0, 582.0, 579.0, 582.0, 576.0, 576.0, 582.0, 627.0, 582.0, 630.0, 579.0, 579.0, 576.0, 516.0, 579.0, 590.0, 573.0, 582.0, 587.0, 579.0, 576.0, 576.0, 582.0, 536.0, 582.0, 582.0, 633.0, 582.0, 582.0, 582.0, 579.0, 564.0, 579.0, 582.0, 630.0, 576.0, 587.0, 579.0, 579.0, 582.0, 576.0, 489.0, 530.0, 579.0, 570.0, 630.0, 582.0, 587.0, 587.0, 567.0, 587.0, 368.0, 579.0, 498.0, 587.0, 573.0, 582.0, 579.0, 633.0, 587.0, 525.0, 579.0, 567.0, 633.0, 582.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [283.0, 293.0, 287.0, 300.0, 309.0, 318.0, 298.0, 281.0, 304.0, 323.0, 297.0, 282.0, 322.0, 308.0, 305.0, 325.0, 308.0, 322.0, 277.0, 302.0, 284.0, 295.0, 160.0, 139.0, 322.0, 311.0, 289.0, 293.0, 304.0, 278.0, 253.0, 269.0, 304.0, 326.0, 302.0, 280.0, 303.0, 324.0, 280.0, 287.0, 285.0, 279.0, 286.0, 296.0, 279.0, 276.0, 295.0, 287.0, 283.0, 296.0, 283.0, 293.0, 282.0, 297.0, 265.0, 268.0, 293.0, 283.0, 289.0, 287.0, 291.0, 291.0, 295.0, 287.0, 293.0, 286.0, 288.0, 291.0, 294.0, 285.0, 298.0, 284.0, 301.0, 278.0, 285.0, 297.0, 287.0, 289.0, 284.0, 292.0, 290.0, 292.0, 316.0, 311.0, 280.0, 302.0, 314.0, 316.0, 296.0, 283.0, 293.0, 286.0, 283.0, 293.0, 265.0, 251.0, 288.0, 
291.0, 296.0, 294.0, 287.0, 286.0, 291.0, 291.0, 294.0, 293.0, 283.0, 296.0, 298.0, 278.0, 290.0, 286.0, 293.0, 289.0, 266.0, 270.0, 298.0, 284.0, 295.0, 287.0, 318.0, 315.0, 296.0, 286.0, 280.0, 302.0, 288.0, 294.0, 293.0, 286.0, 287.0, 277.0, 280.0, 299.0, 296.0, 286.0, 316.0, 314.0, 279.0, 297.0, 309.0, 278.0, 286.0, 293.0, 285.0, 294.0, 295.0, 287.0, 284.0, 292.0, 249.0, 240.0, 264.0, 266.0, 273.0, 306.0, 281.0, 289.0, 316.0, 314.0, 283.0, 299.0, 292.0, 295.0, 285.0, 302.0, 289.0, 278.0, 299.0, 288.0, 189.0, 179.0, 293.0, 286.0, 253.0, 245.0, 296.0, 291.0, 291.0, 282.0, 293.0, 289.0, 290.0, 289.0, 322.0, 311.0, 293.0, 294.0, 256.0, 269.0, 280.0, 299.0, 284.0, 283.0, 316.0, 317.0, 291.0, 291.0, 291.0, 291.0]}, "sampler_perf": {"mean_env_wait_ms": 1.1976757528000228, "mean_processing_ms": 0.3076033986967843, "mean_inference_ms": 1.7536061115922081}, "off_policy_estimator": {}, "info": {"num_steps_trained": 4824000, "num_steps_sampled": 2572800, "sample_time_ms": 20929.774, "load_time_ms": 36.844, "grad_time_ms": 9259.169, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0019615469500422478, "policy_loss": -0.006233252584934235, "vf_loss": 87.63289642333984, "vf_explained_var": 0.7635285258293152, "kl": 0.0017622611485421658, "entropy": 1.1369844675064087, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2572800, "episodes_total": 6432, "training_iteration": 201, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-25-44", "timestamp": 1660253144, "time_this_iter_s": 31.842552185058594, "time_total_s": 11560.154761075974, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 11560.154761075974, "timesteps_since_restore": 2572800, "iterations_since_restore": 201, "perf": {"cpu_util_percent": 24.447826086956525, "ram_util_percent": 58.667391304347845}}
+{"episode_reward_max": 633.0, "episode_reward_min": 299.0, "episode_reward_mean": 573.45, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 139.0}, "policy_reward_max": {"ppo": 326.0}, "policy_reward_mean": {"ppo": 286.725}, "custom_metrics": {"sparse_reward_mean": 198.2, "sparse_reward_min": 100, "sparse_reward_max": 220, "shaped_reward_mean": 177.05, "shaped_reward_min": 99, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.26, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 18.02, "onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 15.77, "useful_onion_pickup_agent_0_min": 4, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 17.2, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.56, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.76, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.27, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, "useful_onion_drop_agent_1_mean": 0.33, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 15.46, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 16.88, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 6.18, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 5.5, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 5.24, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.62, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.71, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.45, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.04, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.05, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 5.28, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.83, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.17, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.8, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.46, 
"optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 16.88, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.46, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 16.88, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 533.0, 576.0, 582.0, 579.0, 582.0, 579.0, 582.0, 582.0, 582.0, 479.0, 576.0, 519.0, 525.0, 627.0, 522.0, 582.0, 587.0, 587.0, 627.0, 582.0, 582.0, 582.0, 579.0, 582.0, 579.0, 579.0, 582.0, 475.0, 587.0, 582.0, 579.0, 579.0, 564.0, 579.0, 582.0, 630.0, 576.0, 587.0, 579.0, 579.0, 582.0, 576.0, 489.0, 530.0, 579.0, 570.0, 630.0, 582.0, 587.0, 587.0, 567.0, 587.0, 368.0, 579.0, 498.0, 587.0, 573.0, 582.0, 579.0, 633.0, 587.0, 525.0, 579.0, 567.0, 633.0, 582.0, 582.0, 576.0, 587.0, 627.0, 579.0, 627.0, 579.0, 630.0, 630.0, 630.0, 579.0, 579.0, 299.0, 633.0, 582.0, 582.0, 522.0, 630.0, 582.0, 627.0, 567.0, 564.0, 582.0, 555.0, 582.0, 579.0, 576.0, 579.0, 533.0, 576.0, 576.0, 582.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [294.0, 285.0, 261.0, 272.0, 283.0, 293.0, 297.0, 285.0, 288.0, 291.0, 283.0, 299.0, 291.0, 288.0, 281.0, 301.0, 283.0, 299.0, 299.0, 283.0, 248.0, 231.0, 295.0, 281.0, 264.0, 255.0, 260.0, 265.0, 309.0, 318.0, 274.0, 248.0, 294.0, 288.0, 283.0, 304.0, 301.0, 286.0, 318.0, 309.0, 288.0, 294.0, 293.0, 289.0, 296.0, 286.0, 299.0, 280.0, 281.0, 301.0, 300.0, 279.0, 288.0, 291.0, 289.0, 293.0, 243.0, 232.0, 300.0, 287.0, 297.0, 285.0, 284.0, 295.0, 293.0, 286.0, 287.0, 277.0, 280.0, 299.0, 296.0, 286.0, 316.0, 314.0, 279.0, 297.0, 309.0, 278.0, 286.0, 293.0, 285.0, 294.0, 295.0, 287.0, 284.0, 292.0, 249.0, 240.0, 264.0, 266.0, 273.0, 306.0, 281.0, 289.0, 316.0, 314.0, 283.0, 
299.0, 292.0, 295.0, 285.0, 302.0, 289.0, 278.0, 299.0, 288.0, 189.0, 179.0, 293.0, 286.0, 253.0, 245.0, 296.0, 291.0, 291.0, 282.0, 293.0, 289.0, 290.0, 289.0, 322.0, 311.0, 293.0, 294.0, 256.0, 269.0, 280.0, 299.0, 284.0, 283.0, 316.0, 317.0, 291.0, 291.0, 291.0, 291.0, 283.0, 293.0, 287.0, 300.0, 309.0, 318.0, 298.0, 281.0, 304.0, 323.0, 297.0, 282.0, 322.0, 308.0, 305.0, 325.0, 308.0, 322.0, 277.0, 302.0, 284.0, 295.0, 160.0, 139.0, 322.0, 311.0, 289.0, 293.0, 304.0, 278.0, 253.0, 269.0, 304.0, 326.0, 302.0, 280.0, 303.0, 324.0, 280.0, 287.0, 285.0, 279.0, 286.0, 296.0, 279.0, 276.0, 295.0, 287.0, 283.0, 296.0, 283.0, 293.0, 282.0, 297.0, 265.0, 268.0, 293.0, 283.0, 289.0, 287.0, 291.0, 291.0, 295.0, 287.0]}, "sampler_perf": {"mean_env_wait_ms": 1.1939639805139826, "mean_processing_ms": 0.3068617868060299, "mean_inference_ms": 1.750206276185966}, "off_policy_estimator": {}, "info": {"num_steps_trained": 4848000, "num_steps_sampled": 2585600, "sample_time_ms": 21104.737, "load_time_ms": 36.737, "grad_time_ms": 9324.388, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0035205576568841934, "policy_loss": -0.004760665353387594, "vf_loss": 88.47342681884766, "vf_explained_var": 0.7671054005622864, "kl": 0.0017035487107932568, "entropy": 1.1322449445724487, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2585600, "episodes_total": 6464, "training_iteration": 202, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-26-16", "timestamp": 1660253176, "time_this_iter_s": 31.605774879455566, "time_total_s": 11591.760535955429, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 11591.760535955429, "timesteps_since_restore": 2585600, "iterations_since_restore": 202, "perf": {"cpu_util_percent": 31.240000000000006, "ram_util_percent": 58.77555555555557}}
+{"episode_reward_max": 633.0, "episode_reward_min": 299.0, "episode_reward_mean": 580.31, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 139.0}, "policy_reward_max": {"ppo": 326.0}, "policy_reward_mean": {"ppo": 290.155}, "custom_metrics": {"sparse_reward_mean": 200.4, "sparse_reward_min": 100, "sparse_reward_max": 220, "shaped_reward_mean": 179.51, "shaped_reward_min": 99, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.11, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 18.4, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 15.71, "useful_onion_pickup_agent_0_min": 4, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 17.64, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.53, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.71, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.24, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, "useful_onion_drop_agent_1_mean": 0.26, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 15.34, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 17.27, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 6.36, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 5.33, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 5.57, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.69, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.58, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.44, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.06, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 5.48, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.7, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.44, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.66, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.34, 
"optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 17.27, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.34, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 17.27, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 587.0, 564.0, 579.0, 579.0, 582.0, 582.0, 582.0, 582.0, 582.0, 630.0, 633.0, 579.0, 539.0, 536.0, 582.0, 582.0, 587.0, 627.0, 587.0, 579.0, 630.0, 584.0, 579.0, 579.0, 579.0, 579.0, 630.0, 633.0, 630.0, 582.0, 630.0, 567.0, 633.0, 582.0, 582.0, 576.0, 587.0, 627.0, 579.0, 627.0, 579.0, 630.0, 630.0, 630.0, 579.0, 579.0, 299.0, 633.0, 582.0, 582.0, 522.0, 630.0, 582.0, 627.0, 567.0, 564.0, 582.0, 555.0, 582.0, 579.0, 576.0, 579.0, 533.0, 576.0, 576.0, 582.0, 582.0, 579.0, 533.0, 576.0, 582.0, 579.0, 582.0, 579.0, 582.0, 582.0, 582.0, 479.0, 576.0, 519.0, 525.0, 627.0, 522.0, 582.0, 587.0, 587.0, 627.0, 582.0, 582.0, 582.0, 579.0, 582.0, 579.0, 579.0, 582.0, 475.0, 587.0, 582.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [285.0, 297.0, 286.0, 301.0, 280.0, 284.0, 288.0, 291.0, 291.0, 288.0, 299.0, 283.0, 301.0, 281.0, 292.0, 290.0, 291.0, 291.0, 293.0, 289.0, 307.0, 323.0, 318.0, 315.0, 294.0, 285.0, 270.0, 269.0, 266.0, 270.0, 291.0, 291.0, 302.0, 280.0, 290.0, 297.0, 316.0, 311.0, 302.0, 285.0, 291.0, 288.0, 321.0, 309.0, 291.0, 293.0, 293.0, 286.0, 286.0, 293.0, 294.0, 285.0, 290.0, 289.0, 320.0, 310.0, 313.0, 320.0, 311.0, 319.0, 305.0, 277.0, 313.0, 317.0, 284.0, 283.0, 316.0, 317.0, 291.0, 291.0, 291.0, 291.0, 283.0, 293.0, 287.0, 300.0, 309.0, 318.0, 298.0, 281.0, 304.0, 323.0, 297.0, 282.0, 322.0, 308.0, 305.0, 325.0, 308.0, 322.0, 277.0, 302.0, 284.0, 295.0, 160.0, 139.0, 322.0, 
311.0, 289.0, 293.0, 304.0, 278.0, 253.0, 269.0, 304.0, 326.0, 302.0, 280.0, 303.0, 324.0, 280.0, 287.0, 285.0, 279.0, 286.0, 296.0, 279.0, 276.0, 295.0, 287.0, 283.0, 296.0, 283.0, 293.0, 282.0, 297.0, 265.0, 268.0, 293.0, 283.0, 289.0, 287.0, 291.0, 291.0, 295.0, 287.0, 294.0, 285.0, 261.0, 272.0, 283.0, 293.0, 297.0, 285.0, 288.0, 291.0, 283.0, 299.0, 291.0, 288.0, 281.0, 301.0, 283.0, 299.0, 299.0, 283.0, 248.0, 231.0, 295.0, 281.0, 264.0, 255.0, 260.0, 265.0, 309.0, 318.0, 274.0, 248.0, 294.0, 288.0, 283.0, 304.0, 301.0, 286.0, 318.0, 309.0, 288.0, 294.0, 293.0, 289.0, 296.0, 286.0, 299.0, 280.0, 281.0, 301.0, 300.0, 279.0, 288.0, 291.0, 289.0, 293.0, 243.0, 232.0, 300.0, 287.0, 297.0, 285.0, 284.0, 295.0]}, "sampler_perf": {"mean_env_wait_ms": 1.190287620141578, "mean_processing_ms": 0.3061308463803027, "mean_inference_ms": 1.7468323528445506}, "off_policy_estimator": {}, "info": {"num_steps_trained": 4872000, "num_steps_sampled": 2598400, "sample_time_ms": 21208.335, "load_time_ms": 36.421, "grad_time_ms": 9429.96, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0062708244659006596, "policy_loss": -0.0019446747610345483, "vf_loss": 87.80118560791016, "vf_explained_var": 0.7648043632507324, "kl": 0.001872226013801992, "entropy": 1.1292202472686768, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2598400, "episodes_total": 6496, "training_iteration": 203, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-26-47", "timestamp": 1660253207, "time_this_iter_s": 30.729102849960327, "time_total_s": 11622.48963880539, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 11622.48963880539, "timesteps_since_restore": 2598400, "iterations_since_restore": 203, "perf": {"cpu_util_percent": 31.509302325581398, "ram_util_percent": 58.665116279069764}}
+{"episode_reward_max": 633.0, "episode_reward_min": 475.0, "episode_reward_mean": 581.57, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 231.0}, "policy_reward_max": {"ppo": 326.0}, "policy_reward_mean": {"ppo": 290.785}, "custom_metrics": {"sparse_reward_mean": 200.6, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 180.37, "shaped_reward_min": 155, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.37, "onion_pickup_agent_0_min": 10, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 18.35, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.05, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 17.54, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.57, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.78, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.26, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.27, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 15.55, "potting_onion_agent_0_min": 9, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 17.19, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 6.45, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 5.25, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 5.58, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.77, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.6, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.36, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.54, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.68, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.48, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.63, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.55, 
"optimal_onion_potting_agent_0_min": 9, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 17.19, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.55, "viable_onion_potting_agent_0_min": 9, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 17.19, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [576.0, 587.0, 579.0, 633.0, 582.0, 576.0, 564.0, 633.0, 525.0, 582.0, 579.0, 630.0, 579.0, 573.0, 573.0, 579.0, 587.0, 579.0, 582.0, 579.0, 521.0, 576.0, 530.0, 584.0, 630.0, 587.0, 582.0, 579.0, 582.0, 630.0, 627.0, 582.0, 576.0, 576.0, 582.0, 582.0, 579.0, 533.0, 576.0, 582.0, 579.0, 582.0, 579.0, 582.0, 582.0, 582.0, 479.0, 576.0, 519.0, 525.0, 627.0, 522.0, 582.0, 587.0, 587.0, 627.0, 582.0, 582.0, 582.0, 579.0, 582.0, 579.0, 579.0, 582.0, 475.0, 587.0, 582.0, 579.0, 582.0, 587.0, 564.0, 579.0, 579.0, 582.0, 582.0, 582.0, 582.0, 582.0, 630.0, 633.0, 579.0, 539.0, 536.0, 582.0, 582.0, 587.0, 627.0, 587.0, 579.0, 630.0, 584.0, 579.0, 579.0, 579.0, 579.0, 630.0, 633.0, 630.0, 582.0, 630.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [291.0, 285.0, 291.0, 296.0, 290.0, 289.0, 314.0, 319.0, 288.0, 294.0, 280.0, 296.0, 278.0, 286.0, 316.0, 317.0, 260.0, 265.0, 292.0, 290.0, 292.0, 287.0, 326.0, 304.0, 294.0, 285.0, 293.0, 280.0, 287.0, 286.0, 288.0, 291.0, 298.0, 289.0, 303.0, 276.0, 291.0, 291.0, 277.0, 302.0, 253.0, 268.0, 288.0, 288.0, 267.0, 263.0, 290.0, 294.0, 307.0, 323.0, 297.0, 290.0, 296.0, 286.0, 293.0, 286.0, 285.0, 297.0, 317.0, 313.0, 317.0, 310.0, 288.0, 294.0, 293.0, 283.0, 289.0, 287.0, 291.0, 291.0, 295.0, 287.0, 294.0, 285.0, 261.0, 272.0, 283.0, 293.0, 297.0, 285.0, 288.0, 291.0, 283.0, 299.0, 291.0, 288.0, 281.0, 301.0, 283.0, 299.0, 299.0, 283.0, 248.0, 231.0, 295.0, 281.0, 264.0, 
255.0, 260.0, 265.0, 309.0, 318.0, 274.0, 248.0, 294.0, 288.0, 283.0, 304.0, 301.0, 286.0, 318.0, 309.0, 288.0, 294.0, 293.0, 289.0, 296.0, 286.0, 299.0, 280.0, 281.0, 301.0, 300.0, 279.0, 288.0, 291.0, 289.0, 293.0, 243.0, 232.0, 300.0, 287.0, 297.0, 285.0, 284.0, 295.0, 285.0, 297.0, 286.0, 301.0, 280.0, 284.0, 288.0, 291.0, 291.0, 288.0, 299.0, 283.0, 301.0, 281.0, 292.0, 290.0, 291.0, 291.0, 293.0, 289.0, 307.0, 323.0, 318.0, 315.0, 294.0, 285.0, 270.0, 269.0, 266.0, 270.0, 291.0, 291.0, 302.0, 280.0, 290.0, 297.0, 316.0, 311.0, 302.0, 285.0, 291.0, 288.0, 321.0, 309.0, 291.0, 293.0, 293.0, 286.0, 286.0, 293.0, 294.0, 285.0, 290.0, 289.0, 320.0, 310.0, 313.0, 320.0, 311.0, 319.0, 305.0, 277.0, 313.0, 317.0]}, "sampler_perf": {"mean_env_wait_ms": 1.1866467197845827, "mean_processing_ms": 0.3054065028212096, "mean_inference_ms": 1.7433019705968333}, "off_policy_estimator": {}, "info": {"num_steps_trained": 4896000, "num_steps_sampled": 2611200, "sample_time_ms": 20995.947, "load_time_ms": 36.336, "grad_time_ms": 9492.439, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 3.609728810261004e-05, "policy_loss": -0.0082255182787776, "vf_loss": 88.31702423095703, "vf_explained_var": 0.7638809680938721, "kl": 0.0019561152439564466, "entropy": 1.140177845954895, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2611200, "episodes_total": 6528, "training_iteration": 204, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-27-18", "timestamp": 1660253238, "time_this_iter_s": 30.846153020858765, "time_total_s": 11653.335791826248, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 11653.335791826248, "timesteps_since_restore": 2611200, "iterations_since_restore": 204, "perf": {"cpu_util_percent": 30.906818181818178, "ram_util_percent": 58.60227272727274}}
+{"episode_reward_max": 633.0, "episode_reward_min": 237.0, "episode_reward_mean": 583.97, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 115.0}, "policy_reward_max": {"ppo": 326.0}, "policy_reward_mean": {"ppo": 291.985}, "custom_metrics": {"sparse_reward_mean": 201.8, "sparse_reward_min": 80, "sparse_reward_max": 220, "shaped_reward_mean": 180.37, "shaped_reward_min": 77, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.57, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 18.23, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.28, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 17.45, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.67, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.69, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.28, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.27, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 15.56, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 17.19, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 6.23, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.41, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 5.4, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.84, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.52, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.45, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.05, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.45, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.83, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.38, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.78, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.56, 
"optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 17.19, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.56, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 17.19, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [237.0, 576.0, 542.0, 573.0, 630.0, 624.0, 633.0, 582.0, 630.0, 579.0, 630.0, 582.0, 582.0, 567.0, 579.0, 618.0, 579.0, 627.0, 630.0, 582.0, 582.0, 576.0, 633.0, 627.0, 624.0, 582.0, 539.0, 567.0, 579.0, 582.0, 544.0, 573.0, 475.0, 587.0, 582.0, 579.0, 582.0, 587.0, 564.0, 579.0, 579.0, 582.0, 582.0, 582.0, 582.0, 582.0, 630.0, 633.0, 579.0, 539.0, 536.0, 582.0, 582.0, 587.0, 627.0, 587.0, 579.0, 630.0, 584.0, 579.0, 579.0, 579.0, 579.0, 630.0, 633.0, 630.0, 582.0, 630.0, 576.0, 587.0, 579.0, 633.0, 582.0, 576.0, 564.0, 633.0, 525.0, 582.0, 579.0, 630.0, 579.0, 573.0, 573.0, 579.0, 587.0, 579.0, 582.0, 579.0, 521.0, 576.0, 530.0, 584.0, 630.0, 587.0, 582.0, 579.0, 582.0, 630.0, 627.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [122.0, 115.0, 285.0, 291.0, 284.0, 258.0, 281.0, 292.0, 314.0, 316.0, 312.0, 312.0, 320.0, 313.0, 293.0, 289.0, 314.0, 316.0, 283.0, 296.0, 316.0, 314.0, 284.0, 298.0, 284.0, 298.0, 278.0, 289.0, 280.0, 299.0, 305.0, 313.0, 291.0, 288.0, 316.0, 311.0, 304.0, 326.0, 288.0, 294.0, 291.0, 291.0, 295.0, 281.0, 315.0, 318.0, 302.0, 325.0, 313.0, 311.0, 290.0, 292.0, 259.0, 280.0, 278.0, 289.0, 292.0, 287.0, 282.0, 300.0, 275.0, 269.0, 286.0, 287.0, 243.0, 232.0, 300.0, 287.0, 297.0, 285.0, 284.0, 295.0, 285.0, 297.0, 286.0, 301.0, 280.0, 284.0, 288.0, 291.0, 291.0, 288.0, 299.0, 283.0, 301.0, 281.0, 292.0, 290.0, 291.0, 291.0, 293.0, 289.0, 307.0, 323.0, 318.0, 315.0, 294.0, 
285.0, 270.0, 269.0, 266.0, 270.0, 291.0, 291.0, 302.0, 280.0, 290.0, 297.0, 316.0, 311.0, 302.0, 285.0, 291.0, 288.0, 321.0, 309.0, 291.0, 293.0, 293.0, 286.0, 286.0, 293.0, 294.0, 285.0, 290.0, 289.0, 320.0, 310.0, 313.0, 320.0, 311.0, 319.0, 305.0, 277.0, 313.0, 317.0, 291.0, 285.0, 291.0, 296.0, 290.0, 289.0, 314.0, 319.0, 288.0, 294.0, 280.0, 296.0, 278.0, 286.0, 316.0, 317.0, 260.0, 265.0, 292.0, 290.0, 292.0, 287.0, 326.0, 304.0, 294.0, 285.0, 293.0, 280.0, 287.0, 286.0, 288.0, 291.0, 298.0, 289.0, 303.0, 276.0, 291.0, 291.0, 277.0, 302.0, 253.0, 268.0, 288.0, 288.0, 267.0, 263.0, 290.0, 294.0, 307.0, 323.0, 297.0, 290.0, 296.0, 286.0, 293.0, 286.0, 285.0, 297.0, 317.0, 313.0, 317.0, 310.0, 288.0, 294.0]}, "sampler_perf": {"mean_env_wait_ms": 1.1830353872295725, "mean_processing_ms": 0.3046858000734139, "mean_inference_ms": 1.7395956173502736}, "off_policy_estimator": {}, "info": {"num_steps_trained": 4920000, "num_steps_sampled": 2624000, "sample_time_ms": 20976.168, "load_time_ms": 36.272, "grad_time_ms": 9605.806, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.005292419344186783, "policy_loss": -0.002614696277305484, "vf_loss": 84.73992156982422, "vf_explained_var": 0.7728293538093567, "kl": 0.0027176842559129, "entropy": 1.1337858438491821, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2624000, "episodes_total": 6560, "training_iteration": 205, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-27-47", "timestamp": 1660253267, "time_this_iter_s": 29.478952169418335, "time_total_s": 11682.814743995667, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 11682.814743995667, "timesteps_since_restore": 2624000, "iterations_since_restore": 205, "perf": {"cpu_util_percent": 32.5452380952381, "ram_util_percent": 58.56666666666667}}
+{"episode_reward_max": 636.0, "episode_reward_min": 237.0, "episode_reward_mean": 586.12, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 115.0}, "policy_reward_max": {"ppo": 326.0}, "policy_reward_mean": {"ppo": 293.06}, "custom_metrics": {"sparse_reward_mean": 203.0, "sparse_reward_min": 80, "sparse_reward_max": 220, "shaped_reward_mean": 180.12, "shaped_reward_min": 77, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.63, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 18.28, "onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.3, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 17.48, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.75, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.71, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.32, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.29, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 15.56, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.23, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 6.23, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.57, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.25, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.85, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.59, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.58, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.05, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.41, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.89, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.34, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.85, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.56, 
"optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.23, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.56, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.23, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [621.0, 570.0, 587.0, 630.0, 633.0, 579.0, 630.0, 587.0, 582.0, 624.0, 624.0, 579.0, 636.0, 579.0, 576.0, 624.0, 525.0, 627.0, 579.0, 579.0, 630.0, 581.0, 465.0, 579.0, 582.0, 579.0, 573.0, 579.0, 582.0, 573.0, 579.0, 587.0, 633.0, 630.0, 582.0, 630.0, 576.0, 587.0, 579.0, 633.0, 582.0, 576.0, 564.0, 633.0, 525.0, 582.0, 579.0, 630.0, 579.0, 573.0, 573.0, 579.0, 587.0, 579.0, 582.0, 579.0, 521.0, 576.0, 530.0, 584.0, 630.0, 587.0, 582.0, 579.0, 582.0, 630.0, 627.0, 582.0, 237.0, 576.0, 542.0, 573.0, 630.0, 624.0, 633.0, 582.0, 630.0, 579.0, 630.0, 582.0, 582.0, 567.0, 579.0, 618.0, 579.0, 627.0, 630.0, 582.0, 582.0, 576.0, 633.0, 627.0, 624.0, 582.0, 539.0, 567.0, 579.0, 582.0, 544.0, 573.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [310.0, 311.0, 271.0, 299.0, 298.0, 289.0, 324.0, 306.0, 316.0, 317.0, 299.0, 280.0, 322.0, 308.0, 290.0, 297.0, 295.0, 287.0, 307.0, 317.0, 315.0, 309.0, 291.0, 288.0, 322.0, 314.0, 288.0, 291.0, 285.0, 291.0, 305.0, 319.0, 260.0, 265.0, 313.0, 314.0, 277.0, 302.0, 285.0, 294.0, 309.0, 321.0, 285.0, 296.0, 234.0, 231.0, 288.0, 291.0, 296.0, 286.0, 285.0, 294.0, 285.0, 288.0, 290.0, 289.0, 296.0, 286.0, 293.0, 280.0, 298.0, 281.0, 295.0, 292.0, 313.0, 320.0, 311.0, 319.0, 305.0, 277.0, 313.0, 317.0, 291.0, 285.0, 291.0, 296.0, 290.0, 289.0, 314.0, 319.0, 288.0, 294.0, 280.0, 296.0, 278.0, 286.0, 316.0, 317.0, 260.0, 265.0, 292.0, 290.0, 292.0, 287.0, 326.0, 304.0, 294.0, 
285.0, 293.0, 280.0, 287.0, 286.0, 288.0, 291.0, 298.0, 289.0, 303.0, 276.0, 291.0, 291.0, 277.0, 302.0, 253.0, 268.0, 288.0, 288.0, 267.0, 263.0, 290.0, 294.0, 307.0, 323.0, 297.0, 290.0, 296.0, 286.0, 293.0, 286.0, 285.0, 297.0, 317.0, 313.0, 317.0, 310.0, 288.0, 294.0, 122.0, 115.0, 285.0, 291.0, 284.0, 258.0, 281.0, 292.0, 314.0, 316.0, 312.0, 312.0, 320.0, 313.0, 293.0, 289.0, 314.0, 316.0, 283.0, 296.0, 316.0, 314.0, 284.0, 298.0, 284.0, 298.0, 278.0, 289.0, 280.0, 299.0, 305.0, 313.0, 291.0, 288.0, 316.0, 311.0, 304.0, 326.0, 288.0, 294.0, 291.0, 291.0, 295.0, 281.0, 315.0, 318.0, 302.0, 325.0, 313.0, 311.0, 290.0, 292.0, 259.0, 280.0, 278.0, 289.0, 292.0, 287.0, 282.0, 300.0, 275.0, 269.0, 286.0, 287.0]}, "sampler_perf": {"mean_env_wait_ms": 1.1794594867485337, "mean_processing_ms": 0.3039706521282891, "mean_inference_ms": 1.735821597361437}, "off_policy_estimator": {}, "info": {"num_steps_trained": 4944000, "num_steps_sampled": 2636800, "sample_time_ms": 21024.303, "load_time_ms": 36.234, "grad_time_ms": 9682.83, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.00302000530064106, "policy_loss": -0.005335552152246237, "vf_loss": 89.21270751953125, "vf_explained_var": 0.7561216354370117, "kl": 0.0017618268029764295, "entropy": 1.131414532661438, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2636800, "episodes_total": 6592, "training_iteration": 206, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-28-17", "timestamp": 1660253297, "time_this_iter_s": 29.685957193374634, "time_total_s": 11712.500701189041, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 11712.500701189041, "timesteps_since_restore": 2636800, "iterations_since_restore": 206, "perf": {"cpu_util_percent": 30.95714285714286, "ram_util_percent": 58.669047619047625}}
+{"episode_reward_max": 636.0, "episode_reward_min": 237.0, "episode_reward_mean": 589.54, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 115.0}, "policy_reward_max": {"ppo": 326.0}, "policy_reward_mean": {"ppo": 294.77}, "custom_metrics": {"sparse_reward_mean": 204.6, "sparse_reward_min": 80, "sparse_reward_max": 220, "shaped_reward_mean": 180.34, "shaped_reward_min": 77, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.51, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 18.3, "onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.12, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 17.5, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.68, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.71, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.28, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.28, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 15.54, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.25, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 6.27, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.62, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.25, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.84, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.62, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.6, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.05, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.42, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.93, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.37, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.88, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.54, 
"optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.25, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.54, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.25, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [573.0, 530.0, 582.0, 630.0, 624.0, 633.0, 558.0, 570.0, 627.0, 624.0, 573.0, 590.0, 576.0, 573.0, 587.0, 627.0, 582.0, 576.0, 582.0, 579.0, 582.0, 564.0, 576.0, 627.0, 627.0, 579.0, 633.0, 630.0, 633.0, 579.0, 630.0, 627.0, 582.0, 630.0, 627.0, 582.0, 237.0, 576.0, 542.0, 573.0, 630.0, 624.0, 633.0, 582.0, 630.0, 579.0, 630.0, 582.0, 582.0, 567.0, 579.0, 618.0, 579.0, 627.0, 630.0, 582.0, 582.0, 576.0, 633.0, 627.0, 624.0, 582.0, 539.0, 567.0, 579.0, 582.0, 544.0, 573.0, 621.0, 570.0, 587.0, 630.0, 633.0, 579.0, 630.0, 587.0, 582.0, 624.0, 624.0, 579.0, 636.0, 579.0, 576.0, 624.0, 525.0, 627.0, 579.0, 579.0, 630.0, 581.0, 465.0, 579.0, 582.0, 579.0, 573.0, 579.0, 582.0, 573.0, 579.0, 587.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [274.0, 299.0, 263.0, 267.0, 289.0, 293.0, 321.0, 309.0, 309.0, 315.0, 309.0, 324.0, 271.0, 287.0, 293.0, 277.0, 318.0, 309.0, 313.0, 311.0, 290.0, 283.0, 291.0, 299.0, 288.0, 288.0, 300.0, 273.0, 286.0, 301.0, 308.0, 319.0, 296.0, 286.0, 285.0, 291.0, 293.0, 289.0, 288.0, 291.0, 292.0, 290.0, 281.0, 283.0, 289.0, 287.0, 314.0, 313.0, 313.0, 314.0, 289.0, 290.0, 324.0, 309.0, 313.0, 317.0, 316.0, 317.0, 288.0, 291.0, 311.0, 319.0, 326.0, 301.0, 285.0, 297.0, 317.0, 313.0, 317.0, 310.0, 288.0, 294.0, 122.0, 115.0, 285.0, 291.0, 284.0, 258.0, 281.0, 292.0, 314.0, 316.0, 312.0, 312.0, 320.0, 313.0, 293.0, 289.0, 314.0, 316.0, 283.0, 296.0, 316.0, 314.0, 284.0, 298.0, 284.0, 
298.0, 278.0, 289.0, 280.0, 299.0, 305.0, 313.0, 291.0, 288.0, 316.0, 311.0, 304.0, 326.0, 288.0, 294.0, 291.0, 291.0, 295.0, 281.0, 315.0, 318.0, 302.0, 325.0, 313.0, 311.0, 290.0, 292.0, 259.0, 280.0, 278.0, 289.0, 292.0, 287.0, 282.0, 300.0, 275.0, 269.0, 286.0, 287.0, 310.0, 311.0, 271.0, 299.0, 298.0, 289.0, 324.0, 306.0, 316.0, 317.0, 299.0, 280.0, 322.0, 308.0, 290.0, 297.0, 295.0, 287.0, 307.0, 317.0, 315.0, 309.0, 291.0, 288.0, 322.0, 314.0, 288.0, 291.0, 285.0, 291.0, 305.0, 319.0, 260.0, 265.0, 313.0, 314.0, 277.0, 302.0, 285.0, 294.0, 309.0, 321.0, 285.0, 296.0, 234.0, 231.0, 288.0, 291.0, 296.0, 286.0, 285.0, 294.0, 285.0, 288.0, 290.0, 289.0, 296.0, 286.0, 293.0, 280.0, 298.0, 281.0, 295.0, 292.0]}, "sampler_perf": {"mean_env_wait_ms": 1.1759167909615367, "mean_processing_ms": 0.30326280237978454, "mean_inference_ms": 1.7321279401839695}, "off_policy_estimator": {}, "info": {"num_steps_trained": 4968000, "num_steps_sampled": 2649600, "sample_time_ms": 20977.446, "load_time_ms": 35.842, "grad_time_ms": 9697.985, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0015924535691738129, "policy_loss": -0.005817517638206482, "vf_loss": 79.77727508544922, "vf_explained_var": 0.7645978927612305, "kl": 0.001973592210561037, "entropy": 1.1355053186416626, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2649600, "episodes_total": 6624, "training_iteration": 207, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-28-47", "timestamp": 1660253327, "time_this_iter_s": 30.242400884628296, "time_total_s": 11742.74310207367, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 11742.74310207367, "timesteps_since_restore": 2649600, "iterations_since_restore": 207, "perf": {"cpu_util_percent": 33.359523809523814, "ram_util_percent": 58.971428571428575}}
+{"episode_reward_max": 636.0, "episode_reward_min": 465.0, "episode_reward_mean": 592.22, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 231.0}, "policy_reward_max": {"ppo": 326.0}, "policy_reward_mean": {"ppo": 296.11}, "custom_metrics": {"sparse_reward_mean": 205.6, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 181.02, "shaped_reward_min": 145, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.49, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 18.37, "onion_pickup_agent_1_min": 12, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.04, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 17.65, "useful_onion_pickup_agent_1_min": 12, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.6, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.73, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.28, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.3, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 15.62, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.32, "potting_onion_agent_1_min": 12, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.27, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 5.68, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.22, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.83, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.57, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.63, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.04, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.48, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.97, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.43, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.86, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.04, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 3, "optimal_onion_potting_agent_0_mean": 15.62, 
"optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.32, "optimal_onion_potting_agent_1_min": 12, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.62, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.32, "viable_onion_potting_agent_1_min": 12, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [584.0, 582.0, 630.0, 630.0, 582.0, 564.0, 576.0, 582.0, 630.0, 561.0, 633.0, 587.0, 579.0, 579.0, 627.0, 627.0, 627.0, 518.0, 633.0, 633.0, 582.0, 630.0, 582.0, 582.0, 587.0, 573.0, 627.0, 582.0, 573.0, 582.0, 564.0, 573.0, 579.0, 582.0, 544.0, 573.0, 621.0, 570.0, 587.0, 630.0, 633.0, 579.0, 630.0, 587.0, 582.0, 624.0, 624.0, 579.0, 636.0, 579.0, 576.0, 624.0, 525.0, 627.0, 579.0, 579.0, 630.0, 581.0, 465.0, 579.0, 582.0, 579.0, 573.0, 579.0, 582.0, 573.0, 579.0, 587.0, 573.0, 530.0, 582.0, 630.0, 624.0, 633.0, 558.0, 570.0, 627.0, 624.0, 573.0, 590.0, 576.0, 573.0, 587.0, 627.0, 582.0, 576.0, 582.0, 579.0, 582.0, 564.0, 576.0, 627.0, 627.0, 579.0, 633.0, 630.0, 633.0, 579.0, 630.0, 627.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [285.0, 299.0, 296.0, 286.0, 311.0, 319.0, 306.0, 324.0, 291.0, 291.0, 283.0, 281.0, 289.0, 287.0, 286.0, 296.0, 315.0, 315.0, 277.0, 284.0, 321.0, 312.0, 290.0, 297.0, 296.0, 283.0, 284.0, 295.0, 310.0, 317.0, 313.0, 314.0, 326.0, 301.0, 252.0, 266.0, 319.0, 314.0, 323.0, 310.0, 296.0, 286.0, 315.0, 315.0, 290.0, 292.0, 283.0, 299.0, 292.0, 295.0, 283.0, 290.0, 308.0, 319.0, 279.0, 303.0, 279.0, 294.0, 290.0, 292.0, 283.0, 281.0, 278.0, 295.0, 292.0, 287.0, 282.0, 300.0, 275.0, 269.0, 286.0, 287.0, 310.0, 311.0, 271.0, 299.0, 298.0, 289.0, 324.0, 306.0, 316.0, 317.0, 299.0, 280.0, 322.0, 308.0, 290.0, 297.0, 295.0, 287.0, 307.0, 317.0, 315.0, 309.0, 291.0, 288.0, 322.0, 
314.0, 288.0, 291.0, 285.0, 291.0, 305.0, 319.0, 260.0, 265.0, 313.0, 314.0, 277.0, 302.0, 285.0, 294.0, 309.0, 321.0, 285.0, 296.0, 234.0, 231.0, 288.0, 291.0, 296.0, 286.0, 285.0, 294.0, 285.0, 288.0, 290.0, 289.0, 296.0, 286.0, 293.0, 280.0, 298.0, 281.0, 295.0, 292.0, 274.0, 299.0, 263.0, 267.0, 289.0, 293.0, 321.0, 309.0, 309.0, 315.0, 309.0, 324.0, 271.0, 287.0, 293.0, 277.0, 318.0, 309.0, 313.0, 311.0, 290.0, 283.0, 291.0, 299.0, 288.0, 288.0, 300.0, 273.0, 286.0, 301.0, 308.0, 319.0, 296.0, 286.0, 285.0, 291.0, 293.0, 289.0, 288.0, 291.0, 292.0, 290.0, 281.0, 283.0, 289.0, 287.0, 314.0, 313.0, 313.0, 314.0, 289.0, 290.0, 324.0, 309.0, 313.0, 317.0, 316.0, 317.0, 288.0, 291.0, 311.0, 319.0, 326.0, 301.0]}, "sampler_perf": {"mean_env_wait_ms": 1.1724058892059666, "mean_processing_ms": 0.3025608135721768, "mean_inference_ms": 1.7284261128230096}, "off_policy_estimator": {}, "info": {"num_steps_trained": 4992000, "num_steps_sampled": 2662400, "sample_time_ms": 20797.656, "load_time_ms": 35.752, "grad_time_ms": 9645.805, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0059862625785171986, "policy_loss": -0.00204761722125113, "vf_loss": 86.01973724365234, "vf_explained_var": 0.7589619755744934, "kl": 0.0022174532059580088, "entropy": 1.136189579963684, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2662400, "episodes_total": 6656, "training_iteration": 208, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-29-16", "timestamp": 1660253356, "time_this_iter_s": 28.974893808364868, "time_total_s": 11771.717995882034, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 11771.717995882034, "timesteps_since_restore": 2662400, "iterations_since_restore": 208, "perf": {"cpu_util_percent": 35.22682926829268, "ram_util_percent": 58.67073170731708}}
+{"episode_reward_max": 639.0, "episode_reward_min": 518.0, "episode_reward_mean": 593.05, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 252.0}, "policy_reward_max": {"ppo": 326.0}, "policy_reward_mean": {"ppo": 296.525}, "custom_metrics": {"sparse_reward_mean": 205.8, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 181.45, "shaped_reward_min": 158, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.55, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 18.33, "onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.15, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 17.62, "useful_onion_pickup_agent_1_min": 11, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.54, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.74, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.2, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.34, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 15.76, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.22, "potting_onion_agent_1_min": 11, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.22, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 5.67, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.27, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.85, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.59, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.51, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.04, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.47, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.0, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.38, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.91, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 3, "optimal_onion_potting_agent_0_mean": 15.76, 
"optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.22, "optimal_onion_potting_agent_1_min": 11, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.76, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.22, "viable_onion_potting_agent_1_min": 11, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [630.0, 627.0, 579.0, 582.0, 579.0, 618.0, 522.0, 581.0, 590.0, 587.0, 582.0, 582.0, 579.0, 564.0, 630.0, 582.0, 587.0, 639.0, 570.0, 576.0, 567.0, 576.0, 582.0, 630.0, 584.0, 587.0, 633.0, 582.0, 630.0, 582.0, 579.0, 582.0, 582.0, 573.0, 579.0, 587.0, 573.0, 530.0, 582.0, 630.0, 624.0, 633.0, 558.0, 570.0, 627.0, 624.0, 573.0, 590.0, 576.0, 573.0, 587.0, 627.0, 582.0, 576.0, 582.0, 579.0, 582.0, 564.0, 576.0, 627.0, 627.0, 579.0, 633.0, 630.0, 633.0, 579.0, 630.0, 627.0, 584.0, 582.0, 630.0, 630.0, 582.0, 564.0, 576.0, 582.0, 630.0, 561.0, 633.0, 587.0, 579.0, 579.0, 627.0, 627.0, 627.0, 518.0, 633.0, 633.0, 582.0, 630.0, 582.0, 582.0, 587.0, 573.0, 627.0, 582.0, 573.0, 582.0, 564.0, 573.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [317.0, 313.0, 315.0, 312.0, 290.0, 289.0, 297.0, 285.0, 281.0, 298.0, 313.0, 305.0, 255.0, 267.0, 287.0, 294.0, 304.0, 286.0, 298.0, 289.0, 287.0, 295.0, 288.0, 294.0, 289.0, 290.0, 286.0, 278.0, 314.0, 316.0, 285.0, 297.0, 300.0, 287.0, 316.0, 323.0, 273.0, 297.0, 286.0, 290.0, 278.0, 289.0, 291.0, 285.0, 293.0, 289.0, 321.0, 309.0, 287.0, 297.0, 286.0, 301.0, 321.0, 312.0, 291.0, 291.0, 316.0, 314.0, 286.0, 296.0, 302.0, 277.0, 294.0, 288.0, 296.0, 286.0, 293.0, 280.0, 298.0, 281.0, 295.0, 292.0, 274.0, 299.0, 263.0, 267.0, 289.0, 293.0, 321.0, 309.0, 309.0, 315.0, 309.0, 324.0, 271.0, 287.0, 293.0, 277.0, 318.0, 309.0, 313.0, 311.0, 290.0, 283.0, 291.0, 299.0, 288.0, 
288.0, 300.0, 273.0, 286.0, 301.0, 308.0, 319.0, 296.0, 286.0, 285.0, 291.0, 293.0, 289.0, 288.0, 291.0, 292.0, 290.0, 281.0, 283.0, 289.0, 287.0, 314.0, 313.0, 313.0, 314.0, 289.0, 290.0, 324.0, 309.0, 313.0, 317.0, 316.0, 317.0, 288.0, 291.0, 311.0, 319.0, 326.0, 301.0, 285.0, 299.0, 296.0, 286.0, 311.0, 319.0, 306.0, 324.0, 291.0, 291.0, 283.0, 281.0, 289.0, 287.0, 286.0, 296.0, 315.0, 315.0, 277.0, 284.0, 321.0, 312.0, 290.0, 297.0, 296.0, 283.0, 284.0, 295.0, 310.0, 317.0, 313.0, 314.0, 326.0, 301.0, 252.0, 266.0, 319.0, 314.0, 323.0, 310.0, 296.0, 286.0, 315.0, 315.0, 290.0, 292.0, 283.0, 299.0, 292.0, 295.0, 283.0, 290.0, 308.0, 319.0, 279.0, 303.0, 279.0, 294.0, 290.0, 292.0, 283.0, 281.0, 278.0, 295.0]}, "sampler_perf": {"mean_env_wait_ms": 1.168925444559228, "mean_processing_ms": 0.30186423192914913, "mean_inference_ms": 1.7247612604215892}, "off_policy_estimator": {}, "info": {"num_steps_trained": 5016000, "num_steps_sampled": 2675200, "sample_time_ms": 20704.168, "load_time_ms": 35.714, "grad_time_ms": 9573.901, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0030685996171087027, "policy_loss": -0.005047030281275511, "vf_loss": 86.80921173095703, "vf_explained_var": 0.7612468600273132, "kl": 0.0021123213227838278, "entropy": 1.1305813789367676, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2675200, "episodes_total": 6688, "training_iteration": 209, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-29-45", "timestamp": 1660253385, "time_this_iter_s": 29.278584241867065, "time_total_s": 11800.996580123901, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 11800.996580123901, "timesteps_since_restore": 2675200, "iterations_since_restore": 209, "perf": {"cpu_util_percent": 32.47857142857143, "ram_util_percent": 58.55952380952381}}
+{"episode_reward_max": 639.0, "episode_reward_min": 513.0, "episode_reward_mean": 592.98, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 252.0}, "policy_reward_max": {"ppo": 326.0}, "policy_reward_mean": {"ppo": 296.49}, "custom_metrics": {"sparse_reward_mean": 205.4, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 182.18, "shaped_reward_min": 153, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.55, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 18.36, "onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.12, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 17.75, "useful_onion_pickup_agent_1_min": 11, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.58, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.62, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.17, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.27, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 15.66, "potting_onion_agent_0_min": 9, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.39, "potting_onion_agent_1_min": 11, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 6.16, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 5.56, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.45, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.86, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.47, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.45, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.51, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.95, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.43, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.85, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.04, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 3, "optimal_onion_potting_agent_0_mean": 15.66, 
"optimal_onion_potting_agent_0_min": 9, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.39, "optimal_onion_potting_agent_1_min": 11, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.66, "viable_onion_potting_agent_0_min": 9, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.39, "viable_onion_potting_agent_1_min": 11, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [573.0, 579.0, 570.0, 582.0, 576.0, 633.0, 582.0, 582.0, 582.0, 627.0, 633.0, 587.0, 579.0, 587.0, 587.0, 579.0, 582.0, 584.0, 513.0, 630.0, 582.0, 630.0, 630.0, 633.0, 590.0, 582.0, 582.0, 582.0, 579.0, 630.0, 582.0, 579.0, 633.0, 579.0, 630.0, 627.0, 584.0, 582.0, 630.0, 630.0, 582.0, 564.0, 576.0, 582.0, 630.0, 561.0, 633.0, 587.0, 579.0, 579.0, 627.0, 627.0, 627.0, 518.0, 633.0, 633.0, 582.0, 630.0, 582.0, 582.0, 587.0, 573.0, 627.0, 582.0, 573.0, 582.0, 564.0, 573.0, 630.0, 627.0, 579.0, 582.0, 579.0, 618.0, 522.0, 581.0, 590.0, 587.0, 582.0, 582.0, 579.0, 564.0, 630.0, 582.0, 587.0, 639.0, 570.0, 576.0, 567.0, 576.0, 582.0, 630.0, 584.0, 587.0, 633.0, 582.0, 630.0, 582.0, 579.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [288.0, 285.0, 287.0, 292.0, 288.0, 282.0, 295.0, 287.0, 283.0, 293.0, 319.0, 314.0, 282.0, 300.0, 291.0, 291.0, 303.0, 279.0, 306.0, 321.0, 314.0, 319.0, 290.0, 297.0, 278.0, 301.0, 300.0, 287.0, 304.0, 283.0, 298.0, 281.0, 299.0, 283.0, 289.0, 295.0, 259.0, 254.0, 321.0, 309.0, 285.0, 297.0, 311.0, 319.0, 318.0, 312.0, 318.0, 315.0, 293.0, 297.0, 296.0, 286.0, 294.0, 288.0, 297.0, 285.0, 295.0, 284.0, 306.0, 324.0, 293.0, 289.0, 286.0, 293.0, 316.0, 317.0, 288.0, 291.0, 311.0, 319.0, 326.0, 301.0, 285.0, 299.0, 296.0, 286.0, 311.0, 319.0, 306.0, 324.0, 291.0, 291.0, 283.0, 281.0, 289.0, 287.0, 286.0, 296.0, 315.0, 315.0, 277.0, 284.0, 321.0, 312.0, 290.0, 297.0, 296.0, 
283.0, 284.0, 295.0, 310.0, 317.0, 313.0, 314.0, 326.0, 301.0, 252.0, 266.0, 319.0, 314.0, 323.0, 310.0, 296.0, 286.0, 315.0, 315.0, 290.0, 292.0, 283.0, 299.0, 292.0, 295.0, 283.0, 290.0, 308.0, 319.0, 279.0, 303.0, 279.0, 294.0, 290.0, 292.0, 283.0, 281.0, 278.0, 295.0, 317.0, 313.0, 315.0, 312.0, 290.0, 289.0, 297.0, 285.0, 281.0, 298.0, 313.0, 305.0, 255.0, 267.0, 287.0, 294.0, 304.0, 286.0, 298.0, 289.0, 287.0, 295.0, 288.0, 294.0, 289.0, 290.0, 286.0, 278.0, 314.0, 316.0, 285.0, 297.0, 300.0, 287.0, 316.0, 323.0, 273.0, 297.0, 286.0, 290.0, 278.0, 289.0, 291.0, 285.0, 293.0, 289.0, 321.0, 309.0, 287.0, 297.0, 286.0, 301.0, 321.0, 312.0, 291.0, 291.0, 316.0, 314.0, 286.0, 296.0, 302.0, 277.0, 294.0, 288.0]}, "sampler_perf": {"mean_env_wait_ms": 1.165477981572783, "mean_processing_ms": 0.3011732242601824, "mean_inference_ms": 1.7211386547427134}, "off_policy_estimator": {}, "info": {"num_steps_trained": 5040000, "num_steps_sampled": 2688000, "sample_time_ms": 20664.196, "load_time_ms": 35.919, "grad_time_ms": 9521.919, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.002112786052748561, "policy_loss": -0.006137066055089235, "vf_loss": 88.17215728759766, "vf_explained_var": 0.7567508220672607, "kl": 0.0019861103501170874, "entropy": 1.1347342729568481, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2688000, "episodes_total": 6720, "training_iteration": 210, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-30-15", "timestamp": 1660253415, "time_this_iter_s": 29.789448976516724, "time_total_s": 11830.786029100418, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 11830.786029100418, "timesteps_since_restore": 2688000, "iterations_since_restore": 210, "perf": {"cpu_util_percent": 30.638095238095236, "ram_util_percent": 58.67142857142858}}
+{"episode_reward_max": 639.0, "episode_reward_min": 465.0, "episode_reward_mean": 586.56, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 231.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 293.28}, "custom_metrics": {"sparse_reward_mean": 202.6, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 181.36, "shaped_reward_min": 145, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.32, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 18.57, "onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 15.89, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 17.78, "useful_onion_pickup_agent_1_min": 11, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.7, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.67, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.18, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.35, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 15.33, "potting_onion_agent_0_min": 9, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 17.52, "potting_onion_agent_1_min": 11, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 6.34, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 5.31, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 5.64, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.73, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.53, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.4, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.6, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.75, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.51, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.67, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.33, 
"optimal_onion_potting_agent_0_min": 9, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 17.52, "optimal_onion_potting_agent_1_min": 11, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.33, "viable_onion_potting_agent_0_min": 9, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 17.52, "viable_onion_potting_agent_1_min": 11, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [576.0, 573.0, 579.0, 465.0, 579.0, 630.0, 582.0, 582.0, 536.0, 587.0, 587.0, 630.0, 579.0, 630.0, 579.0, 582.0, 579.0, 579.0, 576.0, 582.0, 530.0, 484.0, 627.0, 582.0, 579.0, 584.0, 579.0, 590.0, 587.0, 587.0, 579.0, 636.0, 573.0, 582.0, 564.0, 573.0, 630.0, 627.0, 579.0, 582.0, 579.0, 618.0, 522.0, 581.0, 590.0, 587.0, 582.0, 582.0, 579.0, 564.0, 630.0, 582.0, 587.0, 639.0, 570.0, 576.0, 567.0, 576.0, 582.0, 630.0, 584.0, 587.0, 633.0, 582.0, 630.0, 582.0, 579.0, 582.0, 573.0, 579.0, 570.0, 582.0, 576.0, 633.0, 582.0, 582.0, 582.0, 627.0, 633.0, 587.0, 579.0, 587.0, 587.0, 579.0, 582.0, 584.0, 513.0, 630.0, 582.0, 630.0, 630.0, 633.0, 590.0, 582.0, 582.0, 582.0, 579.0, 630.0, 582.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [278.0, 298.0, 284.0, 289.0, 288.0, 291.0, 231.0, 234.0, 279.0, 300.0, 313.0, 317.0, 297.0, 285.0, 291.0, 291.0, 271.0, 265.0, 302.0, 285.0, 290.0, 297.0, 316.0, 314.0, 286.0, 293.0, 313.0, 317.0, 294.0, 285.0, 296.0, 286.0, 287.0, 292.0, 288.0, 291.0, 290.0, 286.0, 293.0, 289.0, 260.0, 270.0, 234.0, 250.0, 308.0, 319.0, 294.0, 288.0, 290.0, 289.0, 296.0, 288.0, 292.0, 287.0, 301.0, 289.0, 301.0, 286.0, 300.0, 287.0, 300.0, 279.0, 320.0, 316.0, 279.0, 294.0, 290.0, 292.0, 283.0, 281.0, 278.0, 295.0, 317.0, 313.0, 315.0, 312.0, 290.0, 289.0, 297.0, 285.0, 281.0, 298.0, 313.0, 305.0, 255.0, 267.0, 287.0, 294.0, 304.0, 286.0, 298.0, 289.0, 287.0, 295.0, 288.0, 294.0, 289.0, 
290.0, 286.0, 278.0, 314.0, 316.0, 285.0, 297.0, 300.0, 287.0, 316.0, 323.0, 273.0, 297.0, 286.0, 290.0, 278.0, 289.0, 291.0, 285.0, 293.0, 289.0, 321.0, 309.0, 287.0, 297.0, 286.0, 301.0, 321.0, 312.0, 291.0, 291.0, 316.0, 314.0, 286.0, 296.0, 302.0, 277.0, 294.0, 288.0, 288.0, 285.0, 287.0, 292.0, 288.0, 282.0, 295.0, 287.0, 283.0, 293.0, 319.0, 314.0, 282.0, 300.0, 291.0, 291.0, 303.0, 279.0, 306.0, 321.0, 314.0, 319.0, 290.0, 297.0, 278.0, 301.0, 300.0, 287.0, 304.0, 283.0, 298.0, 281.0, 299.0, 283.0, 289.0, 295.0, 259.0, 254.0, 321.0, 309.0, 285.0, 297.0, 311.0, 319.0, 318.0, 312.0, 318.0, 315.0, 293.0, 297.0, 296.0, 286.0, 294.0, 288.0, 297.0, 285.0, 295.0, 284.0, 306.0, 324.0, 293.0, 289.0, 286.0, 293.0]}, "sampler_perf": {"mean_env_wait_ms": 1.162072675470566, "mean_processing_ms": 0.30049261071423955, "mean_inference_ms": 1.7176923877441694}, "off_policy_estimator": {}, "info": {"num_steps_trained": 5064000, "num_steps_sampled": 2700800, "sample_time_ms": 20604.341, "load_time_ms": 36.396, "grad_time_ms": 9451.079, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0014451188035309315, "policy_loss": -0.0075116343796253204, "vf_loss": 95.30281829833984, "vf_explained_var": 0.7530279755592346, "kl": 0.001810736837796867, "entropy": 1.147046446800232, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2700800, "episodes_total": 6752, "training_iteration": 211, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-30-46", "timestamp": 1660253446, "time_this_iter_s": 30.540673971176147, "time_total_s": 11861.326703071594, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 11861.326703071594, "timesteps_since_restore": 2700800, "iterations_since_restore": 211, "perf": {"cpu_util_percent": 31.16511627906976, "ram_util_percent": 58.63023255813955}}
+{"episode_reward_max": 636.0, "episode_reward_min": 465.0, "episode_reward_mean": 585.95, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 231.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 292.975}, "custom_metrics": {"sparse_reward_mean": 202.2, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 181.55, "shaped_reward_min": 138, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.63, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 18.41, "onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 16.19, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 17.45, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.82, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.7, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.25, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.39, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 15.48, "potting_onion_agent_0_min": 9, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 17.36, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 6.19, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.3, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 5.65, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.81, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.4, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.4, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.55, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.78, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.48, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.68, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.48, 
"optimal_onion_potting_agent_0_min": 9, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 17.36, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.48, "viable_onion_potting_agent_0_min": 9, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 17.36, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [627.0, 627.0, 582.0, 542.0, 582.0, 579.0, 468.0, 539.0, 584.0, 587.0, 582.0, 633.0, 587.0, 579.0, 498.0, 627.0, 579.0, 630.0, 582.0, 630.0, 630.0, 582.0, 576.0, 587.0, 633.0, 582.0, 621.0, 627.0, 582.0, 587.0, 582.0, 525.0, 630.0, 582.0, 579.0, 582.0, 573.0, 579.0, 570.0, 582.0, 576.0, 633.0, 582.0, 582.0, 582.0, 627.0, 633.0, 587.0, 579.0, 587.0, 587.0, 579.0, 582.0, 584.0, 513.0, 630.0, 582.0, 630.0, 630.0, 633.0, 590.0, 582.0, 582.0, 582.0, 579.0, 630.0, 582.0, 579.0, 576.0, 573.0, 579.0, 465.0, 579.0, 630.0, 582.0, 582.0, 536.0, 587.0, 587.0, 630.0, 579.0, 630.0, 579.0, 582.0, 579.0, 579.0, 576.0, 582.0, 530.0, 484.0, 627.0, 582.0, 579.0, 584.0, 579.0, 590.0, 587.0, 587.0, 579.0, 636.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [303.0, 324.0, 318.0, 309.0, 294.0, 288.0, 267.0, 275.0, 288.0, 294.0, 293.0, 286.0, 236.0, 232.0, 277.0, 262.0, 288.0, 296.0, 302.0, 285.0, 285.0, 297.0, 314.0, 319.0, 296.0, 291.0, 293.0, 286.0, 245.0, 253.0, 318.0, 309.0, 295.0, 284.0, 318.0, 312.0, 293.0, 289.0, 309.0, 321.0, 310.0, 320.0, 280.0, 302.0, 293.0, 283.0, 286.0, 301.0, 320.0, 313.0, 286.0, 296.0, 314.0, 307.0, 319.0, 308.0, 301.0, 281.0, 282.0, 305.0, 288.0, 294.0, 256.0, 269.0, 316.0, 314.0, 286.0, 296.0, 302.0, 277.0, 294.0, 288.0, 288.0, 285.0, 287.0, 292.0, 288.0, 282.0, 295.0, 287.0, 283.0, 293.0, 319.0, 314.0, 282.0, 300.0, 291.0, 291.0, 303.0, 279.0, 306.0, 321.0, 314.0, 319.0, 290.0, 297.0, 278.0, 
301.0, 300.0, 287.0, 304.0, 283.0, 298.0, 281.0, 299.0, 283.0, 289.0, 295.0, 259.0, 254.0, 321.0, 309.0, 285.0, 297.0, 311.0, 319.0, 318.0, 312.0, 318.0, 315.0, 293.0, 297.0, 296.0, 286.0, 294.0, 288.0, 297.0, 285.0, 295.0, 284.0, 306.0, 324.0, 293.0, 289.0, 286.0, 293.0, 278.0, 298.0, 284.0, 289.0, 288.0, 291.0, 231.0, 234.0, 279.0, 300.0, 313.0, 317.0, 297.0, 285.0, 291.0, 291.0, 271.0, 265.0, 302.0, 285.0, 290.0, 297.0, 316.0, 314.0, 286.0, 293.0, 313.0, 317.0, 294.0, 285.0, 296.0, 286.0, 287.0, 292.0, 288.0, 291.0, 290.0, 286.0, 293.0, 289.0, 260.0, 270.0, 234.0, 250.0, 308.0, 319.0, 294.0, 288.0, 290.0, 289.0, 296.0, 288.0, 292.0, 287.0, 301.0, 289.0, 301.0, 286.0, 300.0, 287.0, 300.0, 279.0, 320.0, 316.0]}, "sampler_perf": {"mean_env_wait_ms": 1.15872347920077, "mean_processing_ms": 0.2998198878857747, "mean_inference_ms": 1.7145174243808747}, "off_policy_estimator": {}, "info": {"num_steps_trained": 5088000, "num_steps_sampled": 2713600, "sample_time_ms": 20641.235, "load_time_ms": 36.613, "grad_time_ms": 9485.245, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.010028759017586708, "policy_loss": 0.0009867753833532333, "vf_loss": 96.11052703857422, "vf_explained_var": 0.7489395141601562, "kl": 0.0021745015401393175, "entropy": 1.1381220817565918, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2713600, "episodes_total": 6784, "training_iteration": 212, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-31-18", "timestamp": 1660253478, "time_this_iter_s": 32.31651592254639, "time_total_s": 11893.64321899414, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 11893.64321899414, "timesteps_since_restore": 2713600, "iterations_since_restore": 212, "perf": {"cpu_util_percent": 33.958695652173915, "ram_util_percent": 58.643478260869585}}
+{"episode_reward_max": 636.0, "episode_reward_min": 465.0, "episode_reward_mean": 583.81, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 231.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 291.905}, "custom_metrics": {"sparse_reward_mean": 201.4, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 181.01, "shaped_reward_min": 138, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.71, "onion_pickup_agent_0_min": 10, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 18.22, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 16.33, "useful_onion_pickup_agent_0_min": 9, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 17.27, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.69, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.79, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.23, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.43, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 15.72, "potting_onion_agent_0_min": 9, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 17.08, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 6.13, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.41, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 5.49, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.88, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.47, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.44, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.04, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.43, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.88, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.34, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.79, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.72, 
"optimal_onion_potting_agent_0_min": 9, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 17.08, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.72, "viable_onion_potting_agent_0_min": 9, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 17.08, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [478.0, 582.0, 582.0, 582.0, 576.0, 576.0, 627.0, 579.0, 630.0, 536.0, 579.0, 630.0, 582.0, 582.0, 579.0, 582.0, 570.0, 576.0, 630.0, 627.0, 536.0, 579.0, 582.0, 579.0, 587.0, 584.0, 630.0, 582.0, 630.0, 582.0, 582.0, 579.0, 579.0, 630.0, 582.0, 579.0, 576.0, 573.0, 579.0, 465.0, 579.0, 630.0, 582.0, 582.0, 536.0, 587.0, 587.0, 630.0, 579.0, 630.0, 579.0, 582.0, 579.0, 579.0, 576.0, 582.0, 530.0, 484.0, 627.0, 582.0, 579.0, 584.0, 579.0, 590.0, 587.0, 587.0, 579.0, 636.0, 627.0, 627.0, 582.0, 542.0, 582.0, 579.0, 468.0, 539.0, 584.0, 587.0, 582.0, 633.0, 587.0, 579.0, 498.0, 627.0, 579.0, 630.0, 582.0, 630.0, 630.0, 582.0, 576.0, 587.0, 633.0, 582.0, 621.0, 627.0, 582.0, 587.0, 582.0, 525.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [246.0, 232.0, 301.0, 281.0, 298.0, 284.0, 291.0, 291.0, 303.0, 273.0, 282.0, 294.0, 316.0, 311.0, 293.0, 286.0, 308.0, 322.0, 270.0, 266.0, 288.0, 291.0, 324.0, 306.0, 291.0, 291.0, 291.0, 291.0, 288.0, 291.0, 293.0, 289.0, 296.0, 274.0, 297.0, 279.0, 315.0, 315.0, 308.0, 319.0, 266.0, 270.0, 283.0, 296.0, 290.0, 292.0, 283.0, 296.0, 296.0, 291.0, 293.0, 291.0, 316.0, 314.0, 281.0, 301.0, 316.0, 314.0, 290.0, 292.0, 289.0, 293.0, 286.0, 293.0, 295.0, 284.0, 306.0, 324.0, 293.0, 289.0, 286.0, 293.0, 278.0, 298.0, 284.0, 289.0, 288.0, 291.0, 231.0, 234.0, 279.0, 300.0, 313.0, 317.0, 297.0, 285.0, 291.0, 291.0, 271.0, 265.0, 302.0, 285.0, 290.0, 297.0, 316.0, 314.0, 286.0, 
293.0, 313.0, 317.0, 294.0, 285.0, 296.0, 286.0, 287.0, 292.0, 288.0, 291.0, 290.0, 286.0, 293.0, 289.0, 260.0, 270.0, 234.0, 250.0, 308.0, 319.0, 294.0, 288.0, 290.0, 289.0, 296.0, 288.0, 292.0, 287.0, 301.0, 289.0, 301.0, 286.0, 300.0, 287.0, 300.0, 279.0, 320.0, 316.0, 303.0, 324.0, 318.0, 309.0, 294.0, 288.0, 267.0, 275.0, 288.0, 294.0, 293.0, 286.0, 236.0, 232.0, 277.0, 262.0, 288.0, 296.0, 302.0, 285.0, 285.0, 297.0, 314.0, 319.0, 296.0, 291.0, 293.0, 286.0, 245.0, 253.0, 318.0, 309.0, 295.0, 284.0, 318.0, 312.0, 293.0, 289.0, 309.0, 321.0, 310.0, 320.0, 280.0, 302.0, 293.0, 283.0, 286.0, 301.0, 320.0, 313.0, 286.0, 296.0, 314.0, 307.0, 319.0, 308.0, 301.0, 281.0, 282.0, 305.0, 288.0, 294.0, 256.0, 269.0]}, "sampler_perf": {"mean_env_wait_ms": 1.1554138563428238, "mean_processing_ms": 0.299155513024685, "mean_inference_ms": 1.7115208998476246}, "off_policy_estimator": {}, "info": {"num_steps_trained": 5112000, "num_steps_sampled": 2726400, "sample_time_ms": 20734.364, "load_time_ms": 36.691, "grad_time_ms": 9415.374, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0028692474588751793, "policy_loss": -0.0050502982921898365, "vf_loss": 84.87030029296875, "vf_explained_var": 0.7659473419189453, "kl": 0.0017100750701501966, "entropy": 1.134959101676941, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2726400, "episodes_total": 6816, "training_iteration": 213, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-31-49", "timestamp": 1660253509, "time_this_iter_s": 30.962037086486816, "time_total_s": 11924.605256080627, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 11924.605256080627, "timesteps_since_restore": 2726400, "iterations_since_restore": 213, "perf": {"cpu_util_percent": 30.947727272727267, "ram_util_percent": 58.58863636363639}}
+{"episode_reward_max": 636.0, "episode_reward_min": 345.0, "episode_reward_mean": 584.19, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 172.0}, "policy_reward_max": {"ppo": 326.0}, "policy_reward_mean": {"ppo": 292.095}, "custom_metrics": {"sparse_reward_mean": 201.8, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 180.59, "shaped_reward_min": 105, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.49, "onion_pickup_agent_0_min": 10, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 18.4, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 16.08, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 17.48, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 25, "onion_drop_agent_0_mean": 0.65, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.89, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 7, "useful_onion_drop_agent_0_mean": 0.27, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, "useful_onion_drop_agent_1_mean": 0.44, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 15.56, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 17.14, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.25, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.3, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.5, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.78, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.56, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.36, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.55, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 10, "soup_pickup_agent_1_mean": 4.85, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.36, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.75, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.08, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 4, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.56, 
"optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 17.14, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.56, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 17.14, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 630.0, 587.0, 576.0, 582.0, 587.0, 630.0, 587.0, 630.0, 476.0, 579.0, 587.0, 584.0, 587.0, 627.0, 579.0, 558.0, 479.0, 630.0, 579.0, 630.0, 579.0, 630.0, 584.0, 576.0, 627.0, 627.0, 345.0, 579.0, 621.0, 582.0, 519.0, 587.0, 587.0, 579.0, 636.0, 627.0, 627.0, 582.0, 542.0, 582.0, 579.0, 468.0, 539.0, 584.0, 587.0, 582.0, 633.0, 587.0, 579.0, 498.0, 627.0, 579.0, 630.0, 582.0, 630.0, 630.0, 582.0, 576.0, 587.0, 633.0, 582.0, 621.0, 627.0, 582.0, 587.0, 582.0, 525.0, 478.0, 582.0, 582.0, 582.0, 576.0, 576.0, 627.0, 579.0, 630.0, 536.0, 579.0, 630.0, 582.0, 582.0, 579.0, 582.0, 570.0, 576.0, 630.0, 627.0, 536.0, 579.0, 582.0, 579.0, 587.0, 584.0, 630.0, 582.0, 630.0, 582.0, 582.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [299.0, 283.0, 309.0, 321.0, 287.0, 300.0, 290.0, 286.0, 289.0, 293.0, 296.0, 291.0, 314.0, 316.0, 303.0, 284.0, 314.0, 316.0, 243.0, 233.0, 273.0, 306.0, 296.0, 291.0, 291.0, 293.0, 298.0, 289.0, 307.0, 320.0, 284.0, 295.0, 264.0, 294.0, 238.0, 241.0, 326.0, 304.0, 288.0, 291.0, 319.0, 311.0, 289.0, 290.0, 311.0, 319.0, 292.0, 292.0, 295.0, 281.0, 321.0, 306.0, 316.0, 311.0, 173.0, 172.0, 295.0, 284.0, 302.0, 319.0, 291.0, 291.0, 259.0, 260.0, 301.0, 286.0, 300.0, 287.0, 300.0, 279.0, 320.0, 316.0, 303.0, 324.0, 318.0, 309.0, 294.0, 288.0, 267.0, 275.0, 288.0, 294.0, 293.0, 286.0, 236.0, 232.0, 277.0, 262.0, 288.0, 296.0, 302.0, 285.0, 285.0, 297.0, 314.0, 319.0, 296.0, 
291.0, 293.0, 286.0, 245.0, 253.0, 318.0, 309.0, 295.0, 284.0, 318.0, 312.0, 293.0, 289.0, 309.0, 321.0, 310.0, 320.0, 280.0, 302.0, 293.0, 283.0, 286.0, 301.0, 320.0, 313.0, 286.0, 296.0, 314.0, 307.0, 319.0, 308.0, 301.0, 281.0, 282.0, 305.0, 288.0, 294.0, 256.0, 269.0, 246.0, 232.0, 301.0, 281.0, 298.0, 284.0, 291.0, 291.0, 303.0, 273.0, 282.0, 294.0, 316.0, 311.0, 293.0, 286.0, 308.0, 322.0, 270.0, 266.0, 288.0, 291.0, 324.0, 306.0, 291.0, 291.0, 291.0, 291.0, 288.0, 291.0, 293.0, 289.0, 296.0, 274.0, 297.0, 279.0, 315.0, 315.0, 308.0, 319.0, 266.0, 270.0, 283.0, 296.0, 290.0, 292.0, 283.0, 296.0, 296.0, 291.0, 293.0, 291.0, 316.0, 314.0, 281.0, 301.0, 316.0, 314.0, 290.0, 292.0, 289.0, 293.0, 286.0, 293.0]}, "sampler_perf": {"mean_env_wait_ms": 1.1521297133023998, "mean_processing_ms": 0.2984947227408811, "mean_inference_ms": 1.7084618155808986}, "off_policy_estimator": {}, "info": {"num_steps_trained": 5136000, "num_steps_sampled": 2739200, "sample_time_ms": 20745.898, "load_time_ms": 36.566, "grad_time_ms": 9296.474, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.004741498734802008, "policy_loss": -0.003847965970635414, "vf_loss": 91.54241943359375, "vf_explained_var": 0.7623968124389648, "kl": 0.00236759171821177, "entropy": 1.1295729875564575, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2739200, "episodes_total": 6848, "training_iteration": 214, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-32-19", "timestamp": 1660253539, "time_this_iter_s": 29.774744749069214, "time_total_s": 11954.380000829697, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 11954.380000829697, "timesteps_since_restore": 2739200, "iterations_since_restore": 214, "perf": {"cpu_util_percent": 27.035714285714292, "ram_util_percent": 58.526190476190486}}
+{"episode_reward_max": 636.0, "episode_reward_min": 345.0, "episode_reward_mean": 584.13, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 172.0}, "policy_reward_max": {"ppo": 326.0}, "policy_reward_mean": {"ppo": 292.065}, "custom_metrics": {"sparse_reward_mean": 202.0, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 180.13, "shaped_reward_min": 105, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.09, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 18.77, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 15.7, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 17.86, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 25, "onion_drop_agent_0_mean": 0.6, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.95, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 7, "useful_onion_drop_agent_0_mean": 0.23, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, "useful_onion_drop_agent_1_mean": 0.43, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 15.22, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 17.38, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.39, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.22, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.6, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.61, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.56, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.39, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.04, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.68, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 10, "soup_pickup_agent_1_mean": 4.73, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.52, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.63, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.07, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 4, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.22, 
"optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 17.38, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.22, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 17.38, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [636.0, 558.0, 579.0, 576.0, 579.0, 536.0, 633.0, 584.0, 587.0, 576.0, 536.0, 627.0, 582.0, 627.0, 456.0, 584.0, 579.0, 582.0, 587.0, 621.0, 518.0, 567.0, 633.0, 627.0, 624.0, 582.0, 590.0, 621.0, 630.0, 630.0, 582.0, 636.0, 582.0, 587.0, 582.0, 525.0, 478.0, 582.0, 582.0, 582.0, 576.0, 576.0, 627.0, 579.0, 630.0, 536.0, 579.0, 630.0, 582.0, 582.0, 579.0, 582.0, 570.0, 576.0, 630.0, 627.0, 536.0, 579.0, 582.0, 579.0, 587.0, 584.0, 630.0, 582.0, 630.0, 582.0, 582.0, 579.0, 582.0, 630.0, 587.0, 576.0, 582.0, 587.0, 630.0, 587.0, 630.0, 476.0, 579.0, 587.0, 584.0, 587.0, 627.0, 579.0, 558.0, 479.0, 630.0, 579.0, 630.0, 579.0, 630.0, 584.0, 576.0, 627.0, 627.0, 345.0, 579.0, 621.0, 582.0, 519.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [314.0, 322.0, 281.0, 277.0, 292.0, 287.0, 287.0, 289.0, 286.0, 293.0, 262.0, 274.0, 319.0, 314.0, 297.0, 287.0, 288.0, 299.0, 285.0, 291.0, 278.0, 258.0, 308.0, 319.0, 304.0, 278.0, 326.0, 301.0, 225.0, 231.0, 295.0, 289.0, 300.0, 279.0, 301.0, 281.0, 293.0, 294.0, 315.0, 306.0, 247.0, 271.0, 280.0, 287.0, 326.0, 307.0, 311.0, 316.0, 317.0, 307.0, 286.0, 296.0, 290.0, 300.0, 313.0, 308.0, 313.0, 317.0, 318.0, 312.0, 296.0, 286.0, 321.0, 315.0, 301.0, 281.0, 282.0, 305.0, 288.0, 294.0, 256.0, 269.0, 246.0, 232.0, 301.0, 281.0, 298.0, 284.0, 291.0, 291.0, 303.0, 273.0, 282.0, 294.0, 316.0, 311.0, 293.0, 286.0, 308.0, 322.0, 270.0, 266.0, 288.0, 291.0, 324.0, 306.0, 291.0, 
291.0, 291.0, 291.0, 288.0, 291.0, 293.0, 289.0, 296.0, 274.0, 297.0, 279.0, 315.0, 315.0, 308.0, 319.0, 266.0, 270.0, 283.0, 296.0, 290.0, 292.0, 283.0, 296.0, 296.0, 291.0, 293.0, 291.0, 316.0, 314.0, 281.0, 301.0, 316.0, 314.0, 290.0, 292.0, 289.0, 293.0, 286.0, 293.0, 299.0, 283.0, 309.0, 321.0, 287.0, 300.0, 290.0, 286.0, 289.0, 293.0, 296.0, 291.0, 314.0, 316.0, 303.0, 284.0, 314.0, 316.0, 243.0, 233.0, 273.0, 306.0, 296.0, 291.0, 291.0, 293.0, 298.0, 289.0, 307.0, 320.0, 284.0, 295.0, 264.0, 294.0, 238.0, 241.0, 326.0, 304.0, 288.0, 291.0, 319.0, 311.0, 289.0, 290.0, 311.0, 319.0, 292.0, 292.0, 295.0, 281.0, 321.0, 306.0, 316.0, 311.0, 173.0, 172.0, 295.0, 284.0, 302.0, 319.0, 291.0, 291.0, 259.0, 260.0]}, "sampler_perf": {"mean_env_wait_ms": 1.1488626119910377, "mean_processing_ms": 0.297841305997747, "mean_inference_ms": 1.7053304969218863}, "off_policy_estimator": {}, "info": {"num_steps_trained": 5160000, "num_steps_sampled": 2752000, "sample_time_ms": 20853.401, "load_time_ms": 36.595, "grad_time_ms": 9300.168, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.004025696776807308, "policy_loss": -0.0038255956023931503, "vf_loss": 84.18643951416016, "vf_explained_var": 0.7665885090827942, "kl": 0.0019039264880120754, "entropy": 1.1346958875656128, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2752000, "episodes_total": 6880, "training_iteration": 215, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-32-49", "timestamp": 1660253569, "time_this_iter_s": 30.592424869537354, "time_total_s": 11984.972425699234, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 11984.972425699234, "timesteps_since_restore": 2752000, "iterations_since_restore": 215, "perf": {"cpu_util_percent": 30.46511627906977, "ram_util_percent": 58.576744186046504}}
+{"episode_reward_max": 636.0, "episode_reward_min": 345.0, "episode_reward_mean": 589.1, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 172.0}, "policy_reward_max": {"ppo": 326.0}, "policy_reward_mean": {"ppo": 294.55}, "custom_metrics": {"sparse_reward_mean": 204.0, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 181.1, "shaped_reward_min": 105, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.05, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 18.94, "onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 15.63, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 18.02, "useful_onion_pickup_agent_1_min": 11, "useful_onion_pickup_agent_1_max": 25, "onion_drop_agent_0_mean": 0.63, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.88, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 7, "useful_onion_drop_agent_0_mean": 0.25, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, "useful_onion_drop_agent_1_mean": 0.38, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 15.09, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 17.64, "potting_onion_agent_1_min": 11, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 6.49, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.19, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.66, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.61, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.56, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.36, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.8, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 10, "soup_pickup_agent_1_mean": 4.68, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.63, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.61, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.08, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 4, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.09, 
"optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 17.64, "optimal_onion_potting_agent_1_min": 11, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.09, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 17.64, "viable_onion_potting_agent_1_min": 11, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [576.0, 630.0, 627.0, 582.0, 627.0, 579.0, 579.0, 567.0, 587.0, 587.0, 630.0, 579.0, 582.0, 576.0, 582.0, 630.0, 570.0, 582.0, 627.0, 587.0, 582.0, 582.0, 630.0, 582.0, 627.0, 582.0, 627.0, 587.0, 633.0, 587.0, 582.0, 630.0, 630.0, 582.0, 582.0, 579.0, 582.0, 630.0, 587.0, 576.0, 582.0, 587.0, 630.0, 587.0, 630.0, 476.0, 579.0, 587.0, 584.0, 587.0, 627.0, 579.0, 558.0, 479.0, 630.0, 579.0, 630.0, 579.0, 630.0, 584.0, 576.0, 627.0, 627.0, 345.0, 579.0, 621.0, 582.0, 519.0, 636.0, 558.0, 579.0, 576.0, 579.0, 536.0, 633.0, 584.0, 587.0, 576.0, 536.0, 627.0, 582.0, 627.0, 456.0, 584.0, 579.0, 582.0, 587.0, 621.0, 518.0, 567.0, 633.0, 627.0, 624.0, 582.0, 590.0, 621.0, 630.0, 630.0, 582.0, 636.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [294.0, 282.0, 317.0, 313.0, 309.0, 318.0, 296.0, 286.0, 316.0, 311.0, 288.0, 291.0, 285.0, 294.0, 286.0, 281.0, 298.0, 289.0, 298.0, 289.0, 314.0, 316.0, 290.0, 289.0, 289.0, 293.0, 291.0, 285.0, 285.0, 297.0, 306.0, 324.0, 298.0, 272.0, 283.0, 299.0, 311.0, 316.0, 293.0, 294.0, 300.0, 282.0, 293.0, 289.0, 319.0, 311.0, 285.0, 297.0, 310.0, 317.0, 293.0, 289.0, 310.0, 317.0, 300.0, 287.0, 316.0, 317.0, 298.0, 289.0, 283.0, 299.0, 319.0, 311.0, 316.0, 314.0, 290.0, 292.0, 289.0, 293.0, 286.0, 293.0, 299.0, 283.0, 309.0, 321.0, 287.0, 300.0, 290.0, 286.0, 289.0, 293.0, 296.0, 291.0, 314.0, 316.0, 303.0, 284.0, 314.0, 316.0, 243.0, 233.0, 273.0, 306.0, 296.0, 291.0, 291.0, 
293.0, 298.0, 289.0, 307.0, 320.0, 284.0, 295.0, 264.0, 294.0, 238.0, 241.0, 326.0, 304.0, 288.0, 291.0, 319.0, 311.0, 289.0, 290.0, 311.0, 319.0, 292.0, 292.0, 295.0, 281.0, 321.0, 306.0, 316.0, 311.0, 173.0, 172.0, 295.0, 284.0, 302.0, 319.0, 291.0, 291.0, 259.0, 260.0, 314.0, 322.0, 281.0, 277.0, 292.0, 287.0, 287.0, 289.0, 286.0, 293.0, 262.0, 274.0, 319.0, 314.0, 297.0, 287.0, 288.0, 299.0, 285.0, 291.0, 278.0, 258.0, 308.0, 319.0, 304.0, 278.0, 326.0, 301.0, 225.0, 231.0, 295.0, 289.0, 300.0, 279.0, 301.0, 281.0, 293.0, 294.0, 315.0, 306.0, 247.0, 271.0, 280.0, 287.0, 326.0, 307.0, 311.0, 316.0, 317.0, 307.0, 286.0, 296.0, 290.0, 300.0, 313.0, 308.0, 313.0, 317.0, 318.0, 312.0, 296.0, 286.0, 321.0, 315.0]}, "sampler_perf": {"mean_env_wait_ms": 1.1456158536618073, "mean_processing_ms": 0.29719304474995795, "mean_inference_ms": 1.7020409366400755}, "off_policy_estimator": {}, "info": {"num_steps_trained": 5184000, "num_steps_sampled": 2764800, "sample_time_ms": 20854.663, "load_time_ms": 36.839, "grad_time_ms": 9330.064, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0055513703264296055, "policy_loss": -0.0025626528076827526, "vf_loss": 86.77967071533203, "vf_explained_var": 0.7667043805122375, "kl": 0.00211916770786047, "entropy": 1.1278961896896362, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2764800, "episodes_total": 6912, "training_iteration": 216, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-33-19", "timestamp": 1660253599, "time_this_iter_s": 29.99899387359619, "time_total_s": 12014.97141957283, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 12014.97141957283, "timesteps_since_restore": 2764800, "iterations_since_restore": 216, "perf": {"cpu_util_percent": 32.32380952380952, "ram_util_percent": 58.607142857142854}}
+{"episode_reward_max": 639.0, "episode_reward_min": 456.0, "episode_reward_mean": 596.37, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 225.0}, "policy_reward_max": {"ppo": 330.0}, "policy_reward_mean": {"ppo": 298.185}, "custom_metrics": {"sparse_reward_mean": 206.6, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 183.17, "shaped_reward_min": 136, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.25, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 18.9, "onion_pickup_agent_1_min": 12, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 15.82, "useful_onion_pickup_agent_0_min": 9, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 18.07, "useful_onion_pickup_agent_1_min": 11, "useful_onion_pickup_agent_1_max": 25, "onion_drop_agent_0_mean": 0.6, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.64, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.22, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.24, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 15.31, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.83, "potting_onion_agent_1_min": 12, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 6.48, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.32, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.71, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.69, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.53, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.44, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.06, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.77, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.74, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.7, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.67, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.31, 
"optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.83, "optimal_onion_potting_agent_1_min": 12, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.31, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.83, "viable_onion_potting_agent_1_min": 12, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [627.0, 630.0, 636.0, 582.0, 584.0, 587.0, 636.0, 630.0, 573.0, 573.0, 630.0, 582.0, 627.0, 576.0, 633.0, 630.0, 633.0, 587.0, 633.0, 579.0, 579.0, 587.0, 582.0, 627.0, 630.0, 639.0, 587.0, 582.0, 579.0, 582.0, 576.0, 636.0, 579.0, 621.0, 582.0, 519.0, 636.0, 558.0, 579.0, 576.0, 579.0, 536.0, 633.0, 584.0, 587.0, 576.0, 536.0, 627.0, 582.0, 627.0, 456.0, 584.0, 579.0, 582.0, 587.0, 621.0, 518.0, 567.0, 633.0, 627.0, 624.0, 582.0, 590.0, 621.0, 630.0, 630.0, 582.0, 636.0, 576.0, 630.0, 627.0, 582.0, 627.0, 579.0, 579.0, 567.0, 587.0, 587.0, 630.0, 579.0, 582.0, 576.0, 582.0, 630.0, 570.0, 582.0, 627.0, 587.0, 582.0, 582.0, 630.0, 582.0, 627.0, 582.0, 627.0, 587.0, 633.0, 587.0, 582.0, 630.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [298.0, 329.0, 321.0, 309.0, 321.0, 315.0, 289.0, 293.0, 294.0, 290.0, 281.0, 306.0, 306.0, 330.0, 313.0, 317.0, 279.0, 294.0, 288.0, 285.0, 316.0, 314.0, 293.0, 289.0, 319.0, 308.0, 288.0, 288.0, 324.0, 309.0, 322.0, 308.0, 311.0, 322.0, 295.0, 292.0, 316.0, 317.0, 283.0, 296.0, 287.0, 292.0, 293.0, 294.0, 278.0, 304.0, 316.0, 311.0, 321.0, 309.0, 315.0, 324.0, 306.0, 281.0, 293.0, 289.0, 293.0, 286.0, 293.0, 289.0, 291.0, 285.0, 314.0, 322.0, 295.0, 284.0, 302.0, 319.0, 291.0, 291.0, 259.0, 260.0, 314.0, 322.0, 281.0, 277.0, 292.0, 287.0, 287.0, 289.0, 286.0, 293.0, 262.0, 274.0, 319.0, 314.0, 297.0, 287.0, 288.0, 299.0, 285.0, 291.0, 278.0, 258.0, 308.0, 319.0, 304.0, 
278.0, 326.0, 301.0, 225.0, 231.0, 295.0, 289.0, 300.0, 279.0, 301.0, 281.0, 293.0, 294.0, 315.0, 306.0, 247.0, 271.0, 280.0, 287.0, 326.0, 307.0, 311.0, 316.0, 317.0, 307.0, 286.0, 296.0, 290.0, 300.0, 313.0, 308.0, 313.0, 317.0, 318.0, 312.0, 296.0, 286.0, 321.0, 315.0, 294.0, 282.0, 317.0, 313.0, 309.0, 318.0, 296.0, 286.0, 316.0, 311.0, 288.0, 291.0, 285.0, 294.0, 286.0, 281.0, 298.0, 289.0, 298.0, 289.0, 314.0, 316.0, 290.0, 289.0, 289.0, 293.0, 291.0, 285.0, 285.0, 297.0, 306.0, 324.0, 298.0, 272.0, 283.0, 299.0, 311.0, 316.0, 293.0, 294.0, 300.0, 282.0, 293.0, 289.0, 319.0, 311.0, 285.0, 297.0, 310.0, 317.0, 293.0, 289.0, 310.0, 317.0, 300.0, 287.0, 316.0, 317.0, 298.0, 289.0, 283.0, 299.0, 319.0, 311.0]}, "sampler_perf": {"mean_env_wait_ms": 1.1424035037874523, "mean_processing_ms": 0.29655443936404674, "mean_inference_ms": 1.6989240589354977}, "off_policy_estimator": {}, "info": {"num_steps_trained": 5208000, "num_steps_sampled": 2777600, "sample_time_ms": 20971.783, "load_time_ms": 36.769, "grad_time_ms": 9428.242, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.002736276714131236, "policy_loss": -0.004988871049135923, "vf_loss": 82.89418029785156, "vf_explained_var": 0.7724503874778748, "kl": 0.00226503680460155, "entropy": 1.1285419464111328, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2777600, "episodes_total": 6944, "training_iteration": 217, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-33-52", "timestamp": 1660253632, "time_this_iter_s": 32.39657115936279, "time_total_s": 12047.367990732193, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 12047.367990732193, "timesteps_since_restore": 2777600, "iterations_since_restore": 217, "perf": {"cpu_util_percent": 33.11304347826087, "ram_util_percent": 58.56521739130436}}
+{"episode_reward_max": 639.0, "episode_reward_min": 291.0, "episode_reward_mean": 593.94, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 145.0}, "policy_reward_max": {"ppo": 330.0}, "policy_reward_mean": {"ppo": 296.97}, "custom_metrics": {"sparse_reward_mean": 205.4, "sparse_reward_min": 100, "sparse_reward_max": 220, "shaped_reward_mean": 183.14, "shaped_reward_min": 91, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.68, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 18.36, "onion_pickup_agent_1_min": 12, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.16, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 17.67, "useful_onion_pickup_agent_1_min": 11, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.69, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 0.52, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.28, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.17, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.59, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.53, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 6.2, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.46, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.6, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.91, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.47, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.42, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.05, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.05, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.6, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.86, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.52, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.78, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.59, 
"optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.53, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.59, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.53, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 579.0, 579.0, 587.0, 582.0, 630.0, 636.0, 579.0, 633.0, 579.0, 584.0, 582.0, 579.0, 579.0, 579.0, 630.0, 630.0, 587.0, 587.0, 518.0, 633.0, 587.0, 582.0, 530.0, 582.0, 462.0, 582.0, 627.0, 291.0, 587.0, 579.0, 582.0, 630.0, 630.0, 582.0, 636.0, 576.0, 630.0, 627.0, 582.0, 627.0, 579.0, 579.0, 567.0, 587.0, 587.0, 630.0, 579.0, 582.0, 576.0, 582.0, 630.0, 570.0, 582.0, 627.0, 587.0, 582.0, 582.0, 630.0, 582.0, 627.0, 582.0, 627.0, 587.0, 633.0, 587.0, 582.0, 630.0, 627.0, 630.0, 636.0, 582.0, 584.0, 587.0, 636.0, 630.0, 573.0, 573.0, 630.0, 582.0, 627.0, 576.0, 633.0, 630.0, 633.0, 587.0, 633.0, 579.0, 579.0, 587.0, 582.0, 627.0, 630.0, 639.0, 587.0, 582.0, 579.0, 582.0, 576.0, 636.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [276.0, 306.0, 282.0, 297.0, 298.0, 281.0, 301.0, 286.0, 283.0, 299.0, 319.0, 311.0, 321.0, 315.0, 299.0, 280.0, 315.0, 318.0, 294.0, 285.0, 298.0, 286.0, 288.0, 294.0, 297.0, 282.0, 282.0, 297.0, 290.0, 289.0, 311.0, 319.0, 309.0, 321.0, 296.0, 291.0, 291.0, 296.0, 262.0, 256.0, 317.0, 316.0, 294.0, 293.0, 296.0, 286.0, 262.0, 268.0, 289.0, 293.0, 228.0, 234.0, 288.0, 294.0, 313.0, 314.0, 145.0, 146.0, 291.0, 296.0, 293.0, 286.0, 286.0, 296.0, 313.0, 317.0, 318.0, 312.0, 296.0, 286.0, 321.0, 315.0, 294.0, 282.0, 317.0, 313.0, 309.0, 318.0, 296.0, 286.0, 316.0, 311.0, 288.0, 291.0, 285.0, 294.0, 286.0, 281.0, 298.0, 289.0, 298.0, 289.0, 314.0, 316.0, 290.0, 289.0, 289.0, 
293.0, 291.0, 285.0, 285.0, 297.0, 306.0, 324.0, 298.0, 272.0, 283.0, 299.0, 311.0, 316.0, 293.0, 294.0, 300.0, 282.0, 293.0, 289.0, 319.0, 311.0, 285.0, 297.0, 310.0, 317.0, 293.0, 289.0, 310.0, 317.0, 300.0, 287.0, 316.0, 317.0, 298.0, 289.0, 283.0, 299.0, 319.0, 311.0, 298.0, 329.0, 321.0, 309.0, 321.0, 315.0, 289.0, 293.0, 294.0, 290.0, 281.0, 306.0, 306.0, 330.0, 313.0, 317.0, 279.0, 294.0, 288.0, 285.0, 316.0, 314.0, 293.0, 289.0, 319.0, 308.0, 288.0, 288.0, 324.0, 309.0, 322.0, 308.0, 311.0, 322.0, 295.0, 292.0, 316.0, 317.0, 283.0, 296.0, 287.0, 292.0, 293.0, 294.0, 278.0, 304.0, 316.0, 311.0, 321.0, 309.0, 315.0, 324.0, 306.0, 281.0, 293.0, 289.0, 293.0, 286.0, 293.0, 289.0, 291.0, 285.0, 314.0, 322.0]}, "sampler_perf": {"mean_env_wait_ms": 1.1392367943050357, "mean_processing_ms": 0.29592632211468906, "mean_inference_ms": 1.6959597907664128}, "off_policy_estimator": {}, "info": {"num_steps_trained": 5232000, "num_steps_sampled": 2790400, "sample_time_ms": 21233.48, "load_time_ms": 36.919, "grad_time_ms": 9592.49, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.006457938347011805, "policy_loss": -0.0026744985952973366, "vf_loss": 97.0146713256836, "vf_explained_var": 0.7470273375511169, "kl": 0.0016420072643086314, "entropy": 1.1380563974380493, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2790400, "episodes_total": 6976, "training_iteration": 218, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-34-25", "timestamp": 1660253665, "time_this_iter_s": 33.2370343208313, "time_total_s": 12080.605025053024, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 12080.605025053024, "timesteps_since_restore": 2790400, "iterations_since_restore": 218, "perf": {"cpu_util_percent": 35.75531914893618, "ram_util_percent": 58.63191489361703}}
+{"episode_reward_max": 639.0, "episode_reward_min": 291.0, "episode_reward_mean": 593.58, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 145.0}, "policy_reward_max": {"ppo": 332.0}, "policy_reward_mean": {"ppo": 296.79}, "custom_metrics": {"sparse_reward_mean": 205.4, "sparse_reward_min": 100, "sparse_reward_max": 220, "shaped_reward_mean": 182.78, "shaped_reward_min": 91, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.95, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 18.17, "onion_pickup_agent_1_min": 12, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.42, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 17.44, "useful_onion_pickup_agent_1_min": 11, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.72, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 7, "onion_drop_agent_1_mean": 0.56, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.25, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.18, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.81, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 17.31, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 6.21, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.59, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.48, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.91, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.6, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.47, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.05, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.05, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.49, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.99, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.41, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.89, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.81, 
"optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 17.31, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.81, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 17.31, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [636.0, 627.0, 555.0, 636.0, 627.0, 582.0, 630.0, 408.0, 582.0, 587.0, 570.0, 573.0, 633.0, 639.0, 633.0, 630.0, 630.0, 587.0, 573.0, 630.0, 587.0, 576.0, 630.0, 576.0, 630.0, 630.0, 584.0, 582.0, 582.0, 567.0, 633.0, 582.0, 633.0, 587.0, 582.0, 630.0, 627.0, 630.0, 636.0, 582.0, 584.0, 587.0, 636.0, 630.0, 573.0, 573.0, 630.0, 582.0, 627.0, 576.0, 633.0, 630.0, 633.0, 587.0, 633.0, 579.0, 579.0, 587.0, 582.0, 627.0, 630.0, 639.0, 587.0, 582.0, 579.0, 582.0, 576.0, 636.0, 582.0, 579.0, 579.0, 587.0, 582.0, 630.0, 636.0, 579.0, 633.0, 579.0, 584.0, 582.0, 579.0, 579.0, 579.0, 630.0, 630.0, 587.0, 587.0, 518.0, 633.0, 587.0, 582.0, 530.0, 582.0, 462.0, 582.0, 627.0, 291.0, 587.0, 579.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [319.0, 317.0, 318.0, 309.0, 269.0, 286.0, 324.0, 312.0, 319.0, 308.0, 291.0, 291.0, 318.0, 312.0, 202.0, 206.0, 292.0, 290.0, 293.0, 294.0, 278.0, 292.0, 289.0, 284.0, 332.0, 301.0, 322.0, 317.0, 311.0, 322.0, 326.0, 304.0, 317.0, 313.0, 301.0, 286.0, 277.0, 296.0, 316.0, 314.0, 296.0, 291.0, 272.0, 304.0, 319.0, 311.0, 291.0, 285.0, 313.0, 317.0, 316.0, 314.0, 290.0, 294.0, 299.0, 283.0, 285.0, 297.0, 281.0, 286.0, 317.0, 316.0, 287.0, 295.0, 316.0, 317.0, 298.0, 289.0, 283.0, 299.0, 319.0, 311.0, 298.0, 329.0, 321.0, 309.0, 321.0, 315.0, 289.0, 293.0, 294.0, 290.0, 281.0, 306.0, 306.0, 330.0, 313.0, 317.0, 279.0, 294.0, 288.0, 285.0, 316.0, 314.0, 293.0, 289.0, 319.0, 
308.0, 288.0, 288.0, 324.0, 309.0, 322.0, 308.0, 311.0, 322.0, 295.0, 292.0, 316.0, 317.0, 283.0, 296.0, 287.0, 292.0, 293.0, 294.0, 278.0, 304.0, 316.0, 311.0, 321.0, 309.0, 315.0, 324.0, 306.0, 281.0, 293.0, 289.0, 293.0, 286.0, 293.0, 289.0, 291.0, 285.0, 314.0, 322.0, 276.0, 306.0, 282.0, 297.0, 298.0, 281.0, 301.0, 286.0, 283.0, 299.0, 319.0, 311.0, 321.0, 315.0, 299.0, 280.0, 315.0, 318.0, 294.0, 285.0, 298.0, 286.0, 288.0, 294.0, 297.0, 282.0, 282.0, 297.0, 290.0, 289.0, 311.0, 319.0, 309.0, 321.0, 296.0, 291.0, 291.0, 296.0, 262.0, 256.0, 317.0, 316.0, 294.0, 293.0, 296.0, 286.0, 262.0, 268.0, 289.0, 293.0, 228.0, 234.0, 288.0, 294.0, 313.0, 314.0, 145.0, 146.0, 291.0, 296.0, 293.0, 286.0, 286.0, 296.0]}, "sampler_perf": {"mean_env_wait_ms": 1.1361092001504756, "mean_processing_ms": 0.29530600936138574, "mean_inference_ms": 1.6931104739373604}, "off_policy_estimator": {}, "info": {"num_steps_trained": 5256000, "num_steps_sampled": 2803200, "sample_time_ms": 21336.415, "load_time_ms": 37.483, "grad_time_ms": 9561.229, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0005191893433220685, "policy_loss": -0.0074623264372348785, "vf_loss": 85.4925765991211, "vf_explained_var": 0.7601101994514465, "kl": 0.0019686671439558268, "entropy": 1.1354910135269165, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2803200, "episodes_total": 7008, "training_iteration": 219, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-34-55", "timestamp": 1660253695, "time_this_iter_s": 30.000843048095703, "time_total_s": 12110.60586810112, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 12110.60586810112, "timesteps_since_restore": 2803200, "iterations_since_restore": 219, "perf": {"cpu_util_percent": 30.46666666666667, "ram_util_percent": 58.68571428571429}}
+{"episode_reward_max": 639.0, "episode_reward_min": 291.0, "episode_reward_mean": 588.02, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 145.0}, "policy_reward_max": {"ppo": 332.0}, "policy_reward_mean": {"ppo": 294.01}, "custom_metrics": {"sparse_reward_mean": 203.2, "sparse_reward_min": 100, "sparse_reward_max": 220, "shaped_reward_mean": 181.62, "shaped_reward_min": 91, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.07, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 17.97, "onion_pickup_agent_1_min": 12, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.42, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 17.25, "useful_onion_pickup_agent_1_min": 11, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.77, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 7, "onion_drop_agent_1_mean": 0.68, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.26, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.28, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.87, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 17.03, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 6.15, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.71, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.4, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.94, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.59, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.54, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.04, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.04, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.43, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.99, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.35, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.88, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.87, 
"optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 17.03, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.87, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 17.03, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [630.0, 582.0, 624.0, 582.0, 518.0, 582.0, 630.0, 576.0, 633.0, 587.0, 582.0, 587.0, 582.0, 584.0, 582.0, 627.0, 544.0, 579.0, 576.0, 487.0, 582.0, 582.0, 584.0, 636.0, 582.0, 633.0, 630.0, 539.0, 579.0, 579.0, 627.0, 630.0, 579.0, 582.0, 576.0, 636.0, 582.0, 579.0, 579.0, 587.0, 582.0, 630.0, 636.0, 579.0, 633.0, 579.0, 584.0, 582.0, 579.0, 579.0, 579.0, 630.0, 630.0, 587.0, 587.0, 518.0, 633.0, 587.0, 582.0, 530.0, 582.0, 462.0, 582.0, 627.0, 291.0, 587.0, 579.0, 582.0, 636.0, 627.0, 555.0, 636.0, 627.0, 582.0, 630.0, 408.0, 582.0, 587.0, 570.0, 573.0, 633.0, 639.0, 633.0, 630.0, 630.0, 587.0, 573.0, 630.0, 587.0, 576.0, 630.0, 576.0, 630.0, 630.0, 584.0, 582.0, 582.0, 567.0, 633.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [308.0, 322.0, 292.0, 290.0, 321.0, 303.0, 292.0, 290.0, 252.0, 266.0, 297.0, 285.0, 313.0, 317.0, 298.0, 278.0, 324.0, 309.0, 285.0, 302.0, 291.0, 291.0, 291.0, 296.0, 291.0, 291.0, 287.0, 297.0, 291.0, 291.0, 310.0, 317.0, 272.0, 272.0, 290.0, 289.0, 293.0, 283.0, 257.0, 230.0, 285.0, 297.0, 285.0, 297.0, 300.0, 284.0, 312.0, 324.0, 277.0, 305.0, 313.0, 320.0, 321.0, 309.0, 265.0, 274.0, 294.0, 285.0, 299.0, 280.0, 311.0, 316.0, 309.0, 321.0, 293.0, 286.0, 293.0, 289.0, 291.0, 285.0, 314.0, 322.0, 276.0, 306.0, 282.0, 297.0, 298.0, 281.0, 301.0, 286.0, 283.0, 299.0, 319.0, 311.0, 321.0, 315.0, 299.0, 280.0, 315.0, 318.0, 294.0, 285.0, 298.0, 286.0, 288.0, 294.0, 297.0, 
282.0, 282.0, 297.0, 290.0, 289.0, 311.0, 319.0, 309.0, 321.0, 296.0, 291.0, 291.0, 296.0, 262.0, 256.0, 317.0, 316.0, 294.0, 293.0, 296.0, 286.0, 262.0, 268.0, 289.0, 293.0, 228.0, 234.0, 288.0, 294.0, 313.0, 314.0, 145.0, 146.0, 291.0, 296.0, 293.0, 286.0, 286.0, 296.0, 319.0, 317.0, 318.0, 309.0, 269.0, 286.0, 324.0, 312.0, 319.0, 308.0, 291.0, 291.0, 318.0, 312.0, 202.0, 206.0, 292.0, 290.0, 293.0, 294.0, 278.0, 292.0, 289.0, 284.0, 332.0, 301.0, 322.0, 317.0, 311.0, 322.0, 326.0, 304.0, 317.0, 313.0, 301.0, 286.0, 277.0, 296.0, 316.0, 314.0, 296.0, 291.0, 272.0, 304.0, 319.0, 311.0, 291.0, 285.0, 313.0, 317.0, 316.0, 314.0, 290.0, 294.0, 299.0, 283.0, 285.0, 297.0, 281.0, 286.0, 317.0, 316.0, 287.0, 295.0]}, "sampler_perf": {"mean_env_wait_ms": 1.133009827763421, "mean_processing_ms": 0.29469029655090995, "mean_inference_ms": 1.6902501086005228}, "off_policy_estimator": {}, "info": {"num_steps_trained": 5280000, "num_steps_sampled": 2816000, "sample_time_ms": 21400.362, "load_time_ms": 37.346, "grad_time_ms": 9606.188, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.002275400562211871, "policy_loss": -0.0062116296030581, "vf_loss": 90.5528793334961, "vf_explained_var": 0.7516798973083496, "kl": 0.0019114302704110742, "entropy": 1.1365100145339966, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2816000, "episodes_total": 7040, "training_iteration": 220, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-35-26", "timestamp": 1660253726, "time_this_iter_s": 30.873941659927368, "time_total_s": 12141.479809761047, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 12141.479809761047, "timesteps_since_restore": 2816000, "iterations_since_restore": 220, "perf": {"cpu_util_percent": 31.518181818181816, "ram_util_percent": 58.6159090909091}}
+{"episode_reward_max": 639.0, "episode_reward_min": 291.0, "episode_reward_mean": 590.33, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 145.0}, "policy_reward_max": {"ppo": 332.0}, "policy_reward_mean": {"ppo": 295.165}, "custom_metrics": {"sparse_reward_mean": 204.2, "sparse_reward_min": 100, "sparse_reward_max": 220, "shaped_reward_mean": 181.93, "shaped_reward_min": 91, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.2, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 17.88, "onion_pickup_agent_1_min": 12, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.64, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 17.09, "useful_onion_pickup_agent_1_min": 11, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.71, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 7, "onion_drop_agent_1_mean": 0.69, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.23, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.32, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 16.1, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 16.9, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 6.16, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.89, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.29, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.02, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.63, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.64, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.05, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.34, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.1, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.29, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.99, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.04, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.1, 
"optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 16.9, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.1, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 16.9, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 630.0, 579.0, 573.0, 579.0, 627.0, 630.0, 582.0, 587.0, 627.0, 630.0, 636.0, 573.0, 582.0, 582.0, 579.0, 582.0, 627.0, 570.0, 582.0, 573.0, 593.0, 590.0, 582.0, 579.0, 555.0, 627.0, 539.0, 636.0, 582.0, 633.0, 582.0, 291.0, 587.0, 579.0, 582.0, 636.0, 627.0, 555.0, 636.0, 627.0, 582.0, 630.0, 408.0, 582.0, 587.0, 570.0, 573.0, 633.0, 639.0, 633.0, 630.0, 630.0, 587.0, 573.0, 630.0, 587.0, 576.0, 630.0, 576.0, 630.0, 630.0, 584.0, 582.0, 582.0, 567.0, 633.0, 582.0, 630.0, 582.0, 624.0, 582.0, 518.0, 582.0, 630.0, 576.0, 633.0, 587.0, 582.0, 587.0, 582.0, 584.0, 582.0, 627.0, 544.0, 579.0, 576.0, 487.0, 582.0, 582.0, 584.0, 636.0, 582.0, 633.0, 630.0, 539.0, 579.0, 579.0, 627.0, 630.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [290.0, 292.0, 306.0, 324.0, 302.0, 277.0, 288.0, 285.0, 284.0, 295.0, 322.0, 305.0, 313.0, 317.0, 289.0, 293.0, 287.0, 300.0, 310.0, 317.0, 308.0, 322.0, 314.0, 322.0, 291.0, 282.0, 298.0, 284.0, 301.0, 281.0, 285.0, 294.0, 300.0, 282.0, 299.0, 328.0, 284.0, 286.0, 288.0, 294.0, 283.0, 290.0, 289.0, 304.0, 291.0, 299.0, 294.0, 288.0, 283.0, 296.0, 285.0, 270.0, 311.0, 316.0, 260.0, 279.0, 327.0, 309.0, 285.0, 297.0, 324.0, 309.0, 290.0, 292.0, 145.0, 146.0, 291.0, 296.0, 293.0, 286.0, 286.0, 296.0, 319.0, 317.0, 318.0, 309.0, 269.0, 286.0, 324.0, 312.0, 319.0, 308.0, 291.0, 291.0, 318.0, 312.0, 202.0, 206.0, 292.0, 290.0, 293.0, 294.0, 278.0, 292.0, 289.0, 284.0, 332.0, 
301.0, 322.0, 317.0, 311.0, 322.0, 326.0, 304.0, 317.0, 313.0, 301.0, 286.0, 277.0, 296.0, 316.0, 314.0, 296.0, 291.0, 272.0, 304.0, 319.0, 311.0, 291.0, 285.0, 313.0, 317.0, 316.0, 314.0, 290.0, 294.0, 299.0, 283.0, 285.0, 297.0, 281.0, 286.0, 317.0, 316.0, 287.0, 295.0, 308.0, 322.0, 292.0, 290.0, 321.0, 303.0, 292.0, 290.0, 252.0, 266.0, 297.0, 285.0, 313.0, 317.0, 298.0, 278.0, 324.0, 309.0, 285.0, 302.0, 291.0, 291.0, 291.0, 296.0, 291.0, 291.0, 287.0, 297.0, 291.0, 291.0, 310.0, 317.0, 272.0, 272.0, 290.0, 289.0, 293.0, 283.0, 257.0, 230.0, 285.0, 297.0, 285.0, 297.0, 300.0, 284.0, 312.0, 324.0, 277.0, 305.0, 313.0, 320.0, 321.0, 309.0, 265.0, 274.0, 294.0, 285.0, 299.0, 280.0, 311.0, 316.0, 309.0, 321.0]}, "sampler_perf": {"mean_env_wait_ms": 1.1299217967931974, "mean_processing_ms": 0.29407401911825776, "mean_inference_ms": 1.6873151851132406}, "off_policy_estimator": {}, "info": {"num_steps_trained": 5304000, "num_steps_sampled": 2828800, "sample_time_ms": 21404.207, "load_time_ms": 36.943, "grad_time_ms": 9639.523, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.004844650160521269, "policy_loss": -0.004174739122390747, "vf_loss": 95.8445816040039, "vf_explained_var": 0.7459821701049805, "kl": 0.0019909220281988382, "entropy": 1.1301350593566895, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2828800, "episodes_total": 7072, "training_iteration": 221, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-35-57", "timestamp": 1660253757, "time_this_iter_s": 30.906293869018555, "time_total_s": 12172.386103630066, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 12172.386103630066, "timesteps_since_restore": 2828800, "iterations_since_restore": 221, "perf": {"cpu_util_percent": 27.49545454545455, "ram_util_percent": 58.63636363636363}}
+{"episode_reward_max": 639.0, "episode_reward_min": 487.0, "episode_reward_mean": 596.02, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 230.0}, "policy_reward_max": {"ppo": 329.0}, "policy_reward_mean": {"ppo": 298.01}, "custom_metrics": {"sparse_reward_mean": 206.2, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 183.62, "shaped_reward_min": 155, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.36, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 26, "onion_pickup_agent_1_mean": 17.93, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 16.85, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 26, "useful_onion_pickup_agent_1_mean": 17.1, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.68, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.63, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.24, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, "useful_onion_drop_agent_1_mean": 0.34, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 16.23, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 17.04, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 6.0, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.99, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.25, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.22, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.55, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.58, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.06, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.27, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.23, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.25, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.12, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.23, 
"optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 17.04, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.23, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 17.04, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 579.0, 579.0, 630.0, 630.0, 627.0, 587.0, 639.0, 587.0, 630.0, 582.0, 633.0, 587.0, 627.0, 579.0, 576.0, 633.0, 582.0, 570.0, 630.0, 627.0, 579.0, 627.0, 582.0, 633.0, 633.0, 630.0, 579.0, 582.0, 627.0, 627.0, 576.0, 582.0, 567.0, 633.0, 582.0, 630.0, 582.0, 624.0, 582.0, 518.0, 582.0, 630.0, 576.0, 633.0, 587.0, 582.0, 587.0, 582.0, 584.0, 582.0, 627.0, 544.0, 579.0, 576.0, 487.0, 582.0, 582.0, 584.0, 636.0, 582.0, 633.0, 630.0, 539.0, 579.0, 579.0, 627.0, 630.0, 582.0, 630.0, 579.0, 573.0, 579.0, 627.0, 630.0, 582.0, 587.0, 627.0, 630.0, 636.0, 573.0, 582.0, 582.0, 579.0, 582.0, 627.0, 570.0, 582.0, 573.0, 593.0, 590.0, 582.0, 579.0, 555.0, 627.0, 539.0, 636.0, 582.0, 633.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [296.0, 286.0, 283.0, 296.0, 293.0, 286.0, 310.0, 320.0, 318.0, 312.0, 318.0, 309.0, 290.0, 297.0, 325.0, 314.0, 290.0, 297.0, 319.0, 311.0, 291.0, 291.0, 309.0, 324.0, 295.0, 292.0, 298.0, 329.0, 280.0, 299.0, 280.0, 296.0, 318.0, 315.0, 293.0, 289.0, 283.0, 287.0, 319.0, 311.0, 305.0, 322.0, 285.0, 294.0, 309.0, 318.0, 285.0, 297.0, 320.0, 313.0, 311.0, 322.0, 313.0, 317.0, 296.0, 283.0, 286.0, 296.0, 305.0, 322.0, 310.0, 317.0, 289.0, 287.0, 285.0, 297.0, 281.0, 286.0, 317.0, 316.0, 287.0, 295.0, 308.0, 322.0, 292.0, 290.0, 321.0, 303.0, 292.0, 290.0, 252.0, 266.0, 297.0, 285.0, 313.0, 317.0, 298.0, 278.0, 324.0, 309.0, 285.0, 302.0, 291.0, 291.0, 291.0, 296.0, 291.0, 
291.0, 287.0, 297.0, 291.0, 291.0, 310.0, 317.0, 272.0, 272.0, 290.0, 289.0, 293.0, 283.0, 257.0, 230.0, 285.0, 297.0, 285.0, 297.0, 300.0, 284.0, 312.0, 324.0, 277.0, 305.0, 313.0, 320.0, 321.0, 309.0, 265.0, 274.0, 294.0, 285.0, 299.0, 280.0, 311.0, 316.0, 309.0, 321.0, 290.0, 292.0, 306.0, 324.0, 302.0, 277.0, 288.0, 285.0, 284.0, 295.0, 322.0, 305.0, 313.0, 317.0, 289.0, 293.0, 287.0, 300.0, 310.0, 317.0, 308.0, 322.0, 314.0, 322.0, 291.0, 282.0, 298.0, 284.0, 301.0, 281.0, 285.0, 294.0, 300.0, 282.0, 299.0, 328.0, 284.0, 286.0, 288.0, 294.0, 283.0, 290.0, 289.0, 304.0, 291.0, 299.0, 294.0, 288.0, 283.0, 296.0, 285.0, 270.0, 311.0, 316.0, 260.0, 279.0, 327.0, 309.0, 285.0, 297.0, 324.0, 309.0, 290.0, 292.0]}, "sampler_perf": {"mean_env_wait_ms": 1.1268569615111614, "mean_processing_ms": 0.2934624000398477, "mean_inference_ms": 1.6844698808001166}, "off_policy_estimator": {}, "info": {"num_steps_trained": 5328000, "num_steps_sampled": 2841600, "sample_time_ms": 21356.225, "load_time_ms": 36.973, "grad_time_ms": 9654.508, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0037463188637048006, "policy_loss": -0.0037510646507143974, "vf_loss": 80.60189056396484, "vf_explained_var": 0.7646245360374451, "kl": 0.002355078933760524, "entropy": 1.125628113746643, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2841600, "episodes_total": 7104, "training_iteration": 222, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-36-29", "timestamp": 1660253789, "time_this_iter_s": 31.9894540309906, "time_total_s": 12204.375557661057, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 12204.375557661057, "timesteps_since_restore": 2841600, "iterations_since_restore": 222, "perf": {"cpu_util_percent": 30.18222222222223, "ram_util_percent": 58.70444444444445}}
+{"episode_reward_max": 639.0, "episode_reward_min": 519.0, "episode_reward_mean": 601.33, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 258.0}, "policy_reward_max": {"ppo": 329.0}, "policy_reward_mean": {"ppo": 300.665}, "custom_metrics": {"sparse_reward_mean": 208.6, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 184.13, "shaped_reward_min": 152, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.2, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 26, "onion_pickup_agent_1_mean": 18.12, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 16.83, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 26, "useful_onion_pickup_agent_1_mean": 17.26, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.58, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.64, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.24, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, "useful_onion_drop_agent_1_mean": 0.3, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 16.17, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 17.21, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 6.06, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.79, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.34, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.14, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.52, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.46, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.37, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.16, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.37, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.09, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.17, 
"optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 17.21, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.17, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 17.21, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 633.0, 579.0, 582.0, 582.0, 633.0, 627.0, 630.0, 630.0, 636.0, 633.0, 579.0, 519.0, 552.0, 627.0, 582.0, 582.0, 627.0, 630.0, 582.0, 582.0, 627.0, 630.0, 582.0, 587.0, 630.0, 627.0, 582.0, 627.0, 576.0, 630.0, 630.0, 579.0, 579.0, 627.0, 630.0, 582.0, 630.0, 579.0, 573.0, 579.0, 627.0, 630.0, 582.0, 587.0, 627.0, 630.0, 636.0, 573.0, 582.0, 582.0, 579.0, 582.0, 627.0, 570.0, 582.0, 573.0, 593.0, 590.0, 582.0, 579.0, 555.0, 627.0, 539.0, 636.0, 582.0, 633.0, 582.0, 582.0, 579.0, 579.0, 630.0, 630.0, 627.0, 587.0, 639.0, 587.0, 630.0, 582.0, 633.0, 587.0, 627.0, 579.0, 576.0, 633.0, 582.0, 570.0, 630.0, 627.0, 579.0, 627.0, 582.0, 633.0, 633.0, 630.0, 579.0, 582.0, 627.0, 627.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [290.0, 292.0, 327.0, 306.0, 292.0, 287.0, 280.0, 302.0, 282.0, 300.0, 318.0, 315.0, 305.0, 322.0, 314.0, 316.0, 323.0, 307.0, 319.0, 317.0, 322.0, 311.0, 297.0, 282.0, 261.0, 258.0, 274.0, 278.0, 308.0, 319.0, 290.0, 292.0, 296.0, 286.0, 308.0, 319.0, 322.0, 308.0, 288.0, 294.0, 290.0, 292.0, 324.0, 303.0, 309.0, 321.0, 290.0, 292.0, 298.0, 289.0, 314.0, 316.0, 319.0, 308.0, 288.0, 294.0, 313.0, 314.0, 288.0, 288.0, 309.0, 321.0, 324.0, 306.0, 294.0, 285.0, 299.0, 280.0, 311.0, 316.0, 309.0, 321.0, 290.0, 292.0, 306.0, 324.0, 302.0, 277.0, 288.0, 285.0, 284.0, 295.0, 322.0, 305.0, 313.0, 317.0, 289.0, 293.0, 287.0, 300.0, 310.0, 317.0, 308.0, 322.0, 314.0, 322.0, 291.0, 
282.0, 298.0, 284.0, 301.0, 281.0, 285.0, 294.0, 300.0, 282.0, 299.0, 328.0, 284.0, 286.0, 288.0, 294.0, 283.0, 290.0, 289.0, 304.0, 291.0, 299.0, 294.0, 288.0, 283.0, 296.0, 285.0, 270.0, 311.0, 316.0, 260.0, 279.0, 327.0, 309.0, 285.0, 297.0, 324.0, 309.0, 290.0, 292.0, 296.0, 286.0, 283.0, 296.0, 293.0, 286.0, 310.0, 320.0, 318.0, 312.0, 318.0, 309.0, 290.0, 297.0, 325.0, 314.0, 290.0, 297.0, 319.0, 311.0, 291.0, 291.0, 309.0, 324.0, 295.0, 292.0, 298.0, 329.0, 280.0, 299.0, 280.0, 296.0, 318.0, 315.0, 293.0, 289.0, 283.0, 287.0, 319.0, 311.0, 305.0, 322.0, 285.0, 294.0, 309.0, 318.0, 285.0, 297.0, 320.0, 313.0, 311.0, 322.0, 313.0, 317.0, 296.0, 283.0, 286.0, 296.0, 305.0, 322.0, 310.0, 317.0, 289.0, 287.0]}, "sampler_perf": {"mean_env_wait_ms": 1.123810088157581, "mean_processing_ms": 0.2928524721479363, "mean_inference_ms": 1.6814399707435803}, "off_policy_estimator": {}, "info": {"num_steps_trained": 5352000, "num_steps_sampled": 2854400, "sample_time_ms": 21091.047, "load_time_ms": 36.918, "grad_time_ms": 9784.155, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -5.6165892601711676e-05, "policy_loss": -0.007852478884160519, "vf_loss": 83.60053253173828, "vf_explained_var": 0.7575058937072754, "kl": 0.001709200325421989, "entropy": 1.1274746656417847, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2854400, "episodes_total": 7136, "training_iteration": 223, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-36-59", "timestamp": 1660253819, "time_this_iter_s": 29.606478929519653, "time_total_s": 12233.982036590576, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 12233.982036590576, "timesteps_since_restore": 2854400, "iterations_since_restore": 223, "perf": {"cpu_util_percent": 31.040476190476188, "ram_util_percent": 58.538095238095245}}
+{"episode_reward_max": 639.0, "episode_reward_min": 66.0, "episode_reward_mean": 595.49, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 32.0}, "policy_reward_max": {"ppo": 332.0}, "policy_reward_mean": {"ppo": 297.745}, "custom_metrics": {"sparse_reward_mean": 206.4, "sparse_reward_min": 20, "sparse_reward_max": 220, "shaped_reward_mean": 182.69, "shaped_reward_min": 26, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.76, "onion_pickup_agent_0_min": 4, "onion_pickup_agent_0_max": 26, "onion_pickup_agent_1_mean": 18.12, "onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 16.37, "useful_onion_pickup_agent_0_min": 2, "useful_onion_pickup_agent_0_max": 26, "useful_onion_pickup_agent_1_mean": 17.27, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.52, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.53, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.19, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.26, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 15.82, "potting_onion_agent_0_min": 2, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 17.3, "potting_onion_agent_1_min": 4, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 6.15, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.55, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.4, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.01, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.58, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.34, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.07, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.05, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.41, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.02, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.39, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.95, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.82, 
"optimal_onion_potting_agent_0_min": 2, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 17.3, "optimal_onion_potting_agent_1_min": 4, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.82, "viable_onion_potting_agent_0_min": 2, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 17.3, "viable_onion_potting_agent_1_min": 4, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 630.0, 582.0, 636.0, 630.0, 66.0, 573.0, 633.0, 573.0, 570.0, 579.0, 582.0, 630.0, 633.0, 582.0, 582.0, 579.0, 572.0, 579.0, 582.0, 587.0, 587.0, 630.0, 630.0, 630.0, 582.0, 582.0, 579.0, 419.0, 587.0, 633.0, 587.0, 636.0, 582.0, 633.0, 582.0, 582.0, 579.0, 579.0, 630.0, 630.0, 627.0, 587.0, 639.0, 587.0, 630.0, 582.0, 633.0, 587.0, 627.0, 579.0, 576.0, 633.0, 582.0, 570.0, 630.0, 627.0, 579.0, 627.0, 582.0, 633.0, 633.0, 630.0, 579.0, 582.0, 627.0, 627.0, 576.0, 582.0, 633.0, 579.0, 582.0, 582.0, 633.0, 627.0, 630.0, 630.0, 636.0, 633.0, 579.0, 519.0, 552.0, 627.0, 582.0, 582.0, 627.0, 630.0, 582.0, 582.0, 627.0, 630.0, 582.0, 587.0, 630.0, 627.0, 582.0, 627.0, 576.0, 630.0, 630.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [287.0, 295.0, 321.0, 309.0, 298.0, 284.0, 316.0, 320.0, 316.0, 314.0, 34.0, 32.0, 277.0, 296.0, 309.0, 324.0, 279.0, 294.0, 285.0, 285.0, 285.0, 294.0, 285.0, 297.0, 332.0, 298.0, 316.0, 317.0, 296.0, 286.0, 296.0, 286.0, 288.0, 291.0, 274.0, 298.0, 290.0, 289.0, 283.0, 299.0, 291.0, 296.0, 305.0, 282.0, 311.0, 319.0, 303.0, 327.0, 312.0, 318.0, 296.0, 286.0, 291.0, 291.0, 293.0, 286.0, 214.0, 205.0, 287.0, 300.0, 323.0, 310.0, 288.0, 299.0, 327.0, 309.0, 285.0, 297.0, 324.0, 309.0, 290.0, 292.0, 296.0, 286.0, 283.0, 296.0, 293.0, 286.0, 310.0, 320.0, 318.0, 312.0, 318.0, 309.0, 290.0, 297.0, 325.0, 314.0, 290.0, 297.0, 319.0, 311.0, 291.0, 291.0, 309.0, 324.0, 295.0, 
292.0, 298.0, 329.0, 280.0, 299.0, 280.0, 296.0, 318.0, 315.0, 293.0, 289.0, 283.0, 287.0, 319.0, 311.0, 305.0, 322.0, 285.0, 294.0, 309.0, 318.0, 285.0, 297.0, 320.0, 313.0, 311.0, 322.0, 313.0, 317.0, 296.0, 283.0, 286.0, 296.0, 305.0, 322.0, 310.0, 317.0, 289.0, 287.0, 290.0, 292.0, 327.0, 306.0, 292.0, 287.0, 280.0, 302.0, 282.0, 300.0, 318.0, 315.0, 305.0, 322.0, 314.0, 316.0, 323.0, 307.0, 319.0, 317.0, 322.0, 311.0, 297.0, 282.0, 261.0, 258.0, 274.0, 278.0, 308.0, 319.0, 290.0, 292.0, 296.0, 286.0, 308.0, 319.0, 322.0, 308.0, 288.0, 294.0, 290.0, 292.0, 324.0, 303.0, 309.0, 321.0, 290.0, 292.0, 298.0, 289.0, 314.0, 316.0, 319.0, 308.0, 288.0, 294.0, 313.0, 314.0, 288.0, 288.0, 309.0, 321.0, 324.0, 306.0]}, "sampler_perf": {"mean_env_wait_ms": 1.1207907910306545, "mean_processing_ms": 0.2922476172198019, "mean_inference_ms": 1.6784104187428721}, "off_policy_estimator": {}, "info": {"num_steps_trained": 5376000, "num_steps_sampled": 2867200, "sample_time_ms": 21165.857, "load_time_ms": 36.996, "grad_time_ms": 9704.336, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.003032231703400612, "policy_loss": -0.005307988729327917, "vf_loss": 89.09744262695312, "vf_explained_var": 0.7809851765632629, "kl": 0.0017985772574320436, "entropy": 1.139058232307434, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2867200, "episodes_total": 7168, "training_iteration": 224, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-37-28", "timestamp": 1660253848, "time_this_iter_s": 29.72331213951111, "time_total_s": 12263.705348730087, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 12263.705348730087, "timesteps_since_restore": 2867200, "iterations_since_restore": 224, "perf": {"cpu_util_percent": 31.121428571428574, "ram_util_percent": 58.642857142857146}}
+{"episode_reward_max": 636.0, "episode_reward_min": 66.0, "episode_reward_mean": 594.3, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 32.0}, "policy_reward_max": {"ppo": 332.0}, "policy_reward_mean": {"ppo": 297.15}, "custom_metrics": {"sparse_reward_mean": 206.0, "sparse_reward_min": 20, "sparse_reward_max": 220, "shaped_reward_mean": 182.3, "shaped_reward_min": 26, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.79, "onion_pickup_agent_0_min": 4, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 18.08, "onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.41, "useful_onion_pickup_agent_0_min": 2, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 17.31, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.58, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.5, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.17, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.2, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 15.89, "potting_onion_agent_0_min": 2, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.24, "potting_onion_agent_1_min": 4, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.16, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.47, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.42, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.9, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.52, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.33, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.06, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.06, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.46, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.93, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.42, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.9, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.89, 
"optimal_onion_potting_agent_0_min": 2, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.24, "optimal_onion_potting_agent_1_min": 4, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.89, "viable_onion_potting_agent_0_min": 2, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.24, "viable_onion_potting_agent_1_min": 4, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 582.0, 582.0, 630.0, 587.0, 627.0, 579.0, 633.0, 582.0, 624.0, 582.0, 579.0, 630.0, 579.0, 636.0, 582.0, 630.0, 576.0, 621.0, 633.0, 582.0, 584.0, 636.0, 579.0, 576.0, 579.0, 636.0, 630.0, 633.0, 582.0, 570.0, 630.0, 582.0, 627.0, 627.0, 576.0, 582.0, 633.0, 579.0, 582.0, 582.0, 633.0, 627.0, 630.0, 630.0, 636.0, 633.0, 579.0, 519.0, 552.0, 627.0, 582.0, 582.0, 627.0, 630.0, 582.0, 582.0, 627.0, 630.0, 582.0, 587.0, 630.0, 627.0, 582.0, 627.0, 576.0, 630.0, 630.0, 582.0, 630.0, 582.0, 636.0, 630.0, 66.0, 573.0, 633.0, 573.0, 570.0, 579.0, 582.0, 630.0, 633.0, 582.0, 582.0, 579.0, 572.0, 579.0, 582.0, 587.0, 587.0, 630.0, 630.0, 630.0, 582.0, 582.0, 579.0, 419.0, 587.0, 633.0, 587.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [294.0, 288.0, 289.0, 293.0, 283.0, 299.0, 306.0, 324.0, 293.0, 294.0, 323.0, 304.0, 284.0, 295.0, 319.0, 314.0, 283.0, 299.0, 322.0, 302.0, 294.0, 288.0, 284.0, 295.0, 319.0, 311.0, 291.0, 288.0, 316.0, 320.0, 294.0, 288.0, 326.0, 304.0, 292.0, 284.0, 314.0, 307.0, 319.0, 314.0, 286.0, 296.0, 285.0, 299.0, 324.0, 312.0, 297.0, 282.0, 291.0, 285.0, 287.0, 292.0, 321.0, 315.0, 313.0, 317.0, 317.0, 316.0, 296.0, 286.0, 292.0, 278.0, 316.0, 314.0, 286.0, 296.0, 305.0, 322.0, 310.0, 317.0, 289.0, 287.0, 290.0, 292.0, 327.0, 306.0, 292.0, 287.0, 280.0, 302.0, 282.0, 300.0, 318.0, 315.0, 305.0, 322.0, 314.0, 316.0, 323.0, 307.0, 319.0, 317.0, 322.0, 311.0, 297.0, 282.0, 261.0, 
258.0, 274.0, 278.0, 308.0, 319.0, 290.0, 292.0, 296.0, 286.0, 308.0, 319.0, 322.0, 308.0, 288.0, 294.0, 290.0, 292.0, 324.0, 303.0, 309.0, 321.0, 290.0, 292.0, 298.0, 289.0, 314.0, 316.0, 319.0, 308.0, 288.0, 294.0, 313.0, 314.0, 288.0, 288.0, 309.0, 321.0, 324.0, 306.0, 287.0, 295.0, 321.0, 309.0, 298.0, 284.0, 316.0, 320.0, 316.0, 314.0, 34.0, 32.0, 277.0, 296.0, 309.0, 324.0, 279.0, 294.0, 285.0, 285.0, 285.0, 294.0, 285.0, 297.0, 332.0, 298.0, 316.0, 317.0, 296.0, 286.0, 296.0, 286.0, 288.0, 291.0, 274.0, 298.0, 290.0, 289.0, 283.0, 299.0, 291.0, 296.0, 305.0, 282.0, 311.0, 319.0, 303.0, 327.0, 312.0, 318.0, 296.0, 286.0, 291.0, 291.0, 293.0, 286.0, 214.0, 205.0, 287.0, 300.0, 323.0, 310.0, 288.0, 299.0]}, "sampler_perf": {"mean_env_wait_ms": 1.117796938264296, "mean_processing_ms": 0.29164906507723115, "mean_inference_ms": 1.675395869901085}, "off_policy_estimator": {}, "info": {"num_steps_trained": 5400000, "num_steps_sampled": 2880000, "sample_time_ms": 21210.508, "load_time_ms": 36.928, "grad_time_ms": 9651.029, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.003380303969606757, "policy_loss": -0.0046376134268939495, "vf_loss": 85.82404327392578, "vf_explained_var": 0.7595102190971375, "kl": 0.0017190409125760198, "entropy": 1.1289840936660767, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2880000, "episodes_total": 7200, "training_iteration": 225, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-37-59", "timestamp": 1660253879, "time_this_iter_s": 30.507438898086548, "time_total_s": 12294.212787628174, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 12294.212787628174, "timesteps_since_restore": 2880000, "iterations_since_restore": 225, "perf": {"cpu_util_percent": 29.595348837209304, "ram_util_percent": 58.604651162790695}}
+{"episode_reward_max": 639.0, "episode_reward_min": 66.0, "episode_reward_mean": 592.69, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 32.0}, "policy_reward_max": {"ppo": 332.0}, "policy_reward_mean": {"ppo": 296.345}, "custom_metrics": {"sparse_reward_mean": 205.2, "sparse_reward_min": 20, "sparse_reward_max": 220, "shaped_reward_mean": 182.29, "shaped_reward_min": 26, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.96, "onion_pickup_agent_0_min": 4, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 17.88, "onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 16.49, "useful_onion_pickup_agent_0_min": 2, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 17.08, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.7, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 0.47, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.18, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, "useful_onion_drop_agent_1_mean": 0.17, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 16.0, "potting_onion_agent_0_min": 2, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 17.1, "potting_onion_agent_1_min": 4, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.09, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.64, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 5.39, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.94, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.45, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.49, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.06, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.07, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.44, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.96, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.36, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.91, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.0, 
"optimal_onion_potting_agent_0_min": 2, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 17.1, "optimal_onion_potting_agent_1_min": 4, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.0, "viable_onion_potting_agent_0_min": 2, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 17.1, "viable_onion_potting_agent_1_min": 4, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [630.0, 582.0, 575.0, 582.0, 627.0, 630.0, 627.0, 579.0, 579.0, 630.0, 630.0, 579.0, 633.0, 584.0, 567.0, 627.0, 627.0, 582.0, 627.0, 587.0, 587.0, 582.0, 576.0, 579.0, 633.0, 639.0, 582.0, 587.0, 579.0, 582.0, 533.0, 582.0, 627.0, 576.0, 630.0, 630.0, 582.0, 630.0, 582.0, 636.0, 630.0, 66.0, 573.0, 633.0, 573.0, 570.0, 579.0, 582.0, 630.0, 633.0, 582.0, 582.0, 579.0, 572.0, 579.0, 582.0, 587.0, 587.0, 630.0, 630.0, 630.0, 582.0, 582.0, 579.0, 419.0, 587.0, 633.0, 587.0, 582.0, 582.0, 582.0, 630.0, 587.0, 627.0, 579.0, 633.0, 582.0, 624.0, 582.0, 579.0, 630.0, 579.0, 636.0, 582.0, 630.0, 576.0, 621.0, 633.0, 582.0, 584.0, 636.0, 579.0, 576.0, 579.0, 636.0, 630.0, 633.0, 582.0, 570.0, 630.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [321.0, 309.0, 288.0, 294.0, 284.0, 291.0, 293.0, 289.0, 311.0, 316.0, 314.0, 316.0, 313.0, 314.0, 290.0, 289.0, 290.0, 289.0, 324.0, 306.0, 311.0, 319.0, 292.0, 287.0, 319.0, 314.0, 291.0, 293.0, 296.0, 271.0, 323.0, 304.0, 309.0, 318.0, 284.0, 298.0, 309.0, 318.0, 282.0, 305.0, 297.0, 290.0, 287.0, 295.0, 291.0, 285.0, 287.0, 292.0, 316.0, 317.0, 317.0, 322.0, 294.0, 288.0, 301.0, 286.0, 282.0, 297.0, 298.0, 284.0, 270.0, 263.0, 288.0, 294.0, 313.0, 314.0, 288.0, 288.0, 309.0, 321.0, 324.0, 306.0, 287.0, 295.0, 321.0, 309.0, 298.0, 284.0, 316.0, 320.0, 316.0, 314.0, 34.0, 32.0, 277.0, 296.0, 309.0, 324.0, 279.0, 294.0, 285.0, 285.0, 285.0, 294.0, 285.0, 297.0, 332.0, 
298.0, 316.0, 317.0, 296.0, 286.0, 296.0, 286.0, 288.0, 291.0, 274.0, 298.0, 290.0, 289.0, 283.0, 299.0, 291.0, 296.0, 305.0, 282.0, 311.0, 319.0, 303.0, 327.0, 312.0, 318.0, 296.0, 286.0, 291.0, 291.0, 293.0, 286.0, 214.0, 205.0, 287.0, 300.0, 323.0, 310.0, 288.0, 299.0, 294.0, 288.0, 289.0, 293.0, 283.0, 299.0, 306.0, 324.0, 293.0, 294.0, 323.0, 304.0, 284.0, 295.0, 319.0, 314.0, 283.0, 299.0, 322.0, 302.0, 294.0, 288.0, 284.0, 295.0, 319.0, 311.0, 291.0, 288.0, 316.0, 320.0, 294.0, 288.0, 326.0, 304.0, 292.0, 284.0, 314.0, 307.0, 319.0, 314.0, 286.0, 296.0, 285.0, 299.0, 324.0, 312.0, 297.0, 282.0, 291.0, 285.0, 287.0, 292.0, 321.0, 315.0, 313.0, 317.0, 317.0, 316.0, 296.0, 286.0, 292.0, 278.0, 316.0, 314.0]}, "sampler_perf": {"mean_env_wait_ms": 1.1148293138411964, "mean_processing_ms": 0.29105729699863353, "mean_inference_ms": 1.6724660361311725}, "off_policy_estimator": {}, "info": {"num_steps_trained": 5424000, "num_steps_sampled": 2892800, "sample_time_ms": 21194.238, "load_time_ms": 37.016, "grad_time_ms": 9566.171, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.004350067116320133, "policy_loss": -0.004312645178288221, "vf_loss": 92.26403045654297, "vf_explained_var": 0.7493538856506348, "kl": 0.0016388074727728963, "entropy": 1.1273828744888306, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2892800, "episodes_total": 7232, "training_iteration": 226, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-38-28", "timestamp": 1660253908, "time_this_iter_s": 28.989330291748047, "time_total_s": 12323.202117919922, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 12323.202117919922, "timesteps_since_restore": 2892800, "iterations_since_restore": 226, "perf": {"cpu_util_percent": 31.509756097560977, "ram_util_percent": 58.60975609756099}}
+{"episode_reward_max": 639.0, "episode_reward_min": 419.0, "episode_reward_mean": 596.7, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 205.0}, "policy_reward_max": {"ppo": 326.0}, "policy_reward_mean": {"ppo": 298.35}, "custom_metrics": {"sparse_reward_mean": 206.4, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 183.9, "shaped_reward_min": 139, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.96, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 18.09, "onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 16.54, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 17.38, "useful_onion_pickup_agent_1_min": 11, "useful_onion_pickup_agent_1_max": 25, "onion_drop_agent_0_mean": 0.64, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 0.51, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.17, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, "useful_onion_drop_agent_1_mean": 0.19, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 16.04, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 17.3, "potting_onion_agent_1_min": 11, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.11, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.6, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 5.55, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.96, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.33, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.47, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.08, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.56, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.91, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.47, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.87, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.04, 
"optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 17.3, "optimal_onion_potting_agent_1_min": 11, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.04, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 17.3, "viable_onion_potting_agent_1_min": 11, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 579.0, 582.0, 587.0, 624.0, 582.0, 618.0, 582.0, 539.0, 633.0, 582.0, 582.0, 582.0, 582.0, 636.0, 633.0, 582.0, 582.0, 579.0, 630.0, 630.0, 587.0, 630.0, 582.0, 576.0, 627.0, 579.0, 573.0, 582.0, 582.0, 587.0, 636.0, 419.0, 587.0, 633.0, 587.0, 582.0, 582.0, 582.0, 630.0, 587.0, 627.0, 579.0, 633.0, 582.0, 624.0, 582.0, 579.0, 630.0, 579.0, 636.0, 582.0, 630.0, 576.0, 621.0, 633.0, 582.0, 584.0, 636.0, 579.0, 576.0, 579.0, 636.0, 630.0, 633.0, 582.0, 570.0, 630.0, 630.0, 582.0, 575.0, 582.0, 627.0, 630.0, 627.0, 579.0, 579.0, 630.0, 630.0, 579.0, 633.0, 584.0, 567.0, 627.0, 627.0, 582.0, 627.0, 587.0, 587.0, 582.0, 576.0, 579.0, 633.0, 639.0, 582.0, 587.0, 579.0, 582.0, 533.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [287.0, 292.0, 287.0, 292.0, 295.0, 287.0, 292.0, 295.0, 321.0, 303.0, 291.0, 291.0, 305.0, 313.0, 283.0, 299.0, 270.0, 269.0, 316.0, 317.0, 288.0, 294.0, 290.0, 292.0, 298.0, 284.0, 283.0, 299.0, 319.0, 317.0, 309.0, 324.0, 291.0, 291.0, 287.0, 295.0, 293.0, 286.0, 312.0, 318.0, 314.0, 316.0, 309.0, 278.0, 324.0, 306.0, 291.0, 291.0, 297.0, 279.0, 317.0, 310.0, 295.0, 284.0, 282.0, 291.0, 288.0, 294.0, 290.0, 292.0, 294.0, 293.0, 325.0, 311.0, 214.0, 205.0, 287.0, 300.0, 323.0, 310.0, 288.0, 299.0, 294.0, 288.0, 289.0, 293.0, 283.0, 299.0, 306.0, 324.0, 293.0, 294.0, 323.0, 304.0, 284.0, 295.0, 319.0, 314.0, 283.0, 299.0, 322.0, 302.0, 294.0, 288.0, 284.0, 295.0, 319.0, 
311.0, 291.0, 288.0, 316.0, 320.0, 294.0, 288.0, 326.0, 304.0, 292.0, 284.0, 314.0, 307.0, 319.0, 314.0, 286.0, 296.0, 285.0, 299.0, 324.0, 312.0, 297.0, 282.0, 291.0, 285.0, 287.0, 292.0, 321.0, 315.0, 313.0, 317.0, 317.0, 316.0, 296.0, 286.0, 292.0, 278.0, 316.0, 314.0, 321.0, 309.0, 288.0, 294.0, 284.0, 291.0, 293.0, 289.0, 311.0, 316.0, 314.0, 316.0, 313.0, 314.0, 290.0, 289.0, 290.0, 289.0, 324.0, 306.0, 311.0, 319.0, 292.0, 287.0, 319.0, 314.0, 291.0, 293.0, 296.0, 271.0, 323.0, 304.0, 309.0, 318.0, 284.0, 298.0, 309.0, 318.0, 282.0, 305.0, 297.0, 290.0, 287.0, 295.0, 291.0, 285.0, 287.0, 292.0, 316.0, 317.0, 317.0, 322.0, 294.0, 288.0, 301.0, 286.0, 282.0, 297.0, 298.0, 284.0, 270.0, 263.0, 288.0, 294.0]}, "sampler_perf": {"mean_env_wait_ms": 1.111882925643259, "mean_processing_ms": 0.2904711783343595, "mean_inference_ms": 1.6695128259184024}, "off_policy_estimator": {}, "info": {"num_steps_trained": 5448000, "num_steps_sampled": 2905600, "sample_time_ms": 21046.741, "load_time_ms": 36.918, "grad_time_ms": 9303.678, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.001480274717323482, "policy_loss": -0.006882220506668091, "vf_loss": 89.27208709716797, "vf_explained_var": 0.7621426582336426, "kl": 0.0023567674215883017, "entropy": 1.1294348239898682, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2905600, "episodes_total": 7264, "training_iteration": 227, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-38-56", "timestamp": 1660253936, "time_this_iter_s": 28.29434609413147, "time_total_s": 12351.496464014053, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 12351.496464014053, "timesteps_since_restore": 2905600, "iterations_since_restore": 227, "perf": {"cpu_util_percent": 34.097500000000004, "ram_util_percent": 58.625000000000014}}
+{"episode_reward_max": 639.0, "episode_reward_min": 533.0, "episode_reward_mean": 598.73, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 263.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 299.365}, "custom_metrics": {"sparse_reward_mean": 207.2, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 184.33, "shaped_reward_min": 167, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.04, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 18.07, "onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 16.63, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 17.32, "useful_onion_pickup_agent_1_min": 11, "useful_onion_pickup_agent_1_max": 25, "onion_drop_agent_0_mean": 0.61, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 0.55, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.13, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, "useful_onion_drop_agent_1_mean": 0.21, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 16.08, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 17.27, "potting_onion_agent_1_min": 11, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.9, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.83, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 5.46, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.15, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.28, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.51, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.06, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.45, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.06, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.36, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.02, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.03, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.08, 
"optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 17.27, "optimal_onion_potting_agent_1_min": 11, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.08, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 17.27, "viable_onion_potting_agent_1_min": 11, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [576.0, 633.0, 582.0, 630.0, 630.0, 579.0, 630.0, 587.0, 630.0, 582.0, 627.0, 582.0, 627.0, 633.0, 541.0, 579.0, 582.0, 633.0, 630.0, 624.0, 582.0, 582.0, 582.0, 582.0, 582.0, 579.0, 630.0, 630.0, 630.0, 633.0, 576.0, 582.0, 633.0, 582.0, 570.0, 630.0, 630.0, 582.0, 575.0, 582.0, 627.0, 630.0, 627.0, 579.0, 579.0, 630.0, 630.0, 579.0, 633.0, 584.0, 567.0, 627.0, 627.0, 582.0, 627.0, 587.0, 587.0, 582.0, 576.0, 579.0, 633.0, 639.0, 582.0, 587.0, 579.0, 582.0, 533.0, 582.0, 579.0, 579.0, 582.0, 587.0, 624.0, 582.0, 618.0, 582.0, 539.0, 633.0, 582.0, 582.0, 582.0, 582.0, 636.0, 633.0, 582.0, 582.0, 579.0, 630.0, 630.0, 587.0, 630.0, 582.0, 576.0, 627.0, 579.0, 573.0, 582.0, 582.0, 587.0, 636.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [279.0, 297.0, 316.0, 317.0, 293.0, 289.0, 306.0, 324.0, 311.0, 319.0, 292.0, 287.0, 314.0, 316.0, 290.0, 297.0, 312.0, 318.0, 291.0, 291.0, 315.0, 312.0, 291.0, 291.0, 313.0, 314.0, 317.0, 316.0, 266.0, 275.0, 289.0, 290.0, 279.0, 303.0, 322.0, 311.0, 303.0, 327.0, 308.0, 316.0, 291.0, 291.0, 288.0, 294.0, 285.0, 297.0, 280.0, 302.0, 289.0, 293.0, 291.0, 288.0, 317.0, 313.0, 317.0, 313.0, 319.0, 311.0, 317.0, 316.0, 266.0, 310.0, 294.0, 288.0, 317.0, 316.0, 296.0, 286.0, 292.0, 278.0, 316.0, 314.0, 321.0, 309.0, 288.0, 294.0, 284.0, 291.0, 293.0, 289.0, 311.0, 316.0, 314.0, 316.0, 313.0, 314.0, 290.0, 289.0, 290.0, 289.0, 324.0, 306.0, 311.0, 319.0, 292.0, 287.0, 319.0, 
314.0, 291.0, 293.0, 296.0, 271.0, 323.0, 304.0, 309.0, 318.0, 284.0, 298.0, 309.0, 318.0, 282.0, 305.0, 297.0, 290.0, 287.0, 295.0, 291.0, 285.0, 287.0, 292.0, 316.0, 317.0, 317.0, 322.0, 294.0, 288.0, 301.0, 286.0, 282.0, 297.0, 298.0, 284.0, 270.0, 263.0, 288.0, 294.0, 287.0, 292.0, 287.0, 292.0, 295.0, 287.0, 292.0, 295.0, 321.0, 303.0, 291.0, 291.0, 305.0, 313.0, 283.0, 299.0, 270.0, 269.0, 316.0, 317.0, 288.0, 294.0, 290.0, 292.0, 298.0, 284.0, 283.0, 299.0, 319.0, 317.0, 309.0, 324.0, 291.0, 291.0, 287.0, 295.0, 293.0, 286.0, 312.0, 318.0, 314.0, 316.0, 309.0, 278.0, 324.0, 306.0, 291.0, 291.0, 297.0, 279.0, 317.0, 310.0, 295.0, 284.0, 282.0, 291.0, 288.0, 294.0, 290.0, 292.0, 294.0, 293.0, 325.0, 311.0]}, "sampler_perf": {"mean_env_wait_ms": 1.108955063453815, "mean_processing_ms": 0.2898914011168066, "mean_inference_ms": 1.6664528501755567}, "off_policy_estimator": {}, "info": {"num_steps_trained": 5472000, "num_steps_sampled": 2918400, "sample_time_ms": 20842.466, "load_time_ms": 36.811, "grad_time_ms": 9088.977, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0036609917879104614, "policy_loss": -0.0044582299888134, "vf_loss": 86.8133316040039, "vf_explained_var": 0.7590463161468506, "kl": 0.0019074537558481097, "entropy": 1.124218463897705, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2918400, "episodes_total": 7296, "training_iteration": 228, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-39-25", "timestamp": 1660253965, "time_this_iter_s": 29.044671058654785, "time_total_s": 12380.541135072708, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 12380.541135072708, "timesteps_since_restore": 2918400, "iterations_since_restore": 228, "perf": {"cpu_util_percent": 34.02195121951219, "ram_util_percent": 58.739024390243905}}
+{"episode_reward_max": 636.0, "episode_reward_min": 524.0, "episode_reward_mean": 595.88, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 253.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 297.94}, "custom_metrics": {"sparse_reward_mean": 206.0, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 183.88, "shaped_reward_min": 164, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.76, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 18.27, "onion_pickup_agent_1_min": 13, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 16.42, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 17.52, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 25, "onion_drop_agent_0_mean": 0.52, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.65, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.11, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.28, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 15.9, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.34, "potting_onion_agent_1_min": 13, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.9, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.75, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.43, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.19, "useful_dish_pickup_agent_1_min": 3, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.26, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.47, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 6, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.04, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.49, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 5.02, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.39, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.95, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.06, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.9, 
"optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.34, "optimal_onion_potting_agent_1_min": 13, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.9, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.34, "viable_onion_potting_agent_1_min": 13, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [636.0, 576.0, 627.0, 630.0, 582.0, 579.0, 582.0, 633.0, 525.0, 587.0, 573.0, 627.0, 587.0, 624.0, 524.0, 587.0, 633.0, 579.0, 536.0, 582.0, 587.0, 627.0, 579.0, 533.0, 582.0, 627.0, 587.0, 633.0, 627.0, 582.0, 573.0, 633.0, 579.0, 582.0, 533.0, 582.0, 579.0, 579.0, 582.0, 587.0, 624.0, 582.0, 618.0, 582.0, 539.0, 633.0, 582.0, 582.0, 582.0, 582.0, 636.0, 633.0, 582.0, 582.0, 579.0, 630.0, 630.0, 587.0, 630.0, 582.0, 576.0, 627.0, 579.0, 573.0, 582.0, 582.0, 587.0, 636.0, 576.0, 633.0, 582.0, 630.0, 630.0, 579.0, 630.0, 587.0, 630.0, 582.0, 627.0, 582.0, 627.0, 633.0, 541.0, 579.0, 582.0, 633.0, 630.0, 624.0, 582.0, 582.0, 582.0, 582.0, 582.0, 579.0, 630.0, 630.0, 630.0, 633.0, 576.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [320.0, 316.0, 287.0, 289.0, 312.0, 315.0, 316.0, 314.0, 296.0, 286.0, 286.0, 293.0, 285.0, 297.0, 320.0, 313.0, 259.0, 266.0, 291.0, 296.0, 292.0, 281.0, 310.0, 317.0, 297.0, 290.0, 317.0, 307.0, 253.0, 271.0, 280.0, 307.0, 319.0, 314.0, 284.0, 295.0, 267.0, 269.0, 290.0, 292.0, 298.0, 289.0, 310.0, 317.0, 288.0, 291.0, 275.0, 258.0, 294.0, 288.0, 313.0, 314.0, 283.0, 304.0, 322.0, 311.0, 316.0, 311.0, 300.0, 282.0, 278.0, 295.0, 319.0, 314.0, 282.0, 297.0, 298.0, 284.0, 270.0, 263.0, 288.0, 294.0, 287.0, 292.0, 287.0, 292.0, 295.0, 287.0, 292.0, 295.0, 321.0, 303.0, 291.0, 291.0, 305.0, 313.0, 283.0, 299.0, 270.0, 269.0, 316.0, 317.0, 288.0, 294.0, 290.0, 292.0, 298.0, 
284.0, 283.0, 299.0, 319.0, 317.0, 309.0, 324.0, 291.0, 291.0, 287.0, 295.0, 293.0, 286.0, 312.0, 318.0, 314.0, 316.0, 309.0, 278.0, 324.0, 306.0, 291.0, 291.0, 297.0, 279.0, 317.0, 310.0, 295.0, 284.0, 282.0, 291.0, 288.0, 294.0, 290.0, 292.0, 294.0, 293.0, 325.0, 311.0, 279.0, 297.0, 316.0, 317.0, 293.0, 289.0, 306.0, 324.0, 311.0, 319.0, 292.0, 287.0, 314.0, 316.0, 290.0, 297.0, 312.0, 318.0, 291.0, 291.0, 315.0, 312.0, 291.0, 291.0, 313.0, 314.0, 317.0, 316.0, 266.0, 275.0, 289.0, 290.0, 279.0, 303.0, 322.0, 311.0, 303.0, 327.0, 308.0, 316.0, 291.0, 291.0, 288.0, 294.0, 285.0, 297.0, 280.0, 302.0, 289.0, 293.0, 291.0, 288.0, 317.0, 313.0, 317.0, 313.0, 319.0, 311.0, 317.0, 316.0, 266.0, 310.0, 294.0, 288.0]}, "sampler_perf": {"mean_env_wait_ms": 1.1060616527900746, "mean_processing_ms": 0.28931749831274756, "mean_inference_ms": 1.663577876904456}, "off_policy_estimator": {}, "info": {"num_steps_trained": 5496000, "num_steps_sampled": 2931200, "sample_time_ms": 20902.121, "load_time_ms": 36.225, "grad_time_ms": 9147.239, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.004833377432078123, "policy_loss": -0.003439890220761299, "vf_loss": 88.37776947021484, "vf_explained_var": 0.7585814595222473, "kl": 0.0015477427514269948, "entropy": 1.1290167570114136, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2931200, "episodes_total": 7328, "training_iteration": 229, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-39-56", "timestamp": 1660253996, "time_this_iter_s": 31.170966863632202, "time_total_s": 12411.71210193634, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 12411.71210193634, "timesteps_since_restore": 2931200, "iterations_since_restore": 229, "perf": {"cpu_util_percent": 34.31136363636364, "ram_util_percent": 58.67272727272726}}
+{"episode_reward_max": 636.0, "episode_reward_min": 524.0, "episode_reward_mean": 598.57, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 253.0}, "policy_reward_max": {"ppo": 331.0}, "policy_reward_mean": {"ppo": 299.285}, "custom_metrics": {"sparse_reward_mean": 207.0, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 184.57, "shaped_reward_min": 164, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.94, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 18.23, "onion_pickup_agent_1_min": 13, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 16.55, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 17.43, "useful_onion_pickup_agent_1_min": 12, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.56, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 7, "onion_drop_agent_1_mean": 0.6, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.19, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 0.2, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 16.03, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.33, "potting_onion_agent_1_min": 13, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.95, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.71, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.43, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.2, "useful_dish_pickup_agent_1_min": 3, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.32, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.42, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 6, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.52, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 5.07, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.39, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.0, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.07, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.03, 
"optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.33, "optimal_onion_potting_agent_1_min": 13, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.03, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.33, "viable_onion_potting_agent_1_min": 13, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [630.0, 633.0, 582.0, 587.0, 630.0, 630.0, 627.0, 582.0, 630.0, 579.0, 627.0, 579.0, 579.0, 587.0, 576.0, 579.0, 576.0, 587.0, 582.0, 582.0, 576.0, 582.0, 539.0, 633.0, 587.0, 579.0, 630.0, 633.0, 633.0, 633.0, 636.0, 579.0, 582.0, 582.0, 587.0, 636.0, 576.0, 633.0, 582.0, 630.0, 630.0, 579.0, 630.0, 587.0, 630.0, 582.0, 627.0, 582.0, 627.0, 633.0, 541.0, 579.0, 582.0, 633.0, 630.0, 624.0, 582.0, 582.0, 582.0, 582.0, 582.0, 579.0, 630.0, 630.0, 630.0, 633.0, 576.0, 582.0, 636.0, 576.0, 627.0, 630.0, 582.0, 579.0, 582.0, 633.0, 525.0, 587.0, 573.0, 627.0, 587.0, 624.0, 524.0, 587.0, 633.0, 579.0, 536.0, 582.0, 587.0, 627.0, 579.0, 533.0, 582.0, 627.0, 587.0, 633.0, 627.0, 582.0, 573.0, 633.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [319.0, 311.0, 316.0, 317.0, 296.0, 286.0, 303.0, 284.0, 316.0, 314.0, 313.0, 317.0, 317.0, 310.0, 293.0, 289.0, 314.0, 316.0, 285.0, 294.0, 316.0, 311.0, 286.0, 293.0, 283.0, 296.0, 289.0, 298.0, 285.0, 291.0, 287.0, 292.0, 297.0, 279.0, 294.0, 293.0, 288.0, 294.0, 284.0, 298.0, 287.0, 289.0, 290.0, 292.0, 273.0, 266.0, 310.0, 323.0, 295.0, 292.0, 294.0, 285.0, 321.0, 309.0, 321.0, 312.0, 313.0, 320.0, 331.0, 302.0, 321.0, 315.0, 296.0, 283.0, 288.0, 294.0, 290.0, 292.0, 294.0, 293.0, 325.0, 311.0, 279.0, 297.0, 316.0, 317.0, 293.0, 289.0, 306.0, 324.0, 311.0, 319.0, 292.0, 287.0, 314.0, 316.0, 290.0, 297.0, 312.0, 318.0, 291.0, 291.0, 315.0, 312.0, 291.0, 291.0, 313.0, 
314.0, 317.0, 316.0, 266.0, 275.0, 289.0, 290.0, 279.0, 303.0, 322.0, 311.0, 303.0, 327.0, 308.0, 316.0, 291.0, 291.0, 288.0, 294.0, 285.0, 297.0, 280.0, 302.0, 289.0, 293.0, 291.0, 288.0, 317.0, 313.0, 317.0, 313.0, 319.0, 311.0, 317.0, 316.0, 266.0, 310.0, 294.0, 288.0, 320.0, 316.0, 287.0, 289.0, 312.0, 315.0, 316.0, 314.0, 296.0, 286.0, 286.0, 293.0, 285.0, 297.0, 320.0, 313.0, 259.0, 266.0, 291.0, 296.0, 292.0, 281.0, 310.0, 317.0, 297.0, 290.0, 317.0, 307.0, 253.0, 271.0, 280.0, 307.0, 319.0, 314.0, 284.0, 295.0, 267.0, 269.0, 290.0, 292.0, 298.0, 289.0, 310.0, 317.0, 288.0, 291.0, 275.0, 258.0, 294.0, 288.0, 313.0, 314.0, 283.0, 304.0, 322.0, 311.0, 316.0, 311.0, 300.0, 282.0, 278.0, 295.0, 319.0, 314.0]}, "sampler_perf": {"mean_env_wait_ms": 1.1032059640461922, "mean_processing_ms": 0.28875135486760906, "mean_inference_ms": 1.6608895336787544}, "off_policy_estimator": {}, "info": {"num_steps_trained": 5520000, "num_steps_sampled": 2944000, "sample_time_ms": 20966.765, "load_time_ms": 36.396, "grad_time_ms": 9172.251, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.005542902275919914, "policy_loss": -0.0032051329035311937, "vf_loss": 93.1218490600586, "vf_explained_var": 0.7535824775695801, "kl": 0.0018033984815701842, "entropy": 1.1283119916915894, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2944000, "episodes_total": 7360, "training_iteration": 230, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-40-28", "timestamp": 1660254028, "time_this_iter_s": 31.772056102752686, "time_total_s": 12443.484158039093, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 12443.484158039093, "timesteps_since_restore": 2944000, "iterations_since_restore": 230, "perf": {"cpu_util_percent": 32.88666666666666, "ram_util_percent": 58.577777777777776}}
+{"episode_reward_max": 636.0, "episode_reward_min": 444.0, "episode_reward_mean": 595.39, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 215.0}, "policy_reward_max": {"ppo": 331.0}, "policy_reward_mean": {"ppo": 297.695}, "custom_metrics": {"sparse_reward_mean": 205.8, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 183.79, "shaped_reward_min": 124, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.74, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 18.2, "onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 16.35, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 17.46, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.49, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 7, "onion_drop_agent_1_mean": 0.61, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.23, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 0.2, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 15.96, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.34, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 6.14, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.67, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.46, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.02, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.45, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.47, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 6, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.55, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.99, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.43, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.91, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.05, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.96, 
"optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.34, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.96, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.34, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [527.0, 582.0, 576.0, 587.0, 582.0, 633.0, 444.0, 630.0, 590.0, 633.0, 522.0, 636.0, 579.0, 587.0, 633.0, 627.0, 587.0, 630.0, 636.0, 587.0, 582.0, 582.0, 582.0, 627.0, 636.0, 582.0, 627.0, 582.0, 579.0, 579.0, 633.0, 536.0, 630.0, 633.0, 576.0, 582.0, 636.0, 576.0, 627.0, 630.0, 582.0, 579.0, 582.0, 633.0, 525.0, 587.0, 573.0, 627.0, 587.0, 624.0, 524.0, 587.0, 633.0, 579.0, 536.0, 582.0, 587.0, 627.0, 579.0, 533.0, 582.0, 627.0, 587.0, 633.0, 627.0, 582.0, 573.0, 633.0, 630.0, 633.0, 582.0, 587.0, 630.0, 630.0, 627.0, 582.0, 630.0, 579.0, 627.0, 579.0, 579.0, 587.0, 576.0, 579.0, 576.0, 587.0, 582.0, 582.0, 576.0, 582.0, 539.0, 633.0, 587.0, 579.0, 630.0, 633.0, 633.0, 633.0, 636.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [262.0, 265.0, 287.0, 295.0, 277.0, 299.0, 285.0, 302.0, 293.0, 289.0, 313.0, 320.0, 229.0, 215.0, 321.0, 309.0, 292.0, 298.0, 311.0, 322.0, 259.0, 263.0, 309.0, 327.0, 288.0, 291.0, 292.0, 295.0, 315.0, 318.0, 321.0, 306.0, 295.0, 292.0, 318.0, 312.0, 313.0, 323.0, 293.0, 294.0, 294.0, 288.0, 285.0, 297.0, 291.0, 291.0, 322.0, 305.0, 329.0, 307.0, 291.0, 291.0, 313.0, 314.0, 281.0, 301.0, 296.0, 283.0, 291.0, 288.0, 316.0, 317.0, 273.0, 263.0, 319.0, 311.0, 317.0, 316.0, 266.0, 310.0, 294.0, 288.0, 320.0, 316.0, 287.0, 289.0, 312.0, 315.0, 316.0, 314.0, 296.0, 286.0, 286.0, 293.0, 285.0, 297.0, 320.0, 313.0, 259.0, 266.0, 291.0, 296.0, 292.0, 281.0, 310.0, 317.0, 297.0, 
290.0, 317.0, 307.0, 253.0, 271.0, 280.0, 307.0, 319.0, 314.0, 284.0, 295.0, 267.0, 269.0, 290.0, 292.0, 298.0, 289.0, 310.0, 317.0, 288.0, 291.0, 275.0, 258.0, 294.0, 288.0, 313.0, 314.0, 283.0, 304.0, 322.0, 311.0, 316.0, 311.0, 300.0, 282.0, 278.0, 295.0, 319.0, 314.0, 319.0, 311.0, 316.0, 317.0, 296.0, 286.0, 303.0, 284.0, 316.0, 314.0, 313.0, 317.0, 317.0, 310.0, 293.0, 289.0, 314.0, 316.0, 285.0, 294.0, 316.0, 311.0, 286.0, 293.0, 283.0, 296.0, 289.0, 298.0, 285.0, 291.0, 287.0, 292.0, 297.0, 279.0, 294.0, 293.0, 288.0, 294.0, 284.0, 298.0, 287.0, 289.0, 290.0, 292.0, 273.0, 266.0, 310.0, 323.0, 295.0, 292.0, 294.0, 285.0, 321.0, 309.0, 321.0, 312.0, 313.0, 320.0, 331.0, 302.0, 321.0, 315.0, 296.0, 283.0]}, "sampler_perf": {"mean_env_wait_ms": 1.100385875299439, "mean_processing_ms": 0.2881910582639845, "mean_inference_ms": 1.6584370926312206}, "off_policy_estimator": {}, "info": {"num_steps_trained": 5544000, "num_steps_sampled": 2956800, "sample_time_ms": 21086.396, "load_time_ms": 36.806, "grad_time_ms": 9224.029, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0032783111091703176, "policy_loss": -0.005397517699748278, "vf_loss": 92.44506072998047, "vf_explained_var": 0.7564309239387512, "kl": 0.001717855571769178, "entropy": 1.137366771697998, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2956800, "episodes_total": 7392, "training_iteration": 231, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-41-01", "timestamp": 1660254061, "time_this_iter_s": 32.62551975250244, "time_total_s": 12476.109677791595, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 12476.109677791595, "timesteps_since_restore": 2956800, "iterations_since_restore": 231, "perf": {"cpu_util_percent": 34.25652173913044, "ram_util_percent": 58.589130434782625}}
+{"episode_reward_max": 639.0, "episode_reward_min": 444.0, "episode_reward_mean": 597.27, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 215.0}, "policy_reward_max": {"ppo": 331.0}, "policy_reward_mean": {"ppo": 298.635}, "custom_metrics": {"sparse_reward_mean": 206.6, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 184.07, "shaped_reward_min": 124, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.76, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 18.1, "onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 16.31, "useful_onion_pickup_agent_0_min": 9, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 17.42, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.49, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 7, "onion_drop_agent_1_mean": 0.44, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.24, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 0.12, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.96, "potting_onion_agent_0_min": 9, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.42, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 6.24, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.63, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.53, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.98, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.54, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.46, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.06, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.04, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.52, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.98, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.44, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.92, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.96, 
"optimal_onion_potting_agent_0_min": 9, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.42, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.96, "viable_onion_potting_agent_0_min": 9, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.42, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [627.0, 630.0, 579.0, 639.0, 582.0, 633.0, 582.0, 587.0, 630.0, 573.0, 627.0, 579.0, 576.0, 582.0, 630.0, 587.0, 590.0, 582.0, 630.0, 627.0, 582.0, 582.0, 582.0, 573.0, 576.0, 570.0, 582.0, 582.0, 579.0, 633.0, 627.0, 633.0, 627.0, 582.0, 573.0, 633.0, 630.0, 633.0, 582.0, 587.0, 630.0, 630.0, 627.0, 582.0, 630.0, 579.0, 627.0, 579.0, 579.0, 587.0, 576.0, 579.0, 576.0, 587.0, 582.0, 582.0, 576.0, 582.0, 539.0, 633.0, 587.0, 579.0, 630.0, 633.0, 633.0, 633.0, 636.0, 579.0, 527.0, 582.0, 576.0, 587.0, 582.0, 633.0, 444.0, 630.0, 590.0, 633.0, 522.0, 636.0, 579.0, 587.0, 633.0, 627.0, 587.0, 630.0, 636.0, 587.0, 582.0, 582.0, 582.0, 627.0, 636.0, 582.0, 627.0, 582.0, 579.0, 579.0, 633.0, 536.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [321.0, 306.0, 316.0, 314.0, 277.0, 302.0, 318.0, 321.0, 286.0, 296.0, 303.0, 330.0, 286.0, 296.0, 288.0, 299.0, 324.0, 306.0, 274.0, 299.0, 311.0, 316.0, 291.0, 288.0, 292.0, 284.0, 294.0, 288.0, 318.0, 312.0, 300.0, 287.0, 296.0, 294.0, 290.0, 292.0, 304.0, 326.0, 315.0, 312.0, 294.0, 288.0, 299.0, 283.0, 291.0, 291.0, 282.0, 291.0, 290.0, 286.0, 282.0, 288.0, 294.0, 288.0, 293.0, 289.0, 297.0, 282.0, 314.0, 319.0, 308.0, 319.0, 313.0, 320.0, 316.0, 311.0, 300.0, 282.0, 278.0, 295.0, 319.0, 314.0, 319.0, 311.0, 316.0, 317.0, 296.0, 286.0, 303.0, 284.0, 316.0, 314.0, 313.0, 317.0, 317.0, 310.0, 293.0, 289.0, 314.0, 316.0, 285.0, 294.0, 316.0, 311.0, 286.0, 293.0, 283.0, 
296.0, 289.0, 298.0, 285.0, 291.0, 287.0, 292.0, 297.0, 279.0, 294.0, 293.0, 288.0, 294.0, 284.0, 298.0, 287.0, 289.0, 290.0, 292.0, 273.0, 266.0, 310.0, 323.0, 295.0, 292.0, 294.0, 285.0, 321.0, 309.0, 321.0, 312.0, 313.0, 320.0, 331.0, 302.0, 321.0, 315.0, 296.0, 283.0, 262.0, 265.0, 287.0, 295.0, 277.0, 299.0, 285.0, 302.0, 293.0, 289.0, 313.0, 320.0, 229.0, 215.0, 321.0, 309.0, 292.0, 298.0, 311.0, 322.0, 259.0, 263.0, 309.0, 327.0, 288.0, 291.0, 292.0, 295.0, 315.0, 318.0, 321.0, 306.0, 295.0, 292.0, 318.0, 312.0, 313.0, 323.0, 293.0, 294.0, 294.0, 288.0, 285.0, 297.0, 291.0, 291.0, 322.0, 305.0, 329.0, 307.0, 291.0, 291.0, 313.0, 314.0, 281.0, 301.0, 296.0, 283.0, 291.0, 288.0, 316.0, 317.0, 273.0, 263.0]}, "sampler_perf": {"mean_env_wait_ms": 1.0975911945131436, "mean_processing_ms": 0.28763548597333904, "mean_inference_ms": 1.656000718644699}, "off_policy_estimator": {}, "info": {"num_steps_trained": 5568000, "num_steps_sampled": 2969600, "sample_time_ms": 21056.358, "load_time_ms": 36.577, "grad_time_ms": 9266.344, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.002132798545062542, "policy_loss": -0.0064245969988405704, "vf_loss": 91.22052001953125, "vf_explained_var": 0.7570000290870667, "kl": 0.002030483214184642, "entropy": 1.129306674003601, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2969600, "episodes_total": 7424, "training_iteration": 232, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-41-33", "timestamp": 1660254093, "time_this_iter_s": 32.109358072280884, "time_total_s": 12508.219035863876, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 12508.219035863876, "timesteps_since_restore": 2969600, "iterations_since_restore": 232, "perf": {"cpu_util_percent": 33.97777777777779, "ram_util_percent": 58.6088888888889}}
+{"episode_reward_max": 639.0, "episode_reward_min": 444.0, "episode_reward_mean": 596.55, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 215.0}, "policy_reward_max": {"ppo": 332.0}, "policy_reward_mean": {"ppo": 298.275}, "custom_metrics": {"sparse_reward_mean": 206.4, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 183.75, "shaped_reward_min": 124, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.95, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 17.95, "onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.46, "useful_onion_pickup_agent_0_min": 9, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 17.26, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.57, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 7, "onion_drop_agent_1_mean": 0.55, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.18, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.2, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 16.11, "potting_onion_agent_0_min": 9, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 17.17, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 6.2, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.77, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.51, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.01, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.53, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.52, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.06, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.06, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.46, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.02, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.41, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.95, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.11, 
"optimal_onion_potting_agent_0_min": 9, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 17.17, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.11, "viable_onion_potting_agent_0_min": 9, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 17.17, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 582.0, 576.0, 579.0, 573.0, 627.0, 579.0, 582.0, 630.0, 530.0, 630.0, 636.0, 627.0, 570.0, 582.0, 627.0, 582.0, 584.0, 582.0, 587.0, 587.0, 582.0, 576.0, 636.0, 579.0, 582.0, 579.0, 584.0, 630.0, 627.0, 630.0, 630.0, 633.0, 633.0, 636.0, 579.0, 527.0, 582.0, 576.0, 587.0, 582.0, 633.0, 444.0, 630.0, 590.0, 633.0, 522.0, 636.0, 579.0, 587.0, 633.0, 627.0, 587.0, 630.0, 636.0, 587.0, 582.0, 582.0, 582.0, 627.0, 636.0, 582.0, 627.0, 582.0, 579.0, 579.0, 633.0, 536.0, 627.0, 630.0, 579.0, 639.0, 582.0, 633.0, 582.0, 587.0, 630.0, 573.0, 627.0, 579.0, 576.0, 582.0, 630.0, 587.0, 590.0, 582.0, 630.0, 627.0, 582.0, 582.0, 582.0, 573.0, 576.0, 570.0, 582.0, 582.0, 579.0, 633.0, 627.0, 633.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [295.0, 284.0, 296.0, 286.0, 288.0, 288.0, 286.0, 293.0, 278.0, 295.0, 318.0, 309.0, 285.0, 294.0, 295.0, 287.0, 308.0, 322.0, 268.0, 262.0, 314.0, 316.0, 314.0, 322.0, 305.0, 322.0, 287.0, 283.0, 296.0, 286.0, 322.0, 305.0, 286.0, 296.0, 295.0, 289.0, 298.0, 284.0, 301.0, 286.0, 284.0, 303.0, 288.0, 294.0, 300.0, 276.0, 332.0, 304.0, 282.0, 297.0, 304.0, 278.0, 277.0, 302.0, 301.0, 283.0, 316.0, 314.0, 310.0, 317.0, 324.0, 306.0, 321.0, 309.0, 313.0, 320.0, 331.0, 302.0, 321.0, 315.0, 296.0, 283.0, 262.0, 265.0, 287.0, 295.0, 277.0, 299.0, 285.0, 302.0, 293.0, 289.0, 313.0, 320.0, 229.0, 215.0, 321.0, 309.0, 292.0, 298.0, 311.0, 322.0, 259.0, 263.0, 309.0, 327.0, 288.0, 
291.0, 292.0, 295.0, 315.0, 318.0, 321.0, 306.0, 295.0, 292.0, 318.0, 312.0, 313.0, 323.0, 293.0, 294.0, 294.0, 288.0, 285.0, 297.0, 291.0, 291.0, 322.0, 305.0, 329.0, 307.0, 291.0, 291.0, 313.0, 314.0, 281.0, 301.0, 296.0, 283.0, 291.0, 288.0, 316.0, 317.0, 273.0, 263.0, 321.0, 306.0, 316.0, 314.0, 277.0, 302.0, 318.0, 321.0, 286.0, 296.0, 303.0, 330.0, 286.0, 296.0, 288.0, 299.0, 324.0, 306.0, 274.0, 299.0, 311.0, 316.0, 291.0, 288.0, 292.0, 284.0, 294.0, 288.0, 318.0, 312.0, 300.0, 287.0, 296.0, 294.0, 290.0, 292.0, 304.0, 326.0, 315.0, 312.0, 294.0, 288.0, 299.0, 283.0, 291.0, 291.0, 282.0, 291.0, 290.0, 286.0, 282.0, 288.0, 294.0, 288.0, 293.0, 289.0, 297.0, 282.0, 314.0, 319.0, 308.0, 319.0, 313.0, 320.0]}, "sampler_perf": {"mean_env_wait_ms": 1.0948199723537093, "mean_processing_ms": 0.2870853418047666, "mean_inference_ms": 1.653643049405544}, "off_policy_estimator": {}, "info": {"num_steps_trained": 5592000, "num_steps_sampled": 2982400, "sample_time_ms": 21374.201, "load_time_ms": 36.582, "grad_time_ms": 9304.523, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0033536478877067566, "policy_loss": -0.004937517922371626, "vf_loss": 88.6025161743164, "vf_explained_var": 0.7515634894371033, "kl": 0.0023627106565982103, "entropy": 1.138161540031433, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2982400, "episodes_total": 7456, "training_iteration": 233, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-42-06", "timestamp": 1660254126, "time_this_iter_s": 33.16590905189514, "time_total_s": 12541.384944915771, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 12541.384944915771, "timesteps_since_restore": 2982400, "iterations_since_restore": 233, "perf": {"cpu_util_percent": 33.06170212765958, "ram_util_percent": 58.5808510638298}}
+{"episode_reward_max": 639.0, "episode_reward_min": 530.0, "episode_reward_mean": 598.58, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 262.0}, "policy_reward_max": {"ppo": 332.0}, "policy_reward_mean": {"ppo": 299.29}, "custom_metrics": {"sparse_reward_mean": 207.2, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 184.18, "shaped_reward_min": 170, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.81, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 18.21, "onion_pickup_agent_1_min": 13, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.31, "useful_onion_pickup_agent_0_min": 9, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 17.49, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.64, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 7, "onion_drop_agent_1_mean": 0.51, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.2, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 0.16, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 15.91, "potting_onion_agent_0_min": 9, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 17.39, "potting_onion_agent_1_min": 11, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 6.36, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.51, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.75, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.86, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.47, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.47, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.05, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.05, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.64, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.86, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.6, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.8, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.91, 
"optimal_onion_potting_agent_0_min": 9, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 17.39, "optimal_onion_potting_agent_1_min": 11, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.91, "viable_onion_potting_agent_0_min": 9, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 17.39, "viable_onion_potting_agent_1_min": 11, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [630.0, 582.0, 582.0, 630.0, 582.0, 627.0, 576.0, 576.0, 636.0, 630.0, 579.0, 630.0, 582.0, 582.0, 587.0, 636.0, 579.0, 581.0, 587.0, 582.0, 630.0, 639.0, 627.0, 639.0, 630.0, 630.0, 627.0, 570.0, 587.0, 579.0, 582.0, 576.0, 579.0, 579.0, 633.0, 536.0, 627.0, 630.0, 579.0, 639.0, 582.0, 633.0, 582.0, 587.0, 630.0, 573.0, 627.0, 579.0, 576.0, 582.0, 630.0, 587.0, 590.0, 582.0, 630.0, 627.0, 582.0, 582.0, 582.0, 573.0, 576.0, 570.0, 582.0, 582.0, 579.0, 633.0, 627.0, 633.0, 579.0, 582.0, 576.0, 579.0, 573.0, 627.0, 579.0, 582.0, 630.0, 530.0, 630.0, 636.0, 627.0, 570.0, 582.0, 627.0, 582.0, 584.0, 582.0, 587.0, 587.0, 582.0, 576.0, 636.0, 579.0, 582.0, 579.0, 584.0, 630.0, 627.0, 630.0, 630.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [328.0, 302.0, 284.0, 298.0, 283.0, 299.0, 308.0, 322.0, 292.0, 290.0, 300.0, 327.0, 291.0, 285.0, 295.0, 281.0, 326.0, 310.0, 313.0, 317.0, 302.0, 277.0, 318.0, 312.0, 293.0, 289.0, 298.0, 284.0, 296.0, 291.0, 332.0, 304.0, 293.0, 286.0, 285.0, 296.0, 303.0, 284.0, 299.0, 283.0, 324.0, 306.0, 327.0, 312.0, 316.0, 311.0, 319.0, 320.0, 317.0, 313.0, 319.0, 311.0, 309.0, 318.0, 293.0, 277.0, 291.0, 296.0, 290.0, 289.0, 288.0, 294.0, 290.0, 286.0, 296.0, 283.0, 291.0, 288.0, 316.0, 317.0, 273.0, 263.0, 321.0, 306.0, 316.0, 314.0, 277.0, 302.0, 318.0, 321.0, 286.0, 296.0, 303.0, 330.0, 286.0, 296.0, 288.0, 299.0, 324.0, 306.0, 274.0, 299.0, 311.0, 316.0, 291.0, 288.0, 292.0, 
284.0, 294.0, 288.0, 318.0, 312.0, 300.0, 287.0, 296.0, 294.0, 290.0, 292.0, 304.0, 326.0, 315.0, 312.0, 294.0, 288.0, 299.0, 283.0, 291.0, 291.0, 282.0, 291.0, 290.0, 286.0, 282.0, 288.0, 294.0, 288.0, 293.0, 289.0, 297.0, 282.0, 314.0, 319.0, 308.0, 319.0, 313.0, 320.0, 295.0, 284.0, 296.0, 286.0, 288.0, 288.0, 286.0, 293.0, 278.0, 295.0, 318.0, 309.0, 285.0, 294.0, 295.0, 287.0, 308.0, 322.0, 268.0, 262.0, 314.0, 316.0, 314.0, 322.0, 305.0, 322.0, 287.0, 283.0, 296.0, 286.0, 322.0, 305.0, 286.0, 296.0, 295.0, 289.0, 298.0, 284.0, 301.0, 286.0, 284.0, 303.0, 288.0, 294.0, 300.0, 276.0, 332.0, 304.0, 282.0, 297.0, 304.0, 278.0, 277.0, 302.0, 301.0, 283.0, 316.0, 314.0, 310.0, 317.0, 324.0, 306.0, 321.0, 309.0]}, "sampler_perf": {"mean_env_wait_ms": 1.0920719973611583, "mean_processing_ms": 0.2865396383505603, "mean_inference_ms": 1.6512949554166665}, "off_policy_estimator": {}, "info": {"num_steps_trained": 5616000, "num_steps_sampled": 2995200, "sample_time_ms": 21497.979, "load_time_ms": 36.457, "grad_time_ms": 9378.106, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0021230385173112154, "policy_loss": -0.0060439333319664, "vf_loss": 87.32781982421875, "vf_explained_var": 0.7546737194061279, "kl": 0.0017831752775236964, "entropy": 1.1316334009170532, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2995200, "episodes_total": 7488, "training_iteration": 234, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-42-38", "timestamp": 1660254158, "time_this_iter_s": 31.695109128952026, "time_total_s": 12573.080054044724, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 12573.080054044724, "timesteps_since_restore": 2995200, "iterations_since_restore": 234, "perf": {"cpu_util_percent": 33.72888888888888, "ram_util_percent": 58.67111111111112}}
+{"episode_reward_max": 639.0, "episode_reward_min": 530.0, "episode_reward_mean": 599.17, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 261.0}, "policy_reward_max": {"ppo": 332.0}, "policy_reward_mean": {"ppo": 299.585}, "custom_metrics": {"sparse_reward_mean": 207.4, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 184.37, "shaped_reward_min": 161, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.78, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 18.34, "onion_pickup_agent_1_min": 12, "onion_pickup_agent_1_max": 29, "useful_onion_pickup_agent_0_mean": 16.24, "useful_onion_pickup_agent_0_min": 9, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 17.52, "useful_onion_pickup_agent_1_min": 12, "useful_onion_pickup_agent_1_max": 27, "onion_drop_agent_0_mean": 0.57, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 7, "onion_drop_agent_1_mean": 0.66, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.17, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 0.23, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 15.93, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 17.37, "potting_onion_agent_1_min": 11, "potting_onion_agent_1_max": 25, "dish_pickup_agent_0_mean": 6.37, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 5.47, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.73, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.91, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.5, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.41, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.05, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.64, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.88, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.59, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.83, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.93, 
"optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 17.37, "optimal_onion_potting_agent_1_min": 11, "optimal_onion_potting_agent_1_max": 25, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.93, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 17.37, "viable_onion_potting_agent_1_min": 11, "viable_onion_potting_agent_1_max": 25, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 579.0, 582.0, 627.0, 630.0, 582.0, 582.0, 579.0, 582.0, 582.0, 590.0, 627.0, 579.0, 627.0, 576.0, 636.0, 579.0, 579.0, 582.0, 627.0, 561.0, 630.0, 587.0, 630.0, 582.0, 579.0, 639.0, 630.0, 579.0, 636.0, 584.0, 541.0, 579.0, 633.0, 627.0, 633.0, 579.0, 582.0, 576.0, 579.0, 573.0, 627.0, 579.0, 582.0, 630.0, 530.0, 630.0, 636.0, 627.0, 570.0, 582.0, 627.0, 582.0, 584.0, 582.0, 587.0, 587.0, 582.0, 576.0, 636.0, 579.0, 582.0, 579.0, 584.0, 630.0, 627.0, 630.0, 630.0, 630.0, 582.0, 582.0, 630.0, 582.0, 627.0, 576.0, 576.0, 636.0, 630.0, 579.0, 630.0, 582.0, 582.0, 587.0, 636.0, 579.0, 581.0, 587.0, 582.0, 630.0, 639.0, 627.0, 639.0, 630.0, 630.0, 627.0, 570.0, 587.0, 579.0, 582.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [284.0, 298.0, 295.0, 284.0, 285.0, 297.0, 305.0, 322.0, 323.0, 307.0, 283.0, 299.0, 294.0, 288.0, 287.0, 292.0, 305.0, 277.0, 293.0, 289.0, 299.0, 291.0, 308.0, 319.0, 301.0, 278.0, 322.0, 305.0, 283.0, 293.0, 319.0, 317.0, 288.0, 291.0, 294.0, 285.0, 286.0, 296.0, 306.0, 321.0, 291.0, 270.0, 306.0, 324.0, 286.0, 301.0, 303.0, 327.0, 291.0, 291.0, 290.0, 289.0, 312.0, 327.0, 321.0, 309.0, 283.0, 296.0, 319.0, 317.0, 283.0, 301.0, 280.0, 261.0, 297.0, 282.0, 314.0, 319.0, 308.0, 319.0, 313.0, 320.0, 295.0, 284.0, 296.0, 286.0, 288.0, 288.0, 286.0, 293.0, 278.0, 295.0, 318.0, 309.0, 285.0, 294.0, 295.0, 287.0, 308.0, 322.0, 268.0, 262.0, 314.0, 316.0, 314.0, 322.0, 305.0, 
322.0, 287.0, 283.0, 296.0, 286.0, 322.0, 305.0, 286.0, 296.0, 295.0, 289.0, 298.0, 284.0, 301.0, 286.0, 284.0, 303.0, 288.0, 294.0, 300.0, 276.0, 332.0, 304.0, 282.0, 297.0, 304.0, 278.0, 277.0, 302.0, 301.0, 283.0, 316.0, 314.0, 310.0, 317.0, 324.0, 306.0, 321.0, 309.0, 328.0, 302.0, 284.0, 298.0, 283.0, 299.0, 308.0, 322.0, 292.0, 290.0, 300.0, 327.0, 291.0, 285.0, 295.0, 281.0, 326.0, 310.0, 313.0, 317.0, 302.0, 277.0, 318.0, 312.0, 293.0, 289.0, 298.0, 284.0, 296.0, 291.0, 332.0, 304.0, 293.0, 286.0, 285.0, 296.0, 303.0, 284.0, 299.0, 283.0, 324.0, 306.0, 327.0, 312.0, 316.0, 311.0, 319.0, 320.0, 317.0, 313.0, 319.0, 311.0, 309.0, 318.0, 293.0, 277.0, 291.0, 296.0, 290.0, 289.0, 288.0, 294.0, 290.0, 286.0]}, "sampler_perf": {"mean_env_wait_ms": 1.0893523875509898, "mean_processing_ms": 0.2860034188911333, "mean_inference_ms": 1.6490498343131736}, "off_policy_estimator": {}, "info": {"num_steps_trained": 5640000, "num_steps_sampled": 3008000, "sample_time_ms": 21585.027, "load_time_ms": 36.709, "grad_time_ms": 9663.091, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0033673776779323816, "policy_loss": -0.0045895627699792385, "vf_loss": 85.23816680908203, "vf_explained_var": 0.7584102749824524, "kl": 0.0018025357276201248, "entropy": 1.1337394714355469, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3008000, "episodes_total": 7520, "training_iteration": 235, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-43-12", "timestamp": 1660254192, "time_this_iter_s": 34.23338508605957, "time_total_s": 12607.313439130783, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 12607.313439130783, "timesteps_since_restore": 3008000, "iterations_since_restore": 235, "perf": {"cpu_util_percent": 33.239583333333336, "ram_util_percent": 58.65}}
+{"episode_reward_max": 639.0, "episode_reward_min": 541.0, "episode_reward_mean": 603.16, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 261.0}, "policy_reward_max": {"ppo": 332.0}, "policy_reward_mean": {"ppo": 301.58}, "custom_metrics": {"sparse_reward_mean": 208.8, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 185.56, "shaped_reward_min": 161, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.48, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 18.55, "onion_pickup_agent_1_min": 12, "onion_pickup_agent_1_max": 29, "useful_onion_pickup_agent_0_mean": 15.97, "useful_onion_pickup_agent_0_min": 9, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 17.73, "useful_onion_pickup_agent_1_min": 12, "useful_onion_pickup_agent_1_max": 27, "onion_drop_agent_0_mean": 0.4, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 0.52, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.16, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 0.13, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.76, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.71, "potting_onion_agent_1_min": 12, "potting_onion_agent_1_max": 25, "dish_pickup_agent_0_mean": 6.49, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 5.39, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.86, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.89, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.49, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.41, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.04, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.07, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.76, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.82, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.71, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.76, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.76, 
"optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.71, "optimal_onion_potting_agent_1_min": 12, "optimal_onion_potting_agent_1_max": 25, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.76, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.71, "viable_onion_potting_agent_1_min": 12, "viable_onion_potting_agent_1_max": 25, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [633.0, 630.0, 582.0, 627.0, 584.0, 633.0, 579.0, 630.0, 582.0, 627.0, 582.0, 584.0, 587.0, 630.0, 582.0, 627.0, 630.0, 627.0, 587.0, 582.0, 639.0, 582.0, 582.0, 633.0, 582.0, 587.0, 627.0, 633.0, 582.0, 639.0, 579.0, 630.0, 630.0, 627.0, 630.0, 630.0, 630.0, 582.0, 582.0, 630.0, 582.0, 627.0, 576.0, 576.0, 636.0, 630.0, 579.0, 630.0, 582.0, 582.0, 587.0, 636.0, 579.0, 581.0, 587.0, 582.0, 630.0, 639.0, 627.0, 639.0, 630.0, 630.0, 627.0, 570.0, 587.0, 579.0, 582.0, 576.0, 582.0, 579.0, 582.0, 627.0, 630.0, 582.0, 582.0, 579.0, 582.0, 582.0, 590.0, 627.0, 579.0, 627.0, 576.0, 636.0, 579.0, 579.0, 582.0, 627.0, 561.0, 630.0, 587.0, 630.0, 582.0, 579.0, 639.0, 630.0, 579.0, 636.0, 584.0, 541.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [321.0, 312.0, 324.0, 306.0, 285.0, 297.0, 316.0, 311.0, 283.0, 301.0, 311.0, 322.0, 288.0, 291.0, 321.0, 309.0, 293.0, 289.0, 315.0, 312.0, 301.0, 281.0, 292.0, 292.0, 295.0, 292.0, 316.0, 314.0, 291.0, 291.0, 309.0, 318.0, 321.0, 309.0, 313.0, 314.0, 301.0, 286.0, 291.0, 291.0, 322.0, 317.0, 294.0, 288.0, 290.0, 292.0, 311.0, 322.0, 288.0, 294.0, 288.0, 299.0, 316.0, 311.0, 316.0, 317.0, 281.0, 301.0, 324.0, 315.0, 294.0, 285.0, 317.0, 313.0, 316.0, 314.0, 310.0, 317.0, 324.0, 306.0, 321.0, 309.0, 328.0, 302.0, 284.0, 298.0, 283.0, 299.0, 308.0, 322.0, 292.0, 290.0, 300.0, 327.0, 291.0, 285.0, 295.0, 281.0, 326.0, 310.0, 313.0, 317.0, 302.0, 277.0, 318.0, 312.0, 293.0, 
289.0, 298.0, 284.0, 296.0, 291.0, 332.0, 304.0, 293.0, 286.0, 285.0, 296.0, 303.0, 284.0, 299.0, 283.0, 324.0, 306.0, 327.0, 312.0, 316.0, 311.0, 319.0, 320.0, 317.0, 313.0, 319.0, 311.0, 309.0, 318.0, 293.0, 277.0, 291.0, 296.0, 290.0, 289.0, 288.0, 294.0, 290.0, 286.0, 284.0, 298.0, 295.0, 284.0, 285.0, 297.0, 305.0, 322.0, 323.0, 307.0, 283.0, 299.0, 294.0, 288.0, 287.0, 292.0, 305.0, 277.0, 293.0, 289.0, 299.0, 291.0, 308.0, 319.0, 301.0, 278.0, 322.0, 305.0, 283.0, 293.0, 319.0, 317.0, 288.0, 291.0, 294.0, 285.0, 286.0, 296.0, 306.0, 321.0, 291.0, 270.0, 306.0, 324.0, 286.0, 301.0, 303.0, 327.0, 291.0, 291.0, 290.0, 289.0, 312.0, 327.0, 321.0, 309.0, 283.0, 296.0, 319.0, 317.0, 283.0, 301.0, 280.0, 261.0]}, "sampler_perf": {"mean_env_wait_ms": 1.0866480049346467, "mean_processing_ms": 0.2854697984995614, "mean_inference_ms": 1.6467374423655963}, "off_policy_estimator": {}, "info": {"num_steps_trained": 5664000, "num_steps_sampled": 3020800, "sample_time_ms": 21716.507, "load_time_ms": 36.498, "grad_time_ms": 9682.814, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.004189230967313051, "policy_loss": -0.003748750314116478, "vf_loss": 85.03255462646484, "vf_explained_var": 0.76678067445755, "kl": 0.001733882469125092, "entropy": 1.130557656288147, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3020800, "episodes_total": 7552, "training_iteration": 236, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-43-43", "timestamp": 1660254223, "time_this_iter_s": 30.502008199691772, "time_total_s": 12637.815447330475, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 12637.815447330475, "timesteps_since_restore": 3020800, "iterations_since_restore": 236, "perf": {"cpu_util_percent": 34.461363636363636, "ram_util_percent": 59.190909090909095}}
+{"episode_reward_max": 639.0, "episode_reward_min": 515.0, "episode_reward_mean": 596.33, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 257.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 298.165}, "custom_metrics": {"sparse_reward_mean": 206.4, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 183.53, "shaped_reward_min": 155, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.6, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 18.58, "onion_pickup_agent_1_min": 12, "onion_pickup_agent_1_max": 29, "useful_onion_pickup_agent_0_mean": 15.98, "useful_onion_pickup_agent_0_min": 9, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 17.74, "useful_onion_pickup_agent_1_min": 12, "useful_onion_pickup_agent_1_max": 27, "onion_drop_agent_0_mean": 0.49, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.73, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.22, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.24, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 15.76, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.51, "potting_onion_agent_1_min": 12, "potting_onion_agent_1_max": 25, "dish_pickup_agent_0_mean": 6.41, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 14, "dish_pickup_agent_1_mean": 5.52, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.5, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.89, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.61, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 0.44, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.1, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.62, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 10, "soup_pickup_agent_1_mean": 4.93, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.52, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.86, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.05, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 5, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.76, 
"optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.51, "optimal_onion_potting_agent_1_min": 12, "optimal_onion_potting_agent_1_max": 25, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.76, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.51, "viable_onion_potting_agent_1_min": 12, "viable_onion_potting_agent_1_max": 25, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 579.0, 539.0, 630.0, 587.0, 630.0, 579.0, 579.0, 582.0, 627.0, 521.0, 636.0, 582.0, 582.0, 587.0, 576.0, 627.0, 527.0, 627.0, 567.0, 576.0, 570.0, 627.0, 538.0, 630.0, 636.0, 561.0, 515.0, 582.0, 630.0, 633.0, 558.0, 587.0, 579.0, 582.0, 576.0, 582.0, 579.0, 582.0, 627.0, 630.0, 582.0, 582.0, 579.0, 582.0, 582.0, 590.0, 627.0, 579.0, 627.0, 576.0, 636.0, 579.0, 579.0, 582.0, 627.0, 561.0, 630.0, 587.0, 630.0, 582.0, 579.0, 639.0, 630.0, 579.0, 636.0, 584.0, 541.0, 633.0, 630.0, 582.0, 627.0, 584.0, 633.0, 579.0, 630.0, 582.0, 627.0, 582.0, 584.0, 587.0, 630.0, 582.0, 627.0, 630.0, 627.0, 587.0, 582.0, 639.0, 582.0, 582.0, 633.0, 582.0, 587.0, 627.0, 633.0, 582.0, 639.0, 579.0, 630.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [287.0, 295.0, 297.0, 282.0, 265.0, 274.0, 316.0, 314.0, 294.0, 293.0, 319.0, 311.0, 293.0, 286.0, 293.0, 286.0, 294.0, 288.0, 310.0, 317.0, 260.0, 261.0, 324.0, 312.0, 303.0, 279.0, 293.0, 289.0, 290.0, 297.0, 282.0, 294.0, 318.0, 309.0, 268.0, 259.0, 311.0, 316.0, 289.0, 278.0, 282.0, 294.0, 276.0, 294.0, 306.0, 321.0, 273.0, 265.0, 306.0, 324.0, 317.0, 319.0, 293.0, 268.0, 258.0, 257.0, 291.0, 291.0, 316.0, 314.0, 308.0, 325.0, 264.0, 294.0, 291.0, 296.0, 290.0, 289.0, 288.0, 294.0, 290.0, 286.0, 284.0, 298.0, 295.0, 284.0, 285.0, 297.0, 305.0, 322.0, 323.0, 307.0, 283.0, 299.0, 294.0, 288.0, 287.0, 292.0, 305.0, 277.0, 293.0, 289.0, 299.0, 291.0, 308.0, 319.0, 301.0, 
278.0, 322.0, 305.0, 283.0, 293.0, 319.0, 317.0, 288.0, 291.0, 294.0, 285.0, 286.0, 296.0, 306.0, 321.0, 291.0, 270.0, 306.0, 324.0, 286.0, 301.0, 303.0, 327.0, 291.0, 291.0, 290.0, 289.0, 312.0, 327.0, 321.0, 309.0, 283.0, 296.0, 319.0, 317.0, 283.0, 301.0, 280.0, 261.0, 321.0, 312.0, 324.0, 306.0, 285.0, 297.0, 316.0, 311.0, 283.0, 301.0, 311.0, 322.0, 288.0, 291.0, 321.0, 309.0, 293.0, 289.0, 315.0, 312.0, 301.0, 281.0, 292.0, 292.0, 295.0, 292.0, 316.0, 314.0, 291.0, 291.0, 309.0, 318.0, 321.0, 309.0, 313.0, 314.0, 301.0, 286.0, 291.0, 291.0, 322.0, 317.0, 294.0, 288.0, 290.0, 292.0, 311.0, 322.0, 288.0, 294.0, 288.0, 299.0, 316.0, 311.0, 316.0, 317.0, 281.0, 301.0, 324.0, 315.0, 294.0, 285.0, 317.0, 313.0]}, "sampler_perf": {"mean_env_wait_ms": 1.0839571838028972, "mean_processing_ms": 0.28493791430015475, "mean_inference_ms": 1.6442666845730367}, "off_policy_estimator": {}, "info": {"num_steps_trained": 5688000, "num_steps_sampled": 3033600, "sample_time_ms": 21735.725, "load_time_ms": 37.102, "grad_time_ms": 9981.103, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.00033502434962429106, "policy_loss": -0.007877787575125694, "vf_loss": 87.85860443115234, "vf_explained_var": 0.7610828280448914, "kl": 0.0018075080588459969, "entropy": 1.1460970640182495, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3033600, "episodes_total": 7584, "training_iteration": 237, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-44-14", "timestamp": 1660254254, "time_this_iter_s": 31.47483992576599, "time_total_s": 12669.29028725624, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 12669.29028725624, "timesteps_since_restore": 3033600, "iterations_since_restore": 237, "perf": {"cpu_util_percent": 33.54772727272728, "ram_util_percent": 58.545454545454554}}
+{"episode_reward_max": 639.0, "episode_reward_min": 515.0, "episode_reward_mean": 598.41, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 257.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 299.205}, "custom_metrics": {"sparse_reward_mean": 207.0, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 184.41, "shaped_reward_min": 155, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.85, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 18.61, "onion_pickup_agent_1_min": 13, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.21, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 17.64, "useful_onion_pickup_agent_1_min": 12, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.69, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 0.68, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.32, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.26, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 15.82, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.56, "potting_onion_agent_1_min": 12, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.39, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 14, "dish_pickup_agent_1_mean": 5.56, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.56, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.88, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.58, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 0.48, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.09, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.69, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 10, "soup_pickup_agent_1_mean": 4.94, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.57, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.87, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.05, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 5, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.82, 
"optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.56, "optimal_onion_potting_agent_1_min": 12, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.82, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.56, "viable_onion_potting_agent_1_min": 12, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [633.0, 579.0, 627.0, 582.0, 582.0, 544.0, 630.0, 587.0, 630.0, 584.0, 636.0, 624.0, 582.0, 627.0, 579.0, 630.0, 633.0, 590.0, 633.0, 630.0, 636.0, 576.0, 633.0, 582.0, 579.0, 582.0, 544.0, 630.0, 636.0, 579.0, 587.0, 573.0, 579.0, 636.0, 584.0, 541.0, 633.0, 630.0, 582.0, 627.0, 584.0, 633.0, 579.0, 630.0, 582.0, 627.0, 582.0, 584.0, 587.0, 630.0, 582.0, 627.0, 630.0, 627.0, 587.0, 582.0, 639.0, 582.0, 582.0, 633.0, 582.0, 587.0, 627.0, 633.0, 582.0, 639.0, 579.0, 630.0, 582.0, 579.0, 539.0, 630.0, 587.0, 630.0, 579.0, 579.0, 582.0, 627.0, 521.0, 636.0, 582.0, 582.0, 587.0, 576.0, 627.0, 527.0, 627.0, 567.0, 576.0, 570.0, 627.0, 538.0, 630.0, 636.0, 561.0, 515.0, 582.0, 630.0, 633.0, 558.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [321.0, 312.0, 298.0, 281.0, 313.0, 314.0, 293.0, 289.0, 285.0, 297.0, 270.0, 274.0, 321.0, 309.0, 286.0, 301.0, 310.0, 320.0, 290.0, 294.0, 322.0, 314.0, 327.0, 297.0, 288.0, 294.0, 310.0, 317.0, 288.0, 291.0, 311.0, 319.0, 313.0, 320.0, 288.0, 302.0, 313.0, 320.0, 324.0, 306.0, 312.0, 324.0, 289.0, 287.0, 311.0, 322.0, 288.0, 294.0, 283.0, 296.0, 301.0, 281.0, 273.0, 271.0, 311.0, 319.0, 319.0, 317.0, 291.0, 288.0, 302.0, 285.0, 299.0, 274.0, 283.0, 296.0, 319.0, 317.0, 283.0, 301.0, 280.0, 261.0, 321.0, 312.0, 324.0, 306.0, 285.0, 297.0, 316.0, 311.0, 283.0, 301.0, 311.0, 322.0, 288.0, 291.0, 321.0, 309.0, 293.0, 289.0, 315.0, 312.0, 301.0, 281.0, 292.0, 292.0, 295.0, 
292.0, 316.0, 314.0, 291.0, 291.0, 309.0, 318.0, 321.0, 309.0, 313.0, 314.0, 301.0, 286.0, 291.0, 291.0, 322.0, 317.0, 294.0, 288.0, 290.0, 292.0, 311.0, 322.0, 288.0, 294.0, 288.0, 299.0, 316.0, 311.0, 316.0, 317.0, 281.0, 301.0, 324.0, 315.0, 294.0, 285.0, 317.0, 313.0, 287.0, 295.0, 297.0, 282.0, 265.0, 274.0, 316.0, 314.0, 294.0, 293.0, 319.0, 311.0, 293.0, 286.0, 293.0, 286.0, 294.0, 288.0, 310.0, 317.0, 260.0, 261.0, 324.0, 312.0, 303.0, 279.0, 293.0, 289.0, 290.0, 297.0, 282.0, 294.0, 318.0, 309.0, 268.0, 259.0, 311.0, 316.0, 289.0, 278.0, 282.0, 294.0, 276.0, 294.0, 306.0, 321.0, 273.0, 265.0, 306.0, 324.0, 317.0, 319.0, 293.0, 268.0, 258.0, 257.0, 291.0, 291.0, 316.0, 314.0, 308.0, 325.0, 264.0, 294.0]}, "sampler_perf": {"mean_env_wait_ms": 1.0812829517018399, "mean_processing_ms": 0.2844054156337277, "mean_inference_ms": 1.641726673758305}, "off_policy_estimator": {}, "info": {"num_steps_trained": 5712000, "num_steps_sampled": 3046400, "sample_time_ms": 21856.898, "load_time_ms": 37.298, "grad_time_ms": 10099.958, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0027887988835573196, "policy_loss": -0.005770063493400812, "vf_loss": 91.25625610351562, "vf_explained_var": 0.7579948306083679, "kl": 0.001784983091056347, "entropy": 1.1335158348083496, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3046400, "episodes_total": 7616, "training_iteration": 238, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-44-46", "timestamp": 1660254286, "time_this_iter_s": 31.44696879386902, "time_total_s": 12700.73725605011, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 12700.73725605011, "timesteps_since_restore": 3046400, "iterations_since_restore": 238, "perf": {"cpu_util_percent": 33.757777777777775, "ram_util_percent": 58.49555555555556}}
+{"episode_reward_max": 639.0, "episode_reward_min": 242.0, "episode_reward_mean": 593.31, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 117.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 296.655}, "custom_metrics": {"sparse_reward_mean": 205.2, "sparse_reward_min": 80, "sparse_reward_max": 220, "shaped_reward_mean": 182.91, "shaped_reward_min": 82, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.1, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 18.31, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.37, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 17.34, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.76, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 0.73, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.34, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.31, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 16.02, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.22, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.1, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 14, "dish_pickup_agent_1_mean": 5.63, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.37, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.91, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.47, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 0.47, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 6, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.05, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.52, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 10, "soup_pickup_agent_1_mean": 4.99, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.42, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.95, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.05, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 5, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.02, 
"optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.22, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.02, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.22, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 524.0, 582.0, 636.0, 582.0, 630.0, 582.0, 633.0, 630.0, 630.0, 582.0, 582.0, 539.0, 579.0, 639.0, 579.0, 576.0, 582.0, 579.0, 636.0, 579.0, 633.0, 630.0, 587.0, 242.0, 573.0, 627.0, 582.0, 636.0, 587.0, 627.0, 636.0, 582.0, 639.0, 579.0, 630.0, 582.0, 579.0, 539.0, 630.0, 587.0, 630.0, 579.0, 579.0, 582.0, 627.0, 521.0, 636.0, 582.0, 582.0, 587.0, 576.0, 627.0, 527.0, 627.0, 567.0, 576.0, 570.0, 627.0, 538.0, 630.0, 636.0, 561.0, 515.0, 582.0, 630.0, 633.0, 558.0, 633.0, 579.0, 627.0, 582.0, 582.0, 544.0, 630.0, 587.0, 630.0, 584.0, 636.0, 624.0, 582.0, 627.0, 579.0, 630.0, 633.0, 590.0, 633.0, 630.0, 636.0, 576.0, 633.0, 582.0, 579.0, 582.0, 544.0, 630.0, 636.0, 579.0, 587.0, 573.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [288.0, 291.0, 259.0, 265.0, 288.0, 294.0, 314.0, 322.0, 290.0, 292.0, 319.0, 311.0, 295.0, 287.0, 317.0, 316.0, 326.0, 304.0, 309.0, 321.0, 293.0, 289.0, 298.0, 284.0, 263.0, 276.0, 286.0, 293.0, 320.0, 319.0, 295.0, 284.0, 290.0, 286.0, 284.0, 298.0, 278.0, 301.0, 319.0, 317.0, 288.0, 291.0, 313.0, 320.0, 322.0, 308.0, 299.0, 288.0, 117.0, 125.0, 290.0, 283.0, 319.0, 308.0, 286.0, 296.0, 324.0, 312.0, 301.0, 286.0, 311.0, 316.0, 314.0, 322.0, 281.0, 301.0, 324.0, 315.0, 294.0, 285.0, 317.0, 313.0, 287.0, 295.0, 297.0, 282.0, 265.0, 274.0, 316.0, 314.0, 294.0, 293.0, 319.0, 311.0, 293.0, 286.0, 293.0, 286.0, 294.0, 288.0, 310.0, 317.0, 260.0, 261.0, 324.0, 312.0, 303.0, 
279.0, 293.0, 289.0, 290.0, 297.0, 282.0, 294.0, 318.0, 309.0, 268.0, 259.0, 311.0, 316.0, 289.0, 278.0, 282.0, 294.0, 276.0, 294.0, 306.0, 321.0, 273.0, 265.0, 306.0, 324.0, 317.0, 319.0, 293.0, 268.0, 258.0, 257.0, 291.0, 291.0, 316.0, 314.0, 308.0, 325.0, 264.0, 294.0, 321.0, 312.0, 298.0, 281.0, 313.0, 314.0, 293.0, 289.0, 285.0, 297.0, 270.0, 274.0, 321.0, 309.0, 286.0, 301.0, 310.0, 320.0, 290.0, 294.0, 322.0, 314.0, 327.0, 297.0, 288.0, 294.0, 310.0, 317.0, 288.0, 291.0, 311.0, 319.0, 313.0, 320.0, 288.0, 302.0, 313.0, 320.0, 324.0, 306.0, 312.0, 324.0, 289.0, 287.0, 311.0, 322.0, 288.0, 294.0, 283.0, 296.0, 301.0, 281.0, 273.0, 271.0, 311.0, 319.0, 319.0, 317.0, 291.0, 288.0, 302.0, 285.0, 299.0, 274.0]}, "sampler_perf": {"mean_env_wait_ms": 1.078628112981745, "mean_processing_ms": 0.2838764625844994, "mean_inference_ms": 1.6391328483371586}, "off_policy_estimator": {}, "info": {"num_steps_trained": 5736000, "num_steps_sampled": 3059200, "sample_time_ms": 21753.38, "load_time_ms": 37.651, "grad_time_ms": 10111.988, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.005386353936046362, "policy_loss": -0.003314490430057049, "vf_loss": 92.68680572509766, "vf_explained_var": 0.7602830529212952, "kl": 0.0021554683335125446, "entropy": 1.135677456855774, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3059200, "episodes_total": 7648, "training_iteration": 239, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-45-16", "timestamp": 1660254316, "time_this_iter_s": 30.261106967926025, "time_total_s": 12730.998363018036, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 12730.998363018036, "timesteps_since_restore": 3059200, "iterations_since_restore": 239, "perf": {"cpu_util_percent": 33.04761904761905, "ram_util_percent": 58.58571428571428}}
+{"episode_reward_max": 639.0, "episode_reward_min": 242.0, "episode_reward_mean": 598.58, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 117.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 299.29}, "custom_metrics": {"sparse_reward_mean": 207.0, "sparse_reward_min": 80, "sparse_reward_max": 220, "shaped_reward_mean": 184.58, "shaped_reward_min": 82, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.27, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 18.23, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.47, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.3, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.7, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 0.64, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.31, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.26, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 16.22, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.24, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.1, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.68, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.46, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.04, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.49, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 0.46, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 6, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.47, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.07, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.41, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.03, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.22, 
"optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.24, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.22, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.24, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 636.0, 630.0, 587.0, 567.0, 587.0, 582.0, 624.0, 539.0, 633.0, 633.0, 630.0, 636.0, 633.0, 633.0, 630.0, 636.0, 630.0, 630.0, 582.0, 576.0, 636.0, 587.0, 630.0, 576.0, 584.0, 579.0, 579.0, 570.0, 630.0, 587.0, 582.0, 582.0, 630.0, 633.0, 558.0, 633.0, 579.0, 627.0, 582.0, 582.0, 544.0, 630.0, 587.0, 630.0, 584.0, 636.0, 624.0, 582.0, 627.0, 579.0, 630.0, 633.0, 590.0, 633.0, 630.0, 636.0, 576.0, 633.0, 582.0, 579.0, 582.0, 544.0, 630.0, 636.0, 579.0, 587.0, 573.0, 579.0, 524.0, 582.0, 636.0, 582.0, 630.0, 582.0, 633.0, 630.0, 630.0, 582.0, 582.0, 539.0, 579.0, 639.0, 579.0, 576.0, 582.0, 579.0, 636.0, 579.0, 633.0, 630.0, 587.0, 242.0, 573.0, 627.0, 582.0, 636.0, 587.0, 627.0, 636.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [284.0, 298.0, 327.0, 309.0, 319.0, 311.0, 301.0, 286.0, 284.0, 283.0, 286.0, 301.0, 289.0, 293.0, 311.0, 313.0, 273.0, 266.0, 317.0, 316.0, 306.0, 327.0, 313.0, 317.0, 324.0, 312.0, 309.0, 324.0, 319.0, 314.0, 313.0, 317.0, 327.0, 309.0, 309.0, 321.0, 321.0, 309.0, 292.0, 290.0, 293.0, 283.0, 322.0, 314.0, 283.0, 304.0, 322.0, 308.0, 283.0, 293.0, 291.0, 293.0, 285.0, 294.0, 296.0, 283.0, 282.0, 288.0, 315.0, 315.0, 298.0, 289.0, 301.0, 281.0, 291.0, 291.0, 316.0, 314.0, 308.0, 325.0, 264.0, 294.0, 321.0, 312.0, 298.0, 281.0, 313.0, 314.0, 293.0, 289.0, 285.0, 297.0, 270.0, 274.0, 321.0, 309.0, 286.0, 301.0, 310.0, 320.0, 290.0, 294.0, 322.0, 314.0, 327.0, 297.0, 288.0, 
294.0, 310.0, 317.0, 288.0, 291.0, 311.0, 319.0, 313.0, 320.0, 288.0, 302.0, 313.0, 320.0, 324.0, 306.0, 312.0, 324.0, 289.0, 287.0, 311.0, 322.0, 288.0, 294.0, 283.0, 296.0, 301.0, 281.0, 273.0, 271.0, 311.0, 319.0, 319.0, 317.0, 291.0, 288.0, 302.0, 285.0, 299.0, 274.0, 288.0, 291.0, 259.0, 265.0, 288.0, 294.0, 314.0, 322.0, 290.0, 292.0, 319.0, 311.0, 295.0, 287.0, 317.0, 316.0, 326.0, 304.0, 309.0, 321.0, 293.0, 289.0, 298.0, 284.0, 263.0, 276.0, 286.0, 293.0, 320.0, 319.0, 295.0, 284.0, 290.0, 286.0, 284.0, 298.0, 278.0, 301.0, 319.0, 317.0, 288.0, 291.0, 313.0, 320.0, 322.0, 308.0, 299.0, 288.0, 117.0, 125.0, 290.0, 283.0, 319.0, 308.0, 286.0, 296.0, 324.0, 312.0, 301.0, 286.0, 311.0, 316.0, 314.0, 322.0]}, "sampler_perf": {"mean_env_wait_ms": 1.0759937679443485, "mean_processing_ms": 0.28335070451542615, "mean_inference_ms": 1.6365163711120108}, "off_policy_estimator": {}, "info": {"num_steps_trained": 5760000, "num_steps_sampled": 3072000, "sample_time_ms": 21577.613, "load_time_ms": 37.508, "grad_time_ms": 10049.353, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0017478683730587363, "policy_loss": -0.006376888602972031, "vf_loss": 86.9516372680664, "vf_explained_var": 0.7652549743652344, "kl": 0.0021124929189682007, "entropy": 1.1408079862594604, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3072000, "episodes_total": 7680, "training_iteration": 240, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-45-45", "timestamp": 1660254345, "time_this_iter_s": 29.390948057174683, "time_total_s": 12760.38931107521, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 12760.38931107521, "timesteps_since_restore": 3072000, "iterations_since_restore": 240, "perf": {"cpu_util_percent": 32.80238095238095, "ram_util_percent": 58.55238095238095}}
+{"episode_reward_max": 639.0, "episode_reward_min": 242.0, "episode_reward_mean": 595.23, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 117.0}, "policy_reward_max": {"ppo": 328.0}, "policy_reward_mean": {"ppo": 297.615}, "custom_metrics": {"sparse_reward_mean": 205.8, "sparse_reward_min": 80, "sparse_reward_max": 220, "shaped_reward_mean": 183.63, "shaped_reward_min": 82, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.09, "onion_pickup_agent_0_min": 10, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 18.26, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.38, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 17.45, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.65, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.72, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.2, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, "useful_onion_drop_agent_1_mean": 0.24, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 16.14, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.17, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.18, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.63, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.46, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.89, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.48, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 0.52, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 6, "useful_dish_drop_agent_0_mean": 0.04, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.54, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 5.0, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.44, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.96, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.14, 
"optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.17, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.14, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.17, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [627.0, 630.0, 590.0, 627.0, 582.0, 639.0, 630.0, 576.0, 630.0, 458.0, 630.0, 550.0, 630.0, 587.0, 630.0, 582.0, 627.0, 570.0, 587.0, 630.0, 579.0, 558.0, 584.0, 538.0, 587.0, 587.0, 564.0, 630.0, 582.0, 633.0, 579.0, 539.0, 636.0, 579.0, 587.0, 573.0, 579.0, 524.0, 582.0, 636.0, 582.0, 630.0, 582.0, 633.0, 630.0, 630.0, 582.0, 582.0, 539.0, 579.0, 639.0, 579.0, 576.0, 582.0, 579.0, 636.0, 579.0, 633.0, 630.0, 587.0, 242.0, 573.0, 627.0, 582.0, 636.0, 587.0, 627.0, 636.0, 582.0, 636.0, 630.0, 587.0, 567.0, 587.0, 582.0, 624.0, 539.0, 633.0, 633.0, 630.0, 636.0, 633.0, 633.0, 630.0, 636.0, 630.0, 630.0, 582.0, 576.0, 636.0, 587.0, 630.0, 576.0, 584.0, 579.0, 579.0, 570.0, 630.0, 587.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [318.0, 309.0, 305.0, 325.0, 294.0, 296.0, 316.0, 311.0, 292.0, 290.0, 317.0, 322.0, 311.0, 319.0, 276.0, 300.0, 314.0, 316.0, 230.0, 228.0, 326.0, 304.0, 276.0, 274.0, 328.0, 302.0, 294.0, 293.0, 306.0, 324.0, 286.0, 296.0, 310.0, 317.0, 294.0, 276.0, 286.0, 301.0, 321.0, 309.0, 293.0, 286.0, 286.0, 272.0, 293.0, 291.0, 267.0, 271.0, 302.0, 285.0, 296.0, 291.0, 278.0, 286.0, 311.0, 319.0, 295.0, 287.0, 316.0, 317.0, 299.0, 280.0, 268.0, 271.0, 319.0, 317.0, 291.0, 288.0, 302.0, 285.0, 299.0, 274.0, 288.0, 291.0, 259.0, 265.0, 288.0, 294.0, 314.0, 322.0, 290.0, 292.0, 319.0, 311.0, 295.0, 287.0, 317.0, 316.0, 326.0, 304.0, 309.0, 321.0, 293.0, 289.0, 298.0, 284.0, 263.0, 
276.0, 286.0, 293.0, 320.0, 319.0, 295.0, 284.0, 290.0, 286.0, 284.0, 298.0, 278.0, 301.0, 319.0, 317.0, 288.0, 291.0, 313.0, 320.0, 322.0, 308.0, 299.0, 288.0, 117.0, 125.0, 290.0, 283.0, 319.0, 308.0, 286.0, 296.0, 324.0, 312.0, 301.0, 286.0, 311.0, 316.0, 314.0, 322.0, 284.0, 298.0, 327.0, 309.0, 319.0, 311.0, 301.0, 286.0, 284.0, 283.0, 286.0, 301.0, 289.0, 293.0, 311.0, 313.0, 273.0, 266.0, 317.0, 316.0, 306.0, 327.0, 313.0, 317.0, 324.0, 312.0, 309.0, 324.0, 319.0, 314.0, 313.0, 317.0, 327.0, 309.0, 309.0, 321.0, 321.0, 309.0, 292.0, 290.0, 293.0, 283.0, 322.0, 314.0, 283.0, 304.0, 322.0, 308.0, 283.0, 293.0, 291.0, 293.0, 285.0, 294.0, 296.0, 283.0, 282.0, 288.0, 315.0, 315.0, 298.0, 289.0, 301.0, 281.0]}, "sampler_perf": {"mean_env_wait_ms": 1.0733850104445417, "mean_processing_ms": 0.2828300453202057, "mean_inference_ms": 1.6339069456399316}, "off_policy_estimator": {}, "info": {"num_steps_trained": 5784000, "num_steps_sampled": 3084800, "sample_time_ms": 21470.585, "load_time_ms": 37.079, "grad_time_ms": 10121.915, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0020955076906830072, "policy_loss": -0.006229180842638016, "vf_loss": 88.9510269165039, "vf_explained_var": 0.7567486763000488, "kl": 0.0017531089251860976, "entropy": 1.140811562538147, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3084800, "episodes_total": 7712, "training_iteration": 241, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-46-18", "timestamp": 1660254378, "time_this_iter_s": 32.28085994720459, "time_total_s": 12792.670171022415, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 12792.670171022415, "timesteps_since_restore": 3084800, "iterations_since_restore": 241, "perf": {"cpu_util_percent": 31.733333333333334, "ram_util_percent": 58.655555555555544}}
+{"episode_reward_max": 639.0, "episode_reward_min": 458.0, "episode_reward_mean": 603.13, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 228.0}, "policy_reward_max": {"ppo": 328.0}, "policy_reward_mean": {"ppo": 301.565}, "custom_metrics": {"sparse_reward_mean": 208.8, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 185.53, "shaped_reward_min": 138, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.04, "onion_pickup_agent_0_min": 10, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 18.74, "onion_pickup_agent_1_min": 13, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.4, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 17.95, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.68, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.84, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.2, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, "useful_onion_drop_agent_1_mean": 0.22, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 16.07, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.53, "potting_onion_agent_1_min": 13, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.32, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.7, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.57, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.89, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.51, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 0.58, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.04, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.64, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 5.05, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.52, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 5.0, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.07, 
"optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.53, "optimal_onion_potting_agent_1_min": 13, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.07, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.53, "viable_onion_potting_agent_1_min": 13, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [627.0, 590.0, 639.0, 639.0, 627.0, 570.0, 582.0, 633.0, 590.0, 627.0, 633.0, 576.0, 576.0, 627.0, 582.0, 624.0, 630.0, 633.0, 630.0, 576.0, 633.0, 633.0, 590.0, 587.0, 639.0, 579.0, 582.0, 570.0, 630.0, 582.0, 636.0, 627.0, 636.0, 587.0, 627.0, 636.0, 582.0, 636.0, 630.0, 587.0, 567.0, 587.0, 582.0, 624.0, 539.0, 633.0, 633.0, 630.0, 636.0, 633.0, 633.0, 630.0, 636.0, 630.0, 630.0, 582.0, 576.0, 636.0, 587.0, 630.0, 576.0, 584.0, 579.0, 579.0, 570.0, 630.0, 587.0, 582.0, 627.0, 630.0, 590.0, 627.0, 582.0, 639.0, 630.0, 576.0, 630.0, 458.0, 630.0, 550.0, 630.0, 587.0, 630.0, 582.0, 627.0, 570.0, 587.0, 630.0, 579.0, 558.0, 584.0, 538.0, 587.0, 587.0, 564.0, 630.0, 582.0, 633.0, 579.0, 539.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [321.0, 306.0, 294.0, 296.0, 322.0, 317.0, 317.0, 322.0, 316.0, 311.0, 275.0, 295.0, 300.0, 282.0, 326.0, 307.0, 294.0, 296.0, 313.0, 314.0, 311.0, 322.0, 291.0, 285.0, 272.0, 304.0, 313.0, 314.0, 294.0, 288.0, 315.0, 309.0, 313.0, 317.0, 314.0, 319.0, 316.0, 314.0, 280.0, 296.0, 308.0, 325.0, 321.0, 312.0, 293.0, 297.0, 296.0, 291.0, 317.0, 322.0, 291.0, 288.0, 292.0, 290.0, 294.0, 276.0, 311.0, 319.0, 288.0, 294.0, 314.0, 322.0, 316.0, 311.0, 324.0, 312.0, 301.0, 286.0, 311.0, 316.0, 314.0, 322.0, 284.0, 298.0, 327.0, 309.0, 319.0, 311.0, 301.0, 286.0, 284.0, 283.0, 286.0, 301.0, 289.0, 293.0, 311.0, 313.0, 273.0, 266.0, 317.0, 316.0, 306.0, 327.0, 313.0, 317.0, 324.0, 
312.0, 309.0, 324.0, 319.0, 314.0, 313.0, 317.0, 327.0, 309.0, 309.0, 321.0, 321.0, 309.0, 292.0, 290.0, 293.0, 283.0, 322.0, 314.0, 283.0, 304.0, 322.0, 308.0, 283.0, 293.0, 291.0, 293.0, 285.0, 294.0, 296.0, 283.0, 282.0, 288.0, 315.0, 315.0, 298.0, 289.0, 301.0, 281.0, 318.0, 309.0, 305.0, 325.0, 294.0, 296.0, 316.0, 311.0, 292.0, 290.0, 317.0, 322.0, 311.0, 319.0, 276.0, 300.0, 314.0, 316.0, 230.0, 228.0, 326.0, 304.0, 276.0, 274.0, 328.0, 302.0, 294.0, 293.0, 306.0, 324.0, 286.0, 296.0, 310.0, 317.0, 294.0, 276.0, 286.0, 301.0, 321.0, 309.0, 293.0, 286.0, 286.0, 272.0, 293.0, 291.0, 267.0, 271.0, 302.0, 285.0, 296.0, 291.0, 278.0, 286.0, 311.0, 319.0, 295.0, 287.0, 316.0, 317.0, 299.0, 280.0, 268.0, 271.0]}, "sampler_perf": {"mean_env_wait_ms": 1.0708107094165102, "mean_processing_ms": 0.2823168700385721, "mean_inference_ms": 1.631412939127947}, "off_policy_estimator": {}, "info": {"num_steps_trained": 5808000, "num_steps_sampled": 3097600, "sample_time_ms": 21506.392, "load_time_ms": 37.292, "grad_time_ms": 9971.815, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.004354400560259819, "policy_loss": -0.0035108765587210655, "vf_loss": 84.29744720458984, "vf_explained_var": 0.7617435455322266, "kl": 0.0018548279767856002, "entropy": 1.1289268732070923, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3097600, "episodes_total": 7744, "training_iteration": 242, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-46-49", "timestamp": 1660254409, "time_this_iter_s": 30.967852115631104, "time_total_s": 12823.638023138046, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 12823.638023138046, "timesteps_since_restore": 3097600, "iterations_since_restore": 242, "perf": {"cpu_util_percent": 31.486363636363638, "ram_util_percent": 58.63863636363636}}
+{"episode_reward_max": 639.0, "episode_reward_min": 458.0, "episode_reward_mean": 602.75, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 228.0}, "policy_reward_max": {"ppo": 329.0}, "policy_reward_mean": {"ppo": 301.375}, "custom_metrics": {"sparse_reward_mean": 208.8, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 185.15, "shaped_reward_min": 138, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.81, "onion_pickup_agent_0_min": 10, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 18.82, "onion_pickup_agent_1_min": 13, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 16.32, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 18.0, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.64, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.89, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.13, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.22, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 15.93, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.57, "potting_onion_agent_1_min": 13, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.2, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.78, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.53, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.92, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.43, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.55, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.05, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.58, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 5.1, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.48, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 5.05, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.93, 
"optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.57, "optimal_onion_potting_agent_1_min": 13, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.93, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.57, "viable_onion_potting_agent_1_min": 13, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [630.0, 633.0, 630.0, 627.0, 627.0, 627.0, 582.0, 633.0, 630.0, 636.0, 584.0, 627.0, 582.0, 633.0, 573.0, 587.0, 627.0, 587.0, 630.0, 582.0, 576.0, 627.0, 579.0, 639.0, 570.0, 579.0, 582.0, 633.0, 579.0, 633.0, 627.0, 544.0, 570.0, 630.0, 587.0, 582.0, 627.0, 630.0, 590.0, 627.0, 582.0, 639.0, 630.0, 576.0, 630.0, 458.0, 630.0, 550.0, 630.0, 587.0, 630.0, 582.0, 627.0, 570.0, 587.0, 630.0, 579.0, 558.0, 584.0, 538.0, 587.0, 587.0, 564.0, 630.0, 582.0, 633.0, 579.0, 539.0, 627.0, 590.0, 639.0, 639.0, 627.0, 570.0, 582.0, 633.0, 590.0, 627.0, 633.0, 576.0, 576.0, 627.0, 582.0, 624.0, 630.0, 633.0, 630.0, 576.0, 633.0, 633.0, 590.0, 587.0, 639.0, 579.0, 582.0, 570.0, 630.0, 582.0, 636.0, 627.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [313.0, 317.0, 316.0, 317.0, 301.0, 329.0, 313.0, 314.0, 311.0, 316.0, 311.0, 316.0, 286.0, 296.0, 314.0, 319.0, 311.0, 319.0, 324.0, 312.0, 286.0, 298.0, 324.0, 303.0, 278.0, 304.0, 307.0, 326.0, 288.0, 285.0, 298.0, 289.0, 308.0, 319.0, 293.0, 294.0, 326.0, 304.0, 295.0, 287.0, 280.0, 296.0, 315.0, 312.0, 289.0, 290.0, 322.0, 317.0, 288.0, 282.0, 283.0, 296.0, 299.0, 283.0, 309.0, 324.0, 282.0, 297.0, 312.0, 321.0, 305.0, 322.0, 278.0, 266.0, 282.0, 288.0, 315.0, 315.0, 298.0, 289.0, 301.0, 281.0, 318.0, 309.0, 305.0, 325.0, 294.0, 296.0, 316.0, 311.0, 292.0, 290.0, 317.0, 322.0, 311.0, 319.0, 276.0, 300.0, 314.0, 316.0, 230.0, 228.0, 326.0, 304.0, 276.0, 274.0, 328.0, 
302.0, 294.0, 293.0, 306.0, 324.0, 286.0, 296.0, 310.0, 317.0, 294.0, 276.0, 286.0, 301.0, 321.0, 309.0, 293.0, 286.0, 286.0, 272.0, 293.0, 291.0, 267.0, 271.0, 302.0, 285.0, 296.0, 291.0, 278.0, 286.0, 311.0, 319.0, 295.0, 287.0, 316.0, 317.0, 299.0, 280.0, 268.0, 271.0, 321.0, 306.0, 294.0, 296.0, 322.0, 317.0, 317.0, 322.0, 316.0, 311.0, 275.0, 295.0, 300.0, 282.0, 326.0, 307.0, 294.0, 296.0, 313.0, 314.0, 311.0, 322.0, 291.0, 285.0, 272.0, 304.0, 313.0, 314.0, 294.0, 288.0, 315.0, 309.0, 313.0, 317.0, 314.0, 319.0, 316.0, 314.0, 280.0, 296.0, 308.0, 325.0, 321.0, 312.0, 293.0, 297.0, 296.0, 291.0, 317.0, 322.0, 291.0, 288.0, 292.0, 290.0, 294.0, 276.0, 311.0, 319.0, 288.0, 294.0, 314.0, 322.0, 316.0, 311.0]}, "sampler_perf": {"mean_env_wait_ms": 1.068272032402952, "mean_processing_ms": 0.2818129859025947, "mean_inference_ms": 1.6291161273108918}, "off_policy_estimator": {}, "info": {"num_steps_trained": 5832000, "num_steps_sampled": 3110400, "sample_time_ms": 21475.079, "load_time_ms": 37.422, "grad_time_ms": 9892.717, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0034763356670737267, "policy_loss": -0.004455787595361471, "vf_loss": 84.99886322021484, "vf_explained_var": 0.7575659155845642, "kl": 0.0017217934364452958, "entropy": 1.135510802268982, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3110400, "episodes_total": 7776, "training_iteration": 243, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-47-21", "timestamp": 1660254441, "time_this_iter_s": 32.067052125930786, "time_total_s": 12855.705075263977, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 12855.705075263977, "timesteps_since_restore": 3110400, "iterations_since_restore": 243, "perf": {"cpu_util_percent": 31.317777777777778, "ram_util_percent": 58.61555555555556}}
+{"episode_reward_max": 639.0, "episode_reward_min": 468.0, "episode_reward_mean": 602.3, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 229.0}, "policy_reward_max": {"ppo": 329.0}, "policy_reward_mean": {"ppo": 301.15}, "custom_metrics": {"sparse_reward_mean": 208.8, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 184.7, "shaped_reward_min": 144, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.09, "onion_pickup_agent_0_min": 10, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 18.47, "onion_pickup_agent_1_min": 13, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 16.57, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 17.6, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.73, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.84, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.14, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.26, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 16.07, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.35, "potting_onion_agent_1_min": 13, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.02, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.95, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 5.37, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.11, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.39, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.56, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.05, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.43, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.18, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.37, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.14, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.07, 
"optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.35, "optimal_onion_potting_agent_1_min": 13, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.07, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.35, "viable_onion_potting_agent_1_min": 13, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [627.0, 539.0, 630.0, 549.0, 627.0, 564.0, 539.0, 636.0, 627.0, 587.0, 582.0, 636.0, 582.0, 630.0, 627.0, 576.0, 581.0, 627.0, 630.0, 582.0, 576.0, 579.0, 582.0, 627.0, 587.0, 587.0, 627.0, 582.0, 468.0, 636.0, 630.0, 504.0, 582.0, 633.0, 579.0, 539.0, 627.0, 590.0, 639.0, 639.0, 627.0, 570.0, 582.0, 633.0, 590.0, 627.0, 633.0, 576.0, 576.0, 627.0, 582.0, 624.0, 630.0, 633.0, 630.0, 576.0, 633.0, 633.0, 590.0, 587.0, 639.0, 579.0, 582.0, 570.0, 630.0, 582.0, 636.0, 627.0, 630.0, 633.0, 630.0, 627.0, 627.0, 627.0, 582.0, 633.0, 630.0, 636.0, 584.0, 627.0, 582.0, 633.0, 573.0, 587.0, 627.0, 587.0, 630.0, 582.0, 576.0, 627.0, 579.0, 639.0, 570.0, 579.0, 582.0, 633.0, 579.0, 633.0, 627.0, 544.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [316.0, 311.0, 269.0, 270.0, 311.0, 319.0, 275.0, 274.0, 308.0, 319.0, 292.0, 272.0, 271.0, 268.0, 324.0, 312.0, 313.0, 314.0, 302.0, 285.0, 299.0, 283.0, 322.0, 314.0, 293.0, 289.0, 310.0, 320.0, 319.0, 308.0, 281.0, 295.0, 282.0, 299.0, 307.0, 320.0, 321.0, 309.0, 283.0, 299.0, 277.0, 299.0, 293.0, 286.0, 275.0, 307.0, 311.0, 316.0, 285.0, 302.0, 299.0, 288.0, 316.0, 311.0, 299.0, 283.0, 229.0, 239.0, 318.0, 318.0, 308.0, 322.0, 238.0, 266.0, 295.0, 287.0, 316.0, 317.0, 299.0, 280.0, 268.0, 271.0, 321.0, 306.0, 294.0, 296.0, 322.0, 317.0, 317.0, 322.0, 316.0, 311.0, 275.0, 295.0, 300.0, 282.0, 326.0, 307.0, 294.0, 296.0, 313.0, 314.0, 311.0, 322.0, 291.0, 285.0, 272.0, 
304.0, 313.0, 314.0, 294.0, 288.0, 315.0, 309.0, 313.0, 317.0, 314.0, 319.0, 316.0, 314.0, 280.0, 296.0, 308.0, 325.0, 321.0, 312.0, 293.0, 297.0, 296.0, 291.0, 317.0, 322.0, 291.0, 288.0, 292.0, 290.0, 294.0, 276.0, 311.0, 319.0, 288.0, 294.0, 314.0, 322.0, 316.0, 311.0, 313.0, 317.0, 316.0, 317.0, 301.0, 329.0, 313.0, 314.0, 311.0, 316.0, 311.0, 316.0, 286.0, 296.0, 314.0, 319.0, 311.0, 319.0, 324.0, 312.0, 286.0, 298.0, 324.0, 303.0, 278.0, 304.0, 307.0, 326.0, 288.0, 285.0, 298.0, 289.0, 308.0, 319.0, 293.0, 294.0, 326.0, 304.0, 295.0, 287.0, 280.0, 296.0, 315.0, 312.0, 289.0, 290.0, 322.0, 317.0, 288.0, 282.0, 283.0, 296.0, 299.0, 283.0, 309.0, 324.0, 282.0, 297.0, 312.0, 321.0, 305.0, 322.0, 278.0, 266.0]}, "sampler_perf": {"mean_env_wait_ms": 1.065748558737823, "mean_processing_ms": 0.28131319823148404, "mean_inference_ms": 1.6267302844305909}, "off_policy_estimator": {}, "info": {"num_steps_trained": 5856000, "num_steps_sampled": 3123200, "sample_time_ms": 21258.847, "load_time_ms": 37.597, "grad_time_ms": 9965.955, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.00222679297439754, "policy_loss": -0.005516994744539261, "vf_loss": 83.11483764648438, "vf_explained_var": 0.7694733142852783, "kl": 0.002387256594374776, "entropy": 1.135390281677246, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3123200, "episodes_total": 7808, "training_iteration": 244, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-47-51", "timestamp": 1660254471, "time_this_iter_s": 30.266911029815674, "time_total_s": 12885.971986293793, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 12885.971986293793, "timesteps_since_restore": 3123200, "iterations_since_restore": 244, "perf": {"cpu_util_percent": 32.02093023255814, "ram_util_percent": 58.54186046511629}}
+{"episode_reward_max": 639.0, "episode_reward_min": 468.0, "episode_reward_mean": 599.44, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 229.0}, "policy_reward_max": {"ppo": 329.0}, "policy_reward_mean": {"ppo": 299.72}, "custom_metrics": {"sparse_reward_mean": 207.6, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 184.24, "shaped_reward_min": 144, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.11, "onion_pickup_agent_0_min": 10, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 18.39, "onion_pickup_agent_1_min": 12, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 16.54, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 17.45, "useful_onion_pickup_agent_1_min": 11, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.78, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.87, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.22, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.28, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 15.97, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.28, "potting_onion_agent_1_min": 12, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.04, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.85, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 5.4, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.08, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.42, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.5, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.04, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.48, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.14, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.42, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.06, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.97, 
"optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.28, "optimal_onion_potting_agent_1_min": 12, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.97, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.28, "viable_onion_potting_agent_1_min": 12, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [627.0, 633.0, 633.0, 570.0, 582.0, 593.0, 633.0, 636.0, 587.0, 579.0, 627.0, 579.0, 539.0, 587.0, 630.0, 636.0, 584.0, 579.0, 582.0, 630.0, 544.0, 584.0, 630.0, 587.0, 587.0, 573.0, 582.0, 573.0, 627.0, 527.0, 624.0, 587.0, 630.0, 582.0, 636.0, 627.0, 630.0, 633.0, 630.0, 627.0, 627.0, 627.0, 582.0, 633.0, 630.0, 636.0, 584.0, 627.0, 582.0, 633.0, 573.0, 587.0, 627.0, 587.0, 630.0, 582.0, 576.0, 627.0, 579.0, 639.0, 570.0, 579.0, 582.0, 633.0, 579.0, 633.0, 627.0, 544.0, 627.0, 539.0, 630.0, 549.0, 627.0, 564.0, 539.0, 636.0, 627.0, 587.0, 582.0, 636.0, 582.0, 630.0, 627.0, 576.0, 581.0, 627.0, 630.0, 582.0, 576.0, 579.0, 582.0, 627.0, 587.0, 587.0, 627.0, 582.0, 468.0, 636.0, 630.0, 504.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [318.0, 309.0, 318.0, 315.0, 322.0, 311.0, 290.0, 280.0, 285.0, 297.0, 302.0, 291.0, 322.0, 311.0, 324.0, 312.0, 304.0, 283.0, 293.0, 286.0, 321.0, 306.0, 285.0, 294.0, 275.0, 264.0, 293.0, 294.0, 318.0, 312.0, 319.0, 317.0, 277.0, 307.0, 291.0, 288.0, 288.0, 294.0, 319.0, 311.0, 268.0, 276.0, 300.0, 284.0, 314.0, 316.0, 290.0, 297.0, 299.0, 288.0, 291.0, 282.0, 287.0, 295.0, 287.0, 286.0, 313.0, 314.0, 259.0, 268.0, 311.0, 313.0, 288.0, 299.0, 311.0, 319.0, 288.0, 294.0, 314.0, 322.0, 316.0, 311.0, 313.0, 317.0, 316.0, 317.0, 301.0, 329.0, 313.0, 314.0, 311.0, 316.0, 311.0, 316.0, 286.0, 296.0, 314.0, 319.0, 311.0, 319.0, 324.0, 312.0, 286.0, 298.0, 324.0, 303.0, 278.0, 
304.0, 307.0, 326.0, 288.0, 285.0, 298.0, 289.0, 308.0, 319.0, 293.0, 294.0, 326.0, 304.0, 295.0, 287.0, 280.0, 296.0, 315.0, 312.0, 289.0, 290.0, 322.0, 317.0, 288.0, 282.0, 283.0, 296.0, 299.0, 283.0, 309.0, 324.0, 282.0, 297.0, 312.0, 321.0, 305.0, 322.0, 278.0, 266.0, 316.0, 311.0, 269.0, 270.0, 311.0, 319.0, 275.0, 274.0, 308.0, 319.0, 292.0, 272.0, 271.0, 268.0, 324.0, 312.0, 313.0, 314.0, 302.0, 285.0, 299.0, 283.0, 322.0, 314.0, 293.0, 289.0, 310.0, 320.0, 319.0, 308.0, 281.0, 295.0, 282.0, 299.0, 307.0, 320.0, 321.0, 309.0, 283.0, 299.0, 277.0, 299.0, 293.0, 286.0, 275.0, 307.0, 311.0, 316.0, 285.0, 302.0, 299.0, 288.0, 316.0, 311.0, 299.0, 283.0, 229.0, 239.0, 318.0, 318.0, 308.0, 322.0, 238.0, 266.0]}, "sampler_perf": {"mean_env_wait_ms": 1.0632381583217039, "mean_processing_ms": 0.2808155253638906, "mean_inference_ms": 1.6242198083388235}, "off_policy_estimator": {}, "info": {"num_steps_trained": 5880000, "num_steps_sampled": 3136000, "sample_time_ms": 21038.899, "load_time_ms": 37.35, "grad_time_ms": 9776.148, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.004664632026106119, "policy_loss": -0.003766902955248952, "vf_loss": 90.03823852539062, "vf_explained_var": 0.7575922012329102, "kl": 0.002137060509994626, "entropy": 1.1445802450180054, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3136000, "episodes_total": 7840, "training_iteration": 245, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-48-21", "timestamp": 1660254501, "time_this_iter_s": 30.129722118377686, "time_total_s": 12916.10170841217, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 12916.10170841217, "timesteps_since_restore": 3136000, "iterations_since_restore": 245, "perf": {"cpu_util_percent": 34.127906976744185, "ram_util_percent": 58.56744186046512}}
+{"episode_reward_max": 639.0, "episode_reward_min": 468.0, "episode_reward_mean": 600.05, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 229.0}, "policy_reward_max": {"ppo": 329.0}, "policy_reward_mean": {"ppo": 300.025}, "custom_metrics": {"sparse_reward_mean": 207.8, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 184.45, "shaped_reward_min": 144, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.85, "onion_pickup_agent_0_min": 10, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 18.75, "onion_pickup_agent_1_min": 12, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 16.28, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 17.79, "useful_onion_pickup_agent_1_min": 11, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.85, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.87, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.34, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, "useful_onion_drop_agent_1_mean": 0.32, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 15.61, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.64, "potting_onion_agent_1_min": 12, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.2, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.54, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 5.64, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.91, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.35, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.42, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.06, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.7, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.92, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.65, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.84, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.61, 
"optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.64, "optimal_onion_potting_agent_1_min": 12, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.61, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.64, "viable_onion_potting_agent_1_min": 12, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [627.0, 630.0, 579.0, 630.0, 627.0, 633.0, 630.0, 630.0, 582.0, 639.0, 627.0, 587.0, 639.0, 636.0, 582.0, 627.0, 582.0, 630.0, 633.0, 636.0, 513.0, 582.0, 630.0, 570.0, 584.0, 630.0, 633.0, 582.0, 630.0, 582.0, 636.0, 630.0, 579.0, 633.0, 627.0, 544.0, 627.0, 539.0, 630.0, 549.0, 627.0, 564.0, 539.0, 636.0, 627.0, 587.0, 582.0, 636.0, 582.0, 630.0, 627.0, 576.0, 581.0, 627.0, 630.0, 582.0, 576.0, 579.0, 582.0, 627.0, 587.0, 587.0, 627.0, 582.0, 468.0, 636.0, 630.0, 504.0, 627.0, 633.0, 633.0, 570.0, 582.0, 593.0, 633.0, 636.0, 587.0, 579.0, 627.0, 579.0, 539.0, 587.0, 630.0, 636.0, 584.0, 579.0, 582.0, 630.0, 544.0, 584.0, 630.0, 587.0, 587.0, 573.0, 582.0, 573.0, 627.0, 527.0, 624.0, 587.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [313.0, 314.0, 311.0, 319.0, 290.0, 289.0, 321.0, 309.0, 309.0, 318.0, 329.0, 304.0, 323.0, 307.0, 318.0, 312.0, 293.0, 289.0, 314.0, 325.0, 314.0, 313.0, 297.0, 290.0, 314.0, 325.0, 311.0, 325.0, 290.0, 292.0, 303.0, 324.0, 296.0, 286.0, 314.0, 316.0, 315.0, 318.0, 324.0, 312.0, 261.0, 252.0, 293.0, 289.0, 321.0, 309.0, 288.0, 282.0, 285.0, 299.0, 315.0, 315.0, 315.0, 318.0, 285.0, 297.0, 319.0, 311.0, 292.0, 290.0, 319.0, 317.0, 319.0, 311.0, 282.0, 297.0, 312.0, 321.0, 305.0, 322.0, 278.0, 266.0, 316.0, 311.0, 269.0, 270.0, 311.0, 319.0, 275.0, 274.0, 308.0, 319.0, 292.0, 272.0, 271.0, 268.0, 324.0, 312.0, 313.0, 314.0, 302.0, 285.0, 299.0, 283.0, 322.0, 314.0, 293.0, 
289.0, 310.0, 320.0, 319.0, 308.0, 281.0, 295.0, 282.0, 299.0, 307.0, 320.0, 321.0, 309.0, 283.0, 299.0, 277.0, 299.0, 293.0, 286.0, 275.0, 307.0, 311.0, 316.0, 285.0, 302.0, 299.0, 288.0, 316.0, 311.0, 299.0, 283.0, 229.0, 239.0, 318.0, 318.0, 308.0, 322.0, 238.0, 266.0, 318.0, 309.0, 318.0, 315.0, 322.0, 311.0, 290.0, 280.0, 285.0, 297.0, 302.0, 291.0, 322.0, 311.0, 324.0, 312.0, 304.0, 283.0, 293.0, 286.0, 321.0, 306.0, 285.0, 294.0, 275.0, 264.0, 293.0, 294.0, 318.0, 312.0, 319.0, 317.0, 277.0, 307.0, 291.0, 288.0, 288.0, 294.0, 319.0, 311.0, 268.0, 276.0, 300.0, 284.0, 314.0, 316.0, 290.0, 297.0, 299.0, 288.0, 291.0, 282.0, 287.0, 295.0, 287.0, 286.0, 313.0, 314.0, 259.0, 268.0, 311.0, 313.0, 288.0, 299.0]}, "sampler_perf": {"mean_env_wait_ms": 1.0607418368814732, "mean_processing_ms": 0.280319937755019, "mean_inference_ms": 1.6216711984881527}, "off_policy_estimator": {}, "info": {"num_steps_trained": 5904000, "num_steps_sampled": 3148800, "sample_time_ms": 21042.313, "load_time_ms": 37.258, "grad_time_ms": 9784.882, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.003808257170021534, "policy_loss": -0.0040723783895373344, "vf_loss": 84.46407318115234, "vf_explained_var": 0.7558939456939697, "kl": 0.0020272734109312296, "entropy": 1.1315315961837769, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3148800, "episodes_total": 7872, "training_iteration": 246, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-48-52", "timestamp": 1660254532, "time_this_iter_s": 30.6191668510437, "time_total_s": 12946.720875263214, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 12946.720875263214, "timesteps_since_restore": 3148800, "iterations_since_restore": 246, "perf": {"cpu_util_percent": 29.048837209302324, "ram_util_percent": 58.57906976744185}}
+{"episode_reward_max": 639.0, "episode_reward_min": 468.0, "episode_reward_mean": 601.42, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 229.0}, "policy_reward_max": {"ppo": 329.0}, "policy_reward_mean": {"ppo": 300.71}, "custom_metrics": {"sparse_reward_mean": 208.2, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 185.02, "shaped_reward_min": 144, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.83, "onion_pickup_agent_0_min": 10, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 18.97, "onion_pickup_agent_1_min": 12, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 16.31, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 18.06, "useful_onion_pickup_agent_1_min": 11, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.85, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.91, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.35, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, "useful_onion_drop_agent_1_mean": 0.36, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 15.59, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.77, "potting_onion_agent_1_min": 12, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.24, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.4, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.74, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.89, "useful_dish_pickup_agent_1_min": 3, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.36, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.36, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.74, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.88, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.7, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.8, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.59, 
"optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.77, "optimal_onion_potting_agent_1_min": 12, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.59, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.77, "viable_onion_potting_agent_1_min": 12, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [633.0, 582.0, 630.0, 576.0, 582.0, 633.0, 573.0, 587.0, 582.0, 630.0, 633.0, 582.0, 633.0, 530.0, 636.0, 582.0, 636.0, 584.0, 630.0, 633.0, 630.0, 582.0, 630.0, 521.0, 587.0, 627.0, 576.0, 630.0, 579.0, 630.0, 582.0, 584.0, 468.0, 636.0, 630.0, 504.0, 627.0, 633.0, 633.0, 570.0, 582.0, 593.0, 633.0, 636.0, 587.0, 579.0, 627.0, 579.0, 539.0, 587.0, 630.0, 636.0, 584.0, 579.0, 582.0, 630.0, 544.0, 584.0, 630.0, 587.0, 587.0, 573.0, 582.0, 573.0, 627.0, 527.0, 624.0, 587.0, 627.0, 630.0, 579.0, 630.0, 627.0, 633.0, 630.0, 630.0, 582.0, 639.0, 627.0, 587.0, 639.0, 636.0, 582.0, 627.0, 582.0, 630.0, 633.0, 636.0, 513.0, 582.0, 630.0, 570.0, 584.0, 630.0, 633.0, 582.0, 630.0, 582.0, 636.0, 630.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [319.0, 314.0, 295.0, 287.0, 314.0, 316.0, 291.0, 285.0, 288.0, 294.0, 319.0, 314.0, 279.0, 294.0, 290.0, 297.0, 288.0, 294.0, 303.0, 327.0, 324.0, 309.0, 286.0, 296.0, 316.0, 317.0, 261.0, 269.0, 322.0, 314.0, 288.0, 294.0, 317.0, 319.0, 296.0, 288.0, 316.0, 314.0, 321.0, 312.0, 301.0, 329.0, 293.0, 289.0, 313.0, 317.0, 259.0, 262.0, 293.0, 294.0, 308.0, 319.0, 283.0, 293.0, 308.0, 322.0, 297.0, 282.0, 321.0, 309.0, 303.0, 279.0, 302.0, 282.0, 229.0, 239.0, 318.0, 318.0, 308.0, 322.0, 238.0, 266.0, 318.0, 309.0, 318.0, 315.0, 322.0, 311.0, 290.0, 280.0, 285.0, 297.0, 302.0, 291.0, 322.0, 311.0, 324.0, 312.0, 304.0, 283.0, 293.0, 286.0, 321.0, 306.0, 285.0, 294.0, 275.0, 
264.0, 293.0, 294.0, 318.0, 312.0, 319.0, 317.0, 277.0, 307.0, 291.0, 288.0, 288.0, 294.0, 319.0, 311.0, 268.0, 276.0, 300.0, 284.0, 314.0, 316.0, 290.0, 297.0, 299.0, 288.0, 291.0, 282.0, 287.0, 295.0, 287.0, 286.0, 313.0, 314.0, 259.0, 268.0, 311.0, 313.0, 288.0, 299.0, 313.0, 314.0, 311.0, 319.0, 290.0, 289.0, 321.0, 309.0, 309.0, 318.0, 329.0, 304.0, 323.0, 307.0, 318.0, 312.0, 293.0, 289.0, 314.0, 325.0, 314.0, 313.0, 297.0, 290.0, 314.0, 325.0, 311.0, 325.0, 290.0, 292.0, 303.0, 324.0, 296.0, 286.0, 314.0, 316.0, 315.0, 318.0, 324.0, 312.0, 261.0, 252.0, 293.0, 289.0, 321.0, 309.0, 288.0, 282.0, 285.0, 299.0, 315.0, 315.0, 315.0, 318.0, 285.0, 297.0, 319.0, 311.0, 292.0, 290.0, 319.0, 317.0, 319.0, 311.0]}, "sampler_perf": {"mean_env_wait_ms": 1.0582836436059244, "mean_processing_ms": 0.2798326074687889, "mean_inference_ms": 1.6195165404601743}, "off_policy_estimator": {}, "info": {"num_steps_trained": 5928000, "num_steps_sampled": 3161600, "sample_time_ms": 21417.89, "load_time_ms": 36.65, "grad_time_ms": 9717.385, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.007093754131346941, "policy_loss": -0.0009618126205168664, "vf_loss": 86.259033203125, "vf_explained_var": 0.7558541893959045, "kl": 0.001976242521777749, "entropy": 1.140650749206543, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3161600, "episodes_total": 7904, "training_iteration": 247, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-49-26", "timestamp": 1660254566, "time_this_iter_s": 34.550382137298584, "time_total_s": 12981.271257400513, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 12981.271257400513, "timesteps_since_restore": 3161600, "iterations_since_restore": 247, "perf": {"cpu_util_percent": 29.197959183673472, "ram_util_percent": 58.64285714285715}}
+{"episode_reward_max": 639.0, "episode_reward_min": 513.0, "episode_reward_mean": 608.15, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 252.0}, "policy_reward_max": {"ppo": 329.0}, "policy_reward_mean": {"ppo": 304.075}, "custom_metrics": {"sparse_reward_mean": 210.8, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 186.55, "shaped_reward_min": 153, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.04, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 18.87, "onion_pickup_agent_1_min": 14, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.44, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 18.14, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.85, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.7, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.29, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, "useful_onion_drop_agent_1_mean": 0.27, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 15.82, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.88, "potting_onion_agent_1_min": 13, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.17, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.41, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.73, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.97, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.32, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.27, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.68, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.01, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.67, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.93, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 15.82, 
"optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.88, "optimal_onion_potting_agent_1_min": 13, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.82, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.88, "viable_onion_potting_agent_1_min": 13, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [636.0, 636.0, 576.0, 633.0, 587.0, 633.0, 630.0, 636.0, 596.0, 576.0, 636.0, 633.0, 630.0, 582.0, 582.0, 627.0, 630.0, 627.0, 639.0, 573.0, 627.0, 633.0, 582.0, 582.0, 582.0, 561.0, 627.0, 636.0, 636.0, 636.0, 584.0, 633.0, 627.0, 527.0, 624.0, 587.0, 627.0, 630.0, 579.0, 630.0, 627.0, 633.0, 630.0, 630.0, 582.0, 639.0, 627.0, 587.0, 639.0, 636.0, 582.0, 627.0, 582.0, 630.0, 633.0, 636.0, 513.0, 582.0, 630.0, 570.0, 584.0, 630.0, 633.0, 582.0, 630.0, 582.0, 636.0, 630.0, 633.0, 582.0, 630.0, 576.0, 582.0, 633.0, 573.0, 587.0, 582.0, 630.0, 633.0, 582.0, 633.0, 530.0, 636.0, 582.0, 636.0, 584.0, 630.0, 633.0, 630.0, 582.0, 630.0, 521.0, 587.0, 627.0, 576.0, 630.0, 579.0, 630.0, 582.0, 584.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [319.0, 317.0, 309.0, 327.0, 270.0, 306.0, 305.0, 328.0, 286.0, 301.0, 322.0, 311.0, 308.0, 322.0, 314.0, 322.0, 299.0, 297.0, 293.0, 283.0, 316.0, 320.0, 319.0, 314.0, 311.0, 319.0, 288.0, 294.0, 298.0, 284.0, 312.0, 315.0, 324.0, 306.0, 311.0, 316.0, 322.0, 317.0, 297.0, 276.0, 307.0, 320.0, 321.0, 312.0, 280.0, 302.0, 304.0, 278.0, 300.0, 282.0, 266.0, 295.0, 303.0, 324.0, 324.0, 312.0, 324.0, 312.0, 318.0, 318.0, 296.0, 288.0, 323.0, 310.0, 313.0, 314.0, 259.0, 268.0, 311.0, 313.0, 288.0, 299.0, 313.0, 314.0, 311.0, 319.0, 290.0, 289.0, 321.0, 309.0, 309.0, 318.0, 329.0, 304.0, 323.0, 307.0, 318.0, 312.0, 293.0, 289.0, 314.0, 325.0, 314.0, 313.0, 297.0, 290.0, 314.0, 
325.0, 311.0, 325.0, 290.0, 292.0, 303.0, 324.0, 296.0, 286.0, 314.0, 316.0, 315.0, 318.0, 324.0, 312.0, 261.0, 252.0, 293.0, 289.0, 321.0, 309.0, 288.0, 282.0, 285.0, 299.0, 315.0, 315.0, 315.0, 318.0, 285.0, 297.0, 319.0, 311.0, 292.0, 290.0, 319.0, 317.0, 319.0, 311.0, 319.0, 314.0, 295.0, 287.0, 314.0, 316.0, 291.0, 285.0, 288.0, 294.0, 319.0, 314.0, 279.0, 294.0, 290.0, 297.0, 288.0, 294.0, 303.0, 327.0, 324.0, 309.0, 286.0, 296.0, 316.0, 317.0, 261.0, 269.0, 322.0, 314.0, 288.0, 294.0, 317.0, 319.0, 296.0, 288.0, 316.0, 314.0, 321.0, 312.0, 301.0, 329.0, 293.0, 289.0, 313.0, 317.0, 259.0, 262.0, 293.0, 294.0, 308.0, 319.0, 283.0, 293.0, 308.0, 322.0, 297.0, 282.0, 321.0, 309.0, 303.0, 279.0, 302.0, 282.0]}, "sampler_perf": {"mean_env_wait_ms": 1.055854669948681, "mean_processing_ms": 0.27935130516970164, "mean_inference_ms": 1.6177570824217435}, "off_policy_estimator": {}, "info": {"num_steps_trained": 5952000, "num_steps_sampled": 3174400, "sample_time_ms": 21678.331, "load_time_ms": 37.099, "grad_time_ms": 9688.848, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0009431429207324982, "policy_loss": -0.008797372691333294, "vf_loss": 84.26534271240234, "vf_explained_var": 0.7609202265739441, "kl": 0.001977160107344389, "entropy": 1.14460289478302, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3174400, "episodes_total": 7936, "training_iteration": 248, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-50-00", "timestamp": 1660254600, "time_this_iter_s": 33.773277044296265, "time_total_s": 13015.044534444809, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 13015.044534444809, "timesteps_since_restore": 3174400, "iterations_since_restore": 248, "perf": {"cpu_util_percent": 27.302083333333332, "ram_util_percent": 58.68958333333333}}
+{"episode_reward_max": 639.0, "episode_reward_min": 425.0, "episode_reward_mean": 606.43, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 211.0}, "policy_reward_max": {"ppo": 330.0}, "policy_reward_mean": {"ppo": 303.215}, "custom_metrics": {"sparse_reward_mean": 210.0, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 186.43, "shaped_reward_min": 145, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.3, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 18.58, "onion_pickup_agent_1_min": 14, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.63, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 17.93, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.82, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.67, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.28, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, "useful_onion_drop_agent_1_mean": 0.26, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.14, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.6, "potting_onion_agent_1_min": 12, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.08, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.46, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.63, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.04, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.32, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.27, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.62, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.06, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.57, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.99, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 16.14, 
"optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.6, "optimal_onion_potting_agent_1_min": 12, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.14, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.6, "viable_onion_potting_agent_1_min": 12, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [633.0, 627.0, 636.0, 627.0, 582.0, 582.0, 587.0, 630.0, 630.0, 636.0, 633.0, 633.0, 582.0, 633.0, 630.0, 582.0, 425.0, 576.0, 578.0, 627.0, 587.0, 639.0, 636.0, 582.0, 630.0, 630.0, 525.0, 630.0, 579.0, 579.0, 630.0, 587.0, 630.0, 582.0, 636.0, 630.0, 633.0, 582.0, 630.0, 576.0, 582.0, 633.0, 573.0, 587.0, 582.0, 630.0, 633.0, 582.0, 633.0, 530.0, 636.0, 582.0, 636.0, 584.0, 630.0, 633.0, 630.0, 582.0, 630.0, 521.0, 587.0, 627.0, 576.0, 630.0, 579.0, 630.0, 582.0, 584.0, 636.0, 636.0, 576.0, 633.0, 587.0, 633.0, 630.0, 636.0, 596.0, 576.0, 636.0, 633.0, 630.0, 582.0, 582.0, 627.0, 630.0, 627.0, 639.0, 573.0, 627.0, 633.0, 582.0, 582.0, 582.0, 561.0, 627.0, 636.0, 636.0, 636.0, 584.0, 633.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [319.0, 314.0, 306.0, 321.0, 317.0, 319.0, 308.0, 319.0, 286.0, 296.0, 296.0, 286.0, 301.0, 286.0, 309.0, 321.0, 313.0, 317.0, 327.0, 309.0, 320.0, 313.0, 319.0, 314.0, 293.0, 289.0, 319.0, 314.0, 314.0, 316.0, 293.0, 289.0, 214.0, 211.0, 299.0, 277.0, 295.0, 283.0, 308.0, 319.0, 298.0, 289.0, 330.0, 309.0, 316.0, 320.0, 291.0, 291.0, 314.0, 316.0, 311.0, 319.0, 260.0, 265.0, 318.0, 312.0, 291.0, 288.0, 295.0, 284.0, 309.0, 321.0, 295.0, 292.0, 319.0, 311.0, 292.0, 290.0, 319.0, 317.0, 319.0, 311.0, 319.0, 314.0, 295.0, 287.0, 314.0, 316.0, 291.0, 285.0, 288.0, 294.0, 319.0, 314.0, 279.0, 294.0, 290.0, 297.0, 288.0, 294.0, 303.0, 327.0, 324.0, 309.0, 286.0, 296.0, 316.0, 
317.0, 261.0, 269.0, 322.0, 314.0, 288.0, 294.0, 317.0, 319.0, 296.0, 288.0, 316.0, 314.0, 321.0, 312.0, 301.0, 329.0, 293.0, 289.0, 313.0, 317.0, 259.0, 262.0, 293.0, 294.0, 308.0, 319.0, 283.0, 293.0, 308.0, 322.0, 297.0, 282.0, 321.0, 309.0, 303.0, 279.0, 302.0, 282.0, 319.0, 317.0, 309.0, 327.0, 270.0, 306.0, 305.0, 328.0, 286.0, 301.0, 322.0, 311.0, 308.0, 322.0, 314.0, 322.0, 299.0, 297.0, 293.0, 283.0, 316.0, 320.0, 319.0, 314.0, 311.0, 319.0, 288.0, 294.0, 298.0, 284.0, 312.0, 315.0, 324.0, 306.0, 311.0, 316.0, 322.0, 317.0, 297.0, 276.0, 307.0, 320.0, 321.0, 312.0, 280.0, 302.0, 304.0, 278.0, 300.0, 282.0, 266.0, 295.0, 303.0, 324.0, 324.0, 312.0, 324.0, 312.0, 318.0, 318.0, 296.0, 288.0, 323.0, 310.0]}, "sampler_perf": {"mean_env_wait_ms": 1.053454756557531, "mean_processing_ms": 0.27887402151447716, "mean_inference_ms": 1.6161016017922192}, "off_policy_estimator": {}, "info": {"num_steps_trained": 5976000, "num_steps_sampled": 3187200, "sample_time_ms": 21826.606, "load_time_ms": 36.816, "grad_time_ms": 9749.499, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0024596769362688065, "policy_loss": -0.005453174468129873, "vf_loss": 84.88723754882812, "vf_explained_var": 0.7672951221466064, "kl": 0.0021601892076432705, "entropy": 1.1517353057861328, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3187200, "episodes_total": 7968, "training_iteration": 249, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-50-33", "timestamp": 1660254633, "time_this_iter_s": 32.3484160900116, "time_total_s": 13047.39295053482, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 13047.39295053482, "timesteps_since_restore": 3187200, "iterations_since_restore": 249, "perf": {"cpu_util_percent": 29.615217391304352, "ram_util_percent": 58.70434782608695}}
+{"episode_reward_max": 639.0, "episode_reward_min": 425.0, "episode_reward_mean": 604.57, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 211.0}, "policy_reward_max": {"ppo": 330.0}, "policy_reward_mean": {"ppo": 302.285}, "custom_metrics": {"sparse_reward_mean": 209.4, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 185.77, "shaped_reward_min": 145, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.29, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 18.5, "onion_pickup_agent_1_min": 14, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.63, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 17.89, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.78, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.67, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.19, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.16, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.14, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.47, "potting_onion_agent_1_min": 12, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.13, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.48, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.61, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.97, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.34, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.29, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.04, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.66, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.03, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.6, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.94, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.04, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 16.14, 
"optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.47, "optimal_onion_potting_agent_1_min": 12, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.14, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.47, "viable_onion_potting_agent_1_min": 12, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [627.0, 584.0, 624.0, 570.0, 579.0, 587.0, 567.0, 627.0, 587.0, 576.0, 636.0, 630.0, 539.0, 627.0, 639.0, 636.0, 587.0, 630.0, 570.0, 570.0, 584.0, 621.0, 582.0, 627.0, 582.0, 627.0, 579.0, 533.0, 630.0, 630.0, 579.0, 596.0, 579.0, 630.0, 582.0, 584.0, 636.0, 636.0, 576.0, 633.0, 587.0, 633.0, 630.0, 636.0, 596.0, 576.0, 636.0, 633.0, 630.0, 582.0, 582.0, 627.0, 630.0, 627.0, 639.0, 573.0, 627.0, 633.0, 582.0, 582.0, 582.0, 561.0, 627.0, 636.0, 636.0, 636.0, 584.0, 633.0, 633.0, 627.0, 636.0, 627.0, 582.0, 582.0, 587.0, 630.0, 630.0, 636.0, 633.0, 633.0, 582.0, 633.0, 630.0, 582.0, 425.0, 576.0, 578.0, 627.0, 587.0, 639.0, 636.0, 582.0, 630.0, 630.0, 525.0, 630.0, 579.0, 579.0, 630.0, 587.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [321.0, 306.0, 291.0, 293.0, 310.0, 314.0, 280.0, 290.0, 296.0, 283.0, 285.0, 302.0, 287.0, 280.0, 318.0, 309.0, 277.0, 310.0, 292.0, 284.0, 314.0, 322.0, 306.0, 324.0, 278.0, 261.0, 309.0, 318.0, 319.0, 320.0, 322.0, 314.0, 286.0, 301.0, 326.0, 304.0, 295.0, 275.0, 292.0, 278.0, 295.0, 289.0, 308.0, 313.0, 290.0, 292.0, 305.0, 322.0, 291.0, 291.0, 327.0, 300.0, 279.0, 300.0, 263.0, 270.0, 316.0, 314.0, 313.0, 317.0, 296.0, 283.0, 302.0, 294.0, 297.0, 282.0, 321.0, 309.0, 303.0, 279.0, 302.0, 282.0, 319.0, 317.0, 309.0, 327.0, 270.0, 306.0, 305.0, 328.0, 286.0, 301.0, 322.0, 311.0, 308.0, 322.0, 314.0, 322.0, 299.0, 297.0, 293.0, 283.0, 316.0, 320.0, 319.0, 314.0, 311.0, 
319.0, 288.0, 294.0, 298.0, 284.0, 312.0, 315.0, 324.0, 306.0, 311.0, 316.0, 322.0, 317.0, 297.0, 276.0, 307.0, 320.0, 321.0, 312.0, 280.0, 302.0, 304.0, 278.0, 300.0, 282.0, 266.0, 295.0, 303.0, 324.0, 324.0, 312.0, 324.0, 312.0, 318.0, 318.0, 296.0, 288.0, 323.0, 310.0, 319.0, 314.0, 306.0, 321.0, 317.0, 319.0, 308.0, 319.0, 286.0, 296.0, 296.0, 286.0, 301.0, 286.0, 309.0, 321.0, 313.0, 317.0, 327.0, 309.0, 320.0, 313.0, 319.0, 314.0, 293.0, 289.0, 319.0, 314.0, 314.0, 316.0, 293.0, 289.0, 214.0, 211.0, 299.0, 277.0, 295.0, 283.0, 308.0, 319.0, 298.0, 289.0, 330.0, 309.0, 316.0, 320.0, 291.0, 291.0, 314.0, 316.0, 311.0, 319.0, 260.0, 265.0, 318.0, 312.0, 291.0, 288.0, 295.0, 284.0, 309.0, 321.0, 295.0, 292.0]}, "sampler_perf": {"mean_env_wait_ms": 1.0510667862182812, "mean_processing_ms": 0.27840025138599184, "mean_inference_ms": 1.6143808795038155}, "off_policy_estimator": {}, "info": {"num_steps_trained": 6000000, "num_steps_sampled": 3200000, "sample_time_ms": 22115.848, "load_time_ms": 36.609, "grad_time_ms": 9825.426, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0038353295531123877, "policy_loss": -0.004546869080513716, "vf_loss": 89.5432357788086, "vf_explained_var": 0.7639234662055969, "kl": 0.002313032979145646, "entropy": 1.144262671470642, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3200000, "episodes_total": 8000, "training_iteration": 250, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-51-06", "timestamp": 1660254666, "time_this_iter_s": 33.03909492492676, "time_total_s": 13080.432045459747, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 13080.432045459747, "timesteps_since_restore": 3200000, "iterations_since_restore": 250, "perf": {"cpu_util_percent": 30.089130434782607, "ram_util_percent": 58.62826086956523}}
+{"episode_reward_max": 639.0, "episode_reward_min": 425.0, "episode_reward_mean": 605.69, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 211.0}, "policy_reward_max": {"ppo": 330.0}, "policy_reward_mean": {"ppo": 302.845}, "custom_metrics": {"sparse_reward_mean": 209.8, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 186.09, "shaped_reward_min": 145, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.03, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 18.8, "onion_pickup_agent_1_min": 12, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.39, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 18.08, "useful_onion_pickup_agent_1_min": 11, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.73, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.77, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.17, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.22, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 15.94, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.66, "potting_onion_agent_1_min": 12, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 6.11, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.55, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.62, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.01, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.27, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.36, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.7, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.01, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.62, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.94, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.94, 
"optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.66, "optimal_onion_potting_agent_1_min": 12, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.94, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.66, "viable_onion_potting_agent_1_min": 12, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [630.0, 579.0, 627.0, 630.0, 633.0, 633.0, 587.0, 639.0, 582.0, 581.0, 587.0, 624.0, 633.0, 621.0, 582.0, 639.0, 636.0, 582.0, 587.0, 627.0, 582.0, 582.0, 627.0, 639.0, 630.0, 630.0, 587.0, 630.0, 627.0, 630.0, 630.0, 582.0, 636.0, 636.0, 584.0, 633.0, 633.0, 627.0, 636.0, 627.0, 582.0, 582.0, 587.0, 630.0, 630.0, 636.0, 633.0, 633.0, 582.0, 633.0, 630.0, 582.0, 425.0, 576.0, 578.0, 627.0, 587.0, 639.0, 636.0, 582.0, 630.0, 630.0, 525.0, 630.0, 579.0, 579.0, 630.0, 587.0, 627.0, 584.0, 624.0, 570.0, 579.0, 587.0, 567.0, 627.0, 587.0, 576.0, 636.0, 630.0, 539.0, 627.0, 639.0, 636.0, 587.0, 630.0, 570.0, 570.0, 584.0, 621.0, 582.0, 627.0, 582.0, 627.0, 579.0, 533.0, 630.0, 630.0, 579.0, 596.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [311.0, 319.0, 304.0, 275.0, 320.0, 307.0, 316.0, 314.0, 312.0, 321.0, 316.0, 317.0, 295.0, 292.0, 312.0, 327.0, 294.0, 288.0, 284.0, 297.0, 295.0, 292.0, 302.0, 322.0, 316.0, 317.0, 308.0, 313.0, 291.0, 291.0, 322.0, 317.0, 319.0, 317.0, 291.0, 291.0, 290.0, 297.0, 307.0, 320.0, 290.0, 292.0, 280.0, 302.0, 319.0, 308.0, 316.0, 323.0, 313.0, 317.0, 305.0, 325.0, 296.0, 291.0, 313.0, 317.0, 309.0, 318.0, 311.0, 319.0, 308.0, 322.0, 289.0, 293.0, 324.0, 312.0, 318.0, 318.0, 296.0, 288.0, 323.0, 310.0, 319.0, 314.0, 306.0, 321.0, 317.0, 319.0, 308.0, 319.0, 286.0, 296.0, 296.0, 286.0, 301.0, 286.0, 309.0, 321.0, 313.0, 317.0, 327.0, 309.0, 320.0, 313.0, 319.0, 314.0, 293.0, 
289.0, 319.0, 314.0, 314.0, 316.0, 293.0, 289.0, 214.0, 211.0, 299.0, 277.0, 295.0, 283.0, 308.0, 319.0, 298.0, 289.0, 330.0, 309.0, 316.0, 320.0, 291.0, 291.0, 314.0, 316.0, 311.0, 319.0, 260.0, 265.0, 318.0, 312.0, 291.0, 288.0, 295.0, 284.0, 309.0, 321.0, 295.0, 292.0, 321.0, 306.0, 291.0, 293.0, 310.0, 314.0, 280.0, 290.0, 296.0, 283.0, 285.0, 302.0, 287.0, 280.0, 318.0, 309.0, 277.0, 310.0, 292.0, 284.0, 314.0, 322.0, 306.0, 324.0, 278.0, 261.0, 309.0, 318.0, 319.0, 320.0, 322.0, 314.0, 286.0, 301.0, 326.0, 304.0, 295.0, 275.0, 292.0, 278.0, 295.0, 289.0, 308.0, 313.0, 290.0, 292.0, 305.0, 322.0, 291.0, 291.0, 327.0, 300.0, 279.0, 300.0, 263.0, 270.0, 316.0, 314.0, 313.0, 317.0, 296.0, 283.0, 302.0, 294.0]}, "sampler_perf": {"mean_env_wait_ms": 1.0486945411620672, "mean_processing_ms": 0.2779294488920563, "mean_inference_ms": 1.612487194421866}, "off_policy_estimator": {}, "info": {"num_steps_trained": 6024000, "num_steps_sampled": 3212800, "sample_time_ms": 22207.762, "load_time_ms": 36.674, "grad_time_ms": 9691.515, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.005205323453992605, "policy_loss": -0.0029822138603776693, "vf_loss": 87.57022857666016, "vf_explained_var": 0.7586490511894226, "kl": 0.0020639507565647364, "entropy": 1.1389611959457397, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3212800, "episodes_total": 8032, "training_iteration": 251, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-51-37", "timestamp": 1660254697, "time_this_iter_s": 31.857529878616333, "time_total_s": 13112.289575338364, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 13112.289575338364, "timesteps_since_restore": 3212800, "iterations_since_restore": 251, "perf": {"cpu_util_percent": 30.984444444444442, "ram_util_percent": 58.69777777777778}}
+{"episode_reward_max": 639.0, "episode_reward_min": 518.0, "episode_reward_mean": 606.65, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 258.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 303.325}, "custom_metrics": {"sparse_reward_mean": 210.2, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 186.25, "shaped_reward_min": 158, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.82, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 18.86, "onion_pickup_agent_1_min": 12, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.27, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 18.22, "useful_onion_pickup_agent_1_min": 11, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.68, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.66, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.1, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.17, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 15.8, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.89, "potting_onion_agent_1_min": 12, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 6.14, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.56, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.6, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.96, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.29, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.4, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.04, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.73, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.99, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.62, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.94, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.8, 
"optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.89, "optimal_onion_potting_agent_1_min": 12, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.8, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.89, "viable_onion_potting_agent_1_min": 12, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [636.0, 630.0, 630.0, 633.0, 633.0, 627.0, 582.0, 579.0, 630.0, 633.0, 633.0, 587.0, 633.0, 633.0, 636.0, 582.0, 570.0, 590.0, 579.0, 576.0, 630.0, 581.0, 579.0, 518.0, 636.0, 636.0, 633.0, 576.0, 590.0, 633.0, 636.0, 633.0, 579.0, 579.0, 630.0, 587.0, 627.0, 584.0, 624.0, 570.0, 579.0, 587.0, 567.0, 627.0, 587.0, 576.0, 636.0, 630.0, 539.0, 627.0, 639.0, 636.0, 587.0, 630.0, 570.0, 570.0, 584.0, 621.0, 582.0, 627.0, 582.0, 627.0, 579.0, 533.0, 630.0, 630.0, 579.0, 596.0, 630.0, 579.0, 627.0, 630.0, 633.0, 633.0, 587.0, 639.0, 582.0, 581.0, 587.0, 624.0, 633.0, 621.0, 582.0, 639.0, 636.0, 582.0, 587.0, 627.0, 582.0, 582.0, 627.0, 639.0, 630.0, 630.0, 587.0, 630.0, 627.0, 630.0, 630.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [319.0, 317.0, 317.0, 313.0, 312.0, 318.0, 314.0, 319.0, 317.0, 316.0, 323.0, 304.0, 292.0, 290.0, 289.0, 290.0, 316.0, 314.0, 306.0, 327.0, 316.0, 317.0, 299.0, 288.0, 311.0, 322.0, 319.0, 314.0, 322.0, 314.0, 296.0, 286.0, 291.0, 279.0, 296.0, 294.0, 291.0, 288.0, 288.0, 288.0, 313.0, 317.0, 290.0, 291.0, 294.0, 285.0, 258.0, 260.0, 311.0, 325.0, 316.0, 320.0, 315.0, 318.0, 282.0, 294.0, 301.0, 289.0, 318.0, 315.0, 314.0, 322.0, 316.0, 317.0, 291.0, 288.0, 295.0, 284.0, 309.0, 321.0, 295.0, 292.0, 321.0, 306.0, 291.0, 293.0, 310.0, 314.0, 280.0, 290.0, 296.0, 283.0, 285.0, 302.0, 287.0, 280.0, 318.0, 309.0, 277.0, 310.0, 292.0, 284.0, 314.0, 322.0, 306.0, 324.0, 278.0, 
261.0, 309.0, 318.0, 319.0, 320.0, 322.0, 314.0, 286.0, 301.0, 326.0, 304.0, 295.0, 275.0, 292.0, 278.0, 295.0, 289.0, 308.0, 313.0, 290.0, 292.0, 305.0, 322.0, 291.0, 291.0, 327.0, 300.0, 279.0, 300.0, 263.0, 270.0, 316.0, 314.0, 313.0, 317.0, 296.0, 283.0, 302.0, 294.0, 311.0, 319.0, 304.0, 275.0, 320.0, 307.0, 316.0, 314.0, 312.0, 321.0, 316.0, 317.0, 295.0, 292.0, 312.0, 327.0, 294.0, 288.0, 284.0, 297.0, 295.0, 292.0, 302.0, 322.0, 316.0, 317.0, 308.0, 313.0, 291.0, 291.0, 322.0, 317.0, 319.0, 317.0, 291.0, 291.0, 290.0, 297.0, 307.0, 320.0, 290.0, 292.0, 280.0, 302.0, 319.0, 308.0, 316.0, 323.0, 313.0, 317.0, 305.0, 325.0, 296.0, 291.0, 313.0, 317.0, 309.0, 318.0, 311.0, 319.0, 308.0, 322.0, 289.0, 293.0]}, "sampler_perf": {"mean_env_wait_ms": 1.0463435116537838, "mean_processing_ms": 0.277465645143399, "mean_inference_ms": 1.6108149086882515}, "off_policy_estimator": {}, "info": {"num_steps_trained": 6048000, "num_steps_sampled": 3225600, "sample_time_ms": 22477.661, "load_time_ms": 36.527, "grad_time_ms": 9715.891, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.003005747450515628, "policy_loss": -0.005394397769123316, "vf_loss": 89.67745208740234, "vf_explained_var": 0.7541216015815735, "kl": 0.0018617714522406459, "entropy": 1.1351839303970337, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3225600, "episodes_total": 8064, "training_iteration": 252, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-52-11", "timestamp": 1660254731, "time_this_iter_s": 33.91162323951721, "time_total_s": 13146.20119857788, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 13146.20119857788, "timesteps_since_restore": 3225600, "iterations_since_restore": 252, "perf": {"cpu_util_percent": 29.32083333333333, "ram_util_percent": 58.725}}
+{"episode_reward_max": 639.0, "episode_reward_min": 473.0, "episode_reward_mean": 603.56, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 231.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 301.78}, "custom_metrics": {"sparse_reward_mean": 208.8, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 185.96, "shaped_reward_min": 153, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.81, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 18.78, "onion_pickup_agent_1_min": 12, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.23, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 18.06, "useful_onion_pickup_agent_1_min": 11, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.71, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.66, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.16, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.21, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 15.78, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.85, "potting_onion_agent_1_min": 12, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 6.14, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.51, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.62, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.97, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.28, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.38, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.04, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.72, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.96, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.56, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.94, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.03, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.78, 
"optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.85, "optimal_onion_potting_agent_1_min": 12, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.78, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.85, "viable_onion_potting_agent_1_min": 12, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 567.0, 636.0, 570.0, 633.0, 633.0, 582.0, 582.0, 587.0, 579.0, 536.0, 587.0, 587.0, 567.0, 630.0, 587.0, 539.0, 582.0, 570.0, 473.0, 627.0, 627.0, 587.0, 587.0, 633.0, 581.0, 579.0, 576.0, 587.0, 636.0, 582.0, 582.0, 630.0, 630.0, 579.0, 596.0, 630.0, 579.0, 627.0, 630.0, 633.0, 633.0, 587.0, 639.0, 582.0, 581.0, 587.0, 624.0, 633.0, 621.0, 582.0, 639.0, 636.0, 582.0, 587.0, 627.0, 582.0, 582.0, 627.0, 639.0, 630.0, 630.0, 587.0, 630.0, 627.0, 630.0, 630.0, 582.0, 636.0, 630.0, 630.0, 633.0, 633.0, 627.0, 582.0, 579.0, 630.0, 633.0, 633.0, 587.0, 633.0, 633.0, 636.0, 582.0, 570.0, 590.0, 579.0, 576.0, 630.0, 581.0, 579.0, 518.0, 636.0, 636.0, 633.0, 576.0, 590.0, 633.0, 636.0, 633.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [288.0, 294.0, 285.0, 282.0, 319.0, 317.0, 278.0, 292.0, 321.0, 312.0, 318.0, 315.0, 288.0, 294.0, 290.0, 292.0, 303.0, 284.0, 299.0, 280.0, 268.0, 268.0, 288.0, 299.0, 291.0, 296.0, 278.0, 289.0, 315.0, 315.0, 295.0, 292.0, 265.0, 274.0, 294.0, 288.0, 288.0, 282.0, 242.0, 231.0, 316.0, 311.0, 319.0, 308.0, 288.0, 299.0, 303.0, 284.0, 317.0, 316.0, 289.0, 292.0, 283.0, 296.0, 287.0, 289.0, 293.0, 294.0, 314.0, 322.0, 289.0, 293.0, 296.0, 286.0, 316.0, 314.0, 313.0, 317.0, 296.0, 283.0, 302.0, 294.0, 311.0, 319.0, 304.0, 275.0, 320.0, 307.0, 316.0, 314.0, 312.0, 321.0, 316.0, 317.0, 295.0, 292.0, 312.0, 327.0, 294.0, 288.0, 284.0, 297.0, 295.0, 292.0, 302.0, 322.0, 316.0, 
317.0, 308.0, 313.0, 291.0, 291.0, 322.0, 317.0, 319.0, 317.0, 291.0, 291.0, 290.0, 297.0, 307.0, 320.0, 290.0, 292.0, 280.0, 302.0, 319.0, 308.0, 316.0, 323.0, 313.0, 317.0, 305.0, 325.0, 296.0, 291.0, 313.0, 317.0, 309.0, 318.0, 311.0, 319.0, 308.0, 322.0, 289.0, 293.0, 319.0, 317.0, 317.0, 313.0, 312.0, 318.0, 314.0, 319.0, 317.0, 316.0, 323.0, 304.0, 292.0, 290.0, 289.0, 290.0, 316.0, 314.0, 306.0, 327.0, 316.0, 317.0, 299.0, 288.0, 311.0, 322.0, 319.0, 314.0, 322.0, 314.0, 296.0, 286.0, 291.0, 279.0, 296.0, 294.0, 291.0, 288.0, 288.0, 288.0, 313.0, 317.0, 290.0, 291.0, 294.0, 285.0, 258.0, 260.0, 311.0, 325.0, 316.0, 320.0, 315.0, 318.0, 282.0, 294.0, 301.0, 289.0, 318.0, 315.0, 314.0, 322.0, 316.0, 317.0]}, "sampler_perf": {"mean_env_wait_ms": 1.0439919117658958, "mean_processing_ms": 0.2769978233028824, "mean_inference_ms": 1.608730530277712}, "off_policy_estimator": {}, "info": {"num_steps_trained": 6072000, "num_steps_sampled": 3238400, "sample_time_ms": 22102.374, "load_time_ms": 36.62, "grad_time_ms": 9528.927, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.007198518142104149, "policy_loss": -0.002070576651021838, "vf_loss": 98.38677215576172, "vf_explained_var": 0.7492752075195312, "kl": 0.0017245132476091385, "entropy": 1.1391605138778687, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3238400, "episodes_total": 8096, "training_iteration": 253, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-52-38", "timestamp": 1660254758, "time_this_iter_s": 26.443045139312744, "time_total_s": 13172.644243717194, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 13172.644243717194, "timesteps_since_restore": 3238400, "iterations_since_restore": 253, "perf": {"cpu_util_percent": 30.831578947368424, "ram_util_percent": 58.665789473684214}}
+{"episode_reward_max": 639.0, "episode_reward_min": 473.0, "episode_reward_mean": 603.54, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 231.0}, "policy_reward_max": {"ppo": 332.0}, "policy_reward_mean": {"ppo": 301.77}, "custom_metrics": {"sparse_reward_mean": 209.0, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 185.54, "shaped_reward_min": 153, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.9, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 18.7, "onion_pickup_agent_1_min": 13, "onion_pickup_agent_1_max": 27, "useful_onion_pickup_agent_0_mean": 16.32, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 17.96, "useful_onion_pickup_agent_1_min": 12, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.71, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.64, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.17, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.18, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.87, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.74, "potting_onion_agent_1_min": 12, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.23, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.4, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.62, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.9, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.41, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.28, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.04, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.73, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.93, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.57, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.92, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.04, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.87, 
"optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.74, "optimal_onion_potting_agent_1_min": 12, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.87, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.74, "viable_onion_potting_agent_1_min": 12, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [633.0, 584.0, 636.0, 630.0, 627.0, 630.0, 561.0, 633.0, 579.0, 627.0, 582.0, 579.0, 582.0, 627.0, 582.0, 636.0, 630.0, 630.0, 582.0, 633.0, 582.0, 636.0, 633.0, 627.0, 582.0, 630.0, 573.0, 587.0, 627.0, 633.0, 627.0, 639.0, 627.0, 630.0, 630.0, 582.0, 636.0, 630.0, 630.0, 633.0, 633.0, 627.0, 582.0, 579.0, 630.0, 633.0, 633.0, 587.0, 633.0, 633.0, 636.0, 582.0, 570.0, 590.0, 579.0, 576.0, 630.0, 581.0, 579.0, 518.0, 636.0, 636.0, 633.0, 576.0, 590.0, 633.0, 636.0, 633.0, 582.0, 567.0, 636.0, 570.0, 633.0, 633.0, 582.0, 582.0, 587.0, 579.0, 536.0, 587.0, 587.0, 567.0, 630.0, 587.0, 539.0, 582.0, 570.0, 473.0, 627.0, 627.0, 587.0, 587.0, 633.0, 581.0, 579.0, 576.0, 587.0, 636.0, 582.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [314.0, 319.0, 304.0, 280.0, 327.0, 309.0, 316.0, 314.0, 316.0, 311.0, 316.0, 314.0, 269.0, 292.0, 316.0, 317.0, 285.0, 294.0, 313.0, 314.0, 288.0, 294.0, 298.0, 281.0, 293.0, 289.0, 324.0, 303.0, 286.0, 296.0, 316.0, 320.0, 316.0, 314.0, 313.0, 317.0, 290.0, 292.0, 314.0, 319.0, 285.0, 297.0, 317.0, 319.0, 324.0, 309.0, 310.0, 317.0, 299.0, 283.0, 323.0, 307.0, 282.0, 291.0, 294.0, 293.0, 308.0, 319.0, 332.0, 301.0, 309.0, 318.0, 314.0, 325.0, 309.0, 318.0, 311.0, 319.0, 308.0, 322.0, 289.0, 293.0, 319.0, 317.0, 317.0, 313.0, 312.0, 318.0, 314.0, 319.0, 317.0, 316.0, 323.0, 304.0, 292.0, 290.0, 289.0, 290.0, 316.0, 314.0, 306.0, 327.0, 316.0, 317.0, 299.0, 288.0, 311.0, 
322.0, 319.0, 314.0, 322.0, 314.0, 296.0, 286.0, 291.0, 279.0, 296.0, 294.0, 291.0, 288.0, 288.0, 288.0, 313.0, 317.0, 290.0, 291.0, 294.0, 285.0, 258.0, 260.0, 311.0, 325.0, 316.0, 320.0, 315.0, 318.0, 282.0, 294.0, 301.0, 289.0, 318.0, 315.0, 314.0, 322.0, 316.0, 317.0, 288.0, 294.0, 285.0, 282.0, 319.0, 317.0, 278.0, 292.0, 321.0, 312.0, 318.0, 315.0, 288.0, 294.0, 290.0, 292.0, 303.0, 284.0, 299.0, 280.0, 268.0, 268.0, 288.0, 299.0, 291.0, 296.0, 278.0, 289.0, 315.0, 315.0, 295.0, 292.0, 265.0, 274.0, 294.0, 288.0, 288.0, 282.0, 242.0, 231.0, 316.0, 311.0, 319.0, 308.0, 288.0, 299.0, 303.0, 284.0, 317.0, 316.0, 289.0, 292.0, 283.0, 296.0, 287.0, 289.0, 293.0, 294.0, 314.0, 322.0, 289.0, 293.0, 296.0, 286.0]}, "sampler_perf": {"mean_env_wait_ms": 1.041648317929554, "mean_processing_ms": 0.27652981742042726, "mean_inference_ms": 1.6063886219680592}, "off_policy_estimator": {}, "info": {"num_steps_trained": 6096000, "num_steps_sampled": 3251200, "sample_time_ms": 22038.921, "load_time_ms": 36.872, "grad_time_ms": 9318.447, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0028869707603007555, "policy_loss": -0.005314534064382315, "vf_loss": 87.72003936767578, "vf_explained_var": 0.7518091797828674, "kl": 0.0020599865820258856, "entropy": 1.1409815549850464, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3251200, "episodes_total": 8128, "training_iteration": 254, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-53-05", "timestamp": 1660254785, "time_this_iter_s": 27.532819986343384, "time_total_s": 13200.177063703537, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 13200.177063703537, "timesteps_since_restore": 3251200, "iterations_since_restore": 254, "perf": {"cpu_util_percent": 32.46923076923077, "ram_util_percent": 58.67179487179486}}
+{"episode_reward_max": 639.0, "episode_reward_min": 473.0, "episode_reward_mean": 606.86, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 231.0}, "policy_reward_max": {"ppo": 332.0}, "policy_reward_mean": {"ppo": 303.43}, "custom_metrics": {"sparse_reward_mean": 210.4, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 186.06, "shaped_reward_min": 153, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.02, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 18.74, "onion_pickup_agent_1_min": 13, "onion_pickup_agent_1_max": 27, "useful_onion_pickup_agent_0_mean": 16.44, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 17.94, "useful_onion_pickup_agent_1_min": 12, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.72, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.69, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.17, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.22, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.0, "potting_onion_agent_0_min": 13, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.69, "potting_onion_agent_1_min": 12, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 6.43, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.49, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.63, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.9, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.56, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.33, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.05, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.05, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.74, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.96, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.62, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.94, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.03, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.0, 
"optimal_onion_potting_agent_0_min": 13, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.69, "optimal_onion_potting_agent_1_min": 12, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.0, "viable_onion_potting_agent_0_min": 13, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.69, "viable_onion_potting_agent_1_min": 12, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [576.0, 639.0, 630.0, 570.0, 636.0, 584.0, 636.0, 633.0, 624.0, 579.0, 627.0, 630.0, 582.0, 639.0, 627.0, 630.0, 627.0, 627.0, 630.0, 630.0, 624.0, 587.0, 636.0, 633.0, 567.0, 630.0, 627.0, 630.0, 630.0, 636.0, 630.0, 636.0, 590.0, 633.0, 636.0, 633.0, 582.0, 567.0, 636.0, 570.0, 633.0, 633.0, 582.0, 582.0, 587.0, 579.0, 536.0, 587.0, 587.0, 567.0, 630.0, 587.0, 539.0, 582.0, 570.0, 473.0, 627.0, 627.0, 587.0, 587.0, 633.0, 581.0, 579.0, 576.0, 587.0, 636.0, 582.0, 582.0, 633.0, 584.0, 636.0, 630.0, 627.0, 630.0, 561.0, 633.0, 579.0, 627.0, 582.0, 579.0, 582.0, 627.0, 582.0, 636.0, 630.0, 630.0, 582.0, 633.0, 582.0, 636.0, 633.0, 627.0, 582.0, 630.0, 573.0, 587.0, 627.0, 633.0, 627.0, 639.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [278.0, 298.0, 314.0, 325.0, 306.0, 324.0, 291.0, 279.0, 327.0, 309.0, 296.0, 288.0, 323.0, 313.0, 316.0, 317.0, 313.0, 311.0, 298.0, 281.0, 316.0, 311.0, 316.0, 314.0, 294.0, 288.0, 322.0, 317.0, 316.0, 311.0, 319.0, 311.0, 320.0, 307.0, 307.0, 320.0, 313.0, 317.0, 317.0, 313.0, 307.0, 317.0, 294.0, 293.0, 319.0, 317.0, 310.0, 323.0, 285.0, 282.0, 314.0, 316.0, 314.0, 313.0, 309.0, 321.0, 313.0, 317.0, 319.0, 317.0, 321.0, 309.0, 315.0, 321.0, 301.0, 289.0, 318.0, 315.0, 314.0, 322.0, 316.0, 317.0, 288.0, 294.0, 285.0, 282.0, 319.0, 317.0, 278.0, 292.0, 321.0, 312.0, 318.0, 315.0, 288.0, 294.0, 290.0, 292.0, 303.0, 284.0, 299.0, 280.0, 268.0, 268.0, 288.0, 299.0, 291.0, 
296.0, 278.0, 289.0, 315.0, 315.0, 295.0, 292.0, 265.0, 274.0, 294.0, 288.0, 288.0, 282.0, 242.0, 231.0, 316.0, 311.0, 319.0, 308.0, 288.0, 299.0, 303.0, 284.0, 317.0, 316.0, 289.0, 292.0, 283.0, 296.0, 287.0, 289.0, 293.0, 294.0, 314.0, 322.0, 289.0, 293.0, 296.0, 286.0, 314.0, 319.0, 304.0, 280.0, 327.0, 309.0, 316.0, 314.0, 316.0, 311.0, 316.0, 314.0, 269.0, 292.0, 316.0, 317.0, 285.0, 294.0, 313.0, 314.0, 288.0, 294.0, 298.0, 281.0, 293.0, 289.0, 324.0, 303.0, 286.0, 296.0, 316.0, 320.0, 316.0, 314.0, 313.0, 317.0, 290.0, 292.0, 314.0, 319.0, 285.0, 297.0, 317.0, 319.0, 324.0, 309.0, 310.0, 317.0, 299.0, 283.0, 323.0, 307.0, 282.0, 291.0, 294.0, 293.0, 308.0, 319.0, 332.0, 301.0, 309.0, 318.0, 314.0, 325.0]}, "sampler_perf": {"mean_env_wait_ms": 1.0393196321657538, "mean_processing_ms": 0.2760628271968967, "mean_inference_ms": 1.603750642060521}, "off_policy_estimator": {}, "info": {"num_steps_trained": 6120000, "num_steps_sampled": 3264000, "sample_time_ms": 22112.846, "load_time_ms": 37.19, "grad_time_ms": 9333.009, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.005571722984313965, "policy_loss": -0.0022253356873989105, "vf_loss": 83.61035919189453, "vf_explained_var": 0.7589413523674011, "kl": 0.0018155118450522423, "entropy": 1.1279449462890625, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3264000, "episodes_total": 8160, "training_iteration": 255, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-53-36", "timestamp": 1660254816, "time_this_iter_s": 31.015226125717163, "time_total_s": 13231.192289829254, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 13231.192289829254, "timesteps_since_restore": 3264000, "iterations_since_restore": 255, "perf": {"cpu_util_percent": 32.91162790697674, "ram_util_percent": 59.26976744186045}}
+{"episode_reward_max": 639.0, "episode_reward_min": 530.0, "episode_reward_mean": 612.37, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 265.0}, "policy_reward_max": {"ppo": 332.0}, "policy_reward_mean": {"ppo": 306.185}, "custom_metrics": {"sparse_reward_mean": 212.6, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 187.17, "shaped_reward_min": 152, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.01, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 18.92, "onion_pickup_agent_1_min": 14, "onion_pickup_agent_1_max": 27, "useful_onion_pickup_agent_0_mean": 16.43, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 18.1, "useful_onion_pickup_agent_1_min": 12, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.59, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.76, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.17, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.25, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.12, "potting_onion_agent_0_min": 13, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.81, "potting_onion_agent_1_min": 14, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 6.41, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.53, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.62, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.94, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.54, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.32, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.04, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.72, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.04, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.66, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.99, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.12, 
"optimal_onion_potting_agent_0_min": 13, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.81, "optimal_onion_potting_agent_1_min": 14, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.12, "viable_onion_potting_agent_0_min": 13, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.81, "viable_onion_potting_agent_1_min": 14, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [530.0, 639.0, 630.0, 639.0, 636.0, 569.0, 630.0, 579.0, 633.0, 576.0, 636.0, 579.0, 587.0, 627.0, 630.0, 579.0, 587.0, 633.0, 639.0, 630.0, 633.0, 636.0, 630.0, 630.0, 633.0, 576.0, 539.0, 630.0, 552.0, 590.0, 582.0, 630.0, 587.0, 636.0, 582.0, 582.0, 633.0, 584.0, 636.0, 630.0, 627.0, 630.0, 561.0, 633.0, 579.0, 627.0, 582.0, 579.0, 582.0, 627.0, 582.0, 636.0, 630.0, 630.0, 582.0, 633.0, 582.0, 636.0, 633.0, 627.0, 582.0, 630.0, 573.0, 587.0, 627.0, 633.0, 627.0, 639.0, 576.0, 639.0, 630.0, 570.0, 636.0, 584.0, 636.0, 633.0, 624.0, 579.0, 627.0, 630.0, 582.0, 639.0, 627.0, 630.0, 627.0, 627.0, 630.0, 630.0, 624.0, 587.0, 636.0, 633.0, 567.0, 630.0, 627.0, 630.0, 630.0, 636.0, 630.0, 636.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [265.0, 265.0, 319.0, 320.0, 319.0, 311.0, 317.0, 322.0, 319.0, 317.0, 282.0, 287.0, 311.0, 319.0, 285.0, 294.0, 321.0, 312.0, 285.0, 291.0, 312.0, 324.0, 290.0, 289.0, 296.0, 291.0, 316.0, 311.0, 322.0, 308.0, 293.0, 286.0, 285.0, 302.0, 319.0, 314.0, 320.0, 319.0, 310.0, 320.0, 313.0, 320.0, 317.0, 319.0, 319.0, 311.0, 308.0, 322.0, 310.0, 323.0, 288.0, 288.0, 266.0, 273.0, 310.0, 320.0, 278.0, 274.0, 299.0, 291.0, 295.0, 287.0, 323.0, 307.0, 293.0, 294.0, 314.0, 322.0, 289.0, 293.0, 296.0, 286.0, 314.0, 319.0, 304.0, 280.0, 327.0, 309.0, 316.0, 314.0, 316.0, 311.0, 316.0, 314.0, 269.0, 292.0, 316.0, 317.0, 285.0, 294.0, 313.0, 314.0, 288.0, 294.0, 298.0, 281.0, 293.0, 
289.0, 324.0, 303.0, 286.0, 296.0, 316.0, 320.0, 316.0, 314.0, 313.0, 317.0, 290.0, 292.0, 314.0, 319.0, 285.0, 297.0, 317.0, 319.0, 324.0, 309.0, 310.0, 317.0, 299.0, 283.0, 323.0, 307.0, 282.0, 291.0, 294.0, 293.0, 308.0, 319.0, 332.0, 301.0, 309.0, 318.0, 314.0, 325.0, 278.0, 298.0, 314.0, 325.0, 306.0, 324.0, 291.0, 279.0, 327.0, 309.0, 296.0, 288.0, 323.0, 313.0, 316.0, 317.0, 313.0, 311.0, 298.0, 281.0, 316.0, 311.0, 316.0, 314.0, 294.0, 288.0, 322.0, 317.0, 316.0, 311.0, 319.0, 311.0, 320.0, 307.0, 307.0, 320.0, 313.0, 317.0, 317.0, 313.0, 307.0, 317.0, 294.0, 293.0, 319.0, 317.0, 310.0, 323.0, 285.0, 282.0, 314.0, 316.0, 314.0, 313.0, 309.0, 321.0, 313.0, 317.0, 319.0, 317.0, 321.0, 309.0, 315.0, 321.0]}, "sampler_perf": {"mean_env_wait_ms": 1.0370215932262006, "mean_processing_ms": 0.275602527422642, "mean_inference_ms": 1.6014130194901954}, "off_policy_estimator": {}, "info": {"num_steps_trained": 6144000, "num_steps_sampled": 3276800, "sample_time_ms": 22185.118, "load_time_ms": 37.192, "grad_time_ms": 9265.444, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.001450125128030777, "policy_loss": -0.006554553750902414, "vf_loss": 85.70040893554688, "vf_explained_var": 0.7625378966331482, "kl": 0.0019486347446218133, "entropy": 1.1307319402694702, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3276800, "episodes_total": 8192, "training_iteration": 256, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-54-07", "timestamp": 1660254847, "time_this_iter_s": 30.66650390625, "time_total_s": 13261.858793735504, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 13261.858793735504, "timesteps_since_restore": 3276800, "iterations_since_restore": 256, "perf": {"cpu_util_percent": 32.206818181818186, "ram_util_percent": 58.78863636363636}}
+{"episode_reward_max": 639.0, "episode_reward_min": 476.0, "episode_reward_mean": 611.49, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 236.0}, "policy_reward_max": {"ppo": 332.0}, "policy_reward_mean": {"ppo": 305.745}, "custom_metrics": {"sparse_reward_mean": 212.2, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 187.09, "shaped_reward_min": 152, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.22, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 18.76, "onion_pickup_agent_1_min": 13, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.67, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 18.0, "useful_onion_pickup_agent_1_min": 12, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.63, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.78, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.14, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.28, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.27, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.66, "potting_onion_agent_1_min": 13, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 6.27, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.63, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.54, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.01, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.53, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.34, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.05, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.67, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 10, "soup_pickup_agent_1_mean": 5.13, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.58, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.04, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.05, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 5, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.27, 
"optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.66, "optimal_onion_potting_agent_1_min": 13, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.27, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.66, "viable_onion_potting_agent_1_min": 13, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [630.0, 627.0, 579.0, 633.0, 627.0, 582.0, 579.0, 582.0, 576.0, 630.0, 579.0, 587.0, 630.0, 630.0, 582.0, 587.0, 576.0, 587.0, 476.0, 636.0, 633.0, 636.0, 636.0, 630.0, 630.0, 639.0, 636.0, 576.0, 627.0, 579.0, 582.0, 633.0, 627.0, 633.0, 627.0, 639.0, 576.0, 639.0, 630.0, 570.0, 636.0, 584.0, 636.0, 633.0, 624.0, 579.0, 627.0, 630.0, 582.0, 639.0, 627.0, 630.0, 627.0, 627.0, 630.0, 630.0, 624.0, 587.0, 636.0, 633.0, 567.0, 630.0, 627.0, 630.0, 630.0, 636.0, 630.0, 636.0, 530.0, 639.0, 630.0, 639.0, 636.0, 569.0, 630.0, 579.0, 633.0, 576.0, 636.0, 579.0, 587.0, 627.0, 630.0, 579.0, 587.0, 633.0, 639.0, 630.0, 633.0, 636.0, 630.0, 630.0, 633.0, 576.0, 539.0, 630.0, 552.0, 590.0, 582.0, 630.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [321.0, 309.0, 313.0, 314.0, 290.0, 289.0, 322.0, 311.0, 310.0, 317.0, 294.0, 288.0, 294.0, 285.0, 292.0, 290.0, 288.0, 288.0, 319.0, 311.0, 284.0, 295.0, 291.0, 296.0, 314.0, 316.0, 311.0, 319.0, 291.0, 291.0, 297.0, 290.0, 292.0, 284.0, 287.0, 300.0, 236.0, 240.0, 316.0, 320.0, 310.0, 323.0, 317.0, 319.0, 324.0, 312.0, 311.0, 319.0, 316.0, 314.0, 319.0, 320.0, 316.0, 320.0, 285.0, 291.0, 313.0, 314.0, 289.0, 290.0, 290.0, 292.0, 319.0, 314.0, 308.0, 319.0, 332.0, 301.0, 309.0, 318.0, 314.0, 325.0, 278.0, 298.0, 314.0, 325.0, 306.0, 324.0, 291.0, 279.0, 327.0, 309.0, 296.0, 288.0, 323.0, 313.0, 316.0, 317.0, 313.0, 311.0, 298.0, 281.0, 316.0, 311.0, 316.0, 314.0, 294.0, 
288.0, 322.0, 317.0, 316.0, 311.0, 319.0, 311.0, 320.0, 307.0, 307.0, 320.0, 313.0, 317.0, 317.0, 313.0, 307.0, 317.0, 294.0, 293.0, 319.0, 317.0, 310.0, 323.0, 285.0, 282.0, 314.0, 316.0, 314.0, 313.0, 309.0, 321.0, 313.0, 317.0, 319.0, 317.0, 321.0, 309.0, 315.0, 321.0, 265.0, 265.0, 319.0, 320.0, 319.0, 311.0, 317.0, 322.0, 319.0, 317.0, 282.0, 287.0, 311.0, 319.0, 285.0, 294.0, 321.0, 312.0, 285.0, 291.0, 312.0, 324.0, 290.0, 289.0, 296.0, 291.0, 316.0, 311.0, 322.0, 308.0, 293.0, 286.0, 285.0, 302.0, 319.0, 314.0, 320.0, 319.0, 310.0, 320.0, 313.0, 320.0, 317.0, 319.0, 319.0, 311.0, 308.0, 322.0, 310.0, 323.0, 288.0, 288.0, 266.0, 273.0, 310.0, 320.0, 278.0, 274.0, 299.0, 291.0, 295.0, 287.0, 323.0, 307.0]}, "sampler_perf": {"mean_env_wait_ms": 1.0347705534764586, "mean_processing_ms": 0.2751542992936445, "mean_inference_ms": 1.5997483422105416}, "off_policy_estimator": {}, "info": {"num_steps_trained": 6168000, "num_steps_sampled": 3289600, "sample_time_ms": 22403.591, "load_time_ms": 37.585, "grad_time_ms": 9159.695, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.004127854947000742, "policy_loss": -0.004435718059539795, "vf_loss": 91.31246185302734, "vf_explained_var": 0.7648020386695862, "kl": 0.0019896463491022587, "entropy": 1.1353529691696167, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3289600, "episodes_total": 8224, "training_iteration": 257, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-54-43", "timestamp": 1660254883, "time_this_iter_s": 35.68418622016907, "time_total_s": 13297.542979955673, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 13297.542979955673, "timesteps_since_restore": 3289600, "iterations_since_restore": 257, "perf": {"cpu_util_percent": 28.105999999999998, "ram_util_percent": 58.784}}
+{"episode_reward_max": 639.0, "episode_reward_min": 476.0, "episode_reward_mean": 605.11, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 236.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 302.555}, "custom_metrics": {"sparse_reward_mean": 209.8, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 185.51, "shaped_reward_min": 152, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.34, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 18.45, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.68, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 25, "useful_onion_pickup_agent_1_mean": 17.63, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.67, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.82, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.15, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.23, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.32, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.33, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 6.06, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.77, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.44, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.03, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.48, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.41, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.04, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.51, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 10, "soup_pickup_agent_1_mean": 5.2, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.42, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.09, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.05, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 5, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.32, 
"optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.33, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.32, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.33, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [533.0, 627.0, 633.0, 573.0, 627.0, 630.0, 636.0, 633.0, 582.0, 633.0, 527.0, 633.0, 582.0, 627.0, 582.0, 636.0, 627.0, 582.0, 587.0, 582.0, 630.0, 570.0, 630.0, 573.0, 630.0, 627.0, 633.0, 530.0, 630.0, 513.0, 582.0, 558.0, 630.0, 636.0, 630.0, 636.0, 530.0, 639.0, 630.0, 639.0, 636.0, 569.0, 630.0, 579.0, 633.0, 576.0, 636.0, 579.0, 587.0, 627.0, 630.0, 579.0, 587.0, 633.0, 639.0, 630.0, 633.0, 636.0, 630.0, 630.0, 633.0, 576.0, 539.0, 630.0, 552.0, 590.0, 582.0, 630.0, 630.0, 627.0, 579.0, 633.0, 627.0, 582.0, 579.0, 582.0, 576.0, 630.0, 579.0, 587.0, 630.0, 630.0, 582.0, 587.0, 576.0, 587.0, 476.0, 636.0, 633.0, 636.0, 636.0, 630.0, 630.0, 639.0, 636.0, 576.0, 627.0, 579.0, 582.0, 633.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [266.0, 267.0, 308.0, 319.0, 309.0, 324.0, 288.0, 285.0, 313.0, 314.0, 311.0, 319.0, 322.0, 314.0, 314.0, 319.0, 298.0, 284.0, 319.0, 314.0, 265.0, 262.0, 316.0, 317.0, 284.0, 298.0, 316.0, 311.0, 284.0, 298.0, 309.0, 327.0, 313.0, 314.0, 299.0, 283.0, 287.0, 300.0, 290.0, 292.0, 316.0, 314.0, 289.0, 281.0, 316.0, 314.0, 281.0, 292.0, 310.0, 320.0, 306.0, 321.0, 326.0, 307.0, 270.0, 260.0, 319.0, 311.0, 260.0, 253.0, 289.0, 293.0, 289.0, 269.0, 313.0, 317.0, 319.0, 317.0, 321.0, 309.0, 315.0, 321.0, 265.0, 265.0, 319.0, 320.0, 319.0, 311.0, 317.0, 322.0, 319.0, 317.0, 282.0, 287.0, 311.0, 319.0, 285.0, 294.0, 321.0, 312.0, 285.0, 291.0, 312.0, 324.0, 290.0, 289.0, 296.0, 
291.0, 316.0, 311.0, 322.0, 308.0, 293.0, 286.0, 285.0, 302.0, 319.0, 314.0, 320.0, 319.0, 310.0, 320.0, 313.0, 320.0, 317.0, 319.0, 319.0, 311.0, 308.0, 322.0, 310.0, 323.0, 288.0, 288.0, 266.0, 273.0, 310.0, 320.0, 278.0, 274.0, 299.0, 291.0, 295.0, 287.0, 323.0, 307.0, 321.0, 309.0, 313.0, 314.0, 290.0, 289.0, 322.0, 311.0, 310.0, 317.0, 294.0, 288.0, 294.0, 285.0, 292.0, 290.0, 288.0, 288.0, 319.0, 311.0, 284.0, 295.0, 291.0, 296.0, 314.0, 316.0, 311.0, 319.0, 291.0, 291.0, 297.0, 290.0, 292.0, 284.0, 287.0, 300.0, 236.0, 240.0, 316.0, 320.0, 310.0, 323.0, 317.0, 319.0, 324.0, 312.0, 311.0, 319.0, 316.0, 314.0, 319.0, 320.0, 316.0, 320.0, 285.0, 291.0, 313.0, 314.0, 289.0, 290.0, 290.0, 292.0, 319.0, 314.0]}, "sampler_perf": {"mean_env_wait_ms": 1.032534462731249, "mean_processing_ms": 0.2747100807108193, "mean_inference_ms": 1.5981869464429628}, "off_policy_estimator": {}, "info": {"num_steps_trained": 6192000, "num_steps_sampled": 3302400, "sample_time_ms": 22164.935, "load_time_ms": 37.303, "grad_time_ms": 9157.709, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.004157478455454111, "policy_loss": -0.004339639097452164, "vf_loss": 90.65621948242188, "vf_explained_var": 0.7486104965209961, "kl": 0.0018090683734044433, "entropy": 1.1370199918746948, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3302400, "episodes_total": 8256, "training_iteration": 258, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-55-14", "timestamp": 1660254914, "time_this_iter_s": 31.362817764282227, "time_total_s": 13328.905797719955, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 13328.905797719955, "timesteps_since_restore": 3302400, "iterations_since_restore": 258, "perf": {"cpu_util_percent": 31.451111111111114, "ram_util_percent": 58.77555555555555}}
+{"episode_reward_max": 639.0, "episode_reward_min": 465.0, "episode_reward_mean": 602.46, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 231.0}, "policy_reward_max": {"ppo": 328.0}, "policy_reward_mean": {"ppo": 301.23}, "custom_metrics": {"sparse_reward_mean": 209.0, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 184.46, "shaped_reward_min": 145, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.44, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 18.34, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.78, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 25, "useful_onion_pickup_agent_1_mean": 17.62, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.73, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.83, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.21, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.27, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 16.35, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.12, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.99, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.79, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.38, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.97, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.44, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.48, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.06, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.52, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 10, "soup_pickup_agent_1_mean": 5.18, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.4, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.09, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.07, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 5, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.35, 
"optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.12, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.35, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.12, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [587.0, 587.0, 573.0, 465.0, 581.0, 627.0, 633.0, 627.0, 576.0, 627.0, 639.0, 576.0, 636.0, 633.0, 555.0, 630.0, 636.0, 636.0, 627.0, 633.0, 582.0, 584.0, 584.0, 579.0, 633.0, 582.0, 627.0, 636.0, 627.0, 627.0, 587.0, 630.0, 552.0, 590.0, 582.0, 630.0, 630.0, 627.0, 579.0, 633.0, 627.0, 582.0, 579.0, 582.0, 576.0, 630.0, 579.0, 587.0, 630.0, 630.0, 582.0, 587.0, 576.0, 587.0, 476.0, 636.0, 633.0, 636.0, 636.0, 630.0, 630.0, 639.0, 636.0, 576.0, 627.0, 579.0, 582.0, 633.0, 533.0, 627.0, 633.0, 573.0, 627.0, 630.0, 636.0, 633.0, 582.0, 633.0, 527.0, 633.0, 582.0, 627.0, 582.0, 636.0, 627.0, 582.0, 587.0, 582.0, 630.0, 570.0, 630.0, 573.0, 630.0, 627.0, 633.0, 530.0, 630.0, 513.0, 582.0, 558.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [295.0, 292.0, 296.0, 291.0, 283.0, 290.0, 234.0, 231.0, 290.0, 291.0, 321.0, 306.0, 305.0, 328.0, 316.0, 311.0, 271.0, 305.0, 311.0, 316.0, 311.0, 328.0, 287.0, 289.0, 316.0, 320.0, 313.0, 320.0, 281.0, 274.0, 316.0, 314.0, 324.0, 312.0, 325.0, 311.0, 313.0, 314.0, 314.0, 319.0, 287.0, 295.0, 303.0, 281.0, 290.0, 294.0, 293.0, 286.0, 321.0, 312.0, 297.0, 285.0, 320.0, 307.0, 314.0, 322.0, 313.0, 314.0, 311.0, 316.0, 301.0, 286.0, 319.0, 311.0, 278.0, 274.0, 299.0, 291.0, 295.0, 287.0, 323.0, 307.0, 321.0, 309.0, 313.0, 314.0, 290.0, 289.0, 322.0, 311.0, 310.0, 317.0, 294.0, 288.0, 294.0, 285.0, 292.0, 290.0, 288.0, 288.0, 319.0, 311.0, 284.0, 295.0, 291.0, 296.0, 314.0, 
316.0, 311.0, 319.0, 291.0, 291.0, 297.0, 290.0, 292.0, 284.0, 287.0, 300.0, 236.0, 240.0, 316.0, 320.0, 310.0, 323.0, 317.0, 319.0, 324.0, 312.0, 311.0, 319.0, 316.0, 314.0, 319.0, 320.0, 316.0, 320.0, 285.0, 291.0, 313.0, 314.0, 289.0, 290.0, 290.0, 292.0, 319.0, 314.0, 266.0, 267.0, 308.0, 319.0, 309.0, 324.0, 288.0, 285.0, 313.0, 314.0, 311.0, 319.0, 322.0, 314.0, 314.0, 319.0, 298.0, 284.0, 319.0, 314.0, 265.0, 262.0, 316.0, 317.0, 284.0, 298.0, 316.0, 311.0, 284.0, 298.0, 309.0, 327.0, 313.0, 314.0, 299.0, 283.0, 287.0, 300.0, 290.0, 292.0, 316.0, 314.0, 289.0, 281.0, 316.0, 314.0, 281.0, 292.0, 310.0, 320.0, 306.0, 321.0, 326.0, 307.0, 270.0, 260.0, 319.0, 311.0, 260.0, 253.0, 289.0, 293.0, 289.0, 269.0]}, "sampler_perf": {"mean_env_wait_ms": 1.0303222005081312, "mean_processing_ms": 0.27427243552224245, "mean_inference_ms": 1.5968410183934156}, "off_policy_estimator": {}, "info": {"num_steps_trained": 6216000, "num_steps_sampled": 3315200, "sample_time_ms": 22358.82, "load_time_ms": 37.268, "grad_time_ms": 9112.824, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0012977579608559608, "policy_loss": -0.0070681399665772915, "vf_loss": 89.34113311767578, "vf_explained_var": 0.751798152923584, "kl": 0.0021080432925373316, "entropy": 1.1364187002182007, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3315200, "episodes_total": 8288, "training_iteration": 259, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-55-48", "timestamp": 1660254948, "time_this_iter_s": 33.836853981018066, "time_total_s": 13362.742651700974, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 13362.742651700974, "timesteps_since_restore": 3315200, "iterations_since_restore": 259, "perf": {"cpu_util_percent": 29.470212765957445, "ram_util_percent": 58.776595744680854}}
+{"episode_reward_max": 639.0, "episode_reward_min": 465.0, "episode_reward_mean": 603.78, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 231.0}, "policy_reward_max": {"ppo": 328.0}, "policy_reward_mean": {"ppo": 301.89}, "custom_metrics": {"sparse_reward_mean": 209.4, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 184.98, "shaped_reward_min": 145, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.42, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 18.27, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.89, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 25, "useful_onion_pickup_agent_1_mean": 17.55, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.7, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.74, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.19, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.22, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 16.37, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.14, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 6.06, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.95, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.33, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.07, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.48, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.59, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.05, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.05, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.43, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 5.26, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.35, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.19, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.37, 
"optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.14, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.37, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.14, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [627.0, 633.0, 633.0, 544.0, 587.0, 587.0, 639.0, 587.0, 636.0, 633.0, 573.0, 587.0, 636.0, 567.0, 587.0, 633.0, 630.0, 633.0, 579.0, 627.0, 624.0, 630.0, 582.0, 579.0, 627.0, 582.0, 579.0, 590.0, 630.0, 627.0, 630.0, 579.0, 627.0, 579.0, 582.0, 633.0, 533.0, 627.0, 633.0, 573.0, 627.0, 630.0, 636.0, 633.0, 582.0, 633.0, 527.0, 633.0, 582.0, 627.0, 582.0, 636.0, 627.0, 582.0, 587.0, 582.0, 630.0, 570.0, 630.0, 573.0, 630.0, 627.0, 633.0, 530.0, 630.0, 513.0, 582.0, 558.0, 587.0, 587.0, 573.0, 465.0, 581.0, 627.0, 633.0, 627.0, 576.0, 627.0, 639.0, 576.0, 636.0, 633.0, 555.0, 630.0, 636.0, 636.0, 627.0, 633.0, 582.0, 584.0, 584.0, 579.0, 633.0, 582.0, 627.0, 636.0, 627.0, 627.0, 587.0, 630.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [316.0, 311.0, 317.0, 316.0, 309.0, 324.0, 268.0, 276.0, 288.0, 299.0, 299.0, 288.0, 322.0, 317.0, 288.0, 299.0, 319.0, 317.0, 319.0, 314.0, 290.0, 283.0, 287.0, 300.0, 311.0, 325.0, 285.0, 282.0, 290.0, 297.0, 319.0, 314.0, 311.0, 319.0, 312.0, 321.0, 277.0, 302.0, 320.0, 307.0, 319.0, 305.0, 314.0, 316.0, 295.0, 287.0, 296.0, 283.0, 313.0, 314.0, 284.0, 298.0, 289.0, 290.0, 294.0, 296.0, 311.0, 319.0, 303.0, 324.0, 316.0, 314.0, 290.0, 289.0, 313.0, 314.0, 289.0, 290.0, 290.0, 292.0, 319.0, 314.0, 266.0, 267.0, 308.0, 319.0, 309.0, 324.0, 288.0, 285.0, 313.0, 314.0, 311.0, 319.0, 322.0, 314.0, 314.0, 319.0, 298.0, 284.0, 319.0, 314.0, 265.0, 262.0, 316.0, 317.0, 284.0, 
298.0, 316.0, 311.0, 284.0, 298.0, 309.0, 327.0, 313.0, 314.0, 299.0, 283.0, 287.0, 300.0, 290.0, 292.0, 316.0, 314.0, 289.0, 281.0, 316.0, 314.0, 281.0, 292.0, 310.0, 320.0, 306.0, 321.0, 326.0, 307.0, 270.0, 260.0, 319.0, 311.0, 260.0, 253.0, 289.0, 293.0, 289.0, 269.0, 295.0, 292.0, 296.0, 291.0, 283.0, 290.0, 234.0, 231.0, 290.0, 291.0, 321.0, 306.0, 305.0, 328.0, 316.0, 311.0, 271.0, 305.0, 311.0, 316.0, 311.0, 328.0, 287.0, 289.0, 316.0, 320.0, 313.0, 320.0, 281.0, 274.0, 316.0, 314.0, 324.0, 312.0, 325.0, 311.0, 313.0, 314.0, 314.0, 319.0, 287.0, 295.0, 303.0, 281.0, 290.0, 294.0, 293.0, 286.0, 321.0, 312.0, 297.0, 285.0, 320.0, 307.0, 314.0, 322.0, 313.0, 314.0, 311.0, 316.0, 301.0, 286.0, 319.0, 311.0]}, "sampler_perf": {"mean_env_wait_ms": 1.0281105190585473, "mean_processing_ms": 0.27383411395901525, "mean_inference_ms": 1.5952417170076567}, "off_policy_estimator": {}, "info": {"num_steps_trained": 6240000, "num_steps_sampled": 3328000, "sample_time_ms": 22352.291, "load_time_ms": 37.439, "grad_time_ms": 9262.711, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.002568518975749612, "policy_loss": -0.005389755126088858, "vf_loss": 85.3111801147461, "vf_explained_var": 0.7668444514274597, "kl": 0.0014818129129707813, "entropy": 1.145686149597168, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3328000, "episodes_total": 8320, "training_iteration": 260, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-56-23", "timestamp": 1660254983, "time_this_iter_s": 34.4713191986084, "time_total_s": 13397.213970899582, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 13397.213970899582, "timesteps_since_restore": 3328000, "iterations_since_restore": 260, "perf": {"cpu_util_percent": 30.669387755102036, "ram_util_percent": 58.697959183673476}}
+{"episode_reward_max": 639.0, "episode_reward_min": 351.0, "episode_reward_mean": 602.97, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 168.0}, "policy_reward_max": {"ppo": 328.0}, "policy_reward_mean": {"ppo": 301.485}, "custom_metrics": {"sparse_reward_mean": 209.2, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 184.57, "shaped_reward_min": 111, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.18, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 18.34, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.62, "useful_onion_pickup_agent_0_min": 9, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 17.71, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.74, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.56, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.2, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.22, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 16.11, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.37, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 6.09, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.86, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.32, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.99, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.45, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.56, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.04, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.5, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 5.17, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.43, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.11, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.11, 
"optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.37, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.11, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.37, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [639.0, 630.0, 630.0, 582.0, 567.0, 576.0, 633.0, 639.0, 579.0, 624.0, 633.0, 627.0, 627.0, 630.0, 633.0, 630.0, 582.0, 567.0, 351.0, 584.0, 558.0, 639.0, 584.0, 627.0, 633.0, 587.0, 633.0, 587.0, 630.0, 630.0, 582.0, 582.0, 630.0, 513.0, 582.0, 558.0, 587.0, 587.0, 573.0, 465.0, 581.0, 627.0, 633.0, 627.0, 576.0, 627.0, 639.0, 576.0, 636.0, 633.0, 555.0, 630.0, 636.0, 636.0, 627.0, 633.0, 582.0, 584.0, 584.0, 579.0, 633.0, 582.0, 627.0, 636.0, 627.0, 627.0, 587.0, 630.0, 627.0, 633.0, 633.0, 544.0, 587.0, 587.0, 639.0, 587.0, 636.0, 633.0, 573.0, 587.0, 636.0, 567.0, 587.0, 633.0, 630.0, 633.0, 579.0, 627.0, 624.0, 630.0, 582.0, 579.0, 627.0, 582.0, 579.0, 590.0, 630.0, 627.0, 630.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [324.0, 315.0, 314.0, 316.0, 313.0, 317.0, 280.0, 302.0, 284.0, 283.0, 286.0, 290.0, 316.0, 317.0, 311.0, 328.0, 287.0, 292.0, 312.0, 312.0, 311.0, 322.0, 313.0, 314.0, 313.0, 314.0, 306.0, 324.0, 319.0, 314.0, 317.0, 313.0, 293.0, 289.0, 285.0, 282.0, 168.0, 183.0, 289.0, 295.0, 277.0, 281.0, 327.0, 312.0, 285.0, 299.0, 318.0, 309.0, 313.0, 320.0, 288.0, 299.0, 319.0, 314.0, 294.0, 293.0, 316.0, 314.0, 316.0, 314.0, 298.0, 284.0, 278.0, 304.0, 319.0, 311.0, 260.0, 253.0, 289.0, 293.0, 289.0, 269.0, 295.0, 292.0, 296.0, 291.0, 283.0, 290.0, 234.0, 231.0, 290.0, 291.0, 321.0, 306.0, 305.0, 328.0, 316.0, 311.0, 271.0, 305.0, 311.0, 316.0, 311.0, 328.0, 287.0, 289.0, 316.0, 
320.0, 313.0, 320.0, 281.0, 274.0, 316.0, 314.0, 324.0, 312.0, 325.0, 311.0, 313.0, 314.0, 314.0, 319.0, 287.0, 295.0, 303.0, 281.0, 290.0, 294.0, 293.0, 286.0, 321.0, 312.0, 297.0, 285.0, 320.0, 307.0, 314.0, 322.0, 313.0, 314.0, 311.0, 316.0, 301.0, 286.0, 319.0, 311.0, 316.0, 311.0, 317.0, 316.0, 309.0, 324.0, 268.0, 276.0, 288.0, 299.0, 299.0, 288.0, 322.0, 317.0, 288.0, 299.0, 319.0, 317.0, 319.0, 314.0, 290.0, 283.0, 287.0, 300.0, 311.0, 325.0, 285.0, 282.0, 290.0, 297.0, 319.0, 314.0, 311.0, 319.0, 312.0, 321.0, 277.0, 302.0, 320.0, 307.0, 319.0, 305.0, 314.0, 316.0, 295.0, 287.0, 296.0, 283.0, 313.0, 314.0, 284.0, 298.0, 289.0, 290.0, 294.0, 296.0, 311.0, 319.0, 303.0, 324.0, 316.0, 314.0, 290.0, 289.0]}, "sampler_perf": {"mean_env_wait_ms": 1.0259226733727673, "mean_processing_ms": 0.27340278893355835, "mean_inference_ms": 1.5937276386512644}, "off_policy_estimator": {}, "info": {"num_steps_trained": 6264000, "num_steps_sampled": 3340800, "sample_time_ms": 22448.254, "load_time_ms": 37.569, "grad_time_ms": 9264.987, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.003604738973081112, "policy_loss": -0.004238134250044823, "vf_loss": 84.2165298461914, "vf_explained_var": 0.770367443561554, "kl": 0.002041497267782688, "entropy": 1.1575653553009033, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3340800, "episodes_total": 8352, "training_iteration": 261, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-56-55", "timestamp": 1660255015, "time_this_iter_s": 32.84105324745178, "time_total_s": 13430.055024147034, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 13430.055024147034, "timesteps_since_restore": 3340800, "iterations_since_restore": 261, "perf": {"cpu_util_percent": 32.742553191489364, "ram_util_percent": 58.7659574468085}}
+{"episode_reward_max": 639.0, "episode_reward_min": 351.0, "episode_reward_mean": 605.62, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 168.0}, "policy_reward_max": {"ppo": 328.0}, "policy_reward_mean": {"ppo": 302.81}, "custom_metrics": {"sparse_reward_mean": 210.0, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 185.62, "shaped_reward_min": 111, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.95, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 18.58, "onion_pickup_agent_1_min": 13, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 16.41, "useful_onion_pickup_agent_0_min": 9, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 17.79, "useful_onion_pickup_agent_1_min": 12, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.67, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.59, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.2, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.15, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.03, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.63, "potting_onion_agent_1_min": 13, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 6.26, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.76, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.45, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.98, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.46, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.56, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.62, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 5.05, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.56, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.0, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.03, 
"optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.63, "optimal_onion_potting_agent_1_min": 13, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.03, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.63, "viable_onion_potting_agent_1_min": 13, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [633.0, 633.0, 579.0, 582.0, 633.0, 633.0, 630.0, 636.0, 633.0, 576.0, 582.0, 627.0, 587.0, 587.0, 582.0, 633.0, 639.0, 579.0, 582.0, 576.0, 630.0, 630.0, 615.0, 570.0, 633.0, 579.0, 630.0, 633.0, 587.0, 581.0, 627.0, 582.0, 627.0, 627.0, 587.0, 630.0, 627.0, 633.0, 633.0, 544.0, 587.0, 587.0, 639.0, 587.0, 636.0, 633.0, 573.0, 587.0, 636.0, 567.0, 587.0, 633.0, 630.0, 633.0, 579.0, 627.0, 624.0, 630.0, 582.0, 579.0, 627.0, 582.0, 579.0, 590.0, 630.0, 627.0, 630.0, 579.0, 639.0, 630.0, 630.0, 582.0, 567.0, 576.0, 633.0, 639.0, 579.0, 624.0, 633.0, 627.0, 627.0, 630.0, 633.0, 630.0, 582.0, 567.0, 351.0, 584.0, 558.0, 639.0, 584.0, 627.0, 633.0, 587.0, 633.0, 587.0, 630.0, 630.0, 582.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [323.0, 310.0, 316.0, 317.0, 285.0, 294.0, 286.0, 296.0, 315.0, 318.0, 326.0, 307.0, 316.0, 314.0, 315.0, 321.0, 314.0, 319.0, 301.0, 275.0, 291.0, 291.0, 308.0, 319.0, 308.0, 279.0, 302.0, 285.0, 291.0, 291.0, 314.0, 319.0, 314.0, 325.0, 301.0, 278.0, 300.0, 282.0, 296.0, 280.0, 313.0, 317.0, 316.0, 314.0, 304.0, 311.0, 290.0, 280.0, 313.0, 320.0, 296.0, 283.0, 308.0, 322.0, 319.0, 314.0, 291.0, 296.0, 295.0, 286.0, 316.0, 311.0, 286.0, 296.0, 313.0, 314.0, 311.0, 316.0, 301.0, 286.0, 319.0, 311.0, 316.0, 311.0, 317.0, 316.0, 309.0, 324.0, 268.0, 276.0, 288.0, 299.0, 299.0, 288.0, 322.0, 317.0, 288.0, 299.0, 319.0, 317.0, 319.0, 314.0, 290.0, 283.0, 287.0, 300.0, 311.0, 
325.0, 285.0, 282.0, 290.0, 297.0, 319.0, 314.0, 311.0, 319.0, 312.0, 321.0, 277.0, 302.0, 320.0, 307.0, 319.0, 305.0, 314.0, 316.0, 295.0, 287.0, 296.0, 283.0, 313.0, 314.0, 284.0, 298.0, 289.0, 290.0, 294.0, 296.0, 311.0, 319.0, 303.0, 324.0, 316.0, 314.0, 290.0, 289.0, 324.0, 315.0, 314.0, 316.0, 313.0, 317.0, 280.0, 302.0, 284.0, 283.0, 286.0, 290.0, 316.0, 317.0, 311.0, 328.0, 287.0, 292.0, 312.0, 312.0, 311.0, 322.0, 313.0, 314.0, 313.0, 314.0, 306.0, 324.0, 319.0, 314.0, 317.0, 313.0, 293.0, 289.0, 285.0, 282.0, 168.0, 183.0, 289.0, 295.0, 277.0, 281.0, 327.0, 312.0, 285.0, 299.0, 318.0, 309.0, 313.0, 320.0, 288.0, 299.0, 319.0, 314.0, 294.0, 293.0, 316.0, 314.0, 316.0, 314.0, 298.0, 284.0, 278.0, 304.0]}, "sampler_perf": {"mean_env_wait_ms": 1.0237598370369554, "mean_processing_ms": 0.2729750841964574, "mean_inference_ms": 1.5924306371043695}, "off_policy_estimator": {}, "info": {"num_steps_trained": 6288000, "num_steps_sampled": 3353600, "sample_time_ms": 22598.435, "load_time_ms": 37.639, "grad_time_ms": 9270.4, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0002879177627619356, "policy_loss": -0.007559783756732941, "vf_loss": 84.22747802734375, "vf_explained_var": 0.7608786225318909, "kl": 0.0017919730162248015, "entropy": 1.1500838994979858, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3353600, "episodes_total": 8384, "training_iteration": 262, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-57-31", "timestamp": 1660255051, "time_this_iter_s": 35.47017812728882, "time_total_s": 13465.525202274323, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 13465.525202274323, "timesteps_since_restore": 3353600, "iterations_since_restore": 262, "perf": {"cpu_util_percent": 25.712, "ram_util_percent": 58.78}}
+{"episode_reward_max": 639.0, "episode_reward_min": 351.0, "episode_reward_mean": 606.83, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 168.0}, "policy_reward_max": {"ppo": 328.0}, "policy_reward_mean": {"ppo": 303.415}, "custom_metrics": {"sparse_reward_mean": 210.8, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 185.23, "shaped_reward_min": 111, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.88, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 18.69, "onion_pickup_agent_1_min": 14, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 16.26, "useful_onion_pickup_agent_0_min": 9, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 17.81, "useful_onion_pickup_agent_1_min": 12, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.62, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.58, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.2, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.15, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.89, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.76, "potting_onion_agent_1_min": 13, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 6.18, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.71, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 5.47, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.89, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.42, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.56, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.66, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.99, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.61, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.96, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.89, 
"optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.76, "optimal_onion_potting_agent_1_min": 13, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.89, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.76, "viable_onion_potting_agent_1_min": 13, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [630.0, 630.0, 636.0, 581.0, 624.0, 633.0, 581.0, 579.0, 459.0, 630.0, 573.0, 633.0, 630.0, 633.0, 633.0, 627.0, 630.0, 627.0, 630.0, 627.0, 633.0, 636.0, 573.0, 579.0, 570.0, 624.0, 630.0, 636.0, 582.0, 636.0, 582.0, 636.0, 630.0, 627.0, 630.0, 579.0, 639.0, 630.0, 630.0, 582.0, 567.0, 576.0, 633.0, 639.0, 579.0, 624.0, 633.0, 627.0, 627.0, 630.0, 633.0, 630.0, 582.0, 567.0, 351.0, 584.0, 558.0, 639.0, 584.0, 627.0, 633.0, 587.0, 633.0, 587.0, 630.0, 630.0, 582.0, 582.0, 633.0, 633.0, 579.0, 582.0, 633.0, 633.0, 630.0, 636.0, 633.0, 576.0, 582.0, 627.0, 587.0, 587.0, 582.0, 633.0, 639.0, 579.0, 582.0, 576.0, 630.0, 630.0, 615.0, 570.0, 633.0, 579.0, 630.0, 633.0, 587.0, 581.0, 627.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [313.0, 317.0, 316.0, 314.0, 322.0, 314.0, 285.0, 296.0, 306.0, 318.0, 317.0, 316.0, 288.0, 293.0, 288.0, 291.0, 235.0, 224.0, 324.0, 306.0, 285.0, 288.0, 313.0, 320.0, 312.0, 318.0, 315.0, 318.0, 310.0, 323.0, 311.0, 316.0, 308.0, 322.0, 306.0, 321.0, 315.0, 315.0, 321.0, 306.0, 314.0, 319.0, 321.0, 315.0, 283.0, 290.0, 301.0, 278.0, 282.0, 288.0, 305.0, 319.0, 310.0, 320.0, 327.0, 309.0, 288.0, 294.0, 319.0, 317.0, 298.0, 284.0, 316.0, 320.0, 311.0, 319.0, 303.0, 324.0, 316.0, 314.0, 290.0, 289.0, 324.0, 315.0, 314.0, 316.0, 313.0, 317.0, 280.0, 302.0, 284.0, 283.0, 286.0, 290.0, 316.0, 317.0, 311.0, 328.0, 287.0, 292.0, 312.0, 312.0, 311.0, 322.0, 313.0, 314.0, 313.0, 
314.0, 306.0, 324.0, 319.0, 314.0, 317.0, 313.0, 293.0, 289.0, 285.0, 282.0, 168.0, 183.0, 289.0, 295.0, 277.0, 281.0, 327.0, 312.0, 285.0, 299.0, 318.0, 309.0, 313.0, 320.0, 288.0, 299.0, 319.0, 314.0, 294.0, 293.0, 316.0, 314.0, 316.0, 314.0, 298.0, 284.0, 278.0, 304.0, 323.0, 310.0, 316.0, 317.0, 285.0, 294.0, 286.0, 296.0, 315.0, 318.0, 326.0, 307.0, 316.0, 314.0, 315.0, 321.0, 314.0, 319.0, 301.0, 275.0, 291.0, 291.0, 308.0, 319.0, 308.0, 279.0, 302.0, 285.0, 291.0, 291.0, 314.0, 319.0, 314.0, 325.0, 301.0, 278.0, 300.0, 282.0, 296.0, 280.0, 313.0, 317.0, 316.0, 314.0, 304.0, 311.0, 290.0, 280.0, 313.0, 320.0, 296.0, 283.0, 308.0, 322.0, 319.0, 314.0, 291.0, 296.0, 295.0, 286.0, 316.0, 311.0, 286.0, 296.0]}, "sampler_perf": {"mean_env_wait_ms": 1.0216145913316899, "mean_processing_ms": 0.27255014515744136, "mean_inference_ms": 1.591234908963895}, "off_policy_estimator": {}, "info": {"num_steps_trained": 6312000, "num_steps_sampled": 3366400, "sample_time_ms": 23169.124, "load_time_ms": 37.393, "grad_time_ms": 9419.779, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.005937855690717697, "policy_loss": -0.0020803138613700867, "vf_loss": 85.89215087890625, "vf_explained_var": 0.7588068842887878, "kl": 0.001875289366580546, "entropy": 1.142077088356018, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3366400, "episodes_total": 8416, "training_iteration": 263, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-58-05", "timestamp": 1660255085, "time_this_iter_s": 33.64332914352417, "time_total_s": 13499.168531417847, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 13499.168531417847, "timesteps_since_restore": 3366400, "iterations_since_restore": 263, "perf": {"cpu_util_percent": 28.2468085106383, "ram_util_percent": 58.74042553191488}}
+{"episode_reward_max": 639.0, "episode_reward_min": 459.0, "episode_reward_mean": 605.68, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 224.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 302.84}, "custom_metrics": {"sparse_reward_mean": 210.2, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 185.28, "shaped_reward_min": 139, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.04, "onion_pickup_agent_0_min": 10, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 18.71, "onion_pickup_agent_1_min": 15, "onion_pickup_agent_1_max": 27, "useful_onion_pickup_agent_0_mean": 16.44, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 17.72, "useful_onion_pickup_agent_1_min": 14, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.64, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.78, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.22, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.26, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 6, "potting_onion_agent_0_mean": 16.06, "potting_onion_agent_0_min": 9, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.61, "potting_onion_agent_1_min": 14, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.12, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.72, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 5.45, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.99, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.4, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.55, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.62, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.98, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.57, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.95, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.06, 
"optimal_onion_potting_agent_0_min": 9, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.61, "optimal_onion_potting_agent_1_min": 14, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.06, "viable_onion_potting_agent_0_min": 9, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.61, "viable_onion_potting_agent_1_min": 14, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 633.0, 627.0, 579.0, 573.0, 627.0, 579.0, 582.0, 587.0, 582.0, 579.0, 627.0, 582.0, 627.0, 579.0, 627.0, 525.0, 630.0, 587.0, 576.0, 633.0, 636.0, 519.0, 582.0, 576.0, 636.0, 636.0, 582.0, 630.0, 579.0, 633.0, 633.0, 630.0, 630.0, 582.0, 582.0, 633.0, 633.0, 579.0, 582.0, 633.0, 633.0, 630.0, 636.0, 633.0, 576.0, 582.0, 627.0, 587.0, 587.0, 582.0, 633.0, 639.0, 579.0, 582.0, 576.0, 630.0, 630.0, 615.0, 570.0, 633.0, 579.0, 630.0, 633.0, 587.0, 581.0, 627.0, 582.0, 630.0, 630.0, 636.0, 581.0, 624.0, 633.0, 581.0, 579.0, 459.0, 630.0, 573.0, 633.0, 630.0, 633.0, 633.0, 627.0, 630.0, 627.0, 630.0, 627.0, 633.0, 636.0, 573.0, 579.0, 570.0, 624.0, 630.0, 636.0, 582.0, 636.0, 582.0, 636.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [290.0, 289.0, 319.0, 314.0, 308.0, 319.0, 283.0, 296.0, 290.0, 283.0, 316.0, 311.0, 291.0, 288.0, 291.0, 291.0, 294.0, 293.0, 291.0, 291.0, 295.0, 284.0, 313.0, 314.0, 295.0, 287.0, 313.0, 314.0, 291.0, 288.0, 313.0, 314.0, 264.0, 261.0, 304.0, 326.0, 283.0, 304.0, 292.0, 284.0, 309.0, 324.0, 314.0, 322.0, 242.0, 277.0, 294.0, 288.0, 287.0, 289.0, 317.0, 319.0, 320.0, 316.0, 291.0, 291.0, 316.0, 314.0, 283.0, 296.0, 319.0, 314.0, 319.0, 314.0, 316.0, 314.0, 316.0, 314.0, 298.0, 284.0, 278.0, 304.0, 323.0, 310.0, 316.0, 317.0, 285.0, 294.0, 286.0, 296.0, 315.0, 318.0, 326.0, 307.0, 316.0, 314.0, 315.0, 321.0, 314.0, 319.0, 301.0, 275.0, 291.0, 291.0, 308.0, 319.0, 308.0, 
279.0, 302.0, 285.0, 291.0, 291.0, 314.0, 319.0, 314.0, 325.0, 301.0, 278.0, 300.0, 282.0, 296.0, 280.0, 313.0, 317.0, 316.0, 314.0, 304.0, 311.0, 290.0, 280.0, 313.0, 320.0, 296.0, 283.0, 308.0, 322.0, 319.0, 314.0, 291.0, 296.0, 295.0, 286.0, 316.0, 311.0, 286.0, 296.0, 313.0, 317.0, 316.0, 314.0, 322.0, 314.0, 285.0, 296.0, 306.0, 318.0, 317.0, 316.0, 288.0, 293.0, 288.0, 291.0, 235.0, 224.0, 324.0, 306.0, 285.0, 288.0, 313.0, 320.0, 312.0, 318.0, 315.0, 318.0, 310.0, 323.0, 311.0, 316.0, 308.0, 322.0, 306.0, 321.0, 315.0, 315.0, 321.0, 306.0, 314.0, 319.0, 321.0, 315.0, 283.0, 290.0, 301.0, 278.0, 282.0, 288.0, 305.0, 319.0, 310.0, 320.0, 327.0, 309.0, 288.0, 294.0, 319.0, 317.0, 298.0, 284.0, 316.0, 320.0]}, "sampler_perf": {"mean_env_wait_ms": 1.0194825436603638, "mean_processing_ms": 0.27212618264491645, "mean_inference_ms": 1.590112958914433}, "off_policy_estimator": {}, "info": {"num_steps_trained": 6336000, "num_steps_sampled": 3379200, "sample_time_ms": 23605.349, "load_time_ms": 37.251, "grad_time_ms": 9861.879, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0006588895921595395, "policy_loss": -0.008630036376416683, "vf_loss": 85.43579864501953, "vf_explained_var": 0.770875871181488, "kl": 0.0017466336721554399, "entropy": 1.1448642015457153, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3379200, "episodes_total": 8448, "training_iteration": 264, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-58-41", "timestamp": 1660255121, "time_this_iter_s": 36.31201386451721, "time_total_s": 13535.480545282364, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 13535.480545282364, "timesteps_since_restore": 3379200, "iterations_since_restore": 264, "perf": {"cpu_util_percent": 29.313461538461542, "ram_util_percent": 59.09038461538463}}
+{"episode_reward_max": 639.0, "episode_reward_min": 459.0, "episode_reward_mean": 605.44, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 224.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 302.72}, "custom_metrics": {"sparse_reward_mean": 210.2, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 185.04, "shaped_reward_min": 139, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.23, "onion_pickup_agent_0_min": 10, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 18.36, "onion_pickup_agent_1_min": 13, "onion_pickup_agent_1_max": 27, "useful_onion_pickup_agent_0_mean": 16.74, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 17.48, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.66, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.62, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.17, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.25, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 6, "potting_onion_agent_0_mean": 16.21, "potting_onion_agent_0_min": 9, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.4, "potting_onion_agent_1_min": 13, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.09, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.74, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 5.41, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.01, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.44, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.47, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.51, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.1, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.47, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.05, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.21, 
"optimal_onion_potting_agent_0_min": 9, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.4, "optimal_onion_potting_agent_1_min": 13, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.21, "viable_onion_potting_agent_0_min": 9, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.4, "viable_onion_potting_agent_1_min": 13, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 582.0, 633.0, 636.0, 639.0, 633.0, 630.0, 630.0, 633.0, 582.0, 576.0, 579.0, 587.0, 587.0, 582.0, 582.0, 633.0, 582.0, 630.0, 630.0, 621.0, 621.0, 582.0, 630.0, 630.0, 630.0, 564.0, 624.0, 582.0, 627.0, 573.0, 630.0, 587.0, 581.0, 627.0, 582.0, 630.0, 630.0, 636.0, 581.0, 624.0, 633.0, 581.0, 579.0, 459.0, 630.0, 573.0, 633.0, 630.0, 633.0, 633.0, 627.0, 630.0, 627.0, 630.0, 627.0, 633.0, 636.0, 573.0, 579.0, 570.0, 624.0, 630.0, 636.0, 582.0, 636.0, 582.0, 636.0, 579.0, 633.0, 627.0, 579.0, 573.0, 627.0, 579.0, 582.0, 587.0, 582.0, 579.0, 627.0, 582.0, 627.0, 579.0, 627.0, 525.0, 630.0, 587.0, 576.0, 633.0, 636.0, 519.0, 582.0, 576.0, 636.0, 636.0, 582.0, 630.0, 579.0, 633.0, 633.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [293.0, 289.0, 283.0, 299.0, 316.0, 317.0, 316.0, 320.0, 319.0, 320.0, 324.0, 309.0, 321.0, 309.0, 309.0, 321.0, 324.0, 309.0, 289.0, 293.0, 292.0, 284.0, 291.0, 288.0, 293.0, 294.0, 294.0, 293.0, 289.0, 293.0, 289.0, 293.0, 316.0, 317.0, 291.0, 291.0, 313.0, 317.0, 316.0, 314.0, 313.0, 308.0, 316.0, 305.0, 292.0, 290.0, 312.0, 318.0, 316.0, 314.0, 314.0, 316.0, 284.0, 280.0, 321.0, 303.0, 301.0, 281.0, 313.0, 314.0, 288.0, 285.0, 314.0, 316.0, 291.0, 296.0, 295.0, 286.0, 316.0, 311.0, 286.0, 296.0, 313.0, 317.0, 316.0, 314.0, 322.0, 314.0, 285.0, 296.0, 306.0, 318.0, 317.0, 316.0, 288.0, 293.0, 288.0, 291.0, 235.0, 224.0, 324.0, 306.0, 285.0, 288.0, 313.0, 320.0, 312.0, 
318.0, 315.0, 318.0, 310.0, 323.0, 311.0, 316.0, 308.0, 322.0, 306.0, 321.0, 315.0, 315.0, 321.0, 306.0, 314.0, 319.0, 321.0, 315.0, 283.0, 290.0, 301.0, 278.0, 282.0, 288.0, 305.0, 319.0, 310.0, 320.0, 327.0, 309.0, 288.0, 294.0, 319.0, 317.0, 298.0, 284.0, 316.0, 320.0, 290.0, 289.0, 319.0, 314.0, 308.0, 319.0, 283.0, 296.0, 290.0, 283.0, 316.0, 311.0, 291.0, 288.0, 291.0, 291.0, 294.0, 293.0, 291.0, 291.0, 295.0, 284.0, 313.0, 314.0, 295.0, 287.0, 313.0, 314.0, 291.0, 288.0, 313.0, 314.0, 264.0, 261.0, 304.0, 326.0, 283.0, 304.0, 292.0, 284.0, 309.0, 324.0, 314.0, 322.0, 242.0, 277.0, 294.0, 288.0, 287.0, 289.0, 317.0, 319.0, 320.0, 316.0, 291.0, 291.0, 316.0, 314.0, 283.0, 296.0, 319.0, 314.0, 319.0, 314.0]}, "sampler_perf": {"mean_env_wait_ms": 1.0173660844031607, "mean_processing_ms": 0.27170594360174505, "mean_inference_ms": 1.5890528923225553}, "off_policy_estimator": {}, "info": {"num_steps_trained": 6360000, "num_steps_sampled": 3392000, "sample_time_ms": 24130.635, "load_time_ms": 37.048, "grad_time_ms": 9999.028, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0024043002631515265, "policy_loss": -0.005360407754778862, "vf_loss": 83.37548828125, "vf_explained_var": 0.7662093043327332, "kl": 0.001953211845830083, "entropy": 1.145652174949646, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3392000, "episodes_total": 8480, "training_iteration": 265, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-59-19", "timestamp": 1660255159, "time_this_iter_s": 37.638370990753174, "time_total_s": 13573.118916273117, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 13573.118916273117, "timesteps_since_restore": 3392000, "iterations_since_restore": 265, "perf": {"cpu_util_percent": 30.500000000000004, "ram_util_percent": 58.69433962264149}}
+{"episode_reward_max": 639.0, "episode_reward_min": 519.0, "episode_reward_mean": 604.81, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 242.0}, "policy_reward_max": {"ppo": 330.0}, "policy_reward_mean": {"ppo": 302.405}, "custom_metrics": {"sparse_reward_mean": 209.8, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 185.21, "shaped_reward_min": 159, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.11, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 18.52, "onion_pickup_agent_1_min": 13, "onion_pickup_agent_1_max": 27, "useful_onion_pickup_agent_0_mean": 16.61, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 17.64, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 25, "onion_drop_agent_0_mean": 0.66, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.67, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.15, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.28, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 6, "potting_onion_agent_0_mean": 16.19, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.46, "potting_onion_agent_1_min": 13, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.23, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.57, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.53, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.99, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.49, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.38, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.55, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.01, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.52, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.97, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.19, 
"optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.46, "optimal_onion_potting_agent_1_min": 13, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.19, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.46, "viable_onion_potting_agent_1_min": 13, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [630.0, 627.0, 582.0, 582.0, 627.0, 630.0, 630.0, 636.0, 579.0, 624.0, 576.0, 633.0, 630.0, 630.0, 576.0, 633.0, 636.0, 627.0, 582.0, 576.0, 621.0, 639.0, 582.0, 582.0, 630.0, 582.0, 579.0, 533.0, 579.0, 582.0, 630.0, 636.0, 582.0, 636.0, 582.0, 636.0, 579.0, 633.0, 627.0, 579.0, 573.0, 627.0, 579.0, 582.0, 587.0, 582.0, 579.0, 627.0, 582.0, 627.0, 579.0, 627.0, 525.0, 630.0, 587.0, 576.0, 633.0, 636.0, 519.0, 582.0, 576.0, 636.0, 636.0, 582.0, 630.0, 579.0, 633.0, 633.0, 582.0, 582.0, 633.0, 636.0, 639.0, 633.0, 630.0, 630.0, 633.0, 582.0, 576.0, 579.0, 587.0, 587.0, 582.0, 582.0, 633.0, 582.0, 630.0, 630.0, 621.0, 621.0, 582.0, 630.0, 630.0, 630.0, 564.0, 624.0, 582.0, 627.0, 573.0, 630.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [313.0, 317.0, 323.0, 304.0, 288.0, 294.0, 285.0, 297.0, 330.0, 297.0, 321.0, 309.0, 315.0, 315.0, 312.0, 324.0, 295.0, 284.0, 315.0, 309.0, 293.0, 283.0, 322.0, 311.0, 324.0, 306.0, 316.0, 314.0, 288.0, 288.0, 314.0, 319.0, 314.0, 322.0, 313.0, 314.0, 287.0, 295.0, 290.0, 286.0, 313.0, 308.0, 314.0, 325.0, 291.0, 291.0, 286.0, 296.0, 309.0, 321.0, 288.0, 294.0, 285.0, 294.0, 267.0, 266.0, 293.0, 286.0, 296.0, 286.0, 314.0, 316.0, 317.0, 319.0, 288.0, 294.0, 319.0, 317.0, 298.0, 284.0, 316.0, 320.0, 290.0, 289.0, 319.0, 314.0, 308.0, 319.0, 283.0, 296.0, 290.0, 283.0, 316.0, 311.0, 291.0, 288.0, 291.0, 291.0, 294.0, 293.0, 291.0, 291.0, 295.0, 284.0, 313.0, 314.0, 295.0, 
287.0, 313.0, 314.0, 291.0, 288.0, 313.0, 314.0, 264.0, 261.0, 304.0, 326.0, 283.0, 304.0, 292.0, 284.0, 309.0, 324.0, 314.0, 322.0, 242.0, 277.0, 294.0, 288.0, 287.0, 289.0, 317.0, 319.0, 320.0, 316.0, 291.0, 291.0, 316.0, 314.0, 283.0, 296.0, 319.0, 314.0, 319.0, 314.0, 293.0, 289.0, 283.0, 299.0, 316.0, 317.0, 316.0, 320.0, 319.0, 320.0, 324.0, 309.0, 321.0, 309.0, 309.0, 321.0, 324.0, 309.0, 289.0, 293.0, 292.0, 284.0, 291.0, 288.0, 293.0, 294.0, 294.0, 293.0, 289.0, 293.0, 289.0, 293.0, 316.0, 317.0, 291.0, 291.0, 313.0, 317.0, 316.0, 314.0, 313.0, 308.0, 316.0, 305.0, 292.0, 290.0, 312.0, 318.0, 316.0, 314.0, 314.0, 316.0, 284.0, 280.0, 321.0, 303.0, 301.0, 281.0, 313.0, 314.0, 288.0, 285.0, 314.0, 316.0]}, "sampler_perf": {"mean_env_wait_ms": 1.0152699162864507, "mean_processing_ms": 0.27129002186513274, "mean_inference_ms": 1.588063531007753}, "off_policy_estimator": {}, "info": {"num_steps_trained": 6384000, "num_steps_sampled": 3404800, "sample_time_ms": 24421.033, "load_time_ms": 37.381, "grad_time_ms": 10403.492, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0023645039182156324, "policy_loss": -0.005783146247267723, "vf_loss": 87.20269775390625, "vf_explained_var": 0.7547242045402527, "kl": 0.0018250799039378762, "entropy": 1.1452516317367554, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3404800, "episodes_total": 8512, "training_iteration": 266, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-59-56", "timestamp": 1660255196, "time_this_iter_s": 37.62380003929138, "time_total_s": 13610.742716312408, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 13610.742716312408, "timesteps_since_restore": 3404800, "iterations_since_restore": 266, "perf": {"cpu_util_percent": 28.747169811320756, "ram_util_percent": 58.75471698113208}}
+{"episode_reward_max": 639.0, "episode_reward_min": 522.0, "episode_reward_mean": 608.64, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 260.0}, "policy_reward_max": {"ppo": 330.0}, "policy_reward_mean": {"ppo": 304.32}, "custom_metrics": {"sparse_reward_mean": 211.2, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 186.24, "shaped_reward_min": 162, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.18, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 18.56, "onion_pickup_agent_1_min": 13, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 16.66, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 17.71, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 25, "onion_drop_agent_0_mean": 0.65, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.6, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.17, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.25, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 16.25, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.59, "potting_onion_agent_1_min": 13, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.29, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.71, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 13, "useful_dish_pickup_agent_0_mean": 5.56, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.93, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.54, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.49, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.05, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.62, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.07, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.54, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.02, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.05, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.25, 
"optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.59, "optimal_onion_potting_agent_1_min": 13, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.25, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.59, "viable_onion_potting_agent_1_min": 13, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [630.0, 573.0, 542.0, 630.0, 630.0, 630.0, 636.0, 633.0, 582.0, 522.0, 576.0, 636.0, 630.0, 587.0, 630.0, 581.0, 633.0, 587.0, 569.0, 627.0, 636.0, 582.0, 633.0, 576.0, 584.0, 633.0, 636.0, 636.0, 624.0, 627.0, 639.0, 636.0, 630.0, 579.0, 633.0, 633.0, 582.0, 582.0, 633.0, 636.0, 639.0, 633.0, 630.0, 630.0, 633.0, 582.0, 576.0, 579.0, 587.0, 587.0, 582.0, 582.0, 633.0, 582.0, 630.0, 630.0, 621.0, 621.0, 582.0, 630.0, 630.0, 630.0, 564.0, 624.0, 582.0, 627.0, 573.0, 630.0, 630.0, 627.0, 582.0, 582.0, 627.0, 630.0, 630.0, 636.0, 579.0, 624.0, 576.0, 633.0, 630.0, 630.0, 576.0, 633.0, 636.0, 627.0, 582.0, 576.0, 621.0, 639.0, 582.0, 582.0, 630.0, 582.0, 579.0, 533.0, 579.0, 582.0, 630.0, 636.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [317.0, 313.0, 274.0, 299.0, 276.0, 266.0, 306.0, 324.0, 321.0, 309.0, 314.0, 316.0, 324.0, 312.0, 319.0, 314.0, 283.0, 299.0, 262.0, 260.0, 293.0, 283.0, 308.0, 328.0, 311.0, 319.0, 287.0, 300.0, 313.0, 317.0, 285.0, 296.0, 315.0, 318.0, 302.0, 285.0, 282.0, 287.0, 319.0, 308.0, 317.0, 319.0, 291.0, 291.0, 313.0, 320.0, 276.0, 300.0, 295.0, 289.0, 314.0, 319.0, 316.0, 320.0, 320.0, 316.0, 319.0, 305.0, 313.0, 314.0, 324.0, 315.0, 314.0, 322.0, 316.0, 314.0, 283.0, 296.0, 319.0, 314.0, 319.0, 314.0, 293.0, 289.0, 283.0, 299.0, 316.0, 317.0, 316.0, 320.0, 319.0, 320.0, 324.0, 309.0, 321.0, 309.0, 309.0, 321.0, 324.0, 309.0, 289.0, 293.0, 292.0, 284.0, 291.0, 288.0, 293.0, 
294.0, 294.0, 293.0, 289.0, 293.0, 289.0, 293.0, 316.0, 317.0, 291.0, 291.0, 313.0, 317.0, 316.0, 314.0, 313.0, 308.0, 316.0, 305.0, 292.0, 290.0, 312.0, 318.0, 316.0, 314.0, 314.0, 316.0, 284.0, 280.0, 321.0, 303.0, 301.0, 281.0, 313.0, 314.0, 288.0, 285.0, 314.0, 316.0, 313.0, 317.0, 323.0, 304.0, 288.0, 294.0, 285.0, 297.0, 330.0, 297.0, 321.0, 309.0, 315.0, 315.0, 312.0, 324.0, 295.0, 284.0, 315.0, 309.0, 293.0, 283.0, 322.0, 311.0, 324.0, 306.0, 316.0, 314.0, 288.0, 288.0, 314.0, 319.0, 314.0, 322.0, 313.0, 314.0, 287.0, 295.0, 290.0, 286.0, 313.0, 308.0, 314.0, 325.0, 291.0, 291.0, 286.0, 296.0, 309.0, 321.0, 288.0, 294.0, 285.0, 294.0, 267.0, 266.0, 293.0, 286.0, 296.0, 286.0, 314.0, 316.0, 317.0, 319.0]}, "sampler_perf": {"mean_env_wait_ms": 1.0132060848493245, "mean_processing_ms": 0.27088101809255155, "mean_inference_ms": 1.5871323468376288}, "off_policy_estimator": {}, "info": {"num_steps_trained": 6408000, "num_steps_sampled": 3417600, "sample_time_ms": 24236.974, "load_time_ms": 37.027, "grad_time_ms": 10553.07, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.002214438281953335, "policy_loss": -0.00573044503107667, "vf_loss": 85.19269561767578, "vf_explained_var": 0.7630549073219299, "kl": 0.0019984643440693617, "entropy": 1.1487520933151245, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3417600, "episodes_total": 8544, "training_iteration": 267, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-00-32", "timestamp": 1660255232, "time_this_iter_s": 35.333903789520264, "time_total_s": 13646.076620101929, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 13646.076620101929, "timesteps_since_restore": 3417600, "iterations_since_restore": 267, "perf": {"cpu_util_percent": 32.378, "ram_util_percent": 58.867999999999995}}
+{"episode_reward_max": 639.0, "episode_reward_min": 402.0, "episode_reward_mean": 603.9, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 200.0}, "policy_reward_max": {"ppo": 331.0}, "policy_reward_mean": {"ppo": 301.95}, "custom_metrics": {"sparse_reward_mean": 209.4, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 185.1, "shaped_reward_min": 122, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.06, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 18.73, "onion_pickup_agent_1_min": 13, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 16.28, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 17.89, "useful_onion_pickup_agent_1_min": 12, "useful_onion_pickup_agent_1_max": 25, "onion_drop_agent_0_mean": 0.69, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 0.79, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.27, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.28, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 16.05, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.54, "potting_onion_agent_1_min": 12, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.31, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.68, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 13, "useful_dish_pickup_agent_0_mean": 5.58, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.88, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.57, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.57, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.05, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.05, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.64, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.98, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.54, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.93, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.04, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.05, 
"optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.54, "optimal_onion_potting_agent_1_min": 12, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.05, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.54, "viable_onion_potting_agent_1_min": 12, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 630.0, 576.0, 630.0, 636.0, 582.0, 522.0, 639.0, 587.0, 582.0, 473.0, 633.0, 630.0, 584.0, 581.0, 636.0, 587.0, 579.0, 402.0, 627.0, 630.0, 630.0, 636.0, 627.0, 633.0, 636.0, 582.0, 627.0, 462.0, 630.0, 630.0, 633.0, 582.0, 627.0, 573.0, 630.0, 630.0, 627.0, 582.0, 582.0, 627.0, 630.0, 630.0, 636.0, 579.0, 624.0, 576.0, 633.0, 630.0, 630.0, 576.0, 633.0, 636.0, 627.0, 582.0, 576.0, 621.0, 639.0, 582.0, 582.0, 630.0, 582.0, 579.0, 533.0, 579.0, 582.0, 630.0, 636.0, 630.0, 573.0, 542.0, 630.0, 630.0, 630.0, 636.0, 633.0, 582.0, 522.0, 576.0, 636.0, 630.0, 587.0, 630.0, 581.0, 633.0, 587.0, 569.0, 627.0, 636.0, 582.0, 633.0, 576.0, 584.0, 633.0, 636.0, 636.0, 624.0, 627.0, 639.0, 636.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [288.0, 291.0, 308.0, 322.0, 285.0, 291.0, 321.0, 309.0, 321.0, 315.0, 294.0, 288.0, 266.0, 256.0, 322.0, 317.0, 295.0, 292.0, 296.0, 286.0, 229.0, 244.0, 331.0, 302.0, 316.0, 314.0, 287.0, 297.0, 296.0, 285.0, 327.0, 309.0, 291.0, 296.0, 284.0, 295.0, 202.0, 200.0, 308.0, 319.0, 311.0, 319.0, 322.0, 308.0, 316.0, 320.0, 306.0, 321.0, 317.0, 316.0, 314.0, 322.0, 294.0, 288.0, 316.0, 311.0, 242.0, 220.0, 313.0, 317.0, 324.0, 306.0, 322.0, 311.0, 301.0, 281.0, 313.0, 314.0, 288.0, 285.0, 314.0, 316.0, 313.0, 317.0, 323.0, 304.0, 288.0, 294.0, 285.0, 297.0, 330.0, 297.0, 321.0, 309.0, 315.0, 315.0, 312.0, 324.0, 295.0, 284.0, 315.0, 309.0, 293.0, 283.0, 322.0, 311.0, 324.0, 
306.0, 316.0, 314.0, 288.0, 288.0, 314.0, 319.0, 314.0, 322.0, 313.0, 314.0, 287.0, 295.0, 290.0, 286.0, 313.0, 308.0, 314.0, 325.0, 291.0, 291.0, 286.0, 296.0, 309.0, 321.0, 288.0, 294.0, 285.0, 294.0, 267.0, 266.0, 293.0, 286.0, 296.0, 286.0, 314.0, 316.0, 317.0, 319.0, 317.0, 313.0, 274.0, 299.0, 276.0, 266.0, 306.0, 324.0, 321.0, 309.0, 314.0, 316.0, 324.0, 312.0, 319.0, 314.0, 283.0, 299.0, 262.0, 260.0, 293.0, 283.0, 308.0, 328.0, 311.0, 319.0, 287.0, 300.0, 313.0, 317.0, 285.0, 296.0, 315.0, 318.0, 302.0, 285.0, 282.0, 287.0, 319.0, 308.0, 317.0, 319.0, 291.0, 291.0, 313.0, 320.0, 276.0, 300.0, 295.0, 289.0, 314.0, 319.0, 316.0, 320.0, 320.0, 316.0, 319.0, 305.0, 313.0, 314.0, 324.0, 315.0, 314.0, 322.0]}, "sampler_perf": {"mean_env_wait_ms": 1.0111539095910416, "mean_processing_ms": 0.2704732512620114, "mean_inference_ms": 1.5859630571947259}, "off_policy_estimator": {}, "info": {"num_steps_trained": 6432000, "num_steps_sampled": 3430400, "sample_time_ms": 24395.085, "load_time_ms": 36.597, "grad_time_ms": 10568.153, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.002145373960956931, "policy_loss": -0.006650958210229874, "vf_loss": 93.7173843383789, "vf_explained_var": 0.7514896392822266, "kl": 0.0022997509222477674, "entropy": 1.150799036026001, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3430400, "episodes_total": 8576, "training_iteration": 268, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-01-05", "timestamp": 1660255265, "time_this_iter_s": 33.08881878852844, "time_total_s": 13679.165438890457, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 13679.165438890457, "timesteps_since_restore": 3430400, "iterations_since_restore": 268, "perf": {"cpu_util_percent": 29.461702127659574, "ram_util_percent": 58.71702127659575}}
+{"episode_reward_max": 639.0, "episode_reward_min": 402.0, "episode_reward_mean": 603.37, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 200.0}, "policy_reward_max": {"ppo": 331.0}, "policy_reward_mean": {"ppo": 301.685}, "custom_metrics": {"sparse_reward_mean": 209.2, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 184.97, "shaped_reward_min": 122, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.28, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 18.71, "onion_pickup_agent_1_min": 13, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 16.49, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 17.93, "useful_onion_pickup_agent_1_min": 12, "useful_onion_pickup_agent_1_max": 26, "onion_drop_agent_0_mean": 0.77, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 0.87, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.31, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.34, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 16.16, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 17.47, "potting_onion_agent_1_min": 12, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 6.16, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.85, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 13, "useful_dish_pickup_agent_0_mean": 5.43, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.93, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.55, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.58, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.06, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.05, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.5, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 5.12, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.42, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 5.05, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.03, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.16, 
"optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 17.47, "optimal_onion_potting_agent_1_min": 12, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.16, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 17.47, "viable_onion_potting_agent_1_min": 12, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [633.0, 561.0, 582.0, 627.0, 627.0, 633.0, 630.0, 636.0, 633.0, 558.0, 582.0, 579.0, 639.0, 636.0, 570.0, 582.0, 587.0, 639.0, 627.0, 584.0, 587.0, 579.0, 582.0, 624.0, 579.0, 636.0, 633.0, 633.0, 582.0, 627.0, 576.0, 570.0, 579.0, 582.0, 630.0, 636.0, 630.0, 573.0, 542.0, 630.0, 630.0, 630.0, 636.0, 633.0, 582.0, 522.0, 576.0, 636.0, 630.0, 587.0, 630.0, 581.0, 633.0, 587.0, 569.0, 627.0, 636.0, 582.0, 633.0, 576.0, 584.0, 633.0, 636.0, 636.0, 624.0, 627.0, 639.0, 636.0, 579.0, 630.0, 576.0, 630.0, 636.0, 582.0, 522.0, 639.0, 587.0, 582.0, 473.0, 633.0, 630.0, 584.0, 581.0, 636.0, 587.0, 579.0, 402.0, 627.0, 630.0, 630.0, 636.0, 627.0, 633.0, 636.0, 582.0, 627.0, 462.0, 630.0, 630.0, 633.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [321.0, 312.0, 289.0, 272.0, 286.0, 296.0, 317.0, 310.0, 313.0, 314.0, 324.0, 309.0, 313.0, 317.0, 312.0, 324.0, 311.0, 322.0, 276.0, 282.0, 293.0, 289.0, 293.0, 286.0, 316.0, 323.0, 305.0, 331.0, 291.0, 279.0, 290.0, 292.0, 292.0, 295.0, 317.0, 322.0, 303.0, 324.0, 298.0, 286.0, 293.0, 294.0, 293.0, 286.0, 290.0, 292.0, 308.0, 316.0, 278.0, 301.0, 314.0, 322.0, 312.0, 321.0, 316.0, 317.0, 285.0, 297.0, 313.0, 314.0, 280.0, 296.0, 288.0, 282.0, 293.0, 286.0, 296.0, 286.0, 314.0, 316.0, 317.0, 319.0, 317.0, 313.0, 274.0, 299.0, 276.0, 266.0, 306.0, 324.0, 321.0, 309.0, 314.0, 316.0, 324.0, 312.0, 319.0, 314.0, 283.0, 299.0, 262.0, 260.0, 293.0, 283.0, 308.0, 328.0, 311.0, 
319.0, 287.0, 300.0, 313.0, 317.0, 285.0, 296.0, 315.0, 318.0, 302.0, 285.0, 282.0, 287.0, 319.0, 308.0, 317.0, 319.0, 291.0, 291.0, 313.0, 320.0, 276.0, 300.0, 295.0, 289.0, 314.0, 319.0, 316.0, 320.0, 320.0, 316.0, 319.0, 305.0, 313.0, 314.0, 324.0, 315.0, 314.0, 322.0, 288.0, 291.0, 308.0, 322.0, 285.0, 291.0, 321.0, 309.0, 321.0, 315.0, 294.0, 288.0, 266.0, 256.0, 322.0, 317.0, 295.0, 292.0, 296.0, 286.0, 229.0, 244.0, 331.0, 302.0, 316.0, 314.0, 287.0, 297.0, 296.0, 285.0, 327.0, 309.0, 291.0, 296.0, 284.0, 295.0, 202.0, 200.0, 308.0, 319.0, 311.0, 319.0, 322.0, 308.0, 316.0, 320.0, 306.0, 321.0, 317.0, 316.0, 314.0, 322.0, 294.0, 288.0, 316.0, 311.0, 242.0, 220.0, 313.0, 317.0, 324.0, 306.0, 322.0, 311.0]}, "sampler_perf": {"mean_env_wait_ms": 1.0091026125530362, "mean_processing_ms": 0.27006446196399875, "mean_inference_ms": 1.584328225519923}, "off_policy_estimator": {}, "info": {"num_steps_trained": 6456000, "num_steps_sampled": 3443200, "sample_time_ms": 23964.302, "load_time_ms": 37.107, "grad_time_ms": 10520.809, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.005307864397764206, "policy_loss": -0.0031857620924711227, "vf_loss": 90.64007568359375, "vf_explained_var": 0.7599647641181946, "kl": 0.0021453702356666327, "entropy": 1.14076566696167, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3443200, "episodes_total": 8608, "training_iteration": 269, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-01-34", "timestamp": 1660255294, "time_this_iter_s": 29.06058406829834, "time_total_s": 13708.226022958755, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 13708.226022958755, "timesteps_since_restore": 3443200, "iterations_since_restore": 269, "perf": {"cpu_util_percent": 32.897560975609764, "ram_util_percent": 58.64878048780488}}
+{"episode_reward_max": 639.0, "episode_reward_min": 402.0, "episode_reward_mean": 601.92, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 200.0}, "policy_reward_max": {"ppo": 331.0}, "policy_reward_mean": {"ppo": 300.96}, "custom_metrics": {"sparse_reward_mean": 208.4, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 185.12, "shaped_reward_min": 122, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.36, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 18.66, "onion_pickup_agent_1_min": 13, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 16.57, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 17.89, "useful_onion_pickup_agent_1_min": 12, "useful_onion_pickup_agent_1_max": 26, "onion_drop_agent_0_mean": 0.73, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 0.97, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.27, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.32, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 16.24, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 17.33, "potting_onion_agent_1_min": 12, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 6.08, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.88, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 12, "useful_dish_pickup_agent_0_mean": 5.46, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 5.01, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.45, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.59, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.04, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.5, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 5.11, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.45, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 5.01, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.24, 
"optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 17.33, "optimal_onion_potting_agent_1_min": 12, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.24, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 17.33, "viable_onion_potting_agent_1_min": 12, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 576.0, 584.0, 587.0, 636.0, 564.0, 630.0, 587.0, 582.0, 639.0, 630.0, 587.0, 579.0, 639.0, 587.0, 636.0, 530.0, 636.0, 630.0, 633.0, 530.0, 633.0, 639.0, 636.0, 582.0, 587.0, 582.0, 627.0, 587.0, 576.0, 639.0, 590.0, 624.0, 627.0, 639.0, 636.0, 579.0, 630.0, 576.0, 630.0, 636.0, 582.0, 522.0, 639.0, 587.0, 582.0, 473.0, 633.0, 630.0, 584.0, 581.0, 636.0, 587.0, 579.0, 402.0, 627.0, 630.0, 630.0, 636.0, 627.0, 633.0, 636.0, 582.0, 627.0, 462.0, 630.0, 630.0, 633.0, 633.0, 561.0, 582.0, 627.0, 627.0, 633.0, 630.0, 636.0, 633.0, 558.0, 582.0, 579.0, 639.0, 636.0, 570.0, 582.0, 587.0, 639.0, 627.0, 584.0, 587.0, 579.0, 582.0, 624.0, 579.0, 636.0, 633.0, 633.0, 582.0, 627.0, 576.0, 570.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [293.0, 289.0, 294.0, 282.0, 293.0, 291.0, 296.0, 291.0, 315.0, 321.0, 282.0, 282.0, 322.0, 308.0, 308.0, 279.0, 288.0, 294.0, 314.0, 325.0, 311.0, 319.0, 293.0, 294.0, 288.0, 291.0, 319.0, 320.0, 288.0, 299.0, 319.0, 317.0, 268.0, 262.0, 324.0, 312.0, 316.0, 314.0, 314.0, 319.0, 258.0, 272.0, 314.0, 319.0, 325.0, 314.0, 321.0, 315.0, 283.0, 299.0, 293.0, 294.0, 288.0, 294.0, 316.0, 311.0, 290.0, 297.0, 285.0, 291.0, 322.0, 317.0, 291.0, 299.0, 319.0, 305.0, 313.0, 314.0, 324.0, 315.0, 314.0, 322.0, 288.0, 291.0, 308.0, 322.0, 285.0, 291.0, 321.0, 309.0, 321.0, 315.0, 294.0, 288.0, 266.0, 256.0, 322.0, 317.0, 295.0, 292.0, 296.0, 286.0, 229.0, 244.0, 331.0, 302.0, 316.0, 
314.0, 287.0, 297.0, 296.0, 285.0, 327.0, 309.0, 291.0, 296.0, 284.0, 295.0, 202.0, 200.0, 308.0, 319.0, 311.0, 319.0, 322.0, 308.0, 316.0, 320.0, 306.0, 321.0, 317.0, 316.0, 314.0, 322.0, 294.0, 288.0, 316.0, 311.0, 242.0, 220.0, 313.0, 317.0, 324.0, 306.0, 322.0, 311.0, 321.0, 312.0, 289.0, 272.0, 286.0, 296.0, 317.0, 310.0, 313.0, 314.0, 324.0, 309.0, 313.0, 317.0, 312.0, 324.0, 311.0, 322.0, 276.0, 282.0, 293.0, 289.0, 293.0, 286.0, 316.0, 323.0, 305.0, 331.0, 291.0, 279.0, 290.0, 292.0, 292.0, 295.0, 317.0, 322.0, 303.0, 324.0, 298.0, 286.0, 293.0, 294.0, 293.0, 286.0, 290.0, 292.0, 308.0, 316.0, 278.0, 301.0, 314.0, 322.0, 312.0, 321.0, 316.0, 317.0, 285.0, 297.0, 313.0, 314.0, 280.0, 296.0, 288.0, 282.0]}, "sampler_perf": {"mean_env_wait_ms": 1.0070340183284676, "mean_processing_ms": 0.26964921546752857, "mean_inference_ms": 1.5821965593270972}, "off_policy_estimator": {}, "info": {"num_steps_trained": 6480000, "num_steps_sampled": 3456000, "sample_time_ms": 23540.277, "load_time_ms": 37.062, "grad_time_ms": 10283.314, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.001998053165152669, "policy_loss": -0.00615869602188468, "vf_loss": 87.24394989013672, "vf_explained_var": 0.7729328274726868, "kl": 0.00186056864913553, "entropy": 1.1353095769882202, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3456000, "episodes_total": 8640, "training_iteration": 270, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-02-02", "timestamp": 1660255322, "time_this_iter_s": 27.856099128723145, "time_total_s": 13736.082122087479, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 13736.082122087479, "timesteps_since_restore": 3456000, "iterations_since_restore": 270, "perf": {"cpu_util_percent": 34.1025641025641, "ram_util_percent": 58.69999999999998}}
+{"episode_reward_max": 639.0, "episode_reward_min": 436.0, "episode_reward_mean": 604.7, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 211.0}, "policy_reward_max": {"ppo": 331.0}, "policy_reward_mean": {"ppo": 302.35}, "custom_metrics": {"sparse_reward_mean": 209.2, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 186.3, "shaped_reward_min": 142, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.38, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 18.65, "onion_pickup_agent_1_min": 13, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 16.67, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 17.82, "useful_onion_pickup_agent_1_min": 12, "useful_onion_pickup_agent_1_max": 26, "onion_drop_agent_0_mean": 0.68, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.91, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.23, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, "useful_onion_drop_agent_1_mean": 0.32, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 16.36, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 17.47, "potting_onion_agent_1_min": 12, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 6.14, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 13, "dish_pickup_agent_1_mean": 5.95, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 12, "useful_dish_pickup_agent_0_mean": 5.42, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 5.1, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.54, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 0.55, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.49, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 5.19, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.43, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 5.1, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.03, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.36, 
"optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 17.47, "optimal_onion_potting_agent_1_min": 12, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.36, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 17.47, "viable_onion_potting_agent_1_min": 12, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 587.0, 627.0, 636.0, 633.0, 636.0, 627.0, 633.0, 579.0, 582.0, 573.0, 633.0, 570.0, 624.0, 627.0, 639.0, 636.0, 633.0, 636.0, 630.0, 579.0, 576.0, 436.0, 639.0, 587.0, 636.0, 636.0, 639.0, 587.0, 636.0, 587.0, 639.0, 462.0, 630.0, 630.0, 633.0, 633.0, 561.0, 582.0, 627.0, 627.0, 633.0, 630.0, 636.0, 633.0, 558.0, 582.0, 579.0, 639.0, 636.0, 570.0, 582.0, 587.0, 639.0, 627.0, 584.0, 587.0, 579.0, 582.0, 624.0, 579.0, 636.0, 633.0, 633.0, 582.0, 627.0, 576.0, 570.0, 582.0, 576.0, 584.0, 587.0, 636.0, 564.0, 630.0, 587.0, 582.0, 639.0, 630.0, 587.0, 579.0, 639.0, 587.0, 636.0, 530.0, 636.0, 630.0, 633.0, 530.0, 633.0, 639.0, 636.0, 582.0, 587.0, 582.0, 627.0, 587.0, 576.0, 639.0, 590.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [294.0, 288.0, 284.0, 303.0, 315.0, 312.0, 314.0, 322.0, 311.0, 322.0, 319.0, 317.0, 310.0, 317.0, 324.0, 309.0, 289.0, 290.0, 294.0, 288.0, 284.0, 289.0, 303.0, 330.0, 277.0, 293.0, 316.0, 308.0, 321.0, 306.0, 314.0, 325.0, 319.0, 317.0, 314.0, 319.0, 331.0, 305.0, 321.0, 309.0, 293.0, 286.0, 288.0, 288.0, 211.0, 225.0, 312.0, 327.0, 293.0, 294.0, 314.0, 322.0, 317.0, 319.0, 314.0, 325.0, 302.0, 285.0, 314.0, 322.0, 293.0, 294.0, 317.0, 322.0, 242.0, 220.0, 313.0, 317.0, 324.0, 306.0, 322.0, 311.0, 321.0, 312.0, 289.0, 272.0, 286.0, 296.0, 317.0, 310.0, 313.0, 314.0, 324.0, 309.0, 313.0, 317.0, 312.0, 324.0, 311.0, 322.0, 276.0, 282.0, 293.0, 289.0, 293.0, 286.0, 316.0, 
323.0, 305.0, 331.0, 291.0, 279.0, 290.0, 292.0, 292.0, 295.0, 317.0, 322.0, 303.0, 324.0, 298.0, 286.0, 293.0, 294.0, 293.0, 286.0, 290.0, 292.0, 308.0, 316.0, 278.0, 301.0, 314.0, 322.0, 312.0, 321.0, 316.0, 317.0, 285.0, 297.0, 313.0, 314.0, 280.0, 296.0, 288.0, 282.0, 293.0, 289.0, 294.0, 282.0, 293.0, 291.0, 296.0, 291.0, 315.0, 321.0, 282.0, 282.0, 322.0, 308.0, 308.0, 279.0, 288.0, 294.0, 314.0, 325.0, 311.0, 319.0, 293.0, 294.0, 288.0, 291.0, 319.0, 320.0, 288.0, 299.0, 319.0, 317.0, 268.0, 262.0, 324.0, 312.0, 316.0, 314.0, 314.0, 319.0, 258.0, 272.0, 314.0, 319.0, 325.0, 314.0, 321.0, 315.0, 283.0, 299.0, 293.0, 294.0, 288.0, 294.0, 316.0, 311.0, 290.0, 297.0, 285.0, 291.0, 322.0, 317.0, 291.0, 299.0]}, "sampler_perf": {"mean_env_wait_ms": 1.0049654393783378, "mean_processing_ms": 0.26923341324594424, "mean_inference_ms": 1.5798214340992636}, "off_policy_estimator": {}, "info": {"num_steps_trained": 6504000, "num_steps_sampled": 3468800, "sample_time_ms": 23269.747, "load_time_ms": 36.886, "grad_time_ms": 10264.548, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.00183187669608742, "policy_loss": -0.006337564438581467, "vf_loss": 87.34710693359375, "vf_explained_var": 0.7639560103416443, "kl": 0.0018626012606546283, "entropy": 1.1305490732192993, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3468800, "episodes_total": 8672, "training_iteration": 271, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-02-32", "timestamp": 1660255352, "time_this_iter_s": 29.946385145187378, "time_total_s": 13766.028507232666, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 13766.028507232666, "timesteps_since_restore": 3468800, "iterations_since_restore": 271, "perf": {"cpu_util_percent": 32.345238095238095, "ram_util_percent": 58.68095238095239}}
+{"episode_reward_max": 639.0, "episode_reward_min": 371.0, "episode_reward_mean": 604.34, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 173.0}, "policy_reward_max": {"ppo": 331.0}, "policy_reward_mean": {"ppo": 302.17}, "custom_metrics": {"sparse_reward_mean": 208.6, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 187.14, "shaped_reward_min": 131, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.28, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 18.6, "onion_pickup_agent_1_min": 13, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 16.6, "useful_onion_pickup_agent_0_min": 9, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 17.78, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 25, "onion_drop_agent_0_mean": 0.61, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.77, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.2, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.22, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 16.38, "potting_onion_agent_0_min": 9, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.58, "potting_onion_agent_1_min": 12, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 6.14, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 13, "dish_pickup_agent_1_mean": 5.98, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 12, "useful_dish_pickup_agent_0_mean": 5.53, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 5.19, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.51, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 0.6, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.51, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 5.15, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.42, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 5.07, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.05, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.38, 
"optimal_onion_potting_agent_0_min": 9, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.58, "optimal_onion_potting_agent_1_min": 12, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.38, "viable_onion_potting_agent_0_min": 9, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.58, "viable_onion_potting_agent_1_min": 12, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [633.0, 627.0, 636.0, 636.0, 633.0, 582.0, 582.0, 639.0, 639.0, 582.0, 639.0, 425.0, 587.0, 630.0, 636.0, 582.0, 636.0, 582.0, 371.0, 579.0, 582.0, 636.0, 636.0, 636.0, 636.0, 636.0, 627.0, 579.0, 639.0, 636.0, 582.0, 636.0, 582.0, 627.0, 576.0, 570.0, 582.0, 576.0, 584.0, 587.0, 636.0, 564.0, 630.0, 587.0, 582.0, 639.0, 630.0, 587.0, 579.0, 639.0, 587.0, 636.0, 530.0, 636.0, 630.0, 633.0, 530.0, 633.0, 639.0, 636.0, 582.0, 587.0, 582.0, 627.0, 587.0, 576.0, 639.0, 590.0, 582.0, 587.0, 627.0, 636.0, 633.0, 636.0, 627.0, 633.0, 579.0, 582.0, 573.0, 633.0, 570.0, 624.0, 627.0, 639.0, 636.0, 633.0, 636.0, 630.0, 579.0, 576.0, 436.0, 639.0, 587.0, 636.0, 636.0, 639.0, 587.0, 636.0, 587.0, 639.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [311.0, 322.0, 321.0, 306.0, 319.0, 317.0, 324.0, 312.0, 309.0, 324.0, 293.0, 289.0, 291.0, 291.0, 317.0, 322.0, 319.0, 320.0, 293.0, 289.0, 319.0, 320.0, 206.0, 219.0, 301.0, 286.0, 314.0, 316.0, 314.0, 322.0, 285.0, 297.0, 316.0, 320.0, 288.0, 294.0, 198.0, 173.0, 291.0, 288.0, 295.0, 287.0, 321.0, 315.0, 311.0, 325.0, 314.0, 322.0, 322.0, 314.0, 326.0, 310.0, 314.0, 313.0, 286.0, 293.0, 319.0, 320.0, 317.0, 319.0, 291.0, 291.0, 309.0, 327.0, 285.0, 297.0, 313.0, 314.0, 280.0, 296.0, 288.0, 282.0, 293.0, 289.0, 294.0, 282.0, 293.0, 291.0, 296.0, 291.0, 315.0, 321.0, 282.0, 282.0, 322.0, 308.0, 308.0, 279.0, 288.0, 294.0, 314.0, 325.0, 311.0, 319.0, 293.0, 294.0, 288.0, 
291.0, 319.0, 320.0, 288.0, 299.0, 319.0, 317.0, 268.0, 262.0, 324.0, 312.0, 316.0, 314.0, 314.0, 319.0, 258.0, 272.0, 314.0, 319.0, 325.0, 314.0, 321.0, 315.0, 283.0, 299.0, 293.0, 294.0, 288.0, 294.0, 316.0, 311.0, 290.0, 297.0, 285.0, 291.0, 322.0, 317.0, 291.0, 299.0, 294.0, 288.0, 284.0, 303.0, 315.0, 312.0, 314.0, 322.0, 311.0, 322.0, 319.0, 317.0, 310.0, 317.0, 324.0, 309.0, 289.0, 290.0, 294.0, 288.0, 284.0, 289.0, 303.0, 330.0, 277.0, 293.0, 316.0, 308.0, 321.0, 306.0, 314.0, 325.0, 319.0, 317.0, 314.0, 319.0, 331.0, 305.0, 321.0, 309.0, 293.0, 286.0, 288.0, 288.0, 211.0, 225.0, 312.0, 327.0, 293.0, 294.0, 314.0, 322.0, 317.0, 319.0, 314.0, 325.0, 302.0, 285.0, 314.0, 322.0, 293.0, 294.0, 317.0, 322.0]}, "sampler_perf": {"mean_env_wait_ms": 1.002917566902111, "mean_processing_ms": 0.26882194776394586, "mean_inference_ms": 1.577540173061489}, "off_policy_estimator": {}, "info": {"num_steps_trained": 6528000, "num_steps_sampled": 3481600, "sample_time_ms": 22780.285, "load_time_ms": 36.737, "grad_time_ms": 10254.225, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0052925958298146725, "policy_loss": -0.002937593497335911, "vf_loss": 88.02587890625, "vf_explained_var": 0.7725896835327148, "kl": 0.0019184405682608485, "entropy": 1.144766926765442, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3481600, "episodes_total": 8704, "training_iteration": 272, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-03-02", "timestamp": 1660255382, "time_this_iter_s": 30.47255301475525, "time_total_s": 13796.501060247421, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 13796.501060247421, "timesteps_since_restore": 3481600, "iterations_since_restore": 272, "perf": {"cpu_util_percent": 32.334090909090904, "ram_util_percent": 58.665909090909096}}
+{"episode_reward_max": 639.0, "episode_reward_min": 371.0, "episode_reward_mean": 611.97, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 173.0}, "policy_reward_max": {"ppo": 331.0}, "policy_reward_mean": {"ppo": 305.985}, "custom_metrics": {"sparse_reward_mean": 211.6, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 188.77, "shaped_reward_min": 131, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.15, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 18.89, "onion_pickup_agent_1_min": 13, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 16.49, "useful_onion_pickup_agent_0_min": 9, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 17.97, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 25, "onion_drop_agent_0_mean": 0.55, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.72, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.27, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.26, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 16.36, "potting_onion_agent_0_min": 9, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.91, "potting_onion_agent_1_min": 13, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 6.38, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 13, "dish_pickup_agent_1_mean": 5.69, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.73, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 5.09, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.56, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 0.47, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.7, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 5.03, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.63, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.98, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.04, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.36, 
"optimal_onion_potting_agent_0_min": 9, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.91, "optimal_onion_potting_agent_1_min": 13, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.36, "viable_onion_potting_agent_0_min": 9, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.91, "viable_onion_potting_agent_1_min": 13, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [587.0, 630.0, 630.0, 633.0, 636.0, 636.0, 636.0, 636.0, 633.0, 636.0, 615.0, 636.0, 630.0, 636.0, 576.0, 633.0, 627.0, 639.0, 582.0, 633.0, 582.0, 633.0, 627.0, 630.0, 636.0, 582.0, 633.0, 627.0, 639.0, 639.0, 627.0, 633.0, 587.0, 576.0, 639.0, 590.0, 582.0, 587.0, 627.0, 636.0, 633.0, 636.0, 627.0, 633.0, 579.0, 582.0, 573.0, 633.0, 570.0, 624.0, 627.0, 639.0, 636.0, 633.0, 636.0, 630.0, 579.0, 576.0, 436.0, 639.0, 587.0, 636.0, 636.0, 639.0, 587.0, 636.0, 587.0, 639.0, 633.0, 627.0, 636.0, 636.0, 633.0, 582.0, 582.0, 639.0, 639.0, 582.0, 639.0, 425.0, 587.0, 630.0, 636.0, 582.0, 636.0, 582.0, 371.0, 579.0, 582.0, 636.0, 636.0, 636.0, 636.0, 636.0, 627.0, 579.0, 639.0, 636.0, 582.0, 636.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [289.0, 298.0, 318.0, 312.0, 318.0, 312.0, 324.0, 309.0, 319.0, 317.0, 314.0, 322.0, 324.0, 312.0, 314.0, 322.0, 316.0, 317.0, 322.0, 314.0, 305.0, 310.0, 325.0, 311.0, 321.0, 309.0, 319.0, 317.0, 296.0, 280.0, 317.0, 316.0, 321.0, 306.0, 324.0, 315.0, 296.0, 286.0, 321.0, 312.0, 288.0, 294.0, 318.0, 315.0, 316.0, 311.0, 320.0, 310.0, 317.0, 319.0, 294.0, 288.0, 318.0, 315.0, 316.0, 311.0, 322.0, 317.0, 327.0, 312.0, 316.0, 311.0, 308.0, 325.0, 290.0, 297.0, 285.0, 291.0, 322.0, 317.0, 291.0, 299.0, 294.0, 288.0, 284.0, 303.0, 315.0, 312.0, 314.0, 322.0, 311.0, 322.0, 319.0, 317.0, 310.0, 317.0, 324.0, 309.0, 289.0, 290.0, 294.0, 288.0, 284.0, 289.0, 303.0, 330.0, 277.0, 
293.0, 316.0, 308.0, 321.0, 306.0, 314.0, 325.0, 319.0, 317.0, 314.0, 319.0, 331.0, 305.0, 321.0, 309.0, 293.0, 286.0, 288.0, 288.0, 211.0, 225.0, 312.0, 327.0, 293.0, 294.0, 314.0, 322.0, 317.0, 319.0, 314.0, 325.0, 302.0, 285.0, 314.0, 322.0, 293.0, 294.0, 317.0, 322.0, 311.0, 322.0, 321.0, 306.0, 319.0, 317.0, 324.0, 312.0, 309.0, 324.0, 293.0, 289.0, 291.0, 291.0, 317.0, 322.0, 319.0, 320.0, 293.0, 289.0, 319.0, 320.0, 206.0, 219.0, 301.0, 286.0, 314.0, 316.0, 314.0, 322.0, 285.0, 297.0, 316.0, 320.0, 288.0, 294.0, 198.0, 173.0, 291.0, 288.0, 295.0, 287.0, 321.0, 315.0, 311.0, 325.0, 314.0, 322.0, 322.0, 314.0, 326.0, 310.0, 314.0, 313.0, 286.0, 293.0, 319.0, 320.0, 317.0, 319.0, 291.0, 291.0, 309.0, 327.0]}, "sampler_perf": {"mean_env_wait_ms": 1.0008984560803753, "mean_processing_ms": 0.26841904280245515, "mean_inference_ms": 1.5754578335271856}, "off_policy_estimator": {}, "info": {"num_steps_trained": 6552000, "num_steps_sampled": 3494400, "sample_time_ms": 22479.799, "load_time_ms": 36.968, "grad_time_ms": 10345.078, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.007351151201874018, "policy_loss": -0.0004584121925290674, "vf_loss": 83.76141357421875, "vf_explained_var": 0.7616392970085144, "kl": 0.0025154289323836565, "entropy": 1.1331415176391602, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3494400, "episodes_total": 8736, "training_iteration": 273, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-03-34", "timestamp": 1660255414, "time_this_iter_s": 31.54677987098694, "time_total_s": 13828.047840118408, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 13828.047840118408, "timesteps_since_restore": 3494400, "iterations_since_restore": 273, "perf": {"cpu_util_percent": 36.990909090909085, "ram_util_percent": 59.28409090909091}}
+{"episode_reward_max": 639.0, "episode_reward_min": 371.0, "episode_reward_mean": 613.53, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 173.0}, "policy_reward_max": {"ppo": 330.0}, "policy_reward_mean": {"ppo": 306.765}, "custom_metrics": {"sparse_reward_mean": 212.4, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 188.73, "shaped_reward_min": 131, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.2, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 18.81, "onion_pickup_agent_1_min": 14, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.5, "useful_onion_pickup_agent_0_min": 9, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 17.98, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.57, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.64, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.28, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.26, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 16.36, "potting_onion_agent_0_min": 9, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.89, "potting_onion_agent_1_min": 12, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.34, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.64, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.74, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.07, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.5, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.48, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.71, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.02, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.65, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.97, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.36, 
"optimal_onion_potting_agent_0_min": 9, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.89, "optimal_onion_potting_agent_1_min": 12, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.36, "viable_onion_potting_agent_0_min": 9, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.89, "viable_onion_potting_agent_1_min": 12, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [633.0, 636.0, 630.0, 630.0, 582.0, 522.0, 587.0, 630.0, 627.0, 633.0, 633.0, 630.0, 587.0, 582.0, 633.0, 636.0, 636.0, 621.0, 627.0, 587.0, 573.0, 630.0, 639.0, 630.0, 630.0, 636.0, 567.0, 579.0, 582.0, 579.0, 633.0, 639.0, 587.0, 636.0, 587.0, 639.0, 633.0, 627.0, 636.0, 636.0, 633.0, 582.0, 582.0, 639.0, 639.0, 582.0, 639.0, 425.0, 587.0, 630.0, 636.0, 582.0, 636.0, 582.0, 371.0, 579.0, 582.0, 636.0, 636.0, 636.0, 636.0, 636.0, 627.0, 579.0, 639.0, 636.0, 582.0, 636.0, 587.0, 630.0, 630.0, 633.0, 636.0, 636.0, 636.0, 636.0, 633.0, 636.0, 615.0, 636.0, 630.0, 636.0, 576.0, 633.0, 627.0, 639.0, 582.0, 633.0, 582.0, 633.0, 627.0, 630.0, 636.0, 582.0, 633.0, 627.0, 639.0, 639.0, 627.0, 633.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [316.0, 317.0, 320.0, 316.0, 314.0, 316.0, 313.0, 317.0, 286.0, 296.0, 260.0, 262.0, 293.0, 294.0, 311.0, 319.0, 321.0, 306.0, 318.0, 315.0, 314.0, 319.0, 319.0, 311.0, 301.0, 286.0, 286.0, 296.0, 311.0, 322.0, 319.0, 317.0, 322.0, 314.0, 305.0, 316.0, 316.0, 311.0, 298.0, 289.0, 279.0, 294.0, 324.0, 306.0, 321.0, 318.0, 319.0, 311.0, 316.0, 314.0, 321.0, 315.0, 281.0, 286.0, 282.0, 297.0, 280.0, 302.0, 291.0, 288.0, 303.0, 330.0, 322.0, 317.0, 302.0, 285.0, 314.0, 322.0, 293.0, 294.0, 317.0, 322.0, 311.0, 322.0, 321.0, 306.0, 319.0, 317.0, 324.0, 312.0, 309.0, 324.0, 293.0, 289.0, 291.0, 291.0, 317.0, 322.0, 319.0, 320.0, 293.0, 289.0, 319.0, 320.0, 206.0, 219.0, 301.0, 
286.0, 314.0, 316.0, 314.0, 322.0, 285.0, 297.0, 316.0, 320.0, 288.0, 294.0, 198.0, 173.0, 291.0, 288.0, 295.0, 287.0, 321.0, 315.0, 311.0, 325.0, 314.0, 322.0, 322.0, 314.0, 326.0, 310.0, 314.0, 313.0, 286.0, 293.0, 319.0, 320.0, 317.0, 319.0, 291.0, 291.0, 309.0, 327.0, 289.0, 298.0, 318.0, 312.0, 318.0, 312.0, 324.0, 309.0, 319.0, 317.0, 314.0, 322.0, 324.0, 312.0, 314.0, 322.0, 316.0, 317.0, 322.0, 314.0, 305.0, 310.0, 325.0, 311.0, 321.0, 309.0, 319.0, 317.0, 296.0, 280.0, 317.0, 316.0, 321.0, 306.0, 324.0, 315.0, 296.0, 286.0, 321.0, 312.0, 288.0, 294.0, 318.0, 315.0, 316.0, 311.0, 320.0, 310.0, 317.0, 319.0, 294.0, 288.0, 318.0, 315.0, 316.0, 311.0, 322.0, 317.0, 327.0, 312.0, 316.0, 311.0, 308.0, 325.0]}, "sampler_perf": {"mean_env_wait_ms": 0.9989007894762186, "mean_processing_ms": 0.2680204764201923, "mean_inference_ms": 1.5734861760997552}, "off_policy_estimator": {}, "info": {"num_steps_trained": 6576000, "num_steps_sampled": 3507200, "sample_time_ms": 22242.32, "load_time_ms": 36.671, "grad_time_ms": 10040.843, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0004968150169588625, "policy_loss": -0.007466705050319433, "vf_loss": 85.29949188232422, "vf_explained_var": 0.7543535232543945, "kl": 0.0017724571516737342, "entropy": 1.1328660249710083, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3507200, "episodes_total": 8768, "training_iteration": 274, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-04-05", "timestamp": 1660255445, "time_this_iter_s": 30.891488075256348, "time_total_s": 13858.939328193665, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 13858.939328193665, "timesteps_since_restore": 3507200, "iterations_since_restore": 274, "perf": {"cpu_util_percent": 32.32954545454545, "ram_util_percent": 58.86590909090909}}
+{"episode_reward_max": 639.0, "episode_reward_min": 362.0, "episode_reward_mean": 610.69, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 180.0}, "policy_reward_max": {"ppo": 330.0}, "policy_reward_mean": {"ppo": 305.345}, "custom_metrics": {"sparse_reward_mean": 211.8, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 187.09, "shaped_reward_min": 110, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.19, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 18.76, "onion_pickup_agent_1_min": 14, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.45, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 17.88, "useful_onion_pickup_agent_1_min": 12, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.57, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.74, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.31, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, "useful_onion_drop_agent_1_mean": 0.31, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 16.31, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.72, "potting_onion_agent_1_min": 11, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.41, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.65, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.55, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.0, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.65, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.45, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.59, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.1, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.56, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.04, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.31, 
"optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.72, "optimal_onion_potting_agent_1_min": 11, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.31, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.72, "viable_onion_potting_agent_1_min": 11, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [633.0, 636.0, 522.0, 390.0, 582.0, 516.0, 630.0, 627.0, 587.0, 633.0, 582.0, 636.0, 582.0, 447.0, 630.0, 639.0, 636.0, 636.0, 633.0, 362.0, 627.0, 636.0, 630.0, 639.0, 579.0, 621.0, 639.0, 639.0, 587.0, 636.0, 587.0, 630.0, 639.0, 636.0, 582.0, 636.0, 587.0, 630.0, 630.0, 633.0, 636.0, 636.0, 636.0, 636.0, 633.0, 636.0, 615.0, 636.0, 630.0, 636.0, 576.0, 633.0, 627.0, 639.0, 582.0, 633.0, 582.0, 633.0, 627.0, 630.0, 636.0, 582.0, 633.0, 627.0, 639.0, 639.0, 627.0, 633.0, 633.0, 636.0, 630.0, 630.0, 582.0, 522.0, 587.0, 630.0, 627.0, 633.0, 633.0, 630.0, 587.0, 582.0, 633.0, 636.0, 636.0, 621.0, 627.0, 587.0, 573.0, 630.0, 639.0, 630.0, 630.0, 636.0, 567.0, 579.0, 582.0, 579.0, 633.0, 639.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [314.0, 319.0, 314.0, 322.0, 260.0, 262.0, 197.0, 193.0, 291.0, 291.0, 256.0, 260.0, 314.0, 316.0, 308.0, 319.0, 286.0, 301.0, 311.0, 322.0, 289.0, 293.0, 322.0, 314.0, 288.0, 294.0, 221.0, 226.0, 318.0, 312.0, 317.0, 322.0, 317.0, 319.0, 318.0, 318.0, 309.0, 324.0, 182.0, 180.0, 316.0, 311.0, 316.0, 320.0, 319.0, 311.0, 319.0, 320.0, 294.0, 285.0, 301.0, 320.0, 327.0, 312.0, 314.0, 325.0, 281.0, 306.0, 316.0, 320.0, 301.0, 286.0, 316.0, 314.0, 319.0, 320.0, 317.0, 319.0, 291.0, 291.0, 309.0, 327.0, 289.0, 298.0, 318.0, 312.0, 318.0, 312.0, 324.0, 309.0, 319.0, 317.0, 314.0, 322.0, 324.0, 312.0, 314.0, 322.0, 316.0, 317.0, 322.0, 314.0, 305.0, 310.0, 325.0, 311.0, 321.0, 
309.0, 319.0, 317.0, 296.0, 280.0, 317.0, 316.0, 321.0, 306.0, 324.0, 315.0, 296.0, 286.0, 321.0, 312.0, 288.0, 294.0, 318.0, 315.0, 316.0, 311.0, 320.0, 310.0, 317.0, 319.0, 294.0, 288.0, 318.0, 315.0, 316.0, 311.0, 322.0, 317.0, 327.0, 312.0, 316.0, 311.0, 308.0, 325.0, 316.0, 317.0, 320.0, 316.0, 314.0, 316.0, 313.0, 317.0, 286.0, 296.0, 260.0, 262.0, 293.0, 294.0, 311.0, 319.0, 321.0, 306.0, 318.0, 315.0, 314.0, 319.0, 319.0, 311.0, 301.0, 286.0, 286.0, 296.0, 311.0, 322.0, 319.0, 317.0, 322.0, 314.0, 305.0, 316.0, 316.0, 311.0, 298.0, 289.0, 279.0, 294.0, 324.0, 306.0, 321.0, 318.0, 319.0, 311.0, 316.0, 314.0, 321.0, 315.0, 281.0, 286.0, 282.0, 297.0, 280.0, 302.0, 291.0, 288.0, 303.0, 330.0, 322.0, 317.0]}, "sampler_perf": {"mean_env_wait_ms": 0.9969205996800542, "mean_processing_ms": 0.26762558358937055, "mean_inference_ms": 1.571468951953135}, "off_policy_estimator": {}, "info": {"num_steps_trained": 6600000, "num_steps_sampled": 3520000, "sample_time_ms": 21659.121, "load_time_ms": 36.576, "grad_time_ms": 9832.58, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.005525531247258186, "policy_loss": -0.002899330807849765, "vf_loss": 89.90011596679688, "vf_explained_var": 0.7821382880210876, "kl": 0.002336545381695032, "entropy": 1.130285382270813, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3520000, "episodes_total": 8800, "training_iteration": 275, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-04-34", "timestamp": 1660255474, "time_this_iter_s": 29.723124265670776, "time_total_s": 13888.662452459335, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 13888.662452459335, "timesteps_since_restore": 3520000, "iterations_since_restore": 275, "perf": {"cpu_util_percent": 34.31666666666666, "ram_util_percent": 58.82619047619047}}
+{"episode_reward_max": 639.0, "episode_reward_min": 362.0, "episode_reward_mean": 609.39, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 180.0}, "policy_reward_max": {"ppo": 330.0}, "policy_reward_mean": {"ppo": 304.695}, "custom_metrics": {"sparse_reward_mean": 211.4, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 186.59, "shaped_reward_min": 110, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.44, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 18.64, "onion_pickup_agent_1_min": 14, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 16.65, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 17.77, "useful_onion_pickup_agent_1_min": 12, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.68, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.84, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.32, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, "useful_onion_drop_agent_1_mean": 0.34, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 16.43, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.5, "potting_onion_agent_1_min": 11, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.21, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.79, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.37, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.13, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.64, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.38, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.37, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.31, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.34, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.25, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.43, 
"optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.5, "optimal_onion_potting_agent_1_min": 11, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.43, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.5, "viable_onion_potting_agent_1_min": 11, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [639.0, 630.0, 639.0, 630.0, 587.0, 582.0, 639.0, 636.0, 639.0, 582.0, 630.0, 627.0, 627.0, 633.0, 639.0, 570.0, 579.0, 633.0, 627.0, 627.0, 636.0, 636.0, 630.0, 633.0, 633.0, 621.0, 573.0, 636.0, 627.0, 530.0, 627.0, 636.0, 639.0, 639.0, 627.0, 633.0, 633.0, 636.0, 630.0, 630.0, 582.0, 522.0, 587.0, 630.0, 627.0, 633.0, 633.0, 630.0, 587.0, 582.0, 633.0, 636.0, 636.0, 621.0, 627.0, 587.0, 573.0, 630.0, 639.0, 630.0, 630.0, 636.0, 567.0, 579.0, 582.0, 579.0, 633.0, 639.0, 633.0, 636.0, 522.0, 390.0, 582.0, 516.0, 630.0, 627.0, 587.0, 633.0, 582.0, 636.0, 582.0, 447.0, 630.0, 639.0, 636.0, 636.0, 633.0, 362.0, 627.0, 636.0, 630.0, 639.0, 579.0, 621.0, 639.0, 639.0, 587.0, 636.0, 587.0, 630.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [325.0, 314.0, 309.0, 321.0, 315.0, 324.0, 310.0, 320.0, 297.0, 290.0, 284.0, 298.0, 322.0, 317.0, 319.0, 317.0, 319.0, 320.0, 283.0, 299.0, 314.0, 316.0, 314.0, 313.0, 308.0, 319.0, 322.0, 311.0, 321.0, 318.0, 287.0, 283.0, 283.0, 296.0, 319.0, 314.0, 311.0, 316.0, 314.0, 313.0, 322.0, 314.0, 317.0, 319.0, 318.0, 312.0, 309.0, 324.0, 319.0, 314.0, 297.0, 324.0, 279.0, 294.0, 316.0, 320.0, 318.0, 309.0, 251.0, 279.0, 317.0, 310.0, 319.0, 317.0, 322.0, 317.0, 327.0, 312.0, 316.0, 311.0, 308.0, 325.0, 316.0, 317.0, 320.0, 316.0, 314.0, 316.0, 313.0, 317.0, 286.0, 296.0, 260.0, 262.0, 293.0, 294.0, 311.0, 319.0, 321.0, 306.0, 318.0, 315.0, 314.0, 319.0, 319.0, 311.0, 301.0, 
286.0, 286.0, 296.0, 311.0, 322.0, 319.0, 317.0, 322.0, 314.0, 305.0, 316.0, 316.0, 311.0, 298.0, 289.0, 279.0, 294.0, 324.0, 306.0, 321.0, 318.0, 319.0, 311.0, 316.0, 314.0, 321.0, 315.0, 281.0, 286.0, 282.0, 297.0, 280.0, 302.0, 291.0, 288.0, 303.0, 330.0, 322.0, 317.0, 314.0, 319.0, 314.0, 322.0, 260.0, 262.0, 197.0, 193.0, 291.0, 291.0, 256.0, 260.0, 314.0, 316.0, 308.0, 319.0, 286.0, 301.0, 311.0, 322.0, 289.0, 293.0, 322.0, 314.0, 288.0, 294.0, 221.0, 226.0, 318.0, 312.0, 317.0, 322.0, 317.0, 319.0, 318.0, 318.0, 309.0, 324.0, 182.0, 180.0, 316.0, 311.0, 316.0, 320.0, 319.0, 311.0, 319.0, 320.0, 294.0, 285.0, 301.0, 320.0, 327.0, 312.0, 314.0, 325.0, 281.0, 306.0, 316.0, 320.0, 301.0, 286.0, 316.0, 314.0]}, "sampler_perf": {"mean_env_wait_ms": 0.9949570051845532, "mean_processing_ms": 0.2672332587743119, "mean_inference_ms": 1.569562529998314}, "off_policy_estimator": {}, "info": {"num_steps_trained": 6624000, "num_steps_sampled": 3532800, "sample_time_ms": 21368.312, "load_time_ms": 36.431, "grad_time_ms": 9398.907, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0024656467139720917, "policy_loss": -0.005234332289546728, "vf_loss": 82.6478500366211, "vf_explained_var": 0.7628920078277588, "kl": 0.001980842323973775, "entropy": 1.1296080350875854, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3532800, "episodes_total": 8832, "training_iteration": 276, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-05-05", "timestamp": 1660255505, "time_this_iter_s": 30.375401973724365, "time_total_s": 13919.03785443306, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 13919.03785443306, "timesteps_since_restore": 3532800, "iterations_since_restore": 276, "perf": {"cpu_util_percent": 33.07209302325582, "ram_util_percent": 58.767441860465105}}
+{"episode_reward_max": 639.0, "episode_reward_min": 362.0, "episode_reward_mean": 610.72, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 180.0}, "policy_reward_max": {"ppo": 330.0}, "policy_reward_mean": {"ppo": 305.36}, "custom_metrics": {"sparse_reward_mean": 211.8, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 187.12, "shaped_reward_min": 110, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.44, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 18.89, "onion_pickup_agent_1_min": 15, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 16.62, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 17.9, "useful_onion_pickup_agent_1_min": 12, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.76, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.85, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.36, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.31, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 16.36, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.67, "potting_onion_agent_1_min": 11, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 6.17, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.84, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.41, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 5.1, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.51, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.48, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.45, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.26, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.43, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.2, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.36, 
"optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.67, "optimal_onion_potting_agent_1_min": 11, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.36, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.67, "viable_onion_potting_agent_1_min": 11, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [584.0, 639.0, 636.0, 633.0, 633.0, 636.0, 627.0, 627.0, 587.0, 636.0, 552.0, 636.0, 636.0, 636.0, 639.0, 636.0, 587.0, 636.0, 621.0, 630.0, 636.0, 633.0, 527.0, 636.0, 633.0, 630.0, 587.0, 630.0, 627.0, 582.0, 630.0, 639.0, 582.0, 579.0, 633.0, 639.0, 633.0, 636.0, 522.0, 390.0, 582.0, 516.0, 630.0, 627.0, 587.0, 633.0, 582.0, 636.0, 582.0, 447.0, 630.0, 639.0, 636.0, 636.0, 633.0, 362.0, 627.0, 636.0, 630.0, 639.0, 579.0, 621.0, 639.0, 639.0, 587.0, 636.0, 587.0, 630.0, 639.0, 630.0, 639.0, 630.0, 587.0, 582.0, 639.0, 636.0, 639.0, 582.0, 630.0, 627.0, 627.0, 633.0, 639.0, 570.0, 579.0, 633.0, 627.0, 627.0, 636.0, 636.0, 630.0, 633.0, 633.0, 621.0, 573.0, 636.0, 627.0, 530.0, 627.0, 636.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [287.0, 297.0, 319.0, 320.0, 321.0, 315.0, 311.0, 322.0, 317.0, 316.0, 316.0, 320.0, 323.0, 304.0, 316.0, 311.0, 291.0, 296.0, 324.0, 312.0, 274.0, 278.0, 324.0, 312.0, 325.0, 311.0, 311.0, 325.0, 314.0, 325.0, 323.0, 313.0, 293.0, 294.0, 309.0, 327.0, 310.0, 311.0, 318.0, 312.0, 322.0, 314.0, 304.0, 329.0, 266.0, 261.0, 329.0, 307.0, 311.0, 322.0, 316.0, 314.0, 294.0, 293.0, 316.0, 314.0, 314.0, 313.0, 296.0, 286.0, 314.0, 316.0, 322.0, 317.0, 280.0, 302.0, 291.0, 288.0, 303.0, 330.0, 322.0, 317.0, 314.0, 319.0, 314.0, 322.0, 260.0, 262.0, 197.0, 193.0, 291.0, 291.0, 256.0, 260.0, 314.0, 316.0, 308.0, 319.0, 286.0, 301.0, 311.0, 322.0, 289.0, 293.0, 322.0, 314.0, 288.0, 
294.0, 221.0, 226.0, 318.0, 312.0, 317.0, 322.0, 317.0, 319.0, 318.0, 318.0, 309.0, 324.0, 182.0, 180.0, 316.0, 311.0, 316.0, 320.0, 319.0, 311.0, 319.0, 320.0, 294.0, 285.0, 301.0, 320.0, 327.0, 312.0, 314.0, 325.0, 281.0, 306.0, 316.0, 320.0, 301.0, 286.0, 316.0, 314.0, 325.0, 314.0, 309.0, 321.0, 315.0, 324.0, 310.0, 320.0, 297.0, 290.0, 284.0, 298.0, 322.0, 317.0, 319.0, 317.0, 319.0, 320.0, 283.0, 299.0, 314.0, 316.0, 314.0, 313.0, 308.0, 319.0, 322.0, 311.0, 321.0, 318.0, 287.0, 283.0, 283.0, 296.0, 319.0, 314.0, 311.0, 316.0, 314.0, 313.0, 322.0, 314.0, 317.0, 319.0, 318.0, 312.0, 309.0, 324.0, 319.0, 314.0, 297.0, 324.0, 279.0, 294.0, 316.0, 320.0, 318.0, 309.0, 251.0, 279.0, 317.0, 310.0, 319.0, 317.0]}, "sampler_perf": {"mean_env_wait_ms": 0.993008865355136, "mean_processing_ms": 0.26684329670316986, "mean_inference_ms": 1.5676868637222179}, "off_policy_estimator": {}, "info": {"num_steps_trained": 6648000, "num_steps_sampled": 3545600, "sample_time_ms": 21068.026, "load_time_ms": 36.686, "grad_time_ms": 9153.242, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.004478854592889547, "policy_loss": -0.0033003378193825483, "vf_loss": 83.42573547363281, "vf_explained_var": 0.7645106911659241, "kl": 0.002364285057410598, "entropy": 1.1267634630203247, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3545600, "episodes_total": 8864, "training_iteration": 277, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-05-35", "timestamp": 1660255535, "time_this_iter_s": 29.876389980316162, "time_total_s": 13948.914244413376, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 13948.914244413376, "timesteps_since_restore": 3545600, "iterations_since_restore": 277, "perf": {"cpu_util_percent": 33.73571428571429, "ram_util_percent": 58.82857142857141}}
+{"episode_reward_max": 639.0, "episode_reward_min": 516.0, "episode_reward_mean": 617.78, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 243.0}, "policy_reward_max": {"ppo": 329.0}, "policy_reward_mean": {"ppo": 308.89}, "custom_metrics": {"sparse_reward_mean": 214.2, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 189.38, "shaped_reward_min": 152, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.68, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 19.09, "onion_pickup_agent_1_min": 15, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 16.87, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 18.09, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.92, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.85, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.3, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.35, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 16.47, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.87, "potting_onion_agent_1_min": 14, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 6.23, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.89, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.58, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 5.14, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.47, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.54, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.59, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.25, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.57, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.18, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.47, 
"optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.87, "optimal_onion_potting_agent_1_min": 14, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.47, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.87, "viable_onion_potting_agent_1_min": 14, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [633.0, 587.0, 627.0, 576.0, 544.0, 636.0, 630.0, 633.0, 633.0, 615.0, 582.0, 636.0, 639.0, 636.0, 633.0, 579.0, 630.0, 633.0, 633.0, 636.0, 639.0, 579.0, 636.0, 630.0, 639.0, 633.0, 582.0, 630.0, 627.0, 516.0, 587.0, 639.0, 587.0, 636.0, 587.0, 630.0, 639.0, 630.0, 639.0, 630.0, 587.0, 582.0, 639.0, 636.0, 639.0, 582.0, 630.0, 627.0, 627.0, 633.0, 639.0, 570.0, 579.0, 633.0, 627.0, 627.0, 636.0, 636.0, 630.0, 633.0, 633.0, 621.0, 573.0, 636.0, 627.0, 530.0, 627.0, 636.0, 584.0, 639.0, 636.0, 633.0, 633.0, 636.0, 627.0, 627.0, 587.0, 636.0, 552.0, 636.0, 636.0, 636.0, 639.0, 636.0, 587.0, 636.0, 621.0, 630.0, 636.0, 633.0, 527.0, 636.0, 633.0, 630.0, 587.0, 630.0, 627.0, 582.0, 630.0, 639.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [319.0, 314.0, 291.0, 296.0, 321.0, 306.0, 288.0, 288.0, 266.0, 278.0, 320.0, 316.0, 316.0, 314.0, 319.0, 314.0, 316.0, 317.0, 310.0, 305.0, 298.0, 284.0, 316.0, 320.0, 327.0, 312.0, 319.0, 317.0, 319.0, 314.0, 282.0, 297.0, 310.0, 320.0, 316.0, 317.0, 322.0, 311.0, 308.0, 328.0, 325.0, 314.0, 296.0, 283.0, 314.0, 322.0, 320.0, 310.0, 317.0, 322.0, 308.0, 325.0, 290.0, 292.0, 313.0, 317.0, 308.0, 319.0, 243.0, 273.0, 291.0, 296.0, 320.0, 319.0, 281.0, 306.0, 316.0, 320.0, 301.0, 286.0, 316.0, 314.0, 325.0, 314.0, 309.0, 321.0, 315.0, 324.0, 310.0, 320.0, 297.0, 290.0, 284.0, 298.0, 322.0, 317.0, 319.0, 317.0, 319.0, 320.0, 283.0, 299.0, 314.0, 316.0, 314.0, 313.0, 308.0, 
319.0, 322.0, 311.0, 321.0, 318.0, 287.0, 283.0, 283.0, 296.0, 319.0, 314.0, 311.0, 316.0, 314.0, 313.0, 322.0, 314.0, 317.0, 319.0, 318.0, 312.0, 309.0, 324.0, 319.0, 314.0, 297.0, 324.0, 279.0, 294.0, 316.0, 320.0, 318.0, 309.0, 251.0, 279.0, 317.0, 310.0, 319.0, 317.0, 287.0, 297.0, 319.0, 320.0, 321.0, 315.0, 311.0, 322.0, 317.0, 316.0, 316.0, 320.0, 323.0, 304.0, 316.0, 311.0, 291.0, 296.0, 324.0, 312.0, 274.0, 278.0, 324.0, 312.0, 325.0, 311.0, 311.0, 325.0, 314.0, 325.0, 323.0, 313.0, 293.0, 294.0, 309.0, 327.0, 310.0, 311.0, 318.0, 312.0, 322.0, 314.0, 304.0, 329.0, 266.0, 261.0, 329.0, 307.0, 311.0, 322.0, 316.0, 314.0, 294.0, 293.0, 316.0, 314.0, 314.0, 313.0, 296.0, 286.0, 314.0, 316.0, 322.0, 317.0]}, "sampler_perf": {"mean_env_wait_ms": 0.9910719747703968, "mean_processing_ms": 0.26645500095607105, "mean_inference_ms": 1.5658690640351203}, "off_policy_estimator": {}, "info": {"num_steps_trained": 6672000, "num_steps_sampled": 3558400, "sample_time_ms": 20841.044, "load_time_ms": 37.005, "grad_time_ms": 9278.753, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.004964211490005255, "policy_loss": -0.0031354122329503298, "vf_loss": 86.61837768554688, "vf_explained_var": 0.7653247714042664, "kl": 0.0020841285586357117, "entropy": 1.1244021654129028, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3558400, "episodes_total": 8896, "training_iteration": 278, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-06-07", "timestamp": 1660255567, "time_this_iter_s": 32.07894992828369, "time_total_s": 13980.99319434166, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 13980.99319434166, "timesteps_since_restore": 3558400, "iterations_since_restore": 278, "perf": {"cpu_util_percent": 33.79111111111111, "ram_util_percent": 58.81111111111109}}
+{"episode_reward_max": 639.0, "episode_reward_min": 516.0, "episode_reward_mean": 617.81, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 243.0}, "policy_reward_max": {"ppo": 329.0}, "policy_reward_mean": {"ppo": 308.905}, "custom_metrics": {"sparse_reward_mean": 214.2, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 189.41, "shaped_reward_min": 152, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.73, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 18.95, "onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.9, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 18.11, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.86, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.76, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.29, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.28, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 16.6, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 17.75, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 6.26, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.84, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.6, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 5.12, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.47, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.54, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.64, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.22, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.59, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.15, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.6, 
"optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 17.75, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.6, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 17.75, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [633.0, 630.0, 567.0, 627.0, 633.0, 584.0, 639.0, 582.0, 633.0, 636.0, 579.0, 636.0, 627.0, 630.0, 582.0, 633.0, 639.0, 627.0, 582.0, 630.0, 633.0, 633.0, 636.0, 578.0, 633.0, 633.0, 639.0, 633.0, 630.0, 587.0, 633.0, 639.0, 627.0, 530.0, 627.0, 636.0, 584.0, 639.0, 636.0, 633.0, 633.0, 636.0, 627.0, 627.0, 587.0, 636.0, 552.0, 636.0, 636.0, 636.0, 639.0, 636.0, 587.0, 636.0, 621.0, 630.0, 636.0, 633.0, 527.0, 636.0, 633.0, 630.0, 587.0, 630.0, 627.0, 582.0, 630.0, 639.0, 633.0, 587.0, 627.0, 576.0, 544.0, 636.0, 630.0, 633.0, 633.0, 615.0, 582.0, 636.0, 639.0, 636.0, 633.0, 579.0, 630.0, 633.0, 633.0, 636.0, 639.0, 579.0, 636.0, 630.0, 639.0, 633.0, 582.0, 630.0, 627.0, 516.0, 587.0, 639.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [307.0, 326.0, 304.0, 326.0, 293.0, 274.0, 305.0, 322.0, 319.0, 314.0, 293.0, 291.0, 319.0, 320.0, 292.0, 290.0, 324.0, 309.0, 319.0, 317.0, 293.0, 286.0, 320.0, 316.0, 306.0, 321.0, 319.0, 311.0, 291.0, 291.0, 309.0, 324.0, 325.0, 314.0, 314.0, 313.0, 301.0, 281.0, 306.0, 324.0, 306.0, 327.0, 319.0, 314.0, 324.0, 312.0, 287.0, 291.0, 319.0, 314.0, 319.0, 314.0, 324.0, 315.0, 317.0, 316.0, 311.0, 319.0, 305.0, 282.0, 315.0, 318.0, 322.0, 317.0, 318.0, 309.0, 251.0, 279.0, 317.0, 310.0, 319.0, 317.0, 287.0, 297.0, 319.0, 320.0, 321.0, 315.0, 311.0, 322.0, 317.0, 316.0, 316.0, 320.0, 323.0, 304.0, 316.0, 311.0, 291.0, 296.0, 324.0, 312.0, 274.0, 278.0, 324.0, 312.0, 325.0, 
311.0, 311.0, 325.0, 314.0, 325.0, 323.0, 313.0, 293.0, 294.0, 309.0, 327.0, 310.0, 311.0, 318.0, 312.0, 322.0, 314.0, 304.0, 329.0, 266.0, 261.0, 329.0, 307.0, 311.0, 322.0, 316.0, 314.0, 294.0, 293.0, 316.0, 314.0, 314.0, 313.0, 296.0, 286.0, 314.0, 316.0, 322.0, 317.0, 319.0, 314.0, 291.0, 296.0, 321.0, 306.0, 288.0, 288.0, 266.0, 278.0, 320.0, 316.0, 316.0, 314.0, 319.0, 314.0, 316.0, 317.0, 310.0, 305.0, 298.0, 284.0, 316.0, 320.0, 327.0, 312.0, 319.0, 317.0, 319.0, 314.0, 282.0, 297.0, 310.0, 320.0, 316.0, 317.0, 322.0, 311.0, 308.0, 328.0, 325.0, 314.0, 296.0, 283.0, 314.0, 322.0, 320.0, 310.0, 317.0, 322.0, 308.0, 325.0, 290.0, 292.0, 313.0, 317.0, 308.0, 319.0, 243.0, 273.0, 291.0, 296.0, 320.0, 319.0]}, "sampler_perf": {"mean_env_wait_ms": 0.9891622022532315, "mean_processing_ms": 0.26607283169040735, "mean_inference_ms": 1.56436366595401}, "off_policy_estimator": {}, "info": {"num_steps_trained": 6696000, "num_steps_sampled": 3571200, "sample_time_ms": 21399.6, "load_time_ms": 36.9, "grad_time_ms": 9533.999, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0006114859133958817, "policy_loss": -0.007230747956782579, "vf_loss": 84.06954956054688, "vf_explained_var": 0.7658140063285828, "kl": 0.0017542889108881354, "entropy": 1.129442572593689, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3571200, "episodes_total": 8928, "training_iteration": 279, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-06-44", "timestamp": 1660255604, "time_this_iter_s": 37.19960618019104, "time_total_s": 14018.19280052185, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 14018.19280052185, "timesteps_since_restore": 3571200, "iterations_since_restore": 279, "perf": {"cpu_util_percent": 33.281132075471696, "ram_util_percent": 58.9301886792453}}
+{"episode_reward_max": 639.0, "episode_reward_min": 294.0, "episode_reward_mean": 612.19, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 146.0}, "policy_reward_max": {"ppo": 328.0}, "policy_reward_mean": {"ppo": 306.095}, "custom_metrics": {"sparse_reward_mean": 212.2, "sparse_reward_min": 100, "sparse_reward_max": 220, "shaped_reward_mean": 187.79, "shaped_reward_min": 94, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.72, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 18.58, "onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.89, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 17.73, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.85, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.65, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.29, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.29, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 16.6, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 17.56, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 6.3, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.99, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 18, "useful_dish_pickup_agent_0_mean": 5.51, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.11, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.59, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.66, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 14, "useful_dish_drop_agent_0_mean": 0.04, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.53, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.19, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.47, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.15, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.03, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.6, 
"optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 17.56, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.6, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 17.56, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [636.0, 639.0, 630.0, 633.0, 633.0, 639.0, 639.0, 582.0, 294.0, 636.0, 587.0, 579.0, 636.0, 450.0, 579.0, 573.0, 582.0, 582.0, 582.0, 630.0, 636.0, 636.0, 636.0, 579.0, 561.0, 633.0, 636.0, 630.0, 630.0, 630.0, 639.0, 630.0, 627.0, 582.0, 630.0, 639.0, 633.0, 587.0, 627.0, 576.0, 544.0, 636.0, 630.0, 633.0, 633.0, 615.0, 582.0, 636.0, 639.0, 636.0, 633.0, 579.0, 630.0, 633.0, 633.0, 636.0, 639.0, 579.0, 636.0, 630.0, 639.0, 633.0, 582.0, 630.0, 627.0, 516.0, 587.0, 639.0, 633.0, 630.0, 567.0, 627.0, 633.0, 584.0, 639.0, 582.0, 633.0, 636.0, 579.0, 636.0, 627.0, 630.0, 582.0, 633.0, 639.0, 627.0, 582.0, 630.0, 633.0, 633.0, 636.0, 578.0, 633.0, 633.0, 639.0, 633.0, 630.0, 587.0, 633.0, 639.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [317.0, 319.0, 324.0, 315.0, 311.0, 319.0, 308.0, 325.0, 311.0, 322.0, 319.0, 320.0, 319.0, 320.0, 286.0, 296.0, 148.0, 146.0, 321.0, 315.0, 294.0, 293.0, 288.0, 291.0, 323.0, 313.0, 224.0, 226.0, 296.0, 283.0, 290.0, 283.0, 289.0, 293.0, 290.0, 292.0, 293.0, 289.0, 316.0, 314.0, 314.0, 322.0, 324.0, 312.0, 319.0, 317.0, 290.0, 289.0, 279.0, 282.0, 313.0, 320.0, 319.0, 317.0, 316.0, 314.0, 311.0, 319.0, 317.0, 313.0, 322.0, 317.0, 311.0, 319.0, 314.0, 313.0, 296.0, 286.0, 314.0, 316.0, 322.0, 317.0, 319.0, 314.0, 291.0, 296.0, 321.0, 306.0, 288.0, 288.0, 266.0, 278.0, 320.0, 316.0, 316.0, 314.0, 319.0, 314.0, 316.0, 317.0, 310.0, 305.0, 298.0, 284.0, 316.0, 320.0, 327.0, 
312.0, 319.0, 317.0, 319.0, 314.0, 282.0, 297.0, 310.0, 320.0, 316.0, 317.0, 322.0, 311.0, 308.0, 328.0, 325.0, 314.0, 296.0, 283.0, 314.0, 322.0, 320.0, 310.0, 317.0, 322.0, 308.0, 325.0, 290.0, 292.0, 313.0, 317.0, 308.0, 319.0, 243.0, 273.0, 291.0, 296.0, 320.0, 319.0, 307.0, 326.0, 304.0, 326.0, 293.0, 274.0, 305.0, 322.0, 319.0, 314.0, 293.0, 291.0, 319.0, 320.0, 292.0, 290.0, 324.0, 309.0, 319.0, 317.0, 293.0, 286.0, 320.0, 316.0, 306.0, 321.0, 319.0, 311.0, 291.0, 291.0, 309.0, 324.0, 325.0, 314.0, 314.0, 313.0, 301.0, 281.0, 306.0, 324.0, 306.0, 327.0, 319.0, 314.0, 324.0, 312.0, 287.0, 291.0, 319.0, 314.0, 319.0, 314.0, 324.0, 315.0, 317.0, 316.0, 311.0, 319.0, 305.0, 282.0, 315.0, 318.0, 322.0, 317.0]}, "sampler_perf": {"mean_env_wait_ms": 0.987279141463972, "mean_processing_ms": 0.26569893386655014, "mean_inference_ms": 1.5631172104354278}, "off_policy_estimator": {}, "info": {"num_steps_trained": 6720000, "num_steps_sampled": 3584000, "sample_time_ms": 21992.586, "load_time_ms": 36.83, "grad_time_ms": 9822.342, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0033407146111130714, "policy_loss": -0.005221154540777206, "vf_loss": 91.33563232421875, "vf_explained_var": 0.7713200449943542, "kl": 0.001954694977030158, "entropy": 1.143385887145996, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3584000, "episodes_total": 8960, "training_iteration": 280, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-07-21", "timestamp": 1660255641, "time_this_iter_s": 36.67114806175232, "time_total_s": 14054.863948583603, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 14054.863948583603, "timesteps_since_restore": 3584000, "iterations_since_restore": 280, "perf": {"cpu_util_percent": 33.917307692307695, "ram_util_percent": 58.82115384615383}}
+{"episode_reward_max": 639.0, "episode_reward_min": 180.0, "episode_reward_mean": 606.73, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 89.0}, "policy_reward_max": {"ppo": 328.0}, "policy_reward_mean": {"ppo": 303.365}, "custom_metrics": {"sparse_reward_mean": 210.2, "sparse_reward_min": 60, "sparse_reward_max": 220, "shaped_reward_mean": 186.33, "shaped_reward_min": 60, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.42, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 18.59, "onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.51, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 17.66, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.78, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 0.66, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.35, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 0.26, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 16.38, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 17.56, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 6.37, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.86, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 18, "useful_dish_pickup_agent_0_mean": 5.49, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.03, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.66, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.6, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 14, "useful_dish_drop_agent_0_mean": 0.04, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.56, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.09, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.46, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.06, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.05, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.38, 
"optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 17.56, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.38, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 17.56, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [636.0, 639.0, 636.0, 633.0, 636.0, 579.0, 579.0, 573.0, 180.0, 587.0, 636.0, 630.0, 579.0, 639.0, 570.0, 639.0, 633.0, 636.0, 582.0, 582.0, 633.0, 627.0, 630.0, 624.0, 624.0, 536.0, 636.0, 636.0, 636.0, 587.0, 639.0, 639.0, 627.0, 516.0, 587.0, 639.0, 633.0, 630.0, 567.0, 627.0, 633.0, 584.0, 639.0, 582.0, 633.0, 636.0, 579.0, 636.0, 627.0, 630.0, 582.0, 633.0, 639.0, 627.0, 582.0, 630.0, 633.0, 633.0, 636.0, 578.0, 633.0, 633.0, 639.0, 633.0, 630.0, 587.0, 633.0, 639.0, 636.0, 639.0, 630.0, 633.0, 633.0, 639.0, 639.0, 582.0, 294.0, 636.0, 587.0, 579.0, 636.0, 450.0, 579.0, 573.0, 582.0, 582.0, 582.0, 630.0, 636.0, 636.0, 636.0, 579.0, 561.0, 633.0, 636.0, 630.0, 630.0, 630.0, 639.0, 630.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [317.0, 319.0, 322.0, 317.0, 319.0, 317.0, 321.0, 312.0, 313.0, 323.0, 277.0, 302.0, 290.0, 289.0, 291.0, 282.0, 91.0, 89.0, 296.0, 291.0, 322.0, 314.0, 316.0, 314.0, 298.0, 281.0, 314.0, 325.0, 284.0, 286.0, 311.0, 328.0, 308.0, 325.0, 311.0, 325.0, 294.0, 288.0, 293.0, 289.0, 321.0, 312.0, 316.0, 311.0, 317.0, 313.0, 327.0, 297.0, 321.0, 303.0, 275.0, 261.0, 314.0, 322.0, 314.0, 322.0, 316.0, 320.0, 303.0, 284.0, 314.0, 325.0, 319.0, 320.0, 308.0, 319.0, 243.0, 273.0, 291.0, 296.0, 320.0, 319.0, 307.0, 326.0, 304.0, 326.0, 293.0, 274.0, 305.0, 322.0, 319.0, 314.0, 293.0, 291.0, 319.0, 320.0, 292.0, 290.0, 324.0, 309.0, 319.0, 317.0, 293.0, 286.0, 320.0, 316.0, 306.0, 
321.0, 319.0, 311.0, 291.0, 291.0, 309.0, 324.0, 325.0, 314.0, 314.0, 313.0, 301.0, 281.0, 306.0, 324.0, 306.0, 327.0, 319.0, 314.0, 324.0, 312.0, 287.0, 291.0, 319.0, 314.0, 319.0, 314.0, 324.0, 315.0, 317.0, 316.0, 311.0, 319.0, 305.0, 282.0, 315.0, 318.0, 322.0, 317.0, 317.0, 319.0, 324.0, 315.0, 311.0, 319.0, 308.0, 325.0, 311.0, 322.0, 319.0, 320.0, 319.0, 320.0, 286.0, 296.0, 148.0, 146.0, 321.0, 315.0, 294.0, 293.0, 288.0, 291.0, 323.0, 313.0, 224.0, 226.0, 296.0, 283.0, 290.0, 283.0, 289.0, 293.0, 290.0, 292.0, 293.0, 289.0, 316.0, 314.0, 314.0, 322.0, 324.0, 312.0, 319.0, 317.0, 290.0, 289.0, 279.0, 282.0, 313.0, 320.0, 319.0, 317.0, 316.0, 314.0, 311.0, 319.0, 317.0, 313.0, 322.0, 317.0, 311.0, 319.0]}, "sampler_perf": {"mean_env_wait_ms": 0.985427132070836, "mean_processing_ms": 0.26533447466026966, "mean_inference_ms": 1.5623431092422566}, "off_policy_estimator": {}, "info": {"num_steps_trained": 6744000, "num_steps_sampled": 3596800, "sample_time_ms": 22602.144, "load_time_ms": 37.106, "grad_time_ms": 10160.693, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.006504642311483622, "policy_loss": -0.002157183364033699, "vf_loss": 92.3310546875, "vf_explained_var": 0.768465518951416, "kl": 0.002224028343334794, "entropy": 1.1425694227218628, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3596800, "episodes_total": 8992, "training_iteration": 281, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-08-00", "timestamp": 1660255680, "time_this_iter_s": 39.430299043655396, "time_total_s": 14094.294247627258, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 14094.294247627258, "timesteps_since_restore": 3596800, "iterations_since_restore": 281, "perf": {"cpu_util_percent": 32.93571428571428, "ram_util_percent": 58.800000000000004}}
+{"episode_reward_max": 639.0, "episode_reward_min": 180.0, "episode_reward_mean": 606.8, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 89.0}, "policy_reward_max": {"ppo": 329.0}, "policy_reward_mean": {"ppo": 303.4}, "custom_metrics": {"sparse_reward_mean": 210.0, "sparse_reward_min": 60, "sparse_reward_max": 220, "shaped_reward_mean": 186.8, "shaped_reward_min": 60, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.25, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 18.74, "onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.33, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.76, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.72, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 0.61, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.34, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 0.19, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 16.28, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.8, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 6.44, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.9, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 18, "useful_dish_pickup_agent_0_mean": 5.57, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.95, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.67, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.73, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 14, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.65, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.99, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.57, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.95, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.03, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.28, 
"optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.8, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.28, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.8, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 596.0, 587.0, 627.0, 633.0, 636.0, 636.0, 636.0, 636.0, 636.0, 582.0, 630.0, 633.0, 636.0, 630.0, 590.0, 636.0, 633.0, 573.0, 558.0, 636.0, 636.0, 636.0, 630.0, 639.0, 636.0, 522.0, 587.0, 636.0, 636.0, 587.0, 639.0, 630.0, 587.0, 633.0, 639.0, 636.0, 639.0, 630.0, 633.0, 633.0, 639.0, 639.0, 582.0, 294.0, 636.0, 587.0, 579.0, 636.0, 450.0, 579.0, 573.0, 582.0, 582.0, 582.0, 630.0, 636.0, 636.0, 636.0, 579.0, 561.0, 633.0, 636.0, 630.0, 630.0, 630.0, 639.0, 630.0, 636.0, 639.0, 636.0, 633.0, 636.0, 579.0, 579.0, 573.0, 180.0, 587.0, 636.0, 630.0, 579.0, 639.0, 570.0, 639.0, 633.0, 636.0, 582.0, 582.0, 633.0, 627.0, 630.0, 624.0, 624.0, 536.0, 636.0, 636.0, 636.0, 587.0, 639.0, 639.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [283.0, 296.0, 300.0, 296.0, 297.0, 290.0, 315.0, 312.0, 314.0, 319.0, 319.0, 317.0, 317.0, 319.0, 314.0, 322.0, 322.0, 314.0, 317.0, 319.0, 294.0, 288.0, 306.0, 324.0, 311.0, 322.0, 316.0, 320.0, 316.0, 314.0, 304.0, 286.0, 324.0, 312.0, 317.0, 316.0, 286.0, 287.0, 278.0, 280.0, 319.0, 317.0, 329.0, 307.0, 311.0, 325.0, 318.0, 312.0, 312.0, 327.0, 319.0, 317.0, 262.0, 260.0, 294.0, 293.0, 319.0, 317.0, 322.0, 314.0, 288.0, 299.0, 322.0, 317.0, 311.0, 319.0, 305.0, 282.0, 315.0, 318.0, 322.0, 317.0, 317.0, 319.0, 324.0, 315.0, 311.0, 319.0, 308.0, 325.0, 311.0, 322.0, 319.0, 320.0, 319.0, 320.0, 286.0, 296.0, 148.0, 146.0, 321.0, 315.0, 294.0, 293.0, 288.0, 291.0, 323.0, 
313.0, 224.0, 226.0, 296.0, 283.0, 290.0, 283.0, 289.0, 293.0, 290.0, 292.0, 293.0, 289.0, 316.0, 314.0, 314.0, 322.0, 324.0, 312.0, 319.0, 317.0, 290.0, 289.0, 279.0, 282.0, 313.0, 320.0, 319.0, 317.0, 316.0, 314.0, 311.0, 319.0, 317.0, 313.0, 322.0, 317.0, 311.0, 319.0, 317.0, 319.0, 322.0, 317.0, 319.0, 317.0, 321.0, 312.0, 313.0, 323.0, 277.0, 302.0, 290.0, 289.0, 291.0, 282.0, 91.0, 89.0, 296.0, 291.0, 322.0, 314.0, 316.0, 314.0, 298.0, 281.0, 314.0, 325.0, 284.0, 286.0, 311.0, 328.0, 308.0, 325.0, 311.0, 325.0, 294.0, 288.0, 293.0, 289.0, 321.0, 312.0, 316.0, 311.0, 317.0, 313.0, 327.0, 297.0, 321.0, 303.0, 275.0, 261.0, 314.0, 322.0, 314.0, 322.0, 316.0, 320.0, 303.0, 284.0, 314.0, 325.0, 319.0, 320.0]}, "sampler_perf": {"mean_env_wait_ms": 0.9835752103024169, "mean_processing_ms": 0.26496930123552576, "mean_inference_ms": 1.5613527088644699}, "off_policy_estimator": {}, "info": {"num_steps_trained": 6768000, "num_steps_sampled": 3609600, "sample_time_ms": 22700.667, "load_time_ms": 36.992, "grad_time_ms": 10562.273, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.004271908197551966, "policy_loss": -0.0035006285179406404, "vf_loss": 83.40963745117188, "vf_explained_var": 0.7725582718849182, "kl": 0.0017563734436407685, "entropy": 1.1368495225906372, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3609600, "episodes_total": 9024, "training_iteration": 282, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-08-36", "timestamp": 1660255716, "time_this_iter_s": 35.46651792526245, "time_total_s": 14129.76076555252, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 14129.76076555252, "timesteps_since_restore": 3609600, "iterations_since_restore": 282, "perf": {"cpu_util_percent": 33.821999999999996, "ram_util_percent": 59.328}}
+{"episode_reward_max": 639.0, "episode_reward_min": 180.0, "episode_reward_mean": 614.07, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 89.0}, "policy_reward_max": {"ppo": 329.0}, "policy_reward_mean": {"ppo": 307.035}, "custom_metrics": {"sparse_reward_mean": 212.6, "sparse_reward_min": 60, "sparse_reward_max": 220, "shaped_reward_mean": 188.87, "shaped_reward_min": 60, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.29, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 18.89, "onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 16.45, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 18.0, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.6, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 0.63, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.2, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 0.2, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 16.45, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.95, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.41, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.88, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.61, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.08, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.65, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.62, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.64, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.11, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.58, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.07, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.45, 
"optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.95, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.45, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.95, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [639.0, 573.0, 636.0, 630.0, 636.0, 630.0, 633.0, 579.0, 636.0, 639.0, 630.0, 633.0, 639.0, 639.0, 582.0, 636.0, 633.0, 630.0, 567.0, 633.0, 627.0, 627.0, 582.0, 639.0, 633.0, 636.0, 579.0, 630.0, 636.0, 636.0, 587.0, 639.0, 630.0, 630.0, 639.0, 630.0, 636.0, 639.0, 636.0, 633.0, 636.0, 579.0, 579.0, 573.0, 180.0, 587.0, 636.0, 630.0, 579.0, 639.0, 570.0, 639.0, 633.0, 636.0, 582.0, 582.0, 633.0, 627.0, 630.0, 624.0, 624.0, 536.0, 636.0, 636.0, 636.0, 587.0, 639.0, 639.0, 579.0, 596.0, 587.0, 627.0, 633.0, 636.0, 636.0, 636.0, 636.0, 636.0, 582.0, 630.0, 633.0, 636.0, 630.0, 590.0, 636.0, 633.0, 573.0, 558.0, 636.0, 636.0, 636.0, 630.0, 639.0, 636.0, 522.0, 587.0, 636.0, 636.0, 587.0, 639.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [317.0, 322.0, 292.0, 281.0, 308.0, 328.0, 304.0, 326.0, 317.0, 319.0, 319.0, 311.0, 311.0, 322.0, 284.0, 295.0, 313.0, 323.0, 319.0, 320.0, 319.0, 311.0, 316.0, 317.0, 319.0, 320.0, 321.0, 318.0, 283.0, 299.0, 316.0, 320.0, 316.0, 317.0, 316.0, 314.0, 279.0, 288.0, 313.0, 320.0, 313.0, 314.0, 311.0, 316.0, 288.0, 294.0, 319.0, 320.0, 314.0, 319.0, 319.0, 317.0, 294.0, 285.0, 318.0, 312.0, 317.0, 319.0, 322.0, 314.0, 298.0, 289.0, 322.0, 317.0, 311.0, 319.0, 317.0, 313.0, 322.0, 317.0, 311.0, 319.0, 317.0, 319.0, 322.0, 317.0, 319.0, 317.0, 321.0, 312.0, 313.0, 323.0, 277.0, 302.0, 290.0, 289.0, 291.0, 282.0, 91.0, 89.0, 296.0, 291.0, 322.0, 314.0, 316.0, 314.0, 298.0, 
281.0, 314.0, 325.0, 284.0, 286.0, 311.0, 328.0, 308.0, 325.0, 311.0, 325.0, 294.0, 288.0, 293.0, 289.0, 321.0, 312.0, 316.0, 311.0, 317.0, 313.0, 327.0, 297.0, 321.0, 303.0, 275.0, 261.0, 314.0, 322.0, 314.0, 322.0, 316.0, 320.0, 303.0, 284.0, 314.0, 325.0, 319.0, 320.0, 283.0, 296.0, 300.0, 296.0, 297.0, 290.0, 315.0, 312.0, 314.0, 319.0, 319.0, 317.0, 317.0, 319.0, 314.0, 322.0, 322.0, 314.0, 317.0, 319.0, 294.0, 288.0, 306.0, 324.0, 311.0, 322.0, 316.0, 320.0, 316.0, 314.0, 304.0, 286.0, 324.0, 312.0, 317.0, 316.0, 286.0, 287.0, 278.0, 280.0, 319.0, 317.0, 329.0, 307.0, 311.0, 325.0, 318.0, 312.0, 312.0, 327.0, 319.0, 317.0, 262.0, 260.0, 294.0, 293.0, 319.0, 317.0, 322.0, 314.0, 288.0, 299.0, 322.0, 317.0]}, "sampler_perf": {"mean_env_wait_ms": 0.9817221783710357, "mean_processing_ms": 0.2646010611587108, "mean_inference_ms": 1.5601389392518195}, "off_policy_estimator": {}, "info": {"num_steps_trained": 6792000, "num_steps_sampled": 3622400, "sample_time_ms": 22788.728, "load_time_ms": 36.79, "grad_time_ms": 10687.472, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0030612878035753965, "policy_loss": -0.0045290542766451836, "vf_loss": 81.5626449584961, "vf_explained_var": 0.7761082053184509, "kl": 0.0021392148919403553, "entropy": 1.131847858428955, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3622400, "episodes_total": 9056, "training_iteration": 283, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-09-09", "timestamp": 1660255749, "time_this_iter_s": 33.67844009399414, "time_total_s": 14163.439205646515, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 14163.439205646515, "timesteps_since_restore": 3622400, "iterations_since_restore": 283, "perf": {"cpu_util_percent": 34.11489361702128, "ram_util_percent": 59.04893617021279}}
+{"episode_reward_max": 639.0, "episode_reward_min": 522.0, "episode_reward_mean": 617.97, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 260.0}, "policy_reward_max": {"ppo": 329.0}, "policy_reward_mean": {"ppo": 308.985}, "custom_metrics": {"sparse_reward_mean": 214.0, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 189.97, "shaped_reward_min": 158, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.28, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 18.9, "onion_pickup_agent_1_min": 14, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 16.58, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 18.13, "useful_onion_pickup_agent_1_min": 14, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.53, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.56, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.19, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.21, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 16.56, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.97, "potting_onion_agent_1_min": 14, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.22, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.85, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.61, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.15, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.47, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.55, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.04, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.6, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.23, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.54, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.19, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.56, 
"optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.97, "optimal_onion_potting_agent_1_min": 14, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.56, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.97, "viable_onion_potting_agent_1_min": 14, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [633.0, 630.0, 579.0, 582.0, 636.0, 575.0, 587.0, 582.0, 630.0, 630.0, 636.0, 633.0, 627.0, 627.0, 633.0, 593.0, 587.0, 636.0, 630.0, 627.0, 630.0, 636.0, 633.0, 630.0, 630.0, 573.0, 627.0, 579.0, 596.0, 579.0, 630.0, 633.0, 636.0, 587.0, 639.0, 639.0, 579.0, 596.0, 587.0, 627.0, 633.0, 636.0, 636.0, 636.0, 636.0, 636.0, 582.0, 630.0, 633.0, 636.0, 630.0, 590.0, 636.0, 633.0, 573.0, 558.0, 636.0, 636.0, 636.0, 630.0, 639.0, 636.0, 522.0, 587.0, 636.0, 636.0, 587.0, 639.0, 639.0, 573.0, 636.0, 630.0, 636.0, 630.0, 633.0, 579.0, 636.0, 639.0, 630.0, 633.0, 639.0, 639.0, 582.0, 636.0, 633.0, 630.0, 567.0, 633.0, 627.0, 627.0, 582.0, 639.0, 633.0, 636.0, 579.0, 630.0, 636.0, 636.0, 587.0, 639.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [321.0, 312.0, 311.0, 319.0, 288.0, 291.0, 286.0, 296.0, 311.0, 325.0, 291.0, 284.0, 306.0, 281.0, 294.0, 288.0, 314.0, 316.0, 311.0, 319.0, 319.0, 317.0, 316.0, 317.0, 305.0, 322.0, 308.0, 319.0, 314.0, 319.0, 288.0, 305.0, 299.0, 288.0, 319.0, 317.0, 316.0, 314.0, 318.0, 309.0, 311.0, 319.0, 319.0, 317.0, 321.0, 312.0, 316.0, 314.0, 311.0, 319.0, 287.0, 286.0, 311.0, 316.0, 291.0, 288.0, 294.0, 302.0, 291.0, 288.0, 314.0, 316.0, 311.0, 322.0, 316.0, 320.0, 303.0, 284.0, 314.0, 325.0, 319.0, 320.0, 283.0, 296.0, 300.0, 296.0, 297.0, 290.0, 315.0, 312.0, 314.0, 319.0, 319.0, 317.0, 317.0, 319.0, 314.0, 322.0, 322.0, 314.0, 317.0, 319.0, 294.0, 288.0, 306.0, 324.0, 311.0, 
322.0, 316.0, 320.0, 316.0, 314.0, 304.0, 286.0, 324.0, 312.0, 317.0, 316.0, 286.0, 287.0, 278.0, 280.0, 319.0, 317.0, 329.0, 307.0, 311.0, 325.0, 318.0, 312.0, 312.0, 327.0, 319.0, 317.0, 262.0, 260.0, 294.0, 293.0, 319.0, 317.0, 322.0, 314.0, 288.0, 299.0, 322.0, 317.0, 317.0, 322.0, 292.0, 281.0, 308.0, 328.0, 304.0, 326.0, 317.0, 319.0, 319.0, 311.0, 311.0, 322.0, 284.0, 295.0, 313.0, 323.0, 319.0, 320.0, 319.0, 311.0, 316.0, 317.0, 319.0, 320.0, 321.0, 318.0, 283.0, 299.0, 316.0, 320.0, 316.0, 317.0, 316.0, 314.0, 279.0, 288.0, 313.0, 320.0, 313.0, 314.0, 311.0, 316.0, 288.0, 294.0, 319.0, 320.0, 314.0, 319.0, 319.0, 317.0, 294.0, 285.0, 318.0, 312.0, 317.0, 319.0, 322.0, 314.0, 298.0, 289.0, 322.0, 317.0]}, "sampler_perf": {"mean_env_wait_ms": 0.9798702213008988, "mean_processing_ms": 0.26423169692571163, "mean_inference_ms": 1.5586086454060646}, "off_policy_estimator": {}, "info": {"num_steps_trained": 6816000, "num_steps_sampled": 3635200, "sample_time_ms": 22897.149, "load_time_ms": 36.929, "grad_time_ms": 10771.125, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.005141934845596552, "policy_loss": -0.003231912851333618, "vf_loss": 89.45598602294922, "vf_explained_var": 0.7527138590812683, "kl": 0.0021111962851136923, "entropy": 1.1434991359710693, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3635200, "episodes_total": 9088, "training_iteration": 284, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-09-42", "timestamp": 1660255782, "time_this_iter_s": 32.81455707550049, "time_total_s": 14196.253762722015, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 14196.253762722015, "timesteps_since_restore": 3635200, "iterations_since_restore": 284, "perf": {"cpu_util_percent": 32.710638297872336, "ram_util_percent": 58.93617021276594}}
+{"episode_reward_max": 639.0, "episode_reward_min": 180.0, "episode_reward_mean": 616.71, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 89.0}, "policy_reward_max": {"ppo": 330.0}, "policy_reward_mean": {"ppo": 308.355}, "custom_metrics": {"sparse_reward_mean": 213.6, "sparse_reward_min": 60, "sparse_reward_max": 220, "shaped_reward_mean": 189.51, "shaped_reward_min": 60, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.26, "onion_pickup_agent_0_min": 5, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 18.86, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 16.6, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 18.07, "useful_onion_pickup_agent_1_min": 6, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.56, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 8, "onion_drop_agent_1_mean": 0.54, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.26, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 7, "useful_onion_drop_agent_1_mean": 0.2, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 16.46, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.94, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.19, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.65, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.66, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.16, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.43, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.4, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.04, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.61, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.17, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.57, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.14, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.46, 
"optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.94, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.46, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.94, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 636.0, 633.0, 639.0, 630.0, 636.0, 636.0, 636.0, 627.0, 633.0, 582.0, 636.0, 627.0, 180.0, 639.0, 633.0, 639.0, 636.0, 636.0, 633.0, 633.0, 539.0, 630.0, 636.0, 639.0, 636.0, 630.0, 587.0, 633.0, 636.0, 636.0, 636.0, 636.0, 636.0, 587.0, 639.0, 639.0, 573.0, 636.0, 630.0, 636.0, 630.0, 633.0, 579.0, 636.0, 639.0, 630.0, 633.0, 639.0, 639.0, 582.0, 636.0, 633.0, 630.0, 567.0, 633.0, 627.0, 627.0, 582.0, 639.0, 633.0, 636.0, 579.0, 630.0, 636.0, 636.0, 587.0, 639.0, 633.0, 630.0, 579.0, 582.0, 636.0, 575.0, 587.0, 582.0, 630.0, 630.0, 636.0, 633.0, 627.0, 627.0, 633.0, 593.0, 587.0, 636.0, 630.0, 627.0, 630.0, 636.0, 633.0, 630.0, 630.0, 573.0, 627.0, 579.0, 596.0, 579.0, 630.0, 633.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [288.0, 294.0, 317.0, 319.0, 317.0, 316.0, 322.0, 317.0, 316.0, 314.0, 327.0, 309.0, 316.0, 320.0, 326.0, 310.0, 316.0, 311.0, 326.0, 307.0, 288.0, 294.0, 311.0, 325.0, 308.0, 319.0, 91.0, 89.0, 317.0, 322.0, 324.0, 309.0, 324.0, 315.0, 317.0, 319.0, 317.0, 319.0, 314.0, 319.0, 314.0, 319.0, 268.0, 271.0, 311.0, 319.0, 314.0, 322.0, 322.0, 317.0, 322.0, 314.0, 329.0, 301.0, 296.0, 291.0, 314.0, 319.0, 306.0, 330.0, 314.0, 322.0, 324.0, 312.0, 319.0, 317.0, 322.0, 314.0, 288.0, 299.0, 322.0, 317.0, 317.0, 322.0, 292.0, 281.0, 308.0, 328.0, 304.0, 326.0, 317.0, 319.0, 319.0, 311.0, 311.0, 322.0, 284.0, 295.0, 313.0, 323.0, 319.0, 320.0, 319.0, 311.0, 316.0, 317.0, 319.0, 
320.0, 321.0, 318.0, 283.0, 299.0, 316.0, 320.0, 316.0, 317.0, 316.0, 314.0, 279.0, 288.0, 313.0, 320.0, 313.0, 314.0, 311.0, 316.0, 288.0, 294.0, 319.0, 320.0, 314.0, 319.0, 319.0, 317.0, 294.0, 285.0, 318.0, 312.0, 317.0, 319.0, 322.0, 314.0, 298.0, 289.0, 322.0, 317.0, 321.0, 312.0, 311.0, 319.0, 288.0, 291.0, 286.0, 296.0, 311.0, 325.0, 291.0, 284.0, 306.0, 281.0, 294.0, 288.0, 314.0, 316.0, 311.0, 319.0, 319.0, 317.0, 316.0, 317.0, 305.0, 322.0, 308.0, 319.0, 314.0, 319.0, 288.0, 305.0, 299.0, 288.0, 319.0, 317.0, 316.0, 314.0, 318.0, 309.0, 311.0, 319.0, 319.0, 317.0, 321.0, 312.0, 316.0, 314.0, 311.0, 319.0, 287.0, 286.0, 311.0, 316.0, 291.0, 288.0, 294.0, 302.0, 291.0, 288.0, 314.0, 316.0, 311.0, 322.0]}, "sampler_perf": {"mean_env_wait_ms": 0.978026720054701, "mean_processing_ms": 0.2638645692972578, "mean_inference_ms": 1.5570587102664553}, "off_policy_estimator": {}, "info": {"num_steps_trained": 6840000, "num_steps_sampled": 3648000, "sample_time_ms": 23082.722, "load_time_ms": 36.933, "grad_time_ms": 10916.497, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.002097133779898286, "policy_loss": -0.005807385314255953, "vf_loss": 84.70693969726562, "vf_explained_var": 0.7814067006111145, "kl": 0.0015371787594631314, "entropy": 1.1323403120040894, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3648000, "episodes_total": 9120, "training_iteration": 285, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-10-15", "timestamp": 1660255815, "time_this_iter_s": 33.031522035598755, "time_total_s": 14229.285284757614, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 14229.285284757614, "timesteps_since_restore": 3648000, "iterations_since_restore": 285, "perf": {"cpu_util_percent": 32.80434782608696, "ram_util_percent": 58.88260869565216}}
+{"episode_reward_max": 639.0, "episode_reward_min": 180.0, "episode_reward_mean": 616.27, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 89.0}, "policy_reward_max": {"ppo": 332.0}, "policy_reward_mean": {"ppo": 308.135}, "custom_metrics": {"sparse_reward_mean": 213.4, "sparse_reward_min": 60, "sparse_reward_max": 220, "shaped_reward_mean": 189.47, "shaped_reward_min": 60, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.33, "onion_pickup_agent_0_min": 5, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 18.63, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.67, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 17.8, "useful_onion_pickup_agent_1_min": 6, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.55, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 8, "onion_drop_agent_1_mean": 0.44, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.29, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 7, "useful_onion_drop_agent_1_mean": 0.12, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.5, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.84, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.14, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.5, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.75, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.1, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.32, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.3, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.69, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.1, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.64, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.07, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.5, "optimal_onion_potting_agent_0_min": 
5, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.84, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.5, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.84, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": 
{"episode_reward": [633.0, 582.0, 624.0, 636.0, 630.0, 630.0, 633.0, 636.0, 636.0, 636.0, 630.0, 633.0, 630.0, 630.0, 639.0, 639.0, 630.0, 639.0, 579.0, 633.0, 582.0, 587.0, 636.0, 639.0, 587.0, 636.0, 578.0, 639.0, 630.0, 576.0, 630.0, 582.0, 636.0, 636.0, 587.0, 639.0, 633.0, 630.0, 579.0, 582.0, 636.0, 575.0, 587.0, 582.0, 630.0, 630.0, 636.0, 633.0, 627.0, 627.0, 633.0, 593.0, 587.0, 636.0, 630.0, 627.0, 630.0, 636.0, 633.0, 630.0, 630.0, 573.0, 627.0, 579.0, 596.0, 579.0, 630.0, 633.0, 582.0, 636.0, 633.0, 639.0, 630.0, 636.0, 636.0, 636.0, 627.0, 633.0, 582.0, 636.0, 627.0, 180.0, 639.0, 633.0, 639.0, 636.0, 636.0, 633.0, 633.0, 539.0, 630.0, 636.0, 639.0, 636.0, 630.0, 587.0, 633.0, 636.0, 636.0, 636.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [322.0, 311.0, 296.0, 286.0, 313.0, 311.0, 314.0, 322.0, 309.0, 321.0, 319.0, 311.0, 318.0, 315.0, 325.0, 311.0, 314.0, 322.0, 312.0, 324.0, 319.0, 311.0, 313.0, 320.0, 325.0, 305.0, 324.0, 306.0, 319.0, 320.0, 317.0, 322.0, 313.0, 317.0, 316.0, 323.0, 288.0, 291.0, 319.0, 314.0, 296.0, 286.0, 295.0, 292.0, 332.0, 304.0, 319.0, 320.0, 296.0, 291.0, 316.0, 320.0, 282.0, 296.0, 314.0, 325.0, 316.0, 314.0, 293.0, 283.0, 323.0, 307.0, 291.0, 291.0, 317.0, 319.0, 322.0, 314.0, 298.0, 289.0, 322.0, 317.0, 321.0, 312.0, 311.0, 319.0, 288.0, 291.0, 286.0, 296.0, 311.0, 325.0, 291.0, 284.0, 306.0, 281.0, 294.0, 288.0, 314.0, 316.0, 311.0, 319.0, 319.0, 317.0, 316.0, 317.0, 305.0, 322.0, 308.0, 319.0, 314.0, 319.0, 288.0, 305.0, 299.0, 
288.0, 319.0, 317.0, 316.0, 314.0, 318.0, 309.0, 311.0, 319.0, 319.0, 317.0, 321.0, 312.0, 316.0, 314.0, 311.0, 319.0, 287.0, 286.0, 311.0, 316.0, 291.0, 288.0, 294.0, 302.0, 291.0, 288.0, 314.0, 316.0, 311.0, 322.0, 288.0, 294.0, 317.0, 319.0, 317.0, 316.0, 322.0, 317.0, 316.0, 314.0, 327.0, 309.0, 316.0, 320.0, 326.0, 310.0, 316.0, 311.0, 326.0, 307.0, 288.0, 294.0, 311.0, 325.0, 308.0, 319.0, 91.0, 89.0, 317.0, 322.0, 324.0, 309.0, 324.0, 315.0, 317.0, 319.0, 317.0, 319.0, 314.0, 319.0, 314.0, 319.0, 268.0, 271.0, 311.0, 319.0, 314.0, 322.0, 322.0, 317.0, 322.0, 314.0, 329.0, 301.0, 296.0, 291.0, 314.0, 319.0, 306.0, 330.0, 314.0, 322.0, 324.0, 312.0]}, "sampler_perf": {"mean_env_wait_ms": 0.9761954337234573, "mean_processing_ms": 0.26349990327404404, "mean_inference_ms": 1.5554986152813894}, "off_policy_estimator": {}, "info": {"num_steps_trained": 6864000, "num_steps_sampled": 3660800, "sample_time_ms": 23037.067, "load_time_ms": 36.944, "grad_time_ms": 11090.889, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0016592548927292228, "policy_loss": -0.006142645608633757, "vf_loss": 83.6804428100586, "vf_explained_var": 0.7674832344055176, "kl": 0.0020798875484615564, "entropy": 1.1322760581970215, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3660800, "episodes_total": 9152, "training_iteration": 286, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-10-47", "timestamp": 1660255847, "time_this_iter_s": 31.660379886627197, "time_total_s": 14260.945664644241, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, 
"conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, 
"custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, 
"vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 14260.945664644241, "timesteps_since_restore": 3660800, "iterations_since_restore": 286, "perf": {"cpu_util_percent": 33.97111111111111, "ram_util_percent": 58.86666666666669}}
+{"episode_reward_max": 639.0, "episode_reward_min": 180.0, "episode_reward_mean": 618.23, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 89.0}, "policy_reward_max": {"ppo": 332.0}, "policy_reward_mean": {"ppo": 309.115}, "custom_metrics": {"sparse_reward_mean": 214.2, "sparse_reward_min": 60, "sparse_reward_max": 220, "shaped_reward_mean": 189.83, "shaped_reward_min": 60, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.54, "onion_pickup_agent_0_min": 5, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 18.68, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.87, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 17.89, "useful_onion_pickup_agent_1_min": 6, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.59, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 8, "onion_drop_agent_1_mean": 0.53, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.27, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 7, "useful_onion_drop_agent_1_mean": 0.1, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.63, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.86, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 6.14, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.56, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.65, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.17, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.32, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.29, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.66, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.12, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.65, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.1, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.63, 
"optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.86, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.63, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.86, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [636.0, 579.0, 627.0, 630.0, 636.0, 639.0, 633.0, 633.0, 636.0, 627.0, 636.0, 630.0, 582.0, 627.0, 636.0, 636.0, 636.0, 587.0, 636.0, 633.0, 636.0, 633.0, 633.0, 630.0, 633.0, 636.0, 627.0, 579.0, 639.0, 582.0, 546.0, 636.0, 596.0, 579.0, 630.0, 633.0, 582.0, 636.0, 633.0, 639.0, 630.0, 636.0, 636.0, 636.0, 627.0, 633.0, 582.0, 636.0, 627.0, 180.0, 639.0, 633.0, 639.0, 636.0, 636.0, 633.0, 633.0, 539.0, 630.0, 636.0, 639.0, 636.0, 630.0, 587.0, 633.0, 636.0, 636.0, 636.0, 633.0, 582.0, 624.0, 636.0, 630.0, 630.0, 633.0, 636.0, 636.0, 636.0, 630.0, 633.0, 630.0, 630.0, 639.0, 639.0, 630.0, 639.0, 579.0, 633.0, 582.0, 587.0, 636.0, 639.0, 587.0, 636.0, 578.0, 639.0, 630.0, 576.0, 630.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [313.0, 323.0, 299.0, 280.0, 313.0, 314.0, 316.0, 314.0, 319.0, 317.0, 320.0, 319.0, 319.0, 314.0, 316.0, 317.0, 314.0, 322.0, 324.0, 303.0, 307.0, 329.0, 307.0, 323.0, 293.0, 289.0, 311.0, 316.0, 314.0, 322.0, 319.0, 317.0, 313.0, 323.0, 283.0, 304.0, 319.0, 317.0, 319.0, 314.0, 314.0, 322.0, 311.0, 322.0, 311.0, 322.0, 314.0, 316.0, 322.0, 311.0, 319.0, 317.0, 313.0, 314.0, 282.0, 297.0, 317.0, 322.0, 293.0, 289.0, 283.0, 263.0, 323.0, 313.0, 294.0, 302.0, 291.0, 288.0, 314.0, 316.0, 311.0, 322.0, 288.0, 294.0, 317.0, 319.0, 317.0, 316.0, 322.0, 317.0, 316.0, 314.0, 327.0, 309.0, 316.0, 320.0, 326.0, 310.0, 316.0, 311.0, 326.0, 307.0, 288.0, 294.0, 311.0, 325.0, 308.0, 
319.0, 91.0, 89.0, 317.0, 322.0, 324.0, 309.0, 324.0, 315.0, 317.0, 319.0, 317.0, 319.0, 314.0, 319.0, 314.0, 319.0, 268.0, 271.0, 311.0, 319.0, 314.0, 322.0, 322.0, 317.0, 322.0, 314.0, 329.0, 301.0, 296.0, 291.0, 314.0, 319.0, 306.0, 330.0, 314.0, 322.0, 324.0, 312.0, 322.0, 311.0, 296.0, 286.0, 313.0, 311.0, 314.0, 322.0, 309.0, 321.0, 319.0, 311.0, 318.0, 315.0, 325.0, 311.0, 314.0, 322.0, 312.0, 324.0, 319.0, 311.0, 313.0, 320.0, 325.0, 305.0, 324.0, 306.0, 319.0, 320.0, 317.0, 322.0, 313.0, 317.0, 316.0, 323.0, 288.0, 291.0, 319.0, 314.0, 296.0, 286.0, 295.0, 292.0, 332.0, 304.0, 319.0, 320.0, 296.0, 291.0, 316.0, 320.0, 282.0, 296.0, 314.0, 325.0, 316.0, 314.0, 293.0, 283.0, 323.0, 307.0, 291.0, 291.0]}, "sampler_perf": {"mean_env_wait_ms": 0.9743700047220856, "mean_processing_ms": 0.2631349992390798, "mean_inference_ms": 1.553816906350355}, "off_policy_estimator": {}, "info": {"num_steps_trained": 6888000, "num_steps_sampled": 3673600, "sample_time_ms": 22960.493, "load_time_ms": 36.753, "grad_time_ms": 11340.647, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0018661068752408028, "policy_loss": -0.005918534938246012, "vf_loss": 83.52860260009766, "vf_explained_var": 0.7654721140861511, "kl": 0.0018988008378073573, "entropy": 1.136439561843872, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3673600, "episodes_total": 9184, "training_iteration": 287, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-11-18", "timestamp": 1660255878, "time_this_iter_s": 31.607279777526855, "time_total_s": 14292.552944421768, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 14292.552944421768, "timesteps_since_restore": 3673600, "iterations_since_restore": 287, "perf": {"cpu_util_percent": 34.857777777777784, "ram_util_percent": 58.80666666666665}}
+{"episode_reward_max": 639.0, "episode_reward_min": 546.0, "episode_reward_mean": 622.01, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 263.0}, "policy_reward_max": {"ppo": 332.0}, "policy_reward_mean": {"ppo": 311.005}, "custom_metrics": {"sparse_reward_mean": 215.6, "sparse_reward_min": 200, "sparse_reward_max": 220, "shaped_reward_mean": 190.81, "shaped_reward_min": 146, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.43, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 18.79, "onion_pickup_agent_1_min": 13, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.78, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 18.05, "useful_onion_pickup_agent_1_min": 12, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.52, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.47, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.18, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.08, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.61, "potting_onion_agent_0_min": 13, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 18.06, "potting_onion_agent_1_min": 13, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 6.19, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.7, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.63, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.17, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.31, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.38, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.72, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.16, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.7, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.1, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.61, 
"optimal_onion_potting_agent_0_min": 13, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 18.06, "optimal_onion_potting_agent_1_min": 13, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.61, "viable_onion_potting_agent_0_min": 13, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 18.06, "viable_onion_potting_agent_1_min": 13, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [584.0, 627.0, 633.0, 627.0, 584.0, 633.0, 636.0, 639.0, 639.0, 630.0, 587.0, 630.0, 582.0, 582.0, 584.0, 630.0, 636.0, 630.0, 624.0, 636.0, 633.0, 587.0, 639.0, 630.0, 639.0, 587.0, 639.0, 636.0, 630.0, 630.0, 636.0, 636.0, 633.0, 636.0, 636.0, 636.0, 633.0, 582.0, 624.0, 636.0, 630.0, 630.0, 633.0, 636.0, 636.0, 636.0, 630.0, 633.0, 630.0, 630.0, 639.0, 639.0, 630.0, 639.0, 579.0, 633.0, 582.0, 587.0, 636.0, 639.0, 587.0, 636.0, 578.0, 639.0, 630.0, 576.0, 630.0, 582.0, 636.0, 579.0, 627.0, 630.0, 636.0, 639.0, 633.0, 633.0, 636.0, 627.0, 636.0, 630.0, 582.0, 627.0, 636.0, 636.0, 636.0, 587.0, 636.0, 633.0, 636.0, 633.0, 633.0, 630.0, 633.0, 636.0, 627.0, 579.0, 639.0, 582.0, 546.0, 636.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [285.0, 299.0, 316.0, 311.0, 324.0, 309.0, 318.0, 309.0, 289.0, 295.0, 314.0, 319.0, 316.0, 320.0, 319.0, 320.0, 317.0, 322.0, 314.0, 316.0, 293.0, 294.0, 318.0, 312.0, 288.0, 294.0, 289.0, 293.0, 292.0, 292.0, 315.0, 315.0, 324.0, 312.0, 324.0, 306.0, 313.0, 311.0, 314.0, 322.0, 311.0, 322.0, 295.0, 292.0, 322.0, 317.0, 308.0, 322.0, 314.0, 325.0, 293.0, 294.0, 317.0, 322.0, 318.0, 318.0, 319.0, 311.0, 314.0, 316.0, 319.0, 317.0, 314.0, 322.0, 314.0, 319.0, 306.0, 330.0, 314.0, 322.0, 324.0, 312.0, 322.0, 311.0, 296.0, 286.0, 313.0, 311.0, 314.0, 322.0, 309.0, 321.0, 319.0, 311.0, 318.0, 315.0, 325.0, 311.0, 314.0, 322.0, 312.0, 324.0, 319.0, 311.0, 313.0, 320.0, 325.0, 
305.0, 324.0, 306.0, 319.0, 320.0, 317.0, 322.0, 313.0, 317.0, 316.0, 323.0, 288.0, 291.0, 319.0, 314.0, 296.0, 286.0, 295.0, 292.0, 332.0, 304.0, 319.0, 320.0, 296.0, 291.0, 316.0, 320.0, 282.0, 296.0, 314.0, 325.0, 316.0, 314.0, 293.0, 283.0, 323.0, 307.0, 291.0, 291.0, 313.0, 323.0, 299.0, 280.0, 313.0, 314.0, 316.0, 314.0, 319.0, 317.0, 320.0, 319.0, 319.0, 314.0, 316.0, 317.0, 314.0, 322.0, 324.0, 303.0, 307.0, 329.0, 307.0, 323.0, 293.0, 289.0, 311.0, 316.0, 314.0, 322.0, 319.0, 317.0, 313.0, 323.0, 283.0, 304.0, 319.0, 317.0, 319.0, 314.0, 314.0, 322.0, 311.0, 322.0, 311.0, 322.0, 314.0, 316.0, 322.0, 311.0, 319.0, 317.0, 313.0, 314.0, 282.0, 297.0, 317.0, 322.0, 293.0, 289.0, 283.0, 263.0, 323.0, 313.0]}, "sampler_perf": {"mean_env_wait_ms": 0.9725594290722099, "mean_processing_ms": 0.2627719670023304, "mean_inference_ms": 1.5521036278405136}, "off_policy_estimator": {}, "info": {"num_steps_trained": 6912000, "num_steps_sampled": 3686400, "sample_time_ms": 23043.972, "load_time_ms": 36.653, "grad_time_ms": 11285.002, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0023610887583345175, "policy_loss": -0.005447230767458677, "vf_loss": 83.72765350341797, "vf_explained_var": 0.7662909030914307, "kl": 0.001831754925660789, "entropy": 1.128881573677063, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3686400, "episodes_total": 9216, "training_iteration": 288, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-11-51", "timestamp": 1660255911, "time_this_iter_s": 32.35726475715637, "time_total_s": 14324.910209178925, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 14324.910209178925, "timesteps_since_restore": 3686400, "iterations_since_restore": 288, "perf": {"cpu_util_percent": 32.42, "ram_util_percent": 58.875555555555565}}
+{"episode_reward_max": 639.0, "episode_reward_min": 546.0, "episode_reward_mean": 621.91, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 263.0}, "policy_reward_max": {"ppo": 329.0}, "policy_reward_mean": {"ppo": 310.955}, "custom_metrics": {"sparse_reward_mean": 215.6, "sparse_reward_min": 200, "sparse_reward_max": 220, "shaped_reward_mean": 190.71, "shaped_reward_min": 146, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.17, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 18.98, "onion_pickup_agent_1_min": 13, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 16.55, "useful_onion_pickup_agent_0_min": 9, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 18.24, "useful_onion_pickup_agent_1_min": 12, "useful_onion_pickup_agent_1_max": 26, "onion_drop_agent_0_mean": 0.49, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.49, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.2, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.13, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.38, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 18.24, "potting_onion_agent_1_min": 13, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 6.18, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.68, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.64, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.21, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.3, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.36, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.73, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.14, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.71, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.08, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.38, 
"optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 18.24, "optimal_onion_potting_agent_1_min": 13, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.38, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 18.24, "viable_onion_potting_agent_1_min": 13, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [636.0, 636.0, 633.0, 636.0, 639.0, 627.0, 627.0, 639.0, 639.0, 636.0, 630.0, 636.0, 630.0, 633.0, 639.0, 579.0, 587.0, 630.0, 633.0, 630.0, 639.0, 582.0, 630.0, 639.0, 636.0, 630.0, 573.0, 582.0, 630.0, 582.0, 636.0, 639.0, 630.0, 576.0, 630.0, 582.0, 636.0, 579.0, 627.0, 630.0, 636.0, 639.0, 633.0, 633.0, 636.0, 627.0, 636.0, 630.0, 582.0, 627.0, 636.0, 636.0, 636.0, 587.0, 636.0, 633.0, 636.0, 633.0, 633.0, 630.0, 633.0, 636.0, 627.0, 579.0, 639.0, 582.0, 546.0, 636.0, 584.0, 627.0, 633.0, 627.0, 584.0, 633.0, 636.0, 639.0, 639.0, 630.0, 587.0, 630.0, 582.0, 582.0, 584.0, 630.0, 636.0, 630.0, 624.0, 636.0, 633.0, 587.0, 639.0, 630.0, 639.0, 587.0, 639.0, 636.0, 630.0, 630.0, 636.0, 636.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [324.0, 312.0, 319.0, 317.0, 321.0, 312.0, 318.0, 318.0, 319.0, 320.0, 305.0, 322.0, 310.0, 317.0, 319.0, 320.0, 314.0, 325.0, 321.0, 315.0, 311.0, 319.0, 311.0, 325.0, 311.0, 319.0, 311.0, 322.0, 319.0, 320.0, 285.0, 294.0, 298.0, 289.0, 319.0, 311.0, 313.0, 320.0, 308.0, 322.0, 319.0, 320.0, 293.0, 289.0, 319.0, 311.0, 324.0, 315.0, 314.0, 322.0, 311.0, 319.0, 276.0, 297.0, 286.0, 296.0, 308.0, 322.0, 286.0, 296.0, 324.0, 312.0, 321.0, 318.0, 316.0, 314.0, 293.0, 283.0, 323.0, 307.0, 291.0, 291.0, 313.0, 323.0, 299.0, 280.0, 313.0, 314.0, 316.0, 314.0, 319.0, 317.0, 320.0, 319.0, 319.0, 314.0, 316.0, 317.0, 314.0, 322.0, 324.0, 303.0, 307.0, 329.0, 307.0, 323.0, 293.0, 
289.0, 311.0, 316.0, 314.0, 322.0, 319.0, 317.0, 313.0, 323.0, 283.0, 304.0, 319.0, 317.0, 319.0, 314.0, 314.0, 322.0, 311.0, 322.0, 311.0, 322.0, 314.0, 316.0, 322.0, 311.0, 319.0, 317.0, 313.0, 314.0, 282.0, 297.0, 317.0, 322.0, 293.0, 289.0, 283.0, 263.0, 323.0, 313.0, 285.0, 299.0, 316.0, 311.0, 324.0, 309.0, 318.0, 309.0, 289.0, 295.0, 314.0, 319.0, 316.0, 320.0, 319.0, 320.0, 317.0, 322.0, 314.0, 316.0, 293.0, 294.0, 318.0, 312.0, 288.0, 294.0, 289.0, 293.0, 292.0, 292.0, 315.0, 315.0, 324.0, 312.0, 324.0, 306.0, 313.0, 311.0, 314.0, 322.0, 311.0, 322.0, 295.0, 292.0, 322.0, 317.0, 308.0, 322.0, 314.0, 325.0, 293.0, 294.0, 317.0, 322.0, 318.0, 318.0, 319.0, 311.0, 314.0, 316.0, 319.0, 317.0, 314.0, 322.0]}, "sampler_perf": {"mean_env_wait_ms": 0.9707646787589033, "mean_processing_ms": 0.26241176618575823, "mean_inference_ms": 1.5504455690384487}, "off_policy_estimator": {}, "info": {"num_steps_trained": 6936000, "num_steps_sampled": 3699200, "sample_time_ms": 22728.739, "load_time_ms": 36.546, "grad_time_ms": 11195.454, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0004752982931677252, "policy_loss": -0.00697875814512372, "vf_loss": 80.24703979492188, "vf_explained_var": 0.7700864672660828, "kl": 0.001980138709768653, "entropy": 1.1412941217422485, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3699200, "episodes_total": 9248, "training_iteration": 289, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-12-24", "timestamp": 1660255944, "time_this_iter_s": 33.15079879760742, "time_total_s": 14358.061007976532, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 14358.061007976532, "timesteps_since_restore": 3699200, "iterations_since_restore": 289, "perf": {"cpu_util_percent": 32.6936170212766, "ram_util_percent": 58.93829787234045}}
+{"episode_reward_max": 639.0, "episode_reward_min": 405.0, "episode_reward_mean": 617.63, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 198.0}, "policy_reward_max": {"ppo": 326.0}, "policy_reward_mean": {"ppo": 308.815}, "custom_metrics": {"sparse_reward_mean": 214.0, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 189.63, "shaped_reward_min": 125, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.85, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 19.11, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 16.2, "useful_onion_pickup_agent_0_min": 9, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 18.35, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 26, "onion_drop_agent_0_mean": 0.45, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.51, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.15, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.13, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.05, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 18.37, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 6.34, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.54, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.69, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.05, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.36, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.36, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.81, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.03, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.76, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.94, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.05, 
"optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 18.37, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.05, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 18.37, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [590.0, 633.0, 584.0, 636.0, 630.0, 630.0, 633.0, 587.0, 636.0, 630.0, 582.0, 405.0, 633.0, 630.0, 627.0, 579.0, 576.0, 587.0, 582.0, 636.0, 636.0, 618.0, 636.0, 630.0, 636.0, 624.0, 639.0, 633.0, 630.0, 587.0, 587.0, 630.0, 639.0, 582.0, 546.0, 636.0, 584.0, 627.0, 633.0, 627.0, 584.0, 633.0, 636.0, 639.0, 639.0, 630.0, 587.0, 630.0, 582.0, 582.0, 584.0, 630.0, 636.0, 630.0, 624.0, 636.0, 633.0, 587.0, 639.0, 630.0, 639.0, 587.0, 639.0, 636.0, 630.0, 630.0, 636.0, 636.0, 636.0, 636.0, 633.0, 636.0, 639.0, 627.0, 627.0, 639.0, 639.0, 636.0, 630.0, 636.0, 630.0, 633.0, 639.0, 579.0, 587.0, 630.0, 633.0, 630.0, 639.0, 582.0, 630.0, 639.0, 636.0, 630.0, 573.0, 582.0, 630.0, 582.0, 636.0, 639.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [293.0, 297.0, 316.0, 317.0, 290.0, 294.0, 317.0, 319.0, 311.0, 319.0, 323.0, 307.0, 316.0, 317.0, 292.0, 295.0, 319.0, 317.0, 313.0, 317.0, 281.0, 301.0, 198.0, 207.0, 313.0, 320.0, 310.0, 320.0, 311.0, 316.0, 282.0, 297.0, 285.0, 291.0, 309.0, 278.0, 293.0, 289.0, 311.0, 325.0, 321.0, 315.0, 302.0, 316.0, 314.0, 322.0, 311.0, 319.0, 319.0, 317.0, 319.0, 305.0, 319.0, 320.0, 322.0, 311.0, 326.0, 304.0, 301.0, 286.0, 293.0, 294.0, 313.0, 317.0, 317.0, 322.0, 293.0, 289.0, 283.0, 263.0, 323.0, 313.0, 285.0, 299.0, 316.0, 311.0, 324.0, 309.0, 318.0, 309.0, 289.0, 295.0, 314.0, 319.0, 316.0, 320.0, 319.0, 320.0, 317.0, 322.0, 314.0, 316.0, 293.0, 294.0, 318.0, 312.0, 288.0, 
294.0, 289.0, 293.0, 292.0, 292.0, 315.0, 315.0, 324.0, 312.0, 324.0, 306.0, 313.0, 311.0, 314.0, 322.0, 311.0, 322.0, 295.0, 292.0, 322.0, 317.0, 308.0, 322.0, 314.0, 325.0, 293.0, 294.0, 317.0, 322.0, 318.0, 318.0, 319.0, 311.0, 314.0, 316.0, 319.0, 317.0, 314.0, 322.0, 324.0, 312.0, 319.0, 317.0, 321.0, 312.0, 318.0, 318.0, 319.0, 320.0, 305.0, 322.0, 310.0, 317.0, 319.0, 320.0, 314.0, 325.0, 321.0, 315.0, 311.0, 319.0, 311.0, 325.0, 311.0, 319.0, 311.0, 322.0, 319.0, 320.0, 285.0, 294.0, 298.0, 289.0, 319.0, 311.0, 313.0, 320.0, 308.0, 322.0, 319.0, 320.0, 293.0, 289.0, 319.0, 311.0, 324.0, 315.0, 314.0, 322.0, 311.0, 319.0, 276.0, 297.0, 286.0, 296.0, 308.0, 322.0, 286.0, 296.0, 324.0, 312.0, 321.0, 318.0]}, "sampler_perf": {"mean_env_wait_ms": 0.9689852254621897, "mean_processing_ms": 0.26205467993702586, "mean_inference_ms": 1.548820818018191}, "off_policy_estimator": {}, "info": {"num_steps_trained": 6960000, "num_steps_sampled": 3712000, "sample_time_ms": 22386.717, "load_time_ms": 37.444, "grad_time_ms": 11069.639, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0020440255757421255, "policy_loss": -0.0058852084912359715, "vf_loss": 84.9912338256836, "vf_explained_var": 0.7650973200798035, "kl": 0.0021299307700246572, "entropy": 1.1397589445114136, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3712000, "episodes_total": 9280, "training_iteration": 290, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-12-56", "timestamp": 1660255976, "time_this_iter_s": 32.003324031829834, "time_total_s": 14390.064332008362, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 14390.064332008362, "timesteps_since_restore": 3712000, "iterations_since_restore": 290, "perf": {"cpu_util_percent": 30.039130434782606, "ram_util_percent": 58.817391304347815}}
+{"episode_reward_max": 639.0, "episode_reward_min": 405.0, "episode_reward_mean": 614.76, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 198.0}, "policy_reward_max": {"ppo": 328.0}, "policy_reward_mean": {"ppo": 307.38}, "custom_metrics": {"sparse_reward_mean": 213.0, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 188.76, "shaped_reward_min": 125, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.96, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 19.15, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 16.26, "useful_onion_pickup_agent_0_min": 9, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 18.34, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 26, "onion_drop_agent_0_mean": 0.5, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.72, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.15, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.18, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.09, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 18.2, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.35, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.58, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.58, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.05, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.48, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.36, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.76, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.06, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.69, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.99, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.03, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.09, 
"optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 18.2, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.09, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 18.2, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [544.0, 633.0, 627.0, 618.0, 636.0, 627.0, 581.0, 582.0, 639.0, 587.0, 639.0, 573.0, 633.0, 636.0, 587.0, 639.0, 564.0, 639.0, 630.0, 633.0, 558.0, 582.0, 630.0, 630.0, 587.0, 624.0, 636.0, 582.0, 587.0, 633.0, 587.0, 576.0, 630.0, 630.0, 636.0, 636.0, 636.0, 636.0, 633.0, 636.0, 639.0, 627.0, 627.0, 639.0, 639.0, 636.0, 630.0, 636.0, 630.0, 633.0, 639.0, 579.0, 587.0, 630.0, 633.0, 630.0, 639.0, 582.0, 630.0, 639.0, 636.0, 630.0, 573.0, 582.0, 630.0, 582.0, 636.0, 639.0, 590.0, 633.0, 584.0, 636.0, 630.0, 630.0, 633.0, 587.0, 636.0, 630.0, 582.0, 405.0, 633.0, 630.0, 627.0, 579.0, 576.0, 587.0, 582.0, 636.0, 636.0, 618.0, 636.0, 630.0, 636.0, 624.0, 639.0, 633.0, 630.0, 587.0, 587.0, 630.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [267.0, 277.0, 319.0, 314.0, 311.0, 316.0, 315.0, 303.0, 309.0, 327.0, 305.0, 322.0, 284.0, 297.0, 290.0, 292.0, 311.0, 328.0, 288.0, 299.0, 317.0, 322.0, 292.0, 281.0, 316.0, 317.0, 324.0, 312.0, 293.0, 294.0, 322.0, 317.0, 287.0, 277.0, 319.0, 320.0, 318.0, 312.0, 322.0, 311.0, 292.0, 266.0, 288.0, 294.0, 310.0, 320.0, 314.0, 316.0, 293.0, 294.0, 311.0, 313.0, 319.0, 317.0, 289.0, 293.0, 295.0, 292.0, 316.0, 317.0, 306.0, 281.0, 283.0, 293.0, 319.0, 311.0, 314.0, 316.0, 319.0, 317.0, 314.0, 322.0, 324.0, 312.0, 319.0, 317.0, 321.0, 312.0, 318.0, 318.0, 319.0, 320.0, 305.0, 322.0, 310.0, 317.0, 319.0, 320.0, 314.0, 325.0, 321.0, 315.0, 311.0, 319.0, 311.0, 325.0, 311.0, 
319.0, 311.0, 322.0, 319.0, 320.0, 285.0, 294.0, 298.0, 289.0, 319.0, 311.0, 313.0, 320.0, 308.0, 322.0, 319.0, 320.0, 293.0, 289.0, 319.0, 311.0, 324.0, 315.0, 314.0, 322.0, 311.0, 319.0, 276.0, 297.0, 286.0, 296.0, 308.0, 322.0, 286.0, 296.0, 324.0, 312.0, 321.0, 318.0, 293.0, 297.0, 316.0, 317.0, 290.0, 294.0, 317.0, 319.0, 311.0, 319.0, 323.0, 307.0, 316.0, 317.0, 292.0, 295.0, 319.0, 317.0, 313.0, 317.0, 281.0, 301.0, 198.0, 207.0, 313.0, 320.0, 310.0, 320.0, 311.0, 316.0, 282.0, 297.0, 285.0, 291.0, 309.0, 278.0, 293.0, 289.0, 311.0, 325.0, 321.0, 315.0, 302.0, 316.0, 314.0, 322.0, 311.0, 319.0, 319.0, 317.0, 319.0, 305.0, 319.0, 320.0, 322.0, 311.0, 326.0, 304.0, 301.0, 286.0, 293.0, 294.0, 313.0, 317.0]}, "sampler_perf": {"mean_env_wait_ms": 0.967235186119729, "mean_processing_ms": 0.26170596959071135, "mean_inference_ms": 1.5471921568344904}, "off_policy_estimator": {}, "info": {"num_steps_trained": 6984000, "num_steps_sampled": 3724800, "sample_time_ms": 21912.426, "load_time_ms": 37.292, "grad_time_ms": 10653.236, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.003775561461225152, "policy_loss": -0.004369485657662153, "vf_loss": 87.11421966552734, "vf_explained_var": 0.7634318470954895, "kl": 0.0017795447492972016, "entropy": 1.1327377557754517, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3724800, "episodes_total": 9312, "training_iteration": 291, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-13-27", "timestamp": 1660256007, "time_this_iter_s": 30.522319793701172, "time_total_s": 14420.586651802063, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 14420.586651802063, "timesteps_since_restore": 3724800, "iterations_since_restore": 291, "perf": {"cpu_util_percent": 38.19767441860465, "ram_util_percent": 59.255813953488385}}
+{"episode_reward_max": 639.0, "episode_reward_min": 405.0, "episode_reward_mean": 610.93, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 198.0}, "policy_reward_max": {"ppo": 328.0}, "policy_reward_mean": {"ppo": 305.465}, "custom_metrics": {"sparse_reward_mean": 211.6, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 187.73, "shaped_reward_min": 125, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.16, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 19.04, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.5, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 18.25, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.62, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.77, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.13, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.18, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 16.23, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.98, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.46, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.59, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.5, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.9, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.62, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.41, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.73, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.08, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.64, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.99, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.03, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.23, 
"optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.98, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.23, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.98, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 639.0, 633.0, 633.0, 633.0, 567.0, 636.0, 633.0, 633.0, 636.0, 636.0, 558.0, 584.0, 639.0, 639.0, 633.0, 567.0, 590.0, 636.0, 633.0, 636.0, 630.0, 582.0, 630.0, 636.0, 558.0, 639.0, 584.0, 587.0, 544.0, 636.0, 633.0, 630.0, 582.0, 636.0, 639.0, 590.0, 633.0, 584.0, 636.0, 630.0, 630.0, 633.0, 587.0, 636.0, 630.0, 582.0, 405.0, 633.0, 630.0, 627.0, 579.0, 576.0, 587.0, 582.0, 636.0, 636.0, 618.0, 636.0, 630.0, 636.0, 624.0, 639.0, 633.0, 630.0, 587.0, 587.0, 630.0, 544.0, 633.0, 627.0, 618.0, 636.0, 627.0, 581.0, 582.0, 639.0, 587.0, 639.0, 573.0, 633.0, 636.0, 587.0, 639.0, 564.0, 639.0, 630.0, 633.0, 558.0, 582.0, 630.0, 630.0, 587.0, 624.0, 636.0, 582.0, 587.0, 633.0, 587.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [289.0, 293.0, 325.0, 314.0, 319.0, 314.0, 309.0, 324.0, 311.0, 322.0, 277.0, 290.0, 316.0, 320.0, 325.0, 308.0, 312.0, 321.0, 327.0, 309.0, 318.0, 318.0, 274.0, 284.0, 288.0, 296.0, 324.0, 315.0, 319.0, 320.0, 324.0, 309.0, 279.0, 288.0, 301.0, 289.0, 312.0, 324.0, 311.0, 322.0, 319.0, 317.0, 321.0, 309.0, 291.0, 291.0, 319.0, 311.0, 317.0, 319.0, 273.0, 285.0, 322.0, 317.0, 299.0, 285.0, 295.0, 292.0, 275.0, 269.0, 319.0, 317.0, 317.0, 316.0, 308.0, 322.0, 286.0, 296.0, 324.0, 312.0, 321.0, 318.0, 293.0, 297.0, 316.0, 317.0, 290.0, 294.0, 317.0, 319.0, 311.0, 319.0, 323.0, 307.0, 316.0, 317.0, 292.0, 295.0, 319.0, 317.0, 313.0, 317.0, 281.0, 301.0, 198.0, 207.0, 313.0, 
320.0, 310.0, 320.0, 311.0, 316.0, 282.0, 297.0, 285.0, 291.0, 309.0, 278.0, 293.0, 289.0, 311.0, 325.0, 321.0, 315.0, 302.0, 316.0, 314.0, 322.0, 311.0, 319.0, 319.0, 317.0, 319.0, 305.0, 319.0, 320.0, 322.0, 311.0, 326.0, 304.0, 301.0, 286.0, 293.0, 294.0, 313.0, 317.0, 267.0, 277.0, 319.0, 314.0, 311.0, 316.0, 315.0, 303.0, 309.0, 327.0, 305.0, 322.0, 284.0, 297.0, 290.0, 292.0, 311.0, 328.0, 288.0, 299.0, 317.0, 322.0, 292.0, 281.0, 316.0, 317.0, 324.0, 312.0, 293.0, 294.0, 322.0, 317.0, 287.0, 277.0, 319.0, 320.0, 318.0, 312.0, 322.0, 311.0, 292.0, 266.0, 288.0, 294.0, 310.0, 320.0, 314.0, 316.0, 293.0, 294.0, 311.0, 313.0, 319.0, 317.0, 289.0, 293.0, 295.0, 292.0, 316.0, 317.0, 306.0, 281.0, 283.0, 293.0]}, "sampler_perf": {"mean_env_wait_ms": 0.9654876642180179, "mean_processing_ms": 0.26135729355980103, "mean_inference_ms": 1.545357165029807}, "off_policy_estimator": {}, "info": {"num_steps_trained": 7008000, "num_steps_sampled": 3737600, "sample_time_ms": 21647.86, "load_time_ms": 37.443, "grad_time_ms": 10160.016, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0012383932480588555, "policy_loss": -0.006961038801819086, "vf_loss": 87.61873626708984, "vf_explained_var": 0.757759153842926, "kl": 0.001912236213684082, "entropy": 1.1248730421066284, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3737600, "episodes_total": 9344, "training_iteration": 292, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-13-55", "timestamp": 1660256035, "time_this_iter_s": 27.889997720718384, "time_total_s": 14448.476649522781, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 14448.476649522781, "timesteps_since_restore": 3737600, "iterations_since_restore": 292, "perf": {"cpu_util_percent": 31.551282051282044, "ram_util_percent": 59.09743589743588}}
+{"episode_reward_max": 639.0, "episode_reward_min": 339.0, "episode_reward_mean": 610.87, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 167.0}, "policy_reward_max": {"ppo": 330.0}, "policy_reward_mean": {"ppo": 305.435}, "custom_metrics": {"sparse_reward_mean": 211.6, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 187.67, "shaped_reward_min": 99, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.2, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 19.18, "onion_pickup_agent_1_min": 13, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.62, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 18.39, "useful_onion_pickup_agent_1_min": 12, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.64, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.87, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.16, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.23, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 16.29, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.96, "potting_onion_agent_1_min": 12, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.5, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.65, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.47, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.87, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.67, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.44, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.04, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.72, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.09, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.63, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.01, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.03, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.29, 
"optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.96, "optimal_onion_potting_agent_1_min": 12, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.29, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.96, "viable_onion_potting_agent_1_min": 12, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [555.0, 633.0, 564.0, 639.0, 581.0, 636.0, 630.0, 639.0, 630.0, 590.0, 639.0, 636.0, 639.0, 627.0, 630.0, 639.0, 639.0, 636.0, 633.0, 630.0, 636.0, 639.0, 630.0, 639.0, 584.0, 633.0, 633.0, 636.0, 339.0, 579.0, 579.0, 587.0, 630.0, 587.0, 587.0, 630.0, 544.0, 633.0, 627.0, 618.0, 636.0, 627.0, 581.0, 582.0, 639.0, 587.0, 639.0, 573.0, 633.0, 636.0, 587.0, 639.0, 564.0, 639.0, 630.0, 633.0, 558.0, 582.0, 630.0, 630.0, 587.0, 624.0, 636.0, 582.0, 587.0, 633.0, 587.0, 576.0, 582.0, 639.0, 633.0, 633.0, 633.0, 567.0, 636.0, 633.0, 633.0, 636.0, 636.0, 558.0, 584.0, 639.0, 639.0, 633.0, 567.0, 590.0, 636.0, 633.0, 636.0, 630.0, 582.0, 630.0, 636.0, 558.0, 639.0, 584.0, 587.0, 544.0, 636.0, 633.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [285.0, 270.0, 316.0, 317.0, 293.0, 271.0, 311.0, 328.0, 295.0, 286.0, 324.0, 312.0, 321.0, 309.0, 316.0, 323.0, 311.0, 319.0, 299.0, 291.0, 319.0, 320.0, 314.0, 322.0, 321.0, 318.0, 316.0, 311.0, 311.0, 319.0, 315.0, 324.0, 314.0, 325.0, 316.0, 320.0, 311.0, 322.0, 300.0, 330.0, 319.0, 317.0, 319.0, 320.0, 319.0, 311.0, 314.0, 325.0, 288.0, 296.0, 318.0, 315.0, 319.0, 314.0, 316.0, 320.0, 167.0, 172.0, 288.0, 291.0, 285.0, 294.0, 293.0, 294.0, 326.0, 304.0, 301.0, 286.0, 293.0, 294.0, 313.0, 317.0, 267.0, 277.0, 319.0, 314.0, 311.0, 316.0, 315.0, 303.0, 309.0, 327.0, 305.0, 322.0, 284.0, 297.0, 290.0, 292.0, 311.0, 328.0, 288.0, 299.0, 317.0, 322.0, 292.0, 281.0, 316.0, 
317.0, 324.0, 312.0, 293.0, 294.0, 322.0, 317.0, 287.0, 277.0, 319.0, 320.0, 318.0, 312.0, 322.0, 311.0, 292.0, 266.0, 288.0, 294.0, 310.0, 320.0, 314.0, 316.0, 293.0, 294.0, 311.0, 313.0, 319.0, 317.0, 289.0, 293.0, 295.0, 292.0, 316.0, 317.0, 306.0, 281.0, 283.0, 293.0, 289.0, 293.0, 325.0, 314.0, 319.0, 314.0, 309.0, 324.0, 311.0, 322.0, 277.0, 290.0, 316.0, 320.0, 325.0, 308.0, 312.0, 321.0, 327.0, 309.0, 318.0, 318.0, 274.0, 284.0, 288.0, 296.0, 324.0, 315.0, 319.0, 320.0, 324.0, 309.0, 279.0, 288.0, 301.0, 289.0, 312.0, 324.0, 311.0, 322.0, 319.0, 317.0, 321.0, 309.0, 291.0, 291.0, 319.0, 311.0, 317.0, 319.0, 273.0, 285.0, 322.0, 317.0, 299.0, 285.0, 295.0, 292.0, 275.0, 269.0, 319.0, 317.0, 317.0, 316.0]}, "sampler_perf": {"mean_env_wait_ms": 0.9637668687735891, "mean_processing_ms": 0.2610164264718474, "mean_inference_ms": 1.5436541723929016}, "off_policy_estimator": {}, "info": {"num_steps_trained": 7032000, "num_steps_sampled": 3750400, "sample_time_ms": 21753.951, "load_time_ms": 37.441, "grad_time_ms": 10065.741, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0023420238867402077, "policy_loss": -0.005699212197214365, "vf_loss": 86.0804214477539, "vf_explained_var": 0.7711854577064514, "kl": 0.0016376747516915202, "entropy": 1.1336089372634888, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3750400, "episodes_total": 9376, "training_iteration": 293, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-14-28", "timestamp": 1660256068, "time_this_iter_s": 33.79537034034729, "time_total_s": 14482.272019863129, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 14482.272019863129, "timesteps_since_restore": 3750400, "iterations_since_restore": 293, "perf": {"cpu_util_percent": 29.666666666666668, "ram_util_percent": 58.67083333333335}}
+{"episode_reward_max": 639.0, "episode_reward_min": 339.0, "episode_reward_mean": 615.32, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 167.0}, "policy_reward_max": {"ppo": 330.0}, "policy_reward_mean": {"ppo": 307.66}, "custom_metrics": {"sparse_reward_mean": 213.4, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 188.52, "shaped_reward_min": 99, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.26, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 19.3, "onion_pickup_agent_1_min": 15, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.68, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 18.56, "useful_onion_pickup_agent_1_min": 14, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.62, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.84, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.15, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.24, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 16.36, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 18.12, "potting_onion_agent_1_min": 13, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 6.5, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.56, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.54, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.82, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.66, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.37, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.74, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.08, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.68, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.02, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.36, 
"optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 18.12, "optimal_onion_potting_agent_1_min": 13, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.36, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 18.12, "viable_onion_potting_agent_1_min": 13, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [630.0, 636.0, 633.0, 636.0, 636.0, 630.0, 636.0, 639.0, 630.0, 630.0, 630.0, 633.0, 630.0, 630.0, 582.0, 627.0, 633.0, 636.0, 636.0, 576.0, 630.0, 627.0, 627.0, 636.0, 561.0, 579.0, 636.0, 639.0, 639.0, 633.0, 630.0, 569.0, 587.0, 633.0, 587.0, 576.0, 582.0, 639.0, 633.0, 633.0, 633.0, 567.0, 636.0, 633.0, 633.0, 636.0, 636.0, 558.0, 584.0, 639.0, 639.0, 633.0, 567.0, 590.0, 636.0, 633.0, 636.0, 630.0, 582.0, 630.0, 636.0, 558.0, 639.0, 584.0, 587.0, 544.0, 636.0, 633.0, 555.0, 633.0, 564.0, 639.0, 581.0, 636.0, 630.0, 639.0, 630.0, 590.0, 639.0, 636.0, 639.0, 627.0, 630.0, 639.0, 639.0, 636.0, 633.0, 630.0, 636.0, 639.0, 630.0, 639.0, 584.0, 633.0, 633.0, 636.0, 339.0, 579.0, 579.0, 587.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [316.0, 314.0, 306.0, 330.0, 327.0, 306.0, 319.0, 317.0, 324.0, 312.0, 318.0, 312.0, 314.0, 322.0, 314.0, 325.0, 319.0, 311.0, 313.0, 317.0, 316.0, 314.0, 313.0, 320.0, 313.0, 317.0, 316.0, 314.0, 299.0, 283.0, 320.0, 307.0, 312.0, 321.0, 322.0, 314.0, 319.0, 317.0, 288.0, 288.0, 312.0, 318.0, 316.0, 311.0, 313.0, 314.0, 314.0, 322.0, 279.0, 282.0, 299.0, 280.0, 314.0, 322.0, 319.0, 320.0, 319.0, 320.0, 316.0, 317.0, 318.0, 312.0, 288.0, 281.0, 295.0, 292.0, 316.0, 317.0, 306.0, 281.0, 283.0, 293.0, 289.0, 293.0, 325.0, 314.0, 319.0, 314.0, 309.0, 324.0, 311.0, 322.0, 277.0, 290.0, 316.0, 320.0, 325.0, 308.0, 312.0, 321.0, 327.0, 309.0, 318.0, 318.0, 274.0, 284.0, 288.0, 
296.0, 324.0, 315.0, 319.0, 320.0, 324.0, 309.0, 279.0, 288.0, 301.0, 289.0, 312.0, 324.0, 311.0, 322.0, 319.0, 317.0, 321.0, 309.0, 291.0, 291.0, 319.0, 311.0, 317.0, 319.0, 273.0, 285.0, 322.0, 317.0, 299.0, 285.0, 295.0, 292.0, 275.0, 269.0, 319.0, 317.0, 317.0, 316.0, 285.0, 270.0, 316.0, 317.0, 293.0, 271.0, 311.0, 328.0, 295.0, 286.0, 324.0, 312.0, 321.0, 309.0, 316.0, 323.0, 311.0, 319.0, 299.0, 291.0, 319.0, 320.0, 314.0, 322.0, 321.0, 318.0, 316.0, 311.0, 311.0, 319.0, 315.0, 324.0, 314.0, 325.0, 316.0, 320.0, 311.0, 322.0, 300.0, 330.0, 319.0, 317.0, 319.0, 320.0, 319.0, 311.0, 314.0, 325.0, 288.0, 296.0, 318.0, 315.0, 319.0, 314.0, 316.0, 320.0, 167.0, 172.0, 288.0, 291.0, 285.0, 294.0, 293.0, 294.0]}, "sampler_perf": {"mean_env_wait_ms": 0.9620401402665292, "mean_processing_ms": 0.2606729789750365, "mean_inference_ms": 1.5418645190919325}, "off_policy_estimator": {}, "info": {"num_steps_trained": 7056000, "num_steps_sampled": 3763200, "sample_time_ms": 21541.312, "load_time_ms": 37.573, "grad_time_ms": 10164.498, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0031338699627667665, "policy_loss": -0.004722007550299168, "vf_loss": 84.24658966064453, "vf_explained_var": 0.7650328278541565, "kl": 0.0023102371487766504, "entropy": 1.1375713348388672, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3763200, "episodes_total": 9408, "training_iteration": 294, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-15-00", "timestamp": 1660256100, "time_this_iter_s": 31.67550492286682, "time_total_s": 14513.947524785995, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 14513.947524785995, "timesteps_since_restore": 3763200, "iterations_since_restore": 294, "perf": {"cpu_util_percent": 30.09333333333334, "ram_util_percent": 58.68444444444443}}
+{"episode_reward_max": 639.0, "episode_reward_min": 339.0, "episode_reward_mean": 616.39, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 167.0}, "policy_reward_max": {"ppo": 330.0}, "policy_reward_mean": {"ppo": 308.195}, "custom_metrics": {"sparse_reward_mean": 213.6, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 189.19, "shaped_reward_min": 99, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.11, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 19.41, "onion_pickup_agent_1_min": 15, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 16.56, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 18.64, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.63, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.79, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.17, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.21, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 16.18, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 18.3, "potting_onion_agent_1_min": 13, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 6.48, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.46, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.71, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.84, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.54, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.34, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.85, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.98, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.79, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.94, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.18, 
"optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 18.3, "optimal_onion_potting_agent_1_min": 13, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.18, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 18.3, "viable_onion_potting_agent_1_min": 13, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [639.0, 633.0, 573.0, 596.0, 636.0, 633.0, 630.0, 627.0, 636.0, 630.0, 639.0, 630.0, 539.0, 633.0, 633.0, 630.0, 639.0, 630.0, 587.0, 587.0, 633.0, 633.0, 639.0, 639.0, 413.0, 633.0, 636.0, 636.0, 587.0, 636.0, 627.0, 633.0, 587.0, 544.0, 636.0, 633.0, 555.0, 633.0, 564.0, 639.0, 581.0, 636.0, 630.0, 639.0, 630.0, 590.0, 639.0, 636.0, 639.0, 627.0, 630.0, 639.0, 639.0, 636.0, 633.0, 630.0, 636.0, 639.0, 630.0, 639.0, 584.0, 633.0, 633.0, 636.0, 339.0, 579.0, 579.0, 587.0, 630.0, 636.0, 633.0, 636.0, 636.0, 630.0, 636.0, 639.0, 630.0, 630.0, 630.0, 633.0, 630.0, 630.0, 582.0, 627.0, 633.0, 636.0, 636.0, 576.0, 630.0, 627.0, 627.0, 636.0, 561.0, 579.0, 636.0, 639.0, 639.0, 633.0, 630.0, 569.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [319.0, 320.0, 317.0, 316.0, 288.0, 285.0, 293.0, 303.0, 324.0, 312.0, 321.0, 312.0, 322.0, 308.0, 313.0, 314.0, 316.0, 320.0, 321.0, 309.0, 325.0, 314.0, 317.0, 313.0, 265.0, 274.0, 313.0, 320.0, 319.0, 314.0, 317.0, 313.0, 317.0, 322.0, 321.0, 309.0, 306.0, 281.0, 292.0, 295.0, 310.0, 323.0, 319.0, 314.0, 319.0, 320.0, 319.0, 320.0, 199.0, 214.0, 321.0, 312.0, 319.0, 317.0, 319.0, 317.0, 295.0, 292.0, 322.0, 314.0, 308.0, 319.0, 317.0, 316.0, 295.0, 292.0, 275.0, 269.0, 319.0, 317.0, 317.0, 316.0, 285.0, 270.0, 316.0, 317.0, 293.0, 271.0, 311.0, 328.0, 295.0, 286.0, 324.0, 312.0, 321.0, 309.0, 316.0, 323.0, 311.0, 319.0, 299.0, 291.0, 319.0, 320.0, 314.0, 322.0, 321.0, 
318.0, 316.0, 311.0, 311.0, 319.0, 315.0, 324.0, 314.0, 325.0, 316.0, 320.0, 311.0, 322.0, 300.0, 330.0, 319.0, 317.0, 319.0, 320.0, 319.0, 311.0, 314.0, 325.0, 288.0, 296.0, 318.0, 315.0, 319.0, 314.0, 316.0, 320.0, 167.0, 172.0, 288.0, 291.0, 285.0, 294.0, 293.0, 294.0, 316.0, 314.0, 306.0, 330.0, 327.0, 306.0, 319.0, 317.0, 324.0, 312.0, 318.0, 312.0, 314.0, 322.0, 314.0, 325.0, 319.0, 311.0, 313.0, 317.0, 316.0, 314.0, 313.0, 320.0, 313.0, 317.0, 316.0, 314.0, 299.0, 283.0, 320.0, 307.0, 312.0, 321.0, 322.0, 314.0, 319.0, 317.0, 288.0, 288.0, 312.0, 318.0, 316.0, 311.0, 313.0, 314.0, 314.0, 322.0, 279.0, 282.0, 299.0, 280.0, 314.0, 322.0, 319.0, 320.0, 319.0, 320.0, 316.0, 317.0, 318.0, 312.0, 288.0, 281.0]}, "sampler_perf": {"mean_env_wait_ms": 0.9603539643629071, "mean_processing_ms": 0.26034177347662363, "mean_inference_ms": 1.5408082131746255}, "off_policy_estimator": {}, "info": {"num_steps_trained": 7080000, "num_steps_sampled": 3776000, "sample_time_ms": 22187.522, "load_time_ms": 38.247, "grad_time_ms": 10375.626, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0001668116747168824, "policy_loss": -0.007916351780295372, "vf_loss": 83.1385726928711, "vf_explained_var": 0.7759819626808167, "kl": 0.0019673772621899843, "entropy": 1.1286202669143677, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3776000, "episodes_total": 9440, "training_iteration": 295, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-15-42", "timestamp": 1660256142, "time_this_iter_s": 41.61074709892273, "time_total_s": 14555.558271884918, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 14555.558271884918, "timesteps_since_restore": 3776000, "iterations_since_restore": 295, "perf": {"cpu_util_percent": 32.182758620689654, "ram_util_percent": 58.76206896551724}}
+{"episode_reward_max": 639.0, "episode_reward_min": 339.0, "episode_reward_mean": 614.17, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 167.0}, "policy_reward_max": {"ppo": 333.0}, "policy_reward_mean": {"ppo": 307.085}, "custom_metrics": {"sparse_reward_mean": 212.8, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 188.57, "shaped_reward_min": 99, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.06, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 19.21, "onion_pickup_agent_1_min": 13, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 16.56, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 18.39, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.6, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.67, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.15, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.13, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.18, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 18.14, "potting_onion_agent_1_min": 12, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 6.47, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.46, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.71, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.86, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.56, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.35, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.83, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 5.0, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.72, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.96, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.04, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 3, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.18, 
"optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 18.14, "optimal_onion_potting_agent_1_min": 12, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.18, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 18.14, "viable_onion_potting_agent_1_min": 12, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [618.0, 639.0, 633.0, 630.0, 639.0, 630.0, 636.0, 587.0, 633.0, 639.0, 587.0, 630.0, 575.0, 473.0, 633.0, 627.0, 636.0, 639.0, 636.0, 636.0, 633.0, 633.0, 639.0, 630.0, 433.0, 630.0, 633.0, 636.0, 633.0, 639.0, 576.0, 582.0, 339.0, 579.0, 579.0, 587.0, 630.0, 636.0, 633.0, 636.0, 636.0, 630.0, 636.0, 639.0, 630.0, 630.0, 630.0, 633.0, 630.0, 630.0, 582.0, 627.0, 633.0, 636.0, 636.0, 576.0, 630.0, 627.0, 627.0, 636.0, 561.0, 579.0, 636.0, 639.0, 639.0, 633.0, 630.0, 569.0, 639.0, 633.0, 573.0, 596.0, 636.0, 633.0, 630.0, 627.0, 636.0, 630.0, 639.0, 630.0, 539.0, 633.0, 633.0, 630.0, 639.0, 630.0, 587.0, 587.0, 633.0, 633.0, 639.0, 639.0, 413.0, 633.0, 636.0, 636.0, 587.0, 636.0, 627.0, 633.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [300.0, 318.0, 317.0, 322.0, 324.0, 309.0, 316.0, 314.0, 322.0, 317.0, 316.0, 314.0, 320.0, 316.0, 290.0, 297.0, 319.0, 314.0, 317.0, 322.0, 293.0, 294.0, 312.0, 318.0, 284.0, 291.0, 237.0, 236.0, 319.0, 314.0, 317.0, 310.0, 322.0, 314.0, 306.0, 333.0, 319.0, 317.0, 316.0, 320.0, 316.0, 317.0, 318.0, 315.0, 317.0, 322.0, 318.0, 312.0, 218.0, 215.0, 319.0, 311.0, 314.0, 319.0, 316.0, 320.0, 319.0, 314.0, 319.0, 320.0, 286.0, 290.0, 295.0, 287.0, 167.0, 172.0, 288.0, 291.0, 285.0, 294.0, 293.0, 294.0, 316.0, 314.0, 306.0, 330.0, 327.0, 306.0, 319.0, 317.0, 324.0, 312.0, 318.0, 312.0, 314.0, 322.0, 314.0, 325.0, 319.0, 311.0, 313.0, 317.0, 316.0, 314.0, 313.0, 320.0, 313.0, 
317.0, 316.0, 314.0, 299.0, 283.0, 320.0, 307.0, 312.0, 321.0, 322.0, 314.0, 319.0, 317.0, 288.0, 288.0, 312.0, 318.0, 316.0, 311.0, 313.0, 314.0, 314.0, 322.0, 279.0, 282.0, 299.0, 280.0, 314.0, 322.0, 319.0, 320.0, 319.0, 320.0, 316.0, 317.0, 318.0, 312.0, 288.0, 281.0, 319.0, 320.0, 317.0, 316.0, 288.0, 285.0, 293.0, 303.0, 324.0, 312.0, 321.0, 312.0, 322.0, 308.0, 313.0, 314.0, 316.0, 320.0, 321.0, 309.0, 325.0, 314.0, 317.0, 313.0, 265.0, 274.0, 313.0, 320.0, 319.0, 314.0, 317.0, 313.0, 317.0, 322.0, 321.0, 309.0, 306.0, 281.0, 292.0, 295.0, 310.0, 323.0, 319.0, 314.0, 319.0, 320.0, 319.0, 320.0, 199.0, 214.0, 321.0, 312.0, 319.0, 317.0, 319.0, 317.0, 295.0, 292.0, 322.0, 314.0, 308.0, 319.0, 317.0, 316.0]}, "sampler_perf": {"mean_env_wait_ms": 0.9586694910648496, "mean_processing_ms": 0.26000987158476824, "mean_inference_ms": 1.539770678675762}, "off_policy_estimator": {}, "info": {"num_steps_trained": 7104000, "num_steps_sampled": 3788800, "sample_time_ms": 22271.193, "load_time_ms": 38.04, "grad_time_ms": 10225.976, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.004963720217347145, "policy_loss": -0.003322723088786006, "vf_loss": 88.4856948852539, "vf_explained_var": 0.7634937167167664, "kl": 0.0021246925462037325, "entropy": 1.1242562532424927, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3788800, "episodes_total": 9472, "training_iteration": 296, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-16-13", "timestamp": 1660256173, "time_this_iter_s": 30.998157024383545, "time_total_s": 14586.556428909302, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 14586.556428909302, "timesteps_since_restore": 3788800, "iterations_since_restore": 296, "perf": {"cpu_util_percent": 31.388636363636365, "ram_util_percent": 58.774999999999984}}
+{"episode_reward_max": 639.0, "episode_reward_min": 413.0, "episode_reward_mean": 616.79, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 199.0}, "policy_reward_max": {"ppo": 333.0}, "policy_reward_mean": {"ppo": 308.395}, "custom_metrics": {"sparse_reward_mean": 213.4, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 189.99, "shaped_reward_min": 133, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.23, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 19.2, "onion_pickup_agent_1_min": 13, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 16.8, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 18.4, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.64, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.67, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.15, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.14, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.34, "potting_onion_agent_0_min": 9, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 18.16, "potting_onion_agent_1_min": 12, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.5, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.59, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.77, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.01, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.55, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.38, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.78, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 5.1, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.67, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.04, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.04, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 3, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.34, 
"optimal_onion_potting_agent_0_min": 9, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 18.16, "optimal_onion_potting_agent_1_min": 12, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.34, "viable_onion_potting_agent_0_min": 9, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 18.16, "viable_onion_potting_agent_1_min": 12, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [587.0, 579.0, 639.0, 636.0, 633.0, 630.0, 582.0, 636.0, 633.0, 636.0, 636.0, 639.0, 579.0, 584.0, 630.0, 639.0, 627.0, 527.0, 630.0, 582.0, 630.0, 639.0, 636.0, 633.0, 639.0, 639.0, 627.0, 579.0, 627.0, 639.0, 639.0, 639.0, 639.0, 633.0, 630.0, 569.0, 639.0, 633.0, 573.0, 596.0, 636.0, 633.0, 630.0, 627.0, 636.0, 630.0, 639.0, 630.0, 539.0, 633.0, 633.0, 630.0, 639.0, 630.0, 587.0, 587.0, 633.0, 633.0, 639.0, 639.0, 413.0, 633.0, 636.0, 636.0, 587.0, 636.0, 627.0, 633.0, 618.0, 639.0, 633.0, 630.0, 639.0, 630.0, 636.0, 587.0, 633.0, 639.0, 587.0, 630.0, 575.0, 473.0, 633.0, 627.0, 636.0, 639.0, 636.0, 636.0, 633.0, 633.0, 639.0, 630.0, 433.0, 630.0, 633.0, 636.0, 633.0, 639.0, 576.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [293.0, 294.0, 278.0, 301.0, 316.0, 323.0, 317.0, 319.0, 307.0, 326.0, 315.0, 315.0, 290.0, 292.0, 325.0, 311.0, 316.0, 317.0, 314.0, 322.0, 316.0, 320.0, 313.0, 326.0, 291.0, 288.0, 294.0, 290.0, 310.0, 320.0, 320.0, 319.0, 314.0, 313.0, 258.0, 269.0, 317.0, 313.0, 296.0, 286.0, 321.0, 309.0, 312.0, 327.0, 317.0, 319.0, 321.0, 312.0, 314.0, 325.0, 314.0, 325.0, 316.0, 311.0, 298.0, 281.0, 322.0, 305.0, 317.0, 322.0, 319.0, 320.0, 317.0, 322.0, 319.0, 320.0, 316.0, 317.0, 318.0, 312.0, 288.0, 281.0, 319.0, 320.0, 317.0, 316.0, 288.0, 285.0, 293.0, 303.0, 324.0, 312.0, 321.0, 312.0, 322.0, 308.0, 313.0, 314.0, 316.0, 320.0, 321.0, 309.0, 325.0, 314.0, 317.0, 313.0, 265.0, 
274.0, 313.0, 320.0, 319.0, 314.0, 317.0, 313.0, 317.0, 322.0, 321.0, 309.0, 306.0, 281.0, 292.0, 295.0, 310.0, 323.0, 319.0, 314.0, 319.0, 320.0, 319.0, 320.0, 199.0, 214.0, 321.0, 312.0, 319.0, 317.0, 319.0, 317.0, 295.0, 292.0, 322.0, 314.0, 308.0, 319.0, 317.0, 316.0, 300.0, 318.0, 317.0, 322.0, 324.0, 309.0, 316.0, 314.0, 322.0, 317.0, 316.0, 314.0, 320.0, 316.0, 290.0, 297.0, 319.0, 314.0, 317.0, 322.0, 293.0, 294.0, 312.0, 318.0, 284.0, 291.0, 237.0, 236.0, 319.0, 314.0, 317.0, 310.0, 322.0, 314.0, 306.0, 333.0, 319.0, 317.0, 316.0, 320.0, 316.0, 317.0, 318.0, 315.0, 317.0, 322.0, 318.0, 312.0, 218.0, 215.0, 319.0, 311.0, 314.0, 319.0, 316.0, 320.0, 319.0, 314.0, 319.0, 320.0, 286.0, 290.0, 295.0, 287.0]}, "sampler_perf": {"mean_env_wait_ms": 0.9570006029896723, "mean_processing_ms": 0.25968048709196123, "mean_inference_ms": 1.5389220446317904}, "off_policy_estimator": {}, "info": {"num_steps_trained": 7128000, "num_steps_sampled": 3801600, "sample_time_ms": 22480.496, "load_time_ms": 38.114, "grad_time_ms": 10354.328, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.004928060807287693, "policy_loss": -0.0034768336918205023, "vf_loss": 89.6873550415039, "vf_explained_var": 0.7655234336853027, "kl": 0.0019178093643859029, "entropy": 1.1276906728744507, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3801600, "episodes_total": 9504, "training_iteration": 297, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-16-48", "timestamp": 1660256208, "time_this_iter_s": 34.984565019607544, "time_total_s": 14621.54099392891, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 14621.54099392891, "timesteps_since_restore": 3801600, "iterations_since_restore": 297, "perf": {"cpu_util_percent": 30.266, "ram_util_percent": 58.788000000000004}}
+{"episode_reward_max": 639.0, "episode_reward_min": 433.0, "episode_reward_mean": 615.99, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 215.0}, "policy_reward_max": {"ppo": 333.0}, "policy_reward_mean": {"ppo": 307.995}, "custom_metrics": {"sparse_reward_mean": 213.2, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 189.59, "shaped_reward_min": 153, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.45, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 18.92, "onion_pickup_agent_1_min": 13, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.96, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 18.16, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.69, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.63, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 7, "useful_onion_drop_agent_0_mean": 0.12, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.17, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 16.47, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.99, "potting_onion_agent_1_min": 12, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.32, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.76, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.6, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.12, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.49, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.42, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.67, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 5.2, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.55, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.13, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.04, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 3, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.47, 
"optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.99, "optimal_onion_potting_agent_1_min": 12, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.47, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.99, "viable_onion_potting_agent_1_min": 12, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [639.0, 624.0, 576.0, 639.0, 636.0, 630.0, 522.0, 587.0, 639.0, 578.0, 636.0, 584.0, 639.0, 522.0, 630.0, 633.0, 633.0, 576.0, 567.0, 633.0, 636.0, 636.0, 587.0, 627.0, 636.0, 627.0, 639.0, 636.0, 587.0, 630.0, 633.0, 636.0, 587.0, 636.0, 627.0, 633.0, 618.0, 639.0, 633.0, 630.0, 639.0, 630.0, 636.0, 587.0, 633.0, 639.0, 587.0, 630.0, 575.0, 473.0, 633.0, 627.0, 636.0, 639.0, 636.0, 636.0, 633.0, 633.0, 639.0, 630.0, 433.0, 630.0, 633.0, 636.0, 633.0, 639.0, 576.0, 582.0, 587.0, 579.0, 639.0, 636.0, 633.0, 630.0, 582.0, 636.0, 633.0, 636.0, 636.0, 639.0, 579.0, 584.0, 630.0, 639.0, 627.0, 527.0, 630.0, 582.0, 630.0, 639.0, 636.0, 633.0, 639.0, 639.0, 627.0, 579.0, 627.0, 639.0, 639.0, 639.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [319.0, 320.0, 312.0, 312.0, 296.0, 280.0, 317.0, 322.0, 313.0, 323.0, 311.0, 319.0, 269.0, 253.0, 296.0, 291.0, 319.0, 320.0, 313.0, 265.0, 319.0, 317.0, 293.0, 291.0, 314.0, 325.0, 254.0, 268.0, 321.0, 309.0, 309.0, 324.0, 314.0, 319.0, 279.0, 297.0, 286.0, 281.0, 311.0, 322.0, 317.0, 319.0, 317.0, 319.0, 286.0, 301.0, 313.0, 314.0, 319.0, 317.0, 316.0, 311.0, 321.0, 318.0, 317.0, 319.0, 291.0, 296.0, 307.0, 323.0, 313.0, 320.0, 323.0, 313.0, 295.0, 292.0, 322.0, 314.0, 308.0, 319.0, 317.0, 316.0, 300.0, 318.0, 317.0, 322.0, 324.0, 309.0, 316.0, 314.0, 322.0, 317.0, 316.0, 314.0, 320.0, 316.0, 290.0, 297.0, 319.0, 314.0, 317.0, 322.0, 293.0, 294.0, 312.0, 318.0, 284.0, 
291.0, 237.0, 236.0, 319.0, 314.0, 317.0, 310.0, 322.0, 314.0, 306.0, 333.0, 319.0, 317.0, 316.0, 320.0, 316.0, 317.0, 318.0, 315.0, 317.0, 322.0, 318.0, 312.0, 218.0, 215.0, 319.0, 311.0, 314.0, 319.0, 316.0, 320.0, 319.0, 314.0, 319.0, 320.0, 286.0, 290.0, 295.0, 287.0, 293.0, 294.0, 278.0, 301.0, 316.0, 323.0, 317.0, 319.0, 307.0, 326.0, 315.0, 315.0, 290.0, 292.0, 325.0, 311.0, 316.0, 317.0, 314.0, 322.0, 316.0, 320.0, 313.0, 326.0, 291.0, 288.0, 294.0, 290.0, 310.0, 320.0, 320.0, 319.0, 314.0, 313.0, 258.0, 269.0, 317.0, 313.0, 296.0, 286.0, 321.0, 309.0, 312.0, 327.0, 317.0, 319.0, 321.0, 312.0, 314.0, 325.0, 314.0, 325.0, 316.0, 311.0, 298.0, 281.0, 322.0, 305.0, 317.0, 322.0, 319.0, 320.0, 317.0, 322.0]}, "sampler_perf": {"mean_env_wait_ms": 0.9553216188480552, "mean_processing_ms": 0.25934597014735267, "mean_inference_ms": 1.5375890964227674}, "off_policy_estimator": {}, "info": {"num_steps_trained": 7152000, "num_steps_sampled": 3814400, "sample_time_ms": 22450.487, "load_time_ms": 38.169, "grad_time_ms": 10228.148, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.004324051551520824, "policy_loss": -0.003937617409974337, "vf_loss": 88.24027252197266, "vf_explained_var": 0.7693286538124084, "kl": 0.00227510672993958, "entropy": 1.1247196197509766, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3814400, "episodes_total": 9536, "training_iteration": 298, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-17-18", "timestamp": 1660256238, "time_this_iter_s": 30.79404616355896, "time_total_s": 14652.335040092468, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 14652.335040092468, "timesteps_since_restore": 3814400, "iterations_since_restore": 298, "perf": {"cpu_util_percent": 31.34418604651162, "ram_util_percent": 58.76279069767441}}
+{"episode_reward_max": 639.0, "episode_reward_min": 522.0, "episode_reward_mean": 615.53, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 253.0}, "policy_reward_max": {"ppo": 329.0}, "policy_reward_mean": {"ppo": 307.765}, "custom_metrics": {"sparse_reward_mean": 213.0, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 189.53, "shaped_reward_min": 162, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.58, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 19.1, "onion_pickup_agent_1_min": 14, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.95, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 18.27, "useful_onion_pickup_agent_1_min": 14, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.85, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.8, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 7, "useful_onion_drop_agent_0_mean": 0.2, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.25, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 16.43, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 18.0, "potting_onion_agent_1_min": 13, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.28, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.73, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.61, "useful_dish_pickup_agent_0_min": 4, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.17, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.45, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.39, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.04, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.66, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.17, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.57, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.12, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.03, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.43, 
"optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 18.0, "optimal_onion_potting_agent_1_min": 13, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.43, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 18.0, "viable_onion_potting_agent_1_min": 13, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [630.0, 636.0, 630.0, 636.0, 573.0, 630.0, 587.0, 639.0, 582.0, 624.0, 630.0, 639.0, 587.0, 590.0, 639.0, 639.0, 636.0, 633.0, 579.0, 633.0, 633.0, 587.0, 630.0, 630.0, 525.0, 633.0, 636.0, 582.0, 633.0, 581.0, 582.0, 636.0, 633.0, 639.0, 576.0, 582.0, 587.0, 579.0, 639.0, 636.0, 633.0, 630.0, 582.0, 636.0, 633.0, 636.0, 636.0, 639.0, 579.0, 584.0, 630.0, 639.0, 627.0, 527.0, 630.0, 582.0, 630.0, 639.0, 636.0, 633.0, 639.0, 639.0, 627.0, 579.0, 627.0, 639.0, 639.0, 639.0, 639.0, 624.0, 576.0, 639.0, 636.0, 630.0, 522.0, 587.0, 639.0, 578.0, 636.0, 584.0, 639.0, 522.0, 630.0, 633.0, 633.0, 576.0, 567.0, 633.0, 636.0, 636.0, 587.0, 627.0, 636.0, 627.0, 639.0, 636.0, 587.0, 630.0, 633.0, 636.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [316.0, 314.0, 313.0, 323.0, 313.0, 317.0, 319.0, 317.0, 293.0, 280.0, 316.0, 314.0, 289.0, 298.0, 319.0, 320.0, 288.0, 294.0, 313.0, 311.0, 310.0, 320.0, 321.0, 318.0, 291.0, 296.0, 296.0, 294.0, 312.0, 327.0, 317.0, 322.0, 329.0, 307.0, 319.0, 314.0, 285.0, 294.0, 316.0, 317.0, 316.0, 317.0, 291.0, 296.0, 316.0, 314.0, 313.0, 317.0, 258.0, 267.0, 311.0, 322.0, 314.0, 322.0, 291.0, 291.0, 324.0, 309.0, 291.0, 290.0, 291.0, 291.0, 319.0, 317.0, 319.0, 314.0, 319.0, 320.0, 286.0, 290.0, 295.0, 287.0, 293.0, 294.0, 278.0, 301.0, 316.0, 323.0, 317.0, 319.0, 307.0, 326.0, 315.0, 315.0, 290.0, 292.0, 325.0, 311.0, 316.0, 317.0, 314.0, 322.0, 316.0, 320.0, 313.0, 326.0, 291.0, 
288.0, 294.0, 290.0, 310.0, 320.0, 320.0, 319.0, 314.0, 313.0, 258.0, 269.0, 317.0, 313.0, 296.0, 286.0, 321.0, 309.0, 312.0, 327.0, 317.0, 319.0, 321.0, 312.0, 314.0, 325.0, 314.0, 325.0, 316.0, 311.0, 298.0, 281.0, 322.0, 305.0, 317.0, 322.0, 319.0, 320.0, 317.0, 322.0, 319.0, 320.0, 312.0, 312.0, 296.0, 280.0, 317.0, 322.0, 313.0, 323.0, 311.0, 319.0, 269.0, 253.0, 296.0, 291.0, 319.0, 320.0, 313.0, 265.0, 319.0, 317.0, 293.0, 291.0, 314.0, 325.0, 254.0, 268.0, 321.0, 309.0, 309.0, 324.0, 314.0, 319.0, 279.0, 297.0, 286.0, 281.0, 311.0, 322.0, 317.0, 319.0, 317.0, 319.0, 286.0, 301.0, 313.0, 314.0, 319.0, 317.0, 316.0, 311.0, 321.0, 318.0, 317.0, 319.0, 291.0, 296.0, 307.0, 323.0, 313.0, 320.0, 323.0, 313.0]}, "sampler_perf": {"mean_env_wait_ms": 0.9536494258770276, "mean_processing_ms": 0.25901133252507974, "mean_inference_ms": 1.536150189883149}, "off_policy_estimator": {}, "info": {"num_steps_trained": 7176000, "num_steps_sampled": 3827200, "sample_time_ms": 22404.117, "load_time_ms": 38.01, "grad_time_ms": 10081.123, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0014618774875998497, "policy_loss": -0.00924667902290821, "vf_loss": 83.49740600585938, "vf_explained_var": 0.7685635685920715, "kl": 0.0018500644946470857, "entropy": 1.12986421585083, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3827200, "episodes_total": 9568, "training_iteration": 299, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-17-50", "timestamp": 1660256270, "time_this_iter_s": 31.213135242462158, "time_total_s": 14683.54817533493, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 14683.54817533493, "timesteps_since_restore": 3827200, "iterations_since_restore": 299, "perf": {"cpu_util_percent": 30.386363636363637, "ram_util_percent": 58.77272727272726}}
+{"episode_reward_max": 639.0, "episode_reward_min": 522.0, "episode_reward_mean": 616.85, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 253.0}, "policy_reward_max": {"ppo": 329.0}, "policy_reward_mean": {"ppo": 308.425}, "custom_metrics": {"sparse_reward_mean": 213.6, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 189.65, "shaped_reward_min": 160, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.79, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 18.78, "onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 17.19, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 25, "useful_onion_pickup_agent_1_mean": 17.87, "useful_onion_pickup_agent_1_min": 11, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.8, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.68, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 7, "useful_onion_drop_agent_0_mean": 0.16, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.3, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 16.69, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 25, "potting_onion_agent_1_mean": 17.77, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.07, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.88, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.46, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.28, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.38, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.38, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.04, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.54, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.33, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.46, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.27, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.03, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.69, 
"optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 25, "optimal_onion_potting_agent_1_mean": 17.77, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.69, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 25, "viable_onion_potting_agent_1_mean": 17.77, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [587.0, 636.0, 636.0, 579.0, 636.0, 636.0, 582.0, 633.0, 639.0, 582.0, 587.0, 639.0, 630.0, 639.0, 636.0, 627.0, 600.0, 639.0, 630.0, 630.0, 627.0, 639.0, 582.0, 627.0, 587.0, 636.0, 633.0, 636.0, 636.0, 633.0, 630.0, 579.0, 627.0, 639.0, 639.0, 639.0, 639.0, 624.0, 576.0, 639.0, 636.0, 630.0, 522.0, 587.0, 639.0, 578.0, 636.0, 584.0, 639.0, 522.0, 630.0, 633.0, 633.0, 576.0, 567.0, 633.0, 636.0, 636.0, 587.0, 627.0, 636.0, 627.0, 639.0, 636.0, 587.0, 630.0, 633.0, 636.0, 630.0, 636.0, 630.0, 636.0, 573.0, 630.0, 587.0, 639.0, 582.0, 624.0, 630.0, 639.0, 587.0, 590.0, 639.0, 639.0, 636.0, 633.0, 579.0, 633.0, 633.0, 587.0, 630.0, 630.0, 525.0, 633.0, 636.0, 582.0, 633.0, 581.0, 582.0, 636.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [296.0, 291.0, 314.0, 322.0, 314.0, 322.0, 278.0, 301.0, 314.0, 322.0, 316.0, 320.0, 291.0, 291.0, 314.0, 319.0, 319.0, 320.0, 286.0, 296.0, 283.0, 304.0, 320.0, 319.0, 324.0, 306.0, 322.0, 317.0, 318.0, 318.0, 308.0, 319.0, 299.0, 301.0, 321.0, 318.0, 308.0, 322.0, 319.0, 311.0, 308.0, 319.0, 320.0, 319.0, 288.0, 294.0, 319.0, 308.0, 289.0, 298.0, 314.0, 322.0, 317.0, 316.0, 316.0, 320.0, 312.0, 324.0, 319.0, 314.0, 311.0, 319.0, 283.0, 296.0, 322.0, 305.0, 317.0, 322.0, 319.0, 320.0, 317.0, 322.0, 319.0, 320.0, 312.0, 312.0, 296.0, 280.0, 317.0, 322.0, 313.0, 323.0, 311.0, 319.0, 269.0, 253.0, 296.0, 291.0, 319.0, 320.0, 313.0, 265.0, 319.0, 317.0, 293.0, 291.0, 314.0, 
325.0, 254.0, 268.0, 321.0, 309.0, 309.0, 324.0, 314.0, 319.0, 279.0, 297.0, 286.0, 281.0, 311.0, 322.0, 317.0, 319.0, 317.0, 319.0, 286.0, 301.0, 313.0, 314.0, 319.0, 317.0, 316.0, 311.0, 321.0, 318.0, 317.0, 319.0, 291.0, 296.0, 307.0, 323.0, 313.0, 320.0, 323.0, 313.0, 316.0, 314.0, 313.0, 323.0, 313.0, 317.0, 319.0, 317.0, 293.0, 280.0, 316.0, 314.0, 289.0, 298.0, 319.0, 320.0, 288.0, 294.0, 313.0, 311.0, 310.0, 320.0, 321.0, 318.0, 291.0, 296.0, 296.0, 294.0, 312.0, 327.0, 317.0, 322.0, 329.0, 307.0, 319.0, 314.0, 285.0, 294.0, 316.0, 317.0, 316.0, 317.0, 291.0, 296.0, 316.0, 314.0, 313.0, 317.0, 258.0, 267.0, 311.0, 322.0, 314.0, 322.0, 291.0, 291.0, 324.0, 309.0, 291.0, 290.0, 291.0, 291.0, 319.0, 317.0]}, "sampler_perf": {"mean_env_wait_ms": 0.9519895892120465, "mean_processing_ms": 0.2586797182005827, "mean_inference_ms": 1.5346818190110434}, "off_policy_estimator": {}, "info": {"num_steps_trained": 7200000, "num_steps_sampled": 3840000, "sample_time_ms": 22522.727, "load_time_ms": 37.151, "grad_time_ms": 10013.326, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0025208042934536934, "policy_loss": -0.005239995662122965, "vf_loss": 83.27434539794922, "vf_explained_var": 0.7729237675666809, "kl": 0.0018271300941705704, "entropy": 1.133251667022705, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3840000, "episodes_total": 9600, "training_iteration": 300, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-18-22", "timestamp": 1660256302, "time_this_iter_s": 32.498526096343994, "time_total_s": 14716.046701431274, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 14716.046701431274, "timesteps_since_restore": 3840000, "iterations_since_restore": 300, "perf": {"cpu_util_percent": 33.44782608695652, "ram_util_percent": 58.9717391304348}}
+{"episode_reward_max": 639.0, "episode_reward_min": 525.0, "episode_reward_mean": 617.85, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 258.0}, "policy_reward_max": {"ppo": 329.0}, "policy_reward_mean": {"ppo": 308.925}, "custom_metrics": {"sparse_reward_mean": 214.0, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 189.85, "shaped_reward_min": 160, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.53, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 18.86, "onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 17.02, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 25, "useful_onion_pickup_agent_1_mean": 18.0, "useful_onion_pickup_agent_1_min": 11, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.7, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.6, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.18, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.28, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 16.58, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 25, "potting_onion_agent_1_mean": 17.89, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 6.1, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.86, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.52, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.26, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.37, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.39, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.55, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.31, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.49, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.25, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.58, 
"optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 25, "optimal_onion_potting_agent_1_mean": 17.89, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.58, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 25, "viable_onion_potting_agent_1_mean": 17.89, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [630.0, 639.0, 630.0, 587.0, 627.0, 639.0, 633.0, 573.0, 627.0, 633.0, 576.0, 639.0, 630.0, 564.0, 633.0, 627.0, 636.0, 636.0, 627.0, 633.0, 579.0, 639.0, 636.0, 639.0, 576.0, 639.0, 636.0, 639.0, 587.0, 581.0, 582.0, 639.0, 587.0, 630.0, 633.0, 636.0, 630.0, 636.0, 630.0, 636.0, 573.0, 630.0, 587.0, 639.0, 582.0, 624.0, 630.0, 639.0, 587.0, 590.0, 639.0, 639.0, 636.0, 633.0, 579.0, 633.0, 633.0, 587.0, 630.0, 630.0, 525.0, 633.0, 636.0, 582.0, 633.0, 581.0, 582.0, 636.0, 587.0, 636.0, 636.0, 579.0, 636.0, 636.0, 582.0, 633.0, 639.0, 582.0, 587.0, 639.0, 630.0, 639.0, 636.0, 627.0, 600.0, 639.0, 630.0, 630.0, 627.0, 639.0, 582.0, 627.0, 587.0, 636.0, 633.0, 636.0, 636.0, 633.0, 630.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [313.0, 317.0, 322.0, 317.0, 316.0, 314.0, 288.0, 299.0, 305.0, 322.0, 319.0, 320.0, 329.0, 304.0, 278.0, 295.0, 311.0, 316.0, 316.0, 317.0, 290.0, 286.0, 317.0, 322.0, 321.0, 309.0, 280.0, 284.0, 322.0, 311.0, 314.0, 313.0, 326.0, 310.0, 314.0, 322.0, 311.0, 316.0, 316.0, 317.0, 288.0, 291.0, 311.0, 328.0, 317.0, 319.0, 319.0, 320.0, 289.0, 287.0, 317.0, 322.0, 320.0, 316.0, 327.0, 312.0, 277.0, 310.0, 298.0, 283.0, 293.0, 289.0, 317.0, 322.0, 291.0, 296.0, 307.0, 323.0, 313.0, 320.0, 323.0, 313.0, 316.0, 314.0, 313.0, 323.0, 313.0, 317.0, 319.0, 317.0, 293.0, 280.0, 316.0, 314.0, 289.0, 298.0, 319.0, 320.0, 288.0, 294.0, 313.0, 311.0, 310.0, 320.0, 321.0, 318.0, 291.0, 
296.0, 296.0, 294.0, 312.0, 327.0, 317.0, 322.0, 329.0, 307.0, 319.0, 314.0, 285.0, 294.0, 316.0, 317.0, 316.0, 317.0, 291.0, 296.0, 316.0, 314.0, 313.0, 317.0, 258.0, 267.0, 311.0, 322.0, 314.0, 322.0, 291.0, 291.0, 324.0, 309.0, 291.0, 290.0, 291.0, 291.0, 319.0, 317.0, 296.0, 291.0, 314.0, 322.0, 314.0, 322.0, 278.0, 301.0, 314.0, 322.0, 316.0, 320.0, 291.0, 291.0, 314.0, 319.0, 319.0, 320.0, 286.0, 296.0, 283.0, 304.0, 320.0, 319.0, 324.0, 306.0, 322.0, 317.0, 318.0, 318.0, 308.0, 319.0, 299.0, 301.0, 321.0, 318.0, 308.0, 322.0, 319.0, 311.0, 308.0, 319.0, 320.0, 319.0, 288.0, 294.0, 319.0, 308.0, 289.0, 298.0, 314.0, 322.0, 317.0, 316.0, 316.0, 320.0, 312.0, 324.0, 319.0, 314.0, 311.0, 319.0, 283.0, 296.0]}, "sampler_perf": {"mean_env_wait_ms": 0.9503424273012522, "mean_processing_ms": 0.25835032935285485, "mean_inference_ms": 1.5332766289322552}, "off_policy_estimator": {}, "info": {"num_steps_trained": 7224000, "num_steps_sampled": 3852800, "sample_time_ms": 22579.338, "load_time_ms": 37.118, "grad_time_ms": 10147.106, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0018557598814368248, "policy_loss": -0.005808284040540457, "vf_loss": 82.31928253173828, "vf_explained_var": 0.7716462016105652, "kl": 0.001915976870805025, "entropy": 1.1357669830322266, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3852800, "episodes_total": 9632, "training_iteration": 301, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-18-56", "timestamp": 1660256336, "time_this_iter_s": 32.42415189743042, "time_total_s": 14748.470853328705, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 14748.470853328705, "timesteps_since_restore": 3852800, "iterations_since_restore": 301, "perf": {"cpu_util_percent": 32.14468085106383, "ram_util_percent": 59.210638297872315}}
+{"episode_reward_max": 639.0, "episode_reward_min": 465.0, "episode_reward_mean": 616.11, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 232.0}, "policy_reward_max": {"ppo": 332.0}, "policy_reward_mean": {"ppo": 308.055}, "custom_metrics": {"sparse_reward_mean": 213.8, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 188.51, "shaped_reward_min": 145, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.56, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 18.65, "onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 17.05, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 25, "useful_onion_pickup_agent_1_mean": 17.78, "useful_onion_pickup_agent_1_min": 11, "useful_onion_pickup_agent_1_max": 25, "onion_drop_agent_0_mean": 0.59, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.61, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.13, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.31, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 16.71, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 25, "potting_onion_agent_1_mean": 17.69, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 6.0, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 6.01, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 13, "useful_dish_pickup_agent_0_mean": 5.28, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.19, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.41, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.45, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 7, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.04, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.43, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.41, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.38, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.33, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.03, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.71, 
"optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 25, "optimal_onion_potting_agent_1_mean": 17.69, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.71, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 25, "viable_onion_potting_agent_1_mean": 17.69, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [630.0, 621.0, 582.0, 636.0, 630.0, 633.0, 527.0, 627.0, 582.0, 633.0, 603.0, 630.0, 639.0, 624.0, 630.0, 636.0, 639.0, 465.0, 639.0, 636.0, 570.0, 630.0, 567.0, 633.0, 582.0, 630.0, 633.0, 636.0, 563.0, 636.0, 582.0, 636.0, 633.0, 581.0, 582.0, 636.0, 587.0, 636.0, 636.0, 579.0, 636.0, 636.0, 582.0, 633.0, 639.0, 582.0, 587.0, 639.0, 630.0, 639.0, 636.0, 627.0, 600.0, 639.0, 630.0, 630.0, 627.0, 639.0, 582.0, 627.0, 587.0, 636.0, 633.0, 636.0, 636.0, 633.0, 630.0, 579.0, 630.0, 639.0, 630.0, 587.0, 627.0, 639.0, 633.0, 573.0, 627.0, 633.0, 576.0, 639.0, 630.0, 564.0, 633.0, 627.0, 636.0, 636.0, 627.0, 633.0, 579.0, 639.0, 636.0, 639.0, 576.0, 639.0, 636.0, 639.0, 587.0, 581.0, 582.0, 639.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [306.0, 324.0, 318.0, 303.0, 290.0, 292.0, 314.0, 322.0, 317.0, 313.0, 314.0, 319.0, 259.0, 268.0, 305.0, 322.0, 281.0, 301.0, 314.0, 319.0, 302.0, 301.0, 309.0, 321.0, 319.0, 320.0, 311.0, 313.0, 310.0, 320.0, 304.0, 332.0, 325.0, 314.0, 232.0, 233.0, 314.0, 325.0, 311.0, 325.0, 291.0, 279.0, 318.0, 312.0, 277.0, 290.0, 315.0, 318.0, 298.0, 284.0, 309.0, 321.0, 311.0, 322.0, 319.0, 317.0, 279.0, 284.0, 319.0, 317.0, 291.0, 291.0, 324.0, 312.0, 324.0, 309.0, 291.0, 290.0, 291.0, 291.0, 319.0, 317.0, 296.0, 291.0, 314.0, 322.0, 314.0, 322.0, 278.0, 301.0, 314.0, 322.0, 316.0, 320.0, 291.0, 291.0, 314.0, 319.0, 319.0, 320.0, 286.0, 296.0, 283.0, 304.0, 320.0, 319.0, 324.0, 
306.0, 322.0, 317.0, 318.0, 318.0, 308.0, 319.0, 299.0, 301.0, 321.0, 318.0, 308.0, 322.0, 319.0, 311.0, 308.0, 319.0, 320.0, 319.0, 288.0, 294.0, 319.0, 308.0, 289.0, 298.0, 314.0, 322.0, 317.0, 316.0, 316.0, 320.0, 312.0, 324.0, 319.0, 314.0, 311.0, 319.0, 283.0, 296.0, 313.0, 317.0, 322.0, 317.0, 316.0, 314.0, 288.0, 299.0, 305.0, 322.0, 319.0, 320.0, 329.0, 304.0, 278.0, 295.0, 311.0, 316.0, 316.0, 317.0, 290.0, 286.0, 317.0, 322.0, 321.0, 309.0, 280.0, 284.0, 322.0, 311.0, 314.0, 313.0, 326.0, 310.0, 314.0, 322.0, 311.0, 316.0, 316.0, 317.0, 288.0, 291.0, 311.0, 328.0, 317.0, 319.0, 319.0, 320.0, 289.0, 287.0, 317.0, 322.0, 320.0, 316.0, 327.0, 312.0, 277.0, 310.0, 298.0, 283.0, 293.0, 289.0, 317.0, 322.0]}, "sampler_perf": {"mean_env_wait_ms": 0.9487147885298458, "mean_processing_ms": 0.25802520052917743, "mean_inference_ms": 1.5320224616406188}, "off_policy_estimator": {}, "info": {"num_steps_trained": 7248000, "num_steps_sampled": 3865600, "sample_time_ms": 22988.548, "load_time_ms": 36.983, "grad_time_ms": 10329.984, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.00113882205914706, "policy_loss": -0.0069201975129544735, "vf_loss": 86.32308959960938, "vf_explained_var": 0.7628341317176819, "kl": 0.0021745546255260706, "entropy": 1.146581768989563, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3865600, "episodes_total": 9664, "training_iteration": 302, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-19-29", "timestamp": 1660256369, "time_this_iter_s": 33.80998110771179, "time_total_s": 14782.280834436417, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 14782.280834436417, "timesteps_since_restore": 3865600, "iterations_since_restore": 302, "perf": {"cpu_util_percent": 31.58125, "ram_util_percent": 58.88958333333333}}
+{"episode_reward_max": 639.0, "episode_reward_min": 123.0, "episode_reward_mean": 610.18, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 60.0}, "policy_reward_max": {"ppo": 332.0}, "policy_reward_mean": {"ppo": 305.09}, "custom_metrics": {"sparse_reward_mean": 211.8, "sparse_reward_min": 40, "sparse_reward_max": 220, "shaped_reward_mean": 186.58, "shaped_reward_min": 43, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.47, "onion_pickup_agent_0_min": 5, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 18.38, "onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 17.0, "useful_onion_pickup_agent_0_min": 4, "useful_onion_pickup_agent_0_max": 25, "useful_onion_pickup_agent_1_mean": 17.63, "useful_onion_pickup_agent_1_min": 6, "useful_onion_pickup_agent_1_max": 25, "onion_drop_agent_0_mean": 0.54, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.57, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.14, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.29, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 16.61, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 25, "potting_onion_agent_1_mean": 17.46, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 5.97, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 6.0, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 13, "useful_dish_pickup_agent_0_mean": 5.2, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.14, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.47, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.5, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 7, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.05, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.37, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.34, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.32, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.28, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.61, 
"optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 25, "optimal_onion_potting_agent_1_mean": 17.46, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.61, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 25, "viable_onion_potting_agent_1_mean": 17.46, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [639.0, 630.0, 633.0, 576.0, 630.0, 633.0, 621.0, 633.0, 633.0, 582.0, 123.0, 636.0, 633.0, 582.0, 627.0, 639.0, 633.0, 639.0, 587.0, 633.0, 630.0, 633.0, 621.0, 633.0, 630.0, 630.0, 633.0, 376.0, 630.0, 636.0, 582.0, 633.0, 636.0, 633.0, 630.0, 579.0, 630.0, 639.0, 630.0, 587.0, 627.0, 639.0, 633.0, 573.0, 627.0, 633.0, 576.0, 639.0, 630.0, 564.0, 633.0, 627.0, 636.0, 636.0, 627.0, 633.0, 579.0, 639.0, 636.0, 639.0, 576.0, 639.0, 636.0, 639.0, 587.0, 581.0, 582.0, 639.0, 630.0, 621.0, 582.0, 636.0, 630.0, 633.0, 527.0, 627.0, 582.0, 633.0, 603.0, 630.0, 639.0, 624.0, 630.0, 636.0, 639.0, 465.0, 639.0, 636.0, 570.0, 630.0, 567.0, 633.0, 582.0, 630.0, 633.0, 636.0, 563.0, 636.0, 582.0, 636.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [314.0, 325.0, 309.0, 321.0, 315.0, 318.0, 280.0, 296.0, 311.0, 319.0, 321.0, 312.0, 308.0, 313.0, 317.0, 316.0, 316.0, 317.0, 292.0, 290.0, 60.0, 63.0, 314.0, 322.0, 324.0, 309.0, 291.0, 291.0, 316.0, 311.0, 319.0, 320.0, 304.0, 329.0, 319.0, 320.0, 283.0, 304.0, 318.0, 315.0, 321.0, 309.0, 321.0, 312.0, 321.0, 300.0, 314.0, 319.0, 319.0, 311.0, 311.0, 319.0, 316.0, 317.0, 182.0, 194.0, 319.0, 311.0, 324.0, 312.0, 291.0, 291.0, 316.0, 317.0, 312.0, 324.0, 319.0, 314.0, 311.0, 319.0, 283.0, 296.0, 313.0, 317.0, 322.0, 317.0, 316.0, 314.0, 288.0, 299.0, 305.0, 322.0, 319.0, 320.0, 329.0, 304.0, 278.0, 295.0, 311.0, 316.0, 316.0, 317.0, 290.0, 286.0, 317.0, 322.0, 321.0, 
309.0, 280.0, 284.0, 322.0, 311.0, 314.0, 313.0, 326.0, 310.0, 314.0, 322.0, 311.0, 316.0, 316.0, 317.0, 288.0, 291.0, 311.0, 328.0, 317.0, 319.0, 319.0, 320.0, 289.0, 287.0, 317.0, 322.0, 320.0, 316.0, 327.0, 312.0, 277.0, 310.0, 298.0, 283.0, 293.0, 289.0, 317.0, 322.0, 306.0, 324.0, 318.0, 303.0, 290.0, 292.0, 314.0, 322.0, 317.0, 313.0, 314.0, 319.0, 259.0, 268.0, 305.0, 322.0, 281.0, 301.0, 314.0, 319.0, 302.0, 301.0, 309.0, 321.0, 319.0, 320.0, 311.0, 313.0, 310.0, 320.0, 304.0, 332.0, 325.0, 314.0, 232.0, 233.0, 314.0, 325.0, 311.0, 325.0, 291.0, 279.0, 318.0, 312.0, 277.0, 290.0, 315.0, 318.0, 298.0, 284.0, 309.0, 321.0, 311.0, 322.0, 319.0, 317.0, 279.0, 284.0, 319.0, 317.0, 291.0, 291.0, 324.0, 312.0]}, "sampler_perf": {"mean_env_wait_ms": 0.9470934663079638, "mean_processing_ms": 0.2577011854235747, "mean_inference_ms": 1.5308098128397853}, "off_policy_estimator": {}, "info": {"num_steps_trained": 7272000, "num_steps_sampled": 3878400, "sample_time_ms": 22956.932, "load_time_ms": 37.245, "grad_time_ms": 10308.821, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0009572577546350658, "policy_loss": -0.00890685711055994, "vf_loss": 85.18466186523438, "vf_explained_var": 0.7909882068634033, "kl": 0.00206771120429039, "entropy": 1.137712836265564, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3878400, "episodes_total": 9696, "training_iteration": 303, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-20-03", "timestamp": 1660256403, "time_this_iter_s": 33.27155518531799, "time_total_s": 14815.552389621735, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 14815.552389621735, "timesteps_since_restore": 3878400, "iterations_since_restore": 303, "perf": {"cpu_util_percent": 32.59574468085106, "ram_util_percent": 58.840425531914875}}
+{"episode_reward_max": 639.0, "episode_reward_min": 123.0, "episode_reward_mean": 606.56, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 60.0}, "policy_reward_max": {"ppo": 332.0}, "policy_reward_mean": {"ppo": 303.28}, "custom_metrics": {"sparse_reward_mean": 210.4, "sparse_reward_min": 40, "sparse_reward_max": 220, "shaped_reward_mean": 185.76, "shaped_reward_min": 43, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.34, "onion_pickup_agent_0_min": 5, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 18.4, "onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 16.78, "useful_onion_pickup_agent_0_min": 4, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 17.58, "useful_onion_pickup_agent_1_min": 6, "useful_onion_pickup_agent_1_max": 25, "onion_drop_agent_0_mean": 0.45, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.6, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.11, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.31, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 16.5, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 17.42, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 6.04, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.89, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 13, "useful_dish_pickup_agent_0_mean": 5.19, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.06, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.57, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.44, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 7, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.39, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.32, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.29, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.25, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.04, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.5, 
"optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 17.42, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.5, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 17.42, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [630.0, 510.0, 587.0, 564.0, 579.0, 587.0, 582.0, 587.0, 633.0, 630.0, 630.0, 636.0, 630.0, 569.0, 633.0, 639.0, 582.0, 633.0, 633.0, 636.0, 587.0, 633.0, 636.0, 639.0, 633.0, 530.0, 636.0, 627.0, 582.0, 633.0, 636.0, 636.0, 587.0, 581.0, 582.0, 639.0, 630.0, 621.0, 582.0, 636.0, 630.0, 633.0, 527.0, 627.0, 582.0, 633.0, 603.0, 630.0, 639.0, 624.0, 630.0, 636.0, 639.0, 465.0, 639.0, 636.0, 570.0, 630.0, 567.0, 633.0, 582.0, 630.0, 633.0, 636.0, 563.0, 636.0, 582.0, 636.0, 639.0, 630.0, 633.0, 576.0, 630.0, 633.0, 621.0, 633.0, 633.0, 582.0, 123.0, 636.0, 633.0, 582.0, 627.0, 639.0, 633.0, 639.0, 587.0, 633.0, 630.0, 633.0, 621.0, 633.0, 630.0, 630.0, 633.0, 376.0, 630.0, 636.0, 582.0, 633.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [316.0, 314.0, 256.0, 254.0, 286.0, 301.0, 276.0, 288.0, 290.0, 289.0, 289.0, 298.0, 296.0, 286.0, 295.0, 292.0, 313.0, 320.0, 318.0, 312.0, 311.0, 319.0, 314.0, 322.0, 313.0, 317.0, 283.0, 286.0, 319.0, 314.0, 319.0, 320.0, 294.0, 288.0, 324.0, 309.0, 315.0, 318.0, 325.0, 311.0, 294.0, 293.0, 313.0, 320.0, 320.0, 316.0, 316.0, 323.0, 316.0, 317.0, 265.0, 265.0, 317.0, 319.0, 317.0, 310.0, 288.0, 294.0, 316.0, 317.0, 322.0, 314.0, 314.0, 322.0, 277.0, 310.0, 298.0, 283.0, 293.0, 289.0, 317.0, 322.0, 306.0, 324.0, 318.0, 303.0, 290.0, 292.0, 314.0, 322.0, 317.0, 313.0, 314.0, 319.0, 259.0, 268.0, 305.0, 322.0, 281.0, 301.0, 314.0, 319.0, 302.0, 301.0, 309.0, 321.0, 319.0, 
320.0, 311.0, 313.0, 310.0, 320.0, 304.0, 332.0, 325.0, 314.0, 232.0, 233.0, 314.0, 325.0, 311.0, 325.0, 291.0, 279.0, 318.0, 312.0, 277.0, 290.0, 315.0, 318.0, 298.0, 284.0, 309.0, 321.0, 311.0, 322.0, 319.0, 317.0, 279.0, 284.0, 319.0, 317.0, 291.0, 291.0, 324.0, 312.0, 314.0, 325.0, 309.0, 321.0, 315.0, 318.0, 280.0, 296.0, 311.0, 319.0, 321.0, 312.0, 308.0, 313.0, 317.0, 316.0, 316.0, 317.0, 292.0, 290.0, 60.0, 63.0, 314.0, 322.0, 324.0, 309.0, 291.0, 291.0, 316.0, 311.0, 319.0, 320.0, 304.0, 329.0, 319.0, 320.0, 283.0, 304.0, 318.0, 315.0, 321.0, 309.0, 321.0, 312.0, 321.0, 300.0, 314.0, 319.0, 319.0, 311.0, 311.0, 319.0, 316.0, 317.0, 182.0, 194.0, 319.0, 311.0, 324.0, 312.0, 291.0, 291.0, 316.0, 317.0]}, "sampler_perf": {"mean_env_wait_ms": 0.9454753487433492, "mean_processing_ms": 0.2573778744956284, "mean_inference_ms": 1.529523880785963}, "off_policy_estimator": {}, "info": {"num_steps_trained": 7296000, "num_steps_sampled": 3891200, "sample_time_ms": 23052.875, "load_time_ms": 37.015, "grad_time_ms": 10279.094, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0008152422960847616, "policy_loss": -0.008779828436672688, "vf_loss": 85.31393432617188, "vf_explained_var": 0.7709566950798035, "kl": 0.0019413350382819772, "entropy": 1.1336184740066528, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3891200, "episodes_total": 9728, "training_iteration": 304, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-20-35", "timestamp": 1660256435, "time_this_iter_s": 32.335684061050415, "time_total_s": 14847.888073682785, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 14847.888073682785, "timesteps_since_restore": 3891200, "iterations_since_restore": 304, "perf": {"cpu_util_percent": 32.43260869565216, "ram_util_percent": 58.8478260869565}}
+{"episode_reward_max": 639.0, "episode_reward_min": 123.0, "episode_reward_mean": 608.06, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 60.0}, "policy_reward_max": {"ppo": 329.0}, "policy_reward_mean": {"ppo": 304.03}, "custom_metrics": {"sparse_reward_mean": 210.6, "sparse_reward_min": 40, "sparse_reward_max": 220, "shaped_reward_mean": 186.86, "shaped_reward_min": 43, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.32, "onion_pickup_agent_0_min": 5, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 18.35, "onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.81, "useful_onion_pickup_agent_0_min": 4, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 17.52, "useful_onion_pickup_agent_1_min": 6, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.43, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.51, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.11, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.2, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.5, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 17.5, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 6.16, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 13, "dish_pickup_agent_1_mean": 5.79, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.39, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.03, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.55, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.43, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.04, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.57, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.19, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.45, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.11, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.03, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.5, 
"optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 17.5, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.5, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 17.5, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [587.0, 633.0, 633.0, 639.0, 636.0, 587.0, 587.0, 636.0, 633.0, 636.0, 587.0, 587.0, 630.0, 587.0, 582.0, 630.0, 636.0, 581.0, 630.0, 630.0, 639.0, 590.0, 618.0, 627.0, 633.0, 575.0, 627.0, 630.0, 636.0, 636.0, 582.0, 582.0, 563.0, 636.0, 582.0, 636.0, 639.0, 630.0, 633.0, 576.0, 630.0, 633.0, 621.0, 633.0, 633.0, 582.0, 123.0, 636.0, 633.0, 582.0, 627.0, 639.0, 633.0, 639.0, 587.0, 633.0, 630.0, 633.0, 621.0, 633.0, 630.0, 630.0, 633.0, 376.0, 630.0, 636.0, 582.0, 633.0, 630.0, 510.0, 587.0, 564.0, 579.0, 587.0, 582.0, 587.0, 633.0, 630.0, 630.0, 636.0, 630.0, 569.0, 633.0, 639.0, 582.0, 633.0, 633.0, 636.0, 587.0, 633.0, 636.0, 639.0, 633.0, 530.0, 636.0, 627.0, 582.0, 633.0, 636.0, 636.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [299.0, 288.0, 309.0, 324.0, 315.0, 318.0, 312.0, 327.0, 317.0, 319.0, 290.0, 297.0, 298.0, 289.0, 320.0, 316.0, 313.0, 320.0, 321.0, 315.0, 288.0, 299.0, 293.0, 294.0, 318.0, 312.0, 294.0, 293.0, 286.0, 296.0, 318.0, 312.0, 319.0, 317.0, 290.0, 291.0, 320.0, 310.0, 314.0, 316.0, 324.0, 315.0, 295.0, 295.0, 314.0, 304.0, 320.0, 307.0, 319.0, 314.0, 285.0, 290.0, 316.0, 311.0, 313.0, 317.0, 320.0, 316.0, 327.0, 309.0, 288.0, 294.0, 293.0, 289.0, 279.0, 284.0, 319.0, 317.0, 291.0, 291.0, 324.0, 312.0, 314.0, 325.0, 309.0, 321.0, 315.0, 318.0, 280.0, 296.0, 311.0, 319.0, 321.0, 312.0, 308.0, 313.0, 317.0, 316.0, 316.0, 317.0, 292.0, 290.0, 60.0, 63.0, 314.0, 322.0, 324.0, 
309.0, 291.0, 291.0, 316.0, 311.0, 319.0, 320.0, 304.0, 329.0, 319.0, 320.0, 283.0, 304.0, 318.0, 315.0, 321.0, 309.0, 321.0, 312.0, 321.0, 300.0, 314.0, 319.0, 319.0, 311.0, 311.0, 319.0, 316.0, 317.0, 182.0, 194.0, 319.0, 311.0, 324.0, 312.0, 291.0, 291.0, 316.0, 317.0, 316.0, 314.0, 256.0, 254.0, 286.0, 301.0, 276.0, 288.0, 290.0, 289.0, 289.0, 298.0, 296.0, 286.0, 295.0, 292.0, 313.0, 320.0, 318.0, 312.0, 311.0, 319.0, 314.0, 322.0, 313.0, 317.0, 283.0, 286.0, 319.0, 314.0, 319.0, 320.0, 294.0, 288.0, 324.0, 309.0, 315.0, 318.0, 325.0, 311.0, 294.0, 293.0, 313.0, 320.0, 320.0, 316.0, 316.0, 323.0, 316.0, 317.0, 265.0, 265.0, 317.0, 319.0, 317.0, 310.0, 288.0, 294.0, 316.0, 317.0, 322.0, 314.0, 314.0, 322.0]}, "sampler_perf": {"mean_env_wait_ms": 0.9438635758880244, "mean_processing_ms": 0.25705717388077914, "mean_inference_ms": 1.528223444830069}, "off_policy_estimator": {}, "info": {"num_steps_trained": 7320000, "num_steps_sampled": 3904000, "sample_time_ms": 22489.17, "load_time_ms": 36.377, "grad_time_ms": 10072.238, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.005805303808301687, "policy_loss": -0.002536727814003825, "vf_loss": 89.14191436767578, "vf_explained_var": 0.7592394948005676, "kl": 0.0022071560379117727, "entropy": 1.1443239450454712, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3904000, "episodes_total": 9760, "training_iteration": 305, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-21-09", "timestamp": 1660256469, "time_this_iter_s": 33.905731201171875, "time_total_s": 14881.793804883957, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 14881.793804883957, "timesteps_since_restore": 3904000, "iterations_since_restore": 305, "perf": {"cpu_util_percent": 30.185416666666665, "ram_util_percent": 58.73750000000001}}
+{"episode_reward_max": 639.0, "episode_reward_min": 351.0, "episode_reward_mean": 612.03, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 172.0}, "policy_reward_max": {"ppo": 332.0}, "policy_reward_mean": {"ppo": 306.015}, "custom_metrics": {"sparse_reward_mean": 211.8, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 188.43, "shaped_reward_min": 111, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.14, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 18.82, "onion_pickup_agent_1_min": 12, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.57, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 18.05, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.54, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 0.52, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.15, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 0.17, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.25, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.96, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 6.36, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 13, "dish_pickup_agent_1_mean": 5.66, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.64, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.01, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.56, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.41, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.7, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.11, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.6, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.02, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.25, 
"optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.96, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.25, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.96, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [636.0, 351.0, 633.0, 582.0, 633.0, 636.0, 639.0, 630.0, 639.0, 639.0, 582.0, 579.0, 627.0, 636.0, 639.0, 633.0, 627.0, 630.0, 579.0, 636.0, 587.0, 587.0, 636.0, 579.0, 633.0, 630.0, 627.0, 639.0, 636.0, 636.0, 630.0, 536.0, 630.0, 636.0, 582.0, 633.0, 630.0, 510.0, 587.0, 564.0, 579.0, 587.0, 582.0, 587.0, 633.0, 630.0, 630.0, 636.0, 630.0, 569.0, 633.0, 639.0, 582.0, 633.0, 633.0, 636.0, 587.0, 633.0, 636.0, 639.0, 633.0, 530.0, 636.0, 627.0, 582.0, 633.0, 636.0, 636.0, 587.0, 633.0, 633.0, 639.0, 636.0, 587.0, 587.0, 636.0, 633.0, 636.0, 587.0, 587.0, 630.0, 587.0, 582.0, 630.0, 636.0, 581.0, 630.0, 630.0, 639.0, 590.0, 618.0, 627.0, 633.0, 575.0, 627.0, 630.0, 636.0, 636.0, 582.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [314.0, 322.0, 172.0, 179.0, 313.0, 320.0, 280.0, 302.0, 318.0, 315.0, 322.0, 314.0, 324.0, 315.0, 316.0, 314.0, 322.0, 317.0, 319.0, 320.0, 294.0, 288.0, 290.0, 289.0, 310.0, 317.0, 319.0, 317.0, 314.0, 325.0, 316.0, 317.0, 316.0, 311.0, 315.0, 315.0, 285.0, 294.0, 319.0, 317.0, 283.0, 304.0, 285.0, 302.0, 332.0, 304.0, 284.0, 295.0, 316.0, 317.0, 311.0, 319.0, 323.0, 304.0, 319.0, 320.0, 317.0, 319.0, 320.0, 316.0, 308.0, 322.0, 263.0, 273.0, 319.0, 311.0, 324.0, 312.0, 291.0, 291.0, 316.0, 317.0, 316.0, 314.0, 256.0, 254.0, 286.0, 301.0, 276.0, 288.0, 290.0, 289.0, 289.0, 298.0, 296.0, 286.0, 295.0, 292.0, 313.0, 320.0, 318.0, 312.0, 311.0, 319.0, 314.0, 322.0, 313.0, 
317.0, 283.0, 286.0, 319.0, 314.0, 319.0, 320.0, 294.0, 288.0, 324.0, 309.0, 315.0, 318.0, 325.0, 311.0, 294.0, 293.0, 313.0, 320.0, 320.0, 316.0, 316.0, 323.0, 316.0, 317.0, 265.0, 265.0, 317.0, 319.0, 317.0, 310.0, 288.0, 294.0, 316.0, 317.0, 322.0, 314.0, 314.0, 322.0, 299.0, 288.0, 309.0, 324.0, 315.0, 318.0, 312.0, 327.0, 317.0, 319.0, 290.0, 297.0, 298.0, 289.0, 320.0, 316.0, 313.0, 320.0, 321.0, 315.0, 288.0, 299.0, 293.0, 294.0, 318.0, 312.0, 294.0, 293.0, 286.0, 296.0, 318.0, 312.0, 319.0, 317.0, 290.0, 291.0, 320.0, 310.0, 314.0, 316.0, 324.0, 315.0, 295.0, 295.0, 314.0, 304.0, 320.0, 307.0, 319.0, 314.0, 285.0, 290.0, 316.0, 311.0, 313.0, 317.0, 320.0, 316.0, 327.0, 309.0, 288.0, 294.0, 293.0, 289.0]}, "sampler_perf": {"mean_env_wait_ms": 0.9422548001581954, "mean_processing_ms": 0.2567378512099078, "mean_inference_ms": 1.5267428709470499}, "off_policy_estimator": {}, "info": {"num_steps_trained": 7344000, "num_steps_sampled": 3916800, "sample_time_ms": 22275.217, "load_time_ms": 36.758, "grad_time_ms": 10257.278, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0026395271997898817, "policy_loss": -0.005618779454380274, "vf_loss": 88.24600219726562, "vf_explained_var": 0.7727122902870178, "kl": 0.0020911165047436953, "entropy": 1.1326097249984741, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3916800, "episodes_total": 9792, "training_iteration": 306, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-21-40", "timestamp": 1660256500, "time_this_iter_s": 30.71598792076111, "time_total_s": 14912.509792804718, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 14912.509792804718, "timesteps_since_restore": 3916800, "iterations_since_restore": 306, "perf": {"cpu_util_percent": 32.81395348837209, "ram_util_percent": 58.667441860465125}}
+{"episode_reward_max": 639.0, "episode_reward_min": 351.0, "episode_reward_mean": 617.69, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 172.0}, "policy_reward_max": {"ppo": 332.0}, "policy_reward_mean": {"ppo": 308.845}, "custom_metrics": {"sparse_reward_mean": 213.6, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 190.49, "shaped_reward_min": 111, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.18, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 18.9, "onion_pickup_agent_1_min": 12, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.65, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 18.12, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.51, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 0.42, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.14, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 0.1, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.41, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 18.16, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 6.3, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 13, "dish_pickup_agent_1_mean": 5.79, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.68, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.18, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.46, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.47, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.69, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.17, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.63, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.09, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.41, 
"optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 18.16, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.41, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 18.16, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [636.0, 639.0, 633.0, 587.0, 627.0, 639.0, 633.0, 639.0, 633.0, 636.0, 639.0, 587.0, 636.0, 596.0, 639.0, 630.0, 596.0, 630.0, 639.0, 624.0, 633.0, 639.0, 636.0, 579.0, 636.0, 627.0, 636.0, 636.0, 639.0, 636.0, 624.0, 639.0, 582.0, 633.0, 636.0, 636.0, 587.0, 633.0, 633.0, 639.0, 636.0, 587.0, 587.0, 636.0, 633.0, 636.0, 587.0, 587.0, 630.0, 587.0, 582.0, 630.0, 636.0, 581.0, 630.0, 630.0, 639.0, 590.0, 618.0, 627.0, 633.0, 575.0, 627.0, 630.0, 636.0, 636.0, 582.0, 582.0, 636.0, 351.0, 633.0, 582.0, 633.0, 636.0, 639.0, 630.0, 639.0, 639.0, 582.0, 579.0, 627.0, 636.0, 639.0, 633.0, 627.0, 630.0, 579.0, 636.0, 587.0, 587.0, 636.0, 579.0, 633.0, 630.0, 627.0, 639.0, 636.0, 636.0, 630.0, 536.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [317.0, 319.0, 315.0, 324.0, 318.0, 315.0, 292.0, 295.0, 302.0, 325.0, 327.0, 312.0, 311.0, 322.0, 327.0, 312.0, 311.0, 322.0, 314.0, 322.0, 319.0, 320.0, 296.0, 291.0, 315.0, 321.0, 294.0, 302.0, 319.0, 320.0, 310.0, 320.0, 304.0, 292.0, 316.0, 314.0, 317.0, 322.0, 313.0, 311.0, 316.0, 317.0, 314.0, 325.0, 319.0, 317.0, 297.0, 282.0, 319.0, 317.0, 304.0, 323.0, 321.0, 315.0, 314.0, 322.0, 322.0, 317.0, 314.0, 322.0, 308.0, 316.0, 320.0, 319.0, 288.0, 294.0, 316.0, 317.0, 322.0, 314.0, 314.0, 322.0, 299.0, 288.0, 309.0, 324.0, 315.0, 318.0, 312.0, 327.0, 317.0, 319.0, 290.0, 297.0, 298.0, 289.0, 320.0, 316.0, 313.0, 320.0, 321.0, 315.0, 288.0, 299.0, 293.0, 294.0, 318.0, 
312.0, 294.0, 293.0, 286.0, 296.0, 318.0, 312.0, 319.0, 317.0, 290.0, 291.0, 320.0, 310.0, 314.0, 316.0, 324.0, 315.0, 295.0, 295.0, 314.0, 304.0, 320.0, 307.0, 319.0, 314.0, 285.0, 290.0, 316.0, 311.0, 313.0, 317.0, 320.0, 316.0, 327.0, 309.0, 288.0, 294.0, 293.0, 289.0, 314.0, 322.0, 172.0, 179.0, 313.0, 320.0, 280.0, 302.0, 318.0, 315.0, 322.0, 314.0, 324.0, 315.0, 316.0, 314.0, 322.0, 317.0, 319.0, 320.0, 294.0, 288.0, 290.0, 289.0, 310.0, 317.0, 319.0, 317.0, 314.0, 325.0, 316.0, 317.0, 316.0, 311.0, 315.0, 315.0, 285.0, 294.0, 319.0, 317.0, 283.0, 304.0, 285.0, 302.0, 332.0, 304.0, 284.0, 295.0, 316.0, 317.0, 311.0, 319.0, 323.0, 304.0, 319.0, 320.0, 317.0, 319.0, 320.0, 316.0, 308.0, 322.0, 263.0, 273.0]}, "sampler_perf": {"mean_env_wait_ms": 0.9406496030307164, "mean_processing_ms": 0.25641787950901196, "mean_inference_ms": 1.5250430030640023}, "off_policy_estimator": {}, "info": {"num_steps_trained": 7368000, "num_steps_sampled": 3929600, "sample_time_ms": 21842.439, "load_time_ms": 36.991, "grad_time_ms": 10104.518, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.00218362919986248, "policy_loss": -0.005569128785282373, "vf_loss": 83.15591430664062, "vf_explained_var": 0.7728936076164246, "kl": 0.0017335275188088417, "entropy": 1.1256619691848755, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3929600, "episodes_total": 9824, "training_iteration": 307, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-22-09", "timestamp": 1660256529, "time_this_iter_s": 29.1308012008667, "time_total_s": 14941.640594005585, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 14941.640594005585, "timesteps_since_restore": 3929600, "iterations_since_restore": 307, "perf": {"cpu_util_percent": 35.333333333333336, "ram_util_percent": 58.726190476190474}}
+{"episode_reward_max": 639.0, "episode_reward_min": 351.0, "episode_reward_mean": 620.18, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 172.0}, "policy_reward_max": {"ppo": 332.0}, "policy_reward_mean": {"ppo": 310.09}, "custom_metrics": {"sparse_reward_mean": 214.8, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 190.58, "shaped_reward_min": 111, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.9, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 19.12, "onion_pickup_agent_1_min": 12, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.43, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 18.45, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.49, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 0.36, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.11, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 0.08, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.19, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 18.43, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 6.42, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.64, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.78, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.11, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.5, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.37, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.7, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.14, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.68, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.09, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.19, 
"optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 18.43, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.19, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 18.43, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [633.0, 636.0, 639.0, 630.0, 627.0, 636.0, 633.0, 630.0, 630.0, 630.0, 630.0, 561.0, 627.0, 627.0, 639.0, 633.0, 579.0, 633.0, 633.0, 582.0, 630.0, 636.0, 636.0, 627.0, 639.0, 639.0, 630.0, 633.0, 576.0, 579.0, 636.0, 633.0, 636.0, 636.0, 582.0, 582.0, 636.0, 351.0, 633.0, 582.0, 633.0, 636.0, 639.0, 630.0, 639.0, 639.0, 582.0, 579.0, 627.0, 636.0, 639.0, 633.0, 627.0, 630.0, 579.0, 636.0, 587.0, 587.0, 636.0, 579.0, 633.0, 630.0, 627.0, 639.0, 636.0, 636.0, 630.0, 536.0, 636.0, 639.0, 633.0, 587.0, 627.0, 639.0, 633.0, 639.0, 633.0, 636.0, 639.0, 587.0, 636.0, 596.0, 639.0, 630.0, 596.0, 630.0, 639.0, 624.0, 633.0, 639.0, 636.0, 579.0, 636.0, 627.0, 636.0, 636.0, 639.0, 636.0, 624.0, 639.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [310.0, 323.0, 316.0, 320.0, 317.0, 322.0, 310.0, 320.0, 311.0, 316.0, 319.0, 317.0, 306.0, 327.0, 321.0, 309.0, 313.0, 317.0, 311.0, 319.0, 308.0, 322.0, 278.0, 283.0, 318.0, 309.0, 315.0, 312.0, 327.0, 312.0, 316.0, 317.0, 282.0, 297.0, 314.0, 319.0, 316.0, 317.0, 290.0, 292.0, 314.0, 316.0, 313.0, 323.0, 316.0, 320.0, 313.0, 314.0, 319.0, 320.0, 323.0, 316.0, 316.0, 314.0, 314.0, 319.0, 293.0, 283.0, 296.0, 283.0, 316.0, 320.0, 313.0, 320.0, 320.0, 316.0, 327.0, 309.0, 288.0, 294.0, 293.0, 289.0, 314.0, 322.0, 172.0, 179.0, 313.0, 320.0, 280.0, 302.0, 318.0, 315.0, 322.0, 314.0, 324.0, 315.0, 316.0, 314.0, 322.0, 317.0, 319.0, 320.0, 294.0, 288.0, 290.0, 289.0, 310.0, 
317.0, 319.0, 317.0, 314.0, 325.0, 316.0, 317.0, 316.0, 311.0, 315.0, 315.0, 285.0, 294.0, 319.0, 317.0, 283.0, 304.0, 285.0, 302.0, 332.0, 304.0, 284.0, 295.0, 316.0, 317.0, 311.0, 319.0, 323.0, 304.0, 319.0, 320.0, 317.0, 319.0, 320.0, 316.0, 308.0, 322.0, 263.0, 273.0, 317.0, 319.0, 315.0, 324.0, 318.0, 315.0, 292.0, 295.0, 302.0, 325.0, 327.0, 312.0, 311.0, 322.0, 327.0, 312.0, 311.0, 322.0, 314.0, 322.0, 319.0, 320.0, 296.0, 291.0, 315.0, 321.0, 294.0, 302.0, 319.0, 320.0, 310.0, 320.0, 304.0, 292.0, 316.0, 314.0, 317.0, 322.0, 313.0, 311.0, 316.0, 317.0, 314.0, 325.0, 319.0, 317.0, 297.0, 282.0, 319.0, 317.0, 304.0, 323.0, 321.0, 315.0, 314.0, 322.0, 322.0, 317.0, 314.0, 322.0, 308.0, 316.0, 320.0, 319.0]}, "sampler_perf": {"mean_env_wait_ms": 0.9390469484848887, "mean_processing_ms": 0.2560959206110981, "mean_inference_ms": 1.5230623769962466}, "off_policy_estimator": {}, "info": {"num_steps_trained": 7392000, "num_steps_sampled": 3942400, "sample_time_ms": 21606.28, "load_time_ms": 36.86, "grad_time_ms": 10018.576, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0012893896782770753, "policy_loss": -0.006317433435469866, "vf_loss": 81.66983795166016, "vf_explained_var": 0.7689216732978821, "kl": 0.0018363837152719498, "entropy": 1.1203217506408691, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3942400, "episodes_total": 9856, "training_iteration": 308, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-22-36", "timestamp": 1660256556, "time_this_iter_s": 27.574139833450317, "time_total_s": 14969.214733839035, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 14969.214733839035, "timesteps_since_restore": 3942400, "iterations_since_restore": 308, "perf": {"cpu_util_percent": 34.44871794871795, "ram_util_percent": 58.748717948717946}}
+{"episode_reward_max": 639.0, "episode_reward_min": 533.0, "episode_reward_mean": 625.21, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 260.0}, "policy_reward_max": {"ppo": 330.0}, "policy_reward_mean": {"ppo": 312.605}, "custom_metrics": {"sparse_reward_mean": 216.8, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 191.61, "shaped_reward_min": 161, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.26, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 18.85, "onion_pickup_agent_1_min": 12, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 16.83, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 18.25, "useful_onion_pickup_agent_1_min": 12, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.41, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 0.35, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.06, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.1, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 16.64, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 18.2, "potting_onion_agent_1_min": 12, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 6.32, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.96, "dish_pickup_agent_1_min": 4, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.6, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.23, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.49, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.51, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.63, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.31, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.61, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.28, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.64, 
"optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 18.2, "optimal_onion_potting_agent_1_min": 12, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.64, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 18.2, "viable_onion_potting_agent_1_min": 12, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [636.0, 633.0, 639.0, 630.0, 587.0, 639.0, 624.0, 636.0, 639.0, 639.0, 630.0, 630.0, 633.0, 606.0, 627.0, 627.0, 636.0, 630.0, 633.0, 627.0, 587.0, 639.0, 627.0, 639.0, 636.0, 639.0, 636.0, 636.0, 627.0, 636.0, 627.0, 533.0, 636.0, 636.0, 630.0, 536.0, 636.0, 639.0, 633.0, 587.0, 627.0, 639.0, 633.0, 639.0, 633.0, 636.0, 639.0, 587.0, 636.0, 596.0, 639.0, 630.0, 596.0, 630.0, 639.0, 624.0, 633.0, 639.0, 636.0, 579.0, 636.0, 627.0, 636.0, 636.0, 639.0, 636.0, 624.0, 639.0, 633.0, 636.0, 639.0, 630.0, 627.0, 636.0, 633.0, 630.0, 630.0, 630.0, 630.0, 561.0, 627.0, 627.0, 639.0, 633.0, 579.0, 633.0, 633.0, 582.0, 630.0, 636.0, 636.0, 627.0, 639.0, 639.0, 630.0, 633.0, 576.0, 579.0, 636.0, 633.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [314.0, 322.0, 322.0, 311.0, 322.0, 317.0, 311.0, 319.0, 288.0, 299.0, 319.0, 320.0, 311.0, 313.0, 319.0, 317.0, 330.0, 309.0, 317.0, 322.0, 314.0, 316.0, 311.0, 319.0, 319.0, 314.0, 294.0, 312.0, 310.0, 317.0, 309.0, 318.0, 320.0, 316.0, 314.0, 316.0, 309.0, 324.0, 313.0, 314.0, 291.0, 296.0, 329.0, 310.0, 321.0, 306.0, 323.0, 316.0, 319.0, 317.0, 319.0, 320.0, 319.0, 317.0, 319.0, 317.0, 314.0, 313.0, 312.0, 324.0, 313.0, 314.0, 260.0, 273.0, 317.0, 319.0, 320.0, 316.0, 308.0, 322.0, 263.0, 273.0, 317.0, 319.0, 315.0, 324.0, 318.0, 315.0, 292.0, 295.0, 302.0, 325.0, 327.0, 312.0, 311.0, 322.0, 327.0, 312.0, 311.0, 322.0, 314.0, 322.0, 319.0, 320.0, 296.0, 291.0, 315.0, 
321.0, 294.0, 302.0, 319.0, 320.0, 310.0, 320.0, 304.0, 292.0, 316.0, 314.0, 317.0, 322.0, 313.0, 311.0, 316.0, 317.0, 314.0, 325.0, 319.0, 317.0, 297.0, 282.0, 319.0, 317.0, 304.0, 323.0, 321.0, 315.0, 314.0, 322.0, 322.0, 317.0, 314.0, 322.0, 308.0, 316.0, 320.0, 319.0, 310.0, 323.0, 316.0, 320.0, 317.0, 322.0, 310.0, 320.0, 311.0, 316.0, 319.0, 317.0, 306.0, 327.0, 321.0, 309.0, 313.0, 317.0, 311.0, 319.0, 308.0, 322.0, 278.0, 283.0, 318.0, 309.0, 315.0, 312.0, 327.0, 312.0, 316.0, 317.0, 282.0, 297.0, 314.0, 319.0, 316.0, 317.0, 290.0, 292.0, 314.0, 316.0, 313.0, 323.0, 316.0, 320.0, 313.0, 314.0, 319.0, 320.0, 323.0, 316.0, 316.0, 314.0, 314.0, 319.0, 293.0, 283.0, 296.0, 283.0, 316.0, 320.0, 313.0, 320.0]}, "sampler_perf": {"mean_env_wait_ms": 0.9374628711409474, "mean_processing_ms": 0.2557781242683051, "mean_inference_ms": 1.5211277740560474}, "off_policy_estimator": {}, "info": {"num_steps_trained": 7416000, "num_steps_sampled": 3955200, "sample_time_ms": 21548.086, "load_time_ms": 36.789, "grad_time_ms": 9960.253, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0017820480279624462, "policy_loss": -0.005541125778108835, "vf_loss": 78.87618255615234, "vf_explained_var": 0.777707040309906, "kl": 0.0019577995408326387, "entropy": 1.1288975477218628, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3955200, "episodes_total": 9888, "training_iteration": 309, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-23-06", "timestamp": 1660256586, "time_this_iter_s": 30.049942016601562, "time_total_s": 14999.264675855637, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 14999.264675855637, "timesteps_since_restore": 3955200, "iterations_since_restore": 309, "perf": {"cpu_util_percent": 34.733333333333334, "ram_util_percent": 58.76428571428571}}
+{"episode_reward_max": 639.0, "episode_reward_min": 533.0, "episode_reward_mean": 625.2, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 260.0}, "policy_reward_max": {"ppo": 330.0}, "policy_reward_mean": {"ppo": 312.6}, "custom_metrics": {"sparse_reward_mean": 217.0, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 191.2, "shaped_reward_min": 161, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.32, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 18.98, "onion_pickup_agent_1_min": 12, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 16.83, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 18.36, "useful_onion_pickup_agent_1_min": 12, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.43, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 0.59, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.09, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.17, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 16.65, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 18.07, "potting_onion_agent_1_min": 12, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.39, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.84, "dish_pickup_agent_1_min": 4, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.68, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.15, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.49, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.45, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.67, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.26, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.64, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.24, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.65, 
"optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 18.07, "optimal_onion_potting_agent_1_min": 12, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.65, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 18.07, "viable_onion_potting_agent_1_min": 12, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [630.0, 582.0, 639.0, 587.0, 636.0, 593.0, 633.0, 627.0, 639.0, 627.0, 639.0, 633.0, 636.0, 630.0, 630.0, 630.0, 636.0, 633.0, 636.0, 582.0, 627.0, 639.0, 639.0, 582.0, 636.0, 627.0, 587.0, 636.0, 633.0, 633.0, 624.0, 636.0, 639.0, 636.0, 624.0, 639.0, 633.0, 636.0, 639.0, 630.0, 627.0, 636.0, 633.0, 630.0, 630.0, 630.0, 630.0, 561.0, 627.0, 627.0, 639.0, 633.0, 579.0, 633.0, 633.0, 582.0, 630.0, 636.0, 636.0, 627.0, 639.0, 639.0, 630.0, 633.0, 576.0, 579.0, 636.0, 633.0, 636.0, 633.0, 639.0, 630.0, 587.0, 639.0, 624.0, 636.0, 639.0, 639.0, 630.0, 630.0, 633.0, 606.0, 627.0, 627.0, 636.0, 630.0, 633.0, 627.0, 587.0, 639.0, 627.0, 639.0, 636.0, 639.0, 636.0, 636.0, 627.0, 636.0, 627.0, 533.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [316.0, 314.0, 296.0, 286.0, 317.0, 322.0, 296.0, 291.0, 321.0, 315.0, 302.0, 291.0, 308.0, 325.0, 318.0, 309.0, 322.0, 317.0, 315.0, 312.0, 322.0, 317.0, 309.0, 324.0, 321.0, 315.0, 322.0, 308.0, 313.0, 317.0, 317.0, 313.0, 322.0, 314.0, 320.0, 313.0, 319.0, 317.0, 293.0, 289.0, 316.0, 311.0, 322.0, 317.0, 317.0, 322.0, 291.0, 291.0, 318.0, 318.0, 305.0, 322.0, 296.0, 291.0, 311.0, 325.0, 309.0, 324.0, 319.0, 314.0, 326.0, 298.0, 317.0, 319.0, 322.0, 317.0, 314.0, 322.0, 308.0, 316.0, 320.0, 319.0, 310.0, 323.0, 316.0, 320.0, 317.0, 322.0, 310.0, 320.0, 311.0, 316.0, 319.0, 317.0, 306.0, 327.0, 321.0, 309.0, 313.0, 317.0, 311.0, 319.0, 308.0, 322.0, 278.0, 283.0, 318.0, 
309.0, 315.0, 312.0, 327.0, 312.0, 316.0, 317.0, 282.0, 297.0, 314.0, 319.0, 316.0, 317.0, 290.0, 292.0, 314.0, 316.0, 313.0, 323.0, 316.0, 320.0, 313.0, 314.0, 319.0, 320.0, 323.0, 316.0, 316.0, 314.0, 314.0, 319.0, 293.0, 283.0, 296.0, 283.0, 316.0, 320.0, 313.0, 320.0, 314.0, 322.0, 322.0, 311.0, 322.0, 317.0, 311.0, 319.0, 288.0, 299.0, 319.0, 320.0, 311.0, 313.0, 319.0, 317.0, 330.0, 309.0, 317.0, 322.0, 314.0, 316.0, 311.0, 319.0, 319.0, 314.0, 294.0, 312.0, 310.0, 317.0, 309.0, 318.0, 320.0, 316.0, 314.0, 316.0, 309.0, 324.0, 313.0, 314.0, 291.0, 296.0, 329.0, 310.0, 321.0, 306.0, 323.0, 316.0, 319.0, 317.0, 319.0, 320.0, 319.0, 317.0, 319.0, 317.0, 314.0, 313.0, 312.0, 324.0, 313.0, 314.0, 260.0, 273.0]}, "sampler_perf": {"mean_env_wait_ms": 0.9359034441976002, "mean_processing_ms": 0.2554667399859708, "mean_inference_ms": 1.5193431419939385}, "off_policy_estimator": {}, "info": {"num_steps_trained": 7440000, "num_steps_sampled": 3968000, "sample_time_ms": 21381.604, "load_time_ms": 36.734, "grad_time_ms": 9849.343, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0021060302387923002, "policy_loss": -0.005640763323754072, "vf_loss": 83.09170532226562, "vf_explained_var": 0.7722363471984863, "kl": 0.0021093024406582117, "entropy": 1.12474524974823, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3968000, "episodes_total": 9920, "training_iteration": 310, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-23-36", "timestamp": 1660256616, "time_this_iter_s": 29.72802186012268, "time_total_s": 15028.99269771576, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 15028.99269771576, "timesteps_since_restore": 3968000, "iterations_since_restore": 310, "perf": {"cpu_util_percent": 37.352380952380955, "ram_util_percent": 59.30714285714285}}
+{"episode_reward_max": 639.0, "episode_reward_min": 530.0, "episode_reward_mean": 623.21, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 258.0}, "policy_reward_max": {"ppo": 330.0}, "policy_reward_mean": {"ppo": 311.605}, "custom_metrics": {"sparse_reward_mean": 216.0, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 191.21, "shaped_reward_min": 166, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.42, "onion_pickup_agent_0_min": 10, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 18.99, "onion_pickup_agent_1_min": 14, "onion_pickup_agent_1_max": 27, "useful_onion_pickup_agent_0_mean": 16.88, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 18.26, "useful_onion_pickup_agent_1_min": 14, "useful_onion_pickup_agent_1_max": 26, "onion_drop_agent_0_mean": 0.39, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.78, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.1, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.28, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 6, "potting_onion_agent_0_mean": 16.78, "potting_onion_agent_0_min": 9, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.89, "potting_onion_agent_1_min": 13, "potting_onion_agent_1_max": 25, "dish_pickup_agent_0_mean": 6.25, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.88, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.64, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 5.26, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.43, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.45, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.63, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 5.29, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.58, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 5.26, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.78, 
"optimal_onion_potting_agent_0_min": 9, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.89, "optimal_onion_potting_agent_1_min": 13, "optimal_onion_potting_agent_1_max": 25, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.78, "viable_onion_potting_agent_0_min": 9, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.89, "viable_onion_potting_agent_1_min": 13, "viable_onion_potting_agent_1_max": 25, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [636.0, 633.0, 582.0, 630.0, 573.0, 636.0, 630.0, 639.0, 639.0, 636.0, 639.0, 630.0, 587.0, 630.0, 627.0, 633.0, 630.0, 636.0, 636.0, 587.0, 639.0, 633.0, 639.0, 587.0, 630.0, 633.0, 530.0, 630.0, 639.0, 633.0, 582.0, 633.0, 576.0, 579.0, 636.0, 633.0, 636.0, 633.0, 639.0, 630.0, 587.0, 639.0, 624.0, 636.0, 639.0, 639.0, 630.0, 630.0, 633.0, 606.0, 627.0, 627.0, 636.0, 630.0, 633.0, 627.0, 587.0, 639.0, 627.0, 639.0, 636.0, 639.0, 636.0, 636.0, 627.0, 636.0, 627.0, 533.0, 630.0, 582.0, 639.0, 587.0, 636.0, 593.0, 633.0, 627.0, 639.0, 627.0, 639.0, 633.0, 636.0, 630.0, 630.0, 630.0, 636.0, 633.0, 636.0, 582.0, 627.0, 639.0, 639.0, 582.0, 636.0, 627.0, 587.0, 636.0, 633.0, 633.0, 624.0, 636.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [311.0, 325.0, 316.0, 317.0, 288.0, 294.0, 319.0, 311.0, 285.0, 288.0, 322.0, 314.0, 311.0, 319.0, 322.0, 317.0, 320.0, 319.0, 309.0, 327.0, 319.0, 320.0, 313.0, 317.0, 296.0, 291.0, 311.0, 319.0, 308.0, 319.0, 314.0, 319.0, 319.0, 311.0, 315.0, 321.0, 312.0, 324.0, 296.0, 291.0, 322.0, 317.0, 319.0, 314.0, 319.0, 320.0, 296.0, 291.0, 303.0, 327.0, 309.0, 324.0, 258.0, 272.0, 321.0, 309.0, 319.0, 320.0, 324.0, 309.0, 287.0, 295.0, 319.0, 314.0, 293.0, 283.0, 296.0, 283.0, 316.0, 320.0, 313.0, 320.0, 314.0, 322.0, 322.0, 311.0, 322.0, 317.0, 311.0, 319.0, 288.0, 299.0, 319.0, 320.0, 311.0, 313.0, 319.0, 317.0, 330.0, 309.0, 317.0, 322.0, 314.0, 316.0, 311.0, 319.0, 319.0, 
314.0, 294.0, 312.0, 310.0, 317.0, 309.0, 318.0, 320.0, 316.0, 314.0, 316.0, 309.0, 324.0, 313.0, 314.0, 291.0, 296.0, 329.0, 310.0, 321.0, 306.0, 323.0, 316.0, 319.0, 317.0, 319.0, 320.0, 319.0, 317.0, 319.0, 317.0, 314.0, 313.0, 312.0, 324.0, 313.0, 314.0, 260.0, 273.0, 316.0, 314.0, 296.0, 286.0, 317.0, 322.0, 296.0, 291.0, 321.0, 315.0, 302.0, 291.0, 308.0, 325.0, 318.0, 309.0, 322.0, 317.0, 315.0, 312.0, 322.0, 317.0, 309.0, 324.0, 321.0, 315.0, 322.0, 308.0, 313.0, 317.0, 317.0, 313.0, 322.0, 314.0, 320.0, 313.0, 319.0, 317.0, 293.0, 289.0, 316.0, 311.0, 322.0, 317.0, 317.0, 322.0, 291.0, 291.0, 318.0, 318.0, 305.0, 322.0, 296.0, 291.0, 311.0, 325.0, 309.0, 324.0, 319.0, 314.0, 326.0, 298.0, 317.0, 319.0]}, "sampler_perf": {"mean_env_wait_ms": 0.9343560450317893, "mean_processing_ms": 0.25515871155662695, "mean_inference_ms": 1.5176487497740194}, "off_policy_estimator": {}, "info": {"num_steps_trained": 7464000, "num_steps_sampled": 3980800, "sample_time_ms": 21156.178, "load_time_ms": 36.97, "grad_time_ms": 9869.74, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0005363213713280857, "policy_loss": -0.0070701222866773605, "vf_loss": 81.7235336303711, "vf_explained_var": 0.7686123847961426, "kl": 0.0019356707343831658, "entropy": 1.131825566291809, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3980800, "episodes_total": 9952, "training_iteration": 311, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-24-07", "timestamp": 1660256647, "time_this_iter_s": 30.375731229782104, "time_total_s": 15059.368428945541, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 15059.368428945541, "timesteps_since_restore": 3980800, "iterations_since_restore": 311, "perf": {"cpu_util_percent": 34.15581395348838, "ram_util_percent": 58.86046511627907}}
+{"episode_reward_max": 639.0, "episode_reward_min": 530.0, "episode_reward_mean": 621.63, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 258.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 310.815}, "custom_metrics": {"sparse_reward_mean": 215.6, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 190.43, "shaped_reward_min": 161, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.35, "onion_pickup_agent_0_min": 10, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 18.84, "onion_pickup_agent_1_min": 14, "onion_pickup_agent_1_max": 27, "useful_onion_pickup_agent_0_mean": 16.8, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 18.14, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 26, "onion_drop_agent_0_mean": 0.4, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.71, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.11, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.23, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 6, "potting_onion_agent_0_mean": 16.65, "potting_onion_agent_0_min": 9, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.82, "potting_onion_agent_1_min": 13, "potting_onion_agent_1_max": 25, "dish_pickup_agent_0_mean": 6.15, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.83, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.59, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 5.3, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.45, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.36, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.56, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 5.34, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.5, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 5.31, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.65, 
"optimal_onion_potting_agent_0_min": 9, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.82, "optimal_onion_potting_agent_1_min": 13, "optimal_onion_potting_agent_1_max": 25, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.65, "viable_onion_potting_agent_0_min": 9, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.82, "viable_onion_potting_agent_1_min": 13, "viable_onion_potting_agent_1_max": 25, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [636.0, 624.0, 630.0, 633.0, 636.0, 636.0, 618.0, 636.0, 627.0, 567.0, 636.0, 636.0, 582.0, 633.0, 561.0, 582.0, 582.0, 630.0, 627.0, 587.0, 630.0, 639.0, 630.0, 633.0, 633.0, 633.0, 633.0, 639.0, 630.0, 630.0, 630.0, 627.0, 627.0, 636.0, 627.0, 533.0, 630.0, 582.0, 639.0, 587.0, 636.0, 593.0, 633.0, 627.0, 639.0, 627.0, 639.0, 633.0, 636.0, 630.0, 630.0, 630.0, 636.0, 633.0, 636.0, 582.0, 627.0, 639.0, 639.0, 582.0, 636.0, 627.0, 587.0, 636.0, 633.0, 633.0, 624.0, 636.0, 636.0, 633.0, 582.0, 630.0, 573.0, 636.0, 630.0, 639.0, 639.0, 636.0, 639.0, 630.0, 587.0, 630.0, 627.0, 633.0, 630.0, 636.0, 636.0, 587.0, 639.0, 633.0, 639.0, 587.0, 630.0, 633.0, 530.0, 630.0, 639.0, 633.0, 582.0, 633.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [319.0, 317.0, 313.0, 311.0, 316.0, 314.0, 319.0, 314.0, 312.0, 324.0, 316.0, 320.0, 305.0, 313.0, 309.0, 327.0, 309.0, 318.0, 273.0, 294.0, 319.0, 317.0, 317.0, 319.0, 294.0, 288.0, 311.0, 322.0, 280.0, 281.0, 290.0, 292.0, 291.0, 291.0, 311.0, 319.0, 313.0, 314.0, 296.0, 291.0, 308.0, 322.0, 322.0, 317.0, 320.0, 310.0, 318.0, 315.0, 319.0, 314.0, 317.0, 316.0, 316.0, 317.0, 317.0, 322.0, 311.0, 319.0, 313.0, 317.0, 311.0, 319.0, 310.0, 317.0, 314.0, 313.0, 312.0, 324.0, 313.0, 314.0, 260.0, 273.0, 316.0, 314.0, 296.0, 286.0, 317.0, 322.0, 296.0, 291.0, 321.0, 315.0, 302.0, 291.0, 308.0, 325.0, 318.0, 309.0, 322.0, 317.0, 315.0, 312.0, 322.0, 317.0, 309.0, 324.0, 321.0, 
315.0, 322.0, 308.0, 313.0, 317.0, 317.0, 313.0, 322.0, 314.0, 320.0, 313.0, 319.0, 317.0, 293.0, 289.0, 316.0, 311.0, 322.0, 317.0, 317.0, 322.0, 291.0, 291.0, 318.0, 318.0, 305.0, 322.0, 296.0, 291.0, 311.0, 325.0, 309.0, 324.0, 319.0, 314.0, 326.0, 298.0, 317.0, 319.0, 311.0, 325.0, 316.0, 317.0, 288.0, 294.0, 319.0, 311.0, 285.0, 288.0, 322.0, 314.0, 311.0, 319.0, 322.0, 317.0, 320.0, 319.0, 309.0, 327.0, 319.0, 320.0, 313.0, 317.0, 296.0, 291.0, 311.0, 319.0, 308.0, 319.0, 314.0, 319.0, 319.0, 311.0, 315.0, 321.0, 312.0, 324.0, 296.0, 291.0, 322.0, 317.0, 319.0, 314.0, 319.0, 320.0, 296.0, 291.0, 303.0, 327.0, 309.0, 324.0, 258.0, 272.0, 321.0, 309.0, 319.0, 320.0, 324.0, 309.0, 287.0, 295.0, 319.0, 314.0]}, "sampler_perf": {"mean_env_wait_ms": 0.9328126111792946, "mean_processing_ms": 0.25484999330507013, "mean_inference_ms": 1.5158702163945572}, "off_policy_estimator": {}, "info": {"num_steps_trained": 7488000, "num_steps_sampled": 3993600, "sample_time_ms": 20762.299, "load_time_ms": 37.78, "grad_time_ms": 9801.464, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0010485065868124366, "policy_loss": -0.00627841567620635, "vf_loss": 78.91202545166016, "vf_explained_var": 0.7650337219238281, "kl": 0.0021341259125620127, "entropy": 1.1285619735717773, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3993600, "episodes_total": 9984, "training_iteration": 312, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-24-36", "timestamp": 1660256676, "time_this_iter_s": 29.196868896484375, "time_total_s": 15088.565297842026, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 15088.565297842026, "timesteps_since_restore": 3993600, "iterations_since_restore": 312, "perf": {"cpu_util_percent": 34.93170731707317, "ram_util_percent": 58.778048780487794}}
+{"episode_reward_max": 639.0, "episode_reward_min": 516.0, "episode_reward_mean": 620.09, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 249.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 310.045}, "custom_metrics": {"sparse_reward_mean": 215.2, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 189.69, "shaped_reward_min": 156, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.28, "onion_pickup_agent_0_min": 10, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 18.75, "onion_pickup_agent_1_min": 14, "onion_pickup_agent_1_max": 27, "useful_onion_pickup_agent_0_mean": 16.86, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 17.98, "useful_onion_pickup_agent_1_min": 12, "useful_onion_pickup_agent_1_max": 26, "onion_drop_agent_0_mean": 0.39, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.6, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.1, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.22, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 6, "potting_onion_agent_0_mean": 16.63, "potting_onion_agent_0_min": 9, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.77, "potting_onion_agent_1_min": 13, "potting_onion_agent_1_max": 25, "dish_pickup_agent_0_mean": 6.09, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.85, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.51, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 5.27, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.44, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.36, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.5, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 5.36, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.44, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 5.34, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.63, 
"optimal_onion_potting_agent_0_min": 9, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.77, "optimal_onion_potting_agent_1_min": 13, "optimal_onion_potting_agent_1_max": 25, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.63, "viable_onion_potting_agent_0_min": 9, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.77, "viable_onion_potting_agent_1_min": 13, "viable_onion_potting_agent_1_max": 25, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [630.0, 636.0, 639.0, 630.0, 570.0, 633.0, 639.0, 636.0, 582.0, 576.0, 630.0, 639.0, 636.0, 639.0, 636.0, 630.0, 590.0, 639.0, 582.0, 621.0, 573.0, 627.0, 516.0, 639.0, 572.0, 639.0, 579.0, 636.0, 630.0, 639.0, 627.0, 630.0, 633.0, 633.0, 624.0, 636.0, 636.0, 633.0, 582.0, 630.0, 573.0, 636.0, 630.0, 639.0, 639.0, 636.0, 639.0, 630.0, 587.0, 630.0, 627.0, 633.0, 630.0, 636.0, 636.0, 587.0, 639.0, 633.0, 639.0, 587.0, 630.0, 633.0, 530.0, 630.0, 639.0, 633.0, 582.0, 633.0, 636.0, 624.0, 630.0, 633.0, 636.0, 636.0, 618.0, 636.0, 627.0, 567.0, 636.0, 636.0, 582.0, 633.0, 561.0, 582.0, 582.0, 630.0, 627.0, 587.0, 630.0, 639.0, 630.0, 633.0, 633.0, 633.0, 633.0, 639.0, 630.0, 630.0, 630.0, 627.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [316.0, 314.0, 319.0, 317.0, 320.0, 319.0, 323.0, 307.0, 282.0, 288.0, 319.0, 314.0, 317.0, 322.0, 319.0, 317.0, 294.0, 288.0, 298.0, 278.0, 310.0, 320.0, 324.0, 315.0, 317.0, 319.0, 322.0, 317.0, 316.0, 320.0, 313.0, 317.0, 291.0, 299.0, 317.0, 322.0, 271.0, 311.0, 316.0, 305.0, 278.0, 295.0, 315.0, 312.0, 249.0, 267.0, 317.0, 322.0, 277.0, 295.0, 319.0, 320.0, 297.0, 282.0, 317.0, 319.0, 313.0, 317.0, 316.0, 323.0, 316.0, 311.0, 321.0, 309.0, 309.0, 324.0, 319.0, 314.0, 326.0, 298.0, 317.0, 319.0, 311.0, 325.0, 316.0, 317.0, 288.0, 294.0, 319.0, 311.0, 285.0, 288.0, 322.0, 314.0, 311.0, 319.0, 322.0, 317.0, 320.0, 319.0, 309.0, 327.0, 319.0, 320.0, 313.0, 317.0, 296.0, 
291.0, 311.0, 319.0, 308.0, 319.0, 314.0, 319.0, 319.0, 311.0, 315.0, 321.0, 312.0, 324.0, 296.0, 291.0, 322.0, 317.0, 319.0, 314.0, 319.0, 320.0, 296.0, 291.0, 303.0, 327.0, 309.0, 324.0, 258.0, 272.0, 321.0, 309.0, 319.0, 320.0, 324.0, 309.0, 287.0, 295.0, 319.0, 314.0, 319.0, 317.0, 313.0, 311.0, 316.0, 314.0, 319.0, 314.0, 312.0, 324.0, 316.0, 320.0, 305.0, 313.0, 309.0, 327.0, 309.0, 318.0, 273.0, 294.0, 319.0, 317.0, 317.0, 319.0, 294.0, 288.0, 311.0, 322.0, 280.0, 281.0, 290.0, 292.0, 291.0, 291.0, 311.0, 319.0, 313.0, 314.0, 296.0, 291.0, 308.0, 322.0, 322.0, 317.0, 320.0, 310.0, 318.0, 315.0, 319.0, 314.0, 317.0, 316.0, 316.0, 317.0, 317.0, 322.0, 311.0, 319.0, 313.0, 317.0, 311.0, 319.0, 310.0, 317.0]}, "sampler_perf": {"mean_env_wait_ms": 0.9312771580791189, "mean_processing_ms": 0.25454145444446286, "mean_inference_ms": 1.514166938443501}, "off_policy_estimator": {}, "info": {"num_steps_trained": 7512000, "num_steps_sampled": 4006400, "sample_time_ms": 20653.913, "load_time_ms": 37.661, "grad_time_ms": 9796.127, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0016649666940793395, "policy_loss": -0.005544379819184542, "vf_loss": 77.78628540039062, "vf_explained_var": 0.7735397815704346, "kl": 0.0018068948993459344, "entropy": 1.1385550498962402, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4006400, "episodes_total": 10016, "training_iteration": 313, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-25-08", "timestamp": 1660256708, "time_this_iter_s": 32.13484477996826, "time_total_s": 15120.700142621994, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 15120.700142621994, "timesteps_since_restore": 4006400, "iterations_since_restore": 313, "perf": {"cpu_util_percent": 34.30434782608695, "ram_util_percent": 58.8478260869565}}
+{"episode_reward_max": 639.0, "episode_reward_min": 516.0, "episode_reward_mean": 617.82, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 249.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 308.91}, "custom_metrics": {"sparse_reward_mean": 214.2, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 189.42, "shaped_reward_min": 156, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.24, "onion_pickup_agent_0_min": 10, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 18.59, "onion_pickup_agent_1_min": 13, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.8, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 17.92, "useful_onion_pickup_agent_1_min": 12, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.43, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.44, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.11, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.14, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.53, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.8, "potting_onion_agent_1_min": 13, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.14, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.77, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.59, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.22, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.43, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.34, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.54, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.28, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.47, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.26, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.53, 
"optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.8, "optimal_onion_potting_agent_1_min": 13, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.53, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.8, "viable_onion_potting_agent_1_min": 13, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [630.0, 587.0, 639.0, 587.0, 630.0, 582.0, 636.0, 636.0, 627.0, 639.0, 639.0, 584.0, 587.0, 639.0, 582.0, 630.0, 579.0, 633.0, 636.0, 633.0, 636.0, 636.0, 582.0, 582.0, 633.0, 587.0, 630.0, 633.0, 630.0, 630.0, 593.0, 582.0, 639.0, 633.0, 582.0, 633.0, 636.0, 624.0, 630.0, 633.0, 636.0, 636.0, 618.0, 636.0, 627.0, 567.0, 636.0, 636.0, 582.0, 633.0, 561.0, 582.0, 582.0, 630.0, 627.0, 587.0, 630.0, 639.0, 630.0, 633.0, 633.0, 633.0, 633.0, 639.0, 630.0, 630.0, 630.0, 627.0, 630.0, 636.0, 639.0, 630.0, 570.0, 633.0, 639.0, 636.0, 582.0, 576.0, 630.0, 639.0, 636.0, 639.0, 636.0, 630.0, 590.0, 639.0, 582.0, 621.0, 573.0, 627.0, 516.0, 639.0, 572.0, 639.0, 579.0, 636.0, 630.0, 639.0, 627.0, 630.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [316.0, 314.0, 292.0, 295.0, 314.0, 325.0, 301.0, 286.0, 311.0, 319.0, 295.0, 287.0, 319.0, 317.0, 317.0, 319.0, 305.0, 322.0, 315.0, 324.0, 317.0, 322.0, 298.0, 286.0, 298.0, 289.0, 319.0, 320.0, 286.0, 296.0, 309.0, 321.0, 296.0, 283.0, 318.0, 315.0, 314.0, 322.0, 314.0, 319.0, 316.0, 320.0, 314.0, 322.0, 291.0, 291.0, 285.0, 297.0, 319.0, 314.0, 298.0, 289.0, 319.0, 311.0, 316.0, 317.0, 323.0, 307.0, 315.0, 315.0, 297.0, 296.0, 291.0, 291.0, 319.0, 320.0, 324.0, 309.0, 287.0, 295.0, 319.0, 314.0, 319.0, 317.0, 313.0, 311.0, 316.0, 314.0, 319.0, 314.0, 312.0, 324.0, 316.0, 320.0, 305.0, 313.0, 309.0, 327.0, 309.0, 318.0, 273.0, 294.0, 319.0, 317.0, 317.0, 319.0, 294.0, 
288.0, 311.0, 322.0, 280.0, 281.0, 290.0, 292.0, 291.0, 291.0, 311.0, 319.0, 313.0, 314.0, 296.0, 291.0, 308.0, 322.0, 322.0, 317.0, 320.0, 310.0, 318.0, 315.0, 319.0, 314.0, 317.0, 316.0, 316.0, 317.0, 317.0, 322.0, 311.0, 319.0, 313.0, 317.0, 311.0, 319.0, 310.0, 317.0, 316.0, 314.0, 319.0, 317.0, 320.0, 319.0, 323.0, 307.0, 282.0, 288.0, 319.0, 314.0, 317.0, 322.0, 319.0, 317.0, 294.0, 288.0, 298.0, 278.0, 310.0, 320.0, 324.0, 315.0, 317.0, 319.0, 322.0, 317.0, 316.0, 320.0, 313.0, 317.0, 291.0, 299.0, 317.0, 322.0, 271.0, 311.0, 316.0, 305.0, 278.0, 295.0, 315.0, 312.0, 249.0, 267.0, 317.0, 322.0, 277.0, 295.0, 319.0, 320.0, 297.0, 282.0, 317.0, 319.0, 313.0, 317.0, 316.0, 323.0, 316.0, 311.0, 321.0, 309.0]}, "sampler_perf": {"mean_env_wait_ms": 0.929753206610253, "mean_processing_ms": 0.25423624145174695, "mean_inference_ms": 1.5125268663026497}, "off_policy_estimator": {}, "info": {"num_steps_trained": 7536000, "num_steps_sampled": 4019200, "sample_time_ms": 20614.803, "load_time_ms": 37.576, "grad_time_ms": 9935.143, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.005392418708652258, "policy_loss": -0.002403073711320758, "vf_loss": 83.61144256591797, "vf_explained_var": 0.7692582011222839, "kl": 0.0021780512761324644, "entropy": 1.131287932395935, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4019200, "episodes_total": 10048, "training_iteration": 314, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-25-41", "timestamp": 1660256741, "time_this_iter_s": 33.338226318359375, "time_total_s": 15154.038368940353, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 15154.038368940353, "timesteps_since_restore": 4019200, "iterations_since_restore": 314, "perf": {"cpu_util_percent": 34.01914893617022, "ram_util_percent": 58.87021276595746}}
+{"episode_reward_max": 639.0, "episode_reward_min": 459.0, "episode_reward_mean": 614.82, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 228.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 307.41}, "custom_metrics": {"sparse_reward_mean": 213.0, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 188.82, "shaped_reward_min": 139, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.1, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 18.53, "onion_pickup_agent_1_min": 13, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.65, "useful_onion_pickup_agent_0_min": 13, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 17.87, "useful_onion_pickup_agent_1_min": 12, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.42, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.44, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.09, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.14, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.46, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.7, "potting_onion_agent_1_min": 13, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.03, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.76, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.59, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.24, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.33, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.36, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.53, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.26, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.45, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.24, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.46, 
"optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.7, "optimal_onion_potting_agent_1_min": 13, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.46, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.7, "viable_onion_potting_agent_1_min": 13, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [587.0, 590.0, 633.0, 633.0, 630.0, 630.0, 627.0, 582.0, 573.0, 630.0, 630.0, 582.0, 636.0, 639.0, 630.0, 587.0, 639.0, 630.0, 627.0, 630.0, 630.0, 639.0, 459.0, 582.0, 627.0, 582.0, 627.0, 636.0, 627.0, 633.0, 530.0, 639.0, 630.0, 630.0, 630.0, 627.0, 630.0, 636.0, 639.0, 630.0, 570.0, 633.0, 639.0, 636.0, 582.0, 576.0, 630.0, 639.0, 636.0, 639.0, 636.0, 630.0, 590.0, 639.0, 582.0, 621.0, 573.0, 627.0, 516.0, 639.0, 572.0, 639.0, 579.0, 636.0, 630.0, 639.0, 627.0, 630.0, 630.0, 587.0, 639.0, 587.0, 630.0, 582.0, 636.0, 636.0, 627.0, 639.0, 639.0, 584.0, 587.0, 639.0, 582.0, 630.0, 579.0, 633.0, 636.0, 633.0, 636.0, 636.0, 582.0, 582.0, 633.0, 587.0, 630.0, 633.0, 630.0, 630.0, 593.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [290.0, 297.0, 294.0, 296.0, 327.0, 306.0, 314.0, 319.0, 308.0, 322.0, 319.0, 311.0, 311.0, 316.0, 294.0, 288.0, 282.0, 291.0, 318.0, 312.0, 309.0, 321.0, 291.0, 291.0, 318.0, 318.0, 317.0, 322.0, 316.0, 314.0, 296.0, 291.0, 318.0, 321.0, 313.0, 317.0, 318.0, 309.0, 311.0, 319.0, 306.0, 324.0, 317.0, 322.0, 231.0, 228.0, 288.0, 294.0, 315.0, 312.0, 294.0, 288.0, 308.0, 319.0, 316.0, 320.0, 324.0, 303.0, 311.0, 322.0, 265.0, 265.0, 314.0, 325.0, 311.0, 319.0, 313.0, 317.0, 311.0, 319.0, 310.0, 317.0, 316.0, 314.0, 319.0, 317.0, 320.0, 319.0, 323.0, 307.0, 282.0, 288.0, 319.0, 314.0, 317.0, 322.0, 319.0, 317.0, 294.0, 288.0, 298.0, 278.0, 310.0, 320.0, 324.0, 315.0, 317.0, 
319.0, 322.0, 317.0, 316.0, 320.0, 313.0, 317.0, 291.0, 299.0, 317.0, 322.0, 271.0, 311.0, 316.0, 305.0, 278.0, 295.0, 315.0, 312.0, 249.0, 267.0, 317.0, 322.0, 277.0, 295.0, 319.0, 320.0, 297.0, 282.0, 317.0, 319.0, 313.0, 317.0, 316.0, 323.0, 316.0, 311.0, 321.0, 309.0, 316.0, 314.0, 292.0, 295.0, 314.0, 325.0, 301.0, 286.0, 311.0, 319.0, 295.0, 287.0, 319.0, 317.0, 317.0, 319.0, 305.0, 322.0, 315.0, 324.0, 317.0, 322.0, 298.0, 286.0, 298.0, 289.0, 319.0, 320.0, 286.0, 296.0, 309.0, 321.0, 296.0, 283.0, 318.0, 315.0, 314.0, 322.0, 314.0, 319.0, 316.0, 320.0, 314.0, 322.0, 291.0, 291.0, 285.0, 297.0, 319.0, 314.0, 298.0, 289.0, 319.0, 311.0, 316.0, 317.0, 323.0, 307.0, 315.0, 315.0, 297.0, 296.0, 291.0, 291.0]}, "sampler_perf": {"mean_env_wait_ms": 0.9282592182519096, "mean_processing_ms": 0.253938832905362, "mean_inference_ms": 1.511735993488809}, "off_policy_estimator": {}, "info": {"num_steps_trained": 7560000, "num_steps_sampled": 4032000, "sample_time_ms": 21363.9, "load_time_ms": 37.576, "grad_time_ms": 10037.47, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0003922081959899515, "policy_loss": -0.007403677329421043, "vf_loss": 83.57759857177734, "vf_explained_var": 0.7612032294273376, "kl": 0.001659790868870914, "entropy": 1.12375009059906, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4032000, "episodes_total": 10080, "training_iteration": 315, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-26-24", "timestamp": 1660256784, "time_this_iter_s": 42.419737100601196, "time_total_s": 15196.458106040955, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 15196.458106040955, "timesteps_since_restore": 4032000, "iterations_since_restore": 315, "perf": {"cpu_util_percent": 30.92, "ram_util_percent": 58.89833333333333}}
+{"episode_reward_max": 639.0, "episode_reward_min": 459.0, "episode_reward_mean": 612.79, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 228.0}, "policy_reward_max": {"ppo": 328.0}, "policy_reward_mean": {"ppo": 306.395}, "custom_metrics": {"sparse_reward_mean": 212.2, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 188.39, "shaped_reward_min": 139, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.82, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 18.71, "onion_pickup_agent_1_min": 13, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.34, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 18.12, "useful_onion_pickup_agent_1_min": 12, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.43, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.45, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.09, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.16, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.14, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.92, "potting_onion_agent_1_min": 13, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.09, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.67, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.65, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.12, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.3, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.41, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.68, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 5.13, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.56, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.09, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.03, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.14, 
"optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.92, "optimal_onion_potting_agent_1_min": 13, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.14, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.92, "viable_onion_potting_agent_1_min": 13, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [630.0, 633.0, 630.0, 587.0, 630.0, 587.0, 564.0, 576.0, 627.0, 636.0, 630.0, 526.0, 587.0, 582.0, 627.0, 633.0, 633.0, 630.0, 621.0, 579.0, 587.0, 582.0, 633.0, 636.0, 633.0, 576.0, 636.0, 633.0, 587.0, 639.0, 582.0, 636.0, 630.0, 639.0, 627.0, 630.0, 630.0, 587.0, 639.0, 587.0, 630.0, 582.0, 636.0, 636.0, 627.0, 639.0, 639.0, 584.0, 587.0, 639.0, 582.0, 630.0, 579.0, 633.0, 636.0, 633.0, 636.0, 636.0, 582.0, 582.0, 633.0, 587.0, 630.0, 633.0, 630.0, 630.0, 593.0, 582.0, 587.0, 590.0, 633.0, 633.0, 630.0, 630.0, 627.0, 582.0, 573.0, 630.0, 630.0, 582.0, 636.0, 639.0, 630.0, 587.0, 639.0, 630.0, 627.0, 630.0, 630.0, 639.0, 459.0, 582.0, 627.0, 582.0, 627.0, 636.0, 627.0, 633.0, 530.0, 639.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [315.0, 315.0, 316.0, 317.0, 306.0, 324.0, 290.0, 297.0, 316.0, 314.0, 298.0, 289.0, 286.0, 278.0, 287.0, 289.0, 299.0, 328.0, 313.0, 323.0, 321.0, 309.0, 261.0, 265.0, 280.0, 307.0, 291.0, 291.0, 313.0, 314.0, 314.0, 319.0, 318.0, 315.0, 311.0, 319.0, 318.0, 303.0, 291.0, 288.0, 293.0, 294.0, 286.0, 296.0, 321.0, 312.0, 316.0, 320.0, 311.0, 322.0, 288.0, 288.0, 324.0, 312.0, 309.0, 324.0, 300.0, 287.0, 322.0, 317.0, 294.0, 288.0, 322.0, 314.0, 313.0, 317.0, 316.0, 323.0, 316.0, 311.0, 321.0, 309.0, 316.0, 314.0, 292.0, 295.0, 314.0, 325.0, 301.0, 286.0, 311.0, 319.0, 295.0, 287.0, 319.0, 317.0, 317.0, 319.0, 305.0, 322.0, 315.0, 324.0, 317.0, 322.0, 298.0, 286.0, 298.0, 
289.0, 319.0, 320.0, 286.0, 296.0, 309.0, 321.0, 296.0, 283.0, 318.0, 315.0, 314.0, 322.0, 314.0, 319.0, 316.0, 320.0, 314.0, 322.0, 291.0, 291.0, 285.0, 297.0, 319.0, 314.0, 298.0, 289.0, 319.0, 311.0, 316.0, 317.0, 323.0, 307.0, 315.0, 315.0, 297.0, 296.0, 291.0, 291.0, 290.0, 297.0, 294.0, 296.0, 327.0, 306.0, 314.0, 319.0, 308.0, 322.0, 319.0, 311.0, 311.0, 316.0, 294.0, 288.0, 282.0, 291.0, 318.0, 312.0, 309.0, 321.0, 291.0, 291.0, 318.0, 318.0, 317.0, 322.0, 316.0, 314.0, 296.0, 291.0, 318.0, 321.0, 313.0, 317.0, 318.0, 309.0, 311.0, 319.0, 306.0, 324.0, 317.0, 322.0, 231.0, 228.0, 288.0, 294.0, 315.0, 312.0, 294.0, 288.0, 308.0, 319.0, 316.0, 320.0, 324.0, 303.0, 311.0, 322.0, 265.0, 265.0, 314.0, 325.0]}, "sampler_perf": {"mean_env_wait_ms": 0.9267730740231102, "mean_processing_ms": 0.2536434066530759, "mean_inference_ms": 1.510930494877483}, "off_policy_estimator": {}, "info": {"num_steps_trained": 7584000, "num_steps_sampled": 4044800, "sample_time_ms": 21428.224, "load_time_ms": 37.331, "grad_time_ms": 10025.054, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0037510674446821213, "policy_loss": -0.004000961780548096, "vf_loss": 83.20941925048828, "vf_explained_var": 0.7631545066833496, "kl": 0.0021077950950711966, "entropy": 1.1378254890441895, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4044800, "episodes_total": 10112, "training_iteration": 316, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-26-55", "timestamp": 1660256815, "time_this_iter_s": 31.23423171043396, "time_total_s": 15227.692337751389, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 15227.692337751389, "timesteps_since_restore": 4044800, "iterations_since_restore": 316, "perf": {"cpu_util_percent": 34.638636363636365, "ram_util_percent": 58.979545454545466}}
+{"episode_reward_max": 639.0, "episode_reward_min": 342.0, "episode_reward_mean": 610.27, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 166.0}, "policy_reward_max": {"ppo": 329.0}, "policy_reward_mean": {"ppo": 305.135}, "custom_metrics": {"sparse_reward_mean": 211.6, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 187.07, "shaped_reward_min": 102, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.65, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 18.45, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.26, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 17.92, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.36, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.35, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.04, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.12, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.02, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.76, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.18, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.67, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.6, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.06, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.41, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.42, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.67, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 5.11, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.57, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.06, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.03, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.02, 
"optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.76, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.02, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.76, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [633.0, 587.0, 630.0, 627.0, 627.0, 630.0, 624.0, 587.0, 627.0, 639.0, 627.0, 630.0, 582.0, 633.0, 593.0, 633.0, 639.0, 639.0, 630.0, 639.0, 582.0, 342.0, 630.0, 564.0, 639.0, 587.0, 587.0, 627.0, 633.0, 627.0, 627.0, 627.0, 630.0, 630.0, 593.0, 582.0, 587.0, 590.0, 633.0, 633.0, 630.0, 630.0, 627.0, 582.0, 573.0, 630.0, 630.0, 582.0, 636.0, 639.0, 630.0, 587.0, 639.0, 630.0, 627.0, 630.0, 630.0, 639.0, 459.0, 582.0, 627.0, 582.0, 627.0, 636.0, 627.0, 633.0, 530.0, 639.0, 630.0, 633.0, 630.0, 587.0, 630.0, 587.0, 564.0, 576.0, 627.0, 636.0, 630.0, 526.0, 587.0, 582.0, 627.0, 633.0, 633.0, 630.0, 621.0, 579.0, 587.0, 582.0, 633.0, 636.0, 633.0, 576.0, 636.0, 633.0, 587.0, 639.0, 582.0, 636.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [319.0, 314.0, 295.0, 292.0, 311.0, 319.0, 311.0, 316.0, 308.0, 319.0, 322.0, 308.0, 319.0, 305.0, 285.0, 302.0, 306.0, 321.0, 319.0, 320.0, 321.0, 306.0, 321.0, 309.0, 295.0, 287.0, 317.0, 316.0, 299.0, 294.0, 319.0, 314.0, 329.0, 310.0, 319.0, 320.0, 324.0, 306.0, 314.0, 325.0, 292.0, 290.0, 166.0, 176.0, 311.0, 319.0, 265.0, 299.0, 322.0, 317.0, 294.0, 293.0, 287.0, 300.0, 313.0, 314.0, 309.0, 324.0, 316.0, 311.0, 315.0, 312.0, 319.0, 308.0, 323.0, 307.0, 315.0, 315.0, 297.0, 296.0, 291.0, 291.0, 290.0, 297.0, 294.0, 296.0, 327.0, 306.0, 314.0, 319.0, 308.0, 322.0, 319.0, 311.0, 311.0, 316.0, 294.0, 288.0, 282.0, 291.0, 318.0, 312.0, 309.0, 321.0, 291.0, 291.0, 318.0, 
318.0, 317.0, 322.0, 316.0, 314.0, 296.0, 291.0, 318.0, 321.0, 313.0, 317.0, 318.0, 309.0, 311.0, 319.0, 306.0, 324.0, 317.0, 322.0, 231.0, 228.0, 288.0, 294.0, 315.0, 312.0, 294.0, 288.0, 308.0, 319.0, 316.0, 320.0, 324.0, 303.0, 311.0, 322.0, 265.0, 265.0, 314.0, 325.0, 315.0, 315.0, 316.0, 317.0, 306.0, 324.0, 290.0, 297.0, 316.0, 314.0, 298.0, 289.0, 286.0, 278.0, 287.0, 289.0, 299.0, 328.0, 313.0, 323.0, 321.0, 309.0, 261.0, 265.0, 280.0, 307.0, 291.0, 291.0, 313.0, 314.0, 314.0, 319.0, 318.0, 315.0, 311.0, 319.0, 318.0, 303.0, 291.0, 288.0, 293.0, 294.0, 286.0, 296.0, 321.0, 312.0, 316.0, 320.0, 311.0, 322.0, 288.0, 288.0, 324.0, 312.0, 309.0, 324.0, 300.0, 287.0, 322.0, 317.0, 294.0, 288.0, 322.0, 314.0]}, "sampler_perf": {"mean_env_wait_ms": 0.9252951052976854, "mean_processing_ms": 0.2533493735803219, "mean_inference_ms": 1.5101286664706006}, "off_policy_estimator": {}, "info": {"num_steps_trained": 7608000, "num_steps_sampled": 4057600, "sample_time_ms": 21664.809, "load_time_ms": 37.646, "grad_time_ms": 10004.469, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0035793918650597334, "policy_loss": -0.004444916266947985, "vf_loss": 85.9527359008789, "vf_explained_var": 0.7614016532897949, "kl": 0.0019710592459887266, "entropy": 1.1419222354888916, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4057600, "episodes_total": 10144, "training_iteration": 317, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-27-26", "timestamp": 1660256846, "time_this_iter_s": 31.29483914375305, "time_total_s": 15258.987176895142, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 15258.987176895142, "timesteps_since_restore": 4057600, "iterations_since_restore": 317, "perf": {"cpu_util_percent": 35.325, "ram_util_percent": 58.888636363636344}}
+{"episode_reward_max": 639.0, "episode_reward_min": 342.0, "episode_reward_mean": 610.21, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 166.0}, "policy_reward_max": {"ppo": 329.0}, "policy_reward_mean": {"ppo": 305.105}, "custom_metrics": {"sparse_reward_mean": 211.4, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 187.41, "shaped_reward_min": 102, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.61, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 18.6, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.26, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 17.99, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.31, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.4, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.04, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.12, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.99, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.89, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.29, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.67, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.65, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.94, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.44, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.4, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.75, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 5.08, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.65, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.98, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.99, 
"optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.89, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.99, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.89, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 587.0, 639.0, 636.0, 639.0, 630.0, 639.0, 566.0, 639.0, 541.0, 590.0, 587.0, 627.0, 584.0, 639.0, 639.0, 518.0, 582.0, 590.0, 633.0, 630.0, 633.0, 627.0, 636.0, 630.0, 587.0, 633.0, 564.0, 639.0, 630.0, 633.0, 630.0, 627.0, 633.0, 530.0, 639.0, 630.0, 633.0, 630.0, 587.0, 630.0, 587.0, 564.0, 576.0, 627.0, 636.0, 630.0, 526.0, 587.0, 582.0, 627.0, 633.0, 633.0, 630.0, 621.0, 579.0, 587.0, 582.0, 633.0, 636.0, 633.0, 576.0, 636.0, 633.0, 587.0, 639.0, 582.0, 636.0, 633.0, 587.0, 630.0, 627.0, 627.0, 630.0, 624.0, 587.0, 627.0, 639.0, 627.0, 630.0, 582.0, 633.0, 593.0, 633.0, 639.0, 639.0, 630.0, 639.0, 582.0, 342.0, 630.0, 564.0, 639.0, 587.0, 587.0, 627.0, 633.0, 627.0, 627.0, 627.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [297.0, 282.0, 291.0, 296.0, 319.0, 320.0, 319.0, 317.0, 317.0, 322.0, 308.0, 322.0, 325.0, 314.0, 290.0, 276.0, 319.0, 320.0, 277.0, 264.0, 306.0, 284.0, 285.0, 302.0, 318.0, 309.0, 294.0, 290.0, 320.0, 319.0, 327.0, 312.0, 246.0, 272.0, 291.0, 291.0, 296.0, 294.0, 311.0, 322.0, 316.0, 314.0, 326.0, 307.0, 308.0, 319.0, 322.0, 314.0, 310.0, 320.0, 293.0, 294.0, 319.0, 314.0, 277.0, 287.0, 319.0, 320.0, 316.0, 314.0, 321.0, 312.0, 308.0, 322.0, 324.0, 303.0, 311.0, 322.0, 265.0, 265.0, 314.0, 325.0, 315.0, 315.0, 316.0, 317.0, 306.0, 324.0, 290.0, 297.0, 316.0, 314.0, 298.0, 289.0, 286.0, 278.0, 287.0, 289.0, 299.0, 328.0, 313.0, 323.0, 321.0, 309.0, 261.0, 265.0, 280.0, 
307.0, 291.0, 291.0, 313.0, 314.0, 314.0, 319.0, 318.0, 315.0, 311.0, 319.0, 318.0, 303.0, 291.0, 288.0, 293.0, 294.0, 286.0, 296.0, 321.0, 312.0, 316.0, 320.0, 311.0, 322.0, 288.0, 288.0, 324.0, 312.0, 309.0, 324.0, 300.0, 287.0, 322.0, 317.0, 294.0, 288.0, 322.0, 314.0, 319.0, 314.0, 295.0, 292.0, 311.0, 319.0, 311.0, 316.0, 308.0, 319.0, 322.0, 308.0, 319.0, 305.0, 285.0, 302.0, 306.0, 321.0, 319.0, 320.0, 321.0, 306.0, 321.0, 309.0, 295.0, 287.0, 317.0, 316.0, 299.0, 294.0, 319.0, 314.0, 329.0, 310.0, 319.0, 320.0, 324.0, 306.0, 314.0, 325.0, 292.0, 290.0, 166.0, 176.0, 311.0, 319.0, 265.0, 299.0, 322.0, 317.0, 294.0, 293.0, 287.0, 300.0, 313.0, 314.0, 309.0, 324.0, 316.0, 311.0, 315.0, 312.0, 319.0, 308.0]}, "sampler_perf": {"mean_env_wait_ms": 0.9238104630131461, "mean_processing_ms": 0.2530528359146936, "mean_inference_ms": 1.5086822690810806}, "off_policy_estimator": {}, "info": {"num_steps_trained": 7632000, "num_steps_sampled": 4070400, "sample_time_ms": 21837.173, "load_time_ms": 37.475, "grad_time_ms": 10172.214, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0033606337383389473, "policy_loss": -0.004180160816758871, "vf_loss": 81.06964111328125, "vf_explained_var": 0.7688854336738586, "kl": 0.0033983252942562103, "entropy": 1.1323426961898804, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4070400, "episodes_total": 10176, "training_iteration": 318, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-27-57", "timestamp": 1660256877, "time_this_iter_s": 30.971107959747314, "time_total_s": 15289.958284854889, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 15289.958284854889, "timesteps_since_restore": 4070400, "iterations_since_restore": 318, "perf": {"cpu_util_percent": 34.67727272727273, "ram_util_percent": 58.75454545454544}}
+{"episode_reward_max": 639.0, "episode_reward_min": 342.0, "episode_reward_mean": 610.4, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 166.0}, "policy_reward_max": {"ppo": 329.0}, "policy_reward_mean": {"ppo": 305.2}, "custom_metrics": {"sparse_reward_mean": 211.4, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 187.6, "shaped_reward_min": 102, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.79, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 18.4, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.42, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 17.75, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.34, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.37, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.02, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.13, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.13, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.74, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.39, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.79, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.67, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.01, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.54, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.47, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.69, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 5.12, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.62, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.01, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.13, 
"optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.74, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.13, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.74, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 630.0, 627.0, 630.0, 633.0, 579.0, 530.0, 587.0, 570.0, 636.0, 627.0, 579.0, 636.0, 633.0, 630.0, 636.0, 624.0, 587.0, 621.0, 630.0, 582.0, 636.0, 630.0, 633.0, 596.0, 630.0, 541.0, 636.0, 630.0, 627.0, 582.0, 582.0, 587.0, 639.0, 582.0, 636.0, 633.0, 587.0, 630.0, 627.0, 627.0, 630.0, 624.0, 587.0, 627.0, 639.0, 627.0, 630.0, 582.0, 633.0, 593.0, 633.0, 639.0, 639.0, 630.0, 639.0, 582.0, 342.0, 630.0, 564.0, 639.0, 587.0, 587.0, 627.0, 633.0, 627.0, 627.0, 627.0, 579.0, 587.0, 639.0, 636.0, 639.0, 630.0, 639.0, 566.0, 639.0, 541.0, 590.0, 587.0, 627.0, 584.0, 639.0, 639.0, 518.0, 582.0, 590.0, 633.0, 630.0, 633.0, 627.0, 636.0, 630.0, 587.0, 633.0, 564.0, 639.0, 630.0, 633.0, 630.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [292.0, 290.0, 320.0, 310.0, 313.0, 314.0, 316.0, 314.0, 319.0, 314.0, 291.0, 288.0, 256.0, 274.0, 283.0, 304.0, 286.0, 284.0, 319.0, 317.0, 312.0, 315.0, 291.0, 288.0, 319.0, 317.0, 319.0, 314.0, 308.0, 322.0, 317.0, 319.0, 314.0, 310.0, 293.0, 294.0, 307.0, 314.0, 313.0, 317.0, 291.0, 291.0, 312.0, 324.0, 313.0, 317.0, 324.0, 309.0, 304.0, 292.0, 308.0, 322.0, 272.0, 269.0, 309.0, 327.0, 306.0, 324.0, 318.0, 309.0, 293.0, 289.0, 291.0, 291.0, 300.0, 287.0, 322.0, 317.0, 294.0, 288.0, 322.0, 314.0, 319.0, 314.0, 295.0, 292.0, 311.0, 319.0, 311.0, 316.0, 308.0, 319.0, 322.0, 308.0, 319.0, 305.0, 285.0, 302.0, 306.0, 321.0, 319.0, 320.0, 321.0, 306.0, 321.0, 309.0, 295.0, 
287.0, 317.0, 316.0, 299.0, 294.0, 319.0, 314.0, 329.0, 310.0, 319.0, 320.0, 324.0, 306.0, 314.0, 325.0, 292.0, 290.0, 166.0, 176.0, 311.0, 319.0, 265.0, 299.0, 322.0, 317.0, 294.0, 293.0, 287.0, 300.0, 313.0, 314.0, 309.0, 324.0, 316.0, 311.0, 315.0, 312.0, 319.0, 308.0, 297.0, 282.0, 291.0, 296.0, 319.0, 320.0, 319.0, 317.0, 317.0, 322.0, 308.0, 322.0, 325.0, 314.0, 290.0, 276.0, 319.0, 320.0, 277.0, 264.0, 306.0, 284.0, 285.0, 302.0, 318.0, 309.0, 294.0, 290.0, 320.0, 319.0, 327.0, 312.0, 246.0, 272.0, 291.0, 291.0, 296.0, 294.0, 311.0, 322.0, 316.0, 314.0, 326.0, 307.0, 308.0, 319.0, 322.0, 314.0, 310.0, 320.0, 293.0, 294.0, 319.0, 314.0, 277.0, 287.0, 319.0, 320.0, 316.0, 314.0, 321.0, 312.0, 308.0, 322.0]}, "sampler_perf": {"mean_env_wait_ms": 0.9223351501134641, "mean_processing_ms": 0.2527584584336856, "mean_inference_ms": 1.5072340124708836}, "off_policy_estimator": {}, "info": {"num_steps_trained": 7656000, "num_steps_sampled": 4083200, "sample_time_ms": 21920.847, "load_time_ms": 37.336, "grad_time_ms": 10402.485, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0003446048649493605, "policy_loss": -0.007382390554994345, "vf_loss": 82.95357513427734, "vf_explained_var": 0.759884774684906, "kl": 0.0017484420677646995, "entropy": 1.1367279291152954, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4083200, "episodes_total": 10208, "training_iteration": 319, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-28-31", "timestamp": 1660256911, "time_this_iter_s": 33.19297218322754, "time_total_s": 15323.151257038116, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 15323.151257038116, "timesteps_since_restore": 4083200, "iterations_since_restore": 319, "perf": {"cpu_util_percent": 34.074468085106375, "ram_util_percent": 59.221276595744676}}
+{"episode_reward_max": 639.0, "episode_reward_min": 518.0, "episode_reward_mean": 612.17, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 246.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 306.085}, "custom_metrics": {"sparse_reward_mean": 212.0, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 188.17, "shaped_reward_min": 158, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.9, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 18.56, "onion_pickup_agent_1_min": 14, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.53, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 17.91, "useful_onion_pickup_agent_1_min": 14, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.4, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.41, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.03, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.13, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.13, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.85, "potting_onion_agent_1_min": 14, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.38, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.95, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.61, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.05, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.57, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.58, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.67, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.19, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.6, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.07, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.13, 
"optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.85, "optimal_onion_potting_agent_1_min": 14, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.13, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.85, "viable_onion_potting_agent_1_min": 14, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [627.0, 630.0, 630.0, 639.0, 633.0, 627.0, 590.0, 587.0, 633.0, 630.0, 536.0, 624.0, 630.0, 575.0, 636.0, 587.0, 590.0, 630.0, 587.0, 633.0, 587.0, 633.0, 633.0, 630.0, 570.0, 630.0, 587.0, 633.0, 582.0, 627.0, 633.0, 636.0, 633.0, 627.0, 627.0, 627.0, 579.0, 587.0, 639.0, 636.0, 639.0, 630.0, 639.0, 566.0, 639.0, 541.0, 590.0, 587.0, 627.0, 584.0, 639.0, 639.0, 518.0, 582.0, 590.0, 633.0, 630.0, 633.0, 627.0, 636.0, 630.0, 587.0, 633.0, 564.0, 639.0, 630.0, 633.0, 630.0, 582.0, 630.0, 627.0, 630.0, 633.0, 579.0, 530.0, 587.0, 570.0, 636.0, 627.0, 579.0, 636.0, 633.0, 630.0, 636.0, 624.0, 587.0, 621.0, 630.0, 582.0, 636.0, 630.0, 633.0, 596.0, 630.0, 541.0, 636.0, 630.0, 627.0, 582.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [313.0, 314.0, 311.0, 319.0, 313.0, 317.0, 316.0, 323.0, 314.0, 319.0, 311.0, 316.0, 299.0, 291.0, 296.0, 291.0, 311.0, 322.0, 313.0, 317.0, 257.0, 279.0, 318.0, 306.0, 308.0, 322.0, 281.0, 294.0, 314.0, 322.0, 291.0, 296.0, 298.0, 292.0, 309.0, 321.0, 290.0, 297.0, 313.0, 320.0, 293.0, 294.0, 321.0, 312.0, 319.0, 314.0, 314.0, 316.0, 288.0, 282.0, 315.0, 315.0, 291.0, 296.0, 319.0, 314.0, 302.0, 280.0, 313.0, 314.0, 314.0, 319.0, 313.0, 323.0, 309.0, 324.0, 316.0, 311.0, 315.0, 312.0, 319.0, 308.0, 297.0, 282.0, 291.0, 296.0, 319.0, 320.0, 319.0, 317.0, 317.0, 322.0, 308.0, 322.0, 325.0, 314.0, 290.0, 276.0, 319.0, 320.0, 277.0, 264.0, 306.0, 284.0, 285.0, 302.0, 318.0, 
309.0, 294.0, 290.0, 320.0, 319.0, 327.0, 312.0, 246.0, 272.0, 291.0, 291.0, 296.0, 294.0, 311.0, 322.0, 316.0, 314.0, 326.0, 307.0, 308.0, 319.0, 322.0, 314.0, 310.0, 320.0, 293.0, 294.0, 319.0, 314.0, 277.0, 287.0, 319.0, 320.0, 316.0, 314.0, 321.0, 312.0, 308.0, 322.0, 292.0, 290.0, 320.0, 310.0, 313.0, 314.0, 316.0, 314.0, 319.0, 314.0, 291.0, 288.0, 256.0, 274.0, 283.0, 304.0, 286.0, 284.0, 319.0, 317.0, 312.0, 315.0, 291.0, 288.0, 319.0, 317.0, 319.0, 314.0, 308.0, 322.0, 317.0, 319.0, 314.0, 310.0, 293.0, 294.0, 307.0, 314.0, 313.0, 317.0, 291.0, 291.0, 312.0, 324.0, 313.0, 317.0, 324.0, 309.0, 304.0, 292.0, 308.0, 322.0, 272.0, 269.0, 309.0, 327.0, 306.0, 324.0, 318.0, 309.0, 293.0, 289.0, 291.0, 291.0]}, "sampler_perf": {"mean_env_wait_ms": 0.9208740877938348, "mean_processing_ms": 0.25246719663876194, "mean_inference_ms": 1.5059256221319566}, "off_policy_estimator": {}, "info": {"num_steps_trained": 7680000, "num_steps_sampled": 4096000, "sample_time_ms": 22117.687, "load_time_ms": 37.357, "grad_time_ms": 10501.528, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0030171778053045273, "policy_loss": -0.004927590023726225, "vf_loss": 85.14810943603516, "vf_explained_var": 0.76070237159729, "kl": 0.002144080586731434, "entropy": 1.140079379081726, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4096000, "episodes_total": 10240, "training_iteration": 320, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-29-03", "timestamp": 1660256943, "time_this_iter_s": 32.6832230091095, "time_total_s": 15355.834480047226, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 15355.834480047226, "timesteps_since_restore": 4096000, "iterations_since_restore": 320, "perf": {"cpu_util_percent": 32.95652173913044, "ram_util_percent": 59.06739130434784}}
+{"episode_reward_max": 639.0, "episode_reward_min": 530.0, "episode_reward_mean": 613.64, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 256.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 306.82}, "custom_metrics": {"sparse_reward_mean": 212.6, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 188.44, "shaped_reward_min": 167, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.75, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 18.81, "onion_pickup_agent_1_min": 14, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 16.39, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 18.11, "useful_onion_pickup_agent_1_min": 14, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.37, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.47, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.03, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.15, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.06, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 18.04, "potting_onion_agent_1_min": 14, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.47, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.85, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.68, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.0, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.64, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.59, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.7, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.12, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.62, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.05, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.06, 
"optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 18.04, "optimal_onion_potting_agent_1_min": 14, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.06, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 18.04, "viable_onion_potting_agent_1_min": 14, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [567.0, 582.0, 576.0, 587.0, 630.0, 636.0, 630.0, 636.0, 630.0, 636.0, 624.0, 639.0, 582.0, 624.0, 582.0, 633.0, 587.0, 636.0, 633.0, 636.0, 587.0, 636.0, 633.0, 633.0, 636.0, 633.0, 633.0, 627.0, 587.0, 633.0, 582.0, 579.0, 639.0, 630.0, 633.0, 630.0, 582.0, 630.0, 627.0, 630.0, 633.0, 579.0, 530.0, 587.0, 570.0, 636.0, 627.0, 579.0, 636.0, 633.0, 630.0, 636.0, 624.0, 587.0, 621.0, 630.0, 582.0, 636.0, 630.0, 633.0, 596.0, 630.0, 541.0, 636.0, 630.0, 627.0, 582.0, 582.0, 627.0, 630.0, 630.0, 639.0, 633.0, 627.0, 590.0, 587.0, 633.0, 630.0, 536.0, 624.0, 630.0, 575.0, 636.0, 587.0, 590.0, 630.0, 587.0, 633.0, 587.0, 633.0, 633.0, 630.0, 570.0, 630.0, 587.0, 633.0, 582.0, 627.0, 633.0, 636.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [285.0, 282.0, 287.0, 295.0, 289.0, 287.0, 299.0, 288.0, 326.0, 304.0, 321.0, 315.0, 311.0, 319.0, 314.0, 322.0, 317.0, 313.0, 322.0, 314.0, 311.0, 313.0, 317.0, 322.0, 287.0, 295.0, 313.0, 311.0, 302.0, 280.0, 314.0, 319.0, 295.0, 292.0, 322.0, 314.0, 316.0, 317.0, 324.0, 312.0, 294.0, 293.0, 314.0, 322.0, 311.0, 322.0, 324.0, 309.0, 316.0, 320.0, 318.0, 315.0, 315.0, 318.0, 307.0, 320.0, 296.0, 291.0, 313.0, 320.0, 293.0, 289.0, 288.0, 291.0, 319.0, 320.0, 316.0, 314.0, 321.0, 312.0, 308.0, 322.0, 292.0, 290.0, 320.0, 310.0, 313.0, 314.0, 316.0, 314.0, 319.0, 314.0, 291.0, 288.0, 256.0, 274.0, 283.0, 304.0, 286.0, 284.0, 319.0, 317.0, 312.0, 315.0, 291.0, 288.0, 319.0, 
317.0, 319.0, 314.0, 308.0, 322.0, 317.0, 319.0, 314.0, 310.0, 293.0, 294.0, 307.0, 314.0, 313.0, 317.0, 291.0, 291.0, 312.0, 324.0, 313.0, 317.0, 324.0, 309.0, 304.0, 292.0, 308.0, 322.0, 272.0, 269.0, 309.0, 327.0, 306.0, 324.0, 318.0, 309.0, 293.0, 289.0, 291.0, 291.0, 313.0, 314.0, 311.0, 319.0, 313.0, 317.0, 316.0, 323.0, 314.0, 319.0, 311.0, 316.0, 299.0, 291.0, 296.0, 291.0, 311.0, 322.0, 313.0, 317.0, 257.0, 279.0, 318.0, 306.0, 308.0, 322.0, 281.0, 294.0, 314.0, 322.0, 291.0, 296.0, 298.0, 292.0, 309.0, 321.0, 290.0, 297.0, 313.0, 320.0, 293.0, 294.0, 321.0, 312.0, 319.0, 314.0, 314.0, 316.0, 288.0, 282.0, 315.0, 315.0, 291.0, 296.0, 319.0, 314.0, 302.0, 280.0, 313.0, 314.0, 314.0, 319.0, 313.0, 323.0]}, "sampler_perf": {"mean_env_wait_ms": 0.9194369912164846, "mean_processing_ms": 0.25218136491616727, "mean_inference_ms": 1.5049782377407859}, "off_policy_estimator": {}, "info": {"num_steps_trained": 7704000, "num_steps_sampled": 4108800, "sample_time_ms": 22673.048, "load_time_ms": 37.017, "grad_time_ms": 10506.62, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.004381106700748205, "policy_loss": -0.0034678278025239706, "vf_loss": 84.1359634399414, "vf_explained_var": 0.762717068195343, "kl": 0.0020634233951568604, "entropy": 1.12932288646698, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4108800, "episodes_total": 10272, "training_iteration": 321, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-29-39", "timestamp": 1660256979, "time_this_iter_s": 35.97740912437439, "time_total_s": 15391.8118891716, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 15391.8118891716, "timesteps_since_restore": 4108800, "iterations_since_restore": 321, "perf": {"cpu_util_percent": 32.44705882352941, "ram_util_percent": 58.78039215686273}}
+{"episode_reward_max": 639.0, "episode_reward_min": 536.0, "episode_reward_mean": 611.56, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 257.0}, "policy_reward_max": {"ppo": 328.0}, "policy_reward_mean": {"ppo": 305.78}, "custom_metrics": {"sparse_reward_mean": 211.8, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 187.96, "shaped_reward_min": 164, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.75, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 18.77, "onion_pickup_agent_1_min": 13, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 16.41, "useful_onion_pickup_agent_0_min": 13, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 18.12, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.33, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.5, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.03, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.11, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.12, "potting_onion_agent_0_min": 13, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.98, "potting_onion_agent_1_min": 13, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.43, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.82, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.59, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.98, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.6, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.57, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.68, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.11, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.6, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.02, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.12, 
"optimal_onion_potting_agent_0_min": 13, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.98, "optimal_onion_potting_agent_1_min": 13, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.12, "viable_onion_potting_agent_0_min": 13, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.98, "viable_onion_potting_agent_1_min": 13, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [587.0, 587.0, 636.0, 627.0, 582.0, 582.0, 579.0, 630.0, 636.0, 573.0, 639.0, 633.0, 578.0, 564.0, 579.0, 630.0, 639.0, 587.0, 636.0, 627.0, 636.0, 633.0, 579.0, 636.0, 536.0, 630.0, 633.0, 584.0, 587.0, 567.0, 627.0, 636.0, 630.0, 627.0, 582.0, 582.0, 627.0, 630.0, 630.0, 639.0, 633.0, 627.0, 590.0, 587.0, 633.0, 630.0, 536.0, 624.0, 630.0, 575.0, 636.0, 587.0, 590.0, 630.0, 587.0, 633.0, 587.0, 633.0, 633.0, 630.0, 570.0, 630.0, 587.0, 633.0, 582.0, 627.0, 633.0, 636.0, 567.0, 582.0, 576.0, 587.0, 630.0, 636.0, 630.0, 636.0, 630.0, 636.0, 624.0, 639.0, 582.0, 624.0, 582.0, 633.0, 587.0, 636.0, 633.0, 636.0, 587.0, 636.0, 633.0, 633.0, 636.0, 633.0, 633.0, 627.0, 587.0, 633.0, 582.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [292.0, 295.0, 285.0, 302.0, 316.0, 320.0, 321.0, 306.0, 292.0, 290.0, 285.0, 297.0, 289.0, 290.0, 319.0, 311.0, 322.0, 314.0, 284.0, 289.0, 322.0, 317.0, 305.0, 328.0, 301.0, 277.0, 287.0, 277.0, 295.0, 284.0, 306.0, 324.0, 324.0, 315.0, 285.0, 302.0, 322.0, 314.0, 308.0, 319.0, 316.0, 320.0, 314.0, 319.0, 291.0, 288.0, 319.0, 317.0, 262.0, 274.0, 308.0, 322.0, 316.0, 317.0, 292.0, 292.0, 290.0, 297.0, 285.0, 282.0, 314.0, 313.0, 319.0, 317.0, 306.0, 324.0, 318.0, 309.0, 293.0, 289.0, 291.0, 291.0, 313.0, 314.0, 311.0, 319.0, 313.0, 317.0, 316.0, 323.0, 314.0, 319.0, 311.0, 316.0, 299.0, 291.0, 296.0, 291.0, 311.0, 322.0, 313.0, 317.0, 257.0, 279.0, 318.0, 306.0, 308.0, 
322.0, 281.0, 294.0, 314.0, 322.0, 291.0, 296.0, 298.0, 292.0, 309.0, 321.0, 290.0, 297.0, 313.0, 320.0, 293.0, 294.0, 321.0, 312.0, 319.0, 314.0, 314.0, 316.0, 288.0, 282.0, 315.0, 315.0, 291.0, 296.0, 319.0, 314.0, 302.0, 280.0, 313.0, 314.0, 314.0, 319.0, 313.0, 323.0, 285.0, 282.0, 287.0, 295.0, 289.0, 287.0, 299.0, 288.0, 326.0, 304.0, 321.0, 315.0, 311.0, 319.0, 314.0, 322.0, 317.0, 313.0, 322.0, 314.0, 311.0, 313.0, 317.0, 322.0, 287.0, 295.0, 313.0, 311.0, 302.0, 280.0, 314.0, 319.0, 295.0, 292.0, 322.0, 314.0, 316.0, 317.0, 324.0, 312.0, 294.0, 293.0, 314.0, 322.0, 311.0, 322.0, 324.0, 309.0, 316.0, 320.0, 318.0, 315.0, 315.0, 318.0, 307.0, 320.0, 296.0, 291.0, 313.0, 320.0, 293.0, 289.0, 288.0, 291.0]}, "sampler_perf": {"mean_env_wait_ms": 0.9180140863586184, "mean_processing_ms": 0.2518987033904737, "mean_inference_ms": 1.5041907922787607}, "off_policy_estimator": {}, "info": {"num_steps_trained": 7728000, "num_steps_sampled": 4121600, "sample_time_ms": 23101.321, "load_time_ms": 36.599, "grad_time_ms": 10589.639, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.00461611757054925, "policy_loss": -0.003138140542432666, "vf_loss": 83.23612213134766, "vf_explained_var": 0.7696110606193542, "kl": 0.0018815431976690888, "entropy": 1.1387158632278442, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4121600, "episodes_total": 10304, "training_iteration": 322, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-30-14", "timestamp": 1660257014, "time_this_iter_s": 34.30680704116821, "time_total_s": 15426.118696212769, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 15426.118696212769, "timesteps_since_restore": 4121600, "iterations_since_restore": 322, "perf": {"cpu_util_percent": 32.239583333333336, "ram_util_percent": 58.845833333333324}}
+{"episode_reward_max": 639.0, "episode_reward_min": 536.0, "episode_reward_mean": 612.88, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 262.0}, "policy_reward_max": {"ppo": 328.0}, "policy_reward_mean": {"ppo": 306.44}, "custom_metrics": {"sparse_reward_mean": 212.4, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 188.08, "shaped_reward_min": 161, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.92, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 18.77, "onion_pickup_agent_1_min": 13, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 16.56, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 18.03, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.33, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.51, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.06, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.13, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.28, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 18.01, "potting_onion_agent_1_min": 13, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.51, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.68, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.57, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.9, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.65, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.46, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.72, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.04, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.68, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.96, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.28, 
"optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 18.01, "optimal_onion_potting_agent_1_min": 13, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.28, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 18.01, "viable_onion_potting_agent_1_min": 13, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [633.0, 636.0, 630.0, 630.0, 627.0, 630.0, 636.0, 636.0, 636.0, 639.0, 536.0, 627.0, 630.0, 633.0, 639.0, 573.0, 636.0, 636.0, 561.0, 636.0, 582.0, 587.0, 633.0, 561.0, 639.0, 587.0, 633.0, 627.0, 570.0, 633.0, 579.0, 639.0, 582.0, 627.0, 633.0, 636.0, 567.0, 582.0, 576.0, 587.0, 630.0, 636.0, 630.0, 636.0, 630.0, 636.0, 624.0, 639.0, 582.0, 624.0, 582.0, 633.0, 587.0, 636.0, 633.0, 636.0, 587.0, 636.0, 633.0, 633.0, 636.0, 633.0, 633.0, 627.0, 587.0, 633.0, 582.0, 579.0, 587.0, 587.0, 636.0, 627.0, 582.0, 582.0, 579.0, 630.0, 636.0, 573.0, 639.0, 633.0, 578.0, 564.0, 579.0, 630.0, 639.0, 587.0, 636.0, 627.0, 636.0, 633.0, 579.0, 636.0, 536.0, 630.0, 633.0, 584.0, 587.0, 567.0, 627.0, 636.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [316.0, 317.0, 314.0, 322.0, 316.0, 314.0, 319.0, 311.0, 308.0, 319.0, 318.0, 312.0, 320.0, 316.0, 319.0, 317.0, 317.0, 319.0, 319.0, 320.0, 262.0, 274.0, 311.0, 316.0, 319.0, 311.0, 322.0, 311.0, 319.0, 320.0, 291.0, 282.0, 318.0, 318.0, 316.0, 320.0, 279.0, 282.0, 324.0, 312.0, 295.0, 287.0, 293.0, 294.0, 319.0, 314.0, 283.0, 278.0, 319.0, 320.0, 288.0, 299.0, 319.0, 314.0, 316.0, 311.0, 276.0, 294.0, 324.0, 309.0, 288.0, 291.0, 319.0, 320.0, 302.0, 280.0, 313.0, 314.0, 314.0, 319.0, 313.0, 323.0, 285.0, 282.0, 287.0, 295.0, 289.0, 287.0, 299.0, 288.0, 326.0, 304.0, 321.0, 315.0, 311.0, 319.0, 314.0, 322.0, 317.0, 313.0, 322.0, 314.0, 311.0, 313.0, 317.0, 322.0, 287.0, 
295.0, 313.0, 311.0, 302.0, 280.0, 314.0, 319.0, 295.0, 292.0, 322.0, 314.0, 316.0, 317.0, 324.0, 312.0, 294.0, 293.0, 314.0, 322.0, 311.0, 322.0, 324.0, 309.0, 316.0, 320.0, 318.0, 315.0, 315.0, 318.0, 307.0, 320.0, 296.0, 291.0, 313.0, 320.0, 293.0, 289.0, 288.0, 291.0, 292.0, 295.0, 285.0, 302.0, 316.0, 320.0, 321.0, 306.0, 292.0, 290.0, 285.0, 297.0, 289.0, 290.0, 319.0, 311.0, 322.0, 314.0, 284.0, 289.0, 322.0, 317.0, 305.0, 328.0, 301.0, 277.0, 287.0, 277.0, 295.0, 284.0, 306.0, 324.0, 324.0, 315.0, 285.0, 302.0, 322.0, 314.0, 308.0, 319.0, 316.0, 320.0, 314.0, 319.0, 291.0, 288.0, 319.0, 317.0, 262.0, 274.0, 308.0, 322.0, 316.0, 317.0, 292.0, 292.0, 290.0, 297.0, 285.0, 282.0, 314.0, 313.0, 319.0, 317.0]}, "sampler_perf": {"mean_env_wait_ms": 0.9165951119530592, "mean_processing_ms": 0.25161700044619856, "mean_inference_ms": 1.5033026848004383}, "off_policy_estimator": {}, "info": {"num_steps_trained": 7752000, "num_steps_sampled": 4134400, "sample_time_ms": 23061.265, "load_time_ms": 36.453, "grad_time_ms": 10583.338, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.006471332162618637, "policy_loss": -0.0015162205090746284, "vf_loss": 85.53211212158203, "vf_explained_var": 0.7684184908866882, "kl": 0.002009378978982568, "entropy": 1.1313238143920898, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4134400, "episodes_total": 10336, "training_iteration": 323, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-30-45", "timestamp": 1660257045, "time_this_iter_s": 31.667726039886475, "time_total_s": 15457.786422252655, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 15457.786422252655, "timesteps_since_restore": 4134400, "iterations_since_restore": 323, "perf": {"cpu_util_percent": 32.73111111111111, "ram_util_percent": 58.83555555555553}}
+{"episode_reward_max": 639.0, "episode_reward_min": 536.0, "episode_reward_mean": 610.56, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 262.0}, "policy_reward_max": {"ppo": 328.0}, "policy_reward_mean": {"ppo": 305.28}, "custom_metrics": {"sparse_reward_mean": 211.6, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 187.36, "shaped_reward_min": 158, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.28, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 18.39, "onion_pickup_agent_1_min": 12, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.82, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 17.62, "useful_onion_pickup_agent_1_min": 12, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.34, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.48, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.09, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.16, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 16.57, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.65, "potting_onion_agent_1_min": 11, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.38, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.98, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 5.37, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.98, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.65, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.6, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.05, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.56, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.17, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.54, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.07, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.57, 
"optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.65, "optimal_onion_potting_agent_1_min": 11, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.57, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.65, "viable_onion_potting_agent_1_min": 11, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [596.0, 636.0, 639.0, 633.0, 633.0, 584.0, 582.0, 579.0, 636.0, 639.0, 587.0, 579.0, 558.0, 627.0, 639.0, 573.0, 630.0, 627.0, 581.0, 639.0, 639.0, 639.0, 636.0, 636.0, 567.0, 576.0, 573.0, 570.0, 621.0, 636.0, 630.0, 630.0, 587.0, 633.0, 582.0, 579.0, 587.0, 587.0, 636.0, 627.0, 582.0, 582.0, 579.0, 630.0, 636.0, 573.0, 639.0, 633.0, 578.0, 564.0, 579.0, 630.0, 639.0, 587.0, 636.0, 627.0, 636.0, 633.0, 579.0, 636.0, 536.0, 630.0, 633.0, 584.0, 587.0, 567.0, 627.0, 636.0, 633.0, 636.0, 630.0, 630.0, 627.0, 630.0, 636.0, 636.0, 636.0, 639.0, 536.0, 627.0, 630.0, 633.0, 639.0, 573.0, 636.0, 636.0, 561.0, 636.0, 582.0, 587.0, 633.0, 561.0, 639.0, 587.0, 633.0, 627.0, 570.0, 633.0, 579.0, 639.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [297.0, 299.0, 319.0, 317.0, 319.0, 320.0, 317.0, 316.0, 317.0, 316.0, 304.0, 280.0, 288.0, 294.0, 289.0, 290.0, 322.0, 314.0, 317.0, 322.0, 288.0, 299.0, 290.0, 289.0, 274.0, 284.0, 316.0, 311.0, 322.0, 317.0, 290.0, 283.0, 319.0, 311.0, 314.0, 313.0, 288.0, 293.0, 319.0, 320.0, 322.0, 317.0, 325.0, 314.0, 314.0, 322.0, 319.0, 317.0, 296.0, 271.0, 281.0, 295.0, 277.0, 296.0, 287.0, 283.0, 307.0, 314.0, 314.0, 322.0, 313.0, 317.0, 316.0, 314.0, 296.0, 291.0, 313.0, 320.0, 293.0, 289.0, 288.0, 291.0, 292.0, 295.0, 285.0, 302.0, 316.0, 320.0, 321.0, 306.0, 292.0, 290.0, 285.0, 297.0, 289.0, 290.0, 319.0, 311.0, 322.0, 314.0, 284.0, 289.0, 322.0, 317.0, 305.0, 328.0, 301.0, 
277.0, 287.0, 277.0, 295.0, 284.0, 306.0, 324.0, 324.0, 315.0, 285.0, 302.0, 322.0, 314.0, 308.0, 319.0, 316.0, 320.0, 314.0, 319.0, 291.0, 288.0, 319.0, 317.0, 262.0, 274.0, 308.0, 322.0, 316.0, 317.0, 292.0, 292.0, 290.0, 297.0, 285.0, 282.0, 314.0, 313.0, 319.0, 317.0, 316.0, 317.0, 314.0, 322.0, 316.0, 314.0, 319.0, 311.0, 308.0, 319.0, 318.0, 312.0, 320.0, 316.0, 319.0, 317.0, 317.0, 319.0, 319.0, 320.0, 262.0, 274.0, 311.0, 316.0, 319.0, 311.0, 322.0, 311.0, 319.0, 320.0, 291.0, 282.0, 318.0, 318.0, 316.0, 320.0, 279.0, 282.0, 324.0, 312.0, 295.0, 287.0, 293.0, 294.0, 319.0, 314.0, 283.0, 278.0, 319.0, 320.0, 288.0, 299.0, 319.0, 314.0, 316.0, 311.0, 276.0, 294.0, 324.0, 309.0, 288.0, 291.0, 319.0, 320.0]}, "sampler_perf": {"mean_env_wait_ms": 0.9151684979449644, "mean_processing_ms": 0.2513316747000567, "mean_inference_ms": 1.5020585872568248}, "off_policy_estimator": {}, "info": {"num_steps_trained": 7776000, "num_steps_sampled": 4147200, "sample_time_ms": 23007.183, "load_time_ms": 36.594, "grad_time_ms": 10309.706, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.001403640490025282, "policy_loss": -0.009379498660564423, "vf_loss": 85.44359588623047, "vf_explained_var": 0.7652726769447327, "kl": 0.0018997077131643891, "entropy": 1.136988639831543, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4147200, "episodes_total": 10368, "training_iteration": 324, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-31-15", "timestamp": 1660257075, "time_this_iter_s": 30.05816674232483, "time_total_s": 15487.84458899498, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 15487.84458899498, "timesteps_since_restore": 4147200, "iterations_since_restore": 324, "perf": {"cpu_util_percent": 31.88372093023256, "ram_util_percent": 58.74418604651163}}
+{"episode_reward_max": 639.0, "episode_reward_min": 465.0, "episode_reward_mean": 611.97, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 229.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 305.985}, "custom_metrics": {"sparse_reward_mean": 212.4, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 187.17, "shaped_reward_min": 138, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.44, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 18.37, "onion_pickup_agent_1_min": 12, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.97, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 17.6, "useful_onion_pickup_agent_1_min": 12, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.46, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.48, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.14, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.17, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 16.63, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.55, "potting_onion_agent_1_min": 11, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.38, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 6.07, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 5.34, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.97, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.72, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.67, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.04, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.52, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.24, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.51, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.15, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.63, 
"optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.55, "optimal_onion_potting_agent_1_min": 11, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.63, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.55, "viable_onion_potting_agent_1_min": 11, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [630.0, 582.0, 630.0, 630.0, 630.0, 579.0, 636.0, 633.0, 593.0, 630.0, 587.0, 575.0, 633.0, 470.0, 633.0, 465.0, 633.0, 498.0, 633.0, 627.0, 636.0, 639.0, 627.0, 630.0, 630.0, 636.0, 627.0, 636.0, 636.0, 627.0, 630.0, 639.0, 587.0, 567.0, 627.0, 636.0, 633.0, 636.0, 630.0, 630.0, 627.0, 630.0, 636.0, 636.0, 636.0, 639.0, 536.0, 627.0, 630.0, 633.0, 639.0, 573.0, 636.0, 636.0, 561.0, 636.0, 582.0, 587.0, 633.0, 561.0, 639.0, 587.0, 633.0, 627.0, 570.0, 633.0, 579.0, 639.0, 596.0, 636.0, 639.0, 633.0, 633.0, 584.0, 582.0, 579.0, 636.0, 639.0, 587.0, 579.0, 558.0, 627.0, 639.0, 573.0, 630.0, 627.0, 581.0, 639.0, 639.0, 639.0, 636.0, 636.0, 567.0, 576.0, 573.0, 570.0, 621.0, 636.0, 630.0, 630.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [313.0, 317.0, 296.0, 286.0, 321.0, 309.0, 307.0, 323.0, 321.0, 309.0, 285.0, 294.0, 327.0, 309.0, 314.0, 319.0, 293.0, 300.0, 309.0, 321.0, 299.0, 288.0, 285.0, 290.0, 323.0, 310.0, 231.0, 239.0, 309.0, 324.0, 236.0, 229.0, 307.0, 326.0, 250.0, 248.0, 319.0, 314.0, 308.0, 319.0, 319.0, 317.0, 319.0, 320.0, 312.0, 315.0, 313.0, 317.0, 316.0, 314.0, 319.0, 317.0, 305.0, 322.0, 321.0, 315.0, 313.0, 323.0, 316.0, 311.0, 312.0, 318.0, 319.0, 320.0, 290.0, 297.0, 285.0, 282.0, 314.0, 313.0, 319.0, 317.0, 316.0, 317.0, 314.0, 322.0, 316.0, 314.0, 319.0, 311.0, 308.0, 319.0, 318.0, 312.0, 320.0, 316.0, 319.0, 317.0, 317.0, 319.0, 319.0, 320.0, 262.0, 274.0, 311.0, 316.0, 319.0, 
311.0, 322.0, 311.0, 319.0, 320.0, 291.0, 282.0, 318.0, 318.0, 316.0, 320.0, 279.0, 282.0, 324.0, 312.0, 295.0, 287.0, 293.0, 294.0, 319.0, 314.0, 283.0, 278.0, 319.0, 320.0, 288.0, 299.0, 319.0, 314.0, 316.0, 311.0, 276.0, 294.0, 324.0, 309.0, 288.0, 291.0, 319.0, 320.0, 297.0, 299.0, 319.0, 317.0, 319.0, 320.0, 317.0, 316.0, 317.0, 316.0, 304.0, 280.0, 288.0, 294.0, 289.0, 290.0, 322.0, 314.0, 317.0, 322.0, 288.0, 299.0, 290.0, 289.0, 274.0, 284.0, 316.0, 311.0, 322.0, 317.0, 290.0, 283.0, 319.0, 311.0, 314.0, 313.0, 288.0, 293.0, 319.0, 320.0, 322.0, 317.0, 325.0, 314.0, 314.0, 322.0, 319.0, 317.0, 296.0, 271.0, 281.0, 295.0, 277.0, 296.0, 287.0, 283.0, 307.0, 314.0, 314.0, 322.0, 313.0, 317.0, 316.0, 314.0]}, "sampler_perf": {"mean_env_wait_ms": 0.9137344731192653, "mean_processing_ms": 0.2510432909241615, "mean_inference_ms": 1.5004598778963754}, "off_policy_estimator": {}, "info": {"num_steps_trained": 7800000, "num_steps_sampled": 4160000, "sample_time_ms": 21864.49, "load_time_ms": 36.474, "grad_time_ms": 10036.502, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0015124385245144367, "policy_loss": -0.006040909793227911, "vf_loss": 81.20950317382812, "vf_explained_var": 0.7680754661560059, "kl": 0.0019407202489674091, "entropy": 1.135194182395935, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4160000, "episodes_total": 10400, "training_iteration": 325, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-31-44", "timestamp": 1660257104, "time_this_iter_s": 28.25439429283142, "time_total_s": 15516.098983287811, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 15516.098983287811, "timesteps_since_restore": 4160000, "iterations_since_restore": 325, "perf": {"cpu_util_percent": 34.2675, "ram_util_percent": 58.785000000000004}}
+{"episode_reward_max": 639.0, "episode_reward_min": 465.0, "episode_reward_mean": 613.63, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 229.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 306.815}, "custom_metrics": {"sparse_reward_mean": 213.0, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 187.63, "shaped_reward_min": 138, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.51, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 18.3, "onion_pickup_agent_1_min": 12, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 17.01, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 17.65, "useful_onion_pickup_agent_1_min": 12, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.55, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.39, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.15, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.12, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 16.64, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.54, "potting_onion_agent_1_min": 11, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.29, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 6.15, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 5.32, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.11, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.7, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.69, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.04, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.47, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.31, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.44, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.24, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.64, 
"optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.54, "optimal_onion_potting_agent_1_min": 11, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.64, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.54, "viable_onion_potting_agent_1_min": 11, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [636.0, 636.0, 630.0, 587.0, 639.0, 587.0, 633.0, 630.0, 639.0, 633.0, 582.0, 567.0, 630.0, 630.0, 633.0, 636.0, 576.0, 633.0, 587.0, 624.0, 636.0, 639.0, 639.0, 636.0, 630.0, 627.0, 612.0, 633.0, 570.0, 636.0, 630.0, 636.0, 570.0, 633.0, 579.0, 639.0, 596.0, 636.0, 639.0, 633.0, 633.0, 584.0, 582.0, 579.0, 636.0, 639.0, 587.0, 579.0, 558.0, 627.0, 639.0, 573.0, 630.0, 627.0, 581.0, 639.0, 639.0, 639.0, 636.0, 636.0, 567.0, 576.0, 573.0, 570.0, 621.0, 636.0, 630.0, 630.0, 630.0, 582.0, 630.0, 630.0, 630.0, 579.0, 636.0, 633.0, 593.0, 630.0, 587.0, 575.0, 633.0, 470.0, 633.0, 465.0, 633.0, 498.0, 633.0, 627.0, 636.0, 639.0, 627.0, 630.0, 630.0, 636.0, 627.0, 636.0, 636.0, 627.0, 630.0, 639.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [324.0, 312.0, 322.0, 314.0, 310.0, 320.0, 286.0, 301.0, 319.0, 320.0, 290.0, 297.0, 314.0, 319.0, 311.0, 319.0, 321.0, 318.0, 314.0, 319.0, 289.0, 293.0, 291.0, 276.0, 311.0, 319.0, 318.0, 312.0, 319.0, 314.0, 314.0, 322.0, 288.0, 288.0, 317.0, 316.0, 298.0, 289.0, 316.0, 308.0, 319.0, 317.0, 316.0, 323.0, 317.0, 322.0, 311.0, 325.0, 303.0, 327.0, 305.0, 322.0, 310.0, 302.0, 316.0, 317.0, 285.0, 285.0, 319.0, 317.0, 312.0, 318.0, 314.0, 322.0, 276.0, 294.0, 324.0, 309.0, 288.0, 291.0, 319.0, 320.0, 297.0, 299.0, 319.0, 317.0, 319.0, 320.0, 317.0, 316.0, 317.0, 316.0, 304.0, 280.0, 288.0, 294.0, 289.0, 290.0, 322.0, 314.0, 317.0, 322.0, 288.0, 299.0, 290.0, 289.0, 274.0, 
284.0, 316.0, 311.0, 322.0, 317.0, 290.0, 283.0, 319.0, 311.0, 314.0, 313.0, 288.0, 293.0, 319.0, 320.0, 322.0, 317.0, 325.0, 314.0, 314.0, 322.0, 319.0, 317.0, 296.0, 271.0, 281.0, 295.0, 277.0, 296.0, 287.0, 283.0, 307.0, 314.0, 314.0, 322.0, 313.0, 317.0, 316.0, 314.0, 313.0, 317.0, 296.0, 286.0, 321.0, 309.0, 307.0, 323.0, 321.0, 309.0, 285.0, 294.0, 327.0, 309.0, 314.0, 319.0, 293.0, 300.0, 309.0, 321.0, 299.0, 288.0, 285.0, 290.0, 323.0, 310.0, 231.0, 239.0, 309.0, 324.0, 236.0, 229.0, 307.0, 326.0, 250.0, 248.0, 319.0, 314.0, 308.0, 319.0, 319.0, 317.0, 319.0, 320.0, 312.0, 315.0, 313.0, 317.0, 316.0, 314.0, 319.0, 317.0, 305.0, 322.0, 321.0, 315.0, 313.0, 323.0, 316.0, 311.0, 312.0, 318.0, 319.0, 320.0]}, "sampler_perf": {"mean_env_wait_ms": 0.9123094574385331, "mean_processing_ms": 0.25075636823694536, "mean_inference_ms": 1.49889087709384}, "off_policy_estimator": {}, "info": {"num_steps_trained": 7824000, "num_steps_sampled": 4172800, "sample_time_ms": 21942.597, "load_time_ms": 36.578, "grad_time_ms": 9969.611, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.004511403385549784, "policy_loss": -0.0029741593170911074, "vf_loss": 80.4991683959961, "vf_explained_var": 0.7677297592163086, "kl": 0.002298202132806182, "entropy": 1.1287130117416382, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4172800, "episodes_total": 10432, "training_iteration": 326, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-32-15", "timestamp": 1660257135, "time_this_iter_s": 31.342418909072876, "time_total_s": 15547.441402196884, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 15547.441402196884, "timesteps_since_restore": 4172800, "iterations_since_restore": 326, "perf": {"cpu_util_percent": 35.46363636363637, "ram_util_percent": 58.74999999999999}}
+{"episode_reward_max": 644.0, "episode_reward_min": 354.0, "episode_reward_mean": 615.31, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 175.0}, "policy_reward_max": {"ppo": 330.0}, "policy_reward_mean": {"ppo": 307.655}, "custom_metrics": {"sparse_reward_mean": 213.6, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 188.11, "shaped_reward_min": 114, "shaped_reward_max": 204, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.28, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 18.47, "onion_pickup_agent_1_min": 14, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.8, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 17.84, "useful_onion_pickup_agent_1_min": 12, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.59, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.34, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.16, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.11, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.42, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.75, "potting_onion_agent_1_min": 13, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.35, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.85, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.5, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.05, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.6, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.58, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.67, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.15, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.6, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.1, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.42, 
"optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.75, "optimal_onion_potting_agent_1_min": 13, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.42, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.75, "viable_onion_potting_agent_1_min": 13, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [573.0, 579.0, 627.0, 354.0, 644.0, 639.0, 636.0, 627.0, 630.0, 630.0, 630.0, 579.0, 627.0, 636.0, 636.0, 636.0, 633.0, 587.0, 636.0, 627.0, 636.0, 633.0, 630.0, 636.0, 587.0, 633.0, 636.0, 576.0, 590.0, 627.0, 636.0, 636.0, 621.0, 636.0, 630.0, 630.0, 630.0, 582.0, 630.0, 630.0, 630.0, 579.0, 636.0, 633.0, 593.0, 630.0, 587.0, 575.0, 633.0, 470.0, 633.0, 465.0, 633.0, 498.0, 633.0, 627.0, 636.0, 639.0, 627.0, 630.0, 630.0, 636.0, 627.0, 636.0, 636.0, 627.0, 630.0, 639.0, 636.0, 636.0, 630.0, 587.0, 639.0, 587.0, 633.0, 630.0, 639.0, 633.0, 582.0, 567.0, 630.0, 630.0, 633.0, 636.0, 576.0, 633.0, 587.0, 624.0, 636.0, 639.0, 639.0, 636.0, 630.0, 627.0, 612.0, 633.0, 570.0, 636.0, 630.0, 636.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [288.0, 285.0, 297.0, 282.0, 316.0, 311.0, 179.0, 175.0, 330.0, 314.0, 317.0, 322.0, 317.0, 319.0, 311.0, 316.0, 318.0, 312.0, 316.0, 314.0, 319.0, 311.0, 293.0, 286.0, 308.0, 319.0, 316.0, 320.0, 330.0, 306.0, 317.0, 319.0, 319.0, 314.0, 299.0, 288.0, 319.0, 317.0, 318.0, 309.0, 324.0, 312.0, 319.0, 314.0, 314.0, 316.0, 324.0, 312.0, 304.0, 283.0, 321.0, 312.0, 319.0, 317.0, 279.0, 297.0, 294.0, 296.0, 318.0, 309.0, 321.0, 315.0, 316.0, 320.0, 307.0, 314.0, 314.0, 322.0, 313.0, 317.0, 316.0, 314.0, 313.0, 317.0, 296.0, 286.0, 321.0, 309.0, 307.0, 323.0, 321.0, 309.0, 285.0, 294.0, 327.0, 309.0, 314.0, 319.0, 293.0, 300.0, 309.0, 321.0, 299.0, 288.0, 285.0, 290.0, 323.0, 
310.0, 231.0, 239.0, 309.0, 324.0, 236.0, 229.0, 307.0, 326.0, 250.0, 248.0, 319.0, 314.0, 308.0, 319.0, 319.0, 317.0, 319.0, 320.0, 312.0, 315.0, 313.0, 317.0, 316.0, 314.0, 319.0, 317.0, 305.0, 322.0, 321.0, 315.0, 313.0, 323.0, 316.0, 311.0, 312.0, 318.0, 319.0, 320.0, 324.0, 312.0, 322.0, 314.0, 310.0, 320.0, 286.0, 301.0, 319.0, 320.0, 290.0, 297.0, 314.0, 319.0, 311.0, 319.0, 321.0, 318.0, 314.0, 319.0, 289.0, 293.0, 291.0, 276.0, 311.0, 319.0, 318.0, 312.0, 319.0, 314.0, 314.0, 322.0, 288.0, 288.0, 317.0, 316.0, 298.0, 289.0, 316.0, 308.0, 319.0, 317.0, 316.0, 323.0, 317.0, 322.0, 311.0, 325.0, 303.0, 327.0, 305.0, 322.0, 310.0, 302.0, 316.0, 317.0, 285.0, 285.0, 319.0, 317.0, 312.0, 318.0, 314.0, 322.0]}, "sampler_perf": {"mean_env_wait_ms": 0.9108943505628629, "mean_processing_ms": 0.25047292677413735, "mean_inference_ms": 1.4973202589318924}, "off_policy_estimator": {}, "info": {"num_steps_trained": 7848000, "num_steps_sampled": 4185600, "sample_time_ms": 21889.076, "load_time_ms": 35.811, "grad_time_ms": 9803.849, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.004534369800239801, "policy_loss": -0.0032402947545051575, "vf_loss": 83.42310333251953, "vf_explained_var": 0.7677843570709229, "kl": 0.0018213322618976235, "entropy": 1.135262131690979, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4185600, "episodes_total": 10464, "training_iteration": 327, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-32-44", "timestamp": 1660257164, "time_this_iter_s": 29.095314025878906, "time_total_s": 15576.536716222763, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 15576.536716222763, "timesteps_since_restore": 4185600, "iterations_since_restore": 327, "perf": {"cpu_util_percent": 33.5219512195122, "ram_util_percent": 58.87073170731708}}
+{"episode_reward_max": 644.0, "episode_reward_min": 354.0, "episode_reward_mean": 615.14, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 175.0}, "policy_reward_max": {"ppo": 330.0}, "policy_reward_mean": {"ppo": 307.57}, "custom_metrics": {"sparse_reward_mean": 213.0, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 189.14, "shaped_reward_min": 114, "shaped_reward_max": 204, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.23, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 18.35, "onion_pickup_agent_1_min": 14, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.78, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 17.78, "useful_onion_pickup_agent_1_min": 12, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.42, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.3, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.11, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.08, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.52, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.75, "potting_onion_agent_1_min": 13, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.16, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.82, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.56, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.25, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.41, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.44, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.61, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.19, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.52, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.14, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.52, 
"optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.75, "optimal_onion_potting_agent_1_min": 13, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.52, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.75, "viable_onion_potting_agent_1_min": 13, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [630.0, 630.0, 630.0, 636.0, 465.0, 639.0, 639.0, 587.0, 639.0, 633.0, 582.0, 587.0, 639.0, 633.0, 636.0, 636.0, 596.0, 587.0, 639.0, 630.0, 633.0, 639.0, 582.0, 636.0, 479.0, 587.0, 630.0, 639.0, 639.0, 516.0, 582.0, 633.0, 636.0, 627.0, 630.0, 639.0, 636.0, 636.0, 630.0, 587.0, 639.0, 587.0, 633.0, 630.0, 639.0, 633.0, 582.0, 567.0, 630.0, 630.0, 633.0, 636.0, 576.0, 633.0, 587.0, 624.0, 636.0, 639.0, 639.0, 636.0, 630.0, 627.0, 612.0, 633.0, 570.0, 636.0, 630.0, 636.0, 573.0, 579.0, 627.0, 354.0, 644.0, 639.0, 636.0, 627.0, 630.0, 630.0, 630.0, 579.0, 627.0, 636.0, 636.0, 636.0, 633.0, 587.0, 636.0, 627.0, 636.0, 633.0, 630.0, 636.0, 587.0, 633.0, 636.0, 576.0, 590.0, 627.0, 636.0, 636.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [303.0, 327.0, 312.0, 318.0, 317.0, 313.0, 314.0, 322.0, 234.0, 231.0, 322.0, 317.0, 322.0, 317.0, 288.0, 299.0, 317.0, 322.0, 307.0, 326.0, 290.0, 292.0, 295.0, 292.0, 311.0, 328.0, 306.0, 327.0, 322.0, 314.0, 314.0, 322.0, 294.0, 302.0, 289.0, 298.0, 321.0, 318.0, 313.0, 317.0, 310.0, 323.0, 316.0, 323.0, 291.0, 291.0, 316.0, 320.0, 234.0, 245.0, 293.0, 294.0, 316.0, 314.0, 327.0, 312.0, 324.0, 315.0, 265.0, 251.0, 288.0, 294.0, 314.0, 319.0, 313.0, 323.0, 316.0, 311.0, 312.0, 318.0, 319.0, 320.0, 324.0, 312.0, 322.0, 314.0, 310.0, 320.0, 286.0, 301.0, 319.0, 320.0, 290.0, 297.0, 314.0, 319.0, 311.0, 319.0, 321.0, 318.0, 314.0, 319.0, 289.0, 293.0, 291.0, 276.0, 311.0, 
319.0, 318.0, 312.0, 319.0, 314.0, 314.0, 322.0, 288.0, 288.0, 317.0, 316.0, 298.0, 289.0, 316.0, 308.0, 319.0, 317.0, 316.0, 323.0, 317.0, 322.0, 311.0, 325.0, 303.0, 327.0, 305.0, 322.0, 310.0, 302.0, 316.0, 317.0, 285.0, 285.0, 319.0, 317.0, 312.0, 318.0, 314.0, 322.0, 288.0, 285.0, 297.0, 282.0, 316.0, 311.0, 179.0, 175.0, 330.0, 314.0, 317.0, 322.0, 317.0, 319.0, 311.0, 316.0, 318.0, 312.0, 316.0, 314.0, 319.0, 311.0, 293.0, 286.0, 308.0, 319.0, 316.0, 320.0, 330.0, 306.0, 317.0, 319.0, 319.0, 314.0, 299.0, 288.0, 319.0, 317.0, 318.0, 309.0, 324.0, 312.0, 319.0, 314.0, 314.0, 316.0, 324.0, 312.0, 304.0, 283.0, 321.0, 312.0, 319.0, 317.0, 279.0, 297.0, 294.0, 296.0, 318.0, 309.0, 321.0, 315.0, 316.0, 320.0]}, "sampler_perf": {"mean_env_wait_ms": 0.9094951926222233, "mean_processing_ms": 0.25019344496392953, "mean_inference_ms": 1.495850505323708}, "off_policy_estimator": {}, "info": {"num_steps_trained": 7872000, "num_steps_sampled": 4198400, "sample_time_ms": 21863.329, "load_time_ms": 36.113, "grad_time_ms": 9649.438, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0027417896781116724, "policy_loss": -0.00593235669657588, "vf_loss": 92.42369842529297, "vf_explained_var": 0.7659929394721985, "kl": 0.0018632843857631087, "entropy": 1.1364426612854004, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4198400, "episodes_total": 10496, "training_iteration": 328, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-33-13", "timestamp": 1660257193, "time_this_iter_s": 29.17238187789917, "time_total_s": 15605.709098100662, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 15605.709098100662, "timesteps_since_restore": 4198400, "iterations_since_restore": 328, "perf": {"cpu_util_percent": 35.3780487804878, "ram_util_percent": 58.91219512195122}}
+{"episode_reward_max": 644.0, "episode_reward_min": 354.0, "episode_reward_mean": 609.54, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 175.0}, "policy_reward_max": {"ppo": 330.0}, "policy_reward_mean": {"ppo": 304.77}, "custom_metrics": {"sparse_reward_mean": 211.0, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 187.54, "shaped_reward_min": 114, "shaped_reward_max": 204, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.91, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 18.45, "onion_pickup_agent_1_min": 14, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.43, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 17.78, "useful_onion_pickup_agent_1_min": 12, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.35, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.36, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.09, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.14, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.25, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.73, "potting_onion_agent_1_min": 13, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.27, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.53, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.68, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.02, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.39, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.36, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.7, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.02, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.61, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.96, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.03, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.25, 
"optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.73, "optimal_onion_potting_agent_1_min": 13, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.25, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.73, "viable_onion_potting_agent_1_min": 13, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [587.0, 633.0, 582.0, 627.0, 582.0, 639.0, 582.0, 582.0, 630.0, 630.0, 633.0, 636.0, 630.0, 584.0, 530.0, 587.0, 636.0, 573.0, 630.0, 624.0, 627.0, 627.0, 630.0, 549.0, 633.0, 633.0, 482.0, 630.0, 579.0, 636.0, 579.0, 630.0, 570.0, 636.0, 630.0, 636.0, 573.0, 579.0, 627.0, 354.0, 644.0, 639.0, 636.0, 627.0, 630.0, 630.0, 630.0, 579.0, 627.0, 636.0, 636.0, 636.0, 633.0, 587.0, 636.0, 627.0, 636.0, 633.0, 630.0, 636.0, 587.0, 633.0, 636.0, 576.0, 590.0, 627.0, 636.0, 636.0, 630.0, 630.0, 630.0, 636.0, 465.0, 639.0, 639.0, 587.0, 639.0, 633.0, 582.0, 587.0, 639.0, 633.0, 636.0, 636.0, 596.0, 587.0, 639.0, 630.0, 633.0, 639.0, 582.0, 636.0, 479.0, 587.0, 630.0, 639.0, 639.0, 516.0, 582.0, 633.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [288.0, 299.0, 317.0, 316.0, 291.0, 291.0, 321.0, 306.0, 296.0, 286.0, 316.0, 323.0, 293.0, 289.0, 295.0, 287.0, 319.0, 311.0, 319.0, 311.0, 319.0, 314.0, 319.0, 317.0, 314.0, 316.0, 288.0, 296.0, 265.0, 265.0, 292.0, 295.0, 313.0, 323.0, 291.0, 282.0, 323.0, 307.0, 310.0, 314.0, 309.0, 318.0, 324.0, 303.0, 316.0, 314.0, 275.0, 274.0, 318.0, 315.0, 322.0, 311.0, 245.0, 237.0, 313.0, 317.0, 293.0, 286.0, 314.0, 322.0, 290.0, 289.0, 316.0, 314.0, 285.0, 285.0, 319.0, 317.0, 312.0, 318.0, 314.0, 322.0, 288.0, 285.0, 297.0, 282.0, 316.0, 311.0, 179.0, 175.0, 330.0, 314.0, 317.0, 322.0, 317.0, 319.0, 311.0, 316.0, 318.0, 312.0, 316.0, 314.0, 319.0, 311.0, 293.0, 286.0, 308.0, 
319.0, 316.0, 320.0, 330.0, 306.0, 317.0, 319.0, 319.0, 314.0, 299.0, 288.0, 319.0, 317.0, 318.0, 309.0, 324.0, 312.0, 319.0, 314.0, 314.0, 316.0, 324.0, 312.0, 304.0, 283.0, 321.0, 312.0, 319.0, 317.0, 279.0, 297.0, 294.0, 296.0, 318.0, 309.0, 321.0, 315.0, 316.0, 320.0, 303.0, 327.0, 312.0, 318.0, 317.0, 313.0, 314.0, 322.0, 234.0, 231.0, 322.0, 317.0, 322.0, 317.0, 288.0, 299.0, 317.0, 322.0, 307.0, 326.0, 290.0, 292.0, 295.0, 292.0, 311.0, 328.0, 306.0, 327.0, 322.0, 314.0, 314.0, 322.0, 294.0, 302.0, 289.0, 298.0, 321.0, 318.0, 313.0, 317.0, 310.0, 323.0, 316.0, 323.0, 291.0, 291.0, 316.0, 320.0, 234.0, 245.0, 293.0, 294.0, 316.0, 314.0, 327.0, 312.0, 324.0, 315.0, 265.0, 251.0, 288.0, 294.0, 314.0, 319.0]}, "sampler_perf": {"mean_env_wait_ms": 0.908108829511899, "mean_processing_ms": 0.24991601885712256, "mean_inference_ms": 1.4943968227370834}, "off_policy_estimator": {}, "info": {"num_steps_trained": 7896000, "num_steps_sampled": 4211200, "sample_time_ms": 21816.04, "load_time_ms": 36.245, "grad_time_ms": 9396.983, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.00012729612353723496, "policy_loss": -0.007717677857726812, "vf_loss": 81.6099853515625, "vf_explained_var": 0.7742553353309631, "kl": 0.0021955876145511866, "entropy": 1.1412299871444702, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4211200, "episodes_total": 10528, "training_iteration": 329, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-33-44", "timestamp": 1660257224, "time_this_iter_s": 30.190826892852783, "time_total_s": 15635.899924993515, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 15635.899924993515, "timesteps_since_restore": 4211200, "iterations_since_restore": 329, "perf": {"cpu_util_percent": 32.737209302325574, "ram_util_percent": 59.3720930232558}}
+{"episode_reward_max": 639.0, "episode_reward_min": 465.0, "episode_reward_mean": 611.61, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 231.0}, "policy_reward_max": {"ppo": 328.0}, "policy_reward_mean": {"ppo": 305.805}, "custom_metrics": {"sparse_reward_mean": 211.8, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 188.01, "shaped_reward_min": 145, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.06, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 18.46, "onion_pickup_agent_1_min": 14, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.61, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 17.78, "useful_onion_pickup_agent_1_min": 12, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.34, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.39, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.07, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.13, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.35, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.71, "potting_onion_agent_1_min": 13, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.2, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.64, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.58, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.13, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.41, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.33, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.6, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.15, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.55, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.06, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.35, 
"optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.71, "optimal_onion_potting_agent_1_min": 13, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.35, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.71, "viable_onion_potting_agent_1_min": 13, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [639.0, 633.0, 525.0, 630.0, 587.0, 630.0, 630.0, 615.0, 633.0, 630.0, 636.0, 627.0, 584.0, 633.0, 639.0, 582.0, 630.0, 587.0, 630.0, 630.0, 579.0, 630.0, 633.0, 621.0, 630.0, 636.0, 639.0, 636.0, 639.0, 582.0, 627.0, 630.0, 590.0, 627.0, 636.0, 636.0, 630.0, 630.0, 630.0, 636.0, 465.0, 639.0, 639.0, 587.0, 639.0, 633.0, 582.0, 587.0, 639.0, 633.0, 636.0, 636.0, 596.0, 587.0, 639.0, 630.0, 633.0, 639.0, 582.0, 636.0, 479.0, 587.0, 630.0, 639.0, 639.0, 516.0, 582.0, 633.0, 587.0, 633.0, 582.0, 627.0, 582.0, 639.0, 582.0, 582.0, 630.0, 630.0, 633.0, 636.0, 630.0, 584.0, 530.0, 587.0, 636.0, 573.0, 630.0, 624.0, 627.0, 627.0, 630.0, 549.0, 633.0, 633.0, 482.0, 630.0, 579.0, 636.0, 579.0, 630.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [316.0, 323.0, 309.0, 324.0, 258.0, 267.0, 314.0, 316.0, 291.0, 296.0, 314.0, 316.0, 313.0, 317.0, 307.0, 308.0, 322.0, 311.0, 321.0, 309.0, 317.0, 319.0, 310.0, 317.0, 285.0, 299.0, 319.0, 314.0, 322.0, 317.0, 294.0, 288.0, 321.0, 309.0, 286.0, 301.0, 319.0, 311.0, 314.0, 316.0, 291.0, 288.0, 319.0, 311.0, 316.0, 317.0, 311.0, 310.0, 316.0, 314.0, 319.0, 317.0, 324.0, 315.0, 319.0, 317.0, 319.0, 320.0, 278.0, 304.0, 316.0, 311.0, 316.0, 314.0, 294.0, 296.0, 318.0, 309.0, 321.0, 315.0, 316.0, 320.0, 303.0, 327.0, 312.0, 318.0, 317.0, 313.0, 314.0, 322.0, 234.0, 231.0, 322.0, 317.0, 322.0, 317.0, 288.0, 299.0, 317.0, 322.0, 307.0, 326.0, 290.0, 292.0, 295.0, 292.0, 311.0, 
328.0, 306.0, 327.0, 322.0, 314.0, 314.0, 322.0, 294.0, 302.0, 289.0, 298.0, 321.0, 318.0, 313.0, 317.0, 310.0, 323.0, 316.0, 323.0, 291.0, 291.0, 316.0, 320.0, 234.0, 245.0, 293.0, 294.0, 316.0, 314.0, 327.0, 312.0, 324.0, 315.0, 265.0, 251.0, 288.0, 294.0, 314.0, 319.0, 288.0, 299.0, 317.0, 316.0, 291.0, 291.0, 321.0, 306.0, 296.0, 286.0, 316.0, 323.0, 293.0, 289.0, 295.0, 287.0, 319.0, 311.0, 319.0, 311.0, 319.0, 314.0, 319.0, 317.0, 314.0, 316.0, 288.0, 296.0, 265.0, 265.0, 292.0, 295.0, 313.0, 323.0, 291.0, 282.0, 323.0, 307.0, 310.0, 314.0, 309.0, 318.0, 324.0, 303.0, 316.0, 314.0, 275.0, 274.0, 318.0, 315.0, 322.0, 311.0, 245.0, 237.0, 313.0, 317.0, 293.0, 286.0, 314.0, 322.0, 290.0, 289.0, 316.0, 314.0]}, "sampler_perf": {"mean_env_wait_ms": 0.9067267612642047, "mean_processing_ms": 0.2496377464704088, "mean_inference_ms": 1.4929051187388651}, "off_policy_estimator": {}, "info": {"num_steps_trained": 7920000, "num_steps_sampled": 4224000, "sample_time_ms": 21531.777, "load_time_ms": 36.36, "grad_time_ms": 9360.92, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0021365683060139418, "policy_loss": -0.005692864302545786, "vf_loss": 83.99735260009766, "vf_explained_var": 0.7644996643066406, "kl": 0.0020622028969228268, "entropy": 1.140602469444275, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4224000, "episodes_total": 10560, "training_iteration": 330, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-34-13", "timestamp": 1660257253, "time_this_iter_s": 29.480799913406372, "time_total_s": 15665.380724906921, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 15665.380724906921, "timesteps_since_restore": 4224000, "iterations_since_restore": 330, "perf": {"cpu_util_percent": 35.38095238095239, "ram_util_percent": 58.940476190476204}}
+{"episode_reward_max": 639.0, "episode_reward_min": 482.0, "episode_reward_mean": 614.36, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 237.0}, "policy_reward_max": {"ppo": 328.0}, "policy_reward_mean": {"ppo": 307.18}, "custom_metrics": {"sparse_reward_mean": 213.2, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 187.96, "shaped_reward_min": 149, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.19, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 18.43, "onion_pickup_agent_1_min": 14, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.72, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 17.79, "useful_onion_pickup_agent_1_min": 12, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.46, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.37, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.09, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.11, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.4, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.73, "potting_onion_agent_1_min": 13, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.36, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.63, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.6, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.99, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.52, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.41, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.66, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.11, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.64, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.03, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.4, 
"optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.73, "optimal_onion_potting_agent_1_min": 13, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.4, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.73, "viable_onion_potting_agent_1_min": 13, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 567.0, 587.0, 639.0, 627.0, 627.0, 639.0, 636.0, 636.0, 630.0, 636.0, 636.0, 627.0, 636.0, 584.0, 630.0, 627.0, 630.0, 633.0, 630.0, 579.0, 633.0, 633.0, 627.0, 639.0, 633.0, 633.0, 639.0, 633.0, 576.0, 636.0, 582.0, 639.0, 516.0, 582.0, 633.0, 587.0, 633.0, 582.0, 627.0, 582.0, 639.0, 582.0, 582.0, 630.0, 630.0, 633.0, 636.0, 630.0, 584.0, 530.0, 587.0, 636.0, 573.0, 630.0, 624.0, 627.0, 627.0, 630.0, 549.0, 633.0, 633.0, 482.0, 630.0, 579.0, 636.0, 579.0, 630.0, 639.0, 633.0, 525.0, 630.0, 587.0, 630.0, 630.0, 615.0, 633.0, 630.0, 636.0, 627.0, 584.0, 633.0, 639.0, 582.0, 630.0, 587.0, 630.0, 630.0, 579.0, 630.0, 633.0, 621.0, 630.0, 636.0, 639.0, 636.0, 639.0, 582.0, 627.0, 630.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [297.0, 285.0, 285.0, 282.0, 289.0, 298.0, 317.0, 322.0, 315.0, 312.0, 321.0, 306.0, 314.0, 325.0, 314.0, 322.0, 308.0, 328.0, 319.0, 311.0, 322.0, 314.0, 319.0, 317.0, 308.0, 319.0, 316.0, 320.0, 295.0, 289.0, 319.0, 311.0, 327.0, 300.0, 318.0, 312.0, 311.0, 322.0, 321.0, 309.0, 280.0, 299.0, 310.0, 323.0, 312.0, 321.0, 308.0, 319.0, 318.0, 321.0, 315.0, 318.0, 321.0, 312.0, 318.0, 321.0, 317.0, 316.0, 291.0, 285.0, 319.0, 317.0, 290.0, 292.0, 324.0, 315.0, 265.0, 251.0, 288.0, 294.0, 314.0, 319.0, 288.0, 299.0, 317.0, 316.0, 291.0, 291.0, 321.0, 306.0, 296.0, 286.0, 316.0, 323.0, 293.0, 289.0, 295.0, 287.0, 319.0, 311.0, 319.0, 311.0, 319.0, 314.0, 319.0, 317.0, 314.0, 
316.0, 288.0, 296.0, 265.0, 265.0, 292.0, 295.0, 313.0, 323.0, 291.0, 282.0, 323.0, 307.0, 310.0, 314.0, 309.0, 318.0, 324.0, 303.0, 316.0, 314.0, 275.0, 274.0, 318.0, 315.0, 322.0, 311.0, 245.0, 237.0, 313.0, 317.0, 293.0, 286.0, 314.0, 322.0, 290.0, 289.0, 316.0, 314.0, 316.0, 323.0, 309.0, 324.0, 258.0, 267.0, 314.0, 316.0, 291.0, 296.0, 314.0, 316.0, 313.0, 317.0, 307.0, 308.0, 322.0, 311.0, 321.0, 309.0, 317.0, 319.0, 310.0, 317.0, 285.0, 299.0, 319.0, 314.0, 322.0, 317.0, 294.0, 288.0, 321.0, 309.0, 286.0, 301.0, 319.0, 311.0, 314.0, 316.0, 291.0, 288.0, 319.0, 311.0, 316.0, 317.0, 311.0, 310.0, 316.0, 314.0, 319.0, 317.0, 324.0, 315.0, 319.0, 317.0, 319.0, 320.0, 278.0, 304.0, 316.0, 311.0, 316.0, 314.0]}, "sampler_perf": {"mean_env_wait_ms": 0.9053530084533031, "mean_processing_ms": 0.24936183583486593, "mean_inference_ms": 1.491518263590993}, "off_policy_estimator": {}, "info": {"num_steps_trained": 7944000, "num_steps_sampled": 4236800, "sample_time_ms": 21154.165, "load_time_ms": 36.748, "grad_time_ms": 9274.795, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -8.082172280410305e-05, "policy_loss": -0.00758820166811347, "vf_loss": 80.76020050048828, "vf_explained_var": 0.765857994556427, "kl": 0.001791672664694488, "entropy": 1.137281060218811, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4236800, "episodes_total": 10592, "training_iteration": 331, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-34-44", "timestamp": 1660257284, "time_this_iter_s": 31.342971086502075, "time_total_s": 15696.723695993423, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 15696.723695993423, "timesteps_since_restore": 4236800, "iterations_since_restore": 331, "perf": {"cpu_util_percent": 33.804545454545455, "ram_util_percent": 58.92272727272726}}
+{"episode_reward_max": 639.0, "episode_reward_min": 525.0, "episode_reward_mean": 616.01, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 258.0}, "policy_reward_max": {"ppo": 328.0}, "policy_reward_mean": {"ppo": 308.005}, "custom_metrics": {"sparse_reward_mean": 213.6, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 188.81, "shaped_reward_min": 165, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.56, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 18.07, "onion_pickup_agent_1_min": 12, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 17.05, "useful_onion_pickup_agent_0_min": 13, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 17.34, "useful_onion_pickup_agent_1_min": 12, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.49, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.31, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.13, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.06, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.74, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.52, "potting_onion_agent_1_min": 12, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.03, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.9, "dish_pickup_agent_1_min": 4, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.44, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.27, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.42, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.46, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.46, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.33, "soup_pickup_agent_1_min": 4, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.42, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.26, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.74, 
"optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.52, "optimal_onion_potting_agent_1_min": 12, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.74, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.52, "viable_onion_potting_agent_1_min": 12, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [633.0, 573.0, 633.0, 567.0, 630.0, 576.0, 639.0, 587.0, 630.0, 582.0, 636.0, 639.0, 633.0, 630.0, 636.0, 579.0, 576.0, 627.0, 587.0, 587.0, 579.0, 582.0, 636.0, 636.0, 579.0, 639.0, 587.0, 633.0, 630.0, 582.0, 633.0, 587.0, 579.0, 636.0, 579.0, 630.0, 639.0, 633.0, 525.0, 630.0, 587.0, 630.0, 630.0, 615.0, 633.0, 630.0, 636.0, 627.0, 584.0, 633.0, 639.0, 582.0, 630.0, 587.0, 630.0, 630.0, 579.0, 630.0, 633.0, 621.0, 630.0, 636.0, 639.0, 636.0, 639.0, 582.0, 627.0, 630.0, 582.0, 567.0, 587.0, 639.0, 627.0, 627.0, 639.0, 636.0, 636.0, 630.0, 636.0, 636.0, 627.0, 636.0, 584.0, 630.0, 627.0, 630.0, 633.0, 630.0, 579.0, 633.0, 633.0, 627.0, 639.0, 633.0, 633.0, 639.0, 633.0, 576.0, 636.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [319.0, 314.0, 287.0, 286.0, 316.0, 317.0, 280.0, 287.0, 313.0, 317.0, 281.0, 295.0, 319.0, 320.0, 295.0, 292.0, 311.0, 319.0, 284.0, 298.0, 320.0, 316.0, 316.0, 323.0, 317.0, 316.0, 319.0, 311.0, 324.0, 312.0, 281.0, 298.0, 285.0, 291.0, 311.0, 316.0, 288.0, 299.0, 297.0, 290.0, 288.0, 291.0, 291.0, 291.0, 319.0, 317.0, 314.0, 322.0, 280.0, 299.0, 317.0, 322.0, 298.0, 289.0, 314.0, 319.0, 316.0, 314.0, 291.0, 291.0, 312.0, 321.0, 298.0, 289.0, 293.0, 286.0, 314.0, 322.0, 290.0, 289.0, 316.0, 314.0, 316.0, 323.0, 309.0, 324.0, 258.0, 267.0, 314.0, 316.0, 291.0, 296.0, 314.0, 316.0, 313.0, 317.0, 307.0, 308.0, 322.0, 311.0, 321.0, 309.0, 317.0, 319.0, 310.0, 317.0, 285.0, 
299.0, 319.0, 314.0, 322.0, 317.0, 294.0, 288.0, 321.0, 309.0, 286.0, 301.0, 319.0, 311.0, 314.0, 316.0, 291.0, 288.0, 319.0, 311.0, 316.0, 317.0, 311.0, 310.0, 316.0, 314.0, 319.0, 317.0, 324.0, 315.0, 319.0, 317.0, 319.0, 320.0, 278.0, 304.0, 316.0, 311.0, 316.0, 314.0, 297.0, 285.0, 285.0, 282.0, 289.0, 298.0, 317.0, 322.0, 315.0, 312.0, 321.0, 306.0, 314.0, 325.0, 314.0, 322.0, 308.0, 328.0, 319.0, 311.0, 322.0, 314.0, 319.0, 317.0, 308.0, 319.0, 316.0, 320.0, 295.0, 289.0, 319.0, 311.0, 327.0, 300.0, 318.0, 312.0, 311.0, 322.0, 321.0, 309.0, 280.0, 299.0, 310.0, 323.0, 312.0, 321.0, 308.0, 319.0, 318.0, 321.0, 315.0, 318.0, 321.0, 312.0, 318.0, 321.0, 317.0, 316.0, 291.0, 285.0, 319.0, 317.0, 290.0, 292.0]}, "sampler_perf": {"mean_env_wait_ms": 0.9039942028756937, "mean_processing_ms": 0.24909087688920636, "mean_inference_ms": 1.490273663500262}, "off_policy_estimator": {}, "info": {"num_steps_trained": 7968000, "num_steps_sampled": 4249600, "sample_time_ms": 21096.339, "load_time_ms": 36.622, "grad_time_ms": 9224.002, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.002680680714547634, "policy_loss": -0.005275225732475519, "vf_loss": 85.20003509521484, "vf_explained_var": 0.7707304954528809, "kl": 0.00193729845341295, "entropy": 1.128203272819519, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4249600, "episodes_total": 10624, "training_iteration": 332, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-35-18", "timestamp": 1660257318, "time_this_iter_s": 33.216859102249146, "time_total_s": 15729.940555095673, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 15729.940555095673, "timesteps_since_restore": 4249600, "iterations_since_restore": 332, "perf": {"cpu_util_percent": 34.92765957446809, "ram_util_percent": 58.93617021276598}}
+{"episode_reward_max": 639.0, "episode_reward_min": 354.0, "episode_reward_mean": 613.94, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 174.0}, "policy_reward_max": {"ppo": 328.0}, "policy_reward_mean": {"ppo": 306.97}, "custom_metrics": {"sparse_reward_mean": 212.6, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 188.74, "shaped_reward_min": 114, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.39, "onion_pickup_agent_0_min": 10, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 18.05, "onion_pickup_agent_1_min": 12, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.95, "useful_onion_pickup_agent_0_min": 9, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 17.29, "useful_onion_pickup_agent_1_min": 12, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.46, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.23, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.17, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.06, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.6, "potting_onion_agent_0_min": 9, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.63, "potting_onion_agent_1_min": 12, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.04, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.89, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.54, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.26, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.4, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.5, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.48, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.26, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.43, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.21, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.6, 
"optimal_onion_potting_agent_0_min": 9, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.63, "optimal_onion_potting_agent_1_min": 12, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.6, "viable_onion_potting_agent_0_min": 9, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.63, "viable_onion_potting_agent_1_min": 12, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [636.0, 354.0, 587.0, 636.0, 579.0, 636.0, 639.0, 587.0, 468.0, 636.0, 624.0, 630.0, 579.0, 630.0, 584.0, 633.0, 636.0, 636.0, 630.0, 639.0, 639.0, 639.0, 636.0, 639.0, 639.0, 633.0, 636.0, 630.0, 582.0, 636.0, 630.0, 633.0, 639.0, 582.0, 627.0, 630.0, 582.0, 567.0, 587.0, 639.0, 627.0, 627.0, 639.0, 636.0, 636.0, 630.0, 636.0, 636.0, 627.0, 636.0, 584.0, 630.0, 627.0, 630.0, 633.0, 630.0, 579.0, 633.0, 633.0, 627.0, 639.0, 633.0, 633.0, 639.0, 633.0, 576.0, 636.0, 582.0, 633.0, 573.0, 633.0, 567.0, 630.0, 576.0, 639.0, 587.0, 630.0, 582.0, 636.0, 639.0, 633.0, 630.0, 636.0, 579.0, 576.0, 627.0, 587.0, 587.0, 579.0, 582.0, 636.0, 636.0, 579.0, 639.0, 587.0, 633.0, 630.0, 582.0, 633.0, 587.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [322.0, 314.0, 174.0, 180.0, 288.0, 299.0, 316.0, 320.0, 291.0, 288.0, 326.0, 310.0, 322.0, 317.0, 290.0, 297.0, 225.0, 243.0, 314.0, 322.0, 321.0, 303.0, 311.0, 319.0, 293.0, 286.0, 317.0, 313.0, 301.0, 283.0, 319.0, 314.0, 314.0, 322.0, 324.0, 312.0, 316.0, 314.0, 319.0, 320.0, 317.0, 322.0, 312.0, 327.0, 319.0, 317.0, 320.0, 319.0, 322.0, 317.0, 314.0, 319.0, 314.0, 322.0, 313.0, 317.0, 290.0, 292.0, 324.0, 312.0, 316.0, 314.0, 314.0, 319.0, 319.0, 320.0, 278.0, 304.0, 316.0, 311.0, 316.0, 314.0, 297.0, 285.0, 285.0, 282.0, 289.0, 298.0, 317.0, 322.0, 315.0, 312.0, 321.0, 306.0, 314.0, 325.0, 314.0, 322.0, 308.0, 328.0, 319.0, 311.0, 322.0, 314.0, 319.0, 317.0, 308.0, 
319.0, 316.0, 320.0, 295.0, 289.0, 319.0, 311.0, 327.0, 300.0, 318.0, 312.0, 311.0, 322.0, 321.0, 309.0, 280.0, 299.0, 310.0, 323.0, 312.0, 321.0, 308.0, 319.0, 318.0, 321.0, 315.0, 318.0, 321.0, 312.0, 318.0, 321.0, 317.0, 316.0, 291.0, 285.0, 319.0, 317.0, 290.0, 292.0, 319.0, 314.0, 287.0, 286.0, 316.0, 317.0, 280.0, 287.0, 313.0, 317.0, 281.0, 295.0, 319.0, 320.0, 295.0, 292.0, 311.0, 319.0, 284.0, 298.0, 320.0, 316.0, 316.0, 323.0, 317.0, 316.0, 319.0, 311.0, 324.0, 312.0, 281.0, 298.0, 285.0, 291.0, 311.0, 316.0, 288.0, 299.0, 297.0, 290.0, 288.0, 291.0, 291.0, 291.0, 319.0, 317.0, 314.0, 322.0, 280.0, 299.0, 317.0, 322.0, 298.0, 289.0, 314.0, 319.0, 316.0, 314.0, 291.0, 291.0, 312.0, 321.0, 298.0, 289.0]}, "sampler_perf": {"mean_env_wait_ms": 0.9026482821594658, "mean_processing_ms": 0.24882351646773487, "mean_inference_ms": 1.4891609969323358}, "off_policy_estimator": {}, "info": {"num_steps_trained": 7992000, "num_steps_sampled": 4262400, "sample_time_ms": 21161.346, "load_time_ms": 36.685, "grad_time_ms": 9083.544, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.006233640480786562, "policy_loss": -0.0020334760192781687, "vf_loss": 88.32830047607422, "vf_explained_var": 0.7596514821052551, "kl": 0.0019263379508629441, "entropy": 1.1314295530319214, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4262400, "episodes_total": 10656, "training_iteration": 333, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-35-49", "timestamp": 1660257349, "time_this_iter_s": 30.911512851715088, "time_total_s": 15760.852067947388, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 15760.852067947388, "timesteps_since_restore": 4262400, "iterations_since_restore": 333, "perf": {"cpu_util_percent": 33.54318181818183, "ram_util_percent": 59.031818181818196}}
+{"episode_reward_max": 639.0, "episode_reward_min": 354.0, "episode_reward_mean": 609.4, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 174.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 304.7}, "custom_metrics": {"sparse_reward_mean": 211.0, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 187.4, "shaped_reward_min": 114, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.43, "onion_pickup_agent_0_min": 10, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 17.91, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.98, "useful_onion_pickup_agent_0_min": 9, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 17.13, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.39, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.27, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.17, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.1, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.66, "potting_onion_agent_0_min": 9, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.4, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.89, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.94, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.37, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.27, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.36, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.51, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.38, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.29, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.32, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.25, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.66, 
"optimal_onion_potting_agent_0_min": 9, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.4, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.66, "viable_onion_potting_agent_0_min": 9, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.4, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [633.0, 582.0, 633.0, 582.0, 365.0, 627.0, 633.0, 639.0, 639.0, 630.0, 630.0, 633.0, 630.0, 579.0, 630.0, 576.0, 579.0, 633.0, 582.0, 633.0, 633.0, 636.0, 630.0, 587.0, 618.0, 633.0, 636.0, 627.0, 561.0, 639.0, 624.0, 587.0, 633.0, 576.0, 636.0, 582.0, 633.0, 573.0, 633.0, 567.0, 630.0, 576.0, 639.0, 587.0, 630.0, 582.0, 636.0, 639.0, 633.0, 630.0, 636.0, 579.0, 576.0, 627.0, 587.0, 587.0, 579.0, 582.0, 636.0, 636.0, 579.0, 639.0, 587.0, 633.0, 630.0, 582.0, 633.0, 587.0, 636.0, 354.0, 587.0, 636.0, 579.0, 636.0, 639.0, 587.0, 468.0, 636.0, 624.0, 630.0, 579.0, 630.0, 584.0, 633.0, 636.0, 636.0, 630.0, 639.0, 639.0, 639.0, 636.0, 639.0, 639.0, 633.0, 636.0, 630.0, 582.0, 636.0, 630.0, 633.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [314.0, 319.0, 286.0, 296.0, 313.0, 320.0, 288.0, 294.0, 186.0, 179.0, 308.0, 319.0, 310.0, 323.0, 317.0, 322.0, 319.0, 320.0, 314.0, 316.0, 319.0, 311.0, 314.0, 319.0, 311.0, 319.0, 290.0, 289.0, 311.0, 319.0, 302.0, 274.0, 285.0, 294.0, 319.0, 314.0, 283.0, 299.0, 314.0, 319.0, 316.0, 317.0, 319.0, 317.0, 311.0, 319.0, 286.0, 301.0, 313.0, 305.0, 317.0, 316.0, 325.0, 311.0, 308.0, 319.0, 287.0, 274.0, 322.0, 317.0, 300.0, 324.0, 296.0, 291.0, 317.0, 316.0, 291.0, 285.0, 319.0, 317.0, 290.0, 292.0, 319.0, 314.0, 287.0, 286.0, 316.0, 317.0, 280.0, 287.0, 313.0, 317.0, 281.0, 295.0, 319.0, 320.0, 295.0, 292.0, 311.0, 319.0, 284.0, 298.0, 320.0, 316.0, 316.0, 323.0, 317.0, 
316.0, 319.0, 311.0, 324.0, 312.0, 281.0, 298.0, 285.0, 291.0, 311.0, 316.0, 288.0, 299.0, 297.0, 290.0, 288.0, 291.0, 291.0, 291.0, 319.0, 317.0, 314.0, 322.0, 280.0, 299.0, 317.0, 322.0, 298.0, 289.0, 314.0, 319.0, 316.0, 314.0, 291.0, 291.0, 312.0, 321.0, 298.0, 289.0, 322.0, 314.0, 174.0, 180.0, 288.0, 299.0, 316.0, 320.0, 291.0, 288.0, 326.0, 310.0, 322.0, 317.0, 290.0, 297.0, 225.0, 243.0, 314.0, 322.0, 321.0, 303.0, 311.0, 319.0, 293.0, 286.0, 317.0, 313.0, 301.0, 283.0, 319.0, 314.0, 314.0, 322.0, 324.0, 312.0, 316.0, 314.0, 319.0, 320.0, 317.0, 322.0, 312.0, 327.0, 319.0, 317.0, 320.0, 319.0, 322.0, 317.0, 314.0, 319.0, 314.0, 322.0, 313.0, 317.0, 290.0, 292.0, 324.0, 312.0, 316.0, 314.0, 314.0, 319.0]}, "sampler_perf": {"mean_env_wait_ms": 0.9013131460094013, "mean_processing_ms": 0.24855772824532033, "mean_inference_ms": 1.488151121252854}, "off_policy_estimator": {}, "info": {"num_steps_trained": 8016000, "num_steps_sampled": 4275200, "sample_time_ms": 21421.637, "load_time_ms": 36.594, "grad_time_ms": 9205.096, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0013225991278886795, "policy_loss": -0.00687911594286561, "vf_loss": 87.6913833618164, "vf_explained_var": 0.7619670033454895, "kl": 0.0022256800439208746, "entropy": 1.1348274946212769, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4275200, "episodes_total": 10688, "training_iteration": 334, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-36-22", "timestamp": 1660257382, "time_this_iter_s": 33.877387046813965, "time_total_s": 15794.729454994202, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 15794.729454994202, "timesteps_since_restore": 4275200, "iterations_since_restore": 334, "perf": {"cpu_util_percent": 34.24791666666667, "ram_util_percent": 59.02708333333334}}
+{"episode_reward_max": 639.0, "episode_reward_min": 354.0, "episode_reward_mean": 608.6, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 174.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 304.3}, "custom_metrics": {"sparse_reward_mean": 211.0, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 186.6, "shaped_reward_min": 114, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.21, "onion_pickup_agent_0_min": 10, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 18.16, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.75, "useful_onion_pickup_agent_0_min": 9, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 17.48, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.4, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.35, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.13, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.13, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.44, "potting_onion_agent_0_min": 9, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.5, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.08, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.8, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.4, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.11, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.44, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.46, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.5, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.15, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.44, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.13, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.44, "optimal_onion_potting_agent_0_min": 
9, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.5, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.44, "viable_onion_potting_agent_0_min": 9, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.5, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": 
{"episode_reward": [633.0, 627.0, 582.0, 636.0, 627.0, 633.0, 518.0, 587.0, 633.0, 582.0, 582.0, 582.0, 558.0, 561.0, 633.0, 582.0, 579.0, 579.0, 633.0, 639.0, 633.0, 630.0, 624.0, 633.0, 579.0, 636.0, 564.0, 627.0, 630.0, 639.0, 633.0, 584.0, 630.0, 582.0, 633.0, 587.0, 636.0, 354.0, 587.0, 636.0, 579.0, 636.0, 639.0, 587.0, 468.0, 636.0, 624.0, 630.0, 579.0, 630.0, 584.0, 633.0, 636.0, 636.0, 630.0, 639.0, 639.0, 639.0, 636.0, 639.0, 639.0, 633.0, 636.0, 630.0, 582.0, 636.0, 630.0, 633.0, 633.0, 582.0, 633.0, 582.0, 365.0, 627.0, 633.0, 639.0, 639.0, 630.0, 630.0, 633.0, 630.0, 579.0, 630.0, 576.0, 579.0, 633.0, 582.0, 633.0, 633.0, 636.0, 630.0, 587.0, 618.0, 633.0, 636.0, 627.0, 561.0, 639.0, 624.0, 587.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [316.0, 317.0, 308.0, 319.0, 293.0, 289.0, 317.0, 319.0, 316.0, 311.0, 319.0, 314.0, 266.0, 252.0, 299.0, 288.0, 321.0, 312.0, 295.0, 287.0, 279.0, 303.0, 285.0, 297.0, 287.0, 271.0, 284.0, 277.0, 319.0, 314.0, 301.0, 281.0, 294.0, 285.0, 290.0, 289.0, 318.0, 315.0, 314.0, 325.0, 314.0, 319.0, 319.0, 311.0, 305.0, 319.0, 316.0, 317.0, 291.0, 288.0, 320.0, 316.0, 277.0, 287.0, 306.0, 321.0, 317.0, 313.0, 314.0, 325.0, 317.0, 316.0, 287.0, 297.0, 316.0, 314.0, 291.0, 291.0, 312.0, 321.0, 298.0, 289.0, 322.0, 314.0, 174.0, 180.0, 288.0, 299.0, 316.0, 320.0, 291.0, 288.0, 326.0, 310.0, 322.0, 317.0, 290.0, 297.0, 225.0, 243.0, 314.0, 322.0, 321.0, 303.0, 311.0, 319.0, 293.0, 286.0, 317.0, 313.0, 301.0, 283.0, 319.0, 314.0, 314.0, 
322.0, 324.0, 312.0, 316.0, 314.0, 319.0, 320.0, 317.0, 322.0, 312.0, 327.0, 319.0, 317.0, 320.0, 319.0, 322.0, 317.0, 314.0, 319.0, 314.0, 322.0, 313.0, 317.0, 290.0, 292.0, 324.0, 312.0, 316.0, 314.0, 314.0, 319.0, 314.0, 319.0, 286.0, 296.0, 313.0, 320.0, 288.0, 294.0, 186.0, 179.0, 308.0, 319.0, 310.0, 323.0, 317.0, 322.0, 319.0, 320.0, 314.0, 316.0, 319.0, 311.0, 314.0, 319.0, 311.0, 319.0, 290.0, 289.0, 311.0, 319.0, 302.0, 274.0, 285.0, 294.0, 319.0, 314.0, 283.0, 299.0, 314.0, 319.0, 316.0, 317.0, 319.0, 317.0, 311.0, 319.0, 286.0, 301.0, 313.0, 305.0, 317.0, 316.0, 325.0, 311.0, 308.0, 319.0, 287.0, 274.0, 322.0, 317.0, 300.0, 324.0, 296.0, 291.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8999851577327678, "mean_processing_ms": 0.24829403649185813, "mean_inference_ms": 1.4872182879909173}, "off_policy_estimator": {}, "info": {"num_steps_trained": 8040000, "num_steps_sampled": 4288000, "sample_time_ms": 21915.303, "load_time_ms": 37.24, "grad_time_ms": 9393.89, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0024422004353255033, "policy_loss": -0.005287020932883024, "vf_loss": 82.97665405273438, "vf_explained_var": 0.7636620402336121, "kl": 0.001807666034437716, "entropy": 1.136885643005371, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4288000, "episodes_total": 10720, "training_iteration": 335, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-36-58", "timestamp": 1660257418, "time_this_iter_s": 35.08472490310669, "time_total_s": 15829.814179897308, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, 
"conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, 
"custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, 
"vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 15829.814179897308, "timesteps_since_restore": 4288000, "iterations_since_restore": 335, "perf": {"cpu_util_percent": 30.30408163265306, "ram_util_percent": 58.94081632653061}}
+{"episode_reward_max": 639.0, "episode_reward_min": 365.0, "episode_reward_mean": 608.33, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 179.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 304.165}, "custom_metrics": {"sparse_reward_mean": 211.2, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 185.93, "shaped_reward_min": 125, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.2, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 18.14, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.68, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 17.58, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.41, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.37, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.1, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.11, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.4, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.45, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.09, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.81, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.36, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.05, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.45, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.49, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.5, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.13, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.46, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.11, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.4, 
"optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.45, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.4, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.45, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [630.0, 465.0, 636.0, 633.0, 582.0, 579.0, 582.0, 633.0, 516.0, 582.0, 587.0, 630.0, 630.0, 633.0, 630.0, 624.0, 633.0, 630.0, 627.0, 630.0, 576.0, 582.0, 630.0, 639.0, 639.0, 633.0, 576.0, 633.0, 633.0, 576.0, 633.0, 633.0, 582.0, 636.0, 630.0, 633.0, 633.0, 582.0, 633.0, 582.0, 365.0, 627.0, 633.0, 639.0, 639.0, 630.0, 630.0, 633.0, 630.0, 579.0, 630.0, 576.0, 579.0, 633.0, 582.0, 633.0, 633.0, 636.0, 630.0, 587.0, 618.0, 633.0, 636.0, 627.0, 561.0, 639.0, 624.0, 587.0, 633.0, 627.0, 582.0, 636.0, 627.0, 633.0, 518.0, 587.0, 633.0, 582.0, 582.0, 582.0, 558.0, 561.0, 633.0, 582.0, 579.0, 579.0, 633.0, 639.0, 633.0, 630.0, 624.0, 633.0, 579.0, 636.0, 564.0, 627.0, 630.0, 639.0, 633.0, 584.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [318.0, 312.0, 221.0, 244.0, 316.0, 320.0, 319.0, 314.0, 290.0, 292.0, 288.0, 291.0, 283.0, 299.0, 314.0, 319.0, 256.0, 260.0, 283.0, 299.0, 291.0, 296.0, 317.0, 313.0, 311.0, 319.0, 314.0, 319.0, 326.0, 304.0, 308.0, 316.0, 321.0, 312.0, 321.0, 309.0, 314.0, 313.0, 317.0, 313.0, 288.0, 288.0, 286.0, 296.0, 318.0, 312.0, 322.0, 317.0, 327.0, 312.0, 316.0, 317.0, 291.0, 285.0, 319.0, 314.0, 318.0, 315.0, 288.0, 288.0, 324.0, 309.0, 322.0, 311.0, 290.0, 292.0, 324.0, 312.0, 316.0, 314.0, 314.0, 319.0, 314.0, 319.0, 286.0, 296.0, 313.0, 320.0, 288.0, 294.0, 186.0, 179.0, 308.0, 319.0, 310.0, 323.0, 317.0, 322.0, 319.0, 320.0, 314.0, 316.0, 319.0, 311.0, 314.0, 319.0, 311.0, 
319.0, 290.0, 289.0, 311.0, 319.0, 302.0, 274.0, 285.0, 294.0, 319.0, 314.0, 283.0, 299.0, 314.0, 319.0, 316.0, 317.0, 319.0, 317.0, 311.0, 319.0, 286.0, 301.0, 313.0, 305.0, 317.0, 316.0, 325.0, 311.0, 308.0, 319.0, 287.0, 274.0, 322.0, 317.0, 300.0, 324.0, 296.0, 291.0, 316.0, 317.0, 308.0, 319.0, 293.0, 289.0, 317.0, 319.0, 316.0, 311.0, 319.0, 314.0, 266.0, 252.0, 299.0, 288.0, 321.0, 312.0, 295.0, 287.0, 279.0, 303.0, 285.0, 297.0, 287.0, 271.0, 284.0, 277.0, 319.0, 314.0, 301.0, 281.0, 294.0, 285.0, 290.0, 289.0, 318.0, 315.0, 314.0, 325.0, 314.0, 319.0, 319.0, 311.0, 305.0, 319.0, 316.0, 317.0, 291.0, 288.0, 320.0, 316.0, 277.0, 287.0, 306.0, 321.0, 317.0, 313.0, 314.0, 325.0, 317.0, 316.0, 287.0, 297.0]}, "sampler_perf": {"mean_env_wait_ms": 0.898665270354117, "mean_processing_ms": 0.24803277399210055, "mean_inference_ms": 1.4863570559836363}, "off_policy_estimator": {}, "info": {"num_steps_trained": 8064000, "num_steps_sampled": 4300800, "sample_time_ms": 22027.266, "load_time_ms": 37.353, "grad_time_ms": 9612.106, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0008330469136126339, "policy_loss": -0.006751233246177435, "vf_loss": 81.52507781982422, "vf_explained_var": 0.7658050656318665, "kl": 0.001944715972058475, "entropy": 1.1364573240280151, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4300800, "episodes_total": 10752, "training_iteration": 336, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-37-32", "timestamp": 1660257452, "time_this_iter_s": 34.64702320098877, "time_total_s": 15864.461203098297, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 15864.461203098297, "timesteps_since_restore": 4300800, "iterations_since_restore": 336, "perf": {"cpu_util_percent": 29.189795918367345, "ram_util_percent": 58.936734693877554}}
+{"episode_reward_max": 639.0, "episode_reward_min": 465.0, "episode_reward_mean": 608.41, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 221.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 304.205}, "custom_metrics": {"sparse_reward_mean": 211.0, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 186.41, "shaped_reward_min": 145, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.94, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 18.31, "onion_pickup_agent_1_min": 12, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.46, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 17.76, "useful_onion_pickup_agent_1_min": 12, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.36, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.36, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.08, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.08, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.24, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.65, "potting_onion_agent_1_min": 12, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.31, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.67, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.48, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.95, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.53, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.43, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.64, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.05, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.54, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.01, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.24, 
"optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.65, "optimal_onion_potting_agent_1_min": 12, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.24, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.65, "viable_onion_potting_agent_1_min": 12, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [587.0, 636.0, 524.0, 630.0, 636.0, 636.0, 633.0, 639.0, 584.0, 636.0, 587.0, 636.0, 587.0, 639.0, 636.0, 587.0, 639.0, 627.0, 630.0, 636.0, 582.0, 590.0, 573.0, 636.0, 587.0, 633.0, 633.0, 578.0, 630.0, 573.0, 573.0, 624.0, 561.0, 639.0, 624.0, 587.0, 633.0, 627.0, 582.0, 636.0, 627.0, 633.0, 518.0, 587.0, 633.0, 582.0, 582.0, 582.0, 558.0, 561.0, 633.0, 582.0, 579.0, 579.0, 633.0, 639.0, 633.0, 630.0, 624.0, 633.0, 579.0, 636.0, 564.0, 627.0, 630.0, 639.0, 633.0, 584.0, 630.0, 465.0, 636.0, 633.0, 582.0, 579.0, 582.0, 633.0, 516.0, 582.0, 587.0, 630.0, 630.0, 633.0, 630.0, 624.0, 633.0, 630.0, 627.0, 630.0, 576.0, 582.0, 630.0, 639.0, 639.0, 633.0, 576.0, 633.0, 633.0, 576.0, 633.0, 633.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [301.0, 286.0, 314.0, 322.0, 264.0, 260.0, 316.0, 314.0, 319.0, 317.0, 314.0, 322.0, 316.0, 317.0, 319.0, 320.0, 288.0, 296.0, 319.0, 317.0, 306.0, 281.0, 316.0, 320.0, 299.0, 288.0, 325.0, 314.0, 317.0, 319.0, 293.0, 294.0, 319.0, 320.0, 318.0, 309.0, 316.0, 314.0, 319.0, 317.0, 296.0, 286.0, 296.0, 294.0, 293.0, 280.0, 319.0, 317.0, 283.0, 304.0, 309.0, 324.0, 311.0, 322.0, 282.0, 296.0, 316.0, 314.0, 280.0, 293.0, 285.0, 288.0, 312.0, 312.0, 287.0, 274.0, 322.0, 317.0, 300.0, 324.0, 296.0, 291.0, 316.0, 317.0, 308.0, 319.0, 293.0, 289.0, 317.0, 319.0, 316.0, 311.0, 319.0, 314.0, 266.0, 252.0, 299.0, 288.0, 321.0, 312.0, 295.0, 287.0, 279.0, 303.0, 285.0, 297.0, 287.0, 
271.0, 284.0, 277.0, 319.0, 314.0, 301.0, 281.0, 294.0, 285.0, 290.0, 289.0, 318.0, 315.0, 314.0, 325.0, 314.0, 319.0, 319.0, 311.0, 305.0, 319.0, 316.0, 317.0, 291.0, 288.0, 320.0, 316.0, 277.0, 287.0, 306.0, 321.0, 317.0, 313.0, 314.0, 325.0, 317.0, 316.0, 287.0, 297.0, 318.0, 312.0, 221.0, 244.0, 316.0, 320.0, 319.0, 314.0, 290.0, 292.0, 288.0, 291.0, 283.0, 299.0, 314.0, 319.0, 256.0, 260.0, 283.0, 299.0, 291.0, 296.0, 317.0, 313.0, 311.0, 319.0, 314.0, 319.0, 326.0, 304.0, 308.0, 316.0, 321.0, 312.0, 321.0, 309.0, 314.0, 313.0, 317.0, 313.0, 288.0, 288.0, 286.0, 296.0, 318.0, 312.0, 322.0, 317.0, 327.0, 312.0, 316.0, 317.0, 291.0, 285.0, 319.0, 314.0, 318.0, 315.0, 288.0, 288.0, 324.0, 309.0, 322.0, 311.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8973618509446918, "mean_processing_ms": 0.2477754616951261, "mean_inference_ms": 1.4857111837613974}, "off_policy_estimator": {}, "info": {"num_steps_trained": 8088000, "num_steps_sampled": 4313600, "sample_time_ms": 22572.125, "load_time_ms": 37.728, "grad_time_ms": 9903.43, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0030143249314278364, "policy_loss": -0.004624274093657732, "vf_loss": 82.12947845458984, "vf_explained_var": 0.7718231081962585, "kl": 0.0020513928029686213, "entropy": 1.1487096548080444, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4313600, "episodes_total": 10784, "training_iteration": 337, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-38-10", "timestamp": 1660257490, "time_this_iter_s": 37.458003759384155, "time_total_s": 15901.919206857681, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 15901.919206857681, "timesteps_since_restore": 4313600, "iterations_since_restore": 337, "perf": {"cpu_util_percent": 29.675471698113206, "ram_util_percent": 58.94905660377358}}
+{"episode_reward_max": 639.0, "episode_reward_min": 465.0, "episode_reward_mean": 610.69, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 221.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 305.345}, "custom_metrics": {"sparse_reward_mean": 211.6, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 187.49, "shaped_reward_min": 145, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.77, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 18.44, "onion_pickup_agent_1_min": 12, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.34, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 17.93, "useful_onion_pickup_agent_1_min": 12, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.27, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.29, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.07, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.11, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 16.19, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.81, "potting_onion_agent_1_min": 12, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.32, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.6, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.62, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.01, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.53, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.4, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.66, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.06, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.58, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.0, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.19, 
"optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.81, "optimal_onion_potting_agent_1_min": 12, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.19, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.81, "viable_onion_potting_agent_1_min": 12, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [633.0, 630.0, 630.0, 630.0, 636.0, 633.0, 582.0, 570.0, 633.0, 639.0, 582.0, 630.0, 630.0, 530.0, 633.0, 516.0, 633.0, 636.0, 627.0, 633.0, 587.0, 636.0, 633.0, 573.0, 579.0, 582.0, 633.0, 636.0, 627.0, 587.0, 582.0, 630.0, 630.0, 639.0, 633.0, 584.0, 630.0, 465.0, 636.0, 633.0, 582.0, 579.0, 582.0, 633.0, 516.0, 582.0, 587.0, 630.0, 630.0, 633.0, 630.0, 624.0, 633.0, 630.0, 627.0, 630.0, 576.0, 582.0, 630.0, 639.0, 639.0, 633.0, 576.0, 633.0, 633.0, 576.0, 633.0, 633.0, 587.0, 636.0, 524.0, 630.0, 636.0, 636.0, 633.0, 639.0, 584.0, 636.0, 587.0, 636.0, 587.0, 639.0, 636.0, 587.0, 639.0, 627.0, 630.0, 636.0, 582.0, 590.0, 573.0, 636.0, 587.0, 633.0, 633.0, 578.0, 630.0, 573.0, 573.0, 624.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [319.0, 314.0, 319.0, 311.0, 316.0, 314.0, 321.0, 309.0, 317.0, 319.0, 317.0, 316.0, 298.0, 284.0, 277.0, 293.0, 319.0, 314.0, 322.0, 317.0, 291.0, 291.0, 321.0, 309.0, 316.0, 314.0, 257.0, 273.0, 321.0, 312.0, 256.0, 260.0, 311.0, 322.0, 316.0, 320.0, 311.0, 316.0, 316.0, 317.0, 293.0, 294.0, 314.0, 322.0, 316.0, 317.0, 276.0, 297.0, 288.0, 291.0, 288.0, 294.0, 318.0, 315.0, 324.0, 312.0, 313.0, 314.0, 293.0, 294.0, 291.0, 291.0, 316.0, 314.0, 317.0, 313.0, 314.0, 325.0, 317.0, 316.0, 287.0, 297.0, 318.0, 312.0, 221.0, 244.0, 316.0, 320.0, 319.0, 314.0, 290.0, 292.0, 288.0, 291.0, 283.0, 299.0, 314.0, 319.0, 256.0, 260.0, 283.0, 299.0, 291.0, 296.0, 317.0, 313.0, 311.0, 
319.0, 314.0, 319.0, 326.0, 304.0, 308.0, 316.0, 321.0, 312.0, 321.0, 309.0, 314.0, 313.0, 317.0, 313.0, 288.0, 288.0, 286.0, 296.0, 318.0, 312.0, 322.0, 317.0, 327.0, 312.0, 316.0, 317.0, 291.0, 285.0, 319.0, 314.0, 318.0, 315.0, 288.0, 288.0, 324.0, 309.0, 322.0, 311.0, 301.0, 286.0, 314.0, 322.0, 264.0, 260.0, 316.0, 314.0, 319.0, 317.0, 314.0, 322.0, 316.0, 317.0, 319.0, 320.0, 288.0, 296.0, 319.0, 317.0, 306.0, 281.0, 316.0, 320.0, 299.0, 288.0, 325.0, 314.0, 317.0, 319.0, 293.0, 294.0, 319.0, 320.0, 318.0, 309.0, 316.0, 314.0, 319.0, 317.0, 296.0, 286.0, 296.0, 294.0, 293.0, 280.0, 319.0, 317.0, 283.0, 304.0, 309.0, 324.0, 311.0, 322.0, 282.0, 296.0, 316.0, 314.0, 280.0, 293.0, 285.0, 288.0, 312.0, 312.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8960540337881886, "mean_processing_ms": 0.24751510966747803, "mean_inference_ms": 1.4847704330515064}, "off_policy_estimator": {}, "info": {"num_steps_trained": 8112000, "num_steps_sampled": 4326400, "sample_time_ms": 22480.697, "load_time_ms": 37.651, "grad_time_ms": 10202.093, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.001484702923335135, "policy_loss": -0.006018726620823145, "vf_loss": 80.70446014404297, "vf_explained_var": 0.7642549872398376, "kl": 0.0017236651619896293, "entropy": 1.1340447664260864, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4326400, "episodes_total": 10816, "training_iteration": 338, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-38-41", "timestamp": 1660257521, "time_this_iter_s": 31.244572162628174, "time_total_s": 15933.16377902031, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 15933.16377902031, "timesteps_since_restore": 4326400, "iterations_since_restore": 338, "perf": {"cpu_util_percent": 27.328888888888894, "ram_util_percent": 59.27555555555555}}
+{"episode_reward_max": 639.0, "episode_reward_min": 516.0, "episode_reward_mean": 612.0, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 256.0}, "policy_reward_max": {"ppo": 326.0}, "policy_reward_mean": {"ppo": 306.0}, "custom_metrics": {"sparse_reward_mean": 211.8, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 188.4, "shaped_reward_min": 156, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.58, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.62, "onion_pickup_agent_1_min": 15, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.22, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 18.03, "useful_onion_pickup_agent_1_min": 15, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.17, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.27, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.06, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.1, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 16.16, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.95, "potting_onion_agent_1_min": 14, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.34, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.49, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.74, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.0, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.45, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.36, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.75, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.03, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.64, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.96, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.16, 
"optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.95, "optimal_onion_potting_agent_1_min": 14, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.16, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.95, "viable_onion_potting_agent_1_min": 14, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [587.0, 636.0, 630.0, 633.0, 639.0, 630.0, 587.0, 636.0, 582.0, 636.0, 576.0, 584.0, 630.0, 639.0, 639.0, 633.0, 627.0, 582.0, 582.0, 630.0, 630.0, 633.0, 522.0, 587.0, 593.0, 633.0, 630.0, 582.0, 636.0, 630.0, 587.0, 636.0, 633.0, 576.0, 633.0, 633.0, 587.0, 636.0, 524.0, 630.0, 636.0, 636.0, 633.0, 639.0, 584.0, 636.0, 587.0, 636.0, 587.0, 639.0, 636.0, 587.0, 639.0, 627.0, 630.0, 636.0, 582.0, 590.0, 573.0, 636.0, 587.0, 633.0, 633.0, 578.0, 630.0, 573.0, 573.0, 624.0, 633.0, 630.0, 630.0, 630.0, 636.0, 633.0, 582.0, 570.0, 633.0, 639.0, 582.0, 630.0, 630.0, 530.0, 633.0, 516.0, 633.0, 636.0, 627.0, 633.0, 587.0, 636.0, 633.0, 573.0, 579.0, 582.0, 633.0, 636.0, 627.0, 587.0, 582.0, 630.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [288.0, 299.0, 322.0, 314.0, 316.0, 314.0, 319.0, 314.0, 322.0, 317.0, 316.0, 314.0, 291.0, 296.0, 314.0, 322.0, 285.0, 297.0, 325.0, 311.0, 288.0, 288.0, 295.0, 289.0, 326.0, 304.0, 317.0, 322.0, 322.0, 317.0, 316.0, 317.0, 308.0, 319.0, 296.0, 286.0, 288.0, 294.0, 306.0, 324.0, 316.0, 314.0, 311.0, 322.0, 262.0, 260.0, 289.0, 298.0, 296.0, 297.0, 322.0, 311.0, 313.0, 317.0, 293.0, 289.0, 319.0, 317.0, 313.0, 317.0, 309.0, 278.0, 314.0, 322.0, 318.0, 315.0, 288.0, 288.0, 324.0, 309.0, 322.0, 311.0, 301.0, 286.0, 314.0, 322.0, 264.0, 260.0, 316.0, 314.0, 319.0, 317.0, 314.0, 322.0, 316.0, 317.0, 319.0, 320.0, 288.0, 296.0, 319.0, 317.0, 306.0, 281.0, 316.0, 320.0, 299.0, 
288.0, 325.0, 314.0, 317.0, 319.0, 293.0, 294.0, 319.0, 320.0, 318.0, 309.0, 316.0, 314.0, 319.0, 317.0, 296.0, 286.0, 296.0, 294.0, 293.0, 280.0, 319.0, 317.0, 283.0, 304.0, 309.0, 324.0, 311.0, 322.0, 282.0, 296.0, 316.0, 314.0, 280.0, 293.0, 285.0, 288.0, 312.0, 312.0, 319.0, 314.0, 319.0, 311.0, 316.0, 314.0, 321.0, 309.0, 317.0, 319.0, 317.0, 316.0, 298.0, 284.0, 277.0, 293.0, 319.0, 314.0, 322.0, 317.0, 291.0, 291.0, 321.0, 309.0, 316.0, 314.0, 257.0, 273.0, 321.0, 312.0, 256.0, 260.0, 311.0, 322.0, 316.0, 320.0, 311.0, 316.0, 316.0, 317.0, 293.0, 294.0, 314.0, 322.0, 316.0, 317.0, 276.0, 297.0, 288.0, 291.0, 288.0, 294.0, 318.0, 315.0, 324.0, 312.0, 313.0, 314.0, 293.0, 294.0, 291.0, 291.0, 316.0, 314.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8947470159821566, "mean_processing_ms": 0.2472537044161699, "mean_inference_ms": 1.483669994240604}, "off_policy_estimator": {}, "info": {"num_steps_trained": 8136000, "num_steps_sampled": 4339200, "sample_time_ms": 22385.99, "load_time_ms": 37.99, "grad_time_ms": 10282.952, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0002800325455609709, "policy_loss": -0.007263503968715668, "vf_loss": 81.1025161743164, "vf_explained_var": 0.7635498642921448, "kl": 0.0021122132893651724, "entropy": 1.1334240436553955, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4339200, "episodes_total": 10848, "training_iteration": 339, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-39-11", "timestamp": 1660257551, "time_this_iter_s": 30.060129165649414, "time_total_s": 15963.223908185959, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 15963.223908185959, "timesteps_since_restore": 4339200, "iterations_since_restore": 339, "perf": {"cpu_util_percent": 32.38333333333334, "ram_util_percent": 58.776190476190465}}
+{"episode_reward_max": 639.0, "episode_reward_min": 516.0, "episode_reward_mean": 611.61, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 256.0}, "policy_reward_max": {"ppo": 326.0}, "policy_reward_mean": {"ppo": 305.805}, "custom_metrics": {"sparse_reward_mean": 211.8, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 188.01, "shaped_reward_min": 156, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.45, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 18.7, "onion_pickup_agent_1_min": 15, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.11, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 18.06, "useful_onion_pickup_agent_1_min": 15, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.16, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.25, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.05, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.12, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 16.07, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 18.02, "potting_onion_agent_1_min": 14, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.4, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.51, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.73, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.95, "useful_dish_pickup_agent_1_min": 3, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.48, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.43, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.75, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.0, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.64, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.96, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.07, 
"optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 18.02, "optimal_onion_potting_agent_1_min": 14, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.07, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 18.02, "viable_onion_potting_agent_1_min": 14, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [630.0, 590.0, 639.0, 636.0, 582.0, 624.0, 633.0, 587.0, 630.0, 582.0, 630.0, 630.0, 630.0, 636.0, 636.0, 579.0, 587.0, 587.0, 636.0, 584.0, 578.0, 630.0, 636.0, 630.0, 582.0, 579.0, 636.0, 627.0, 582.0, 633.0, 582.0, 630.0, 630.0, 573.0, 573.0, 624.0, 633.0, 630.0, 630.0, 630.0, 636.0, 633.0, 582.0, 570.0, 633.0, 639.0, 582.0, 630.0, 630.0, 530.0, 633.0, 516.0, 633.0, 636.0, 627.0, 633.0, 587.0, 636.0, 633.0, 573.0, 579.0, 582.0, 633.0, 636.0, 627.0, 587.0, 582.0, 630.0, 587.0, 636.0, 630.0, 633.0, 639.0, 630.0, 587.0, 636.0, 582.0, 636.0, 576.0, 584.0, 630.0, 639.0, 639.0, 633.0, 627.0, 582.0, 582.0, 630.0, 630.0, 633.0, 522.0, 587.0, 593.0, 633.0, 630.0, 582.0, 636.0, 630.0, 587.0, 636.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [323.0, 307.0, 304.0, 286.0, 320.0, 319.0, 316.0, 320.0, 296.0, 286.0, 313.0, 311.0, 311.0, 322.0, 291.0, 296.0, 314.0, 316.0, 296.0, 286.0, 311.0, 319.0, 321.0, 309.0, 316.0, 314.0, 319.0, 317.0, 317.0, 319.0, 293.0, 286.0, 301.0, 286.0, 298.0, 289.0, 314.0, 322.0, 288.0, 296.0, 286.0, 292.0, 318.0, 312.0, 312.0, 324.0, 313.0, 317.0, 288.0, 294.0, 295.0, 284.0, 314.0, 322.0, 316.0, 311.0, 291.0, 291.0, 316.0, 317.0, 285.0, 297.0, 313.0, 317.0, 316.0, 314.0, 280.0, 293.0, 285.0, 288.0, 312.0, 312.0, 319.0, 314.0, 319.0, 311.0, 316.0, 314.0, 321.0, 309.0, 317.0, 319.0, 317.0, 316.0, 298.0, 284.0, 277.0, 293.0, 319.0, 314.0, 322.0, 317.0, 291.0, 291.0, 321.0, 309.0, 316.0, 
314.0, 257.0, 273.0, 321.0, 312.0, 256.0, 260.0, 311.0, 322.0, 316.0, 320.0, 311.0, 316.0, 316.0, 317.0, 293.0, 294.0, 314.0, 322.0, 316.0, 317.0, 276.0, 297.0, 288.0, 291.0, 288.0, 294.0, 318.0, 315.0, 324.0, 312.0, 313.0, 314.0, 293.0, 294.0, 291.0, 291.0, 316.0, 314.0, 288.0, 299.0, 322.0, 314.0, 316.0, 314.0, 319.0, 314.0, 322.0, 317.0, 316.0, 314.0, 291.0, 296.0, 314.0, 322.0, 285.0, 297.0, 325.0, 311.0, 288.0, 288.0, 295.0, 289.0, 326.0, 304.0, 317.0, 322.0, 322.0, 317.0, 316.0, 317.0, 308.0, 319.0, 296.0, 286.0, 288.0, 294.0, 306.0, 324.0, 316.0, 314.0, 311.0, 322.0, 262.0, 260.0, 289.0, 298.0, 296.0, 297.0, 322.0, 311.0, 313.0, 317.0, 293.0, 289.0, 319.0, 317.0, 313.0, 317.0, 309.0, 278.0, 314.0, 322.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8934318351724546, "mean_processing_ms": 0.24699024834277958, "mean_inference_ms": 1.4821846503211202}, "off_policy_estimator": {}, "info": {"num_steps_trained": 8160000, "num_steps_sampled": 4352000, "sample_time_ms": 22391.649, "load_time_ms": 38.471, "grad_time_ms": 10443.746, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.004414581228047609, "policy_loss": -0.003194813383743167, "vf_loss": 81.79281616210938, "vf_explained_var": 0.764918863773346, "kl": 0.0018889306811615825, "entropy": 1.1397979259490967, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4352000, "episodes_total": 10880, "training_iteration": 340, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-39-42", "timestamp": 1660257582, "time_this_iter_s": 31.150686979293823, "time_total_s": 15994.374595165253, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 15994.374595165253, "timesteps_since_restore": 4352000, "iterations_since_restore": 340, "perf": {"cpu_util_percent": 27.049999999999997, "ram_util_percent": 58.795454545454525}}
+{"episode_reward_max": 639.0, "episode_reward_min": 462.0, "episode_reward_mean": 610.54, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 228.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 305.27}, "custom_metrics": {"sparse_reward_mean": 211.2, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 188.14, "shaped_reward_min": 142, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.57, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 18.6, "onion_pickup_agent_1_min": 12, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.28, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 17.97, "useful_onion_pickup_agent_1_min": 12, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.18, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.24, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.05, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.08, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.13, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.94, "potting_onion_agent_1_min": 12, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.31, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.5, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.79, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.97, "useful_dish_pickup_agent_1_min": 3, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.36, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.42, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.75, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.99, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.64, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.94, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.13, 
"optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.94, "optimal_onion_potting_agent_1_min": 12, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.13, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.94, "viable_onion_potting_agent_1_min": 12, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [633.0, 462.0, 636.0, 639.0, 587.0, 576.0, 639.0, 633.0, 633.0, 633.0, 633.0, 630.0, 579.0, 633.0, 636.0, 633.0, 636.0, 582.0, 573.0, 630.0, 582.0, 570.0, 587.0, 573.0, 544.0, 582.0, 630.0, 630.0, 582.0, 630.0, 633.0, 639.0, 627.0, 587.0, 582.0, 630.0, 587.0, 636.0, 630.0, 633.0, 639.0, 630.0, 587.0, 636.0, 582.0, 636.0, 576.0, 584.0, 630.0, 639.0, 639.0, 633.0, 627.0, 582.0, 582.0, 630.0, 630.0, 633.0, 522.0, 587.0, 593.0, 633.0, 630.0, 582.0, 636.0, 630.0, 587.0, 636.0, 630.0, 590.0, 639.0, 636.0, 582.0, 624.0, 633.0, 587.0, 630.0, 582.0, 630.0, 630.0, 630.0, 636.0, 636.0, 579.0, 587.0, 587.0, 636.0, 584.0, 578.0, 630.0, 636.0, 630.0, 582.0, 579.0, 636.0, 627.0, 582.0, 633.0, 582.0, 630.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [321.0, 312.0, 228.0, 234.0, 314.0, 322.0, 327.0, 312.0, 293.0, 294.0, 290.0, 286.0, 320.0, 319.0, 322.0, 311.0, 324.0, 309.0, 314.0, 319.0, 317.0, 316.0, 308.0, 322.0, 287.0, 292.0, 314.0, 319.0, 319.0, 317.0, 314.0, 319.0, 314.0, 322.0, 296.0, 286.0, 296.0, 277.0, 321.0, 309.0, 288.0, 294.0, 285.0, 285.0, 285.0, 302.0, 287.0, 286.0, 275.0, 269.0, 289.0, 293.0, 318.0, 312.0, 311.0, 319.0, 301.0, 281.0, 315.0, 315.0, 318.0, 315.0, 316.0, 323.0, 313.0, 314.0, 293.0, 294.0, 291.0, 291.0, 316.0, 314.0, 288.0, 299.0, 322.0, 314.0, 316.0, 314.0, 319.0, 314.0, 322.0, 317.0, 316.0, 314.0, 291.0, 296.0, 314.0, 322.0, 285.0, 297.0, 325.0, 311.0, 288.0, 288.0, 295.0, 289.0, 326.0, 
304.0, 317.0, 322.0, 322.0, 317.0, 316.0, 317.0, 308.0, 319.0, 296.0, 286.0, 288.0, 294.0, 306.0, 324.0, 316.0, 314.0, 311.0, 322.0, 262.0, 260.0, 289.0, 298.0, 296.0, 297.0, 322.0, 311.0, 313.0, 317.0, 293.0, 289.0, 319.0, 317.0, 313.0, 317.0, 309.0, 278.0, 314.0, 322.0, 323.0, 307.0, 304.0, 286.0, 320.0, 319.0, 316.0, 320.0, 296.0, 286.0, 313.0, 311.0, 311.0, 322.0, 291.0, 296.0, 314.0, 316.0, 296.0, 286.0, 311.0, 319.0, 321.0, 309.0, 316.0, 314.0, 319.0, 317.0, 317.0, 319.0, 293.0, 286.0, 301.0, 286.0, 298.0, 289.0, 314.0, 322.0, 288.0, 296.0, 286.0, 292.0, 318.0, 312.0, 312.0, 324.0, 313.0, 317.0, 288.0, 294.0, 295.0, 284.0, 314.0, 322.0, 316.0, 311.0, 291.0, 291.0, 316.0, 317.0, 285.0, 297.0, 313.0, 317.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8921345690140595, "mean_processing_ms": 0.24673048959961144, "mean_inference_ms": 1.4810833291212553}, "off_policy_estimator": {}, "info": {"num_steps_trained": 8184000, "num_steps_sampled": 4364800, "sample_time_ms": 22778.758, "load_time_ms": 38.164, "grad_time_ms": 10656.478, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.005775378551334143, "policy_loss": -0.00215825904160738, "vf_loss": 85.0276870727539, "vf_explained_var": 0.7658646106719971, "kl": 0.0019542332738637924, "entropy": 1.1382619142532349, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4364800, "episodes_total": 10912, "training_iteration": 341, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-40-20", "timestamp": 1660257620, "time_this_iter_s": 37.338398933410645, "time_total_s": 16031.712994098663, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 16031.712994098663, "timesteps_since_restore": 4364800, "iterations_since_restore": 341, "perf": {"cpu_util_percent": 27.592452830188673, "ram_util_percent": 58.783018867924525}}
+{"episode_reward_max": 639.0, "episode_reward_min": 462.0, "episode_reward_mean": 610.44, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 228.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 305.22}, "custom_metrics": {"sparse_reward_mean": 211.4, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 187.64, "shaped_reward_min": 142, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.65, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 18.62, "onion_pickup_agent_1_min": 12, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.35, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 17.96, "useful_onion_pickup_agent_1_min": 12, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.19, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.27, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.04, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.06, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.19, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.93, "potting_onion_agent_1_min": 12, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.32, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.58, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.65, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.91, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.43, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.47, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.68, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.04, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.59, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.01, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.19, 
"optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.93, "optimal_onion_potting_agent_1_min": 12, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.19, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.93, "viable_onion_potting_agent_1_min": 12, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [630.0, 573.0, 627.0, 630.0, 636.0, 636.0, 636.0, 582.0, 624.0, 576.0, 575.0, 633.0, 587.0, 630.0, 630.0, 636.0, 624.0, 633.0, 636.0, 590.0, 630.0, 584.0, 576.0, 582.0, 633.0, 633.0, 636.0, 582.0, 582.0, 639.0, 579.0, 564.0, 636.0, 630.0, 587.0, 636.0, 630.0, 590.0, 639.0, 636.0, 582.0, 624.0, 633.0, 587.0, 630.0, 582.0, 630.0, 630.0, 630.0, 636.0, 636.0, 579.0, 587.0, 587.0, 636.0, 584.0, 578.0, 630.0, 636.0, 630.0, 582.0, 579.0, 636.0, 627.0, 582.0, 633.0, 582.0, 630.0, 633.0, 462.0, 636.0, 639.0, 587.0, 576.0, 639.0, 633.0, 633.0, 633.0, 633.0, 630.0, 579.0, 633.0, 636.0, 633.0, 636.0, 582.0, 573.0, 630.0, 582.0, 570.0, 587.0, 573.0, 544.0, 582.0, 630.0, 630.0, 582.0, 630.0, 633.0, 639.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [314.0, 316.0, 288.0, 285.0, 305.0, 322.0, 319.0, 311.0, 314.0, 322.0, 319.0, 317.0, 317.0, 319.0, 287.0, 295.0, 308.0, 316.0, 283.0, 293.0, 290.0, 285.0, 322.0, 311.0, 285.0, 302.0, 318.0, 312.0, 314.0, 316.0, 314.0, 322.0, 311.0, 313.0, 311.0, 322.0, 314.0, 322.0, 285.0, 305.0, 316.0, 314.0, 301.0, 283.0, 295.0, 281.0, 290.0, 292.0, 314.0, 319.0, 311.0, 322.0, 317.0, 319.0, 291.0, 291.0, 286.0, 296.0, 322.0, 317.0, 299.0, 280.0, 281.0, 283.0, 319.0, 317.0, 313.0, 317.0, 309.0, 278.0, 314.0, 322.0, 323.0, 307.0, 304.0, 286.0, 320.0, 319.0, 316.0, 320.0, 296.0, 286.0, 313.0, 311.0, 311.0, 322.0, 291.0, 296.0, 314.0, 316.0, 296.0, 286.0, 311.0, 319.0, 321.0, 309.0, 316.0, 
314.0, 319.0, 317.0, 317.0, 319.0, 293.0, 286.0, 301.0, 286.0, 298.0, 289.0, 314.0, 322.0, 288.0, 296.0, 286.0, 292.0, 318.0, 312.0, 312.0, 324.0, 313.0, 317.0, 288.0, 294.0, 295.0, 284.0, 314.0, 322.0, 316.0, 311.0, 291.0, 291.0, 316.0, 317.0, 285.0, 297.0, 313.0, 317.0, 321.0, 312.0, 228.0, 234.0, 314.0, 322.0, 327.0, 312.0, 293.0, 294.0, 290.0, 286.0, 320.0, 319.0, 322.0, 311.0, 324.0, 309.0, 314.0, 319.0, 317.0, 316.0, 308.0, 322.0, 287.0, 292.0, 314.0, 319.0, 319.0, 317.0, 314.0, 319.0, 314.0, 322.0, 296.0, 286.0, 296.0, 277.0, 321.0, 309.0, 288.0, 294.0, 285.0, 285.0, 285.0, 302.0, 287.0, 286.0, 275.0, 269.0, 289.0, 293.0, 318.0, 312.0, 311.0, 319.0, 301.0, 281.0, 315.0, 315.0, 318.0, 315.0, 316.0, 323.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8908455849362553, "mean_processing_ms": 0.24647214238020762, "mean_inference_ms": 1.4799101322874493}, "off_policy_estimator": {}, "info": {"num_steps_trained": 8208000, "num_steps_sampled": 4377600, "sample_time_ms": 22385.575, "load_time_ms": 38.145, "grad_time_ms": 10547.897, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.005002778489142656, "policy_loss": -0.002570929704234004, "vf_loss": 81.44794464111328, "vf_explained_var": 0.765848696231842, "kl": 0.002198006259277463, "entropy": 1.1421762704849243, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4377600, "episodes_total": 10944, "training_iteration": 342, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-40-48", "timestamp": 1660257648, "time_this_iter_s": 28.1991069316864, "time_total_s": 16059.91210103035, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 16059.91210103035, "timesteps_since_restore": 4377600, "iterations_since_restore": 342, "perf": {"cpu_util_percent": 30.8525, "ram_util_percent": 58.825}}
+{"episode_reward_max": 639.0, "episode_reward_min": 462.0, "episode_reward_mean": 608.72, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 228.0}, "policy_reward_max": {"ppo": 329.0}, "policy_reward_mean": {"ppo": 304.36}, "custom_metrics": {"sparse_reward_mean": 210.8, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 187.12, "shaped_reward_min": 142, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.7, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 18.5, "onion_pickup_agent_1_min": 12, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.47, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 17.92, "useful_onion_pickup_agent_1_min": 12, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.24, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.33, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.06, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.06, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.19, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.77, "potting_onion_agent_1_min": 12, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.25, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.57, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.59, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.04, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.45, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.37, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.58, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.09, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.54, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.04, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.19, 
"optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.77, "optimal_onion_potting_agent_1_min": 12, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.19, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.77, "viable_onion_potting_agent_1_min": 12, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [627.0, 633.0, 630.0, 582.0, 587.0, 582.0, 582.0, 630.0, 530.0, 633.0, 633.0, 639.0, 579.0, 627.0, 633.0, 636.0, 633.0, 587.0, 627.0, 627.0, 630.0, 630.0, 587.0, 582.0, 582.0, 582.0, 582.0, 596.0, 636.0, 633.0, 630.0, 576.0, 582.0, 633.0, 582.0, 630.0, 633.0, 462.0, 636.0, 639.0, 587.0, 576.0, 639.0, 633.0, 633.0, 633.0, 633.0, 630.0, 579.0, 633.0, 636.0, 633.0, 636.0, 582.0, 573.0, 630.0, 582.0, 570.0, 587.0, 573.0, 544.0, 582.0, 630.0, 630.0, 582.0, 630.0, 633.0, 639.0, 630.0, 573.0, 627.0, 630.0, 636.0, 636.0, 636.0, 582.0, 624.0, 576.0, 575.0, 633.0, 587.0, 630.0, 630.0, 636.0, 624.0, 633.0, 636.0, 590.0, 630.0, 584.0, 576.0, 582.0, 633.0, 633.0, 636.0, 582.0, 582.0, 639.0, 579.0, 564.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [308.0, 319.0, 324.0, 309.0, 311.0, 319.0, 296.0, 286.0, 296.0, 291.0, 288.0, 294.0, 293.0, 289.0, 321.0, 309.0, 259.0, 271.0, 319.0, 314.0, 329.0, 304.0, 314.0, 325.0, 283.0, 296.0, 308.0, 319.0, 314.0, 319.0, 319.0, 317.0, 317.0, 316.0, 288.0, 299.0, 308.0, 319.0, 311.0, 316.0, 316.0, 314.0, 308.0, 322.0, 296.0, 291.0, 288.0, 294.0, 296.0, 286.0, 286.0, 296.0, 282.0, 300.0, 301.0, 295.0, 321.0, 315.0, 319.0, 314.0, 316.0, 314.0, 296.0, 280.0, 291.0, 291.0, 316.0, 317.0, 285.0, 297.0, 313.0, 317.0, 321.0, 312.0, 228.0, 234.0, 314.0, 322.0, 327.0, 312.0, 293.0, 294.0, 290.0, 286.0, 320.0, 319.0, 322.0, 311.0, 324.0, 309.0, 314.0, 319.0, 317.0, 316.0, 308.0, 322.0, 287.0, 
292.0, 314.0, 319.0, 319.0, 317.0, 314.0, 319.0, 314.0, 322.0, 296.0, 286.0, 296.0, 277.0, 321.0, 309.0, 288.0, 294.0, 285.0, 285.0, 285.0, 302.0, 287.0, 286.0, 275.0, 269.0, 289.0, 293.0, 318.0, 312.0, 311.0, 319.0, 301.0, 281.0, 315.0, 315.0, 318.0, 315.0, 316.0, 323.0, 314.0, 316.0, 288.0, 285.0, 305.0, 322.0, 319.0, 311.0, 314.0, 322.0, 319.0, 317.0, 317.0, 319.0, 287.0, 295.0, 308.0, 316.0, 283.0, 293.0, 290.0, 285.0, 322.0, 311.0, 285.0, 302.0, 318.0, 312.0, 314.0, 316.0, 314.0, 322.0, 311.0, 313.0, 311.0, 322.0, 314.0, 322.0, 285.0, 305.0, 316.0, 314.0, 301.0, 283.0, 295.0, 281.0, 290.0, 292.0, 314.0, 319.0, 311.0, 322.0, 317.0, 319.0, 291.0, 291.0, 286.0, 296.0, 322.0, 317.0, 299.0, 280.0, 281.0, 283.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8895732427757973, "mean_processing_ms": 0.24621711936805896, "mean_inference_ms": 1.4789333750052633}, "off_policy_estimator": {}, "info": {"num_steps_trained": 8232000, "num_steps_sampled": 4390400, "sample_time_ms": 22479.914, "load_time_ms": 38.369, "grad_time_ms": 10420.997, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0025722135324031115, "policy_loss": -0.00497409887611866, "vf_loss": 81.19109344482422, "vf_explained_var": 0.7659382820129395, "kl": 0.0019239649409428239, "entropy": 1.1455968618392944, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4390400, "episodes_total": 10976, "training_iteration": 343, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-41-18", "timestamp": 1660257678, "time_this_iter_s": 30.595246076583862, "time_total_s": 16090.507347106934, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 16090.507347106934, "timesteps_since_restore": 4390400, "iterations_since_restore": 343, "perf": {"cpu_util_percent": 28.927906976744183, "ram_util_percent": 58.81860465116278}}
+{"episode_reward_max": 639.0, "episode_reward_min": 521.0, "episode_reward_mean": 607.91, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 259.0}, "policy_reward_max": {"ppo": 329.0}, "policy_reward_mean": {"ppo": 303.955}, "custom_metrics": {"sparse_reward_mean": 210.4, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 187.11, "shaped_reward_min": 161, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.38, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.68, "onion_pickup_agent_1_min": 15, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.19, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 18.04, "useful_onion_pickup_agent_1_min": 15, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.22, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.31, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.06, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.05, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.9, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 18.0, "potting_onion_agent_1_min": 15, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.34, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.56, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.64, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.98, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.44, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.42, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.7, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.01, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.63, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.94, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.9, 
"optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 18.0, "optimal_onion_potting_agent_1_min": 15, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.9, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 18.0, "viable_onion_potting_agent_1_min": 15, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [567.0, 582.0, 630.0, 587.0, 630.0, 587.0, 579.0, 581.0, 630.0, 587.0, 576.0, 521.0, 587.0, 639.0, 587.0, 630.0, 630.0, 639.0, 630.0, 630.0, 587.0, 582.0, 582.0, 630.0, 587.0, 639.0, 582.0, 582.0, 630.0, 630.0, 590.0, 630.0, 582.0, 630.0, 633.0, 639.0, 630.0, 573.0, 627.0, 630.0, 636.0, 636.0, 636.0, 582.0, 624.0, 576.0, 575.0, 633.0, 587.0, 630.0, 630.0, 636.0, 624.0, 633.0, 636.0, 590.0, 630.0, 584.0, 576.0, 582.0, 633.0, 633.0, 636.0, 582.0, 582.0, 639.0, 579.0, 564.0, 627.0, 633.0, 630.0, 582.0, 587.0, 582.0, 582.0, 630.0, 530.0, 633.0, 633.0, 639.0, 579.0, 627.0, 633.0, 636.0, 633.0, 587.0, 627.0, 627.0, 630.0, 630.0, 587.0, 582.0, 582.0, 582.0, 582.0, 596.0, 636.0, 633.0, 630.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [278.0, 289.0, 290.0, 292.0, 321.0, 309.0, 288.0, 299.0, 318.0, 312.0, 288.0, 299.0, 287.0, 292.0, 284.0, 297.0, 323.0, 307.0, 288.0, 299.0, 287.0, 289.0, 260.0, 261.0, 296.0, 291.0, 317.0, 322.0, 291.0, 296.0, 316.0, 314.0, 313.0, 317.0, 322.0, 317.0, 311.0, 319.0, 319.0, 311.0, 285.0, 302.0, 299.0, 283.0, 291.0, 291.0, 306.0, 324.0, 296.0, 291.0, 324.0, 315.0, 293.0, 289.0, 298.0, 284.0, 311.0, 319.0, 316.0, 314.0, 296.0, 294.0, 318.0, 312.0, 301.0, 281.0, 315.0, 315.0, 318.0, 315.0, 316.0, 323.0, 314.0, 316.0, 288.0, 285.0, 305.0, 322.0, 319.0, 311.0, 314.0, 322.0, 319.0, 317.0, 317.0, 319.0, 287.0, 295.0, 308.0, 316.0, 283.0, 293.0, 290.0, 285.0, 322.0, 311.0, 285.0, 
302.0, 318.0, 312.0, 314.0, 316.0, 314.0, 322.0, 311.0, 313.0, 311.0, 322.0, 314.0, 322.0, 285.0, 305.0, 316.0, 314.0, 301.0, 283.0, 295.0, 281.0, 290.0, 292.0, 314.0, 319.0, 311.0, 322.0, 317.0, 319.0, 291.0, 291.0, 286.0, 296.0, 322.0, 317.0, 299.0, 280.0, 281.0, 283.0, 308.0, 319.0, 324.0, 309.0, 311.0, 319.0, 296.0, 286.0, 296.0, 291.0, 288.0, 294.0, 293.0, 289.0, 321.0, 309.0, 259.0, 271.0, 319.0, 314.0, 329.0, 304.0, 314.0, 325.0, 283.0, 296.0, 308.0, 319.0, 314.0, 319.0, 319.0, 317.0, 317.0, 316.0, 288.0, 299.0, 308.0, 319.0, 311.0, 316.0, 316.0, 314.0, 308.0, 322.0, 296.0, 291.0, 288.0, 294.0, 296.0, 286.0, 286.0, 296.0, 282.0, 300.0, 301.0, 295.0, 321.0, 315.0, 319.0, 314.0, 316.0, 314.0, 296.0, 280.0]}, "sampler_perf": {"mean_env_wait_ms": 0.888300023019413, "mean_processing_ms": 0.24596248593117787, "mean_inference_ms": 1.4777042667426168}, "off_policy_estimator": {}, "info": {"num_steps_trained": 8256000, "num_steps_sampled": 4403200, "sample_time_ms": 22324.862, "load_time_ms": 38.863, "grad_time_ms": 10570.078, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.001806688029319048, "policy_loss": -0.005986546631902456, "vf_loss": 83.65050506591797, "vf_explained_var": 0.7647177577018738, "kl": 0.002452569780871272, "entropy": 1.1436399221420288, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4403200, "episodes_total": 11008, "training_iteration": 344, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-41-52", "timestamp": 1660257712, "time_this_iter_s": 33.82224774360657, "time_total_s": 16124.32959485054, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 16124.32959485054, "timesteps_since_restore": 4403200, "iterations_since_restore": 344, "perf": {"cpu_util_percent": 29.40625, "ram_util_percent": 58.83958333333334}}
+{"episode_reward_max": 639.0, "episode_reward_min": 521.0, "episode_reward_mean": 603.86, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 255.0}, "policy_reward_max": {"ppo": 329.0}, "policy_reward_mean": {"ppo": 301.93}, "custom_metrics": {"sparse_reward_mean": 208.8, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 186.26, "shaped_reward_min": 161, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.41, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.31, "onion_pickup_agent_1_min": 14, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.22, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.77, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.22, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.26, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.06, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.04, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.95, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.74, "potting_onion_agent_1_min": 14, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.28, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.59, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.65, "useful_dish_pickup_agent_0_min": 4, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.98, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.4, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.41, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.66, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.0, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.58, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.91, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.95, 
"optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.74, "optimal_onion_potting_agent_1_min": 14, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.95, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.74, "viable_onion_potting_agent_1_min": 14, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 627.0, 633.0, 636.0, 582.0, 633.0, 535.0, 621.0, 579.0, 624.0, 573.0, 579.0, 633.0, 587.0, 636.0, 579.0, 576.0, 636.0, 630.0, 587.0, 522.0, 582.0, 579.0, 627.0, 639.0, 627.0, 627.0, 541.0, 639.0, 639.0, 587.0, 582.0, 582.0, 639.0, 579.0, 564.0, 627.0, 633.0, 630.0, 582.0, 587.0, 582.0, 582.0, 630.0, 530.0, 633.0, 633.0, 639.0, 579.0, 627.0, 633.0, 636.0, 633.0, 587.0, 627.0, 627.0, 630.0, 630.0, 587.0, 582.0, 582.0, 582.0, 582.0, 596.0, 636.0, 633.0, 630.0, 576.0, 567.0, 582.0, 630.0, 587.0, 630.0, 587.0, 579.0, 581.0, 630.0, 587.0, 576.0, 521.0, 587.0, 639.0, 587.0, 630.0, 630.0, 639.0, 630.0, 630.0, 587.0, 582.0, 582.0, 630.0, 587.0, 639.0, 582.0, 582.0, 630.0, 630.0, 590.0, 630.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [289.0, 293.0, 309.0, 318.0, 316.0, 317.0, 319.0, 317.0, 294.0, 288.0, 323.0, 310.0, 273.0, 262.0, 310.0, 311.0, 285.0, 294.0, 322.0, 302.0, 292.0, 281.0, 283.0, 296.0, 317.0, 316.0, 288.0, 299.0, 316.0, 320.0, 288.0, 291.0, 280.0, 296.0, 319.0, 317.0, 314.0, 316.0, 283.0, 304.0, 267.0, 255.0, 289.0, 293.0, 294.0, 285.0, 313.0, 314.0, 319.0, 320.0, 321.0, 306.0, 313.0, 314.0, 271.0, 270.0, 314.0, 325.0, 324.0, 315.0, 297.0, 290.0, 299.0, 283.0, 286.0, 296.0, 322.0, 317.0, 299.0, 280.0, 281.0, 283.0, 308.0, 319.0, 324.0, 309.0, 311.0, 319.0, 296.0, 286.0, 296.0, 291.0, 288.0, 294.0, 293.0, 289.0, 321.0, 309.0, 259.0, 271.0, 319.0, 314.0, 329.0, 304.0, 314.0, 325.0, 283.0, 
296.0, 308.0, 319.0, 314.0, 319.0, 319.0, 317.0, 317.0, 316.0, 288.0, 299.0, 308.0, 319.0, 311.0, 316.0, 316.0, 314.0, 308.0, 322.0, 296.0, 291.0, 288.0, 294.0, 296.0, 286.0, 286.0, 296.0, 282.0, 300.0, 301.0, 295.0, 321.0, 315.0, 319.0, 314.0, 316.0, 314.0, 296.0, 280.0, 278.0, 289.0, 290.0, 292.0, 321.0, 309.0, 288.0, 299.0, 318.0, 312.0, 288.0, 299.0, 287.0, 292.0, 284.0, 297.0, 323.0, 307.0, 288.0, 299.0, 287.0, 289.0, 260.0, 261.0, 296.0, 291.0, 317.0, 322.0, 291.0, 296.0, 316.0, 314.0, 313.0, 317.0, 322.0, 317.0, 311.0, 319.0, 319.0, 311.0, 285.0, 302.0, 299.0, 283.0, 291.0, 291.0, 306.0, 324.0, 296.0, 291.0, 324.0, 315.0, 293.0, 289.0, 298.0, 284.0, 311.0, 319.0, 316.0, 314.0, 296.0, 294.0, 318.0, 312.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8870510518721875, "mean_processing_ms": 0.24571562902687222, "mean_inference_ms": 1.477060351190489}, "off_policy_estimator": {}, "info": {"num_steps_trained": 8280000, "num_steps_sampled": 4416000, "sample_time_ms": 22762.706, "load_time_ms": 38.824, "grad_time_ms": 10571.457, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.005129000172019005, "policy_loss": -0.0031147233676165342, "vf_loss": 88.14037322998047, "vf_explained_var": 0.763336181640625, "kl": 0.0019052595598623157, "entropy": 1.1406329870224, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4416000, "episodes_total": 11040, "training_iteration": 345, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-42-32", "timestamp": 1660257752, "time_this_iter_s": 39.47760009765625, "time_total_s": 16163.807194948196, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 16163.807194948196, "timesteps_since_restore": 4416000, "iterations_since_restore": 345, "perf": {"cpu_util_percent": 25.21272727272727, "ram_util_percent": 58.801818181818156}}
+{"episode_reward_max": 639.0, "episode_reward_min": 521.0, "episode_reward_mean": 602.37, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 255.0}, "policy_reward_max": {"ppo": 326.0}, "policy_reward_mean": {"ppo": 301.185}, "custom_metrics": {"sparse_reward_mean": 208.4, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 185.57, "shaped_reward_min": 155, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.45, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.19, "onion_pickup_agent_1_min": 14, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.22, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.65, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.22, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.28, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.04, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.08, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 15.95, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.67, "potting_onion_agent_1_min": 14, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.4, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.72, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 5.6, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.87, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.46, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.58, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.72, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.94, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.63, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.85, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.95, 
"optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.67, "optimal_onion_potting_agent_1_min": 14, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.95, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.67, "viable_onion_potting_agent_1_min": 14, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [627.0, 587.0, 569.0, 555.0, 570.0, 639.0, 522.0, 633.0, 579.0, 639.0, 582.0, 627.0, 639.0, 579.0, 639.0, 630.0, 636.0, 639.0, 630.0, 627.0, 579.0, 579.0, 630.0, 573.0, 570.0, 579.0, 630.0, 581.0, 639.0, 587.0, 587.0, 541.0, 636.0, 633.0, 630.0, 576.0, 567.0, 582.0, 630.0, 587.0, 630.0, 587.0, 579.0, 581.0, 630.0, 587.0, 576.0, 521.0, 587.0, 639.0, 587.0, 630.0, 630.0, 639.0, 630.0, 630.0, 587.0, 582.0, 582.0, 630.0, 587.0, 639.0, 582.0, 582.0, 630.0, 630.0, 590.0, 630.0, 582.0, 627.0, 633.0, 636.0, 582.0, 633.0, 535.0, 621.0, 579.0, 624.0, 573.0, 579.0, 633.0, 587.0, 636.0, 579.0, 576.0, 636.0, 630.0, 587.0, 522.0, 582.0, 579.0, 627.0, 639.0, 627.0, 627.0, 541.0, 639.0, 639.0, 587.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [315.0, 312.0, 299.0, 288.0, 285.0, 284.0, 279.0, 276.0, 287.0, 283.0, 322.0, 317.0, 257.0, 265.0, 319.0, 314.0, 285.0, 294.0, 319.0, 320.0, 291.0, 291.0, 316.0, 311.0, 320.0, 319.0, 298.0, 281.0, 317.0, 322.0, 308.0, 322.0, 319.0, 317.0, 319.0, 320.0, 326.0, 304.0, 318.0, 309.0, 293.0, 286.0, 285.0, 294.0, 321.0, 309.0, 294.0, 279.0, 291.0, 279.0, 283.0, 296.0, 314.0, 316.0, 294.0, 287.0, 322.0, 317.0, 292.0, 295.0, 294.0, 293.0, 260.0, 281.0, 321.0, 315.0, 319.0, 314.0, 316.0, 314.0, 296.0, 280.0, 278.0, 289.0, 290.0, 292.0, 321.0, 309.0, 288.0, 299.0, 318.0, 312.0, 288.0, 299.0, 287.0, 292.0, 284.0, 297.0, 323.0, 307.0, 288.0, 299.0, 287.0, 289.0, 260.0, 261.0, 296.0, 
291.0, 317.0, 322.0, 291.0, 296.0, 316.0, 314.0, 313.0, 317.0, 322.0, 317.0, 311.0, 319.0, 319.0, 311.0, 285.0, 302.0, 299.0, 283.0, 291.0, 291.0, 306.0, 324.0, 296.0, 291.0, 324.0, 315.0, 293.0, 289.0, 298.0, 284.0, 311.0, 319.0, 316.0, 314.0, 296.0, 294.0, 318.0, 312.0, 289.0, 293.0, 309.0, 318.0, 316.0, 317.0, 319.0, 317.0, 294.0, 288.0, 323.0, 310.0, 273.0, 262.0, 310.0, 311.0, 285.0, 294.0, 322.0, 302.0, 292.0, 281.0, 283.0, 296.0, 317.0, 316.0, 288.0, 299.0, 316.0, 320.0, 288.0, 291.0, 280.0, 296.0, 319.0, 317.0, 314.0, 316.0, 283.0, 304.0, 267.0, 255.0, 289.0, 293.0, 294.0, 285.0, 313.0, 314.0, 319.0, 320.0, 321.0, 306.0, 313.0, 314.0, 271.0, 270.0, 314.0, 325.0, 324.0, 315.0, 297.0, 290.0, 299.0, 283.0]}, "sampler_perf": {"mean_env_wait_ms": 0.885814344606481, "mean_processing_ms": 0.24547265426163356, "mean_inference_ms": 1.476598213105171}, "off_policy_estimator": {}, "info": {"num_steps_trained": 8304000, "num_steps_sampled": 4428800, "sample_time_ms": 22961.91, "load_time_ms": 38.412, "grad_time_ms": 10610.867, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0004017222672700882, "policy_loss": -0.007523353677242994, "vf_loss": 84.93380737304688, "vf_explained_var": 0.7618634104728699, "kl": 0.0019265868468210101, "entropy": 1.1366103887557983, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4428800, "episodes_total": 11072, "training_iteration": 346, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-43-09", "timestamp": 1660257789, "time_this_iter_s": 37.03333592414856, "time_total_s": 16200.840530872345, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 16200.840530872345, "timesteps_since_restore": 4428800, "iterations_since_restore": 346, "perf": {"cpu_util_percent": 30.683018867924527, "ram_util_percent": 58.82264150943394}}
+{"episode_reward_max": 639.0, "episode_reward_min": 519.0, "episode_reward_mean": 600.56, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 249.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 300.28}, "custom_metrics": {"sparse_reward_mean": 207.8, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 184.96, "shaped_reward_min": 155, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.59, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 17.88, "onion_pickup_agent_1_min": 14, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.34, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 17.36, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.18, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.3, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.03, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.08, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 16.08, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.39, "potting_onion_agent_1_min": 14, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.22, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.9, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 5.48, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.02, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.48, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.64, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.56, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.06, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.5, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.95, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.08, 
"optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.39, "optimal_onion_potting_agent_1_min": 14, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.08, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.39, "viable_onion_potting_agent_1_min": 14, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [630.0, 587.0, 630.0, 579.0, 582.0, 519.0, 587.0, 630.0, 582.0, 582.0, 582.0, 630.0, 582.0, 570.0, 587.0, 636.0, 630.0, 587.0, 579.0, 530.0, 587.0, 630.0, 630.0, 582.0, 527.0, 582.0, 630.0, 627.0, 576.0, 636.0, 636.0, 630.0, 630.0, 630.0, 590.0, 630.0, 582.0, 627.0, 633.0, 636.0, 582.0, 633.0, 535.0, 621.0, 579.0, 624.0, 573.0, 579.0, 633.0, 587.0, 636.0, 579.0, 576.0, 636.0, 630.0, 587.0, 522.0, 582.0, 579.0, 627.0, 639.0, 627.0, 627.0, 541.0, 639.0, 639.0, 587.0, 582.0, 627.0, 587.0, 569.0, 555.0, 570.0, 639.0, 522.0, 633.0, 579.0, 639.0, 582.0, 627.0, 639.0, 579.0, 639.0, 630.0, 636.0, 639.0, 630.0, 627.0, 579.0, 579.0, 630.0, 573.0, 570.0, 579.0, 630.0, 581.0, 639.0, 587.0, 587.0, 541.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [312.0, 318.0, 287.0, 300.0, 316.0, 314.0, 283.0, 296.0, 285.0, 297.0, 270.0, 249.0, 287.0, 300.0, 319.0, 311.0, 285.0, 297.0, 291.0, 291.0, 288.0, 294.0, 311.0, 319.0, 291.0, 291.0, 295.0, 275.0, 285.0, 302.0, 309.0, 327.0, 311.0, 319.0, 296.0, 291.0, 288.0, 291.0, 262.0, 268.0, 293.0, 294.0, 316.0, 314.0, 311.0, 319.0, 286.0, 296.0, 265.0, 262.0, 287.0, 295.0, 316.0, 314.0, 321.0, 306.0, 287.0, 289.0, 316.0, 320.0, 322.0, 314.0, 313.0, 317.0, 311.0, 319.0, 316.0, 314.0, 296.0, 294.0, 318.0, 312.0, 289.0, 293.0, 309.0, 318.0, 316.0, 317.0, 319.0, 317.0, 294.0, 288.0, 323.0, 310.0, 273.0, 262.0, 310.0, 311.0, 285.0, 294.0, 322.0, 302.0, 292.0, 281.0, 283.0, 296.0, 317.0, 
316.0, 288.0, 299.0, 316.0, 320.0, 288.0, 291.0, 280.0, 296.0, 319.0, 317.0, 314.0, 316.0, 283.0, 304.0, 267.0, 255.0, 289.0, 293.0, 294.0, 285.0, 313.0, 314.0, 319.0, 320.0, 321.0, 306.0, 313.0, 314.0, 271.0, 270.0, 314.0, 325.0, 324.0, 315.0, 297.0, 290.0, 299.0, 283.0, 315.0, 312.0, 299.0, 288.0, 285.0, 284.0, 279.0, 276.0, 287.0, 283.0, 322.0, 317.0, 257.0, 265.0, 319.0, 314.0, 285.0, 294.0, 319.0, 320.0, 291.0, 291.0, 316.0, 311.0, 320.0, 319.0, 298.0, 281.0, 317.0, 322.0, 308.0, 322.0, 319.0, 317.0, 319.0, 320.0, 326.0, 304.0, 318.0, 309.0, 293.0, 286.0, 285.0, 294.0, 321.0, 309.0, 294.0, 279.0, 291.0, 279.0, 283.0, 296.0, 314.0, 316.0, 294.0, 287.0, 322.0, 317.0, 292.0, 295.0, 294.0, 293.0, 260.0, 281.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8845895698087, "mean_processing_ms": 0.24523366351665968, "mean_inference_ms": 1.4761306525958986}, "off_policy_estimator": {}, "info": {"num_steps_trained": 8328000, "num_steps_sampled": 4441600, "sample_time_ms": 22533.553, "load_time_ms": 38.187, "grad_time_ms": 10390.855, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0007659171824343503, "policy_loss": -0.008885729126632214, "vf_loss": 86.87432861328125, "vf_explained_var": 0.7518642544746399, "kl": 0.0019218157976865768, "entropy": 1.1352366209030151, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4441600, "episodes_total": 11104, "training_iteration": 347, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-43-40", "timestamp": 1660257820, "time_this_iter_s": 30.97221803665161, "time_total_s": 16231.812748908997, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 16231.812748908997, "timesteps_since_restore": 4441600, "iterations_since_restore": 347, "perf": {"cpu_util_percent": 33.49999999999999, "ram_util_percent": 59.33636363636363}}
+{"episode_reward_max": 639.0, "episode_reward_min": 519.0, "episode_reward_mean": 602.52, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 249.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 301.26}, "custom_metrics": {"sparse_reward_mean": 208.6, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 185.32, "shaped_reward_min": 155, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.59, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 18.02, "onion_pickup_agent_1_min": 14, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.35, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 17.5, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.19, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.29, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.02, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.08, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 16.04, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.48, "potting_onion_agent_1_min": 14, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.3, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.77, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 5.57, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.0, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.49, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.6, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.6, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.03, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.54, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.93, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.04, 
"optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.48, "optimal_onion_potting_agent_1_min": 14, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.04, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.48, "viable_onion_potting_agent_1_min": 14, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 579.0, 633.0, 627.0, 633.0, 627.0, 576.0, 590.0, 633.0, 582.0, 633.0, 573.0, 576.0, 633.0, 582.0, 633.0, 630.0, 630.0, 587.0, 630.0, 633.0, 630.0, 582.0, 627.0, 582.0, 627.0, 636.0, 587.0, 633.0, 573.0, 633.0, 576.0, 639.0, 639.0, 587.0, 582.0, 627.0, 587.0, 569.0, 555.0, 570.0, 639.0, 522.0, 633.0, 579.0, 639.0, 582.0, 627.0, 639.0, 579.0, 639.0, 630.0, 636.0, 639.0, 630.0, 627.0, 579.0, 579.0, 630.0, 573.0, 570.0, 579.0, 630.0, 581.0, 639.0, 587.0, 587.0, 541.0, 630.0, 587.0, 630.0, 579.0, 582.0, 519.0, 587.0, 630.0, 582.0, 582.0, 582.0, 630.0, 582.0, 570.0, 587.0, 636.0, 630.0, 587.0, 579.0, 530.0, 587.0, 630.0, 630.0, 582.0, 527.0, 582.0, 630.0, 627.0, 576.0, 636.0, 636.0, 630.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [294.0, 288.0, 286.0, 293.0, 316.0, 317.0, 308.0, 319.0, 316.0, 317.0, 308.0, 319.0, 299.0, 277.0, 288.0, 302.0, 324.0, 309.0, 291.0, 291.0, 319.0, 314.0, 291.0, 282.0, 290.0, 286.0, 322.0, 311.0, 293.0, 289.0, 321.0, 312.0, 316.0, 314.0, 311.0, 319.0, 294.0, 293.0, 316.0, 314.0, 311.0, 322.0, 322.0, 308.0, 288.0, 294.0, 313.0, 314.0, 297.0, 285.0, 313.0, 314.0, 319.0, 317.0, 290.0, 297.0, 316.0, 317.0, 285.0, 288.0, 316.0, 317.0, 285.0, 291.0, 314.0, 325.0, 324.0, 315.0, 297.0, 290.0, 299.0, 283.0, 315.0, 312.0, 299.0, 288.0, 285.0, 284.0, 279.0, 276.0, 287.0, 283.0, 322.0, 317.0, 257.0, 265.0, 319.0, 314.0, 285.0, 294.0, 319.0, 320.0, 291.0, 291.0, 316.0, 311.0, 320.0, 
319.0, 298.0, 281.0, 317.0, 322.0, 308.0, 322.0, 319.0, 317.0, 319.0, 320.0, 326.0, 304.0, 318.0, 309.0, 293.0, 286.0, 285.0, 294.0, 321.0, 309.0, 294.0, 279.0, 291.0, 279.0, 283.0, 296.0, 314.0, 316.0, 294.0, 287.0, 322.0, 317.0, 292.0, 295.0, 294.0, 293.0, 260.0, 281.0, 312.0, 318.0, 287.0, 300.0, 316.0, 314.0, 283.0, 296.0, 285.0, 297.0, 270.0, 249.0, 287.0, 300.0, 319.0, 311.0, 285.0, 297.0, 291.0, 291.0, 288.0, 294.0, 311.0, 319.0, 291.0, 291.0, 295.0, 275.0, 285.0, 302.0, 309.0, 327.0, 311.0, 319.0, 296.0, 291.0, 288.0, 291.0, 262.0, 268.0, 293.0, 294.0, 316.0, 314.0, 311.0, 319.0, 286.0, 296.0, 265.0, 262.0, 287.0, 295.0, 316.0, 314.0, 321.0, 306.0, 287.0, 289.0, 316.0, 320.0, 322.0, 314.0, 313.0, 317.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8833591807666914, "mean_processing_ms": 0.24499142247523886, "mean_inference_ms": 1.4752658321565872}, "off_policy_estimator": {}, "info": {"num_steps_trained": 8352000, "num_steps_sampled": 4454400, "sample_time_ms": 22743.076, "load_time_ms": 38.058, "grad_time_ms": 10274.124, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.003061985597014427, "policy_loss": -0.004540739115327597, "vf_loss": 81.69985961914062, "vf_explained_var": 0.7570112347602844, "kl": 0.0020776980090886354, "entropy": 1.1345181465148926, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4454400, "episodes_total": 11136, "training_iteration": 348, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-44-12", "timestamp": 1660257852, "time_this_iter_s": 32.17093515396118, "time_total_s": 16263.983684062958, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 16263.983684062958, "timesteps_since_restore": 4454400, "iterations_since_restore": 348, "perf": {"cpu_util_percent": 33.459999999999994, "ram_util_percent": 58.76888888888889}}
+{"episode_reward_max": 639.0, "episode_reward_min": 519.0, "episode_reward_mean": 601.71, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 249.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 300.855}, "custom_metrics": {"sparse_reward_mean": 208.0, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 185.71, "shaped_reward_min": 159, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.56, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 17.99, "onion_pickup_agent_1_min": 14, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.35, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 17.46, "useful_onion_pickup_agent_1_min": 14, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.19, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.24, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.02, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.06, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.05, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.46, "potting_onion_agent_1_min": 14, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.23, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.73, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.59, "useful_dish_pickup_agent_0_min": 4, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.12, "useful_dish_pickup_agent_1_min": 3, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.51, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.49, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.49, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.15, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.41, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.04, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.05, 
"optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.46, "optimal_onion_potting_agent_1_min": 14, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.05, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.46, "viable_onion_potting_agent_1_min": 14, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [627.0, 639.0, 636.0, 579.0, 587.0, 587.0, 581.0, 639.0, 576.0, 630.0, 630.0, 582.0, 582.0, 584.0, 630.0, 636.0, 582.0, 576.0, 630.0, 590.0, 582.0, 582.0, 587.0, 582.0, 627.0, 582.0, 582.0, 527.0, 630.0, 639.0, 630.0, 582.0, 639.0, 587.0, 587.0, 541.0, 630.0, 587.0, 630.0, 579.0, 582.0, 519.0, 587.0, 630.0, 582.0, 582.0, 582.0, 630.0, 582.0, 570.0, 587.0, 636.0, 630.0, 587.0, 579.0, 530.0, 587.0, 630.0, 630.0, 582.0, 527.0, 582.0, 630.0, 627.0, 576.0, 636.0, 636.0, 630.0, 582.0, 579.0, 633.0, 627.0, 633.0, 627.0, 576.0, 590.0, 633.0, 582.0, 633.0, 573.0, 576.0, 633.0, 582.0, 633.0, 630.0, 630.0, 587.0, 630.0, 633.0, 630.0, 582.0, 627.0, 582.0, 627.0, 636.0, 587.0, 633.0, 573.0, 633.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [314.0, 313.0, 322.0, 317.0, 317.0, 319.0, 291.0, 288.0, 291.0, 296.0, 299.0, 288.0, 296.0, 285.0, 317.0, 322.0, 291.0, 285.0, 319.0, 311.0, 311.0, 319.0, 283.0, 299.0, 293.0, 289.0, 288.0, 296.0, 314.0, 316.0, 319.0, 317.0, 291.0, 291.0, 285.0, 291.0, 310.0, 320.0, 291.0, 299.0, 290.0, 292.0, 283.0, 299.0, 293.0, 294.0, 298.0, 284.0, 320.0, 307.0, 291.0, 291.0, 288.0, 294.0, 256.0, 271.0, 319.0, 311.0, 315.0, 324.0, 316.0, 314.0, 291.0, 291.0, 322.0, 317.0, 292.0, 295.0, 294.0, 293.0, 260.0, 281.0, 312.0, 318.0, 287.0, 300.0, 316.0, 314.0, 283.0, 296.0, 285.0, 297.0, 270.0, 249.0, 287.0, 300.0, 319.0, 311.0, 285.0, 297.0, 291.0, 291.0, 288.0, 294.0, 311.0, 319.0, 291.0, 
291.0, 295.0, 275.0, 285.0, 302.0, 309.0, 327.0, 311.0, 319.0, 296.0, 291.0, 288.0, 291.0, 262.0, 268.0, 293.0, 294.0, 316.0, 314.0, 311.0, 319.0, 286.0, 296.0, 265.0, 262.0, 287.0, 295.0, 316.0, 314.0, 321.0, 306.0, 287.0, 289.0, 316.0, 320.0, 322.0, 314.0, 313.0, 317.0, 294.0, 288.0, 286.0, 293.0, 316.0, 317.0, 308.0, 319.0, 316.0, 317.0, 308.0, 319.0, 299.0, 277.0, 288.0, 302.0, 324.0, 309.0, 291.0, 291.0, 319.0, 314.0, 291.0, 282.0, 290.0, 286.0, 322.0, 311.0, 293.0, 289.0, 321.0, 312.0, 316.0, 314.0, 311.0, 319.0, 294.0, 293.0, 316.0, 314.0, 311.0, 322.0, 322.0, 308.0, 288.0, 294.0, 313.0, 314.0, 297.0, 285.0, 313.0, 314.0, 319.0, 317.0, 290.0, 297.0, 316.0, 317.0, 285.0, 288.0, 316.0, 317.0, 285.0, 291.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8821320498213592, "mean_processing_ms": 0.24474905703796943, "mean_inference_ms": 1.4742298243357532}, "off_policy_estimator": {}, "info": {"num_steps_trained": 8376000, "num_steps_sampled": 4467200, "sample_time_ms": 22958.396, "load_time_ms": 37.77, "grad_time_ms": 10335.342, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.004589398857206106, "policy_loss": -0.003508263034746051, "vf_loss": 86.6531982421875, "vf_explained_var": 0.7629675269126892, "kl": 0.0021643172949552536, "entropy": 1.1353298425674438, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4467200, "episodes_total": 11168, "training_iteration": 349, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-44-45", "timestamp": 1660257885, "time_this_iter_s": 32.81572699546814, "time_total_s": 16296.799411058426, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 16296.799411058426, "timesteps_since_restore": 4467200, "iterations_since_restore": 349, "perf": {"cpu_util_percent": 33.702173913043474, "ram_util_percent": 58.7978260869565}}
+{"episode_reward_max": 639.0, "episode_reward_min": 527.0, "episode_reward_mean": 611.41, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 256.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 305.705}, "custom_metrics": {"sparse_reward_mean": 211.8, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 187.81, "shaped_reward_min": 167, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.79, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.28, "onion_pickup_agent_1_min": 15, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.54, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.79, "useful_onion_pickup_agent_1_min": 14, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.23, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.17, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.04, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.05, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.23, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.78, "potting_onion_agent_1_min": 15, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.39, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.61, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.7, "useful_dish_pickup_agent_0_min": 4, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.01, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.57, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.44, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.64, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.11, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.56, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.06, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.23, 
"optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.78, "optimal_onion_potting_agent_1_min": 15, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.23, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.78, "viable_onion_potting_agent_1_min": 15, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [636.0, 627.0, 633.0, 579.0, 633.0, 633.0, 636.0, 630.0, 627.0, 633.0, 630.0, 630.0, 630.0, 630.0, 636.0, 630.0, 587.0, 627.0, 633.0, 630.0, 587.0, 633.0, 581.0, 630.0, 633.0, 593.0, 579.0, 636.0, 633.0, 633.0, 636.0, 636.0, 576.0, 636.0, 636.0, 630.0, 582.0, 579.0, 633.0, 627.0, 633.0, 627.0, 576.0, 590.0, 633.0, 582.0, 633.0, 573.0, 576.0, 633.0, 582.0, 633.0, 630.0, 630.0, 587.0, 630.0, 633.0, 630.0, 582.0, 627.0, 582.0, 627.0, 636.0, 587.0, 633.0, 573.0, 633.0, 576.0, 627.0, 639.0, 636.0, 579.0, 587.0, 587.0, 581.0, 639.0, 576.0, 630.0, 630.0, 582.0, 582.0, 584.0, 630.0, 636.0, 582.0, 576.0, 630.0, 590.0, 582.0, 582.0, 587.0, 582.0, 627.0, 582.0, 582.0, 527.0, 630.0, 639.0, 630.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [316.0, 320.0, 313.0, 314.0, 319.0, 314.0, 280.0, 299.0, 317.0, 316.0, 322.0, 311.0, 321.0, 315.0, 316.0, 314.0, 318.0, 309.0, 319.0, 314.0, 321.0, 309.0, 319.0, 311.0, 314.0, 316.0, 318.0, 312.0, 314.0, 322.0, 316.0, 314.0, 299.0, 288.0, 308.0, 319.0, 316.0, 317.0, 306.0, 324.0, 296.0, 291.0, 314.0, 319.0, 295.0, 286.0, 308.0, 322.0, 319.0, 314.0, 296.0, 297.0, 297.0, 282.0, 311.0, 325.0, 316.0, 317.0, 319.0, 314.0, 319.0, 317.0, 314.0, 322.0, 287.0, 289.0, 316.0, 320.0, 322.0, 314.0, 313.0, 317.0, 294.0, 288.0, 286.0, 293.0, 316.0, 317.0, 308.0, 319.0, 316.0, 317.0, 308.0, 319.0, 299.0, 277.0, 288.0, 302.0, 324.0, 309.0, 291.0, 291.0, 319.0, 314.0, 291.0, 282.0, 290.0, 
286.0, 322.0, 311.0, 293.0, 289.0, 321.0, 312.0, 316.0, 314.0, 311.0, 319.0, 294.0, 293.0, 316.0, 314.0, 311.0, 322.0, 322.0, 308.0, 288.0, 294.0, 313.0, 314.0, 297.0, 285.0, 313.0, 314.0, 319.0, 317.0, 290.0, 297.0, 316.0, 317.0, 285.0, 288.0, 316.0, 317.0, 285.0, 291.0, 314.0, 313.0, 322.0, 317.0, 317.0, 319.0, 291.0, 288.0, 291.0, 296.0, 299.0, 288.0, 296.0, 285.0, 317.0, 322.0, 291.0, 285.0, 319.0, 311.0, 311.0, 319.0, 283.0, 299.0, 293.0, 289.0, 288.0, 296.0, 314.0, 316.0, 319.0, 317.0, 291.0, 291.0, 285.0, 291.0, 310.0, 320.0, 291.0, 299.0, 290.0, 292.0, 283.0, 299.0, 293.0, 294.0, 298.0, 284.0, 320.0, 307.0, 291.0, 291.0, 288.0, 294.0, 256.0, 271.0, 319.0, 311.0, 315.0, 324.0, 316.0, 314.0, 291.0, 291.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8809197598148512, "mean_processing_ms": 0.2445094292995116, "mean_inference_ms": 1.4734182783968888}, "off_policy_estimator": {}, "info": {"num_steps_trained": 8400000, "num_steps_sampled": 4480000, "sample_time_ms": 23484.152, "load_time_ms": 37.181, "grad_time_ms": 10284.073, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0043442933820188046, "policy_loss": -0.002717310329899192, "vf_loss": 76.19442749023438, "vf_explained_var": 0.7705621719360352, "kl": 0.0019369550282135606, "entropy": 1.1156750917434692, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4480000, "episodes_total": 11200, "training_iteration": 350, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-45-21", "timestamp": 1660257921, "time_this_iter_s": 35.891582012176514, "time_total_s": 16332.690993070602, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 16332.690993070602, "timesteps_since_restore": 4480000, "iterations_since_restore": 350, "perf": {"cpu_util_percent": 33.81372549019608, "ram_util_percent": 58.90000000000001}}
+{"episode_reward_max": 639.0, "episode_reward_min": 524.0, "episode_reward_mean": 609.57, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 253.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 304.785}, "custom_metrics": {"sparse_reward_mean": 211.0, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 187.57, "shaped_reward_min": 164, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.75, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.34, "onion_pickup_agent_1_min": 15, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.42, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.79, "useful_onion_pickup_agent_1_min": 14, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.27, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.21, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.07, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.1, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 16.19, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.79, "potting_onion_agent_1_min": 14, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.38, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.7, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.62, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.04, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.6, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.48, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.6, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.14, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.5, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.09, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.19, 
"optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.79, "optimal_onion_potting_agent_1_min": 14, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.19, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.79, "viable_onion_potting_agent_1_min": 14, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [636.0, 587.0, 587.0, 593.0, 630.0, 627.0, 524.0, 630.0, 582.0, 576.0, 630.0, 579.0, 567.0, 579.0, 590.0, 630.0, 630.0, 627.0, 633.0, 636.0, 582.0, 582.0, 633.0, 636.0, 587.0, 636.0, 633.0, 633.0, 587.0, 573.0, 630.0, 582.0, 633.0, 573.0, 633.0, 576.0, 627.0, 639.0, 636.0, 579.0, 587.0, 587.0, 581.0, 639.0, 576.0, 630.0, 630.0, 582.0, 582.0, 584.0, 630.0, 636.0, 582.0, 576.0, 630.0, 590.0, 582.0, 582.0, 587.0, 582.0, 627.0, 582.0, 582.0, 527.0, 630.0, 639.0, 630.0, 582.0, 636.0, 627.0, 633.0, 579.0, 633.0, 633.0, 636.0, 630.0, 627.0, 633.0, 630.0, 630.0, 630.0, 630.0, 636.0, 630.0, 587.0, 627.0, 633.0, 630.0, 587.0, 633.0, 581.0, 630.0, 633.0, 593.0, 579.0, 636.0, 633.0, 633.0, 636.0, 636.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [314.0, 322.0, 299.0, 288.0, 301.0, 286.0, 302.0, 291.0, 314.0, 316.0, 319.0, 308.0, 253.0, 271.0, 313.0, 317.0, 283.0, 299.0, 277.0, 299.0, 318.0, 312.0, 288.0, 291.0, 274.0, 293.0, 291.0, 288.0, 290.0, 300.0, 318.0, 312.0, 311.0, 319.0, 313.0, 314.0, 316.0, 317.0, 314.0, 322.0, 290.0, 292.0, 300.0, 282.0, 319.0, 314.0, 319.0, 317.0, 291.0, 296.0, 324.0, 312.0, 316.0, 317.0, 316.0, 317.0, 299.0, 288.0, 279.0, 294.0, 311.0, 319.0, 291.0, 291.0, 316.0, 317.0, 285.0, 288.0, 316.0, 317.0, 285.0, 291.0, 314.0, 313.0, 322.0, 317.0, 317.0, 319.0, 291.0, 288.0, 291.0, 296.0, 299.0, 288.0, 296.0, 285.0, 317.0, 322.0, 291.0, 285.0, 319.0, 311.0, 311.0, 319.0, 283.0, 299.0, 293.0, 
289.0, 288.0, 296.0, 314.0, 316.0, 319.0, 317.0, 291.0, 291.0, 285.0, 291.0, 310.0, 320.0, 291.0, 299.0, 290.0, 292.0, 283.0, 299.0, 293.0, 294.0, 298.0, 284.0, 320.0, 307.0, 291.0, 291.0, 288.0, 294.0, 256.0, 271.0, 319.0, 311.0, 315.0, 324.0, 316.0, 314.0, 291.0, 291.0, 316.0, 320.0, 313.0, 314.0, 319.0, 314.0, 280.0, 299.0, 317.0, 316.0, 322.0, 311.0, 321.0, 315.0, 316.0, 314.0, 318.0, 309.0, 319.0, 314.0, 321.0, 309.0, 319.0, 311.0, 314.0, 316.0, 318.0, 312.0, 314.0, 322.0, 316.0, 314.0, 299.0, 288.0, 308.0, 319.0, 316.0, 317.0, 306.0, 324.0, 296.0, 291.0, 314.0, 319.0, 295.0, 286.0, 308.0, 322.0, 319.0, 314.0, 296.0, 297.0, 297.0, 282.0, 311.0, 325.0, 316.0, 317.0, 319.0, 314.0, 319.0, 317.0, 314.0, 322.0]}, "sampler_perf": {"mean_env_wait_ms": 0.879725637823849, "mean_processing_ms": 0.24427457411119732, "mean_inference_ms": 1.472868024343041}, "off_policy_estimator": {}, "info": {"num_steps_trained": 8424000, "num_steps_sampled": 4492800, "sample_time_ms": 23466.741, "load_time_ms": 37.693, "grad_time_ms": 10196.154, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0034591767471283674, "policy_loss": -0.0040799533016979694, "vf_loss": 81.06632232666016, "vf_explained_var": 0.7659358978271484, "kl": 0.0018826290033757687, "entropy": 1.135020136833191, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4492800, "episodes_total": 11232, "training_iteration": 351, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-45-57", "timestamp": 1660257957, "time_this_iter_s": 36.290544748306274, "time_total_s": 16368.981537818909, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 16368.981537818909, "timesteps_since_restore": 4492800, "iterations_since_restore": 351, "perf": {"cpu_util_percent": 32.60196078431373, "ram_util_percent": 58.86274509803921}}
+{"episode_reward_max": 639.0, "episode_reward_min": 516.0, "episode_reward_mean": 613.18, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 247.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 306.59}, "custom_metrics": {"sparse_reward_mean": 212.6, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 187.98, "shaped_reward_min": 156, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.77, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 18.5, "onion_pickup_agent_1_min": 14, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.43, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.86, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.28, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.21, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.1, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.08, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 16.21, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.92, "potting_onion_agent_1_min": 14, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.49, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.7, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 5.61, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.97, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.65, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.55, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.71, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.07, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.64, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.02, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.21, 
"optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.92, "optimal_onion_potting_agent_1_min": 14, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.21, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.92, "viable_onion_potting_agent_1_min": 14, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [567.0, 633.0, 587.0, 516.0, 587.0, 633.0, 609.0, 633.0, 582.0, 633.0, 570.0, 633.0, 579.0, 639.0, 627.0, 636.0, 533.0, 579.0, 633.0, 630.0, 633.0, 582.0, 627.0, 630.0, 630.0, 639.0, 636.0, 636.0, 636.0, 630.0, 582.0, 630.0, 630.0, 639.0, 630.0, 582.0, 636.0, 627.0, 633.0, 579.0, 633.0, 633.0, 636.0, 630.0, 627.0, 633.0, 630.0, 630.0, 630.0, 630.0, 636.0, 630.0, 587.0, 627.0, 633.0, 630.0, 587.0, 633.0, 581.0, 630.0, 633.0, 593.0, 579.0, 636.0, 633.0, 633.0, 636.0, 636.0, 636.0, 587.0, 587.0, 593.0, 630.0, 627.0, 524.0, 630.0, 582.0, 576.0, 630.0, 579.0, 567.0, 579.0, 590.0, 630.0, 630.0, 627.0, 633.0, 636.0, 582.0, 582.0, 633.0, 636.0, 587.0, 636.0, 633.0, 633.0, 587.0, 573.0, 630.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [288.0, 279.0, 318.0, 315.0, 292.0, 295.0, 269.0, 247.0, 286.0, 301.0, 319.0, 314.0, 304.0, 305.0, 306.0, 327.0, 291.0, 291.0, 309.0, 324.0, 282.0, 288.0, 319.0, 314.0, 288.0, 291.0, 317.0, 322.0, 321.0, 306.0, 319.0, 317.0, 267.0, 266.0, 295.0, 284.0, 326.0, 307.0, 311.0, 319.0, 319.0, 314.0, 288.0, 294.0, 313.0, 314.0, 316.0, 314.0, 321.0, 309.0, 316.0, 323.0, 319.0, 317.0, 314.0, 322.0, 319.0, 317.0, 314.0, 316.0, 296.0, 286.0, 319.0, 311.0, 319.0, 311.0, 315.0, 324.0, 316.0, 314.0, 291.0, 291.0, 316.0, 320.0, 313.0, 314.0, 319.0, 314.0, 280.0, 299.0, 317.0, 316.0, 322.0, 311.0, 321.0, 315.0, 316.0, 314.0, 318.0, 309.0, 319.0, 314.0, 321.0, 309.0, 319.0, 311.0, 314.0, 
316.0, 318.0, 312.0, 314.0, 322.0, 316.0, 314.0, 299.0, 288.0, 308.0, 319.0, 316.0, 317.0, 306.0, 324.0, 296.0, 291.0, 314.0, 319.0, 295.0, 286.0, 308.0, 322.0, 319.0, 314.0, 296.0, 297.0, 297.0, 282.0, 311.0, 325.0, 316.0, 317.0, 319.0, 314.0, 319.0, 317.0, 314.0, 322.0, 314.0, 322.0, 299.0, 288.0, 301.0, 286.0, 302.0, 291.0, 314.0, 316.0, 319.0, 308.0, 253.0, 271.0, 313.0, 317.0, 283.0, 299.0, 277.0, 299.0, 318.0, 312.0, 288.0, 291.0, 274.0, 293.0, 291.0, 288.0, 290.0, 300.0, 318.0, 312.0, 311.0, 319.0, 313.0, 314.0, 316.0, 317.0, 314.0, 322.0, 290.0, 292.0, 300.0, 282.0, 319.0, 314.0, 319.0, 317.0, 291.0, 296.0, 324.0, 312.0, 316.0, 317.0, 316.0, 317.0, 299.0, 288.0, 279.0, 294.0, 311.0, 319.0, 291.0, 291.0]}, "sampler_perf": {"mean_env_wait_ms": 0.878542017918733, "mean_processing_ms": 0.2440427958629624, "mean_inference_ms": 1.4723767990503938}, "off_policy_estimator": {}, "info": {"num_steps_trained": 8448000, "num_steps_sampled": 4505600, "sample_time_ms": 23873.275, "load_time_ms": 37.543, "grad_time_ms": 10389.923, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0013407707447186112, "policy_loss": -0.006131558213382959, "vf_loss": 80.36180877685547, "vf_explained_var": 0.7696139812469482, "kl": 0.0018947357311844826, "entropy": 1.127698540687561, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4505600, "episodes_total": 11264, "training_iteration": 352, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-46-31", "timestamp": 1660257991, "time_this_iter_s": 34.20055317878723, "time_total_s": 16403.182090997696, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 16403.182090997696, "timesteps_since_restore": 4505600, "iterations_since_restore": 352, "perf": {"cpu_util_percent": 33.57142857142857, "ram_util_percent": 58.8061224489796}}
+{"episode_reward_max": 639.0, "episode_reward_min": 459.0, "episode_reward_mean": 602.27, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 222.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 301.135}, "custom_metrics": {"sparse_reward_mean": 208.6, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 185.07, "shaped_reward_min": 139, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.58, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 18.27, "onion_pickup_agent_1_min": 12, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.25, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 19, "useful_onion_pickup_agent_1_mean": 17.53, "useful_onion_pickup_agent_1_min": 11, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.26, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.28, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.11, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.11, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 16.04, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.66, "potting_onion_agent_1_min": 11, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.43, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.7, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 5.48, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.91, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.64, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.57, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.65, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.92, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.58, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.88, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.04, 
"optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.66, "optimal_onion_potting_agent_1_min": 11, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.04, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.66, "viable_onion_potting_agent_1_min": 11, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [519.0, 582.0, 630.0, 582.0, 582.0, 576.0, 582.0, 582.0, 582.0, 579.0, 567.0, 633.0, 459.0, 630.0, 582.0, 579.0, 582.0, 636.0, 584.0, 587.0, 627.0, 579.0, 630.0, 639.0, 530.0, 582.0, 630.0, 582.0, 636.0, 513.0, 630.0, 579.0, 633.0, 633.0, 636.0, 636.0, 636.0, 587.0, 587.0, 593.0, 630.0, 627.0, 524.0, 630.0, 582.0, 576.0, 630.0, 579.0, 567.0, 579.0, 590.0, 630.0, 630.0, 627.0, 633.0, 636.0, 582.0, 582.0, 633.0, 636.0, 587.0, 636.0, 633.0, 633.0, 587.0, 573.0, 630.0, 582.0, 567.0, 633.0, 587.0, 516.0, 587.0, 633.0, 609.0, 633.0, 582.0, 633.0, 570.0, 633.0, 579.0, 639.0, 627.0, 636.0, 533.0, 579.0, 633.0, 630.0, 633.0, 582.0, 627.0, 630.0, 630.0, 639.0, 636.0, 636.0, 636.0, 630.0, 582.0, 630.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [264.0, 255.0, 289.0, 293.0, 314.0, 316.0, 296.0, 286.0, 294.0, 288.0, 283.0, 293.0, 286.0, 296.0, 295.0, 287.0, 296.0, 286.0, 285.0, 294.0, 287.0, 280.0, 311.0, 322.0, 237.0, 222.0, 311.0, 319.0, 285.0, 297.0, 295.0, 284.0, 296.0, 286.0, 316.0, 320.0, 298.0, 286.0, 298.0, 289.0, 305.0, 322.0, 285.0, 294.0, 320.0, 310.0, 322.0, 317.0, 270.0, 260.0, 291.0, 291.0, 316.0, 314.0, 298.0, 284.0, 311.0, 325.0, 259.0, 254.0, 314.0, 316.0, 302.0, 277.0, 316.0, 317.0, 319.0, 314.0, 319.0, 317.0, 314.0, 322.0, 314.0, 322.0, 299.0, 288.0, 301.0, 286.0, 302.0, 291.0, 314.0, 316.0, 319.0, 308.0, 253.0, 271.0, 313.0, 317.0, 283.0, 299.0, 277.0, 299.0, 318.0, 312.0, 288.0, 291.0, 274.0, 
293.0, 291.0, 288.0, 290.0, 300.0, 318.0, 312.0, 311.0, 319.0, 313.0, 314.0, 316.0, 317.0, 314.0, 322.0, 290.0, 292.0, 300.0, 282.0, 319.0, 314.0, 319.0, 317.0, 291.0, 296.0, 324.0, 312.0, 316.0, 317.0, 316.0, 317.0, 299.0, 288.0, 279.0, 294.0, 311.0, 319.0, 291.0, 291.0, 288.0, 279.0, 318.0, 315.0, 292.0, 295.0, 269.0, 247.0, 286.0, 301.0, 319.0, 314.0, 304.0, 305.0, 306.0, 327.0, 291.0, 291.0, 309.0, 324.0, 282.0, 288.0, 319.0, 314.0, 288.0, 291.0, 317.0, 322.0, 321.0, 306.0, 319.0, 317.0, 267.0, 266.0, 295.0, 284.0, 326.0, 307.0, 311.0, 319.0, 319.0, 314.0, 288.0, 294.0, 313.0, 314.0, 316.0, 314.0, 321.0, 309.0, 316.0, 323.0, 319.0, 317.0, 314.0, 322.0, 319.0, 317.0, 314.0, 316.0, 296.0, 286.0, 319.0, 311.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8773547258582282, "mean_processing_ms": 0.24380845402332824, "mean_inference_ms": 1.471635610090532}, "off_policy_estimator": {}, "info": {"num_steps_trained": 8472000, "num_steps_sampled": 4518400, "sample_time_ms": 23706.56, "load_time_ms": 37.425, "grad_time_ms": 10690.015, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0011104041477665305, "policy_loss": -0.00902702659368515, "vf_loss": 84.78372955322266, "vf_explained_var": 0.7670722007751465, "kl": 0.0019426337676122785, "entropy": 1.1235073804855347, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4518400, "episodes_total": 11296, "training_iteration": 353, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-47-03", "timestamp": 1660258023, "time_this_iter_s": 31.92423105239868, "time_total_s": 16435.106322050095, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 16435.106322050095, "timesteps_since_restore": 4518400, "iterations_since_restore": 353, "perf": {"cpu_util_percent": 34.76222222222222, "ram_util_percent": 59.38222222222222}}
+{"episode_reward_max": 639.0, "episode_reward_min": 459.0, "episode_reward_mean": 600.11, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 222.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 300.055}, "custom_metrics": {"sparse_reward_mean": 207.8, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 184.51, "shaped_reward_min": 139, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.56, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 18.08, "onion_pickup_agent_1_min": 12, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.23, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 19, "useful_onion_pickup_agent_1_mean": 17.3, "useful_onion_pickup_agent_1_min": 11, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.26, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.2, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.1, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.06, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.02, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.56, "potting_onion_agent_1_min": 11, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.54, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 5.68, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 5.54, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.9, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.71, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.55, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.62, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.89, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.56, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.85, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.02, 
"optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.56, "optimal_onion_potting_agent_1_min": 11, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.02, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.56, "viable_onion_potting_agent_1_min": 11, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [587.0, 627.0, 579.0, 582.0, 579.0, 582.0, 633.0, 582.0, 573.0, 633.0, 630.0, 582.0, 639.0, 576.0, 582.0, 639.0, 633.0, 587.0, 627.0, 636.0, 636.0, 630.0, 627.0, 579.0, 536.0, 636.0, 627.0, 582.0, 633.0, 579.0, 582.0, 582.0, 587.0, 573.0, 630.0, 582.0, 567.0, 633.0, 587.0, 516.0, 587.0, 633.0, 609.0, 633.0, 582.0, 633.0, 570.0, 633.0, 579.0, 639.0, 627.0, 636.0, 533.0, 579.0, 633.0, 630.0, 633.0, 582.0, 627.0, 630.0, 630.0, 639.0, 636.0, 636.0, 636.0, 630.0, 582.0, 630.0, 519.0, 582.0, 630.0, 582.0, 582.0, 576.0, 582.0, 582.0, 582.0, 579.0, 567.0, 633.0, 459.0, 630.0, 582.0, 579.0, 582.0, 636.0, 584.0, 587.0, 627.0, 579.0, 630.0, 639.0, 530.0, 582.0, 630.0, 582.0, 636.0, 513.0, 630.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [294.0, 293.0, 313.0, 314.0, 287.0, 292.0, 291.0, 291.0, 285.0, 294.0, 296.0, 286.0, 321.0, 312.0, 293.0, 289.0, 280.0, 293.0, 324.0, 309.0, 314.0, 316.0, 298.0, 284.0, 317.0, 322.0, 283.0, 293.0, 301.0, 281.0, 322.0, 317.0, 311.0, 322.0, 293.0, 294.0, 311.0, 316.0, 319.0, 317.0, 322.0, 314.0, 318.0, 312.0, 313.0, 314.0, 288.0, 291.0, 274.0, 262.0, 316.0, 320.0, 311.0, 316.0, 286.0, 296.0, 314.0, 319.0, 289.0, 290.0, 288.0, 294.0, 291.0, 291.0, 299.0, 288.0, 279.0, 294.0, 311.0, 319.0, 291.0, 291.0, 288.0, 279.0, 318.0, 315.0, 292.0, 295.0, 269.0, 247.0, 286.0, 301.0, 319.0, 314.0, 304.0, 305.0, 306.0, 327.0, 291.0, 291.0, 309.0, 324.0, 282.0, 288.0, 319.0, 314.0, 288.0, 
291.0, 317.0, 322.0, 321.0, 306.0, 319.0, 317.0, 267.0, 266.0, 295.0, 284.0, 326.0, 307.0, 311.0, 319.0, 319.0, 314.0, 288.0, 294.0, 313.0, 314.0, 316.0, 314.0, 321.0, 309.0, 316.0, 323.0, 319.0, 317.0, 314.0, 322.0, 319.0, 317.0, 314.0, 316.0, 296.0, 286.0, 319.0, 311.0, 264.0, 255.0, 289.0, 293.0, 314.0, 316.0, 296.0, 286.0, 294.0, 288.0, 283.0, 293.0, 286.0, 296.0, 295.0, 287.0, 296.0, 286.0, 285.0, 294.0, 287.0, 280.0, 311.0, 322.0, 237.0, 222.0, 311.0, 319.0, 285.0, 297.0, 295.0, 284.0, 296.0, 286.0, 316.0, 320.0, 298.0, 286.0, 298.0, 289.0, 305.0, 322.0, 285.0, 294.0, 320.0, 310.0, 322.0, 317.0, 270.0, 260.0, 291.0, 291.0, 316.0, 314.0, 298.0, 284.0, 311.0, 325.0, 259.0, 254.0, 314.0, 316.0, 302.0, 277.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8761658335288303, "mean_processing_ms": 0.24357312011055882, "mean_inference_ms": 1.4706370891819096}, "off_policy_estimator": {}, "info": {"num_steps_trained": 8496000, "num_steps_sampled": 4531200, "sample_time_ms": 23737.686, "load_time_ms": 37.309, "grad_time_ms": 10476.005, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0014189122011885047, "policy_loss": -0.006124518811702728, "vf_loss": 81.1131591796875, "vf_explained_var": 0.7619540095329285, "kl": 0.002155100228264928, "entropy": 1.1357545852661133, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4531200, "episodes_total": 11328, "training_iteration": 354, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-47-35", "timestamp": 1660258055, "time_this_iter_s": 31.991327047348022, "time_total_s": 16467.097649097443, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 16467.097649097443, "timesteps_since_restore": 4531200, "iterations_since_restore": 354, "perf": {"cpu_util_percent": 34.031111111111116, "ram_util_percent": 59.01333333333334}}
+{"episode_reward_max": 639.0, "episode_reward_min": 459.0, "episode_reward_mean": 601.69, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 222.0}, "policy_reward_max": {"ppo": 326.0}, "policy_reward_mean": {"ppo": 300.845}, "custom_metrics": {"sparse_reward_mean": 208.2, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 185.29, "shaped_reward_min": 139, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.61, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 18.08, "onion_pickup_agent_1_min": 12, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.33, "useful_onion_pickup_agent_0_min": 13, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 17.41, "useful_onion_pickup_agent_1_min": 11, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.26, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.17, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.08, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.07, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.02, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.67, "potting_onion_agent_1_min": 11, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.52, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 5.64, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.62, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.92, "useful_dish_pickup_agent_1_min": 3, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.65, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.53, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.7, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.83, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.64, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.79, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.02, 
"optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.67, "optimal_onion_potting_agent_1_min": 11, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.02, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.67, "viable_onion_potting_agent_1_min": 11, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 636.0, 567.0, 570.0, 639.0, 636.0, 587.0, 630.0, 630.0, 636.0, 633.0, 587.0, 639.0, 570.0, 630.0, 636.0, 582.0, 587.0, 633.0, 639.0, 630.0, 633.0, 582.0, 639.0, 579.0, 587.0, 630.0, 587.0, 633.0, 627.0, 576.0, 630.0, 636.0, 630.0, 582.0, 630.0, 519.0, 582.0, 630.0, 582.0, 582.0, 576.0, 582.0, 582.0, 582.0, 579.0, 567.0, 633.0, 459.0, 630.0, 582.0, 579.0, 582.0, 636.0, 584.0, 587.0, 627.0, 579.0, 630.0, 639.0, 530.0, 582.0, 630.0, 582.0, 636.0, 513.0, 630.0, 579.0, 587.0, 627.0, 579.0, 582.0, 579.0, 582.0, 633.0, 582.0, 573.0, 633.0, 630.0, 582.0, 639.0, 576.0, 582.0, 639.0, 633.0, 587.0, 627.0, 636.0, 636.0, 630.0, 627.0, 579.0, 536.0, 636.0, 627.0, 582.0, 633.0, 579.0, 582.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [299.0, 283.0, 316.0, 320.0, 284.0, 283.0, 278.0, 292.0, 317.0, 322.0, 316.0, 320.0, 293.0, 294.0, 311.0, 319.0, 323.0, 307.0, 311.0, 325.0, 319.0, 314.0, 296.0, 291.0, 324.0, 315.0, 285.0, 285.0, 323.0, 307.0, 324.0, 312.0, 296.0, 286.0, 289.0, 298.0, 319.0, 314.0, 322.0, 317.0, 326.0, 304.0, 316.0, 317.0, 288.0, 294.0, 319.0, 320.0, 288.0, 291.0, 291.0, 296.0, 314.0, 316.0, 299.0, 288.0, 308.0, 325.0, 313.0, 314.0, 286.0, 290.0, 324.0, 306.0, 319.0, 317.0, 314.0, 316.0, 296.0, 286.0, 319.0, 311.0, 264.0, 255.0, 289.0, 293.0, 314.0, 316.0, 296.0, 286.0, 294.0, 288.0, 283.0, 293.0, 286.0, 296.0, 295.0, 287.0, 296.0, 286.0, 285.0, 294.0, 287.0, 280.0, 311.0, 322.0, 237.0, 
222.0, 311.0, 319.0, 285.0, 297.0, 295.0, 284.0, 296.0, 286.0, 316.0, 320.0, 298.0, 286.0, 298.0, 289.0, 305.0, 322.0, 285.0, 294.0, 320.0, 310.0, 322.0, 317.0, 270.0, 260.0, 291.0, 291.0, 316.0, 314.0, 298.0, 284.0, 311.0, 325.0, 259.0, 254.0, 314.0, 316.0, 302.0, 277.0, 294.0, 293.0, 313.0, 314.0, 287.0, 292.0, 291.0, 291.0, 285.0, 294.0, 296.0, 286.0, 321.0, 312.0, 293.0, 289.0, 280.0, 293.0, 324.0, 309.0, 314.0, 316.0, 298.0, 284.0, 317.0, 322.0, 283.0, 293.0, 301.0, 281.0, 322.0, 317.0, 311.0, 322.0, 293.0, 294.0, 311.0, 316.0, 319.0, 317.0, 322.0, 314.0, 318.0, 312.0, 313.0, 314.0, 288.0, 291.0, 274.0, 262.0, 316.0, 320.0, 311.0, 316.0, 286.0, 296.0, 314.0, 319.0, 289.0, 290.0, 288.0, 294.0, 291.0, 291.0]}, "sampler_perf": {"mean_env_wait_ms": 0.874971949450553, "mean_processing_ms": 0.24333505417716514, "mean_inference_ms": 1.4694074728869129}, "off_policy_estimator": {}, "info": {"num_steps_trained": 8520000, "num_steps_sampled": 4544000, "sample_time_ms": 22906.057, "load_time_ms": 36.777, "grad_time_ms": 10398.662, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.002145820064470172, "policy_loss": -0.005216358229517937, "vf_loss": 79.28690338134766, "vf_explained_var": 0.7675671577453613, "kl": 0.0018057804554700851, "entropy": 1.1330245733261108, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4544000, "episodes_total": 11360, "training_iteration": 355, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-48-06", "timestamp": 1660258086, "time_this_iter_s": 30.381834983825684, "time_total_s": 16497.47948408127, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 16497.47948408127, "timesteps_since_restore": 4544000, "iterations_since_restore": 355, "perf": {"cpu_util_percent": 34.25348837209302, "ram_util_percent": 58.85813953488373}}
+{"episode_reward_max": 639.0, "episode_reward_min": 513.0, "episode_reward_mean": 605.33, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 254.0}, "policy_reward_max": {"ppo": 326.0}, "policy_reward_mean": {"ppo": 302.665}, "custom_metrics": {"sparse_reward_mean": 209.4, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 186.53, "shaped_reward_min": 153, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.51, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 18.29, "onion_pickup_agent_1_min": 14, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 16.22, "useful_onion_pickup_agent_0_min": 13, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 17.74, "useful_onion_pickup_agent_1_min": 14, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.26, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.18, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.06, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.05, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.0, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.87, "potting_onion_agent_1_min": 14, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.54, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 13, "dish_pickup_agent_1_mean": 5.68, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.72, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.92, "useful_dish_pickup_agent_1_min": 3, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.6, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 0.59, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.71, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.9, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.65, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.86, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.0, 
"optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.87, "optimal_onion_potting_agent_1_min": 14, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.0, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.87, "viable_onion_potting_agent_1_min": 14, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [630.0, 582.0, 633.0, 582.0, 590.0, 636.0, 627.0, 587.0, 630.0, 636.0, 630.0, 630.0, 630.0, 582.0, 582.0, 539.0, 582.0, 582.0, 576.0, 590.0, 627.0, 582.0, 627.0, 630.0, 579.0, 639.0, 582.0, 633.0, 576.0, 579.0, 582.0, 584.0, 636.0, 513.0, 630.0, 579.0, 587.0, 627.0, 579.0, 582.0, 579.0, 582.0, 633.0, 582.0, 573.0, 633.0, 630.0, 582.0, 639.0, 576.0, 582.0, 639.0, 633.0, 587.0, 627.0, 636.0, 636.0, 630.0, 627.0, 579.0, 536.0, 636.0, 627.0, 582.0, 633.0, 579.0, 582.0, 582.0, 582.0, 636.0, 567.0, 570.0, 639.0, 636.0, 587.0, 630.0, 630.0, 636.0, 633.0, 587.0, 639.0, 570.0, 630.0, 636.0, 582.0, 587.0, 633.0, 639.0, 630.0, 633.0, 582.0, 639.0, 579.0, 587.0, 630.0, 587.0, 633.0, 627.0, 576.0, 630.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [313.0, 317.0, 294.0, 288.0, 316.0, 317.0, 293.0, 289.0, 288.0, 302.0, 319.0, 317.0, 311.0, 316.0, 301.0, 286.0, 318.0, 312.0, 316.0, 320.0, 316.0, 314.0, 316.0, 314.0, 316.0, 314.0, 289.0, 293.0, 288.0, 294.0, 265.0, 274.0, 283.0, 299.0, 288.0, 294.0, 274.0, 302.0, 296.0, 294.0, 321.0, 306.0, 294.0, 288.0, 308.0, 319.0, 321.0, 309.0, 287.0, 292.0, 320.0, 319.0, 301.0, 281.0, 318.0, 315.0, 293.0, 283.0, 290.0, 289.0, 291.0, 291.0, 296.0, 288.0, 311.0, 325.0, 259.0, 254.0, 314.0, 316.0, 302.0, 277.0, 294.0, 293.0, 313.0, 314.0, 287.0, 292.0, 291.0, 291.0, 285.0, 294.0, 296.0, 286.0, 321.0, 312.0, 293.0, 289.0, 280.0, 293.0, 324.0, 309.0, 314.0, 316.0, 298.0, 284.0, 317.0, 
322.0, 283.0, 293.0, 301.0, 281.0, 322.0, 317.0, 311.0, 322.0, 293.0, 294.0, 311.0, 316.0, 319.0, 317.0, 322.0, 314.0, 318.0, 312.0, 313.0, 314.0, 288.0, 291.0, 274.0, 262.0, 316.0, 320.0, 311.0, 316.0, 286.0, 296.0, 314.0, 319.0, 289.0, 290.0, 288.0, 294.0, 291.0, 291.0, 299.0, 283.0, 316.0, 320.0, 284.0, 283.0, 278.0, 292.0, 317.0, 322.0, 316.0, 320.0, 293.0, 294.0, 311.0, 319.0, 323.0, 307.0, 311.0, 325.0, 319.0, 314.0, 296.0, 291.0, 324.0, 315.0, 285.0, 285.0, 323.0, 307.0, 324.0, 312.0, 296.0, 286.0, 289.0, 298.0, 319.0, 314.0, 322.0, 317.0, 326.0, 304.0, 316.0, 317.0, 288.0, 294.0, 319.0, 320.0, 288.0, 291.0, 291.0, 296.0, 314.0, 316.0, 299.0, 288.0, 308.0, 325.0, 313.0, 314.0, 286.0, 290.0, 324.0, 306.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8737833276000079, "mean_processing_ms": 0.243097780431969, "mean_inference_ms": 1.4681762334073296}, "off_policy_estimator": {}, "info": {"num_steps_trained": 8544000, "num_steps_sampled": 4556800, "sample_time_ms": 22541.516, "load_time_ms": 36.891, "grad_time_ms": 10138.825, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.00407541124150157, "policy_loss": -0.0034555860329419374, "vf_loss": 80.97249603271484, "vf_explained_var": 0.7684476375579834, "kl": 0.001921386457979679, "entropy": 1.1324900388717651, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4556800, "episodes_total": 11392, "training_iteration": 356, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-48-36", "timestamp": 1660258116, "time_this_iter_s": 30.78407096862793, "time_total_s": 16528.263555049896, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 16528.263555049896, "timesteps_since_restore": 4556800, "iterations_since_restore": 356, "perf": {"cpu_util_percent": 32.61818181818182, "ram_util_percent": 59.45681818181819}}
+{"episode_reward_max": 639.0, "episode_reward_min": 539.0, "episode_reward_mean": 609.69, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 265.0}, "policy_reward_max": {"ppo": 326.0}, "policy_reward_mean": {"ppo": 304.845}, "custom_metrics": {"sparse_reward_mean": 211.0, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 187.69, "shaped_reward_min": 167, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.62, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 18.37, "onion_pickup_agent_1_min": 13, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 16.4, "useful_onion_pickup_agent_0_min": 13, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 17.91, "useful_onion_pickup_agent_1_min": 12, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.23, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.18, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.03, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.07, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.16, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.96, "potting_onion_agent_1_min": 13, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.46, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 13, "dish_pickup_agent_1_mean": 5.7, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.69, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.97, "useful_dish_pickup_agent_1_min": 3, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.56, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 0.55, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.71, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.96, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.67, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.91, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.16, 
"optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.96, "optimal_onion_potting_agent_1_min": 13, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.16, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.96, "viable_onion_potting_agent_1_min": 13, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [636.0, 633.0, 636.0, 639.0, 627.0, 630.0, 582.0, 633.0, 639.0, 573.0, 636.0, 579.0, 633.0, 636.0, 633.0, 636.0, 579.0, 582.0, 579.0, 630.0, 587.0, 636.0, 582.0, 636.0, 633.0, 627.0, 630.0, 639.0, 584.0, 627.0, 627.0, 576.0, 633.0, 579.0, 582.0, 582.0, 582.0, 636.0, 567.0, 570.0, 639.0, 636.0, 587.0, 630.0, 630.0, 636.0, 633.0, 587.0, 639.0, 570.0, 630.0, 636.0, 582.0, 587.0, 633.0, 639.0, 630.0, 633.0, 582.0, 639.0, 579.0, 587.0, 630.0, 587.0, 633.0, 627.0, 576.0, 630.0, 630.0, 582.0, 633.0, 582.0, 590.0, 636.0, 627.0, 587.0, 630.0, 636.0, 630.0, 630.0, 630.0, 582.0, 582.0, 539.0, 582.0, 582.0, 576.0, 590.0, 627.0, 582.0, 627.0, 630.0, 579.0, 639.0, 582.0, 633.0, 576.0, 579.0, 582.0, 584.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [319.0, 317.0, 316.0, 317.0, 319.0, 317.0, 325.0, 314.0, 313.0, 314.0, 321.0, 309.0, 296.0, 286.0, 313.0, 320.0, 317.0, 322.0, 287.0, 286.0, 312.0, 324.0, 286.0, 293.0, 321.0, 312.0, 314.0, 322.0, 314.0, 319.0, 316.0, 320.0, 291.0, 288.0, 293.0, 289.0, 292.0, 287.0, 316.0, 314.0, 288.0, 299.0, 322.0, 314.0, 286.0, 296.0, 316.0, 320.0, 314.0, 319.0, 313.0, 314.0, 316.0, 314.0, 322.0, 317.0, 290.0, 294.0, 311.0, 316.0, 319.0, 308.0, 293.0, 283.0, 314.0, 319.0, 289.0, 290.0, 288.0, 294.0, 291.0, 291.0, 299.0, 283.0, 316.0, 320.0, 284.0, 283.0, 278.0, 292.0, 317.0, 322.0, 316.0, 320.0, 293.0, 294.0, 311.0, 319.0, 323.0, 307.0, 311.0, 325.0, 319.0, 314.0, 296.0, 291.0, 324.0, 
315.0, 285.0, 285.0, 323.0, 307.0, 324.0, 312.0, 296.0, 286.0, 289.0, 298.0, 319.0, 314.0, 322.0, 317.0, 326.0, 304.0, 316.0, 317.0, 288.0, 294.0, 319.0, 320.0, 288.0, 291.0, 291.0, 296.0, 314.0, 316.0, 299.0, 288.0, 308.0, 325.0, 313.0, 314.0, 286.0, 290.0, 324.0, 306.0, 313.0, 317.0, 294.0, 288.0, 316.0, 317.0, 293.0, 289.0, 288.0, 302.0, 319.0, 317.0, 311.0, 316.0, 301.0, 286.0, 318.0, 312.0, 316.0, 320.0, 316.0, 314.0, 316.0, 314.0, 316.0, 314.0, 289.0, 293.0, 288.0, 294.0, 265.0, 274.0, 283.0, 299.0, 288.0, 294.0, 274.0, 302.0, 296.0, 294.0, 321.0, 306.0, 294.0, 288.0, 308.0, 319.0, 321.0, 309.0, 287.0, 292.0, 320.0, 319.0, 301.0, 281.0, 318.0, 315.0, 293.0, 283.0, 290.0, 289.0, 291.0, 291.0, 296.0, 288.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8725992979631204, "mean_processing_ms": 0.24286049780551075, "mean_inference_ms": 1.4669091441555409}, "off_policy_estimator": {}, "info": {"num_steps_trained": 8568000, "num_steps_sampled": 4569600, "sample_time_ms": 22487.797, "load_time_ms": 37.05, "grad_time_ms": 10111.337, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0030587399378418922, "policy_loss": -0.004343332722783089, "vf_loss": 79.63693237304688, "vf_explained_var": 0.7643921971321106, "kl": 0.0021028893534094095, "entropy": 1.1232417821884155, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4569600, "episodes_total": 11424, "training_iteration": 357, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-49-07", "timestamp": 1660258147, "time_this_iter_s": 30.161853790283203, "time_total_s": 16558.42540884018, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 16558.42540884018, "timesteps_since_restore": 4569600, "iterations_since_restore": 357, "perf": {"cpu_util_percent": 33.550000000000004, "ram_util_percent": 58.85714285714285}}
+{"episode_reward_max": 639.0, "episode_reward_min": 467.0, "episode_reward_mean": 606.19, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 223.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 303.095}, "custom_metrics": {"sparse_reward_mean": 210.0, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 186.19, "shaped_reward_min": 147, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.4, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 18.42, "onion_pickup_agent_1_min": 13, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 16.17, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 17.86, "useful_onion_pickup_agent_1_min": 12, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.19, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.25, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.04, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.09, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.0, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.9, "potting_onion_agent_1_min": 13, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.52, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 13, "dish_pickup_agent_1_mean": 5.76, "dish_pickup_agent_1_min": 4, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.59, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.84, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.67, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 0.61, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.67, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 4.98, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.61, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.94, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.0, 
"optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.9, "optimal_onion_potting_agent_1_min": 13, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.0, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.9, "viable_onion_potting_agent_1_min": 13, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [570.0, 582.0, 627.0, 639.0, 579.0, 579.0, 587.0, 630.0, 579.0, 627.0, 582.0, 636.0, 630.0, 624.0, 518.0, 570.0, 636.0, 576.0, 587.0, 624.0, 636.0, 587.0, 627.0, 630.0, 630.0, 584.0, 579.0, 636.0, 576.0, 630.0, 578.0, 467.0, 633.0, 627.0, 576.0, 630.0, 630.0, 582.0, 633.0, 582.0, 590.0, 636.0, 627.0, 587.0, 630.0, 636.0, 630.0, 630.0, 630.0, 582.0, 582.0, 539.0, 582.0, 582.0, 576.0, 590.0, 627.0, 582.0, 627.0, 630.0, 579.0, 639.0, 582.0, 633.0, 576.0, 579.0, 582.0, 584.0, 636.0, 633.0, 636.0, 639.0, 627.0, 630.0, 582.0, 633.0, 639.0, 573.0, 636.0, 579.0, 633.0, 636.0, 633.0, 636.0, 579.0, 582.0, 579.0, 630.0, 587.0, 636.0, 582.0, 636.0, 633.0, 627.0, 630.0, 639.0, 584.0, 627.0, 627.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [282.0, 288.0, 293.0, 289.0, 313.0, 314.0, 322.0, 317.0, 296.0, 283.0, 290.0, 289.0, 288.0, 299.0, 316.0, 314.0, 291.0, 288.0, 318.0, 309.0, 288.0, 294.0, 324.0, 312.0, 316.0, 314.0, 302.0, 322.0, 247.0, 271.0, 285.0, 285.0, 319.0, 317.0, 291.0, 285.0, 296.0, 291.0, 313.0, 311.0, 321.0, 315.0, 301.0, 286.0, 308.0, 319.0, 322.0, 308.0, 312.0, 318.0, 286.0, 298.0, 288.0, 291.0, 319.0, 317.0, 285.0, 291.0, 318.0, 312.0, 292.0, 286.0, 223.0, 244.0, 308.0, 325.0, 313.0, 314.0, 286.0, 290.0, 324.0, 306.0, 313.0, 317.0, 294.0, 288.0, 316.0, 317.0, 293.0, 289.0, 288.0, 302.0, 319.0, 317.0, 311.0, 316.0, 301.0, 286.0, 318.0, 312.0, 316.0, 320.0, 316.0, 314.0, 316.0, 314.0, 316.0, 
314.0, 289.0, 293.0, 288.0, 294.0, 265.0, 274.0, 283.0, 299.0, 288.0, 294.0, 274.0, 302.0, 296.0, 294.0, 321.0, 306.0, 294.0, 288.0, 308.0, 319.0, 321.0, 309.0, 287.0, 292.0, 320.0, 319.0, 301.0, 281.0, 318.0, 315.0, 293.0, 283.0, 290.0, 289.0, 291.0, 291.0, 296.0, 288.0, 319.0, 317.0, 316.0, 317.0, 319.0, 317.0, 325.0, 314.0, 313.0, 314.0, 321.0, 309.0, 296.0, 286.0, 313.0, 320.0, 317.0, 322.0, 287.0, 286.0, 312.0, 324.0, 286.0, 293.0, 321.0, 312.0, 314.0, 322.0, 314.0, 319.0, 316.0, 320.0, 291.0, 288.0, 293.0, 289.0, 292.0, 287.0, 316.0, 314.0, 288.0, 299.0, 322.0, 314.0, 286.0, 296.0, 316.0, 320.0, 314.0, 319.0, 313.0, 314.0, 316.0, 314.0, 322.0, 317.0, 290.0, 294.0, 311.0, 316.0, 319.0, 308.0, 293.0, 283.0]}, "sampler_perf": {"mean_env_wait_ms": 0.871423269952275, "mean_processing_ms": 0.24262389296401352, "mean_inference_ms": 1.4655941462374882}, "off_policy_estimator": {}, "info": {"num_steps_trained": 8592000, "num_steps_sampled": 4582400, "sample_time_ms": 22265.267, "load_time_ms": 37.124, "grad_time_ms": 9998.565, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.00020107599266339093, "policy_loss": -0.007807094603776932, "vf_loss": 81.7130355834961, "vf_explained_var": 0.7695291638374329, "kl": 0.0018338669324293733, "entropy": 1.1305631399154663, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4582400, "episodes_total": 11456, "training_iteration": 358, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-49-36", "timestamp": 1660258176, "time_this_iter_s": 28.821206092834473, "time_total_s": 16587.246614933014, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 16587.246614933014, "timesteps_since_restore": 4582400, "iterations_since_restore": 358, "perf": {"cpu_util_percent": 32.69024390243902, "ram_util_percent": 58.856097560975606}}
+{"episode_reward_max": 639.0, "episode_reward_min": 467.0, "episode_reward_mean": 604.94, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 223.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 302.47}, "custom_metrics": {"sparse_reward_mean": 209.6, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 185.74, "shaped_reward_min": 147, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.77, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 18.18, "onion_pickup_agent_1_min": 13, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 16.56, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 17.46, "useful_onion_pickup_agent_1_min": 12, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.2, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.3, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.03, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.12, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.29, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.59, "potting_onion_agent_1_min": 13, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.37, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.81, "dish_pickup_agent_1_min": 4, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.41, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.89, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.73, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.55, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.52, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.13, "soup_pickup_agent_1_min": 4, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.45, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.06, "soup_delivery_agent_1_min": 4, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.29, 
"optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.59, "optimal_onion_potting_agent_1_min": 13, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.29, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.59, "viable_onion_potting_agent_1_min": 13, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 630.0, 630.0, 615.0, 579.0, 579.0, 587.0, 633.0, 630.0, 636.0, 584.0, 582.0, 636.0, 633.0, 576.0, 630.0, 581.0, 579.0, 579.0, 636.0, 630.0, 587.0, 576.0, 584.0, 630.0, 587.0, 576.0, 633.0, 582.0, 636.0, 579.0, 582.0, 576.0, 579.0, 582.0, 584.0, 636.0, 633.0, 636.0, 639.0, 627.0, 630.0, 582.0, 633.0, 639.0, 573.0, 636.0, 579.0, 633.0, 636.0, 633.0, 636.0, 579.0, 582.0, 579.0, 630.0, 587.0, 636.0, 582.0, 636.0, 633.0, 627.0, 630.0, 639.0, 584.0, 627.0, 627.0, 576.0, 570.0, 582.0, 627.0, 639.0, 579.0, 579.0, 587.0, 630.0, 579.0, 627.0, 582.0, 636.0, 630.0, 624.0, 518.0, 570.0, 636.0, 576.0, 587.0, 624.0, 636.0, 587.0, 627.0, 630.0, 630.0, 584.0, 579.0, 636.0, 576.0, 630.0, 578.0, 467.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [299.0, 280.0, 306.0, 324.0, 311.0, 319.0, 308.0, 307.0, 288.0, 291.0, 288.0, 291.0, 288.0, 299.0, 308.0, 325.0, 314.0, 316.0, 319.0, 317.0, 290.0, 294.0, 291.0, 291.0, 321.0, 315.0, 319.0, 314.0, 286.0, 290.0, 306.0, 324.0, 288.0, 293.0, 290.0, 289.0, 299.0, 280.0, 322.0, 314.0, 313.0, 317.0, 283.0, 304.0, 285.0, 291.0, 293.0, 291.0, 324.0, 306.0, 288.0, 299.0, 280.0, 296.0, 314.0, 319.0, 291.0, 291.0, 317.0, 319.0, 294.0, 285.0, 296.0, 286.0, 293.0, 283.0, 290.0, 289.0, 291.0, 291.0, 296.0, 288.0, 319.0, 317.0, 316.0, 317.0, 319.0, 317.0, 325.0, 314.0, 313.0, 314.0, 321.0, 309.0, 296.0, 286.0, 313.0, 320.0, 317.0, 322.0, 287.0, 286.0, 312.0, 324.0, 286.0, 293.0, 321.0, 
312.0, 314.0, 322.0, 314.0, 319.0, 316.0, 320.0, 291.0, 288.0, 293.0, 289.0, 292.0, 287.0, 316.0, 314.0, 288.0, 299.0, 322.0, 314.0, 286.0, 296.0, 316.0, 320.0, 314.0, 319.0, 313.0, 314.0, 316.0, 314.0, 322.0, 317.0, 290.0, 294.0, 311.0, 316.0, 319.0, 308.0, 293.0, 283.0, 282.0, 288.0, 293.0, 289.0, 313.0, 314.0, 322.0, 317.0, 296.0, 283.0, 290.0, 289.0, 288.0, 299.0, 316.0, 314.0, 291.0, 288.0, 318.0, 309.0, 288.0, 294.0, 324.0, 312.0, 316.0, 314.0, 302.0, 322.0, 247.0, 271.0, 285.0, 285.0, 319.0, 317.0, 291.0, 285.0, 296.0, 291.0, 313.0, 311.0, 321.0, 315.0, 301.0, 286.0, 308.0, 319.0, 322.0, 308.0, 312.0, 318.0, 286.0, 298.0, 288.0, 291.0, 319.0, 317.0, 285.0, 291.0, 318.0, 312.0, 292.0, 286.0, 223.0, 244.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8702563301594608, "mean_processing_ms": 0.24238978711918313, "mean_inference_ms": 1.4642680017402931}, "off_policy_estimator": {}, "info": {"num_steps_trained": 8616000, "num_steps_sampled": 4595200, "sample_time_ms": 22073.451, "load_time_ms": 37.129, "grad_time_ms": 9911.472, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0011308585526421666, "policy_loss": -0.006429137196391821, "vf_loss": 81.29705047607422, "vf_explained_var": 0.7726876139640808, "kl": 0.0017355438321828842, "entropy": 1.139426350593567, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4595200, "episodes_total": 11488, "training_iteration": 359, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-50-06", "timestamp": 1660258206, "time_this_iter_s": 30.0307719707489, "time_total_s": 16617.277386903763, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 16617.277386903763, "timesteps_since_restore": 4595200, "iterations_since_restore": 359, "perf": {"cpu_util_percent": 36.002325581395354, "ram_util_percent": 58.82325581395349}}
+{"episode_reward_max": 639.0, "episode_reward_min": 467.0, "episode_reward_mean": 605.46, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 223.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 302.73}, "custom_metrics": {"sparse_reward_mean": 209.8, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 185.86, "shaped_reward_min": 147, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.85, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 18.09, "onion_pickup_agent_1_min": 14, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 16.61, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 17.37, "useful_onion_pickup_agent_1_min": 12, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.22, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.21, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.05, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.09, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.33, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.59, "potting_onion_agent_1_min": 13, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.39, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.91, "dish_pickup_agent_1_min": 4, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.31, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.94, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.77, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.6, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.51, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.17, "soup_pickup_agent_1_min": 4, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.44, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.1, "soup_delivery_agent_1_min": 4, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.33, 
"optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.59, "optimal_onion_potting_agent_1_min": 13, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.33, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.59, "viable_onion_potting_agent_1_min": 13, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [636.0, 593.0, 633.0, 639.0, 627.0, 582.0, 587.0, 587.0, 639.0, 636.0, 582.0, 630.0, 633.0, 633.0, 630.0, 582.0, 627.0, 633.0, 639.0, 636.0, 639.0, 521.0, 633.0, 636.0, 630.0, 582.0, 558.0, 576.0, 639.0, 636.0, 624.0, 636.0, 584.0, 627.0, 627.0, 576.0, 570.0, 582.0, 627.0, 639.0, 579.0, 579.0, 587.0, 630.0, 579.0, 627.0, 582.0, 636.0, 630.0, 624.0, 518.0, 570.0, 636.0, 576.0, 587.0, 624.0, 636.0, 587.0, 627.0, 630.0, 630.0, 584.0, 579.0, 636.0, 576.0, 630.0, 578.0, 467.0, 579.0, 630.0, 630.0, 615.0, 579.0, 579.0, 587.0, 633.0, 630.0, 636.0, 584.0, 582.0, 636.0, 633.0, 576.0, 630.0, 581.0, 579.0, 579.0, 636.0, 630.0, 587.0, 576.0, 584.0, 630.0, 587.0, 576.0, 633.0, 582.0, 636.0, 579.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [314.0, 322.0, 293.0, 300.0, 311.0, 322.0, 322.0, 317.0, 316.0, 311.0, 289.0, 293.0, 294.0, 293.0, 288.0, 299.0, 324.0, 315.0, 319.0, 317.0, 296.0, 286.0, 309.0, 321.0, 319.0, 314.0, 320.0, 313.0, 319.0, 311.0, 293.0, 289.0, 318.0, 309.0, 317.0, 316.0, 324.0, 315.0, 314.0, 322.0, 319.0, 320.0, 256.0, 265.0, 316.0, 317.0, 314.0, 322.0, 311.0, 319.0, 294.0, 288.0, 281.0, 277.0, 276.0, 300.0, 319.0, 320.0, 314.0, 322.0, 310.0, 314.0, 316.0, 320.0, 290.0, 294.0, 311.0, 316.0, 319.0, 308.0, 293.0, 283.0, 282.0, 288.0, 293.0, 289.0, 313.0, 314.0, 322.0, 317.0, 296.0, 283.0, 290.0, 289.0, 288.0, 299.0, 316.0, 314.0, 291.0, 288.0, 318.0, 309.0, 288.0, 294.0, 324.0, 312.0, 316.0, 
314.0, 302.0, 322.0, 247.0, 271.0, 285.0, 285.0, 319.0, 317.0, 291.0, 285.0, 296.0, 291.0, 313.0, 311.0, 321.0, 315.0, 301.0, 286.0, 308.0, 319.0, 322.0, 308.0, 312.0, 318.0, 286.0, 298.0, 288.0, 291.0, 319.0, 317.0, 285.0, 291.0, 318.0, 312.0, 292.0, 286.0, 223.0, 244.0, 299.0, 280.0, 306.0, 324.0, 311.0, 319.0, 308.0, 307.0, 288.0, 291.0, 288.0, 291.0, 288.0, 299.0, 308.0, 325.0, 314.0, 316.0, 319.0, 317.0, 290.0, 294.0, 291.0, 291.0, 321.0, 315.0, 319.0, 314.0, 286.0, 290.0, 306.0, 324.0, 288.0, 293.0, 290.0, 289.0, 299.0, 280.0, 322.0, 314.0, 313.0, 317.0, 283.0, 304.0, 285.0, 291.0, 293.0, 291.0, 324.0, 306.0, 288.0, 299.0, 280.0, 296.0, 314.0, 319.0, 291.0, 291.0, 317.0, 319.0, 294.0, 285.0, 296.0, 286.0]}, "sampler_perf": {"mean_env_wait_ms": 0.86909953447402, "mean_processing_ms": 0.24215862097735263, "mean_inference_ms": 1.4630909533739367}, "off_policy_estimator": {}, "info": {"num_steps_trained": 8640000, "num_steps_sampled": 4608000, "sample_time_ms": 21874.868, "load_time_ms": 37.48, "grad_time_ms": 9746.645, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0013312319060787559, "policy_loss": -0.005779942963272333, "vf_loss": 76.8069839477539, "vf_explained_var": 0.7698413729667664, "kl": 0.001875889953225851, "entropy": 1.1390520334243774, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4608000, "episodes_total": 11520, "training_iteration": 360, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-50-38", "timestamp": 1660258238, "time_this_iter_s": 32.26046180725098, "time_total_s": 16649.537848711014, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 16649.537848711014, "timesteps_since_restore": 4608000, "iterations_since_restore": 360, "perf": {"cpu_util_percent": 32.559999999999995, "ram_util_percent": 58.84666666666664}}
+{"episode_reward_max": 639.0, "episode_reward_min": 467.0, "episode_reward_mean": 605.93, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 223.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 302.965}, "custom_metrics": {"sparse_reward_mean": 209.8, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 186.33, "shaped_reward_min": 147, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.79, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 18.2, "onion_pickup_agent_1_min": 14, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 16.6, "useful_onion_pickup_agent_0_min": 13, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.5, "useful_onion_pickup_agent_1_min": 12, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.2, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.23, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.02, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.09, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.25, "potting_onion_agent_0_min": 13, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.66, "potting_onion_agent_1_min": 13, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.42, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.82, "dish_pickup_agent_1_min": 4, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.4, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.05, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.75, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.55, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.54, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.11, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.48, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.05, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.25, 
"optimal_onion_potting_agent_0_min": 13, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.66, "optimal_onion_potting_agent_1_min": 13, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.25, "viable_onion_potting_agent_0_min": 13, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.66, "viable_onion_potting_agent_1_min": 13, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 579.0, 630.0, 633.0, 582.0, 636.0, 590.0, 630.0, 587.0, 636.0, 576.0, 630.0, 630.0, 582.0, 576.0, 630.0, 582.0, 587.0, 633.0, 582.0, 636.0, 630.0, 582.0, 587.0, 630.0, 573.0, 630.0, 630.0, 525.0, 579.0, 627.0, 633.0, 576.0, 630.0, 578.0, 467.0, 579.0, 630.0, 630.0, 615.0, 579.0, 579.0, 587.0, 633.0, 630.0, 636.0, 584.0, 582.0, 636.0, 633.0, 576.0, 630.0, 581.0, 579.0, 579.0, 636.0, 630.0, 587.0, 576.0, 584.0, 630.0, 587.0, 576.0, 633.0, 582.0, 636.0, 579.0, 582.0, 636.0, 593.0, 633.0, 639.0, 627.0, 582.0, 587.0, 587.0, 639.0, 636.0, 582.0, 630.0, 633.0, 633.0, 630.0, 582.0, 627.0, 633.0, 639.0, 636.0, 639.0, 521.0, 633.0, 636.0, 630.0, 582.0, 558.0, 576.0, 639.0, 636.0, 624.0, 636.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [288.0, 291.0, 282.0, 297.0, 319.0, 311.0, 321.0, 312.0, 286.0, 296.0, 314.0, 322.0, 296.0, 294.0, 311.0, 319.0, 298.0, 289.0, 322.0, 314.0, 285.0, 291.0, 313.0, 317.0, 316.0, 314.0, 291.0, 291.0, 298.0, 278.0, 308.0, 322.0, 293.0, 289.0, 293.0, 294.0, 316.0, 317.0, 291.0, 291.0, 319.0, 317.0, 321.0, 309.0, 290.0, 292.0, 296.0, 291.0, 316.0, 314.0, 287.0, 286.0, 313.0, 317.0, 318.0, 312.0, 265.0, 260.0, 287.0, 292.0, 314.0, 313.0, 316.0, 317.0, 285.0, 291.0, 318.0, 312.0, 292.0, 286.0, 223.0, 244.0, 299.0, 280.0, 306.0, 324.0, 311.0, 319.0, 308.0, 307.0, 288.0, 291.0, 288.0, 291.0, 288.0, 299.0, 308.0, 325.0, 314.0, 316.0, 319.0, 317.0, 290.0, 294.0, 291.0, 291.0, 321.0, 
315.0, 319.0, 314.0, 286.0, 290.0, 306.0, 324.0, 288.0, 293.0, 290.0, 289.0, 299.0, 280.0, 322.0, 314.0, 313.0, 317.0, 283.0, 304.0, 285.0, 291.0, 293.0, 291.0, 324.0, 306.0, 288.0, 299.0, 280.0, 296.0, 314.0, 319.0, 291.0, 291.0, 317.0, 319.0, 294.0, 285.0, 296.0, 286.0, 314.0, 322.0, 293.0, 300.0, 311.0, 322.0, 322.0, 317.0, 316.0, 311.0, 289.0, 293.0, 294.0, 293.0, 288.0, 299.0, 324.0, 315.0, 319.0, 317.0, 296.0, 286.0, 309.0, 321.0, 319.0, 314.0, 320.0, 313.0, 319.0, 311.0, 293.0, 289.0, 318.0, 309.0, 317.0, 316.0, 324.0, 315.0, 314.0, 322.0, 319.0, 320.0, 256.0, 265.0, 316.0, 317.0, 314.0, 322.0, 311.0, 319.0, 294.0, 288.0, 281.0, 277.0, 276.0, 300.0, 319.0, 320.0, 314.0, 322.0, 310.0, 314.0, 316.0, 320.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8679613450602781, "mean_processing_ms": 0.24193358188716718, "mean_inference_ms": 1.4622622611677922}, "off_policy_estimator": {}, "info": {"num_steps_trained": 8664000, "num_steps_sampled": 4620800, "sample_time_ms": 21811.818, "load_time_ms": 37.251, "grad_time_ms": 9734.507, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0007378067821264267, "policy_loss": -0.006473819259554148, "vf_loss": 77.80099487304688, "vf_explained_var": 0.7715656161308289, "kl": 0.0017216805135831237, "entropy": 1.1369411945343018, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4620800, "episodes_total": 11552, "training_iteration": 361, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-51-13", "timestamp": 1660258273, "time_this_iter_s": 35.53581404685974, "time_total_s": 16685.073662757874, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 16685.073662757874, "timesteps_since_restore": 4620800, "iterations_since_restore": 361, "perf": {"cpu_util_percent": 32.418, "ram_util_percent": 58.85999999999999}}
+{"episode_reward_max": 639.0, "episode_reward_min": 521.0, "episode_reward_mean": 608.25, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 256.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 304.125}, "custom_metrics": {"sparse_reward_mean": 210.6, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 187.05, "shaped_reward_min": 158, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.76, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 18.23, "onion_pickup_agent_1_min": 14, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.58, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.71, "useful_onion_pickup_agent_1_min": 14, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.2, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.13, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.05, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.04, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.21, "potting_onion_agent_0_min": 13, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.81, "potting_onion_agent_1_min": 14, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.6, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.74, "dish_pickup_agent_1_min": 4, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.5, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.03, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.8, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.51, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.64, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.04, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.59, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.96, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.21, 
"optimal_onion_potting_agent_0_min": 13, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.81, "optimal_onion_potting_agent_1_min": 14, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.21, "viable_onion_potting_agent_0_min": 13, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.81, "viable_onion_potting_agent_1_min": 14, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [624.0, 587.0, 582.0, 582.0, 579.0, 636.0, 630.0, 567.0, 630.0, 633.0, 636.0, 582.0, 621.0, 630.0, 587.0, 587.0, 639.0, 630.0, 587.0, 587.0, 587.0, 630.0, 636.0, 627.0, 561.0, 627.0, 630.0, 636.0, 582.0, 582.0, 579.0, 587.0, 582.0, 636.0, 579.0, 582.0, 636.0, 593.0, 633.0, 639.0, 627.0, 582.0, 587.0, 587.0, 639.0, 636.0, 582.0, 630.0, 633.0, 633.0, 630.0, 582.0, 627.0, 633.0, 639.0, 636.0, 639.0, 521.0, 633.0, 636.0, 630.0, 582.0, 558.0, 576.0, 639.0, 636.0, 624.0, 636.0, 579.0, 579.0, 630.0, 633.0, 582.0, 636.0, 590.0, 630.0, 587.0, 636.0, 576.0, 630.0, 630.0, 582.0, 576.0, 630.0, 582.0, 587.0, 633.0, 582.0, 636.0, 630.0, 582.0, 587.0, 630.0, 573.0, 630.0, 630.0, 525.0, 579.0, 627.0, 633.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [306.0, 318.0, 296.0, 291.0, 288.0, 294.0, 287.0, 295.0, 288.0, 291.0, 319.0, 317.0, 316.0, 314.0, 281.0, 286.0, 321.0, 309.0, 316.0, 317.0, 319.0, 317.0, 294.0, 288.0, 307.0, 314.0, 308.0, 322.0, 291.0, 296.0, 285.0, 302.0, 314.0, 325.0, 313.0, 317.0, 295.0, 292.0, 293.0, 294.0, 294.0, 293.0, 311.0, 319.0, 321.0, 315.0, 316.0, 311.0, 284.0, 277.0, 316.0, 311.0, 311.0, 319.0, 320.0, 316.0, 294.0, 288.0, 297.0, 285.0, 282.0, 297.0, 304.0, 283.0, 291.0, 291.0, 317.0, 319.0, 294.0, 285.0, 296.0, 286.0, 314.0, 322.0, 293.0, 300.0, 311.0, 322.0, 322.0, 317.0, 316.0, 311.0, 289.0, 293.0, 294.0, 293.0, 288.0, 299.0, 324.0, 315.0, 319.0, 317.0, 296.0, 286.0, 309.0, 321.0, 319.0, 
314.0, 320.0, 313.0, 319.0, 311.0, 293.0, 289.0, 318.0, 309.0, 317.0, 316.0, 324.0, 315.0, 314.0, 322.0, 319.0, 320.0, 256.0, 265.0, 316.0, 317.0, 314.0, 322.0, 311.0, 319.0, 294.0, 288.0, 281.0, 277.0, 276.0, 300.0, 319.0, 320.0, 314.0, 322.0, 310.0, 314.0, 316.0, 320.0, 288.0, 291.0, 282.0, 297.0, 319.0, 311.0, 321.0, 312.0, 286.0, 296.0, 314.0, 322.0, 296.0, 294.0, 311.0, 319.0, 298.0, 289.0, 322.0, 314.0, 285.0, 291.0, 313.0, 317.0, 316.0, 314.0, 291.0, 291.0, 298.0, 278.0, 308.0, 322.0, 293.0, 289.0, 293.0, 294.0, 316.0, 317.0, 291.0, 291.0, 319.0, 317.0, 321.0, 309.0, 290.0, 292.0, 296.0, 291.0, 316.0, 314.0, 287.0, 286.0, 313.0, 317.0, 318.0, 312.0, 265.0, 260.0, 287.0, 292.0, 314.0, 313.0, 316.0, 317.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8668385855769685, "mean_processing_ms": 0.2417133493697533, "mean_inference_ms": 1.4618269528034153}, "off_policy_estimator": {}, "info": {"num_steps_trained": 8688000, "num_steps_sampled": 4633600, "sample_time_ms": 22112.322, "load_time_ms": 37.291, "grad_time_ms": 9542.038, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0006689311121590436, "policy_loss": -0.006603518966585398, "vf_loss": 78.373291015625, "vf_explained_var": 0.7716686129570007, "kl": 0.0016899490728974342, "entropy": 1.1297602653503418, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4633600, "episodes_total": 11584, "training_iteration": 362, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-51-49", "timestamp": 1660258309, "time_this_iter_s": 35.281026124954224, "time_total_s": 16720.354688882828, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 16720.354688882828, "timesteps_since_restore": 4633600, "iterations_since_restore": 362, "perf": {"cpu_util_percent": 29.87, "ram_util_percent": 58.83999999999998}}
+{"episode_reward_max": 639.0, "episode_reward_min": 525.0, "episode_reward_mean": 605.21, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 260.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 302.605}, "custom_metrics": {"sparse_reward_mean": 209.6, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 186.01, "shaped_reward_min": 146, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.54, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.47, "onion_pickup_agent_1_min": 15, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.27, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.87, "useful_onion_pickup_agent_1_min": 14, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.26, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.25, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.08, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.09, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 15.91, "potting_onion_agent_0_min": 13, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.88, "potting_onion_agent_1_min": 15, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.69, "dish_pickup_agent_0_min": 5, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.68, "dish_pickup_agent_1_min": 4, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.57, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 4.91, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.84, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.52, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.67, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 4.97, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.59, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.89, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.91, 
"optimal_onion_potting_agent_0_min": 13, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.88, "optimal_onion_potting_agent_1_min": 15, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.91, "viable_onion_potting_agent_0_min": 13, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.88, "viable_onion_potting_agent_1_min": 15, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 630.0, 582.0, 579.0, 633.0, 579.0, 582.0, 582.0, 582.0, 627.0, 582.0, 584.0, 633.0, 630.0, 587.0, 546.0, 587.0, 576.0, 630.0, 636.0, 630.0, 630.0, 573.0, 587.0, 636.0, 590.0, 579.0, 582.0, 633.0, 639.0, 576.0, 630.0, 639.0, 636.0, 624.0, 636.0, 579.0, 579.0, 630.0, 633.0, 582.0, 636.0, 590.0, 630.0, 587.0, 636.0, 576.0, 630.0, 630.0, 582.0, 576.0, 630.0, 582.0, 587.0, 633.0, 582.0, 636.0, 630.0, 582.0, 587.0, 630.0, 573.0, 630.0, 630.0, 525.0, 579.0, 627.0, 633.0, 624.0, 587.0, 582.0, 582.0, 579.0, 636.0, 630.0, 567.0, 630.0, 633.0, 636.0, 582.0, 621.0, 630.0, 587.0, 587.0, 639.0, 630.0, 587.0, 587.0, 587.0, 630.0, 636.0, 627.0, 561.0, 627.0, 630.0, 636.0, 582.0, 582.0, 579.0, 587.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [296.0, 286.0, 313.0, 317.0, 291.0, 291.0, 282.0, 297.0, 314.0, 319.0, 285.0, 294.0, 291.0, 291.0, 288.0, 294.0, 291.0, 291.0, 313.0, 314.0, 293.0, 289.0, 285.0, 299.0, 316.0, 317.0, 316.0, 314.0, 293.0, 294.0, 273.0, 273.0, 296.0, 291.0, 287.0, 289.0, 316.0, 314.0, 319.0, 317.0, 318.0, 312.0, 316.0, 314.0, 283.0, 290.0, 299.0, 288.0, 321.0, 315.0, 302.0, 288.0, 286.0, 293.0, 288.0, 294.0, 314.0, 319.0, 316.0, 323.0, 288.0, 288.0, 321.0, 309.0, 319.0, 320.0, 314.0, 322.0, 310.0, 314.0, 316.0, 320.0, 288.0, 291.0, 282.0, 297.0, 319.0, 311.0, 321.0, 312.0, 286.0, 296.0, 314.0, 322.0, 296.0, 294.0, 311.0, 319.0, 298.0, 289.0, 322.0, 314.0, 285.0, 291.0, 313.0, 317.0, 316.0, 
314.0, 291.0, 291.0, 298.0, 278.0, 308.0, 322.0, 293.0, 289.0, 293.0, 294.0, 316.0, 317.0, 291.0, 291.0, 319.0, 317.0, 321.0, 309.0, 290.0, 292.0, 296.0, 291.0, 316.0, 314.0, 287.0, 286.0, 313.0, 317.0, 318.0, 312.0, 265.0, 260.0, 287.0, 292.0, 314.0, 313.0, 316.0, 317.0, 306.0, 318.0, 296.0, 291.0, 288.0, 294.0, 287.0, 295.0, 288.0, 291.0, 319.0, 317.0, 316.0, 314.0, 281.0, 286.0, 321.0, 309.0, 316.0, 317.0, 319.0, 317.0, 294.0, 288.0, 307.0, 314.0, 308.0, 322.0, 291.0, 296.0, 285.0, 302.0, 314.0, 325.0, 313.0, 317.0, 295.0, 292.0, 293.0, 294.0, 294.0, 293.0, 311.0, 319.0, 321.0, 315.0, 316.0, 311.0, 284.0, 277.0, 316.0, 311.0, 311.0, 319.0, 320.0, 316.0, 294.0, 288.0, 297.0, 285.0, 282.0, 297.0, 304.0, 283.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8657202120154665, "mean_processing_ms": 0.2414933019778603, "mean_inference_ms": 1.4613549029197088}, "off_policy_estimator": {}, "info": {"num_steps_trained": 8712000, "num_steps_sampled": 4646400, "sample_time_ms": 22230.157, "load_time_ms": 37.429, "grad_time_ms": 9536.697, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.00019748850900214165, "policy_loss": -0.00744326738640666, "vf_loss": 82.0882568359375, "vf_explained_var": 0.7681159377098083, "kl": 0.0019463537028059363, "entropy": 1.1361408233642578, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4646400, "episodes_total": 11616, "training_iteration": 363, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-52-22", "timestamp": 1660258342, "time_this_iter_s": 33.05159020423889, "time_total_s": 16753.406279087067, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 16753.406279087067, "timesteps_since_restore": 4646400, "iterations_since_restore": 363, "perf": {"cpu_util_percent": 32.35531914893617, "ram_util_percent": 58.848936170212795}}
+{"episode_reward_max": 639.0, "episode_reward_min": 353.0, "episode_reward_mean": 605.42, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 175.0}, "policy_reward_max": {"ppo": 330.0}, "policy_reward_mean": {"ppo": 302.71}, "custom_metrics": {"sparse_reward_mean": 209.6, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 186.22, "shaped_reward_min": 113, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.62, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.37, "onion_pickup_agent_1_min": 15, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.31, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.68, "useful_onion_pickup_agent_1_min": 14, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.22, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.25, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.08, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.08, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 16.08, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.82, "potting_onion_agent_1_min": 15, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.6, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 16, "dish_pickup_agent_1_mean": 5.75, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.46, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.98, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.85, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 11, "dish_drop_agent_1_mean": 0.51, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.04, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.56, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.08, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.48, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.0, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.08, 
"optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.82, "optimal_onion_potting_agent_1_min": 15, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.08, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.82, "viable_onion_potting_agent_1_min": 15, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [633.0, 587.0, 582.0, 636.0, 630.0, 636.0, 636.0, 579.0, 630.0, 639.0, 579.0, 636.0, 353.0, 639.0, 587.0, 630.0, 633.0, 630.0, 587.0, 582.0, 636.0, 633.0, 639.0, 636.0, 582.0, 636.0, 639.0, 630.0, 621.0, 582.0, 630.0, 636.0, 525.0, 579.0, 627.0, 633.0, 624.0, 587.0, 582.0, 582.0, 579.0, 636.0, 630.0, 567.0, 630.0, 633.0, 636.0, 582.0, 621.0, 630.0, 587.0, 587.0, 639.0, 630.0, 587.0, 587.0, 587.0, 630.0, 636.0, 627.0, 561.0, 627.0, 630.0, 636.0, 582.0, 582.0, 579.0, 587.0, 582.0, 630.0, 582.0, 579.0, 633.0, 579.0, 582.0, 582.0, 582.0, 627.0, 582.0, 584.0, 633.0, 630.0, 587.0, 546.0, 587.0, 576.0, 630.0, 636.0, 630.0, 630.0, 573.0, 587.0, 636.0, 590.0, 579.0, 582.0, 633.0, 639.0, 576.0, 630.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [324.0, 309.0, 283.0, 304.0, 293.0, 289.0, 317.0, 319.0, 311.0, 319.0, 322.0, 314.0, 325.0, 311.0, 290.0, 289.0, 308.0, 322.0, 314.0, 325.0, 288.0, 291.0, 322.0, 314.0, 175.0, 178.0, 327.0, 312.0, 290.0, 297.0, 313.0, 317.0, 316.0, 317.0, 311.0, 319.0, 298.0, 289.0, 286.0, 296.0, 316.0, 320.0, 319.0, 314.0, 312.0, 327.0, 313.0, 323.0, 280.0, 302.0, 319.0, 317.0, 309.0, 330.0, 318.0, 312.0, 302.0, 319.0, 294.0, 288.0, 309.0, 321.0, 319.0, 317.0, 265.0, 260.0, 287.0, 292.0, 314.0, 313.0, 316.0, 317.0, 306.0, 318.0, 296.0, 291.0, 288.0, 294.0, 287.0, 295.0, 288.0, 291.0, 319.0, 317.0, 316.0, 314.0, 281.0, 286.0, 321.0, 309.0, 316.0, 317.0, 319.0, 317.0, 294.0, 288.0, 307.0, 
314.0, 308.0, 322.0, 291.0, 296.0, 285.0, 302.0, 314.0, 325.0, 313.0, 317.0, 295.0, 292.0, 293.0, 294.0, 294.0, 293.0, 311.0, 319.0, 321.0, 315.0, 316.0, 311.0, 284.0, 277.0, 316.0, 311.0, 311.0, 319.0, 320.0, 316.0, 294.0, 288.0, 297.0, 285.0, 282.0, 297.0, 304.0, 283.0, 296.0, 286.0, 313.0, 317.0, 291.0, 291.0, 282.0, 297.0, 314.0, 319.0, 285.0, 294.0, 291.0, 291.0, 288.0, 294.0, 291.0, 291.0, 313.0, 314.0, 293.0, 289.0, 285.0, 299.0, 316.0, 317.0, 316.0, 314.0, 293.0, 294.0, 273.0, 273.0, 296.0, 291.0, 287.0, 289.0, 316.0, 314.0, 319.0, 317.0, 318.0, 312.0, 316.0, 314.0, 283.0, 290.0, 299.0, 288.0, 321.0, 315.0, 302.0, 288.0, 286.0, 293.0, 288.0, 294.0, 314.0, 319.0, 316.0, 323.0, 288.0, 288.0, 321.0, 309.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8646062243279308, "mean_processing_ms": 0.24127430142106637, "mean_inference_ms": 1.4608685672633468}, "off_policy_estimator": {}, "info": {"num_steps_trained": 8736000, "num_steps_sampled": 4659200, "sample_time_ms": 22504.157, "load_time_ms": 37.127, "grad_time_ms": 9578.876, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.00030447044991888106, "policy_loss": -0.007189334835857153, "vf_loss": 80.5628433227539, "vf_explained_var": 0.7757663130760193, "kl": 0.002137100091204047, "entropy": 1.1249442100524902, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4659200, "episodes_total": 11648, "training_iteration": 364, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-52-57", "timestamp": 1660258377, "time_this_iter_s": 35.15313506126404, "time_total_s": 16788.55941414833, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 16788.55941414833, "timesteps_since_restore": 4659200, "iterations_since_restore": 364, "perf": {"cpu_util_percent": 30.822448979591837, "ram_util_percent": 58.930612244897965}}
+{"episode_reward_max": 639.0, "episode_reward_min": 294.0, "episode_reward_mean": 600.91, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 138.0}, "policy_reward_max": {"ppo": 330.0}, "policy_reward_mean": {"ppo": 300.455}, "custom_metrics": {"sparse_reward_mean": 208.0, "sparse_reward_min": 100, "sparse_reward_max": 220, "shaped_reward_mean": 184.91, "shaped_reward_min": 94, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.5, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.13, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.16, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.5, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.19, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.25, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.07, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.09, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 16.09, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.54, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.53, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 16, "dish_pickup_agent_1_mean": 5.7, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.49, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.95, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.82, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 11, "dish_drop_agent_1_mean": 0.48, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.46, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.09, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.38, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.04, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.09, 
"optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.54, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.09, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.54, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [627.0, 582.0, 579.0, 633.0, 579.0, 582.0, 627.0, 582.0, 636.0, 590.0, 294.0, 584.0, 630.0, 621.0, 576.0, 630.0, 582.0, 582.0, 584.0, 582.0, 627.0, 579.0, 576.0, 633.0, 590.0, 630.0, 630.0, 579.0, 627.0, 579.0, 630.0, 621.0, 582.0, 582.0, 579.0, 587.0, 582.0, 630.0, 582.0, 579.0, 633.0, 579.0, 582.0, 582.0, 582.0, 627.0, 582.0, 584.0, 633.0, 630.0, 587.0, 546.0, 587.0, 576.0, 630.0, 636.0, 630.0, 630.0, 573.0, 587.0, 636.0, 590.0, 579.0, 582.0, 633.0, 639.0, 576.0, 630.0, 633.0, 587.0, 582.0, 636.0, 630.0, 636.0, 636.0, 579.0, 630.0, 639.0, 579.0, 636.0, 353.0, 639.0, 587.0, 630.0, 633.0, 630.0, 587.0, 582.0, 636.0, 633.0, 639.0, 636.0, 582.0, 636.0, 639.0, 630.0, 621.0, 582.0, 630.0, 636.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [308.0, 319.0, 291.0, 291.0, 288.0, 291.0, 319.0, 314.0, 285.0, 294.0, 285.0, 297.0, 319.0, 308.0, 292.0, 290.0, 319.0, 317.0, 291.0, 299.0, 138.0, 156.0, 291.0, 293.0, 311.0, 319.0, 313.0, 308.0, 294.0, 282.0, 316.0, 314.0, 291.0, 291.0, 299.0, 283.0, 293.0, 291.0, 298.0, 284.0, 317.0, 310.0, 296.0, 283.0, 286.0, 290.0, 316.0, 317.0, 296.0, 294.0, 314.0, 316.0, 319.0, 311.0, 291.0, 288.0, 313.0, 314.0, 285.0, 294.0, 311.0, 319.0, 304.0, 317.0, 294.0, 288.0, 297.0, 285.0, 282.0, 297.0, 304.0, 283.0, 296.0, 286.0, 313.0, 317.0, 291.0, 291.0, 282.0, 297.0, 314.0, 319.0, 285.0, 294.0, 291.0, 291.0, 288.0, 294.0, 291.0, 291.0, 313.0, 314.0, 293.0, 289.0, 285.0, 299.0, 316.0, 
317.0, 316.0, 314.0, 293.0, 294.0, 273.0, 273.0, 296.0, 291.0, 287.0, 289.0, 316.0, 314.0, 319.0, 317.0, 318.0, 312.0, 316.0, 314.0, 283.0, 290.0, 299.0, 288.0, 321.0, 315.0, 302.0, 288.0, 286.0, 293.0, 288.0, 294.0, 314.0, 319.0, 316.0, 323.0, 288.0, 288.0, 321.0, 309.0, 324.0, 309.0, 283.0, 304.0, 293.0, 289.0, 317.0, 319.0, 311.0, 319.0, 322.0, 314.0, 325.0, 311.0, 290.0, 289.0, 308.0, 322.0, 314.0, 325.0, 288.0, 291.0, 322.0, 314.0, 175.0, 178.0, 327.0, 312.0, 290.0, 297.0, 313.0, 317.0, 316.0, 317.0, 311.0, 319.0, 298.0, 289.0, 286.0, 296.0, 316.0, 320.0, 319.0, 314.0, 312.0, 327.0, 313.0, 323.0, 280.0, 302.0, 319.0, 317.0, 309.0, 330.0, 318.0, 312.0, 302.0, 319.0, 294.0, 288.0, 309.0, 321.0, 319.0, 317.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8635010378078678, "mean_processing_ms": 0.24105756914616358, "mean_inference_ms": 1.4603867065067317}, "off_policy_estimator": {}, "info": {"num_steps_trained": 8760000, "num_steps_sampled": 4672000, "sample_time_ms": 23125.247, "load_time_ms": 37.852, "grad_time_ms": 9835.319, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.003108972916379571, "policy_loss": -0.004532767925411463, "vf_loss": 82.0846176147461, "vf_explained_var": 0.7727766036987305, "kl": 0.00231738924048841, "entropy": 1.1334295272827148, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4672000, "episodes_total": 11680, "training_iteration": 365, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-53-36", "timestamp": 1660258416, "time_this_iter_s": 39.16720676422119, "time_total_s": 16827.726620912552, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 16827.726620912552, "timesteps_since_restore": 4672000, "iterations_since_restore": 365, "perf": {"cpu_util_percent": 31.412499999999998, "ram_util_percent": 59.01071428571428}}
+{"episode_reward_max": 639.0, "episode_reward_min": 294.0, "episode_reward_mean": 599.9, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 138.0}, "policy_reward_max": {"ppo": 330.0}, "policy_reward_mean": {"ppo": 299.95}, "custom_metrics": {"sparse_reward_mean": 207.8, "sparse_reward_min": 100, "sparse_reward_max": 220, "shaped_reward_mean": 184.3, "shaped_reward_min": 94, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.52, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 17.92, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.17, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.32, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.15, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.15, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.05, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.06, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.08, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.46, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.29, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 16, "dish_pickup_agent_1_mean": 5.71, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.43, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.98, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.67, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 11, "dish_drop_agent_1_mean": 0.5, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.4, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.1, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.37, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.04, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.08, 
"optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.46, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.08, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.46, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [630.0, 636.0, 582.0, 582.0, 639.0, 579.0, 630.0, 576.0, 630.0, 402.0, 639.0, 633.0, 627.0, 587.0, 630.0, 465.0, 582.0, 581.0, 576.0, 582.0, 561.0, 633.0, 465.0, 633.0, 633.0, 633.0, 582.0, 627.0, 636.0, 630.0, 582.0, 582.0, 633.0, 639.0, 576.0, 630.0, 633.0, 587.0, 582.0, 636.0, 630.0, 636.0, 636.0, 579.0, 630.0, 639.0, 579.0, 636.0, 353.0, 639.0, 587.0, 630.0, 633.0, 630.0, 587.0, 582.0, 636.0, 633.0, 639.0, 636.0, 582.0, 636.0, 639.0, 630.0, 621.0, 582.0, 630.0, 636.0, 627.0, 582.0, 579.0, 633.0, 579.0, 582.0, 627.0, 582.0, 636.0, 590.0, 294.0, 584.0, 630.0, 621.0, 576.0, 630.0, 582.0, 582.0, 584.0, 582.0, 627.0, 579.0, 576.0, 633.0, 590.0, 630.0, 630.0, 579.0, 627.0, 579.0, 630.0, 621.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [311.0, 319.0, 319.0, 317.0, 291.0, 291.0, 291.0, 291.0, 319.0, 320.0, 296.0, 283.0, 316.0, 314.0, 287.0, 289.0, 315.0, 315.0, 198.0, 204.0, 324.0, 315.0, 316.0, 317.0, 311.0, 316.0, 288.0, 299.0, 314.0, 316.0, 231.0, 234.0, 291.0, 291.0, 279.0, 302.0, 296.0, 280.0, 288.0, 294.0, 288.0, 273.0, 321.0, 312.0, 231.0, 234.0, 316.0, 317.0, 313.0, 320.0, 313.0, 320.0, 288.0, 294.0, 308.0, 319.0, 320.0, 316.0, 311.0, 319.0, 296.0, 286.0, 291.0, 291.0, 314.0, 319.0, 316.0, 323.0, 288.0, 288.0, 321.0, 309.0, 324.0, 309.0, 283.0, 304.0, 293.0, 289.0, 317.0, 319.0, 311.0, 319.0, 322.0, 314.0, 325.0, 311.0, 290.0, 289.0, 308.0, 322.0, 314.0, 325.0, 288.0, 291.0, 322.0, 314.0, 175.0, 
178.0, 327.0, 312.0, 290.0, 297.0, 313.0, 317.0, 316.0, 317.0, 311.0, 319.0, 298.0, 289.0, 286.0, 296.0, 316.0, 320.0, 319.0, 314.0, 312.0, 327.0, 313.0, 323.0, 280.0, 302.0, 319.0, 317.0, 309.0, 330.0, 318.0, 312.0, 302.0, 319.0, 294.0, 288.0, 309.0, 321.0, 319.0, 317.0, 308.0, 319.0, 291.0, 291.0, 288.0, 291.0, 319.0, 314.0, 285.0, 294.0, 285.0, 297.0, 319.0, 308.0, 292.0, 290.0, 319.0, 317.0, 291.0, 299.0, 138.0, 156.0, 291.0, 293.0, 311.0, 319.0, 313.0, 308.0, 294.0, 282.0, 316.0, 314.0, 291.0, 291.0, 299.0, 283.0, 293.0, 291.0, 298.0, 284.0, 317.0, 310.0, 296.0, 283.0, 286.0, 290.0, 316.0, 317.0, 296.0, 294.0, 314.0, 316.0, 319.0, 311.0, 291.0, 288.0, 313.0, 314.0, 285.0, 294.0, 311.0, 319.0, 304.0, 317.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8623994673017462, "mean_processing_ms": 0.24084175116520762, "mean_inference_ms": 1.4598463496186935}, "off_policy_estimator": {}, "info": {"num_steps_trained": 8784000, "num_steps_sampled": 4684800, "sample_time_ms": 23187.668, "load_time_ms": 38.066, "grad_time_ms": 10176.48, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0020015325862914324, "policy_loss": -0.005890776868909597, "vf_loss": 84.59882354736328, "vf_explained_var": 0.7721861004829407, "kl": 0.002045721048489213, "entropy": 1.1351399421691895, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4684800, "episodes_total": 11712, "training_iteration": 366, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-54-11", "timestamp": 1660258451, "time_this_iter_s": 34.82252907752991, "time_total_s": 16862.54914999008, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 16862.54914999008, "timesteps_since_restore": 4684800, "iterations_since_restore": 366, "perf": {"cpu_util_percent": 28.122448979591837, "ram_util_percent": 58.88775510204081}}
+{"episode_reward_max": 639.0, "episode_reward_min": 294.0, "episode_reward_mean": 598.83, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 138.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 299.415}, "custom_metrics": {"sparse_reward_mean": 207.6, "sparse_reward_min": 100, "sparse_reward_max": 220, "shaped_reward_mean": 183.63, "shaped_reward_min": 94, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.68, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 17.72, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.37, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.1, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.21, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.15, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.06, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.07, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.08, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.24, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.11, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.71, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.39, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.0, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.56, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.48, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.36, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.15, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.34, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.06, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.08, 
"optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.24, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.08, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.24, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [519.0, 633.0, 636.0, 627.0, 630.0, 627.0, 630.0, 627.0, 587.0, 630.0, 633.0, 630.0, 582.0, 582.0, 587.0, 582.0, 627.0, 627.0, 587.0, 576.0, 633.0, 573.0, 636.0, 630.0, 587.0, 582.0, 636.0, 587.0, 573.0, 627.0, 636.0, 587.0, 621.0, 582.0, 630.0, 636.0, 627.0, 582.0, 579.0, 633.0, 579.0, 582.0, 627.0, 582.0, 636.0, 590.0, 294.0, 584.0, 630.0, 621.0, 576.0, 630.0, 582.0, 582.0, 584.0, 582.0, 627.0, 579.0, 576.0, 633.0, 590.0, 630.0, 630.0, 579.0, 627.0, 579.0, 630.0, 621.0, 630.0, 636.0, 582.0, 582.0, 639.0, 579.0, 630.0, 576.0, 630.0, 402.0, 639.0, 633.0, 627.0, 587.0, 630.0, 465.0, 582.0, 581.0, 576.0, 582.0, 561.0, 633.0, 465.0, 633.0, 633.0, 633.0, 582.0, 627.0, 636.0, 630.0, 582.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [265.0, 254.0, 314.0, 319.0, 322.0, 314.0, 311.0, 316.0, 312.0, 318.0, 324.0, 303.0, 321.0, 309.0, 313.0, 314.0, 288.0, 299.0, 311.0, 319.0, 310.0, 323.0, 306.0, 324.0, 293.0, 289.0, 288.0, 294.0, 282.0, 305.0, 294.0, 288.0, 309.0, 318.0, 313.0, 314.0, 291.0, 296.0, 295.0, 281.0, 316.0, 317.0, 287.0, 286.0, 318.0, 318.0, 311.0, 319.0, 288.0, 299.0, 294.0, 288.0, 319.0, 317.0, 296.0, 291.0, 277.0, 296.0, 311.0, 316.0, 319.0, 317.0, 288.0, 299.0, 302.0, 319.0, 294.0, 288.0, 309.0, 321.0, 319.0, 317.0, 308.0, 319.0, 291.0, 291.0, 288.0, 291.0, 319.0, 314.0, 285.0, 294.0, 285.0, 297.0, 319.0, 308.0, 292.0, 290.0, 319.0, 317.0, 291.0, 299.0, 138.0, 156.0, 291.0, 293.0, 311.0, 
319.0, 313.0, 308.0, 294.0, 282.0, 316.0, 314.0, 291.0, 291.0, 299.0, 283.0, 293.0, 291.0, 298.0, 284.0, 317.0, 310.0, 296.0, 283.0, 286.0, 290.0, 316.0, 317.0, 296.0, 294.0, 314.0, 316.0, 319.0, 311.0, 291.0, 288.0, 313.0, 314.0, 285.0, 294.0, 311.0, 319.0, 304.0, 317.0, 311.0, 319.0, 319.0, 317.0, 291.0, 291.0, 291.0, 291.0, 319.0, 320.0, 296.0, 283.0, 316.0, 314.0, 287.0, 289.0, 315.0, 315.0, 198.0, 204.0, 324.0, 315.0, 316.0, 317.0, 311.0, 316.0, 288.0, 299.0, 314.0, 316.0, 231.0, 234.0, 291.0, 291.0, 279.0, 302.0, 296.0, 280.0, 288.0, 294.0, 288.0, 273.0, 321.0, 312.0, 231.0, 234.0, 316.0, 317.0, 313.0, 320.0, 313.0, 320.0, 288.0, 294.0, 308.0, 319.0, 320.0, 316.0, 311.0, 319.0, 296.0, 286.0, 291.0, 291.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8612908936403926, "mean_processing_ms": 0.24062264484082838, "mean_inference_ms": 1.4590268461349842}, "off_policy_estimator": {}, "info": {"num_steps_trained": 8808000, "num_steps_sampled": 4697600, "sample_time_ms": 23071.295, "load_time_ms": 37.913, "grad_time_ms": 10615.56, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0038227650802582502, "policy_loss": -0.003672233084216714, "vf_loss": 80.5904312133789, "vf_explained_var": 0.7626190185546875, "kl": 0.0024631840642541647, "entropy": 1.128088116645813, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4697600, "episodes_total": 11744, "training_iteration": 367, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-54-44", "timestamp": 1660258484, "time_this_iter_s": 33.39541292190552, "time_total_s": 16895.944562911987, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 16895.944562911987, "timesteps_since_restore": 4697600, "iterations_since_restore": 367, "perf": {"cpu_util_percent": 29.602127659574467, "ram_util_percent": 58.99148936170216}}
+{"episode_reward_max": 639.0, "episode_reward_min": 402.0, "episode_reward_mean": 603.84, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 198.0}, "policy_reward_max": {"ppo": 326.0}, "policy_reward_mean": {"ppo": 301.92}, "custom_metrics": {"sparse_reward_mean": 209.2, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 185.44, "shaped_reward_min": 122, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.7, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 17.99, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.32, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 17.28, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.25, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.15, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.08, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.06, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.07, "potting_onion_agent_0_min": 13, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.57, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.09, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.85, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.43, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.11, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.49, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.58, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.43, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.15, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.41, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.06, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.07, 
"optimal_onion_potting_agent_0_min": 13, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.57, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.07, "viable_onion_potting_agent_0_min": 13, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.57, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [633.0, 636.0, 639.0, 579.0, 639.0, 636.0, 525.0, 581.0, 639.0, 573.0, 582.0, 587.0, 630.0, 636.0, 639.0, 633.0, 587.0, 582.0, 636.0, 636.0, 582.0, 636.0, 633.0, 627.0, 587.0, 516.0, 630.0, 633.0, 630.0, 639.0, 573.0, 582.0, 627.0, 579.0, 630.0, 621.0, 630.0, 636.0, 582.0, 582.0, 639.0, 579.0, 630.0, 576.0, 630.0, 402.0, 639.0, 633.0, 627.0, 587.0, 630.0, 465.0, 582.0, 581.0, 576.0, 582.0, 561.0, 633.0, 465.0, 633.0, 633.0, 633.0, 582.0, 627.0, 636.0, 630.0, 582.0, 582.0, 519.0, 633.0, 636.0, 627.0, 630.0, 627.0, 630.0, 627.0, 587.0, 630.0, 633.0, 630.0, 582.0, 582.0, 587.0, 582.0, 627.0, 627.0, 587.0, 576.0, 633.0, 573.0, 636.0, 630.0, 587.0, 582.0, 636.0, 587.0, 573.0, 627.0, 636.0, 587.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [324.0, 309.0, 316.0, 320.0, 319.0, 320.0, 286.0, 293.0, 319.0, 320.0, 322.0, 314.0, 260.0, 265.0, 288.0, 293.0, 322.0, 317.0, 285.0, 288.0, 293.0, 289.0, 282.0, 305.0, 306.0, 324.0, 314.0, 322.0, 322.0, 317.0, 323.0, 310.0, 293.0, 294.0, 290.0, 292.0, 319.0, 317.0, 319.0, 317.0, 288.0, 294.0, 314.0, 322.0, 316.0, 317.0, 313.0, 314.0, 290.0, 297.0, 259.0, 257.0, 316.0, 314.0, 307.0, 326.0, 321.0, 309.0, 321.0, 318.0, 277.0, 296.0, 285.0, 297.0, 313.0, 314.0, 285.0, 294.0, 311.0, 319.0, 304.0, 317.0, 311.0, 319.0, 319.0, 317.0, 291.0, 291.0, 291.0, 291.0, 319.0, 320.0, 296.0, 283.0, 316.0, 314.0, 287.0, 289.0, 315.0, 315.0, 198.0, 204.0, 324.0, 315.0, 316.0, 317.0, 311.0, 
316.0, 288.0, 299.0, 314.0, 316.0, 231.0, 234.0, 291.0, 291.0, 279.0, 302.0, 296.0, 280.0, 288.0, 294.0, 288.0, 273.0, 321.0, 312.0, 231.0, 234.0, 316.0, 317.0, 313.0, 320.0, 313.0, 320.0, 288.0, 294.0, 308.0, 319.0, 320.0, 316.0, 311.0, 319.0, 296.0, 286.0, 291.0, 291.0, 265.0, 254.0, 314.0, 319.0, 322.0, 314.0, 311.0, 316.0, 312.0, 318.0, 324.0, 303.0, 321.0, 309.0, 313.0, 314.0, 288.0, 299.0, 311.0, 319.0, 310.0, 323.0, 306.0, 324.0, 293.0, 289.0, 288.0, 294.0, 282.0, 305.0, 294.0, 288.0, 309.0, 318.0, 313.0, 314.0, 291.0, 296.0, 295.0, 281.0, 316.0, 317.0, 287.0, 286.0, 318.0, 318.0, 311.0, 319.0, 288.0, 299.0, 294.0, 288.0, 319.0, 317.0, 296.0, 291.0, 277.0, 296.0, 311.0, 316.0, 319.0, 317.0, 288.0, 299.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8601760204307758, "mean_processing_ms": 0.2404010276903208, "mean_inference_ms": 1.4579011173262801}, "off_policy_estimator": {}, "info": {"num_steps_trained": 8832000, "num_steps_sampled": 4710400, "sample_time_ms": 23247.48, "load_time_ms": 37.926, "grad_time_ms": 10791.679, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.004444511607289314, "policy_loss": -0.0034118040930479765, "vf_loss": 84.17324829101562, "vf_explained_var": 0.7645478844642639, "kl": 0.0020590554922819138, "entropy": 1.1220086812973022, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4710400, "episodes_total": 11776, "training_iteration": 368, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-55-17", "timestamp": 1660258517, "time_this_iter_s": 32.34189581871033, "time_total_s": 16928.286458730698, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 16928.286458730698, "timesteps_since_restore": 4710400, "iterations_since_restore": 368, "perf": {"cpu_util_percent": 31.186956521739134, "ram_util_percent": 58.830434782608684}}
+{"episode_reward_max": 639.0, "episode_reward_min": 513.0, "episode_reward_mean": 607.11, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 251.0}, "policy_reward_max": {"ppo": 326.0}, "policy_reward_mean": {"ppo": 303.555}, "custom_metrics": {"sparse_reward_mean": 210.2, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 186.71, "shaped_reward_min": 153, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.76, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 18.11, "onion_pickup_agent_1_min": 15, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.33, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 17.37, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.3, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.17, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.12, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.07, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.13, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.67, "potting_onion_agent_1_min": 14, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.3, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.83, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.54, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.08, "useful_dish_pickup_agent_1_min": 3, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.61, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.57, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.54, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.15, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.49, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.05, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.13, 
"optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.67, "optimal_onion_potting_agent_1_min": 14, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.13, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.67, "viable_onion_potting_agent_1_min": 14, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [587.0, 579.0, 584.0, 636.0, 627.0, 624.0, 636.0, 587.0, 582.0, 636.0, 581.0, 587.0, 630.0, 636.0, 633.0, 630.0, 630.0, 636.0, 630.0, 584.0, 516.0, 587.0, 630.0, 513.0, 587.0, 630.0, 633.0, 627.0, 633.0, 582.0, 519.0, 627.0, 636.0, 630.0, 582.0, 582.0, 519.0, 633.0, 636.0, 627.0, 630.0, 627.0, 630.0, 627.0, 587.0, 630.0, 633.0, 630.0, 582.0, 582.0, 587.0, 582.0, 627.0, 627.0, 587.0, 576.0, 633.0, 573.0, 636.0, 630.0, 587.0, 582.0, 636.0, 587.0, 573.0, 627.0, 636.0, 587.0, 633.0, 636.0, 639.0, 579.0, 639.0, 636.0, 525.0, 581.0, 639.0, 573.0, 582.0, 587.0, 630.0, 636.0, 639.0, 633.0, 587.0, 582.0, 636.0, 636.0, 582.0, 636.0, 633.0, 627.0, 587.0, 516.0, 630.0, 633.0, 630.0, 639.0, 573.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [298.0, 289.0, 285.0, 294.0, 288.0, 296.0, 316.0, 320.0, 316.0, 311.0, 308.0, 316.0, 316.0, 320.0, 294.0, 293.0, 301.0, 281.0, 314.0, 322.0, 297.0, 284.0, 299.0, 288.0, 313.0, 317.0, 314.0, 322.0, 316.0, 317.0, 324.0, 306.0, 318.0, 312.0, 319.0, 317.0, 319.0, 311.0, 282.0, 302.0, 256.0, 260.0, 288.0, 299.0, 316.0, 314.0, 255.0, 258.0, 293.0, 294.0, 316.0, 314.0, 318.0, 315.0, 311.0, 316.0, 321.0, 312.0, 288.0, 294.0, 268.0, 251.0, 311.0, 316.0, 320.0, 316.0, 311.0, 319.0, 296.0, 286.0, 291.0, 291.0, 265.0, 254.0, 314.0, 319.0, 322.0, 314.0, 311.0, 316.0, 312.0, 318.0, 324.0, 303.0, 321.0, 309.0, 313.0, 314.0, 288.0, 299.0, 311.0, 319.0, 310.0, 323.0, 306.0, 324.0, 293.0, 
289.0, 288.0, 294.0, 282.0, 305.0, 294.0, 288.0, 309.0, 318.0, 313.0, 314.0, 291.0, 296.0, 295.0, 281.0, 316.0, 317.0, 287.0, 286.0, 318.0, 318.0, 311.0, 319.0, 288.0, 299.0, 294.0, 288.0, 319.0, 317.0, 296.0, 291.0, 277.0, 296.0, 311.0, 316.0, 319.0, 317.0, 288.0, 299.0, 324.0, 309.0, 316.0, 320.0, 319.0, 320.0, 286.0, 293.0, 319.0, 320.0, 322.0, 314.0, 260.0, 265.0, 288.0, 293.0, 322.0, 317.0, 285.0, 288.0, 293.0, 289.0, 282.0, 305.0, 306.0, 324.0, 314.0, 322.0, 322.0, 317.0, 323.0, 310.0, 293.0, 294.0, 290.0, 292.0, 319.0, 317.0, 319.0, 317.0, 288.0, 294.0, 314.0, 322.0, 316.0, 317.0, 313.0, 314.0, 290.0, 297.0, 259.0, 257.0, 316.0, 314.0, 307.0, 326.0, 321.0, 309.0, 321.0, 318.0, 277.0, 296.0, 285.0, 297.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8590666727700321, "mean_processing_ms": 0.24018037596280067, "mean_inference_ms": 1.4567772377738835}, "off_policy_estimator": {}, "info": {"num_steps_trained": 8856000, "num_steps_sampled": 4723200, "sample_time_ms": 23393.671, "load_time_ms": 38.324, "grad_time_ms": 11132.048, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0011790187563747168, "policy_loss": -0.008629883639514446, "vf_loss": 80.15734100341797, "vf_explained_var": 0.7653172016143799, "kl": 0.001749455346725881, "entropy": 1.129709243774414, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4723200, "episodes_total": 11808, "training_iteration": 369, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-55-52", "timestamp": 1660258552, "time_this_iter_s": 34.8981990814209, "time_total_s": 16963.18465781212, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 16963.18465781212, "timesteps_since_restore": 4723200, "iterations_since_restore": 369, "perf": {"cpu_util_percent": 32.48979591836735, "ram_util_percent": 58.86734693877551}}
+{"episode_reward_max": 639.0, "episode_reward_min": 453.0, "episode_reward_mean": 604.05, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 220.0}, "policy_reward_max": {"ppo": 326.0}, "policy_reward_mean": {"ppo": 302.025}, "custom_metrics": {"sparse_reward_mean": 209.2, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 185.65, "shaped_reward_min": 133, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.52, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 18.13, "onion_pickup_agent_1_min": 15, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.16, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 17.45, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.24, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.15, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.1, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.06, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.0, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.68, "potting_onion_agent_1_min": 14, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.32, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.72, "dish_pickup_agent_1_min": 4, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.52, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 4.95, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.6, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.54, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.57, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.07, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.5, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.0, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.0, "optimal_onion_potting_agent_0_min": 
10, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.68, "optimal_onion_potting_agent_1_min": 14, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.0, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.68, "viable_onion_potting_agent_1_min": 14, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": 
{"episode_reward": [579.0, 636.0, 630.0, 584.0, 636.0, 465.0, 627.0, 633.0, 624.0, 633.0, 465.0, 582.0, 587.0, 630.0, 627.0, 639.0, 630.0, 633.0, 453.0, 639.0, 630.0, 630.0, 627.0, 636.0, 579.0, 587.0, 636.0, 510.0, 587.0, 624.0, 630.0, 539.0, 573.0, 627.0, 636.0, 587.0, 633.0, 636.0, 639.0, 579.0, 639.0, 636.0, 525.0, 581.0, 639.0, 573.0, 582.0, 587.0, 630.0, 636.0, 639.0, 633.0, 587.0, 582.0, 636.0, 636.0, 582.0, 636.0, 633.0, 627.0, 587.0, 516.0, 630.0, 633.0, 630.0, 639.0, 573.0, 582.0, 587.0, 579.0, 584.0, 636.0, 627.0, 624.0, 636.0, 587.0, 582.0, 636.0, 581.0, 587.0, 630.0, 636.0, 633.0, 630.0, 630.0, 636.0, 630.0, 584.0, 516.0, 587.0, 630.0, 513.0, 587.0, 630.0, 633.0, 627.0, 633.0, 582.0, 519.0, 627.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [283.0, 296.0, 322.0, 314.0, 316.0, 314.0, 298.0, 286.0, 313.0, 323.0, 222.0, 243.0, 318.0, 309.0, 321.0, 312.0, 310.0, 314.0, 319.0, 314.0, 220.0, 245.0, 297.0, 285.0, 294.0, 293.0, 318.0, 312.0, 311.0, 316.0, 317.0, 322.0, 316.0, 314.0, 319.0, 314.0, 222.0, 231.0, 319.0, 320.0, 322.0, 308.0, 322.0, 308.0, 311.0, 316.0, 319.0, 317.0, 294.0, 285.0, 283.0, 304.0, 324.0, 312.0, 259.0, 251.0, 301.0, 286.0, 305.0, 319.0, 313.0, 317.0, 271.0, 268.0, 277.0, 296.0, 311.0, 316.0, 319.0, 317.0, 288.0, 299.0, 324.0, 309.0, 316.0, 320.0, 319.0, 320.0, 286.0, 293.0, 319.0, 320.0, 322.0, 314.0, 260.0, 265.0, 288.0, 293.0, 322.0, 317.0, 285.0, 288.0, 293.0, 289.0, 282.0, 305.0, 306.0, 324.0, 314.0, 322.0, 322.0, 317.0, 323.0, 310.0, 293.0, 
294.0, 290.0, 292.0, 319.0, 317.0, 319.0, 317.0, 288.0, 294.0, 314.0, 322.0, 316.0, 317.0, 313.0, 314.0, 290.0, 297.0, 259.0, 257.0, 316.0, 314.0, 307.0, 326.0, 321.0, 309.0, 321.0, 318.0, 277.0, 296.0, 285.0, 297.0, 298.0, 289.0, 285.0, 294.0, 288.0, 296.0, 316.0, 320.0, 316.0, 311.0, 308.0, 316.0, 316.0, 320.0, 294.0, 293.0, 301.0, 281.0, 314.0, 322.0, 297.0, 284.0, 299.0, 288.0, 313.0, 317.0, 314.0, 322.0, 316.0, 317.0, 324.0, 306.0, 318.0, 312.0, 319.0, 317.0, 319.0, 311.0, 282.0, 302.0, 256.0, 260.0, 288.0, 299.0, 316.0, 314.0, 255.0, 258.0, 293.0, 294.0, 316.0, 314.0, 318.0, 315.0, 311.0, 316.0, 321.0, 312.0, 288.0, 294.0, 268.0, 251.0, 311.0, 316.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8579708589489354, "mean_processing_ms": 0.23996389125057788, "mean_inference_ms": 1.455828460743175}, "off_policy_estimator": {}, "info": {"num_steps_trained": 8880000, "num_steps_sampled": 4736000, "sample_time_ms": 23349.429, "load_time_ms": 38.506, "grad_time_ms": 11333.701, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.00012880750000476837, "policy_loss": -0.007648926693946123, "vf_loss": 83.42855072021484, "vf_explained_var": 0.7715353965759277, "kl": 0.0017624356551095843, "entropy": 1.1302567720413208, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4736000, "episodes_total": 11840, "training_iteration": 370, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-56-26", "timestamp": 1660258586, "time_this_iter_s": 33.842254877090454, "time_total_s": 16997.02691268921, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, 
"conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, 
"custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, 
"vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 16997.02691268921, "timesteps_since_restore": 4736000, "iterations_since_restore": 370, "perf": {"cpu_util_percent": 32.75416666666667, "ram_util_percent": 58.89374999999999}}
+{"episode_reward_max": 639.0, "episode_reward_min": 453.0, "episode_reward_mean": 602.85, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 220.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 301.425}, "custom_metrics": {"sparse_reward_mean": 209.0, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 184.85, "shaped_reward_min": 133, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.57, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 17.99, "onion_pickup_agent_1_min": 14, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.24, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 17.37, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.26, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.15, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.11, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.06, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.99, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.5, "potting_onion_agent_1_min": 14, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.39, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.67, "dish_pickup_agent_1_min": 4, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.5, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 4.91, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.66, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.53, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.59, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.04, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.5, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.98, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.99, 
"optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.5, "optimal_onion_potting_agent_1_min": 14, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.99, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.5, "viable_onion_potting_agent_1_min": 14, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [630.0, 579.0, 633.0, 630.0, 582.0, 582.0, 630.0, 627.0, 633.0, 630.0, 627.0, 579.0, 633.0, 587.0, 579.0, 582.0, 576.0, 579.0, 627.0, 627.0, 636.0, 639.0, 630.0, 630.0, 582.0, 618.0, 630.0, 587.0, 473.0, 587.0, 624.0, 587.0, 630.0, 639.0, 573.0, 582.0, 587.0, 579.0, 584.0, 636.0, 627.0, 624.0, 636.0, 587.0, 582.0, 636.0, 581.0, 587.0, 630.0, 636.0, 633.0, 630.0, 630.0, 636.0, 630.0, 584.0, 516.0, 587.0, 630.0, 513.0, 587.0, 630.0, 633.0, 627.0, 633.0, 582.0, 519.0, 627.0, 579.0, 636.0, 630.0, 584.0, 636.0, 465.0, 627.0, 633.0, 624.0, 633.0, 465.0, 582.0, 587.0, 630.0, 627.0, 639.0, 630.0, 633.0, 453.0, 639.0, 630.0, 630.0, 627.0, 636.0, 579.0, 587.0, 636.0, 510.0, 587.0, 624.0, 630.0, 539.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [313.0, 317.0, 288.0, 291.0, 317.0, 316.0, 311.0, 319.0, 289.0, 293.0, 290.0, 292.0, 314.0, 316.0, 319.0, 308.0, 311.0, 322.0, 314.0, 316.0, 310.0, 317.0, 302.0, 277.0, 319.0, 314.0, 299.0, 288.0, 293.0, 286.0, 283.0, 299.0, 297.0, 279.0, 291.0, 288.0, 319.0, 308.0, 314.0, 313.0, 313.0, 323.0, 319.0, 320.0, 310.0, 320.0, 319.0, 311.0, 286.0, 296.0, 314.0, 304.0, 311.0, 319.0, 293.0, 294.0, 231.0, 242.0, 291.0, 296.0, 308.0, 316.0, 293.0, 294.0, 321.0, 309.0, 321.0, 318.0, 277.0, 296.0, 285.0, 297.0, 298.0, 289.0, 285.0, 294.0, 288.0, 296.0, 316.0, 320.0, 316.0, 311.0, 308.0, 316.0, 316.0, 320.0, 294.0, 293.0, 301.0, 281.0, 314.0, 322.0, 297.0, 284.0, 299.0, 288.0, 313.0, 
317.0, 314.0, 322.0, 316.0, 317.0, 324.0, 306.0, 318.0, 312.0, 319.0, 317.0, 319.0, 311.0, 282.0, 302.0, 256.0, 260.0, 288.0, 299.0, 316.0, 314.0, 255.0, 258.0, 293.0, 294.0, 316.0, 314.0, 318.0, 315.0, 311.0, 316.0, 321.0, 312.0, 288.0, 294.0, 268.0, 251.0, 311.0, 316.0, 283.0, 296.0, 322.0, 314.0, 316.0, 314.0, 298.0, 286.0, 313.0, 323.0, 222.0, 243.0, 318.0, 309.0, 321.0, 312.0, 310.0, 314.0, 319.0, 314.0, 220.0, 245.0, 297.0, 285.0, 294.0, 293.0, 318.0, 312.0, 311.0, 316.0, 317.0, 322.0, 316.0, 314.0, 319.0, 314.0, 222.0, 231.0, 319.0, 320.0, 322.0, 308.0, 322.0, 308.0, 311.0, 316.0, 319.0, 317.0, 294.0, 285.0, 283.0, 304.0, 324.0, 312.0, 259.0, 251.0, 301.0, 286.0, 305.0, 319.0, 313.0, 317.0, 271.0, 268.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8568823921280486, "mean_processing_ms": 0.23974756126791116, "mean_inference_ms": 1.4548800404150943}, "off_policy_estimator": {}, "info": {"num_steps_trained": 8904000, "num_steps_sampled": 4748800, "sample_time_ms": 22963.635, "load_time_ms": 38.114, "grad_time_ms": 11201.711, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0006220227223820984, "policy_loss": -0.007045889273285866, "vf_loss": 82.31112670898438, "vf_explained_var": 0.7558401226997375, "kl": 0.002209648722782731, "entropy": 1.1263946294784546, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4748800, "episodes_total": 11872, "training_iteration": 371, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-56-56", "timestamp": 1660258616, "time_this_iter_s": 30.35482382774353, "time_total_s": 17027.381736516953, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 17027.381736516953, "timesteps_since_restore": 4748800, "iterations_since_restore": 371, "perf": {"cpu_util_percent": 31.702325581395346, "ram_util_percent": 58.86744186046512}}
+{"episode_reward_max": 639.0, "episode_reward_min": 453.0, "episode_reward_mean": 601.27, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 220.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 300.635}, "custom_metrics": {"sparse_reward_mean": 208.4, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 184.47, "shaped_reward_min": 133, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.51, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 18.03, "onion_pickup_agent_1_min": 14, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.21, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 17.41, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.19, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.19, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.06, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.07, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.04, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.47, "potting_onion_agent_1_min": 14, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.46, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.75, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.49, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.84, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.67, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.62, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.59, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.01, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.49, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.94, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.04, 
"optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.47, "optimal_onion_potting_agent_1_min": 14, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.04, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.47, "viable_onion_potting_agent_1_min": 14, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 587.0, 633.0, 579.0, 633.0, 584.0, 633.0, 636.0, 561.0, 573.0, 570.0, 582.0, 627.0, 633.0, 587.0, 636.0, 579.0, 630.0, 627.0, 525.0, 576.0, 636.0, 636.0, 587.0, 587.0, 633.0, 633.0, 636.0, 630.0, 519.0, 587.0, 587.0, 633.0, 582.0, 519.0, 627.0, 579.0, 636.0, 630.0, 584.0, 636.0, 465.0, 627.0, 633.0, 624.0, 633.0, 465.0, 582.0, 587.0, 630.0, 627.0, 639.0, 630.0, 633.0, 453.0, 639.0, 630.0, 630.0, 627.0, 636.0, 579.0, 587.0, 636.0, 510.0, 587.0, 624.0, 630.0, 539.0, 630.0, 579.0, 633.0, 630.0, 582.0, 582.0, 630.0, 627.0, 633.0, 630.0, 627.0, 579.0, 633.0, 587.0, 579.0, 582.0, 576.0, 579.0, 627.0, 627.0, 636.0, 639.0, 630.0, 630.0, 582.0, 618.0, 630.0, 587.0, 473.0, 587.0, 624.0, 587.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [290.0, 292.0, 298.0, 289.0, 311.0, 322.0, 296.0, 283.0, 319.0, 314.0, 295.0, 289.0, 317.0, 316.0, 314.0, 322.0, 281.0, 280.0, 285.0, 288.0, 287.0, 283.0, 290.0, 292.0, 313.0, 314.0, 314.0, 319.0, 298.0, 289.0, 319.0, 317.0, 286.0, 293.0, 311.0, 319.0, 320.0, 307.0, 262.0, 263.0, 285.0, 291.0, 320.0, 316.0, 319.0, 317.0, 288.0, 299.0, 301.0, 286.0, 322.0, 311.0, 317.0, 316.0, 319.0, 317.0, 319.0, 311.0, 264.0, 255.0, 293.0, 294.0, 293.0, 294.0, 321.0, 312.0, 288.0, 294.0, 268.0, 251.0, 311.0, 316.0, 283.0, 296.0, 322.0, 314.0, 316.0, 314.0, 298.0, 286.0, 313.0, 323.0, 222.0, 243.0, 318.0, 309.0, 321.0, 312.0, 310.0, 314.0, 319.0, 314.0, 220.0, 245.0, 297.0, 285.0, 294.0, 
293.0, 318.0, 312.0, 311.0, 316.0, 317.0, 322.0, 316.0, 314.0, 319.0, 314.0, 222.0, 231.0, 319.0, 320.0, 322.0, 308.0, 322.0, 308.0, 311.0, 316.0, 319.0, 317.0, 294.0, 285.0, 283.0, 304.0, 324.0, 312.0, 259.0, 251.0, 301.0, 286.0, 305.0, 319.0, 313.0, 317.0, 271.0, 268.0, 313.0, 317.0, 288.0, 291.0, 317.0, 316.0, 311.0, 319.0, 289.0, 293.0, 290.0, 292.0, 314.0, 316.0, 319.0, 308.0, 311.0, 322.0, 314.0, 316.0, 310.0, 317.0, 302.0, 277.0, 319.0, 314.0, 299.0, 288.0, 293.0, 286.0, 283.0, 299.0, 297.0, 279.0, 291.0, 288.0, 319.0, 308.0, 314.0, 313.0, 313.0, 323.0, 319.0, 320.0, 310.0, 320.0, 319.0, 311.0, 286.0, 296.0, 314.0, 304.0, 311.0, 319.0, 293.0, 294.0, 231.0, 242.0, 291.0, 296.0, 308.0, 316.0, 293.0, 294.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8557975282130863, "mean_processing_ms": 0.23953222980731334, "mean_inference_ms": 1.453823461548284}, "off_policy_estimator": {}, "info": {"num_steps_trained": 8928000, "num_steps_sampled": 4761600, "sample_time_ms": 22359.762, "load_time_ms": 38.161, "grad_time_ms": 11242.342, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.00044215377420186996, "policy_loss": -0.007962713949382305, "vf_loss": 80.8259506225586, "vf_explained_var": 0.7670674920082092, "kl": 0.0015741548268124461, "entropy": 1.1240602731704712, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4761600, "episodes_total": 11904, "training_iteration": 372, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-57-26", "timestamp": 1660258646, "time_this_iter_s": 29.648212909698486, "time_total_s": 17057.02994942665, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 17057.02994942665, "timesteps_since_restore": 4761600, "iterations_since_restore": 372, "perf": {"cpu_util_percent": 34.21904761904762, "ram_util_percent": 58.84523809523809}}
+{"episode_reward_max": 639.0, "episode_reward_min": 390.0, "episode_reward_mean": 602.14, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 193.0}, "policy_reward_max": {"ppo": 326.0}, "policy_reward_mean": {"ppo": 301.07}, "custom_metrics": {"sparse_reward_mean": 208.6, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 184.94, "shaped_reward_min": 110, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.54, "onion_pickup_agent_0_min": 10, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 18.13, "onion_pickup_agent_1_min": 14, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.2, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 17.48, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.22, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.26, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.08, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.07, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.01, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.54, "potting_onion_agent_1_min": 13, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.46, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.82, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.46, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.92, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.65, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.65, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.63, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.02, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.51, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.94, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.01, 
"optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.54, "optimal_onion_potting_agent_1_min": 13, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.01, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.54, "viable_onion_potting_agent_1_min": 13, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [587.0, 630.0, 582.0, 630.0, 630.0, 636.0, 633.0, 584.0, 630.0, 633.0, 408.0, 582.0, 630.0, 584.0, 630.0, 639.0, 630.0, 561.0, 630.0, 633.0, 587.0, 633.0, 630.0, 587.0, 587.0, 636.0, 636.0, 633.0, 390.0, 636.0, 579.0, 579.0, 587.0, 624.0, 630.0, 539.0, 630.0, 579.0, 633.0, 630.0, 582.0, 582.0, 630.0, 627.0, 633.0, 630.0, 627.0, 579.0, 633.0, 587.0, 579.0, 582.0, 576.0, 579.0, 627.0, 627.0, 636.0, 639.0, 630.0, 630.0, 582.0, 618.0, 630.0, 587.0, 473.0, 587.0, 624.0, 587.0, 582.0, 587.0, 633.0, 579.0, 633.0, 584.0, 633.0, 636.0, 561.0, 573.0, 570.0, 582.0, 627.0, 633.0, 587.0, 636.0, 579.0, 630.0, 627.0, 525.0, 576.0, 636.0, 636.0, 587.0, 587.0, 633.0, 633.0, 636.0, 630.0, 519.0, 587.0, 587.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [296.0, 291.0, 316.0, 314.0, 280.0, 302.0, 319.0, 311.0, 319.0, 311.0, 314.0, 322.0, 316.0, 317.0, 293.0, 291.0, 318.0, 312.0, 319.0, 314.0, 205.0, 203.0, 285.0, 297.0, 308.0, 322.0, 292.0, 292.0, 306.0, 324.0, 319.0, 320.0, 326.0, 304.0, 285.0, 276.0, 311.0, 319.0, 313.0, 320.0, 301.0, 286.0, 311.0, 322.0, 313.0, 317.0, 293.0, 294.0, 301.0, 286.0, 316.0, 320.0, 319.0, 317.0, 316.0, 317.0, 193.0, 197.0, 322.0, 314.0, 288.0, 291.0, 281.0, 298.0, 301.0, 286.0, 305.0, 319.0, 313.0, 317.0, 271.0, 268.0, 313.0, 317.0, 288.0, 291.0, 317.0, 316.0, 311.0, 319.0, 289.0, 293.0, 290.0, 292.0, 314.0, 316.0, 319.0, 308.0, 311.0, 322.0, 314.0, 316.0, 310.0, 317.0, 302.0, 277.0, 319.0, 
314.0, 299.0, 288.0, 293.0, 286.0, 283.0, 299.0, 297.0, 279.0, 291.0, 288.0, 319.0, 308.0, 314.0, 313.0, 313.0, 323.0, 319.0, 320.0, 310.0, 320.0, 319.0, 311.0, 286.0, 296.0, 314.0, 304.0, 311.0, 319.0, 293.0, 294.0, 231.0, 242.0, 291.0, 296.0, 308.0, 316.0, 293.0, 294.0, 290.0, 292.0, 298.0, 289.0, 311.0, 322.0, 296.0, 283.0, 319.0, 314.0, 295.0, 289.0, 317.0, 316.0, 314.0, 322.0, 281.0, 280.0, 285.0, 288.0, 287.0, 283.0, 290.0, 292.0, 313.0, 314.0, 314.0, 319.0, 298.0, 289.0, 319.0, 317.0, 286.0, 293.0, 311.0, 319.0, 320.0, 307.0, 262.0, 263.0, 285.0, 291.0, 320.0, 316.0, 319.0, 317.0, 288.0, 299.0, 301.0, 286.0, 322.0, 311.0, 317.0, 316.0, 319.0, 317.0, 319.0, 311.0, 264.0, 255.0, 293.0, 294.0, 293.0, 294.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8547135741769734, "mean_processing_ms": 0.23931513380752317, "mean_inference_ms": 1.4526421752907723}, "off_policy_estimator": {}, "info": {"num_steps_trained": 8952000, "num_steps_sampled": 4774400, "sample_time_ms": 22196.797, "load_time_ms": 37.846, "grad_time_ms": 11312.767, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.003103644819930196, "policy_loss": -0.004869487602263689, "vf_loss": 85.35115814208984, "vf_explained_var": 0.7750833630561829, "kl": 0.0021017238032072783, "entropy": 1.123950481414795, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4774400, "episodes_total": 11936, "training_iteration": 373, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-57-58", "timestamp": 1660258678, "time_this_iter_s": 32.121092796325684, "time_total_s": 17089.151042222977, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 17089.151042222977, "timesteps_since_restore": 4774400, "iterations_since_restore": 373, "perf": {"cpu_util_percent": 33.684444444444445, "ram_util_percent": 58.78888888888888}}
+{"episode_reward_max": 639.0, "episode_reward_min": 390.0, "episode_reward_mean": 602.95, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 193.0}, "policy_reward_max": {"ppo": 329.0}, "policy_reward_mean": {"ppo": 301.475}, "custom_metrics": {"sparse_reward_mean": 208.6, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 185.75, "shaped_reward_min": 110, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.57, "onion_pickup_agent_0_min": 10, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 18.3, "onion_pickup_agent_1_min": 14, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.21, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 17.66, "useful_onion_pickup_agent_1_min": 14, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.18, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.29, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.06, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.07, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.09, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.66, "potting_onion_agent_1_min": 13, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.43, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.85, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.52, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.88, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.59, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.7, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.7, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.98, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.58, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.89, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.09, 
"optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.66, "optimal_onion_potting_agent_1_min": 13, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.09, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.66, "viable_onion_potting_agent_1_min": 13, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 636.0, 636.0, 576.0, 636.0, 639.0, 590.0, 633.0, 636.0, 522.0, 633.0, 578.0, 544.0, 636.0, 630.0, 636.0, 636.0, 627.0, 621.0, 636.0, 582.0, 582.0, 636.0, 587.0, 587.0, 636.0, 636.0, 582.0, 630.0, 636.0, 627.0, 581.0, 473.0, 587.0, 624.0, 587.0, 582.0, 587.0, 633.0, 579.0, 633.0, 584.0, 633.0, 636.0, 561.0, 573.0, 570.0, 582.0, 627.0, 633.0, 587.0, 636.0, 579.0, 630.0, 627.0, 525.0, 576.0, 636.0, 636.0, 587.0, 587.0, 633.0, 633.0, 636.0, 630.0, 519.0, 587.0, 587.0, 587.0, 630.0, 582.0, 630.0, 630.0, 636.0, 633.0, 584.0, 630.0, 633.0, 408.0, 582.0, 630.0, 584.0, 630.0, 639.0, 630.0, 561.0, 630.0, 633.0, 587.0, 633.0, 630.0, 587.0, 587.0, 636.0, 636.0, 633.0, 390.0, 636.0, 579.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [296.0, 286.0, 319.0, 317.0, 320.0, 316.0, 288.0, 288.0, 329.0, 307.0, 324.0, 315.0, 296.0, 294.0, 316.0, 317.0, 319.0, 317.0, 268.0, 254.0, 315.0, 318.0, 285.0, 293.0, 270.0, 274.0, 320.0, 316.0, 315.0, 315.0, 322.0, 314.0, 327.0, 309.0, 314.0, 313.0, 313.0, 308.0, 317.0, 319.0, 296.0, 286.0, 293.0, 289.0, 311.0, 325.0, 291.0, 296.0, 295.0, 292.0, 324.0, 312.0, 319.0, 317.0, 294.0, 288.0, 319.0, 311.0, 314.0, 322.0, 314.0, 313.0, 282.0, 299.0, 231.0, 242.0, 291.0, 296.0, 308.0, 316.0, 293.0, 294.0, 290.0, 292.0, 298.0, 289.0, 311.0, 322.0, 296.0, 283.0, 319.0, 314.0, 295.0, 289.0, 317.0, 316.0, 314.0, 322.0, 281.0, 280.0, 285.0, 288.0, 287.0, 283.0, 290.0, 292.0, 313.0, 
314.0, 314.0, 319.0, 298.0, 289.0, 319.0, 317.0, 286.0, 293.0, 311.0, 319.0, 320.0, 307.0, 262.0, 263.0, 285.0, 291.0, 320.0, 316.0, 319.0, 317.0, 288.0, 299.0, 301.0, 286.0, 322.0, 311.0, 317.0, 316.0, 319.0, 317.0, 319.0, 311.0, 264.0, 255.0, 293.0, 294.0, 293.0, 294.0, 296.0, 291.0, 316.0, 314.0, 280.0, 302.0, 319.0, 311.0, 319.0, 311.0, 314.0, 322.0, 316.0, 317.0, 293.0, 291.0, 318.0, 312.0, 319.0, 314.0, 205.0, 203.0, 285.0, 297.0, 308.0, 322.0, 292.0, 292.0, 306.0, 324.0, 319.0, 320.0, 326.0, 304.0, 285.0, 276.0, 311.0, 319.0, 313.0, 320.0, 301.0, 286.0, 311.0, 322.0, 313.0, 317.0, 293.0, 294.0, 301.0, 286.0, 316.0, 320.0, 319.0, 317.0, 316.0, 317.0, 193.0, 197.0, 322.0, 314.0, 288.0, 291.0, 281.0, 298.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8536367311955897, "mean_processing_ms": 0.23909908370976882, "mean_inference_ms": 1.4514727184055203}, "off_policy_estimator": {}, "info": {"num_steps_trained": 8976000, "num_steps_sampled": 4787200, "sample_time_ms": 21894.527, "load_time_ms": 38.299, "grad_time_ms": 11412.728, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0030573883559554815, "policy_loss": -0.004609658382833004, "vf_loss": 82.26570892333984, "vf_explained_var": 0.7665780186653137, "kl": 0.002119669923558831, "entropy": 1.119057059288025, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4787200, "episodes_total": 11968, "training_iteration": 374, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-58-31", "timestamp": 1660258711, "time_this_iter_s": 33.1311149597168, "time_total_s": 17122.282157182693, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 17122.282157182693, "timesteps_since_restore": 4787200, "iterations_since_restore": 374, "perf": {"cpu_util_percent": 34.03404255319149, "ram_util_percent": 59.29999999999999}}
+{"episode_reward_max": 639.0, "episode_reward_min": 390.0, "episode_reward_mean": 608.18, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 193.0}, "policy_reward_max": {"ppo": 329.0}, "policy_reward_mean": {"ppo": 304.09}, "custom_metrics": {"sparse_reward_mean": 210.6, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 186.98, "shaped_reward_min": 110, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.79, "onion_pickup_agent_0_min": 10, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 18.27, "onion_pickup_agent_1_min": 14, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.45, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 17.7, "useful_onion_pickup_agent_1_min": 14, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.21, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.27, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.06, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.06, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.24, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.68, "potting_onion_agent_1_min": 13, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.44, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.87, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.48, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.06, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.66, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.61, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.63, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.1, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.55, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.03, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.24, 
"optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.68, "optimal_onion_potting_agent_1_min": 13, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.24, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.68, "viable_onion_potting_agent_1_min": 13, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [636.0, 630.0, 630.0, 636.0, 630.0, 636.0, 564.0, 633.0, 633.0, 627.0, 630.0, 587.0, 627.0, 621.0, 576.0, 582.0, 630.0, 582.0, 639.0, 636.0, 636.0, 582.0, 630.0, 633.0, 575.0, 630.0, 587.0, 582.0, 639.0, 639.0, 587.0, 630.0, 630.0, 519.0, 587.0, 587.0, 587.0, 630.0, 582.0, 630.0, 630.0, 636.0, 633.0, 584.0, 630.0, 633.0, 408.0, 582.0, 630.0, 584.0, 630.0, 639.0, 630.0, 561.0, 630.0, 633.0, 587.0, 633.0, 630.0, 587.0, 587.0, 636.0, 636.0, 633.0, 390.0, 636.0, 579.0, 579.0, 582.0, 636.0, 636.0, 576.0, 636.0, 639.0, 590.0, 633.0, 636.0, 522.0, 633.0, 578.0, 544.0, 636.0, 630.0, 636.0, 636.0, 627.0, 621.0, 636.0, 582.0, 582.0, 636.0, 587.0, 587.0, 636.0, 636.0, 582.0, 630.0, 636.0, 627.0, 581.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [314.0, 322.0, 314.0, 316.0, 316.0, 314.0, 316.0, 320.0, 313.0, 317.0, 314.0, 322.0, 275.0, 289.0, 313.0, 320.0, 321.0, 312.0, 308.0, 319.0, 316.0, 314.0, 294.0, 293.0, 314.0, 313.0, 302.0, 319.0, 285.0, 291.0, 293.0, 289.0, 311.0, 319.0, 288.0, 294.0, 317.0, 322.0, 319.0, 317.0, 314.0, 322.0, 288.0, 294.0, 316.0, 314.0, 319.0, 314.0, 287.0, 288.0, 314.0, 316.0, 288.0, 299.0, 288.0, 294.0, 317.0, 322.0, 317.0, 322.0, 296.0, 291.0, 311.0, 319.0, 319.0, 311.0, 264.0, 255.0, 293.0, 294.0, 293.0, 294.0, 296.0, 291.0, 316.0, 314.0, 280.0, 302.0, 319.0, 311.0, 319.0, 311.0, 314.0, 322.0, 316.0, 317.0, 293.0, 291.0, 318.0, 312.0, 319.0, 314.0, 205.0, 203.0, 285.0, 297.0, 308.0, 
322.0, 292.0, 292.0, 306.0, 324.0, 319.0, 320.0, 326.0, 304.0, 285.0, 276.0, 311.0, 319.0, 313.0, 320.0, 301.0, 286.0, 311.0, 322.0, 313.0, 317.0, 293.0, 294.0, 301.0, 286.0, 316.0, 320.0, 319.0, 317.0, 316.0, 317.0, 193.0, 197.0, 322.0, 314.0, 288.0, 291.0, 281.0, 298.0, 296.0, 286.0, 319.0, 317.0, 320.0, 316.0, 288.0, 288.0, 329.0, 307.0, 324.0, 315.0, 296.0, 294.0, 316.0, 317.0, 319.0, 317.0, 268.0, 254.0, 315.0, 318.0, 285.0, 293.0, 270.0, 274.0, 320.0, 316.0, 315.0, 315.0, 322.0, 314.0, 327.0, 309.0, 314.0, 313.0, 313.0, 308.0, 317.0, 319.0, 296.0, 286.0, 293.0, 289.0, 311.0, 325.0, 291.0, 296.0, 295.0, 292.0, 324.0, 312.0, 319.0, 317.0, 294.0, 288.0, 319.0, 311.0, 314.0, 322.0, 314.0, 313.0, 282.0, 299.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8525659073078762, "mean_processing_ms": 0.23888337042997632, "mean_inference_ms": 1.4503574621583197}, "off_policy_estimator": {}, "info": {"num_steps_trained": 9000000, "num_steps_sampled": 4800000, "sample_time_ms": 21378.834, "load_time_ms": 37.98, "grad_time_ms": 11184.622, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0034743063151836395, "policy_loss": -0.004098345525562763, "vf_loss": 81.30432891845703, "vf_explained_var": 0.7632368206977844, "kl": 0.0018746949499472976, "entropy": 1.1155738830566406, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4800000, "episodes_total": 12000, "training_iteration": 375, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-59-03", "timestamp": 1660258743, "time_this_iter_s": 31.725862979888916, "time_total_s": 17154.008020162582, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 17154.008020162582, "timesteps_since_restore": 4800000, "iterations_since_restore": 375, "perf": {"cpu_util_percent": 34.54888888888888, "ram_util_percent": 59.05333333333331}}
+{"episode_reward_max": 639.0, "episode_reward_min": 390.0, "episode_reward_mean": 611.67, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 193.0}, "policy_reward_max": {"ppo": 329.0}, "policy_reward_mean": {"ppo": 305.835}, "custom_metrics": {"sparse_reward_mean": 211.8, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 188.07, "shaped_reward_min": 110, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.08, "onion_pickup_agent_0_min": 10, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 18.11, "onion_pickup_agent_1_min": 14, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.71, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 17.66, "useful_onion_pickup_agent_1_min": 14, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.19, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.25, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.05, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.03, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.58, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.57, "potting_onion_agent_1_min": 13, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.22, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 6.07, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.4, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.24, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.54, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.64, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.51, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.23, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.46, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.17, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.58, 
"optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.57, "optimal_onion_potting_agent_1_min": 13, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.58, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.57, "viable_onion_potting_agent_1_min": 13, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 630.0, 639.0, 636.0, 636.0, 636.0, 633.0, 633.0, 636.0, 590.0, 639.0, 582.0, 587.0, 627.0, 636.0, 581.0, 579.0, 582.0, 627.0, 639.0, 576.0, 630.0, 633.0, 633.0, 570.0, 630.0, 639.0, 639.0, 630.0, 582.0, 581.0, 630.0, 390.0, 636.0, 579.0, 579.0, 582.0, 636.0, 636.0, 576.0, 636.0, 639.0, 590.0, 633.0, 636.0, 522.0, 633.0, 578.0, 544.0, 636.0, 630.0, 636.0, 636.0, 627.0, 621.0, 636.0, 582.0, 582.0, 636.0, 587.0, 587.0, 636.0, 636.0, 582.0, 630.0, 636.0, 627.0, 581.0, 636.0, 630.0, 630.0, 636.0, 630.0, 636.0, 564.0, 633.0, 633.0, 627.0, 630.0, 587.0, 627.0, 621.0, 576.0, 582.0, 630.0, 582.0, 639.0, 636.0, 636.0, 582.0, 630.0, 633.0, 575.0, 630.0, 587.0, 582.0, 639.0, 639.0, 587.0, 630.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [281.0, 301.0, 321.0, 309.0, 317.0, 322.0, 311.0, 325.0, 317.0, 319.0, 319.0, 317.0, 319.0, 314.0, 321.0, 312.0, 311.0, 325.0, 293.0, 297.0, 320.0, 319.0, 288.0, 294.0, 291.0, 296.0, 316.0, 311.0, 314.0, 322.0, 281.0, 300.0, 296.0, 283.0, 288.0, 294.0, 319.0, 308.0, 314.0, 325.0, 288.0, 288.0, 316.0, 314.0, 309.0, 324.0, 316.0, 317.0, 283.0, 287.0, 314.0, 316.0, 314.0, 325.0, 319.0, 320.0, 318.0, 312.0, 293.0, 289.0, 299.0, 282.0, 316.0, 314.0, 193.0, 197.0, 322.0, 314.0, 288.0, 291.0, 281.0, 298.0, 296.0, 286.0, 319.0, 317.0, 320.0, 316.0, 288.0, 288.0, 329.0, 307.0, 324.0, 315.0, 296.0, 294.0, 316.0, 317.0, 319.0, 317.0, 268.0, 254.0, 315.0, 318.0, 285.0, 293.0, 270.0, 
274.0, 320.0, 316.0, 315.0, 315.0, 322.0, 314.0, 327.0, 309.0, 314.0, 313.0, 313.0, 308.0, 317.0, 319.0, 296.0, 286.0, 293.0, 289.0, 311.0, 325.0, 291.0, 296.0, 295.0, 292.0, 324.0, 312.0, 319.0, 317.0, 294.0, 288.0, 319.0, 311.0, 314.0, 322.0, 314.0, 313.0, 282.0, 299.0, 314.0, 322.0, 314.0, 316.0, 316.0, 314.0, 316.0, 320.0, 313.0, 317.0, 314.0, 322.0, 275.0, 289.0, 313.0, 320.0, 321.0, 312.0, 308.0, 319.0, 316.0, 314.0, 294.0, 293.0, 314.0, 313.0, 302.0, 319.0, 285.0, 291.0, 293.0, 289.0, 311.0, 319.0, 288.0, 294.0, 317.0, 322.0, 319.0, 317.0, 314.0, 322.0, 288.0, 294.0, 316.0, 314.0, 319.0, 314.0, 287.0, 288.0, 314.0, 316.0, 288.0, 299.0, 288.0, 294.0, 317.0, 322.0, 317.0, 322.0, 296.0, 291.0, 311.0, 319.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8515001170833599, "mean_processing_ms": 0.2386703929555994, "mean_inference_ms": 1.4492475264918965}, "off_policy_estimator": {}, "info": {"num_steps_trained": 9024000, "num_steps_sampled": 4812800, "sample_time_ms": 21252.385, "load_time_ms": 37.854, "grad_time_ms": 10740.605, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0007883608341217041, "policy_loss": -0.006749347317963839, "vf_loss": 80.9527359008789, "vf_explained_var": 0.7635239958763123, "kl": 0.0017555366503074765, "entropy": 1.115132212638855, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4812800, "episodes_total": 12032, "training_iteration": 376, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-59-32", "timestamp": 1660258772, "time_this_iter_s": 29.115790128707886, "time_total_s": 17183.12381029129, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 17183.12381029129, "timesteps_since_restore": 4812800, "iterations_since_restore": 376, "perf": {"cpu_util_percent": 34.046341463414635, "ram_util_percent": 58.78536585365854}}
+{"episode_reward_max": 639.0, "episode_reward_min": 558.0, "episode_reward_mean": 615.94, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 273.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 307.97}, "custom_metrics": {"sparse_reward_mean": 213.4, "sparse_reward_min": 200, "sparse_reward_max": 220, "shaped_reward_mean": 189.14, "shaped_reward_min": 158, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.19, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 18.07, "onion_pickup_agent_1_min": 14, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.83, "useful_onion_pickup_agent_0_min": 13, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.58, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.2, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.21, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.03, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.07, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.69, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.65, "potting_onion_agent_1_min": 13, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.18, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 6.1, "dish_pickup_agent_1_min": 4, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.4, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.29, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.54, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.55, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.04, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.48, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.33, "soup_pickup_agent_1_min": 4, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.42, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.26, "soup_delivery_agent_1_min": 4, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.69, 
"optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.65, "optimal_onion_potting_agent_1_min": 13, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.69, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.65, "viable_onion_potting_agent_1_min": 13, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [630.0, 581.0, 630.0, 630.0, 587.0, 636.0, 639.0, 558.0, 633.0, 582.0, 636.0, 582.0, 639.0, 633.0, 636.0, 587.0, 636.0, 636.0, 582.0, 630.0, 584.0, 636.0, 587.0, 633.0, 639.0, 564.0, 639.0, 630.0, 636.0, 582.0, 630.0, 639.0, 630.0, 636.0, 627.0, 581.0, 636.0, 630.0, 630.0, 636.0, 630.0, 636.0, 564.0, 633.0, 633.0, 627.0, 630.0, 587.0, 627.0, 621.0, 576.0, 582.0, 630.0, 582.0, 639.0, 636.0, 636.0, 582.0, 630.0, 633.0, 575.0, 630.0, 587.0, 582.0, 639.0, 639.0, 587.0, 630.0, 582.0, 630.0, 639.0, 636.0, 636.0, 636.0, 633.0, 633.0, 636.0, 590.0, 639.0, 582.0, 587.0, 627.0, 636.0, 581.0, 579.0, 582.0, 627.0, 639.0, 576.0, 630.0, 633.0, 633.0, 570.0, 630.0, 639.0, 639.0, 630.0, 582.0, 581.0, 630.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [316.0, 314.0, 298.0, 283.0, 305.0, 325.0, 314.0, 316.0, 292.0, 295.0, 319.0, 317.0, 319.0, 320.0, 279.0, 279.0, 316.0, 317.0, 293.0, 289.0, 317.0, 319.0, 296.0, 286.0, 320.0, 319.0, 311.0, 322.0, 317.0, 319.0, 280.0, 307.0, 314.0, 322.0, 317.0, 319.0, 293.0, 289.0, 316.0, 314.0, 302.0, 282.0, 316.0, 320.0, 285.0, 302.0, 317.0, 316.0, 322.0, 317.0, 291.0, 273.0, 325.0, 314.0, 319.0, 311.0, 324.0, 312.0, 296.0, 286.0, 315.0, 315.0, 314.0, 325.0, 319.0, 311.0, 314.0, 322.0, 314.0, 313.0, 282.0, 299.0, 314.0, 322.0, 314.0, 316.0, 316.0, 314.0, 316.0, 320.0, 313.0, 317.0, 314.0, 322.0, 275.0, 289.0, 313.0, 320.0, 321.0, 312.0, 308.0, 319.0, 316.0, 314.0, 294.0, 293.0, 314.0, 
313.0, 302.0, 319.0, 285.0, 291.0, 293.0, 289.0, 311.0, 319.0, 288.0, 294.0, 317.0, 322.0, 319.0, 317.0, 314.0, 322.0, 288.0, 294.0, 316.0, 314.0, 319.0, 314.0, 287.0, 288.0, 314.0, 316.0, 288.0, 299.0, 288.0, 294.0, 317.0, 322.0, 317.0, 322.0, 296.0, 291.0, 311.0, 319.0, 281.0, 301.0, 321.0, 309.0, 317.0, 322.0, 311.0, 325.0, 317.0, 319.0, 319.0, 317.0, 319.0, 314.0, 321.0, 312.0, 311.0, 325.0, 293.0, 297.0, 320.0, 319.0, 288.0, 294.0, 291.0, 296.0, 316.0, 311.0, 314.0, 322.0, 281.0, 300.0, 296.0, 283.0, 288.0, 294.0, 319.0, 308.0, 314.0, 325.0, 288.0, 288.0, 316.0, 314.0, 309.0, 324.0, 316.0, 317.0, 283.0, 287.0, 314.0, 316.0, 314.0, 325.0, 319.0, 320.0, 318.0, 312.0, 293.0, 289.0, 299.0, 282.0, 316.0, 314.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8504378248511929, "mean_processing_ms": 0.23845860870627447, "mean_inference_ms": 1.448146633207985}, "off_policy_estimator": {}, "info": {"num_steps_trained": 9048000, "num_steps_sampled": 4825600, "sample_time_ms": 21404.603, "load_time_ms": 37.763, "grad_time_ms": 10323.133, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.002214103704318404, "policy_loss": -0.005506592337042093, "vf_loss": 82.8126449584961, "vf_explained_var": 0.766756534576416, "kl": 0.0020635148975998163, "entropy": 1.1211366653442383, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4825600, "episodes_total": 12064, "training_iteration": 377, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-00-03", "timestamp": 1660258803, "time_this_iter_s": 30.737117767333984, "time_total_s": 17213.860928058624, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 17213.860928058624, "timesteps_since_restore": 4825600, "iterations_since_restore": 377, "perf": {"cpu_util_percent": 36.85227272727273, "ram_util_percent": 58.872727272727275}}
+{"episode_reward_max": 639.0, "episode_reward_min": 546.0, "episode_reward_mean": 616.56, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 264.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 308.28}, "custom_metrics": {"sparse_reward_mean": 213.4, "sparse_reward_min": 200, "sparse_reward_max": 220, "shaped_reward_mean": 189.76, "shaped_reward_min": 146, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.2, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 18.23, "onion_pickup_agent_1_min": 14, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.81, "useful_onion_pickup_agent_0_min": 13, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 17.71, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.2, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.23, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.06, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.07, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.7, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.8, "potting_onion_agent_1_min": 13, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.23, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 6.06, "dish_pickup_agent_1_min": 4, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.49, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.23, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.57, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.54, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.05, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.52, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.3, "soup_pickup_agent_1_min": 4, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.45, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.24, "soup_delivery_agent_1_min": 4, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.7, 
"optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.8, "optimal_onion_potting_agent_1_min": 13, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.7, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.8, "viable_onion_potting_agent_1_min": 13, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [633.0, 630.0, 587.0, 639.0, 576.0, 639.0, 633.0, 639.0, 636.0, 630.0, 636.0, 633.0, 633.0, 587.0, 636.0, 582.0, 593.0, 546.0, 579.0, 633.0, 639.0, 573.0, 630.0, 636.0, 633.0, 587.0, 590.0, 627.0, 630.0, 639.0, 636.0, 636.0, 639.0, 639.0, 587.0, 630.0, 582.0, 630.0, 639.0, 636.0, 636.0, 636.0, 633.0, 633.0, 636.0, 590.0, 639.0, 582.0, 587.0, 627.0, 636.0, 581.0, 579.0, 582.0, 627.0, 639.0, 576.0, 630.0, 633.0, 633.0, 570.0, 630.0, 639.0, 639.0, 630.0, 582.0, 581.0, 630.0, 630.0, 581.0, 630.0, 630.0, 587.0, 636.0, 639.0, 558.0, 633.0, 582.0, 636.0, 582.0, 639.0, 633.0, 636.0, 587.0, 636.0, 636.0, 582.0, 630.0, 584.0, 636.0, 587.0, 633.0, 639.0, 564.0, 639.0, 630.0, 636.0, 582.0, 630.0, 639.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [316.0, 317.0, 308.0, 322.0, 290.0, 297.0, 322.0, 317.0, 288.0, 288.0, 320.0, 319.0, 314.0, 319.0, 325.0, 314.0, 312.0, 324.0, 316.0, 314.0, 316.0, 320.0, 311.0, 322.0, 319.0, 314.0, 291.0, 296.0, 319.0, 317.0, 288.0, 294.0, 294.0, 299.0, 264.0, 282.0, 288.0, 291.0, 319.0, 314.0, 316.0, 323.0, 279.0, 294.0, 321.0, 309.0, 317.0, 319.0, 316.0, 317.0, 293.0, 294.0, 296.0, 294.0, 310.0, 317.0, 314.0, 316.0, 319.0, 320.0, 319.0, 317.0, 316.0, 320.0, 317.0, 322.0, 317.0, 322.0, 296.0, 291.0, 311.0, 319.0, 281.0, 301.0, 321.0, 309.0, 317.0, 322.0, 311.0, 325.0, 317.0, 319.0, 319.0, 317.0, 319.0, 314.0, 321.0, 312.0, 311.0, 325.0, 293.0, 297.0, 320.0, 319.0, 288.0, 294.0, 291.0, 
296.0, 316.0, 311.0, 314.0, 322.0, 281.0, 300.0, 296.0, 283.0, 288.0, 294.0, 319.0, 308.0, 314.0, 325.0, 288.0, 288.0, 316.0, 314.0, 309.0, 324.0, 316.0, 317.0, 283.0, 287.0, 314.0, 316.0, 314.0, 325.0, 319.0, 320.0, 318.0, 312.0, 293.0, 289.0, 299.0, 282.0, 316.0, 314.0, 316.0, 314.0, 298.0, 283.0, 305.0, 325.0, 314.0, 316.0, 292.0, 295.0, 319.0, 317.0, 319.0, 320.0, 279.0, 279.0, 316.0, 317.0, 293.0, 289.0, 317.0, 319.0, 296.0, 286.0, 320.0, 319.0, 311.0, 322.0, 317.0, 319.0, 280.0, 307.0, 314.0, 322.0, 317.0, 319.0, 293.0, 289.0, 316.0, 314.0, 302.0, 282.0, 316.0, 320.0, 285.0, 302.0, 317.0, 316.0, 322.0, 317.0, 291.0, 273.0, 325.0, 314.0, 319.0, 311.0, 324.0, 312.0, 296.0, 286.0, 315.0, 315.0, 314.0, 325.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8493908771025369, "mean_processing_ms": 0.23825289023894292, "mean_inference_ms": 1.447125693575985}, "off_policy_estimator": {}, "info": {"num_steps_trained": 9072000, "num_steps_sampled": 4838400, "sample_time_ms": 21529.609, "load_time_ms": 37.587, "grad_time_ms": 10211.527, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0015308427391573787, "policy_loss": -0.006051002535969019, "vf_loss": 81.36373901367188, "vf_explained_var": 0.7675411701202393, "kl": 0.00216054730117321, "entropy": 1.1090576648712158, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4838400, "episodes_total": 12096, "training_iteration": 378, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-00-35", "timestamp": 1660258835, "time_this_iter_s": 32.476667165756226, "time_total_s": 17246.33759522438, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 17246.33759522438, "timesteps_since_restore": 4838400, "iterations_since_restore": 378, "perf": {"cpu_util_percent": 34.55434782608695, "ram_util_percent": 59.1304347826087}}
+{"episode_reward_max": 639.0, "episode_reward_min": 468.0, "episode_reward_mean": 614.53, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 231.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 307.265}, "custom_metrics": {"sparse_reward_mean": 212.8, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 188.93, "shaped_reward_min": 146, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.14, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 18.21, "onion_pickup_agent_1_min": 13, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.82, "useful_onion_pickup_agent_0_min": 13, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 17.65, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.21, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.26, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.08, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.1, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.64, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.72, "potting_onion_agent_1_min": 13, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.2, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.91, "dish_pickup_agent_1_min": 4, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.52, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.13, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.55, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.5, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.06, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.53, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.25, "soup_pickup_agent_1_min": 4, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.48, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.2, "soup_delivery_agent_1_min": 4, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.64, 
"optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.72, "optimal_onion_potting_agent_1_min": 13, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.64, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.72, "viable_onion_potting_agent_1_min": 13, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [573.0, 639.0, 633.0, 636.0, 630.0, 570.0, 630.0, 633.0, 579.0, 630.0, 639.0, 593.0, 633.0, 636.0, 627.0, 579.0, 590.0, 636.0, 468.0, 587.0, 576.0, 582.0, 582.0, 630.0, 636.0, 636.0, 633.0, 633.0, 630.0, 633.0, 630.0, 630.0, 630.0, 582.0, 581.0, 630.0, 630.0, 581.0, 630.0, 630.0, 587.0, 636.0, 639.0, 558.0, 633.0, 582.0, 636.0, 582.0, 639.0, 633.0, 636.0, 587.0, 636.0, 636.0, 582.0, 630.0, 584.0, 636.0, 587.0, 633.0, 639.0, 564.0, 639.0, 630.0, 636.0, 582.0, 630.0, 639.0, 633.0, 630.0, 587.0, 639.0, 576.0, 639.0, 633.0, 639.0, 636.0, 630.0, 636.0, 633.0, 633.0, 587.0, 636.0, 582.0, 593.0, 546.0, 579.0, 633.0, 639.0, 573.0, 630.0, 636.0, 633.0, 587.0, 590.0, 627.0, 630.0, 639.0, 636.0, 636.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [285.0, 288.0, 322.0, 317.0, 316.0, 317.0, 316.0, 320.0, 311.0, 319.0, 282.0, 288.0, 316.0, 314.0, 319.0, 314.0, 294.0, 285.0, 314.0, 316.0, 319.0, 320.0, 307.0, 286.0, 314.0, 319.0, 314.0, 322.0, 313.0, 314.0, 287.0, 292.0, 288.0, 302.0, 324.0, 312.0, 237.0, 231.0, 291.0, 296.0, 282.0, 294.0, 291.0, 291.0, 289.0, 293.0, 319.0, 311.0, 314.0, 322.0, 324.0, 312.0, 314.0, 319.0, 314.0, 319.0, 314.0, 316.0, 316.0, 317.0, 308.0, 322.0, 319.0, 311.0, 318.0, 312.0, 293.0, 289.0, 299.0, 282.0, 316.0, 314.0, 316.0, 314.0, 298.0, 283.0, 305.0, 325.0, 314.0, 316.0, 292.0, 295.0, 319.0, 317.0, 319.0, 320.0, 279.0, 279.0, 316.0, 317.0, 293.0, 289.0, 317.0, 319.0, 296.0, 286.0, 320.0, 
319.0, 311.0, 322.0, 317.0, 319.0, 280.0, 307.0, 314.0, 322.0, 317.0, 319.0, 293.0, 289.0, 316.0, 314.0, 302.0, 282.0, 316.0, 320.0, 285.0, 302.0, 317.0, 316.0, 322.0, 317.0, 291.0, 273.0, 325.0, 314.0, 319.0, 311.0, 324.0, 312.0, 296.0, 286.0, 315.0, 315.0, 314.0, 325.0, 316.0, 317.0, 308.0, 322.0, 290.0, 297.0, 322.0, 317.0, 288.0, 288.0, 320.0, 319.0, 314.0, 319.0, 325.0, 314.0, 312.0, 324.0, 316.0, 314.0, 316.0, 320.0, 311.0, 322.0, 319.0, 314.0, 291.0, 296.0, 319.0, 317.0, 288.0, 294.0, 294.0, 299.0, 264.0, 282.0, 288.0, 291.0, 319.0, 314.0, 316.0, 323.0, 279.0, 294.0, 321.0, 309.0, 317.0, 319.0, 316.0, 317.0, 293.0, 294.0, 296.0, 294.0, 310.0, 317.0, 314.0, 316.0, 319.0, 320.0, 319.0, 317.0, 316.0, 320.0]}, "sampler_perf": {"mean_env_wait_ms": 0.84835648025371, "mean_processing_ms": 0.23804887644488537, "mean_inference_ms": 1.446224605883411}, "off_policy_estimator": {}, "info": {"num_steps_trained": 9096000, "num_steps_sampled": 4851200, "sample_time_ms": 21545.942, "load_time_ms": 37.194, "grad_time_ms": 9930.811, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0008399917860515416, "policy_loss": -0.0065034665167331696, "vf_loss": 79.03890991210938, "vf_explained_var": 0.7710984349250793, "kl": 0.0017613372765481472, "entropy": 1.1208573579788208, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4851200, "episodes_total": 12128, "training_iteration": 379, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-01-07", "timestamp": 1660258867, "time_this_iter_s": 32.251976013183594, "time_total_s": 17278.589571237564, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 17278.589571237564, "timesteps_since_restore": 4851200, "iterations_since_restore": 379, "perf": {"cpu_util_percent": 33.40222222222222, "ram_util_percent": 58.955555555555534}}
+{"episode_reward_max": 639.0, "episode_reward_min": 467.0, "episode_reward_mean": 614.36, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 230.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 307.18}, "custom_metrics": {"sparse_reward_mean": 212.8, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 188.76, "shaped_reward_min": 146, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.93, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 18.33, "onion_pickup_agent_1_min": 13, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 16.65, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 17.81, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.19, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.21, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.06, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.08, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.41, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.88, "potting_onion_agent_1_min": 13, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.28, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.8, "dish_pickup_agent_1_min": 4, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.57, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.16, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.58, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 0.49, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.55, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.2, "soup_pickup_agent_1_min": 4, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.53, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.15, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.41, 
"optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.88, "optimal_onion_potting_agent_1_min": 13, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.41, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.88, "viable_onion_potting_agent_1_min": 13, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [627.0, 582.0, 636.0, 467.0, 576.0, 639.0, 582.0, 582.0, 627.0, 636.0, 639.0, 627.0, 579.0, 587.0, 636.0, 579.0, 633.0, 636.0, 582.0, 639.0, 630.0, 633.0, 633.0, 633.0, 630.0, 627.0, 633.0, 627.0, 582.0, 633.0, 633.0, 636.0, 636.0, 582.0, 630.0, 639.0, 633.0, 630.0, 587.0, 639.0, 576.0, 639.0, 633.0, 639.0, 636.0, 630.0, 636.0, 633.0, 633.0, 587.0, 636.0, 582.0, 593.0, 546.0, 579.0, 633.0, 639.0, 573.0, 630.0, 636.0, 633.0, 587.0, 590.0, 627.0, 630.0, 639.0, 636.0, 636.0, 573.0, 639.0, 633.0, 636.0, 630.0, 570.0, 630.0, 633.0, 579.0, 630.0, 639.0, 593.0, 633.0, 636.0, 627.0, 579.0, 590.0, 636.0, 468.0, 587.0, 576.0, 582.0, 582.0, 630.0, 636.0, 636.0, 633.0, 633.0, 630.0, 633.0, 630.0, 630.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [308.0, 319.0, 286.0, 296.0, 319.0, 317.0, 230.0, 237.0, 287.0, 289.0, 317.0, 322.0, 291.0, 291.0, 288.0, 294.0, 310.0, 317.0, 314.0, 322.0, 322.0, 317.0, 308.0, 319.0, 290.0, 289.0, 298.0, 289.0, 324.0, 312.0, 291.0, 288.0, 316.0, 317.0, 319.0, 317.0, 296.0, 286.0, 319.0, 320.0, 311.0, 319.0, 311.0, 322.0, 319.0, 314.0, 318.0, 315.0, 316.0, 314.0, 316.0, 311.0, 317.0, 316.0, 308.0, 319.0, 291.0, 291.0, 311.0, 322.0, 321.0, 312.0, 319.0, 317.0, 324.0, 312.0, 296.0, 286.0, 315.0, 315.0, 314.0, 325.0, 316.0, 317.0, 308.0, 322.0, 290.0, 297.0, 322.0, 317.0, 288.0, 288.0, 320.0, 319.0, 314.0, 319.0, 325.0, 314.0, 312.0, 324.0, 316.0, 314.0, 316.0, 320.0, 311.0, 322.0, 319.0, 
314.0, 291.0, 296.0, 319.0, 317.0, 288.0, 294.0, 294.0, 299.0, 264.0, 282.0, 288.0, 291.0, 319.0, 314.0, 316.0, 323.0, 279.0, 294.0, 321.0, 309.0, 317.0, 319.0, 316.0, 317.0, 293.0, 294.0, 296.0, 294.0, 310.0, 317.0, 314.0, 316.0, 319.0, 320.0, 319.0, 317.0, 316.0, 320.0, 285.0, 288.0, 322.0, 317.0, 316.0, 317.0, 316.0, 320.0, 311.0, 319.0, 282.0, 288.0, 316.0, 314.0, 319.0, 314.0, 294.0, 285.0, 314.0, 316.0, 319.0, 320.0, 307.0, 286.0, 314.0, 319.0, 314.0, 322.0, 313.0, 314.0, 287.0, 292.0, 288.0, 302.0, 324.0, 312.0, 237.0, 231.0, 291.0, 296.0, 282.0, 294.0, 291.0, 291.0, 289.0, 293.0, 319.0, 311.0, 314.0, 322.0, 324.0, 312.0, 314.0, 319.0, 314.0, 319.0, 314.0, 316.0, 316.0, 317.0, 308.0, 322.0, 319.0, 311.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8473261545871941, "mean_processing_ms": 0.23784601732362667, "mean_inference_ms": 1.4452831057564066}, "off_policy_estimator": {}, "info": {"num_steps_trained": 9120000, "num_steps_sampled": 4864000, "sample_time_ms": 21329.654, "load_time_ms": 36.87, "grad_time_ms": 9824.642, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0010081265354529023, "policy_loss": -0.006387272384017706, "vf_loss": 79.55323028564453, "vf_explained_var": 0.7746841311454773, "kl": 0.001845820457674563, "entropy": 1.1198536157608032, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4864000, "episodes_total": 12160, "training_iteration": 380, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-01-38", "timestamp": 1660258898, "time_this_iter_s": 30.608419179916382, "time_total_s": 17309.19799041748, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 17309.19799041748, "timesteps_since_restore": 4864000, "iterations_since_restore": 380, "perf": {"cpu_util_percent": 34.48139534883721, "ram_util_percent": 58.923255813953475}}
+{"episode_reward_max": 639.0, "episode_reward_min": 467.0, "episode_reward_mean": 615.9, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 230.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 307.95}, "custom_metrics": {"sparse_reward_mean": 213.4, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 189.1, "shaped_reward_min": 147, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.11, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.34, "onion_pickup_agent_1_min": 13, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 16.87, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 19, "useful_onion_pickup_agent_1_mean": 17.83, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.19, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.28, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.04, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.12, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.55, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.81, "potting_onion_agent_1_min": 13, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.08, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.75, "dish_pickup_agent_1_min": 4, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.54, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.2, "useful_dish_pickup_agent_1_min": 3, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.4, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 0.44, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.57, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.2, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.54, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.15, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.55, 
"optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.81, "optimal_onion_potting_agent_1_min": 13, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.55, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.81, "viable_onion_potting_agent_1_min": 13, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 587.0, 636.0, 639.0, 639.0, 633.0, 630.0, 587.0, 636.0, 636.0, 627.0, 639.0, 636.0, 633.0, 569.0, 633.0, 522.0, 630.0, 636.0, 633.0, 633.0, 630.0, 633.0, 579.0, 636.0, 633.0, 633.0, 639.0, 584.0, 633.0, 633.0, 630.0, 630.0, 639.0, 636.0, 636.0, 573.0, 639.0, 633.0, 636.0, 630.0, 570.0, 630.0, 633.0, 579.0, 630.0, 639.0, 593.0, 633.0, 636.0, 627.0, 579.0, 590.0, 636.0, 468.0, 587.0, 576.0, 582.0, 582.0, 630.0, 636.0, 636.0, 633.0, 633.0, 630.0, 633.0, 630.0, 630.0, 627.0, 582.0, 636.0, 467.0, 576.0, 639.0, 582.0, 582.0, 627.0, 636.0, 639.0, 627.0, 579.0, 587.0, 636.0, 579.0, 633.0, 636.0, 582.0, 639.0, 630.0, 633.0, 633.0, 633.0, 630.0, 627.0, 633.0, 627.0, 582.0, 633.0, 633.0, 636.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [288.0, 291.0, 293.0, 294.0, 313.0, 323.0, 319.0, 320.0, 319.0, 320.0, 319.0, 314.0, 316.0, 314.0, 293.0, 294.0, 316.0, 320.0, 314.0, 322.0, 311.0, 316.0, 319.0, 320.0, 314.0, 322.0, 314.0, 319.0, 287.0, 282.0, 311.0, 322.0, 264.0, 258.0, 311.0, 319.0, 319.0, 317.0, 319.0, 314.0, 317.0, 316.0, 321.0, 309.0, 319.0, 314.0, 288.0, 291.0, 319.0, 317.0, 311.0, 322.0, 319.0, 314.0, 322.0, 317.0, 294.0, 290.0, 321.0, 312.0, 319.0, 314.0, 311.0, 319.0, 314.0, 316.0, 319.0, 320.0, 319.0, 317.0, 316.0, 320.0, 285.0, 288.0, 322.0, 317.0, 316.0, 317.0, 316.0, 320.0, 311.0, 319.0, 282.0, 288.0, 316.0, 314.0, 319.0, 314.0, 294.0, 285.0, 314.0, 316.0, 319.0, 320.0, 307.0, 286.0, 314.0, 
319.0, 314.0, 322.0, 313.0, 314.0, 287.0, 292.0, 288.0, 302.0, 324.0, 312.0, 237.0, 231.0, 291.0, 296.0, 282.0, 294.0, 291.0, 291.0, 289.0, 293.0, 319.0, 311.0, 314.0, 322.0, 324.0, 312.0, 314.0, 319.0, 314.0, 319.0, 314.0, 316.0, 316.0, 317.0, 308.0, 322.0, 319.0, 311.0, 308.0, 319.0, 286.0, 296.0, 319.0, 317.0, 230.0, 237.0, 287.0, 289.0, 317.0, 322.0, 291.0, 291.0, 288.0, 294.0, 310.0, 317.0, 314.0, 322.0, 322.0, 317.0, 308.0, 319.0, 290.0, 289.0, 298.0, 289.0, 324.0, 312.0, 291.0, 288.0, 316.0, 317.0, 319.0, 317.0, 296.0, 286.0, 319.0, 320.0, 311.0, 319.0, 311.0, 322.0, 319.0, 314.0, 318.0, 315.0, 316.0, 314.0, 316.0, 311.0, 317.0, 316.0, 308.0, 319.0, 291.0, 291.0, 311.0, 322.0, 321.0, 312.0, 319.0, 317.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8462923346422779, "mean_processing_ms": 0.23764003789944027, "mean_inference_ms": 1.4442668898213196}, "off_policy_estimator": {}, "info": {"num_steps_trained": 9144000, "num_steps_sampled": 4876800, "sample_time_ms": 21323.81, "load_time_ms": 36.975, "grad_time_ms": 9811.843, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.005420349538326263, "policy_loss": -0.0019105566898360848, "vf_loss": 78.93695068359375, "vf_explained_var": 0.772759735584259, "kl": 0.0018517466960474849, "entropy": 1.1255789995193481, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4876800, "episodes_total": 12192, "training_iteration": 381, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-02-08", "timestamp": 1660258928, "time_this_iter_s": 30.169427156448364, "time_total_s": 17339.36741757393, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 17339.36741757393, "timesteps_since_restore": 4876800, "iterations_since_restore": 381, "perf": {"cpu_util_percent": 34.1, "ram_util_percent": 58.95116279069769}}
+{"episode_reward_max": 639.0, "episode_reward_min": 467.0, "episode_reward_mean": 615.47, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 230.0}, "policy_reward_max": {"ppo": 329.0}, "policy_reward_mean": {"ppo": 307.735}, "custom_metrics": {"sparse_reward_mean": 213.4, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 188.67, "shaped_reward_min": 147, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.92, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.58, "onion_pickup_agent_1_min": 15, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.69, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 19, "useful_onion_pickup_agent_1_mean": 18.01, "useful_onion_pickup_agent_1_min": 14, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.18, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.28, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.05, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.12, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.36, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.93, "potting_onion_agent_1_min": 14, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.19, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.63, "dish_pickup_agent_1_min": 4, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.57, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.08, "useful_dish_pickup_agent_1_min": 3, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.42, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 0.41, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.67, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.12, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.61, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.07, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.36, 
"optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.93, "optimal_onion_potting_agent_1_min": 14, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.36, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.93, "viable_onion_potting_agent_1_min": 14, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [633.0, 633.0, 633.0, 630.0, 633.0, 636.0, 567.0, 627.0, 582.0, 630.0, 630.0, 582.0, 633.0, 636.0, 579.0, 636.0, 636.0, 630.0, 630.0, 630.0, 582.0, 630.0, 633.0, 630.0, 578.0, 587.0, 530.0, 587.0, 630.0, 630.0, 522.0, 582.0, 630.0, 633.0, 630.0, 630.0, 627.0, 582.0, 636.0, 467.0, 576.0, 639.0, 582.0, 582.0, 627.0, 636.0, 639.0, 627.0, 579.0, 587.0, 636.0, 579.0, 633.0, 636.0, 582.0, 639.0, 630.0, 633.0, 633.0, 633.0, 630.0, 627.0, 633.0, 627.0, 582.0, 633.0, 633.0, 636.0, 579.0, 587.0, 636.0, 639.0, 639.0, 633.0, 630.0, 587.0, 636.0, 636.0, 627.0, 639.0, 636.0, 633.0, 569.0, 633.0, 522.0, 630.0, 636.0, 633.0, 633.0, 630.0, 633.0, 579.0, 636.0, 633.0, 633.0, 639.0, 584.0, 633.0, 633.0, 630.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [309.0, 324.0, 311.0, 322.0, 319.0, 314.0, 308.0, 322.0, 316.0, 317.0, 319.0, 317.0, 282.0, 285.0, 329.0, 298.0, 291.0, 291.0, 308.0, 322.0, 308.0, 322.0, 291.0, 291.0, 309.0, 324.0, 319.0, 317.0, 290.0, 289.0, 319.0, 317.0, 316.0, 320.0, 326.0, 304.0, 316.0, 314.0, 314.0, 316.0, 290.0, 292.0, 314.0, 316.0, 319.0, 314.0, 318.0, 312.0, 293.0, 285.0, 296.0, 291.0, 271.0, 259.0, 293.0, 294.0, 316.0, 314.0, 313.0, 317.0, 265.0, 257.0, 293.0, 289.0, 314.0, 316.0, 316.0, 317.0, 308.0, 322.0, 319.0, 311.0, 308.0, 319.0, 286.0, 296.0, 319.0, 317.0, 230.0, 237.0, 287.0, 289.0, 317.0, 322.0, 291.0, 291.0, 288.0, 294.0, 310.0, 317.0, 314.0, 322.0, 322.0, 317.0, 308.0, 319.0, 290.0, 
289.0, 298.0, 289.0, 324.0, 312.0, 291.0, 288.0, 316.0, 317.0, 319.0, 317.0, 296.0, 286.0, 319.0, 320.0, 311.0, 319.0, 311.0, 322.0, 319.0, 314.0, 318.0, 315.0, 316.0, 314.0, 316.0, 311.0, 317.0, 316.0, 308.0, 319.0, 291.0, 291.0, 311.0, 322.0, 321.0, 312.0, 319.0, 317.0, 288.0, 291.0, 293.0, 294.0, 313.0, 323.0, 319.0, 320.0, 319.0, 320.0, 319.0, 314.0, 316.0, 314.0, 293.0, 294.0, 316.0, 320.0, 314.0, 322.0, 311.0, 316.0, 319.0, 320.0, 314.0, 322.0, 314.0, 319.0, 287.0, 282.0, 311.0, 322.0, 264.0, 258.0, 311.0, 319.0, 319.0, 317.0, 319.0, 314.0, 317.0, 316.0, 321.0, 309.0, 319.0, 314.0, 288.0, 291.0, 319.0, 317.0, 311.0, 322.0, 319.0, 314.0, 322.0, 317.0, 294.0, 290.0, 321.0, 312.0, 319.0, 314.0, 311.0, 319.0]}, "sampler_perf": {"mean_env_wait_ms": 0.845260231721656, "mean_processing_ms": 0.23743439147057216, "mean_inference_ms": 1.443180349457874}, "off_policy_estimator": {}, "info": {"num_steps_trained": 9168000, "num_steps_sampled": 4889600, "sample_time_ms": 21404.999, "load_time_ms": 36.99, "grad_time_ms": 9811.226, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.002549513941630721, "policy_loss": -0.004884073510766029, "vf_loss": 79.93880462646484, "vf_explained_var": 0.7685417532920837, "kl": 0.002187439240515232, "entropy": 1.1205859184265137, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4889600, "episodes_total": 12224, "training_iteration": 382, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-02-39", "timestamp": 1660258959, "time_this_iter_s": 30.454362154006958, "time_total_s": 17369.821779727936, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 17369.821779727936, "timesteps_since_restore": 4889600, "iterations_since_restore": 382, "perf": {"cpu_util_percent": 33.88139534883721, "ram_util_percent": 58.944186046511625}}
+{"episode_reward_max": 639.0, "episode_reward_min": 522.0, "episode_reward_mean": 617.11, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 257.0}, "policy_reward_max": {"ppo": 329.0}, "policy_reward_mean": {"ppo": 308.555}, "custom_metrics": {"sparse_reward_mean": 214.0, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 189.11, "shaped_reward_min": 162, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.09, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.61, "onion_pickup_agent_1_min": 16, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.82, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 19, "useful_onion_pickup_agent_1_mean": 18.05, "useful_onion_pickup_agent_1_min": 14, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.21, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.34, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.06, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.13, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.46, "potting_onion_agent_0_min": 13, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.88, "potting_onion_agent_1_min": 14, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.14, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.7, "dish_pickup_agent_1_min": 4, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.55, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.13, "useful_dish_pickup_agent_1_min": 3, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.41, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.42, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.66, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.16, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.59, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.12, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.46, 
"optimal_onion_potting_agent_0_min": 13, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.88, "optimal_onion_potting_agent_1_min": 14, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.46, "viable_onion_potting_agent_0_min": 13, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.88, "viable_onion_potting_agent_1_min": 14, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [630.0, 633.0, 587.0, 630.0, 582.0, 582.0, 633.0, 627.0, 576.0, 587.0, 624.0, 636.0, 633.0, 633.0, 636.0, 630.0, 630.0, 633.0, 630.0, 636.0, 584.0, 570.0, 630.0, 639.0, 630.0, 630.0, 630.0, 639.0, 582.0, 633.0, 636.0, 633.0, 582.0, 633.0, 633.0, 636.0, 579.0, 587.0, 636.0, 639.0, 639.0, 633.0, 630.0, 587.0, 636.0, 636.0, 627.0, 639.0, 636.0, 633.0, 569.0, 633.0, 522.0, 630.0, 636.0, 633.0, 633.0, 630.0, 633.0, 579.0, 636.0, 633.0, 633.0, 639.0, 584.0, 633.0, 633.0, 630.0, 633.0, 633.0, 633.0, 630.0, 633.0, 636.0, 567.0, 627.0, 582.0, 630.0, 630.0, 582.0, 633.0, 636.0, 579.0, 636.0, 636.0, 630.0, 630.0, 630.0, 582.0, 630.0, 633.0, 630.0, 578.0, 587.0, 530.0, 587.0, 630.0, 630.0, 522.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [316.0, 314.0, 319.0, 314.0, 291.0, 296.0, 313.0, 317.0, 291.0, 291.0, 285.0, 297.0, 319.0, 314.0, 314.0, 313.0, 291.0, 285.0, 296.0, 291.0, 304.0, 320.0, 314.0, 322.0, 316.0, 317.0, 318.0, 315.0, 319.0, 317.0, 311.0, 319.0, 311.0, 319.0, 317.0, 316.0, 311.0, 319.0, 317.0, 319.0, 299.0, 285.0, 285.0, 285.0, 308.0, 322.0, 319.0, 320.0, 316.0, 314.0, 316.0, 314.0, 308.0, 322.0, 319.0, 320.0, 288.0, 294.0, 317.0, 316.0, 318.0, 318.0, 319.0, 314.0, 291.0, 291.0, 311.0, 322.0, 321.0, 312.0, 319.0, 317.0, 288.0, 291.0, 293.0, 294.0, 313.0, 323.0, 319.0, 320.0, 319.0, 320.0, 319.0, 314.0, 316.0, 314.0, 293.0, 294.0, 316.0, 320.0, 314.0, 322.0, 311.0, 316.0, 319.0, 320.0, 314.0, 
322.0, 314.0, 319.0, 287.0, 282.0, 311.0, 322.0, 264.0, 258.0, 311.0, 319.0, 319.0, 317.0, 319.0, 314.0, 317.0, 316.0, 321.0, 309.0, 319.0, 314.0, 288.0, 291.0, 319.0, 317.0, 311.0, 322.0, 319.0, 314.0, 322.0, 317.0, 294.0, 290.0, 321.0, 312.0, 319.0, 314.0, 311.0, 319.0, 309.0, 324.0, 311.0, 322.0, 319.0, 314.0, 308.0, 322.0, 316.0, 317.0, 319.0, 317.0, 282.0, 285.0, 329.0, 298.0, 291.0, 291.0, 308.0, 322.0, 308.0, 322.0, 291.0, 291.0, 309.0, 324.0, 319.0, 317.0, 290.0, 289.0, 319.0, 317.0, 316.0, 320.0, 326.0, 304.0, 316.0, 314.0, 314.0, 316.0, 290.0, 292.0, 314.0, 316.0, 319.0, 314.0, 318.0, 312.0, 293.0, 285.0, 296.0, 291.0, 271.0, 259.0, 293.0, 294.0, 316.0, 314.0, 313.0, 317.0, 265.0, 257.0, 293.0, 289.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8442347187033086, "mean_processing_ms": 0.23722937400390215, "mean_inference_ms": 1.442092522505549}, "off_policy_estimator": {}, "info": {"num_steps_trained": 9192000, "num_steps_sampled": 4902400, "sample_time_ms": 21412.011, "load_time_ms": 37.029, "grad_time_ms": 9601.556, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0016393003752455115, "policy_loss": -0.005780236795544624, "vf_loss": 79.79308319091797, "vf_explained_var": 0.7686330676078796, "kl": 0.001640369649976492, "entropy": 1.1195478439331055, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4902400, "episodes_total": 12256, "training_iteration": 383, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-03-09", "timestamp": 1660258989, "time_this_iter_s": 30.096380949020386, "time_total_s": 17399.918160676956, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 17399.918160676956, "timesteps_since_restore": 4902400, "iterations_since_restore": 383, "perf": {"cpu_util_percent": 34.86279069767443, "ram_util_percent": 58.95348837209304}}
+{"episode_reward_max": 639.0, "episode_reward_min": 507.0, "episode_reward_mean": 614.16, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 253.0}, "policy_reward_max": {"ppo": 329.0}, "policy_reward_mean": {"ppo": 307.08}, "custom_metrics": {"sparse_reward_mean": 213.0, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 188.16, "shaped_reward_min": 147, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.96, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.46, "onion_pickup_agent_1_min": 16, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.63, "useful_onion_pickup_agent_0_min": 13, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 18.0, "useful_onion_pickup_agent_1_min": 14, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.24, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.24, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.07, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.07, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.37, "potting_onion_agent_0_min": 13, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.83, "potting_onion_agent_1_min": 14, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.3, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.84, "dish_pickup_agent_1_min": 4, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.53, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.09, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.58, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.52, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.6, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.16, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.53, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.13, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.37, 
"optimal_onion_potting_agent_0_min": 13, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.83, "optimal_onion_potting_agent_1_min": 14, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.37, "viable_onion_potting_agent_0_min": 13, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.83, "viable_onion_potting_agent_1_min": 14, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [633.0, 630.0, 639.0, 584.0, 507.0, 573.0, 630.0, 582.0, 630.0, 633.0, 579.0, 582.0, 564.0, 633.0, 636.0, 582.0, 639.0, 582.0, 621.0, 636.0, 630.0, 633.0, 639.0, 579.0, 639.0, 639.0, 639.0, 639.0, 579.0, 639.0, 579.0, 636.0, 584.0, 633.0, 633.0, 630.0, 633.0, 633.0, 633.0, 630.0, 633.0, 636.0, 567.0, 627.0, 582.0, 630.0, 630.0, 582.0, 633.0, 636.0, 579.0, 636.0, 636.0, 630.0, 630.0, 630.0, 582.0, 630.0, 633.0, 630.0, 578.0, 587.0, 530.0, 587.0, 630.0, 630.0, 522.0, 582.0, 630.0, 633.0, 587.0, 630.0, 582.0, 582.0, 633.0, 627.0, 576.0, 587.0, 624.0, 636.0, 633.0, 633.0, 636.0, 630.0, 630.0, 633.0, 630.0, 636.0, 584.0, 570.0, 630.0, 639.0, 630.0, 630.0, 630.0, 639.0, 582.0, 633.0, 636.0, 633.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [317.0, 316.0, 306.0, 324.0, 314.0, 325.0, 293.0, 291.0, 253.0, 254.0, 291.0, 282.0, 314.0, 316.0, 288.0, 294.0, 319.0, 311.0, 324.0, 309.0, 287.0, 292.0, 293.0, 289.0, 274.0, 290.0, 322.0, 311.0, 316.0, 320.0, 288.0, 294.0, 316.0, 323.0, 293.0, 289.0, 308.0, 313.0, 319.0, 317.0, 311.0, 319.0, 316.0, 317.0, 317.0, 322.0, 293.0, 286.0, 319.0, 320.0, 317.0, 322.0, 319.0, 320.0, 319.0, 320.0, 290.0, 289.0, 317.0, 322.0, 279.0, 300.0, 322.0, 314.0, 294.0, 290.0, 321.0, 312.0, 319.0, 314.0, 311.0, 319.0, 309.0, 324.0, 311.0, 322.0, 319.0, 314.0, 308.0, 322.0, 316.0, 317.0, 319.0, 317.0, 282.0, 285.0, 329.0, 298.0, 291.0, 291.0, 308.0, 322.0, 308.0, 322.0, 291.0, 291.0, 309.0, 
324.0, 319.0, 317.0, 290.0, 289.0, 319.0, 317.0, 316.0, 320.0, 326.0, 304.0, 316.0, 314.0, 314.0, 316.0, 290.0, 292.0, 314.0, 316.0, 319.0, 314.0, 318.0, 312.0, 293.0, 285.0, 296.0, 291.0, 271.0, 259.0, 293.0, 294.0, 316.0, 314.0, 313.0, 317.0, 265.0, 257.0, 293.0, 289.0, 316.0, 314.0, 319.0, 314.0, 291.0, 296.0, 313.0, 317.0, 291.0, 291.0, 285.0, 297.0, 319.0, 314.0, 314.0, 313.0, 291.0, 285.0, 296.0, 291.0, 304.0, 320.0, 314.0, 322.0, 316.0, 317.0, 318.0, 315.0, 319.0, 317.0, 311.0, 319.0, 311.0, 319.0, 317.0, 316.0, 311.0, 319.0, 317.0, 319.0, 299.0, 285.0, 285.0, 285.0, 308.0, 322.0, 319.0, 320.0, 316.0, 314.0, 316.0, 314.0, 308.0, 322.0, 319.0, 320.0, 288.0, 294.0, 317.0, 316.0, 318.0, 318.0, 319.0, 314.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8432201147992054, "mean_processing_ms": 0.23702861990301977, "mean_inference_ms": 1.4411244641965177}, "off_policy_estimator": {}, "info": {"num_steps_trained": 9216000, "num_steps_sampled": 4915200, "sample_time_ms": 21556.377, "load_time_ms": 37.038, "grad_time_ms": 9524.16, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.002193765016272664, "policy_loss": -0.0053141750395298, "vf_loss": 80.70391082763672, "vf_explained_var": 0.7705557942390442, "kl": 0.0018228074768558145, "entropy": 1.124890685081482, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4915200, "episodes_total": 12288, "training_iteration": 384, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-03-43", "timestamp": 1660259023, "time_this_iter_s": 33.80204796791077, "time_total_s": 17433.720208644867, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 17433.720208644867, "timesteps_since_restore": 4915200, "iterations_since_restore": 384, "perf": {"cpu_util_percent": 36.197872340425526, "ram_util_percent": 59.73617021276596}}
+{"episode_reward_max": 639.0, "episode_reward_min": 507.0, "episode_reward_mean": 611.52, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 253.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 305.76}, "custom_metrics": {"sparse_reward_mean": 212.0, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 187.52, "shaped_reward_min": 147, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.93, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 18.3, "onion_pickup_agent_1_min": 13, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.54, "useful_onion_pickup_agent_0_min": 13, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.88, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.25, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.26, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.08, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.07, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.34, "potting_onion_agent_0_min": 13, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.77, "potting_onion_agent_1_min": 13, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.22, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 6.0, "dish_pickup_agent_1_min": 4, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.44, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.19, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.57, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.62, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.04, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.5, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.17, "soup_pickup_agent_1_min": 4, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.47, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.13, "soup_delivery_agent_1_min": 4, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.34, 
"optimal_onion_potting_agent_0_min": 13, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.77, "optimal_onion_potting_agent_1_min": 13, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.34, "viable_onion_potting_agent_0_min": 13, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.77, "viable_onion_potting_agent_1_min": 13, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [573.0, 630.0, 582.0, 633.0, 639.0, 630.0, 633.0, 630.0, 587.0, 567.0, 633.0, 582.0, 639.0, 582.0, 630.0, 582.0, 582.0, 630.0, 582.0, 582.0, 630.0, 630.0, 576.0, 636.0, 573.0, 573.0, 633.0, 582.0, 587.0, 636.0, 636.0, 579.0, 630.0, 630.0, 522.0, 582.0, 630.0, 633.0, 587.0, 630.0, 582.0, 582.0, 633.0, 627.0, 576.0, 587.0, 624.0, 636.0, 633.0, 633.0, 636.0, 630.0, 630.0, 633.0, 630.0, 636.0, 584.0, 570.0, 630.0, 639.0, 630.0, 630.0, 630.0, 639.0, 582.0, 633.0, 636.0, 633.0, 633.0, 630.0, 639.0, 584.0, 507.0, 573.0, 630.0, 582.0, 630.0, 633.0, 579.0, 582.0, 564.0, 633.0, 636.0, 582.0, 639.0, 582.0, 621.0, 636.0, 630.0, 633.0, 639.0, 579.0, 639.0, 639.0, 639.0, 639.0, 579.0, 639.0, 579.0, 636.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [285.0, 288.0, 316.0, 314.0, 283.0, 299.0, 316.0, 317.0, 319.0, 320.0, 316.0, 314.0, 316.0, 317.0, 316.0, 314.0, 293.0, 294.0, 282.0, 285.0, 316.0, 317.0, 288.0, 294.0, 319.0, 320.0, 288.0, 294.0, 317.0, 313.0, 290.0, 292.0, 288.0, 294.0, 308.0, 322.0, 292.0, 290.0, 297.0, 285.0, 313.0, 317.0, 316.0, 314.0, 285.0, 291.0, 322.0, 314.0, 288.0, 285.0, 284.0, 289.0, 319.0, 314.0, 282.0, 300.0, 283.0, 304.0, 319.0, 317.0, 314.0, 322.0, 288.0, 291.0, 316.0, 314.0, 313.0, 317.0, 265.0, 257.0, 293.0, 289.0, 316.0, 314.0, 319.0, 314.0, 291.0, 296.0, 313.0, 317.0, 291.0, 291.0, 285.0, 297.0, 319.0, 314.0, 314.0, 313.0, 291.0, 285.0, 296.0, 291.0, 304.0, 320.0, 314.0, 322.0, 316.0, 
317.0, 318.0, 315.0, 319.0, 317.0, 311.0, 319.0, 311.0, 319.0, 317.0, 316.0, 311.0, 319.0, 317.0, 319.0, 299.0, 285.0, 285.0, 285.0, 308.0, 322.0, 319.0, 320.0, 316.0, 314.0, 316.0, 314.0, 308.0, 322.0, 319.0, 320.0, 288.0, 294.0, 317.0, 316.0, 318.0, 318.0, 319.0, 314.0, 317.0, 316.0, 306.0, 324.0, 314.0, 325.0, 293.0, 291.0, 253.0, 254.0, 291.0, 282.0, 314.0, 316.0, 288.0, 294.0, 319.0, 311.0, 324.0, 309.0, 287.0, 292.0, 293.0, 289.0, 274.0, 290.0, 322.0, 311.0, 316.0, 320.0, 288.0, 294.0, 316.0, 323.0, 293.0, 289.0, 308.0, 313.0, 319.0, 317.0, 311.0, 319.0, 316.0, 317.0, 317.0, 322.0, 293.0, 286.0, 319.0, 320.0, 317.0, 322.0, 319.0, 320.0, 319.0, 320.0, 290.0, 289.0, 317.0, 322.0, 279.0, 300.0, 322.0, 314.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8422101834912255, "mean_processing_ms": 0.23682867408475425, "mean_inference_ms": 1.4402525240623634}, "off_policy_estimator": {}, "info": {"num_steps_trained": 9240000, "num_steps_sampled": 4928000, "sample_time_ms": 21695.699, "load_time_ms": 36.564, "grad_time_ms": 9521.463, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.004186244681477547, "policy_loss": -0.0031982522923499346, "vf_loss": 79.4544906616211, "vf_explained_var": 0.7711065411567688, "kl": 0.002083372324705124, "entropy": 1.1219121217727661, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4928000, "episodes_total": 12320, "training_iteration": 385, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-04-16", "timestamp": 1660259056, "time_this_iter_s": 33.08577585220337, "time_total_s": 17466.80598449707, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 17466.80598449707, "timesteps_since_restore": 4928000, "iterations_since_restore": 385, "perf": {"cpu_util_percent": 33.6468085106383, "ram_util_percent": 59.0808510638298}}
+{"episode_reward_max": 639.0, "episode_reward_min": 507.0, "episode_reward_mean": 609.69, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 249.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 304.845}, "custom_metrics": {"sparse_reward_mean": 211.2, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 187.29, "shaped_reward_min": 147, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.92, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 18.12, "onion_pickup_agent_1_min": 13, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.56, "useful_onion_pickup_agent_0_min": 13, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.76, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.24, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.23, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.07, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.07, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.41, "potting_onion_agent_0_min": 13, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.66, "potting_onion_agent_1_min": 13, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.23, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.94, "dish_pickup_agent_1_min": 4, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.46, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.2, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.58, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 0.55, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.04, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.47, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.16, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.44, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.12, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.41, 
"optimal_onion_potting_agent_0_min": 13, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.66, "optimal_onion_potting_agent_1_min": 13, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.41, "viable_onion_potting_agent_0_min": 13, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.66, "viable_onion_potting_agent_1_min": 13, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [636.0, 590.0, 630.0, 636.0, 525.0, 530.0, 582.0, 587.0, 633.0, 627.0, 639.0, 624.0, 582.0, 630.0, 639.0, 630.0, 582.0, 636.0, 582.0, 522.0, 636.0, 633.0, 630.0, 627.0, 519.0, 639.0, 627.0, 636.0, 633.0, 636.0, 636.0, 627.0, 582.0, 633.0, 636.0, 633.0, 633.0, 630.0, 639.0, 584.0, 507.0, 573.0, 630.0, 582.0, 630.0, 633.0, 579.0, 582.0, 564.0, 633.0, 636.0, 582.0, 639.0, 582.0, 621.0, 636.0, 630.0, 633.0, 639.0, 579.0, 639.0, 639.0, 639.0, 639.0, 579.0, 639.0, 579.0, 636.0, 573.0, 630.0, 582.0, 633.0, 639.0, 630.0, 633.0, 630.0, 587.0, 567.0, 633.0, 582.0, 639.0, 582.0, 630.0, 582.0, 582.0, 630.0, 582.0, 582.0, 630.0, 630.0, 576.0, 636.0, 573.0, 573.0, 633.0, 582.0, 587.0, 636.0, 636.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [319.0, 317.0, 296.0, 294.0, 314.0, 316.0, 322.0, 314.0, 262.0, 263.0, 281.0, 249.0, 288.0, 294.0, 294.0, 293.0, 316.0, 317.0, 316.0, 311.0, 319.0, 320.0, 308.0, 316.0, 291.0, 291.0, 308.0, 322.0, 319.0, 320.0, 322.0, 308.0, 288.0, 294.0, 317.0, 319.0, 296.0, 286.0, 249.0, 273.0, 314.0, 322.0, 316.0, 317.0, 308.0, 322.0, 311.0, 316.0, 268.0, 251.0, 322.0, 317.0, 316.0, 311.0, 317.0, 319.0, 311.0, 322.0, 319.0, 317.0, 319.0, 317.0, 316.0, 311.0, 288.0, 294.0, 317.0, 316.0, 318.0, 318.0, 319.0, 314.0, 317.0, 316.0, 306.0, 324.0, 314.0, 325.0, 293.0, 291.0, 253.0, 254.0, 291.0, 282.0, 314.0, 316.0, 288.0, 294.0, 319.0, 311.0, 324.0, 309.0, 287.0, 292.0, 293.0, 289.0, 274.0, 
290.0, 322.0, 311.0, 316.0, 320.0, 288.0, 294.0, 316.0, 323.0, 293.0, 289.0, 308.0, 313.0, 319.0, 317.0, 311.0, 319.0, 316.0, 317.0, 317.0, 322.0, 293.0, 286.0, 319.0, 320.0, 317.0, 322.0, 319.0, 320.0, 319.0, 320.0, 290.0, 289.0, 317.0, 322.0, 279.0, 300.0, 322.0, 314.0, 285.0, 288.0, 316.0, 314.0, 283.0, 299.0, 316.0, 317.0, 319.0, 320.0, 316.0, 314.0, 316.0, 317.0, 316.0, 314.0, 293.0, 294.0, 282.0, 285.0, 316.0, 317.0, 288.0, 294.0, 319.0, 320.0, 288.0, 294.0, 317.0, 313.0, 290.0, 292.0, 288.0, 294.0, 308.0, 322.0, 292.0, 290.0, 297.0, 285.0, 313.0, 317.0, 316.0, 314.0, 285.0, 291.0, 322.0, 314.0, 288.0, 285.0, 284.0, 289.0, 319.0, 314.0, 282.0, 300.0, 283.0, 304.0, 319.0, 317.0, 314.0, 322.0, 288.0, 291.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8412065546272243, "mean_processing_ms": 0.23663028686685655, "mean_inference_ms": 1.4395227599407487}, "off_policy_estimator": {}, "info": {"num_steps_trained": 9264000, "num_steps_sampled": 4940800, "sample_time_ms": 21927.826, "load_time_ms": 36.438, "grad_time_ms": 9711.085, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.002509244019165635, "policy_loss": -0.005174743477255106, "vf_loss": 82.38806915283203, "vf_explained_var": 0.7595655918121338, "kl": 0.0020332231651991606, "entropy": 1.1096264123916626, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4940800, "episodes_total": 12352, "training_iteration": 386, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-04-49", "timestamp": 1660259089, "time_this_iter_s": 33.33205199241638, "time_total_s": 17500.138036489487, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 17500.138036489487, "timesteps_since_restore": 4940800, "iterations_since_restore": 386, "perf": {"cpu_util_percent": 33.47234042553192, "ram_util_percent": 59.114893617021295}}
+{"episode_reward_max": 639.0, "episode_reward_min": 365.0, "episode_reward_mean": 607.03, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 182.0}, "policy_reward_max": {"ppo": 328.0}, "policy_reward_mean": {"ppo": 303.515}, "custom_metrics": {"sparse_reward_mean": 210.2, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 186.63, "shaped_reward_min": 125, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.05, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 18.03, "onion_pickup_agent_1_min": 12, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 16.71, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.58, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.34, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.25, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.12, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.1, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.39, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.57, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.09, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.87, "dish_pickup_agent_1_min": 4, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.37, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.18, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.45, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 0.49, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.45, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.17, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.38, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.14, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.39, 
"optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.57, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.39, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.57, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [630.0, 582.0, 639.0, 633.0, 636.0, 639.0, 558.0, 582.0, 633.0, 633.0, 630.0, 582.0, 627.0, 630.0, 636.0, 506.0, 630.0, 639.0, 630.0, 582.0, 630.0, 590.0, 584.0, 584.0, 365.0, 639.0, 582.0, 639.0, 587.0, 633.0, 633.0, 627.0, 579.0, 639.0, 579.0, 636.0, 573.0, 630.0, 582.0, 633.0, 639.0, 630.0, 633.0, 630.0, 587.0, 567.0, 633.0, 582.0, 639.0, 582.0, 630.0, 582.0, 582.0, 630.0, 582.0, 582.0, 630.0, 630.0, 576.0, 636.0, 573.0, 573.0, 633.0, 582.0, 587.0, 636.0, 636.0, 579.0, 636.0, 590.0, 630.0, 636.0, 525.0, 530.0, 582.0, 587.0, 633.0, 627.0, 639.0, 624.0, 582.0, 630.0, 639.0, 630.0, 582.0, 636.0, 582.0, 522.0, 636.0, 633.0, 630.0, 627.0, 519.0, 639.0, 627.0, 636.0, 633.0, 636.0, 636.0, 627.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [316.0, 314.0, 285.0, 297.0, 315.0, 324.0, 313.0, 320.0, 319.0, 317.0, 322.0, 317.0, 282.0, 276.0, 296.0, 286.0, 319.0, 314.0, 317.0, 316.0, 302.0, 328.0, 291.0, 291.0, 312.0, 315.0, 303.0, 327.0, 314.0, 322.0, 249.0, 257.0, 316.0, 314.0, 317.0, 322.0, 316.0, 314.0, 287.0, 295.0, 319.0, 311.0, 299.0, 291.0, 293.0, 291.0, 296.0, 288.0, 183.0, 182.0, 320.0, 319.0, 288.0, 294.0, 319.0, 320.0, 296.0, 291.0, 316.0, 317.0, 317.0, 316.0, 308.0, 319.0, 290.0, 289.0, 317.0, 322.0, 279.0, 300.0, 322.0, 314.0, 285.0, 288.0, 316.0, 314.0, 283.0, 299.0, 316.0, 317.0, 319.0, 320.0, 316.0, 314.0, 316.0, 317.0, 316.0, 314.0, 293.0, 294.0, 282.0, 285.0, 316.0, 317.0, 288.0, 294.0, 319.0, 
320.0, 288.0, 294.0, 317.0, 313.0, 290.0, 292.0, 288.0, 294.0, 308.0, 322.0, 292.0, 290.0, 297.0, 285.0, 313.0, 317.0, 316.0, 314.0, 285.0, 291.0, 322.0, 314.0, 288.0, 285.0, 284.0, 289.0, 319.0, 314.0, 282.0, 300.0, 283.0, 304.0, 319.0, 317.0, 314.0, 322.0, 288.0, 291.0, 319.0, 317.0, 296.0, 294.0, 314.0, 316.0, 322.0, 314.0, 262.0, 263.0, 281.0, 249.0, 288.0, 294.0, 294.0, 293.0, 316.0, 317.0, 316.0, 311.0, 319.0, 320.0, 308.0, 316.0, 291.0, 291.0, 308.0, 322.0, 319.0, 320.0, 322.0, 308.0, 288.0, 294.0, 317.0, 319.0, 296.0, 286.0, 249.0, 273.0, 314.0, 322.0, 316.0, 317.0, 308.0, 322.0, 311.0, 316.0, 268.0, 251.0, 322.0, 317.0, 316.0, 311.0, 317.0, 319.0, 311.0, 322.0, 319.0, 317.0, 319.0, 317.0, 316.0, 311.0]}, "sampler_perf": {"mean_env_wait_ms": 0.840202535823751, "mean_processing_ms": 0.2364293047857562, "mean_inference_ms": 1.4386641797535789}, "off_policy_estimator": {}, "info": {"num_steps_trained": 9288000, "num_steps_sampled": 4953600, "sample_time_ms": 21835.055, "load_time_ms": 36.557, "grad_time_ms": 9742.403, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.00015397991228383034, "policy_loss": -0.007473704870790243, "vf_loss": 81.87383270263672, "vf_explained_var": 0.7745316028594971, "kl": 0.0020445811096578836, "entropy": 1.1193923950195312, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4953600, "episodes_total": 12384, "training_iteration": 387, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-05-19", "timestamp": 1660259119, "time_this_iter_s": 30.124536752700806, "time_total_s": 17530.262573242188, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 17530.262573242188, "timesteps_since_restore": 4953600, "iterations_since_restore": 387, "perf": {"cpu_util_percent": 31.948837209302326, "ram_util_percent": 58.95581395348838}}
+{"episode_reward_max": 639.0, "episode_reward_min": 365.0, "episode_reward_mean": 611.64, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 182.0}, "policy_reward_max": {"ppo": 328.0}, "policy_reward_mean": {"ppo": 305.82}, "custom_metrics": {"sparse_reward_mean": 212.0, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 187.64, "shaped_reward_min": 125, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.2, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 17.96, "onion_pickup_agent_1_min": 12, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.89, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.48, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.31, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.21, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.11, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.07, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.55, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.51, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.09, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.77, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.44, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.18, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.44, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 0.41, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.49, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.23, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.42, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.19, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.55, 
"optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.51, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.55, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.51, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 639.0, 639.0, 627.0, 627.0, 633.0, 627.0, 639.0, 636.0, 636.0, 584.0, 573.0, 636.0, 630.0, 573.0, 630.0, 633.0, 630.0, 627.0, 636.0, 630.0, 636.0, 630.0, 630.0, 627.0, 636.0, 630.0, 630.0, 587.0, 579.0, 570.0, 633.0, 587.0, 636.0, 636.0, 579.0, 636.0, 590.0, 630.0, 636.0, 525.0, 530.0, 582.0, 587.0, 633.0, 627.0, 639.0, 624.0, 582.0, 630.0, 639.0, 630.0, 582.0, 636.0, 582.0, 522.0, 636.0, 633.0, 630.0, 627.0, 519.0, 639.0, 627.0, 636.0, 633.0, 636.0, 636.0, 627.0, 630.0, 582.0, 639.0, 633.0, 636.0, 639.0, 558.0, 582.0, 633.0, 633.0, 630.0, 582.0, 627.0, 630.0, 636.0, 506.0, 630.0, 639.0, 630.0, 582.0, 630.0, 590.0, 584.0, 584.0, 365.0, 639.0, 582.0, 639.0, 587.0, 633.0, 633.0, 627.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [291.0, 291.0, 317.0, 322.0, 317.0, 322.0, 311.0, 316.0, 308.0, 319.0, 319.0, 314.0, 314.0, 313.0, 316.0, 323.0, 314.0, 322.0, 319.0, 317.0, 291.0, 293.0, 298.0, 275.0, 324.0, 312.0, 323.0, 307.0, 281.0, 292.0, 308.0, 322.0, 317.0, 316.0, 311.0, 319.0, 313.0, 314.0, 319.0, 317.0, 314.0, 316.0, 319.0, 317.0, 316.0, 314.0, 316.0, 314.0, 316.0, 311.0, 314.0, 322.0, 314.0, 316.0, 316.0, 314.0, 293.0, 294.0, 288.0, 291.0, 295.0, 275.0, 319.0, 314.0, 283.0, 304.0, 319.0, 317.0, 314.0, 322.0, 288.0, 291.0, 319.0, 317.0, 296.0, 294.0, 314.0, 316.0, 322.0, 314.0, 262.0, 263.0, 281.0, 249.0, 288.0, 294.0, 294.0, 293.0, 316.0, 317.0, 316.0, 311.0, 319.0, 320.0, 308.0, 316.0, 291.0, 
291.0, 308.0, 322.0, 319.0, 320.0, 322.0, 308.0, 288.0, 294.0, 317.0, 319.0, 296.0, 286.0, 249.0, 273.0, 314.0, 322.0, 316.0, 317.0, 308.0, 322.0, 311.0, 316.0, 268.0, 251.0, 322.0, 317.0, 316.0, 311.0, 317.0, 319.0, 311.0, 322.0, 319.0, 317.0, 319.0, 317.0, 316.0, 311.0, 316.0, 314.0, 285.0, 297.0, 315.0, 324.0, 313.0, 320.0, 319.0, 317.0, 322.0, 317.0, 282.0, 276.0, 296.0, 286.0, 319.0, 314.0, 317.0, 316.0, 302.0, 328.0, 291.0, 291.0, 312.0, 315.0, 303.0, 327.0, 314.0, 322.0, 249.0, 257.0, 316.0, 314.0, 317.0, 322.0, 316.0, 314.0, 287.0, 295.0, 319.0, 311.0, 299.0, 291.0, 293.0, 291.0, 296.0, 288.0, 183.0, 182.0, 320.0, 319.0, 288.0, 294.0, 319.0, 320.0, 296.0, 291.0, 316.0, 317.0, 317.0, 316.0, 308.0, 319.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8391970766904484, "mean_processing_ms": 0.23622675772030782, "mean_inference_ms": 1.4376630323224628}, "off_policy_estimator": {}, "info": {"num_steps_trained": 9312000, "num_steps_sampled": 4966400, "sample_time_ms": 21599.042, "load_time_ms": 36.591, "grad_time_ms": 9790.428, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0022788590285927057, "policy_loss": -0.00476012472063303, "vf_loss": 75.92620849609375, "vf_explained_var": 0.7665655016899109, "kl": 0.0023904216941446066, "entropy": 1.1072710752487183, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4966400, "episodes_total": 12416, "training_iteration": 388, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-05-50", "timestamp": 1660259150, "time_this_iter_s": 30.593504667282104, "time_total_s": 17560.85607790947, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 17560.85607790947, "timesteps_since_restore": 4966400, "iterations_since_restore": 388, "perf": {"cpu_util_percent": 33.502325581395354, "ram_util_percent": 59.08372093023256}}
+{"episode_reward_max": 639.0, "episode_reward_min": 365.0, "episode_reward_mean": 614.92, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 182.0}, "policy_reward_max": {"ppo": 328.0}, "policy_reward_mean": {"ppo": 307.46}, "custom_metrics": {"sparse_reward_mean": 213.4, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 188.12, "shaped_reward_min": 125, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.31, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 18.12, "onion_pickup_agent_1_min": 12, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 17.01, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.65, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.31, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.2, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.1, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.05, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.61, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.68, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.12, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.84, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.38, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.12, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.44, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.51, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.52, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.23, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.47, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.21, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.61, 
"optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.68, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.61, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.68, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [633.0, 630.0, 639.0, 579.0, 582.0, 633.0, 579.0, 633.0, 636.0, 630.0, 633.0, 609.0, 582.0, 636.0, 630.0, 636.0, 576.0, 636.0, 633.0, 630.0, 636.0, 582.0, 582.0, 636.0, 630.0, 633.0, 630.0, 636.0, 579.0, 633.0, 570.0, 633.0, 633.0, 636.0, 636.0, 627.0, 630.0, 582.0, 639.0, 633.0, 636.0, 639.0, 558.0, 582.0, 633.0, 633.0, 630.0, 582.0, 627.0, 630.0, 636.0, 506.0, 630.0, 639.0, 630.0, 582.0, 630.0, 590.0, 584.0, 584.0, 365.0, 639.0, 582.0, 639.0, 587.0, 633.0, 633.0, 627.0, 582.0, 639.0, 639.0, 627.0, 627.0, 633.0, 627.0, 639.0, 636.0, 636.0, 584.0, 573.0, 636.0, 630.0, 573.0, 630.0, 633.0, 630.0, 627.0, 636.0, 630.0, 636.0, 630.0, 630.0, 627.0, 636.0, 630.0, 630.0, 587.0, 579.0, 570.0, 633.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [311.0, 322.0, 319.0, 311.0, 317.0, 322.0, 282.0, 297.0, 294.0, 288.0, 316.0, 317.0, 288.0, 291.0, 311.0, 322.0, 314.0, 322.0, 313.0, 317.0, 317.0, 316.0, 310.0, 299.0, 293.0, 289.0, 317.0, 319.0, 322.0, 308.0, 319.0, 317.0, 288.0, 288.0, 314.0, 322.0, 316.0, 317.0, 314.0, 316.0, 322.0, 314.0, 285.0, 297.0, 294.0, 288.0, 314.0, 322.0, 321.0, 309.0, 316.0, 317.0, 311.0, 319.0, 311.0, 325.0, 282.0, 297.0, 319.0, 314.0, 287.0, 283.0, 319.0, 314.0, 311.0, 322.0, 319.0, 317.0, 319.0, 317.0, 316.0, 311.0, 316.0, 314.0, 285.0, 297.0, 315.0, 324.0, 313.0, 320.0, 319.0, 317.0, 322.0, 317.0, 282.0, 276.0, 296.0, 286.0, 319.0, 314.0, 317.0, 316.0, 302.0, 328.0, 291.0, 291.0, 312.0, 
315.0, 303.0, 327.0, 314.0, 322.0, 249.0, 257.0, 316.0, 314.0, 317.0, 322.0, 316.0, 314.0, 287.0, 295.0, 319.0, 311.0, 299.0, 291.0, 293.0, 291.0, 296.0, 288.0, 183.0, 182.0, 320.0, 319.0, 288.0, 294.0, 319.0, 320.0, 296.0, 291.0, 316.0, 317.0, 317.0, 316.0, 308.0, 319.0, 291.0, 291.0, 317.0, 322.0, 317.0, 322.0, 311.0, 316.0, 308.0, 319.0, 319.0, 314.0, 314.0, 313.0, 316.0, 323.0, 314.0, 322.0, 319.0, 317.0, 291.0, 293.0, 298.0, 275.0, 324.0, 312.0, 323.0, 307.0, 281.0, 292.0, 308.0, 322.0, 317.0, 316.0, 311.0, 319.0, 313.0, 314.0, 319.0, 317.0, 314.0, 316.0, 319.0, 317.0, 316.0, 314.0, 316.0, 314.0, 316.0, 311.0, 314.0, 322.0, 314.0, 316.0, 316.0, 314.0, 293.0, 294.0, 288.0, 291.0, 295.0, 275.0, 319.0, 314.0]}, "sampler_perf": {"mean_env_wait_ms": 0.838196919665678, "mean_processing_ms": 0.23602508803676062, "mean_inference_ms": 1.4365150313753652}, "off_policy_estimator": {}, "info": {"num_steps_trained": 9336000, "num_steps_sampled": 4979200, "sample_time_ms": 21400.03, "load_time_ms": 36.799, "grad_time_ms": 9763.193, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0015943764010444283, "policy_loss": -0.005665285978466272, "vf_loss": 78.14656829833984, "vf_explained_var": 0.7691711783409119, "kl": 0.0016816608840599656, "entropy": 1.1099879741668701, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4979200, "episodes_total": 12448, "training_iteration": 389, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-06-20", "timestamp": 1660259180, "time_this_iter_s": 29.990082025527954, "time_total_s": 17590.846159934998, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 17590.846159934998, "timesteps_since_restore": 4979200, "iterations_since_restore": 389, "perf": {"cpu_util_percent": 36.06279069767442, "ram_util_percent": 59.181395348837206}}
+{"episode_reward_max": 639.0, "episode_reward_min": 567.0, "episode_reward_mean": 617.61, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 275.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 308.805}, "custom_metrics": {"sparse_reward_mean": 214.4, "sparse_reward_min": 200, "sparse_reward_max": 220, "shaped_reward_mean": 188.81, "shaped_reward_min": 167, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.3, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 18.21, "onion_pickup_agent_1_min": 13, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.97, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 17.76, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.25, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.19, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.09, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.03, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.68, "potting_onion_agent_0_min": 13, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.73, "potting_onion_agent_1_min": 12, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.18, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.87, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.43, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.18, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.52, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.54, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.51, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.24, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.5, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.22, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.68, 
"optimal_onion_potting_agent_0_min": 13, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.73, "optimal_onion_potting_agent_1_min": 12, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.68, "viable_onion_potting_agent_0_min": 13, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.73, "viable_onion_potting_agent_1_min": 12, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 633.0, 633.0, 582.0, 579.0, 582.0, 633.0, 579.0, 633.0, 633.0, 576.0, 567.0, 636.0, 576.0, 636.0, 633.0, 639.0, 636.0, 579.0, 636.0, 627.0, 636.0, 636.0, 630.0, 630.0, 633.0, 633.0, 633.0, 579.0, 639.0, 633.0, 579.0, 587.0, 633.0, 633.0, 627.0, 582.0, 639.0, 639.0, 627.0, 627.0, 633.0, 627.0, 639.0, 636.0, 636.0, 584.0, 573.0, 636.0, 630.0, 573.0, 630.0, 633.0, 630.0, 627.0, 636.0, 630.0, 636.0, 630.0, 630.0, 627.0, 636.0, 630.0, 630.0, 587.0, 579.0, 570.0, 633.0, 633.0, 630.0, 639.0, 579.0, 582.0, 633.0, 579.0, 633.0, 636.0, 630.0, 633.0, 609.0, 582.0, 636.0, 630.0, 636.0, 576.0, 636.0, 633.0, 630.0, 636.0, 582.0, 582.0, 636.0, 630.0, 633.0, 630.0, 636.0, 579.0, 633.0, 570.0, 633.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [296.0, 286.0, 319.0, 314.0, 311.0, 322.0, 285.0, 297.0, 287.0, 292.0, 291.0, 291.0, 316.0, 317.0, 288.0, 291.0, 310.0, 323.0, 317.0, 316.0, 298.0, 278.0, 282.0, 285.0, 314.0, 322.0, 292.0, 284.0, 316.0, 320.0, 318.0, 315.0, 317.0, 322.0, 319.0, 317.0, 288.0, 291.0, 321.0, 315.0, 308.0, 319.0, 314.0, 322.0, 314.0, 322.0, 319.0, 311.0, 312.0, 318.0, 316.0, 317.0, 316.0, 317.0, 319.0, 314.0, 285.0, 294.0, 317.0, 322.0, 311.0, 322.0, 288.0, 291.0, 296.0, 291.0, 316.0, 317.0, 317.0, 316.0, 308.0, 319.0, 291.0, 291.0, 317.0, 322.0, 317.0, 322.0, 311.0, 316.0, 308.0, 319.0, 319.0, 314.0, 314.0, 313.0, 316.0, 323.0, 314.0, 322.0, 319.0, 317.0, 291.0, 293.0, 298.0, 275.0, 324.0, 
312.0, 323.0, 307.0, 281.0, 292.0, 308.0, 322.0, 317.0, 316.0, 311.0, 319.0, 313.0, 314.0, 319.0, 317.0, 314.0, 316.0, 319.0, 317.0, 316.0, 314.0, 316.0, 314.0, 316.0, 311.0, 314.0, 322.0, 314.0, 316.0, 316.0, 314.0, 293.0, 294.0, 288.0, 291.0, 295.0, 275.0, 319.0, 314.0, 311.0, 322.0, 319.0, 311.0, 317.0, 322.0, 282.0, 297.0, 294.0, 288.0, 316.0, 317.0, 288.0, 291.0, 311.0, 322.0, 314.0, 322.0, 313.0, 317.0, 317.0, 316.0, 310.0, 299.0, 293.0, 289.0, 317.0, 319.0, 322.0, 308.0, 319.0, 317.0, 288.0, 288.0, 314.0, 322.0, 316.0, 317.0, 314.0, 316.0, 322.0, 314.0, 285.0, 297.0, 294.0, 288.0, 314.0, 322.0, 321.0, 309.0, 316.0, 317.0, 311.0, 319.0, 311.0, 325.0, 282.0, 297.0, 319.0, 314.0, 287.0, 283.0, 319.0, 314.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8371997037946857, "mean_processing_ms": 0.23582575562090372, "mean_inference_ms": 1.4353288532965658}, "off_policy_estimator": {}, "info": {"num_steps_trained": 9360000, "num_steps_sampled": 4992000, "sample_time_ms": 21348.738, "load_time_ms": 36.486, "grad_time_ms": 9719.814, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0004423701611813158, "policy_loss": -0.007186357397586107, "vf_loss": 81.85875701904297, "vf_explained_var": 0.7682639956474304, "kl": 0.002242224058136344, "entropy": 1.114312767982483, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4992000, "episodes_total": 12480, "training_iteration": 390, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-06-49", "timestamp": 1660259209, "time_this_iter_s": 29.65726089477539, "time_total_s": 17620.503420829773, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 17620.503420829773, "timesteps_since_restore": 4992000, "iterations_since_restore": 390, "perf": {"cpu_util_percent": 34.275609756097566, "ram_util_percent": 59.390243902439025}}
+{"episode_reward_max": 639.0, "episode_reward_min": 408.0, "episode_reward_mean": 614.77, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 198.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 307.385}, "custom_metrics": {"sparse_reward_mean": 213.2, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 188.37, "shaped_reward_min": 128, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.97, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 18.52, "onion_pickup_agent_1_min": 13, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.62, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 18.03, "useful_onion_pickup_agent_1_min": 12, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.2, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.25, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.05, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.1, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 16.45, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 18.02, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.37, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.84, "dish_pickup_agent_1_min": 4, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.44, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.08, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.67, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.56, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.05, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.54, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.15, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.54, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.13, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.45, 
"optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 18.02, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.45, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 18.02, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [593.0, 636.0, 633.0, 630.0, 630.0, 573.0, 630.0, 630.0, 630.0, 516.0, 636.0, 630.0, 633.0, 639.0, 630.0, 582.0, 633.0, 636.0, 636.0, 639.0, 639.0, 636.0, 567.0, 636.0, 636.0, 582.0, 636.0, 639.0, 636.0, 408.0, 636.0, 636.0, 587.0, 579.0, 570.0, 633.0, 633.0, 630.0, 639.0, 579.0, 582.0, 633.0, 579.0, 633.0, 636.0, 630.0, 633.0, 609.0, 582.0, 636.0, 630.0, 636.0, 576.0, 636.0, 633.0, 630.0, 636.0, 582.0, 582.0, 636.0, 630.0, 633.0, 630.0, 636.0, 579.0, 633.0, 570.0, 633.0, 582.0, 633.0, 633.0, 582.0, 579.0, 582.0, 633.0, 579.0, 633.0, 633.0, 576.0, 567.0, 636.0, 576.0, 636.0, 633.0, 639.0, 636.0, 579.0, 636.0, 627.0, 636.0, 636.0, 630.0, 630.0, 633.0, 633.0, 633.0, 579.0, 639.0, 633.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [296.0, 297.0, 322.0, 314.0, 316.0, 317.0, 316.0, 314.0, 312.0, 318.0, 287.0, 286.0, 313.0, 317.0, 318.0, 312.0, 308.0, 322.0, 254.0, 262.0, 324.0, 312.0, 310.0, 320.0, 313.0, 320.0, 319.0, 320.0, 321.0, 309.0, 291.0, 291.0, 316.0, 317.0, 319.0, 317.0, 314.0, 322.0, 320.0, 319.0, 319.0, 320.0, 319.0, 317.0, 274.0, 293.0, 309.0, 327.0, 319.0, 317.0, 288.0, 294.0, 322.0, 314.0, 319.0, 320.0, 319.0, 317.0, 198.0, 210.0, 314.0, 322.0, 313.0, 323.0, 293.0, 294.0, 288.0, 291.0, 295.0, 275.0, 319.0, 314.0, 311.0, 322.0, 319.0, 311.0, 317.0, 322.0, 282.0, 297.0, 294.0, 288.0, 316.0, 317.0, 288.0, 291.0, 311.0, 322.0, 314.0, 322.0, 313.0, 317.0, 317.0, 316.0, 310.0, 299.0, 293.0, 
289.0, 317.0, 319.0, 322.0, 308.0, 319.0, 317.0, 288.0, 288.0, 314.0, 322.0, 316.0, 317.0, 314.0, 316.0, 322.0, 314.0, 285.0, 297.0, 294.0, 288.0, 314.0, 322.0, 321.0, 309.0, 316.0, 317.0, 311.0, 319.0, 311.0, 325.0, 282.0, 297.0, 319.0, 314.0, 287.0, 283.0, 319.0, 314.0, 296.0, 286.0, 319.0, 314.0, 311.0, 322.0, 285.0, 297.0, 287.0, 292.0, 291.0, 291.0, 316.0, 317.0, 288.0, 291.0, 310.0, 323.0, 317.0, 316.0, 298.0, 278.0, 282.0, 285.0, 314.0, 322.0, 292.0, 284.0, 316.0, 320.0, 318.0, 315.0, 317.0, 322.0, 319.0, 317.0, 288.0, 291.0, 321.0, 315.0, 308.0, 319.0, 314.0, 322.0, 314.0, 322.0, 319.0, 311.0, 312.0, 318.0, 316.0, 317.0, 316.0, 317.0, 319.0, 314.0, 285.0, 294.0, 317.0, 322.0, 311.0, 322.0, 288.0, 291.0]}, "sampler_perf": {"mean_env_wait_ms": 0.83620884479323, "mean_processing_ms": 0.23562873648902904, "mean_inference_ms": 1.4341204983718234}, "off_policy_estimator": {}, "info": {"num_steps_trained": 9384000, "num_steps_sampled": 5004800, "sample_time_ms": 21231.946, "load_time_ms": 36.725, "grad_time_ms": 9739.689, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.004321941174566746, "policy_loss": -0.0030343374237418175, "vf_loss": 79.1146011352539, "vf_explained_var": 0.7782495021820068, "kl": 0.0022527193650603294, "entropy": 1.1103630065917969, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5004800, "episodes_total": 12512, "training_iteration": 391, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-07-19", "timestamp": 1660259239, "time_this_iter_s": 29.202332973480225, "time_total_s": 17649.705753803253, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 17649.705753803253, "timesteps_since_restore": 5004800, "iterations_since_restore": 391, "perf": {"cpu_util_percent": 32.102380952380955, "ram_util_percent": 59.785714285714285}}
+{"episode_reward_max": 639.0, "episode_reward_min": 408.0, "episode_reward_mean": 613.61, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 198.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 306.805}, "custom_metrics": {"sparse_reward_mean": 212.6, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 188.41, "shaped_reward_min": 128, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.84, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 18.63, "onion_pickup_agent_1_min": 13, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.4, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 18.06, "useful_onion_pickup_agent_1_min": 12, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.19, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.28, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.09, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.13, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 16.31, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 18.1, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.45, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.8, "dish_pickup_agent_1_min": 4, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.54, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.07, "useful_dish_pickup_agent_1_min": 3, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.71, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.54, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.07, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.58, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.1, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.56, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.08, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.31, 
"optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 18.1, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.31, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 18.1, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 576.0, 636.0, 639.0, 587.0, 636.0, 630.0, 633.0, 639.0, 639.0, 639.0, 587.0, 582.0, 525.0, 579.0, 636.0, 582.0, 636.0, 627.0, 630.0, 630.0, 587.0, 639.0, 633.0, 570.0, 630.0, 630.0, 639.0, 633.0, 627.0, 582.0, 576.0, 579.0, 633.0, 570.0, 633.0, 582.0, 633.0, 633.0, 582.0, 579.0, 582.0, 633.0, 579.0, 633.0, 633.0, 576.0, 567.0, 636.0, 576.0, 636.0, 633.0, 639.0, 636.0, 579.0, 636.0, 627.0, 636.0, 636.0, 630.0, 630.0, 633.0, 633.0, 633.0, 579.0, 639.0, 633.0, 579.0, 593.0, 636.0, 633.0, 630.0, 630.0, 573.0, 630.0, 630.0, 630.0, 516.0, 636.0, 630.0, 633.0, 639.0, 630.0, 582.0, 633.0, 636.0, 636.0, 639.0, 639.0, 636.0, 567.0, 636.0, 636.0, 582.0, 636.0, 639.0, 636.0, 408.0, 636.0, 636.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [291.0, 288.0, 290.0, 286.0, 322.0, 314.0, 322.0, 317.0, 299.0, 288.0, 314.0, 322.0, 313.0, 317.0, 321.0, 312.0, 314.0, 325.0, 319.0, 320.0, 317.0, 322.0, 293.0, 294.0, 288.0, 294.0, 270.0, 255.0, 289.0, 290.0, 322.0, 314.0, 286.0, 296.0, 319.0, 317.0, 313.0, 314.0, 316.0, 314.0, 316.0, 314.0, 290.0, 297.0, 319.0, 320.0, 319.0, 314.0, 279.0, 291.0, 315.0, 315.0, 316.0, 314.0, 317.0, 322.0, 317.0, 316.0, 300.0, 327.0, 301.0, 281.0, 285.0, 291.0, 282.0, 297.0, 319.0, 314.0, 287.0, 283.0, 319.0, 314.0, 296.0, 286.0, 319.0, 314.0, 311.0, 322.0, 285.0, 297.0, 287.0, 292.0, 291.0, 291.0, 316.0, 317.0, 288.0, 291.0, 310.0, 323.0, 317.0, 316.0, 298.0, 278.0, 282.0, 285.0, 314.0, 
322.0, 292.0, 284.0, 316.0, 320.0, 318.0, 315.0, 317.0, 322.0, 319.0, 317.0, 288.0, 291.0, 321.0, 315.0, 308.0, 319.0, 314.0, 322.0, 314.0, 322.0, 319.0, 311.0, 312.0, 318.0, 316.0, 317.0, 316.0, 317.0, 319.0, 314.0, 285.0, 294.0, 317.0, 322.0, 311.0, 322.0, 288.0, 291.0, 296.0, 297.0, 322.0, 314.0, 316.0, 317.0, 316.0, 314.0, 312.0, 318.0, 287.0, 286.0, 313.0, 317.0, 318.0, 312.0, 308.0, 322.0, 254.0, 262.0, 324.0, 312.0, 310.0, 320.0, 313.0, 320.0, 319.0, 320.0, 321.0, 309.0, 291.0, 291.0, 316.0, 317.0, 319.0, 317.0, 314.0, 322.0, 320.0, 319.0, 319.0, 320.0, 319.0, 317.0, 274.0, 293.0, 309.0, 327.0, 319.0, 317.0, 288.0, 294.0, 322.0, 314.0, 319.0, 320.0, 319.0, 317.0, 198.0, 210.0, 314.0, 322.0, 313.0, 323.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8352162722416216, "mean_processing_ms": 0.2354315996686975, "mean_inference_ms": 1.4328297296809587}, "off_policy_estimator": {}, "info": {"num_steps_trained": 9408000, "num_steps_sampled": 5017600, "sample_time_ms": 20999.824, "load_time_ms": 36.668, "grad_time_ms": 9737.715, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0033923883456736803, "policy_loss": -0.004631926771253347, "vf_loss": 85.770751953125, "vf_explained_var": 0.7699734568595886, "kl": 0.0020241406746208668, "entropy": 1.1055186986923218, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5017600, "episodes_total": 12544, "training_iteration": 392, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-07-47", "timestamp": 1660259267, "time_this_iter_s": 28.112272024154663, "time_total_s": 17677.818025827408, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 17677.818025827408, "timesteps_since_restore": 5017600, "iterations_since_restore": 392, "perf": {"cpu_util_percent": 32.03076923076923, "ram_util_percent": 59.29230769230767}}
+{"episode_reward_max": 639.0, "episode_reward_min": 408.0, "episode_reward_mean": 615.62, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 198.0}, "policy_reward_max": {"ppo": 329.0}, "policy_reward_mean": {"ppo": 307.81}, "custom_metrics": {"sparse_reward_mean": 213.2, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 189.22, "shaped_reward_min": 128, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.94, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.58, "onion_pickup_agent_1_min": 14, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.53, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 18.04, "useful_onion_pickup_agent_1_min": 12, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.18, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.3, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.07, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.15, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 16.42, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 18.01, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.47, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.83, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.57, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.19, "useful_dish_pickup_agent_1_min": 3, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.77, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.48, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.06, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.56, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.18, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.53, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.14, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.42, 
"optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 18.01, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.42, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 18.01, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [633.0, 633.0, 639.0, 582.0, 630.0, 633.0, 630.0, 579.0, 633.0, 630.0, 633.0, 630.0, 633.0, 630.0, 636.0, 633.0, 582.0, 633.0, 636.0, 636.0, 633.0, 582.0, 579.0, 627.0, 633.0, 587.0, 587.0, 581.0, 639.0, 630.0, 636.0, 639.0, 579.0, 639.0, 633.0, 579.0, 593.0, 636.0, 633.0, 630.0, 630.0, 573.0, 630.0, 630.0, 630.0, 516.0, 636.0, 630.0, 633.0, 639.0, 630.0, 582.0, 633.0, 636.0, 636.0, 639.0, 639.0, 636.0, 567.0, 636.0, 636.0, 582.0, 636.0, 639.0, 636.0, 408.0, 636.0, 636.0, 579.0, 576.0, 636.0, 639.0, 587.0, 636.0, 630.0, 633.0, 639.0, 639.0, 639.0, 587.0, 582.0, 525.0, 579.0, 636.0, 582.0, 636.0, 627.0, 630.0, 630.0, 587.0, 639.0, 633.0, 570.0, 630.0, 630.0, 639.0, 633.0, 627.0, 582.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [319.0, 314.0, 319.0, 314.0, 319.0, 320.0, 288.0, 294.0, 313.0, 317.0, 314.0, 319.0, 319.0, 311.0, 288.0, 291.0, 309.0, 324.0, 309.0, 321.0, 316.0, 317.0, 313.0, 317.0, 314.0, 319.0, 314.0, 316.0, 329.0, 307.0, 314.0, 319.0, 285.0, 297.0, 314.0, 319.0, 319.0, 317.0, 322.0, 314.0, 321.0, 312.0, 291.0, 291.0, 287.0, 292.0, 308.0, 319.0, 314.0, 319.0, 283.0, 304.0, 293.0, 294.0, 291.0, 290.0, 322.0, 317.0, 316.0, 314.0, 322.0, 314.0, 317.0, 322.0, 285.0, 294.0, 317.0, 322.0, 311.0, 322.0, 288.0, 291.0, 296.0, 297.0, 322.0, 314.0, 316.0, 317.0, 316.0, 314.0, 312.0, 318.0, 287.0, 286.0, 313.0, 317.0, 318.0, 312.0, 308.0, 322.0, 254.0, 262.0, 324.0, 312.0, 310.0, 320.0, 313.0, 
320.0, 319.0, 320.0, 321.0, 309.0, 291.0, 291.0, 316.0, 317.0, 319.0, 317.0, 314.0, 322.0, 320.0, 319.0, 319.0, 320.0, 319.0, 317.0, 274.0, 293.0, 309.0, 327.0, 319.0, 317.0, 288.0, 294.0, 322.0, 314.0, 319.0, 320.0, 319.0, 317.0, 198.0, 210.0, 314.0, 322.0, 313.0, 323.0, 291.0, 288.0, 290.0, 286.0, 322.0, 314.0, 322.0, 317.0, 299.0, 288.0, 314.0, 322.0, 313.0, 317.0, 321.0, 312.0, 314.0, 325.0, 319.0, 320.0, 317.0, 322.0, 293.0, 294.0, 288.0, 294.0, 270.0, 255.0, 289.0, 290.0, 322.0, 314.0, 286.0, 296.0, 319.0, 317.0, 313.0, 314.0, 316.0, 314.0, 316.0, 314.0, 290.0, 297.0, 319.0, 320.0, 319.0, 314.0, 279.0, 291.0, 315.0, 315.0, 316.0, 314.0, 317.0, 322.0, 317.0, 316.0, 300.0, 327.0, 301.0, 281.0, 285.0, 291.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8342365209622892, "mean_processing_ms": 0.23523671051615616, "mean_inference_ms": 1.4317637574949895}, "off_policy_estimator": {}, "info": {"num_steps_trained": 9432000, "num_steps_sampled": 5030400, "sample_time_ms": 21299.857, "load_time_ms": 37.172, "grad_time_ms": 9869.636, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 4.354631528258324e-05, "policy_loss": -0.0072962199337780476, "vf_loss": 78.87313079833984, "vf_explained_var": 0.7646682262420654, "kl": 0.0020736621227115393, "entropy": 1.0950974225997925, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5030400, "episodes_total": 12576, "training_iteration": 393, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-08-21", "timestamp": 1660259301, "time_this_iter_s": 34.420122146606445, "time_total_s": 17712.238147974014, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 17712.238147974014, "timesteps_since_restore": 5030400, "iterations_since_restore": 393, "perf": {"cpu_util_percent": 32.710204081632654, "ram_util_percent": 59.30816326530613}}
+{"episode_reward_max": 639.0, "episode_reward_min": 408.0, "episode_reward_mean": 614.45, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 198.0}, "policy_reward_max": {"ppo": 329.0}, "policy_reward_mean": {"ppo": 307.225}, "custom_metrics": {"sparse_reward_mean": 212.8, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 188.85, "shaped_reward_min": 128, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.08, "onion_pickup_agent_0_min": 14, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.29, "onion_pickup_agent_1_min": 14, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.72, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 19, "useful_onion_pickup_agent_1_mean": 17.77, "useful_onion_pickup_agent_1_min": 12, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.16, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.26, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.06, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.11, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 16.59, "potting_onion_agent_0_min": 13, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.73, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.39, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.9, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.45, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.28, "useful_dish_pickup_agent_1_min": 3, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.76, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.5, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.52, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.23, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.47, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.18, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.59, 
"optimal_onion_potting_agent_0_min": 13, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.73, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.59, "viable_onion_potting_agent_0_min": 13, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.73, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 624.0, 576.0, 576.0, 590.0, 590.0, 630.0, 633.0, 582.0, 636.0, 639.0, 633.0, 639.0, 639.0, 630.0, 627.0, 636.0, 636.0, 636.0, 587.0, 581.0, 636.0, 621.0, 639.0, 630.0, 633.0, 621.0, 636.0, 573.0, 636.0, 633.0, 522.0, 636.0, 408.0, 636.0, 636.0, 579.0, 576.0, 636.0, 639.0, 587.0, 636.0, 630.0, 633.0, 639.0, 639.0, 639.0, 587.0, 582.0, 525.0, 579.0, 636.0, 582.0, 636.0, 627.0, 630.0, 630.0, 587.0, 639.0, 633.0, 570.0, 630.0, 630.0, 639.0, 633.0, 627.0, 582.0, 576.0, 633.0, 633.0, 639.0, 582.0, 630.0, 633.0, 630.0, 579.0, 633.0, 630.0, 633.0, 630.0, 633.0, 630.0, 636.0, 633.0, 582.0, 633.0, 636.0, 636.0, 633.0, 582.0, 579.0, 627.0, 633.0, 587.0, 587.0, 581.0, 639.0, 630.0, 636.0, 639.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [288.0, 291.0, 312.0, 312.0, 290.0, 286.0, 287.0, 289.0, 297.0, 293.0, 294.0, 296.0, 316.0, 314.0, 319.0, 314.0, 286.0, 296.0, 311.0, 325.0, 319.0, 320.0, 314.0, 319.0, 320.0, 319.0, 322.0, 317.0, 316.0, 314.0, 308.0, 319.0, 322.0, 314.0, 314.0, 322.0, 309.0, 327.0, 296.0, 291.0, 290.0, 291.0, 316.0, 320.0, 309.0, 312.0, 317.0, 322.0, 316.0, 314.0, 316.0, 317.0, 313.0, 308.0, 319.0, 317.0, 280.0, 293.0, 317.0, 319.0, 314.0, 319.0, 262.0, 260.0, 319.0, 317.0, 198.0, 210.0, 314.0, 322.0, 313.0, 323.0, 291.0, 288.0, 290.0, 286.0, 322.0, 314.0, 322.0, 317.0, 299.0, 288.0, 314.0, 322.0, 313.0, 317.0, 321.0, 312.0, 314.0, 325.0, 319.0, 320.0, 317.0, 322.0, 293.0, 294.0, 288.0, 
294.0, 270.0, 255.0, 289.0, 290.0, 322.0, 314.0, 286.0, 296.0, 319.0, 317.0, 313.0, 314.0, 316.0, 314.0, 316.0, 314.0, 290.0, 297.0, 319.0, 320.0, 319.0, 314.0, 279.0, 291.0, 315.0, 315.0, 316.0, 314.0, 317.0, 322.0, 317.0, 316.0, 300.0, 327.0, 301.0, 281.0, 285.0, 291.0, 319.0, 314.0, 319.0, 314.0, 319.0, 320.0, 288.0, 294.0, 313.0, 317.0, 314.0, 319.0, 319.0, 311.0, 288.0, 291.0, 309.0, 324.0, 309.0, 321.0, 316.0, 317.0, 313.0, 317.0, 314.0, 319.0, 314.0, 316.0, 329.0, 307.0, 314.0, 319.0, 285.0, 297.0, 314.0, 319.0, 319.0, 317.0, 322.0, 314.0, 321.0, 312.0, 291.0, 291.0, 287.0, 292.0, 308.0, 319.0, 314.0, 319.0, 283.0, 304.0, 293.0, 294.0, 291.0, 290.0, 322.0, 317.0, 316.0, 314.0, 322.0, 314.0, 317.0, 322.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8332639627657553, "mean_processing_ms": 0.23504271675970742, "mean_inference_ms": 1.430763529891338}, "off_policy_estimator": {}, "info": {"num_steps_trained": 9456000, "num_steps_sampled": 5043200, "sample_time_ms": 21101.644, "load_time_ms": 36.639, "grad_time_ms": 9913.059, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0006912912358529866, "policy_loss": -0.006923032458871603, "vf_loss": 81.60092163085938, "vf_explained_var": 0.7633647918701172, "kl": 0.001780605292879045, "entropy": 1.0915051698684692, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5043200, "episodes_total": 12608, "training_iteration": 394, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-08-53", "timestamp": 1660259333, "time_this_iter_s": 32.24967384338379, "time_total_s": 17744.487821817398, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 17744.487821817398, "timesteps_since_restore": 5043200, "iterations_since_restore": 394, "perf": {"cpu_util_percent": 33.69347826086956, "ram_util_percent": 59.52391304347825}}
+{"episode_reward_max": 639.0, "episode_reward_min": 522.0, "episode_reward_mean": 617.82, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 260.0}, "policy_reward_max": {"ppo": 329.0}, "policy_reward_mean": {"ppo": 308.91}, "custom_metrics": {"sparse_reward_mean": 214.0, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 189.82, "shaped_reward_min": 162, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.15, "onion_pickup_agent_0_min": 14, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 18.27, "onion_pickup_agent_1_min": 14, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.88, "useful_onion_pickup_agent_0_min": 13, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 17.78, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.13, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.24, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.02, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.05, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.74, "potting_onion_agent_0_min": 13, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.69, "potting_onion_agent_1_min": 14, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.39, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.9, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.48, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.33, "useful_dish_pickup_agent_1_min": 3, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.74, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.48, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.53, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.32, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.46, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.25, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.74, 
"optimal_onion_potting_agent_0_min": 13, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.69, "optimal_onion_potting_agent_1_min": 14, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.74, "viable_onion_potting_agent_0_min": 13, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.69, "viable_onion_potting_agent_1_min": 14, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [636.0, 633.0, 633.0, 630.0, 576.0, 639.0, 639.0, 627.0, 636.0, 630.0, 636.0, 633.0, 587.0, 633.0, 579.0, 587.0, 630.0, 579.0, 633.0, 587.0, 636.0, 639.0, 627.0, 639.0, 630.0, 633.0, 639.0, 584.0, 590.0, 630.0, 636.0, 582.0, 633.0, 627.0, 582.0, 576.0, 633.0, 633.0, 639.0, 582.0, 630.0, 633.0, 630.0, 579.0, 633.0, 630.0, 633.0, 630.0, 633.0, 630.0, 636.0, 633.0, 582.0, 633.0, 636.0, 636.0, 633.0, 582.0, 579.0, 627.0, 633.0, 587.0, 587.0, 581.0, 639.0, 630.0, 636.0, 639.0, 579.0, 624.0, 576.0, 576.0, 590.0, 590.0, 630.0, 633.0, 582.0, 636.0, 639.0, 633.0, 639.0, 639.0, 630.0, 627.0, 636.0, 636.0, 636.0, 587.0, 581.0, 636.0, 621.0, 639.0, 630.0, 633.0, 621.0, 636.0, 573.0, 636.0, 633.0, 522.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [324.0, 312.0, 316.0, 317.0, 317.0, 316.0, 316.0, 314.0, 285.0, 291.0, 322.0, 317.0, 319.0, 320.0, 316.0, 311.0, 317.0, 319.0, 308.0, 322.0, 317.0, 319.0, 314.0, 319.0, 296.0, 291.0, 317.0, 316.0, 281.0, 298.0, 293.0, 294.0, 311.0, 319.0, 290.0, 289.0, 316.0, 317.0, 299.0, 288.0, 319.0, 317.0, 319.0, 320.0, 308.0, 319.0, 319.0, 320.0, 311.0, 319.0, 314.0, 319.0, 322.0, 317.0, 293.0, 291.0, 294.0, 296.0, 316.0, 314.0, 319.0, 317.0, 299.0, 283.0, 317.0, 316.0, 300.0, 327.0, 301.0, 281.0, 285.0, 291.0, 319.0, 314.0, 319.0, 314.0, 319.0, 320.0, 288.0, 294.0, 313.0, 317.0, 314.0, 319.0, 319.0, 311.0, 288.0, 291.0, 309.0, 324.0, 309.0, 321.0, 316.0, 317.0, 313.0, 317.0, 314.0, 
319.0, 314.0, 316.0, 329.0, 307.0, 314.0, 319.0, 285.0, 297.0, 314.0, 319.0, 319.0, 317.0, 322.0, 314.0, 321.0, 312.0, 291.0, 291.0, 287.0, 292.0, 308.0, 319.0, 314.0, 319.0, 283.0, 304.0, 293.0, 294.0, 291.0, 290.0, 322.0, 317.0, 316.0, 314.0, 322.0, 314.0, 317.0, 322.0, 288.0, 291.0, 312.0, 312.0, 290.0, 286.0, 287.0, 289.0, 297.0, 293.0, 294.0, 296.0, 316.0, 314.0, 319.0, 314.0, 286.0, 296.0, 311.0, 325.0, 319.0, 320.0, 314.0, 319.0, 320.0, 319.0, 322.0, 317.0, 316.0, 314.0, 308.0, 319.0, 322.0, 314.0, 314.0, 322.0, 309.0, 327.0, 296.0, 291.0, 290.0, 291.0, 316.0, 320.0, 309.0, 312.0, 317.0, 322.0, 316.0, 314.0, 316.0, 317.0, 313.0, 308.0, 319.0, 317.0, 280.0, 293.0, 317.0, 319.0, 314.0, 319.0, 262.0, 260.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8323023893129786, "mean_processing_ms": 0.23485230859207035, "mean_inference_ms": 1.4300119711867751}, "off_policy_estimator": {}, "info": {"num_steps_trained": 9480000, "num_steps_sampled": 5056000, "sample_time_ms": 21142.709, "load_time_ms": 37.292, "grad_time_ms": 10082.816, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.00028009479865431786, "policy_loss": -0.007545720785856247, "vf_loss": 83.69845581054688, "vf_explained_var": 0.7653185725212097, "kl": 0.001969197066500783, "entropy": 1.0880564451217651, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5056000, "episodes_total": 12640, "training_iteration": 395, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-09-29", "timestamp": 1660259369, "time_this_iter_s": 35.19951057434082, "time_total_s": 17779.68733239174, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 17779.68733239174, "timesteps_since_restore": 5056000, "iterations_since_restore": 395, "perf": {"cpu_util_percent": 30.6265306122449, "ram_util_percent": 59.11836734693878}}
+{"episode_reward_max": 639.0, "episode_reward_min": 522.0, "episode_reward_mean": 618.88, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 260.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 309.44}, "custom_metrics": {"sparse_reward_mean": 214.4, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 190.08, "shaped_reward_min": 160, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.01, "onion_pickup_agent_0_min": 14, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 18.44, "onion_pickup_agent_1_min": 14, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.79, "useful_onion_pickup_agent_0_min": 13, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 17.88, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.11, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.23, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.03, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.05, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.7, "potting_onion_agent_0_min": 13, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.92, "potting_onion_agent_1_min": 14, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.41, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.91, "dish_pickup_agent_1_min": 4, "dish_pickup_agent_1_max": 12, "useful_dish_pickup_agent_0_mean": 5.52, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.17, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.66, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.56, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.04, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.61, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.27, "soup_pickup_agent_1_min": 4, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.53, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.2, "soup_delivery_agent_1_min": 4, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.03, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.7, 
"optimal_onion_potting_agent_0_min": 13, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.92, "optimal_onion_potting_agent_1_min": 14, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.7, "viable_onion_potting_agent_0_min": 13, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.92, "viable_onion_potting_agent_1_min": 14, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [636.0, 630.0, 633.0, 633.0, 636.0, 582.0, 633.0, 630.0, 633.0, 630.0, 636.0, 636.0, 633.0, 576.0, 633.0, 582.0, 582.0, 627.0, 639.0, 639.0, 633.0, 636.0, 630.0, 560.0, 636.0, 636.0, 636.0, 582.0, 633.0, 636.0, 582.0, 578.0, 639.0, 630.0, 636.0, 639.0, 579.0, 624.0, 576.0, 576.0, 590.0, 590.0, 630.0, 633.0, 582.0, 636.0, 639.0, 633.0, 639.0, 639.0, 630.0, 627.0, 636.0, 636.0, 636.0, 587.0, 581.0, 636.0, 621.0, 639.0, 630.0, 633.0, 621.0, 636.0, 573.0, 636.0, 633.0, 522.0, 636.0, 633.0, 633.0, 630.0, 576.0, 639.0, 639.0, 627.0, 636.0, 630.0, 636.0, 633.0, 587.0, 633.0, 579.0, 587.0, 630.0, 579.0, 633.0, 587.0, 636.0, 639.0, 627.0, 639.0, 630.0, 633.0, 639.0, 584.0, 590.0, 630.0, 636.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [311.0, 325.0, 316.0, 314.0, 317.0, 316.0, 314.0, 319.0, 319.0, 317.0, 289.0, 293.0, 321.0, 312.0, 311.0, 319.0, 313.0, 320.0, 313.0, 317.0, 321.0, 315.0, 319.0, 317.0, 319.0, 314.0, 285.0, 291.0, 311.0, 322.0, 285.0, 297.0, 291.0, 291.0, 313.0, 314.0, 327.0, 312.0, 324.0, 315.0, 319.0, 314.0, 314.0, 322.0, 321.0, 309.0, 279.0, 281.0, 319.0, 317.0, 316.0, 320.0, 314.0, 322.0, 291.0, 291.0, 311.0, 322.0, 319.0, 317.0, 291.0, 291.0, 290.0, 288.0, 322.0, 317.0, 316.0, 314.0, 322.0, 314.0, 317.0, 322.0, 288.0, 291.0, 312.0, 312.0, 290.0, 286.0, 287.0, 289.0, 297.0, 293.0, 294.0, 296.0, 316.0, 314.0, 319.0, 314.0, 286.0, 296.0, 311.0, 325.0, 319.0, 320.0, 314.0, 319.0, 320.0, 
319.0, 322.0, 317.0, 316.0, 314.0, 308.0, 319.0, 322.0, 314.0, 314.0, 322.0, 309.0, 327.0, 296.0, 291.0, 290.0, 291.0, 316.0, 320.0, 309.0, 312.0, 317.0, 322.0, 316.0, 314.0, 316.0, 317.0, 313.0, 308.0, 319.0, 317.0, 280.0, 293.0, 317.0, 319.0, 314.0, 319.0, 262.0, 260.0, 324.0, 312.0, 316.0, 317.0, 317.0, 316.0, 316.0, 314.0, 285.0, 291.0, 322.0, 317.0, 319.0, 320.0, 316.0, 311.0, 317.0, 319.0, 308.0, 322.0, 317.0, 319.0, 314.0, 319.0, 296.0, 291.0, 317.0, 316.0, 281.0, 298.0, 293.0, 294.0, 311.0, 319.0, 290.0, 289.0, 316.0, 317.0, 299.0, 288.0, 319.0, 317.0, 319.0, 320.0, 308.0, 319.0, 319.0, 320.0, 311.0, 319.0, 314.0, 319.0, 322.0, 317.0, 293.0, 291.0, 294.0, 296.0, 316.0, 314.0, 319.0, 317.0, 299.0, 283.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8313395238935175, "mean_processing_ms": 0.2346607427713282, "mean_inference_ms": 1.4290797727914242}, "off_policy_estimator": {}, "info": {"num_steps_trained": 9504000, "num_steps_sampled": 5068800, "sample_time_ms": 20854.832, "load_time_ms": 37.297, "grad_time_ms": 9880.508, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0009566675289534032, "policy_loss": -0.0062157814390957355, "vf_loss": 77.21820068359375, "vf_explained_var": 0.7768221497535706, "kl": 0.0020733082201331854, "entropy": 1.0987348556518555, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5068800, "episodes_total": 12672, "training_iteration": 396, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-09-57", "timestamp": 1660259397, "time_this_iter_s": 28.42993927001953, "time_total_s": 17808.11727166176, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 17808.11727166176, "timesteps_since_restore": 5068800, "iterations_since_restore": 396, "perf": {"cpu_util_percent": 34.480487804878045, "ram_util_percent": 59.09024390243902}}
+{"episode_reward_max": 639.0, "episode_reward_min": 522.0, "episode_reward_mean": 616.53, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 260.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 308.265}, "custom_metrics": {"sparse_reward_mean": 213.6, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 189.33, "shaped_reward_min": 160, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.89, "onion_pickup_agent_0_min": 14, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 18.64, "onion_pickup_agent_1_min": 14, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.64, "useful_onion_pickup_agent_0_min": 13, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 17.93, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.12, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.3, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.05, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.08, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 16.48, "potting_onion_agent_0_min": 13, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 18.03, "potting_onion_agent_1_min": 14, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.38, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.93, "dish_pickup_agent_1_min": 4, "dish_pickup_agent_1_max": 12, "useful_dish_pickup_agent_0_mean": 5.57, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.08, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.62, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.63, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.63, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.18, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.57, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.11, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.48, 
"optimal_onion_potting_agent_0_min": 13, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 18.03, "optimal_onion_potting_agent_1_min": 14, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.48, "viable_onion_potting_agent_0_min": 13, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 18.03, "viable_onion_potting_agent_1_min": 14, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [636.0, 633.0, 639.0, 627.0, 582.0, 636.0, 630.0, 630.0, 636.0, 582.0, 587.0, 579.0, 639.0, 582.0, 578.0, 618.0, 570.0, 636.0, 567.0, 630.0, 582.0, 630.0, 582.0, 636.0, 639.0, 633.0, 582.0, 636.0, 636.0, 633.0, 636.0, 582.0, 573.0, 636.0, 633.0, 522.0, 636.0, 633.0, 633.0, 630.0, 576.0, 639.0, 639.0, 627.0, 636.0, 630.0, 636.0, 633.0, 587.0, 633.0, 579.0, 587.0, 630.0, 579.0, 633.0, 587.0, 636.0, 639.0, 627.0, 639.0, 630.0, 633.0, 639.0, 584.0, 590.0, 630.0, 636.0, 582.0, 636.0, 630.0, 633.0, 633.0, 636.0, 582.0, 633.0, 630.0, 633.0, 630.0, 636.0, 636.0, 633.0, 576.0, 633.0, 582.0, 582.0, 627.0, 639.0, 639.0, 633.0, 636.0, 630.0, 560.0, 636.0, 636.0, 636.0, 582.0, 633.0, 636.0, 582.0, 578.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [319.0, 317.0, 316.0, 317.0, 322.0, 317.0, 313.0, 314.0, 291.0, 291.0, 321.0, 315.0, 321.0, 309.0, 316.0, 314.0, 316.0, 320.0, 285.0, 297.0, 294.0, 293.0, 288.0, 291.0, 319.0, 320.0, 289.0, 293.0, 293.0, 285.0, 313.0, 305.0, 276.0, 294.0, 319.0, 317.0, 279.0, 288.0, 316.0, 314.0, 291.0, 291.0, 313.0, 317.0, 295.0, 287.0, 319.0, 317.0, 324.0, 315.0, 313.0, 320.0, 293.0, 289.0, 316.0, 320.0, 320.0, 316.0, 308.0, 325.0, 314.0, 322.0, 285.0, 297.0, 280.0, 293.0, 317.0, 319.0, 314.0, 319.0, 262.0, 260.0, 324.0, 312.0, 316.0, 317.0, 317.0, 316.0, 316.0, 314.0, 285.0, 291.0, 322.0, 317.0, 319.0, 320.0, 316.0, 311.0, 317.0, 319.0, 308.0, 322.0, 317.0, 319.0, 314.0, 319.0, 296.0, 
291.0, 317.0, 316.0, 281.0, 298.0, 293.0, 294.0, 311.0, 319.0, 290.0, 289.0, 316.0, 317.0, 299.0, 288.0, 319.0, 317.0, 319.0, 320.0, 308.0, 319.0, 319.0, 320.0, 311.0, 319.0, 314.0, 319.0, 322.0, 317.0, 293.0, 291.0, 294.0, 296.0, 316.0, 314.0, 319.0, 317.0, 299.0, 283.0, 311.0, 325.0, 316.0, 314.0, 317.0, 316.0, 314.0, 319.0, 319.0, 317.0, 289.0, 293.0, 321.0, 312.0, 311.0, 319.0, 313.0, 320.0, 313.0, 317.0, 321.0, 315.0, 319.0, 317.0, 319.0, 314.0, 285.0, 291.0, 311.0, 322.0, 285.0, 297.0, 291.0, 291.0, 313.0, 314.0, 327.0, 312.0, 324.0, 315.0, 319.0, 314.0, 314.0, 322.0, 321.0, 309.0, 279.0, 281.0, 319.0, 317.0, 316.0, 320.0, 314.0, 322.0, 291.0, 291.0, 311.0, 322.0, 319.0, 317.0, 291.0, 291.0, 290.0, 288.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8303852563726812, "mean_processing_ms": 0.2344708309042675, "mean_inference_ms": 1.4281787126468246}, "off_policy_estimator": {}, "info": {"num_steps_trained": 9528000, "num_steps_sampled": 5081600, "sample_time_ms": 20985.415, "load_time_ms": 37.239, "grad_time_ms": 9808.735, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.003967406693845987, "policy_loss": -0.0038024026434868574, "vf_loss": 83.1785888671875, "vf_explained_var": 0.769153892993927, "kl": 0.0019865171052515507, "entropy": 1.0961049795150757, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5081600, "episodes_total": 12704, "training_iteration": 397, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-10-28", "timestamp": 1660259428, "time_this_iter_s": 30.70757508277893, "time_total_s": 17838.824846744537, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 17838.824846744537, "timesteps_since_restore": 5081600, "iterations_since_restore": 397, "perf": {"cpu_util_percent": 33.08837209302326, "ram_util_percent": 59.13023255813955}}
+{"episode_reward_max": 639.0, "episode_reward_min": 522.0, "episode_reward_mean": 617.12, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 257.0}, "policy_reward_max": {"ppo": 328.0}, "policy_reward_mean": {"ppo": 308.56}, "custom_metrics": {"sparse_reward_mean": 213.8, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 189.52, "shaped_reward_min": 160, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.79, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 18.65, "onion_pickup_agent_1_min": 14, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.54, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.89, "useful_onion_pickup_agent_1_min": 14, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.08, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.22, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.03, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.1, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 16.42, "potting_onion_agent_0_min": 13, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 18.17, "potting_onion_agent_1_min": 14, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.33, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.87, "dish_pickup_agent_1_min": 4, "dish_pickup_agent_1_max": 12, "useful_dish_pickup_agent_0_mean": 5.59, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.06, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.56, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.59, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.67, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.13, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.62, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.08, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.03, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.42, 
"optimal_onion_potting_agent_0_min": 13, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 18.17, "optimal_onion_potting_agent_1_min": 14, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.42, "viable_onion_potting_agent_0_min": 13, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 18.17, "viable_onion_potting_agent_1_min": 14, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [627.0, 576.0, 636.0, 636.0, 630.0, 582.0, 582.0, 639.0, 593.0, 639.0, 630.0, 630.0, 636.0, 633.0, 636.0, 630.0, 636.0, 636.0, 522.0, 633.0, 636.0, 636.0, 633.0, 636.0, 582.0, 576.0, 636.0, 627.0, 633.0, 636.0, 587.0, 633.0, 590.0, 630.0, 636.0, 582.0, 636.0, 630.0, 633.0, 633.0, 636.0, 582.0, 633.0, 630.0, 633.0, 630.0, 636.0, 636.0, 633.0, 576.0, 633.0, 582.0, 582.0, 627.0, 639.0, 639.0, 633.0, 636.0, 630.0, 560.0, 636.0, 636.0, 636.0, 582.0, 633.0, 636.0, 582.0, 578.0, 636.0, 633.0, 639.0, 627.0, 582.0, 636.0, 630.0, 630.0, 636.0, 582.0, 587.0, 579.0, 639.0, 582.0, 578.0, 618.0, 570.0, 636.0, 567.0, 630.0, 582.0, 630.0, 582.0, 636.0, 639.0, 633.0, 582.0, 636.0, 636.0, 633.0, 636.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [313.0, 314.0, 282.0, 294.0, 308.0, 328.0, 314.0, 322.0, 318.0, 312.0, 287.0, 295.0, 291.0, 291.0, 317.0, 322.0, 299.0, 294.0, 317.0, 322.0, 311.0, 319.0, 317.0, 313.0, 319.0, 317.0, 316.0, 317.0, 321.0, 315.0, 316.0, 314.0, 318.0, 318.0, 322.0, 314.0, 257.0, 265.0, 319.0, 314.0, 317.0, 319.0, 322.0, 314.0, 314.0, 319.0, 314.0, 322.0, 289.0, 293.0, 282.0, 294.0, 316.0, 320.0, 311.0, 316.0, 314.0, 319.0, 319.0, 317.0, 299.0, 288.0, 316.0, 317.0, 294.0, 296.0, 316.0, 314.0, 319.0, 317.0, 299.0, 283.0, 311.0, 325.0, 316.0, 314.0, 317.0, 316.0, 314.0, 319.0, 319.0, 317.0, 289.0, 293.0, 321.0, 312.0, 311.0, 319.0, 313.0, 320.0, 313.0, 317.0, 321.0, 315.0, 319.0, 317.0, 319.0, 
314.0, 285.0, 291.0, 311.0, 322.0, 285.0, 297.0, 291.0, 291.0, 313.0, 314.0, 327.0, 312.0, 324.0, 315.0, 319.0, 314.0, 314.0, 322.0, 321.0, 309.0, 279.0, 281.0, 319.0, 317.0, 316.0, 320.0, 314.0, 322.0, 291.0, 291.0, 311.0, 322.0, 319.0, 317.0, 291.0, 291.0, 290.0, 288.0, 319.0, 317.0, 316.0, 317.0, 322.0, 317.0, 313.0, 314.0, 291.0, 291.0, 321.0, 315.0, 321.0, 309.0, 316.0, 314.0, 316.0, 320.0, 285.0, 297.0, 294.0, 293.0, 288.0, 291.0, 319.0, 320.0, 289.0, 293.0, 293.0, 285.0, 313.0, 305.0, 276.0, 294.0, 319.0, 317.0, 279.0, 288.0, 316.0, 314.0, 291.0, 291.0, 313.0, 317.0, 295.0, 287.0, 319.0, 317.0, 324.0, 315.0, 313.0, 320.0, 293.0, 289.0, 316.0, 320.0, 320.0, 316.0, 308.0, 325.0, 314.0, 322.0, 285.0, 297.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8294402136883693, "mean_processing_ms": 0.23428178262037597, "mean_inference_ms": 1.4273260562333872}, "off_policy_estimator": {}, "info": {"num_steps_trained": 9552000, "num_steps_sampled": 5094400, "sample_time_ms": 21312.43, "load_time_ms": 37.532, "grad_time_ms": 9807.592, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.002816990716382861, "policy_loss": -0.004787659738212824, "vf_loss": 81.49095916748047, "vf_explained_var": 0.7696583867073059, "kl": 0.0025824178010225296, "entropy": 1.0888774394989014, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5094400, "episodes_total": 12736, "training_iteration": 398, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-11-02", "timestamp": 1660259462, "time_this_iter_s": 33.86050295829773, "time_total_s": 17872.685349702835, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 17872.685349702835, "timesteps_since_restore": 5094400, "iterations_since_restore": 398, "perf": {"cpu_util_percent": 30.977083333333336, "ram_util_percent": 59.083333333333336}}
+{"episode_reward_max": 639.0, "episode_reward_min": 197.0, "episode_reward_mean": 609.19, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 97.0}, "policy_reward_max": {"ppo": 328.0}, "policy_reward_mean": {"ppo": 304.595}, "custom_metrics": {"sparse_reward_mean": 210.8, "sparse_reward_min": 60, "sparse_reward_max": 220, "shaped_reward_mean": 187.59, "shaped_reward_min": 77, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.74, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 18.39, "onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.43, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.71, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.11, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.22, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.03, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.08, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 16.27, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.89, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.32, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.82, "dish_pickup_agent_1_min": 4, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.52, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.1, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.63, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.53, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.56, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.1, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 6, "soup_delivery_agent_0_mean": 5.53, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.05, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.27, 
"optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.89, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.27, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.89, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [197.0, 582.0, 636.0, 630.0, 636.0, 579.0, 636.0, 636.0, 639.0, 630.0, 582.0, 582.0, 630.0, 633.0, 633.0, 399.0, 636.0, 639.0, 636.0, 587.0, 576.0, 630.0, 582.0, 630.0, 587.0, 633.0, 636.0, 544.0, 624.0, 636.0, 581.0, 636.0, 633.0, 636.0, 582.0, 578.0, 636.0, 633.0, 639.0, 627.0, 582.0, 636.0, 630.0, 630.0, 636.0, 582.0, 587.0, 579.0, 639.0, 582.0, 578.0, 618.0, 570.0, 636.0, 567.0, 630.0, 582.0, 630.0, 582.0, 636.0, 639.0, 633.0, 582.0, 636.0, 636.0, 633.0, 636.0, 582.0, 627.0, 576.0, 636.0, 636.0, 630.0, 582.0, 582.0, 639.0, 593.0, 639.0, 630.0, 630.0, 636.0, 633.0, 636.0, 630.0, 636.0, 636.0, 522.0, 633.0, 636.0, 636.0, 633.0, 636.0, 582.0, 576.0, 636.0, 627.0, 633.0, 636.0, 587.0, 633.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [100.0, 97.0, 296.0, 286.0, 319.0, 317.0, 311.0, 319.0, 316.0, 320.0, 291.0, 288.0, 317.0, 319.0, 319.0, 317.0, 317.0, 322.0, 311.0, 319.0, 296.0, 286.0, 291.0, 291.0, 313.0, 317.0, 311.0, 322.0, 308.0, 325.0, 193.0, 206.0, 321.0, 315.0, 322.0, 317.0, 322.0, 314.0, 288.0, 299.0, 284.0, 292.0, 311.0, 319.0, 291.0, 291.0, 319.0, 311.0, 288.0, 299.0, 324.0, 309.0, 319.0, 317.0, 279.0, 265.0, 311.0, 313.0, 313.0, 323.0, 294.0, 287.0, 314.0, 322.0, 311.0, 322.0, 319.0, 317.0, 291.0, 291.0, 290.0, 288.0, 319.0, 317.0, 316.0, 317.0, 322.0, 317.0, 313.0, 314.0, 291.0, 291.0, 321.0, 315.0, 321.0, 309.0, 316.0, 314.0, 316.0, 320.0, 285.0, 297.0, 294.0, 293.0, 288.0, 291.0, 319.0, 
320.0, 289.0, 293.0, 293.0, 285.0, 313.0, 305.0, 276.0, 294.0, 319.0, 317.0, 279.0, 288.0, 316.0, 314.0, 291.0, 291.0, 313.0, 317.0, 295.0, 287.0, 319.0, 317.0, 324.0, 315.0, 313.0, 320.0, 293.0, 289.0, 316.0, 320.0, 320.0, 316.0, 308.0, 325.0, 314.0, 322.0, 285.0, 297.0, 313.0, 314.0, 282.0, 294.0, 308.0, 328.0, 314.0, 322.0, 318.0, 312.0, 287.0, 295.0, 291.0, 291.0, 317.0, 322.0, 299.0, 294.0, 317.0, 322.0, 311.0, 319.0, 317.0, 313.0, 319.0, 317.0, 316.0, 317.0, 321.0, 315.0, 316.0, 314.0, 318.0, 318.0, 322.0, 314.0, 257.0, 265.0, 319.0, 314.0, 317.0, 319.0, 322.0, 314.0, 314.0, 319.0, 314.0, 322.0, 289.0, 293.0, 282.0, 294.0, 316.0, 320.0, 311.0, 316.0, 314.0, 319.0, 319.0, 317.0, 299.0, 288.0, 316.0, 317.0]}, "sampler_perf": {"mean_env_wait_ms": 0.82850652599667, "mean_processing_ms": 0.23409592110050972, "mean_inference_ms": 1.426659038222931}, "off_policy_estimator": {}, "info": {"num_steps_trained": 9576000, "num_steps_sampled": 5107200, "sample_time_ms": 21603.638, "load_time_ms": 37.362, "grad_time_ms": 9958.009, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.00230390764772892, "policy_loss": -0.00582013139501214, "vf_loss": 86.7406005859375, "vf_explained_var": 0.7753866314888, "kl": 0.0019396115094423294, "entropy": 1.1000421047210693, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5107200, "episodes_total": 12768, "training_iteration": 399, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-11-36", "timestamp": 1660259496, "time_this_iter_s": 34.40714716911316, "time_total_s": 17907.09249687195, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 
12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": 
{}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 
0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 17907.09249687195, "timesteps_since_restore": 5107200, "iterations_since_restore": 399, "perf": {"cpu_util_percent": 33.638775510204084, "ram_util_percent": 59.13265306122449}}
+{"episode_reward_max": 639.0, "episode_reward_min": 197.0, "episode_reward_mean": 612.14, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 97.0}, "policy_reward_max": {"ppo": 328.0}, "policy_reward_mean": {"ppo": 306.07}, "custom_metrics": {"sparse_reward_mean": 211.8, "sparse_reward_min": 60, "sparse_reward_max": 220, "shaped_reward_mean": 188.54, "shaped_reward_min": 77, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.01, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 18.18, "onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.72, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.58, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.16, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.19, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.02, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.06, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.55, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.72, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.32, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.92, "dish_pickup_agent_1_min": 4, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.55, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.21, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.69, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.58, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.5, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.2, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 6, "soup_delivery_agent_0_mean": 5.47, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.17, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.55, 
"optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.72, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.55, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.72, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [636.0, 630.0, 587.0, 636.0, 636.0, 633.0, 579.0, 633.0, 582.0, 627.0, 630.0, 590.0, 639.0, 633.0, 579.0, 630.0, 630.0, 636.0, 639.0, 633.0, 630.0, 630.0, 630.0, 630.0, 639.0, 636.0, 633.0, 636.0, 582.0, 636.0, 582.0, 579.0, 636.0, 633.0, 636.0, 582.0, 627.0, 576.0, 636.0, 636.0, 630.0, 582.0, 582.0, 639.0, 593.0, 639.0, 630.0, 630.0, 636.0, 633.0, 636.0, 630.0, 636.0, 636.0, 522.0, 633.0, 636.0, 636.0, 633.0, 636.0, 582.0, 576.0, 636.0, 627.0, 633.0, 636.0, 587.0, 633.0, 197.0, 582.0, 636.0, 630.0, 636.0, 579.0, 636.0, 636.0, 639.0, 630.0, 582.0, 582.0, 630.0, 633.0, 633.0, 399.0, 636.0, 639.0, 636.0, 587.0, 576.0, 630.0, 582.0, 630.0, 587.0, 633.0, 636.0, 544.0, 624.0, 636.0, 581.0, 636.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [314.0, 322.0, 314.0, 316.0, 294.0, 293.0, 316.0, 320.0, 319.0, 317.0, 317.0, 316.0, 289.0, 290.0, 319.0, 314.0, 293.0, 289.0, 319.0, 308.0, 316.0, 314.0, 293.0, 297.0, 322.0, 317.0, 319.0, 314.0, 294.0, 285.0, 321.0, 309.0, 319.0, 311.0, 319.0, 317.0, 317.0, 322.0, 322.0, 311.0, 319.0, 311.0, 311.0, 319.0, 308.0, 322.0, 313.0, 317.0, 314.0, 325.0, 317.0, 319.0, 316.0, 317.0, 319.0, 317.0, 296.0, 286.0, 314.0, 322.0, 291.0, 291.0, 290.0, 289.0, 320.0, 316.0, 308.0, 325.0, 314.0, 322.0, 285.0, 297.0, 313.0, 314.0, 282.0, 294.0, 308.0, 328.0, 314.0, 322.0, 318.0, 312.0, 287.0, 295.0, 291.0, 291.0, 317.0, 322.0, 299.0, 294.0, 317.0, 322.0, 311.0, 319.0, 317.0, 313.0, 319.0, 
317.0, 316.0, 317.0, 321.0, 315.0, 316.0, 314.0, 318.0, 318.0, 322.0, 314.0, 257.0, 265.0, 319.0, 314.0, 317.0, 319.0, 322.0, 314.0, 314.0, 319.0, 314.0, 322.0, 289.0, 293.0, 282.0, 294.0, 316.0, 320.0, 311.0, 316.0, 314.0, 319.0, 319.0, 317.0, 299.0, 288.0, 316.0, 317.0, 100.0, 97.0, 296.0, 286.0, 319.0, 317.0, 311.0, 319.0, 316.0, 320.0, 291.0, 288.0, 317.0, 319.0, 319.0, 317.0, 317.0, 322.0, 311.0, 319.0, 296.0, 286.0, 291.0, 291.0, 313.0, 317.0, 311.0, 322.0, 308.0, 325.0, 193.0, 206.0, 321.0, 315.0, 322.0, 317.0, 322.0, 314.0, 288.0, 299.0, 284.0, 292.0, 311.0, 319.0, 291.0, 291.0, 319.0, 311.0, 288.0, 299.0, 324.0, 309.0, 319.0, 317.0, 279.0, 265.0, 311.0, 313.0, 313.0, 323.0, 294.0, 287.0, 314.0, 322.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8275880756644836, "mean_processing_ms": 0.23391670126994718, "mean_inference_ms": 1.4262180371248092}, "off_policy_estimator": {}, "info": {"num_steps_trained": 9600000, "num_steps_sampled": 5120000, "sample_time_ms": 22135.485, "load_time_ms": 37.429, "grad_time_ms": 9991.096, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0019203064730390906, "policy_loss": -0.005054800305515528, "vf_loss": 75.28291320800781, "vf_explained_var": 0.7728467583656311, "kl": 0.00209710281342268, "entropy": 1.106364130973816, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5120000, "episodes_total": 12800, "training_iteration": 400, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-12-12", "timestamp": 1660259532, "time_this_iter_s": 35.30730485916138, "time_total_s": 17942.39980173111, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 17942.39980173111, "timesteps_since_restore": 5120000, "iterations_since_restore": 400, "perf": {"cpu_util_percent": 33.525999999999996, "ram_util_percent": 59.168}}
+{"episode_reward_max": 639.0, "episode_reward_min": 197.0, "episode_reward_mean": 613.27, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 97.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 306.635}, "custom_metrics": {"sparse_reward_mean": 212.4, "sparse_reward_min": 60, "sparse_reward_max": 220, "shaped_reward_mean": 188.47, "shaped_reward_min": 77, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.0, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 18.22, "onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.71, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 17.7, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.15, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.21, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.02, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.04, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.48, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.76, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.44, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.92, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.61, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.09, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.72, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.63, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.58, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.15, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.56, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.11, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.48, 
"optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.76, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.48, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.76, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [630.0, 636.0, 636.0, 633.0, 627.0, 582.0, 633.0, 639.0, 624.0, 633.0, 630.0, 633.0, 636.0, 639.0, 582.0, 630.0, 627.0, 633.0, 630.0, 633.0, 630.0, 567.0, 630.0, 539.0, 630.0, 630.0, 633.0, 630.0, 633.0, 630.0, 639.0, 587.0, 633.0, 636.0, 587.0, 633.0, 197.0, 582.0, 636.0, 630.0, 636.0, 579.0, 636.0, 636.0, 639.0, 630.0, 582.0, 582.0, 630.0, 633.0, 633.0, 399.0, 636.0, 639.0, 636.0, 587.0, 576.0, 630.0, 582.0, 630.0, 587.0, 633.0, 636.0, 544.0, 624.0, 636.0, 581.0, 636.0, 636.0, 630.0, 587.0, 636.0, 636.0, 633.0, 579.0, 633.0, 582.0, 627.0, 630.0, 590.0, 639.0, 633.0, 579.0, 630.0, 630.0, 636.0, 639.0, 633.0, 630.0, 630.0, 630.0, 630.0, 639.0, 636.0, 633.0, 636.0, 582.0, 636.0, 582.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [311.0, 319.0, 319.0, 317.0, 319.0, 317.0, 316.0, 317.0, 318.0, 309.0, 294.0, 288.0, 316.0, 317.0, 319.0, 320.0, 305.0, 319.0, 319.0, 314.0, 307.0, 323.0, 310.0, 323.0, 311.0, 325.0, 316.0, 323.0, 285.0, 297.0, 323.0, 307.0, 316.0, 311.0, 319.0, 314.0, 318.0, 312.0, 314.0, 319.0, 322.0, 308.0, 283.0, 284.0, 312.0, 318.0, 277.0, 262.0, 319.0, 311.0, 319.0, 311.0, 319.0, 314.0, 318.0, 312.0, 311.0, 322.0, 311.0, 319.0, 319.0, 320.0, 291.0, 296.0, 314.0, 319.0, 319.0, 317.0, 299.0, 288.0, 316.0, 317.0, 100.0, 97.0, 296.0, 286.0, 319.0, 317.0, 311.0, 319.0, 316.0, 320.0, 291.0, 288.0, 317.0, 319.0, 319.0, 317.0, 317.0, 322.0, 311.0, 319.0, 296.0, 286.0, 291.0, 291.0, 313.0, 
317.0, 311.0, 322.0, 308.0, 325.0, 193.0, 206.0, 321.0, 315.0, 322.0, 317.0, 322.0, 314.0, 288.0, 299.0, 284.0, 292.0, 311.0, 319.0, 291.0, 291.0, 319.0, 311.0, 288.0, 299.0, 324.0, 309.0, 319.0, 317.0, 279.0, 265.0, 311.0, 313.0, 313.0, 323.0, 294.0, 287.0, 314.0, 322.0, 314.0, 322.0, 314.0, 316.0, 294.0, 293.0, 316.0, 320.0, 319.0, 317.0, 317.0, 316.0, 289.0, 290.0, 319.0, 314.0, 293.0, 289.0, 319.0, 308.0, 316.0, 314.0, 293.0, 297.0, 322.0, 317.0, 319.0, 314.0, 294.0, 285.0, 321.0, 309.0, 319.0, 311.0, 319.0, 317.0, 317.0, 322.0, 322.0, 311.0, 319.0, 311.0, 311.0, 319.0, 308.0, 322.0, 313.0, 317.0, 314.0, 325.0, 317.0, 319.0, 316.0, 317.0, 319.0, 317.0, 296.0, 286.0, 314.0, 322.0, 291.0, 291.0, 290.0, 289.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8266799330355084, "mean_processing_ms": 0.23374167551235864, "mean_inference_ms": 1.4257962598910456}, "off_policy_estimator": {}, "info": {"num_steps_trained": 9624000, "num_steps_sampled": 5132800, "sample_time_ms": 22522.212, "load_time_ms": 37.519, "grad_time_ms": 10359.995, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.000927128829061985, "policy_loss": -0.006055487785488367, "vf_loss": 75.37408447265625, "vf_explained_var": 0.7751708030700684, "kl": 0.0019053876167163253, "entropy": 1.1095930337905884, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5132800, "episodes_total": 12832, "training_iteration": 401, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-12-49", "timestamp": 1660259569, "time_this_iter_s": 36.76053810119629, "time_total_s": 17979.160339832306, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 17979.160339832306, "timesteps_since_restore": 5132800, "iterations_since_restore": 401, "perf": {"cpu_util_percent": 34.715094339622645, "ram_util_percent": 59.21886792452831}}
+{"episode_reward_max": 639.0, "episode_reward_min": 527.0, "episode_reward_mean": 619.65, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 254.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 309.825}, "custom_metrics": {"sparse_reward_mean": 214.8, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 190.05, "shaped_reward_min": 167, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.07, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 18.47, "onion_pickup_agent_1_min": 14, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.77, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 17.95, "useful_onion_pickup_agent_1_min": 12, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.16, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.23, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.02, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.04, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.6, "potting_onion_agent_0_min": 13, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.89, "potting_onion_agent_1_min": 14, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.53, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.82, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.66, "useful_dish_pickup_agent_0_min": 4, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.1, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.73, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.54, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.04, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.7, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.16, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.62, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.15, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.6, 
"optimal_onion_potting_agent_0_min": 13, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.89, "optimal_onion_potting_agent_1_min": 14, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.6, "viable_onion_potting_agent_0_min": 13, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.89, "viable_onion_potting_agent_1_min": 14, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [527.0, 636.0, 633.0, 636.0, 633.0, 582.0, 587.0, 630.0, 630.0, 627.0, 582.0, 587.0, 627.0, 624.0, 636.0, 633.0, 587.0, 630.0, 639.0, 636.0, 578.0, 633.0, 639.0, 630.0, 639.0, 633.0, 636.0, 582.0, 636.0, 584.0, 630.0, 581.0, 624.0, 636.0, 581.0, 636.0, 636.0, 630.0, 587.0, 636.0, 636.0, 633.0, 579.0, 633.0, 582.0, 627.0, 630.0, 590.0, 639.0, 633.0, 579.0, 630.0, 630.0, 636.0, 639.0, 633.0, 630.0, 630.0, 630.0, 630.0, 639.0, 636.0, 633.0, 636.0, 582.0, 636.0, 582.0, 579.0, 630.0, 636.0, 636.0, 633.0, 627.0, 582.0, 633.0, 639.0, 624.0, 633.0, 630.0, 633.0, 636.0, 639.0, 582.0, 630.0, 627.0, 633.0, 630.0, 633.0, 630.0, 567.0, 630.0, 539.0, 630.0, 630.0, 633.0, 630.0, 633.0, 630.0, 639.0, 587.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [273.0, 254.0, 316.0, 320.0, 316.0, 317.0, 316.0, 320.0, 319.0, 314.0, 291.0, 291.0, 301.0, 286.0, 314.0, 316.0, 321.0, 309.0, 318.0, 309.0, 296.0, 286.0, 291.0, 296.0, 318.0, 309.0, 308.0, 316.0, 316.0, 320.0, 316.0, 317.0, 301.0, 286.0, 314.0, 316.0, 319.0, 320.0, 320.0, 316.0, 295.0, 283.0, 314.0, 319.0, 319.0, 320.0, 311.0, 319.0, 322.0, 317.0, 314.0, 319.0, 319.0, 317.0, 288.0, 294.0, 311.0, 325.0, 290.0, 294.0, 313.0, 317.0, 296.0, 285.0, 311.0, 313.0, 313.0, 323.0, 294.0, 287.0, 314.0, 322.0, 314.0, 322.0, 314.0, 316.0, 294.0, 293.0, 316.0, 320.0, 319.0, 317.0, 317.0, 316.0, 289.0, 290.0, 319.0, 314.0, 293.0, 289.0, 319.0, 308.0, 316.0, 314.0, 293.0, 297.0, 322.0, 
317.0, 319.0, 314.0, 294.0, 285.0, 321.0, 309.0, 319.0, 311.0, 319.0, 317.0, 317.0, 322.0, 322.0, 311.0, 319.0, 311.0, 311.0, 319.0, 308.0, 322.0, 313.0, 317.0, 314.0, 325.0, 317.0, 319.0, 316.0, 317.0, 319.0, 317.0, 296.0, 286.0, 314.0, 322.0, 291.0, 291.0, 290.0, 289.0, 311.0, 319.0, 319.0, 317.0, 319.0, 317.0, 316.0, 317.0, 318.0, 309.0, 294.0, 288.0, 316.0, 317.0, 319.0, 320.0, 305.0, 319.0, 319.0, 314.0, 307.0, 323.0, 310.0, 323.0, 311.0, 325.0, 316.0, 323.0, 285.0, 297.0, 323.0, 307.0, 316.0, 311.0, 319.0, 314.0, 318.0, 312.0, 314.0, 319.0, 322.0, 308.0, 283.0, 284.0, 312.0, 318.0, 277.0, 262.0, 319.0, 311.0, 319.0, 311.0, 319.0, 314.0, 318.0, 312.0, 311.0, 322.0, 311.0, 319.0, 319.0, 320.0, 291.0, 296.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8257902245780139, "mean_processing_ms": 0.23357282477056074, "mean_inference_ms": 1.425400068641748}, "off_policy_estimator": {}, "info": {"num_steps_trained": 9648000, "num_steps_sampled": 5145600, "sample_time_ms": 23036.82, "load_time_ms": 37.721, "grad_time_ms": 10540.081, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0013805682538077235, "policy_loss": -0.006049450021237135, "vf_loss": 79.8260269165039, "vf_explained_var": 0.7674198746681213, "kl": 0.002044239779934287, "entropy": 1.1051733493804932, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5145600, "episodes_total": 12864, "training_iteration": 402, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-13-24", "timestamp": 1660259604, "time_this_iter_s": 35.064194202423096, "time_total_s": 18014.22453403473, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 18014.22453403473, "timesteps_since_restore": 5145600, "iterations_since_restore": 402, "perf": {"cpu_util_percent": 38.665306122448975, "ram_util_percent": 59.40408163265307}}
+{"episode_reward_max": 639.0, "episode_reward_min": 522.0, "episode_reward_mean": 614.54, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 254.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 307.27}, "custom_metrics": {"sparse_reward_mean": 213.2, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 188.14, "shaped_reward_min": 162, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.78, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 18.59, "onion_pickup_agent_1_min": 14, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.44, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 18.08, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.16, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.23, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.05, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.04, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.28, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 18.02, "potting_onion_agent_1_min": 14, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.51, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.72, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.58, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.9, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.65, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.56, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.05, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.72, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.05, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.65, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.03, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.28, 
"optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 18.02, "optimal_onion_potting_agent_1_min": 14, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.28, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 18.02, "viable_onion_potting_agent_1_min": 14, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [527.0, 582.0, 630.0, 639.0, 630.0, 582.0, 522.0, 621.0, 582.0, 564.0, 636.0, 630.0, 630.0, 630.0, 579.0, 633.0, 627.0, 636.0, 630.0, 633.0, 567.0, 582.0, 630.0, 639.0, 636.0, 636.0, 633.0, 567.0, 582.0, 627.0, 630.0, 576.0, 582.0, 636.0, 582.0, 579.0, 630.0, 636.0, 636.0, 633.0, 627.0, 582.0, 633.0, 639.0, 624.0, 633.0, 630.0, 633.0, 636.0, 639.0, 582.0, 630.0, 627.0, 633.0, 630.0, 633.0, 630.0, 567.0, 630.0, 539.0, 630.0, 630.0, 633.0, 630.0, 633.0, 630.0, 639.0, 587.0, 527.0, 636.0, 633.0, 636.0, 633.0, 582.0, 587.0, 630.0, 630.0, 627.0, 582.0, 587.0, 627.0, 624.0, 636.0, 633.0, 587.0, 630.0, 639.0, 636.0, 578.0, 633.0, 639.0, 630.0, 639.0, 633.0, 636.0, 582.0, 636.0, 584.0, 630.0, 581.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [258.0, 269.0, 291.0, 291.0, 322.0, 308.0, 319.0, 320.0, 313.0, 317.0, 288.0, 294.0, 268.0, 254.0, 310.0, 311.0, 288.0, 294.0, 268.0, 296.0, 313.0, 323.0, 318.0, 312.0, 315.0, 315.0, 313.0, 317.0, 285.0, 294.0, 318.0, 315.0, 321.0, 306.0, 317.0, 319.0, 313.0, 317.0, 314.0, 319.0, 282.0, 285.0, 294.0, 288.0, 314.0, 316.0, 322.0, 317.0, 312.0, 324.0, 319.0, 317.0, 313.0, 320.0, 290.0, 277.0, 299.0, 283.0, 308.0, 319.0, 319.0, 311.0, 291.0, 285.0, 296.0, 286.0, 314.0, 322.0, 291.0, 291.0, 290.0, 289.0, 311.0, 319.0, 319.0, 317.0, 319.0, 317.0, 316.0, 317.0, 318.0, 309.0, 294.0, 288.0, 316.0, 317.0, 319.0, 320.0, 305.0, 319.0, 319.0, 314.0, 307.0, 323.0, 310.0, 323.0, 311.0, 
325.0, 316.0, 323.0, 285.0, 297.0, 323.0, 307.0, 316.0, 311.0, 319.0, 314.0, 318.0, 312.0, 314.0, 319.0, 322.0, 308.0, 283.0, 284.0, 312.0, 318.0, 277.0, 262.0, 319.0, 311.0, 319.0, 311.0, 319.0, 314.0, 318.0, 312.0, 311.0, 322.0, 311.0, 319.0, 319.0, 320.0, 291.0, 296.0, 273.0, 254.0, 316.0, 320.0, 316.0, 317.0, 316.0, 320.0, 319.0, 314.0, 291.0, 291.0, 301.0, 286.0, 314.0, 316.0, 321.0, 309.0, 318.0, 309.0, 296.0, 286.0, 291.0, 296.0, 318.0, 309.0, 308.0, 316.0, 316.0, 320.0, 316.0, 317.0, 301.0, 286.0, 314.0, 316.0, 319.0, 320.0, 320.0, 316.0, 295.0, 283.0, 314.0, 319.0, 319.0, 320.0, 311.0, 319.0, 322.0, 317.0, 314.0, 319.0, 319.0, 317.0, 288.0, 294.0, 311.0, 325.0, 290.0, 294.0, 313.0, 317.0, 296.0, 285.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8249056294785048, "mean_processing_ms": 0.2334031065407326, "mean_inference_ms": 1.4249795896358415}, "off_policy_estimator": {}, "info": {"num_steps_trained": 9672000, "num_steps_sampled": 5158400, "sample_time_ms": 23134.408, "load_time_ms": 37.402, "grad_time_ms": 10502.988, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0036110735964030027, "policy_loss": -0.003584003308787942, "vf_loss": 77.51012420654297, "vf_explained_var": 0.770778238773346, "kl": 0.00190709566231817, "entropy": 1.1118710041046143, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5158400, "episodes_total": 12896, "training_iteration": 403, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-13-59", "timestamp": 1660259639, "time_this_iter_s": 35.02220106124878, "time_total_s": 18049.246735095978, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 18049.246735095978, "timesteps_since_restore": 5158400, "iterations_since_restore": 403, "perf": {"cpu_util_percent": 34.604, "ram_util_percent": 59.326}}
+{"episode_reward_max": 639.0, "episode_reward_min": 522.0, "episode_reward_mean": 614.25, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 254.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 307.125}, "custom_metrics": {"sparse_reward_mean": 213.0, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 188.25, "shaped_reward_min": 162, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.79, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 18.51, "onion_pickup_agent_1_min": 15, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.38, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 17.91, "useful_onion_pickup_agent_1_min": 14, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.14, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.22, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.06, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.05, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.37, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.95, "potting_onion_agent_1_min": 14, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.61, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.84, "dish_pickup_agent_1_min": 4, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.54, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 4.94, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.75, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.65, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.05, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.69, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.09, "soup_pickup_agent_1_min": 4, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.61, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.06, "soup_delivery_agent_1_min": 4, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.37, 
"optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.95, "optimal_onion_potting_agent_1_min": 14, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.37, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.95, "viable_onion_potting_agent_1_min": 14, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [636.0, 630.0, 587.0, 636.0, 633.0, 582.0, 578.0, 612.0, 630.0, 633.0, 633.0, 582.0, 633.0, 630.0, 636.0, 630.0, 630.0, 582.0, 636.0, 627.0, 636.0, 630.0, 633.0, 636.0, 536.0, 582.0, 636.0, 630.0, 639.0, 630.0, 639.0, 582.0, 633.0, 630.0, 639.0, 587.0, 527.0, 636.0, 633.0, 636.0, 633.0, 582.0, 587.0, 630.0, 630.0, 627.0, 582.0, 587.0, 627.0, 624.0, 636.0, 633.0, 587.0, 630.0, 639.0, 636.0, 578.0, 633.0, 639.0, 630.0, 639.0, 633.0, 636.0, 582.0, 636.0, 584.0, 630.0, 581.0, 527.0, 582.0, 630.0, 639.0, 630.0, 582.0, 522.0, 621.0, 582.0, 564.0, 636.0, 630.0, 630.0, 630.0, 579.0, 633.0, 627.0, 636.0, 630.0, 633.0, 567.0, 582.0, 630.0, 639.0, 636.0, 636.0, 633.0, 567.0, 582.0, 627.0, 630.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [319.0, 317.0, 310.0, 320.0, 293.0, 294.0, 324.0, 312.0, 319.0, 314.0, 288.0, 294.0, 301.0, 277.0, 307.0, 305.0, 321.0, 309.0, 317.0, 316.0, 319.0, 314.0, 283.0, 299.0, 314.0, 319.0, 313.0, 317.0, 319.0, 317.0, 314.0, 316.0, 314.0, 316.0, 283.0, 299.0, 319.0, 317.0, 305.0, 322.0, 317.0, 319.0, 319.0, 311.0, 323.0, 310.0, 324.0, 312.0, 268.0, 268.0, 287.0, 295.0, 319.0, 317.0, 316.0, 314.0, 319.0, 320.0, 316.0, 314.0, 317.0, 322.0, 288.0, 294.0, 311.0, 322.0, 311.0, 319.0, 319.0, 320.0, 291.0, 296.0, 273.0, 254.0, 316.0, 320.0, 316.0, 317.0, 316.0, 320.0, 319.0, 314.0, 291.0, 291.0, 301.0, 286.0, 314.0, 316.0, 321.0, 309.0, 318.0, 309.0, 296.0, 286.0, 291.0, 296.0, 318.0, 
309.0, 308.0, 316.0, 316.0, 320.0, 316.0, 317.0, 301.0, 286.0, 314.0, 316.0, 319.0, 320.0, 320.0, 316.0, 295.0, 283.0, 314.0, 319.0, 319.0, 320.0, 311.0, 319.0, 322.0, 317.0, 314.0, 319.0, 319.0, 317.0, 288.0, 294.0, 311.0, 325.0, 290.0, 294.0, 313.0, 317.0, 296.0, 285.0, 258.0, 269.0, 291.0, 291.0, 322.0, 308.0, 319.0, 320.0, 313.0, 317.0, 288.0, 294.0, 268.0, 254.0, 310.0, 311.0, 288.0, 294.0, 268.0, 296.0, 313.0, 323.0, 318.0, 312.0, 315.0, 315.0, 313.0, 317.0, 285.0, 294.0, 318.0, 315.0, 321.0, 306.0, 317.0, 319.0, 313.0, 317.0, 314.0, 319.0, 282.0, 285.0, 294.0, 288.0, 314.0, 316.0, 322.0, 317.0, 312.0, 324.0, 319.0, 317.0, 313.0, 320.0, 290.0, 277.0, 299.0, 283.0, 308.0, 319.0, 319.0, 311.0, 291.0, 285.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8240164523926264, "mean_processing_ms": 0.23323028854357722, "mean_inference_ms": 1.4244623501474682}, "off_policy_estimator": {}, "info": {"num_steps_trained": 9696000, "num_steps_sampled": 5171200, "sample_time_ms": 23257.709, "load_time_ms": 37.644, "grad_time_ms": 10547.246, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0014424080727621913, "policy_loss": -0.00573391281068325, "vf_loss": 77.33064270019531, "vf_explained_var": 0.7716807723045349, "kl": 0.0015154121210798621, "entropy": 1.1135029792785645, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5171200, "episodes_total": 12928, "training_iteration": 404, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-14-33", "timestamp": 1660259673, "time_this_iter_s": 33.92467999458313, "time_total_s": 18083.17141509056, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 18083.17141509056, "timesteps_since_restore": 5171200, "iterations_since_restore": 404, "perf": {"cpu_util_percent": 34.637499999999996, "ram_util_percent": 58.86041666666667}}
+{"episode_reward_max": 639.0, "episode_reward_min": 522.0, "episode_reward_mean": 612.47, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 254.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 306.235}, "custom_metrics": {"sparse_reward_mean": 212.2, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 188.07, "shaped_reward_min": 162, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.66, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 18.56, "onion_pickup_agent_1_min": 15, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.23, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 17.83, "useful_onion_pickup_agent_1_min": 14, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.18, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.21, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.1, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.06, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 16.21, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 18.09, "potting_onion_agent_1_min": 15, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.6, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.88, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.61, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 4.88, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.68, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.77, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.73, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.03, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.68, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.96, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.21, 
"optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 18.09, "optimal_onion_potting_agent_1_min": 15, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.21, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 18.09, "viable_onion_potting_agent_1_min": 15, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 633.0, 587.0, 582.0, 627.0, 539.0, 636.0, 633.0, 639.0, 630.0, 582.0, 579.0, 639.0, 636.0, 590.0, 627.0, 633.0, 639.0, 579.0, 587.0, 579.0, 636.0, 636.0, 630.0, 636.0, 633.0, 636.0, 584.0, 584.0, 633.0, 590.0, 630.0, 636.0, 584.0, 630.0, 581.0, 527.0, 582.0, 630.0, 639.0, 630.0, 582.0, 522.0, 621.0, 582.0, 564.0, 636.0, 630.0, 630.0, 630.0, 579.0, 633.0, 627.0, 636.0, 630.0, 633.0, 567.0, 582.0, 630.0, 639.0, 636.0, 636.0, 633.0, 567.0, 582.0, 627.0, 630.0, 576.0, 636.0, 630.0, 587.0, 636.0, 633.0, 582.0, 578.0, 612.0, 630.0, 633.0, 633.0, 582.0, 633.0, 630.0, 636.0, 630.0, 630.0, 582.0, 636.0, 627.0, 636.0, 630.0, 633.0, 636.0, 536.0, 582.0, 636.0, 630.0, 639.0, 630.0, 639.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [291.0, 288.0, 319.0, 314.0, 302.0, 285.0, 290.0, 292.0, 313.0, 314.0, 265.0, 274.0, 322.0, 314.0, 316.0, 317.0, 319.0, 320.0, 316.0, 314.0, 288.0, 294.0, 292.0, 287.0, 322.0, 317.0, 324.0, 312.0, 299.0, 291.0, 308.0, 319.0, 315.0, 318.0, 319.0, 320.0, 285.0, 294.0, 293.0, 294.0, 293.0, 286.0, 313.0, 323.0, 316.0, 320.0, 316.0, 314.0, 316.0, 320.0, 316.0, 317.0, 319.0, 317.0, 291.0, 293.0, 299.0, 285.0, 319.0, 314.0, 296.0, 294.0, 319.0, 311.0, 311.0, 325.0, 290.0, 294.0, 313.0, 317.0, 296.0, 285.0, 258.0, 269.0, 291.0, 291.0, 322.0, 308.0, 319.0, 320.0, 313.0, 317.0, 288.0, 294.0, 268.0, 254.0, 310.0, 311.0, 288.0, 294.0, 268.0, 296.0, 313.0, 323.0, 318.0, 312.0, 315.0, 
315.0, 313.0, 317.0, 285.0, 294.0, 318.0, 315.0, 321.0, 306.0, 317.0, 319.0, 313.0, 317.0, 314.0, 319.0, 282.0, 285.0, 294.0, 288.0, 314.0, 316.0, 322.0, 317.0, 312.0, 324.0, 319.0, 317.0, 313.0, 320.0, 290.0, 277.0, 299.0, 283.0, 308.0, 319.0, 319.0, 311.0, 291.0, 285.0, 319.0, 317.0, 310.0, 320.0, 293.0, 294.0, 324.0, 312.0, 319.0, 314.0, 288.0, 294.0, 301.0, 277.0, 307.0, 305.0, 321.0, 309.0, 317.0, 316.0, 319.0, 314.0, 283.0, 299.0, 314.0, 319.0, 313.0, 317.0, 319.0, 317.0, 314.0, 316.0, 314.0, 316.0, 283.0, 299.0, 319.0, 317.0, 305.0, 322.0, 317.0, 319.0, 319.0, 311.0, 323.0, 310.0, 324.0, 312.0, 268.0, 268.0, 287.0, 295.0, 319.0, 317.0, 316.0, 314.0, 319.0, 320.0, 316.0, 314.0, 317.0, 322.0, 288.0, 294.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8231192050910455, "mean_processing_ms": 0.23305365514326026, "mean_inference_ms": 1.4238788752206395}, "off_policy_estimator": {}, "info": {"num_steps_trained": 9720000, "num_steps_sampled": 5184000, "sample_time_ms": 23218.954, "load_time_ms": 37.026, "grad_time_ms": 10357.169, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.004148914944380522, "policy_loss": -0.003635302884504199, "vf_loss": 83.42072296142578, "vf_explained_var": 0.7650599479675293, "kl": 0.001778147299773991, "entropy": 1.115702509880066, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5184000, "episodes_total": 12960, "training_iteration": 405, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-15-06", "timestamp": 1660259706, "time_this_iter_s": 32.90920972824097, "time_total_s": 18116.080624818802, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 18116.080624818802, "timesteps_since_restore": 5184000, "iterations_since_restore": 405, "perf": {"cpu_util_percent": 30.800000000000004, "ram_util_percent": 58.806521739130446}}
+{"episode_reward_max": 639.0, "episode_reward_min": 536.0, "episode_reward_mean": 615.11, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 265.0}, "policy_reward_max": {"ppo": 328.0}, "policy_reward_mean": {"ppo": 307.555}, "custom_metrics": {"sparse_reward_mean": 213.0, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 189.11, "shaped_reward_min": 172, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.86, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 18.51, "onion_pickup_agent_1_min": 15, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.4, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 17.7, "useful_onion_pickup_agent_1_min": 14, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.24, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.24, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.13, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, "useful_onion_drop_agent_1_mean": 0.07, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 16.31, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 18.03, "potting_onion_agent_1_min": 15, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.6, "dish_pickup_agent_0_min": 5, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.96, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.75, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 4.98, "useful_dish_pickup_agent_1_min": 3, "useful_dish_pickup_agent_1_max": 6, "dish_drop_agent_0_mean": 0.66, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.78, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.78, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.01, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 6, "soup_delivery_agent_0_mean": 5.74, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.94, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.31, 
"optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 18.03, "optimal_onion_potting_agent_1_min": 15, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.31, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 18.03, "viable_onion_potting_agent_1_min": 15, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [627.0, 633.0, 636.0, 579.0, 630.0, 630.0, 630.0, 630.0, 582.0, 630.0, 576.0, 636.0, 624.0, 630.0, 639.0, 624.0, 636.0, 576.0, 587.0, 639.0, 630.0, 633.0, 630.0, 593.0, 633.0, 579.0, 630.0, 639.0, 587.0, 582.0, 636.0, 582.0, 582.0, 627.0, 630.0, 576.0, 636.0, 630.0, 587.0, 636.0, 633.0, 582.0, 578.0, 612.0, 630.0, 633.0, 633.0, 582.0, 633.0, 630.0, 636.0, 630.0, 630.0, 582.0, 636.0, 627.0, 636.0, 630.0, 633.0, 636.0, 536.0, 582.0, 636.0, 630.0, 639.0, 630.0, 639.0, 582.0, 579.0, 633.0, 587.0, 582.0, 627.0, 539.0, 636.0, 633.0, 639.0, 630.0, 582.0, 579.0, 639.0, 636.0, 590.0, 627.0, 633.0, 639.0, 579.0, 587.0, 579.0, 636.0, 636.0, 630.0, 636.0, 633.0, 636.0, 584.0, 584.0, 633.0, 590.0, 630.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [319.0, 308.0, 319.0, 314.0, 314.0, 322.0, 285.0, 294.0, 316.0, 314.0, 313.0, 317.0, 316.0, 314.0, 313.0, 317.0, 298.0, 284.0, 314.0, 316.0, 290.0, 286.0, 316.0, 320.0, 314.0, 310.0, 316.0, 314.0, 322.0, 317.0, 310.0, 314.0, 319.0, 317.0, 291.0, 285.0, 293.0, 294.0, 319.0, 320.0, 316.0, 314.0, 316.0, 317.0, 316.0, 314.0, 302.0, 291.0, 316.0, 317.0, 299.0, 280.0, 316.0, 314.0, 319.0, 320.0, 296.0, 291.0, 291.0, 291.0, 308.0, 328.0, 293.0, 289.0, 299.0, 283.0, 308.0, 319.0, 319.0, 311.0, 291.0, 285.0, 319.0, 317.0, 310.0, 320.0, 293.0, 294.0, 324.0, 312.0, 319.0, 314.0, 288.0, 294.0, 301.0, 277.0, 307.0, 305.0, 321.0, 309.0, 317.0, 316.0, 319.0, 314.0, 283.0, 299.0, 314.0, 
319.0, 313.0, 317.0, 319.0, 317.0, 314.0, 316.0, 314.0, 316.0, 283.0, 299.0, 319.0, 317.0, 305.0, 322.0, 317.0, 319.0, 319.0, 311.0, 323.0, 310.0, 324.0, 312.0, 268.0, 268.0, 287.0, 295.0, 319.0, 317.0, 316.0, 314.0, 319.0, 320.0, 316.0, 314.0, 317.0, 322.0, 288.0, 294.0, 291.0, 288.0, 319.0, 314.0, 302.0, 285.0, 290.0, 292.0, 313.0, 314.0, 265.0, 274.0, 322.0, 314.0, 316.0, 317.0, 319.0, 320.0, 316.0, 314.0, 288.0, 294.0, 292.0, 287.0, 322.0, 317.0, 324.0, 312.0, 299.0, 291.0, 308.0, 319.0, 315.0, 318.0, 319.0, 320.0, 285.0, 294.0, 293.0, 294.0, 293.0, 286.0, 313.0, 323.0, 316.0, 320.0, 316.0, 314.0, 316.0, 320.0, 316.0, 317.0, 319.0, 317.0, 291.0, 293.0, 299.0, 285.0, 319.0, 314.0, 296.0, 294.0, 319.0, 311.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8222123786920608, "mean_processing_ms": 0.23287368102080933, "mean_inference_ms": 1.4230607054783406}, "off_policy_estimator": {}, "info": {"num_steps_trained": 9744000, "num_steps_sampled": 5196800, "sample_time_ms": 23260.688, "load_time_ms": 37.047, "grad_time_ms": 10547.131, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0006928029470145702, "policy_loss": -0.006872573401778936, "vf_loss": 81.25198364257812, "vf_explained_var": 0.7684532999992371, "kl": 0.0019740292336791754, "entropy": 1.1196430921554565, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5196800, "episodes_total": 12992, "training_iteration": 406, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-15-37", "timestamp": 1660259737, "time_this_iter_s": 30.75086998939514, "time_total_s": 18146.831494808197, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 18146.831494808197, "timesteps_since_restore": 5196800, "iterations_since_restore": 406, "perf": {"cpu_util_percent": 34.53863636363637, "ram_util_percent": 58.81590909090908}}
+{"episode_reward_max": 639.0, "episode_reward_min": 3.0, "episode_reward_mean": 605.82, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 328.0}, "policy_reward_mean": {"ppo": 302.91}, "custom_metrics": {"sparse_reward_mean": 209.4, "sparse_reward_min": 0, "sparse_reward_max": 220, "shaped_reward_mean": 187.02, "shaped_reward_min": 3, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.56, "onion_pickup_agent_0_min": 3, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.46, "onion_pickup_agent_1_min": 5, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.1, "useful_onion_pickup_agent_0_min": 1, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.67, "useful_onion_pickup_agent_1_min": 1, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.24, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.31, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.12, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, "useful_onion_drop_agent_1_mean": 0.12, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 15.96, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.95, "potting_onion_agent_1_min": 1, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.49, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.8, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.75, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.93, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.61, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.73, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.8, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 4.88, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.72, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.79, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.96, 
"optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.95, "optimal_onion_potting_agent_1_min": 1, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.96, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.95, "viable_onion_potting_agent_1_min": 1, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [633.0, 633.0, 633.0, 587.0, 544.0, 630.0, 587.0, 587.0, 582.0, 633.0, 3.0, 579.0, 582.0, 587.0, 579.0, 627.0, 633.0, 633.0, 582.0, 582.0, 636.0, 567.0, 587.0, 636.0, 630.0, 636.0, 576.0, 636.0, 582.0, 587.0, 636.0, 636.0, 639.0, 630.0, 639.0, 582.0, 579.0, 633.0, 587.0, 582.0, 627.0, 539.0, 636.0, 633.0, 639.0, 630.0, 582.0, 579.0, 639.0, 636.0, 590.0, 627.0, 633.0, 639.0, 579.0, 587.0, 579.0, 636.0, 636.0, 630.0, 636.0, 633.0, 636.0, 584.0, 584.0, 633.0, 590.0, 630.0, 627.0, 633.0, 636.0, 579.0, 630.0, 630.0, 630.0, 630.0, 582.0, 630.0, 576.0, 636.0, 624.0, 630.0, 639.0, 624.0, 636.0, 576.0, 587.0, 639.0, 630.0, 633.0, 630.0, 593.0, 633.0, 579.0, 630.0, 639.0, 587.0, 582.0, 636.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [309.0, 324.0, 316.0, 317.0, 319.0, 314.0, 296.0, 291.0, 270.0, 274.0, 306.0, 324.0, 299.0, 288.0, 296.0, 291.0, 298.0, 284.0, 316.0, 317.0, 0.0, 3.0, 287.0, 292.0, 292.0, 290.0, 293.0, 294.0, 299.0, 280.0, 316.0, 311.0, 319.0, 314.0, 322.0, 311.0, 293.0, 289.0, 291.0, 291.0, 319.0, 317.0, 281.0, 286.0, 289.0, 298.0, 316.0, 320.0, 316.0, 314.0, 324.0, 312.0, 280.0, 296.0, 316.0, 320.0, 301.0, 281.0, 290.0, 297.0, 319.0, 317.0, 319.0, 317.0, 319.0, 320.0, 316.0, 314.0, 317.0, 322.0, 288.0, 294.0, 291.0, 288.0, 319.0, 314.0, 302.0, 285.0, 290.0, 292.0, 313.0, 314.0, 265.0, 274.0, 322.0, 314.0, 316.0, 317.0, 319.0, 320.0, 316.0, 314.0, 288.0, 294.0, 292.0, 287.0, 322.0, 317.0, 
324.0, 312.0, 299.0, 291.0, 308.0, 319.0, 315.0, 318.0, 319.0, 320.0, 285.0, 294.0, 293.0, 294.0, 293.0, 286.0, 313.0, 323.0, 316.0, 320.0, 316.0, 314.0, 316.0, 320.0, 316.0, 317.0, 319.0, 317.0, 291.0, 293.0, 299.0, 285.0, 319.0, 314.0, 296.0, 294.0, 319.0, 311.0, 319.0, 308.0, 319.0, 314.0, 314.0, 322.0, 285.0, 294.0, 316.0, 314.0, 313.0, 317.0, 316.0, 314.0, 313.0, 317.0, 298.0, 284.0, 314.0, 316.0, 290.0, 286.0, 316.0, 320.0, 314.0, 310.0, 316.0, 314.0, 322.0, 317.0, 310.0, 314.0, 319.0, 317.0, 291.0, 285.0, 293.0, 294.0, 319.0, 320.0, 316.0, 314.0, 316.0, 317.0, 316.0, 314.0, 302.0, 291.0, 316.0, 317.0, 299.0, 280.0, 316.0, 314.0, 319.0, 320.0, 296.0, 291.0, 291.0, 291.0, 308.0, 328.0, 293.0, 289.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8213047734882832, "mean_processing_ms": 0.2326926130887054, "mean_inference_ms": 1.4220964273978254}, "off_policy_estimator": {}, "info": {"num_steps_trained": 9768000, "num_steps_sampled": 5209600, "sample_time_ms": 23078.335, "load_time_ms": 37.045, "grad_time_ms": 10614.832, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0037456925492733717, "policy_loss": -0.004354165401309729, "vf_loss": 86.5316162109375, "vf_explained_var": 0.7801554799079895, "kl": 0.0024353403132408857, "entropy": 1.1066083908081055, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5209600, "episodes_total": 13024, "training_iteration": 407, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-16-07", "timestamp": 1660259767, "time_this_iter_s": 29.56272530555725, "time_total_s": 18176.394220113754, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 18176.394220113754, "timesteps_since_restore": 5209600, "iterations_since_restore": 407, "perf": {"cpu_util_percent": 33.98809523809524, "ram_util_percent": 58.745238095238086}}
+{"episode_reward_max": 639.0, "episode_reward_min": 3.0, "episode_reward_mean": 607.88, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 328.0}, "policy_reward_mean": {"ppo": 303.94}, "custom_metrics": {"sparse_reward_mean": 210.4, "sparse_reward_min": 0, "sparse_reward_max": 220, "shaped_reward_mean": 187.08, "shaped_reward_min": 3, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.79, "onion_pickup_agent_0_min": 3, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 18.31, "onion_pickup_agent_1_min": 5, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.36, "useful_onion_pickup_agent_0_min": 1, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.58, "useful_onion_pickup_agent_1_min": 1, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.22, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.31, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.11, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, "useful_onion_drop_agent_1_mean": 0.12, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 16.2, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.8, "potting_onion_agent_1_min": 1, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.52, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.75, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.68, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.93, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.72, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.63, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.71, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 4.96, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.65, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.9, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.2, 
"optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.8, "optimal_onion_potting_agent_1_min": 1, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.2, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.8, "viable_onion_potting_agent_1_min": 1, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [636.0, 636.0, 636.0, 636.0, 633.0, 618.0, 582.0, 630.0, 636.0, 636.0, 633.0, 636.0, 639.0, 630.0, 576.0, 582.0, 633.0, 639.0, 579.0, 636.0, 636.0, 639.0, 579.0, 636.0, 630.0, 633.0, 636.0, 579.0, 630.0, 633.0, 516.0, 633.0, 584.0, 633.0, 590.0, 630.0, 627.0, 633.0, 636.0, 579.0, 630.0, 630.0, 630.0, 630.0, 582.0, 630.0, 576.0, 636.0, 624.0, 630.0, 639.0, 624.0, 636.0, 576.0, 587.0, 639.0, 630.0, 633.0, 630.0, 593.0, 633.0, 579.0, 630.0, 639.0, 587.0, 582.0, 636.0, 582.0, 633.0, 633.0, 633.0, 587.0, 544.0, 630.0, 587.0, 587.0, 582.0, 633.0, 3.0, 579.0, 582.0, 587.0, 579.0, 627.0, 633.0, 633.0, 582.0, 582.0, 636.0, 567.0, 587.0, 636.0, 630.0, 636.0, 576.0, 636.0, 582.0, 587.0, 636.0, 636.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [322.0, 314.0, 315.0, 321.0, 314.0, 322.0, 319.0, 317.0, 311.0, 322.0, 312.0, 306.0, 294.0, 288.0, 310.0, 320.0, 314.0, 322.0, 314.0, 322.0, 316.0, 317.0, 316.0, 320.0, 317.0, 322.0, 319.0, 311.0, 281.0, 295.0, 293.0, 289.0, 316.0, 317.0, 322.0, 317.0, 293.0, 286.0, 316.0, 320.0, 325.0, 311.0, 319.0, 320.0, 293.0, 286.0, 319.0, 317.0, 319.0, 311.0, 317.0, 316.0, 314.0, 322.0, 291.0, 288.0, 314.0, 316.0, 313.0, 320.0, 265.0, 251.0, 321.0, 312.0, 299.0, 285.0, 319.0, 314.0, 296.0, 294.0, 319.0, 311.0, 319.0, 308.0, 319.0, 314.0, 314.0, 322.0, 285.0, 294.0, 316.0, 314.0, 313.0, 317.0, 316.0, 314.0, 313.0, 317.0, 298.0, 284.0, 314.0, 316.0, 290.0, 286.0, 316.0, 320.0, 314.0, 
310.0, 316.0, 314.0, 322.0, 317.0, 310.0, 314.0, 319.0, 317.0, 291.0, 285.0, 293.0, 294.0, 319.0, 320.0, 316.0, 314.0, 316.0, 317.0, 316.0, 314.0, 302.0, 291.0, 316.0, 317.0, 299.0, 280.0, 316.0, 314.0, 319.0, 320.0, 296.0, 291.0, 291.0, 291.0, 308.0, 328.0, 293.0, 289.0, 309.0, 324.0, 316.0, 317.0, 319.0, 314.0, 296.0, 291.0, 270.0, 274.0, 306.0, 324.0, 299.0, 288.0, 296.0, 291.0, 298.0, 284.0, 316.0, 317.0, 0.0, 3.0, 287.0, 292.0, 292.0, 290.0, 293.0, 294.0, 299.0, 280.0, 316.0, 311.0, 319.0, 314.0, 322.0, 311.0, 293.0, 289.0, 291.0, 291.0, 319.0, 317.0, 281.0, 286.0, 289.0, 298.0, 316.0, 320.0, 316.0, 314.0, 324.0, 312.0, 280.0, 296.0, 316.0, 320.0, 301.0, 281.0, 290.0, 297.0, 319.0, 317.0, 319.0, 317.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8203957464749945, "mean_processing_ms": 0.2325105755388932, "mean_inference_ms": 1.4210101321453532}, "off_policy_estimator": {}, "info": {"num_steps_trained": 9792000, "num_steps_sampled": 5222400, "sample_time_ms": 22766.104, "load_time_ms": 36.963, "grad_time_ms": 10377.417, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0032091455068439245, "policy_loss": -0.004078669007867575, "vf_loss": 78.43866729736328, "vf_explained_var": 0.7684862613677979, "kl": 0.00216904329136014, "entropy": 1.1121129989624023, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5222400, "episodes_total": 13056, "training_iteration": 408, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-16-35", "timestamp": 1660259795, "time_this_iter_s": 28.361918210983276, "time_total_s": 18204.756138324738, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 18204.756138324738, "timesteps_since_restore": 5222400, "iterations_since_restore": 408, "perf": {"cpu_util_percent": 30.642500000000002, "ram_util_percent": 58.74749999999999}}
+{"episode_reward_max": 639.0, "episode_reward_min": 3.0, "episode_reward_mean": 606.24, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 328.0}, "policy_reward_mean": {"ppo": 303.12}, "custom_metrics": {"sparse_reward_mean": 209.8, "sparse_reward_min": 0, "sparse_reward_max": 220, "shaped_reward_mean": 186.64, "shaped_reward_min": 3, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.61, "onion_pickup_agent_0_min": 3, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 18.4, "onion_pickup_agent_1_min": 5, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.16, "useful_onion_pickup_agent_0_min": 1, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.69, "useful_onion_pickup_agent_1_min": 1, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.18, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.31, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.08, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.12, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 16.15, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.81, "potting_onion_agent_1_min": 1, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.53, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.67, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.6, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.92, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.76, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.57, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.67, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 4.99, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.56, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.95, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.15, 
"optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.81, "optimal_onion_potting_agent_1_min": 1, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.15, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.81, "viable_onion_potting_agent_1_min": 1, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [633.0, 630.0, 570.0, 633.0, 633.0, 584.0, 621.0, 636.0, 584.0, 630.0, 576.0, 587.0, 630.0, 630.0, 630.0, 636.0, 587.0, 582.0, 630.0, 639.0, 633.0, 633.0, 465.0, 636.0, 587.0, 630.0, 630.0, 587.0, 636.0, 630.0, 630.0, 636.0, 587.0, 582.0, 636.0, 582.0, 633.0, 633.0, 633.0, 587.0, 544.0, 630.0, 587.0, 587.0, 582.0, 633.0, 3.0, 579.0, 582.0, 587.0, 579.0, 627.0, 633.0, 633.0, 582.0, 582.0, 636.0, 567.0, 587.0, 636.0, 630.0, 636.0, 576.0, 636.0, 582.0, 587.0, 636.0, 636.0, 636.0, 636.0, 636.0, 636.0, 633.0, 618.0, 582.0, 630.0, 636.0, 636.0, 633.0, 636.0, 639.0, 630.0, 576.0, 582.0, 633.0, 639.0, 579.0, 636.0, 636.0, 639.0, 579.0, 636.0, 630.0, 633.0, 636.0, 579.0, 630.0, 633.0, 516.0, 633.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [311.0, 322.0, 305.0, 325.0, 279.0, 291.0, 319.0, 314.0, 316.0, 317.0, 287.0, 297.0, 311.0, 310.0, 321.0, 315.0, 299.0, 285.0, 314.0, 316.0, 282.0, 294.0, 296.0, 291.0, 318.0, 312.0, 316.0, 314.0, 314.0, 316.0, 316.0, 320.0, 299.0, 288.0, 294.0, 288.0, 313.0, 317.0, 319.0, 320.0, 314.0, 319.0, 317.0, 316.0, 239.0, 226.0, 319.0, 317.0, 301.0, 286.0, 321.0, 309.0, 316.0, 314.0, 293.0, 294.0, 317.0, 319.0, 313.0, 317.0, 311.0, 319.0, 321.0, 315.0, 296.0, 291.0, 291.0, 291.0, 308.0, 328.0, 293.0, 289.0, 309.0, 324.0, 316.0, 317.0, 319.0, 314.0, 296.0, 291.0, 270.0, 274.0, 306.0, 324.0, 299.0, 288.0, 296.0, 291.0, 298.0, 284.0, 316.0, 317.0, 0.0, 3.0, 287.0, 292.0, 292.0, 290.0, 
293.0, 294.0, 299.0, 280.0, 316.0, 311.0, 319.0, 314.0, 322.0, 311.0, 293.0, 289.0, 291.0, 291.0, 319.0, 317.0, 281.0, 286.0, 289.0, 298.0, 316.0, 320.0, 316.0, 314.0, 324.0, 312.0, 280.0, 296.0, 316.0, 320.0, 301.0, 281.0, 290.0, 297.0, 319.0, 317.0, 319.0, 317.0, 322.0, 314.0, 315.0, 321.0, 314.0, 322.0, 319.0, 317.0, 311.0, 322.0, 312.0, 306.0, 294.0, 288.0, 310.0, 320.0, 314.0, 322.0, 314.0, 322.0, 316.0, 317.0, 316.0, 320.0, 317.0, 322.0, 319.0, 311.0, 281.0, 295.0, 293.0, 289.0, 316.0, 317.0, 322.0, 317.0, 293.0, 286.0, 316.0, 320.0, 325.0, 311.0, 319.0, 320.0, 293.0, 286.0, 319.0, 317.0, 319.0, 311.0, 317.0, 316.0, 314.0, 322.0, 291.0, 288.0, 314.0, 316.0, 313.0, 320.0, 265.0, 251.0, 321.0, 312.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8194928769561236, "mean_processing_ms": 0.23232969366347853, "mean_inference_ms": 1.4199784153489992}, "off_policy_estimator": {}, "info": {"num_steps_trained": 9816000, "num_steps_sampled": 5235200, "sample_time_ms": 22579.389, "load_time_ms": 36.726, "grad_time_ms": 10063.447, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0015366753796115518, "policy_loss": -0.006077593192458153, "vf_loss": 81.69181060791016, "vf_explained_var": 0.7707114219665527, "kl": 0.001978269312530756, "entropy": 1.1098326444625854, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5235200, "episodes_total": 13088, "training_iteration": 409, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-17-04", "timestamp": 1660259824, "time_this_iter_s": 29.396647930145264, "time_total_s": 18234.152786254883, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 18234.152786254883, "timesteps_since_restore": 5235200, "iterations_since_restore": 409, "perf": {"cpu_util_percent": 30.553658536585367, "ram_util_percent": 58.824390243902435}}
+{"episode_reward_max": 639.0, "episode_reward_min": 465.0, "episode_reward_mean": 616.3, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 226.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 308.15}, "custom_metrics": {"sparse_reward_mean": 213.6, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 189.1, "shaped_reward_min": 145, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.79, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 18.64, "onion_pickup_agent_1_min": 15, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.48, "useful_onion_pickup_agent_0_min": 13, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.97, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.14, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.29, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.05, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.07, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 16.4, "potting_onion_agent_0_min": 13, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 18.03, "potting_onion_agent_1_min": 14, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.53, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.8, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 12, "useful_dish_pickup_agent_0_mean": 5.6, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 4.97, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.68, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.61, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.73, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.09, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.66, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.03, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.4, 
"optimal_onion_potting_agent_0_min": 13, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 18.03, "optimal_onion_potting_agent_1_min": 14, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.4, "viable_onion_potting_agent_0_min": 13, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 18.03, "viable_onion_potting_agent_1_min": 14, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [587.0, 624.0, 636.0, 630.0, 627.0, 633.0, 587.0, 633.0, 639.0, 581.0, 630.0, 570.0, 636.0, 636.0, 587.0, 633.0, 627.0, 587.0, 633.0, 627.0, 582.0, 636.0, 633.0, 627.0, 587.0, 572.0, 582.0, 630.0, 630.0, 639.0, 636.0, 636.0, 582.0, 587.0, 636.0, 636.0, 636.0, 636.0, 636.0, 636.0, 633.0, 618.0, 582.0, 630.0, 636.0, 636.0, 633.0, 636.0, 639.0, 630.0, 576.0, 582.0, 633.0, 639.0, 579.0, 636.0, 636.0, 639.0, 579.0, 636.0, 630.0, 633.0, 636.0, 579.0, 630.0, 633.0, 516.0, 633.0, 633.0, 630.0, 570.0, 633.0, 633.0, 584.0, 621.0, 636.0, 584.0, 630.0, 576.0, 587.0, 630.0, 630.0, 630.0, 636.0, 587.0, 582.0, 630.0, 639.0, 633.0, 633.0, 465.0, 636.0, 587.0, 630.0, 630.0, 587.0, 636.0, 630.0, 630.0, 636.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [293.0, 294.0, 310.0, 314.0, 321.0, 315.0, 316.0, 314.0, 305.0, 322.0, 322.0, 311.0, 298.0, 289.0, 316.0, 317.0, 312.0, 327.0, 287.0, 294.0, 316.0, 314.0, 299.0, 271.0, 312.0, 324.0, 314.0, 322.0, 301.0, 286.0, 321.0, 312.0, 316.0, 311.0, 293.0, 294.0, 313.0, 320.0, 315.0, 312.0, 286.0, 296.0, 317.0, 319.0, 319.0, 314.0, 310.0, 317.0, 293.0, 294.0, 284.0, 288.0, 287.0, 295.0, 316.0, 314.0, 319.0, 311.0, 314.0, 325.0, 319.0, 317.0, 317.0, 319.0, 301.0, 281.0, 290.0, 297.0, 319.0, 317.0, 319.0, 317.0, 322.0, 314.0, 315.0, 321.0, 314.0, 322.0, 319.0, 317.0, 311.0, 322.0, 312.0, 306.0, 294.0, 288.0, 310.0, 320.0, 314.0, 322.0, 314.0, 322.0, 316.0, 317.0, 316.0, 320.0, 317.0, 
322.0, 319.0, 311.0, 281.0, 295.0, 293.0, 289.0, 316.0, 317.0, 322.0, 317.0, 293.0, 286.0, 316.0, 320.0, 325.0, 311.0, 319.0, 320.0, 293.0, 286.0, 319.0, 317.0, 319.0, 311.0, 317.0, 316.0, 314.0, 322.0, 291.0, 288.0, 314.0, 316.0, 313.0, 320.0, 265.0, 251.0, 321.0, 312.0, 311.0, 322.0, 305.0, 325.0, 279.0, 291.0, 319.0, 314.0, 316.0, 317.0, 287.0, 297.0, 311.0, 310.0, 321.0, 315.0, 299.0, 285.0, 314.0, 316.0, 282.0, 294.0, 296.0, 291.0, 318.0, 312.0, 316.0, 314.0, 314.0, 316.0, 316.0, 320.0, 299.0, 288.0, 294.0, 288.0, 313.0, 317.0, 319.0, 320.0, 314.0, 319.0, 317.0, 316.0, 239.0, 226.0, 319.0, 317.0, 301.0, 286.0, 321.0, 309.0, 316.0, 314.0, 293.0, 294.0, 317.0, 319.0, 313.0, 317.0, 311.0, 319.0, 321.0, 315.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8185939281976542, "mean_processing_ms": 0.23214957034967199, "mean_inference_ms": 1.4189514044158715}, "off_policy_estimator": {}, "info": {"num_steps_trained": 9840000, "num_steps_sampled": 5248000, "sample_time_ms": 22039.582, "load_time_ms": 37.045, "grad_time_ms": 9893.172, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0014178849523887038, "policy_loss": -0.008465434424579144, "vf_loss": 76.02017974853516, "vf_explained_var": 0.7725793719291687, "kl": 0.0019942354410886765, "entropy": 1.1089389324188232, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5248000, "episodes_total": 13120, "training_iteration": 410, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-17-33", "timestamp": 1660259853, "time_this_iter_s": 28.213119983673096, "time_total_s": 18262.365906238556, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 18262.365906238556, "timesteps_since_restore": 5248000, "iterations_since_restore": 410, "perf": {"cpu_util_percent": 35.04, "ram_util_percent": 58.745000000000005}}
+{"episode_reward_max": 639.0, "episode_reward_min": 465.0, "episode_reward_mean": 615.25, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 226.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 307.625}, "custom_metrics": {"sparse_reward_mean": 213.4, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 188.45, "shaped_reward_min": 145, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.63, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 18.79, "onion_pickup_agent_1_min": 15, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.29, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 18.07, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.15, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.3, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.04, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.08, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 16.17, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 18.11, "potting_onion_agent_1_min": 14, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.55, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.82, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 12, "useful_dish_pickup_agent_0_mean": 5.6, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.87, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.64, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.71, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.84, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.0, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.77, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.94, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.17, 
"optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 18.11, "optimal_onion_potting_agent_1_min": 14, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.17, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 18.11, "viable_onion_potting_agent_1_min": 14, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 633.0, 576.0, 630.0, 587.0, 630.0, 639.0, 627.0, 636.0, 627.0, 630.0, 630.0, 521.0, 636.0, 576.0, 633.0, 633.0, 636.0, 584.0, 624.0, 633.0, 630.0, 633.0, 584.0, 633.0, 636.0, 633.0, 633.0, 636.0, 636.0, 630.0, 579.0, 630.0, 633.0, 516.0, 633.0, 633.0, 630.0, 570.0, 633.0, 633.0, 584.0, 621.0, 636.0, 584.0, 630.0, 576.0, 587.0, 630.0, 630.0, 630.0, 636.0, 587.0, 582.0, 630.0, 639.0, 633.0, 633.0, 465.0, 636.0, 587.0, 630.0, 630.0, 587.0, 636.0, 630.0, 630.0, 636.0, 587.0, 624.0, 636.0, 630.0, 627.0, 633.0, 587.0, 633.0, 639.0, 581.0, 630.0, 570.0, 636.0, 636.0, 587.0, 633.0, 627.0, 587.0, 633.0, 627.0, 582.0, 636.0, 633.0, 627.0, 587.0, 572.0, 582.0, 630.0, 630.0, 639.0, 636.0, 636.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [294.0, 288.0, 314.0, 319.0, 286.0, 290.0, 319.0, 311.0, 293.0, 294.0, 316.0, 314.0, 319.0, 320.0, 310.0, 317.0, 316.0, 320.0, 313.0, 314.0, 324.0, 306.0, 320.0, 310.0, 254.0, 267.0, 319.0, 317.0, 295.0, 281.0, 319.0, 314.0, 316.0, 317.0, 319.0, 317.0, 291.0, 293.0, 316.0, 308.0, 316.0, 317.0, 311.0, 319.0, 313.0, 320.0, 293.0, 291.0, 317.0, 316.0, 314.0, 322.0, 314.0, 319.0, 326.0, 307.0, 321.0, 315.0, 319.0, 317.0, 319.0, 311.0, 290.0, 289.0, 314.0, 316.0, 313.0, 320.0, 265.0, 251.0, 321.0, 312.0, 311.0, 322.0, 305.0, 325.0, 279.0, 291.0, 319.0, 314.0, 316.0, 317.0, 287.0, 297.0, 311.0, 310.0, 321.0, 315.0, 299.0, 285.0, 314.0, 316.0, 282.0, 294.0, 296.0, 291.0, 318.0, 
312.0, 316.0, 314.0, 314.0, 316.0, 316.0, 320.0, 299.0, 288.0, 294.0, 288.0, 313.0, 317.0, 319.0, 320.0, 314.0, 319.0, 317.0, 316.0, 239.0, 226.0, 319.0, 317.0, 301.0, 286.0, 321.0, 309.0, 316.0, 314.0, 293.0, 294.0, 317.0, 319.0, 313.0, 317.0, 311.0, 319.0, 321.0, 315.0, 293.0, 294.0, 310.0, 314.0, 321.0, 315.0, 316.0, 314.0, 305.0, 322.0, 322.0, 311.0, 298.0, 289.0, 316.0, 317.0, 312.0, 327.0, 287.0, 294.0, 316.0, 314.0, 299.0, 271.0, 312.0, 324.0, 314.0, 322.0, 301.0, 286.0, 321.0, 312.0, 316.0, 311.0, 293.0, 294.0, 313.0, 320.0, 315.0, 312.0, 286.0, 296.0, 317.0, 319.0, 319.0, 314.0, 310.0, 317.0, 293.0, 294.0, 284.0, 288.0, 287.0, 295.0, 316.0, 314.0, 319.0, 311.0, 314.0, 325.0, 319.0, 317.0, 317.0, 319.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8176992911646718, "mean_processing_ms": 0.23196962818643072, "mean_inference_ms": 1.417972483148229}, "off_policy_estimator": {}, "info": {"num_steps_trained": 9864000, "num_steps_sampled": 5260800, "sample_time_ms": 21770.07, "load_time_ms": 36.824, "grad_time_ms": 9519.356, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.001577894203364849, "policy_loss": -0.005276820156723261, "vf_loss": 74.05913543701172, "vf_explained_var": 0.7708218693733215, "kl": 0.002156370086595416, "entropy": 1.1023942232131958, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5260800, "episodes_total": 13152, "training_iteration": 411, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-18-03", "timestamp": 1660259883, "time_this_iter_s": 30.32603693008423, "time_total_s": 18292.69194316864, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 18292.69194316864, "timesteps_since_restore": 5260800, "iterations_since_restore": 411, "perf": {"cpu_util_percent": 34.49999999999999, "ram_util_percent": 58.81162790697674}}
+{"episode_reward_max": 639.0, "episode_reward_min": 197.0, "episode_reward_mean": 608.93, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 97.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 304.465}, "custom_metrics": {"sparse_reward_mean": 211.0, "sparse_reward_min": 60, "sparse_reward_max": 220, "shaped_reward_mean": 186.93, "shaped_reward_min": 77, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.53, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 18.57, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.15, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.81, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.14, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.3, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.02, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.08, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 16.04, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.92, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.39, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.95, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 12, "useful_dish_pickup_agent_0_mean": 5.56, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.94, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.57, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.83, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.72, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.99, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.69, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.9, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.04, 
"optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.92, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.04, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.92, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [630.0, 630.0, 582.0, 633.0, 633.0, 639.0, 584.0, 639.0, 197.0, 579.0, 465.0, 579.0, 636.0, 630.0, 639.0, 630.0, 630.0, 413.0, 582.0, 630.0, 576.0, 633.0, 639.0, 582.0, 633.0, 630.0, 630.0, 584.0, 587.0, 522.0, 636.0, 630.0, 636.0, 630.0, 630.0, 636.0, 587.0, 624.0, 636.0, 630.0, 627.0, 633.0, 587.0, 633.0, 639.0, 581.0, 630.0, 570.0, 636.0, 636.0, 587.0, 633.0, 627.0, 587.0, 633.0, 627.0, 582.0, 636.0, 633.0, 627.0, 587.0, 572.0, 582.0, 630.0, 630.0, 639.0, 636.0, 636.0, 582.0, 633.0, 576.0, 630.0, 587.0, 630.0, 639.0, 627.0, 636.0, 627.0, 630.0, 630.0, 521.0, 636.0, 576.0, 633.0, 633.0, 636.0, 584.0, 624.0, 633.0, 630.0, 633.0, 584.0, 633.0, 636.0, 633.0, 633.0, 636.0, 636.0, 630.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [314.0, 316.0, 311.0, 319.0, 291.0, 291.0, 314.0, 319.0, 311.0, 322.0, 317.0, 322.0, 293.0, 291.0, 316.0, 323.0, 100.0, 97.0, 294.0, 285.0, 229.0, 236.0, 288.0, 291.0, 321.0, 315.0, 311.0, 319.0, 317.0, 322.0, 319.0, 311.0, 311.0, 319.0, 207.0, 206.0, 293.0, 289.0, 311.0, 319.0, 285.0, 291.0, 311.0, 322.0, 319.0, 320.0, 296.0, 286.0, 318.0, 315.0, 321.0, 309.0, 316.0, 314.0, 287.0, 297.0, 296.0, 291.0, 262.0, 260.0, 319.0, 317.0, 313.0, 317.0, 317.0, 319.0, 313.0, 317.0, 311.0, 319.0, 321.0, 315.0, 293.0, 294.0, 310.0, 314.0, 321.0, 315.0, 316.0, 314.0, 305.0, 322.0, 322.0, 311.0, 298.0, 289.0, 316.0, 317.0, 312.0, 327.0, 287.0, 294.0, 316.0, 314.0, 299.0, 271.0, 312.0, 
324.0, 314.0, 322.0, 301.0, 286.0, 321.0, 312.0, 316.0, 311.0, 293.0, 294.0, 313.0, 320.0, 315.0, 312.0, 286.0, 296.0, 317.0, 319.0, 319.0, 314.0, 310.0, 317.0, 293.0, 294.0, 284.0, 288.0, 287.0, 295.0, 316.0, 314.0, 319.0, 311.0, 314.0, 325.0, 319.0, 317.0, 317.0, 319.0, 294.0, 288.0, 314.0, 319.0, 286.0, 290.0, 319.0, 311.0, 293.0, 294.0, 316.0, 314.0, 319.0, 320.0, 310.0, 317.0, 316.0, 320.0, 313.0, 314.0, 324.0, 306.0, 320.0, 310.0, 254.0, 267.0, 319.0, 317.0, 295.0, 281.0, 319.0, 314.0, 316.0, 317.0, 319.0, 317.0, 291.0, 293.0, 316.0, 308.0, 316.0, 317.0, 311.0, 319.0, 313.0, 320.0, 293.0, 291.0, 317.0, 316.0, 314.0, 322.0, 314.0, 319.0, 326.0, 307.0, 321.0, 315.0, 319.0, 317.0, 319.0, 311.0, 290.0, 289.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8168045018063097, "mean_processing_ms": 0.23178953609537822, "mean_inference_ms": 1.4169011715931346}, "off_policy_estimator": {}, "info": {"num_steps_trained": 9888000, "num_steps_sampled": 5273600, "sample_time_ms": 21329.9, "load_time_ms": 36.743, "grad_time_ms": 9279.775, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0034279574174433947, "policy_loss": -0.004527573008090258, "vf_loss": 85.10655975341797, "vf_explained_var": 0.7758853435516357, "kl": 0.0018181651830673218, "entropy": 1.1102546453475952, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5273600, "episodes_total": 13184, "training_iteration": 412, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-18-31", "timestamp": 1660259911, "time_this_iter_s": 28.26536202430725, "time_total_s": 18320.957305192947, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 18320.957305192947, "timesteps_since_restore": 5273600, "iterations_since_restore": 412, "perf": {"cpu_util_percent": 35.269999999999996, "ram_util_percent": 59.315}}
+{"episode_reward_max": 639.0, "episode_reward_min": 197.0, "episode_reward_mean": 608.16, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 97.0}, "policy_reward_max": {"ppo": 326.0}, "policy_reward_mean": {"ppo": 304.08}, "custom_metrics": {"sparse_reward_mean": 210.6, "sparse_reward_min": 60, "sparse_reward_max": 220, "shaped_reward_mean": 186.96, "shaped_reward_min": 77, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.73, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 18.35, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.28, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 17.58, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.14, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.29, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.02, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.1, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 16.19, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.7, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.52, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 6.07, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 13, "useful_dish_pickup_agent_0_mean": 5.55, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.03, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.8, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.86, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 6, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.63, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.08, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 6, "soup_delivery_agent_0_mean": 5.59, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.98, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.19, 
"optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.7, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.19, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.7, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [627.0, 636.0, 587.0, 621.0, 630.0, 582.0, 636.0, 587.0, 633.0, 630.0, 633.0, 627.0, 587.0, 633.0, 627.0, 630.0, 633.0, 587.0, 587.0, 633.0, 587.0, 587.0, 587.0, 636.0, 579.0, 627.0, 633.0, 582.0, 630.0, 630.0, 590.0, 633.0, 630.0, 639.0, 636.0, 636.0, 582.0, 633.0, 576.0, 630.0, 587.0, 630.0, 639.0, 627.0, 636.0, 627.0, 630.0, 630.0, 521.0, 636.0, 576.0, 633.0, 633.0, 636.0, 584.0, 624.0, 633.0, 630.0, 633.0, 584.0, 633.0, 636.0, 633.0, 633.0, 636.0, 636.0, 630.0, 579.0, 630.0, 630.0, 582.0, 633.0, 633.0, 639.0, 584.0, 639.0, 197.0, 579.0, 465.0, 579.0, 636.0, 630.0, 639.0, 630.0, 630.0, 413.0, 582.0, 630.0, 576.0, 633.0, 639.0, 582.0, 633.0, 630.0, 630.0, 584.0, 587.0, 522.0, 636.0, 630.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [313.0, 314.0, 319.0, 317.0, 298.0, 289.0, 317.0, 304.0, 317.0, 313.0, 286.0, 296.0, 319.0, 317.0, 288.0, 299.0, 320.0, 313.0, 316.0, 314.0, 316.0, 317.0, 322.0, 305.0, 285.0, 302.0, 316.0, 317.0, 314.0, 313.0, 314.0, 316.0, 317.0, 316.0, 290.0, 297.0, 299.0, 288.0, 316.0, 317.0, 288.0, 299.0, 288.0, 299.0, 288.0, 299.0, 322.0, 314.0, 284.0, 295.0, 316.0, 311.0, 317.0, 316.0, 285.0, 297.0, 313.0, 317.0, 311.0, 319.0, 299.0, 291.0, 318.0, 315.0, 319.0, 311.0, 314.0, 325.0, 319.0, 317.0, 317.0, 319.0, 294.0, 288.0, 314.0, 319.0, 286.0, 290.0, 319.0, 311.0, 293.0, 294.0, 316.0, 314.0, 319.0, 320.0, 310.0, 317.0, 316.0, 320.0, 313.0, 314.0, 324.0, 306.0, 320.0, 310.0, 254.0, 
267.0, 319.0, 317.0, 295.0, 281.0, 319.0, 314.0, 316.0, 317.0, 319.0, 317.0, 291.0, 293.0, 316.0, 308.0, 316.0, 317.0, 311.0, 319.0, 313.0, 320.0, 293.0, 291.0, 317.0, 316.0, 314.0, 322.0, 314.0, 319.0, 326.0, 307.0, 321.0, 315.0, 319.0, 317.0, 319.0, 311.0, 290.0, 289.0, 314.0, 316.0, 311.0, 319.0, 291.0, 291.0, 314.0, 319.0, 311.0, 322.0, 317.0, 322.0, 293.0, 291.0, 316.0, 323.0, 100.0, 97.0, 294.0, 285.0, 229.0, 236.0, 288.0, 291.0, 321.0, 315.0, 311.0, 319.0, 317.0, 322.0, 319.0, 311.0, 311.0, 319.0, 207.0, 206.0, 293.0, 289.0, 311.0, 319.0, 285.0, 291.0, 311.0, 322.0, 319.0, 320.0, 296.0, 286.0, 318.0, 315.0, 321.0, 309.0, 316.0, 314.0, 287.0, 297.0, 296.0, 291.0, 262.0, 260.0, 319.0, 317.0, 313.0, 317.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8159183949251133, "mean_processing_ms": 0.23161113375748543, "mean_inference_ms": 1.4159039621746354}, "off_policy_estimator": {}, "info": {"num_steps_trained": 9912000, "num_steps_sampled": 5286400, "sample_time_ms": 21002.373, "load_time_ms": 36.818, "grad_time_ms": 9265.226, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.002250772900879383, "policy_loss": -0.005318752024322748, "vf_loss": 81.240478515625, "vf_explained_var": 0.7617523074150085, "kl": 0.0018393909558653831, "entropy": 1.1090354919433594, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5286400, "episodes_total": 13216, "training_iteration": 413, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-19-03", "timestamp": 1660259943, "time_this_iter_s": 31.600411891937256, "time_total_s": 18352.557717084885, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 18352.557717084885, "timesteps_since_restore": 5286400, "iterations_since_restore": 413, "perf": {"cpu_util_percent": 31.806666666666665, "ram_util_percent": 58.973333333333315}}
+{"episode_reward_max": 639.0, "episode_reward_min": 197.0, "episode_reward_mean": 608.81, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 97.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 304.405}, "custom_metrics": {"sparse_reward_mean": 210.8, "sparse_reward_min": 60, "sparse_reward_max": 220, "shaped_reward_mean": 187.21, "shaped_reward_min": 77, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.78, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 18.38, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.31, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 17.52, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.21, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.34, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.03, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.11, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 16.2, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.7, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.56, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 6.13, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 13, "useful_dish_pickup_agent_0_mean": 5.53, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.14, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.86, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.87, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 6, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.55, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.15, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 6, "soup_delivery_agent_0_mean": 5.51, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.04, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.2, 
"optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.7, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.2, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.7, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [627.0, 633.0, 639.0, 530.0, 630.0, 582.0, 582.0, 624.0, 582.0, 630.0, 630.0, 630.0, 630.0, 587.0, 636.0, 639.0, 636.0, 627.0, 633.0, 627.0, 633.0, 636.0, 630.0, 630.0, 630.0, 630.0, 636.0, 639.0, 582.0, 636.0, 639.0, 636.0, 636.0, 636.0, 630.0, 579.0, 630.0, 630.0, 582.0, 633.0, 633.0, 639.0, 584.0, 639.0, 197.0, 579.0, 465.0, 579.0, 636.0, 630.0, 639.0, 630.0, 630.0, 413.0, 582.0, 630.0, 576.0, 633.0, 639.0, 582.0, 633.0, 630.0, 630.0, 584.0, 587.0, 522.0, 636.0, 630.0, 627.0, 636.0, 587.0, 621.0, 630.0, 582.0, 636.0, 587.0, 633.0, 630.0, 633.0, 627.0, 587.0, 633.0, 627.0, 630.0, 633.0, 587.0, 587.0, 633.0, 587.0, 587.0, 587.0, 636.0, 579.0, 627.0, 633.0, 582.0, 630.0, 630.0, 590.0, 633.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [311.0, 316.0, 314.0, 319.0, 324.0, 315.0, 260.0, 270.0, 313.0, 317.0, 290.0, 292.0, 286.0, 296.0, 313.0, 311.0, 291.0, 291.0, 316.0, 314.0, 316.0, 314.0, 314.0, 316.0, 310.0, 320.0, 288.0, 299.0, 319.0, 317.0, 319.0, 320.0, 314.0, 322.0, 307.0, 320.0, 316.0, 317.0, 311.0, 316.0, 316.0, 317.0, 314.0, 322.0, 311.0, 319.0, 313.0, 317.0, 316.0, 314.0, 316.0, 314.0, 314.0, 322.0, 319.0, 320.0, 290.0, 292.0, 319.0, 317.0, 322.0, 317.0, 321.0, 315.0, 321.0, 315.0, 319.0, 317.0, 319.0, 311.0, 290.0, 289.0, 314.0, 316.0, 311.0, 319.0, 291.0, 291.0, 314.0, 319.0, 311.0, 322.0, 317.0, 322.0, 293.0, 291.0, 316.0, 323.0, 100.0, 97.0, 294.0, 285.0, 229.0, 236.0, 288.0, 291.0, 321.0, 
315.0, 311.0, 319.0, 317.0, 322.0, 319.0, 311.0, 311.0, 319.0, 207.0, 206.0, 293.0, 289.0, 311.0, 319.0, 285.0, 291.0, 311.0, 322.0, 319.0, 320.0, 296.0, 286.0, 318.0, 315.0, 321.0, 309.0, 316.0, 314.0, 287.0, 297.0, 296.0, 291.0, 262.0, 260.0, 319.0, 317.0, 313.0, 317.0, 313.0, 314.0, 319.0, 317.0, 298.0, 289.0, 317.0, 304.0, 317.0, 313.0, 286.0, 296.0, 319.0, 317.0, 288.0, 299.0, 320.0, 313.0, 316.0, 314.0, 316.0, 317.0, 322.0, 305.0, 285.0, 302.0, 316.0, 317.0, 314.0, 313.0, 314.0, 316.0, 317.0, 316.0, 290.0, 297.0, 299.0, 288.0, 316.0, 317.0, 288.0, 299.0, 288.0, 299.0, 288.0, 299.0, 322.0, 314.0, 284.0, 295.0, 316.0, 311.0, 317.0, 316.0, 285.0, 297.0, 313.0, 317.0, 311.0, 319.0, 299.0, 291.0, 318.0, 315.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8150462375742643, "mean_processing_ms": 0.23143722383890097, "mean_inference_ms": 1.4151601741062898}, "off_policy_estimator": {}, "info": {"num_steps_trained": 9936000, "num_steps_sampled": 5299200, "sample_time_ms": 21315.468, "load_time_ms": 36.748, "grad_time_ms": 9192.863, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0032192638609558344, "policy_loss": -0.004382268991321325, "vf_loss": 81.57144927978516, "vf_explained_var": 0.7626829147338867, "kl": 0.001976991770789027, "entropy": 1.1112231016159058, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5299200, "episodes_total": 13248, "training_iteration": 414, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-19-39", "timestamp": 1660259979, "time_this_iter_s": 36.33256697654724, "time_total_s": 18388.890284061432, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 18388.890284061432, "timesteps_since_restore": 5299200, "iterations_since_restore": 414, "perf": {"cpu_util_percent": 31.756862745098033, "ram_util_percent": 59.57450980392157}}
+{"episode_reward_max": 639.0, "episode_reward_min": 522.0, "episode_reward_mean": 614.8, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 260.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 307.4}, "custom_metrics": {"sparse_reward_mean": 213.2, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 188.4, "shaped_reward_min": 161, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.04, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 18.55, "onion_pickup_agent_1_min": 14, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.62, "useful_onion_pickup_agent_0_min": 13, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 17.79, "useful_onion_pickup_agent_1_min": 14, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.35, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.43, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.11, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, "useful_onion_drop_agent_1_mean": 0.15, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 16.33, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.82, "potting_onion_agent_1_min": 14, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.69, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 6.1, "dish_pickup_agent_1_min": 4, "dish_pickup_agent_1_max": 13, "useful_dish_pickup_agent_0_mean": 5.51, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.14, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.95, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.79, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 6, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.61, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.2, "soup_pickup_agent_1_min": 4, "soup_pickup_agent_1_max": 6, "soup_delivery_agent_0_mean": 5.57, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.1, "soup_delivery_agent_1_min": 4, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.33, 
"optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.82, "optimal_onion_potting_agent_1_min": 14, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.33, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.82, "viable_onion_potting_agent_1_min": 14, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [630.0, 633.0, 570.0, 633.0, 582.0, 636.0, 630.0, 639.0, 639.0, 584.0, 570.0, 636.0, 627.0, 627.0, 630.0, 630.0, 573.0, 630.0, 582.0, 587.0, 633.0, 582.0, 633.0, 633.0, 630.0, 525.0, 582.0, 633.0, 627.0, 633.0, 627.0, 561.0, 587.0, 522.0, 636.0, 630.0, 627.0, 636.0, 587.0, 621.0, 630.0, 582.0, 636.0, 587.0, 633.0, 630.0, 633.0, 627.0, 587.0, 633.0, 627.0, 630.0, 633.0, 587.0, 587.0, 633.0, 587.0, 587.0, 587.0, 636.0, 579.0, 627.0, 633.0, 582.0, 630.0, 630.0, 590.0, 633.0, 627.0, 633.0, 639.0, 530.0, 630.0, 582.0, 582.0, 624.0, 582.0, 630.0, 630.0, 630.0, 630.0, 587.0, 636.0, 639.0, 636.0, 627.0, 633.0, 627.0, 633.0, 636.0, 630.0, 630.0, 630.0, 630.0, 636.0, 639.0, 582.0, 636.0, 639.0, 636.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [316.0, 314.0, 317.0, 316.0, 284.0, 286.0, 316.0, 317.0, 291.0, 291.0, 314.0, 322.0, 316.0, 314.0, 324.0, 315.0, 319.0, 320.0, 287.0, 297.0, 287.0, 283.0, 321.0, 315.0, 313.0, 314.0, 316.0, 311.0, 319.0, 311.0, 311.0, 319.0, 285.0, 288.0, 318.0, 312.0, 286.0, 296.0, 291.0, 296.0, 314.0, 319.0, 280.0, 302.0, 314.0, 319.0, 316.0, 317.0, 319.0, 311.0, 265.0, 260.0, 293.0, 289.0, 314.0, 319.0, 310.0, 317.0, 314.0, 319.0, 318.0, 309.0, 285.0, 276.0, 296.0, 291.0, 262.0, 260.0, 319.0, 317.0, 313.0, 317.0, 313.0, 314.0, 319.0, 317.0, 298.0, 289.0, 317.0, 304.0, 317.0, 313.0, 286.0, 296.0, 319.0, 317.0, 288.0, 299.0, 320.0, 313.0, 316.0, 314.0, 316.0, 317.0, 322.0, 305.0, 285.0, 
302.0, 316.0, 317.0, 314.0, 313.0, 314.0, 316.0, 317.0, 316.0, 290.0, 297.0, 299.0, 288.0, 316.0, 317.0, 288.0, 299.0, 288.0, 299.0, 288.0, 299.0, 322.0, 314.0, 284.0, 295.0, 316.0, 311.0, 317.0, 316.0, 285.0, 297.0, 313.0, 317.0, 311.0, 319.0, 299.0, 291.0, 318.0, 315.0, 311.0, 316.0, 314.0, 319.0, 324.0, 315.0, 260.0, 270.0, 313.0, 317.0, 290.0, 292.0, 286.0, 296.0, 313.0, 311.0, 291.0, 291.0, 316.0, 314.0, 316.0, 314.0, 314.0, 316.0, 310.0, 320.0, 288.0, 299.0, 319.0, 317.0, 319.0, 320.0, 314.0, 322.0, 307.0, 320.0, 316.0, 317.0, 311.0, 316.0, 316.0, 317.0, 314.0, 322.0, 311.0, 319.0, 313.0, 317.0, 316.0, 314.0, 316.0, 314.0, 314.0, 322.0, 319.0, 320.0, 290.0, 292.0, 319.0, 317.0, 322.0, 317.0, 321.0, 315.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8141797334624801, "mean_processing_ms": 0.23126474228719665, "mean_inference_ms": 1.4144802295158576}, "off_policy_estimator": {}, "info": {"num_steps_trained": 9960000, "num_steps_sampled": 5312000, "sample_time_ms": 21138.109, "load_time_ms": 36.708, "grad_time_ms": 9062.52, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.00037816105759702623, "policy_loss": -0.006607938092201948, "vf_loss": 75.41075897216797, "vf_explained_var": 0.7763264775276184, "kl": 0.0018363663693889976, "entropy": 1.1099668741226196, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5312000, "episodes_total": 13280, "training_iteration": 415, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-20-09", "timestamp": 1660260009, "time_this_iter_s": 29.82709288597107, "time_total_s": 18418.717376947403, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 18418.717376947403, "timesteps_since_restore": 5312000, "iterations_since_restore": 415, "perf": {"cpu_util_percent": 32.99761904761905, "ram_util_percent": 59.095238095238095}}
+{"episode_reward_max": 639.0, "episode_reward_min": 525.0, "episode_reward_mean": 615.97, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 260.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 307.985}, "custom_metrics": {"sparse_reward_mean": 213.8, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 188.37, "shaped_reward_min": 152, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.97, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 18.66, "onion_pickup_agent_1_min": 15, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.62, "useful_onion_pickup_agent_0_min": 13, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.96, "useful_onion_pickup_agent_1_min": 14, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.41, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.37, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.13, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, "useful_onion_drop_agent_1_mean": 0.11, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.25, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.99, "potting_onion_agent_1_min": 14, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.6, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.94, "dish_pickup_agent_1_min": 4, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.49, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.11, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.83, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.69, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.61, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.17, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.59, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.1, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.25, 
"optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.99, "optimal_onion_potting_agent_1_min": 14, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.25, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.99, "viable_onion_potting_agent_1_min": 14, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [636.0, 552.0, 630.0, 633.0, 579.0, 636.0, 587.0, 579.0, 630.0, 630.0, 633.0, 579.0, 630.0, 636.0, 636.0, 633.0, 582.0, 630.0, 636.0, 636.0, 627.0, 582.0, 633.0, 633.0, 587.0, 636.0, 584.0, 579.0, 630.0, 582.0, 627.0, 633.0, 630.0, 630.0, 590.0, 633.0, 627.0, 633.0, 639.0, 530.0, 630.0, 582.0, 582.0, 624.0, 582.0, 630.0, 630.0, 630.0, 630.0, 587.0, 636.0, 639.0, 636.0, 627.0, 633.0, 627.0, 633.0, 636.0, 630.0, 630.0, 630.0, 630.0, 636.0, 639.0, 582.0, 636.0, 639.0, 636.0, 630.0, 633.0, 570.0, 633.0, 582.0, 636.0, 630.0, 639.0, 639.0, 584.0, 570.0, 636.0, 627.0, 627.0, 630.0, 630.0, 573.0, 630.0, 582.0, 587.0, 633.0, 582.0, 633.0, 633.0, 630.0, 525.0, 582.0, 633.0, 627.0, 633.0, 627.0, 561.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [322.0, 314.0, 281.0, 271.0, 311.0, 319.0, 322.0, 311.0, 288.0, 291.0, 319.0, 317.0, 296.0, 291.0, 279.0, 300.0, 313.0, 317.0, 316.0, 314.0, 319.0, 314.0, 282.0, 297.0, 311.0, 319.0, 319.0, 317.0, 319.0, 317.0, 313.0, 320.0, 296.0, 286.0, 319.0, 311.0, 314.0, 322.0, 314.0, 322.0, 306.0, 321.0, 289.0, 293.0, 311.0, 322.0, 319.0, 314.0, 285.0, 302.0, 319.0, 317.0, 290.0, 294.0, 288.0, 291.0, 311.0, 319.0, 295.0, 287.0, 308.0, 319.0, 316.0, 317.0, 313.0, 317.0, 311.0, 319.0, 299.0, 291.0, 318.0, 315.0, 311.0, 316.0, 314.0, 319.0, 324.0, 315.0, 260.0, 270.0, 313.0, 317.0, 290.0, 292.0, 286.0, 296.0, 313.0, 311.0, 291.0, 291.0, 316.0, 314.0, 316.0, 314.0, 314.0, 316.0, 310.0, 
320.0, 288.0, 299.0, 319.0, 317.0, 319.0, 320.0, 314.0, 322.0, 307.0, 320.0, 316.0, 317.0, 311.0, 316.0, 316.0, 317.0, 314.0, 322.0, 311.0, 319.0, 313.0, 317.0, 316.0, 314.0, 316.0, 314.0, 314.0, 322.0, 319.0, 320.0, 290.0, 292.0, 319.0, 317.0, 322.0, 317.0, 321.0, 315.0, 316.0, 314.0, 317.0, 316.0, 284.0, 286.0, 316.0, 317.0, 291.0, 291.0, 314.0, 322.0, 316.0, 314.0, 324.0, 315.0, 319.0, 320.0, 287.0, 297.0, 287.0, 283.0, 321.0, 315.0, 313.0, 314.0, 316.0, 311.0, 319.0, 311.0, 311.0, 319.0, 285.0, 288.0, 318.0, 312.0, 286.0, 296.0, 291.0, 296.0, 314.0, 319.0, 280.0, 302.0, 314.0, 319.0, 316.0, 317.0, 319.0, 311.0, 265.0, 260.0, 293.0, 289.0, 314.0, 319.0, 310.0, 317.0, 314.0, 319.0, 318.0, 309.0, 285.0, 276.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8133221505395263, "mean_processing_ms": 0.23109594102314795, "mean_inference_ms": 1.413916507245515}, "off_policy_estimator": {}, "info": {"num_steps_trained": 9984000, "num_steps_sampled": 5324800, "sample_time_ms": 21409.91, "load_time_ms": 36.506, "grad_time_ms": 8913.404, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.003510029288008809, "policy_loss": -0.0037950894329696894, "vf_loss": 78.6290054321289, "vf_explained_var": 0.7686605453491211, "kl": 0.0018828777829185128, "entropy": 1.1155847311019897, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5324800, "episodes_total": 13312, "training_iteration": 416, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-20-41", "timestamp": 1660260041, "time_this_iter_s": 31.975250005722046, "time_total_s": 18450.692626953125, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 18450.692626953125, "timesteps_since_restore": 5324800, "iterations_since_restore": 416, "perf": {"cpu_util_percent": 31.96888888888889, "ram_util_percent": 59.13111111111111}}
+{"episode_reward_max": 639.0, "episode_reward_min": 525.0, "episode_reward_mean": 613.49, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 260.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 306.745}, "custom_metrics": {"sparse_reward_mean": 213.0, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 187.49, "shaped_reward_min": 152, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.1, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 18.4, "onion_pickup_agent_1_min": 15, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.76, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.81, "useful_onion_pickup_agent_1_min": 14, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.46, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.23, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.12, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, "useful_onion_drop_agent_1_mean": 0.08, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.33, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.86, "potting_onion_agent_1_min": 15, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.57, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 6.07, "dish_pickup_agent_1_min": 4, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.44, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.0, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.83, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.82, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.59, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.16, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.58, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.07, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.33, 
"optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.86, "optimal_onion_potting_agent_1_min": 15, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.33, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.86, "viable_onion_potting_agent_1_min": 15, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [567.0, 636.0, 636.0, 636.0, 630.0, 633.0, 630.0, 630.0, 627.0, 584.0, 633.0, 630.0, 573.0, 636.0, 582.0, 579.0, 633.0, 630.0, 636.0, 633.0, 573.0, 587.0, 579.0, 627.0, 630.0, 564.0, 636.0, 582.0, 636.0, 633.0, 579.0, 633.0, 582.0, 636.0, 639.0, 636.0, 630.0, 633.0, 570.0, 633.0, 582.0, 636.0, 630.0, 639.0, 639.0, 584.0, 570.0, 636.0, 627.0, 627.0, 630.0, 630.0, 573.0, 630.0, 582.0, 587.0, 633.0, 582.0, 633.0, 633.0, 630.0, 525.0, 582.0, 633.0, 627.0, 633.0, 627.0, 561.0, 636.0, 552.0, 630.0, 633.0, 579.0, 636.0, 587.0, 579.0, 630.0, 630.0, 633.0, 579.0, 630.0, 636.0, 636.0, 633.0, 582.0, 630.0, 636.0, 636.0, 627.0, 582.0, 633.0, 633.0, 587.0, 636.0, 584.0, 579.0, 630.0, 582.0, 627.0, 633.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [278.0, 289.0, 319.0, 317.0, 319.0, 317.0, 314.0, 322.0, 313.0, 317.0, 311.0, 322.0, 314.0, 316.0, 319.0, 311.0, 313.0, 314.0, 290.0, 294.0, 316.0, 317.0, 314.0, 316.0, 283.0, 290.0, 322.0, 314.0, 297.0, 285.0, 291.0, 288.0, 318.0, 315.0, 314.0, 316.0, 322.0, 314.0, 314.0, 319.0, 296.0, 277.0, 293.0, 294.0, 291.0, 288.0, 305.0, 322.0, 314.0, 316.0, 279.0, 285.0, 314.0, 322.0, 288.0, 294.0, 319.0, 317.0, 321.0, 312.0, 291.0, 288.0, 319.0, 314.0, 290.0, 292.0, 319.0, 317.0, 322.0, 317.0, 321.0, 315.0, 316.0, 314.0, 317.0, 316.0, 284.0, 286.0, 316.0, 317.0, 291.0, 291.0, 314.0, 322.0, 316.0, 314.0, 324.0, 315.0, 319.0, 320.0, 287.0, 297.0, 287.0, 283.0, 321.0, 315.0, 313.0, 
314.0, 316.0, 311.0, 319.0, 311.0, 311.0, 319.0, 285.0, 288.0, 318.0, 312.0, 286.0, 296.0, 291.0, 296.0, 314.0, 319.0, 280.0, 302.0, 314.0, 319.0, 316.0, 317.0, 319.0, 311.0, 265.0, 260.0, 293.0, 289.0, 314.0, 319.0, 310.0, 317.0, 314.0, 319.0, 318.0, 309.0, 285.0, 276.0, 322.0, 314.0, 281.0, 271.0, 311.0, 319.0, 322.0, 311.0, 288.0, 291.0, 319.0, 317.0, 296.0, 291.0, 279.0, 300.0, 313.0, 317.0, 316.0, 314.0, 319.0, 314.0, 282.0, 297.0, 311.0, 319.0, 319.0, 317.0, 319.0, 317.0, 313.0, 320.0, 296.0, 286.0, 319.0, 311.0, 314.0, 322.0, 314.0, 322.0, 306.0, 321.0, 289.0, 293.0, 311.0, 322.0, 319.0, 314.0, 285.0, 302.0, 319.0, 317.0, 290.0, 294.0, 288.0, 291.0, 311.0, 319.0, 295.0, 287.0, 308.0, 319.0, 316.0, 317.0]}, "sampler_perf": {"mean_env_wait_ms": 0.812458332690278, "mean_processing_ms": 0.23092571272245643, "mean_inference_ms": 1.413108766021267}, "off_policy_estimator": {}, "info": {"num_steps_trained": 10008000, "num_steps_sampled": 5337600, "sample_time_ms": 21466.731, "load_time_ms": 36.538, "grad_time_ms": 8937.935, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0011007506400346756, "policy_loss": -0.005807527806609869, "vf_loss": 74.63658905029297, "vf_explained_var": 0.7761281132698059, "kl": 0.0020840545184910297, "entropy": 1.110751986503601, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5337600, "episodes_total": 13344, "training_iteration": 417, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-21-11", "timestamp": 1660260071, "time_this_iter_s": 30.376654863357544, "time_total_s": 18481.069281816483, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 18481.069281816483, "timesteps_since_restore": 5337600, "iterations_since_restore": 417, "perf": {"cpu_util_percent": 32.944186046511625, "ram_util_percent": 59.151162790697676}}
+{"episode_reward_max": 639.0, "episode_reward_min": 536.0, "episode_reward_mean": 613.55, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 261.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 306.775}, "custom_metrics": {"sparse_reward_mean": 213.0, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 187.55, "shaped_reward_min": 152, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.0, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.39, "onion_pickup_agent_1_min": 15, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.74, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.77, "useful_onion_pickup_agent_1_min": 14, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.31, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.2, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.04, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.06, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.42, "potting_onion_agent_0_min": 13, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.84, "potting_onion_agent_1_min": 15, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.55, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 6.04, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.39, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 4.95, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.8, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.81, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.04, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.59, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.19, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.56, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.1, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.42, 
"optimal_onion_potting_agent_0_min": 13, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.84, "optimal_onion_potting_agent_1_min": 15, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.42, "viable_onion_potting_agent_0_min": 13, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.84, "viable_onion_potting_agent_1_min": 15, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [587.0, 630.0, 639.0, 576.0, 630.0, 633.0, 582.0, 587.0, 618.0, 633.0, 582.0, 621.0, 636.0, 587.0, 636.0, 582.0, 636.0, 630.0, 582.0, 633.0, 581.0, 630.0, 636.0, 630.0, 627.0, 633.0, 636.0, 582.0, 627.0, 536.0, 633.0, 627.0, 627.0, 633.0, 627.0, 561.0, 636.0, 552.0, 630.0, 633.0, 579.0, 636.0, 587.0, 579.0, 630.0, 630.0, 633.0, 579.0, 630.0, 636.0, 636.0, 633.0, 582.0, 630.0, 636.0, 636.0, 627.0, 582.0, 633.0, 633.0, 587.0, 636.0, 584.0, 579.0, 630.0, 582.0, 627.0, 633.0, 567.0, 636.0, 636.0, 636.0, 630.0, 633.0, 630.0, 630.0, 627.0, 584.0, 633.0, 630.0, 573.0, 636.0, 582.0, 579.0, 633.0, 630.0, 636.0, 633.0, 573.0, 587.0, 579.0, 627.0, 630.0, 564.0, 636.0, 582.0, 636.0, 633.0, 579.0, 633.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [293.0, 294.0, 316.0, 314.0, 319.0, 320.0, 288.0, 288.0, 316.0, 314.0, 316.0, 317.0, 291.0, 291.0, 293.0, 294.0, 308.0, 310.0, 314.0, 319.0, 289.0, 293.0, 318.0, 303.0, 314.0, 322.0, 298.0, 289.0, 324.0, 312.0, 290.0, 292.0, 314.0, 322.0, 313.0, 317.0, 288.0, 294.0, 322.0, 311.0, 289.0, 292.0, 314.0, 316.0, 314.0, 322.0, 314.0, 316.0, 312.0, 315.0, 311.0, 322.0, 314.0, 322.0, 294.0, 288.0, 316.0, 311.0, 275.0, 261.0, 319.0, 314.0, 314.0, 313.0, 310.0, 317.0, 314.0, 319.0, 318.0, 309.0, 285.0, 276.0, 322.0, 314.0, 281.0, 271.0, 311.0, 319.0, 322.0, 311.0, 288.0, 291.0, 319.0, 317.0, 296.0, 291.0, 279.0, 300.0, 313.0, 317.0, 316.0, 314.0, 319.0, 314.0, 282.0, 297.0, 311.0, 
319.0, 319.0, 317.0, 319.0, 317.0, 313.0, 320.0, 296.0, 286.0, 319.0, 311.0, 314.0, 322.0, 314.0, 322.0, 306.0, 321.0, 289.0, 293.0, 311.0, 322.0, 319.0, 314.0, 285.0, 302.0, 319.0, 317.0, 290.0, 294.0, 288.0, 291.0, 311.0, 319.0, 295.0, 287.0, 308.0, 319.0, 316.0, 317.0, 278.0, 289.0, 319.0, 317.0, 319.0, 317.0, 314.0, 322.0, 313.0, 317.0, 311.0, 322.0, 314.0, 316.0, 319.0, 311.0, 313.0, 314.0, 290.0, 294.0, 316.0, 317.0, 314.0, 316.0, 283.0, 290.0, 322.0, 314.0, 297.0, 285.0, 291.0, 288.0, 318.0, 315.0, 314.0, 316.0, 322.0, 314.0, 314.0, 319.0, 296.0, 277.0, 293.0, 294.0, 291.0, 288.0, 305.0, 322.0, 314.0, 316.0, 279.0, 285.0, 314.0, 322.0, 288.0, 294.0, 319.0, 317.0, 321.0, 312.0, 291.0, 288.0, 319.0, 314.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8116024056297, "mean_processing_ms": 0.23075773519114987, "mean_inference_ms": 1.4123586541833584}, "off_policy_estimator": {}, "info": {"num_steps_trained": 10032000, "num_steps_sampled": 5350400, "sample_time_ms": 21668.296, "load_time_ms": 36.478, "grad_time_ms": 9333.309, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0028228743467479944, "policy_loss": -0.004008984658867121, "vf_loss": 73.87229919433594, "vf_explained_var": 0.7751579284667969, "kl": 0.0019005200592800975, "entropy": 1.110758900642395, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5350400, "episodes_total": 13376, "training_iteration": 418, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-21-46", "timestamp": 1660260106, "time_this_iter_s": 34.32990908622742, "time_total_s": 18515.39919090271, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 18515.39919090271, "timesteps_since_restore": 5350400, "iterations_since_restore": 418, "perf": {"cpu_util_percent": 29.667346938775513, "ram_util_percent": 59.18979591836735}}
+{"episode_reward_max": 639.0, "episode_reward_min": 180.0, "episode_reward_mean": 609.48, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 88.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 304.74}, "custom_metrics": {"sparse_reward_mean": 211.4, "sparse_reward_min": 60, "sparse_reward_max": 220, "shaped_reward_mean": 186.68, "shaped_reward_min": 60, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.87, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.16, "onion_pickup_agent_1_min": 5, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 16.55, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.45, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.26, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.16, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.01, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.07, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.37, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.67, "potting_onion_agent_1_min": 4, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.72, "dish_pickup_agent_0_min": 5, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 6.0, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.46, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.96, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 6, "dish_drop_agent_0_mean": 0.97, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 0.79, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.54, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.14, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 6, "soup_delivery_agent_0_mean": 5.51, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.07, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.37, 
"optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.67, "optimal_onion_potting_agent_1_min": 4, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.37, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.67, "viable_onion_potting_agent_1_min": 4, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 630.0, 639.0, 630.0, 579.0, 582.0, 636.0, 180.0, 582.0, 579.0, 636.0, 582.0, 573.0, 633.0, 639.0, 633.0, 630.0, 630.0, 633.0, 630.0, 630.0, 630.0, 579.0, 636.0, 630.0, 582.0, 633.0, 587.0, 587.0, 630.0, 627.0, 639.0, 630.0, 582.0, 627.0, 633.0, 567.0, 636.0, 636.0, 636.0, 630.0, 633.0, 630.0, 630.0, 627.0, 584.0, 633.0, 630.0, 573.0, 636.0, 582.0, 579.0, 633.0, 630.0, 636.0, 633.0, 573.0, 587.0, 579.0, 627.0, 630.0, 564.0, 636.0, 582.0, 636.0, 633.0, 579.0, 633.0, 587.0, 630.0, 639.0, 576.0, 630.0, 633.0, 582.0, 587.0, 618.0, 633.0, 582.0, 621.0, 636.0, 587.0, 636.0, 582.0, 636.0, 630.0, 582.0, 633.0, 581.0, 630.0, 636.0, 630.0, 627.0, 633.0, 636.0, 582.0, 627.0, 536.0, 633.0, 627.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [291.0, 288.0, 316.0, 314.0, 324.0, 315.0, 311.0, 319.0, 288.0, 291.0, 294.0, 288.0, 319.0, 317.0, 92.0, 88.0, 293.0, 289.0, 287.0, 292.0, 319.0, 317.0, 289.0, 293.0, 284.0, 289.0, 316.0, 317.0, 322.0, 317.0, 311.0, 322.0, 316.0, 314.0, 316.0, 314.0, 322.0, 311.0, 319.0, 311.0, 311.0, 319.0, 317.0, 313.0, 288.0, 291.0, 319.0, 317.0, 311.0, 319.0, 289.0, 293.0, 319.0, 314.0, 296.0, 291.0, 291.0, 296.0, 313.0, 317.0, 311.0, 316.0, 314.0, 325.0, 311.0, 319.0, 295.0, 287.0, 308.0, 319.0, 316.0, 317.0, 278.0, 289.0, 319.0, 317.0, 319.0, 317.0, 314.0, 322.0, 313.0, 317.0, 311.0, 322.0, 314.0, 316.0, 319.0, 311.0, 313.0, 314.0, 290.0, 294.0, 316.0, 317.0, 314.0, 316.0, 283.0, 
290.0, 322.0, 314.0, 297.0, 285.0, 291.0, 288.0, 318.0, 315.0, 314.0, 316.0, 322.0, 314.0, 314.0, 319.0, 296.0, 277.0, 293.0, 294.0, 291.0, 288.0, 305.0, 322.0, 314.0, 316.0, 279.0, 285.0, 314.0, 322.0, 288.0, 294.0, 319.0, 317.0, 321.0, 312.0, 291.0, 288.0, 319.0, 314.0, 293.0, 294.0, 316.0, 314.0, 319.0, 320.0, 288.0, 288.0, 316.0, 314.0, 316.0, 317.0, 291.0, 291.0, 293.0, 294.0, 308.0, 310.0, 314.0, 319.0, 289.0, 293.0, 318.0, 303.0, 314.0, 322.0, 298.0, 289.0, 324.0, 312.0, 290.0, 292.0, 314.0, 322.0, 313.0, 317.0, 288.0, 294.0, 322.0, 311.0, 289.0, 292.0, 314.0, 316.0, 314.0, 322.0, 314.0, 316.0, 312.0, 315.0, 311.0, 322.0, 314.0, 322.0, 294.0, 288.0, 316.0, 311.0, 275.0, 261.0, 319.0, 314.0, 314.0, 313.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8107503957777891, "mean_processing_ms": 0.2305898995792267, "mean_inference_ms": 1.4116530949413433}, "off_policy_estimator": {}, "info": {"num_steps_trained": 10056000, "num_steps_sampled": 5363200, "sample_time_ms": 21868.435, "load_time_ms": 36.628, "grad_time_ms": 9569.244, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.002508052857592702, "policy_loss": -0.004472339991480112, "vf_loss": 75.3826904296875, "vf_explained_var": 0.7911410927772522, "kl": 0.0020311845000833273, "entropy": 1.1157482862472534, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5363200, "episodes_total": 13408, "training_iteration": 419, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-22-20", "timestamp": 1660260140, "time_this_iter_s": 33.75737500190735, "time_total_s": 18549.156565904617, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 18549.156565904617, "timesteps_since_restore": 5363200, "iterations_since_restore": 419, "perf": {"cpu_util_percent": 32.68936170212766, "ram_util_percent": 59.19574468085105}}
+{"episode_reward_max": 639.0, "episode_reward_min": 180.0, "episode_reward_mean": 609.61, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 88.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 304.805}, "custom_metrics": {"sparse_reward_mean": 211.4, "sparse_reward_min": 60, "sparse_reward_max": 220, "shaped_reward_mean": 186.81, "shaped_reward_min": 60, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.63, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.23, "onion_pickup_agent_1_min": 5, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.29, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 19, "useful_onion_pickup_agent_1_mean": 17.55, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.13, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.17, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.01, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.06, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.28, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.73, "potting_onion_agent_1_min": 4, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.6, "dish_pickup_agent_0_min": 5, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 6.0, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.47, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.04, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 6, "dish_drop_agent_0_mean": 0.88, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 0.78, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.52, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.13, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 6, "soup_delivery_agent_0_mean": 5.49, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.09, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.28, 
"optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.73, "optimal_onion_potting_agent_1_min": 4, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.28, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.73, "viable_onion_potting_agent_1_min": 4, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [576.0, 630.0, 576.0, 630.0, 516.0, 630.0, 636.0, 639.0, 630.0, 582.0, 633.0, 576.0, 582.0, 633.0, 587.0, 627.0, 630.0, 633.0, 630.0, 582.0, 582.0, 633.0, 633.0, 633.0, 630.0, 627.0, 636.0, 579.0, 630.0, 636.0, 633.0, 627.0, 636.0, 633.0, 579.0, 633.0, 587.0, 630.0, 639.0, 576.0, 630.0, 633.0, 582.0, 587.0, 618.0, 633.0, 582.0, 621.0, 636.0, 587.0, 636.0, 582.0, 636.0, 630.0, 582.0, 633.0, 581.0, 630.0, 636.0, 630.0, 627.0, 633.0, 636.0, 582.0, 627.0, 536.0, 633.0, 627.0, 579.0, 630.0, 639.0, 630.0, 579.0, 582.0, 636.0, 180.0, 582.0, 579.0, 636.0, 582.0, 573.0, 633.0, 639.0, 633.0, 630.0, 630.0, 633.0, 630.0, 630.0, 630.0, 579.0, 636.0, 630.0, 582.0, 633.0, 587.0, 587.0, 630.0, 627.0, 639.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [276.0, 300.0, 317.0, 313.0, 285.0, 291.0, 314.0, 316.0, 254.0, 262.0, 319.0, 311.0, 317.0, 319.0, 319.0, 320.0, 316.0, 314.0, 290.0, 292.0, 314.0, 319.0, 285.0, 291.0, 290.0, 292.0, 316.0, 317.0, 291.0, 296.0, 310.0, 317.0, 316.0, 314.0, 316.0, 317.0, 314.0, 316.0, 288.0, 294.0, 291.0, 291.0, 311.0, 322.0, 313.0, 320.0, 316.0, 317.0, 311.0, 319.0, 315.0, 312.0, 316.0, 320.0, 291.0, 288.0, 308.0, 322.0, 311.0, 325.0, 314.0, 319.0, 313.0, 314.0, 319.0, 317.0, 321.0, 312.0, 291.0, 288.0, 319.0, 314.0, 293.0, 294.0, 316.0, 314.0, 319.0, 320.0, 288.0, 288.0, 316.0, 314.0, 316.0, 317.0, 291.0, 291.0, 293.0, 294.0, 308.0, 310.0, 314.0, 319.0, 289.0, 293.0, 318.0, 303.0, 314.0, 
322.0, 298.0, 289.0, 324.0, 312.0, 290.0, 292.0, 314.0, 322.0, 313.0, 317.0, 288.0, 294.0, 322.0, 311.0, 289.0, 292.0, 314.0, 316.0, 314.0, 322.0, 314.0, 316.0, 312.0, 315.0, 311.0, 322.0, 314.0, 322.0, 294.0, 288.0, 316.0, 311.0, 275.0, 261.0, 319.0, 314.0, 314.0, 313.0, 291.0, 288.0, 316.0, 314.0, 324.0, 315.0, 311.0, 319.0, 288.0, 291.0, 294.0, 288.0, 319.0, 317.0, 92.0, 88.0, 293.0, 289.0, 287.0, 292.0, 319.0, 317.0, 289.0, 293.0, 284.0, 289.0, 316.0, 317.0, 322.0, 317.0, 311.0, 322.0, 316.0, 314.0, 316.0, 314.0, 322.0, 311.0, 319.0, 311.0, 311.0, 319.0, 317.0, 313.0, 288.0, 291.0, 319.0, 317.0, 311.0, 319.0, 289.0, 293.0, 319.0, 314.0, 296.0, 291.0, 291.0, 296.0, 313.0, 317.0, 311.0, 316.0, 314.0, 325.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8099141458256162, "mean_processing_ms": 0.23042638477560978, "mean_inference_ms": 1.411223574078033}, "off_policy_estimator": {}, "info": {"num_steps_trained": 10080000, "num_steps_sampled": 5376000, "sample_time_ms": 22436.969, "load_time_ms": 36.407, "grad_time_ms": 9832.43, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0009076216374523938, "policy_loss": -0.0059606158174574375, "vf_loss": 74.23489379882812, "vf_explained_var": 0.7686769366264343, "kl": 0.00233254861086607, "entropy": 1.1105002164840698, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5376000, "episodes_total": 13440, "training_iteration": 420, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-22-56", "timestamp": 1660260176, "time_this_iter_s": 36.52545118331909, "time_total_s": 18585.682017087936, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 18585.682017087936, "timesteps_since_restore": 5376000, "iterations_since_restore": 420, "perf": {"cpu_util_percent": 30.815384615384616, "ram_util_percent": 59.175}}
+{"episode_reward_max": 639.0, "episode_reward_min": 180.0, "episode_reward_mean": 604.33, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 88.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 302.165}, "custom_metrics": {"sparse_reward_mean": 209.6, "sparse_reward_min": 60, "sparse_reward_max": 220, "shaped_reward_mean": 185.13, "shaped_reward_min": 60, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.5, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 18.15, "onion_pickup_agent_1_min": 5, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.15, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 17.41, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.13, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.22, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.02, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.06, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.08, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.55, "potting_onion_agent_1_min": 4, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.58, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 6.15, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 17, "useful_dish_pickup_agent_0_mean": 5.45, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.03, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.93, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 0.94, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 11, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 5.47, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.09, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.44, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.06, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.08, 
"optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.55, "optimal_onion_potting_agent_1_min": 4, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.08, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.55, "viable_onion_potting_agent_1_min": 4, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [627.0, 630.0, 579.0, 576.0, 582.0, 639.0, 630.0, 518.0, 582.0, 582.0, 624.0, 624.0, 633.0, 630.0, 584.0, 633.0, 633.0, 627.0, 630.0, 633.0, 579.0, 633.0, 582.0, 582.0, 627.0, 633.0, 519.0, 582.0, 579.0, 470.0, 587.0, 579.0, 627.0, 536.0, 633.0, 627.0, 579.0, 630.0, 639.0, 630.0, 579.0, 582.0, 636.0, 180.0, 582.0, 579.0, 636.0, 582.0, 573.0, 633.0, 639.0, 633.0, 630.0, 630.0, 633.0, 630.0, 630.0, 630.0, 579.0, 636.0, 630.0, 582.0, 633.0, 587.0, 587.0, 630.0, 627.0, 639.0, 576.0, 630.0, 576.0, 630.0, 516.0, 630.0, 636.0, 639.0, 630.0, 582.0, 633.0, 576.0, 582.0, 633.0, 587.0, 627.0, 630.0, 633.0, 630.0, 582.0, 582.0, 633.0, 633.0, 633.0, 630.0, 627.0, 636.0, 579.0, 630.0, 636.0, 633.0, 627.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [313.0, 314.0, 316.0, 314.0, 288.0, 291.0, 283.0, 293.0, 289.0, 293.0, 317.0, 322.0, 316.0, 314.0, 279.0, 239.0, 293.0, 289.0, 293.0, 289.0, 310.0, 314.0, 304.0, 320.0, 311.0, 322.0, 313.0, 317.0, 295.0, 289.0, 314.0, 319.0, 317.0, 316.0, 313.0, 314.0, 316.0, 314.0, 321.0, 312.0, 281.0, 298.0, 316.0, 317.0, 293.0, 289.0, 287.0, 295.0, 316.0, 311.0, 316.0, 317.0, 261.0, 258.0, 285.0, 297.0, 287.0, 292.0, 226.0, 244.0, 299.0, 288.0, 297.0, 282.0, 316.0, 311.0, 275.0, 261.0, 319.0, 314.0, 314.0, 313.0, 291.0, 288.0, 316.0, 314.0, 324.0, 315.0, 311.0, 319.0, 288.0, 291.0, 294.0, 288.0, 319.0, 317.0, 92.0, 88.0, 293.0, 289.0, 287.0, 292.0, 319.0, 317.0, 289.0, 293.0, 284.0, 
289.0, 316.0, 317.0, 322.0, 317.0, 311.0, 322.0, 316.0, 314.0, 316.0, 314.0, 322.0, 311.0, 319.0, 311.0, 311.0, 319.0, 317.0, 313.0, 288.0, 291.0, 319.0, 317.0, 311.0, 319.0, 289.0, 293.0, 319.0, 314.0, 296.0, 291.0, 291.0, 296.0, 313.0, 317.0, 311.0, 316.0, 314.0, 325.0, 276.0, 300.0, 317.0, 313.0, 285.0, 291.0, 314.0, 316.0, 254.0, 262.0, 319.0, 311.0, 317.0, 319.0, 319.0, 320.0, 316.0, 314.0, 290.0, 292.0, 314.0, 319.0, 285.0, 291.0, 290.0, 292.0, 316.0, 317.0, 291.0, 296.0, 310.0, 317.0, 316.0, 314.0, 316.0, 317.0, 314.0, 316.0, 288.0, 294.0, 291.0, 291.0, 311.0, 322.0, 313.0, 320.0, 316.0, 317.0, 311.0, 319.0, 315.0, 312.0, 316.0, 320.0, 291.0, 288.0, 308.0, 322.0, 311.0, 325.0, 314.0, 319.0, 313.0, 314.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8090875663620912, "mean_processing_ms": 0.23026522516611045, "mean_inference_ms": 1.4109073671228203}, "off_policy_estimator": {}, "info": {"num_steps_trained": 10104000, "num_steps_sampled": 5388800, "sample_time_ms": 22748.439, "load_time_ms": 36.384, "grad_time_ms": 10256.981, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0004364584165159613, "policy_loss": -0.007198403123766184, "vf_loss": 81.89620208740234, "vf_explained_var": 0.7658551335334778, "kl": 0.0018362547270953655, "entropy": 1.1095339059829712, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5388800, "episodes_total": 13472, "training_iteration": 421, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-23-34", "timestamp": 1660260214, "time_this_iter_s": 37.69177174568176, "time_total_s": 18623.373788833618, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 18623.373788833618, "timesteps_since_restore": 5388800, "iterations_since_restore": 421, "perf": {"cpu_util_percent": 34.76037735849056, "ram_util_percent": 59.533962264150944}}
+{"episode_reward_max": 639.0, "episode_reward_min": 470.0, "episode_reward_mean": 605.67, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 226.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 302.835}, "custom_metrics": {"sparse_reward_mean": 210.0, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 185.67, "shaped_reward_min": 150, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.52, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 18.17, "onion_pickup_agent_1_min": 12, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.15, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 17.61, "useful_onion_pickup_agent_1_min": 11, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.13, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.17, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.05, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.03, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.06, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.69, "potting_onion_agent_1_min": 12, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.4, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 6.3, "dish_pickup_agent_1_min": 4, "dish_pickup_agent_1_max": 17, "useful_dish_pickup_agent_0_mean": 5.44, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.05, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.78, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 1.06, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 11, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 5.48, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.12, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.46, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.05, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.06, 
"optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.69, "optimal_onion_potting_agent_1_min": 12, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.06, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.69, "viable_onion_potting_agent_1_min": 12, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [639.0, 587.0, 630.0, 579.0, 633.0, 582.0, 579.0, 576.0, 630.0, 624.0, 630.0, 579.0, 636.0, 522.0, 636.0, 630.0, 587.0, 633.0, 582.0, 630.0, 630.0, 579.0, 573.0, 579.0, 582.0, 582.0, 636.0, 636.0, 633.0, 579.0, 587.0, 579.0, 587.0, 630.0, 627.0, 639.0, 576.0, 630.0, 576.0, 630.0, 516.0, 630.0, 636.0, 639.0, 630.0, 582.0, 633.0, 576.0, 582.0, 633.0, 587.0, 627.0, 630.0, 633.0, 630.0, 582.0, 582.0, 633.0, 633.0, 633.0, 630.0, 627.0, 636.0, 579.0, 630.0, 636.0, 633.0, 627.0, 627.0, 630.0, 579.0, 576.0, 582.0, 639.0, 630.0, 518.0, 582.0, 582.0, 624.0, 624.0, 633.0, 630.0, 584.0, 633.0, 633.0, 627.0, 630.0, 633.0, 579.0, 633.0, 582.0, 582.0, 627.0, 633.0, 519.0, 582.0, 579.0, 470.0, 587.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [317.0, 322.0, 286.0, 301.0, 316.0, 314.0, 290.0, 289.0, 316.0, 317.0, 293.0, 289.0, 288.0, 291.0, 293.0, 283.0, 313.0, 317.0, 308.0, 316.0, 319.0, 311.0, 286.0, 293.0, 316.0, 320.0, 262.0, 260.0, 319.0, 317.0, 316.0, 314.0, 288.0, 299.0, 314.0, 319.0, 298.0, 284.0, 316.0, 314.0, 321.0, 309.0, 287.0, 292.0, 285.0, 288.0, 290.0, 289.0, 286.0, 296.0, 289.0, 293.0, 316.0, 320.0, 319.0, 317.0, 322.0, 311.0, 291.0, 288.0, 291.0, 296.0, 288.0, 291.0, 291.0, 296.0, 313.0, 317.0, 311.0, 316.0, 314.0, 325.0, 276.0, 300.0, 317.0, 313.0, 285.0, 291.0, 314.0, 316.0, 254.0, 262.0, 319.0, 311.0, 317.0, 319.0, 319.0, 320.0, 316.0, 314.0, 290.0, 292.0, 314.0, 319.0, 285.0, 291.0, 290.0, 
292.0, 316.0, 317.0, 291.0, 296.0, 310.0, 317.0, 316.0, 314.0, 316.0, 317.0, 314.0, 316.0, 288.0, 294.0, 291.0, 291.0, 311.0, 322.0, 313.0, 320.0, 316.0, 317.0, 311.0, 319.0, 315.0, 312.0, 316.0, 320.0, 291.0, 288.0, 308.0, 322.0, 311.0, 325.0, 314.0, 319.0, 313.0, 314.0, 313.0, 314.0, 316.0, 314.0, 288.0, 291.0, 283.0, 293.0, 289.0, 293.0, 317.0, 322.0, 316.0, 314.0, 279.0, 239.0, 293.0, 289.0, 293.0, 289.0, 310.0, 314.0, 304.0, 320.0, 311.0, 322.0, 313.0, 317.0, 295.0, 289.0, 314.0, 319.0, 317.0, 316.0, 313.0, 314.0, 316.0, 314.0, 321.0, 312.0, 281.0, 298.0, 316.0, 317.0, 293.0, 289.0, 287.0, 295.0, 316.0, 311.0, 316.0, 317.0, 261.0, 258.0, 285.0, 297.0, 287.0, 292.0, 226.0, 244.0, 299.0, 288.0, 297.0, 282.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8082635107475633, "mean_processing_ms": 0.23010486784203785, "mean_inference_ms": 1.4106251231358269}, "off_policy_estimator": {}, "info": {"num_steps_trained": 10128000, "num_steps_sampled": 5401600, "sample_time_ms": 23169.092, "load_time_ms": 36.151, "grad_time_ms": 10580.278, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0029069948941469193, "policy_loss": -0.004262510221451521, "vf_loss": 77.2344970703125, "vf_explained_var": 0.7713862061500549, "kl": 0.001992677804082632, "entropy": 1.1078964471817017, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5401600, "episodes_total": 13504, "training_iteration": 422, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-24-10", "timestamp": 1660260250, "time_this_iter_s": 35.70268106460571, "time_total_s": 18659.076469898224, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 18659.076469898224, "timesteps_since_restore": 5401600, "iterations_since_restore": 422, "perf": {"cpu_util_percent": 33.46078431372549, "ram_util_percent": 59.009803921568626}}
+{"episode_reward_max": 639.0, "episode_reward_min": 470.0, "episode_reward_mean": 603.43, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 226.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 301.715}, "custom_metrics": {"sparse_reward_mean": 209.0, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 185.43, "shaped_reward_min": 150, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.55, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 18.12, "onion_pickup_agent_1_min": 12, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.19, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 17.57, "useful_onion_pickup_agent_1_min": 11, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.15, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.21, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.06, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.06, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 16.05, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.63, "potting_onion_agent_1_min": 12, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.49, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 6.22, "dish_pickup_agent_1_min": 4, "dish_pickup_agent_1_max": 17, "useful_dish_pickup_agent_0_mean": 5.48, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.05, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.81, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 1.05, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 11, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.04, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 5.53, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.04, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.49, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.97, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.05, 
"optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.63, "optimal_onion_potting_agent_1_min": 12, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.05, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.63, "viable_onion_potting_agent_1_min": 12, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 576.0, 636.0, 576.0, 639.0, 579.0, 636.0, 630.0, 579.0, 582.0, 582.0, 624.0, 581.0, 636.0, 630.0, 582.0, 582.0, 633.0, 582.0, 636.0, 633.0, 587.0, 630.0, 627.0, 633.0, 633.0, 587.0, 582.0, 582.0, 530.0, 633.0, 630.0, 630.0, 636.0, 633.0, 627.0, 627.0, 630.0, 579.0, 576.0, 582.0, 639.0, 630.0, 518.0, 582.0, 582.0, 624.0, 624.0, 633.0, 630.0, 584.0, 633.0, 633.0, 627.0, 630.0, 633.0, 579.0, 633.0, 582.0, 582.0, 627.0, 633.0, 519.0, 582.0, 579.0, 470.0, 587.0, 579.0, 639.0, 587.0, 630.0, 579.0, 633.0, 582.0, 579.0, 576.0, 630.0, 624.0, 630.0, 579.0, 636.0, 522.0, 636.0, 630.0, 587.0, 633.0, 582.0, 630.0, 630.0, 579.0, 573.0, 579.0, 582.0, 582.0, 636.0, 636.0, 633.0, 579.0, 587.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [296.0, 286.0, 282.0, 294.0, 309.0, 327.0, 288.0, 288.0, 322.0, 317.0, 282.0, 297.0, 319.0, 317.0, 316.0, 314.0, 288.0, 291.0, 291.0, 291.0, 288.0, 294.0, 313.0, 311.0, 292.0, 289.0, 319.0, 317.0, 316.0, 314.0, 288.0, 294.0, 292.0, 290.0, 314.0, 319.0, 302.0, 280.0, 316.0, 320.0, 311.0, 322.0, 293.0, 294.0, 316.0, 314.0, 313.0, 314.0, 322.0, 311.0, 316.0, 317.0, 285.0, 302.0, 286.0, 296.0, 297.0, 285.0, 270.0, 260.0, 316.0, 317.0, 316.0, 314.0, 308.0, 322.0, 311.0, 325.0, 314.0, 319.0, 313.0, 314.0, 313.0, 314.0, 316.0, 314.0, 288.0, 291.0, 283.0, 293.0, 289.0, 293.0, 317.0, 322.0, 316.0, 314.0, 279.0, 239.0, 293.0, 289.0, 293.0, 289.0, 310.0, 314.0, 304.0, 320.0, 311.0, 
322.0, 313.0, 317.0, 295.0, 289.0, 314.0, 319.0, 317.0, 316.0, 313.0, 314.0, 316.0, 314.0, 321.0, 312.0, 281.0, 298.0, 316.0, 317.0, 293.0, 289.0, 287.0, 295.0, 316.0, 311.0, 316.0, 317.0, 261.0, 258.0, 285.0, 297.0, 287.0, 292.0, 226.0, 244.0, 299.0, 288.0, 297.0, 282.0, 317.0, 322.0, 286.0, 301.0, 316.0, 314.0, 290.0, 289.0, 316.0, 317.0, 293.0, 289.0, 288.0, 291.0, 293.0, 283.0, 313.0, 317.0, 308.0, 316.0, 319.0, 311.0, 286.0, 293.0, 316.0, 320.0, 262.0, 260.0, 319.0, 317.0, 316.0, 314.0, 288.0, 299.0, 314.0, 319.0, 298.0, 284.0, 316.0, 314.0, 321.0, 309.0, 287.0, 292.0, 285.0, 288.0, 290.0, 289.0, 286.0, 296.0, 289.0, 293.0, 316.0, 320.0, 319.0, 317.0, 322.0, 311.0, 291.0, 288.0, 291.0, 296.0, 288.0, 291.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8074351528225807, "mean_processing_ms": 0.2299430227686211, "mean_inference_ms": 1.4101585420796834}, "off_policy_estimator": {}, "info": {"num_steps_trained": 10152000, "num_steps_sampled": 5414400, "sample_time_ms": 23238.003, "load_time_ms": 36.154, "grad_time_ms": 10699.531, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.004093436989933252, "policy_loss": -0.003705031471326947, "vf_loss": 83.51854705810547, "vf_explained_var": 0.7628346085548401, "kl": 0.001839231583289802, "entropy": 1.1067644357681274, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5414400, "episodes_total": 13536, "training_iteration": 423, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-24-43", "timestamp": 1660260283, "time_this_iter_s": 33.482574224472046, "time_total_s": 18692.559044122696, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 18692.559044122696, "timesteps_since_restore": 5414400, "iterations_since_restore": 423, "perf": {"cpu_util_percent": 33.295744680851065, "ram_util_percent": 58.97021276595746}}
+{"episode_reward_max": 639.0, "episode_reward_min": 470.0, "episode_reward_mean": 602.74, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 226.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 301.37}, "custom_metrics": {"sparse_reward_mean": 208.4, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 185.94, "shaped_reward_min": 150, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.58, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.04, "onion_pickup_agent_1_min": 14, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.22, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.51, "useful_onion_pickup_agent_1_min": 12, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.18, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.18, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.06, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.06, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 16.07, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.65, "potting_onion_agent_1_min": 12, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.71, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 6.04, "dish_pickup_agent_1_min": 4, "dish_pickup_agent_1_max": 17, "useful_dish_pickup_agent_0_mean": 5.61, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.05, "useful_dish_pickup_agent_1_min": 3, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.94, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.89, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 11, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.59, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 4.98, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.53, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.89, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.07, 
"optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.65, "optimal_onion_potting_agent_1_min": 12, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.07, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.65, "viable_onion_potting_agent_1_min": 12, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 627.0, 630.0, 582.0, 636.0, 582.0, 587.0, 636.0, 627.0, 636.0, 633.0, 633.0, 630.0, 579.0, 579.0, 587.0, 636.0, 582.0, 639.0, 630.0, 627.0, 582.0, 582.0, 584.0, 639.0, 582.0, 639.0, 630.0, 522.0, 587.0, 584.0, 579.0, 579.0, 470.0, 587.0, 579.0, 639.0, 587.0, 630.0, 579.0, 633.0, 582.0, 579.0, 576.0, 630.0, 624.0, 630.0, 579.0, 636.0, 522.0, 636.0, 630.0, 587.0, 633.0, 582.0, 630.0, 630.0, 579.0, 573.0, 579.0, 582.0, 582.0, 636.0, 636.0, 633.0, 579.0, 587.0, 579.0, 582.0, 576.0, 636.0, 576.0, 639.0, 579.0, 636.0, 630.0, 579.0, 582.0, 582.0, 624.0, 581.0, 636.0, 630.0, 582.0, 582.0, 633.0, 582.0, 636.0, 633.0, 587.0, 630.0, 627.0, 633.0, 633.0, 587.0, 582.0, 582.0, 530.0, 633.0, 630.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [293.0, 289.0, 313.0, 314.0, 316.0, 314.0, 291.0, 291.0, 317.0, 319.0, 293.0, 289.0, 290.0, 297.0, 319.0, 317.0, 311.0, 316.0, 314.0, 322.0, 316.0, 317.0, 321.0, 312.0, 316.0, 314.0, 286.0, 293.0, 288.0, 291.0, 304.0, 283.0, 319.0, 317.0, 293.0, 289.0, 319.0, 320.0, 314.0, 316.0, 304.0, 323.0, 291.0, 291.0, 291.0, 291.0, 293.0, 291.0, 319.0, 320.0, 291.0, 291.0, 324.0, 315.0, 319.0, 311.0, 268.0, 254.0, 298.0, 289.0, 293.0, 291.0, 293.0, 286.0, 287.0, 292.0, 226.0, 244.0, 299.0, 288.0, 297.0, 282.0, 317.0, 322.0, 286.0, 301.0, 316.0, 314.0, 290.0, 289.0, 316.0, 317.0, 293.0, 289.0, 288.0, 291.0, 293.0, 283.0, 313.0, 317.0, 308.0, 316.0, 319.0, 311.0, 286.0, 293.0, 316.0, 
320.0, 262.0, 260.0, 319.0, 317.0, 316.0, 314.0, 288.0, 299.0, 314.0, 319.0, 298.0, 284.0, 316.0, 314.0, 321.0, 309.0, 287.0, 292.0, 285.0, 288.0, 290.0, 289.0, 286.0, 296.0, 289.0, 293.0, 316.0, 320.0, 319.0, 317.0, 322.0, 311.0, 291.0, 288.0, 291.0, 296.0, 288.0, 291.0, 296.0, 286.0, 282.0, 294.0, 309.0, 327.0, 288.0, 288.0, 322.0, 317.0, 282.0, 297.0, 319.0, 317.0, 316.0, 314.0, 288.0, 291.0, 291.0, 291.0, 288.0, 294.0, 313.0, 311.0, 292.0, 289.0, 319.0, 317.0, 316.0, 314.0, 288.0, 294.0, 292.0, 290.0, 314.0, 319.0, 302.0, 280.0, 316.0, 320.0, 311.0, 322.0, 293.0, 294.0, 316.0, 314.0, 313.0, 314.0, 322.0, 311.0, 316.0, 317.0, 285.0, 302.0, 286.0, 296.0, 297.0, 285.0, 270.0, 260.0, 316.0, 317.0, 316.0, 314.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8066043021110226, "mean_processing_ms": 0.22977916427475648, "mean_inference_ms": 1.4095576278285673}, "off_policy_estimator": {}, "info": {"num_steps_trained": 10176000, "num_steps_sampled": 5427200, "sample_time_ms": 22903.737, "load_time_ms": 36.042, "grad_time_ms": 10790.567, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0005737189203500748, "policy_loss": -0.006916053593158722, "vf_loss": 80.42852783203125, "vf_explained_var": 0.7650584578514099, "kl": 0.0017220000736415386, "entropy": 1.1061476469039917, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5427200, "episodes_total": 13568, "training_iteration": 424, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-25-17", "timestamp": 1660260317, "time_this_iter_s": 33.90529203414917, "time_total_s": 18726.464336156845, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 18726.464336156845, "timesteps_since_restore": 5427200, "iterations_since_restore": 424, "perf": {"cpu_util_percent": 32.64166666666667, "ram_util_percent": 58.96875}}
+{"episode_reward_max": 639.0, "episode_reward_min": 522.0, "episode_reward_mean": 606.18, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 254.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 303.09}, "custom_metrics": {"sparse_reward_mean": 209.8, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 186.58, "shaped_reward_min": 161, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.74, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.13, "onion_pickup_agent_1_min": 15, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.41, "useful_onion_pickup_agent_0_min": 13, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.52, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.2, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.15, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.05, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.04, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 16.17, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.71, "potting_onion_agent_1_min": 14, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.77, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.99, "dish_pickup_agent_1_min": 4, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.59, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.04, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.97, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.81, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.63, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 4.98, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.58, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.91, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.17, 
"optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.71, "optimal_onion_potting_agent_1_min": 14, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.17, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.71, "viable_onion_potting_agent_1_min": 14, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [633.0, 633.0, 630.0, 639.0, 582.0, 582.0, 633.0, 630.0, 633.0, 579.0, 636.0, 582.0, 579.0, 636.0, 630.0, 579.0, 582.0, 587.0, 633.0, 630.0, 633.0, 587.0, 636.0, 582.0, 579.0, 576.0, 630.0, 561.0, 627.0, 627.0, 630.0, 564.0, 633.0, 579.0, 587.0, 579.0, 582.0, 576.0, 636.0, 576.0, 639.0, 579.0, 636.0, 630.0, 579.0, 582.0, 582.0, 624.0, 581.0, 636.0, 630.0, 582.0, 582.0, 633.0, 582.0, 636.0, 633.0, 587.0, 630.0, 627.0, 633.0, 633.0, 587.0, 582.0, 582.0, 530.0, 633.0, 630.0, 582.0, 627.0, 630.0, 582.0, 636.0, 582.0, 587.0, 636.0, 627.0, 636.0, 633.0, 633.0, 630.0, 579.0, 579.0, 587.0, 636.0, 582.0, 639.0, 630.0, 627.0, 582.0, 582.0, 584.0, 639.0, 582.0, 639.0, 630.0, 522.0, 587.0, 584.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [314.0, 319.0, 317.0, 316.0, 313.0, 317.0, 319.0, 320.0, 293.0, 289.0, 293.0, 289.0, 314.0, 319.0, 319.0, 311.0, 316.0, 317.0, 286.0, 293.0, 319.0, 317.0, 293.0, 289.0, 290.0, 289.0, 317.0, 319.0, 319.0, 311.0, 291.0, 288.0, 290.0, 292.0, 293.0, 294.0, 316.0, 317.0, 319.0, 311.0, 316.0, 317.0, 288.0, 299.0, 319.0, 317.0, 291.0, 291.0, 288.0, 291.0, 288.0, 288.0, 309.0, 321.0, 278.0, 283.0, 316.0, 311.0, 313.0, 314.0, 319.0, 311.0, 284.0, 280.0, 322.0, 311.0, 291.0, 288.0, 291.0, 296.0, 288.0, 291.0, 296.0, 286.0, 282.0, 294.0, 309.0, 327.0, 288.0, 288.0, 322.0, 317.0, 282.0, 297.0, 319.0, 317.0, 316.0, 314.0, 288.0, 291.0, 291.0, 291.0, 288.0, 294.0, 313.0, 311.0, 292.0, 
289.0, 319.0, 317.0, 316.0, 314.0, 288.0, 294.0, 292.0, 290.0, 314.0, 319.0, 302.0, 280.0, 316.0, 320.0, 311.0, 322.0, 293.0, 294.0, 316.0, 314.0, 313.0, 314.0, 322.0, 311.0, 316.0, 317.0, 285.0, 302.0, 286.0, 296.0, 297.0, 285.0, 270.0, 260.0, 316.0, 317.0, 316.0, 314.0, 293.0, 289.0, 313.0, 314.0, 316.0, 314.0, 291.0, 291.0, 317.0, 319.0, 293.0, 289.0, 290.0, 297.0, 319.0, 317.0, 311.0, 316.0, 314.0, 322.0, 316.0, 317.0, 321.0, 312.0, 316.0, 314.0, 286.0, 293.0, 288.0, 291.0, 304.0, 283.0, 319.0, 317.0, 293.0, 289.0, 319.0, 320.0, 314.0, 316.0, 304.0, 323.0, 291.0, 291.0, 291.0, 291.0, 293.0, 291.0, 319.0, 320.0, 291.0, 291.0, 324.0, 315.0, 319.0, 311.0, 268.0, 254.0, 298.0, 289.0, 293.0, 291.0, 293.0, 286.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8057716704864768, "mean_processing_ms": 0.22961462225140888, "mean_inference_ms": 1.4087586857000423}, "off_policy_estimator": {}, "info": {"num_steps_trained": 10200000, "num_steps_sampled": 5440000, "sample_time_ms": 22855.818, "load_time_ms": 36.111, "grad_time_ms": 10847.011, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.001967804506421089, "policy_loss": -0.00539380731061101, "vf_loss": 79.134033203125, "vf_explained_var": 0.7726359963417053, "kl": 0.0021053599193692207, "entropy": 1.1035689115524292, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5440000, "episodes_total": 13600, "training_iteration": 425, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-25-47", "timestamp": 1660260347, "time_this_iter_s": 29.913795948028564, "time_total_s": 18756.378132104874, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 18756.378132104874, "timesteps_since_restore": 5440000, "iterations_since_restore": 425, "perf": {"cpu_util_percent": 31.83333333333333, "ram_util_percent": 58.778571428571425}}
+{"episode_reward_max": 639.0, "episode_reward_min": 522.0, "episode_reward_mean": 610.57, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 254.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 305.285}, "custom_metrics": {"sparse_reward_mean": 211.6, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 187.37, "shaped_reward_min": 161, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.51, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.46, "onion_pickup_agent_1_min": 15, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.2, "useful_onion_pickup_agent_0_min": 13, "useful_onion_pickup_agent_0_max": 19, "useful_onion_pickup_agent_1_mean": 17.87, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.15, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.15, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.03, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.04, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 16.05, "potting_onion_agent_0_min": 13, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.96, "potting_onion_agent_1_min": 14, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.87, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 6.01, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.7, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.93, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.96, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.89, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.74, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.95, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.68, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.9, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.05, 
"optimal_onion_potting_agent_0_min": 13, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.96, "optimal_onion_potting_agent_1_min": 14, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.05, "viable_onion_potting_agent_0_min": 13, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.96, "viable_onion_potting_agent_1_min": 14, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [630.0, 624.0, 636.0, 627.0, 582.0, 636.0, 636.0, 630.0, 573.0, 630.0, 630.0, 579.0, 633.0, 636.0, 630.0, 639.0, 627.0, 582.0, 639.0, 587.0, 630.0, 636.0, 584.0, 633.0, 630.0, 579.0, 618.0, 630.0, 627.0, 636.0, 587.0, 636.0, 582.0, 530.0, 633.0, 630.0, 582.0, 627.0, 630.0, 582.0, 636.0, 582.0, 587.0, 636.0, 627.0, 636.0, 633.0, 633.0, 630.0, 579.0, 579.0, 587.0, 636.0, 582.0, 639.0, 630.0, 627.0, 582.0, 582.0, 584.0, 639.0, 582.0, 639.0, 630.0, 522.0, 587.0, 584.0, 579.0, 633.0, 633.0, 630.0, 639.0, 582.0, 582.0, 633.0, 630.0, 633.0, 579.0, 636.0, 582.0, 579.0, 636.0, 630.0, 579.0, 582.0, 587.0, 633.0, 630.0, 633.0, 587.0, 636.0, 582.0, 579.0, 576.0, 630.0, 561.0, 627.0, 627.0, 630.0, 564.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [313.0, 317.0, 320.0, 304.0, 319.0, 317.0, 318.0, 309.0, 296.0, 286.0, 317.0, 319.0, 319.0, 317.0, 313.0, 317.0, 284.0, 289.0, 323.0, 307.0, 311.0, 319.0, 298.0, 281.0, 316.0, 317.0, 319.0, 317.0, 316.0, 314.0, 319.0, 320.0, 313.0, 314.0, 286.0, 296.0, 322.0, 317.0, 283.0, 304.0, 316.0, 314.0, 314.0, 322.0, 295.0, 289.0, 319.0, 314.0, 314.0, 316.0, 288.0, 291.0, 299.0, 319.0, 316.0, 314.0, 311.0, 316.0, 313.0, 323.0, 293.0, 294.0, 316.0, 320.0, 297.0, 285.0, 270.0, 260.0, 316.0, 317.0, 316.0, 314.0, 293.0, 289.0, 313.0, 314.0, 316.0, 314.0, 291.0, 291.0, 317.0, 319.0, 293.0, 289.0, 290.0, 297.0, 319.0, 317.0, 311.0, 316.0, 314.0, 322.0, 316.0, 317.0, 321.0, 312.0, 316.0, 
314.0, 286.0, 293.0, 288.0, 291.0, 304.0, 283.0, 319.0, 317.0, 293.0, 289.0, 319.0, 320.0, 314.0, 316.0, 304.0, 323.0, 291.0, 291.0, 291.0, 291.0, 293.0, 291.0, 319.0, 320.0, 291.0, 291.0, 324.0, 315.0, 319.0, 311.0, 268.0, 254.0, 298.0, 289.0, 293.0, 291.0, 293.0, 286.0, 314.0, 319.0, 317.0, 316.0, 313.0, 317.0, 319.0, 320.0, 293.0, 289.0, 293.0, 289.0, 314.0, 319.0, 319.0, 311.0, 316.0, 317.0, 286.0, 293.0, 319.0, 317.0, 293.0, 289.0, 290.0, 289.0, 317.0, 319.0, 319.0, 311.0, 291.0, 288.0, 290.0, 292.0, 293.0, 294.0, 316.0, 317.0, 319.0, 311.0, 316.0, 317.0, 288.0, 299.0, 319.0, 317.0, 291.0, 291.0, 288.0, 291.0, 288.0, 288.0, 309.0, 321.0, 278.0, 283.0, 316.0, 311.0, 313.0, 314.0, 319.0, 311.0, 284.0, 280.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8049370656544826, "mean_processing_ms": 0.22944824169786282, "mean_inference_ms": 1.4078260577315087}, "off_policy_estimator": {}, "info": {"num_steps_trained": 10224000, "num_steps_sampled": 5452800, "sample_time_ms": 22520.554, "load_time_ms": 36.416, "grad_time_ms": 10937.296, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 9.403874173585791e-06, "policy_loss": -0.006973860785365105, "vf_loss": 75.33930969238281, "vf_explained_var": 0.7694594264030457, "kl": 0.00176583684515208, "entropy": 1.1013368368148804, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5452800, "episodes_total": 13632, "training_iteration": 426, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-26-16", "timestamp": 1660260376, "time_this_iter_s": 29.526015043258667, "time_total_s": 18785.904147148132, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 18785.904147148132, "timesteps_since_restore": 5452800, "iterations_since_restore": 426, "perf": {"cpu_util_percent": 31.057142857142853, "ram_util_percent": 58.84047619047618}}
+{"episode_reward_max": 639.0, "episode_reward_min": 522.0, "episode_reward_mean": 614.23, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 254.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 307.115}, "custom_metrics": {"sparse_reward_mean": 213.2, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 187.83, "shaped_reward_min": 161, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.73, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.36, "onion_pickup_agent_1_min": 15, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.44, "useful_onion_pickup_agent_0_min": 13, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.74, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.15, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.13, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.02, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.04, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.35, "potting_onion_agent_0_min": 13, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.84, "potting_onion_agent_1_min": 14, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.7, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 6.04, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.58, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.94, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.88, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.85, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.67, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.09, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.64, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.02, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 16.35, 
"optimal_onion_potting_agent_0_min": 13, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.84, "optimal_onion_potting_agent_1_min": 14, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.35, "viable_onion_potting_agent_0_min": 13, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.84, "viable_onion_potting_agent_1_min": 14, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [630.0, 633.0, 573.0, 582.0, 579.0, 639.0, 636.0, 636.0, 633.0, 579.0, 630.0, 630.0, 630.0, 582.0, 627.0, 582.0, 630.0, 621.0, 630.0, 630.0, 633.0, 636.0, 627.0, 636.0, 584.0, 636.0, 633.0, 636.0, 630.0, 627.0, 633.0, 636.0, 522.0, 587.0, 584.0, 579.0, 633.0, 633.0, 630.0, 639.0, 582.0, 582.0, 633.0, 630.0, 633.0, 579.0, 636.0, 582.0, 579.0, 636.0, 630.0, 579.0, 582.0, 587.0, 633.0, 630.0, 633.0, 587.0, 636.0, 582.0, 579.0, 576.0, 630.0, 561.0, 627.0, 627.0, 630.0, 564.0, 630.0, 624.0, 636.0, 627.0, 582.0, 636.0, 636.0, 630.0, 573.0, 630.0, 630.0, 579.0, 633.0, 636.0, 630.0, 639.0, 627.0, 582.0, 639.0, 587.0, 630.0, 636.0, 584.0, 633.0, 630.0, 579.0, 618.0, 630.0, 627.0, 636.0, 587.0, 636.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [311.0, 319.0, 317.0, 316.0, 289.0, 284.0, 293.0, 289.0, 287.0, 292.0, 317.0, 322.0, 314.0, 322.0, 316.0, 320.0, 316.0, 317.0, 297.0, 282.0, 317.0, 313.0, 314.0, 316.0, 324.0, 306.0, 286.0, 296.0, 318.0, 309.0, 297.0, 285.0, 311.0, 319.0, 319.0, 302.0, 316.0, 314.0, 316.0, 314.0, 316.0, 317.0, 316.0, 320.0, 319.0, 308.0, 319.0, 317.0, 290.0, 294.0, 314.0, 322.0, 314.0, 319.0, 314.0, 322.0, 314.0, 316.0, 316.0, 311.0, 317.0, 316.0, 319.0, 317.0, 268.0, 254.0, 298.0, 289.0, 293.0, 291.0, 293.0, 286.0, 314.0, 319.0, 317.0, 316.0, 313.0, 317.0, 319.0, 320.0, 293.0, 289.0, 293.0, 289.0, 314.0, 319.0, 319.0, 311.0, 316.0, 317.0, 286.0, 293.0, 319.0, 317.0, 293.0, 289.0, 290.0, 
289.0, 317.0, 319.0, 319.0, 311.0, 291.0, 288.0, 290.0, 292.0, 293.0, 294.0, 316.0, 317.0, 319.0, 311.0, 316.0, 317.0, 288.0, 299.0, 319.0, 317.0, 291.0, 291.0, 288.0, 291.0, 288.0, 288.0, 309.0, 321.0, 278.0, 283.0, 316.0, 311.0, 313.0, 314.0, 319.0, 311.0, 284.0, 280.0, 313.0, 317.0, 320.0, 304.0, 319.0, 317.0, 318.0, 309.0, 296.0, 286.0, 317.0, 319.0, 319.0, 317.0, 313.0, 317.0, 284.0, 289.0, 323.0, 307.0, 311.0, 319.0, 298.0, 281.0, 316.0, 317.0, 319.0, 317.0, 316.0, 314.0, 319.0, 320.0, 313.0, 314.0, 286.0, 296.0, 322.0, 317.0, 283.0, 304.0, 316.0, 314.0, 314.0, 322.0, 295.0, 289.0, 319.0, 314.0, 314.0, 316.0, 288.0, 291.0, 299.0, 319.0, 316.0, 314.0, 311.0, 316.0, 313.0, 323.0, 293.0, 294.0, 316.0, 320.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8041036336722438, "mean_processing_ms": 0.2292823831802886, "mean_inference_ms": 1.4068110858832141}, "off_policy_estimator": {}, "info": {"num_steps_trained": 10248000, "num_steps_sampled": 5465600, "sample_time_ms": 22514.038, "load_time_ms": 36.353, "grad_time_ms": 10948.965, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.00023957279336173087, "policy_loss": -0.006577346473932266, "vf_loss": 73.66693878173828, "vf_explained_var": 0.7691845297813416, "kl": 0.001824389211833477, "entropy": 1.099536418914795, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5465600, "episodes_total": 13664, "training_iteration": 427, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-26-47", "timestamp": 1660260407, "time_this_iter_s": 30.428364992141724, "time_total_s": 18816.332512140274, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 18816.332512140274, "timesteps_since_restore": 5465600, "iterations_since_restore": 427, "perf": {"cpu_util_percent": 33.06976744186046, "ram_util_percent": 59.05581395348838}}
+{"episode_reward_max": 639.0, "episode_reward_min": 462.0, "episode_reward_mean": 614.26, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 230.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 307.13}, "custom_metrics": {"sparse_reward_mean": 213.2, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 187.86, "shaped_reward_min": 142, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.72, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.42, "onion_pickup_agent_1_min": 14, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.39, "useful_onion_pickup_agent_0_min": 13, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.78, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.18, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.19, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.07, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.07, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.32, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.88, "potting_onion_agent_1_min": 14, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.79, "dish_pickup_agent_0_min": 5, "dish_pickup_agent_0_max": 13, "dish_pickup_agent_1_mean": 6.03, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.59, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.93, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.98, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 0.84, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.05, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.67, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.11, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.64, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.03, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.04, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 16.32, 
"optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.88, "optimal_onion_potting_agent_1_min": 14, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.32, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.88, "viable_onion_potting_agent_1_min": 14, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [636.0, 582.0, 582.0, 633.0, 639.0, 633.0, 636.0, 636.0, 633.0, 465.0, 630.0, 584.0, 570.0, 587.0, 627.0, 633.0, 630.0, 587.0, 636.0, 639.0, 462.0, 639.0, 633.0, 567.0, 582.0, 576.0, 582.0, 636.0, 633.0, 590.0, 630.0, 579.0, 627.0, 627.0, 630.0, 564.0, 630.0, 624.0, 636.0, 627.0, 582.0, 636.0, 636.0, 630.0, 573.0, 630.0, 630.0, 579.0, 633.0, 636.0, 630.0, 639.0, 627.0, 582.0, 639.0, 587.0, 630.0, 636.0, 584.0, 633.0, 630.0, 579.0, 618.0, 630.0, 627.0, 636.0, 587.0, 636.0, 630.0, 633.0, 573.0, 582.0, 579.0, 639.0, 636.0, 636.0, 633.0, 579.0, 630.0, 630.0, 630.0, 582.0, 627.0, 582.0, 630.0, 621.0, 630.0, 630.0, 633.0, 636.0, 627.0, 636.0, 584.0, 636.0, 633.0, 636.0, 630.0, 627.0, 633.0, 636.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [319.0, 317.0, 296.0, 286.0, 289.0, 293.0, 316.0, 317.0, 322.0, 317.0, 321.0, 312.0, 316.0, 320.0, 319.0, 317.0, 314.0, 319.0, 231.0, 234.0, 317.0, 313.0, 288.0, 296.0, 294.0, 276.0, 293.0, 294.0, 321.0, 306.0, 314.0, 319.0, 316.0, 314.0, 288.0, 299.0, 319.0, 317.0, 316.0, 323.0, 230.0, 232.0, 319.0, 320.0, 311.0, 322.0, 276.0, 291.0, 288.0, 294.0, 293.0, 283.0, 280.0, 302.0, 317.0, 319.0, 314.0, 319.0, 301.0, 289.0, 316.0, 314.0, 290.0, 289.0, 316.0, 311.0, 313.0, 314.0, 319.0, 311.0, 284.0, 280.0, 313.0, 317.0, 320.0, 304.0, 319.0, 317.0, 318.0, 309.0, 296.0, 286.0, 317.0, 319.0, 319.0, 317.0, 313.0, 317.0, 284.0, 289.0, 323.0, 307.0, 311.0, 319.0, 298.0, 281.0, 316.0, 
317.0, 319.0, 317.0, 316.0, 314.0, 319.0, 320.0, 313.0, 314.0, 286.0, 296.0, 322.0, 317.0, 283.0, 304.0, 316.0, 314.0, 314.0, 322.0, 295.0, 289.0, 319.0, 314.0, 314.0, 316.0, 288.0, 291.0, 299.0, 319.0, 316.0, 314.0, 311.0, 316.0, 313.0, 323.0, 293.0, 294.0, 316.0, 320.0, 311.0, 319.0, 317.0, 316.0, 289.0, 284.0, 293.0, 289.0, 287.0, 292.0, 317.0, 322.0, 314.0, 322.0, 316.0, 320.0, 316.0, 317.0, 297.0, 282.0, 317.0, 313.0, 314.0, 316.0, 324.0, 306.0, 286.0, 296.0, 318.0, 309.0, 297.0, 285.0, 311.0, 319.0, 319.0, 302.0, 316.0, 314.0, 316.0, 314.0, 316.0, 317.0, 316.0, 320.0, 319.0, 308.0, 319.0, 317.0, 290.0, 294.0, 314.0, 322.0, 314.0, 319.0, 314.0, 322.0, 314.0, 316.0, 316.0, 311.0, 317.0, 316.0, 319.0, 317.0]}, "sampler_perf": {"mean_env_wait_ms": 0.803277493605749, "mean_processing_ms": 0.22911824330865546, "mean_inference_ms": 1.4058271454809743}, "off_policy_estimator": {}, "info": {"num_steps_trained": 10272000, "num_steps_sampled": 5478400, "sample_time_ms": 22384.398, "load_time_ms": 36.404, "grad_time_ms": 10797.09, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0016918530454859138, "policy_loss": -0.005912030581384897, "vf_loss": 81.57828521728516, "vf_explained_var": 0.7692078948020935, "kl": 0.0022940493654459715, "entropy": 1.1078943014144897, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5478400, "episodes_total": 13696, "training_iteration": 428, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-27-18", "timestamp": 1660260438, "time_this_iter_s": 31.51498508453369, "time_total_s": 18847.847497224808, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 18847.847497224808, "timesteps_since_restore": 5478400, "iterations_since_restore": 428, "perf": {"cpu_util_percent": 32.73555555555556, "ram_util_percent": 59.61555555555556}}
+{"episode_reward_max": 639.0, "episode_reward_min": 462.0, "episode_reward_mean": 610.95, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 230.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 305.475}, "custom_metrics": {"sparse_reward_mean": 211.8, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 187.35, "shaped_reward_min": 142, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.98, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.21, "onion_pickup_agent_1_min": 14, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.55, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.37, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.26, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.26, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.1, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.09, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.44, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.66, "potting_onion_agent_1_min": 14, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.73, "dish_pickup_agent_0_min": 5, "dish_pickup_agent_0_max": 13, "dish_pickup_agent_1_mean": 5.91, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.59, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.96, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.99, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 0.69, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.06, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.61, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.11, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.56, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.04, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.04, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 16.44, 
"optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.66, "optimal_onion_potting_agent_1_min": 14, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.44, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.66, "viable_onion_potting_agent_1_min": 14, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [636.0, 639.0, 570.0, 639.0, 590.0, 579.0, 579.0, 639.0, 636.0, 584.0, 522.0, 582.0, 627.0, 633.0, 618.0, 627.0, 584.0, 633.0, 639.0, 576.0, 582.0, 636.0, 630.0, 522.0, 627.0, 636.0, 630.0, 630.0, 525.0, 630.0, 630.0, 633.0, 627.0, 636.0, 587.0, 636.0, 630.0, 633.0, 573.0, 582.0, 579.0, 639.0, 636.0, 636.0, 633.0, 579.0, 630.0, 630.0, 630.0, 582.0, 627.0, 582.0, 630.0, 621.0, 630.0, 630.0, 633.0, 636.0, 627.0, 636.0, 584.0, 636.0, 633.0, 636.0, 630.0, 627.0, 633.0, 636.0, 636.0, 582.0, 582.0, 633.0, 639.0, 633.0, 636.0, 636.0, 633.0, 465.0, 630.0, 584.0, 570.0, 587.0, 627.0, 633.0, 630.0, 587.0, 636.0, 639.0, 462.0, 639.0, 633.0, 567.0, 582.0, 576.0, 582.0, 636.0, 633.0, 590.0, 630.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [316.0, 320.0, 319.0, 320.0, 290.0, 280.0, 324.0, 315.0, 293.0, 297.0, 297.0, 282.0, 285.0, 294.0, 319.0, 320.0, 314.0, 322.0, 293.0, 291.0, 265.0, 257.0, 297.0, 285.0, 316.0, 311.0, 317.0, 316.0, 307.0, 311.0, 323.0, 304.0, 299.0, 285.0, 319.0, 314.0, 319.0, 320.0, 285.0, 291.0, 291.0, 291.0, 322.0, 314.0, 314.0, 316.0, 267.0, 255.0, 313.0, 314.0, 319.0, 317.0, 319.0, 311.0, 313.0, 317.0, 265.0, 260.0, 311.0, 319.0, 319.0, 311.0, 314.0, 319.0, 311.0, 316.0, 313.0, 323.0, 293.0, 294.0, 316.0, 320.0, 311.0, 319.0, 317.0, 316.0, 289.0, 284.0, 293.0, 289.0, 287.0, 292.0, 317.0, 322.0, 314.0, 322.0, 316.0, 320.0, 316.0, 317.0, 297.0, 282.0, 317.0, 313.0, 314.0, 316.0, 324.0, 
306.0, 286.0, 296.0, 318.0, 309.0, 297.0, 285.0, 311.0, 319.0, 319.0, 302.0, 316.0, 314.0, 316.0, 314.0, 316.0, 317.0, 316.0, 320.0, 319.0, 308.0, 319.0, 317.0, 290.0, 294.0, 314.0, 322.0, 314.0, 319.0, 314.0, 322.0, 314.0, 316.0, 316.0, 311.0, 317.0, 316.0, 319.0, 317.0, 319.0, 317.0, 296.0, 286.0, 289.0, 293.0, 316.0, 317.0, 322.0, 317.0, 321.0, 312.0, 316.0, 320.0, 319.0, 317.0, 314.0, 319.0, 231.0, 234.0, 317.0, 313.0, 288.0, 296.0, 294.0, 276.0, 293.0, 294.0, 321.0, 306.0, 314.0, 319.0, 316.0, 314.0, 288.0, 299.0, 319.0, 317.0, 316.0, 323.0, 230.0, 232.0, 319.0, 320.0, 311.0, 322.0, 276.0, 291.0, 288.0, 294.0, 293.0, 283.0, 280.0, 302.0, 317.0, 319.0, 314.0, 319.0, 301.0, 289.0, 316.0, 314.0, 290.0, 289.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8024594853265918, "mean_processing_ms": 0.2289562714759904, "mean_inference_ms": 1.4049375994877125}, "off_policy_estimator": {}, "info": {"num_steps_trained": 10296000, "num_steps_sampled": 5491200, "sample_time_ms": 22179.532, "load_time_ms": 37.018, "grad_time_ms": 10788.772, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0015958556905388832, "policy_loss": -0.010051627643406391, "vf_loss": 90.0771713256836, "vf_explained_var": 0.7485197186470032, "kl": 0.0020946140866726637, "entropy": 1.1038951873779297, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5491200, "episodes_total": 13728, "training_iteration": 429, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-27-50", "timestamp": 1660260470, "time_this_iter_s": 31.638920783996582, "time_total_s": 18879.486418008804, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 18879.486418008804, "timesteps_since_restore": 5491200, "iterations_since_restore": 429, "perf": {"cpu_util_percent": 30.328888888888887, "ram_util_percent": 59.14666666666665}}
+{"episode_reward_max": 639.0, "episode_reward_min": 351.0, "episode_reward_mean": 605.17, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 174.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 302.585}, "custom_metrics": {"sparse_reward_mean": 209.6, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 185.97, "shaped_reward_min": 111, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.86, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.12, "onion_pickup_agent_1_min": 13, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.3, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.35, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.27, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.23, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.1, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.08, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.24, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.61, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 20, "dish_pickup_agent_0_mean": 6.68, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 13, "dish_pickup_agent_1_mean": 6.17, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.52, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.97, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.99, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 0.95, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.06, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.58, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.04, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 6, "soup_delivery_agent_0_mean": 5.52, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.98, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.24, 
"optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.61, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 20, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.24, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.61, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 20, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [630.0, 579.0, 630.0, 630.0, 351.0, 633.0, 636.0, 582.0, 582.0, 587.0, 576.0, 582.0, 633.0, 633.0, 633.0, 633.0, 544.0, 633.0, 579.0, 582.0, 639.0, 633.0, 636.0, 639.0, 558.0, 576.0, 636.0, 627.0, 576.0, 636.0, 587.0, 630.0, 630.0, 627.0, 633.0, 636.0, 636.0, 582.0, 582.0, 633.0, 639.0, 633.0, 636.0, 636.0, 633.0, 465.0, 630.0, 584.0, 570.0, 587.0, 627.0, 633.0, 630.0, 587.0, 636.0, 639.0, 462.0, 639.0, 633.0, 567.0, 582.0, 576.0, 582.0, 636.0, 633.0, 590.0, 630.0, 579.0, 636.0, 639.0, 570.0, 639.0, 590.0, 579.0, 579.0, 639.0, 636.0, 584.0, 522.0, 582.0, 627.0, 633.0, 618.0, 627.0, 584.0, 633.0, 639.0, 576.0, 582.0, 636.0, 630.0, 522.0, 627.0, 636.0, 630.0, 630.0, 525.0, 630.0, 630.0, 633.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [311.0, 319.0, 293.0, 286.0, 311.0, 319.0, 313.0, 317.0, 177.0, 174.0, 314.0, 319.0, 319.0, 317.0, 294.0, 288.0, 286.0, 296.0, 293.0, 294.0, 282.0, 294.0, 288.0, 294.0, 316.0, 317.0, 316.0, 317.0, 324.0, 309.0, 316.0, 317.0, 270.0, 274.0, 311.0, 322.0, 288.0, 291.0, 286.0, 296.0, 317.0, 322.0, 319.0, 314.0, 319.0, 317.0, 322.0, 317.0, 276.0, 282.0, 285.0, 291.0, 319.0, 317.0, 313.0, 314.0, 288.0, 288.0, 319.0, 317.0, 293.0, 294.0, 311.0, 319.0, 314.0, 316.0, 316.0, 311.0, 317.0, 316.0, 319.0, 317.0, 319.0, 317.0, 296.0, 286.0, 289.0, 293.0, 316.0, 317.0, 322.0, 317.0, 321.0, 312.0, 316.0, 320.0, 319.0, 317.0, 314.0, 319.0, 231.0, 234.0, 317.0, 313.0, 288.0, 296.0, 294.0, 
276.0, 293.0, 294.0, 321.0, 306.0, 314.0, 319.0, 316.0, 314.0, 288.0, 299.0, 319.0, 317.0, 316.0, 323.0, 230.0, 232.0, 319.0, 320.0, 311.0, 322.0, 276.0, 291.0, 288.0, 294.0, 293.0, 283.0, 280.0, 302.0, 317.0, 319.0, 314.0, 319.0, 301.0, 289.0, 316.0, 314.0, 290.0, 289.0, 316.0, 320.0, 319.0, 320.0, 290.0, 280.0, 324.0, 315.0, 293.0, 297.0, 297.0, 282.0, 285.0, 294.0, 319.0, 320.0, 314.0, 322.0, 293.0, 291.0, 265.0, 257.0, 297.0, 285.0, 316.0, 311.0, 317.0, 316.0, 307.0, 311.0, 323.0, 304.0, 299.0, 285.0, 319.0, 314.0, 319.0, 320.0, 285.0, 291.0, 291.0, 291.0, 322.0, 314.0, 314.0, 316.0, 267.0, 255.0, 313.0, 314.0, 319.0, 317.0, 319.0, 311.0, 313.0, 317.0, 265.0, 260.0, 311.0, 319.0, 319.0, 311.0, 314.0, 319.0]}, "sampler_perf": {"mean_env_wait_ms": 0.801647414418135, "mean_processing_ms": 0.22879516876474576, "mean_inference_ms": 1.404093752736684}, "off_policy_estimator": {}, "info": {"num_steps_trained": 10320000, "num_steps_sampled": 5504000, "sample_time_ms": 21723.978, "load_time_ms": 37.011, "grad_time_ms": 10801.836, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -4.737731069326401e-05, "policy_loss": -0.00790297333151102, "vf_loss": 84.07501220703125, "vf_explained_var": 0.7714950442314148, "kl": 0.0018305158009752631, "entropy": 1.1038156747817993, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5504000, "episodes_total": 13760, "training_iteration": 430, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-28-22", "timestamp": 1660260502, "time_this_iter_s": 32.1021990776062, "time_total_s": 18911.58861708641, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 18911.58861708641, "timesteps_since_restore": 5504000, "iterations_since_restore": 430, "perf": {"cpu_util_percent": 34.26, "ram_util_percent": 59.419999999999995}}
+{"episode_reward_max": 639.0, "episode_reward_min": 351.0, "episode_reward_mean": 607.03, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 174.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 303.515}, "custom_metrics": {"sparse_reward_mean": 210.4, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 186.23, "shaped_reward_min": 111, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.97, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 17.95, "onion_pickup_agent_1_min": 13, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 16.4, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.25, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.24, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.16, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.07, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.05, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.4, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.54, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 20, "dish_pickup_agent_0_mean": 6.77, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 6.44, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 5.43, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.94, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 1.12, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 1.13, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.07, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.54, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.13, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.46, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.09, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.4, 
"optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.54, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 20, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.4, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.54, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 20, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [630.0, 636.0, 624.0, 630.0, 630.0, 633.0, 587.0, 630.0, 633.0, 630.0, 639.0, 639.0, 630.0, 584.0, 564.0, 533.0, 636.0, 561.0, 630.0, 627.0, 522.0, 630.0, 636.0, 636.0, 624.0, 630.0, 587.0, 582.0, 636.0, 587.0, 587.0, 624.0, 633.0, 590.0, 630.0, 579.0, 636.0, 639.0, 570.0, 639.0, 590.0, 579.0, 579.0, 639.0, 636.0, 584.0, 522.0, 582.0, 627.0, 633.0, 618.0, 627.0, 584.0, 633.0, 639.0, 576.0, 582.0, 636.0, 630.0, 522.0, 627.0, 636.0, 630.0, 630.0, 525.0, 630.0, 630.0, 633.0, 630.0, 579.0, 630.0, 630.0, 351.0, 633.0, 636.0, 582.0, 582.0, 587.0, 576.0, 582.0, 633.0, 633.0, 633.0, 633.0, 544.0, 633.0, 579.0, 582.0, 639.0, 633.0, 636.0, 639.0, 558.0, 576.0, 636.0, 627.0, 576.0, 636.0, 587.0, 630.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [311.0, 319.0, 314.0, 322.0, 306.0, 318.0, 319.0, 311.0, 321.0, 309.0, 319.0, 314.0, 296.0, 291.0, 319.0, 311.0, 314.0, 319.0, 314.0, 316.0, 319.0, 320.0, 325.0, 314.0, 316.0, 314.0, 293.0, 291.0, 288.0, 276.0, 259.0, 274.0, 314.0, 322.0, 279.0, 282.0, 316.0, 314.0, 311.0, 316.0, 257.0, 265.0, 317.0, 313.0, 314.0, 322.0, 317.0, 319.0, 316.0, 308.0, 311.0, 319.0, 288.0, 299.0, 288.0, 294.0, 324.0, 312.0, 296.0, 291.0, 286.0, 301.0, 313.0, 311.0, 314.0, 319.0, 301.0, 289.0, 316.0, 314.0, 290.0, 289.0, 316.0, 320.0, 319.0, 320.0, 290.0, 280.0, 324.0, 315.0, 293.0, 297.0, 297.0, 282.0, 285.0, 294.0, 319.0, 320.0, 314.0, 322.0, 293.0, 291.0, 265.0, 257.0, 297.0, 285.0, 316.0, 
311.0, 317.0, 316.0, 307.0, 311.0, 323.0, 304.0, 299.0, 285.0, 319.0, 314.0, 319.0, 320.0, 285.0, 291.0, 291.0, 291.0, 322.0, 314.0, 314.0, 316.0, 267.0, 255.0, 313.0, 314.0, 319.0, 317.0, 319.0, 311.0, 313.0, 317.0, 265.0, 260.0, 311.0, 319.0, 319.0, 311.0, 314.0, 319.0, 311.0, 319.0, 293.0, 286.0, 311.0, 319.0, 313.0, 317.0, 177.0, 174.0, 314.0, 319.0, 319.0, 317.0, 294.0, 288.0, 286.0, 296.0, 293.0, 294.0, 282.0, 294.0, 288.0, 294.0, 316.0, 317.0, 316.0, 317.0, 324.0, 309.0, 316.0, 317.0, 270.0, 274.0, 311.0, 322.0, 288.0, 291.0, 286.0, 296.0, 317.0, 322.0, 319.0, 314.0, 319.0, 317.0, 322.0, 317.0, 276.0, 282.0, 285.0, 291.0, 319.0, 317.0, 313.0, 314.0, 288.0, 288.0, 319.0, 317.0, 293.0, 294.0, 311.0, 319.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8008314445541891, "mean_processing_ms": 0.2286322337318132, "mean_inference_ms": 1.4031615335106213}, "off_policy_estimator": {}, "info": {"num_steps_trained": 10344000, "num_steps_sampled": 5516800, "sample_time_ms": 21234.37, "load_time_ms": 37.138, "grad_time_ms": 10446.294, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.004487487021833658, "policy_loss": -0.002838247222825885, "vf_loss": 78.8043441772461, "vf_explained_var": 0.7659228444099426, "kl": 0.0018056267872452736, "entropy": 1.109397053718567, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5516800, "episodes_total": 13792, "training_iteration": 431, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-28-51", "timestamp": 1660260531, "time_this_iter_s": 29.235426902770996, "time_total_s": 18940.82404398918, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 18940.82404398918, "timesteps_since_restore": 5516800, "iterations_since_restore": 431, "perf": {"cpu_util_percent": 33.543902439024386, "ram_util_percent": 59.482926829268294}}
+{"episode_reward_max": 639.0, "episode_reward_min": 351.0, "episode_reward_mean": 606.48, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 174.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 303.24}, "custom_metrics": {"sparse_reward_mean": 210.2, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 186.08, "shaped_reward_min": 111, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.91, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 17.95, "onion_pickup_agent_1_min": 13, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.39, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.27, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.22, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.14, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.08, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.07, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.37, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.53, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.75, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 6.45, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 5.41, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.0, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 1.16, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 1.11, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.48, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.16, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.42, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.11, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.37, 
"optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.53, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.37, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.53, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 633.0, 630.0, 636.0, 587.0, 579.0, 633.0, 630.0, 587.0, 630.0, 576.0, 582.0, 630.0, 627.0, 525.0, 582.0, 630.0, 582.0, 582.0, 582.0, 633.0, 624.0, 582.0, 636.0, 522.0, 633.0, 636.0, 636.0, 636.0, 630.0, 582.0, 630.0, 525.0, 630.0, 630.0, 633.0, 630.0, 579.0, 630.0, 630.0, 351.0, 633.0, 636.0, 582.0, 582.0, 587.0, 576.0, 582.0, 633.0, 633.0, 633.0, 633.0, 544.0, 633.0, 579.0, 582.0, 639.0, 633.0, 636.0, 639.0, 558.0, 576.0, 636.0, 627.0, 576.0, 636.0, 587.0, 630.0, 630.0, 636.0, 624.0, 630.0, 630.0, 633.0, 587.0, 630.0, 633.0, 630.0, 639.0, 639.0, 630.0, 584.0, 564.0, 533.0, 636.0, 561.0, 630.0, 627.0, 522.0, 630.0, 636.0, 636.0, 624.0, 630.0, 587.0, 582.0, 636.0, 587.0, 587.0, 624.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [290.0, 289.0, 316.0, 317.0, 311.0, 319.0, 314.0, 322.0, 288.0, 299.0, 288.0, 291.0, 311.0, 322.0, 316.0, 314.0, 301.0, 286.0, 311.0, 319.0, 305.0, 271.0, 286.0, 296.0, 313.0, 317.0, 316.0, 311.0, 265.0, 260.0, 291.0, 291.0, 313.0, 317.0, 287.0, 295.0, 285.0, 297.0, 293.0, 289.0, 319.0, 314.0, 316.0, 308.0, 291.0, 291.0, 319.0, 317.0, 267.0, 255.0, 321.0, 312.0, 314.0, 322.0, 317.0, 319.0, 317.0, 319.0, 316.0, 314.0, 298.0, 284.0, 316.0, 314.0, 265.0, 260.0, 311.0, 319.0, 319.0, 311.0, 314.0, 319.0, 311.0, 319.0, 293.0, 286.0, 311.0, 319.0, 313.0, 317.0, 177.0, 174.0, 314.0, 319.0, 319.0, 317.0, 294.0, 288.0, 286.0, 296.0, 293.0, 294.0, 282.0, 294.0, 288.0, 294.0, 316.0, 
317.0, 316.0, 317.0, 324.0, 309.0, 316.0, 317.0, 270.0, 274.0, 311.0, 322.0, 288.0, 291.0, 286.0, 296.0, 317.0, 322.0, 319.0, 314.0, 319.0, 317.0, 322.0, 317.0, 276.0, 282.0, 285.0, 291.0, 319.0, 317.0, 313.0, 314.0, 288.0, 288.0, 319.0, 317.0, 293.0, 294.0, 311.0, 319.0, 311.0, 319.0, 314.0, 322.0, 306.0, 318.0, 319.0, 311.0, 321.0, 309.0, 319.0, 314.0, 296.0, 291.0, 319.0, 311.0, 314.0, 319.0, 314.0, 316.0, 319.0, 320.0, 325.0, 314.0, 316.0, 314.0, 293.0, 291.0, 288.0, 276.0, 259.0, 274.0, 314.0, 322.0, 279.0, 282.0, 316.0, 314.0, 311.0, 316.0, 257.0, 265.0, 317.0, 313.0, 314.0, 322.0, 317.0, 319.0, 316.0, 308.0, 311.0, 319.0, 288.0, 299.0, 288.0, 294.0, 324.0, 312.0, 296.0, 291.0, 286.0, 301.0, 313.0, 311.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8000283085438272, "mean_processing_ms": 0.22847315169555785, "mean_inference_ms": 1.4024220813317556}, "off_policy_estimator": {}, "info": {"num_steps_trained": 10368000, "num_steps_sampled": 5529600, "sample_time_ms": 21335.175, "load_time_ms": 37.375, "grad_time_ms": 10351.537, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.001511982991360128, "policy_loss": -0.006039683241397142, "vf_loss": 81.0816650390625, "vf_explained_var": 0.766996443271637, "kl": 0.0019059469923377037, "entropy": 1.1129895448684692, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5529600, "episodes_total": 13824, "training_iteration": 432, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-29-27", "timestamp": 1660260567, "time_this_iter_s": 35.764232873916626, "time_total_s": 18976.588276863098, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 18976.588276863098, "timesteps_since_restore": 5529600, "iterations_since_restore": 432, "perf": {"cpu_util_percent": 31.023529411764706, "ram_util_percent": 59.011764705882364}}
+{"episode_reward_max": 639.0, "episode_reward_min": 522.0, "episode_reward_mean": 609.94, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 255.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 304.97}, "custom_metrics": {"sparse_reward_mean": 211.4, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 187.14, "shaped_reward_min": 161, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.04, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 17.89, "onion_pickup_agent_1_min": 14, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.55, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.31, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.23, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.12, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.08, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.04, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.51, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.53, "potting_onion_agent_1_min": 14, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.86, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 6.37, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 5.47, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.07, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 1.24, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 1.01, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.5, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.18, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.45, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.13, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.51, 
"optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.53, "optimal_onion_potting_agent_1_min": 14, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.51, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.53, "viable_onion_potting_agent_1_min": 14, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [633.0, 582.0, 579.0, 636.0, 579.0, 582.0, 576.0, 587.0, 627.0, 630.0, 587.0, 630.0, 636.0, 633.0, 636.0, 636.0, 624.0, 630.0, 582.0, 633.0, 630.0, 633.0, 582.0, 636.0, 582.0, 630.0, 630.0, 633.0, 636.0, 582.0, 582.0, 582.0, 576.0, 636.0, 587.0, 630.0, 630.0, 636.0, 624.0, 630.0, 630.0, 633.0, 587.0, 630.0, 633.0, 630.0, 639.0, 639.0, 630.0, 584.0, 564.0, 533.0, 636.0, 561.0, 630.0, 627.0, 522.0, 630.0, 636.0, 636.0, 624.0, 630.0, 587.0, 582.0, 636.0, 587.0, 587.0, 624.0, 579.0, 633.0, 630.0, 636.0, 587.0, 579.0, 633.0, 630.0, 587.0, 630.0, 576.0, 582.0, 630.0, 627.0, 525.0, 582.0, 630.0, 582.0, 582.0, 582.0, 633.0, 624.0, 582.0, 636.0, 522.0, 633.0, 636.0, 636.0, 636.0, 630.0, 582.0, 630.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [314.0, 319.0, 286.0, 296.0, 288.0, 291.0, 319.0, 317.0, 293.0, 286.0, 286.0, 296.0, 292.0, 284.0, 296.0, 291.0, 313.0, 314.0, 316.0, 314.0, 293.0, 294.0, 314.0, 316.0, 324.0, 312.0, 319.0, 314.0, 319.0, 317.0, 317.0, 319.0, 310.0, 314.0, 322.0, 308.0, 290.0, 292.0, 316.0, 317.0, 316.0, 314.0, 317.0, 316.0, 293.0, 289.0, 317.0, 319.0, 289.0, 293.0, 316.0, 314.0, 306.0, 324.0, 314.0, 319.0, 317.0, 319.0, 293.0, 289.0, 289.0, 293.0, 288.0, 294.0, 288.0, 288.0, 319.0, 317.0, 293.0, 294.0, 311.0, 319.0, 311.0, 319.0, 314.0, 322.0, 306.0, 318.0, 319.0, 311.0, 321.0, 309.0, 319.0, 314.0, 296.0, 291.0, 319.0, 311.0, 314.0, 319.0, 314.0, 316.0, 319.0, 320.0, 325.0, 314.0, 316.0, 
314.0, 293.0, 291.0, 288.0, 276.0, 259.0, 274.0, 314.0, 322.0, 279.0, 282.0, 316.0, 314.0, 311.0, 316.0, 257.0, 265.0, 317.0, 313.0, 314.0, 322.0, 317.0, 319.0, 316.0, 308.0, 311.0, 319.0, 288.0, 299.0, 288.0, 294.0, 324.0, 312.0, 296.0, 291.0, 286.0, 301.0, 313.0, 311.0, 290.0, 289.0, 316.0, 317.0, 311.0, 319.0, 314.0, 322.0, 288.0, 299.0, 288.0, 291.0, 311.0, 322.0, 316.0, 314.0, 301.0, 286.0, 311.0, 319.0, 305.0, 271.0, 286.0, 296.0, 313.0, 317.0, 316.0, 311.0, 265.0, 260.0, 291.0, 291.0, 313.0, 317.0, 287.0, 295.0, 285.0, 297.0, 293.0, 289.0, 319.0, 314.0, 316.0, 308.0, 291.0, 291.0, 319.0, 317.0, 267.0, 255.0, 321.0, 312.0, 314.0, 322.0, 317.0, 319.0, 317.0, 319.0, 316.0, 314.0, 298.0, 284.0, 316.0, 314.0]}, "sampler_perf": {"mean_env_wait_ms": 0.7992234752910505, "mean_processing_ms": 0.2283129313200136, "mean_inference_ms": 1.4016145546737357}, "off_policy_estimator": {}, "info": {"num_steps_trained": 10392000, "num_steps_sampled": 5542400, "sample_time_ms": 21105.654, "load_time_ms": 37.195, "grad_time_ms": 10237.421, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.00017731667321640998, "policy_loss": -0.007162818219512701, "vf_loss": 79.00240325927734, "vf_explained_var": 0.7636518478393555, "kl": 0.0019576705526560545, "entropy": 1.1202179193496704, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5542400, "episodes_total": 13856, "training_iteration": 433, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-29-57", "timestamp": 1660260597, "time_this_iter_s": 30.04263925552368, "time_total_s": 19006.630916118622, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 19006.630916118622, "timesteps_since_restore": 5542400, "iterations_since_restore": 433, "perf": {"cpu_util_percent": 28.414285714285718, "ram_util_percent": 59.08571428571428}}
+{"episode_reward_max": 636.0, "episode_reward_min": 522.0, "episode_reward_mean": 608.07, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 255.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 304.035}, "custom_metrics": {"sparse_reward_mean": 210.4, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 187.27, "shaped_reward_min": 162, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.87, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.01, "onion_pickup_agent_1_min": 14, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.44, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 19, "useful_onion_pickup_agent_1_mean": 17.48, "useful_onion_pickup_agent_1_min": 12, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.19, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.15, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.06, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.06, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.38, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.62, "potting_onion_agent_1_min": 14, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.75, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 6.1, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.6, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.09, "useful_dish_pickup_agent_1_min": 3, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 1.01, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.87, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.61, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.03, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.56, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.98, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.38, 
"optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.62, "optimal_onion_potting_agent_1_min": 14, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.38, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.62, "viable_onion_potting_agent_1_min": 14, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [587.0, 636.0, 582.0, 582.0, 633.0, 636.0, 582.0, 630.0, 630.0, 636.0, 587.0, 633.0, 633.0, 636.0, 636.0, 570.0, 576.0, 624.0, 587.0, 636.0, 582.0, 587.0, 582.0, 576.0, 579.0, 582.0, 630.0, 627.0, 633.0, 590.0, 636.0, 539.0, 636.0, 587.0, 587.0, 624.0, 579.0, 633.0, 630.0, 636.0, 587.0, 579.0, 633.0, 630.0, 587.0, 630.0, 576.0, 582.0, 630.0, 627.0, 525.0, 582.0, 630.0, 582.0, 582.0, 582.0, 633.0, 624.0, 582.0, 636.0, 522.0, 633.0, 636.0, 636.0, 636.0, 630.0, 582.0, 630.0, 633.0, 582.0, 579.0, 636.0, 579.0, 582.0, 576.0, 587.0, 627.0, 630.0, 587.0, 630.0, 636.0, 633.0, 636.0, 636.0, 624.0, 630.0, 582.0, 633.0, 630.0, 633.0, 582.0, 636.0, 582.0, 630.0, 630.0, 633.0, 636.0, 582.0, 582.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [296.0, 291.0, 322.0, 314.0, 293.0, 289.0, 288.0, 294.0, 319.0, 314.0, 317.0, 319.0, 294.0, 288.0, 313.0, 317.0, 316.0, 314.0, 319.0, 317.0, 293.0, 294.0, 319.0, 314.0, 319.0, 314.0, 319.0, 317.0, 317.0, 319.0, 277.0, 293.0, 295.0, 281.0, 308.0, 316.0, 296.0, 291.0, 319.0, 317.0, 296.0, 286.0, 291.0, 296.0, 288.0, 294.0, 290.0, 286.0, 293.0, 286.0, 296.0, 286.0, 316.0, 314.0, 313.0, 314.0, 319.0, 314.0, 299.0, 291.0, 319.0, 317.0, 268.0, 271.0, 324.0, 312.0, 296.0, 291.0, 286.0, 301.0, 313.0, 311.0, 290.0, 289.0, 316.0, 317.0, 311.0, 319.0, 314.0, 322.0, 288.0, 299.0, 288.0, 291.0, 311.0, 322.0, 316.0, 314.0, 301.0, 286.0, 311.0, 319.0, 305.0, 271.0, 286.0, 296.0, 313.0, 
317.0, 316.0, 311.0, 265.0, 260.0, 291.0, 291.0, 313.0, 317.0, 287.0, 295.0, 285.0, 297.0, 293.0, 289.0, 319.0, 314.0, 316.0, 308.0, 291.0, 291.0, 319.0, 317.0, 267.0, 255.0, 321.0, 312.0, 314.0, 322.0, 317.0, 319.0, 317.0, 319.0, 316.0, 314.0, 298.0, 284.0, 316.0, 314.0, 314.0, 319.0, 286.0, 296.0, 288.0, 291.0, 319.0, 317.0, 293.0, 286.0, 286.0, 296.0, 292.0, 284.0, 296.0, 291.0, 313.0, 314.0, 316.0, 314.0, 293.0, 294.0, 314.0, 316.0, 324.0, 312.0, 319.0, 314.0, 319.0, 317.0, 317.0, 319.0, 310.0, 314.0, 322.0, 308.0, 290.0, 292.0, 316.0, 317.0, 316.0, 314.0, 317.0, 316.0, 293.0, 289.0, 317.0, 319.0, 289.0, 293.0, 316.0, 314.0, 306.0, 324.0, 314.0, 319.0, 317.0, 319.0, 293.0, 289.0, 289.0, 293.0, 288.0, 294.0]}, "sampler_perf": {"mean_env_wait_ms": 0.7984267196562377, "mean_processing_ms": 0.2281543074598729, "mean_inference_ms": 1.4008634634421164}, "off_policy_estimator": {}, "info": {"num_steps_trained": 10416000, "num_steps_sampled": 5555200, "sample_time_ms": 20963.356, "load_time_ms": 37.144, "grad_time_ms": 9878.857, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0020503299310803413, "policy_loss": -0.0053678578697144985, "vf_loss": 79.79612731933594, "vf_explained_var": 0.7709012627601624, "kl": 0.0022744529414922, "entropy": 1.1228529214859009, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5555200, "episodes_total": 13888, "training_iteration": 434, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-30-26", "timestamp": 1660260626, "time_this_iter_s": 28.891623735427856, "time_total_s": 19035.52253985405, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 19035.52253985405, "timesteps_since_restore": 5555200, "iterations_since_restore": 434, "perf": {"cpu_util_percent": 35.10975609756097, "ram_util_percent": 59.18292682926829}}
+{"episode_reward_max": 639.0, "episode_reward_min": 539.0, "episode_reward_mean": 609.01, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 268.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 304.505}, "custom_metrics": {"sparse_reward_mean": 210.6, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 187.81, "shaped_reward_min": 170, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.86, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 17.97, "onion_pickup_agent_1_min": 14, "onion_pickup_agent_1_max": 20, "useful_onion_pickup_agent_0_mean": 16.39, "useful_onion_pickup_agent_0_min": 13, "useful_onion_pickup_agent_0_max": 19, "useful_onion_pickup_agent_1_mean": 17.51, "useful_onion_pickup_agent_1_min": 12, "useful_onion_pickup_agent_1_max": 20, "onion_drop_agent_0_mean": 0.16, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.12, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.05, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.04, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.39, "potting_onion_agent_0_min": 13, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.58, "potting_onion_agent_1_min": 14, "potting_onion_agent_1_max": 20, "dish_pickup_agent_0_mean": 6.62, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 6.07, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.64, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.16, "useful_dish_pickup_agent_1_min": 3, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.87, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.8, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.62, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.08, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.54, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.01, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.39, 
"optimal_onion_potting_agent_0_min": 13, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.58, "optimal_onion_potting_agent_1_min": 14, "optimal_onion_potting_agent_1_max": 20, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.39, "viable_onion_potting_agent_0_min": 13, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.58, "viable_onion_potting_agent_1_min": 14, "viable_onion_potting_agent_1_max": 20, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 630.0, 630.0, 587.0, 582.0, 639.0, 587.0, 584.0, 584.0, 633.0, 633.0, 582.0, 587.0, 630.0, 633.0, 633.0, 582.0, 582.0, 630.0, 630.0, 630.0, 633.0, 590.0, 633.0, 630.0, 630.0, 587.0, 630.0, 584.0, 587.0, 576.0, 582.0, 636.0, 630.0, 582.0, 630.0, 633.0, 582.0, 579.0, 636.0, 579.0, 582.0, 576.0, 587.0, 627.0, 630.0, 587.0, 630.0, 636.0, 633.0, 636.0, 636.0, 624.0, 630.0, 582.0, 633.0, 630.0, 633.0, 582.0, 636.0, 582.0, 630.0, 630.0, 633.0, 636.0, 582.0, 582.0, 582.0, 587.0, 636.0, 582.0, 582.0, 633.0, 636.0, 582.0, 630.0, 630.0, 636.0, 587.0, 633.0, 633.0, 636.0, 636.0, 570.0, 576.0, 624.0, 587.0, 636.0, 582.0, 587.0, 582.0, 576.0, 579.0, 582.0, 630.0, 627.0, 633.0, 590.0, 636.0, 539.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [288.0, 294.0, 311.0, 319.0, 321.0, 309.0, 291.0, 296.0, 301.0, 281.0, 319.0, 320.0, 293.0, 294.0, 286.0, 298.0, 288.0, 296.0, 314.0, 319.0, 319.0, 314.0, 293.0, 289.0, 293.0, 294.0, 314.0, 316.0, 313.0, 320.0, 316.0, 317.0, 291.0, 291.0, 288.0, 294.0, 316.0, 314.0, 316.0, 314.0, 319.0, 311.0, 324.0, 309.0, 296.0, 294.0, 314.0, 319.0, 316.0, 314.0, 311.0, 319.0, 301.0, 286.0, 319.0, 311.0, 293.0, 291.0, 299.0, 288.0, 274.0, 302.0, 286.0, 296.0, 317.0, 319.0, 316.0, 314.0, 298.0, 284.0, 316.0, 314.0, 314.0, 319.0, 286.0, 296.0, 288.0, 291.0, 319.0, 317.0, 293.0, 286.0, 286.0, 296.0, 292.0, 284.0, 296.0, 291.0, 313.0, 314.0, 316.0, 314.0, 293.0, 294.0, 314.0, 316.0, 324.0, 
312.0, 319.0, 314.0, 319.0, 317.0, 317.0, 319.0, 310.0, 314.0, 322.0, 308.0, 290.0, 292.0, 316.0, 317.0, 316.0, 314.0, 317.0, 316.0, 293.0, 289.0, 317.0, 319.0, 289.0, 293.0, 316.0, 314.0, 306.0, 324.0, 314.0, 319.0, 317.0, 319.0, 293.0, 289.0, 289.0, 293.0, 288.0, 294.0, 296.0, 291.0, 322.0, 314.0, 293.0, 289.0, 288.0, 294.0, 319.0, 314.0, 317.0, 319.0, 294.0, 288.0, 313.0, 317.0, 316.0, 314.0, 319.0, 317.0, 293.0, 294.0, 319.0, 314.0, 319.0, 314.0, 319.0, 317.0, 317.0, 319.0, 277.0, 293.0, 295.0, 281.0, 308.0, 316.0, 296.0, 291.0, 319.0, 317.0, 296.0, 286.0, 291.0, 296.0, 288.0, 294.0, 290.0, 286.0, 293.0, 286.0, 296.0, 286.0, 316.0, 314.0, 313.0, 314.0, 319.0, 314.0, 299.0, 291.0, 319.0, 317.0, 268.0, 271.0]}, "sampler_perf": {"mean_env_wait_ms": 0.7976245186877899, "mean_processing_ms": 0.22799297194787038, "mean_inference_ms": 1.3999329365484723}, "off_policy_estimator": {}, "info": {"num_steps_trained": 10440000, "num_steps_sampled": 5568000, "sample_time_ms": 21006.175, "load_time_ms": 37.426, "grad_time_ms": 9977.404, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0013511897996068, "policy_loss": -0.006047597620636225, "vf_loss": 79.5729751586914, "vf_explained_var": 0.7702791094779968, "kl": 0.001890461309812963, "entropy": 1.1170209646224976, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5568000, "episodes_total": 13920, "training_iteration": 435, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-30-58", "timestamp": 1660260658, "time_this_iter_s": 31.331193923950195, "time_total_s": 19066.853733778, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 19066.853733778, "timesteps_since_restore": 5568000, "iterations_since_restore": 435, "perf": {"cpu_util_percent": 33.626666666666665, "ram_util_percent": 58.966666666666676}}
+{"episode_reward_max": 639.0, "episode_reward_min": 522.0, "episode_reward_mean": 605.86, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 250.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 302.93}, "custom_metrics": {"sparse_reward_mean": 209.2, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 187.46, "shaped_reward_min": 162, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.81, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.0, "onion_pickup_agent_1_min": 15, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 16.36, "useful_onion_pickup_agent_0_min": 13, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.42, "useful_onion_pickup_agent_1_min": 12, "useful_onion_pickup_agent_1_max": 20, "onion_drop_agent_0_mean": 0.18, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.11, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.12, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.05, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.31, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.61, "potting_onion_agent_1_min": 15, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.51, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.86, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.68, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.12, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.74, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.67, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.63, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.04, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.54, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.95, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.31, 
"optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.61, "optimal_onion_potting_agent_1_min": 15, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.31, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.61, "viable_onion_potting_agent_1_min": 15, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [630.0, 579.0, 630.0, 582.0, 630.0, 587.0, 630.0, 522.0, 636.0, 590.0, 633.0, 582.0, 636.0, 636.0, 639.0, 582.0, 582.0, 633.0, 579.0, 579.0, 536.0, 582.0, 636.0, 630.0, 636.0, 587.0, 587.0, 639.0, 630.0, 633.0, 582.0, 582.0, 636.0, 582.0, 582.0, 582.0, 587.0, 636.0, 582.0, 582.0, 633.0, 636.0, 582.0, 630.0, 630.0, 636.0, 587.0, 633.0, 633.0, 636.0, 636.0, 570.0, 576.0, 624.0, 587.0, 636.0, 582.0, 587.0, 582.0, 576.0, 579.0, 582.0, 630.0, 627.0, 633.0, 590.0, 636.0, 539.0, 582.0, 630.0, 630.0, 587.0, 582.0, 639.0, 587.0, 584.0, 584.0, 633.0, 633.0, 582.0, 587.0, 630.0, 633.0, 633.0, 582.0, 582.0, 630.0, 630.0, 630.0, 633.0, 590.0, 633.0, 630.0, 630.0, 587.0, 630.0, 584.0, 587.0, 576.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [314.0, 316.0, 296.0, 283.0, 316.0, 314.0, 293.0, 289.0, 316.0, 314.0, 301.0, 286.0, 314.0, 316.0, 272.0, 250.0, 319.0, 317.0, 296.0, 294.0, 311.0, 322.0, 299.0, 283.0, 319.0, 317.0, 322.0, 314.0, 319.0, 320.0, 294.0, 288.0, 289.0, 293.0, 321.0, 312.0, 286.0, 293.0, 288.0, 291.0, 267.0, 269.0, 293.0, 289.0, 319.0, 317.0, 311.0, 319.0, 319.0, 317.0, 285.0, 302.0, 288.0, 299.0, 321.0, 318.0, 316.0, 314.0, 314.0, 319.0, 294.0, 288.0, 283.0, 299.0, 317.0, 319.0, 293.0, 289.0, 289.0, 293.0, 288.0, 294.0, 296.0, 291.0, 322.0, 314.0, 293.0, 289.0, 288.0, 294.0, 319.0, 314.0, 317.0, 319.0, 294.0, 288.0, 313.0, 317.0, 316.0, 314.0, 319.0, 317.0, 293.0, 294.0, 319.0, 314.0, 319.0, 
314.0, 319.0, 317.0, 317.0, 319.0, 277.0, 293.0, 295.0, 281.0, 308.0, 316.0, 296.0, 291.0, 319.0, 317.0, 296.0, 286.0, 291.0, 296.0, 288.0, 294.0, 290.0, 286.0, 293.0, 286.0, 296.0, 286.0, 316.0, 314.0, 313.0, 314.0, 319.0, 314.0, 299.0, 291.0, 319.0, 317.0, 268.0, 271.0, 288.0, 294.0, 311.0, 319.0, 321.0, 309.0, 291.0, 296.0, 301.0, 281.0, 319.0, 320.0, 293.0, 294.0, 286.0, 298.0, 288.0, 296.0, 314.0, 319.0, 319.0, 314.0, 293.0, 289.0, 293.0, 294.0, 314.0, 316.0, 313.0, 320.0, 316.0, 317.0, 291.0, 291.0, 288.0, 294.0, 316.0, 314.0, 316.0, 314.0, 319.0, 311.0, 324.0, 309.0, 296.0, 294.0, 314.0, 319.0, 316.0, 314.0, 311.0, 319.0, 301.0, 286.0, 319.0, 311.0, 293.0, 291.0, 299.0, 288.0, 274.0, 302.0, 286.0, 296.0]}, "sampler_perf": {"mean_env_wait_ms": 0.7968373127766308, "mean_processing_ms": 0.22783629500024907, "mean_inference_ms": 1.3993492052434942}, "off_policy_estimator": {}, "info": {"num_steps_trained": 10464000, "num_steps_sampled": 5580800, "sample_time_ms": 21691.27, "load_time_ms": 37.308, "grad_time_ms": 10128.337, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0022821128368377686, "policy_loss": -0.005102970637381077, "vf_loss": 79.3483657836914, "vf_explained_var": 0.7713219523429871, "kl": 0.0023417342454195023, "entropy": 1.0995064973831177, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5580800, "episodes_total": 13952, "training_iteration": 436, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-31-35", "timestamp": 1660260695, "time_this_iter_s": 37.88511109352112, "time_total_s": 19104.73884487152, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 19104.73884487152, "timesteps_since_restore": 5580800, "iterations_since_restore": 436, "perf": {"cpu_util_percent": 29.92452830188679, "ram_util_percent": 59.107547169811326}}
+{"episode_reward_max": 639.0, "episode_reward_min": 311.0, "episode_reward_mean": 607.55, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 151.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 303.775}, "custom_metrics": {"sparse_reward_mean": 209.8, "sparse_reward_min": 100, "sparse_reward_max": 220, "shaped_reward_mean": 187.95, "shaped_reward_min": 111, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.68, "onion_pickup_agent_0_min": 10, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.23, "onion_pickup_agent_1_min": 15, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 16.21, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.55, "useful_onion_pickup_agent_1_min": 14, "useful_onion_pickup_agent_1_max": 20, "onion_drop_agent_0_mean": 0.2, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.12, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.14, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.03, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.17, "potting_onion_agent_0_min": 9, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.8, "potting_onion_agent_1_min": 12, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.66, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 5.83, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.75, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.08, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.84, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.68, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.69, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.03, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.59, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.92, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.17, 
"optimal_onion_potting_agent_0_min": 9, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.8, "optimal_onion_potting_agent_1_min": 12, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.17, "viable_onion_potting_agent_0_min": 9, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.8, "viable_onion_potting_agent_1_min": 12, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [627.0, 639.0, 627.0, 639.0, 633.0, 630.0, 636.0, 587.0, 587.0, 639.0, 630.0, 633.0, 630.0, 582.0, 573.0, 639.0, 587.0, 636.0, 633.0, 630.0, 581.0, 633.0, 582.0, 633.0, 311.0, 636.0, 630.0, 633.0, 636.0, 584.0, 636.0, 636.0, 633.0, 590.0, 636.0, 539.0, 582.0, 630.0, 630.0, 587.0, 582.0, 639.0, 587.0, 584.0, 584.0, 633.0, 633.0, 582.0, 587.0, 630.0, 633.0, 633.0, 582.0, 582.0, 630.0, 630.0, 630.0, 633.0, 590.0, 633.0, 630.0, 630.0, 587.0, 630.0, 584.0, 587.0, 576.0, 582.0, 630.0, 579.0, 630.0, 582.0, 630.0, 587.0, 630.0, 522.0, 636.0, 590.0, 633.0, 582.0, 636.0, 636.0, 639.0, 582.0, 582.0, 633.0, 579.0, 579.0, 536.0, 582.0, 636.0, 630.0, 636.0, 587.0, 587.0, 639.0, 630.0, 633.0, 582.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [316.0, 311.0, 322.0, 317.0, 313.0, 314.0, 325.0, 314.0, 314.0, 319.0, 321.0, 309.0, 316.0, 320.0, 288.0, 299.0, 301.0, 286.0, 317.0, 322.0, 319.0, 311.0, 316.0, 317.0, 316.0, 314.0, 279.0, 303.0, 289.0, 284.0, 319.0, 320.0, 291.0, 296.0, 321.0, 315.0, 316.0, 317.0, 316.0, 314.0, 293.0, 288.0, 316.0, 317.0, 293.0, 289.0, 316.0, 317.0, 151.0, 160.0, 319.0, 317.0, 316.0, 314.0, 319.0, 314.0, 319.0, 317.0, 291.0, 293.0, 311.0, 325.0, 319.0, 317.0, 319.0, 314.0, 299.0, 291.0, 319.0, 317.0, 268.0, 271.0, 288.0, 294.0, 311.0, 319.0, 321.0, 309.0, 291.0, 296.0, 301.0, 281.0, 319.0, 320.0, 293.0, 294.0, 286.0, 298.0, 288.0, 296.0, 314.0, 319.0, 319.0, 314.0, 293.0, 289.0, 293.0, 
294.0, 314.0, 316.0, 313.0, 320.0, 316.0, 317.0, 291.0, 291.0, 288.0, 294.0, 316.0, 314.0, 316.0, 314.0, 319.0, 311.0, 324.0, 309.0, 296.0, 294.0, 314.0, 319.0, 316.0, 314.0, 311.0, 319.0, 301.0, 286.0, 319.0, 311.0, 293.0, 291.0, 299.0, 288.0, 274.0, 302.0, 286.0, 296.0, 314.0, 316.0, 296.0, 283.0, 316.0, 314.0, 293.0, 289.0, 316.0, 314.0, 301.0, 286.0, 314.0, 316.0, 272.0, 250.0, 319.0, 317.0, 296.0, 294.0, 311.0, 322.0, 299.0, 283.0, 319.0, 317.0, 322.0, 314.0, 319.0, 320.0, 294.0, 288.0, 289.0, 293.0, 321.0, 312.0, 286.0, 293.0, 288.0, 291.0, 267.0, 269.0, 293.0, 289.0, 319.0, 317.0, 311.0, 319.0, 319.0, 317.0, 285.0, 302.0, 288.0, 299.0, 321.0, 318.0, 316.0, 314.0, 314.0, 319.0, 294.0, 288.0, 283.0, 299.0]}, "sampler_perf": {"mean_env_wait_ms": 0.796054313311972, "mean_processing_ms": 0.22768002877826965, "mean_inference_ms": 1.3988094341608432}, "off_policy_estimator": {}, "info": {"num_steps_trained": 10488000, "num_steps_sampled": 5593600, "sample_time_ms": 21763.453, "load_time_ms": 37.236, "grad_time_ms": 10078.856, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.00570017471909523, "policy_loss": -0.002029874362051487, "vf_loss": 82.7793960571289, "vf_explained_var": 0.7683680653572083, "kl": 0.002793658524751663, "entropy": 1.095770001411438, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5593600, "episodes_total": 13984, "training_iteration": 437, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-32-06", "timestamp": 1660260726, "time_this_iter_s": 30.656537771224976, "time_total_s": 19135.395382642746, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 19135.395382642746, "timesteps_since_restore": 5593600, "iterations_since_restore": 437, "perf": {"cpu_util_percent": 32.4046511627907, "ram_util_percent": 59.06744186046511}}
+{"episode_reward_max": 639.0, "episode_reward_min": 311.0, "episode_reward_mean": 609.49, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 151.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 304.745}, "custom_metrics": {"sparse_reward_mean": 210.8, "sparse_reward_min": 100, "sparse_reward_max": 220, "shaped_reward_mean": 187.89, "shaped_reward_min": 111, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.58, "onion_pickup_agent_0_min": 10, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.41, "onion_pickup_agent_1_min": 14, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.23, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.77, "useful_onion_pickup_agent_1_min": 14, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.2, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.13, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.1, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.02, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.1, "potting_onion_agent_0_min": 9, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 18.04, "potting_onion_agent_1_min": 12, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.75, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 5.84, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 5.66, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.03, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.94, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.65, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.68, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.01, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 6, "soup_delivery_agent_0_mean": 5.63, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.92, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.1, 
"optimal_onion_potting_agent_0_min": 9, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 18.04, "optimal_onion_potting_agent_1_min": 12, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.1, "viable_onion_potting_agent_0_min": 9, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 18.04, "viable_onion_potting_agent_1_min": 12, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [627.0, 639.0, 630.0, 636.0, 633.0, 636.0, 587.0, 639.0, 579.0, 636.0, 630.0, 627.0, 633.0, 639.0, 636.0, 564.0, 630.0, 630.0, 636.0, 636.0, 636.0, 639.0, 636.0, 633.0, 630.0, 633.0, 579.0, 627.0, 408.0, 579.0, 630.0, 582.0, 584.0, 587.0, 576.0, 582.0, 630.0, 579.0, 630.0, 582.0, 630.0, 587.0, 630.0, 522.0, 636.0, 590.0, 633.0, 582.0, 636.0, 636.0, 639.0, 582.0, 582.0, 633.0, 579.0, 579.0, 536.0, 582.0, 636.0, 630.0, 636.0, 587.0, 587.0, 639.0, 630.0, 633.0, 582.0, 582.0, 627.0, 639.0, 627.0, 639.0, 633.0, 630.0, 636.0, 587.0, 587.0, 639.0, 630.0, 633.0, 630.0, 582.0, 573.0, 639.0, 587.0, 636.0, 633.0, 630.0, 581.0, 633.0, 582.0, 633.0, 311.0, 636.0, 630.0, 633.0, 636.0, 584.0, 636.0, 636.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [309.0, 318.0, 319.0, 320.0, 316.0, 314.0, 314.0, 322.0, 314.0, 319.0, 314.0, 322.0, 285.0, 302.0, 319.0, 320.0, 285.0, 294.0, 319.0, 317.0, 311.0, 319.0, 313.0, 314.0, 313.0, 320.0, 319.0, 320.0, 324.0, 312.0, 274.0, 290.0, 315.0, 315.0, 316.0, 314.0, 319.0, 317.0, 319.0, 317.0, 319.0, 317.0, 319.0, 320.0, 319.0, 317.0, 314.0, 319.0, 316.0, 314.0, 319.0, 314.0, 285.0, 294.0, 316.0, 311.0, 210.0, 198.0, 283.0, 296.0, 316.0, 314.0, 290.0, 292.0, 293.0, 291.0, 299.0, 288.0, 274.0, 302.0, 286.0, 296.0, 314.0, 316.0, 296.0, 283.0, 316.0, 314.0, 293.0, 289.0, 316.0, 314.0, 301.0, 286.0, 314.0, 316.0, 272.0, 250.0, 319.0, 317.0, 296.0, 294.0, 311.0, 322.0, 299.0, 283.0, 319.0, 
317.0, 322.0, 314.0, 319.0, 320.0, 294.0, 288.0, 289.0, 293.0, 321.0, 312.0, 286.0, 293.0, 288.0, 291.0, 267.0, 269.0, 293.0, 289.0, 319.0, 317.0, 311.0, 319.0, 319.0, 317.0, 285.0, 302.0, 288.0, 299.0, 321.0, 318.0, 316.0, 314.0, 314.0, 319.0, 294.0, 288.0, 283.0, 299.0, 316.0, 311.0, 322.0, 317.0, 313.0, 314.0, 325.0, 314.0, 314.0, 319.0, 321.0, 309.0, 316.0, 320.0, 288.0, 299.0, 301.0, 286.0, 317.0, 322.0, 319.0, 311.0, 316.0, 317.0, 316.0, 314.0, 279.0, 303.0, 289.0, 284.0, 319.0, 320.0, 291.0, 296.0, 321.0, 315.0, 316.0, 317.0, 316.0, 314.0, 293.0, 288.0, 316.0, 317.0, 293.0, 289.0, 316.0, 317.0, 151.0, 160.0, 319.0, 317.0, 316.0, 314.0, 319.0, 314.0, 319.0, 317.0, 291.0, 293.0, 311.0, 325.0, 319.0, 317.0]}, "sampler_perf": {"mean_env_wait_ms": 0.7952795152199544, "mean_processing_ms": 0.22752629484006134, "mean_inference_ms": 1.3983582965229155}, "off_policy_estimator": {}, "info": {"num_steps_trained": 10512000, "num_steps_sampled": 5606400, "sample_time_ms": 21892.052, "load_time_ms": 37.192, "grad_time_ms": 10267.0, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0008096967940218747, "policy_loss": -0.008205131627619267, "vf_loss": 79.47277069091797, "vf_explained_var": 0.777022659778595, "kl": 0.002324033295735717, "entropy": 1.1036995649337769, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5606400, "episodes_total": 14016, "training_iteration": 438, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-32-41", "timestamp": 1660260761, "time_this_iter_s": 34.68048119544983, "time_total_s": 19170.075863838196, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 19170.075863838196, "timesteps_since_restore": 5606400, "iterations_since_restore": 438, "perf": {"cpu_util_percent": 33.92857142857143, "ram_util_percent": 59.06734693877551}}
+{"episode_reward_max": 639.0, "episode_reward_min": 311.0, "episode_reward_mean": 616.53, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 151.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 308.265}, "custom_metrics": {"sparse_reward_mean": 213.6, "sparse_reward_min": 100, "sparse_reward_max": 220, "shaped_reward_mean": 189.33, "shaped_reward_min": 111, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.54, "onion_pickup_agent_0_min": 10, "onion_pickup_agent_0_max": 19, "onion_pickup_agent_1_mean": 18.7, "onion_pickup_agent_1_min": 14, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.24, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 19, "useful_onion_pickup_agent_1_mean": 18.11, "useful_onion_pickup_agent_1_min": 14, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.15, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.11, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.06, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.0, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 0, "potting_onion_agent_0_mean": 16.12, "potting_onion_agent_0_min": 9, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 18.34, "potting_onion_agent_1_min": 12, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.91, "dish_pickup_agent_0_min": 5, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 5.89, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 5.71, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 4.99, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 1.03, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.72, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.77, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.0, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 6, "soup_delivery_agent_0_mean": 5.74, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.94, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.12, 
"optimal_onion_potting_agent_0_min": 9, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 18.34, "optimal_onion_potting_agent_1_min": 12, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.12, "viable_onion_potting_agent_0_min": 9, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 18.34, "viable_onion_potting_agent_1_min": 12, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [627.0, 639.0, 636.0, 630.0, 579.0, 587.0, 627.0, 636.0, 633.0, 633.0, 636.0, 636.0, 636.0, 630.0, 630.0, 630.0, 633.0, 636.0, 636.0, 636.0, 639.0, 630.0, 627.0, 636.0, 579.0, 636.0, 590.0, 636.0, 570.0, 636.0, 579.0, 639.0, 630.0, 633.0, 582.0, 582.0, 627.0, 639.0, 627.0, 639.0, 633.0, 630.0, 636.0, 587.0, 587.0, 639.0, 630.0, 633.0, 630.0, 582.0, 573.0, 639.0, 587.0, 636.0, 633.0, 630.0, 581.0, 633.0, 582.0, 633.0, 311.0, 636.0, 630.0, 633.0, 636.0, 584.0, 636.0, 636.0, 627.0, 639.0, 630.0, 636.0, 633.0, 636.0, 587.0, 639.0, 579.0, 636.0, 630.0, 627.0, 633.0, 639.0, 636.0, 564.0, 630.0, 630.0, 636.0, 636.0, 636.0, 639.0, 636.0, 633.0, 630.0, 633.0, 579.0, 627.0, 408.0, 579.0, 630.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [316.0, 311.0, 322.0, 317.0, 319.0, 317.0, 318.0, 312.0, 277.0, 302.0, 291.0, 296.0, 313.0, 314.0, 319.0, 317.0, 315.0, 318.0, 314.0, 319.0, 316.0, 320.0, 324.0, 312.0, 319.0, 317.0, 321.0, 309.0, 316.0, 314.0, 311.0, 319.0, 316.0, 317.0, 319.0, 317.0, 324.0, 312.0, 316.0, 320.0, 319.0, 320.0, 319.0, 311.0, 310.0, 317.0, 319.0, 317.0, 293.0, 286.0, 322.0, 314.0, 296.0, 294.0, 314.0, 322.0, 290.0, 280.0, 311.0, 325.0, 288.0, 291.0, 319.0, 320.0, 316.0, 314.0, 314.0, 319.0, 294.0, 288.0, 283.0, 299.0, 316.0, 311.0, 322.0, 317.0, 313.0, 314.0, 325.0, 314.0, 314.0, 319.0, 321.0, 309.0, 316.0, 320.0, 288.0, 299.0, 301.0, 286.0, 317.0, 322.0, 319.0, 311.0, 316.0, 317.0, 316.0, 
314.0, 279.0, 303.0, 289.0, 284.0, 319.0, 320.0, 291.0, 296.0, 321.0, 315.0, 316.0, 317.0, 316.0, 314.0, 293.0, 288.0, 316.0, 317.0, 293.0, 289.0, 316.0, 317.0, 151.0, 160.0, 319.0, 317.0, 316.0, 314.0, 319.0, 314.0, 319.0, 317.0, 291.0, 293.0, 311.0, 325.0, 319.0, 317.0, 309.0, 318.0, 319.0, 320.0, 316.0, 314.0, 314.0, 322.0, 314.0, 319.0, 314.0, 322.0, 285.0, 302.0, 319.0, 320.0, 285.0, 294.0, 319.0, 317.0, 311.0, 319.0, 313.0, 314.0, 313.0, 320.0, 319.0, 320.0, 324.0, 312.0, 274.0, 290.0, 315.0, 315.0, 316.0, 314.0, 319.0, 317.0, 319.0, 317.0, 319.0, 317.0, 319.0, 320.0, 319.0, 317.0, 314.0, 319.0, 316.0, 314.0, 319.0, 314.0, 285.0, 294.0, 316.0, 311.0, 210.0, 198.0, 283.0, 296.0, 316.0, 314.0, 290.0, 292.0]}, "sampler_perf": {"mean_env_wait_ms": 0.7945004798552335, "mean_processing_ms": 0.22737090000954274, "mean_inference_ms": 1.3975837411593142}, "off_policy_estimator": {}, "info": {"num_steps_trained": 10536000, "num_steps_sampled": 5619200, "sample_time_ms": 21744.649, "load_time_ms": 36.586, "grad_time_ms": 10113.951, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0006775472429580986, "policy_loss": -0.006301699206233025, "vf_loss": 75.32054138183594, "vf_explained_var": 0.7757834792137146, "kl": 0.0017814143793657422, "entropy": 1.1056231260299683, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5619200, "episodes_total": 14048, "training_iteration": 439, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-33-09", "timestamp": 1660260789, "time_this_iter_s": 28.622015953063965, "time_total_s": 19198.69787979126, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 19198.69787979126, "timesteps_since_restore": 5619200, "iterations_since_restore": 439, "perf": {"cpu_util_percent": 31.939024390243897, "ram_util_percent": 59.1219512195122}}
+{"episode_reward_max": 639.0, "episode_reward_min": 408.0, "episode_reward_mean": 620.15, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 198.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 310.075}, "custom_metrics": {"sparse_reward_mean": 215.0, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 190.15, "shaped_reward_min": 128, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.59, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 19, "onion_pickup_agent_1_mean": 18.78, "onion_pickup_agent_1_min": 14, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.36, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 19, "useful_onion_pickup_agent_1_mean": 18.29, "useful_onion_pickup_agent_1_min": 14, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.12, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.09, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.02, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.0, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 0, "potting_onion_agent_0_mean": 16.24, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 18.46, "potting_onion_agent_1_min": 14, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.77, "dish_pickup_agent_0_min": 5, "dish_pickup_agent_0_max": 13, "dish_pickup_agent_1_mean": 5.98, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 5.64, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.01, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 6, "dish_drop_agent_0_mean": 0.93, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 0.74, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.74, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.08, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.72, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.03, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.24, 
"optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 18.46, "optimal_onion_potting_agent_1_min": 14, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.24, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 18.46, "viable_onion_potting_agent_1_min": 14, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [584.0, 633.0, 627.0, 582.0, 630.0, 636.0, 636.0, 630.0, 639.0, 627.0, 579.0, 624.0, 636.0, 630.0, 582.0, 636.0, 639.0, 633.0, 633.0, 639.0, 576.0, 636.0, 573.0, 630.0, 587.0, 633.0, 630.0, 636.0, 639.0, 633.0, 630.0, 587.0, 636.0, 584.0, 636.0, 636.0, 627.0, 639.0, 630.0, 636.0, 633.0, 636.0, 587.0, 639.0, 579.0, 636.0, 630.0, 627.0, 633.0, 639.0, 636.0, 564.0, 630.0, 630.0, 636.0, 636.0, 636.0, 639.0, 636.0, 633.0, 630.0, 633.0, 579.0, 627.0, 408.0, 579.0, 630.0, 582.0, 627.0, 639.0, 636.0, 630.0, 579.0, 587.0, 627.0, 636.0, 633.0, 633.0, 636.0, 636.0, 636.0, 630.0, 630.0, 630.0, 633.0, 636.0, 636.0, 636.0, 639.0, 630.0, 627.0, 636.0, 579.0, 636.0, 590.0, 636.0, 570.0, 636.0, 579.0, 639.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [290.0, 294.0, 319.0, 314.0, 311.0, 316.0, 288.0, 294.0, 314.0, 316.0, 319.0, 317.0, 319.0, 317.0, 316.0, 314.0, 319.0, 320.0, 306.0, 321.0, 293.0, 286.0, 307.0, 317.0, 319.0, 317.0, 311.0, 319.0, 293.0, 289.0, 321.0, 315.0, 322.0, 317.0, 317.0, 316.0, 311.0, 322.0, 322.0, 317.0, 293.0, 283.0, 319.0, 317.0, 279.0, 294.0, 308.0, 322.0, 285.0, 302.0, 316.0, 317.0, 305.0, 325.0, 319.0, 317.0, 319.0, 320.0, 319.0, 314.0, 318.0, 312.0, 288.0, 299.0, 319.0, 317.0, 291.0, 293.0, 311.0, 325.0, 319.0, 317.0, 309.0, 318.0, 319.0, 320.0, 316.0, 314.0, 314.0, 322.0, 314.0, 319.0, 314.0, 322.0, 285.0, 302.0, 319.0, 320.0, 285.0, 294.0, 319.0, 317.0, 311.0, 319.0, 313.0, 314.0, 313.0, 
320.0, 319.0, 320.0, 324.0, 312.0, 274.0, 290.0, 315.0, 315.0, 316.0, 314.0, 319.0, 317.0, 319.0, 317.0, 319.0, 317.0, 319.0, 320.0, 319.0, 317.0, 314.0, 319.0, 316.0, 314.0, 319.0, 314.0, 285.0, 294.0, 316.0, 311.0, 210.0, 198.0, 283.0, 296.0, 316.0, 314.0, 290.0, 292.0, 316.0, 311.0, 322.0, 317.0, 319.0, 317.0, 318.0, 312.0, 277.0, 302.0, 291.0, 296.0, 313.0, 314.0, 319.0, 317.0, 315.0, 318.0, 314.0, 319.0, 316.0, 320.0, 324.0, 312.0, 319.0, 317.0, 321.0, 309.0, 316.0, 314.0, 311.0, 319.0, 316.0, 317.0, 319.0, 317.0, 324.0, 312.0, 316.0, 320.0, 319.0, 320.0, 319.0, 311.0, 310.0, 317.0, 319.0, 317.0, 293.0, 286.0, 322.0, 314.0, 296.0, 294.0, 314.0, 322.0, 290.0, 280.0, 311.0, 325.0, 288.0, 291.0, 319.0, 320.0]}, "sampler_perf": {"mean_env_wait_ms": 0.7937258148671773, "mean_processing_ms": 0.22721751341177562, "mean_inference_ms": 1.3967610737623508}, "off_policy_estimator": {}, "info": {"num_steps_trained": 10560000, "num_steps_sampled": 5632000, "sample_time_ms": 21732.229, "load_time_ms": 36.475, "grad_time_ms": 9880.218, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.00015868060290813446, "policy_loss": -0.007246671710163355, "vf_loss": 76.3777847290039, "vf_explained_var": 0.7677585482597351, "kl": 0.0021524711046367884, "entropy": 1.099584698677063, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5632000, "episodes_total": 14080, "training_iteration": 440, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-33-39", "timestamp": 1660260819, "time_this_iter_s": 29.637184143066406, "time_total_s": 19228.335063934326, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 19228.335063934326, "timesteps_since_restore": 5632000, "iterations_since_restore": 440, "perf": {"cpu_util_percent": 33.75, "ram_util_percent": 59.899999999999984}}
+{"episode_reward_max": 639.0, "episode_reward_min": 408.0, "episode_reward_mean": 617.76, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 198.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 308.88}, "custom_metrics": {"sparse_reward_mean": 214.2, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 189.36, "shaped_reward_min": 128, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.58, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 19, "onion_pickup_agent_1_mean": 18.6, "onion_pickup_agent_1_min": 14, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.35, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 19, "useful_onion_pickup_agent_1_mean": 18.01, "useful_onion_pickup_agent_1_min": 14, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.1, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.09, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.03, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.02, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.25, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 18.27, "potting_onion_agent_1_min": 14, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.84, "dish_pickup_agent_0_min": 5, "dish_pickup_agent_0_max": 13, "dish_pickup_agent_1_mean": 6.16, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 13, "useful_dish_pickup_agent_0_mean": 5.69, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.96, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 6, "dish_drop_agent_0_mean": 0.97, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 0.95, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.75, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.02, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.73, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.98, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.25, 
"optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 18.27, "optimal_onion_potting_agent_1_min": 14, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.25, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 18.27, "viable_onion_potting_agent_1_min": 14, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [633.0, 633.0, 627.0, 627.0, 636.0, 582.0, 630.0, 633.0, 639.0, 570.0, 582.0, 576.0, 636.0, 639.0, 636.0, 582.0, 630.0, 627.0, 624.0, 587.0, 630.0, 633.0, 582.0, 630.0, 636.0, 582.0, 576.0, 633.0, 633.0, 630.0, 639.0, 636.0, 408.0, 579.0, 630.0, 582.0, 627.0, 639.0, 636.0, 630.0, 579.0, 587.0, 627.0, 636.0, 633.0, 633.0, 636.0, 636.0, 636.0, 630.0, 630.0, 630.0, 633.0, 636.0, 636.0, 636.0, 639.0, 630.0, 627.0, 636.0, 579.0, 636.0, 590.0, 636.0, 570.0, 636.0, 579.0, 639.0, 584.0, 633.0, 627.0, 582.0, 630.0, 636.0, 636.0, 630.0, 639.0, 627.0, 579.0, 624.0, 636.0, 630.0, 582.0, 636.0, 639.0, 633.0, 633.0, 639.0, 576.0, 636.0, 573.0, 630.0, 587.0, 633.0, 630.0, 636.0, 639.0, 633.0, 630.0, 587.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [319.0, 314.0, 319.0, 314.0, 314.0, 313.0, 316.0, 311.0, 320.0, 316.0, 288.0, 294.0, 311.0, 319.0, 319.0, 314.0, 319.0, 320.0, 289.0, 281.0, 291.0, 291.0, 288.0, 288.0, 316.0, 320.0, 319.0, 320.0, 319.0, 317.0, 295.0, 287.0, 313.0, 317.0, 319.0, 308.0, 310.0, 314.0, 296.0, 291.0, 314.0, 316.0, 313.0, 320.0, 301.0, 281.0, 316.0, 314.0, 314.0, 322.0, 291.0, 291.0, 288.0, 288.0, 316.0, 317.0, 319.0, 314.0, 316.0, 314.0, 320.0, 319.0, 319.0, 317.0, 210.0, 198.0, 283.0, 296.0, 316.0, 314.0, 290.0, 292.0, 316.0, 311.0, 322.0, 317.0, 319.0, 317.0, 318.0, 312.0, 277.0, 302.0, 291.0, 296.0, 313.0, 314.0, 319.0, 317.0, 315.0, 318.0, 314.0, 319.0, 316.0, 320.0, 324.0, 312.0, 319.0, 
317.0, 321.0, 309.0, 316.0, 314.0, 311.0, 319.0, 316.0, 317.0, 319.0, 317.0, 324.0, 312.0, 316.0, 320.0, 319.0, 320.0, 319.0, 311.0, 310.0, 317.0, 319.0, 317.0, 293.0, 286.0, 322.0, 314.0, 296.0, 294.0, 314.0, 322.0, 290.0, 280.0, 311.0, 325.0, 288.0, 291.0, 319.0, 320.0, 290.0, 294.0, 319.0, 314.0, 311.0, 316.0, 288.0, 294.0, 314.0, 316.0, 319.0, 317.0, 319.0, 317.0, 316.0, 314.0, 319.0, 320.0, 306.0, 321.0, 293.0, 286.0, 307.0, 317.0, 319.0, 317.0, 311.0, 319.0, 293.0, 289.0, 321.0, 315.0, 322.0, 317.0, 317.0, 316.0, 311.0, 322.0, 322.0, 317.0, 293.0, 283.0, 319.0, 317.0, 279.0, 294.0, 308.0, 322.0, 285.0, 302.0, 316.0, 317.0, 305.0, 325.0, 319.0, 317.0, 319.0, 320.0, 319.0, 314.0, 318.0, 312.0, 288.0, 299.0]}, "sampler_perf": {"mean_env_wait_ms": 0.7929580463765498, "mean_processing_ms": 0.22706572704499173, "mean_inference_ms": 1.3960311893008097}, "off_policy_estimator": {}, "info": {"num_steps_trained": 10584000, "num_steps_sampled": 5644800, "sample_time_ms": 22196.121, "load_time_ms": 36.22, "grad_time_ms": 10110.176, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0019730820786207914, "policy_loss": -0.005001601297408342, "vf_loss": 75.2809066772461, "vf_explained_var": 0.770819902420044, "kl": 0.0019049126422032714, "entropy": 1.106797695159912, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5644800, "episodes_total": 14112, "training_iteration": 441, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-34-15", "timestamp": 1660260855, "time_this_iter_s": 36.16889190673828, "time_total_s": 19264.503955841064, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 19264.503955841064, "timesteps_since_restore": 5644800, "iterations_since_restore": 441, "perf": {"cpu_util_percent": 30.368627450980394, "ram_util_percent": 59.160784313725486}}
+{"episode_reward_max": 639.0, "episode_reward_min": 564.0, "episode_reward_mean": 618.3, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 279.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 309.15}, "custom_metrics": {"sparse_reward_mean": 214.6, "sparse_reward_min": 200, "sparse_reward_max": 220, "shaped_reward_mean": 189.1, "shaped_reward_min": 164, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.65, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 19, "onion_pickup_agent_1_mean": 18.51, "onion_pickup_agent_1_min": 15, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 16.37, "useful_onion_pickup_agent_0_min": 13, "useful_onion_pickup_agent_0_max": 19, "useful_onion_pickup_agent_1_mean": 17.85, "useful_onion_pickup_agent_1_min": 14, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.08, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.15, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.03, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.05, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.36, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 18.09, "potting_onion_agent_1_min": 15, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.74, "dish_pickup_agent_0_min": 5, "dish_pickup_agent_0_max": 13, "dish_pickup_agent_1_mean": 6.31, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 13, "useful_dish_pickup_agent_0_mean": 5.58, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.02, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 6, "dish_drop_agent_0_mean": 0.91, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 1.04, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.69, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.1, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.67, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.06, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.36, 
"optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 18.09, "optimal_onion_potting_agent_1_min": 15, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.36, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 18.09, "viable_onion_potting_agent_1_min": 15, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [627.0, 624.0, 582.0, 636.0, 630.0, 630.0, 633.0, 587.0, 567.0, 630.0, 639.0, 630.0, 636.0, 633.0, 633.0, 630.0, 636.0, 639.0, 636.0, 630.0, 630.0, 630.0, 627.0, 630.0, 582.0, 582.0, 587.0, 630.0, 630.0, 564.0, 576.0, 636.0, 570.0, 636.0, 579.0, 639.0, 584.0, 633.0, 627.0, 582.0, 630.0, 636.0, 636.0, 630.0, 639.0, 627.0, 579.0, 624.0, 636.0, 630.0, 582.0, 636.0, 639.0, 633.0, 633.0, 639.0, 576.0, 636.0, 573.0, 630.0, 587.0, 633.0, 630.0, 636.0, 639.0, 633.0, 630.0, 587.0, 633.0, 633.0, 627.0, 627.0, 636.0, 582.0, 630.0, 633.0, 639.0, 570.0, 582.0, 576.0, 636.0, 639.0, 636.0, 582.0, 630.0, 627.0, 624.0, 587.0, 630.0, 633.0, 582.0, 630.0, 636.0, 582.0, 576.0, 633.0, 633.0, 630.0, 639.0, 636.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [311.0, 316.0, 313.0, 311.0, 291.0, 291.0, 319.0, 317.0, 316.0, 314.0, 313.0, 317.0, 316.0, 317.0, 290.0, 297.0, 282.0, 285.0, 319.0, 311.0, 319.0, 320.0, 316.0, 314.0, 314.0, 322.0, 319.0, 314.0, 311.0, 322.0, 314.0, 316.0, 314.0, 322.0, 322.0, 317.0, 319.0, 317.0, 319.0, 311.0, 316.0, 314.0, 311.0, 319.0, 308.0, 319.0, 316.0, 314.0, 291.0, 291.0, 290.0, 292.0, 296.0, 291.0, 316.0, 314.0, 316.0, 314.0, 281.0, 283.0, 290.0, 286.0, 319.0, 317.0, 290.0, 280.0, 311.0, 325.0, 288.0, 291.0, 319.0, 320.0, 290.0, 294.0, 319.0, 314.0, 311.0, 316.0, 288.0, 294.0, 314.0, 316.0, 319.0, 317.0, 319.0, 317.0, 316.0, 314.0, 319.0, 320.0, 306.0, 321.0, 293.0, 286.0, 307.0, 317.0, 319.0, 
317.0, 311.0, 319.0, 293.0, 289.0, 321.0, 315.0, 322.0, 317.0, 317.0, 316.0, 311.0, 322.0, 322.0, 317.0, 293.0, 283.0, 319.0, 317.0, 279.0, 294.0, 308.0, 322.0, 285.0, 302.0, 316.0, 317.0, 305.0, 325.0, 319.0, 317.0, 319.0, 320.0, 319.0, 314.0, 318.0, 312.0, 288.0, 299.0, 319.0, 314.0, 319.0, 314.0, 314.0, 313.0, 316.0, 311.0, 320.0, 316.0, 288.0, 294.0, 311.0, 319.0, 319.0, 314.0, 319.0, 320.0, 289.0, 281.0, 291.0, 291.0, 288.0, 288.0, 316.0, 320.0, 319.0, 320.0, 319.0, 317.0, 295.0, 287.0, 313.0, 317.0, 319.0, 308.0, 310.0, 314.0, 296.0, 291.0, 314.0, 316.0, 313.0, 320.0, 301.0, 281.0, 316.0, 314.0, 314.0, 322.0, 291.0, 291.0, 288.0, 288.0, 316.0, 317.0, 319.0, 314.0, 316.0, 314.0, 320.0, 319.0, 319.0, 317.0]}, "sampler_perf": {"mean_env_wait_ms": 0.7921926702018212, "mean_processing_ms": 0.22691451487922393, "mean_inference_ms": 1.3953653520568468}, "off_policy_estimator": {}, "info": {"num_steps_trained": 10608000, "num_steps_sampled": 5657600, "sample_time_ms": 21841.972, "load_time_ms": 35.995, "grad_time_ms": 10215.543, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0027040394488722086, "policy_loss": -0.004097369499504566, "vf_loss": 73.54324340820312, "vf_explained_var": 0.7729549407958984, "kl": 0.0019481302006170154, "entropy": 1.105837106704712, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5657600, "episodes_total": 14144, "training_iteration": 442, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-34-49", "timestamp": 1660260889, "time_this_iter_s": 33.27770400047302, "time_total_s": 19297.781659841537, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 19297.781659841537, "timesteps_since_restore": 5657600, "iterations_since_restore": 442, "perf": {"cpu_util_percent": 29.43404255319148, "ram_util_percent": 59.22553191489361}}
+{"episode_reward_max": 639.0, "episode_reward_min": 564.0, "episode_reward_mean": 615.91, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 280.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 307.955}, "custom_metrics": {"sparse_reward_mean": 213.6, "sparse_reward_min": 200, "sparse_reward_max": 220, "shaped_reward_mean": 188.71, "shaped_reward_min": 164, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.74, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 19, "onion_pickup_agent_1_mean": 18.35, "onion_pickup_agent_1_min": 15, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 16.39, "useful_onion_pickup_agent_0_min": 13, "useful_onion_pickup_agent_0_max": 19, "useful_onion_pickup_agent_1_mean": 17.58, "useful_onion_pickup_agent_1_min": 14, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.09, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.18, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.04, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.07, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.42, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.85, "potting_onion_agent_1_min": 15, "potting_onion_agent_1_max": 20, "dish_pickup_agent_0_mean": 6.7, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 13, "dish_pickup_agent_1_mean": 6.2, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 13, "useful_dish_pickup_agent_0_mean": 5.58, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.07, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.92, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 0.95, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.04, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.68, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.12, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 6, "soup_delivery_agent_0_mean": 5.65, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.03, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.42, 
"optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.85, "optimal_onion_potting_agent_1_min": 15, "optimal_onion_potting_agent_1_max": 20, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.42, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.85, "viable_onion_potting_agent_1_min": 15, "viable_onion_potting_agent_1_max": 20, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [633.0, 579.0, 633.0, 636.0, 636.0, 636.0, 567.0, 630.0, 587.0, 587.0, 587.0, 630.0, 639.0, 579.0, 636.0, 630.0, 630.0, 587.0, 633.0, 579.0, 587.0, 587.0, 582.0, 630.0, 630.0, 579.0, 636.0, 633.0, 627.0, 630.0, 587.0, 579.0, 639.0, 633.0, 630.0, 587.0, 633.0, 633.0, 627.0, 627.0, 636.0, 582.0, 630.0, 633.0, 639.0, 570.0, 582.0, 576.0, 636.0, 639.0, 636.0, 582.0, 630.0, 627.0, 624.0, 587.0, 630.0, 633.0, 582.0, 630.0, 636.0, 582.0, 576.0, 633.0, 633.0, 630.0, 639.0, 636.0, 627.0, 624.0, 582.0, 636.0, 630.0, 630.0, 633.0, 587.0, 567.0, 630.0, 639.0, 630.0, 636.0, 633.0, 633.0, 630.0, 636.0, 639.0, 636.0, 630.0, 630.0, 630.0, 627.0, 630.0, 582.0, 582.0, 587.0, 630.0, 630.0, 564.0, 576.0, 636.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [314.0, 319.0, 296.0, 283.0, 314.0, 319.0, 319.0, 317.0, 317.0, 319.0, 319.0, 317.0, 280.0, 287.0, 311.0, 319.0, 288.0, 299.0, 291.0, 296.0, 296.0, 291.0, 316.0, 314.0, 314.0, 325.0, 285.0, 294.0, 319.0, 317.0, 314.0, 316.0, 319.0, 311.0, 293.0, 294.0, 314.0, 319.0, 283.0, 296.0, 288.0, 299.0, 301.0, 286.0, 296.0, 286.0, 316.0, 314.0, 316.0, 314.0, 298.0, 281.0, 319.0, 317.0, 317.0, 316.0, 308.0, 319.0, 311.0, 319.0, 295.0, 292.0, 290.0, 289.0, 319.0, 320.0, 319.0, 314.0, 318.0, 312.0, 288.0, 299.0, 319.0, 314.0, 319.0, 314.0, 314.0, 313.0, 316.0, 311.0, 320.0, 316.0, 288.0, 294.0, 311.0, 319.0, 319.0, 314.0, 319.0, 320.0, 289.0, 281.0, 291.0, 291.0, 288.0, 288.0, 316.0, 
320.0, 319.0, 320.0, 319.0, 317.0, 295.0, 287.0, 313.0, 317.0, 319.0, 308.0, 310.0, 314.0, 296.0, 291.0, 314.0, 316.0, 313.0, 320.0, 301.0, 281.0, 316.0, 314.0, 314.0, 322.0, 291.0, 291.0, 288.0, 288.0, 316.0, 317.0, 319.0, 314.0, 316.0, 314.0, 320.0, 319.0, 319.0, 317.0, 311.0, 316.0, 313.0, 311.0, 291.0, 291.0, 319.0, 317.0, 316.0, 314.0, 313.0, 317.0, 316.0, 317.0, 290.0, 297.0, 282.0, 285.0, 319.0, 311.0, 319.0, 320.0, 316.0, 314.0, 314.0, 322.0, 319.0, 314.0, 311.0, 322.0, 314.0, 316.0, 314.0, 322.0, 322.0, 317.0, 319.0, 317.0, 319.0, 311.0, 316.0, 314.0, 311.0, 319.0, 308.0, 319.0, 316.0, 314.0, 291.0, 291.0, 290.0, 292.0, 296.0, 291.0, 316.0, 314.0, 316.0, 314.0, 281.0, 283.0, 290.0, 286.0, 319.0, 317.0]}, "sampler_perf": {"mean_env_wait_ms": 0.7914282474248366, "mean_processing_ms": 0.22676310167632485, "mean_inference_ms": 1.3947257800951314}, "off_policy_estimator": {}, "info": {"num_steps_trained": 10632000, "num_steps_sampled": 5670400, "sample_time_ms": 21982.654, "load_time_ms": 36.099, "grad_time_ms": 10196.436, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0026817016769200563, "policy_loss": -0.004366503097116947, "vf_loss": 76.00869750976562, "vf_explained_var": 0.7792714238166809, "kl": 0.0014469980960711837, "entropy": 1.1053307056427002, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5670400, "episodes_total": 14176, "training_iteration": 443, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-35-20", "timestamp": 1660260920, "time_this_iter_s": 31.264520168304443, "time_total_s": 19329.046180009842, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 19329.046180009842, "timesteps_since_restore": 5670400, "iterations_since_restore": 443, "perf": {"cpu_util_percent": 30.41136363636364, "ram_util_percent": 59.12272727272728}}
+{"episode_reward_max": 639.0, "episode_reward_min": 479.0, "episode_reward_mean": 614.54, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 239.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 307.27}, "custom_metrics": {"sparse_reward_mean": 213.0, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 188.54, "shaped_reward_min": 159, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.78, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 19, "onion_pickup_agent_1_mean": 18.25, "onion_pickup_agent_1_min": 16, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 16.3, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 19, "useful_onion_pickup_agent_1_mean": 17.54, "useful_onion_pickup_agent_1_min": 14, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.08, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.18, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.02, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.06, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.47, "potting_onion_agent_0_min": 13, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.69, "potting_onion_agent_1_min": 14, "potting_onion_agent_1_max": 20, "dish_pickup_agent_0_mean": 6.5, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 6.11, "dish_pickup_agent_1_min": 4, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.55, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.12, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.8, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.86, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.61, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.22, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 6, "soup_delivery_agent_0_mean": 5.53, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.13, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.47, 
"optimal_onion_potting_agent_0_min": 13, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.69, "optimal_onion_potting_agent_1_min": 14, "optimal_onion_potting_agent_1_max": 20, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.47, "viable_onion_potting_agent_0_min": 13, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.69, "viable_onion_potting_agent_1_min": 14, "viable_onion_potting_agent_1_max": 20, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [636.0, 630.0, 636.0, 633.0, 624.0, 636.0, 630.0, 633.0, 639.0, 570.0, 636.0, 630.0, 636.0, 582.0, 587.0, 579.0, 584.0, 633.0, 582.0, 587.0, 636.0, 633.0, 621.0, 630.0, 633.0, 584.0, 627.0, 630.0, 633.0, 479.0, 587.0, 587.0, 633.0, 630.0, 639.0, 636.0, 627.0, 624.0, 582.0, 636.0, 630.0, 630.0, 633.0, 587.0, 567.0, 630.0, 639.0, 630.0, 636.0, 633.0, 633.0, 630.0, 636.0, 639.0, 636.0, 630.0, 630.0, 630.0, 627.0, 630.0, 582.0, 582.0, 587.0, 630.0, 630.0, 564.0, 576.0, 636.0, 633.0, 579.0, 633.0, 636.0, 636.0, 636.0, 567.0, 630.0, 587.0, 587.0, 587.0, 630.0, 639.0, 579.0, 636.0, 630.0, 630.0, 587.0, 633.0, 579.0, 587.0, 587.0, 582.0, 630.0, 630.0, 579.0, 636.0, 633.0, 627.0, 630.0, 587.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [319.0, 317.0, 311.0, 319.0, 314.0, 322.0, 314.0, 319.0, 314.0, 310.0, 314.0, 322.0, 311.0, 319.0, 316.0, 317.0, 317.0, 322.0, 288.0, 282.0, 319.0, 317.0, 311.0, 319.0, 319.0, 317.0, 291.0, 291.0, 298.0, 289.0, 294.0, 285.0, 288.0, 296.0, 314.0, 319.0, 296.0, 286.0, 299.0, 288.0, 319.0, 317.0, 316.0, 317.0, 310.0, 311.0, 319.0, 311.0, 322.0, 311.0, 288.0, 296.0, 310.0, 317.0, 313.0, 317.0, 316.0, 317.0, 239.0, 240.0, 293.0, 294.0, 296.0, 291.0, 319.0, 314.0, 316.0, 314.0, 320.0, 319.0, 319.0, 317.0, 311.0, 316.0, 313.0, 311.0, 291.0, 291.0, 319.0, 317.0, 316.0, 314.0, 313.0, 317.0, 316.0, 317.0, 290.0, 297.0, 282.0, 285.0, 319.0, 311.0, 319.0, 320.0, 316.0, 314.0, 314.0, 
322.0, 319.0, 314.0, 311.0, 322.0, 314.0, 316.0, 314.0, 322.0, 322.0, 317.0, 319.0, 317.0, 319.0, 311.0, 316.0, 314.0, 311.0, 319.0, 308.0, 319.0, 316.0, 314.0, 291.0, 291.0, 290.0, 292.0, 296.0, 291.0, 316.0, 314.0, 316.0, 314.0, 281.0, 283.0, 290.0, 286.0, 319.0, 317.0, 314.0, 319.0, 296.0, 283.0, 314.0, 319.0, 319.0, 317.0, 317.0, 319.0, 319.0, 317.0, 280.0, 287.0, 311.0, 319.0, 288.0, 299.0, 291.0, 296.0, 296.0, 291.0, 316.0, 314.0, 314.0, 325.0, 285.0, 294.0, 319.0, 317.0, 314.0, 316.0, 319.0, 311.0, 293.0, 294.0, 314.0, 319.0, 283.0, 296.0, 288.0, 299.0, 301.0, 286.0, 296.0, 286.0, 316.0, 314.0, 316.0, 314.0, 298.0, 281.0, 319.0, 317.0, 317.0, 316.0, 308.0, 319.0, 311.0, 319.0, 295.0, 292.0, 290.0, 289.0]}, "sampler_perf": {"mean_env_wait_ms": 0.7906650471868468, "mean_processing_ms": 0.2266115620582618, "mean_inference_ms": 1.3940369234530334}, "off_policy_estimator": {}, "info": {"num_steps_trained": 10656000, "num_steps_sampled": 5683200, "sample_time_ms": 22188.83, "load_time_ms": 36.224, "grad_time_ms": 10331.481, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0037986973766237497, "policy_loss": -0.0035794072318822145, "vf_loss": 79.28974151611328, "vf_explained_var": 0.7626357078552246, "kl": 0.0019579820800572634, "entropy": 1.1017413139343262, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5683200, "episodes_total": 14208, "training_iteration": 444, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-35-52", "timestamp": 1660260952, "time_this_iter_s": 32.303210973739624, "time_total_s": 19361.34939098358, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 19361.34939098358, "timesteps_since_restore": 5683200, "iterations_since_restore": 444, "perf": {"cpu_util_percent": 30.893478260869564, "ram_util_percent": 59.16521739130435}}
+{"episode_reward_max": 639.0, "episode_reward_min": 479.0, "episode_reward_mean": 610.92, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 239.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 305.46}, "custom_metrics": {"sparse_reward_mean": 211.4, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 188.12, "shaped_reward_min": 159, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.7, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 19, "onion_pickup_agent_1_mean": 18.18, "onion_pickup_agent_1_min": 16, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 16.15, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 19, "useful_onion_pickup_agent_1_mean": 17.48, "useful_onion_pickup_agent_1_min": 14, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.07, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.16, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.02, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.05, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.37, "potting_onion_agent_0_min": 13, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.71, "potting_onion_agent_1_min": 14, "potting_onion_agent_1_max": 20, "dish_pickup_agent_0_mean": 6.42, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 6.0, "dish_pickup_agent_1_min": 4, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.55, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.11, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.74, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.76, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.04, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.6, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.2, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.51, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.07, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.37, 
"optimal_onion_potting_agent_0_min": 13, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.71, "optimal_onion_potting_agent_1_min": 14, "optimal_onion_potting_agent_1_max": 20, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.37, "viable_onion_potting_agent_0_min": 13, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.71, "viable_onion_potting_agent_1_min": 14, "viable_onion_potting_agent_1_max": 20, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [587.0, 582.0, 582.0, 582.0, 630.0, 578.0, 576.0, 587.0, 587.0, 630.0, 636.0, 630.0, 639.0, 636.0, 636.0, 579.0, 630.0, 636.0, 627.0, 636.0, 578.0, 633.0, 633.0, 582.0, 636.0, 636.0, 587.0, 590.0, 639.0, 630.0, 630.0, 582.0, 630.0, 564.0, 576.0, 636.0, 633.0, 579.0, 633.0, 636.0, 636.0, 636.0, 567.0, 630.0, 587.0, 587.0, 587.0, 630.0, 639.0, 579.0, 636.0, 630.0, 630.0, 587.0, 633.0, 579.0, 587.0, 587.0, 582.0, 630.0, 630.0, 579.0, 636.0, 633.0, 627.0, 630.0, 587.0, 579.0, 636.0, 630.0, 636.0, 633.0, 624.0, 636.0, 630.0, 633.0, 639.0, 570.0, 636.0, 630.0, 636.0, 582.0, 587.0, 579.0, 584.0, 633.0, 582.0, 587.0, 636.0, 633.0, 621.0, 630.0, 633.0, 584.0, 627.0, 630.0, 633.0, 479.0, 587.0, 587.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [282.0, 305.0, 291.0, 291.0, 291.0, 291.0, 294.0, 288.0, 316.0, 314.0, 284.0, 294.0, 295.0, 281.0, 299.0, 288.0, 288.0, 299.0, 308.0, 322.0, 319.0, 317.0, 311.0, 319.0, 319.0, 320.0, 319.0, 317.0, 317.0, 319.0, 288.0, 291.0, 316.0, 314.0, 327.0, 309.0, 315.0, 312.0, 319.0, 317.0, 280.0, 298.0, 311.0, 322.0, 319.0, 314.0, 293.0, 289.0, 319.0, 317.0, 319.0, 317.0, 293.0, 294.0, 291.0, 299.0, 319.0, 320.0, 316.0, 314.0, 311.0, 319.0, 291.0, 291.0, 316.0, 314.0, 281.0, 283.0, 290.0, 286.0, 319.0, 317.0, 314.0, 319.0, 296.0, 283.0, 314.0, 319.0, 319.0, 317.0, 317.0, 319.0, 319.0, 317.0, 280.0, 287.0, 311.0, 319.0, 288.0, 299.0, 291.0, 296.0, 296.0, 291.0, 316.0, 314.0, 314.0, 
325.0, 285.0, 294.0, 319.0, 317.0, 314.0, 316.0, 319.0, 311.0, 293.0, 294.0, 314.0, 319.0, 283.0, 296.0, 288.0, 299.0, 301.0, 286.0, 296.0, 286.0, 316.0, 314.0, 316.0, 314.0, 298.0, 281.0, 319.0, 317.0, 317.0, 316.0, 308.0, 319.0, 311.0, 319.0, 295.0, 292.0, 290.0, 289.0, 319.0, 317.0, 311.0, 319.0, 314.0, 322.0, 314.0, 319.0, 314.0, 310.0, 314.0, 322.0, 311.0, 319.0, 316.0, 317.0, 317.0, 322.0, 288.0, 282.0, 319.0, 317.0, 311.0, 319.0, 319.0, 317.0, 291.0, 291.0, 298.0, 289.0, 294.0, 285.0, 288.0, 296.0, 314.0, 319.0, 296.0, 286.0, 299.0, 288.0, 319.0, 317.0, 316.0, 317.0, 310.0, 311.0, 319.0, 311.0, 322.0, 311.0, 288.0, 296.0, 310.0, 317.0, 313.0, 317.0, 316.0, 317.0, 239.0, 240.0, 293.0, 294.0, 296.0, 291.0]}, "sampler_perf": {"mean_env_wait_ms": 0.7899054631660849, "mean_processing_ms": 0.22646096684642672, "mean_inference_ms": 1.3933342141949396}, "off_policy_estimator": {}, "info": {"num_steps_trained": 10680000, "num_steps_sampled": 5696000, "sample_time_ms": 22242.608, "load_time_ms": 35.979, "grad_time_ms": 10328.703, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0013677343958988786, "policy_loss": -0.006408216897398233, "vf_loss": 83.29845428466797, "vf_explained_var": 0.7579674124717712, "kl": 0.001941792550496757, "entropy": 1.1077739000320435, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5696000, "episodes_total": 14240, "training_iteration": 445, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-36-24", "timestamp": 1660260984, "time_this_iter_s": 31.836724996566772, "time_total_s": 19393.18611598015, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 19393.18611598015, "timesteps_since_restore": 5696000, "iterations_since_restore": 445, "perf": {"cpu_util_percent": 30.162222222222226, "ram_util_percent": 59.18000000000001}}
+{"episode_reward_max": 639.0, "episode_reward_min": 456.0, "episode_reward_mean": 608.82, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 228.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 304.41}, "custom_metrics": {"sparse_reward_mean": 210.8, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 187.22, "shaped_reward_min": 136, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.57, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 19, "onion_pickup_agent_1_mean": 18.19, "onion_pickup_agent_1_min": 14, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 15.99, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 19, "useful_onion_pickup_agent_1_mean": 17.41, "useful_onion_pickup_agent_1_min": 12, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.11, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.17, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.05, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.07, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.17, "potting_onion_agent_0_min": 13, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.71, "potting_onion_agent_1_min": 13, "potting_onion_agent_1_max": 20, "dish_pickup_agent_0_mean": 6.52, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.84, "dish_pickup_agent_1_min": 4, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.62, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.04, "useful_dish_pickup_agent_1_min": 3, "useful_dish_pickup_agent_1_max": 6, "dish_drop_agent_0_mean": 0.77, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.68, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.65, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.08, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.55, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.0, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.17, 
"optimal_onion_potting_agent_0_min": 13, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.71, "optimal_onion_potting_agent_1_min": 13, "optimal_onion_potting_agent_1_max": 20, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.17, "viable_onion_potting_agent_0_min": 13, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.71, "viable_onion_potting_agent_1_min": 13, "viable_onion_potting_agent_1_max": 20, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [627.0, 579.0, 570.0, 582.0, 636.0, 636.0, 630.0, 587.0, 630.0, 630.0, 456.0, 573.0, 630.0, 579.0, 576.0, 633.0, 587.0, 633.0, 630.0, 630.0, 630.0, 633.0, 582.0, 579.0, 630.0, 639.0, 630.0, 639.0, 587.0, 627.0, 522.0, 582.0, 627.0, 630.0, 587.0, 579.0, 636.0, 630.0, 636.0, 633.0, 624.0, 636.0, 630.0, 633.0, 639.0, 570.0, 636.0, 630.0, 636.0, 582.0, 587.0, 579.0, 584.0, 633.0, 582.0, 587.0, 636.0, 633.0, 621.0, 630.0, 633.0, 584.0, 627.0, 630.0, 633.0, 479.0, 587.0, 587.0, 587.0, 582.0, 582.0, 582.0, 630.0, 578.0, 576.0, 587.0, 587.0, 630.0, 636.0, 630.0, 639.0, 636.0, 636.0, 579.0, 630.0, 636.0, 627.0, 636.0, 578.0, 633.0, 633.0, 582.0, 636.0, 636.0, 587.0, 590.0, 639.0, 630.0, 630.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [310.0, 317.0, 290.0, 289.0, 288.0, 282.0, 291.0, 291.0, 324.0, 312.0, 314.0, 322.0, 314.0, 316.0, 299.0, 288.0, 316.0, 314.0, 316.0, 314.0, 228.0, 228.0, 281.0, 292.0, 316.0, 314.0, 293.0, 286.0, 288.0, 288.0, 316.0, 317.0, 293.0, 294.0, 314.0, 319.0, 321.0, 309.0, 316.0, 314.0, 322.0, 308.0, 319.0, 314.0, 293.0, 289.0, 288.0, 291.0, 316.0, 314.0, 319.0, 320.0, 321.0, 309.0, 319.0, 320.0, 299.0, 288.0, 319.0, 308.0, 251.0, 271.0, 296.0, 286.0, 308.0, 319.0, 311.0, 319.0, 295.0, 292.0, 290.0, 289.0, 319.0, 317.0, 311.0, 319.0, 314.0, 322.0, 314.0, 319.0, 314.0, 310.0, 314.0, 322.0, 311.0, 319.0, 316.0, 317.0, 317.0, 322.0, 288.0, 282.0, 319.0, 317.0, 311.0, 319.0, 319.0, 
317.0, 291.0, 291.0, 298.0, 289.0, 294.0, 285.0, 288.0, 296.0, 314.0, 319.0, 296.0, 286.0, 299.0, 288.0, 319.0, 317.0, 316.0, 317.0, 310.0, 311.0, 319.0, 311.0, 322.0, 311.0, 288.0, 296.0, 310.0, 317.0, 313.0, 317.0, 316.0, 317.0, 239.0, 240.0, 293.0, 294.0, 296.0, 291.0, 282.0, 305.0, 291.0, 291.0, 291.0, 291.0, 294.0, 288.0, 316.0, 314.0, 284.0, 294.0, 295.0, 281.0, 299.0, 288.0, 288.0, 299.0, 308.0, 322.0, 319.0, 317.0, 311.0, 319.0, 319.0, 320.0, 319.0, 317.0, 317.0, 319.0, 288.0, 291.0, 316.0, 314.0, 327.0, 309.0, 315.0, 312.0, 319.0, 317.0, 280.0, 298.0, 311.0, 322.0, 319.0, 314.0, 293.0, 289.0, 319.0, 317.0, 319.0, 317.0, 293.0, 294.0, 291.0, 299.0, 319.0, 320.0, 316.0, 314.0, 311.0, 319.0, 291.0, 291.0]}, "sampler_perf": {"mean_env_wait_ms": 0.7891523423128755, "mean_processing_ms": 0.22631170679234558, "mean_inference_ms": 1.3926725732710878}, "off_policy_estimator": {}, "info": {"num_steps_trained": 10704000, "num_steps_sampled": 5708800, "sample_time_ms": 21771.335, "load_time_ms": 36.027, "grad_time_ms": 10385.96, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0026401570066809654, "policy_loss": -0.0047949193976819515, "vf_loss": 79.88745880126953, "vf_explained_var": 0.7707352638244629, "kl": 0.0022178192157298326, "entropy": 1.107340693473816, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5708800, "episodes_total": 14272, "training_iteration": 446, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-36-58", "timestamp": 1660261018, "time_this_iter_s": 33.74682116508484, "time_total_s": 19426.932937145233, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 19426.932937145233, "timesteps_since_restore": 5708800, "iterations_since_restore": 446, "perf": {"cpu_util_percent": 32.958333333333336, "ram_util_percent": 59.18541666666667}}
+{"episode_reward_max": 639.0, "episode_reward_min": 408.0, "episode_reward_mean": 602.99, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 202.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 301.495}, "custom_metrics": {"sparse_reward_mean": 208.6, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 185.79, "shaped_reward_min": 128, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.65, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.04, "onion_pickup_agent_1_min": 12, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.17, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 19, "useful_onion_pickup_agent_1_mean": 17.2, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.21, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.18, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.09, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.08, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.09, "potting_onion_agent_0_min": 13, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.62, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 20, "dish_pickup_agent_0_mean": 6.44, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 13, "dish_pickup_agent_1_mean": 5.81, "dish_pickup_agent_1_min": 4, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 5.51, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 4.96, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.76, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 0.65, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.61, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.05, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.5, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.94, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.09, 
"optimal_onion_potting_agent_0_min": 13, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.62, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 20, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.09, "viable_onion_potting_agent_0_min": 13, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.62, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 20, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [636.0, 627.0, 587.0, 582.0, 636.0, 587.0, 627.0, 636.0, 630.0, 630.0, 587.0, 621.0, 575.0, 639.0, 633.0, 636.0, 584.0, 582.0, 573.0, 498.0, 587.0, 579.0, 408.0, 636.0, 587.0, 587.0, 633.0, 573.0, 636.0, 587.0, 636.0, 582.0, 633.0, 479.0, 587.0, 587.0, 587.0, 582.0, 582.0, 582.0, 630.0, 578.0, 576.0, 587.0, 587.0, 630.0, 636.0, 630.0, 639.0, 636.0, 636.0, 579.0, 630.0, 636.0, 627.0, 636.0, 578.0, 633.0, 633.0, 582.0, 636.0, 636.0, 587.0, 590.0, 639.0, 630.0, 630.0, 582.0, 627.0, 579.0, 570.0, 582.0, 636.0, 636.0, 630.0, 587.0, 630.0, 630.0, 456.0, 573.0, 630.0, 579.0, 576.0, 633.0, 587.0, 633.0, 630.0, 630.0, 630.0, 633.0, 582.0, 579.0, 630.0, 639.0, 630.0, 639.0, 587.0, 627.0, 522.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [319.0, 317.0, 308.0, 319.0, 286.0, 301.0, 293.0, 289.0, 316.0, 320.0, 291.0, 296.0, 313.0, 314.0, 314.0, 322.0, 311.0, 319.0, 314.0, 316.0, 288.0, 299.0, 310.0, 311.0, 288.0, 287.0, 317.0, 322.0, 321.0, 312.0, 319.0, 317.0, 302.0, 282.0, 291.0, 291.0, 282.0, 291.0, 253.0, 245.0, 290.0, 297.0, 288.0, 291.0, 206.0, 202.0, 319.0, 317.0, 293.0, 294.0, 288.0, 299.0, 316.0, 317.0, 285.0, 288.0, 319.0, 317.0, 299.0, 288.0, 319.0, 317.0, 293.0, 289.0, 316.0, 317.0, 239.0, 240.0, 293.0, 294.0, 296.0, 291.0, 282.0, 305.0, 291.0, 291.0, 291.0, 291.0, 294.0, 288.0, 316.0, 314.0, 284.0, 294.0, 295.0, 281.0, 299.0, 288.0, 288.0, 299.0, 308.0, 322.0, 319.0, 317.0, 311.0, 319.0, 319.0, 
320.0, 319.0, 317.0, 317.0, 319.0, 288.0, 291.0, 316.0, 314.0, 327.0, 309.0, 315.0, 312.0, 319.0, 317.0, 280.0, 298.0, 311.0, 322.0, 319.0, 314.0, 293.0, 289.0, 319.0, 317.0, 319.0, 317.0, 293.0, 294.0, 291.0, 299.0, 319.0, 320.0, 316.0, 314.0, 311.0, 319.0, 291.0, 291.0, 310.0, 317.0, 290.0, 289.0, 288.0, 282.0, 291.0, 291.0, 324.0, 312.0, 314.0, 322.0, 314.0, 316.0, 299.0, 288.0, 316.0, 314.0, 316.0, 314.0, 228.0, 228.0, 281.0, 292.0, 316.0, 314.0, 293.0, 286.0, 288.0, 288.0, 316.0, 317.0, 293.0, 294.0, 314.0, 319.0, 321.0, 309.0, 316.0, 314.0, 322.0, 308.0, 319.0, 314.0, 293.0, 289.0, 288.0, 291.0, 316.0, 314.0, 319.0, 320.0, 321.0, 309.0, 319.0, 320.0, 299.0, 288.0, 319.0, 308.0, 251.0, 271.0, 296.0, 286.0]}, "sampler_perf": {"mean_env_wait_ms": 0.7883964067869212, "mean_processing_ms": 0.22616136742936413, "mean_inference_ms": 1.39185520221877}, "off_policy_estimator": {}, "info": {"num_steps_trained": 10728000, "num_steps_sampled": 5721600, "sample_time_ms": 21601.13, "load_time_ms": 36.066, "grad_time_ms": 10530.811, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.004505176562815905, "policy_loss": -0.00313469092361629, "vf_loss": 81.9230728149414, "vf_explained_var": 0.7751343250274658, "kl": 0.002040610648691654, "entropy": 1.1048672199249268, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5721600, "episodes_total": 14304, "training_iteration": 447, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-37-28", "timestamp": 1660261048, "time_this_iter_s": 30.400289058685303, "time_total_s": 19457.33322620392, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 19457.33322620392, "timesteps_since_restore": 5721600, "iterations_since_restore": 447, "perf": {"cpu_util_percent": 36.12558139534883, "ram_util_percent": 59.20697674418605}}
+{"episode_reward_max": 639.0, "episode_reward_min": 408.0, "episode_reward_mean": 604.98, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 202.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 302.49}, "custom_metrics": {"sparse_reward_mean": 209.8, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 185.38, "shaped_reward_min": 128, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.58, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.19, "onion_pickup_agent_1_min": 12, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.14, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 19, "useful_onion_pickup_agent_1_mean": 17.39, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.25, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.25, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.12, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.13, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.0, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 18, "potting_onion_agent_1_mean": 17.67, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.53, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 13, "dish_pickup_agent_1_mean": 5.85, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 5.49, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 4.9, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.83, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 0.72, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.63, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.01, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.57, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.92, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.0, 
"optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 18, "optimal_onion_potting_agent_1_mean": 17.67, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.0, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 18, "viable_onion_potting_agent_1_mean": 17.67, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [587.0, 630.0, 630.0, 630.0, 630.0, 627.0, 630.0, 606.0, 633.0, 519.0, 633.0, 636.0, 573.0, 630.0, 636.0, 636.0, 584.0, 636.0, 627.0, 630.0, 636.0, 633.0, 633.0, 579.0, 633.0, 582.0, 582.0, 590.0, 627.0, 576.0, 570.0, 582.0, 639.0, 630.0, 630.0, 582.0, 627.0, 579.0, 570.0, 582.0, 636.0, 636.0, 630.0, 587.0, 630.0, 630.0, 456.0, 573.0, 630.0, 579.0, 576.0, 633.0, 587.0, 633.0, 630.0, 630.0, 630.0, 633.0, 582.0, 579.0, 630.0, 639.0, 630.0, 639.0, 587.0, 627.0, 522.0, 582.0, 636.0, 627.0, 587.0, 582.0, 636.0, 587.0, 627.0, 636.0, 630.0, 630.0, 587.0, 621.0, 575.0, 639.0, 633.0, 636.0, 584.0, 582.0, 573.0, 498.0, 587.0, 579.0, 408.0, 636.0, 587.0, 587.0, 633.0, 573.0, 636.0, 587.0, 636.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [290.0, 297.0, 316.0, 314.0, 318.0, 312.0, 316.0, 314.0, 319.0, 311.0, 313.0, 314.0, 316.0, 314.0, 304.0, 302.0, 316.0, 317.0, 267.0, 252.0, 322.0, 311.0, 313.0, 323.0, 278.0, 295.0, 316.0, 314.0, 314.0, 322.0, 319.0, 317.0, 298.0, 286.0, 316.0, 320.0, 313.0, 314.0, 316.0, 314.0, 319.0, 317.0, 316.0, 317.0, 314.0, 319.0, 299.0, 280.0, 319.0, 314.0, 286.0, 296.0, 288.0, 294.0, 296.0, 294.0, 308.0, 319.0, 285.0, 291.0, 272.0, 298.0, 282.0, 300.0, 319.0, 320.0, 316.0, 314.0, 311.0, 319.0, 291.0, 291.0, 310.0, 317.0, 290.0, 289.0, 288.0, 282.0, 291.0, 291.0, 324.0, 312.0, 314.0, 322.0, 314.0, 316.0, 299.0, 288.0, 316.0, 314.0, 316.0, 314.0, 228.0, 228.0, 281.0, 292.0, 316.0, 
314.0, 293.0, 286.0, 288.0, 288.0, 316.0, 317.0, 293.0, 294.0, 314.0, 319.0, 321.0, 309.0, 316.0, 314.0, 322.0, 308.0, 319.0, 314.0, 293.0, 289.0, 288.0, 291.0, 316.0, 314.0, 319.0, 320.0, 321.0, 309.0, 319.0, 320.0, 299.0, 288.0, 319.0, 308.0, 251.0, 271.0, 296.0, 286.0, 319.0, 317.0, 308.0, 319.0, 286.0, 301.0, 293.0, 289.0, 316.0, 320.0, 291.0, 296.0, 313.0, 314.0, 314.0, 322.0, 311.0, 319.0, 314.0, 316.0, 288.0, 299.0, 310.0, 311.0, 288.0, 287.0, 317.0, 322.0, 321.0, 312.0, 319.0, 317.0, 302.0, 282.0, 291.0, 291.0, 282.0, 291.0, 253.0, 245.0, 290.0, 297.0, 288.0, 291.0, 206.0, 202.0, 319.0, 317.0, 293.0, 294.0, 288.0, 299.0, 316.0, 317.0, 285.0, 288.0, 319.0, 317.0, 299.0, 288.0, 319.0, 317.0, 293.0, 289.0]}, "sampler_perf": {"mean_env_wait_ms": 0.7876488216946843, "mean_processing_ms": 0.22601276823059913, "mean_inference_ms": 1.3911503009579902}, "off_policy_estimator": {}, "info": {"num_steps_trained": 10752000, "num_steps_sampled": 5734400, "sample_time_ms": 21718.783, "load_time_ms": 35.962, "grad_time_ms": 10435.936, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0016340842703357339, "policy_loss": -0.0055215489119291306, "vf_loss": 77.1473388671875, "vf_explained_var": 0.7692286968231201, "kl": 0.001808720058761537, "entropy": 1.1181970834732056, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5734400, "episodes_total": 14336, "training_iteration": 448, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-38-03", "timestamp": 1660261083, "time_this_iter_s": 34.906923055648804, "time_total_s": 19492.240149259567, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 19492.240149259567, "timesteps_since_restore": 5734400, "iterations_since_restore": 448, "perf": {"cpu_util_percent": 33.91428571428571, "ram_util_percent": 59.09591836734693}}
+{"episode_reward_max": 639.0, "episode_reward_min": 408.0, "episode_reward_mean": 605.22, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 202.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 302.61}, "custom_metrics": {"sparse_reward_mean": 209.8, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 185.62, "shaped_reward_min": 128, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.6, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.2, "onion_pickup_agent_1_min": 12, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.22, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 19, "useful_onion_pickup_agent_1_mean": 17.46, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.22, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.23, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.08, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.12, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.09, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.73, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.59, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 13, "dish_pickup_agent_1_mean": 5.89, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 5.42, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.85, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.88, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 0.74, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.62, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.05, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.55, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.94, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.09, 
"optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.73, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.09, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.73, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 636.0, 587.0, 579.0, 636.0, 582.0, 606.0, 636.0, 587.0, 633.0, 630.0, 630.0, 636.0, 633.0, 633.0, 624.0, 587.0, 627.0, 576.0, 584.0, 570.0, 582.0, 630.0, 579.0, 636.0, 633.0, 576.0, 587.0, 636.0, 582.0, 630.0, 636.0, 587.0, 627.0, 522.0, 582.0, 636.0, 627.0, 587.0, 582.0, 636.0, 587.0, 627.0, 636.0, 630.0, 630.0, 587.0, 621.0, 575.0, 639.0, 633.0, 636.0, 584.0, 582.0, 573.0, 498.0, 587.0, 579.0, 408.0, 636.0, 587.0, 587.0, 633.0, 573.0, 636.0, 587.0, 636.0, 582.0, 587.0, 630.0, 630.0, 630.0, 630.0, 627.0, 630.0, 606.0, 633.0, 519.0, 633.0, 636.0, 573.0, 630.0, 636.0, 636.0, 584.0, 636.0, 627.0, 630.0, 636.0, 633.0, 633.0, 579.0, 633.0, 582.0, 582.0, 590.0, 627.0, 576.0, 570.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [288.0, 294.0, 322.0, 314.0, 290.0, 297.0, 283.0, 296.0, 324.0, 312.0, 285.0, 297.0, 302.0, 304.0, 319.0, 317.0, 287.0, 300.0, 314.0, 319.0, 316.0, 314.0, 311.0, 319.0, 319.0, 317.0, 317.0, 316.0, 316.0, 317.0, 313.0, 311.0, 298.0, 289.0, 313.0, 314.0, 294.0, 282.0, 296.0, 288.0, 279.0, 291.0, 301.0, 281.0, 316.0, 314.0, 293.0, 286.0, 319.0, 317.0, 316.0, 317.0, 285.0, 291.0, 302.0, 285.0, 319.0, 317.0, 288.0, 294.0, 319.0, 311.0, 319.0, 317.0, 299.0, 288.0, 319.0, 308.0, 251.0, 271.0, 296.0, 286.0, 319.0, 317.0, 308.0, 319.0, 286.0, 301.0, 293.0, 289.0, 316.0, 320.0, 291.0, 296.0, 313.0, 314.0, 314.0, 322.0, 311.0, 319.0, 314.0, 316.0, 288.0, 299.0, 310.0, 311.0, 288.0, 
287.0, 317.0, 322.0, 321.0, 312.0, 319.0, 317.0, 302.0, 282.0, 291.0, 291.0, 282.0, 291.0, 253.0, 245.0, 290.0, 297.0, 288.0, 291.0, 206.0, 202.0, 319.0, 317.0, 293.0, 294.0, 288.0, 299.0, 316.0, 317.0, 285.0, 288.0, 319.0, 317.0, 299.0, 288.0, 319.0, 317.0, 293.0, 289.0, 290.0, 297.0, 316.0, 314.0, 318.0, 312.0, 316.0, 314.0, 319.0, 311.0, 313.0, 314.0, 316.0, 314.0, 304.0, 302.0, 316.0, 317.0, 267.0, 252.0, 322.0, 311.0, 313.0, 323.0, 278.0, 295.0, 316.0, 314.0, 314.0, 322.0, 319.0, 317.0, 298.0, 286.0, 316.0, 320.0, 313.0, 314.0, 316.0, 314.0, 319.0, 317.0, 316.0, 317.0, 314.0, 319.0, 299.0, 280.0, 319.0, 314.0, 286.0, 296.0, 288.0, 294.0, 296.0, 294.0, 308.0, 319.0, 285.0, 291.0, 272.0, 298.0, 282.0, 300.0]}, "sampler_perf": {"mean_env_wait_ms": 0.7869063801847372, "mean_processing_ms": 0.22586438145730278, "mean_inference_ms": 1.3903532948966142}, "off_policy_estimator": {}, "info": {"num_steps_trained": 10776000, "num_steps_sampled": 5747200, "sample_time_ms": 21734.341, "load_time_ms": 36.357, "grad_time_ms": 10633.962, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0015590289840474725, "policy_loss": -0.005610723048448563, "vf_loss": 77.2466812133789, "vf_explained_var": 0.7718032002449036, "kl": 0.0017093941569328308, "entropy": 1.1098432540893555, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5747200, "episodes_total": 14368, "training_iteration": 449, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-38-34", "timestamp": 1660261114, "time_this_iter_s": 30.762639045715332, "time_total_s": 19523.002788305283, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 19523.002788305283, "timesteps_since_restore": 5747200, "iterations_since_restore": 449, "perf": {"cpu_util_percent": 34.67441860465116, "ram_util_percent": 60.09767441860465}}
+{"episode_reward_max": 639.0, "episode_reward_min": 519.0, "episode_reward_mean": 607.51, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 249.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 303.755}, "custom_metrics": {"sparse_reward_mean": 210.6, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 186.31, "shaped_reward_min": 159, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.27, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 19, "onion_pickup_agent_1_mean": 18.5, "onion_pickup_agent_1_min": 15, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 15.83, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 19, "useful_onion_pickup_agent_1_mean": 17.75, "useful_onion_pickup_agent_1_min": 14, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.16, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.2, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.07, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.1, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.87, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 18.0, "potting_onion_agent_1_min": 14, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.74, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 5.65, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.68, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.77, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.85, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.62, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.77, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 4.9, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.7, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.83, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.87, 
"optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 18.0, "optimal_onion_potting_agent_1_min": 14, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.87, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 18.0, "viable_onion_potting_agent_1_min": 14, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [573.0, 582.0, 579.0, 633.0, 630.0, 587.0, 639.0, 582.0, 582.0, 522.0, 633.0, 630.0, 582.0, 633.0, 630.0, 579.0, 519.0, 627.0, 636.0, 627.0, 587.0, 587.0, 630.0, 573.0, 639.0, 630.0, 582.0, 627.0, 627.0, 587.0, 582.0, 587.0, 636.0, 587.0, 636.0, 582.0, 587.0, 630.0, 630.0, 630.0, 630.0, 627.0, 630.0, 606.0, 633.0, 519.0, 633.0, 636.0, 573.0, 630.0, 636.0, 636.0, 584.0, 636.0, 627.0, 630.0, 636.0, 633.0, 633.0, 579.0, 633.0, 582.0, 582.0, 590.0, 627.0, 576.0, 570.0, 582.0, 582.0, 636.0, 587.0, 579.0, 636.0, 582.0, 606.0, 636.0, 587.0, 633.0, 630.0, 630.0, 636.0, 633.0, 633.0, 624.0, 587.0, 627.0, 576.0, 584.0, 570.0, 582.0, 630.0, 579.0, 636.0, 633.0, 576.0, 587.0, 636.0, 582.0, 630.0, 636.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [282.0, 291.0, 289.0, 293.0, 287.0, 292.0, 319.0, 314.0, 311.0, 319.0, 290.0, 297.0, 319.0, 320.0, 299.0, 283.0, 290.0, 292.0, 264.0, 258.0, 316.0, 317.0, 321.0, 309.0, 291.0, 291.0, 314.0, 319.0, 316.0, 314.0, 296.0, 283.0, 270.0, 249.0, 316.0, 311.0, 311.0, 325.0, 315.0, 312.0, 290.0, 297.0, 295.0, 292.0, 321.0, 309.0, 293.0, 280.0, 322.0, 317.0, 316.0, 314.0, 293.0, 289.0, 308.0, 319.0, 311.0, 316.0, 296.0, 291.0, 291.0, 291.0, 301.0, 286.0, 319.0, 317.0, 299.0, 288.0, 319.0, 317.0, 293.0, 289.0, 290.0, 297.0, 316.0, 314.0, 318.0, 312.0, 316.0, 314.0, 319.0, 311.0, 313.0, 314.0, 316.0, 314.0, 304.0, 302.0, 316.0, 317.0, 267.0, 252.0, 322.0, 311.0, 313.0, 323.0, 278.0, 
295.0, 316.0, 314.0, 314.0, 322.0, 319.0, 317.0, 298.0, 286.0, 316.0, 320.0, 313.0, 314.0, 316.0, 314.0, 319.0, 317.0, 316.0, 317.0, 314.0, 319.0, 299.0, 280.0, 319.0, 314.0, 286.0, 296.0, 288.0, 294.0, 296.0, 294.0, 308.0, 319.0, 285.0, 291.0, 272.0, 298.0, 282.0, 300.0, 288.0, 294.0, 322.0, 314.0, 290.0, 297.0, 283.0, 296.0, 324.0, 312.0, 285.0, 297.0, 302.0, 304.0, 319.0, 317.0, 287.0, 300.0, 314.0, 319.0, 316.0, 314.0, 311.0, 319.0, 319.0, 317.0, 317.0, 316.0, 316.0, 317.0, 313.0, 311.0, 298.0, 289.0, 313.0, 314.0, 294.0, 282.0, 296.0, 288.0, 279.0, 291.0, 301.0, 281.0, 316.0, 314.0, 293.0, 286.0, 319.0, 317.0, 316.0, 317.0, 285.0, 291.0, 302.0, 285.0, 319.0, 317.0, 288.0, 294.0, 319.0, 311.0, 319.0, 317.0]}, "sampler_perf": {"mean_env_wait_ms": 0.7861726618280127, "mean_processing_ms": 0.22571819485718037, "mean_inference_ms": 1.389515005345911}, "off_policy_estimator": {}, "info": {"num_steps_trained": 10800000, "num_steps_sampled": 5760000, "sample_time_ms": 21545.479, "load_time_ms": 37.135, "grad_time_ms": 10871.751, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.00040180576615966856, "policy_loss": -0.007139734923839569, "vf_loss": 80.9995346069336, "vf_explained_var": 0.7637953758239746, "kl": 0.0017641382291913033, "entropy": 1.1168159246444702, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5760000, "episodes_total": 14400, "training_iteration": 450, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-39-04", "timestamp": 1660261144, "time_this_iter_s": 30.14027214050293, "time_total_s": 19553.143060445786, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 19553.143060445786, "timesteps_since_restore": 5760000, "iterations_since_restore": 450, "perf": {"cpu_util_percent": 35.460465116279074, "ram_util_percent": 59.4627906976744}}
+{"episode_reward_max": 639.0, "episode_reward_min": 180.0, "episode_reward_mean": 599.51, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 89.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 299.755}, "custom_metrics": {"sparse_reward_mean": 207.8, "sparse_reward_min": 60, "sparse_reward_max": 220, "shaped_reward_mean": 183.91, "shaped_reward_min": 60, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.25, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 19, "onion_pickup_agent_1_mean": 18.17, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 15.82, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 19, "useful_onion_pickup_agent_1_mean": 17.36, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.15, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.21, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.05, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.14, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 15.82, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.65, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.59, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 14, "dish_pickup_agent_1_mean": 5.68, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.53, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.82, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.87, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 0.6, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.57, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 4.92, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.51, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.88, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.82, 
"optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.65, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.82, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.65, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [627.0, 576.0, 630.0, 408.0, 582.0, 582.0, 636.0, 636.0, 582.0, 627.0, 582.0, 630.0, 630.0, 633.0, 582.0, 630.0, 630.0, 636.0, 180.0, 630.0, 510.0, 633.0, 582.0, 630.0, 579.0, 636.0, 582.0, 582.0, 630.0, 639.0, 630.0, 570.0, 627.0, 576.0, 570.0, 582.0, 582.0, 636.0, 587.0, 579.0, 636.0, 582.0, 606.0, 636.0, 587.0, 633.0, 630.0, 630.0, 636.0, 633.0, 633.0, 624.0, 587.0, 627.0, 576.0, 584.0, 570.0, 582.0, 630.0, 579.0, 636.0, 633.0, 576.0, 587.0, 636.0, 582.0, 630.0, 636.0, 573.0, 582.0, 579.0, 633.0, 630.0, 587.0, 639.0, 582.0, 582.0, 522.0, 633.0, 630.0, 582.0, 633.0, 630.0, 579.0, 519.0, 627.0, 636.0, 627.0, 587.0, 587.0, 630.0, 573.0, 639.0, 630.0, 582.0, 627.0, 627.0, 587.0, 582.0, 587.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [319.0, 308.0, 285.0, 291.0, 311.0, 319.0, 213.0, 195.0, 288.0, 294.0, 294.0, 288.0, 321.0, 315.0, 319.0, 317.0, 291.0, 291.0, 311.0, 316.0, 301.0, 281.0, 316.0, 314.0, 321.0, 309.0, 327.0, 306.0, 291.0, 291.0, 311.0, 319.0, 311.0, 319.0, 311.0, 325.0, 91.0, 89.0, 311.0, 319.0, 247.0, 263.0, 311.0, 322.0, 283.0, 299.0, 316.0, 314.0, 288.0, 291.0, 316.0, 320.0, 293.0, 289.0, 291.0, 291.0, 306.0, 324.0, 317.0, 322.0, 314.0, 316.0, 282.0, 288.0, 308.0, 319.0, 285.0, 291.0, 272.0, 298.0, 282.0, 300.0, 288.0, 294.0, 322.0, 314.0, 290.0, 297.0, 283.0, 296.0, 324.0, 312.0, 285.0, 297.0, 302.0, 304.0, 319.0, 317.0, 287.0, 300.0, 314.0, 319.0, 316.0, 314.0, 311.0, 319.0, 319.0, 
317.0, 317.0, 316.0, 316.0, 317.0, 313.0, 311.0, 298.0, 289.0, 313.0, 314.0, 294.0, 282.0, 296.0, 288.0, 279.0, 291.0, 301.0, 281.0, 316.0, 314.0, 293.0, 286.0, 319.0, 317.0, 316.0, 317.0, 285.0, 291.0, 302.0, 285.0, 319.0, 317.0, 288.0, 294.0, 319.0, 311.0, 319.0, 317.0, 282.0, 291.0, 289.0, 293.0, 287.0, 292.0, 319.0, 314.0, 311.0, 319.0, 290.0, 297.0, 319.0, 320.0, 299.0, 283.0, 290.0, 292.0, 264.0, 258.0, 316.0, 317.0, 321.0, 309.0, 291.0, 291.0, 314.0, 319.0, 316.0, 314.0, 296.0, 283.0, 270.0, 249.0, 316.0, 311.0, 311.0, 325.0, 315.0, 312.0, 290.0, 297.0, 295.0, 292.0, 321.0, 309.0, 293.0, 280.0, 322.0, 317.0, 316.0, 314.0, 293.0, 289.0, 308.0, 319.0, 311.0, 316.0, 296.0, 291.0, 291.0, 291.0, 301.0, 286.0]}, "sampler_perf": {"mean_env_wait_ms": 0.7854523352273275, "mean_processing_ms": 0.22557596598023225, "mean_inference_ms": 1.3886629869315275}, "off_policy_estimator": {}, "info": {"num_steps_trained": 10824000, "num_steps_sampled": 5772800, "sample_time_ms": 21458.474, "load_time_ms": 37.866, "grad_time_ms": 10821.902, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.004755727481096983, "policy_loss": -0.00296382955275476, "vf_loss": 82.76275634765625, "vf_explained_var": 0.7805452942848206, "kl": 0.0020347917452454567, "entropy": 1.1134214401245117, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5772800, "episodes_total": 14432, "training_iteration": 451, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-39-39", "timestamp": 1660261179, "time_this_iter_s": 34.819623947143555, "time_total_s": 19587.96268439293, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 19587.96268439293, "timesteps_since_restore": 5772800, "iterations_since_restore": 451, "perf": {"cpu_util_percent": 35.726, "ram_util_percent": 59.13399999999999}}
+{"episode_reward_max": 639.0, "episode_reward_min": 180.0, "episode_reward_mean": 600.39, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 89.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 300.195}, "custom_metrics": {"sparse_reward_mean": 208.0, "sparse_reward_min": 60, "sparse_reward_max": 220, "shaped_reward_mean": 184.39, "shaped_reward_min": 60, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.35, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 19, "onion_pickup_agent_1_mean": 18.11, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 15.9, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 19, "useful_onion_pickup_agent_1_mean": 17.4, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.15, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.15, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.05, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.1, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 15.87, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.63, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.51, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 14, "dish_pickup_agent_1_mean": 5.71, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.56, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.92, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.84, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 0.58, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.52, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 4.97, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.49, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.91, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.87, 
"optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.63, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.87, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.63, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [630.0, 582.0, 582.0, 576.0, 609.0, 582.0, 630.0, 636.0, 630.0, 630.0, 633.0, 587.0, 584.0, 582.0, 633.0, 587.0, 636.0, 639.0, 630.0, 630.0, 522.0, 636.0, 587.0, 633.0, 579.0, 633.0, 582.0, 630.0, 582.0, 633.0, 633.0, 582.0, 636.0, 582.0, 630.0, 636.0, 573.0, 582.0, 579.0, 633.0, 630.0, 587.0, 639.0, 582.0, 582.0, 522.0, 633.0, 630.0, 582.0, 633.0, 630.0, 579.0, 519.0, 627.0, 636.0, 627.0, 587.0, 587.0, 630.0, 573.0, 639.0, 630.0, 582.0, 627.0, 627.0, 587.0, 582.0, 587.0, 627.0, 576.0, 630.0, 408.0, 582.0, 582.0, 636.0, 636.0, 582.0, 627.0, 582.0, 630.0, 630.0, 633.0, 582.0, 630.0, 630.0, 636.0, 180.0, 630.0, 510.0, 633.0, 582.0, 630.0, 579.0, 636.0, 582.0, 582.0, 630.0, 639.0, 630.0, 570.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [321.0, 309.0, 288.0, 294.0, 294.0, 288.0, 290.0, 286.0, 302.0, 307.0, 291.0, 291.0, 311.0, 319.0, 324.0, 312.0, 313.0, 317.0, 311.0, 319.0, 311.0, 322.0, 288.0, 299.0, 283.0, 301.0, 293.0, 289.0, 319.0, 314.0, 283.0, 304.0, 314.0, 322.0, 319.0, 320.0, 316.0, 314.0, 321.0, 309.0, 257.0, 265.0, 321.0, 315.0, 288.0, 299.0, 311.0, 322.0, 291.0, 288.0, 309.0, 324.0, 285.0, 297.0, 316.0, 314.0, 293.0, 289.0, 319.0, 314.0, 316.0, 317.0, 286.0, 296.0, 319.0, 317.0, 288.0, 294.0, 319.0, 311.0, 319.0, 317.0, 282.0, 291.0, 289.0, 293.0, 287.0, 292.0, 319.0, 314.0, 311.0, 319.0, 290.0, 297.0, 319.0, 320.0, 299.0, 283.0, 290.0, 292.0, 264.0, 258.0, 316.0, 317.0, 321.0, 309.0, 291.0, 
291.0, 314.0, 319.0, 316.0, 314.0, 296.0, 283.0, 270.0, 249.0, 316.0, 311.0, 311.0, 325.0, 315.0, 312.0, 290.0, 297.0, 295.0, 292.0, 321.0, 309.0, 293.0, 280.0, 322.0, 317.0, 316.0, 314.0, 293.0, 289.0, 308.0, 319.0, 311.0, 316.0, 296.0, 291.0, 291.0, 291.0, 301.0, 286.0, 319.0, 308.0, 285.0, 291.0, 311.0, 319.0, 213.0, 195.0, 288.0, 294.0, 294.0, 288.0, 321.0, 315.0, 319.0, 317.0, 291.0, 291.0, 311.0, 316.0, 301.0, 281.0, 316.0, 314.0, 321.0, 309.0, 327.0, 306.0, 291.0, 291.0, 311.0, 319.0, 311.0, 319.0, 311.0, 325.0, 91.0, 89.0, 311.0, 319.0, 247.0, 263.0, 311.0, 322.0, 283.0, 299.0, 316.0, 314.0, 288.0, 291.0, 316.0, 320.0, 293.0, 289.0, 291.0, 291.0, 306.0, 324.0, 317.0, 322.0, 314.0, 316.0, 282.0, 288.0]}, "sampler_perf": {"mean_env_wait_ms": 0.7847405370697927, "mean_processing_ms": 0.22543724156360243, "mean_inference_ms": 1.3878004584531793}, "off_policy_estimator": {}, "info": {"num_steps_trained": 10848000, "num_steps_sampled": 5785600, "sample_time_ms": 21368.51, "load_time_ms": 38.205, "grad_time_ms": 10677.018, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0004785877245012671, "policy_loss": -0.0067210569977760315, "vf_loss": 77.5384750366211, "vf_explained_var": 0.777080774307251, "kl": 0.0022153640165925026, "entropy": 1.108397126197815, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5785600, "episodes_total": 14464, "training_iteration": 452, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-40-10", "timestamp": 1660261210, "time_this_iter_s": 30.929455280303955, "time_total_s": 19618.892139673233, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 19618.892139673233, "timesteps_since_restore": 5785600, "iterations_since_restore": 452, "perf": {"cpu_util_percent": 34.12045454545454, "ram_util_percent": 59.23863636363635}}
+{"episode_reward_max": 639.0, "episode_reward_min": 180.0, "episode_reward_mean": 601.45, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 89.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 300.725}, "custom_metrics": {"sparse_reward_mean": 208.2, "sparse_reward_min": 60, "sparse_reward_max": 220, "shaped_reward_mean": 185.05, "shaped_reward_min": 60, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.52, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 19, "onion_pickup_agent_1_mean": 17.97, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.17, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 19, "useful_onion_pickup_agent_1_mean": 17.3, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.11, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.12, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.01, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.06, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 16.1, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.54, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.32, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 14, "dish_pickup_agent_1_mean": 5.98, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.43, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.08, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.73, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 0.75, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.45, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.07, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.41, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.01, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.1, 
"optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.54, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.1, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.54, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 582.0, 636.0, 636.0, 636.0, 636.0, 582.0, 633.0, 587.0, 581.0, 633.0, 582.0, 630.0, 630.0, 587.0, 582.0, 636.0, 582.0, 633.0, 536.0, 582.0, 636.0, 639.0, 627.0, 582.0, 624.0, 579.0, 573.0, 633.0, 633.0, 587.0, 633.0, 627.0, 587.0, 582.0, 587.0, 627.0, 576.0, 630.0, 408.0, 582.0, 582.0, 636.0, 636.0, 582.0, 627.0, 582.0, 630.0, 630.0, 633.0, 582.0, 630.0, 630.0, 636.0, 180.0, 630.0, 510.0, 633.0, 582.0, 630.0, 579.0, 636.0, 582.0, 582.0, 630.0, 639.0, 630.0, 570.0, 630.0, 582.0, 582.0, 576.0, 609.0, 582.0, 630.0, 636.0, 630.0, 630.0, 633.0, 587.0, 584.0, 582.0, 633.0, 587.0, 636.0, 639.0, 630.0, 630.0, 522.0, 636.0, 587.0, 633.0, 579.0, 633.0, 582.0, 630.0, 582.0, 633.0, 633.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [293.0, 289.0, 291.0, 291.0, 319.0, 317.0, 314.0, 322.0, 319.0, 317.0, 314.0, 322.0, 288.0, 294.0, 319.0, 314.0, 288.0, 299.0, 290.0, 291.0, 314.0, 319.0, 294.0, 288.0, 316.0, 314.0, 319.0, 311.0, 296.0, 291.0, 291.0, 291.0, 322.0, 314.0, 290.0, 292.0, 316.0, 317.0, 257.0, 279.0, 288.0, 294.0, 311.0, 325.0, 322.0, 317.0, 319.0, 308.0, 293.0, 289.0, 312.0, 312.0, 296.0, 283.0, 287.0, 286.0, 316.0, 317.0, 316.0, 317.0, 290.0, 297.0, 314.0, 319.0, 311.0, 316.0, 296.0, 291.0, 291.0, 291.0, 301.0, 286.0, 319.0, 308.0, 285.0, 291.0, 311.0, 319.0, 213.0, 195.0, 288.0, 294.0, 294.0, 288.0, 321.0, 315.0, 319.0, 317.0, 291.0, 291.0, 311.0, 316.0, 301.0, 281.0, 316.0, 314.0, 321.0, 
309.0, 327.0, 306.0, 291.0, 291.0, 311.0, 319.0, 311.0, 319.0, 311.0, 325.0, 91.0, 89.0, 311.0, 319.0, 247.0, 263.0, 311.0, 322.0, 283.0, 299.0, 316.0, 314.0, 288.0, 291.0, 316.0, 320.0, 293.0, 289.0, 291.0, 291.0, 306.0, 324.0, 317.0, 322.0, 314.0, 316.0, 282.0, 288.0, 321.0, 309.0, 288.0, 294.0, 294.0, 288.0, 290.0, 286.0, 302.0, 307.0, 291.0, 291.0, 311.0, 319.0, 324.0, 312.0, 313.0, 317.0, 311.0, 319.0, 311.0, 322.0, 288.0, 299.0, 283.0, 301.0, 293.0, 289.0, 319.0, 314.0, 283.0, 304.0, 314.0, 322.0, 319.0, 320.0, 316.0, 314.0, 321.0, 309.0, 257.0, 265.0, 321.0, 315.0, 288.0, 299.0, 311.0, 322.0, 291.0, 288.0, 309.0, 324.0, 285.0, 297.0, 316.0, 314.0, 293.0, 289.0, 319.0, 314.0, 316.0, 317.0, 286.0, 296.0]}, "sampler_perf": {"mean_env_wait_ms": 0.7840324712041147, "mean_processing_ms": 0.22529998529372405, "mean_inference_ms": 1.3869181064814406}, "off_policy_estimator": {}, "info": {"num_steps_trained": 10872000, "num_steps_sampled": 5798400, "sample_time_ms": 21127.393, "load_time_ms": 38.987, "grad_time_ms": 10639.287, "update_time_ms": 0.003, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0022095281165093184, "policy_loss": -0.005497789476066828, "vf_loss": 82.6261215209961, "vf_explained_var": 0.7598109245300293, "kl": 0.0015994912246242166, "entropy": 1.110588550567627, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5798400, "episodes_total": 14496, "training_iteration": 453, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-40-38", "timestamp": 1660261238, "time_this_iter_s": 28.482766151428223, "time_total_s": 19647.37490582466, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 19647.37490582466, "timesteps_since_restore": 5798400, "iterations_since_restore": 453, "perf": {"cpu_util_percent": 34.01, "ram_util_percent": 58.98499999999999}}
+{"episode_reward_max": 639.0, "episode_reward_min": 522.0, "episode_reward_mean": 609.06, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 257.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 304.53}, "custom_metrics": {"sparse_reward_mean": 210.8, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 187.46, "shaped_reward_min": 162, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.76, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 19, "onion_pickup_agent_1_mean": 18.14, "onion_pickup_agent_1_min": 15, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.39, "useful_onion_pickup_agent_0_min": 13, "useful_onion_pickup_agent_0_max": 19, "useful_onion_pickup_agent_1_mean": 17.62, "useful_onion_pickup_agent_1_min": 14, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.11, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.11, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.0, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 0, "useful_onion_drop_agent_1_mean": 0.01, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.31, "potting_onion_agent_0_min": 13, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.73, "potting_onion_agent_1_min": 15, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.29, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 6.14, "dish_pickup_agent_1_min": 4, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.49, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.19, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.63, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.82, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.5, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.16, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.46, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.09, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.31, 
"optimal_onion_potting_agent_0_min": 13, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.73, "optimal_onion_potting_agent_1_min": 15, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.31, "viable_onion_potting_agent_0_min": 13, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.73, "viable_onion_potting_agent_1_min": 15, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [639.0, 587.0, 627.0, 630.0, 630.0, 627.0, 582.0, 633.0, 582.0, 582.0, 636.0, 630.0, 579.0, 627.0, 627.0, 579.0, 587.0, 633.0, 633.0, 582.0, 582.0, 582.0, 636.0, 633.0, 587.0, 633.0, 579.0, 636.0, 582.0, 582.0, 627.0, 636.0, 630.0, 639.0, 630.0, 570.0, 630.0, 582.0, 582.0, 576.0, 609.0, 582.0, 630.0, 636.0, 630.0, 630.0, 633.0, 587.0, 584.0, 582.0, 633.0, 587.0, 636.0, 639.0, 630.0, 630.0, 522.0, 636.0, 587.0, 633.0, 579.0, 633.0, 582.0, 630.0, 582.0, 633.0, 633.0, 582.0, 582.0, 582.0, 636.0, 636.0, 636.0, 636.0, 582.0, 633.0, 587.0, 581.0, 633.0, 582.0, 630.0, 630.0, 587.0, 582.0, 636.0, 582.0, 633.0, 536.0, 582.0, 636.0, 639.0, 627.0, 582.0, 624.0, 579.0, 573.0, 633.0, 633.0, 587.0, 633.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [319.0, 320.0, 288.0, 299.0, 313.0, 314.0, 306.0, 324.0, 311.0, 319.0, 316.0, 311.0, 296.0, 286.0, 322.0, 311.0, 296.0, 286.0, 298.0, 284.0, 314.0, 322.0, 311.0, 319.0, 277.0, 302.0, 319.0, 308.0, 316.0, 311.0, 290.0, 289.0, 298.0, 289.0, 316.0, 317.0, 314.0, 319.0, 289.0, 293.0, 299.0, 283.0, 286.0, 296.0, 314.0, 322.0, 318.0, 315.0, 291.0, 296.0, 316.0, 317.0, 287.0, 292.0, 314.0, 322.0, 293.0, 289.0, 291.0, 291.0, 316.0, 311.0, 319.0, 317.0, 306.0, 324.0, 317.0, 322.0, 314.0, 316.0, 282.0, 288.0, 321.0, 309.0, 288.0, 294.0, 294.0, 288.0, 290.0, 286.0, 302.0, 307.0, 291.0, 291.0, 311.0, 319.0, 324.0, 312.0, 313.0, 317.0, 311.0, 319.0, 311.0, 322.0, 288.0, 299.0, 283.0, 
301.0, 293.0, 289.0, 319.0, 314.0, 283.0, 304.0, 314.0, 322.0, 319.0, 320.0, 316.0, 314.0, 321.0, 309.0, 257.0, 265.0, 321.0, 315.0, 288.0, 299.0, 311.0, 322.0, 291.0, 288.0, 309.0, 324.0, 285.0, 297.0, 316.0, 314.0, 293.0, 289.0, 319.0, 314.0, 316.0, 317.0, 286.0, 296.0, 293.0, 289.0, 291.0, 291.0, 319.0, 317.0, 314.0, 322.0, 319.0, 317.0, 314.0, 322.0, 288.0, 294.0, 319.0, 314.0, 288.0, 299.0, 290.0, 291.0, 314.0, 319.0, 294.0, 288.0, 316.0, 314.0, 319.0, 311.0, 296.0, 291.0, 291.0, 291.0, 322.0, 314.0, 290.0, 292.0, 316.0, 317.0, 257.0, 279.0, 288.0, 294.0, 311.0, 325.0, 322.0, 317.0, 319.0, 308.0, 293.0, 289.0, 312.0, 312.0, 296.0, 283.0, 287.0, 286.0, 316.0, 317.0, 316.0, 317.0, 290.0, 297.0, 314.0, 319.0]}, "sampler_perf": {"mean_env_wait_ms": 0.783318071764331, "mean_processing_ms": 0.22516025845999352, "mean_inference_ms": 1.3858608320735286}, "off_policy_estimator": {}, "info": {"num_steps_trained": 10896000, "num_steps_sampled": 5811200, "sample_time_ms": 20778.477, "load_time_ms": 39.252, "grad_time_ms": 10489.319, "update_time_ms": 0.004, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0007634037174284458, "policy_loss": -0.006680456455796957, "vf_loss": 80.01913452148438, "vf_explained_var": 0.7667891383171082, "kl": 0.0017371875001117587, "entropy": 1.1161128282546997, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5811200, "episodes_total": 14528, "training_iteration": 454, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-41-06", "timestamp": 1660261266, "time_this_iter_s": 27.318589210510254, "time_total_s": 19674.69349503517, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 19674.69349503517, "timesteps_since_restore": 5811200, "iterations_since_restore": 454, "perf": {"cpu_util_percent": 37.051282051282044, "ram_util_percent": 58.9923076923077}}
+{"episode_reward_max": 639.0, "episode_reward_min": 462.0, "episode_reward_mean": 606.28, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 228.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 303.14}, "custom_metrics": {"sparse_reward_mean": 209.8, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 186.68, "shaped_reward_min": 142, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.68, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.2, "onion_pickup_agent_1_min": 15, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.32, "useful_onion_pickup_agent_0_min": 13, "useful_onion_pickup_agent_0_max": 19, "useful_onion_pickup_agent_1_mean": 17.57, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.13, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.17, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.02, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.03, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.27, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.68, "potting_onion_agent_1_min": 14, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.39, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 6.07, "dish_pickup_agent_1_min": 4, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.52, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.09, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.68, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.83, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.53, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.07, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.48, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.02, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.27, 
"optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.68, "optimal_onion_potting_agent_1_min": 14, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.27, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.68, "viable_onion_potting_agent_1_min": 14, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [633.0, 582.0, 582.0, 579.0, 582.0, 633.0, 630.0, 606.0, 639.0, 630.0, 639.0, 633.0, 636.0, 587.0, 582.0, 582.0, 582.0, 579.0, 630.0, 462.0, 573.0, 579.0, 582.0, 624.0, 636.0, 587.0, 579.0, 579.0, 636.0, 627.0, 587.0, 624.0, 582.0, 633.0, 633.0, 582.0, 582.0, 582.0, 636.0, 636.0, 636.0, 636.0, 582.0, 633.0, 587.0, 581.0, 633.0, 582.0, 630.0, 630.0, 587.0, 582.0, 636.0, 582.0, 633.0, 536.0, 582.0, 636.0, 639.0, 627.0, 582.0, 624.0, 579.0, 573.0, 633.0, 633.0, 587.0, 633.0, 639.0, 587.0, 627.0, 630.0, 630.0, 627.0, 582.0, 633.0, 582.0, 582.0, 636.0, 630.0, 579.0, 627.0, 627.0, 579.0, 587.0, 633.0, 633.0, 582.0, 582.0, 582.0, 636.0, 633.0, 587.0, 633.0, 579.0, 636.0, 582.0, 582.0, 627.0, 636.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [316.0, 317.0, 291.0, 291.0, 291.0, 291.0, 296.0, 283.0, 286.0, 296.0, 311.0, 322.0, 316.0, 314.0, 300.0, 306.0, 319.0, 320.0, 319.0, 311.0, 319.0, 320.0, 316.0, 317.0, 319.0, 317.0, 296.0, 291.0, 299.0, 283.0, 291.0, 291.0, 291.0, 291.0, 291.0, 288.0, 308.0, 322.0, 228.0, 234.0, 279.0, 294.0, 283.0, 296.0, 288.0, 294.0, 313.0, 311.0, 319.0, 317.0, 291.0, 296.0, 291.0, 288.0, 287.0, 292.0, 319.0, 317.0, 316.0, 311.0, 296.0, 291.0, 316.0, 308.0, 293.0, 289.0, 319.0, 314.0, 316.0, 317.0, 286.0, 296.0, 293.0, 289.0, 291.0, 291.0, 319.0, 317.0, 314.0, 322.0, 319.0, 317.0, 314.0, 322.0, 288.0, 294.0, 319.0, 314.0, 288.0, 299.0, 290.0, 291.0, 314.0, 319.0, 294.0, 288.0, 316.0, 
314.0, 319.0, 311.0, 296.0, 291.0, 291.0, 291.0, 322.0, 314.0, 290.0, 292.0, 316.0, 317.0, 257.0, 279.0, 288.0, 294.0, 311.0, 325.0, 322.0, 317.0, 319.0, 308.0, 293.0, 289.0, 312.0, 312.0, 296.0, 283.0, 287.0, 286.0, 316.0, 317.0, 316.0, 317.0, 290.0, 297.0, 314.0, 319.0, 319.0, 320.0, 288.0, 299.0, 313.0, 314.0, 306.0, 324.0, 311.0, 319.0, 316.0, 311.0, 296.0, 286.0, 322.0, 311.0, 296.0, 286.0, 298.0, 284.0, 314.0, 322.0, 311.0, 319.0, 277.0, 302.0, 319.0, 308.0, 316.0, 311.0, 290.0, 289.0, 298.0, 289.0, 316.0, 317.0, 314.0, 319.0, 289.0, 293.0, 299.0, 283.0, 286.0, 296.0, 314.0, 322.0, 318.0, 315.0, 291.0, 296.0, 316.0, 317.0, 287.0, 292.0, 314.0, 322.0, 293.0, 289.0, 291.0, 291.0, 316.0, 311.0, 319.0, 317.0]}, "sampler_perf": {"mean_env_wait_ms": 0.7826053116086107, "mean_processing_ms": 0.22502024237804114, "mean_inference_ms": 1.384772842121544}, "off_policy_estimator": {}, "info": {"num_steps_trained": 10920000, "num_steps_sampled": 5824000, "sample_time_ms": 20652.138, "load_time_ms": 39.437, "grad_time_ms": 10376.522, "update_time_ms": 0.004, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0003411610086914152, "policy_loss": -0.0072668264620006084, "vf_loss": 81.6093978881836, "vf_explained_var": 0.7669034600257874, "kl": 0.0018620697082951665, "entropy": 1.1059015989303589, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5824000, "episodes_total": 14560, "training_iteration": 455, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-41-35", "timestamp": 1660261295, "time_this_iter_s": 29.449601650238037, "time_total_s": 19704.14309668541, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 19704.14309668541, "timesteps_since_restore": 5824000, "iterations_since_restore": 455, "perf": {"cpu_util_percent": 37.61666666666666, "ram_util_percent": 59.06190476190477}}
+{"episode_reward_max": 639.0, "episode_reward_min": 462.0, "episode_reward_mean": 607.34, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 228.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 303.67}, "custom_metrics": {"sparse_reward_mean": 210.4, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 186.54, "shaped_reward_min": 142, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.63, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.3, "onion_pickup_agent_1_min": 15, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.26, "useful_onion_pickup_agent_0_min": 13, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.61, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.12, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.2, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.03, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.06, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.2, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.75, "potting_onion_agent_1_min": 14, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.51, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.87, "dish_pickup_agent_1_min": 4, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.55, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.98, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.73, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.7, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.55, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.07, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.51, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.02, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.2, 
"optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.75, "optimal_onion_potting_agent_1_min": 14, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.2, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.75, "viable_onion_potting_agent_1_min": 14, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [636.0, 630.0, 624.0, 587.0, 573.0, 633.0, 636.0, 582.0, 630.0, 633.0, 639.0, 633.0, 633.0, 582.0, 630.0, 633.0, 630.0, 630.0, 633.0, 582.0, 630.0, 630.0, 587.0, 573.0, 576.0, 582.0, 636.0, 525.0, 633.0, 579.0, 633.0, 527.0, 633.0, 633.0, 587.0, 633.0, 639.0, 587.0, 627.0, 630.0, 630.0, 627.0, 582.0, 633.0, 582.0, 582.0, 636.0, 630.0, 579.0, 627.0, 627.0, 579.0, 587.0, 633.0, 633.0, 582.0, 582.0, 582.0, 636.0, 633.0, 587.0, 633.0, 579.0, 636.0, 582.0, 582.0, 627.0, 636.0, 633.0, 582.0, 582.0, 579.0, 582.0, 633.0, 630.0, 606.0, 639.0, 630.0, 639.0, 633.0, 636.0, 587.0, 582.0, 582.0, 582.0, 579.0, 630.0, 462.0, 573.0, 579.0, 582.0, 624.0, 636.0, 587.0, 579.0, 579.0, 636.0, 627.0, 587.0, 624.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [319.0, 317.0, 313.0, 317.0, 316.0, 308.0, 301.0, 286.0, 290.0, 283.0, 319.0, 314.0, 324.0, 312.0, 291.0, 291.0, 315.0, 315.0, 317.0, 316.0, 314.0, 325.0, 316.0, 317.0, 316.0, 317.0, 293.0, 289.0, 316.0, 314.0, 321.0, 312.0, 316.0, 314.0, 319.0, 311.0, 316.0, 317.0, 288.0, 294.0, 313.0, 317.0, 311.0, 319.0, 288.0, 299.0, 282.0, 291.0, 285.0, 291.0, 286.0, 296.0, 311.0, 325.0, 268.0, 257.0, 316.0, 317.0, 296.0, 283.0, 320.0, 313.0, 254.0, 273.0, 316.0, 317.0, 316.0, 317.0, 290.0, 297.0, 314.0, 319.0, 319.0, 320.0, 288.0, 299.0, 313.0, 314.0, 306.0, 324.0, 311.0, 319.0, 316.0, 311.0, 296.0, 286.0, 322.0, 311.0, 296.0, 286.0, 298.0, 284.0, 314.0, 322.0, 311.0, 319.0, 277.0, 
302.0, 319.0, 308.0, 316.0, 311.0, 290.0, 289.0, 298.0, 289.0, 316.0, 317.0, 314.0, 319.0, 289.0, 293.0, 299.0, 283.0, 286.0, 296.0, 314.0, 322.0, 318.0, 315.0, 291.0, 296.0, 316.0, 317.0, 287.0, 292.0, 314.0, 322.0, 293.0, 289.0, 291.0, 291.0, 316.0, 311.0, 319.0, 317.0, 316.0, 317.0, 291.0, 291.0, 291.0, 291.0, 296.0, 283.0, 286.0, 296.0, 311.0, 322.0, 316.0, 314.0, 300.0, 306.0, 319.0, 320.0, 319.0, 311.0, 319.0, 320.0, 316.0, 317.0, 319.0, 317.0, 296.0, 291.0, 299.0, 283.0, 291.0, 291.0, 291.0, 291.0, 291.0, 288.0, 308.0, 322.0, 228.0, 234.0, 279.0, 294.0, 283.0, 296.0, 288.0, 294.0, 313.0, 311.0, 319.0, 317.0, 291.0, 296.0, 291.0, 288.0, 287.0, 292.0, 319.0, 317.0, 316.0, 311.0, 296.0, 291.0, 316.0, 308.0]}, "sampler_perf": {"mean_env_wait_ms": 0.781893848476972, "mean_processing_ms": 0.22487934380632985, "mean_inference_ms": 1.3836766382532326}, "off_policy_estimator": {}, "info": {"num_steps_trained": 10944000, "num_steps_sampled": 5836800, "sample_time_ms": 20326.683, "load_time_ms": 39.601, "grad_time_ms": 10224.263, "update_time_ms": 0.004, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0009388479520566761, "policy_loss": -0.008330571465194225, "vf_loss": 79.4411849975586, "vf_explained_var": 0.7653481960296631, "kl": 0.0017687659710645676, "entropy": 1.1048110723495483, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5836800, "episodes_total": 14592, "training_iteration": 456, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-42-04", "timestamp": 1660261324, "time_this_iter_s": 28.971395254135132, "time_total_s": 19733.114491939545, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 19733.114491939545, "timesteps_since_restore": 5836800, "iterations_since_restore": 456, "perf": {"cpu_util_percent": 36.80731707317073, "ram_util_percent": 59.075609756097556}}
+{"episode_reward_max": 639.0, "episode_reward_min": 197.0, "episode_reward_mean": 598.66, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 98.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 299.33}, "custom_metrics": {"sparse_reward_mean": 207.2, "sparse_reward_min": 60, "sparse_reward_max": 220, "shaped_reward_mean": 184.26, "shaped_reward_min": 77, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.33, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.14, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 15.96, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.36, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.13, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.16, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.05, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.07, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.96, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.64, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.54, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.74, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.48, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 4.89, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.8, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.64, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.5, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 4.97, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.46, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.91, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.96, 
"optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.64, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.96, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.64, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [519.0, 630.0, 579.0, 579.0, 576.0, 575.0, 582.0, 579.0, 197.0, 639.0, 582.0, 636.0, 587.0, 579.0, 582.0, 636.0, 582.0, 582.0, 630.0, 582.0, 582.0, 570.0, 633.0, 627.0, 582.0, 587.0, 630.0, 639.0, 590.0, 633.0, 582.0, 630.0, 582.0, 582.0, 627.0, 636.0, 633.0, 582.0, 582.0, 579.0, 582.0, 633.0, 630.0, 606.0, 639.0, 630.0, 639.0, 633.0, 636.0, 587.0, 582.0, 582.0, 582.0, 579.0, 630.0, 462.0, 573.0, 579.0, 582.0, 624.0, 636.0, 587.0, 579.0, 579.0, 636.0, 627.0, 587.0, 624.0, 636.0, 630.0, 624.0, 587.0, 573.0, 633.0, 636.0, 582.0, 630.0, 633.0, 639.0, 633.0, 633.0, 582.0, 630.0, 633.0, 630.0, 630.0, 633.0, 582.0, 630.0, 630.0, 587.0, 573.0, 576.0, 582.0, 636.0, 525.0, 633.0, 579.0, 633.0, 527.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [248.0, 271.0, 311.0, 319.0, 288.0, 291.0, 285.0, 294.0, 290.0, 286.0, 289.0, 286.0, 288.0, 294.0, 285.0, 294.0, 99.0, 98.0, 322.0, 317.0, 288.0, 294.0, 319.0, 317.0, 298.0, 289.0, 288.0, 291.0, 299.0, 283.0, 314.0, 322.0, 296.0, 286.0, 291.0, 291.0, 311.0, 319.0, 290.0, 292.0, 288.0, 294.0, 277.0, 293.0, 316.0, 317.0, 316.0, 311.0, 294.0, 288.0, 290.0, 297.0, 316.0, 314.0, 319.0, 320.0, 294.0, 296.0, 316.0, 317.0, 296.0, 286.0, 311.0, 319.0, 293.0, 289.0, 291.0, 291.0, 316.0, 311.0, 319.0, 317.0, 316.0, 317.0, 291.0, 291.0, 291.0, 291.0, 296.0, 283.0, 286.0, 296.0, 311.0, 322.0, 316.0, 314.0, 300.0, 306.0, 319.0, 320.0, 319.0, 311.0, 319.0, 320.0, 316.0, 317.0, 319.0, 
317.0, 296.0, 291.0, 299.0, 283.0, 291.0, 291.0, 291.0, 291.0, 291.0, 288.0, 308.0, 322.0, 228.0, 234.0, 279.0, 294.0, 283.0, 296.0, 288.0, 294.0, 313.0, 311.0, 319.0, 317.0, 291.0, 296.0, 291.0, 288.0, 287.0, 292.0, 319.0, 317.0, 316.0, 311.0, 296.0, 291.0, 316.0, 308.0, 319.0, 317.0, 313.0, 317.0, 316.0, 308.0, 301.0, 286.0, 290.0, 283.0, 319.0, 314.0, 324.0, 312.0, 291.0, 291.0, 315.0, 315.0, 317.0, 316.0, 314.0, 325.0, 316.0, 317.0, 316.0, 317.0, 293.0, 289.0, 316.0, 314.0, 321.0, 312.0, 316.0, 314.0, 319.0, 311.0, 316.0, 317.0, 288.0, 294.0, 313.0, 317.0, 311.0, 319.0, 288.0, 299.0, 282.0, 291.0, 285.0, 291.0, 286.0, 296.0, 311.0, 325.0, 268.0, 257.0, 316.0, 317.0, 296.0, 283.0, 320.0, 313.0, 254.0, 273.0]}, "sampler_perf": {"mean_env_wait_ms": 0.7811987617860328, "mean_processing_ms": 0.2247433395983137, "mean_inference_ms": 1.3827621285902887}, "off_policy_estimator": {}, "info": {"num_steps_trained": 10968000, "num_steps_sampled": 5849600, "sample_time_ms": 20643.82, "load_time_ms": 39.721, "grad_time_ms": 10128.234, "update_time_ms": 0.004, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.00031386129558086395, "policy_loss": -0.007833792828023434, "vf_loss": 87.0155258178711, "vf_explained_var": 0.759077787399292, "kl": 0.0023228460922837257, "entropy": 1.10780668258667, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5849600, "episodes_total": 14624, "training_iteration": 457, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-42-37", "timestamp": 1660261357, "time_this_iter_s": 32.612699031829834, "time_total_s": 19765.727190971375, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 19765.727190971375, "timesteps_since_restore": 5849600, "iterations_since_restore": 457, "perf": {"cpu_util_percent": 35.49347826086956, "ram_util_percent": 59.11739130434784}}
+{"episode_reward_max": 639.0, "episode_reward_min": 197.0, "episode_reward_mean": 600.58, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 98.0}, "policy_reward_max": {"ppo": 326.0}, "policy_reward_mean": {"ppo": 300.29}, "custom_metrics": {"sparse_reward_mean": 207.8, "sparse_reward_min": 60, "sparse_reward_max": 220, "shaped_reward_mean": 184.98, "shaped_reward_min": 77, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.3, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.21, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 15.91, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.48, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.14, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.16, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.05, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.05, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.9, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.71, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.66, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.88, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 5.57, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.88, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 6, "dish_drop_agent_0_mean": 0.86, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.78, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.6, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.96, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 6, "soup_delivery_agent_0_mean": 5.53, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.87, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.9, 
"optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.71, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.9, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.71, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 587.0, 606.0, 582.0, 633.0, 579.0, 587.0, 639.0, 567.0, 630.0, 630.0, 582.0, 627.0, 636.0, 587.0, 636.0, 639.0, 582.0, 587.0, 587.0, 630.0, 633.0, 630.0, 579.0, 587.0, 576.0, 633.0, 587.0, 587.0, 630.0, 579.0, 630.0, 636.0, 627.0, 587.0, 624.0, 636.0, 630.0, 624.0, 587.0, 573.0, 633.0, 636.0, 582.0, 630.0, 633.0, 639.0, 633.0, 633.0, 582.0, 630.0, 633.0, 630.0, 630.0, 633.0, 582.0, 630.0, 630.0, 587.0, 573.0, 576.0, 582.0, 636.0, 525.0, 633.0, 579.0, 633.0, 527.0, 519.0, 630.0, 579.0, 579.0, 576.0, 575.0, 582.0, 579.0, 197.0, 639.0, 582.0, 636.0, 587.0, 579.0, 582.0, 636.0, 582.0, 582.0, 630.0, 582.0, 582.0, 570.0, 633.0, 627.0, 582.0, 587.0, 630.0, 639.0, 590.0, 633.0, 582.0, 630.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [289.0, 293.0, 291.0, 296.0, 301.0, 305.0, 286.0, 296.0, 319.0, 314.0, 290.0, 289.0, 290.0, 297.0, 319.0, 320.0, 282.0, 285.0, 310.0, 320.0, 319.0, 311.0, 288.0, 294.0, 313.0, 314.0, 319.0, 317.0, 293.0, 294.0, 319.0, 317.0, 319.0, 320.0, 293.0, 289.0, 299.0, 288.0, 290.0, 297.0, 310.0, 320.0, 324.0, 309.0, 311.0, 319.0, 291.0, 288.0, 299.0, 288.0, 286.0, 290.0, 317.0, 316.0, 296.0, 291.0, 293.0, 294.0, 316.0, 314.0, 294.0, 285.0, 326.0, 304.0, 319.0, 317.0, 316.0, 311.0, 296.0, 291.0, 316.0, 308.0, 319.0, 317.0, 313.0, 317.0, 316.0, 308.0, 301.0, 286.0, 290.0, 283.0, 319.0, 314.0, 324.0, 312.0, 291.0, 291.0, 315.0, 315.0, 317.0, 316.0, 314.0, 325.0, 316.0, 317.0, 316.0, 
317.0, 293.0, 289.0, 316.0, 314.0, 321.0, 312.0, 316.0, 314.0, 319.0, 311.0, 316.0, 317.0, 288.0, 294.0, 313.0, 317.0, 311.0, 319.0, 288.0, 299.0, 282.0, 291.0, 285.0, 291.0, 286.0, 296.0, 311.0, 325.0, 268.0, 257.0, 316.0, 317.0, 296.0, 283.0, 320.0, 313.0, 254.0, 273.0, 248.0, 271.0, 311.0, 319.0, 288.0, 291.0, 285.0, 294.0, 290.0, 286.0, 289.0, 286.0, 288.0, 294.0, 285.0, 294.0, 99.0, 98.0, 322.0, 317.0, 288.0, 294.0, 319.0, 317.0, 298.0, 289.0, 288.0, 291.0, 299.0, 283.0, 314.0, 322.0, 296.0, 286.0, 291.0, 291.0, 311.0, 319.0, 290.0, 292.0, 288.0, 294.0, 277.0, 293.0, 316.0, 317.0, 316.0, 311.0, 294.0, 288.0, 290.0, 297.0, 316.0, 314.0, 319.0, 320.0, 294.0, 296.0, 316.0, 317.0, 296.0, 286.0, 311.0, 319.0]}, "sampler_perf": {"mean_env_wait_ms": 0.7805088940621119, "mean_processing_ms": 0.22460838488183463, "mean_inference_ms": 1.3819062694297763}, "off_policy_estimator": {}, "info": {"num_steps_trained": 10992000, "num_steps_sampled": 5862400, "sample_time_ms": 20388.555, "load_time_ms": 39.901, "grad_time_ms": 10047.414, "update_time_ms": 0.004, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.00028213454061187804, "policy_loss": -0.007530031260102987, "vf_loss": 78.06029510498047, "vf_explained_var": 0.7737483382225037, "kl": 0.0017934959614649415, "entropy": 1.1162586212158203, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5862400, "episodes_total": 14656, "training_iteration": 458, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-43-08", "timestamp": 1660261388, "time_this_iter_s": 31.54933786392212, "time_total_s": 19797.276528835297, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 19797.276528835297, "timesteps_since_restore": 5862400, "iterations_since_restore": 458, "perf": {"cpu_util_percent": 36.01111111111111, "ram_util_percent": 59.15777777777779}}
+{"episode_reward_max": 639.0, "episode_reward_min": 197.0, "episode_reward_mean": 599.73, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 98.0}, "policy_reward_max": {"ppo": 326.0}, "policy_reward_mean": {"ppo": 299.865}, "custom_metrics": {"sparse_reward_mean": 207.4, "sparse_reward_min": 60, "sparse_reward_max": 220, "shaped_reward_mean": 184.93, "shaped_reward_min": 77, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.45, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.03, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.0, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.36, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.14, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.19, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.04, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.08, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 16.04, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.53, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 20, "dish_pickup_agent_0_mean": 6.54, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 6.05, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 5.47, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.02, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 6, "dish_drop_agent_0_mean": 0.88, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.8, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.49, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.06, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 6, "soup_delivery_agent_0_mean": 5.41, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.97, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.04, 
"optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.53, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 20, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.04, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.53, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 20, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 636.0, 639.0, 630.0, 630.0, 582.0, 636.0, 581.0, 636.0, 630.0, 630.0, 590.0, 633.0, 636.0, 636.0, 630.0, 582.0, 582.0, 630.0, 584.0, 579.0, 627.0, 504.0, 630.0, 582.0, 633.0, 630.0, 633.0, 587.0, 582.0, 582.0, 633.0, 633.0, 579.0, 633.0, 527.0, 519.0, 630.0, 579.0, 579.0, 576.0, 575.0, 582.0, 579.0, 197.0, 639.0, 582.0, 636.0, 587.0, 579.0, 582.0, 636.0, 582.0, 582.0, 630.0, 582.0, 582.0, 570.0, 633.0, 627.0, 582.0, 587.0, 630.0, 639.0, 590.0, 633.0, 582.0, 630.0, 582.0, 587.0, 606.0, 582.0, 633.0, 579.0, 587.0, 639.0, 567.0, 630.0, 630.0, 582.0, 627.0, 636.0, 587.0, 636.0, 639.0, 582.0, 587.0, 587.0, 630.0, 633.0, 630.0, 579.0, 587.0, 576.0, 633.0, 587.0, 587.0, 630.0, 579.0, 630.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [282.0, 300.0, 319.0, 317.0, 319.0, 320.0, 319.0, 311.0, 311.0, 319.0, 293.0, 289.0, 319.0, 317.0, 290.0, 291.0, 319.0, 317.0, 316.0, 314.0, 311.0, 319.0, 296.0, 294.0, 317.0, 316.0, 319.0, 317.0, 319.0, 317.0, 311.0, 319.0, 288.0, 294.0, 288.0, 294.0, 314.0, 316.0, 298.0, 286.0, 288.0, 291.0, 308.0, 319.0, 254.0, 250.0, 311.0, 319.0, 288.0, 294.0, 313.0, 320.0, 314.0, 316.0, 314.0, 319.0, 291.0, 296.0, 293.0, 289.0, 289.0, 293.0, 317.0, 316.0, 316.0, 317.0, 296.0, 283.0, 320.0, 313.0, 254.0, 273.0, 248.0, 271.0, 311.0, 319.0, 288.0, 291.0, 285.0, 294.0, 290.0, 286.0, 289.0, 286.0, 288.0, 294.0, 285.0, 294.0, 99.0, 98.0, 322.0, 317.0, 288.0, 294.0, 319.0, 317.0, 298.0, 
289.0, 288.0, 291.0, 299.0, 283.0, 314.0, 322.0, 296.0, 286.0, 291.0, 291.0, 311.0, 319.0, 290.0, 292.0, 288.0, 294.0, 277.0, 293.0, 316.0, 317.0, 316.0, 311.0, 294.0, 288.0, 290.0, 297.0, 316.0, 314.0, 319.0, 320.0, 294.0, 296.0, 316.0, 317.0, 296.0, 286.0, 311.0, 319.0, 289.0, 293.0, 291.0, 296.0, 301.0, 305.0, 286.0, 296.0, 319.0, 314.0, 290.0, 289.0, 290.0, 297.0, 319.0, 320.0, 282.0, 285.0, 310.0, 320.0, 319.0, 311.0, 288.0, 294.0, 313.0, 314.0, 319.0, 317.0, 293.0, 294.0, 319.0, 317.0, 319.0, 320.0, 293.0, 289.0, 299.0, 288.0, 290.0, 297.0, 310.0, 320.0, 324.0, 309.0, 311.0, 319.0, 291.0, 288.0, 299.0, 288.0, 286.0, 290.0, 317.0, 316.0, 296.0, 291.0, 293.0, 294.0, 316.0, 314.0, 294.0, 285.0, 326.0, 304.0]}, "sampler_perf": {"mean_env_wait_ms": 0.7798432151982481, "mean_processing_ms": 0.22448068931407403, "mean_inference_ms": 1.3813562407452884}, "off_policy_estimator": {}, "info": {"num_steps_trained": 11016000, "num_steps_sampled": 5875200, "sample_time_ms": 20860.599, "load_time_ms": 39.533, "grad_time_ms": 10085.729, "update_time_ms": 0.004, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -4.9240032240049914e-05, "policy_loss": -0.007299743592739105, "vf_loss": 78.08226776123047, "vf_explained_var": 0.7718666195869446, "kl": 0.001796315424144268, "entropy": 1.1154268980026245, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5875200, "episodes_total": 14688, "training_iteration": 459, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-43-44", "timestamp": 1660261424, "time_this_iter_s": 35.86376190185547, "time_total_s": 19833.140290737152, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 19833.140290737152, "timesteps_since_restore": 5875200, "iterations_since_restore": 459, "perf": {"cpu_util_percent": 35.09411764705882, "ram_util_percent": 59.11176470588236}}
+{"episode_reward_max": 639.0, "episode_reward_min": 504.0, "episode_reward_mean": 606.33, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 250.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 303.165}, "custom_metrics": {"sparse_reward_mean": 209.8, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 186.73, "shaped_reward_min": 144, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.61, "onion_pickup_agent_0_min": 14, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.27, "onion_pickup_agent_1_min": 15, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.11, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.51, "useful_onion_pickup_agent_1_min": 15, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.14, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.29, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.05, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.12, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 16.18, "potting_onion_agent_0_min": 13, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.65, "potting_onion_agent_1_min": 14, "potting_onion_agent_1_max": 20, "dish_pickup_agent_0_mean": 6.58, "dish_pickup_agent_0_min": 5, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 6.13, "dish_pickup_agent_1_min": 4, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 5.59, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.09, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 6, "dish_drop_agent_0_mean": 0.87, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.84, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.58, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.06, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 6, "soup_delivery_agent_0_mean": 5.5, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.99, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.18, 
"optimal_onion_potting_agent_0_min": 13, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.65, "optimal_onion_potting_agent_1_min": 14, "optimal_onion_potting_agent_1_max": 20, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.18, "viable_onion_potting_agent_0_min": 13, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.65, "viable_onion_potting_agent_1_min": 14, "viable_onion_potting_agent_1_max": 20, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [627.0, 582.0, 582.0, 639.0, 582.0, 636.0, 582.0, 627.0, 582.0, 582.0, 582.0, 582.0, 579.0, 636.0, 587.0, 636.0, 630.0, 633.0, 627.0, 582.0, 639.0, 627.0, 570.0, 633.0, 582.0, 579.0, 636.0, 630.0, 587.0, 582.0, 582.0, 573.0, 590.0, 633.0, 582.0, 630.0, 582.0, 587.0, 606.0, 582.0, 633.0, 579.0, 587.0, 639.0, 567.0, 630.0, 630.0, 582.0, 627.0, 636.0, 587.0, 636.0, 639.0, 582.0, 587.0, 587.0, 630.0, 633.0, 630.0, 579.0, 587.0, 576.0, 633.0, 587.0, 587.0, 630.0, 579.0, 630.0, 582.0, 636.0, 639.0, 630.0, 630.0, 582.0, 636.0, 581.0, 636.0, 630.0, 630.0, 590.0, 633.0, 636.0, 636.0, 630.0, 582.0, 582.0, 630.0, 584.0, 579.0, 627.0, 504.0, 630.0, 582.0, 633.0, 630.0, 633.0, 587.0, 582.0, 582.0, 633.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [308.0, 319.0, 296.0, 286.0, 288.0, 294.0, 319.0, 320.0, 288.0, 294.0, 314.0, 322.0, 291.0, 291.0, 316.0, 311.0, 291.0, 291.0, 293.0, 289.0, 282.0, 300.0, 296.0, 286.0, 288.0, 291.0, 319.0, 317.0, 296.0, 291.0, 319.0, 317.0, 319.0, 311.0, 311.0, 322.0, 316.0, 311.0, 287.0, 295.0, 322.0, 317.0, 316.0, 311.0, 282.0, 288.0, 319.0, 314.0, 295.0, 287.0, 288.0, 291.0, 327.0, 309.0, 316.0, 314.0, 293.0, 294.0, 294.0, 288.0, 290.0, 292.0, 285.0, 288.0, 294.0, 296.0, 316.0, 317.0, 296.0, 286.0, 311.0, 319.0, 289.0, 293.0, 291.0, 296.0, 301.0, 305.0, 286.0, 296.0, 319.0, 314.0, 290.0, 289.0, 290.0, 297.0, 319.0, 320.0, 282.0, 285.0, 310.0, 320.0, 319.0, 311.0, 288.0, 294.0, 313.0, 
314.0, 319.0, 317.0, 293.0, 294.0, 319.0, 317.0, 319.0, 320.0, 293.0, 289.0, 299.0, 288.0, 290.0, 297.0, 310.0, 320.0, 324.0, 309.0, 311.0, 319.0, 291.0, 288.0, 299.0, 288.0, 286.0, 290.0, 317.0, 316.0, 296.0, 291.0, 293.0, 294.0, 316.0, 314.0, 294.0, 285.0, 326.0, 304.0, 282.0, 300.0, 319.0, 317.0, 319.0, 320.0, 319.0, 311.0, 311.0, 319.0, 293.0, 289.0, 319.0, 317.0, 290.0, 291.0, 319.0, 317.0, 316.0, 314.0, 311.0, 319.0, 296.0, 294.0, 317.0, 316.0, 319.0, 317.0, 319.0, 317.0, 311.0, 319.0, 288.0, 294.0, 288.0, 294.0, 314.0, 316.0, 298.0, 286.0, 288.0, 291.0, 308.0, 319.0, 254.0, 250.0, 311.0, 319.0, 288.0, 294.0, 313.0, 320.0, 314.0, 316.0, 314.0, 319.0, 291.0, 296.0, 293.0, 289.0, 289.0, 293.0, 317.0, 316.0]}, "sampler_perf": {"mean_env_wait_ms": 0.7791820626361985, "mean_processing_ms": 0.22435411593063023, "mean_inference_ms": 1.3809254829785487}, "off_policy_estimator": {}, "info": {"num_steps_trained": 11040000, "num_steps_sampled": 5888000, "sample_time_ms": 21384.075, "load_time_ms": 38.923, "grad_time_ms": 10181.416, "update_time_ms": 0.004, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0033431891351938248, "policy_loss": -0.0041669332422316074, "vf_loss": 80.67221069335938, "vf_explained_var": 0.7671453356742859, "kl": 0.0021624856162816286, "entropy": 1.1142171621322632, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5888000, "episodes_total": 14720, "training_iteration": 460, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-44-21", "timestamp": 1660261461, "time_this_iter_s": 36.32121300697327, "time_total_s": 19869.461503744125, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 19869.461503744125, "timesteps_since_restore": 5888000, "iterations_since_restore": 460, "perf": {"cpu_util_percent": 35.05294117647058, "ram_util_percent": 59.078431372549026}}
+{"episode_reward_max": 639.0, "episode_reward_min": 504.0, "episode_reward_mean": 604.21, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 250.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 302.105}, "custom_metrics": {"sparse_reward_mean": 209.0, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 186.21, "shaped_reward_min": 144, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.49, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.27, "onion_pickup_agent_1_min": 15, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 15.93, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.41, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.15, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.32, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.06, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.14, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 16.03, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.71, "potting_onion_agent_1_min": 14, "potting_onion_agent_1_max": 20, "dish_pickup_agent_0_mean": 6.59, "dish_pickup_agent_0_min": 5, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.99, "dish_pickup_agent_1_min": 4, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.62, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.06, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 6, "dish_drop_agent_0_mean": 0.87, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.8, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.6, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.99, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 6, "soup_delivery_agent_0_mean": 5.54, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.91, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.03, 
"optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.71, "optimal_onion_potting_agent_1_min": 14, "optimal_onion_potting_agent_1_max": 20, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.03, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.71, "viable_onion_potting_agent_1_min": 14, "viable_onion_potting_agent_1_max": 20, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [630.0, 582.0, 579.0, 630.0, 527.0, 587.0, 636.0, 584.0, 587.0, 579.0, 570.0, 582.0, 579.0, 636.0, 627.0, 573.0, 633.0, 582.0, 630.0, 582.0, 582.0, 630.0, 636.0, 582.0, 579.0, 582.0, 587.0, 636.0, 587.0, 627.0, 587.0, 633.0, 587.0, 630.0, 579.0, 630.0, 582.0, 636.0, 639.0, 630.0, 630.0, 582.0, 636.0, 581.0, 636.0, 630.0, 630.0, 590.0, 633.0, 636.0, 636.0, 630.0, 582.0, 582.0, 630.0, 584.0, 579.0, 627.0, 504.0, 630.0, 582.0, 633.0, 630.0, 633.0, 587.0, 582.0, 582.0, 633.0, 627.0, 582.0, 582.0, 639.0, 582.0, 636.0, 582.0, 627.0, 582.0, 582.0, 582.0, 582.0, 579.0, 636.0, 587.0, 636.0, 630.0, 633.0, 627.0, 582.0, 639.0, 627.0, 570.0, 633.0, 582.0, 579.0, 636.0, 630.0, 587.0, 582.0, 582.0, 573.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [316.0, 314.0, 293.0, 289.0, 288.0, 291.0, 316.0, 314.0, 273.0, 254.0, 291.0, 296.0, 316.0, 320.0, 290.0, 294.0, 290.0, 297.0, 293.0, 286.0, 285.0, 285.0, 294.0, 288.0, 290.0, 289.0, 314.0, 322.0, 313.0, 314.0, 273.0, 300.0, 319.0, 314.0, 299.0, 283.0, 311.0, 319.0, 288.0, 294.0, 288.0, 294.0, 316.0, 314.0, 319.0, 317.0, 293.0, 289.0, 293.0, 286.0, 285.0, 297.0, 293.0, 294.0, 319.0, 317.0, 296.0, 291.0, 313.0, 314.0, 296.0, 291.0, 311.0, 322.0, 293.0, 294.0, 316.0, 314.0, 294.0, 285.0, 326.0, 304.0, 282.0, 300.0, 319.0, 317.0, 319.0, 320.0, 319.0, 311.0, 311.0, 319.0, 293.0, 289.0, 319.0, 317.0, 290.0, 291.0, 319.0, 317.0, 316.0, 314.0, 311.0, 319.0, 296.0, 294.0, 317.0, 
316.0, 319.0, 317.0, 319.0, 317.0, 311.0, 319.0, 288.0, 294.0, 288.0, 294.0, 314.0, 316.0, 298.0, 286.0, 288.0, 291.0, 308.0, 319.0, 254.0, 250.0, 311.0, 319.0, 288.0, 294.0, 313.0, 320.0, 314.0, 316.0, 314.0, 319.0, 291.0, 296.0, 293.0, 289.0, 289.0, 293.0, 317.0, 316.0, 308.0, 319.0, 296.0, 286.0, 288.0, 294.0, 319.0, 320.0, 288.0, 294.0, 314.0, 322.0, 291.0, 291.0, 316.0, 311.0, 291.0, 291.0, 293.0, 289.0, 282.0, 300.0, 296.0, 286.0, 288.0, 291.0, 319.0, 317.0, 296.0, 291.0, 319.0, 317.0, 319.0, 311.0, 311.0, 322.0, 316.0, 311.0, 287.0, 295.0, 322.0, 317.0, 316.0, 311.0, 282.0, 288.0, 319.0, 314.0, 295.0, 287.0, 288.0, 291.0, 327.0, 309.0, 316.0, 314.0, 293.0, 294.0, 294.0, 288.0, 290.0, 292.0, 285.0, 288.0]}, "sampler_perf": {"mean_env_wait_ms": 0.7785286320058347, "mean_processing_ms": 0.2242302416319241, "mean_inference_ms": 1.3806320563992234}, "off_policy_estimator": {}, "info": {"num_steps_trained": 11064000, "num_steps_sampled": 5900800, "sample_time_ms": 21464.8, "load_time_ms": 38.628, "grad_time_ms": 10142.381, "update_time_ms": 0.004, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0011879469966515899, "policy_loss": -0.00630860636010766, "vf_loss": 80.56136322021484, "vf_explained_var": 0.7605991363525391, "kl": 0.002013101242482662, "entropy": 1.1191506385803223, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5900800, "episodes_total": 14752, "training_iteration": 461, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-44-56", "timestamp": 1660261496, "time_this_iter_s": 35.2242169380188, "time_total_s": 19904.685720682144, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 19904.685720682144, "timesteps_since_restore": 5900800, "iterations_since_restore": 461, "perf": {"cpu_util_percent": 34.286, "ram_util_percent": 59.076}}
+{"episode_reward_max": 639.0, "episode_reward_min": 527.0, "episode_reward_mean": 606.08, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 254.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 303.04}, "custom_metrics": {"sparse_reward_mean": 209.6, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 186.88, "shaped_reward_min": 167, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.68, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.23, "onion_pickup_agent_1_min": 15, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.08, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.41, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 20, "onion_drop_agent_0_mean": 0.27, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.27, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.12, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.09, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.13, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.72, "potting_onion_agent_1_min": 15, "potting_onion_agent_1_max": 20, "dish_pickup_agent_0_mean": 6.53, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 6.02, "dish_pickup_agent_1_min": 4, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.62, "useful_dish_pickup_agent_0_min": 4, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.14, "useful_dish_pickup_agent_1_min": 3, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.83, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.8, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.57, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.04, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.52, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.96, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.13, 
"optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.72, "optimal_onion_potting_agent_1_min": 15, "optimal_onion_potting_agent_1_max": 20, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.13, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.72, "viable_onion_potting_agent_1_min": 15, "viable_onion_potting_agent_1_max": 20, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 630.0, 636.0, 639.0, 639.0, 582.0, 627.0, 576.0, 587.0, 636.0, 633.0, 636.0, 630.0, 587.0, 633.0, 582.0, 579.0, 573.0, 636.0, 627.0, 633.0, 636.0, 633.0, 587.0, 573.0, 636.0, 630.0, 636.0, 633.0, 636.0, 630.0, 633.0, 587.0, 582.0, 582.0, 633.0, 627.0, 582.0, 582.0, 639.0, 582.0, 636.0, 582.0, 627.0, 582.0, 582.0, 582.0, 582.0, 579.0, 636.0, 587.0, 636.0, 630.0, 633.0, 627.0, 582.0, 639.0, 627.0, 570.0, 633.0, 582.0, 579.0, 636.0, 630.0, 587.0, 582.0, 582.0, 573.0, 630.0, 582.0, 579.0, 630.0, 527.0, 587.0, 636.0, 584.0, 587.0, 579.0, 570.0, 582.0, 579.0, 636.0, 627.0, 573.0, 633.0, 582.0, 630.0, 582.0, 582.0, 630.0, 636.0, 582.0, 579.0, 582.0, 587.0, 636.0, 587.0, 627.0, 587.0, 633.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [291.0, 291.0, 316.0, 314.0, 316.0, 320.0, 317.0, 322.0, 324.0, 315.0, 291.0, 291.0, 313.0, 314.0, 296.0, 280.0, 296.0, 291.0, 314.0, 322.0, 313.0, 320.0, 314.0, 322.0, 319.0, 311.0, 291.0, 296.0, 316.0, 317.0, 291.0, 291.0, 290.0, 289.0, 288.0, 285.0, 309.0, 327.0, 313.0, 314.0, 314.0, 319.0, 319.0, 317.0, 316.0, 317.0, 288.0, 299.0, 288.0, 285.0, 314.0, 322.0, 313.0, 317.0, 314.0, 322.0, 314.0, 319.0, 314.0, 322.0, 311.0, 319.0, 319.0, 314.0, 291.0, 296.0, 293.0, 289.0, 289.0, 293.0, 317.0, 316.0, 308.0, 319.0, 296.0, 286.0, 288.0, 294.0, 319.0, 320.0, 288.0, 294.0, 314.0, 322.0, 291.0, 291.0, 316.0, 311.0, 291.0, 291.0, 293.0, 289.0, 282.0, 300.0, 296.0, 286.0, 288.0, 
291.0, 319.0, 317.0, 296.0, 291.0, 319.0, 317.0, 319.0, 311.0, 311.0, 322.0, 316.0, 311.0, 287.0, 295.0, 322.0, 317.0, 316.0, 311.0, 282.0, 288.0, 319.0, 314.0, 295.0, 287.0, 288.0, 291.0, 327.0, 309.0, 316.0, 314.0, 293.0, 294.0, 294.0, 288.0, 290.0, 292.0, 285.0, 288.0, 316.0, 314.0, 293.0, 289.0, 288.0, 291.0, 316.0, 314.0, 273.0, 254.0, 291.0, 296.0, 316.0, 320.0, 290.0, 294.0, 290.0, 297.0, 293.0, 286.0, 285.0, 285.0, 294.0, 288.0, 290.0, 289.0, 314.0, 322.0, 313.0, 314.0, 273.0, 300.0, 319.0, 314.0, 299.0, 283.0, 311.0, 319.0, 288.0, 294.0, 288.0, 294.0, 316.0, 314.0, 319.0, 317.0, 293.0, 289.0, 293.0, 286.0, 285.0, 297.0, 293.0, 294.0, 319.0, 317.0, 296.0, 291.0, 313.0, 314.0, 296.0, 291.0, 311.0, 322.0]}, "sampler_perf": {"mean_env_wait_ms": 0.7778691140112235, "mean_processing_ms": 0.22410468367497743, "mean_inference_ms": 1.3802370398093593}, "off_policy_estimator": {}, "info": {"num_steps_trained": 11088000, "num_steps_sampled": 5913600, "sample_time_ms": 21647.909, "load_time_ms": 38.304, "grad_time_ms": 10153.272, "update_time_ms": 0.004, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.001676362007856369, "policy_loss": -0.005163781810551882, "vf_loss": 73.99588012695312, "vf_explained_var": 0.7706634402275085, "kl": 0.00203719618730247, "entropy": 1.1188966035842896, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5913600, "episodes_total": 14784, "training_iteration": 462, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-45-29", "timestamp": 1660261529, "time_this_iter_s": 32.866820096969604, "time_total_s": 19937.552540779114, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 19937.552540779114, "timesteps_since_restore": 5913600, "iterations_since_restore": 462, "perf": {"cpu_util_percent": 36.0304347826087, "ram_util_percent": 59.14782608695653}}
+{"episode_reward_max": 639.0, "episode_reward_min": 527.0, "episode_reward_mean": 606.55, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 254.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 303.275}, "custom_metrics": {"sparse_reward_mean": 209.8, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 186.95, "shaped_reward_min": 164, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.76, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.11, "onion_pickup_agent_1_min": 14, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.1, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 19, "useful_onion_pickup_agent_1_mean": 17.36, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.27, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.19, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.13, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.05, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.21, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.71, "potting_onion_agent_1_min": 14, "potting_onion_agent_1_max": 20, "dish_pickup_agent_0_mean": 6.52, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 6.07, "dish_pickup_agent_1_min": 4, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.52, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.11, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.84, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 0.82, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.56, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.1, "soup_pickup_agent_1_min": 4, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.51, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.98, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.21, 
"optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.71, "optimal_onion_potting_agent_1_min": 14, "optimal_onion_potting_agent_1_max": 20, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.21, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.71, "viable_onion_potting_agent_1_min": 14, "viable_onion_potting_agent_1_max": 20, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [636.0, 630.0, 636.0, 576.0, 587.0, 579.0, 633.0, 579.0, 630.0, 636.0, 636.0, 569.0, 582.0, 584.0, 633.0, 633.0, 627.0, 582.0, 582.0, 587.0, 564.0, 582.0, 639.0, 587.0, 582.0, 627.0, 636.0, 587.0, 630.0, 582.0, 630.0, 639.0, 587.0, 582.0, 582.0, 573.0, 630.0, 582.0, 579.0, 630.0, 527.0, 587.0, 636.0, 584.0, 587.0, 579.0, 570.0, 582.0, 579.0, 636.0, 627.0, 573.0, 633.0, 582.0, 630.0, 582.0, 582.0, 630.0, 636.0, 582.0, 579.0, 582.0, 587.0, 636.0, 587.0, 627.0, 587.0, 633.0, 582.0, 630.0, 636.0, 639.0, 639.0, 582.0, 627.0, 576.0, 587.0, 636.0, 633.0, 636.0, 630.0, 587.0, 633.0, 582.0, 579.0, 573.0, 636.0, 627.0, 633.0, 636.0, 633.0, 587.0, 573.0, 636.0, 630.0, 636.0, 633.0, 636.0, 630.0, 633.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [319.0, 317.0, 319.0, 311.0, 319.0, 317.0, 282.0, 294.0, 291.0, 296.0, 294.0, 285.0, 314.0, 319.0, 291.0, 288.0, 316.0, 314.0, 314.0, 322.0, 322.0, 314.0, 281.0, 288.0, 292.0, 290.0, 285.0, 299.0, 321.0, 312.0, 319.0, 314.0, 305.0, 322.0, 293.0, 289.0, 288.0, 294.0, 293.0, 294.0, 276.0, 288.0, 296.0, 286.0, 317.0, 322.0, 293.0, 294.0, 296.0, 286.0, 319.0, 308.0, 316.0, 320.0, 286.0, 301.0, 316.0, 314.0, 293.0, 289.0, 319.0, 311.0, 316.0, 323.0, 293.0, 294.0, 294.0, 288.0, 290.0, 292.0, 285.0, 288.0, 316.0, 314.0, 293.0, 289.0, 288.0, 291.0, 316.0, 314.0, 273.0, 254.0, 291.0, 296.0, 316.0, 320.0, 290.0, 294.0, 290.0, 297.0, 293.0, 286.0, 285.0, 285.0, 294.0, 288.0, 290.0, 
289.0, 314.0, 322.0, 313.0, 314.0, 273.0, 300.0, 319.0, 314.0, 299.0, 283.0, 311.0, 319.0, 288.0, 294.0, 288.0, 294.0, 316.0, 314.0, 319.0, 317.0, 293.0, 289.0, 293.0, 286.0, 285.0, 297.0, 293.0, 294.0, 319.0, 317.0, 296.0, 291.0, 313.0, 314.0, 296.0, 291.0, 311.0, 322.0, 291.0, 291.0, 316.0, 314.0, 316.0, 320.0, 317.0, 322.0, 324.0, 315.0, 291.0, 291.0, 313.0, 314.0, 296.0, 280.0, 296.0, 291.0, 314.0, 322.0, 313.0, 320.0, 314.0, 322.0, 319.0, 311.0, 291.0, 296.0, 316.0, 317.0, 291.0, 291.0, 290.0, 289.0, 288.0, 285.0, 309.0, 327.0, 313.0, 314.0, 314.0, 319.0, 319.0, 317.0, 316.0, 317.0, 288.0, 299.0, 288.0, 285.0, 314.0, 322.0, 313.0, 317.0, 314.0, 322.0, 314.0, 319.0, 314.0, 322.0, 311.0, 319.0, 319.0, 314.0]}, "sampler_perf": {"mean_env_wait_ms": 0.7772074101048227, "mean_processing_ms": 0.22397937610682028, "mean_inference_ms": 1.379755174295214}, "off_policy_estimator": {}, "info": {"num_steps_trained": 11112000, "num_steps_sampled": 5926400, "sample_time_ms": 22034.25, "load_time_ms": 37.344, "grad_time_ms": 10407.822, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.004309405107051134, "policy_loss": -0.0030093893874436617, "vf_loss": 78.81652069091797, "vf_explained_var": 0.7744302749633789, "kl": 0.0022333713714033365, "entropy": 1.1256990432739258, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5926400, "episodes_total": 14816, "training_iteration": 463, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-46-04", "timestamp": 1660261564, "time_this_iter_s": 34.88473105430603, "time_total_s": 19972.43727183342, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 19972.43727183342, "timesteps_since_restore": 5926400, "iterations_since_restore": 463, "perf": {"cpu_util_percent": 36.355999999999995, "ram_util_percent": 59.1}}
+{"episode_reward_max": 639.0, "episode_reward_min": 522.0, "episode_reward_mean": 613.15, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 257.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 306.575}, "custom_metrics": {"sparse_reward_mean": 212.4, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 188.35, "shaped_reward_min": 162, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.95, "onion_pickup_agent_0_min": 14, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.16, "onion_pickup_agent_1_min": 14, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.24, "useful_onion_pickup_agent_0_min": 14, "useful_onion_pickup_agent_0_max": 19, "useful_onion_pickup_agent_1_mean": 17.47, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.25, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.11, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.1, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.02, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.4, "potting_onion_agent_0_min": 14, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.73, "potting_onion_agent_1_min": 14, "potting_onion_agent_1_max": 20, "dish_pickup_agent_0_mean": 6.5, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 6.19, "dish_pickup_agent_1_min": 4, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.51, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.21, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.84, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 0.86, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.56, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.2, "soup_pickup_agent_1_min": 4, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.51, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.11, "soup_delivery_agent_1_min": 4, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.4, 
"optimal_onion_potting_agent_0_min": 14, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.73, "optimal_onion_potting_agent_1_min": 14, "optimal_onion_potting_agent_1_max": 20, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.4, "viable_onion_potting_agent_0_min": 14, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.73, "viable_onion_potting_agent_1_min": 14, "viable_onion_potting_agent_1_max": 20, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [633.0, 630.0, 633.0, 633.0, 630.0, 630.0, 630.0, 630.0, 627.0, 587.0, 630.0, 576.0, 627.0, 587.0, 630.0, 630.0, 633.0, 582.0, 522.0, 630.0, 633.0, 587.0, 582.0, 639.0, 582.0, 630.0, 633.0, 582.0, 630.0, 633.0, 636.0, 636.0, 587.0, 627.0, 587.0, 633.0, 582.0, 630.0, 636.0, 639.0, 639.0, 582.0, 627.0, 576.0, 587.0, 636.0, 633.0, 636.0, 630.0, 587.0, 633.0, 582.0, 579.0, 573.0, 636.0, 627.0, 633.0, 636.0, 633.0, 587.0, 573.0, 636.0, 630.0, 636.0, 633.0, 636.0, 630.0, 633.0, 636.0, 630.0, 636.0, 576.0, 587.0, 579.0, 633.0, 579.0, 630.0, 636.0, 636.0, 569.0, 582.0, 584.0, 633.0, 633.0, 627.0, 582.0, 582.0, 587.0, 564.0, 582.0, 639.0, 587.0, 582.0, 627.0, 636.0, 587.0, 630.0, 582.0, 630.0, 639.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [316.0, 317.0, 309.0, 321.0, 319.0, 314.0, 319.0, 314.0, 316.0, 314.0, 321.0, 309.0, 316.0, 314.0, 316.0, 314.0, 308.0, 319.0, 293.0, 294.0, 311.0, 319.0, 288.0, 288.0, 313.0, 314.0, 291.0, 296.0, 311.0, 319.0, 313.0, 317.0, 319.0, 314.0, 291.0, 291.0, 257.0, 265.0, 316.0, 314.0, 324.0, 309.0, 291.0, 296.0, 288.0, 294.0, 319.0, 320.0, 290.0, 292.0, 313.0, 317.0, 319.0, 314.0, 290.0, 292.0, 311.0, 319.0, 316.0, 317.0, 311.0, 325.0, 319.0, 317.0, 296.0, 291.0, 313.0, 314.0, 296.0, 291.0, 311.0, 322.0, 291.0, 291.0, 316.0, 314.0, 316.0, 320.0, 317.0, 322.0, 324.0, 315.0, 291.0, 291.0, 313.0, 314.0, 296.0, 280.0, 296.0, 291.0, 314.0, 322.0, 313.0, 320.0, 314.0, 322.0, 319.0, 
311.0, 291.0, 296.0, 316.0, 317.0, 291.0, 291.0, 290.0, 289.0, 288.0, 285.0, 309.0, 327.0, 313.0, 314.0, 314.0, 319.0, 319.0, 317.0, 316.0, 317.0, 288.0, 299.0, 288.0, 285.0, 314.0, 322.0, 313.0, 317.0, 314.0, 322.0, 314.0, 319.0, 314.0, 322.0, 311.0, 319.0, 319.0, 314.0, 319.0, 317.0, 319.0, 311.0, 319.0, 317.0, 282.0, 294.0, 291.0, 296.0, 294.0, 285.0, 314.0, 319.0, 291.0, 288.0, 316.0, 314.0, 314.0, 322.0, 322.0, 314.0, 281.0, 288.0, 292.0, 290.0, 285.0, 299.0, 321.0, 312.0, 319.0, 314.0, 305.0, 322.0, 293.0, 289.0, 288.0, 294.0, 293.0, 294.0, 276.0, 288.0, 296.0, 286.0, 317.0, 322.0, 293.0, 294.0, 296.0, 286.0, 319.0, 308.0, 316.0, 320.0, 286.0, 301.0, 316.0, 314.0, 293.0, 289.0, 319.0, 311.0, 316.0, 323.0]}, "sampler_perf": {"mean_env_wait_ms": 0.7765494210947501, "mean_processing_ms": 0.2238566721049911, "mean_inference_ms": 1.3793130627015395}, "off_policy_estimator": {}, "info": {"num_steps_trained": 11136000, "num_steps_sampled": 5939200, "sample_time_ms": 22597.793, "load_time_ms": 37.078, "grad_time_ms": 10833.136, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.004432214889675379, "policy_loss": -0.0028620418161153793, "vf_loss": 78.4912338256836, "vf_explained_var": 0.7597255110740662, "kl": 0.0021421227138489485, "entropy": 1.10971999168396, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5939200, "episodes_total": 14848, "training_iteration": 464, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-46-41", "timestamp": 1660261601, "time_this_iter_s": 37.22005105018616, "time_total_s": 20009.657322883606, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 20009.657322883606, "timesteps_since_restore": 5939200, "iterations_since_restore": 464, "perf": {"cpu_util_percent": 33.75576923076923, "ram_util_percent": 59.192307692307686}}
+{"episode_reward_max": 639.0, "episode_reward_min": 462.0, "episode_reward_mean": 609.39, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 223.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 304.695}, "custom_metrics": {"sparse_reward_mean": 211.0, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 187.39, "shaped_reward_min": 142, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.73, "onion_pickup_agent_0_min": 14, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.17, "onion_pickup_agent_1_min": 13, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.12, "useful_onion_pickup_agent_0_min": 13, "useful_onion_pickup_agent_0_max": 19, "useful_onion_pickup_agent_1_mean": 17.45, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.18, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.08, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.06, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.01, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.2, "potting_onion_agent_0_min": 14, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.75, "potting_onion_agent_1_min": 13, "potting_onion_agent_1_max": 20, "dish_pickup_agent_0_mean": 6.6, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 6.12, "dish_pickup_agent_1_min": 4, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.56, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.12, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.87, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 0.87, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.63, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.07, "soup_pickup_agent_1_min": 4, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.57, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.98, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.2, 
"optimal_onion_potting_agent_0_min": 14, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.75, "optimal_onion_potting_agent_1_min": 13, "optimal_onion_potting_agent_1_max": 20, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.2, "viable_onion_potting_agent_0_min": 14, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.75, "viable_onion_potting_agent_1_min": 13, "viable_onion_potting_agent_1_max": 20, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [639.0, 630.0, 522.0, 636.0, 573.0, 636.0, 636.0, 582.0, 582.0, 633.0, 633.0, 587.0, 633.0, 633.0, 582.0, 590.0, 582.0, 579.0, 630.0, 627.0, 587.0, 462.0, 639.0, 630.0, 579.0, 584.0, 630.0, 587.0, 579.0, 587.0, 630.0, 633.0, 633.0, 636.0, 630.0, 633.0, 636.0, 630.0, 636.0, 576.0, 587.0, 579.0, 633.0, 579.0, 630.0, 636.0, 636.0, 569.0, 582.0, 584.0, 633.0, 633.0, 627.0, 582.0, 582.0, 587.0, 564.0, 582.0, 639.0, 587.0, 582.0, 627.0, 636.0, 587.0, 630.0, 582.0, 630.0, 639.0, 633.0, 630.0, 633.0, 633.0, 630.0, 630.0, 630.0, 630.0, 627.0, 587.0, 630.0, 576.0, 627.0, 587.0, 630.0, 630.0, 633.0, 582.0, 522.0, 630.0, 633.0, 587.0, 582.0, 639.0, 582.0, 630.0, 633.0, 582.0, 630.0, 633.0, 636.0, 636.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [317.0, 322.0, 316.0, 314.0, 268.0, 254.0, 319.0, 317.0, 299.0, 274.0, 314.0, 322.0, 313.0, 323.0, 288.0, 294.0, 293.0, 289.0, 324.0, 309.0, 316.0, 317.0, 298.0, 289.0, 319.0, 314.0, 316.0, 317.0, 293.0, 289.0, 296.0, 294.0, 288.0, 294.0, 288.0, 291.0, 316.0, 314.0, 307.0, 320.0, 296.0, 291.0, 223.0, 239.0, 324.0, 315.0, 313.0, 317.0, 290.0, 289.0, 293.0, 291.0, 311.0, 319.0, 296.0, 291.0, 288.0, 291.0, 298.0, 289.0, 316.0, 314.0, 314.0, 319.0, 314.0, 319.0, 314.0, 322.0, 311.0, 319.0, 319.0, 314.0, 319.0, 317.0, 319.0, 311.0, 319.0, 317.0, 282.0, 294.0, 291.0, 296.0, 294.0, 285.0, 314.0, 319.0, 291.0, 288.0, 316.0, 314.0, 314.0, 322.0, 322.0, 314.0, 281.0, 288.0, 292.0, 
290.0, 285.0, 299.0, 321.0, 312.0, 319.0, 314.0, 305.0, 322.0, 293.0, 289.0, 288.0, 294.0, 293.0, 294.0, 276.0, 288.0, 296.0, 286.0, 317.0, 322.0, 293.0, 294.0, 296.0, 286.0, 319.0, 308.0, 316.0, 320.0, 286.0, 301.0, 316.0, 314.0, 293.0, 289.0, 319.0, 311.0, 316.0, 323.0, 316.0, 317.0, 309.0, 321.0, 319.0, 314.0, 319.0, 314.0, 316.0, 314.0, 321.0, 309.0, 316.0, 314.0, 316.0, 314.0, 308.0, 319.0, 293.0, 294.0, 311.0, 319.0, 288.0, 288.0, 313.0, 314.0, 291.0, 296.0, 311.0, 319.0, 313.0, 317.0, 319.0, 314.0, 291.0, 291.0, 257.0, 265.0, 316.0, 314.0, 324.0, 309.0, 291.0, 296.0, 288.0, 294.0, 319.0, 320.0, 290.0, 292.0, 313.0, 317.0, 319.0, 314.0, 290.0, 292.0, 311.0, 319.0, 316.0, 317.0, 311.0, 325.0, 319.0, 317.0]}, "sampler_perf": {"mean_env_wait_ms": 0.7758921958877883, "mean_processing_ms": 0.22373444703892312, "mean_inference_ms": 1.3788740058001947}, "off_policy_estimator": {}, "info": {"num_steps_trained": 11160000, "num_steps_sampled": 5952000, "sample_time_ms": 22835.117, "load_time_ms": 36.936, "grad_time_ms": 10943.452, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0008901534602046013, "policy_loss": -0.006549746263772249, "vf_loss": 80.0201416015625, "vf_explained_var": 0.7684802412986755, "kl": 0.0019470960833132267, "entropy": 1.1242157220840454, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5952000, "episodes_total": 14880, "training_iteration": 465, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-47-14", "timestamp": 1660261634, "time_this_iter_s": 32.92273998260498, "time_total_s": 20042.58006286621, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 20042.58006286621, "timesteps_since_restore": 5952000, "iterations_since_restore": 465, "perf": {"cpu_util_percent": 33.19787234042553, "ram_util_percent": 58.98936170212765}}
+{"episode_reward_max": 639.0, "episode_reward_min": 462.0, "episode_reward_mean": 611.62, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 223.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 305.81}, "custom_metrics": {"sparse_reward_mean": 212.0, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 187.62, "shaped_reward_min": 142, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.66, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.35, "onion_pickup_agent_1_min": 13, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.01, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.52, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.2, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.16, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.07, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.02, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.09, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.89, "potting_onion_agent_1_min": 13, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.63, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 6.13, "dish_pickup_agent_1_min": 4, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.59, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.12, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.86, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.9, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.66, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.05, "soup_pickup_agent_1_min": 4, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.61, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.99, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.09, 
"optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.89, "optimal_onion_potting_agent_1_min": 13, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.09, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.89, "viable_onion_potting_agent_1_min": 13, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 582.0, 630.0, 579.0, 639.0, 636.0, 630.0, 636.0, 633.0, 636.0, 582.0, 587.0, 624.0, 639.0, 630.0, 582.0, 636.0, 633.0, 633.0, 630.0, 630.0, 579.0, 555.0, 630.0, 576.0, 630.0, 627.0, 587.0, 639.0, 627.0, 627.0, 633.0, 630.0, 582.0, 630.0, 639.0, 633.0, 630.0, 633.0, 633.0, 630.0, 630.0, 630.0, 630.0, 627.0, 587.0, 630.0, 576.0, 627.0, 587.0, 630.0, 630.0, 633.0, 582.0, 522.0, 630.0, 633.0, 587.0, 582.0, 639.0, 582.0, 630.0, 633.0, 582.0, 630.0, 633.0, 636.0, 636.0, 639.0, 630.0, 522.0, 636.0, 573.0, 636.0, 636.0, 582.0, 582.0, 633.0, 633.0, 587.0, 633.0, 633.0, 582.0, 590.0, 582.0, 579.0, 630.0, 627.0, 587.0, 462.0, 639.0, 630.0, 579.0, 584.0, 630.0, 587.0, 579.0, 587.0, 630.0, 633.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [291.0, 288.0, 288.0, 294.0, 316.0, 314.0, 288.0, 291.0, 317.0, 322.0, 319.0, 317.0, 313.0, 317.0, 314.0, 322.0, 316.0, 317.0, 314.0, 322.0, 291.0, 291.0, 280.0, 307.0, 316.0, 308.0, 324.0, 315.0, 317.0, 313.0, 283.0, 299.0, 321.0, 315.0, 316.0, 317.0, 316.0, 317.0, 311.0, 319.0, 311.0, 319.0, 294.0, 285.0, 270.0, 285.0, 316.0, 314.0, 281.0, 295.0, 310.0, 320.0, 313.0, 314.0, 288.0, 299.0, 322.0, 317.0, 313.0, 314.0, 321.0, 306.0, 316.0, 317.0, 316.0, 314.0, 293.0, 289.0, 319.0, 311.0, 316.0, 323.0, 316.0, 317.0, 309.0, 321.0, 319.0, 314.0, 319.0, 314.0, 316.0, 314.0, 321.0, 309.0, 316.0, 314.0, 316.0, 314.0, 308.0, 319.0, 293.0, 294.0, 311.0, 319.0, 288.0, 288.0, 313.0, 
314.0, 291.0, 296.0, 311.0, 319.0, 313.0, 317.0, 319.0, 314.0, 291.0, 291.0, 257.0, 265.0, 316.0, 314.0, 324.0, 309.0, 291.0, 296.0, 288.0, 294.0, 319.0, 320.0, 290.0, 292.0, 313.0, 317.0, 319.0, 314.0, 290.0, 292.0, 311.0, 319.0, 316.0, 317.0, 311.0, 325.0, 319.0, 317.0, 317.0, 322.0, 316.0, 314.0, 268.0, 254.0, 319.0, 317.0, 299.0, 274.0, 314.0, 322.0, 313.0, 323.0, 288.0, 294.0, 293.0, 289.0, 324.0, 309.0, 316.0, 317.0, 298.0, 289.0, 319.0, 314.0, 316.0, 317.0, 293.0, 289.0, 296.0, 294.0, 288.0, 294.0, 288.0, 291.0, 316.0, 314.0, 307.0, 320.0, 296.0, 291.0, 223.0, 239.0, 324.0, 315.0, 313.0, 317.0, 290.0, 289.0, 293.0, 291.0, 311.0, 319.0, 296.0, 291.0, 288.0, 291.0, 298.0, 289.0, 316.0, 314.0, 314.0, 319.0]}, "sampler_perf": {"mean_env_wait_ms": 0.7752302665514498, "mean_processing_ms": 0.22360935089938724, "mean_inference_ms": 1.378262918190576}, "off_policy_estimator": {}, "info": {"num_steps_trained": 11184000, "num_steps_sampled": 5964800, "sample_time_ms": 22879.712, "load_time_ms": 37.04, "grad_time_ms": 10842.802, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0008535028900951147, "policy_loss": -0.007580641657114029, "vf_loss": 72.87356567382812, "vf_explained_var": 0.7752940058708191, "kl": 0.0019255572697147727, "entropy": 1.12042236328125, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5964800, "episodes_total": 14912, "training_iteration": 466, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-47-42", "timestamp": 1660261662, "time_this_iter_s": 28.411120176315308, "time_total_s": 20070.991183042526, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 20070.991183042526, "timesteps_since_restore": 5964800, "iterations_since_restore": 466, "perf": {"cpu_util_percent": 30.5725, "ram_util_percent": 58.955}}
+{"episode_reward_max": 639.0, "episode_reward_min": 462.0, "episode_reward_mean": 607.06, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 223.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 303.53}, "custom_metrics": {"sparse_reward_mean": 210.4, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 186.26, "shaped_reward_min": 142, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.71, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.14, "onion_pickup_agent_1_min": 13, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 16.11, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.33, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.22, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.16, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.08, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.03, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.14, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.73, "potting_onion_agent_1_min": 13, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.67, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 6.24, "dish_pickup_agent_1_min": 4, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.44, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.06, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.96, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 0.98, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.52, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.11, "soup_pickup_agent_1_min": 4, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.48, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.05, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.14, 
"optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.73, "optimal_onion_potting_agent_1_min": 13, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.14, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.73, "viable_onion_potting_agent_1_min": 13, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 584.0, 579.0, 582.0, 630.0, 627.0, 573.0, 582.0, 636.0, 587.0, 639.0, 636.0, 539.0, 615.0, 579.0, 582.0, 633.0, 630.0, 582.0, 582.0, 576.0, 636.0, 627.0, 630.0, 507.0, 633.0, 564.0, 627.0, 582.0, 630.0, 630.0, 582.0, 630.0, 633.0, 636.0, 636.0, 639.0, 630.0, 522.0, 636.0, 573.0, 636.0, 636.0, 582.0, 582.0, 633.0, 633.0, 587.0, 633.0, 633.0, 582.0, 590.0, 582.0, 579.0, 630.0, 627.0, 587.0, 462.0, 639.0, 630.0, 579.0, 584.0, 630.0, 587.0, 579.0, 587.0, 630.0, 633.0, 579.0, 582.0, 630.0, 579.0, 639.0, 636.0, 630.0, 636.0, 633.0, 636.0, 582.0, 587.0, 624.0, 639.0, 630.0, 582.0, 636.0, 633.0, 633.0, 630.0, 630.0, 579.0, 555.0, 630.0, 576.0, 630.0, 627.0, 587.0, 639.0, 627.0, 627.0, 633.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [286.0, 296.0, 290.0, 294.0, 288.0, 291.0, 291.0, 291.0, 316.0, 314.0, 321.0, 306.0, 288.0, 285.0, 288.0, 294.0, 322.0, 314.0, 287.0, 300.0, 314.0, 325.0, 314.0, 322.0, 271.0, 268.0, 302.0, 313.0, 285.0, 294.0, 288.0, 294.0, 314.0, 319.0, 316.0, 314.0, 288.0, 294.0, 291.0, 291.0, 288.0, 288.0, 317.0, 319.0, 316.0, 311.0, 311.0, 319.0, 251.0, 256.0, 314.0, 319.0, 281.0, 283.0, 308.0, 319.0, 289.0, 293.0, 318.0, 312.0, 311.0, 319.0, 297.0, 285.0, 311.0, 319.0, 316.0, 317.0, 311.0, 325.0, 319.0, 317.0, 317.0, 322.0, 316.0, 314.0, 268.0, 254.0, 319.0, 317.0, 299.0, 274.0, 314.0, 322.0, 313.0, 323.0, 288.0, 294.0, 293.0, 289.0, 324.0, 309.0, 316.0, 317.0, 298.0, 289.0, 319.0, 
314.0, 316.0, 317.0, 293.0, 289.0, 296.0, 294.0, 288.0, 294.0, 288.0, 291.0, 316.0, 314.0, 307.0, 320.0, 296.0, 291.0, 223.0, 239.0, 324.0, 315.0, 313.0, 317.0, 290.0, 289.0, 293.0, 291.0, 311.0, 319.0, 296.0, 291.0, 288.0, 291.0, 298.0, 289.0, 316.0, 314.0, 314.0, 319.0, 291.0, 288.0, 288.0, 294.0, 316.0, 314.0, 288.0, 291.0, 317.0, 322.0, 319.0, 317.0, 313.0, 317.0, 314.0, 322.0, 316.0, 317.0, 314.0, 322.0, 291.0, 291.0, 280.0, 307.0, 316.0, 308.0, 324.0, 315.0, 317.0, 313.0, 283.0, 299.0, 321.0, 315.0, 316.0, 317.0, 316.0, 317.0, 311.0, 319.0, 311.0, 319.0, 294.0, 285.0, 270.0, 285.0, 316.0, 314.0, 281.0, 295.0, 310.0, 320.0, 313.0, 314.0, 288.0, 299.0, 322.0, 317.0, 313.0, 314.0, 321.0, 306.0, 316.0, 317.0]}, "sampler_perf": {"mean_env_wait_ms": 0.7745646022660153, "mean_processing_ms": 0.2234810440216946, "mean_inference_ms": 1.3774520581026746}, "off_policy_estimator": {}, "info": {"num_steps_trained": 11208000, "num_steps_sampled": 5977600, "sample_time_ms": 22667.166, "load_time_ms": 37.563, "grad_time_ms": 10782.505, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0003179629857186228, "policy_loss": -0.007398936897516251, "vf_loss": 76.44898986816406, "vf_explained_var": 0.7757420539855957, "kl": 0.0019861727487295866, "entropy": 1.127841830253601, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5977600, "episodes_total": 14944, "training_iteration": 467, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-48-12", "timestamp": 1660261692, "time_this_iter_s": 29.89157724380493, "time_total_s": 20100.88276028633, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 20100.88276028633, "timesteps_since_restore": 5977600, "iterations_since_restore": 467, "perf": {"cpu_util_percent": 33.63333333333334, "ram_util_percent": 58.976190476190474}}
+{"episode_reward_max": 639.0, "episode_reward_min": 507.0, "episode_reward_mean": 606.84, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 251.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 303.42}, "custom_metrics": {"sparse_reward_mean": 210.4, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 186.04, "shaped_reward_min": 147, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.77, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.12, "onion_pickup_agent_1_min": 15, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.18, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.31, "useful_onion_pickup_agent_1_min": 14, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.2, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.23, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.08, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.07, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.24, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.58, "potting_onion_agent_1_min": 14, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.7, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 6.42, "dish_pickup_agent_1_min": 4, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.38, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.08, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 1.0, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 1.11, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.48, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.16, "soup_pickup_agent_1_min": 4, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.44, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.09, "soup_delivery_agent_1_min": 4, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.24, 
"optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.58, "optimal_onion_potting_agent_1_min": 14, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.24, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.58, "viable_onion_potting_agent_1_min": 14, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [630.0, 581.0, 582.0, 582.0, 636.0, 633.0, 633.0, 561.0, 630.0, 578.0, 633.0, 582.0, 582.0, 582.0, 579.0, 584.0, 639.0, 587.0, 582.0, 587.0, 633.0, 630.0, 630.0, 567.0, 587.0, 636.0, 630.0, 582.0, 639.0, 633.0, 630.0, 576.0, 579.0, 587.0, 630.0, 633.0, 579.0, 582.0, 630.0, 579.0, 639.0, 636.0, 630.0, 636.0, 633.0, 636.0, 582.0, 587.0, 624.0, 639.0, 630.0, 582.0, 636.0, 633.0, 633.0, 630.0, 630.0, 579.0, 555.0, 630.0, 576.0, 630.0, 627.0, 587.0, 639.0, 627.0, 627.0, 633.0, 582.0, 584.0, 579.0, 582.0, 630.0, 627.0, 573.0, 582.0, 636.0, 587.0, 639.0, 636.0, 539.0, 615.0, 579.0, 582.0, 633.0, 630.0, 582.0, 582.0, 576.0, 636.0, 627.0, 630.0, 507.0, 633.0, 564.0, 627.0, 582.0, 630.0, 630.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [316.0, 314.0, 290.0, 291.0, 293.0, 289.0, 296.0, 286.0, 321.0, 315.0, 319.0, 314.0, 319.0, 314.0, 285.0, 276.0, 316.0, 314.0, 295.0, 283.0, 319.0, 314.0, 288.0, 294.0, 294.0, 288.0, 283.0, 299.0, 281.0, 298.0, 288.0, 296.0, 317.0, 322.0, 296.0, 291.0, 289.0, 293.0, 288.0, 299.0, 321.0, 312.0, 313.0, 317.0, 319.0, 311.0, 284.0, 283.0, 302.0, 285.0, 314.0, 322.0, 313.0, 317.0, 291.0, 291.0, 317.0, 322.0, 309.0, 324.0, 316.0, 314.0, 291.0, 285.0, 288.0, 291.0, 298.0, 289.0, 316.0, 314.0, 314.0, 319.0, 291.0, 288.0, 288.0, 294.0, 316.0, 314.0, 288.0, 291.0, 317.0, 322.0, 319.0, 317.0, 313.0, 317.0, 314.0, 322.0, 316.0, 317.0, 314.0, 322.0, 291.0, 291.0, 280.0, 307.0, 316.0, 
308.0, 324.0, 315.0, 317.0, 313.0, 283.0, 299.0, 321.0, 315.0, 316.0, 317.0, 316.0, 317.0, 311.0, 319.0, 311.0, 319.0, 294.0, 285.0, 270.0, 285.0, 316.0, 314.0, 281.0, 295.0, 310.0, 320.0, 313.0, 314.0, 288.0, 299.0, 322.0, 317.0, 313.0, 314.0, 321.0, 306.0, 316.0, 317.0, 286.0, 296.0, 290.0, 294.0, 288.0, 291.0, 291.0, 291.0, 316.0, 314.0, 321.0, 306.0, 288.0, 285.0, 288.0, 294.0, 322.0, 314.0, 287.0, 300.0, 314.0, 325.0, 314.0, 322.0, 271.0, 268.0, 302.0, 313.0, 285.0, 294.0, 288.0, 294.0, 314.0, 319.0, 316.0, 314.0, 288.0, 294.0, 291.0, 291.0, 288.0, 288.0, 317.0, 319.0, 316.0, 311.0, 311.0, 319.0, 251.0, 256.0, 314.0, 319.0, 281.0, 283.0, 308.0, 319.0, 289.0, 293.0, 318.0, 312.0, 311.0, 319.0, 297.0, 285.0]}, "sampler_perf": {"mean_env_wait_ms": 0.7738952247411988, "mean_processing_ms": 0.22335063238074299, "mean_inference_ms": 1.3764749095367632}, "off_policy_estimator": {}, "info": {"num_steps_trained": 11232000, "num_steps_sampled": 5990400, "sample_time_ms": 22503.528, "load_time_ms": 37.786, "grad_time_ms": 10902.652, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0012515783309936523, "policy_loss": -0.006079933140426874, "vf_loss": 78.91991424560547, "vf_explained_var": 0.7658045887947083, "kl": 0.0020609761122614145, "entropy": 1.1209732294082642, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5990400, "episodes_total": 14976, "training_iteration": 468, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-48-43", "timestamp": 1660261723, "time_this_iter_s": 31.115761756896973, "time_total_s": 20131.998522043228, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 20131.998522043228, "timesteps_since_restore": 5990400, "iterations_since_restore": 468, "perf": {"cpu_util_percent": 34.11818181818182, "ram_util_percent": 59.265909090909076}}
+{"episode_reward_max": 639.0, "episode_reward_min": 507.0, "episode_reward_mean": 609.25, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 251.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 304.625}, "custom_metrics": {"sparse_reward_mean": 211.4, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 186.45, "shaped_reward_min": 147, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.76, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.16, "onion_pickup_agent_1_min": 15, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.2, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.39, "useful_onion_pickup_agent_1_min": 14, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.23, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.17, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.05, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.07, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.24, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.63, "potting_onion_agent_1_min": 14, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.84, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 6.35, "dish_pickup_agent_1_min": 4, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.5, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 4.98, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 1.06, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 1.08, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.59, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.1, "soup_pickup_agent_1_min": 4, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.54, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.04, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.24, 
"optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.63, "optimal_onion_potting_agent_1_min": 14, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.24, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.63, "viable_onion_potting_agent_1_min": 14, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [630.0, 624.0, 627.0, 639.0, 636.0, 636.0, 636.0, 587.0, 633.0, 582.0, 630.0, 636.0, 633.0, 624.0, 630.0, 624.0, 582.0, 633.0, 570.0, 579.0, 633.0, 630.0, 633.0, 630.0, 587.0, 570.0, 633.0, 633.0, 633.0, 627.0, 633.0, 627.0, 639.0, 627.0, 627.0, 633.0, 582.0, 584.0, 579.0, 582.0, 630.0, 627.0, 573.0, 582.0, 636.0, 587.0, 639.0, 636.0, 539.0, 615.0, 579.0, 582.0, 633.0, 630.0, 582.0, 582.0, 576.0, 636.0, 627.0, 630.0, 507.0, 633.0, 564.0, 627.0, 582.0, 630.0, 630.0, 582.0, 630.0, 581.0, 582.0, 582.0, 636.0, 633.0, 633.0, 561.0, 630.0, 578.0, 633.0, 582.0, 582.0, 582.0, 579.0, 584.0, 639.0, 587.0, 582.0, 587.0, 633.0, 630.0, 630.0, 567.0, 587.0, 636.0, 630.0, 582.0, 639.0, 633.0, 630.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [316.0, 314.0, 313.0, 311.0, 316.0, 311.0, 319.0, 320.0, 319.0, 317.0, 314.0, 322.0, 319.0, 317.0, 295.0, 292.0, 316.0, 317.0, 288.0, 294.0, 316.0, 314.0, 319.0, 317.0, 316.0, 317.0, 316.0, 308.0, 311.0, 319.0, 315.0, 309.0, 288.0, 294.0, 316.0, 317.0, 282.0, 288.0, 294.0, 285.0, 319.0, 314.0, 316.0, 314.0, 313.0, 320.0, 316.0, 314.0, 291.0, 296.0, 296.0, 274.0, 321.0, 312.0, 316.0, 317.0, 317.0, 316.0, 316.0, 311.0, 316.0, 317.0, 311.0, 316.0, 322.0, 317.0, 313.0, 314.0, 321.0, 306.0, 316.0, 317.0, 286.0, 296.0, 290.0, 294.0, 288.0, 291.0, 291.0, 291.0, 316.0, 314.0, 321.0, 306.0, 288.0, 285.0, 288.0, 294.0, 322.0, 314.0, 287.0, 300.0, 314.0, 325.0, 314.0, 322.0, 271.0, 
268.0, 302.0, 313.0, 285.0, 294.0, 288.0, 294.0, 314.0, 319.0, 316.0, 314.0, 288.0, 294.0, 291.0, 291.0, 288.0, 288.0, 317.0, 319.0, 316.0, 311.0, 311.0, 319.0, 251.0, 256.0, 314.0, 319.0, 281.0, 283.0, 308.0, 319.0, 289.0, 293.0, 318.0, 312.0, 311.0, 319.0, 297.0, 285.0, 316.0, 314.0, 290.0, 291.0, 293.0, 289.0, 296.0, 286.0, 321.0, 315.0, 319.0, 314.0, 319.0, 314.0, 285.0, 276.0, 316.0, 314.0, 295.0, 283.0, 319.0, 314.0, 288.0, 294.0, 294.0, 288.0, 283.0, 299.0, 281.0, 298.0, 288.0, 296.0, 317.0, 322.0, 296.0, 291.0, 289.0, 293.0, 288.0, 299.0, 321.0, 312.0, 313.0, 317.0, 319.0, 311.0, 284.0, 283.0, 302.0, 285.0, 314.0, 322.0, 313.0, 317.0, 291.0, 291.0, 317.0, 322.0, 309.0, 324.0, 316.0, 314.0, 291.0, 285.0]}, "sampler_perf": {"mean_env_wait_ms": 0.7732342684064082, "mean_processing_ms": 0.22322351749716166, "mean_inference_ms": 1.3755872244816174}, "off_policy_estimator": {}, "info": {"num_steps_trained": 11256000, "num_steps_sampled": 6003200, "sample_time_ms": 22143.958, "load_time_ms": 38.351, "grad_time_ms": 11066.072, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.00040481146425008774, "policy_loss": -0.006932735443115234, "vf_loss": 70.86636352539062, "vf_explained_var": 0.7707180976867676, "kl": 0.0017913728952407837, "entropy": 1.1174226999282837, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 6003200, "episodes_total": 15008, "training_iteration": 469, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-49-17", "timestamp": 1660261757, "time_this_iter_s": 33.90879726409912, "time_total_s": 20165.907319307327, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 20165.907319307327, "timesteps_since_restore": 6003200, "iterations_since_restore": 469, "perf": {"cpu_util_percent": 34.28125, "ram_util_percent": 58.99583333333334}}
+{"episode_reward_max": 639.0, "episode_reward_min": 552.0, "episode_reward_mean": 612.77, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 274.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 306.385}, "custom_metrics": {"sparse_reward_mean": 212.8, "sparse_reward_min": 200, "sparse_reward_max": 220, "shaped_reward_mean": 187.17, "shaped_reward_min": 152, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.85, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.26, "onion_pickup_agent_1_min": 15, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.2, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.48, "useful_onion_pickup_agent_1_min": 14, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.23, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.18, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.05, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.09, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.28, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.74, "potting_onion_agent_1_min": 14, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.73, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 6.28, "dish_pickup_agent_1_min": 4, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.53, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 4.94, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.92, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 1.01, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.67, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.08, "soup_pickup_agent_1_min": 4, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.61, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.03, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.28, 
"optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.74, "optimal_onion_potting_agent_1_min": 14, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.28, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.74, "viable_onion_potting_agent_1_min": 14, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 633.0, 582.0, 639.0, 579.0, 579.0, 630.0, 630.0, 624.0, 636.0, 582.0, 630.0, 630.0, 633.0, 615.0, 627.0, 630.0, 587.0, 630.0, 630.0, 630.0, 636.0, 630.0, 636.0, 621.0, 587.0, 633.0, 582.0, 579.0, 633.0, 630.0, 552.0, 582.0, 630.0, 630.0, 582.0, 630.0, 581.0, 582.0, 582.0, 636.0, 633.0, 633.0, 561.0, 630.0, 578.0, 633.0, 582.0, 582.0, 582.0, 579.0, 584.0, 639.0, 587.0, 582.0, 587.0, 633.0, 630.0, 630.0, 567.0, 587.0, 636.0, 630.0, 582.0, 639.0, 633.0, 630.0, 576.0, 630.0, 624.0, 627.0, 639.0, 636.0, 636.0, 636.0, 587.0, 633.0, 582.0, 630.0, 636.0, 633.0, 624.0, 630.0, 624.0, 582.0, 633.0, 570.0, 579.0, 633.0, 630.0, 633.0, 630.0, 587.0, 570.0, 633.0, 633.0, 633.0, 627.0, 633.0, 627.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [284.0, 298.0, 314.0, 319.0, 294.0, 288.0, 317.0, 322.0, 297.0, 282.0, 285.0, 294.0, 311.0, 319.0, 319.0, 311.0, 315.0, 309.0, 319.0, 317.0, 291.0, 291.0, 316.0, 314.0, 319.0, 311.0, 316.0, 317.0, 307.0, 308.0, 319.0, 308.0, 316.0, 314.0, 296.0, 291.0, 311.0, 319.0, 313.0, 317.0, 311.0, 319.0, 324.0, 312.0, 316.0, 314.0, 319.0, 317.0, 313.0, 308.0, 291.0, 296.0, 311.0, 322.0, 293.0, 289.0, 279.0, 300.0, 311.0, 322.0, 316.0, 314.0, 276.0, 276.0, 289.0, 293.0, 318.0, 312.0, 311.0, 319.0, 297.0, 285.0, 316.0, 314.0, 290.0, 291.0, 293.0, 289.0, 296.0, 286.0, 321.0, 315.0, 319.0, 314.0, 319.0, 314.0, 285.0, 276.0, 316.0, 314.0, 295.0, 283.0, 319.0, 314.0, 288.0, 294.0, 294.0, 
288.0, 283.0, 299.0, 281.0, 298.0, 288.0, 296.0, 317.0, 322.0, 296.0, 291.0, 289.0, 293.0, 288.0, 299.0, 321.0, 312.0, 313.0, 317.0, 319.0, 311.0, 284.0, 283.0, 302.0, 285.0, 314.0, 322.0, 313.0, 317.0, 291.0, 291.0, 317.0, 322.0, 309.0, 324.0, 316.0, 314.0, 291.0, 285.0, 316.0, 314.0, 313.0, 311.0, 316.0, 311.0, 319.0, 320.0, 319.0, 317.0, 314.0, 322.0, 319.0, 317.0, 295.0, 292.0, 316.0, 317.0, 288.0, 294.0, 316.0, 314.0, 319.0, 317.0, 316.0, 317.0, 316.0, 308.0, 311.0, 319.0, 315.0, 309.0, 288.0, 294.0, 316.0, 317.0, 282.0, 288.0, 294.0, 285.0, 319.0, 314.0, 316.0, 314.0, 313.0, 320.0, 316.0, 314.0, 291.0, 296.0, 296.0, 274.0, 321.0, 312.0, 316.0, 317.0, 317.0, 316.0, 316.0, 311.0, 316.0, 317.0, 311.0, 316.0]}, "sampler_perf": {"mean_env_wait_ms": 0.7725761312773625, "mean_processing_ms": 0.2230967942471684, "mean_inference_ms": 1.3747759311690726}, "off_policy_estimator": {}, "info": {"num_steps_trained": 11280000, "num_steps_sampled": 6016000, "sample_time_ms": 21879.975, "load_time_ms": 38.961, "grad_time_ms": 10890.541, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0007060763309709728, "policy_loss": -0.0072075664065778255, "vf_loss": 70.60037231445312, "vf_explained_var": 0.7745871543884277, "kl": 0.0018414078513160348, "entropy": 1.1170852184295654, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 6016000, "episodes_total": 15040, "training_iteration": 470, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-49-49", "timestamp": 1660261789, "time_this_iter_s": 31.930355072021484, "time_total_s": 20197.83767437935, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 20197.83767437935, "timesteps_since_restore": 6016000, "iterations_since_restore": 470, "perf": {"cpu_util_percent": 29.486666666666665, "ram_util_percent": 59.02666666666667}}
+{"episode_reward_max": 639.0, "episode_reward_min": 552.0, "episode_reward_mean": 617.61, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 274.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 308.805}, "custom_metrics": {"sparse_reward_mean": 214.6, "sparse_reward_min": 200, "sparse_reward_max": 220, "shaped_reward_mean": 188.41, "shaped_reward_min": 152, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.03, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.39, "onion_pickup_agent_1_min": 15, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 16.33, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.58, "useful_onion_pickup_agent_1_min": 14, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.27, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.16, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.05, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.05, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.42, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.89, "potting_onion_agent_1_min": 14, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.8, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 6.14, "dish_pickup_agent_1_min": 4, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.56, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 4.95, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 1.01, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.86, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.67, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.13, "soup_pickup_agent_1_min": 4, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.63, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.1, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.42, 
"optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.89, "optimal_onion_potting_agent_1_min": 14, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.42, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.89, "viable_onion_potting_agent_1_min": 14, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 630.0, 587.0, 624.0, 633.0, 636.0, 630.0, 633.0, 633.0, 639.0, 633.0, 636.0, 636.0, 633.0, 627.0, 582.0, 633.0, 639.0, 584.0, 582.0, 636.0, 618.0, 636.0, 633.0, 627.0, 582.0, 582.0, 582.0, 633.0, 636.0, 630.0, 582.0, 639.0, 633.0, 630.0, 576.0, 630.0, 624.0, 627.0, 639.0, 636.0, 636.0, 636.0, 587.0, 633.0, 582.0, 630.0, 636.0, 633.0, 624.0, 630.0, 624.0, 582.0, 633.0, 570.0, 579.0, 633.0, 630.0, 633.0, 630.0, 587.0, 570.0, 633.0, 633.0, 633.0, 627.0, 633.0, 627.0, 582.0, 633.0, 582.0, 639.0, 579.0, 579.0, 630.0, 630.0, 624.0, 636.0, 582.0, 630.0, 630.0, 633.0, 615.0, 627.0, 630.0, 587.0, 630.0, 630.0, 630.0, 636.0, 630.0, 636.0, 621.0, 587.0, 633.0, 582.0, 579.0, 633.0, 630.0, 552.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [286.0, 293.0, 319.0, 311.0, 288.0, 299.0, 311.0, 313.0, 311.0, 322.0, 317.0, 319.0, 313.0, 317.0, 316.0, 317.0, 321.0, 312.0, 317.0, 322.0, 314.0, 319.0, 324.0, 312.0, 324.0, 312.0, 321.0, 312.0, 308.0, 319.0, 296.0, 286.0, 314.0, 319.0, 319.0, 320.0, 296.0, 288.0, 286.0, 296.0, 319.0, 317.0, 308.0, 310.0, 317.0, 319.0, 319.0, 314.0, 321.0, 306.0, 291.0, 291.0, 288.0, 294.0, 288.0, 294.0, 321.0, 312.0, 319.0, 317.0, 313.0, 317.0, 291.0, 291.0, 317.0, 322.0, 309.0, 324.0, 316.0, 314.0, 291.0, 285.0, 316.0, 314.0, 313.0, 311.0, 316.0, 311.0, 319.0, 320.0, 319.0, 317.0, 314.0, 322.0, 319.0, 317.0, 295.0, 292.0, 316.0, 317.0, 288.0, 294.0, 316.0, 314.0, 319.0, 317.0, 316.0, 
317.0, 316.0, 308.0, 311.0, 319.0, 315.0, 309.0, 288.0, 294.0, 316.0, 317.0, 282.0, 288.0, 294.0, 285.0, 319.0, 314.0, 316.0, 314.0, 313.0, 320.0, 316.0, 314.0, 291.0, 296.0, 296.0, 274.0, 321.0, 312.0, 316.0, 317.0, 317.0, 316.0, 316.0, 311.0, 316.0, 317.0, 311.0, 316.0, 284.0, 298.0, 314.0, 319.0, 294.0, 288.0, 317.0, 322.0, 297.0, 282.0, 285.0, 294.0, 311.0, 319.0, 319.0, 311.0, 315.0, 309.0, 319.0, 317.0, 291.0, 291.0, 316.0, 314.0, 319.0, 311.0, 316.0, 317.0, 307.0, 308.0, 319.0, 308.0, 316.0, 314.0, 296.0, 291.0, 311.0, 319.0, 313.0, 317.0, 311.0, 319.0, 324.0, 312.0, 316.0, 314.0, 319.0, 317.0, 313.0, 308.0, 291.0, 296.0, 311.0, 322.0, 293.0, 289.0, 279.0, 300.0, 311.0, 322.0, 316.0, 314.0, 276.0, 276.0]}, "sampler_perf": {"mean_env_wait_ms": 0.7719288658166613, "mean_processing_ms": 0.22297329438160504, "mean_inference_ms": 1.3741316094375031}, "off_policy_estimator": {}, "info": {"num_steps_trained": 11304000, "num_steps_sampled": 6028800, "sample_time_ms": 21780.977, "load_time_ms": 38.854, "grad_time_ms": 10976.386, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0016910168342292309, "policy_loss": -0.008838978596031666, "vf_loss": 77.08248901367188, "vf_explained_var": 0.7691299319267273, "kl": 0.0020619730930775404, "entropy": 1.1205838918685913, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 6028800, "episodes_total": 15072, "training_iteration": 471, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-50-24", "timestamp": 1660261824, "time_this_iter_s": 35.09460806846619, "time_total_s": 20232.932282447815, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 20232.932282447815, "timesteps_since_restore": 6028800, "iterations_since_restore": 471, "perf": {"cpu_util_percent": 30.86, "ram_util_percent": 59.02799999999999}}
+{"episode_reward_max": 639.0, "episode_reward_min": 552.0, "episode_reward_mean": 617.82, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 276.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 308.91}, "custom_metrics": {"sparse_reward_mean": 214.6, "sparse_reward_min": 200, "sparse_reward_max": 220, "shaped_reward_mean": 188.62, "shaped_reward_min": 152, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.21, "onion_pickup_agent_0_min": 14, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.26, "onion_pickup_agent_1_min": 15, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 16.51, "useful_onion_pickup_agent_0_min": 13, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.49, "useful_onion_pickup_agent_1_min": 14, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.26, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.16, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.06, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.03, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.59, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.78, "potting_onion_agent_1_min": 15, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.62, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 6.19, "dish_pickup_agent_1_min": 4, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.47, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.05, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.93, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.81, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.57, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.22, "soup_pickup_agent_1_min": 4, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.53, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.2, "soup_delivery_agent_1_min": 4, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.59, 
"optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.78, "optimal_onion_potting_agent_1_min": 15, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.59, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.78, "viable_onion_potting_agent_1_min": 15, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [633.0, 633.0, 582.0, 630.0, 624.0, 615.0, 582.0, 582.0, 633.0, 630.0, 633.0, 639.0, 582.0, 633.0, 621.0, 633.0, 636.0, 636.0, 636.0, 633.0, 633.0, 581.0, 633.0, 630.0, 582.0, 627.0, 584.0, 636.0, 582.0, 630.0, 636.0, 639.0, 633.0, 627.0, 633.0, 627.0, 582.0, 633.0, 582.0, 639.0, 579.0, 579.0, 630.0, 630.0, 624.0, 636.0, 582.0, 630.0, 630.0, 633.0, 615.0, 627.0, 630.0, 587.0, 630.0, 630.0, 630.0, 636.0, 630.0, 636.0, 621.0, 587.0, 633.0, 582.0, 579.0, 633.0, 630.0, 552.0, 579.0, 630.0, 587.0, 624.0, 633.0, 636.0, 630.0, 633.0, 633.0, 639.0, 633.0, 636.0, 636.0, 633.0, 627.0, 582.0, 633.0, 639.0, 584.0, 582.0, 636.0, 618.0, 636.0, 633.0, 627.0, 582.0, 582.0, 582.0, 633.0, 636.0, 630.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [314.0, 319.0, 314.0, 319.0, 294.0, 288.0, 316.0, 314.0, 316.0, 308.0, 305.0, 310.0, 291.0, 291.0, 287.0, 295.0, 311.0, 322.0, 316.0, 314.0, 317.0, 316.0, 319.0, 320.0, 293.0, 289.0, 316.0, 317.0, 310.0, 311.0, 314.0, 319.0, 309.0, 327.0, 319.0, 317.0, 314.0, 322.0, 311.0, 322.0, 319.0, 314.0, 293.0, 288.0, 317.0, 316.0, 309.0, 321.0, 296.0, 286.0, 314.0, 313.0, 298.0, 286.0, 317.0, 319.0, 288.0, 294.0, 316.0, 314.0, 319.0, 317.0, 316.0, 323.0, 317.0, 316.0, 316.0, 311.0, 316.0, 317.0, 311.0, 316.0, 284.0, 298.0, 314.0, 319.0, 294.0, 288.0, 317.0, 322.0, 297.0, 282.0, 285.0, 294.0, 311.0, 319.0, 319.0, 311.0, 315.0, 309.0, 319.0, 317.0, 291.0, 291.0, 316.0, 314.0, 319.0, 
311.0, 316.0, 317.0, 307.0, 308.0, 319.0, 308.0, 316.0, 314.0, 296.0, 291.0, 311.0, 319.0, 313.0, 317.0, 311.0, 319.0, 324.0, 312.0, 316.0, 314.0, 319.0, 317.0, 313.0, 308.0, 291.0, 296.0, 311.0, 322.0, 293.0, 289.0, 279.0, 300.0, 311.0, 322.0, 316.0, 314.0, 276.0, 276.0, 286.0, 293.0, 319.0, 311.0, 288.0, 299.0, 311.0, 313.0, 311.0, 322.0, 317.0, 319.0, 313.0, 317.0, 316.0, 317.0, 321.0, 312.0, 317.0, 322.0, 314.0, 319.0, 324.0, 312.0, 324.0, 312.0, 321.0, 312.0, 308.0, 319.0, 296.0, 286.0, 314.0, 319.0, 319.0, 320.0, 296.0, 288.0, 286.0, 296.0, 319.0, 317.0, 308.0, 310.0, 317.0, 319.0, 319.0, 314.0, 321.0, 306.0, 291.0, 291.0, 288.0, 294.0, 288.0, 294.0, 321.0, 312.0, 319.0, 317.0, 313.0, 317.0, 291.0, 291.0]}, "sampler_perf": {"mean_env_wait_ms": 0.7712881392644176, "mean_processing_ms": 0.2228504081573419, "mean_inference_ms": 1.3735625457302265}, "off_policy_estimator": {}, "info": {"num_steps_trained": 11328000, "num_steps_sampled": 6041600, "sample_time_ms": 21798.833, "load_time_ms": 39.052, "grad_time_ms": 10918.523, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0015959719894453883, "policy_loss": -0.005134529899805784, "vf_loss": 72.90253448486328, "vf_explained_var": 0.7736382484436035, "kl": 0.00227816472761333, "entropy": 1.1195167303085327, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 6041600, "episodes_total": 15104, "training_iteration": 472, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-50-57", "timestamp": 1660261857, "time_this_iter_s": 32.47214722633362, "time_total_s": 20265.40442967415, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 20265.40442967415, "timesteps_since_restore": 6041600, "iterations_since_restore": 472, "perf": {"cpu_util_percent": 31.686956521739138, "ram_util_percent": 58.99347826086958}}
+{"episode_reward_max": 639.0, "episode_reward_min": 498.0, "episode_reward_mean": 616.47, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 242.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 308.235}, "custom_metrics": {"sparse_reward_mean": 214.2, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 188.07, "shaped_reward_min": 138, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.18, "onion_pickup_agent_0_min": 14, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.27, "onion_pickup_agent_1_min": 15, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 16.56, "useful_onion_pickup_agent_0_min": 13, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.48, "useful_onion_pickup_agent_1_min": 15, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.28, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.18, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.07, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.01, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.6, "potting_onion_agent_0_min": 13, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.78, "potting_onion_agent_1_min": 15, "potting_onion_agent_1_max": 20, "dish_pickup_agent_0_mean": 6.59, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 6.16, "dish_pickup_agent_1_min": 4, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.4, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 4.96, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.91, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.78, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.57, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.21, "soup_pickup_agent_1_min": 4, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.51, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.2, "soup_delivery_agent_1_min": 4, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.6, 
"optimal_onion_potting_agent_0_min": 13, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.78, "optimal_onion_potting_agent_1_min": 15, "optimal_onion_potting_agent_1_max": 20, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.6, "viable_onion_potting_agent_0_min": 13, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.78, "viable_onion_potting_agent_1_min": 15, "viable_onion_potting_agent_1_max": 20, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [630.0, 582.0, 582.0, 630.0, 609.0, 630.0, 636.0, 582.0, 636.0, 633.0, 633.0, 558.0, 630.0, 633.0, 630.0, 630.0, 636.0, 630.0, 636.0, 582.0, 587.0, 627.0, 639.0, 636.0, 587.0, 630.0, 627.0, 630.0, 498.0, 579.0, 630.0, 630.0, 579.0, 633.0, 630.0, 552.0, 579.0, 630.0, 587.0, 624.0, 633.0, 636.0, 630.0, 633.0, 633.0, 639.0, 633.0, 636.0, 636.0, 633.0, 627.0, 582.0, 633.0, 639.0, 584.0, 582.0, 636.0, 618.0, 636.0, 633.0, 627.0, 582.0, 582.0, 582.0, 633.0, 636.0, 630.0, 582.0, 633.0, 633.0, 582.0, 630.0, 624.0, 615.0, 582.0, 582.0, 633.0, 630.0, 633.0, 639.0, 582.0, 633.0, 621.0, 633.0, 636.0, 636.0, 636.0, 633.0, 633.0, 581.0, 633.0, 630.0, 582.0, 627.0, 584.0, 636.0, 582.0, 630.0, 636.0, 639.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [319.0, 311.0, 291.0, 291.0, 294.0, 288.0, 316.0, 314.0, 304.0, 305.0, 316.0, 314.0, 319.0, 317.0, 286.0, 296.0, 319.0, 317.0, 319.0, 314.0, 316.0, 317.0, 276.0, 282.0, 316.0, 314.0, 314.0, 319.0, 324.0, 306.0, 319.0, 311.0, 319.0, 317.0, 316.0, 314.0, 314.0, 322.0, 294.0, 288.0, 296.0, 291.0, 308.0, 319.0, 319.0, 320.0, 319.0, 317.0, 293.0, 294.0, 311.0, 319.0, 322.0, 305.0, 316.0, 314.0, 242.0, 256.0, 288.0, 291.0, 313.0, 317.0, 316.0, 314.0, 279.0, 300.0, 311.0, 322.0, 316.0, 314.0, 276.0, 276.0, 286.0, 293.0, 319.0, 311.0, 288.0, 299.0, 311.0, 313.0, 311.0, 322.0, 317.0, 319.0, 313.0, 317.0, 316.0, 317.0, 321.0, 312.0, 317.0, 322.0, 314.0, 319.0, 324.0, 312.0, 324.0, 
312.0, 321.0, 312.0, 308.0, 319.0, 296.0, 286.0, 314.0, 319.0, 319.0, 320.0, 296.0, 288.0, 286.0, 296.0, 319.0, 317.0, 308.0, 310.0, 317.0, 319.0, 319.0, 314.0, 321.0, 306.0, 291.0, 291.0, 288.0, 294.0, 288.0, 294.0, 321.0, 312.0, 319.0, 317.0, 313.0, 317.0, 291.0, 291.0, 314.0, 319.0, 314.0, 319.0, 294.0, 288.0, 316.0, 314.0, 316.0, 308.0, 305.0, 310.0, 291.0, 291.0, 287.0, 295.0, 311.0, 322.0, 316.0, 314.0, 317.0, 316.0, 319.0, 320.0, 293.0, 289.0, 316.0, 317.0, 310.0, 311.0, 314.0, 319.0, 309.0, 327.0, 319.0, 317.0, 314.0, 322.0, 311.0, 322.0, 319.0, 314.0, 293.0, 288.0, 317.0, 316.0, 309.0, 321.0, 296.0, 286.0, 314.0, 313.0, 298.0, 286.0, 317.0, 319.0, 288.0, 294.0, 316.0, 314.0, 319.0, 317.0, 316.0, 323.0]}, "sampler_perf": {"mean_env_wait_ms": 0.7706549385151754, "mean_processing_ms": 0.22272985988411015, "mean_inference_ms": 1.3730067691447254}, "off_policy_estimator": {}, "info": {"num_steps_trained": 11352000, "num_steps_sampled": 6054400, "sample_time_ms": 21729.717, "load_time_ms": 39.098, "grad_time_ms": 10717.617, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0005160535220056772, "policy_loss": -0.007616788614541292, "vf_loss": 76.61554718017578, "vf_explained_var": 0.7677715420722961, "kl": 0.0017990797059610486, "entropy": 1.1216602325439453, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 6054400, "episodes_total": 15136, "training_iteration": 473, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-51-29", "timestamp": 1660261889, "time_this_iter_s": 32.178860902786255, "time_total_s": 20297.583290576935, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 20297.583290576935, "timesteps_since_restore": 6054400, "iterations_since_restore": 473, "perf": {"cpu_util_percent": 29.955555555555556, "ram_util_percent": 59.05111111111111}}
+{"episode_reward_max": 639.0, "episode_reward_min": 498.0, "episode_reward_mean": 616.45, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 242.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 308.225}, "custom_metrics": {"sparse_reward_mean": 214.2, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 188.05, "shaped_reward_min": 138, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.93, "onion_pickup_agent_0_min": 14, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.38, "onion_pickup_agent_1_min": 15, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 16.31, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.6, "useful_onion_pickup_agent_1_min": 15, "useful_onion_pickup_agent_1_max": 20, "onion_drop_agent_0_mean": 0.23, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.14, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.06, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.01, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.41, "potting_onion_agent_0_min": 13, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.94, "potting_onion_agent_1_min": 15, "potting_onion_agent_1_max": 20, "dish_pickup_agent_0_mean": 6.67, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 6.07, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.48, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.92, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.89, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.78, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.64, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.13, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.59, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.12, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.41, 
"optimal_onion_potting_agent_0_min": 13, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.94, "optimal_onion_potting_agent_1_min": 15, "optimal_onion_potting_agent_1_max": 20, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.41, "viable_onion_potting_agent_0_min": 13, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.94, "viable_onion_potting_agent_1_min": 15, "viable_onion_potting_agent_1_max": 20, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [630.0, 579.0, 636.0, 639.0, 633.0, 582.0, 579.0, 627.0, 636.0, 639.0, 636.0, 630.0, 573.0, 558.0, 573.0, 636.0, 579.0, 630.0, 633.0, 630.0, 633.0, 579.0, 636.0, 630.0, 630.0, 630.0, 587.0, 624.0, 633.0, 639.0, 582.0, 636.0, 633.0, 636.0, 630.0, 582.0, 633.0, 633.0, 582.0, 630.0, 624.0, 615.0, 582.0, 582.0, 633.0, 630.0, 633.0, 639.0, 582.0, 633.0, 621.0, 633.0, 636.0, 636.0, 636.0, 633.0, 633.0, 581.0, 633.0, 630.0, 582.0, 627.0, 584.0, 636.0, 582.0, 630.0, 636.0, 639.0, 630.0, 582.0, 582.0, 630.0, 609.0, 630.0, 636.0, 582.0, 636.0, 633.0, 633.0, 558.0, 630.0, 633.0, 630.0, 630.0, 636.0, 630.0, 636.0, 582.0, 587.0, 627.0, 639.0, 636.0, 587.0, 630.0, 627.0, 630.0, 498.0, 579.0, 630.0, 630.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [321.0, 309.0, 285.0, 294.0, 311.0, 325.0, 319.0, 320.0, 319.0, 314.0, 291.0, 291.0, 287.0, 292.0, 313.0, 314.0, 324.0, 312.0, 324.0, 315.0, 319.0, 317.0, 311.0, 319.0, 282.0, 291.0, 278.0, 280.0, 280.0, 293.0, 319.0, 317.0, 288.0, 291.0, 311.0, 319.0, 319.0, 314.0, 317.0, 313.0, 316.0, 317.0, 293.0, 286.0, 314.0, 322.0, 321.0, 309.0, 326.0, 304.0, 316.0, 314.0, 293.0, 294.0, 305.0, 319.0, 316.0, 317.0, 314.0, 325.0, 285.0, 297.0, 319.0, 317.0, 321.0, 312.0, 319.0, 317.0, 313.0, 317.0, 291.0, 291.0, 314.0, 319.0, 314.0, 319.0, 294.0, 288.0, 316.0, 314.0, 316.0, 308.0, 305.0, 310.0, 291.0, 291.0, 287.0, 295.0, 311.0, 322.0, 316.0, 314.0, 317.0, 316.0, 319.0, 320.0, 293.0, 
289.0, 316.0, 317.0, 310.0, 311.0, 314.0, 319.0, 309.0, 327.0, 319.0, 317.0, 314.0, 322.0, 311.0, 322.0, 319.0, 314.0, 293.0, 288.0, 317.0, 316.0, 309.0, 321.0, 296.0, 286.0, 314.0, 313.0, 298.0, 286.0, 317.0, 319.0, 288.0, 294.0, 316.0, 314.0, 319.0, 317.0, 316.0, 323.0, 319.0, 311.0, 291.0, 291.0, 294.0, 288.0, 316.0, 314.0, 304.0, 305.0, 316.0, 314.0, 319.0, 317.0, 286.0, 296.0, 319.0, 317.0, 319.0, 314.0, 316.0, 317.0, 276.0, 282.0, 316.0, 314.0, 314.0, 319.0, 324.0, 306.0, 319.0, 311.0, 319.0, 317.0, 316.0, 314.0, 314.0, 322.0, 294.0, 288.0, 296.0, 291.0, 308.0, 319.0, 319.0, 320.0, 319.0, 317.0, 293.0, 294.0, 311.0, 319.0, 322.0, 305.0, 316.0, 314.0, 242.0, 256.0, 288.0, 291.0, 313.0, 317.0, 316.0, 314.0]}, "sampler_perf": {"mean_env_wait_ms": 0.7700230204234435, "mean_processing_ms": 0.22260903764962753, "mean_inference_ms": 1.3724136807505178}, "off_policy_estimator": {}, "info": {"num_steps_trained": 11376000, "num_steps_sampled": 6067200, "sample_time_ms": 21448.766, "load_time_ms": 39.222, "grad_time_ms": 10552.154, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.001221023383550346, "policy_loss": -0.005727085750550032, "vf_loss": 75.09713745117188, "vf_explained_var": 0.7705094218254089, "kl": 0.002081832615658641, "entropy": 1.1232246160507202, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 6067200, "episodes_total": 15168, "training_iteration": 474, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-52-02", "timestamp": 1660261922, "time_this_iter_s": 32.74073004722595, "time_total_s": 20330.32402062416, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 20330.32402062416, "timesteps_since_restore": 6067200, "iterations_since_restore": 474, "perf": {"cpu_util_percent": 29.70652173913044, "ram_util_percent": 59.01521739130436}}
+{"episode_reward_max": 639.0, "episode_reward_min": 408.0, "episode_reward_mean": 616.32, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 200.0}, "policy_reward_max": {"ppo": 326.0}, "policy_reward_mean": {"ppo": 308.16}, "custom_metrics": {"sparse_reward_mean": 214.0, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 188.32, "shaped_reward_min": 128, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.74, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.52, "onion_pickup_agent_1_min": 13, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.14, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.67, "useful_onion_pickup_agent_1_min": 12, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.19, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.17, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.04, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.04, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.22, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 18.12, "potting_onion_agent_1_min": 12, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.83, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.86, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.62, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.88, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 6, "dish_drop_agent_0_mean": 0.95, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.68, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.73, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.04, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 6, "soup_delivery_agent_0_mean": 5.7, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.0, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.22, 
"optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 18.12, "optimal_onion_potting_agent_1_min": 12, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.22, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 18.12, "viable_onion_potting_agent_1_min": 12, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [633.0, 587.0, 633.0, 633.0, 630.0, 639.0, 639.0, 624.0, 630.0, 582.0, 408.0, 636.0, 579.0, 636.0, 633.0, 633.0, 636.0, 639.0, 627.0, 633.0, 636.0, 630.0, 639.0, 630.0, 587.0, 630.0, 636.0, 587.0, 639.0, 630.0, 633.0, 633.0, 582.0, 630.0, 636.0, 639.0, 630.0, 582.0, 582.0, 630.0, 609.0, 630.0, 636.0, 582.0, 636.0, 633.0, 633.0, 558.0, 630.0, 633.0, 630.0, 630.0, 636.0, 630.0, 636.0, 582.0, 587.0, 627.0, 639.0, 636.0, 587.0, 630.0, 627.0, 630.0, 498.0, 579.0, 630.0, 630.0, 630.0, 579.0, 636.0, 639.0, 633.0, 582.0, 579.0, 627.0, 636.0, 639.0, 636.0, 630.0, 573.0, 558.0, 573.0, 636.0, 579.0, 630.0, 633.0, 630.0, 633.0, 579.0, 636.0, 630.0, 630.0, 630.0, 587.0, 624.0, 633.0, 639.0, 582.0, 636.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [316.0, 317.0, 293.0, 294.0, 313.0, 320.0, 319.0, 314.0, 316.0, 314.0, 322.0, 317.0, 322.0, 317.0, 313.0, 311.0, 311.0, 319.0, 291.0, 291.0, 208.0, 200.0, 314.0, 322.0, 284.0, 295.0, 319.0, 317.0, 313.0, 320.0, 311.0, 322.0, 319.0, 317.0, 322.0, 317.0, 313.0, 314.0, 322.0, 311.0, 319.0, 317.0, 321.0, 309.0, 319.0, 320.0, 318.0, 312.0, 293.0, 294.0, 311.0, 319.0, 319.0, 317.0, 288.0, 299.0, 322.0, 317.0, 316.0, 314.0, 317.0, 316.0, 319.0, 314.0, 288.0, 294.0, 316.0, 314.0, 319.0, 317.0, 316.0, 323.0, 319.0, 311.0, 291.0, 291.0, 294.0, 288.0, 316.0, 314.0, 304.0, 305.0, 316.0, 314.0, 319.0, 317.0, 286.0, 296.0, 319.0, 317.0, 319.0, 314.0, 316.0, 317.0, 276.0, 282.0, 316.0, 
314.0, 314.0, 319.0, 324.0, 306.0, 319.0, 311.0, 319.0, 317.0, 316.0, 314.0, 314.0, 322.0, 294.0, 288.0, 296.0, 291.0, 308.0, 319.0, 319.0, 320.0, 319.0, 317.0, 293.0, 294.0, 311.0, 319.0, 322.0, 305.0, 316.0, 314.0, 242.0, 256.0, 288.0, 291.0, 313.0, 317.0, 316.0, 314.0, 321.0, 309.0, 285.0, 294.0, 311.0, 325.0, 319.0, 320.0, 319.0, 314.0, 291.0, 291.0, 287.0, 292.0, 313.0, 314.0, 324.0, 312.0, 324.0, 315.0, 319.0, 317.0, 311.0, 319.0, 282.0, 291.0, 278.0, 280.0, 280.0, 293.0, 319.0, 317.0, 288.0, 291.0, 311.0, 319.0, 319.0, 314.0, 317.0, 313.0, 316.0, 317.0, 293.0, 286.0, 314.0, 322.0, 321.0, 309.0, 326.0, 304.0, 316.0, 314.0, 293.0, 294.0, 305.0, 319.0, 316.0, 317.0, 314.0, 325.0, 285.0, 297.0, 319.0, 317.0]}, "sampler_perf": {"mean_env_wait_ms": 0.7693923762268673, "mean_processing_ms": 0.22248851172311768, "mean_inference_ms": 1.3718337576179396}, "off_policy_estimator": {}, "info": {"num_steps_trained": 11400000, "num_steps_sampled": 6080000, "sample_time_ms": 21471.253, "load_time_ms": 39.181, "grad_time_ms": 10674.814, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.001056800247170031, "policy_loss": -0.005863674450665712, "vf_loss": 74.7898178100586, "vf_explained_var": 0.7796471118927002, "kl": 0.002407137770205736, "entropy": 1.1170334815979004, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 6080000, "episodes_total": 15200, "training_iteration": 475, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-52-36", "timestamp": 1660261956, "time_this_iter_s": 34.37432289123535, "time_total_s": 20364.698343515396, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 20364.698343515396, "timesteps_since_restore": 6080000, "iterations_since_restore": 475, "perf": {"cpu_util_percent": 34.573469387755104, "ram_util_percent": 58.995918367346945}}
+{"episode_reward_max": 639.0, "episode_reward_min": 291.0, "episode_reward_mean": 611.12, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 145.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 305.56}, "custom_metrics": {"sparse_reward_mean": 212.0, "sparse_reward_min": 100, "sparse_reward_max": 220, "shaped_reward_mean": 187.12, "shaped_reward_min": 91, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.5, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.45, "onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 15.97, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.55, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.12, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.15, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.03, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.07, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.07, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 18.06, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.82, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.66, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.59, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.87, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 6, "dish_drop_agent_0_mean": 0.98, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.57, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.72, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.95, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 6, "soup_delivery_agent_0_mean": 5.69, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.91, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.07, 
"optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 18.06, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.07, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 18.06, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [630.0, 636.0, 582.0, 636.0, 633.0, 636.0, 291.0, 639.0, 576.0, 587.0, 630.0, 587.0, 633.0, 621.0, 582.0, 630.0, 584.0, 465.0, 636.0, 582.0, 630.0, 636.0, 636.0, 630.0, 636.0, 624.0, 579.0, 624.0, 633.0, 636.0, 582.0, 636.0, 498.0, 579.0, 630.0, 630.0, 630.0, 579.0, 636.0, 639.0, 633.0, 582.0, 579.0, 627.0, 636.0, 639.0, 636.0, 630.0, 573.0, 558.0, 573.0, 636.0, 579.0, 630.0, 633.0, 630.0, 633.0, 579.0, 636.0, 630.0, 630.0, 630.0, 587.0, 624.0, 633.0, 639.0, 582.0, 636.0, 633.0, 587.0, 633.0, 633.0, 630.0, 639.0, 639.0, 624.0, 630.0, 582.0, 408.0, 636.0, 579.0, 636.0, 633.0, 633.0, 636.0, 639.0, 627.0, 633.0, 636.0, 630.0, 639.0, 630.0, 587.0, 630.0, 636.0, 587.0, 639.0, 630.0, 633.0, 633.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [311.0, 319.0, 324.0, 312.0, 294.0, 288.0, 319.0, 317.0, 314.0, 319.0, 314.0, 322.0, 145.0, 146.0, 319.0, 320.0, 292.0, 284.0, 293.0, 294.0, 313.0, 317.0, 296.0, 291.0, 318.0, 315.0, 313.0, 308.0, 296.0, 286.0, 314.0, 316.0, 298.0, 286.0, 228.0, 237.0, 319.0, 317.0, 296.0, 286.0, 313.0, 317.0, 327.0, 309.0, 322.0, 314.0, 316.0, 314.0, 319.0, 317.0, 311.0, 313.0, 283.0, 296.0, 310.0, 314.0, 316.0, 317.0, 319.0, 317.0, 289.0, 293.0, 319.0, 317.0, 242.0, 256.0, 288.0, 291.0, 313.0, 317.0, 316.0, 314.0, 321.0, 309.0, 285.0, 294.0, 311.0, 325.0, 319.0, 320.0, 319.0, 314.0, 291.0, 291.0, 287.0, 292.0, 313.0, 314.0, 324.0, 312.0, 324.0, 315.0, 319.0, 317.0, 311.0, 319.0, 282.0, 
291.0, 278.0, 280.0, 280.0, 293.0, 319.0, 317.0, 288.0, 291.0, 311.0, 319.0, 319.0, 314.0, 317.0, 313.0, 316.0, 317.0, 293.0, 286.0, 314.0, 322.0, 321.0, 309.0, 326.0, 304.0, 316.0, 314.0, 293.0, 294.0, 305.0, 319.0, 316.0, 317.0, 314.0, 325.0, 285.0, 297.0, 319.0, 317.0, 316.0, 317.0, 293.0, 294.0, 313.0, 320.0, 319.0, 314.0, 316.0, 314.0, 322.0, 317.0, 322.0, 317.0, 313.0, 311.0, 311.0, 319.0, 291.0, 291.0, 208.0, 200.0, 314.0, 322.0, 284.0, 295.0, 319.0, 317.0, 313.0, 320.0, 311.0, 322.0, 319.0, 317.0, 322.0, 317.0, 313.0, 314.0, 322.0, 311.0, 319.0, 317.0, 321.0, 309.0, 319.0, 320.0, 318.0, 312.0, 293.0, 294.0, 311.0, 319.0, 319.0, 317.0, 288.0, 299.0, 322.0, 317.0, 316.0, 314.0, 317.0, 316.0, 319.0, 314.0]}, "sampler_perf": {"mean_env_wait_ms": 0.7687649833136294, "mean_processing_ms": 0.2223678201524863, "mean_inference_ms": 1.3712901278888552}, "off_policy_estimator": {}, "info": {"num_steps_trained": 11424000, "num_steps_sampled": 6092800, "sample_time_ms": 21822.996, "load_time_ms": 39.16, "grad_time_ms": 10774.782, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0006356429657898843, "policy_loss": -0.006720335688441992, "vf_loss": 79.15064239501953, "vf_explained_var": 0.7751259207725525, "kl": 0.0025446319486945868, "entropy": 1.1181851625442505, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 6092800, "episodes_total": 15232, "training_iteration": 476, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-53-09", "timestamp": 1660261989, "time_this_iter_s": 32.92729115486145, "time_total_s": 20397.625634670258, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 20397.625634670258, "timesteps_since_restore": 6092800, "iterations_since_restore": 476, "perf": {"cpu_util_percent": 31.27608695652173, "ram_util_percent": 59.04347826086958}}
+{"episode_reward_max": 639.0, "episode_reward_min": 180.0, "episode_reward_mean": 607.23, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 89.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 303.615}, "custom_metrics": {"sparse_reward_mean": 210.2, "sparse_reward_min": 60, "sparse_reward_max": 220, "shaped_reward_mean": 186.83, "shaped_reward_min": 60, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.48, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.44, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 15.99, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.43, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.15, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.19, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.06, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.09, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.02, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.98, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.76, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.64, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.62, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.94, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 6, "dish_drop_agent_0_mean": 0.98, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.55, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.69, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.94, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 6, "soup_delivery_agent_0_mean": 5.64, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.89, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.02, 
"optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.98, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.02, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.98, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [630.0, 636.0, 636.0, 578.0, 639.0, 582.0, 576.0, 587.0, 636.0, 582.0, 533.0, 633.0, 636.0, 630.0, 630.0, 579.0, 587.0, 630.0, 639.0, 639.0, 579.0, 636.0, 180.0, 587.0, 633.0, 630.0, 627.0, 587.0, 633.0, 633.0, 582.0, 630.0, 633.0, 639.0, 582.0, 636.0, 633.0, 587.0, 633.0, 633.0, 630.0, 639.0, 639.0, 624.0, 630.0, 582.0, 408.0, 636.0, 579.0, 636.0, 633.0, 633.0, 636.0, 639.0, 627.0, 633.0, 636.0, 630.0, 639.0, 630.0, 587.0, 630.0, 636.0, 587.0, 639.0, 630.0, 633.0, 633.0, 630.0, 636.0, 582.0, 636.0, 633.0, 636.0, 291.0, 639.0, 576.0, 587.0, 630.0, 587.0, 633.0, 621.0, 582.0, 630.0, 584.0, 465.0, 636.0, 582.0, 630.0, 636.0, 636.0, 630.0, 636.0, 624.0, 579.0, 624.0, 633.0, 636.0, 582.0, 636.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [313.0, 317.0, 319.0, 317.0, 316.0, 320.0, 279.0, 299.0, 319.0, 320.0, 293.0, 289.0, 291.0, 285.0, 293.0, 294.0, 314.0, 322.0, 288.0, 294.0, 254.0, 279.0, 319.0, 314.0, 319.0, 317.0, 317.0, 313.0, 311.0, 319.0, 282.0, 297.0, 295.0, 292.0, 316.0, 314.0, 319.0, 320.0, 316.0, 323.0, 288.0, 291.0, 324.0, 312.0, 91.0, 89.0, 296.0, 291.0, 319.0, 314.0, 311.0, 319.0, 316.0, 311.0, 306.0, 281.0, 321.0, 312.0, 316.0, 317.0, 288.0, 294.0, 311.0, 319.0, 316.0, 317.0, 314.0, 325.0, 285.0, 297.0, 319.0, 317.0, 316.0, 317.0, 293.0, 294.0, 313.0, 320.0, 319.0, 314.0, 316.0, 314.0, 322.0, 317.0, 322.0, 317.0, 313.0, 311.0, 311.0, 319.0, 291.0, 291.0, 208.0, 200.0, 314.0, 322.0, 284.0, 
295.0, 319.0, 317.0, 313.0, 320.0, 311.0, 322.0, 319.0, 317.0, 322.0, 317.0, 313.0, 314.0, 322.0, 311.0, 319.0, 317.0, 321.0, 309.0, 319.0, 320.0, 318.0, 312.0, 293.0, 294.0, 311.0, 319.0, 319.0, 317.0, 288.0, 299.0, 322.0, 317.0, 316.0, 314.0, 317.0, 316.0, 319.0, 314.0, 311.0, 319.0, 324.0, 312.0, 294.0, 288.0, 319.0, 317.0, 314.0, 319.0, 314.0, 322.0, 145.0, 146.0, 319.0, 320.0, 292.0, 284.0, 293.0, 294.0, 313.0, 317.0, 296.0, 291.0, 318.0, 315.0, 313.0, 308.0, 296.0, 286.0, 314.0, 316.0, 298.0, 286.0, 228.0, 237.0, 319.0, 317.0, 296.0, 286.0, 313.0, 317.0, 327.0, 309.0, 322.0, 314.0, 316.0, 314.0, 319.0, 317.0, 311.0, 313.0, 283.0, 296.0, 310.0, 314.0, 316.0, 317.0, 319.0, 317.0, 289.0, 293.0, 319.0, 317.0]}, "sampler_perf": {"mean_env_wait_ms": 0.7681470637203944, "mean_processing_ms": 0.22224947461581387, "mean_inference_ms": 1.3708820792528533}, "off_policy_estimator": {}, "info": {"num_steps_trained": 11448000, "num_steps_sampled": 6105600, "sample_time_ms": 22237.355, "load_time_ms": 38.878, "grad_time_ms": 10934.933, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.003096085973083973, "policy_loss": -0.004263754468411207, "vf_loss": 79.16039276123047, "vf_explained_var": 0.7912160754203796, "kl": 0.001874853391200304, "entropy": 1.1124080419540405, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 6105600, "episodes_total": 15264, "training_iteration": 477, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-53-45", "timestamp": 1660262025, "time_this_iter_s": 35.63289189338684, "time_total_s": 20433.258526563644, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 20433.258526563644, "timesteps_since_restore": 6105600, "iterations_since_restore": 477, "perf": {"cpu_util_percent": 34.068627450980394, "ram_util_percent": 59.11960784313726}}
+{"episode_reward_max": 639.0, "episode_reward_min": 180.0, "episode_reward_mean": 606.68, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 89.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 303.34}, "custom_metrics": {"sparse_reward_mean": 210.0, "sparse_reward_min": 60, "sparse_reward_max": 220, "shaped_reward_mean": 186.68, "shaped_reward_min": 60, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.64, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.34, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.09, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.37, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.22, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.19, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.1, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.09, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.17, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.87, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.63, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.84, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.52, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.95, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.9, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.69, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.63, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.0, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 6, "soup_delivery_agent_0_mean": 5.57, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.96, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.17, "optimal_onion_potting_agent_0_min": 
5, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.87, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.17, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.87, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": 
{"episode_reward": [630.0, 627.0, 633.0, 561.0, 636.0, 630.0, 525.0, 633.0, 636.0, 636.0, 582.0, 636.0, 636.0, 630.0, 633.0, 636.0, 639.0, 630.0, 636.0, 587.0, 582.0, 636.0, 544.0, 633.0, 636.0, 630.0, 639.0, 584.0, 636.0, 579.0, 576.0, 633.0, 639.0, 630.0, 633.0, 633.0, 630.0, 636.0, 582.0, 636.0, 633.0, 636.0, 291.0, 639.0, 576.0, 587.0, 630.0, 587.0, 633.0, 621.0, 582.0, 630.0, 584.0, 465.0, 636.0, 582.0, 630.0, 636.0, 636.0, 630.0, 636.0, 624.0, 579.0, 624.0, 633.0, 636.0, 582.0, 636.0, 630.0, 636.0, 636.0, 578.0, 639.0, 582.0, 576.0, 587.0, 636.0, 582.0, 533.0, 633.0, 636.0, 630.0, 630.0, 579.0, 587.0, 630.0, 639.0, 639.0, 579.0, 636.0, 180.0, 587.0, 633.0, 630.0, 627.0, 587.0, 633.0, 633.0, 582.0, 630.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [316.0, 314.0, 319.0, 308.0, 316.0, 317.0, 279.0, 282.0, 316.0, 320.0, 316.0, 314.0, 259.0, 266.0, 314.0, 319.0, 319.0, 317.0, 322.0, 314.0, 291.0, 291.0, 319.0, 317.0, 319.0, 317.0, 319.0, 311.0, 313.0, 320.0, 316.0, 320.0, 317.0, 322.0, 305.0, 325.0, 313.0, 323.0, 294.0, 293.0, 291.0, 291.0, 319.0, 317.0, 271.0, 273.0, 311.0, 322.0, 314.0, 322.0, 316.0, 314.0, 320.0, 319.0, 285.0, 299.0, 321.0, 315.0, 294.0, 285.0, 285.0, 291.0, 319.0, 314.0, 322.0, 317.0, 316.0, 314.0, 317.0, 316.0, 319.0, 314.0, 311.0, 319.0, 324.0, 312.0, 294.0, 288.0, 319.0, 317.0, 314.0, 319.0, 314.0, 322.0, 145.0, 146.0, 319.0, 320.0, 292.0, 284.0, 293.0, 294.0, 313.0, 317.0, 296.0, 291.0, 318.0, 315.0, 313.0, 308.0, 296.0, 286.0, 314.0, 316.0, 298.0, 
286.0, 228.0, 237.0, 319.0, 317.0, 296.0, 286.0, 313.0, 317.0, 327.0, 309.0, 322.0, 314.0, 316.0, 314.0, 319.0, 317.0, 311.0, 313.0, 283.0, 296.0, 310.0, 314.0, 316.0, 317.0, 319.0, 317.0, 289.0, 293.0, 319.0, 317.0, 313.0, 317.0, 319.0, 317.0, 316.0, 320.0, 279.0, 299.0, 319.0, 320.0, 293.0, 289.0, 291.0, 285.0, 293.0, 294.0, 314.0, 322.0, 288.0, 294.0, 254.0, 279.0, 319.0, 314.0, 319.0, 317.0, 317.0, 313.0, 311.0, 319.0, 282.0, 297.0, 295.0, 292.0, 316.0, 314.0, 319.0, 320.0, 316.0, 323.0, 288.0, 291.0, 324.0, 312.0, 91.0, 89.0, 296.0, 291.0, 319.0, 314.0, 311.0, 319.0, 316.0, 311.0, 306.0, 281.0, 321.0, 312.0, 316.0, 317.0, 288.0, 294.0, 311.0, 319.0]}, "sampler_perf": {"mean_env_wait_ms": 0.7675359476043465, "mean_processing_ms": 0.22213291684157827, "mean_inference_ms": 1.3705566142110346}, "off_policy_estimator": {}, "info": {"num_steps_trained": 11472000, "num_steps_sampled": 6118400, "sample_time_ms": 22722.857, "load_time_ms": 39.067, "grad_time_ms": 10783.352, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0003133426944259554, "policy_loss": -0.007279651705175638, "vf_loss": 75.2169418334961, "vf_explained_var": 0.7742903232574463, "kl": 0.0020874079782515764, "entropy": 1.110769271850586, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 6118400, "episodes_total": 15296, "training_iteration": 478, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-54-19", "timestamp": 1660262059, "time_this_iter_s": 34.45740509033203, "time_total_s": 20467.715931653976, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, 
"conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, 
"custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, 
"vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 20467.715931653976, "timesteps_since_restore": 6118400, "iterations_since_restore": 478, "perf": {"cpu_util_percent": 28.777083333333334, "ram_util_percent": 59.04374999999999}}
+{"episode_reward_max": 639.0, "episode_reward_min": 180.0, "episode_reward_mean": 608.24, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 89.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 304.12}, "custom_metrics": {"sparse_reward_mean": 210.8, "sparse_reward_min": 60, "sparse_reward_max": 220, "shaped_reward_mean": 186.64, "shaped_reward_min": 60, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.8, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.38, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.19, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.45, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.27, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.23, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.12, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.08, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.25, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.84, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.51, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.96, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.45, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.89, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.78, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.76, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.6, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.07, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 6, "soup_delivery_agent_0_mean": 5.55, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.02, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.25, 
"optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.84, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.25, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.84, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [606.0, 582.0, 636.0, 563.0, 615.0, 627.0, 582.0, 633.0, 587.0, 627.0, 630.0, 579.0, 630.0, 633.0, 462.0, 630.0, 582.0, 633.0, 630.0, 633.0, 636.0, 636.0, 584.0, 633.0, 582.0, 636.0, 633.0, 630.0, 582.0, 630.0, 573.0, 627.0, 633.0, 636.0, 582.0, 636.0, 630.0, 636.0, 636.0, 578.0, 639.0, 582.0, 576.0, 587.0, 636.0, 582.0, 533.0, 633.0, 636.0, 630.0, 630.0, 579.0, 587.0, 630.0, 639.0, 639.0, 579.0, 636.0, 180.0, 587.0, 633.0, 630.0, 627.0, 587.0, 633.0, 633.0, 582.0, 630.0, 630.0, 627.0, 633.0, 561.0, 636.0, 630.0, 525.0, 633.0, 636.0, 636.0, 582.0, 636.0, 636.0, 630.0, 633.0, 636.0, 639.0, 630.0, 636.0, 587.0, 582.0, 636.0, 544.0, 633.0, 636.0, 630.0, 639.0, 584.0, 636.0, 579.0, 576.0, 633.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [302.0, 304.0, 289.0, 293.0, 319.0, 317.0, 284.0, 279.0, 305.0, 310.0, 319.0, 308.0, 294.0, 288.0, 319.0, 314.0, 293.0, 294.0, 316.0, 311.0, 321.0, 309.0, 283.0, 296.0, 311.0, 319.0, 314.0, 319.0, 239.0, 223.0, 316.0, 314.0, 288.0, 294.0, 319.0, 314.0, 319.0, 311.0, 311.0, 322.0, 316.0, 320.0, 314.0, 322.0, 307.0, 277.0, 314.0, 319.0, 288.0, 294.0, 319.0, 317.0, 316.0, 317.0, 313.0, 317.0, 288.0, 294.0, 310.0, 320.0, 292.0, 281.0, 321.0, 306.0, 316.0, 317.0, 319.0, 317.0, 289.0, 293.0, 319.0, 317.0, 313.0, 317.0, 319.0, 317.0, 316.0, 320.0, 279.0, 299.0, 319.0, 320.0, 293.0, 289.0, 291.0, 285.0, 293.0, 294.0, 314.0, 322.0, 288.0, 294.0, 254.0, 279.0, 319.0, 314.0, 319.0, 
317.0, 317.0, 313.0, 311.0, 319.0, 282.0, 297.0, 295.0, 292.0, 316.0, 314.0, 319.0, 320.0, 316.0, 323.0, 288.0, 291.0, 324.0, 312.0, 91.0, 89.0, 296.0, 291.0, 319.0, 314.0, 311.0, 319.0, 316.0, 311.0, 306.0, 281.0, 321.0, 312.0, 316.0, 317.0, 288.0, 294.0, 311.0, 319.0, 316.0, 314.0, 319.0, 308.0, 316.0, 317.0, 279.0, 282.0, 316.0, 320.0, 316.0, 314.0, 259.0, 266.0, 314.0, 319.0, 319.0, 317.0, 322.0, 314.0, 291.0, 291.0, 319.0, 317.0, 319.0, 317.0, 319.0, 311.0, 313.0, 320.0, 316.0, 320.0, 317.0, 322.0, 305.0, 325.0, 313.0, 323.0, 294.0, 293.0, 291.0, 291.0, 319.0, 317.0, 271.0, 273.0, 311.0, 322.0, 314.0, 322.0, 316.0, 314.0, 320.0, 319.0, 285.0, 299.0, 321.0, 315.0, 294.0, 285.0, 285.0, 291.0, 319.0, 314.0]}, "sampler_perf": {"mean_env_wait_ms": 0.7669199085026867, "mean_processing_ms": 0.22201572097443972, "mean_inference_ms": 1.3701586168616826}, "off_policy_estimator": {}, "info": {"num_steps_trained": 11496000, "num_steps_sampled": 6131200, "sample_time_ms": 22699.68, "load_time_ms": 39.016, "grad_time_ms": 10734.597, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0008129358175210655, "policy_loss": -0.006242450326681137, "vf_loss": 76.11907196044922, "vf_explained_var": 0.7632293701171875, "kl": 0.0021639217156916857, "entropy": 1.1130343675613403, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 6131200, "episodes_total": 15328, "training_iteration": 479, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-54-53", "timestamp": 1660262093, "time_this_iter_s": 33.185157775878906, "time_total_s": 20500.901089429855, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 20500.901089429855, "timesteps_since_restore": 6131200, "iterations_since_restore": 479, "perf": {"cpu_util_percent": 30.472340425531915, "ram_util_percent": 58.97021276595746}}
+{"episode_reward_max": 639.0, "episode_reward_min": 462.0, "episode_reward_mean": 613.67, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 223.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 306.835}, "custom_metrics": {"sparse_reward_mean": 212.8, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 188.07, "shaped_reward_min": 142, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.9, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 19, "onion_pickup_agent_1_mean": 18.41, "onion_pickup_agent_1_min": 14, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 16.34, "useful_onion_pickup_agent_0_min": 13, "useful_onion_pickup_agent_0_max": 19, "useful_onion_pickup_agent_1_mean": 17.56, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.22, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.23, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.1, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.11, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.41, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.9, "potting_onion_agent_1_min": 14, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.5, "dish_pickup_agent_0_min": 5, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 6.05, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.5, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 4.98, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.74, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.78, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.6, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.14, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 6, "soup_delivery_agent_0_mean": 5.56, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.1, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.41, 
"optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.9, "optimal_onion_potting_agent_1_min": 14, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.41, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.9, "viable_onion_potting_agent_1_min": 14, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [633.0, 633.0, 630.0, 579.0, 639.0, 636.0, 639.0, 639.0, 587.0, 636.0, 587.0, 627.0, 633.0, 576.0, 582.0, 633.0, 636.0, 630.0, 630.0, 633.0, 636.0, 525.0, 630.0, 582.0, 587.0, 639.0, 627.0, 582.0, 582.0, 633.0, 636.0, 630.0, 633.0, 633.0, 582.0, 630.0, 630.0, 627.0, 633.0, 561.0, 636.0, 630.0, 525.0, 633.0, 636.0, 636.0, 582.0, 636.0, 636.0, 630.0, 633.0, 636.0, 639.0, 630.0, 636.0, 587.0, 582.0, 636.0, 544.0, 633.0, 636.0, 630.0, 639.0, 584.0, 636.0, 579.0, 576.0, 633.0, 606.0, 582.0, 636.0, 563.0, 615.0, 627.0, 582.0, 633.0, 587.0, 627.0, 630.0, 579.0, 630.0, 633.0, 462.0, 630.0, 582.0, 633.0, 630.0, 633.0, 636.0, 636.0, 584.0, 633.0, 582.0, 636.0, 633.0, 630.0, 582.0, 630.0, 573.0, 627.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [311.0, 322.0, 319.0, 314.0, 308.0, 322.0, 296.0, 283.0, 319.0, 320.0, 316.0, 320.0, 317.0, 322.0, 319.0, 320.0, 288.0, 299.0, 316.0, 320.0, 301.0, 286.0, 313.0, 314.0, 317.0, 316.0, 291.0, 285.0, 286.0, 296.0, 319.0, 314.0, 319.0, 317.0, 316.0, 314.0, 313.0, 317.0, 316.0, 317.0, 314.0, 322.0, 257.0, 268.0, 316.0, 314.0, 296.0, 286.0, 296.0, 291.0, 319.0, 320.0, 305.0, 322.0, 293.0, 289.0, 294.0, 288.0, 308.0, 325.0, 319.0, 317.0, 313.0, 317.0, 321.0, 312.0, 316.0, 317.0, 288.0, 294.0, 311.0, 319.0, 316.0, 314.0, 319.0, 308.0, 316.0, 317.0, 279.0, 282.0, 316.0, 320.0, 316.0, 314.0, 259.0, 266.0, 314.0, 319.0, 319.0, 317.0, 322.0, 314.0, 291.0, 291.0, 319.0, 317.0, 319.0, 
317.0, 319.0, 311.0, 313.0, 320.0, 316.0, 320.0, 317.0, 322.0, 305.0, 325.0, 313.0, 323.0, 294.0, 293.0, 291.0, 291.0, 319.0, 317.0, 271.0, 273.0, 311.0, 322.0, 314.0, 322.0, 316.0, 314.0, 320.0, 319.0, 285.0, 299.0, 321.0, 315.0, 294.0, 285.0, 285.0, 291.0, 319.0, 314.0, 302.0, 304.0, 289.0, 293.0, 319.0, 317.0, 284.0, 279.0, 305.0, 310.0, 319.0, 308.0, 294.0, 288.0, 319.0, 314.0, 293.0, 294.0, 316.0, 311.0, 321.0, 309.0, 283.0, 296.0, 311.0, 319.0, 314.0, 319.0, 239.0, 223.0, 316.0, 314.0, 288.0, 294.0, 319.0, 314.0, 319.0, 311.0, 311.0, 322.0, 316.0, 320.0, 314.0, 322.0, 307.0, 277.0, 314.0, 319.0, 288.0, 294.0, 319.0, 317.0, 316.0, 317.0, 313.0, 317.0, 288.0, 294.0, 310.0, 320.0, 292.0, 281.0, 321.0, 306.0]}, "sampler_perf": {"mean_env_wait_ms": 0.7662995807176626, "mean_processing_ms": 0.22189831707550936, "mean_inference_ms": 1.3696437835161013}, "off_policy_estimator": {}, "info": {"num_steps_trained": 11520000, "num_steps_sampled": 6144000, "sample_time_ms": 22732.007, "load_time_ms": 38.46, "grad_time_ms": 10636.502, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0033598102163523436, "policy_loss": -0.003906731028109789, "vf_loss": 78.25418090820312, "vf_explained_var": 0.768868625164032, "kl": 0.0016973327146843076, "entropy": 1.117727279663086, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 6144000, "episodes_total": 15360, "training_iteration": 480, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-55-24", "timestamp": 1660262124, "time_this_iter_s": 31.27005410194397, "time_total_s": 20532.1711435318, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 20532.1711435318, "timesteps_since_restore": 6144000, "iterations_since_restore": 480, "perf": {"cpu_util_percent": 31.795555555555556, "ram_util_percent": 59.01333333333335}}
+{"episode_reward_max": 639.0, "episode_reward_min": 462.0, "episode_reward_mean": 611.12, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 223.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 305.56}, "custom_metrics": {"sparse_reward_mean": 212.2, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 186.72, "shaped_reward_min": 142, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.73, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.38, "onion_pickup_agent_1_min": 14, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.18, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 19, "useful_onion_pickup_agent_1_mean": 17.57, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.17, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.2, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.07, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.11, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.29, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.89, "potting_onion_agent_1_min": 14, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.52, "dish_pickup_agent_0_min": 5, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.88, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.4, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 4.76, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 6, "dish_drop_agent_0_mean": 0.72, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.67, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.67, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.07, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 6, "soup_delivery_agent_0_mean": 5.61, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.01, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.29, 
"optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.89, "optimal_onion_potting_agent_1_min": 14, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.29, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.89, "viable_onion_potting_agent_1_min": 14, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [578.0, 582.0, 630.0, 587.0, 570.0, 587.0, 630.0, 612.0, 624.0, 633.0, 570.0, 587.0, 582.0, 630.0, 587.0, 636.0, 636.0, 582.0, 552.0, 636.0, 584.0, 633.0, 590.0, 624.0, 630.0, 630.0, 636.0, 609.0, 630.0, 633.0, 633.0, 636.0, 636.0, 579.0, 576.0, 633.0, 606.0, 582.0, 636.0, 563.0, 615.0, 627.0, 582.0, 633.0, 587.0, 627.0, 630.0, 579.0, 630.0, 633.0, 462.0, 630.0, 582.0, 633.0, 630.0, 633.0, 636.0, 636.0, 584.0, 633.0, 582.0, 636.0, 633.0, 630.0, 582.0, 630.0, 573.0, 627.0, 633.0, 633.0, 630.0, 579.0, 639.0, 636.0, 639.0, 639.0, 587.0, 636.0, 587.0, 627.0, 633.0, 576.0, 582.0, 633.0, 636.0, 630.0, 630.0, 633.0, 636.0, 525.0, 630.0, 582.0, 587.0, 639.0, 627.0, 582.0, 582.0, 633.0, 636.0, 630.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [295.0, 283.0, 288.0, 294.0, 313.0, 317.0, 290.0, 297.0, 284.0, 286.0, 299.0, 288.0, 315.0, 315.0, 304.0, 308.0, 317.0, 307.0, 311.0, 322.0, 295.0, 275.0, 296.0, 291.0, 293.0, 289.0, 316.0, 314.0, 288.0, 299.0, 319.0, 317.0, 314.0, 322.0, 291.0, 291.0, 279.0, 273.0, 324.0, 312.0, 291.0, 293.0, 319.0, 314.0, 296.0, 294.0, 311.0, 313.0, 311.0, 319.0, 313.0, 317.0, 319.0, 317.0, 301.0, 308.0, 316.0, 314.0, 311.0, 322.0, 316.0, 317.0, 319.0, 317.0, 321.0, 315.0, 294.0, 285.0, 285.0, 291.0, 319.0, 314.0, 302.0, 304.0, 289.0, 293.0, 319.0, 317.0, 284.0, 279.0, 305.0, 310.0, 319.0, 308.0, 294.0, 288.0, 319.0, 314.0, 293.0, 294.0, 316.0, 311.0, 321.0, 309.0, 283.0, 296.0, 311.0, 
319.0, 314.0, 319.0, 239.0, 223.0, 316.0, 314.0, 288.0, 294.0, 319.0, 314.0, 319.0, 311.0, 311.0, 322.0, 316.0, 320.0, 314.0, 322.0, 307.0, 277.0, 314.0, 319.0, 288.0, 294.0, 319.0, 317.0, 316.0, 317.0, 313.0, 317.0, 288.0, 294.0, 310.0, 320.0, 292.0, 281.0, 321.0, 306.0, 311.0, 322.0, 319.0, 314.0, 308.0, 322.0, 296.0, 283.0, 319.0, 320.0, 316.0, 320.0, 317.0, 322.0, 319.0, 320.0, 288.0, 299.0, 316.0, 320.0, 301.0, 286.0, 313.0, 314.0, 317.0, 316.0, 291.0, 285.0, 286.0, 296.0, 319.0, 314.0, 319.0, 317.0, 316.0, 314.0, 313.0, 317.0, 316.0, 317.0, 314.0, 322.0, 257.0, 268.0, 316.0, 314.0, 296.0, 286.0, 296.0, 291.0, 319.0, 320.0, 305.0, 322.0, 293.0, 289.0, 294.0, 288.0, 308.0, 325.0, 319.0, 317.0, 313.0, 317.0]}, "sampler_perf": {"mean_env_wait_ms": 0.7656785099973358, "mean_processing_ms": 0.2217805887765104, "mean_inference_ms": 1.3690072686883668}, "off_policy_estimator": {}, "info": {"num_steps_trained": 11544000, "num_steps_sampled": 6156800, "sample_time_ms": 22631.205, "load_time_ms": 38.401, "grad_time_ms": 10438.368, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -1.2905181392852683e-05, "policy_loss": -0.007116043474525213, "vf_loss": 76.6054458618164, "vf_explained_var": 0.7700133323669434, "kl": 0.0019200993701815605, "entropy": 1.1147748231887817, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 6156800, "episodes_total": 15392, "training_iteration": 481, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-55-56", "timestamp": 1660262156, "time_this_iter_s": 32.10103392601013, "time_total_s": 20564.27217745781, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 20564.27217745781, "timesteps_since_restore": 6156800, "iterations_since_restore": 481, "perf": {"cpu_util_percent": 31.702222222222222, "ram_util_percent": 59.035555555555575}}
+{"episode_reward_max": 639.0, "episode_reward_min": 513.0, "episode_reward_mean": 613.16, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 249.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 306.58}, "custom_metrics": {"sparse_reward_mean": 212.8, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 187.56, "shaped_reward_min": 152, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.59, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.53, "onion_pickup_agent_1_min": 15, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.04, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 19, "useful_onion_pickup_agent_1_mean": 17.63, "useful_onion_pickup_agent_1_min": 14, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.1, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.2, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.03, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.09, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.17, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 18.08, "potting_onion_agent_1_min": 15, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.7, "dish_pickup_agent_0_min": 5, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.88, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.47, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 4.85, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 6, "dish_drop_agent_0_mean": 0.9, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.63, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.66, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.12, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 6, "soup_delivery_agent_0_mean": 5.61, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.05, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.17, 
"optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 18.08, "optimal_onion_potting_agent_1_min": 15, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.17, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 18.08, "viable_onion_potting_agent_1_min": 15, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [636.0, 630.0, 587.0, 630.0, 621.0, 639.0, 636.0, 630.0, 630.0, 639.0, 561.0, 633.0, 630.0, 627.0, 582.0, 582.0, 636.0, 587.0, 636.0, 630.0, 636.0, 513.0, 576.0, 633.0, 633.0, 630.0, 636.0, 582.0, 587.0, 633.0, 624.0, 633.0, 582.0, 630.0, 573.0, 627.0, 633.0, 633.0, 630.0, 579.0, 639.0, 636.0, 639.0, 639.0, 587.0, 636.0, 587.0, 627.0, 633.0, 576.0, 582.0, 633.0, 636.0, 630.0, 630.0, 633.0, 636.0, 525.0, 630.0, 582.0, 587.0, 639.0, 627.0, 582.0, 582.0, 633.0, 636.0, 630.0, 578.0, 582.0, 630.0, 587.0, 570.0, 587.0, 630.0, 612.0, 624.0, 633.0, 570.0, 587.0, 582.0, 630.0, 587.0, 636.0, 636.0, 582.0, 552.0, 636.0, 584.0, 633.0, 590.0, 624.0, 630.0, 630.0, 636.0, 609.0, 630.0, 633.0, 633.0, 636.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [313.0, 323.0, 313.0, 317.0, 293.0, 294.0, 319.0, 311.0, 308.0, 313.0, 319.0, 320.0, 314.0, 322.0, 316.0, 314.0, 311.0, 319.0, 317.0, 322.0, 273.0, 288.0, 316.0, 317.0, 316.0, 314.0, 313.0, 314.0, 291.0, 291.0, 291.0, 291.0, 314.0, 322.0, 298.0, 289.0, 314.0, 322.0, 316.0, 314.0, 324.0, 312.0, 249.0, 264.0, 280.0, 296.0, 316.0, 317.0, 316.0, 317.0, 311.0, 319.0, 319.0, 317.0, 288.0, 294.0, 296.0, 291.0, 316.0, 317.0, 308.0, 316.0, 319.0, 314.0, 288.0, 294.0, 310.0, 320.0, 292.0, 281.0, 321.0, 306.0, 311.0, 322.0, 319.0, 314.0, 308.0, 322.0, 296.0, 283.0, 319.0, 320.0, 316.0, 320.0, 317.0, 322.0, 319.0, 320.0, 288.0, 299.0, 316.0, 320.0, 301.0, 286.0, 313.0, 314.0, 317.0, 
316.0, 291.0, 285.0, 286.0, 296.0, 319.0, 314.0, 319.0, 317.0, 316.0, 314.0, 313.0, 317.0, 316.0, 317.0, 314.0, 322.0, 257.0, 268.0, 316.0, 314.0, 296.0, 286.0, 296.0, 291.0, 319.0, 320.0, 305.0, 322.0, 293.0, 289.0, 294.0, 288.0, 308.0, 325.0, 319.0, 317.0, 313.0, 317.0, 295.0, 283.0, 288.0, 294.0, 313.0, 317.0, 290.0, 297.0, 284.0, 286.0, 299.0, 288.0, 315.0, 315.0, 304.0, 308.0, 317.0, 307.0, 311.0, 322.0, 295.0, 275.0, 296.0, 291.0, 293.0, 289.0, 316.0, 314.0, 288.0, 299.0, 319.0, 317.0, 314.0, 322.0, 291.0, 291.0, 279.0, 273.0, 324.0, 312.0, 291.0, 293.0, 319.0, 314.0, 296.0, 294.0, 311.0, 313.0, 311.0, 319.0, 313.0, 317.0, 319.0, 317.0, 301.0, 308.0, 316.0, 314.0, 311.0, 322.0, 316.0, 317.0, 319.0, 317.0]}, "sampler_perf": {"mean_env_wait_ms": 0.7650611031190503, "mean_processing_ms": 0.2216638185152556, "mean_inference_ms": 1.3683447229242562}, "off_policy_estimator": {}, "info": {"num_steps_trained": 11568000, "num_steps_sampled": 6169600, "sample_time_ms": 22476.544, "load_time_ms": 38.386, "grad_time_ms": 10500.152, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 5.255569703876972e-05, "policy_loss": -0.006667418871074915, "vf_loss": 72.75797271728516, "vf_explained_var": 0.775852620601654, "kl": 0.0019747635815292597, "entropy": 1.111660122871399, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 6169600, "episodes_total": 15424, "training_iteration": 482, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-56-28", "timestamp": 1660262188, "time_this_iter_s": 31.53903889656067, "time_total_s": 20595.81121635437, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 20595.81121635437, "timesteps_since_restore": 6169600, "iterations_since_restore": 482, "perf": {"cpu_util_percent": 34.43555555555556, "ram_util_percent": 58.98222222222224}}
+{"episode_reward_max": 639.0, "episode_reward_min": 513.0, "episode_reward_mean": 614.53, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 249.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 307.265}, "custom_metrics": {"sparse_reward_mean": 213.4, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 187.73, "shaped_reward_min": 152, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.81, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.41, "onion_pickup_agent_1_min": 15, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.24, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.57, "useful_onion_pickup_agent_1_min": 14, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.11, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.18, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.02, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.03, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.31, "potting_onion_agent_0_min": 13, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.98, "potting_onion_agent_1_min": 15, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.68, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.83, "dish_pickup_agent_1_min": 4, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.41, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 4.86, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.93, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.56, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.63, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.19, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.57, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.12, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.31, 
"optimal_onion_potting_agent_0_min": 13, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.98, "optimal_onion_potting_agent_1_min": 15, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.31, "viable_onion_potting_agent_0_min": 13, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.98, "viable_onion_potting_agent_1_min": 15, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [636.0, 582.0, 627.0, 633.0, 636.0, 630.0, 582.0, 627.0, 630.0, 579.0, 630.0, 630.0, 633.0, 630.0, 630.0, 633.0, 530.0, 636.0, 633.0, 633.0, 576.0, 633.0, 636.0, 630.0, 587.0, 633.0, 633.0, 581.0, 587.0, 633.0, 630.0, 636.0, 582.0, 633.0, 636.0, 630.0, 578.0, 582.0, 630.0, 587.0, 570.0, 587.0, 630.0, 612.0, 624.0, 633.0, 570.0, 587.0, 582.0, 630.0, 587.0, 636.0, 636.0, 582.0, 552.0, 636.0, 584.0, 633.0, 590.0, 624.0, 630.0, 630.0, 636.0, 609.0, 630.0, 633.0, 633.0, 636.0, 636.0, 630.0, 587.0, 630.0, 621.0, 639.0, 636.0, 630.0, 630.0, 639.0, 561.0, 633.0, 630.0, 627.0, 582.0, 582.0, 636.0, 587.0, 636.0, 630.0, 636.0, 513.0, 576.0, 633.0, 633.0, 630.0, 636.0, 582.0, 587.0, 633.0, 624.0, 633.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [322.0, 314.0, 293.0, 289.0, 313.0, 314.0, 314.0, 319.0, 319.0, 317.0, 316.0, 314.0, 281.0, 301.0, 313.0, 314.0, 322.0, 308.0, 291.0, 288.0, 311.0, 319.0, 318.0, 312.0, 319.0, 314.0, 311.0, 319.0, 316.0, 314.0, 316.0, 317.0, 268.0, 262.0, 316.0, 320.0, 316.0, 317.0, 309.0, 324.0, 288.0, 288.0, 314.0, 319.0, 314.0, 322.0, 311.0, 319.0, 296.0, 291.0, 318.0, 315.0, 318.0, 315.0, 296.0, 285.0, 293.0, 294.0, 309.0, 324.0, 316.0, 314.0, 319.0, 317.0, 294.0, 288.0, 308.0, 325.0, 319.0, 317.0, 313.0, 317.0, 295.0, 283.0, 288.0, 294.0, 313.0, 317.0, 290.0, 297.0, 284.0, 286.0, 299.0, 288.0, 315.0, 315.0, 304.0, 308.0, 317.0, 307.0, 311.0, 322.0, 295.0, 275.0, 296.0, 291.0, 293.0, 
289.0, 316.0, 314.0, 288.0, 299.0, 319.0, 317.0, 314.0, 322.0, 291.0, 291.0, 279.0, 273.0, 324.0, 312.0, 291.0, 293.0, 319.0, 314.0, 296.0, 294.0, 311.0, 313.0, 311.0, 319.0, 313.0, 317.0, 319.0, 317.0, 301.0, 308.0, 316.0, 314.0, 311.0, 322.0, 316.0, 317.0, 319.0, 317.0, 313.0, 323.0, 313.0, 317.0, 293.0, 294.0, 319.0, 311.0, 308.0, 313.0, 319.0, 320.0, 314.0, 322.0, 316.0, 314.0, 311.0, 319.0, 317.0, 322.0, 273.0, 288.0, 316.0, 317.0, 316.0, 314.0, 313.0, 314.0, 291.0, 291.0, 291.0, 291.0, 314.0, 322.0, 298.0, 289.0, 314.0, 322.0, 316.0, 314.0, 324.0, 312.0, 249.0, 264.0, 280.0, 296.0, 316.0, 317.0, 316.0, 317.0, 311.0, 319.0, 319.0, 317.0, 288.0, 294.0, 296.0, 291.0, 316.0, 317.0, 308.0, 316.0, 319.0, 314.0]}, "sampler_perf": {"mean_env_wait_ms": 0.764446149993761, "mean_processing_ms": 0.22154654084728279, "mean_inference_ms": 1.3676326972645958}, "off_policy_estimator": {}, "info": {"num_steps_trained": 11592000, "num_steps_sampled": 6182400, "sample_time_ms": 22364.392, "load_time_ms": 38.739, "grad_time_ms": 10348.578, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.000403035432100296, "policy_loss": -0.006678896490484476, "vf_loss": 76.43026733398438, "vf_explained_var": 0.76324862241745, "kl": 0.0020988413598388433, "entropy": 1.1221919059753418, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 6182400, "episodes_total": 15456, "training_iteration": 483, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-56-57", "timestamp": 1660262217, "time_this_iter_s": 29.545005083084106, "time_total_s": 20625.356221437454, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 20625.356221437454, "timesteps_since_restore": 6182400, "iterations_since_restore": 483, "perf": {"cpu_util_percent": 31.23658536585366, "ram_util_percent": 59.02682926829268}}
+{"episode_reward_max": 639.0, "episode_reward_min": 120.0, "episode_reward_mean": 614.09, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 54.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 307.045}, "custom_metrics": {"sparse_reward_mean": 213.0, "sparse_reward_min": 40, "sparse_reward_max": 220, "shaped_reward_mean": 188.09, "shaped_reward_min": 40, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.92, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.26, "onion_pickup_agent_1_min": 4, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.39, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.35, "useful_onion_pickup_agent_1_min": 4, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.15, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.18, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.01, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.03, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.38, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.83, "potting_onion_agent_1_min": 3, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.59, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 6.0, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.51, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.06, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.91, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 0.73, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.55, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.21, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.51, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.16, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.38, 
"optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.83, "optimal_onion_potting_agent_1_min": 3, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.38, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.83, "viable_onion_potting_agent_1_min": 3, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 587.0, 636.0, 582.0, 582.0, 633.0, 630.0, 636.0, 627.0, 633.0, 587.0, 633.0, 639.0, 630.0, 587.0, 582.0, 636.0, 630.0, 636.0, 636.0, 630.0, 636.0, 120.0, 636.0, 630.0, 633.0, 633.0, 633.0, 630.0, 630.0, 630.0, 639.0, 630.0, 633.0, 633.0, 636.0, 636.0, 630.0, 587.0, 630.0, 621.0, 639.0, 636.0, 630.0, 630.0, 639.0, 561.0, 633.0, 630.0, 627.0, 582.0, 582.0, 636.0, 587.0, 636.0, 630.0, 636.0, 513.0, 576.0, 633.0, 633.0, 630.0, 636.0, 582.0, 587.0, 633.0, 624.0, 633.0, 636.0, 582.0, 627.0, 633.0, 636.0, 630.0, 582.0, 627.0, 630.0, 579.0, 630.0, 630.0, 633.0, 630.0, 630.0, 633.0, 530.0, 636.0, 633.0, 633.0, 576.0, 633.0, 636.0, 630.0, 587.0, 633.0, 633.0, 581.0, 587.0, 633.0, 630.0, 636.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [290.0, 292.0, 301.0, 286.0, 324.0, 312.0, 289.0, 293.0, 291.0, 291.0, 313.0, 320.0, 321.0, 309.0, 319.0, 317.0, 316.0, 311.0, 319.0, 314.0, 291.0, 296.0, 319.0, 314.0, 319.0, 320.0, 316.0, 314.0, 291.0, 296.0, 293.0, 289.0, 314.0, 322.0, 311.0, 319.0, 316.0, 320.0, 314.0, 322.0, 319.0, 311.0, 319.0, 317.0, 66.0, 54.0, 314.0, 322.0, 310.0, 320.0, 314.0, 319.0, 319.0, 314.0, 322.0, 311.0, 308.0, 322.0, 311.0, 319.0, 311.0, 319.0, 317.0, 322.0, 316.0, 314.0, 311.0, 322.0, 316.0, 317.0, 319.0, 317.0, 313.0, 323.0, 313.0, 317.0, 293.0, 294.0, 319.0, 311.0, 308.0, 313.0, 319.0, 320.0, 314.0, 322.0, 316.0, 314.0, 311.0, 319.0, 317.0, 322.0, 273.0, 288.0, 316.0, 317.0, 316.0, 
314.0, 313.0, 314.0, 291.0, 291.0, 291.0, 291.0, 314.0, 322.0, 298.0, 289.0, 314.0, 322.0, 316.0, 314.0, 324.0, 312.0, 249.0, 264.0, 280.0, 296.0, 316.0, 317.0, 316.0, 317.0, 311.0, 319.0, 319.0, 317.0, 288.0, 294.0, 296.0, 291.0, 316.0, 317.0, 308.0, 316.0, 319.0, 314.0, 322.0, 314.0, 293.0, 289.0, 313.0, 314.0, 314.0, 319.0, 319.0, 317.0, 316.0, 314.0, 281.0, 301.0, 313.0, 314.0, 322.0, 308.0, 291.0, 288.0, 311.0, 319.0, 318.0, 312.0, 319.0, 314.0, 311.0, 319.0, 316.0, 314.0, 316.0, 317.0, 268.0, 262.0, 316.0, 320.0, 316.0, 317.0, 309.0, 324.0, 288.0, 288.0, 314.0, 319.0, 314.0, 322.0, 311.0, 319.0, 296.0, 291.0, 318.0, 315.0, 318.0, 315.0, 296.0, 285.0, 293.0, 294.0, 309.0, 324.0, 316.0, 314.0, 319.0, 317.0]}, "sampler_perf": {"mean_env_wait_ms": 0.7638319008211926, "mean_processing_ms": 0.22142837816064478, "mean_inference_ms": 1.3669160139559398}, "off_policy_estimator": {}, "info": {"num_steps_trained": 11616000, "num_steps_sampled": 6195200, "sample_time_ms": 22342.562, "load_time_ms": 38.969, "grad_time_ms": 10165.472, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.003674185834825039, "policy_loss": -0.0030232470016926527, "vf_loss": 72.50147247314453, "vf_explained_var": 0.7972453236579895, "kl": 0.002131336135789752, "entropy": 1.1054468154907227, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 6195200, "episodes_total": 15488, "training_iteration": 484, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-57-28", "timestamp": 1660262248, "time_this_iter_s": 30.70011305809021, "time_total_s": 20656.056334495544, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 20656.056334495544, "timesteps_since_restore": 6195200, "iterations_since_restore": 484, "perf": {"cpu_util_percent": 29.970454545454547, "ram_util_percent": 59.07727272727273}}
+{"episode_reward_max": 639.0, "episode_reward_min": 120.0, "episode_reward_mean": 616.19, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 54.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 308.095}, "custom_metrics": {"sparse_reward_mean": 213.6, "sparse_reward_min": 40, "sparse_reward_max": 220, "shaped_reward_mean": 188.99, "shaped_reward_min": 40, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.99, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.25, "onion_pickup_agent_1_min": 4, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.43, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.31, "useful_onion_pickup_agent_1_min": 4, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.17, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.2, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.03, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.09, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 16.54, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.8, "potting_onion_agent_1_min": 3, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.5, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 6.01, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.59, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.1, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.78, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 0.76, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.61, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.17, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.57, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.13, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.54, "optimal_onion_potting_agent_0_min": 
6, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.8, "optimal_onion_potting_agent_1_min": 3, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.54, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.8, "viable_onion_potting_agent_1_min": 3, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": 
{"episode_reward": [579.0, 636.0, 639.0, 590.0, 630.0, 639.0, 639.0, 630.0, 636.0, 636.0, 639.0, 633.0, 636.0, 582.0, 636.0, 621.0, 630.0, 633.0, 630.0, 633.0, 576.0, 582.0, 636.0, 630.0, 630.0, 587.0, 630.0, 630.0, 636.0, 633.0, 633.0, 633.0, 587.0, 633.0, 624.0, 633.0, 636.0, 582.0, 627.0, 633.0, 636.0, 630.0, 582.0, 627.0, 630.0, 579.0, 630.0, 630.0, 633.0, 630.0, 630.0, 633.0, 530.0, 636.0, 633.0, 633.0, 576.0, 633.0, 636.0, 630.0, 587.0, 633.0, 633.0, 581.0, 587.0, 633.0, 630.0, 636.0, 582.0, 587.0, 636.0, 582.0, 582.0, 633.0, 630.0, 636.0, 627.0, 633.0, 587.0, 633.0, 639.0, 630.0, 587.0, 582.0, 636.0, 630.0, 636.0, 636.0, 630.0, 636.0, 120.0, 636.0, 630.0, 633.0, 633.0, 633.0, 630.0, 630.0, 630.0, 639.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [285.0, 294.0, 319.0, 317.0, 319.0, 320.0, 299.0, 291.0, 316.0, 314.0, 319.0, 320.0, 324.0, 315.0, 316.0, 314.0, 314.0, 322.0, 324.0, 312.0, 317.0, 322.0, 314.0, 319.0, 314.0, 322.0, 286.0, 296.0, 317.0, 319.0, 308.0, 313.0, 313.0, 317.0, 319.0, 314.0, 321.0, 309.0, 319.0, 314.0, 299.0, 277.0, 293.0, 289.0, 314.0, 322.0, 311.0, 319.0, 316.0, 314.0, 288.0, 299.0, 311.0, 319.0, 316.0, 314.0, 319.0, 317.0, 319.0, 314.0, 316.0, 317.0, 327.0, 306.0, 296.0, 291.0, 316.0, 317.0, 308.0, 316.0, 319.0, 314.0, 322.0, 314.0, 293.0, 289.0, 313.0, 314.0, 314.0, 319.0, 319.0, 317.0, 316.0, 314.0, 281.0, 301.0, 313.0, 314.0, 322.0, 308.0, 291.0, 288.0, 311.0, 319.0, 318.0, 312.0, 319.0, 314.0, 311.0, 319.0, 316.0, 314.0, 316.0, 317.0, 268.0, 
262.0, 316.0, 320.0, 316.0, 317.0, 309.0, 324.0, 288.0, 288.0, 314.0, 319.0, 314.0, 322.0, 311.0, 319.0, 296.0, 291.0, 318.0, 315.0, 318.0, 315.0, 296.0, 285.0, 293.0, 294.0, 309.0, 324.0, 316.0, 314.0, 319.0, 317.0, 290.0, 292.0, 301.0, 286.0, 324.0, 312.0, 289.0, 293.0, 291.0, 291.0, 313.0, 320.0, 321.0, 309.0, 319.0, 317.0, 316.0, 311.0, 319.0, 314.0, 291.0, 296.0, 319.0, 314.0, 319.0, 320.0, 316.0, 314.0, 291.0, 296.0, 293.0, 289.0, 314.0, 322.0, 311.0, 319.0, 316.0, 320.0, 314.0, 322.0, 319.0, 311.0, 319.0, 317.0, 66.0, 54.0, 314.0, 322.0, 310.0, 320.0, 314.0, 319.0, 319.0, 314.0, 322.0, 311.0, 308.0, 322.0, 311.0, 319.0, 311.0, 319.0, 317.0, 322.0]}, "sampler_perf": {"mean_env_wait_ms": 0.7632241989034774, "mean_processing_ms": 0.22131123162111455, "mean_inference_ms": 1.3662700284091551}, "off_policy_estimator": {}, "info": {"num_steps_trained": 11640000, "num_steps_sampled": 6208000, "sample_time_ms": 22316.513, "load_time_ms": 39.137, "grad_time_ms": 10131.169, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0029844159726053476, "policy_loss": -0.003889852436259389, "vf_loss": 74.29019165039062, "vf_explained_var": 0.7697036862373352, "kl": 0.0019323105225339532, "entropy": 1.1094969511032104, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 6208000, "episodes_total": 15520, "training_iteration": 485, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-58-02", "timestamp": 1660262282, "time_this_iter_s": 33.77160096168518, "time_total_s": 20689.82793545723, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, 
"conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, 
"custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, 
"vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 20689.82793545723, "timesteps_since_restore": 6208000, "iterations_since_restore": 485, "perf": {"cpu_util_percent": 30.51875, "ram_util_percent": 58.97291666666666}}
+{"episode_reward_max": 639.0, "episode_reward_min": 120.0, "episode_reward_mean": 613.81, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 54.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 306.905}, "custom_metrics": {"sparse_reward_mean": 212.6, "sparse_reward_min": 40, "sparse_reward_max": 220, "shaped_reward_mean": 188.61, "shaped_reward_min": 40, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.93, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.16, "onion_pickup_agent_1_min": 4, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.43, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.21, "useful_onion_pickup_agent_1_min": 4, "useful_onion_pickup_agent_1_max": 20, "onion_drop_agent_0_mean": 0.15, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.16, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.03, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.1, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 16.61, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.73, "potting_onion_agent_1_min": 3, "potting_onion_agent_1_max": 20, "dish_pickup_agent_0_mean": 6.46, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 6.02, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.52, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.16, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.79, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 0.72, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.5, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.21, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.47, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.16, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.61, 
"optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.73, "optimal_onion_potting_agent_1_min": 3, "optimal_onion_potting_agent_1_max": 20, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.61, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.73, "viable_onion_potting_agent_1_min": 3, "viable_onion_potting_agent_1_max": 20, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [630.0, 576.0, 636.0, 621.0, 636.0, 636.0, 636.0, 576.0, 587.0, 633.0, 633.0, 582.0, 522.0, 633.0, 579.0, 636.0, 639.0, 582.0, 633.0, 576.0, 627.0, 636.0, 587.0, 639.0, 630.0, 636.0, 579.0, 582.0, 636.0, 582.0, 579.0, 633.0, 587.0, 633.0, 630.0, 636.0, 582.0, 587.0, 636.0, 582.0, 582.0, 633.0, 630.0, 636.0, 627.0, 633.0, 587.0, 633.0, 639.0, 630.0, 587.0, 582.0, 636.0, 630.0, 636.0, 636.0, 630.0, 636.0, 120.0, 636.0, 630.0, 633.0, 633.0, 633.0, 630.0, 630.0, 630.0, 639.0, 579.0, 636.0, 639.0, 590.0, 630.0, 639.0, 639.0, 630.0, 636.0, 636.0, 639.0, 633.0, 636.0, 582.0, 636.0, 621.0, 630.0, 633.0, 630.0, 633.0, 576.0, 582.0, 636.0, 630.0, 630.0, 587.0, 630.0, 630.0, 636.0, 633.0, 633.0, 633.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [313.0, 317.0, 294.0, 282.0, 317.0, 319.0, 310.0, 311.0, 319.0, 317.0, 319.0, 317.0, 309.0, 327.0, 293.0, 283.0, 293.0, 294.0, 311.0, 322.0, 314.0, 319.0, 290.0, 292.0, 262.0, 260.0, 316.0, 317.0, 286.0, 293.0, 317.0, 319.0, 322.0, 317.0, 291.0, 291.0, 319.0, 314.0, 282.0, 294.0, 313.0, 314.0, 320.0, 316.0, 296.0, 291.0, 322.0, 317.0, 306.0, 324.0, 314.0, 322.0, 285.0, 294.0, 289.0, 293.0, 319.0, 317.0, 285.0, 297.0, 283.0, 296.0, 314.0, 319.0, 293.0, 294.0, 309.0, 324.0, 316.0, 314.0, 319.0, 317.0, 290.0, 292.0, 301.0, 286.0, 324.0, 312.0, 289.0, 293.0, 291.0, 291.0, 313.0, 320.0, 321.0, 309.0, 319.0, 317.0, 316.0, 311.0, 319.0, 314.0, 291.0, 296.0, 319.0, 314.0, 319.0, 
320.0, 316.0, 314.0, 291.0, 296.0, 293.0, 289.0, 314.0, 322.0, 311.0, 319.0, 316.0, 320.0, 314.0, 322.0, 319.0, 311.0, 319.0, 317.0, 66.0, 54.0, 314.0, 322.0, 310.0, 320.0, 314.0, 319.0, 319.0, 314.0, 322.0, 311.0, 308.0, 322.0, 311.0, 319.0, 311.0, 319.0, 317.0, 322.0, 285.0, 294.0, 319.0, 317.0, 319.0, 320.0, 299.0, 291.0, 316.0, 314.0, 319.0, 320.0, 324.0, 315.0, 316.0, 314.0, 314.0, 322.0, 324.0, 312.0, 317.0, 322.0, 314.0, 319.0, 314.0, 322.0, 286.0, 296.0, 317.0, 319.0, 308.0, 313.0, 313.0, 317.0, 319.0, 314.0, 321.0, 309.0, 319.0, 314.0, 299.0, 277.0, 293.0, 289.0, 314.0, 322.0, 311.0, 319.0, 316.0, 314.0, 288.0, 299.0, 311.0, 319.0, 316.0, 314.0, 319.0, 317.0, 319.0, 314.0, 316.0, 317.0, 327.0, 306.0]}, "sampler_perf": {"mean_env_wait_ms": 0.762623780459851, "mean_processing_ms": 0.2211964092332032, "mean_inference_ms": 1.3657033505370475}, "off_policy_estimator": {}, "info": {"num_steps_trained": 11664000, "num_steps_sampled": 6220800, "sample_time_ms": 22297.013, "load_time_ms": 39.012, "grad_time_ms": 10179.524, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.004206617828458548, "policy_loss": -0.003324081189930439, "vf_loss": 80.87010192871094, "vf_explained_var": 0.7655022740364075, "kl": 0.001813961542211473, "entropy": 1.1126155853271484, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 6220800, "episodes_total": 15552, "training_iteration": 486, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-58-35", "timestamp": 1660262315, "time_this_iter_s": 33.220837116241455, "time_total_s": 20723.04877257347, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 20723.04877257347, "timesteps_since_restore": 6220800, "iterations_since_restore": 486, "perf": {"cpu_util_percent": 32.0404255319149, "ram_util_percent": 59.24255319148937}}
+{"episode_reward_max": 639.0, "episode_reward_min": 522.0, "episode_reward_mean": 618.92, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 260.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 309.46}, "custom_metrics": {"sparse_reward_mean": 214.6, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 189.72, "shaped_reward_min": 161, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.92, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.48, "onion_pickup_agent_1_min": 15, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.4, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.58, "useful_onion_pickup_agent_1_min": 14, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.15, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.16, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.06, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.08, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 16.57, "potting_onion_agent_0_min": 13, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 18.0, "potting_onion_agent_1_min": 15, "potting_onion_agent_1_max": 20, "dish_pickup_agent_0_mean": 6.42, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 6.07, "dish_pickup_agent_1_min": 4, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.53, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.14, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.7, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.76, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.58, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.22, "soup_pickup_agent_1_min": 4, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.53, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.2, "soup_delivery_agent_1_min": 4, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.57, 
"optimal_onion_potting_agent_0_min": 13, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 18.0, "optimal_onion_potting_agent_1_min": 15, "optimal_onion_potting_agent_1_max": 20, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.57, "viable_onion_potting_agent_0_min": 13, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 18.0, "viable_onion_potting_agent_1_min": 15, "viable_onion_potting_agent_1_max": 20, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [636.0, 639.0, 633.0, 636.0, 630.0, 587.0, 633.0, 630.0, 630.0, 636.0, 636.0, 587.0, 636.0, 630.0, 633.0, 582.0, 633.0, 633.0, 636.0, 561.0, 630.0, 630.0, 567.0, 639.0, 630.0, 627.0, 627.0, 630.0, 587.0, 633.0, 633.0, 582.0, 630.0, 630.0, 630.0, 639.0, 579.0, 636.0, 639.0, 590.0, 630.0, 639.0, 639.0, 630.0, 636.0, 636.0, 639.0, 633.0, 636.0, 582.0, 636.0, 621.0, 630.0, 633.0, 630.0, 633.0, 576.0, 582.0, 636.0, 630.0, 630.0, 587.0, 630.0, 630.0, 636.0, 633.0, 633.0, 633.0, 630.0, 576.0, 636.0, 621.0, 636.0, 636.0, 636.0, 576.0, 587.0, 633.0, 633.0, 582.0, 522.0, 633.0, 579.0, 636.0, 639.0, 582.0, 633.0, 576.0, 627.0, 636.0, 587.0, 639.0, 630.0, 636.0, 579.0, 582.0, 636.0, 582.0, 579.0, 633.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [319.0, 317.0, 319.0, 320.0, 316.0, 317.0, 322.0, 314.0, 318.0, 312.0, 296.0, 291.0, 316.0, 317.0, 316.0, 314.0, 308.0, 322.0, 319.0, 317.0, 314.0, 322.0, 299.0, 288.0, 324.0, 312.0, 313.0, 317.0, 308.0, 325.0, 288.0, 294.0, 316.0, 317.0, 319.0, 314.0, 319.0, 317.0, 277.0, 284.0, 316.0, 314.0, 316.0, 314.0, 278.0, 289.0, 319.0, 320.0, 316.0, 314.0, 305.0, 322.0, 310.0, 317.0, 316.0, 314.0, 296.0, 291.0, 316.0, 317.0, 316.0, 317.0, 288.0, 294.0, 308.0, 322.0, 311.0, 319.0, 311.0, 319.0, 317.0, 322.0, 285.0, 294.0, 319.0, 317.0, 319.0, 320.0, 299.0, 291.0, 316.0, 314.0, 319.0, 320.0, 324.0, 315.0, 316.0, 314.0, 314.0, 322.0, 324.0, 312.0, 317.0, 322.0, 314.0, 319.0, 314.0, 
322.0, 286.0, 296.0, 317.0, 319.0, 308.0, 313.0, 313.0, 317.0, 319.0, 314.0, 321.0, 309.0, 319.0, 314.0, 299.0, 277.0, 293.0, 289.0, 314.0, 322.0, 311.0, 319.0, 316.0, 314.0, 288.0, 299.0, 311.0, 319.0, 316.0, 314.0, 319.0, 317.0, 319.0, 314.0, 316.0, 317.0, 327.0, 306.0, 313.0, 317.0, 294.0, 282.0, 317.0, 319.0, 310.0, 311.0, 319.0, 317.0, 319.0, 317.0, 309.0, 327.0, 293.0, 283.0, 293.0, 294.0, 311.0, 322.0, 314.0, 319.0, 290.0, 292.0, 262.0, 260.0, 316.0, 317.0, 286.0, 293.0, 317.0, 319.0, 322.0, 317.0, 291.0, 291.0, 319.0, 314.0, 282.0, 294.0, 313.0, 314.0, 320.0, 316.0, 296.0, 291.0, 322.0, 317.0, 306.0, 324.0, 314.0, 322.0, 285.0, 294.0, 289.0, 293.0, 319.0, 317.0, 285.0, 297.0, 283.0, 296.0, 314.0, 319.0]}, "sampler_perf": {"mean_env_wait_ms": 0.762033130729778, "mean_processing_ms": 0.22108464880455408, "mean_inference_ms": 1.3652218358269517}, "off_policy_estimator": {}, "info": {"num_steps_trained": 11688000, "num_steps_sampled": 6233600, "sample_time_ms": 22173.711, "load_time_ms": 39.002, "grad_time_ms": 10239.525, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0017484220443293452, "policy_loss": -0.005304198246449232, "vf_loss": 76.0898666381836, "vf_explained_var": 0.767227828502655, "kl": 0.0019641267135739326, "entropy": 1.1127411127090454, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 6233600, "episodes_total": 15584, "training_iteration": 487, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-59-10", "timestamp": 1660262350, "time_this_iter_s": 34.99830985069275, "time_total_s": 20758.047082424164, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 20758.047082424164, "timesteps_since_restore": 6233600, "iterations_since_restore": 487, "perf": {"cpu_util_percent": 33.78367346938776, "ram_util_percent": 59.60612244897959}}
+{"episode_reward_max": 644.0, "episode_reward_min": 522.0, "episode_reward_mean": 619.8, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 260.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 309.9}, "custom_metrics": {"sparse_reward_mean": 215.0, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 189.8, "shaped_reward_min": 160, "shaped_reward_max": 204, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.95, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.48, "onion_pickup_agent_1_min": 15, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 16.5, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.75, "useful_onion_pickup_agent_1_min": 14, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.11, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.13, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.05, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.03, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.57, "potting_onion_agent_0_min": 13, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 18.03, "potting_onion_agent_1_min": 15, "potting_onion_agent_1_max": 20, "dish_pickup_agent_0_mean": 6.38, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 6.09, "dish_pickup_agent_1_min": 4, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.5, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.15, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.63, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.75, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.6, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.21, "soup_pickup_agent_1_min": 4, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.56, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.19, "soup_delivery_agent_1_min": 4, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.57, 
"optimal_onion_potting_agent_0_min": 13, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 18.03, "optimal_onion_potting_agent_1_min": 15, "optimal_onion_potting_agent_1_max": 20, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.57, "viable_onion_potting_agent_0_min": 13, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 18.03, "viable_onion_potting_agent_1_min": 15, "viable_onion_potting_agent_1_max": 20, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 639.0, 639.0, 600.0, 630.0, 639.0, 644.0, 639.0, 633.0, 630.0, 639.0, 630.0, 636.0, 582.0, 630.0, 633.0, 633.0, 633.0, 636.0, 630.0, 630.0, 633.0, 582.0, 636.0, 630.0, 633.0, 630.0, 627.0, 639.0, 582.0, 633.0, 636.0, 636.0, 633.0, 633.0, 633.0, 630.0, 576.0, 636.0, 621.0, 636.0, 636.0, 636.0, 576.0, 587.0, 633.0, 633.0, 582.0, 522.0, 633.0, 579.0, 636.0, 639.0, 582.0, 633.0, 576.0, 627.0, 636.0, 587.0, 639.0, 630.0, 636.0, 579.0, 582.0, 636.0, 582.0, 579.0, 633.0, 636.0, 639.0, 633.0, 636.0, 630.0, 587.0, 633.0, 630.0, 630.0, 636.0, 636.0, 587.0, 636.0, 630.0, 633.0, 582.0, 633.0, 633.0, 636.0, 561.0, 630.0, 630.0, 567.0, 639.0, 630.0, 627.0, 627.0, 630.0, 587.0, 633.0, 633.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [293.0, 286.0, 322.0, 317.0, 319.0, 320.0, 301.0, 299.0, 321.0, 309.0, 317.0, 322.0, 319.0, 325.0, 319.0, 320.0, 316.0, 317.0, 316.0, 314.0, 314.0, 325.0, 314.0, 316.0, 314.0, 322.0, 291.0, 291.0, 313.0, 317.0, 314.0, 319.0, 314.0, 319.0, 316.0, 317.0, 319.0, 317.0, 311.0, 319.0, 313.0, 317.0, 319.0, 314.0, 291.0, 291.0, 319.0, 317.0, 311.0, 319.0, 314.0, 319.0, 316.0, 314.0, 310.0, 317.0, 322.0, 317.0, 288.0, 294.0, 316.0, 317.0, 317.0, 319.0, 319.0, 317.0, 319.0, 314.0, 316.0, 317.0, 327.0, 306.0, 313.0, 317.0, 294.0, 282.0, 317.0, 319.0, 310.0, 311.0, 319.0, 317.0, 319.0, 317.0, 309.0, 327.0, 293.0, 283.0, 293.0, 294.0, 311.0, 322.0, 314.0, 319.0, 290.0, 292.0, 262.0, 
260.0, 316.0, 317.0, 286.0, 293.0, 317.0, 319.0, 322.0, 317.0, 291.0, 291.0, 319.0, 314.0, 282.0, 294.0, 313.0, 314.0, 320.0, 316.0, 296.0, 291.0, 322.0, 317.0, 306.0, 324.0, 314.0, 322.0, 285.0, 294.0, 289.0, 293.0, 319.0, 317.0, 285.0, 297.0, 283.0, 296.0, 314.0, 319.0, 319.0, 317.0, 319.0, 320.0, 316.0, 317.0, 322.0, 314.0, 318.0, 312.0, 296.0, 291.0, 316.0, 317.0, 316.0, 314.0, 308.0, 322.0, 319.0, 317.0, 314.0, 322.0, 299.0, 288.0, 324.0, 312.0, 313.0, 317.0, 308.0, 325.0, 288.0, 294.0, 316.0, 317.0, 319.0, 314.0, 319.0, 317.0, 277.0, 284.0, 316.0, 314.0, 316.0, 314.0, 278.0, 289.0, 319.0, 320.0, 316.0, 314.0, 305.0, 322.0, 310.0, 317.0, 316.0, 314.0, 296.0, 291.0, 316.0, 317.0, 316.0, 317.0, 288.0, 294.0]}, "sampler_perf": {"mean_env_wait_ms": 0.7614456124606471, "mean_processing_ms": 0.22097346860872114, "mean_inference_ms": 1.3647307081670101}, "off_policy_estimator": {}, "info": {"num_steps_trained": 11712000, "num_steps_sampled": 6246400, "sample_time_ms": 21961.201, "load_time_ms": 38.731, "grad_time_ms": 10224.967, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.00313456985168159, "policy_loss": -0.003917869180440903, "vf_loss": 76.04095458984375, "vf_explained_var": 0.7662898898124695, "kl": 0.0023237813729792833, "entropy": 1.1033259630203247, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 6246400, "episodes_total": 15616, "training_iteration": 488, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-59-42", "timestamp": 1660262382, "time_this_iter_s": 32.1832549571991, "time_total_s": 20790.230337381363, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 20790.230337381363, "timesteps_since_restore": 6246400, "iterations_since_restore": 488, "perf": {"cpu_util_percent": 31.12391304347826, "ram_util_percent": 59.10434782608694}}
+{"episode_reward_max": 644.0, "episode_reward_min": 561.0, "episode_reward_mean": 624.65, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 277.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 312.325}, "custom_metrics": {"sparse_reward_mean": 216.8, "sparse_reward_min": 200, "sparse_reward_max": 220, "shaped_reward_mean": 191.05, "shaped_reward_min": 160, "shaped_reward_max": 204, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.97, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 19, "onion_pickup_agent_1_mean": 18.65, "onion_pickup_agent_1_min": 16, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.52, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 18, "useful_onion_pickup_agent_1_mean": 17.98, "useful_onion_pickup_agent_1_min": 14, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.11, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.16, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.05, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.04, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.54, "potting_onion_agent_0_min": 13, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 18.21, "potting_onion_agent_1_min": 16, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.33, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 6.0, "dish_pickup_agent_1_min": 4, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.63, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.17, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.5, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.68, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.71, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.17, "soup_pickup_agent_1_min": 4, "soup_pickup_agent_1_max": 6, "soup_delivery_agent_0_mean": 5.68, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.16, "soup_delivery_agent_1_min": 4, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.54, 
"optimal_onion_potting_agent_0_min": 13, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 18.21, "optimal_onion_potting_agent_1_min": 16, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.54, "viable_onion_potting_agent_0_min": 13, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 18.21, "viable_onion_potting_agent_1_min": 16, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [633.0, 633.0, 636.0, 636.0, 639.0, 639.0, 639.0, 633.0, 639.0, 624.0, 624.0, 636.0, 630.0, 582.0, 636.0, 582.0, 576.0, 639.0, 630.0, 627.0, 630.0, 627.0, 630.0, 630.0, 639.0, 636.0, 636.0, 639.0, 636.0, 636.0, 636.0, 630.0, 636.0, 582.0, 579.0, 633.0, 636.0, 639.0, 633.0, 636.0, 630.0, 587.0, 633.0, 630.0, 630.0, 636.0, 636.0, 587.0, 636.0, 630.0, 633.0, 582.0, 633.0, 633.0, 636.0, 561.0, 630.0, 630.0, 567.0, 639.0, 630.0, 627.0, 627.0, 630.0, 587.0, 633.0, 633.0, 582.0, 579.0, 639.0, 639.0, 600.0, 630.0, 639.0, 644.0, 639.0, 633.0, 630.0, 639.0, 630.0, 636.0, 582.0, 630.0, 633.0, 633.0, 633.0, 636.0, 630.0, 630.0, 633.0, 582.0, 636.0, 630.0, 633.0, 630.0, 627.0, 639.0, 582.0, 633.0, 636.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [314.0, 319.0, 318.0, 315.0, 319.0, 317.0, 314.0, 322.0, 322.0, 317.0, 319.0, 320.0, 319.0, 320.0, 316.0, 317.0, 327.0, 312.0, 314.0, 310.0, 316.0, 308.0, 316.0, 320.0, 308.0, 322.0, 291.0, 291.0, 319.0, 317.0, 294.0, 288.0, 285.0, 291.0, 319.0, 320.0, 311.0, 319.0, 308.0, 319.0, 319.0, 311.0, 313.0, 314.0, 319.0, 311.0, 316.0, 314.0, 322.0, 317.0, 319.0, 317.0, 316.0, 320.0, 319.0, 320.0, 317.0, 319.0, 324.0, 312.0, 319.0, 317.0, 316.0, 314.0, 319.0, 317.0, 285.0, 297.0, 283.0, 296.0, 314.0, 319.0, 319.0, 317.0, 319.0, 320.0, 316.0, 317.0, 322.0, 314.0, 318.0, 312.0, 296.0, 291.0, 316.0, 317.0, 316.0, 314.0, 308.0, 322.0, 319.0, 317.0, 314.0, 322.0, 299.0, 288.0, 324.0, 
312.0, 313.0, 317.0, 308.0, 325.0, 288.0, 294.0, 316.0, 317.0, 319.0, 314.0, 319.0, 317.0, 277.0, 284.0, 316.0, 314.0, 316.0, 314.0, 278.0, 289.0, 319.0, 320.0, 316.0, 314.0, 305.0, 322.0, 310.0, 317.0, 316.0, 314.0, 296.0, 291.0, 316.0, 317.0, 316.0, 317.0, 288.0, 294.0, 293.0, 286.0, 322.0, 317.0, 319.0, 320.0, 301.0, 299.0, 321.0, 309.0, 317.0, 322.0, 319.0, 325.0, 319.0, 320.0, 316.0, 317.0, 316.0, 314.0, 314.0, 325.0, 314.0, 316.0, 314.0, 322.0, 291.0, 291.0, 313.0, 317.0, 314.0, 319.0, 314.0, 319.0, 316.0, 317.0, 319.0, 317.0, 311.0, 319.0, 313.0, 317.0, 319.0, 314.0, 291.0, 291.0, 319.0, 317.0, 311.0, 319.0, 314.0, 319.0, 316.0, 314.0, 310.0, 317.0, 322.0, 317.0, 288.0, 294.0, 316.0, 317.0, 317.0, 319.0]}, "sampler_perf": {"mean_env_wait_ms": 0.7608592858442179, "mean_processing_ms": 0.22086239768906624, "mean_inference_ms": 1.3642056926460657}, "off_policy_estimator": {}, "info": {"num_steps_trained": 11736000, "num_steps_sampled": 6259200, "sample_time_ms": 22026.8, "load_time_ms": 38.825, "grad_time_ms": 10224.17, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0021181919146329165, "policy_loss": -0.004830162972211838, "vf_loss": 74.9923324584961, "vf_explained_var": 0.7754970192909241, "kl": 0.002056455472484231, "entropy": 1.1017627716064453, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 6259200, "episodes_total": 15648, "training_iteration": 489, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_01-00-16", "timestamp": 1660262416, "time_this_iter_s": 33.8461229801178, "time_total_s": 20824.07646036148, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 20824.07646036148, "timesteps_since_restore": 6259200, "iterations_since_restore": 489, "perf": {"cpu_util_percent": 34.53541666666667, "ram_util_percent": 59.26458333333333}}
+{"episode_reward_max": 644.0, "episode_reward_min": 575.0, "episode_reward_mean": 626.29, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 285.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 313.145}, "custom_metrics": {"sparse_reward_mean": 217.2, "sparse_reward_min": 200, "sparse_reward_max": 220, "shaped_reward_mean": 191.89, "shaped_reward_min": 160, "shaped_reward_max": 204, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.1, "onion_pickup_agent_0_min": 14, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.53, "onion_pickup_agent_1_min": 16, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.67, "useful_onion_pickup_agent_0_min": 13, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.99, "useful_onion_pickup_agent_1_min": 14, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.13, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.14, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.03, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.05, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.72, "potting_onion_agent_0_min": 14, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 18.17, "potting_onion_agent_1_min": 16, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.43, "dish_pickup_agent_0_min": 5, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 5.97, "dish_pickup_agent_1_min": 4, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.65, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.24, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.61, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.56, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.67, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.24, "soup_pickup_agent_1_min": 4, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.66, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.21, "soup_delivery_agent_1_min": 4, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.72, 
"optimal_onion_potting_agent_0_min": 14, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 18.17, "optimal_onion_potting_agent_1_min": 16, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.72, "viable_onion_potting_agent_0_min": 14, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 18.17, "viable_onion_potting_agent_1_min": 16, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [636.0, 636.0, 633.0, 575.0, 636.0, 633.0, 639.0, 636.0, 582.0, 636.0, 639.0, 636.0, 639.0, 579.0, 636.0, 636.0, 624.0, 630.0, 624.0, 639.0, 630.0, 633.0, 633.0, 639.0, 630.0, 633.0, 639.0, 630.0, 587.0, 630.0, 636.0, 587.0, 587.0, 633.0, 633.0, 582.0, 579.0, 639.0, 639.0, 600.0, 630.0, 639.0, 644.0, 639.0, 633.0, 630.0, 639.0, 630.0, 636.0, 582.0, 630.0, 633.0, 633.0, 633.0, 636.0, 630.0, 630.0, 633.0, 582.0, 636.0, 630.0, 633.0, 630.0, 627.0, 639.0, 582.0, 633.0, 636.0, 633.0, 633.0, 636.0, 636.0, 639.0, 639.0, 639.0, 633.0, 639.0, 624.0, 624.0, 636.0, 630.0, 582.0, 636.0, 582.0, 576.0, 639.0, 630.0, 627.0, 630.0, 627.0, 630.0, 630.0, 639.0, 636.0, 636.0, 639.0, 636.0, 636.0, 636.0, 630.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [319.0, 317.0, 319.0, 317.0, 316.0, 317.0, 288.0, 287.0, 319.0, 317.0, 319.0, 314.0, 319.0, 320.0, 316.0, 320.0, 285.0, 297.0, 319.0, 317.0, 317.0, 322.0, 314.0, 322.0, 317.0, 322.0, 288.0, 291.0, 317.0, 319.0, 314.0, 322.0, 310.0, 314.0, 314.0, 316.0, 316.0, 308.0, 319.0, 320.0, 316.0, 314.0, 316.0, 317.0, 314.0, 319.0, 319.0, 320.0, 318.0, 312.0, 313.0, 320.0, 315.0, 324.0, 319.0, 311.0, 288.0, 299.0, 316.0, 314.0, 314.0, 322.0, 290.0, 297.0, 296.0, 291.0, 316.0, 317.0, 316.0, 317.0, 288.0, 294.0, 293.0, 286.0, 322.0, 317.0, 319.0, 320.0, 301.0, 299.0, 321.0, 309.0, 317.0, 322.0, 319.0, 325.0, 319.0, 320.0, 316.0, 317.0, 316.0, 314.0, 314.0, 325.0, 314.0, 316.0, 314.0, 
322.0, 291.0, 291.0, 313.0, 317.0, 314.0, 319.0, 314.0, 319.0, 316.0, 317.0, 319.0, 317.0, 311.0, 319.0, 313.0, 317.0, 319.0, 314.0, 291.0, 291.0, 319.0, 317.0, 311.0, 319.0, 314.0, 319.0, 316.0, 314.0, 310.0, 317.0, 322.0, 317.0, 288.0, 294.0, 316.0, 317.0, 317.0, 319.0, 314.0, 319.0, 318.0, 315.0, 319.0, 317.0, 314.0, 322.0, 322.0, 317.0, 319.0, 320.0, 319.0, 320.0, 316.0, 317.0, 327.0, 312.0, 314.0, 310.0, 316.0, 308.0, 316.0, 320.0, 308.0, 322.0, 291.0, 291.0, 319.0, 317.0, 294.0, 288.0, 285.0, 291.0, 319.0, 320.0, 311.0, 319.0, 308.0, 319.0, 319.0, 311.0, 313.0, 314.0, 319.0, 311.0, 316.0, 314.0, 322.0, 317.0, 319.0, 317.0, 316.0, 320.0, 319.0, 320.0, 317.0, 319.0, 324.0, 312.0, 319.0, 317.0, 316.0, 314.0]}, "sampler_perf": {"mean_env_wait_ms": 0.760275311275054, "mean_processing_ms": 0.22075194950197713, "mean_inference_ms": 1.36361755561513}, "off_policy_estimator": {}, "info": {"num_steps_trained": 11760000, "num_steps_sampled": 6272000, "sample_time_ms": 22027.209, "load_time_ms": 38.894, "grad_time_ms": 10417.749, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0028506640810519457, "policy_loss": -0.003879321739077568, "vf_loss": 72.80782318115234, "vf_explained_var": 0.7767069935798645, "kl": 0.0016769763315096498, "entropy": 1.101601243019104, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 6272000, "episodes_total": 15680, "training_iteration": 490, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_01-00-49", "timestamp": 1660262449, "time_this_iter_s": 33.20848989486694, "time_total_s": 20857.284950256348, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 20857.284950256348, "timesteps_since_restore": 6272000, "iterations_since_restore": 490, "perf": {"cpu_util_percent": 36.26808510638298, "ram_util_percent": 59.0851063829787}}
+{"episode_reward_max": 639.0, "episode_reward_min": 444.0, "episode_reward_mean": 624.84, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 222.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 312.42}, "custom_metrics": {"sparse_reward_mean": 216.8, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 191.24, "shaped_reward_min": 124, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.13, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 18.45, "onion_pickup_agent_1_min": 15, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.74, "useful_onion_pickup_agent_0_min": 13, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.83, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.15, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.14, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.03, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.06, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.73, "potting_onion_agent_0_min": 13, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 18.06, "potting_onion_agent_1_min": 14, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.39, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.98, "dish_pickup_agent_1_min": 4, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.59, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.25, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.65, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.52, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.59, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.28, "soup_pickup_agent_1_min": 4, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.59, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.26, "soup_delivery_agent_1_min": 4, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.73, 
"optimal_onion_potting_agent_0_min": 13, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 18.06, "optimal_onion_potting_agent_1_min": 14, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.73, "viable_onion_potting_agent_0_min": 13, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 18.06, "viable_onion_potting_agent_1_min": 14, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [639.0, 627.0, 633.0, 636.0, 627.0, 633.0, 630.0, 627.0, 579.0, 630.0, 636.0, 639.0, 633.0, 630.0, 579.0, 639.0, 633.0, 615.0, 636.0, 630.0, 579.0, 582.0, 639.0, 636.0, 639.0, 444.0, 630.0, 633.0, 624.0, 630.0, 639.0, 639.0, 639.0, 582.0, 633.0, 636.0, 633.0, 633.0, 636.0, 636.0, 639.0, 639.0, 639.0, 633.0, 639.0, 624.0, 624.0, 636.0, 630.0, 582.0, 636.0, 582.0, 576.0, 639.0, 630.0, 627.0, 630.0, 627.0, 630.0, 630.0, 639.0, 636.0, 636.0, 639.0, 636.0, 636.0, 636.0, 630.0, 636.0, 636.0, 633.0, 575.0, 636.0, 633.0, 639.0, 636.0, 582.0, 636.0, 639.0, 636.0, 639.0, 579.0, 636.0, 636.0, 624.0, 630.0, 624.0, 639.0, 630.0, 633.0, 633.0, 639.0, 630.0, 633.0, 639.0, 630.0, 587.0, 630.0, 636.0, 587.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [318.0, 321.0, 311.0, 316.0, 316.0, 317.0, 319.0, 317.0, 311.0, 316.0, 311.0, 322.0, 311.0, 319.0, 307.0, 320.0, 288.0, 291.0, 317.0, 313.0, 319.0, 317.0, 319.0, 320.0, 314.0, 319.0, 314.0, 316.0, 291.0, 288.0, 319.0, 320.0, 316.0, 317.0, 307.0, 308.0, 314.0, 322.0, 311.0, 319.0, 289.0, 290.0, 291.0, 291.0, 316.0, 323.0, 314.0, 322.0, 322.0, 317.0, 222.0, 222.0, 311.0, 319.0, 319.0, 314.0, 313.0, 311.0, 311.0, 319.0, 322.0, 317.0, 314.0, 325.0, 322.0, 317.0, 288.0, 294.0, 316.0, 317.0, 317.0, 319.0, 314.0, 319.0, 318.0, 315.0, 319.0, 317.0, 314.0, 322.0, 322.0, 317.0, 319.0, 320.0, 319.0, 320.0, 316.0, 317.0, 327.0, 312.0, 314.0, 310.0, 316.0, 308.0, 316.0, 320.0, 308.0, 
322.0, 291.0, 291.0, 319.0, 317.0, 294.0, 288.0, 285.0, 291.0, 319.0, 320.0, 311.0, 319.0, 308.0, 319.0, 319.0, 311.0, 313.0, 314.0, 319.0, 311.0, 316.0, 314.0, 322.0, 317.0, 319.0, 317.0, 316.0, 320.0, 319.0, 320.0, 317.0, 319.0, 324.0, 312.0, 319.0, 317.0, 316.0, 314.0, 319.0, 317.0, 319.0, 317.0, 316.0, 317.0, 288.0, 287.0, 319.0, 317.0, 319.0, 314.0, 319.0, 320.0, 316.0, 320.0, 285.0, 297.0, 319.0, 317.0, 317.0, 322.0, 314.0, 322.0, 317.0, 322.0, 288.0, 291.0, 317.0, 319.0, 314.0, 322.0, 310.0, 314.0, 314.0, 316.0, 316.0, 308.0, 319.0, 320.0, 316.0, 314.0, 316.0, 317.0, 314.0, 319.0, 319.0, 320.0, 318.0, 312.0, 313.0, 320.0, 315.0, 324.0, 319.0, 311.0, 288.0, 299.0, 316.0, 314.0, 314.0, 322.0, 290.0, 297.0]}, "sampler_perf": {"mean_env_wait_ms": 0.7596883332679625, "mean_processing_ms": 0.22064007492809667, "mean_inference_ms": 1.3629331607275919}, "off_policy_estimator": {}, "info": {"num_steps_trained": 11784000, "num_steps_sampled": 6284800, "sample_time_ms": 21863.417, "load_time_ms": 38.973, "grad_time_ms": 10519.041, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0017596340039744973, "policy_loss": -0.008380659855902195, "vf_loss": 71.76502227783203, "vf_explained_var": 0.787438690662384, "kl": 0.0020715922582894564, "entropy": 1.110949158668518, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 6284800, "episodes_total": 15712, "training_iteration": 491, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_01-01-21", "timestamp": 1660262481, "time_this_iter_s": 31.476083278656006, "time_total_s": 20888.761033535004, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 20888.761033535004, "timesteps_since_restore": 6284800, "iterations_since_restore": 491, "perf": {"cpu_util_percent": 35.37777777777777, "ram_util_percent": 59.10222222222222}}
+{"episode_reward_max": 639.0, "episode_reward_min": 444.0, "episode_reward_mean": 619.3, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 222.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 309.65}, "custom_metrics": {"sparse_reward_mean": 215.0, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 189.3, "shaped_reward_min": 124, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.17, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 18.3, "onion_pickup_agent_1_min": 15, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.72, "useful_onion_pickup_agent_0_min": 13, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.53, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.21, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.17, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.08, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.08, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.68, "potting_onion_agent_0_min": 13, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.82, "potting_onion_agent_1_min": 13, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.3, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 6.1, "dish_pickup_agent_1_min": 4, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.37, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.23, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.69, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.6, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.51, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.29, "soup_pickup_agent_1_min": 4, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.51, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.26, "soup_delivery_agent_1_min": 4, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.68, 
"optimal_onion_potting_agent_0_min": 13, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.82, "optimal_onion_potting_agent_1_min": 13, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.68, "viable_onion_potting_agent_0_min": 13, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.82, "viable_onion_potting_agent_1_min": 13, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [573.0, 570.0, 582.0, 636.0, 627.0, 636.0, 630.0, 633.0, 636.0, 630.0, 576.0, 587.0, 579.0, 633.0, 636.0, 615.0, 633.0, 630.0, 633.0, 573.0, 579.0, 633.0, 582.0, 636.0, 525.0, 621.0, 627.0, 587.0, 630.0, 639.0, 630.0, 579.0, 636.0, 636.0, 636.0, 630.0, 636.0, 636.0, 633.0, 575.0, 636.0, 633.0, 639.0, 636.0, 582.0, 636.0, 639.0, 636.0, 639.0, 579.0, 636.0, 636.0, 624.0, 630.0, 624.0, 639.0, 630.0, 633.0, 633.0, 639.0, 630.0, 633.0, 639.0, 630.0, 587.0, 630.0, 636.0, 587.0, 639.0, 627.0, 633.0, 636.0, 627.0, 633.0, 630.0, 627.0, 579.0, 630.0, 636.0, 639.0, 633.0, 630.0, 579.0, 639.0, 633.0, 615.0, 636.0, 630.0, 579.0, 582.0, 639.0, 636.0, 639.0, 444.0, 630.0, 633.0, 624.0, 630.0, 639.0, 639.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [285.0, 288.0, 293.0, 277.0, 290.0, 292.0, 317.0, 319.0, 314.0, 313.0, 311.0, 325.0, 313.0, 317.0, 321.0, 312.0, 319.0, 317.0, 313.0, 317.0, 296.0, 280.0, 293.0, 294.0, 293.0, 286.0, 316.0, 317.0, 314.0, 322.0, 307.0, 308.0, 314.0, 319.0, 316.0, 314.0, 319.0, 314.0, 282.0, 291.0, 288.0, 291.0, 319.0, 314.0, 291.0, 291.0, 316.0, 320.0, 263.0, 262.0, 306.0, 315.0, 311.0, 316.0, 293.0, 294.0, 311.0, 319.0, 319.0, 320.0, 311.0, 319.0, 280.0, 299.0, 317.0, 319.0, 324.0, 312.0, 319.0, 317.0, 316.0, 314.0, 319.0, 317.0, 319.0, 317.0, 316.0, 317.0, 288.0, 287.0, 319.0, 317.0, 319.0, 314.0, 319.0, 320.0, 316.0, 320.0, 285.0, 297.0, 319.0, 317.0, 317.0, 322.0, 314.0, 322.0, 317.0, 
322.0, 288.0, 291.0, 317.0, 319.0, 314.0, 322.0, 310.0, 314.0, 314.0, 316.0, 316.0, 308.0, 319.0, 320.0, 316.0, 314.0, 316.0, 317.0, 314.0, 319.0, 319.0, 320.0, 318.0, 312.0, 313.0, 320.0, 315.0, 324.0, 319.0, 311.0, 288.0, 299.0, 316.0, 314.0, 314.0, 322.0, 290.0, 297.0, 318.0, 321.0, 311.0, 316.0, 316.0, 317.0, 319.0, 317.0, 311.0, 316.0, 311.0, 322.0, 311.0, 319.0, 307.0, 320.0, 288.0, 291.0, 317.0, 313.0, 319.0, 317.0, 319.0, 320.0, 314.0, 319.0, 314.0, 316.0, 291.0, 288.0, 319.0, 320.0, 316.0, 317.0, 307.0, 308.0, 314.0, 322.0, 311.0, 319.0, 289.0, 290.0, 291.0, 291.0, 316.0, 323.0, 314.0, 322.0, 322.0, 317.0, 222.0, 222.0, 311.0, 319.0, 319.0, 314.0, 313.0, 311.0, 311.0, 319.0, 322.0, 317.0, 314.0, 325.0]}, "sampler_perf": {"mean_env_wait_ms": 0.7590978232117643, "mean_processing_ms": 0.22052625115278437, "mean_inference_ms": 1.3621520810826262}, "off_policy_estimator": {}, "info": {"num_steps_trained": 11808000, "num_steps_sampled": 6297600, "sample_time_ms": 21732.6, "load_time_ms": 39.061, "grad_time_ms": 10636.833, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0013283310690894723, "policy_loss": -0.00562013266608119, "vf_loss": 75.04083251953125, "vf_explained_var": 0.7633475661277771, "kl": 0.002308204537257552, "entropy": 1.1112407445907593, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 6297600, "episodes_total": 15744, "training_iteration": 492, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_01-01-52", "timestamp": 1660262512, "time_this_iter_s": 31.412389039993286, "time_total_s": 20920.173422574997, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 20920.173422574997, "timesteps_since_restore": 6297600, "iterations_since_restore": 492, "perf": {"cpu_util_percent": 32.93999999999999, "ram_util_percent": 59.11333333333331}}
+{"episode_reward_max": 639.0, "episode_reward_min": 444.0, "episode_reward_mean": 615.98, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 222.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 307.99}, "custom_metrics": {"sparse_reward_mean": 213.8, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 188.38, "shaped_reward_min": 124, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.08, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 18.39, "onion_pickup_agent_1_min": 15, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.55, "useful_onion_pickup_agent_0_min": 13, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.42, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.26, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 7, "onion_drop_agent_1_mean": 0.19, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.17, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 7, "useful_onion_drop_agent_1_mean": 0.08, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.52, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.86, "potting_onion_agent_1_min": 13, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.27, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 6.05, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.33, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.15, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.62, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.64, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.55, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.21, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.55, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.17, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.52, 
"optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.86, "optimal_onion_potting_agent_1_min": 13, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.52, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.86, "viable_onion_potting_agent_1_min": 13, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [639.0, 630.0, 639.0, 627.0, 636.0, 582.0, 630.0, 593.0, 582.0, 627.0, 630.0, 530.0, 639.0, 636.0, 636.0, 519.0, 627.0, 636.0, 636.0, 633.0, 630.0, 627.0, 581.0, 636.0, 633.0, 633.0, 633.0, 633.0, 633.0, 582.0, 636.0, 633.0, 587.0, 630.0, 636.0, 587.0, 639.0, 627.0, 633.0, 636.0, 627.0, 633.0, 630.0, 627.0, 579.0, 630.0, 636.0, 639.0, 633.0, 630.0, 579.0, 639.0, 633.0, 615.0, 636.0, 630.0, 579.0, 582.0, 639.0, 636.0, 639.0, 444.0, 630.0, 633.0, 624.0, 630.0, 639.0, 639.0, 573.0, 570.0, 582.0, 636.0, 627.0, 636.0, 630.0, 633.0, 636.0, 630.0, 576.0, 587.0, 579.0, 633.0, 636.0, 615.0, 633.0, 630.0, 633.0, 573.0, 579.0, 633.0, 582.0, 636.0, 525.0, 621.0, 627.0, 587.0, 630.0, 639.0, 630.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [322.0, 317.0, 311.0, 319.0, 314.0, 325.0, 308.0, 319.0, 316.0, 320.0, 291.0, 291.0, 311.0, 319.0, 291.0, 302.0, 296.0, 286.0, 316.0, 311.0, 318.0, 312.0, 253.0, 277.0, 322.0, 317.0, 319.0, 317.0, 319.0, 317.0, 264.0, 255.0, 314.0, 313.0, 319.0, 317.0, 319.0, 317.0, 315.0, 318.0, 311.0, 319.0, 316.0, 311.0, 295.0, 286.0, 319.0, 317.0, 316.0, 317.0, 317.0, 316.0, 319.0, 314.0, 319.0, 314.0, 321.0, 312.0, 293.0, 289.0, 311.0, 325.0, 316.0, 317.0, 288.0, 299.0, 316.0, 314.0, 314.0, 322.0, 290.0, 297.0, 318.0, 321.0, 311.0, 316.0, 316.0, 317.0, 319.0, 317.0, 311.0, 316.0, 311.0, 322.0, 311.0, 319.0, 307.0, 320.0, 288.0, 291.0, 317.0, 313.0, 319.0, 317.0, 319.0, 320.0, 314.0, 
319.0, 314.0, 316.0, 291.0, 288.0, 319.0, 320.0, 316.0, 317.0, 307.0, 308.0, 314.0, 322.0, 311.0, 319.0, 289.0, 290.0, 291.0, 291.0, 316.0, 323.0, 314.0, 322.0, 322.0, 317.0, 222.0, 222.0, 311.0, 319.0, 319.0, 314.0, 313.0, 311.0, 311.0, 319.0, 322.0, 317.0, 314.0, 325.0, 285.0, 288.0, 293.0, 277.0, 290.0, 292.0, 317.0, 319.0, 314.0, 313.0, 311.0, 325.0, 313.0, 317.0, 321.0, 312.0, 319.0, 317.0, 313.0, 317.0, 296.0, 280.0, 293.0, 294.0, 293.0, 286.0, 316.0, 317.0, 314.0, 322.0, 307.0, 308.0, 314.0, 319.0, 316.0, 314.0, 319.0, 314.0, 282.0, 291.0, 288.0, 291.0, 319.0, 314.0, 291.0, 291.0, 316.0, 320.0, 263.0, 262.0, 306.0, 315.0, 311.0, 316.0, 293.0, 294.0, 311.0, 319.0, 319.0, 320.0, 311.0, 319.0, 280.0, 299.0]}, "sampler_perf": {"mean_env_wait_ms": 0.7584994121289265, "mean_processing_ms": 0.22040925813735274, "mean_inference_ms": 1.3613199972214534}, "off_policy_estimator": {}, "info": {"num_steps_trained": 11832000, "num_steps_sampled": 6310400, "sample_time_ms": 21713.634, "load_time_ms": 39.62, "grad_time_ms": 11039.119, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0016831206157803535, "policy_loss": -0.005174629390239716, "vf_loss": 74.1271743774414, "vf_explained_var": 0.7761192321777344, "kl": 0.002214068779721856, "entropy": 1.1099259853363037, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 6310400, "episodes_total": 15776, "training_iteration": 493, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_01-02-26", "timestamp": 1660262546, "time_this_iter_s": 33.39047908782959, "time_total_s": 20953.563901662827, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 20953.563901662827, "timesteps_since_restore": 6310400, "iterations_since_restore": 493, "perf": {"cpu_util_percent": 34.62978723404255, "ram_util_percent": 59.10212765957445}}
+{"episode_reward_max": 639.0, "episode_reward_min": 519.0, "episode_reward_mean": 617.75, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 253.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 308.875}, "custom_metrics": {"sparse_reward_mean": 214.2, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 189.35, "shaped_reward_min": 159, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.12, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 18.53, "onion_pickup_agent_1_min": 15, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.51, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 17.61, "useful_onion_pickup_agent_1_min": 14, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.32, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 7, "onion_drop_agent_1_mean": 0.16, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.17, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 7, "useful_onion_drop_agent_1_mean": 0.06, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.5, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 18.07, "potting_onion_agent_1_min": 13, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.41, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 5.99, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.46, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.12, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.64, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 0.66, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.65, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.13, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.64, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.1, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.5, 
"optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 18.07, "optimal_onion_potting_agent_1_min": 13, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.5, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 18.07, "viable_onion_potting_agent_1_min": 13, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 567.0, 639.0, 630.0, 639.0, 587.0, 627.0, 639.0, 636.0, 639.0, 630.0, 636.0, 639.0, 636.0, 633.0, 639.0, 564.0, 636.0, 636.0, 633.0, 587.0, 636.0, 636.0, 633.0, 582.0, 630.0, 636.0, 633.0, 582.0, 639.0, 636.0, 633.0, 624.0, 630.0, 639.0, 639.0, 573.0, 570.0, 582.0, 636.0, 627.0, 636.0, 630.0, 633.0, 636.0, 630.0, 576.0, 587.0, 579.0, 633.0, 636.0, 615.0, 633.0, 630.0, 633.0, 573.0, 579.0, 633.0, 582.0, 636.0, 525.0, 621.0, 627.0, 587.0, 630.0, 639.0, 630.0, 579.0, 639.0, 630.0, 639.0, 627.0, 636.0, 582.0, 630.0, 593.0, 582.0, 627.0, 630.0, 530.0, 639.0, 636.0, 636.0, 519.0, 627.0, 636.0, 636.0, 633.0, 630.0, 627.0, 581.0, 636.0, 633.0, 633.0, 633.0, 633.0, 633.0, 582.0, 636.0, 633.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [288.0, 294.0, 284.0, 283.0, 319.0, 320.0, 316.0, 314.0, 322.0, 317.0, 294.0, 293.0, 313.0, 314.0, 319.0, 320.0, 314.0, 322.0, 319.0, 320.0, 314.0, 316.0, 319.0, 317.0, 322.0, 317.0, 309.0, 327.0, 316.0, 317.0, 319.0, 320.0, 275.0, 289.0, 319.0, 317.0, 321.0, 315.0, 316.0, 317.0, 286.0, 301.0, 319.0, 317.0, 319.0, 317.0, 319.0, 314.0, 296.0, 286.0, 316.0, 314.0, 319.0, 317.0, 316.0, 317.0, 288.0, 294.0, 317.0, 322.0, 316.0, 320.0, 319.0, 314.0, 313.0, 311.0, 311.0, 319.0, 322.0, 317.0, 314.0, 325.0, 285.0, 288.0, 293.0, 277.0, 290.0, 292.0, 317.0, 319.0, 314.0, 313.0, 311.0, 325.0, 313.0, 317.0, 321.0, 312.0, 319.0, 317.0, 313.0, 317.0, 296.0, 280.0, 293.0, 294.0, 293.0, 
286.0, 316.0, 317.0, 314.0, 322.0, 307.0, 308.0, 314.0, 319.0, 316.0, 314.0, 319.0, 314.0, 282.0, 291.0, 288.0, 291.0, 319.0, 314.0, 291.0, 291.0, 316.0, 320.0, 263.0, 262.0, 306.0, 315.0, 311.0, 316.0, 293.0, 294.0, 311.0, 319.0, 319.0, 320.0, 311.0, 319.0, 280.0, 299.0, 322.0, 317.0, 311.0, 319.0, 314.0, 325.0, 308.0, 319.0, 316.0, 320.0, 291.0, 291.0, 311.0, 319.0, 291.0, 302.0, 296.0, 286.0, 316.0, 311.0, 318.0, 312.0, 253.0, 277.0, 322.0, 317.0, 319.0, 317.0, 319.0, 317.0, 264.0, 255.0, 314.0, 313.0, 319.0, 317.0, 319.0, 317.0, 315.0, 318.0, 311.0, 319.0, 316.0, 311.0, 295.0, 286.0, 319.0, 317.0, 316.0, 317.0, 317.0, 316.0, 319.0, 314.0, 319.0, 314.0, 321.0, 312.0, 293.0, 289.0, 311.0, 325.0, 316.0, 317.0]}, "sampler_perf": {"mean_env_wait_ms": 0.7579059857165342, "mean_processing_ms": 0.22029379571210359, "mean_inference_ms": 1.3605505316111535}, "off_policy_estimator": {}, "info": {"num_steps_trained": 11856000, "num_steps_sampled": 6323200, "sample_time_ms": 21716.733, "load_time_ms": 39.394, "grad_time_ms": 11172.934, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0015474725514650345, "policy_loss": -0.005591364111751318, "vf_loss": 76.93659210205078, "vf_explained_var": 0.7724745869636536, "kl": 0.002216791734099388, "entropy": 1.1096433401107788, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 6323200, "episodes_total": 15808, "training_iteration": 494, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_01-02-58", "timestamp": 1660262578, "time_this_iter_s": 32.0608389377594, "time_total_s": 20985.624740600586, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 20985.624740600586, "timesteps_since_restore": 6323200, "iterations_since_restore": 494, "perf": {"cpu_util_percent": 31.900000000000002, "ram_util_percent": 59.13260869565216}}
+{"episode_reward_max": 639.0, "episode_reward_min": 180.0, "episode_reward_mean": 615.83, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 89.0}, "policy_reward_max": {"ppo": 332.0}, "policy_reward_mean": {"ppo": 307.915}, "custom_metrics": {"sparse_reward_mean": 213.4, "sparse_reward_min": 60, "sparse_reward_max": 220, "shaped_reward_mean": 189.03, "shaped_reward_min": 60, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.02, "onion_pickup_agent_0_min": 5, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 18.42, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.43, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 17.54, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.27, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 7, "onion_drop_agent_1_mean": 0.14, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.12, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 7, "useful_onion_drop_agent_1_mean": 0.05, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.45, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 18.04, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.49, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 5.95, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 5.52, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.1, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.72, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 0.66, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 6, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.62, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.12, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.6, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.09, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.45, 
"optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 18.04, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.45, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 18.04, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [630.0, 579.0, 636.0, 636.0, 636.0, 627.0, 636.0, 579.0, 639.0, 633.0, 639.0, 576.0, 630.0, 573.0, 630.0, 636.0, 630.0, 519.0, 633.0, 633.0, 636.0, 636.0, 630.0, 639.0, 630.0, 639.0, 587.0, 584.0, 180.0, 630.0, 630.0, 627.0, 630.0, 639.0, 630.0, 579.0, 639.0, 630.0, 639.0, 627.0, 636.0, 582.0, 630.0, 593.0, 582.0, 627.0, 630.0, 530.0, 639.0, 636.0, 636.0, 519.0, 627.0, 636.0, 636.0, 633.0, 630.0, 627.0, 581.0, 636.0, 633.0, 633.0, 633.0, 633.0, 633.0, 582.0, 636.0, 633.0, 582.0, 567.0, 639.0, 630.0, 639.0, 587.0, 627.0, 639.0, 636.0, 639.0, 630.0, 636.0, 639.0, 636.0, 633.0, 639.0, 564.0, 636.0, 636.0, 633.0, 587.0, 636.0, 636.0, 633.0, 582.0, 630.0, 636.0, 633.0, 582.0, 639.0, 636.0, 633.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [319.0, 311.0, 291.0, 288.0, 319.0, 317.0, 304.0, 332.0, 324.0, 312.0, 321.0, 306.0, 324.0, 312.0, 285.0, 294.0, 322.0, 317.0, 316.0, 317.0, 320.0, 319.0, 288.0, 288.0, 321.0, 309.0, 288.0, 285.0, 311.0, 319.0, 314.0, 322.0, 313.0, 317.0, 256.0, 263.0, 314.0, 319.0, 311.0, 322.0, 319.0, 317.0, 319.0, 317.0, 314.0, 316.0, 319.0, 320.0, 319.0, 311.0, 312.0, 327.0, 296.0, 291.0, 296.0, 288.0, 91.0, 89.0, 316.0, 314.0, 311.0, 319.0, 308.0, 319.0, 311.0, 319.0, 319.0, 320.0, 311.0, 319.0, 280.0, 299.0, 322.0, 317.0, 311.0, 319.0, 314.0, 325.0, 308.0, 319.0, 316.0, 320.0, 291.0, 291.0, 311.0, 319.0, 291.0, 302.0, 296.0, 286.0, 316.0, 311.0, 318.0, 312.0, 253.0, 277.0, 322.0, 
317.0, 319.0, 317.0, 319.0, 317.0, 264.0, 255.0, 314.0, 313.0, 319.0, 317.0, 319.0, 317.0, 315.0, 318.0, 311.0, 319.0, 316.0, 311.0, 295.0, 286.0, 319.0, 317.0, 316.0, 317.0, 317.0, 316.0, 319.0, 314.0, 319.0, 314.0, 321.0, 312.0, 293.0, 289.0, 311.0, 325.0, 316.0, 317.0, 288.0, 294.0, 284.0, 283.0, 319.0, 320.0, 316.0, 314.0, 322.0, 317.0, 294.0, 293.0, 313.0, 314.0, 319.0, 320.0, 314.0, 322.0, 319.0, 320.0, 314.0, 316.0, 319.0, 317.0, 322.0, 317.0, 309.0, 327.0, 316.0, 317.0, 319.0, 320.0, 275.0, 289.0, 319.0, 317.0, 321.0, 315.0, 316.0, 317.0, 286.0, 301.0, 319.0, 317.0, 319.0, 317.0, 319.0, 314.0, 296.0, 286.0, 316.0, 314.0, 319.0, 317.0, 316.0, 317.0, 288.0, 294.0, 317.0, 322.0, 316.0, 320.0, 319.0, 314.0]}, "sampler_perf": {"mean_env_wait_ms": 0.7573280240404361, "mean_processing_ms": 0.22018399055339372, "mean_inference_ms": 1.359923906771843}, "off_policy_estimator": {}, "info": {"num_steps_trained": 11880000, "num_steps_sampled": 6336000, "sample_time_ms": 21753.166, "load_time_ms": 39.587, "grad_time_ms": 11166.874, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.002439265139400959, "policy_loss": -0.004945265594869852, "vf_loss": 79.36029815673828, "vf_explained_var": 0.7895925045013428, "kl": 0.0015944234328344464, "entropy": 1.1029914617538452, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 6336000, "episodes_total": 15840, "training_iteration": 495, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_01-03-32", "timestamp": 1660262612, "time_this_iter_s": 34.07752990722656, "time_total_s": 21019.702270507812, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 21019.702270507812, "timesteps_since_restore": 6336000, "iterations_since_restore": 495, "perf": {"cpu_util_percent": 32.84375, "ram_util_percent": 59.13125}}
+{"episode_reward_max": 639.0, "episode_reward_min": 180.0, "episode_reward_mean": 614.45, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 89.0}, "policy_reward_max": {"ppo": 332.0}, "policy_reward_mean": {"ppo": 307.225}, "custom_metrics": {"sparse_reward_mean": 212.8, "sparse_reward_min": 60, "sparse_reward_max": 220, "shaped_reward_mean": 188.85, "shaped_reward_min": 60, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.78, "onion_pickup_agent_0_min": 5, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 18.45, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.32, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 17.65, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.17, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.13, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.02, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.05, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.3, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 18.06, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.65, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 5.87, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 5.67, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.07, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.8, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 0.63, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 6, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.67, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.04, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.63, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.01, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.3, 
"optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 18.06, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.3, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 18.06, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [587.0, 633.0, 636.0, 627.0, 579.0, 630.0, 636.0, 633.0, 582.0, 639.0, 624.0, 630.0, 587.0, 582.0, 582.0, 639.0, 522.0, 639.0, 639.0, 639.0, 636.0, 630.0, 624.0, 639.0, 587.0, 633.0, 636.0, 582.0, 630.0, 579.0, 582.0, 630.0, 633.0, 582.0, 636.0, 633.0, 582.0, 567.0, 639.0, 630.0, 639.0, 587.0, 627.0, 639.0, 636.0, 639.0, 630.0, 636.0, 639.0, 636.0, 633.0, 639.0, 564.0, 636.0, 636.0, 633.0, 587.0, 636.0, 636.0, 633.0, 582.0, 630.0, 636.0, 633.0, 582.0, 639.0, 636.0, 633.0, 630.0, 579.0, 636.0, 636.0, 636.0, 627.0, 636.0, 579.0, 639.0, 633.0, 639.0, 576.0, 630.0, 573.0, 630.0, 636.0, 630.0, 519.0, 633.0, 633.0, 636.0, 636.0, 630.0, 639.0, 630.0, 639.0, 587.0, 584.0, 180.0, 630.0, 630.0, 627.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [293.0, 294.0, 314.0, 319.0, 319.0, 317.0, 318.0, 309.0, 294.0, 285.0, 311.0, 319.0, 319.0, 317.0, 314.0, 319.0, 290.0, 292.0, 319.0, 320.0, 307.0, 317.0, 318.0, 312.0, 296.0, 291.0, 291.0, 291.0, 296.0, 286.0, 319.0, 320.0, 260.0, 262.0, 316.0, 323.0, 319.0, 320.0, 319.0, 320.0, 319.0, 317.0, 316.0, 314.0, 315.0, 309.0, 322.0, 317.0, 296.0, 291.0, 316.0, 317.0, 324.0, 312.0, 294.0, 288.0, 316.0, 314.0, 282.0, 297.0, 288.0, 294.0, 316.0, 314.0, 321.0, 312.0, 293.0, 289.0, 311.0, 325.0, 316.0, 317.0, 288.0, 294.0, 284.0, 283.0, 319.0, 320.0, 316.0, 314.0, 322.0, 317.0, 294.0, 293.0, 313.0, 314.0, 319.0, 320.0, 314.0, 322.0, 319.0, 320.0, 314.0, 316.0, 319.0, 317.0, 322.0, 
317.0, 309.0, 327.0, 316.0, 317.0, 319.0, 320.0, 275.0, 289.0, 319.0, 317.0, 321.0, 315.0, 316.0, 317.0, 286.0, 301.0, 319.0, 317.0, 319.0, 317.0, 319.0, 314.0, 296.0, 286.0, 316.0, 314.0, 319.0, 317.0, 316.0, 317.0, 288.0, 294.0, 317.0, 322.0, 316.0, 320.0, 319.0, 314.0, 319.0, 311.0, 291.0, 288.0, 319.0, 317.0, 304.0, 332.0, 324.0, 312.0, 321.0, 306.0, 324.0, 312.0, 285.0, 294.0, 322.0, 317.0, 316.0, 317.0, 320.0, 319.0, 288.0, 288.0, 321.0, 309.0, 288.0, 285.0, 311.0, 319.0, 314.0, 322.0, 313.0, 317.0, 256.0, 263.0, 314.0, 319.0, 311.0, 322.0, 319.0, 317.0, 319.0, 317.0, 314.0, 316.0, 319.0, 320.0, 319.0, 311.0, 312.0, 327.0, 296.0, 291.0, 296.0, 288.0, 91.0, 89.0, 316.0, 314.0, 311.0, 319.0, 308.0, 319.0]}, "sampler_perf": {"mean_env_wait_ms": 0.7567700204261155, "mean_processing_ms": 0.22008117197400467, "mean_inference_ms": 1.3596748332774586}, "off_policy_estimator": {}, "info": {"num_steps_trained": 11904000, "num_steps_sampled": 6348800, "sample_time_ms": 22347.681, "load_time_ms": 39.524, "grad_time_ms": 11120.722, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.003682489274069667, "policy_loss": -0.0037913068663328886, "vf_loss": 80.29010772705078, "vf_explained_var": 0.7666907906532288, "kl": 0.0018009584164246917, "entropy": 1.1104191541671753, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 6348800, "episodes_total": 15872, "training_iteration": 496, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_01-04-10", "timestamp": 1660262650, "time_this_iter_s": 38.697832107543945, "time_total_s": 21058.400102615356, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 21058.400102615356, "timesteps_since_restore": 6348800, "iterations_since_restore": 496, "perf": {"cpu_util_percent": 33.66909090909091, "ram_util_percent": 59.103636363636355}}
+{"episode_reward_max": 639.0, "episode_reward_min": 180.0, "episode_reward_mean": 612.68, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 89.0}, "policy_reward_max": {"ppo": 332.0}, "policy_reward_mean": {"ppo": 306.34}, "custom_metrics": {"sparse_reward_mean": 212.2, "sparse_reward_min": 60, "sparse_reward_max": 220, "shaped_reward_mean": 188.28, "shaped_reward_min": 60, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.76, "onion_pickup_agent_0_min": 5, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.26, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.26, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.46, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.11, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.14, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.03, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.06, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.32, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.84, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 20, "dish_pickup_agent_0_mean": 6.5, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.77, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 5.66, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.14, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.73, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.53, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 6, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.61, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.1, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.58, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.03, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 3, "optimal_onion_potting_agent_0_mean": 16.32, 
"optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.84, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 20, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.32, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.84, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 20, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [636.0, 633.0, 636.0, 633.0, 636.0, 633.0, 630.0, 630.0, 627.0, 582.0, 630.0, 630.0, 633.0, 636.0, 579.0, 587.0, 633.0, 582.0, 582.0, 636.0, 579.0, 633.0, 633.0, 630.0, 630.0, 582.0, 630.0, 639.0, 579.0, 587.0, 582.0, 639.0, 582.0, 639.0, 636.0, 633.0, 630.0, 579.0, 636.0, 636.0, 636.0, 627.0, 636.0, 579.0, 639.0, 633.0, 639.0, 576.0, 630.0, 573.0, 630.0, 636.0, 630.0, 519.0, 633.0, 633.0, 636.0, 636.0, 630.0, 639.0, 630.0, 639.0, 587.0, 584.0, 180.0, 630.0, 630.0, 627.0, 587.0, 633.0, 636.0, 627.0, 579.0, 630.0, 636.0, 633.0, 582.0, 639.0, 624.0, 630.0, 587.0, 582.0, 582.0, 639.0, 522.0, 639.0, 639.0, 639.0, 636.0, 630.0, 624.0, 639.0, 587.0, 633.0, 636.0, 582.0, 630.0, 579.0, 582.0, 630.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [319.0, 317.0, 314.0, 319.0, 314.0, 322.0, 316.0, 317.0, 314.0, 322.0, 316.0, 317.0, 316.0, 314.0, 316.0, 314.0, 311.0, 316.0, 291.0, 291.0, 316.0, 314.0, 316.0, 314.0, 316.0, 317.0, 312.0, 324.0, 288.0, 291.0, 290.0, 297.0, 314.0, 319.0, 291.0, 291.0, 293.0, 289.0, 319.0, 317.0, 291.0, 288.0, 314.0, 319.0, 319.0, 314.0, 316.0, 314.0, 313.0, 317.0, 293.0, 289.0, 321.0, 309.0, 317.0, 322.0, 293.0, 286.0, 291.0, 296.0, 290.0, 292.0, 322.0, 317.0, 288.0, 294.0, 317.0, 322.0, 316.0, 320.0, 319.0, 314.0, 319.0, 311.0, 291.0, 288.0, 319.0, 317.0, 304.0, 332.0, 324.0, 312.0, 321.0, 306.0, 324.0, 312.0, 285.0, 294.0, 322.0, 317.0, 316.0, 317.0, 320.0, 319.0, 288.0, 288.0, 321.0, 
309.0, 288.0, 285.0, 311.0, 319.0, 314.0, 322.0, 313.0, 317.0, 256.0, 263.0, 314.0, 319.0, 311.0, 322.0, 319.0, 317.0, 319.0, 317.0, 314.0, 316.0, 319.0, 320.0, 319.0, 311.0, 312.0, 327.0, 296.0, 291.0, 296.0, 288.0, 91.0, 89.0, 316.0, 314.0, 311.0, 319.0, 308.0, 319.0, 293.0, 294.0, 314.0, 319.0, 319.0, 317.0, 318.0, 309.0, 294.0, 285.0, 311.0, 319.0, 319.0, 317.0, 314.0, 319.0, 290.0, 292.0, 319.0, 320.0, 307.0, 317.0, 318.0, 312.0, 296.0, 291.0, 291.0, 291.0, 296.0, 286.0, 319.0, 320.0, 260.0, 262.0, 316.0, 323.0, 319.0, 320.0, 319.0, 320.0, 319.0, 317.0, 316.0, 314.0, 315.0, 309.0, 322.0, 317.0, 296.0, 291.0, 316.0, 317.0, 324.0, 312.0, 294.0, 288.0, 316.0, 314.0, 282.0, 297.0, 288.0, 294.0, 316.0, 314.0]}, "sampler_perf": {"mean_env_wait_ms": 0.7562172148453271, "mean_processing_ms": 0.21998071761796992, "mean_inference_ms": 1.359487549617304}, "off_policy_estimator": {}, "info": {"num_steps_trained": 11928000, "num_steps_sampled": 6361600, "sample_time_ms": 22295.567, "load_time_ms": 39.346, "grad_time_ms": 11042.076, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0008341053617186844, "policy_loss": -0.007899199612438679, "vf_loss": 76.18367767333984, "vf_explained_var": 0.7670480608940125, "kl": 0.0021500647999346256, "entropy": 1.106536865234375, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 6361600, "episodes_total": 15904, "training_iteration": 497, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_01-04-44", "timestamp": 1660262684, "time_this_iter_s": 33.68950605392456, "time_total_s": 21092.08960866928, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 21092.08960866928, "timesteps_since_restore": 6361600, "iterations_since_restore": 497, "perf": {"cpu_util_percent": 35.32553191489361, "ram_util_percent": 59.131914893617}}
+{"episode_reward_max": 639.0, "episode_reward_min": 180.0, "episode_reward_mean": 612.64, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 89.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 306.32}, "custom_metrics": {"sparse_reward_mean": 212.2, "sparse_reward_min": 60, "sparse_reward_max": 220, "shaped_reward_mean": 188.24, "shaped_reward_min": 60, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.69, "onion_pickup_agent_0_min": 5, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.39, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.22, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.63, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.09, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.15, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.02, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.07, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.23, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.91, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 20, "dish_pickup_agent_0_mean": 6.5, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.7, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.66, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.13, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.7, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.51, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.66, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.06, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.63, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.98, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 3, "optimal_onion_potting_agent_0_mean": 16.23, 
"optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.91, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 20, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.23, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.91, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 20, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [636.0, 587.0, 630.0, 624.0, 582.0, 636.0, 582.0, 579.0, 636.0, 582.0, 636.0, 636.0, 636.0, 633.0, 630.0, 587.0, 630.0, 587.0, 582.0, 582.0, 630.0, 633.0, 630.0, 624.0, 639.0, 630.0, 630.0, 636.0, 633.0, 639.0, 624.0, 636.0, 180.0, 630.0, 630.0, 627.0, 587.0, 633.0, 636.0, 627.0, 579.0, 630.0, 636.0, 633.0, 582.0, 639.0, 624.0, 630.0, 587.0, 582.0, 582.0, 639.0, 522.0, 639.0, 639.0, 639.0, 636.0, 630.0, 624.0, 639.0, 587.0, 633.0, 636.0, 582.0, 630.0, 579.0, 582.0, 630.0, 636.0, 633.0, 636.0, 633.0, 636.0, 633.0, 630.0, 630.0, 627.0, 582.0, 630.0, 630.0, 633.0, 636.0, 579.0, 587.0, 633.0, 582.0, 582.0, 636.0, 579.0, 633.0, 633.0, 630.0, 630.0, 582.0, 630.0, 639.0, 579.0, 587.0, 582.0, 639.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [314.0, 322.0, 296.0, 291.0, 316.0, 314.0, 308.0, 316.0, 288.0, 294.0, 317.0, 319.0, 291.0, 291.0, 290.0, 289.0, 324.0, 312.0, 288.0, 294.0, 314.0, 322.0, 319.0, 317.0, 319.0, 317.0, 321.0, 312.0, 311.0, 319.0, 290.0, 297.0, 316.0, 314.0, 293.0, 294.0, 288.0, 294.0, 296.0, 286.0, 311.0, 319.0, 316.0, 317.0, 316.0, 314.0, 324.0, 300.0, 322.0, 317.0, 311.0, 319.0, 316.0, 314.0, 319.0, 317.0, 316.0, 317.0, 319.0, 320.0, 310.0, 314.0, 314.0, 322.0, 91.0, 89.0, 316.0, 314.0, 311.0, 319.0, 308.0, 319.0, 293.0, 294.0, 314.0, 319.0, 319.0, 317.0, 318.0, 309.0, 294.0, 285.0, 311.0, 319.0, 319.0, 317.0, 314.0, 319.0, 290.0, 292.0, 319.0, 320.0, 307.0, 317.0, 318.0, 312.0, 296.0, 
291.0, 291.0, 291.0, 296.0, 286.0, 319.0, 320.0, 260.0, 262.0, 316.0, 323.0, 319.0, 320.0, 319.0, 320.0, 319.0, 317.0, 316.0, 314.0, 315.0, 309.0, 322.0, 317.0, 296.0, 291.0, 316.0, 317.0, 324.0, 312.0, 294.0, 288.0, 316.0, 314.0, 282.0, 297.0, 288.0, 294.0, 316.0, 314.0, 319.0, 317.0, 314.0, 319.0, 314.0, 322.0, 316.0, 317.0, 314.0, 322.0, 316.0, 317.0, 316.0, 314.0, 316.0, 314.0, 311.0, 316.0, 291.0, 291.0, 316.0, 314.0, 316.0, 314.0, 316.0, 317.0, 312.0, 324.0, 288.0, 291.0, 290.0, 297.0, 314.0, 319.0, 291.0, 291.0, 293.0, 289.0, 319.0, 317.0, 291.0, 288.0, 314.0, 319.0, 319.0, 314.0, 316.0, 314.0, 313.0, 317.0, 293.0, 289.0, 321.0, 309.0, 317.0, 322.0, 293.0, 286.0, 291.0, 296.0, 290.0, 292.0, 322.0, 317.0]}, "sampler_perf": {"mean_env_wait_ms": 0.7556558584426459, "mean_processing_ms": 0.21987743266667745, "mean_inference_ms": 1.3592501447797594}, "off_policy_estimator": {}, "info": {"num_steps_trained": 11952000, "num_steps_sampled": 6374400, "sample_time_ms": 22219.899, "load_time_ms": 39.602, "grad_time_ms": 11084.562, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0010325837647542357, "policy_loss": -0.007863701321184635, "vf_loss": 73.90652465820312, "vf_explained_var": 0.7726984024047852, "kl": 0.0017767212120816112, "entropy": 1.1190696954727173, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 6374400, "episodes_total": 15936, "training_iteration": 498, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_01-05-16", "timestamp": 1660262716, "time_this_iter_s": 31.8541898727417, "time_total_s": 21123.943798542023, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 21123.943798542023, "timesteps_since_restore": 6374400, "iterations_since_restore": 498, "perf": {"cpu_util_percent": 35.686666666666675, "ram_util_percent": 59.20666666666665}}
+{"episode_reward_max": 639.0, "episode_reward_min": 444.0, "episode_reward_mean": 613.47, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 221.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 306.735}, "custom_metrics": {"sparse_reward_mean": 212.6, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 188.27, "shaped_reward_min": 124, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.05, "onion_pickup_agent_0_min": 14, "onion_pickup_agent_0_max": 26, "onion_pickup_agent_1_mean": 18.31, "onion_pickup_agent_1_min": 16, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.31, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.53, "useful_onion_pickup_agent_1_min": 15, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.29, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 14, "onion_drop_agent_1_mean": 0.13, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.18, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 14, "useful_onion_drop_agent_1_mean": 0.06, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.37, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.85, "potting_onion_agent_1_min": 16, "potting_onion_agent_1_max": 20, "dish_pickup_agent_0_mean": 6.37, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.83, "dish_pickup_agent_1_min": 4, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.53, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.09, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.65, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.57, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.61, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.17, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.57, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.06, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 3, "optimal_onion_potting_agent_0_mean": 16.37, 
"optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.85, "optimal_onion_potting_agent_1_min": 16, "optimal_onion_potting_agent_1_max": 20, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.37, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.85, "viable_onion_potting_agent_1_min": 16, "viable_onion_potting_agent_1_max": 20, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [636.0, 578.0, 587.0, 570.0, 587.0, 582.0, 587.0, 582.0, 573.0, 636.0, 630.0, 636.0, 633.0, 600.0, 633.0, 444.0, 630.0, 636.0, 636.0, 633.0, 636.0, 630.0, 582.0, 584.0, 639.0, 633.0, 630.0, 587.0, 587.0, 582.0, 633.0, 630.0, 630.0, 579.0, 582.0, 630.0, 636.0, 633.0, 636.0, 633.0, 636.0, 633.0, 630.0, 630.0, 627.0, 582.0, 630.0, 630.0, 633.0, 636.0, 579.0, 587.0, 633.0, 582.0, 582.0, 636.0, 579.0, 633.0, 633.0, 630.0, 630.0, 582.0, 630.0, 639.0, 579.0, 587.0, 582.0, 639.0, 636.0, 587.0, 630.0, 624.0, 582.0, 636.0, 582.0, 579.0, 636.0, 582.0, 636.0, 636.0, 636.0, 633.0, 630.0, 587.0, 630.0, 587.0, 582.0, 582.0, 630.0, 633.0, 630.0, 624.0, 639.0, 630.0, 630.0, 636.0, 633.0, 639.0, 624.0, 636.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [319.0, 317.0, 290.0, 288.0, 288.0, 299.0, 285.0, 285.0, 296.0, 291.0, 291.0, 291.0, 288.0, 299.0, 291.0, 291.0, 285.0, 288.0, 324.0, 312.0, 316.0, 314.0, 319.0, 317.0, 316.0, 317.0, 301.0, 299.0, 311.0, 322.0, 221.0, 223.0, 308.0, 322.0, 319.0, 317.0, 317.0, 319.0, 319.0, 314.0, 309.0, 327.0, 314.0, 316.0, 286.0, 296.0, 293.0, 291.0, 324.0, 315.0, 319.0, 314.0, 314.0, 316.0, 296.0, 291.0, 293.0, 294.0, 296.0, 286.0, 314.0, 319.0, 316.0, 314.0, 316.0, 314.0, 282.0, 297.0, 288.0, 294.0, 316.0, 314.0, 319.0, 317.0, 314.0, 319.0, 314.0, 322.0, 316.0, 317.0, 314.0, 322.0, 316.0, 317.0, 316.0, 314.0, 316.0, 314.0, 311.0, 316.0, 291.0, 291.0, 316.0, 314.0, 316.0, 314.0, 316.0, 
317.0, 312.0, 324.0, 288.0, 291.0, 290.0, 297.0, 314.0, 319.0, 291.0, 291.0, 293.0, 289.0, 319.0, 317.0, 291.0, 288.0, 314.0, 319.0, 319.0, 314.0, 316.0, 314.0, 313.0, 317.0, 293.0, 289.0, 321.0, 309.0, 317.0, 322.0, 293.0, 286.0, 291.0, 296.0, 290.0, 292.0, 322.0, 317.0, 314.0, 322.0, 296.0, 291.0, 316.0, 314.0, 308.0, 316.0, 288.0, 294.0, 317.0, 319.0, 291.0, 291.0, 290.0, 289.0, 324.0, 312.0, 288.0, 294.0, 314.0, 322.0, 319.0, 317.0, 319.0, 317.0, 321.0, 312.0, 311.0, 319.0, 290.0, 297.0, 316.0, 314.0, 293.0, 294.0, 288.0, 294.0, 296.0, 286.0, 311.0, 319.0, 316.0, 317.0, 316.0, 314.0, 324.0, 300.0, 322.0, 317.0, 311.0, 319.0, 316.0, 314.0, 319.0, 317.0, 316.0, 317.0, 319.0, 320.0, 310.0, 314.0, 314.0, 322.0]}, "sampler_perf": {"mean_env_wait_ms": 0.7550828949863245, "mean_processing_ms": 0.21976950142528295, "mean_inference_ms": 1.358698286827804}, "off_policy_estimator": {}, "info": {"num_steps_trained": 11976000, "num_steps_sampled": 6387200, "sample_time_ms": 22150.153, "load_time_ms": 39.223, "grad_time_ms": 10918.039, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0016517750918865204, "policy_loss": -0.00551184406504035, "vf_loss": 77.22379302978516, "vf_explained_var": 0.7703518271446228, "kl": 0.002163690747693181, "entropy": 1.1175265312194824, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 6387200, "episodes_total": 15968, "training_iteration": 499, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_01-05-48", "timestamp": 1660262748, "time_this_iter_s": 31.469547986984253, "time_total_s": 21155.413346529007, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 21155.413346529007, "timesteps_since_restore": 6387200, "iterations_since_restore": 499, "perf": {"cpu_util_percent": 36.49555555555556, "ram_util_percent": 59.12888888888887}}
+{"episode_reward_max": 639.0, "episode_reward_min": 66.0, "episode_reward_mean": 605.68, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 32.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 302.84}, "custom_metrics": {"sparse_reward_mean": 209.8, "sparse_reward_min": 20, "sparse_reward_max": 220, "shaped_reward_mean": 186.08, "shaped_reward_min": 26, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.84, "onion_pickup_agent_0_min": 2, "onion_pickup_agent_0_max": 26, "onion_pickup_agent_1_mean": 18.11, "onion_pickup_agent_1_min": 5, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.05, "useful_onion_pickup_agent_0_min": 2, "useful_onion_pickup_agent_0_max": 19, "useful_onion_pickup_agent_1_mean": 17.31, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.27, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 14, "onion_drop_agent_1_mean": 0.15, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.19, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 14, "useful_onion_drop_agent_1_mean": 0.08, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.21, "potting_onion_agent_0_min": 2, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.69, "potting_onion_agent_1_min": 4, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.45, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.87, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.43, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 4.98, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.81, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 0.63, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.52, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.11, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.47, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.03, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.21, 
"optimal_onion_potting_agent_0_min": 2, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.69, "optimal_onion_potting_agent_1_min": 4, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.21, "viable_onion_potting_agent_0_min": 2, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.69, "viable_onion_potting_agent_1_min": 4, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [636.0, 633.0, 639.0, 582.0, 636.0, 633.0, 66.0, 576.0, 630.0, 636.0, 582.0, 582.0, 627.0, 636.0, 639.0, 627.0, 421.0, 587.0, 582.0, 633.0, 627.0, 636.0, 582.0, 627.0, 630.0, 630.0, 633.0, 633.0, 636.0, 582.0, 633.0, 570.0, 579.0, 587.0, 582.0, 639.0, 636.0, 587.0, 630.0, 624.0, 582.0, 636.0, 582.0, 579.0, 636.0, 582.0, 636.0, 636.0, 636.0, 633.0, 630.0, 587.0, 630.0, 587.0, 582.0, 582.0, 630.0, 633.0, 630.0, 624.0, 639.0, 630.0, 630.0, 636.0, 633.0, 639.0, 624.0, 636.0, 636.0, 578.0, 587.0, 570.0, 587.0, 582.0, 587.0, 582.0, 573.0, 636.0, 630.0, 636.0, 633.0, 600.0, 633.0, 444.0, 630.0, 636.0, 636.0, 633.0, 636.0, 630.0, 582.0, 584.0, 639.0, 633.0, 630.0, 587.0, 587.0, 582.0, 633.0, 630.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [311.0, 325.0, 314.0, 319.0, 317.0, 322.0, 290.0, 292.0, 319.0, 317.0, 312.0, 321.0, 34.0, 32.0, 288.0, 288.0, 316.0, 314.0, 322.0, 314.0, 293.0, 289.0, 293.0, 289.0, 316.0, 311.0, 316.0, 320.0, 317.0, 322.0, 304.0, 323.0, 202.0, 219.0, 294.0, 293.0, 288.0, 294.0, 319.0, 314.0, 322.0, 305.0, 319.0, 317.0, 291.0, 291.0, 308.0, 319.0, 314.0, 316.0, 311.0, 319.0, 316.0, 317.0, 316.0, 317.0, 319.0, 317.0, 288.0, 294.0, 316.0, 317.0, 280.0, 290.0, 293.0, 286.0, 291.0, 296.0, 290.0, 292.0, 322.0, 317.0, 314.0, 322.0, 296.0, 291.0, 316.0, 314.0, 308.0, 316.0, 288.0, 294.0, 317.0, 319.0, 291.0, 291.0, 290.0, 289.0, 324.0, 312.0, 288.0, 294.0, 314.0, 322.0, 319.0, 317.0, 319.0, 
317.0, 321.0, 312.0, 311.0, 319.0, 290.0, 297.0, 316.0, 314.0, 293.0, 294.0, 288.0, 294.0, 296.0, 286.0, 311.0, 319.0, 316.0, 317.0, 316.0, 314.0, 324.0, 300.0, 322.0, 317.0, 311.0, 319.0, 316.0, 314.0, 319.0, 317.0, 316.0, 317.0, 319.0, 320.0, 310.0, 314.0, 314.0, 322.0, 319.0, 317.0, 290.0, 288.0, 288.0, 299.0, 285.0, 285.0, 296.0, 291.0, 291.0, 291.0, 288.0, 299.0, 291.0, 291.0, 285.0, 288.0, 324.0, 312.0, 316.0, 314.0, 319.0, 317.0, 316.0, 317.0, 301.0, 299.0, 311.0, 322.0, 221.0, 223.0, 308.0, 322.0, 319.0, 317.0, 317.0, 319.0, 319.0, 314.0, 309.0, 327.0, 314.0, 316.0, 286.0, 296.0, 293.0, 291.0, 324.0, 315.0, 319.0, 314.0, 314.0, 316.0, 296.0, 291.0, 293.0, 294.0, 296.0, 286.0, 314.0, 319.0, 316.0, 314.0]}, "sampler_perf": {"mean_env_wait_ms": 0.7545081908133792, "mean_processing_ms": 0.21966006909098515, "mean_inference_ms": 1.3581228643420677}, "off_policy_estimator": {}, "info": {"num_steps_trained": 12000000, "num_steps_sampled": 6400000, "sample_time_ms": 22245.607, "load_time_ms": 39.734, "grad_time_ms": 10935.257, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0013599375961348414, "policy_loss": -0.008364356122910976, "vf_loss": 75.6484375, "vf_explained_var": 0.7971202731132507, "kl": 0.002114498522132635, "entropy": 1.1208560466766357, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 6400000, "episodes_total": 16000, "training_iteration": 500, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_01-06-22", "timestamp": 1660262782, "time_this_iter_s": 34.34099221229553, "time_total_s": 21189.754338741302, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 21189.754338741302, "timesteps_since_restore": 6400000, "iterations_since_restore": 500, "perf": {"cpu_util_percent": 34.074999999999996, "ram_util_percent": 59.19375}}

From e8ecd6990fa7a3b3d18c81f1116c18bf19cd3027 Mon Sep 17 00:00:00 2001
From: Alexander Lichtenstein
 <49325191+alexlichtenstein@users.noreply.github.com>
Date: Wed, 24 Aug 2022 01:15:38 +0200
Subject: [PATCH 13/38] training script for 5 classic layouts

---
 human_aware_rl/ppo/run_experiments.sh | 37 +++++++++++++++++++--------
 1 file changed, 27 insertions(+), 10 deletions(-)

diff --git a/human_aware_rl/ppo/run_experiments.sh b/human_aware_rl/ppo/run_experiments.sh
index 46a59b9f..0c6532a6 100755
--- a/human_aware_rl/ppo/run_experiments.sh
+++ b/human_aware_rl/ppo/run_experiments.sh
@@ -1,11 +1,28 @@
 #!/usr/bin/env bash
-python ppo_rllib_client.py with seeds="[2229]" lr=5e-4 reward_shaping_horizon=2.5e6 vf_loss_coeff=1e-4 num_training_iters=500 layout_name="cramped_room" experiment_name="ppo_sp_higher_S_hor_S_final_1e-2" entropy_coeff_horizon=3e6 entropy_coeff_end=1e-2
-python ppo_rllib_client.py with seeds="[2229]" lr=5e-4 reward_shaping_horizon=2.5e6 vf_loss_coeff=1e-4 num_training_iters=500 layout_name="cramped_room" experiment_name="ppo_sp_higher_S_hor_S_final_1e-3" entropy_coeff_horizon=3e6 entropy_coeff_end=1e-3
-python ppo_rllib_client.py with seeds="[2229]" lr=5e-4 reward_shaping_horizon=2.5e6 vf_loss_coeff=1e-4 num_training_iters=500 layout_name="cramped_room" experiment_name="ppo_sp_higher_S_hor_S_final_1e-4" entropy_coeff_horizon=3e6 entropy_coeff_end=1e-4
-python ppo_rllib_client.py with seeds="[2229]" lr=5e-4 reward_shaping_horizon=2.5e6 vf_loss_coeff=1e-4 num_training_iters=500 layout_name="cramped_room" experiment_name="ppo_sp_S_final_1e-2" entropy_coeff_end=1e-2
-python ppo_rllib_client.py with seeds="[2229]" lr=5e-4 reward_shaping_horizon=2.5e6 vf_loss_coeff=1e-4 num_training_iters=500 layout_name="cramped_room" experiment_name="ppo_sp_S_final_1e-3" entropy_coeff_end=1e-3
-python ppo_rllib_client.py with seeds="[2229]" lr=5e-4 reward_shaping_horizon=2.5e6 vf_loss_coeff=1e-4 num_training_iters=500 layout_name="cramped_room" experiment_name="ppo_sp_S_final_1e-4" entropy_coeff_end=1e-4
-# python ppo_rllib_client.py with temp_dir=/tmp/nathan_ray seeds="[2229, 7649, 7225, 9807, 386]" lr=6e-4 reward_shaping_horizon=3.5e6 vf_loss_coeff=1e-4 num_training_iters=833 layout_name="coordination_ring" experiment_name="ppo_sp_coord_ring" 
-# python ppo_rllib_client.py with temp_dir=/tmp/nathan_ray seeds="[2229, 7649, 7225, 9807, 386]" lr=8e-4 reward_shaping_horizon=2.5e6 vf_loss_coeff=1e-4 num_training_iters=667 layout_name="counter_circuit" experiment_name="ppo_sp_counter_circuit"
-# python ppo_rllib_client.py -tmp /tmp/nathan_ray -s 2229 7649 7225 9807 386 -lr 8e-4 -r 2.5e6 --gpus 1 -vf 0.5 -n 625 -l "forced_coordination"   -en "ppo_sp_forced_coord"
-# python ppo_rllib_client.py -tmp /tmp/nathan_ray -s 2229 7649 7225 9807 386 -lr 1e-3 -r 2.5e6 --gpus 1 -vf 0.5 -n 583 -l "asymmetric_advantages"   -en "ppo_sp_asymm_advs"
\ No newline at end of file
+# This file contains the script to generate the baseline ppo self-play agents for the 5 classic layouts
+
+# Please check that your machine has enough CPUs for 16 parallel workers; otherwise, lower the num_workers parameter
+python ppo_rllib_client.py with  seeds=[11] layout_name="cramped_room" num_workers=16 train_batch_size=12800 sgd_minibatch_size=8000 num_training_iters=500 evaluation_interval=100 use_phi=False entropy_coeff_start=0.2 entropy_coeff_end=0.0005 num_sgd_iter=8 lr=1e-3
+python ppo_rllib_client.py with  seeds=[21] layout_name="cramped_room" num_workers=16 train_batch_size=12800 sgd_minibatch_size=8000 num_training_iters=500 evaluation_interval=100 use_phi=False entropy_coeff_start=0.2 entropy_coeff_end=0.0005 num_sgd_iter=8 lr=1e-3
+python ppo_rllib_client.py with  seeds=[31] layout_name="cramped_room" num_workers=16 train_batch_size=12800 sgd_minibatch_size=8000 num_training_iters=500 evaluation_interval=100 use_phi=False entropy_coeff_start=0.2 entropy_coeff_end=0.0005 num_sgd_iter=8 lr=1e-3
+python ppo_rllib_client.py with  seeds=[41] layout_name="cramped_room" num_workers=16 train_batch_size=12800 sgd_minibatch_size=8000 num_training_iters=500 evaluation_interval=100 use_phi=False entropy_coeff_start=0.2 entropy_coeff_end=0.0005 num_sgd_iter=8 lr=1e-3
+
+python ppo_rllib_client.py with  seeds=[11] layout_name="asymmetric_advantages" num_workers=16 train_batch_size=12800 sgd_minibatch_size=8000 num_training_iters=500 evaluation_interval=100 use_phi=False entropy_coeff_start=0.2 entropy_coeff_end=0.0005 num_sgd_iter=8 lr=1e-3
+python ppo_rllib_client.py with  seeds=[21] layout_name="asymmetric_advantages" num_workers=16 train_batch_size=12800 sgd_minibatch_size=8000 num_training_iters=500 evaluation_interval=100 use_phi=False entropy_coeff_start=0.2 entropy_coeff_end=0.0005 num_sgd_iter=8 lr=1e-3
+python ppo_rllib_client.py with  seeds=[31] layout_name="asymmetric_advantages" num_workers=16 train_batch_size=12800 sgd_minibatch_size=8000 num_training_iters=500 evaluation_interval=100 use_phi=False entropy_coeff_start=0.2 entropy_coeff_end=0.0005 num_sgd_iter=8 lr=1e-3
+python ppo_rllib_client.py with  seeds=[41] layout_name="asymmetric_advantages" num_workers=16 train_batch_size=12800 sgd_minibatch_size=8000 num_training_iters=500 evaluation_interval=100 use_phi=False entropy_coeff_start=0.2 entropy_coeff_end=0.0005 num_sgd_iter=8 lr=1e-3
+
+python ppo_rllib_client.py with  seeds=[11] layout_name="coordination_ring" num_workers=16 train_batch_size=12800 sgd_minibatch_size=8000 num_training_iters=500 evaluation_interval=100 use_phi=False entropy_coeff_start=0.2 entropy_coeff_end=0.0005 num_sgd_iter=8 lr=1e-3
+python ppo_rllib_client.py with  seeds=[21] layout_name="coordination_ring" num_workers=16 train_batch_size=12800 sgd_minibatch_size=8000 num_training_iters=500 evaluation_interval=100 use_phi=False entropy_coeff_start=0.2 entropy_coeff_end=0.0005 num_sgd_iter=8 lr=1e-3
+python ppo_rllib_client.py with  seeds=[31] layout_name="coordination_ring" num_workers=16 train_batch_size=12800 sgd_minibatch_size=8000 num_training_iters=500 evaluation_interval=100 use_phi=False entropy_coeff_start=0.2 entropy_coeff_end=0.0005 num_sgd_iter=8 lr=1e-3
+python ppo_rllib_client.py with  seeds=[41] layout_name="coordination_ring" num_workers=16 train_batch_size=12800 sgd_minibatch_size=8000 num_training_iters=500 evaluation_interval=100 use_phi=False entropy_coeff_start=0.2 entropy_coeff_end=0.0005 num_sgd_iter=8 lr=1e-3
+
+python ppo_rllib_client.py with  seeds=[11] layout_name="forced_coordination" num_workers=16 train_batch_size=12800 sgd_minibatch_size=8000 num_training_iters=500 evaluation_interval=100 use_phi=False entropy_coeff_start=0.2 entropy_coeff_end=0.0005 num_sgd_iter=8 lr=1e-3
+python ppo_rllib_client.py with  seeds=[21] layout_name="forced_coordination" num_workers=16 train_batch_size=12800 sgd_minibatch_size=8000 num_training_iters=500 evaluation_interval=100 use_phi=False entropy_coeff_start=0.2 entropy_coeff_end=0.0005 num_sgd_iter=8 lr=1e-3
+python ppo_rllib_client.py with  seeds=[31] layout_name="forced_coordination" num_workers=16 train_batch_size=12800 sgd_minibatch_size=8000 num_training_iters=500 evaluation_interval=100 use_phi=False entropy_coeff_start=0.2 entropy_coeff_end=0.0005 num_sgd_iter=8 lr=1e-3
+python ppo_rllib_client.py with  seeds=[41] layout_name="forced_coordination" num_workers=16 train_batch_size=12800 sgd_minibatch_size=8000 num_training_iters=500 evaluation_interval=100 use_phi=False entropy_coeff_start=0.2 entropy_coeff_end=0.0005 num_sgd_iter=8 lr=1e-3
+
+python ppo_rllib_client.py with  seeds=[11] layout_name="counter_circuit_o_1order" num_workers=16 train_batch_size=12800 sgd_minibatch_size=8000 num_training_iters=500 evaluation_interval=100 use_phi=False entropy_coeff_start=0.2 entropy_coeff_end=0.0005 num_sgd_iter=8 lr=1e-3
+python ppo_rllib_client.py with  seeds=[21] layout_name="counter_circuit_o_1order" num_workers=16 train_batch_size=12800 sgd_minibatch_size=8000 num_training_iters=500 evaluation_interval=100 use_phi=False entropy_coeff_start=0.2 entropy_coeff_end=0.0005 num_sgd_iter=8 lr=1e-3
+python ppo_rllib_client.py with  seeds=[31] layout_name="counter_circuit_o_1order" num_workers=16 train_batch_size=12800 sgd_minibatch_size=8000 num_training_iters=500 evaluation_interval=100 use_phi=False entropy_coeff_start=0.2 entropy_coeff_end=0.0005 num_sgd_iter=8 lr=1e-3
+python ppo_rllib_client.py with  seeds=[41] layout_name="counter_circuit_o_1order" num_workers=16 train_batch_size=12800 sgd_minibatch_size=8000 num_training_iters=500 evaluation_interval=100 use_phi=False entropy_coeff_start=0.2 entropy_coeff_end=0.0005 num_sgd_iter=8 lr=1e-3
\ No newline at end of file

From a4e759a389c3dd090069d6928f7c47b9c4bfd555 Mon Sep 17 00:00:00 2001
From: Alexander Lichtenstein
 <49325191+alexlichtenstein@users.noreply.github.com>
Date: Wed, 24 Aug 2022 01:30:38 +0200
Subject: [PATCH 14/38] update on readme

---
 README.md | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/README.md b/README.md
index 0329aee2..c2ba5f0d 100644
--- a/README.md
+++ b/README.md
@@ -111,6 +111,18 @@ You should see all tests passing.
 
 Note: the tests are broken up into separate files because they rely on different tensorflow execution states (i.e. the bc tests run tf in eager mode, while rllib requires tensorflow to be running symbollically). Going forward, it would probably be best to standardize the tensorflow execution state, or re-write the code such that it is robust to execution state.
 
+## Training examples
+
+You can also try to train agents for 5 classical layouts
+
+```bash
+(harl_rllib) $ cd ppo
+(harl_rllib) ppo $ bash run_experiments.sh
+```
+
+Please select the correct `num_workers` parameter, otherwise the training of the agents wouldn't be able to start.
+
+
 # Repo Structure Overview
 
 `ppo/`:
@@ -118,6 +130,9 @@ Note: the tests are broken up into separate files because they rely on different
 - `ppo_rllib_client.py` Driver code for configuing and launching the training of an agent. More details about usage below
 - `ppo_rllib_from_params_client.py`: train one agent with PPO in Overcooked with variable-MDPs 
 - `ppo_rllib_test.py` Reproducibility tests for local sanity checks
+- `run_experiments.sh` Script for training agents on 5 classical layouts
+- `trained_example/` Pretrained model for testing purposes
+
 
 `rllib/`:
 - `rllib.py`: rllib agent and training utils that utilize Overcooked APIs

From 6d145c3b3abfb77c517da61b3412c44ac69ea425 Mon Sep 17 00:00:00 2001
From: Alexander Lichtenstein
 <49325191+alexlichtenstein@users.noreply.github.com>
Date: Fri, 2 Sep 2022 19:36:03 +0200
Subject: [PATCH 15/38] plotting, shifting function to utils, changes to README

---
 .../ppo/plot_example_experiments.py           | 54 +++++++++++++++++++
 1 file changed, 54 insertions(+)
 create mode 100644 human_aware_rl/ppo/plot_example_experiments.py

diff --git a/human_aware_rl/ppo/plot_example_experiments.py b/human_aware_rl/ppo/plot_example_experiments.py
new file mode 100644
index 00000000..7ad63e54
--- /dev/null
+++ b/human_aware_rl/ppo/plot_example_experiments.py
@@ -0,0 +1,54 @@
+from human_aware_rl.utils import set_style
+from human_aware_rl.utils import *
+import os
+import numpy as np
+import re
+import matplotlib.pyplot as plt
+import matplotlib
+
+envs = ['cramped_room', 'forced_coordination', 'counter_circuit_o_1', 'coordination_ring', 'asymmetric_advantages']
+
+def get_list_experiments(path):
+    result = {}
+    subdirs = [name for name in os.listdir(path)
+            if os.path.isdir(os.path.join(path, name))]
+    for env in envs:
+        result[env] = {'files': [path+'/'+x for x in subdirs if re.search(env, x)]}
+    return result
+
+def get_statistics(dict):
+    for env in dict:
+        rewards = [get_last_episode_rewards(file+'/result.json')['episode_reward_mean'] for file in dict[env]['files']]
+        dict[env]['rewards'] = rewards
+        dict[env]['std'] = np.std(rewards)
+        dict[env]['mean'] = np.mean(rewards)
+    return dict
+
+def plot_statistics(dict):
+    names = []
+    stds = []
+    means = []
+    for env in dict:
+        names.append(env)
+        stds.append(dict[env]['std'])
+        means.append(dict[env]['mean'])
+
+    x_pos = np.arange(len(names))
+    matplotlib.rc('xtick', labelsize=7)
+    fig, ax = plt.subplots()
+    ax.bar(x_pos, means, yerr=stds, align='center', alpha=0.5, ecolor='black', capsize=10)
+    ax.set_ylabel('Average reward per episode')
+    ax.set_xticks(x_pos)
+    ax.set_xticklabels(names)
+    ax.yaxis.grid(True)
+
+    # Save the figure and show
+    plt.tight_layout()
+    plt.savefig('rewards.png')
+    plt.show()
+
+if __name__ == "__main__":
+    experiments = get_list_experiments('results')
+    experiments_results = get_statistics(experiments)
+    print(experiments_results['cramped_room']['std'])
+    plot_statistics(experiments_results)

From f8734cb2a6e3b64394ec8027c7951b769aeb9995 Mon Sep 17 00:00:00 2001
From: Alexander Lichtenstein
 <49325191+alexlichtenstein@users.noreply.github.com>
Date: Fri, 2 Sep 2022 23:13:01 +0200
Subject: [PATCH 16/38] readme, utils

---
 README.md                             |  4 ++--
 human_aware_rl/ppo/ppo_rllib_test.py  | 18 +++++++++---------
 human_aware_rl/ppo/results/readme.txt |  1 +
 human_aware_rl/ppo/run_experiments.sh |  1 +
 human_aware_rl/utils.py               | 12 +++++++++++-
 5 files changed, 24 insertions(+), 12 deletions(-)
 create mode 100644 human_aware_rl/ppo/results/readme.txt

diff --git a/README.md b/README.md
index c2ba5f0d..a0148c65 100644
--- a/README.md
+++ b/README.md
@@ -120,8 +120,8 @@ You can also try to train agents for 5 classical layouts
 (harl_rllib) ppo $ bash run_experiments.sh
 ```
 
-Please select the correct `num_workers` parameter, otherwise the training of the agents wouldn't be able to start.
-
+Please choose the `num_workers` parameter to be less than the number of CPUs on your machine. For more information,
+check the Ray [documentation](https://docs.ray.io/en/latest/rllib/rllib-training.html).
 
 # Repo Structure Overview
 
diff --git a/human_aware_rl/ppo/ppo_rllib_test.py b/human_aware_rl/ppo/ppo_rllib_test.py
index bf46d7dd..c41b2326 100644
--- a/human_aware_rl/ppo/ppo_rllib_test.py
+++ b/human_aware_rl/ppo/ppo_rllib_test.py
@@ -10,7 +10,7 @@
 from overcooked_ai_py.agents.benchmarking import AgentEvaluator
 import tensorflow as tf
 import numpy as np
-import json
+from human_aware_rl.utils import get_last_episode_rewards
 
 # Note: using the same seed across architectures can still result in differing values
 def set_global_seed(seed):
@@ -298,14 +298,14 @@ def test_resume_functionality(self):
 
         threshold = 0.1
 
-        with open('trained_example/cramped_room/result.json') as f:
-            j = json.loads(f.readlines()[-1])
-            #Test total reward
-            self.assertAlmostEqual(j['episode_reward_mean'], results['average_total_reward'],
-                                   delta=threshold * j['episode_reward_mean'])
-            #Test sparse reward
-            self.assertAlmostEqual(j['custom_metrics']['sparse_reward_mean'], results['average_sparse_reward'],
-                                   delta=threshold * j['custom_metrics']['sparse_reward_mean'])
+        rewards = get_last_episode_rewards('trained_example/cramped_room/result.json')
+
+        #Test total reward
+        self.assertAlmostEqual(rewards['episode_reward_mean'], results['average_total_reward'],
+                                   delta=threshold * rewards['episode_reward_mean'])
+        #Test sparse reward
+        self.assertAlmostEqual(rewards['sparse_reward_mean'], results['average_sparse_reward'],
+                                   delta=threshold * rewards['sparse_reward_mean'])
 
 def _clear_pickle():
     # Write an empty dictionary to our static "expected" results location
diff --git a/human_aware_rl/ppo/results/readme.txt b/human_aware_rl/ppo/results/readme.txt
new file mode 100644
index 00000000..44d60515
--- /dev/null
+++ b/human_aware_rl/ppo/results/readme.txt
@@ -0,0 +1 @@
+You can download trained examples here - https://drive.google.com/file/d/1vIrZ7CphtRFBdLShO9WbGAl2qsbbJfMB/view?usp=sharing
\ No newline at end of file
diff --git a/human_aware_rl/ppo/run_experiments.sh b/human_aware_rl/ppo/run_experiments.sh
index 0c6532a6..5cac975f 100755
--- a/human_aware_rl/ppo/run_experiments.sh
+++ b/human_aware_rl/ppo/run_experiments.sh
@@ -7,6 +7,7 @@ python ppo_rllib_client.py with  seeds=[21] layout_name="cramped_room" num_worke
 python ppo_rllib_client.py with  seeds=[31] layout_name="cramped_room" num_workers=16 train_batch_size=12800 sgd_minibatch_size=8000 num_training_iters=500 evaluation_interval=100 use_phi=False entropy_coeff_start=0.2 entropy_coeff_end=0.0005 num_sgd_iter=8 lr=1e-3
 python ppo_rllib_client.py with  seeds=[41] layout_name="cramped_room" num_workers=16 train_batch_size=12800 sgd_minibatch_size=8000 num_training_iters=500 evaluation_interval=100 use_phi=False entropy_coeff_start=0.2 entropy_coeff_end=0.0005 num_sgd_iter=8 lr=1e-3
 
+
 python ppo_rllib_client.py with  seeds=[11] layout_name="asymmetric_advantages" num_workers=16 train_batch_size=12800 sgd_minibatch_size=8000 num_training_iters=500 evaluation_interval=100 use_phi=False entropy_coeff_start=0.2 entropy_coeff_end=0.0005 num_sgd_iter=8 lr=1e-3
 python ppo_rllib_client.py with  seeds=[21] layout_name="asymmetric_advantages" num_workers=16 train_batch_size=12800 sgd_minibatch_size=8000 num_training_iters=500 evaluation_interval=100 use_phi=False entropy_coeff_start=0.2 entropy_coeff_end=0.0005 num_sgd_iter=8 lr=1e-3
 python ppo_rllib_client.py with  seeds=[31] layout_name="asymmetric_advantages" num_workers=16 train_batch_size=12800 sgd_minibatch_size=8000 num_training_iters=500 evaluation_interval=100 use_phi=False entropy_coeff_start=0.2 entropy_coeff_end=0.0005 num_sgd_iter=8 lr=1e-3
diff --git a/human_aware_rl/utils.py b/human_aware_rl/utils.py
index 7ab1f4a3..b8cbb7cc 100644
--- a/human_aware_rl/utils.py
+++ b/human_aware_rl/utils.py
@@ -6,6 +6,7 @@
 import itertools
 import numpy as np
 import tensorflow as tf
+import json
 
 
 def delete_dir_if_exists(dir_path, verbose=False):
@@ -166,4 +167,13 @@ def get_dict_stats(d):
             'max': np.max(v),
             'n': len(v)
         }
-    return new_d
\ No newline at end of file
+    return new_d
+
+def get_last_episode_rewards(filename):
+    with open(filename) as f:
+        j = json.loads(f.readlines()[-1])
+        result = {
+            'episode_reward_mean' : j['episode_reward_mean'],
+            'sparse_reward_mean' : j['custom_metrics']['sparse_reward_mean']
+        }
+        return result

From f2e887faed157943b56e657478535aa18f0c6bb9 Mon Sep 17 00:00:00 2001
From: Alexander Lichtenstein
 <49325191+alexlichtenstein@users.noreply.github.com>
Date: Thu, 8 Sep 2022 01:06:32 +0200
Subject: [PATCH 17/38] readme + plotting

---
 human_aware_rl/ppo/plot_example_experiments.py |   5 ++---
 human_aware_rl/ppo/results/example_rewards.png | Bin 0 -> 20379 bytes
 human_aware_rl/ppo/run_experiments.sh          |   4 +++-
 3 files changed, 5 insertions(+), 4 deletions(-)
 create mode 100644 human_aware_rl/ppo/results/example_rewards.png

diff --git a/human_aware_rl/ppo/plot_example_experiments.py b/human_aware_rl/ppo/plot_example_experiments.py
index 7ad63e54..eec14b6e 100644
--- a/human_aware_rl/ppo/plot_example_experiments.py
+++ b/human_aware_rl/ppo/plot_example_experiments.py
@@ -18,7 +18,7 @@ def get_list_experiments(path):
 
 def get_statistics(dict):
     for env in dict:
-        rewards = [get_last_episode_rewards(file+'/result.json')['episode_reward_mean'] for file in dict[env]['files']]
+        rewards = [get_last_episode_rewards(file+'/result.json')['sparse_reward_mean'] for file in dict[env]['files']]
         dict[env]['rewards'] = rewards
         dict[env]['std'] = np.std(rewards)
         dict[env]['mean'] = np.mean(rewards)
@@ -44,11 +44,10 @@ def plot_statistics(dict):
 
     # Save the figure and show
     plt.tight_layout()
-    plt.savefig('rewards.png')
+    plt.savefig('example_rewards.png')
     plt.show()
 
 if __name__ == "__main__":
     experiments = get_list_experiments('results')
     experiments_results = get_statistics(experiments)
-    print(experiments_results['cramped_room']['std'])
     plot_statistics(experiments_results)
diff --git a/human_aware_rl/ppo/results/example_rewards.png b/human_aware_rl/ppo/results/example_rewards.png
new file mode 100644
index 0000000000000000000000000000000000000000..90b306e759fe2de87874773c0e991fcd797aa540
GIT binary patch
literal 20379
zcmd_SXH=C}yDq%2ccY>LQlfwg2nYx$3fKS_y`xC60n(8w9W`oH6h%4+D!q5<C1OFk
z6p@ZfFM{;Wxh9Fp`@MVbAA6r~oH5Qgj*-N`TF-jQocDdTd)-r%m)f|FVI74+*+@Ne
zTA4yw=0>5=w69r(p9p?z?!bSJSWBF>Rx#JNw!3JlN0GZ|ZDC?=ZDM%oS6e+xD?@WL
z0Ukjf{sX^WwYIjf65-`F{rd$x=9UJ$oI)%TILKOyGv}-*l+72(KQvL|k%knC!W!!7
z6ROvP23zdUs?~j6{871Oi^R&JwflqCvmdA3M_bNeaYn{e^=Wp1zTe))_VOT3?$or}
zSfR$}hNE(cM>^<^gzE<!39J9Y(7s{K8vcPcmW5-wRK+{a?L969!FRil7=+~2YdR(h
zIGuG&p}%u+4bH<yDI#qb{_Ex`L0LheB+)JV760@07YYr9a^MaHFS)?C;y7OLU^itM
zg>rTS%`X&6;iWZh_|cA2|K(fsWO8UKIQQ+7x_0eaj8;~PiO_jN;njF&#la2F#9ejr
z^Yb}XB8I=&uPAhS^Y-nB%F4vFKz!um#Krj`*X+SpP3!U)X%4b@b7}Zk8x}cSzwTve
zjE`JUu(V8>o0}W34^LX-Hl3N<R9{qdi=1YVveIUiZ)`Zb|K_Rw53df_;_&s`x9uz}
zx|hbMr>2VEzmI&=XJtzcr}(w2JTUU!wtYTloxi>JsQ+#4Ht9Gq11WO5*v*O`@9t|z
zHMr4TVDaVV>!KodYI1UNjN*%vFZwQz@d`(-ceCm$`xtb@%DSU?3$^;0U2F8UZ||aw
z>k~Y~CI*{~=V!*VeCYYF{N7dZ%-Gds;4|MwCJ|NqS}k1Kn|Ay5a1ND-D3f!M=e6+&
zqmy#3sHmt6dYn3WQZ@OKgw&x@{ve($Ew)Xj4uk3S*0X~dr3W`S&;59xJ=y9w-k(^|
zS8;U6i%s;$n{5Gy3_YFc?d<GkzYPqS*bKF5x3;!6W?l=$bqii7q-14h8;x{)V9yuB
zO|N*@yX=Aov#6!0>v)}ppg{$rX=`o+Go_`arE_rbS@#H+MnWx%>)Z?cqa<kjN%hQ`
z-*sH3^+XrvMgtCByjkMS)je#316k<HJJb8&1GzQR&O}E?uUo%9RY5oD!i^ZU*oeZy
z;}7WtBn=G>C+dYv8^fMI|FtgeT#XnvKfm&a4<8y+4HWLZV!^YaKhm=1Ion`s-ua6c
zFG?#YXyD$A4Q8}){`%{2&)N)g-MR$r^H_zA>Bhdnt_${qZ3Vh*PNNUmICk&etugiV
zifi!W$B)gQ(bLnH6c;CnE6B_Hu<7aRa|#I5)Q&HwP;Ogwn)4Nub56`0)uW6}PS#gE
zJL>J_6_%JkBdw&Qq*>sSw`|$6L)V8d%sQPoabo1h52Z-?XVyJcyXz8l3yN*z{rTA|
zYd&fuX!-Zkox++KsjjIR^w5Yq`(mWC)c2CHbep%M_=DH_bayT)pJ$A)>aK{<Ey%~h
zva2)3ZHZBh_VnS^O(-rt^Kh52e4FcHp6&N$!;tRbFGVpLiNUr5pI_{i3+Ol}rNipx
z;^HFWJd<|p*fCk%efTU393Z>X=R6y=yW&~u+_^&ByBPKO08dZP`(7$RmMeybEb-@d
zDwp;q2VVvD&Fk0K#3ts)Jl&@dBl(cQGfXX3{X<bvQU0zYyk=%*mR43dM%l5kv0B>N
z53*KKD9;Zk%yD;RoO`R|isgLyox4(ff;Qg6P4@o!ojkSKHvMUr-mYU3u|9+URn0JK
zQB5~e8*EIMVU>EMnqxai%gA_udjI#|6^|cxV|E@>l6xkiws!4WhNHG|FGE8|H(bX`
zQU7L5Gu-jvDZ{}FZdj6?y}i-x?HA(DCZDuxbIw?pELiM)&*EC^PA}jU5Wp!eE<WB{
zBX#-xqjcV(uU}t=%lI1gRK39RQh9rSJN|h0XWt-4wLQhS{49g#)2BG#xen)ktfphW
z9@D;IUS711Xb$MhU+e29wXo3Xe^5I9A9{;lN_Y$Ir{YXRqKIOnTxNv(dc%qL0vd}P
z8SB1>e-7y1Wk1Oj-twEVW%icBBUWAe4<A0u%*;G8Jgj*3te?x&m({cVy}iBnA3aj9
zi&0aHR%R#9Z=ZZnvD&-j%kOLR9PP4la$>O_9VQwKyuH2Es$=n3+P#8!D6N0hw>0Nj
zThpdja!f~UYx5>sV?`Y&&Ui{bVvJ^&@#!h$EvTIt@87<2XJl)hW0P-XMTPq0_ZFKU
zUnNYNGnG)NlDjHH5>b4;KXIZt9o`;zkxtT3_^U&1ZtmE-UFJOtvqKu=eRb&;Urx5T
zF3x*~InPc+D}_onrWrofOg9Q=ahW<X*=*(U_3KyFRDC&Wb#s>0_}pk^e{Hl1C$|2L
zK3UY2$jC@+ypAtlz9iW!N9k%_ljHWjx#_g$q0Y7fO_!Y(*R1ZVuq^j`_AH*aU{3kZ
znplHY;O*}pjk43!ZTLCqg;8CM-e$KzAyZ{V#mAy^ROKsI!tQKhioqpEQJ?O$|K$DT
z$qRS)4YQ*i9apHz`uY+40s>5FcW_;+1D|N)0qqS;u@beNm=F;;N}J&nxxp>RZb%)E
z0)>g(eP{HFb!*mC;S<#ow2qJ;)z6Vie)8nazshFL+)QWvh=5)Rtz6LIA2GeC`Cd6W
zR*Swgl;hXxxCd8hNjW%V&tH4CezjWbe%<@=S^0XUj7MJv`1sh)`zQRtvHW>KK>_Mm
zbXXXhGy2H=`}eEyh?q3H{rE1^ZrgU*H8?2f0JgHk>C;K;=_nR8fq4@b428c0@S7~|
zy#7e_>aQ6i6{3f?wY<1-s<a?CG9@aApW?%%A-|D9tI^`cix)GUzPe)*P3HYC?lM=u
zeX!Hp&(F`8cKFS`Z6gyCK5X`mj>i3;;<HL}oP35m?MJ?Lb?p@v&WMf3$<8jRt4q&}
zT|seE5isE)3zxscvp+dCqyA&Vi_FfWE$0gDQAd>|CGVU(b*i(kFQzV5qe-`^z_s8J
z)6s~eq=Qst+)dk|)_By>#_n_JM&78%w7>kK_|+8H$U;b7c-dk7GCG^VMzwUKy6WlC
zZnCoC&u7SUsmDFWGFi2WG0M-6&feZ0)%TQw_<Y*y*RN}1)VQe1r%(Teelyk+>S5GZ
z8!dh2Ox5S)%iEclo^`K5KXj`*tW7y)KN7ZT1Ksnfsq6L*4xHF-IBZpmT^lzwK+HM2
z#ET<T%z0)%E9<EzPoB)w7cA)Mw!VEUA>+$aSM}o5_8mJS?r-Pf<m8lox>t@fUJZNA
zf>VAcMSQCLmT|sLzLR}<u;}?zgUSlKV6;`^h7>)0;f>gk5f-SZ1Z<A}@50LeGu1O<
zWno<0f%&O#9s$DU!4)Cmywv*-9;ir5-#@2@t0-KdSH8ca=VNGJb@&7FWU;AQoF`id
z&ed_9x6!+D<?8q5EPYj=1j;r){$+<Ry;*~OB8y(Da(mtOff~p0zHqFKK&6#<h3uu0
zHSKtmVQAwxgk@GleYA>{>-@Mc{nDqgMYvPCIy&|rJvtlH``&{^qqD@jJPJ3TvX_sY
zB6<4s)pzdn6Wv0AavU5S4B{@i=&kya+wuE@_c(Ig0BYnUBqa&%K{4%X%1B(88&yCr
zmJ4xR$olyaD+-&;a^wo;CjxhHYldm(*mAJ5ODtc#NwvUb-trkOKFzJ#K$_wemZDc0
z<GQ$@za0yfvYNw^GKU(t{+M}Xu;>F#JZy@rdI#ltoAYF+gIQ|B6}oq7HEf;scMjq=
z(o#<<msNcDkh~bLm8A@rHS^^zi<h5Y)SEY_=ZA|n8#Sgy1JRi2JEGPq%0@6QpDGim
zB)4+i=FJ%j58T~ROLA6iW{%5qoD5J}`mhTnHk8dwB4M0rF<OExH8m<yj~Mwv34QZ>
z<3;(LYj5@T&6^%<?g>LQj;hp3u~=N7-(NRgnlgva1%yvTv5r1*Yn4&e^Bbt6;ROpb
zGAvHRH-6q8#^s-%pIA=dNvwK&b-wc~8}%WBpfVolpusXc=E5U>w!5zemPh>l`x;ax
zKQ%U-0b`+;cA;mQ%e=#Q&&Q(t^_d2h!85&)A!VG*`0ZXky%obapPFwT;t6O!##6&z
zJi|VJK5NrgOC9a5)M(3hUa$4Zi$i6sT)6Ej{tp!4re=m&e7&x#_QJx#gneU%dE$6~
zeYYl|6ah!=66UAT7Y|wYD8_5&sC#p&DXXe_MacTUOwp6uH3se>;pNm7=2;ocIXt&h
zpnA|_Q6x>Hv$<qvjM(PHzL6-+P{q>bGFj>=VAz-l`DZatSfv^xH?oLnndgoB0IYYO
zTv><@<Ps6lEDsdcP*?X^n67l?+_UGT>)emKoa%8`Lx%3{3$6;IJ{_n8@E`O@O--#s
zpC(oH+q;MNJv{2&XqI=NG=6xxZ>N<hX$z)vm6zh@Ow^zFJln1^taa_&>BWnE{O!Ga
z>g)tk=v@n3&u_`h$}+?P!_zsD(7Spwv+9Kl&(V}d&yCE;KNAUO5Hz|9Qe;%0@GMM*
z)eS2vfbX(oa4C7X+1<g8v}o`Fg2vLGAWX5{A+9<A;>uZ;oeAg7!6uK1iUPN@ZQks$
zCxeCJw(E~EwbjyT`L7>Qc4$yND35YyH{JEoZ4SaVeI61LqMoSZtVRJ}V(IMe)`&}a
z>gmZw)zQ((ir_t)bUeg)!ap}JuP#(-bKY3B%!E>*T%Zug{{6+w3t=*Ld+9~2qb3F#
zLIKQaW95L8uOEx~bivnzgP)!9h(S=g=<a61BkHnkjH%U?;)`);0sUYtJeFBUjvR65
z@;9iBklVF#)vCtG$-yR#bmMxT#Kc6~!NzcMS+kQv^}=mVtW;G0;_VA;4I%s3*gDQ>
zpds<|^AA)$efpH0>hJGgQdV|gL<H5FHHp3feJt>PXzHnJrNZo#O(2rscn+iG!sBmu
z>YmBAW-3IF>~D1(;<n8C`0?Y+{AAnqUAqE}XWIX`HRy5c=FN_8-$L;adeI@kPh6h#
z1@P<L-zy(<y>1NTy-Otj7m8b;QcJa2p`3mL|NHOC3NkWWpGL5|d)5@)*;rGsIB#zH
zIq8H=zs{_we?UNtv^S^W^hoD@clX-NYv0E3fyUTp6ZV@}#Nr&r`wZVdVzU3ftipIj
zRb{0@q(Vp+-Em5xocToKEpjQ+)^}IcheXo>zN^J+rdNxu8g^Zr4KWc4G|L$jqE>&3
zKTmq<$`va*7~;9s6dfEJt5N80GB3bA?@!o;n|3SM!^4A(<G=w~vJK>ek3};G@-XQp
z8P##xU%$Tp*I$3F<Zo_n?&#{Wt+Uq9&;SeGVZ=nCWPYR*@~t*|E%$Z_4g^~6?m?k2
zLecQ#VY*zOWo2O6lC8#@KXoZVCY|=QwRM`<?0^8bR;D8ElR7Ze)~#Euzr0>at$uOp
z?$}5vukq}}ponG1@A-=hjvMI?9><YXEG&}PII!PnXlNY9K1z+_#0|k-{-#Cts4VK2
zP}gwV>Rw8$r&jOOb$S7oOaSfr9S5rBrbpMSeS)~)1+EGRS^EC{`-P0_KQ2+pju=BR
zBHMZ6E}_~erO?_)1s3jeDQ|S{Jb3WHhew-p=gytCZ{Lna0|4hb{-S6<N?&;~${beJ
z#FKPF?EyEv*aS^JEB^TL<2Gl%!(FzS>1l17osV;C!MIC(xVcmbD^S0Hu1BPeEUWIm
z$Pkxmu>0C@nVnQ+Ev+C>QKO$7jAnSeUbjBiezZ2<*-><)nANB`GnuV-cBmj@h+!4_
zlGQ=Ioff!NHZx;}?ECih`<ypRR$w`H{a}3+DzgQD#ERp+)%SL4+!3e7s6;+G_m<Ra
zHi=Dc*qL>Gb+I<3+!+KQr!<&MSLA>mzEmfT`|0*06fFC^2?5ukoFTE)M)JeeT$U6@
zaTljx545QEX4#{w6g^*=HbtNzNqYi*(K0X?>HokB58mLQ8J_y_&7tFo)YxdHcq~@6
zXBZk?x><|ngB{!{ZLD;7S$L&aKV=icp)<IDjX8FS6~Ush08mClt$8IMK74$B;#S#T
zonCK+1xg$dU*_iL2W#*zL8~s=bkn9!?wmN#g{>-*l=-<iFMoggx-p0+(=#&xe|I^j
zZx4mZIyY{5WjbMqf7SfHh4qkW;~{|<yw{Cta_1u5Tka0rIVDZafRvOJ)mH=Ic_qFq
zP$T&2bJ4fB&1~5B9653>I5=2ZTH5pOU8yBG?`}YT-*KSt2>y}pM;SF;KvX#1Q!4!U
zirg>Z$coEk*SbLr(Wk}DbK82MgwM^BC?F4+j;ebc8~GiZi-Sr~K$<}%bL!H$ti?O}
z>k~D=VP2Uuyfn|BmISQv3~O_lec1GdkT80cW$o*EbDwMI*h`=rlVv+lbJTVq3N7g;
zqLQXG=Q(DeTG-T$p*!@9_15UC;?0z&ep@NAs?jf}rlw*e7H9esv&TQ3{|8Npp=CHM
zOm}-fe*7!Cc~B7JJlGn8s6!fX^M${cqy<zjBTy~~kzSsjFWuZIsKoNX>7RhPe;%z<
z#0^NT21_Us4AKz9QkqIt0(2&tTcFZ0oQ>=iX-aMx${6$+{T*w``S}A!;J?ju|EjS1
zU0O{Lsa(JzDgdBxs3=}xk%S)tjrBbgZdF7lrXR%hx!v+lS)YkDOynne3H+q+5812@
zvQS^fU$5uAij!Ekaibb4hE-lGv@(Gh@+NZ66-39q*m~nLTC@8EaphP+0x>t%LRA^f
zHBSx2?fO&S#B;0%4lMidfiINK%guhY%kb-)d+h3MSnGlpU;nl>b~A;d6%uW?>`B<z
zn6)>rZtK9Mi=Mkr-Jvz|ox8n$CoMBG5B2`ThiU-A>(p36UGYR&RF++^wSBc$Avlgy
z1E4h#i*`3`%}8Gz2P*K;$HE)S%cEB9-9Nz7P+O}y`wh}u?4uzd8ewLv2BJ)Ph%cN~
ziIlI~rf}?H2s0gB6up4{FO<ZKGcn5vQ!B}QtnYfvYCOa;wq1wN&L?(lN{#5ig}wfk
z^nMCr)2D~>XA?j*_o2qkj=g)h%NUUBpGAYU!Yg>Ssy(I`jTjB|&`*%5U5HvtU>#Ad
zv}y`w`nUuQtKYbDUhwP?*yrsup$ehb?}?e~T>XnY&TjS0ZBEB>V@gAf9-idJ-DE8F
zq8v7BJ}e<2(VJMXpe!%{D1W+Z7m*`FoJQ`U=%NxH{P1q^z^9T*{q2Sg=lGo0=gIz!
zkpROPi@Ye;hw>(kK6tVd$rd;)iU`JrH4&OASMGh!pEVPRaieP5xt_Ukkx3?wfIW8g
zofOJ`x|)RLpI@1tN24r>``Z31vzl>|%nk_X0^=vENSOdGW!DzrIe6$$ZP`~Wl?%&o
zyMKIpKVHo0(F4dqKqedY-o1N9E!pXWIe<l!yBlG7;*~th1+G^83eA+1GZDv$I8xnE
zDTys`Yt=^npLcKz!IT6Zj!aHwD2zf%E%O(s!)g6hon+~0#zsd=M{ia(G&GEik8`V@
zQ;CpO8tp1q%d*s~i&8o@*<u$mG1L}f9E>$5Ate<H3{UjYnX%r`ZTsXq0=ls;`&#pI
znp;}bLM1nf&JLuOl$3DJmEfA1uVDF`=l-}8>@s5rt*BfK|1xFZyYyRxT%dYX%BMu#
z#Gan3P_xgGas%CD&z?OH6pca*I*Ol=JDfi^A``D$paZX?uO%mAejK}psO39(b!xGY
z#(@=$7v`oJgw3>mXyEp?uQCwWP_*A{cSf`2gX-#PFK_Sg6g}QSwfwovOK<OIb)kov
zyeJW~>GLwGje3E*6AMO$x~u%Zp*1zz^vnIL)}$0AwDTRHE~EOZ?GJNjA61(zqclIB
z66N;|b@jL(s3Lo0l?8~UKJC*;XfaezUteD~L$q0}eO1DHMh~IKL#0Sr{SU-t<A4l*
z>h7kxU{S}BTv6?R2cDbqLXyjiZO=BSzsZZFd4Bchr=8E|pgG9NS>k>v0k|#HAlP2g
zv@uNyD4*y`Cs0OHb<Mg`fZ;iJ@BTxq()-KMNWZUcX(`yzp=)2?#6~27QGWma{jIjQ
zQ=QG(51-OZ=okN3c3f%L@d}UBB6E0n_;zD^72Xx5uka}6L0M6PY_4k)Wm;C&ect|0
zSBZAZmM>M>g{k$-);LGnmKP@sg=r{eXXcpwOFtG!`A(|t@UB@^c(Le?VQ+PKW75S_
z3G1njj*k6b-#(C*l`Ruocm3;|wIDl9-3@^72_rlq>`wN<1y)v8PXhy+)M~V~wV?}X
zHrIXdy=Zj}pV3jAzsxO=zCfzWa%@Nm7`*eq1p)dyFYpn)1eD^EA&!4P(OXreNbD9Y
zfC)o_^Fj1zckI|pg?wG}y(PzHq=Pzp4C@oE`7VHnv_3HzgS=`UQY59MjK93TJp(yB
z9wtkZN5kmYm@3L|2lVnA%U5&m-75+H0@<~yMnOCtG8LYxv?uPx3p6&-nV!%95t|r*
z(?hlcXR*1ukCdS3TwsUt(_QAzN;N{AOz35C9(%&<IKW4(Mz2+ikfkSDEzA=F;|mtX
zd1rHZcz86kt)qz~iO*1rI}2c-gnFe+jVCa!hSQRgJMy};9^9ji_Im40EuSQ(>5%|Y
z$5;B5!Ow<{p~Zeezgwrpi&v}BPSYJ%P*9i+!S#ZYJ?oeMwJF2A9!&$Py`l6eQmh{B
z@u0W>B~QEumC9vy8a`3}?5GA0k8j1El8TB?Dhi^VRWebyPtS*CkF1|6+ALlRRcG*v
zLbf%d4wePbz^R`#Xsz41bLT*~+tg~nydr@Ne*OmU#0IZNi?RW<Nx%B>q-UC5>5jaK
zMq>xi#Gi10ogyD7B=;A&g5<zUg(dWn2olhX{r`k&$^#TX)B?0gqOw(rP6o}lS_jSH
zTH1r|J8!rEMQyy}sRE~{sCIRDyVMl=hlNf=(28mx{ds^HeOe+YtrqB{D~t?wkPel6
zSdC)6oq-_+!d;hx=;6bke-`Iu?YP_nR0c7pOy_`d{ihe{fPZmUghCl`2){zKl&Rs1
z0{8LR$JZWYr+iA#Zu$q)`Of2#MQjKBcyx08j2*zu%j576DQ_=;dJMTP#_Hrb5X3Wa
zc`;1pJ&p8Na5M^KABYJf>T2Jo_*lqav1TpVjoCJ_6+uTraSse3nv;He{Knh&@0*5X
z)wVi0Ie}fOTpFi&s|Q2}P5o2cxlq*hXjy-L*vIZLPX(bhL!k&VUa^cKT??8(>cQ4n
z^SP-Xa1~G9t6NR$i{f(h=y_r#vk~13Wg4AE19U3gb<u@9#^;51Hqa|aeXHcv%6vXN
zY(Z5vF^S_;jZ%!7p_?yy_|A($2Sfn~DreLGNevuE(56on$CD?R0ZYz+AVb=-7wtYo
zL_{>Q-2Lr0Et^xz+x0KwdSO`27YP_vv$a@NiW)V&iU+xg0jK^4s=B+or-_?Ec>*1a
z1p4nI3?^jl2Hdf3#dq!}$wiNijcL`ubyx$uM3FD{+ER!T%gMzhLv$EIe(JUDqCseG
zi~WuULwRHXb_Fbz<}hwV)a3ISW@?E#d8*;k+tUqeBmocwe|BF=57Zb~sXZZ!bCK$B
zA92L~CsNEG!Ia6({khB=Ryws8(hw~g@^}q^60sA&eW2PIq6Fmql*(2V-r2Y-2GE;y
zP@U^vR^rdKWl(?Jp7QJM9ELk-(;uH<R2@cj82PYo{Obc=Bh+_k85x04;_3Mn0}ow(
zw_)&e@+so?3Fy0HjYWaQD!~L&fxYpQ5wTNreBg+bmB~VI-H$DSYSRo$fKvFu@74Ol
zP`9DE);`{SipW3vg0G*1s22;RLhmPx-*@B9`}fAf?SGKdU$%UC`TngqflY)xgMF9p
zCkvXrFzqr$e11%Y+&0+7A`W9wVKV#f&ds(h3dOD?CycFf7;ZOo#wgrov;kFMS+-~p
zKB!Q4e%_XWWx+?RB0wlbp|?8|>wTamP|!HyW2h8a3GJ(doa=5tJrLB>nkwl3RG~E=
z+02;<8#uwdqs7k4cALd50d@+rnA2IXfGnfzz`#Id6%`bhBzMU9*fG9moyEk&P*XUP
zXO@U1<!pwT_F!|C3JPb1pfe;DfQctKA0IyR)?@HzlHc_0f?T{;)=v>zj3OO^eS%&9
zl?Q&g??pPYH^mm`tmORpC58$X^OxRwrTCZefriw=QQb_76L74SBulj@#TSa>d^4*l
zpU9FwboDVd&vUdWPHt{FSYqA~sajoTO+`Vku*J_xVyEcX`$-@I0I=HgyFQxip0}Gw
ziPg-sc(I3-6&|@bnl@3P1&wMChB_aQ{?=||b!}LL;+20%cU6Q*Lkd|x-oe2<R7*R%
zeWXYp=2Nky$v^?dlkQjBcZ0G&cL{>OYM-pi{}%|JuQ+ahUCmcYZNYFkg$p9O*m$i9
zXrk967V(6ts;byWC)RD+gi2VrRcOH=S=~h__DpHT{oeCEdV6LztN79=?2Hzp6h8cb
zA~}89<4e)qrpN{pA^JPJ1IzUb1lG9q8@J5wUU|pnK=KT*zVBIW?BolLx1ZDa6@?mY
zwEeJ*!dT+vNCQ?j8{m37`C`#3D8#3*yikK@zUK^C<}G}_?D0WAwR5JwI$S2fo*-wJ
z`59vRsKuxfI^^x?soY!#29(JGN?Q&*8bde>#Kq}~8t5pqgDD_y<I~+C-41!s<&cc<
zP><IPC$2YSH-=-^lR^VGp?{o$e3Im!Z95o~pq-;*#Vjl=3;@$T-3aWT?lf&VkG){@
zDbAnGu&c};l{*lqc(1OmZcP&klm+WGc(*sP(w<>)?RIr_B~cWEilF3zSB5ovVG+J%
zUSNwxc7c>@5PD^Xa<nMeM#9Uki)v?ED}frQVV#m7z~WrDcwM@&2B04mU;!d#X5YzL
zy;2`<U{BzW5pH2vQ`qGb-LTm_!^qCp*B8;(V{6z@@mT07>&$&IeqUB`1q+>ug(E2(
zN9!*enDe&TR8)R!vw6$imPUcq7e(?JcQ-Lrqw{j{^74{8L+S*WuaUZ7)^~13TG}wA
z2(SQkQRqM^pWWB`{%<(*QBJzL_FZoixl$j0*CwHvVy#C#tcS%778$kk1HIYmvMdxk
zAHLRB%nSRW-95)RukHJnzv)HEG_L2d`QA)~6r$|~U;nxmZeDModHzT2m$47~Lj08E
zf(iFUpX-Il3N!C16lDOs4_c#F^z?eM`=*(N_f!8PcHO;s^X9whu24cZaW&n4dGyf1
zJlVg_t-y1Ek^R{bwWy<gS9Ho!#21e6$Ru>8B=Akv5jL7?^g&zY>5xcwch69EsUzou
z612Q++d|(nJIdk~sG&j+*5ugqN5GSCHMsfuu*+N~+BPc-3(JzHxpmvNbU5v|@7$60
zB<%|DDBYxizs>UBgi?`+AD$Zm9~70m+9Nf#-5J>2CAd9Tdd4d4T7$1derQFR-@<L3
zh_IokBD-g!R%3SBOS1F=)*!67R<#-!5uc2i?R<Ta#b7!%<+^yA5hd}c-FGjn5%foN
z|I-9{0%wU>_dE`E{C)(i{W$zg&oBf9qTyvae7(2tkllAJLQFp=U38fGa`(t`I?92Y
zNBb!C`qC6@E2~ng{Ug|8F*<n`Ecd|%qrqt=x_#|RB_$*baor>|B(^YhnSJlxlKOg$
zEnBuYOn3TPJ#6KM1%TZ}R*mXk7<edmRDMhSCW=Mqmq#Ln>Hb?OjuQjX&>MRn@a8|Z
z?iDM9pj`urFcID>ZA=`|WDmMK>FOq5dTUcUJTg-K`u6(uYD=Kk&yuMS0>26ru^6S$
zJ3K%uIp4EoHZmMi#1k6ti%tBg`N6AzT}(DS93|pg;Vr~p6S4blBDo|`K8fCWbJOO{
z8rQF9`b)syE4&i1qHs<=DXGDPQ#U`C;n3CR@D)D1y{%G)(kdOuL1O_rEdovo#%c}C
z^o%E*ihW3jp(0WDDFmzLIgF>-4r)$MPx~veqch$w_Ii4ph&sqD^#g&|`U@CXca_mI
z9kw{meg0J~_)R>J$cLt;^9cPM3KhY7Xm4?_y5Wa)FuCFZGMXaE<Ex2~Qwb4s9&1}%
z(7?Ur9%H|V*Ip2V=K1a0H*as7ap!HRDJcVX%YKZH*TFL>k3wl%LZbs!FKz+cz0`G`
zuM-=@QHhmZm;5dK{Eq>I!+_@me||8{@U6{akFk!3d%tA+3lrBC+-Ro#)@C*;w2o@*
zQ$tA`9DDyNl}9vV6BD1HTc??~iII!)>=+v2MGc%irqS94eT5SKzIOcyVen+=>4#F!
zP%K0sio*osO`iD&+h$Jt@1S^~^PtD-khK7CXq7kd>AbnW+D3|3_J5|a6_!_`lmOhJ
z7ps61*1&0?1w_StEIznF(6q4@s&Jv<273O}&=L;7?&Hx)YTGLyB&5EbQ+2<pv-<+g
zLGnn;{P>8v3BM)^+^`?<KXn+JwdlS!z144*4nk||Yhq*+Gk@)$ZhicQzvYf$GzgLc
zq$-1!*uc0T>j3g*H4b9-6JyaNLC~1^wt&ke!tbmx3>%@J&{Mk*!V`ho=)GYe1BUP%
z8ll?~0Sp|p<fpQ|to^)sUO09<wHn$WI^Rp;Ng>10z2O4x@;(*NCLlp1ExCaDs>56U
zK*Qm-EyC~G>E4R1szQhXdlYLw=%-3*x%CXxg$1Cv03}+yj<L*(GF8Hrcy+AjBVnoN
zo!&t~LH$rODHTOn$8d-CQW4vXgp=f){jY4PxP};pIcw{^uI(;gxZc$#-l_ga9_@K6
zXV#(o(6m4z!LD^fJX0`~Nd8_FNNC!2EqaoC_`m%$Fv9jzfiR!WOb#UibK~wUc(OS|
zQG`Sx_&3Cyf(yb0bp?;F3Yi11Cr@@$p$JtY{`Qk$4jv$ozYj5|r-<S?I{+)EU4Fmm
zCxBn;2HijdB4t9rcQAi=5&!&#l*{YG?P@ibFJA^x5dG&j3der@Ch1Q10voniKb3=r
zN1nJypvi);nSZ`4YeN}P<WcpBd5v%;v0_pB4>^oo6`dbj(%$7+r^~49A);#S)4cj&
zO)QHxR})LJym?ifnRV(qmC_|&_|Ui+1B^AA2<1(EeLyl@HpA^F!o$MgxgE#m;|D>o
z;M}#CdX`H(P%TW);O!Xq82^^}yV0eT1^-nSh!kuP;v9i>S=!Q~4Ov#7b}dCd)@Jf9
zE!`)e-O=F~ol7<}NvrU};wP+xO2@$4=xZw|0Oaia`t_3JM@qZ(k|le<ZC3!tpA6yS
zO9|jIhv7d~cf#u=QeboDNB1Q;bPtqkvid>Qul{j!r3eBx5egv**oIMP86>ay5B|^h
zM0x?AgYJKXNz_5raq_!h4q*^WHP6$_$0uUt`W=Kr5Klr790(g*2F-%+=OWlrc$dZH
zELxa1jzr`PVlhJi5(p)hfghG|4|0ug8241Bx+0Ed2c!6BPC}ZZm2}}mLU(s%NIitx
zgvmBnT?i%`;41WWcz5B72qrAVT|y>`2n}Gn5_`yI;ImSou(?p`(rKzBOHsB#26A0+
za>mV&mY4s8f*FPM3&JmU&D#M+3)gNpUXdi_!3=^8benSsSMm1kTLK;R(@9`slVaSK
zLXwIjPB$vC4e>(i_To2(6_X_?SbjDObN0XyF~EHgzkK9F#I)m95rbINVeBz@1omJ4
z?Drh4#9en@K;{1M{=F3Tgd!x#aa>ndKsWGFUwvW-l2wH6Ze%>Hh~D?s2=%zI{N-7i
zBuD@bP|InN`hYEQ6;BtMvKn-E(k&6Z*qL1hk3rC^`7DVtjgCjFME1d`{D|mYI&>W(
zTmepiB4t$8piT}pq)I_|s|HR_2aw;$d@QrLA6AZq0AJ^UcIYmBEQ?cjFI~Qz?l@_N
z+A5uxHxWQ&T@d&kF>$y_ecl>6N?_*+4&?_ol>70WBK?nq6#+w?!Eum1O7)<H$fr^2
z%UciQGtr0pJm;}vS^@?YwJ#+efZrOt>q84~u97X-nS=lx$!;J7eieqm3^EXMT(h9v
zz|-U|4b~v=P?w@7Ltyr1@wp@tqegrt>RhV+ggt6x9KT-4RcIXct7NE%J}W}RUD8L{
zI0OV#;8RA#7qvBPNO*E}@1GEErg-|JRE39_rOTtTP5NHu4zRRPaET@lDkD}?B75e{
zV>YC;1?tD}L>qNwa3Z)7mU%j_OwIxPHgtdZk3z1gLw=t~+!bTZR|9Undeu^0FFk@-
zoIB=vSi8G_7t2=G)YMG&u{%r2{fKz4Bp)B&5-AuN9aTXP4$q!ZeP|$kH50^b3A?Ko
z9Q?<gWZ|t+EJws~zDHhqBv{)k=qa@*(o_L{cv?fYMTY-#WXR!bCy^hSu!lA2Z=40w
z?U|_Kr+g=!e*>}sRWrHtLcaZ#zb&&mt^4u+b9%)C7bK@6TGv&41YP1XH#d*h$!iqj
zhI@Mcm5CA(fJ@;5m?n++&N<G(z!>}Ch@ik8_#p&E@9?h3T{N;(>c>BBEedaNh6+bW
zJWvRbhEK447k%DH30L;~L=)jnqyk|1rn4A-j#6Sp&z2wso8(-;j(|F3$rE;Rc9t=W
z#~us1TeGt8A3W<(`T+4tBm%~pttvSL1=Vh?-mHG<(o4u{g!vKkF2C&N%JoqC50b*J
zSK_5aU>F$?n93%5mKEvl!KMs|1=IP6dmVzz1$ly_%^pxZ4KWL;LujDwckE6vk^G<A
zSJwZ|49SY)_=pGpt+A8;%iq#$L1Xdyx3%atSqlt8CRbfWA!|X8iP21t!4?ezLm*kU
z@Xy3t@3t&jd7PR(T5%L=viahJhY!;Wge*JGAh2;-*JV1K_*R*jhsRMkXwR?5W6&z`
zv0Z-uC6p5+UGt3`l$AZjrrMX21PbEOUTn^g%85iIWZoKwTIg-90^lE1vGK^VDO6TM
zbkLAg5D2u!$O*OjU-QSP&m?~w4h7O%-HN=eWTWu1&!{rxA>z6StC8dv&YZY<#C;*P
z2);cEw6|7iZQMB(NPYDO{xf<Xkk9Pf;BIo)+f?FF4;R?%*pt!FKT83lKU3NMwm;`1
zq$RL!fjU6r9b^bbLXahxtC*^ctVQjMQ>*i!2u=G4TztLa&tNi!4oH}=cW@OjSWH^l
zp>f2is8lLkko{vL!p&>i+aO*@*fn3fyNUT&RL)TTdAND1mo8mOP%0#rqJ3GrH|()c
zq;Z!5)nGly<-9zz>(z7pMA4q4l=%JGr<T{0hIy1p{8@BRBrCT#TZa>R-FIiR8@dvq
zsARYS{SMh;wOEZrb?h%T>OTgLZTWwo&|!m~S3YYI+wREA#2!|wzltyJi`5m|gQ0V~
zcqosUMK#eb6+!$+xE&rqDJn8aT%h&~TC^XBSXo?JDucAW1Ar!yz3=D?v7{E;Htt-a
zB2vuur=j8~;el^wW=?=491BI)Gpx5JG8Wo<UrnS35@odX^wF;K{kq}JMr1u+D_U77
z@nDDc=r{&gh!O?w#&&HwHh%j%%Tqe5$!M0u(hSu9UsGoPmv8s~G7ViO$3y`zjR)I~
zf(K^QkP`k1xuHfmH2M8D!lo3h-0R6$i-%n1t`h+Z$zgS*{n4||HOird>{r8UoFKE0
zz1gPF1FJ>@Q#5D_Twv?Y&^y9g4xt(=tG!Cw)xL(f)Y!{{2%gWTq4!6DJVDb}2`;??
zU7eM+XyF(K2i4PhV&EK++v4KsIjBTmf(~-|KkKW&Z&c5VbP}2i5gXh<_tXI~iGXPA
zP#z$dZr+xE%-8?#h#+W{NZ3OQcfR_NmErEd5PxoOUF!Hp(0DR{)6;2ILODzOx7r1u
zM$}L=!xH^e?@Oi9x_^4NV+ax|U_pnsFm~Md`#`WIT{jd}kz@7upAYLa$Fbry1+y|U
zNg>UOApIWX$DZfgpE-Z68srAPM(u5V3=DB#@rIS{KlP?3ROUYah+Tu6{nj%mUdVdI
zz{P5c?Cj{cij4vw&h^#@wa+U*&~k;MC!P-(#?vnkh{moUUH}C9afHf!ZELU-r0e8%
zQCJ*$c0Uubkw*AI1qr~DCr_r~dB@6c_+vZ^nOB%t8iRZ8jz0ez`$)0)zt^W7pCkH5
z^JBA;%VbXTvG}rI@jU;1Xp{B><_-@Te~PO%yVD!&JaMkYzWbR2hANmeD{<eg7w0FB
zraij(NgVMLkTeAe&TiDX8d$;um8&;0gn_~MSHG)Hvg(#cV7vNar#S{`D#d0^l1uL)
z4v$8_=HJ#uVpt&S%J@f?E0yio#OKR|K(i6eOoGP%^5w*P4Ko9&DU^Ld=Nap}-b1pF
z7>_K`l(u@NEN;9a6x_b1B&RjIdB0g>X<_p~N@^<Tm~vzJ{>LDd-W*n!=MwqAJDwfB
z_VV~(-jkVjP19$vj-Qmy17oTJN48vh&*T{fR#Q-{cjVTyqjr7xyT<9$r*HUjl+8W|
zxWyVgmt69g4cax4>6AY<I&lNYGaR;fe)Hx{S&mAjgz>x-XkL4KkHX#)#oUgkrMW?>
zo`FFuHsdCaCVK5na+}dI)e>8-HhtCXw`9%YbMfZxgOjR?U?%b%aj<h@VR$Shb%A&Z
zjVHluh?fbF=VP(@k)QBl!#2=?JF9&4YShhJx6;AVNnQjxUXn*6vDmOClzn-$p(lkH
zt3pB9PNvkLt<)B{x@zZMSMnUgW%oe=(SSxx_LT9fJoH~+20vU&8f;C`(K3N`8#a(B
zOP9GHQq*dg%49f%OnqRmk__>%Ycpuiy%%420mrj0{`?ZVAafqS{PKKZel{@Ae-Jf4
z9Mym%i2{z<r)Z{KB~en6Ma3wAC(r!8X@ts1c2YA{zg!KC3&J`XI|9s)fJo5?lI<CW
z>uJpR!&leEzWfy<R5D2YidgS;$mX-Nvv=m?f#St8i#m|;FJQ@x`bl^nhM3GV`uOs;
zj^kjuQ-6VFM;Pu2856}8fy-+J!%HTcc8r`6veZ%F(lFj?!Hbsi!idtZTf6onGUjy%
zjS|4&8HNc9Rm@@Hp49F#&)wZyz0&|^fw8}E=)%1i22xf^I6Xc&m=E^lG}}Mox<KY$
zE_;aT>vnHpm5c&yB%nJTaek}_<L1n3Gtducw{Cq2rb@uGhv<YKdCo{`K-QA34xhXV
zCQSnSFo?Pv-u?F53ZmALT8$?;M#w6I{k$W+{rEH>_}~y%ud>@QSOR2oplEb9bmh(=
zv!MoecRM3v9Oz;nk^aq^Ag3a=8)?C$e0=3#25e|fHn!6@Zrr$k?_P3&RKlFj5v1qf
z>ir5S5)7WNK3<&HLni^tii|}PPDVCM*7r#<45tEmkb&RYG{YJ;Dx`y#kn*?VqV|P2
zRp&noH+I#<sysuY$3)%*TXw2<MV5)=oXGgWYUb;|YAkk*2YZEqG!wysB$E+)P=nBP
zOe*zzRyQ*~g7e@(Md(I~7@^6w?tO_x1Xq{@mx;1^h;$x)-n=nSTy{7VS>hM<eGI)z
zasqgU#+U#h+O_5@ljxdA1sTrI<`38ipfhUC&BFGvGHoKkW6(ASl(fVw?va^+j!vv(
z;<g6}nFhovK(Z!TW~9D*lHW6@7PvB_@W!7@eV6Y9#g$Aw8zD|cHW{R5;-&XO;i?~K
zDoB)Z8O&5~>r8)$5k8W=C%V8NfBXRlKqG&MU4_m<8W+kB_xK@B8Z-5DfK7bgv-^22
zLrI*lCr#27PxZ_<_)Q3X9bEbq77t$ejw><u(xIzWg@++bUV|!B)YQ~;a6{4A9NYLW
z-@ltNiaOL5BWc+9%G5YcJLl73Xzw4RRraxRb3-U7y51Y^>yElcPez553S6AOL7|XD
zGbDe%48>*om9A&9b+2;K!42Oq>Ub#IZpiG4fx)i3n^}Yal})uWE&jNUWUxE(<_tI=
z=!N{aog0e|x&txp0w*|xD6l&Wgiw^Aci3F+D8{S@4wNBHcb7i!Fqt9}#5Iwt2SnV3
zV<1Qw_M<D%oE#6nvk;!B9-U;X^A)-~P+%{@tocl?FwY_V8lF{C=^Uo<a0K_&EKXHD
zc)0n4SMg9`eXJvAjeYSBQtxPs4oJ{DqAOz<`haFyF`mxdi#X)=?Rv+C3ztjE%U1~Y
zd)&Xz?{nbXCEWr{vR58-mvI&cA0@}ZzL~Hj2J|;bEm=nAiZaJqn;<k_o13~8im1vv
zI1}!m1au&;e8@EDlk>E&u@MA_z1-2!L1))u;|rViPQQ%|kG<)n@pwHD1-VfeBc(%(
zX~K4LsI85BWNq?gDe{q_9%B3`EP9E?kl5d$`WfQXgG6jfh9@V5F<QNJi%}YdfTKrD
zyO(c1b|DwC7H*p)CM2JDdR`)rJild8M^eXmVRGWPJ9ik!gToYr3I5)N>R>(8+Hw{^
z5I5pSqj4fVD%_Z0x3K%->;b><=kq>3KKd99EIsTrmGtD<GgIsSPvuhx32iJYp3h6M
z=~oZM;vpZl1^Ur-2ZzSC2fj`DFmQ@WOH0q3Idc$a=Dunp10CeN^m=o;J5T)m4bvdY
zAI508TEBCrZT_c8Qk@<?*vZR)shmSZb{i6!LYHwz=b#6jf@wC(Q!v><9-TWK-)r*h
zG4;6n!$NR=b>}!<!3XXC%F-jnWE!sI$<wEo+1T0X;qcypl0{B@=>dR<T|>bCGAfZI
zf*=nuLCF9HC5e*r5Uhl(J>J~mn?GQi(BDx?YV+C7rA`McPJrBN$T*>ZBkw?kt@0)l
zGnOg@yP#m*83hGlEI*goLE%vB---O}Ip)Pm*M-Y%`h-INK!qnO8KYwdk+>%h99c+K
zE-rj!LeUh8v*`wgL&XQ(=Vm8{`aN}zj`Ziuz50k!>h8KQ8gl2NmV+(=L0ws``WIJJ
zQOQu&X3??7K$v@|BO@c8Y@%4Z6ZYiOZ{NMkwSWH^UFUJ{g{g0wXN`UD)?l~>)L4M<
zP*^KUaQKLHOjX7{RmCX1H}`pUf_I3tXg{h?Og?<o!Nil7kXFeEYv1SO&^-74jrAH)
ziYHof?0nhYzIzt~tU#=I0=kZV&pbse0T?jOrxQtk!}n2O5SfkIl5O)86CZ#Qsz}(9
zNE@aL(P1XG@JLY+aDgO<V$4A$BPdo(tae$ONBRhcBPcQ(nJCf~F+0`=D<j3%-u}=R
z&G*~MXa?Db+76KNJ!4~y^PBohdI6`NgW4GJ_M=Jrmg{q=&ZEzGvY}@*W{krTw}y^L
z?hLU}6DBpu5ZX?nML`cEHs$<0I~DQ*#*!bG>l1{ut|i2^?K_mJEPdC`ZlV+$i7uMq
z-jQT90S}nN2}ejvD&TwGqz;-VPuqy=Oz%FT6IX}l>{N|A`y<Al$M=bDNFZvHOLBqv
z`fTSi^(q%K$BRXv7Aeo<&7mTRX^hm2$&w*&j8UE==Zr)SWOMBQ_s9A%>5~q;8Ex5F
z!g26mm00#Df*7#?N~Dh=sqEU4$520$PlnUTFdU3dE<U~=0iCBMB}<BmSS#yC92Cqj
zu|t;2QhZ{f?PftBW^@V0Bqk9mFhMwoT8%-wGeq+uWAnI7qU@rvNkYi4BI7mSNz$Ge
z5{5f#L!|4B7TZ9CJve)-&5ON5Jo@Kf#UVPoh%vs}FzPmf22fkG9&O_(ezm{YK*|$E
zV9djHu1M^r)3hT8{6iurhRGDLlkD>R59r%ou-?oI=GBqx_F)6?md9VzVXh(jbz7^=
zd}VLbT;mhnw$6-)1RcTRxC&ZBhFuWG9*wI!z_|SSt*nuae2^vPyTuog1bj$Hixx|D
zX~qYr*s=-dyO6(-MEKj24FQ`FGFO_~;Dw+t_H#c9ZHd|tkd3eL!t1ou{p+e1sx9Mf
zN->BHzB!mf;gOM*Jy1Kr^@yuKD<ytq{Nl)j?X_#k=KX+W{TOmng<y=BeGDr4&Maq*
ziEeM3!zW?tct3s0+FR{wtpfnC3sofT%7?A^(vaDh`m!m@d>wj?SGw5_OzUF8S{)xv
zOb)6&9xv&ipCA@Be1lI&$&h?4)cS(rrimdNt;+P7V~LYtN^Ymvkfn?ztA@zIh-DFJ
zoP-e|&J*SY-bnmcBHm)CA(KdSBx;B*W1mpu-In9JSdrIiX9XVo&VwZ+)}*)k`_yZb
z2RAVwe}`;qFf>{6{UDfinorMd-Gx{*0ZKw9pPwW9QsVNu5@U7b%M)N#oGn;f$eCB%
z5`Dor@UQaq#JZOy5GkQPk^zLf$mC$c#_A>?16ZtyW-~8dmiwrIDkrCgyGy<)!5QN+
zw-der&VB-Fz_&1jW7lgU83r^}1DG_ph@HQ$9|i=$^CTRC5(-m~m=h}#_QA%0L<hZq
zB<sjz^7QO17k)?tI_uu4!%!?RtG>fQv`igt4oGn?HsupG!r)Nxa*XnGXNr-Ef{31G
z9)jFG0s9mDb_1VxQ|kwK5r`oMgNLvTMsM`>^_OZ6DY76APliOchb`o?;pEUXyf4xb
zJU>u_uPlgz3#2RPit{4*NwTC7I+_8hsRcGTS?3>8U_J?R*l9KvY_T!RDiR6<7NA{A
z%3{7434?S$vm2;fWFANihuHpScz~#fMn*>Yqh0mgB*}~&vzIypl@eraq5stpUfj+a
z#1ptV7AI3fx2LtC)Yu?!Nt{AVoXlVhY1G6VVGWeDql5xNF}*GFgFqxQ=>WCD8enGj
z+6U!@#b3P8n9%_S-w!05OWj-9b`#(DfN4lIe6I`{FC|lhBt{f-Hu)v~jA6?9b6XC<
zIOLlb$hZMa$cPMK3<8qeAHXx4w#av34vy&KLp7!AW*Vp%Bv`WI&L_A^<7;5j$^2k7
zRKG#Tyf7I@3xLm1xbdXmO<0=cjjK~RtHd*0b$x$U0tl3xFh({apWq=j6IJ9v!$uK_
zCSUGD$9IxM|3O;`^^5KD2j?Sn5_g*8@ZroVn>2*TFg>#LwMZmbl92PAj75wiS{bbn
zB8K3eIuN<_SdS7$;QAU2Lex+kw<FAsVBfO(njAZGh&Bpff=l0bMBW9(g?z^d1Z1FJ
zOfDwHN;1jR;Kt(H6XM~)J;Qg_06pXx1<%b~MUp-MK~7SK!D8U-XnKy}Aclo*G|05x
zbIy=)5kp**cr@fx5Cnp;>oYV#xGM^GB#uYEB>|7$Hdt>*#6kgyU^<NTydbQuva(Wa
z9^-JI0M|{#*LLtjt9ZyFu0!Y-qyj=(knKoQwCUQHI2f$!O57g=bsnP`8@7#K=LWSV
z;Q&&}VJFEFN`*i8DxEka`Dg!VT=|21@dZR4GDnK00O!!Ie0mhd_^<`~zZ_gS5-7k(
zO(<EC=(&&dD}u<E>JS4KPqZ?Fi5!%S(vodKnn|lefBdq<(RA9hHWK>8KF|RCPQ<Mw
z-folKL!D^g%)Z9-SilML#Y+&1VhT+SF~vz@ALw94?S(hUgnz*fka65p(dmw-Sw0-`
zL$UDMNifi^)jpEU%C_L6$oFI=*b@?oP0EE}02$InV25zH7(``ASA-)tu&?m5i4cig
zlgMK9U3#qs9{~MmTqRLO78mA`98JK5q3|{;1ZDc%bg>O~=;Hv5Q2>z?!E9whFQ}mk
zgwq|oI&t`ZfCiEW3<L|(7q6A@B+0Uej7OB=c;t0mWb&PN)*dmIa&h);69VObHvh&K
zo6Q&l>-DBG*FL-7bm#pRk!sc=f^9B?0;Farypr<R1HcqwLTP!q97$+nB-pt&Eefu{
zb3j@yHnulGl6w>$^&AzrwgCd4JSWnyL>a=gAHHY_U$GUm$CnI#kZdxPlYMjE6oJ`_
zEp#-elgGfsK^H0n2S>)+ESzzZ`qb)IknlY-M)j4AwyYzKGutR*7R=*0($ycMo3>)P
zqb(h&PeUEjhBUUxG;})&Nnwj#O)vy8ughq2IWNtfot!MnwDhHG7x;pZyZ@hkMcjWW
d-j6O)E;X7r^2_~@g~~~xO30s1IC<gb{{kSxFwp=2

literal 0
HcmV?d00001

diff --git a/human_aware_rl/ppo/run_experiments.sh b/human_aware_rl/ppo/run_experiments.sh
index 5cac975f..b70038fc 100755
--- a/human_aware_rl/ppo/run_experiments.sh
+++ b/human_aware_rl/ppo/run_experiments.sh
@@ -26,4 +26,6 @@ python ppo_rllib_client.py with  seeds=[41] layout_name="forced_coordination" nu
 python ppo_rllib_client.py with  seeds=[11] layout_name="counter_circuit_o_1order" num_workers=16 train_batch_size=12800 sgd_minibatch_size=8000 num_training_iters=500 evaluation_interval=100 use_phi=False entropy_coeff_start=0.2 entropy_coeff_end=0.0005 num_sgd_iter=8 lr=1e-3
 python ppo_rllib_client.py with  seeds=[21] layout_name="counter_circuit_o_1order" num_workers=16 train_batch_size=12800 sgd_minibatch_size=8000 num_training_iters=500 evaluation_interval=100 use_phi=False entropy_coeff_start=0.2 entropy_coeff_end=0.0005 num_sgd_iter=8 lr=1e-3
 python ppo_rllib_client.py with  seeds=[31] layout_name="counter_circuit_o_1order" num_workers=16 train_batch_size=12800 sgd_minibatch_size=8000 num_training_iters=500 evaluation_interval=100 use_phi=False entropy_coeff_start=0.2 entropy_coeff_end=0.0005 num_sgd_iter=8 lr=1e-3
-python ppo_rllib_client.py with  seeds=[41] layout_name="counter_circuit_o_1order" num_workers=16 train_batch_size=12800 sgd_minibatch_size=8000 num_training_iters=500 evaluation_interval=100 use_phi=False entropy_coeff_start=0.2 entropy_coeff_end=0.0005 num_sgd_iter=8 lr=1e-3
\ No newline at end of file
+python ppo_rllib_client.py with  seeds=[41] layout_name="counter_circuit_o_1order" num_workers=16 train_batch_size=12800 sgd_minibatch_size=8000 num_training_iters=500 evaluation_interval=100 use_phi=False entropy_coeff_start=0.2 entropy_coeff_end=0.0005 num_sgd_iter=8 lr=1e-3
+
+python plot_example_experiments.py
\ No newline at end of file

From 40bafd653e95401b7677842c8b80feecf45444d6 Mon Sep 17 00:00:00 2001
From: Alexander Lichtenstein
 <49325191+alexlichtenstein@users.noreply.github.com>
Date: Mon, 12 Sep 2022 17:22:07 +0200
Subject: [PATCH 18/38] fix for test case

---
 human_aware_rl/ppo/ppo_rllib_test.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/human_aware_rl/ppo/ppo_rllib_test.py b/human_aware_rl/ppo/ppo_rllib_test.py
index c41b2326..7cc4a9cb 100644
--- a/human_aware_rl/ppo/ppo_rllib_test.py
+++ b/human_aware_rl/ppo/ppo_rllib_test.py
@@ -280,6 +280,8 @@ def test_ppo_bc(self):
             self.assertDictEqual(results, self.expected['test_ppo_bc'])
 
     def test_resume_functionality(self):
+        if not os.path.exists(self.temp_results_dir):
+            os.makedirs(self.temp_results_dir)
         load_path = os.path.join(os.path.abspath('.'), 'trained_example/cramped_room/checkpoint-500')
         # Load and train an agent for another iteration
         results = ex_fp.run(

From a9386f7c158a373d9a916b919b918c239cbfbb1a Mon Sep 17 00:00:00 2001
From: Alexander Lichtenstein
 <49325191+alexlichtenstein@users.noreply.github.com>
Date: Mon, 12 Sep 2022 17:52:58 +0200
Subject: [PATCH 19/38] fix for test case

---
 human_aware_rl/ppo/ppo_rllib_test.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/human_aware_rl/ppo/ppo_rllib_test.py b/human_aware_rl/ppo/ppo_rllib_test.py
index 7cc4a9cb..c89263d7 100644
--- a/human_aware_rl/ppo/ppo_rllib_test.py
+++ b/human_aware_rl/ppo/ppo_rllib_test.py
@@ -280,8 +280,7 @@ def test_ppo_bc(self):
             self.assertDictEqual(results, self.expected['test_ppo_bc'])
 
     def test_resume_functionality(self):
-        if not os.path.exists(self.temp_results_dir):
-            os.makedirs(self.temp_results_dir)
+
         load_path = os.path.join(os.path.abspath('.'), 'trained_example/cramped_room/checkpoint-500')
         # Load and train an agent for another iteration
         results = ex_fp.run(
@@ -290,7 +289,8 @@ def test_resume_functionality(self):
                 "num_workers": 1,
                 "num_training_iters": 1,
                 "resume_checkpoint_path": load_path,
-                "verbose": False
+                "verbose": False,
+                "evaluation_display": False
             },
             options={'--loglevel': 'ERROR'}
         ).result

From e4908808de5cf2c38b3006df6eca6b4ba6d12691 Mon Sep 17 00:00:00 2001
From: Alexander Lichtenstein
 <49325191+alexlichtenstein@users.noreply.github.com>
Date: Mon, 12 Sep 2022 23:30:26 +0200
Subject: [PATCH 20/38] get debug info

---
 human_aware_rl/ppo/ppo_rllib_test.py | 13 ++++++------
 run_tests.sh                         | 30 ++++++++++++++--------------
 2 files changed, 22 insertions(+), 21 deletions(-)

diff --git a/human_aware_rl/ppo/ppo_rllib_test.py b/human_aware_rl/ppo/ppo_rllib_test.py
index c89263d7..b2229deb 100644
--- a/human_aware_rl/ppo/ppo_rllib_test.py
+++ b/human_aware_rl/ppo/ppo_rllib_test.py
@@ -282,6 +282,7 @@ def test_ppo_bc(self):
     def test_resume_functionality(self):
 
         load_path = os.path.join(os.path.abspath('.'), 'trained_example/cramped_room/checkpoint-500')
+        print(load_path)
         # Load and train an agent for another iteration
         results = ex_fp.run(
             config_updates={
@@ -327,12 +328,12 @@ def _clear_pickle():
         _clear_pickle()
 
     suite = unittest.TestSuite()
-    suite.addTest(TestPPORllib('test_save_load', **args))
-    suite.addTest(TestPPORllib('test_ppo_sp_no_phi', **args))
-    suite.addTest(TestPPORllib('test_ppo_sp_yes_phi', **args))
-    suite.addTest(TestPPORllib('test_ppo_fp_sp_no_phi', **args))
-    suite.addTest(TestPPORllib('test_ppo_fp_sp_yes_phi', **args))
-    suite.addTest(TestPPORllib('test_ppo_bc', **args))
+    # suite.addTest(TestPPORllib('test_save_load', **args))
+    # suite.addTest(TestPPORllib('test_ppo_sp_no_phi', **args))
+    # suite.addTest(TestPPORllib('test_ppo_sp_yes_phi', **args))
+    # suite.addTest(TestPPORllib('test_ppo_fp_sp_no_phi', **args))
+    # suite.addTest(TestPPORllib('test_ppo_fp_sp_yes_phi', **args))
+    # suite.addTest(TestPPORllib('test_ppo_bc', **args))
     suite.addTest(TestPPORllib('test_resume_functionality', **args))
 
     success = unittest.TextTestRunner(verbosity=2).run(suite).wasSuccessful()
diff --git a/run_tests.sh b/run_tests.sh
index 9cf36495..1ad4801e 100755
--- a/run_tests.sh
+++ b/run_tests.sh
@@ -4,21 +4,21 @@ cd ./human_aware_rl
 
 # Create a dummy data_dir.py if the file does not already exist
 [ ! -f data_dir.py ] && echo "import os; DATA_DIR = os.path.abspath('.')" >> data_dir.py
-
-# Human data tests
-cd ./human
-python tests.py
-cd ..
-
-# BC tests
-cd ./imitation
-python behavior_cloning_tf2_test.py
-cd ..
-
-# rllib tests
-cd ./rllib
-python tests.py
-cd ..
+#
+## Human data tests
+#cd ./human
+#python tests.py
+#cd ..
+#
+## BC tests
+#cd ./imitation
+#python behavior_cloning_tf2_test.py
+#cd ..
+#
+## rllib tests
+#cd ./rllib
+#python tests.py
+#cd ..
 
 # PPO tests
 cd ./ppo

From 81fec4248564ccf2dec0cee492e822325a143adf Mon Sep 17 00:00:00 2001
From: Alexander Lichtenstein
 <49325191+alexlichtenstein@users.noreply.github.com>
Date: Mon, 12 Sep 2022 23:46:35 +0200
Subject: [PATCH 21/38] disable logging

---
 human_aware_rl/ppo/ppo_rllib_test.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/human_aware_rl/ppo/ppo_rllib_test.py b/human_aware_rl/ppo/ppo_rllib_test.py
index b2229deb..6088d0a8 100644
--- a/human_aware_rl/ppo/ppo_rllib_test.py
+++ b/human_aware_rl/ppo/ppo_rllib_test.py
@@ -284,6 +284,8 @@ def test_resume_functionality(self):
         load_path = os.path.join(os.path.abspath('.'), 'trained_example/cramped_room/checkpoint-500')
         print(load_path)
         # Load and train an agent for another iteration
+        os.environ["TUNE_DISABLE_AUTO_CALLBACK_LOGGERS"] = "1"
+
         results = ex_fp.run(
             config_updates={
                 "results_dir": self.temp_results_dir,
@@ -291,6 +293,7 @@ def test_resume_functionality(self):
                 "num_training_iters": 1,
                 "resume_checkpoint_path": load_path,
                 "verbose": False,
+                "log_to_driver": False,
                 "evaluation_display": False
             },
             options={'--loglevel': 'ERROR'}

From 4f1f354e4ea634600eaa6dc5851dfe1bfe7d21bb Mon Sep 17 00:00:00 2001
From: Alexander Lichtenstein
 <49325191+alexlichtenstein@users.noreply.github.com>
Date: Tue, 13 Sep 2022 00:00:08 +0200
Subject: [PATCH 22/38] disable logging

---
 human_aware_rl/ppo/ppo_rllib_test.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/human_aware_rl/ppo/ppo_rllib_test.py b/human_aware_rl/ppo/ppo_rllib_test.py
index 6088d0a8..5f3494b4 100644
--- a/human_aware_rl/ppo/ppo_rllib_test.py
+++ b/human_aware_rl/ppo/ppo_rllib_test.py
@@ -291,7 +291,7 @@ def test_resume_functionality(self):
                 "results_dir": self.temp_results_dir,
                 "num_workers": 1,
                 "num_training_iters": 1,
-                "resume_checkpoint_path": load_path,
+                # "resume_checkpoint_path": load_path,
                 "verbose": False,
                 "log_to_driver": False,
                 "evaluation_display": False

From aa834b64d534ad37903d26fc6fadf2d519c2c3ce Mon Sep 17 00:00:00 2001
From: Alexander Lichtenstein
 <49325191+alexlichtenstein@users.noreply.github.com>
Date: Tue, 13 Sep 2022 00:39:41 +0200
Subject: [PATCH 23/38] disable logging

---
 human_aware_rl/ppo/ppo_rllib_test.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/human_aware_rl/ppo/ppo_rllib_test.py b/human_aware_rl/ppo/ppo_rllib_test.py
index 5f3494b4..91e7de47 100644
--- a/human_aware_rl/ppo/ppo_rllib_test.py
+++ b/human_aware_rl/ppo/ppo_rllib_test.py
@@ -291,9 +291,9 @@ def test_resume_functionality(self):
                 "results_dir": self.temp_results_dir,
                 "num_workers": 1,
                 "num_training_iters": 1,
-                # "resume_checkpoint_path": load_path,
+                "resume_checkpoint_path": load_path,
+                "bc_model_dir": load_path,
                 "verbose": False,
-                "log_to_driver": False,
                 "evaluation_display": False
             },
             options={'--loglevel': 'ERROR'}

From 41569c87b6763911a6c8d6ae408096432258f9db Mon Sep 17 00:00:00 2001
From: Alexander Lichtenstein
 <49325191+alexlichtenstein@users.noreply.github.com>
Date: Thu, 15 Sep 2022 21:28:54 +0200
Subject: [PATCH 24/38] disable logging

---
 human_aware_rl/ppo/ppo_rllib_test.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/human_aware_rl/ppo/ppo_rllib_test.py b/human_aware_rl/ppo/ppo_rllib_test.py
index 91e7de47..4b35e6e4 100644
--- a/human_aware_rl/ppo/ppo_rllib_test.py
+++ b/human_aware_rl/ppo/ppo_rllib_test.py
@@ -291,8 +291,7 @@ def test_resume_functionality(self):
                 "results_dir": self.temp_results_dir,
                 "num_workers": 1,
                 "num_training_iters": 1,
-                "resume_checkpoint_path": load_path,
-                "bc_model_dir": load_path,
+                # "resume_checkpoint_path": load_path,
                 "verbose": False,
                 "evaluation_display": False
             },

From 679ea3dcff024d7371f8814f39f4cdbf37c1e926 Mon Sep 17 00:00:00 2001
From: Alexander Lichtenstein
 <49325191+alexlichtenstein@users.noreply.github.com>
Date: Thu, 15 Sep 2022 21:54:36 +0200
Subject: [PATCH 25/38] logging check

---
 .../checkpoint-500                                  | Bin
 .../checkpoint-500.tune_metadata                    | Bin
 .../config.pkl                                      | Bin
 .../progress.csv                                    |   0
 .../result.json                                     |   0
 human_aware_rl/rllib/rllib.py                       |   3 ++-
 6 files changed, 2 insertions(+), 1 deletion(-)
 rename human_aware_rl/ppo/trained_example/{cramped_room => PPO_cramped_room_False_nw=0_vf=0.000100_es=0.200000_en=0.000500_kl=0.200000}/checkpoint-500 (100%)
 rename human_aware_rl/ppo/trained_example/{cramped_room => PPO_cramped_room_False_nw=0_vf=0.000100_es=0.200000_en=0.000500_kl=0.200000}/checkpoint-500.tune_metadata (100%)
 rename human_aware_rl/ppo/trained_example/{cramped_room => PPO_cramped_room_False_nw=0_vf=0.000100_es=0.200000_en=0.000500_kl=0.200000}/config.pkl (100%)
 rename human_aware_rl/ppo/trained_example/{cramped_room => PPO_cramped_room_False_nw=0_vf=0.000100_es=0.200000_en=0.000500_kl=0.200000}/progress.csv (100%)
 rename human_aware_rl/ppo/trained_example/{cramped_room => PPO_cramped_room_False_nw=0_vf=0.000100_es=0.200000_en=0.000500_kl=0.200000}/result.json (100%)

diff --git a/human_aware_rl/ppo/trained_example/cramped_room/checkpoint-500 b/human_aware_rl/ppo/trained_example/PPO_cramped_room_False_nw=0_vf=0.000100_es=0.200000_en=0.000500_kl=0.200000/checkpoint-500
similarity index 100%
rename from human_aware_rl/ppo/trained_example/cramped_room/checkpoint-500
rename to human_aware_rl/ppo/trained_example/PPO_cramped_room_False_nw=0_vf=0.000100_es=0.200000_en=0.000500_kl=0.200000/checkpoint-500
diff --git a/human_aware_rl/ppo/trained_example/cramped_room/checkpoint-500.tune_metadata b/human_aware_rl/ppo/trained_example/PPO_cramped_room_False_nw=0_vf=0.000100_es=0.200000_en=0.000500_kl=0.200000/checkpoint-500.tune_metadata
similarity index 100%
rename from human_aware_rl/ppo/trained_example/cramped_room/checkpoint-500.tune_metadata
rename to human_aware_rl/ppo/trained_example/PPO_cramped_room_False_nw=0_vf=0.000100_es=0.200000_en=0.000500_kl=0.200000/checkpoint-500.tune_metadata
diff --git a/human_aware_rl/ppo/trained_example/cramped_room/config.pkl b/human_aware_rl/ppo/trained_example/PPO_cramped_room_False_nw=0_vf=0.000100_es=0.200000_en=0.000500_kl=0.200000/config.pkl
similarity index 100%
rename from human_aware_rl/ppo/trained_example/cramped_room/config.pkl
rename to human_aware_rl/ppo/trained_example/PPO_cramped_room_False_nw=0_vf=0.000100_es=0.200000_en=0.000500_kl=0.200000/config.pkl
diff --git a/human_aware_rl/ppo/trained_example/cramped_room/progress.csv b/human_aware_rl/ppo/trained_example/PPO_cramped_room_False_nw=0_vf=0.000100_es=0.200000_en=0.000500_kl=0.200000/progress.csv
similarity index 100%
rename from human_aware_rl/ppo/trained_example/cramped_room/progress.csv
rename to human_aware_rl/ppo/trained_example/PPO_cramped_room_False_nw=0_vf=0.000100_es=0.200000_en=0.000500_kl=0.200000/progress.csv
diff --git a/human_aware_rl/ppo/trained_example/cramped_room/result.json b/human_aware_rl/ppo/trained_example/PPO_cramped_room_False_nw=0_vf=0.000100_es=0.200000_en=0.000500_kl=0.200000/result.json
similarity index 100%
rename from human_aware_rl/ppo/trained_example/cramped_room/result.json
rename to human_aware_rl/ppo/trained_example/PPO_cramped_room_False_nw=0_vf=0.000100_es=0.200000_en=0.000500_kl=0.200000/result.json
diff --git a/human_aware_rl/rllib/rllib.py b/human_aware_rl/rllib/rllib.py
index 222e820a..39ed8ab0 100644
--- a/human_aware_rl/rllib/rllib.py
+++ b/human_aware_rl/rllib/rllib.py
@@ -512,6 +512,7 @@ def gen_trainer_from_params(params):
             "log_to_driver" : params['verbose'],
             "logging_level" : logging.INFO if params['verbose'] else logging.CRITICAL
         }
+        print(init_params)
         ray.init(**init_params)
     register_env("overcooked_multi_agent", params['ray_params']['env_creator'])
     ModelCatalog.register_custom_model(params['ray_params']['custom_model_id'], params['ray_params']['custom_model_cls'])
@@ -635,7 +636,7 @@ def load_trainer(save_path, true_num_workers=False):
     with open(config_path, "rb") as f:
         # We use dill (instead of pickle) here because we must deserialize functions
         config = dill.load(f)
-    
+        print(config)
     if not true_num_workers:
         # Override this param to lower overhead in trainer creation
         config['training_params']['num_workers'] = 0

From 43e7fa15f0e3fa2f797b40584717db92a33ad520 Mon Sep 17 00:00:00 2001
From: Alexander Lichtenstein
 <49325191+alexlichtenstein@users.noreply.github.com>
Date: Thu, 15 Sep 2022 22:44:07 +0200
Subject: [PATCH 26/38] logging check

---
 human_aware_rl/ppo/ppo_rllib_test.py | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/human_aware_rl/ppo/ppo_rllib_test.py b/human_aware_rl/ppo/ppo_rllib_test.py
index 4b35e6e4..193da166 100644
--- a/human_aware_rl/ppo/ppo_rllib_test.py
+++ b/human_aware_rl/ppo/ppo_rllib_test.py
@@ -41,11 +41,15 @@ def setUp(self):
         set_global_seed(0)
 
         # Temporary disk space to store logging results from tests
+        self.temp_dir = os.path.join(os.path.abspath('.'), 'temp_dir')
         self.temp_results_dir = os.path.join(os.path.abspath('.'), 'results_temp')
         self.temp_model_dir = os.path.join(os.path.abspath('.'), 'model_temp')
 
 
         # Make all necessary directories
+        if not os.path.exists(self.temp_dir):
+            os.makedirs(self.temp_dir)
+
         if not os.path.exists(self.temp_model_dir):
             os.makedirs(self.temp_model_dir)
 
@@ -65,6 +69,7 @@ def tearDown(self):
                 pickle.dump(self.expected, f)
 
         # Cleanup
+        shutil.rmtree(self.temp_dir)
         shutil.rmtree(self.temp_results_dir)
         shutil.rmtree(self.temp_model_dir)
         ray.shutdown()
@@ -281,7 +286,7 @@ def test_ppo_bc(self):
 
     def test_resume_functionality(self):
 
-        load_path = os.path.join(os.path.abspath('.'), 'trained_example/cramped_room/checkpoint-500')
+        load_path = os.path.join(os.path.abspath('.'), 'trained_example/PPO_cramped_room_False_nw=0_vf=0.000100_es=0.200000_en=0.000500_kl=0.200000/checkpoint-500')
         print(load_path)
         # Load and train an agent for another iteration
         os.environ["TUNE_DISABLE_AUTO_CALLBACK_LOGGERS"] = "1"
@@ -291,7 +296,7 @@ def test_resume_functionality(self):
                 "results_dir": self.temp_results_dir,
                 "num_workers": 1,
                 "num_training_iters": 1,
-                # "resume_checkpoint_path": load_path,
+                "resume_checkpoint_path": load_path,
                 "verbose": False,
                 "evaluation_display": False
             },
@@ -303,7 +308,7 @@ def test_resume_functionality(self):
 
         threshold = 0.1
 
-        rewards = get_last_episode_rewards('trained_example/cramped_room/result.json')
+        rewards = get_last_episode_rewards('trained_example/PPO_cramped_room_False_nw=0_vf=0.000100_es=0.200000_en=0.000500_kl=0.200000/result.json')
 
         #Test total reward
         self.assertAlmostEqual(rewards['episode_reward_mean'], results['average_total_reward'],

From 11286e3adef6a7e5eb54cbfe3a24e4d2da575f0f Mon Sep 17 00:00:00 2001
From: Alexander Lichtenstein
 <49325191+alexlichtenstein@users.noreply.github.com>
Date: Thu, 15 Sep 2022 23:29:47 +0200
Subject: [PATCH 27/38] logging check

---
 human_aware_rl/rllib/rllib.py | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/human_aware_rl/rllib/rllib.py b/human_aware_rl/rllib/rllib.py
index 39ed8ab0..c6398ea4 100644
--- a/human_aware_rl/rllib/rllib.py
+++ b/human_aware_rl/rllib/rllib.py
@@ -512,11 +512,10 @@ def gen_trainer_from_params(params):
             "log_to_driver" : params['verbose'],
             "logging_level" : logging.INFO if params['verbose'] else logging.CRITICAL
         }
-        print(init_params)
         ray.init(**init_params)
     register_env("overcooked_multi_agent", params['ray_params']['env_creator'])
     ModelCatalog.register_custom_model(params['ray_params']['custom_model_id'], params['ray_params']['custom_model_cls'])
-
+    print(params)
     # Parse params
     model_params = params['model_params']
     training_params = params['training_params']
@@ -636,7 +635,7 @@ def load_trainer(save_path, true_num_workers=False):
     with open(config_path, "rb") as f:
         # We use dill (instead of pickle) here because we must deserialize functions
         config = dill.load(f)
-        print(config)
+
     if not true_num_workers:
         # Override this param to lower overhead in trainer creation
         config['training_params']['num_workers'] = 0

From 2d299156fa87510b32f969f1cf3f3978c790f245 Mon Sep 17 00:00:00 2001
From: Alexander Lichtenstein
 <49325191+alexlichtenstein@users.noreply.github.com>
Date: Thu, 15 Sep 2022 23:44:05 +0200
Subject: [PATCH 28/38] logging check

---
 human_aware_rl/rllib/rllib.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/human_aware_rl/rllib/rllib.py b/human_aware_rl/rllib/rllib.py
index c6398ea4..2416a340 100644
--- a/human_aware_rl/rllib/rllib.py
+++ b/human_aware_rl/rllib/rllib.py
@@ -551,12 +551,14 @@ def custom_logger_creator(config):
                 """Creates a Unified logger that stores results in <params['results_dir']>/<params["experiment_name"]>_<seed>_<timestamp>
                 """
                 results_dir = params['results_dir']
+                print(results_dir)
                 if not os.path.exists(results_dir):
                     try:
                         os.makedirs(results_dir)
                     except Exception as e:
                         print("error creating custom logging dir. Falling back to default logdir {}".format(DEFAULT_RESULTS_DIR))
                         results_dir = DEFAULT_RESULTS_DIR
+                        print(results_dir)
                 logdir = tempfile.mkdtemp(
                     prefix=logdir_prefix, dir=results_dir)
                 logger = UnifiedLogger(config, logdir, loggers=None)

From cca9b1ca6c61f8bd3f5c32287922fd08f6ff57a9 Mon Sep 17 00:00:00 2001
From: Alexander Lichtenstein
 <49325191+alexlichtenstein@users.noreply.github.com>
Date: Thu, 15 Sep 2022 23:51:36 +0200
Subject: [PATCH 29/38] logging check

---
 human_aware_rl/ppo/ppo_rllib_test.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/human_aware_rl/ppo/ppo_rllib_test.py b/human_aware_rl/ppo/ppo_rllib_test.py
index 193da166..94db27fe 100644
--- a/human_aware_rl/ppo/ppo_rllib_test.py
+++ b/human_aware_rl/ppo/ppo_rllib_test.py
@@ -296,7 +296,7 @@ def test_resume_functionality(self):
                 "results_dir": self.temp_results_dir,
                 "num_workers": 1,
                 "num_training_iters": 1,
-                "resume_checkpoint_path": load_path,
+                # "resume_checkpoint_path": load_path,
                 "verbose": False,
                 "evaluation_display": False
             },

From 71d26226fb0676b6c4612f63e4e6146d430b3810 Mon Sep 17 00:00:00 2001
From: Alexander Lichtenstein
 <49325191+alexlichtenstein@users.noreply.github.com>
Date: Fri, 16 Sep 2022 01:16:29 +0200
Subject: [PATCH 30/38] logging check

---
 .../ppo/ppo_rllib_from_params_client.py       | 10 +++++--
 human_aware_rl/ppo/ppo_rllib_test.py          | 10 ++-----
 human_aware_rl/rllib/rllib.py                 | 29 +++++++++++++++++++
 3 files changed, 40 insertions(+), 9 deletions(-)

diff --git a/human_aware_rl/ppo/ppo_rllib_from_params_client.py b/human_aware_rl/ppo/ppo_rllib_from_params_client.py
index 4da4c673..e74ec0de 100644
--- a/human_aware_rl/ppo/ppo_rllib_from_params_client.py
+++ b/human_aware_rl/ppo/ppo_rllib_from_params_client.py
@@ -32,7 +32,7 @@
 from ray.rllib.models import ModelCatalog
 from ray.rllib.agents.ppo.ppo import PPOTrainer
 from human_aware_rl.ppo.ppo_rllib import RllibPPOModel, RllibLSTMPPOModel
-from human_aware_rl.rllib.rllib import OvercookedMultiAgent, save_trainer, gen_trainer_from_params, load_trainer
+from human_aware_rl.rllib.rllib import OvercookedMultiAgent, save_trainer, gen_trainer_from_params, load_trainer, load_trainer_unittest
 from human_aware_rl.imitation.behavior_cloning_tf2 import BehaviorCloningPolicy, BC_SAVE_DIR
 
 
@@ -384,8 +384,14 @@ def run(params):
 
     # Check if any resume checkpoint given
     saved_path = params["resume_checkpoint_path"]
+    # Check if we load from unit test
+    unit_test = params["unit_test"]
+
     if saved_path:
-        trainer = load_trainer(save_path=saved_path, true_num_workers=True)
+        if unit_test:
+            trainer = load_trainer(save_path=saved_path, true_num_workers=True, unit_test=True)
+        else:
+            trainer = load_trainer(save_path=saved_path, true_num_workers=True)
     else:
         # Retrieve the tune.Trainable object that is used for the experiment
         trainer = gen_trainer_from_params(params)
diff --git a/human_aware_rl/ppo/ppo_rllib_test.py b/human_aware_rl/ppo/ppo_rllib_test.py
index 94db27fe..db647a90 100644
--- a/human_aware_rl/ppo/ppo_rllib_test.py
+++ b/human_aware_rl/ppo/ppo_rllib_test.py
@@ -41,15 +41,11 @@ def setUp(self):
         set_global_seed(0)
 
         # Temporary disk space to store logging results from tests
-        self.temp_dir = os.path.join(os.path.abspath('.'), 'temp_dir')
         self.temp_results_dir = os.path.join(os.path.abspath('.'), 'results_temp')
         self.temp_model_dir = os.path.join(os.path.abspath('.'), 'model_temp')
 
 
         # Make all necessary directories
-        if not os.path.exists(self.temp_dir):
-            os.makedirs(self.temp_dir)
-
         if not os.path.exists(self.temp_model_dir):
             os.makedirs(self.temp_model_dir)
 
@@ -69,7 +65,6 @@ def tearDown(self):
                 pickle.dump(self.expected, f)
 
         # Cleanup
-        shutil.rmtree(self.temp_dir)
         shutil.rmtree(self.temp_results_dir)
         shutil.rmtree(self.temp_model_dir)
         ray.shutdown()
@@ -296,9 +291,10 @@ def test_resume_functionality(self):
                 "results_dir": self.temp_results_dir,
                 "num_workers": 1,
                 "num_training_iters": 1,
-                # "resume_checkpoint_path": load_path,
+                "resume_checkpoint_path": load_path,
                 "verbose": False,
-                "evaluation_display": False
+                "evaluation_display": False,
+                "unit_test": True
             },
             options={'--loglevel': 'ERROR'}
         ).result
diff --git a/human_aware_rl/rllib/rllib.py b/human_aware_rl/rllib/rllib.py
index 2416a340..550ec521 100644
--- a/human_aware_rl/rllib/rllib.py
+++ b/human_aware_rl/rllib/rllib.py
@@ -649,6 +649,35 @@ def load_trainer(save_path, true_num_workers=False):
     trainer.restore(save_path)
     return trainer
 
+def load_trainer(save_path, true_num_workers=False, unit_test=False):
+    """
+    Returns a ray compatible trainer object that was previously saved at `save_path` by a call to `save_trainer`
+    Note that `save_path` is the full path to the checkpoint FILE, not the checkpoint directory
+    Additionally we decide if we want to use the same number of remote workers (see ray library Training APIs)
+    as we store in the previous configuration, by default = False, we use only the local worker
+    (see ray library API)
+    """
+    # Read in params used to create trainer
+    config_path = os.path.join(os.path.dirname(save_path), "config.pkl")
+    with open(config_path, "rb") as f:
+        # We use dill (instead of pickle) here because we must deserialize functions
+        config = dill.load(f)
+
+    if not true_num_workers:
+        # Override this param to lower overhead in trainer creation
+        config['training_params']['num_workers'] = 0
+
+    if unit_test:
+        # For the unit testing we update the result directory in order to avoid an error
+        config['results_dir'] = "/Users/runner/work/human_aware_rl/human_aware_rl/human_aware_rl/ppo/results_temp"
+
+    # Get un-trained trainer object with proper config
+    trainer = gen_trainer_from_params(config)
+
+    # Load weights into dummy object
+    trainer.restore(save_path)
+    return trainer
+
 def get_agent_from_trainer(trainer, policy_id="ppo", agent_index=0):
     policy = trainer.get_policy(policy_id)
     dummy_env = trainer.env_creator(trainer.config['env_config'])

From 1ea9cd676cdfbb5383458ebdb538c586907ef768 Mon Sep 17 00:00:00 2001
From: Alexander Lichtenstein
 <49325191+alexlichtenstein@users.noreply.github.com>
Date: Fri, 16 Sep 2022 01:28:28 +0200
Subject: [PATCH 31/38] logging check

---
 human_aware_rl/ppo/ppo_rllib_from_params_client.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/human_aware_rl/ppo/ppo_rllib_from_params_client.py b/human_aware_rl/ppo/ppo_rllib_from_params_client.py
index e74ec0de..29eaa8b5 100644
--- a/human_aware_rl/ppo/ppo_rllib_from_params_client.py
+++ b/human_aware_rl/ppo/ppo_rllib_from_params_client.py
@@ -32,7 +32,7 @@
 from ray.rllib.models import ModelCatalog
 from ray.rllib.agents.ppo.ppo import PPOTrainer
 from human_aware_rl.ppo.ppo_rllib import RllibPPOModel, RllibLSTMPPOModel
-from human_aware_rl.rllib.rllib import OvercookedMultiAgent, save_trainer, gen_trainer_from_params, load_trainer, load_trainer_unittest
+from human_aware_rl.rllib.rllib import OvercookedMultiAgent, save_trainer, gen_trainer_from_params, load_trainer
 from human_aware_rl.imitation.behavior_cloning_tf2 import BehaviorCloningPolicy, BC_SAVE_DIR
 
 

From e14105273e238c5bc9abc2ff7bcef3b379d49d2e Mon Sep 17 00:00:00 2001
From: Alexander Lichtenstein
 <49325191+alexlichtenstein@users.noreply.github.com>
Date: Fri, 16 Sep 2022 01:40:14 +0200
Subject: [PATCH 32/38] logging check

---
 human_aware_rl/ppo/ppo_rllib_from_params_client.py | 7 +------
 human_aware_rl/ppo/ppo_rllib_test.py               | 1 -
 human_aware_rl/rllib/rllib.py                      | 4 ++--
 3 files changed, 3 insertions(+), 9 deletions(-)

diff --git a/human_aware_rl/ppo/ppo_rllib_from_params_client.py b/human_aware_rl/ppo/ppo_rllib_from_params_client.py
index 29eaa8b5..c150f35d 100644
--- a/human_aware_rl/ppo/ppo_rllib_from_params_client.py
+++ b/human_aware_rl/ppo/ppo_rllib_from_params_client.py
@@ -384,14 +384,9 @@ def run(params):
 
     # Check if any resume checkpoint given
     saved_path = params["resume_checkpoint_path"]
-    # Check if we load from unit test
-    unit_test = params["unit_test"]
 
     if saved_path:
-        if unit_test:
-            trainer = load_trainer(save_path=saved_path, true_num_workers=True, unit_test=True)
-        else:
-            trainer = load_trainer(save_path=saved_path, true_num_workers=True)
+        trainer = load_trainer(save_path=saved_path, true_num_workers=True)
     else:
         # Retrieve the tune.Trainable object that is used for the experiment
         trainer = gen_trainer_from_params(params)
diff --git a/human_aware_rl/ppo/ppo_rllib_test.py b/human_aware_rl/ppo/ppo_rllib_test.py
index db647a90..2a9bdf4c 100644
--- a/human_aware_rl/ppo/ppo_rllib_test.py
+++ b/human_aware_rl/ppo/ppo_rllib_test.py
@@ -294,7 +294,6 @@ def test_resume_functionality(self):
                 "resume_checkpoint_path": load_path,
                 "verbose": False,
                 "evaluation_display": False,
-                "unit_test": True
             },
             options={'--loglevel': 'ERROR'}
         ).result
diff --git a/human_aware_rl/rllib/rllib.py b/human_aware_rl/rllib/rllib.py
index 550ec521..b04da1b6 100644
--- a/human_aware_rl/rllib/rllib.py
+++ b/human_aware_rl/rllib/rllib.py
@@ -649,7 +649,7 @@ def load_trainer(save_path, true_num_workers=False):
     trainer.restore(save_path)
     return trainer
 
-def load_trainer(save_path, true_num_workers=False, unit_test=False):
+def load_trainer(save_path, true_num_workers=False):
     """
     Returns a ray compatible trainer object that was previously saved at `save_path` by a call to `save_trainer`
     Note that `save_path` is the full path to the checkpoint FILE, not the checkpoint directory
@@ -667,7 +667,7 @@ def load_trainer(save_path, true_num_workers=False, unit_test=False):
         # Override this param to lower overhead in trainer creation
         config['training_params']['num_workers'] = 0
 
-    if unit_test:
+    if "trained_example" in save_path:
         # For the unit testing we update the result directory in order to avoid an error
         config['results_dir'] = "/Users/runner/work/human_aware_rl/human_aware_rl/human_aware_rl/ppo/results_temp"
 

From cda439037665b42c810e977036ea10ce17462364 Mon Sep 17 00:00:00 2001
From: Alexander Lichtenstein
 <49325191+alexlichtenstein@users.noreply.github.com>
Date: Fri, 16 Sep 2022 01:49:43 +0200
Subject: [PATCH 33/38] fix logging for unit test

---
 human_aware_rl/ppo/ppo_rllib_test.py | 16 ++++++----------
 human_aware_rl/rllib/rllib.py        |  3 ---
 run_tests.sh                         | 28 ++++++++++++++--------------
 3 files changed, 20 insertions(+), 27 deletions(-)

diff --git a/human_aware_rl/ppo/ppo_rllib_test.py b/human_aware_rl/ppo/ppo_rllib_test.py
index 2a9bdf4c..00749e45 100644
--- a/human_aware_rl/ppo/ppo_rllib_test.py
+++ b/human_aware_rl/ppo/ppo_rllib_test.py
@@ -280,12 +280,8 @@ def test_ppo_bc(self):
             self.assertDictEqual(results, self.expected['test_ppo_bc'])
 
     def test_resume_functionality(self):
-
         load_path = os.path.join(os.path.abspath('.'), 'trained_example/PPO_cramped_room_False_nw=0_vf=0.000100_es=0.200000_en=0.000500_kl=0.200000/checkpoint-500')
-        print(load_path)
         # Load and train an agent for another iteration
-        os.environ["TUNE_DISABLE_AUTO_CALLBACK_LOGGERS"] = "1"
-
         results = ex_fp.run(
             config_updates={
                 "results_dir": self.temp_results_dir,
@@ -330,12 +326,12 @@ def _clear_pickle():
         _clear_pickle()
 
     suite = unittest.TestSuite()
-    # suite.addTest(TestPPORllib('test_save_load', **args))
-    # suite.addTest(TestPPORllib('test_ppo_sp_no_phi', **args))
-    # suite.addTest(TestPPORllib('test_ppo_sp_yes_phi', **args))
-    # suite.addTest(TestPPORllib('test_ppo_fp_sp_no_phi', **args))
-    # suite.addTest(TestPPORllib('test_ppo_fp_sp_yes_phi', **args))
-    # suite.addTest(TestPPORllib('test_ppo_bc', **args))
+    suite.addTest(TestPPORllib('test_save_load', **args))
+    suite.addTest(TestPPORllib('test_ppo_sp_no_phi', **args))
+    suite.addTest(TestPPORllib('test_ppo_sp_yes_phi', **args))
+    suite.addTest(TestPPORllib('test_ppo_fp_sp_no_phi', **args))
+    suite.addTest(TestPPORllib('test_ppo_fp_sp_yes_phi', **args))
+    suite.addTest(TestPPORllib('test_ppo_bc', **args))
     suite.addTest(TestPPORllib('test_resume_functionality', **args))
 
     success = unittest.TextTestRunner(verbosity=2).run(suite).wasSuccessful()
diff --git a/human_aware_rl/rllib/rllib.py b/human_aware_rl/rllib/rllib.py
index b04da1b6..228fceff 100644
--- a/human_aware_rl/rllib/rllib.py
+++ b/human_aware_rl/rllib/rllib.py
@@ -515,7 +515,6 @@ def gen_trainer_from_params(params):
         ray.init(**init_params)
     register_env("overcooked_multi_agent", params['ray_params']['env_creator'])
     ModelCatalog.register_custom_model(params['ray_params']['custom_model_id'], params['ray_params']['custom_model_cls'])
-    print(params)
     # Parse params
     model_params = params['model_params']
     training_params = params['training_params']
@@ -551,14 +550,12 @@ def custom_logger_creator(config):
                 """Creates a Unified logger that stores results in <params['results_dir']>/<params["experiment_name"]>_<seed>_<timestamp>
                 """
                 results_dir = params['results_dir']
-                print(results_dir)
                 if not os.path.exists(results_dir):
                     try:
                         os.makedirs(results_dir)
                     except Exception as e:
                         print("error creating custom logging dir. Falling back to default logdir {}".format(DEFAULT_RESULTS_DIR))
                         results_dir = DEFAULT_RESULTS_DIR
-                        print(results_dir)
                 logdir = tempfile.mkdtemp(
                     prefix=logdir_prefix, dir=results_dir)
                 logger = UnifiedLogger(config, logdir, loggers=None)
diff --git a/run_tests.sh b/run_tests.sh
index 1ad4801e..9f264a50 100755
--- a/run_tests.sh
+++ b/run_tests.sh
@@ -5,20 +5,20 @@ cd ./human_aware_rl
 # Create a dummy data_dir.py if the file does not already exist
 [ ! -f data_dir.py ] && echo "import os; DATA_DIR = os.path.abspath('.')" >> data_dir.py
 #
-## Human data tests
-#cd ./human
-#python tests.py
-#cd ..
-#
-## BC tests
-#cd ./imitation
-#python behavior_cloning_tf2_test.py
-#cd ..
-#
-## rllib tests
-#cd ./rllib
-#python tests.py
-#cd ..
+# Human data tests
+cd ./human
+python tests.py
+cd ..
+
+# BC tests
+cd ./imitation
+python behavior_cloning_tf2_test.py
+cd ..
+
+# rllib tests
+cd ./rllib
+python tests.py
+cd ..
 
 # PPO tests
 cd ./ppo

From d80da4298a1f5b16e37e8142dcab51c715842cd0 Mon Sep 17 00:00:00 2001
From: jyan1999 <jyan19991112@gmail.com>
Date: Tue, 4 Oct 2022 22:18:45 -0700
Subject: [PATCH 34/38] Setup improvement: Removed install.sh; Added an
 __init__.py file for human_aware_rl directory so it can be properly
 recognized as a module; Updated workflow file to simplify setup and added pip
 cache action; Included a requirements.txt file for caching github workflow,
 installation can now be done via pip install -r requirements.txt; Updated test
 suite to allow unittest discovery (though it is pretty limited. More
 comprehensive discovery requires nontrivial refactoring); Included temporary
 fix to increase tests stability by setting sgd-size = training-size (it
 doesn't completely solve the issue, but does significantly increase
 stability); Updated the README to reflect the changes and fixed some errors

---
 .github/workflows/python-app.yml              |  39 ++--
 README.md                                     |  37 ++-
 human_aware_rl/__init__.py                    |   0
 human_aware_rl/human/tests.py                 | 127 +++++++----
 .../imitation/behavior_cloning_tf2_test.py    | 165 ++++++++------
 human_aware_rl/ppo/ppo_rllib_test.py          | 210 +++++++++---------
 human_aware_rl/rllib/tests.py                 |  84 ++++---
 install.sh                                    |  23 --
 requirements.txt                              |  17 ++
 run_tests.sh                                  |  24 +-
 setup.py                                      |  43 ++--
 11 files changed, 432 insertions(+), 337 deletions(-)
 create mode 100644 human_aware_rl/__init__.py
 delete mode 100755 install.sh
 create mode 100644 requirements.txt

diff --git a/.github/workflows/python-app.yml b/.github/workflows/python-app.yml
index 9c9339ac..f3f3b03a 100644
--- a/.github/workflows/python-app.yml
+++ b/.github/workflows/python-app.yml
@@ -8,31 +8,20 @@ on:
 
 jobs:
   build_osx:
-
     runs-on: macos-latest
 
     steps:
-    - uses: actions/checkout@v2
-      with:
-        submodules: true
-    - name: Python + Conda setup
-      uses: conda-incubator/setup-miniconda@v2
-      with:
-        python-version: 3.7
-    - name: Install dependencies
-      run: |
-        conda init bash
-        source ~/.bash_profile
-        conda create -n harl python=3.7
-        conda activate harl
-        python -m pip install --upgrade pip
-        ./install.sh
-        pip install tensorflow==2.0.2
-    - name: Test with unittest
-      run: |
-        conda init bash
-        source ~/.bash_profile
-        conda activate harl
-        sudo chmod 777 ./run_tests.sh
-        ./run_tests.sh
-    
+      - uses: actions/checkout@v2
+        with:
+          submodules: true
+      - name: Python setup #removed conda setup
+        uses: actions/setup-python@v4
+        with:
+          python-version: 3.7
+          cache: "pip"
+      - name: Installing dependencies
+        run: pip install -r requirements.txt
+      - name: Test with unittest
+        run: |
+          sudo chmod 777 ./run_tests.sh
+          ./run_tests.sh
diff --git a/README.md b/README.md
index a0148c65..0aa25938 100644
--- a/README.md
+++ b/README.md
@@ -31,6 +31,9 @@ $ git clone --single-branch --branch BRANCH_NAME --recursive https://github.com/
 
 
 ## CUDA 10.0 Installation on Ubuntu 18.04
+
+**Note:** The CUDA installation is **_NOT REQUIRED_**; feel free to skip this section if you only plan to run on CPUs.
+
 For Ubuntu 18.04, follow the direction [here](https://www.pugetsystems.com/labs/hpc/How-To-Install-CUDA-10-together-with-9-2-on-Ubuntu-18-04-with-support-for-NVIDIA-20XX-Turing-GPUs-1236/)
 
 The only difference being the very last step. 
@@ -49,6 +52,8 @@ $ sudo apt-get install cuda-10-0
 
 ## Conda Environment Setup
 
+While not strictly required, creating a conda environment simplifies the setup and can help avoid dependency conflicts
+
 Create a new conda environment and run the install script as before
 
 [Optional Conda Installation for 18.04](https://www.digitalocean.com/community/tutorials/how-to-install-the-anaconda-python-distribution-on-ubuntu-18-04)
@@ -56,14 +61,10 @@ Create a new conda environment and run the install script as before
 ```bash
 $ conda create -n harl_rllib python=3.7
 $ conda activate harl_rllib
-(harl_rllib) $ ./install.sh
+(harl_rllib) $ pip install -r requirements.txt
 ```
 
-Finally, install the latest stable version of tensorflow compatible with rllib
-```bash
-(harl_rllib) $ pip install tensorflow==2.0.2
-```
-Or, if working with gpus, install a version of tensorflow 2.*.* and cuDNN that is compatible with the available Cuda drivers. The following example works for Cuda 10.0.0. You can verify what version of Cuda is installed by running `nvcc --version`. For a full list of driver compatibility, refer [here](https://www.tensorflow.org/install/source#gpu)
+If working with gpus, install a version of tensorflow 2.*.* and cuDNN that is compatible with the available Cuda drivers. The following example works for Cuda 10.0.0. You can verify what version of Cuda is installed by running `nvcc --version`. For a full list of driver compatibility, refer [here](https://www.tensorflow.org/install/source#gpu)
 ```bash
 (harl_rllib) $ pip install tensorflow-gpu==2.0.0
 (harl_rllib) $ conda install -c anaconda cudnn=7.6.0
@@ -81,6 +82,10 @@ Note: if you ever get an import error, please first check if you activated the c
 
 If set-up was successful, all unit tests and local reproducibility tests should pass. They can be run as follows
 
+**NOTE**: The existing tests **_DO NOT_** guarantee reproducibility. This is a known issue with the version of ray\[rllib\] in use, and we are working on updating to the newest version, which should solve this problem. As a temporary fix, setting `sgd_minibatch_size` equal to `train_batch_size` increases stability.
+
+Due to this randomness, there is a slight chance that some tests fail intermittently by not reaching the expected total reward. This is unlikely, and can usually be fixed by rerunning the test.
+
 You can run all the tests with 
 ```bash
 (harl_rllib) $ ./run_tests.sh
@@ -92,20 +97,32 @@ Highest level integration tests that combine self play, bc training, and ppo_bc
 (harl_rllib) $ cd human_aware_rl/ppo
 (harl_rllib) human_aware_rl/ppo $ python ppo_rllib_test.py
 ```
+or 
+```bash
+(harl_rllib) $ python -m unittest human_aware_rl.ppo.ppo_rllib_test
+```
 
 ## BC Tests
 All tests involving creation, training, and saving of bc models. No dependency on rllib
+There are two test classes, depending on whether the model is trained with an LSTM; by default, the `run_tests.sh` file only tests the model without an LSTM.
 ```bash
 (harl_rllib) $ cd imitation
-(harl_rllib) imitation $ python behavior_cloning_tf2_test.py
+(harl_rllib) imitation $ python behavior_cloning_tf2_test.py TestBCTraining
+```
+or 
+```bash
+(harl_rllib) $ python -m unittest human_aware_rl.imitation.behavior_cloning_tf2_test.TestBCTraining
 ```
-
 ## Rllib Tests
 Tests rllib environments and models, as well as various utility functions. Does not actually test rllib training
 ```bash
 (harl_rllib) $ cd rllib
 (harl_rllib) rllib $ python tests.py
 ```
+or 
+```bash
+(harl_rllib) $ python -m unittest human_aware_rl.rllib.tests
+```
 
 You should see all tests passing. 
 
@@ -215,11 +232,13 @@ ModuleNotFoundError: No module named 'human_aware_rl.data_dir'
 , please run 
 
 ```
-./run_tests.sh
+pip install -r requirements.txt
 ``` 
 
 to initiate those variables
 
+This is needed because code files refer to the subdirectories as modules, and we decided to use pip to automatically add the submodule paths. This command invokes the `setup.py` file, which looks for packages in the _human_aware_rl_ directory through the `find_packages()` call and registers the modules found so they can be referenced.
+
 # Reproducing Results
 
 The specific results in that paper were obtained using code that is no longer in the master branch. If you are interested in reproducing results, please check out [this](https://github.com/HumanCompatibleAI/human_aware_rl/tree/neurips2019) and follow the install instructions there.
diff --git a/human_aware_rl/__init__.py b/human_aware_rl/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/human_aware_rl/human/tests.py b/human_aware_rl/human/tests.py
index bb5a57d0..58f0cfaf 100644
--- a/human_aware_rl/human/tests.py
+++ b/human_aware_rl/human/tests.py
@@ -1,41 +1,54 @@
 import unittest, os, shutil
 import numpy as np
 import pickle, copy
+import sys
 from numpy.testing._private.utils import assert_raises
 from overcooked_ai_py.mdp.overcooked_mdp import OvercookedGridworld, OvercookedState
 from overcooked_ai_py.mdp.overcooked_env import OvercookedEnv
 from overcooked_ai_py.agents.agent import AgentPair, GreedyHumanModel
-from overcooked_ai_py.planning.planners import MediumLevelActionManager, NO_COUNTERS_PARAMS
+from overcooked_ai_py.planning.planners import (
+    MediumLevelActionManager,
+    NO_COUNTERS_PARAMS,
+)
 from human_aware_rl.utils import equal_dicts
 
 from human_aware_rl.static import *
-from human_aware_rl.human.process_dataframes import csv_to_df_pickle, get_trajs_from_data
+from human_aware_rl.human.process_dataframes import (
+    csv_to_df_pickle,
+    get_trajs_from_data,
+)
 from human_aware_rl.human.process_human_trials import main as process_human_trials_main
 
+
 class TestProcessDataFrames(unittest.TestCase):
 
-    temp_data_dir = 'this_is_a_temp'
+    temp_data_dir = "this_is_a_temp"
     data_len_2019 = 3546
     data_len_2020 = 1189
 
     base_csv_to_df_params = {
-        "csv_path" : DUMMY_2020_RAW_HUMAN_DATA_PATH,
-        "out_dir" : "this_is_a_temp",
-        "out_file_prefix" : 'unittest',
-        "button_presses_threshold" : 0.25,
-        "perform_train_test_split" : False,
-        "silent" : True
+        "csv_path": DUMMY_2020_RAW_HUMAN_DATA_PATH,
+        "out_dir": "this_is_a_temp",
+        "out_file_prefix": "unittest",
+        "button_presses_threshold": 0.25,
+        "perform_train_test_split": False,
+        "silent": True,
     }
 
     base_get_trajs_from_data_params = {
-        "data_path" : DUMMY_2019_CLEAN_HUMAN_DATA_PATH,
-        "featurize_states" : False,
-        "check_trajectories" : False,
-        "silent" : True,
-        "layouts" : ['cramped_room']
+        "data_path": DUMMY_2019_CLEAN_HUMAN_DATA_PATH,
+        "featurize_states": False,
+        "check_trajectories": False,
+        "silent": True,
+        "layouts": ["cramped_room"],
     }
 
     def setUp(self):
+        print(
+            "\nIn Class {}, in Method {}".format(
+                self.__class__.__name__, self._testMethodName
+            )
+        )
         if not os.path.exists(self.temp_data_dir):
             os.makedirs(self.temp_data_dir)
 
@@ -44,28 +57,27 @@ def tearDown(self):
 
     def test_csv_to_df_pickle_2019(self):
         params = copy.deepcopy(self.base_csv_to_df_params)
-        params['csv_path'] = DUMMY_2019_RAW_HUMAN_DATA_PATH
-        params['button_presses_threshold'] = 0.0
+        params["csv_path"] = DUMMY_2019_RAW_HUMAN_DATA_PATH
+        params["button_presses_threshold"] = 0.0
         data = csv_to_df_pickle(**params)
         self.assertEqual(len(data), self.data_len_2019)
 
         params = copy.deepcopy(self.base_csv_to_df_params)
-        params['csv_path'] = DUMMY_2019_RAW_HUMAN_DATA_PATH
-        params['button_presses_threshold'] = 0.7
+        params["csv_path"] = DUMMY_2019_RAW_HUMAN_DATA_PATH
+        params["button_presses_threshold"] = 0.7
         data = csv_to_df_pickle(**params)
         self.assertLess(len(data), self.data_len_2019)
 
     def test_csv_to_df_pickle_2020(self):
         params = copy.deepcopy(self.base_csv_to_df_params)
-        params['button_presses_threshold'] = 0.0
+        params["button_presses_threshold"] = 0.0
         data = csv_to_df_pickle(**params)
         self.assertEqual(len(data), self.data_len_2020)
 
         params = copy.deepcopy(self.base_csv_to_df_params)
-        params['button_presses_threshold'] = 0.7
+        params["button_presses_threshold"] = 0.7
         data = csv_to_df_pickle(**params)
         self.assertLess(len(data), self.data_len_2020)
-        
 
     def test_csv_to_df_pickle(self):
         # Try various button thresholds (hand-picked to lie between different values for dummy data games)
@@ -74,17 +86,17 @@ def test_csv_to_df_pickle(self):
         for threshold in button_thresholds:
             # dummy dataset is too small to partion so we set train_test_split=False
             params = copy.deepcopy(self.base_csv_to_df_params)
-            params['button_presses_threshold'] = threshold
+            params["button_presses_threshold"] = threshold
             data = csv_to_df_pickle(**params)
             lengths.append(len(data))
-        
+
         # Filtered data size should be monotonically decreasing wrt button_threshold
         for i in range(len(lengths) - 1):
-            self.assertGreaterEqual(lengths[i], lengths[i+1])
+            self.assertGreaterEqual(lengths[i], lengths[i + 1])
 
         # Picking a threshold that's suficiently high discards all data, should result in value error
         params = copy.deepcopy(self.base_csv_to_df_params)
-        params['button_presses_threshold'] = 0.8
+        params["button_presses_threshold"] = 0.8
         self.assertRaises(ValueError, csv_to_df_pickle, **params)
 
     def test_get_trajs_from_data_2019(self):
@@ -93,22 +105,22 @@ def test_get_trajs_from_data_2019(self):
 
     def test_get_trajs_from_data_2019_featurize(self):
         params = copy.deepcopy(self.base_get_trajs_from_data_params)
-        params['featurize_states'] = True
+        params["featurize_states"] = True
         trajectories, _ = get_trajs_from_data(**params)
 
     def test_get_trajs_from_data_2020(self):
         # Ensure we can properly deserialize states with updated objects (i.e tomatoes)
         params = copy.deepcopy(self.base_get_trajs_from_data_params)
-        params['layouts'] = ['inverse_marshmallow_experiment']
-        params['data_path'] = DUMMY_2020_CLEAN_HUMAN_DATA_PATH
+        params["layouts"] = ["inverse_marshmallow_experiment"]
+        params["data_path"] = DUMMY_2020_CLEAN_HUMAN_DATA_PATH
         trajectories, _ = get_trajs_from_data(**params)
 
     def test_get_trajs_from_data_2020_featurize(self):
         # Ensure we can properly featurize states with updated dynamics and updated objects (i.e tomatoes)
         params = copy.deepcopy(self.base_get_trajs_from_data_params)
-        params['layouts'] = ['inverse_marshmallow_experiment']
-        params['data_path'] = DUMMY_2020_CLEAN_HUMAN_DATA_PATH
-        params['featurize_states'] = True
+        params["layouts"] = ["inverse_marshmallow_experiment"]
+        params["data_path"] = DUMMY_2020_CLEAN_HUMAN_DATA_PATH
+        params["featurize_states"] = True
         trajectories, _ = get_trajs_from_data(**params)
 
     def test_csv_to_df_to_trajs_integration(self):
@@ -117,33 +129,53 @@ def test_csv_to_df_to_trajs_integration(self):
         _ = csv_to_df_pickle(**params)
 
         params = copy.deepcopy(self.base_get_trajs_from_data_params)
-        params['data_path'] = os.path.join(self.temp_data_dir, 'unittest_all.pickle')
-        params['layouts'] = ['inverse_marshmallow_experiment']
+        params["data_path"] = os.path.join(self.temp_data_dir, "unittest_all.pickle")
+        params["layouts"] = ["inverse_marshmallow_experiment"]
         _ = get_trajs_from_data(**params)
 
+
 class TestHumanDataConversion(unittest.TestCase):
 
-    temp_dir = 'this_is_also_a_temp'
+    temp_dir = "this_is_also_a_temp"
     infile = DUMMY_2019_CLEAN_HUMAN_DATA_PATH
     horizon = 400
     DATA_TYPE = "train"
     layout_name = "cramped_room"
 
     def _equal_pickle_and_env_state_dict(self, pickle_state_dict, env_state_dict):
-        return equal_dicts(pickle_state_dict, env_state_dict, ['timestep', 'all_orders', 'bonus_orders'])
+        return equal_dicts(
+            pickle_state_dict,
+            env_state_dict,
+            ["timestep", "all_orders", "bonus_orders"],
+        )
 
     def setUp(self):
+        print(
+            "\nIn Class {}, in Method {}".format(
+                self.__class__.__name__, self._testMethodName
+            )
+        )
         if not os.path.exists(self.temp_dir):
             os.makedirs(self.temp_dir)
-        
+
         self.base_mdp = OvercookedGridworld.from_layout_name(self.layout_name)
-        self.mlam = MediumLevelActionManager.from_pickle_or_compute(self.base_mdp, NO_COUNTERS_PARAMS,
-                                                                    force_compute=True, info=False)
-        self.env = OvercookedEnv.from_mdp(self.base_mdp, horizon=self.horizon, info_level=0)
+        self.mlam = MediumLevelActionManager.from_pickle_or_compute(
+            self.base_mdp, NO_COUNTERS_PARAMS, force_compute=True, info=False
+        )
+        self.env = OvercookedEnv.from_mdp(
+            self.base_mdp, horizon=self.horizon, info_level=0
+        )
         self.starting_state_dict = self.base_mdp.get_standard_start_state().to_dict()
 
-        outfile = process_human_trials_main(self.infile, self.temp_dir, insert_interacts=True, verbose=False,  forward_port=False, fix_json=False)
-        with open(outfile, 'rb') as f:
+        outfile = process_human_trials_main(
+            self.infile,
+            self.temp_dir,
+            insert_interacts=True,
+            verbose=False,
+            forward_port=False,
+            fix_json=False,
+        )
+        with open(outfile, "rb") as f:
             self.human_data = pickle.load(f)[self.layout_name]
 
     def tearDown(self):
@@ -156,15 +188,16 @@ def test_state(self):
                 self.env.reset()
             else:
                 self.assertTrue(
-                    self._equal_pickle_and_env_state_dict(state_dict, self.env.state.to_dict()),
+                    self._equal_pickle_and_env_state_dict(
+                        state_dict, self.env.state.to_dict()
+                    ),
                     "Expected state:\t\n{}\n\nActual state:\t\n{}".format(
-                        self.env.state.to_dict(),
-                        state_dict
-                    )
+                        self.env.state.to_dict(), state_dict
+                    ),
                 )
             self.env.step(joint_action=joint_action)
             idx += 1
 
 
-if __name__ == '__main__':
-    unittest.main()
\ No newline at end of file
+if __name__ == "__main__":
+    unittest.main()
diff --git a/human_aware_rl/imitation/behavior_cloning_tf2_test.py b/human_aware_rl/imitation/behavior_cloning_tf2_test.py
index f82085bc..e3a132a0 100644
--- a/human_aware_rl/imitation/behavior_cloning_tf2_test.py
+++ b/human_aware_rl/imitation/behavior_cloning_tf2_test.py
@@ -1,10 +1,26 @@
-import unittest, os, shutil, copy, pickle, random, argparse, sys
+import unittest, os, shutil, warnings, pickle, argparse, sys
 import numpy as np
 import tensorflow as tf
 from human_aware_rl.utils import set_global_seed
-from human_aware_rl.imitation.behavior_cloning_tf2 import BC_SAVE_DIR, get_bc_params, train_bc_model, build_bc_model, save_bc_model, load_bc_model, evaluate_bc_model
+from human_aware_rl.imitation.behavior_cloning_tf2 import (
+    BC_SAVE_DIR,
+    get_bc_params,
+    train_bc_model,
+    build_bc_model,
+    save_bc_model,
+    load_bc_model,
+    evaluate_bc_model,
+)
 from human_aware_rl.human.process_dataframes import get_trajs_from_data
-from human_aware_rl.static import BC_EXPECTED_DATA_PATH, DUMMY_2019_CLEAN_HUMAN_DATA_PATH
+from human_aware_rl.static import (
+    BC_EXPECTED_DATA_PATH,
+    DUMMY_2019_CLEAN_HUMAN_DATA_PATH,
+)
+
+
+def _clear_pickle():
+    with open(BC_EXPECTED_DATA_PATH, "wb") as f:
+        pickle.dump({}, f)
 
 
 class TestBCTraining(unittest.TestCase):
@@ -19,15 +35,31 @@ class TestBCTraining(unittest.TestCase):
     Note, this test always performs a basic sanity check to verify some learning is happening, even if the `strict` param is false
     """
 
-    def __init__(self, test_name, compute_pickle, strict, min_performance, **kwargs):
+    def __init__(self, test_name):
         super(TestBCTraining, self).__init__(test_name)
-        self.compute_pickle = compute_pickle
-        self.strict = strict
-        self.min_performance = min_performance
-    
+        self.compute_pickle = False
+        self.strict = False
+        self.min_performance = 0
+        assert not (
+            self.compute_pickle and self.strict
+        ), "Cannot compute pickle and run strict reproducibility tests at same time"
+        if self.compute_pickle:
+            _clear_pickle()
+
     def setUp(self):
         set_global_seed(0)
-        self.bc_params = get_bc_params(**{"data_path" : DUMMY_2019_CLEAN_HUMAN_DATA_PATH})
+        print(
+            "\nIn Class {}, in Method {}".format(
+                self.__class__.__name__, self._testMethodName
+            )
+        )
+        # unittest generates a lot of warning msgs due to third-party dependencies (e.g. ray[rllib] using outdated np methods)
+        # not a problem when directly ran, but when using -m unittest this helps filter out the warnings
+        warnings.simplefilter("ignore", ResourceWarning)
+        warnings.simplefilter("ignore", DeprecationWarning)
+        self.bc_params = get_bc_params(
+            **{"data_path": DUMMY_2019_CLEAN_HUMAN_DATA_PATH}
+        )
         self.bc_params["mdp_params"]["layout_name"] = "cramped_room"
         self.bc_params["training_params"]["epochs"] = 1
         self.model_dir = os.path.join(BC_SAVE_DIR, "test_model")
@@ -35,45 +67,57 @@ def setUp(self):
         if not os.path.exists(self.model_dir):
             os.makedirs(self.model_dir)
 
-        processed_trajs, _ = get_trajs_from_data(**self.bc_params["data_params"], silent=True)
+        processed_trajs, _ = get_trajs_from_data(
+            **self.bc_params["data_params"], silent=True
+        )
         self.dummy_input = np.vstack(processed_trajs["ep_states"])[:1, :]
-        self.initial_states = [np.zeros((1, self.bc_params['cell_size'])), np.zeros((1, self.bc_params['cell_size']))]
+        self.initial_states = [
+            np.zeros((1, self.bc_params["cell_size"])),
+            np.zeros((1, self.bc_params["cell_size"])),
+        ]
         with open(BC_EXPECTED_DATA_PATH, "rb") as f:
             self.expected = pickle.load(f)
 
         # Disable TF warnings and infos
-        tf.get_logger().setLevel('ERROR')
+        tf.get_logger().setLevel("ERROR")
 
     def tearDown(self):
         if self.compute_pickle:
-            with open(BC_EXPECTED_DATA_PATH, 'wb') as f:
+            with open(BC_EXPECTED_DATA_PATH, "wb") as f:
                 pickle.dump(self.expected, f)
 
         shutil.rmtree(self.model_dir)
 
     def test_model_construction(self):
         model = build_bc_model(**self.bc_params)
-        
+
         if self.compute_pickle:
-            self.expected['test_model_construction'] = model(self.dummy_input)
+            self.expected["test_model_construction"] = model(self.dummy_input)
         if self.strict:
-            self.assertTrue(np.allclose(model(self.dummy_input), self.expected["test_model_construction"]))
+            self.assertTrue(
+                np.allclose(
+                    model(self.dummy_input), self.expected["test_model_construction"]
+                )
+            )
 
     def test_save_and_load(self):
         model = build_bc_model(**self.bc_params)
         save_bc_model(self.model_dir, model, self.bc_params)
         loaded_model, loaded_params = load_bc_model(self.model_dir)
         self.assertDictEqual(self.bc_params, loaded_params)
-        self.assertTrue(np.allclose(model(self.dummy_input), loaded_model(self.dummy_input)))
-
+        self.assertTrue(
+            np.allclose(model(self.dummy_input), loaded_model(self.dummy_input))
+        )
 
-    def test_training(self):        
+    def test_training(self):
         model = train_bc_model(self.model_dir, self.bc_params)
 
         if self.compute_pickle:
-            self.expected['test_training'] = model(self.dummy_input)
+            self.expected["test_training"] = model(self.dummy_input)
         if self.strict:
-            self.assertTrue(np.allclose(model(self.dummy_input), self.expected["test_training"]))
+            self.assertTrue(
+                np.allclose(model(self.dummy_input), self.expected["test_training"])
+            )
 
     def test_agent_evaluation(self):
         self.bc_params["training_params"]["epochs"] = 20
@@ -84,30 +128,41 @@ def test_agent_evaluation(self):
         self.assertGreaterEqual(results, self.min_performance)
 
         if self.compute_pickle:
-            self.expected['test_agent_evaluation'] = results
+            self.expected["test_agent_evaluation"] = results
         if self.strict:
-            self.assertAlmostEqual(results, self.expected['test_agent_evaluation'])
+            self.assertAlmostEqual(results, self.expected["test_agent_evaluation"])
+
 
+class TestBCTrainingLSTM(TestBCTraining):
+    # LSTM tests break on older versions of tensorflow so be careful with this
     def test_lstm_construction(self):
-        self.bc_params['use_lstm'] = True
+        self.bc_params["use_lstm"] = True
         model = build_bc_model(**self.bc_params)
 
         if self.compute_pickle:
-            self.expected['test_lstm_construction'] = model(self.dummy_input)
+            self.expected["test_lstm_construction"] = model(self.dummy_input)
         if self.strict:
-            self.assertTrue(np.allclose(model(self.dummy_input), self.expected["test_lstm_construction"]))
+            self.assertTrue(
+                np.allclose(
+                    model(self.dummy_input), self.expected["test_lstm_construction"]
+                )
+            )
 
     def test_lstm_training(self):
-        self.bc_params['use_lstm'] = True
+        self.bc_params["use_lstm"] = True
         model = train_bc_model(self.model_dir, self.bc_params)
 
         if self.compute_pickle:
-            self.expected['test_lstm_training'] = model(self.dummy_input)
+            self.expected["test_lstm_training"] = model(self.dummy_input)
         if self.strict:
-            self.assertTrue(np.allclose(model(self.dummy_input), self.expected["test_lstm_training"]))
+            self.assertTrue(
+                np.allclose(
+                    model(self.dummy_input), self.expected["test_lstm_training"]
+                )
+            )
 
     def test_lstm_evaluation(self):
-        self.bc_params['use_lstm'] = True
+        self.bc_params["use_lstm"] = True
         self.bc_params["training_params"]["epochs"] = 1
         model = train_bc_model(self.model_dir, self.bc_params)
         results = evaluate_bc_model(model, self.bc_params)
@@ -116,17 +171,22 @@ def test_lstm_evaluation(self):
         self.assertGreaterEqual(results, self.min_performance)
 
         if self.compute_pickle:
-            self.expected['test_lstm_evaluation'] = results
+            self.expected["test_lstm_evaluation"] = results
         if self.strict:
-            self.assertAlmostEqual(results, self.expected['test_lstm_evaluation'])
+            self.assertAlmostEqual(results, self.expected["test_lstm_evaluation"])
 
     def test_lstm_save_and_load(self):
-        self.bc_params['use_lstm'] = True
+        self.bc_params["use_lstm"] = True
         model = build_bc_model(**self.bc_params)
         save_bc_model(self.model_dir, model, self.bc_params)
         loaded_model, loaded_params = load_bc_model(self.model_dir)
         self.assertDictEqual(self.bc_params, loaded_params)
-        self.assertTrue(np.allclose(self._lstm_forward(model, self.dummy_input)[0], self._lstm_forward(loaded_model, self.dummy_input)[0]))
+        self.assertTrue(
+            np.allclose(
+                self._lstm_forward(model, self.dummy_input)[0],
+                self._lstm_forward(loaded_model, self.dummy_input)[0],
+            )
+        )
 
     def _lstm_forward(self, model, obs_batch, states=None):
         obs_batch = np.expand_dims(obs_batch, 1)
@@ -138,38 +198,5 @@ def _lstm_forward(self, model, obs_batch, states=None):
         return logits, states
 
 
-
-def _clear_pickle():
-    with open(BC_EXPECTED_DATA_PATH, 'wb') as f:
-        pickle.dump({}, f)
-
-if __name__ == '__main__':
-    parser = argparse.ArgumentParser()
-    parser.add_argument('--compute-pickle', '-cp', action="store_true")
-    parser.add_argument('--strict', '-s', action="store_true")
-    parser.add_argument('--min-performance', '-mp', default=0)
-    parser.add_argument('--run-lstm-tests', action="store_true")
-
-    args = vars(parser.parse_args())
-
-    tf_version = tf.__version__
-
-    assert not (args['compute_pickle'] and args['strict']), "Cannot compute pickle and run strict reproducibility tests at same time"
-
-    if args['compute_pickle']:
-        _clear_pickle()
-
-    suite = unittest.TestSuite()
-    suite.addTest(TestBCTraining('test_model_construction', **args))
-    suite.addTest(TestBCTraining('test_save_and_load', **args))
-    suite.addTest(TestBCTraining('test_training', **args))
-    suite.addTest(TestBCTraining('test_agent_evaluation', **args))
-
-    # LSTM tests break on older versions of tensorflow so be careful with this
-    if args['run_lstm_tests']:
-        suite.addTest(TestBCTraining('test_lstm_save_and_load', **args))
-        suite.addTest(TestBCTraining('test_lstm_construction', **args))
-        suite.addTest(TestBCTraining('test_lstm_training', **args))
-        suite.addTest(TestBCTraining('test_lstm_evaluation', **args))
-    success = unittest.TextTestRunner(verbosity=2).run(suite).wasSuccessful()
-    sys.exit(not success)
\ No newline at end of file
+if __name__ == "__main__":
+    unittest.main()
diff --git a/human_aware_rl/ppo/ppo_rllib_test.py b/human_aware_rl/ppo/ppo_rllib_test.py
index 00749e45..30c6923b 100644
--- a/human_aware_rl/ppo/ppo_rllib_test.py
+++ b/human_aware_rl/ppo/ppo_rllib_test.py
@@ -1,5 +1,6 @@
-import unittest, os, shutil, pickle, ray, random, argparse, sys, glob
-os.environ['RUN_ENV'] = 'local'
+import unittest, os, shutil, pickle, ray, random, glob, warnings, os
+
+os.environ["RUN_ENV"] = "local"
 from human_aware_rl.ppo.ppo_rllib_client import ex
 from human_aware_rl.ppo.ppo_rllib_from_params_client import ex_fp
 from human_aware_rl.static import PPO_EXPECTED_DATA_PATH
@@ -19,6 +20,13 @@ def set_global_seed(seed):
     tf.random.set_seed(seed)
     tf.compat.v1.set_random_seed(seed)
 
+
+def _clear_pickle():
+    # Write an empty dictionary to our static "expected" results location
+    with open(PPO_EXPECTED_DATA_PATH, "wb") as f:
+        pickle.dump({}, f)
+
+
 class TestPPORllib(unittest.TestCase):
 
     """
@@ -31,19 +39,37 @@ class TestPPORllib(unittest.TestCase):
     Note, this test always performs a basic sanity check to verify some learning is happening, even if the `strict` param is false
     """
 
-    def __init__(self, test_name, compute_pickle, strict, min_performance):
+    def __init__(self, test_name):
         super(TestPPORllib, self).__init__(test_name)
-        self.compute_pickle = compute_pickle
-        self.strict = strict
-        self.min_performance = min_performance
+        # changing the cwd to where the test file is
+        # default parameters, feel free to change
+        self.compute_pickle = False
+        # Reproducibility test
+        self.strict = False
+        self.min_performance = 5
+        assert not (
+            self.compute_pickle and self.strict
+        ), "Cannot compute pickle and run strict reproducibility tests at same time"
+        if self.compute_pickle:
+            _clear_pickle()
 
     def setUp(self):
         set_global_seed(0)
+        print(
+            "\nIn Class {}, in Method {}".format(
+                self.__class__.__name__, self._testMethodName
+            )
+        )
+        # unittest generates a lot of warning msgs due to third-party dependencies (e.g. ray[rllib] using outdated np methods)
+        # not a problem when directly ran, but when using -m unittest this helps filter out the warnings
+        warnings.simplefilter("ignore", ResourceWarning)
+        warnings.simplefilter("ignore", DeprecationWarning)
 
+        # Setting CWD 
+        os.chdir(os.path.dirname(os.path.abspath(__file__)))
         # Temporary disk space to store logging results from tests
-        self.temp_results_dir = os.path.join(os.path.abspath('.'), 'results_temp')
-        self.temp_model_dir = os.path.join(os.path.abspath('.'), 'model_temp')
-
+        self.temp_results_dir = os.path.join(os.path.abspath("."), "results_temp")
+        self.temp_model_dir = os.path.join(os.path.abspath("."), "model_temp")
 
         # Make all necessary directories
         if not os.path.exists(self.temp_model_dir):
@@ -53,7 +79,7 @@ def setUp(self):
             os.makedirs(self.temp_results_dir)
 
         # Load in expected values (this is an empty dict if compute_pickle=True)
-        with open(PPO_EXPECTED_DATA_PATH, 'rb') as f:
+        with open(PPO_EXPECTED_DATA_PATH, "rb") as f:
             self.expected = pickle.load(f)
 
     def tearDown(self):
@@ -61,7 +87,7 @@ def tearDown(self):
         # Note: This causes unit tests to have a side effect (generally frowned upon) and only works because
         #   unittest is single threaded. If tests were run concurrently this could result in a race condition!
         if self.compute_pickle:
-            with open(PPO_EXPECTED_DATA_PATH, 'wb') as f:
+            with open(PPO_EXPECTED_DATA_PATH, "wb") as f:
                 pickle.dump(self.expected, f)
 
         # Cleanup
@@ -75,8 +101,8 @@ def test_save_load(self):
             config_updates={
                 # Please feel free to modify the parameters below
                 "results_dir": self.temp_results_dir,
-                "experiment_name" : "save_load_test",
-                "layout_name" : "cramped_room",
+                "experiment_name": "save_load_test",
+                "layout_name": "cramped_room",
                 "num_workers": 1,
                 "train_batch_size": 800,
                 "sgd_minibatch_size": 800,
@@ -86,16 +112,20 @@ def test_save_load(self):
                 "entropy_coeff_end": 0.0,
                 "use_phi": False,
                 "evaluation_display": False,
-                "verbose" : False
+                "verbose": False,
             },
-            options={'--loglevel': 'ERROR'}
+            options={"--loglevel": "ERROR"},
         )
 
         # Kill all ray processes to ensure loading works in a vaccuum
         ray.shutdown()
 
         # Where the agent is stored (this is kind of hardcoded, would like for it to be more easily obtainable)
-        load_path = os.path.join(glob.glob(os.path.join(self.temp_results_dir, "save_load_test*"))[0], 'checkpoint_2', 'checkpoint-2')
+        load_path = os.path.join(
+            glob.glob(os.path.join(self.temp_results_dir, "save_load_test*"))[0],
+            "checkpoint_2",
+            "checkpoint-2",
+        )
 
         # Load a dummy state
         mdp = OvercookedGridworld.from_layout_name("cramped_room")
@@ -114,12 +144,13 @@ def test_save_load(self):
 
         # Now let's load an agent pair and evaluate it
         agent_pair = load_agent_pair(load_path)
-        ae = AgentEvaluator.from_layout_name(mdp_params={"layout_name" : "cramped_room"}, env_params={"horizon" : 400})
+        ae = AgentEvaluator.from_layout_name(
+            mdp_params={"layout_name": "cramped_room"}, env_params={"horizon": 400}
+        )
 
         # We assume no runtime errors => success, no performance consistency check for now
         ae.evaluate_agent_pair(agent_pair, 1, info=False)
 
-
     def test_ppo_sp_no_phi(self):
         # Train a self play agent for 20 iterations
         results = ex.run(
@@ -127,7 +158,7 @@ def test_ppo_sp_no_phi(self):
                 # Please feel free to modify the parameters below
                 "results_dir": self.temp_results_dir,
                 "num_workers": 2,
-                "train_batch_size": 1600,
+                "train_batch_size": 800,
                 "sgd_minibatch_size": 800,
                 "num_training_iters": 30,
                 "evaluation_interval": 10,
@@ -135,20 +166,19 @@ def test_ppo_sp_no_phi(self):
                 "entropy_coeff_end": 0.0,
                 "use_phi": False,
                 "evaluation_display": False,
-                "verbose" : False
+                "verbose": False,
             },
-            options={'--loglevel': 'ERROR'}
+            options={"--loglevel": "ERROR"},
         ).result
-
         # Sanity check (make sure it begins to learn to receive dense reward)
-        self.assertGreaterEqual(results['average_total_reward'], self.min_performance)
+        self.assertGreaterEqual(results["average_total_reward"], self.min_performance)
 
         if self.compute_pickle:
-            self.expected['test_ppo_sp_no_phi'] = results
+            self.expected["test_ppo_sp_no_phi"] = results
 
         # Reproducibility test
         if self.strict:
-            self.assertDictEqual(results, self.expected['test_ppo_sp_no_phi'])
+            self.assertDictEqual(results, self.expected["test_ppo_sp_no_phi"])
 
     def test_ppo_sp_yes_phi(self):
         # Train a self play agent for 20 iterations
@@ -157,7 +187,7 @@ def test_ppo_sp_yes_phi(self):
                 # Please feel free to modify the parameters below
                 "results_dir": self.temp_results_dir,
                 "num_workers": 2,
-                "train_batch_size": 1600,
+                "train_batch_size": 800,
                 "sgd_minibatch_size": 800,
                 "num_training_iters": 30,
                 "evaluation_interval": 10,
@@ -165,21 +195,20 @@ def test_ppo_sp_yes_phi(self):
                 "entropy_coeff_end": 0.0,
                 "use_phi": True,
                 "evaluation_display": False,
-                "verbose" : False
+                "verbose": False,
             },
-            options={'--loglevel': 'ERROR'}
+            options={"--loglevel": "ERROR"},
         ).result
 
         # Sanity check (make sure it begins to learn to receive dense reward)
-        self.assertGreaterEqual(results['average_total_reward'], self.min_performance)
+        self.assertGreaterEqual(results["average_total_reward"], self.min_performance)
 
         if self.compute_pickle:
-            self.expected['test_ppo_sp_yes_phi'] = results
+            self.expected["test_ppo_sp_yes_phi"] = results
 
         # Reproducibility test
         if self.strict:
-            self.assertDictEqual(results, self.expected['test_ppo_sp_yes_phi'])
-
+            self.assertDictEqual(results, self.expected["test_ppo_sp_yes_phi"])
 
     def test_ppo_fp_sp_no_phi(self):
         # Train a self play agent for 20 iterations
@@ -198,21 +227,19 @@ def test_ppo_fp_sp_no_phi(self):
                 "seeds": [0],
                 "outer_shape": (5, 4),
                 "evaluation_display": False,
-                "verbose" : False
+                "verbose": False,
             },
-            options={'--loglevel': 'ERROR'}
+            options={"--loglevel": "ERROR"},
         ).result
-
         # Sanity check (make sure it begins to learn to receive dense reward)
-        self.assertGreaterEqual(results['average_total_reward'], self.min_performance)
+        self.assertGreaterEqual(results["average_total_reward"], self.min_performance)
 
         if self.compute_pickle:
-            self.expected['test_ppo_fp_sp_no_phi'] = results
+            self.expected["test_ppo_fp_sp_no_phi"] = results
 
         # Reproducibility test
         if self.strict:
-            self.assertDictEqual(results, self.expected['test_ppo_fp_sp_no_phi'])
-
+            self.assertDictEqual(results, self.expected["test_ppo_fp_sp_no_phi"])
 
     def test_ppo_fp_sp_yes_phi(self):
         # Train a self play agent for 20 iterations
@@ -231,56 +258,59 @@ def test_ppo_fp_sp_yes_phi(self):
                 "seeds": [0],
                 "outer_shape": (5, 4),
                 "evaluation_display": False,
-                "verbose" : False
+                "verbose": False,
             },
-            options={'--loglevel': 'ERROR'}
+            options={"--loglevel": "ERROR"},
         ).result
 
         # Sanity check (make sure it begins to learn to receive dense reward)
-        self.assertGreaterEqual(results['average_total_reward'], self.min_performance)
+        self.assertGreaterEqual(results["average_total_reward"], self.min_performance)
 
         if self.compute_pickle:
-            self.expected['test_ppo_fp_sp_yes_phi'] = results
+            self.expected["test_ppo_fp_sp_yes_phi"] = results
 
         # Reproducibility test
         if self.strict:
-            self.assertDictEqual(results, self.expected['test_ppo_fp_sp_yes_phi'])
-
+            self.assertDictEqual(results, self.expected["test_ppo_fp_sp_yes_phi"])
 
     def test_ppo_bc(self):
         # Train bc model
         model_dir = self.temp_model_dir
         params_to_override = {
-            "layouts" : ['inverse_marshmallow_experiment'],
-            "data_path" : None,
-            "epochs" : 10
+            "layouts": ["inverse_marshmallow_experiment"],
+            "data_path": None,
+            "epochs": 10,
         }
         bc_params = get_bc_params(**params_to_override)
         train_bc_model(model_dir, bc_params)
 
         # Train rllib model
         config_updates = {
-            "results_dir" : self.temp_results_dir,
-            "bc_schedule" : [(0.0, 0.0), (8e3, 1.0)],
-            "num_training_iters" : 20,
-            "bc_model_dir" : model_dir,
-            "evaluation_interval" : 5,
-            "verbose" : False
+            "results_dir": self.temp_results_dir,
+            "bc_schedule": [(0.0, 0.0), (8e3, 1.0)],
+            "num_training_iters": 20,
+            "bc_model_dir": model_dir,
+            "evaluation_interval": 5,
+            "verbose": False,
         }
-        results = ex.run(config_updates=config_updates, options={'--loglevel': 'ERROR'}).result
-
+        results = ex.run(
+            config_updates=config_updates, options={"--loglevel": "ERROR"}
+        ).result
         # Sanity check
-        self.assertGreaterEqual(results['average_total_reward'], self.min_performance)
+        self.assertGreaterEqual(results["average_total_reward"], self.min_performance)
 
         if self.compute_pickle:
-            self.expected['test_ppo_bc'] = results
+            self.expected["test_ppo_bc"] = results
 
         # Reproducibility test
         if self.strict:
-            self.assertDictEqual(results, self.expected['test_ppo_bc'])
+            self.assertDictEqual(results, self.expected["test_ppo_bc"])
 
     def test_resume_functionality(self):
-        load_path = os.path.join(os.path.abspath('.'), 'trained_example/PPO_cramped_room_False_nw=0_vf=0.000100_es=0.200000_en=0.000500_kl=0.200000/checkpoint-500')
+        load_path = os.path.join(
+            os.path.abspath("."),
+            "trained_example/PPO_cramped_room_False_nw=0_vf=0.000100_es=0.200000_en=0.000500_kl=0.200000/checkpoint-500",
+        )
         # Load and train an agent for another iteration
         results = ex_fp.run(
             config_updates={
@@ -291,51 +321,31 @@ def test_resume_functionality(self):
                 "verbose": False,
                 "evaluation_display": False,
             },
-            options={'--loglevel': 'ERROR'}
+            options={"--loglevel": "ERROR"},
         ).result
-
-        #Test that the rewards from 1 additional iteration are not too different from the original model
-        #performance
+        # Test that the rewards from 1 additional iteration are not too different from the original model
+        # performance
 
         threshold = 0.1
 
-        rewards = get_last_episode_rewards('trained_example/PPO_cramped_room_False_nw=0_vf=0.000100_es=0.200000_en=0.000500_kl=0.200000/result.json')
-
-        #Test total reward
-        self.assertAlmostEqual(rewards['episode_reward_mean'], results['average_total_reward'],
-                                   delta=threshold * rewards['episode_reward_mean'])
-        #Test sparse reward
-        self.assertAlmostEqual(rewards['sparse_reward_mean'], results['average_sparse_reward'],
-                                   delta=threshold * rewards['sparse_reward_mean'])
-
-def _clear_pickle():
-    # Write an empty dictionary to our static "expected" results location
-    with open(PPO_EXPECTED_DATA_PATH, 'wb') as f:
-        pickle.dump({}, f)
-
-if __name__ == '__main__':
-    parser = argparse.ArgumentParser()
-    parser.add_argument('--compute-pickle', '-cp', action="store_true")
-    parser.add_argument('--strict', '-s', action="store_true")
-    parser.add_argument('--min_performance', '-mp', default=5)
-
-    args = vars(parser.parse_args())
-
-    assert not (args['compute_pickle'] and args['strict']), "Cannot compute pickle and run strict reproducibility tests at same time"
-    if args['compute_pickle']:
-        _clear_pickle()
+        rewards = get_last_episode_rewards(
+            "trained_example/PPO_cramped_room_False_nw=0_vf=0.000100_es=0.200000_en=0.000500_kl=0.200000/result.json"
+        )
 
-    suite = unittest.TestSuite()
-    suite.addTest(TestPPORllib('test_save_load', **args))
-    suite.addTest(TestPPORllib('test_ppo_sp_no_phi', **args))
-    suite.addTest(TestPPORllib('test_ppo_sp_yes_phi', **args))
-    suite.addTest(TestPPORllib('test_ppo_fp_sp_no_phi', **args))
-    suite.addTest(TestPPORllib('test_ppo_fp_sp_yes_phi', **args))
-    suite.addTest(TestPPORllib('test_ppo_bc', **args))
-    suite.addTest(TestPPORllib('test_resume_functionality', **args))
+        # Test total reward
+        self.assertAlmostEqual(
+            rewards["episode_reward_mean"],
+            results["average_total_reward"],
+            delta=threshold * rewards["episode_reward_mean"],
+        )
+        # Test sparse reward
+        self.assertAlmostEqual(
+            rewards["sparse_reward_mean"],
+            results["average_sparse_reward"],
+            delta=threshold * rewards["sparse_reward_mean"],
+        )
 
-    success = unittest.TextTestRunner(verbosity=2).run(suite).wasSuccessful()
-    sys.exit(not success)
-        
 
+if __name__ == "__main__":
+    unittest.main()
 
diff --git a/human_aware_rl/rllib/tests.py b/human_aware_rl/rllib/tests.py
index 0e8cae92..16cb138e 100644
--- a/human_aware_rl/rllib/tests.py
+++ b/human_aware_rl/rllib/tests.py
@@ -4,9 +4,14 @@
 import unittest, copy
 import numpy as np
 
-class RllibEnvTest(unittest.TestCase):
 
+class RllibEnvTest(unittest.TestCase):
     def setUp(self):
+        print(
+            "\nIn Class {}, in Method {}".format(
+                self.__class__.__name__, self._testMethodName
+            )
+        )
         self.params = copy.deepcopy(OvercookedMultiAgent.DEFAULT_CONFIG)
         self.timesteps = [0, 10, 100, 500, 1000, 1500, 2000, 2500]
 
@@ -18,7 +23,7 @@ def _assert_lists_almost_equal(self, first, second, places=7):
             self.assertAlmostEqual(a, b, places=places)
 
     def _test_bc_schedule(self, bc_schedule, expected_bc_factors):
-        self.params['multi_agent_params']['bc_schedule'] = bc_schedule
+        self.params["multi_agent_params"]["bc_schedule"] = bc_schedule
         env = OvercookedMultiAgent.from_config(self.params)
         actual_bc_factors = []
 
@@ -33,30 +38,37 @@ def _test_bc_creation_proportion(self, env, factor, trials=10000):
         tot_bc = 0
         for _ in range(trials):
             env.reset(regen_mdp=False)
-            num_bc = sum(map(lambda agent : int(agent.startswith('bc')), env.curr_agents))
+            num_bc = sum(
+                map(lambda agent: int(agent.startswith("bc")), env.curr_agents)
+            )
             self.assertLessEqual(num_bc, 1)
             tot_bc += num_bc
         actual_factor = tot_bc / trials
         self.assertAlmostEqual(actual_factor, factor, places=1)
 
-
     def test_env_creation(self):
         # Valid creation
         env = OvercookedMultiAgent.from_config(self.params)
-        for param, expected in self.params['multi_agent_params'].items():
+        for param, expected in self.params["multi_agent_params"].items():
             self.assertEqual(expected, getattr(env, param))
 
         # Invalid bc_schedules
-        invalid_schedules = [[(-1, 0.0), (1.0, 1e5)], [(0.0, 0.0), (10, 1),  (5, 0.5)], [(0, 0), (5, 1), (10, 1.5)]]
+        invalid_schedules = [
+            [(-1, 0.0), (1.0, 1e5)],
+            [(0.0, 0.0), (10, 1), (5, 0.5)],
+            [(0, 0), (5, 1), (10, 1.5)],
+        ]
         for sched in invalid_schedules:
-            self.params['multi_agent_params']['bc_schedule'] = sched
-            self.assertRaises(AssertionError, OvercookedMultiAgent.from_config, self.params)
+            self.params["multi_agent_params"]["bc_schedule"] = sched
+            self.assertRaises(
+                AssertionError, OvercookedMultiAgent.from_config, self.params
+            )
 
     def test_reward_shaping_annealing(self):
-        self.params['multi_agent_params']['reward_shaping_factor'] = 1
-        self.params['multi_agent_params']['reward_shaping_horizon'] = 1e3
+        self.params["multi_agent_params"]["reward_shaping_factor"] = 1
+        self.params["multi_agent_params"]["reward_shaping_horizon"] = 1e3
 
-        expected_rew_factors = [1, 990/1e3, 900/1e3, 500/1e3, 0.0, 0.0, 0.0, 0.0]
+        expected_rew_factors = [1, 990 / 1e3, 900 / 1e3, 500 / 1e3, 0.0, 0.0, 0.0, 0.0]
         actual_rew_factors = []
 
         env = OvercookedMultiAgent.from_config(self.params)
@@ -69,11 +81,13 @@ def test_reward_shaping_annealing(self):
 
     def test_bc_annealing(self):
         # Test no annealing
-        self._test_bc_schedule(OvercookedMultiAgent.self_play_bc_schedule, [0.0]*len(self.timesteps))
+        self._test_bc_schedule(
+            OvercookedMultiAgent.self_play_bc_schedule, [0.0] * len(self.timesteps)
+        )
 
         # Test annealing
         anneal_bc_schedule = [(0, 0.0), (1e3, 1.0), (2e3, 0.0)]
-        expected_bc_factors = [0.0, 10/1e3, 100/1e3, 500/1e3, 1.0, 500/1e3, 0.0, 0.0]
+        expected_bc_factors = [0.0, 10 / 1e3, 100 / 1e3, 500 / 1e3, 1.0, 500 / 1e3, 0.0, 0.0]
         self._test_bc_schedule(anneal_bc_schedule, expected_bc_factors)
 
     def test_agent_creation(self):
@@ -91,22 +105,34 @@ def test_agent_creation(self):
 
 
 class RllibUtilsTest(unittest.TestCase):
-
     def setUp(self):
+        print(
+            "\nIn Class {}, in Method {}".format(
+                self.__class__.__name__, self._testMethodName
+            )
+        )
         pass
 
     def tearDown(self):
         pass
 
     def test_softmax(self):
-        logits = np.array([[0.1, 0.1, 0.1],
-                           [-0.1, 0.0, 0.1],
-                           [0.5, -1.2, 3.2],
-                           [-1.6, -2.0, -1.5]])
-        expected = np.array([[0.33333333, 0.33333333, 0.33333333],
-                             [0.30060961, 0.33222499, 0.3671654 ],
-                             [0.06225714, 0.01137335, 0.92636951],
-                             [0.36029662, 0.24151404, 0.39818934]])
+        logits = np.array(
+            [
+                [0.1, 0.1, 0.1], 
+                [-0.1, 0.0, 0.1], 
+                [0.5, -1.2, 3.2], 
+                [-1.6, -2.0, -1.5],
+            ]
+        )
+        expected = np.array(
+            [
+                [0.33333333, 0.33333333, 0.33333333],
+                [0.30060961, 0.33222499, 0.3671654],
+                [0.06225714, 0.01137335, 0.92636951],
+                [0.36029662, 0.24151404, 0.39818934],
+            ]
+        )
 
         actual = softmax(logits)
 
@@ -124,16 +150,19 @@ def test_iterable_equal(self):
         self.assertFalse(iterable_equal(a, b))
 
     def test_get_required_arguments(self):
-        
         def foo1(a):
             pass
+
         def foo2(a, b):
             pass
+
         def foo3(a, b, c):
             pass
-        def foo4(a, b, c='bar'):
+
+        def foo4(a, b, c="bar"):
             pass
-        def foo5(a, b='bar', d='baz', **kwargs):
+
+        def foo5(a, b="bar", d="baz", **kwargs):
             pass
 
         fns = [foo1, foo2, foo3, foo4, foo5]
@@ -143,6 +172,5 @@ def foo5(a, b='bar', d='baz', **kwargs):
             self.assertEqual(expected, len(get_required_arguments(fn)))
 
 
-
-if __name__ == '__main__':
-    unittest.main()
\ No newline at end of file
+if __name__ == "__main__":
+    unittest.main()
diff --git a/install.sh b/install.sh
deleted file mode 100755
index d8fed619..00000000
--- a/install.sh
+++ /dev/null
@@ -1,23 +0,0 @@
-#!/bin/sh
-
-# Install git-lfs for OSX
-if [[ "$OSTYPE" =~ ^darwin ]]; then
-  if hash git lfs 2>/dev/null; then
-        git lfs install
-  else
-    if command -v brew; then
-        brew install git-lfs
-        git lfs install
-    else
-        echo "Please install brew and run the install script again"
-    fi
-  fi
-fi
-
-cd overcooked_ai
-pip install -e .
-cd ..
-
-pip install -e .
-
-conda install protobuf -y
\ No newline at end of file
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 00000000..c8d63a0a
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,17 @@
+#this file provides an alternative to set up via install.sh
+#mainly used to speed up github workflow by taking advantage of pip cache action 
+GitPython
+memory_profiler
+sacred
+pymongo
+dill
+matplotlib
+requests
+numpy==1.19.5
+seaborn==0.9.0
+pygame==1.9.5
+ray==0.8.5
+protobuf
+tensorflow==2.0.2
+-e ./overcooked_ai
+-e . 
\ No newline at end of file
diff --git a/run_tests.sh b/run_tests.sh
index 9f264a50..16db8afa 100755
--- a/run_tests.sh
+++ b/run_tests.sh
@@ -4,22 +4,14 @@ cd ./human_aware_rl
 
 # Create a dummy data_dir.py if the file does not already exist
 [ ! -f data_dir.py ] && echo "import os; DATA_DIR = os.path.abspath('.')" >> data_dir.py
-#
-# Human data tests
-cd ./human
-python tests.py
-cd ..
-
-# BC tests
-cd ./imitation
-python behavior_cloning_tf2_test.py
-cd ..
 
+# Human data tests
+python -m unittest human.tests
+# BC tests, skipping the LSTM tests by default
+python -m unittest imitation.behavior_cloning_tf2_test.TestBCTraining
 # rllib tests
-cd ./rllib
-python tests.py
-cd ..
-
+python -m unittest rllib.tests
 # PPO tests
-cd ./ppo
-python ppo_rllib_test.py
+python -m unittest ppo.ppo_rllib_test
+
+
diff --git a/setup.py b/setup.py
index 0a7e848c..7b59f4f4 100644
--- a/setup.py
+++ b/setup.py
@@ -2,23 +2,26 @@
 
 from setuptools import setup, find_packages
 
-setup(name='human_aware_rl',
-      version='0.0.1',
-      description='This package has shared components.',
-      author='Micah Carroll',
-      author_email='micah.d.carroll@berkeley.edu',
-      packages=find_packages(),
-      install_requires=[
-        'GitPython',
-        'memory_profiler',
-        'sacred',
-        'pymongo',
-        'dill',
-        'matplotlib',
-        'requests',
-        'numpy==1.19.5',
-        'seaborn==0.9.0',
-        'pygame==1.9.5',
-        'ray[rllib]==0.8.5'
-      ],
-    )
+setup(
+    name="human_aware_rl",
+    version="0.0.1",
+    description="This package has shared components.",
+    author="Micah Carroll",
+    author_email="micah.d.carroll@berkeley.edu",
+    packages=find_packages(),
+    install_requires=[
+        "GitPython",
+        "memory_profiler",
+        "sacred",
+        "pymongo",
+        "dill",
+        "matplotlib",
+        "requests",
+        "numpy==1.19.5",
+        "seaborn==0.9.0",
+        "pygame==1.9.5",
+        "ray[rllib]==0.8.5",
+        "protobuf",
+        "tensorflow==2.0.2",
+    ],
+)

From 938bceed2ce26bdf93bba3983657c07010bcd1f1 Mon Sep 17 00:00:00 2001
From: jyan1999 <49133332+jyan1999@users.noreply.github.com>
Date: Wed, 12 Oct 2022 13:07:11 -0700
Subject: [PATCH 35/38] Update README.md

---
 README.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/README.md b/README.md
index 0aa25938..c1b89b2c 100644
--- a/README.md
+++ b/README.md
@@ -82,7 +82,7 @@ Note: if you ever get an import error, please first check if you activated the c
 
 If set-up was successful, all unit tests and local reproducibility tests should pass. They can be run as follows
 
-**NOTE**: Existing tests **_DOES NOT_** guarantee reproducibility. It is an known issue with version of ray\[rllib\] in use, and we are working on updating to the newest version, which should solve this problem. As a temporary fix, setting sgd-minibatch-size = training-batch-size increases stability
+**NOTE**: Existing tests **_DO NOT_** guarantee reproducibility. It is a known issue with the version of ray\[rllib\] in use. We are working on updating to the newest version, which should solve this problem. As a temporary fix, setting sgd-minibatch-size = training-batch-size can increase output stability.
 
 Due to the randomess there is a slight chance that some tests can fail intermittently by not getting the expected total reward. This is an unlikely scenario and can usually be fixed by rerunning the test. 
 
@@ -237,7 +237,7 @@ pip install -r requirements.txt
 
 to initiate those variables
 
-The reason this is needed is because code files refer to the subdirectories as modules, and we decided to use pip to automatically add the submodules paths.  This command invokes the `setup.py` file, which looks for packages in the _human_aware_rl_ directory through the `find_packages()` call, and register modules found so they can be referrenced. 
+This is needed because code files refer to the subdirectories as modules, and we decided to use pip to automatically add the submodules paths. The requirements.txt file allows pip to register the current directory as a package. Pip will look for and invoke the `setup.py` file, which looks for packages in the _human_aware_rl_ directory through the `find_packages()` call, and registers the modules found so they can be referenced. 
 
 # Reproducing Results
 

From 32b1b0ed6ebd71433600e292810df05ab3350771 Mon Sep 17 00:00:00 2001
From: jyan1999 <jyan19991112@gmail.com>
Date: Wed, 19 Oct 2022 23:08:42 -0700
Subject: [PATCH 36/38] update ray Updated Ray to 2.0.0 and Tensorflow to 2.10
 Updated model configurations to comply with the new API

---
 human_aware_rl/ppo/ppo_rllib.py               |  22 +-
 human_aware_rl/ppo/ppo_rllib_client.py        |   8 +-
 .../ppo/ppo_rllib_from_params_client.py       |   8 +-
 human_aware_rl/ppo/ppo_rllib_test.py          |  29 +-
 .../checkpoint-500                            | Bin 185376 -> 0 bytes
 .../checkpoint-500.tune_metadata              | Bin 214 -> 0 bytes
 .../progress.csv                              |   3 -
 .../result.json                               | 500 ------------------
 .../checkpoint_000500/.is_checkpoint          |   0
 .../checkpoint_000500/.tune_metadata          | Bin 0 -> 34495 bytes
 .../checkpoint_000500/checkpoint-500          | Bin 0 -> 568778 bytes
 .../config.pkl                                | Bin 2544 -> 2578 bytes
 .../ppo/trained_example/result.json           | 500 ++++++++++++++++++
 human_aware_rl/rllib/rllib.py                 |  95 ++--
 requirements.txt                              |   6 +-
 setup.py                                      |   6 +-
 16 files changed, 591 insertions(+), 586 deletions(-)
 delete mode 100644 human_aware_rl/ppo/trained_example/PPO_cramped_room_False_nw=0_vf=0.000100_es=0.200000_en=0.000500_kl=0.200000/checkpoint-500
 delete mode 100644 human_aware_rl/ppo/trained_example/PPO_cramped_room_False_nw=0_vf=0.000100_es=0.200000_en=0.000500_kl=0.200000/checkpoint-500.tune_metadata
 delete mode 100644 human_aware_rl/ppo/trained_example/PPO_cramped_room_False_nw=0_vf=0.000100_es=0.200000_en=0.000500_kl=0.200000/progress.csv
 delete mode 100644 human_aware_rl/ppo/trained_example/PPO_cramped_room_False_nw=0_vf=0.000100_es=0.200000_en=0.000500_kl=0.200000/result.json
 create mode 100644 human_aware_rl/ppo/trained_example/checkpoint_000500/.is_checkpoint
 create mode 100644 human_aware_rl/ppo/trained_example/checkpoint_000500/.tune_metadata
 create mode 100644 human_aware_rl/ppo/trained_example/checkpoint_000500/checkpoint-500
 rename human_aware_rl/ppo/trained_example/{PPO_cramped_room_False_nw=0_vf=0.000100_es=0.200000_en=0.000500_kl=0.200000 => }/config.pkl (54%)
 create mode 100644 human_aware_rl/ppo/trained_example/result.json

diff --git a/human_aware_rl/ppo/ppo_rllib.py b/human_aware_rl/ppo/ppo_rllib.py
index f636113e..c2ec7160 100644
--- a/human_aware_rl/ppo/ppo_rllib.py
+++ b/human_aware_rl/ppo/ppo_rllib.py
@@ -1,21 +1,21 @@
 from ray.rllib.models.tf.tf_modelv2 import TFModelV2
-from ray.rllib.models.tf.recurrent_tf_modelv2 import RecurrentTFModelV2
+from ray.rllib.models.tf.recurrent_net import RecurrentNetwork
 import numpy as np
 import tensorflow as tf
 
 
 
 
+
+
 class RllibPPOModel(TFModelV2):
     """
     Model that will map environment states to action probabilities. Will be shared across agents
     """
     def __init__(self, obs_space, action_space, num_outputs, model_config, name, **kwargs):
-
         super(RllibPPOModel, self).__init__(obs_space, action_space, num_outputs, model_config, name)
-
         # params we got to pass in from the call to "run"
-        custom_params = model_config["custom_options"]
+        custom_params = model_config['custom_model_config']
 
 
         ## Parse custom network params
@@ -33,13 +33,14 @@ def __init__(self, obs_space, action_space, num_outputs, model_config, name, **k
 
         # Apply initial conv layer with a larger kenel (why?)
         if num_convs > 0:
-            out = tf.keras.layers.Conv2D(
+            y = tf.keras.layers.Conv2D(
                 filters=num_filters,
                 kernel_size=[5, 5],
                 padding="same",
                 activation=tf.nn.leaky_relu,
                 name="conv_initial"
-            )(out)
+            )
+            out = y(out)
 
         # Apply remaining conv layers, if any
         for i in range(0, num_convs-1):
@@ -68,7 +69,7 @@ def __init__(self, obs_space, action_space, num_outputs, model_config, name, **k
         value_out = tf.keras.layers.Dense(1)(out)
 
         self.base_model = tf.keras.Model(self.inputs, [layer_out, value_out])
-        self.register_variables(self.base_model.variables)
+        #self.register_variables(self.base_model.variables)
 
 
     def forward(self, input_dict, state=None, seq_lens=None):
@@ -79,7 +80,7 @@ def value_function(self):
         return tf.reshape(self._value_out, [-1])
 
 
-class RllibLSTMPPOModel(RecurrentTFModelV2):
+class RllibLSTMPPOModel(RecurrentNetwork):
     """
     Model that will map encoded environment observations to action logits
 
@@ -96,7 +97,7 @@ def __init__(self, obs_space, action_space, num_outputs, model_config, name, **k
         super(RllibLSTMPPOModel, self).__init__(obs_space, action_space, num_outputs, model_config, name)
 
         # params we passed in from rllib client
-        custom_params = model_config["custom_options"]
+        custom_params = model_config['custom_model_config']
 
         ## Parse custom network params
         num_hidden_layers = custom_params["NUM_HIDDEN_LAYERS"]
@@ -130,6 +131,7 @@ def __init__(self, obs_space, action_space, num_outputs, model_config, name, **k
                 name="conv_initial"
             ))(out)
 
+
         # Apply remaining conv layers, if any
         for i in range(0, num_convs-1):
             padding = "same" if i < num_convs - 2 else "valid"
@@ -173,7 +175,7 @@ def __init__(self, obs_space, action_space, num_outputs, model_config, name, **k
             inputs=[flattened_obs_inputs, seq_in, lstm_h_in, lstm_c_in],
             outputs=[layer_out, value_out, h_out, c_out]
         )
-        self.register_variables(self.base_model.variables)
+        #self.register_variables(self.base_model.variables)
 
 
     def forward_rnn(self, inputs, state, seq_lens):
diff --git a/human_aware_rl/ppo/ppo_rllib_client.py b/human_aware_rl/ppo/ppo_rllib_client.py
index 593e9540..e7b96f17 100644
--- a/human_aware_rl/ppo/ppo_rllib_client.py
+++ b/human_aware_rl/ppo/ppo_rllib_client.py
@@ -2,6 +2,8 @@
 import argparse, os, sys
 from overcooked_ai_py.agents.benchmarking import AgentEvaluator
 import numpy as np
+import warnings
+warnings.simplefilter("ignore")
 
 # environment variable that tells us whether this code is running on the server or not
 LOCAL_TESTING = os.getenv('RUN_ENV', 'production') == 'local'
@@ -258,7 +260,7 @@ def my_config():
         "seed" : seed,
         "evaluation_interval" : evaluation_interval,
         "entropy_coeff_schedule" : [(0, entropy_coeff_start), (entropy_coeff_horizon, entropy_coeff_end)],
-        "eager" : eager,
+        "eager_tracing" : eager,
         "log_level" : "WARN" if verbose else "ERROR"
     }
 
@@ -328,7 +330,6 @@ def my_config():
 def run(params):
     # Retrieve the tune.Trainable object that is used for the experiment
     trainer = gen_trainer_from_params(params)
-
     # Object to store training results in
     result = {}
 
@@ -340,11 +341,14 @@ def run(params):
 
         if i % params['save_every'] == 0:
             save_path = save_trainer(trainer, params)
+
             if params['verbose']:
                 print("saved trainer at", save_path)
 
     # Save the state of the experiment at end
     save_path = save_trainer(trainer, params)
+
+
     if params['verbose']:
         print("saved trainer at", save_path)
 
diff --git a/human_aware_rl/ppo/ppo_rllib_from_params_client.py b/human_aware_rl/ppo/ppo_rllib_from_params_client.py
index c150f35d..1233fa18 100644
--- a/human_aware_rl/ppo/ppo_rllib_from_params_client.py
+++ b/human_aware_rl/ppo/ppo_rllib_from_params_client.py
@@ -291,7 +291,7 @@ def my_config():
         "seed" : seed,
         "evaluation_interval" : evaluation_interval,
         "entropy_coeff_schedule" : [(0, entropy_coeff_start), (entropy_coeff_horizon, entropy_coeff_end)],
-        "eager" : eager
+        "eager_tracing" : eager
     }
 
     # To be passed into AgentEvaluator constructor and _evaluate function
@@ -386,7 +386,7 @@ def run(params):
     saved_path = params["resume_checkpoint_path"]
 
     if saved_path:
-        trainer = load_trainer(save_path=saved_path, true_num_workers=True)
+        trainer = load_trainer(save_path=saved_path, true_num_workers=False)
     else:
         # Retrieve the tune.Trainable object that is used for the experiment
         trainer = gen_trainer_from_params(params)
@@ -418,8 +418,8 @@ def main(params):
     # All ray environment set-up
     init_params = {
             "ignore_reinit_error" : True,
-            "include_webui" : False,
-            "temp_dir" : params['ray_params']['temp_dir'],
+            "include_dashboard" : False,
+            "_temp_dir" : params['ray_params']['temp_dir'],
             "log_to_driver" : params['verbose'],
             "logging_level" : logging.INFO if params['verbose'] else logging.CRITICAL
     }
diff --git a/human_aware_rl/ppo/ppo_rllib_test.py b/human_aware_rl/ppo/ppo_rllib_test.py
index 30c6923b..a0690a87 100644
--- a/human_aware_rl/ppo/ppo_rllib_test.py
+++ b/human_aware_rl/ppo/ppo_rllib_test.py
@@ -41,7 +41,6 @@ class TestPPORllib(unittest.TestCase):
 
     def __init__(self, test_name):
         super(TestPPORllib, self).__init__(test_name)
-        # changing the cwd to where the test file is
         # default parameters, feel free to change
         self.compute_pickle = False
         # Reproducibility test
@@ -61,10 +60,8 @@ def setUp(self):
             )
         )
         # unittest generates a lot of warning msgs due to third-party dependencies (e.g. ray[rllib] using outdated np methods)
-        # not a problem when directly ran, but when using -m unittest this helps filter out the warnings
-        warnings.simplefilter("ignore", ResourceWarning)
-        warnings.simplefilter("ignore", DeprecationWarning)
-
+        # not a problem when directly ran, but when using -m unittest this helps filter out the warnings  
+        warnings.filterwarnings("ignore")
         # Setting CWD 
         os.chdir(os.path.dirname(os.path.abspath(__file__)))
         # Temporary disk space to store logging results from tests
@@ -121,10 +118,11 @@ def test_save_load(self):
         ray.shutdown()
 
         # Where the agent is stored (this is kind of hardcoded, would like for it to be more easily obtainable)
+        # 2 checkpoints(checkpoint_000001 and checkpoint_000002) are saved
+        # since we are only interested in reproducing the same actions, either one should be fine
         load_path = os.path.join(
             glob.glob(os.path.join(self.temp_results_dir, "save_load_test*"))[0],
-            "checkpoint_2",
-            "checkpoint-2",
+            "checkpoint_000002",
         )
 
         # Load a dummy state
@@ -187,7 +185,7 @@ def test_ppo_sp_yes_phi(self):
                 # Please feel free to modify the parameters below
                 "results_dir": self.temp_results_dir,
                 "num_workers": 2,
-                "train_batch_size": 800,
+                "train_batch_size": 1600,
                 "sgd_minibatch_size": 800,
                 "num_training_iters": 30,
                 "evaluation_interval": 10,
@@ -199,7 +197,6 @@ def test_ppo_sp_yes_phi(self):
             },
             options={"--loglevel": "ERROR"},
         ).result
-
         # Sanity check (make sure it begins to learn to receive dense reward)
         self.assertGreaterEqual(results["average_total_reward"], self.min_performance)
 
@@ -215,8 +212,8 @@ def test_ppo_fp_sp_no_phi(self):
         results = ex_fp.run(
             config_updates={
                 "results_dir": self.temp_results_dir,
-                "num_workers": 1,
-                "train_batch_size": 1600,
+                "num_workers": 2,
+                "train_batch_size": 2400,
                 "sgd_minibatch_size": 800,
                 "num_training_iters": 30,
                 "evaluation_interval": 10,
@@ -246,7 +243,7 @@ def test_ppo_fp_sp_yes_phi(self):
         results = ex_fp.run(
             config_updates={
                 "results_dir": self.temp_results_dir,
-                "num_workers": 1,
+                "num_workers": 2,
                 "train_batch_size": 1600,
                 "sgd_minibatch_size": 800,
                 "num_training_iters": 30,
@@ -262,6 +259,8 @@ def test_ppo_fp_sp_yes_phi(self):
             },
             options={"--loglevel": "ERROR"},
         ).result
+        print(results["average_total_reward"])
+
 
         # Sanity check (make sure it begins to learn to receive dense reward)
         self.assertGreaterEqual(results["average_total_reward"], self.min_performance)
@@ -309,7 +308,7 @@ def test_ppo_bc(self):
     def test_resume_functionality(self):
         load_path = os.path.join(
             os.path.abspath("."),
-            "trained_example/PPO_cramped_room_False_nw=0_vf=0.000100_es=0.200000_en=0.000500_kl=0.200000/checkpoint-500",
+            "trained_example/checkpoint_000500",
         )
         # Load and train an agent for another iteration
         results = ex_fp.run(
@@ -323,13 +322,14 @@ def test_resume_functionality(self):
             },
             options={"--loglevel": "ERROR"},
         ).result
+
         # Test that the rewards from 1 additional iteration are not too different from the original model
         # performance
 
         threshold = 0.1
 
         rewards = get_last_episode_rewards(
-            "trained_example/PPO_cramped_room_False_nw=0_vf=0.000100_es=0.200000_en=0.000500_kl=0.200000/result.json"
+            "trained_example/result.json"
         )
 
         # Test total reward
@@ -347,5 +347,6 @@ def test_resume_functionality(self):
 
 
 if __name__ == "__main__":
+    warnings.filterwarnings("ignore")
     unittest.main()
 
diff --git a/human_aware_rl/ppo/trained_example/PPO_cramped_room_False_nw=0_vf=0.000100_es=0.200000_en=0.000500_kl=0.200000/checkpoint-500 b/human_aware_rl/ppo/trained_example/PPO_cramped_room_False_nw=0_vf=0.000100_es=0.200000_en=0.000500_kl=0.200000/checkpoint-500
deleted file mode 100644
index 03752ea8362dd953385d305f6e2a046b697fac91..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001

literal 185376
zcmXVXd0bB47j=V1Nt%mDDN&Lks^{z@8qE<Zg(#JzIYg97GomO-lT^x(sp&cU2nk<P
zrXop%id0B4zrFA8eeUP;{BiES=gxbtz1F%>;=6)G44frJL`1d)1aICL93(pL{$nwb
zsQ-;i4UKzk@(bM<91<kDD@e>)d}tsrFd#_WBY1tdaj>7?rVYknp_}|djQ<xVyClH&
z|HaD&4V$zlNFu~pVrYwy(Dk7kgCutaN%;)TD{bJc_P=SSdIb1yUAM`9Q|PAkep83`
z?!VD5NXElIY)fFcu}47gM&m7Eenabmhju2rE_mZmct~h)m`A8=knI1WWc@w=w>D__
zLXo{ea#o@nf=0v#$uAEYX|Pb#V4=j&qPb8bG)Uq9$9sl`2X2%N8s#kZe@|^@kYZv&
ze2~)eAZ3Gv;?AN&V<bpr$&w`t{tf;9-`_%!&>+<fiSs)Dh=}wIxZ-Wq8T8U3!ghw=
z!Kw{yg4nJkloij%XM+7OtKvPZvKbCvH^<;G(PUQGCdNOfxuD6GQFQqHddM?1WAWN~
zL}zwZjrgSPAl<Z%8;VZEsAVI${!c}EH?Gy<;5>c4;F6Yw>#rEpKU55Q4{{*nUO$fc
zpvtd)k_J~(Y5ui!0(`g}0p=UV;c6RO9w{P@U%SskZFvPucb-Tq9TG^GSvp%Ly%|RB
zC=rCeHigLTPPFstXxL^IgbIG;WPQvQIQQO)PARfwo36fqnR*8i#^y5x`&jyXSqfLO
z&gS=)tl$>W1-S9#B(9`%m24YpKp%FW1()RpeC{+E-kq;Wm)WnRA1<E2-i$g_W5aoC
zj|9_tnnZ5Js`HeAv-FCHF`e}4Gu}Sz&PbOdc1h`?<u41a`CV*|>V!KqsYM0qd+j+}
z*IX00<ufe*w1}B+XoYOMSbF(f4rNPcvVX?1#819}uhdTCI?ihRR;V<69Pk+YR95ms
zQ~cQbwivvjmBvRkaHe?THU7D@8^ppG+6B(!Ym}zIrey~N<_QOAV}?77F0|s7GIG4u
z`UF-wO3_zaW@D-L6s+B8!F{g}g9NE&y!^wIw|7OMsAC~7pL&&~d$q7K9>e?Z9pTOa
z(O{aIhzb(AJl69dmPHm(J?r^E-J69$+!r-U7qMY_yI{dUCdfR_29XEeMB?c}n&@!?
z2e+AWI~Q~Qh<=2JpMK%rC$p(&c{oIm9KoBGx#EtsU-9R^9(Z{FD6~1tKzZjs+<#n`
z-~E<`W9OUTzda4`$MYQUv3|UyIFg;({SRy+6G1)ZFj(D6#p7{TVf2<#czwr}7v~Fw
zs#db}<wy;5l}HwzPpIP8t<3qD(*>Md@Z<sEVRZP%L)`I)8XeIuKj+KKcl=C~J`D{u
z<TJ|8fXc>7qWP-{KFfCE=2jQla#EGrK31epE{~@&bJoJW&8K0G*I&pkT}KU9ou-=`
zQqg{X6Vd5x0d2+I)F?)jJGvy2xA`{w^nx!4o3Ai~$z?3OR~^57?juuAoF(Duv&f?K
z>(sn#I;}Qzfy#^$Xc|=yEnX*35Y>)HuPme^$7t{;hKIl;W(l8QPbscV#~O)UFc{N}
z=N{Fv2)iGcFw9jLpIb*p3!dTC9WnU2%9;Lq6eVzIQAH};gJ~BNP|oow6VqKlo3BUV
z%Sr-QcRBGNuaNXfCef$qS~N#ljqaMYlVn5&V04QUtnNxdTlW~~-kXowo>|aq<wvHb
z*}+aBg&7abL2sBm)b)<U=-41A%|D9&SOF#sZxXCs`-!a39)VDngw0(NxIHw~Li?Ny
z1dP`vyZ%<N-q=KpO#ekPN@kP$_f1Ie!$YXNM;o&w_QQaM76t}w#C7IoV0z^=`?>ca
zizpU{&vFR@$$Mo;CtSre<xRM!ARDD+%s}(TGoq?@g>A7v2})89&_6N(6B<Xs;xoQj
zxLh9{9vl%|Xj?%%&F3<?C%#~=n1Br`2jJAS(}MYTY{2>cHk9}=8gh>M5!YHvEH4`c
z{k<trA|eACJ_IeEWi!J~s;pva3C7R1z!N*>Gi?nkn5<=wS(_Dcm|Zoq7E#3>+X2=y
z-V3|K=7GYPyTXIl7vomlsc<o755A53MAS!3CFuitAX3B3$*k3E)j~%Y=05?<6wVM!
zFK@IQbSG`OUSOHDQ$V^4iSrR(Xz2DJt>?Wl$NK{FTTn?(FF8(B{YQgQB9aMa&&kiz
z^Wn*01Ce%}hwi1?WcTG55Ge_&mayN8M#IvHUn&4oo&^rGOUZ=GtDttnQ9)mvH*3w<
zj)A#J!WE0ZkUkAA*gv_4xyj97%Gutqf3zK@XFX(#65>hunmEC*MP)?cy$z|Ayd*Sn
z+X1E0fw<c$idpI&7dq4x3F7SaP<mhj)RvsE7`9ChvM#r?{yEX0YZs0pcdphb%n1Rb
z<;gIRC5^-CFSEXX>2U4*U+lVT#@FXn!beMYp~UJq_*V29e9Pan;i?<?7MF6IRhJJZ
zJbd89TV<?zb{1{cP2yz>B=}{^i}10o4-@Z5^O!p)h(KJ0@3Yn-5n~Ud)mBrgKBt;(
z2#bNL<;y|(=?)Z$`da0-F962ekAY4tWAv;nCV{`A!9sf)nxBxtpT{Rd?P0XYlsAWk
z@!?o?^1aZZ$`tZms*<t2=UJ=ya_H<(Wy4pwVpK<+#WR03q8=FtI@|AqQe!%Fepw8^
zCIPfe{U$iEM;eo=9$>My7oPB41k(DqVA{qya;8{^pNsH<w`1?%wWATx{!s?bdPza;
zbY<F_up6~Ty@TwZ!|AV$*(`jA6J5FC4GBA_%HxiIWZloEf{5RRs?%y4;O?(L+};s}
z?O$Us@5?`7pJNn+dMbjCn>d@ieH2QkwKKDQlB}d&6&g-#Lbns5IB&u$mOn}vTmD5r
zn`;UNS9rl)o9S?&J_w_N1}#>;--L>N!*Ihn3N>P{*`_Dy<m<Q!QaDrx+f*h&gywqI
z@S+Mg?^VHOnbMkvU5lBH>J)q{>4>&-f>}b0E=He-z>fwK;be3;zS{qdT)bxvIASes
zk@!!rs$e1eE0}`!cU{6u6I?+lS4esrbTKJxE`NG#HGTH^9Q$?uFk4`J2G)Fi4|>xC
zT)6m>uzmSPj67#YV=||5Zm!4Ao+`xsGkhUA_B5R9Jxtwt9Ps0VDo7Sdpcw&qJW~Gw
zuCPAEd(?LGf0>2oSguY*FKf^St9Tf3SBtLG7zb%(_gL_K%D>fV(x4T#T>GLw%^lH?
zYrk(J-EMhw=3p!zbx9i!ZjT555_^8Sw+k=zkH#xS7rD}`Sni(SOFeGY!_HP+^3dxC
z^qH&hJzASEFl`_A9>OewBVo)$sKw*j47tMGw{W#92y{0Vpn~ccnC)rGue=}0=NH?;
ztmw1C;;}b}a`(`2+*ZZeov)!V{vA&L=FG-xNTTw<5z_FvQV=*wf|S?C<D<J3>~Cfg
zM5U()e;3REY3=c#aKILnzeTdRDogxZ76sZb@`aXTE|KzC>!DwNfFzuqjG3x>%%N40
z&AuIld4kQt&O*l4s*Z%8FWt%4>!FxuIS6zA3Q72*RPc0G0<A7}P>Az_$k8WZdiyF&
zDlCENUuOxgNan&1&C}2jUqois&g9w^8Q?rV9V;IhQ5}yKGV!Jp95FbJ#&;G#+RK^n
zdwevG+EmQ$Tr2>!a1kn98ABqo_T$>aZg6A%NoKV5H1p|MPXBYQBF986@Qu=A)_Q+G
z1TAl6GVhB}HSQ<=NSeo-cFPg3JZIkWbU8?DdPYP|)#>^->+rFfCZ8iXNt7-e27A~9
z;R~O`wa-qtdGH#uUmXS}BQN4a$JY?%G@Q;ab$|!Iy+Gw>nMKe2LoiKpCVX&Dfc)--
z_-5h)^j{>(>pD{5>_|71dLPK9w&~F?pT5J1c2h9TJ_R!%hRc0$q!;FOV!(lgyuri{
z>caPeZK*$%32~&kc@LoWX@AYt9S?BjM@`tgy#`#KrE&Qc!}#yduW^K`KRITxlP)^z
z&5K*q>BrJCXsFJHS5~v}Ytu!%(iX)WTot+Xl~b^q6wxVL?5X4P(Ohd+1l63>#M*r%
z`R18>i1OADYPekvNB(i)lf6n<o0ckgk};64k;pWZ($PQB5l;9N(`g&N!@!8AEF)~_
z{{I=l*XWHQxyMcMxrrGKB9mEONt?iV(oPzEf`feGf3U1-4bQx+%q0h1=@I)ZGF9v?
zJSn_Q8~rEH?#ctO<;p(#t<Q<8nkP{Wuj{CM><ea>t-#&7R*<P=N#(wMgt(WsJZ6^z
z4c{@Ijv|*y#_!8GUE~q6;v48`^$u@Vj_2Q-GH~^R12pS?FG>bKVO^cdXxY?71b!>A
zE+`Ls3dh3L@4xUz$rAE%@_BwLC!J^9ynyMie#0LF8S2`z7r(bp!}=?+U_b2`o|$kI
zYW7UR@+*&pk|RubphF@A91@{wfx{ti97mHALJ;5o2XuA?qP@*Ws8)&xTQ^;J`mhUZ
zbj10Ew9BCK_?+PH&P4Fst<CE`8>7ksX*&GcbI{i5hOaaox(pA3rqOIJd3}PwBzQ4s
zzF8#5N(}~qttUDz&LWH7+=n;a#@PIOGnSiv#Yta9NV|(MA34{M@jG#_B~OeeJuSD`
z9r7I`F75}hqGHxD;yw&3RiV~>Uo6H>7vav<i`dQ_d#?Vb5nAu&gVLl1d{!Avi>lOd
z%;GX=mR^9x(rVmoMH4LXm4>f=w!Br*nNNP!#g;0M<8@v}v^{wf_Dg)m{N$^29T`D4
zPW?fi?tTE<<^F@RMrEp)-vJBitH3BM3bswp1LKpCe6eX55yp4Ij~mUPxK5r6T{196
zN0gpAw;b)F#mTXv^C+7ofgww)K%w?C7)zc2wORr(&yIoX{x(S1t8Y;rsLJPeCNu4l
z3bJopCwn@OK|Ys!#Bq&kFfrQ$+CF~72Z<|T>~sm}?--9=YjjDl%_H(8>MW%1S;cyG
zuEV1kgxf#<A?mXh!uWEIM{;|GlQ*UaN_7NSkds?|IYb?L&TPS*&GFcKTp8yN^_ds1
zdNbJrKuo5bC)u8Ra76S;Gzp!IEve51%jFx1?%m7erqmkTVIzvp`V^-bWq{~IEi_O6
zfWD3ixM1c@7I#riXrbCcJlhnZa`txSo%H~#q?O6_RXX7EeH%DD^21vRhfo_^nbNW}
zIDhsC7XGdvu^P@~@O%<E=@EyHf2^V1<38Cs{Ul6DvB$M{9YCf%2&0Fopzp0R&~Nv^
z?;XcL&D{}{UZtX3?P%;C!f3J|l_2n7sqk)A0$3hSWKr!Z0*8lAD5o0$+goH{|MCmW
zqCb{3+!+|c?><n!#TZj(*RZ>PUXbilK6tah5=T_aV%9kie6+KPnT2<=&W)qt{S;9w
zpSunMi|(*5e`Z2dl3|UB(G0?Vt-&QTmty|`JB*rEE(p|`38wi{n7ZX4vA8oA_L%KJ
zucgvh-qtJ3blF0lEtp6K`quz4I?ZO58-h>kQSwHh4`N<Mke9GR5GY+qqBK7e<$2$T
zgN-@Uc2yS`4DMz#jcZ7^??g=bZ!}m|9~LNF+9S+soJ*GE%z=4r@>p)M9JP<n66RTM
zgIL!$%)~EM&~X2wu(mA`M8eEaVWtWU`<6jO^wtUgc^#q0S4&e%?LxA(t_t^_`H9}6
z?g>W9OYp3^SfsP6u&MP2+0E`lhr2jFcRY^{-8HCIC`W2`+=QG;4!^f<GuMcC2O&2M
zKr?U@yq$Itd(GX!XWjslAL>in{!V6@drjHW+)#A>u^IYiEo1$&Z;+H*rff`S1m+Fw
z5Gb7O6;zE$BP2qd<z=20w8=dsYgF`6u6`cw{uO|MR`S3W+QBV{x1|5wa@HfU6zT?^
zlDku5VQpq2njR6srWZNzD}OA9yi9iO!cQ!Yp9WD+y)FLsy<|swO}OPKcOEpq0g6Uw
zlO?fkv_jkg>{o=rd$BO#-i%D>l`2B_B31q+c^xkMAr8NDP7!NAS7zP#2d|i_Vb`!E
zR7+CF^n`erHL0Fi)|CoGB7B+gDPtH`Wej~Icfl}MA5i}Bp7lR=Muq(=FlqH3I3AjS
z#A7?Krd*;vn1DV>uSinYHeBVh7wW6Ov#1EPD0$dO2EtwNitAkVe*Sx=K6{VwdH8j~
z`PIqnNzYdnod1MD@>Ei0RbJDGqsXJ?+j#J51_r#e!FdPD7^(j&@asN^E<-)3u7MMn
zOq_wuMb0FDs2}ksuh>Y@ckD(ugWFU0psdGa-28Jq7(05Q)QL~1fBhzI88G7eRwu#U
zU6Nqpkj9^OL~zZM`{|7%KS1~B5qf&?1yP&R4)R^Yh`MtI%(<V-LbqQ>rSM~@HrRrr
zKFJHdFU=;R8lL=Z+!rj1b)nDfGlk(^D+KA^3c=^_B=%{VF>Xwgf+rsjL0QXbKB4gx
zZ(Z^R3xwlnX_qNi&AcMq+M-H77|o|gP11#=Z8`n7M2r3!o6PTuU*RV^JopHj&Cf(W
zBdxOv@LPX1Zl8G0;#|)<?j%tTuhvfGnz_oD_Fe|h{(MNhwcdcvqkNi}k_72-=iuP?
z0_Zxc4QzN56jwWdTAU1;99E!V%Q`T5^(UNaQ3@WiOSou`A#G9<p-Wmc_+MRT{8%{>
zGg&N|C!+;9cUIupv396EVXM${K`fMPyGrb}0j4ikhWg}a>@zPTb#dO<^4}(uwQgsj
zz3bSec_NVayi)+@qtJbO3~nF&gguSigm;^kG2;tXK<@s-^qQsUwD}gKzC>`@s6++$
zPmu9wI4oZC1ugwIlV>MGVe(2Ta<!uaZ}yqOv}Y4wv(Ioyd?-h6ui1>ZR)1uYW1}JY
z?nqcQQ5#baoq<Qcf7ZNebcet)A>PR@fvLTX`2N-zK)+5wN`NBItoHzM_pvBAo=oI~
z>F|2?RgnDXzz-ih%w*(lLW6$*`1{1rT!RsGcTEHse_lZK67;ymks|yO`h#2xa;9$z
z_foYHL%2|FER9KQ!@wa9X4AfT;F;G)Dq{bGlcq-8;N9w)N;Mf?C2PvRoXNzY+{$i!
z>cq7j)mT1xE}tRO$JY0xgUye{c(W)B<F~ZI-Ba4U=imjLr*RcJ<QJ2%OJAdokuR*a
zmw{zD|KZJ*NpMN_Hs2I5jf#4y;e=8b;fq!+9G^b`H#VBmebZl{$xa8JTzZ;y^7$|}
zq!HL=5$>|(B3SLOg^M+Z(J9OWMDITaS?{fM_-HRGvs;Wh2L$uUc40#Ei~u<CV;0xB
zBV=RL^Z4~|oGF-PL+N%37Ru6mZNoEI=eH9r#$`ZKku?6xmZlnsFR^)69N4)eVD~?5
zn!IETZyv(%WVak&+ItCJxk%BZN?)3{b|h73OeK2B8A98qbLf>LW>EiHkqqydL*+zg
z@Z?D+s35b4>DwM+O_u)LDzg-eWoqa)@hp60qKjd<4{*85ei{+>1Nn#BxGc38hsE@f
zwj<YxlieWv@Xy9xD<wL2M>7<3$?<Q+R(SvHDms0z4WFHT4&PPpk>30(C^ca?^*JEV
zXP<GxhD13U5-|vN3gWaYy^1okXxb`h#$%On!X=6ld~)9=F6ncSmAue_fnrHo-E&?b
zDwLuwi_c?{$utsEql5bv3YpBSm!RSziD@@3L$7xc>v^rs3qA_q#6eM7^KC6;J{E(B
z?M8f-<|LH1IRILN#lq`9){&Lh`eEsg5me@AD3<A~(9S+ldX*V*|3F>3KQS0D&uPJG
z$!@Toc%I}m|A6p$+mWl-P|>WL#HIAF!1VfR9Obwh|Fz%5$N9zFtndrIF_1!sYDM}i
z;|Xl2uO#`lN3g1SHP4UUi>a#<AtUh`-kux_L9tKa-T82y!gujiN1Gt`l@U=C`wzpG
zh;zFWmE@~V7EzZW@M)C}Z_*gWa>VvQrRZ#6cA7lGSBpnx)j-^jUN{<Bif5CKvBkQp
z`QLGcWV(kTWGPyM;gVFmmAnj}auE#Vqj*Mp3oN~4hdu_*aBO@E`7~(?Z){THY>qj{
zD|1Qa?<6J?;lZ^g|0Ly^q9_eo<g{B1{0)u5mg>tmx3LnGrE>6b=uRGc&V)N^`eLt9
zGM}9l!>tzl2bF_S=;QVpB<||KrPX8EgW02@ZB80i-3ev;o`>P+;5LEl+B66jG+^4;
z3<&w+N){&@kRIQ7_T8oef1iGipNIm9G}s1@jwRxQwXcO)D@p`Eln!Flye!h!av0a{
z=_l3P0X`W$Bq~KY>~P{@eA_w?4G+GD*~QDr#VxsHQ}=Ax8YW_4S|knAa<0O8<N0Jp
zg+2>f^MUx3o*Jr01lCQ;COPwz@${b(QnVukeQh(br*<WK<P`!BMHjG_qT6B8E*}<=
z)4_%ph%%3m+w4?UA|8=TL2c<x<kk>Nx}g0elq@M@lh@0`zF2eosQHIUD#~K#A8p+F
zDwll?Dq<lkb+Pnr1B#y6id)2n_^G|0S<UF5?9H4u7G;+sC^dP{+O{|gpM6an;@qxS
zw98ndxt=_pe!3Ton-^luhw+eVS10J4;!Vh}ad>8rEcUCvBjv&YQelyZYv;~~3gJw!
zye=iQo)w5s;vbL{S3exMS|E&&jfP!%Ga)(TAUOeX>_vY9Ry%Enuw_SuT|OCv&rQIc
zWywVEj2C2{pM&vpdzs6(3g#$$j^3uV7G=k*Sz+-62vnI$UJOXUGOtV~a;*sRN4P-9
zDSM2nlVsyR$D(xQTBseqK-gDzS)kMA1ksv0Xy#tT#%`N|Zgv6SHva<APFgLL{0L}x
zKnfo@t_0=ANQR#_pvb8RfpSI$krO>n`c}mYMW#8DVbi+En*arDu&fX|iCt$CBvvtl
z#Wz3^&a)SbJRzV)jAtu;0ol_3V9K5p7#^R+R30mG{-};DXewvH#Fs0nEP@5ce&I7^
zDV~ZR*mGRKKV1@~2Ucj&Gaqxo-Hk)X?MtAhR0`VYDCl35LRO?HfMv&t8p8#~_<qJC
zW_Hh?+?7*@Bi+$3es?5VxDv>7n<Z4#c7nEfr9$^gd9u1|4=l+Lfxmf|m^QZs6N^3)
zb<O}p8UvuH|2c~a+(1HC%Omr=k6Vo1VC%+MezJ2t=!!M7$b?fM%d$cC4HFLNJc1R$
zyM?z`{9tEyEAa1XJBe~cGBl0X#S*jc@G7SoWN$tp>n{eAfvFF0+}AvTzGEfcet8Hz
zr(R=)t=oyozbWucZXw+NQNk>L#0qb$3qT#^Nsw1pO&*O}4yi|ekqw_tGXJVTwCI<^
z&#z*zKfHq6{izLpaYNie^DZcfvLHvVspF#QXIZ-E5WoJ)15Y2f!iq`{eCBA+zQx<I
z`shk>VMdPdQfNJya@?6MHWx$XGIfy4kAhJ(Bj90@DF3Xk0MAz~BCg3#(cNASm;QGE
zm$HvUKWGtt4&8*UiQ{n9tZ0Z0TnkgqPQhin3<Wb6D8M4E6QHWR5h^W&V3%XWBX~SG
zTpvqen;nfj_>S4_-T`NmBZ$A^biThyk-B-ugXd8dYUz9sJZl~^wqBpy|7cA$*cl=d
zJ_bfU(Gw(l|7M$>>oWK4lPq?)ETt#qhH_qRK3%=r0}I^V!05?hur67TA2Cn{nTa3J
zZPsRHAbC;v$5D~Lwu<E;dt-Q4^~aizbu+Kz&ppahT(tO=mq6R$5WmOm>2ak9T&r?L
zsQ*NZ)>J<fm`^{;y}V-ak!uRw?L7#S-ekg*wPUD8=M4V!Mh&UccjB97#Nb5ryRdER
zV~~&0#=7cmJg7bwPn@x$(*{;xSL9jL9pV8}Lkw8(9Z~*LLY0{f0<Dh^<gZ(%fkLh{
zop*Q~xiu@4I9G+@@81W=tY9tZ?3{s_1!I}DzaHD(?!*fBM?y;COmuD!CbfsALwH{T
zG6e^Co3#-QY+Xr}Z8*HrTaJ|md(oz6HCksUF#2W*Mh&+iUp&)Ty0aY=eMrY6PCKzs
zX*q6wI2XD<za+vp8Boj*<JC7D%?r*DErBX+9K8_(&nbbt`))D^uETuo6J(xt7D$Ni
zfw{5QP;}gLu-dZ+3KU$?an>0@PNpgz4h@A@)mm)9bXC42@GrjX6UF`>Jzn~J4BxI{
z4Nr}#aOSo^SaP@;PVHL_k9Y6KkJ?K7!NdsoGNzRnn`IKOb=vfaz6qDL48;J;nOLTE
zl^hNogf&YI`L1#h#&ul)oq!ocbfP$G{$)b1S{K6l;fvAeo-(amxP*EMlEM39AkKV$
z0nXWeCx_CWv)!6%WNnQO&H0%pa9Hn2XRp-abE7RFd%h+A`~!(`rvxk}A~>c`mq#0n
z9=dOa0NREq-#(seuK9tvIi~!@-yFQ`Qir0pD{!FP0b|T7g{>Q(lKaJT@W9Du&@5Jk
zifS#;YV`$sgttI8Z8QDqdxeePcZVGs5aU^|%0XRRlbTJkgn)l)a5?2J9D7)Zd;Zwr
z6Q$!Qx`WW_^f}Bdq=z`$UuRaQjxwj^;?z#h2qosYL7S{6dME6qVJ_ZyCPad2z9!sA
z)(#`5Nb&H~+4$pi9hv#A0Lzvq5dG1FXEj}g(`+(GJr$uh{QUWdfjqvbe=;BO#h9<`
z*~|TwePZJZyWq^D9b{ry23x(dikR&mN$U))`RZF&VcJ>+d@38m-2~F~XR<Z7%#Y#u
zUygva=`cLAV*?Z%cBCGUoS^fn9-TAMoGvfFi2ow=!Cp?3rq1x;{s-pb)4b6n<k@KQ
zeEA97JS?3mJ@BU$CJCG-CG&$ux>W7LXexR~k8-bL=sWKeIem8lTqqg|ZKrPu?QgdW
z<2Fyh{Y4EpDJl^^uUW%W;^*<#9rD!LISJm+lBb6&VgQ9TAo;2umKSD_(DwIWT=9wg
zGZth1$K~1j!cb^gtV!#Yd!aE;i%)d*#1fAu7;;2~JLJgm<fFd)d7lJ7=$!|vR;8ib
z_EY%l!#J9AMU=J~n?id{8nFGR;GDNROsPBq?`76<$4&p(-ur5JTrHjW_zm$Ccpg6I
zzejiY4j&JEL(%jWaz|5?D{e~_$R%syrE9ODMddY4nDhhR&3gfV2LjNpyhAW9uo}Yh
z(rO}J-9jxfAzqIW;US;&`48_?G-&q-y0hXYw2ztsQ=Z&`Jgv1PX;UV>X?qQxuD9Yx
z1?%zMtPew)<37xG72%j<NIu4Hrz3w&r3P{rV8?zH_MpxKq{?Mcqss`E>rUcVzJx=&
zh#~!N`)9$;&~Y48lCkSoAr|!<gs9JwD0<$W+&bTkbCsoFaX}AQT}X#lo7Qldn2q#9
z))<;Pv9r2&U_HtYU5oEqJBiz$CUBUx7W_N(;h?(}N*xHt`w981@uE2xdA%mm_r^2N
z&3oXM$$!K$z*%@Aa2Bq;Z3ha1qk=E)`K-34%3{LKaiA@$3KND;Wkrk6u?_Fc&|zP>
zpu|U&xj(aqd8y0btmI;B-gt&-ha6%4F$sb!X)o3zrBx&4w?i;`w<dmV{=&9j(<ip)
zH?gj>HKc6hZp_T;WjfR4@SJpi^~;mmc%a||v8n%0Fm~ZhyxP1N|K+5k>D(69H+mS0
z-Dw6AfjT&1QxYx~+yVW$CNMELnuVK{!tObHnbOxZycgpPwx{&LPS03Sux~PCtd)Sm
zGgGl3LmgYg%kfQcv|!Z6BLr@`<GS-EFlBW#n<?E%iU-tT=cCv7J^3N14`Ggm>_ov6
zn@!+-LL48Bcg4E$J|?pMUbWd1eNw+o0WP(~z}<-}L1a&c1xJb^if)30LLcZmmqBhF
zhyZ1kxe%x|1HUwmhKldUnf*{bNSC`wd^D<w{J2<lG5#MZpOa7GZXCcR>vyw#B@}YZ
zm%<(gC!yt3du%>30#2uGA}$xdk)xeOpg8I<E^zqH)KgC)eHKT8Jp720cr-5D_(<@s
z=OP5xTI12n9t4uMF}t{xs9`#fIhVST-|PJ0?e-K%{ZYgEFFYd&tBsg1orumhUc^(&
z4W4`&51)-q@n-D`jM_V|#=bKai@KcAdAbz2;41_D?FHobvIf@I8xOf7?NEA-4r@Mw
zEJ|t|i4BSXAC;XX?QtBo8VrZjJt|li<qM79#US?KE!Jr?17}&fqF>EsSnsQV+DSD`
zpNd1&pYt^-VUt;ENDRzcID}bK7o)-L`^4UNH4JO)BNn%>vigNekjm$ghJ7!|sSPRE
zSJXt(RcztGmt<6*jifGbH70}{CG}ByXzy-A>eP`9HfzHz^DQLZAPV(Ij0gFATl~6n
zCraOqXUbiiOqicT%2)gmXjRMMd-oxoPU|+Sy>r20Y(qVpI(!yxA6WrTCHj0@O9Jfu
z{s01Jb;16<5<J6MgI+7$$laT*X#0OtVek0>e%LUG>ytoUAAFjOcpFHXY*x`pM-8B-
zUxVJ>_aC0MX(um~bYOW!FMIRrJD%R0Oj;JIqR$HgeXi%(<kS+TIl~^J6nbk?e`{gm
zw|F4?l33KXvBWx54Mg&K$mHa$XmSdLQL8J34JW2!@XyoC@V+lD*svKzp4XY1<nI+a
z2hM;gHfykcn;qmez7gErv4;e%Uq@~!&LyD-?vb|V>dd}pB=F*7DF16n{F?kA>ZGYf
zmzN4^Rq0^#O;z|b<RM6zm<V@c-jmJ;r^wnJuHYN$4x*1E(O9FLO}-w3+Y+Mi(!$Ls
zIcWm!7defF8D{WA^9Pn)?1y`6)nP&WcZ-*6uHh|aL&@A}eDkt!ns;a?xQ<rgYj>OQ
z!S;h(u>LWK{ksh%Gk<|fW&$+Umyr%fRo>RP1XjMdjBSFc+@iCMEdOs99u(K7=fc0B
z!phl5!zw}K;9mHnyo|e#>xcbUT7l)<<%1DQ@HFWVPQN^gzOui}@7>+cBi<duE6W0D
z-+EP=ZzN6+G&fP7isdx3;1%di%0-Vg&5+a|kJAJDYQ{K7(~DgW6kgb}vaFd9aOXJm
z)N0Tn<%JJivI4$5hVP1$q02t}g7Gg_@Ei4mLPbqa7PxL7H_w^RSAW^feV4hC>>x3k
zwQ38W9Vkh?QaV6GZ6!bYbvC!rJdVFVZeZzCZP=L9RZKc43~KGx2v--sVMUrlo)U>_
z^VV&O5F7A;t?e>|LE#%_9~nxd#u=d4oMblLGmh0ZXbX3o%VQ;LlcBOB3H#d?!n3`Z
zgqu!<gc=~h8a1pse>q7WHX!u#&?C0>hslXz5$v2PK}UQa!#BQZz)yKH==7f~RcjrE
z>+<5D>a-61<CH}Vt@Uxf^lV5AxX<?Ke-K9X>%!7k$;`tw9h_ToND#j#$Uikm9B14h
zm9;m>Y&#vky`Y`l2n*#|b~C{#Z6>~qPK8i#!cx~)!UoMBu=}kDtvvY_(vD4}^4>+b
za%K?@99N~~3+n{jRtuf-{y|Ik4wyg16gMUAMRW5-kojDXZaC@9Wyv|={Raz4$LCON
z66WH!TQ+p@lsw`y?>k28#n3J1go1`MOb|UufGxg@=mxQ8Z0jL^`1mdwtj^!WN`*Zb
z+^B$)5|ZGeQZD%aejqH^Y|Q0)mw}qSD_{K6mKV3p6VAT&2OcKoV31fSuAr;=+w`B<
zs}T$G_>;`vQ;SpniSy*kt^9^_0ad<qjoz-=%WvKO3B#9+;xgM9y?nQXMgOZ{7j-ge
z;G<mPk$94iFLR;mbIu9V=MHB@H#>;$r}ro}CVFVjqdXwEAM_e7qjBgcE{Hn>_aBMU
zDF*~#e$j>2*Ofv}`v(|3Y(Ae~v`o->`!d?Bt!006ykU#?BD!%&AiexeN#L)oi^2po
zSaIw(q_=P6KJ1JzMSmC%IJO*(#)weobvnGHpom=fQ8L6Su7r_--S9+bGTj#`gq8ch
zz?GX-T-9x;Z&R7U10r9+=LuT;)3ZFlkS090%APx`N8ql3uedpM3O#+US<o=18!9U}
zL|(dyv+t|&@+*@3!=M4qxpx>2?vUj($DicQ);G{VW-={bCPA%zHR;b|!6eIJC0_M*
z<I@-AQnRDiG1pm!UpX_Hju))qR-aP1Af^lzE{bqJxqWc9U=Q#3wGzKQImb_)IEl_p
zL)@{aFIB2kf*5N}Tvsc{vrQKYzm|-J^=hy9;&;(>)<H8`aAP&s-M*dcTKn-UR}=7H
zN+!6SQ>TBTQ|XcrQ`&H#3^Zki@Lje(5B6No4KaXh|FMLX1Rh4EAw9&?YBr5rGlt(X
zSE8?e*;1|ZPXyD<#tCJ86{y;*V)91IjBnH*s<$4spcdNA-rHF4!R|N)x)+Fg+&>VF
zItb@S{=r>~(qPGX37#7JahC4IE|8jC$%%g+x35=%*H)8gMr#XPa$JWa(}gr>*Gljk
zAwmz1G>1Di(KK!dZ%3#m(#-V+P<?kZp6kq_=VY&gPE;2wvv|tq?C8Xh4W6jl)Py6z
zR?sFB#f^S#Wj1DIaIIz*81<~>DjRO#wTg*?fs#bN=l})3!tvbw-7B(wRR&I)e;mvb
zZ^GE+cC;{z(s$c6QMsDnjN7wVqD(Ec8|Y#2+t+yi(p4g%GZE@VtN5G{2i7;Fm5MdQ
zU`cNPk#g9E@`EQ?M8$HJeb)+4JB&eV76suFu4wYTm|UfSSXwj_B>PlgXT^NXYS@W=
zj@O0S`tigBZwh7eV%W@*$+)Ib0;66GVeEY}Y_Kf`cFda(0l(+sj-Lwz&30c{a;_AX
zH|`g1nJG^WU%o8}Gx|(+4S5W`&yK-=HJ{k!xAAc7*gTL67U0+)F?hw#9PeDaC@?&?
zjftd8U{gEfuxQE1n%rqkjDDLckSTr6d|pM8j#OEkqaBU1#x9^Pw}M#iiy=`ht%BM&
zj5rKN!lAZd=#(9W@?KNG{idlvae5ql=$V3>%f_M6fnm%xx0<!otiZs_&xy9JH&mQ8
zhdIi@Sc~yBc^{;q!p{ez+m#{q$rHArQ39N=C=hv1C%CYnkp+JDW=2W9Br1KrMYM}4
z<`oVHg+-OZsDxk4Iy45}Is-^HjfC3z=9<yV640ek3N8O#VjsU6<CCMKu&r^K!2a_V
zwtUGc!K$taY=r9_cIQ_dJ|m++Deo+Cj@t`g7rY`9UMkcW-6_YfE&?#}DPqD@EwoL!
zOY}B;AQ3|x<5`_nvOguAO_RFHa#G*0snM>W^}v+{xxHaJ*MG8j`EW*lCBu@GILPi0
z66aPcl6u7x-LsK>RUY!gl@<sh9_kHwH`Ih8J|{@Px*@;o*fi2GHVV|oE0aU}q(HLg
zF%#K$qPjw$0msJd#eqh1eCGa%5sl^W?7umv+#v>*Q^E!NQ#A0>tQ5Taej5xr9VbVQ
zN`P!@1yk^iBYAPV;BD2AxAN3=Hg1hLp4S#bli&}+-{Y<l!o<OCzy|zY%R_3B8;Q6n
z4r8qMGHr)d0_R0x;BY>ctWGn4R0Rd{yTg^a2aAwk{T8---VkQbl!UyHEud{*BPhOV
zj017SY{9q~Ja=a!W@cU`a;>xR!p{VV=vD*uekuH2`<>_?lZM*%-NFqYmqNL^4k|Cb
zPO@!Zu=S~<@Kow$825KLL>~Tt9n*SH;b$+ZAAZX=9n|FyAJ#&tW4ds4l?Pw(X@lUL
z<3yg5rOu^fcjBvVPq-l(MM74+1>IZ&swojgBRAwjc#u6U9XExJZ8xLciUw5PFIng}
zqL$sS*?_Ae^O@V2<K$Ur2uV_Rf;U>fS*qC?;_@PNsJ0Bpo)Srn_PxO}U8fTJxc%g}
z_AYRH>w?>T?TLxQZV)lON5;!-M!9|^=x!y@9XE|BUr>drOatgQb%Q$fW-{>D8z-DO
z$)cWG5s?PF>T=sz5a$pLE5^MfJELRKSkDzCwW?TG%qSet84E|2jYj7eA|TS`Ba{~(
zjujiF;c1~fri^gI!H-+nuIkCCU7S0lB+bR#)*-FNZY6Gr<XAe>oP+l*mbP;gENF1y
zZYrvD_{JBg<*|a_(rsfQ=?v$Qsr*6IkY1X)54yXpgv%m;G#HPfMC}<|S3eK#df(ub
zsW%PMb)jtmBdO^997t`c2bq}7f)a;Bvdt<07514yg#jmH1Dznu(gW+0bBTB2OzNK@
z%XPKZ!MFuoNSqQN_ZLTLr=4(D;TqJR>VpSsROtc!4F1-o(QyA1KFqZUd?mV3?S}!D
zPEw;bTlc}AA9~cS@eU4{3+NZearARtqp+-bG?o64iJ!EbsqKvzzF+Gq)b(6qQbWFO
zw+APAV2v6t6jkL-4cTN-WeRP%H-=lOgbls_9jH0;IJ;$k2Df>v!H#bd7<OPUrYye6
zM4qY&(hvJUUXL0W^zCHs*S6r8rRQ0ds0}W-I1;XSB;eh9I_yQU3#tdEv7jt7%p4;i
z&Wl?}%C<Yq;K@6dr?-TNBvo0Yr!Byv?>3@yXAneHtO8P)NiN8I!1AZdFxD~(nwLHq
zsyU;$apx=^wXBRZ{gI|xmVl2nPQinx3S`A64~ScOk!;>7N)=9Xv{c*!wT2ZIYfi@D
z;QY&YSMd;xJu8cjhwI?n$hAzz&b(%ObvD@`)qoGxGRTPiDA4=15w6>ehYMSmlf=16
zu=?RT{v_}TlbwD`khv%e4qTIi`Rld8-6a?99H4mKk_q1K_QSlxro4CVZn|ncvdGG@
zpy+Q3qbg?cpwMihUarg|q{VRj%6%YRP=&D?({P5K7N1a(3;lnlVB4EzAXecFQ^QxH
zwZ~X0JLVy-UFD4_od?mobURlcr3i=gFA$a0f8mLC3ixa=p;MZg*o|j*(8|+^6vex-
z9mC(T87qvqaLssrG$tRnMY_W!X?tot>NBX1>t?r922kg+0*!FFi5t&O<w6}Vekbuc
zQFBSg*9rBkFl-%v@a!ltOgH1lU=N)%wFgvARY3pqljN;(951_dm%jUJO~1V^C%yL`
zL(H~exF}{b7FntAMdKc0d(%03|L8Z2JC(w_CwlSPqOa82`Y4I~jVw<YAi#eM*KJJT
zXa0`h7q~a=Ic`eF-=9N{zg>ra^tAbszwzwS8WVEQy9!3W*}*3KOr>#sad0@i7@wX`
z<B@h}@Q%*_37)cwf6F(d1qG2bsO=9tx&4S_N0{-Zx(^_|@CwV=SpmYvYz)63#d;){
zu;xquAi<=OO)d6i`O#9?Fw79dE{`Sg)d#TI#(=MoQ|If>3CWyQ#ZXn{O;pCzpzq&g
zp_}<09{$pq_3xO6Zx&n!`}U!}P5&fhhIfGB;21hHF9HrJtl<yc7VxoxRow5TA}!jw
z1Xta;#`L3PVgJNn{&&qZu*kkHj5W}}NgK`ihp|nV7V#BDN@wwh``-zZO>@9}-BNn`
zvo|kze~pL~nzDU=r}8gR&iMS_0vMMq3E5x{&*onT;o@@W3>4>69?J4m2XR>36b4`a
z^a$Q9kHW~e=V+47Ic)BpKo`aKf$hy)+Sw~kkKY_mW8;lczfOZca516>SKbr6un*;f
zikV1c1l&{Kg;w0~p;j*^aZQbAB<6)QrcaXVlvq)1u@qc$vsrNV>tB>B83l*;b)bUl
zY_Jk+!Ywu;Ao<@d80$2ZT~~Le>Ec_tbU`-XwBCX?FW*OEWKwYAw%zRgo1q@6`6b4W
zpFth1bLq_fpTg@yd9`-o5;XIR;Wrk@0-U;zw~v1(E4e<;?5_ZiiQ_PCmjxKRWZ~0O
zb~N<aHKw%a17v=S7Y42BB8JM_XyFUwdu)oJJ3at5J~ZKvcC6)LW4>X>>#dOAlt?P=
zwdhgB7Z5Zp1hYei!==l!AmzqAFk765ZFX0ghnqWm{8hu|Y_&wm#8e!;&=I}Q>9Yqv
z)i8a{WN>!Af^i?-5Q~y1+?DN(a%bvcrdB8$5KCl_!&8}|_Xsv~bTq8_)keHSzOeNs
zkJyb*UT8Qzf^D9y3%`ty*E|{FgHJ~7tZ`gC4;+_d;Vru^iwsx{3nXs~!_#t!(n?Qq
z$?pz1{UjYul47B0rY)L<ZNZb8F62Moomh7DA6qGRiX3+DCM)t^3hZSLTS$v9VWJfy
z*{}~o-u*A`;Pg9`$-a6-c3kWv4x1Ol?`0{-e(!{7pN77dYd_D*znlasyLi-L0<17Q
zODtrM!aAwLxGOdr+|9<pt=YZg&|oh6@j3%EFN+hCQ8PeMSr&9`_d?ytoiP1dGz>QW
zWW+QYViq`~^5nfZxNLy<zes|L0Zojxc}_a_co83ySoY=RR7js?kA0Pc!p7H&p>swY
z$xI7_-iiKLcp?Hsc7LnREMGz_7kgppkye)Zb|&d8O&|g*4~&);2RZ$3#LOuceflF{
z?L8qI_V5bPEb;)Ow5w!Ik_E1{T?YjZo#Bb;QRX>m4_>L*gEC`vF{0QM)T?ZWyyH^r
zys;G0*KNm!vms=9ohP_I^v6zbX*Bpa27TU7fiIpN%rkc(njDRWmfdyazxzpGZ0`nd
z&MpS)?=h?`!&zWi7$wYou@|N5_M&alEa=2d>=ysdO5`2TZQC`{P;!A}4e3`!Poh|Y
zvNDaW@Z#R21ewo0G&*<_vFisk%ABMF;tQc$Z3;|_PKO2Fdtpq$V$O|j!p6w!a4hT+
zo^rp5;<psJblyyQrZyMD3ghu-jvYX!IvJLKfS6qSDV(>c03I!!0Cx*w!Nf_4wU*n0
zg_t_zO?WSqeY72;GO|dgsU^(Ji5$|0dzp6ZG~x2*M{LX0HMk+i11ukV2~0ANkkez5
ziG9yy@;k;H>Mm)p!tzA8pXdc`kBtNqOuZrU>^T1Fp9MQHI2t>0-jF4FPW;$xd5ei^
zlJu&hj>X#{EoP<vBtB;4H{v!g0hJRp1Zx(nF^fe`Fl?g&{hBqTT}jvi-WTIDRE_zk
z-32fZHslq}P~p?s6zItdF3|Q`l+{l1B`KG>$s(Kcr0VVzR4BX13S`H_TQ^b2mfFf9
zgyO7HA_?=h0T_iGXYU_dKm)reu-HEZN)C-7D_)Jp+KymB;D1@d(`RPmERP_}Zb&BD
zjVoA#>jmL+OMRSOHjUg_HV-teD6`A@Z!o7z4e}GD$@unG=K1s;#;u)=FQ(^W&9xO6
zf7Xj-n#B<V;c0xcWG#7+G#UT=$;VQ&Jb~$6Rq}VKDp<}cLDPv}$gO`f&{JX(#@=5{
zHeFNT27kuUjTS3luOO8>D<zZKD-B?1ZcO#1!(d;)d^q=eD3?BZ4$2oD$>BpULH)fX
z_4n5V@kR^K^pt?aO_z}$jO7cr8&KJ-33Q~|Z|w5Rh3;2&>}`yv#YBmv)JSX_6dOG!
zU%hIPL~2ltle4+efCgRQoDbEGqv>9;^%(cChM&3=&h_;c(8sYMxWoM<$SN4|55KPQ
zkil~FoYBE{+?_=4w)=A3wKhykFq<~tHRYsmb<N53qEsH|;QSFQX^%-D-*x8zAr%+7
z|L!#?f5U=nOx}Ty>aF>%@7dgN<1sLkyu=67{V`r3hR27XY4s2W71ozf!~M#*S1tt>
z_GiMHr*DMY>pwuYX8>sSio@H`Vp69Pfnu$uSUExk-Fmbjp>`##hGg*B(@vKB(}DhX
zUXTLD*u7SYwYzv=vmW4+r?y~OrzwbSvSgC4hl7dXG}0i_B3R_Vj||F0LW51J@P&>l
zZthiJ<1F%UVWlIacF17&wI~?rH^eh_R>2cCo)#U9WcSMNVG!R>#4fFdX=Yk5ujM{O
z``m(vfyej^L&>z3`CMV>+qo|*tnt~jHrO6%$^V7VWqDJN;OoL%ynbjTHfv}=XuC81
z8@&hjX4q3%KY4!PnH%qVB#QeT-Qc9}L^yxho+vwSqUU2CF=MNJ5H0v*QEb+aIZ#OC
z8q45!i#Wgje313T%hQA7eNn_v795`pqu=`vLYZ3(j*sGa@9h~(>uF?F*Bynm`fuQF
zxj&SJSkwKN^jX@7bnu)mLkIUO(WG((7I0xCH(L6@qF~Q4Y^j+B!@VZ(@|G5Xwbx-_
zvWir4v@&EF?&IC(Uk~ZPI<%;}ntgq_7aT4n33b0X@iz?%sqLJPc<jGyR$Q?g4=?o?
zdUj^8M>D1pIS-)2e5Lr$xBxh1D@$eWYw)ytZoE8W8B{4B!3~MBFmr4lF}!u0yzVbz
zPw68FT=fs1ZZkr${4uyNIgPJN8%qy&P^dg#2NV8#3GT~csqR-<K1Q~VNrxn|BP*&Q
zZ{<H$R<Vw57Zc?ZOXO*X;!U`z@<vc_MvX6d=uB<KHo$X*0$9D=8;^}X%Pf_4!ULZ|
zw3?BGKd%3SW2e*LxqCLK_D$!Z^K*HH8pp*$UfD-mgcv;i4s6o6$22<Y(fhp!o<9}L
zJI)-y;bqsDMpG)ZPM^U_-6a8kKZ8jjQQWgpo199`Aa1X|lW@6_RH|JMg&D8FrdX8U
z{yGgjgAH)T?*qJA#S|wdcM4axbU?lIM4I$e6)IH2(X=fb3qxIbjMjd>Uqy_*Owgit
zPJR@8Bfy=j;^{rzQjGI4$L2q>_)Zt-DA%FB<6|3I>>k01zAg{?H5$`G<4N-681|s-
zGCb*s<>$6ZQm30<wA*|u7)G2xb$2Pg@A6YTVk|{>%sB>i*YtQ%`8d*a@15}c_19pl
z{et*89OEA5gG7Fe$p4|}yyK~U-!M)nNw!pEWt36&I?w${T1Z1Gl1inh2&F<Qn`|N_
zWR*xuDxBwj$ZBXOX-7-jSG$bg=l7R?yk4Bw=bZE0&vjqd`y#48>^Qgb%Q$4J#BG&W
z0g)r#L9g-*?)0U<e7<ZKH+XO@7xgiLK53spF73SuA(n&SKUIO5r^eGH)zx?-P7?B4
z>Zq65Pjqq~$6bw>gFn6v!S<kg^xsQ+rubcqGuB&A?>KxUhSwLfHDmcZ$j{^0P$|u6
zom7AwR>qu*iW;`}#>2Mzb(r{mB=<1TnyFMKK!|4+Tur}+1`QXf)xFJZ%?cOHv@+)c
z&W6G`{ux@zcF~`2+`(MQmFh*0Vh<K)fNY)=k%%+pT0i;2f>|+Ocl?<^C2Ktym9d_y
zKc!Lsv3&wg59H{JOBP)5PhC{XJ4L2c=do{tLP&MLk0vo{+$`Z5c+#i~2TjxYjOY$*
z?fgnD0`G$Q*F9+Q$QZ9^DZ}LOk<fOvjfDHUQnk#_^xHXcK}}f!%>CqBSA5h7{bQ9d
z-gplN)aQ{r%~4P=+5pP*v>;lti}nbv5vwW5!d)vjL9&+*DZUhizukhs+q0NXK43^)
z&Bo%PUzud#qC9Ym=tZUQ#r21tmC`)tT%7)N9cFe_Q{z3Q80oZ;iVwDti)%)J->>tS
zet9hn&$%YhzZi>_f3^!pPfWm*=ak`eZWTQfEsC923h3@tmZ+)rn2<SUL^|1n4)b%w
z{p5W(7<HXQ&l~}z%O+x<R7bu3r``0*+9n8j_fPO%?K-s+GllD+K@g*0kDgcO;h_%-
zye1U}j{eU`$K-Qlq_U@A<&kT6(SHK$-=&2j*^h;`pA@liG4C_HSwdcleipV=PR7^c
z%ZSTb8+6>_MIvpKNlB$Vil2=I*^zUw@|qIfs$T}T>m#vzWEl2VTLJeg8Vhf^;GtWh
z<i^VaqO?U4#;=S;@A#L(k=u=-cGouYL3}G}&DnudT#t}Dec#Al%jIYv#(PA@3={97
za^aHk7T|JsJf*vQXyV>+bhdL88R0hrBegEq2MspR#b5$O-Nr(lc5&J$|F*vI@--T{
zNt)Pv-;Od*g3&MDk9Mq70KJ9tQ8PPUaQsa_b#z}27H;9NH!%v9iibddLKMV4iKJ~_
zEwt>4I-R&76dTpj>MicI(L3IKWUE^++V9~r6i!xP|0Dt;eKlcMbqHqMT@2B~;rJsg
ziW_I;CKMJH;d1p4*w;}4u}gAc@k?=*Yh8jOD`nU%X=`pqOFn-3bqJ5i7%^Fo8SG%M
zHOsk~gU=m@aCCY(R<^Wa_VFC3{vpi;{3psL8~((W+A#DRTu$bmB3NbihWPLBfQm9-
z^zbvLg=tFQ5v2lhB_D)kH+i4eEd?4Y8;b+{YZIB~SO0py5!6l!hgeZhw0UAh8V%P&
zPt$bJ+4zNgs{Twx`1|8eD_&O?eL)OEtI3J#DA4$#!uE^^rUm8#@+|Tgs6XCC9)Dg8
z2BH4oQB;B#HKgH}&ks^7XTX_eiF2Q2E(=yEchU>*EAhc4mHP94CvXc_wUPk-WYoz}
z;6`~I#px3+;^Fx-c&&9aSa`+LbpE?Hd-DTY^lvqcXt+yF=I#djaxZ~)whfH8(ZxG0
zEkeUv+PHUk0zN*agPPn9I$KJM7A0E>x=tI>L!GCIkFx<>uvEqyRW0PDAeTgLF(yVQ
zKa=3xDdg9G>R>r~J)ai}f%qHh7}>Z@;4gZGsuVb*f{PTblQAUqcV18}O?kml$7r0k
zP8ToTxP+ame8wU(MUXKoiZ0rniLsv^kRtacx_<vS`gOHETzozntafJ#Y^vljzGD?a
zw*yP#cS>344EA&G(Ad4nEO?I<H*L%~R8V>YgQ7BUUc85_e7*uk*GO}MjjM(B*HgHR
z^X2sxW(}z9J(3GwdY5jL^CPRY-{FGVHdx*=hLc&T#B0vcxVl20YcR0^Va+nEy^_un
z!&;&8ODZn*oy4};et}?&;f@7}&<*PjF!Rns%=Fb8EKs-Nra1250#*1Oj%qGvxN<dT
z*_Z`IX36a9L3uK5)^={e#fi*QX9{=gdmJoV{RC8^_n?HK52yR=5Bns)g8pb$Cshib
zOyPqx^zLuPbA#8}m>6Yl|4Lbu+Fj1Zy4`}qco=usmSeV|1beRP$Qi6$M%)gh;gMyM
zEb_<+?s%RP_fjQ^<+UeLO_#&O`)Z&docH~n$QT0-_md&$s2GUM>9?!VIYka0DWdf)
zBVkWvGG+*cWZ>&exc|`ugO&^kCOvjWd$F^^z4HuUKzb`p**q2If8jj<3%iIT&raAp
zW;BX_cSflt&rp4%2JBlC54}A+S76iz!3ixPT!|~8CqH?^_bDGit9K1f7-<cwKl#Gn
zo;1SQ@q2+SfRQ)5=(SdkcD@Nl*F--O;G2(AUiagul)Jp{o`~~HbueRbBpY2Gg30r9
z;Mu=vbnUx8uzg+#y-^p=Ub&A2TiMSzaq3Hao)=wvDaH`A19q^(?|%^dBgT10+hAd>
z8soYWFzt&SG`Z@+{%Q%D(UT6sraE*e+zW!*X#Q^<LElTasm^*+vVW%`Q~G)f5~qy<
zzs>cqch_EQik->L@Jm9nC;;_#Kd7Izh5x+#`z8F8SWS1VwZ??wr-W6C+Nd3H5U-s(
zj~`XdaYLslTfeVGFv%NmiM0taWqb_h#$WR9up8?hKLh5J9)@p=JXm<#EY>!Z3R0c-
z@lwhVHa%#<quUjU@xn-!w{IV)j44E&4+F5Hpq);eZUmwU_0+;B0*7|FvSoZ0^1rV#
zT&IyTd*zi2<|)za{=Or)(sdJi@JS5)UEYJ=7F}+=b0l*vK8`<1+?iPLa<*~&Ahq6p
zi5>|_V+THGvA^<SO#Qt!o7XQ!pFU*hDHua^tCYAIoheLHeiEnM(1PD1THx<{W#+v8
zC;5D82?`0}-b!k7amxEpg1NAj$?Mpa)AF3+zDTO$pC$}WsG_6pS<?S<5!+41vejO{
zA$L_gywj6s=k@JaZT|=^*y}zx-`++<3UA?TQA2ii>2uI8DPq4*yRa*kr*Jsll*>B#
z5L{ebxWUEi$%h5`(9G`$PKelX?ryrA|0+!`cgJSjuql>%yIPwI%bms@5Pd@Btv(2)
zrE_?HlmwoOP8HVX26M$hQ@F=7mN3`#J+$3O2uA~|Nyf`{AknuLJ}f<o&rNQF@8(H@
zZk-b7ntO+utnLQ)EFE@FtqLyfGvW5GECIpNTrA_?$J)*N;q-<8GT8MP6)o0-*ESVa
zR8j}~{x;DqEuz@FelhjwNn+5wi#o15fpsMkEJe}=J`C-|4&@l3SdF1=w`2mfJ+=^B
zPvoFv-gH!UIzT67k41s20%SjzV{^~HM6&EUdY5QXgRyyd_=7UHPW}tN*=j=U1+9V+
zA<B%}hx2;5E_M~hu{5oR;G9x~mxoHpiy;ZFVbcyssQZF%)yHtfokxWMy*eb`L!DDl
zR{?Taf-6ak$49(BS!w%HGQl^EyJfwBTo2j;itmj%vlk~J{Pj9?cWr|1i!*U{2cOZs
zpwEd9w9|~+OV}QjI$@$t6g^P>5%pGxa-A0(!TRSp8frX&Md$_Ns*WN-L-Rz)t^5e)
z{ZUZd;e!Krd4^4p678APj*1ue(0W5T7Mqlc;g<2j$JQ}qz)BroxrD$r>$NmAW;On`
zu_nVMSE<T+K08C|A!bD+L`7(l^Jybs+O%qbOf}*^_X2F+cLsXKAHtr$kr-srOUaBH
z@N*0&3uh_O*U~w->LUS_k;mYIYBO#7b%m~7a2(5AVu`z*w?Oln1p1CWjTQ|ZLZdhK
z5R$fy9{6^UtbJW5xCM`C*UR>L>@2T;Rx*`bJz|Q-i=-j>x(q%DYM|W`=jqJRb~q!X
znCvk2Cg-{=K&rqVrPd5n!xf$Mpmqjc>+_=1zmCEGGDc&CaSnV+^uY&Li%}!r0cVK&
zLWa&}JQ~$R9B;0Iy46!?&)9WveO)D-oK%7b|D_7D%T~eeV1M*(E9UR<5vaVO2*pzi
zanrXEc=fCpt+>YTbxZ@`&e;rNdBzt>)Hb|ok%;-}H%Q~bWSa7Q5@lH~u+~HkMgHc|
z5ba(X`c@vjFU%Bn1ihpp-p=%8raf70@|`BVbmPBY_n=E@7@WPX4ObgXQ9ZAj%+%eF
zvlNp-y+szECQpHZ8;LmWPO$B}k#VT9SP!nTEE@MNuU_k_HF<eS6lcvmYqRWE2>o3p
zje@D7IHUXsUX~jL6Wi*j^XV~Aq<BSGB*xztiz<cNYq#Qf+liR^r3h9gHbdy~1emUt
zAPnp(q`#+s5UiF}gMXRhVeZ}|xM-ucpwV|R{t%gi;{2@6%WIG!^-U;x$`^;O_0j%G
z3h>vx1C<!hWEwMy_tWv7&x2hw?N==KT)7oTjaEf*ot@~W_=Ki7r{jidai$+O5lL4B
z9rJuU6Ca28X^%bK4R^6Q;TlaaX`yNJFTjqsr!eN_B^<FV2D4k+h?IN=DtE2HT`2}!
zo@peWUj3LXoiBrL7Q~@NYaAGyUWWtk;?Uk>jbL`S5pCJpPPTbJqAl-KLC)I~<&OU)
zKc*z1TuCx{F6n@a*N4+09d|+Fs!IZ&2X;{PEuQvPjE9s-x){-~0ynZ_Vfk|lOg-_B
z&|`TZQv47)^aaABvpcC<S{W?V<_Fa62Ke_G?=ub17tT-D;UwOl$E8!B(QADZS%Ozt
z{S3o?T4h@<6fKCR13T~1#jm2F)c6GvS;^~3UN-E*shwm@Hlvc3vEZBFizQ#@LTbTM
z966)|SKjT#Uawr@uv`!00u=F1QH?NiAReph^!V5Cl!(OV3F!Na^le=MEiseADJ=)c
z(j#uPk!%y1WC|c}vNRe|clzCMCrEd`B3qOC>96;;I6ZaPZm-lq+f_YDXcjn^D0a@q
z_&h~&<B%)4+rev&=K1u3YXGkCiX)9v-r;S%O|<U)R$Tp=(S{0ZxNO;pYAxDC_iiuM
z?7l)_%|(HjzmQB6xC`@7#M5u1<LJ9RGqC+yCY;}=g|jTKV~9~F=)Rwi6*gO7MHUsh
zH>g0jd>ZyT3F*$8-?07o2)N5@0J`5lV#O~f^4Pu&wmehk&mxBYqmYyuH-emqE|`3j
zf!)U|vH46rot{$1mNgs~j*uEaLEtWkui(g5PhX+-)@N``w;Q^LCU9HsXRuCDV|G#f
zB6|`<IlH-ELDA(4)R>>g&3}}b(ib^y-%dx=yQjf2@+Wgr8jG0*M!;Q_EnNGWFJQp?
ztAliBbNk){z8J8@LhTJG=5q}uy#0&Icz@;GRi*4tXe4_pmJU_U4{^bgYhdbC0cPie
z*tQi(T=#1+CgR^lX77;X>K9Q~XYd7SXEVI`cA7PXy{9rV$!Ox8MxZAEU4%zTV#7b7
z_Fz1?oEt|Mr*A}$CSNr2-VU1Yzfs5Jt#Id;9As2HA)J9McHNyu9{-7@@2<sQTI+IH
z*=~(~LCdJ4cO}Wlv>}bxb;*>h0^!iTGt}Q?1U_0{2D_OLuD?DNdtZ9d_sU<vd4dfL
zFRX-UnF;7#&`QUBUqYfT{v_!=ThVp!18sj9icgQ-rz?_uA*P`c<t=~HgU^OhuErU9
zJPwcx+v)|wug;;C{T}!b_W-89;W)4^fXGGnz)Saz;L^zvSf;MQMwp(Xhr&hi<>_Qh
zO#g${rqjv0#nHI@;S9ECTRB;?_5xALQezh^_Tsk<;k;G9h_!wFfH(Z7<JOz^=`58v
zxTrRb35(Lnr7^kqq9UE@LNdKu<Vga*-yrupfpocNl6%t|V0&mE3?5Gclb&AkZc83X
zZ;OY?7xmFQ+?=)OPsal%!-%3+Bs9jj^1hH}J`cBu`&NIJ&oFcfrk>s}<envwCh4(s
z+X{7VZsr9RT6`Dfmfxd2laG>`VOKU;|2GDv&c=&@tt9Qj71-nVgr+T0rT-Nz<+*W+
zZ0d1UGGsK8b@Xh8PYY|%B%jYrj`D&n(%Rh0nq)k<Tb8?bN11#*XUrXmwBlM%ujYJD
z-GH5si?O!;8~R)&Y^3P{Hvay8`XGNUZrSI@PF-FJ1B&Nx!p&0Ha&!!L#^ZwEgq|yx
zlP$x%9J5*Z5`VgT+C(h8_Y7AViQxMe#~>_pHb>ewaE@ElxPuOEVB@92ofwjXP4Y|G
z^Z6>A!R9&;|6UJEZ>F=VR#i@5Gm@Ev9b|U9OYocG8B{o)LpMwb63DA+a~{UN=$*NN
z9W6eGav!9))Qs75dbT_q7yV6UEn3M<w@N})n-}nP{CM_i(F|a1o;=I19UKRhxggsW
zFnrz*zBXwy^=lkEuqp)?o+<#@-e<5gD4rSCr88H-6gDz8jNRH9!}9t=*o-9yh~HX2
zJaS&Z*1Q(Qtob!U+SUe-^Hfl2pEJE0p9ue<HA?ioz?WwtaJtDjF2`HIbv5$7<K@5M
zM@<?0?rOoAV-)bO2f@yb5dsq>6;@X$LPs6vee7+Qq3F#4JfNb9<@knNA;z4ER4|wa
z-=PvecH$ygeGE~NM3=${Jfl@0nDuTDMjxss-(=nGs;!mLO>{i_T`DF}?h?U@{}Rzu
z(~<Od4Z*(N<0yZrfpE>o$ic88SaIYO9ElHySrg<*%DPver<KICHuL<@F9*<OdIjzo
zxl!O6umcJ>eK_^>9%#KyrrYKpK-(K;Q25!1?7N;zza+&0tBqv8Mij%Az}2u%_8b^V
zSHMt>E2gaAc^k`GVN;<sM6XO|DdGD-BTbntk(<JHY_{bVJ}M;-luPl&18r9H_Z`TD
z$a61;%><L+Ci*^jiw>o+kf=J3$vsTLbiLWE@!%9t{aGnkbMq}Y^7`cD539J=k}f!F
zcnVG!*U~%5o1wO*fO_qf1pBA}NV%U%YA46i+W0E+*H%c|C#s;`h)7@$Khf$-QBb7n
zB}n-ki(TovKyr#7K$9O9ZITgAQEnm88BSpLX)<KrRE0adj#xkWH&r~jAL^16K)xsg
zZ(s=8oH2t81$j`tEJsqE%hB4w4#S$$U|F0KeWe#H{5lvzWb2pXzlbNqvwa~P{k>mM
zOCF*1(M$Amvm`t%xhvEj-i(1^4EL>jg{z&M>xB-6I4)2PR*bwve`cDIbqi8JYAwM}
zVuy&M<5^f<5Jiv8Jx#q7Ps7qL2dKf;U2w{h*EbAO$(qa}P(9^M?>^Wjyg&38jI~8@
zZqGaFv~we<AF+huogQd>v4VC;<<UOJ$;7}!9-1ztQ0KMY_-XeL6>JYeA9)d~^m`;Y
z51L{pNP>dLSjfMdNtSMkr2bqIzPut&$Is^ZEFY(X-uEL^r&142)JKq@=Vz&O>Uq+2
z>n8699IT&mIEjWfucCKdi%8B2XDAcT!;8n&Xn#=|naF$1_P53gOFuZlwXg~b9WycD
zkU4&fFvs)eOCdme8IEZ^N_S~pqR-{FqZH2@ZGHQPL~LAxf>EZZJZln4>$}2_ms+SD
z@(wHJ%VGcHGdzPK34%+aq5VQWl`U|9k1o=Jz9UUwUKEFaJ166Qog2c$H5zok&qnm|
zN`YZ}Wze12X%~G+6@!ujF>CG-G+*;k=%}TI<(JB_FYz~Mw%sJX4`k?f=N;6ddl^nw
zUV^HF8!_{$8ybb%;Y(|6p?~pr^4L6y7RDN2mD&v=edQ{NoMT=exm=&@T{=u7)wz0F
zvIxBb15sr0q}q|!cnwCxm!I2ZQ8P+LC})?9c0Z-C<@{~>PkKIPY}iV?mN<dP)n_&$
zIk)Q$&HX7HF<J%ux_M4gnkXfAmqW+j4cH|#AT^)2f&Wk&jmVIL*Evme>$2s%&Zb6Q
zNXcQ4Zxa1}m_sFn9T0hC6q^@uiCjGA$W7}=z>fyeT<Ogxu=4F6K_5RCYt+k=t7~|c
zT51*a?C!&;!@y^UZ)4G8Q#h|J!O~8KfUf#CY}lGdbJy*_*M6ql`P~~aVs<ZtwB=*-
z&ptAcX$igK1W0|=X|0wai9V+SuUBjZpDZyvS!#$X^AoV~Td`ed?=I}~pT=iV9ALa*
zI{9ll7kn>>qKoEbx~^|Oc>iVs?@wn1;zJ@Zz~4Q4<?fQ%<;D<cxYKUyTUjt#>kZ;B
zf06i0j2?23gYh9%WUhEJBrQ$Aq6tkz(>GplHP05$UMnRvA`7XJbvVooQ-w1JN21GB
z7g{rp=j|B8)2>Npd0nHKmU#t1DAmIv_kSeTVkSmB%tC`j8==m;4CD*!Sg6lWc=NIm
ze8v~xaz#EP*)a{2g<G-j%Ny9*GoKz)bg7p+vI@^QPlqj<?U*8R05q<bBiEq846U5m
zi46hp;^!&Sa3BcOM=7z&vSVb|FLCxVbR+ZN&l9JXdbT>c7G4PZaRaHqD+TK8p<pH?
z#~nla4dTo!dKWj<X$og{$e(?<vWz=CaV8N9z6am#c%kIvfAIN@Hy5&fKR4SM$-mJa
zuwm3jYWH4&4ZN8R({{e*RNdxc$Mwx@yj3gApCQ4H9!h}wHGb^)=H)E4Lzx`Uy#u=K
z{QS|u=NV>&;9s9M%$@U)&WK%u3pPt|N`LmVtl2tDWa(VI_Tme4^xVfM&CzwkXH`l3
zw@9is)`8CHHODqXM~KN9g|Hxz*A2HoON#-Oem9=xyoyD~*g&4KBu`@Nq^ObKV>06A
zb!z`~n24BkT331T4CL4VOnH`waz}Z_RYsq%V4Wt`*e!y$&pYX!g$E%^XE~m*OcmUZ
z>>+16HsfM-CESmP&~17JRE6Hbv+gU1#I7lHaa<D4GcSgR16f2uazE<X$Wza8O;mSE
zHJ1HQXL9SB(fZ$QyftM#zS~}B>oX}E;^p~_XamFR>ms<H+M^(ITM}$pbC0Ag6p(TA
zXR{+iiFi@{y3khEi5mG+^7G7j*n9sFjvSo864utE^ieOi`;I0Q+?)(SuU11+^CDvR
z+>kBNUkz=%rv7fs5ttaQfNX;t6X`A@iMqbfETzFoJvs!kE1lWmzl(5Px*=LGm;jB5
z6sL)#qSLrv<VH#nyt<{tO&eH86H?P)+;)z=_`Du;y63@!+;Lp2`F2oIpUUdCY)37p
zMW{4^(#qUxD1EaQb<?}(u;v&VlX_41x&1wCE0L$wC%mBTR}h9bEn**QwYX)cgYaC9
zI=j>K5DVU1!QX{znR=@S?A#`Ua~ij>W%A`{HEA+zovX(k<N4O1a|gks@)XOddqddk
zL~hx$$Fy#qE$5>!L=1l0GpCDh@Zot^xaDHTQZ#}{v+He?^%G?@BpkST_b}=EXN<5l
zgAy+tFmPVY4cvMJ!gg`);M+UMJ*Dt^<0ZJK&`P3I7Q&h@yfQkoRXDZCl#8^eqjO#3
z!NjGDo3VZbO8!@e4OMHw_TMUQ&g1`3v*r|B3=%<Qn`v-j{d#yUF2+`LK8M6@Kd`Ya
z25znHzza2YY`yw8W}EX2H~qOswjDCUe(`c5xlod83iM?jSEs<6@@?R1QU(_CV_4#y
zIQF5f3(CIy!^!TeK{+mqGqsgw+Lu+?DEV8c_d9{9Egj9Iul`2jIS3~M?hAVra){|;
ze~in>!6dKKm~weO_wUy`$TuoLX^}K0_3Iip!q0?D_PGrS`_7Tf<nQoeM+2zdXy81{
z%~{NnMEF-x#_7*@<bS7%JbX*oJGlukIaQZ?yGp0t=$$)!N-DwK4nIk==WK54@GYEv
zB!Ofns<M~5*<^nEarn~JC~T^pz=U2!Ov<%@<Tvca+G3mfHSadD)SWpvv7@7Iq|YV#
z>B@evzmf{EzdgB>TPJaJh%<4WQ2<qv2#2qvQt_i&_$-PEfAs7iDovO14Dabh4Lz>!
z!zi{oW-O;zEzNaRbmEZaL@tQWAv?kX&aomBC#OdVr8gbNl_JZbaN;wPnf3@BwSt*s
z(l2tF&kLE~+`}rqXA0!f4B@s#Jq~{!qI)hXv&OTxps6sNxfm`aQMXg5(*{qNe@C7>
zywXl^!t^zYNn0?P;Kx|?PaluhJQkj<xrfHQr)>K6LsaVfJ5=9!TG)IqiY=@=%H7oN
zh2<)r!M|t_9;J6<+M1_0d<Xe`TOl?+Xte9OO~B>BKhgtNsY^jH{@qnVErQ+o?5;Z$
zDg7XF1C~@o+S~R{#U{+yu>@*GGUy1^G4S$hhahOQ36$p<pvgE}@>(I1o-N(Y=lW{|
z9(w-p(swWEEYZcSCldvys!!3tOjFqS-$$w+mx3xW;;hWk9)h;!fz94Xva4?n$|&Cy
z4vd*BSa$0%*_!rIaDJ)-40xW0j$So*@o5gq-hWEBPf~%KnzzZj(hD>s#gSP5NCU5@
z(I8UjM^^i)V%LKoMDG``-!-fvBYY-9dU+uBUe6~dB#%>z)5^TI7mV}HiJ?v8ZuB5&
z<U;ruoUN}8_Cw`%{xaLBBNq<89Rk6A`vzg-e_I58)kbvj>jYt<XdrG5y+*4{V~M2t
z2y{FfM6}!Od46J--7V2`bob?1*hQC+gVVm!(IW(?C7FOh*K0{+SQJrfj3b-l*5Q|C
zWyly;PD8B){OsCHmR|w1*S}p~DAYom8&habdoTu{a;CGDHbKEfSJb*zC*0y0Nj^l9
zddIwY96VG-H&i9#IuU8IJ8`q%n^=ln&TteR-Wi33O7(c_fr8zn%aTx;zYDViiBMlN
z3NNci!K>-xu+{bku?)+`jN1!IpwBftDsvj<xF_MJ715w<>4#;9X2G?YPH;kxe>Rm%
zp)>Oj3F!GPm^iZl1(p_AY$negN}2`R&936;<1SzyG9LC@U#5Yg%6QG#9|B|);eKVT
z!0NL<n!*)3d;3_j@sA96FVGP@6$$5m9wm)h<LNl-QSdUkRN&GrhiY{t#NKNm>|79w
zFGIqGN$n$%?hvQFZEu8$@@izf`vASZG6X%_M?lx~Fi3LjrcZmW6aSO;*dO+RdhL^^
zUG~v%Ftv$Z^$Nl+W8S+lYZeT|$bopz7HY53LW}fNu-d1RT5NCz69a3yZDIz!SSAH2
zBc>y!iD6%(E}hVAgMVFl-*2iCOswU=`=b)gUv!;hEdbQdy$v>3GDt{cHfC*E3=b#%
z#B=YbfmOi?*nRChbYIJdjnVIfvj1ebXT#E*WXxCmSCJ39%A2w0HL%k1r${zga}qNf
zgkL`J|Ih3;Jd=75CQLNs?%SUQt&pF>r#EkqhL$zxL4C0?Yzag}4Uw+o>%@Z#hT5@q
zm^k}}P-od@j2uo81X-HFx~4eXT^ol3>OtUd&`p=sgkVsa1r)j*rH3-&`8>QCguD-e
z2;~a$_|bguFx&+q6(fn}9aTY%TR2{I7==sjSc2cc46;Hc4$rBnVeJ|b;yoq~rj%Nv
z%UVOY+BXBMU+;&I)xq@0^}V<+E*hU!{i9W**HTR_p1*Fa02gzm;MON+3|gKo@Vqw}
zLo1KK&*nVT(yv6@_GyCCiN{beF%$0Gx&n2T<G3wLuff{N0r+-c7>XJXfsNWe)?{CZ
z3wqwr+^^bL;O&Pt4gs9q?LJ%?P>COOzLUVxpLBOtKS_PIgyV85!TYo{6Z^KGIXo6;
z68|m3g?|Z<;8Cn8JrNYYl%Q;vE4CKr;kPjdAnQURSiawjeY+Vf9lS26;h&HEtFdVC
z%Y*rNxe<YvEO*so68-LP!Bpg}xKo}-xQs_T*vqCIftPVT&i%wQm6yJQt$`Yx)Ak>N
z>1Dq_Ip2yKRbj!5UcZA~NBimPrRLngS_h&b&F5_I9^~qG)S;Z5H!dq%MWmjXg2=-S
zF#pd+I;&>@w~SC@i_|!_>)JJPu{jYp+a~bm^9K0cfi(N<I(qBg2GF-K#`=iykl{KN
zYt|+ZkIhR+;n6C3cGPC*QhP2uEAf%|rwd6{@i$WYVkyY|O`&oI(=pWB4ZZ(H!;J-Z
zh?&Q6nv&Q+^fILJ((~JP4=U0jgWnauO3+80Lk-0G%|W^#s+>6U9U4~;{RcN!jie6q
zO>tUHDt!Kt50_DgmPe++heUJc@ApBlVn~);NU5dY#x~<6epVGMe@w!)rHJ#0J78u#
z7bH*aXCME5z-xOd_-+qpw($Kb&TqqMShk{?=3U@fL{7KS)~Qc$S6_#-I`IHbH*Upo
zWzVtmXdMoR^V#1PTkdD~0B8@$u$k+|vVgwNc<}|#47agiN!ML4;z%c`SFPb3hv%W^
z&n!XihXT;+*aZP4<2bj7Pc%YL8#EN0u|<{V<?onnw``~rl=3ZT@MS6J_WuMvA*+NR
z#|{zih34#)h6)>18i9VZ#kpLa9iY^94*I0_@LXbTRyAM1$}?K&=gK@X`_ncwYknb+
z-D<}fgjEZ6S3km8&CBp*gAS)~<}N5)5aoOyE3wh*yqM)`cP4rujMFl-;Uw}zxkTq)
z{I#c>Xy~dj4^4`j{-vXu*JL<tc%Q-(KQ^Ui3!k0ciW@Y9cz&TdXQ-k9YZK<-sn-_V
z>1i^y(yeaHfM@)Fkq%)PHjPX1nT0i3$<!yH9lz*wa~o_6xeK=g@wsFs-ux|t>ZAGG
z;?hIxMdoSlfZBR)o&P2r7o<V{{Zs)j$7<Z8Dg!f?1(4;T8`vwJZLYIfj$OFA1dsRh
zLIcmaUSeSmBP)yHYn~Qc&1ZEM6({4ona$AkXdcTCxq@5s-QlcOg>d`>Wfr>IgdO%$
z#BO;Drl9p2HI|O&w&WP&qktGrHuVL1Js-;zua<!~SEgca_IVnRkP4HNb-BK59hekc
z1pUJj-21mbAbIp*xL$u<c&+&WT@Y7<19At^p;Cjh*>oM}&yGU}$GM!>ha|YV*9$GX
zUXX#)65N^3`YdX%Bq#pFf-K>)!^_gsak5(v3AOXUY2gN-_eP4y9T<(-7m}c(!GsxH
zlxO>jo!M}8rjYAVX4PZ7-~<%IzC3p>M)iwusvin`J{_klXbwJEeGe`^Z~_IbeVjp}
zJiQQR%N2TfGtXN(5c;c*Ufm1We7b-;FAQJ;_1nBJUWq&9TT6<2y6KHKE^Kl3S$115
z3%}*u1;_&~#9M=T_GxlgqQkLE!G*i09z)i+K19t8>Vi+b9-Kz*db(hOHv4%$mFMT*
z<U4PcgT09k`$JT?ACG0Z#!!A&Jaaa3CVL^)T8S(BCBv;>eiGIW)d&(QM7e-AYbG^n
zAwBZ55&flp<8Fy@+^a|OOeEzVHvALk{Fgd$>rOqOO&`QSt8W=^u1Eusmt7d+Gl7%h
z6p6gwO|sZ(1it!n5&nyq4t3uYIJx?949-krx9!~on+EPe&k{#4NT0xzY%UTBiwaEX
z9}D{(cfqZ`qqP6kP84`#(XGqfV3lV%1iY@Hhm&}0o1z2^cnwkW_uo+`<eA{y6>WUJ
zViN3JS1j1uAVLGZKhQVDlBi_;9G?1b$1|=2bj$BaXi$+ybyi%YqJ;<W#g`mhVsJo^
z$Fsj)-MvkEA05XL_6&7K7}2<oyGg<{BYZkCiJtP6r7I5WqtlB|^p5+mAl_xVpm5P4
zp<e$&7<zk?j=v*|`IC(VC!J>!HqQ!kM(>8fX-Re2t4Bldcnv(a<ta&cn-2yXk}+_@
zJRIe45HBw}NT)R;86mqJ4c%6Q+QdfkMR7BkQC2}Fx!1ugA9J#8gdH(7TL#_|`ysXd
zCwb7O0Rt}xG0M!ur@52pUs-wRQg$MJi}|@HyOLIoUjdEY>H^0pgY+KRh0%f;=#w4_
z->jUm@AXJ{`RcrI`Bp<HG)x4gQXR-hE)kynR6_1Wn?TT|$E4=GIeL2V-oB4t>`Wr|
zlD)bsG1ev=%r37-E-ePc^ygvcB_{|GGoZEpRkXiU9ZpDx(L}jqbhwcKT@Ovj?~w<I
zpPeLlsXX90V-|dmjS*P9yGz2Io>APTi+)l9eA`q@8)Nr?!7(#PjCwDOEeN2E=90pU
z#ox&0h^6p6oX>bhC}Q0}2p;TcqMg4Rs7G8d@m8CNS!*r9F+GTg2WZ1fvsj{*WDGh9
z{&-e32<)VF@sLX>)@r?@jn?B~shbZ(E?7&l_Gp3TU@oz$bs@W>_>7x$E9Nbn%vN%I
z_ICMZ98Kjo(;g+xN@F`#-K)aHnWJe-X$O~(eTrm^t;GJ6ZcLvVN%wn9=GOJ^g2ER!
z!M5!kqURg>AkdSUpIOZ%j>&~W8&@nEZB9h4jTWBHu|`MzAkuPU7udA#0Ke9)&?3`E
zrj1X4BE#$C_snp7v&$Ht4$K7+Ro=^bTov*MIgoN3rYF|%*`8nv&~E3MB=ZgmvfFtM
zsdhHmv1c+`>OLV+&5fwG_z2ymxdz`gXQ8XiU7BcfoIY>rhu2qQ>~iKF!y9v&>D-zE
zIPvWTS((Xm-UfEyuM{D*Jold7T_6hKA~Nit8^twGj*#tzvRLxw3%pe>#-7_X@Ol)V
zVV)p^-t(P=8?MFj+VEH``fnvnd>{#$o=5rY#s}*7pBr(!x|A3x?<Y3*;)v3IAJBZ3
zE{t^1p&1P`i1z|_y6x#o6zR>fIUbro$Hzqw5>!t#AN~{cJ$p$<9yAaPol^%DTV?p-
z+)h6FPXXZx6Y{AzTWI7X$F%>b!_)W6aFId)Ei&^bw`$VJ`9+O_e$$77X@*;1b3r0i
znK%i~r_O;|UZ-VRne_YeDAJ&-M_Thfk@Xq{RBDe4kiz4b_^1&FBASU+mIAk%_gR|u
zNpK%ZY&nk$(m3D!HuOK1=j!#$IrIKHl>In@J5+It(maZu4ue!mbrEJ=@W<5>c6@p-
zgheSju?f54(Xu}TRGBZmeq$K~<-CMRx>nqt7knRA*d}bcZ^{-dF5(V$Jc6+^wve}@
z#JRvzN5K4C0=cjD4))$w;2w7$<Gr967~VCBbDn;fHV3u}ovsG4(QcD5SL!VZa(9Q%
zpVq<~-&gReLy<dLWQN`qleq^s*Kl(exv}rX6*%}Ag}c5U=UMgAf~v_+F+bFj?KO{P
zch4rV@Sz^Gt+y5gue3nvs1)`qe6-LGwVCTt5!ULX$Z}VTv&wlJxHd3l8$gQ$7V_ux
z2phPOxf++`exn(KJ0Na<5}rubfS~s2B%)>?yxs#;c8Mf5DR@JXt$}c)=?sW;*-!e{
zEP%xi9@BAmy>Q-qeY}4;9A6KgrX8}+$svgtBL1M6y6kd-Bbm}5pLd1t)er}5r3^G^
z+lGUgsrWYU2z}tw2qe0QbSx~P!Qb12y19+yl@?(C@moUI|0MB5SSp0MZGnweU&$Am
z3CE7*)1Jh%`V(41G;UKlDrZ#EHJe`3%TvVIy59}Z<#!lvZQ|L1eD6xew*BOfzatmA
zxQrstybf7WNuuXyLj3^)7B9a7(=<e=O80vrCc6e)%N}5V&RDi6NSm4;yoEEnnglZj
z%VEctSa7oO7y6h@h3y?jq2OEuTs-uNWS{2q;Wn1guA{}~b7f?k^mp9pb{j6CE~^T;
zPPTWa<1O_f*dpeGK8O5h%Y-#T?$jx8_B+G(WDL=V-3-!pRD<$wE#`4o4t=&b(1leg
zuxMloot3cyA}T$Y)e0qWkkB9_%JoUXPbJQ&tOaLZeF*6f3^<7&FCh4t6kNEp2ac7-
zQ`d-S`XpHg1AAQ|SHg+~rHXNXCYkWrs~QkE{X)OhOSqqZ%F$(?1KqgwI6e1@*F>)i
zxIH6F*wXZmbjv&u*!ku$Y~p>!M<+Y6E4TQrKeOX}ey$OJj;_GOmJ}wo#+c1$&%<{M
z?&FyEJhQuV3v7=V#WTu^>Eqf5ux*AV+gLr5d!kc<D?VwElA0)3u}L3q>hb<>i$*x(
z`9k<OBpRx!a&V5!TVkzo00)yF()e>0tc%yjm+!8?@5XVQM};JJZQf2+6kLR=bGPBs
zFlDw}dlQpcXUt9aUMf60sLSU0#!{05Q&GxHmNP1?pmISeVE<w-%X@9W<gKMpHpZG0
zTh%WJ@zRCIIbJZcG?2-p?#D+-N4emgPGIBy1R7^PCrdt9bK>h%@UE{9^L%6kdFpMD
z{w^E!qA294`~rKIE;y_z3StgLkZ@27&kkyG?=Bsr{6K|cw1*+*%U*JN!8Z2!vj{E-
znv9<Up5t=+OLZB}@u+Ej4;ChTfTv<+n7N{mN^YwHtxG3_l^Lh`&bTG)lj{-Tna3Qt
zQu!WVO&o%er$%v?)+S@nQ#~%Fvl72(ZziWSUGb^UJUsum0_Uinfq{8a-1w1-R3>JH
zVDz0a+{;``ey{Kv*DU%<`sDO6#%vDPwl|1Vm{&xvgs#9<Wxs_RR#d{w)4#}z_6ejx
z)f25F@1b{A51wyOz?NV2u=2oKRPsxJM>~I0{bRi5<-JJoQ#T)KHk4w_r(?MC<57Bc
zXE_+Z(%{_oy}+j46yZ*+taI$vVh8WsfLv91FnkvVntUJF@AUoT(WNHgsg_c*b(AId
z&}bbSRvRWuT};@>bylEWP=N*4BhhFLV*iXf7=1#EojQCT@{6X@rzysqP1k(7*xw4s
z8XYDkX9rQ=m0`q~*N~%Lger>~1Wqw-G`i3Z4BHjaM|lNm?)^g*3+KYTVpG`P9*yM_
z&Ee0iljzq}%<FRNC@C2WIw|UCdwo1sS7nmk4IT8<?Et~u?rh=vp35{>X9T+R_R&9C
zKq`9A(T}3zgsoFsg`Ok*smkyRfdk*4xAbD4AV;klTgDdB^9D1irN9#Z7>AK$y;*o9
zPZ8xNmD3UYym;f>N1<)Y@A{b|*V4@~&Umyn1yfaKz<)|5sJQhi9xXkKiLE(!a?(CJ
zr!NR*FAJvXGlu9ri)Ol9KM+q0&Oz!wAEygFa0=5GuIki)$=b)s!=C?0Yvdoo*{9>A
zKvm#FHqh7PBdjrEG$Z~pZ88#LtA==PtXVq^eqszH<EdTY(F}NN(nh~4oFPUZ%`p7-
zCM-*lp}oaA^yQ_cf(SdL_6z0-{gj`Pan7+&*ewmxp5fH-M=RAE6obG*73}nuz`g^c
zNUZA=JTW?!#Ks>YX8b%LQ@#VA{&1&PveZ$;O9EYXt%gsX&X9O=8{e0hO(cI!C0}+q
zqw`05Xk9OXblV6FRc|EH(n2!fxdQ4;NT#YW8}LL(I8nd29IGGN!rAN=VZJoa-Mc=I
z-fNVCpjiP#vs+9!-qeVcPW9%yyi~F3Xfx^Bs09b#0MOPI)c)9X;nhk7c&s!D?PK+Y
zPoLeP8?D?yZTodXbk30qzr9Eono^fnTR}R)2s1Xzk!j*Nw7Nu_?}>RTIK+22YCeBR
zQX;m)pQswh6BK}e_idXDy+zV0&6d4NBR>`jATc<X&R)I^>ms&t!6F;+?~fr=HZ6k7
zyuP_aBOSeS=VQ*K`>5S>g6|5<KwCdc$i2Cf(=90G-rulgVYm4l%;iZ?CL_T!5JYf1
zm{4<XDKxqe2kYkVMi~ux(6K2eMTHZgF?6@!s^em`Z?qCht6ZVpBdo}s0U2<)d7o^w
z(FPH-%DQ{&^)UXq3EVL?z~3hy3L|9>+Iea8kX^Y}n4YBqJ95KtT*?x*%+C<RuO;EV
zBujQ<*L_UAX~#tzO9i>|er)`A3EdluAXK!JJPB;WMejAiI?0MX{v>7BpX?yu4Ut%K
z`WBqK`A$GLj^ozsy$c6ESi!oq8ZeS4a3g0GG|FuN8=l`(yJ7?Rez1V#nXN!>ejQZ|
z3`djxv*c`nDJl9D23|QM(5rAN*1L>>$R(Bnr6aQ-G50;4*Srs1_vm2PKSqjqu2o9z
zFzuP6290xu>>|=)=&>8l*l(YS<Adcu>~uFdQ(}n5#Z!oJsE5GzYb80*7DpFv5fwyc
zRnsis`Q7f)DB-P-3k-aqE#@OF4xEECHT)s}l^Xok=^)Mi`nY*^op2yV6uzBp#esu6
z$m!FQgr$>WnZ|4d?oZ?jR=@WtPFt9SuOBYJ=Y!3dou5whi|-+5WWkq~E*!Sa#Y}Ta
zt~>7o%3rul)1H;$p+$Sp<n27NdY&=&?C(MNoUj>UYt2#U(S+_3j?xcvPIIo|0c=&M
z1Bf-e#J2UOoKkWXTmC$k6YD5o?DGR|lbI_ftaV^pI|JE-ai;hpOpOhN+`+w0JmXj}
zpLm^cxBK_<8ovFv4Y{mcOvT_fOU{?!8m^sY555J$yx;@uO|d2$U+xY!KD>Z{@#naa
zfAz@aH<7IIjYBP(EQ2Gi+t@2rY0RBEj>Z1fWOwt;n8MNhBq%tF*%^F>@QUxaX8uiT
zdutZ`c=!}O*>nu%@cd0Jjab&vH52PYc!rZp3{lds#UkZCn$nm{T==`Vi}OihQ>ITs
z`Q1ia`xv+p#^IqjAJX$h5q5oDiE1@~ixrH)<iTkYl06stC6#dvjY78FlxH=pCB0XU
z(<#Prr0z&M-o8<U|3b{+oOK$#=zIrux-Nvxb~lC3*5~2FACu9H=?UEC3h-ffvfb?1
zQkq_>jWf0!B6n{V!{5)f_|rE9^3U^ZjKi%UeEuK3{CEQ1k=Tx+R2Ki(oyUjvEwI%g
zjyb32!stEapp-q14M!H>e;X|X{rQT#cQcmVA^bk3GZQS%d2-{c)0xCbcP7jCx>RrZ
zPV622gFg;=*poULd?h70NZ|8&+AndDIe=oXB76B$oQ<s-5~x3o6aIU$o0a|2<ZR|9
z!QOGj7<#q{t{!jzZOaIJRQ((+R*hj(Ha4KtT~G2(a}%m?$#8O+6VvC+`1Afel`G95
zfusNz<W{2kZfUaqvo`fi_P|%J#kjafmis89!7k7J2Brz(Y+j%r8zcOJgDMX&qLc3_
z5w~V$JLlHV=JQx}tCn-w_J+)qiojs#JgnBOu76~*9-g1-!3pjMpm<Ckf8RKX@r`?l
z=w^;<G%;pj`N4<-qgX(YD|?pGLnQhQxoW-N<VKzq8<{A}td{=;@3k_pGIJy$JR_id
z&j;A}vk)~scd}J6>p3~?QW~roPfAzy;`wh8oI_n2H{r};m@Ix38*?6!as7p``jRNi
zE6aurVd1=fTZ5LjpMg%!4CWMQ##L_i=VCroFxTW9++TheE4S;wU&*6zKq{o(U!fBp
zujOaKZ8<nY^#+}#+>Yvj9XOcLMIZQk;GHdnv~TtS;ppivap%-_y!XS3X-+%F+CE8e
zZq?h_*^$|FQ#Q{So79RrOPX-hyQA!#&R<A<eFblxs)L@nM_Er{Jnp!}aDGlaz_zLQ
z>AEqO=k}0}pDYJc^5gLj?@jNPI!)#DqnVrj9`tc^5FAtPr`0#R@YsW1$et=e&R;49
zg`Q&4t(5@WVI!QG7>lExdO)&DA2=KuhNGX`;PZAx@LWBcZd@CR_on*_v)_HBnI=0}
zYefc`vHK!bj|;{ngJ1Y=r7?mpsteHZ_68I;F2uAc&fKzZU2yAIGOEhe(x^9zFfTKJ
zF0V?({iP>RXXX^zI^r8R7G1?^?NsjmV{`C*m4GrYjId-xC(a%%!T$U99IWhaVAkJ$
z=>4O~By~pPtsNpz(qYd6M-`!)+k9^OwiCGN?r5g_S`j@~o`Sb~Wa!%?9M>bR#aPDy
zUYF-RnX1Dezb+CdD<5N9cF%=y<_<Dyz0kNQAOAhI$E=n{n3;c>9J#8@)$@6m?x}UQ
zyZYr}<>i}bp>`kp9wp&h={2m<lfincV!TZJ1ZJV~EPC^H;>i0S_s<!JQBMQ#z)eNS
z^4y23Ke|Dd<5R)OF)84D-LYP4#SECO-a^{LE1Ca+dpPo~AG<W>J`qz5#O)sAfp=*V
z`a24IU9BN$=UKeFt_1JpO5p4xhv=zm3kel{N;2svh^o*dH@c=_&8t)_wU>vPq4Ch%
zlZRsaM#76SeV7)TOWn%P;J3C)I^%+?P-Th}YCLm+YadS0W??%nS@eY1oQ*;;?@{oo
zb|WOk3{mTImFU=Pj*=0Fq0}aoln)*g6h<z^eX`k5ynBFHH65Xr>6-+9Ru2mc9rEmw
zZFgZ$njYr)j-fj?yAYo*qv6}ZN5sbMCk9U*3psmy39Uff=Q;_ihcDu&b)SXFpKDO_
z_H^j46=MEg7c^i05biJ&+#%xt5${uoCqHw2UOWZ%2OHunJ$2YW<`mCdiNGg=LDc?$
zDAkldPrDw5@b48x7-w8Zmz(^eB4NX}nv>#5%JcnTSv3bkppX0>RYoT48jFjK19?8O
zw!m@rXk1vd4yC4!fb7F{R9(g$8=v};1=FK(t=DGsPfwv;`@RZ2&o9D;*b;P7zlfLi
z=a2`_mcq*NO!CEZ95Vh69CTqeWQQ-L*-iWCy2v;hw%&kPJQxqp$~l3NBY(E+{!ZVX
z%z*DHd>?32DNO(UQ{cWclvHQVhd@OIn4;kf4F@tH%<L%4xbvK}k0lWN*9B;C3^6N-
z#1k9D!Nz3~9n13)+onk(ahinNb=3Ke=I^96dm}w5y9{>xJBHE1d~7<A1;>|kQ)#DW
zxOXfKRysZt%v>UY_tyr&j|vwkxa7p12CT*5JRPQ;zZrjIF-)%)(l>cCx$|<fKsR@u
z;H0I1Ikec~Wakv3q)PFRZxMRiE+R`OiE-QCT0^398T&pUq_SoHpxUA-n0&VwkLQ+?
z0@q+}v1z{`f2fsy+pLIIA@OMPU%4QXpT{)!^8Cqz-a!44)U0%a?;pp(+s5~F%u_vV
zJ1mNiU%jU5cqZsVo`>EqI}NPAn)Bz$emthS15LXA@4xd!PtnETH)sYTKJV?Ue%cA7
zO-*3-(+RNl{yQ2sF9-H;@wn2Zl_Yjc!DsT2Y;=&uFb7F;<=b2As~O81w${KFt2^Xr
z@h86TQiT}~InejlXOf`MQ@G9444-FPpuyd-uqI#<obz{J@A!VODXIoAZMreO3EPTR
zbIh>zR}<-5G@3Zyj=>I}1^A?W6V5i-1{t5mkfJaX;maS#Nl>jJeRX*sh6<9%ycBtK
zeQ1hhcU7Q^4Upgeqv*WDvHspTE?Y8^B%whiAua1U_d!N7qCqO5($J7fC8g{wqoq=a
zq)3v4=iDcWG}KqSRMMbGODbvoKEHom&*gIA`8?;`_xt^NEj1uEUB^Yb)8~L~f0S?+
zT@4x4W+KI?vur`rF!1+q!}(!fMWwr{#HlYEL`Dzg@ypK1_)85%G7Ih5*F&>#*zyo2
z=dc#579`-N1A;<a>l^-!{>n@axr>}KBXM)pe9?ao^U+OJ6TFr#gEb8*ROBkKednlS
zsfH%M+o}tO_vPW#u@(H-meaU?Bq6>-Lt$u;EB#dB%!Bl``Q9NRq}tL3vhsw_IH%$K
zsNfIHjZ))R?nFSJnKRwjGMaDRBE@(2pM_^3lZ3BV<B2&s+^r&mUuoRRJ3lJ%?1FRT
zM!;)+2UK8}>tjq_mdLN3?T4Y!oVN<udoPDNK0@A#%l$hc`m!b!y%t1bsj?$)lzoa-
zcfXL@&^GkF7!CK8wqdl24u8_U7?#}B;_iAUsrtMf*thj3B+2zNhuiBgaixgw&)mU7
zzXGkSaHKnN7x#Lz9}G@<fY~i$?o(n&rwixd(}Takv)BnVelgPU-`Z?W@F<#Rn*!7K
zx1yGuKYKhegGg*8BK4k!B=dTc&{Yp5RpolZejtp|w=1A$j&KK9AaLtVri<Ra{2`9%
z+5sM~RuUhV7o<0(fIJpn!{%7A!qEW`BOgQNZj8bDNEx))=SY&|<^VUgKsOz4(FxWm
zmT$X3E_k_#1Je)T&Om28_9j=fKmHI&acV@ZX<;~fLIs3%48(6^<1j>UqCJYs!Rs;u
zL=XOsW9ok{iPN>3L36<p=G=6ibr-6LhRj)p-P)1(W9mjByV{2T)mlN<hrKYy)Dc@$
z)ajaG6ZxBe`LHl(9Nab4wz?_(Pds4iQE?g(I&4Me^zR*0I%ZxWdN|*MLW}Vb+Y^E6
zOKqsFlLYtrwwdZrsKEF53LjhTg62sdh`Q-IxVwE1*}C;2o4Cb+bpM`;`v*Fqr)VGc
zEXjd3%@@piZ68aD6``J;1`C%S&i@^F4DR8w_%%iFpaoCHrD~tx#U)8@J4uC_Y!@-Z
z?UOK%Z^mlDg{L@pDQ(}~iv5c(!>n7s@v^{G*e^~e9Ru|Eh|Lqgc*6`j@4p(1zu-i}
zmCxX$#6rkqA0aZO77lF+Cd;R%<H}-Y^(%<LzWz?kJFu7=%1)=*HyBzAuhII&A;+>3
zb{+l+@7J_rK+a?^UY!Othd#q?>-B82=@lzd9}b-+*YNk3vv@8u1}=L^(A78pk?_BR
zaHy6vZ@G~GwoBvbnByODZVsW(@4v$Z?@q&tOUIz@j5Zx6?9&faG@{8j1OAm1<K6WO
z=*Y;s7#-DE^X;K8mlXCISA#P6{yCX6cjrx3ddh$YzM0D9(t2TUb0}?E*h)r!`T!e>
zyt(r5G(LtVFiF1&{K>H2@a-jF(vA~6f22QMO>}s*hZO$?aiS7!V>ah*3=XkKhT(79
zNaT(Ybdupcq80TVMX4U#RA7xZZz<$Pfj*?zLIrhirPhp9y=u88WeDAAI+A`!0xEyX
z0bZ>BjIDO={IsV5U!o;Ap46mioA*FEQ1%>2jLia@)k;`D)D=&e3Cx?-ZRFwSDApRS
zK^yvNz*=B;)g{e>GvPOJ=j%+k;j{yOhg?N7-Ab4!xLQ0%{KSXHCtzo>2@TK@vU$R6
zU}`qu#wN1#`np4qB^?XVPJh5N<0#N4H_6ohywG505xW&wL*CmT#@coLuygo()_sK#
zu|yrd*{VaIB_>0(h78EtO0t*RL=dq<hHu?I2r70>fCg2;Z~Af<<b8W2+LzY`yihDI
zZ8AaitKXUJ$+d7}gC7lxY8PEDq`YG3Ofu5*IZt?#g6kg60R>^#rQB48T_1+iAKIBP
zt>Xb4BO~x9KLpEktDxmw16aNvz(u!S6Y_m3vszFE%A1m5okt&N*dGzxwYk{gQVT7Q
zgx>t8^APrNB+aeMMqB?kux>>Lo=rT9>30JmjSDWC@7vH`>k_^?8%U(G0x?!J1Mq1*
zTAjZuII@-aj=O5G!eTG^QznheH%Fta!BlWP;m2Oy*@9u~JuqVaODh{qKL{VO5nIz-
zKv#``rsxj2uf7?#H3%7ZA!8R*I19>7CyOKUR4_rQn$&bnf#;iCvFTbOs9vyw4eCnR
zs;UcnCN2b$vkO-Z6LQ^O@2~>LCF1(+Cs|bNLG1K<EEb34vxu;vsOj*U<uAU@(zo?7
zk@G2hDAkAuiq-$`ixOO_7X)vKr!XhcVcWE%$-%)xL|$TlX6`=?A3Lhxb9ryLd*hR6
zp!^V=SX+VJd1GLPuRX;3t;JtWw)j+Q4O|zPVJjCc1}D>vtfcr5IqtMwboj_Nq;>zX
zF+&8d_)cTASWzMxbap1H4_SbIvrbvHxL1pwT6_{u@^2@-rk8QS3=NUv9-+t6JQEV~
zDv9^pbaMZ*m{b*?BR*TA*r*yUabZjh%ldC4kr+K(a0Mq~VU55WA1WnEI53bz>5moe
z{aW}ELNH0v4})9Yku^cxtSwz0RE2C%+j1Ak{eF`;)~B&}>0P3ChPnbTDhOjo>SDM|
z33G{}=(c19E;rr^v&*enS(h^0%n`am<(jOtYc5VpQ9%3X2h|hK*Q4W&<LtTm7xJ!B
zTKs5RI~zA~nb0G=h#Re+vPm=mpjHjE$5)X@zk`VBg-&+lbrVEQH^aeIABbYwX*97*
zCv%_OhIWSu<lfxp@N$;~torbeoPN~7#!AHF(-Xg$@di_plc)o4a%ZsJ243t+VIVlR
z?*Ug2?`m1saA=d*FOHa4#!|Y*W8&^e{CGE$z50BD_3w^C>wasr*<mDF_UJKtvugt8
z&QOQEGZ9#ptsroz)S>Ua0hTRshN|5$7(58T-zW+szy6=!CS*kS>R|KlZBW)@L{bh<
zg<oN+DDh`%O~T_<@GUJIo6UvZviWovWj%rE?UF)Mx`^e?b;jB&n&N4nwIRaWm`LBg
zLJsd63c=mxX!AmggjPE+4W|^e$&P?ga{F1)qnk|1Xe8F%kcOM(wWzWG3}iK|1o7QL
zJnP0uP+7f;hm|{1w(z6)y7yCJv}ppLetiKvv2w<|`k~x*aRc#QCPfvij>8^-Yb0SJ
z!O3WS+)}7a*S}NX=T>K9$KVlEC^2DJcRCtu@+Ok5>NqXX7evB&y}w{7?CW|<UgZsd
z^of7T-fJ>A+9(Qb0>0Pyyo`a>wyTME<1UOC_Od41<PDkpY6wmXp9*z{qM*g7m3=N*
z#bmc@LXGghHOP1}8?<*85WQ1m<0eNOb>X~NVWSGlj^oUxY^Z3^1`p<cf1+rdwleGr
znFEg+Bd|%)7>fd$h{W`1HBJThSWf;5>~U}an+}1|TP+D?LnUCEXc$)J89<1HJw{~4
zv54ad#9iqV^sLy91{X7!gsxuoo?m;}u3diop65gGER4dGtNE;Mq8``2br=q8Ed-w_
zE<Al+0A^R|Gj>#tT7UcxHhEXz+OR>wu2_l2zZYZuV>ABQOiJ7@Ux2?7#(-n9Aumcj
zOQol0@*L@7bbXu)_d3~znrrsKZmp^G(EWN6|2>kM&1O_IunG-6&mgOTbDL-V=!k-Y
zK<Gs5nqtN08QXKOQZLwLj@+Uni5T>la^1_JkRj|mTbmMj{E-2W+QPx>Pb<lNAIej0
zzreJDlhnaIjc@8R<6SrZSv@q5g}}F2FivhCYnY!13zwW@W)rmN$&9x+@x)gUFC9q9
z<Okxk;r{gY>n+e!u1HPnt>A~sbo_qa0-If{MJ-Y*!8h7W*p-H$zSIilH6{^0AGs)w
z{UbQJe<k2Mr(vWyvO<);$reg2Q*eHSA$n;35!csh!?gi3@SL&|&R4d9QRjBDeIo*4
zzQY#Cn(Ro5Hm5+4#zVI9w=nB(kcNf`fA&%6Q<=y)!?TyYxMoZlk@fLG*Y|ty=%6aL
zNw0zh@-4uO*5L8Yg{bgAxU-JcLp%NRtomd)9)7(N2Y4R9CkuCj(a$JcqO$^YUdn>k
zrAHVtcR<acHhEZo$BVbEU&r-y26DxUm9+3rIgEI_jQ?3)0S>VW__0dxG4>?zJ|UO7
z`tA%q4My{@n&ossVgY$8cpl5noA9sFS|D>{2-Qq>;YD{sxp{pN+;382jXf(wpQIwu
zbmS{2b~6RJ_-pv^{&4QL!I%xJ+|9+2+QNHpIW)h%f}$<6sQ=79vc5cuX?@;8>*y^O
z@-ULm+EEW}%NKM1b7$aN>{3`jU%?|K3BEUU0PaXshb1R1sK?NQ@Mdok{^mR2YwQD-
zQo%$4K8LtWcs@>l2YmMFqjW`)4jpvV2QS?Z<XT<bY=!RNOViJI2zOgIdY=U1(X=t#
z<#8=ayBqLRbQJjg*}`?c=F($xb@{KQCfv0=3{E6`KyPOkPD(F}p8N}={`qzo`$Lnj
z|2Ko)66tV-9|fWb=L>n~nicryT@qINZV{LX+lX9RAiC@`qhPk3NH>hf@lsddfnyRM
z&T_H%PCABoor0y3vuNJxMR@VdUT}Nv1nl2gF8wK*Mhm^I#>36Hr28=QAG4f-(CG-Y
zE#@0!_Hc)cd0bcZ0C*%`1h3MOnD%}Yuj-n}<*KLRivyB$ci}EROKu~tUffcnXsJyz
z7EKqOjjhF+xp7?a<9U=gI*A8tcjU9DbwH(TIGUf-rGphMxcxFQUJg{GPK%V`>5u=g
z_4_pJ@w-49%dbLdn!2#3@8l;*KbR*jr^?;4`L178IJ$H^UHEAM&Azn&1{}HvzYNpi
z-8P_jg>&bltvL5w2-lM8#7^NZr($4CS9)B82%8|-|K>8;9u--AWnd9KKPd~$_qOv9
zlak1uH(T+ytDN}O*d81{$cQeSR>fT`r|{;UB|`UGjf=l1a`$zQAY-{FT~sfC;uRFZ
zH`@`u2>yhNTHj!FfY8^|cjIome1#m}5dKxy2W8hKz@4Lwu;r-%x1F2?9x8r3U41G3
zQWfSW(tjb&aRH9Gw~uW5{Q%zmaG^(ZKQO2B8)3kN0rX~_8Jv}<$C>X&)9;fnh*KR8
zfzj+8a46~*NJo0nl6VJN8D>u_#$O`0ofF8|gld$L6*@gL`Y|O`ns!~=f^Hkmv9k&X
z;nG<#4yzkUKb<xrqg@V){vKX}dv<T(PYgBr-j#!JUjGwJyH&s?hFD;|tO~`B-x%3y
zOx1oohCdenv5qg*EI4fy+6c_k=XcDhk53y@`e_1sc_ZQ57Ckay{6~~GiiI!x6@?w7
zSd{JJ0|SeS*heADc};T`sVvonK~3(0k8(O{?OldXAMM8egbBDvb_j%S8VsX6CzH?d
z8-(*?3+X$(kBl-l5yjlsff$8m=09sKWJ?wkr%y8&F;`%7ss*3v-|-@ooOdiN#~r0!
zIisX<G_gtPVXM_5aC@dDcm`#lzP=@fAI*j-C$*qOz5rght|IfCKQZI=7AOvGwUSC&
zB%FT_iXN{!j0Y<(SxxU9FOt@c5bYm05DZ-m@tMH*_bBcYOWgKh#X;IQO6efE@E{I*
z^%bBls+L`|6_{~R^))fJZV>TdB$&>eNj}8x#A~agAXHbHEK-dYI?B@_JupjLuuB1|
zZXO`PNnU8CybNYpFUP<~@n~S~#`=#t3asrg2sjx5<>K?qLKu{I-)d#)bN?d}Pu8<n
zzQe(L+fMjsB?Tp6tKjyFbfy&*11sa>nZ1oHv;-G3-|Z^k-*QpZ_0|kZ>}1(o-ABws
zU>>gRBiNK=k9Q0lacSsl;{L-HyT`Oxt%}Y9f9Xm#r>2m7?Uu(g7ax+sP-|rGPu4tc
z9Kxo@rC{~_EVg!@8SD(0LT*(S<Gc$`$%g&21?G$`*=!Wf-gh_Ptl<M-w(dUI8?%oM
zHl+e9aRl>GRRgtq#w1|Zak1F5fGC@7h0>F$WShMjYUpN)YK6N<(3dZ4+B#)0`f4ds
zO$>qHLH2OaMHwQE%W>nETJfMK3Cu3WPuOdv!oHtcpdTEI#}{hAsK5*2wn1_vAY5Qq
zyXlLSeUYUI?~{tlk6A(D6PEJf4*C7{6X_jR%igu_V&r2VsWJD$E<J(sceqe2-DU(g
zcKu;77cQ`xs@F{55eXT7fr%}+P$RM@SgCIDCC6MRqBunG!lWAF_oSJ4O%w_R-ic`X
zEJGYJeKoV$U&^wwCz7oFiDXQ)DRe(;WVJb|;#V3*g2&;YC|LWtNJ3y}JU1}J&^8J5
z-zH7|nX3rhqO;`0se`cOlN(j?@54Hmxiox(G=Ki85`3p;gSsezDVp5EJl|z#_T&O&
zT=|cTF}nu#AKIbzx-0nnPQ{%z3-Llk7uIWrgVC2-+%^6X8FI;ux6~wq_i25m?Z1RO
z?@)oBD_VG9|12z13Sn1fD`E5ctq?I%Rn#M0#57%;V3d5Wcyiht^dHnomfnmPcFJ93
zzFZu9zPW{X_zI3NOIvI%ACJ=pOhccvwb0gZT3j1fU$cC`Mm$$4a3zlk{+qu+!oD~R
zw*K-43Adrdc~l%44h_N28{UzQd^5au^e3wde$J-&ig1SPXI5JhSz~kfwpHlr#l&Rm
zC`|3zBxK<8NWrQK=Dp|*Sy`CK*i2hCr(qkGAAQGmuG67|B?T7HnUm=EZ87Z}uFIDP
z3m&(^^Khg&9;>zgq5IrP+>h!I(ST1dp>H-lFv1ybB`hYg0fTv>buw6(yvNk6@zfwE
zSzt%Zpg-l>@#^onqDkKKk!B~cwE8oc>?KKm*%<Nusiu7L_I%JAEhbMc?Zg{xqlxI-
zK*&F@!^^w;=-Dm*u&3%0H*l2~9Xu`ri~M@PZhnB+{qQw%YWimSs!R#rEBfQ=bAh<e
z&w@%u7r`9A@pQN1YcxF<g+g;0t1mu8&)gD>e`AA71H{~+c?e%3p39%Fc@C0KTbYvJ
z;QM8^9Fs4d;a;CdasAs)XwbHm=k)6GBd@1ZSMAZ%Za^|M{Zzs9qdoaE4L4Ghdx^8|
z1a{kSFSCafQlvQ)yyXY6(}j;%mxnM%%?l;FWRh^<P7iEZ=?Cx3457ENnr;5D18YxA
zVBx{Z_%6a6q&loHOl>aIJ_y3sgJW^0k+I+sHb;pC$1RD_J$OE+oGcn!!F~pwXWq8I
z*qHDZV$jseHteXwjj}NqBk-{tPQD;XhNW=-kdRyZJd?GD9L3Hny<+WAxv*SeB;RUh
z1dm22llf<&$)2|VFsH-~7wc+~k)IQY;=P+h${`smi(<j@vH|$)(Bhly2QwNwm0v5@
z;k}M$F-?4zJQ(yCXBUhRbE9SC`p)(2?jT8QJ*EeD6k}20mLW#VPDa;3a!@;EH^{#@
z0{#yF*h}lXY{#=$xND$7zXUyp*CE@GYjlgx8El6abGP!@7vy+Z{}v&WWraun6u}AM
zXE?yLFvLv<;Kg8GZrw%R6c)m*sC=Q@G#veHDIR|B1qTQ3frooPGxR)++bjFv#kh-*
zuQV6N-Mmel-@j!O&neN5^-?@MZW}gRjOJQ1<RG}x7B648iV?j%IJ-In=f57rCzbCO
zJWtX*=5!SP=rqK-PaS0OkJsWGH!bP6Z@Gda)&*pi)PqNNCfE3HKB&BQge%ROXd%q=
zuC+YHx!-mH@J*cDI)lqq3;Ct_Iy~x8A=+Iy$h)^2;fJmI{M`q@{|4C8ccmiU`gH~`
z)XpX8MHlgR!8iOKcpJA=O3+n5vcdJD20Yr}M3>k-0;d}-U{er+=yC%dxenvwF9p*@
zHv9N{`VHkk5<gYEg1`lPpzY8S=$L9K-a1VUN-oR+WBZlx>)><Zm#vJYmll!erf6(W
zz9qPtltrZh<N3V<K;Jt{fnWbx`p?~%_l4~xc?l~yzuN=rgN5Ivz*e!$*@T*>qA_OD
z1x%b}h8r$U<+AxQv^T&Oju?k?HF6U?6Lt8~R2>p^WgB>Je+Tl1WO(s@U84MHA%7tr
zD*9z-N`GFpqc#VUiTnx%JKN$}k7*q033rNVg?3!}`8pWWYKQ!}JM2HKPJ6$7#Oxm@
z1mECs*3^}X@67|?lqipUUN6s=EIx=k@)}U(^im={LI(DDHo@VecgXjS62i7^fiJbD
zbV=i0?if&oRVy=S$>aO%^HgQt=zaqgT}JWuMLR+Jk_Oe$p2CMTYg4T;E_9s2Ag*UU
zk{Sw`m3ar;`1-R>tbdL)eLwaX&f41qK3~<jx7ulXLsA00lRE&0o`>hgb?jD76YQLl
zMz3CuLCycvaUEQQU;Q(<!|hezmQj!Y%r`T!tB?^Yu;AgDv0#$66Mi%&lGVErBeZnr
zvI$1?;3+ApTXTg(d`;uNZ3=LA!w1MqmF72TnUIyWguE%Ih~wI6H2Z}o-|<3`Pd+Eb
z6BvlL?dpOm`W}L>8&bVVmvFt%S0X*p^iuIPys<8ptC%NQ>4$FSm3s$)*uD`52!0%%
zDhX|#8=+~zSSnTU6sD1_I3^kehHEKVG(VrUoH&QUQ&q4#`Z%f=*je>O?}Za!hj!KJ
zO!;&?nvK{Aaek@joO761`b~rH+b$80A1Q+GOOCw~H?uXy>tW;%DUdo8f=zjXn=1A;
z3A>+yP8&U8tl%=~m~IM%$5Zk2gQ1|+UPj`astCSXZ`I%vfhz}igU4EVxbx-&-q|w{
z-R1V<=)VG6t>cV%eOLqOXwzo>9mmN5rKQ+ont~m*gCV$gC)}8sZH1@WNz{L8<mccs
z%-V9N=+E6;G_Xw|>t;M*gRHJt?Ko2;);n`ss73<rx7dO*W&-1H?jF(K5trDN9XBz$
zQx*2CTnFtx53pq6PHXgC1+HDw#1+Zj<kyzTa4>B%xZVweh<|?2=Dq{Qj9!3SJLS-+
zFOSp~G>DgVuY`|n(oopf!vb{WS>RK_Uvb0<yDxT;I=4D@JU^2?{^!TW>~%n!X%37O
zlrj5bUBtRZ$R=J@1K*An5Ii_Sw0fBfu-9`j{hm6g*4`1hZ`8!La}uC;Tj05TNMsT=
z#G>TPA5x?)umT5nS?OkHv(=BHLB1!3<jz?nemrG0F`XgIG}XsjSzO$V>9v<d7vkix
zEcQFeeCH1nQzNlxTa?h%i4fI)FvE58=Cbtl&E)uzN;Y@80cN&GplZrxQixjUvA0ev
zF{@YPzk!L!_&229;RrdP=?3##yNO189D10IV~<^QvAs?fv%mYp_dVLET%ZCWBSM)*
z`WIA4`T^c^CHS;qYL<WdUFmVxhe%7}xZ{<})LwcB>Q%e)bd@q}?bP7)y_sZc-FnOj
z9LrUPNI;Ks1P)LC&CCX7p_|bavcv5U9uUq9)ylCX^4cOk>GTj@dn3!LJ}DAwvXkKY
zR#}0wu%AiHXtdH8<&K&&v|+x$FbJM3oQYb_kz<emQ&ghBBBX_!TA+avyLGH|{k1^b
zi{iD0IP#`d3(NEtpyreu5|%vy)8b-rwz@h7o<51yQGzqAQIZcmb)Jp;y^N+wCqTk{
zS4g&XKpFiwwpj83{=;mnGI<Mjb?@+f{a_d_JlooP3q(!LpW#!23Z~;GkXhKz7B4&w
zE#?h)qbn0jO`BoR$yQR6vl6N#H-f6pPmyVfGx)!#5LI>VAdeg4Af_Oa)Wk1>!4sS?
z^splv75z)3rDzM@DQmEQuNJO8;>XH*CE0J6M5x-X0j3kW$;^qd=(OG#-}M}2A-B%3
z7)LWmTWW+8<F>-e=VmYq$KYH0&+L4a7CYZ$$3{;shsyce+0^k>Vz0lOh}M!tY_h!y
z<n7XiwrmwB{B&E~*R9BBbOVeHvd1MZv7)uE!%@-%Nt!_mD|Rp@^OV*w!^^+He7y@k
z*8YOsPqbiRK@<)jrAW2^4WZgSudoP@ar48nxcl08{?$fc36+k59CcqRHF^bmxGD^~
z_2$68wTYHTItJ5}6J=P+RcXVT2l#w~B7N~%iXZ-C$3w4ca;48oT(PmoDx~)Xmdf<t
z&-1A;Y@Grxwy(xYDN87?Jw|`{%;!0s0&}z`+ba3N1Rgj+hE{GbrqcrpkX?!ruMmsL
zi|*0fM*k|6R?LK_%Lu<@xrqlvxAQ#7=ghicCjT`=jrUb9qHmO9FhjVf%<fHP&1c8*
zKb|V^X>S+dLLTC{n+`2J_yu=;8b<qtj$~o|J#Z5IV#nb&Yg^v{9b2^d!1U=f=iq%<
zpf>_P?K=S)F}?Wo{v<52k|$=u+%iCZBCK>sWbSIoct70*o%NA@uId-X%(G^ny$6s`
zO-J@de>2Eet%t%oL&ywX0X9*QB7@C0Nra^Wk**iA2c2udrl+{3RW%-@U3vxXEMZ9-
zdRfN!1q7d{<D0Ox82vdFSNwd5Gkztrn-P*&IQ5g&zXEAqzPttw=~}|+1p@Q^y(>5d
zq@jv>Hy#{v3S?gQke99q3c2S=!;&d5^z2WuCR+t()7q^br1*o_Q;kl{7tZ?LJ*YFf
znMCd$2a}ql_}5EvbeYs)sMa^4N;SgWWt6~M`#6wyN2TM!C#RwA^naoSGvxX1={xAr
z7Jahz>~Gf7H-*bSUx*XLcfpAzvJLG6Fzt5>E0hT*k>Y*gNm?;*v{Z_R>U>9oyl;Y!
zJ{8)IOhY|Ib?$5KCrVX!2dAdrV)Yl5;PKIuIvyCyl}6lQJ$kbAN`@!CC`*Hjrz7Bk
zVWa57Ya#2^sz5(oF&9Y$EyQSf6Z-aR8hT2+#04{DKr!$PybM^v;jT3cs8^$#VGenu
zP>TOj>R8hkd%oN>18c5X(D7H_gGPQZt`E8n)j?8pUGOb9b0!ooZA*pA{=YG6$zbj@
zYP!J0z7CEmuf(Y%WayV3W4bGG6#A+uQNK^eYTTb3XTCSivMH_y>39n#exfc)G_HR?
zjCQT$t6c>LUBFzJ934qlU-qWbs~@0w%w;_3w@G|-*Z}Thw-jc(%J7ThBe~10@%S)i
zt@yK7A$;=L&Rd^-2e*YXeCGXuH1fOz4N~eS!>V&(S<gx$wQ4Li)7gUqjK<Q|I)Ozn
z<T#Ez`G}Wa8bcFD%ko<9x4f$1I$OBMlq+Bo)lrn^$K#jc#vfXst~mxB-k9?hN5=D2
z$JcX(_iIT&{R2E@vK*9*>cBl_7*o)hMn7a<L9epUIP=s?NY$Io1w$M2k)Oij1l~{A
z`y7<3TFHE+(-}{dr*BT*hx5;zXqjR${8;}TO-4lXk~x)_^RfZbY^uZo4=3=47gNcL
zRTiTC?f2P=3*)$E%3G^tazkNa|5`Bk@D5CSPl0CYHat7EABW$Mf<@;P`GeGRI4^NG
zzqFL{(<5Z*1hvI9C_$cf-1Q;*0)~RQh6(POt4fz;cq6nZ@$cLJ0~0-eESa2#yDWaN
zzO$z=dz~pg<t9gyKhD7HrcxNQMndSO2eCEhR?s6?9C+!iIvD?aBwcVh1Eh@a;g)ad
zXjJu#q<`NhIx;z(X8oB67F$;kp8!wNVpR>Qg$wwT^D<OX!5B-0`N^7DM__n(EYiE-
z<epywJe_VX9%MD28y@L`tLFD%{h5I@cgSV9x6?~3$NmE3JHXmaui?FhG?lB7<X8Rt
zAUW|S&NekAb59uYsd8>)cG4=8NxT4uL%lIiqXq-(p2ClX|A=b;IC}KPWMO774=RWL
zMNg$`puTbgliI6+DHlJo2|tC;cRN*ZmwJr8Mf33IrW$nnvJ;#-9oV$blfbBaFSZQ|
z$4wWTakl?-EHPP$p;F0Yn_C`UpEDUuBs}5fOAV34pKapENFSuVuf-8jrc5g?4J3X|
zLCcAUN!Q%68rCAilKL5@?I^%aJDr*B^maDkvKOlu7KQPc2m3P>u;aOb=&i>v-1c{5
z&E`d3@HVfFJ&y0gZ+^eYr*LiZ&UrJtTry0Yl+rF9@3;y#^{#}sNn0^f=MH|)Jd8Ky
zyk~NQQpC4Scc9$IgY3`XNMMRWPfGbSXt}pzSIa^2x;q)1<oDs)Nrm{{^$dO)>JP0J
z6U2w-JtL!iQ^iwHyWp3l87ynrE%sD9+R8U15uDa)LdcFcZ1Sc-kh=Av*i*>M%6WyO
zS(SH<osJW$_!9_OLLrM>4hN@$ee8+(4m=QU1(LRpSwc?)iMcZ$Jf=jT#M;K{Ympzx
zv$kZ&^6Y0JchgB(*8qHcXf;SUy|t9MalfYdcCPr*TN~_K7Ku&Izp-oPu6THZ0?zlS
zCr+79NYT!3M1S^nSo9|eLVCWE4xN#ZyK+1!J3p2Qc^u-O<|F=YEwBPtET~E8n*yVT
z>4CxP@$laaa~!uPhF#6|#@|Px$ZMhFq*1bmInJ00!e>1h_D>SC9&7>o-UU!vF9TEl
zK4X0zJ>u2L`lP(Cf)V9tJU-$evpD!#a8+kn1&o$s`>sY2*O=X~NM8wZ-U_^M`2=u$
z>4X95F~nwgDEvr{!)&Y5?0iH#epJwg?@yy3_}gjtU82Af3@m8V(+618Uk_1OfkQ2I
zc~f{Nu1gX!n9><=RkxAIcI?G78&sk4=oO(KB)DMmLh(Y711S2G(&%@JG;FmjU!!FM
zk+tXOMOA_Ic%c+458THlQ7IW}?~fPlG?0vVL#h<4A*<lA;2Kzh1GkSwjkk$VcHk``
zN7q3|!8$Z@P==2EdQdsn1$=&-WyC8Cu6eo>Z=I*iU!)>D(}v&-of*g)zLP!o$AX{r
zYy@FmdSb^Wx?fe6@*&62CF3B&F(3|=$;IC8V)4)KC%`%HIVha_B5=i4!SDE2WV@6!
zRDlHxs>s5i51FXEz?YVkuY#s6l5CdHQ{4Vh727qBv2{6vY1pwp@Vzw>jHX31djTnC
zvnR)D-H`J{{oiq>yxR>f_XtdR{UEfttsp9Bn*#P~lbF+mMXYA>M6`+BCdNrYD1B`P
zl($weZZ-kldFe0-IVa*VvYgy^?I0ejyG0e_)nScUDr)75arqmDug_{>a@-SA+1**=
zcGm{AEl%Mw#WE6J?IWsi6LO(Lhl9ie5uW`Uir0^okq-4Etn*)^$S&6(adRmdTwNwU
z*`Ow#b!aO*Ss~4=*4@Kj^D4#8>yr4O)aUTe#2P=#-xE3+SJBPG9!i3u_<_IS0^4C0
zcUTw+)p1#vmEnc|CS8V@{d1`EfeMgme}uRGOW=0aGI%!U4|5G4L%uDwq$TZB_(6Sb
znm1((ZT(O{{`SekJjau4(jz%Ei;?B=ulDlc|J}tko5T6`1uFFSzN7e{2B`6hR_5_3
z0;t7Pc=NKEyT6`9N83NfcK1E7Vr>NZno%z{*qX@=Ge=UHYYTYcJWp!Z<_i~7X5-gJ
z3urMOjGb=|!gZs$Jg;s7H*_3JXZD%GgALAnLu3}0I^Qb_IyaSnSse_b1V5gl{F?lv
zUbJeHj>WXBbUt}xED4y9Lv%7`k&yZXG#qe|sf`{)4M$dBi_r0|{bV6p^hD?y6}%!6
z6BnUV$^vE+6A3ETk?3D^lvN0sSc$EBt#W;WA*E3PQZ_V_fN%3ynf(y#x-a<UN@F47
z#W9g?lO~k?8&13g_f3TNJMpu;SS&f2P0F-3vIAjarussUm+PNK{absPpOu>EmE8b3
zHee)CKc<acv-QXnVdh_O*&KTQzJ-BtS8!vv6Be7E10t6N(_Rh`54vyy#;#mNMqcb^
zin{%x3^!+damEkQe-^REQ$n7iHkZckRU>M`&T{2vO?u_-dLn;+9&O%j01;bX;q{U{
zs0prQ0iFZ-+j0s1Ltb#}GzLQNsWvnpJ&tekk%!miam4YQ9DP*t38oBG;sJI0$n@1|
z9G=cb9q)-kwq`CL?$A_|`*neMM{YfKobqGK68bRsUpM|#K8FKU4#ID*txVC>8ka7S
z7rX!RVa`=kVZ-xmpwA8QNvJ-&xTOhK*9BN@Gs%Q)H`cQ3Th}dT4v>UBGlVSdflT(W
zXE8lkaGsqERVDAgW)NlPa)_lKxc!+1v`c7Gr31GB^(Ju%-5tC%r9fb}48%uL3qjs4
z8B6vo;wD6oYJ7Q(%er6VqS!8wE0g6<qk5T6jv`l?<3Mlb#h_$$6i5w+nohmZbnx^R
zJX}7Q&N_C8K2yv`OQCzaWaLXU7@Lg#nxpuNeogxD)qZ-v{u<=9b&7@`S%;CrbKbo8
z9WEg%aQ>nccPa6Kq?vcwlidltGa!hRhnyulcCMwh8&8r8HO)kOk|k}eDd%KnAKJdl
zgx|MvaKy3*TsQj?a}17wme6=^y{87$wpGK;D1Ba+D2;PAdGcf4HQ?>)N1tAlp%->m
z;ak_a1kRqr$!jyYU49MxS2BV&K9}dYza;spIXQ&QX(!%F)2OCRJ{#wjgxgko(${+>
z>DM{i;px3GoUXcplI|+})L9j}s`v`=wA10gi<dyjuW#bx&DK;>=ye?}$rZZlh<{4t
z_>|O9Xgp&Y4{e@G!jxP@Z_f*k+A$_Pt5^jtznKo!wkdE@Hy+J(p0PvgH{!0i+qnDk
z9#DiNP*+(gxOJb9h@X8ZqyB`v^tA$+mkt>C3h_UobCG8&Nkg`7<tzMspgzb{bjqOv
z!i1gOM3WHyl~14^TefnGQ@~GudxU!G|Cp4qHgues4~m74Am&{UYFl0w4H$ETjL9vA
zGN+uXEWvrU^*|lU-iqR0%bt>-H4X56xGWXv#-d%B3jZ$bv1<lfL)YzfU^^p~v_4ei
zKiw_((E@K$e5eISh93umW4BOY;vlLu)g5W03}19i19ZkoQS19kqN}DtRw8E_qq8Tm
z-!Xplpym#~qRbMkb#j^H;=6Fe@en%S8_f^a>GFTq7cu`nOLQ6VlHAd<hS4?;$?V4O
z;!(5I=yS*c|3oj;opA$CMOpKiG4EO0*?0&@j3TM?bC^&4V_b+zbgC}{CNCzRM&^Lj
ziW-(5Xo-POyTwQEhYNG-PAGCeii%;y@Rm1_3%P~pIyVLv$0We;E>~2XybLNX7K>E0
zm3T<_9$0EE10^yWQC)Q<xV!-L9+V1Gmv?|d`DPfTx>5Ay^luT}nIqo*>!o<I`5oe;
zeI2tkPmyEOkCWVIc1%7d8-8D2f-8e+*{C6=m@ahJXa33uSp|`Jq-qxA(2t@6QN?Ja
zWDVX|-SPd6JzyDQgioKS;;Mrt<W;-3sM7f~c`(Kvx9IF40z8e#PL78MgE~lFaFM9r
zXgu^aA~8IkBnpk3ir(u5K8m*|^a}f#M=#y6b+gc!%JV0e9*3ZrsVvCyeOCLf*+P?_
z6D)kO9(&t%VdJ0qR%Lyc*<u<6etV8v)_AOkO#+K%)R1^~#C#F9WjUf_PBN6`#<H!}
zk*sWAIP5f-4^iED>~u{67$hu)D3w(xao?S3=YL?9HA-j^XiL_q9V5?5HbcbT_L>L{
zLvg*yV|Mjk3mf>jgqgk=#SXR{C9j98lk`AuGP~abi;Z59G4vrD^L)SPr-2K0^o)X~
zYJbF+;#a{R!!mYF@+6Kwe~?WsS7Xx^R>5YAVwC=)4QHlYw;F6|AU5_{!UB5}(S5xF
z{77_wCf~8}Tm1qKvyT>sx>Sh^UWSv3BMuM{c?xPm``L%*MP!p&yWkH@5m~nheGhX(
zwq7xttQFaih=KYjJZD)}%N#OMqJecy)WrG6r?bV4QC60NtKs$;8!Y!*3a+P9Vacx3
zxZf^P+`KgkPM*z&#IqVa{9-Qc<>@u~#kW!Br3JrYJd39sb;S7EcqSCh*sGoE(CkYV
zYV)V~Xud3Ve;LEig`dDmWua>yavg`yxgnPG*avH_C)W(kkLRkpi$&>?y`t6LA;e?h
z3}`c5M0WJg!B@-O;gS@=cLNrY7WRehH0&U?La*uF1T8FAj)vhI!!Y212dk3V0qPHf
zz@~qqDE;Om67g^<+Vox*N0g8n|DCHrdW|9Q_x8Lla5~yZ%n|-(_nG^X^XUHVCYxUA
zAdZoF2QF{)=s0y@U*miL$BT<_W!f!t5xh<w-tpqa#;1sVPX%b|SCg#ErhG%LVA=~2
z;Q+t)Fudsm9zK}>hF|WJP78t(kuQnlGgsW-6^(Zt7QsL}!QT{-Njy&J5a~0X*s9gf
zu8kKmuTKR&Sd12iI(3qezz-z2X9JkI#sa(Q463OWB=`7Gyk=L-JX$V^_Gx>fYEKLC
zc@#^|+>;RJr#&PZb?GEaRvz5fRfvLTU1Se~-(Yd@3B0P5Nb*;fGPOD@*x8gw&bQBm
z!-4I@VzvnmIQmLtT2^PZZoVAcs?ot}@d<WS?=O)Sy=CJ{TZw9KBN@G?M0{(XCO_=A
ziFZ8QC|+MPl_)LPhfW5QdCQ@j1W(*#KUE9y>C<au>c*?^MtqOFu-}KvyBBkFsZ?ei
z|3uU#X~A2BZgRqfr>H5t34WgspbwsnAu3az;+^Uip;uOp8m0Yg#g{mKV9_Mrm@kJJ
z!uNJa5MhO_8GU><pIY41V&)1h<b(x-;P6q@pf?jFlBS6C#tFN1vjD!(As4!ij-)62
zYI&g5e-NNFjLvBlIBWxSsC?sb_)FdSY%lXmRH6fCe(c8}k9FA5y^U)O?M1@`bzbk>
z3y#*4sIKi>rY}2!p9?<0{!mjs>X$2d`J)_zSC-<!f=--iG?P+YX{erd2n(BQ$oVyU
z;nVWL^j}vswcMV<)!#LcQD2slYmo}jmUmXveM^#sE*XKn5h0jsTrZZNyp%+At|Q(@
zg?o-z8XA)XI=0Ie+`HTvrBjNCe5VwveXooDYb-_YE@_aV4L<nHxRa!-Z-?A_OPNIF
zN6}~nFBYYC85aib!IJGv$k#Noe=EIV)?G6gY9r)yZC(hoy}2m0_6^~~bs01@vBsP;
z<iYQ0SPAZ;xMQQ3ta~K8u9XW_H*R2*#2RQVrX>2vaJJlZIW~kSSOxpe<nx5yXF=#$
z(3W=PQ`L^6LUb<1Ohq_mHV+(!u7N(6q4aprBeC4K5X{?H1{wLUz{GnBznpm=LQdpj
z^N>sEd%PK+D@t(NzeDJtOM3inUo;fo?-E5CiEvJVLd~_sugUWMqvZCHA>65WFq|#8
zP0B4EVtVrv9CRugc3tH)Z4yl^-&YGZtx?DG)n2?Q1F`8pd!f@4N8Sp1UQOj)C^~RW
zq!ye2P1kPWBHv4Rr)v&)4z3j4kJaX0iO(Td`7P+_#qocqhSMqu1NtB?9cq_7hF|%Y
zU}1YPn;$G>3Z{O5YnPp=)=edPux%vIU7ARKJ)6wK90cD}?FHC6&6PW{i!giA4cv8f
zG$a(f#-;mi^M`T9Jfm5f&loH`3oBl-!XX`4mHiNn7kK0TW3$mPD22<Ej-zhId-3p@
z8u)(a7`G{}wi<rZo)^s@O7-#w(3SbRoJKC??oAEoA})d>uAJQy`plh9V!@YI1h3A>
zas5}BXtrVvNNv0!nv%SgCvNXVi;>45dx!zukh>WmCkM&}Hnq)`3Gkq@4y1N2;coe9
zFv4U$pKWpw)Es|8kVzMAt&yf0SE9k$vz&O2I{?G_eZg0&0)I<*!Ba06FwEEwW-ko*
zvq3YdWJa54A1T8NS0>T+24(u>aK30*S_^Ef4#wO76Dq~qV0v;I&Q>s>T{l{A7`kzX
z5u<SV$pkDih=FBWHWH7*Y*gPD1|ef6)7R&R@Z3=)aOt5V-U*w=4Xz80wB14;vHv_T
z3z~-uro4pAh8*(4GKTdJSA~wmacq3+2yS{$hrfLgPL?nJf=XY_S?iMbLJwgQYrNAS
za1-<K>R@dccex#s(;6Up`e|6YT7kBx=0Ss&9c+1*NxGkyp>%0E@g0~<g7ZY8e};k=
zWS!8#w|hWl`mbTvFBtG)7leI#n;I?k{!vvFdJcDf?h`NmauSvpI?=_QSJ;4&%WzHh
z7+&z3!)=@S=zcbi_rBeU3j>6%UdTkQ@I;5c)$)XXpAmTP-(Y%w!423oej`eDEMxCA
zgUGU@x6tpn9p=72gX@=mgrIW~H2dXR@#tSl{DrqX%=mJQ{LFXahi@##-dDSL;pG~n
zTfTz5WCkpie=g+A$AO36XC|7Xz+`__3VRDRzA?y9U<SJ3#}nmvUM`6zXMVuQ!}D>x
zZVQ^PjzZ(2BW&E#NRU+=K(peM_=Yq8ME0w+(P7_8yxE@w0&Euh?aa_(bTmr*%Mx8Q
z2|y345U9HAMQ(T3vC(HH<E9W3FzB!(DZbh`c<xT@v{k|2uVDg$&K4^MjDS7IGFhMR
zP?DSJi38VZfzRqwq<8gJY&zOX+KRiGGDMJRhy1X|)(O&<8HtoChGF4`DE4d1Jo0Xa
zhPd)g4&=Vt!!Bv8g5xf`tC#k_tijSg5&xk8k!KT7GW8;<U(tl)dl;MZqaM|UY-Y)?
zmC4QS2qKZ-1=3?FApw%Ov@V;Jtf^!4%~~?KH2~vg#$$tF5BVZ<3iitukevQIqEV3-
zS%ARnws`RfB5L$SNn^F3Qtb);=bc4-13HOgVKWqJBw%W~z_uw-ggG`5&^Xah#4akM
zcHK2rG;kGsT(y@CFiyY{M|Cv$yN@aRT0(=D2Fl9{``DJ}Y}EIqq^m^C&UH*eYa?~c
zI+_fVr39x&awv(qG!@!XN0Oq?PEhG}gV`6!;F=0!eASiAx|4UnSfzSW(lj3ZtQ>Hs
zp)Y#xKZ^~SX?Q|kR8xE8HCDBb$BKH6>LVlYd{q;>VYr_e-xua1_e*P*Jn_LfhZA9N
z$vb9nBpR*DV$knM9Ba87f$Npth;K{FgVk&!44Ze4R4kp2?VXR=t-Tu|NaqyRMrEM(
z>r%36m=cTMV1qlxI$`#g6u3=lh_z`k`!+cMQ|mWCo#o@2mn#k6153oi;}=71Mj}ki
z3J2SL`l9?FtBHAsEmZCQObR8;&_HJ{=5~A~_rn*XiPbSC`%M<tcgP86=MLh1e;tGw
z*+Yn{CjMS5#Z*_Mi&d{D!g8A=baFKo_6OlG*+2m!I-AJdnHO2g?O2w5*a+17;vv;i
z73HralVuHpheGB!S(NUMZCf^pANTzf+X#Dzcb#%9eUUOrE=a^7DG35k-IP#8CUiq7
zR1O%8mK_ne#=sRm3BH+v744+_;wZdU8wfik#Uy){BHmYt#v><YA+xh*pZ*9uzL(2!
z{@rufS&OW0lq-CR`-GFOhC@?v0wkVn0!zO@!DSyIZj9FF4*e6je(Y`Ze&~d1S4M#H
z0z-7$rbrf)a`N+VGMtZ}%N>3>ihf+RqJ2Yu)_6;u1MP9oiTbu}xT$XioPLqY{H=0{
zvb5m7I_3rmIR+RiT}(a;^LUH&JHq~49%M}hz@t7xEDIV<zRVJK=$;kT5}O^ZEb7C_
z{BjLOkB`K0UZybWu9~=ei855^bPMMSCE>mnhC68vyBakKTQm~cSfyy}T^Rs{R&k)E
zuo0hy`-8;UAgcrwUm{^;PloSXgKD>wu&VhO;l4IFLRRqb+Bl&6j<+P}Nd*25SO!TR
ziJ<-PIT<7D_`lDY1>F+D{?`cb&Zu&H{`nX5Nh14^nuW)94`VJlhZtY)!}WKa5RZ3D
z#viH4JS}1t&p&aHbiUEy4zZ0yH&F~(<0Ww4);%I$(Kxz)tsZRtn?^QHeQWt|!FOgO
z{eV2&_npkV7LCmr-%;Ip7+(3B03)JQVdkqOyfWXE1<EetD#5`_ZetQ8z1Qc>N$>aw
zyChuq&ye=Lx5kP;KXJoQYd&CfHg6c$3A@{+c)*T*d`NK)|CTI^?}G1%{$_gMYJ&{+
zxGMqsz=zMccLR+*#?tPM3xS2+vC3?oME}O@pjEe=xz>-N{K&BknsoCQn`Pw2cb=QT
zCEaXk!n$pI`lW7ox3&dNu2P|qUi(n-*HKtg^O2RjhyV{!q)2~|JX2J2MuW+KU)lrl
zyRjW)ud+euu$zKIQ%owqD1wBeN{#(Q5hFH&|5<-9>VK;x64!Dq_xG<w*XuGcZU1TE
zjuDIPdF!ynYZw-<)5EA$B3S(EHY5!l&I@ZtvU6jPAo<WntYvcvwnd`+s)aahTt2g0
znU2E-uOKroG%^k8O)O)pA-f}GkB@dA$NBYfq~_>7_F~t@n$D9ExWix*&W)W3sTanu
z+eSU4I#>eov=r%U!HMB=b12O}IEcO{8N|3GpL*Rk!4#QPb~$thdu}hqUtI5n`<C{&
zdRIfux`6`w^-3iP(5Xk};zIGeE<Cq4<);P5w$IHl7Ba}tyz7f59scu`_{{r_*q*V9
z_}(c5J3C3f=18d6cBQZjoi4)`ZD_{iFbfp5-GQ15IyAtq2Ip@4jvJ0lW6vho!-2S8
z<mLF&HBwo-aftJ2fope&4rwlCzjh_V!WBzEoM%Y165d$aUe4l!k`GYR1!pnyTs}_m
z*h}M855gj;F|^wM9Qv7z<{R4thweGT|IYJ4H`8#gyS{>EJXubsN@P;q_9AqiU(DX9
z9fGx93#h|CEo!kX2o}xz3l`VXxZJR(*mgjl`?-H*D~noj*!NRHZq$Q@=4U|vsxo?@
z^C>8~yP#~$5mCE-0f<8OQohlM4oOJG(x(Zc8C|Ey^@QPkK<idU!VK8F{5&H2U5~bY
z*X3JzJdUl=0QVL1=+t_7ZqOMA(!~S#?+-du*3+KZ&Nu?6W@^#tHx2mhbLFJ9e+ZZH
zJIp;4w(vm~37G5t6aR##2pJLsI2*ncKdszD-;8b)SWy1_;9O5~Zh0Ym*c*s1t^`qw
zdKErhwFtK>72>&zAJI}no35#j=D#|0>Cq=X-19^lj%~fj?pa*|7Znr!<eLIF-)w`)
zN>gB1?{n7uHUdJg1H99kLsK>eQX`iYTwJz-j?exHf!i+$*^^r|^x0xQB4Z|Pve`;U
z9?3yXt+!Bo>pj*@7UQxDtH|I&G1}j`0zJ24=;g7a>C;qaR8C&SKL%L9!oE+UFPr1}
z#^-gYtXhjQ?Irw@sh!YwNTYA!_CS50J}8*D(*1$ss8jnUb|UaBR=*s_<R*>a-Ogjc
ze!UF0*dn+A25uzJ8l>oM^)Pn*!))qX_lRlrSA*+G6Mi&OpGQ5|1Ur}eGL?D$eE+b+
z81Lc<*OKD-gVF;aZcBuN_dA&0GHJYgE{#7kjmIOCWMO}!4_{;02<uIbfXmSN@IR)`
zJSxZUi`$h-k_M7Sif9lep?c0f4U!}@3KdCw6OHD%d8R=U4T>a*k|aH6ADNOVi3ph!
zg~*usy?^go?^^Hjx3%u&&fVE(e?Hgs{^xCM)4PW!dX}O8^1m>qDwC%^RN=2j1ybE)
z6Phkl59^jk;p{E(5dF56?DfqMk4;pkbcP01HmZVR>x*DyJ_B73n3IqS1!^)=7pD9W
zSOXOtsY47OFylBZmDR?@kNx;^zoTH^D0qGPt5I%9Ge7!Pj(Exr0@v|panQyB2wFRV
z?$p{xejIy57j3nsiw_D6-~Y7v?@ixHhg>O%HuB?(5~3kos!L?jkc#tP8AE9#Me^qw
z@lRVXt{-X*r4w`@_lGrGW<CjRFP~!#e=@{9Yi449d<0bWnUkzxG0=I?5Q}~eCrJ@*
zME08C;C|P@svg^shOhBt?Po@07q4e(o|d><seny!+=KQToA7g2mEi9yXH}wV*uH5T
zp7XTFRSE0J(Z#7?Ba$b+7izKJY#!8?2u#Xkf7o=q)jGUws5pK`8g{=Mh1wV2i0?;C
zN5e<Ta9g#WSPD6`ezg&#K1K~jCr*cqqBc>+zlWj~t5dM{g&$Nk-VuG-KbkQmPj+Fu
z0fZ*c!H-@&V#8E5mht<OC~&(78*b=ARCq-_^RfpC11XV0aWSbf6*?A0_ON!RA=Ye<
z1BucDXQd}yA?4f#HkM|yD4F;5Z!~A(-F;p{-b4qgwmRU+S`}1kOTm=%;VALSlt@@O
zh{GrE7bktTha(}<SP>Y^w4<uX{GUR1z(I}OO;dy4i*4au=tf9ubAY7>4&%-R=S2TG
zT(tVHd!kr$ayL70<`ipwT~6F2hd@WxC3aieR&-H9=qdd>gO9PDCD-34j~cAtUaK^=
zKR<|S2?tm~VJ+FzF$%mSkKp9TFWFj^dR$xL!O8``WImR|Rn2*DaK>&-J9Si~)i2C1
zIsW)SbcB@ec+VV#-zDM~hIrHQHjxk*SnK=dWBC4okP|Y8m>DZGb3cI(WO#?|`yfK-
zZn7?K$gmC@cUpAE<1jwXjKM+_Gv`uyxa4RDANNWT|8W`OXKRq`>h(eG`eYD2567Tp
z15_9Hi*qDLg5{=B;AFA^46_1Rpx-9;F<qa$vp7xu^-7?cI0AQlhy&d<d&F+PC58M}
zH5<1imbH}s7GIWHflb#g6XW^>oS`6z!o&<|w_X#)fn!kD=?KXPDG|MC9LrjUJty0(
z%9#I=R+0b40I`H&Vco?0aS*lD5*-86nEYiaY-y_%HKuehCN&gIo~^>BZap%2+GwQ1
zfPQ#69xrrt5Dk@V^ck80u7iHT%(iyC(D@kd9(f7}l!acVn=&1-ZaCD&SE0)Ar{Xup
z)o|N*B>y7xH%(&1D4&)M8POB?x4X&$r$z+|+W)bbgO#{IxZlWZ4#9@2hAiM&3M}}u
z0iQ)}gw5ewu()3mO>Q{CGute2q;CVWjhQWu%2*;w6Yh3D;ULtVwo&w=xrA*OyMW=P
zV8UkKB9`CN+4J#!xZu4TbIIOEB!(%0#E}C;J#iybUNw+yc6!Yw-q!|&PazN`Ib9?h
zkN^>KahTh6m=u+MAvM=+Sl9krLTBh8dA5#=ikuFK*Zyf|ab|Cr;_B((Zd1pc79q*a
zawivGs^EVkrBPx_5_$93o&{zMhp){^;Ir)yhA5BZ(Z8cayL6nzd17B&x>bwnOO&#Q
z`V;7}g)Xq8F#vKlZNORG!aezrm&{u30ygZ<hZEoKA@HNHcz7lIJ94+R^Y%&bE_@tZ
zl6Zy%i<R;Ej-k{oco(k|oAKDMx)3vJHgT}OCfwTy4v<ensLdipZsc7KE9Q0Mu_L*B
z-;4KnxjYNU>5Qizx<+ySJQvNcFXB>P;`#jCaC~8zhXaQ@^Qzvr;2G4)y<gwQE~kT_
z)~U}2y2OEulQFFpmtc5zDYtfrr8T!pao52Fp~L(Nl=8fI<I``fdGrOeyebVCBFsHU
zTG_+n4$v^ghOV5X$>aAu7Og5?MwhNT3PiJ+jq#XBqRwVopNO3W4NVJ4nnop)Sb5p%
z!W|I=1x<x}rX@^H>Al#gVF2qlk0yx^W6&x7KZx=OCAQ9jGu37#vo)F|&id2Mem~v<
zx?jAA{(BkNJWn5TmN;NVu!%6c`Q!F6gIGp`1Xulj*m_N60#2CmSnN?9%}j~FpS8}z
z&mB|wl5^c4%IwCS7D>!#%5g?RQdsc6%V_gyB}h!y<lV1>;H_*byq{LXTnyHujY|iK
z->m}k{A*#5cO8=|>4jO|Ry=CqHJG<Yf(~zTrhyhaNpEo<%nTL2y`LN*(ZrgLk2)p3
z*`bRsLpz{8H4Em4+=Uq}!6^I56Cyua<I3G}P#!2j4=>c<fzr{Ot+Z!aEgn$9XXDDd
zzPzA31D-URaMzPd!96^iU$0lDW_GUp@83%#!haS<{vFG&+fAU`?B_5yy<({SF`Azy
zHvDP#BzjI@g@+q>L7BY)&+40q&1(-Zw}mxi<?B&6D!Urb=v08`fE0)<x`IP)NK^CU
zbExBSvG|XPGT)P51}=-ec&3n}TNdBSjAdM*)>)okJ3E|j&q-t|E_a~j#w#m@`T}&`
z(nHpDKZ6Kk4|-?+S}rT?$o2IEM|jLMs;VpG2q)`8p_d(AHsxez{VX21)1B_CkfP#_
zAX=Xp!&hpL#j$!<sBc2HIIip}?ce4|%_Au_l2}3Q-o0i=a?Gi%^nKiHpCa~~n!`Vi
z>W0ounLNnDi+_#W%O?vxiug6zfa}b-V}u@+cC*5~ge6q-R3<!;o=2w48%x|WCHUF!
zX~KJo<4aGq3VZHPP?@Ah)dD8Kqh%?iqR@|qE&Bp(I}=drXeh3@+YRsMDxf$-gOr^(
zi<_rBVFL$R34ZA9;2+<Ha^~8&MY)7!3SK_vi;L;A!96I-uZ2@ptLS?d;l2Ns;|*G^
zVE*O=-V50X%~EUm*6KEVrQZbM3my3NXF@j6@D|u#G2o9b&f*9EY0wovXVLa2v)H!i
z9Uvnx>+6mMGZwv`Ke3xYuB{pcxt?+0*bxOOKelrJf!=Us))kN#@`BuSK7p|v$MMF4
z4Dkiw9{AL>>m=6i5!0Emg_-M@V8q+)kYTRK2kPvBz|rk&_MA$=B{+tZD!1XlUz~ne
zXu;Pl4W_RLN%KeB<7wUVGq^f-0RLk(l@_!rbM4wu^oQJ9Ix!}Wzt$Xyn#q3z)~^=d
zcJ@E`dnAwFT)CHq6tzIo`*--LP6vISTMAu<9NhLwl3ES(6|FqwL{IIt;C(lI;hf$^
zK4Z!aanPuCYlWEC%&9|@1_)=cx8L`YV!@#(6{E(hYKmasT6q}Y6H3hgXy9jqtMKP?
z8!S`5gSHd(Y3RIU?mAbIe|+f4ORq%Zzj6~;m$ZqyZyw9Kb60@r?soLF(&mBc<AlA7
zEM;T=k>~@#c)sE(WK1*UCVLyW!3SBUJ2?p~j`@I8Y8EG#Pm!)CiI_5F9tIx$jS0&#
zIKu&SmDdCIEs<e-`h0AbErW03Xf&9p$i7ZlM^?t3U?Gyv$QOf0DAPZ@-q%#%az`J4
zWv|T9R%Z(iwUUC~C}}b8+6xNv^D!vNPDH-U##$k>p|5p}d~X)++S=?{*O7F*yGmB*
zsXk@?HDNg5Ni!B0*@NYVS>Wp@i%RYzn9a_cB+1Z}WF;3ciw}8h`{itGm>Wp~jTK?>
zz)hkFHdEoy#eQ~MN|9AM3Ei>OG?bmX7HUTY5$}NY*u1e4i<Vm9p?N{LQPTk@=L}&Q
zqZ`EV>L%f)3WPdO#7S;8Fl+AxsFa=q3j>dWX!AI5E1E{c(XC`_<{I3$^bq+M*&>Sc
z${;hg{$)}NBth9z52NHXAurbhT4q;?I;Wo!m>);$+h#^%6aOZ@pA(Dz4c_8+z6oTc
z-5VBVvbVnINx$IcFb3NV*`nUfs;Jz&4I2AwLF!C0=GW>$et|rcuei)YW#hnh8KCm-
z;W*!N64Wi8hx-pJVSDBrHabh#b;hhi#SLXF+r<QbzP3QDU4-|FGg+YWc8EHB*gERH
z8r<-?$js7ziZ&e(xFa(j5vM7>Xx@F0H63ato_BT;p>u-)G!95^Fw|v#W4aE~qV;>n
zLDB9ZL_IWu<ysZ7OAcKm>P3XO^VlkOZ;d8;1`w#|HDV1umqlii2N7#<#;Dv&*14I3
zu;1`8A$Fs|JVgqlqGpI}r=GNSn6ig7>Nd0NktIxGxJkYLbz`_Q%pYcIR>HpmN$X$A
zhuEN%{p6y)DV4Q&g#T>R=%cDR5PSYCJJqUARh^W@I$sca0>{z}&quibS_4jWjKXQp
zB5_HfKa8z92<3g<IISj?_R<WjJ0%5vs-`?wQkn!+A(7~jv<fdx5l5XqD&iW+@UwId
z+NKuOXYHB+${SXJ|K>5o)zca2!8Rse=nS6cb;0+T;6)n{0Ub}SlcXaJ#It7>gbwh>
zzlM&G(Ih4xo%V?u96Vs)M?)H}x0BqsKOb+uR^%bKjL3XFb#8I73EH!TT>J!qySPmZ
zN_8tSn54n-51)wYAsvYQ(hENVo``1lO4AtQk9gR+3WXFJuD*Bzvu=cvs|Gt*v)v*H
z`F0gnx7y&dx`*W5hi>Mp)XSb7iGj3J2bghhG-UXnAhtiI3v*7jIDWD-zPUFA6!wc)
z*LR^uG`~<3ldcRU&jgm@(Ji3qJ_4lzW@ExUp<h+`m>EkXqeSo-abf>(uoxDP&7Yj%
zuAd_;r$J<YUp>0k$KpA2B{<|>MIxQ=khFsz#h06tp<(eQ(id8d?nRC4gv|+YLT0?k
z(yWg?&zDE@l+jpw!3;#bRj|hBIa&O^0W<41;}Q2p<{+CPocDjB-{v+rvqhTMJuE=g
ze*^gAzR7rg(F^#uXb!o)@DWLioW#9Nl|=sQ%Ehby{)ByAYjE*xHPHBGOE>Qn6YaHy
zpcFn2BE_FsLDM$WifIPha7)neRiIxynsL>tR;ahR!*4b&<ma09^AA=TyxPHrOJ5sG
zAH)yh7QLCgYElm<=Pl(mdQrS@y$ViE4W@g7htpSw%J|p}Q+iC(leSog@c5RmjQHw7
zp;IZuK6;PZMXxbOMVZHVsM8hA)okFn-8^Yi5^lJ@9VRPm;x~GCVOLlgdAwd=qlH}r
z-J1dYMC}81PbY#Iosq#~p_06%CWrhdbf^baPvfQ@#;|ko1io9zN1W685+(}YT)CHi
zyfg4R`4T3~+Z_%Vy0Damx0Ms!U0P^6(@)%Ms)OyQ0S{lvLB2!*@pwKG({f`$Vf1b0
z`d*(r`|qmQ_1{s}b)cJtp0q*Vpfw<2TU%QcsRK9m-6o>xvZ&ttSM0&Q<Ni<Mn4OOT
zpPRJ=q!TMKHBTCkUtCI*=9Q7j`(xn@>t@q!hQj`jQFuXiG9=31hN!!vxkKV!@DSeJ
z@5gV+bc;evi5pIpUr34_OY0y;!kR41?YDkWqX>7m?_&~GqiEVb2Uz*y6vS1G;|=|U
z`f9}s3;<5dTN==L&I`<P+rap<&BS%n6mYE`hi_#le|h>7(7X@$bI)@y=%}>5oM=vO
z4jBx8@~=VR1zCEkdIocHl&1xW?Py-`@NC%IBnWyim-h?*?}n&95N}qA#Mcju@Wiko
zsQtPeb_;IHnDK95u<1zRA$5_RxMKrbzNGTao6m`qU!~MHR1Sa__3gMLCKT;*u98u4
z6@)vr!i^^;baa5b;1OL9E~Pi|zy4(G$q~5eweR8fO)s)M<s(rubKnLqGkFGHLivZD
z=)PO<%q`l*#$|+)y3YFo55pd&Hhg62b8djbZ^2`>!;`B!)#K^FObVIa{Exy|oT-ul
zqii#wlljtq*+aPf)j)i{Vl@l-IfbYDdhid~pWtp~omJmaYw*$;N!Ld$f~VKt;d<+A
zY&)_U_C21=y-U{POD*A9ylkN|(VHGGub~g8PT(uY6rkC_bTI!q6Vji}q`$3|`0E{c
z;;>8q#GC&*QWswpy8h@$cv7Rw_jM;jW!*J)aPIQ@$@9je$7nBZSoH??YhGjyYOOf>
z>?rDL8wrg8s<hS93AWDE<ugML<IhqFYOrc0oxWu(cdPZJmR~Qztd9vadB7CDY0+Z-
zN8=D2>X4%elO*Xg^S2l@>9C*&TTEOhifDR7FQ(U&V`)Gsys<Z6uV&qW>KU^|LG$D3
z#5-DiYsoO$cXl?~AC?D2!(R62!vMT8)<Bp)$I{$Qf3fu2X&m5YNMvU(;X6+}CetJr
zi!RS=VNF+;V=FxleZtPT!e}mUJe~kgz8po<hbOVx;52FWxQoQ(0T!!|g2C(5=$RGO
zM9cjYX;FFt<I*m{udIpeQvOktJT;x`jnJkK2i?Q1^$jHD<PdI=a1~B%L0Gmg8`4&-
z7J7iu7<NisG+yX^l}GCH5ut<eMcYlx+&&rdD^1Z??-~}rKacb0q`+z=5zl%eLBHS5
zWg}Z(vQx_r6ZzRM@XDNnnAU5=A)*>2ge?5#l;`9^+HcU9cNiTtJ_uR=_u|=~m(#(U
z2%eXj$KSk?r!TFuKzrjA*i@SXW2|L(i}_(_tA9!EZJ<#1qE~#dMvc2JJBaq%+MvO5
zD0MmM3RPX!RC>BRpVTv$4tIKm!6KnQulhk8EhF5Sm;57vq7syt<H4U_G2{1spT~)n
zQ@|`R7iR35Nv#wX!v~uaFld>;l<$s*E&JEg7@=P&|8_94`%l=#<I>nTa|Rf-XEUQB
zPk{%biAs42=%o4#2W(3Mc?f2kN8Z85FWrfgnLdWBQ=yG#4#NJZVl327V^g>1u=oXu
z<mn8-A0E1g#a`$YKNt8*cDqy9+JPEy>f;!ci`ak$7v)$@-UDH8V2M&h85VvTR_|;r
z4Tjs+f|7iO*m#x-*?C5p>BUX2Uo+GXt+$(y3;p-;OtlMpx-|y-zz0YdqIJOvaqG(j
zRyW3ugv~R-IcxU8hWRI0;*dF*-qc8Tc-#=nk1YVx0aNPx>VL2^?<{b8{v233=P15U
z{(*z%2B1&6JGM=8!zSV0(X}gt^(&2s>Hr(m-FO+)8>Zm&JZUE5)<bee-epQI$>=Y!
z06fnM^Ng^wywEityF6Dw$h@uCaxa5eZrDKNAd?;Pqu8Fh3T?~tMY)mNNYHKrxRkJ+
zHB{dtQ6<sVs!cnf+V?AS7S4vccIsmH4Z^M<JDW{BEh+S;6G6MSiPW@?#W>?Bm}XT;
zw%&|@6+*_{Dep0J-EoEt?Oc!bGv`3Vr#tnE-ezbvsfw6bRkN(U1EDD3q<F3RZPIkk
z6xJTECs8%Sp>*~#SQ8$CttxS-dMFG^Ov}g<Qzhs#&_-L?abmgbSmIw1BvzO71>@1$
zpe)?8#SL2z6&J$L-Twtsi_?Xv7ur~n+a6N2?~2Iw`zPYBR4%?F=Y{q=6QJ|fJ5o7X
z4U5e0GDDYAA&)Quw~eK6Zs&QnYA$ElQXA;e8xka}u?%XyYzDg=9p+c?h8&(D^nhFo
zxlVQo3Cw;>9=aC5?6J>4&wVhD9;3^xu6!iMllL%{nNl=V*bV>LuTEti1hF%-qL7?_
z2`<C_uy!FU9W-VKwkWM5kLn4&6Y_==n~pJ6eO2&pG!l1Q-wF-4KZuS^jzebnhp5ih
z1J{z<q%2;8yb~EgfwTu!)M}yxX%Ib0n2V-D55P%a{jDBuf`Po7?D#H;(I=JoOSvIf
zHTora^!hT6UndQfWF5%<6;Yok3nW?AVC^;@9(23%tjm*;UE0DF+74oMP%<3uT!^(X
zju>q_fnKyThll%&*_XL}Y}puLF1#EjYE%dkIz9>LtMQ4cuXP7~lUx?J=K_0kWF?q9
zRfQx4ZD!#iLcf~=ug6i2)EJHe?e)%3Go_O3oDq)!vdi#e;Tdvef)@Ta&ja778$i**
zNYU17o@n;Fk?fAt!Ht7-@ygt6*1R=}Bve+QrSEW<lOe*t>u-~#_o~R>8+qcT^SX)k
z$v6=9^q~0Z3pp!fJO{i{hl`7TiyVi(MZI)CQD561vc<L+T}{&KKfm{8U-J`5bFmbB
z-Xev|1;*)f$Kzn68V-d68gc(Ih7Qd_&gIE5GUk{q23*a8;{5mIh3OF3IrAHP)Spib
zWL~l#`V}y**MzsX9wTAjE-}Lt_xc5M#?nJ0{pmW@$pSjWR`g`?7#iz;m2FXv=7ar<
znaz#QxGQTmJ)3?KMjrdiQdEp+rfC=X{@svH`W}Y{{y)Jl9%0yXb3Wnm4DP6T2{+c~
zuuYb0sKsF!zI;<Tx7r`eTc;_~(NYey{&f~TF(C)7WH?BTb4R~)1z1`;lfFpU26uN9
z(s6o&XshiYqB2y0*?TWJn>aIrJnObWuYh1WYw$Mu<mm<)?j<;Xugqe1(&y0iLpN}(
z{AUQ)%kbAe18SOhjySxoWGmH$_ta;?d#g9|&=otdimxDYJ`12-em5(MNP;)7>lo|Z
z4E)71)VJv)TVs;h$i^7>si21+Yu6Fei^BZfJR5VD`9M{El2}4L!K$QL33AsalMan@
zZ2RZq%t>b(Sk6_&q{?)*-`ftmb<VTh;dZD}+eZc(?7>0BAHdJ843mX#w*QX?oG$RT
zBPUITr9U|<5ijEIRtq7{@&?vhABUg;U&MBr8dTQy8fqI$(NnPl@puk}((lP&KI$+Y
zUND^RP<ABYKc!&}y~Q4B>7i;<4?FIZVjXuo0k117;tpv9)1lfJrd$s*KiKhGQ*^K_
zZW0ff*<=+^6GbO9d|?j+{p$O<7ua;WF`Tby1(k<C@WdSj%>JVfyUsr%@@KxV>iLHu
zIjs@e7Ul@=fWfZo+sOw#cWSUkgC8^Kf}s17ylu@%61DULNmRTAOWP8`s=x~lhSfs0
zTQ5Wi^T=xNb+EHEpEo|Rha^6nTZ0JJ58HytHd<)us15(coy3F(cTid~nNQByE$~bJ
zuzB}5t64DzOh4_R4L|jHW9&dsa53O{V;jlU*et$U^a4M=ol3VYJ3z+E9TPiltH9fZ
zYU1m&a_ByR&3(sVA_=rSjV}+KBBsK3ZQoTL*ng;ly?^A-k0;Lp(*v<WC!<v~L_ZJZ
zgpNYMA5D7r<V_lo(8!d+wCMNdNAMzi248Y{H{JMiIDe35Bi!#uq2K8V*!@+LYxk~0
zw{|%mHhKuP__zyi&l8-5p=MknSdyNR+(nO#7)Z0eE`-t*y1cJ>C#(#)hogon^7xbi
z{Nwg^@g3hG{L`#RoVqoMRwo(JRX?*?PqZ%gy0?lQQdQyCht`wd<=eqKwcpy2TF}U7
zpf?2;=<`qZbh^`VzUK5PY`P_nTkmIxrj3|P6RKljnUxL5KTw1jKNRu&psU1mc)xIt
z+FSSEJPZ6$ZAg#lY=w_QhC%ZsE!tcyFp=uNvH8;H`O@MtOuDjzA98ua>`$4Ar2KC2
zt9lifm*hk{T_xb3(<-*+fDJ9l*v4<%(1!E5`rtIvh-n>qhQanK)Gy*26v)f+fe|J3
zH5>EcMo|VtIfde+Cq~?G_z5=U;TdcQeoiL3Ys0MjChW-f4p3VVNjJ<aAam`8vcgaV
z+xk+tD)9Q!{&S;EHm!tzs==o-b0Nsx5KpVFgnlh!y1h!+A6dlHvWha%dZp>yE9xSc
zZ+*)PzKn+tODD0ecfUdT*bi&NNl)=BXyb&CZ7{BHA>E{=!nb(ziBJFd3Kdlm@M0+8
z_A{4Z+}~(!k{wPz<m`ddst1IbZ4CE5YfCT2)_|1YH|+kSL;KquU|M?#O1fLXcF|b=
zL&i$fH(11f-mMh%M2z5=Y=sNW?9i)nBDXKmp%z0wlfyo9X`-_N4bvD-NBEzm#_fa7
zP1?Hz=DhC4=e90HdKM>31=gqR1VT=(6I?bXyTpf-%J|h|@8I*HmpHUy5Z#$G7S;E?
z#7iE5|2KD{e9VS;)<|-zMKZ)l>N?UbaU$v8CNSodGo~5OIJ@gs8us*T7tLMrTBIy}
z9KC0)g`rsn<jj>O*#6lP#`XS(E)Orksfd}((8eEj1cx!HKRd~rKnFP6S&#Eu{aH}^
z0#+LSfmzw7fx`-O(HiNk_%CHNY*jJ9x{E8A%9(D`*Ji}VR&8PxPMhk(#*Kx~za7MT
z_8uUmCcebZekv%|siMEHJ<dF-3|m?RCZS?7p2_wDqvHFb;3Wb}Vf8GMp_hs8C)Yvb
zwrm(KDktKdlkl(c2vVt1Mf9u9*{ke)w%sy|RGv(LqjnG3^RGLwJ!FgE<?CdpHwygn
zT{|#v!W8ia$tAd}u9%TA3Fwk%ivD+;#IsA)Q1w(8s4tSm3m(zfkt2e;D9vn3LdeBb
zNi140PrS}qLU6a*g3}8tc=5m$)vgGvyt}<bAzBV*501t4J5u0_;H9vRdc{88F~VJw
zmf~<9Z8V%33=NU_<oDYSQe=8kn8S+6LrXm&Z>LMh?bRT$uR&~fH-~Ke{f4anZvseI
z`-pv9W?+RzG|1^r$IdceCNE?ngZkCrP1;iuwIie6@Om)G9y0{4&31-ehU+m&__^KL
z|CA`WykmDPz2K&c!16i0gL%J5t)FJN7g{5X@#UyBcuak$==cFe*btozojXk6NM<$u
zdYTM9PbR?Ct>4)fuMtA8`f~kV`>#aec>&Z{sDalhLp*mc4dzrU6KCUAajpDH`cPUQ
zZqCT2Q~X`H-pqF*6)nNb^JO7~UB3$NW|tE_WfNY1CS=S%9snnm`TXkb2_Uy}3T1l)
zCf9r&>fx+O)dM8yuRGKD%ZCa4>Bd^r-Ma%-U%PNRuz_*Q1WbDA$U3e>LFm1HmUhAu
zRfl%6dNpUfARdepTPs;;$3}d*U>L5rrh`S#cayT5Q>_0?7inLc1j-k;GqcEGwr{!M
zi@f}mY;8;-j!%3AZYvi(YfurbnRgElCp^Q+=A+Sc<U@FH{vLZ<*o9VBad0zqApPE1
z4A6WGq77FF49yrUzU+*)zFN$nNt4_6e!|1TT;n-B2Ui6Moy^Zhq-$rBNZx-7`z_8R
z(#O<koQ&Xb4w{C$yoKGOjvUdK2nWMe_t~@SK_Uf#0Te#RglYTDfQr}zfn`4g+bbi9
z*Vh=F*lB|8ul?Z4sC8(YH;mbNPsb;-mm^tmo>|ZY7#9`@&Yz}%@ewIhZPSEFtCiT_
z3A4~B|CDH(LQws?z<6Q*egPL}&Bk-~8&NW#2!kYlu{rTk@N{xI`!+TK)TCasjaQ>s
zw4cCzUf+l2xoR-$lM~uJEE5gi@dL+-qs1~0wPF80ck2@`y-7xNIP3eK%8CXCK)<UD
zy<NEoy?ozdFds+*p9vmVdk%*ho(l{Z3rJN5o~#!qcC$s${bS9uos)RdqF<P*C*spJ
z`f;h!1AL|Q6gs!J!GE!?qMYzLe82l5+;Wp5svD>A-$EYz`2IaY*2I?c2`2PI#3Trf
zp90@z<O#b%MNYz2)2CuB_Et)gKTm2Scau+$-ZX1|ij=e9w<dh&Pz~5W*Mi@zO@rH7
ziTs3u6B(E_5T0D}=1*sQ15rjEpW726u(wTV@?~}EY^Ft*j?|?m1mD;sE`x(je&MnW
z75Lw^W-@ennJ8?xH_OtGB`KbAT=#PsU-PS;e0qEco)#H!a~4F0-ygtb>N?rSKj%SG
za5FfVd9i0VHxi4Z!rjKd52R|A2Fb3Z@afBBl#Y&pmTex`a_FdV3OUYh{GAIDtpV1K
zvc9;;Z6r!uj~AO5+$XllpG7G{;(;HUiJk5iDCMPx;paDj5*Kp!OU8@;ZoLbiURB}v
zSyEz)X?Ym=Vjq$1b7oUUHetZ0!6dp%n#1n|=A@ejHW4TBu+Xol6}}%<<AvSfwh|aK
z;UM0>^^mM-9)U{)(6Oh$<khR(2_@cj_~+XaI8;+XB2Q}bUbj5Rm>7g^v$Al`FCRYe
z(qKHjZWiDA_Y3wZM?=7mXm+NcrQSQR2W#VRgW|;?{%@`<ZN4^!I31VckzbKft#_>d
z^fH)kc@?aU10Z>-9sczb@j#(7EiJv1^bf7VllC=el#@rkwM~Q6_--68+zZuM$be~F
zvS@-xaQhrR45!Z~!pM|gII(C0o=wez3+t1igmsge{6k>3^Et-OHHCX!;XJ}`D89bg
z!`i}AAwu9H*1EjLl3$B(&}|3)K1`9ghTp?n`E>D%N`c`kXNwQyZh&Qg3?CY@h$l9T
zf=spDVCrCj8G09C``-aP%V7un7;J<3^R)Q)kwfT!n!E6yp)Z{)aF7z;D2aPNA7HIb
z#ay(!2F{-!#f^G8aDL|s^xCwZtJX{MeM`!4tGpIB`4b5%$E{(Cmo(|NvJvzpD`kp)
zv2aVug>KuM2CMhQz^Zy#YH=<aCiLs`+dZ)m`sJ9IXSNAkkJYRvC5+yvyD2*RWC2aO
zGMIPo9>U-MmZ2Z+2Jrl!sl0sjOFWbGfHltNFrYJ1K+>E<)^myPnxaZu4lct!uNqvp
zXC?GzCSqlgF2A2?OoL><;*BeEJR&WP?-TB(VWS_92;4@U9aiC{L2t1<O<*8S%jd!K
z60p}MjfT3*(Mz6t$@WE(WbCd7ATjedgq}Z7(k(;i)8<ih!HEKHJVO=l#+l*CeP-N!
zqB1RAHVJlZGNAX{55m^AmH6r7N%m-SDF(gXgMRny;YP6+1oYSAubdG=Kgk{{N;Cv7
z-ch{h)kW^9ydo#<<G9b+b}al)3pM51U}490v1VT=n1vl+b6yLc9hqQWH~6O2=NT*U
z%U27QXg7>1$ET9*jt}u?-84SxnhlQVU&D=U6VUUJ1T5+PK)SVGLay$1$oV!HZ@kyQ
zALk~E$Eo*#*~*Xb@5Bh!R6doxsXGLlrKUjXE}`4DC<$horhwh_z2x89!>~nJlgoTf
z2WJB#T4LpmJyMdOV6zi0&3lan+<^}K?*vSG@`Ln#mf|+{>h#3AE9Clf2Y5bXKDixz
zf!zKzUSu&n7hXXo)=m+AMpP>CPx}-$N$3;oTDseMibpIyG%dsP-*xH1x)b=~*ApQt
zwu+o}I|YuLD@c{eB?!LlBz}2kCYzr31}m<7#QVd?)3W?flDlCDA0F@z=QBMXzs#N9
zT^LGPvn(Itw+b}hUc=wYsq|8m9Nd*W3hA%BSWnM4_Os0&n`b7$-w8vYZrCQ)a$*gW
ze*KzAtxFIXC$ZRNW(FyIF)H_NB2hhk*5{%XF~j6BagL9JFOCMFpcOB8Jpu)OObLsr
zbVbAcElm0Pb+TH8ky5K0>}0YcT#Yh>4XJVBrBfnt+^gyMYsv*8$~C~oHKxSPG9SrK
zGc3(KjT-VB;Qn+ojL>*be(DQckdqsj>FFbwJz+LGC#4{qnf)-*zym+GXyO)~g{U_+
zLoEIMJ$a@3n}yy?V%HR|krB~<nS}Xm(T1f1!SL=F^onR_TDtk5zCB&MVs8#wuN(<m
zuDgI!Zmjs8;IenQegyrm*R#-FXTZBG8{2<J!KD#b*~b20(DYd&E{G^)k$q=K$p@j4
zv+$s`|6>)g`VkNK`>33)weW;&J2^O5HWvDYY{A?w{cNp9EHT?Nl(bel!|L`3kWhbD
zFEwoz2E7!xhBc4tRq0*k9~BQBKPm{R${{;KqVUTH6P7#HjuaK|7AtQtASH8CSVxP%
zNek2#_0%civ)fjpjl0JYvi3NM+gm|uH_e2pV<+O9P#<>Vhrk41qyar!7Q$xH2#7NI
zL*A_`BQ^WKll{9>p~gdC{wT(f)(bkY>#s6ySBQfx&pk2t;e&d~*u5lZz!406F$D}=
zMYw5i94gw3MswR(T$o)8k58<`zQb~`HfJQ<pAgDIeV&ljqkoXm2C;a)^*mFJ$siN=
zDMLVH6S-`yi06!6vDibSP)=R}Bmx@G7Ae<=e@>r{CTrHgptGa!&Gs^KQ%>lCA5ai|
zDX;^}n)#rvBn2e4hv+U@2ihvGn7cre{k~U0w57CRv~VY9n=B)Kx8V@E(-e(WX*=*s
zo^Y-gI=n#x63IukQ=;}ofiPMr0AGAk!`ACDnDTl6dN#je`73kC>;MD&wq!Bdnr(yr
zuOG!33AW5N!;vk>cu$h<-DdHPk(e!4PufT6fP#7f8T$J&d)Kj>DF52Rf>IWeLkspn
z-vN2iu%`#HI7u0lLz*y=DN-|)0X*ci99hxahN9Yda#&;s#vLJWBQYIZ%%Z5rKr43E
z2;u#Q82EC}6xE+w5H^S7-;NYqd8`Twg8qp+iVE1D-}~{K!eLes)XX9+I+?Ogvq+-n
zU;Ud|mzezN%j{u!5_m?e1OGXbiNpqbBuUaBaeV`e+&2jP_iRJ8ZPRgTE<vYd6Irw2
z9Pk$n5}kGk!!OfkL3vvtD;kl-j6+PYaLEJqcd4**x2+*1{63QyH$|Ku6oE}m=gBDR
zNvLe2%4)!wr7Us>r<{J0^2G=xRE2CoQ$EXgZeVfB9+0-Knv_~}v!4Ycq370H=8#)V
z6wi(VT}wYS`)0@VW+>AKAHvDn7~w4WZY128wu7B`t48<tsPczJg-q?UBQ7|wlWp3z
zlODO70A1k=>xJ(KB%2P#!-6-#XOafa30W*MPpcy;-@^p1O*Q*0ZA4BK_mW9&`_S*d
zY3#C-4QMG|VKGa!p!`J?_SsaTms=rCEt<)<yJzrA3ajyFstZjG7doXK($u1AJU?(b
z6*u>I;>FkcuyVXKJ#gtHe!f%z#nn+T>+Kd==r$EPMx^sW?|Rt$;fj2ca0b07u|hOf
zKNceYJphMmmiSF2OUUk<(yyA0{DF8KkDE6V<_-yBH<O0I$T5k`y>l|#`{^v&R_y~p
z^~JRUpTp&IR{USnG;;X5HP=`-pZZ135ZLX51P<L<tXbv;UCZTg?IIO+ElnO{GYF>r
ztR%kriBR$(lu1N6S)DF+#KLwD2zev?`z|RW^|hK1a9I|AczL7Lh%LBFY7=@6EhRoe
zU+c=?1!xg87jMKHvupm9>|U2IhV^h5-zQ|HTWlb_ND>UpbFp>ca-yGn1iv=EKs);f
z7`L~L4f&ld%IVt9T=lGYv!XO4HQdMQ^Xm9yus<4T8i@0BKcMR(!IvogTlC*~O&B4}
zW&_QaqWSoXFmd<_SXq7$;<NNg_=X%DdO45GtvHYVNt5`F%lSetznZMStHyNlW`aYV
z6Rt2R#MkmbKl&}CzAuVVGO!(X={<l8bx**u+X;5M%X4)rLuxu$Ox_tA(%G#gSpF=N
z%2t_?vJpclOH!oaLXY3GYbPr^Do<}eP-S|eWc(`@jxUj8M5eO&@ZF?~P4^hWbw*t$
z3tXr2zRn()XX=1@`L?3gCI^1>pBH@ll!)FRyGcu44!h;`30teLQpvTGxYsKqVwvHB
zbHfgR>an4;V&yb0*%isxJh_VnwQt1XZ)50^b2c#YZYuxdbH(Z#8Az=+ALOax=P>5X
zZ~F9G4*pbi#6@H&_|_Iv<2)O2m_z_w{Io&vkPIW!tzSZ<Wh2v!Izu+i7jhDnndp&v
z1y%b0<8P;R!JTW(^$!vcz=st(F(%T4E8P-Y=0|N|pj8Lf-&u}2#G4npoCRw141#S3
z^5N%i@>?nc=upE!w7^Fej=g$|zmIBgRZyayZ83Nnu5r8X0B35fnA_@1xSzZcf1Gfo
zzZz6{c(*F9FTDWiMeXd2YbSKvv1T4O8>r1-3+PQ;LDfBk%)|Y6Q2po-^|a8YUI&$^
z&z`}23G9KpkMyW@^HWlKG8TM0g6Vlt4SJ<z;fRI9*c~A!vc>E%UIJ@!Ytjj{oqC0+
zdH-RFm*0_$<we}%q!wNAX)>L?KNpwuToXKUoA?{|JUZjdU&zv#&R%=nU<<3R!-)ul
zxcNDBV_+t_9Tw3COXuJ@yLfnWO_^(+DaGfhiP*HG2kg2}<KI&=_@=jyczeqdYWM9d
ztdVyHg(Pp{XtReT+*ah*X3wOu<NlIqYR7Rg*W$mfR`6NZ47gfmEM%NifW7i+Y{R|)
zezo2b>a-K#$G0PRMy>{q?k~Uv=K1xV$-kgldkJ<fGlH<t5;E!SV;p_z37PV<9HokG
zK*`^s{QO2+I2|{cPpzDc9Cc~%$3FbH@(NWG`ai?M#!+MO7TRfYmNarp{@~Ui>Sp0b
zd(T|Qs+_NoX(|N<>onlx>*2KhS1kUN98JPI-1x=!A>`^OO*%VV7q>~9^A%az5Gdrx
zmPieQ@tYpw)y=VRx%L|*s21YPcS-p2`WM(gIh023^q}rjWoc?i2rS&+4Mo=y;6R}&
zH9we4)n$J}iryz2SMdkz3!}JqzXkQZr$g62w5F9e?vpmTmpE;JK5Prkq@zQPsrULc
zI_1)EJQ6+}U6=nA%^tY{|9+VPX?r&UDM|o|r%_<RtZ;qZ|GW=fWNzvk<}`Z{N^DqK
z-|{>b8#GUfV-&sNVBbiz(8yx@h2FwMV+(X#|CpKW+bL=ZJR(w!pA3sOUuGwz9*7pr
z-3v3M(r{(PLm1>6jMIxxVB4a>plp;xGPI9~4hAek=TT2s)i@oDQkh0R|9MS}JA+}#
zi*)?vmM-MOqOgDWDl*PH8V_{Kb3OIHqLZUO;*X!p$uqOZBDts#0t<(tlfND*+%yAf
za(A=l7Bl=-mI`un2cyZ6Rm{J*N4(%%C5d=87}K6PFtgG>qOCtiG4A@5wM-ItI-{SE
zi|HxIQ<Pwl)O_?;+=>>OW0-{RYU{cm%}lOr6}%f=O6o3a5c5`nVRlnOY^!{`e%IA;
z*lzKacy1X1^=}j5?q6@{cl0D(dP478$^)MzZ6Xy*s)z-5#T~oG3c0Zqp$|A2t{(8f
z?Y1JgbX@3E1uTP_)uwo@BbH@UE5Z7=%VECYhiTmSf!P*5tT!u&AzySNLHB~Mc;cfq
z*!fZk;@+NQEe<{8jZ;5sFTO`wgC;>zQV?q>epK(!HiET!Mxc6$4ry<UMaQaST+Iiv
zv6qbDuh4}n`q9f;L%dK&Z8CHBKZ}YvUr6VYuk1(nTUK?`7mxJjv!<F7jH?-nHXHWA
z`&bn)%%97$h1~1px+c8Yq>ul6@)`M@$`-6DWDAEL$E&j#TV3GA{8J6ZxjU)|Nz#FY
zLrUOk6bW}_hOjcT5s-gA44ZPSplHs0qI}#3Ew+@CwGT$4XIVOFHqK?^&c{KxRsy7r
zABHUri-@Pp2eRpE1ln%@EZ#Wbo!HWG4Afi6f`8aUcJ0O==DPEw*gPf}t5qX0e9l`@
z%9&2)`F%S(`&i)I2>HW&`zxeqXRUbKaeM6YzQkITC$j8+lR>vUUmR4j4$8X(*Nf3x
z78T+H?J0kWg#R|HhSqhkLN^f}v{=9$;|>VW+J$@X^x*!Co2>ZJU2JRj!acrfv|V6D
zWmd_eq>w?`d9fIVJ13&alac5hv!4E4Vv6b71vv8EMK)mAApZ92Tt0|;LGS`GoDGV@
zY3rhJ{IZMq?{YZKwX?_YvM8pmmq8?k`-r|pJYwp<0*JX)GM?2@!k}^A*ygq8*v8i<
ziC$O=UPvE}?XF2s==O|BtZ6yhC?5^FkC(9Zjw8X=Xc_d&nquUS7_j{>7UXT5Af`nD
zGPKUJ^&}OCl7)~kEDb&>Qz56iQA8%}A^vmph=gQ|^=cCUC)r@AToa9}-$g?9*=6iW
ztt&YF9zlYf-!oylfa>BEXghkCxFygQ`jfVR?YL;`X6prTwXzDr^HbT6lbx6+Z;hkk
z6|iscVZqD#9k=Bv;^eumd_Q#IN$W9i<H!ur|D4;Ce3s&PbxT^QH=YK^y+otWcHrfj
z&ZGvV;igm*SUM<*?QI$cE3Hnm5}jn6->(Tj-A}Vy(x>>-lS}BJd!x9N;5c}^ei1i}
zz6#QN7vQzJUPw7)%I*8~C`wE49|s?!L}xf8m`{gVD}VA-$%EfNK<Gu1&ptak@()8A
zN$z-Otn;Y_UB#oKQb%Pf4joS8#Ywbdbu$dzS;u`#me4!XZ6M~K8gJh0&kgg|!tZI8
z)T{Zv^}moeIQ92#`ml36jq{M>x2H=%N=GK$=~GSvPK*NMlpm1U;lpmPegGEM|JcFu
z5vU;{Lfvoc!7|N~nCV<5HA4o#M&&PTZPhFm{^J?3y>UdeyJaell}W^RZL#F-G@<9b
za}wm1Mv(_M9I&YDkJzi<Q1BwhVdtB>%thrtCa>;_4nkH+`idI98DGc7>2-m<WC?Ei
z8zO#|p^vFwPQusMTg1zM`s13>|A{hB2>!yw9++L&$Xpy8!0wEQxAS296ILu1!EBLC
z`b~Ce?`n8DznEy>ZNQpaRiw7~5AnIW32XJ7;duFE5;0<JgnGb&;Sv%O;o)JX%fdp}
zFIgG7a_!2ct4#w|E?pC$5hNcpc(z1*gl5o?#0V|-2yMgJlCw49BXojPlcxuIj5Wd+
z?_}`q^Ii25?Tt}eC5fHB)q)Q7%J?qa2Md4hVGnk_Vlm%kM7Htst>@$)XOG7oVT}d*
zS%Ah*@%;~W?3t1s-nu%o{;h5eo3?zHc#NtsCSQ2KHmI26DpDwl7`ZlLRKNn2|Nj}y
zOjj+B3|+oDLihhYpWgqTZ?t>F7~%O0XAcnmXw24F8=)^erhTWkghc50pR9AFJU#z@
z8Z$3zh9<S?^sLrv*7jr#_2?Z()y+!;(^wIj7tVuqk$$vG*b1x2_|Or(jX2}LP=VC!
zjxRS2<5T3aApY_**n6T3gUr3@yYMoh*;ywLNEK<V*&sCa-#`-8=aSK-0&U6d0~EaV
z!=c`1VBVW~q<h?d<Va={ei^h1wk%SE(n(2>GGB|IJ7|pd!f%8yzjRQPe-G2-jw4)k
zCnk@@SiZ3e4s4A>9c=?{|2>Tj7E&BCK37FGAF@DdKrv`eNoC%nbg0+;MRdNd9bR91
z7!P>4!lyn5GW)<erX^UK2WHChcdCl?^ueXLb;lvhJo%Ar5*a|ploV>%rAk#}heNjX
zK{Qv~ggw76zzC};(4abjSIgdnIpz}Vz<n)Yb19H~(&WH)fgx>=Jtz)zQNYfv22}dU
zI39e(pO?s*!Qg4bY0CQbpjaqugv)!O(%?8Y4^R~iNHc>=+zMLMqlJ@i6uQ~lpl5V3
zyz!L)W!p}C``iR?y}5wyPCC4Q1MnM(#c*f^!~VldV2f`liyA3d8H@L$weKI$Uo6W_
z(_hv<j=5;9Y8fC}(`5t$&98#R1uwW_ph4M6X&y8r290wx`HHt~u=?glvG;NWP5F4}
z$M2BTBtwIr8o|-{`G~s{VQQ2qy*0HKUfdH}?KaCWdy5*gc3D8DNc@98O+OeK>_rW)
zv3UGbAndw$oa~CW#=SoKaCylB*!yh??fv%_zHZ4OY6<y7=bQ#T<jJ8xZZ<z1dkQr2
z#Pz?%jG@lo9w2w>z@xev{IWt7uJxEh?PTPr^jj+~I(-zcW(=n8=EouIWg5$SJ{8LK
zE7*?3KUs#$5UlY{Kzo~bav<X)gddQEvlUsGa;+EV-p<6++vkGGoeWrWONLL&)n@JE
z)`Gsa4xK1n!mL*Npw;vS7@(hyA*xcerspkQD$RuN^NztCLxje2k8z9Va-7im15ZD>
zk0*SN2-Eg_y72uQ(X7r(AkwL0zvdi47q$St4Q>J59&Kb36IpbpDt)$WCDj);2wK-J
zz^@P{uGLE7sf|yVq-7qgygLut<ieTHxu5uS#b(jJ^-^?bpI}w*t|2=L(&5dcjqvr1
zHPpRPr>{bnpi=Hda9mJLPHUHea&sy(|2B_q%)TUg*V4$Qz72vYUDM&Rc_C>0P=G@j
zhskb>p**GVJ9@8iL$znU@K2xwY}oP^LjUun*6W0Ko1*}l1J1$NdtdSBXHOPfGFTwh
z7lVa*F9v0uN9R2y;={AQ;dQMm#PDMVpLog=XFf9|OExS(rL$x3mqQpo<=+bPZl{Bp
ziI@!w$;ZL+74XY(JQhhFhGF^k+}*)RfZjE;=Eaw=Hh2IZRB{UxqgDB<yw&{J!gRP_
zFG0UP*awC|WB95ECOq20oh44X2lfgBshjF67<c*}3tvAN&V>7N#l_3PZJsGzDsc!e
zl;6c}&t0IOISX!I-%U0j-$~j(*wS?+rgZpoeL!NsbNs?d)lyj+d+ija>%@T1x4WYJ
zt^KgbZ6X$zbiwOxb4*gO0H3uIG&%pSwfvmvJpM==Bp&eQF(*1vEy@`mw<xgQjs#q>
z^)Ou%;LQmD_3`m%M}{oLhuZ&P<K8py_uv@X_up8aZ0cX{;$bA}d$9;TJ_wV`0~xMx
z#)^B+PJxgAF2Snwb7=56iWPq_rn3V|*nkasd|pv7dj0DZFV7JEKKnrq9+9JWc5AS>
zE!$97Zo;55BLNP|@|fXo(6&^XYmR(W{}Pk&=gE2&_`Dhl3I@}JgFjGzVHM_^7Qvv8
zGH{}l@+_()*xbF*{<0F4o4JUzC(6)J+ia28IUPsr?Lv(q@-ST|0Q`1)Vd?V$bYJES
z=3cXfCkW(~7k+d3Z2ba!nA?p3YovK#<RM&C^9&EX*$NY^l5kDYA7-oAg>PKSL{mp?
zBYRrcQp03xwB*iQCPI?uj;FZcyAJ&+5MWLobHVs|vg}!CH7lHSi>)mv7h#Vvzx|~S
z9W#r7eU_qb8#aKMc9)=vO$8~xLX`A$!drVLz&5)x<igSC@Nc&Q9cEF6I-^zin!Wbq
ze%dH%R#!?Y1S^j8%Z>PVTq)6@)8K`}eh8dd4gubGL@PJy)BoPTXJxtvS?2hIF#q8g
zdU&G@H;K3|?)UzNSCXHj_%X*{aaoWm7zcu#C(u|F>{IjBkuOtKX|>SqaVfK=k7J(T
z;mqB1(xV^5aHo*yFtVcO4~60Nh37>HO|Kz1-47qm`HmLr#*=@k!Z%_gq3#2g;Xt#^
z)bh(=R+qjOhq~nBJI8Y{Fyar?Tev~6_cUBMMKBF&3WWAwMKHQ>2?Y0#W}S_5ASO|d
zZ@4gmE>@P|hjQ)sw&G~4_Rk|+X!S4q;>J;EPZ%yMhR?^t@lMG?NO@X<ZN(Fyb<kg2
z-;n|QWg#59l*E;X4WVX+#iUPo8F7@-uf;aVj1zJGl~9tm=rnN=zSS8m_rx-uE9q4m
zW1j31L55}jABxU9uEzfj<L$Kf9!eVxrBdg)KN5;GWJE$%vV{~45^W7-v}H6z%dYd>
zpM<7{@l_~fBqJguiGI)T|6Zqa>O7xu-PiTLW~0pCR#xrcAPmfELW{e(!130g+<Gdx
z??0I~I{VNOS1FkDpDN5L5r?_TY1p^+C|uav!p{%ije&avh?PSW`y+2HnZ-H&_2)Uz
z?SAJNC(Av=-PMzNNd;hiY!QrwjWc<VMM(BiE!vu^M*DisGp|!~;NVCLtDU4+GlTCy
zh35~k4?g~7gk~zDxBdxYc}W4f?;Qc&+c4hEbE`4m&z_e!yMkT)jY|cE^{{G*dNg5^
z0kM6$5mZ_o=+nALnB8nnpT6z_??8^U`AnK7_DiuB6DqLfz+q^$kt8bN;du4iU0#%{
zEZjB?g417=QL_6oX!I^2Dpqgt(ebHt=>l6!6NrPZN2)Y?Z4z^)_YR~ua7}uTgyG;B
zDfUlB7W6fT;M+S9po~^Dcf1KYkMKeFhX5Y9t%$#G?8Qkxt*LBH0Mu;Kp(Wfc-y-%d
zk_bz7XmK;&3dQO4d>iuk+Gfo6sAivP1d_-tD$uWbAE*613ey+#0{`4P%pGWA8sg65
zVXH{I_;4k=O0)~rlBJ1uH;)}8SMl2&e;i4A3(7n4Vfeyqs=nqD6kCqMeg`ueH#da6
zEf!Cl=T<`5D;6vKh0yBSdUC=_g$6rKWt`;48G)KiJUsM?*?EIY(N`vsje`?NW!*$v
zn<GwRA2@*7=qS&<vKa60pMhqg9~=$ibjg~s&CJu}2dtHb9n~CgAQPr|!W4x+Se^b5
z3;&psrk}^);sPuBOJoZki?ZSz$%xJgbD2Q53-tD2ERGi6#;;@&?3*8re_vL>g<Dqa
z#zX~7Qqv&4yWZnu!%<jVcNquLHp2Xn2`pxqlgxY@aM~)3wLKxYcHKDZr@00CKb3<^
ze`3w54Qc4H;5;Mr%a-gc832PIFMP1-AG2UnIPrY)2^(uKR!^+H1?fJ4aGs+(%YBz2
zlUB_q8!c-1Y3VcY^+r3ICYlZ{V(N^m<{|jLaVmVgP{!CD_zSy})?$Z1Dhg@uLvQbs
z%;vE-%-L<68{tnT+%#VVW0AWkgpR{|UJ^%a-;F6@O?;s_Id~vKmqr(O;-00=*jXd*
z*!c7wJh_+1Z?oj48(U}Koc+KgYTm#*hAJc+rbE+pMUYS$VHU6MU`JBwVZqZl5VEd@
zF@aq8<1au@1nCe_#}POkxrDr~ks`vquNn1wcNxFYJghP`rJ-Z8jE<!x8|v4{JJA1t
znSujwb#y0VxuBhqwn-vh$E85EssJpb7t`g|a-_b|A0KngcA2-LL2lTDepN0<nV4HR
z>M(&!eCS5g4s9h>&m72*fd|a6PeIG@hu|xA3sv3L!biyqa3o!p+_4oPyB5`e`O}9?
zP`oK_t<yvYUn#mE>j<`Q<Qm1A{$m~=y^MdQU5L|LNqQ|P1s;sP!G#H;G~en8v<Y!Z
zW8E6|r<EGrATWZ@xxM0(d^YOrnMCY&NYi6+f&`zI!qCs<yh>j!;`!PQ{Eo=di5DbM
zZ*UM+SG|GoA{`o3H3IkjoAHd~APQ&5W5<^1q}N}Q=w}wz#4>hpc|4YBRc-(?V}EM3
zlsn;aO+eyJ`OJCmeOS0A7E6nk<EML~Siek??A#pA?DdLfcLgxaaiKMEyZ$>yM%cr<
zqI}#wO`IAm=)qo55wg1B8w{`I9A`UD;Gf%EOX-*yZC+pvsZsk-EGz?#a!$5g*9mX7
zvk^OWQ#h!NS(2HdAJKe;3S}2QhtSjF(3muV^aN5|+kG1D*Lh)hRuXDUi%|ZvNK8;p
zMuSJEfjRdL$6uc0&x^eR)&`~|`KS;z&`XB(9zy)?Cx1}~R<a-Dzrqc_EzJ2@pJ1R;
zmTp}y0#8>ovWipAV(g<H=JWYvx>n&LxXZG<-fN|>=A%Cr&d<e2?z88Y7PC>$6=-a~
zBqn_<Wt~H|(1B^~@L9W;Bg~${d98Oahf6B26Y+z`mMu`e>OoDo4!3VxBx8P21vH&%
z!&J9d=rQmN4qi~872g$EBX51Gmv@RetFnMG-mMSjGeqd@1$EHu@*Qneg-Fd8F{nwM
zjcSEFnx&!xmp9!<bHyNxf4_t5XwPTMj>qA=qd=!9&2ofg8c@3;1Cpx5h@+!D{YT%h
zqGz;-#n)Nvxn=K}ul{!QPgVmQN$tXH<xZG!t&1m`y&S(N_+w>NE7N|Jb50g?;M|9T
z_+zCK)MW`$Q&T;1q}~terR=Dy%2aB(pc%*Fm*A|u!zi>v7pqN&*zdY-_*?A|CZusK
zP1>1^>alXXOI*qH^YKuB<|Hg?$->u51gW3xRfvsz!V}==Htd8E&XFNZ^VOwM+VdSN
z65<U_|26W}_?+U+X5o9+EEJym1~#6wfm1L4u~mjKydy?G(AfGv$K!8{p-;Vty=DCc
z1Mg1+>*!qAE_emMN&g3zzvkeA#zvlY?F9V2JPy5UBG9<NkUU#%OYca@P(g`#^z_I8
z=;ofM3q4N3$)8?WE7Su!LIN;kU=0Ylf92GwcxVh;2a9fL(d5|qP{gIQHLbdE=f5CY
zlP*C|DF0?O6V0eiqA)RYGXz<oP#BODru2y>J*}!pn}i76sb<CcZh8jabS}UP-Su!=
zNsu@z{KIRnj<ZVUuDr~ChUA&v#E+|g@eXAyflZ>VydC>j!($aEs<mSl<`~SRYb<9m
z&dhA+NLb07c>NfsyQ`AV?NX#fRfq07B1I+)pXP;Fv_aOqTo{`*$mX})!>O|JsOB$4
zBkn9`c*hv}esdQNPW2`drcbe1L6rWK_C(186X^kAGa~AK0K#@%ft$;sNQX%VHYXp0
z0#kdi`1ygqt;`S)xT?T*V#=4ecL`&h|1nbxc4F0wU)VNNmt5l-Ay<1ZfV2=%nsCR6
z>{9c=`0RL&cJ0kRm@31}8@_-ODmZ#`cNSamHXh0oo?z*2?s;;JhU8`m;v)DBdu6<!
zZn6~l)+<9V#K_{2tXrsOILzF8u1+6Tx1x9Td-$yX9*-BQQek&_rjzGfZPc9#6XG@L
zrh*pCZ_k0t2X3Rl&l<d&eH1^>R)MIzLFU`nH25#DkX`dioC>~fz**!n67Zp?*ABD0
z2JV8%EGLpCkxXSiK7<$fYB+YPrY3aDZm8+ts6$cWM51ml=tgj<!DeTy-|j>*+pMYR
zl}j)~@-4cR-(YU(hT~zkG<IvF6v<?Eu|2hG@%cpq*pXs|iMtG8nbJSrrMG-&JGq_4
zubYdp%ln|hX#gHsbmLLg$84XYA2B{8PV~19VyBieeXRBwBe}MCbF(D)a9<w94i)l_
z_^hLr*PQX{)^Co7UnPU=g*7mJ)-85)V=mOrG$WJaLwN<QFVJFs4l5cFgUsj{6Rg+B
z45#rNJ%pyvQI1k5;jkXPGuPv}{s!DXt&1JFo`VPN9C%LU%gMLOqj+iTJ-Uh=;;RMK
zLBqKix=*DUFYfUm0v>7*E%_dVhC<oN?swUor}wcdeI7PlG^c)!smLze0mUNE*x`M;
zxbxOx^h~{vuHmihh4;Id*xn_$;JYan%jZCl_f&du#!+VW8!M`mwt+py*x<K$zcEpL
z7`}aeh4xkFVb+cuY*^I@x47i>>-k)wYMwbU%pHV?I&nw8cLMC4_|te@x*ydSJJUnA
zli}J_MQ{t*hK_&qV3%7W9zW}gB44=wwU`n)wO)cm+q+Vp!wcv%xrN(rbU~HmdF<H#
zm5rP!Os@~gKxexgQ8tMr^P+A-(45&IoyjepHR3$MIXqBo4@3W#qwE6}Cp;)zi?zki
zSx@SMVpF*(jaL?Xf}F(PPmb`_j4$Es6^m%k?;M_}rvd3Is7J|5!dU)98bX#_!)!K?
zwOxLYx?H=7S7-xPj27b3N0Z2e;2`p@`U?yVltM}GLbmqLD5F#CO8=8R$1ly+CCgNe
zX}x|e<ilaKIbDk@PJTkU33>cB!|UiE8HwX>hjChFI9qJ811#kquv;x-VMNUvFNk`<
zcLg(8qcDhW>m=z8=|UKN_JOqs&%&hjVI<v8n{T(^0~qBguuWnaF#oPSq`#15h=B}Q
zlJpk>b7#|MyJujkXD0Xz-o;n%r_g;y4JfqvGi<sTMYn6V;<!*WO*&&qv$G88<EaJc
zv9E;D8-K?*-VMd=t_%cyI}Z=ml;Gm_Cgz`^Jkk|MI8yo<*u8xW{{D@{;9o6Fa`Hc@
zX*Yv&c1C2@NGphJe+zzdmOyZVAkF{eOvdBvQK;60B=zxF_seH6!e$0X)|yHR9*WR@
zUk7r@xeW!%J;;M5?#^d45pLQ^Q7xW4Ro7h1)0S3(ofAIuvvT*q<|YDG^Bm#n0zoo+
z=_#D?ss=I#B~dM04Q-=^*>g88;g+OTL}|AdLilW4(j7|9%RItg)>CN2$3z@DTLuTz
zTVR3EMCNdk6%B1_hAq#6aq)|O)<&%qr5{P~YNt~OTdhRtL@DZZUJJfm3uYvD#Nyd|
zS?s*6Lg2+{!>5&7=?n`yy073OdoQO8>leI4{!B4qA9)9Lgl@pP>9eV@8w<wIFXQZ2
zay632tVmf?4Xb$SGn9$O;!A~lFmI#+vhv=eo-@VFSWB8%8Vr2lE%@f;Fn{BG4g4aj
z10Ty@;4YIg__ua8)$Eidr~e94+q;jM?VXc>ck3NfZ~Pk8N7t~kHYekM`=5bB`E=;|
zElA_I_ucP&g9#0kp{FJ-;Vn~@A@Ucm!Yc(M5|^O~e+N2v)1N1>Zf(Fd6aHcIvYtR*
z>1=wl)&q;gFN4v)c|>xo4OeAo^Zk7Y7+kQTNmE`!oW%?(7tX@aiz?Q|>N+ylb}^C0
zr5L%{8D%fuz>(D<xFV+?ps^4(8ybU7TMTGFyN_*Yf7#;`HDUg$NG#DTgpvLf5KZIb
z9dQFXVxIwX^WtIaqG}X(1^Uv+1&!A7ap%lH+-t8!Z@-z1bM$^NzIE=rioSn1_edn2
z8LvuJ4@(iz!ATG)=ZU*iZBX%Y5>(7jW_+rQt0Vl?m>W~-FvIjRx@*5dQ)^XfRX)zf
zId-wD*JbnGCM#1%fm=APCr0NgKjz=~Q^rt}QA{XEfq6D&_^+)JPH5yXHI}6?Wtk=$
zvS%*}EKo&JUL7MRyn_C;n*b@1TwAl@MJNf9qzB|BY0v9NIB`b^6r6L$&)J4F#9Eq=
zMqx75?H;=&Jc#Ky6$_8N)#&|qg5dK*fF=~a#=eOrSe)7k<2`=ZeXkXdUE*A5X72%V
z6p)nnvu%bBcwEww)IZ)uwTI^5#u-0A>0kjg+N4vHes?A#PnlF*k)l!_-*MIUS%hBJ
zp_O~}=m*U-s9fa7#Liv^`QOKwxrr~)`ua&+beSV?JQgK0YQ$-afHQt<_JI9f>*2a=
zHNU+08@er=#<SJA0l|jNaQeR}nEf#k-brl3*CQ^F>Nku^FL`uL#~AJ!;U<$mHqzDK
z!`W@qtnsLCC4aZyDfqERn)IK$kL9NGX!q|JjF!`<YA+kWW1Az*R&<Bl;c<BLC=g=q
zmO)jkFRii2#ua;Pu$!+(6LY^{{TD4J@8K_e#T$l*;ClR(*w5Z95d+|<5cTO#aN{m*
zx>{5dA9_`TZTU_%pl1+wnw{Y04bBj1lmK6ZCs4DybD^{POHJ3E1g1uQ8mlj_Mc*6h
zk>vg>F!4<tZa8>~@Bh{VVf9noy?;JDz4!}mNFcj-eJ1W!y93KO?QFY)80N?K;IMTA
z2u-=lJA6%?&Odh$EuTwb<7&>mdn%m<{qn~%tA673{421!`3Q&|KZ}R+d~tH#Q?~s#
zq4Se8Fg@)GM6B!pmDgsp`u1eXk}tTMyOVv5&gP$du?&QJb~DH0hj~Rm<>9XGIMgo+
zWJUH*V7y<d6N4%X*!j!=j%{_O3CEto?CSroU7x|^1|P+3yI7w^JK(^YsWk1k3=Lgg
z4QqTQXxz;xh-E#nL30K%pBIF>1&C+T-^0_UN|apeh{sl1!oLmQLGcT>UVRhhS%?op
z=I%$3E1U>p^B<tj{$A$rSTO7OU@O_;<_>K#pBTBd&+tvi5bI*{k?o8%0l(e<!NO6f
zu~y&DYU_W*&;K1~5*nuALd%Kh>S03@w{k6BKFKIYSXS)CF2=6v9-Ok8PEMH(qRm@j
z8W!`4JvP;d*VLp;=a%f^TGB3a#3TXkj<Aq?^=;=xIgdi;BXhcgqpZG~w-@WqbTPrw
z8>p3BA**ixlOOY;1r&>YfFGQPzm4Y++dq3SeMdCAGsXl=;t%1mI3=pPJ^{pB|HDgx
zPnnAO@f;c06RVGTQ#CnP5>!+SVqTiCYLyc%IBiIjeWl6q<zYB$@fb>94TYz>gK$rM
z4ND(4;ln`<x=~q;oGod<kd!};8-iNEu*(HoZ|#SO-S60@J^5gtFH1d^4M5_MDd_L%
zho2+9yi7ZD;$k)%9jArCzSWKJYW`vf367>!OE;sg6GzM(EWrB9FR<n59BOsW7<^V=
zgO4(5aK!N$^vn*$&AHaRmny?B^_>GRev>kt@#i@I?%SQH6d4U$-c&(ZfIXC?&!h2P
zEPHMXqA*7W6giv=MH{qh)cP#w$D|&nQ&JS{V<xea6hcvX0UxF>w!yL)ACdT4;{7Y_
zsQmK|7Wb%<O{+NK<k>qg`))QmH)QgD?oVWoUKL^1bKF$JKi^=!(`Hycjcb1$?uMm(
zdKfHKgLAAWQRCk^c(Jkrb1t?(1~=<?DmWj?*OlXmutIoJS%E*+9R)vuD{%H@J=^8+
z4p(ly#r`~R3R@0o@_hQ@=$6(0*t1vENmzI{qoyoCovq54^UqE&4<e%J2QJkXI@OqB
z=TaQt{lK@!J~2*VPhhaA3pezYg6M`?5@lmb4$PWH&KZ@%GU@N^Ija|ZaXDFfTzVP!
z&iW6|)TiRZ1_4Mczsd9@81tT0rqZZ|hWKso1xWkS&%Ue>VH%2-5-XKADALHe>H5m~
z1Lp;4$&Y``<$WRSnWA=#TXh_Cx&~2pEFNQrxjU!20Lij3f$YWyXcTq?=B8@X#{11&
z(sB)Ltrw<GeRuJzkBno4!2$Mopf%>AC^`4t7%NV^VpnGD!}iC*)UsB9^dByVo5hoG
z=VKs>RZ-MYB82e03VGv-DQN9V!7nEfE!w{^&;P8Xug4mZdJLd|&M~|Vlh83-j*5(3
z;0@n$VqdWBAnF)^FKQ0(ldf}A*GZqz&P)bB$a*qOd8hDekUr*ltJ4pod-)m*g1~#)
zJ~-tY2^)VW^DemQ<G~l!@WyEy?pON4pZ+%xx))u9gn~1ee(e{(%={Viom+2q>#LJJ
zYogKhwgas1Pr<*;R`R9y172FRqbBO0CkpvWqT8$Ipl;-aczOr}|4BkoOf|@^xW$GZ
z)h5C#!*GV>Vcfjx3H#h!j%dD`3MP{pm`$I!W{$x@ZlZq#ye@MZm*Gwrv2Fsp6S3@V
zmlGsSXAz{X+ypINg7oSk9n#$qTrE73hYhkq%*&u>?1_F?*tJTCG#Z_Ue)lW*Pu>T{
z_;<k42Z7J6ouitHWUlvp_$Mk!n(vK*VZ}BkY~ceO`W(xQ?&sR$$JF6@o(SDjq)&@o
zufR$fbH?S|WNI{j3jQeS1#d337JU0AdpIx-?u`Cq`|ak#CDS6z;=N@|%D3RYnH{_p
zON_9IbH3+tG=}d-l!;-~d*(iO&z~M$3zrwC@FhzxU}C)mT@zLhn_{Za&U`cG-SwfN
z8BI(<lqg;Gb22?99uKjtw#4VSF<<6d7<_sD06qu-A<H*0Wq-0@+U+G&MMVNUnL!Nm
z7N?gZ2+ykX3`&12L4&ZPSg>p!J2$-#H#3`H;>trP&Q7L1Ssm<Q&oXA*Zwl4%@MNC$
ziLv(=tY;ebX_FWeDPr6N?6gBWnVz;0tZPuD>hEkxP_!nM(9px^k@@g;MLl%x+=y3I
zy!am$Boo;mm(gvtBOa8TNm`~@k}H2zQ9;8KAHIvnhR57oYicJ>cvy$LfXR?CZiElL
zpI~Ep3fpt*1@nAk5^FhH32~i%tcD<qOt%uMOq~Qup|k1V!<m?NU55lX3(>}bRahAO
z7dvj<Kr!iQ)Mru)2zEZ=PdEL6J`GYNH&Plrs{62{CmJ7BEM_8fIj7G|hBno^!{o7N
z@MB4K%^HprboW;R_B<eveJz?E(cX*cgZpqxls#{j!YHF1<itF1uV7YmDNwPOZ$Z+j
zigR>v1Vh*3xNA{2`|!sfE=_Dm@5${3U7fv9@s-n`#cQx?RXKjabd-ot#P5p~=&a8#
z!Bple99C|Fk#{|~*xL%fb6SzW=M%gZ_ez+2RUDF)g)wT1CJD0m%s!0Eg}kjt;9;}^
z%}?YpVOM_vzu_~x|6~gIFXfR)-*;F&Eta1%><89f2Dsa12gY%SbDw#0;a;9Ljz|6A
zoRJR@%H%<RKo$c%l{szZGt70Yg4V(J;4(7IeB#_(KUXR<X`|7g=5mZT<D4Y<`H&%P
zRdFcvBo0&eZ{X(!#6pP8bm&l7gaNYcz$-WbzjaISzs_n@n!1z!dd?-@d=d)@cgNVY
z-8P_jb{&zaS6~v`N7zScN}zevk2%JriFDr>pdS&ZcC{Sc;z${`JxfN>WFh>k`V?a1
zTNpXznK=0BI5J8nVQEh*4sDj9i;SjY-`@Z{QiC`%K$xzX`5F|y?WdPYs#r~Kwwvpg
z!nk(!v;E<=@H?GC-;fXfPM0Q%e=^t&Tsk?-)f@HC|HDZ&kxXG+DevZpEq!JE8go{^
z1-l)4*wyKl2y$F=>0<8Q61IZVbb7(_eGCSrap}|&E!Oe<1z0q^865QoQL9B1oy+z>
z&MBzLEaiUYwFT|FUJcXFtt2w#Gsx+&ai(W?J;wfi=jib;8$&EcaOVvL_H=_dbxO6S
zA)KctH0~EqJ)jFRj;w=i+&#@}zd3!Dod+&TuJmhBCsR_a15Kar!%bTmCj9G7jM09|
zh^)GelEjZq`@RjT6fVJB`FAku^d*RYJj@uUn-PJsCM;Q`Nq_%$6+FAQ5gYSv_#QdV
z{9d^e_@%L!nV>}K|1&11FPV~D;Q(~GYD>;(yn%<@jC;f33@B`$PF>SOVRcqKZ~W9A
zkQh{l6%vzSE0<o~a9s=j^lC97`qhkwqA+XXDot+}9^n#VzTkiOAYD1R%W;mMFo{_*
zg?^ebk>+}dGCxLr*_44&N2w=;Y?y``O}_G9&7+$Cc-QS8Gl@^~V6R>Wq<@cRm+1FG
zjQV0M6gUnKuKx!yw|2o=hYui@qJtNDTXAFI5?E9*m+D&^vB@i&SeN)BP^%Gugz@#v
ze2%u#T-ykq>W#3e*@tnJai(t4TuYB;AydN9i9kIC&xA~++Z1%Eve*q~`m<A*A0WZ)
z8}FF9eV>52@c_i@Q}_*v(eP)<Z02xw4P=Zi0+}x*>`jetU_Yoss<*pB*r}_~HdG7m
z$u|08NS2xx4<plG#iiPK)V03{uK6%jZS);qS>AxE*>B(-jJgi2*$-I$;3ls8pvzY%
z_=n4HHe+_T0K28zikH!*$Y{p-k_7>qn855u7^{|q`yT&bZ*Uv9$6ifL2^6H7yL_Ph
zY7%;yszSrTZa8$s5|wu)VA>{iyzoMkc<h`^&Zspo9xH{YnAKl2*((Lt**v)OMwsre
zaAFL0O{DtwuH(|3B4m#CM6};FgrfIuJE~7Sg-#OE?9lRN=Acjv`aFFG9UEq&a#An$
za(Z-oR0x!OI?w!YsA8i&NYm4y@9|ehG3Y$H3Xe`df}gIwkgrw<s^8==c*{$!4W|>8
z({tFfD+Ix~y^a;PGKXWg$8b%W3EiYHi~bfn1eYHNF*nT5V1`~J#<W#nu&60-mPs!*
zioJpUlN!Wi)_0WFJ<Yi`3t|2JF8tFkO%*lP!?h$q;_e|pBP*BF6kZEv2uG2AlXSGX
zu$Kv3dXr1C9VFSa2SAl000qDM3Ge20fML@_LT2`2`J_&qE>{AJvZWboT{%2|ayqrQ
z(7@blgbI~DXO}vKGvc-qI8>Q|QT|%QY&aBs_FL1qnsbo;P=V@izXNYucQUpE(QHP#
zE{^`tpucrDl7>(9Oh;Nf>hR3S-^82vuhgD*eU&$rcZp>EP>EWsREM(~av=Jj7P%rl
z4VT25)0qRV@TIk%6HH2B=IuM^<f#Wf^$+=9M*=WtdNtnHaimtW;;`;+1pYVLz*N;E
zoY-qdQs%6pP5Odda%MIy|Gk3wz;wXlPtKtH*pC`)ZF8Ld^&&^^Kj$FZCPsz!hl9>%
zdm3Qbid$ZN$8JquGH+%M9t(FR)Jzq_zsq1>?lJJHS`78=8BEJ6Tk7v}gSEMzgt7`}
z@sc@5*f+fneTM?^gXSLGI~c$&UG^42S|-4!6}nWru?!5W{n-L53G)1OIG$TlfWPl#
zu_w#3U`w(X`CG7r&Kh&W1m-*}^IeHcoEBrQ?IB#=b_+FIgsFFBJO9U|omf|w0P-Y_
zZ3#1_Rf$zNr^p5r;_rdgX-_t}Yc5@`Foy_kIe>T8OM!_<5i2p#f$ZwN%D3J5iJu^9
z0y7IY(=hiwP_6$3!E+Q~$6j-4d8>wXVL2^jg(tnO@RZHS{|kFnt;zdkcOZ237-nDP
z8V<F8z?oHicxLkhgt+r?mC-#^lPku9K5syxCV;*6QGo;m#-r(w6Ztj%ktZ8Ik1i=J
z$AINGz*lfRsS<mI51hZ_ddvltcJG?enPs^C_cWfI-C3wAe-H8}U0~L59_!xr0Z8jc
zY+X_Yw*FJd$%%S&b--shEGbRXrbvRb(NQ>V^A1Z+K7!19m)O<IH$m@AVN$6_ICuSD
z2v~X=HhuS{dwWJ<%QtRDZ*qwD?ATw(Ru08wll|efe-o#*i=ae-EPWCxOW!qU!NJB!
zWbc%%thW$1!*KhI?(fsd0x1i)Q&Y<%`EQ48?Jv-$q0BY0ABHLm9mYM^h`tSz<CmOp
zrH>U|Ve;GWV7Rgcr{ydl(OS81UcL@%>*VQktzde_PM*|PDuQ+GMMhRpm%I=QhnWF3
zxcb(4v^geEkA&U@x%G!w@0+jLMT;lW+DArI_vIjCFl`FGZE}v$u_@*Mml1`-ONyB=
ziAn60-IM8kSwan3zN2JZH6yX@9V0l>h)&IlkbiX?w{DNdS+Z|%M@%|qKKJ6B>E6`E
z#EO-Cvy^<jv<mtg<N3i~@8CwB5^<?m0nK7wteK}Bb*wUF{e5Ckz;HR)xqTL%(0q#T
z4U_pAIo9-&{TEob=@blOJ8zF%4Qo_%7nc7rB16qrfrfB|3V9FeSa}Awtumsy?)La~
z^?hc+wKE(Y>ktXglccpzrOCmUPB<Yq6csnDr1C;K*k&h1^drnj+rP~)YJLHYI8V;0
z1;uz%^)sUH2N3CrqRW@80Ks1j@$j4k8*5U@Wl>Wyur3T1)~3J(?172mhM4$KmxTS=
z!~XMG#{T<YNzVT2zzs>u@yz83<fQXbG7c7G?x#8UtS*N!AJ0awU^^nJd=xTeHZbcC
zZ-?3|YnU6=Gf0Bm12~a1jcT9W!Yqw@&Rcr)C75JPqM>JOh<=U|YD7qK&zchXReF=1
zvg096t}vm``__THdp6s2!xbh32H>t=6QJeL2j-Wb0WGV10GCr=Kx^zpj<j<DWioZ)
zYt}{9E6fD{_8n!53gsbYcQeetaSI$X%kfH#Ii5{Xh7XU_@Pw%VJ?XucX}+JxDDP%r
z<_!i`-4x`{a|p)Orkx<Q>?F=DnNA0`nh`U>ZrIIHIn~4*X<3K??Up#mrS0<}@ZAjR
z6etNv`AJZvSBRUr1lL0)CEDHL0byKPVA47{D*a{xaZePW$(;A{#>K7lP}x;B-oF%H
zpNWS~na$W1ZiMcB6>vyIhPWRd1O1@I<cw_!W1VS>QCd<s{wA2MsrdoTY!VY^%Qbwy
z8-tl!!r9%YuYuu%atJm1jV>nVVP9+!zU@$;KYhLM)@K2FM`a)L#77=>>!`3U=gP50
zNSXRYM?pAu##&K0i8YeScC6x38E33qLAm=QPPqFTii79UX)(oM>pX=nKO#(o4yu!F
zZNDLQRFT@9m4T%<eb}cJZZK^;hUc!TOs94`(>>iPOdP(#J#hy?;O1m%VsjnJIz-6T
zCR@h0Ta^B)9%Y6-`uL*5;!wKgGKLB1(`}11c>`O*9D%;a;4)74?0Cb3eOIS>uam&U
zvyORcqDhw?mZ2Zs3KPxA;<za4F7h0;>3!~RlD8=soTL6va%p0_qJQA&XM#*hM+<gH
zpJpokG&oAy6!Ny*gDxzY%j&x%qUC4=rkFV3zx|DP@xQz1wm6T?ebNYi5-Jo+W$CXb
z15!58fHrMVqB1J^99>C^%5r;gsN_`a&(xtg+a}VegHKVba}NC;$s;LJD~aSU3QlL!
zF>69CZdKL6f$JyW9sSKN7!f6MTlI*I=NZV(T?YDZj7dW5QQWg*5ciHLktS0s&QDW}
zi!UvOw1s01#r525Kb3108!yC*!s29XPYQf0=b@&XBH8vtksb(3Wv2~Gkde<H865*{
z@;fUZ6ha2U*T;q`kXzuZl8!D}2@oDHNZBH{YTIsK-t*!O^vcRi+{rreF1_jHg@1M?
zwZ5-Wk!vyB`QHd$4Q+w!<>_4OTs{B(!4cHwG}~sGb}$_|fbt5`>`ntNArwD@inTZq
zZTltobB-(Y-aE-|n>d5bHB1J7we76U$z_m`Ifusme1pbyJZQZw!0nx;#7v?MWs1k}
zk>mB6T(-{9a=!xIe#C_?ezJ{g@qPymvN90x%ZyUTSEw~bm?V8wp`9GqLqewuJhwlq
zE-xvE_g?#<lpcljSCyRin2-K}17P{+C(O}NC0dj2@`Y{nN%Qkt;H(se&jy^>q@S%2
z9I%=F&FR_uvTVrPHVY`*@e!mI=fjVt`%K=?oA5;Y4rV>l<MR{7LB00`#4q&6Lw!l;
zs3S$13}kU!M}gXQn2@&EJ~Yyh<GnpNooaVZqPA~#!|xh~ys(@^BrgcjWQF4}dwCRY
zeAfe2Q)|I=jVPJAGmfm3$bu|!aq>+t9>47igO}%2QHArlM+~W;R?vL#SXjlM8*K%z
z+>62E+bnukNSz%UP^W!A_TohDcmBRvf;{8>sI93$rE8|s<16fFkj7pb;robj(7DcD
z5G{h8VPEjo8VUm-PB%Gx#eFxwqD}uh)ShTeH|E=d)UXe`iKAjC&sU&6Li6B59v`&L
zqQJ@KICiT>!JoHL?92O}_$xGny&IZ<!|H!w!8T+5(e>eQ-Fi0p@k@jr`Sgvw;_;1n
zws;0PoR-ISW$r}V>3Zajo*reyrcmEgx^z>=U1)#Xh1Pq_`C>+@Fg<7l%)La=`JWYA
zsbYmn4mzxN=L*{K@;5#<W|^w5`4GH1i+Na;ikccS_+3+wbgaICM*<ole{~A%Yh3~X
z)`h5}p-Dyp7nAHy6vp?yWYnk5rL7iT;J5!V>R;g~I3GAK-i!!5cJCxO@=Ea7!hEJL
zM*y#79%APOt%F_dhV*xcGYUVeMRQ#v_-o?KubdiBcINHIV~=~-<?5=;^XsZ?`0=l}
z>j2<2Nl7AOz7VW;xWiRHC75G+474*VvG~?g)LS6|<9%<KH-~l-nG`X){qGy5?*>Od
z-zmko{WhRcWl^kyXEr`uD@$g-U%{kZ)S~hB>70*Tr^bBIVX`~vD@rdq3F67#Y`1C=
z(-*V|F3iZ_Klo}-Lm$Mzk6cUkk6Z_Cbj;EbM-p)9mg#h&Zz%76!4>Rl^@rz0
z$FT9`F<iG$mK^3?05^$vjw<=ZaZ4x#;{rQU8FUZhx6C13y#{psis$HVHv{|k>f)0*
z)2V`B1$Y!lkW>LXDiZJkGGtGp)%A0%(&TTLWtN4Ttw*6=VkLYPPQj~gUNGsv6XZ&7
zNbt>FP-~w6f0e|cJpMa6_4e40-I3u>6TQl6?SIZxn=T-I%LJH~o(|NOyoPxq(O7Q0
z6Gdzu!ub$?Do}9*^2<v3CCi*yg}vi!@YwO167Jl*Cp;2NzPOV6Wzpyze7fe}ts>TQ
zmm~Z#KM3<rM$iq-i(r{g0!&i>?HKJCP42XKF<mWr5L%^1lK*sJXPYY2e^iF`-0ZOt
zhFIg`^_akca<6daXVoqTI8b0f<yI<#U9mPQ#!bOP&y>hYpN}x_zZgdLuQYs~Q-r23
zMwuHk^1<_>2d!U!j7eA(g)=wHQ<3`D)rC3NtJ5~y(dQkHur!F^iX}p9dGk@Ma59AA
zl}^;;;{_a1L^N5rnC!TJnfZS6DO<7mE7S1K7^g6!V7s-Psjsl$*Yx%BF5OPXmXZ@#
zlq5uL-Gv}$uNXPnG>o@&&1va{i(EtTOPIfWJr(R>$eRU4{4stmTK~;~??MB3PBn*_
zcv~69(*=me?P78(JOETP0iMj&=J{^@$9BeiM#&IS(&AFh{9LF-X1sfdI=$av|E3i7
zcvlMh@7vj$r6qwF{7rxuj!neW*)nvjmoVu&UV`-2I&itVika}M4y%GDkhu=$!P<_a
z1akV2^YU7V8QMZGC;E`0<jM5<pd#H6agix>n@E}+x$jVrH9YcTVdLFC45e253IEB`
z(n^ZAo9}S#Zu4QmMiU5HB1kkBQTSQ61ww|GLiD^Y$BWggz}RSnku(~Db=p-Z{hUj`
zPCUYN%@C(&-Zk?SBBW_P_dQ6OFGd!h=x5tE$}(4ivuYYNR9UyEMNm@Sh~mckz~jPm
zh%B`Skpu2<c*SJ$qO+QH+PH~pfiCCv);BnmTMj*=f^=?AG=GR|Gut|HiT`K*A^1_#
zh~~$N@tRROIu7OI9d9kt5_=lXZziN~avSzUiIU?xUZRxfOJtW1GHrnyQP{r`P8xDq
zu622E?us-H^whEY^|yh)QzpdO7I7`CRxmeL7uSY;1i?1MBMxfdQm0ELGPJ44$1nJ)
zuLzcUM}VG@GQGJ(4dhSXWj3nU*0dI#$1m^W;KK?<>h!G;#V$?-GwwP1cgcjGvEVoF
zR=*4#PT2;*2kx>JDRIbL8iv7@rKl!)kBN|v#LwH0p}yr#B6uK>&HK2W{PC8dgF!Rc
zDV39{)x;>UX(#}NlM5mCmIN&}e8YKXWr$pu5b?TXMg=&p7Vm{T6K*R{T1*Yl&2<!w
zCrqI`qUC7i&}y9PFNn70d$1tY0`y0n@r?6vHb3AeY;ACd%0M-mrFfY6_r4WRXAeTf
zp><?&+kCouPaCIScZ2gG8)8)u1qOPT*a=p${LXd9;m>6ud}wkT%2wZH?_D&-jj5W{
zT+so;%-2!z+$&HUl8SM5+sN!&?;zvx6uRrC2|N*sWv{Qh3eyZWvp;8@MvM1JHP&+?
z=%q3}?5)+Nc5bIZVp9XZ@XT)Z#K##}-_OURSNqu8ObwRhnL^&IpLl;{I_+AT&$hPc
zV5vqdwj5DLC*7Br7ch$o6-twH>(gNVw|yvN<;0jq#Di~*B1de_U`$g_BRAe>MT>fH
zTBj<>9lL`3!~ZzqSqx+GLmordmh-ZuHaHH;7D3w+d9rlfWPDL3NH>|BVvfHRLF*7N
zwET4%qklNk4DFAwV(kLV(sZOB4S4jF(oc}ND1y^+-I>*yRqUI?9!zHcVe;dcI(<9#
zidEbB5iP%;f`<1S=z)S3Fux&6uUp$9b6^Vnny>;3&T!sr^D^EVV|iM!_aq~mTmZ+_
zFTiK3Yhaf-jBX;FU*{Z9w*#sq{p&D3EEh$=i5ledsxwT>+H$6|at`+0m_fvp<Euvs
z_R)e2Q+(^LNGJ1CG5vrOIxf_p&jKSc=jk_Gj%&%vN3-bO1sg!{mNsdZ_rsDCvUE|D
zHAYkk;*#P*rs&T@>^mvU=PMU74jXpBO+hi<PZ@oby}AK{9}hy2l?!{%{uxKF{{iAl
z6&U)Y9Vh*lz^hAW=D~B0=38?Scti2{dy*_2|8f#a!??T2)_tJIy3=i^mV&CXDt(f2
z5d5?i!OV%H?_Q~dUe=a|O_0X@&+6f6*fy@I<qSrvXw$#;J5eT}iE)UJM0v;i=sou}
zxZZw;rDfM4dO!o5)TH63*jh-}T}XT)$3XR2CmWNd2fK&QvF4G6K#FEi=XJ@frjtCZ
ztUS$|>F7nZV*|0GI|@F8$#D6~9*91@0h-T-;tUmlrnf6V&5WT!FMFUT_#xiQ;=!Td
zanyh30AH0niCL#8>93nbjl|RllORk6v&}%_UI%kZ_z|S;sDPXhYihzi=V;o?(O9bv
zyh}ysct<ey_Kd)y=b!nSmXh$L!-Z6CSx+skR>2m{bI{pz8+^XL!#lkR;PRlF^Cw4h
zzHJ5a@Ph{F@@IKbe-puCu!y-Exq<m@YC&7ys?h-XMX2)q2R`!SG*|;|8gMh6Q9FE;
zu{Dq2=61Wdxkn0nSy6;ke4CH!7Hwi;K75DT)0fz9XMwoQoIw0G$`J>-U$AxGBj$5!
zIzK>x0qG^PP~y*8y6Eg_+<e@fUb}J>FWBm{8ne}C`IYmq?Y%Ny-Z}~Vl|SR&C%L$k
zYa}gdkfL**D}t6|D_fs&kNNP(ki3nHLiOo(wB?E{wFw>vsmq?s_5Q0k`r;-d7wSp)
zZl^HAULH<z>udi)b0+onTDI0!italW&t-2vfv~nSyc};p!qMoao&CttpK^3U#{!s~
z@`ZV<qfJ%z48Va^&CIDaF({ch%FOexg?I9LG|f_!xNQ=lKLQ?u?#r(=7#+kljH$s{
zm9^L_kOvhqaU}0}H;%2bfe?*Adb4*8Y+J(MB-;_xOI!lIw}CY*n@mHF_CoIMG2mo3
zCT>YCDrw!v&&Q{c)jmf+(ES4bdVHQu5wB#lc4lI7`xo4NL6Ax;dBiAAaN<Zm3iO}H
zL&jC>3-(8Aq5kFn;H14f1ah8rms!fhXR#CLE*7IT<NvVvxgJ%0P|IH0w-rzAGiL^d
z^}y@KGd8|7({b)H#E$+mxI5%3Bpu(y9@hfoM;!*OsHx<OH=_AZU6S|gD08jxD=U7j
z7M%sAunTXMf|CV9`CV#MqIn7q6;A>;I0defZ*U|SXrf69Yq6&Uq`&6EzmuCVz*LXU
z+FZa+%yy$PH-w46{SWZ>Lp5u9;sBgr6EWze6uGwh9SV)Gppz1eFM3ZP@2NP+J==rZ
zrkl}UlBQHp{0u4t-GT2#T^Knb7`^u&fMZ7$s8-u0s1sG7r<S)sga1`%xh+PO!`t9l
zl08{IumV+j7Sj*q_AoThoC$0wLJQf|aOwRA=3iPkv+d$bJo8J8-VYmQ|NOoW`*o7w
zRaGyOXq$$HsV5<r&4KYLmm#`fF7ypbP=!1zdTMG06RWNP3%!i#{)-Nfeo7IOFFb{!
zJkE#ie+fM67SS#@b&}Chz$mcF<ngb3_AE!`*;2Tgkv5)&r@!82jdn=U)m{hLtjzE5
z8lN)jbJpO};25_1#6dLG@n-%#pG;hRPQj;|L#VMK25q?(?n^sn(LS#2pjoE`9H!e5
zKNmAR&&^wBT3Hb$Q~<iBm7&I`!;p7*8|m}OhG655xT>TLL-(@a+Bt!4)p!l>uIYhj
zYBJBdxt*OBZAS8#7a;5S51a}+u(<FMTa~?z_(!(D{7W9BZqqtE82E;<)v3c+jxJU|
zAOTcg1N;|A(5vnoWzm_VpZ0uWT36enb@XD8Uz!6(``_bU?lt+rbzEk%7)SbcL08)l
z{%TL-C~4Jra%B)>nnP&rQw<vV^8U@9T^F#R=^jSRL+A`KB7dsvnOj>w;O?|mhb3B#
zC^2OM^K^nW{!}{+?b&<T$0ujAW45}q_~l>rb88FJoqCF?YO<qeQ|F;vr7y`*H6VXi
z#lW}5B3N<bAtM}~iaU2+WY@HXvZ>|<<WGwmY@ee67e8u{CP9DN@*)zeCN?upAxaSO
z)f{y0wqwG-I*iMkOzj>T6Io72n!Q1U*}C;Eb3(QQ&sDx>{`^{wGsfRDB<Va#M<k%#
zS~;R0^q3!OFU)TWp3AnL@5JF&FZQ)!1l;RchG%S?!Ssy|T_E=nmK>BP`SQjPAb0|e
zil4(0lQi&QQn*>kHOJIfo|phxINzg@5zddqk%w|rP^lDM=DcU0tS`n#%8BUxCIuQd
z2V>&XOg2R06^PhOAnUhJ#K=cHYOiksy;{v!pH<;Fr??vfKPN)gcVV(_PY66Id&{in
zW{~ciWB5MhrufL-4Xf6+poQ`;%nwhbOHKRv29s^5-VF~L|3`pqJSa;iw1lvSedL+b
zB0(&Zu7LHQUC9drHP(Fd4JNZel*;%h0q^-nZ2xIUy{r4+uI(YFmz#_HoOmCHX4f%u
zK7GZCELD1Lw*r>NWTNYqD2$W%$)-&5hf)1xh;I(0skMZapI!s;-=)ctbUU(a&O;!-
zCR5im7uu6o2gfrFz~#vvTsPQ?7ApqW{(EC^v}hZwI#>*M#cwc<(GEEFO^s?bA3~??
zj?~UP4uVQ=VHB4g<9!%oeebBy75NwN<~A>S^-&Inbi|^y!4RnXu4L!hFNfVz*Ylrd
z9f9ROYIJeuc68v@^`$d<p?iuEV=-t$ri&k<2lvJ^4bu(L;bs|r*_FYcpPJ0puU_lW
zYGepX|Kw<vX`X|h;wETrk!Gqli?bUIf?>_ReITsW3YWG@!nrRKY5Ln#98yVv$@~21
zDKio1=FW8HkLMzuV92tTZxGSl&gf3<VWJ&4l4-;q@+2Xd9&0&|E?f?2k%}2sU2J8)
zsGo<m;STiuRAs^%tCK|27&z}@2Qqqx*exdC_&V*K@ag_Mc4GK<T(IOB94LB%gEz83
z$1wxea9W^Iu?ejY2!P+&{m6df+DFbrpv~ulIMr2<P8DruZrBQf9wPw0V2LBW&tT;E
z78<vHG0oU;0*YR<VCv)!;o=^&;gcsGO1O-5)hFO=zXQdyf1ui-5*2+_C@1<r>XwVl
z&BgkRiyX4n3$~$+$~88%U7B8Xj=_Nj0iJR75%zb=Fs^kq1S^Tvpte96=WgA|c*O|N
z8JlH@c0)YQe|Z(G&V6D=6|Z9Xy>ZYgR0pBbd3=|sePkq`Yrz@jV_<wHv^Hm>gxnL@
z<*}P?cG0It`)@<f)DNssy*ZuZZ%8AYeaQZc6G+^;dyFG_%(of|XMS@zwmqvvNKMO1
zW|CSHKkT|biTmnE7ykW$N>d+VQq@#?^kO&Ieyu>)1I9pCB%zDlMttXx=J@N82zlFO
z!0a{=g5ugt++aI^e+$l{{e<JNa+W$Rijkx48S5NED@L%sT8d1)`UM-OMRC5FMIc??
z%3uDXm;JKMo4@GAIlOQG96or8Q$2xdwmkL}9Y4DU-iUE~tw0HEi5r5ZywybfZx+)g
zvjVriSO|t;v+#UW4K9gjgZ&bc5Xd$1yz7{Xn{tHMn{oHx+Cg2isLhzhAK8wJ=XW5!
zktfMT$#6VOiDW%+2Q$M89JA1(V_xORN?qZ<DccG^)a>cd9~JE5h;qm6%prKnL2TK7
z3A;+TOdFIslui}JuKia)d+QCz47>%~3|z?e!=BJNVnY_HhLTl=*-&kuK~f@4G8v%*
znAdGh|8!o4$jDV#^Xfj%o%@tI7b_2&Ke~g=-}mhOL#Hv>!Go8{Y4M@+V(4(iCwLVQ
zgHy5((&iBXGQW8yWjHN#e6Jb1DE%3CC(p;$XPmxdvYGUs^#REn-X!B+0&DxA8S+fI
z<_?n^_`{b+HvFi=e}^nc!jaePU)gQ2?W7!ag<{4ovV|YiYzgg|CAdQVI;#s>^v<~t
zm@#93eXn#AQ;LuBEh4O`)sA+~<mF7RdT_+Kd249W#ZEN4BY*)V4^YtO9A?D<1eO_6
z#XZsZK)eEj^j6~Zs6mhyx&nRv<!owz2BT*&1TUr=lSM80`1j61aOM1wtU?<$t}B2g
z>PzVxopIPM=fV~U`Z3B~Dzs(49u#=RprH5;40=?~EP8kcuMB<Um(SkJ7OVdP-`-@j
ztqp_D^20DDrV4!>N!Tm+8BHR`aCJo`!VXR98_J!HT^r!sta)_hx6K^wZv#}0C`07=
zBrI)~#D}&?@UtcZbz&wFJ;9S`Se(dIOvz$5=-*`{)Gj%Gtqf()yL~~iO=9%S3<?=;
z;nb~gBi-ba3i}HiQQEi#$EBND{<@P4Pi-0z;Yfdz_WXf{?00y2WF2^#Szzn#iNs#9
z02M}uP~X&_sE@V7kjFK4*{~+ODm%(k5W5R@d*yJtm=h5>5skXPGI&j60<^b%CK);*
z4{Fu-AZn}_HosS5=Cr2a!fi3w`nrNYy4--e`QPCGF)`x5(>e&>Ul)LSQwHOIxF2e3
zoe*+z5Tjec?Ev@Q2j*c|m@&*Q%Yqkg&f}fgFCm0H#13~mjyj(TEoT>St=#j;wq5VR
zM?)1B7VpQNms<GXyA_p4yu^6+nbXB)6RH1m<o{8c3x`TyVX{8AH+S}7!euL}ay1{m
zaPz6y4|B1tL<Ow>-o~HRTqFE4Ir3bSkM9D7No?hDo{H5GIGneGcI$ev#kclSn;p;5
znit9nO#i@_{BMLgk`s&mi>|`$o?74!EXTZsqG+~03J;H1Q>o39h|aoF6mR#Y0t-_x
z>YEp~S@4+yQo3YF&IRkodGPSA0<oJ`0a6j(Bz0v8{C;^2_CHQ$F1<>Jt*@Pk-G)QV
zRF_b8ZL1NuepUka6NO-$Jew*S$nt}1+;DZP0eBgU(dY5=VC3j^_;4Z#cJ7{w>Ff1L
zlT-ltP`H9z^WBR)!}A!DR>O$)Xj5OOeT@3?RQA`bZ#bCa$*@wYHTGLN@yaDch0<D>
zQzuBDtxDqu`CMZaZi&L(|8e!^e>r|(+jc6=N`p!?r&LG@)wPagu7rqW_-0mS$vmdH
zXjW)aX+j!F({&z;kWeY4loSyWB~(gy&inbi_Ycqe7s%DsI?r_+`@U_(K16)M6lO7G
zj2b(OaqY^DETe2G+G;L?u0_32^4yExZMu@?G{SPDlh8eF9x+auh~XEqL0?)F^_J`=
z+Ruz|<icP)`;YHtMp{G1{A4(#xI%C{PmP`Euwr`s6JS+~BKTixMps*o$H|Uj>EDjP
z%mF#JZ*~lxATcQ4Bf$>x_l88D5Zv&s2{%V333tyIp*k&<sJ>@_oX@Ez;_>O2Vo-;h
zrT4*%q^;0;RDj>*DHl*X4r0!fq09bC=-YRbL<J>)o!3)=O=>8bSzW-zOSeO|V-c>V
z;?S_t0CO7e!m*J{V8#MvL3W%2xSXnhiPJslryZj7@t7&F)AKuqN*;oKeqJI^N=gUx
z|B@KT7(VY%&7CXQ&1813fhQi1$vFpecBwdDxS+5E{_#wuNe{nauxKi%Rz3hv3BZq~
z3bcqn`|PK+z^r@Lw0pt@)RrxWWw?+%xxJSw=o-aV$SKmrBfXHD%jb)`fd%&81DoCn
zLap(#w7#JaRkTc5u!RB9n&|@z_c=4F(+=8YKcIgNKWjaGj7;HYxQEL{!SJjKyOsMN
zjx#c#E3G%t8yBx*RKOyb6{O8rS(f#SAvbzMw3S?R8v)d72K&>E#PV}0s%_@K4;5$l
zJotGmTg7*vjmtPaZQe955&)sblI-`WC^VXygMMCR5c08w)BW&^*k?!J7%MBL{`Mgz
zD{qB225n%t={#4GI>a5@>W{0>`Qxv=B}8h|AZhjRg>Lr<qG3LpSsZJDQDz>@#drgl
zh4ER>>OrvhuO0394qku0D_Op<5mkHb*}(Eq^vS6$5Ef}pA9fysiO+K}L+2jGY}(3H
ze;cAyiYqk6%VYDo5HQ>w3aImqTkG;0q?47nx8@R{H+np~ye|d&?2Mr=#zf$#yqYch
zZxQ|T-)zXV=!3_{-N5UX2SogN3Byw);HkknHs#7I?&qGHWOgub3#)tyH^t1*Y0OWY
zlw}BQH41Er{R*}bK62$lhM;IVPM9K{LduywxaWFf%egW%ol(Rcagm@zhVPO^_zV8(
z{KXixJ3K$YP0-fm#Qw@XCy(E^ai@NX^EtaOxcEyFS+S#}?E8iilpTx!mw)5gjrD0<
zLwAB;z*ZjCc%Fex+EUzHbKXd*XA0BTy%*fAFT+~XC9uj(6wG?Vad2+{CRF8dvwcKq
zY5rBBfrl8})<oGME#cSaQ*nvK7A$LPfT#9{h0$N-*eWYAM(RS@(rLSFE)q#rVr#}z
z5{m`tHrMfH{BrK)(gj$Wy@$PM?T0(oArLcvu~6i@G01C9rcJJ$phCJJuDwcN8FC$V
z*R6-0(@TiSh0VMfCK-FYF2J<{dGh(>d@$R&Sy1Zvh<tjNgK-(juym<Dn{1>;ztvc?
z!W^EF^3fkLD;YgP@4;8OsqD)@E!n<89ZWvO<MA3l>iR1YSE$}9+okV=;cwq^cimK2
zkgFtqmAy-p!qvdD${cF%=7FC}6KVT$0DF}cSm*nlVBa@}w$G8}bWga#&4T5uz&RKj
z>y;rZToy%>w21U3A#_$Pp~GjR@aKVC%q)+9xutC!Z=4itduR`f3l6inD{qn$?$Yq{
z*k>>o55i}LS%g#7Mg71Y?!xnT_`^V(u4tRiW>v3cAD#9>-#BAN%y{0~s^{d5$^zW%
zQU}w5RoJHq;aE2K4*xyPf;r(=Q7D~AzCJ1k@0ePF)yd~jccmU0_5gH*hhXyiWmtTu
z5w}$Y!snjzIMUXfNIvSsOVj13R=x?>&GTReM|0eorf9HwBEXcqG2DB-M>sIwo@V~u
z&osF<(7s|y-D>8t32AOHF+K=Z{pkXM^coCUA;pp&aim-E8oX(KN{oNyplVwqx3MP;
zRg0Ek?=gRNZdDyBhUN22inFMnycx>xMnKECN>p`m#K2=K(EU&v7M{w)`ES;6m0L==
z$>}9{$-<TYdo#f5KK^~W>K;5;t%CZM*GTV?Cc)d)CEV@Y<<PfxBlr4$GNCt4unsSE
z>QX-+D?_TG@b@jyzOjTA<|U9Tx$)fn#~bNzTRAA7sE0cy1JFFKj{Hw9<Jhau5SEHy
zab^q=zyAc>3#ZUacLTw_Hd^2)xq?isQepn{#33@5aao~?uzpi5SE%H|661L@Z0ZI0
z`R6DRy}S%U$7|3S{{HQtnFfD{8}Y*BC{83}Ey{1mAP;JcVWQ1z!KtQn-1KWMoH~;Q
z7LFO9#P367R=p-B+Eq9|=Pgc;Hf0h4&Qv#NBdm=PV_o4dAz0@ur)8E3?5ZN|Fn$E#
zKeee%-d61RR)rfqWSHVM8&20J9?}fnal<WVtb<b0NL6|p%<Q~RZdJVDgu-SrDDje8
z9(WM8hGk*jOMNQ#lJ7&zkK!s4^=Qv)W7-nnLiQHy#AClkuuyp;JkZYbD)#4+6%{LR
z--?eUTQi8eJ8~9t&nU$kr#I07H)r^jHI0pC5-jf;pF6F|!uO-B;mkM{@VoyOBx0Y!
zT8Ymv;Oa_JC)u#^5>bfVlFZdhhQ3XFg+F}#1PZ1PxLY|n_^{?5Zc%;$Uz}Vyf7#La
z;8Z?j2_BQdAZwPpccCC{R2YUxXW#&zrR%)I@f<;UCcY*brdvqRGT%dj%8?70%bpd~
zV45}N+>RpNGQ-5z?h;B{@z%tY*EoiM-nC}##|eCAXvSh~I#@5x1uj<sf433rv1&3t
zy?h>}tapN{k%nMF-wycioD&=FD24pL{larb$A!jiFW{g)KerN3<&J&uMeS@Ecx<%-
zj%~ONj}NHR@zz`5;XyrC6X3`O?54xZr-39vT!|H07*mJY9;`00pJ>iL4ShI@{&K(1
z?Hggj7V1{QGLv_B)KrKAjn6@8>mPFEb0C=ei3zS;aHc(j@x-`nG`^kJ#C=#f5zlNM
zNjJiKn|Y^4L)NM3>{hfGmFA74Q%alo?v^}V5xX1LYb3#`V|o1NWJYV`PSY!m72wse
z6hfWPvu%fSVd5eSHb+OEtt#k)GK15!c<d-z(sBbUhi+i#MFCcJoade54s?F^L>7NE
z33hF|iVynsa!Q3_?8r=4lB?WLI;Wh$gpgc3b7~wbTRfTt4O+6e7m>I_%b9IG^aSfa
zctW^+1XS<ShcG8Q8|D;3)Yg1Ni*1FlPqCJWrs>00pA7E56R{+Gr6Kz>u>q&XL^6eY
zr*Qt!i`)d$rJP>NO(^btgz}Bz>`u}EymOL-cV9MO(Va`ErfA4&I>)2J`F4I+a|lKh
zy~iiRfr8Fu%Rn>iFtM98g-UDAf*&6zvtrLI)Yvsg=qPH2g*^r=uQwEj!o|6V1-i`P
zksEHb7zq`g>P$V=8u=h4)0{kkrdLS|H{Io}v9*~X`lC~Lpj@1Io(iFU6_+t4M;^Lw
z*~6CViA?5x5Cl74<N`8AWBGh0G?;Z9i@r?7w8v^R?axH&vv(sd(37Cw6v{b=3o>W`
zqiE7UL$I0M#Yv932n!d<v$rSgnB1*!!POtC3_~df92v>>hE$ZkEVzvMKL${G6oFF$
z0ryjTIY>ocfKF{onBmulH3tW=b%qPEc>h?qa%Mb8iT98e!x)lwe;YKcbLF<K)qqSP
z-<NzhhQGru;|iDNVUKGvCv2U;<hPe#!b3Owcu9fPxm&|RqgQAbF3sk88laNy20UyO
zB9QiT$BA%(EB>AUy8@%oe*PS8d){lfbK(;9G~C7HZqI_iwjum`VIF(XABVk5*3c<;
zDp7M&C8PvZbNjbG;6{G~roiV9udn$<tb$TuUv)b=pN++%e*bu;mC*XO$u!!~|CRfz
zT#C)>dN6xRBe|vb8mIfzlWE~U@N!BJckpH?Y}NjVS~k1jtf4$xG$CG~vhjsr%mYpQ
zmEgqr1jW)bHrwIZ7(Qzt7L6hNY`CK!2;Lmw*?l8t5Yyu$VNua_@G^GfEF7&N{KHi|
zxXg?2PZr|8;{cg^x6DST;}Oa!FJRAnBG~>7TC`tv57rd=3)3tnqx`&W<ofSZ>{$Fk
z=$Kj!ts+@?CC`}7aY!pWn{))T6C)sQX#=#2X`%C*dh9Zkpel(xgYw-cq4WqdHg0P%
zxBRm>l>Vc*Ps)cAY1<>Hy_5nQMpO2~Hxk=ko70+gNBEvk6S2J}1EwMsc*nz0(3@-k
za~GxK)F)%uVOLE$K(jz}?s05U-VEUl%i-Ye3D~u!zAW<HRIt?w=e`&%0oj3ZY}T#4
z_-yuk*c{z}t!6EP4+bLa;q*@s=PUzZJ&v?JJ_)`)*n{ES-WYW%1Nz?A;$;s#s>bK1
zD~&Yq^S3QD+*pxr*_?^iV?)Rtn>FOVP7oNy>d})uVN5oQ&q7Jb^E;~toacmto`#!O
zEBwEt;Qz}pHu_&q@WlV&1RF2+H2MEH#-{(5V=S9*z<Vasxr#S>f=5@^U_p`#%6pk%
z_epUigO*(7&qTp?v#)q!X$Q}BHs!Wwo<#52OF+wCjLYSdGl_G<utQt{O`fQ7GV#Yt
zOI$<*_PY}CXZ}AfF3ksfl9JJL(m~J3|38kg`Tuf^r~L2FXZAlo-_+%v=Kn9p*y4XV
z#`_x$S(C(1>khUajnyV{@3|zw5j_{ie}r7g+SzQ|j|uSFVl-SCy2qKX%zz?k-WuZn
zgj3oy9*3*#Nw}v3l~L4S-`iycqg~9Q+c+8*>=dWlR#d_K_dHYS&`SLLco}zC><TEB
z$<u-n@~qR^pXhE|ibqb)6D(BLM@P9QaG)X*KW~?ytCW^8c}oI(3J6uxg^+$|G+ULu
zi-lP6oMO{3Y#x!qP5rWxYcKF310MIe^{)%jI?WRd+Pk@hll29?Wf|OnycUg}`k6#7
z7p24F`JBY8=djD7l^go%i{UTBh4#-CD4SW&CCt-id+ynC=B{=$=7lB3pBH7)m&!1%
zE)Mm#XO>CMH^K_-D*!%$Sd)1V73yVigs~*HmP{qj7v<sQ2o6`wKf>xC1(p4`_$5Yl
zg>n-f#q-bBi;&!|LdCeN<ai2ChoAodI`a|*!D{_rkTV^RO^txDOGPLvn9UlVr=zfR
zB#l;{3tJ~@v!BbGNJP{Ro{FA_4x*tX--TxyBwxkXR`c1p@&>Z+RzBSFwqi@Ko`-};
zFQMDX>)0W>j_ZxkgydWDbjt!)7;SnV9+|1K`2+IoYEC@rdHfK(Nv_6>1XWtU=pGs^
z|Aw(u`>;T_oLemWgZ#~Xh(2+BxOL`A@|M}rOV;Dq)zmJsq<uHB>)DSQ2X=Bkpg|w>
zDM0WgImp=ZiNssIgPBh5oEpywS(h@N?wP}9@h<kEy32d4cq7S-clncDd0rUo%X5JJ
z<>>C|mvG~_Q=G+{R-7)wGcE?EkTpkzpe8no&h!h$qTtV@M<|NstD4B?2%!Hhh{Rhb
zKH>E4RG2XDInl2^0(TCqg_Hg_g(Ir=gZ#SPthlp<9KUKvKfR8{F0)1S=Ic)My8H$_
zM~`M>7fymF{s3PyRQP*98jPPZi)lALLc{q_d7f_uC)T9`2WL*A*PH!Nk4<E6*92jP
za2{0s*@}Prb=cp$sdPz17?kC_fVl6II17vI+{?Up?)35)Hd8^3`5km-ucF*(;^jnP
z>AHRh@csu9*^8N&<2e{77R@Az2eC#&jf(v4z{1;JWZfY{6kRcmh1u4^zwsYHW?CNf
z&E0}O%P(^uN=^uz=z9o#Fa`W(%HmhiqxiH-5yyQpq2r4}06&kz(8^Tk**XZv)nmw(
zd$G{#DND1YIT(6h1wF@`u!l}y@8&3?g}6B;7|YO<f~$hnT~+Y+x-R`_8iLc`sj!P`
z<tXvR67&S$NZy9;MDmvu-F@RddX%Mtjb9op4GBe=TRGsZa2RCHuY@J3ZtUstZ6Ibl
z6T9QmIiHEcXl7^({%O;>w(&27%6Ao*!@n_Xm0vUoIWGz0d?&+!qb-okbF-%uOo53h
z>#(|u&pK~B#WkrJVA$J>HYxWdk;F|FBqpfPzV6GUL`{pDW2}(M6Q=IBiYy+}#|63t
zl5Zn+!HuqZ3|*m&cO)#B;X1;^xL9<VBZsnPlnu`cCo>d35pT@mu8RA^j*B~~*?eQB
z$?rFul?EW?_jlMgw;g(aeItnlJ4o7<43HSF%|Z`0z^^;j<l~&Zw0HA3T6FFtw?s7n
z#ZPGpE4GIclc~O(^??Mmst}<b{36FNQe0qbD31po*5D6*zPg^f4NO!Ihk7*FB~O29
zFlP@o->4wdI)G{I?1$J{t5K%=Vrie(SkCuKBpgkOWbY5JgzGb<;hcUl4vx0q-t9@k
zij9|XpU(qmd!2$#yM!40VLOxf+AZvOvJ->n$iM*o%t;%)1Dz#xXtrsE&5xIF1s3m4
zLyfTt&AS>0QyX6pxsVxj#Ksub!ruee7-ZnL`!_fl!#-TS%8~6nB93P@<8WHiHSWYM
zSFZDuAHH^t#NX;~Q1M(F*O_EYTPL)l+C)uucrValHv`<y-)q0NTT+EpdMLBzJUq(!
zfp6A{VZq}%EPeJK>&w4k)yXuTS*O4jnx??79gDGG*C+VCCyzV3c{Ah}R$*?NKFj~r
z#u-US(urC(adyEh80mMG`)L_X9$Z-rm6O$J7)=7Jdtziq{Sdy}lnt`tjyN+~l%-yM
zjqi7R6D8+Dfrh#S_jRv6`+Tv6Yu|bq?>x1IFAqo1)J={oN9e-_Hb{W$qc@~?<t)}P
zoDJsovQVBmfq8zG;$FOcC#>4c-@^kJ<2a?;@biopj8c*$YddDM88zZ`tqah?=6iU`
zW+59|G#8A2JJ72iw{iW)bp-PkJp*;oR`8C$2<ieuoH4^5rYzyvg46e4vY8jyY~i0t
zYWp}F={YEeK>N8{B*$<poN|5+Z*^_ZEJcqd%uwZaWHj-4w3QH^D-ZU6Bv@^BJKUK1
z0f(laVGoaOhjhhEf%!`lbT!L?z`G)N_uCE5R%r;J<{qxH9|ZMZAK~*Pci0shk9H%E
zp=e|{+*7!UE+tQK^-rGvId~KD+l*Lk?<qlq`eX1gmZ#G-Ojy?F1Y)hsP+`nXjLlBO
z$7Tme+Sxx4w@|Na$(QY<;`@2-{A>sC!N1%AksvtdA;FZ|>d1%1E%;$D1>Z|8f$fpr
z!k;E;f~+9^UCL`dEL!#k%>sMj-~2a%A3a{s9%jrW=1(Q6N@k4hYR4zMLGZ>0#`n2+
zj<uNz`&M`o>TZ+?R;TYHX_?Jr=(R4LaAFo6r!$L*2>5<R`XMx&*Msw)enC?g5w`bN
zG#ROsj?Y$5+)w-9!Nvz<QOznr!krRQDj82+i`c<Y=@Jm=R|)OB9^uC-O<Z7g41ZkY
z(02R@)NSy^$wTHOWP~*<J2sZi335OtdJxAr)f2B~AF$h31v$T3NRiDdcsOr8+goN2
zJ1b2&M-?~xyT1=o$Em`JyPvqoacKw@J*48~I()u18&1sCX4{Ns3Mz9l_{{hnyg6?_
zH@Igc{Ibl1D06>)rWApL`?Z*hZV5QaJw@NKzBcWBPAvGg8reuh`0k_$O~?sAozZg4
zd36FAyzxQ!Q}s7I<KIg%hf48U`80OXDUvi4jij^V6zKAK**NA=Ak1_*gE7Y~+2v8~
zxOPVXETO)TW%LEszxSjwOeE=*&M{=kFohQ{Jn6e1tr%AoOa{X4!PE$Q6oX2b`$dJO
z>GHV{Z)y6E&qV&1vWlfHbOJr+xm?%HjZ}BP8rjfh$TT*X<DmZou35H`csD2D-@SJr
z`S4G03-)Ffi3t#9KLmE?!r;N~E2xm1h?XbM;-7}+IHoFtn_Mx2t#LU`4sPznt=r9b
zAHWv$GRuJI%c*F>8>BkcZ6qt~Oi*r=G<_dZLKYpB!s3)ykgb@7qkpTip!Gu-dMp>s
zn>(@T)Em-$@F~VA&x7cUKCWxMEuC{_JXK27q`uy6Wm6C9QmwKI&_2=3eN}X!NAEs{
zRrY-M-0CB2$PuCAc_y_-O$seBP@&h}^LFIoOL)M|4?^UZ;FJ;P;BvGwvpYMA{sjpp
zzCITe_nyW;iwVTMTaQ**|KRq`_2nd@Ux7<)8=lNC;T-@0IML(^h`oJ<{#JQdCtg9U
z#^#e>UnFVx`RnkuWDv{xHTmkF9(^Eb!=#GefnR<C4pf`L@RpzCmNDgisCVG0?rgmI
zPy;kyjbXa6-Nf0~5;mFYz@`Q97?~l2H*vY>rL>&IdJW^pR0oXITFQE@OTprnbJ_R(
z^I5ByGsmNiaKoYx;51FZGLj|meeZs#Vz=@7qop`J;t03fvIN#I$iu&?Bbdm_G8k{l
zz^5<+F5ozLYxPCwAa6%@crK@UQhRZPA#ZfsCjmiemh6t*R@%G3fqKpN!8<#g(V_GS
zggC3v+rC`_uZ@=>%%lj^4#&ghg{?T7)q}-{pD@zGh|2KBrh)qLEdOpOc$|)gp-Un(
zp=6LaWxc@Mz=hcN<BFi5GglB5ydI67y}+k65t!n%pB|Pui#bPE<7>}ETvkmm?9x_}
zfAA7)J!+4o2~+Wn^JT0ySk5)6hY<IPGOTmWL70=al-^!d3#QQrG2(@e;L5F9;f&oD
zm|ma@;(z<$(iSoLa>ZtLRCgSG_vIn}Gf@DWB0XmFM-+}^=E8q99DbiMgQmRD!nT+n
z*uQNJnvDw<+#BDGPd<Mqsz!SF&Ow9Km8SEorb>`<Z^sK}{JCc^hn;se!`g!rSO`C}
z(SKLOJrftBr@ALm2c9j!=kHm{OuqA;H-rl2>zMcPIvgi*kKg4C;FDBkb~4_TC6xK_
zOfEH=Sf@j66-=p?eI)E}OT<e?9iVjS9a#MKB@G^zv2eyhCTca7soycfG7)<=KOmoc
z9Qp;z<W|$!IyPYMqkvj+a?DR;fV{oA87j?2Fy-YO9{ZyO^5su(o%3Xj;-%YP50501
zUaAtq+CG>pt51jg8!+X~Bd##75sfay351zuB*c2MplWI`d8@<sma4KLaLZhXHkge+
zhpgDk`5FBA#(P7mXVAu|Equ=@l6+p@$~kOMV8RQzP*-fmhMk__pY`U@UswqT^UrXe
zi$v)nzf*#ev+^vnnK$u;NwJ*44q`A;k2ESba0YS@p>|^oID|9^#{Sd*-x0r|b=VkQ
zx~*Y^=7Yz^Y(cinC$6=`5!i$t*f7f)j%m;14AYF+k{L3vL+Lu?ACQ4#{I(=QZZ~_T
z6Ap%sW^CuiDDH97LoR3MWIAm~mY$>UAg^FATW%W2XC8%^Ti*$_H&23n`!%dQ9m}~~
z7$DtYvmw{67nde@k>9z^f~yMep?_-t=;)Zkh1g)Q+NZ<K?B`9QcK`6Js*vQx{l>eu
zOsIbSA6%vpiG|Onu+f`82r?f=fp<t1H%mDW#~NQkcV9ysULuZi?z&i*)C*#qCR=XD
z+scjBVd{o1o0}sq;atN7@Iby7U&~LyEgvS(_+2x=<hBPW^)|wz*$>g}y_`TfnB#6N
z<9!k9bzt$|<q+(^_bh}v@LGW~egCG7D|*85{rW*N%S@3i8216}P4~04CUsm~^e0jw
zJc1|Ie}h@QTWD3g4y@$U5WeIt*WIjw&kcC{+tfQ8u^fesToz{%*NVEU_d|NtL}ouD
z7;WkwgJYyNm&JG5+#mA!^CQT$d#(j-y+HE1)DMjFU%-g>WyJ8vci5m=Or-eVzbk*Y
z-{&x%z0eQg&e@J(rG7Q=I9?4i4|S2uIR=pIR*P?a^iU8Vfz!{H3lo3HG8y}++_srs
ztZ0o7;=mkuvE~TN?r_ID<_@HHXAg`q-biGYSYc0;1$<iPOTsk=@vnmxEu8HPYj(fl
z9{yZ__w6cRUivruXycFU?I^nU5U?ZH6Tv$5HYh|*pb&dUpmZsQ1O#hx^X?QwU@f5H
z)Fent&%_5jr}*)|UDS2VJBSOP%l=jgakH!|n^-ax<2?k>|J<J#4VB}>k0v<Ap%yo)
zaS(ZX0^IxQ2@f=8fQG<Wc)u@)1b%tTXU;!>zDW{f->E>&ECXgUQJ=jH4}l%#=Y_YP
z%A@R0{(WM>NUEnXlVuimVuk8;aIm+*5&y}N{ePp$EAvp4iqGWss;kgi>1zVBM?W#_
zP$uNuSp};OFQZFuiQ%u|Oi0`Q2tB;|$m)MD;ll<wei!`%YlD~3_B0uIX<LP77pv1+
z<K=kz&LB)Ut;psJU%=<qwXCIHnmSG%<Zc=HaWB7$vVVPi&L-A`t@pFS@jP2S)8iqU
z1*}GS@owSBQ)+DE&h^~ml5)=Tkq#|eITl-&X5*C|Z?I{}Sva{Y4Pt#2se_{uE#vdY
zE)hoPcB+WGGWR+cR}v+3S~mu7PreD;7xTOyzg&#j6%RL8e-v)e_(aye>gP;H>2L+N
zKXJ=OC&HrYYg~cC7tk@O<t^e}5Lgt4;YU9Tuh&(Qzh<Q{b*45`Pdfzh!bdpejv;wy
znoB&NoJP5blkmH|5lkoSC!><e5S&I+$(esqJ!lzT_~Os5EZYp;zn<X+C3CFw7){=u
znt@gMmh{oOFOW6&G={97!BuY(WqTLK!D5wVn0m(t?g*>NlI0@U5!FierB*}ZjV`EL
z9SUK}2Wewy9Bw`A#2&0XLtY)8LA}j8Va|+mxUN?MPfFxrtgb0Ju{ssM7LJ91Z$7-&
z$Bl)o5b&JN2q=@DFTC(|BE43bQx>Fu6Lto~VXOKK{6I!=XFp5QTMcz&b#^TrnJdAj
zM}C1nqaNX@|M;zm_eXd<Wf~V8>dzgy?#%rAW^>J45SnFavl~HfIMpJ6OA?c&E_{Z3
z(dy^8a7zp=UaZeV<)&~e-xh()LZ08&E6H9u%dntvqsTG7&*axS0P&tvQ0j9o@jX3=
zmQj-Q^T0>w*wTQak~hc{o~^j!v=!See;TujC27vAt9a2~1vDgv;g-T2^iCFI_s^ZC
zI(=i=0k<aZcHb?5Z<Z|4`ICVMyO*-+jB(t;i=mh_y#l8mRmEjmWiU6;8q*!V<E^jS
zAT!|}PSBjfStk;>p`QijS#BU$F^k1znZSj(y_7bX(;&DBZw4*sCV6WdyMGEU=XX27
zhYg{Yx`UiMU|{4l`fL`5Z|l!;+hgC8eEE-pG`j}0_t}B(9d`4Xq^sQ6l_O~SP!%>9
z1|gfP!?rfN;jgq6(ED~Ky&V)t*6Hs>%hK)m#@+|V?Qp^;wU6P+yv3wNWCu;RkfBAd
z2$TKE`>gn^jF!&}o1U3AV77;Mcl^i!-+&G*yYn4CDda)NTXhIhHeq>{pEyJJXM+3s
zZ(*ib6LbwVL)Wku&eCgv_Vv>2!#P7-)U85e`<KxhHb%5;PYb-B*^2FZd6p%oj<4cZ
z;<X*}%s0-GS>2e+>YF^+(sofU`;i@V{HF(wYjQy~L4*b@6<~S9O1{$&4KhUxJF6FR
zUcVPp#~?pAJ4cjdhdtxYKA(#<{p!Gq$3nGC0huD?dwD$DEhj$^-W(nT(FhxuC$vKE
z6KX_TbREg5c4E)0Z=pu%C*kr%vslm*8Fo!I0^bFl!&gJ<V1AUJmurorj;pRhoc?W?
z6?G4XT+C?K>}oh(ug0*bnKQZ=hS_J=!$ogd*16q~_DJxY>AD)Se|jhuXKR36$uM+p
zL;O}hkLeHo#Gxe;Y`^GB7#w4dQkPaUgD`b2`+S8^Jx`kTt0*z~<z|etZwB#&E@ZOj
z5}Y|44!htXeiK~>f2Q7s70Kgpb?q;>o*96{uNSbT^8F-PMTc3Zcfk8?BDnqVV^~}r
z4KI>{@T>ndI3cErDP9tMJ~RTF?hN3kL0jG=>w{a5w}HtuRmM}~*+O!W(@+zqm%{Et
z+9WY>neY|L9Ny#j<aTiLIfcR7*0VroJ)-vA2LlySpncYP&L(I+xJ&%Qn{o;8xqc-*
z{UcU*TIoM(c1oT#mixgI(Ey=M{2ux=w+++|F2@vYHRj{w2c>E=1k=i|a0|+uAm(5V
zB+l``oTvyWnsymYA|vojVFfD2DX`Y}=fQkY2SiO7&8F355}ywlWYVMG*cV=c|E+yq
zCS`pCJ9j8SW8qZ#W>h9wVv<Z+UL@nH-b)ZV)Qf?8LqV+SF{Y0H2t!W80_W`^B>3KX
zn0-`{+5H#J6h<EA-n6xYsgyaq1EVs-YcXW>uVZ|#jsJ`zu7RA+W8qqRN0#LBhbvw?
zf=0{4VA9Jlrns__6stdl2U|nA))nt@uA7Bm!-E5OLUIxG3!QLw4{v3j*IAbE%N&QY
z$8c&xmUPmYkr4hxge~dRhe?r{IBrxPY!jc#TH2&Zvesj8+JBOByV#AT*Aq~%A_ko+
zX2E&he0<JhE%z#qqR;mmP|>)Pm20#Ku6-85VYx>TcT$r5-Q15w`6Fmx<z}w+Oc)-X
zIg`D(W=M-x-9@=9J2vgpW)OL_3Kx$p=k$uo@ZT|A$SSBKKWZ+3(XSsw>Lz76o(i-o
z_#(=`?I&UC%b3rv>9l@@C$83#Wo2>h;B-uy8cj%o@h|=(cLy5bfcpV*dN=Pns=o*`
zz4}1Qj?YDWJi;Vyo#MF}8gOs9HNh9fcxIObz1cd1>n`2p(p)6z_tz@8a&iojyIu;%
zd;;*B6mM9!h=db(pWj?gCQtwKgo5~Lfk?C_tocEpd%7BKaoxt`d~=}U!gdZdE`wm^
zGA!M%Nm_?|u_IU(cV|}PhSK?1pmzsSszvC9^WGS}=AA&hvl)}(Cg5ffeR%QjD*lV)
z`GUrcu<U{y&&zEmpZ?Velx`Z5aRut&Gc5>{%4_gfvmws{lc!han$q#F`-Mw$yHWk2
z30#U_gm2#FarC1)y*}9jCSCSNzuN^^(yPlpw0wuz*UGuKhyGyVl9NPdlO~-rSr%`<
z`3)5fiZD4rp62nh-Q|&$;IZK`Hh%BHb@j^3@>_?X+x9xT_iuu^@++v_yfe6heBg!~
zm-E@1iEIJxXSbtYkfs-Yuzl-i>>7w+B^L^zuILcVtWl!}q@~#Dw?%khi#IG5Or;Oc
z&1R;yYxvnjA{_nxj9WCGcUFCh;x4y8L7mO%c;)^%IyPn{uA1jSum6^YMH_mt>oP)n
z+)6HEawFFhrvVf4|B&_>v*}Z#QWUQbfi30&rl?ng=2mj7uVx-wJL)%@M&x0rlPGi4
zuORKV8%bet7lw|vqV`iV@ZH#3@I&*0P`RrTPx$cPKNlIA)@VVmiAmuVGZ*?c>mXL|
z{EWK}@O?5zO;qXq#+?<NM9VA!VU3dl`#QQ28q}^p+37rx2vNiX+iZm%Yn|xV2)@&}
zkN2&F@P}ycZ*J~^2-H`*$O$(F!!MKDP+1v;S+BncHuM{DwhIiA_6FmL=erP}grH?x
z8VOu!NAg2%W8)MvaC~!}+w<TeiuLgCu9qzsCiY`Bt>He-nTe;y7=g$p3p!6(r))&@
zdGs2{z@o!i?5Uazbx6O7gNG|2@6aC@TN{CIySFo$dwbErz?z%uqDZfd&}KDSQgrri
z23}JhV+DU#d#IYqg@=SwQTG#6RCX?l+~mhSKV?GEJC=V|s)GEm3EoOhL%&C=ETDrE
zM)d6E{T4|qVb*%|iywnhDt~ba-?c1$H;6i2kx))_=&P%~==${sz8lB)yU*TW3vESM
zuVxkbqwLIc11wp?h5^ja?#9a!Etr`12IHlwU}AeCj(9o`&-ffiRo<Gc8nG06o5BRr
zliq`tZ4LNt=UM3asc=GK1}I4kk)r`+f?bstxy^j;TWW6=kLWO^*VXO|M+clGLvj`1
z{#2C~JSf7iC5`aZM1`7YNaCrk7K~v!Y~?gJl79XqjIHy8&Uz8y-bIs`O7?i}AV{(2
z`+0}^-bSpFe~4Y4YVcRWfF^`llc0e*azZo`78!R6V<L;O!GQ1T%sCEMbvp5CS2gdZ
zTn<((zevi*IcTYwjy>wz*@x;abVB<URNve!h@bZm+IMMy?F>pbJ^|ROa1f0}Hn7W`
zitLW-Gcy0W4?O-)4?pv)oW8mUp4n1B#-~UzHUb6L+A1K{yb_Q6{3RUP-wUUA$fMK~
zQ`)kqL?BX@g5Q2fp_1SMclyC<Zn2{_ljP@gau<^E89NP0-QBQzloJ2$QUVhEKC38v
z7ap+G;u(yZs1$z_=1aJ79x}bC__G2==uD=TzZTF{sn^NAYg@VI8#i$qzu#J<e}yMc
zG?CHUrJ23QOgw3qgYljtq4eZvpiW-gM?N>+pgN9{lyR&kejdAa{vNjT?_o{)t+-?Q
zDOj2-%i^B}gF%2HlD&yIX^sLT2QtyMb3dl0l%Vs^bnZ`OjbM*{8$LBnM{93>w!F>|
zJ}<wC!I_^yJpL|b{gI^Sj|5^<jUl@GSTOA)36S^a3MuO1bGsjM%aT1LX%V?>v$G%t
z7p;|pb?en=igpRvYY`wwUiS;ewW`p?<vT!9>>UJk^DZGZJ?6G72A+N2!XRl1^ZjH8
z+ei>5=FGzlhRZ?n#y6-)oB;WUy>R+&J^H=!ASdcwPpsdmv7)h*z2f^zU+xpG9>3v<
z>1pu$!oJeSkG-+Fobq$_iNXz)j#%mU9=C7JL#=O8%*WUmUrc(-Wj%UH%wBp6j;Y8q
z1C?9YUU(i0e7nkEk|pFc3dyeMdb~KH8+s;6F<ZXRne}fJPRi@UeJQ1IXhAz!u`UC;
zK1So@Z7X2)BEH|imNHpaU6dX%5yU@l#>vTcOwwo^&nkTd!FMc~nZXEldzlZXal4jh
zdUBvUNsWD>)%fkq8MyW>1Rfj|2W2KlGva?@$Si$)a<YcoF?BkdyuK1&9aO@vW8_%X
zpL(u2ya>jwkY!_ODO$y9b8gENSXGNWJ@IlK8kD~x)|OqE7Lg5cM^fPHYJJE|RS_5+
z_D0uqRklYM%JqCGfFZ5Xxaxa^U@5l`|9Hydxib$jDQE*MI?3NzuKa>86V#wfJ`?LN
zG(pt0CwyMM1>~k1QH4{ksPRCZz`0Sh&5X|pXNAxwuaY4^Ta_IgD@!Ylq98w0lHD6?
z#1tk35kHR4@)+Iclow54H4@g?c5Mg7+TMrLQj_VJb{p2B@PcbHXu<<c571RL5qve%
zxCFH`SjV5qGKN<%+dK$_$;xOlTZ`rj`Mu0r6BZEW$1_xJqEnMPtF@D1W{WjptJyk`
zJiZgd>#HIDOB5~{^MvbFt^;Z+!{r~;hpJIu@ZWC>s+yxg4O>%CuyZ<-F@A`5%Ga<a
zg%L2Wayc%3mWpLP3b4>K9wRiqgQfT}sNU;An)fO}&n8Q5hsaZW*_HrPR~|vNvr5pP
zz88DqmGJ5cP5Ax%I;3s*iK&wY1#i=RNu$eJbhGUOdBX@u{;wSUJ5JO2dzI;d-7gU~
zJi`lVT8ygbG9$Ays3~Q@ynjbQkdHN%XWPL|9X=D*@(e_s22sgM5+ge`ai#nTZflky
z*~Zz!`M;tp@R&M!i6-IhiLz|uCQq`_=rJrgk_tL)57D*7h?Tn%`eyPo9NTdP8b>AI
zul!25@<Rb$Jhc+OY6wJ0o_(Ou%+Gw!)}vtKS$sawg<(b2SaM<&ip}3n<@cI%TBVIB
z>U9T>d2D7*{<_pwF-g$BaxB{(<VAf4cXN~SY|FTp#&G7AJkPlvhVe(_@puQrsr!x5
zYIh2DwqAzZQ_1Ky;v2Zm{)juYO(8CgXMPrY5@Xfru>VFPPXF^1OeMO=Y4d*UtJ9;?
zUTkFrYYxGc>p`4!{Z~9Q>`vA`9nUUo(q^85P?-DOmv~o*;3tupuynW;KHEIQ+|)_T
zb+jV;c=HX~-o6jk>KaU<pJ%b8AXKL+poha~E_6{Z&Pzx_@oOS<+@chrUj)B3FOMZQ
zW|qJovSi=K`+_;llo==@jEs^4FZ;((bR+;ZPNYIz)HP6lt_a3+6&c2Gxc%o7c(yD-
zc<+TjjLvO>J~{plku-<zwDT@*@(Ma`x8w9Q2mHsbL3Y-CSn+%sTyJdV91kK|ZP8*=
zUx<Qmw?FsQ{xEYKugQwa^6|~K*ZAKk-ZdE`!fEqfA=!}UxMrk~1l}zM<qL=Km2N-q
z*C{q`c{~YIZh-V6aagpnllUCU<4iJt<CY`hH0b$nvP4RnY1SS9qrC^ADasjHbTqn5
z_b0PH{sVaj72$ymJb!n_dwiWyE^NxEfi8C$rqUcow3=rN_ZEJ`Jp&uq*V*B)?qoBu
zKeiaV4mabP7E_v>YlQW6{JBwQMa%6sa|=GL$7gNHFz9|B`#(p)&Zrg}PrWUunh-}^
zwx7dExv@~JX$z1t3^_MvL3z0bEA7*OpbJqjd!Pk2jr}J46mXd+tvrPJ-ZO#j9tLH;
zk6?I87vA2R%)TBe!{SPwg&I1SnGt2Cxb_9V54tVfu_O_<$$P=G(PC_x?kgl$g5YqD
zB}5Ju;}Lf+7(Y}^<c^M|4L9WJ-@${B`ppd^bTS2t*KcF3*TtFM;x4j1i|1l%NTZG3
z2$Z%ErT-KPVCmY?aO=Ajb@P!1KC?yE-uHpR*#YFbNjr2*D1;Hb+oo2;5_8n;@VfeG
zXk2Ci*X8eFagaFc?eNFhm&UMa%lB~owHS?(=l`!khj8bDTypQj5Lds@nbuwsklP+7
zVf4}huEI?e#fS4@Ro6&xeX*ZrL@Z?n$!<`v@E-pAb4+l#N({<6havUgAlR$Sq78i&
zP%zdP{x)ypwm;BhE8<gN@s~lqTRDn0I9Fiv)$c^QJ(k;+osC*=o}%;P23+^*BC$+W
zU>%n)KzD8ixUSB^Kqp2-%_4;T)z=`?av|PY@C3DE<1p8acXd28AP@MST4weH{B4(y
zF4rGGL5>-&Y*`HT>-e1NK396txdgRr?dZ7{cb;|n8lL<af>%#HIOXx}WO$oAl~z5&
zea)Rjhf1``HlrkX;5L@+4>n~VFJ9#KG@Jum5x}+ie}c5$MKE9y1IOn6AtOR#U`*Rg
z_#NFYpcCFfpx9Zs7;_E{2Or^Id0#jgdz!e6UCS2zXNhZfK0`S#C0OzD7CI!i2o-1Y
zti7W*;hx)F{GL}X@M-IV(DRdM?PyO7*k_DV4i9nF6g7I+>jMt=7|_@SNVG-XqP>|r
z9@a?&|0h*s+W~&JX+YrO{WB1GwV%^+@8(EOI||Jxw3NEzh*nkMJ<UMKJUp3}M1Mw;
z`Gyev^*B8GnZYwU{$sza&cieL5NxDRu*!1<C^<>taS0h*wf`~hN_`Jvh0D2F-_+UV
z9wkWruE(<f_LKgIc2J16#BZM(u)(>9Om`bazqjwimm6%!lm$1Tf2S!wk1W9APJc9a
zJjJPId?h+zk72CMC@K>B0GG8Ug5}^<D1O}tGy0UcE?q0e$xovC-ZQCFjtGj3^TL=4
zMOqO*87>A8dM?TgC8kwz1F~hfILnl!Wyn(F-Wkm2m=hP0sl_RO(PZ7>Z@3tl_hi~s
zS$a@D89#5T!CIbqofKEZHQOP1m=3f&FcfY)(x!)R+Cpkn8}2WxBkq?&Q0in94&I8z
zbDIb#eIyuhlm8n$yabzOuA}FN_d&E@B{~L-!rU?eXdf}fp(uT{dN7V1mgC(l+t#qZ
z5$6P*PmP)DBPr%~RUOP0xRGz#F)Yzl71gC}*_?18??+Z<N5dCD{`pEAWj+9w3jJXD
zj%P}X_LKNFU3QV*|Jv+c#LZe;h_jwgLHl(h*-xhebe}B>y5sMH|KDoRUOxn<Y%k-;
zgQ3v%a|4M(N9eK*151nbpkO<SROhI%^pz8t)v@WM@W52ge|II<EL});jMAZ{TTFRp
z)=m7>Ujo5fW(wt+E!Y^BTIgM)2M2z9!d|^fEJ(NkXQ#Q)+jq}kUc+r%V-(AY>3zoW
z$CU*x$(u>dxDe*NE0(zZy9P_+X9x~-e&Oz%HKg~%&hxn*SJ)soo6JxDgD1{>6DH-p
z!1J3{u*IuyV%C%0FyZL}<lf!p{;4Jl%V+1}!M!3NGk*jX+GldQGZJ9<)=hNSC&TmX
zHnUff53#X#9^WVB8P&fHV0STp&*%A~C&Qm}3x>3yF8DUs`uqVEcNd(yOO|m{?nBJq
zx8&Jn2TD)z+1`<Ru>7$J*d3e2eS_yXFPi7vz59rh)(OEzr_h?L{R7Ge6NK8u|KQU@
z2^zEf3Kw(29i`>I;+3baO!I4*@Mk<?DL*GYBkqYu9aDsn5|@B@Tmrv0YWVM#C?jbj
z@SL+DY_oM@X(=M?nY=Ey=BPic(M^NV!hASxuz>9{E639*qMX+8QTW~I0DA6`BF~&M
zp~*p<x__<1Pj|MFZM<JuILjM%+VO1c*s;tsmw(s%CW0F^MA;giy)Ri54)=eD5i%Ue
zjU37M!S(OL%Q6uf__|r3^R}`~)#V6u%&fvayL4$<Kp!-Ir~{9oX4G|FhbJTJ(0SfY
z6sG40zT6pxj-8=&q+bV&GqI#kjCj^lLK?T1&wVV*SjfCR`$%l`GP?P=Ih*t1AEpd@
z3%`oq#Nst3bc?bWy|K=k$p%QW@RgVGX-7YP3aaBW&tK-0`HtVl+77P!>lPR)%R$}S
zK2UjCf{WdBA0^Y@kR$PV2p!*G#D{A9wZ=p+v0a0uUUDIAeE&jq<9D1M8;lySC(un>
zBxuhZ9eOT)7XF;G8}vl4a0)wW!7eL=B;Jsq|8_*e<4b=8e_s2+1}#UFIJXV=@iU|^
zmWs5iR)y-9+yc|5G90WMTULJX5axM{vL#<jadWI0crCq#ZE+MO5_8~tb{IC;9Yo9F
zWNz~^F)V5d#<;Pecp_mmV^8&j)?MPX$y<u)dDRO7j)^mq44xIctppH4xSonQp07HD
zj%Je7o6pL>eCvQ8vd`i1zIV`W_Mh;FkuTfK=S~ky+*P(jIgW^3iiBGt)3N1i5sufL
z%_c`w3%4Iug&PeD)ZqAhSS2!@OBRf#*^<|xs4NSGht8Ae1G6!2nCHqqx{RY7av)On
zGIF|lEGAQgUMhG;o|@`&vKBIoTj7iC+s3ey3v`*1lnB%K9*$Q7|AVDlV*~+~W^5t4
zuv5a{xcNXHIw}rv(bJV#)h2QJRZ<joskA|aW&v5xpB0ac_1TW7#Z39k2x`E4Z+3+B
z<92GwGK1&X9Qu&P_f0HWt<7DO>Rp9LZiH}Q9ape*wFlJ~O4H|=Cvd~kO7uudg8F;1
zxViqaKzS_x?`c(`^`y(VWzR~Mcj*Ihe9*wf9eoG6#WwiP#1RvHIYC3lUGVVv4fjUN
zQmz2F`7e_2#~TB@m=%sr4yBx<tTXMOqRqbkvZBG-tMFj-QFs=c0baUyv8;U;=<PU*
zuhZY-zfLDi{1*cjhDP9>wIAHO(@}gg&sDhl8b>I6z|nP^V3K+e6s&F}`}!`FWt!hc
zugMiinywMI^S9yoif_W#3m3w7-B!>uZ^Da7!O;6uiq;<y#SKL-;gE_w`**t>E(vGD
z=`H`zk)OMMexOMsC%nd`YJpr~feNy~F8r&16MKFLp~@i_tBVrhVqiPB=w~Xjp-$MK
zb4HLc&zH2So<`qe-@x;NDLwgs_tl^_Y>o;nduQ5;T@lO35k5OodsUvgo>OPD=kWWp
z-L>#rqneD8pfqRb6zYZZJiLiQoE6Rc4SC<~YQ<vYJ}$>6RGK;V6+vI}UT$-CJU;PQ
z#;Pukg+og(kyrOdvfEZ#R3l_OlQ6I4isp2}^1Y%gzhn$O{b?H3q%VO>{H~C^^a0fH
zhV+fPY$!7rLLOfSD@R${)MP;?6wYPGey`^~pA$g(PX`$BQWE23Vo}?B8ui=Qiwop{
z??zK%f;!+NpTd>KEM!Lv=8$>358~?^6AZ9SWMS@ms9B~nEO>Jh_Pti3$+IP>a(yeF
zkR1msJs6HWw}wjTT~snklr9h52nNrKu{XeitZ<Z}hR5XS_uBRF@5dC_r2Gb6yFUZH
zy)U6<va0oP+(~M%D+cd9@y9O>Q<>&FKfIQC7Q#=QfNqnWWhuv=Vp9rXZ~1epqez5n
ziZEdIJY(;vLq9|mOQA`^T55V<g(|l4?(}D`Fluxt3G@F3CHBqe!+*D=<1_Ho8bzl6
zOPbYcM8miR4K_mmmspmPg?TkztmnQuzIfQojSCUN;3^-kcOB2X5;up}kJiC}aT<6O
zdZ3R&5{e~!5F{U62Qi<{!1l3i_@-kF`<XF=CjJfQ?#hS3lK;-Y&F=}Qb>Eo9yhy{$
zI%^!Mo6q-x{o$0xRGfY{l6>3fZ{vI*2*dBEV_U2P@~mkXb{DnDbiWH1XYQwy)Yfy}
zo;+)<$_C;C9)X6@R$R^JGY)?C;Z`-R;Z90xF_A}i@x?qRJf7RnjX9AG|1~9Imh=Ed
ze=g={11i{@RBw}UZ#iD+i6jfn6@<6u=i!W0TNp7W3ZFP=;gYZ|I7)0Ht8@0|^IyjB
zT{aPivl6U~r%K@oa0kJ{sY127ILPlm$-0VHGD|srWq7`p8>8LI)yWy6-JbQ#%jOKZ
zd{v!2so9T#@-3vXI*85DR|5B~novAtn3HH!042r87`5jyNIxD;&Gf`*ftebOxHFoq
z9vg`DzH>>etvI!<olgoIYVh?`e^i_u1s}HL!>gn&91=W(z>Nt434cBI<!3$YNRdKp
zXaTF>P}qLY8^@SUhvAlQ!gcFJvEk`bW{6g_MN*nQ%32K(%42EyO3EIL-N-chlHswM
zGn1L}iuefqN&ie+V&7*~W?>!(TKNWa?EbY_&$A@=J>^|CZ3jrI&0In6)=+HT@tpUv
zRl(r}YHYE`AV*aP@YN>1=dpe&Q}mcar#}n<iD@(0((iKcIxPaOyyJHve3rI<PA}2=
zYEBOwjS)oss^k8)KjR`Z4OvXN1ntz@#C-T1nD&h_%v_j<9$k$P_4p|qd6FTh(A8u%
zu|qg?<PRB~B|`)M0my7VWOM$QHti%WU}W`J5L|o#y%%-E6xs7A5--7|7H6aO!A;yR
z)xDg@?nUgq^%HcsQ2`tO&S$1(63pVoU7phvi07{yW*0Y4V9kZkamZ2^{?3vHhXNg*
zv8Ri$Y7cj7<$w5pxH|8DF5mF|o7rS1BO_UvMR?ukWt1dINh&ELQc7>Mv}Ki2h)_yO
zLc_@Ry3b3bB}GX^nGGr}?WDf<=ll46e);|f_k;U&U*~n4$MHN~y;CRG(lp52W&iAN
z3BSkFvQhAG-53@IHj)1(2~qcEInwL3nnYgrz^z(Rz~+p?K4B{|{nsv7yniE{ED)w8
zpFJV}p9jwIdknKex%;BVGUk=vPpmCih7aOCg2%84oXa>5YmK>Fc^t>nSP3YllgKs>
zoa1eEKZrxus^NT=EZr^Uhz~bh#++r!oQ5!qk=i{0qJ7-BexZ%<Fs_c>_jw1rZ%AUb
zQuN@`{%U+SAWM(>$kW=jgRuChEM8mCh^^N3Ox2oK7~UyC9Ju#JOIMtU&j>^Z^$biO
z%ZK?Qo5`CrYiir>LoN-(pq9fysLfXa+qa7J)CG=XP@qU&J>i3iR0iDJmcq8*Uq-%a
zWW)VVAu^iy1q2rOLGyv-jP@iO&{%f>LSy6LPSst8DBj|j7=uia$~8PY=!MHumf@kh
zWf1CGjqA4PgP8gNjyd~*Y&DNQ+I9-8c0Xq3Ck{aM`z*}YFogoec6?AikDh4VN|o9w
zz#-^32A=C+zP^d$rAGC^Z#50NOIx1CN}Yj2PZxq;+7_%hCP0INdN4?l^MrdhV@s1F
z=_!!GxFYWU?ViZZ-&%B9kRzS;Oo{HRT!!O&Gr(@LDY^eP8WwNdPi-^P_(gjr<Nbve
zC_UdAv)ep*YPMfM&hIKuiu<N4xb&$0>8LS0+X=8PNdSTiIezv)A|$TTphf2c;M5Ci
zx++DS80u%zZqFRNFB(cO2X29voKJ#1y9MOgbXc=A8DIZ<%+5OuAZ9uYy+(&1(pZ#!
zxZldYI2sOb_E&&riyVwR=)*N(d+^qse7KbQ5*|%{!@hBmp+PS`u}WSo*ehAYJUBm!
zYI{!OkJviq&srst-n5_gXNXV{_5~CN$KYhaO!%@-4PJc628jt$FxXHB%9jOE`Z?#L
zKD`JOWYeL3>wFT#Z$~z(1KyvPrAj)X@V=-UmL=u!1ix;<E1Op_^9sd@zQ;=}jx2(T
zLLKrrVFSE*c9QiT7{#FSzo4S{86WUV8CQkPthjJ1yU}MamM^!W?mx>oJ>(Sz2U-#@
z1%0}yT!Hy9kjU}Qo7h`xWl(RUF4N*O%(exlqk-TW8mgj-Tl<q?+nT9VTv!`B_eC*p
zMTF_!G9Pfh{s+ttE+#jIBtU0*CR=?m8J(kB;L^QaZ00RVBF{>Zn1rclH2oM|UlK!Z
zJH_IBwK?`L(;aztL;s+j)J=S{qMMmNu!QxkDP@G*!a>IOBgeBh!$rwrRQ+EE=M~Wb
zzr(<|E}TyfXs6=~iPijb#;Fh=BnAdCVRS^tnSP%j%k@ApxI|uz{o1vUNn61&mK_sd
z^8EuiH}naZ?`gsC-zG$$Rul7;pRmezIL^=oSGwpk!kz1H@nF|ASoyq-b?J?ULLD8(
zg0z9z1|At)9E1OU-@%$Ae=+5R2CN+Og%4{)XmN=&HGd;au4fIRqR|)T;NO#g8S>=R
z_4n+m$^g*4y_+tJmZF<D{XrjY@qde#!Mj{#nz!o%LyeZ;>|4Gx@S6oq>3z(%^qz<x
z<%e)>kva`;FUBXu|KaMuGw^<(3|(f~l8zB63WExGX3Ax@<qpTENmizFy<Onqz;tR@
zJ(U(Zm4kc!H@2(Ail+Rnhd@CG+PSO|P88UK&ANBsr4$LH)A;O%GN54@ArPbAfnBbS
zbXiR$yK-wHJZRs@D9&kxU$1?@yzUdEMv9aFw9^<p7V-6zV5~Yag)~bqz==BhLE=w1
zxf)F9LqlXYnjD5Bcl3$WG;b<j8HY0S8gaiv7Bm@tWF5DxBa7Z1MYs8#nEkUE=I49h
zJF}-OH9ZNNxqF$^=ku`W)gTJBoP^a!(;4Mv6{@u18e-l<l)veW(|Y@0hr<J=ieooC
z2^(NS&fElp{2SoE?k&t^&x7AN0cZ?p;j0+-vm1^Rs&uLdkJb#s-OX1a`VnRCDrK`v
z3+M5@6}WqWuNBFU<`^taYjIHMJJbZs1B0dtc=UnG#wWyZ48Lfk72V(<=1zCod((FF
zPq4Qyk=JhUi+PqR!w3(1Gfr=~j#+MYDadbUTcc*uJ<JGfT9LvgAIV{)?n%={ir(1j
zolfJDHloMO6-1oNUtUqsBr|69!g+}lSU0gABs*0x_fj<M2#A3Gt0gEuF^%!EQe-9k
zU&1@@x%ek<B1)D#gFr3^Eh2TBJ?xUnOUhM6pY2ycZomXR!aT`K{TTjP2SK{VC=(MU
z($U^|5f(1~j`Twpx*zI<Yud?dF44uRAYqE*Z<&iPzG0Md5Vl2+vj43-h~I=0@c!f>
zG+eZw=WpDG9qkL*xh4W+OXy!_oF5Ntha=35k7i1TO4;ZeN@V-D7Vd62i`GmKA`P~u
z@Zf?b_NSU2K6dtGx9zV4Y0C`|e$fmMeOpL+5~NA)2_D|vTZ`F;F5ysvC(N(j1e!PR
zV#I-qxMe$w55M-|919<u9B~1LYg^dL!XCVulgH@>)i`Lq7<U)n0X^AaCbIlKwEXUZ
zqlu&Jm#p=`bbNwiJ$Kl1$L5f2c19%D;34Z`5<s3meT;oe-{RTg4{Z9AdzgCWBI_?U
ziHrvZq1vfGOyg&UJRO=yj~}dotZY8&U7L&J?Yo(vj=Ok_DFr27GRNJ?1kuFj^$S+7
z#F~mYobh1|ms{y&zm==dedhD%$LiheqN&{ZVf6qGd9rYn^T-Fju!F$kV)RqWAE*gk
z%M>s9icuVEkvXNxAFTCY59`0f{(@o*Y7pZ7N0un~T9{)p*5URy6X^HUlla%=1ddf5
zM*a>9s(wt1cAF_N!`1u2w_gB;Di1Ih?_J@8#7_Fc%$qq`)dT;nkt3=@7HItQ1ypuB
z)2&LE*xWI0FH?7#IT@YLTx>hU`0j2;X-7AbIa3baaGp?uv2RR`T`auyF|%hs2Z7~|
zZkTvih<0>sr8fP6@NsxLIrg%?zQ9|W<BgiY&*N7ZP1{DcDx(S(6zqZ$mq%>LZs5f`
zP3QQ5dURG5gWrUHqU{+2s`pJFHIvrifeEKDVg5;2ICC|ZW4Vl(g|g5h9}oY<cd}EX
zFQW$MH|}jWps9nEup#6CJpUR8)s8p8uzw|_ItkT3U15TJw4(Kb7ohOQX)vE@j-S4*
zVs!Yy)K2jQPMROcu6kik8iy`o_J{-3KU@I&&L77)Du;2?_*vL57KbJK_OqRpRgB-9
z6OfzdPn#y3XHKougQLe|=zm++;glM2vgCdip5hn-e>?PWeAyKAPi=;$EthardIfWD
z#YxPWKZ`onCgAP^f^<`S8?M(s1r-K6A#B}h*!E14nkp|NBf8GabDtatp0C2OYX#{m
zxrx;6Xcm4gRHDa)Pr_Vo<|y)OhD_yFX73&q+?MSOp9VZh?)O{R>Glhcrtrv&v=TT~
z_zwj+*7RUyHFPB}f~J5kXr|E&1DZhA71$Bcs66V~dj$eYcC%d~UZm)%3CL-W^M_oo
zF}m_ecrExMnrLL9fMFS~Xm&%ZO^dL~_zUjq6QQ2pSKyAZ9Da$O6AJw7#t+|P7^_lG
z<Z-j|VbFloK0Bg)Se3diZn59SxPvOE!M`ln!B{SoW(8kez)7MROv|x};P69)?BhID
zwYe3zqMGw;Dej}1QgP^g;xPR7=PCegE5JM72ed;d`kcMXx|$1<mhwVqZ}<eJVXv93
zERS+b3I@HNpzN+B99Na2RS8Sk_91O{tR)F@7FD5sh6mBzaSFBvadUUSKYKUV3|HT-
zg%=AyLGreA$`4<`zPR!nV%#UvU61Raty~_SzT-TY((<s%Oo2+dWMBesCoGPXAZrg7
zA)F4P>*pBL*mK!5{<k*!x+nv#I*8HDPjABZ72;%k+F^Y9Vk+s7SH{y!4fwbW;iuzQ
zcx8{*V3fHXU4O%!wz+T&?0k-e==u>e@4teT?^H>(_FsF=hm+ArYZzzU<MzrahQ!=*
zA*nA7$JxTy;nC<%wmStGyE#*Em;XcX@>mAxPUDPy>sB&$y^)pKvznH#mqtyyGKhTK
z!*;L!z#Lm^fen9E(IqE<xERi0S6AfW+Af4~Hw8Mgq!98_SZ-(8!30&OVVHv~cxzo}
ztsecwd><|oxMw1b@lN2mUl1lYv;L!z8TV0kT#eGb67)>fN9bQ?jxUxaLR8#g{55|7
zZ%h6IznDezhs#;K67PaH)26U_8fNsIffy+e9DxPe8I1Lhe!lPIr8r^50IWRa2Z~x&
zOs8Qc$Oz2B{9W!;^@1$3R5OjYW3D{&t^P5t{~H4vzuB_(YpqDzZy);KH&JS|*&i~#
zT97g{Vs;BoqtuyWi`Sh5%j<8zcJ>4`D&fv8^Uv%#LsL5TK%Q(keG(dT8oAs<DX!(u
z#=ljzC~?7qpYk;YYZq=u*-lOB=Er%IS0}Tf+0*fLOdL$Ouz~qiIf<_EO2uzC+nHEz
zF>u;=g?^Rmhh<#uU;TPGt#ICp1|pi^sPmpR^v)!oOHQG3b2YroO<>n9RUneeG3@Uo
z9*Meq3<7-P>C#*AcBx$UFy`@MDDJ<(_{Pnl!B?`d5st9e7js_Vx-Hn5bPOLW1jDtZ
zt2l;O2~yoptd`haCM75as`CrcXwGxA2Q{3%UyT~t`QYu*?XY8y5<E5eg@(2btliS-
zByLQGXF0H+W1Ht=(!y=<<k}rbo<0?S>%C%L?>WaT<gzd;%Q*i`jUs!!_90)k&x19*
z%IU5d@=#&vLBD=WVXu54B>8$b^StUio7&=zO#*Jrz;j#h%-IA?S`=<9>*j6rF{E{p
zlGOO0C@tf@!N+#Rp~l@?TsLR}bB){I_ipq7&PYuKJX)}3_aio`W)IjIY(|fd`qY8*
zHWsKJp;P+3LETZ6&T7oYvazYOOkRg<42fmc_V>Zu2|W7ZkvC&0*~DdC)9_wPF?Uwn
zwtrs2b=G;EhtH2Kad5m4?FDQ}==wIiE$0fqH@^bWO%?3-^J&a|OsyZ;u!Ue2mqC_X
zgKg*X(E7R~_3kg=_Pqxfj~Ppd{0>K^rD{8Suv?2-+e~H4wAXOmH_h;{{4STD)q})Y
zSJ)#1pU`905XN1d2hlHeN%gn_+1{v1Pt8^#T{1$DBCAA-!Vi!lv2N6CT!{SMDMYV_
zht8*uF(wPH!tRO=TvMR}q7fQU80<jn1`SD>#3A_jdMf6Ady0{NyFo~0J$&jY#UYN5
z_C6>VRzK`y*5`<l*Nu9#V#+MISdEy_GlyC#NKvx<JiEha5>0*TOgq&Rp;!7Y{{3AE
zoA%f-XRGp|D_<J^EHbAf<9orG)9deE&tjziJILEBEQ9+v4wYDA0oE1TvsYs0VV=oJ
zMqjZBiiM(>kMYgWuQ!Wij+{gb{~YLEdYP}gt`GNIAK^SNOMr0a^Nf;d><NyOKW$L~
zEUM0j$=hy0{tj_sd+7@(wU)u1&6<Rm{lQYziDbbH52z_^uj|nH2>bULLhe#C&f9Vt
z594lVxY>boPS2#ZCW>IvHVpM4yFv1n5KWghCF5Jea958YlM%R=oRC%_LQ`*H#nf&T
zw?Bs)Rf~D;+qUtV1cbmO^%Xpr`xe8VC*#spiZnlb6kiLSWzM*`<IC>%s4f|cJc%xh
z*D;}$_1o&FCT(F3j68%TzLSaXHFMU?SAg`a%tX<^44P|eL6eLIpwdN^z*{+D-Q103
zeJ5~x?HGSgf+dYst_OqX9PD(BBf6+M)3l!pFv#B<ha1~rJ}XJ^q$m~LsLec9-2u?#
z$sDL}z}7cz>>bG{eEQoTKj>N$hwqm_@T(%)9?O8?g>8^=r5~<{b1XrlS|~p~lY|8o
zfW#FZy|mDZzKh_rkegq?H)I}jljFOMZTJHH<3(tBX&an<zk%-fA`AV22S8`N4T{8i
z;h{`*vQcOfJ`7op3~^u*yIts|M{8*M33vQpWK4gV>}7Y}oCqRuuIww07yI<Y4H&)#
zq`0LR*WIYZmFd~I=+`Bb&=<gsS`X1Xp0Y-F{-P+C^VzU+2S!g~ap*`j$IFs}v#^=d
z8Llz``A@Nu1~Wm;g79#%E4`T_M!($_hq>SXA<2Bk#srPw#olhVs<8#9+cd(uG({Nw
zD~~@f_24GeN;diUSl!(AAtq8T0`&hHkqyHOsOHrPbVcAhwm7eySv*}IjQb>Mj?5Z*
ze%^odLQMiZ@HC_IRE_DSuKR34uQaU_?88t`H!5{zG8z3g1ZK{OV76C;vEA2!+Zvo<
z=fx;GY4<1|WS5ck(G8&dDFoscb3L>XvgA_fM4B??9^=<|2ZVfd=oBews+l^M^tlR=
zAJ>mCE|QBer{p!hG4w{)w|($M<{u^w3)5#~9_+)zoVHij!z-Sihc*UHaHvuSS6^<%
zyIH>>re7CKUBBRyNO6$!7(<uq=BO*yjE^~<xbadkI{wIrD71Zm)^uHJqjQa8$|p1J
z(VbANb-KPy_7^P3Z^E#tx%|k@Q|XBATUh<`J-X$^!%KB(YLXJoO7GRRS1uBvdCO9m
z-EO+{s^4__`G*K<Zn^?-qz`=jl<Ch!$QWMIrf1Sp_@N<AeByY(e%J7Mydts+7rlId
z&nLe{1KkAFk($Ju*;>f@jxNVnRthu<Cosq6EW~Sr5!mxX5O#BX7+;QGvphKv_Y`;V
zy8dyTFb7Gd&u}9vpbFHhR+mg^kw)#zK&-QE#ax97FuF4a7MpD7+t@dtGjyJxn-gvS
z)Zi$O=Rbo=jxHyjs}v#Om_FWVmZY5{s#IIs7p7enp~rpBGs$@qiB4$)#~s&(3!f_R
zRJJ=o<`snOUPIoCrh!Un8b($wqd#U=@S@hrli{2cZ2LY8GdDG}Rg?7Sk3S}K%Wiq%
zW+qN5MJ$=j><YZ4u^COR)?%031mZng9pCMu_+QK%*f{$>_T8<++WSo`|Jq4#R^Gu(
zt8HZ-TdC7YuE$_S&@nh=?8y3k+fDY&+lWRF6v$+gk8pP2CCua)POg_vLq~uPuJP4`
zXI(yc-$5OZWjcVV!6z_~%;NQa@MPa!<5;z>m+<HXV<MU3fgH7h6~14@gdXEV<QXBN
zJH+MUd%mzQw_L!Akv_-|K8<6`C((ZO%i!PVgFC-y(ypVvTzB_rSfqLakLdzd&empX
zKj*S=jO!8DAj~E=tC0DDCG7M+e_=)4bN17_F^ni)f{7fvO7utqwGnNGXax^?A}^nP
z>3a`<)SO4jq(s~!EJZWA=Ahmt7j}Dd2;Cu9%g#@9r72Mq9@jraquVy%p??;1vm9A3
zL1j`{RAj&8@>;N!7oZa52bgI_vEV=90Dp;12I>l|#F21GI^)=Uwz+p9yE=CP1p0np
zmctCXC`N!xvA@KwJuF-QI$4q|Qyb+uIqzqU&%b3nw`AaP87XS>YYp{TIDy_YN37Vk
z9Zy)uz=$Pfv(=76)Um7R^lcH^?Yaq>mg(S_HxB}TweY`$UB>e|H*rDaM#v4+A{mt`
z&{Lj8(mJZ(9mg2XOW6Vs-kMPJNp|e>*~^&9h2ODpz6O^6OUJ+7T41<M9AibZV9%j^
zFwl4lc8mbs9%Mla{>{b{*VJg$M_H0Vgy@i87?r;uMgsRYGYcxl@Tu%7s<g5S$73&n
z_ex33>sw0C9iIe|t@cDJyO>$hdkzP~8!)BuD;nn?wOjlA1rr*pOD?A@q{oDVG1Nnz
z*jm+L()XJ%w$+Zz@8Lrs#?Yn!A#$H%wI#U};mj?3w6?N_g`8jZQCK<D=t|&x2X)q8
zYAe+EPbXSaYC&#s27Siq0mgsz(9uE;_T97Rr+IPgfigRc`nL&gSk53vS9L+r%p}%N
z;vkagFJQtcbsCl}3$K>y(GyX}Ahr1&^KhmJswP;|FG^Jq)|QJOGk!389%q6`K7(!n
z<{-G9GR+(7nByt2>{0&#)VGL-19u#$y*%ehce{#<k9k8})<g)jxz5;HPKC=_GQ?Y@
z5K~=tqiG=5YqnB}y05i^s{51Z%mQC3-FyYF7WA>5U_>e+?tuT**Gw4Z<NW3nFiY(P
zkHg=Y5-&;GJFLh4yKDmC?U!)zov-X^tJC;#|3;?UY!(~3Bp(}+^=SL84fJ1FFe$D}
zXUQ%XYUCI|2XFtv#rh_czC4WOHF+FMfHF6^JLRt#SFr0p6Yz1@gq;cod>?fNKT&HK
zoTJEG>=GyUt;JcXvkd;8z7HRGEF)}A3v6>rVST^KlI-i*_HJRC7;;IIsN`|_q-+e#
zI%G$@&XzH*D}yO*`^e^372&hbVR-c=3maC7visf4VfWJyu<TVc6YQuD-m<INZ@J%4
zyE&E~H+jr<tO>w%Zv*K>E<2#?R|Sswg0M<?jGJL!W9;wWOrOVL_Ot7KOyPXJJDPcL
z_wQ#I=n{e1pREy=OroP7l|bDu2bVci<D&s7a@?HD<;Nandhd;~4^s~_IvQbgY}gJb
zNEgE%w_nVPdG6%xS##=WtqY_3z0q-#F4OqUntT~?XR@!~gvWB<@!pjZ=;m_Q+}@B}
ztQ<l$j@x3EcmtTjden3F4B#ofv>!dc2Isxchn&HQRKso-v3JzR4elB2m2OkI{=yKT
z-AVA#`-WcX*I9|Gc1-B}Yo9-0jefcl*^szUxacB5$>M#GTAzhEo$9puc`Ali1aQ39
zW9&|{h1qb}ipkiS$NpQ|0eer2z$3GCj40P(Pm@Oc8W)PfSJ$vJEL)fyhog9o$-<CO
zS!$u0%k@jX0o4c9xOCVJlOiV(v-9uRnp4_z$@F>H@t$IBmlA{~>(DdhrS+DhWlXxK
z5vUn=f`WS|41U(Yzn>-Oh2k|(z!QhOEFtQb+Q_CInL#B^?5{5=JBzqmiJ*rUJuxE_
zFKCZr((|`WGk3RI_WBi=)(cR<mE4}8;v*Basg99~lETiMJDAWi59MPTINkgfxcW;G
zgHVE!E4$eb4;85Qyf}9L>3&vlLMzt?_Yb?GCsA{0U;0x18Jp#<iI+uW$+`|@vWoN2
zPP^QM$-?HOuwe!f{1l7tR+{o3?8;-U`xNLd2{qg>b{mFD#b~9l0J+g|na#N443oR(
zkig|9$gR&?ac068HcSzstHkZ-rjd(~5v59YFyDD(>1r-pC&=r`l1BOM_p!m`ExgNE
z1uw<8{>{gCLHzX=$TyXx^|D+iP>>@irb&_S_NrLZ*99G}+rU)48GnxSvd=F>!lOr}
z=)`GulMCZu@uKrs8#jeK|Jr~ePWm)Cuo=8`UNgOSCei8Re^|MNZ_zbK1!YD>h*9D(
zPSd!AW9qHAX=nf|sw2_j(_6gdX+~zuo<b(BQp4fnfn2}R8!(RWLFre2u=lC}Q563U
z7ySf@!{~VkRZ)SJ9!t5tNDDY1HWRb12D8CI&v0t;YYcpCN%e$gQqdf32+Dp7A-`tR
z@w;O1D(fQp3`_tiO#>RxzZyS_6+;x4&+Tt>!Nu2PNmK413fshST_Jzj{T;5LeNUM_
zk9feWUpA8-s7(gmfFo?AUzz#b{&MI}BICB>0@!}wLH^ZnI>h~MDsOhezw?Qh);)&K
z3Vmz|cTU%+?j|kaE~MV_45OT5K;OPGqaV04&f@$HOz$utPlgh>-|BhPvOxHC6{y5c
zE}tT~9v;Q#z-f*Zc|Aa%R^Qx5Gd~YAH%buya*V+VTOOl|hXh(ioPc*xpYc#M*E>aQ
zVD;V{cFvJ%=GKXQaB#&&e5=<96+3I$LgUlW^c~p{>$zAKF^Bn5Rt3WUVxVx*Om2T*
zLdOO#<Mij5>`%^1E^DAfmhx88-^P*n$ij*Uh$u38rP8F=WC_;?Ax}G&cQG%XC2>28
zcsem;IhzpJkGefF5R~IVbfOQldo!`#aPI(nu>TqqevHJ$mCAIhQ5p&^HD?$1NmHMH
zWh~$4DYNkNZ|J$LNQ7!lVfVLDs6VX2?%NT8LW>?_c=`@p@*ozfTr;rKHX5w|n+_Xt
zcq|cDqXE$(T>d!;Q!eX~O}`ZAD^Ypk@@py_knv%wr6v<U^%?xqxH$fG5(kOCeGmlB
z7~0c;hFKkqmaaV6bmaydA4;!p%2B4$uZ@@;I!TQ9mi=VgK2dONnvZYKCcqtw=j{GD
zl&xCd2kM&}aOg!P{L#;4?bb??Fk5kQ<G(RnlKKUA{u6?Ph<QYPKd0yX6=7~^sE`}8
zm2lpKP0Z7lbQW`*aQ=la%!cQC(7Mo>w5#h8`<N88^OA+Bt2Pqh<9E@2RTFp}Hsbh{
zud#yjKb*C40fRp$;DyBinjA31&kq(b313yYJ8vxtnaqJ|vrPPS;)(r&_+nf=TEO+W
zL}0|S{mi`i-w>`I0h2gW`fhz1xjB%8n=5YM*p1uRy=f(t2;7e*LI)AAsnegIRcO0v
z2`+F@XPiWgNo&6hjXm^*>CavP$`PH+Mafy{_vR}rHn)hGzw<H^RCE{gbIw4Gz9bR2
zU_v)M3c^-y52W>LGRL-@PX2C>B})ZlFfvhsSTb>#mE(*}BIQueN$_mdO5(Fxo@R<q
zBC{sSvz`kQ_=zjTs3@l`T3iyKUfMfYeVb|QjgQ4p@j;bJa!jO`GCg?nf(uzZcpc5f
zcF+Yz*WiX=J4D?|<h(e>D54+;8fnQ;zj(a<X^B7a%Ie2k&hKEljSNX{6C;ad!|-{4
zDzQ8$gKFx=bbpI95&n6J|D0<(xR&`6&*m(LOjS#}@)L(>fW-x@th>!FDwqs6wmf0;
zHl2s^t;>kRqDFSFp*0m&SU_$+l0zq_QZQU?j(7Gq;{vBPsN|jFcNqQR$IL&-xEd;8
zPwOQ-u;M1O=vN55UjcOT%}~5->xvT^O|f(PVfyO6H>_XsoF~6W6Q6fB)l1JPz>}+g
zU`_QNxK{4T+!fa#<+>?g$N4~(&dk6Zu0uf4H=PU_In&5ITliG44E{)&(aZVS=wi}}
z2Ya9M%M^^@$)jX^_unCW6VB;MJ3iy0*SU2U-7B!#ri}OfYXQ4u%~X;ms7<<qN8#7I
zYY-CQ$mDryGPY+c*s+DLaOS<q5TG`J-ky5^x6Mms{<fLWD+Lxre3~4Y>o1MXr#m@@
zdK&8-&tZ<%HKDK4Y388l9{T>JG<|kY53)l7Y5&Eu@IyohVv0}0-|)9E&#eKDd}!t8
zzZnPD<Q1^xkPJKjq6N{mJjEJ_J_gSL?zg+n3B$baz~Y??v2YL9|Fte3kFPW$=L(Ei
z#l<RM<Ms`X537;L8CxP}{(l@}OY{HBF}D6+PO#1YaDr!fMcV#9j<MbU<rp``IKxKQ
zGwhA+Hn8B~QD#rMAoj|b;6_#tUy~?oKR=smS};HfaZ_gJ_$U4+A0c{e?^D+MtOC~j
zoQ;)3CHCq|uJHyQ)v+R`Q&2*qkDa))hkdnO0Ie&Z@>73{vXS;%B6&f}C;mUma&X9|
z%_0AbF=qbv@!)?Rk9b8o{LkZMx;rLDI{pvASRhkCAkI9LnwOu2vKblhw8)07nvGC8
zCP>HhuVBCWY?}Vg4!-UC4J*=>F*!JtZfgI?s7*4UTI?-mLrXO@d9{JGL@7KNnnFuQ
zB#GM3iELWC4EdfsnOvVi;Y7`RstY{UE6j+_*r`G94w}(*pEbx72P2YO%Fs<|8uWbs
z6kM+;NrX`eX0%TwtDgn&e$2MUQ)QFr#*CAU&9oJ?E@l{BCumcd=4rI}*c~WM+DM<D
zDuHEMvQ)xhHF<yG8smH5CFUBO#k3#$@VZMHx}*w_1=U>JUyV6#R!(KLQyu8SVj*IE
zY7gUc#2aUkRwmxR2fmU;q;KaGRGGXKa;E%&t{39;NoWTgyFHIRe#Dq;4V=WaOIh$c
zq-K!domNyPER2Lq8|Po<nsd*kEy9*{Pw+(AF<fT4h8j4(hT2QX>=SP}o}a>3C_g6%
z0oIC4`aw5xA#nnp=$nZ0h78=^lnYU=z4&Ud1I4!{qJaNak{c{cYu!|7w&pCd_K7@o
zm)OgtPrt&p$(iigonego=qL2yQjD@?^T^)GIp|b>7_+(CfwE{0^E|y6WWRO8g=7IT
zkIuxf&FA_4BPt-WPK~U%>WEW<4VX_}7a;13HT{*nn(WB7BY8IGP%^-Z*7@h7aNsU{
zIJAXI80gUbp4ue4$BtP4YO3dFodzFK6S`lb7iu>A!we%nifr%4?Cs;MM$#y6-irmK
zsXYVj+>23TPC6sLq>TM=cmn<RjxyZ~%Gouk`j|D%9i65h1j(&u!87b5tJQu6ju>$c
zl;-VJ+OnP}Y`UNQDa38U{M&H?&z<-`e2qD}TzYQZe>BrZnO-P4h+Q({IQ3&C=8FTh
zK0B3jfSHoizdn59%imFOuO?Zg`vXk)jd)`|L!$2QWPdN3&d&DrB@bXZU3N$qB)LXZ
zq4g9NygLv7?JB5GN;!fpQ_u06bK4=^%87dD%%$H%CWFU&6S^Wzj+jQo!s8|GbiJS)
z`8o41;08IGvpp6LcB#_ehY6ZIIF0IV(F`kc0_JjxoxtxTh!|Q9TXmN5MVnjUUu7UI
zX_uic$+p;V_b3xE@|0&$`-kawHzTHwCt&U1Uo6r)O4~Q<k!_FU>HPt3xE}e2J&IG|
z*`^yfw_1|=_T7e6GcGgM7qkigIv-7j&51x)4UP*dkazx?H0+KDH3|KI!Xj%)K$jo(
zmC4XKts}6dN16VfoPi=PjP0ox0Z?KijjMLmfiw5@?hamst9oSVZ8ZjSo-Ki*MU}86
z<S_V*Z-n-$ZA`$1R+P9gpVjt^#l97~yt$?2SRefm@;{ZL&YNK9?v^Gl3K3Mk{1Kiw
zQp5l5K9e-}<iXTUlElAsHd;%&lK{pOlXTVKVCpxx^(6&Hj{ET&To&NY_Ef&}8V^ER
zmFe+YO7w5_6-aQ=Buq~qY)$=%pyN$59i!0aye3uNwHV#qKf~Gk4_U>>3&`nJd+d|$
z>!U+}I%%G93?j-8k^JRm^uI2KD(#J>%?7bd(4v#@Ppb*UH`HP26dO3yu$w5kucFPN
z<FJHta8O<~Y>8GTnWxQZ;<@|y?8|bdX691TQgZ=+&s#;xtM`!iS$FK!bMp9?pGuMy
zh8yTlNma&h>u0!a*2U;g@Ps_|2oj68m|#_Ndgpo%<5v6y^0Zivc6%MX7AJ8jjVg3M
z5)Suuim>}-4VDWi5Cg+xD7>}}(=I#Hx@GyuFJDMTeworwCzaUAbDMY)`(~36mvOL+
z7sg1&8$a-u(@o56CVOl;@wjP5gYFf>w1#+g^0gj5JHH>dyyQ|dTcXIG+F^9Oa@F?V
zJ3;tQI+-R1=|an<Mn-P;W%gH;II+9^9QE(jKuhUbRI@)yECnU#`$HQTO_@?Sx*`ZQ
z9vpyY>yt?IsWrq#bQNv%Yi6hNe{cjPZac~7&~PbDNZ<1nE{6Yr2^RxE&!!MsAOB*m
zSep`o3^fq*&R{z$j^hWDuek8jBAoTgnp0;l-~m@Tc16?#+BtVQt2oxjlD}8EZM`dT
zDpY1~230a~cl4O$l{{Kz<w#_wde|Q^RHD`fi>Oj$I=-}ihv#g~iOX+8mV0Vy@^Lz`
zIco-UsRL=h;zONuZSec*6r3|$gCQaPDD-e0Gh<6TYmj%0+R8}Nz~UgTxl@W-CO1P7
ziDp`7jxn3xx8cbBU5ur56>L&GP1{Y*6Q<0N98VA=KTB5PnNy1)syv0UZ0%u>Y-1VU
z^h3<#JQK2_YZAPww&J!!KE&os7{mvwK-HRys1$J@<a?IU&>S_oWquq~+9uNGKVO+A
zXFmgj#<=>P3Hhct4}aJPWAz*%+AAc%`>kVa=k;2Yt~ww?rtUH%&*D~)wS}+nkB~dM
zckTxN-*e#1{S35;IuF6VTvF!Xd}<i-6xUr21z(Gou&zRa=ss=1h39ght7ALPN>d{)
z(lM~Ka0AYz5_rs28&VhQvYT6r*srr^5aZdpRC$jLUdZ{6T{G2&Zj00=T9J~ZFx~>Y
zG|!>u=`QH6i$<qS2XWPhJpL7{_v|63_xLnsBRvyw2j}O%ho6it>B_$fgRZ%xIa-d^
zPkRIhJya+!eiZM@Y$v6Wu4Kn>Ez|zpnv^aur9n2D<Xp%)`p%`7U4BH4?pke(H9G|<
zlnkJ`fDko0GK$6Gwj_B+E<f{aKK9>;12?}tOlhn#ZIVr3kLp)2FPSNX3qzvA^d!2W
zcL{m*XCY~S<pnOTdCX7e{}_#p%}}|)mZ*;8umbVLAQ@2u4X5>K{6Gf3PNou{jSpj$
zMmYAg^us>w0T>%U!1q~l6|e2kN8Kw4^w$k%IBRD`#947N>=(?~W!;5Wp}X+7t_(Tt
zV~6R{94YLh6nHg?(I$UO(x@s;?%!EM<<0d-LvtRkXpn{2h0}@ITWh-bgEVn?wTI`L
z>ccf<g_0FV3h1$mLy&!43!T3!fNf1M4)yc{$*lywavMBLK8T~)4a|}$ld)zd=Neu;
z3`wU>qP(&!)A4U1Py0_CwA$)nc#|m2nr22e+h*e$=~48kY6H@DlB1n{XY;uB7&+B>
z93>|YosuV!Pv>>%lnCy|r7RAUqN~B3bJ6C{4x~3_*OOfvPLlqmBJ`lC1T{}EX6&w7
zkl$(ydHL}rrxq`Ou={Hv{qF!Px+9%RSEo?s{t_C`XQ+UxBo#h<h8=z8iAF||^pcMl
zI{%qO4gdJjinXRR>k~y^lVc=+suSfWGGy1JT^zYcmF~<k2P=Dq_wmVh6yZSim!4gO
zb^h|SqR50kVN4hmoxODaK^x+JPKFHm8PcJx@kpj9L1vo(E}c0S1{W&Rp66k7RA4S^
z8eEJHXR}aOVIc|T&I5z9`|xN03o_HAL86q9qBYh`!5jkR>N>>2s|AL(u&_*~A6?R2
z=+{k=OeLJdurn(`Ou~|WpuVtknkdbyGNZ2^{DcqHlwDW3l^U7vrg7>qs5T{<MlSXt
z+S7KS?X>{>*=t6BS<J*&T(k1TpGHKbX$buGh><UYJ-Dx0nP|ni;fAkM(ez_){a9Nh
zqyHioHtzieGqS?zh36W?qQ8MzkTL`H+&HI(-FX~`b);wWGO#%*PacGGL=ibtRyR+O
z9>8kQyQ2np=Oj-5<3X-1j3brq7BoISg{_EKOw?;yQFzKkqA{`<yE+U>vSApQEBg?m
z?jr2x$So(%c#=Ei+cEg8855CH1{z8$$zGX5^tzTHO>>^f*zQ`!w_LlH+GYf!*tA%*
zPr3nad*0YJ-Be&7n%83JmI%`H*oHI;>Y@3a{b;6e4em^9$KWSUI7@dLn9Q+c*HrHS
zStV;a;weX9o*4OU(1Wd;!^xO^1wCbQ1Cr+b!K0GB(CGUFuKu1w>=Wvt<=`MsLawYn
zQ9Kqj9IY_vhdRvGYT;Uu&)`qvbv$`(1)5sa%FK<oCW8W^e5w7xq#=9<Gt2cG<Dn^r
zM}$R5MUp-~dhbH6{#?f_wiTioPkQ0fHwETy!8le0mVrzBL-=)g5)BXOKwD#OW7V;n
zF$-D+-C2rsdZ0Od|7k9>%Swr?UbqHUexF3Xm@Fqkg97xt_a6Mf&DfIX*3srzhFH4z
z@I;^6(FgMS#53h9-U{RtH`&QFbmu)*Fj0%tmRGO?8->tSjw6WsmP3Nge0s9Gf@{S&
zMjHp$lIl4vjC8vMyz}A_H?3Rv>#;H^_#9IAS=WY28t`anaXPLI5U0P2-Z7gdFeG<x
zJw7OqB)VJpAXb|~_6Qs$`ZGVG%#8-dEo>_J{Ne|6YJ5Vk+F4{^R1EJWKERW6CV<{O
z8>llMK^Kl-JF>@|c@}JmkAFLoXNzUXnrnyPh?O2OJ3E;~v|Xk}{Ojzv6xWE&EW)<=
zMIdp|7>DK6Xoy${tP(UN7kT>7rj-Z0%MVcJfD;@@6r@K(8Zi5NDDF_W#rB^TBnEtC
z)?%qRq#lqX+sCbmwy+{P{N!eRKDRXvFJhcHida^FGY*#*pu>+SvV8mln4A;g+JwsN
ze>$m=^b%!!m#RhF&PkI4%Cl(S(g&=qlPP^6ph8oVW)PjOQatrHk%nv8@J{n)vu{n7
zkQ`<|jahyQuKp~<%oiQ_vd4m+N*5)At`o3BD+g3(2BFA=L~40(E$U|`!P@WuydJDV
zcN|P%s!PStAgu&t^j?9%^g)pMc^fR3j6nbELUupbcD7`-D?J~600&}3`3FkEp&+P}
zy)r?OYHZ$1$HQybFQa_?mcN|tdi0$2(cDGSZ_Feg{il)AfC89)qkwgg>%|woZ(zy6
zX~ZSfiC)Q@KssM7h4C}-%of*GNDE5X7sUpslb6Ow|2@Mjh+yEK&VBINx{S8JbRpZ0
zcO$uLidlQr>7|BnGzb)--5-L;ulKj$I+qY(_kRO-{i#&Fr4*RSea!T>mE_FxFr0W<
zk|J*_tHujq9vsMnv^P4Wxp6UlJn<ux7cZy5Lm#kgXFUe{Yhy<85&Qu^V9w>;oGYjc
ztCmM%-Hb_a(o=@l$x-t^jLf3~TPR%KlR{JEzrseNbF9;CQ5dZ|LWVE-!{eiV$O_IN
zCpNec<IzF%`IpPYMv9S>d-74}uK`5ZTwr%j{{YppE%sz@D%rlo2wvtq!*H);#GE)f
zGV&E2c3R?-gM#$rQ)A+CPm9Exd;ovJLfp6f5g6O3bFEs=wExgJ4z`|QWV?}-Y~2R-
zT3kQQba`6%NuF-g*C#n!FEYiOZK;m<3hH300G2{5%K!J9{oXCas<Ik%_OZQq{d*a<
zXnkP)?uiktUAfG|I6HRMU?6U&pAN>)4>M|ajY)rv44w_Wg_jFD8I#kgXek(uSer)6
zUUMz8srgX%y#X$)vVy`&H@b#vD!0C~4B{TC!jw>ddeL<=-MKait&W7kuInmf_rEE~
zzcQE6xB86YHc5P=Gt$Jh4XA+rYfS1oM1$ftf>iVk{5m0-qx;B_3uOuHPnU8mt(-_E
z-Dza)7d=9_{fId;IT3T=GjqgdG2`#qi;G+nk#BH{4jH&YAh*SMX0(PbU9^Id0Bg$2
zvZdqR7eOGT9Uho00kgVzEGp3>_nx$&<VO)kzxEgWIitxoZ~g;^?wkR>(<HKGumqpf
z>JfzrrJ%ShnwGl><B?HCuFxTcC$UwNp4s&dP7Qldm1!d6{9q?O9-aWM&vNjCp%j@I
zV@ZOhUq*{lM`^Ii5!!NO3jGxQ2kJ!f*(Ig3$-`M^X?M^>Hl|XNKHJlTdO?3_aPkva
z>hu$(R+bWWOEBf2$uPcSCePwX83$nG64D{VDEIa<E%Rx>h{|WUCx9W3rn#~|rHr9%
z`vg`QHBeHVq02TM1;y!VSR7i*jI`Qt@6Qiz<GYfs`gZ_diAIvM@!>>mk}PF8ky*Jy
z3nh8lG^;QPo@%Ydt$IDEH<6DaOJ3kL2Ps_Q<czH)v&f~mKsdDY0$z-eCzB#XXvkVo
zT5hw7l{S|l2fm*p1p~s^QKQIhU9us^&zW9oF5+!4mH?$`{;+Gq1X`aYO=fG9k%?_a
z^o8|Y2o5kHQz8{`lG_E=KR+2eQ+$bo;70a{&>5nv-2=^$|54V|p6DG^AQu<Tg_GC0
zq-ePXm9*Z^-c4?TK%cKL0q=nGn-y^Pmpzuef5G80<mmFdZY0qzgDiY(0r%PzSibr~
zxVrZrM}b)eN)NK|*)nk^abF@&a+M!hwMvD%M?S*>KNb$F407NPB{H9}ge7lgQJKUz
zkP`Zb$FFO_quyS|zbOIp`&8)frPJy6xUI~!1(^`mIE+hr6zMb0#r&~Kk=*UFU|Jqu
z!v(i&Alxe#r#WhqzWaud6j01R#+>JUwi9Iwe|Qn$DMJ|c_y-CFe1MZeszhNcMH9{?
zb8#pIjJh<KK_yut$2C*qW_8+H&yk})>5^J%OuMa=@Mdl&n=Eb*A9$x(#T{C(@L~*G
z?kG*F6ogod9vKo>Crvu<1)++UBE8-EgBkC4Bx>$<U{_>>%2t!eP9b@kz*&gz{SkpT
z6C+utl{w6niZ`ehkVJN88*^l|X8!P(dd6d-HZ7Zcn$?p&fNRa4@zo^1vq$$ez&wSE
z5Xs$5F2~h@X~0virP7_4Z+{AAiwvl%oiJ5RUjUJ!qgeAk20pgrV`+*t3by7k_C%g$
z2v{>BqKBBcqU~hnih5Lf{|mC;p2E{~8xd<xLCG265H>rT?g(<FT8tyHy3h=__jC8>
z>JVzWDT-}ye}zj!t?91zQ?RPUk3_tbXV%Bwh1*vzu(e<OXqCiLD*4EQ_N~@q7W?L*
z+oI{Ha{4I#c1)nJ>)T*{@GE$}yMTGl8Z#kF?($3?oaEeff^7Y0Ht)vucyyO(#hi^t
zFmHDRQxK~|&2Lx`$&eH}rJKdHtT5U$x}QCGekOT-u>s#&=d*M~9rR{IK*sQ6_#~bN
z=eiSc%e@dX&)kT%6q*5j_7UZ6&F#NVt$<{6O=`6%nYfq-5c`&KTw>vclRqbsdoPQb
zoA-Cn(DT#L*C!Ds3R}ahWfl0uT7=%{=zy(aUc^#e7U!he;DH6d@KBo*eHt@~XjQbK
zQ{QLKu`mL=&Kok02lf-G$;0edb$23Duo{lNOQqB5d`NDaIladz`YY@@QP%Jl6DD|=
zC`50fZzj}%ZLBmCBke^WjBJ2&Y(0+8_9k*WCFr@%3Z_NblKL)YC^cwfGS(T<y^=g!
zb3~4~`ai+$<7=pmga>3U?q?N??nCpWgFuHKFcFWYG0F;a;1oxUt=FAOvyRr+KfHE?
zn%<d8=fsqN!9yOYSQbi#dSzL?VsVl}Mxf11n1oC9L5XlC=vf58XLASEPJIggbhM`q
zQIC*p;W0l;F7jH7S5ONH9qN;r4m*RsLa{&so)~O}>Fgye^))1slcOMgni8=X?`0p(
z7)Dbgpts$nL0`ERyn}T3E;7?F{O~rswo;EcegBSX=tqiTF5&SJW4hj!I}?5s!++YZ
z;O&KM7?rStkkM|~d{&Gm1pi^3)RqE>?&EbnxCQ3_{bl#YHb7WjHk+n;98ERjiI9*2
z@k%#_GJ8*0V0eU1wB%AiL-ACz_&NACy~5eHg4A0{oz8G^!&K#BC>5x|6@Aj=W0W!T
zBE23i{7xh}F4i<erXHPgTX5p~46J_;%XB^Y4m(Oj$lk9~WNiHp)C;=^ySY}ZKmSBI
zmyRS3O1y+YsY@{Nzn74;=QLHy5+$p`7f_wMZBVEYOdcM}hQSpJ;Qo_KU=s1PKJRxN
zrmoUr3#?MmeaBUd@xI6=y|$q>`L`I`o%1;ItqW=5NIcWF*s&s4MCm@B4i!I9!}Q8L
z!l%pKsI%HP=F!72R@_yLxV@SM67!A7>P=DP-|#*nd*(UL2{8w@UV=oeDTT-56=+f`
z1?%sn!0{-=)wMkETo(Z4e}1Bmn;S6`d<dP3CFvmLL%nGO6l$*^MgeEpejO+J<?&%0
z+*<}#hhE_QyhYS3V>|O>zYHldO=7om-wT&-|6wvp5S@1pxTH}H_wAC$$%E@aNj-o$
zk!V1q8_%NE6?LR(ck#q)9(izkHa>P(N`zx~(Y22+GiMyN=!q5Q@X%l@l37OByV!)h
zU6ux)(?8>x3Bf?bI7jW10cKk;rzd;Qq4tB@ASq&u?JV_%+n%49tFo*3Qu#_Wftxqq
ztNNl3lZI9F1-Qu-<JzfpaE^21x2JiqgAKB1@n-<OO;M(98f~28;0gMFGU5dn&n5l^
z&U8Vv6zD(Y{>GVY{7uTyWc9%)Dm!HaUcHQk_IZYE%F{6j-la*d*^gpW&R#lly9Lc-
z7t#EHZ_Hj-#PY|nbP{urc-DB4wvifAZ*mLgow283LmwHV&64z|o&&g_(8D+9##nL9
zW>z!WoCKbAr&@-JAbPbEc%KU)<ySFw>^cJ#PBY1WJ2`FZ+Z@LCpDsOiteE#vDGmoh
z=Fmml>>kbOD+Bc((3-cOk)5o`%~Mz4Vf<uj+?-7^za)}b^Fyh`nL+mV@CTT^$%fm4
zSAaP8y=__fk@>gn7#8zKAUV7Zrt;6A)xBpJ*^^iAimBk~tO+IuY~jq`QnXH<hB<}{
zz~bXf=-VMocH}kVkgOQ->j|e$6?HiNei#xhb<r<hiA?_I#+n#PquUz=qEc2363xyu
zJV}YfIef(Opwl$+S2rF^nu1<hJo0u_koRpc9_ISy;P=rQNcs00JpO%yN#Er`Tz?|%
zyXOt37pajg`#jmChohV}t45EFHL@l97QnET4=hYnB%550GZwvl{DT)|aDvBSnB-*x
zdJaD@>zX=ITQdrc{l~bL$ba}E{{}|LW}{?eJRDvfNYBekll7{T@Yf+8TydI6o=BA9
zYPUBKWX!N#VurLybCmr!oqJE8pJXm~{D6i#$|Nv*JGZI*1RgJMbChXC^5D~TwEpn{
z;8O$t&fimD>EMkgxp__c*%LVGa1j;X7-3_pxc#lz1Xw<oYmJ#>2K|SoQIo!_oNMd?
z<h(QBG+H;bO&sL?s-8y;rOV;pjAq7fYzYzDYD?s#XV7^EO7WI~2AQ6m3H8pk@a|eT
zTs86Kzj>$$>JI?4<Nk0&NHv@@G@H8T9E8_#y|}Mm7FtK@;A8hyR&eH8@ay7ecVqte
z)no=`^!{)&Y7*pL*awY@P9&}F8^mw3BR+e@AeVbj7i?ZbBCizU*|n0qsJSW3cTYbm
z;M-^)U3eO6kE@d=l`O0^SWik{J7PoCM0)=<LD#3{=rx=GYddO*Zb3Pyt(Kskp-spO
zDZ%>F)}(b}GfJ*bpz$x|ad?Uy2yIg%Z;jSd(*6;?cBG@n`$q8CvzYM0zN6^PBG@|1
z7Qb&1CDNQr1au;qSE{Y3ucQnDHZrhFISFg;wnNZ<IhvWc5cMv(VOu~l`*2<-&N0cN
zhlj(_T-ub*+V&J*Y}KX2mPgg4PeSU8Pk3hHJ@C@VWMlRxl6gMc$gbp-jOTX&(oh}%
za!Jb6$5N97{i?(N!_ay7W7$S=JlUCLB{Cu_WQ*rMS3)AGD59hkr8God?T}=PqEbTA
zQc6Yf+~;U0O)c%AL0Y0zwDjJ8fsfB~KhJ$#=lp)(ueP%a@aKG)X0RnozN*EhUsIvE
zHws~+{zF_M|6H_Waw>l*+>TO~m%+|7WmeF25%d&`QBFY?=b0PT#|^H6aWfBy>{3pN
z)N>OsrOOaBrrr^6T!nZiCmVXygUO&yAa4#HO`@bwh}|cToA(g!Z<xze!xC^vBam8@
zD&7mf4;sS$;BMh(STitJY+)A5Wb_F5E6bR|-Bs9>rOnO0yO2()59g%<htiwCP$4xp
zfdcmAL7C5Vyy8BFHBHlC@2VA9riUcUaVo_~^YQ%U^vN`7{~n<kJQdCU<iezFY1CD(
z#XdKkMhh2Z=6B=(WSW>zS70Vhtl7vqmX4!oTT+>I`(3fbg%4beag1Ol$pS-xFn6gu
zl-{p)CU0kHN@B%O`M{Xg>{f=V@>7Dr#Q~>0GiH*lacog~3~(hCki0a7uTrpuDU*(a
z>w$B4uy+!UAMDNo;@8pm*9Smz>UGp9I?124xPUvxmq6KkZ+Ngal$YIdK&0dHyiWg7
zHdYk5((fIsX>X+l+u0w)TsEy^I<w}px8A~DT5AuO3MSeG%d=QoT)gPpL0h^V`xt_5
zE3#>yoAKt*A2=m1RqUX#A4X)_(`i3x8qzPz2A0gHh9^h4=#Wd|uf@W>u2hMdK32fy
zAWM<mevU;Z+w)0_WidlIBaV8X!PS>E@TSAP(Y$CT)U^Yu<SCNj=7(5b@)t@MG{fGZ
z`?yOVO9TqgA>`OMd{p=p_ZJMn4FXN1JFEhaYq+!8<D*IH%nbhSzo$YH$C-8XtU{5;
zSLhRH_qJa*aXsfH*j>T?;dW{lv^eNvYx70XEX^U9I_)B#+oZ{*bl8)5nmSX-HlT?g
zU+`}(v_Q<Rm!emH7eO*Fg^O0G!NtaQ&dc#B_(%y>2B#p1F-d_@tInZst{>|=D8ZCh
z%%sK|!Gf&6mAf8ei<aIUkau7|SCw!P?M9f=^95_+KUYoWZ=y?86~^Edc7nUUVvIlu
zGlc>dOFC5k9-Eg=#n#L<aPnxdXkG4YEZ?HTPK}UY5dty7we=*kYmjBzB6|2pcVH!|
zwWyl>3PL0QV(L(7W)q}BM@qw3T*GLlYkUk%KgV;`e+E%P&JVn_N&&|<J;&?sb?Bi^
z3;(IAhnIS##rECI!0sFV{ERpw_|t2|+<d>`vd};E!`GdM{uM4n7w^L5-cE2xOTdgN
zfB5)M&D=fdD_lZS0+YP56>1mjvb_R@CQ{fZ#j5S$E-dro6-^9TUU??uAG(dh%T7Re
z>}&j2?n>)!CX(h_SC|&@4E>cKp}c_xEgg~uJB3|vYquxL2_&f{%3*Br9dq`@?j0O?
zL0}`i3*ReMK!WCOO3|}qGXmsjwwPmU3s2%w{~5fZh5~z{7eou6?T0lrBiLOVRXV1$
z1dm%@#N{EW7%0*s^ZnQOq?n@^m@o;Go=jujH=lDeE_q<0yf&%oN->q+(oFPBNY*O)
z3Eww!Y|S`@1^gVc3bvur2ZPWlQ=6B&YYe+B<zVq@S+W_ciCNMj);PaXboX{Y)(&}u
z$2@ak!d6QfH1oE2ugq0=b4-)%>dZr#9W~hb!x<(mnm~*0?I*L%)tEk2iCgI>5Nk~Q
z@Lts;oNjE$GD}ME^vz8a=6DOlI!%!1s!bWb=W(si05;N1nWFAp;cge%(m^K0uGano
z<Id;sxib#WI}O9tUb+~r+>Cav-ZW+M0Q$UZ6Z}+8MCr;Dy6)5q1#c%ow2KB)8qtsU
zEC%7vUB|(z@EwLryW{L*V@R&!1HbA@Dy&$d&NRf@V$+;Ly!xjU)+Wp(6N$^BcaC{5
z<swI0JBG6}+mi)y<3>0+(2HKq?5uNhYQS5*)!c{E&%|-o%J|J!npOQ$6koh|jWd}2
z9{2heGT(8&{O5H(q+2dzGM1e}=xV_~&ZoJ;z9G2kZ3~R2^L)fv6`U4!0#92ECbjrI
z^lR@jP?a0R#*Hb)wyB5VQs!-b%E|$FjQ?97In4np_J-3P7aMrCbpy;$-$`>nY{wh<
zKlr}Sk|eKF23>8}!SUN%Qm9m5_4f9ZdG;gfCTLNqkpw>SK8+@4^TF~E4-Mh7_z6CG
zWcJMz9$e35flmW~o3Q~}xJo$sL5@`))nd~Bnc`f%X>9V2SeC5u6dR7&P(j=wSKT?A
zDLuCUMN1#!k})Y{C)j8`3p0ci=Qlnzr~~JhCb2W~;_%x9L!pW7hbL6lk}%f=mRIlk
z=94x&tsh7O(@){5bUQqhz7ER<nS*lQSo%I&g%(MD$5Wz2?uq#=m}+XzTpD*HthQkT
zEGr@SQ!00J^9i_h)|AF?>;)~)<y5>*M9XRtSmxR+<{wuNSxe?{>;I`^c&I?x`@V{v
zc`c@nv7?yuYYUcRRYKDJKfz1U6^y6XbGA+YG2YsfZ8uHB78xPYw>*%gObZg-bD2U3
zK8n<KYBTmdoD7oAdepo72pY?8gWYFDIAQiE>XFc+L31{N?EN5c;I89FNQ96x^XToY
z&CsImjhsNNI<}yRv!C#qJ9Tje`|-FD4FfxH)xL)?Jy7W3&oIJOZAO?mZyDr8CJ5=|
zO#b~Lb&@yVi3@yN`FYW^U7pP<;eM^Sinp>y;CE$rcK=~DO!zSo3fkL2UMCd$kL7||
zhYl<};XsT1!r<8s6K;O77Rl!Ia+<dsaCpT9e5HRMGvf^?GiemFiLhWps<lY!-xuil
zH;z>NUUIx{Hf%W)2VEP!z<~9qxgK$fU}0U2Y~5Now<DR2SYHT!0(sF^Xb=tDn~Uvg
z0jPL7!gZ_HbmkN(%T^rS$TsX73O&Q}NOQ?GaLlwNy&6?)a7A3AJdS<*{S0%nzryor
zHf-|fQk1hX5ziXrK!;M2uwcLoaB(*07p{7Qx%(B^v|bAqa5ox9Ez0J+zg44T_7l7_
zDN8JGeu~%eihx@?XE5F9tw=o=#oASpq;@$Q)P|3Q2Mzvs-zrxm^?oO4jGIQrPmP%7
zjpfYskvu30^KEl~cU)D~DZVqR3w6#c$F-N*Kq1!}UdV+r%Uz0Wh}m9I)J<84N;YH#
z9|zNd@_iVzbuWs4ji8p<*;uP#Ln<*cv~Hy-I}#$nmReSF5gOU>Dg3Xf{F^6^Tr2$U
z#p|QnRt+*p`py5lTY#%S{U^Q<(hS`P4sbd7&$<1J(_m4!3I<l_Q-aAViv7StRDl|Q
zUE(e7$+l$|c8|pUw{+Ov&*|*$ds!~2vWqwOS%rQ-MOgRHf!*9w0Nm9gP|Xw3fInK~
zo$vxpKbCW=Dm&2WuDj5Jp2rMwPxC*=snP-QV|aQ(hFq03S&zyT)Xjd1PnCzWZKr=?
z@!fOitzpM}#j?!SH;pm{(&hS<^F{h!UcvTP1KEzQO}xU%mH4Oo4j7~fHtf6U5T_?e
z+e*|a;^%nbuD1ulK98aYjo}*Y(&%I82ul2KH+p0iV6&PNE~%`<mh77_A>=lu8E#^Q
z<}J`T&VeE))j(1&z^{PmAooNB(^wZ~MoZJLkvS}D=pdRVP%ZCo*JH;DJz(q0GuYL)
zmkrS8xC6_2ajO4u&`aIQM?}Ps-Iho2@mLW1H*F7IFXLdL^JTDEJe&2*DuXSb?C7}l
z8lE$H$xlt0&o6Vj37%^@;d4MfcZ=J`v~nBy`oLB^@}~km48Dh6u~HE8>LSR`zYRq@
zx3J}-ZsAd{ap?WUmV2Vsj%R~rWAFGOxG;S(dRPiZ4XYbm;@D{Lyr;lI$FHNb+(g*u
z<H6fC#Di#V5Ihc_i<8UDN#|@k*bU1<)jMC&>-Ie;Up0vR*fa=6xSFtI&?g$z-UsD}
z^hoKHkQ4io&Cip{g4M&sxc%HL7PtH!9Gg3U$xe8WYZrBJKUWvStT(GDZ+#Ih^Hbs!
z6&~~JFM6~2j`7^*^cb9KVkR`C|A2GuA>8y)h9na0xwM(9P-PEeQlmEF>)W>Q?Ylhv
z{<IU1e|gM}8$N*~Bz}s^XXem=4M!kS;{j}_>3|IXleoJvmQBAs5+#h!!)V2`T+IGg
zcm}59DIpKzmv|2nzinV!Hh2)XXE-w+a2!lF5(~E$$o^6aQ1aT2N#i9@xA$S~ism%T
zO3Z>?cr4mB2ZDPB)9$vjn2{+53y0R?4Uc{FXo@ZSP%{d~{wTsZ=A)SWc)_-n_lD1R
z-N<?eIkH1z(s1(QA>4;RQ#iNmjf<XwC0UPI0`qt&(%bS1*F{$21mhX>-{l}YsJH>W
zyb~z%*m976JBpr9sDm5NjZspo7lOul&|}^O)E@beCTPG9A459$-&+h;)@P@LXYs!G
zV~{=lf}d!y73XQDQ{QHHdMEC{iuW(@TDd^qlSl&hjZ<lDpE13Vl?CZFv&Cw&kGlGu
z?GPDFm<L|#Oh|jG15-1aMf<#Of*RZeg?M8ozRSUc>Zh)v4gEOqz6zW3P#w~hG<mI}
z-L5^f9}K27)kox&;NlH()XFdA+c#$7oNc3dou#FGP~<wA_xLiOeMO6Pw{EAAGoFi-
zPP_)0n6YF&{SBH`O=2UZT$yZ<4D@vm1^@a2R8YIg67KJ$%i~NyMN`Z5!|EUWoEM?&
z93Cete@*su*aptU@CJ_SA76jkLWd=;R3nA8hsFIG10nm!7znvjKwEmQLUB|QKi6LP
zJWhTQ8W-n4yJ96ho@BuF-V*LHyOu+K`U*I7ZVW6sV#^@rAzJA^fblkU*z>1|yX&gY
z9#~akBY9x_${OT9InnA}S5cE!gQW*$SjLTNE~CsI<IW^t)cvLS{*hq*+MUh*E}DoR
zhuY%EFKsY#WCiqEo&d+&p&;IO6R&@s1OB^*@ay9iVfzHe46RK$8^MNg-~I~*L=UB3
zy|OGbIE*&=sW55JBe?YK1$f#k&yQ~X%;U>kE@Jy<cyfLpdW<$kSn?9?X_&DuU#Ic&
zElT+lsl~XX`x*b<@E|z;31Ihi$FVzKYH{DDVS?G7L(X3b&Q7)vQu(IbN{`XPdj)7<
zV;PogsY9(b`QZC|6ZL6}xc9r0v2UIN4cRb)w5Ry7edc~7wLB5`Mkk{Cmz7-o*d%<b
z5y;;DZo>MO0Pufc$Z4oJiB&y3=(d6``80jtJ9-CDq04oYZYk%u#ecYTw@`E$I}r6Q
zx5I+Eix{k5fNs9yNGIk6w`h(M&Kv&8r9onwK<SrcQQp?<X^Rq^(=X?%wrt`qo)-%i
z_C`E;$B{p#zmdKSEdldO3#i2*pUd==bv>G|#v(_Jq6|kFN`0+JXWm<}WpC|pn`tE<
zFC`0|GV*Lpsu8=}VnC|)b0~7U4Q1?AVr_lnDehtxNS%HRN$(_J!7gPXl5>jl4sV9r
z@m9<-$DZ9E*uf>!Aof+ZTC8Cd#qLi2i~O@L(Z@Vzt}QKvT`Q5}W_EAFZw_Nv>*{A<
zQ?v>cUjBs2B1_yMm`ArZ2t=iZM7DKx85{BKJlyCsr$Hr~*oIblmOQV5OL@72HLp&`
z(iJyhpRzq0XPwC;s&zS)oibGFKp+ufPcEPASX1l*mc25M9-Y@_qr+Zv@s@o!O)$B>
zJN5&-guQdL^*fy3xRKY(I*d1zV(7sJOUyVePgy$q;ZRAtXkT^_rgoNM3_l2>cBkTG
z2My-9FN6YpT=As-G=5pXCY|d4gkx=nQO1QJdghrx=Z{&_aOVTywlo%V8XT!xEJ00c
z6)?eGml}n<^pXJ+px?Uz=Up2B4|`SFhVhwvtU($o`Q5_}?*zNy)v?UlXbFU0AI+9~
z?t{5Eg`9`A6Ft1Nf$6oFV!rPRoF!<ucdS#OSG!{HYWr}!bK*(efw)~bLw`J!otq9*
zDz{<%oHkUS_z3ofjbx3<!fr-qAKzK-C=z8~!S||J%&k%%hs;$Y#};>bB>jOOJy)C6
z_iB*zG$#sJs=#k(T_WU;_Cme-Ni4p83p*D+hXqI1v$*?tV6{||N?j+hW$6LzjbS2{
zeounL&VGJq>j7?2<qCW<VjX&TJ2RV!7EC=Pg>~#!VAHpKLgnhQylk(Cia9SFd36*u
z4=CYp%6gMjlqL4N-4)+*9M0Xlp2^~>?}BC181}k$Eepg{mhi%brT460_Z468s=33c
zyP^q{thCubtwpr2If(5vc#Ca1p)7ew4|H^;vFbiYoYyf^$l$252fmw8uk$l+*LR7%
z?^7fh*IQuJ?<cmu>&A{WtFf73KT$4*gjT5peSDJ1JOk}0?|BaDteEP0B|i}o&GN8l
z^bK^a`it|9D$(?hmTdbnffCtXk1JzGvxM1+?ALS!_B87dv@My8wkN*AIaPCZt8Eh)
zwbb)<I&0}g`feB--HumlKjGmLMU30xiEDx^aoWg(pz7elB84VtanWWtZFCVA|Gs=S
zv!WOcZ~WmFI&tLhros+BwdZ`cWRueLP=R!L8rR?XgWf&gc(^3ZYK8k<nB4$)zUwa@
z)ym?+suok$M&W(ewFmat3}81pw&U2trS$RJQ1*7A1=z|*!H7vM$lbb&E+J3Aq~-+w
z?L#$=eDMwAe)-b2-)d~+#09Wp&Rg`3Ne7i*!FVNq9Q-yO%>Gng$9<|iq$y?LopOCv
zVb%`om9%+rycYADA<(k4jHqF!G~Fz%fOUg5VN00~iNBlBll*8l`fnAK=#=2*u%q~P
z=MZ+i)d~ICNV>3Jk7n#D;+<COu|I>pbN@aEvW`OvR5wbVSVb(VIxrd&t;5o%T2aoO
zk8rg_ho$Yf!bf`xW`eWw>`-Kts3T!NAN5d$dA>RYK28PV`gJ^JN1Xt_f+(>=A)~W>
zFZf4^L)hcW=kR#uDVV+IIG1(R4{vGeQi^*a&VFe@A52d{Rkkry?X<;Ve~m#_QwvVq
zKF;6F7)ARtf~hbslr8a3=dQ_na%FJ_u+Srs1zxVe+u4eAN!X#c3HwOHvDR$wNNJk&
zU?aep+i+u{U=a^4<vfemV@JtMC|*<u-QVVMYxpkQJ;0DH&2z!-^FR~!33sz_KQ^u+
zocG<B0{`VWGvCG%u28j?|L|iM-oF;W6oWFb+TN0S1_hzK#|UUJ@nlDz=hqL;s6fBr
zGjQ-m0~Wo05iSpxWPkbuqJQ-`-XUQlQ|OGs=HMcxecFUxH@$-Lce`N1-J5*-W=XP~
z`3{^!fpmSC1lx5imS)NMu})!!ls4Xu83yOU#wp=+K=TRTUn>TG!B#f+XAV9;kC^2W
zkG8Xo$nk9^Eb6kP>fnQD<JN~78~enkcfUoNut~6y$-s!wtEuatBx!sZOn0)~n3;10
zHzA6nx~oT_XGjLRRy*PLLGGk4`<DOlt4lOVYBs*N7($CmCP0i}8k#sdi!(f{gNl+C
zZ2d%87{9|^wEo9Qe$g($qFvA+(E4I%YTf|$cW8XQ-jgbPcIiCdKVTFq-PZ&II?QNe
z`ZZXrw-MI%q++>1Mp3d!fuC1x*=tQr%#_-KO`bA%A)yqmNGP$X>n?+yp0F$HRz!Gf
zLZyyEU+ADB4DC1vEB?em=viqx(0URcE)n6Ful=BYq!rWESJQ&@f9Ti!fSWB=$AGv1
zHfq~x==<;&$}JSw1rKj(A8o_3+>uRscZpAQuEhl}K0@R|M>ziTAv8RCg_A4O$l7R;
zIPBP9+UvC!D~8>`{BgN7A$$zXnY4+8U$@6=d0)_rD~0@wV5at=hQHw+0-dYw<I$KL
z-foy9bGA>xo&Oz&<5FKiMzRcVeUha2vApZD!F>WPd=gHbsY=quN!Yn99boDw^tEe*
z37XlQPo?m?u&5h{I!z_`l8iGw&co815;V4W8aoxWfOP*z!zul(urS=4FP?WE`Zi=?
z_xxh;{G0>d77l>P@xc6sS7A(xCaIV$#jR!nAtUA*9!-`L@@Y;?cdr?3S)Y#28(xCL
z%uviT$*G^QSD(E_2lN~)yaPA1a+lIq^LgJF(WAeo>f-{uY5OXHfU!;jjx>g_b3%qQ
zI?su<JU+qOt&RfSwfdk?vz7XHUxDsx9XN8s2PltIV-l8e{5fkA7Jtc@WL)mR8i&ik
zEh>N`>d$$Xnd3;J{w7LD55(L8DJ%?5#D$6+&5ByhJx<PKhpg{HZ<`FcyNYOjk~SOY
zbB#N7JrnD0hM@HQ3Y2`=0{fCyiv2T`Y0pMo)~<PoOBKi#W!IyG-E=7N1G-_!SOZ+F
zRSJ89q;NudAvJ71i7VxUD7Sbep1F|>sh?JW+BSFg?~6IR`rZf@Np!&8*`qLV?qT?|
zQ<mb#D^h{tSN>3$0jt)|rp3x@u%`cr_?_BD@OT+SgP9U7xBrYuKHuQ;xaGJ)AVx(T
z975j5mcW+SDb(U4$LyL^DRibARSnMIXFXbukNS4u$ecSk^xkn)-6zY2D(2zjrOkXs
z{6Mr241+)F<(TT86S!NcQxsd&jjwZ**~5>)+<`-`G`Z#>{C!!C8<m6D*9a9FA@z&P
zr&<tOZJ={TJXk4w!FM`(6tYQ@K1l9j_frGe%{e~oU}-tpPt;)+zqjLm(gZ!evp`ON
zGgOUDVR^HsGy5`S+W#|)|M&Pcyt}SJ9+ypF?pSH2Dd*2AC~jms-#Ww7`*v(i@e|y9
z;V9m}c?%VVUdyMF7|@Pd4?j8z@zQe{ys@PSv(8$=>!DBJ_C*ki=JQyMNC6z@%HZPE
zCtMkv#P46^*{LQg<m&CXGkcez=h#%(v}^@SxcUTd{+kW=stwrT5GOYIpAR#&Pi6Lg
zjr=)Il2sU;$4ToCvK=1^!FRhll?wSk{S!fy`Fkf@d0;PGSmngesrra}RytFC${bcx
zpU55i(<eS5DnYBlGawiC3mbJ;!}@6>Sdi%`aopU4qWsLKXt+y{c=h*KQ}0Au6GKQM
z?jFuoJ_pZK?FDk>54g8z6h1dop-EDMv3&JPX0$mJeX$Xr{L;o<xw)8=Pz%9t+OhGP
zE4A*w4M*>s;SQe`$PJQ5p{?}=rrtRQIuB&<W04AZ&yr@hI5CfgbC`l$C_X!%%-*P~
z(b3n1kecYo+Um+U+1nDlx!*;2(6*5I)C%;#-_o>PbtFp|WKJ`iBOzQ$i5=Hg#m8SK
zvmY(-;3)HlOW0tD8{Fc>mCD+<e|k5Rp4A|y^tHIGW=MVRwgUXrAPA4r=c3fZOs=bX
z19E>3i>CIMxY&2=Q_rP&aQwnr9HgEHHP3G2SL-_%|0A8v&=fLQ$1`9^vl5Ne8b=;p
zD)>85f-~+?<3HCIV^aKBAwx2rjq=o_?N8hIYn^rQ?ne#04h`jw`VVLBy<W6_+$-o)
zH-|L?6sWRlA^3Q1z*w6i{NobLnjhrwt)ZjYnnz>UqoETic<ci_&OYL9uQRB=#uPLb
z4q#PR_d?S6JKT{q5%6%J1=Eyt<E2OIu<)O2S>=p5q<L&QeR=0V&37+|;$Av|e2Who
zVK9jGdi3xUWo%%6w+lDMQkTA7(xeh6#DZ2gAwwqg=oXpLx^4Thro0W4Cnd1I-g>yK
z(-9Jn3DiE-jgYtLpO6RjhYGz;c=?|el@xD;mQ3W0D$e4^=S?EXa#OY{ei(JGEaUwx
zHsQ4mLSOM-1lxaO5Whcb7q7QW1hIqL_`hoe)7ZAP{PmRvqUZDLaL;r(rlLI=Z6vEP
zq$mxYx21C5!#qIh;#JYg$km+Y8UrwE8o@S9JA+3jEui^_+F_MmJv1Jx;CICLg03)U
z_xP^N{-q2gV-HD`+TX;T>vv=A(~okNGK-jI>ot+I!eQ9H?gB(VOQ9``6iF_=3FEJZ
zu`q$C>+x<Lzu4K4Ex4<V)kYj^NI68|cZZY0+eDZv>@;dG2eJlDbqtw$1OoblpxjxY
z&kyjx?<XV4>&gh2Ff@Yo^v=W7ojgqOv0$}(R>8~PJb&idV79zMUu-kx7S=_N!F6g|
zSge;FEX^;3zcC0ebJW;YkpUbjQ6fQH%wA>8fZ=kw0(Dn`CG5Q;jyzP0A;z=0mAP&-
zY{?C7^L!gz*R9C5xjcl~$@(C<`6#pb@Edjsw7Fyci<w7=k)WJThS}%sq2$*K_<qll
zbfz5S8ou2}jU~#U_)UW4=G4JU<!{{dQlW^oXb?RX?BumG1-qGCA5=Ae;D-MUrpzb%
zdEU01r60AU`D26G;8kjrepZe)R1akrybB>YdNTX2rA(i{PN!qHinxbLZjA4F22W#x
z2)o{MP4kjr(dT^EidJ2A=;BbeH=&U$`#p)hdRxuk{d@<@rcVH!>2ty2>sQDuS%}t?
z@<df*8^mGj%Rqgm(DRB}!|YEz!^LkOL*OVSI@!DjE--{oZzM?ZzzF8Jd?X(>Y$EQd
zGh(G(pSg&mAxuG-1rHtd1`f7zY)Q)&?t9fUFpm5I#lrJGtkwiAS39w{XBVSe`Un2t
zn8y%1?kY|%Ps5u}pP})pR-9TC0$R(Gn8SA~7Fn|e&W?|v&FTVKFF%^RS`9F?eF>C}
z6sXs>*3h-ynT}{l;qo+XD6%;RYj%yGfxg3qb6E%H^j49^4xf#Ne<bmUTNb>D6-+_a
zTPP;)2|jZ_1)}@YC@CftkL!-2sKn{C=fw?JIdmt@ex8Kaf`+q(nI@<@Ae6UiDF$0r
zcj8QynM_bR{un<KHw$@jvp=@b_s$Gzj(xy;GbW36zs`j7Jp-v_Sp#Mp_(IBv^IWF6
zCR8#RI$#*bM#ap9oX~JGxUvTIP8SHosA_O)@}??_*-$<GIPbVO9|P5bS&pv~g)jMn
zE$@j}T~~u8Ue|e@$5sp?v?({GN<7fnAJQKDMn~6r%>0om*q^e6ZnY}c=rn{?u1-Z!
z#A0}<q6l6v72jUehp#>>*|r7e@bo+tP<)%o9TlFJXFG3#>6tM0v856G%0@AtFo6-!
z_l+}oQjS)s#%#V}0DJXg0RHFSh`Xco8P@yL&f<K~+r)E2NrFy(;^0I;CM-ETPt;aW
zkG$1qe&L26yxP$eAxp6yN%R_$gWvK_{|@m6l0zus$4F+Oqrm>VZO_>pUN36@mc;^%
zKf+Y61K2+E4MtTi=iE;J0(DtM%3i)7C#gl_Z`Yx$+(e*&`mD#;1sMoTg~pYB=7XD)
z>z7_j#6i8TlrQ1Of}3KwriY^-G%AMqvlAfU5G{HPvsuH_q0DBID?9xlmQ9&DgYpK=
zfX|zS9z*dgrgU%$`B625X+Oo;)v|2XWP9P>y$e>@rm_7wyU-%I0c)T4azBf`VQ2AY
zh<CgX&*d^8agzh=6}n>eGv>4XZB@8@TNIP~B~Wfvtl5CK>0p*{j1{_g(x8#6sCHtu
z=w?TaVB}iD<Q>hZ_sl?k*aAIa*KRD#L|;N(@N4*SVjxZ7uky?0mc#1HTG+BX4R(K(
zg&|HAxT#Z@dP{D>*<~n}>z{}Yp~pbFI~4~#kQZot;kejFpB`3B;-t=5&~~4{{Ld>U
z6ce0GTXr7?idUfQDa9~zd=lNR>Jx3e(+LrpUQ{*mHoTb@MXOef#rrw`U|Z5W(zi+C
zH@vxm8?PupZe9eYNgfu3T+E|sSCa98oicV_m*o%ZTarSWKq<T@%?e7?Sj4nkC=>R~
zEATml-4qC%2ZfB2t}1s!U5&QsT!x2Vp1`RcS#ai?Hy(7=$CMUZ%6aw_#tXj{wXvg^
zt!@a|-xcN<+w@t6WH{a%90>iHx~wMRBAV!_LC$6^hG`b8^x7mQu`8HnKU|Nqas+x=
zxdXGEHU+NSD+7MPC)j_}g>L+u$2<~yF(GycJ7oNdr+{Ye>arv(RtTa~+ZrKxqXpab
zS1`Gc3FMwT9>WpAlGxpT2e0mVgH@aC+14jZSg=+ArIjyYxg$#;a)%+4zA8z>2I`ST
z?`mrBJ`at;d|iEL4;aMv0SjM*A2Ku9&d4DsaqcKYosgr({LS#U=?z{u;|rp#uIx!@
zIY~}*rQ>Ih;p+L*g|qbwU_aY1Wb_g~(x(rHD9FI0C~4T!eHj+}F6M8^RO9P?rEFtB
z6hHn?0W9BBR<~HGonN>31AnhYlO<|#G=6Os#|haBd^#I~N{v~r)F4V0@^AF&9~zYu
zf#cHO_)NYHhlRa{vOn&;fn_TOb(i6{M{+o;?jOh|H@YgMJwTOZhV*Wb3R@m+2e0?~
zq4dNM+S8}TI}The*iV09{@+$um~w-k9Fzu?&W5y6_`Mxj7{sKkK7dDaH80Z=D;Vh=
zgzwKN!TONI;*3;iaLWo@@lFcIR$oE2ExYiq>0X%Jwv_F&`^g1s&ca(YOIg?Ha@g?l
zDi{WEtS4&=%sIbSpa+j(hg(8m+5Xk6e|`j5^sCb12gYEzZ3~+orbGsRH1YeYo4ih&
z747;jf}5r7Me0FPEJnhTKlJ67Xye>q`n=eb{H)se9D#Xp)|O*_QL23IeFDW-j_h#G
zOjg&U38Qt2vFLRxd@=N)c{Yk5GJWd0Yh^p;eVoJ9cLu<s9ie2jFkck5z>K+=O$G%=
zF<%l`jEgR>6=r@4w7W5x{XJsI<|!C3!#n3;_%Wd`my<$^PfOD7DT)C5?U_l^7o2Fl
z4K-H3<M~o^lHD|dRStax`d$<Hducy;d?!OgtTL$UiVAVEZO|Y55EXYF#pl9v>$$T}
z;L`+iGpu?*fu->~&Gx|N)En5j*%_|;%VUM!3L(pBi|?kK#gp3Be5jT!$3@!kgFjl~
z$IJ0haxDV_&bbI@02wejqRUkMzM<rFOFDJcjVWw1q{5{#>~fr7={O=#cn4d<rs{!M
zb;p4#>JFk4W(KtQj2Q593N{tn(x`$+a80=nYYZ)U-(!dIj4&Tv=d+D>z5h*A`sok%
zFw=>>i<`_Ib`PV2OLpM7y%{(*S9l*h&4V8re(*zS3p=@P1bf{zjGk_ug~2lhQlLRI
z?tAzFt<^Jl6bE8_Pb{cg-@xz1W;A++D!Im4&>FiskiYqZV+(}N%~?Vvg*X~JViXK3
zwWp8`!cNC|05yAkf|&=@n0r7rJkaqW3tuffFS-w_RI0G3R1qFNxQ1_TFJ#)*$|M~v
z!A^{PfnoV_w4m!Xev}?W|LnS8i5sJpRyRd*-_OGK?0ax%)g3ICOl0*9TVZ~YNTA6&
z3nsR?pwKh{Y;SL(^K%*LOt)dW-CfWdVa>};y@03Y$gr4!`#FdAQtag}<VG8-K*BjY
zy7_r2T3;!Jn%9l^x}}I?Wh$(D;wQmWtjF@3dGu7d4`)($;#r|nGr?4!R>dfjI|$uP
zh3x{B^s@N+n`!*om`gBqoiy1eG=uduH|qMD1%4O3fRl=4Cq$E&!uwcuOUEBW6ig^e
zClQpQ27^pZHU4(o0)dfIFyAHxeYWa=LtPKBOhbHrQiU?!E3iXV_h6x31&*<ti%&NS
zro4ZbV4+(AD3(gFkG`dl{y>^0_HTpeAV>68FXToG9HKw_4&vqj#=4A!EYm0}v@p}8
z)9yo<{QGvi7h*svZxtwii7~B+9sxO%$1#_o3Ia2xg<p+nc-Z0!T+J*ZzrH$wYchtJ
z-Bn?CK8#~8&9(XWb&7oC$>HpXLogmsIfr^rMuKnqM99nthK{8(Ai-)3J4a^_9Oja0
zt2%^e)^gF=KcUq0E=-#2Ae{ShAm>UO8VJPBA3==}*Eot5W<J7<<HlsLej9)7?LQb1
zZ~*KBe_@k!GM`bj2+B^&Gu_SRT%p|xa5Acerv)C&WPv0z-SC`i{P+_jSGW>I+TkMS
z9cVgx9jg&K_x?k4X@1*CHm%Hy)?FJ4H_i^^l|IX|+g|>{y|)42_<I<6{SFQ}@sIna
z_XU^Kwt-a5CZLdCp!=U-7Ay1;pS#in^XFEHXZ%<W-<waP!69FE^^O}J4zXlo>%~yu
zf1IDW(OjUSNy3?gZD2I~FP7`>;_ZAV&?V1sNM0br+@b`s*~@Tzp+A##I%vbfBk!Sq
zR3g5%m_%C!s<OKy+tGVvIoGcHf~y-Ubl>0Pp-GiBJ=h=2e$IQ%nv7DI{0;>+{D~4(
zrM=`NW|x8G4t0V4C&@;(3MP(yjUc!Gl1qBJuybl1!OV2*nDO*stTxnzt!nb3>jhWP
z%c}xS<ZOt)<;Ifc?15_2PSh+E?&CFsM4Oj5;z_;T+*${7W?F6oE9`&56OlAI-O%Oo
zq6)CDVK=nu3M`0WGQi2HLgelr;?3%d7+gNZk7>9qbT@{xk^@e3KFp17RD6SRy0_Ss
z=pUdqYdZzCeS-hKDl(>Xm-(F$jHzyG(EZd~agVG9Gf6Dq<$vl?2Uvm0twWGgBaQET
zg#5RVF)t}ThDOyg{D~cJu+Cy2EzC=IxolHN?MH+=_TWBzF-@1P*{y^MzCX}@yen*v
zipB5W$Ivk+DfZ>14*OUx&oYY3glvvF<+SaC`mI-Cz13!X+C7y*jrZaW#SC1Yorb%^
zI-yajwk~gMF6z$oLHE}uxGWh1N^KHG`Pl>)gluX@%rX}C;3q6>Rpv`QBjL)P<GgIl
zWt6#plwNI-BJa2MX!p;W1uylW?jA|uY`kB*vP+o7X70s_X(!Q4;NhJ8a2qS*T-b>C
zAmSS)aIY?<!4R9{yed5qW}?n)l$8vL+}6P3cj`<mItHr0)WOu#SU86WB*m9Hv{Ex2
z7cVuU3nMvDe#Y}3P3P00DT_r5C7z?I{X4ijTN#|c>Z1FKW8BEOUm-<m3%zh}g+20n
zLDox?W|=2)Z+3)Hm@#A8Aw$`=j+3GnNlHS!YYcmNz=Sla&*0a^86<w^Pg6}Uqj9)0
zl#I_u?f)#O{JJSj-L6Rof1B|AVP~;wP7ZGtcmsw@M}k|!RGfUO5JLG3u36<D{C%$n
zJz=(#yV8U;R>@HO2R-z;y$xR(JmXHJ{=-P+dcJjMDwk4F35u$B(f9jZ?xfg+tu1ha
zOF}j_&pDocCH)gRKmTx5U<?jjtx6@<Ie0MVF*h-<8WKExaM;6iaqvwBaUbL;<%SDI
zUeCrk;}qD*I6qcdl`R-#Y?#ESAiOrGUod1^uqi%+h@1q2mF*7k#ZwRX*<Wlh;qnMJ
zEh`FEE)Id?slD8lu3`vVBTXk`hf`*SKkNRM!2LFKqRm+c@zllf;(He@;L3l!SaQ3T
zPj%?Uh~cd$Cr8k1V@_%(uRw3)aGb0^kkyCVz;riD!ID=36EZ%)rSWE1G5#~Gx+v_=
z=l<bK2cPBKjut}AEoD}JbUvxYO5qS|U#`s~MLhG?Vti*Zg7#>h#7CJNnGVcEr}Js9
z-$UlJaMv>K+=r9gE|D&)@EQT3Pn=j}gPC}zs0vk1PXYh$KIrs0mZmOygM%AJbE8)m
z;MEvKz9Cv2{~LP@3ig*mReu$XeRPfQwH=AI!R11PqY9$}Cd0Zb`7ofm0zS>PC#Nq4
zB8P7u@si9v2ynEZRC{Uef!cMrelrQB_Kbm?hce7AC5?L<VM~VAN)%*w3*VV(3fb}$
z^!Gc0N|(>VZ-LFQ_-ioh%}*10#+=Y`X~wm22F(739o0x};G}|UfS-^;sheKoQO*iV
z)s;!IHW|Y*7*ldd$BhpOa^CI0;Zm0Dg7yHm`ivuP?)ZgA+qKa3fg<Y=`g$F9v20YV
zG(1e$i7RDo1pB2ETlvonY&_2MKP!w#$=`v3mv%tnf`cp!BcNxgG^V=$g!`*hDPnOk
zMCn=4ey^*1qW)K$-N2Y$v=ZKP4o6%4VK7rci{#QrvziNHct5NkR*eWm&*xGgEA^P~
zNXg(%Zn=jy>gJ;7!$%-WJOE2fC*a~6H8@D<6YQ-H<;DkHh3f)?%PG?Y4y-#Rbj{pp
zWVHp29~VY0`s%Fl=OgZ=auzuD>BHS~PD0jsJFclWqA;i35P!&(%~lGbnTgBEqpA|T
z&R^thS26KtDOJ(byPNpSUeoEkS0KE&FBYwjJ%Q=T^XOXQCD6Joi{<)a@zC)#_)<H9
zN&Pk9gii$q$x5>5LAm(Qv>X3G79MwwVyk}S2zRepZ1dk*<PCnK@}F|_RP6v=ivjq&
z{~Y&wW+?=WK8$mZ2nOyPTZ-Q<#_%I`Xc+ku7Oj^MI>0)VY&4O4$1Nf&TUS2f&R{5%
z{>pa-565XIzqmNOw_%CZ>P*^q5J<bWgQBm4sDE(_r#HD41Ndg(@2FE_(hl(-IYs<7
zbG-1}YM@nX3A`GgN^^%>;t5Ahdi=IsJm>m(zS&>cCwB?-{Wu3|Py5JMYS%$=*CLoa
zD?{|_&<awVI*4UUY-I~~I^oPKI;j5Go`%1f!Y%igVkb_Qb5pkW2y-4`x8yt+wk9ux
zf1N)taDo)I-!x<vcNSBVydo8D3S_SWmqS6|c*5C7IB}Lgo6uVhf0kwmbl^E;JK2h+
z$$b))UPuJNN5#Ese*lFJUtm_<V3Ha!1BV0$Q}CNh`0`VR=<nT+(7uA_TUUOE5&FxR
zv&~N--#eCWjgz7E`sVDQ_e-dXya>(5525oXSKO&ziw_IFKwsow_GwHUth)3T&umx@
zO-1|gPH`d4neD{2t<Z-b$}YITPn+IJJcGeGo2b`J4$tas!rAHqQ)bF#?&@|2aA?{?
z`D?9EN7!|b?bo4wTKY8O-C5XXY)b2wn_)WZ#tWB!LD~yT*8H&pcN9H=bGAdt{@p<s
zyZ<XpwsE5L3sP+06hByA@skgCH=_UgLnuIQ0JBw4f}g5dP&qstuAjE0Unj%S$D<q*
zRFv_Uhp<L;-3}hd>Y$SLa-wBx;r6bz`0Ge1U$erOA9w#UCeL%GVM7>pM6~mrTTXJ5
zGIOzW@HtGM{v9Tpe1;(tl_5<36V_Sf0w?UfUPVu(?_;DnwJGIXv%Mk<;41lYE)^c_
zoWtzeOi6jz9QNhiO5A6DmXB4`!@cQgwBqa(lCx5xl`cOZYW_m@Ds%uv8tcN9ORh|=
z{1+_I=)?707jWUBB1|7=MgbS^;M?(<bTxe&b(xJpT4#xqPh?<uj{zH8TL>~^g#X>W
z5*B&M(`>(=xUAZrHI11^T}{?t@OB^g*%;AUVNQ~f(<eUaoP-JozQBX80-4#W3k7pH
z%YHwY?S5dvW_{Jd+Ws84keEj)rVDY7U^hH<W-xh9iRbqlnbG0f<MES$0;*S9v3(D%
zsizVkzD%CYlgk%b>xiMb%AB8Z-4hkZn8B%vDcsgy=kfOg9-eAs0=ziRy{<s`_B0Dd
zO6FkX@c^t9tYF#|(!w)8k(Q6K5E=jVV8#;>w_a9<vAZUcY<975E)dZHUvvB{S%o)^
z)k!<-8s}8bM2au{X`fsR3~AATbTuKXYjTczB|isyza4>^R%vKY-Hgi}2jJQhJGfu+
z?QkmOdYz)yU>Nl!5(e)OX6X;?*}>nBV1M6I$l97n%TCw9^KH8LCP;@|q7Oh%M<9Qr
zXA<9T*^Y2-A^n_Vj&9XGP_ROSHI=C`-({{eRp}MqHRA$uw<YNEtMd>m7`Nu_{0!=<
zLf1~p11|Xf<>l{RfHlou0L;g-EB$|AvqBKp{AW9MaDEKKx`f?|JGDqAz<bvrbi{lJ
z?Vh1tf3flku221p(cM|_?0Pu={q`Id*02f9Zk2Nr1q!a;$X)ebmx-Ho`y$tUNP%T`
zs?v{70sKqlVk}RXOa)%4)V+2!b%rOAKQrVb?%hI#8x2rq8B2}ZttlXXDvUodg`J%e
zN&AZQXz+FeUR<~f#dd4a`uJY7wynZ73fExxS0_ja8jfWX3czt!9JL;7;J(M4$B9a6
zc=baBY+mOEwdXiiZE7Ks&rPN6v-LSg%fW2TN=+K@O^?28+XIT77W78#5hf+tk)w_T
zPL#~2_Jsx*9}qy^pM|sWU^fbU;R%;tDo}vXMT_@*%#D%i=XXw%X6aWZkd%(D$SyB{
zt#0|l-%*?nQSQrGNbf=JnyDHKJv$u#dnJPLu~|&Us)_qzJc0^k$FSeSh5pr9M#I*}
za-Zt7(D!u^o=ECKr?GG0%uEmVtWk@ubqJaN@d{L1nv1pHC54=gFHKS?rP`k9c-3b(
z#)XRc1EEHg{Gk;8{*%NrN}7<>>5KXm+SHTe07FB6KxWEB2(KE2Du>+B`H~mAEIE&c
z-<M@U)}^>J-wA#_xWXOC5oYnJ)3D9-AMD>W45m3I3wH2&{BP-GwqW%>tSc;_NJ{|w
zk{%?lB-r+RBC$5X+&;DeB*H!MLF-s7@s6QcHXWEBzEiMANK(+=W9-JRhr+Hl6r;Bo
zu;Jk^cnw!2cEI8+>=_`9>AB_9R5_CUnfC}9JT>sHfgkI*8Uw}CE^uZyZgB-OywF&<
zQ;*SV<sM)4qxUGqmJIITj!nDHmG(Tv9p>>&;+6yZ##vL1pFMx!`*r#`>pW~7Jb()t
zIGoPk74u*AAK_NMs)9g?8?dYySly7Rbne+DxL2CbAC+(e<Fz&r5!C|O+k}jCL>9<(
z`mixNnv6=#+2~|DR;=iVJCe;{uIDiJ&R_;JmC<BnqKn+%qw?stpdI(j8_l+s&tSRB
zW}?FcISLGTCon#~pm&iuY>78#wb6#0#Ih~i?^Pq{it9)GR#V0;ZT${2cMc@e&Ln&=
zDG0hZ>X77zJa}{X13U@Xh*{3fe38;V-2O&`I^t&1@X<-)LFSIEY5i8%5i5!LOOHZ|
z&vbzqTS|6EA}~+w0la%G$p(J^g;}q+qwZIE+Ux!j{tEZ^#gZRjVc7xR<;n_DD0+sL
zlFn#<aVLn{I5y1Cj!$}T%C2l^1eg5%;-*|J%6hVy4=)>wOOtP6nRF5n6Ygtk2o&sR
zV%Nh`RNLo9T2*;CRCyVzk(a^kar?pJkvth6i=yRk=J7KhTZtE~HKSD#3bf^XCmz1B
z1Fsxb<BNRFAU66gh6RqGuKsvx9VkWN*QT)ax@n|d^BNsDOrXk-H8{d+G&>WbM(!iu
z<IyZ0A7|`g-SdW0bJrOho7je7O0OWdvIZighO--64&oT~ILJ~4Qhu&NZ?<PZ)%7%L
z@|UEYe^;`CY89xTRe&!A7V7bf@@!^MHOIIA$Lb=a*qj%^G<l3RH+f3}D=2g)>q37x
zy>S*7^JE!Ic(@u$w!Xlx-?TBw-i5y|zKDLS$FV(5c`Se9J8URegb(zJ_{arL5UszL
zGwcUC5}nR%-?9=Nx1I$Xjg>Grd_6l|nn(HRceo9H*D+wOI;~BbO4G81Tzv0J@S7k!
zzri~Avuq8>84q9<8K#(c-ycE>c3{wFfkU@^6CUqA4TXRA@TWBDasDm|ays$@LC1)l
z6WM}hM=uV&W5kYs-UspDv*7)QV)U9AhgW7u(vGyzaBjpc_<{X=e&sXh{gsIucHf0_
zeQ!DUxM!&LLy@U!dEr=XK{_@26*p1V7r6P)A*3^$MZ9%q2QGRt&1WCrV0So;^}EP_
zy8I4`h5l!Qvm))ks!F?_`$LRcBs2}UhbvF4#T%WCV7D}nC5Fr=U5)t|>Sqr!4VU={
z1sBeBwLZ(fGKWoAID$?6Y>#c0K3q?%2PSO#$Z77Cf%`^0roNpZ9?|=Y4-L(OWhcCG
z=Yj+1u_>OSx7y(wk954YLI%1**3%k+iS%Xe5Inr*2JZ(&tcb3{WA#Mt)_n&mN{PmI
z^OHp$D-5YuQ=OhK6gb?cRG4q`e!AEd#8Q1F$glI3Xspng?Xdg<9Rj~erF=UMh;e7T
z=X3xURRvXi2Cg4?55H*n(W<`Fe6VB$>#gX7vtO5sCraMoy2WPb*>@ganLGli{u%gF
zdK}v%*u<tS9SWsewqxe=P4(iHy6oW4F5Dv{<Z3@Xz{7V;sBqy`95gbUoe(Y_!`>#M
zQ_*oaRXiIKOw5@377cnh#*nu+G7$QGY3Sq>4Vjl7LFUHIY|<~``!b~h{A1l{ch*I;
zT%ZA?4@aZ-t4@I*xf5z)g)UvBCw;y#4WhS4v!!aj@O8>=eEv6pMz{~7-__?)&nyLV
zpS<DxUPQAYiqYu&@f9jr`mu~V7IeL_5+?Ugfy6(JY*^)FI5I_^cApRBE)>}?Pfg+P
zwJWot<vY=RZ6Rn466}&A=2FPrSHREjfR`JJ>XkN0kiX;y+?JqAdfLx0tIUE`9=i%T
z&K2;cycCK?yTFz-fgARBKldn>X9<t@vQMAyqix(Z-n`3!=?^htxF-b;9CW5Po>FYs
znj?JVjxy|-qeXSy?r=cJH0`sQ3ZD<!xkf0}aW9J%SY?(q{WxyPPknKr)@0pNaotNj
zmXWaqx9`zry8dBUcVIW{NOoaIYS!YKh)q~K&zcM~4ahm78oxX5<C=sH^{O^c!H1Lv
z@4PDnv+-p#oN3LJs?T$G1Z#ZRbs4esVN;s7bu+xI&cgEBIgn)#g(vHt;)IjZ;G-G`
z7K$~{EU{4R*OJe#T%3p&OEWnG{ZkOVUW-j|G2{(eRA@td0aTxmp!ACZ9>7z`A-ghW
zXBC67+<k29&4&&b0>26w+WuaH-I|z1d95Eos%<XD%NAfsI$)sc1MKt5hR0vO;IVD9
z*z~sJc=?+Y9o3%#LCG6oV!fC@a^IWuhvbSLkrj0WK1A(|P@0>s%I<G@gmUv*A<xa0
zY1b~|-#8B<53O0)(;Q4aU#j@8$<4Sh_Zha8aU>gc9$gQnp<41*Xz5m<xxJR$#Y!=R
zFNsC7Y;WrRZwcOV$wtT7Qz`gp8s$zGn5YYbvG&wH5EXCX&h9Kc`^ruD{yZJV61+FS
zEBV3HG*p<m`J`ad=RTa;JPi}t{=wiZ6}BKu%*)1x(T1<SR9Mvuv&PgSyZ9X2dnZ#%
z)nuaH9MG<prXbB1n2@E0c|%&Dc*ha$)k_t&?CThEtIvn!o5oQ#PIS#{)kb}lAAHun
zN?7rDJOq3U0^jf$_*QL6MLlg`6r#k+g+1M^_8iQ(=|n1n!l*6q2|xMOu=?NEFN20k
z8Xn9r<BgNuSig_3N7kwZXbz-3Khto2{!cC`D2c9>kE4rz;cRo|Z0=I)R$LLhjP3oO
zsWb7$>W%uhAtXhKq6|^Sj7^5;>^ljG=9C7dQ5q;klakEJ5Rsus8B%;Dsqmb=l@cjQ
zk_IZ3CMgX{H2lu*UF%)z{R>#f^BniR_vdq6wNoiKbRextsuu;BOY<_PbXciF8(uC<
z<GiM4f>`ntzHZlHUw3z+!iyYAbakMmUF9$^S)Y^&CxXl(T}1ao<hry2k`wjVG|fN!
z?36szF0f|?_ENOc$AVsK+lwRW_JW1bH@T+RAa<y$$AscvsMquihT08adO`<qwBsBW
zuVaC_JI9i&a7Vw}5<`(Izw#x%H88pH7~Btjg1!kOaNH|KkE`A3n_nZAjhA9g+v~(i
z5uf;Jcb&n|_7Hx}uR+CLj*j0;#PT^gq}eG!g*%U8O|=o5IA}Y|@&xXLY!5c?97;{;
z54rj{IT|o7m>F@4*`u;R_OIwZzw?I-d~i<3fwki?s>7IkE=S|vCJC|;STYmF_u&Th
z*=&TBn|RqmVdrt7hy^_3h&;b?c4L?0#F!KuGiNbrxqQchN<;YeN0sB@ICVy(i+(UU
za&{fU+WAX-dgDBb)Eh>7FX+Ro{$UtrB|^5+6cV{IvGTJxY<Q5t_6PKG;}@NTy2m4E
z*}`y~qI?ak`B0kPKaSab96=QqYvFda0hGxk^Hq0&3TFwshMXQ;vU(%EJQGKqBjRk8
zG{fL`#0{?HObLpPj-gV^Al5j46Yh^p;a9HO42>&-VOeY(HyM|3%hGtXiXFyg-H~U1
z_dG+*R6Azhewba~y%X2_tb(&Y7+Y2u!^As^K`h*{B~#uA8S0f(5UxUduE+2Rd5SQj
zb0%Nhm5oEz9l;K@cX;a21Tydb0`Gt1@&T2Z;9q5lT5qQa8Df1lZkZ#kGv9y`qCa8*
z1Ptw4(%CDGNIqAu2V7e6_-Ky+`ZA;&J@yK=z|^4>YY__*BPW53`%TeanntaIwhQmw
zgY0m%57R%N0)dUmAb#42(Sh5^3lC#Vj|rVJk7g<b^J#s+N%%d`jG1@FVb+f|+_399
z_^7Uh=hrL4hk?ROquGHOHy^>fDp6#D%jsVV!?Vivl<EEojPDJnt!b^Cf21&roA1Vq
zr5%{7+X!mwO-4)oGz4=&W?&x8eH|9gx84%wFVj}C`)hLP&L20LBi{$hxB+DGd>+(k
zzU1uv^oaSdW3y-4u(NtzIClIRa@x3r4b4aa>v$z*bS)hA+c`2-UW%Q2=z{u7wNPS@
z3|n3mjDK72bNP;G%z22wd-6EQ=@grR^XU!v<-q`Uvc8<pE;nN~Uk-!$^w~`Ewgof&
zF%#9y+C=9QzH)u{hjTtpci}*R*%Gie69%rCM_tRL7%p~U-QGF4^ANyTvPOpwwygPH
z98SFN#d$tIfonoSVZD|q<C5FZ=~p<Peu85fxicu_tOhAZ9bgR`yZGlQN#jED`Nf69
z>FUcw*y3BsbzG0boX}XTT&_q`kA0c!z88=XQHcu{Skj=b&*hu%e1qlz&a{706An_p
zf*)GiaNvLAvD$w;xe2}Cr^`J+)4>Pa15^0y^pjwI;ugLW#o!EMJ2pqyX(wm)g2J0;
zXn1rl6$rf+t<59Y%k&{!iGB@d)i9nVhh4{I3P<r_-Fpz<*+R9K_1GMNrPcpE8s?dP
z61X;!^k&EzZ1S*Ws#n@!j6ovykDE+moTDIa@iWeSZ8v|;BobEZKEN<HEAi^4G${NY
zjB5@b5g%VEF!hA9mY%7=^A~&zf8?XN&zpt!{k|Bu$wDEkB^xFvjUb=cNKo7L5<iRo
zpopJ{PMw!wT<T%0U!})y2@IgDkyRk+sti#lq%fhWTl^tPiRnL>E!fEIVRJ|`E{xia
zHp6nnLLmsw#u&2o%jMbF&ttH3?QirKcFU<f^QijWTilnng1eXZ0K}_%Kvios*S$Fb
z8!}y4!MrPYMD{A2aJU2$);xsLM{WFkPk;9JO*yYCwHYOsKE%^G$)FQDhKAj^&Slnx
zF;8oC_T-!{Ho5-6O}hrO15Mo^kuS?PJh7t>OOJxtj=?l#<uh*6U^O=Kyc@<wjV0|t
zQmkaW5({kCp-<nR!Nloa;4JKyt_q*a#P8!kYjqBO{A9*5w%UmlI-Y{`^hgH#odo7$
z1f3Q#5+_n_ls}ZzrkHWlS>W|_xTU4finB8?U{N1uvZMl)Moq>!PrWd3>^V5|WfDrL
zO{ZHXo8Z5_*TlbyVnC}+k-?Q$m>MSV_>jlY2nS~RF&a&5Y?$#IN0!qdaPdPQgT>!4
z2%239lMCh9g2czT;lm;tyJa>l-<5_<pQV`lE=&H_U3pmABK!_TFSvE?`B2?t15LY9
zpxgNXYKmGQH+wyW*GW;x=aI~I#dEA1<$#;6d9laB+;!-)4|rm^64UIy#Q*zv5RAs_
zL-VF|)LJx)4XrkX(*c7aw9T5jblRX`vM#v^=Lt81+q}A)7B1>_XJ)sBUe&=={5Ld9
z@DlVu+(K(Al&OL9y`B&kFU^v!rje?FI#XMA7sHg}VOeejr``S!_Blr}`;|}mLcbU`
zbDBTaRqsR3Rnl-TJAqBjyUclBS4FF>nv_|kPh(PA;OZMI_Fm!z^0G#F{9_<~N=f6K
z&!5AZbLB8{!e*AM;7=2#iCDLr43o9ig^Ia@Xh!Z@YVdgj)>GUm%-K-r0ISorEe2E`
zmW^i@?Plw1E<<(hL-<hg9@<{XGNT8r*z|EW1-dMwL&rkEY+orR7W{<CFXC{YVJ4by
zN`<L?2Wi6RF!uh_KsG-nLGbEqhx5~$F~>St0FOn3o0B<({uBE6m)iJ?=f*Sp0S&OK
zLXPQ%-$S|4FL-?yM*R}0?D=gTr^y~iyAO3(+3*PKXCOCsn*lu#9031JYB;UaOQ=w}
z6(qJfk^RsPlrR>d<Off*@Og(YOgMuo3%&NkhhUBM4C-pT$JJNsFtc$YHte}6-ApaV
zf;1&=_LBt2Uswafw&Xy}o)>Uk?K>x50bGM+34HA@gV;c0{*mo^&`)$>I|6iQ(b`e$
z`>kb6XN?mz&$OlkYu3_$9m8PiZYAu#<O7zI#?z$m8|V__#9rUBrRkkvY<Fiktvqo7
zWw>piRBz0(^RDB_X<GOoB^S5um1W9fR<l`Yx!9on1~&}92F@8v+0qeDQS~SXGbi1^
ze*(v-^}|Rey{{er7;3_)jFC9eLYB$hb_2}~LU(z3JzUY2plv_);+s%!adN~dW|~z<
zzR|+Y$8!MtTBuJ8Ia9Ixw?eGaJWl#C)nL0vi{!gk3Ih`>ysekajfmeu0h?ox-yh8u
zOv=WdbAokUIQQ+EFo!-bF^7!0C)}T;?If9Y23^1J<UWoN(bd{ED1Gu8@$+`k8-)ru
zv||DFglj<LJ{_3iIs^}k-1v7whEqt7+s?gT0Kuk5&?%@MRToXB%XjBfZLkafpH_wF
z)T?!57`c%<`p{dH=WqbW_y$2mP?0!im^>-Bed23b1s-Yk;NZaqGC3#Y8+M%(W;3}^
zI$2;J)|B!-f{k5teizlcD8oI!Xz@&@PK!RD#_Y5j+?30U<FD6><ZB!FGtSSUX{s8|
z`CA27{;i=3i!(4YG7tW%?Bd3!3jNK_A&}dVgs*gJc#rvod?+^qj{G-<eO{!;qAr@V
z#fRM~{mWg>=FxEo+{WV_a);^lN$5Fl1dAvRp-TrJ;K6^7ISIQ{xcK65uBFXboSL_V
zHu+5@tue!aj2~cKv;{n>nL%zRD?xGEDmKh@E$N^4W*&QZFj*x-S7WWo_@OjQ9bF}4
z@AqO#qy@7&l1qW7#=>+BXS!o3Lr*715-(Wp1LJb3ufUjEs+O=jO4I55#DTPZ?-z(2
zCd1afR>3SqX<B)6JEv+Yhj1^7GA3KnOu<O}RN)?oizo1_d^5O<ORwVl*<J8Fd@2|7
zs|)gSr!x7xg>-l6A?7CR2bQ=U##NW6QS{(g3^~<+8)Y*o3pFXeE(XP=9ikF}r4SN$
z1z%+5vu|<tLFf1(&@|czZR0GUUnPkKN(y{;NelKeYdktvZKBiHby#Pk;08D<M+T<l
zc<W*tY<&~Ota=jJx~pb*eb5w6uVWOe_xTE^_nKghYzR%#3F5+vyqVpTUKpUGO_E)Y
zxW$4x<nDtzctQA{>N`x~{7y|J&(9Ch??=4o-<lC@;Ql!<pB0Mqe!s$ztIe6t3{94P
za54_P_7avnKS4?H-h%TYlNp=-gx7~>fX|j#DB)VT|FYMyGN)!RId;u<`_OVQnxTU}
zVIQ$l<rBu6DRU1?tHJ+tHg=n9aFtuG^PxBnPW}leg{M*+_y}z7W5=Mp=nK5DTt|&p
zTOn}C2>y@i653U4hj#AE(W_R4&8SzQ585-Bp`<f}UeAEvg%))Fr7GJZ9zY{58$s#d
zF-&(_7)q-Q7C*L5#SJd4BF*;WnB<y3jlFl^!DUmjef^7je`%mFE1Cm;r)_|My(&=t
zZ4Zj7^5Njs$y^ewgy*e7Z(AV`4m?Nn)IW$;`_ec^Lti)`Wkbw%EScX-fbuePe7i~u
ziYg6RhT1TCZeRr17lAc(4F7FX2Z__0*>Ib^?DM9DY|+HU^vBYf691{t_q&J0Hmbj&
zzWXNbE<K292hYRl*SlcTZC&>7496CytFxu^9oWQ=duV>42MN6Z+7Yyrf^+p~Wppqd
zPwj@GYjxQuLm|`p)Q~NU>%aq!%1mw37PcUEG--*7#K~Uk`7^Pd@MxqFWoK;QAx@Vj
zn;Wu+`}Jt!mSWiY$$~DLr?L5BXNsw>N1JMO{Mg}6c9$Pw+lyrGj9gd6nN|rFY@<$b
zq)tC1SFzi@|L~lUO)mH=&94h+fL@9w*~Ra%VVe}i6@;Sgo=X@uONM>@zLswFYto9K
zDiEd5W-$pWl#}a87bidFJ&QMRXWbWpxY3l_ZFbP8(thr9hci7HwwOw~4#UAW<}74m
z3^!Bq0^Be&XKHK0psv0e_DsKs!&<MRY_%SV<b!d-sQH{&V-US7-h(0Us%$R3-GQg|
zw9xUXJe5x`;*VT8&&v*)0kV#cA{l=>(XRqOS|4IVJq5*Z;%*%*+R}xUmE~Y%vjiWg
zD~R@KJ;86K^?b?KY4B|SIKFw~M4B2uma=b7!x?)F`IgpL^cOEjc`Yf3E8E3+>W5H_
zTqBfBNx<m+yI^^;BdfFV<fL2`$vm3yEw9g}zZncKz8nCv#Y;&4j|M3Uvxkp+eqyJc
zkL~XOMYjBf;CHfdC&SAlV4Sua-AX%%{o*Vp`HrK`!x8wgU$}t_&whQ>CU!(amTkA5
zM8*?s>Er}EPO0x5`Yb+yvn`Hb+=Vpk96yrX-jfZkqgCne-9+??SEQ|$zOdUoiT}Oo
z9fps-i|RH?w0Ke|nGBYrt>=G$N#}HKxU}Gfsa;HSV%O6=xvP+*_ZB`|a#Rv_8a8wV
z6I-Ii!iSsC#QWOJ&UzuuJ`#;foOIaFSHkyoS_bz{#hblv9t`ugzXG==d&rLv@~?YF
z<AnChHom(~aeWEBwzuT|Lj^eraGLlL{bNJ9XCe0m-d8B}yzfI3Wo<UjI|FLG3gF1S
z;cQ}yK2&@(L9e7Oc=C`VRgV4y4YE3vT~<&rLe`ayo1Mu0KtpzNs|E9Se~DKn%o9hI
z4P^s#)Tn;gV^L&}9X%NtOj<&>=K9#<816fU>{F!J*;gy!LU{=c`}`e`uQ-qNR11uD
z1>-d788mz45?Xp@E6%NP;6BBU<Qz5A;bn_HbL}___vR?G9PKx7J~tFP%O^27{2f;8
zc7mfrl<B^^I?cU(oO3vvh5l!v#FPKIFtD$Mo|0*>3p}84oZw(s)`6S9DzVR1^I_Ps
z`~0m{V=+%!1<TL(qm}#@7&<$R8&=T|ug{HT?h`t=%Pw^waX1ej6#nG4)x3Z&at-iS
zX&GfdJ`X4lw0wjrtyi=peJP>4-WtF!a6ZMYJarFeeD%Q{51--bk5g&&p(0pQ9L)x{
ze?Xi08E`h!knQm1&{x8ej+V*NaF&4cW+kxcdORAmZ)S0Q!ECeOn`p_=f=#2IS8SRh
zFeTi#F>z}>6mp$>eA#gN9`+r+1qHKWsbsKy{0{rS)j<BiXBA=g0-M<JEruPCL0r@f
zf5Hf7ZydnJj!uKVS0A9}%XTn3HjJISx|I5F3?-k&P}Gf`gAYWKY`8)V7ARTLT)#g&
z>*YZ=Uf?&43ZdEoo5kJ_HK<)U&rCDZA<LpT*gL8Ls<VY&@yDU;sjf9@UoghiLJq#5
zq76d(-NbQseZY3|P#V5)I`_u=BRt4Y;=CKKLga`~cz(7DZL+%$ZNL6O_$5s$7DeE_
z>wodde;SO<J}oM~_8N<~e#K-%BX)A3E3TB{xoNnLRn1z!bOy!4-$_z*dVVa9iub3)
zw=OVXv<vw8Zswm2DiC-V#UiIAl5ljVFQ=&34^BZsSMcXoh>jdZksd1KQ?m~aHYt*)
zFk{GG`dTox@4|&uJE-dBESj&q8Xcck!E}L<-7j>gx>hZv0=H<KE~Jj!OI+BU3I4oy
zN)10eV*opKLyweB6yTi$4`KZeeR9fr%iG<tW%o@a*r}2B?An8~SZH+^XQ~OF)59mJ
zez8AmsyTyMhf5*vm*5<X`GYCtwVZlVEU&dS2pV$Up?BIbZpRR3N>EnElo!6VQz8|L
z_RnMAg<kGLk79TkH;rwvS^&=7P4Lchw7?6uwXwMN77eFw#%D$%8uQ^lKC}Efe|N1W
zxt@9pFY^zBbEzC1khsU&I<3Xu+C%9<XIe$_fJeA{#$r}_uaNb;XcB+a)~3a_$8iYX
zfb+^7`4nFpc0DtcSHHT3buF65+NBKGfw$4PpjMtAckvT0bGX1wbeqH8j&mTTBPIB}
zDo<1^<fV_NFBjM|o&4&B#`Jyj3FIa_l4On{OB=M9SIbprUdOy?d~YbPcuR2Ojr@zn
zI*ROIS{Yb51=Dq{eh3=%4S&oPa#dA9u)V>NwJA1nnKE_UI5WW&<^L7u*sHQ9*CHtR
zv^pgIaAvoz7ee17A-9$z#}bO8Kz1ggak3e0D0Cq@n8FQ}?ZZvzLq4nD!dSTwmj1RM
zn}xVV&2N9u6xfkB5?9mL%Tp<J#1G6EH-Pm&4+E*aMUeStDD^jA0#z(ww%7iH+709Q
zFx3a}v$zPq`7LB0s)9x9r2g{HN5x|IX+Qe*<T+0{p7`m_Q}{kkPJE~9p{Q+qKTODy
z2OHM|Og485_Kz3bCqmA=?!;G6mW<;+{auQ!FBerPXPUCFeyuR=suIQaRAJzp07_Y3
z2JJn+V8=`gdJ$g(iGpjyLPFRBPJD=~vQ0pdBKfG@TiBW9Ls)6Tb>2nkFPHgU6(;lx
zZZ)-^P;)*8|7%+cXIIJ)jt;?R@73vz!eP8%ZHx8q9NGEyE0`{IfjdKoVYc>Dd}MJH
zR~B}l^s5klqq75(HLSwN**X-);>h9Pbaa$(L&IbB`03{?`Vu>WF0T$_vn{W4&ej}L
z+hYWG=T8@#d0&CT%@gR>;yv`^z;<}Kynuci@8GN-n6rb`n_1)eHKbUU3QwLzLbcG(
zeki-0ERz&zpm+*Rs-J}FqoUx`!EkzHH=5o1ro%Er^w^$oSBP(_$BNnBurhr&HE-bP
z{gzhHyD7&^6n&}X+aMe$+sAv`c){TLM`(c(2Ua;<+~4v}?(*kA3<<tSYEfT6@y%)8
zL2(e7NDW|TC!D3Rb@#d9hXNtfBn}M*bwOof3bcNC!$sG967El>Xr{dn=Jze(8d|q;
zhT`vB)~!y?Dfcc6U-N{oPVGUJMW1me9%D{I&!l&X7t7ptl=nL#%Z`~!kXqIv<~7C?
z^Mq`8kjyN$$tDUrjzoeK6@c-_L9k$<7JKYDknRsaZt$vwG$?N*y!t$k7L0D=W&Xso
zZ69putLsuoh;d-kst)2ze^Zc|aSrFMl@c#7FX2_EJmRlU*ugd>{;YVf9Yui-VNe}&
z6Etr)a{=zF=~wq%2)ZA^zPB4uZbAsC#{R*prz0R^$PRQ5Znmv|C-ji(6xjheIkH_h
zlwH|ki`^sCV8_k3LO$x9@I7gVs-plQdxU&fvXF_3UPI9?&dhSE0o5=3!S1e|Mn}iq
zKrp<^f7Kqq8V%loe!Mb8HYmeD^LE((TZ?7qzT&6YtVHXBJTG_8h*Vw`LVBhiOHvw7
zc9~HyxB3rnbHR|^oVHqIYc&h)j{QWZfZM#<tGDR1!Wo-OTS5J2Cikzf4fd-?qd0FC
zj&D-|-{3?_y}S@b8+F)+8S^<q>2G|?%S)UUH-Tc0)#7c(_uR@pbw;i~Ir)JbF()kx
z-nMfXIZ2z&^~<u&xp%k~^G8uquPod0XDCExeCB>{Y=o~SS=h2niMxD6oo&0fjHQh$
z!JM#juxqv8$;!~BX5VKxb4V*!zUl~m%)0=+^OnHRUpF!0=vcPT&=F<@<w5PwbYQ=&
z>DbE&#6PP9?Ze4nU;GJv?{s8KUe3m;cV3~BgfUzUY3GMmZsk55>j0V8{rKX`M9^QG
zf#>8+@l{`#sH5oxTFeZfH8ulTTe?2x?o1ce{HG6o4k=()rp0=s6&M@R%hfh@R=7GJ
z0DG0;u)k##H#kg#6&g&!&~GAEceNAt>Ph08QI9}>`W$AtO_PZX0&#w49p1bhiGwFE
zhnEZUam&yLP+~ucA29I`X!y@T=~24u^6+=?G^31ruBXa4!Qk#`*uYzvXy91?Zgd{q
z%KzGF!72t_gX;qu!7S68$(JQS;gTSbniL~C;}OT*F06uvu}!$WVL1(v8Og%N3fx<_
zBV5EHQ})+B560b|gwsj{Ze&6Y+IU-#^|xzqC?kTTXAfsT##{40?JJ<<J!3aEZN__o
znZ5qB4IQ&xNKOVLsrODQ)=U&SJ}HAB-tG-nk5l9ZG6}Xgbr6k_xeO+rGuW!wJ@oDL
z6*zqV4sI>4g`0y$Gw(V3sH<lQy*#v^vnhCi+;MpfSeK7W0`yrFifF{SUvTq;0n=F@
zMte*4(F$8rQuPR7IcW}T1wRR1nqEiF!;<Ed_(Q^h3AB1b6<S4PK>eI-uEOLLHt)Lv
z#dl>P*nKAZs*=Zf+MU4iMMmPExk|Wn=Ue{h<{6~gszyg_^PyHJ16@Spsc6GeaUGut
zL%I*pzm^$9*Cvug&pY5-h1t{7bk5<mGJP7W#Ol?QE2I?>S2g?x+A1?xpMM+Pkk?>k
zLN_L1$b7Ntz;C!d?gLludIkT<O-0&oL{?W*anI==aP5*bT)5^SPIzF<C#vbge^)s4
zKDwQUUo|-YmJFHa*Wn}WZp^weil6m68wz|U<Lsb=cqulD6fbJSuh%o+LY@a)Hat!r
zQhf+!9N_<)x-ODE5e*7L7vWe)Bp2{jfknt^(*<R3YMy(Vf?SU9L3_e*`wI=GwX_+G
zrJlet;pa8in$x4Lcd)mM_}Qk1K)cYJNyTfh-_s?@<j+sk=}l+R+Ye%_ryW{6iX*GH
z8nj31Jr>oDX2at~v9jX5m_20-l+H1t>bo0Q*7rpGR_{)qKD_3=+!*g>sKaJ?+R+yo
zM=H9U#V(Iaqm37}aP*EP+}T5KQ1{CpF0lGP{MuV8FhoP)O7ACr>Yu+{T~;slm%oJ#
zU$VK-&0FzW+Z*U#V9)9n|AZq>2JHOMG|>nl*Z1;W0&0(Oz>~**qsCDyUN=zK_biWr
z+MFS*W$Hc{suGWNH?{EHnGh6S{>*SkGW`7euA=jNJr>;S0GCi1P|O~UNkuAb^A|%}
znKKQ11`mZixA&mbGzK!|!?<=`C-`vX6htRJ2eZ|~Sl_dk5aXpxCBn{ZpT<qpUOE9L
zjjTh9m1&{?!8PMmHiPw_9><hyR$#qvFNVH%fS^}0<ho|QO~DW^%0G1npC>e<*Q5m8
zc>bH%WBUkHiylT2GJ3dfk_yiEv!mL*!||0%BHqaB0OJ*H@O|24*qt(%eH!G;=M1QV
zl)^!vUX1*%4tuE0JOnM~li0<UG1Sl|Mwtyp?7wS`kdti2*$r($%MrD<H$D7u!y^;s
zdiE3iIeraQ?gTOi-41x~r9_tR>p3s|1<=s-1JX`C;{5&?Q2Wo1+>#f2@uuc)yx&=Z
zJKD$M?G#UTa{nhB*i;0wO`Wiz*Nsw6TeFLmTI{Tbz;2b}Xr;jP)y@q<b*-PMuwV*3
z*3w~ZCih_Ri1+x>XSsO4$xpnhZq8bNoEOcy*^3)jjv+^#{irv=RG4{tv+gMzOBQ^Y
z2f~3K>V84XN$Z%ySp}&0Wrjy$m7qOBnG4;<;hd6rRGC<cMcdU_ZB8|K702RvZZdqI
z8-#6X)v$Plz3u0wU2J}79v<?&i{J0Y;-bi7U~lw>Ygn2j+#N?S%krJfLNb75961Ha
z!#d#6os~@fODrCbeg<WyU*o)>xhzUfo@67}Gp9&<@t_?eae`qn*V|`~2js5cZgnkK
zF3fE^?T1oI>`1o2*MJ%yWq`)%6#jtaYI@NA690@i4QWx2P`cchC0~!k%%{iT*u5gi
zl`6*M_@;7~O9^82XL+bw`4Loh`%~d%3;N?2#$C_m*o~<z;LtUVN_!-kd3p@5pFSRf
zeuvVh*RN6f$Z@uQrV<5U5x$(!1+y<Eaea2*u|A*xmn~|AO{JN<zpz8>_;mn2{F0)V
zDem-c?MAw4@PeOeEy-%vbm5Ij88mrM1r`b4i+O(~!Mt9Wy9us7?wC58`#P1|>1Rcy
zmBp|$<1REf3c1AIe!N>>h<lcs@~^ipW`1%>7^!B0KLT}d&8?a2{?_BTR$~iX?7q*1
zh1}u_qP6hidIPR{=WsUk9tTR_{aI>FG86>c!F4lhJXXCGzRlHT=?atJgnI-EoO-PH
zoC>z*o(X}>g)HA&pQ$cP72R@_Vy2;`9G#tH3l^invsQ~r<0|>|%Y=S|TcE(pf%V`1
z4g&)>fPRNLTV|XOsrv+vY{3aw{#g^8w{B<Y=cTDqS0A=%JcqLx#;kYvWIokT=sIi7
zW3uh`{1d-;)*|>|S~w%-{H+L0YQxw(fdzQ<O|_^|*bnO2M$+e=Fqrl|n0aov#|=NL
z2vJizVIVlr?#M4fZ>61|knO`PpG~9pMZR!G^%;EY5Zrbi+O+YnNXU-*K*8-fY)XU<
z%Q!oUWG|b-#KtYceNzE6+}6{LNdkjv$}Kq1mP!HtouH-Pt>Bo6h_Tc%@#DEC*(iM}
zs@w7x4rjgMr_FwZbBm-PYvUnQc{&Uu`>$|U-SY7KaVxS4&jf`B?d3c7n~LAGKM{SY
zHloM=u^8~a7>8eQLfg|StgTWP`d8b~;<?$NS9F})lDLR&6%*DS9ZjJNiT&sK4sX`X
z<SK10a#uW4;c?vxQ0V`FW94!nWVSjRlJ*25ciqA_*(2B!>$!Yw#UR`;p#)VP9v4fx
zNr}d98w0;>Bx%W`Vw|OM9!IQQj#`<P7^Hd_e=FQZ<HJ3m_{fe$>K&#Jl@gd86a{<k
zTe69QZ}RV@)BOGdXY%7V(Y!DE?B0rpSSxV(WhbTycU(V;?jA#l8Y5wYss`I}s2C<z
zKF5+%7qHjK1>UAC#_ZY^Y(T#$^_~d95Z$kEd$BTgeV-+`m&cOn$~mycz8F?UT*Wmz
z{wu3JXTwQcGGLYg!Kjs_1EJ$SV_8lMwidKOw)Ai+SjEAm!~IwvITMtA4#xH0Etpb-
zH3r^uVL^rfYg!*-#2pFoDE$HCm_3`N^(!;uVr9%fn!w+Ng?QlaSlGARm+VLF#^SYd
zwCMf`c%6`oo#Fb#Dmt;s(2xckR$(PWzk<o}QrP)=yY2ji4!Az32|j5bMvcXzD5J{(
zkIdGg0Oc!i@6`nS@0bVMZEeZ6v`)k+B4d2jp@%&?CSZf$qk9=~8BR7>F@5V?xH?UW
zef#PLQ#&>3kg)8$VYC?3-_1j<;~Ur+BMYW8L6_x5N5h?Oqw)62r7R}?IMmyl0m#jR
z=jNx;>QanoKzbrqUE@Kum1p_PS)0h{?helDtRd~!8ZS1OkjHLVjTiV*nIPY@hSn&C
z)831ZF+Wii^n=6b?<#!?32Nf}UQ4ng=?`(C;5Ky|y8%02&!mj)NumK^9ys1inGH-{
zM|S#UaBAyNw&-&kW|&HIJ7blZ{(MasX7>RNwm#s3j_1P@-9B!zel(vyxr*~VFc<^l
zg2m1~LPkQELH@q%&A0CodP}A{6dh*HnWzuLg@gLBXZi(bae2p|m}rP=uG@k2S4Fn$
zaX1CLyWyba3z*rXE`ExK9)8g8fun9`xJAEibM0$K;?i9S_{}MhD*lFIbHy2WJLWeo
zIUNGmR%hX;_k+;aOyJASt`aX~39vvh40oC~V)fHAU~bXJJ(#G<vernm9okJ$xlot|
z>7Eg@oDV`)<1?1ahVuo<OIcc~&<hi~Iif0aa`hh#FIBQ2^<XE$*F^D!m$w1O8j$V6
z?SzlDSm*JJ;&27v9QV8i>3OOocXoxqwo#-<etF#Ekp|RcV?pB-?_m43R(@^lAo{sf
z@Uktor?;o_IZcU=XtvD)C;ND^GX1llDL<2KY97Sq-aW~;j!P7gfigE@{a~8auP2(5
zeg+?{TfiPx9O54y7v3>WH(~JjE^gG6B+hAJCuqBDqyQnit#jreuJ5ZCKHVxbz`%fQ
z=~@r+&!334zMI7M2!4APX+!QotO^B`+QPp3UO2u;8n=iq@cG?oFgGh1-)hfetp|c>
zPWpCqJS)k%K2&j&f6K5{)i1b$#~r*@{UY$me*{ZaUgP>VHmu^4IT-|~fZh`xE^eH_
zw*8ult1*P%<)X{fm2zRJgFAXxIf1tBeyCl3TcmJC3o=u3`Mq-lZe;ut*6;BF@*YRB
z8&Ruip@ud2&b$Zb3SQ!-aq6TtY%TAIhuP1M7x|b>23r^Y2FYE2AU9cX2`*tw&%PU^
zo+-02mY?BZpe=hRbcb#x_rr(;FYeXC4|ukEI17|N#6BA((mc~hHuK#?e7JBHzLxO9
zH=gA*^hW@1;+YL|Eqk%hunh8s?!pZ^AH<gWUr<_iF!g-+0me#>Y@VnHK30rkwHtrH
zCuMyqnspbK8Rdfd&BM4gZv>NhsDz>{C5&IZf!sP)aL?0si(A)S<m11}vRyWP@OoK7
zId2q0Z-1<3iLWcLIJyr#DhIO|<-xS&{azR;?3x-vzk&SA3=nco0{1-+t|q?%@n&JZ
z))GgSQzD^n><&2ZC(O22o8icTMZB6d6BsLx;o>SM+B)5cO&NZRs}%=Bs=PM+k-Y@(
zo{gYQCz`p3qkB+hGx4VsWGVN(38g;$4d<H|!ar407EmtuZb}qb)4W-NzwQs}9SFqa
zbAr3};c)hLP9kS0vZ0G+f`{168md?6^Pc-p!S81XwZAWOAzD79DP;N68`VHz{w5f*
z%$$sV9)?l-$I|beTKIn7CT4N%Cx&}`<V{`+oa01mnq%Q5%x#4`89#xReJ#ObI^Q|J
zHHKW-sTd5jIl#X7t3vSpW$Z|m2U7<_(Zw6Z&=M=nRy&^*ua+lpn<Yn;_mrr!d=ZTL
zYfO!L?;-rZ>8O8cE13T}$lQK5!}y0|(W?0&C$p)Ow+R-`lao&Ieda0LvGQyzObh4#
z$W3Fu>Wf%+Y5_`Z+<-5FpMjCD9kiN0<)`#(k#s->4osUtRi#C6@zhAR>5C%QcS4P^
zYjG5qA@pFMIq=*ZZB{gTDnC!9jr(x@6gHiE&*^XBIS=zrF65br?OHU2rbkynQuqb2
zD5AU~h_!>8+eFs?Z!?tlc$2(B2-pl!r3KGS+4WEn%UAKD;$CC2DbK;z*Q~K_lQl&j
z+m8$6!a4VkLzv^h0kEohCV9$4lV08v(eV^-`dw^BKMUJ2qG}WbX?Am$wdB~pE_<x_
zt1a+g98u0S6Kw+~;@$mSxHI+u=HH*pSNFcbnPZjdOVUSl{<#_t*B0TT6i+gJYQvuF
z)S?~c#<(<LIhjOVtf&d~M76%jFs*Md^jtE5%Hl!{3J3tDq+r@SJRGJ)1u&(gC2(m*
zywLetgEi?pn30mJz*}5|9}Dt0wUm)GOnX1y>u<_E8RsEhCMC=At_~!bC2EwN`xCS`
z4P@53Dx%S+HE8A*b>_bPBV6yvrhVUD!J8lLSgtn<oa^?YxuF+YW#{vKhx^1XMq(W5
z{tr)hMZl~q5gS}v$A!(Y$3H>cocE%tGVfbH*!F8Sdp30znmshbt&XR0yS6>ux7X#I
zDud{g+#;;|9!6Ha%c-PF65FDd;g>Ws{3Q5zPuJ@Sj_}*)c%%|N+ey^7PYiLhC0X3_
z&s_HhMYNhB$G$8!#S#-ImL>5-@LB3W{}v?}DrCrB#vr&15L|K60!LtN4)L~1n6f~R
z?hP0WrK9t>jz&G0d`{>k8qQ^t4?PCyC8sMa{`u0>?w7zMhLDj*KRB<LNkhlX<Moar
zthe2VpT9am$*o9IovzR7dk28x`a39|sY<Ggf#P&e!Lp`fyq{l|=+|y(%q^3kY>Vem
z*&T~R&&!kD$cvzR&y};4I0)^T%kZ3VCRY^tg;^5yIK6QawzMZ=T-ItdX!wj56=sll
z`~-fHlOKsn&QL_+2G*8yk{{otLW^$S0JDYtusCr$zdY+Zw0&EN3nwc8PyL);Mie|8
z<<2_8D<G0@1PRXsbRTvCjBXESD^3oi+dCBqH=5Chb`!>{w6RmZSNY~vC3N;3OEYc7
zT!xi4wXOWZhi;dl))h<1##)7)oGI*JI8$L}3QX2(FpFg=xa-UdKJ&?Y&^>qyxA#or
z_VL+hq$vEK4g1U02w58Y7Zu!4-+5#pa3T`CLg49=zu;u<PoD}C*=&7lcIvo@J#Bu1
zT0&+q`GNsm+t$H5#t8YrY0KD~6VEX$v6SEXbQY`(l4Q4iB)FG>|F}&%Ply{j%-H6!
zE8xxM8s25w37Ee8KK4oE@*3%KEG@W^yT17=jx0Cf-VF7#_3#khcbApvS>jfD?QBg7
zE!wo~dodi@x?6acKj2;qyIZ|I(bT`T6P&Yv=0_tqolAl7mycuhNCy~oV=L9vnz5U*
zb5NBNJ_ic_!0cWKOU#jEy~3XK@9YumuF!uhS~?YjSP+ewuD~~cEf5&|dSv{}n$5F~
zV*U#Z*@PfFs()R;@!nP7o*U0bzbZlV6UFQkFT;)>96>QRCy}+_sc=))XP7SR4EGe;
z#NTd!pm*QETH`!C`*;kTD!r+pdKlZ1nF6ks!oH!`f@(|B=_q93MwemaF-ML+pC<HA
zJ>%HJuxWT@oeDkqsen2KNiezT7JB8Mq4|&Ma8juzdVGD17w0!Z>)uPkZodhSp9_ZH
z<0SCL(H!W$agV!oJ%n~2PQ%Q>#`H$ukyJ+xhUt)vR@xh3)FGksCAfsATsnl3ogsAQ
ztvNo`&c;}4X<GaKFLo&$!lKVQ?ARthoVay9AD3yxHh4wCL-mV-m*h0A4|ZZZEC1qJ
zpA&TF^(;<oKZ(v%_TsIlFSrt6hnEtZ#F<<RM}sIYSP`3vSuR8An(QFaNa1hfKc@iw
z8kLy4PZ1yb?H=UmRdahLX=D7J7|72Px+4?h`Lrh`wC}zv$trCGDd}jw*l`66p0tqs
zzhyzsxAE-m_8DyJazXa=$O}&mc@BEp&d`CU9wg3H75JB__)5q@ENIlnWs}3$!{9MC
zDn|qttE?SW)Xieme^$}R83v@a-iob?m!q@q%vkF5H!!bUPRJ_QVtj8V{NjAbzV9h+
z^%=)?K5KznJD%ebDiIlEXrkY-FevYS%p0w;q3gEiA%Z&tMl;hy)|N-;kFF8kGI<Pb
zC-!rrBkkZ<Bl2@UN7Ce`Ml5*gOjes^m|v<Q`}t@osc-uLOJvLhHqU%03<0*o=RD{R
ze+nm0WiqQA3tm&*ls^8*2M?DAAknjcDeiiW)(YCp=b+GWsJhOV2DD+(hQs`2&Wdf?
z(#Sv9mxT9YZ{u)<z>1X9MmXOqocZjsWCNrg@IA*%g^ZO4EEdk^)kWj!sPQ<_t}n^x
z{PGJZO_ayvci(ZOehiuKPJqhqd#Ka!F1%H~hIeaID?;{f0Jj0F;8DyAyt$zP9osHr
zzvE=6yI~6-WK&>N`$>Mn;=yFe4WwbtIWXb9E=o-2VfRrnhAhyeAG2q2ht&JQZSNs^
z`RgD&tMFioqEJr9J%q(CdMlQ9*CL&9E10dk3yn7Tgw}8j=lVo2w>d8X_CH09o5zJ&
z{Xk0o6bTQiEyQsKO<2BbH9Wf$4XNp8LHej2u5|8zkW5+po%s}o77Ogx+genjzKpi_
z*4ZAv*T=u{U5Mv>a<Jo?(EWUIMI_r^jn2>H`PHBY^It~O^V?H!sD=j<d`d8UTO=+O
z_Q^@VrZ5x3c91<X9yJ<9;R7uo(|4*c<E9AfP}rTdzQ<shi(KROV0^yRf!S#6rkt1i
zP-EL__V(HiFrE7vS^~!K1CPyO3&sNd^HrgW-axQA`4o#D-RBK<4x}9_F4VwJqC0(I
z?CVtrIy_8_%WZ~ZzTjnYtW<^F`59#<jR#=YzI-_OL;&_CttE3|M|@7Fm~TH^44OjD
zD^r_8pRVuX#&uaN$NT{1==|k!8Vevi{vR$qI|H+yS;OR%Eu6FTT_HQWiKdUfjNJ)Q
z{J-suxZSrAK73n&GLO9|!#f+kWtoHd*E~*XaSq)V7^`4n#D0|R#d(55pglZ`OD$bN
zMT_RJ{77#qu3>chqp+XrbD|{CROWJfCpC43<I$GEl-*Sc;ay2EuWuv#@i+<dp8nyC
zm-$lfLE;sRv{;qN4i^3HH2O{R_p@KKL{UOQV%xT@=Ihq`?pSZWX5;D|e$%}Nc`LX|
zg!?&o4~g`1T;S(q=ql-I7w$L1TP50FLQ+CPd+>R_^p=z8z;`jfeZfPUhzKSMbTj~d
z@dEDB{sp*ZOEU2PP_$v>HkioyaOXxp7wexLgjrjDSNz?IT<ga`h$zv)j3Lb;Q^TWT
zw+Ab^Rj&qsZs0h)&&s)g!g+k~&R9_E_2lf`W%+sW8oW%>P?Uc2lh2f1Pj>BYoT5ns
zmlyj<H1GF8-gxH`{z$4moZq@%RJ0)<&xR(5?RCp|JL6$^TO^NS$zj~Z<bg0o;waC@
z4uHFZl(~?UKz1f7m@9jBpHrUz+{;WGPI9LLTef7Xkef~9yIY2e(wE4f!h4R(Q7Pc$
z70z*K@jtlhrix-eXMew$YnCYg-)HA(X})>=cHi||{9OL``MLi8=QnGC-|YW+ey(;A
zeslik3ED|Wyu4V0aeH2JENKUO{S<^0h#Gs6XoNd+df@Pv1WtYG81hxDhRdUWqWoKV
z(y&XyHOgHOHdmiq*9mPOg*`O)wIO?TD2_x)BUx<uE^vte_Wf}o_MsJ~YHdU_r4;Um
zu#wyIzylxI$bdx%!KS2C=sM8@_ir7>8>O9aVniS|`pYtZ^ANVN<tk^pMTKs!PiFE7
zYhmRlO>xuUIJW84>N3~I#Vmi|Tllm|2KCGBnZ&0?xR>h1e0D_8<O^YB6db{KzSRKP
z&0@iPWJa$Co<`Sgh3vntHRvsj_6AF&Qo+wvY(%+0V><R93^FtauaHFXiR$ZE6M0ZD
z8o2S@g*vo6w+=gs55mkvg)BN}8hu<hnf(hr2|-&Y@qusFun|p<#hzQwGgtF%Y~3MQ
z$~ZBJe1Dend$esiPI&X!M2;iViPynxp)1Rm`~qcD8*t@mW16Xu#1F9R7uW3{NTG|q
z<8bqHxN<}njyoO;ga7IxxoEOeZAP@BU@`Rn(#N>mxx(ftoSBC&W*%N2g<1DgC>v19
zDcH@%D0Kt&=CKs6lu%^D^+vMfj$qFJN(y`SW->-~zT(^qFT&?<vc%3Au$yD7*xivg
zps1NgO~*zIyQoY@zU~9XWWj>FaweOVn~opbf8qG%L!860Bf=C!Br;j|6o$MwMnOG7
z)8<$rbJ}o;rmuTI+k}?JpM-~SvU~!`^=7i;Z^lwaNhw!9J0CmzuVB7I2y^o+g3;24
zSjUz{G&kIkWVTA<p{7Ru=e`KKaM_s2f0SWw&FpFP5Lxoo(`Sy$&M+1C?;sY8ZMTbr
zjZ<VWTpsL0Y_kS;+5HMUKJA4G<t6C&U7F>;Q)llwTd{TdK@=xG<Z9hw;B31g^SqD-
zmPh09+qpRLUK2C6qVW=a_5yZcjy8%%h+&m`D?b=fH01mpC{#)0_ct2jnSn=1x;KVh
z`y2pEl)s{Cq!kT0tVa5s?cfqpCEoPYj%}3Hr6K+sSXA0j=9j#H&1sInJvDAD;g1Cy
zCrour&yArgLYmThb|dSvG2z?)hEZ4XFzU(|%+Kw;w*6Pf(BSWV(33l!HkX95ElVXy
z^vRf&-ATiM_-3wKYA#!FK$=x&B*BKqer)oBgJ_qdNYR%LF~2-x%1}yXOO<$LSu~R!
z*C}GN+b7eZMrAg=UW&bo+QA}t5gGkbXLh!S=<U0S^o3W4<oklfG-Cic2@E{&m4w>|
zD?m_s(S)eKkTW&{*EOBPFLj=5)$KCOG+0JITpD2AB6Bu+PBc{gr_J^*wq)CH%2A+1
z7+Zg{1xGbUF(rK`c5YNUeom8S4PE2eG^caeli9@Wlpf7|H%?*)RvUtK_GMVVa4Y$x
z2$Z%2cQVk-qRkhQuvYI3x%{o<4{WStGlj(4f<dl$w8@$^ZrMrNz3K4erxU!GRmhzf
zHG$p!D75*UwhG2>bvQgj4P*Dd0oSO5tas09HhjccT>P*UkK8U4#~q1c^#v`g;A9eW
z^OGU9)*75*dLMfZ30qFhmGq)+Im@pb$clAN3r42H>}ly(m?SjbDyz%TL+v<rwC!WE
zX^b7Xe;SGdQ>dd{1C6hSlr@z;=0}Ic)58)C;iNN+S$S^f{5QHXKh;cu$f3*Tgw7Q|
z5zc&N)8skZRx{>wR)$?sn80fL)tK1{S5DVFh}0VEu(9?oY~JM$KUNK-*4d3%arP}t
z+t$vt=L<BXWrY}VTY-9C9-;3o55x|M%b{w@NS5Vz3=hcKGQB60LE-N>id=YyJzDk-
zeqKJoeR?v5c+I7xwUgr}^bCNu7ytM#<JQoJ5p8_F%r5r0JBtm^34_6{vl$<+!-9P?
znEh`vb|yefN%t(ctd=nHAFvNPljo84Qe##eRtvk+e{mk`LfMeav24htb1=AB6E3(0
z)Alk?G7dOFhegKld(<c<xxpNji?;IBPI@d*G8Y{}4?t*WHyjE$O^fcRuv)D=9KFhb
zHLKO&JNIe0=Y<O|j!a=wekOC34L(e}OohUiO=OWXLYa1a4G!L_$7Y$>Gee6fTuSY7
zvQCg;o+;y)_J9^nHu*Nwlq-fVcUM^3pvLMhRq%t|PT|7lPEk_cIy#{<mvzilq8)FQ
zsC?-N@%n`V5z5#KmmAL{k1=xWwXoSfxBoK?^?ZP9rumYycO$xtJq6{w0u}qNV@q1@
z!)P_(|J{*U?D^5<v?632y_ofkyHj?C^XhLFoz)DdmhWMlrT#!}jE*bsv*Z_VBwh{M
zw0^_-N>k?F+6{yFc)D447mx0p4rhEm;kkR^_@L$j#_7(YJ)3t^O13$jyCfy{4|V24
ztj@xZj?0ky!5h0QEXlJs65rl(WxJvzaYbJrNdMY_`AJ`3$jCbW96JaB^QLl2YDyGo
z(~jQ9l9+qaI?Q7Om|LVId+=LCH;2{XoHy<KsgAF>{L}+X?Cgh?IUMZ^F65*p+6bxe
zN(kOpjZL~5WVJ__Q0GcA*T7?Fb$J87`ba02bzumVFPCNq?)b1DqlYo4E!~j&a0JU&
zOoW*G)m)Q82iL6=&)Gh(=cjJGizB)>L-oXm@W~;De1?x<PEVR~x|$SKD!c}Xy{>@5
zR<Fga7{b*$Z1l$OXI)`!P-p)O=M4CT)4vv>TV5rO6i)FL+vf<4;SjM!;1I^2a%Jm>
z%d&X_NqFTbdt4lEMSXE|@lsPPn^={Jy6HR)&z!<8YQMznK^1US%K;Y)r%k6dZ{TeA
z9hf~f2r@<`qsOISsJuQ#*dF<y)RtndwE4E!==~5nT;ssPYTTgqVJCPzwPz<*Hp7WW
z%IyB$<?P$O;k@GA7pVPhF2-<U#8x5Z%w<mpw<1iJO+Vv8b$>RJizt+>+OZU`otQ__
zpS_vFVq-AxRAwcTVIXanz$wZpV29#ZI3+ZTKFM*oE~tlVO$de9-FNv%!Yw9RF_@*k
zHi543DZ&(_9kd$UsC(2UwDP$OHgh+y-Krw?aV{{oRkF+@un?!3tf0XsqcPa$82|3{
zT*^^;!ev#LqEhc&QHt3pwrSOMe3cx;F6?;=_ss{h*Bf@guoHvW5yuZ)qiPY_M{mOf
zwJ55)xC{=Tjo|Q23c~VcnAnsGa#@=>Rf%F4dTJJ2nH3Ih8Ftj9x`R}H4?v&gjre-!
zK5zqD<_6{EGj%5k35224{Ch8D2iC)*##Hhu-pOR*WB3mqsw~7l4P}BV;eO;8=##ai
z3mYO?O}-Rs70ydbF6H30k7L>V23?VRtt@F;tJ8hy-&pP=$(}u%Ng`zp`j%os<yqkn
z?O@CH?bN0pk3!khveg3FNeQp6n#f|O-omgODs+93CtChnB~TDDxteZUgemnHw*CQU
zdFTaJFBQnqt={~jGBv_oe}zrLZ06H*gf&$Q%@^-_cy5x8iF1XvLiJ?$PrDy7_Joo}
z_b}4DZpBsx-RC54N>aK0eERQ54138Lps8#sZGO6hQz(eT$sa`QxrRXXd+o@gREOZ@
z_)%={XFZacxdL=Y4g&YNb)s=WSK-;pfwcK#1go-&<&HX?hbKE?*u71^FgErHjQls8
zlDhUWr{i^S!E^@eX|x6L;52MhOM_|+VX8cNKdV(bkN3n!nBKof;(4XVF(E$=*1Rvr
zjr!-g7v7U8ZB;kQN8EuG#g3Tq-vptJH4HSji$DRkkXyEp3b5(K27&Hjr}-E+T@X=n
zpja@>_oJ=xd4V$jmAf*1B;9YH&NgkT1QX>L7^Pdp?g?Al2Oj$DWays%$JLvEQ~gGN
z|7MCx(I66$WK7C*UVDoMQ4vvODvcUV6{QHtTv9ZVqEs3+kn`HRL4%@s?yb2r&l+g(
zeSN;`{@g#@|A4ieb=GyRYwzdt@qil3iI|&cCq(9&)9^1vux-x|>i_yJRbJI4Sz8w9
z?wQDbJ1fajO$oZr9?c)U;?VqH0?vQ80dL*DMn#f+>q7N#{N|{Hsv0u!fx~Wz0(t;H
zer%yP=Ld5^oGmKO?v9R6gV0K2O|5HBf9`eiITQ`(4W}MhVnzObTDT^Z_jk^PzstPI
zbe%c$SgFm+{WhSL#XRv$S1<O;*@g2qZ^y%0X~GA~u{idaln^%aM-5Xi+4=$A=(<`P
zZQmpd^51S8l)g!@9k?HwM^3@x-42rCkMr>FUzr#^{gnLk%S=3OJ&6;Vjj+R>cl4sm
zEYAHTH5E?hQ^J|v{G|8*Sv>tIn%#IyM^toh-DXcbQ@$15mDO4OMj1T1bdHV()k4Yn
z?tFdM8Y*!61N+t{!-V^-a5>`?S^d<Jh?ilq*5|GAXC;@(^HM8`mWCMc@d|jvs=@+e
zGoIcxP_m#YV5NBu{&rAf?Z_G2+-Abs>K1rp+GBD$J{fKLz5)w{l@fu~4kn!2$*RNE
zDAWt^)O97$Shy9tehe2b&d-$hQgPvg=u*jYc^UeITq2!evpGU<CI9->4o6j(;*`QM
zDPgIse~X#;A~*xfe7?gQTXU(6ppA<b{S;kv2l3ponix_d8BvbBgY#-kg>zQ&pPT<^
zWBD=IctbK|&CtNsZ@cNqhF%<TcPzh5Gh_G0L3rI@HTTe;#TkBQ!0MGge7ohu-R|9k
zeT_kQWoACk{L>8<UHt>Xwh{{SU(DmrTGENJDfDc4UtGH(52x-N&t|J@XkF|eRPQOm
z`tUr8u|6*kFrUanGP}~U;oDux@^;ayC$^BJSs?_j8;>2nZ^OVx^Z06B2^|R82cbLC
z$V9FoC9<7ygY97|w6bJh%N-o_cou9lU&WK`UGe?d$sDvQ8qZw%NWIl-_(OgkXsHBY
ztWhGLKXMSJKO9PZHY8y8=NaT<rpK!CHj$$)3VQ+);dItWoKyByHj?LX*2X$W8sQHS
zWqWCe_C>*0^NR39-w%czdkzCS9fkVIyYNnjJ2DIXjcgLNKz=k)6rAoqhqKpI(QWPt
zx}W4L7&oX=WPLW=elm^Mr%4o#UzaG@`z|>C-Xb%&po0_Ft8uvcBUpT37yfoT&YYEv
zUGJTs+jb`*>&{8B=C(U~9jq1Zt#TB<9{weigpcB%xxXmTZk=$uO`Vl5`$*fljnKTQ
zH|WJbg9q8);Bt)-X1y+lb<d{o<jg(fE~QMD1z4evWhMsRn1z}xMwo4A2Ok!D$ad?G
zm#B3Q;m;%+eAIO?jd?H-N33*XryE*SRFTWi;!l$Qn{b?bp;d_XxhG%lSt;N2C<9%K
z{2BC}IDO|2+81VBTUH~Pat-rv;@1OExT;oO^k^))S{#FJuJ^$<eIk#PN$sf3eaLK6
z20#6;1D;P#p&M<-h@><?W2Fvf9MctDWcs+NunX5e3`O@9F7$An7CusHC8aOt1pnTC
z=<)IvYzsU9zecRVXZwA4cf?IlNYci?vuDx%2gUq%lpC9NY7yr@91YLB6(PR#6$vBz
z3rkbFVr0o}Qo1umnvy$l-p0YW?Vc)pi(JMH)9ZztF=@OvzXKc&SuI=sZW_c}{0IFc
z3+z$N0x>+?3L9rAaF1RgXygujdDa-Iy{p7s61~`T`BMm2Xr?_<((F>4f=uthVz$|{
zf|u7;iUZT4@n&oT*!Lya{&a<CbyKfaS#=5zz7&H`m$<U~KM{PTNq@On2R>BS3E$Mu
zqZ^rjV2j5qVQc1Z$i1C|ljk={WchO1y7(^Lkt{{R;|@IbKpojfM{{k(4eI+y8MQQH
z@ng|3`t1H$&|mVBB9Ev;RcZ*GTr&z!?6!yX9~xlM5>4(OyoI{rV9c_YHjm?tQO|B7
zFJ1US%vZ4CG1D}$o2?4hbvQ_gezU~d%R8mSNsD-ev~crV0}LPaMm*)QgSy>*FATY@
z&28;j(v+-_tNXs8^{WDeM(LgW>}|5VO!~XCh71-9R_4=RiL!Y%BEn_IsAT9EDWx5@
zA16ICxoG>w8Fwp<M-6F;eeTu(bhtbL7jMbs4*}_@d21uP%nze^XS(CTXStAn-vzJO
zRR|elN8^sH9oVb?a|j(_4p~#@@#2yI>|gkl;(O>|@z2gY=UE{O+1jKe(Z_op?u3K%
z{-Zv|$<)KklH=0+xSOdRz5FbZ(Ib*!?iW$WX`U^tt5_;uQxwdRgK{}3PimOUdtloe
zYj&+NL9-A!Sp{`tm+kvxXKG|{uK5xqw?CqNT^@kR@m#E$ln-9Zd$C4GL(~X<OR8^{
z@}xhC>_4{!rWF>5DG7(+y);i7qrR60+`Ir2Iv*4AIb9gK;VC@(3iu<^lYd6$utB6`
z-_sg^L)O2c>;hK~E$@SMMT2NwmxXZXRW>zrPiL2J`C`J28ezoxK(5UjkM2<k*qAnj
zhlls(-TQoSyp|c~Ug*ZAiuxRpbqZvAir{9~U&8u>-n6D5lBIwyUou%k3Mo=b;CvV+
zD%=N~y;JyorwqC{Plx3RwV)N`f~S|x<<ePWxx>(9j049CaeGh0u#j(1Jv$RGTi%cj
zuy(@%L+ZfbS~e-TAdb&eX8Cm$SmWClucSucLCH)scl;Q>|8_368&=WF^nYT*t}dAL
z7<ux#ci`$;F0^ItrNnEiS#kP%&^+=3>}N~&wvfd_gh7tzux9`|iRo}CryPt&4Fj9I
zTKG>K%NMM}VBNw99Q)r3NKE=6)=N!VuP96IlW!r}dyC2aq#KXO9>7L@{KdKvp?IrH
zAVxY1wL>>-7p!w<bF<WLi@RHjlPgFFI^35R-)ex;j;qB3-_7AslMJj+o1sR^KpcBt
zO5`t`%lAu{a=*9BDZXeekn(=&6S9ZezlR9vKNn+(eJ?f&N(a+RrQ$`&BGC2Ra~g8R
z6jfZ5xwhI7ow~<}PlpK<`BnpFJv##zVjN@>zGQ-Obr|bdhrnqmJ-n>wGc3ON7`SpM
zgq_|AHiK5en|+(;$KwsOz)TrNRT*Hvj(dcU5#w=o##7q3s8EQTI2yH5bTIS5RCp4v
zjaoJlye~0<Ri_L5H0i9ccy}9B#23=u3o`b<eFN%Z08>Bpz?x~#sJ8cB8Z)UInC6d!
zt@J|hUXg?Qr8BwGHdrWH5W&$+0eH!=OtkA{O@?w04*%_pLURu`tGGism&f6WtK(tK
z4QKK5%<C{;YFqYH90Ge5_@nAzU;Gwq!V!zju=V?3UZeGnnq%{2pHmFj=cF2|o{kZ}
zDo7M9YeP;9mPuRmHNu~`N96YH9`*2_j<VKvT64AoMvmA6Rbf_CgA(amcNo0ytd4&J
z^RQ|9E|{CQo*Wie6YdXTpEf=I>6Jk%PukL)*HhqDl?I-w)TD#u|G|}qG3YY0ghqb~
zq6KZPVz;L!X{*w6veozmQHNB7Ub7_Yl2ZjwyfKS6&v4+x!%p;Vd=UCw&?41+4q&%2
zf}QqMOPjs-!oVd}khCQh>R&0tQLoK>^werRKF|?+g-imza-OCSw&kX-qeyFUE1cBQ
z$MUc+Zp-Y3KSL|zPo?C>Lcf`OM6l;=5tDG)f09(Z=PSu(jda~(D*uZf&cy{j5Na}#
zr*2Tki*r@z<;GAlukFL<a$mvcms!GAQyr;kQ$oic>0tL!d3a`Cv6wj56{0Ga)0@kG
z#ZEFMJZKSx1J4KY^5tIW-Qln_16eHpd~zKGWCr24t8c-&Z99BjG@c(`TT5Tmy*YaL
zA{;3!fw+?oh07OhFjnzBExU0{P?b{LRd+<HR_Vq`#;W+HZvstQHIZRMG4y)f;(Q@q
z1yx0DjQ#S99;$l_kAh<<??6xfdf$R4tRF>>Ovga7l=d&`d=bhHKcmb;yHGphG`-Tw
z<JRuO`Q#ZTc6xdfTzvcRix*PLCAv3X$u@@Re)V*-`zUD^D#IFMBltA%xe#TvOPFY8
z%6nF{xcnL#!1_zBL*;E{eEswyDb<w-0sU`7#}0wi7TW{gkMKpqR8_8@+f2!pKDcYC
zA*$qc=3!c~@{3iqVxo2of47(@?(RJcy<{;MHOz`{^lOG$YcGq>*0&0`w}s&m-L-g6
z+8kC^w}H{lwQ|o(ari)E1qOCofphPyf*(#^SX6A!v*(TGZ(60GKk7bwza0%)l0mrP
ziVv@^SCy#C?a(kdly{#<gfP`aely4q^YZ<~@t4Qoib2;Q>{1F|Y}CMKk?Z08pN<mk
z#DZNPW^nFnSG*`S7S^BKEiV7EoZo+aExRJo`Yv^wz`0uzg!*A#xTp_EO_2?f^>mf6
zlse;D$5z4Z%`N(_kVbVoj|-(KenP-vsd=!`9xa>FK*vdq15J;}y{@UigiQum*)1NH
z%`TR2OU*%X?KHgmAQtPTgwmNkrMR|zF_>3p(D}+ebZ6*1++X8FPcOH?{pq{tb+Zl{
zelz7%DY<iG#WPy^@tMrtW(PP~h1I6$1f$ZSbotMIdN4J@kcKQZq_2;Cd00VDyn3P*
zX0+I#)rkOZe~={E9V=+<5{W#&Wv#eN@it9;Uxk*-qNqG(Ij`uw8XL6J@cXzFie7L~
zEM8Rywoe;G%@YeL@#QyAA5=?QOom~5VFNY0s^il5o}_KMg06?(mgr=97)eK9%4Rii
zn>mNytk%Vw-toBVy#+q6Ek@OuEp&RoDNsB$3HFz1)4I=H_(mT+4oMq`jk|`kYX3Df
z+{h544yt3^oTG3%bRk^6e~eVi6M5VVUG|OMPA<`psN*_!y#GUnZS#BZ@4`^_{PvT~
zo;8BX{IBAtL-lY#^(=(AKcSt{JaF-y=i-lsZ8T=$1k}iULjJdhbBpx0y4xlZIXA2F
zH??#0-^MOHyF(;vbTP$!Pgjd|>o%Z~L|pu$ID=nhI&il>Tjif#Si<Ak(*4x-1<bWg
z<q>CI3e}0%sP_sByj1&McqrX}Kkj={`)&U>GFfzyM!(3Qx*iAFeDng?XY9@^{&f<*
zC5`4U+RGuyVFn)255hOn_p5`!C#be<hV;*F*z{m68*bYTzkY5Z;nfdPwvWQ~^7U{*
z?17d^-MHnRGtN3t0&@cHg1pN>d{|S(hj-}XQE>#`s4bGd!#k<(htrUq|5EV2VI@xY
zwL#yPJB7VJv{A6nBb5@sfrtCBnx_%}mwjKX>|=)+V}s#c{AzyWm?U5QML^-yZWj~(
zm3*L52Tw^9^4YhRIyGu0f#n!ad^!3B6|Kpp^LzVY=IDVO`80<*->m}wmaaHSYO>FG
zEfHLQug6YOyIUp27mrxH6;AHoN<G5r<!2Xdr#t;((C(W9D>lSHijgt4WP0Fj#av;8
ztA~^--XNT9b{C924^Z<2ZJht%E&X;agVmPz>B-bD)M4`<VbrcJ?73tKKTcL8?V5Ga
z?(8e{>GeTau4%)OhT#%@`7b?x(G8c^<dDk2!Q4`8kJr36*9<?sUTm{!0d5SzJ5mE<
z%iY~@ODq;*=lh_~y-;@OFp%aQUJEfN=h1`wC~P?~8GE%_3l*ESv8Gj@r=<=@W0kLh
z`@Ds6-OEnm)(yMaRjW$YB{dLbqGV>WP{P@#WAWv*!8oAr3%YZ90(N{8jdu=e^BVP+
z!W>5voH2A5?w>YNY6oOOuw>W1E$#Pa59)<C%pC#W4#ceWy>Rz`i^xB;369x!=E_0H
zE4Hq}pVEHvPjLvN-zsS*V~O1#8}q25<!t9ZUA{2#p0IVp8vIcEMChfG!e+4o&b1y5
zRo6T6oWR2n7&Mq8A8r-~O$!tZRwuGyCr`TG{TBJnl=i&W?YUFIL19a-4{8*R!h$9b
zeDb^tAGP0unF~(9GxZNr$~^<Tb|&+e)Ai7_pQya;AN>2;6}QGEah=;7%Dc20lvT$=
z#48UvCpDOLd-kM&JA24ZYG#?w^_CSsbQQZ`PhQ(W6-y57<3lUvz;~xjq@Xd9|NcBo
zuk3eFu<a&j?&!e>Uk0O3RTZ7y`I-EaYIvxWir7~9)MZ#{e>#`@hn)J3B%gI->9pT4
zj@agpMHM>iVzFLIEag*)*q84IJco}rvbpH9GIF>Hwka61`+_6%YQG{Ms;rSLFOO;S
zZn==Yaw$Ag{sGCI3*g-!O>CW+%+`i)h15$o>1>)cUw_aM3tnq-l&@rGklJ6JN1ue=
zv-JfoHRG*AllX740%YlL!O8Lqu+J3nXO|-R%Cwr=AIiDhk|NQy{FUg-#~#%7$B}Qj
z`fxzhcF|#-5=P;BXg?5!mV45ub7BP@*wO|&`8lnza^ay8rE$8^N#RYIGb|}hr`pHb
zJUR6Ug&HdG()gkH{@*CvU)B-qS9)Roqa3(B1bL%0Z_WA~h$+8r!2VScv}n_OY&qYR
zd#4|wtUfiAb$cLcZYhB<SO^`IE9lqe58xeMh9RNWm}6(mjRC><g@$w7;LFtF)1QOK
zW}u2n0;Ojhg|(d@!jt12m_8)%=<H4SzI7rip6!P<cf)zyj~u*r>4@m?#+ZAqvg5Su
z2)vcv0-a}GfxzW=MIG^+Y{yz{)^O>LqsR8;q~-JI^OkM$z^-;YUr!aa$J>&&=YH`_
z$`*PuXDoJhkKs9!Iz#ldB8bWK1;f~7@}7B7e74*JO_DO;^mz+3s%eBF?sGZ(Xbpv~
zb3uDEZ^%=S3{kiLgMbGEd0oOPEUm~VKOJLKlFX5D=!D85x?Jn+$E~q3Sbguki$%*5
z`Vwsq20m%LWn>QSxG&jI!n?@y%)8>lu(i0m&V~*AHSj-}%zF02=(U9#23-lpm1gJU
zR&U(Ve!U4+X?~$ovwKJszUzXkZ%;J-l>!evthwWQ>Hf4sYAU^Lf`b~G6yX=cTkL<6
zcgPx4sUC-?Rp!FzKS~tpW&*DjlxTp*CGzTekW@c-aE6XMPs$&TSDxfUzdj0>cSR9B
zhu1?{o0-%gAmC!Dz1yU~FG~Nwf=gZ)D81XprM#kD?-YcjAv?vPHOIhQuRCRG6CC~+
zg@d9ebL8S`nEz!9#5e1JqD>qwJCV<~P1+zfr7sqGo}r-KuVKcv3Di11R^HI#zUcVy
z4k<Vf=C1FHXsTHZ-pjRuF7YYUBJC1O>!i0$gaQ_>Euo<J(=c$?8}N4QC)?rd!Kt~<
z>@d}kyvQH7*SA5RVY<BOa4H+W%n=sIN1(cE3hyw8;hiofT;JS4YoEpAFLcCL_R{&d
z+KhcFJ-~HZtwcr-r5!7}qeIzsatMrIebs1Q>)cNF&r}Q3y?gNP$5}Y;`v?TX6x>qf
zMRqrziV3aZd^>I{)1VWe95S2i$MnJcUwT|>^oIT|)<wT}jifwb8I{{>^ODcDaL6kM
zw-j|^=eU)qFl&=Is`U}P&8xtn+zod13_+8!b|H2DJz?>Z8C2*w9NXLVNTZ~fj$Tm0
z?-kylpl!yhE&L!OPZvU)Y*|J13TWItOlH!q?x)`~imvO#A(HXL|F<KpyJLu%`2xkC
z4#Lr+pTcn+eKxwXnCA`d%WwY{aQnbT!Vb@G6!NG)_L;bcbmvQH@CGH$|MDKTJM172
zjXZvybX|n#)o3yyA8P6YS+}=3JJ@d*b0s_5n`~2Dvb8^y=Z@gK0as)n^8%&4S|{qW
zKT(XmwF{^F{-f=~eaNkOEq*m{<kaxtT$eS7H(7?@rm_M&S2tT2`$v`LXh(>J()_&0
zw}N&TeTSzrRCv{y)!f^$75ZHqj5DP@`pP^L?zvo>{(WwMgdIVck=d7DdG?YBX6Ipz
zXQPM?dGx1p5zqgV0E36Fq$$m{bP(-1#e5{mcUVyHRv#W<;RG)(hHwuZ51d=|7VN6-
z!7{D>*gvThJ@<5lgQB&#p=kt{{;}qgVIKVSmJhaMhtp>rXI|i%BN_!J;dIw5!KXM{
zFwlPw@5c#{a7_`%etIjJbT$a)p2;kzPQ_5S2{<^yiSA2u?Bun<*z=Vl&%CmRcTIao
zf6ko|>l2Sa!MX?Z{gM_&zB^9m9|fZIwR_M~?oZBd#_`nv6Fix_2`sm`;DrNYctW-V
z`Kx*0C7n9qc>?mm+2grvz#KmPV*y*|DY5=;PYzl+S00di8?N5k2Jb9n`15c_7<Y9I
zB`Qk$lQLT{nlK9gn~IzoUqyW_b@@YK7<T&l8=f0Z!0opjaKzo)&`V)Fe|)}=O2w{R
zYaWB$kB-9B_ksA@_Z%&}E&Wzd9-mm<3pei7=9AJsX41~CIPh@+%7-gp?3Gf{?^J{E
zv1t+xnQFlcJN*XJAL@MMs5-_LSfU2{LAM@bc~9|rS)k<_{G45c{jVIrzS0h7#^?DQ
z5!e%)Dl*yhQ#OrrO5xa`O?Ys=m1OKv<Pl-+tfu%3iyDuC)#t5fkmSp~`zGLTv)5?a
zU25Ni8sYX|J27tS2yoeDDbWm*`S|+&0uJ=%UMA&mX0AK`40tGd&)dP`5f_+KUx#<v
zB@1EWYAU;ZReml+%4wADhO-ZYuu?%;)Ze6uYxWdFYoFs{<Js3xRXq?Av-+Zs^Jyq7
ze=Q6Dk%Fyp9q{nbaX3w}AGR9$&?MWgc-;R1czgaQ-k+6CmKKv~gYgz&bn;4`?X{eD
zO}ZrRN=l^@rx%j>r!;7KmW)G}25|pU5i-{2;=5l*;l-2!`rY@5Fl<yCoUQg0jHiXd
zc&9*~+f|ba?iUJ|GBX9!Z>B=pXEi?VqANciwwV9ZX`zv_wYXvKS+blMNCU@hp_CV9
z?Dahj<(XNvRNsexE_(?TW~ppFS|Y3uE5aL7kAmL8Ch_aAO#J@umu%A90Q}{?8@^j;
z^8QM7+!g#43hY&Qa)LKJYg-Mu{)12{F&39in}cFpZ)h*=g&S&}`B&3H*$0dHQe#ku
ze_zUkLX$wQ6Ss<22dHrm%`~#@mkpmnefjIW)3{<$I`7-*EYz>dMrXs0uI-B=a6!{F
zUg2>NN~H5+%{DJQ^1dU_IJyT9uZ-v678~)kOB%GTJw^{pvsg(bRZ!bBmI`-xV}q(O
z|5cjKjUC<i#kRwuRlU?OyR0JY(~gly2Y2Ak4;`|f;lzg`+hObad2HP3#MTEixM=GS
z*mbsr=7n7Yg|;-dkZSav{{iO@Jc>D%Tk%2UbIMUw;f8iso@HHeVzcm;{4x$e<((At
z+#CyUF3#hgrgcKUyuCv0v1Ba0*DMddqrf>QFNoh8#^R6A^JJ)A$X9emN`!}vqW0uj
zJY{zjBvfvtm8bgQ!e71d%b$-FRF^7VH0?-5hB|oXQaeRGdF1#kZ5G8HA4+P!_0Y)6
zfcNFCqv-ItG<$S!9{;-pY%Is&>(;Rt_<R|vFNwvbiwbP!WG~IRGH~2_Z}H{VcVbeB
z4#yRi%01`zB{joYcy3QD9_&3D&-R%^-4vX`ME4V!kGJO1gdVW&VuskOlOGSuv_<U-
zdvT&~s&K*dBQ$(X!GEo4q%2$}uaM6)-EKdfjmlu_tKGr!-Z{wL){|Rpj?ky1mEyOo
zdTu<^AG(|wPKK%v;O)WneBg#IEVeVlnR7~L{uCRqoN_>D7(WGnC9R}Am?PWlnJ@S*
zpTToiF5y)d_R-R^d`dGL2`vjX@y{(?&OEE(qBmhECb--nm-u=7;BLK`zFL*D6inHo
zyP3;V$M3YwNG?1~nac}rb>kU-QX%0`Jdc}D3U#W$Isa-zW1Cr&zgm%t3kC8Sd<yd3
z`s420&eV8D2S4ad<S910!L7d$uRSY;EDXkAgRi+*STvchom$8PqW9oUuY4Y)yFjx0
zC39@NAJ3aIP0$$DRrax36E}62)BB;<Xl#2rzq@V>=hkbp8~%n5d-U+GdnL&CeTR{I
zU8rbpKmOA)fQOvg4*^Tncv5!<^m8j?7j;uMbWov-%f@5z7&lzBKuYlErO36z6u4Gb
zMVbYDqr*uzNPmcwBi*^2HKXh3OSL7RbD02{`7Chf86f`Z0>u)^tm4gec(`vGss7kb
zRe!(3gW<1W{E>&kj(dUh{Oc8RNNJ<L(>BnerN(g6VIpfEuxE>7TG%aJ9e+G}M;#;r
z%Z8ABG%7<B7GZx39-vJZtG2<y4!*deW+40R?f^o4f9zIm!D=0+3Og+qalyMkP_=Ck
zPw)L6)K-sUp;HrR>N;?zcqhEuW`P0UXJU+6JIx<E6dzmu6xN>~Al;Lyg*S8<W?R05
zX(osz5f$L)8HaN|M&P=s>F}SyD5)V6!19rTaQ}50+f-i_N+(<;8^w4EJRZb<k9%<3
zyTN$ZsTQ7DU84)LW{BmrnP|G&AEV#gCO@-WxcejoH%NEaRcB`Ke;W6w;iVaS{+h&z
zO>@a-<4ECT_YzJoR~JW$jkIFYXe#Te#|!RBv#tDJG%R90?`ZFcUkv(+otie&(XDyh
zd(cJtA-&U-d#~qldU{mz{)6nZlpWc8%>u)stZ?L-EKnH|!Xp-SK!u}6;BZkb{XF7G
zi&U<Or!SnMfh&x;_FX+ZdmM>9&OV~hpnr70XfDVTE>r81>#}u5hVb)NHm@xxfJOZ?
zfKx57p>>T=x8%3%MV>w$tDHw!q3$%i(Tz|4*I8;CnPJQmIW?|7L{nnBpsIHf=j6sh
zrffBDm1Yh@q<yS*4{bg^%L%OPqv-Yfmvl1qKG~i<NjaT|V%ijC4w|_JO8Pd-M@B!R
zhlQOXD0>Eb{r2Ir9S8EK552%dV=52079`X>?N9e!ouK)ThH#j|N4VFVfVB4+BtFn!
zhr|LwA*Y%;o%%@k8f@`g_-H!fxrM{e<zVYWIih7XRj0dRxb<OtFkw1JMr-i87HMB`
za5`QR?AW1eK3`ioh`%m9N`>aG*uQBtKi6G=GWSk6W~4;O8NV70*VTh#&t%f?{#Ne4
zAdd?Tcc2%ckX3V1NK8(~_-~Fp;Hn`SnysN%1rvGS=1$mY;VarSay?H8x4?@dVmWA(
z7H({~M|u*8c=J@Lk>FcSQMTz2meET756i_T-3!p+;Y{vVO3*I92D*l0NH*61N2J@M
ztSuAU>KwSkn@w<i!a^vTsEyMO45#8y8*DkNBG$Ze#N?69baudaZazAX$E54y_?ht-
z@8Ai0PFwOnm2=>;lw==-9Ml~#n8Vzq{P6}Wnm9wET?NS4Y@?~La6&H_;*kyA-v`jw
zx9a$)XB7YTO66gBHNckVVX;Dnu-YR8&6HJn*}aL>90913TS)4PsT6e0K{T)YPM^Ox
zpysK+wEOW-%F&bZ4ujK$n9>sZ5aP=-H>`z?)d@K8YQ3=ED2>$mj^clN;<>Eao@>md
zmUD!ZEiK8!h`IyNVSWy_*^Yo|kDd6I)FfH!v`-A_pv<HDO6Q2OWV=rfV$Gnjd^N!z
z=e&C=vnxM>{~p-k;^#xy{n=;1ukx;NQdvf2_RneGSx?wJ`Zf$1b&@Vw4&jIg7WBKK
zE8gDX4*MRL2}Z4B;LWxrcsgu5ZJB#tD3ARFAv>mHY4lq1-8%?6R~F%4M~T+j>!0&~
zQjTGK56S#8XE3(CZV@MZ%z#b}=B%`>C%)b-nO#m4fo-&k_`C8eY3K%^)npwWwP+))
zJ>!cr|5{?Fi>+Y$WgshR?4c)Wi?Qp{i=bWN&ud={z|k9nsSaKFO<5Pr>C_vaclwW>
z1UtjX5NQr}qL{G51{^UU5<@;62aCqtqGj+Bbd5~IrhOLZaOr~>U!w`>f7E4ve%tf)
zwF^bBTqEvcVnEXmOZy(bZFr*kDjc)(Lan8_IP$b2#rUeQp6)7?Pktn(t?VH9nXIE#
zNA#%Y3}qbocCaw<&Lv^W(mD9pR~hZaS(yKyG8lF@;i&N@((Zd6Y!IaXzw$23-Wy3i
z6>|C(^cvL1Xj81~cj|9m4gR->p;m?hZcy>Yru9zf6&{M(D<lif!|&AmbSo_Fpu$^w
z9-vK%HlVpggRKTgSr2a|zM!0nt!w8~OYi4!cynirvdN%E?~$x`$DQxjro;4eazVCL
z5x>huvAih+lXMPJ|N5R-uh<PPJ9)FJ*Drc@U3yE$xPi^XSK{@AQd)KLBz>Miv^}v-
zY-qX#YNMsMqop+!@BRV{<3@44!Wo*o;2+KUy^_w^NG41p1Bsl!g(g}%@`VR)C@07R
zKD?JqTWJmuCmw>jo4+BvxDU^l=zsIK4WWZ~1t_gd;@YGUyiw_tC{OIk-Iwf!@#TY9
zU(1*`sVw4*gR7x)%NsIj9|rAtO^|!V9bF?@MV%yIMd2I-B>of{4cuANI7aX-x+iuW
zG6Z+ubihk9ywUx6o7mf^jO?oy!mzekEQL~pvfmE0?8Iu}meee^t_kMP3!`LXlVV_1
zm1NU;+!eR~>x!En?L@s(Ik@XfCsaFfSF%wZhW(~BFf%`t!?pyX-u0Q}7B-eNj;XRz
zw=sA!xdB#pxeIzhYcSyM4tjf=T{Qk)=fiVW(|_A5;Z)F8d@@3pT|&*_UziOQ<tBmd
z@naBcmInj%mXp;I1GrJ+jqh(hrtl?Af_aDyxes;V^t=W9F=UnK_FyRQkD7>)AttQV
zpvIAjujs~XExG>3j;M6e5@!?*#JmGz+0l3q|M6~zcAE@du_lYfgN9f)w-g%EG}+c5
zm+$Ti!uH13a60?0IQv6^s51NlS-Tiw^|7^hR-prXs8qs|juY|Gc0E#@GMb0<x(3&C
z6ggwfj}uRhdWw-nf9O<p99q?zVpGUW{CrdTy{c1(CyP7qn~%MyZ1zi_^4aLpGy$^T
z>9gOOo1{eR_=D9@u6BPbcpQqtu$iaDgpeNiNO>Er=@yA&K5Ov#=qOyXaw+=NWPqEC
zL<Y38rW-{GlzJvvWWRF3xpXQvI)!to-!fiyCmnvw+a-%zYK7)&l2~O`GGuk^fp1@Y
z5cekZpgX}Ih$^)C+X#K!n6poCoU>DisaN4tMRm@NS78HpC3LaQ;q)mW&98j8^O73b
z(v%mnNuv^kiko%trg0{o{56{=D{PkA+v=mn#!|40_du)XQ*o5WQ2ctjBae7Imix`<
zfCKE@VZvW?q3G5>Sg8x*n0bD5_rWx-{OC;Ct2VODv)*`r@N)=1j_4piBYf1@z@6Nz
z_&1-HpF7qpUe!HA`i&`k#$v0O^XfUpiz$-D6~O&tE?=1T8Y-nXeYRgQxV)JPEA35r
z{J&iwh^pe1+5?>PuwAsAN_1mnAWxmY0|xwf1M@R~(^Z#k(1iCPzuyv2{?bgNHI>-n
zyd}COJ)rSR6|mXy4WyiE2K`zMT)n%R9?$KEeH2sRk7hqSveA)ySYH<ld~I=xdLZw&
zSH>I%UCuIy!0xK=$>dr$e3H5eXww-gyJ}tAqvv0`I>(ANv&%rgd<Aq1Tmb5SHq#!Z
zc-XF9KxR?3u!F8bd%6o3FLvUB2QF-G`iT}UNCBlgAL(>MA52)<0lUmoM+=LW@OSky
zN?T@u*;<F`_zFv$;<JYe=C+YujTxyn4?%yek(eH=L3S0>@M!Q=GVK<IK9^Hq|F}BX
zAGMqEUiF3B+gH>4L<3CeZzV3<afOamjKnPOWK@vmN|nPaDDLh}SfA(z-S^vKRHPGE
zl~zOM(-@wiv<LEQo9OTs12l?ELA7)rtQuJ>wmeLRok_~%tvUhotVe;`eO>gHdKGv3
z3_x+C2k(O>Xq#OJK@SqJ`Aq|DbXqD}4(KEEUg5-d^D7|M!HRtJ^zl{X6R2pE$R{hW
zh-O`zg{%-S9MwG)--kuOv9Jy}Lh1lP`AB|Kt<M+k*t69W={+|l10tO*$SSxW=iJQ1
z=zdH1oc2HRY4OBm;WeQ4sRIW6egzr#!YHE6fO{FLgZ3F`Hvct&@_q+!6B|qS`Jue*
zco~eF@=nZ%$wT8V^<oF_7vhAZ@z~es5k)oS!>vK{aNXo0a-Uv8cCcI61TpfDBTaF^
zs3C|W9Ps+ytF&c`x_DTkH}vR`N7V)0_?i9;>OJ?o+#32~zmOqZG_51I-h2kzXPHsh
z#}Zs>e4Ym27l;a!T9#9_XxOG2DIe0GH5T+|%NtwK)~#B4d#Yo@i9&Ea(o0bOktxLt
z6Y-Hr5}kad$>Z7%itQe6VRE|`Cj0{U_v?zN_H~myso=YdUDzL(*U|vJ-%fQ=b<d+d
zZIdY2H3{2xDza**HqOp=<1v@g@xeeXI(9IRYV9L&P0$%uX^xbC+cpDRCY$gAvK31G
zZqatlV-z=3oBj;Shw{yG`h3(1L;Iefif8LNNt&nWl%=59s0&;F{-V|eYlxru^Xylv
z1wDr@P;e+4|IEmsO-oZqjM)jhCDN6~00%y$YsEc_L(yU4Hu5x(%tiW2X!`guWEPbO
zy$4DL&y3r2tMDZKS@9h{`|gy;S>C9>X%UZjevFoS<&sI@Dv5ZNfzfuJ{Pmaz*XV0N
z)QCl#BkiZ1U#_6w*^{wi-C)?gE}c6#597MxzHFoH&Cu@wG$d!!lCL_DI7^57?dpS5
zYP|XBB5QsZ7cHD<7|H7f%HePQXXrP^iC*~ihK#XOg{gUeBnkL<{C@YNU^%EOTDrY;
z!Eqlc=R<(tax;o`cORscHy_Z(fUW$oemf}~n7}%9g~C56pQwAFP&D1W1>5?M;L}^z
zqr;d9SX<L3KR)|5XqnhUi$oDU{9&EgTRJ!PYM;g#67eo(ZBOh~zY1lp^H`xU4qvZG
z!=Yahe@V8B@&40Y+zoo6S7<jpHGZSe-`F0*MmXWgC;GU%_d43#VoqJRb?2YOx^%r`
zFRV1^hI_pYu(ACX1P|-PPda_4Jz82~<JB@4d2yE5YuYYwc0K`#d*gZ7nF(l_eV2@H
z{Q|c<8}9k;vZysj8!T-d@zAPqTsQKvaI<_FZC=@3qN=EH(4(>3_pULX*fty;w6_VH
zJ(_9e<|%wfveL}o9tD!^jaTJo!lB{qqJ>%(IrdqD%Oq;&jr(cB@@4vLJH-i3ERCe#
z#eZNb7{dT6q{AaU#Dwd&W!vqCiXj=s(CvT?UV4yBr8_6_CS9OgW?_(R)?doT4CapS
zYGpg#YGbeIXGrV!SloHCK{mBe+Q$rELuYF`vMjq@th4vQuo5pU^Ht-yCc`13+!5(@
zIk(sq@p-4tn5YRTdON}@iAE8*ca6N^8`Hx&J+5+@j?emdaj0CKXFY!ho9d+RtLT95
zK5vDmi{g2Qy<`XNby*12%HnTxoPnFyf^d2fSASi^dOeKg7aOm^p?w!cb9DvoP?=4S
zZTj%u$uVH{zyQmwLwVO7b=KW?Q(Tpz$6sj|E%#L8m5*(4$JBJ%?iR&Ae6?BKyq%iu
zzl$H;BIQO0A5g;ET$bm(hkd(4<o=s|aZ#5^(Bsr68sk5Y&l_9tv~!C51bu4Lvv1Rm
z)7MDD?2Gi~@Dq21hH`uLXx{(hf-p6;3r;(K9vZiWW1o^qSTH|}!r~pdwX>P@`|T!k
z*It4*Vgj-2vXv0{^oKYK6ma7@D{h(8hrd*eV~31g+%K<K#LE}uHrLlt%q}zhzN?RL
zMcUWYy7gxFnNlzD-4O1*`42p#G%U{7<NX85h3<_vK~*yuv|PuF%jR^1gRhU^4Beq<
z@!xS+_sxT20*^ua#(LTxnhFOq!|^1I=7p=zP_HakzL+?S|89`(BGsN4)o90A$A6Qi
z!e{8wrh_{dr@^rD8nJe%5{BHJjQ!^<7jq62qIzIAjvOV?f_b!PbJ3UW7slb){O6F?
z$B9>GO~ccRpTQ@|;JIm<Ts~^>Mre_CxNaGv@#ct+@ZnPy*j)N4y8o?&Kl63qsGAu-
zO&~f~ypC10!zFUE6X1+FG`saT$Tg2s_mDBHmtw?a)4qc8${I>L7>u8;UJ+(@ZzAW^
zld^etd~oK9j{J^~(z~)fFf`biyV<+pboF}_9&kdoGiNCd`}|FqH&6vzzw{wLk3h7a
zgsf3h1W$t&LdCr_zIHR7M+Q4UVOcR4p4JrBpV=UIni=uFDG%V8S_U5UETtahyP)q0
zX$ITmfVp~S_^?9&7pzc${8JV5ex4V8tvBLbx86~p)n@+mz>;TZ5WOntAsN*d3JnI$
zv`wcs8y)Z?zk539ty#<!|3Yw_@j&ciWy7zJ@4&*vQRujG5Y3v*AXh687uS!%6pyal
zJn8_vwhZTu$;rIb=ZSn+pbCANqs>j}=D1Y(2eoq|x(rT$5{I>vQR9c}bEK~7Djnfh
z)HYbMz#l$jeFBI4y(hx{zQyD!0X7#80Qr*5bkQ*%TyMr;=PsXN&P#LLyfqE)&FRaP
z;w|W+S_bA{9z#LF7;Krd1U(a?Fk7t$PP=*l6H2;Cq{{KUaJn@r+#inLAE{zY{(D&(
z_u;^<<JjJ4I{mmkR6IYsn-FE-htWQ9(c>cmwf`T2!2i!*@cmzYfZzY{119)I`~M&Q
zLcss>7j(KU#rsxA<a?xK>4ekkL91#Jym>7#1UqySs)j#-h5h|#_VPaB&RweVYc{jQ
z_X~bGU->09EEg4nepH5N@HrRmNzDxXs_x=Soqgi+@-NU`Cr^0rvRT-E##E*^!B@1j
z0Z9CmRXf<l1n7Ti&=aT2KOdbWykBHVF~6V7zAM|&mB>`FC}*wgnp2Lf*~wK%to{sc
zK|95gvRa{P`j*;&zABV^&jBWFF&6_U8m`^Z5o*S&ka6m9F=F)+VXya8*frp$@bRV-
zOrd14hithJ75xBSzJDnG*iz?m`@R+EnVEz9{AQ>soIp8lqXo@|7$JIMT=b;>m%lLh
zfB6f6|NHp`{m<t&*)KZy|MC|?{+GW{FhII%Imcj?i-OecP7ynPo+Pd;FqJaY(vCiG
z1~#wJ=WyMxuwdX=Jk{TyFB}@nO-r8A`!FrOA3m6m4_pZgm-x_M>s;KheJ(onSEBw`
zx?+h<E^X2@<oyv6QFuzR(5vhn9IfxhZTF-d>?>DJEgg++vc>S+D4gedxbe9912F1)
zz37xJk>S+~VAby=Tw#14!c2`YImZn>pV!L<-%EnziRqYrM*;6`?j-lQ{*at1P6#g)
z^5F$4QrDZ->@ax|=zhtds|)uDQ<4q~-d5wGeR(pDR$3=0T~Wp;W65-UQHw*p6r>Zu
zkvtEW3l#y9-E>D0J36j`fhh^RZ{d2B?|BXw|4KX*mqImO*~Ddcgo|1=puJGKYfUr7
z%$$+zmo*-jsqes-)Fu@CaOYF=v~f=QII;XcU3f}IU}23FJ9xZ=Jip^~e02&>|FRrb
z{QE;)hu##d_ZXqy@JFB%m(DuNkATY*O`h{~7oAXCBffYxgtj>Pz(Qp=jw~~$>BUk<
z^O8H;#IJ>_^h#=Ri07k6)nIXAJ=t8T6r3Z0s@`6Ki4%OVQK=Jd9Gi@bgD!z#!W?Kh
zr^~xt7mIS!OZ0oVH?W5tU$7`elUyB+ERcbF>m=d$PdohhE(mvTGvb(h9qE42LjCF@
z#E}z{(CUmGMAb!M^Zqvwt15$6F?T6wyED8UY0KMMJwW)}EZEjNkWv3oQgXQ}8{~gg
z*jjQ$9yrn#UzHl-0lAdvDIWnVz4r>AZk~XCn`TkZMH21s;RmwNy$8#kdrIc7RXk$G
z0=8YgUAQzl8e?O%IXHiq)aUiV?ZPxZA5lepegg-&e4?4#g0c9P0`5GVgfq9A(Ic;p
zqC6-B*XKWgk$1u+`l=D{K41rilO9up%`ULhTOiyET7iS@^62-D{&;+bZ*6#{1-Gx9
zi{YzIi1X7!v1w*+R<k<>9>agi9RFp*%Y$;+Z-;TTPG1EA<85K?nIJkj&l`<C8Of{%
z{D!?tR#CG3bXb@df^+&RQ-iyna9d?D8a^(>LB%<^z#tQj?L8_4nD^u>GnIr)iG1`r
zV81v@+y^eF{zCN-iTb4he8luWI5qPWyit>O&$d5gtIkUK)mV{E&94UqvqRLLm5NJJ
z6}dY2fy{VhGTnVSR*cnIh95TML&r6CF!s|_>e8nXVz*Djz}WXdnKvoR{2djH&_-)4
zl+IUgKJ;9P8hQ=j#^MTkF~0*^3ZXcfW^h(b1KEwxheIa4ang&9RCmKpBK6n96s-c`
zhu<6VzS<v%+VFy6i;bXKFCBhc7?VZTDY~Voj9aUE;x2aq*4|jfCB5=wwXr5-IV};7
zde%U2a4b0Go5IJF&%na+tPuV;0!r@d^Ia>+Hd@ym%zfi|inUC>qE#|%J<7y&XI0tL
z-4`AEUKaQD^Cj6k1@!J|hwVL>>Q{B;sNvJm=x{W&+9qK`eX2ZQ=P%gaKLU^BsNv&E
zg%jBt&7%LvMzZ<tC>)rQf|ZkZ(t(mfp`VgF_SKQPFXuB*sf#yux@~~hoU>ry*j2Q7
zONc}?GiSxdZ4`FM5bJ-f#SuH6NHd*|JU{s&>{q@Hj_<BexPB*moHSk@w=M)v^vl4A
z-}-#@ts7_e>&%ONu8XVRYV%5oGB@k}20p)GD*jH?5FU>zrsm*s@bvmg`|s?KuL<Y~
z6R!8;g`v~H_x@ZVx!E1ekM_W<_I&a1v?t>Ax?qS6_T*JB`{0PCAL2`S9$D#}Ct>C(
zvdp+f^S=v}{@V{WnCA&A$bdVYS%q=Zxtyh0LzaC9;kIyR9<^aVOvsrd8{=9`YK1Nw
zKF*gW-MSz&woU?n^@TX*>Ikw)?T?~!m6-i1g@P=rVbAP|c<W~rx(4l{3u|3j|5OiL
ze0&NYxtcB9NNX0RZcT@R8e4X1OhnU+T-vd%JLZ-5Mz1k(5*>aPinFI-c;FXUAE=40
zp|^!7<;7^PJ<sKuqrg$hlJWO=iS#~66Fy6GsTb*5+*cx8HBvgimaJ9{O}%lWmK)UW
zn}7?I-$~TtbTNCNKSzDvg1^5PVq;S$-t(?Xa62a59o>6NJ#PozzbKdX1uo>P%|$Sy
ze>SdC8;>0e&%?Rvo1t@RCk$EGm+kzey;YD8HRP9p!^1~na;rbiE(ydHMS1X}b&$L^
zQWN9kmMk;M6I>VN;UM!(^2R_jH1EF#n!Z=jpx<vr+kZNk;QAS6?Rf!d!Nc(IS66J<
zFp<PQWn`41!!m3UN5B3HizM=Ht7CWYzOxc$Kxh6U-!4y^*@e36pCA}hAap*F0dp2f
zy=>=xIOC}wf2-XkKQYUl2ToVV$C`iWN99g>AMJ^Uq;uG!_gyL%uEN?*4z#Gd2M+t<
zMF~BsVXd)0Ug~AR4T1f!*Jo!=3RPe`^J79`lLHSd2*8##+oAK2!T43WZLA;ji)e%$
zZ>!u(cF$L%x9>+dHOd3W`E(|kw-FaO^}@!|BHChl71|6mIq>jKiMVwN__{5>EObPx
zlHRpiC;V8!Vg{SskY*4&61dHr#914hxiL}df=qZ$anlU3*QT#Dafmm#h-)D^@TIg2
z*W=2nhmuKiDjcjZ6Ec75@$rm%Y$0Z#;&BsH@1!H)=l)4M&!@7}FT3NJ3ko=I#vwYe
zd^y?=*+G|=RZ5veWzsK+;Sn0Wa9O-GZvz9IJywQOKkM*DiC!2})dRbZHD{;po5gsK
zVpy&$b>nMRg6cPd0d;DaT)vF&e3{6n2d0U$#FN6(xw>o-J&@;r0v@zr2Q_x86wU{9
zpkwx_Lcfg{xa`;v?y+ewR<D|g8)e;io7zO4!7^Uz5k$R(=OoY86y6+pCr*5M32IjO
zz^0D_Kxsssc)3p;Pu-}4hrSD#{BIn4YpY;Q)Eqb>@q^L}mhnEE0Dn96;%v86%&<gs
zzX*INT!F_ZzlVoPo|2VG4L3atrMUl;cvwRkcir}whCY$<46pn1As|rfuSTzWmvDa%
zQ>k~^gD1N@0k=(^@$X?DtlyLh^HZ0h?8a9*JSrZ(v`k_ZDX*t^qB~5zby5tqUP$9U
zsZxHz6R~B<4f&;h8nk_{7Kcms`17YL=unh{^p?(+C7cSy`8{1Yb&ey0ay$7SdkgiK
zocZdpGjMlEG-OKOXWylzV*Hm^P>$}xsGL6RTC@_4rX`B2HcB(G)m`D-KQkUXN$TEr
zYo)mZ50J*CN+|0#2*scMBwo&J2$`FSTkr3HVZI%xLxq>9r22>y6}36S#1k(ygmQS4
zJJ^hJ!aX@EG%UG-<HE<|y|_R0cz!4jOP#}$>uo@B(itIsrVajW{|UnnRzYrpE6#D9
zi)Xb9`T5lhj;+w*y|z~&cv~uGFCKzRwL-D#S~^epaT&5ZFCvp}z2MB<TX20-2Yz|L
zfVJWmL+lkFFt6`IDbpnrh<zn(ZG0j>cyu7Esp(Kb9bnfVsrX;Av#{}Af3`Yp!NX^S
z;oyV{C{|U5+&gBx|9dP=mwE}0nvMy!xsgKR7&}aeieQD-I{HYx2udqNC+DMNnAiq!
zgR<~`+#YJRzDd18dvW9Q3Ygf{7>||T626Vl#NStP=<s=S;d%5F7An{C<~uLw!~HtA
zv^@pip8?jr;(>R<F2dq1PMjY<7}Kqt+2mJeekJ9rvOl_E^-VKgac>shl=>qDNx2kM
za-BL|I|;9<-oX%w^|7Rb5~=x5hq28Qs7<nbK6rZ(gx>A2J6ea|Spi%$wg8WaSFplZ
zq~w#wMdt{KUOS)@t$G_M&E1r^ZpmF}xRQ@1QR~GjH50yhxk_}_Ho+$v<9M1+N1UgW
z&bLx6$>jTSc_O@p#l!8nL&kX0IW1YGRr~R_UjM;uQ!ARf=sGntS@M=NE4(+gE6Hc~
zzzdVh=)uyV;)~>V@zw2dII68Lrx&>6yr-_L{z2O5j6F&Q1}=2pYAr_nbcS++0Erv)
z86@orl*b+x52o%CmcE%!)1<vcH05F2k*RFfD6szO48D`To`!4MqU9)SJZRv<O2efc
zXCHgEYp4J^brpsvoD=#bOLyQE6U5L)ecWz1OrHB@GgLe7gUdJ1(iyid*fy*m+Wztq
zzB)_1l&dk2b2l5V^xPqtP11PU>}7Ccc`%OnSt2?gdM1_+YoTYWm(bT+0XTWjC@!}f
z#9JqO^X(a-)THGhsx`)7bJPTylinsK3?DC9UpBziP48&<|JT)<M`Iav|NoL9^N>mr
z8B)m*N^xI%BWVy7X^>PhRYF2(L{yYn(m;wzBhiTKY@*U2qEeAYQc8wOrRjG)-_P&)
z=UM)=*1g>4vaWs3KJWc{Re7>s5_bIU^S61kr7F1es}FV<&4B9;O0a*wEt=6f>TG`j
z19ExxGg*!-m3Fe3%dFspHRC0JZN{=cYpL5~2rk)i6J{;h$sf|vMa3Y-ZYc;XwDJlF
zYxh9^nhP*?+Gx7_ES*0UvAk?s{SPn>Fu{0<Qa0mP3txf$V7*x|ZjSrTTMjkAt9sGQ
zZNr|@*K_YNqmYRL7p0_BGj9*$vox?_Mi{;*T!|r<t#IAD-~6H#i7@)%A5Ov2h7FTf
zWzxUAan6JtI4pT2T6m3O?FY_@$~V7fvxc^^z)46Bw<S@d^Q3U=%(LCUXX53DGx_M(
zOV|dnFwUgUk)mf6^P{51(wW_vY-XVp9uYqb;vW^Mf6pG4Ev|~Et;}$%x;*~*^}b}%
zn1N`C^Khl)8&DAXvnSjV(5++`9Cxf`tFPC=IO#^VwQm|rweY~)m>ZnRyFv(mFM_$F
z4~n+#QOD!+yjiVDA;`?QE_$&jfVAAtgP)ip+LjOGI(NKfy^#`BvgH7;PKbFMqXg4{
zHd`sbi-KN`WJMY}IB~QIE1sCe5`}r=5znTxG1sSJ;NdUqqUs%{pOc6aVy18!%Rce_
ze{M4M$V2Se;_Wzg={d~1I2ebzOTxTiK5U|qC%e=%8m%&x<Lhm4AU#f=Du+n1q{vXR
z-Fug1&CJDw6lH8&BOy@aa|Jth9J_K#M7Dy>uxRRXno@M0>6||R^PdLbn)oVcwXlPD
zQ)3pRHHChuePDe9%S8VB>;-ye1kJw>xJj6sttzy|d+as%t<_`RMLt~9<XudAcM!Ys
z?IbQdYeKm;ecYH$sqpUcVsPKG43_mg<D_@S;I)0DxW<k)IQ@1X^;JznzunsOR?G-x
z`jTj+Kn#6psV|szCt}ZAS9~T)gO=VjW|W*++Ogc36}l&|2~Mj@tq+9r+u9iSsHGob
z=bwSe2b5vC^E7JcJ_2?@-7vxEEgPQ1Gwr*kq*Im+-u7~|wR9@@4-dn3;rT{hi)T6;
z&A5JHM(oQo9~Klj0;l-b!6xHTAetq@H{i~fYue-0X~H{?^WX&a4L%QvM%TC9;Bs~n
zHRW-1Vb&oya@~kY$$n+FrTNS*c_#$CeGIs2A~w}Vb4Q2I!-CY0Fs5q|KK~Sn4^Ks4
z*cB~ETCa^WUU!2^s|wQ&{s7Z|7*f8SG3^VGD%&|kg$6F(&+ZHFyYAK5OyivvHHw*A
z`_yS6e4B(5^o-%k<2`Il)D8;UEsx{>1oE+MwiF*KMx7H)=|Sx>awuxxRl^2ShWcez
zD)W-vXv~1VG+Ffdx*6r8OZm@+XIR;@ckpK860DkIj<P#uV!K%+GktoTJ9K9sJ2=r2
zZ`@19?ftRL^$>>>Qxc%+i6vGK%YnbXQ)#xrU~(95imHc;!T-WVP@LDumQ~(l>w-Et
znE~I~Cksm)sU(I^I`!#08&1!CqS&LhbUt*|czE>B7dx-nVyE49y3+g^{u!<y&nFUP
zbpuQU>(Wm4BE1M=)}MlcJsgA`&&2eQAb6*@m;0<T7|jn|ViM=Zu@4fdI6~$Ge9_p!
z#>kyz<(B!l>hC9R`Sjf|UjHoPj!mQZrccndS;*8cjK`*@O6dO7NHG1%VEB?)(r74R
zehS%)8BBz>vq$(zriWSZsjJwv!;=#ESQ;NV5FhN&K*crb;5}6r<%@&q_>`gSxkWx%
zxO%Z{fvfa<)*$-F-{(DqgLjH=5BqRWjY3{VU`>1mi^(bk<|jeB+g79JcrT1|)SwF=
z%rGt5fZ2}@V?+BVv*vr(xtLEuFyzfHdhjkC($>Y$Z0BSQYDvKHg`=UT&W&X)HD!Ze
zM#GFztMKLD-MGlm0*nutLvr0ENbOq3d>)jcMY}DGn|g`PTz`y-`VFjGx<jyWW%9wA
z57@wje17rk3oLu=9%ve_1obZqF#c8#7kzR@nR>M;HfP=g(Yd{}ENc=>&gRhK+iX%8
zt3|E+d%m7_GT%*SnEQtaVJ-Gz_S+O%Aw7#<uIfzlBBSt@kXPPZW{d+xBk0TD?<{bW
zKN!<My3M|pjy<~z8&WTDolo8HyJ<Z8YBNR@>cO)#u~~R}ax3o`uLhaIEJ=#ckDpl>
zPq_jmTG`E&U;JL^qg^(Gnh;&^wk%=h(-&du3SG1d-Gf12<_P5IW$@!cBII~0W666V
zpC-)smh`No$I1?PWkCj<s5a$qEKz1R)*RzzJO+3y{PZV&W!lQ|m>W3{tMx|WN~J68
zyWcD3GRXsDv{TDomhOTtOMWn$+)1e4h$s<X#ciHq4bT1kSy1(JRylbWwod9}c7sCj
zpMN-Z)$f8=#lCQ|O_)F2aF?CgGJ=2ez>m(RNaN=r_t}pyM|8<u#|^nQ4ju|wSfvZ6
z*l(BboPM$;8t+cwTZCurSGkdCM+oMrqyD7UA5EVsS3pVrI8fR-9~Rjx!X<G#*vDsH
zxcr+R+a&J{fC)^NTSq5Gy<tPQsFTT&d^qzg8J2B}A^D-R*=NUDc=XUBauoC8x|V0M
zEo&vIFK-5px#P!tQ~J5@u6F2h*n(QzeaP;0I8CZtg}QwU(Xe(5H4EqVrZqe8ovk=t
z`}TniYoAT66Ap8&bM<L<=1#WboC-$BS+lthhEZ{F6D&OFjhp9%Vw3F%jJM9?igM~f
z`R8SBYIP+0H9?m)B@c!7^RwXokrS-55-H)!Z<f`4kr!IIBxiWEw7uyHDoG)`xYCX{
zP3mN7RpB({d=kG>S}?qBxWjgx@uBnD9o)7V^_-(!I8N*d!wx+g^xUmN;x851Kfic7
zHggGzpKf6rL$<RO7ptLAFmF%vOcu;Y(kxVS7H+aE=O?c<!8Z<KHr9%%wDm<gM4y(V
zCqLZ<6NL<Q$sVI4E;fQOxR^!y`{TlYVU)bpiVAM;qG`uBk}m$>8oaCV%MD3v^a>zn
z#ZIm*v4-&}3(>PY2Q0SzWtmU%x%{nBaIV{r3z(aWX)7;M{lc-BT`Z3l_ATJOZXcWO
z^^(bV&!i_G4DdyP6zT8%A(|o>YIk3nL4WjKv(u+9P|spb+P6m&O<RuPv{4PL{&5D}
zSK9%>0hhq=ks(&-&SZ9b6RD+b0L6-ZfS&>Z`kL)~sPPzyi-Q`itJ>b+S#b||u;dCP
zSqx|Uk`nOA1Q&WTbv|{P3b~;nCM=!#W8XzJ%3XQ@o*xfkbB2t97W;ULmoMao^=@UC
z-rZzcXD8DF(;T`Q+6=`%glE}UjjfS$G2Ppc$}G*<f@llul8OLR$w^FX%o`>toOgru
zv!QivIfX1W#tn;v`E<GCxWeWz3lOp(8aMCp{ypa*x~Gbb{PVJGFmn<5oe7v=)XiT{
zjKbb)waka#Vg01)J2+VHFZCFv2J;leXj#4|{t5of^<{R!!$cjv_>3?^WM+e5pQKQE
zcRI^`I|H?+K7gw949>SPi)&Y$U21Y9kCoJF(`}nLnlRi2W!DOs)Ydl89%TTZ4_ngG
zuj}E_A#3_;ybC?_#3^Qq2m^0f)8$1{SkfBC?Y6xkusdwoyk(OqHk`wQ!7J%Nk%*6t
zI|uT?8`%X9EtF|o2{X#`ILqXN{5jhoIwQR8S|T@N(y7tll-<a(I`Sazrx|%wdthpA
z3}$a|g8lEDD5)zD)ujh>b2i&hgp3~8OccYm&2rRz?JU=&5<yFh2ePm2+1z8n{C6ru
z0<+#pP_aY_x4%7#?X5Q=rF3c5KF$H}J&fmlb#L&){>bC;S?^$kk`XO>?IPIaji}Mb
zloooq(s84MWkX^N$<h7}+`C!B{$|_YsuL^O6n;Ct`g8<lI*r7fqJ9>1Ns4AwZ{hze
zDdJN$1)=}6Veoai8HstOVPLrnIq&((zCBuwg)==VG;TI^uaPe;d~*$Ii-TZy{6Ms>
zH^IGLzshWqN0R8}T-?N^f!_=X`m?|uQzr{NS>?gxF5Sh?8DL3L%WpxrP8?jb-bdfZ
zv~sVWUS@4u+(7fs2>L1X@Km1(ww{+msl#asDHfaYu&bKiHhc-&zDlsB{0nA!TT^NN
zscY<T+7SAzd=&ScH$pdI?yl%dFtu2w)5iH0cxI?IhEB)?jnkKe++`eOwEMt9jU4vX
zIEKtb48*dF*%O6PjPyJCLn<}Uz4IE3lQ_uET8pt;Kh?-g+7b_(j-=<O&hsjt2h*P-
zS87;%gvPXSr0sH>H57D~RV~VAb(%ZbRlQzT5$^?~&M47niV)72OYv@tKI>0%!!y<c
z*iN->kc-?-@weB3b>1R4X!3^jMODH&r9RjxbDdwM?m@~jkD1l3-B54%n|pq%8(vg@
zU{&vjiUurH!ROy*($N__H#Ecu><lBZNqYz1d}tiMTa;D0Pc|G6<P64NU261O$jaQa
zF~EOwx6z`8Xc{vAGIyqGC(QZM4uS5s*jI@`)OtUbYPOs5<=I2fJ!=8J-8cgsj9S=^
z&}00hD+=V;PyuHz7zr5VH()(no{GnN;F1nc{4IS1X4l+juX2n*YULib+R_L|bgE+1
zYy*sRZse;^9$^W;3fWIvd3x198C#lGQ%D8JsuWfTeX>(zaY~0YPu+#2x<G2!KAW_-
zDO8YB0B>F|z-OI{DD1s8TX!%YO*5*n>qiLg*qX`W1eT^%_AU$@#BiTpJID<lg7EVK
zTU#1~UH50<$sc{}^Q%N`aLDH^w$+h(l`*fgTA$l~N*yMbzGOcydeVzQ^HAejx5)qg
zc<O({*w)J%SZY%NAF;B8xi;rAqs4>i!Wmhpj=jMKyt!HS_?jX^Z(z~YwvgBtDp&}<
zv#FKo7_&f~c0a#E_qcVK(cz8v9wejml22^HwP)~IJ|CPm$)f!XU)u1c2$J2eK;_3v
z<fwfBzpVel?Yj|)^&dnOtzw9c(nr|FFN$mv#e>UbFZMd4021d#;D$E_>`9IsWh8{-
z8~Igb?nC7`*;8lA?xfu0dZx`K--<kHk#%JIn>XR88>7m`*@lAquM;HOD2Lm-hER|A
zeq3|i8BgwtA@id-B$Ca+h4#jz<<<?O7hPq4C)=ZKVJ{ooW{#`MM&k&{E#QD0<}91c
zyykzkR<%38256pw!cu8Gd`ALb)dW+3`cRb1olg>4Z$!s*qHx`$k08~ZiMktXv1?K<
zOFui7)(mi<%ksBaQ}cK_C+p10vfL?Nur6IW>VQcbl0g6dedrNbI4_a|aI8Tz{26kC
zL-}>sMyjYkPz(E?d$7K>o0y)sHFdr^%%3++=f>K;Wj8dPam}OzuKl$;^QnEu9}XHo
z%@eFx@X>gH@22EB;})koql#@lHH}UvS!3+%C9vrGBwF#Ri}P5UK-2#T|J%C%n9o32
z8gVt8dZ&8BnN#uTY~e<meDA^b=}x%mMFZ?9sfFjOm08EVR@S>_EXp<Kv*`BcyyQ|d
z@=@<&o*fhUKOtSP?9c*KG<w4&jqT){HlKyz+TM`!p_R=a5{}6V)i5^XCdkeYVIPgA
z(8_;C__fXohX?qu#4~@nW4Y_dT6Ze_D;b3!C63Txry6LrYG%ismI&D)XS(sjm`?MN
zxY<yK0w*`YL+4efQ$3&Q-&rS`)T>D~n<i3S+cr3Or-#|?&xXHt&)L(4GjPBt8R9Os
zajl_}B==wr+<NqoTW(Oqe5O2N-^_M1x9WJPcK*fx{j&_m$Zeyo&r(RzBo(|`PNB&Z
zU3@y$kv`9tz)rzzJHPfMwQtuVaf+oX(_m))RhsoAs!-~o8fIwP!)8U!!gFr!cvN5v
zOuaLUu5)7aa)czEuaSrP8b>B2xrkL1EXB$)As2G$9`shsW!;LsP@kR3rN^oBD^W?b
z{#826bMoO|96k+NH|FA4he-0Puw-!^Rowc$Yq+50g-kI*0$hvFkxa)jT6l32+<Y8L
zu^)@el6_Y|-u^jU@&-GafO5F*w-YZ_C&6+qhmrn7TXdLcN$V6ZLwd+@HuvKs92n3G
z-1-FkW^<l?8z?YC2R9b1BkVg=ZUX(#qqi}`aA(&I=Kfh4T|@I=RQ+_SzbKp$4@yFB
z>uz{0c9fny3dG93^ir0V%96@*$ihGiN32soZ*5cBoqvF(ju7Y0%4fh%$9fhZQA}?`
zOW@}6jj-};Ci9*7+q!pV8oOoohFvMphoX2#TAsWOQg_&bLFql_94=(Jy00?-`$kkJ
zIgVO4$x@~C61KkR49k6RfEy_ti<Zy=L4#|#TZ*kvv7m*`xI2orkE~?VzmJ7^V#QFg
zFGIA<{WROJa+0-l?c~=!Y2wIvEH$kXLl!p?LmU}9vHlm#e$Vs8=|WZ^qKk`Ob&1XL
z9F9lrZo=n>I;{8HZNWMf5B`aA=wqfp$}uwp8}CPU3>;{{&efP8{gd}NXUX^;Es|_4
zU?)W_Fu?vVT&{lv6AY>ZzTS44Uow-;v$?_Dk|<|N7meuKNNr{!*p^-PM4?B!2fG;Z
z7H*ve8v5%qyE}vvM%?eSt`QH|+~Y&>VNnH6GVNtc+t<;-0~6W#fZJdj6(_LJw&JpY
z!Fcx6YJ7kDXjx2!G|subnI7L=#jl8)O9!+KNU2!?Qzq;n9qoCncDXF5{Z_+!<F)9^
z!sB>Z^&EIuxqy;^76r~oVxPVVM&E!;W-1yDt@q{-TPyHXc{A$2Q3`s`_u#~}3k0*X
zEw~26-~)&0#AMv5#VL@MJy?UU6-ML5d*jgL(LwrByAIO6)iUP%k|lrDf*&EO6zwvP
zJvlrVRx6iq+D;J^q`ig~et*Q`eIB6U97%Gy+|Tw&OJi+H6jfMrEbFoXj$g3=({lFF
zD}5)rU^k8XT|W!Qt+~c0ww<DX{cg-;@gXSen2Q>Jj#1A1Xng+T0epD57J_wNGp~1h
zDM7uHH5cy3e_MC5O>g{hSeqe=6}+>mS+Ejw%F=M)yup|^dm^T5Hn5sWGr&+!j-K4~
z;qouoQD>4X>07SC@a`qFE-9Qv9R)OWQ>2AICZWg9-Pq$lk*Uc_(TWjLbRp3IZ}~st
z(`hyN8|`K_j|RiP#dB%(R2?$j@>mpAs6oE3+F_t}0hMd!a+BvOVDn;qEYPcF1FP*w
z>v}k%f(N}2=6^TN7*5YJj3{)RA%xo;fp1!fiX%IiS;Yl7HqQ^W4`kxb1_c<Waun8k
z4kFi$SD-&)J3b7a4BQ4sRG%i|rwiuW8#*pHKP`<F`SiirOHB~3d>=*$_Ib&KF7C>i
z``3=ho5SB5@gk>oN47d^JpCEG8I)Rk*h*=661Tp`J|*3SSNaAdChWrvKF`EH&8O`1
z{^2xf^BFkHDPm^MOp3le5;yJ)MYHGT6ym%R_dbgOoh&6hbD>)FjF-hC(>?e(R+z=?
zkRn&(Z0@Le1|IF$&!s(l#%i0SSeX9k(uPhQ_GFGC^y{a<vUh{{U&8rb2od4XAYE8}
z`y<rGn(~Vz??7(-NYe8Pg+_Hv4EiAn+awRrzFXH=c++4i4o_#gF&p9i&8b5Dn!s|}
zDuI8O&0b12v8`vPVRuI)GYKe$$2u0|oZ(BR#v9S6RM?{y)<a>79;!OdVmDqTk;{q*
zYCosTY}S2*`ug+WJw=nUW)39L3`;zju84u-mavM}SyW!L2mcwCvm5tzk?T)4I<^&P
zu*NrLutI~L#|r$Dm^l8=u0~kaZ^RB1onk{%HCc^)K6q=`i0pf1q5RVyz9G|uDJ^j1
zjyK+inQqtN>eP2o5x4+*xODDr)mCgB=!IXbLRj$5Zs`6LgzGN+gh2gr5SN<Is%A=o
z5NZNxD>aha5W^OQeJnfGDX`5g-Ppki76Ri~4o&Y)LtCj)SSOakNlzF_70IWBdmLkm
zJuDc3&Ba+ltON#~+Q`QBO(&aqda!Mez<`bZ0NZ^Aqh8Sm*5>QOg$#KG0WAVE-_I6b
zhmT{+KF+}vn8|ioiOBq>V6DDsiIbGY&?bC6{wNW6wYSaidGTDq7C8d0mu|x*xesjJ
zizrdYg7d5<JPDl5=HXBmQ?PaLU>iSe#kT`xF|bdsY*h6`%;rwh4L2vUQy&Q?PD-rW
z(ikq}=;P)bZR&RnMh&5-c28Ch&l?tlihmr|-L>QsW-LJI1Urg#oX1Ygl0&J&YbbSG
z6XIuG;(RR*!GvZpvJ}_ETjsrBwV{d26POcnPevkHZo$4~u~-u(OF4h8@OBGLMbhSZ
z?7;~K*kv~odrcGB8C6L#?QmteS~9$%MgXRdRK|!xEppXaf>+<Y10(H)0(;4d>Agu}
zNg6|NgswQgOPa}4SINR>-{%lyXibA2?gHfzM_Ja8N1zcXu-a`DaB{3a)>&8c8{Q6s
zgHjXu=iNMO3Yy9mZ(NS~$M-@`?OZJFl?U5hLO<V3kM^vd4k<?~;i2~~k&j%LXrYuX
zS$^dxSwcaWt6j>?O0;8^^Urdl8_q+9a86YilVUwpxDPuhFw6grX=kp}>anh-8nhP*
zdycLrY}q3<HfRS!zbR`$OKLLhjSR==N0Vsjro$}fwiK>;IUb)riDFwX%HZk4g4MRA
zgKz4|hy0>%{3~fKrlo$H^=IBk$=RBa`_YfI%QvvT8R6L9YlV)+6DjJ~NNU&7q|LVw
z`6UzJp^%X{Z1bD9&@Y2d`A8@hzGLpCQdkxl#(Z%$rM)Wvf0co#sWpsr=lD{2Ulg4#
zTE*?MKT(#>XW<Pu9U9lV0*m|S(fA{Aa6hz|nRnUoxdYpoq_WWGT4B$PIXMY7UarIW
zYlFFO8y~>n<MOQYtB`9}5Ij%uDVVQ39CfZZ<Bq_1N}n9VFS9Pj`h%X}+rADOv@C3%
zKR!!N{)6!NH3m5ff_q5WgI#|6mT5XXfaj}Z@RHwX9DmIdBi;$Uzt{s%YnO&ah4-n4
zt$+_+x4GRj=cC<TD{3!S$Bs907(X(IrFZ1u_v}Mh?RA$wb|W4hfH_ukJ7HPMS*TpU
z4?1LP*<L?^<s&M@Z^in=SM8$Yd5W}k<VdvtWktHlE9q|$f&ZZ;I9>Y<yP3R}4onpW
z>T?#d`9VvOYuw4Z4Y>hdt<vF!n+@IY&%u*E_UI>}M6xwT{Iu&kSea83OP|~he=8^8
zUYR>wVWR?#=g;AA?-oAFc?W=DEgP`L8BA6WL(LVZ(JyTx+P`RKIq_A@*-YRlrWXpX
zhdmeteq}E;ebH`-KYlLrq3*$7S?^a3)N0s>Hy2DMzUwu+^zjzEw55`z@7PRsSAWA5
zzj)LfJs!#~tRvFsg+eDCY_^s}_o-@}`NQk{27V|cC?8>m-tWZCA*JkXM6qaF5~9<b
zx9q2vB0Z{IkEI>)oZp(0oK)OOCVe>zB^q5(K5-SDGKypqqC&}|brzO}8-RO=4O|a6
z1Ls?Bb0g-Z@k$z-KsHVl|IUxcg2i8%v5qjJB&LOmGW+1^+F4Z7ZiaQIjd0Ww5x!7X
z#n1m1<E>Al@L5|IXk~b!d6*fDxj&aezY4w8AOB!)U$JPn^=x{5)gF6nMYv*q01UT0
z0lF$GtW{uY%j(af(}A+M4yACgrwYVZ`&0jL6?|#A8QuhpqzIiM^x8+2&+=Kz#<c0-
zu<br*mmbN>ZLtGMKXZ~;SPSKLVbqdW39E#;nX{@u`+RmXt3eLn+u^}l?#-vWIjyX6
zWf!yhoD8)!#_ZBBJ)Hl^4P#`1Hcvjt$zDB;ty>G>lcoxLn3};1KdPgO<RlC=Jqf-&
zL+MVQ8P-qJriHG{`Qf*ma8(!Mc25|E1L6SX=jihpUlZA=Ya`Ko_90Mq86f;GYSH2;
zjL9_B!Z9svY?<<cg?UL*#<BY>-nD~eJkFrtHOC+=%t+|lp9KH4U2NN;Q{dtt%p^Hp
z<cepH!b+XtSf!%}gH0#k>}yjgb;2Cz@KD3|YvjrKff8E~;*GQ9_F$5)8@7~=!kg`p
z9BkbL>EkMSYspMlXV}WNd&G;hJG61rAn~&6ORdl++ZT_h=u_f)arAkX#p|nTK~KLH
zEu4Ou)!N$8)RDs7UuzP6=_W{_yIlHdRTeCh!$OXj(#UhesJmq!H_ynIt#=5gf?r$N
zzw?Vp{j9LL8$5u#UPr-FKR<R)@FHyfahUmds8GW90l0pV2tAEF@tD0Nv1NJ`;bzFc
z6~#b>kzls_QNuD$?}j;%5%^MdHwLCE;QP)5p~o|ejZ_t*cRiXg^1ddWj?u(>nRciW
z76mu<e`9#x5f5a>uo#P(H0kOzH20l?YjX{;ca)kayvLc`7bJ4$U8CTcRRX%4KgK>P
z1~B(Wqu8F@gP5Zph2ENuWMi=mwWr9?5@CP#xK|7>SrxJK^)J9rGnZYzR0d&X@7S0P
zYV>SS084x~R$vCSi|S_&CqttHqVgGYDAl}>(_a||MglBKVOqIJ_L(9*i`1gkuY!d=
zw+{9E8O9fdWbn1q50~||db80lCSX8cJU+^@;1wPVOqFt7i1Zftvm?A&qV=V+Wx|@6
zS*sz9f5L6A@xw1?&*Px;8Qg)ATLSy+Bpbf^0z0zgB?~q4r_0m*s9bS9`@L}=E6ovl
zA;Ov1<={}<_TGt?+@eNLt!CrdZ<;vJDUM<qIv}EW3}k=W2E$%eu!OV#tnuHElkC=D
zf~_<EsiBZvSbH5>e_m&Qcnun#T*tPZFs2iEj<|2VKgMLn;uqaa@ceE@e$RjNqXdRm
z__B7EKGhB%Bp=7L1umGmkV7+0i(EYAvDo(*%q$uW)`O0f$iMx~it^jojyJl(jMW%k
zbX*CN?<@eL1C6ZyX9^oqk%Vh&XQOjzAnSTB4~8W&7%~0`%PSd*Q`LgeWUx1-6w6}9
zP~rZ`CKk3tjTg8VKSA<wFR*duv@LHUF0M32t8WSvrSONnHq*zxRSlw9^|k!I4h6a)
zS1a;<wU=UoBEZb78VuC0u}6Vb@Xsp%3+C$K(obR7|HB$L#l@D^M$6Df`$gE<s864j
z3@FgUm^|KfiL?xDuxPUcy&iXwX`~LqZ{h+W^WJR25<|8jcr#SSOvEokM&ZiGi|}`7
z9Y1H50d9Af#whhcFln#?eAx1vRgRiZ$@CK@u6zlzl(b2%SC}hGDCEwr5gdxSxv>B4
zNUEzJ0C#-ENL6(kUjM<-7KP1dX{`v_{qOnlHZhEwH5}Vjbf|ojGo^i$WdrvpklSHn
zQc;#9H_b{u|9dKE*nVS96<68H!B%+mKs%eS)Wgn;O=B**A3(Nt0VwP0vGRR}bbjC*
z{P)=g$Gy*DkCiwYnEZ>$3p^rrN*{k2%fQe_GVJr5A=n|zJ1E#+0JHUxpub=*Tls7U
z)z6)RC&QfB%lo^?^Vt!&ZW>Hgokqf4^;Nhp?BAaGhLA_eE?hGuopt%Ir@X_v!NBS=
zM7$AWPlv_9g|5jMnQKi;R_}+fg&eoxi#TrBlLE<?W8sic*Y>X52)Z|ILFF4kbn{}I
zd&+@pJd_IJIp&!A`xbk*EsdQRuoZhoK4O6>e$Xk30t)wHWyd^mNbzv`sX7kLPb#88
z<8XR4Zz{U@1#smBKJ@UFJ%~?r;KqH@#(SIF!Lv%AhN|VUW8Tl1-1<*tC(mxCC7xDR
z&&J21O|t{4zBfdL1B=M!pDa!EYbw)iDuj_s-t*(?R4BauIBYp9_(tUSG6_k6c{PmD
zg^D;FUiOeB-JHUXC3(U48i5D&H5ncRJ^|0(;i$2AKdKnSuvh=2q1i_gJI7`)t#|ME
zuP!TafzciuEBFKQU<oV}^|HEI&*12AXI}Jh2M(;U#pr*gpwP3Ezbu@~$MAE>f6zzR
za_Ky)-Ef6D2^{L+L($ex;sWsSgUwVe?7{Q;JSbN_0VR#@!3i%T?x4HSuL{1-l#cA6
zhJS+~IKGH)I=q96x2@x9Po7~a_U7#7kA<v5BZlh#dhrHDI<PWuJv|+JLxclWu=@`{
z*;rjln;K83`3f#)>*Bth%4B}c+D313CET*ppfO{-&^7QA9K61t#$Gy3Pv-@|i`%2A
zc7HY-dF>$nU6PH{?^QsTQw8cTm_{`X#<bW<U^{yXY|rCDmhEmusayJU&~vb4=T~h9
z^|!_>%+^u3>z88Adw=qCQ^aZ7pP3lcu><ejSEc9g-(X*&q>X#?3QFj6LXD?7=yS~%
zzLFJ<(iuykgMRRFj)PG7<!H3245USC8@NYzO1Z0cB6zTC5H@)G&|XU+E3{|}DqkE=
z^Y%s2q0kDhna@Dzje@{)t4XS%3Iv04>Bfw`MDI`Hn6+=PUuq5=vl@w#-rwQDT4_?8
ze-KUIW@0HPxQzDN<CKmlP#8OdS9o%Z%l_vC)h9x@7O72aac2^#+3hF8WT6+h^926v
zkU`bm1K@Mo8d@pT5*h7QxO+8__Vs+lu@#S*Lz+2SU$%v^6K7!m5*76AABf#DMP!os
zjxFl9BKQ78$PP0j#Vz`@qTnT4tyBRw6oh%$eS648S{>A6oS<Lm4JJMo_5{Y#?2%$I
z%onXivyEx&!B%g|oTf=`;agec;;m3OeLwBpb!W=`rhL)n6ApCkx-jGEehto*ZDi${
zR_N#Pf$iEo3GE7f=+SmX_EW|Nl0t0h;*2P4?ORYLvHK2p{_Zen8?=mlx>vw7C)#7D
zTRLV{o`=ONgJIwg2|Vv}33Rgz>AH#v9u~z|Z@KTr@BWp*l?23M_;^)zI97?a-BqL1
zq%Llw{}Biu;>S9qX4AvAN#qwHc>YRNn2x~FcpaCF=OS&0Ke-1M(n&lelEL~x6PTx;
zJ+@Tnlh3BH@X2v+*_q{c;Jb}4Eoe$YrMGq5TnP*Q%7-XgzFbG>vs=*^S4G^tW<Mki
zF%w)BDtOoQI68(bqTkav*l>9p7}g10uEAM&Of&-z)r*n2syKh=r!tL~`^+r=DAKJF
zr&ves3T#h##rJ6;a~Ipq>OTm+ybI+lCFB~n@Z1#G_T3rPPd@?^jKH>-a!5V?74py#
zV^;={mBizccclj*>uMzLGjJj%91muf{zYK)qCR%VWja@NOW5Pw5bm*TlUZwnE7&A8
zuro{TL7^oIrOuc@^Aio~7<-;i?Gt00Hb?V8Te^6)lT|Qeh79VaM`EhMcTr)eDlMIw
z$n`vp65Z2phnsktOCKzFBF@jo#Ko;l+3y<{Ewqd}iW1?tm9=p9Wr=H#JK(B4#;AAi
zJ1bcmiAO8mm)u(10sg9p_C<%G^WO>#(wYDZ*V@zFGv?6sXC7vRj;1k%`odb2g7(hY
z)Hi7rKg)XxJ}R6i__)Tyn75Af{k<DD4?PQKK40JrZ+YP6NviO`>lb5%W9jn~WjxrN
z!MFfznsi|%7R9@=)$W6+_SH5loNY-*)(+s*22V$?+t(p{=SJp#_z$d;-OF-V7We3E
zDoiXHL<z^cL4DpycrShi8e5C`1c9&R)w_~rH!Y`kM;76q&U~gQZ-Zq5Z|p{Q2zT^*
zHNWgpGsZfuBJ0JM;6u{{TpzF<@`U_)+khaJiZK*!?M)_whvDQsqjABI62U~ifNO0T
zf#a2ADBec{hGp3Er=+V`w%$yXtGmh#eJboZ-f3dTm6DQ%ucL$j^bU5%!W14`RkH5;
za=6|=oC*#Fg58B5T&<9EE&po+23y@qclDm;w5<IwGqDgH4jtfgF4@6}=aJx6IfzUq
zUtls*BXCAgK1)-H!AX0r!m%Hx*xV17xVVZu$ZrpY9|B9!(O8pY&0EWIA_kGnv_x+E
zKA|Tq+;49GyPe)N-x1B&Xg~@!$=Gx2Hr_G01SLDhpvT)4Fk*dIneMA=*tlsFKE1sK
zN3@BNO>P&vBz_KTj%U+(<0zOmO%e3%)X?3?i2JMXiCM=v2=_Sg>~>x^+a$WlC-!c{
z-r~c|`i2q>e7_e`nxfHaLL%|JIp=zG1k^5m%a3Z2rD1Z~WE|%tc%25}SVt|KdsGio
zf7bCso@X(=Dcf;yk0O05RbYyjMkA@$p@q9G$q5;wj6>7WJERZ~-5rAa%MY_aF-~Nc
z_K|&zZV@uA)=XT@jy>EW!ds^=vYwSYXw5TslGYqg2}d;9n1A>9Ap4b=Ki&`)UULKe
zesh{AW{z7`=Fzne$FaYMfD5c-%S-Y=LA?W3$QD?yEKS7D+WqXTr!fDMTMKU+T}fk1
zFs61Z<ClyGxYTx4G@v0G-R_1`u)Yn7^l!0#VNP_=4u5*%U4YAs_JZYl6}<S?l0DsB
z%<FyCqF1^n=)%7VoM~+{v>dvH7k5vgti-9<Xs!t5;%`~aS#L}ea`i4^Z&{v35e@pN
zMg5=WU}5HKD4uF2xC<Y`gv(MeE`A!QztRJ%--7cqi=(xxcfw1fh4ggEI`pb^!U+n&
z<heAS=U-{S&|Ts*#v+>cQ?<fBZF0=6cN4Y-$J5dlH7w0jqR-_ssV&->?7rkOod+Vy
z|0iU52Os3h+s2~_W{Ik$rGy@T44jEIWHv*Uak`LAzxZ8+%#ArxcIQFd>Hr&Pxr5CZ
z_(GUhT}7^6tYD$oZ#K9}no^9;v-zzB_(#eTvfN^+V2TDc-!;Q2u6xT$XIf(6rf4`P
zb&_>gW-;F}4dA?29rkQ?hrbEC*<!DaWcz0Uy%pxlUKqS**NPJ%&j`@mi^Il}Ff?iB
zMQ@@{)9OdMY-sWt>~0Cbgh$8VPQE6+eZkm5A%8c%P4F5;XNny5%!TXOZ{gg0Rg!yF
z0NVR}!9m)BrrZ?v_!B>t4tc7IJxfe5N>VtdzDKnEFc%fKnd4LA3|93;5}sNm)B47*
zFy_%>_-s6cI@LF`f1%1a_1IoAm@}5X<(gp3u5FMsGmKTEKC9l^1(`AtWY%8JcWgb%
zZylS*_Ui59qCOUKs>ds#WWyMo7d`;@z4jpUrLo+jv7PL}IZaq};Sn=D9*_UdR<X|M
z<1lRh1Pq(D2Ib8xaO+fC+G1{pvJ0MqbaN71ZM($AeESLO4Uf{Xi>@dYrAo?^meG4J
zEAnr?%cj-Uagq6}Xw(TcoEOqu>a;|LGVKj8%bvG>>7Ky`8yE7Shg=A84tOaJ#ub)F
zxn%?6d8f+<S*f}S>hAGo<z=22kem)fVtp~YNfMupi)WYnb=mzG1xR&X3RO$z^Y?-#
zQ1Js(%oFmB=^F#cu;w?X<oOrwY|NmP+3wVk`hiK>J_J))bGmg*i%GsZ!qVggw(;eg
z{1>xG3OVA*vRkK6+g)u6dvF-rGu*I8M-vTS8*&|=UU8n;JE`Z#FYcoFF{m#81upT6
zXsWysyb3iG?$DgztW5z9xYo!v`gX(hL}zTT9?LpTCJJ?FJq=K^p>f=38qxihJH1<Q
zGQJpwWe-<Sth6j$8b1ZMZc>1m+uDH3RKsnLqiFvHo~tqmA)k*MIJ?XW`2I-+)Ldfl
zMeRlSQ2K^z7Pv2ZgMF#4X*<9DMFEbmX=cq9zgc+fVB9reDJFP5WAY!esmxOXpXmKz
z726iWx&29^`V}5no+Cn4hP1zT2fZAx$4}K1982wLcvCwHz9c2ExO-vHlpn<;dV^VN
zK@OYn<q3Q)*e%Ro^h4c^S@d9{Geo`Cp&>rknCv1E9GJ3>=@?~_RP=A|hNKupggZdr
z_-_zVKa^De<g*uZ8km^M0SH{^$<OfL!JoFWqYnRh*cvB=CzX1cf4m&mKjagCdugj^
zOa6XlNR<$AOcrH#>f@rH%G@^>HDN9@jU0jmpL554EXsS$#<b3)S=oaqW>FFTsH|cB
zOSS0r!UX(NE`h)NFEQ6gX$TTUe5GC{-JLrQC(QT)bNV*Xt}zNIduK1Olmr~5&hsk<
zl<_vVO<37UX|VXRpJv4kpdO1m>}jkQ=|3`J_Kh3q{>k+$Yg~iyH_Q?I#AWPJaWFQu
z=TOz(<+y5fS6OT9F_4DCr45Y&gLIlIg?NUAEc;*ng1PDc<1Z}#FF#<#fA|4jt|2S`
zfBu5^fB6dq2Zf1d%z|Ojx*&E=5kyX7A=c>`H|OMFI5w&g)|n|m>-i#1GFj-9N|f<q
zUi~b+nyA5OfgH2dEEL`QvA^v3nZH&?zj}h=y+RnaN)?Ra8@b2}TRD@b*4&@li{ZeT
zv!xI2FXNjdS6QD~(NmUsHj_Km^sY>AZ7^)tUc~YPEz24o+Q82%N^JU8buj7Q499|#
zK>Kw#Y~NA94G^0OuY(4{>w|g_;~4~&72CKw<8rv4m)+pg;%yLEqz609Ea3a1$NXp&
zJE3dP#nruZg2Go4V6fVQdvV7J4FA688m3JFy^*%?WXTu)fvOER-{2n?;u99)`@j4J
zi~sT${QkRs{{LCORjwhc|Bt^QwJT)J|64&RF|o@NlIYj4G_1)uNx#njV5!fQnQCsX
zz=YX}H5(FfzIv%h;oujRpLPLWSex*J?u@|g$}QJarf0D)ujZoOu)XxBQeX^frZClz
zlkl)}9sLlvt9K72W1J>~4?{<DMzV>dI47Sgmf1krneuGT*BKaV=f^#L<%FIS7oca%
zM%txqfv4NoVD{*IYQORWawMF&W#12A<Mtg;@?H~L*Zcy#4Y8<}c7>X6<smbij4sE-
zkvp*#EzKw3osOHJQei6iu?6l_mn(&A&cx?++bFb7hSg13NQVxM<C4q>9xl2r)XV)O
zTX7DLzGwogEHQez`aQI*JVG0+pA#qWXceMLG2de!{QY9j6%XG^c8BL+%CR^^E*D*f
z2<I(XNBpU=h5T>3(&z~eRM3+^S09f<*Tf|JRD6v$=n1guDyl|YC-^4BwqU@rZ2FcJ
zPwV;z;lG*bY>3J=+_i5QKCrHZ`HNGiU%Q#!?5$_%WsE_RB`Rw@M3;biY`l4#I&Pk&
z$1i1R`>~q>D{e04x(70i-g<UN$R$|G#?Y>n!$~$x3Ceuic-^^*Sg8A(Z+krs=c)%_
zUg3DUJ+y(f*r?#;nq9Pd;Bid<ZcOJBcaWy18;j6Pp+%$RAg=l}tdOuqmj(CPxS@y9
zcrH(-KYvoG${ptL>M4IHNMLV7$fKoqJpFLX!q%}#!aSZArQQg@uXCL7wv!BOv2=j#
z_l57+DM_RKGto!$K3jU?1q{!~;D<Jx=C?$paQl}MRb-7oU-?m(vZ;XWj;Y70jl$ou
zp_tlJ?!rjnT|TYOg$}$QO0gvi@XhQ*{32%tGo_E>^p<ZFVChfwM^@2fb6q%+B+iPa
z#!*pSG-tovf(Dw!!PTr{dM<qn)~6M+s6ngf?5`3^R8>Y#>$Rve>;Qc@_)l=^D51ZB
z8GehHLB(QmM9w?0a%><NbjxCByE9JPdWfcI#&BC)&Jmg>p+n4F1`g>|Q=W$6Bf_x0
zE|s?jZ~T{E$hQ`x@*nsAg7{10QLCh*Oc~a&qQG=4Nz!KWc4M*p_&K_fBkY$X_fc_P
z1KvKfnEMW;bUQ<t?){M$98`0-xKs0JOl2f>L`l-B@9Qc5v=xpHkE460TiMLw0hqTy
zh20#z8RFWtaQ*rSvi_SzZ>5J}?{^6-IIxH<Idh!$UQowp)hFqBc0cIsjbSTwcH>lb
zln$Lgfb+)ffWpfI$Z57U&YpP-lkyYk?{G787Th&fx}h}ShbgIz_ruwP+xhhu_OVCX
z#LJIv{KyREW?*Ks244N)iu*f*uwjc1UX<5B)o2^imd&CW?dRFkdO13zqKc`d)l4)0
zGAWxq2gy%Y;9cNV3M=>vab`O?i_7}l(db0lHGDXv4t@r~iC;Ojbw%*jbu?5rr{L`K
z580*9^YOIqIVuzK-cG$SwARTJau+WkS>e4fR6PeF@DJQPmWcC4TQW!g2SBZMSm61T
zf0F?8%VIswDi}i}FQih=PF=j(8jY5Yhf)27J2&ZrB+U?N@bu6M`ZfC>o7(+Dbi>ph
zf-;-I(b$8+UB405Z-AYbTu{0~@H`k5<1e*LS|NTGL#JhvXYNueTzr5sg7<NceWYpX
z9uDpA=E2aC09rU;0RGO{f<8CLvVCoeY-otkZoBJ6CZt9b-pT#;7=~|NSF%U9bI@wo
zK}<QQkKZ&Uu<zJr+%a8}zTTLFSJUF@(cw#+``!!~te=PC6KCP%!U^Die=#NR+Rv7Y
z)bW?=c>GmR#3s0o;velBj+=%XV2Sx*Tr<Fgx{}hs@KXh&{@3i!`xDq!Z$;{Nd`QAO
zlP(UpLf7&vXuSPTmN_{ZMrFrf&vG~TTRa>;aoVKu;wYT+iKdW%u#mO?TYoM7ul@%9
zw|)fur+x&xhOGNf{gB!j5+YO&d-c|dH$|R3Cft3s9^Nk}goB6E`Qu9*%0kwMg@pcZ
zQJVPQq7?Su4-Nm%hi-5U+4z4&X~3?Ki2s)L#P!OypRSIz^)gqnec(61_No3j+xP#J
zZBtfB+b%gb##Xyh+IEfmcN;!^if!*#Wt+H7H8#<0A5psXRr#T=6r0C;9+zK=^(t5V
zrEOa@aH8$H?b5cF;)dHElAmoeV(k^SI%kmWSEcxJ@z!1CMlQuRPcpPCp025}`PkP`
zUX;*e^Xj>EMWWOJo5Y&WHn-nfR#erU69OLcw(YZwY%gld+BS|HPHV!&Z5JmGxBdL)
zlZ{k_fvwBUD>mEC-m^KGzsaVwYM`yoTMJv)gOavKZx5@;ml|n1WBL-CY0ATGi+_mQ
z%G_wMsrV{x`#?0ncIc;Co4!UP+g$I@<u<pcRyfy*+lFik3)%d?4`t&2J(Ml~UB9jW
zS-)+rA>046egk%fME>WYNW6b7X&Vw1781QOJT%-SM0~M~K(-2A7q&WZ^=9AoAp<=_
sB*r?5X<QX^6g%Z5=DS!y_;uKNuhl`m>z9Xyd4>6gNNx*}3OD)x00Wh#x&QzG

diff --git a/human_aware_rl/ppo/trained_example/PPO_cramped_room_False_nw=0_vf=0.000100_es=0.200000_en=0.000500_kl=0.200000/checkpoint-500.tune_metadata b/human_aware_rl/ppo/trained_example/PPO_cramped_room_False_nw=0_vf=0.000100_es=0.200000_en=0.000500_kl=0.200000/checkpoint-500.tune_metadata
deleted file mode 100644
index 1b1a3cedee628b3a79ddc3581036927903274e2a..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001

literal 214
zcmXwzJrBV^7{`x#7i?yusVREJBDN-y?zdf@y62Kh&|dEdgT(5aSbY~C#RqVt*5Cj2
z=3O-zvmPM?&p8%qj|a<@&=odvY#xDJ1VE{rrQt-%B}ytILB%wkg%E|h%G%DREEZr@
zcF<n(uCRea)-@T6x#2dmu+y~-c>rOZQYvXZUca7sJS2qmor1YCnZSRhk=$0lYX(jz
hI2bO}*6QY+b)C6@3qN6DzOyY>cO>u^KGp4`{{b=NKEnV2

diff --git a/human_aware_rl/ppo/trained_example/PPO_cramped_room_False_nw=0_vf=0.000100_es=0.200000_en=0.000500_kl=0.200000/progress.csv b/human_aware_rl/ppo/trained_example/PPO_cramped_room_False_nw=0_vf=0.000100_es=0.200000_en=0.000500_kl=0.200000/progress.csv
deleted file mode 100644
index 4592a9c8..00000000
--- a/human_aware_rl/ppo/trained_example/PPO_cramped_room_False_nw=0_vf=0.000100_es=0.200000_en=0.000500_kl=0.200000/progress.csv
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:3c8e4335a85e9b0dab4e306c3cc0f7abfd151f09c2a5229e97a88d2c1036d4b9
-size 1787766
diff --git a/human_aware_rl/ppo/trained_example/PPO_cramped_room_False_nw=0_vf=0.000100_es=0.200000_en=0.000500_kl=0.200000/result.json b/human_aware_rl/ppo/trained_example/PPO_cramped_room_False_nw=0_vf=0.000100_es=0.200000_en=0.000500_kl=0.200000/result.json
deleted file mode 100644
index f37fb6d8..00000000
--- a/human_aware_rl/ppo/trained_example/PPO_cramped_room_False_nw=0_vf=0.000100_es=0.200000_en=0.000500_kl=0.200000/result.json
+++ /dev/null
@@ -1,500 +0,0 @@
-{"episode_reward_max": 25.0, "episode_reward_min": 0.0, "episode_reward_mean": 8.65625, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 14.0}, "policy_reward_mean": {"ppo": 4.328125}, "custom_metrics": {"sparse_reward_mean": 0.0, "sparse_reward_min": 0, "sparse_reward_max": 0, "shaped_reward_mean": 8.65625, "shaped_reward_min": 0, "shaped_reward_max": 25, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 5.46875, "onion_pickup_agent_0_min": 1, "onion_pickup_agent_0_max": 12, "onion_pickup_agent_1_mean": 6.34375, "onion_pickup_agent_1_min": 0, "onion_pickup_agent_1_max": 12, "useful_onion_pickup_agent_0_mean": 3.78125, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 10, "useful_onion_pickup_agent_1_mean": 4.75, "useful_onion_pickup_agent_1_min": 0, "useful_onion_pickup_agent_1_max": 12, "onion_drop_agent_0_mean": 3.84375, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 9, "onion_drop_agent_1_mean": 5.0, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 11, "useful_onion_drop_agent_0_mean": 1.28125, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 1.28125, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 1.03125, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 3, "potting_onion_agent_1_mean": 0.84375, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 3, "dish_pickup_agent_0_mean": 2.96875, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 2.5625, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 0.15625, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 1, "useful_dish_pickup_agent_1_mean": 0.125, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 1, "dish_drop_agent_0_mean": 2.5, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 8, "dish_drop_agent_1_mean": 2.125, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 1.03125, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 7, "useful_dish_drop_agent_1_mean": 0.9375, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 4, "soup_pickup_agent_0_mean": 0.4375, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 4, "soup_pickup_agent_1_mean": 0.28125, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 2, "soup_delivery_agent_0_mean": 0.125, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 1, "soup_delivery_agent_1_mean": 0.0625, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 1, "soup_drop_agent_0_mean": 0.3125, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 4, "soup_drop_agent_1_mean": 0.125, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 
1.03125, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 3, "optimal_onion_potting_agent_1_mean": 0.84375, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 3, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 1.03125, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 3, "viable_onion_potting_agent_1_mean": 0.84375, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 3, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [3.0, 6.0, 8.0, 6.0, 3.0, 9.0, 9.0, 9.0, 17.0, 25.0, 8.0, 6.0, 6.0, 3.0, 3.0, 17.0, 14.0, 17.0, 6.0, 3.0, 14.0, 11.0, 9.0, 3.0, 8.0, 11.0, 11.0, 14.0, 0.0, 3.0, 9.0, 6.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [3.0, 0.0, 0.0, 6.0, 0.0, 8.0, 3.0, 3.0, 3.0, 0.0, 3.0, 6.0, 6.0, 3.0, 0.0, 9.0, 14.0, 3.0, 14.0, 11.0, 3.0, 5.0, 3.0, 3.0, 0.0, 6.0, 3.0, 0.0, 0.0, 3.0, 14.0, 3.0, 14.0, 0.0, 11.0, 6.0, 6.0, 0.0, 3.0, 0.0, 9.0, 5.0, 3.0, 8.0, 3.0, 6.0, 3.0, 0.0, 3.0, 5.0, 3.0, 8.0, 8.0, 3.0, 11.0, 3.0, 0.0, 0.0, 0.0, 3.0, 3.0, 6.0, 0.0, 6.0]}, "sampler_perf": {"mean_env_wait_ms": 0.45519290086990427, "mean_processing_ms": 0.16432746798999048, "mean_inference_ms": 0.7360864547945244}, "off_policy_estimator": {}, "info": {"num_steps_trained": 24000, "num_steps_sampled": 12800, "sample_time_ms": 17464.662, "load_time_ms": 141.686, "grad_time_ms": 6292.799, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.20000000298023224, "cur_lr": 0.0010000000474974513, "total_loss": -0.3615521490573883, "policy_loss": -0.003929345868527889, "vf_loss": 0.7481115460395813, "vf_explained_var": 0.002059757709503174, "kl": 0.00042938394472002983, "entropy": 1.7889174222946167, "entropy_coeff": 0.20000000298023224, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 12800, "episodes_total": 32, "training_iteration": 1, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_19-13-21", "timestamp": 1660241601, "time_this_iter_s": 23.966287851333618, "time_total_s": 23.966287851333618, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": 
-1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, 
"evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 
0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 23.966287851333618, "timesteps_since_restore": 12800, "iterations_since_restore": 1, "perf": {"cpu_util_percent": 43.42857142857144, "ram_util_percent": 57.03714285714286}}
-{"episode_reward_max": 25.0, "episode_reward_min": 0.0, "episode_reward_mean": 9.140625, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 14.0}, "policy_reward_mean": {"ppo": 4.5703125}, "custom_metrics": {"sparse_reward_mean": 0.0, "sparse_reward_min": 0, "sparse_reward_max": 0, "shaped_reward_mean": 9.140625, "shaped_reward_min": 0, "shaped_reward_max": 25, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 5.734375, "onion_pickup_agent_0_min": 0, "onion_pickup_agent_0_max": 12, "onion_pickup_agent_1_mean": 6.328125, "onion_pickup_agent_1_min": 0, "onion_pickup_agent_1_max": 14, "useful_onion_pickup_agent_0_mean": 3.65625, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 10, "useful_onion_pickup_agent_1_mean": 4.46875, "useful_onion_pickup_agent_1_min": 0, "useful_onion_pickup_agent_1_max": 12, "onion_drop_agent_0_mean": 4.109375, "onion_drop_agent_0_min": 
0, "onion_drop_agent_0_max": 9, "onion_drop_agent_1_mean": 4.90625, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 12, "useful_onion_drop_agent_0_mean": 1.65625, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 9, "useful_onion_drop_agent_1_mean": 1.515625, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 7, "potting_onion_agent_0_mean": 1.03125, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 3, "potting_onion_agent_1_mean": 0.90625, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 3, "dish_pickup_agent_0_mean": 2.90625, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 2.4375, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 0.15625, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 1, "useful_dish_pickup_agent_1_mean": 0.171875, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 1, "dish_drop_agent_0_mean": 2.4375, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 9, "dish_drop_agent_1_mean": 2.03125, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.953125, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 7, "useful_dish_drop_agent_1_mean": 0.859375, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 7, "soup_pickup_agent_0_mean": 0.53125, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 0.375, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 5, "soup_delivery_agent_0_mean": 0.171875, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 1, "soup_delivery_agent_1_mean": 0.046875, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 1, "soup_drop_agent_0_mean": 0.34375, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 6, "soup_drop_agent_1_mean": 0.21875, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 4, 
"optimal_onion_potting_agent_0_mean": 1.03125, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 3, "optimal_onion_potting_agent_1_mean": 0.90625, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 3, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 1.03125, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 3, "viable_onion_potting_agent_1_mean": 0.90625, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 3, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, 
"useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [0.0, 6.0, 11.0, 20.0, 3.0, 6.0, 14.0, 6.0, 9.0, 3.0, 3.0, 6.0, 9.0, 12.0, 12.0, 19.0, 8.0, 12.0, 8.0, 14.0, 3.0, 3.0, 17.0, 3.0, 20.0, 11.0, 3.0, 14.0, 22.0, 3.0, 14.0, 14.0, 3.0, 6.0, 8.0, 6.0, 3.0, 9.0, 9.0, 9.0, 17.0, 25.0, 8.0, 6.0, 6.0, 3.0, 3.0, 17.0, 14.0, 17.0, 6.0, 3.0, 14.0, 11.0, 9.0, 3.0, 8.0, 11.0, 11.0, 14.0, 0.0, 3.0, 9.0, 6.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [0.0, 0.0, 3.0, 3.0, 3.0, 8.0, 6.0, 14.0, 0.0, 3.0, 0.0, 6.0, 11.0, 3.0, 0.0, 6.0, 3.0, 6.0, 3.0, 0.0, 0.0, 3.0, 3.0, 3.0, 0.0, 9.0, 12.0, 0.0, 3.0, 9.0, 14.0, 5.0, 8.0, 0.0, 6.0, 6.0, 3.0, 5.0, 11.0, 3.0, 0.0, 3.0, 3.0, 0.0, 14.0, 3.0, 3.0, 0.0, 14.0, 6.0, 6.0, 5.0, 0.0, 3.0, 0.0, 14.0, 13.0, 9.0, 0.0, 3.0, 11.0, 3.0, 11.0, 3.0, 3.0, 0.0, 0.0, 6.0, 0.0, 8.0, 3.0, 3.0, 3.0, 0.0, 3.0, 6.0, 6.0, 3.0, 0.0, 9.0, 14.0, 3.0, 14.0, 11.0, 3.0, 5.0, 3.0, 3.0, 0.0, 6.0, 3.0, 0.0, 0.0, 3.0, 14.0, 3.0, 14.0, 0.0, 11.0, 6.0, 6.0, 0.0, 3.0, 0.0, 9.0, 5.0, 3.0, 8.0, 3.0, 6.0, 3.0, 0.0, 3.0, 5.0, 3.0, 8.0, 8.0, 3.0, 11.0, 3.0, 0.0, 0.0, 0.0, 3.0, 3.0, 6.0, 0.0, 6.0]}, "sampler_perf": {"mean_env_wait_ms": 0.45293878883424954, "mean_processing_ms": 0.1625872490925028, "mean_inference_ms": 0.7353549498376587}, "off_policy_estimator": {}, "info": {"num_steps_trained": 48000, "num_steps_sampled": 25600, "sample_time_ms": 17329.738, "load_time_ms": 90.796, "grad_time_ms": 6126.382, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.10000000149011612, "cur_lr": 0.0010000000474974513, "total_loss": -0.35112297534942627, "policy_loss": -0.008805765770375729, "vf_loss": 
0.7840461730957031, "vf_explained_var": -0.002521991729736328, "kl": 0.00048407851136289537, "entropy": 1.7883315086364746, "entropy_coeff": 0.19148799777030945, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 25600, "episodes_total": 64, "training_iteration": 2, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_19-13-45", "timestamp": 1660241625, "time_this_iter_s": 23.218619108200073, "time_total_s": 47.18490695953369, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": 
"cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, 
"output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 47.18490695953369, "timesteps_since_restore": 25600, "iterations_since_restore": 2, "perf": {"cpu_util_percent": 37.300000000000004, "ram_util_percent": 57.44117647058823}}
-{"episode_reward_max": 25.0, "episode_reward_min": 0.0, "episode_reward_mean": 9.052083333333334, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 16.0}, "policy_reward_mean": {"ppo": 4.526041666666667}, "custom_metrics": {"sparse_reward_mean": 0.0, "sparse_reward_min": 0, "sparse_reward_max": 0, "shaped_reward_mean": 9.052083333333334, "shaped_reward_min": 0, "shaped_reward_max": 25, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 5.791666666666667, "onion_pickup_agent_0_min": 0, "onion_pickup_agent_0_max": 13, "onion_pickup_agent_1_mean": 6.104166666666667, "onion_pickup_agent_1_min": 0, "onion_pickup_agent_1_max": 14, "useful_onion_pickup_agent_0_mean": 3.8854166666666665, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 11, "useful_onion_pickup_agent_1_mean": 4.260416666666667, "useful_onion_pickup_agent_1_min": 0, "useful_onion_pickup_agent_1_max": 12, 
"onion_drop_agent_0_mean": 4.208333333333333, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 11, "onion_drop_agent_1_mean": 4.697916666666667, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 12, "useful_onion_drop_agent_0_mean": 1.5208333333333333, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 9, "useful_onion_drop_agent_1_mean": 1.5104166666666667, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 7, "potting_onion_agent_0_mean": 1.0416666666666667, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 3, "potting_onion_agent_1_mean": 0.875, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 3, "dish_pickup_agent_0_mean": 2.8229166666666665, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 2.46875, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 0.125, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 1, "useful_dish_pickup_agent_1_mean": 0.17708333333333334, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 1, "dish_drop_agent_0_mean": 2.3333333333333335, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 9, "dish_drop_agent_1_mean": 2.0416666666666665, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.9895833333333334, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 7, "useful_dish_drop_agent_1_mean": 0.8125, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 7, "soup_pickup_agent_0_mean": 0.4479166666666667, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 0.3854166666666667, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 5, "soup_delivery_agent_0_mean": 0.14583333333333334, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 1, "soup_delivery_agent_1_mean": 0.07291666666666667, "soup_delivery_agent_1_min": 0, 
"soup_delivery_agent_1_max": 1, "soup_drop_agent_0_mean": 0.2708333333333333, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 6, "soup_drop_agent_1_mean": 0.20833333333333334, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 4, "optimal_onion_potting_agent_0_mean": 1.0416666666666667, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 3, "optimal_onion_potting_agent_1_mean": 0.875, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 3, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 1.0416666666666667, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 3, "viable_onion_potting_agent_1_mean": 0.875, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 3, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, 
"useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [9.0, 17.0, 9.0, 8.0, 19.0, 3.0, 9.0, 14.0, 11.0, 22.0, 6.0, 3.0, 3.0, 6.0, 9.0, 3.0, 11.0, 11.0, 3.0, 3.0, 11.0, 9.0, 9.0, 3.0, 11.0, 6.0, 11.0, 6.0, 16.0, 8.0, 6.0, 9.0, 3.0, 6.0, 8.0, 6.0, 3.0, 9.0, 9.0, 9.0, 17.0, 25.0, 8.0, 6.0, 6.0, 3.0, 3.0, 17.0, 14.0, 17.0, 6.0, 3.0, 14.0, 11.0, 9.0, 3.0, 8.0, 11.0, 11.0, 14.0, 0.0, 3.0, 9.0, 6.0, 0.0, 6.0, 11.0, 20.0, 3.0, 6.0, 14.0, 6.0, 9.0, 3.0, 3.0, 6.0, 9.0, 12.0, 12.0, 19.0, 8.0, 12.0, 8.0, 14.0, 3.0, 3.0, 17.0, 3.0, 20.0, 11.0, 3.0, 14.0, 22.0, 3.0, 14.0, 14.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [6.0, 3.0, 3.0, 14.0, 3.0, 6.0, 3.0, 5.0, 3.0, 16.0, 3.0, 0.0, 6.0, 3.0, 5.0, 9.0, 3.0, 8.0, 14.0, 8.0, 0.0, 6.0, 3.0, 0.0, 0.0, 3.0, 0.0, 6.0, 9.0, 0.0, 0.0, 3.0, 8.0, 3.0, 0.0, 11.0, 3.0, 0.0, 0.0, 3.0, 8.0, 3.0, 9.0, 0.0, 9.0, 0.0, 3.0, 0.0, 3.0, 8.0, 3.0, 3.0, 11.0, 0.0, 3.0, 3.0, 8.0, 8.0, 5.0, 3.0, 6.0, 0.0, 3.0, 6.0, 3.0, 0.0, 0.0, 6.0, 0.0, 8.0, 3.0, 3.0, 3.0, 0.0, 3.0, 6.0, 6.0, 3.0, 0.0, 9.0, 14.0, 3.0, 14.0, 11.0, 3.0, 5.0, 3.0, 3.0, 0.0, 6.0, 3.0, 0.0, 0.0, 3.0, 14.0, 3.0, 14.0, 0.0, 11.0, 6.0, 6.0, 0.0, 3.0, 0.0, 9.0, 5.0, 3.0, 8.0, 3.0, 6.0, 3.0, 0.0, 3.0, 5.0, 3.0, 8.0, 
8.0, 3.0, 11.0, 3.0, 0.0, 0.0, 0.0, 3.0, 3.0, 6.0, 0.0, 6.0, 0.0, 0.0, 3.0, 3.0, 3.0, 8.0, 6.0, 14.0, 0.0, 3.0, 0.0, 6.0, 11.0, 3.0, 0.0, 6.0, 3.0, 6.0, 3.0, 0.0, 0.0, 3.0, 3.0, 3.0, 0.0, 9.0, 12.0, 0.0, 3.0, 9.0, 14.0, 5.0, 8.0, 0.0, 6.0, 6.0, 3.0, 5.0, 11.0, 3.0, 0.0, 3.0, 3.0, 0.0, 14.0, 3.0, 3.0, 0.0, 14.0, 6.0, 6.0, 5.0, 0.0, 3.0, 0.0, 14.0, 13.0, 9.0, 0.0, 3.0, 11.0, 3.0, 11.0, 3.0]}, "sampler_perf": {"mean_env_wait_ms": 0.45189034776232173, "mean_processing_ms": 0.1615299805648739, "mean_inference_ms": 0.7382304408765018}, "off_policy_estimator": {}, "info": {"num_steps_trained": 72000, "num_steps_sampled": 38400, "sample_time_ms": 17410.737, "load_time_ms": 73.556, "grad_time_ms": 6485.631, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.05000000074505806, "cur_lr": 0.0010000000474974513, "total_loss": -0.3324081599712372, "policy_loss": -0.005397057626396418, "vf_loss": 0.7086341977119446, "vf_explained_var": -0.000792384147644043, "kl": 0.0004734609683509916, "entropy": 1.7876968383789062, "entropy_coeff": 0.18297599256038666, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 38400, "episodes_total": 96, "training_iteration": 3, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_19-14-10", "timestamp": 1660241650, "time_this_iter_s": 24.84310221672058, "time_total_s": 72.02800917625427, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": 
null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, 
"inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 72.02800917625427, "timesteps_since_restore": 38400, "iterations_since_restore": 3, "perf": {"cpu_util_percent": 38.59428571428571, 
"ram_util_percent": 57.505714285714284}}
-{"episode_reward_max": 24.0, "episode_reward_min": 0.0, "episode_reward_mean": 9.55, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 18.0}, "policy_reward_mean": {"ppo": 4.775}, "custom_metrics": {"sparse_reward_mean": 0.0, "sparse_reward_min": 0, "sparse_reward_max": 0, "shaped_reward_mean": 9.55, "shaped_reward_min": 0, "shaped_reward_max": 24, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 5.79, "onion_pickup_agent_0_min": 0, "onion_pickup_agent_0_max": 13, "onion_pickup_agent_1_mean": 5.74, "onion_pickup_agent_1_min": 0, "onion_pickup_agent_1_max": 14, "useful_onion_pickup_agent_0_mean": 3.83, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 11, "useful_onion_pickup_agent_1_mean": 3.88, "useful_onion_pickup_agent_1_min": 0, "useful_onion_pickup_agent_1_max": 11, "onion_drop_agent_0_mean": 4.15, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 
11, "onion_drop_agent_1_mean": 4.35, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 12, "useful_onion_drop_agent_0_mean": 1.51, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 9, "useful_onion_drop_agent_1_mean": 1.58, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 7, "potting_onion_agent_0_mean": 1.09, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 4, "potting_onion_agent_1_mean": 0.85, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 3, "dish_pickup_agent_0_mean": 2.64, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 2.36, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 0.12, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 1, "useful_dish_pickup_agent_1_mean": 0.19, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 1, "dish_drop_agent_0_mean": 2.17, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 9, "dish_drop_agent_1_mean": 1.84, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.94, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 6, "useful_dish_drop_agent_1_mean": 0.68, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 7, "soup_pickup_agent_0_mean": 0.36, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 0.53, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 5, "soup_delivery_agent_0_mean": 0.12, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 1, "soup_delivery_agent_1_mean": 0.13, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 1, "soup_drop_agent_0_mean": 0.17, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 6, "soup_drop_agent_1_mean": 0.27, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 4, "optimal_onion_potting_agent_0_mean": 1.09, "optimal_onion_potting_agent_0_min": 0, 
"optimal_onion_potting_agent_0_max": 4, "optimal_onion_potting_agent_1_mean": 0.85, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 3, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 1.09, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 4, "viable_onion_potting_agent_1_mean": 0.85, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 3, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [8.0, 
6.0, 8.0, 8.0, 6.0, 3.0, 24.0, 6.0, 6.0, 6.0, 19.0, 11.0, 14.0, 8.0, 9.0, 3.0, 8.0, 14.0, 14.0, 9.0, 17.0, 3.0, 19.0, 19.0, 17.0, 12.0, 6.0, 14.0, 11.0, 14.0, 14.0, 9.0, 0.0, 3.0, 9.0, 6.0, 0.0, 6.0, 11.0, 20.0, 3.0, 6.0, 14.0, 6.0, 9.0, 3.0, 3.0, 6.0, 9.0, 12.0, 12.0, 19.0, 8.0, 12.0, 8.0, 14.0, 3.0, 3.0, 17.0, 3.0, 20.0, 11.0, 3.0, 14.0, 22.0, 3.0, 14.0, 14.0, 9.0, 17.0, 9.0, 8.0, 19.0, 3.0, 9.0, 14.0, 11.0, 22.0, 6.0, 3.0, 3.0, 6.0, 9.0, 3.0, 11.0, 11.0, 3.0, 3.0, 11.0, 9.0, 9.0, 3.0, 11.0, 6.0, 11.0, 6.0, 16.0, 8.0, 6.0, 9.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [5.0, 3.0, 3.0, 3.0, 3.0, 5.0, 8.0, 0.0, 0.0, 6.0, 0.0, 3.0, 6.0, 18.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 11.0, 8.0, 3.0, 8.0, 6.0, 8.0, 8.0, 0.0, 6.0, 3.0, 0.0, 3.0, 0.0, 8.0, 14.0, 0.0, 9.0, 5.0, 9.0, 0.0, 6.0, 11.0, 3.0, 0.0, 11.0, 8.0, 6.0, 13.0, 12.0, 5.0, 6.0, 6.0, 6.0, 0.0, 3.0, 11.0, 3.0, 8.0, 6.0, 8.0, 5.0, 9.0, 6.0, 3.0, 0.0, 0.0, 0.0, 3.0, 3.0, 6.0, 0.0, 6.0, 0.0, 0.0, 3.0, 3.0, 3.0, 8.0, 6.0, 14.0, 0.0, 3.0, 0.0, 6.0, 11.0, 3.0, 0.0, 6.0, 3.0, 6.0, 3.0, 0.0, 0.0, 3.0, 3.0, 3.0, 0.0, 9.0, 12.0, 0.0, 3.0, 9.0, 14.0, 5.0, 8.0, 0.0, 6.0, 6.0, 3.0, 5.0, 11.0, 3.0, 0.0, 3.0, 3.0, 0.0, 14.0, 3.0, 3.0, 0.0, 14.0, 6.0, 6.0, 5.0, 0.0, 3.0, 0.0, 14.0, 13.0, 9.0, 0.0, 3.0, 11.0, 3.0, 11.0, 3.0, 6.0, 3.0, 3.0, 14.0, 3.0, 6.0, 3.0, 5.0, 3.0, 16.0, 3.0, 0.0, 6.0, 3.0, 5.0, 9.0, 3.0, 8.0, 14.0, 8.0, 0.0, 6.0, 3.0, 0.0, 0.0, 3.0, 0.0, 6.0, 9.0, 0.0, 0.0, 3.0, 8.0, 3.0, 0.0, 11.0, 3.0, 0.0, 0.0, 3.0, 8.0, 3.0, 9.0, 
0.0, 9.0, 0.0, 3.0, 0.0, 3.0, 8.0, 3.0, 3.0, 11.0, 0.0, 3.0, 3.0, 8.0, 8.0, 5.0, 3.0, 6.0, 0.0, 3.0, 6.0]}, "sampler_perf": {"mean_env_wait_ms": 0.45063714229107177, "mean_processing_ms": 0.15983031380236057, "mean_inference_ms": 0.739669952549497}, "off_policy_estimator": {}, "info": {"num_steps_trained": 96000, "num_steps_sampled": 51200, "sample_time_ms": 17369.146, "load_time_ms": 64.562, "grad_time_ms": 6646.22, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.02500000037252903, "cur_lr": 0.0010000000474974513, "total_loss": -0.3157036006450653, "policy_loss": -0.004088650923222303, "vf_loss": 0.8062646985054016, "vf_explained_var": 0.0032039880752563477, "kl": 0.0005627681966871023, "entropy": 1.7866708040237427, "entropy_coeff": 0.17446400225162506, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 51200, "episodes_total": 128, "training_iteration": 4, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_19-14-35", "timestamp": 1660241675, "time_this_iter_s": 24.43727397918701, "time_total_s": 96.46528315544128, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": 
{"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, 
"collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 96.46528315544128, "timesteps_since_restore": 51200, "iterations_since_restore": 4, "perf": {"cpu_util_percent": 39.84571428571428, "ram_util_percent": 57.64285714285714}}
-{"episode_reward_max": 54.0, "episode_reward_min": 3.0, "episode_reward_mean": 10.84, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 28.0}, "policy_reward_mean": {"ppo": 5.42}, "custom_metrics": {"sparse_reward_mean": 0.2, "sparse_reward_min": 0, "sparse_reward_max": 20, "shaped_reward_mean": 10.44, "shaped_reward_min": 3, "shaped_reward_max": 25, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 5.52, "onion_pickup_agent_0_min": 0, "onion_pickup_agent_0_max": 13, "onion_pickup_agent_1_mean": 5.69, "onion_pickup_agent_1_min": 0, "onion_pickup_agent_1_max": 14, "useful_onion_pickup_agent_0_mean": 3.68, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 11, "useful_onion_pickup_agent_1_mean": 3.83, "useful_onion_pickup_agent_1_min": 0, "useful_onion_pickup_agent_1_max": 11, "onion_drop_agent_0_mean": 3.91, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 
11, "onion_drop_agent_1_mean": 4.23, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 12, "useful_onion_drop_agent_0_mean": 1.44, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 1.6, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 7, "potting_onion_agent_0_mean": 1.05, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 4, "potting_onion_agent_1_mean": 0.94, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 3, "dish_pickup_agent_0_mean": 2.7, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 2.33, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 0.15, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 1, "useful_dish_pickup_agent_1_mean": 0.19, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 2, "dish_drop_agent_0_mean": 2.18, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 1.77, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 7, "useful_dish_drop_agent_0_mean": 0.92, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 4, "useful_dish_drop_agent_1_mean": 0.63, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 4, "soup_pickup_agent_0_mean": 0.43, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 0.51, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 4, "soup_delivery_agent_0_mean": 0.14, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 1, "soup_delivery_agent_1_mean": 0.16, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 1, "soup_drop_agent_0_mean": 0.18, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 6, "soup_drop_agent_1_mean": 0.22, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 3, "optimal_onion_potting_agent_0_mean": 1.05, "optimal_onion_potting_agent_0_min": 0, 
"optimal_onion_potting_agent_0_max": 4, "optimal_onion_potting_agent_1_mean": 0.94, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 3, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 1.05, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 4, "viable_onion_potting_agent_1_mean": 0.94, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 3, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": 
[16.0, 9.0, 8.0, 14.0, 9.0, 9.0, 6.0, 11.0, 19.0, 9.0, 12.0, 8.0, 17.0, 3.0, 11.0, 16.0, 11.0, 6.0, 22.0, 6.0, 3.0, 14.0, 11.0, 20.0, 25.0, 54.0, 14.0, 16.0, 3.0, 14.0, 3.0, 3.0, 22.0, 3.0, 14.0, 14.0, 9.0, 17.0, 9.0, 8.0, 19.0, 3.0, 9.0, 14.0, 11.0, 22.0, 6.0, 3.0, 3.0, 6.0, 9.0, 3.0, 11.0, 11.0, 3.0, 3.0, 11.0, 9.0, 9.0, 3.0, 11.0, 6.0, 11.0, 6.0, 16.0, 8.0, 6.0, 9.0, 8.0, 6.0, 8.0, 8.0, 6.0, 3.0, 24.0, 6.0, 6.0, 6.0, 19.0, 11.0, 14.0, 8.0, 9.0, 3.0, 8.0, 14.0, 14.0, 9.0, 17.0, 3.0, 19.0, 19.0, 17.0, 12.0, 6.0, 14.0, 11.0, 14.0, 14.0, 9.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [11.0, 5.0, 6.0, 3.0, 0.0, 8.0, 8.0, 6.0, 6.0, 3.0, 0.0, 9.0, 0.0, 6.0, 5.0, 6.0, 11.0, 8.0, 0.0, 9.0, 12.0, 0.0, 8.0, 0.0, 5.0, 12.0, 0.0, 3.0, 6.0, 5.0, 5.0, 11.0, 5.0, 6.0, 3.0, 3.0, 0.0, 22.0, 3.0, 3.0, 3.0, 0.0, 11.0, 3.0, 5.0, 6.0, 14.0, 6.0, 16.0, 9.0, 26.0, 28.0, 11.0, 3.0, 8.0, 8.0, 3.0, 0.0, 0.0, 14.0, 0.0, 3.0, 3.0, 0.0, 13.0, 9.0, 0.0, 3.0, 11.0, 3.0, 11.0, 3.0, 6.0, 3.0, 3.0, 14.0, 3.0, 6.0, 3.0, 5.0, 3.0, 16.0, 3.0, 0.0, 6.0, 3.0, 5.0, 9.0, 3.0, 8.0, 14.0, 8.0, 0.0, 6.0, 3.0, 0.0, 0.0, 3.0, 0.0, 6.0, 9.0, 0.0, 0.0, 3.0, 8.0, 3.0, 0.0, 11.0, 3.0, 0.0, 0.0, 3.0, 8.0, 3.0, 9.0, 0.0, 9.0, 0.0, 3.0, 0.0, 3.0, 8.0, 3.0, 3.0, 11.0, 0.0, 3.0, 3.0, 8.0, 8.0, 5.0, 3.0, 6.0, 0.0, 3.0, 6.0, 5.0, 3.0, 3.0, 3.0, 3.0, 5.0, 8.0, 0.0, 0.0, 6.0, 0.0, 3.0, 6.0, 18.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 11.0, 8.0, 3.0, 8.0, 6.0, 8.0, 8.0, 0.0, 6.0, 3.0, 0.0, 3.0, 0.0, 8.0, 14.0, 0.0, 9.0, 5.0, 9.0, 0.0, 
6.0, 11.0, 3.0, 0.0, 11.0, 8.0, 6.0, 13.0, 12.0, 5.0, 6.0, 6.0, 6.0, 0.0, 3.0, 11.0, 3.0, 8.0, 6.0, 8.0, 5.0, 9.0, 6.0, 3.0]}, "sampler_perf": {"mean_env_wait_ms": 0.45066530826567375, "mean_processing_ms": 0.15893004682590756, "mean_inference_ms": 0.7420671329840245}, "off_policy_estimator": {}, "info": {"num_steps_trained": 120000, "num_steps_sampled": 64000, "sample_time_ms": 17388.622, "load_time_ms": 59.359, "grad_time_ms": 6772.673, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.012500000186264515, "cur_lr": 0.0010000000474974513, "total_loss": -0.30251750349998474, "policy_loss": -0.006208862643688917, "vf_loss": 1.4635206460952759, "vf_explained_var": 0.0046030678786337376, "kl": 0.0005594257963821292, "entropy": 1.7864326238632202, "entropy_coeff": 0.16595199704170227, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 64000, "episodes_total": 160, "training_iteration": 5, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_19-14-59", "timestamp": 1660241699, "time_this_iter_s": 24.809880018234253, "time_total_s": 121.27516317367554, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, 
"no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, 
"compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 121.27516317367554, "timesteps_since_restore": 64000, "iterations_since_restore": 5, "perf": {"cpu_util_percent": 39.71142857142857, "ram_util_percent": 57.60285714285714}}
-{"episode_reward_max": 68.0, "episode_reward_min": 0.0, "episode_reward_mean": 12.39, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 37.0}, "policy_reward_mean": {"ppo": 6.195}, "custom_metrics": {"sparse_reward_mean": 0.4, "sparse_reward_min": 0, "sparse_reward_max": 20, "shaped_reward_mean": 11.59, "shaped_reward_min": 0, "shaped_reward_max": 28, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 5.22, "onion_pickup_agent_0_min": 0, "onion_pickup_agent_0_max": 10, "onion_pickup_agent_1_mean": 5.75, "onion_pickup_agent_1_min": 0, "onion_pickup_agent_1_max": 15, "useful_onion_pickup_agent_0_mean": 3.39, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 9, "useful_onion_pickup_agent_1_mean": 3.9, "useful_onion_pickup_agent_1_min": 0, "useful_onion_pickup_agent_1_max": 11, "onion_drop_agent_0_mean": 3.53, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 
8, "onion_drop_agent_1_mean": 4.24, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 12, "useful_onion_drop_agent_0_mean": 1.43, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 1.59, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 8, "potting_onion_agent_0_mean": 1.1, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 4, "potting_onion_agent_1_mean": 0.99, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 3, "dish_pickup_agent_0_mean": 2.59, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 2.07, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 0.21, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 1, "useful_dish_pickup_agent_1_mean": 0.23, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 2, "dish_drop_agent_0_mean": 2.04, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 1.5, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 7, "useful_dish_drop_agent_0_mean": 0.74, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 4, "useful_dish_drop_agent_1_mean": 0.49, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 4, "soup_pickup_agent_0_mean": 0.51, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 3, "soup_pickup_agent_1_mean": 0.55, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 4, "soup_delivery_agent_0_mean": 0.17, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 1, "soup_delivery_agent_1_mean": 0.22, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 2, "soup_drop_agent_0_mean": 0.23, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 3, "soup_drop_agent_1_mean": 0.2, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 3, "optimal_onion_potting_agent_0_mean": 1.1, "optimal_onion_potting_agent_0_min": 0, 
"optimal_onion_potting_agent_0_max": 4, "optimal_onion_potting_agent_1_mean": 0.99, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 3, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 1.1, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 4, "viable_onion_potting_agent_1_mean": 0.99, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 3, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [14.0, 
6.0, 20.0, 17.0, 14.0, 11.0, 6.0, 8.0, 6.0, 0.0, 11.0, 3.0, 22.0, 11.0, 19.0, 9.0, 20.0, 23.0, 6.0, 11.0, 11.0, 22.0, 19.0, 11.0, 11.0, 17.0, 14.0, 6.0, 68.0, 20.0, 9.0, 8.0, 16.0, 8.0, 6.0, 9.0, 8.0, 6.0, 8.0, 8.0, 6.0, 3.0, 24.0, 6.0, 6.0, 6.0, 19.0, 11.0, 14.0, 8.0, 9.0, 3.0, 8.0, 14.0, 14.0, 9.0, 17.0, 3.0, 19.0, 19.0, 17.0, 12.0, 6.0, 14.0, 11.0, 14.0, 14.0, 9.0, 16.0, 9.0, 8.0, 14.0, 9.0, 9.0, 6.0, 11.0, 19.0, 9.0, 12.0, 8.0, 17.0, 3.0, 11.0, 16.0, 11.0, 6.0, 22.0, 6.0, 3.0, 14.0, 11.0, 20.0, 25.0, 54.0, 14.0, 16.0, 3.0, 14.0, 3.0, 3.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [8.0, 6.0, 6.0, 0.0, 14.0, 6.0, 14.0, 3.0, 14.0, 0.0, 8.0, 3.0, 0.0, 6.0, 8.0, 0.0, 0.0, 6.0, 0.0, 0.0, 0.0, 11.0, 3.0, 0.0, 8.0, 14.0, 3.0, 8.0, 6.0, 13.0, 9.0, 0.0, 17.0, 3.0, 12.0, 11.0, 0.0, 6.0, 11.0, 0.0, 8.0, 3.0, 6.0, 16.0, 9.0, 10.0, 5.0, 6.0, 5.0, 6.0, 9.0, 8.0, 3.0, 11.0, 6.0, 0.0, 31.0, 37.0, 11.0, 9.0, 0.0, 9.0, 0.0, 8.0, 8.0, 8.0, 5.0, 3.0, 6.0, 0.0, 3.0, 6.0, 5.0, 3.0, 3.0, 3.0, 3.0, 5.0, 8.0, 0.0, 0.0, 6.0, 0.0, 3.0, 6.0, 18.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 11.0, 8.0, 3.0, 8.0, 6.0, 8.0, 8.0, 0.0, 6.0, 3.0, 0.0, 3.0, 0.0, 8.0, 14.0, 0.0, 9.0, 5.0, 9.0, 0.0, 6.0, 11.0, 3.0, 0.0, 11.0, 8.0, 6.0, 13.0, 12.0, 5.0, 6.0, 6.0, 6.0, 0.0, 3.0, 11.0, 3.0, 8.0, 6.0, 8.0, 5.0, 9.0, 6.0, 3.0, 11.0, 5.0, 6.0, 3.0, 0.0, 8.0, 8.0, 6.0, 6.0, 3.0, 0.0, 9.0, 0.0, 6.0, 5.0, 6.0, 11.0, 8.0, 0.0, 9.0, 12.0, 0.0, 8.0, 0.0, 5.0, 12.0, 0.0, 3.0, 6.0, 5.0, 5.0, 11.0, 5.0, 6.0, 3.0, 3.0, 0.0, 22.0, 3.0, 
3.0, 3.0, 0.0, 11.0, 3.0, 5.0, 6.0, 14.0, 6.0, 16.0, 9.0, 26.0, 28.0, 11.0, 3.0, 8.0, 8.0, 3.0, 0.0, 0.0, 14.0, 0.0, 3.0, 3.0, 0.0]}, "sampler_perf": {"mean_env_wait_ms": 0.4527646689746759, "mean_processing_ms": 0.15894443082147025, "mean_inference_ms": 0.7459920431247151}, "off_policy_estimator": {}, "info": {"num_steps_trained": 144000, "num_steps_sampled": 76800, "sample_time_ms": 17631.781, "load_time_ms": 55.878, "grad_time_ms": 7055.375, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0062500000931322575, "cur_lr": 0.0010000000474974513, "total_loss": -0.285332590341568, "policy_loss": -0.004330330062657595, "vf_loss": 1.753544807434082, "vf_explained_var": 0.007292529102414846, "kl": 0.0005500561674125493, "entropy": 1.7859567403793335, "entropy_coeff": 0.15744000673294067, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 76800, "episodes_total": 192, "training_iteration": 6, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_19-15-27", "timestamp": 1660241727, "time_this_iter_s": 27.381940841674805, "time_total_s": 148.65710401535034, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, 
"no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, 
"compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 148.65710401535034, "timesteps_since_restore": 76800, "iterations_since_restore": 6, "perf": {"cpu_util_percent": 46.235897435897435, "ram_util_percent": 57.91025641025641}}
-{"episode_reward_max": 68.0, "episode_reward_min": 0.0, "episode_reward_mean": 13.96, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 37.0}, "policy_reward_mean": {"ppo": 6.98}, "custom_metrics": {"sparse_reward_mean": 0.8, "sparse_reward_min": 0, "sparse_reward_max": 20, "shaped_reward_mean": 12.36, "shaped_reward_min": 0, "shaped_reward_max": 28, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 5.26, "onion_pickup_agent_0_min": 0, "onion_pickup_agent_0_max": 12, "onion_pickup_agent_1_mean": 5.86, "onion_pickup_agent_1_min": 1, "onion_pickup_agent_1_max": 15, "useful_onion_pickup_agent_0_mean": 3.13, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 8, "useful_onion_pickup_agent_1_mean": 3.81, "useful_onion_pickup_agent_1_min": 0, "useful_onion_pickup_agent_1_max": 9, "onion_drop_agent_0_mean": 3.52, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 9, 
"onion_drop_agent_1_mean": 4.23, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 12, "useful_onion_drop_agent_0_mean": 1.74, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 7, "useful_onion_drop_agent_1_mean": 1.72, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 8, "potting_onion_agent_0_mean": 1.13, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 4, "potting_onion_agent_1_mean": 1.16, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 3, "dish_pickup_agent_0_mean": 2.81, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 2.17, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 0.22, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 1, "useful_dish_pickup_agent_1_mean": 0.26, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 2, "dish_drop_agent_0_mean": 2.26, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 8, "dish_drop_agent_1_mean": 1.63, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.85, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 7, "useful_dish_drop_agent_1_mean": 0.61, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 6, "soup_pickup_agent_0_mean": 0.6, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 3, "soup_pickup_agent_1_mean": 0.59, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 0.23, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 1, "soup_delivery_agent_1_mean": 0.17, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 2, "soup_drop_agent_0_mean": 0.31, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 3, "soup_drop_agent_1_mean": 0.3, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 7, "optimal_onion_potting_agent_0_mean": 1.13, "optimal_onion_potting_agent_0_min": 0, 
"optimal_onion_potting_agent_0_max": 4, "optimal_onion_potting_agent_1_mean": 1.16, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 3, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 1.13, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 4, "viable_onion_potting_agent_1_mean": 1.16, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 3, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [6.0, 
14.0, 19.0, 6.0, 6.0, 17.0, 14.0, 3.0, 25.0, 11.0, 14.0, 9.0, 17.0, 14.0, 17.0, 14.0, 25.0, 17.0, 6.0, 19.0, 14.0, 3.0, 6.0, 3.0, 6.0, 63.0, 9.0, 28.0, 14.0, 8.0, 57.0, 9.0, 11.0, 14.0, 14.0, 9.0, 16.0, 9.0, 8.0, 14.0, 9.0, 9.0, 6.0, 11.0, 19.0, 9.0, 12.0, 8.0, 17.0, 3.0, 11.0, 16.0, 11.0, 6.0, 22.0, 6.0, 3.0, 14.0, 11.0, 20.0, 25.0, 54.0, 14.0, 16.0, 3.0, 14.0, 3.0, 3.0, 14.0, 6.0, 20.0, 17.0, 14.0, 11.0, 6.0, 8.0, 6.0, 0.0, 11.0, 3.0, 22.0, 11.0, 19.0, 9.0, 20.0, 23.0, 6.0, 11.0, 11.0, 22.0, 19.0, 11.0, 11.0, 17.0, 14.0, 6.0, 68.0, 20.0, 9.0, 8.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [3.0, 3.0, 6.0, 8.0, 10.0, 9.0, 0.0, 6.0, 3.0, 3.0, 9.0, 8.0, 6.0, 8.0, 0.0, 3.0, 17.0, 8.0, 8.0, 3.0, 5.0, 9.0, 6.0, 3.0, 3.0, 14.0, 11.0, 3.0, 14.0, 3.0, 6.0, 8.0, 9.0, 16.0, 6.0, 11.0, 0.0, 6.0, 14.0, 5.0, 6.0, 8.0, 3.0, 0.0, 6.0, 0.0, 0.0, 3.0, 3.0, 3.0, 29.0, 34.0, 6.0, 3.0, 6.0, 22.0, 8.0, 6.0, 5.0, 3.0, 31.0, 26.0, 3.0, 6.0, 3.0, 8.0, 6.0, 8.0, 5.0, 9.0, 6.0, 3.0, 11.0, 5.0, 6.0, 3.0, 0.0, 8.0, 8.0, 6.0, 6.0, 3.0, 0.0, 9.0, 0.0, 6.0, 5.0, 6.0, 11.0, 8.0, 0.0, 9.0, 12.0, 0.0, 8.0, 0.0, 5.0, 12.0, 0.0, 3.0, 6.0, 5.0, 5.0, 11.0, 5.0, 6.0, 3.0, 3.0, 0.0, 22.0, 3.0, 3.0, 3.0, 0.0, 11.0, 3.0, 5.0, 6.0, 14.0, 6.0, 16.0, 9.0, 26.0, 28.0, 11.0, 3.0, 8.0, 8.0, 3.0, 0.0, 0.0, 14.0, 0.0, 3.0, 3.0, 0.0, 8.0, 6.0, 6.0, 0.0, 14.0, 6.0, 14.0, 3.0, 14.0, 0.0, 8.0, 3.0, 0.0, 6.0, 8.0, 0.0, 0.0, 6.0, 0.0, 0.0, 0.0, 11.0, 3.0, 0.0, 8.0, 14.0, 3.0, 8.0, 6.0, 13.0, 9.0, 0.0, 17.0, 3.0, 12.0, 11.0, 0.0, 
6.0, 11.0, 0.0, 8.0, 3.0, 6.0, 16.0, 9.0, 10.0, 5.0, 6.0, 5.0, 6.0, 9.0, 8.0, 3.0, 11.0, 6.0, 0.0, 31.0, 37.0, 11.0, 9.0, 0.0, 9.0, 0.0, 8.0]}, "sampler_perf": {"mean_env_wait_ms": 7.041783650517479, "mean_processing_ms": 0.16106120541031635, "mean_inference_ms": 2.816008339885107}, "off_policy_estimator": {}, "info": {"num_steps_trained": 168000, "num_steps_sampled": 89600, "sample_time_ms": 363821.155, "load_time_ms": 54.883, "grad_time_ms": 7100.098, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0031250000465661287, "cur_lr": 0.0010000000474974513, "total_loss": -0.26905307173728943, "policy_loss": -0.0034452469553798437, "vf_loss": 2.160554885864258, "vf_explained_var": 0.012243330478668213, "kl": 0.0006163662183098495, "entropy": 1.784928321838379, "entropy_coeff": 0.14892800152301788, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 89600, "episodes_total": 224, "training_iteration": 7, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_19-56-15", "timestamp": 1660244175, "time_this_iter_s": 2448.401287794113, "time_total_s": 2597.0583918094635, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, 
"no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, 
"compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 2597.0583918094635, "timesteps_since_restore": 89600, "iterations_since_restore": 7, "perf": {"cpu_util_percent": 53.55, "ram_util_percent": 58.647826086956535}}
-{"episode_reward_max": 68.0, "episode_reward_min": 0.0, "episode_reward_mean": 14.41, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 37.0}, "policy_reward_mean": {"ppo": 7.205}, "custom_metrics": {"sparse_reward_mean": 1.0, "sparse_reward_min": 0, "sparse_reward_max": 20, "shaped_reward_mean": 12.41, "shaped_reward_min": 0, "shaped_reward_max": 33, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 5.3, "onion_pickup_agent_0_min": 1, "onion_pickup_agent_0_max": 12, "onion_pickup_agent_1_mean": 5.32, "onion_pickup_agent_1_min": 1, "onion_pickup_agent_1_max": 15, "useful_onion_pickup_agent_0_mean": 3.3, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 10, "useful_onion_pickup_agent_1_mean": 3.44, "useful_onion_pickup_agent_1_min": 0, "useful_onion_pickup_agent_1_max": 9, "onion_drop_agent_0_mean": 3.5, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 9, 
"onion_drop_agent_1_mean": 3.73, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 12, "useful_onion_drop_agent_0_mean": 1.61, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 7, "useful_onion_drop_agent_1_mean": 1.59, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 8, "potting_onion_agent_0_mean": 1.2, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 4, "potting_onion_agent_1_mean": 1.11, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 3, "dish_pickup_agent_0_mean": 2.73, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 2.53, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 0.16, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 1, "useful_dish_pickup_agent_1_mean": 0.25, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 2, "dish_drop_agent_0_mean": 2.19, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 8, "dish_drop_agent_1_mean": 1.89, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 9, "useful_dish_drop_agent_0_mean": 0.94, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 7, "useful_dish_drop_agent_1_mean": 0.81, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 7, "soup_pickup_agent_0_mean": 0.61, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 3, "soup_pickup_agent_1_mean": 0.74, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 0.2, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 1, "soup_delivery_agent_1_mean": 0.19, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 2, "soup_drop_agent_0_mean": 0.4, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 3, "soup_drop_agent_1_mean": 0.41, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 7, "optimal_onion_potting_agent_0_mean": 1.2, "optimal_onion_potting_agent_0_min": 0, 
"optimal_onion_potting_agent_0_max": 4, "optimal_onion_potting_agent_1_mean": 1.11, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 3, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 1.2, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 4, "viable_onion_potting_agent_1_mean": 1.11, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 3, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [11.0, 
9.0, 17.0, 8.0, 14.0, 19.0, 6.0, 33.0, 11.0, 6.0, 11.0, 14.0, 16.0, 3.0, 17.0, 6.0, 16.0, 11.0, 3.0, 57.0, 16.0, 8.0, 17.0, 9.0, 57.0, 11.0, 0.0, 11.0, 11.0, 11.0, 22.0, 11.0, 3.0, 14.0, 3.0, 3.0, 14.0, 6.0, 20.0, 17.0, 14.0, 11.0, 6.0, 8.0, 6.0, 0.0, 11.0, 3.0, 22.0, 11.0, 19.0, 9.0, 20.0, 23.0, 6.0, 11.0, 11.0, 22.0, 19.0, 11.0, 11.0, 17.0, 14.0, 6.0, 68.0, 20.0, 9.0, 8.0, 6.0, 14.0, 19.0, 6.0, 6.0, 17.0, 14.0, 3.0, 25.0, 11.0, 14.0, 9.0, 17.0, 14.0, 17.0, 14.0, 25.0, 17.0, 6.0, 19.0, 14.0, 3.0, 6.0, 3.0, 6.0, 63.0, 9.0, 28.0, 14.0, 8.0, 57.0, 9.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [3.0, 8.0, 6.0, 3.0, 11.0, 6.0, 0.0, 8.0, 0.0, 14.0, 3.0, 16.0, 3.0, 3.0, 19.0, 14.0, 3.0, 8.0, 3.0, 3.0, 8.0, 3.0, 11.0, 3.0, 3.0, 13.0, 0.0, 3.0, 8.0, 9.0, 3.0, 3.0, 6.0, 10.0, 3.0, 8.0, 0.0, 3.0, 23.0, 34.0, 8.0, 8.0, 8.0, 0.0, 3.0, 14.0, 0.0, 9.0, 28.0, 29.0, 6.0, 5.0, 0.0, 0.0, 6.0, 5.0, 6.0, 5.0, 3.0, 8.0, 14.0, 8.0, 3.0, 8.0, 3.0, 0.0, 0.0, 14.0, 0.0, 3.0, 3.0, 0.0, 8.0, 6.0, 6.0, 0.0, 14.0, 6.0, 14.0, 3.0, 14.0, 0.0, 8.0, 3.0, 0.0, 6.0, 8.0, 0.0, 0.0, 6.0, 0.0, 0.0, 0.0, 11.0, 3.0, 0.0, 8.0, 14.0, 3.0, 8.0, 6.0, 13.0, 9.0, 0.0, 17.0, 3.0, 12.0, 11.0, 0.0, 6.0, 11.0, 0.0, 8.0, 3.0, 6.0, 16.0, 9.0, 10.0, 5.0, 6.0, 5.0, 6.0, 9.0, 8.0, 3.0, 11.0, 6.0, 0.0, 31.0, 37.0, 11.0, 9.0, 0.0, 9.0, 0.0, 8.0, 3.0, 3.0, 6.0, 8.0, 10.0, 9.0, 0.0, 6.0, 3.0, 3.0, 9.0, 8.0, 6.0, 8.0, 0.0, 3.0, 17.0, 8.0, 8.0, 3.0, 5.0, 9.0, 6.0, 3.0, 3.0, 14.0, 11.0, 3.0, 14.0, 3.0, 6.0, 8.0, 9.0, 16.0, 6.0, 11.0, 
0.0, 6.0, 14.0, 5.0, 6.0, 8.0, 3.0, 0.0, 6.0, 0.0, 0.0, 3.0, 3.0, 3.0, 29.0, 34.0, 6.0, 3.0, 6.0, 22.0, 8.0, 6.0, 5.0, 3.0, 31.0, 26.0, 3.0, 6.0]}, "sampler_perf": {"mean_env_wait_ms": 12.808520770186506, "mean_processing_ms": 0.16344551542853641, "mean_inference_ms": 4.631216998135988}, "off_policy_estimator": {}, "info": {"num_steps_trained": 192000, "num_steps_sampled": 102400, "sample_time_ms": 320761.187, "load_time_ms": 52.674, "grad_time_ms": 7271.518, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0015625000232830644, "cur_lr": 0.0010000000474974513, "total_loss": -0.25552046298980713, "policy_loss": -0.005265455227345228, "vf_loss": 1.9171754121780396, "vf_explained_var": 0.015465259552001953, "kl": 0.0006017824052833021, "entropy": 1.7836121320724487, "entropy_coeff": 0.1404159963130951, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 102400, "episodes_total": 256, "training_iteration": 8, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_19-56-43", "timestamp": 1660244203, "time_this_iter_s": 27.877708196640015, "time_total_s": 2624.9361000061035, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": 
false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 
8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 2624.9361000061035, "timesteps_since_restore": 102400, "iterations_since_restore": 8, "perf": {"cpu_util_percent": 42.6075, "ram_util_percent": 58.39000000000001}}
-{"episode_reward_max": 68.0, "episode_reward_min": 0.0, "episode_reward_mean": 16.18, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 37.0}, "policy_reward_mean": {"ppo": 8.09}, "custom_metrics": {"sparse_reward_mean": 1.2, "sparse_reward_min": 0, "sparse_reward_max": 20, "shaped_reward_mean": 13.78, "shaped_reward_min": 0, "shaped_reward_max": 36, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 5.25, "onion_pickup_agent_0_min": 1, "onion_pickup_agent_0_max": 12, "onion_pickup_agent_1_mean": 5.27, "onion_pickup_agent_1_min": 1, "onion_pickup_agent_1_max": 14, "useful_onion_pickup_agent_0_mean": 3.31, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 10, "useful_onion_pickup_agent_1_mean": 3.46, "useful_onion_pickup_agent_1_min": 0, "useful_onion_pickup_agent_1_max": 10, "onion_drop_agent_0_mean": 3.5, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 
9, "onion_drop_agent_1_mean": 3.52, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 11, "useful_onion_drop_agent_0_mean": 1.57, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 7, "useful_onion_drop_agent_1_mean": 1.48, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 7, "potting_onion_agent_0_mean": 1.21, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 3, "potting_onion_agent_1_mean": 1.33, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 3, "dish_pickup_agent_0_mean": 3.05, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 2.8, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 0.19, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 2, "useful_dish_pickup_agent_1_mean": 0.28, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 2, "dish_drop_agent_0_mean": 2.4, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 8, "dish_drop_agent_1_mean": 2.07, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 9, "useful_dish_drop_agent_0_mean": 1.07, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 7, "useful_dish_drop_agent_1_mean": 0.92, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 7, "soup_pickup_agent_0_mean": 0.81, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 5, "soup_pickup_agent_1_mean": 0.8, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 0.22, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 1, "soup_delivery_agent_1_mean": 0.17, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 2, "soup_drop_agent_0_mean": 0.54, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 5, "soup_drop_agent_1_mean": 0.49, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 7, "optimal_onion_potting_agent_0_mean": 1.21, "optimal_onion_potting_agent_0_min": 0, 
"optimal_onion_potting_agent_0_max": 3, "optimal_onion_potting_agent_1_mean": 1.33, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 3, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 1.21, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 3, "viable_onion_potting_agent_1_mean": 1.33, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 3, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [6.0, 
28.0, 3.0, 14.0, 19.0, 57.0, 9.0, 11.0, 9.0, 17.0, 14.0, 14.0, 23.0, 25.0, 11.0, 28.0, 14.0, 14.0, 17.0, 12.0, 3.0, 16.0, 14.0, 22.0, 36.0, 17.0, 12.0, 22.0, 14.0, 16.0, 14.0, 17.0, 68.0, 20.0, 9.0, 8.0, 6.0, 14.0, 19.0, 6.0, 6.0, 17.0, 14.0, 3.0, 25.0, 11.0, 14.0, 9.0, 17.0, 14.0, 17.0, 14.0, 25.0, 17.0, 6.0, 19.0, 14.0, 3.0, 6.0, 3.0, 6.0, 63.0, 9.0, 28.0, 14.0, 8.0, 57.0, 9.0, 11.0, 9.0, 17.0, 8.0, 14.0, 19.0, 6.0, 33.0, 11.0, 6.0, 11.0, 14.0, 16.0, 3.0, 17.0, 6.0, 16.0, 11.0, 3.0, 57.0, 16.0, 8.0, 17.0, 9.0, 57.0, 11.0, 0.0, 11.0, 11.0, 11.0, 22.0, 11.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [0.0, 6.0, 22.0, 6.0, 3.0, 0.0, 8.0, 6.0, 10.0, 9.0, 23.0, 34.0, 3.0, 6.0, 11.0, 0.0, 6.0, 3.0, 11.0, 6.0, 11.0, 3.0, 5.0, 9.0, 17.0, 6.0, 9.0, 16.0, 8.0, 3.0, 6.0, 22.0, 3.0, 11.0, 14.0, 0.0, 6.0, 11.0, 6.0, 6.0, 0.0, 3.0, 5.0, 11.0, 8.0, 6.0, 6.0, 16.0, 11.0, 25.0, 8.0, 9.0, 3.0, 9.0, 16.0, 6.0, 8.0, 6.0, 16.0, 0.0, 11.0, 3.0, 6.0, 11.0, 31.0, 37.0, 11.0, 9.0, 0.0, 9.0, 0.0, 8.0, 3.0, 3.0, 6.0, 8.0, 10.0, 9.0, 0.0, 6.0, 3.0, 3.0, 9.0, 8.0, 6.0, 8.0, 0.0, 3.0, 17.0, 8.0, 8.0, 3.0, 5.0, 9.0, 6.0, 3.0, 3.0, 14.0, 11.0, 3.0, 14.0, 3.0, 6.0, 8.0, 9.0, 16.0, 6.0, 11.0, 0.0, 6.0, 14.0, 5.0, 6.0, 8.0, 3.0, 0.0, 6.0, 0.0, 0.0, 3.0, 3.0, 3.0, 29.0, 34.0, 6.0, 3.0, 6.0, 22.0, 8.0, 6.0, 5.0, 3.0, 31.0, 26.0, 3.0, 6.0, 3.0, 8.0, 6.0, 3.0, 11.0, 6.0, 0.0, 8.0, 0.0, 14.0, 3.0, 16.0, 3.0, 3.0, 19.0, 14.0, 3.0, 8.0, 3.0, 3.0, 8.0, 3.0, 11.0, 3.0, 3.0, 13.0, 0.0, 3.0, 8.0, 9.0, 3.0, 3.0, 6.0, 
10.0, 3.0, 8.0, 0.0, 3.0, 23.0, 34.0, 8.0, 8.0, 8.0, 0.0, 3.0, 14.0, 0.0, 9.0, 28.0, 29.0, 6.0, 5.0, 0.0, 0.0, 6.0, 5.0, 6.0, 5.0, 3.0, 8.0, 14.0, 8.0, 3.0, 8.0]}, "sampler_perf": {"mean_env_wait_ms": 17.933374555696833, "mean_processing_ms": 0.16517403131021888, "mean_inference_ms": 6.240884020190002}, "off_policy_estimator": {}, "info": {"num_steps_trained": 216000, "num_steps_sampled": 115200, "sample_time_ms": 287052.402, "load_time_ms": 50.896, "grad_time_ms": 7408.088, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0007812500116415322, "cur_lr": 0.0010000000474974513, "total_loss": -0.24049125611782074, "policy_loss": -0.005544388201087713, "vf_loss": 1.8025983572006226, "vf_explained_var": 0.016161540523171425, "kl": 0.0006836934480816126, "entropy": 1.7825666666030884, "entropy_coeff": 0.1319040060043335, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 115200, "episodes_total": 288, "training_iteration": 9, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_19-57-09", "timestamp": 1660244229, "time_this_iter_s": 25.946558237075806, "time_total_s": 2650.8826582431793, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, 
"soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, 
"inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 2650.8826582431793, "timesteps_since_restore": 115200, "iterations_since_restore": 9, "perf": {"cpu_util_percent": 38.36216216216216, "ram_util_percent": 57.93513513513512}}
-{"episode_reward_max": 57.0, "episode_reward_min": 0.0, "episode_reward_mean": 15.66, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 34.0}, "policy_reward_mean": {"ppo": 7.83}, "custom_metrics": {"sparse_reward_mean": 1.0, "sparse_reward_min": 0, "sparse_reward_max": 20, "shaped_reward_mean": 13.66, "shaped_reward_min": 0, "shaped_reward_max": 36, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 4.85, "onion_pickup_agent_0_min": 0, "onion_pickup_agent_0_max": 10, "onion_pickup_agent_1_mean": 5.36, "onion_pickup_agent_1_min": 1, "onion_pickup_agent_1_max": 14, "useful_onion_pickup_agent_0_mean": 3.23, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 10, "useful_onion_pickup_agent_1_mean": 3.74, "useful_onion_pickup_agent_1_min": 1, "useful_onion_pickup_agent_1_max": 10, "onion_drop_agent_0_mean": 3.26, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 
9, "onion_drop_agent_1_mean": 3.47, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 11, "useful_onion_drop_agent_0_mean": 1.31, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 7, "useful_onion_drop_agent_1_mean": 1.36, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 9, "potting_onion_agent_0_mean": 1.1, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 3, "potting_onion_agent_1_mean": 1.43, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 4, "dish_pickup_agent_0_mean": 3.36, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 2.81, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 0.21, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 2, "useful_dish_pickup_agent_1_mean": 0.23, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 2, "dish_drop_agent_0_mean": 2.57, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 10, "dish_drop_agent_1_mean": 2.14, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 9, "useful_dish_drop_agent_0_mean": 1.14, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 8, "useful_dish_drop_agent_1_mean": 0.93, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 7, "soup_pickup_agent_0_mean": 0.89, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 5, "soup_pickup_agent_1_mean": 0.62, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 5, "soup_delivery_agent_0_mean": 0.25, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 2, "soup_delivery_agent_1_mean": 0.15, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 2, "soup_drop_agent_0_mean": 0.55, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 5, "soup_drop_agent_1_mean": 0.33, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 4, "optimal_onion_potting_agent_0_mean": 1.1, "optimal_onion_potting_agent_0_min": 0, 
"optimal_onion_potting_agent_0_max": 3, "optimal_onion_potting_agent_1_mean": 1.43, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 4, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 1.1, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 3, "viable_onion_potting_agent_1_mean": 1.43, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 4, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [14.0, 
19.0, 9.0, 6.0, 9.0, 17.0, 6.0, 6.0, 20.0, 20.0, 17.0, 14.0, 11.0, 9.0, 57.0, 16.0, 17.0, 14.0, 11.0, 8.0, 11.0, 16.0, 14.0, 25.0, 22.0, 3.0, 12.0, 9.0, 9.0, 17.0, 17.0, 3.0, 14.0, 8.0, 57.0, 9.0, 11.0, 9.0, 17.0, 8.0, 14.0, 19.0, 6.0, 33.0, 11.0, 6.0, 11.0, 14.0, 16.0, 3.0, 17.0, 6.0, 16.0, 11.0, 3.0, 57.0, 16.0, 8.0, 17.0, 9.0, 57.0, 11.0, 0.0, 11.0, 11.0, 11.0, 22.0, 11.0, 6.0, 28.0, 3.0, 14.0, 19.0, 57.0, 9.0, 11.0, 9.0, 17.0, 14.0, 14.0, 23.0, 25.0, 11.0, 28.0, 14.0, 14.0, 17.0, 12.0, 3.0, 16.0, 14.0, 22.0, 36.0, 17.0, 12.0, 22.0, 14.0, 16.0, 14.0, 17.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [8.0, 6.0, 10.0, 9.0, 3.0, 6.0, 0.0, 6.0, 3.0, 6.0, 8.0, 9.0, 6.0, 0.0, 3.0, 3.0, 14.0, 6.0, 9.0, 11.0, 14.0, 3.0, 3.0, 11.0, 11.0, 0.0, 3.0, 6.0, 23.0, 34.0, 11.0, 5.0, 14.0, 3.0, 8.0, 6.0, 8.0, 3.0, 0.0, 8.0, 8.0, 3.0, 10.0, 6.0, 6.0, 8.0, 13.0, 12.0, 8.0, 14.0, 0.0, 3.0, 6.0, 6.0, 0.0, 9.0, 3.0, 6.0, 9.0, 8.0, 8.0, 9.0, 0.0, 3.0, 8.0, 6.0, 5.0, 3.0, 31.0, 26.0, 3.0, 6.0, 3.0, 8.0, 6.0, 3.0, 11.0, 6.0, 0.0, 8.0, 0.0, 14.0, 3.0, 16.0, 3.0, 3.0, 19.0, 14.0, 3.0, 8.0, 3.0, 3.0, 8.0, 3.0, 11.0, 3.0, 3.0, 13.0, 0.0, 3.0, 8.0, 9.0, 3.0, 3.0, 6.0, 10.0, 3.0, 8.0, 0.0, 3.0, 23.0, 34.0, 8.0, 8.0, 8.0, 0.0, 3.0, 14.0, 0.0, 9.0, 28.0, 29.0, 6.0, 5.0, 0.0, 0.0, 6.0, 5.0, 6.0, 5.0, 3.0, 8.0, 14.0, 8.0, 3.0, 8.0, 0.0, 6.0, 22.0, 6.0, 3.0, 0.0, 8.0, 6.0, 10.0, 9.0, 23.0, 34.0, 3.0, 6.0, 11.0, 0.0, 6.0, 3.0, 11.0, 6.0, 11.0, 3.0, 5.0, 9.0, 17.0, 6.0, 9.0, 16.0, 8.0, 3.0, 6.0, 22.0, 3.0, 
11.0, 14.0, 0.0, 6.0, 11.0, 6.0, 6.0, 0.0, 3.0, 5.0, 11.0, 8.0, 6.0, 6.0, 16.0, 11.0, 25.0, 8.0, 9.0, 3.0, 9.0, 16.0, 6.0, 8.0, 6.0, 16.0, 0.0, 11.0, 3.0, 6.0, 11.0]}, "sampler_perf": {"mean_env_wait_ms": 16.781937376069852, "mean_processing_ms": 0.16502285382446033, "mean_inference_ms": 5.880940450206554}, "off_policy_estimator": {}, "info": {"num_steps_trained": 240000, "num_steps_sampled": 128000, "sample_time_ms": 260081.675, "load_time_ms": 49.456, "grad_time_ms": 7564.799, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0003906250058207661, "cur_lr": 0.0010000000474974513, "total_loss": -0.22780847549438477, "policy_loss": -0.00820181891322136, "vf_loss": 1.5030304193496704, "vf_explained_var": 0.01960124634206295, "kl": 0.0007011755951680243, "entropy": 1.7809678316116333, "entropy_coeff": 0.1233920007944107, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 128000, "episodes_total": 320, "training_iteration": 10, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_19-57-36", "timestamp": 1660244256, "time_this_iter_s": 26.38225793838501, "time_total_s": 2677.2649161815643, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": 
null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, 
"inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 2677.2649161815643, "timesteps_since_restore": 128000, "iterations_since_restore": 10, "perf": {"cpu_util_percent": 35.91621621621621, "ram_util_percent": 58.01891891891893}}
-{"episode_reward_max": 65.0, "episode_reward_min": 3.0, "episode_reward_mean": 16.94, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 37.0}, "policy_reward_mean": {"ppo": 8.47}, "custom_metrics": {"sparse_reward_mean": 1.0, "sparse_reward_min": 0, "sparse_reward_max": 20, "shaped_reward_mean": 14.94, "shaped_reward_min": 3, "shaped_reward_max": 36, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 4.74, "onion_pickup_agent_0_min": 0, "onion_pickup_agent_0_max": 10, "onion_pickup_agent_1_mean": 5.61, "onion_pickup_agent_1_min": 1, "onion_pickup_agent_1_max": 12, "useful_onion_pickup_agent_0_mean": 3.22, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 9, "useful_onion_pickup_agent_1_mean": 3.89, "useful_onion_pickup_agent_1_min": 1, "useful_onion_pickup_agent_1_max": 10, "onion_drop_agent_0_mean": 3.13, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 
9, "onion_drop_agent_1_mean": 3.57, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 11, "useful_onion_drop_agent_0_mean": 1.21, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 7, "useful_onion_drop_agent_1_mean": 1.46, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 9, "potting_onion_agent_0_mean": 1.18, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 4, "potting_onion_agent_1_mean": 1.59, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 4, "dish_pickup_agent_0_mean": 3.46, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 2.72, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 0.23, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 2, "useful_dish_pickup_agent_1_mean": 0.28, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 2, "dish_drop_agent_0_mean": 2.6, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 10, "dish_drop_agent_1_mean": 1.98, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 9, "useful_dish_drop_agent_0_mean": 1.11, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 8, "useful_dish_drop_agent_1_mean": 0.85, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 6, "soup_pickup_agent_0_mean": 1.08, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 6, "soup_pickup_agent_1_mean": 0.69, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 5, "soup_delivery_agent_0_mean": 0.29, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 2, "soup_delivery_agent_1_mean": 0.19, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 2, "soup_drop_agent_0_mean": 0.65, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 5, "soup_drop_agent_1_mean": 0.4, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 4, "optimal_onion_potting_agent_0_mean": 1.18, "optimal_onion_potting_agent_0_min": 0, 
"optimal_onion_potting_agent_0_max": 4, "optimal_onion_potting_agent_1_mean": 1.59, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 4, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 1.18, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 4, "viable_onion_potting_agent_1_mean": 1.59, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 4, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": 
[17.0, 14.0, 20.0, 25.0, 20.0, 14.0, 11.0, 9.0, 3.0, 17.0, 65.0, 11.0, 9.0, 23.0, 65.0, 11.0, 8.0, 25.0, 11.0, 28.0, 19.0, 20.0, 17.0, 9.0, 11.0, 60.0, 17.0, 3.0, 27.0, 25.0, 11.0, 8.0, 11.0, 11.0, 22.0, 11.0, 6.0, 28.0, 3.0, 14.0, 19.0, 57.0, 9.0, 11.0, 9.0, 17.0, 14.0, 14.0, 23.0, 25.0, 11.0, 28.0, 14.0, 14.0, 17.0, 12.0, 3.0, 16.0, 14.0, 22.0, 36.0, 17.0, 12.0, 22.0, 14.0, 16.0, 14.0, 17.0, 14.0, 19.0, 9.0, 6.0, 9.0, 17.0, 6.0, 6.0, 20.0, 20.0, 17.0, 14.0, 11.0, 9.0, 57.0, 16.0, 17.0, 14.0, 11.0, 8.0, 11.0, 16.0, 14.0, 25.0, 22.0, 3.0, 12.0, 9.0, 9.0, 17.0, 17.0, 3.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [9.0, 8.0, 8.0, 6.0, 14.0, 6.0, 6.0, 19.0, 6.0, 14.0, 0.0, 14.0, 3.0, 8.0, 3.0, 6.0, 0.0, 3.0, 9.0, 8.0, 28.0, 37.0, 5.0, 6.0, 3.0, 6.0, 14.0, 9.0, 31.0, 34.0, 5.0, 6.0, 0.0, 8.0, 11.0, 14.0, 5.0, 6.0, 9.0, 19.0, 8.0, 11.0, 12.0, 8.0, 3.0, 14.0, 0.0, 9.0, 0.0, 11.0, 26.0, 34.0, 8.0, 9.0, 0.0, 3.0, 13.0, 14.0, 14.0, 11.0, 11.0, 0.0, 8.0, 0.0, 6.0, 5.0, 3.0, 8.0, 14.0, 8.0, 3.0, 8.0, 0.0, 6.0, 22.0, 6.0, 3.0, 0.0, 8.0, 6.0, 10.0, 9.0, 23.0, 34.0, 3.0, 6.0, 11.0, 0.0, 6.0, 3.0, 11.0, 6.0, 11.0, 3.0, 5.0, 9.0, 17.0, 6.0, 9.0, 16.0, 8.0, 3.0, 6.0, 22.0, 3.0, 11.0, 14.0, 0.0, 6.0, 11.0, 6.0, 6.0, 0.0, 3.0, 5.0, 11.0, 8.0, 6.0, 6.0, 16.0, 11.0, 25.0, 8.0, 9.0, 3.0, 9.0, 16.0, 6.0, 8.0, 6.0, 16.0, 0.0, 11.0, 3.0, 6.0, 11.0, 8.0, 6.0, 10.0, 9.0, 3.0, 6.0, 0.0, 6.0, 3.0, 6.0, 8.0, 9.0, 6.0, 0.0, 3.0, 3.0, 14.0, 6.0, 9.0, 11.0, 14.0, 3.0, 3.0, 11.0, 11.0, 0.0, 3.0, 6.0, 23.0, 
34.0, 11.0, 5.0, 14.0, 3.0, 8.0, 6.0, 8.0, 3.0, 0.0, 8.0, 8.0, 3.0, 10.0, 6.0, 6.0, 8.0, 13.0, 12.0, 8.0, 14.0, 0.0, 3.0, 6.0, 6.0, 0.0, 9.0, 3.0, 6.0, 9.0, 8.0, 8.0, 9.0, 0.0, 3.0]}, "sampler_perf": {"mean_env_wait_ms": 15.10732110386456, "mean_processing_ms": 0.16432950718550896, "mean_inference_ms": 5.354497783846054}, "off_policy_estimator": {}, "info": {"num_steps_trained": 264000, "num_steps_sampled": 140800, "sample_time_ms": 260129.157, "load_time_ms": 39.286, "grad_time_ms": 7759.155, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.00019531250291038305, "cur_lr": 0.0010000000474974513, "total_loss": -0.21097473800182343, "policy_loss": -0.006903436034917831, "vf_loss": 2.839796781539917, "vf_explained_var": 0.029899099841713905, "kl": 0.0006908049690537155, "entropy": 1.7788597345352173, "entropy_coeff": 0.11488000303506851, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 140800, "episodes_total": 352, "training_iteration": 11, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_19-58-02", "timestamp": 1660244282, "time_this_iter_s": 26.244181156158447, "time_total_s": 2703.509097337723, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 
0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": 
{"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 2703.509097337723, "timesteps_since_restore": 140800, "iterations_since_restore": 11, "perf": {"cpu_util_percent": 36.42631578947368, "ram_util_percent": 57.83157894736843}}
-{"episode_reward_max": 65.0, "episode_reward_min": 3.0, "episode_reward_mean": 18.34, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 40.0}, "policy_reward_mean": {"ppo": 9.17}, "custom_metrics": {"sparse_reward_mean": 1.4, "sparse_reward_min": 0, "sparse_reward_max": 20, "shaped_reward_mean": 15.54, "shaped_reward_min": 3, "shaped_reward_max": 31, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 4.67, "onion_pickup_agent_0_min": 0, "onion_pickup_agent_0_max": 10, "onion_pickup_agent_1_mean": 5.71, "onion_pickup_agent_1_min": 0, "onion_pickup_agent_1_max": 12, "useful_onion_pickup_agent_0_mean": 3.21, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 9, "useful_onion_pickup_agent_1_mean": 4.04, "useful_onion_pickup_agent_1_min": 0, "useful_onion_pickup_agent_1_max": 9, "onion_drop_agent_0_mean": 3.08, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 9, 
"onion_drop_agent_1_mean": 3.55, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 11, "useful_onion_drop_agent_0_mean": 1.2, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 7, "useful_onion_drop_agent_1_mean": 1.4, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 9, "potting_onion_agent_0_mean": 1.18, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 4, "potting_onion_agent_1_mean": 1.74, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 6, "dish_pickup_agent_0_mean": 3.55, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 2.98, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 0.18, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 1, "useful_dish_pickup_agent_1_mean": 0.28, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 2, "dish_drop_agent_0_mean": 2.66, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 12, "dish_drop_agent_1_mean": 2.19, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 9, "useful_dish_drop_agent_0_mean": 1.2, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 8, "useful_dish_drop_agent_1_mean": 0.92, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 6, "soup_pickup_agent_0_mean": 1.05, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 6, "soup_pickup_agent_1_mean": 0.76, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 5, "soup_delivery_agent_0_mean": 0.29, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 2, "soup_delivery_agent_1_mean": 0.22, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 1, "soup_drop_agent_0_mean": 0.61, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 4, "soup_drop_agent_1_mean": 0.42, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 4, "optimal_onion_potting_agent_0_mean": 1.18, "optimal_onion_potting_agent_0_min": 0, 
"optimal_onion_potting_agent_0_max": 4, "optimal_onion_potting_agent_1_mean": 1.74, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 6, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 1.18, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 4, "viable_onion_potting_agent_1_mean": 1.74, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 6, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": 
[20.0, 11.0, 60.0, 25.0, 22.0, 65.0, 12.0, 11.0, 23.0, 3.0, 30.0, 17.0, 19.0, 31.0, 28.0, 19.0, 14.0, 25.0, 25.0, 22.0, 20.0, 11.0, 11.0, 57.0, 17.0, 6.0, 17.0, 11.0, 11.0, 11.0, 14.0, 14.0, 14.0, 16.0, 14.0, 17.0, 14.0, 19.0, 9.0, 6.0, 9.0, 17.0, 6.0, 6.0, 20.0, 20.0, 17.0, 14.0, 11.0, 9.0, 57.0, 16.0, 17.0, 14.0, 11.0, 8.0, 11.0, 16.0, 14.0, 25.0, 22.0, 3.0, 12.0, 9.0, 9.0, 17.0, 17.0, 3.0, 17.0, 14.0, 20.0, 25.0, 20.0, 14.0, 11.0, 9.0, 3.0, 17.0, 65.0, 11.0, 9.0, 23.0, 65.0, 11.0, 8.0, 25.0, 11.0, 28.0, 19.0, 20.0, 17.0, 9.0, 11.0, 60.0, 17.0, 3.0, 27.0, 25.0, 11.0, 8.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [11.0, 9.0, 0.0, 11.0, 20.0, 40.0, 12.0, 13.0, 11.0, 11.0, 34.0, 31.0, 3.0, 9.0, 5.0, 6.0, 0.0, 23.0, 3.0, 0.0, 16.0, 14.0, 6.0, 11.0, 13.0, 6.0, 17.0, 14.0, 14.0, 14.0, 6.0, 13.0, 5.0, 9.0, 16.0, 9.0, 16.0, 9.0, 5.0, 17.0, 8.0, 12.0, 3.0, 8.0, 0.0, 11.0, 26.0, 31.0, 0.0, 17.0, 6.0, 0.0, 3.0, 14.0, 8.0, 3.0, 6.0, 5.0, 8.0, 3.0, 3.0, 11.0, 11.0, 3.0, 8.0, 6.0, 16.0, 0.0, 11.0, 3.0, 6.0, 11.0, 8.0, 6.0, 10.0, 9.0, 3.0, 6.0, 0.0, 6.0, 3.0, 6.0, 8.0, 9.0, 6.0, 0.0, 3.0, 3.0, 14.0, 6.0, 9.0, 11.0, 14.0, 3.0, 3.0, 11.0, 11.0, 0.0, 3.0, 6.0, 23.0, 34.0, 11.0, 5.0, 14.0, 3.0, 8.0, 6.0, 8.0, 3.0, 0.0, 8.0, 8.0, 3.0, 10.0, 6.0, 6.0, 8.0, 13.0, 12.0, 8.0, 14.0, 0.0, 3.0, 6.0, 6.0, 0.0, 9.0, 3.0, 6.0, 9.0, 8.0, 8.0, 9.0, 0.0, 3.0, 9.0, 8.0, 8.0, 6.0, 14.0, 6.0, 6.0, 19.0, 6.0, 14.0, 0.0, 14.0, 3.0, 8.0, 3.0, 6.0, 0.0, 3.0, 9.0, 8.0, 28.0, 37.0, 5.0, 6.0, 3.0, 6.0, 14.0, 9.0, 
31.0, 34.0, 5.0, 6.0, 0.0, 8.0, 11.0, 14.0, 5.0, 6.0, 9.0, 19.0, 8.0, 11.0, 12.0, 8.0, 3.0, 14.0, 0.0, 9.0, 0.0, 11.0, 26.0, 34.0, 8.0, 9.0, 0.0, 3.0, 13.0, 14.0, 14.0, 11.0, 11.0, 0.0, 8.0, 0.0]}, "sampler_perf": {"mean_env_wait_ms": 13.746840147909538, "mean_processing_ms": 0.16393515170904638, "mean_inference_ms": 4.926997257535761}, "off_policy_estimator": {}, "info": {"num_steps_trained": 288000, "num_steps_sampled": 153600, "sample_time_ms": 260207.009, "load_time_ms": 38.842, "grad_time_ms": 8068.402, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 9.765625145519152e-05, "cur_lr": 0.0010000000474974513, "total_loss": -0.19599168002605438, "policy_loss": -0.007250078488141298, "vf_loss": 2.85541033744812, "vf_explained_var": 0.045025069266557693, "kl": 0.0006896388367749751, "entropy": 1.7771064043045044, "entropy_coeff": 0.10636799782514572, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 153600, "episodes_total": 384, "training_iteration": 12, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_19-58-29", "timestamp": 1660244309, "time_this_iter_s": 27.08998394012451, "time_total_s": 2730.5990812778473, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": 
{}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": 
{"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 2730.5990812778473, "timesteps_since_restore": 153600, "iterations_since_restore": 12, "perf": {"cpu_util_percent": 37.42368421052632, "ram_util_percent": 58.20789473684212}}
-{"episode_reward_max": 76.0, "episode_reward_min": 3.0, "episode_reward_mean": 21.2, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 45.0}, "policy_reward_mean": {"ppo": 10.6}, "custom_metrics": {"sparse_reward_mean": 1.8, "sparse_reward_min": 0, "sparse_reward_max": 20, "shaped_reward_mean": 17.6, "shaped_reward_min": 3, "shaped_reward_max": 37, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 4.63, "onion_pickup_agent_0_min": 0, "onion_pickup_agent_0_max": 10, "onion_pickup_agent_1_mean": 5.81, "onion_pickup_agent_1_min": 0, "onion_pickup_agent_1_max": 12, "useful_onion_pickup_agent_0_mean": 3.24, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 9, "useful_onion_pickup_agent_1_mean": 4.08, "useful_onion_pickup_agent_1_min": 0, "useful_onion_pickup_agent_1_max": 9, "onion_drop_agent_0_mean": 2.93, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 8, 
"onion_drop_agent_1_mean": 3.44, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 9, "useful_onion_drop_agent_0_mean": 1.14, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 7, "useful_onion_drop_agent_1_mean": 1.43, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 7, "potting_onion_agent_0_mean": 1.33, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 4, "potting_onion_agent_1_mean": 1.98, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 6, "dish_pickup_agent_0_mean": 3.66, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 3.21, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 0.21, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 2, "useful_dish_pickup_agent_1_mean": 0.28, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 2, "dish_drop_agent_0_mean": 2.73, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 12, "dish_drop_agent_1_mean": 2.31, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 9, "useful_dish_drop_agent_0_mean": 1.24, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 8, "useful_dish_drop_agent_1_mean": 0.92, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 6, "soup_pickup_agent_0_mean": 1.19, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 11, "soup_pickup_agent_1_mean": 0.96, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 0.27, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 1, "soup_delivery_agent_1_mean": 0.31, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 2, "soup_drop_agent_0_mean": 0.74, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 11, "soup_drop_agent_1_mean": 0.54, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 7, "optimal_onion_potting_agent_0_mean": 1.33, "optimal_onion_potting_agent_0_min": 0, 
"optimal_onion_potting_agent_0_max": 4, "optimal_onion_potting_agent_1_mean": 1.98, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 6, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 1.33, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 4, "viable_onion_potting_agent_1_mean": 1.98, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 6, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": 
[76.0, 30.0, 25.0, 9.0, 28.0, 17.0, 68.0, 6.0, 3.0, 22.0, 17.0, 37.0, 14.0, 34.0, 12.0, 17.0, 27.0, 11.0, 25.0, 11.0, 17.0, 17.0, 25.0, 11.0, 12.0, 3.0, 19.0, 22.0, 60.0, 25.0, 25.0, 34.0, 9.0, 17.0, 17.0, 3.0, 17.0, 14.0, 20.0, 25.0, 20.0, 14.0, 11.0, 9.0, 3.0, 17.0, 65.0, 11.0, 9.0, 23.0, 65.0, 11.0, 8.0, 25.0, 11.0, 28.0, 19.0, 20.0, 17.0, 9.0, 11.0, 60.0, 17.0, 3.0, 27.0, 25.0, 11.0, 8.0, 20.0, 11.0, 60.0, 25.0, 22.0, 65.0, 12.0, 11.0, 23.0, 3.0, 30.0, 17.0, 19.0, 31.0, 28.0, 19.0, 14.0, 25.0, 25.0, 22.0, 20.0, 11.0, 11.0, 57.0, 17.0, 6.0, 17.0, 11.0, 11.0, 11.0, 14.0, 14.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [31.0, 45.0, 19.0, 11.0, 5.0, 20.0, 6.0, 3.0, 6.0, 22.0, 6.0, 11.0, 28.0, 40.0, 6.0, 0.0, 3.0, 0.0, 11.0, 11.0, 8.0, 9.0, 20.0, 17.0, 5.0, 9.0, 20.0, 14.0, 9.0, 3.0, 5.0, 12.0, 14.0, 13.0, 3.0, 8.0, 13.0, 12.0, 11.0, 0.0, 5.0, 12.0, 9.0, 8.0, 17.0, 8.0, 0.0, 11.0, 9.0, 3.0, 3.0, 0.0, 6.0, 13.0, 8.0, 14.0, 28.0, 32.0, 19.0, 6.0, 8.0, 17.0, 19.0, 15.0, 3.0, 6.0, 9.0, 8.0, 8.0, 9.0, 0.0, 3.0, 9.0, 8.0, 8.0, 6.0, 14.0, 6.0, 6.0, 19.0, 6.0, 14.0, 0.0, 14.0, 3.0, 8.0, 3.0, 6.0, 0.0, 3.0, 9.0, 8.0, 28.0, 37.0, 5.0, 6.0, 3.0, 6.0, 14.0, 9.0, 31.0, 34.0, 5.0, 6.0, 0.0, 8.0, 11.0, 14.0, 5.0, 6.0, 9.0, 19.0, 8.0, 11.0, 12.0, 8.0, 3.0, 14.0, 0.0, 9.0, 0.0, 11.0, 26.0, 34.0, 8.0, 9.0, 0.0, 3.0, 13.0, 14.0, 14.0, 11.0, 11.0, 0.0, 8.0, 0.0, 11.0, 9.0, 0.0, 11.0, 20.0, 40.0, 12.0, 13.0, 11.0, 11.0, 34.0, 31.0, 3.0, 9.0, 5.0, 6.0, 0.0, 23.0, 3.0, 0.0, 16.0, 14.0, 6.0, 11.0, 
13.0, 6.0, 17.0, 14.0, 14.0, 14.0, 6.0, 13.0, 5.0, 9.0, 16.0, 9.0, 16.0, 9.0, 5.0, 17.0, 8.0, 12.0, 3.0, 8.0, 0.0, 11.0, 26.0, 31.0, 0.0, 17.0, 6.0, 0.0, 3.0, 14.0, 8.0, 3.0, 6.0, 5.0, 8.0, 3.0, 3.0, 11.0, 11.0, 3.0]}, "sampler_perf": {"mean_env_wait_ms": 12.621089528193751, "mean_processing_ms": 0.1639912702671728, "mean_inference_ms": 4.5967771099672925}, "off_policy_estimator": {}, "info": {"num_steps_trained": 312000, "num_steps_sampled": 166400, "sample_time_ms": 261554.79, "load_time_ms": 38.918, "grad_time_ms": 8201.071, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 4.882812572759576e-05, "cur_lr": 0.0010000000474974513, "total_loss": -0.18183590471744537, "policy_loss": -0.00839205738157034, "vf_loss": 3.3925907611846924, "vf_explained_var": 0.04012133553624153, "kl": 0.0007842599879950285, "entropy": 1.775907039642334, "entropy_coeff": 0.09785600006580353, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 166400, "episodes_total": 416, "training_iteration": 13, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_19-59-09", "timestamp": 1660244349, "time_this_iter_s": 39.64977407455444, "time_total_s": 2770.2488553524017, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": 
null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, 
"local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 2770.2488553524017, "timesteps_since_restore": 166400, "iterations_since_restore": 13, "perf": {"cpu_util_percent": 43.457142857142856, "ram_util_percent": 59.38095238095238}}
-{"episode_reward_max": 76.0, "episode_reward_min": 3.0, "episode_reward_mean": 22.65, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 45.0}, "policy_reward_mean": {"ppo": 11.325}, "custom_metrics": {"sparse_reward_mean": 1.8, "sparse_reward_min": 0, "sparse_reward_max": 20, "shaped_reward_mean": 19.05, "shaped_reward_min": 3, "shaped_reward_max": 44, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 4.74, "onion_pickup_agent_0_min": 0, "onion_pickup_agent_0_max": 10, "onion_pickup_agent_1_mean": 5.54, "onion_pickup_agent_1_min": 0, "onion_pickup_agent_1_max": 12, "useful_onion_pickup_agent_0_mean": 3.3, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 9, "useful_onion_pickup_agent_1_mean": 3.91, "useful_onion_pickup_agent_1_min": 0, "useful_onion_pickup_agent_1_max": 9, "onion_drop_agent_0_mean": 2.85, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 
8, "onion_drop_agent_1_mean": 3.15, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 9, "useful_onion_drop_agent_0_mean": 1.14, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 7, "useful_onion_drop_agent_1_mean": 1.32, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 7, "potting_onion_agent_0_mean": 1.51, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 6, "potting_onion_agent_1_mean": 1.98, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 6, "dish_pickup_agent_0_mean": 3.54, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 3.23, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 0.27, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 2, "useful_dish_pickup_agent_1_mean": 0.29, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 3, "dish_drop_agent_0_mean": 2.55, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 12, "dish_drop_agent_1_mean": 2.29, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 1.07, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 10, "useful_dish_drop_agent_1_mean": 0.88, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 6, "soup_pickup_agent_0_mean": 1.17, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 11, "soup_pickup_agent_1_mean": 1.08, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 0.27, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 1, "soup_delivery_agent_1_mean": 0.34, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 2, "soup_drop_agent_0_mean": 0.71, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 11, "soup_drop_agent_1_mean": 0.6, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 7, "optimal_onion_potting_agent_0_mean": 1.51, "optimal_onion_potting_agent_0_min": 0, 
"optimal_onion_potting_agent_0_max": 6, "optimal_onion_potting_agent_1_mean": 1.98, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 6, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 1.51, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 6, "viable_onion_potting_agent_1_mean": 1.98, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 6, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": 
[19.0, 28.0, 14.0, 17.0, 17.0, 19.0, 17.0, 22.0, 63.0, 66.0, 22.0, 14.0, 9.0, 22.0, 14.0, 22.0, 20.0, 71.0, 25.0, 23.0, 3.0, 22.0, 22.0, 16.0, 20.0, 9.0, 24.0, 44.0, 12.0, 17.0, 20.0, 20.0, 27.0, 25.0, 11.0, 8.0, 20.0, 11.0, 60.0, 25.0, 22.0, 65.0, 12.0, 11.0, 23.0, 3.0, 30.0, 17.0, 19.0, 31.0, 28.0, 19.0, 14.0, 25.0, 25.0, 22.0, 20.0, 11.0, 11.0, 57.0, 17.0, 6.0, 17.0, 11.0, 11.0, 11.0, 14.0, 14.0, 76.0, 30.0, 25.0, 9.0, 28.0, 17.0, 68.0, 6.0, 3.0, 22.0, 17.0, 37.0, 14.0, 34.0, 12.0, 17.0, 27.0, 11.0, 25.0, 11.0, 17.0, 17.0, 25.0, 11.0, 12.0, 3.0, 19.0, 22.0, 60.0, 25.0, 25.0, 34.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [8.0, 11.0, 17.0, 11.0, 3.0, 11.0, 5.0, 12.0, 3.0, 14.0, 3.0, 16.0, 11.0, 6.0, 14.0, 8.0, 31.0, 32.0, 29.0, 37.0, 11.0, 11.0, 5.0, 9.0, 9.0, 0.0, 11.0, 11.0, 8.0, 6.0, 8.0, 14.0, 11.0, 9.0, 41.0, 30.0, 11.0, 14.0, 9.0, 14.0, 0.0, 3.0, 14.0, 8.0, 14.0, 8.0, 8.0, 8.0, 14.0, 6.0, 3.0, 6.0, 3.0, 21.0, 14.0, 30.0, 3.0, 9.0, 11.0, 6.0, 12.0, 8.0, 9.0, 11.0, 13.0, 14.0, 14.0, 11.0, 11.0, 0.0, 8.0, 0.0, 11.0, 9.0, 0.0, 11.0, 20.0, 40.0, 12.0, 13.0, 11.0, 11.0, 34.0, 31.0, 3.0, 9.0, 5.0, 6.0, 0.0, 23.0, 3.0, 0.0, 16.0, 14.0, 6.0, 11.0, 13.0, 6.0, 17.0, 14.0, 14.0, 14.0, 6.0, 13.0, 5.0, 9.0, 16.0, 9.0, 16.0, 9.0, 5.0, 17.0, 8.0, 12.0, 3.0, 8.0, 0.0, 11.0, 26.0, 31.0, 0.0, 17.0, 6.0, 0.0, 3.0, 14.0, 8.0, 3.0, 6.0, 5.0, 8.0, 3.0, 3.0, 11.0, 11.0, 3.0, 31.0, 45.0, 19.0, 11.0, 5.0, 20.0, 6.0, 3.0, 6.0, 22.0, 6.0, 11.0, 28.0, 40.0, 6.0, 0.0, 3.0, 0.0, 11.0, 11.0, 8.0, 
9.0, 20.0, 17.0, 5.0, 9.0, 20.0, 14.0, 9.0, 3.0, 5.0, 12.0, 14.0, 13.0, 3.0, 8.0, 13.0, 12.0, 11.0, 0.0, 5.0, 12.0, 9.0, 8.0, 17.0, 8.0, 0.0, 11.0, 9.0, 3.0, 3.0, 0.0, 6.0, 13.0, 8.0, 14.0, 28.0, 32.0, 19.0, 6.0, 8.0, 17.0, 19.0, 15.0]}, "sampler_perf": {"mean_env_wait_ms": 11.674808393625103, "mean_processing_ms": 0.16471740447247565, "mean_inference_ms": 4.325696825802514}, "off_policy_estimator": {}, "info": {"num_steps_trained": 336000, "num_steps_sampled": 179200, "sample_time_ms": 262067.662, "load_time_ms": 39.631, "grad_time_ms": 8508.383, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 2.441406286379788e-05, "cur_lr": 0.0010000000474974513, "total_loss": -0.169602632522583, "policy_loss": -0.011421400122344494, "vf_loss": 3.2296648025512695, "vf_explained_var": 0.07911600917577744, "kl": 0.0008258241578005254, "entropy": 1.7740892171859741, "entropy_coeff": 0.08934400230646133, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 179200, "episodes_total": 448, "training_iteration": 14, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_19-59-41", "timestamp": 1660244381, "time_this_iter_s": 32.64548587799072, "time_total_s": 2802.8943412303925, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, 
"custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, 
"local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 2802.8943412303925, "timesteps_since_restore": 179200, "iterations_since_restore": 14, "perf": {"cpu_util_percent": 44.92765957446808, "ram_util_percent": 58.32340425531916}}
-{"episode_reward_max": 84.0, "episode_reward_min": 3.0, "episode_reward_mean": 23.09, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 51.0}, "policy_reward_mean": {"ppo": 11.545}, "custom_metrics": {"sparse_reward_mean": 1.8, "sparse_reward_min": 0, "sparse_reward_max": 20, "shaped_reward_mean": 19.49, "shaped_reward_min": 3, "shaped_reward_max": 44, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 4.62, "onion_pickup_agent_0_min": 0, "onion_pickup_agent_0_max": 11, "onion_pickup_agent_1_mean": 5.9, "onion_pickup_agent_1_min": 0, "onion_pickup_agent_1_max": 14, "useful_onion_pickup_agent_0_mean": 2.94, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 9, "useful_onion_pickup_agent_1_mean": 4.0, "useful_onion_pickup_agent_1_min": 0, "useful_onion_pickup_agent_1_max": 9, "onion_drop_agent_0_mean": 2.68, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 8, 
"onion_drop_agent_1_mean": 3.41, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 9, "useful_onion_drop_agent_0_mean": 1.34, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 7, "useful_onion_drop_agent_1_mean": 1.61, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 7, "potting_onion_agent_0_mean": 1.53, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 6, "potting_onion_agent_1_mean": 2.07, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 6, "dish_pickup_agent_0_mean": 3.42, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 3.01, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 0.35, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 2, "useful_dish_pickup_agent_1_mean": 0.28, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 3, "dish_drop_agent_0_mean": 2.47, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 10, "dish_drop_agent_1_mean": 2.09, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.91, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 10, "useful_dish_drop_agent_1_mean": 0.75, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 6, "soup_pickup_agent_0_mean": 1.13, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 11, "soup_pickup_agent_1_mean": 1.03, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 0.28, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 2, "soup_delivery_agent_1_mean": 0.31, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 2, "soup_drop_agent_0_mean": 0.69, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 11, "soup_drop_agent_1_mean": 0.59, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 7, "optimal_onion_potting_agent_0_mean": 1.53, "optimal_onion_potting_agent_0_min": 0, 
"optimal_onion_potting_agent_0_max": 6, "optimal_onion_potting_agent_1_mean": 2.07, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 6, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 1.53, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 6, "viable_onion_potting_agent_1_mean": 2.07, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 6, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": 
[17.0, 84.0, 14.0, 9.0, 9.0, 34.0, 22.0, 9.0, 23.0, 20.0, 9.0, 36.0, 14.0, 22.0, 11.0, 20.0, 25.0, 17.0, 6.0, 9.0, 30.0, 60.0, 9.0, 25.0, 22.0, 14.0, 23.0, 28.0, 19.0, 6.0, 76.0, 25.0, 11.0, 11.0, 14.0, 14.0, 76.0, 30.0, 25.0, 9.0, 28.0, 17.0, 68.0, 6.0, 3.0, 22.0, 17.0, 37.0, 14.0, 34.0, 12.0, 17.0, 27.0, 11.0, 25.0, 11.0, 17.0, 17.0, 25.0, 11.0, 12.0, 3.0, 19.0, 22.0, 60.0, 25.0, 25.0, 34.0, 19.0, 28.0, 14.0, 17.0, 17.0, 19.0, 17.0, 22.0, 63.0, 66.0, 22.0, 14.0, 9.0, 22.0, 14.0, 22.0, 20.0, 71.0, 25.0, 23.0, 3.0, 22.0, 22.0, 16.0, 20.0, 9.0, 24.0, 44.0, 12.0, 17.0, 20.0, 20.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [14.0, 3.0, 33.0, 51.0, 3.0, 11.0, 3.0, 6.0, 0.0, 9.0, 14.0, 20.0, 9.0, 13.0, 0.0, 9.0, 17.0, 6.0, 14.0, 6.0, 3.0, 6.0, 27.0, 9.0, 8.0, 6.0, 3.0, 19.0, 3.0, 8.0, 8.0, 12.0, 11.0, 14.0, 8.0, 9.0, 3.0, 3.0, 6.0, 3.0, 13.0, 17.0, 26.0, 34.0, 6.0, 3.0, 14.0, 11.0, 11.0, 11.0, 3.0, 11.0, 9.0, 14.0, 13.0, 15.0, 8.0, 11.0, 3.0, 3.0, 38.0, 38.0, 17.0, 8.0, 6.0, 5.0, 8.0, 3.0, 3.0, 11.0, 11.0, 3.0, 31.0, 45.0, 19.0, 11.0, 5.0, 20.0, 6.0, 3.0, 6.0, 22.0, 6.0, 11.0, 28.0, 40.0, 6.0, 0.0, 3.0, 0.0, 11.0, 11.0, 8.0, 9.0, 20.0, 17.0, 5.0, 9.0, 20.0, 14.0, 9.0, 3.0, 5.0, 12.0, 14.0, 13.0, 3.0, 8.0, 13.0, 12.0, 11.0, 0.0, 5.0, 12.0, 9.0, 8.0, 17.0, 8.0, 0.0, 11.0, 9.0, 3.0, 3.0, 0.0, 6.0, 13.0, 8.0, 14.0, 28.0, 32.0, 19.0, 6.0, 8.0, 17.0, 19.0, 15.0, 8.0, 11.0, 17.0, 11.0, 3.0, 11.0, 5.0, 12.0, 3.0, 14.0, 3.0, 16.0, 11.0, 6.0, 14.0, 8.0, 31.0, 32.0, 29.0, 37.0, 11.0, 11.0, 
5.0, 9.0, 9.0, 0.0, 11.0, 11.0, 8.0, 6.0, 8.0, 14.0, 11.0, 9.0, 41.0, 30.0, 11.0, 14.0, 9.0, 14.0, 0.0, 3.0, 14.0, 8.0, 14.0, 8.0, 8.0, 8.0, 14.0, 6.0, 3.0, 6.0, 3.0, 21.0, 14.0, 30.0, 3.0, 9.0, 11.0, 6.0, 12.0, 8.0, 9.0, 11.0]}, "sampler_perf": {"mean_env_wait_ms": 10.867209759257396, "mean_processing_ms": 0.16544513042105083, "mean_inference_ms": 4.098041869605518}, "off_policy_estimator": {}, "info": {"num_steps_trained": 360000, "num_steps_sampled": 192000, "sample_time_ms": 262263.341, "load_time_ms": 39.79, "grad_time_ms": 8700.33, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 1.220703143189894e-05, "cur_lr": 0.0010000000474974513, "total_loss": -0.14661313593387604, "policy_loss": -0.0037220455706119537, "vf_loss": 3.146031618118286, "vf_explained_var": 0.09564539045095444, "kl": 0.0008609917131252587, "entropy": 1.7716461420059204, "entropy_coeff": 0.08083199709653854, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 192000, "episodes_total": 480, "training_iteration": 15, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_20-00-10", "timestamp": 1660244410, "time_this_iter_s": 28.69369125366211, "time_total_s": 2831.5880324840546, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, 
"custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, 
"local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 2831.5880324840546, "timesteps_since_restore": 192000, "iterations_since_restore": 15, "perf": {"cpu_util_percent": 32.489999999999995, "ram_util_percent": 57.802499999999995}}
-{"episode_reward_max": 84.0, "episode_reward_min": 3.0, "episode_reward_mean": 25.88, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 51.0}, "policy_reward_mean": {"ppo": 12.94}, "custom_metrics": {"sparse_reward_mean": 2.6, "sparse_reward_min": 0, "sparse_reward_max": 20, "shaped_reward_mean": 20.68, "shaped_reward_min": 3, "shaped_reward_max": 44, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 5.11, "onion_pickup_agent_0_min": 0, "onion_pickup_agent_0_max": 11, "onion_pickup_agent_1_mean": 6.01, "onion_pickup_agent_1_min": 0, "onion_pickup_agent_1_max": 14, "useful_onion_pickup_agent_0_mean": 3.28, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 9, "useful_onion_pickup_agent_1_mean": 4.06, "useful_onion_pickup_agent_1_min": 0, "useful_onion_pickup_agent_1_max": 10, "onion_drop_agent_0_mean": 2.99, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 
7, "onion_drop_agent_1_mean": 3.3, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 9, "useful_onion_drop_agent_0_mean": 1.49, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 1.6, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 7, "potting_onion_agent_0_mean": 1.61, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 6, "potting_onion_agent_1_mean": 2.25, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 6, "dish_pickup_agent_0_mean": 3.28, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 2.95, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 0.36, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 2, "useful_dish_pickup_agent_1_mean": 0.34, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 3, "dish_drop_agent_0_mean": 2.41, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 10, "dish_drop_agent_1_mean": 1.95, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.95, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 10, "useful_dish_drop_agent_1_mean": 0.65, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 5, "soup_pickup_agent_0_mean": 0.98, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 4, "soup_pickup_agent_1_mean": 1.13, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 6, "soup_delivery_agent_0_mean": 0.29, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 2, "soup_delivery_agent_1_mean": 0.37, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 3, "soup_drop_agent_0_mean": 0.58, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 4, "soup_drop_agent_1_mean": 0.62, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 4, "optimal_onion_potting_agent_0_mean": 1.61, "optimal_onion_potting_agent_0_min": 0, 
"optimal_onion_potting_agent_0_max": 6, "optimal_onion_potting_agent_1_mean": 2.25, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 6, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 1.61, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 6, "viable_onion_potting_agent_1_mean": 2.25, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 6, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": 
[17.0, 26.0, 42.0, 36.0, 11.0, 17.0, 9.0, 12.0, 20.0, 30.0, 22.0, 60.0, 23.0, 66.0, 36.0, 28.0, 79.0, 22.0, 31.0, 68.0, 28.0, 20.0, 54.0, 9.0, 14.0, 17.0, 66.0, 20.0, 14.0, 11.0, 17.0, 19.0, 60.0, 25.0, 25.0, 34.0, 19.0, 28.0, 14.0, 17.0, 17.0, 19.0, 17.0, 22.0, 63.0, 66.0, 22.0, 14.0, 9.0, 22.0, 14.0, 22.0, 20.0, 71.0, 25.0, 23.0, 3.0, 22.0, 22.0, 16.0, 20.0, 9.0, 24.0, 44.0, 12.0, 17.0, 20.0, 20.0, 17.0, 84.0, 14.0, 9.0, 9.0, 34.0, 22.0, 9.0, 23.0, 20.0, 9.0, 36.0, 14.0, 22.0, 11.0, 20.0, 25.0, 17.0, 6.0, 9.0, 30.0, 60.0, 9.0, 25.0, 22.0, 14.0, 23.0, 28.0, 19.0, 6.0, 76.0, 25.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [12.0, 5.0, 20.0, 6.0, 14.0, 28.0, 19.0, 17.0, 3.0, 8.0, 3.0, 14.0, 3.0, 6.0, 6.0, 6.0, 3.0, 17.0, 17.0, 13.0, 9.0, 13.0, 26.0, 34.0, 14.0, 9.0, 37.0, 29.0, 8.0, 28.0, 12.0, 16.0, 29.0, 50.0, 3.0, 19.0, 8.0, 23.0, 33.0, 35.0, 5.0, 23.0, 14.0, 6.0, 31.0, 23.0, 9.0, 0.0, 3.0, 11.0, 3.0, 14.0, 26.0, 40.0, 3.0, 17.0, 6.0, 8.0, 5.0, 6.0, 9.0, 8.0, 8.0, 11.0, 28.0, 32.0, 19.0, 6.0, 8.0, 17.0, 19.0, 15.0, 8.0, 11.0, 17.0, 11.0, 3.0, 11.0, 5.0, 12.0, 3.0, 14.0, 3.0, 16.0, 11.0, 6.0, 14.0, 8.0, 31.0, 32.0, 29.0, 37.0, 11.0, 11.0, 5.0, 9.0, 9.0, 0.0, 11.0, 11.0, 8.0, 6.0, 8.0, 14.0, 11.0, 9.0, 41.0, 30.0, 11.0, 14.0, 9.0, 14.0, 0.0, 3.0, 14.0, 8.0, 14.0, 8.0, 8.0, 8.0, 14.0, 6.0, 3.0, 6.0, 3.0, 21.0, 14.0, 30.0, 3.0, 9.0, 11.0, 6.0, 12.0, 8.0, 9.0, 11.0, 14.0, 3.0, 33.0, 51.0, 3.0, 11.0, 3.0, 6.0, 0.0, 9.0, 14.0, 20.0, 9.0, 13.0, 0.0, 9.0, 17.0, 6.0, 14.0, 6.0, 3.0, 
6.0, 27.0, 9.0, 8.0, 6.0, 3.0, 19.0, 3.0, 8.0, 8.0, 12.0, 11.0, 14.0, 8.0, 9.0, 3.0, 3.0, 6.0, 3.0, 13.0, 17.0, 26.0, 34.0, 6.0, 3.0, 14.0, 11.0, 11.0, 11.0, 3.0, 11.0, 9.0, 14.0, 13.0, 15.0, 8.0, 11.0, 3.0, 3.0, 38.0, 38.0, 17.0, 8.0]}, "sampler_perf": {"mean_env_wait_ms": 10.168319641902713, "mean_processing_ms": 0.16598079643560795, "mean_inference_ms": 3.884933201233045}, "off_policy_estimator": {}, "info": {"num_steps_trained": 384000, "num_steps_sampled": 204800, "sample_time_ms": 262331.542, "load_time_ms": 40.118, "grad_time_ms": 8762.055, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 6.10351571594947e-06, "cur_lr": 0.0010000000474974513, "total_loss": -0.13645179569721222, "policy_loss": -0.00917948316782713, "vf_loss": 5.045528888702393, "vf_explained_var": 0.08776132017374039, "kl": 0.0009270868613384664, "entropy": 1.7668260335922241, "entropy_coeff": 0.07231999933719635, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 204800, "episodes_total": 512, "training_iteration": 16, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_20-00-39", "timestamp": 1660244439, "time_this_iter_s": 28.684066772460938, "time_total_s": 2860.2720992565155, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, 
"custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, 
"local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 2860.2720992565155, "timesteps_since_restore": 204800, "iterations_since_restore": 16, "perf": {"cpu_util_percent": 36.62439024390244, "ram_util_percent": 57.81951219512195}}
-{"episode_reward_max": 84.0, "episode_reward_min": 6.0, "episode_reward_mean": 27.86, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 51.0}, "policy_reward_mean": {"ppo": 13.93}, "custom_metrics": {"sparse_reward_mean": 3.4, "sparse_reward_min": 0, "sparse_reward_max": 20, "shaped_reward_mean": 21.06, "shaped_reward_min": 6, "shaped_reward_max": 44, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 5.37, "onion_pickup_agent_0_min": 1, "onion_pickup_agent_0_max": 11, "onion_pickup_agent_1_mean": 6.13, "onion_pickup_agent_1_min": 1, "onion_pickup_agent_1_max": 14, "useful_onion_pickup_agent_0_mean": 3.43, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 9, "useful_onion_pickup_agent_1_mean": 4.09, "useful_onion_pickup_agent_1_min": 1, "useful_onion_pickup_agent_1_max": 10, "onion_drop_agent_0_mean": 3.1, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 
7, "onion_drop_agent_1_mean": 3.28, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 9, "useful_onion_drop_agent_0_mean": 1.62, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 1.64, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 7, "potting_onion_agent_0_mean": 1.71, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 5, "potting_onion_agent_1_mean": 2.35, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 6, "dish_pickup_agent_0_mean": 3.08, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 2.84, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 0.38, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 2, "useful_dish_pickup_agent_1_mean": 0.33, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 2, "dish_drop_agent_0_mean": 2.3, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 1.85, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.78, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 5, "useful_dish_drop_agent_1_mean": 0.57, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 5, "soup_pickup_agent_0_mean": 0.98, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 4, "soup_pickup_agent_1_mean": 1.08, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 6, "soup_delivery_agent_0_mean": 0.35, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 2, "soup_delivery_agent_1_mean": 0.36, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 3, "soup_drop_agent_0_mean": 0.57, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 3, "soup_drop_agent_1_mean": 0.61, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 4, "optimal_onion_potting_agent_0_mean": 1.71, "optimal_onion_potting_agent_0_min": 0, 
"optimal_onion_potting_agent_0_max": 5, "optimal_onion_potting_agent_1_mean": 2.35, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 6, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 1.71, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 5, "viable_onion_potting_agent_1_mean": 2.35, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 6, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": 
[17.0, 28.0, 34.0, 20.0, 57.0, 14.0, 71.0, 12.0, 12.0, 66.0, 28.0, 63.0, 17.0, 28.0, 23.0, 9.0, 20.0, 22.0, 71.0, 26.0, 27.0, 20.0, 20.0, 9.0, 19.0, 20.0, 76.0, 19.0, 68.0, 68.0, 17.0, 25.0, 12.0, 17.0, 20.0, 20.0, 17.0, 84.0, 14.0, 9.0, 9.0, 34.0, 22.0, 9.0, 23.0, 20.0, 9.0, 36.0, 14.0, 22.0, 11.0, 20.0, 25.0, 17.0, 6.0, 9.0, 30.0, 60.0, 9.0, 25.0, 22.0, 14.0, 23.0, 28.0, 19.0, 6.0, 76.0, 25.0, 17.0, 26.0, 42.0, 36.0, 11.0, 17.0, 9.0, 12.0, 20.0, 30.0, 22.0, 60.0, 23.0, 66.0, 36.0, 28.0, 79.0, 22.0, 31.0, 68.0, 28.0, 20.0, 54.0, 9.0, 14.0, 17.0, 66.0, 20.0, 14.0, 11.0, 17.0, 19.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [6.0, 11.0, 19.0, 9.0, 17.0, 17.0, 3.0, 17.0, 26.0, 31.0, 8.0, 6.0, 38.0, 33.0, 6.0, 6.0, 3.0, 9.0, 34.0, 32.0, 17.0, 11.0, 31.0, 32.0, 6.0, 11.0, 16.0, 12.0, 12.0, 11.0, 3.0, 6.0, 3.0, 17.0, 10.0, 12.0, 36.0, 35.0, 11.0, 15.0, 13.0, 14.0, 3.0, 17.0, 14.0, 6.0, 6.0, 3.0, 6.0, 13.0, 12.0, 8.0, 40.0, 36.0, 8.0, 11.0, 40.0, 28.0, 34.0, 34.0, 3.0, 14.0, 9.0, 16.0, 3.0, 9.0, 11.0, 6.0, 12.0, 8.0, 9.0, 11.0, 14.0, 3.0, 33.0, 51.0, 3.0, 11.0, 3.0, 6.0, 0.0, 9.0, 14.0, 20.0, 9.0, 13.0, 0.0, 9.0, 17.0, 6.0, 14.0, 6.0, 3.0, 6.0, 27.0, 9.0, 8.0, 6.0, 3.0, 19.0, 3.0, 8.0, 8.0, 12.0, 11.0, 14.0, 8.0, 9.0, 3.0, 3.0, 6.0, 3.0, 13.0, 17.0, 26.0, 34.0, 6.0, 3.0, 14.0, 11.0, 11.0, 11.0, 3.0, 11.0, 9.0, 14.0, 13.0, 15.0, 8.0, 11.0, 3.0, 3.0, 38.0, 38.0, 17.0, 8.0, 12.0, 5.0, 20.0, 6.0, 14.0, 28.0, 19.0, 17.0, 3.0, 8.0, 3.0, 14.0, 3.0, 6.0, 6.0, 6.0, 3.0, 17.0, 17.0, 13.0, 
9.0, 13.0, 26.0, 34.0, 14.0, 9.0, 37.0, 29.0, 8.0, 28.0, 12.0, 16.0, 29.0, 50.0, 3.0, 19.0, 8.0, 23.0, 33.0, 35.0, 5.0, 23.0, 14.0, 6.0, 31.0, 23.0, 9.0, 0.0, 3.0, 11.0, 3.0, 14.0, 26.0, 40.0, 3.0, 17.0, 6.0, 8.0, 5.0, 6.0, 9.0, 8.0, 8.0, 11.0]}, "sampler_perf": {"mean_env_wait_ms": 9.556971747293405, "mean_processing_ms": 0.16611483871912305, "mean_inference_ms": 3.69458132267773}, "off_policy_estimator": {}, "info": {"num_steps_trained": 408000, "num_steps_sampled": 217600, "sample_time_ms": 20254.523, "load_time_ms": 38.861, "grad_time_ms": 8931.015, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 3.051757857974735e-06, "cur_lr": 0.0010000000474974513, "total_loss": -0.12081533670425415, "policy_loss": -0.008616355247795582, "vf_loss": 5.614309310913086, "vf_explained_var": 0.13559557497501373, "kl": 0.0008749772678129375, "entropy": 1.76718270778656, "entropy_coeff": 0.06380800157785416, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 217600, "episodes_total": 544, "training_iteration": 17, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_20-01-08", "timestamp": 1660244468, "time_this_iter_s": 29.310136079788208, "time_total_s": 2889.5822353363037, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, 
"custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, 
"local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 2889.5822353363037, "timesteps_since_restore": 217600, "iterations_since_restore": 17, "perf": {"cpu_util_percent": 39.28333333333334, "ram_util_percent": 57.69761904761903}}
-{"episode_reward_max": 82.0, "episode_reward_min": 6.0, "episode_reward_mean": 31.16, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 50.0}, "policy_reward_mean": {"ppo": 15.58}, "custom_metrics": {"sparse_reward_mean": 4.0, "sparse_reward_min": 0, "sparse_reward_max": 20, "shaped_reward_mean": 23.16, "shaped_reward_min": 6, "shaped_reward_max": 47, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 5.73, "onion_pickup_agent_0_min": 0, "onion_pickup_agent_0_max": 11, "onion_pickup_agent_1_mean": 5.83, "onion_pickup_agent_1_min": 1, "onion_pickup_agent_1_max": 13, "useful_onion_pickup_agent_0_mean": 3.76, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 9, "useful_onion_pickup_agent_1_mean": 3.82, "useful_onion_pickup_agent_1_min": 1, "useful_onion_pickup_agent_1_max": 10, "onion_drop_agent_0_mean": 3.1, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 
7, "onion_drop_agent_1_mean": 3.06, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 9, "useful_onion_drop_agent_0_mean": 1.52, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 1.57, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 6, "potting_onion_agent_0_mean": 2.08, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 5, "potting_onion_agent_1_mean": 2.25, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 6, "dish_pickup_agent_0_mean": 3.02, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 2.94, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 0.34, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 2, "useful_dish_pickup_agent_1_mean": 0.45, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 2, "dish_drop_agent_0_mean": 2.19, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 9, "dish_drop_agent_1_mean": 1.79, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.77, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 5, "useful_dish_drop_agent_1_mean": 0.54, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 5, "soup_pickup_agent_0_mean": 1.08, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 6, "soup_pickup_agent_1_mean": 1.32, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 6, "soup_delivery_agent_0_mean": 0.47, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 3, "soup_delivery_agent_1_mean": 0.49, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 3, "soup_drop_agent_0_mean": 0.57, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 6, "soup_drop_agent_1_mean": 0.73, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 4, "optimal_onion_potting_agent_0_mean": 2.08, "optimal_onion_potting_agent_0_min": 0, 
"optimal_onion_potting_agent_0_max": 5, "optimal_onion_potting_agent_1_mean": 2.25, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 6, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 2.08, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 5, "viable_onion_potting_agent_1_mean": 2.25, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 6, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": 
[76.0, 44.0, 28.0, 12.0, 36.0, 33.0, 9.0, 24.0, 28.0, 25.0, 31.0, 68.0, 17.0, 47.0, 23.0, 12.0, 17.0, 22.0, 28.0, 23.0, 20.0, 37.0, 14.0, 8.0, 28.0, 22.0, 39.0, 82.0, 71.0, 57.0, 30.0, 9.0, 19.0, 6.0, 76.0, 25.0, 17.0, 26.0, 42.0, 36.0, 11.0, 17.0, 9.0, 12.0, 20.0, 30.0, 22.0, 60.0, 23.0, 66.0, 36.0, 28.0, 79.0, 22.0, 31.0, 68.0, 28.0, 20.0, 54.0, 9.0, 14.0, 17.0, 66.0, 20.0, 14.0, 11.0, 17.0, 19.0, 17.0, 28.0, 34.0, 20.0, 57.0, 14.0, 71.0, 12.0, 12.0, 66.0, 28.0, 63.0, 17.0, 28.0, 23.0, 9.0, 20.0, 22.0, 71.0, 26.0, 27.0, 20.0, 20.0, 9.0, 19.0, 20.0, 76.0, 19.0, 68.0, 68.0, 17.0, 25.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [34.0, 42.0, 18.0, 26.0, 11.0, 17.0, 9.0, 3.0, 17.0, 19.0, 19.0, 14.0, 3.0, 6.0, 16.0, 8.0, 16.0, 12.0, 12.0, 13.0, 20.0, 11.0, 37.0, 31.0, 9.0, 8.0, 28.0, 19.0, 9.0, 14.0, 3.0, 9.0, 11.0, 6.0, 8.0, 14.0, 11.0, 17.0, 9.0, 14.0, 9.0, 11.0, 20.0, 17.0, 9.0, 5.0, 5.0, 3.0, 16.0, 12.0, 12.0, 10.0, 12.0, 27.0, 38.0, 44.0, 35.0, 36.0, 29.0, 28.0, 11.0, 19.0, 0.0, 9.0, 8.0, 11.0, 3.0, 3.0, 38.0, 38.0, 17.0, 8.0, 12.0, 5.0, 20.0, 6.0, 14.0, 28.0, 19.0, 17.0, 3.0, 8.0, 3.0, 14.0, 3.0, 6.0, 6.0, 6.0, 3.0, 17.0, 17.0, 13.0, 9.0, 13.0, 26.0, 34.0, 14.0, 9.0, 37.0, 29.0, 8.0, 28.0, 12.0, 16.0, 29.0, 50.0, 3.0, 19.0, 8.0, 23.0, 33.0, 35.0, 5.0, 23.0, 14.0, 6.0, 31.0, 23.0, 9.0, 0.0, 3.0, 11.0, 3.0, 14.0, 26.0, 40.0, 3.0, 17.0, 6.0, 8.0, 5.0, 6.0, 9.0, 8.0, 8.0, 11.0, 6.0, 11.0, 19.0, 9.0, 17.0, 17.0, 3.0, 17.0, 26.0, 31.0, 8.0, 6.0, 38.0, 33.0, 6.0, 6.0, 3.0, 9.0, 
34.0, 32.0, 17.0, 11.0, 31.0, 32.0, 6.0, 11.0, 16.0, 12.0, 12.0, 11.0, 3.0, 6.0, 3.0, 17.0, 10.0, 12.0, 36.0, 35.0, 11.0, 15.0, 13.0, 14.0, 3.0, 17.0, 14.0, 6.0, 6.0, 3.0, 6.0, 13.0, 12.0, 8.0, 40.0, 36.0, 8.0, 11.0, 40.0, 28.0, 34.0, 34.0, 3.0, 14.0, 9.0, 16.0]}, "sampler_perf": {"mean_env_wait_ms": 9.01879810548929, "mean_processing_ms": 0.16630796767792247, "mean_inference_ms": 3.5295458802458652}, "off_policy_estimator": {}, "info": {"num_steps_trained": 432000, "num_steps_sampled": 230400, "sample_time_ms": 20513.179, "load_time_ms": 38.872, "grad_time_ms": 8952.524, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 1.5258789289873675e-06, "cur_lr": 0.0010000000474974513, "total_loss": -0.10658890753984451, "policy_loss": -0.009598230011761189, "vf_loss": 4.846475601196289, "vf_explained_var": 0.11691506952047348, "kl": 0.0009377954411320388, "entropy": 1.762791633605957, "entropy_coeff": 0.055296000093221664, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 230400, "episodes_total": 576, "training_iteration": 18, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_20-01-39", "timestamp": 1660244499, "time_this_iter_s": 30.67889380455017, "time_total_s": 2920.261129140854, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, 
"custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 2920.261129140854, "timesteps_since_restore": 230400, "iterations_since_restore": 18, "perf": {"cpu_util_percent": 40.46744186046512, "ram_util_percent": 57.767441860465105}}
-{"episode_reward_max": 88.0, "episode_reward_min": 8.0, "episode_reward_mean": 32.95, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 53.0}, "policy_reward_mean": {"ppo": 16.475}, "custom_metrics": {"sparse_reward_mean": 3.8, "sparse_reward_min": 0, "sparse_reward_max": 20, "shaped_reward_mean": 25.35, "shaped_reward_min": 8, "shaped_reward_max": 48, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 5.56, "onion_pickup_agent_0_min": 0, "onion_pickup_agent_0_max": 11, "onion_pickup_agent_1_mean": 5.61, "onion_pickup_agent_1_min": 1, "onion_pickup_agent_1_max": 13, "useful_onion_pickup_agent_0_mean": 3.65, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 8, "useful_onion_pickup_agent_1_mean": 3.81, "useful_onion_pickup_agent_1_min": 1, "useful_onion_pickup_agent_1_max": 9, "onion_drop_agent_0_mean": 2.8, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 
7, "onion_drop_agent_1_mean": 2.85, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 9, "useful_onion_drop_agent_0_mean": 1.39, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 1.41, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 6, "potting_onion_agent_0_mean": 2.27, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 6, "potting_onion_agent_1_mean": 2.23, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 6, "dish_pickup_agent_0_mean": 2.97, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 2.91, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 7, "useful_dish_pickup_agent_0_mean": 0.41, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 2, "useful_dish_pickup_agent_1_mean": 0.54, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 3, "dish_drop_agent_0_mean": 2.0, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 9, "dish_drop_agent_1_mean": 1.61, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 6, "useful_dish_drop_agent_0_mean": 0.61, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.49, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 4, "soup_pickup_agent_0_mean": 1.22, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 6, "soup_pickup_agent_1_mean": 1.52, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 6, "soup_delivery_agent_0_mean": 0.6, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 3, "soup_delivery_agent_1_mean": 0.51, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 2, "soup_drop_agent_0_mean": 0.55, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 6, "soup_drop_agent_1_mean": 0.94, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 5, "optimal_onion_potting_agent_0_mean": 2.27, "optimal_onion_potting_agent_0_min": 0, 
"optimal_onion_potting_agent_0_max": 6, "optimal_onion_potting_agent_1_mean": 2.23, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 6, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 2.27, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 6, "viable_onion_potting_agent_1_mean": 2.23, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 6, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": 
[34.0, 42.0, 88.0, 33.0, 17.0, 30.0, 76.0, 85.0, 33.0, 31.0, 68.0, 12.0, 16.0, 79.0, 68.0, 19.0, 33.0, 19.0, 25.0, 31.0, 36.0, 44.0, 39.0, 11.0, 22.0, 42.0, 31.0, 25.0, 20.0, 31.0, 31.0, 17.0, 14.0, 11.0, 17.0, 19.0, 17.0, 28.0, 34.0, 20.0, 57.0, 14.0, 71.0, 12.0, 12.0, 66.0, 28.0, 63.0, 17.0, 28.0, 23.0, 9.0, 20.0, 22.0, 71.0, 26.0, 27.0, 20.0, 20.0, 9.0, 19.0, 20.0, 76.0, 19.0, 68.0, 68.0, 17.0, 25.0, 76.0, 44.0, 28.0, 12.0, 36.0, 33.0, 9.0, 24.0, 28.0, 25.0, 31.0, 68.0, 17.0, 47.0, 23.0, 12.0, 17.0, 22.0, 28.0, 23.0, 20.0, 37.0, 14.0, 8.0, 28.0, 22.0, 39.0, 82.0, 71.0, 57.0, 30.0, 9.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [11.0, 23.0, 25.0, 17.0, 45.0, 43.0, 22.0, 11.0, 5.0, 12.0, 17.0, 13.0, 48.0, 28.0, 32.0, 53.0, 22.0, 11.0, 18.0, 13.0, 29.0, 39.0, 6.0, 6.0, 3.0, 13.0, 37.0, 42.0, 32.0, 36.0, 11.0, 8.0, 13.0, 20.0, 11.0, 8.0, 8.0, 17.0, 14.0, 17.0, 19.0, 17.0, 14.0, 30.0, 14.0, 25.0, 5.0, 6.0, 6.0, 16.0, 9.0, 33.0, 17.0, 14.0, 11.0, 14.0, 6.0, 14.0, 12.0, 19.0, 11.0, 20.0, 14.0, 3.0, 6.0, 8.0, 5.0, 6.0, 9.0, 8.0, 8.0, 11.0, 6.0, 11.0, 19.0, 9.0, 17.0, 17.0, 3.0, 17.0, 26.0, 31.0, 8.0, 6.0, 38.0, 33.0, 6.0, 6.0, 3.0, 9.0, 34.0, 32.0, 17.0, 11.0, 31.0, 32.0, 6.0, 11.0, 16.0, 12.0, 12.0, 11.0, 3.0, 6.0, 3.0, 17.0, 10.0, 12.0, 36.0, 35.0, 11.0, 15.0, 13.0, 14.0, 3.0, 17.0, 14.0, 6.0, 6.0, 3.0, 6.0, 13.0, 12.0, 8.0, 40.0, 36.0, 8.0, 11.0, 40.0, 28.0, 34.0, 34.0, 3.0, 14.0, 9.0, 16.0, 34.0, 42.0, 18.0, 26.0, 11.0, 17.0, 9.0, 3.0, 17.0, 19.0, 19.0, 14.0, 3.0, 6.0, 16.0, 
8.0, 16.0, 12.0, 12.0, 13.0, 20.0, 11.0, 37.0, 31.0, 9.0, 8.0, 28.0, 19.0, 9.0, 14.0, 3.0, 9.0, 11.0, 6.0, 8.0, 14.0, 11.0, 17.0, 9.0, 14.0, 9.0, 11.0, 20.0, 17.0, 9.0, 5.0, 5.0, 3.0, 16.0, 12.0, 12.0, 10.0, 12.0, 27.0, 38.0, 44.0, 35.0, 36.0, 29.0, 28.0, 11.0, 19.0, 0.0, 9.0]}, "sampler_perf": {"mean_env_wait_ms": 8.541158675961446, "mean_processing_ms": 0.1664622179417048, "mean_inference_ms": 3.3849019348875076}, "off_policy_estimator": {}, "info": {"num_steps_trained": 456000, "num_steps_sampled": 243200, "sample_time_ms": 20842.585, "load_time_ms": 38.976, "grad_time_ms": 8953.309, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 7.629394644936838e-07, "cur_lr": 0.0010000000474974513, "total_loss": -0.09107109159231186, "policy_loss": -0.009291496127843857, "vf_loss": 5.607062816619873, "vf_explained_var": 0.08896120637655258, "kl": 0.0008400729275308549, "entropy": 1.7600102424621582, "entropy_coeff": 0.04678399860858917, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 243200, "episodes_total": 608, "training_iteration": 19, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_20-02-08", "timestamp": 1660244528, "time_this_iter_s": 29.248838186264038, "time_total_s": 2949.509967327118, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, 
"custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, 
"device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 2949.509967327118, "timesteps_since_restore": 243200, "iterations_since_restore": 19, "perf": {"cpu_util_percent": 33.43571428571428, "ram_util_percent": 57.790476190476205}}
-{"episode_reward_max": 88.0, "episode_reward_min": 8.0, "episode_reward_mean": 37.1, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 56.0}, "policy_reward_mean": {"ppo": 18.55}, "custom_metrics": {"sparse_reward_mean": 4.8, "sparse_reward_min": 0, "sparse_reward_max": 20, "shaped_reward_mean": 27.5, "shaped_reward_min": 8, "shaped_reward_max": 48, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 5.54, "onion_pickup_agent_0_min": 0, "onion_pickup_agent_0_max": 11, "onion_pickup_agent_1_mean": 5.78, "onion_pickup_agent_1_min": 1, "onion_pickup_agent_1_max": 11, "useful_onion_pickup_agent_0_mean": 3.75, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 9, "useful_onion_pickup_agent_1_mean": 3.94, "useful_onion_pickup_agent_1_min": 0, "useful_onion_pickup_agent_1_max": 9, "onion_drop_agent_0_mean": 2.61, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 7, 
"onion_drop_agent_1_mean": 2.83, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 9, "useful_onion_drop_agent_0_mean": 1.36, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 1.41, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 7, "potting_onion_agent_0_mean": 2.45, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 6, "potting_onion_agent_1_mean": 2.42, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 7, "dish_pickup_agent_0_mean": 3.16, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 3.13, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 7, "useful_dish_pickup_agent_0_mean": 0.43, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 2, "useful_dish_pickup_agent_1_mean": 0.65, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 3, "dish_drop_agent_0_mean": 2.09, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 9, "dish_drop_agent_1_mean": 1.82, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 6, "useful_dish_drop_agent_0_mean": 0.72, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 5, "useful_dish_drop_agent_1_mean": 0.49, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 5, "soup_pickup_agent_0_mean": 1.31, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 6, "soup_pickup_agent_1_mean": 1.65, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 6, "soup_delivery_agent_0_mean": 0.61, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 3, "soup_delivery_agent_1_mean": 0.59, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 2, "soup_drop_agent_0_mean": 0.62, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 6, "soup_drop_agent_1_mean": 1.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 5, "optimal_onion_potting_agent_0_mean": 2.45, "optimal_onion_potting_agent_0_min": 0, 
"optimal_onion_potting_agent_0_max": 6, "optimal_onion_potting_agent_1_mean": 2.42, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 7, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 2.45, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 6, "viable_onion_potting_agent_1_mean": 2.42, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 7, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": 
[23.0, 74.0, 17.0, 14.0, 9.0, 76.0, 63.0, 88.0, 66.0, 37.0, 37.0, 37.0, 68.0, 77.0, 28.0, 28.0, 68.0, 25.0, 60.0, 20.0, 22.0, 36.0, 23.0, 82.0, 12.0, 9.0, 23.0, 19.0, 42.0, 79.0, 42.0, 20.0, 68.0, 68.0, 17.0, 25.0, 76.0, 44.0, 28.0, 12.0, 36.0, 33.0, 9.0, 24.0, 28.0, 25.0, 31.0, 68.0, 17.0, 47.0, 23.0, 12.0, 17.0, 22.0, 28.0, 23.0, 20.0, 37.0, 14.0, 8.0, 28.0, 22.0, 39.0, 82.0, 71.0, 57.0, 30.0, 9.0, 34.0, 42.0, 88.0, 33.0, 17.0, 30.0, 76.0, 85.0, 33.0, 31.0, 68.0, 12.0, 16.0, 79.0, 68.0, 19.0, 33.0, 19.0, 25.0, 31.0, 36.0, 44.0, 39.0, 11.0, 22.0, 42.0, 31.0, 25.0, 20.0, 31.0, 31.0, 17.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [11.0, 12.0, 43.0, 31.0, 11.0, 6.0, 14.0, 0.0, 9.0, 0.0, 45.0, 31.0, 28.0, 35.0, 32.0, 56.0, 38.0, 28.0, 14.0, 23.0, 8.0, 29.0, 14.0, 23.0, 34.0, 34.0, 34.0, 43.0, 14.0, 14.0, 14.0, 14.0, 30.0, 38.0, 14.0, 11.0, 26.0, 34.0, 6.0, 14.0, 8.0, 14.0, 17.0, 19.0, 17.0, 6.0, 39.0, 43.0, 6.0, 6.0, 3.0, 6.0, 11.0, 12.0, 13.0, 6.0, 17.0, 25.0, 37.0, 42.0, 22.0, 20.0, 6.0, 14.0, 40.0, 28.0, 34.0, 34.0, 3.0, 14.0, 9.0, 16.0, 34.0, 42.0, 18.0, 26.0, 11.0, 17.0, 9.0, 3.0, 17.0, 19.0, 19.0, 14.0, 3.0, 6.0, 16.0, 8.0, 16.0, 12.0, 12.0, 13.0, 20.0, 11.0, 37.0, 31.0, 9.0, 8.0, 28.0, 19.0, 9.0, 14.0, 3.0, 9.0, 11.0, 6.0, 8.0, 14.0, 11.0, 17.0, 9.0, 14.0, 9.0, 11.0, 20.0, 17.0, 9.0, 5.0, 5.0, 3.0, 16.0, 12.0, 12.0, 10.0, 12.0, 27.0, 38.0, 44.0, 35.0, 36.0, 29.0, 28.0, 11.0, 19.0, 0.0, 9.0, 11.0, 23.0, 25.0, 17.0, 45.0, 43.0, 22.0, 11.0, 5.0, 12.0, 17.0, 13.0, 48.0, 
28.0, 32.0, 53.0, 22.0, 11.0, 18.0, 13.0, 29.0, 39.0, 6.0, 6.0, 3.0, 13.0, 37.0, 42.0, 32.0, 36.0, 11.0, 8.0, 13.0, 20.0, 11.0, 8.0, 8.0, 17.0, 14.0, 17.0, 19.0, 17.0, 14.0, 30.0, 14.0, 25.0, 5.0, 6.0, 6.0, 16.0, 9.0, 33.0, 17.0, 14.0, 11.0, 14.0, 6.0, 14.0, 12.0, 19.0, 11.0, 20.0, 14.0, 3.0]}, "sampler_perf": {"mean_env_wait_ms": 8.114151695990262, "mean_processing_ms": 0.1664756703889973, "mean_inference_ms": 3.2561915126083236}, "off_policy_estimator": {}, "info": {"num_steps_trained": 480000, "num_steps_sampled": 256000, "sample_time_ms": 21118.245, "load_time_ms": 39.16, "grad_time_ms": 8931.236, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 3.814697322468419e-07, "cur_lr": 0.0010000000474974513, "total_loss": -0.07289835065603256, "policy_loss": -0.006338973995298147, "vf_loss": 7.939427852630615, "vf_explained_var": 0.1275780349969864, "kl": 0.000996587099507451, "entropy": 1.7598587274551392, "entropy_coeff": 0.03827200084924698, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 256000, "episodes_total": 640, "training_iteration": 20, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_20-02-37", "timestamp": 1660244557, "time_this_iter_s": 28.921189069747925, "time_total_s": 2978.431156396866, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, 
"custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, 
"device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 2978.431156396866, "timesteps_since_restore": 256000, "iterations_since_restore": 20, "perf": {"cpu_util_percent": 30.78048780487805, "ram_util_percent": 57.77073170731706}}
-{"episode_reward_max": 116.0, "episode_reward_min": 9.0, "episode_reward_mean": 39.04, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 68.0}, "policy_reward_mean": {"ppo": 19.52}, "custom_metrics": {"sparse_reward_mean": 5.8, "sparse_reward_min": 0, "sparse_reward_max": 40, "shaped_reward_mean": 27.44, "shaped_reward_min": 9, "shaped_reward_max": 48, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 5.18, "onion_pickup_agent_0_min": 1, "onion_pickup_agent_0_max": 11, "onion_pickup_agent_1_mean": 5.61, "onion_pickup_agent_1_min": 1, "onion_pickup_agent_1_max": 11, "useful_onion_pickup_agent_0_mean": 3.52, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 9, "useful_onion_pickup_agent_1_mean": 3.9, "useful_onion_pickup_agent_1_min": 0, "useful_onion_pickup_agent_1_max": 9, "onion_drop_agent_0_mean": 2.43, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 
7, "onion_drop_agent_1_mean": 2.52, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 8, "useful_onion_drop_agent_0_mean": 1.34, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 1.3, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 7, "potting_onion_agent_0_mean": 2.3, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 6, "potting_onion_agent_1_mean": 2.63, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 7, "dish_pickup_agent_0_mean": 3.32, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 3.21, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 0.46, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 2, "useful_dish_pickup_agent_1_mean": 0.54, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 3, "dish_drop_agent_0_mean": 2.19, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 1.95, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 6, "useful_dish_drop_agent_0_mean": 0.87, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 5, "useful_dish_drop_agent_1_mean": 0.61, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 5, "soup_pickup_agent_0_mean": 1.36, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 6, "soup_pickup_agent_1_mean": 1.49, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 6, "soup_delivery_agent_0_mean": 0.58, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 2, "soup_delivery_agent_1_mean": 0.57, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 2, "soup_drop_agent_0_mean": 0.69, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 5, "soup_drop_agent_1_mean": 0.85, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 5, "optimal_onion_potting_agent_0_mean": 2.3, "optimal_onion_potting_agent_0_min": 0, 
"optimal_onion_potting_agent_0_max": 6, "optimal_onion_potting_agent_1_mean": 2.63, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 7, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 2.3, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 6, "viable_onion_potting_agent_1_mean": 2.63, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 7, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [25.0, 
9.0, 17.0, 27.0, 14.0, 19.0, 60.0, 36.0, 25.0, 17.0, 16.0, 28.0, 34.0, 45.0, 23.0, 36.0, 22.0, 22.0, 63.0, 12.0, 68.0, 25.0, 63.0, 25.0, 11.0, 57.0, 76.0, 30.0, 116.0, 45.0, 79.0, 80.0, 71.0, 57.0, 30.0, 9.0, 34.0, 42.0, 88.0, 33.0, 17.0, 30.0, 76.0, 85.0, 33.0, 31.0, 68.0, 12.0, 16.0, 79.0, 68.0, 19.0, 33.0, 19.0, 25.0, 31.0, 36.0, 44.0, 39.0, 11.0, 22.0, 42.0, 31.0, 25.0, 20.0, 31.0, 31.0, 17.0, 23.0, 74.0, 17.0, 14.0, 9.0, 76.0, 63.0, 88.0, 66.0, 37.0, 37.0, 37.0, 68.0, 77.0, 28.0, 28.0, 68.0, 25.0, 60.0, 20.0, 22.0, 36.0, 23.0, 82.0, 12.0, 9.0, 23.0, 19.0, 42.0, 79.0, 42.0, 20.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [12.0, 13.0, 3.0, 6.0, 6.0, 11.0, 10.0, 17.0, 11.0, 3.0, 6.0, 13.0, 26.0, 34.0, 17.0, 19.0, 3.0, 22.0, 8.0, 9.0, 8.0, 8.0, 19.0, 9.0, 28.0, 6.0, 25.0, 20.0, 14.0, 9.0, 9.0, 27.0, 16.0, 6.0, 11.0, 11.0, 31.0, 32.0, 6.0, 6.0, 33.0, 35.0, 13.0, 12.0, 29.0, 34.0, 6.0, 19.0, 3.0, 8.0, 26.0, 31.0, 34.0, 42.0, 10.0, 20.0, 68.0, 48.0, 22.0, 23.0, 48.0, 31.0, 45.0, 35.0, 35.0, 36.0, 29.0, 28.0, 11.0, 19.0, 0.0, 9.0, 11.0, 23.0, 25.0, 17.0, 45.0, 43.0, 22.0, 11.0, 5.0, 12.0, 17.0, 13.0, 48.0, 28.0, 32.0, 53.0, 22.0, 11.0, 18.0, 13.0, 29.0, 39.0, 6.0, 6.0, 3.0, 13.0, 37.0, 42.0, 32.0, 36.0, 11.0, 8.0, 13.0, 20.0, 11.0, 8.0, 8.0, 17.0, 14.0, 17.0, 19.0, 17.0, 14.0, 30.0, 14.0, 25.0, 5.0, 6.0, 6.0, 16.0, 9.0, 33.0, 17.0, 14.0, 11.0, 14.0, 6.0, 14.0, 12.0, 19.0, 11.0, 20.0, 14.0, 3.0, 11.0, 12.0, 43.0, 31.0, 11.0, 6.0, 14.0, 0.0, 9.0, 0.0, 45.0, 31.0, 28.0, 35.0, 
32.0, 56.0, 38.0, 28.0, 14.0, 23.0, 8.0, 29.0, 14.0, 23.0, 34.0, 34.0, 34.0, 43.0, 14.0, 14.0, 14.0, 14.0, 30.0, 38.0, 14.0, 11.0, 26.0, 34.0, 6.0, 14.0, 8.0, 14.0, 17.0, 19.0, 17.0, 6.0, 39.0, 43.0, 6.0, 6.0, 3.0, 6.0, 11.0, 12.0, 13.0, 6.0, 17.0, 25.0, 37.0, 42.0, 22.0, 20.0, 6.0, 14.0]}, "sampler_perf": {"mean_env_wait_ms": 7.729743796447544, "mean_processing_ms": 0.1663097758329898, "mean_inference_ms": 3.1401875416297957}, "off_policy_estimator": {}, "info": {"num_steps_trained": 504000, "num_steps_sampled": 268800, "sample_time_ms": 21421.608, "load_time_ms": 38.85, "grad_time_ms": 8956.305, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 1.9073486612342094e-07, "cur_lr": 0.0010000000474974513, "total_loss": -0.05712709203362465, "policy_loss": -0.005733281373977661, "vf_loss": 7.644298553466797, "vf_explained_var": 0.10351377725601196, "kl": 0.0011409734142944217, "entropy": 1.7526286840438843, "entropy_coeff": 0.029759999364614487, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 268800, "episodes_total": 672, "training_iteration": 21, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_20-03-07", "timestamp": 1660244587, "time_this_iter_s": 29.522944927215576, "time_total_s": 3007.9541013240814, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, 
"custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, 
"device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 3007.9541013240814, "timesteps_since_restore": 268800, "iterations_since_restore": 21, "perf": {"cpu_util_percent": 34.18809523809524, "ram_util_percent": 57.730952380952374}}
-{"episode_reward_max": 116.0, "episode_reward_min": 9.0, "episode_reward_mean": 37.47, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 68.0}, "policy_reward_mean": {"ppo": 18.735}, "custom_metrics": {"sparse_reward_mean": 5.4, "sparse_reward_min": 0, "sparse_reward_max": 40, "shaped_reward_mean": 26.67, "shaped_reward_min": 9, "shaped_reward_max": 50, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 5.24, "onion_pickup_agent_0_min": 1, "onion_pickup_agent_0_max": 12, "onion_pickup_agent_1_mean": 5.51, "onion_pickup_agent_1_min": 1, "onion_pickup_agent_1_max": 11, "useful_onion_pickup_agent_0_mean": 3.51, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 9, "useful_onion_pickup_agent_1_mean": 3.59, "useful_onion_pickup_agent_1_min": 0, "useful_onion_pickup_agent_1_max": 9, "onion_drop_agent_0_mean": 2.46, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 
7, "onion_drop_agent_1_mean": 2.54, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 8, "useful_onion_drop_agent_0_mean": 1.5, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 1.49, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 7, "potting_onion_agent_0_mean": 2.32, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 6, "potting_onion_agent_1_mean": 2.55, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 7, "dish_pickup_agent_0_mean": 3.18, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 3.39, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 0.45, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 2, "useful_dish_pickup_agent_1_mean": 0.52, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 3, "dish_drop_agent_0_mean": 2.05, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 2.22, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 6, "useful_dish_drop_agent_0_mean": 0.81, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 5, "useful_dish_drop_agent_1_mean": 0.7, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 5, "soup_pickup_agent_0_mean": 1.35, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 6, "soup_pickup_agent_1_mean": 1.35, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 6, "soup_delivery_agent_0_mean": 0.52, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 2, "soup_delivery_agent_1_mean": 0.55, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 2, "soup_drop_agent_0_mean": 0.73, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 5, "soup_drop_agent_1_mean": 0.71, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 6, "optimal_onion_potting_agent_0_mean": 2.32, "optimal_onion_potting_agent_0_min": 0, 
"optimal_onion_potting_agent_0_max": 6, "optimal_onion_potting_agent_1_mean": 2.55, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 7, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 2.32, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 6, "viable_onion_potting_agent_1_mean": 2.55, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 7, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": 
[73.0, 25.0, 31.0, 34.0, 16.0, 9.0, 23.0, 39.0, 35.0, 28.0, 79.0, 36.0, 57.0, 25.0, 28.0, 12.0, 17.0, 25.0, 25.0, 71.0, 44.0, 62.0, 22.0, 17.0, 23.0, 25.0, 37.0, 66.0, 50.0, 23.0, 12.0, 30.0, 20.0, 31.0, 31.0, 17.0, 23.0, 74.0, 17.0, 14.0, 9.0, 76.0, 63.0, 88.0, 66.0, 37.0, 37.0, 37.0, 68.0, 77.0, 28.0, 28.0, 68.0, 25.0, 60.0, 20.0, 22.0, 36.0, 23.0, 82.0, 12.0, 9.0, 23.0, 19.0, 42.0, 79.0, 42.0, 20.0, 25.0, 9.0, 17.0, 27.0, 14.0, 19.0, 60.0, 36.0, 25.0, 17.0, 16.0, 28.0, 34.0, 45.0, 23.0, 36.0, 22.0, 22.0, 63.0, 12.0, 68.0, 25.0, 63.0, 25.0, 11.0, 57.0, 76.0, 30.0, 116.0, 45.0, 79.0, 80.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [41.0, 32.0, 17.0, 8.0, 10.0, 21.0, 21.0, 13.0, 6.0, 10.0, 6.0, 3.0, 9.0, 14.0, 25.0, 14.0, 27.0, 8.0, 5.0, 23.0, 34.0, 45.0, 25.0, 11.0, 23.0, 34.0, 14.0, 11.0, 14.0, 14.0, 6.0, 6.0, 3.0, 14.0, 14.0, 11.0, 11.0, 14.0, 34.0, 37.0, 28.0, 16.0, 26.0, 36.0, 13.0, 9.0, 6.0, 11.0, 9.0, 14.0, 20.0, 5.0, 23.0, 14.0, 32.0, 34.0, 19.0, 31.0, 11.0, 12.0, 6.0, 6.0, 9.0, 21.0, 6.0, 14.0, 12.0, 19.0, 11.0, 20.0, 14.0, 3.0, 11.0, 12.0, 43.0, 31.0, 11.0, 6.0, 14.0, 0.0, 9.0, 0.0, 45.0, 31.0, 28.0, 35.0, 32.0, 56.0, 38.0, 28.0, 14.0, 23.0, 8.0, 29.0, 14.0, 23.0, 34.0, 34.0, 34.0, 43.0, 14.0, 14.0, 14.0, 14.0, 30.0, 38.0, 14.0, 11.0, 26.0, 34.0, 6.0, 14.0, 8.0, 14.0, 17.0, 19.0, 17.0, 6.0, 39.0, 43.0, 6.0, 6.0, 3.0, 6.0, 11.0, 12.0, 13.0, 6.0, 17.0, 25.0, 37.0, 42.0, 22.0, 20.0, 6.0, 14.0, 12.0, 13.0, 3.0, 6.0, 6.0, 11.0, 10.0, 17.0, 11.0, 3.0, 6.0, 13.0, 26.0, 
34.0, 17.0, 19.0, 3.0, 22.0, 8.0, 9.0, 8.0, 8.0, 19.0, 9.0, 28.0, 6.0, 25.0, 20.0, 14.0, 9.0, 9.0, 27.0, 16.0, 6.0, 11.0, 11.0, 31.0, 32.0, 6.0, 6.0, 33.0, 35.0, 13.0, 12.0, 29.0, 34.0, 6.0, 19.0, 3.0, 8.0, 26.0, 31.0, 34.0, 42.0, 10.0, 20.0, 68.0, 48.0, 22.0, 23.0, 48.0, 31.0, 45.0, 35.0]}, "sampler_perf": {"mean_env_wait_ms": 7.382168276478265, "mean_processing_ms": 0.16610498275202595, "mean_inference_ms": 3.034581035402087}, "off_policy_estimator": {}, "info": {"num_steps_trained": 528000, "num_steps_sampled": 281600, "sample_time_ms": 21629.558, "load_time_ms": 39.186, "grad_time_ms": 8958.867, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 9.536743306171047e-08, "cur_lr": 0.0010000000474974513, "total_loss": -0.045350998640060425, "policy_loss": -0.008628163486719131, "vf_loss": 5.3433098793029785, "vf_explained_var": 0.18131445348262787, "kl": 0.0009087324724532664, "entropy": 1.7534428834915161, "entropy_coeff": 0.021247999742627144, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 281600, "episodes_total": 704, "training_iteration": 22, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_20-03-36", "timestamp": 1660244616, "time_this_iter_s": 29.197812795639038, "time_total_s": 3037.1519141197205, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": 
null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, 
"device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 3037.1519141197205, "timesteps_since_restore": 281600, "iterations_since_restore": 22, "perf": {"cpu_util_percent": 36.32142857142857, "ram_util_percent": 57.66904761904762}}
-{"episode_reward_max": 128.0, "episode_reward_min": 9.0, "episode_reward_mean": 40.29, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 68.0}, "policy_reward_mean": {"ppo": 20.145}, "custom_metrics": {"sparse_reward_mean": 6.0, "sparse_reward_min": 0, "sparse_reward_max": 40, "shaped_reward_mean": 28.29, "shaped_reward_min": 9, "shaped_reward_max": 53, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 5.24, "onion_pickup_agent_0_min": 1, "onion_pickup_agent_0_max": 12, "onion_pickup_agent_1_mean": 5.54, "onion_pickup_agent_1_min": 1, "onion_pickup_agent_1_max": 13, "useful_onion_pickup_agent_0_mean": 3.44, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 10, "useful_onion_pickup_agent_1_mean": 3.7, "useful_onion_pickup_agent_1_min": 0, "useful_onion_pickup_agent_1_max": 11, "onion_drop_agent_0_mean": 2.36, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 2.43, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 8, "useful_onion_drop_agent_0_mean": 1.43, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 1.42, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 7, "potting_onion_agent_0_mean": 2.43, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 6, "potting_onion_agent_1_mean": 2.7, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 8, "dish_pickup_agent_0_mean": 3.2, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 3.37, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 0.44, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 2, "useful_dish_pickup_agent_1_mean": 0.51, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 3, "dish_drop_agent_0_mean": 2.02, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 2.07, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 6, "useful_dish_drop_agent_0_mean": 0.72, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 4, "useful_dish_drop_agent_1_mean": 0.77, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 4, "soup_pickup_agent_0_mean": 1.49, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 6, "soup_pickup_agent_1_mean": 1.41, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 6, "soup_delivery_agent_0_mean": 0.55, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 2, "soup_delivery_agent_1_mean": 0.61, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 3, "soup_drop_agent_0_mean": 0.82, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 5, "soup_drop_agent_1_mean": 0.67, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 6, "optimal_onion_potting_agent_0_mean": 2.43, "optimal_onion_potting_agent_0_min": 
0, "optimal_onion_potting_agent_0_max": 6, "optimal_onion_potting_agent_1_mean": 2.7, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 8, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 2.43, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 6, "viable_onion_potting_agent_1_mean": 2.7, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 8, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": 
[63.0, 9.0, 12.0, 125.0, 82.0, 68.0, 47.0, 24.0, 28.0, 30.0, 33.0, 20.0, 47.0, 128.0, 22.0, 37.0, 43.0, 74.0, 80.0, 53.0, 62.0, 76.0, 36.0, 31.0, 39.0, 20.0, 28.0, 9.0, 34.0, 17.0, 60.0, 85.0, 42.0, 79.0, 42.0, 20.0, 25.0, 9.0, 17.0, 27.0, 14.0, 19.0, 60.0, 36.0, 25.0, 17.0, 16.0, 28.0, 34.0, 45.0, 23.0, 36.0, 22.0, 22.0, 63.0, 12.0, 68.0, 25.0, 63.0, 25.0, 11.0, 57.0, 76.0, 30.0, 116.0, 45.0, 79.0, 80.0, 73.0, 25.0, 31.0, 34.0, 16.0, 9.0, 23.0, 39.0, 35.0, 28.0, 79.0, 36.0, 57.0, 25.0, 28.0, 12.0, 17.0, 25.0, 25.0, 71.0, 44.0, 62.0, 22.0, 17.0, 23.0, 25.0, 37.0, 66.0, 50.0, 23.0, 12.0, 30.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [29.0, 34.0, 3.0, 6.0, 6.0, 6.0, 58.0, 67.0, 48.0, 34.0, 32.0, 36.0, 8.0, 39.0, 9.0, 15.0, 13.0, 15.0, 19.0, 11.0, 14.0, 19.0, 8.0, 12.0, 28.0, 19.0, 61.0, 67.0, 10.0, 12.0, 11.0, 26.0, 14.0, 29.0, 40.0, 34.0, 38.0, 42.0, 17.0, 36.0, 26.0, 36.0, 39.0, 37.0, 14.0, 22.0, 22.0, 9.0, 22.0, 17.0, 3.0, 17.0, 8.0, 20.0, 9.0, 0.0, 17.0, 17.0, 11.0, 6.0, 31.0, 29.0, 48.0, 37.0, 17.0, 25.0, 37.0, 42.0, 22.0, 20.0, 6.0, 14.0, 12.0, 13.0, 3.0, 6.0, 6.0, 11.0, 10.0, 17.0, 11.0, 3.0, 6.0, 13.0, 26.0, 34.0, 17.0, 19.0, 3.0, 22.0, 8.0, 9.0, 8.0, 8.0, 19.0, 9.0, 28.0, 6.0, 25.0, 20.0, 14.0, 9.0, 9.0, 27.0, 16.0, 6.0, 11.0, 11.0, 31.0, 32.0, 6.0, 6.0, 33.0, 35.0, 13.0, 12.0, 29.0, 34.0, 6.0, 19.0, 3.0, 8.0, 26.0, 31.0, 34.0, 42.0, 10.0, 20.0, 68.0, 48.0, 22.0, 23.0, 48.0, 31.0, 45.0, 35.0, 41.0, 32.0, 17.0, 8.0, 10.0, 21.0, 21.0, 13.0, 6.0, 10.0, 6.0, 3.0, 9.0, 
14.0, 25.0, 14.0, 27.0, 8.0, 5.0, 23.0, 34.0, 45.0, 25.0, 11.0, 23.0, 34.0, 14.0, 11.0, 14.0, 14.0, 6.0, 6.0, 3.0, 14.0, 14.0, 11.0, 11.0, 14.0, 34.0, 37.0, 28.0, 16.0, 26.0, 36.0, 13.0, 9.0, 6.0, 11.0, 9.0, 14.0, 20.0, 5.0, 23.0, 14.0, 32.0, 34.0, 19.0, 31.0, 11.0, 12.0, 6.0, 6.0, 9.0, 21.0]}, "sampler_perf": {"mean_env_wait_ms": 7.066502405705404, "mean_processing_ms": 0.16594670413037083, "mean_inference_ms": 2.938769748646424}, "off_policy_estimator": {}, "info": {"num_steps_trained": 552000, "num_steps_sampled": 294400, "sample_time_ms": 20554.7, "load_time_ms": 39.212, "grad_time_ms": 9005.588, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 4.7683716530855236e-08, "cur_lr": 0.0010000000474974513, "total_loss": -0.025955183431506157, "policy_loss": -0.0047044227831065655, "vf_loss": 9.28939437866211, "vf_explained_var": 0.20618398487567902, "kl": 0.0012655678438022733, "entropy": 1.741496205329895, "entropy_coeff": 0.012736000120639801, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 294400, "episodes_total": 736, "training_iteration": 23, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_20-04-05", "timestamp": 1660244645, "time_this_iter_s": 29.364897966384888, "time_total_s": 3066.5168120861053, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": 
null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, 
"device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 3066.5168120861053, "timesteps_since_restore": 294400, "iterations_since_restore": 23, "perf": {"cpu_util_percent": 34.0, "ram_util_percent": 57.70487804878048}}
-{"episode_reward_max": 128.0, "episode_reward_min": 9.0, "episode_reward_mean": 43.02, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 68.0}, "policy_reward_mean": {"ppo": 21.51}, "custom_metrics": {"sparse_reward_mean": 6.8, "sparse_reward_min": 0, "sparse_reward_max": 40, "shaped_reward_mean": 29.42, "shaped_reward_min": 9, "shaped_reward_max": 53, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 5.56, "onion_pickup_agent_0_min": 1, "onion_pickup_agent_0_max": 15, "onion_pickup_agent_1_mean": 5.35, "onion_pickup_agent_1_min": 1, "onion_pickup_agent_1_max": 13, "useful_onion_pickup_agent_0_mean": 3.71, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 10, "useful_onion_pickup_agent_1_mean": 3.74, "useful_onion_pickup_agent_1_min": 0, "useful_onion_pickup_agent_1_max": 11, "onion_drop_agent_0_mean": 2.43, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 9, "onion_drop_agent_1_mean": 2.31, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 7, "useful_onion_drop_agent_0_mean": 1.41, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 1.22, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 6, "potting_onion_agent_0_mean": 2.68, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 6, "potting_onion_agent_1_mean": 2.66, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 8, "dish_pickup_agent_0_mean": 3.29, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 3.32, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 0.52, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 2, "useful_dish_pickup_agent_1_mean": 0.48, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 2, "dish_drop_agent_0_mean": 2.0, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 9, "dish_drop_agent_1_mean": 1.94, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 6, "useful_dish_drop_agent_0_mean": 0.76, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 7, "useful_dish_drop_agent_1_mean": 0.7, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 5, "soup_pickup_agent_0_mean": 1.46, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 5, "soup_pickup_agent_1_mean": 1.47, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 6, "soup_delivery_agent_0_mean": 0.6, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 2, "soup_delivery_agent_1_mean": 0.71, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 3, "soup_drop_agent_0_mean": 0.76, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 4, "soup_drop_agent_1_mean": 0.66, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 6, "optimal_onion_potting_agent_0_mean": 2.68, "optimal_onion_potting_agent_0_min": 
0, "optimal_onion_potting_agent_0_max": 6, "optimal_onion_potting_agent_1_mean": 2.66, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 8, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 2.68, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 6, "viable_onion_potting_agent_1_mean": 2.66, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 8, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": 
[9.0, 42.0, 31.0, 28.0, 34.0, 41.0, 14.0, 20.0, 31.0, 41.0, 79.0, 30.0, 74.0, 28.0, 9.0, 22.0, 39.0, 65.0, 63.0, 66.0, 68.0, 22.0, 76.0, 47.0, 63.0, 79.0, 79.0, 74.0, 25.0, 20.0, 17.0, 25.0, 116.0, 45.0, 79.0, 80.0, 73.0, 25.0, 31.0, 34.0, 16.0, 9.0, 23.0, 39.0, 35.0, 28.0, 79.0, 36.0, 57.0, 25.0, 28.0, 12.0, 17.0, 25.0, 25.0, 71.0, 44.0, 62.0, 22.0, 17.0, 23.0, 25.0, 37.0, 66.0, 50.0, 23.0, 12.0, 30.0, 63.0, 9.0, 12.0, 125.0, 82.0, 68.0, 47.0, 24.0, 28.0, 30.0, 33.0, 20.0, 47.0, 128.0, 22.0, 37.0, 43.0, 74.0, 80.0, 53.0, 62.0, 76.0, 36.0, 31.0, 39.0, 20.0, 28.0, 9.0, 34.0, 17.0, 60.0, 85.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [6.0, 3.0, 22.0, 20.0, 19.0, 12.0, 14.0, 14.0, 29.0, 5.0, 25.0, 16.0, 14.0, 0.0, 9.0, 11.0, 16.0, 15.0, 14.0, 27.0, 37.0, 42.0, 15.0, 15.0, 42.0, 32.0, 16.0, 12.0, 6.0, 3.0, 19.0, 3.0, 28.0, 11.0, 28.0, 37.0, 31.0, 32.0, 35.0, 31.0, 26.0, 42.0, 5.0, 17.0, 34.0, 42.0, 31.0, 16.0, 28.0, 35.0, 35.0, 44.0, 40.0, 39.0, 38.0, 36.0, 8.0, 17.0, 12.0, 8.0, 8.0, 9.0, 9.0, 16.0, 68.0, 48.0, 22.0, 23.0, 48.0, 31.0, 45.0, 35.0, 41.0, 32.0, 17.0, 8.0, 10.0, 21.0, 21.0, 13.0, 6.0, 10.0, 6.0, 3.0, 9.0, 14.0, 25.0, 14.0, 27.0, 8.0, 5.0, 23.0, 34.0, 45.0, 25.0, 11.0, 23.0, 34.0, 14.0, 11.0, 14.0, 14.0, 6.0, 6.0, 3.0, 14.0, 14.0, 11.0, 11.0, 14.0, 34.0, 37.0, 28.0, 16.0, 26.0, 36.0, 13.0, 9.0, 6.0, 11.0, 9.0, 14.0, 20.0, 5.0, 23.0, 14.0, 32.0, 34.0, 19.0, 31.0, 11.0, 12.0, 6.0, 6.0, 9.0, 21.0, 29.0, 34.0, 3.0, 6.0, 6.0, 6.0, 58.0, 67.0, 48.0, 34.0, 32.0, 36.0, 
8.0, 39.0, 9.0, 15.0, 13.0, 15.0, 19.0, 11.0, 14.0, 19.0, 8.0, 12.0, 28.0, 19.0, 61.0, 67.0, 10.0, 12.0, 11.0, 26.0, 14.0, 29.0, 40.0, 34.0, 38.0, 42.0, 17.0, 36.0, 26.0, 36.0, 39.0, 37.0, 14.0, 22.0, 22.0, 9.0, 22.0, 17.0, 3.0, 17.0, 8.0, 20.0, 9.0, 0.0, 17.0, 17.0, 11.0, 6.0, 31.0, 29.0, 48.0, 37.0]}, "sampler_perf": {"mean_env_wait_ms": 6.778628865492887, "mean_processing_ms": 0.1658515378049688, "mean_inference_ms": 2.8523193064237637}, "off_policy_estimator": {}, "info": {"num_steps_trained": 576000, "num_steps_sampled": 307200, "sample_time_ms": 20518.226, "load_time_ms": 38.592, "grad_time_ms": 9000.858, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 2.3841858265427618e-08, "cur_lr": 0.0010000000474974513, "total_loss": -0.01208301167935133, "policy_loss": -0.0054773432202637196, "vf_loss": 7.661229610443115, "vf_explained_var": 0.24070757627487183, "kl": 0.001010378822684288, "entropy": 1.7452141046524048, "entropy_coeff": 0.004224000032991171, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 307200, "episodes_total": 768, "training_iteration": 24, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_20-04-37", "timestamp": 1660244677, "time_this_iter_s": 32.22774386405945, "time_total_s": 3098.744555950165, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, 
"custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, 
"log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 3098.744555950165, "timesteps_since_restore": 307200, "iterations_since_restore": 24, "perf": {"cpu_util_percent": 33.75869565217391, "ram_util_percent": 57.7217391304348}}
-{"episode_reward_max": 128.0, "episode_reward_min": 9.0, "episode_reward_mean": 45.42, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 67.0}, "policy_reward_mean": {"ppo": 22.71}, "custom_metrics": {"sparse_reward_mean": 7.4, "sparse_reward_min": 0, "sparse_reward_max": 40, "shaped_reward_mean": 30.62, "shaped_reward_min": 9, "shaped_reward_max": 56, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 5.3, "onion_pickup_agent_0_min": 1, "onion_pickup_agent_0_max": 15, "onion_pickup_agent_1_mean": 5.66, "onion_pickup_agent_1_min": 1, "onion_pickup_agent_1_max": 13, "useful_onion_pickup_agent_0_mean": 3.55, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 10, "useful_onion_pickup_agent_1_mean": 4.23, "useful_onion_pickup_agent_1_min": 0, "useful_onion_pickup_agent_1_max": 11, "onion_drop_agent_0_mean": 2.27, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 
9, "onion_drop_agent_1_mean": 2.26, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 1.28, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 1.07, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 2.55, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 6, "potting_onion_agent_1_mean": 2.99, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 8, "dish_pickup_agent_0_mean": 3.67, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 3.36, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 0.62, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 3, "useful_dish_pickup_agent_1_mean": 0.48, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 3, "dish_drop_agent_0_mean": 2.35, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 9, "dish_drop_agent_1_mean": 1.95, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.91, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 7, "useful_dish_drop_agent_1_mean": 0.77, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 7, "soup_pickup_agent_0_mean": 1.57, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 5, "soup_pickup_agent_1_mean": 1.59, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 6, "soup_delivery_agent_0_mean": 0.63, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 2, "soup_delivery_agent_1_mean": 0.79, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 3, "soup_drop_agent_0_mean": 0.87, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 4, "soup_drop_agent_1_mean": 0.68, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 6, "optimal_onion_potting_agent_0_mean": 2.55, "optimal_onion_potting_agent_0_min": 0, 
"optimal_onion_potting_agent_0_max": 6, "optimal_onion_potting_agent_1_mean": 2.99, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 8, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 2.55, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 6, "viable_onion_potting_agent_1_mean": 2.99, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 8, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": 
[38.0, 33.0, 12.0, 96.0, 77.0, 31.0, 16.0, 48.0, 28.0, 53.0, 20.0, 28.0, 20.0, 74.0, 12.0, 79.0, 17.0, 91.0, 76.0, 31.0, 9.0, 23.0, 84.0, 68.0, 125.0, 82.0, 31.0, 33.0, 17.0, 42.0, 79.0, 71.0, 50.0, 23.0, 12.0, 30.0, 63.0, 9.0, 12.0, 125.0, 82.0, 68.0, 47.0, 24.0, 28.0, 30.0, 33.0, 20.0, 47.0, 128.0, 22.0, 37.0, 43.0, 74.0, 80.0, 53.0, 62.0, 76.0, 36.0, 31.0, 39.0, 20.0, 28.0, 9.0, 34.0, 17.0, 60.0, 85.0, 9.0, 42.0, 31.0, 28.0, 34.0, 41.0, 14.0, 20.0, 31.0, 41.0, 79.0, 30.0, 74.0, 28.0, 9.0, 22.0, 39.0, 65.0, 63.0, 66.0, 68.0, 22.0, 76.0, 47.0, 63.0, 79.0, 79.0, 74.0, 25.0, 20.0, 17.0, 25.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [25.0, 13.0, 8.0, 25.0, 6.0, 6.0, 59.0, 37.0, 34.0, 43.0, 20.0, 11.0, 0.0, 16.0, 20.0, 28.0, 16.0, 12.0, 30.0, 23.0, 8.0, 12.0, 14.0, 14.0, 11.0, 9.0, 39.0, 35.0, 6.0, 6.0, 37.0, 42.0, 14.0, 3.0, 45.0, 46.0, 42.0, 34.0, 20.0, 11.0, 3.0, 6.0, 12.0, 11.0, 33.0, 51.0, 40.0, 28.0, 59.0, 66.0, 40.0, 42.0, 17.0, 14.0, 14.0, 19.0, 5.0, 12.0, 6.0, 36.0, 34.0, 45.0, 29.0, 42.0, 19.0, 31.0, 11.0, 12.0, 6.0, 6.0, 9.0, 21.0, 29.0, 34.0, 3.0, 6.0, 6.0, 6.0, 58.0, 67.0, 48.0, 34.0, 32.0, 36.0, 8.0, 39.0, 9.0, 15.0, 13.0, 15.0, 19.0, 11.0, 14.0, 19.0, 8.0, 12.0, 28.0, 19.0, 61.0, 67.0, 10.0, 12.0, 11.0, 26.0, 14.0, 29.0, 40.0, 34.0, 38.0, 42.0, 17.0, 36.0, 26.0, 36.0, 39.0, 37.0, 14.0, 22.0, 22.0, 9.0, 22.0, 17.0, 3.0, 17.0, 8.0, 20.0, 9.0, 0.0, 17.0, 17.0, 11.0, 6.0, 31.0, 29.0, 48.0, 37.0, 6.0, 3.0, 22.0, 20.0, 19.0, 12.0, 14.0, 14.0, 29.0, 5.0, 25.0, 16.0, 
14.0, 0.0, 9.0, 11.0, 16.0, 15.0, 14.0, 27.0, 37.0, 42.0, 15.0, 15.0, 42.0, 32.0, 16.0, 12.0, 6.0, 3.0, 19.0, 3.0, 28.0, 11.0, 28.0, 37.0, 31.0, 32.0, 35.0, 31.0, 26.0, 42.0, 5.0, 17.0, 34.0, 42.0, 31.0, 16.0, 28.0, 35.0, 35.0, 44.0, 40.0, 39.0, 38.0, 36.0, 8.0, 17.0, 12.0, 8.0, 8.0, 9.0, 9.0, 16.0]}, "sampler_perf": {"mean_env_wait_ms": 6.515320221213355, "mean_processing_ms": 0.1659420994573044, "mean_inference_ms": 2.777987966122339}, "off_policy_estimator": {}, "info": {"num_steps_trained": 600000, "num_steps_sampled": 320000, "sample_time_ms": 21126.015, "load_time_ms": 38.663, "grad_time_ms": 9096.932, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 1.1920929132713809e-08, "cur_lr": 0.0010000000474974513, "total_loss": -0.00892395805567503, "policy_loss": -0.008948341012001038, "vf_loss": 8.925480842590332, "vf_explained_var": 0.24435751140117645, "kl": 0.0012184166116639972, "entropy": 1.7363275289535522, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 320000, "episodes_total": 800, "training_iteration": 25, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_20-05-13", "timestamp": 1660244713, "time_this_iter_s": 35.73040580749512, "time_total_s": 3134.47496175766, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, 
"custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, 
"log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 3134.47496175766, "timesteps_since_restore": 320000, "iterations_since_restore": 25, "perf": {"cpu_util_percent": 36.32, "ram_util_percent": 57.904}}
-{"episode_reward_max": 142.0, "episode_reward_min": 9.0, "episode_reward_mean": 46.93, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 79.0}, "policy_reward_mean": {"ppo": 23.465}, "custom_metrics": {"sparse_reward_mean": 7.8, "sparse_reward_min": 0, "sparse_reward_max": 40, "shaped_reward_mean": 31.33, "shaped_reward_min": 9, "shaped_reward_max": 65, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 5.5, "onion_pickup_agent_0_min": 1, "onion_pickup_agent_0_max": 15, "onion_pickup_agent_1_mean": 5.81, "onion_pickup_agent_1_min": 1, "onion_pickup_agent_1_max": 13, "useful_onion_pickup_agent_0_mean": 3.81, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 10, "useful_onion_pickup_agent_1_mean": 4.34, "useful_onion_pickup_agent_1_min": 0, "useful_onion_pickup_agent_1_max": 9, "onion_drop_agent_0_mean": 2.31, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 
9, "onion_drop_agent_1_mean": 2.39, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 10, "useful_onion_drop_agent_0_mean": 1.3, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 1.18, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 10, "potting_onion_agent_0_mean": 2.7, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 7, "potting_onion_agent_1_mean": 3.0, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 7, "dish_pickup_agent_0_mean": 3.76, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 3.26, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 0.67, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 3, "useful_dish_pickup_agent_1_mean": 0.49, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 3, "dish_drop_agent_0_mean": 2.44, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 9, "dish_drop_agent_1_mean": 1.85, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.96, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 7, "useful_dish_drop_agent_1_mean": 0.73, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 7, "soup_pickup_agent_0_mean": 1.52, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 5, "soup_pickup_agent_1_mean": 1.63, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 6, "soup_delivery_agent_0_mean": 0.68, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 2, "soup_delivery_agent_1_mean": 0.77, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 3, "soup_drop_agent_0_mean": 0.78, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 4, "soup_drop_agent_1_mean": 0.76, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 5, "optimal_onion_potting_agent_0_mean": 2.7, "optimal_onion_potting_agent_0_min": 0, 
"optimal_onion_potting_agent_0_max": 7, "optimal_onion_potting_agent_1_mean": 3.0, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 7, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 2.7, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 7, "viable_onion_potting_agent_1_mean": 3.0, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 7, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [9.0, 
142.0, 120.0, 26.0, 59.0, 71.0, 122.0, 44.0, 53.0, 12.0, 45.0, 47.0, 36.0, 83.0, 36.0, 74.0, 31.0, 9.0, 31.0, 9.0, 105.0, 25.0, 79.0, 33.0, 74.0, 41.0, 63.0, 31.0, 9.0, 20.0, 22.0, 31.0, 34.0, 17.0, 60.0, 85.0, 9.0, 42.0, 31.0, 28.0, 34.0, 41.0, 14.0, 20.0, 31.0, 41.0, 79.0, 30.0, 74.0, 28.0, 9.0, 22.0, 39.0, 65.0, 63.0, 66.0, 68.0, 22.0, 76.0, 47.0, 63.0, 79.0, 79.0, 74.0, 25.0, 20.0, 17.0, 25.0, 38.0, 33.0, 12.0, 96.0, 77.0, 31.0, 16.0, 48.0, 28.0, 53.0, 20.0, 28.0, 20.0, 74.0, 12.0, 79.0, 17.0, 91.0, 76.0, 31.0, 9.0, 23.0, 84.0, 68.0, 125.0, 82.0, 31.0, 33.0, 17.0, 42.0, 79.0, 71.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [0.0, 9.0, 63.0, 79.0, 63.0, 57.0, 9.0, 17.0, 36.0, 23.0, 37.0, 34.0, 60.0, 62.0, 36.0, 8.0, 28.0, 25.0, 9.0, 3.0, 14.0, 31.0, 21.0, 26.0, 16.0, 20.0, 48.0, 35.0, 17.0, 19.0, 37.0, 37.0, 3.0, 28.0, 3.0, 6.0, 9.0, 22.0, 3.0, 6.0, 51.0, 54.0, 14.0, 11.0, 28.0, 51.0, 11.0, 22.0, 34.0, 40.0, 19.0, 22.0, 37.0, 26.0, 17.0, 14.0, 0.0, 9.0, 8.0, 12.0, 11.0, 11.0, 17.0, 14.0, 17.0, 17.0, 11.0, 6.0, 31.0, 29.0, 48.0, 37.0, 6.0, 3.0, 22.0, 20.0, 19.0, 12.0, 14.0, 14.0, 29.0, 5.0, 25.0, 16.0, 14.0, 0.0, 9.0, 11.0, 16.0, 15.0, 14.0, 27.0, 37.0, 42.0, 15.0, 15.0, 42.0, 32.0, 16.0, 12.0, 6.0, 3.0, 19.0, 3.0, 28.0, 11.0, 28.0, 37.0, 31.0, 32.0, 35.0, 31.0, 26.0, 42.0, 5.0, 17.0, 34.0, 42.0, 31.0, 16.0, 28.0, 35.0, 35.0, 44.0, 40.0, 39.0, 38.0, 36.0, 8.0, 17.0, 12.0, 8.0, 8.0, 9.0, 9.0, 16.0, 25.0, 13.0, 8.0, 25.0, 6.0, 6.0, 59.0, 37.0, 34.0, 43.0, 20.0, 11.0, 0.0, 
16.0, 20.0, 28.0, 16.0, 12.0, 30.0, 23.0, 8.0, 12.0, 14.0, 14.0, 11.0, 9.0, 39.0, 35.0, 6.0, 6.0, 37.0, 42.0, 14.0, 3.0, 45.0, 46.0, 42.0, 34.0, 20.0, 11.0, 3.0, 6.0, 12.0, 11.0, 33.0, 51.0, 40.0, 28.0, 59.0, 66.0, 40.0, 42.0, 17.0, 14.0, 14.0, 19.0, 5.0, 12.0, 6.0, 36.0, 34.0, 45.0, 29.0, 42.0]}, "sampler_perf": {"mean_env_wait_ms": 6.273535039263677, "mean_processing_ms": 0.16612280496799353, "mean_inference_ms": 2.713084381170351}, "off_policy_estimator": {}, "info": {"num_steps_trained": 624000, "num_steps_sampled": 332800, "sample_time_ms": 21568.501, "load_time_ms": 38.757, "grad_time_ms": 9275.576, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 5.9604645663569045e-09, "cur_lr": 0.0010000000474974513, "total_loss": -0.011042184196412563, "policy_loss": -0.01108124852180481, "vf_loss": 9.051116943359375, "vf_explained_var": 0.3293954133987427, "kl": 0.0011855209013447165, "entropy": 1.7320860624313354, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 332800, "episodes_total": 832, "training_iteration": 26, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_20-05-48", "timestamp": 1660244748, "time_this_iter_s": 34.898388147354126, "time_total_s": 3169.373349905014, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": 
null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, 
"device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 3169.373349905014, "timesteps_since_restore": 332800, "iterations_since_restore": 26, "perf": {"cpu_util_percent": 38.21224489795919, "ram_util_percent": 57.97551020408163}}
-{"episode_reward_max": 142.0, "episode_reward_min": 9.0, "episode_reward_mean": 48.57, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 79.0}, "policy_reward_mean": {"ppo": 24.285}, "custom_metrics": {"sparse_reward_mean": 8.2, "sparse_reward_min": 0, "sparse_reward_max": 40, "shaped_reward_mean": 32.17, "shaped_reward_min": 9, "shaped_reward_max": 73, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 5.38, "onion_pickup_agent_0_min": 1, "onion_pickup_agent_0_max": 13, "onion_pickup_agent_1_mean": 5.95, "onion_pickup_agent_1_min": 2, "onion_pickup_agent_1_max": 13, "useful_onion_pickup_agent_0_mean": 3.8, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 10, "useful_onion_pickup_agent_1_mean": 4.51, "useful_onion_pickup_agent_1_min": 0, "useful_onion_pickup_agent_1_max": 10, "onion_drop_agent_0_mean": 2.24, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 9, "onion_drop_agent_1_mean": 2.33, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 10, "useful_onion_drop_agent_0_mean": 1.28, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 1.16, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 10, "potting_onion_agent_0_mean": 2.69, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 7, "potting_onion_agent_1_mean": 3.14, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 7, "dish_pickup_agent_0_mean": 3.9, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 3.46, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 0.66, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 3, "useful_dish_pickup_agent_1_mean": 0.5, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 3, "dish_drop_agent_0_mean": 2.58, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 9, "dish_drop_agent_1_mean": 2.02, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.94, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 7, "useful_dish_drop_agent_1_mean": 0.83, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 7, "soup_pickup_agent_0_mean": 1.54, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 6, "soup_pickup_agent_1_mean": 1.75, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 6, "soup_delivery_agent_0_mean": 0.7, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 4, "soup_delivery_agent_1_mean": 0.77, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 3, "soup_drop_agent_0_mean": 0.75, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 5, "soup_drop_agent_1_mean": 0.87, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 5, "optimal_onion_potting_agent_0_mean": 2.69, 
"optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 7, "optimal_onion_potting_agent_1_mean": 3.14, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 7, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 2.69, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 7, "viable_onion_potting_agent_1_mean": 3.14, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 7, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 
0}, "hist_stats": {"episode_reward": [44.0, 12.0, 63.0, 25.0, 17.0, 49.0, 9.0, 79.0, 119.0, 43.0, 12.0, 130.0, 79.0, 53.0, 22.0, 24.0, 36.0, 17.0, 82.0, 42.0, 65.0, 88.0, 23.0, 80.0, 66.0, 44.0, 113.0, 9.0, 66.0, 76.0, 9.0, 38.0, 25.0, 20.0, 17.0, 25.0, 38.0, 33.0, 12.0, 96.0, 77.0, 31.0, 16.0, 48.0, 28.0, 53.0, 20.0, 28.0, 20.0, 74.0, 12.0, 79.0, 17.0, 91.0, 76.0, 31.0, 9.0, 23.0, 84.0, 68.0, 125.0, 82.0, 31.0, 33.0, 17.0, 42.0, 79.0, 71.0, 9.0, 142.0, 120.0, 26.0, 59.0, 71.0, 122.0, 44.0, 53.0, 12.0, 45.0, 47.0, 36.0, 83.0, 36.0, 74.0, 31.0, 9.0, 31.0, 9.0, 105.0, 25.0, 79.0, 33.0, 74.0, 41.0, 63.0, 31.0, 9.0, 20.0, 22.0, 31.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [23.0, 21.0, 9.0, 3.0, 43.0, 20.0, 9.0, 16.0, 12.0, 5.0, 18.0, 31.0, 6.0, 3.0, 34.0, 45.0, 60.0, 59.0, 14.0, 29.0, 6.0, 6.0, 65.0, 65.0, 42.0, 37.0, 19.0, 34.0, 9.0, 13.0, 8.0, 16.0, 19.0, 17.0, 5.0, 12.0, 34.0, 48.0, 17.0, 25.0, 34.0, 31.0, 57.0, 31.0, 17.0, 6.0, 32.0, 48.0, 29.0, 37.0, 19.0, 25.0, 70.0, 43.0, 0.0, 9.0, 34.0, 32.0, 44.0, 32.0, 3.0, 6.0, 22.0, 16.0, 8.0, 17.0, 12.0, 8.0, 8.0, 9.0, 9.0, 16.0, 25.0, 13.0, 8.0, 25.0, 6.0, 6.0, 59.0, 37.0, 34.0, 43.0, 20.0, 11.0, 0.0, 16.0, 20.0, 28.0, 16.0, 12.0, 30.0, 23.0, 8.0, 12.0, 14.0, 14.0, 11.0, 9.0, 39.0, 35.0, 6.0, 6.0, 37.0, 42.0, 14.0, 3.0, 45.0, 46.0, 42.0, 34.0, 20.0, 11.0, 3.0, 6.0, 12.0, 11.0, 33.0, 51.0, 40.0, 28.0, 59.0, 66.0, 40.0, 42.0, 17.0, 14.0, 14.0, 19.0, 5.0, 12.0, 6.0, 36.0, 34.0, 45.0, 29.0, 42.0, 0.0, 9.0, 63.0, 79.0, 63.0, 57.0, 
9.0, 17.0, 36.0, 23.0, 37.0, 34.0, 60.0, 62.0, 36.0, 8.0, 28.0, 25.0, 9.0, 3.0, 14.0, 31.0, 21.0, 26.0, 16.0, 20.0, 48.0, 35.0, 17.0, 19.0, 37.0, 37.0, 3.0, 28.0, 3.0, 6.0, 9.0, 22.0, 3.0, 6.0, 51.0, 54.0, 14.0, 11.0, 28.0, 51.0, 11.0, 22.0, 34.0, 40.0, 19.0, 22.0, 37.0, 26.0, 17.0, 14.0, 0.0, 9.0, 8.0, 12.0, 11.0, 11.0, 17.0, 14.0]}, "sampler_perf": {"mean_env_wait_ms": 6.050716001931983, "mean_processing_ms": 0.1663751803875143, "mean_inference_ms": 2.655688107582492}, "off_policy_estimator": {}, "info": {"num_steps_trained": 648000, "num_steps_sampled": 345600, "sample_time_ms": 22006.752, "load_time_ms": 38.851, "grad_time_ms": 9447.322, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 2.9802322831784522e-09, "cur_lr": 0.0010000000474974513, "total_loss": -0.010166086256504059, "policy_loss": -0.010217566043138504, "vf_loss": 9.166760444641113, "vf_explained_var": 0.3867878019809723, "kl": 0.001088446588255465, "entropy": 1.7303863763809204, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 345600, "episodes_total": 864, "training_iteration": 27, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_20-06-24", "timestamp": 1660244784, "time_this_iter_s": 35.4101459980011, "time_total_s": 3204.783495903015, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, 
"zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": 
true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 3204.783495903015, "timesteps_since_restore": 345600, "iterations_since_restore": 27, "perf": {"cpu_util_percent": 38.552, "ram_util_percent": 58.32}}
-{"episode_reward_max": 145.0, "episode_reward_min": 9.0, "episode_reward_mean": 55.94, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 79.0}, "policy_reward_mean": {"ppo": 27.97}, "custom_metrics": {"sparse_reward_mean": 10.0, "sparse_reward_min": 0, "sparse_reward_max": 40, "shaped_reward_mean": 35.94, "shaped_reward_min": 9, "shaped_reward_max": 73, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 5.38, "onion_pickup_agent_0_min": 1, "onion_pickup_agent_0_max": 13, "onion_pickup_agent_1_mean": 6.64, "onion_pickup_agent_1_min": 1, "onion_pickup_agent_1_max": 15, "useful_onion_pickup_agent_0_mean": 3.83, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 10, "useful_onion_pickup_agent_1_mean": 5.04, "useful_onion_pickup_agent_1_min": 0, "useful_onion_pickup_agent_1_max": 11, "onion_drop_agent_0_mean": 2.21, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 9, "onion_drop_agent_1_mean": 2.47, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 10, "useful_onion_drop_agent_0_mean": 1.2, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 1.24, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 10, "potting_onion_agent_0_mean": 2.73, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 7, "potting_onion_agent_1_mean": 3.69, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 8, "dish_pickup_agent_0_mean": 3.85, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 3.25, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 0.71, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 4, "useful_dish_pickup_agent_1_mean": 0.55, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 3, "dish_drop_agent_0_mean": 2.31, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 1.73, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 7, "useful_dish_drop_agent_0_mean": 0.77, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 5, "useful_dish_drop_agent_1_mean": 0.58, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 5, "soup_pickup_agent_0_mean": 1.93, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 6, "soup_pickup_agent_1_mean": 1.87, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 6, "soup_delivery_agent_0_mean": 0.87, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 4, "soup_delivery_agent_1_mean": 0.87, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 3, "soup_drop_agent_0_mean": 0.97, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 6, "soup_drop_agent_1_mean": 0.89, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 5, "optimal_onion_potting_agent_0_mean": 2.73, 
"optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 7, "optimal_onion_potting_agent_1_mean": 3.69, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 8, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 2.73, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 7, "viable_onion_potting_agent_1_mean": 3.69, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 8, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 
0}, "hist_stats": {"episode_reward": [93.0, 93.0, 74.0, 133.0, 106.0, 85.0, 23.0, 47.0, 93.0, 23.0, 47.0, 74.0, 33.0, 98.0, 96.0, 17.0, 49.0, 110.0, 84.0, 145.0, 34.0, 99.0, 48.0, 55.0, 71.0, 71.0, 65.0, 46.0, 44.0, 12.0, 20.0, 71.0, 17.0, 42.0, 79.0, 71.0, 9.0, 142.0, 120.0, 26.0, 59.0, 71.0, 122.0, 44.0, 53.0, 12.0, 45.0, 47.0, 36.0, 83.0, 36.0, 74.0, 31.0, 9.0, 31.0, 9.0, 105.0, 25.0, 79.0, 33.0, 74.0, 41.0, 63.0, 31.0, 9.0, 20.0, 22.0, 31.0, 44.0, 12.0, 63.0, 25.0, 17.0, 49.0, 9.0, 79.0, 119.0, 43.0, 12.0, 130.0, 79.0, 53.0, 22.0, 24.0, 36.0, 17.0, 82.0, 42.0, 65.0, 88.0, 23.0, 80.0, 66.0, 44.0, 113.0, 9.0, 66.0, 76.0, 9.0, 38.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [47.0, 46.0, 53.0, 40.0, 37.0, 37.0, 59.0, 74.0, 55.0, 51.0, 40.0, 45.0, 3.0, 20.0, 22.0, 25.0, 53.0, 40.0, 14.0, 9.0, 24.0, 23.0, 36.0, 38.0, 21.0, 12.0, 37.0, 61.0, 44.0, 52.0, 6.0, 11.0, 29.0, 20.0, 48.0, 62.0, 38.0, 46.0, 76.0, 69.0, 6.0, 28.0, 47.0, 52.0, 17.0, 31.0, 16.0, 39.0, 40.0, 31.0, 35.0, 36.0, 32.0, 33.0, 21.0, 25.0, 23.0, 21.0, 6.0, 6.0, 8.0, 12.0, 33.0, 38.0, 5.0, 12.0, 6.0, 36.0, 34.0, 45.0, 29.0, 42.0, 0.0, 9.0, 63.0, 79.0, 63.0, 57.0, 9.0, 17.0, 36.0, 23.0, 37.0, 34.0, 60.0, 62.0, 36.0, 8.0, 28.0, 25.0, 9.0, 3.0, 14.0, 31.0, 21.0, 26.0, 16.0, 20.0, 48.0, 35.0, 17.0, 19.0, 37.0, 37.0, 3.0, 28.0, 3.0, 6.0, 9.0, 22.0, 3.0, 6.0, 51.0, 54.0, 14.0, 11.0, 28.0, 51.0, 11.0, 22.0, 34.0, 40.0, 19.0, 22.0, 37.0, 26.0, 17.0, 14.0, 0.0, 9.0, 8.0, 12.0, 11.0, 11.0, 17.0, 14.0, 23.0, 21.0, 9.0, 3.0, 
43.0, 20.0, 9.0, 16.0, 12.0, 5.0, 18.0, 31.0, 6.0, 3.0, 34.0, 45.0, 60.0, 59.0, 14.0, 29.0, 6.0, 6.0, 65.0, 65.0, 42.0, 37.0, 19.0, 34.0, 9.0, 13.0, 8.0, 16.0, 19.0, 17.0, 5.0, 12.0, 34.0, 48.0, 17.0, 25.0, 34.0, 31.0, 57.0, 31.0, 17.0, 6.0, 32.0, 48.0, 29.0, 37.0, 19.0, 25.0, 70.0, 43.0, 0.0, 9.0, 34.0, 32.0, 44.0, 32.0, 3.0, 6.0, 22.0, 16.0]}, "sampler_perf": {"mean_env_wait_ms": 5.8446114836959895, "mean_processing_ms": 0.16658688353388335, "mean_inference_ms": 2.6012841728705705}, "off_policy_estimator": {}, "info": {"num_steps_trained": 672000, "num_steps_sampled": 358400, "sample_time_ms": 22190.055, "load_time_ms": 38.857, "grad_time_ms": 9598.179, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 1.4901161415892261e-09, "cur_lr": 0.0010000000474974513, "total_loss": -0.008647923357784748, "policy_loss": -0.00907482486218214, "vf_loss": 12.8626708984375, "vf_explained_var": 0.32375723123550415, "kl": 0.0009376012603752315, "entropy": 1.7187572717666626, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 358400, "episodes_total": 896, "training_iteration": 28, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_20-06-58", "timestamp": 1660244818, "time_this_iter_s": 34.0201780796051, "time_total_s": 3238.8036739826202, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, 
"grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": 
{"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 3238.8036739826202, "timesteps_since_restore": 358400, "iterations_since_restore": 28, "perf": {"cpu_util_percent": 38.32708333333333, "ram_util_percent": 57.17499999999999}}
-{"episode_reward_max": 145.0, "episode_reward_min": 9.0, "episode_reward_mean": 55.35, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 76.0}, "policy_reward_mean": {"ppo": 27.675}, "custom_metrics": {"sparse_reward_mean": 9.6, "sparse_reward_min": 0, "sparse_reward_max": 40, "shaped_reward_mean": 36.15, "shaped_reward_min": 9, "shaped_reward_max": 73, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 5.38, "onion_pickup_agent_0_min": 1, "onion_pickup_agent_0_max": 11, "onion_pickup_agent_1_mean": 6.69, "onion_pickup_agent_1_min": 1, "onion_pickup_agent_1_max": 15, "useful_onion_pickup_agent_0_mean": 3.84, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 11, "useful_onion_pickup_agent_1_mean": 4.96, "useful_onion_pickup_agent_1_min": 0, "useful_onion_pickup_agent_1_max": 11, "onion_drop_agent_0_mean": 2.14, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 7, "onion_drop_agent_1_mean": 2.59, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 9, "useful_onion_drop_agent_0_mean": 1.07, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 1.39, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 8, "potting_onion_agent_0_mean": 2.75, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 7, "potting_onion_agent_1_mean": 3.61, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 8, "dish_pickup_agent_0_mean": 3.54, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 3.31, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 0.66, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 4, "useful_dish_pickup_agent_1_mean": 0.63, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 3, "dish_drop_agent_0_mean": 1.93, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 1.84, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 7, "useful_dish_drop_agent_0_mean": 0.65, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 5, "useful_dish_drop_agent_1_mean": 0.58, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 5, "soup_pickup_agent_0_mean": 2.06, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 6, "soup_pickup_agent_1_mean": 1.82, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 0.92, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 4, "soup_delivery_agent_1_mean": 0.89, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 3, "soup_drop_agent_0_mean": 1.03, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 6, "soup_drop_agent_1_mean": 0.83, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 6, "optimal_onion_potting_agent_0_mean": 2.75, 
"optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 7, "optimal_onion_potting_agent_1_mean": 3.61, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 8, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 2.75, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 7, "viable_onion_potting_agent_1_mean": 3.61, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 8, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 
0}, "hist_stats": {"episode_reward": [39.0, 96.0, 81.0, 77.0, 9.0, 34.0, 47.0, 20.0, 74.0, 44.0, 120.0, 34.0, 22.0, 42.0, 76.0, 66.0, 38.0, 44.0, 9.0, 101.0, 37.0, 39.0, 34.0, 36.0, 53.0, 9.0, 66.0, 80.0, 95.0, 34.0, 50.0, 54.0, 9.0, 20.0, 22.0, 31.0, 44.0, 12.0, 63.0, 25.0, 17.0, 49.0, 9.0, 79.0, 119.0, 43.0, 12.0, 130.0, 79.0, 53.0, 22.0, 24.0, 36.0, 17.0, 82.0, 42.0, 65.0, 88.0, 23.0, 80.0, 66.0, 44.0, 113.0, 9.0, 66.0, 76.0, 9.0, 38.0, 93.0, 93.0, 74.0, 133.0, 106.0, 85.0, 23.0, 47.0, 93.0, 23.0, 47.0, 74.0, 33.0, 98.0, 96.0, 17.0, 49.0, 110.0, 84.0, 145.0, 34.0, 99.0, 48.0, 55.0, 71.0, 71.0, 65.0, 46.0, 44.0, 12.0, 20.0, 71.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [15.0, 24.0, 32.0, 64.0, 38.0, 43.0, 39.0, 38.0, 0.0, 9.0, 14.0, 20.0, 18.0, 29.0, 14.0, 6.0, 40.0, 34.0, 22.0, 22.0, 62.0, 58.0, 15.0, 19.0, 10.0, 12.0, 25.0, 17.0, 34.0, 42.0, 37.0, 29.0, 22.0, 16.0, 24.0, 20.0, 0.0, 9.0, 45.0, 56.0, 20.0, 17.0, 30.0, 9.0, 12.0, 22.0, 14.0, 22.0, 27.0, 26.0, 0.0, 9.0, 26.0, 40.0, 34.0, 46.0, 50.0, 45.0, 18.0, 16.0, 28.0, 22.0, 28.0, 26.0, 0.0, 9.0, 8.0, 12.0, 11.0, 11.0, 17.0, 14.0, 23.0, 21.0, 9.0, 3.0, 43.0, 20.0, 9.0, 16.0, 12.0, 5.0, 18.0, 31.0, 6.0, 3.0, 34.0, 45.0, 60.0, 59.0, 14.0, 29.0, 6.0, 6.0, 65.0, 65.0, 42.0, 37.0, 19.0, 34.0, 9.0, 13.0, 8.0, 16.0, 19.0, 17.0, 5.0, 12.0, 34.0, 48.0, 17.0, 25.0, 34.0, 31.0, 57.0, 31.0, 17.0, 6.0, 32.0, 48.0, 29.0, 37.0, 19.0, 25.0, 70.0, 43.0, 0.0, 9.0, 34.0, 32.0, 44.0, 32.0, 3.0, 6.0, 22.0, 16.0, 47.0, 46.0, 53.0, 40.0, 
37.0, 37.0, 59.0, 74.0, 55.0, 51.0, 40.0, 45.0, 3.0, 20.0, 22.0, 25.0, 53.0, 40.0, 14.0, 9.0, 24.0, 23.0, 36.0, 38.0, 21.0, 12.0, 37.0, 61.0, 44.0, 52.0, 6.0, 11.0, 29.0, 20.0, 48.0, 62.0, 38.0, 46.0, 76.0, 69.0, 6.0, 28.0, 47.0, 52.0, 17.0, 31.0, 16.0, 39.0, 40.0, 31.0, 35.0, 36.0, 32.0, 33.0, 21.0, 25.0, 23.0, 21.0, 6.0, 6.0, 8.0, 12.0, 33.0, 38.0]}, "sampler_perf": {"mean_env_wait_ms": 5.653214857264523, "mean_processing_ms": 0.16674718188605944, "mean_inference_ms": 2.549555614199102}, "off_policy_estimator": {}, "info": {"num_steps_trained": 696000, "num_steps_sampled": 371200, "sample_time_ms": 22379.228, "load_time_ms": 38.745, "grad_time_ms": 9750.752, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 7.450580707946131e-10, "cur_lr": 0.0010000000474974513, "total_loss": -0.007846680469810963, "policy_loss": -0.007850968278944492, "vf_loss": 8.63664722442627, "vf_explained_var": 0.4092896282672882, "kl": 0.001057352521456778, "entropy": 1.7187713384628296, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 371200, "episodes_total": 928, "training_iteration": 29, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_20-07-30", "timestamp": 1660244850, "time_this_iter_s": 32.66524410247803, "time_total_s": 3271.4689180850983, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, 
"grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": 
{"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 3271.4689180850983, "timesteps_since_restore": 371200, "iterations_since_restore": 29, "perf": {"cpu_util_percent": 40.74130434782609, "ram_util_percent": 58.79130434782609}}
-{"episode_reward_max": 179.0, "episode_reward_min": 9.0, "episode_reward_mean": 58.6, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 92.0}, "policy_reward_mean": {"ppo": 29.3}, "custom_metrics": {"sparse_reward_mean": 10.2, "sparse_reward_min": 0, "sparse_reward_max": 60, "shaped_reward_mean": 38.2, "shaped_reward_min": 9, "shaped_reward_max": 70, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 5.55, "onion_pickup_agent_0_min": 1, "onion_pickup_agent_0_max": 11, "onion_pickup_agent_1_mean": 7.06, "onion_pickup_agent_1_min": 1, "onion_pickup_agent_1_max": 16, "useful_onion_pickup_agent_0_mean": 4.05, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 11, "useful_onion_pickup_agent_1_mean": 5.26, "useful_onion_pickup_agent_1_min": 1, "useful_onion_pickup_agent_1_max": 11, "onion_drop_agent_0_mean": 2.2, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 
7, "onion_drop_agent_1_mean": 2.71, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 9, "useful_onion_drop_agent_0_mean": 1.03, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 1.4, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 8, "potting_onion_agent_0_mean": 2.83, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 8, "potting_onion_agent_1_mean": 3.86, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 8, "dish_pickup_agent_0_mean": 3.66, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 3.29, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 0.74, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 4, "useful_dish_pickup_agent_1_mean": 0.67, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 3, "dish_drop_agent_0_mean": 1.98, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 1.72, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 6, "useful_dish_drop_agent_0_mean": 0.67, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 5, "useful_dish_drop_agent_1_mean": 0.45, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 5, "soup_pickup_agent_0_mean": 2.26, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 1.9, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 1.07, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 4, "soup_delivery_agent_1_mean": 0.92, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 4, "soup_drop_agent_0_mean": 1.1, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 6, "soup_drop_agent_1_mean": 0.9, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 6, "optimal_onion_potting_agent_0_mean": 2.83, "optimal_onion_potting_agent_0_min": 0, 
"optimal_onion_potting_agent_0_max": 8, "optimal_onion_potting_agent_1_mean": 3.86, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 8, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 2.83, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 8, "viable_onion_potting_agent_1_mean": 3.86, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 8, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": 
[90.0, 48.0, 179.0, 39.0, 60.0, 91.0, 39.0, 122.0, 20.0, 34.0, 42.0, 23.0, 50.0, 12.0, 20.0, 71.0, 66.0, 35.0, 44.0, 73.0, 64.0, 17.0, 131.0, 38.0, 41.0, 36.0, 105.0, 53.0, 39.0, 71.0, 20.0, 79.0, 66.0, 76.0, 9.0, 38.0, 93.0, 93.0, 74.0, 133.0, 106.0, 85.0, 23.0, 47.0, 93.0, 23.0, 47.0, 74.0, 33.0, 98.0, 96.0, 17.0, 49.0, 110.0, 84.0, 145.0, 34.0, 99.0, 48.0, 55.0, 71.0, 71.0, 65.0, 46.0, 44.0, 12.0, 20.0, 71.0, 39.0, 96.0, 81.0, 77.0, 9.0, 34.0, 47.0, 20.0, 74.0, 44.0, 120.0, 34.0, 22.0, 42.0, 76.0, 66.0, 38.0, 44.0, 9.0, 101.0, 37.0, 39.0, 34.0, 36.0, 53.0, 9.0, 66.0, 80.0, 95.0, 34.0, 50.0, 54.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [39.0, 51.0, 20.0, 28.0, 87.0, 92.0, 25.0, 14.0, 29.0, 31.0, 45.0, 46.0, 25.0, 14.0, 65.0, 57.0, 11.0, 9.0, 22.0, 12.0, 14.0, 28.0, 6.0, 17.0, 30.0, 20.0, 6.0, 6.0, 12.0, 8.0, 23.0, 48.0, 29.0, 37.0, 13.0, 22.0, 20.0, 24.0, 36.0, 37.0, 31.0, 33.0, 11.0, 6.0, 68.0, 63.0, 11.0, 27.0, 24.0, 17.0, 20.0, 16.0, 64.0, 41.0, 31.0, 22.0, 12.0, 27.0, 26.0, 45.0, 11.0, 9.0, 33.0, 46.0, 34.0, 32.0, 44.0, 32.0, 3.0, 6.0, 22.0, 16.0, 47.0, 46.0, 53.0, 40.0, 37.0, 37.0, 59.0, 74.0, 55.0, 51.0, 40.0, 45.0, 3.0, 20.0, 22.0, 25.0, 53.0, 40.0, 14.0, 9.0, 24.0, 23.0, 36.0, 38.0, 21.0, 12.0, 37.0, 61.0, 44.0, 52.0, 6.0, 11.0, 29.0, 20.0, 48.0, 62.0, 38.0, 46.0, 76.0, 69.0, 6.0, 28.0, 47.0, 52.0, 17.0, 31.0, 16.0, 39.0, 40.0, 31.0, 35.0, 36.0, 32.0, 33.0, 21.0, 25.0, 23.0, 21.0, 6.0, 6.0, 8.0, 12.0, 33.0, 38.0, 15.0, 24.0, 32.0, 64.0, 38.0, 43.0, 39.0, 38.0, 
0.0, 9.0, 14.0, 20.0, 18.0, 29.0, 14.0, 6.0, 40.0, 34.0, 22.0, 22.0, 62.0, 58.0, 15.0, 19.0, 10.0, 12.0, 25.0, 17.0, 34.0, 42.0, 37.0, 29.0, 22.0, 16.0, 24.0, 20.0, 0.0, 9.0, 45.0, 56.0, 20.0, 17.0, 30.0, 9.0, 12.0, 22.0, 14.0, 22.0, 27.0, 26.0, 0.0, 9.0, 26.0, 40.0, 34.0, 46.0, 50.0, 45.0, 18.0, 16.0, 28.0, 22.0, 28.0, 26.0]}, "sampler_perf": {"mean_env_wait_ms": 5.475175554008815, "mean_processing_ms": 0.16692031317355585, "mean_inference_ms": 2.4996312531417773}, "off_policy_estimator": {}, "info": {"num_steps_trained": 720000, "num_steps_sampled": 384000, "sample_time_ms": 22626.486, "load_time_ms": 38.637, "grad_time_ms": 9834.774, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 3.7252903539730653e-10, "cur_lr": 0.0010000000474974513, "total_loss": -0.010750534944236279, "policy_loss": -0.01101712416857481, "vf_loss": 11.21933650970459, "vf_explained_var": 0.33813270926475525, "kl": 0.0012414826778694987, "entropy": 1.7106833457946777, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 384000, "episodes_total": 960, "training_iteration": 30, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_20-08-02", "timestamp": 1660244882, "time_this_iter_s": 32.23107981681824, "time_total_s": 3303.6999979019165, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, 
"zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": 
true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 3303.6999979019165, "timesteps_since_restore": 384000, "iterations_since_restore": 30, "perf": {"cpu_util_percent": 45.34130434782608, "ram_util_percent": 56.88478260869565}}
-{"episode_reward_max": 182.0, "episode_reward_min": 9.0, "episode_reward_mean": 57.61, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 96.0}, "policy_reward_mean": {"ppo": 28.805}, "custom_metrics": {"sparse_reward_mean": 10.4, "sparse_reward_min": 0, "sparse_reward_max": 60, "shaped_reward_mean": 36.81, "shaped_reward_min": 9, "shaped_reward_max": 65, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 5.93, "onion_pickup_agent_0_min": 1, "onion_pickup_agent_0_max": 11, "onion_pickup_agent_1_mean": 6.72, "onion_pickup_agent_1_min": 0, "onion_pickup_agent_1_max": 16, "useful_onion_pickup_agent_0_mean": 4.36, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 11, "useful_onion_pickup_agent_1_mean": 4.83, "useful_onion_pickup_agent_1_min": 0, "useful_onion_pickup_agent_1_max": 10, "onion_drop_agent_0_mean": 2.38, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 7, "onion_drop_agent_1_mean": 2.59, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 10, "useful_onion_drop_agent_0_mean": 1.14, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 1.46, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 8, "potting_onion_agent_0_mean": 2.99, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 8, "potting_onion_agent_1_mean": 3.62, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 7, "dish_pickup_agent_0_mean": 3.62, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 3.3, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 0.67, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 4, "useful_dish_pickup_agent_1_mean": 0.64, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 3, "dish_drop_agent_0_mean": 2.04, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 8, "dish_drop_agent_1_mean": 1.8, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.68, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 5, "useful_dish_drop_agent_1_mean": 0.5, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 4, "soup_pickup_agent_0_mean": 2.05, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 1.73, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 1.09, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 4, "soup_delivery_agent_1_mean": 0.82, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 4, "soup_drop_agent_0_mean": 0.86, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 6, "soup_drop_agent_1_mean": 0.85, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 6, "optimal_onion_potting_agent_0_mean": 2.99, 
"optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 8, "optimal_onion_potting_agent_1_mean": 3.62, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 7, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 2.99, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 8, "viable_onion_potting_agent_1_mean": 3.62, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 7, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 
0}, "hist_stats": {"episode_reward": [34.0, 182.0, 133.0, 66.0, 23.0, 42.0, 53.0, 125.0, 22.0, 12.0, 87.0, 98.0, 9.0, 46.0, 30.0, 12.0, 37.0, 99.0, 63.0, 85.0, 77.0, 42.0, 133.0, 173.0, 66.0, 35.0, 39.0, 88.0, 34.0, 52.0, 82.0, 23.0, 44.0, 12.0, 20.0, 71.0, 39.0, 96.0, 81.0, 77.0, 9.0, 34.0, 47.0, 20.0, 74.0, 44.0, 120.0, 34.0, 22.0, 42.0, 76.0, 66.0, 38.0, 44.0, 9.0, 101.0, 37.0, 39.0, 34.0, 36.0, 53.0, 9.0, 66.0, 80.0, 95.0, 34.0, 50.0, 54.0, 90.0, 48.0, 179.0, 39.0, 60.0, 91.0, 39.0, 122.0, 20.0, 34.0, 42.0, 23.0, 50.0, 12.0, 20.0, 71.0, 66.0, 35.0, 44.0, 73.0, 64.0, 17.0, 131.0, 38.0, 41.0, 36.0, 105.0, 53.0, 39.0, 71.0, 20.0, 79.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [23.0, 11.0, 86.0, 96.0, 68.0, 65.0, 37.0, 29.0, 17.0, 6.0, 6.0, 36.0, 22.0, 31.0, 62.0, 63.0, 8.0, 14.0, 3.0, 9.0, 41.0, 46.0, 55.0, 43.0, 0.0, 9.0, 28.0, 18.0, 19.0, 11.0, 9.0, 3.0, 23.0, 14.0, 59.0, 40.0, 31.0, 32.0, 45.0, 40.0, 39.0, 38.0, 19.0, 23.0, 67.0, 66.0, 85.0, 88.0, 34.0, 32.0, 20.0, 15.0, 22.0, 17.0, 37.0, 51.0, 20.0, 14.0, 27.0, 25.0, 43.0, 39.0, 11.0, 12.0, 23.0, 21.0, 6.0, 6.0, 8.0, 12.0, 33.0, 38.0, 15.0, 24.0, 32.0, 64.0, 38.0, 43.0, 39.0, 38.0, 0.0, 9.0, 14.0, 20.0, 18.0, 29.0, 14.0, 6.0, 40.0, 34.0, 22.0, 22.0, 62.0, 58.0, 15.0, 19.0, 10.0, 12.0, 25.0, 17.0, 34.0, 42.0, 37.0, 29.0, 22.0, 16.0, 24.0, 20.0, 0.0, 9.0, 45.0, 56.0, 20.0, 17.0, 30.0, 9.0, 12.0, 22.0, 14.0, 22.0, 27.0, 26.0, 0.0, 9.0, 26.0, 40.0, 34.0, 46.0, 50.0, 45.0, 18.0, 16.0, 28.0, 22.0, 28.0, 26.0, 39.0, 51.0, 
20.0, 28.0, 87.0, 92.0, 25.0, 14.0, 29.0, 31.0, 45.0, 46.0, 25.0, 14.0, 65.0, 57.0, 11.0, 9.0, 22.0, 12.0, 14.0, 28.0, 6.0, 17.0, 30.0, 20.0, 6.0, 6.0, 12.0, 8.0, 23.0, 48.0, 29.0, 37.0, 13.0, 22.0, 20.0, 24.0, 36.0, 37.0, 31.0, 33.0, 11.0, 6.0, 68.0, 63.0, 11.0, 27.0, 24.0, 17.0, 20.0, 16.0, 64.0, 41.0, 31.0, 22.0, 12.0, 27.0, 26.0, 45.0, 11.0, 9.0, 33.0, 46.0]}, "sampler_perf": {"mean_env_wait_ms": 5.308917467290777, "mean_processing_ms": 0.16704433507360725, "mean_inference_ms": 2.4512479594099825}, "off_policy_estimator": {}, "info": {"num_steps_trained": 744000, "num_steps_sampled": 396800, "sample_time_ms": 22677.978, "load_time_ms": 38.542, "grad_time_ms": 9985.158, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 1.8626451769865326e-10, "cur_lr": 0.0010000000474974513, "total_loss": -0.00831019040197134, "policy_loss": -0.008908797055482864, "vf_loss": 14.524895668029785, "vf_explained_var": 0.35295844078063965, "kl": 0.0011723049683496356, "entropy": 1.7077676057815552, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 396800, "episodes_total": 992, "training_iteration": 31, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_20-08-34", "timestamp": 1660244914, "time_this_iter_s": 31.540908813476562, "time_total_s": 3335.240906715393, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": 
true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 3335.240906715393, "timesteps_since_restore": 396800, "iterations_since_restore": 31, "perf": {"cpu_util_percent": 42.184090909090905, "ram_util_percent": 56.9090909090909}}
-{"episode_reward_max": 187.0, "episode_reward_min": 9.0, "episode_reward_mean": 63.31, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 101.0}, "policy_reward_mean": {"ppo": 31.655}, "custom_metrics": {"sparse_reward_mean": 12.0, "sparse_reward_min": 0, "sparse_reward_max": 60, "shaped_reward_mean": 39.31, "shaped_reward_min": 9, "shaped_reward_max": 77, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 6.12, "onion_pickup_agent_0_min": 2, "onion_pickup_agent_0_max": 15, "onion_pickup_agent_1_mean": 6.89, "onion_pickup_agent_1_min": 0, "onion_pickup_agent_1_max": 16, "useful_onion_pickup_agent_0_mean": 4.48, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 11, "useful_onion_pickup_agent_1_mean": 5.08, "useful_onion_pickup_agent_1_min": 0, "useful_onion_pickup_agent_1_max": 12, "onion_drop_agent_0_mean": 2.57, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 10, "onion_drop_agent_1_mean": 2.41, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 10, "useful_onion_drop_agent_0_mean": 1.38, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 1.33, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 6, "potting_onion_agent_0_mean": 3.08, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 8, "potting_onion_agent_1_mean": 3.95, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 9, "dish_pickup_agent_0_mean": 4.02, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 3.61, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 0.77, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 4, "useful_dish_pickup_agent_1_mean": 0.62, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 3, "dish_drop_agent_0_mean": 2.27, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 8, "dish_drop_agent_1_mean": 1.98, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.76, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 5, "useful_dish_drop_agent_1_mean": 0.57, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 4, "soup_pickup_agent_0_mean": 2.24, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 1.81, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 1.26, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 4, "soup_delivery_agent_1_mean": 0.91, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 4, "soup_drop_agent_0_mean": 0.9, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 5, "soup_drop_agent_1_mean": 0.86, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 5, "optimal_onion_potting_agent_0_mean": 3.08, 
"optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 8, "optimal_onion_potting_agent_1_mean": 3.95, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 9, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 3.08, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 8, "viable_onion_potting_agent_1_mean": 3.95, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 9, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 
0}, "hist_stats": {"episode_reward": [87.0, 34.0, 187.0, 74.0, 128.0, 47.0, 23.0, 157.0, 12.0, 85.0, 20.0, 42.0, 20.0, 92.0, 82.0, 44.0, 9.0, 87.0, 20.0, 98.0, 31.0, 42.0, 83.0, 58.0, 45.0, 96.0, 81.0, 93.0, 36.0, 69.0, 88.0, 74.0, 95.0, 34.0, 50.0, 54.0, 90.0, 48.0, 179.0, 39.0, 60.0, 91.0, 39.0, 122.0, 20.0, 34.0, 42.0, 23.0, 50.0, 12.0, 20.0, 71.0, 66.0, 35.0, 44.0, 73.0, 64.0, 17.0, 131.0, 38.0, 41.0, 36.0, 105.0, 53.0, 39.0, 71.0, 20.0, 79.0, 34.0, 182.0, 133.0, 66.0, 23.0, 42.0, 53.0, 125.0, 22.0, 12.0, 87.0, 98.0, 9.0, 46.0, 30.0, 12.0, 37.0, 99.0, 63.0, 85.0, 77.0, 42.0, 133.0, 173.0, 66.0, 35.0, 39.0, 88.0, 34.0, 52.0, 82.0, 23.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [50.0, 37.0, 17.0, 17.0, 86.0, 101.0, 37.0, 37.0, 55.0, 73.0, 24.0, 23.0, 17.0, 6.0, 79.0, 78.0, 6.0, 6.0, 37.0, 48.0, 5.0, 15.0, 13.0, 29.0, 9.0, 11.0, 45.0, 47.0, 39.0, 43.0, 13.0, 31.0, 6.0, 3.0, 42.0, 45.0, 14.0, 6.0, 58.0, 40.0, 16.0, 15.0, 11.0, 31.0, 40.0, 43.0, 14.0, 44.0, 17.0, 28.0, 45.0, 51.0, 44.0, 37.0, 49.0, 44.0, 14.0, 22.0, 35.0, 34.0, 36.0, 52.0, 48.0, 26.0, 50.0, 45.0, 18.0, 16.0, 28.0, 22.0, 28.0, 26.0, 39.0, 51.0, 20.0, 28.0, 87.0, 92.0, 25.0, 14.0, 29.0, 31.0, 45.0, 46.0, 25.0, 14.0, 65.0, 57.0, 11.0, 9.0, 22.0, 12.0, 14.0, 28.0, 6.0, 17.0, 30.0, 20.0, 6.0, 6.0, 12.0, 8.0, 23.0, 48.0, 29.0, 37.0, 13.0, 22.0, 20.0, 24.0, 36.0, 37.0, 31.0, 33.0, 11.0, 6.0, 68.0, 63.0, 11.0, 27.0, 24.0, 17.0, 20.0, 16.0, 64.0, 41.0, 31.0, 22.0, 12.0, 27.0, 26.0, 45.0, 11.0, 9.0, 33.0, 46.0, 23.0, 
11.0, 86.0, 96.0, 68.0, 65.0, 37.0, 29.0, 17.0, 6.0, 6.0, 36.0, 22.0, 31.0, 62.0, 63.0, 8.0, 14.0, 3.0, 9.0, 41.0, 46.0, 55.0, 43.0, 0.0, 9.0, 28.0, 18.0, 19.0, 11.0, 9.0, 3.0, 23.0, 14.0, 59.0, 40.0, 31.0, 32.0, 45.0, 40.0, 39.0, 38.0, 19.0, 23.0, 67.0, 66.0, 85.0, 88.0, 34.0, 32.0, 20.0, 15.0, 22.0, 17.0, 37.0, 51.0, 20.0, 14.0, 27.0, 25.0, 43.0, 39.0, 11.0, 12.0]}, "sampler_perf": {"mean_env_wait_ms": 5.15345566138169, "mean_processing_ms": 0.16717489002220728, "mean_inference_ms": 2.4061553716842257}, "off_policy_estimator": {}, "info": {"num_steps_trained": 768000, "num_steps_sampled": 409600, "sample_time_ms": 22972.316, "load_time_ms": 38.916, "grad_time_ms": 10035.28, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 9.313225884932663e-11, "cur_lr": 0.0010000000474974513, "total_loss": -0.007412114646285772, "policy_loss": -0.007903209887444973, "vf_loss": 13.404266357421875, "vf_explained_var": 0.34650716185569763, "kl": 0.0011789536802098155, "entropy": 1.6986547708511353, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 409600, "episodes_total": 1024, "training_iteration": 32, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_20-09-07", "timestamp": 1660244947, "time_this_iter_s": 32.6441330909729, "time_total_s": 3367.885039806366, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": 
true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 3367.885039806366, "timesteps_since_restore": 409600, "iterations_since_restore": 32, "perf": {"cpu_util_percent": 40.13191489361702, "ram_util_percent": 57.20638297872342}}
-{"episode_reward_max": 187.0, "episode_reward_min": 9.0, "episode_reward_mean": 66.33, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 101.0}, "policy_reward_mean": {"ppo": 33.165}, "custom_metrics": {"sparse_reward_mean": 13.6, "sparse_reward_min": 0, "sparse_reward_max": 60, "shaped_reward_mean": 39.13, "shaped_reward_min": 9, "shaped_reward_max": 77, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 6.01, "onion_pickup_agent_0_min": 0, "onion_pickup_agent_0_max": 15, "onion_pickup_agent_1_mean": 6.99, "onion_pickup_agent_1_min": 0, "onion_pickup_agent_1_max": 13, "useful_onion_pickup_agent_0_mean": 4.25, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 11, "useful_onion_pickup_agent_1_mean": 5.15, "useful_onion_pickup_agent_1_min": 0, "useful_onion_pickup_agent_1_max": 12, "onion_drop_agent_0_mean": 2.61, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 10, "onion_drop_agent_1_mean": 2.39, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 10, "useful_onion_drop_agent_0_mean": 1.47, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 1.51, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 8, "potting_onion_agent_0_mean": 2.97, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 7, "potting_onion_agent_1_mean": 4.08, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 9, "dish_pickup_agent_0_mean": 3.85, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 3.66, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 0.71, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 3, "useful_dish_pickup_agent_1_mean": 0.6, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 3, "dish_drop_agent_0_mean": 2.07, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 8, "dish_drop_agent_1_mean": 2.06, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.65, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 5, "useful_dish_drop_agent_1_mean": 0.67, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 5, "soup_pickup_agent_0_mean": 2.1, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 1.91, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 1.16, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 4, "soup_delivery_agent_1_mean": 0.96, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 3, "soup_drop_agent_0_mean": 0.87, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 4, "soup_drop_agent_1_mean": 0.91, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 5, "optimal_onion_potting_agent_0_mean": 2.97, 
"optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 7, "optimal_onion_potting_agent_1_mean": 4.08, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 9, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 2.97, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 7, "viable_onion_potting_agent_1_mean": 4.08, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 9, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 
0}, "hist_stats": {"episode_reward": [9.0, 76.0, 98.0, 45.0, 41.0, 127.0, 12.0, 46.0, 70.0, 144.0, 71.0, 117.0, 111.0, 9.0, 54.0, 40.0, 79.0, 14.0, 62.0, 63.0, 106.0, 20.0, 27.0, 136.0, 90.0, 34.0, 52.0, 94.0, 117.0, 90.0, 39.0, 85.0, 39.0, 71.0, 20.0, 79.0, 34.0, 182.0, 133.0, 66.0, 23.0, 42.0, 53.0, 125.0, 22.0, 12.0, 87.0, 98.0, 9.0, 46.0, 30.0, 12.0, 37.0, 99.0, 63.0, 85.0, 77.0, 42.0, 133.0, 173.0, 66.0, 35.0, 39.0, 88.0, 34.0, 52.0, 82.0, 23.0, 87.0, 34.0, 187.0, 74.0, 128.0, 47.0, 23.0, 157.0, 12.0, 85.0, 20.0, 42.0, 20.0, 92.0, 82.0, 44.0, 9.0, 87.0, 20.0, 98.0, 31.0, 42.0, 83.0, 58.0, 45.0, 96.0, 81.0, 93.0, 36.0, 69.0, 88.0, 74.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [6.0, 3.0, 41.0, 35.0, 51.0, 47.0, 23.0, 22.0, 19.0, 22.0, 57.0, 70.0, 6.0, 6.0, 21.0, 25.0, 31.0, 39.0, 73.0, 71.0, 34.0, 37.0, 55.0, 62.0, 51.0, 60.0, 6.0, 3.0, 20.0, 34.0, 20.0, 20.0, 29.0, 50.0, 11.0, 3.0, 25.0, 37.0, 23.0, 40.0, 52.0, 54.0, 8.0, 12.0, 18.0, 9.0, 66.0, 70.0, 41.0, 49.0, 12.0, 22.0, 25.0, 27.0, 46.0, 48.0, 66.0, 51.0, 42.0, 48.0, 19.0, 20.0, 48.0, 37.0, 12.0, 27.0, 26.0, 45.0, 11.0, 9.0, 33.0, 46.0, 23.0, 11.0, 86.0, 96.0, 68.0, 65.0, 37.0, 29.0, 17.0, 6.0, 6.0, 36.0, 22.0, 31.0, 62.0, 63.0, 8.0, 14.0, 3.0, 9.0, 41.0, 46.0, 55.0, 43.0, 0.0, 9.0, 28.0, 18.0, 19.0, 11.0, 9.0, 3.0, 23.0, 14.0, 59.0, 40.0, 31.0, 32.0, 45.0, 40.0, 39.0, 38.0, 19.0, 23.0, 67.0, 66.0, 85.0, 88.0, 34.0, 32.0, 20.0, 15.0, 22.0, 17.0, 37.0, 51.0, 20.0, 14.0, 27.0, 25.0, 43.0, 39.0, 11.0, 12.0, 50.0, 
37.0, 17.0, 17.0, 86.0, 101.0, 37.0, 37.0, 55.0, 73.0, 24.0, 23.0, 17.0, 6.0, 79.0, 78.0, 6.0, 6.0, 37.0, 48.0, 5.0, 15.0, 13.0, 29.0, 9.0, 11.0, 45.0, 47.0, 39.0, 43.0, 13.0, 31.0, 6.0, 3.0, 42.0, 45.0, 14.0, 6.0, 58.0, 40.0, 16.0, 15.0, 11.0, 31.0, 40.0, 43.0, 14.0, 44.0, 17.0, 28.0, 45.0, 51.0, 44.0, 37.0, 49.0, 44.0, 14.0, 22.0, 35.0, 34.0, 36.0, 52.0, 48.0, 26.0]}, "sampler_perf": {"mean_env_wait_ms": 5.0079354762159145, "mean_processing_ms": 0.1673845002022688, "mean_inference_ms": 2.3652145638679087}, "off_policy_estimator": {}, "info": {"num_steps_trained": 792000, "num_steps_sampled": 422400, "sample_time_ms": 23410.855, "load_time_ms": 38.911, "grad_time_ms": 10160.504, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 4.6566129424663316e-11, "cur_lr": 0.0010000000474974513, "total_loss": -0.01017048116773367, "policy_loss": -0.010584059171378613, "vf_loss": 12.619880676269531, "vf_explained_var": 0.45027461647987366, "kl": 0.001254777773283422, "entropy": 1.6968183517456055, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 422400, "episodes_total": 1056, "training_iteration": 33, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_20-09-42", "timestamp": 1660244982, "time_this_iter_s": 35.00341510772705, "time_total_s": 3402.888454914093, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, 
"framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, 
"inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 3402.888454914093, "timesteps_since_restore": 422400, "iterations_since_restore": 33, "perf": {"cpu_util_percent": 42.62857142857143, 
"ram_util_percent": 58.25510204081633}}
-{"episode_reward_max": 187.0, "episode_reward_min": 9.0, "episode_reward_mean": 68.79, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 3.0}, "policy_reward_max": {"ppo": 101.0}, "policy_reward_mean": {"ppo": 34.395}, "custom_metrics": {"sparse_reward_mean": 13.8, "sparse_reward_min": 0, "sparse_reward_max": 60, "shaped_reward_mean": 41.19, "shaped_reward_min": 9, "shaped_reward_max": 77, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 5.84, "onion_pickup_agent_0_min": 0, "onion_pickup_agent_0_max": 15, "onion_pickup_agent_1_mean": 7.2, "onion_pickup_agent_1_min": 2, "onion_pickup_agent_1_max": 14, "useful_onion_pickup_agent_0_mean": 4.04, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 11, "useful_onion_pickup_agent_1_mean": 5.36, "useful_onion_pickup_agent_1_min": 1, "useful_onion_pickup_agent_1_max": 12, "onion_drop_agent_0_mean": 2.45, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 10, "onion_drop_agent_1_mean": 2.53, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 8, "useful_onion_drop_agent_0_mean": 1.36, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 1.62, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 8, "potting_onion_agent_0_mean": 3.01, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 7, "potting_onion_agent_1_mean": 4.16, "potting_onion_agent_1_min": 1, "potting_onion_agent_1_max": 9, "dish_pickup_agent_0_mean": 3.86, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 3.65, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 0.85, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 3, "useful_dish_pickup_agent_1_mean": 0.66, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 4, "dish_drop_agent_0_mean": 2.06, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 10, "dish_drop_agent_1_mean": 1.86, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.61, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 5, "useful_dish_drop_agent_1_mean": 0.7, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 5, "soup_pickup_agent_0_mean": 2.17, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 2.21, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 6, "soup_delivery_agent_0_mean": 1.04, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 4, "soup_delivery_agent_1_mean": 1.17, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 3, "soup_drop_agent_0_mean": 1.04, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 6, "soup_drop_agent_1_mean": 0.96, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 5, "optimal_onion_potting_agent_0_mean": 3.01, 
"optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 7, "optimal_onion_potting_agent_1_mean": 4.16, "optimal_onion_potting_agent_1_min": 1, "optimal_onion_potting_agent_1_max": 9, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 3.01, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 7, "viable_onion_potting_agent_1_mean": 4.16, "viable_onion_potting_agent_1_min": 1, "viable_onion_potting_agent_1_max": 9, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 
0}, "hist_stats": {"episode_reward": [90.0, 79.0, 36.0, 58.0, 49.0, 76.0, 79.0, 98.0, 136.0, 87.0, 50.0, 145.0, 28.0, 23.0, 110.0, 19.0, 134.0, 14.0, 12.0, 58.0, 77.0, 38.0, 99.0, 80.0, 142.0, 42.0, 124.0, 74.0, 93.0, 23.0, 103.0, 90.0, 34.0, 52.0, 82.0, 23.0, 87.0, 34.0, 187.0, 74.0, 128.0, 47.0, 23.0, 157.0, 12.0, 85.0, 20.0, 42.0, 20.0, 92.0, 82.0, 44.0, 9.0, 87.0, 20.0, 98.0, 31.0, 42.0, 83.0, 58.0, 45.0, 96.0, 81.0, 93.0, 36.0, 69.0, 88.0, 74.0, 9.0, 76.0, 98.0, 45.0, 41.0, 127.0, 12.0, 46.0, 70.0, 144.0, 71.0, 117.0, 111.0, 9.0, 54.0, 40.0, 79.0, 14.0, 62.0, 63.0, 106.0, 20.0, 27.0, 136.0, 90.0, 34.0, 52.0, 94.0, 117.0, 90.0, 39.0, 85.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [42.0, 48.0, 41.0, 38.0, 12.0, 24.0, 26.0, 32.0, 24.0, 25.0, 35.0, 41.0, 36.0, 43.0, 44.0, 54.0, 74.0, 62.0, 45.0, 42.0, 28.0, 22.0, 65.0, 80.0, 14.0, 14.0, 6.0, 17.0, 40.0, 70.0, 6.0, 13.0, 70.0, 64.0, 11.0, 3.0, 3.0, 9.0, 36.0, 22.0, 39.0, 38.0, 19.0, 19.0, 54.0, 45.0, 35.0, 45.0, 63.0, 79.0, 16.0, 26.0, 63.0, 61.0, 34.0, 40.0, 48.0, 45.0, 12.0, 11.0, 49.0, 54.0, 41.0, 49.0, 20.0, 14.0, 27.0, 25.0, 43.0, 39.0, 11.0, 12.0, 50.0, 37.0, 17.0, 17.0, 86.0, 101.0, 37.0, 37.0, 55.0, 73.0, 24.0, 23.0, 17.0, 6.0, 79.0, 78.0, 6.0, 6.0, 37.0, 48.0, 5.0, 15.0, 13.0, 29.0, 9.0, 11.0, 45.0, 47.0, 39.0, 43.0, 13.0, 31.0, 6.0, 3.0, 42.0, 45.0, 14.0, 6.0, 58.0, 40.0, 16.0, 15.0, 11.0, 31.0, 40.0, 43.0, 14.0, 44.0, 17.0, 28.0, 45.0, 51.0, 44.0, 37.0, 49.0, 44.0, 14.0, 22.0, 35.0, 34.0, 36.0, 52.0, 48.0, 26.0, 
6.0, 3.0, 41.0, 35.0, 51.0, 47.0, 23.0, 22.0, 19.0, 22.0, 57.0, 70.0, 6.0, 6.0, 21.0, 25.0, 31.0, 39.0, 73.0, 71.0, 34.0, 37.0, 55.0, 62.0, 51.0, 60.0, 6.0, 3.0, 20.0, 34.0, 20.0, 20.0, 29.0, 50.0, 11.0, 3.0, 25.0, 37.0, 23.0, 40.0, 52.0, 54.0, 8.0, 12.0, 18.0, 9.0, 66.0, 70.0, 41.0, 49.0, 12.0, 22.0, 25.0, 27.0, 46.0, 48.0, 66.0, 51.0, 42.0, 48.0, 19.0, 20.0, 48.0, 37.0]}, "sampler_perf": {"mean_env_wait_ms": 4.8713540125875445, "mean_processing_ms": 0.16758394883061775, "mean_inference_ms": 2.326528288901312}, "off_policy_estimator": {}, "info": {"num_steps_trained": 816000, "num_steps_sampled": 435200, "sample_time_ms": 23336.79, "load_time_ms": 39.064, "grad_time_ms": 10020.897, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 2.3283064712331658e-11, "cur_lr": 0.0010000000474974513, "total_loss": -0.007203067187219858, "policy_loss": -0.007930143736302853, "vf_loss": 15.71717357635498, "vf_explained_var": 0.34764334559440613, "kl": 0.0010395334102213383, "entropy": 1.6892824172973633, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 435200, "episodes_total": 1088, "training_iteration": 34, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_20-10-12", "timestamp": 1660245012, "time_this_iter_s": 30.092119216918945, "time_total_s": 3432.980574131012, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, 
"framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, 
"inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 3432.980574131012, "timesteps_since_restore": 435200, "iterations_since_restore": 34, "perf": {"cpu_util_percent": 41.03023255813954, 
"ram_util_percent": 57.66976744186048}}
-{"episode_reward_max": 146.0, "episode_reward_min": 9.0, "episode_reward_mean": 73.78, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 3.0}, "policy_reward_max": {"ppo": 80.0}, "policy_reward_mean": {"ppo": 36.89}, "custom_metrics": {"sparse_reward_mean": 15.0, "sparse_reward_min": 0, "sparse_reward_max": 40, "shaped_reward_mean": 43.78, "shaped_reward_min": 9, "shaped_reward_max": 89, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 5.77, "onion_pickup_agent_0_min": 0, "onion_pickup_agent_0_max": 13, "onion_pickup_agent_1_mean": 7.47, "onion_pickup_agent_1_min": 2, "onion_pickup_agent_1_max": 15, "useful_onion_pickup_agent_0_mean": 4.16, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 12, "useful_onion_pickup_agent_1_mean": 5.66, "useful_onion_pickup_agent_1_min": 1, "useful_onion_pickup_agent_1_max": 12, "onion_drop_agent_0_mean": 2.29, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 9, "onion_drop_agent_1_mean": 2.62, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 8, "useful_onion_drop_agent_0_mean": 1.2, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 7, "useful_onion_drop_agent_1_mean": 1.57, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 8, "potting_onion_agent_0_mean": 3.1, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 7, "potting_onion_agent_1_mean": 4.4, "potting_onion_agent_1_min": 1, "potting_onion_agent_1_max": 9, "dish_pickup_agent_0_mean": 4.23, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 13, "dish_pickup_agent_1_mean": 3.69, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 15, "useful_dish_pickup_agent_0_mean": 0.95, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 3, "useful_dish_pickup_agent_1_mean": 0.66, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 4, "dish_drop_agent_0_mean": 2.28, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 10, "dish_drop_agent_1_mean": 1.81, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 12, "useful_dish_drop_agent_0_mean": 0.62, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 7, "useful_dish_drop_agent_1_mean": 0.69, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 5, "soup_pickup_agent_0_mean": 2.44, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 12, "soup_pickup_agent_1_mean": 2.28, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 6, "soup_delivery_agent_0_mean": 1.16, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 4, "soup_delivery_agent_1_mean": 1.32, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 4, "soup_drop_agent_0_mean": 1.16, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 9, "soup_drop_agent_1_mean": 0.86, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 5, "optimal_onion_potting_agent_0_mean": 3.1, 
"optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 7, "optimal_onion_potting_agent_1_mean": 4.4, "optimal_onion_potting_agent_1_min": 1, "optimal_onion_potting_agent_1_max": 9, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 3.1, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 7, "viable_onion_potting_agent_1_mean": 4.4, "viable_onion_potting_agent_1_min": 1, "viable_onion_potting_agent_1_max": 9, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, 
"hist_stats": {"episode_reward": [42.0, 125.0, 20.0, 87.0, 46.0, 53.0, 93.0, 74.0, 121.0, 84.0, 139.0, 75.0, 57.0, 48.0, 146.0, 44.0, 61.0, 90.0, 20.0, 95.0, 31.0, 125.0, 145.0, 98.0, 129.0, 68.0, 66.0, 48.0, 105.0, 87.0, 47.0, 98.0, 36.0, 69.0, 88.0, 74.0, 9.0, 76.0, 98.0, 45.0, 41.0, 127.0, 12.0, 46.0, 70.0, 144.0, 71.0, 117.0, 111.0, 9.0, 54.0, 40.0, 79.0, 14.0, 62.0, 63.0, 106.0, 20.0, 27.0, 136.0, 90.0, 34.0, 52.0, 94.0, 117.0, 90.0, 39.0, 85.0, 90.0, 79.0, 36.0, 58.0, 49.0, 76.0, 79.0, 98.0, 136.0, 87.0, 50.0, 145.0, 28.0, 23.0, 110.0, 19.0, 134.0, 14.0, 12.0, 58.0, 77.0, 38.0, 99.0, 80.0, 142.0, 42.0, 124.0, 74.0, 93.0, 23.0, 103.0, 90.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [19.0, 23.0, 67.0, 58.0, 11.0, 9.0, 45.0, 42.0, 16.0, 30.0, 13.0, 40.0, 35.0, 58.0, 37.0, 37.0, 62.0, 59.0, 33.0, 51.0, 72.0, 67.0, 25.0, 50.0, 28.0, 29.0, 27.0, 21.0, 73.0, 73.0, 14.0, 30.0, 32.0, 29.0, 48.0, 42.0, 14.0, 6.0, 50.0, 45.0, 11.0, 20.0, 71.0, 54.0, 73.0, 72.0, 56.0, 42.0, 64.0, 65.0, 31.0, 37.0, 26.0, 40.0, 20.0, 28.0, 54.0, 51.0, 41.0, 46.0, 15.0, 32.0, 53.0, 45.0, 14.0, 22.0, 35.0, 34.0, 36.0, 52.0, 48.0, 26.0, 6.0, 3.0, 41.0, 35.0, 51.0, 47.0, 23.0, 22.0, 19.0, 22.0, 57.0, 70.0, 6.0, 6.0, 21.0, 25.0, 31.0, 39.0, 73.0, 71.0, 34.0, 37.0, 55.0, 62.0, 51.0, 60.0, 6.0, 3.0, 20.0, 34.0, 20.0, 20.0, 29.0, 50.0, 11.0, 3.0, 25.0, 37.0, 23.0, 40.0, 52.0, 54.0, 8.0, 12.0, 18.0, 9.0, 66.0, 70.0, 41.0, 49.0, 12.0, 22.0, 25.0, 27.0, 46.0, 48.0, 66.0, 51.0, 42.0, 48.0, 19.0, 20.0, 48.0, 
37.0, 42.0, 48.0, 41.0, 38.0, 12.0, 24.0, 26.0, 32.0, 24.0, 25.0, 35.0, 41.0, 36.0, 43.0, 44.0, 54.0, 74.0, 62.0, 45.0, 42.0, 28.0, 22.0, 65.0, 80.0, 14.0, 14.0, 6.0, 17.0, 40.0, 70.0, 6.0, 13.0, 70.0, 64.0, 11.0, 3.0, 3.0, 9.0, 36.0, 22.0, 39.0, 38.0, 19.0, 19.0, 54.0, 45.0, 35.0, 45.0, 63.0, 79.0, 16.0, 26.0, 63.0, 61.0, 34.0, 40.0, 48.0, 45.0, 12.0, 11.0, 49.0, 54.0, 41.0, 49.0]}, "sampler_perf": {"mean_env_wait_ms": 4.742888771715567, "mean_processing_ms": 0.4291925216501232, "mean_inference_ms": 2.2890734350045245}, "off_policy_estimator": {}, "info": {"num_steps_trained": 840000, "num_steps_sampled": 448000, "sample_time_ms": 59523.327, "load_time_ms": 38.502, "grad_time_ms": 106209.033, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 1.1641532356165829e-11, "cur_lr": 0.0010000000474974513, "total_loss": -0.007091447710990906, "policy_loss": -0.007865053601562977, "vf_loss": 16.12926483154297, "vf_explained_var": 0.35502591729164124, "kl": 0.0012615231098607183, "entropy": 1.6786518096923828, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 448000, "episodes_total": 1120, "training_iteration": 35, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_20-32-51", "timestamp": 1660246371, "time_this_iter_s": 1359.4666819572449, "time_total_s": 4792.447256088257, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": 
null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, 
"inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 4792.447256088257, "timesteps_since_restore": 448000, "iterations_since_restore": 35, "perf": {"cpu_util_percent": 73.38606557377048, 
"ram_util_percent": 58.19344262295081}}
-{"episode_reward_max": 195.0, "episode_reward_min": 9.0, "episode_reward_mean": 78.31, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 3.0}, "policy_reward_max": {"ppo": 98.0}, "policy_reward_mean": {"ppo": 39.155}, "custom_metrics": {"sparse_reward_mean": 16.0, "sparse_reward_min": 0, "sparse_reward_max": 60, "shaped_reward_mean": 46.31, "shaped_reward_min": 9, "shaped_reward_max": 89, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 5.99, "onion_pickup_agent_0_min": 0, "onion_pickup_agent_0_max": 13, "onion_pickup_agent_1_mean": 7.5, "onion_pickup_agent_1_min": 1, "onion_pickup_agent_1_max": 15, "useful_onion_pickup_agent_0_mean": 4.36, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 12, "useful_onion_pickup_agent_1_mean": 5.74, "useful_onion_pickup_agent_1_min": 1, "useful_onion_pickup_agent_1_max": 12, "onion_drop_agent_0_mean": 2.21, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 8, "onion_drop_agent_1_mean": 2.51, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 7, "useful_onion_drop_agent_0_mean": 1.25, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 7, "useful_onion_drop_agent_1_mean": 1.32, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 6, "potting_onion_agent_0_mean": 3.39, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 7, "potting_onion_agent_1_mean": 4.55, "potting_onion_agent_1_min": 1, "potting_onion_agent_1_max": 10, "dish_pickup_agent_0_mean": 4.39, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 13, "dish_pickup_agent_1_mean": 3.86, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 15, "useful_dish_pickup_agent_0_mean": 0.99, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 3, "useful_dish_pickup_agent_1_mean": 0.79, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 4, "dish_drop_agent_0_mean": 2.33, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 10, "dish_drop_agent_1_mean": 1.93, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 12, "useful_dish_drop_agent_0_mean": 0.6, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 7, "useful_dish_drop_agent_1_mean": 0.65, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 4, "soup_pickup_agent_0_mean": 2.62, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 12, "soup_pickup_agent_1_mean": 2.33, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 6, "soup_delivery_agent_0_mean": 1.3, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 4, "soup_delivery_agent_1_mean": 1.4, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 4, "soup_drop_agent_0_mean": 1.2, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 9, "soup_drop_agent_1_mean": 0.83, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 5, "optimal_onion_potting_agent_0_mean": 3.39, 
"optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 7, "optimal_onion_potting_agent_1_mean": 4.55, "optimal_onion_potting_agent_1_min": 1, "optimal_onion_potting_agent_1_max": 10, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 3.39, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 7, "viable_onion_potting_agent_1_mean": 4.55, "viable_onion_potting_agent_1_min": 1, "viable_onion_potting_agent_1_max": 10, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 
0}, "hist_stats": {"episode_reward": [31.0, 142.0, 23.0, 96.0, 64.0, 98.0, 65.0, 112.0, 36.0, 23.0, 42.0, 113.0, 33.0, 41.0, 98.0, 148.0, 130.0, 119.0, 39.0, 88.0, 42.0, 142.0, 105.0, 120.0, 179.0, 195.0, 12.0, 12.0, 56.0, 60.0, 9.0, 94.0, 117.0, 90.0, 39.0, 85.0, 90.0, 79.0, 36.0, 58.0, 49.0, 76.0, 79.0, 98.0, 136.0, 87.0, 50.0, 145.0, 28.0, 23.0, 110.0, 19.0, 134.0, 14.0, 12.0, 58.0, 77.0, 38.0, 99.0, 80.0, 142.0, 42.0, 124.0, 74.0, 93.0, 23.0, 103.0, 90.0, 42.0, 125.0, 20.0, 87.0, 46.0, 53.0, 93.0, 74.0, 121.0, 84.0, 139.0, 75.0, 57.0, 48.0, 146.0, 44.0, 61.0, 90.0, 20.0, 95.0, 31.0, 125.0, 145.0, 98.0, 129.0, 68.0, 66.0, 48.0, 105.0, 87.0, 47.0, 98.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [15.0, 16.0, 76.0, 66.0, 9.0, 14.0, 45.0, 51.0, 33.0, 31.0, 43.0, 55.0, 34.0, 31.0, 60.0, 52.0, 10.0, 26.0, 3.0, 20.0, 22.0, 20.0, 59.0, 54.0, 17.0, 16.0, 22.0, 19.0, 46.0, 52.0, 74.0, 74.0, 59.0, 71.0, 54.0, 65.0, 20.0, 19.0, 47.0, 41.0, 26.0, 16.0, 73.0, 69.0, 49.0, 56.0, 59.0, 61.0, 81.0, 98.0, 98.0, 97.0, 3.0, 9.0, 6.0, 6.0, 30.0, 26.0, 32.0, 28.0, 6.0, 3.0, 46.0, 48.0, 66.0, 51.0, 42.0, 48.0, 19.0, 20.0, 48.0, 37.0, 42.0, 48.0, 41.0, 38.0, 12.0, 24.0, 26.0, 32.0, 24.0, 25.0, 35.0, 41.0, 36.0, 43.0, 44.0, 54.0, 74.0, 62.0, 45.0, 42.0, 28.0, 22.0, 65.0, 80.0, 14.0, 14.0, 6.0, 17.0, 40.0, 70.0, 6.0, 13.0, 70.0, 64.0, 11.0, 3.0, 3.0, 9.0, 36.0, 22.0, 39.0, 38.0, 19.0, 19.0, 54.0, 45.0, 35.0, 45.0, 63.0, 79.0, 16.0, 26.0, 63.0, 61.0, 34.0, 40.0, 48.0, 45.0, 12.0, 11.0, 49.0, 54.0, 
41.0, 49.0, 19.0, 23.0, 67.0, 58.0, 11.0, 9.0, 45.0, 42.0, 16.0, 30.0, 13.0, 40.0, 35.0, 58.0, 37.0, 37.0, 62.0, 59.0, 33.0, 51.0, 72.0, 67.0, 25.0, 50.0, 28.0, 29.0, 27.0, 21.0, 73.0, 73.0, 14.0, 30.0, 32.0, 29.0, 48.0, 42.0, 14.0, 6.0, 50.0, 45.0, 11.0, 20.0, 71.0, 54.0, 73.0, 72.0, 56.0, 42.0, 64.0, 65.0, 31.0, 37.0, 26.0, 40.0, 20.0, 28.0, 54.0, 51.0, 41.0, 46.0, 15.0, 32.0, 53.0, 45.0]}, "sampler_perf": {"mean_env_wait_ms": 4.6429756749225914, "mean_processing_ms": 0.6884190143076668, "mean_inference_ms": 3.1849506897639785}, "off_policy_estimator": {}, "info": {"num_steps_trained": 864000, "num_steps_sampled": 460800, "sample_time_ms": 197454.884, "load_time_ms": 38.154, "grad_time_ms": 142553.757, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 5.8207661780829145e-12, "cur_lr": 0.0010000000474974513, "total_loss": -0.009596621617674828, "policy_loss": -0.010532871820032597, "vf_loss": 17.772741317749023, "vf_explained_var": 0.41850244998931885, "kl": 0.0012102305190637708, "entropy": 1.6820656061172485, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 460800, "episodes_total": 1152, "training_iteration": 36, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-02-29", "timestamp": 1660248149, "time_this_iter_s": 1777.6666460037231, "time_total_s": 6570.11390209198, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, 
"state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, 
"inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 6570.11390209198, "timesteps_since_restore": 460800, "iterations_since_restore": 36, "perf": {"cpu_util_percent": 79.74032921810701, 
"ram_util_percent": 58.72098765432099}}
-{"episode_reward_max": 195.0, "episode_reward_min": 9.0, "episode_reward_mean": 82.27, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 3.0}, "policy_reward_max": {"ppo": 98.0}, "policy_reward_mean": {"ppo": 41.135}, "custom_metrics": {"sparse_reward_mean": 16.0, "sparse_reward_min": 0, "sparse_reward_max": 60, "shaped_reward_mean": 50.27, "shaped_reward_min": 9, "shaped_reward_max": 89, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 6.38, "onion_pickup_agent_0_min": 0, "onion_pickup_agent_0_max": 13, "onion_pickup_agent_1_mean": 7.68, "onion_pickup_agent_1_min": 1, "onion_pickup_agent_1_max": 15, "useful_onion_pickup_agent_0_mean": 4.88, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 12, "useful_onion_pickup_agent_1_mean": 6.05, "useful_onion_pickup_agent_1_min": 1, "useful_onion_pickup_agent_1_max": 12, "onion_drop_agent_0_mean": 2.31, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 8, "onion_drop_agent_1_mean": 2.3, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 7, "useful_onion_drop_agent_0_mean": 1.31, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 7, "useful_onion_drop_agent_1_mean": 1.17, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 3.66, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 11, "potting_onion_agent_1_mean": 4.96, "potting_onion_agent_1_min": 1, "potting_onion_agent_1_max": 10, "dish_pickup_agent_0_mean": 4.49, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 13, "dish_pickup_agent_1_mean": 4.05, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 15, "useful_dish_pickup_agent_0_mean": 1.05, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 4, "useful_dish_pickup_agent_1_mean": 0.82, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 4, "dish_drop_agent_0_mean": 2.17, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 10, "dish_drop_agent_1_mean": 2.05, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 12, "useful_dish_drop_agent_0_mean": 0.62, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 7, "useful_dish_drop_agent_1_mean": 0.64, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 4, "soup_pickup_agent_0_mean": 2.82, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 12, "soup_pickup_agent_1_mean": 2.31, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 1.57, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 5, "soup_delivery_agent_1_mean": 1.45, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 4, "soup_drop_agent_0_mean": 1.13, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 9, "soup_drop_agent_1_mean": 0.77, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 6, "optimal_onion_potting_agent_0_mean": 3.66, 
"optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 11, "optimal_onion_potting_agent_1_mean": 4.96, "optimal_onion_potting_agent_1_min": 1, "optimal_onion_potting_agent_1_max": 10, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 3.66, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 11, "viable_onion_potting_agent_1_mean": 4.96, "viable_onion_potting_agent_1_min": 1, "viable_onion_potting_agent_1_max": 10, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [118.0, 107.0, 31.0, 150.0, 71.0, 58.0, 88.0, 100.0, 155.0, 98.0, 42.0, 110.0, 38.0, 94.0, 61.0, 77.0, 59.0, 66.0, 65.0, 36.0, 130.0, 67.0, 112.0, 72.0, 164.0, 45.0, 111.0, 149.0, 37.0, 82.0, 93.0, 98.0, 93.0, 23.0, 103.0, 90.0, 42.0, 125.0, 20.0, 87.0, 46.0, 53.0, 93.0, 74.0, 121.0, 84.0, 139.0, 75.0, 57.0, 48.0, 146.0, 44.0, 61.0, 90.0, 20.0, 95.0, 31.0, 125.0, 145.0, 98.0, 129.0, 68.0, 66.0, 48.0, 105.0, 87.0, 47.0, 98.0, 31.0, 142.0, 23.0, 96.0, 64.0, 98.0, 65.0, 112.0, 36.0, 23.0, 42.0, 113.0, 33.0, 41.0, 98.0, 148.0, 130.0, 119.0, 39.0, 88.0, 42.0, 142.0, 105.0, 120.0, 179.0, 195.0, 12.0, 12.0, 56.0, 60.0, 9.0, 94.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [61.0, 57.0, 48.0, 59.0, 16.0, 15.0, 60.0, 90.0, 26.0, 45.0, 40.0, 18.0, 45.0, 43.0, 63.0, 37.0, 78.0, 77.0, 51.0, 47.0, 11.0, 31.0, 60.0, 50.0, 30.0, 8.0, 48.0, 46.0, 37.0, 24.0, 37.0, 40.0, 25.0, 34.0, 29.0, 37.0, 39.0, 26.0, 19.0, 17.0, 62.0, 68.0, 44.0, 23.0, 61.0, 51.0, 39.0, 33.0, 79.0, 85.0, 17.0, 28.0, 56.0, 55.0, 73.0, 76.0, 12.0, 25.0, 26.0, 56.0, 55.0, 38.0, 48.0, 50.0, 48.0, 45.0, 12.0, 11.0, 49.0, 54.0, 41.0, 49.0, 19.0, 23.0, 67.0, 58.0, 11.0, 9.0, 45.0, 42.0, 16.0, 30.0, 13.0, 40.0, 35.0, 58.0, 37.0, 37.0, 62.0, 59.0, 33.0, 51.0, 72.0, 67.0, 25.0, 50.0, 28.0, 29.0, 27.0, 21.0, 73.0, 73.0, 14.0, 30.0, 32.0, 29.0, 48.0, 42.0, 14.0, 6.0, 50.0, 45.0, 11.0, 20.0, 71.0, 54.0, 73.0, 72.0, 56.0, 42.0, 64.0, 65.0, 31.0, 37.0, 26.0, 
40.0, 20.0, 28.0, 54.0, 51.0, 41.0, 46.0, 15.0, 32.0, 53.0, 45.0, 15.0, 16.0, 76.0, 66.0, 9.0, 14.0, 45.0, 51.0, 33.0, 31.0, 43.0, 55.0, 34.0, 31.0, 60.0, 52.0, 10.0, 26.0, 3.0, 20.0, 22.0, 20.0, 59.0, 54.0, 17.0, 16.0, 22.0, 19.0, 46.0, 52.0, 74.0, 74.0, 59.0, 71.0, 54.0, 65.0, 20.0, 19.0, 47.0, 41.0, 26.0, 16.0, 73.0, 69.0, 49.0, 56.0, 59.0, 61.0, 81.0, 98.0, 98.0, 97.0, 3.0, 9.0, 6.0, 6.0, 30.0, 26.0, 32.0, 28.0, 6.0, 3.0, 46.0, 48.0]}, "sampler_perf": {"mean_env_wait_ms": 4.550001067823368, "mean_processing_ms": 0.9407599736993785, "mean_inference_ms": 4.060064536997679}, "off_policy_estimator": {}, "info": {"num_steps_trained": 888000, "num_steps_sampled": 473600, "sample_time_ms": 197652.347, "load_time_ms": 38.247, "grad_time_ms": 142451.276, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 2.9103830890414573e-12, "cur_lr": 0.0010000000474974513, "total_loss": -0.00908196996897459, "policy_loss": -0.009920346550643444, "vf_loss": 16.691673278808594, "vf_explained_var": 0.3790724277496338, "kl": 0.0013888808898627758, "entropy": 1.661569595336914, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 473600, "episodes_total": 1184, "training_iteration": 37, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-03-05", "timestamp": 1660248185, "time_this_iter_s": 36.35908007621765, "time_total_s": 6606.472982168198, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 
256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, 
"tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 6606.472982168198, "timesteps_since_restore": 473600, "iterations_since_restore": 
37, "perf": {"cpu_util_percent": 52.89999999999999, "ram_util_percent": 59.76923076923076}}
-{"episode_reward_max": 195.0, "episode_reward_min": 9.0, "episode_reward_mean": 82.44, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 3.0}, "policy_reward_max": {"ppo": 98.0}, "policy_reward_mean": {"ppo": 41.22}, "custom_metrics": {"sparse_reward_mean": 15.6, "sparse_reward_min": 0, "sparse_reward_max": 60, "shaped_reward_mean": 51.24, "shaped_reward_min": 9, "shaped_reward_max": 84, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 6.6, "onion_pickup_agent_0_min": 0, "onion_pickup_agent_0_max": 13, "onion_pickup_agent_1_mean": 7.44, "onion_pickup_agent_1_min": 1, "onion_pickup_agent_1_max": 13, "useful_onion_pickup_agent_0_mean": 4.92, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 11, "useful_onion_pickup_agent_1_mean": 5.88, "useful_onion_pickup_agent_1_min": 1, "useful_onion_pickup_agent_1_max": 12, "onion_drop_agent_0_mean": 2.4, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 
8, "onion_drop_agent_1_mean": 2.21, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 7, "useful_onion_drop_agent_0_mean": 1.45, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 7, "useful_onion_drop_agent_1_mean": 1.21, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 6, "potting_onion_agent_0_mean": 3.85, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 11, "potting_onion_agent_1_mean": 4.87, "potting_onion_agent_1_min": 1, "potting_onion_agent_1_max": 10, "dish_pickup_agent_0_mean": 4.4, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 4.09, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 12, "useful_dish_pickup_agent_0_mean": 1.1, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 4, "useful_dish_pickup_agent_1_mean": 0.91, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 3, "dish_drop_agent_0_mean": 2.04, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 2.02, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 9, "useful_dish_drop_agent_0_mean": 0.52, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 4, "useful_dish_drop_agent_1_mean": 0.59, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 4, "soup_pickup_agent_0_mean": 2.75, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 10, "soup_pickup_agent_1_mean": 2.47, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 1.51, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 5, "soup_delivery_agent_1_mean": 1.42, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 4, "soup_drop_agent_0_mean": 1.09, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 9, "soup_drop_agent_1_mean": 0.97, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 6, "optimal_onion_potting_agent_0_mean": 3.85, "optimal_onion_potting_agent_0_min": 0, 
"optimal_onion_potting_agent_0_max": 11, "optimal_onion_potting_agent_1_mean": 4.87, "optimal_onion_potting_agent_1_min": 1, "optimal_onion_potting_agent_1_max": 10, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 3.85, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 11, "viable_onion_potting_agent_1_mean": 4.87, "viable_onion_potting_agent_1_min": 1, "viable_onion_potting_agent_1_max": 10, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": 
[155.0, 107.0, 66.0, 64.0, 59.0, 130.0, 80.0, 101.0, 98.0, 109.0, 71.0, 68.0, 84.0, 79.0, 57.0, 20.0, 37.0, 82.0, 48.0, 64.0, 84.0, 113.0, 101.0, 62.0, 133.0, 71.0, 23.0, 109.0, 125.0, 23.0, 58.0, 75.0, 105.0, 87.0, 47.0, 98.0, 31.0, 142.0, 23.0, 96.0, 64.0, 98.0, 65.0, 112.0, 36.0, 23.0, 42.0, 113.0, 33.0, 41.0, 98.0, 148.0, 130.0, 119.0, 39.0, 88.0, 42.0, 142.0, 105.0, 120.0, 179.0, 195.0, 12.0, 12.0, 56.0, 60.0, 9.0, 94.0, 118.0, 107.0, 31.0, 150.0, 71.0, 58.0, 88.0, 100.0, 155.0, 98.0, 42.0, 110.0, 38.0, 94.0, 61.0, 77.0, 59.0, 66.0, 65.0, 36.0, 130.0, 67.0, 112.0, 72.0, 164.0, 45.0, 111.0, 149.0, 37.0, 82.0, 93.0, 98.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [82.0, 73.0, 64.0, 43.0, 37.0, 29.0, 30.0, 34.0, 28.0, 31.0, 62.0, 68.0, 37.0, 43.0, 50.0, 51.0, 44.0, 54.0, 50.0, 59.0, 42.0, 29.0, 29.0, 39.0, 42.0, 42.0, 34.0, 45.0, 37.0, 20.0, 8.0, 12.0, 20.0, 17.0, 42.0, 40.0, 22.0, 26.0, 38.0, 26.0, 34.0, 50.0, 54.0, 59.0, 55.0, 46.0, 40.0, 22.0, 60.0, 73.0, 37.0, 34.0, 11.0, 12.0, 51.0, 58.0, 56.0, 69.0, 17.0, 6.0, 28.0, 30.0, 39.0, 36.0, 54.0, 51.0, 41.0, 46.0, 15.0, 32.0, 53.0, 45.0, 15.0, 16.0, 76.0, 66.0, 9.0, 14.0, 45.0, 51.0, 33.0, 31.0, 43.0, 55.0, 34.0, 31.0, 60.0, 52.0, 10.0, 26.0, 3.0, 20.0, 22.0, 20.0, 59.0, 54.0, 17.0, 16.0, 22.0, 19.0, 46.0, 52.0, 74.0, 74.0, 59.0, 71.0, 54.0, 65.0, 20.0, 19.0, 47.0, 41.0, 26.0, 16.0, 73.0, 69.0, 49.0, 56.0, 59.0, 61.0, 81.0, 98.0, 98.0, 97.0, 3.0, 9.0, 6.0, 6.0, 30.0, 26.0, 32.0, 28.0, 6.0, 3.0, 46.0, 48.0, 61.0, 57.0, 
48.0, 59.0, 16.0, 15.0, 60.0, 90.0, 26.0, 45.0, 40.0, 18.0, 45.0, 43.0, 63.0, 37.0, 78.0, 77.0, 51.0, 47.0, 11.0, 31.0, 60.0, 50.0, 30.0, 8.0, 48.0, 46.0, 37.0, 24.0, 37.0, 40.0, 25.0, 34.0, 29.0, 37.0, 39.0, 26.0, 19.0, 17.0, 62.0, 68.0, 44.0, 23.0, 61.0, 51.0, 39.0, 33.0, 79.0, 85.0, 17.0, 28.0, 56.0, 55.0, 73.0, 76.0, 12.0, 25.0, 26.0, 56.0, 55.0, 38.0, 48.0, 50.0]}, "sampler_perf": {"mean_env_wait_ms": 4.462790236915632, "mean_processing_ms": 0.9577209626577212, "mean_inference_ms": 4.91290526345304}, "off_policy_estimator": {}, "info": {"num_steps_trained": 912000, "num_steps_sampled": 486400, "sample_time_ms": 197395.402, "load_time_ms": 38.358, "grad_time_ms": 142364.304, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 1.4551915445207286e-12, "cur_lr": 0.0010000000474974513, "total_loss": -0.0073294141329824924, "policy_loss": -0.007997877895832062, "vf_loss": 15.018708229064941, "vf_explained_var": 0.4496181905269623, "kl": 0.0011589183704927564, "entropy": 1.666812539100647, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 486400, "episodes_total": 1216, "training_iteration": 38, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-03-36", "timestamp": 1660248216, "time_this_iter_s": 30.582061052322388, "time_total_s": 6637.05504322052, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, 
"framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, 
"inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 6637.05504322052, "timesteps_since_restore": 486400, "iterations_since_restore": 38, "perf": {"cpu_util_percent": 42.890697674418604, 
"ram_util_percent": 58.16976744186046}}
-{"episode_reward_max": 164.0, "episode_reward_min": 9.0, "episode_reward_mean": 80.91, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 3.0}, "policy_reward_max": {"ppo": 90.0}, "policy_reward_mean": {"ppo": 40.455}, "custom_metrics": {"sparse_reward_mean": 14.0, "sparse_reward_min": 0, "sparse_reward_max": 40, "shaped_reward_mean": 52.91, "shaped_reward_min": 9, "shaped_reward_max": 87, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 6.33, "onion_pickup_agent_0_min": 1, "onion_pickup_agent_0_max": 13, "onion_pickup_agent_1_mean": 7.33, "onion_pickup_agent_1_min": 1, "onion_pickup_agent_1_max": 14, "useful_onion_pickup_agent_0_mean": 4.82, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 11, "useful_onion_pickup_agent_1_mean": 5.89, "useful_onion_pickup_agent_1_min": 0, "useful_onion_pickup_agent_1_max": 12, "onion_drop_agent_0_mean": 2.18, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 7, "onion_drop_agent_1_mean": 1.99, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 7, "useful_onion_drop_agent_0_mean": 1.27, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 1.12, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 6, "potting_onion_agent_0_mean": 3.84, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 11, "potting_onion_agent_1_mean": 4.96, "potting_onion_agent_1_min": 1, "potting_onion_agent_1_max": 11, "dish_pickup_agent_0_mean": 4.6, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 4.1, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 12, "useful_dish_pickup_agent_0_mean": 1.2, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 4, "useful_dish_pickup_agent_1_mean": 0.92, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 3, "dish_drop_agent_0_mean": 2.13, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 2.0, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 9, "useful_dish_drop_agent_0_mean": 0.6, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 4, "useful_dish_drop_agent_1_mean": 0.66, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 7, "soup_pickup_agent_0_mean": 2.88, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 2.55, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 1.56, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 5, "soup_delivery_agent_1_mean": 1.41, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 4, "soup_drop_agent_0_mean": 1.1, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 7, "soup_drop_agent_1_mean": 1.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 6, "optimal_onion_potting_agent_0_mean": 3.84, "optimal_onion_potting_agent_0_min": 
0, "optimal_onion_potting_agent_0_max": 11, "optimal_onion_potting_agent_1_mean": 4.96, "optimal_onion_potting_agent_1_min": 1, "optimal_onion_potting_agent_1_max": 11, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 3.84, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 11, "viable_onion_potting_agent_1_mean": 4.96, "viable_onion_potting_agent_1_min": 1, "viable_onion_potting_agent_1_max": 11, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": 
{"episode_reward": [95.0, 36.0, 158.0, 96.0, 95.0, 45.0, 66.0, 127.0, 84.0, 147.0, 34.0, 84.0, 96.0, 118.0, 42.0, 112.0, 112.0, 11.0, 28.0, 66.0, 42.0, 39.0, 85.0, 137.0, 96.0, 31.0, 92.0, 60.0, 66.0, 72.0, 56.0, 104.0, 56.0, 60.0, 9.0, 94.0, 118.0, 107.0, 31.0, 150.0, 71.0, 58.0, 88.0, 100.0, 155.0, 98.0, 42.0, 110.0, 38.0, 94.0, 61.0, 77.0, 59.0, 66.0, 65.0, 36.0, 130.0, 67.0, 112.0, 72.0, 164.0, 45.0, 111.0, 149.0, 37.0, 82.0, 93.0, 98.0, 155.0, 107.0, 66.0, 64.0, 59.0, 130.0, 80.0, 101.0, 98.0, 109.0, 71.0, 68.0, 84.0, 79.0, 57.0, 20.0, 37.0, 82.0, 48.0, 64.0, 84.0, 113.0, 101.0, 62.0, 133.0, 71.0, 23.0, 109.0, 125.0, 23.0, 58.0, 75.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [50.0, 45.0, 6.0, 30.0, 84.0, 74.0, 46.0, 50.0, 45.0, 50.0, 20.0, 25.0, 33.0, 33.0, 67.0, 60.0, 42.0, 42.0, 66.0, 81.0, 14.0, 20.0, 37.0, 47.0, 50.0, 46.0, 62.0, 56.0, 17.0, 25.0, 41.0, 71.0, 45.0, 67.0, 8.0, 3.0, 16.0, 12.0, 29.0, 37.0, 25.0, 17.0, 28.0, 11.0, 52.0, 33.0, 65.0, 72.0, 47.0, 49.0, 22.0, 9.0, 31.0, 61.0, 30.0, 30.0, 34.0, 32.0, 28.0, 44.0, 22.0, 34.0, 51.0, 53.0, 30.0, 26.0, 32.0, 28.0, 6.0, 3.0, 46.0, 48.0, 61.0, 57.0, 48.0, 59.0, 16.0, 15.0, 60.0, 90.0, 26.0, 45.0, 40.0, 18.0, 45.0, 43.0, 63.0, 37.0, 78.0, 77.0, 51.0, 47.0, 11.0, 31.0, 60.0, 50.0, 30.0, 8.0, 48.0, 46.0, 37.0, 24.0, 37.0, 40.0, 25.0, 34.0, 29.0, 37.0, 39.0, 26.0, 19.0, 17.0, 62.0, 68.0, 44.0, 23.0, 61.0, 51.0, 39.0, 33.0, 79.0, 85.0, 17.0, 28.0, 56.0, 55.0, 73.0, 76.0, 12.0, 25.0, 26.0, 56.0, 55.0, 38.0, 48.0, 
50.0, 82.0, 73.0, 64.0, 43.0, 37.0, 29.0, 30.0, 34.0, 28.0, 31.0, 62.0, 68.0, 37.0, 43.0, 50.0, 51.0, 44.0, 54.0, 50.0, 59.0, 42.0, 29.0, 29.0, 39.0, 42.0, 42.0, 34.0, 45.0, 37.0, 20.0, 8.0, 12.0, 20.0, 17.0, 42.0, 40.0, 22.0, 26.0, 38.0, 26.0, 34.0, 50.0, 54.0, 59.0, 55.0, 46.0, 40.0, 22.0, 60.0, 73.0, 37.0, 34.0, 11.0, 12.0, 51.0, 58.0, 56.0, 69.0, 17.0, 6.0, 28.0, 30.0, 39.0, 36.0]}, "sampler_perf": {"mean_env_wait_ms": 4.361961744597006, "mean_processing_ms": 0.9376235930507917, "mean_inference_ms": 4.9270347290029886}, "off_policy_estimator": {}, "info": {"num_steps_trained": 936000, "num_steps_sampled": 499200, "sample_time_ms": 197058.326, "load_time_ms": 38.236, "grad_time_ms": 142247.976, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 7.275957722603643e-13, "cur_lr": 0.0010000000474974513, "total_loss": -0.009562704712152481, "policy_loss": -0.010270781815052032, "vf_loss": 15.400076866149902, "vf_explained_var": 0.39905285835266113, "kl": 0.0014264689525589347, "entropy": 1.6638473272323608, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 499200, "episodes_total": 1248, "training_iteration": 39, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-04-04", "timestamp": 1660248244, "time_this_iter_s": 28.12965416908264, "time_total_s": 6665.184697389603, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, 
"state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, 
"inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 6665.184697389603, "timesteps_since_restore": 499200, "iterations_since_restore": 39, "perf": {"cpu_util_percent": 32.9825, 
"ram_util_percent": 58.30499999999999}}
-{"episode_reward_max": 193.0, "episode_reward_min": 9.0, "episode_reward_mean": 81.89, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 3.0}, "policy_reward_max": {"ppo": 98.0}, "policy_reward_mean": {"ppo": 40.945}, "custom_metrics": {"sparse_reward_mean": 14.4, "sparse_reward_min": 0, "sparse_reward_max": 60, "shaped_reward_mean": 53.09, "shaped_reward_min": 9, "shaped_reward_max": 87, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 6.34, "onion_pickup_agent_0_min": 1, "onion_pickup_agent_0_max": 13, "onion_pickup_agent_1_mean": 7.25, "onion_pickup_agent_1_min": 1, "onion_pickup_agent_1_max": 14, "useful_onion_pickup_agent_0_mean": 4.88, "useful_onion_pickup_agent_0_min": 1, "useful_onion_pickup_agent_0_max": 11, "useful_onion_pickup_agent_1_mean": 5.78, "useful_onion_pickup_agent_1_min": 0, "useful_onion_pickup_agent_1_max": 14, "onion_drop_agent_0_mean": 2.18, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 7, "onion_drop_agent_1_mean": 1.95, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 10, "useful_onion_drop_agent_0_mean": 1.23, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 1.1, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 9, "potting_onion_agent_0_mean": 3.9, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 10, "potting_onion_agent_1_mean": 4.92, "potting_onion_agent_1_min": 1, "potting_onion_agent_1_max": 12, "dish_pickup_agent_0_mean": 4.72, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 4.2, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 12, "useful_dish_pickup_agent_0_mean": 1.07, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 4, "useful_dish_pickup_agent_1_mean": 0.94, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 3, "dish_drop_agent_0_mean": 2.29, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 1.98, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 9, "useful_dish_drop_agent_0_mean": 0.64, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 4, "useful_dish_drop_agent_1_mean": 0.67, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 7, "soup_pickup_agent_0_mean": 2.82, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 2.74, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 1.41, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 5, "soup_delivery_agent_1_mean": 1.51, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 4, "soup_drop_agent_0_mean": 1.14, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 7, "soup_drop_agent_1_mean": 1.08, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 5, "optimal_onion_potting_agent_0_mean": 3.9, 
"optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 10, "optimal_onion_potting_agent_1_mean": 4.92, "optimal_onion_potting_agent_1_min": 1, "optimal_onion_potting_agent_1_max": 12, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 3.9, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 10, "viable_onion_potting_agent_1_mean": 4.92, "viable_onion_potting_agent_1_min": 1, "viable_onion_potting_agent_1_max": 12, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 
0}, "hist_stats": {"episode_reward": [63.0, 122.0, 107.0, 193.0, 107.0, 44.0, 20.0, 144.0, 80.0, 53.0, 156.0, 100.0, 55.0, 74.0, 89.0, 9.0, 69.0, 52.0, 50.0, 96.0, 115.0, 58.0, 87.0, 144.0, 92.0, 20.0, 63.0, 81.0, 115.0, 84.0, 99.0, 150.0, 37.0, 82.0, 93.0, 98.0, 155.0, 107.0, 66.0, 64.0, 59.0, 130.0, 80.0, 101.0, 98.0, 109.0, 71.0, 68.0, 84.0, 79.0, 57.0, 20.0, 37.0, 82.0, 48.0, 64.0, 84.0, 113.0, 101.0, 62.0, 133.0, 71.0, 23.0, 109.0, 125.0, 23.0, 58.0, 75.0, 95.0, 36.0, 158.0, 96.0, 95.0, 45.0, 66.0, 127.0, 84.0, 147.0, 34.0, 84.0, 96.0, 118.0, 42.0, 112.0, 112.0, 11.0, 28.0, 66.0, 42.0, 39.0, 85.0, 137.0, 96.0, 31.0, 92.0, 60.0, 66.0, 72.0, 56.0, 104.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [31.0, 32.0, 55.0, 67.0, 50.0, 57.0, 95.0, 98.0, 52.0, 55.0, 14.0, 30.0, 6.0, 14.0, 78.0, 66.0, 40.0, 40.0, 20.0, 33.0, 75.0, 81.0, 47.0, 53.0, 27.0, 28.0, 39.0, 35.0, 49.0, 40.0, 6.0, 3.0, 41.0, 28.0, 27.0, 25.0, 30.0, 20.0, 54.0, 42.0, 59.0, 56.0, 27.0, 31.0, 37.0, 50.0, 76.0, 68.0, 37.0, 55.0, 5.0, 15.0, 24.0, 39.0, 43.0, 38.0, 56.0, 59.0, 32.0, 52.0, 50.0, 49.0, 70.0, 80.0, 12.0, 25.0, 26.0, 56.0, 55.0, 38.0, 48.0, 50.0, 82.0, 73.0, 64.0, 43.0, 37.0, 29.0, 30.0, 34.0, 28.0, 31.0, 62.0, 68.0, 37.0, 43.0, 50.0, 51.0, 44.0, 54.0, 50.0, 59.0, 42.0, 29.0, 29.0, 39.0, 42.0, 42.0, 34.0, 45.0, 37.0, 20.0, 8.0, 12.0, 20.0, 17.0, 42.0, 40.0, 22.0, 26.0, 38.0, 26.0, 34.0, 50.0, 54.0, 59.0, 55.0, 46.0, 40.0, 22.0, 60.0, 73.0, 37.0, 34.0, 11.0, 12.0, 51.0, 58.0, 56.0, 69.0, 17.0, 6.0, 
28.0, 30.0, 39.0, 36.0, 50.0, 45.0, 6.0, 30.0, 84.0, 74.0, 46.0, 50.0, 45.0, 50.0, 20.0, 25.0, 33.0, 33.0, 67.0, 60.0, 42.0, 42.0, 66.0, 81.0, 14.0, 20.0, 37.0, 47.0, 50.0, 46.0, 62.0, 56.0, 17.0, 25.0, 41.0, 71.0, 45.0, 67.0, 8.0, 3.0, 16.0, 12.0, 29.0, 37.0, 25.0, 17.0, 28.0, 11.0, 52.0, 33.0, 65.0, 72.0, 47.0, 49.0, 22.0, 9.0, 31.0, 61.0, 30.0, 30.0, 34.0, 32.0, 28.0, 44.0, 22.0, 34.0, 51.0, 53.0]}, "sampler_perf": {"mean_env_wait_ms": 4.262822214946647, "mean_processing_ms": 0.9177406233023881, "mean_inference_ms": 4.823257056931315}, "off_policy_estimator": {}, "info": {"num_steps_trained": 960000, "num_steps_sampled": 512000, "sample_time_ms": 196701.65, "load_time_ms": 38.048, "grad_time_ms": 142153.928, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 3.6379788613018216e-13, "cur_lr": 0.0010000000474974513, "total_loss": -0.008069280534982681, "policy_loss": -0.008976585231721401, "vf_loss": 17.312698364257812, "vf_explained_var": 0.4009813070297241, "kl": 0.0012740670936182141, "entropy": 1.647910237312317, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 512000, "episodes_total": 1280, "training_iteration": 40, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-04-32", "timestamp": 1660248272, "time_this_iter_s": 27.727252960205078, "time_total_s": 6692.911950349808, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": 
false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": 
{"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 6692.911950349808, "timesteps_since_restore": 512000, "iterations_since_restore": 40, "perf": 
{"cpu_util_percent": 32.13, "ram_util_percent": 58.30499999999999}}
-{"episode_reward_max": 213.0, "episode_reward_min": 6.0, "episode_reward_mean": 85.54, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 3.0}, "policy_reward_max": {"ppo": 113.0}, "policy_reward_mean": {"ppo": 42.77}, "custom_metrics": {"sparse_reward_mean": 14.8, "sparse_reward_min": 0, "sparse_reward_max": 60, "shaped_reward_mean": 55.94, "shaped_reward_min": 6, "shaped_reward_max": 101, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 6.53, "onion_pickup_agent_0_min": 1, "onion_pickup_agent_0_max": 13, "onion_pickup_agent_1_mean": 7.11, "onion_pickup_agent_1_min": 1, "onion_pickup_agent_1_max": 14, "useful_onion_pickup_agent_0_mean": 5.17, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 11, "useful_onion_pickup_agent_1_mean": 5.71, "useful_onion_pickup_agent_1_min": 0, "useful_onion_pickup_agent_1_max": 14, "onion_drop_agent_0_mean": 1.85, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 7, "onion_drop_agent_1_mean": 1.9, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 10, "useful_onion_drop_agent_0_mean": 1.01, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 1.09, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 9, "potting_onion_agent_0_mean": 4.34, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 10, "potting_onion_agent_1_mean": 4.79, "potting_onion_agent_1_min": 1, "potting_onion_agent_1_max": 12, "dish_pickup_agent_0_mean": 4.45, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 4.39, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 14, "useful_dish_pickup_agent_0_mean": 1.11, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 4, "useful_dish_pickup_agent_1_mean": 1.14, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 5, "dish_drop_agent_0_mean": 1.99, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 1.99, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 11, "useful_dish_drop_agent_0_mean": 0.55, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.54, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 7, "soup_pickup_agent_0_mean": 2.81, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 3.11, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 1.45, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 5, "soup_delivery_agent_1_mean": 1.72, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 4, "soup_drop_agent_0_mean": 1.15, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 6, "soup_drop_agent_1_mean": 1.23, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 5, "optimal_onion_potting_agent_0_mean": 4.34, 
"optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 10, "optimal_onion_potting_agent_1_mean": 4.79, "optimal_onion_potting_agent_1_min": 1, "optimal_onion_potting_agent_1_max": 12, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 4.34, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 10, "viable_onion_potting_agent_1_mean": 4.79, "viable_onion_potting_agent_1_min": 1, "viable_onion_potting_agent_1_max": 12, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [104.0, 141.0, 53.0, 70.0, 96.0, 127.0, 58.0, 31.0, 6.0, 52.0, 141.0, 55.0, 64.0, 88.0, 148.0, 69.0, 187.0, 50.0, 124.0, 107.0, 93.0, 99.0, 116.0, 72.0, 89.0, 42.0, 146.0, 213.0, 118.0, 82.0, 81.0, 28.0, 125.0, 23.0, 58.0, 75.0, 95.0, 36.0, 158.0, 96.0, 95.0, 45.0, 66.0, 127.0, 84.0, 147.0, 34.0, 84.0, 96.0, 118.0, 42.0, 112.0, 112.0, 11.0, 28.0, 66.0, 42.0, 39.0, 85.0, 137.0, 96.0, 31.0, 92.0, 60.0, 66.0, 72.0, 56.0, 104.0, 63.0, 122.0, 107.0, 193.0, 107.0, 44.0, 20.0, 144.0, 80.0, 53.0, 156.0, 100.0, 55.0, 74.0, 89.0, 9.0, 69.0, 52.0, 50.0, 96.0, 115.0, 58.0, 87.0, 144.0, 92.0, 20.0, 63.0, 81.0, 115.0, 84.0, 99.0, 150.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [48.0, 56.0, 68.0, 73.0, 25.0, 28.0, 42.0, 28.0, 53.0, 43.0, 58.0, 69.0, 34.0, 24.0, 19.0, 12.0, 3.0, 3.0, 23.0, 29.0, 73.0, 68.0, 21.0, 34.0, 42.0, 22.0, 35.0, 53.0, 68.0, 80.0, 41.0, 28.0, 99.0, 88.0, 30.0, 20.0, 60.0, 64.0, 59.0, 48.0, 48.0, 45.0, 53.0, 46.0, 57.0, 59.0, 37.0, 35.0, 44.0, 45.0, 20.0, 22.0, 76.0, 70.0, 100.0, 113.0, 57.0, 61.0, 48.0, 34.0, 31.0, 50.0, 3.0, 25.0, 56.0, 69.0, 17.0, 6.0, 28.0, 30.0, 39.0, 36.0, 50.0, 45.0, 6.0, 30.0, 84.0, 74.0, 46.0, 50.0, 45.0, 50.0, 20.0, 25.0, 33.0, 33.0, 67.0, 60.0, 42.0, 42.0, 66.0, 81.0, 14.0, 20.0, 37.0, 47.0, 50.0, 46.0, 62.0, 56.0, 17.0, 25.0, 41.0, 71.0, 45.0, 67.0, 8.0, 3.0, 16.0, 12.0, 29.0, 37.0, 25.0, 17.0, 28.0, 11.0, 52.0, 33.0, 65.0, 72.0, 47.0, 49.0, 22.0, 9.0, 31.0, 61.0, 
30.0, 30.0, 34.0, 32.0, 28.0, 44.0, 22.0, 34.0, 51.0, 53.0, 31.0, 32.0, 55.0, 67.0, 50.0, 57.0, 95.0, 98.0, 52.0, 55.0, 14.0, 30.0, 6.0, 14.0, 78.0, 66.0, 40.0, 40.0, 20.0, 33.0, 75.0, 81.0, 47.0, 53.0, 27.0, 28.0, 39.0, 35.0, 49.0, 40.0, 6.0, 3.0, 41.0, 28.0, 27.0, 25.0, 30.0, 20.0, 54.0, 42.0, 59.0, 56.0, 27.0, 31.0, 37.0, 50.0, 76.0, 68.0, 37.0, 55.0, 5.0, 15.0, 24.0, 39.0, 43.0, 38.0, 56.0, 59.0, 32.0, 52.0, 50.0, 49.0, 70.0, 80.0]}, "sampler_perf": {"mean_env_wait_ms": 4.168357407076552, "mean_processing_ms": 0.8988004870947216, "mean_inference_ms": 4.723239868801954}, "off_policy_estimator": {}, "info": {"num_steps_trained": 984000, "num_steps_sampled": 524800, "sample_time_ms": 196459.456, "load_time_ms": 38.195, "grad_time_ms": 142037.515, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 1.8189894306509108e-13, "cur_lr": 0.0010000000474974513, "total_loss": -0.008718971163034439, "policy_loss": -0.009683111682534218, "vf_loss": 17.845956802368164, "vf_explained_var": 0.43686649203300476, "kl": 0.0014183915918692946, "entropy": 1.6409085988998413, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 524800, "episodes_total": 1312, "training_iteration": 41, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-05-00", "timestamp": 1660248300, "time_this_iter_s": 27.954697370529175, "time_total_s": 6720.866647720337, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 
256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, 
"tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 6720.866647720337, "timesteps_since_restore": 524800, "iterations_since_restore": 
41, "perf": {"cpu_util_percent": 35.58461538461538, "ram_util_percent": 58.16923076923076}}
-{"episode_reward_max": 213.0, "episode_reward_min": 6.0, "episode_reward_mean": 92.07, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 3.0}, "policy_reward_max": {"ppo": 116.0}, "policy_reward_mean": {"ppo": 46.035}, "custom_metrics": {"sparse_reward_mean": 16.8, "sparse_reward_min": 0, "sparse_reward_max": 60, "shaped_reward_mean": 58.47, "shaped_reward_min": 6, "shaped_reward_max": 101, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 7.0, "onion_pickup_agent_0_min": 2, "onion_pickup_agent_0_max": 12, "onion_pickup_agent_1_mean": 7.36, "onion_pickup_agent_1_min": 1, "onion_pickup_agent_1_max": 14, "useful_onion_pickup_agent_0_mean": 5.54, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 11, "useful_onion_pickup_agent_1_mean": 5.75, "useful_onion_pickup_agent_1_min": 1, "useful_onion_pickup_agent_1_max": 14, "onion_drop_agent_0_mean": 1.99, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 7, "onion_drop_agent_1_mean": 2.0, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 10, "useful_onion_drop_agent_0_mean": 1.2, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 7, "useful_onion_drop_agent_1_mean": 1.22, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 9, "potting_onion_agent_0_mean": 4.6, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 10, "potting_onion_agent_1_mean": 4.96, "potting_onion_agent_1_min": 1, "potting_onion_agent_1_max": 12, "dish_pickup_agent_0_mean": 4.36, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 4.4, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 14, "useful_dish_pickup_agent_0_mean": 1.2, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 5, "useful_dish_pickup_agent_1_mean": 1.18, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 5, "dish_drop_agent_0_mean": 1.9, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 1.9, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 11, "useful_dish_drop_agent_0_mean": 0.64, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 5, "useful_dish_drop_agent_1_mean": 0.48, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 2.87, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 12, "soup_pickup_agent_1_mean": 3.27, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 1.58, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 4, "soup_delivery_agent_1_mean": 1.83, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 4, "soup_drop_agent_0_mean": 1.14, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 7, "soup_drop_agent_1_mean": 1.25, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 5, "optimal_onion_potting_agent_0_mean": 4.6, "optimal_onion_potting_agent_0_min": 
0, "optimal_onion_potting_agent_0_max": 10, "optimal_onion_potting_agent_1_mean": 4.96, "optimal_onion_potting_agent_1_min": 1, "optimal_onion_potting_agent_1_max": 12, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 4.6, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 10, "viable_onion_potting_agent_1_mean": 4.96, "viable_onion_potting_agent_1_min": 1, "viable_onion_potting_agent_1_max": 12, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": 
[73.0, 64.0, 96.0, 101.0, 66.0, 168.0, 146.0, 144.0, 87.0, 61.0, 95.0, 58.0, 122.0, 91.0, 58.0, 138.0, 78.0, 58.0, 58.0, 36.0, 110.0, 76.0, 99.0, 169.0, 167.0, 201.0, 55.0, 104.0, 212.0, 31.0, 115.0, 31.0, 66.0, 72.0, 56.0, 104.0, 63.0, 122.0, 107.0, 193.0, 107.0, 44.0, 20.0, 144.0, 80.0, 53.0, 156.0, 100.0, 55.0, 74.0, 89.0, 9.0, 69.0, 52.0, 50.0, 96.0, 115.0, 58.0, 87.0, 144.0, 92.0, 20.0, 63.0, 81.0, 115.0, 84.0, 99.0, 150.0, 104.0, 141.0, 53.0, 70.0, 96.0, 127.0, 58.0, 31.0, 6.0, 52.0, 141.0, 55.0, 64.0, 88.0, 148.0, 69.0, 187.0, 50.0, 124.0, 107.0, 93.0, 99.0, 116.0, 72.0, 89.0, 42.0, 146.0, 213.0, 118.0, 82.0, 81.0, 28.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [41.0, 32.0, 30.0, 34.0, 43.0, 53.0, 48.0, 53.0, 29.0, 37.0, 91.0, 77.0, 76.0, 70.0, 68.0, 76.0, 51.0, 36.0, 28.0, 33.0, 54.0, 41.0, 19.0, 39.0, 62.0, 60.0, 48.0, 43.0, 30.0, 28.0, 57.0, 81.0, 42.0, 36.0, 30.0, 28.0, 34.0, 24.0, 15.0, 21.0, 57.0, 53.0, 49.0, 27.0, 50.0, 49.0, 76.0, 93.0, 83.0, 84.0, 85.0, 116.0, 35.0, 20.0, 41.0, 63.0, 105.0, 107.0, 17.0, 14.0, 56.0, 59.0, 8.0, 23.0, 34.0, 32.0, 28.0, 44.0, 22.0, 34.0, 51.0, 53.0, 31.0, 32.0, 55.0, 67.0, 50.0, 57.0, 95.0, 98.0, 52.0, 55.0, 14.0, 30.0, 6.0, 14.0, 78.0, 66.0, 40.0, 40.0, 20.0, 33.0, 75.0, 81.0, 47.0, 53.0, 27.0, 28.0, 39.0, 35.0, 49.0, 40.0, 6.0, 3.0, 41.0, 28.0, 27.0, 25.0, 30.0, 20.0, 54.0, 42.0, 59.0, 56.0, 27.0, 31.0, 37.0, 50.0, 76.0, 68.0, 37.0, 55.0, 5.0, 15.0, 24.0, 39.0, 43.0, 38.0, 56.0, 59.0, 32.0, 52.0, 50.0, 49.0, 70.0, 80.0, 48.0, 
56.0, 68.0, 73.0, 25.0, 28.0, 42.0, 28.0, 53.0, 43.0, 58.0, 69.0, 34.0, 24.0, 19.0, 12.0, 3.0, 3.0, 23.0, 29.0, 73.0, 68.0, 21.0, 34.0, 42.0, 22.0, 35.0, 53.0, 68.0, 80.0, 41.0, 28.0, 99.0, 88.0, 30.0, 20.0, 60.0, 64.0, 59.0, 48.0, 48.0, 45.0, 53.0, 46.0, 57.0, 59.0, 37.0, 35.0, 44.0, 45.0, 20.0, 22.0, 76.0, 70.0, 100.0, 113.0, 57.0, 61.0, 48.0, 34.0, 31.0, 50.0, 3.0, 25.0]}, "sampler_perf": {"mean_env_wait_ms": 4.078369518030482, "mean_processing_ms": 0.8807722404539655, "mean_inference_ms": 4.6286338379623215}, "off_policy_estimator": {}, "info": {"num_steps_trained": 1008000, "num_steps_sampled": 537600, "sample_time_ms": 196166.577, "load_time_ms": 37.661, "grad_time_ms": 141994.493, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 9.094947153254554e-14, "cur_lr": 0.0010000000474974513, "total_loss": -0.00832280796021223, "policy_loss": -0.009453889913856983, "vf_loss": 19.490577697753906, "vf_explained_var": 0.44570884108543396, "kl": 0.0015499308938160539, "entropy": 1.6359552145004272, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 537600, "episodes_total": 1344, "training_iteration": 42, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-05-29", "timestamp": 1660248329, "time_this_iter_s": 29.278310775756836, "time_total_s": 6750.144958496094, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, 
"framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, 
"inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 6750.144958496094, "timesteps_since_restore": 537600, "iterations_since_restore": 42, "perf": {"cpu_util_percent": 35.96428571428572, 
"ram_util_percent": 58.190476190476204}}
-{"episode_reward_max": 213.0, "episode_reward_min": 6.0, "episode_reward_mean": 92.39, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 3.0}, "policy_reward_max": {"ppo": 116.0}, "policy_reward_mean": {"ppo": 46.195}, "custom_metrics": {"sparse_reward_mean": 17.2, "sparse_reward_min": 0, "sparse_reward_max": 60, "shaped_reward_mean": 57.99, "shaped_reward_min": 6, "shaped_reward_max": 101, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 6.96, "onion_pickup_agent_0_min": 0, "onion_pickup_agent_0_max": 14, "onion_pickup_agent_1_mean": 7.36, "onion_pickup_agent_1_min": 1, "onion_pickup_agent_1_max": 13, "useful_onion_pickup_agent_0_mean": 5.46, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 11, "useful_onion_pickup_agent_1_mean": 5.68, "useful_onion_pickup_agent_1_min": 1, "useful_onion_pickup_agent_1_max": 11, "onion_drop_agent_0_mean": 1.94, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 7, "onion_drop_agent_1_mean": 2.05, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 9, "useful_onion_drop_agent_0_mean": 1.13, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 7, "useful_onion_drop_agent_1_mean": 1.31, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 8, "potting_onion_agent_0_mean": 4.56, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 10, "potting_onion_agent_1_mean": 4.96, "potting_onion_agent_1_min": 1, "potting_onion_agent_1_max": 11, "dish_pickup_agent_0_mean": 4.25, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 4.38, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 14, "useful_dish_pickup_agent_0_mean": 1.21, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 5, "useful_dish_pickup_agent_1_mean": 1.2, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 5, "dish_drop_agent_0_mean": 1.88, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 1.87, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 11, "useful_dish_drop_agent_0_mean": 0.64, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 5, "useful_dish_drop_agent_1_mean": 0.47, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 2.99, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 12, "soup_pickup_agent_1_mean": 3.11, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 1.63, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 4, "soup_delivery_agent_1_mean": 1.73, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 4, "soup_drop_agent_0_mean": 1.25, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 7, "soup_drop_agent_1_mean": 1.19, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 5, "optimal_onion_potting_agent_0_mean": 4.56, 
"optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 10, "optimal_onion_potting_agent_1_mean": 4.96, "optimal_onion_potting_agent_1_min": 1, "optimal_onion_potting_agent_1_max": 11, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 4.56, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 10, "viable_onion_potting_agent_1_mean": 4.96, "viable_onion_potting_agent_1_min": 1, "viable_onion_potting_agent_1_max": 11, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [45.0, 93.0, 46.0, 61.0, 123.0, 57.0, 42.0, 23.0, 31.0, 128.0, 52.0, 85.0, 146.0, 137.0, 25.0, 112.0, 102.0, 119.0, 28.0, 84.0, 68.0, 101.0, 144.0, 92.0, 88.0, 58.0, 98.0, 109.0, 139.0, 91.0, 48.0, 98.0, 115.0, 84.0, 99.0, 150.0, 104.0, 141.0, 53.0, 70.0, 96.0, 127.0, 58.0, 31.0, 6.0, 52.0, 141.0, 55.0, 64.0, 88.0, 148.0, 69.0, 187.0, 50.0, 124.0, 107.0, 93.0, 99.0, 116.0, 72.0, 89.0, 42.0, 146.0, 213.0, 118.0, 82.0, 81.0, 28.0, 73.0, 64.0, 96.0, 101.0, 66.0, 168.0, 146.0, 144.0, 87.0, 61.0, 95.0, 58.0, 122.0, 91.0, 58.0, 138.0, 78.0, 58.0, 58.0, 36.0, 110.0, 76.0, 99.0, 169.0, 167.0, 201.0, 55.0, 104.0, 212.0, 31.0, 115.0, 31.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [31.0, 14.0, 37.0, 56.0, 21.0, 25.0, 41.0, 20.0, 65.0, 58.0, 19.0, 38.0, 16.0, 26.0, 9.0, 14.0, 14.0, 17.0, 49.0, 79.0, 22.0, 30.0, 43.0, 42.0, 72.0, 74.0, 63.0, 74.0, 19.0, 6.0, 56.0, 56.0, 55.0, 47.0, 55.0, 64.0, 11.0, 17.0, 31.0, 53.0, 37.0, 31.0, 53.0, 48.0, 66.0, 78.0, 53.0, 39.0, 42.0, 46.0, 19.0, 39.0, 52.0, 46.0, 53.0, 56.0, 65.0, 74.0, 42.0, 49.0, 19.0, 29.0, 50.0, 48.0, 56.0, 59.0, 32.0, 52.0, 50.0, 49.0, 70.0, 80.0, 48.0, 56.0, 68.0, 73.0, 25.0, 28.0, 42.0, 28.0, 53.0, 43.0, 58.0, 69.0, 34.0, 24.0, 19.0, 12.0, 3.0, 3.0, 23.0, 29.0, 73.0, 68.0, 21.0, 34.0, 42.0, 22.0, 35.0, 53.0, 68.0, 80.0, 41.0, 28.0, 99.0, 88.0, 30.0, 20.0, 60.0, 64.0, 59.0, 48.0, 48.0, 45.0, 53.0, 46.0, 57.0, 59.0, 37.0, 35.0, 44.0, 45.0, 20.0, 22.0, 
76.0, 70.0, 100.0, 113.0, 57.0, 61.0, 48.0, 34.0, 31.0, 50.0, 3.0, 25.0, 41.0, 32.0, 30.0, 34.0, 43.0, 53.0, 48.0, 53.0, 29.0, 37.0, 91.0, 77.0, 76.0, 70.0, 68.0, 76.0, 51.0, 36.0, 28.0, 33.0, 54.0, 41.0, 19.0, 39.0, 62.0, 60.0, 48.0, 43.0, 30.0, 28.0, 57.0, 81.0, 42.0, 36.0, 30.0, 28.0, 34.0, 24.0, 15.0, 21.0, 57.0, 53.0, 49.0, 27.0, 50.0, 49.0, 76.0, 93.0, 83.0, 84.0, 85.0, 116.0, 35.0, 20.0, 41.0, 63.0, 105.0, 107.0, 17.0, 14.0, 56.0, 59.0, 8.0, 23.0]}, "sampler_perf": {"mean_env_wait_ms": 3.992670298084334, "mean_processing_ms": 0.8636158543743789, "mean_inference_ms": 4.538596678243932}, "off_policy_estimator": {}, "info": {"num_steps_trained": 1032000, "num_steps_sampled": 550400, "sample_time_ms": 195606.597, "load_time_ms": 37.682, "grad_time_ms": 141791.558, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 4.547473576627277e-14, "cur_lr": 0.0010000000474974513, "total_loss": -0.009549283422529697, "policy_loss": -0.010450693778693676, "vf_loss": 17.197433471679688, "vf_explained_var": 0.4546402394771576, "kl": 0.00132859090808779, "entropy": 1.6366652250289917, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 550400, "episodes_total": 1376, "training_iteration": 43, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-05-57", "timestamp": 1660248357, "time_this_iter_s": 27.376117944717407, "time_total_s": 6777.521076440811, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, 
"lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": 
true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 6777.521076440811, "timesteps_since_restore": 550400, 
"iterations_since_restore": 43, "perf": {"cpu_util_percent": 34.52051282051282, "ram_util_percent": 58.123076923076916}}
-{"episode_reward_max": 212.0, "episode_reward_min": 9.0, "episode_reward_mean": 89.44, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 116.0}, "policy_reward_mean": {"ppo": 44.72}, "custom_metrics": {"sparse_reward_mean": 17.0, "sparse_reward_min": 0, "sparse_reward_max": 60, "shaped_reward_mean": 55.44, "shaped_reward_min": 9, "shaped_reward_max": 92, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 6.54, "onion_pickup_agent_0_min": 0, "onion_pickup_agent_0_max": 14, "onion_pickup_agent_1_mean": 7.64, "onion_pickup_agent_1_min": 1, "onion_pickup_agent_1_max": 13, "useful_onion_pickup_agent_0_mean": 5.03, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 11, "useful_onion_pickup_agent_1_mean": 6.0, "useful_onion_pickup_agent_1_min": 1, "useful_onion_pickup_agent_1_max": 12, "onion_drop_agent_0_mean": 1.95, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 7, "onion_drop_agent_1_mean": 2.08, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 9, "useful_onion_drop_agent_0_mean": 1.2, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 7, "useful_onion_drop_agent_1_mean": 1.25, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 8, "potting_onion_agent_0_mean": 4.16, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 10, "potting_onion_agent_1_mean": 5.24, "potting_onion_agent_1_min": 1, "potting_onion_agent_1_max": 11, "dish_pickup_agent_0_mean": 4.56, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 4.45, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 14, "useful_dish_pickup_agent_0_mean": 1.09, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 5, "useful_dish_pickup_agent_1_mean": 0.89, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 5, "dish_drop_agent_0_mean": 2.26, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 2.04, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 11, "useful_dish_drop_agent_0_mean": 0.82, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 5, "useful_dish_drop_agent_1_mean": 0.69, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 6, "soup_pickup_agent_0_mean": 3.0, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 12, "soup_pickup_agent_1_mean": 2.76, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 1.68, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 4, "soup_delivery_agent_1_mean": 1.64, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 4, "soup_drop_agent_0_mean": 1.22, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 7, "soup_drop_agent_1_mean": 0.96, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 4, "optimal_onion_potting_agent_0_mean": 4.16, 
"optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 10, "optimal_onion_potting_agent_1_mean": 5.24, "optimal_onion_potting_agent_1_min": 1, "optimal_onion_potting_agent_1_max": 11, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 4.16, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 10, "viable_onion_potting_agent_1_mean": 5.24, "viable_onion_potting_agent_1_min": 1, "viable_onion_potting_agent_1_max": 11, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [147.0, 136.0, 50.0, 114.0, 28.0, 49.0, 161.0, 95.0, 135.0, 98.0, 121.0, 165.0, 12.0, 110.0, 104.0, 84.0, 103.0, 9.0, 66.0, 101.0, 31.0, 42.0, 34.0, 50.0, 90.0, 101.0, 33.0, 191.0, 47.0, 61.0, 144.0, 82.0, 118.0, 82.0, 81.0, 28.0, 73.0, 64.0, 96.0, 101.0, 66.0, 168.0, 146.0, 144.0, 87.0, 61.0, 95.0, 58.0, 122.0, 91.0, 58.0, 138.0, 78.0, 58.0, 58.0, 36.0, 110.0, 76.0, 99.0, 169.0, 167.0, 201.0, 55.0, 104.0, 212.0, 31.0, 115.0, 31.0, 45.0, 93.0, 46.0, 61.0, 123.0, 57.0, 42.0, 23.0, 31.0, 128.0, 52.0, 85.0, 146.0, 137.0, 25.0, 112.0, 102.0, 119.0, 28.0, 84.0, 68.0, 101.0, 144.0, 92.0, 88.0, 58.0, 98.0, 109.0, 139.0, 91.0, 48.0, 98.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [76.0, 71.0, 62.0, 74.0, 27.0, 23.0, 61.0, 53.0, 12.0, 16.0, 18.0, 31.0, 69.0, 92.0, 44.0, 51.0, 81.0, 54.0, 40.0, 58.0, 63.0, 58.0, 85.0, 80.0, 3.0, 9.0, 54.0, 56.0, 50.0, 54.0, 42.0, 42.0, 42.0, 61.0, 0.0, 9.0, 35.0, 31.0, 53.0, 48.0, 22.0, 9.0, 20.0, 22.0, 20.0, 14.0, 14.0, 36.0, 43.0, 47.0, 53.0, 48.0, 13.0, 20.0, 85.0, 106.0, 16.0, 31.0, 26.0, 35.0, 80.0, 64.0, 34.0, 48.0, 57.0, 61.0, 48.0, 34.0, 31.0, 50.0, 3.0, 25.0, 41.0, 32.0, 30.0, 34.0, 43.0, 53.0, 48.0, 53.0, 29.0, 37.0, 91.0, 77.0, 76.0, 70.0, 68.0, 76.0, 51.0, 36.0, 28.0, 33.0, 54.0, 41.0, 19.0, 39.0, 62.0, 60.0, 48.0, 43.0, 30.0, 28.0, 57.0, 81.0, 42.0, 36.0, 30.0, 28.0, 34.0, 24.0, 15.0, 21.0, 57.0, 53.0, 49.0, 27.0, 50.0, 49.0, 76.0, 93.0, 83.0, 84.0, 85.0, 116.0, 
35.0, 20.0, 41.0, 63.0, 105.0, 107.0, 17.0, 14.0, 56.0, 59.0, 8.0, 23.0, 31.0, 14.0, 37.0, 56.0, 21.0, 25.0, 41.0, 20.0, 65.0, 58.0, 19.0, 38.0, 16.0, 26.0, 9.0, 14.0, 14.0, 17.0, 49.0, 79.0, 22.0, 30.0, 43.0, 42.0, 72.0, 74.0, 63.0, 74.0, 19.0, 6.0, 56.0, 56.0, 55.0, 47.0, 55.0, 64.0, 11.0, 17.0, 31.0, 53.0, 37.0, 31.0, 53.0, 48.0, 66.0, 78.0, 53.0, 39.0, 42.0, 46.0, 19.0, 39.0, 52.0, 46.0, 53.0, 56.0, 65.0, 74.0, 42.0, 49.0, 19.0, 29.0, 50.0, 48.0]}, "sampler_perf": {"mean_env_wait_ms": 3.9108563485299497, "mean_processing_ms": 0.8472286288222008, "mean_inference_ms": 4.453100666428265}, "off_policy_estimator": {}, "info": {"num_steps_trained": 1056000, "num_steps_sampled": 563200, "sample_time_ms": 195418.359, "load_time_ms": 37.483, "grad_time_ms": 141705.307, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 2.2737367883136385e-14, "cur_lr": 0.0010000000474974513, "total_loss": -0.010453901253640652, "policy_loss": -0.011599976569414139, "vf_loss": 19.665088653564453, "vf_explained_var": 0.43753400444984436, "kl": 0.0012759790988638997, "entropy": 1.640870451927185, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 563200, "episodes_total": 1408, "training_iteration": 44, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-06-24", "timestamp": 1660248384, "time_this_iter_s": 27.344013929367065, "time_total_s": 6804.865090370178, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, 
"lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": 
true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 6804.865090370178, "timesteps_since_restore": 563200, 
"iterations_since_restore": 44, "perf": {"cpu_util_percent": 32.94102564102564, "ram_util_percent": 58.05128205128204}}
-{"episode_reward_max": 239.0, "episode_reward_min": 9.0, "episode_reward_mean": 90.38, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 127.0}, "policy_reward_mean": {"ppo": 45.19}, "custom_metrics": {"sparse_reward_mean": 19.0, "sparse_reward_min": 0, "sparse_reward_max": 80, "shaped_reward_mean": 52.38, "shaped_reward_min": 9, "shaped_reward_max": 92, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 6.55, "onion_pickup_agent_0_min": 0, "onion_pickup_agent_0_max": 14, "onion_pickup_agent_1_mean": 7.59, "onion_pickup_agent_1_min": 3, "onion_pickup_agent_1_max": 13, "useful_onion_pickup_agent_0_mean": 5.0, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 11, "useful_onion_pickup_agent_1_mean": 5.95, "useful_onion_pickup_agent_1_min": 2, "useful_onion_pickup_agent_1_max": 12, "onion_drop_agent_0_mean": 2.05, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 2.21, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 9, "useful_onion_drop_agent_0_mean": 1.25, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 1.3, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 8, "potting_onion_agent_0_mean": 4.08, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 10, "potting_onion_agent_1_mean": 5.02, "potting_onion_agent_1_min": 1, "potting_onion_agent_1_max": 10, "dish_pickup_agent_0_mean": 4.4, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 4.28, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 12, "useful_dish_pickup_agent_0_mean": 1.02, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 4, "useful_dish_pickup_agent_1_mean": 0.74, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 4, "dish_drop_agent_0_mean": 2.19, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 2.02, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.74, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 4, "useful_dish_drop_agent_1_mean": 0.79, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 6, "soup_pickup_agent_0_mean": 2.8, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 2.43, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 1.55, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 4, "soup_delivery_agent_1_mean": 1.53, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 4, "soup_drop_agent_0_mean": 1.16, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 6, "soup_drop_agent_1_mean": 0.8, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 4, "optimal_onion_potting_agent_0_mean": 4.08, 
"optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 10, "optimal_onion_potting_agent_1_mean": 5.02, "optimal_onion_potting_agent_1_min": 1, "optimal_onion_potting_agent_1_max": 10, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 4.08, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 10, "viable_onion_potting_agent_1_mean": 5.02, "viable_onion_potting_agent_1_min": 1, "viable_onion_potting_agent_1_max": 10, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [239.0, 130.0, 113.0, 17.0, 130.0, 162.0, 144.0, 153.0, 152.0, 55.0, 105.0, 53.0, 182.0, 77.0, 161.0, 71.0, 31.0, 95.0, 93.0, 92.0, 25.0, 77.0, 12.0, 47.0, 77.0, 88.0, 193.0, 58.0, 115.0, 63.0, 74.0, 98.0, 212.0, 31.0, 115.0, 31.0, 45.0, 93.0, 46.0, 61.0, 123.0, 57.0, 42.0, 23.0, 31.0, 128.0, 52.0, 85.0, 146.0, 137.0, 25.0, 112.0, 102.0, 119.0, 28.0, 84.0, 68.0, 101.0, 144.0, 92.0, 88.0, 58.0, 98.0, 109.0, 139.0, 91.0, 48.0, 98.0, 147.0, 136.0, 50.0, 114.0, 28.0, 49.0, 161.0, 95.0, 135.0, 98.0, 121.0, 165.0, 12.0, 110.0, 104.0, 84.0, 103.0, 9.0, 66.0, 101.0, 31.0, 42.0, 34.0, 50.0, 90.0, 101.0, 33.0, 191.0, 47.0, 61.0, 144.0, 82.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [127.0, 112.0, 68.0, 62.0, 46.0, 67.0, 5.0, 12.0, 62.0, 68.0, 79.0, 83.0, 71.0, 73.0, 84.0, 69.0, 83.0, 69.0, 30.0, 25.0, 51.0, 54.0, 23.0, 30.0, 91.0, 91.0, 40.0, 37.0, 71.0, 90.0, 23.0, 48.0, 17.0, 14.0, 48.0, 47.0, 50.0, 43.0, 46.0, 46.0, 11.0, 14.0, 29.0, 48.0, 3.0, 9.0, 30.0, 17.0, 40.0, 37.0, 56.0, 32.0, 97.0, 96.0, 33.0, 25.0, 67.0, 48.0, 34.0, 29.0, 37.0, 37.0, 54.0, 44.0, 105.0, 107.0, 17.0, 14.0, 56.0, 59.0, 8.0, 23.0, 31.0, 14.0, 37.0, 56.0, 21.0, 25.0, 41.0, 20.0, 65.0, 58.0, 19.0, 38.0, 16.0, 26.0, 9.0, 14.0, 14.0, 17.0, 49.0, 79.0, 22.0, 30.0, 43.0, 42.0, 72.0, 74.0, 63.0, 74.0, 19.0, 6.0, 56.0, 56.0, 55.0, 47.0, 55.0, 64.0, 11.0, 17.0, 31.0, 53.0, 37.0, 31.0, 53.0, 48.0, 66.0, 78.0, 53.0, 39.0, 42.0, 46.0, 19.0, 
39.0, 52.0, 46.0, 53.0, 56.0, 65.0, 74.0, 42.0, 49.0, 19.0, 29.0, 50.0, 48.0, 76.0, 71.0, 62.0, 74.0, 27.0, 23.0, 61.0, 53.0, 12.0, 16.0, 18.0, 31.0, 69.0, 92.0, 44.0, 51.0, 81.0, 54.0, 40.0, 58.0, 63.0, 58.0, 85.0, 80.0, 3.0, 9.0, 54.0, 56.0, 50.0, 54.0, 42.0, 42.0, 42.0, 61.0, 0.0, 9.0, 35.0, 31.0, 53.0, 48.0, 22.0, 9.0, 20.0, 22.0, 20.0, 14.0, 14.0, 36.0, 43.0, 47.0, 53.0, 48.0, 13.0, 20.0, 85.0, 106.0, 16.0, 31.0, 26.0, 35.0, 80.0, 64.0, 34.0, 48.0]}, "sampler_perf": {"mean_env_wait_ms": 3.83276732749063, "mean_processing_ms": 0.8315868647990772, "mean_inference_ms": 4.371610853440936}, "off_policy_estimator": {}, "info": {"num_steps_trained": 1080000, "num_steps_sampled": 576000, "sample_time_ms": 158697.168, "load_time_ms": 37.561, "grad_time_ms": 45441.027, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 1.1368683941568192e-14, "cur_lr": 0.0010000000474974513, "total_loss": -0.007090561557561159, "policy_loss": -0.008278795517981052, "vf_loss": 20.059175491333008, "vf_explained_var": 0.4839383065700531, "kl": 0.0014106096932664514, "entropy": 1.63534414768219, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 576000, "episodes_total": 1440, "training_iteration": 45, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-06-54", "timestamp": 1660248414, "time_this_iter_s": 29.613693952560425, "time_total_s": 6834.478784322739, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, 
"lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": 
true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 6834.478784322739, "timesteps_since_restore": 576000, 
"iterations_since_restore": 45, "perf": {"cpu_util_percent": 32.607142857142854, "ram_util_percent": 58.099999999999994}}
-{"episode_reward_max": 239.0, "episode_reward_min": 9.0, "episode_reward_mean": 94.76, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 127.0}, "policy_reward_mean": {"ppo": 47.38}, "custom_metrics": {"sparse_reward_mean": 20.8, "sparse_reward_min": 0, "sparse_reward_max": 80, "shaped_reward_mean": 53.16, "shaped_reward_min": 9, "shaped_reward_max": 87, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 6.72, "onion_pickup_agent_0_min": 0, "onion_pickup_agent_0_max": 14, "onion_pickup_agent_1_mean": 7.12, "onion_pickup_agent_1_min": 2, "onion_pickup_agent_1_max": 13, "useful_onion_pickup_agent_0_mean": 5.09, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 12, "useful_onion_pickup_agent_1_mean": 5.56, "useful_onion_pickup_agent_1_min": 1, "useful_onion_pickup_agent_1_max": 12, "onion_drop_agent_0_mean": 1.94, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 1.98, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 1.27, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 1.21, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 4.37, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 10, "potting_onion_agent_1_mean": 4.82, "potting_onion_agent_1_min": 1, "potting_onion_agent_1_max": 10, "dish_pickup_agent_0_mean": 4.27, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 4.46, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 12, "useful_dish_pickup_agent_0_mean": 1.02, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 4, "useful_dish_pickup_agent_1_mean": 0.91, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 5, "dish_drop_agent_0_mean": 2.16, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 2.09, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.77, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 4, "useful_dish_drop_agent_1_mean": 0.78, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 6, "soup_pickup_agent_0_mean": 2.53, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 2.52, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 1.43, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 4, "soup_delivery_agent_1_mean": 1.71, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 4, "soup_drop_agent_0_mean": 0.99, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 6, "soup_drop_agent_1_mean": 0.73, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 4, "optimal_onion_potting_agent_0_mean": 4.37, 
"optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 10, "optimal_onion_potting_agent_1_mean": 4.82, "optimal_onion_potting_agent_1_min": 1, "optimal_onion_potting_agent_1_max": 10, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 4.37, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 10, "viable_onion_potting_agent_1_mean": 4.82, "viable_onion_potting_agent_1_min": 1, "viable_onion_potting_agent_1_max": 10, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [164.0, 92.0, 107.0, 36.0, 41.0, 99.0, 107.0, 73.0, 42.0, 204.0, 99.0, 139.0, 45.0, 93.0, 104.0, 93.0, 198.0, 148.0, 74.0, 66.0, 152.0, 50.0, 122.0, 112.0, 39.0, 87.0, 122.0, 34.0, 115.0, 84.0, 144.0, 39.0, 139.0, 91.0, 48.0, 98.0, 147.0, 136.0, 50.0, 114.0, 28.0, 49.0, 161.0, 95.0, 135.0, 98.0, 121.0, 165.0, 12.0, 110.0, 104.0, 84.0, 103.0, 9.0, 66.0, 101.0, 31.0, 42.0, 34.0, 50.0, 90.0, 101.0, 33.0, 191.0, 47.0, 61.0, 144.0, 82.0, 239.0, 130.0, 113.0, 17.0, 130.0, 162.0, 144.0, 153.0, 152.0, 55.0, 105.0, 53.0, 182.0, 77.0, 161.0, 71.0, 31.0, 95.0, 93.0, 92.0, 25.0, 77.0, 12.0, 47.0, 77.0, 88.0, 193.0, 58.0, 115.0, 63.0, 74.0, 98.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [91.0, 73.0, 32.0, 60.0, 45.0, 62.0, 22.0, 14.0, 14.0, 27.0, 53.0, 46.0, 59.0, 48.0, 37.0, 36.0, 17.0, 25.0, 92.0, 112.0, 42.0, 57.0, 65.0, 74.0, 17.0, 28.0, 45.0, 48.0, 43.0, 61.0, 43.0, 50.0, 97.0, 101.0, 72.0, 76.0, 38.0, 36.0, 32.0, 34.0, 81.0, 71.0, 27.0, 23.0, 60.0, 62.0, 59.0, 53.0, 27.0, 12.0, 37.0, 50.0, 58.0, 64.0, 12.0, 22.0, 51.0, 64.0, 45.0, 39.0, 74.0, 70.0, 25.0, 14.0, 65.0, 74.0, 42.0, 49.0, 19.0, 29.0, 50.0, 48.0, 76.0, 71.0, 62.0, 74.0, 27.0, 23.0, 61.0, 53.0, 12.0, 16.0, 18.0, 31.0, 69.0, 92.0, 44.0, 51.0, 81.0, 54.0, 40.0, 58.0, 63.0, 58.0, 85.0, 80.0, 3.0, 9.0, 54.0, 56.0, 50.0, 54.0, 42.0, 42.0, 42.0, 61.0, 0.0, 9.0, 35.0, 31.0, 53.0, 48.0, 22.0, 9.0, 20.0, 22.0, 20.0, 14.0, 14.0, 36.0, 43.0, 47.0, 53.0, 
48.0, 13.0, 20.0, 85.0, 106.0, 16.0, 31.0, 26.0, 35.0, 80.0, 64.0, 34.0, 48.0, 127.0, 112.0, 68.0, 62.0, 46.0, 67.0, 5.0, 12.0, 62.0, 68.0, 79.0, 83.0, 71.0, 73.0, 84.0, 69.0, 83.0, 69.0, 30.0, 25.0, 51.0, 54.0, 23.0, 30.0, 91.0, 91.0, 40.0, 37.0, 71.0, 90.0, 23.0, 48.0, 17.0, 14.0, 48.0, 47.0, 50.0, 43.0, 46.0, 46.0, 11.0, 14.0, 29.0, 48.0, 3.0, 9.0, 30.0, 17.0, 40.0, 37.0, 56.0, 32.0, 97.0, 96.0, 33.0, 25.0, 67.0, 48.0, 34.0, 29.0, 37.0, 37.0, 54.0, 44.0]}, "sampler_perf": {"mean_env_wait_ms": 3.75819263232233, "mean_processing_ms": 0.8166563749373907, "mean_inference_ms": 4.294229000839884}, "off_policy_estimator": {}, "info": {"num_steps_trained": 1104000, "num_steps_sampled": 588800, "sample_time_ms": 20385.134, "load_time_ms": 37.538, "grad_time_ms": 8901.075, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 5.684341970784096e-15, "cur_lr": 0.0010000000474974513, "total_loss": -0.00807119719684124, "policy_loss": -0.009133302606642246, "vf_loss": 18.76689338684082, "vf_explained_var": 0.5084854960441589, "kl": 0.0014663866022601724, "entropy": 1.6291638612747192, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 588800, "episodes_total": 1472, "training_iteration": 46, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-07-23", "timestamp": 1660248443, "time_this_iter_s": 29.134671926498413, "time_total_s": 6863.613456249237, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, 
"lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": 
true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 6863.613456249237, "timesteps_since_restore": 588800, 
"iterations_since_restore": 46, "perf": {"cpu_util_percent": 34.358536585365854, "ram_util_percent": 58.190243902439015}}
-{"episode_reward_max": 245.0, "episode_reward_min": 9.0, "episode_reward_mean": 94.0, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 135.0}, "policy_reward_mean": {"ppo": 47.0}, "custom_metrics": {"sparse_reward_mean": 21.0, "sparse_reward_min": 0, "sparse_reward_max": 80, "shaped_reward_mean": 52.0, "shaped_reward_min": 9, "shaped_reward_max": 90, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 6.57, "onion_pickup_agent_0_min": 0, "onion_pickup_agent_0_max": 14, "onion_pickup_agent_1_mean": 6.59, "onion_pickup_agent_1_min": 1, "onion_pickup_agent_1_max": 13, "useful_onion_pickup_agent_0_mean": 5.07, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 12, "useful_onion_pickup_agent_1_mean": 5.13, "useful_onion_pickup_agent_1_min": 1, "useful_onion_pickup_agent_1_max": 11, "onion_drop_agent_0_mean": 1.78, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 
8, "onion_drop_agent_1_mean": 1.65, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 1.21, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 1.06, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 4.41, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 10, "potting_onion_agent_1_mean": 4.63, "potting_onion_agent_1_min": 1, "potting_onion_agent_1_max": 10, "dish_pickup_agent_0_mean": 4.29, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 4.49, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 12, "useful_dish_pickup_agent_0_mean": 1.0, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 4, "useful_dish_pickup_agent_1_mean": 0.91, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 5, "dish_drop_agent_0_mean": 2.25, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 8, "dish_drop_agent_1_mean": 2.09, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.84, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 6, "useful_dish_drop_agent_1_mean": 0.78, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 7, "soup_pickup_agent_0_mean": 2.32, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 2.43, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 1.31, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 4, "soup_delivery_agent_1_mean": 1.65, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 4, "soup_drop_agent_0_mean": 0.89, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 5, "soup_drop_agent_1_mean": 0.71, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 4, "optimal_onion_potting_agent_0_mean": 4.41, "optimal_onion_potting_agent_0_min": 0, 
"optimal_onion_potting_agent_0_max": 10, "optimal_onion_potting_agent_1_mean": 4.63, "optimal_onion_potting_agent_1_min": 1, "optimal_onion_potting_agent_1_max": 10, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 4.41, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 10, "viable_onion_potting_agent_1_mean": 4.63, "viable_onion_potting_agent_1_min": 1, "viable_onion_potting_agent_1_max": 10, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": 
[245.0, 133.0, 23.0, 133.0, 37.0, 55.0, 22.0, 55.0, 69.0, 87.0, 68.0, 71.0, 9.0, 9.0, 51.0, 88.0, 136.0, 125.0, 118.0, 139.0, 95.0, 130.0, 67.0, 79.0, 38.0, 82.0, 20.0, 98.0, 190.0, 31.0, 153.0, 104.0, 47.0, 61.0, 144.0, 82.0, 239.0, 130.0, 113.0, 17.0, 130.0, 162.0, 144.0, 153.0, 152.0, 55.0, 105.0, 53.0, 182.0, 77.0, 161.0, 71.0, 31.0, 95.0, 93.0, 92.0, 25.0, 77.0, 12.0, 47.0, 77.0, 88.0, 193.0, 58.0, 115.0, 63.0, 74.0, 98.0, 164.0, 92.0, 107.0, 36.0, 41.0, 99.0, 107.0, 73.0, 42.0, 204.0, 99.0, 139.0, 45.0, 93.0, 104.0, 93.0, 198.0, 148.0, 74.0, 66.0, 152.0, 50.0, 122.0, 112.0, 39.0, 87.0, 122.0, 34.0, 115.0, 84.0, 144.0, 39.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [110.0, 135.0, 59.0, 74.0, 11.0, 12.0, 68.0, 65.0, 22.0, 15.0, 34.0, 21.0, 11.0, 11.0, 23.0, 32.0, 25.0, 44.0, 46.0, 41.0, 42.0, 26.0, 37.0, 34.0, 0.0, 9.0, 0.0, 9.0, 24.0, 27.0, 43.0, 45.0, 66.0, 70.0, 64.0, 61.0, 54.0, 64.0, 63.0, 76.0, 56.0, 39.0, 59.0, 71.0, 30.0, 37.0, 36.0, 43.0, 16.0, 22.0, 40.0, 42.0, 3.0, 17.0, 45.0, 53.0, 91.0, 99.0, 19.0, 12.0, 78.0, 75.0, 59.0, 45.0, 16.0, 31.0, 26.0, 35.0, 80.0, 64.0, 34.0, 48.0, 127.0, 112.0, 68.0, 62.0, 46.0, 67.0, 5.0, 12.0, 62.0, 68.0, 79.0, 83.0, 71.0, 73.0, 84.0, 69.0, 83.0, 69.0, 30.0, 25.0, 51.0, 54.0, 23.0, 30.0, 91.0, 91.0, 40.0, 37.0, 71.0, 90.0, 23.0, 48.0, 17.0, 14.0, 48.0, 47.0, 50.0, 43.0, 46.0, 46.0, 11.0, 14.0, 29.0, 48.0, 3.0, 9.0, 30.0, 17.0, 40.0, 37.0, 56.0, 32.0, 97.0, 96.0, 33.0, 25.0, 67.0, 48.0, 34.0, 29.0, 37.0, 37.0, 54.0, 44.0, 91.0, 
73.0, 32.0, 60.0, 45.0, 62.0, 22.0, 14.0, 14.0, 27.0, 53.0, 46.0, 59.0, 48.0, 37.0, 36.0, 17.0, 25.0, 92.0, 112.0, 42.0, 57.0, 65.0, 74.0, 17.0, 28.0, 45.0, 48.0, 43.0, 61.0, 43.0, 50.0, 97.0, 101.0, 72.0, 76.0, 38.0, 36.0, 32.0, 34.0, 81.0, 71.0, 27.0, 23.0, 60.0, 62.0, 59.0, 53.0, 27.0, 12.0, 37.0, 50.0, 58.0, 64.0, 12.0, 22.0, 51.0, 64.0, 45.0, 39.0, 74.0, 70.0, 25.0, 14.0]}, "sampler_perf": {"mean_env_wait_ms": 3.6869910389254943, "mean_processing_ms": 0.8024023118044294, "mean_inference_ms": 4.221576344650746}, "off_policy_estimator": {}, "info": {"num_steps_trained": 1128000, "num_steps_sampled": 601600, "sample_time_ms": 19943.652, "load_time_ms": 37.337, "grad_time_ms": 8965.172, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 2.842170985392048e-15, "cur_lr": 0.0010000000474974513, "total_loss": -0.007924961857497692, "policy_loss": -0.009038448333740234, "vf_loss": 19.34569549560547, "vf_explained_var": 0.504978597164154, "kl": 0.001396413892507553, "entropy": 1.6421631574630737, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 601600, "episodes_total": 1504, "training_iteration": 47, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-07-55", "timestamp": 1660248475, "time_this_iter_s": 32.583869218826294, "time_total_s": 6896.197325468063, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, 
"framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, 
"inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 6896.197325468063, "timesteps_since_restore": 601600, "iterations_since_restore": 47, "perf": {"cpu_util_percent": 34.10869565217391, 
"ram_util_percent": 58.16956521739129}}
-{"episode_reward_max": 245.0, "episode_reward_min": 9.0, "episode_reward_mean": 101.62, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 135.0}, "policy_reward_mean": {"ppo": 50.81}, "custom_metrics": {"sparse_reward_mean": 22.8, "sparse_reward_min": 0, "sparse_reward_max": 80, "shaped_reward_mean": 56.02, "shaped_reward_min": 9, "shaped_reward_max": 98, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 6.76, "onion_pickup_agent_0_min": 0, "onion_pickup_agent_0_max": 15, "onion_pickup_agent_1_mean": 6.77, "onion_pickup_agent_1_min": 1, "onion_pickup_agent_1_max": 14, "useful_onion_pickup_agent_0_mean": 5.56, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 15, "useful_onion_pickup_agent_1_mean": 5.52, "useful_onion_pickup_agent_1_min": 1, "useful_onion_pickup_agent_1_max": 14, "onion_drop_agent_0_mean": 1.59, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 8, "onion_drop_agent_1_mean": 1.47, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.89, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 0.87, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 4.79, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 11, "potting_onion_agent_1_mean": 4.97, "potting_onion_agent_1_min": 1, "potting_onion_agent_1_max": 11, "dish_pickup_agent_0_mean": 4.5, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 4.66, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 1.03, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 4, "useful_dish_pickup_agent_1_mean": 1.0, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 5, "dish_drop_agent_0_mean": 2.4, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 8, "dish_drop_agent_1_mean": 2.12, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.83, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 6, "useful_dish_drop_agent_1_mean": 0.64, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 7, "soup_pickup_agent_0_mean": 2.39, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 2.68, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 1.42, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 3, "soup_delivery_agent_1_mean": 1.81, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 4, "soup_drop_agent_0_mean": 0.82, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 5, "soup_drop_agent_1_mean": 0.79, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 4, "optimal_onion_potting_agent_0_mean": 4.79, 
"optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 11, "optimal_onion_potting_agent_1_mean": 4.97, "optimal_onion_potting_agent_1_min": 1, "optimal_onion_potting_agent_1_max": 11, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 4.79, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 11, "viable_onion_potting_agent_1_mean": 4.97, "viable_onion_potting_agent_1_min": 1, "viable_onion_potting_agent_1_max": 11, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [173.0, 93.0, 109.0, 218.0, 177.0, 155.0, 136.0, 150.0, 96.0, 191.0, 93.0, 98.0, 150.0, 110.0, 76.0, 107.0, 121.0, 158.0, 139.0, 159.0, 38.0, 148.0, 47.0, 117.0, 141.0, 9.0, 141.0, 87.0, 148.0, 144.0, 93.0, 106.0, 115.0, 63.0, 74.0, 98.0, 164.0, 92.0, 107.0, 36.0, 41.0, 99.0, 107.0, 73.0, 42.0, 204.0, 99.0, 139.0, 45.0, 93.0, 104.0, 93.0, 198.0, 148.0, 74.0, 66.0, 152.0, 50.0, 122.0, 112.0, 39.0, 87.0, 122.0, 34.0, 115.0, 84.0, 144.0, 39.0, 245.0, 133.0, 23.0, 133.0, 37.0, 55.0, 22.0, 55.0, 69.0, 87.0, 68.0, 71.0, 9.0, 9.0, 51.0, 88.0, 136.0, 125.0, 118.0, 139.0, 95.0, 130.0, 67.0, 79.0, 38.0, 82.0, 20.0, 98.0, 190.0, 31.0, 153.0, 104.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [88.0, 85.0, 50.0, 43.0, 57.0, 52.0, 94.0, 124.0, 74.0, 103.0, 85.0, 70.0, 76.0, 60.0, 64.0, 86.0, 53.0, 43.0, 89.0, 102.0, 49.0, 44.0, 52.0, 46.0, 84.0, 66.0, 51.0, 59.0, 40.0, 36.0, 53.0, 54.0, 65.0, 56.0, 74.0, 84.0, 68.0, 71.0, 81.0, 78.0, 11.0, 27.0, 67.0, 81.0, 19.0, 28.0, 51.0, 66.0, 73.0, 68.0, 6.0, 3.0, 71.0, 70.0, 36.0, 51.0, 68.0, 80.0, 74.0, 70.0, 42.0, 51.0, 58.0, 48.0, 67.0, 48.0, 34.0, 29.0, 37.0, 37.0, 54.0, 44.0, 91.0, 73.0, 32.0, 60.0, 45.0, 62.0, 22.0, 14.0, 14.0, 27.0, 53.0, 46.0, 59.0, 48.0, 37.0, 36.0, 17.0, 25.0, 92.0, 112.0, 42.0, 57.0, 65.0, 74.0, 17.0, 28.0, 45.0, 48.0, 43.0, 61.0, 43.0, 50.0, 97.0, 101.0, 72.0, 76.0, 38.0, 36.0, 32.0, 34.0, 81.0, 71.0, 27.0, 23.0, 60.0, 62.0, 59.0, 53.0, 27.0, 12.0, 
37.0, 50.0, 58.0, 64.0, 12.0, 22.0, 51.0, 64.0, 45.0, 39.0, 74.0, 70.0, 25.0, 14.0, 110.0, 135.0, 59.0, 74.0, 11.0, 12.0, 68.0, 65.0, 22.0, 15.0, 34.0, 21.0, 11.0, 11.0, 23.0, 32.0, 25.0, 44.0, 46.0, 41.0, 42.0, 26.0, 37.0, 34.0, 0.0, 9.0, 0.0, 9.0, 24.0, 27.0, 43.0, 45.0, 66.0, 70.0, 64.0, 61.0, 54.0, 64.0, 63.0, 76.0, 56.0, 39.0, 59.0, 71.0, 30.0, 37.0, 36.0, 43.0, 16.0, 22.0, 40.0, 42.0, 3.0, 17.0, 45.0, 53.0, 91.0, 99.0, 19.0, 12.0, 78.0, 75.0, 59.0, 45.0]}, "sampler_perf": {"mean_env_wait_ms": 3.619074133252118, "mean_processing_ms": 0.7888363298173107, "mean_inference_ms": 4.154787775271583}, "off_policy_estimator": {}, "info": {"num_steps_trained": 1152000, "num_steps_sampled": 614400, "sample_time_ms": 20418.706, "load_time_ms": 37.151, "grad_time_ms": 9069.585, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 1.421085492696024e-15, "cur_lr": 0.0010000000474974513, "total_loss": -0.01129829604178667, "policy_loss": -0.012764283455908298, "vf_loss": 22.684043884277344, "vf_explained_var": 0.5366321206092834, "kl": 0.0014537613606080413, "entropy": 1.6048468351364136, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 614400, "episodes_total": 1536, "training_iteration": 48, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-08-32", "timestamp": 1660248512, "time_this_iter_s": 36.37463116645813, "time_total_s": 6932.5719566345215, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, 
"lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": 
true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 6932.5719566345215, "timesteps_since_restore": 614400, 
"iterations_since_restore": 48, "perf": {"cpu_util_percent": 36.49999999999999, "ram_util_percent": 58.29423076923076}}
-{"episode_reward_max": 245.0, "episode_reward_min": 9.0, "episode_reward_mean": 109.78, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 135.0}, "policy_reward_mean": {"ppo": 54.89}, "custom_metrics": {"sparse_reward_mean": 25.6, "sparse_reward_min": 0, "sparse_reward_max": 80, "shaped_reward_mean": 58.58, "shaped_reward_min": 9, "shaped_reward_max": 101, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 6.59, "onion_pickup_agent_0_min": 0, "onion_pickup_agent_0_max": 17, "onion_pickup_agent_1_mean": 7.48, "onion_pickup_agent_1_min": 1, "onion_pickup_agent_1_max": 14, "useful_onion_pickup_agent_0_mean": 5.59, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 15, "useful_onion_pickup_agent_1_mean": 6.35, "useful_onion_pickup_agent_1_min": 1, "useful_onion_pickup_agent_1_max": 14, "onion_drop_agent_0_mean": 1.62, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 8, "onion_drop_agent_1_mean": 1.5, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.82, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 0.8, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 4.63, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 11, "potting_onion_agent_1_mean": 5.66, "potting_onion_agent_1_min": 1, "potting_onion_agent_1_max": 11, "dish_pickup_agent_0_mean": 4.66, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 4.45, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 1.17, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 5, "useful_dish_pickup_agent_1_mean": 0.9, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 5, "dish_drop_agent_0_mean": 2.3, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 8, "dish_drop_agent_1_mean": 2.0, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.74, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 6, "useful_dish_drop_agent_1_mean": 0.69, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 7, "soup_pickup_agent_0_mean": 2.6, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 2.67, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 1.72, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 4, "soup_delivery_agent_1_mean": 1.71, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 4, "soup_drop_agent_0_mean": 0.77, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 4, "soup_drop_agent_1_mean": 0.88, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 5, "optimal_onion_potting_agent_0_mean": 4.63, "optimal_onion_potting_agent_0_min": 
0, "optimal_onion_potting_agent_0_max": 11, "optimal_onion_potting_agent_1_mean": 5.66, "optimal_onion_potting_agent_1_min": 1, "optimal_onion_potting_agent_1_max": 11, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 4.63, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 11, "viable_onion_potting_agent_1_mean": 5.66, "viable_onion_potting_agent_1_min": 1, "viable_onion_potting_agent_1_max": 11, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": 
{"episode_reward": [101.0, 110.0, 157.0, 167.0, 84.0, 71.0, 128.0, 42.0, 162.0, 184.0, 88.0, 108.0, 204.0, 95.0, 179.0, 155.0, 147.0, 121.0, 207.0, 76.0, 113.0, 39.0, 134.0, 75.0, 31.0, 9.0, 181.0, 162.0, 204.0, 187.0, 20.0, 167.0, 115.0, 84.0, 144.0, 39.0, 245.0, 133.0, 23.0, 133.0, 37.0, 55.0, 22.0, 55.0, 69.0, 87.0, 68.0, 71.0, 9.0, 9.0, 51.0, 88.0, 136.0, 125.0, 118.0, 139.0, 95.0, 130.0, 67.0, 79.0, 38.0, 82.0, 20.0, 98.0, 190.0, 31.0, 153.0, 104.0, 173.0, 93.0, 109.0, 218.0, 177.0, 155.0, 136.0, 150.0, 96.0, 191.0, 93.0, 98.0, 150.0, 110.0, 76.0, 107.0, 121.0, 158.0, 139.0, 159.0, 38.0, 148.0, 47.0, 117.0, 141.0, 9.0, 141.0, 87.0, 148.0, 144.0, 93.0, 106.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [54.0, 47.0, 50.0, 60.0, 71.0, 86.0, 76.0, 91.0, 46.0, 38.0, 30.0, 41.0, 65.0, 63.0, 24.0, 18.0, 75.0, 87.0, 85.0, 99.0, 42.0, 46.0, 53.0, 55.0, 105.0, 99.0, 50.0, 45.0, 96.0, 83.0, 76.0, 79.0, 69.0, 78.0, 67.0, 54.0, 99.0, 108.0, 30.0, 46.0, 53.0, 60.0, 28.0, 11.0, 57.0, 77.0, 22.0, 53.0, 20.0, 11.0, 3.0, 6.0, 98.0, 83.0, 92.0, 70.0, 93.0, 111.0, 85.0, 102.0, 17.0, 3.0, 82.0, 85.0, 51.0, 64.0, 45.0, 39.0, 74.0, 70.0, 25.0, 14.0, 110.0, 135.0, 59.0, 74.0, 11.0, 12.0, 68.0, 65.0, 22.0, 15.0, 34.0, 21.0, 11.0, 11.0, 23.0, 32.0, 25.0, 44.0, 46.0, 41.0, 42.0, 26.0, 37.0, 34.0, 0.0, 9.0, 0.0, 9.0, 24.0, 27.0, 43.0, 45.0, 66.0, 70.0, 64.0, 61.0, 54.0, 64.0, 63.0, 76.0, 56.0, 39.0, 59.0, 71.0, 30.0, 37.0, 36.0, 43.0, 16.0, 22.0, 40.0, 42.0, 3.0, 17.0, 45.0, 53.0, 91.0, 99.0, 19.0, 
12.0, 78.0, 75.0, 59.0, 45.0, 88.0, 85.0, 50.0, 43.0, 57.0, 52.0, 94.0, 124.0, 74.0, 103.0, 85.0, 70.0, 76.0, 60.0, 64.0, 86.0, 53.0, 43.0, 89.0, 102.0, 49.0, 44.0, 52.0, 46.0, 84.0, 66.0, 51.0, 59.0, 40.0, 36.0, 53.0, 54.0, 65.0, 56.0, 74.0, 84.0, 68.0, 71.0, 81.0, 78.0, 11.0, 27.0, 67.0, 81.0, 19.0, 28.0, 51.0, 66.0, 73.0, 68.0, 6.0, 3.0, 71.0, 70.0, 36.0, 51.0, 68.0, 80.0, 74.0, 70.0, 42.0, 51.0, 58.0, 48.0]}, "sampler_perf": {"mean_env_wait_ms": 3.5541442522194506, "mean_processing_ms": 0.7759022948286116, "mean_inference_ms": 4.093132668646798}, "off_policy_estimator": {}, "info": {"num_steps_trained": 1176000, "num_steps_sampled": 627200, "sample_time_ms": 20998.642, "load_time_ms": 37.333, "grad_time_ms": 9227.616, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 7.10542746348012e-16, "cur_lr": 0.0010000000474974513, "total_loss": -0.0032997550442814827, "policy_loss": -0.004892440978437662, "vf_loss": 23.954416275024414, "vf_explained_var": 0.525080680847168, "kl": 0.0015437895199283957, "entropy": 1.6054998636245728, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 627200, "episodes_total": 1568, "training_iteration": 49, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-09-07", "timestamp": 1660248547, "time_this_iter_s": 35.51046395301819, "time_total_s": 6968.08242058754, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, 
"lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, 
"tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 6968.08242058754, "timesteps_since_restore": 627200, "iterations_since_restore": 
49, "perf": {"cpu_util_percent": 33.118, "ram_util_percent": 57.68999999999998}}
-{"episode_reward_max": 238.0, "episode_reward_min": 9.0, "episode_reward_mean": 123.05, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 3.0}, "policy_reward_max": {"ppo": 124.0}, "policy_reward_mean": {"ppo": 61.525}, "custom_metrics": {"sparse_reward_mean": 29.6, "sparse_reward_min": 0, "sparse_reward_max": 80, "shaped_reward_mean": 63.85, "shaped_reward_min": 9, "shaped_reward_max": 101, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 7.32, "onion_pickup_agent_0_min": 0, "onion_pickup_agent_0_max": 17, "onion_pickup_agent_1_mean": 8.04, "onion_pickup_agent_1_min": 3, "onion_pickup_agent_1_max": 15, "useful_onion_pickup_agent_0_mean": 6.33, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 15, "useful_onion_pickup_agent_1_mean": 6.84, "useful_onion_pickup_agent_1_min": 1, "useful_onion_pickup_agent_1_max": 14, "onion_drop_agent_0_mean": 1.88, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 8, "onion_drop_agent_1_mean": 1.54, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.89, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 0.82, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 5.07, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 11, "potting_onion_agent_1_mean": 6.16, "potting_onion_agent_1_min": 1, "potting_onion_agent_1_max": 11, "dish_pickup_agent_0_mean": 4.73, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 14, "dish_pickup_agent_1_mean": 4.48, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 1.33, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 5, "useful_dish_pickup_agent_1_mean": 0.94, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 4, "dish_drop_agent_0_mean": 2.13, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 8, "dish_drop_agent_1_mean": 2.01, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.6, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 6, "useful_dish_drop_agent_1_mean": 0.64, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 7, "soup_pickup_agent_0_mean": 2.93, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 2.89, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 2.03, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 5, "soup_delivery_agent_1_mean": 1.84, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.77, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 5, "soup_drop_agent_1_mean": 0.96, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 5, "optimal_onion_potting_agent_0_mean": 5.07, 
"optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 11, "optimal_onion_potting_agent_1_mean": 6.16, "optimal_onion_potting_agent_1_min": 1, "optimal_onion_potting_agent_1_max": 11, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 5.07, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 11, "viable_onion_potting_agent_1_mean": 6.16, "viable_onion_potting_agent_1_min": 1, "viable_onion_potting_agent_1_max": 11, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [127.0, 147.0, 128.0, 210.0, 179.0, 107.0, 141.0, 75.0, 107.0, 175.0, 129.0, 153.0, 88.0, 98.0, 71.0, 105.0, 238.0, 110.0, 164.0, 68.0, 118.0, 107.0, 65.0, 171.0, 209.0, 50.0, 164.0, 99.0, 88.0, 12.0, 122.0, 166.0, 190.0, 31.0, 153.0, 104.0, 173.0, 93.0, 109.0, 218.0, 177.0, 155.0, 136.0, 150.0, 96.0, 191.0, 93.0, 98.0, 150.0, 110.0, 76.0, 107.0, 121.0, 158.0, 139.0, 159.0, 38.0, 148.0, 47.0, 117.0, 141.0, 9.0, 141.0, 87.0, 148.0, 144.0, 93.0, 106.0, 101.0, 110.0, 157.0, 167.0, 84.0, 71.0, 128.0, 42.0, 162.0, 184.0, 88.0, 108.0, 204.0, 95.0, 179.0, 155.0, 147.0, 121.0, 207.0, 76.0, 113.0, 39.0, 134.0, 75.0, 31.0, 9.0, 181.0, 162.0, 204.0, 187.0, 20.0, 167.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [48.0, 79.0, 74.0, 73.0, 68.0, 60.0, 102.0, 108.0, 87.0, 92.0, 51.0, 56.0, 64.0, 77.0, 41.0, 34.0, 67.0, 40.0, 89.0, 86.0, 68.0, 61.0, 79.0, 74.0, 43.0, 45.0, 49.0, 49.0, 35.0, 36.0, 56.0, 49.0, 122.0, 116.0, 48.0, 62.0, 90.0, 74.0, 34.0, 34.0, 64.0, 54.0, 51.0, 56.0, 34.0, 31.0, 91.0, 80.0, 108.0, 101.0, 25.0, 25.0, 80.0, 84.0, 57.0, 42.0, 42.0, 46.0, 3.0, 9.0, 54.0, 68.0, 88.0, 78.0, 91.0, 99.0, 19.0, 12.0, 78.0, 75.0, 59.0, 45.0, 88.0, 85.0, 50.0, 43.0, 57.0, 52.0, 94.0, 124.0, 74.0, 103.0, 85.0, 70.0, 76.0, 60.0, 64.0, 86.0, 53.0, 43.0, 89.0, 102.0, 49.0, 44.0, 52.0, 46.0, 84.0, 66.0, 51.0, 59.0, 40.0, 36.0, 53.0, 54.0, 65.0, 56.0, 74.0, 84.0, 68.0, 71.0, 81.0, 78.0, 11.0, 27.0, 67.0, 81.0, 19.0, 
28.0, 51.0, 66.0, 73.0, 68.0, 6.0, 3.0, 71.0, 70.0, 36.0, 51.0, 68.0, 80.0, 74.0, 70.0, 42.0, 51.0, 58.0, 48.0, 54.0, 47.0, 50.0, 60.0, 71.0, 86.0, 76.0, 91.0, 46.0, 38.0, 30.0, 41.0, 65.0, 63.0, 24.0, 18.0, 75.0, 87.0, 85.0, 99.0, 42.0, 46.0, 53.0, 55.0, 105.0, 99.0, 50.0, 45.0, 96.0, 83.0, 76.0, 79.0, 69.0, 78.0, 67.0, 54.0, 99.0, 108.0, 30.0, 46.0, 53.0, 60.0, 28.0, 11.0, 57.0, 77.0, 22.0, 53.0, 20.0, 11.0, 3.0, 6.0, 98.0, 83.0, 92.0, 70.0, 93.0, 111.0, 85.0, 102.0, 17.0, 3.0, 82.0, 85.0]}, "sampler_perf": {"mean_env_wait_ms": 3.491898537786064, "mean_processing_ms": 0.7635432106455172, "mean_inference_ms": 4.03419160595968}, "off_policy_estimator": {}, "info": {"num_steps_trained": 1200000, "num_steps_sampled": 640000, "sample_time_ms": 21322.647, "load_time_ms": 37.352, "grad_time_ms": 9260.248, "update_time_ms": 0.003, "learner": {"ppo": {"cur_kl_coeff": 3.55271373174006e-16, "cur_lr": 0.0010000000474974513, "total_loss": -0.005630036350339651, "policy_loss": -0.007203007582575083, "vf_loss": 23.694684982299805, "vf_explained_var": 0.5489806532859802, "kl": 0.0013687704922631383, "entropy": 1.592978835105896, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 640000, "episodes_total": 1600, "training_iteration": 50, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-09-39", "timestamp": 1660248579, "time_this_iter_s": 31.292397022247314, "time_total_s": 6999.374817609787, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, 
"use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, 
"observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 6999.374817609787, 
"timesteps_since_restore": 640000, "iterations_since_restore": 50, "perf": {"cpu_util_percent": 38.30227272727273, "ram_util_percent": 57.75909090909093}}
-{"episode_reward_max": 238.0, "episode_reward_min": 9.0, "episode_reward_mean": 125.47, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 3.0}, "policy_reward_max": {"ppo": 122.0}, "policy_reward_mean": {"ppo": 62.735}, "custom_metrics": {"sparse_reward_mean": 29.2, "sparse_reward_min": 0, "sparse_reward_max": 80, "shaped_reward_mean": 67.07, "shaped_reward_min": 9, "shaped_reward_max": 115, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 7.57, "onion_pickup_agent_0_min": 0, "onion_pickup_agent_0_max": 18, "onion_pickup_agent_1_mean": 8.17, "onion_pickup_agent_1_min": 3, "onion_pickup_agent_1_max": 15, "useful_onion_pickup_agent_0_mean": 6.51, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 15, "useful_onion_pickup_agent_1_mean": 6.96, "useful_onion_pickup_agent_1_min": 1, "useful_onion_pickup_agent_1_max": 13, "onion_drop_agent_0_mean": 1.87, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 8, "onion_drop_agent_1_mean": 1.56, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 1.0, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 0.89, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 6, "potting_onion_agent_0_mean": 5.35, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 11, "potting_onion_agent_1_mean": 6.28, "potting_onion_agent_1_min": 1, "potting_onion_agent_1_max": 11, "dish_pickup_agent_0_mean": 4.84, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 14, "dish_pickup_agent_1_mean": 4.48, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 1.41, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 5, "useful_dish_pickup_agent_1_mean": 1.05, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 4, "dish_drop_agent_0_mean": 1.97, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 8, "dish_drop_agent_1_mean": 1.93, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 7, "useful_dish_drop_agent_0_mean": 0.58, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.75, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 6, "soup_pickup_agent_0_mean": 3.1, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 2.95, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 2.2, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 5, "soup_delivery_agent_1_mean": 1.96, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.79, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 5, "soup_drop_agent_1_mean": 0.85, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 5, "optimal_onion_potting_agent_0_mean": 5.35, 
"optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 11, "optimal_onion_potting_agent_1_mean": 6.28, "optimal_onion_potting_agent_1_min": 1, "optimal_onion_potting_agent_1_max": 11, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 5.35, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 11, "viable_onion_potting_agent_1_mean": 6.28, "viable_onion_potting_agent_1_min": 1, "viable_onion_potting_agent_1_max": 11, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [98.0, 63.0, 63.0, 192.0, 93.0, 178.0, 196.0, 53.0, 109.0, 156.0, 136.0, 166.0, 163.0, 175.0, 118.0, 115.0, 137.0, 115.0, 110.0, 155.0, 132.0, 79.0, 158.0, 213.0, 104.0, 153.0, 70.0, 195.0, 153.0, 85.0, 101.0, 123.0, 148.0, 144.0, 93.0, 106.0, 101.0, 110.0, 157.0, 167.0, 84.0, 71.0, 128.0, 42.0, 162.0, 184.0, 88.0, 108.0, 204.0, 95.0, 179.0, 155.0, 147.0, 121.0, 207.0, 76.0, 113.0, 39.0, 134.0, 75.0, 31.0, 9.0, 181.0, 162.0, 204.0, 187.0, 20.0, 167.0, 127.0, 147.0, 128.0, 210.0, 179.0, 107.0, 141.0, 75.0, 107.0, 175.0, 129.0, 153.0, 88.0, 98.0, 71.0, 105.0, 238.0, 110.0, 164.0, 68.0, 118.0, 107.0, 65.0, 171.0, 209.0, 50.0, 164.0, 99.0, 88.0, 12.0, 122.0, 166.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [36.0, 62.0, 38.0, 25.0, 35.0, 28.0, 94.0, 98.0, 41.0, 52.0, 86.0, 92.0, 106.0, 90.0, 24.0, 29.0, 63.0, 46.0, 63.0, 93.0, 73.0, 63.0, 85.0, 81.0, 90.0, 73.0, 71.0, 104.0, 70.0, 48.0, 56.0, 59.0, 71.0, 66.0, 56.0, 59.0, 54.0, 56.0, 73.0, 82.0, 65.0, 67.0, 44.0, 35.0, 71.0, 87.0, 108.0, 105.0, 58.0, 46.0, 88.0, 65.0, 35.0, 35.0, 107.0, 88.0, 70.0, 83.0, 48.0, 37.0, 59.0, 42.0, 63.0, 60.0, 68.0, 80.0, 74.0, 70.0, 42.0, 51.0, 58.0, 48.0, 54.0, 47.0, 50.0, 60.0, 71.0, 86.0, 76.0, 91.0, 46.0, 38.0, 30.0, 41.0, 65.0, 63.0, 24.0, 18.0, 75.0, 87.0, 85.0, 99.0, 42.0, 46.0, 53.0, 55.0, 105.0, 99.0, 50.0, 45.0, 96.0, 83.0, 76.0, 79.0, 69.0, 78.0, 67.0, 54.0, 99.0, 108.0, 30.0, 46.0, 53.0, 60.0, 28.0, 11.0, 57.0, 
77.0, 22.0, 53.0, 20.0, 11.0, 3.0, 6.0, 98.0, 83.0, 92.0, 70.0, 93.0, 111.0, 85.0, 102.0, 17.0, 3.0, 82.0, 85.0, 48.0, 79.0, 74.0, 73.0, 68.0, 60.0, 102.0, 108.0, 87.0, 92.0, 51.0, 56.0, 64.0, 77.0, 41.0, 34.0, 67.0, 40.0, 89.0, 86.0, 68.0, 61.0, 79.0, 74.0, 43.0, 45.0, 49.0, 49.0, 35.0, 36.0, 56.0, 49.0, 122.0, 116.0, 48.0, 62.0, 90.0, 74.0, 34.0, 34.0, 64.0, 54.0, 51.0, 56.0, 34.0, 31.0, 91.0, 80.0, 108.0, 101.0, 25.0, 25.0, 80.0, 84.0, 57.0, 42.0, 42.0, 46.0, 3.0, 9.0, 54.0, 68.0, 88.0, 78.0]}, "sampler_perf": {"mean_env_wait_ms": 3.4320001935473794, "mean_processing_ms": 0.7516646134052514, "mean_inference_ms": 3.975737374961292}, "off_policy_estimator": {}, "info": {"num_steps_trained": 1224000, "num_steps_sampled": 652800, "sample_time_ms": 21552.858, "load_time_ms": 37.216, "grad_time_ms": 9227.975, "update_time_ms": 0.003, "learner": {"ppo": {"cur_kl_coeff": 1.77635686587003e-16, "cur_lr": 0.0010000000474974513, "total_loss": -0.0032480310183018446, "policy_loss": -0.004836531355977058, "vf_loss": 23.794113159179688, "vf_explained_var": 0.5322676301002502, "kl": 0.0011860225349664688, "entropy": 1.5818275213241577, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 652800, "episodes_total": 1632, "training_iteration": 51, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-10-09", "timestamp": 1660248609, "time_this_iter_s": 29.93578290939331, "time_total_s": 7029.31060051918, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, 
"use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, 
"observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 7029.31060051918, 
"timesteps_since_restore": 652800, "iterations_since_restore": 51, "perf": {"cpu_util_percent": 36.09523809523809, "ram_util_percent": 57.82142857142857}}
-{"episode_reward_max": 264.0, "episode_reward_min": 12.0, "episode_reward_mean": 128.24, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 3.0}, "policy_reward_max": {"ppo": 139.0}, "policy_reward_mean": {"ppo": 64.12}, "custom_metrics": {"sparse_reward_mean": 30.2, "sparse_reward_min": 0, "sparse_reward_max": 80, "shaped_reward_mean": 67.84, "shaped_reward_min": 12, "shaped_reward_max": 115, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 7.73, "onion_pickup_agent_0_min": 1, "onion_pickup_agent_0_max": 18, "onion_pickup_agent_1_mean": 7.95, "onion_pickup_agent_1_min": 2, "onion_pickup_agent_1_max": 16, "useful_onion_pickup_agent_0_mean": 6.6, "useful_onion_pickup_agent_0_min": 1, "useful_onion_pickup_agent_0_max": 15, "useful_onion_pickup_agent_1_mean": 6.65, "useful_onion_pickup_agent_1_min": 1, "useful_onion_pickup_agent_1_max": 13, "onion_drop_agent_0_mean": 1.75, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 7, "onion_drop_agent_1_mean": 1.61, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 7, "useful_onion_drop_agent_0_mean": 0.97, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.97, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 6, "potting_onion_agent_0_mean": 5.65, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 11, "potting_onion_agent_1_mean": 5.99, "potting_onion_agent_1_min": 1, "potting_onion_agent_1_max": 11, "dish_pickup_agent_0_mean": 4.73, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 14, "dish_pickup_agent_1_mean": 4.52, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 1.44, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 5, "useful_dish_pickup_agent_1_mean": 1.15, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 5, "dish_drop_agent_0_mean": 1.82, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 8, "dish_drop_agent_1_mean": 1.97, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 7, "useful_dish_drop_agent_0_mean": 0.57, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 4, "useful_dish_drop_agent_1_mean": 0.7, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 6, "soup_pickup_agent_0_mean": 3.06, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 3.08, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 2.14, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 5, "soup_delivery_agent_1_mean": 2.04, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.78, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 5, "soup_drop_agent_1_mean": 0.88, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 4, "optimal_onion_potting_agent_0_mean": 5.65, 
"optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 11, "optimal_onion_potting_agent_1_mean": 5.99, "optimal_onion_potting_agent_1_min": 1, "optimal_onion_potting_agent_1_max": 11, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 5.65, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 11, "viable_onion_potting_agent_1_mean": 5.99, "viable_onion_potting_agent_1_min": 1, "viable_onion_potting_agent_1_max": 11, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [141.0, 138.0, 143.0, 211.0, 127.0, 20.0, 65.0, 150.0, 93.0, 153.0, 47.0, 144.0, 264.0, 148.0, 59.0, 121.0, 39.0, 204.0, 187.0, 208.0, 121.0, 124.0, 14.0, 95.0, 81.0, 107.0, 198.0, 114.0, 104.0, 152.0, 238.0, 88.0, 204.0, 187.0, 20.0, 167.0, 127.0, 147.0, 128.0, 210.0, 179.0, 107.0, 141.0, 75.0, 107.0, 175.0, 129.0, 153.0, 88.0, 98.0, 71.0, 105.0, 238.0, 110.0, 164.0, 68.0, 118.0, 107.0, 65.0, 171.0, 209.0, 50.0, 164.0, 99.0, 88.0, 12.0, 122.0, 166.0, 98.0, 63.0, 63.0, 192.0, 93.0, 178.0, 196.0, 53.0, 109.0, 156.0, 136.0, 166.0, 163.0, 175.0, 118.0, 115.0, 137.0, 115.0, 110.0, 155.0, 132.0, 79.0, 158.0, 213.0, 104.0, 153.0, 70.0, 195.0, 153.0, 85.0, 101.0, 123.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [65.0, 76.0, 57.0, 81.0, 92.0, 51.0, 115.0, 96.0, 62.0, 65.0, 9.0, 11.0, 36.0, 29.0, 65.0, 85.0, 54.0, 39.0, 88.0, 65.0, 33.0, 14.0, 65.0, 79.0, 125.0, 139.0, 77.0, 71.0, 33.0, 26.0, 65.0, 56.0, 20.0, 19.0, 91.0, 113.0, 98.0, 89.0, 110.0, 98.0, 53.0, 68.0, 57.0, 67.0, 8.0, 6.0, 56.0, 39.0, 47.0, 34.0, 46.0, 61.0, 93.0, 105.0, 55.0, 59.0, 64.0, 40.0, 73.0, 79.0, 125.0, 113.0, 40.0, 48.0, 93.0, 111.0, 85.0, 102.0, 17.0, 3.0, 82.0, 85.0, 48.0, 79.0, 74.0, 73.0, 68.0, 60.0, 102.0, 108.0, 87.0, 92.0, 51.0, 56.0, 64.0, 77.0, 41.0, 34.0, 67.0, 40.0, 89.0, 86.0, 68.0, 61.0, 79.0, 74.0, 43.0, 45.0, 49.0, 49.0, 35.0, 36.0, 56.0, 49.0, 122.0, 116.0, 48.0, 62.0, 90.0, 74.0, 34.0, 34.0, 64.0, 54.0, 51.0, 56.0, 
34.0, 31.0, 91.0, 80.0, 108.0, 101.0, 25.0, 25.0, 80.0, 84.0, 57.0, 42.0, 42.0, 46.0, 3.0, 9.0, 54.0, 68.0, 88.0, 78.0, 36.0, 62.0, 38.0, 25.0, 35.0, 28.0, 94.0, 98.0, 41.0, 52.0, 86.0, 92.0, 106.0, 90.0, 24.0, 29.0, 63.0, 46.0, 63.0, 93.0, 73.0, 63.0, 85.0, 81.0, 90.0, 73.0, 71.0, 104.0, 70.0, 48.0, 56.0, 59.0, 71.0, 66.0, 56.0, 59.0, 54.0, 56.0, 73.0, 82.0, 65.0, 67.0, 44.0, 35.0, 71.0, 87.0, 108.0, 105.0, 58.0, 46.0, 88.0, 65.0, 35.0, 35.0, 107.0, 88.0, 70.0, 83.0, 48.0, 37.0, 59.0, 42.0, 63.0, 60.0]}, "sampler_perf": {"mean_env_wait_ms": 3.3744175644507366, "mean_processing_ms": 0.7402432912819328, "mean_inference_ms": 3.918069988577329}, "off_policy_estimator": {}, "info": {"num_steps_trained": 1248000, "num_steps_sampled": 665600, "sample_time_ms": 21746.558, "load_time_ms": 37.515, "grad_time_ms": 9170.716, "update_time_ms": 0.003, "learner": {"ppo": {"cur_kl_coeff": 8.88178432935015e-17, "cur_lr": 0.0010000000474974513, "total_loss": -0.00774852791801095, "policy_loss": -0.009392179548740387, "vf_loss": 24.351181030273438, "vf_explained_var": 0.5798514485359192, "kl": 0.0016348478384315968, "entropy": 1.5829213857650757, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 665600, "episodes_total": 1664, "training_iteration": 52, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-10-39", "timestamp": 1660248639, "time_this_iter_s": 30.649518966674805, "time_total_s": 7059.960119485855, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": 
true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, 
"observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 7059.960119485855, 
"timesteps_since_restore": 665600, "iterations_since_restore": 52, "perf": {"cpu_util_percent": 41.01818181818182, "ram_util_percent": 57.649999999999984}}
-{"episode_reward_max": 293.0, "episode_reward_min": 12.0, "episode_reward_mean": 128.76, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 3.0}, "policy_reward_max": {"ppo": 154.0}, "policy_reward_mean": {"ppo": 64.38}, "custom_metrics": {"sparse_reward_mean": 30.0, "sparse_reward_min": 0, "sparse_reward_max": 100, "shaped_reward_mean": 68.76, "shaped_reward_min": 12, "shaped_reward_max": 115, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 7.92, "onion_pickup_agent_0_min": 1, "onion_pickup_agent_0_max": 18, "onion_pickup_agent_1_mean": 7.71, "onion_pickup_agent_1_min": 2, "onion_pickup_agent_1_max": 16, "useful_onion_pickup_agent_0_mean": 6.68, "useful_onion_pickup_agent_0_min": 1, "useful_onion_pickup_agent_0_max": 13, "useful_onion_pickup_agent_1_mean": 6.5, "useful_onion_pickup_agent_1_min": 1, "useful_onion_pickup_agent_1_max": 13, "onion_drop_agent_0_mean": 1.73, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 7, "onion_drop_agent_1_mean": 1.57, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 7, "useful_onion_drop_agent_0_mean": 1.01, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.92, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 6, "potting_onion_agent_0_mean": 5.85, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 11, "potting_onion_agent_1_mean": 5.81, "potting_onion_agent_1_min": 1, "potting_onion_agent_1_max": 11, "dish_pickup_agent_0_mean": 4.75, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 14, "dish_pickup_agent_1_mean": 4.45, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 1.45, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 5, "useful_dish_pickup_agent_1_mean": 1.26, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 5, "dish_drop_agent_0_mean": 1.75, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 8, "dish_drop_agent_1_mean": 1.8, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 7, "useful_dish_drop_agent_0_mean": 0.64, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 4, "useful_dish_drop_agent_1_mean": 0.72, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 6, "soup_pickup_agent_0_mean": 3.16, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 3.06, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 2.19, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 5, "soup_delivery_agent_1_mean": 2.04, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 5, "soup_drop_agent_0_mean": 0.85, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 5, "soup_drop_agent_1_mean": 0.87, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 4, "optimal_onion_potting_agent_0_mean": 5.85, 
"optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 11, "optimal_onion_potting_agent_1_mean": 5.81, "optimal_onion_potting_agent_1_min": 1, "optimal_onion_potting_agent_1_max": 11, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 5.85, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 11, "viable_onion_potting_agent_1_mean": 5.81, "viable_onion_potting_agent_1_min": 1, "viable_onion_potting_agent_1_max": 11, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [207.0, 153.0, 195.0, 101.0, 90.0, 210.0, 225.0, 106.0, 121.0, 75.0, 60.0, 170.0, 23.0, 130.0, 147.0, 136.0, 39.0, 133.0, 158.0, 101.0, 66.0, 116.0, 50.0, 167.0, 146.0, 293.0, 36.0, 109.0, 245.0, 121.0, 207.0, 97.0, 88.0, 12.0, 122.0, 166.0, 98.0, 63.0, 63.0, 192.0, 93.0, 178.0, 196.0, 53.0, 109.0, 156.0, 136.0, 166.0, 163.0, 175.0, 118.0, 115.0, 137.0, 115.0, 110.0, 155.0, 132.0, 79.0, 158.0, 213.0, 104.0, 153.0, 70.0, 195.0, 153.0, 85.0, 101.0, 123.0, 141.0, 138.0, 143.0, 211.0, 127.0, 20.0, 65.0, 150.0, 93.0, 153.0, 47.0, 144.0, 264.0, 148.0, 59.0, 121.0, 39.0, 204.0, 187.0, 208.0, 121.0, 124.0, 14.0, 95.0, 81.0, 107.0, 198.0, 114.0, 104.0, 152.0, 238.0, 88.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [105.0, 102.0, 68.0, 85.0, 96.0, 99.0, 53.0, 48.0, 38.0, 52.0, 106.0, 104.0, 113.0, 112.0, 53.0, 53.0, 59.0, 62.0, 47.0, 28.0, 22.0, 38.0, 90.0, 80.0, 9.0, 14.0, 70.0, 60.0, 82.0, 65.0, 70.0, 66.0, 19.0, 20.0, 64.0, 69.0, 96.0, 62.0, 62.0, 39.0, 31.0, 35.0, 70.0, 46.0, 27.0, 23.0, 82.0, 85.0, 71.0, 75.0, 154.0, 139.0, 16.0, 20.0, 56.0, 53.0, 126.0, 119.0, 62.0, 59.0, 103.0, 104.0, 45.0, 52.0, 42.0, 46.0, 3.0, 9.0, 54.0, 68.0, 88.0, 78.0, 36.0, 62.0, 38.0, 25.0, 35.0, 28.0, 94.0, 98.0, 41.0, 52.0, 86.0, 92.0, 106.0, 90.0, 24.0, 29.0, 63.0, 46.0, 63.0, 93.0, 73.0, 63.0, 85.0, 81.0, 90.0, 73.0, 71.0, 104.0, 70.0, 48.0, 56.0, 59.0, 71.0, 66.0, 56.0, 59.0, 54.0, 56.0, 73.0, 82.0, 65.0, 67.0, 44.0, 35.0, 
71.0, 87.0, 108.0, 105.0, 58.0, 46.0, 88.0, 65.0, 35.0, 35.0, 107.0, 88.0, 70.0, 83.0, 48.0, 37.0, 59.0, 42.0, 63.0, 60.0, 65.0, 76.0, 57.0, 81.0, 92.0, 51.0, 115.0, 96.0, 62.0, 65.0, 9.0, 11.0, 36.0, 29.0, 65.0, 85.0, 54.0, 39.0, 88.0, 65.0, 33.0, 14.0, 65.0, 79.0, 125.0, 139.0, 77.0, 71.0, 33.0, 26.0, 65.0, 56.0, 20.0, 19.0, 91.0, 113.0, 98.0, 89.0, 110.0, 98.0, 53.0, 68.0, 57.0, 67.0, 8.0, 6.0, 56.0, 39.0, 47.0, 34.0, 46.0, 61.0, 93.0, 105.0, 55.0, 59.0, 64.0, 40.0, 73.0, 79.0, 125.0, 113.0, 40.0, 48.0]}, "sampler_perf": {"mean_env_wait_ms": 3.319165284336134, "mean_processing_ms": 0.7292810578162132, "mean_inference_ms": 3.8629630777095305}, "off_policy_estimator": {}, "info": {"num_steps_trained": 1272000, "num_steps_sampled": 678400, "sample_time_ms": 22200.855, "load_time_ms": 37.251, "grad_time_ms": 9256.826, "update_time_ms": 0.003, "learner": {"ppo": {"cur_kl_coeff": 4.440892164675075e-17, "cur_lr": 0.0010000000474974513, "total_loss": -0.006719778757542372, "policy_loss": -0.008363676257431507, "vf_loss": 24.369796752929688, "vf_explained_var": 0.5794721245765686, "kl": 0.0014526437735185027, "entropy": 1.586159348487854, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 678400, "episodes_total": 1696, "training_iteration": 53, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-11-12", "timestamp": 1660248672, "time_this_iter_s": 32.775245904922485, "time_total_s": 7092.735365390778, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, 
"vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": 
false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 
7092.735365390778, "timesteps_since_restore": 678400, "iterations_since_restore": 53, "perf": {"cpu_util_percent": 38.126086956521746, "ram_util_percent": 57.626086956521746}}
-{"episode_reward_max": 296.0, "episode_reward_min": 14.0, "episode_reward_mean": 132.59, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 3.0}, "policy_reward_max": {"ppo": 154.0}, "policy_reward_mean": {"ppo": 66.295}, "custom_metrics": {"sparse_reward_mean": 31.4, "sparse_reward_min": 0, "sparse_reward_max": 100, "shaped_reward_mean": 69.79, "shaped_reward_min": 14, "shaped_reward_max": 110, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 8.04, "onion_pickup_agent_0_min": 1, "onion_pickup_agent_0_max": 18, "onion_pickup_agent_1_mean": 7.88, "onion_pickup_agent_1_min": 2, "onion_pickup_agent_1_max": 16, "useful_onion_pickup_agent_0_mean": 6.88, "useful_onion_pickup_agent_0_min": 1, "useful_onion_pickup_agent_0_max": 13, "useful_onion_pickup_agent_1_mean": 6.68, "useful_onion_pickup_agent_1_min": 1, "useful_onion_pickup_agent_1_max": 15, "onion_drop_agent_0_mean": 1.73, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 7, "onion_drop_agent_1_mean": 1.59, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 7, "useful_onion_drop_agent_0_mean": 0.93, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 1.0, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 6, "potting_onion_agent_0_mean": 5.95, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 11, "potting_onion_agent_1_mean": 5.92, "potting_onion_agent_1_min": 1, "potting_onion_agent_1_max": 13, "dish_pickup_agent_0_mean": 4.44, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 4.47, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 1.4, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 5, "useful_dish_pickup_agent_1_mean": 1.36, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 5, "dish_drop_agent_0_mean": 1.57, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 1.73, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 7, "useful_dish_drop_agent_0_mean": 0.57, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 4, "useful_dish_drop_agent_1_mean": 0.61, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 5, "soup_pickup_agent_0_mean": 3.12, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 3.22, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 2.14, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 5, "soup_delivery_agent_1_mean": 2.14, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 5, "soup_drop_agent_0_mean": 0.84, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 5, "soup_drop_agent_1_mean": 0.97, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 7, "optimal_onion_potting_agent_0_mean": 5.95, 
"optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 11, "optimal_onion_potting_agent_1_mean": 5.92, "optimal_onion_potting_agent_1_min": 1, "optimal_onion_potting_agent_1_max": 13, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 5.95, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 11, "viable_onion_potting_agent_1_mean": 5.92, "viable_onion_potting_agent_1_min": 1, "viable_onion_potting_agent_1_max": 13, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [205.0, 61.0, 126.0, 104.0, 179.0, 296.0, 72.0, 126.0, 172.0, 17.0, 155.0, 207.0, 227.0, 101.0, 110.0, 101.0, 207.0, 77.0, 155.0, 159.0, 155.0, 158.0, 150.0, 96.0, 81.0, 125.0, 155.0, 42.0, 270.0, 155.0, 147.0, 75.0, 153.0, 85.0, 101.0, 123.0, 141.0, 138.0, 143.0, 211.0, 127.0, 20.0, 65.0, 150.0, 93.0, 153.0, 47.0, 144.0, 264.0, 148.0, 59.0, 121.0, 39.0, 204.0, 187.0, 208.0, 121.0, 124.0, 14.0, 95.0, 81.0, 107.0, 198.0, 114.0, 104.0, 152.0, 238.0, 88.0, 207.0, 153.0, 195.0, 101.0, 90.0, 210.0, 225.0, 106.0, 121.0, 75.0, 60.0, 170.0, 23.0, 130.0, 147.0, 136.0, 39.0, 133.0, 158.0, 101.0, 66.0, 116.0, 50.0, 167.0, 146.0, 293.0, 36.0, 109.0, 245.0, 121.0, 207.0, 97.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [105.0, 100.0, 28.0, 33.0, 65.0, 61.0, 51.0, 53.0, 92.0, 87.0, 143.0, 153.0, 32.0, 40.0, 69.0, 57.0, 75.0, 97.0, 3.0, 14.0, 79.0, 76.0, 107.0, 100.0, 102.0, 125.0, 37.0, 64.0, 49.0, 61.0, 42.0, 59.0, 104.0, 103.0, 43.0, 34.0, 68.0, 87.0, 77.0, 82.0, 64.0, 91.0, 74.0, 84.0, 84.0, 66.0, 50.0, 46.0, 42.0, 39.0, 69.0, 56.0, 86.0, 69.0, 23.0, 19.0, 138.0, 132.0, 65.0, 90.0, 74.0, 73.0, 33.0, 42.0, 70.0, 83.0, 48.0, 37.0, 59.0, 42.0, 63.0, 60.0, 65.0, 76.0, 57.0, 81.0, 92.0, 51.0, 115.0, 96.0, 62.0, 65.0, 9.0, 11.0, 36.0, 29.0, 65.0, 85.0, 54.0, 39.0, 88.0, 65.0, 33.0, 14.0, 65.0, 79.0, 125.0, 139.0, 77.0, 71.0, 33.0, 26.0, 65.0, 56.0, 20.0, 19.0, 91.0, 113.0, 98.0, 89.0, 110.0, 98.0, 53.0, 68.0, 57.0, 
67.0, 8.0, 6.0, 56.0, 39.0, 47.0, 34.0, 46.0, 61.0, 93.0, 105.0, 55.0, 59.0, 64.0, 40.0, 73.0, 79.0, 125.0, 113.0, 40.0, 48.0, 105.0, 102.0, 68.0, 85.0, 96.0, 99.0, 53.0, 48.0, 38.0, 52.0, 106.0, 104.0, 113.0, 112.0, 53.0, 53.0, 59.0, 62.0, 47.0, 28.0, 22.0, 38.0, 90.0, 80.0, 9.0, 14.0, 70.0, 60.0, 82.0, 65.0, 70.0, 66.0, 19.0, 20.0, 64.0, 69.0, 96.0, 62.0, 62.0, 39.0, 31.0, 35.0, 70.0, 46.0, 27.0, 23.0, 82.0, 85.0, 71.0, 75.0, 154.0, 139.0, 16.0, 20.0, 56.0, 53.0, 126.0, 119.0, 62.0, 59.0, 103.0, 104.0, 45.0, 52.0]}, "sampler_perf": {"mean_env_wait_ms": 3.2660665746708606, "mean_processing_ms": 0.718736935276724, "mean_inference_ms": 3.8108664134221066}, "off_policy_estimator": {}, "info": {"num_steps_trained": 1296000, "num_steps_sampled": 691200, "sample_time_ms": 22619.672, "load_time_ms": 37.409, "grad_time_ms": 9296.582, "update_time_ms": 0.003, "learner": {"ppo": {"cur_kl_coeff": 2.2204460823375376e-17, "cur_lr": 0.0010000000474974513, "total_loss": -0.006839328911155462, "policy_loss": -0.008672266267240047, "vf_loss": 26.157081604003906, "vf_explained_var": 0.5799071192741394, "kl": 0.0013235282385721803, "entropy": 1.5655454397201538, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 691200, "episodes_total": 1728, "training_iteration": 54, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-11-44", "timestamp": 1660248704, "time_this_iter_s": 31.93130087852478, "time_total_s": 7124.666666269302, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, 
"vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": 
false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 
7124.666666269302, "timesteps_since_restore": 691200, "iterations_since_restore": 54, "perf": {"cpu_util_percent": 38.36222222222223, "ram_util_percent": 57.54666666666667}}
-{"episode_reward_max": 301.0, "episode_reward_min": 17.0, "episode_reward_mean": 138.1, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 3.0}, "policy_reward_max": {"ppo": 155.0}, "policy_reward_mean": {"ppo": 69.05}, "custom_metrics": {"sparse_reward_mean": 33.6, "sparse_reward_min": 0, "sparse_reward_max": 100, "shaped_reward_mean": 70.9, "shaped_reward_min": 17, "shaped_reward_max": 113, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 8.15, "onion_pickup_agent_0_min": 3, "onion_pickup_agent_0_max": 15, "onion_pickup_agent_1_mean": 8.01, "onion_pickup_agent_1_min": 3, "onion_pickup_agent_1_max": 16, "useful_onion_pickup_agent_0_mean": 6.92, "useful_onion_pickup_agent_0_min": 1, "useful_onion_pickup_agent_0_max": 13, "useful_onion_pickup_agent_1_mean": 6.88, "useful_onion_pickup_agent_1_min": 1, "useful_onion_pickup_agent_1_max": 15, "onion_drop_agent_0_mean": 1.72, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 7, "onion_drop_agent_1_mean": 1.45, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.97, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 0.92, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 6.05, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 12, "potting_onion_agent_1_mean": 6.19, "potting_onion_agent_1_min": 1, "potting_onion_agent_1_max": 13, "dish_pickup_agent_0_mean": 4.59, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 4.72, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 1.4, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 5, "useful_dish_pickup_agent_1_mean": 1.26, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 5, "dish_drop_agent_0_mean": 1.79, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 9, "dish_drop_agent_1_mean": 1.83, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 7, "useful_dish_drop_agent_0_mean": 0.58, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 4, "useful_dish_drop_agent_1_mean": 0.66, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 5, "soup_pickup_agent_0_mean": 3.16, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 3.24, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 2.13, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 5, "soup_delivery_agent_1_mean": 2.25, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 5, "soup_drop_agent_0_mean": 0.93, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 7, "soup_drop_agent_1_mean": 0.89, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 7, "optimal_onion_potting_agent_0_mean": 6.05, 
"optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 12, "optimal_onion_potting_agent_1_mean": 6.19, "optimal_onion_potting_agent_1_min": 1, "optimal_onion_potting_agent_1_max": 13, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 6.05, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 12, "viable_onion_potting_agent_1_mean": 6.19, "viable_onion_potting_agent_1_min": 1, "viable_onion_potting_agent_1_max": 13, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [144.0, 88.0, 37.0, 77.0, 106.0, 117.0, 253.0, 98.0, 179.0, 104.0, 139.0, 152.0, 64.0, 74.0, 273.0, 67.0, 150.0, 163.0, 78.0, 68.0, 141.0, 187.0, 39.0, 158.0, 202.0, 301.0, 153.0, 247.0, 199.0, 163.0, 115.0, 193.0, 104.0, 152.0, 238.0, 88.0, 207.0, 153.0, 195.0, 101.0, 90.0, 210.0, 225.0, 106.0, 121.0, 75.0, 60.0, 170.0, 23.0, 130.0, 147.0, 136.0, 39.0, 133.0, 158.0, 101.0, 66.0, 116.0, 50.0, 167.0, 146.0, 293.0, 36.0, 109.0, 245.0, 121.0, 207.0, 97.0, 205.0, 61.0, 126.0, 104.0, 179.0, 296.0, 72.0, 126.0, 172.0, 17.0, 155.0, 207.0, 227.0, 101.0, 110.0, 101.0, 207.0, 77.0, 155.0, 159.0, 155.0, 158.0, 150.0, 96.0, 81.0, 125.0, 155.0, 42.0, 270.0, 155.0, 147.0, 75.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [65.0, 79.0, 41.0, 47.0, 14.0, 23.0, 26.0, 51.0, 61.0, 45.0, 55.0, 62.0, 125.0, 128.0, 53.0, 45.0, 81.0, 98.0, 50.0, 54.0, 68.0, 71.0, 80.0, 72.0, 33.0, 31.0, 29.0, 45.0, 142.0, 131.0, 39.0, 28.0, 62.0, 88.0, 88.0, 75.0, 27.0, 51.0, 39.0, 29.0, 71.0, 70.0, 82.0, 105.0, 20.0, 19.0, 66.0, 92.0, 97.0, 105.0, 146.0, 155.0, 71.0, 82.0, 128.0, 119.0, 113.0, 86.0, 94.0, 69.0, 62.0, 53.0, 107.0, 86.0, 64.0, 40.0, 73.0, 79.0, 125.0, 113.0, 40.0, 48.0, 105.0, 102.0, 68.0, 85.0, 96.0, 99.0, 53.0, 48.0, 38.0, 52.0, 106.0, 104.0, 113.0, 112.0, 53.0, 53.0, 59.0, 62.0, 47.0, 28.0, 22.0, 38.0, 90.0, 80.0, 9.0, 14.0, 70.0, 60.0, 82.0, 65.0, 70.0, 66.0, 19.0, 20.0, 64.0, 69.0, 96.0, 62.0, 62.0, 39.0, 31.0, 35.0, 
70.0, 46.0, 27.0, 23.0, 82.0, 85.0, 71.0, 75.0, 154.0, 139.0, 16.0, 20.0, 56.0, 53.0, 126.0, 119.0, 62.0, 59.0, 103.0, 104.0, 45.0, 52.0, 105.0, 100.0, 28.0, 33.0, 65.0, 61.0, 51.0, 53.0, 92.0, 87.0, 143.0, 153.0, 32.0, 40.0, 69.0, 57.0, 75.0, 97.0, 3.0, 14.0, 79.0, 76.0, 107.0, 100.0, 102.0, 125.0, 37.0, 64.0, 49.0, 61.0, 42.0, 59.0, 104.0, 103.0, 43.0, 34.0, 68.0, 87.0, 77.0, 82.0, 64.0, 91.0, 74.0, 84.0, 84.0, 66.0, 50.0, 46.0, 42.0, 39.0, 69.0, 56.0, 86.0, 69.0, 23.0, 19.0, 138.0, 132.0, 65.0, 90.0, 74.0, 73.0, 33.0, 42.0]}, "sampler_perf": {"mean_env_wait_ms": 3.214861984478974, "mean_processing_ms": 0.7085517018126862, "mean_inference_ms": 3.760458478559153}, "off_policy_estimator": {}, "info": {"num_steps_trained": 1320000, "num_steps_sampled": 704000, "sample_time_ms": 22712.594, "load_time_ms": 37.614, "grad_time_ms": 9158.532, "update_time_ms": 0.003, "learner": {"ppo": {"cur_kl_coeff": 1.1102230411687688e-17, "cur_lr": 0.0010000000474974513, "total_loss": -0.007639638613909483, "policy_loss": -0.00974108837544918, "vf_loss": 28.871795654296875, "vf_explained_var": 0.5772756934165955, "kl": 0.0015572212869301438, "entropy": 1.5714462995529175, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 704000, "episodes_total": 1760, "training_iteration": 55, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-12-13", "timestamp": 1660248733, "time_this_iter_s": 29.164530992507935, "time_total_s": 7153.83119726181, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": 
false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, 
"sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, 
"time_since_restore": 7153.83119726181, "timesteps_since_restore": 704000, "iterations_since_restore": 55, "perf": {"cpu_util_percent": 37.4609756097561, "ram_util_percent": 57.50487804878048}}
-{"episode_reward_max": 301.0, "episode_reward_min": 12.0, "episode_reward_mean": 149.67, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 3.0}, "policy_reward_max": {"ppo": 161.0}, "policy_reward_mean": {"ppo": 74.835}, "custom_metrics": {"sparse_reward_mean": 37.8, "sparse_reward_min": 0, "sparse_reward_max": 100, "shaped_reward_mean": 74.07, "shaped_reward_min": 12, "shaped_reward_max": 118, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 8.1, "onion_pickup_agent_0_min": 3, "onion_pickup_agent_0_max": 15, "onion_pickup_agent_1_mean": 8.78, "onion_pickup_agent_1_min": 2, "onion_pickup_agent_1_max": 18, "useful_onion_pickup_agent_0_mean": 6.98, "useful_onion_pickup_agent_0_min": 1, "useful_onion_pickup_agent_0_max": 13, "useful_onion_pickup_agent_1_mean": 7.57, "useful_onion_pickup_agent_1_min": 1, "useful_onion_pickup_agent_1_max": 17, "onion_drop_agent_0_mean": 1.78, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 7, "onion_drop_agent_1_mean": 1.56, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 1.05, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 1.01, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 5.98, "potting_onion_agent_0_min": 1, "potting_onion_agent_0_max": 12, "potting_onion_agent_1_mean": 6.87, "potting_onion_agent_1_min": 1, "potting_onion_agent_1_max": 15, "dish_pickup_agent_0_mean": 4.43, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 4.85, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 1.44, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 5, "useful_dish_pickup_agent_1_mean": 1.5, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 5, "dish_drop_agent_0_mean": 1.66, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 9, "dish_drop_agent_1_mean": 1.84, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.52, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 4, "useful_dish_drop_agent_1_mean": 0.59, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 4, "soup_pickup_agent_0_mean": 3.1, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 3.33, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 2.16, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 6, "soup_delivery_agent_1_mean": 2.34, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 5, "soup_drop_agent_0_mean": 0.8, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 7, "soup_drop_agent_1_mean": 0.89, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 7, "optimal_onion_potting_agent_0_mean": 5.98, 
"optimal_onion_potting_agent_0_min": 1, "optimal_onion_potting_agent_0_max": 12, "optimal_onion_potting_agent_1_mean": 6.87, "optimal_onion_potting_agent_1_min": 1, "optimal_onion_potting_agent_1_max": 15, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 5.98, "viable_onion_potting_agent_0_min": 1, "viable_onion_potting_agent_0_max": 12, "viable_onion_potting_agent_1_mean": 6.87, "viable_onion_potting_agent_1_min": 1, "viable_onion_potting_agent_1_max": 15, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [203.0, 136.0, 112.0, 259.0, 164.0, 150.0, 167.0, 93.0, 178.0, 227.0, 236.0, 250.0, 218.0, 169.0, 145.0, 159.0, 41.0, 12.0, 144.0, 131.0, 298.0, 47.0, 201.0, 213.0, 176.0, 155.0, 238.0, 115.0, 219.0, 82.0, 216.0, 148.0, 245.0, 121.0, 207.0, 97.0, 205.0, 61.0, 126.0, 104.0, 179.0, 296.0, 72.0, 126.0, 172.0, 17.0, 155.0, 207.0, 227.0, 101.0, 110.0, 101.0, 207.0, 77.0, 155.0, 159.0, 155.0, 158.0, 150.0, 96.0, 81.0, 125.0, 155.0, 42.0, 270.0, 155.0, 147.0, 75.0, 144.0, 88.0, 37.0, 77.0, 106.0, 117.0, 253.0, 98.0, 179.0, 104.0, 139.0, 152.0, 64.0, 74.0, 273.0, 67.0, 150.0, 163.0, 78.0, 68.0, 141.0, 187.0, 39.0, 158.0, 202.0, 301.0, 153.0, 247.0, 199.0, 163.0, 115.0, 193.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [103.0, 100.0, 70.0, 66.0, 69.0, 43.0, 134.0, 125.0, 81.0, 83.0, 62.0, 88.0, 71.0, 96.0, 39.0, 54.0, 89.0, 89.0, 104.0, 123.0, 124.0, 112.0, 119.0, 131.0, 111.0, 107.0, 73.0, 96.0, 74.0, 71.0, 60.0, 99.0, 19.0, 22.0, 6.0, 6.0, 77.0, 67.0, 66.0, 65.0, 161.0, 137.0, 9.0, 38.0, 92.0, 109.0, 106.0, 107.0, 79.0, 97.0, 74.0, 81.0, 107.0, 131.0, 56.0, 59.0, 106.0, 113.0, 40.0, 42.0, 111.0, 105.0, 55.0, 93.0, 126.0, 119.0, 62.0, 59.0, 103.0, 104.0, 45.0, 52.0, 105.0, 100.0, 28.0, 33.0, 65.0, 61.0, 51.0, 53.0, 92.0, 87.0, 143.0, 153.0, 32.0, 40.0, 69.0, 57.0, 75.0, 97.0, 3.0, 14.0, 79.0, 76.0, 107.0, 100.0, 102.0, 125.0, 37.0, 64.0, 49.0, 61.0, 42.0, 59.0, 104.0, 103.0, 43.0, 34.0, 68.0, 87.0, 77.0, 
82.0, 64.0, 91.0, 74.0, 84.0, 84.0, 66.0, 50.0, 46.0, 42.0, 39.0, 69.0, 56.0, 86.0, 69.0, 23.0, 19.0, 138.0, 132.0, 65.0, 90.0, 74.0, 73.0, 33.0, 42.0, 65.0, 79.0, 41.0, 47.0, 14.0, 23.0, 26.0, 51.0, 61.0, 45.0, 55.0, 62.0, 125.0, 128.0, 53.0, 45.0, 81.0, 98.0, 50.0, 54.0, 68.0, 71.0, 80.0, 72.0, 33.0, 31.0, 29.0, 45.0, 142.0, 131.0, 39.0, 28.0, 62.0, 88.0, 88.0, 75.0, 27.0, 51.0, 39.0, 29.0, 71.0, 70.0, 82.0, 105.0, 20.0, 19.0, 66.0, 92.0, 97.0, 105.0, 146.0, 155.0, 71.0, 82.0, 128.0, 119.0, 113.0, 86.0, 94.0, 69.0, 62.0, 53.0, 107.0, 86.0]}, "sampler_perf": {"mean_env_wait_ms": 3.16541739890218, "mean_processing_ms": 0.6987189186943766, "mean_inference_ms": 3.711599248384498}, "off_policy_estimator": {}, "info": {"num_steps_trained": 1344000, "num_steps_sampled": 716800, "sample_time_ms": 22951.468, "load_time_ms": 37.343, "grad_time_ms": 9065.669, "update_time_ms": 0.003, "learner": {"ppo": {"cur_kl_coeff": 5.551115205843844e-18, "cur_lr": 0.0010000000474974513, "total_loss": -0.004821139387786388, "policy_loss": -0.006896324921399355, "vf_loss": 28.4981746673584, "vf_explained_var": 0.6428199410438538, "kl": 0.0015486044576391578, "entropy": 1.5492569208145142, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 716800, "episodes_total": 1792, "training_iteration": 56, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-12-44", "timestamp": 1660248764, "time_this_iter_s": 30.594375133514404, "time_total_s": 7184.425572395325, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, 
"no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", 
"use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": 
false}, "time_since_restore": 7184.425572395325, "timesteps_since_restore": 716800, "iterations_since_restore": 56, "perf": {"cpu_util_percent": 38.95348837209303, "ram_util_percent": 57.599999999999994}}
-{"episode_reward_max": 305.0, "episode_reward_min": 12.0, "episode_reward_mean": 157.25, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 6.0}, "policy_reward_max": {"ppo": 161.0}, "policy_reward_mean": {"ppo": 78.625}, "custom_metrics": {"sparse_reward_mean": 41.6, "sparse_reward_min": 0, "sparse_reward_max": 100, "shaped_reward_mean": 74.05, "shaped_reward_min": 12, "shaped_reward_max": 118, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 7.82, "onion_pickup_agent_0_min": 3, "onion_pickup_agent_0_max": 14, "onion_pickup_agent_1_mean": 9.13, "onion_pickup_agent_1_min": 2, "onion_pickup_agent_1_max": 18, "useful_onion_pickup_agent_0_mean": 6.75, "useful_onion_pickup_agent_0_min": 1, "useful_onion_pickup_agent_0_max": 13, "useful_onion_pickup_agent_1_mean": 8.03, "useful_onion_pickup_agent_1_min": 1, "useful_onion_pickup_agent_1_max": 17, "onion_drop_agent_0_mean": 1.62, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 1.51, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 1.0, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 0.96, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 5.8, "potting_onion_agent_0_min": 1, "potting_onion_agent_0_max": 12, "potting_onion_agent_1_mean": 7.28, "potting_onion_agent_1_min": 1, "potting_onion_agent_1_max": 15, "dish_pickup_agent_0_mean": 4.56, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 4.7, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 1.49, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 5, "useful_dish_pickup_agent_1_mean": 1.38, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 4, "dish_drop_agent_0_mean": 1.83, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 9, "dish_drop_agent_1_mean": 1.75, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.51, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 4, "useful_dish_drop_agent_1_mean": 0.56, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 3.06, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 3.3, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 2.2, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 6, "soup_delivery_agent_1_mean": 2.25, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 5, "soup_drop_agent_0_mean": 0.78, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 7, "soup_drop_agent_1_mean": 0.91, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 5, "optimal_onion_potting_agent_0_mean": 5.8, "optimal_onion_potting_agent_0_min": 
1, "optimal_onion_potting_agent_0_max": 12, "optimal_onion_potting_agent_1_mean": 7.28, "optimal_onion_potting_agent_1_min": 1, "optimal_onion_potting_agent_1_max": 15, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 5.8, "viable_onion_potting_agent_0_min": 1, "viable_onion_potting_agent_0_max": 12, "viable_onion_potting_agent_1_mean": 7.28, "viable_onion_potting_agent_1_min": 1, "viable_onion_potting_agent_1_max": 15, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": 
[201.0, 66.0, 180.0, 121.0, 247.0, 190.0, 66.0, 139.0, 139.0, 205.0, 215.0, 267.0, 62.0, 213.0, 155.0, 181.0, 58.0, 174.0, 305.0, 207.0, 155.0, 178.0, 107.0, 166.0, 213.0, 99.0, 152.0, 246.0, 167.0, 110.0, 155.0, 108.0, 270.0, 155.0, 147.0, 75.0, 144.0, 88.0, 37.0, 77.0, 106.0, 117.0, 253.0, 98.0, 179.0, 104.0, 139.0, 152.0, 64.0, 74.0, 273.0, 67.0, 150.0, 163.0, 78.0, 68.0, 141.0, 187.0, 39.0, 158.0, 202.0, 301.0, 153.0, 247.0, 199.0, 163.0, 115.0, 193.0, 203.0, 136.0, 112.0, 259.0, 164.0, 150.0, 167.0, 93.0, 178.0, 227.0, 236.0, 250.0, 218.0, 169.0, 145.0, 159.0, 41.0, 12.0, 144.0, 131.0, 298.0, 47.0, 201.0, 213.0, 176.0, 155.0, 238.0, 115.0, 219.0, 82.0, 216.0, 148.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [95.0, 106.0, 37.0, 29.0, 94.0, 86.0, 62.0, 59.0, 113.0, 134.0, 76.0, 114.0, 26.0, 40.0, 57.0, 82.0, 57.0, 82.0, 99.0, 106.0, 100.0, 115.0, 121.0, 146.0, 25.0, 37.0, 108.0, 105.0, 76.0, 79.0, 95.0, 86.0, 22.0, 36.0, 85.0, 89.0, 148.0, 157.0, 103.0, 104.0, 71.0, 84.0, 95.0, 83.0, 56.0, 51.0, 85.0, 81.0, 111.0, 102.0, 45.0, 54.0, 70.0, 82.0, 105.0, 141.0, 76.0, 91.0, 65.0, 45.0, 68.0, 87.0, 64.0, 44.0, 138.0, 132.0, 65.0, 90.0, 74.0, 73.0, 33.0, 42.0, 65.0, 79.0, 41.0, 47.0, 14.0, 23.0, 26.0, 51.0, 61.0, 45.0, 55.0, 62.0, 125.0, 128.0, 53.0, 45.0, 81.0, 98.0, 50.0, 54.0, 68.0, 71.0, 80.0, 72.0, 33.0, 31.0, 29.0, 45.0, 142.0, 131.0, 39.0, 28.0, 62.0, 88.0, 88.0, 75.0, 27.0, 51.0, 39.0, 29.0, 71.0, 70.0, 82.0, 105.0, 20.0, 19.0, 66.0, 92.0, 97.0, 105.0, 146.0, 155.0, 
71.0, 82.0, 128.0, 119.0, 113.0, 86.0, 94.0, 69.0, 62.0, 53.0, 107.0, 86.0, 103.0, 100.0, 70.0, 66.0, 69.0, 43.0, 134.0, 125.0, 81.0, 83.0, 62.0, 88.0, 71.0, 96.0, 39.0, 54.0, 89.0, 89.0, 104.0, 123.0, 124.0, 112.0, 119.0, 131.0, 111.0, 107.0, 73.0, 96.0, 74.0, 71.0, 60.0, 99.0, 19.0, 22.0, 6.0, 6.0, 77.0, 67.0, 66.0, 65.0, 161.0, 137.0, 9.0, 38.0, 92.0, 109.0, 106.0, 107.0, 79.0, 97.0, 74.0, 81.0, 107.0, 131.0, 56.0, 59.0, 106.0, 113.0, 40.0, 42.0, 111.0, 105.0, 55.0, 93.0]}, "sampler_perf": {"mean_env_wait_ms": 3.1176592492930246, "mean_processing_ms": 0.6892173986565695, "mean_inference_ms": 3.663860513787198}, "off_policy_estimator": {}, "info": {"num_steps_trained": 1368000, "num_steps_sampled": 729600, "sample_time_ms": 22935.784, "load_time_ms": 37.435, "grad_time_ms": 8913.484, "update_time_ms": 0.003, "learner": {"ppo": {"cur_kl_coeff": 2.775557602921922e-18, "cur_lr": 0.0010000000474974513, "total_loss": -0.004853060003370047, "policy_loss": -0.0074228327721357346, "vf_loss": 33.384822845458984, "vf_explained_var": 0.6208257079124451, "kl": 0.0016279626870527864, "entropy": 1.5374183654785156, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 729600, "episodes_total": 1824, "training_iteration": 57, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-13-15", "timestamp": 1660248795, "time_this_iter_s": 30.907179594039917, "time_total_s": 7215.332751989365, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, 
"max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", 
"synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 7215.332751989365, "timesteps_since_restore": 729600, 
"iterations_since_restore": 57, "perf": {"cpu_util_percent": 39.13636363636363, "ram_util_percent": 57.62499999999999}}
-{"episode_reward_max": 305.0, "episode_reward_min": 12.0, "episode_reward_mean": 166.72, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 6.0}, "policy_reward_max": {"ppo": 161.0}, "policy_reward_mean": {"ppo": 83.36}, "custom_metrics": {"sparse_reward_mean": 44.8, "sparse_reward_min": 0, "sparse_reward_max": 100, "shaped_reward_mean": 77.12, "shaped_reward_min": 12, "shaped_reward_max": 118, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 7.95, "onion_pickup_agent_0_min": 3, "onion_pickup_agent_0_max": 15, "onion_pickup_agent_1_mean": 9.49, "onion_pickup_agent_1_min": 2, "onion_pickup_agent_1_max": 18, "useful_onion_pickup_agent_0_mean": 7.04, "useful_onion_pickup_agent_0_min": 2, "useful_onion_pickup_agent_0_max": 15, "useful_onion_pickup_agent_1_mean": 8.32, "useful_onion_pickup_agent_1_min": 1, "useful_onion_pickup_agent_1_max": 17, "onion_drop_agent_0_mean": 1.51, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 1.54, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.99, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, "useful_onion_drop_agent_1_mean": 0.99, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 6.02, "potting_onion_agent_0_min": 2, "potting_onion_agent_0_max": 15, "potting_onion_agent_1_mean": 7.65, "potting_onion_agent_1_min": 1, "potting_onion_agent_1_max": 15, "dish_pickup_agent_0_mean": 4.56, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 4.61, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 1.54, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 5, "useful_dish_pickup_agent_1_mean": 1.48, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 5, "dish_drop_agent_0_mean": 1.63, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 1.71, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.54, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 5, "useful_dish_drop_agent_1_mean": 0.52, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 4, "soup_pickup_agent_0_mean": 3.17, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 3.16, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 2.43, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 6, "soup_delivery_agent_1_mean": 2.24, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 5, "soup_drop_agent_0_mean": 0.65, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 5, "soup_drop_agent_1_mean": 0.75, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 5, "optimal_onion_potting_agent_0_mean": 6.02, 
"optimal_onion_potting_agent_0_min": 2, "optimal_onion_potting_agent_0_max": 15, "optimal_onion_potting_agent_1_mean": 7.65, "optimal_onion_potting_agent_1_min": 1, "optimal_onion_potting_agent_1_max": 15, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 6.02, "viable_onion_potting_agent_0_min": 2, "viable_onion_potting_agent_0_max": 15, "viable_onion_potting_agent_1_mean": 7.65, "viable_onion_potting_agent_1_min": 1, "viable_onion_potting_agent_1_max": 15, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [120.0, 115.0, 175.0, 270.0, 118.0, 90.0, 239.0, 267.0, 206.0, 239.0, 256.0, 67.0, 87.0, 93.0, 43.0, 230.0, 164.0, 193.0, 148.0, 222.0, 158.0, 138.0, 144.0, 171.0, 181.0, 241.0, 239.0, 93.0, 167.0, 152.0, 164.0, 263.0, 199.0, 163.0, 115.0, 193.0, 203.0, 136.0, 112.0, 259.0, 164.0, 150.0, 167.0, 93.0, 178.0, 227.0, 236.0, 250.0, 218.0, 169.0, 145.0, 159.0, 41.0, 12.0, 144.0, 131.0, 298.0, 47.0, 201.0, 213.0, 176.0, 155.0, 238.0, 115.0, 219.0, 82.0, 216.0, 148.0, 201.0, 66.0, 180.0, 121.0, 247.0, 190.0, 66.0, 139.0, 139.0, 205.0, 215.0, 267.0, 62.0, 213.0, 155.0, 181.0, 58.0, 174.0, 305.0, 207.0, 155.0, 178.0, 107.0, 166.0, 213.0, 99.0, 152.0, 246.0, 167.0, 110.0, 155.0, 108.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [65.0, 55.0, 50.0, 65.0, 83.0, 92.0, 134.0, 136.0, 59.0, 59.0, 44.0, 46.0, 107.0, 132.0, 129.0, 138.0, 93.0, 113.0, 119.0, 120.0, 122.0, 134.0, 36.0, 31.0, 36.0, 51.0, 45.0, 48.0, 25.0, 18.0, 113.0, 117.0, 72.0, 92.0, 90.0, 103.0, 82.0, 66.0, 123.0, 99.0, 82.0, 76.0, 52.0, 86.0, 72.0, 72.0, 84.0, 87.0, 93.0, 88.0, 130.0, 111.0, 138.0, 101.0, 45.0, 48.0, 79.0, 88.0, 66.0, 86.0, 79.0, 85.0, 138.0, 125.0, 113.0, 86.0, 94.0, 69.0, 62.0, 53.0, 107.0, 86.0, 103.0, 100.0, 70.0, 66.0, 69.0, 43.0, 134.0, 125.0, 81.0, 83.0, 62.0, 88.0, 71.0, 96.0, 39.0, 54.0, 89.0, 89.0, 104.0, 123.0, 124.0, 112.0, 119.0, 131.0, 111.0, 107.0, 73.0, 96.0, 74.0, 71.0, 60.0, 99.0, 19.0, 22.0, 6.0, 6.0, 77.0, 67.0, 
66.0, 65.0, 161.0, 137.0, 9.0, 38.0, 92.0, 109.0, 106.0, 107.0, 79.0, 97.0, 74.0, 81.0, 107.0, 131.0, 56.0, 59.0, 106.0, 113.0, 40.0, 42.0, 111.0, 105.0, 55.0, 93.0, 95.0, 106.0, 37.0, 29.0, 94.0, 86.0, 62.0, 59.0, 113.0, 134.0, 76.0, 114.0, 26.0, 40.0, 57.0, 82.0, 57.0, 82.0, 99.0, 106.0, 100.0, 115.0, 121.0, 146.0, 25.0, 37.0, 108.0, 105.0, 76.0, 79.0, 95.0, 86.0, 22.0, 36.0, 85.0, 89.0, 148.0, 157.0, 103.0, 104.0, 71.0, 84.0, 95.0, 83.0, 56.0, 51.0, 85.0, 81.0, 111.0, 102.0, 45.0, 54.0, 70.0, 82.0, 105.0, 141.0, 76.0, 91.0, 65.0, 45.0, 68.0, 87.0, 64.0, 44.0]}, "sampler_perf": {"mean_env_wait_ms": 3.0715992457802104, "mean_processing_ms": 0.6800724399450163, "mean_inference_ms": 3.6184030024872835}, "off_policy_estimator": {}, "info": {"num_steps_trained": 1392000, "num_steps_sampled": 742400, "sample_time_ms": 22627.432, "load_time_ms": 37.419, "grad_time_ms": 8703.731, "update_time_ms": 0.003, "learner": {"ppo": {"cur_kl_coeff": 1.387778801460961e-18, "cur_lr": 0.0010000000474974513, "total_loss": -0.004274281207472086, "policy_loss": -0.006773001980036497, "vf_loss": 32.668846130371094, "vf_explained_var": 0.6147891879081726, "kl": 0.0016452163690701127, "entropy": 1.5363364219665527, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 742400, "episodes_total": 1856, "training_iteration": 58, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-13-46", "timestamp": 1660248826, "time_this_iter_s": 31.19256901741028, "time_total_s": 7246.525321006775, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], 
"free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 
0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, 
"_fake_gpus": false}, "time_since_restore": 7246.525321006775, "timesteps_since_restore": 742400, "iterations_since_restore": 58, "perf": {"cpu_util_percent": 37.85, "ram_util_percent": 57.636363636363626}}
-{"episode_reward_max": 347.0, "episode_reward_min": 9.0, "episode_reward_mean": 173.36, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 3.0}, "policy_reward_max": {"ppo": 177.0}, "policy_reward_mean": {"ppo": 86.68}, "custom_metrics": {"sparse_reward_mean": 48.4, "sparse_reward_min": 0, "sparse_reward_max": 120, "shaped_reward_mean": 76.56, "shaped_reward_min": 9, "shaped_reward_max": 110, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 8.08, "onion_pickup_agent_0_min": 1, "onion_pickup_agent_0_max": 18, "onion_pickup_agent_1_mean": 9.37, "onion_pickup_agent_1_min": 4, "onion_pickup_agent_1_max": 18, "useful_onion_pickup_agent_0_mean": 7.26, "useful_onion_pickup_agent_0_min": 1, "useful_onion_pickup_agent_0_max": 15, "useful_onion_pickup_agent_1_mean": 8.3, "useful_onion_pickup_agent_1_min": 1, "useful_onion_pickup_agent_1_max": 17, "onion_drop_agent_0_mean": 1.39, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 1.49, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.82, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, "useful_onion_drop_agent_1_mean": 0.89, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 6.31, "potting_onion_agent_0_min": 1, "potting_onion_agent_0_max": 15, "potting_onion_agent_1_mean": 7.56, "potting_onion_agent_1_min": 1, "potting_onion_agent_1_max": 15, "dish_pickup_agent_0_mean": 4.77, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 4.59, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 1.36, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 4, "useful_dish_pickup_agent_1_mean": 1.44, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 5, "dish_drop_agent_0_mean": 1.85, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 1.71, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 6, "useful_dish_drop_agent_0_mean": 0.56, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 5, "useful_dish_drop_agent_1_mean": 0.55, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 4, "soup_pickup_agent_0_mean": 3.05, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 6, "soup_pickup_agent_1_mean": 3.11, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 2.34, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 5, "soup_delivery_agent_1_mean": 2.34, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 5, "soup_drop_agent_0_mean": 0.64, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 5, "soup_drop_agent_1_mean": 0.64, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 5, "optimal_onion_potting_agent_0_mean": 6.31, 
"optimal_onion_potting_agent_0_min": 1, "optimal_onion_potting_agent_0_max": 15, "optimal_onion_potting_agent_1_mean": 7.56, "optimal_onion_potting_agent_1_min": 1, "optimal_onion_potting_agent_1_max": 15, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 6.31, "viable_onion_potting_agent_0_min": 1, "viable_onion_potting_agent_0_max": 15, "viable_onion_potting_agent_1_mean": 7.56, "viable_onion_potting_agent_1_min": 1, "viable_onion_potting_agent_1_max": 15, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [195.0, 147.0, 179.0, 305.0, 302.0, 199.0, 156.0, 201.0, 202.0, 53.0, 144.0, 131.0, 215.0, 347.0, 204.0, 241.0, 209.0, 112.0, 85.0, 238.0, 207.0, 158.0, 193.0, 185.0, 244.0, 185.0, 194.0, 193.0, 170.0, 9.0, 253.0, 115.0, 219.0, 82.0, 216.0, 148.0, 201.0, 66.0, 180.0, 121.0, 247.0, 190.0, 66.0, 139.0, 139.0, 205.0, 215.0, 267.0, 62.0, 213.0, 155.0, 181.0, 58.0, 174.0, 305.0, 207.0, 155.0, 178.0, 107.0, 166.0, 213.0, 99.0, 152.0, 246.0, 167.0, 110.0, 155.0, 108.0, 120.0, 115.0, 175.0, 270.0, 118.0, 90.0, 239.0, 267.0, 206.0, 239.0, 256.0, 67.0, 87.0, 93.0, 43.0, 230.0, 164.0, 193.0, 148.0, 222.0, 158.0, 138.0, 144.0, 171.0, 181.0, 241.0, 239.0, 93.0, 167.0, 152.0, 164.0, 263.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [87.0, 108.0, 90.0, 57.0, 94.0, 85.0, 140.0, 165.0, 160.0, 142.0, 93.0, 106.0, 81.0, 75.0, 93.0, 108.0, 97.0, 105.0, 25.0, 28.0, 69.0, 75.0, 63.0, 68.0, 127.0, 88.0, 170.0, 177.0, 97.0, 107.0, 122.0, 119.0, 103.0, 106.0, 58.0, 54.0, 40.0, 45.0, 110.0, 128.0, 105.0, 102.0, 66.0, 92.0, 83.0, 110.0, 94.0, 91.0, 119.0, 125.0, 104.0, 81.0, 102.0, 92.0, 91.0, 102.0, 83.0, 87.0, 6.0, 3.0, 123.0, 130.0, 64.0, 51.0, 106.0, 113.0, 40.0, 42.0, 111.0, 105.0, 55.0, 93.0, 95.0, 106.0, 37.0, 29.0, 94.0, 86.0, 62.0, 59.0, 113.0, 134.0, 76.0, 114.0, 26.0, 40.0, 57.0, 82.0, 57.0, 82.0, 99.0, 106.0, 100.0, 115.0, 121.0, 146.0, 25.0, 37.0, 108.0, 105.0, 76.0, 79.0, 95.0, 86.0, 22.0, 36.0, 85.0, 89.0, 148.0, 
157.0, 103.0, 104.0, 71.0, 84.0, 95.0, 83.0, 56.0, 51.0, 85.0, 81.0, 111.0, 102.0, 45.0, 54.0, 70.0, 82.0, 105.0, 141.0, 76.0, 91.0, 65.0, 45.0, 68.0, 87.0, 64.0, 44.0, 65.0, 55.0, 50.0, 65.0, 83.0, 92.0, 134.0, 136.0, 59.0, 59.0, 44.0, 46.0, 107.0, 132.0, 129.0, 138.0, 93.0, 113.0, 119.0, 120.0, 122.0, 134.0, 36.0, 31.0, 36.0, 51.0, 45.0, 48.0, 25.0, 18.0, 113.0, 117.0, 72.0, 92.0, 90.0, 103.0, 82.0, 66.0, 123.0, 99.0, 82.0, 76.0, 52.0, 86.0, 72.0, 72.0, 84.0, 87.0, 93.0, 88.0, 130.0, 111.0, 138.0, 101.0, 45.0, 48.0, 79.0, 88.0, 66.0, 86.0, 79.0, 85.0, 138.0, 125.0]}, "sampler_perf": {"mean_env_wait_ms": 3.027099137658147, "mean_processing_ms": 0.6712374159631969, "mean_inference_ms": 3.574879048855646}, "off_policy_estimator": {}, "info": {"num_steps_trained": 1416000, "num_steps_sampled": 755200, "sample_time_ms": 22439.192, "load_time_ms": 37.393, "grad_time_ms": 8622.999, "update_time_ms": 0.003, "learner": {"ppo": {"cur_kl_coeff": 6.938894007304805e-19, "cur_lr": 0.0010000000474974513, "total_loss": -0.005061946343630552, "policy_loss": -0.0077269431203603745, "vf_loss": 34.30827713012695, "vf_explained_var": 0.6426100730895996, "kl": 0.0014075502986088395, "entropy": 1.5316654443740845, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 755200, "episodes_total": 1888, "training_iteration": 59, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-14-19", "timestamp": 1660248859, "time_this_iter_s": 32.82003712654114, "time_total_s": 7279.345358133316, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], 
"free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 
0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, 
"_fake_gpus": false}, "time_since_restore": 7279.345358133316, "timesteps_since_restore": 755200, "iterations_since_restore": 59, "perf": {"cpu_util_percent": 38.134782608695645, "ram_util_percent": 57.654347826086926}}
-{"episode_reward_max": 347.0, "episode_reward_min": 9.0, "episode_reward_mean": 183.93, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 3.0}, "policy_reward_max": {"ppo": 177.0}, "policy_reward_mean": {"ppo": 91.965}, "custom_metrics": {"sparse_reward_mean": 52.8, "sparse_reward_min": 0, "sparse_reward_max": 120, "shaped_reward_mean": 78.33, "shaped_reward_min": 9, "shaped_reward_max": 124, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 8.49, "onion_pickup_agent_0_min": 1, "onion_pickup_agent_0_max": 18, "onion_pickup_agent_1_mean": 9.58, "onion_pickup_agent_1_min": 4, "onion_pickup_agent_1_max": 17, "useful_onion_pickup_agent_0_mean": 7.62, "useful_onion_pickup_agent_0_min": 1, "useful_onion_pickup_agent_0_max": 15, "useful_onion_pickup_agent_1_mean": 8.41, "useful_onion_pickup_agent_1_min": 1, "useful_onion_pickup_agent_1_max": 16, "onion_drop_agent_0_mean": 1.48, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 1.65, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.88, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, "useful_onion_drop_agent_1_mean": 0.99, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 6.64, "potting_onion_agent_0_min": 1, "potting_onion_agent_0_max": 15, "potting_onion_agent_1_mean": 7.61, "potting_onion_agent_1_min": 1, "potting_onion_agent_1_max": 14, "dish_pickup_agent_0_mean": 4.84, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 4.61, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 1.44, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 6, "useful_dish_pickup_agent_1_mean": 1.42, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 5, "dish_drop_agent_0_mean": 1.76, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 1.81, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 6, "useful_dish_drop_agent_0_mean": 0.59, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 5, "useful_dish_drop_agent_1_mean": 0.56, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 4, "soup_pickup_agent_0_mean": 3.14, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 6, "soup_pickup_agent_1_mean": 2.85, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 2.47, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 6, "soup_delivery_agent_1_mean": 2.33, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 5, "soup_drop_agent_0_mean": 0.56, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 3, "soup_drop_agent_1_mean": 0.4, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 3, "optimal_onion_potting_agent_0_mean": 6.64, 
"optimal_onion_potting_agent_0_min": 1, "optimal_onion_potting_agent_0_max": 15, "optimal_onion_potting_agent_1_mean": 7.61, "optimal_onion_potting_agent_1_min": 1, "optimal_onion_potting_agent_1_max": 14, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 6.64, "viable_onion_potting_agent_0_min": 1, "viable_onion_potting_agent_0_max": 15, "viable_onion_potting_agent_1_mean": 7.61, "viable_onion_potting_agent_1_min": 1, "viable_onion_potting_agent_1_max": 14, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [167.0, 264.0, 176.0, 241.0, 333.0, 82.0, 213.0, 145.0, 167.0, 188.0, 259.0, 307.0, 74.0, 213.0, 299.0, 284.0, 193.0, 136.0, 161.0, 188.0, 238.0, 79.0, 193.0, 241.0, 133.0, 172.0, 238.0, 168.0, 256.0, 213.0, 196.0, 212.0, 167.0, 110.0, 155.0, 108.0, 120.0, 115.0, 175.0, 270.0, 118.0, 90.0, 239.0, 267.0, 206.0, 239.0, 256.0, 67.0, 87.0, 93.0, 43.0, 230.0, 164.0, 193.0, 148.0, 222.0, 158.0, 138.0, 144.0, 171.0, 181.0, 241.0, 239.0, 93.0, 167.0, 152.0, 164.0, 263.0, 195.0, 147.0, 179.0, 305.0, 302.0, 199.0, 156.0, 201.0, 202.0, 53.0, 144.0, 131.0, 215.0, 347.0, 204.0, 241.0, 209.0, 112.0, 85.0, 238.0, 207.0, 158.0, 193.0, 185.0, 244.0, 185.0, 194.0, 193.0, 170.0, 9.0, 253.0, 115.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [75.0, 92.0, 131.0, 133.0, 79.0, 97.0, 128.0, 113.0, 160.0, 173.0, 35.0, 47.0, 111.0, 102.0, 74.0, 71.0, 78.0, 89.0, 83.0, 105.0, 133.0, 126.0, 161.0, 146.0, 37.0, 37.0, 94.0, 119.0, 155.0, 144.0, 142.0, 142.0, 86.0, 107.0, 65.0, 71.0, 79.0, 82.0, 91.0, 97.0, 129.0, 109.0, 34.0, 45.0, 103.0, 90.0, 128.0, 113.0, 73.0, 60.0, 88.0, 84.0, 120.0, 118.0, 86.0, 82.0, 128.0, 128.0, 113.0, 100.0, 108.0, 88.0, 95.0, 117.0, 76.0, 91.0, 65.0, 45.0, 68.0, 87.0, 64.0, 44.0, 65.0, 55.0, 50.0, 65.0, 83.0, 92.0, 134.0, 136.0, 59.0, 59.0, 44.0, 46.0, 107.0, 132.0, 129.0, 138.0, 93.0, 113.0, 119.0, 120.0, 122.0, 134.0, 36.0, 31.0, 36.0, 51.0, 45.0, 48.0, 25.0, 18.0, 113.0, 117.0, 72.0, 92.0, 90.0, 
103.0, 82.0, 66.0, 123.0, 99.0, 82.0, 76.0, 52.0, 86.0, 72.0, 72.0, 84.0, 87.0, 93.0, 88.0, 130.0, 111.0, 138.0, 101.0, 45.0, 48.0, 79.0, 88.0, 66.0, 86.0, 79.0, 85.0, 138.0, 125.0, 87.0, 108.0, 90.0, 57.0, 94.0, 85.0, 140.0, 165.0, 160.0, 142.0, 93.0, 106.0, 81.0, 75.0, 93.0, 108.0, 97.0, 105.0, 25.0, 28.0, 69.0, 75.0, 63.0, 68.0, 127.0, 88.0, 170.0, 177.0, 97.0, 107.0, 122.0, 119.0, 103.0, 106.0, 58.0, 54.0, 40.0, 45.0, 110.0, 128.0, 105.0, 102.0, 66.0, 92.0, 83.0, 110.0, 94.0, 91.0, 119.0, 125.0, 104.0, 81.0, 102.0, 92.0, 91.0, 102.0, 83.0, 87.0, 6.0, 3.0, 123.0, 130.0, 64.0, 51.0]}, "sampler_perf": {"mean_env_wait_ms": 2.9840674048083304, "mean_processing_ms": 0.6627022470508764, "mean_inference_ms": 3.532781736604636}, "off_policy_estimator": {}, "info": {"num_steps_trained": 1440000, "num_steps_sampled": 768000, "sample_time_ms": 22393.025, "load_time_ms": 37.431, "grad_time_ms": 8732.011, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 3.4694470036524025e-19, "cur_lr": 0.0010000000474974513, "total_loss": -0.0032793853897601366, "policy_loss": -0.0061605386435985565, "vf_loss": 36.42392349243164, "vf_explained_var": 0.6542922854423523, "kl": 0.0015746770659461617, "entropy": 1.5224775075912476, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 768000, "episodes_total": 1920, "training_iteration": 60, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-14-51", "timestamp": 1660248891, "time_this_iter_s": 31.919984817504883, "time_total_s": 7311.265342950821, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", 
"fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function 
get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, 
"kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 7311.265342950821, "timesteps_since_restore": 768000, "iterations_since_restore": 60, "perf": {"cpu_util_percent": 38.15, "ram_util_percent": 57.70217391304345}}
-{"episode_reward_max": 347.0, "episode_reward_min": 9.0, "episode_reward_mean": 199.95, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 3.0}, "policy_reward_max": {"ppo": 185.0}, "policy_reward_mean": {"ppo": 99.975}, "custom_metrics": {"sparse_reward_mean": 59.6, "sparse_reward_min": 0, "sparse_reward_max": 120, "shaped_reward_mean": 80.75, "shaped_reward_min": 9, "shaped_reward_max": 124, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 8.59, "onion_pickup_agent_0_min": 1, "onion_pickup_agent_0_max": 18, "onion_pickup_agent_1_mean": 9.82, "onion_pickup_agent_1_min": 4, "onion_pickup_agent_1_max": 17, "useful_onion_pickup_agent_0_mean": 7.86, "useful_onion_pickup_agent_0_min": 1, "useful_onion_pickup_agent_0_max": 15, "useful_onion_pickup_agent_1_mean": 8.95, "useful_onion_pickup_agent_1_min": 1, "useful_onion_pickup_agent_1_max": 16, "onion_drop_agent_0_mean": 1.32, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 1.42, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.68, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, "useful_onion_drop_agent_1_mean": 0.78, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 6.94, "potting_onion_agent_0_min": 1, "potting_onion_agent_0_max": 14, "potting_onion_agent_1_mean": 8.02, "potting_onion_agent_1_min": 1, "potting_onion_agent_1_max": 15, "dish_pickup_agent_0_mean": 5.14, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 4.61, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 1.45, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 6, "useful_dish_pickup_agent_1_mean": 1.24, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 5, "dish_drop_agent_0_mean": 1.86, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 1.81, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 9, "useful_dish_drop_agent_0_mean": 0.61, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.59, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 3.19, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 6, "soup_pickup_agent_1_mean": 2.78, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 6, "soup_delivery_agent_0_mean": 2.59, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 6, "soup_delivery_agent_1_mean": 2.44, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 5, "soup_drop_agent_0_mean": 0.48, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 3, "soup_drop_agent_1_mean": 0.28, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 3, "optimal_onion_potting_agent_0_mean": 6.94, 
"optimal_onion_potting_agent_0_min": 1, "optimal_onion_potting_agent_0_max": 14, "optimal_onion_potting_agent_1_mean": 8.02, "optimal_onion_potting_agent_1_min": 1, "optimal_onion_potting_agent_1_max": 15, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 6.94, "viable_onion_potting_agent_0_min": 1, "viable_onion_potting_agent_0_max": 14, "viable_onion_potting_agent_1_mean": 8.02, "viable_onion_potting_agent_1_min": 1, "viable_onion_potting_agent_1_max": 15, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [255.0, 247.0, 262.0, 209.0, 147.0, 316.0, 267.0, 155.0, 275.0, 139.0, 256.0, 341.0, 209.0, 204.0, 250.0, 164.0, 86.0, 301.0, 247.0, 249.0, 244.0, 136.0, 123.0, 185.0, 167.0, 99.0, 153.0, 264.0, 210.0, 298.0, 150.0, 241.0, 167.0, 152.0, 164.0, 263.0, 195.0, 147.0, 179.0, 305.0, 302.0, 199.0, 156.0, 201.0, 202.0, 53.0, 144.0, 131.0, 215.0, 347.0, 204.0, 241.0, 209.0, 112.0, 85.0, 238.0, 207.0, 158.0, 193.0, 185.0, 244.0, 185.0, 194.0, 193.0, 170.0, 9.0, 253.0, 115.0, 167.0, 264.0, 176.0, 241.0, 333.0, 82.0, 213.0, 145.0, 167.0, 188.0, 259.0, 307.0, 74.0, 213.0, 299.0, 284.0, 193.0, 136.0, 161.0, 188.0, 238.0, 79.0, 193.0, 241.0, 133.0, 172.0, 238.0, 168.0, 256.0, 213.0, 196.0, 212.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [115.0, 140.0, 119.0, 128.0, 140.0, 122.0, 100.0, 109.0, 68.0, 79.0, 153.0, 163.0, 127.0, 140.0, 69.0, 86.0, 150.0, 125.0, 78.0, 61.0, 138.0, 118.0, 185.0, 156.0, 118.0, 91.0, 101.0, 103.0, 120.0, 130.0, 91.0, 73.0, 53.0, 33.0, 141.0, 160.0, 135.0, 112.0, 116.0, 133.0, 108.0, 136.0, 63.0, 73.0, 66.0, 57.0, 97.0, 88.0, 87.0, 80.0, 36.0, 63.0, 84.0, 69.0, 127.0, 137.0, 114.0, 96.0, 136.0, 162.0, 76.0, 74.0, 128.0, 113.0, 79.0, 88.0, 66.0, 86.0, 79.0, 85.0, 138.0, 125.0, 87.0, 108.0, 90.0, 57.0, 94.0, 85.0, 140.0, 165.0, 160.0, 142.0, 93.0, 106.0, 81.0, 75.0, 93.0, 108.0, 97.0, 105.0, 25.0, 28.0, 69.0, 75.0, 63.0, 68.0, 127.0, 88.0, 170.0, 177.0, 97.0, 107.0, 122.0, 119.0, 103.0, 
106.0, 58.0, 54.0, 40.0, 45.0, 110.0, 128.0, 105.0, 102.0, 66.0, 92.0, 83.0, 110.0, 94.0, 91.0, 119.0, 125.0, 104.0, 81.0, 102.0, 92.0, 91.0, 102.0, 83.0, 87.0, 6.0, 3.0, 123.0, 130.0, 64.0, 51.0, 75.0, 92.0, 131.0, 133.0, 79.0, 97.0, 128.0, 113.0, 160.0, 173.0, 35.0, 47.0, 111.0, 102.0, 74.0, 71.0, 78.0, 89.0, 83.0, 105.0, 133.0, 126.0, 161.0, 146.0, 37.0, 37.0, 94.0, 119.0, 155.0, 144.0, 142.0, 142.0, 86.0, 107.0, 65.0, 71.0, 79.0, 82.0, 91.0, 97.0, 129.0, 109.0, 34.0, 45.0, 103.0, 90.0, 128.0, 113.0, 73.0, 60.0, 88.0, 84.0, 120.0, 118.0, 86.0, 82.0, 128.0, 128.0, 113.0, 100.0, 108.0, 88.0, 95.0, 117.0]}, "sampler_perf": {"mean_env_wait_ms": 2.942382408677298, "mean_processing_ms": 0.6544447985694, "mean_inference_ms": 3.4920936282643287}, "off_policy_estimator": {}, "info": {"num_steps_trained": 1464000, "num_steps_sampled": 780800, "sample_time_ms": 22508.979, "load_time_ms": 37.523, "grad_time_ms": 8924.852, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 1.7347235018262012e-19, "cur_lr": 0.0010000000474974513, "total_loss": -0.0013272188371047378, "policy_loss": -0.004394210409373045, "vf_loss": 38.1645622253418, "vf_explained_var": 0.6507807374000549, "kl": 0.002042042789980769, "entropy": 1.4989361763000488, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 780800, "episodes_total": 1952, "training_iteration": 61, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-15-24", "timestamp": 1660248924, "time_this_iter_s": 33.02385997772217, "time_total_s": 7344.289202928543, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": 
"tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function 
get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, 
"kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 7344.289202928543, "timesteps_since_restore": 780800, "iterations_since_restore": 61, "perf": {"cpu_util_percent": 36.91521739130434, "ram_util_percent": 57.791304347826106}}
-{"episode_reward_max": 341.0, "episode_reward_min": 9.0, "episode_reward_mean": 206.12, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 3.0}, "policy_reward_max": {"ppo": 185.0}, "policy_reward_mean": {"ppo": 103.06}, "custom_metrics": {"sparse_reward_mean": 60.8, "sparse_reward_min": 0, "sparse_reward_max": 120, "shaped_reward_mean": 84.52, "shaped_reward_min": 9, "shaped_reward_max": 130, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 8.75, "onion_pickup_agent_0_min": 2, "onion_pickup_agent_0_max": 16, "onion_pickup_agent_1_mean": 10.13, "onion_pickup_agent_1_min": 4, "onion_pickup_agent_1_max": 17, "useful_onion_pickup_agent_0_mean": 8.0, "useful_onion_pickup_agent_0_min": 2, "useful_onion_pickup_agent_0_max": 16, "useful_onion_pickup_agent_1_mean": 9.26, "useful_onion_pickup_agent_1_min": 1, "useful_onion_pickup_agent_1_max": 16, "onion_drop_agent_0_mean": 1.19, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 1.38, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.63, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.8, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 7.2, "potting_onion_agent_0_min": 2, "potting_onion_agent_0_max": 14, "potting_onion_agent_1_mean": 8.37, "potting_onion_agent_1_min": 1, "potting_onion_agent_1_max": 15, "dish_pickup_agent_0_mean": 5.08, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 4.7, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 1.62, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 6, "useful_dish_pickup_agent_1_mean": 1.3, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 6, "dish_drop_agent_0_mean": 1.76, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 1.76, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 9, "useful_dish_drop_agent_0_mean": 0.61, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.54, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 3.3, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 2.86, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 2.76, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 6, "soup_delivery_agent_1_mean": 2.61, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.44, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 3, "soup_drop_agent_1_mean": 0.2, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 7.2, "optimal_onion_potting_agent_0_min": 2, 
"optimal_onion_potting_agent_0_max": 14, "optimal_onion_potting_agent_1_mean": 8.37, "optimal_onion_potting_agent_1_min": 1, "optimal_onion_potting_agent_1_max": 15, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 7.2, "viable_onion_potting_agent_0_min": 2, "viable_onion_potting_agent_0_max": 14, "viable_onion_potting_agent_1_mean": 8.37, "viable_onion_potting_agent_1_min": 1, "viable_onion_potting_agent_1_max": 15, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": 
[176.0, 235.0, 158.0, 301.0, 201.0, 221.0, 176.0, 223.0, 169.0, 313.0, 196.0, 215.0, 118.0, 166.0, 290.0, 81.0, 145.0, 196.0, 161.0, 310.0, 256.0, 298.0, 244.0, 110.0, 319.0, 179.0, 152.0, 207.0, 301.0, 127.0, 307.0, 236.0, 170.0, 9.0, 253.0, 115.0, 167.0, 264.0, 176.0, 241.0, 333.0, 82.0, 213.0, 145.0, 167.0, 188.0, 259.0, 307.0, 74.0, 213.0, 299.0, 284.0, 193.0, 136.0, 161.0, 188.0, 238.0, 79.0, 193.0, 241.0, 133.0, 172.0, 238.0, 168.0, 256.0, 213.0, 196.0, 212.0, 255.0, 247.0, 262.0, 209.0, 147.0, 316.0, 267.0, 155.0, 275.0, 139.0, 256.0, 341.0, 209.0, 204.0, 250.0, 164.0, 86.0, 301.0, 247.0, 249.0, 244.0, 136.0, 123.0, 185.0, 167.0, 99.0, 153.0, 264.0, 210.0, 298.0, 150.0, 241.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [83.0, 93.0, 110.0, 125.0, 80.0, 78.0, 156.0, 145.0, 100.0, 101.0, 106.0, 115.0, 82.0, 94.0, 107.0, 116.0, 99.0, 70.0, 161.0, 152.0, 102.0, 94.0, 115.0, 100.0, 58.0, 60.0, 84.0, 82.0, 150.0, 140.0, 47.0, 34.0, 71.0, 74.0, 99.0, 97.0, 72.0, 89.0, 139.0, 171.0, 139.0, 117.0, 152.0, 146.0, 119.0, 125.0, 53.0, 57.0, 144.0, 175.0, 94.0, 85.0, 67.0, 85.0, 100.0, 107.0, 133.0, 168.0, 50.0, 77.0, 165.0, 142.0, 102.0, 134.0, 83.0, 87.0, 6.0, 3.0, 123.0, 130.0, 64.0, 51.0, 75.0, 92.0, 131.0, 133.0, 79.0, 97.0, 128.0, 113.0, 160.0, 173.0, 35.0, 47.0, 111.0, 102.0, 74.0, 71.0, 78.0, 89.0, 83.0, 105.0, 133.0, 126.0, 161.0, 146.0, 37.0, 37.0, 94.0, 119.0, 155.0, 144.0, 142.0, 142.0, 86.0, 107.0, 65.0, 71.0, 79.0, 82.0, 91.0, 97.0, 129.0, 109.0, 34.0, 45.0, 103.0, 
90.0, 128.0, 113.0, 73.0, 60.0, 88.0, 84.0, 120.0, 118.0, 86.0, 82.0, 128.0, 128.0, 113.0, 100.0, 108.0, 88.0, 95.0, 117.0, 115.0, 140.0, 119.0, 128.0, 140.0, 122.0, 100.0, 109.0, 68.0, 79.0, 153.0, 163.0, 127.0, 140.0, 69.0, 86.0, 150.0, 125.0, 78.0, 61.0, 138.0, 118.0, 185.0, 156.0, 118.0, 91.0, 101.0, 103.0, 120.0, 130.0, 91.0, 73.0, 53.0, 33.0, 141.0, 160.0, 135.0, 112.0, 116.0, 133.0, 108.0, 136.0, 63.0, 73.0, 66.0, 57.0, 97.0, 88.0, 87.0, 80.0, 36.0, 63.0, 84.0, 69.0, 127.0, 137.0, 114.0, 96.0, 136.0, 162.0, 76.0, 74.0, 128.0, 113.0]}, "sampler_perf": {"mean_env_wait_ms": 2.902001918125836, "mean_processing_ms": 0.6464539836010921, "mean_inference_ms": 3.4531930074110506}, "off_policy_estimator": {}, "info": {"num_steps_trained": 1488000, "num_steps_sampled": 793600, "sample_time_ms": 22714.251, "load_time_ms": 37.548, "grad_time_ms": 9167.09, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 8.673617509131006e-20, "cur_lr": 0.0010000000474974513, "total_loss": -0.0036004248540848494, "policy_loss": -0.006528293248265982, "vf_loss": 36.78936767578125, "vf_explained_var": 0.6745734810829163, "kl": 0.0014449331210926175, "entropy": 1.5021357536315918, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 793600, "episodes_total": 1984, "training_iteration": 62, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-15-59", "timestamp": 1660248959, "time_this_iter_s": 35.12303113937378, "time_total_s": 7379.412234067917, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, 
"no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", 
"use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": 
false}, "time_since_restore": 7379.412234067917, "timesteps_since_restore": 793600, "iterations_since_restore": 62, "perf": {"cpu_util_percent": 35.66, "ram_util_percent": 57.732}}
-{"episode_reward_max": 368.0, "episode_reward_min": 9.0, "episode_reward_mean": 220.18, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 3.0}, "policy_reward_max": {"ppo": 186.0}, "policy_reward_mean": {"ppo": 110.09}, "custom_metrics": {"sparse_reward_mean": 65.8, "sparse_reward_min": 0, "sparse_reward_max": 120, "shaped_reward_mean": 88.58, "shaped_reward_min": 9, "shaped_reward_max": 130, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 8.92, "onion_pickup_agent_0_min": 2, "onion_pickup_agent_0_max": 16, "onion_pickup_agent_1_mean": 10.43, "onion_pickup_agent_1_min": 4, "onion_pickup_agent_1_max": 16, "useful_onion_pickup_agent_0_mean": 8.21, "useful_onion_pickup_agent_0_min": 2, "useful_onion_pickup_agent_0_max": 16, "useful_onion_pickup_agent_1_mean": 9.74, "useful_onion_pickup_agent_1_min": 3, "useful_onion_pickup_agent_1_max": 16, "onion_drop_agent_0_mean": 1.13, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 1.21, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.6, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, "useful_onion_drop_agent_1_mean": 0.7, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 7.47, "potting_onion_agent_0_min": 2, "potting_onion_agent_0_max": 14, "potting_onion_agent_1_mean": 8.83, "potting_onion_agent_1_min": 1, "potting_onion_agent_1_max": 15, "dish_pickup_agent_0_mean": 5.07, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 4.8, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 1.74, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 6, "useful_dish_pickup_agent_1_mean": 1.42, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 6, "dish_drop_agent_0_mean": 1.75, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 1.61, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 9, "useful_dish_drop_agent_0_mean": 0.6, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.49, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 3.29, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 3.14, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 2.73, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 5, "soup_delivery_agent_1_mean": 2.87, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.48, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 3, "soup_drop_agent_1_mean": 0.21, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 7.47, "optimal_onion_potting_agent_0_min": 
2, "optimal_onion_potting_agent_0_max": 14, "optimal_onion_potting_agent_1_mean": 8.83, "optimal_onion_potting_agent_1_min": 1, "optimal_onion_potting_agent_1_max": 15, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 7.47, "viable_onion_potting_agent_0_min": 2, "viable_onion_potting_agent_0_max": 14, "viable_onion_potting_agent_1_mean": 8.83, "viable_onion_potting_agent_1_min": 1, "viable_onion_potting_agent_1_max": 15, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": 
{"episode_reward": [316.0, 212.0, 368.0, 304.0, 253.0, 351.0, 178.0, 9.0, 122.0, 253.0, 164.0, 253.0, 213.0, 250.0, 179.0, 219.0, 330.0, 169.0, 241.0, 207.0, 319.0, 241.0, 264.0, 284.0, 307.0, 139.0, 201.0, 267.0, 71.0, 267.0, 241.0, 313.0, 256.0, 213.0, 196.0, 212.0, 255.0, 247.0, 262.0, 209.0, 147.0, 316.0, 267.0, 155.0, 275.0, 139.0, 256.0, 341.0, 209.0, 204.0, 250.0, 164.0, 86.0, 301.0, 247.0, 249.0, 244.0, 136.0, 123.0, 185.0, 167.0, 99.0, 153.0, 264.0, 210.0, 298.0, 150.0, 241.0, 176.0, 235.0, 158.0, 301.0, 201.0, 221.0, 176.0, 223.0, 169.0, 313.0, 196.0, 215.0, 118.0, 166.0, 290.0, 81.0, 145.0, 196.0, 161.0, 310.0, 256.0, 298.0, 244.0, 110.0, 319.0, 179.0, 152.0, 207.0, 301.0, 127.0, 307.0, 236.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [149.0, 167.0, 107.0, 105.0, 182.0, 186.0, 140.0, 164.0, 124.0, 129.0, 185.0, 166.0, 85.0, 93.0, 6.0, 3.0, 52.0, 70.0, 111.0, 142.0, 55.0, 109.0, 116.0, 137.0, 97.0, 116.0, 129.0, 121.0, 80.0, 99.0, 120.0, 99.0, 166.0, 164.0, 80.0, 89.0, 108.0, 133.0, 98.0, 109.0, 170.0, 149.0, 124.0, 117.0, 125.0, 139.0, 135.0, 149.0, 151.0, 156.0, 71.0, 68.0, 97.0, 104.0, 128.0, 139.0, 34.0, 37.0, 122.0, 145.0, 131.0, 110.0, 163.0, 150.0, 128.0, 128.0, 113.0, 100.0, 108.0, 88.0, 95.0, 117.0, 115.0, 140.0, 119.0, 128.0, 140.0, 122.0, 100.0, 109.0, 68.0, 79.0, 153.0, 163.0, 127.0, 140.0, 69.0, 86.0, 150.0, 125.0, 78.0, 61.0, 138.0, 118.0, 185.0, 156.0, 118.0, 91.0, 101.0, 103.0, 120.0, 130.0, 91.0, 73.0, 53.0, 33.0, 141.0, 160.0, 135.0, 112.0, 
116.0, 133.0, 108.0, 136.0, 63.0, 73.0, 66.0, 57.0, 97.0, 88.0, 87.0, 80.0, 36.0, 63.0, 84.0, 69.0, 127.0, 137.0, 114.0, 96.0, 136.0, 162.0, 76.0, 74.0, 128.0, 113.0, 83.0, 93.0, 110.0, 125.0, 80.0, 78.0, 156.0, 145.0, 100.0, 101.0, 106.0, 115.0, 82.0, 94.0, 107.0, 116.0, 99.0, 70.0, 161.0, 152.0, 102.0, 94.0, 115.0, 100.0, 58.0, 60.0, 84.0, 82.0, 150.0, 140.0, 47.0, 34.0, 71.0, 74.0, 99.0, 97.0, 72.0, 89.0, 139.0, 171.0, 139.0, 117.0, 152.0, 146.0, 119.0, 125.0, 53.0, 57.0, 144.0, 175.0, 94.0, 85.0, 67.0, 85.0, 100.0, 107.0, 133.0, 168.0, 50.0, 77.0, 165.0, 142.0, 102.0, 134.0]}, "sampler_perf": {"mean_env_wait_ms": 2.8628696517904486, "mean_processing_ms": 0.6387085923696654, "mean_inference_ms": 3.4155991200139666}, "off_policy_estimator": {}, "info": {"num_steps_trained": 1512000, "num_steps_sampled": 806400, "sample_time_ms": 22536.552, "load_time_ms": 37.579, "grad_time_ms": 9310.446, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 4.336808754565503e-20, "cur_lr": 0.0010000000474974513, "total_loss": -0.0014393635792657733, "policy_loss": -0.0051459651440382, "vf_loss": 44.491573333740234, "vf_explained_var": 0.6412068009376526, "kl": 0.001486484077759087, "entropy": 1.485115885734558, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 806400, "episodes_total": 2016, "training_iteration": 63, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-16-31", "timestamp": 1660248991, "time_this_iter_s": 32.43239998817444, "time_total_s": 7411.844634056091, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": 
[256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate 
at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": 
false, "_fake_gpus": false}, "time_since_restore": 7411.844634056091, "timesteps_since_restore": 806400, "iterations_since_restore": 63, "perf": {"cpu_util_percent": 36.01739130434783, "ram_util_percent": 57.59347826086956}}
-{"episode_reward_max": 368.0, "episode_reward_min": 9.0, "episode_reward_mean": 232.66, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 3.0}, "policy_reward_max": {"ppo": 190.0}, "policy_reward_mean": {"ppo": 116.33}, "custom_metrics": {"sparse_reward_mean": 70.6, "sparse_reward_min": 0, "sparse_reward_max": 120, "shaped_reward_mean": 91.46, "shaped_reward_min": 9, "shaped_reward_max": 130, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 9.53, "onion_pickup_agent_0_min": 3, "onion_pickup_agent_0_max": 17, "onion_pickup_agent_1_mean": 10.35, "onion_pickup_agent_1_min": 4, "onion_pickup_agent_1_max": 16, "useful_onion_pickup_agent_0_mean": 8.77, "useful_onion_pickup_agent_0_min": 2, "useful_onion_pickup_agent_0_max": 16, "useful_onion_pickup_agent_1_mean": 9.57, "useful_onion_pickup_agent_1_min": 2, "useful_onion_pickup_agent_1_max": 16, "onion_drop_agent_0_mean": 1.11, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 1.27, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.64, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, "useful_onion_drop_agent_1_mean": 0.72, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 8.08, "potting_onion_agent_0_min": 2, "potting_onion_agent_0_max": 15, "potting_onion_agent_1_mean": 8.75, "potting_onion_agent_1_min": 1, "potting_onion_agent_1_max": 15, "dish_pickup_agent_0_mean": 4.7, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.19, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 1.59, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 6, "useful_dish_pickup_agent_1_mean": 1.7, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 6, "dish_drop_agent_0_mean": 1.57, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 1.68, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 7, "useful_dish_drop_agent_0_mean": 0.51, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.48, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 3.22, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 3.44, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 2.67, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 5, "soup_delivery_agent_1_mean": 3.04, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.42, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 4, "soup_drop_agent_1_mean": 0.32, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 3, "optimal_onion_potting_agent_0_mean": 8.08, 
"optimal_onion_potting_agent_0_min": 2, "optimal_onion_potting_agent_0_max": 15, "optimal_onion_potting_agent_1_mean": 8.75, "optimal_onion_potting_agent_1_min": 1, "optimal_onion_potting_agent_1_max": 15, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 8.08, "viable_onion_potting_agent_0_min": 2, "viable_onion_potting_agent_0_max": 15, "viable_onion_potting_agent_1_mean": 8.75, "viable_onion_potting_agent_1_min": 1, "viable_onion_potting_agent_1_max": 15, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [55.0, 264.0, 127.0, 313.0, 163.0, 287.0, 298.0, 216.0, 341.0, 287.0, 235.0, 298.0, 204.0, 295.0, 225.0, 218.0, 287.0, 215.0, 313.0, 255.0, 367.0, 239.0, 210.0, 367.0, 258.0, 252.0, 353.0, 128.0, 178.0, 284.0, 270.0, 273.0, 210.0, 298.0, 150.0, 241.0, 176.0, 235.0, 158.0, 301.0, 201.0, 221.0, 176.0, 223.0, 169.0, 313.0, 196.0, 215.0, 118.0, 166.0, 290.0, 81.0, 145.0, 196.0, 161.0, 310.0, 256.0, 298.0, 244.0, 110.0, 319.0, 179.0, 152.0, 207.0, 301.0, 127.0, 307.0, 236.0, 316.0, 212.0, 368.0, 304.0, 253.0, 351.0, 178.0, 9.0, 122.0, 253.0, 164.0, 253.0, 213.0, 250.0, 179.0, 219.0, 330.0, 169.0, 241.0, 207.0, 319.0, 241.0, 264.0, 284.0, 307.0, 139.0, 201.0, 267.0, 71.0, 267.0, 241.0, 313.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [26.0, 29.0, 134.0, 130.0, 63.0, 64.0, 144.0, 169.0, 76.0, 87.0, 142.0, 145.0, 131.0, 167.0, 113.0, 103.0, 152.0, 189.0, 161.0, 126.0, 111.0, 124.0, 164.0, 134.0, 105.0, 99.0, 133.0, 162.0, 119.0, 106.0, 110.0, 108.0, 153.0, 134.0, 100.0, 115.0, 147.0, 166.0, 129.0, 126.0, 190.0, 177.0, 122.0, 117.0, 107.0, 103.0, 189.0, 178.0, 118.0, 140.0, 117.0, 135.0, 188.0, 165.0, 60.0, 68.0, 86.0, 92.0, 143.0, 141.0, 128.0, 142.0, 131.0, 142.0, 114.0, 96.0, 136.0, 162.0, 76.0, 74.0, 128.0, 113.0, 83.0, 93.0, 110.0, 125.0, 80.0, 78.0, 156.0, 145.0, 100.0, 101.0, 106.0, 115.0, 82.0, 94.0, 107.0, 116.0, 99.0, 70.0, 161.0, 152.0, 102.0, 94.0, 115.0, 100.0, 58.0, 60.0, 84.0, 82.0, 150.0, 
140.0, 47.0, 34.0, 71.0, 74.0, 99.0, 97.0, 72.0, 89.0, 139.0, 171.0, 139.0, 117.0, 152.0, 146.0, 119.0, 125.0, 53.0, 57.0, 144.0, 175.0, 94.0, 85.0, 67.0, 85.0, 100.0, 107.0, 133.0, 168.0, 50.0, 77.0, 165.0, 142.0, 102.0, 134.0, 149.0, 167.0, 107.0, 105.0, 182.0, 186.0, 140.0, 164.0, 124.0, 129.0, 185.0, 166.0, 85.0, 93.0, 6.0, 3.0, 52.0, 70.0, 111.0, 142.0, 55.0, 109.0, 116.0, 137.0, 97.0, 116.0, 129.0, 121.0, 80.0, 99.0, 120.0, 99.0, 166.0, 164.0, 80.0, 89.0, 108.0, 133.0, 98.0, 109.0, 170.0, 149.0, 124.0, 117.0, 125.0, 139.0, 135.0, 149.0, 151.0, 156.0, 71.0, 68.0, 97.0, 104.0, 128.0, 139.0, 34.0, 37.0, 122.0, 145.0, 131.0, 110.0, 163.0, 150.0]}, "sampler_perf": {"mean_env_wait_ms": 2.824862261482626, "mean_processing_ms": 0.6311660417813414, "mean_inference_ms": 3.3776913519982266}, "off_policy_estimator": {}, "info": {"num_steps_trained": 1536000, "num_steps_sampled": 819200, "sample_time_ms": 21992.331, "load_time_ms": 37.617, "grad_time_ms": 9517.458, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 2.1684043772827515e-20, "cur_lr": 0.0010000000474974513, "total_loss": -0.0046628438867628574, "policy_loss": -0.008121621794998646, "vf_loss": 41.953346252441406, "vf_explained_var": 0.6826162934303284, "kl": 0.0015492010861635208, "entropy": 1.4731155633926392, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 819200, "episodes_total": 2048, "training_iteration": 64, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-17-00", "timestamp": 1660249020, "time_this_iter_s": 28.55878710746765, "time_total_s": 7440.403421163559, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, 
"conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, 
"custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, 
"vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 7440.403421163559, "timesteps_since_restore": 819200, "iterations_since_restore": 64, "perf": {"cpu_util_percent": 38.065, "ram_util_percent": 57.504999999999995}}
-{"episode_reward_max": 390.0, "episode_reward_min": 9.0, "episode_reward_mean": 243.23, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 3.0}, "policy_reward_max": {"ppo": 201.0}, "policy_reward_mean": {"ppo": 121.615}, "custom_metrics": {"sparse_reward_mean": 74.8, "sparse_reward_min": 0, "sparse_reward_max": 140, "shaped_reward_mean": 93.63, "shaped_reward_min": 9, "shaped_reward_max": 130, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 9.88, "onion_pickup_agent_0_min": 3, "onion_pickup_agent_0_max": 17, "onion_pickup_agent_1_mean": 10.52, "onion_pickup_agent_1_min": 4, "onion_pickup_agent_1_max": 17, "useful_onion_pickup_agent_0_mean": 9.11, "useful_onion_pickup_agent_0_min": 2, "useful_onion_pickup_agent_0_max": 16, "useful_onion_pickup_agent_1_mean": 9.77, "useful_onion_pickup_agent_1_min": 2, "useful_onion_pickup_agent_1_max": 16, "onion_drop_agent_0_mean": 1.13, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 1.31, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.62, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, "useful_onion_drop_agent_1_mean": 0.73, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 8.38, "potting_onion_agent_0_min": 2, "potting_onion_agent_0_max": 15, "potting_onion_agent_1_mean": 8.85, "potting_onion_agent_1_min": 1, "potting_onion_agent_1_max": 15, "dish_pickup_agent_0_mean": 4.62, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.14, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 1.54, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 6, "useful_dish_pickup_agent_1_mean": 1.89, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 6, "dish_drop_agent_0_mean": 1.45, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 1.6, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 7, "useful_dish_drop_agent_0_mean": 0.52, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.49, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 3.24, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 3.55, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 6, "soup_delivery_agent_0_mean": 2.71, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 5, "soup_delivery_agent_1_mean": 3.06, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.41, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 4, "soup_drop_agent_1_mean": 0.36, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 3, "optimal_onion_potting_agent_0_mean": 8.38, 
"optimal_onion_potting_agent_0_min": 2, "optimal_onion_potting_agent_0_max": 15, "optimal_onion_potting_agent_1_mean": 8.85, "optimal_onion_potting_agent_1_min": 1, "optimal_onion_potting_agent_1_max": 15, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 8.38, "viable_onion_potting_agent_0_min": 2, "viable_onion_potting_agent_0_max": 15, "viable_onion_potting_agent_1_mean": 8.85, "viable_onion_potting_agent_1_min": 1, "viable_onion_potting_agent_1_max": 15, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [207.0, 187.0, 252.0, 141.0, 266.0, 299.0, 182.0, 344.0, 352.0, 232.0, 324.0, 260.0, 182.0, 336.0, 170.0, 193.0, 239.0, 255.0, 212.0, 295.0, 267.0, 166.0, 142.0, 65.0, 163.0, 330.0, 361.0, 390.0, 290.0, 259.0, 159.0, 252.0, 301.0, 127.0, 307.0, 236.0, 316.0, 212.0, 368.0, 304.0, 253.0, 351.0, 178.0, 9.0, 122.0, 253.0, 164.0, 253.0, 213.0, 250.0, 179.0, 219.0, 330.0, 169.0, 241.0, 207.0, 319.0, 241.0, 264.0, 284.0, 307.0, 139.0, 201.0, 267.0, 71.0, 267.0, 241.0, 313.0, 55.0, 264.0, 127.0, 313.0, 163.0, 287.0, 298.0, 216.0, 341.0, 287.0, 235.0, 298.0, 204.0, 295.0, 225.0, 218.0, 287.0, 215.0, 313.0, 255.0, 367.0, 239.0, 210.0, 367.0, 258.0, 252.0, 353.0, 128.0, 178.0, 284.0, 270.0, 273.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [99.0, 108.0, 93.0, 94.0, 123.0, 129.0, 76.0, 65.0, 128.0, 138.0, 144.0, 155.0, 95.0, 87.0, 161.0, 183.0, 182.0, 170.0, 124.0, 108.0, 170.0, 154.0, 119.0, 141.0, 89.0, 93.0, 165.0, 171.0, 97.0, 73.0, 100.0, 93.0, 134.0, 105.0, 127.0, 128.0, 103.0, 109.0, 155.0, 140.0, 118.0, 149.0, 82.0, 84.0, 61.0, 81.0, 32.0, 33.0, 89.0, 74.0, 155.0, 175.0, 168.0, 193.0, 201.0, 189.0, 137.0, 153.0, 121.0, 138.0, 84.0, 75.0, 121.0, 131.0, 133.0, 168.0, 50.0, 77.0, 165.0, 142.0, 102.0, 134.0, 149.0, 167.0, 107.0, 105.0, 182.0, 186.0, 140.0, 164.0, 124.0, 129.0, 185.0, 166.0, 85.0, 93.0, 6.0, 3.0, 52.0, 70.0, 111.0, 142.0, 55.0, 109.0, 116.0, 137.0, 97.0, 116.0, 129.0, 121.0, 80.0, 99.0, 120.0, 
99.0, 166.0, 164.0, 80.0, 89.0, 108.0, 133.0, 98.0, 109.0, 170.0, 149.0, 124.0, 117.0, 125.0, 139.0, 135.0, 149.0, 151.0, 156.0, 71.0, 68.0, 97.0, 104.0, 128.0, 139.0, 34.0, 37.0, 122.0, 145.0, 131.0, 110.0, 163.0, 150.0, 26.0, 29.0, 134.0, 130.0, 63.0, 64.0, 144.0, 169.0, 76.0, 87.0, 142.0, 145.0, 131.0, 167.0, 113.0, 103.0, 152.0, 189.0, 161.0, 126.0, 111.0, 124.0, 164.0, 134.0, 105.0, 99.0, 133.0, 162.0, 119.0, 106.0, 110.0, 108.0, 153.0, 134.0, 100.0, 115.0, 147.0, 166.0, 129.0, 126.0, 190.0, 177.0, 122.0, 117.0, 107.0, 103.0, 189.0, 178.0, 118.0, 140.0, 117.0, 135.0, 188.0, 165.0, 60.0, 68.0, 86.0, 92.0, 143.0, 141.0, 128.0, 142.0, 131.0, 142.0]}, "sampler_perf": {"mean_env_wait_ms": 2.787957702502074, "mean_processing_ms": 0.623833190465171, "mean_inference_ms": 3.33934185787935}, "off_policy_estimator": {}, "info": {"num_steps_trained": 1560000, "num_steps_sampled": 832000, "sample_time_ms": 21829.313, "load_time_ms": 37.489, "grad_time_ms": 9503.726, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 1.0842021886413758e-20, "cur_lr": 0.0010000000474974513, "total_loss": -0.0026782825589179993, "policy_loss": -0.0062830038368701935, "vf_loss": 43.38319778442383, "vf_explained_var": 0.6999297738075256, "kl": 0.0015296392375603318, "entropy": 1.4671941995620728, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 832000, "episodes_total": 2080, "training_iteration": 65, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-17-27", "timestamp": 1660249047, "time_this_iter_s": 27.39732599258423, "time_total_s": 7467.800747156143, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, 
"conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, 
"custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, 
"vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 7467.800747156143, "timesteps_since_restore": 832000, "iterations_since_restore": 65, "perf": {"cpu_util_percent": 36.88717948717949, "ram_util_percent": 57.59230769230768}}
-{"episode_reward_max": 402.0, "episode_reward_min": 55.0, "episode_reward_mean": 253.37, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 26.0}, "policy_reward_max": {"ppo": 201.0}, "policy_reward_mean": {"ppo": 126.685}, "custom_metrics": {"sparse_reward_mean": 79.0, "sparse_reward_min": 0, "sparse_reward_max": 140, "shaped_reward_mean": 95.37, "shaped_reward_min": 31, "shaped_reward_max": 133, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 10.36, "onion_pickup_agent_0_min": 3, "onion_pickup_agent_0_max": 17, "onion_pickup_agent_1_mean": 10.55, "onion_pickup_agent_1_min": 4, "onion_pickup_agent_1_max": 18, "useful_onion_pickup_agent_0_mean": 9.6, "useful_onion_pickup_agent_0_min": 3, "useful_onion_pickup_agent_0_max": 16, "useful_onion_pickup_agent_1_mean": 9.72, "useful_onion_pickup_agent_1_min": 2, "useful_onion_pickup_agent_1_max": 18, "onion_drop_agent_0_mean": 1.15, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 1.33, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 7, "useful_onion_drop_agent_0_mean": 0.56, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.73, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 8.84, "potting_onion_agent_0_min": 2, "potting_onion_agent_0_max": 15, "potting_onion_agent_1_mean": 8.85, "potting_onion_agent_1_min": 2, "potting_onion_agent_1_max": 17, "dish_pickup_agent_0_mean": 4.65, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.1, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 1.47, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 5, "useful_dish_pickup_agent_1_mean": 1.93, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 6, "dish_drop_agent_0_mean": 1.27, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 1.72, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 7, "useful_dish_drop_agent_0_mean": 0.47, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.55, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 4, "soup_pickup_agent_0_mean": 3.55, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 3.36, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 6, "soup_delivery_agent_0_mean": 3.02, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 6, "soup_delivery_agent_1_mean": 2.86, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.4, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 4, "soup_drop_agent_1_mean": 0.4, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 4, "optimal_onion_potting_agent_0_mean": 8.84, 
"optimal_onion_potting_agent_0_min": 2, "optimal_onion_potting_agent_0_max": 15, "optimal_onion_potting_agent_1_mean": 8.85, "optimal_onion_potting_agent_1_min": 2, "optimal_onion_potting_agent_1_max": 17, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 8.84, "viable_onion_potting_agent_0_min": 2, "viable_onion_potting_agent_0_max": 15, "viable_onion_potting_agent_1_mean": 8.85, "viable_onion_potting_agent_1_min": 2, "viable_onion_potting_agent_1_max": 17, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [282.0, 319.0, 247.0, 267.0, 244.0, 256.0, 305.0, 402.0, 352.0, 210.0, 139.0, 185.0, 293.0, 324.0, 164.0, 244.0, 345.0, 293.0, 333.0, 339.0, 150.0, 206.0, 344.0, 132.0, 204.0, 319.0, 258.0, 307.0, 250.0, 282.0, 298.0, 305.0, 71.0, 267.0, 241.0, 313.0, 55.0, 264.0, 127.0, 313.0, 163.0, 287.0, 298.0, 216.0, 341.0, 287.0, 235.0, 298.0, 204.0, 295.0, 225.0, 218.0, 287.0, 215.0, 313.0, 255.0, 367.0, 239.0, 210.0, 367.0, 258.0, 252.0, 353.0, 128.0, 178.0, 284.0, 270.0, 273.0, 207.0, 187.0, 252.0, 141.0, 266.0, 299.0, 182.0, 344.0, 352.0, 232.0, 324.0, 260.0, 182.0, 336.0, 170.0, 193.0, 239.0, 255.0, 212.0, 295.0, 267.0, 166.0, 142.0, 65.0, 163.0, 330.0, 361.0, 390.0, 290.0, 259.0, 159.0, 252.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [151.0, 131.0, 158.0, 161.0, 122.0, 125.0, 134.0, 133.0, 112.0, 132.0, 130.0, 126.0, 153.0, 152.0, 201.0, 201.0, 173.0, 179.0, 99.0, 111.0, 72.0, 67.0, 97.0, 88.0, 152.0, 141.0, 168.0, 156.0, 85.0, 79.0, 125.0, 119.0, 168.0, 177.0, 139.0, 154.0, 165.0, 168.0, 182.0, 157.0, 82.0, 68.0, 89.0, 117.0, 170.0, 174.0, 73.0, 59.0, 112.0, 92.0, 136.0, 183.0, 119.0, 139.0, 162.0, 145.0, 119.0, 131.0, 150.0, 132.0, 167.0, 131.0, 158.0, 147.0, 34.0, 37.0, 122.0, 145.0, 131.0, 110.0, 163.0, 150.0, 26.0, 29.0, 134.0, 130.0, 63.0, 64.0, 144.0, 169.0, 76.0, 87.0, 142.0, 145.0, 131.0, 167.0, 113.0, 103.0, 152.0, 189.0, 161.0, 126.0, 111.0, 124.0, 164.0, 134.0, 105.0, 99.0, 133.0, 162.0, 
119.0, 106.0, 110.0, 108.0, 153.0, 134.0, 100.0, 115.0, 147.0, 166.0, 129.0, 126.0, 190.0, 177.0, 122.0, 117.0, 107.0, 103.0, 189.0, 178.0, 118.0, 140.0, 117.0, 135.0, 188.0, 165.0, 60.0, 68.0, 86.0, 92.0, 143.0, 141.0, 128.0, 142.0, 131.0, 142.0, 99.0, 108.0, 93.0, 94.0, 123.0, 129.0, 76.0, 65.0, 128.0, 138.0, 144.0, 155.0, 95.0, 87.0, 161.0, 183.0, 182.0, 170.0, 124.0, 108.0, 170.0, 154.0, 119.0, 141.0, 89.0, 93.0, 165.0, 171.0, 97.0, 73.0, 100.0, 93.0, 134.0, 105.0, 127.0, 128.0, 103.0, 109.0, 155.0, 140.0, 118.0, 149.0, 82.0, 84.0, 61.0, 81.0, 32.0, 33.0, 89.0, 74.0, 155.0, 175.0, 168.0, 193.0, 201.0, 189.0, 137.0, 153.0, 121.0, 138.0, 84.0, 75.0, 121.0, 131.0]}, "sampler_perf": {"mean_env_wait_ms": 2.7521510850720086, "mean_processing_ms": 0.6167185711534096, "mean_inference_ms": 3.3015786839102956}, "off_policy_estimator": {}, "info": {"num_steps_trained": 1584000, "num_steps_sampled": 844800, "sample_time_ms": 21629.389, "load_time_ms": 37.73, "grad_time_ms": 9476.984, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 5.421010943206879e-21, "cur_lr": 0.0010000000474974513, "total_loss": -0.005265243351459503, "policy_loss": -0.009142073802649975, "vf_loss": 46.01101303100586, "vf_explained_var": 0.713275671005249, "kl": 0.001622045412659645, "entropy": 1.4485527276992798, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 844800, "episodes_total": 2112, "training_iteration": 66, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-17-56", "timestamp": 1660249076, "time_this_iter_s": 28.3277370929718, "time_total_s": 7496.128484249115, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": 
{"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, 
"evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 
0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 7496.128484249115, "timesteps_since_restore": 844800, "iterations_since_restore": 66, "perf": {"cpu_util_percent": 32.035000000000004, "ram_util_percent": 57.5875}}
-{"episode_reward_max": 421.0, "episode_reward_min": 23.0, "episode_reward_mean": 258.42, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 9.0}, "policy_reward_max": {"ppo": 211.0}, "policy_reward_mean": {"ppo": 129.21}, "custom_metrics": {"sparse_reward_mean": 81.6, "sparse_reward_min": 0, "sparse_reward_max": 140, "shaped_reward_mean": 95.22, "shaped_reward_min": 20, "shaped_reward_max": 144, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 10.07, "onion_pickup_agent_0_min": 3, "onion_pickup_agent_0_max": 17, "onion_pickup_agent_1_mean": 10.87, "onion_pickup_agent_1_min": 3, "onion_pickup_agent_1_max": 19, "useful_onion_pickup_agent_0_mean": 9.37, "useful_onion_pickup_agent_0_min": 2, "useful_onion_pickup_agent_0_max": 16, "useful_onion_pickup_agent_1_mean": 10.02, "useful_onion_pickup_agent_1_min": 2, "useful_onion_pickup_agent_1_max": 18, "onion_drop_agent_0_mean": 1.06, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 1.37, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 7, "useful_onion_drop_agent_0_mean": 0.53, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.75, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 8.65, "potting_onion_agent_0_min": 2, "potting_onion_agent_0_max": 16, "potting_onion_agent_1_mean": 9.14, "potting_onion_agent_1_min": 1, "potting_onion_agent_1_max": 17, "dish_pickup_agent_0_mean": 4.97, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 4.79, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 1.54, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 6, "useful_dish_pickup_agent_1_mean": 1.81, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 6, "dish_drop_agent_0_mean": 1.5, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 1.54, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 6, "useful_dish_drop_agent_0_mean": 0.57, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 5, "useful_dish_drop_agent_1_mean": 0.52, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 4, "soup_pickup_agent_0_mean": 3.59, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 3.24, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 6, "soup_delivery_agent_0_mean": 3.05, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 6, "soup_delivery_agent_1_mean": 2.87, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.45, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 4, "soup_drop_agent_1_mean": 0.29, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 4, "optimal_onion_potting_agent_0_mean": 8.65, 
"optimal_onion_potting_agent_0_min": 2, "optimal_onion_potting_agent_0_max": 16, "optimal_onion_potting_agent_1_mean": 9.14, "optimal_onion_potting_agent_1_min": 1, "optimal_onion_potting_agent_1_max": 17, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 8.65, "viable_onion_potting_agent_0_min": 2, "viable_onion_potting_agent_0_max": 16, "viable_onion_potting_agent_1_mean": 9.14, "viable_onion_potting_agent_1_min": 1, "viable_onion_potting_agent_1_max": 17, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [218.0, 187.0, 60.0, 267.0, 71.0, 244.0, 238.0, 379.0, 373.0, 258.0, 339.0, 198.0, 330.0, 309.0, 358.0, 267.0, 293.0, 298.0, 264.0, 356.0, 309.0, 253.0, 236.0, 421.0, 287.0, 296.0, 384.0, 254.0, 264.0, 208.0, 23.0, 225.0, 178.0, 284.0, 270.0, 273.0, 207.0, 187.0, 252.0, 141.0, 266.0, 299.0, 182.0, 344.0, 352.0, 232.0, 324.0, 260.0, 182.0, 336.0, 170.0, 193.0, 239.0, 255.0, 212.0, 295.0, 267.0, 166.0, 142.0, 65.0, 163.0, 330.0, 361.0, 390.0, 290.0, 259.0, 159.0, 252.0, 282.0, 319.0, 247.0, 267.0, 244.0, 256.0, 305.0, 402.0, 352.0, 210.0, 139.0, 185.0, 293.0, 324.0, 164.0, 244.0, 345.0, 293.0, 333.0, 339.0, 150.0, 206.0, 344.0, 132.0, 204.0, 319.0, 258.0, 307.0, 250.0, 282.0, 298.0, 305.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [114.0, 104.0, 93.0, 94.0, 29.0, 31.0, 151.0, 116.0, 34.0, 37.0, 113.0, 131.0, 114.0, 124.0, 193.0, 186.0, 188.0, 185.0, 132.0, 126.0, 159.0, 180.0, 96.0, 102.0, 157.0, 173.0, 158.0, 151.0, 180.0, 178.0, 136.0, 131.0, 153.0, 140.0, 146.0, 152.0, 127.0, 137.0, 186.0, 170.0, 151.0, 158.0, 107.0, 146.0, 108.0, 128.0, 211.0, 210.0, 144.0, 143.0, 145.0, 151.0, 204.0, 180.0, 124.0, 130.0, 133.0, 131.0, 99.0, 109.0, 9.0, 14.0, 109.0, 116.0, 86.0, 92.0, 143.0, 141.0, 128.0, 142.0, 131.0, 142.0, 99.0, 108.0, 93.0, 94.0, 123.0, 129.0, 76.0, 65.0, 128.0, 138.0, 144.0, 155.0, 95.0, 87.0, 161.0, 183.0, 182.0, 170.0, 124.0, 108.0, 170.0, 154.0, 119.0, 141.0, 89.0, 93.0, 165.0, 171.0, 97.0, 
73.0, 100.0, 93.0, 134.0, 105.0, 127.0, 128.0, 103.0, 109.0, 155.0, 140.0, 118.0, 149.0, 82.0, 84.0, 61.0, 81.0, 32.0, 33.0, 89.0, 74.0, 155.0, 175.0, 168.0, 193.0, 201.0, 189.0, 137.0, 153.0, 121.0, 138.0, 84.0, 75.0, 121.0, 131.0, 151.0, 131.0, 158.0, 161.0, 122.0, 125.0, 134.0, 133.0, 112.0, 132.0, 130.0, 126.0, 153.0, 152.0, 201.0, 201.0, 173.0, 179.0, 99.0, 111.0, 72.0, 67.0, 97.0, 88.0, 152.0, 141.0, 168.0, 156.0, 85.0, 79.0, 125.0, 119.0, 168.0, 177.0, 139.0, 154.0, 165.0, 168.0, 182.0, 157.0, 82.0, 68.0, 89.0, 117.0, 170.0, 174.0, 73.0, 59.0, 112.0, 92.0, 136.0, 183.0, 119.0, 139.0, 162.0, 145.0, 119.0, 131.0, 150.0, 132.0, 167.0, 131.0, 158.0, 147.0]}, "sampler_perf": {"mean_env_wait_ms": 2.717456296666147, "mean_processing_ms": 0.6098308335159816, "mean_inference_ms": 3.265231126103296}, "off_policy_estimator": {}, "info": {"num_steps_trained": 1608000, "num_steps_sampled": 857600, "sample_time_ms": 21370.114, "load_time_ms": 37.697, "grad_time_ms": 9313.497, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 2.7105054716034394e-21, "cur_lr": 0.0010000000474974513, "total_loss": -0.0019875967409461737, "policy_loss": -0.006022992078214884, "vf_loss": 47.62739562988281, "vf_explained_var": 0.6981029510498047, "kl": 0.0015933552058413625, "entropy": 1.4546891450881958, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 857600, "episodes_total": 2144, "training_iteration": 67, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-18-22", "timestamp": 1660249102, "time_this_iter_s": 26.67682385444641, "time_total_s": 7522.805308103561, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": 
{"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, 
"evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 
0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 7522.805308103561, "timesteps_since_restore": 857600, "iterations_since_restore": 67, "perf": {"cpu_util_percent": 34.505405405405405, "ram_util_percent": 57.59189189189188}}
-{"episode_reward_max": 421.0, "episode_reward_min": 23.0, "episode_reward_mean": 275.0, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 9.0}, "policy_reward_max": {"ppo": 211.0}, "policy_reward_mean": {"ppo": 137.5}, "custom_metrics": {"sparse_reward_mean": 88.8, "sparse_reward_min": 0, "sparse_reward_max": 140, "shaped_reward_mean": 97.4, "shaped_reward_min": 20, "shaped_reward_max": 144, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 10.21, "onion_pickup_agent_0_min": 3, "onion_pickup_agent_0_max": 17, "onion_pickup_agent_1_mean": 11.29, "onion_pickup_agent_1_min": 3, "onion_pickup_agent_1_max": 19, "useful_onion_pickup_agent_0_mean": 9.56, "useful_onion_pickup_agent_0_min": 2, "useful_onion_pickup_agent_0_max": 16, "useful_onion_pickup_agent_1_mean": 10.48, "useful_onion_pickup_agent_1_min": 2, "useful_onion_pickup_agent_1_max": 18, "onion_drop_agent_0_mean": 1.11, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 1.28, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 7, "useful_onion_drop_agent_0_mean": 0.53, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.74, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 8.75, "potting_onion_agent_0_min": 3, "potting_onion_agent_0_max": 16, "potting_onion_agent_1_mean": 9.63, "potting_onion_agent_1_min": 1, "potting_onion_agent_1_max": 17, "dish_pickup_agent_0_mean": 5.05, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 4.81, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 1.67, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 6, "useful_dish_pickup_agent_1_mean": 1.75, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 5, "dish_drop_agent_0_mean": 1.5, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 1.62, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.54, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 5, "useful_dish_drop_agent_1_mean": 0.52, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 4, "soup_pickup_agent_0_mean": 3.73, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 3.19, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 3.17, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 6, "soup_delivery_agent_1_mean": 2.87, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.46, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 5, "soup_drop_agent_1_mean": 0.29, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 4, "optimal_onion_potting_agent_0_mean": 8.75, 
"optimal_onion_potting_agent_0_min": 3, "optimal_onion_potting_agent_0_max": 16, "optimal_onion_potting_agent_1_mean": 9.63, "optimal_onion_potting_agent_1_min": 1, "optimal_onion_potting_agent_1_max": 17, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 8.75, "viable_onion_potting_agent_0_min": 3, "viable_onion_potting_agent_0_max": 16, "viable_onion_potting_agent_1_mean": 9.63, "viable_onion_potting_agent_1_min": 1, "viable_onion_potting_agent_1_max": 17, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [330.0, 248.0, 387.0, 301.0, 298.0, 190.0, 359.0, 353.0, 296.0, 365.0, 293.0, 364.0, 236.0, 325.0, 321.0, 259.0, 303.0, 255.0, 273.0, 256.0, 261.0, 290.0, 307.0, 298.0, 359.0, 287.0, 261.0, 249.0, 255.0, 307.0, 176.0, 413.0, 290.0, 259.0, 159.0, 252.0, 282.0, 319.0, 247.0, 267.0, 244.0, 256.0, 305.0, 402.0, 352.0, 210.0, 139.0, 185.0, 293.0, 324.0, 164.0, 244.0, 345.0, 293.0, 333.0, 339.0, 150.0, 206.0, 344.0, 132.0, 204.0, 319.0, 258.0, 307.0, 250.0, 282.0, 298.0, 305.0, 218.0, 187.0, 60.0, 267.0, 71.0, 244.0, 238.0, 379.0, 373.0, 258.0, 339.0, 198.0, 330.0, 309.0, 358.0, 267.0, 293.0, 298.0, 264.0, 356.0, 309.0, 253.0, 236.0, 421.0, 287.0, 296.0, 384.0, 254.0, 264.0, 208.0, 23.0, 225.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [151.0, 179.0, 118.0, 130.0, 193.0, 194.0, 162.0, 139.0, 139.0, 159.0, 105.0, 85.0, 180.0, 179.0, 172.0, 181.0, 159.0, 137.0, 168.0, 197.0, 137.0, 156.0, 180.0, 184.0, 114.0, 122.0, 171.0, 154.0, 173.0, 148.0, 135.0, 124.0, 149.0, 154.0, 124.0, 131.0, 148.0, 125.0, 111.0, 145.0, 139.0, 122.0, 144.0, 146.0, 153.0, 154.0, 152.0, 146.0, 182.0, 177.0, 156.0, 131.0, 109.0, 152.0, 121.0, 128.0, 130.0, 125.0, 140.0, 167.0, 85.0, 91.0, 207.0, 206.0, 137.0, 153.0, 121.0, 138.0, 84.0, 75.0, 121.0, 131.0, 151.0, 131.0, 158.0, 161.0, 122.0, 125.0, 134.0, 133.0, 112.0, 132.0, 130.0, 126.0, 153.0, 152.0, 201.0, 201.0, 173.0, 179.0, 99.0, 111.0, 72.0, 67.0, 97.0, 88.0, 152.0, 141.0, 168.0, 
156.0, 85.0, 79.0, 125.0, 119.0, 168.0, 177.0, 139.0, 154.0, 165.0, 168.0, 182.0, 157.0, 82.0, 68.0, 89.0, 117.0, 170.0, 174.0, 73.0, 59.0, 112.0, 92.0, 136.0, 183.0, 119.0, 139.0, 162.0, 145.0, 119.0, 131.0, 150.0, 132.0, 167.0, 131.0, 158.0, 147.0, 114.0, 104.0, 93.0, 94.0, 29.0, 31.0, 151.0, 116.0, 34.0, 37.0, 113.0, 131.0, 114.0, 124.0, 193.0, 186.0, 188.0, 185.0, 132.0, 126.0, 159.0, 180.0, 96.0, 102.0, 157.0, 173.0, 158.0, 151.0, 180.0, 178.0, 136.0, 131.0, 153.0, 140.0, 146.0, 152.0, 127.0, 137.0, 186.0, 170.0, 151.0, 158.0, 107.0, 146.0, 108.0, 128.0, 211.0, 210.0, 144.0, 143.0, 145.0, 151.0, 204.0, 180.0, 124.0, 130.0, 133.0, 131.0, 99.0, 109.0, 9.0, 14.0, 109.0, 116.0]}, "sampler_perf": {"mean_env_wait_ms": 2.683811918728699, "mean_processing_ms": 0.6031493203010873, "mean_inference_ms": 3.2301262692347574}, "off_policy_estimator": {}, "info": {"num_steps_trained": 1632000, "num_steps_sampled": 870400, "sample_time_ms": 21057.05, "load_time_ms": 37.776, "grad_time_ms": 9180.759, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 1.3552527358017197e-21, "cur_lr": 0.0010000000474974513, "total_loss": -0.0049219937063753605, "policy_loss": -0.009040978737175465, "vf_loss": 48.340152740478516, "vf_explained_var": 0.6955335140228271, "kl": 0.0016705109737813473, "entropy": 1.430059552192688, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 870400, "episodes_total": 2176, "training_iteration": 68, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-18-49", "timestamp": 1660249129, "time_this_iter_s": 26.73872995376587, "time_total_s": 7549.544038057327, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, 
"model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, 
"evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 
0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 7549.544038057327, "timesteps_since_restore": 870400, "iterations_since_restore": 68, "perf": {"cpu_util_percent": 34.623684210526314, "ram_util_percent": 57.58947368421052}}
-{"episode_reward_max": 462.0, "episode_reward_min": 23.0, "episode_reward_mean": 287.98, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 9.0}, "policy_reward_max": {"ppo": 236.0}, "policy_reward_mean": {"ppo": 143.99}, "custom_metrics": {"sparse_reward_mean": 94.8, "sparse_reward_min": 0, "sparse_reward_max": 160, "shaped_reward_mean": 98.38, "shaped_reward_min": 20, "shaped_reward_max": 144, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 10.38, "onion_pickup_agent_0_min": 3, "onion_pickup_agent_0_max": 17, "onion_pickup_agent_1_mean": 11.18, "onion_pickup_agent_1_min": 3, "onion_pickup_agent_1_max": 19, "useful_onion_pickup_agent_0_mean": 9.77, "useful_onion_pickup_agent_0_min": 1, "useful_onion_pickup_agent_0_max": 16, "useful_onion_pickup_agent_1_mean": 10.46, "useful_onion_pickup_agent_1_min": 2, "useful_onion_pickup_agent_1_max": 18, "onion_drop_agent_0_mean": 1.15, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 1.09, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.55, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.59, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 8.86, "potting_onion_agent_0_min": 1, "potting_onion_agent_0_max": 16, "potting_onion_agent_1_mean": 9.74, "potting_onion_agent_1_min": 1, "potting_onion_agent_1_max": 16, "dish_pickup_agent_0_mean": 4.92, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 4.78, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 1.84, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 6, "useful_dish_pickup_agent_1_mean": 1.72, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 6, "dish_drop_agent_0_mean": 1.46, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 1.49, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.49, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 5, "useful_dish_drop_agent_1_mean": 0.49, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 4, "soup_pickup_agent_0_mean": 3.6, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 3.24, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 3.11, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 6, "soup_delivery_agent_1_mean": 2.95, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.41, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 5, "soup_drop_agent_1_mean": 0.24, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 3, "optimal_onion_potting_agent_0_mean": 8.86, 
"optimal_onion_potting_agent_0_min": 1, "optimal_onion_potting_agent_0_max": 16, "optimal_onion_potting_agent_1_mean": 9.74, "optimal_onion_potting_agent_1_min": 1, "optimal_onion_potting_agent_1_max": 16, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 8.86, "viable_onion_potting_agent_0_min": 1, "viable_onion_potting_agent_0_max": 16, "viable_onion_potting_agent_1_mean": 9.74, "viable_onion_potting_agent_1_min": 1, "viable_onion_potting_agent_1_max": 16, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [242.0, 301.0, 234.0, 344.0, 450.0, 312.0, 239.0, 344.0, 359.0, 253.0, 316.0, 255.0, 356.0, 171.0, 396.0, 398.0, 276.0, 402.0, 339.0, 338.0, 353.0, 462.0, 296.0, 237.0, 293.0, 298.0, 231.0, 250.0, 347.0, 63.0, 356.0, 210.0, 250.0, 282.0, 298.0, 305.0, 218.0, 187.0, 60.0, 267.0, 71.0, 244.0, 238.0, 379.0, 373.0, 258.0, 339.0, 198.0, 330.0, 309.0, 358.0, 267.0, 293.0, 298.0, 264.0, 356.0, 309.0, 253.0, 236.0, 421.0, 287.0, 296.0, 384.0, 254.0, 264.0, 208.0, 23.0, 225.0, 330.0, 248.0, 387.0, 301.0, 298.0, 190.0, 359.0, 353.0, 296.0, 365.0, 293.0, 364.0, 236.0, 325.0, 321.0, 259.0, 303.0, 255.0, 273.0, 256.0, 261.0, 290.0, 307.0, 298.0, 359.0, 287.0, 261.0, 249.0, 255.0, 307.0, 176.0, 413.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [122.0, 120.0, 142.0, 159.0, 117.0, 117.0, 178.0, 166.0, 214.0, 236.0, 150.0, 162.0, 115.0, 124.0, 174.0, 170.0, 187.0, 172.0, 114.0, 139.0, 169.0, 147.0, 122.0, 133.0, 172.0, 184.0, 83.0, 88.0, 186.0, 210.0, 190.0, 208.0, 137.0, 139.0, 193.0, 209.0, 182.0, 157.0, 177.0, 161.0, 193.0, 160.0, 236.0, 226.0, 147.0, 149.0, 125.0, 112.0, 138.0, 155.0, 144.0, 154.0, 111.0, 120.0, 136.0, 114.0, 170.0, 177.0, 23.0, 40.0, 176.0, 180.0, 108.0, 102.0, 119.0, 131.0, 150.0, 132.0, 167.0, 131.0, 158.0, 147.0, 114.0, 104.0, 93.0, 94.0, 29.0, 31.0, 151.0, 116.0, 34.0, 37.0, 113.0, 131.0, 114.0, 124.0, 193.0, 186.0, 188.0, 185.0, 132.0, 126.0, 159.0, 180.0, 96.0, 102.0, 157.0, 173.0, 158.0, 
151.0, 180.0, 178.0, 136.0, 131.0, 153.0, 140.0, 146.0, 152.0, 127.0, 137.0, 186.0, 170.0, 151.0, 158.0, 107.0, 146.0, 108.0, 128.0, 211.0, 210.0, 144.0, 143.0, 145.0, 151.0, 204.0, 180.0, 124.0, 130.0, 133.0, 131.0, 99.0, 109.0, 9.0, 14.0, 109.0, 116.0, 151.0, 179.0, 118.0, 130.0, 193.0, 194.0, 162.0, 139.0, 139.0, 159.0, 105.0, 85.0, 180.0, 179.0, 172.0, 181.0, 159.0, 137.0, 168.0, 197.0, 137.0, 156.0, 180.0, 184.0, 114.0, 122.0, 171.0, 154.0, 173.0, 148.0, 135.0, 124.0, 149.0, 154.0, 124.0, 131.0, 148.0, 125.0, 111.0, 145.0, 139.0, 122.0, 144.0, 146.0, 153.0, 154.0, 152.0, 146.0, 182.0, 177.0, 156.0, 131.0, 109.0, 152.0, 121.0, 128.0, 130.0, 125.0, 140.0, 167.0, 85.0, 91.0, 207.0, 206.0]}, "sampler_perf": {"mean_env_wait_ms": 2.6511088970025942, "mean_processing_ms": 0.5966469783379228, "mean_inference_ms": 3.1956784822649578}, "off_policy_estimator": {}, "info": {"num_steps_trained": 1656000, "num_steps_sampled": 883200, "sample_time_ms": 20666.066, "load_time_ms": 37.699, "grad_time_ms": 8951.146, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 6.776263679008599e-22, "cur_lr": 0.0010000000474974513, "total_loss": -0.007915745489299297, "policy_loss": -0.011840385384857655, "vf_loss": 46.363162994384766, "vf_explained_var": 0.7722532153129578, "kl": 0.0015700907679274678, "entropy": 1.423343300819397, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 883200, "episodes_total": 2208, "training_iteration": 69, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-19-16", "timestamp": 1660249156, "time_this_iter_s": 26.615740060806274, "time_total_s": 7576.1597781181335, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 7576.1597781181335, "timesteps_since_restore": 883200, "iterations_since_restore": 69, "perf": {"cpu_util_percent": 33.539473684210535, "ram_util_percent": 57.605263157894726}}
-{"episode_reward_max": 462.0, "episode_reward_min": 23.0, "episode_reward_mean": 296.87, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 9.0}, "policy_reward_max": {"ppo": 236.0}, "policy_reward_mean": {"ppo": 148.435}, "custom_metrics": {"sparse_reward_mean": 98.6, "sparse_reward_min": 0, "sparse_reward_max": 160, "shaped_reward_mean": 99.67, "shaped_reward_min": 23, "shaped_reward_max": 142, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 10.41, "onion_pickup_agent_0_min": 3, "onion_pickup_agent_0_max": 19, "onion_pickup_agent_1_mean": 11.35, "onion_pickup_agent_1_min": 5, "onion_pickup_agent_1_max": 19, "useful_onion_pickup_agent_0_mean": 9.87, "useful_onion_pickup_agent_0_min": 1, "useful_onion_pickup_agent_0_max": 19, "useful_onion_pickup_agent_1_mean": 10.64, "useful_onion_pickup_agent_1_min": 2, "useful_onion_pickup_agent_1_max": 17, "onion_drop_agent_0_mean": 1.16, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 1.02, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.57, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.56, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 8.93, "potting_onion_agent_0_min": 1, "potting_onion_agent_0_max": 17, "potting_onion_agent_1_mean": 10.01, "potting_onion_agent_1_min": 2, "potting_onion_agent_1_max": 15, "dish_pickup_agent_0_mean": 4.87, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 4.72, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 1.92, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 6, "useful_dish_pickup_agent_1_mean": 1.78, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 6, "dish_drop_agent_0_mean": 1.33, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 1.52, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 9, "useful_dish_drop_agent_0_mean": 0.4, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.48, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 4, "soup_pickup_agent_0_mean": 3.68, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 3.17, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 3.2, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 5, "soup_delivery_agent_1_mean": 2.83, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.39, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 5, "soup_drop_agent_1_mean": 0.27, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 3, "optimal_onion_potting_agent_0_mean": 8.93, 
"optimal_onion_potting_agent_0_min": 1, "optimal_onion_potting_agent_0_max": 17, "optimal_onion_potting_agent_1_mean": 10.01, "optimal_onion_potting_agent_1_min": 2, "optimal_onion_potting_agent_1_max": 15, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 8.93, "viable_onion_potting_agent_0_min": 1, "viable_onion_potting_agent_0_max": 17, "viable_onion_potting_agent_1_mean": 10.01, "viable_onion_potting_agent_1_min": 2, "viable_onion_potting_agent_1_max": 15, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [249.0, 68.0, 319.0, 402.0, 327.0, 293.0, 350.0, 396.0, 396.0, 247.0, 387.0, 134.0, 387.0, 301.0, 356.0, 365.0, 308.0, 344.0, 348.0, 333.0, 345.0, 359.0, 259.0, 307.0, 330.0, 269.0, 350.0, 247.0, 356.0, 302.0, 269.0, 68.0, 264.0, 208.0, 23.0, 225.0, 330.0, 248.0, 387.0, 301.0, 298.0, 190.0, 359.0, 353.0, 296.0, 365.0, 293.0, 364.0, 236.0, 325.0, 321.0, 259.0, 303.0, 255.0, 273.0, 256.0, 261.0, 290.0, 307.0, 298.0, 359.0, 287.0, 261.0, 249.0, 255.0, 307.0, 176.0, 413.0, 242.0, 301.0, 234.0, 344.0, 450.0, 312.0, 239.0, 344.0, 359.0, 253.0, 316.0, 255.0, 356.0, 171.0, 396.0, 398.0, 276.0, 402.0, 339.0, 338.0, 353.0, 462.0, 296.0, 237.0, 293.0, 298.0, 231.0, 250.0, 347.0, 63.0, 356.0, 210.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [129.0, 120.0, 28.0, 40.0, 165.0, 154.0, 191.0, 211.0, 161.0, 166.0, 152.0, 141.0, 179.0, 171.0, 222.0, 174.0, 205.0, 191.0, 131.0, 116.0, 188.0, 199.0, 60.0, 74.0, 212.0, 175.0, 147.0, 154.0, 170.0, 186.0, 185.0, 180.0, 146.0, 162.0, 167.0, 177.0, 177.0, 171.0, 153.0, 180.0, 169.0, 176.0, 189.0, 170.0, 133.0, 126.0, 139.0, 168.0, 176.0, 154.0, 142.0, 127.0, 183.0, 167.0, 124.0, 123.0, 176.0, 180.0, 142.0, 160.0, 133.0, 136.0, 28.0, 40.0, 133.0, 131.0, 99.0, 109.0, 9.0, 14.0, 109.0, 116.0, 151.0, 179.0, 118.0, 130.0, 193.0, 194.0, 162.0, 139.0, 139.0, 159.0, 105.0, 85.0, 180.0, 179.0, 172.0, 181.0, 159.0, 137.0, 168.0, 197.0, 137.0, 156.0, 180.0, 184.0, 114.0, 122.0, 171.0, 
154.0, 173.0, 148.0, 135.0, 124.0, 149.0, 154.0, 124.0, 131.0, 148.0, 125.0, 111.0, 145.0, 139.0, 122.0, 144.0, 146.0, 153.0, 154.0, 152.0, 146.0, 182.0, 177.0, 156.0, 131.0, 109.0, 152.0, 121.0, 128.0, 130.0, 125.0, 140.0, 167.0, 85.0, 91.0, 207.0, 206.0, 122.0, 120.0, 142.0, 159.0, 117.0, 117.0, 178.0, 166.0, 214.0, 236.0, 150.0, 162.0, 115.0, 124.0, 174.0, 170.0, 187.0, 172.0, 114.0, 139.0, 169.0, 147.0, 122.0, 133.0, 172.0, 184.0, 83.0, 88.0, 186.0, 210.0, 190.0, 208.0, 137.0, 139.0, 193.0, 209.0, 182.0, 157.0, 177.0, 161.0, 193.0, 160.0, 236.0, 226.0, 147.0, 149.0, 125.0, 112.0, 138.0, 155.0, 144.0, 154.0, 111.0, 120.0, 136.0, 114.0, 170.0, 177.0, 23.0, 40.0, 176.0, 180.0, 108.0, 102.0]}, "sampler_perf": {"mean_env_wait_ms": 2.619364465918477, "mean_processing_ms": 0.59033913982099, "mean_inference_ms": 3.1628679481393043}, "off_policy_estimator": {}, "info": {"num_steps_trained": 1680000, "num_steps_sampled": 896000, "sample_time_ms": 20622.52, "load_time_ms": 37.691, "grad_time_ms": 8717.912, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 3.3881318395042993e-22, "cur_lr": 0.0010000000474974513, "total_loss": 0.0007909121923148632, "policy_loss": -0.0035155529621988535, "vf_loss": 50.137577056884766, "vf_explained_var": 0.7450786232948303, "kl": 0.0021507267374545336, "entropy": 1.4145766496658325, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 896000, "episodes_total": 2240, "training_iteration": 70, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-19-45", "timestamp": 1660249185, "time_this_iter_s": 29.150850772857666, "time_total_s": 7605.310628890991, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 7605.310628890991, "timesteps_since_restore": 896000, "iterations_since_restore": 70, "perf": {"cpu_util_percent": 33.670731707317074, "ram_util_percent": 57.6219512195122}}
-{"episode_reward_max": 462.0, "episode_reward_min": 63.0, "episode_reward_mean": 310.07, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 23.0}, "policy_reward_max": {"ppo": 241.0}, "policy_reward_mean": {"ppo": 155.035}, "custom_metrics": {"sparse_reward_mean": 103.8, "sparse_reward_min": 20, "sparse_reward_max": 160, "shaped_reward_mean": 102.47, "shaped_reward_min": 23, "shaped_reward_max": 142, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 10.6, "onion_pickup_agent_0_min": 3, "onion_pickup_agent_0_max": 19, "onion_pickup_agent_1_mean": 11.29, "onion_pickup_agent_1_min": 3, "onion_pickup_agent_1_max": 19, "useful_onion_pickup_agent_0_mean": 10.1, "useful_onion_pickup_agent_0_min": 1, "useful_onion_pickup_agent_0_max": 19, "useful_onion_pickup_agent_1_mean": 10.61, "useful_onion_pickup_agent_1_min": 3, "useful_onion_pickup_agent_1_max": 17, "onion_drop_agent_0_mean": 1.0, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 1.0, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.53, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.51, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 9.29, "potting_onion_agent_0_min": 1, "potting_onion_agent_0_max": 17, "potting_onion_agent_1_mean": 9.98, "potting_onion_agent_1_min": 3, "potting_onion_agent_1_max": 15, "dish_pickup_agent_0_mean": 4.87, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 4.56, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 2.21, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 6, "useful_dish_pickup_agent_1_mean": 1.96, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 6, "dish_drop_agent_0_mean": 1.23, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 1.34, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 9, "useful_dish_drop_agent_0_mean": 0.34, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.4, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 3.65, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 3.17, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 3.23, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 6, "soup_delivery_agent_1_mean": 2.85, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.33, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 5, "soup_drop_agent_1_mean": 0.23, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 9.29, 
"optimal_onion_potting_agent_0_min": 1, "optimal_onion_potting_agent_0_max": 17, "optimal_onion_potting_agent_1_mean": 9.98, "optimal_onion_potting_agent_1_min": 3, "optimal_onion_potting_agent_1_max": 15, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 9.29, "viable_onion_potting_agent_0_min": 1, "viable_onion_potting_agent_0_max": 17, "viable_onion_potting_agent_1_mean": 9.98, "viable_onion_potting_agent_1_min": 3, "viable_onion_potting_agent_1_max": 15, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [367.0, 350.0, 230.0, 359.0, 362.0, 348.0, 408.0, 313.0, 345.0, 393.0, 399.0, 324.0, 293.0, 296.0, 396.0, 159.0, 236.0, 264.0, 408.0, 456.0, 270.0, 304.0, 356.0, 327.0, 213.0, 275.0, 350.0, 284.0, 390.0, 237.0, 402.0, 250.0, 255.0, 307.0, 176.0, 413.0, 242.0, 301.0, 234.0, 344.0, 450.0, 312.0, 239.0, 344.0, 359.0, 253.0, 316.0, 255.0, 356.0, 171.0, 396.0, 398.0, 276.0, 402.0, 339.0, 338.0, 353.0, 462.0, 296.0, 237.0, 293.0, 298.0, 231.0, 250.0, 347.0, 63.0, 356.0, 210.0, 249.0, 68.0, 319.0, 402.0, 327.0, 293.0, 350.0, 396.0, 396.0, 247.0, 387.0, 134.0, 387.0, 301.0, 356.0, 365.0, 308.0, 344.0, 348.0, 333.0, 345.0, 359.0, 259.0, 307.0, 330.0, 269.0, 350.0, 247.0, 356.0, 302.0, 269.0, 68.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [195.0, 172.0, 171.0, 179.0, 123.0, 107.0, 183.0, 176.0, 184.0, 178.0, 178.0, 170.0, 211.0, 197.0, 154.0, 159.0, 174.0, 171.0, 204.0, 189.0, 205.0, 194.0, 167.0, 157.0, 156.0, 137.0, 150.0, 146.0, 206.0, 190.0, 88.0, 71.0, 121.0, 115.0, 125.0, 139.0, 205.0, 203.0, 241.0, 215.0, 135.0, 135.0, 138.0, 166.0, 182.0, 174.0, 162.0, 165.0, 119.0, 94.0, 128.0, 147.0, 177.0, 173.0, 148.0, 136.0, 201.0, 189.0, 106.0, 131.0, 195.0, 207.0, 123.0, 127.0, 130.0, 125.0, 140.0, 167.0, 85.0, 91.0, 207.0, 206.0, 122.0, 120.0, 142.0, 159.0, 117.0, 117.0, 178.0, 166.0, 214.0, 236.0, 150.0, 162.0, 115.0, 124.0, 174.0, 170.0, 187.0, 172.0, 114.0, 139.0, 169.0, 147.0, 122.0, 133.0, 172.0, 184.0, 
83.0, 88.0, 186.0, 210.0, 190.0, 208.0, 137.0, 139.0, 193.0, 209.0, 182.0, 157.0, 177.0, 161.0, 193.0, 160.0, 236.0, 226.0, 147.0, 149.0, 125.0, 112.0, 138.0, 155.0, 144.0, 154.0, 111.0, 120.0, 136.0, 114.0, 170.0, 177.0, 23.0, 40.0, 176.0, 180.0, 108.0, 102.0, 129.0, 120.0, 28.0, 40.0, 165.0, 154.0, 191.0, 211.0, 161.0, 166.0, 152.0, 141.0, 179.0, 171.0, 222.0, 174.0, 205.0, 191.0, 131.0, 116.0, 188.0, 199.0, 60.0, 74.0, 212.0, 175.0, 147.0, 154.0, 170.0, 186.0, 185.0, 180.0, 146.0, 162.0, 167.0, 177.0, 177.0, 171.0, 153.0, 180.0, 169.0, 176.0, 189.0, 170.0, 133.0, 126.0, 139.0, 168.0, 176.0, 154.0, 142.0, 127.0, 183.0, 167.0, 124.0, 123.0, 176.0, 180.0, 142.0, 160.0, 133.0, 136.0, 28.0, 40.0]}, "sampler_perf": {"mean_env_wait_ms": 2.5885045396886737, "mean_processing_ms": 0.584211440514898, "mean_inference_ms": 3.1312910646882246}, "off_policy_estimator": {}, "info": {"num_steps_trained": 1704000, "num_steps_sampled": 908800, "sample_time_ms": 20412.625, "load_time_ms": 37.645, "grad_time_ms": 8491.272, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 1.6940659197521496e-22, "cur_lr": 0.0010000000474974513, "total_loss": 9.037616109708324e-05, "policy_loss": -0.004211378749459982, "vf_loss": 49.97343826293945, "vf_explained_var": 0.7645077705383301, "kl": 0.0018662656657397747, "entropy": 1.391157627105713, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 908800, "episodes_total": 2272, "training_iteration": 71, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-20-14", "timestamp": 1660249214, "time_this_iter_s": 28.656519889831543, "time_total_s": 7633.967148780823, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 7633.967148780823, "timesteps_since_restore": 908800, "iterations_since_restore": 71, "perf": {"cpu_util_percent": 34.982926829268294, "ram_util_percent": 57.707317073170735}}
-{"episode_reward_max": 456.0, "episode_reward_min": 63.0, "episode_reward_mean": 316.27, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 23.0}, "policy_reward_max": {"ppo": 241.0}, "policy_reward_mean": {"ppo": 158.135}, "custom_metrics": {"sparse_reward_mean": 105.8, "sparse_reward_min": 20, "sparse_reward_max": 160, "shaped_reward_mean": 104.67, "shaped_reward_min": 23, "shaped_reward_max": 142, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 10.74, "onion_pickup_agent_0_min": 3, "onion_pickup_agent_0_max": 19, "onion_pickup_agent_1_mean": 11.52, "onion_pickup_agent_1_min": 3, "onion_pickup_agent_1_max": 19, "useful_onion_pickup_agent_0_mean": 10.21, "useful_onion_pickup_agent_0_min": 1, "useful_onion_pickup_agent_0_max": 19, "useful_onion_pickup_agent_1_mean": 10.81, "useful_onion_pickup_agent_1_min": 3, "useful_onion_pickup_agent_1_max": 17, "onion_drop_agent_0_mean": 1.0, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 1.05, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.52, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.64, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 9.4, "potting_onion_agent_0_min": 1, "potting_onion_agent_0_max": 17, "potting_onion_agent_1_mean": 10.18, "potting_onion_agent_1_min": 3, "potting_onion_agent_1_max": 17, "dish_pickup_agent_0_mean": 4.86, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 4.41, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 2.42, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 6, "useful_dish_pickup_agent_1_mean": 2.04, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 6, "dish_drop_agent_0_mean": 1.18, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 1.17, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 9, "useful_dish_drop_agent_0_mean": 0.35, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.36, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 3.75, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 3.22, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 3.28, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 6, "soup_delivery_agent_1_mean": 2.84, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.38, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 5, "soup_drop_agent_1_mean": 0.29, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 4, "optimal_onion_potting_agent_0_mean": 9.4, 
"optimal_onion_potting_agent_0_min": 1, "optimal_onion_potting_agent_0_max": 17, "optimal_onion_potting_agent_1_mean": 10.18, "optimal_onion_potting_agent_1_min": 3, "optimal_onion_potting_agent_1_max": 17, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 9.4, "viable_onion_potting_agent_0_min": 1, "viable_onion_potting_agent_0_max": 17, "viable_onion_potting_agent_1_mean": 10.18, "viable_onion_potting_agent_1_min": 3, "viable_onion_potting_agent_1_max": 17, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [408.0, 316.0, 234.0, 245.0, 373.0, 336.0, 450.0, 416.0, 413.0, 356.0, 247.0, 410.0, 182.0, 347.0, 233.0, 316.0, 284.0, 316.0, 242.0, 296.0, 396.0, 142.0, 324.0, 351.0, 359.0, 361.0, 399.0, 353.0, 344.0, 275.0, 410.0, 382.0, 347.0, 63.0, 356.0, 210.0, 249.0, 68.0, 319.0, 402.0, 327.0, 293.0, 350.0, 396.0, 396.0, 247.0, 387.0, 134.0, 387.0, 301.0, 356.0, 365.0, 308.0, 344.0, 348.0, 333.0, 345.0, 359.0, 259.0, 307.0, 330.0, 269.0, 350.0, 247.0, 356.0, 302.0, 269.0, 68.0, 367.0, 350.0, 230.0, 359.0, 362.0, 348.0, 408.0, 313.0, 345.0, 393.0, 399.0, 324.0, 293.0, 296.0, 396.0, 159.0, 236.0, 264.0, 408.0, 456.0, 270.0, 304.0, 356.0, 327.0, 213.0, 275.0, 350.0, 284.0, 390.0, 237.0, 402.0, 250.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [199.0, 209.0, 151.0, 165.0, 95.0, 139.0, 128.0, 117.0, 175.0, 198.0, 156.0, 180.0, 239.0, 211.0, 208.0, 208.0, 210.0, 203.0, 174.0, 182.0, 123.0, 124.0, 204.0, 206.0, 88.0, 94.0, 178.0, 169.0, 116.0, 117.0, 151.0, 165.0, 149.0, 135.0, 163.0, 153.0, 144.0, 98.0, 151.0, 145.0, 179.0, 217.0, 62.0, 80.0, 156.0, 168.0, 184.0, 167.0, 165.0, 194.0, 182.0, 179.0, 207.0, 192.0, 179.0, 174.0, 178.0, 166.0, 141.0, 134.0, 219.0, 191.0, 188.0, 194.0, 170.0, 177.0, 23.0, 40.0, 176.0, 180.0, 108.0, 102.0, 129.0, 120.0, 28.0, 40.0, 165.0, 154.0, 191.0, 211.0, 161.0, 166.0, 152.0, 141.0, 179.0, 171.0, 222.0, 174.0, 205.0, 191.0, 131.0, 116.0, 188.0, 199.0, 60.0, 74.0, 212.0, 175.0, 147.0, 
154.0, 170.0, 186.0, 185.0, 180.0, 146.0, 162.0, 167.0, 177.0, 177.0, 171.0, 153.0, 180.0, 169.0, 176.0, 189.0, 170.0, 133.0, 126.0, 139.0, 168.0, 176.0, 154.0, 142.0, 127.0, 183.0, 167.0, 124.0, 123.0, 176.0, 180.0, 142.0, 160.0, 133.0, 136.0, 28.0, 40.0, 195.0, 172.0, 171.0, 179.0, 123.0, 107.0, 183.0, 176.0, 184.0, 178.0, 178.0, 170.0, 211.0, 197.0, 154.0, 159.0, 174.0, 171.0, 204.0, 189.0, 205.0, 194.0, 167.0, 157.0, 156.0, 137.0, 150.0, 146.0, 206.0, 190.0, 88.0, 71.0, 121.0, 115.0, 125.0, 139.0, 205.0, 203.0, 241.0, 215.0, 135.0, 135.0, 138.0, 166.0, 182.0, 174.0, 162.0, 165.0, 119.0, 94.0, 128.0, 147.0, 177.0, 173.0, 148.0, 136.0, 201.0, 189.0, 106.0, 131.0, 195.0, 207.0, 123.0, 127.0]}, "sampler_perf": {"mean_env_wait_ms": 2.558557313555292, "mean_processing_ms": 0.5782709476223633, "mean_inference_ms": 3.1013750793848702}, "off_policy_estimator": {}, "info": {"num_steps_trained": 1728000, "num_steps_sampled": 921600, "sample_time_ms": 20167.129, "load_time_ms": 37.249, "grad_time_ms": 8246.669, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 8.470329598760748e-23, "cur_lr": 0.0010000000474974513, "total_loss": -0.0024793706834316254, "policy_loss": -0.007412927225232124, "vf_loss": 56.26578903198242, "vf_explained_var": 0.7433841228485107, "kl": 0.0019004354253411293, "entropy": 1.3860511779785156, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 921600, "episodes_total": 2304, "training_iteration": 72, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-20-44", "timestamp": 1660249244, "time_this_iter_s": 30.219820022583008, "time_total_s": 7664.186968803406, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 7664.186968803406, "timesteps_since_restore": 921600, "iterations_since_restore": 72, "perf": {"cpu_util_percent": 35.07380952380952, "ram_util_percent": 57.70714285714284}}
-{"episode_reward_max": 465.0, "episode_reward_min": 68.0, "episode_reward_mean": 332.46, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 28.0}, "policy_reward_max": {"ppo": 241.0}, "policy_reward_mean": {"ppo": 166.23}, "custom_metrics": {"sparse_reward_mean": 111.6, "sparse_reward_min": 20, "sparse_reward_max": 160, "shaped_reward_mean": 109.26, "shaped_reward_min": 28, "shaped_reward_max": 145, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 10.81, "onion_pickup_agent_0_min": 0, "onion_pickup_agent_0_max": 19, "onion_pickup_agent_1_mean": 12.13, "onion_pickup_agent_1_min": 3, "onion_pickup_agent_1_max": 19, "useful_onion_pickup_agent_0_mean": 10.31, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 19, "useful_onion_pickup_agent_1_mean": 11.46, "useful_onion_pickup_agent_1_min": 3, "useful_onion_pickup_agent_1_max": 19, "onion_drop_agent_0_mean": 0.91, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 1.09, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.46, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, "useful_onion_drop_agent_1_mean": 0.7, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 9.53, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 17, "potting_onion_agent_1_mean": 10.79, "potting_onion_agent_1_min": 3, "potting_onion_agent_1_max": 18, "dish_pickup_agent_0_mean": 4.84, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 4.44, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 2.6, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 2.3, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 6, "dish_drop_agent_0_mean": 1.18, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.95, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.36, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.29, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 4, "soup_pickup_agent_0_mean": 3.63, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 3.46, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 3.27, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 6, "soup_delivery_agent_1_mean": 3.07, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.28, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 3, "soup_drop_agent_1_mean": 0.31, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 4, "optimal_onion_potting_agent_0_mean": 9.53, 
"optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 17, "optimal_onion_potting_agent_1_mean": 10.79, "optimal_onion_potting_agent_1_min": 3, "optimal_onion_potting_agent_1_max": 18, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 9.53, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 17, "viable_onion_potting_agent_1_mean": 10.79, "viable_onion_potting_agent_1_min": 3, "viable_onion_potting_agent_1_max": 18, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [373.0, 465.0, 251.0, 348.0, 399.0, 284.0, 344.0, 459.0, 416.0, 410.0, 416.0, 358.0, 422.0, 402.0, 405.0, 422.0, 247.0, 296.0, 407.0, 246.0, 120.0, 310.0, 413.0, 338.0, 353.0, 405.0, 416.0, 365.0, 236.0, 287.0, 350.0, 408.0, 356.0, 302.0, 269.0, 68.0, 367.0, 350.0, 230.0, 359.0, 362.0, 348.0, 408.0, 313.0, 345.0, 393.0, 399.0, 324.0, 293.0, 296.0, 396.0, 159.0, 236.0, 264.0, 408.0, 456.0, 270.0, 304.0, 356.0, 327.0, 213.0, 275.0, 350.0, 284.0, 390.0, 237.0, 402.0, 250.0, 408.0, 316.0, 234.0, 245.0, 373.0, 336.0, 450.0, 416.0, 413.0, 356.0, 247.0, 410.0, 182.0, 347.0, 233.0, 316.0, 284.0, 316.0, 242.0, 296.0, 396.0, 142.0, 324.0, 351.0, 359.0, 361.0, 399.0, 353.0, 344.0, 275.0, 410.0, 382.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [170.0, 203.0, 226.0, 239.0, 131.0, 120.0, 162.0, 186.0, 205.0, 194.0, 132.0, 152.0, 166.0, 178.0, 235.0, 224.0, 205.0, 211.0, 188.0, 222.0, 197.0, 219.0, 177.0, 181.0, 212.0, 210.0, 196.0, 206.0, 213.0, 192.0, 199.0, 223.0, 111.0, 136.0, 147.0, 149.0, 193.0, 214.0, 121.0, 125.0, 55.0, 65.0, 165.0, 145.0, 203.0, 210.0, 170.0, 168.0, 161.0, 192.0, 191.0, 214.0, 213.0, 203.0, 178.0, 187.0, 98.0, 138.0, 150.0, 137.0, 176.0, 174.0, 202.0, 206.0, 176.0, 180.0, 142.0, 160.0, 133.0, 136.0, 28.0, 40.0, 195.0, 172.0, 171.0, 179.0, 123.0, 107.0, 183.0, 176.0, 184.0, 178.0, 178.0, 170.0, 211.0, 197.0, 154.0, 159.0, 174.0, 171.0, 204.0, 189.0, 205.0, 194.0, 167.0, 157.0, 156.0, 137.0, 
150.0, 146.0, 206.0, 190.0, 88.0, 71.0, 121.0, 115.0, 125.0, 139.0, 205.0, 203.0, 241.0, 215.0, 135.0, 135.0, 138.0, 166.0, 182.0, 174.0, 162.0, 165.0, 119.0, 94.0, 128.0, 147.0, 177.0, 173.0, 148.0, 136.0, 201.0, 189.0, 106.0, 131.0, 195.0, 207.0, 123.0, 127.0, 199.0, 209.0, 151.0, 165.0, 95.0, 139.0, 128.0, 117.0, 175.0, 198.0, 156.0, 180.0, 239.0, 211.0, 208.0, 208.0, 210.0, 203.0, 174.0, 182.0, 123.0, 124.0, 204.0, 206.0, 88.0, 94.0, 178.0, 169.0, 116.0, 117.0, 151.0, 165.0, 149.0, 135.0, 163.0, 153.0, 144.0, 98.0, 151.0, 145.0, 179.0, 217.0, 62.0, 80.0, 156.0, 168.0, 184.0, 167.0, 165.0, 194.0, 182.0, 179.0, 207.0, 192.0, 179.0, 174.0, 178.0, 166.0, 141.0, 134.0, 219.0, 191.0, 188.0, 194.0]}, "sampler_perf": {"mean_env_wait_ms": 2.5294421557916076, "mean_processing_ms": 0.5725031041756122, "mean_inference_ms": 3.0723080495869532}, "off_policy_estimator": {}, "info": {"num_steps_trained": 1752000, "num_steps_sampled": 934400, "sample_time_ms": 20097.062, "load_time_ms": 37.14, "grad_time_ms": 8126.211, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 4.235164799380374e-23, "cur_lr": 0.0010000000474974513, "total_loss": -0.0012375875376164913, "policy_loss": -0.006141460034996271, "vf_loss": 55.8723258972168, "vf_explained_var": 0.7437755465507507, "kl": 0.0014161770232021809, "entropy": 1.3667305707931519, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 934400, "episodes_total": 2336, "training_iteration": 73, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-21-14", "timestamp": 1660249274, "time_this_iter_s": 30.526150941848755, "time_total_s": 7694.7131197452545, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 7694.7131197452545, "timesteps_since_restore": 934400, "iterations_since_restore": 73, "perf": {"cpu_util_percent": 36.46279069767442, "ram_util_percent": 57.75116279069769}}
-{"episode_reward_max": 465.0, "episode_reward_min": 120.0, "episode_reward_mean": 346.79, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 55.0}, "policy_reward_max": {"ppo": 239.0}, "policy_reward_mean": {"ppo": 173.395}, "custom_metrics": {"sparse_reward_mean": 117.2, "sparse_reward_min": 40, "sparse_reward_max": 160, "shaped_reward_mean": 112.39, "shaped_reward_min": 40, "shaped_reward_max": 147, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 11.26, "onion_pickup_agent_0_min": 0, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 12.36, "onion_pickup_agent_1_min": 4, "onion_pickup_agent_1_max": 19, "useful_onion_pickup_agent_0_mean": 10.7, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 19, "useful_onion_pickup_agent_1_mean": 11.66, "useful_onion_pickup_agent_1_min": 4, "useful_onion_pickup_agent_1_max": 19, "onion_drop_agent_0_mean": 1.07, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 1.04, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.6, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.69, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 9.87, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 17, "potting_onion_agent_1_mean": 11.09, "potting_onion_agent_1_min": 4, "potting_onion_agent_1_max": 18, "dish_pickup_agent_0_mean": 4.89, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 4.77, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 2.49, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 2.43, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 6, "dish_drop_agent_0_mean": 1.19, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 1.07, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.37, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.39, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 4, "soup_pickup_agent_0_mean": 3.72, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 3.58, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 6, "soup_delivery_agent_0_mean": 3.36, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 3.29, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.29, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 3, "soup_drop_agent_1_mean": 0.24, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 4, "optimal_onion_potting_agent_0_mean": 9.87, 
"optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 17, "optimal_onion_potting_agent_1_mean": 11.09, "optimal_onion_potting_agent_1_min": 4, "optimal_onion_potting_agent_1_max": 18, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 9.87, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 17, "viable_onion_potting_agent_1_mean": 11.09, "viable_onion_potting_agent_1_min": 4, "viable_onion_potting_agent_1_max": 18, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [358.0, 413.0, 455.0, 395.0, 402.0, 341.0, 290.0, 427.0, 413.0, 253.0, 355.0, 365.0, 316.0, 458.0, 196.0, 459.0, 398.0, 359.0, 407.0, 322.0, 410.0, 416.0, 390.0, 301.0, 416.0, 279.0, 301.0, 215.0, 359.0, 395.0, 310.0, 339.0, 390.0, 237.0, 402.0, 250.0, 408.0, 316.0, 234.0, 245.0, 373.0, 336.0, 450.0, 416.0, 413.0, 356.0, 247.0, 410.0, 182.0, 347.0, 233.0, 316.0, 284.0, 316.0, 242.0, 296.0, 396.0, 142.0, 324.0, 351.0, 359.0, 361.0, 399.0, 353.0, 344.0, 275.0, 410.0, 382.0, 373.0, 465.0, 251.0, 348.0, 399.0, 284.0, 344.0, 459.0, 416.0, 410.0, 416.0, 358.0, 422.0, 402.0, 405.0, 422.0, 247.0, 296.0, 407.0, 246.0, 120.0, 310.0, 413.0, 338.0, 353.0, 405.0, 416.0, 365.0, 236.0, 287.0, 350.0, 408.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [187.0, 171.0, 213.0, 200.0, 222.0, 233.0, 208.0, 187.0, 203.0, 199.0, 173.0, 168.0, 128.0, 162.0, 217.0, 210.0, 211.0, 202.0, 119.0, 134.0, 180.0, 175.0, 169.0, 196.0, 160.0, 156.0, 235.0, 223.0, 107.0, 89.0, 229.0, 230.0, 188.0, 210.0, 179.0, 180.0, 199.0, 208.0, 157.0, 165.0, 225.0, 185.0, 208.0, 208.0, 187.0, 203.0, 157.0, 144.0, 204.0, 212.0, 151.0, 128.0, 142.0, 159.0, 108.0, 107.0, 182.0, 177.0, 195.0, 200.0, 144.0, 166.0, 171.0, 168.0, 201.0, 189.0, 106.0, 131.0, 195.0, 207.0, 123.0, 127.0, 199.0, 209.0, 151.0, 165.0, 95.0, 139.0, 128.0, 117.0, 175.0, 198.0, 156.0, 180.0, 239.0, 211.0, 208.0, 208.0, 210.0, 203.0, 174.0, 182.0, 123.0, 124.0, 204.0, 206.0, 88.0, 
94.0, 178.0, 169.0, 116.0, 117.0, 151.0, 165.0, 149.0, 135.0, 163.0, 153.0, 144.0, 98.0, 151.0, 145.0, 179.0, 217.0, 62.0, 80.0, 156.0, 168.0, 184.0, 167.0, 165.0, 194.0, 182.0, 179.0, 207.0, 192.0, 179.0, 174.0, 178.0, 166.0, 141.0, 134.0, 219.0, 191.0, 188.0, 194.0, 170.0, 203.0, 226.0, 239.0, 131.0, 120.0, 162.0, 186.0, 205.0, 194.0, 132.0, 152.0, 166.0, 178.0, 235.0, 224.0, 205.0, 211.0, 188.0, 222.0, 197.0, 219.0, 177.0, 181.0, 212.0, 210.0, 196.0, 206.0, 213.0, 192.0, 199.0, 223.0, 111.0, 136.0, 147.0, 149.0, 193.0, 214.0, 121.0, 125.0, 55.0, 65.0, 165.0, 145.0, 203.0, 210.0, 170.0, 168.0, 161.0, 192.0, 191.0, 214.0, 213.0, 203.0, 178.0, 187.0, 98.0, 138.0, 150.0, 137.0, 176.0, 174.0, 202.0, 206.0]}, "sampler_perf": {"mean_env_wait_ms": 2.5011550162167318, "mean_processing_ms": 0.5669031638789668, "mean_inference_ms": 3.044470138401616}, "off_policy_estimator": {}, "info": {"num_steps_trained": 1776000, "num_steps_sampled": 947200, "sample_time_ms": 20475.505, "load_time_ms": 36.886, "grad_time_ms": 8011.391, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 2.117582399690187e-23, "cur_lr": 0.0010000000474974513, "total_loss": -0.002805360360071063, "policy_loss": -0.007409963756799698, "vf_loss": 52.88139724731445, "vf_explained_var": 0.7572636008262634, "kl": 0.0014988663606345654, "entropy": 1.3671082258224487, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 947200, "episodes_total": 2368, "training_iteration": 74, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-21-46", "timestamp": 1660249306, "time_this_iter_s": 31.191842079162598, "time_total_s": 7725.904961824417, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 7725.904961824417, "timesteps_since_restore": 947200, "iterations_since_restore": 74, "perf": {"cpu_util_percent": 34.40666666666667, "ram_util_percent": 57.844444444444456}}
-{"episode_reward_max": 465.0, "episode_reward_min": 120.0, "episode_reward_mean": 364.08, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 55.0}, "policy_reward_max": {"ppo": 239.0}, "policy_reward_mean": {"ppo": 182.04}, "custom_metrics": {"sparse_reward_mean": 124.0, "sparse_reward_min": 40, "sparse_reward_max": 160, "shaped_reward_mean": 116.08, "shaped_reward_min": 40, "shaped_reward_max": 147, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 11.3, "onion_pickup_agent_0_min": 0, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 12.93, "onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 19, "useful_onion_pickup_agent_0_mean": 10.86, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 19, "useful_onion_pickup_agent_1_mean": 12.23, "useful_onion_pickup_agent_1_min": 6, "useful_onion_pickup_agent_1_max": 19, "onion_drop_agent_0_mean": 0.93, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 0.98, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.55, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.57, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 10.09, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 17, "potting_onion_agent_1_mean": 11.68, "potting_onion_agent_1_min": 4, "potting_onion_agent_1_max": 18, "dish_pickup_agent_0_mean": 5.25, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 4.86, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 2.56, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 2.33, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 6, "dish_drop_agent_0_mean": 1.29, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 1.16, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.38, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.42, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 4, "soup_pickup_agent_0_mean": 3.93, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 3.55, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 6, "soup_delivery_agent_0_mean": 3.62, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 3.36, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.21, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.16, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 4, "optimal_onion_potting_agent_0_mean": 10.09, 
"optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 17, "optimal_onion_potting_agent_1_mean": 11.68, "optimal_onion_potting_agent_1_min": 4, "optimal_onion_potting_agent_1_max": 18, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 10.09, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 17, "viable_onion_potting_agent_1_mean": 11.68, "viable_onion_potting_agent_1_min": 4, "viable_onion_potting_agent_1_max": 18, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [438.0, 390.0, 364.0, 396.0, 413.0, 456.0, 346.0, 453.0, 332.0, 413.0, 251.0, 379.0, 384.0, 464.0, 384.0, 416.0, 401.0, 345.0, 339.0, 344.0, 355.0, 398.0, 419.0, 387.0, 302.0, 299.0, 453.0, 230.0, 362.0, 399.0, 405.0, 396.0, 344.0, 275.0, 410.0, 382.0, 373.0, 465.0, 251.0, 348.0, 399.0, 284.0, 344.0, 459.0, 416.0, 410.0, 416.0, 358.0, 422.0, 402.0, 405.0, 422.0, 247.0, 296.0, 407.0, 246.0, 120.0, 310.0, 413.0, 338.0, 353.0, 405.0, 416.0, 365.0, 236.0, 287.0, 350.0, 408.0, 358.0, 413.0, 455.0, 395.0, 402.0, 341.0, 290.0, 427.0, 413.0, 253.0, 355.0, 365.0, 316.0, 458.0, 196.0, 459.0, 398.0, 359.0, 407.0, 322.0, 410.0, 416.0, 390.0, 301.0, 416.0, 279.0, 301.0, 215.0, 359.0, 395.0, 310.0, 339.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [216.0, 222.0, 208.0, 182.0, 173.0, 191.0, 206.0, 190.0, 213.0, 200.0, 219.0, 237.0, 169.0, 177.0, 239.0, 214.0, 169.0, 163.0, 209.0, 204.0, 133.0, 118.0, 191.0, 188.0, 176.0, 208.0, 233.0, 231.0, 204.0, 180.0, 211.0, 205.0, 196.0, 205.0, 179.0, 166.0, 156.0, 183.0, 167.0, 177.0, 174.0, 181.0, 192.0, 206.0, 201.0, 218.0, 171.0, 216.0, 149.0, 153.0, 153.0, 146.0, 236.0, 217.0, 111.0, 119.0, 183.0, 179.0, 207.0, 192.0, 197.0, 208.0, 202.0, 194.0, 178.0, 166.0, 141.0, 134.0, 219.0, 191.0, 188.0, 194.0, 170.0, 203.0, 226.0, 239.0, 131.0, 120.0, 162.0, 186.0, 205.0, 194.0, 132.0, 152.0, 166.0, 178.0, 235.0, 224.0, 205.0, 211.0, 188.0, 222.0, 197.0, 219.0, 177.0, 181.0, 212.0, 
210.0, 196.0, 206.0, 213.0, 192.0, 199.0, 223.0, 111.0, 136.0, 147.0, 149.0, 193.0, 214.0, 121.0, 125.0, 55.0, 65.0, 165.0, 145.0, 203.0, 210.0, 170.0, 168.0, 161.0, 192.0, 191.0, 214.0, 213.0, 203.0, 178.0, 187.0, 98.0, 138.0, 150.0, 137.0, 176.0, 174.0, 202.0, 206.0, 187.0, 171.0, 213.0, 200.0, 222.0, 233.0, 208.0, 187.0, 203.0, 199.0, 173.0, 168.0, 128.0, 162.0, 217.0, 210.0, 211.0, 202.0, 119.0, 134.0, 180.0, 175.0, 169.0, 196.0, 160.0, 156.0, 235.0, 223.0, 107.0, 89.0, 229.0, 230.0, 188.0, 210.0, 179.0, 180.0, 199.0, 208.0, 157.0, 165.0, 225.0, 185.0, 208.0, 208.0, 187.0, 203.0, 157.0, 144.0, 204.0, 212.0, 151.0, 128.0, 142.0, 159.0, 108.0, 107.0, 182.0, 177.0, 195.0, 200.0, 144.0, 166.0, 171.0, 168.0]}, "sampler_perf": {"mean_env_wait_ms": 2.4736327447914648, "mean_processing_ms": 0.5614637333952731, "mean_inference_ms": 3.0177932889211685}, "off_policy_estimator": {}, "info": {"num_steps_trained": 1800000, "num_steps_sampled": 960000, "sample_time_ms": 20800.663, "load_time_ms": 36.895, "grad_time_ms": 8168.473, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 1.0587911998450935e-23, "cur_lr": 0.0010000000474974513, "total_loss": -0.0010099885985255241, "policy_loss": -0.006211612839251757, "vf_loss": 58.7685546875, "vf_explained_var": 0.7208888530731201, "kl": 0.0020332669373601675, "entropy": 1.3504695892333984, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 960000, "episodes_total": 2400, "training_iteration": 75, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-22-18", "timestamp": 1660249338, "time_this_iter_s": 32.21927499771118, "time_total_s": 7758.124236822128, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 7758.124236822128, "timesteps_since_restore": 960000, "iterations_since_restore": 75, "perf": {"cpu_util_percent": 32.595555555555556, "ram_util_percent": 57.83555555555553}}
-{"episode_reward_max": 507.0, "episode_reward_min": 196.0, "episode_reward_mean": 369.07, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 89.0}, "policy_reward_max": {"ppo": 258.0}, "policy_reward_mean": {"ppo": 184.535}, "custom_metrics": {"sparse_reward_mean": 125.8, "sparse_reward_min": 60, "sparse_reward_max": 180, "shaped_reward_mean": 117.47, "shaped_reward_min": 70, "shaped_reward_max": 156, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 11.42, "onion_pickup_agent_0_min": 0, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 13.2, "onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 19, "useful_onion_pickup_agent_0_mean": 10.97, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 18, "useful_onion_pickup_agent_1_mean": 12.42, "useful_onion_pickup_agent_1_min": 6, "useful_onion_pickup_agent_1_max": 19, "onion_drop_agent_0_mean": 0.9, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 1.11, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.54, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.62, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 10.26, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 17, "potting_onion_agent_1_mean": 11.75, "potting_onion_agent_1_min": 6, "potting_onion_agent_1_max": 19, "dish_pickup_agent_0_mean": 5.46, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 4.85, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 2.64, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 2.24, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 6, "dish_drop_agent_0_mean": 1.38, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 1.15, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 6, "useful_dish_drop_agent_0_mean": 0.45, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.4, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 4, "soup_pickup_agent_0_mean": 4.05, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 3.53, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 3.78, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 3.35, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.18, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.15, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 3, "optimal_onion_potting_agent_0_mean": 10.26, 
"optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 17, "optimal_onion_potting_agent_1_mean": 11.75, "optimal_onion_potting_agent_1_min": 6, "optimal_onion_potting_agent_1_max": 19, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 10.26, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 17, "viable_onion_potting_agent_1_mean": 11.75, "viable_onion_potting_agent_1_min": 6, "viable_onion_potting_agent_1_max": 19, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [272.0, 313.0, 408.0, 444.0, 304.0, 362.0, 304.0, 450.0, 408.0, 370.0, 476.0, 293.0, 453.0, 241.0, 410.0, 453.0, 405.0, 350.0, 350.0, 507.0, 427.0, 393.0, 267.0, 407.0, 419.0, 303.0, 410.0, 378.0, 350.0, 345.0, 376.0, 352.0, 236.0, 287.0, 350.0, 408.0, 358.0, 413.0, 455.0, 395.0, 402.0, 341.0, 290.0, 427.0, 413.0, 253.0, 355.0, 365.0, 316.0, 458.0, 196.0, 459.0, 398.0, 359.0, 407.0, 322.0, 410.0, 416.0, 390.0, 301.0, 416.0, 279.0, 301.0, 215.0, 359.0, 395.0, 310.0, 339.0, 438.0, 390.0, 364.0, 396.0, 413.0, 456.0, 346.0, 453.0, 332.0, 413.0, 251.0, 379.0, 384.0, 464.0, 384.0, 416.0, 401.0, 345.0, 339.0, 344.0, 355.0, 398.0, 419.0, 387.0, 302.0, 299.0, 453.0, 230.0, 362.0, 399.0, 405.0, 396.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [136.0, 136.0, 156.0, 157.0, 194.0, 214.0, 212.0, 232.0, 153.0, 151.0, 177.0, 185.0, 158.0, 146.0, 219.0, 231.0, 205.0, 203.0, 198.0, 172.0, 250.0, 226.0, 135.0, 158.0, 244.0, 209.0, 125.0, 116.0, 194.0, 216.0, 229.0, 224.0, 201.0, 204.0, 160.0, 190.0, 157.0, 193.0, 258.0, 249.0, 208.0, 219.0, 196.0, 197.0, 138.0, 129.0, 192.0, 215.0, 211.0, 208.0, 140.0, 163.0, 206.0, 204.0, 189.0, 189.0, 171.0, 179.0, 177.0, 168.0, 206.0, 170.0, 173.0, 179.0, 98.0, 138.0, 150.0, 137.0, 176.0, 174.0, 202.0, 206.0, 187.0, 171.0, 213.0, 200.0, 222.0, 233.0, 208.0, 187.0, 203.0, 199.0, 173.0, 168.0, 128.0, 162.0, 217.0, 210.0, 211.0, 202.0, 119.0, 134.0, 180.0, 175.0, 169.0, 196.0, 160.0, 
156.0, 235.0, 223.0, 107.0, 89.0, 229.0, 230.0, 188.0, 210.0, 179.0, 180.0, 199.0, 208.0, 157.0, 165.0, 225.0, 185.0, 208.0, 208.0, 187.0, 203.0, 157.0, 144.0, 204.0, 212.0, 151.0, 128.0, 142.0, 159.0, 108.0, 107.0, 182.0, 177.0, 195.0, 200.0, 144.0, 166.0, 171.0, 168.0, 216.0, 222.0, 208.0, 182.0, 173.0, 191.0, 206.0, 190.0, 213.0, 200.0, 219.0, 237.0, 169.0, 177.0, 239.0, 214.0, 169.0, 163.0, 209.0, 204.0, 133.0, 118.0, 191.0, 188.0, 176.0, 208.0, 233.0, 231.0, 204.0, 180.0, 211.0, 205.0, 196.0, 205.0, 179.0, 166.0, 156.0, 183.0, 167.0, 177.0, 174.0, 181.0, 192.0, 206.0, 201.0, 218.0, 171.0, 216.0, 149.0, 153.0, 153.0, 146.0, 236.0, 217.0, 111.0, 119.0, 183.0, 179.0, 207.0, 192.0, 197.0, 208.0, 202.0, 194.0]}, "sampler_perf": {"mean_env_wait_ms": 2.4468834694970774, "mean_processing_ms": 0.5561781773126093, "mean_inference_ms": 2.992510104410383}, "off_policy_estimator": {}, "info": {"num_steps_trained": 1824000, "num_steps_sampled": 972800, "sample_time_ms": 21085.438, "load_time_ms": 36.907, "grad_time_ms": 8381.058, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 5.293955999225468e-24, "cur_lr": 0.0010000000474974513, "total_loss": -0.000600266270339489, "policy_loss": -0.005276266019791365, "vf_loss": 53.540836334228516, "vf_explained_var": 0.7716453671455383, "kl": 0.0016209534369409084, "entropy": 1.3561688661575317, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 972800, "episodes_total": 2432, "training_iteration": 76, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-22-51", "timestamp": 1660249371, "time_this_iter_s": 33.30055785179138, "time_total_s": 7791.42479467392, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 7791.42479467392, "timesteps_since_restore": 972800, "iterations_since_restore": 76, "perf": {"cpu_util_percent": 33.693617021276594, "ram_util_percent": 57.704255319148906}}
-{"episode_reward_max": 510.0, "episode_reward_min": 208.0, "episode_reward_mean": 377.48, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 104.0}, "policy_reward_max": {"ppo": 274.0}, "policy_reward_mean": {"ppo": 188.74}, "custom_metrics": {"sparse_reward_mean": 129.0, "sparse_reward_min": 60, "sparse_reward_max": 180, "shaped_reward_mean": 119.48, "shaped_reward_min": 70, "shaped_reward_max": 156, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 11.62, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 18, "onion_pickup_agent_1_mean": 13.48, "onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 19, "useful_onion_pickup_agent_0_mean": 11.01, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 17, "useful_onion_pickup_agent_1_mean": 12.72, "useful_onion_pickup_agent_1_min": 6, "useful_onion_pickup_agent_1_max": 19, "onion_drop_agent_0_mean": 1.0, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 1.1, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.62, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, "useful_onion_drop_agent_1_mean": 0.63, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 10.34, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 17, "potting_onion_agent_1_mean": 12.03, "potting_onion_agent_1_min": 6, "potting_onion_agent_1_max": 19, "dish_pickup_agent_0_mean": 5.39, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 4.71, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 2.78, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 2.31, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 6, "dish_drop_agent_0_mean": 1.29, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 1.0, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.4, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.29, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.04, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 3.6, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 3.78, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 3.39, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.19, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 4, "soup_drop_agent_1_mean": 0.18, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 3, "optimal_onion_potting_agent_0_mean": 10.34, 
"optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 17, "optimal_onion_potting_agent_1_mean": 12.03, "optimal_onion_potting_agent_1_min": 6, "optimal_onion_potting_agent_1_max": 19, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 10.34, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 17, "viable_onion_potting_agent_1_mean": 12.03, "viable_onion_potting_agent_1_min": 6, "viable_onion_potting_agent_1_max": 19, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [376.0, 305.0, 302.0, 355.0, 447.0, 413.0, 364.0, 453.0, 459.0, 393.0, 368.0, 399.0, 404.0, 402.0, 373.0, 399.0, 381.0, 395.0, 367.0, 404.0, 208.0, 459.0, 287.0, 510.0, 347.0, 324.0, 410.0, 453.0, 404.0, 408.0, 319.0, 344.0, 359.0, 395.0, 310.0, 339.0, 438.0, 390.0, 364.0, 396.0, 413.0, 456.0, 346.0, 453.0, 332.0, 413.0, 251.0, 379.0, 384.0, 464.0, 384.0, 416.0, 401.0, 345.0, 339.0, 344.0, 355.0, 398.0, 419.0, 387.0, 302.0, 299.0, 453.0, 230.0, 362.0, 399.0, 405.0, 396.0, 272.0, 313.0, 408.0, 444.0, 304.0, 362.0, 304.0, 450.0, 408.0, 370.0, 476.0, 293.0, 453.0, 241.0, 410.0, 453.0, 405.0, 350.0, 350.0, 507.0, 427.0, 393.0, 267.0, 407.0, 419.0, 303.0, 410.0, 378.0, 350.0, 345.0, 376.0, 352.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [191.0, 185.0, 153.0, 152.0, 132.0, 170.0, 182.0, 173.0, 226.0, 221.0, 212.0, 201.0, 181.0, 183.0, 213.0, 240.0, 238.0, 221.0, 191.0, 202.0, 174.0, 194.0, 194.0, 205.0, 208.0, 196.0, 208.0, 194.0, 195.0, 178.0, 205.0, 194.0, 189.0, 192.0, 221.0, 174.0, 189.0, 178.0, 205.0, 199.0, 104.0, 104.0, 224.0, 235.0, 131.0, 156.0, 236.0, 274.0, 180.0, 167.0, 151.0, 173.0, 199.0, 211.0, 222.0, 231.0, 211.0, 193.0, 194.0, 214.0, 162.0, 157.0, 172.0, 172.0, 182.0, 177.0, 195.0, 200.0, 144.0, 166.0, 171.0, 168.0, 216.0, 222.0, 208.0, 182.0, 173.0, 191.0, 206.0, 190.0, 213.0, 200.0, 219.0, 237.0, 169.0, 177.0, 239.0, 214.0, 169.0, 163.0, 209.0, 204.0, 133.0, 118.0, 191.0, 188.0, 176.0, 
208.0, 233.0, 231.0, 204.0, 180.0, 211.0, 205.0, 196.0, 205.0, 179.0, 166.0, 156.0, 183.0, 167.0, 177.0, 174.0, 181.0, 192.0, 206.0, 201.0, 218.0, 171.0, 216.0, 149.0, 153.0, 153.0, 146.0, 236.0, 217.0, 111.0, 119.0, 183.0, 179.0, 207.0, 192.0, 197.0, 208.0, 202.0, 194.0, 136.0, 136.0, 156.0, 157.0, 194.0, 214.0, 212.0, 232.0, 153.0, 151.0, 177.0, 185.0, 158.0, 146.0, 219.0, 231.0, 205.0, 203.0, 198.0, 172.0, 250.0, 226.0, 135.0, 158.0, 244.0, 209.0, 125.0, 116.0, 194.0, 216.0, 229.0, 224.0, 201.0, 204.0, 160.0, 190.0, 157.0, 193.0, 258.0, 249.0, 208.0, 219.0, 196.0, 197.0, 138.0, 129.0, 192.0, 215.0, 211.0, 208.0, 140.0, 163.0, 206.0, 204.0, 189.0, 189.0, 171.0, 179.0, 177.0, 168.0, 206.0, 170.0, 173.0, 179.0]}, "sampler_perf": {"mean_env_wait_ms": 2.4209020546417674, "mean_processing_ms": 0.551048674766232, "mean_inference_ms": 2.9680431709223565}, "off_policy_estimator": {}, "info": {"num_steps_trained": 1848000, "num_steps_sampled": 985600, "sample_time_ms": 21397.355, "load_time_ms": 37.117, "grad_time_ms": 8669.98, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 2.646977999612734e-24, "cur_lr": 0.0010000000474974513, "total_loss": -0.0010796785354614258, "policy_loss": -0.006340180989354849, "vf_loss": 59.34244918823242, "vf_explained_var": 0.7488496899604797, "kl": 0.0016171737806871533, "entropy": 1.3474963903427124, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 985600, "episodes_total": 2464, "training_iteration": 77, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-23-24", "timestamp": 1660249404, "time_this_iter_s": 32.688453912734985, "time_total_s": 7824.113248586655, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 
1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": 
false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, 
"entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 7824.113248586655, "timesteps_since_restore": 985600, "iterations_since_restore": 77, "perf": {"cpu_util_percent": 40.12173913043479, "ram_util_percent": 58.68478260869566}}
-{"episode_reward_max": 525.0, "episode_reward_min": 194.0, "episode_reward_mean": 380.13, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 94.0}, "policy_reward_max": {"ppo": 274.0}, "policy_reward_mean": {"ppo": 190.065}, "custom_metrics": {"sparse_reward_mean": 129.4, "sparse_reward_min": 60, "sparse_reward_max": 180, "shaped_reward_mean": 121.33, "shaped_reward_min": 74, "shaped_reward_max": 165, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 12.11, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 18, "onion_pickup_agent_1_mean": 13.42, "onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 19, "useful_onion_pickup_agent_0_mean": 11.42, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 17, "useful_onion_pickup_agent_1_mean": 12.66, "useful_onion_pickup_agent_1_min": 6, "useful_onion_pickup_agent_1_max": 19, "onion_drop_agent_0_mean": 1.12, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 1.24, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.73, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, "useful_onion_drop_agent_1_mean": 0.78, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 10.7, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 17, "potting_onion_agent_1_mean": 11.85, "potting_onion_agent_1_min": 6, "potting_onion_agent_1_max": 19, "dish_pickup_agent_0_mean": 5.24, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 4.71, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 2.94, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 2.57, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 6, "dish_drop_agent_0_mean": 1.19, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.92, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.34, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.26, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 3.93, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 3.73, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 3.73, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 3.45, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.16, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 4, "soup_drop_agent_1_mean": 0.23, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 3, "optimal_onion_potting_agent_0_mean": 10.7, 
"optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 17, "optimal_onion_potting_agent_1_mean": 11.85, "optimal_onion_potting_agent_1_min": 6, "optimal_onion_potting_agent_1_max": 19, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 10.7, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 17, "viable_onion_potting_agent_1_mean": 11.85, "viable_onion_potting_agent_1_min": 6, "viable_onion_potting_agent_1_max": 19, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [416.0, 411.0, 419.0, 358.0, 367.0, 413.0, 427.0, 456.0, 450.0, 461.0, 461.0, 301.0, 405.0, 447.0, 293.0, 285.0, 399.0, 430.0, 422.0, 362.0, 416.0, 525.0, 194.0, 365.0, 407.0, 294.0, 239.0, 304.0, 296.0, 356.0, 387.0, 453.0, 362.0, 399.0, 405.0, 396.0, 272.0, 313.0, 408.0, 444.0, 304.0, 362.0, 304.0, 450.0, 408.0, 370.0, 476.0, 293.0, 453.0, 241.0, 410.0, 453.0, 405.0, 350.0, 350.0, 507.0, 427.0, 393.0, 267.0, 407.0, 419.0, 303.0, 410.0, 378.0, 350.0, 345.0, 376.0, 352.0, 376.0, 305.0, 302.0, 355.0, 447.0, 413.0, 364.0, 453.0, 459.0, 393.0, 368.0, 399.0, 404.0, 402.0, 373.0, 399.0, 381.0, 395.0, 367.0, 404.0, 208.0, 459.0, 287.0, 510.0, 347.0, 324.0, 410.0, 453.0, 404.0, 408.0, 319.0, 344.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [205.0, 211.0, 205.0, 206.0, 200.0, 219.0, 176.0, 182.0, 191.0, 176.0, 204.0, 209.0, 219.0, 208.0, 225.0, 231.0, 219.0, 231.0, 221.0, 240.0, 231.0, 230.0, 152.0, 149.0, 205.0, 200.0, 234.0, 213.0, 143.0, 150.0, 143.0, 142.0, 202.0, 197.0, 220.0, 210.0, 200.0, 222.0, 185.0, 177.0, 215.0, 201.0, 254.0, 271.0, 100.0, 94.0, 186.0, 179.0, 205.0, 202.0, 151.0, 143.0, 108.0, 131.0, 161.0, 143.0, 143.0, 153.0, 171.0, 185.0, 201.0, 186.0, 227.0, 226.0, 183.0, 179.0, 207.0, 192.0, 197.0, 208.0, 202.0, 194.0, 136.0, 136.0, 156.0, 157.0, 194.0, 214.0, 212.0, 232.0, 153.0, 151.0, 177.0, 185.0, 158.0, 146.0, 219.0, 231.0, 205.0, 203.0, 198.0, 172.0, 250.0, 226.0, 135.0, 158.0, 244.0, 
209.0, 125.0, 116.0, 194.0, 216.0, 229.0, 224.0, 201.0, 204.0, 160.0, 190.0, 157.0, 193.0, 258.0, 249.0, 208.0, 219.0, 196.0, 197.0, 138.0, 129.0, 192.0, 215.0, 211.0, 208.0, 140.0, 163.0, 206.0, 204.0, 189.0, 189.0, 171.0, 179.0, 177.0, 168.0, 206.0, 170.0, 173.0, 179.0, 191.0, 185.0, 153.0, 152.0, 132.0, 170.0, 182.0, 173.0, 226.0, 221.0, 212.0, 201.0, 181.0, 183.0, 213.0, 240.0, 238.0, 221.0, 191.0, 202.0, 174.0, 194.0, 194.0, 205.0, 208.0, 196.0, 208.0, 194.0, 195.0, 178.0, 205.0, 194.0, 189.0, 192.0, 221.0, 174.0, 189.0, 178.0, 205.0, 199.0, 104.0, 104.0, 224.0, 235.0, 131.0, 156.0, 236.0, 274.0, 180.0, 167.0, 151.0, 173.0, 199.0, 211.0, 222.0, 231.0, 211.0, 193.0, 194.0, 214.0, 162.0, 157.0, 172.0, 172.0]}, "sampler_perf": {"mean_env_wait_ms": 2.3955189497497584, "mean_processing_ms": 0.5460179273755849, "mean_inference_ms": 2.943733434085924}, "off_policy_estimator": {}, "info": {"num_steps_trained": 1872000, "num_steps_sampled": 998400, "sample_time_ms": 21499.137, "load_time_ms": 37.019, "grad_time_ms": 8919.956, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 1.323488999806367e-24, "cur_lr": 0.0010000000474974513, "total_loss": 0.0033416959922760725, "policy_loss": -0.003114718245342374, "vf_loss": 71.20785522460938, "vf_explained_var": 0.7243476510047913, "kl": 0.001916095265187323, "entropy": 1.3287501335144043, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 998400, "episodes_total": 2496, "training_iteration": 78, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-23-54", "timestamp": 1660249434, "time_this_iter_s": 30.256299018859863, "time_total_s": 7854.3695476055145, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 
1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": 
false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, 
"entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 7854.3695476055145, "timesteps_since_restore": 998400, "iterations_since_restore": 78, "perf": {"cpu_util_percent": 32.448837209302326, "ram_util_percent": 58.1279069767442}}
-{"episode_reward_max": 525.0, "episode_reward_min": 194.0, "episode_reward_mean": 380.19, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 94.0}, "policy_reward_max": {"ppo": 274.0}, "policy_reward_mean": {"ppo": 190.095}, "custom_metrics": {"sparse_reward_mean": 130.0, "sparse_reward_min": 60, "sparse_reward_max": 180, "shaped_reward_mean": 120.19, "shaped_reward_min": 74, "shaped_reward_max": 165, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 12.34, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 18, "onion_pickup_agent_1_mean": 13.71, "onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 11.45, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 16, "useful_onion_pickup_agent_1_mean": 12.72, "useful_onion_pickup_agent_1_min": 6, "useful_onion_pickup_agent_1_max": 20, "onion_drop_agent_0_mean": 1.47, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 1.4, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.97, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.84, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 10.48, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 16, "potting_onion_agent_1_mean": 11.9, "potting_onion_agent_1_min": 6, "potting_onion_agent_1_max": 18, "dish_pickup_agent_0_mean": 5.1, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 4.51, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 2.92, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 2.63, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 6, "dish_drop_agent_0_mean": 1.12, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 0.84, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.22, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.22, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 3.88, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 3.67, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 3.69, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 3.36, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 5, "soup_drop_agent_0_mean": 0.16, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 4, "soup_drop_agent_1_mean": 0.26, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 3, "optimal_onion_potting_agent_0_mean": 10.48, 
"optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 16, "optimal_onion_potting_agent_1_mean": 11.9, "optimal_onion_potting_agent_1_min": 6, "optimal_onion_potting_agent_1_max": 18, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 10.48, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 16, "viable_onion_potting_agent_1_mean": 11.9, "viable_onion_potting_agent_1_min": 6, "viable_onion_potting_agent_1_max": 18, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [408.0, 398.0, 294.0, 405.0, 291.0, 450.0, 350.0, 419.0, 465.0, 462.0, 312.0, 465.0, 370.0, 416.0, 510.0, 290.0, 237.0, 376.0, 404.0, 407.0, 344.0, 450.0, 462.0, 351.0, 405.0, 333.0, 344.0, 237.0, 288.0, 465.0, 384.0, 353.0, 350.0, 345.0, 376.0, 352.0, 376.0, 305.0, 302.0, 355.0, 447.0, 413.0, 364.0, 453.0, 459.0, 393.0, 368.0, 399.0, 404.0, 402.0, 373.0, 399.0, 381.0, 395.0, 367.0, 404.0, 208.0, 459.0, 287.0, 510.0, 347.0, 324.0, 410.0, 453.0, 404.0, 408.0, 319.0, 344.0, 416.0, 411.0, 419.0, 358.0, 367.0, 413.0, 427.0, 456.0, 450.0, 461.0, 461.0, 301.0, 405.0, 447.0, 293.0, 285.0, 399.0, 430.0, 422.0, 362.0, 416.0, 525.0, 194.0, 365.0, 407.0, 294.0, 239.0, 304.0, 296.0, 356.0, 387.0, 453.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [199.0, 209.0, 194.0, 204.0, 159.0, 135.0, 208.0, 197.0, 136.0, 155.0, 228.0, 222.0, 162.0, 188.0, 209.0, 210.0, 231.0, 234.0, 217.0, 245.0, 140.0, 172.0, 236.0, 229.0, 184.0, 186.0, 226.0, 190.0, 267.0, 243.0, 150.0, 140.0, 117.0, 120.0, 187.0, 189.0, 197.0, 207.0, 203.0, 204.0, 164.0, 180.0, 221.0, 229.0, 227.0, 235.0, 171.0, 180.0, 199.0, 206.0, 159.0, 174.0, 159.0, 185.0, 119.0, 118.0, 140.0, 148.0, 229.0, 236.0, 180.0, 204.0, 175.0, 178.0, 171.0, 179.0, 177.0, 168.0, 206.0, 170.0, 173.0, 179.0, 191.0, 185.0, 153.0, 152.0, 132.0, 170.0, 182.0, 173.0, 226.0, 221.0, 212.0, 201.0, 181.0, 183.0, 213.0, 240.0, 238.0, 221.0, 191.0, 202.0, 174.0, 194.0, 194.0, 205.0, 208.0, 
196.0, 208.0, 194.0, 195.0, 178.0, 205.0, 194.0, 189.0, 192.0, 221.0, 174.0, 189.0, 178.0, 205.0, 199.0, 104.0, 104.0, 224.0, 235.0, 131.0, 156.0, 236.0, 274.0, 180.0, 167.0, 151.0, 173.0, 199.0, 211.0, 222.0, 231.0, 211.0, 193.0, 194.0, 214.0, 162.0, 157.0, 172.0, 172.0, 205.0, 211.0, 205.0, 206.0, 200.0, 219.0, 176.0, 182.0, 191.0, 176.0, 204.0, 209.0, 219.0, 208.0, 225.0, 231.0, 219.0, 231.0, 221.0, 240.0, 231.0, 230.0, 152.0, 149.0, 205.0, 200.0, 234.0, 213.0, 143.0, 150.0, 143.0, 142.0, 202.0, 197.0, 220.0, 210.0, 200.0, 222.0, 185.0, 177.0, 215.0, 201.0, 254.0, 271.0, 100.0, 94.0, 186.0, 179.0, 205.0, 202.0, 151.0, 143.0, 108.0, 131.0, 161.0, 143.0, 143.0, 153.0, 171.0, 185.0, 201.0, 186.0, 227.0, 226.0]}, "sampler_perf": {"mean_env_wait_ms": 2.3706807134794996, "mean_processing_ms": 0.5410767279997776, "mean_inference_ms": 2.9190331024760856}, "off_policy_estimator": {}, "info": {"num_steps_trained": 1896000, "num_steps_sampled": 1011200, "sample_time_ms": 21546.342, "load_time_ms": 37.021, "grad_time_ms": 9020.101, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 6.617444999031835e-25, "cur_lr": 0.0010000000474974513, "total_loss": -0.0019535624887794256, "policy_loss": -0.0075730024836957455, "vf_loss": 62.825687408447266, "vf_explained_var": 0.7674410939216614, "kl": 0.001638473360799253, "entropy": 1.3262617588043213, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1011200, "episodes_total": 2528, "training_iteration": 79, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-24-22", "timestamp": 1660249462, "time_this_iter_s": 28.0881450176239, "time_total_s": 7882.457692623138, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", 
"num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, 
"in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, 
"entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 7882.457692623138, "timesteps_since_restore": 1011200, "iterations_since_restore": 79, "perf": {"cpu_util_percent": 32.120000000000005, "ram_util_percent": 58.1375}}
-{"episode_reward_max": 525.0, "episode_reward_min": 9.0, "episode_reward_mean": 390.28, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 3.0}, "policy_reward_max": {"ppo": 271.0}, "policy_reward_mean": {"ppo": 195.14}, "custom_metrics": {"sparse_reward_mean": 134.0, "sparse_reward_min": 0, "sparse_reward_max": 180, "shaped_reward_mean": 122.28, "shaped_reward_min": 9, "shaped_reward_max": 165, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 12.84, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 18, "onion_pickup_agent_1_mean": 13.96, "onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 11.85, "useful_onion_pickup_agent_0_min": 2, "useful_onion_pickup_agent_0_max": 17, "useful_onion_pickup_agent_1_mean": 12.82, "useful_onion_pickup_agent_1_min": 2, "useful_onion_pickup_agent_1_max": 20, "onion_drop_agent_0_mean": 1.6, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 1.51, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 1.11, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.91, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 10.79, "potting_onion_agent_0_min": 1, "potting_onion_agent_0_max": 16, "potting_onion_agent_1_mean": 11.99, "potting_onion_agent_1_min": 2, "potting_onion_agent_1_max": 17, "dish_pickup_agent_0_mean": 4.93, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 4.67, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 2.89, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 2.84, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 6, "dish_drop_agent_0_mean": 1.09, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 0.87, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.24, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.22, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 3.83, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 3.83, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 3.64, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 3.5, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.14, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 3, "soup_drop_agent_1_mean": 0.27, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 5, "optimal_onion_potting_agent_0_mean": 10.79, 
"optimal_onion_potting_agent_0_min": 1, "optimal_onion_potting_agent_0_max": 16, "optimal_onion_potting_agent_1_mean": 11.99, "optimal_onion_potting_agent_1_min": 2, "optimal_onion_potting_agent_1_max": 17, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 10.79, "viable_onion_potting_agent_0_min": 1, "viable_onion_potting_agent_0_max": 16, "viable_onion_potting_agent_1_mean": 11.99, "viable_onion_potting_agent_1_min": 2, "viable_onion_potting_agent_1_max": 17, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [453.0, 516.0, 456.0, 467.0, 470.0, 410.0, 9.0, 464.0, 390.0, 330.0, 456.0, 516.0, 297.0, 330.0, 450.0, 465.0, 288.0, 413.0, 393.0, 410.0, 455.0, 456.0, 402.0, 455.0, 367.0, 516.0, 441.0, 459.0, 438.0, 408.0, 399.0, 410.0, 404.0, 408.0, 319.0, 344.0, 416.0, 411.0, 419.0, 358.0, 367.0, 413.0, 427.0, 456.0, 450.0, 461.0, 461.0, 301.0, 405.0, 447.0, 293.0, 285.0, 399.0, 430.0, 422.0, 362.0, 416.0, 525.0, 194.0, 365.0, 407.0, 294.0, 239.0, 304.0, 296.0, 356.0, 387.0, 453.0, 408.0, 398.0, 294.0, 405.0, 291.0, 450.0, 350.0, 419.0, 465.0, 462.0, 312.0, 465.0, 370.0, 416.0, 510.0, 290.0, 237.0, 376.0, 404.0, 407.0, 344.0, 450.0, 462.0, 351.0, 405.0, 333.0, 344.0, 237.0, 288.0, 465.0, 384.0, 353.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [222.0, 231.0, 269.0, 247.0, 214.0, 242.0, 239.0, 228.0, 228.0, 242.0, 198.0, 212.0, 3.0, 6.0, 232.0, 232.0, 188.0, 202.0, 163.0, 167.0, 234.0, 222.0, 253.0, 263.0, 162.0, 135.0, 159.0, 171.0, 221.0, 229.0, 230.0, 235.0, 145.0, 143.0, 202.0, 211.0, 201.0, 192.0, 191.0, 219.0, 225.0, 230.0, 231.0, 225.0, 190.0, 212.0, 219.0, 236.0, 179.0, 188.0, 270.0, 246.0, 227.0, 214.0, 226.0, 233.0, 207.0, 231.0, 198.0, 210.0, 196.0, 203.0, 203.0, 207.0, 211.0, 193.0, 194.0, 214.0, 162.0, 157.0, 172.0, 172.0, 205.0, 211.0, 205.0, 206.0, 200.0, 219.0, 176.0, 182.0, 191.0, 176.0, 204.0, 209.0, 219.0, 208.0, 225.0, 231.0, 219.0, 231.0, 221.0, 240.0, 231.0, 230.0, 152.0, 149.0, 205.0, 200.0, 
234.0, 213.0, 143.0, 150.0, 143.0, 142.0, 202.0, 197.0, 220.0, 210.0, 200.0, 222.0, 185.0, 177.0, 215.0, 201.0, 254.0, 271.0, 100.0, 94.0, 186.0, 179.0, 205.0, 202.0, 151.0, 143.0, 108.0, 131.0, 161.0, 143.0, 143.0, 153.0, 171.0, 185.0, 201.0, 186.0, 227.0, 226.0, 199.0, 209.0, 194.0, 204.0, 159.0, 135.0, 208.0, 197.0, 136.0, 155.0, 228.0, 222.0, 162.0, 188.0, 209.0, 210.0, 231.0, 234.0, 217.0, 245.0, 140.0, 172.0, 236.0, 229.0, 184.0, 186.0, 226.0, 190.0, 267.0, 243.0, 150.0, 140.0, 117.0, 120.0, 187.0, 189.0, 197.0, 207.0, 203.0, 204.0, 164.0, 180.0, 221.0, 229.0, 227.0, 235.0, 171.0, 180.0, 199.0, 206.0, 159.0, 174.0, 159.0, 185.0, 119.0, 118.0, 140.0, 148.0, 229.0, 236.0, 180.0, 204.0, 175.0, 178.0]}, "sampler_perf": {"mean_env_wait_ms": 2.3463084552547957, "mean_processing_ms": 0.5362072894726034, "mean_inference_ms": 2.894108730963018}, "off_policy_estimator": {}, "info": {"num_steps_trained": 1920000, "num_steps_sampled": 1024000, "sample_time_ms": 21336.358, "load_time_ms": 36.943, "grad_time_ms": 8894.576, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 3.3087224995159173e-25, "cur_lr": 0.0010000000474974513, "total_loss": -0.0009382636635564268, "policy_loss": -0.007344415877014399, "vf_loss": 70.56519317626953, "vf_explained_var": 0.7276310324668884, "kl": 0.001774398609995842, "entropy": 1.3007346391677856, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1024000, "episodes_total": 2560, "training_iteration": 80, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-24-48", "timestamp": 1660249488, "time_this_iter_s": 25.79700207710266, "time_total_s": 7908.254694700241, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 7908.254694700241, "timesteps_since_restore": 1024000, "iterations_since_restore": 80, "perf": {"cpu_util_percent": 33.88055555555556, "ram_util_percent": 58.030555555555566}}
-{"episode_reward_max": 522.0, "episode_reward_min": 9.0, "episode_reward_mean": 403.96, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 3.0}, "policy_reward_max": {"ppo": 270.0}, "policy_reward_mean": {"ppo": 201.98}, "custom_metrics": {"sparse_reward_mean": 139.0, "sparse_reward_min": 0, "sparse_reward_max": 180, "shaped_reward_mean": 125.96, "shaped_reward_min": 9, "shaped_reward_max": 162, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 13.08, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 14.26, "onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 12.09, "useful_onion_pickup_agent_0_min": 2, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 13.06, "useful_onion_pickup_agent_1_min": 2, "useful_onion_pickup_agent_1_max": 20, "onion_drop_agent_0_mean": 1.53, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 1.5, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.96, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.9, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 11.08, "potting_onion_agent_0_min": 1, "potting_onion_agent_0_max": 16, "potting_onion_agent_1_mean": 12.3, "potting_onion_agent_1_min": 2, "potting_onion_agent_1_max": 17, "dish_pickup_agent_0_mean": 5.04, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 4.64, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 3.08, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 2.91, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 6, "dish_drop_agent_0_mean": 1.09, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 0.72, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.3, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.23, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 3.96, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 3.91, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 3.72, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 3.62, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.16, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 3, "soup_drop_agent_1_mean": 0.23, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 5, "optimal_onion_potting_agent_0_mean": 11.08, 
"optimal_onion_potting_agent_0_min": 1, "optimal_onion_potting_agent_0_max": 16, "optimal_onion_potting_agent_1_mean": 12.3, "optimal_onion_potting_agent_1_min": 2, "optimal_onion_potting_agent_1_max": 17, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 11.08, "viable_onion_potting_agent_0_min": 1, "viable_onion_potting_agent_0_max": 16, "viable_onion_potting_agent_1_mean": 12.3, "viable_onion_potting_agent_1_min": 2, "viable_onion_potting_agent_1_max": 17, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [459.0, 342.0, 513.0, 519.0, 410.0, 390.0, 424.0, 458.0, 362.0, 373.0, 399.0, 358.0, 456.0, 421.0, 459.0, 467.0, 419.0, 359.0, 327.0, 419.0, 393.0, 430.0, 447.0, 447.0, 522.0, 398.0, 405.0, 507.0, 468.0, 465.0, 410.0, 344.0, 296.0, 356.0, 387.0, 453.0, 408.0, 398.0, 294.0, 405.0, 291.0, 450.0, 350.0, 419.0, 465.0, 462.0, 312.0, 465.0, 370.0, 416.0, 510.0, 290.0, 237.0, 376.0, 404.0, 407.0, 344.0, 450.0, 462.0, 351.0, 405.0, 333.0, 344.0, 237.0, 288.0, 465.0, 384.0, 353.0, 453.0, 516.0, 456.0, 467.0, 470.0, 410.0, 9.0, 464.0, 390.0, 330.0, 456.0, 516.0, 297.0, 330.0, 450.0, 465.0, 288.0, 413.0, 393.0, 410.0, 455.0, 456.0, 402.0, 455.0, 367.0, 516.0, 441.0, 459.0, 438.0, 408.0, 399.0, 410.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [235.0, 224.0, 165.0, 177.0, 253.0, 260.0, 262.0, 257.0, 197.0, 213.0, 191.0, 199.0, 199.0, 225.0, 240.0, 218.0, 181.0, 181.0, 193.0, 180.0, 207.0, 192.0, 190.0, 168.0, 231.0, 225.0, 216.0, 205.0, 225.0, 234.0, 230.0, 237.0, 206.0, 213.0, 180.0, 179.0, 155.0, 172.0, 208.0, 211.0, 202.0, 191.0, 219.0, 211.0, 214.0, 233.0, 212.0, 235.0, 265.0, 257.0, 204.0, 194.0, 205.0, 200.0, 257.0, 250.0, 239.0, 229.0, 249.0, 216.0, 211.0, 199.0, 162.0, 182.0, 143.0, 153.0, 171.0, 185.0, 201.0, 186.0, 227.0, 226.0, 199.0, 209.0, 194.0, 204.0, 159.0, 135.0, 208.0, 197.0, 136.0, 155.0, 228.0, 222.0, 162.0, 188.0, 209.0, 210.0, 231.0, 234.0, 217.0, 245.0, 140.0, 172.0, 236.0, 229.0, 184.0, 
186.0, 226.0, 190.0, 267.0, 243.0, 150.0, 140.0, 117.0, 120.0, 187.0, 189.0, 197.0, 207.0, 203.0, 204.0, 164.0, 180.0, 221.0, 229.0, 227.0, 235.0, 171.0, 180.0, 199.0, 206.0, 159.0, 174.0, 159.0, 185.0, 119.0, 118.0, 140.0, 148.0, 229.0, 236.0, 180.0, 204.0, 175.0, 178.0, 222.0, 231.0, 269.0, 247.0, 214.0, 242.0, 239.0, 228.0, 228.0, 242.0, 198.0, 212.0, 3.0, 6.0, 232.0, 232.0, 188.0, 202.0, 163.0, 167.0, 234.0, 222.0, 253.0, 263.0, 162.0, 135.0, 159.0, 171.0, 221.0, 229.0, 230.0, 235.0, 145.0, 143.0, 202.0, 211.0, 201.0, 192.0, 191.0, 219.0, 225.0, 230.0, 231.0, 225.0, 190.0, 212.0, 219.0, 236.0, 179.0, 188.0, 270.0, 246.0, 227.0, 214.0, 226.0, 233.0, 207.0, 231.0, 198.0, 210.0, 196.0, 203.0, 203.0, 207.0]}, "sampler_perf": {"mean_env_wait_ms": 2.3225268766730203, "mean_processing_ms": 0.5314526800460904, "mean_inference_ms": 2.8694240871343926}, "off_policy_estimator": {}, "info": {"num_steps_trained": 1944000, "num_steps_sampled": 1036800, "sample_time_ms": 21260.609, "load_time_ms": 36.811, "grad_time_ms": 8748.654, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 1.6543612497579586e-25, "cur_lr": 0.0010000000474974513, "total_loss": -0.0011773156002163887, "policy_loss": -0.006681745406240225, "vf_loss": 61.517730712890625, "vf_explained_var": 0.7553827166557312, "kl": 0.0021572383120656013, "entropy": 1.2946891784667969, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1036800, "episodes_total": 2592, "training_iteration": 81, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-25-15", "timestamp": 1660249515, "time_this_iter_s": 26.438808917999268, "time_total_s": 7934.69350361824, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 
1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": 
false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, 
"entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 7934.69350361824, "timesteps_since_restore": 1036800, "iterations_since_restore": 81, "perf": {"cpu_util_percent": 30.592105263157894, "ram_util_percent": 58.057894736842115}}
-{"episode_reward_max": 522.0, "episode_reward_min": 9.0, "episode_reward_mean": 412.5, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 3.0}, "policy_reward_max": {"ppo": 270.0}, "policy_reward_mean": {"ppo": 206.25}, "custom_metrics": {"sparse_reward_mean": 142.0, "sparse_reward_min": 0, "sparse_reward_max": 180, "shaped_reward_mean": 128.5, "shaped_reward_min": 9, "shaped_reward_max": 162, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 13.49, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 14.04, "onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 20, "useful_onion_pickup_agent_0_mean": 12.53, "useful_onion_pickup_agent_0_min": 2, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 13.01, "useful_onion_pickup_agent_1_min": 2, "useful_onion_pickup_agent_1_max": 18, "onion_drop_agent_0_mean": 1.49, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 7, "onion_drop_agent_1_mean": 1.34, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.9, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, "useful_onion_drop_agent_1_mean": 0.78, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 11.56, "potting_onion_agent_0_min": 1, "potting_onion_agent_0_max": 17, "potting_onion_agent_1_mean": 12.31, "potting_onion_agent_1_min": 2, "potting_onion_agent_1_max": 17, "dish_pickup_agent_0_mean": 5.19, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 4.95, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 3.02, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 2.86, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 6, "dish_drop_agent_0_mean": 1.11, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 0.93, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.31, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.3, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 4.12, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.06, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 3.82, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 3.76, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.21, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 3, "soup_drop_agent_1_mean": 0.19, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 5, "optimal_onion_potting_agent_0_mean": 11.56, 
"optimal_onion_potting_agent_0_min": 1, "optimal_onion_potting_agent_0_max": 17, "optimal_onion_potting_agent_1_mean": 12.31, "optimal_onion_potting_agent_1_min": 2, "optimal_onion_potting_agent_1_max": 17, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 11.56, "viable_onion_potting_agent_0_min": 1, "viable_onion_potting_agent_0_max": 17, "viable_onion_potting_agent_1_mean": 12.31, "viable_onion_potting_agent_1_min": 2, "viable_onion_potting_agent_1_max": 17, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [316.0, 235.0, 462.0, 501.0, 449.0, 411.0, 357.0, 401.0, 462.0, 408.0, 393.0, 410.0, 347.0, 401.0, 393.0, 444.0, 363.0, 470.0, 470.0, 413.0, 347.0, 459.0, 516.0, 456.0, 476.0, 398.0, 353.0, 401.0, 421.0, 413.0, 300.0, 355.0, 288.0, 465.0, 384.0, 353.0, 453.0, 516.0, 456.0, 467.0, 470.0, 410.0, 9.0, 464.0, 390.0, 330.0, 456.0, 516.0, 297.0, 330.0, 450.0, 465.0, 288.0, 413.0, 393.0, 410.0, 455.0, 456.0, 402.0, 455.0, 367.0, 516.0, 441.0, 459.0, 438.0, 408.0, 399.0, 410.0, 459.0, 342.0, 513.0, 519.0, 410.0, 390.0, 424.0, 458.0, 362.0, 373.0, 399.0, 358.0, 456.0, 421.0, 459.0, 467.0, 419.0, 359.0, 327.0, 419.0, 393.0, 430.0, 447.0, 447.0, 522.0, 398.0, 405.0, 507.0, 468.0, 465.0, 410.0, 344.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [165.0, 151.0, 116.0, 119.0, 239.0, 223.0, 252.0, 249.0, 229.0, 220.0, 204.0, 207.0, 190.0, 167.0, 210.0, 191.0, 228.0, 234.0, 200.0, 208.0, 214.0, 179.0, 203.0, 207.0, 179.0, 168.0, 176.0, 225.0, 185.0, 208.0, 216.0, 228.0, 160.0, 203.0, 233.0, 237.0, 228.0, 242.0, 204.0, 209.0, 186.0, 161.0, 218.0, 241.0, 251.0, 265.0, 230.0, 226.0, 234.0, 242.0, 185.0, 213.0, 186.0, 167.0, 208.0, 193.0, 225.0, 196.0, 218.0, 195.0, 152.0, 148.0, 188.0, 167.0, 140.0, 148.0, 229.0, 236.0, 180.0, 204.0, 175.0, 178.0, 222.0, 231.0, 269.0, 247.0, 214.0, 242.0, 239.0, 228.0, 228.0, 242.0, 198.0, 212.0, 3.0, 6.0, 232.0, 232.0, 188.0, 202.0, 163.0, 167.0, 234.0, 222.0, 253.0, 263.0, 162.0, 135.0, 
159.0, 171.0, 221.0, 229.0, 230.0, 235.0, 145.0, 143.0, 202.0, 211.0, 201.0, 192.0, 191.0, 219.0, 225.0, 230.0, 231.0, 225.0, 190.0, 212.0, 219.0, 236.0, 179.0, 188.0, 270.0, 246.0, 227.0, 214.0, 226.0, 233.0, 207.0, 231.0, 198.0, 210.0, 196.0, 203.0, 203.0, 207.0, 235.0, 224.0, 165.0, 177.0, 253.0, 260.0, 262.0, 257.0, 197.0, 213.0, 191.0, 199.0, 199.0, 225.0, 240.0, 218.0, 181.0, 181.0, 193.0, 180.0, 207.0, 192.0, 190.0, 168.0, 231.0, 225.0, 216.0, 205.0, 225.0, 234.0, 230.0, 237.0, 206.0, 213.0, 180.0, 179.0, 155.0, 172.0, 208.0, 211.0, 202.0, 191.0, 219.0, 211.0, 214.0, 233.0, 212.0, 235.0, 265.0, 257.0, 204.0, 194.0, 205.0, 200.0, 257.0, 250.0, 239.0, 229.0, 249.0, 216.0, 211.0, 199.0, 162.0, 182.0]}, "sampler_perf": {"mean_env_wait_ms": 2.299333204784243, "mean_processing_ms": 0.5268100926342811, "mean_inference_ms": 2.8452761548654255}, "off_policy_estimator": {}, "info": {"num_steps_trained": 1968000, "num_steps_sampled": 1049600, "sample_time_ms": 21056.327, "load_time_ms": 36.751, "grad_time_ms": 8586.844, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 8.271806248789793e-26, "cur_lr": 0.0010000000474974513, "total_loss": -0.0015234133461490273, "policy_loss": -0.007057014852762222, "vf_loss": 61.880123138427734, "vf_explained_var": 0.7578676342964172, "kl": 0.002027077367529273, "entropy": 1.3088246583938599, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1049600, "episodes_total": 2624, "training_iteration": 82, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-25-41", "timestamp": 1660249541, "time_this_iter_s": 26.555142879486084, "time_total_s": 7961.248646497726, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 7961.248646497726, "timesteps_since_restore": 1049600, "iterations_since_restore": 82, "perf": {"cpu_util_percent": 33.34324324324324, "ram_util_percent": 58.07837837837838}}
-{"episode_reward_max": 522.0, "episode_reward_min": 177.0, "episode_reward_mean": 411.12, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 83.0}, "policy_reward_max": {"ppo": 266.0}, "policy_reward_mean": {"ppo": 205.56}, "custom_metrics": {"sparse_reward_mean": 141.6, "sparse_reward_min": 60, "sparse_reward_max": 180, "shaped_reward_mean": 127.92, "shaped_reward_min": 57, "shaped_reward_max": 162, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 13.28, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 14.01, "onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 19, "useful_onion_pickup_agent_0_mean": 12.41, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 13.19, "useful_onion_pickup_agent_1_min": 6, "useful_onion_pickup_agent_1_max": 18, "onion_drop_agent_0_mean": 1.37, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 7, "onion_drop_agent_1_mean": 1.26, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.75, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, "useful_onion_drop_agent_1_mean": 0.72, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 11.55, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 17, "potting_onion_agent_1_mean": 12.4, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 18, "dish_pickup_agent_0_mean": 5.34, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 4.86, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 2.89, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 2.65, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 6, "dish_drop_agent_0_mean": 1.13, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 0.89, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.37, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.32, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 4.2, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 3.94, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 3.91, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 3.73, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.2, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.11, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 11.55, 
"optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 17, "optimal_onion_potting_agent_1_mean": 12.4, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 18, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 11.55, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 17, "viable_onion_potting_agent_1_mean": 12.4, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 18, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [350.0, 419.0, 422.0, 465.0, 177.0, 450.0, 415.0, 344.0, 282.0, 467.0, 407.0, 404.0, 516.0, 456.0, 396.0, 453.0, 458.0, 418.0, 295.0, 353.0, 406.0, 351.0, 516.0, 519.0, 444.0, 412.0, 444.0, 467.0, 287.0, 406.0, 237.0, 450.0, 438.0, 408.0, 399.0, 410.0, 459.0, 342.0, 513.0, 519.0, 410.0, 390.0, 424.0, 458.0, 362.0, 373.0, 399.0, 358.0, 456.0, 421.0, 459.0, 467.0, 419.0, 359.0, 327.0, 419.0, 393.0, 430.0, 447.0, 447.0, 522.0, 398.0, 405.0, 507.0, 468.0, 465.0, 410.0, 344.0, 316.0, 235.0, 462.0, 501.0, 449.0, 411.0, 357.0, 401.0, 462.0, 408.0, 393.0, 410.0, 347.0, 401.0, 393.0, 444.0, 363.0, 470.0, 470.0, 413.0, 347.0, 459.0, 516.0, 456.0, 476.0, 398.0, 353.0, 401.0, 421.0, 413.0, 300.0, 355.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [177.0, 173.0, 202.0, 217.0, 200.0, 222.0, 240.0, 225.0, 94.0, 83.0, 227.0, 223.0, 205.0, 210.0, 171.0, 173.0, 139.0, 143.0, 239.0, 228.0, 194.0, 213.0, 189.0, 215.0, 250.0, 266.0, 230.0, 226.0, 213.0, 183.0, 230.0, 223.0, 237.0, 221.0, 205.0, 213.0, 150.0, 145.0, 159.0, 194.0, 206.0, 200.0, 179.0, 172.0, 251.0, 265.0, 255.0, 264.0, 210.0, 234.0, 199.0, 213.0, 233.0, 211.0, 233.0, 234.0, 143.0, 144.0, 212.0, 194.0, 120.0, 117.0, 226.0, 224.0, 207.0, 231.0, 198.0, 210.0, 196.0, 203.0, 203.0, 207.0, 235.0, 224.0, 165.0, 177.0, 253.0, 260.0, 262.0, 257.0, 197.0, 213.0, 191.0, 199.0, 199.0, 225.0, 240.0, 218.0, 181.0, 181.0, 193.0, 180.0, 207.0, 192.0, 190.0, 168.0, 231.0, 
225.0, 216.0, 205.0, 225.0, 234.0, 230.0, 237.0, 206.0, 213.0, 180.0, 179.0, 155.0, 172.0, 208.0, 211.0, 202.0, 191.0, 219.0, 211.0, 214.0, 233.0, 212.0, 235.0, 265.0, 257.0, 204.0, 194.0, 205.0, 200.0, 257.0, 250.0, 239.0, 229.0, 249.0, 216.0, 211.0, 199.0, 162.0, 182.0, 165.0, 151.0, 116.0, 119.0, 239.0, 223.0, 252.0, 249.0, 229.0, 220.0, 204.0, 207.0, 190.0, 167.0, 210.0, 191.0, 228.0, 234.0, 200.0, 208.0, 214.0, 179.0, 203.0, 207.0, 179.0, 168.0, 176.0, 225.0, 185.0, 208.0, 216.0, 228.0, 160.0, 203.0, 233.0, 237.0, 228.0, 242.0, 204.0, 209.0, 186.0, 161.0, 218.0, 241.0, 251.0, 265.0, 230.0, 226.0, 234.0, 242.0, 185.0, 213.0, 186.0, 167.0, 208.0, 193.0, 225.0, 196.0, 218.0, 195.0, 152.0, 148.0, 188.0, 167.0]}, "sampler_perf": {"mean_env_wait_ms": 2.2767232665412354, "mean_processing_ms": 0.5222866661980672, "mean_inference_ms": 2.821797669244603}, "off_policy_estimator": {}, "info": {"num_steps_trained": 1992000, "num_steps_sampled": 1062400, "sample_time_ms": 20905.085, "load_time_ms": 36.646, "grad_time_ms": 8463.059, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 4.1359031243948966e-26, "cur_lr": 0.0010000000474974513, "total_loss": -0.00031348783522844315, "policy_loss": -0.006104966159909964, "vf_loss": 64.42190551757812, "vf_explained_var": 0.7651865482330322, "kl": 0.0017986185848712921, "entropy": 1.3014076948165894, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1062400, "episodes_total": 2656, "training_iteration": 83, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-26-09", "timestamp": 1660249569, "time_this_iter_s": 27.776076078414917, "time_total_s": 7989.024722576141, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", 
"num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, 
"in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, 
"entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 7989.024722576141, "timesteps_since_restore": 1062400, "iterations_since_restore": 83, "perf": {"cpu_util_percent": 33.82000000000001, "ram_util_percent": 58.82000000000001}}
-{"episode_reward_max": 573.0, "episode_reward_min": 126.0, "episode_reward_mean": 408.86, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 63.0}, "policy_reward_max": {"ppo": 293.0}, "policy_reward_mean": {"ppo": 204.43}, "custom_metrics": {"sparse_reward_mean": 141.0, "sparse_reward_min": 40, "sparse_reward_max": 200, "shaped_reward_mean": 126.86, "shaped_reward_min": 46, "shaped_reward_max": 173, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 13.31, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 13.9, "onion_pickup_agent_1_min": 4, "onion_pickup_agent_1_max": 20, "useful_onion_pickup_agent_0_mean": 12.53, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 19, "useful_onion_pickup_agent_1_mean": 13.04, "useful_onion_pickup_agent_1_min": 4, "useful_onion_pickup_agent_1_max": 19, "onion_drop_agent_0_mean": 1.42, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 7, "onion_drop_agent_1_mean": 1.24, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.74, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, "useful_onion_drop_agent_1_mean": 0.72, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 11.51, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 12.31, "potting_onion_agent_1_min": 4, "potting_onion_agent_1_max": 18, "dish_pickup_agent_0_mean": 5.4, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 4.91, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 2.89, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 2.51, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 6, "dish_drop_agent_0_mean": 1.07, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 1.09, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.35, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.37, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 4.3, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 3.74, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.07, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 3.53, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.15, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.12, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 11.51, 
"optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 12.31, "optimal_onion_potting_agent_1_min": 4, "optimal_onion_potting_agent_1_max": 18, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 11.51, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 12.31, "viable_onion_potting_agent_1_min": 4, "viable_onion_potting_agent_1_max": 18, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [126.0, 424.0, 458.0, 418.0, 404.0, 465.0, 228.0, 353.0, 465.0, 350.0, 375.0, 441.0, 422.0, 418.0, 467.0, 441.0, 339.0, 370.0, 404.0, 467.0, 453.0, 458.0, 465.0, 345.0, 476.0, 459.0, 516.0, 573.0, 467.0, 459.0, 393.0, 413.0, 468.0, 465.0, 410.0, 344.0, 316.0, 235.0, 462.0, 501.0, 449.0, 411.0, 357.0, 401.0, 462.0, 408.0, 393.0, 410.0, 347.0, 401.0, 393.0, 444.0, 363.0, 470.0, 470.0, 413.0, 347.0, 459.0, 516.0, 456.0, 476.0, 398.0, 353.0, 401.0, 421.0, 413.0, 300.0, 355.0, 350.0, 419.0, 422.0, 465.0, 177.0, 450.0, 415.0, 344.0, 282.0, 467.0, 407.0, 404.0, 516.0, 456.0, 396.0, 453.0, 458.0, 418.0, 295.0, 353.0, 406.0, 351.0, 516.0, 519.0, 444.0, 412.0, 444.0, 467.0, 287.0, 406.0, 237.0, 450.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [63.0, 63.0, 224.0, 200.0, 224.0, 234.0, 210.0, 208.0, 202.0, 202.0, 240.0, 225.0, 117.0, 111.0, 173.0, 180.0, 223.0, 242.0, 178.0, 172.0, 181.0, 194.0, 219.0, 222.0, 222.0, 200.0, 207.0, 211.0, 230.0, 237.0, 232.0, 209.0, 165.0, 174.0, 185.0, 185.0, 204.0, 200.0, 230.0, 237.0, 238.0, 215.0, 256.0, 202.0, 236.0, 229.0, 194.0, 151.0, 226.0, 250.0, 231.0, 228.0, 252.0, 264.0, 293.0, 280.0, 238.0, 229.0, 222.0, 237.0, 207.0, 186.0, 212.0, 201.0, 239.0, 229.0, 249.0, 216.0, 211.0, 199.0, 162.0, 182.0, 165.0, 151.0, 116.0, 119.0, 239.0, 223.0, 252.0, 249.0, 229.0, 220.0, 204.0, 207.0, 190.0, 167.0, 210.0, 191.0, 228.0, 234.0, 200.0, 208.0, 214.0, 179.0, 203.0, 207.0, 179.0, 
168.0, 176.0, 225.0, 185.0, 208.0, 216.0, 228.0, 160.0, 203.0, 233.0, 237.0, 228.0, 242.0, 204.0, 209.0, 186.0, 161.0, 218.0, 241.0, 251.0, 265.0, 230.0, 226.0, 234.0, 242.0, 185.0, 213.0, 186.0, 167.0, 208.0, 193.0, 225.0, 196.0, 218.0, 195.0, 152.0, 148.0, 188.0, 167.0, 177.0, 173.0, 202.0, 217.0, 200.0, 222.0, 240.0, 225.0, 94.0, 83.0, 227.0, 223.0, 205.0, 210.0, 171.0, 173.0, 139.0, 143.0, 239.0, 228.0, 194.0, 213.0, 189.0, 215.0, 250.0, 266.0, 230.0, 226.0, 213.0, 183.0, 230.0, 223.0, 237.0, 221.0, 205.0, 213.0, 150.0, 145.0, 159.0, 194.0, 206.0, 200.0, 179.0, 172.0, 251.0, 265.0, 255.0, 264.0, 210.0, 234.0, 199.0, 213.0, 233.0, 211.0, 233.0, 234.0, 143.0, 144.0, 212.0, 194.0, 120.0, 117.0, 226.0, 224.0]}, "sampler_perf": {"mean_env_wait_ms": 2.254677940793998, "mean_processing_ms": 0.5178804318399313, "mean_inference_ms": 2.7992231247898705}, "off_policy_estimator": {}, "info": {"num_steps_trained": 2016000, "num_steps_sampled": 1075200, "sample_time_ms": 20798.01, "load_time_ms": 36.647, "grad_time_ms": 8403.274, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 2.0679515621974483e-26, "cur_lr": 0.0010000000474974513, "total_loss": 0.001430995762348175, "policy_loss": -0.004144120961427689, "vf_loss": 62.17998123168945, "vf_explained_var": 0.801994264125824, "kl": 0.0024192428681999445, "entropy": 1.2857705354690552, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1075200, "episodes_total": 2688, "training_iteration": 84, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-26-39", "timestamp": 1660249599, "time_this_iter_s": 29.525622129440308, "time_total_s": 8018.550344705582, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 8018.550344705582, "timesteps_since_restore": 1075200, "iterations_since_restore": 84, "perf": {"cpu_util_percent": 28.77560975609756, "ram_util_percent": 58.31951219512194}}
-{"episode_reward_max": 573.0, "episode_reward_min": 126.0, "episode_reward_mean": 415.33, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 63.0}, "policy_reward_max": {"ppo": 293.0}, "policy_reward_mean": {"ppo": 207.665}, "custom_metrics": {"sparse_reward_mean": 143.2, "sparse_reward_min": 40, "sparse_reward_max": 200, "shaped_reward_mean": 128.93, "shaped_reward_min": 46, "shaped_reward_max": 173, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 13.57, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 14.2, "onion_pickup_agent_1_min": 4, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 12.84, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 19, "useful_onion_pickup_agent_1_mean": 13.3, "useful_onion_pickup_agent_1_min": 4, "useful_onion_pickup_agent_1_max": 20, "onion_drop_agent_0_mean": 1.45, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 1.38, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.73, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.82, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 11.79, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 12.45, "potting_onion_agent_1_min": 4, "potting_onion_agent_1_max": 18, "dish_pickup_agent_0_mean": 5.5, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.09, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 2.83, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 6, "useful_dish_pickup_agent_1_mean": 2.59, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 6, "dish_drop_agent_0_mean": 1.23, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 1.02, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.4, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.32, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.27, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 3.95, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 3.99, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 6, "soup_delivery_agent_1_mean": 3.72, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.17, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.15, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 11.79, 
"optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 12.45, "optimal_onion_potting_agent_1_min": 4, "optimal_onion_potting_agent_1_max": 18, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 11.79, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 12.45, "viable_onion_potting_agent_1_min": 4, "viable_onion_potting_agent_1_max": 18, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [516.0, 287.0, 462.0, 456.0, 449.0, 402.0, 272.0, 452.0, 407.0, 430.0, 450.0, 464.0, 465.0, 461.0, 407.0, 458.0, 404.0, 456.0, 310.0, 470.0, 352.0, 458.0, 376.0, 444.0, 501.0, 495.0, 458.0, 464.0, 447.0, 473.0, 516.0, 384.0, 421.0, 413.0, 300.0, 355.0, 350.0, 419.0, 422.0, 465.0, 177.0, 450.0, 415.0, 344.0, 282.0, 467.0, 407.0, 404.0, 516.0, 456.0, 396.0, 453.0, 458.0, 418.0, 295.0, 353.0, 406.0, 351.0, 516.0, 519.0, 444.0, 412.0, 444.0, 467.0, 287.0, 406.0, 237.0, 450.0, 126.0, 424.0, 458.0, 418.0, 404.0, 465.0, 228.0, 353.0, 465.0, 350.0, 375.0, 441.0, 422.0, 418.0, 467.0, 441.0, 339.0, 370.0, 404.0, 467.0, 453.0, 458.0, 465.0, 345.0, 476.0, 459.0, 516.0, 573.0, 467.0, 459.0, 393.0, 413.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [256.0, 260.0, 150.0, 137.0, 229.0, 233.0, 230.0, 226.0, 203.0, 246.0, 189.0, 213.0, 146.0, 126.0, 210.0, 242.0, 200.0, 207.0, 221.0, 209.0, 225.0, 225.0, 224.0, 240.0, 231.0, 234.0, 242.0, 219.0, 208.0, 199.0, 220.0, 238.0, 193.0, 211.0, 231.0, 225.0, 140.0, 170.0, 230.0, 240.0, 178.0, 174.0, 224.0, 234.0, 180.0, 196.0, 228.0, 216.0, 252.0, 249.0, 239.0, 256.0, 223.0, 235.0, 242.0, 222.0, 225.0, 222.0, 226.0, 247.0, 249.0, 267.0, 197.0, 187.0, 225.0, 196.0, 218.0, 195.0, 152.0, 148.0, 188.0, 167.0, 177.0, 173.0, 202.0, 217.0, 200.0, 222.0, 240.0, 225.0, 94.0, 83.0, 227.0, 223.0, 205.0, 210.0, 171.0, 173.0, 139.0, 143.0, 239.0, 228.0, 194.0, 213.0, 189.0, 215.0, 250.0, 
266.0, 230.0, 226.0, 213.0, 183.0, 230.0, 223.0, 237.0, 221.0, 205.0, 213.0, 150.0, 145.0, 159.0, 194.0, 206.0, 200.0, 179.0, 172.0, 251.0, 265.0, 255.0, 264.0, 210.0, 234.0, 199.0, 213.0, 233.0, 211.0, 233.0, 234.0, 143.0, 144.0, 212.0, 194.0, 120.0, 117.0, 226.0, 224.0, 63.0, 63.0, 224.0, 200.0, 224.0, 234.0, 210.0, 208.0, 202.0, 202.0, 240.0, 225.0, 117.0, 111.0, 173.0, 180.0, 223.0, 242.0, 178.0, 172.0, 181.0, 194.0, 219.0, 222.0, 222.0, 200.0, 207.0, 211.0, 230.0, 237.0, 232.0, 209.0, 165.0, 174.0, 185.0, 185.0, 204.0, 200.0, 230.0, 237.0, 238.0, 215.0, 256.0, 202.0, 236.0, 229.0, 194.0, 151.0, 226.0, 250.0, 231.0, 228.0, 252.0, 264.0, 293.0, 280.0, 238.0, 229.0, 222.0, 237.0, 207.0, 186.0, 212.0, 201.0]}, "sampler_perf": {"mean_env_wait_ms": 2.2331698554816866, "mean_processing_ms": 0.5135892745725562, "mean_inference_ms": 2.7773374770155983}, "off_policy_estimator": {}, "info": {"num_steps_trained": 2040000, "num_steps_sampled": 1088000, "sample_time_ms": 20542.192, "load_time_ms": 36.513, "grad_time_ms": 8356.672, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 1.0339757810987241e-26, "cur_lr": 0.0010000000474974513, "total_loss": 9.005811443785205e-05, "policy_loss": -0.005502933170646429, "vf_loss": 62.30662536621094, "vf_explained_var": 0.7652042508125305, "kl": 0.0015233332524076104, "entropy": 1.275335431098938, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1088000, "episodes_total": 2720, "training_iteration": 85, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-27-08", "timestamp": 1660249628, "time_this_iter_s": 29.196897983551025, "time_total_s": 8047.747242689133, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 
1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": 
false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, 
"entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 8047.747242689133, "timesteps_since_restore": 1088000, "iterations_since_restore": 85, "perf": {"cpu_util_percent": 31.616666666666664, "ram_util_percent": 58.38809523809524}}
-{"episode_reward_max": 573.0, "episode_reward_min": 126.0, "episode_reward_mean": 429.06, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 63.0}, "policy_reward_max": {"ppo": 293.0}, "policy_reward_mean": {"ppo": 214.53}, "custom_metrics": {"sparse_reward_mean": 148.0, "sparse_reward_min": 40, "sparse_reward_max": 200, "shaped_reward_mean": 133.06, "shaped_reward_min": 46, "shaped_reward_max": 173, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.29, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 14.59, "onion_pickup_agent_1_min": 4, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 13.5, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 13.69, "useful_onion_pickup_agent_1_min": 4, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 1.8, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 8, "onion_drop_agent_1_mean": 1.38, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.89, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 0.77, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 12.11, "potting_onion_agent_0_min": 3, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 12.83, "potting_onion_agent_1_min": 4, "potting_onion_agent_1_max": 18, "dish_pickup_agent_0_mean": 5.44, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.2, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 3.0, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 6, "useful_dish_pickup_agent_1_mean": 2.83, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 1.11, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 1.03, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.33, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.36, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 4.32, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.0, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 4.12, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 6, "soup_delivery_agent_1_mean": 3.82, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.12, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.13, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 12.11, 
"optimal_onion_potting_agent_0_min": 3, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 12.83, "optimal_onion_potting_agent_1_min": 4, "optimal_onion_potting_agent_1_max": 18, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 12.11, "viable_onion_potting_agent_0_min": 3, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 12.83, "viable_onion_potting_agent_1_min": 4, "viable_onion_potting_agent_1_max": 18, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [452.0, 461.0, 501.0, 241.0, 462.0, 459.0, 519.0, 413.0, 405.0, 302.0, 465.0, 519.0, 498.0, 570.0, 465.0, 444.0, 398.0, 479.0, 419.0, 516.0, 419.0, 522.0, 419.0, 516.0, 470.0, 470.0, 519.0, 418.0, 504.0, 444.0, 355.0, 324.0, 287.0, 406.0, 237.0, 450.0, 126.0, 424.0, 458.0, 418.0, 404.0, 465.0, 228.0, 353.0, 465.0, 350.0, 375.0, 441.0, 422.0, 418.0, 467.0, 441.0, 339.0, 370.0, 404.0, 467.0, 453.0, 458.0, 465.0, 345.0, 476.0, 459.0, 516.0, 573.0, 467.0, 459.0, 393.0, 413.0, 516.0, 287.0, 462.0, 456.0, 449.0, 402.0, 272.0, 452.0, 407.0, 430.0, 450.0, 464.0, 465.0, 461.0, 407.0, 458.0, 404.0, 456.0, 310.0, 470.0, 352.0, 458.0, 376.0, 444.0, 501.0, 495.0, 458.0, 464.0, 447.0, 473.0, 516.0, 384.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [230.0, 222.0, 235.0, 226.0, 254.0, 247.0, 114.0, 127.0, 234.0, 228.0, 215.0, 244.0, 256.0, 263.0, 208.0, 205.0, 208.0, 197.0, 151.0, 151.0, 239.0, 226.0, 254.0, 265.0, 248.0, 250.0, 279.0, 291.0, 226.0, 239.0, 214.0, 230.0, 200.0, 198.0, 239.0, 240.0, 201.0, 218.0, 250.0, 266.0, 212.0, 207.0, 265.0, 257.0, 208.0, 211.0, 264.0, 252.0, 234.0, 236.0, 238.0, 232.0, 262.0, 257.0, 204.0, 214.0, 240.0, 264.0, 238.0, 206.0, 185.0, 170.0, 157.0, 167.0, 143.0, 144.0, 212.0, 194.0, 120.0, 117.0, 226.0, 224.0, 63.0, 63.0, 224.0, 200.0, 224.0, 234.0, 210.0, 208.0, 202.0, 202.0, 240.0, 225.0, 117.0, 111.0, 173.0, 180.0, 223.0, 242.0, 178.0, 172.0, 181.0, 194.0, 219.0, 222.0, 222.0, 
200.0, 207.0, 211.0, 230.0, 237.0, 232.0, 209.0, 165.0, 174.0, 185.0, 185.0, 204.0, 200.0, 230.0, 237.0, 238.0, 215.0, 256.0, 202.0, 236.0, 229.0, 194.0, 151.0, 226.0, 250.0, 231.0, 228.0, 252.0, 264.0, 293.0, 280.0, 238.0, 229.0, 222.0, 237.0, 207.0, 186.0, 212.0, 201.0, 256.0, 260.0, 150.0, 137.0, 229.0, 233.0, 230.0, 226.0, 203.0, 246.0, 189.0, 213.0, 146.0, 126.0, 210.0, 242.0, 200.0, 207.0, 221.0, 209.0, 225.0, 225.0, 224.0, 240.0, 231.0, 234.0, 242.0, 219.0, 208.0, 199.0, 220.0, 238.0, 193.0, 211.0, 231.0, 225.0, 140.0, 170.0, 230.0, 240.0, 178.0, 174.0, 224.0, 234.0, 180.0, 196.0, 228.0, 216.0, 252.0, 249.0, 239.0, 256.0, 223.0, 235.0, 242.0, 222.0, 225.0, 222.0, 226.0, 247.0, 249.0, 267.0, 197.0, 187.0]}, "sampler_perf": {"mean_env_wait_ms": 2.2121816621028128, "mean_processing_ms": 0.509404154557512, "mean_inference_ms": 2.7561899846531217}, "off_policy_estimator": {}, "info": {"num_steps_trained": 2064000, "num_steps_sampled": 1100800, "sample_time_ms": 20247.187, "load_time_ms": 36.147, "grad_time_ms": 8204.595, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 5.169878905493621e-27, "cur_lr": 0.0010000000474974513, "total_loss": 0.0034205808769911528, "policy_loss": -0.0028535639867186546, "vf_loss": 69.0377197265625, "vf_explained_var": 0.760657787322998, "kl": 0.002043861197307706, "entropy": 1.2592506408691406, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1100800, "episodes_total": 2752, "training_iteration": 86, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-27-37", "timestamp": 1660249657, "time_this_iter_s": 28.82673192024231, "time_total_s": 8076.573974609375, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 
1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": 
false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, 
"entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 8076.573974609375, "timesteps_since_restore": 1100800, "iterations_since_restore": 86, "perf": {"cpu_util_percent": 33.515, "ram_util_percent": 58.42750000000001}}
-{"episode_reward_max": 570.0, "episode_reward_min": 142.0, "episode_reward_mean": 435.79, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 70.0}, "policy_reward_max": {"ppo": 291.0}, "policy_reward_mean": {"ppo": 217.895}, "custom_metrics": {"sparse_reward_mean": 150.0, "sparse_reward_min": 40, "sparse_reward_max": 200, "shaped_reward_mean": 135.79, "shaped_reward_min": 62, "shaped_reward_max": 170, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.42, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 15.29, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 13.49, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 14.24, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 1.9, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 8, "onion_drop_agent_1_mean": 1.69, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 7, "useful_onion_drop_agent_0_mean": 0.97, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 0.92, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 12.14, "potting_onion_agent_0_min": 3, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 13.18, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 18, "dish_pickup_agent_0_mean": 5.34, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.22, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 3.1, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 6, "useful_dish_pickup_agent_1_mean": 3.06, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 1.08, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 0.9, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.3, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.33, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 4.27, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.21, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 4.05, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 6, "soup_delivery_agent_1_mean": 3.99, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.13, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.17, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 3, "optimal_onion_potting_agent_0_mean": 12.14, 
"optimal_onion_potting_agent_0_min": 3, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 13.18, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 18, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 12.14, "viable_onion_potting_agent_0_min": 3, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 13.18, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 18, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [461.0, 404.0, 464.0, 393.0, 467.0, 459.0, 419.0, 142.0, 419.0, 456.0, 481.0, 507.0, 407.0, 470.0, 513.0, 467.0, 459.0, 419.0, 473.0, 462.0, 316.0, 294.0, 416.0, 513.0, 465.0, 465.0, 424.0, 401.0, 470.0, 419.0, 367.0, 341.0, 467.0, 459.0, 393.0, 413.0, 516.0, 287.0, 462.0, 456.0, 449.0, 402.0, 272.0, 452.0, 407.0, 430.0, 450.0, 464.0, 465.0, 461.0, 407.0, 458.0, 404.0, 456.0, 310.0, 470.0, 352.0, 458.0, 376.0, 444.0, 501.0, 495.0, 458.0, 464.0, 447.0, 473.0, 516.0, 384.0, 452.0, 461.0, 501.0, 241.0, 462.0, 459.0, 519.0, 413.0, 405.0, 302.0, 465.0, 519.0, 498.0, 570.0, 465.0, 444.0, 398.0, 479.0, 419.0, 516.0, 419.0, 522.0, 419.0, 516.0, 470.0, 470.0, 519.0, 418.0, 504.0, 444.0, 355.0, 324.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [227.0, 234.0, 210.0, 194.0, 236.0, 228.0, 186.0, 207.0, 242.0, 225.0, 226.0, 233.0, 218.0, 201.0, 70.0, 72.0, 202.0, 217.0, 222.0, 234.0, 234.0, 247.0, 235.0, 272.0, 197.0, 210.0, 228.0, 242.0, 245.0, 268.0, 247.0, 220.0, 231.0, 228.0, 211.0, 208.0, 230.0, 243.0, 224.0, 238.0, 162.0, 154.0, 139.0, 155.0, 186.0, 230.0, 265.0, 248.0, 236.0, 229.0, 236.0, 229.0, 223.0, 201.0, 191.0, 210.0, 237.0, 233.0, 207.0, 212.0, 187.0, 180.0, 175.0, 166.0, 238.0, 229.0, 222.0, 237.0, 207.0, 186.0, 212.0, 201.0, 256.0, 260.0, 150.0, 137.0, 229.0, 233.0, 230.0, 226.0, 203.0, 246.0, 189.0, 213.0, 146.0, 126.0, 210.0, 242.0, 200.0, 207.0, 221.0, 209.0, 225.0, 225.0, 224.0, 240.0, 231.0, 
234.0, 242.0, 219.0, 208.0, 199.0, 220.0, 238.0, 193.0, 211.0, 231.0, 225.0, 140.0, 170.0, 230.0, 240.0, 178.0, 174.0, 224.0, 234.0, 180.0, 196.0, 228.0, 216.0, 252.0, 249.0, 239.0, 256.0, 223.0, 235.0, 242.0, 222.0, 225.0, 222.0, 226.0, 247.0, 249.0, 267.0, 197.0, 187.0, 230.0, 222.0, 235.0, 226.0, 254.0, 247.0, 114.0, 127.0, 234.0, 228.0, 215.0, 244.0, 256.0, 263.0, 208.0, 205.0, 208.0, 197.0, 151.0, 151.0, 239.0, 226.0, 254.0, 265.0, 248.0, 250.0, 279.0, 291.0, 226.0, 239.0, 214.0, 230.0, 200.0, 198.0, 239.0, 240.0, 201.0, 218.0, 250.0, 266.0, 212.0, 207.0, 265.0, 257.0, 208.0, 211.0, 264.0, 252.0, 234.0, 236.0, 238.0, 232.0, 262.0, 257.0, 204.0, 214.0, 240.0, 264.0, 238.0, 206.0, 185.0, 170.0, 157.0, 167.0]}, "sampler_perf": {"mean_env_wait_ms": 2.191700194557133, "mean_processing_ms": 0.5053220551327391, "mean_inference_ms": 2.7357250964199444}, "off_policy_estimator": {}, "info": {"num_steps_trained": 2088000, "num_steps_sampled": 1113600, "sample_time_ms": 20142.29, "load_time_ms": 35.752, "grad_time_ms": 8101.845, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 2.5849394527468104e-27, "cur_lr": 0.0010000000474974513, "total_loss": 0.0009223390952683985, "policy_loss": -0.005599660333245993, "vf_loss": 71.4854736328125, "vf_explained_var": 0.7612900733947754, "kl": 0.002260145964100957, "entropy": 1.2530813217163086, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1113600, "episodes_total": 2784, "training_iteration": 87, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-28-07", "timestamp": 1660249687, "time_this_iter_s": 30.60737180709839, "time_total_s": 8107.181346416473, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 
1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": 
false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, 
"entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 8107.181346416473, "timesteps_since_restore": 1113600, "iterations_since_restore": 87, "perf": {"cpu_util_percent": 30.168181818181814, "ram_util_percent": 58.34545454545453}}
-{"episode_reward_max": 570.0, "episode_reward_min": 142.0, "episode_reward_mean": 443.49, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 70.0}, "policy_reward_max": {"ppo": 291.0}, "policy_reward_mean": {"ppo": 221.745}, "custom_metrics": {"sparse_reward_mean": 152.4, "sparse_reward_min": 40, "sparse_reward_max": 200, "shaped_reward_mean": 138.69, "shaped_reward_min": 62, "shaped_reward_max": 170, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.81, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 15.03, "onion_pickup_agent_1_min": 5, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 13.9, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 14.16, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 1.91, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 8, "onion_drop_agent_1_mean": 1.58, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 7, "useful_onion_drop_agent_0_mean": 0.94, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 0.86, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 12.54, "potting_onion_agent_0_min": 3, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 13.09, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 18, "dish_pickup_agent_0_mean": 5.22, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.33, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 12, "useful_dish_pickup_agent_0_mean": 3.42, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 6, "useful_dish_pickup_agent_1_mean": 3.23, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.72, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 1.07, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.15, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.46, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 4.38, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 4.13, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 4.23, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 6, "soup_delivery_agent_1_mean": 3.97, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.09, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.12, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 3, "optimal_onion_potting_agent_0_mean": 12.54, 
"optimal_onion_potting_agent_0_min": 3, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 13.09, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 18, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 12.54, "viable_onion_potting_agent_0_min": 3, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 13.09, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 18, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [302.0, 366.0, 313.0, 504.0, 522.0, 530.0, 473.0, 468.0, 447.0, 403.0, 522.0, 519.0, 461.0, 516.0, 401.0, 516.0, 459.0, 464.0, 467.0, 373.0, 458.0, 525.0, 513.0, 273.0, 415.0, 461.0, 467.0, 473.0, 522.0, 406.0, 522.0, 467.0, 447.0, 473.0, 516.0, 384.0, 452.0, 461.0, 501.0, 241.0, 462.0, 459.0, 519.0, 413.0, 405.0, 302.0, 465.0, 519.0, 498.0, 570.0, 465.0, 444.0, 398.0, 479.0, 419.0, 516.0, 419.0, 522.0, 419.0, 516.0, 470.0, 470.0, 519.0, 418.0, 504.0, 444.0, 355.0, 324.0, 461.0, 404.0, 464.0, 393.0, 467.0, 459.0, 419.0, 142.0, 419.0, 456.0, 481.0, 507.0, 407.0, 470.0, 513.0, 467.0, 459.0, 419.0, 473.0, 462.0, 316.0, 294.0, 416.0, 513.0, 465.0, 465.0, 424.0, 401.0, 470.0, 419.0, 367.0, 341.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [151.0, 151.0, 185.0, 181.0, 153.0, 160.0, 262.0, 242.0, 265.0, 257.0, 268.0, 262.0, 253.0, 220.0, 244.0, 224.0, 221.0, 226.0, 192.0, 211.0, 265.0, 257.0, 249.0, 270.0, 237.0, 224.0, 264.0, 252.0, 215.0, 186.0, 259.0, 257.0, 245.0, 214.0, 238.0, 226.0, 229.0, 238.0, 184.0, 189.0, 230.0, 228.0, 262.0, 263.0, 265.0, 248.0, 143.0, 130.0, 208.0, 207.0, 228.0, 233.0, 241.0, 226.0, 232.0, 241.0, 265.0, 257.0, 207.0, 199.0, 255.0, 267.0, 244.0, 223.0, 225.0, 222.0, 226.0, 247.0, 249.0, 267.0, 197.0, 187.0, 230.0, 222.0, 235.0, 226.0, 254.0, 247.0, 114.0, 127.0, 234.0, 228.0, 215.0, 244.0, 256.0, 263.0, 208.0, 205.0, 208.0, 197.0, 151.0, 151.0, 239.0, 226.0, 254.0, 265.0, 248.0, 
250.0, 279.0, 291.0, 226.0, 239.0, 214.0, 230.0, 200.0, 198.0, 239.0, 240.0, 201.0, 218.0, 250.0, 266.0, 212.0, 207.0, 265.0, 257.0, 208.0, 211.0, 264.0, 252.0, 234.0, 236.0, 238.0, 232.0, 262.0, 257.0, 204.0, 214.0, 240.0, 264.0, 238.0, 206.0, 185.0, 170.0, 157.0, 167.0, 227.0, 234.0, 210.0, 194.0, 236.0, 228.0, 186.0, 207.0, 242.0, 225.0, 226.0, 233.0, 218.0, 201.0, 70.0, 72.0, 202.0, 217.0, 222.0, 234.0, 234.0, 247.0, 235.0, 272.0, 197.0, 210.0, 228.0, 242.0, 245.0, 268.0, 247.0, 220.0, 231.0, 228.0, 211.0, 208.0, 230.0, 243.0, 224.0, 238.0, 162.0, 154.0, 139.0, 155.0, 186.0, 230.0, 265.0, 248.0, 236.0, 229.0, 236.0, 229.0, 223.0, 201.0, 191.0, 210.0, 237.0, 233.0, 207.0, 212.0, 187.0, 180.0, 175.0, 166.0]}, "sampler_perf": {"mean_env_wait_ms": 2.1716872563286627, "mean_processing_ms": 0.5013361308454948, "mean_inference_ms": 2.7160497473266743}, "off_policy_estimator": {}, "info": {"num_steps_trained": 2112000, "num_steps_sampled": 1126400, "sample_time_ms": 20200.821, "load_time_ms": 36.161, "grad_time_ms": 8143.064, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 1.2924697263734052e-27, "cur_lr": 0.0010000000474974513, "total_loss": 0.004522919189184904, "policy_loss": -0.0018036967376247048, "vf_loss": 69.45938110351562, "vf_explained_var": 0.7786126732826233, "kl": 0.001827276311814785, "entropy": 1.2386289834976196, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1126400, "episodes_total": 2816, "training_iteration": 88, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-28-39", "timestamp": 1660249719, "time_this_iter_s": 31.2521071434021, "time_total_s": 8138.4334535598755, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 
1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": 
false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, 
"entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 8138.4334535598755, "timesteps_since_restore": 1126400, "iterations_since_restore": 88, "perf": {"cpu_util_percent": 29.57045454545455, "ram_util_percent": 58.37954545454544}}
-{"episode_reward_max": 530.0, "episode_reward_min": 142.0, "episode_reward_mean": 443.95, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 70.0}, "policy_reward_max": {"ppo": 276.0}, "policy_reward_mean": {"ppo": 221.975}, "custom_metrics": {"sparse_reward_mean": 152.6, "sparse_reward_min": 40, "sparse_reward_max": 180, "shaped_reward_mean": 138.75, "shaped_reward_min": 62, "shaped_reward_max": 170, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.54, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 15.22, "onion_pickup_agent_1_min": 5, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 13.68, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 14.28, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 20, "onion_drop_agent_0_mean": 1.7, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 7, "onion_drop_agent_1_mean": 1.74, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 10, "useful_onion_drop_agent_0_mean": 0.81, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.97, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 9, "potting_onion_agent_0_mean": 12.52, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 18, "potting_onion_agent_1_mean": 13.12, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 19, "dish_pickup_agent_0_mean": 5.51, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.31, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 12, "useful_dish_pickup_agent_0_mean": 3.55, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 3.11, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.86, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 1.12, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.22, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.48, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 4.46, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 4.08, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 4.28, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 6, "soup_delivery_agent_1_mean": 3.87, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.12, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.17, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 3, "optimal_onion_potting_agent_0_mean": 12.52, 
"optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 18, "optimal_onion_potting_agent_1_mean": 13.12, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 19, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 12.52, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 18, "viable_onion_potting_agent_1_mean": 13.12, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 19, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [473.0, 468.0, 447.0, 473.0, 522.0, 513.0, 470.0, 464.0, 352.0, 504.0, 476.0, 398.0, 358.0, 410.0, 519.0, 405.0, 465.0, 416.0, 406.0, 525.0, 393.0, 459.0, 456.0, 408.0, 465.0, 438.0, 476.0, 462.0, 478.0, 479.0, 507.0, 522.0, 504.0, 444.0, 355.0, 324.0, 461.0, 404.0, 464.0, 393.0, 467.0, 459.0, 419.0, 142.0, 419.0, 456.0, 481.0, 507.0, 407.0, 470.0, 513.0, 467.0, 459.0, 419.0, 473.0, 462.0, 316.0, 294.0, 416.0, 513.0, 465.0, 465.0, 424.0, 401.0, 470.0, 419.0, 367.0, 341.0, 302.0, 366.0, 313.0, 504.0, 522.0, 530.0, 473.0, 468.0, 447.0, 403.0, 522.0, 519.0, 461.0, 516.0, 401.0, 516.0, 459.0, 464.0, 467.0, 373.0, 458.0, 525.0, 513.0, 273.0, 415.0, 461.0, 467.0, 473.0, 522.0, 406.0, 522.0, 467.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [238.0, 235.0, 234.0, 234.0, 226.0, 221.0, 236.0, 237.0, 257.0, 265.0, 240.0, 273.0, 236.0, 234.0, 248.0, 216.0, 178.0, 174.0, 250.0, 254.0, 231.0, 245.0, 210.0, 188.0, 181.0, 177.0, 202.0, 208.0, 243.0, 276.0, 199.0, 206.0, 244.0, 221.0, 208.0, 208.0, 193.0, 213.0, 259.0, 266.0, 210.0, 183.0, 229.0, 230.0, 234.0, 222.0, 212.0, 196.0, 234.0, 231.0, 224.0, 214.0, 232.0, 244.0, 248.0, 214.0, 241.0, 237.0, 238.0, 241.0, 252.0, 255.0, 255.0, 267.0, 240.0, 264.0, 238.0, 206.0, 185.0, 170.0, 157.0, 167.0, 227.0, 234.0, 210.0, 194.0, 236.0, 228.0, 186.0, 207.0, 242.0, 225.0, 226.0, 233.0, 218.0, 201.0, 70.0, 72.0, 202.0, 217.0, 222.0, 234.0, 234.0, 247.0, 235.0, 272.0, 197.0, 
210.0, 228.0, 242.0, 245.0, 268.0, 247.0, 220.0, 231.0, 228.0, 211.0, 208.0, 230.0, 243.0, 224.0, 238.0, 162.0, 154.0, 139.0, 155.0, 186.0, 230.0, 265.0, 248.0, 236.0, 229.0, 236.0, 229.0, 223.0, 201.0, 191.0, 210.0, 237.0, 233.0, 207.0, 212.0, 187.0, 180.0, 175.0, 166.0, 151.0, 151.0, 185.0, 181.0, 153.0, 160.0, 262.0, 242.0, 265.0, 257.0, 268.0, 262.0, 253.0, 220.0, 244.0, 224.0, 221.0, 226.0, 192.0, 211.0, 265.0, 257.0, 249.0, 270.0, 237.0, 224.0, 264.0, 252.0, 215.0, 186.0, 259.0, 257.0, 245.0, 214.0, 238.0, 226.0, 229.0, 238.0, 184.0, 189.0, 230.0, 228.0, 262.0, 263.0, 265.0, 248.0, 143.0, 130.0, 208.0, 207.0, 228.0, 233.0, 241.0, 226.0, 232.0, 241.0, 265.0, 257.0, 207.0, 199.0, 255.0, 267.0, 244.0, 223.0]}, "sampler_perf": {"mean_env_wait_ms": 2.1521722183648633, "mean_processing_ms": 0.4974515268120716, "mean_inference_ms": 2.697593198173253}, "off_policy_estimator": {}, "info": {"num_steps_trained": 2136000, "num_steps_sampled": 1139200, "sample_time_ms": 20564.193, "load_time_ms": 36.199, "grad_time_ms": 8238.148, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 6.462348631867026e-28, "cur_lr": 0.0010000000474974513, "total_loss": 0.0003657890483736992, "policy_loss": -0.005678663495928049, "vf_loss": 66.65350341796875, "vf_explained_var": 0.7769116759300232, "kl": 0.0020363712683320045, "entropy": 1.2417923212051392, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1139200, "episodes_total": 2848, "training_iteration": 89, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-29-11", "timestamp": 1660249751, "time_this_iter_s": 32.67408323287964, "time_total_s": 8171.107536792755, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 
1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": 
false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, 
"entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 8171.107536792755, "timesteps_since_restore": 1139200, "iterations_since_restore": 89, "perf": {"cpu_util_percent": 27.073913043478264, "ram_util_percent": 58.2586956521739}}
-{"episode_reward_max": 570.0, "episode_reward_min": 273.0, "episode_reward_mean": 452.2, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 130.0}, "policy_reward_max": {"ppo": 287.0}, "policy_reward_mean": {"ppo": 226.1}, "custom_metrics": {"sparse_reward_mean": 156.0, "sparse_reward_min": 100, "sparse_reward_max": 200, "shaped_reward_mean": 140.2, "shaped_reward_min": 73, "shaped_reward_max": 170, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.85, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 15.29, "onion_pickup_agent_1_min": 5, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 13.98, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 14.39, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 20, "onion_drop_agent_0_mean": 1.79, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 7, "onion_drop_agent_1_mean": 1.63, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 10, "useful_onion_drop_agent_0_mean": 0.82, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 0.98, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 9, "potting_onion_agent_0_mean": 12.77, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 17, "potting_onion_agent_1_mean": 13.33, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 19, "dish_pickup_agent_0_mean": 5.71, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.36, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 12, "useful_dish_pickup_agent_0_mean": 3.61, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 2.89, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.85, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 1.33, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.19, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.47, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 4.72, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 3.91, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 6, "soup_delivery_agent_0_mean": 4.53, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 6, "soup_delivery_agent_1_mean": 3.73, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.15, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.12, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 12.77, 
"optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 17, "optimal_onion_potting_agent_1_mean": 13.33, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 19, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 12.77, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 17, "viable_onion_potting_agent_1_mean": 13.33, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 19, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [398.0, 419.0, 462.0, 419.0, 339.0, 519.0, 467.0, 522.0, 450.0, 449.0, 444.0, 495.0, 453.0, 438.0, 462.0, 476.0, 456.0, 413.0, 403.0, 430.0, 459.0, 476.0, 476.0, 458.0, 347.0, 424.0, 476.0, 570.0, 401.0, 516.0, 455.0, 516.0, 470.0, 419.0, 367.0, 341.0, 302.0, 366.0, 313.0, 504.0, 522.0, 530.0, 473.0, 468.0, 447.0, 403.0, 522.0, 519.0, 461.0, 516.0, 401.0, 516.0, 459.0, 464.0, 467.0, 373.0, 458.0, 525.0, 513.0, 273.0, 415.0, 461.0, 467.0, 473.0, 522.0, 406.0, 522.0, 467.0, 473.0, 468.0, 447.0, 473.0, 522.0, 513.0, 470.0, 464.0, 352.0, 504.0, 476.0, 398.0, 358.0, 410.0, 519.0, 405.0, 465.0, 416.0, 406.0, 525.0, 393.0, 459.0, 456.0, 408.0, 465.0, 438.0, 476.0, 462.0, 478.0, 479.0, 507.0, 522.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [207.0, 191.0, 217.0, 202.0, 231.0, 231.0, 211.0, 208.0, 176.0, 163.0, 260.0, 259.0, 234.0, 233.0, 262.0, 260.0, 238.0, 212.0, 230.0, 219.0, 232.0, 212.0, 261.0, 234.0, 219.0, 234.0, 226.0, 212.0, 237.0, 225.0, 247.0, 229.0, 236.0, 220.0, 200.0, 213.0, 200.0, 203.0, 202.0, 228.0, 225.0, 234.0, 236.0, 240.0, 228.0, 248.0, 242.0, 216.0, 177.0, 170.0, 217.0, 207.0, 231.0, 245.0, 287.0, 283.0, 201.0, 200.0, 264.0, 252.0, 233.0, 222.0, 260.0, 256.0, 237.0, 233.0, 207.0, 212.0, 187.0, 180.0, 175.0, 166.0, 151.0, 151.0, 185.0, 181.0, 153.0, 160.0, 262.0, 242.0, 265.0, 257.0, 268.0, 262.0, 253.0, 220.0, 244.0, 224.0, 221.0, 226.0, 192.0, 211.0, 265.0, 257.0, 249.0, 270.0, 237.0, 
224.0, 264.0, 252.0, 215.0, 186.0, 259.0, 257.0, 245.0, 214.0, 238.0, 226.0, 229.0, 238.0, 184.0, 189.0, 230.0, 228.0, 262.0, 263.0, 265.0, 248.0, 143.0, 130.0, 208.0, 207.0, 228.0, 233.0, 241.0, 226.0, 232.0, 241.0, 265.0, 257.0, 207.0, 199.0, 255.0, 267.0, 244.0, 223.0, 238.0, 235.0, 234.0, 234.0, 226.0, 221.0, 236.0, 237.0, 257.0, 265.0, 240.0, 273.0, 236.0, 234.0, 248.0, 216.0, 178.0, 174.0, 250.0, 254.0, 231.0, 245.0, 210.0, 188.0, 181.0, 177.0, 202.0, 208.0, 243.0, 276.0, 199.0, 206.0, 244.0, 221.0, 208.0, 208.0, 193.0, 213.0, 259.0, 266.0, 210.0, 183.0, 229.0, 230.0, 234.0, 222.0, 212.0, 196.0, 234.0, 231.0, 224.0, 214.0, 232.0, 244.0, 248.0, 214.0, 241.0, 237.0, 238.0, 241.0, 252.0, 255.0, 255.0, 267.0]}, "sampler_perf": {"mean_env_wait_ms": 2.133109953902478, "mean_processing_ms": 0.4936599987837839, "mean_inference_ms": 2.6799343629485026}, "off_policy_estimator": {}, "info": {"num_steps_trained": 2160000, "num_steps_sampled": 1152000, "sample_time_ms": 20913.282, "load_time_ms": 36.475, "grad_time_ms": 8548.769, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 3.231174315933513e-28, "cur_lr": 0.0010000000474974513, "total_loss": 0.007687473203986883, "policy_loss": 0.0007542042876593769, "vf_loss": 75.50032806396484, "vf_explained_var": 0.7544476985931396, "kl": 0.0026988324243575335, "entropy": 1.2335320711135864, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1152000, "episodes_total": 2880, "training_iteration": 90, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-29-44", "timestamp": 1660249784, "time_this_iter_s": 32.394510984420776, "time_total_s": 8203.502047777176, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 
1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": 
false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, 
"entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 8203.502047777176, "timesteps_since_restore": 1152000, "iterations_since_restore": 90, "perf": {"cpu_util_percent": 31.20434782608696, "ram_util_percent": 58.23260869565217}}
-{"episode_reward_max": 570.0, "episode_reward_min": 339.0, "episode_reward_mean": 459.06, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 163.0}, "policy_reward_max": {"ppo": 287.0}, "policy_reward_mean": {"ppo": 229.53}, "custom_metrics": {"sparse_reward_mean": 158.4, "sparse_reward_min": 120, "sparse_reward_max": 200, "shaped_reward_mean": 142.26, "shaped_reward_min": 99, "shaped_reward_max": 170, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.9, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 15.54, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 14.05, "useful_onion_pickup_agent_0_min": 9, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 14.67, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 20, "onion_drop_agent_0_mean": 1.63, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 8, "onion_drop_agent_1_mean": 1.67, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 10, "useful_onion_drop_agent_0_mean": 0.7, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 0.94, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 9, "potting_onion_agent_0_mean": 12.97, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 17, "potting_onion_agent_1_mean": 13.54, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 19, "dish_pickup_agent_0_mean": 5.78, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.22, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 3.67, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 2.99, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.98, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 1.07, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 6, "useful_dish_drop_agent_0_mean": 0.2, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.3, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 4.65, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.07, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 4.46, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 3.84, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.13, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.17, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 5, "optimal_onion_potting_agent_0_mean": 12.97, 
"optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 17, "optimal_onion_potting_agent_1_mean": 13.54, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 19, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 12.97, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 17, "viable_onion_potting_agent_1_mean": 13.54, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 19, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [510.0, 411.0, 418.0, 522.0, 465.0, 519.0, 465.0, 408.0, 473.0, 525.0, 522.0, 481.0, 447.0, 527.0, 476.0, 527.0, 449.0, 396.0, 453.0, 453.0, 470.0, 395.0, 481.0, 516.0, 516.0, 449.0, 516.0, 373.0, 416.0, 407.0, 392.0, 516.0, 522.0, 406.0, 522.0, 467.0, 473.0, 468.0, 447.0, 473.0, 522.0, 513.0, 470.0, 464.0, 352.0, 504.0, 476.0, 398.0, 358.0, 410.0, 519.0, 405.0, 465.0, 416.0, 406.0, 525.0, 393.0, 459.0, 456.0, 408.0, 465.0, 438.0, 476.0, 462.0, 478.0, 479.0, 507.0, 522.0, 398.0, 419.0, 462.0, 419.0, 339.0, 519.0, 467.0, 522.0, 450.0, 449.0, 444.0, 495.0, 453.0, 438.0, 462.0, 476.0, 456.0, 413.0, 403.0, 430.0, 459.0, 476.0, 476.0, 458.0, 347.0, 424.0, 476.0, 570.0, 401.0, 516.0, 455.0, 516.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [265.0, 245.0, 210.0, 201.0, 218.0, 200.0, 267.0, 255.0, 240.0, 225.0, 275.0, 244.0, 218.0, 247.0, 202.0, 206.0, 252.0, 221.0, 257.0, 268.0, 262.0, 260.0, 237.0, 244.0, 217.0, 230.0, 265.0, 262.0, 249.0, 227.0, 259.0, 268.0, 227.0, 222.0, 192.0, 204.0, 233.0, 220.0, 222.0, 231.0, 225.0, 245.0, 195.0, 200.0, 239.0, 242.0, 254.0, 262.0, 259.0, 257.0, 237.0, 212.0, 267.0, 249.0, 184.0, 189.0, 211.0, 205.0, 198.0, 209.0, 202.0, 190.0, 249.0, 267.0, 265.0, 257.0, 207.0, 199.0, 255.0, 267.0, 244.0, 223.0, 238.0, 235.0, 234.0, 234.0, 226.0, 221.0, 236.0, 237.0, 257.0, 265.0, 240.0, 273.0, 236.0, 234.0, 248.0, 216.0, 178.0, 174.0, 250.0, 254.0, 231.0, 245.0, 210.0, 188.0, 181.0, 
177.0, 202.0, 208.0, 243.0, 276.0, 199.0, 206.0, 244.0, 221.0, 208.0, 208.0, 193.0, 213.0, 259.0, 266.0, 210.0, 183.0, 229.0, 230.0, 234.0, 222.0, 212.0, 196.0, 234.0, 231.0, 224.0, 214.0, 232.0, 244.0, 248.0, 214.0, 241.0, 237.0, 238.0, 241.0, 252.0, 255.0, 255.0, 267.0, 207.0, 191.0, 217.0, 202.0, 231.0, 231.0, 211.0, 208.0, 176.0, 163.0, 260.0, 259.0, 234.0, 233.0, 262.0, 260.0, 238.0, 212.0, 230.0, 219.0, 232.0, 212.0, 261.0, 234.0, 219.0, 234.0, 226.0, 212.0, 237.0, 225.0, 247.0, 229.0, 236.0, 220.0, 200.0, 213.0, 200.0, 203.0, 202.0, 228.0, 225.0, 234.0, 236.0, 240.0, 228.0, 248.0, 242.0, 216.0, 177.0, 170.0, 217.0, 207.0, 231.0, 245.0, 287.0, 283.0, 201.0, 200.0, 264.0, 252.0, 233.0, 222.0, 260.0, 256.0]}, "sampler_perf": {"mean_env_wait_ms": 2.114456633491147, "mean_processing_ms": 0.4899465714962644, "mean_inference_ms": 2.662192402592387}, "off_policy_estimator": {}, "info": {"num_steps_trained": 2184000, "num_steps_sampled": 1164800, "sample_time_ms": 20895.618, "load_time_ms": 36.529, "grad_time_ms": 8787.695, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 1.6155871579667565e-28, "cur_lr": 0.0010000000474974513, "total_loss": 0.003199361963197589, "policy_loss": -0.003974525723606348, "vf_loss": 77.90489959716797, "vf_explained_var": 0.7496511936187744, "kl": 0.001869131694547832, "entropy": 1.2332016229629517, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1164800, "episodes_total": 2912, "training_iteration": 91, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-30-12", "timestamp": 1660249812, "time_this_iter_s": 28.652703046798706, "time_total_s": 8232.154750823975, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 
1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": 
false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, 
"entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 8232.154750823975, "timesteps_since_restore": 1164800, "iterations_since_restore": 91, "perf": {"cpu_util_percent": 34.4075, "ram_util_percent": 58.25750000000001}}
-{"episode_reward_max": 570.0, "episode_reward_min": 296.0, "episode_reward_mean": 465.12, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 133.0}, "policy_reward_max": {"ppo": 287.0}, "policy_reward_mean": {"ppo": 232.56}, "custom_metrics": {"sparse_reward_mean": 160.6, "sparse_reward_min": 100, "sparse_reward_max": 200, "shaped_reward_mean": 143.92, "shaped_reward_min": 96, "shaped_reward_max": 173, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.61, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 16.05, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 13.75, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 15.24, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 1.6, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 8, "onion_drop_agent_1_mean": 1.6, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.71, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 0.81, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 6, "potting_onion_agent_0_mean": 12.71, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 17, "potting_onion_agent_1_mean": 14.12, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 19, "dish_pickup_agent_0_mean": 5.98, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 13, "dish_pickup_agent_1_mean": 5.19, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 3.72, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 2.99, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 1.1, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 8, "dish_drop_agent_1_mean": 1.02, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 6, "useful_dish_drop_agent_0_mean": 0.21, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 4, "useful_dish_drop_agent_1_mean": 0.31, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 4.77, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.1, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 4.5, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 3.86, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.16, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.18, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 5, "optimal_onion_potting_agent_0_mean": 12.71, 
"optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 17, "optimal_onion_potting_agent_1_mean": 14.12, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 19, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 12.71, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 17, "viable_onion_potting_agent_1_mean": 14.12, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 19, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [513.0, 453.0, 516.0, 533.0, 296.0, 453.0, 501.0, 422.0, 465.0, 395.0, 530.0, 473.0, 519.0, 458.0, 404.0, 487.0, 449.0, 516.0, 516.0, 510.0, 504.0, 516.0, 516.0, 473.0, 516.0, 519.0, 470.0, 387.0, 522.0, 453.0, 421.0, 438.0, 478.0, 479.0, 507.0, 522.0, 398.0, 419.0, 462.0, 419.0, 339.0, 519.0, 467.0, 522.0, 450.0, 449.0, 444.0, 495.0, 453.0, 438.0, 462.0, 476.0, 456.0, 413.0, 403.0, 430.0, 459.0, 476.0, 476.0, 458.0, 347.0, 424.0, 476.0, 570.0, 401.0, 516.0, 455.0, 516.0, 510.0, 411.0, 418.0, 522.0, 465.0, 519.0, 465.0, 408.0, 473.0, 525.0, 522.0, 481.0, 447.0, 527.0, 476.0, 527.0, 449.0, 396.0, 453.0, 453.0, 470.0, 395.0, 481.0, 516.0, 516.0, 449.0, 516.0, 373.0, 416.0, 407.0, 392.0, 516.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [257.0, 256.0, 218.0, 235.0, 254.0, 262.0, 273.0, 260.0, 133.0, 163.0, 219.0, 234.0, 235.0, 266.0, 207.0, 215.0, 239.0, 226.0, 195.0, 200.0, 269.0, 261.0, 230.0, 243.0, 263.0, 256.0, 229.0, 229.0, 202.0, 202.0, 247.0, 240.0, 225.0, 224.0, 248.0, 268.0, 250.0, 266.0, 252.0, 258.0, 249.0, 255.0, 272.0, 244.0, 254.0, 262.0, 232.0, 241.0, 260.0, 256.0, 250.0, 269.0, 233.0, 237.0, 189.0, 198.0, 265.0, 257.0, 233.0, 220.0, 209.0, 212.0, 218.0, 220.0, 241.0, 237.0, 238.0, 241.0, 252.0, 255.0, 255.0, 267.0, 207.0, 191.0, 217.0, 202.0, 231.0, 231.0, 211.0, 208.0, 176.0, 163.0, 260.0, 259.0, 234.0, 233.0, 262.0, 260.0, 238.0, 212.0, 230.0, 219.0, 232.0, 212.0, 261.0, 234.0, 219.0, 
234.0, 226.0, 212.0, 237.0, 225.0, 247.0, 229.0, 236.0, 220.0, 200.0, 213.0, 200.0, 203.0, 202.0, 228.0, 225.0, 234.0, 236.0, 240.0, 228.0, 248.0, 242.0, 216.0, 177.0, 170.0, 217.0, 207.0, 231.0, 245.0, 287.0, 283.0, 201.0, 200.0, 264.0, 252.0, 233.0, 222.0, 260.0, 256.0, 265.0, 245.0, 210.0, 201.0, 218.0, 200.0, 267.0, 255.0, 240.0, 225.0, 275.0, 244.0, 218.0, 247.0, 202.0, 206.0, 252.0, 221.0, 257.0, 268.0, 262.0, 260.0, 237.0, 244.0, 217.0, 230.0, 265.0, 262.0, 249.0, 227.0, 259.0, 268.0, 227.0, 222.0, 192.0, 204.0, 233.0, 220.0, 222.0, 231.0, 225.0, 245.0, 195.0, 200.0, 239.0, 242.0, 254.0, 262.0, 259.0, 257.0, 237.0, 212.0, 267.0, 249.0, 184.0, 189.0, 211.0, 205.0, 198.0, 209.0, 202.0, 190.0, 249.0, 267.0]}, "sampler_perf": {"mean_env_wait_ms": 2.0961781849266816, "mean_processing_ms": 0.48630585862921505, "mean_inference_ms": 2.6442580305818364}, "off_policy_estimator": {}, "info": {"num_steps_trained": 2208000, "num_steps_sampled": 1177600, "sample_time_ms": 21020.172, "load_time_ms": 36.643, "grad_time_ms": 9041.599, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 8.077935789833782e-29, "cur_lr": 0.0010000000474974513, "total_loss": 0.009082547388970852, "policy_loss": 0.0023276470601558685, "vf_loss": 73.66332244873047, "vf_explained_var": 0.7556483149528503, "kl": 0.003871823428198695, "entropy": 1.2228628396987915, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1177600, "episodes_total": 2944, "training_iteration": 92, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-30-43", "timestamp": 1660249843, "time_this_iter_s": 30.34039807319641, "time_total_s": 8262.495148897171, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 
1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": 
false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, 
"entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 8262.495148897171, "timesteps_since_restore": 1177600, "iterations_since_restore": 92, "perf": {"cpu_util_percent": 30.85116279069767, "ram_util_percent": 58.283720930232555}}
-{"episode_reward_max": 573.0, "episode_reward_min": 230.0, "episode_reward_mean": 467.31, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 106.0}, "policy_reward_max": {"ppo": 293.0}, "policy_reward_mean": {"ppo": 233.655}, "custom_metrics": {"sparse_reward_mean": 161.6, "sparse_reward_min": 80, "sparse_reward_max": 200, "shaped_reward_mean": 144.11, "shaped_reward_min": 70, "shaped_reward_max": 173, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.51, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 15.93, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 13.74, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 15.27, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 1.56, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 8, "onion_drop_agent_1_mean": 1.45, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.65, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.68, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 12.66, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 18, "potting_onion_agent_1_mean": 14.2, "potting_onion_agent_1_min": 6, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.0, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 13, "dish_pickup_agent_1_mean": 5.26, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 3.67, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 3.04, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 1.21, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 8, "dish_drop_agent_1_mean": 0.96, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.3, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 4, "useful_dish_drop_agent_1_mean": 0.35, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 4.62, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.23, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 4.37, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.05, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.12, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.14, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 5, "optimal_onion_potting_agent_0_mean": 12.66, 
"optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 18, "optimal_onion_potting_agent_1_mean": 14.2, "optimal_onion_potting_agent_1_min": 6, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 12.66, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 18, "viable_onion_potting_agent_1_mean": 14.2, "viable_onion_potting_agent_1_min": 6, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [444.0, 410.0, 481.0, 411.0, 456.0, 461.0, 230.0, 370.0, 398.0, 473.0, 513.0, 507.0, 461.0, 522.0, 573.0, 495.0, 516.0, 390.0, 501.0, 519.0, 525.0, 524.0, 479.0, 447.0, 467.0, 464.0, 450.0, 392.0, 522.0, 416.0, 478.0, 510.0, 401.0, 516.0, 455.0, 516.0, 510.0, 411.0, 418.0, 522.0, 465.0, 519.0, 465.0, 408.0, 473.0, 525.0, 522.0, 481.0, 447.0, 527.0, 476.0, 527.0, 449.0, 396.0, 453.0, 453.0, 470.0, 395.0, 481.0, 516.0, 516.0, 449.0, 516.0, 373.0, 416.0, 407.0, 392.0, 516.0, 513.0, 453.0, 516.0, 533.0, 296.0, 453.0, 501.0, 422.0, 465.0, 395.0, 530.0, 473.0, 519.0, 458.0, 404.0, 487.0, 449.0, 516.0, 516.0, 510.0, 504.0, 516.0, 516.0, 473.0, 516.0, 519.0, 470.0, 387.0, 522.0, 453.0, 421.0, 438.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [224.0, 220.0, 205.0, 205.0, 229.0, 252.0, 210.0, 201.0, 236.0, 220.0, 223.0, 238.0, 124.0, 106.0, 174.0, 196.0, 191.0, 207.0, 241.0, 232.0, 260.0, 253.0, 268.0, 239.0, 227.0, 234.0, 262.0, 260.0, 293.0, 280.0, 250.0, 245.0, 249.0, 267.0, 195.0, 195.0, 255.0, 246.0, 261.0, 258.0, 254.0, 271.0, 262.0, 262.0, 247.0, 232.0, 201.0, 246.0, 230.0, 237.0, 223.0, 241.0, 221.0, 229.0, 197.0, 195.0, 254.0, 268.0, 211.0, 205.0, 239.0, 239.0, 259.0, 251.0, 201.0, 200.0, 264.0, 252.0, 233.0, 222.0, 260.0, 256.0, 265.0, 245.0, 210.0, 201.0, 218.0, 200.0, 267.0, 255.0, 240.0, 225.0, 275.0, 244.0, 218.0, 247.0, 202.0, 206.0, 252.0, 221.0, 257.0, 268.0, 262.0, 260.0, 237.0, 244.0, 217.0, 
230.0, 265.0, 262.0, 249.0, 227.0, 259.0, 268.0, 227.0, 222.0, 192.0, 204.0, 233.0, 220.0, 222.0, 231.0, 225.0, 245.0, 195.0, 200.0, 239.0, 242.0, 254.0, 262.0, 259.0, 257.0, 237.0, 212.0, 267.0, 249.0, 184.0, 189.0, 211.0, 205.0, 198.0, 209.0, 202.0, 190.0, 249.0, 267.0, 257.0, 256.0, 218.0, 235.0, 254.0, 262.0, 273.0, 260.0, 133.0, 163.0, 219.0, 234.0, 235.0, 266.0, 207.0, 215.0, 239.0, 226.0, 195.0, 200.0, 269.0, 261.0, 230.0, 243.0, 263.0, 256.0, 229.0, 229.0, 202.0, 202.0, 247.0, 240.0, 225.0, 224.0, 248.0, 268.0, 250.0, 266.0, 252.0, 258.0, 249.0, 255.0, 272.0, 244.0, 254.0, 262.0, 232.0, 241.0, 260.0, 256.0, 250.0, 269.0, 233.0, 237.0, 189.0, 198.0, 265.0, 257.0, 233.0, 220.0, 209.0, 212.0, 218.0, 220.0]}, "sampler_perf": {"mean_env_wait_ms": 2.0782697759005244, "mean_processing_ms": 0.48273945160484155, "mean_inference_ms": 2.6262237371699033}, "off_policy_estimator": {}, "info": {"num_steps_trained": 2232000, "num_steps_sampled": 1190400, "sample_time_ms": 21149.002, "load_time_ms": 36.704, "grad_time_ms": 9227.888, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 4.038967894916891e-29, "cur_lr": 0.0010000000474974513, "total_loss": 0.0010848678648471832, "policy_loss": -0.005190685391426086, "vf_loss": 68.93277740478516, "vf_explained_var": 0.7608636021614075, "kl": 0.0020486123394221067, "entropy": 1.2354419231414795, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1190400, "episodes_total": 2976, "training_iteration": 93, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-31-14", "timestamp": 1660249874, "time_this_iter_s": 30.926449298858643, "time_total_s": 8293.42159819603, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", 
"num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, 
"in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, 
"entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 8293.42159819603, "timesteps_since_restore": 1190400, "iterations_since_restore": 93, "perf": {"cpu_util_percent": 31.343181818181815, "ram_util_percent": 58.222727272727276}}
-{"episode_reward_max": 573.0, "episode_reward_min": 63.0, "episode_reward_mean": 462.26, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 29.0}, "policy_reward_max": {"ppo": 293.0}, "policy_reward_mean": {"ppo": 231.13}, "custom_metrics": {"sparse_reward_mean": 160.2, "sparse_reward_min": 20, "sparse_reward_max": 200, "shaped_reward_mean": 141.86, "shaped_reward_min": 23, "shaped_reward_max": 173, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.41, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 15.74, "onion_pickup_agent_1_min": 5, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 13.57, "useful_onion_pickup_agent_0_min": 4, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 15.05, "useful_onion_pickup_agent_1_min": 3, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 1.71, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 7, "onion_drop_agent_1_mean": 1.32, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.82, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 0.6, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 12.39, "potting_onion_agent_0_min": 3, "potting_onion_agent_0_max": 18, "potting_onion_agent_1_mean": 14.12, "potting_onion_agent_1_min": 2, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.86, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 13, "dish_pickup_agent_1_mean": 5.5, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 12, "useful_dish_pickup_agent_0_mean": 3.43, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 3.08, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 1.27, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 8, "dish_drop_agent_1_mean": 1.12, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.39, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 4, "useful_dish_drop_agent_1_mean": 0.41, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 4.52, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.31, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 4.22, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.11, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.19, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 3, "soup_drop_agent_1_mean": 0.17, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 5, "optimal_onion_potting_agent_0_mean": 12.39, 
"optimal_onion_potting_agent_0_min": 3, "optimal_onion_potting_agent_0_max": 18, "optimal_onion_potting_agent_1_mean": 14.12, "optimal_onion_potting_agent_1_min": 2, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 12.39, "viable_onion_potting_agent_0_min": 3, "viable_onion_potting_agent_0_max": 18, "viable_onion_potting_agent_1_mean": 14.12, "viable_onion_potting_agent_1_min": 2, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [482.0, 525.0, 507.0, 516.0, 478.0, 519.0, 516.0, 410.0, 513.0, 295.0, 401.0, 462.0, 501.0, 519.0, 63.0, 456.0, 381.0, 510.0, 464.0, 461.0, 473.0, 407.0, 570.0, 444.0, 444.0, 384.0, 467.0, 428.0, 456.0, 462.0, 513.0, 519.0, 416.0, 407.0, 392.0, 516.0, 513.0, 453.0, 516.0, 533.0, 296.0, 453.0, 501.0, 422.0, 465.0, 395.0, 530.0, 473.0, 519.0, 458.0, 404.0, 487.0, 449.0, 516.0, 516.0, 510.0, 504.0, 516.0, 516.0, 473.0, 516.0, 519.0, 470.0, 387.0, 522.0, 453.0, 421.0, 438.0, 444.0, 410.0, 481.0, 411.0, 456.0, 461.0, 230.0, 370.0, 398.0, 473.0, 513.0, 507.0, 461.0, 522.0, 573.0, 495.0, 516.0, 390.0, 501.0, 519.0, 525.0, 524.0, 479.0, 447.0, 467.0, 464.0, 450.0, 392.0, 522.0, 416.0, 478.0, 510.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [250.0, 232.0, 259.0, 266.0, 244.0, 263.0, 261.0, 255.0, 253.0, 225.0, 259.0, 260.0, 251.0, 265.0, 210.0, 200.0, 248.0, 265.0, 140.0, 155.0, 194.0, 207.0, 222.0, 240.0, 252.0, 249.0, 259.0, 260.0, 29.0, 34.0, 206.0, 250.0, 184.0, 197.0, 256.0, 254.0, 239.0, 225.0, 231.0, 230.0, 225.0, 248.0, 204.0, 203.0, 288.0, 282.0, 215.0, 229.0, 226.0, 218.0, 174.0, 210.0, 231.0, 236.0, 222.0, 206.0, 235.0, 221.0, 218.0, 244.0, 251.0, 262.0, 271.0, 248.0, 211.0, 205.0, 198.0, 209.0, 202.0, 190.0, 249.0, 267.0, 257.0, 256.0, 218.0, 235.0, 254.0, 262.0, 273.0, 260.0, 133.0, 163.0, 219.0, 234.0, 235.0, 266.0, 207.0, 215.0, 239.0, 226.0, 195.0, 200.0, 269.0, 261.0, 230.0, 243.0, 263.0, 
256.0, 229.0, 229.0, 202.0, 202.0, 247.0, 240.0, 225.0, 224.0, 248.0, 268.0, 250.0, 266.0, 252.0, 258.0, 249.0, 255.0, 272.0, 244.0, 254.0, 262.0, 232.0, 241.0, 260.0, 256.0, 250.0, 269.0, 233.0, 237.0, 189.0, 198.0, 265.0, 257.0, 233.0, 220.0, 209.0, 212.0, 218.0, 220.0, 224.0, 220.0, 205.0, 205.0, 229.0, 252.0, 210.0, 201.0, 236.0, 220.0, 223.0, 238.0, 124.0, 106.0, 174.0, 196.0, 191.0, 207.0, 241.0, 232.0, 260.0, 253.0, 268.0, 239.0, 227.0, 234.0, 262.0, 260.0, 293.0, 280.0, 250.0, 245.0, 249.0, 267.0, 195.0, 195.0, 255.0, 246.0, 261.0, 258.0, 254.0, 271.0, 262.0, 262.0, 247.0, 232.0, 201.0, 246.0, 230.0, 237.0, 223.0, 241.0, 221.0, 229.0, 197.0, 195.0, 254.0, 268.0, 211.0, 205.0, 239.0, 239.0, 259.0, 251.0]}, "sampler_perf": {"mean_env_wait_ms": 2.060765834369586, "mean_processing_ms": 0.47925634757705055, "mean_inference_ms": 2.608906134199901}, "off_policy_estimator": {}, "info": {"num_steps_trained": 2256000, "num_steps_sampled": 1203200, "sample_time_ms": 21186.356, "load_time_ms": 36.604, "grad_time_ms": 9318.639, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 2.0194839474584456e-29, "cur_lr": 0.0010000000474974513, "total_loss": 0.00026301448815502226, "policy_loss": -0.005903394427150488, "vf_loss": 67.8399658203125, "vf_explained_var": 0.7958834171295166, "kl": 0.0017271721735596657, "entropy": 1.2351765632629395, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1203200, "episodes_total": 3008, "training_iteration": 94, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-31-44", "timestamp": 1660249904, "time_this_iter_s": 30.80340886116028, "time_total_s": 8324.22500705719, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 
1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": 
false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, 
"entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 8324.22500705719, "timesteps_since_restore": 1203200, "iterations_since_restore": 94, "perf": {"cpu_util_percent": 33.47727272727272, "ram_util_percent": 58.21363636363637}}
-{"episode_reward_max": 573.0, "episode_reward_min": 63.0, "episode_reward_mean": 461.93, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 29.0}, "policy_reward_max": {"ppo": 293.0}, "policy_reward_mean": {"ppo": 230.965}, "custom_metrics": {"sparse_reward_mean": 160.2, "sparse_reward_min": 20, "sparse_reward_max": 200, "shaped_reward_mean": 141.53, "shaped_reward_min": 23, "shaped_reward_max": 173, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.54, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 15.69, "onion_pickup_agent_1_min": 5, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 13.8, "useful_onion_pickup_agent_0_min": 4, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 14.97, "useful_onion_pickup_agent_1_min": 3, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 1.8, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 8, "onion_drop_agent_1_mean": 1.18, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.82, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 0.6, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 12.44, "potting_onion_agent_0_min": 3, "potting_onion_agent_0_max": 18, "potting_onion_agent_1_mean": 14.16, "potting_onion_agent_1_min": 2, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.82, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.45, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 12, "useful_dish_pickup_agent_0_mean": 3.46, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 2.95, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 1.17, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 1.22, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.36, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.4, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 4.56, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.12, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.31, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.02, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.17, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 3, "soup_drop_agent_1_mean": 0.08, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 12.44, 
"optimal_onion_potting_agent_0_min": 3, "optimal_onion_potting_agent_0_max": 18, "optimal_onion_potting_agent_1_mean": 14.16, "optimal_onion_potting_agent_1_min": 2, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 12.44, "viable_onion_potting_agent_0_min": 3, "viable_onion_potting_agent_0_max": 18, "viable_onion_potting_agent_1_mean": 14.16, "viable_onion_potting_agent_1_min": 2, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [441.0, 453.0, 479.0, 405.0, 504.0, 456.0, 525.0, 519.0, 390.0, 379.0, 465.0, 522.0, 530.0, 359.0, 473.0, 453.0, 524.0, 441.0, 407.0, 525.0, 530.0, 453.0, 464.0, 525.0, 422.0, 492.0, 465.0, 398.0, 519.0, 513.0, 464.0, 513.0, 522.0, 453.0, 421.0, 438.0, 444.0, 410.0, 481.0, 411.0, 456.0, 461.0, 230.0, 370.0, 398.0, 473.0, 513.0, 507.0, 461.0, 522.0, 573.0, 495.0, 516.0, 390.0, 501.0, 519.0, 525.0, 524.0, 479.0, 447.0, 467.0, 464.0, 450.0, 392.0, 522.0, 416.0, 478.0, 510.0, 482.0, 525.0, 507.0, 516.0, 478.0, 519.0, 516.0, 410.0, 513.0, 295.0, 401.0, 462.0, 501.0, 519.0, 63.0, 456.0, 381.0, 510.0, 464.0, 461.0, 473.0, 407.0, 570.0, 444.0, 444.0, 384.0, 467.0, 428.0, 456.0, 462.0, 513.0, 519.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [213.0, 228.0, 227.0, 226.0, 246.0, 233.0, 199.0, 206.0, 253.0, 251.0, 230.0, 226.0, 265.0, 260.0, 258.0, 261.0, 193.0, 197.0, 197.0, 182.0, 228.0, 237.0, 249.0, 273.0, 264.0, 266.0, 183.0, 176.0, 242.0, 231.0, 205.0, 248.0, 262.0, 262.0, 234.0, 207.0, 192.0, 215.0, 264.0, 261.0, 270.0, 260.0, 232.0, 221.0, 236.0, 228.0, 266.0, 259.0, 195.0, 227.0, 257.0, 235.0, 230.0, 235.0, 212.0, 186.0, 264.0, 255.0, 250.0, 263.0, 229.0, 235.0, 253.0, 260.0, 265.0, 257.0, 233.0, 220.0, 209.0, 212.0, 218.0, 220.0, 224.0, 220.0, 205.0, 205.0, 229.0, 252.0, 210.0, 201.0, 236.0, 220.0, 223.0, 238.0, 124.0, 106.0, 174.0, 196.0, 191.0, 207.0, 241.0, 232.0, 260.0, 253.0, 268.0, 239.0, 227.0, 
234.0, 262.0, 260.0, 293.0, 280.0, 250.0, 245.0, 249.0, 267.0, 195.0, 195.0, 255.0, 246.0, 261.0, 258.0, 254.0, 271.0, 262.0, 262.0, 247.0, 232.0, 201.0, 246.0, 230.0, 237.0, 223.0, 241.0, 221.0, 229.0, 197.0, 195.0, 254.0, 268.0, 211.0, 205.0, 239.0, 239.0, 259.0, 251.0, 250.0, 232.0, 259.0, 266.0, 244.0, 263.0, 261.0, 255.0, 253.0, 225.0, 259.0, 260.0, 251.0, 265.0, 210.0, 200.0, 248.0, 265.0, 140.0, 155.0, 194.0, 207.0, 222.0, 240.0, 252.0, 249.0, 259.0, 260.0, 29.0, 34.0, 206.0, 250.0, 184.0, 197.0, 256.0, 254.0, 239.0, 225.0, 231.0, 230.0, 225.0, 248.0, 204.0, 203.0, 288.0, 282.0, 215.0, 229.0, 226.0, 218.0, 174.0, 210.0, 231.0, 236.0, 222.0, 206.0, 235.0, 221.0, 218.0, 244.0, 251.0, 262.0, 271.0, 248.0]}, "sampler_perf": {"mean_env_wait_ms": 2.043636062637732, "mean_processing_ms": 0.47584888992539853, "mean_inference_ms": 2.592145322205639}, "off_policy_estimator": {}, "info": {"num_steps_trained": 2280000, "num_steps_sampled": 1216000, "sample_time_ms": 21306.082, "load_time_ms": 36.597, "grad_time_ms": 9453.053, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 1.0097419737292228e-29, "cur_lr": 0.0010000000474974513, "total_loss": 0.001733560231514275, "policy_loss": -0.004548916593194008, "vf_loss": 68.90572357177734, "vf_explained_var": 0.7648184895515442, "kl": 0.0019422214245423675, "entropy": 1.216185212135315, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1216000, "episodes_total": 3040, "training_iteration": 95, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-32-16", "timestamp": 1660249936, "time_this_iter_s": 31.733500242233276, "time_total_s": 8355.958507299423, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 
1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": 
false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, 
"entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 8355.958507299423, "timesteps_since_restore": 1216000, "iterations_since_restore": 95, "perf": {"cpu_util_percent": 30.170454545454547, "ram_util_percent": 58.22272727272727}}
-{"episode_reward_max": 576.0, "episode_reward_min": 63.0, "episode_reward_mean": 464.77, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 29.0}, "policy_reward_max": {"ppo": 288.0}, "policy_reward_mean": {"ppo": 232.385}, "custom_metrics": {"sparse_reward_mean": 161.0, "sparse_reward_min": 20, "sparse_reward_max": 200, "shaped_reward_mean": 142.77, "shaped_reward_min": 23, "shaped_reward_max": 176, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.82, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 15.62, "onion_pickup_agent_1_min": 5, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 14.01, "useful_onion_pickup_agent_0_min": 4, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 14.82, "useful_onion_pickup_agent_1_min": 3, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 1.67, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 8, "onion_drop_agent_1_mean": 1.25, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 7, "useful_onion_drop_agent_0_mean": 0.84, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 0.65, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 12.83, "potting_onion_agent_0_min": 3, "potting_onion_agent_0_max": 18, "potting_onion_agent_1_mean": 13.99, "potting_onion_agent_1_min": 2, "potting_onion_agent_1_max": 19, "dish_pickup_agent_0_mean": 5.67, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.39, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 12, "useful_dish_pickup_agent_0_mean": 3.41, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 3.11, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 6, "dish_drop_agent_0_mean": 1.06, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 1.11, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.32, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.34, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.54, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.18, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.32, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.09, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.14, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 3, "soup_drop_agent_1_mean": 0.08, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 12.83, 
"optimal_onion_potting_agent_0_min": 3, "optimal_onion_potting_agent_0_max": 18, "optimal_onion_potting_agent_1_mean": 13.99, "optimal_onion_potting_agent_1_min": 2, "optimal_onion_potting_agent_1_max": 19, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 12.83, "viable_onion_potting_agent_0_min": 3, "viable_onion_potting_agent_0_max": 18, "viable_onion_potting_agent_1_mean": 13.99, "viable_onion_potting_agent_1_min": 2, "viable_onion_potting_agent_1_max": 19, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [447.0, 513.0, 487.0, 413.0, 348.0, 492.0, 421.0, 470.0, 476.0, 447.0, 504.0, 412.0, 470.0, 519.0, 516.0, 533.0, 404.0, 441.0, 447.0, 576.0, 459.0, 510.0, 408.0, 510.0, 329.0, 450.0, 510.0, 516.0, 525.0, 510.0, 450.0, 484.0, 522.0, 416.0, 478.0, 510.0, 482.0, 525.0, 507.0, 516.0, 478.0, 519.0, 516.0, 410.0, 513.0, 295.0, 401.0, 462.0, 501.0, 519.0, 63.0, 456.0, 381.0, 510.0, 464.0, 461.0, 473.0, 407.0, 570.0, 444.0, 444.0, 384.0, 467.0, 428.0, 456.0, 462.0, 513.0, 519.0, 441.0, 453.0, 479.0, 405.0, 504.0, 456.0, 525.0, 519.0, 390.0, 379.0, 465.0, 522.0, 530.0, 359.0, 473.0, 453.0, 524.0, 441.0, 407.0, 525.0, 530.0, 453.0, 464.0, 525.0, 422.0, 492.0, 465.0, 398.0, 519.0, 513.0, 464.0, 513.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [217.0, 230.0, 252.0, 261.0, 246.0, 241.0, 204.0, 209.0, 166.0, 182.0, 240.0, 252.0, 210.0, 211.0, 230.0, 240.0, 228.0, 248.0, 232.0, 215.0, 258.0, 246.0, 224.0, 188.0, 228.0, 242.0, 265.0, 254.0, 249.0, 267.0, 267.0, 266.0, 202.0, 202.0, 235.0, 206.0, 212.0, 235.0, 288.0, 288.0, 225.0, 234.0, 248.0, 262.0, 219.0, 189.0, 257.0, 253.0, 177.0, 152.0, 229.0, 221.0, 259.0, 251.0, 254.0, 262.0, 261.0, 264.0, 267.0, 243.0, 221.0, 229.0, 244.0, 240.0, 254.0, 268.0, 211.0, 205.0, 239.0, 239.0, 259.0, 251.0, 250.0, 232.0, 259.0, 266.0, 244.0, 263.0, 261.0, 255.0, 253.0, 225.0, 259.0, 260.0, 251.0, 265.0, 210.0, 200.0, 248.0, 265.0, 140.0, 155.0, 194.0, 207.0, 222.0, 240.0, 252.0, 
249.0, 259.0, 260.0, 29.0, 34.0, 206.0, 250.0, 184.0, 197.0, 256.0, 254.0, 239.0, 225.0, 231.0, 230.0, 225.0, 248.0, 204.0, 203.0, 288.0, 282.0, 215.0, 229.0, 226.0, 218.0, 174.0, 210.0, 231.0, 236.0, 222.0, 206.0, 235.0, 221.0, 218.0, 244.0, 251.0, 262.0, 271.0, 248.0, 213.0, 228.0, 227.0, 226.0, 246.0, 233.0, 199.0, 206.0, 253.0, 251.0, 230.0, 226.0, 265.0, 260.0, 258.0, 261.0, 193.0, 197.0, 197.0, 182.0, 228.0, 237.0, 249.0, 273.0, 264.0, 266.0, 183.0, 176.0, 242.0, 231.0, 205.0, 248.0, 262.0, 262.0, 234.0, 207.0, 192.0, 215.0, 264.0, 261.0, 270.0, 260.0, 232.0, 221.0, 236.0, 228.0, 266.0, 259.0, 195.0, 227.0, 257.0, 235.0, 230.0, 235.0, 212.0, 186.0, 264.0, 255.0, 250.0, 263.0, 229.0, 235.0, 253.0, 260.0]}, "sampler_perf": {"mean_env_wait_ms": 2.0268528055210124, "mean_processing_ms": 0.47250865851188434, "mean_inference_ms": 2.5756730471163247}, "off_policy_estimator": {}, "info": {"num_steps_trained": 2304000, "num_steps_sampled": 1228800, "sample_time_ms": 21320.865, "load_time_ms": 36.696, "grad_time_ms": 9593.729, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 5.048709868646114e-30, "cur_lr": 0.0010000000474974513, "total_loss": -0.0013039499754086137, "policy_loss": -0.007722912821918726, "vf_loss": 70.26915740966797, "vf_explained_var": 0.7757861018180847, "kl": 0.001609964296221733, "entropy": 1.2159069776535034, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1228800, "episodes_total": 3072, "training_iteration": 96, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-32-47", "timestamp": 1660249967, "time_this_iter_s": 30.381797075271606, "time_total_s": 8386.340304374695, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", 
"num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, 
"in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, 
"entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 8386.340304374695, "timesteps_since_restore": 1228800, "iterations_since_restore": 96, "perf": {"cpu_util_percent": 31.758139534883718, "ram_util_percent": 58.16046511627907}}
-{"episode_reward_max": 579.0, "episode_reward_min": 128.0, "episode_reward_mean": 470.53, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 63.0}, "policy_reward_max": {"ppo": 294.0}, "policy_reward_mean": {"ppo": 235.265}, "custom_metrics": {"sparse_reward_mean": 163.2, "sparse_reward_min": 40, "sparse_reward_max": 200, "shaped_reward_mean": 144.13, "shaped_reward_min": 48, "shaped_reward_max": 179, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.93, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 15.66, "onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 14.25, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 14.85, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 1.47, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 8, "onion_drop_agent_1_mean": 1.27, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 7, "useful_onion_drop_agent_0_mean": 0.72, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.73, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 13.11, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 14.05, "potting_onion_agent_1_min": 6, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.73, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.26, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 3.43, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 3.12, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 1.08, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 1.01, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.32, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.32, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.55, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.16, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 4.4, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.08, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.09, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.04, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 13.11, 
"optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 14.05, "optimal_onion_potting_agent_1_min": 6, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 13.11, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 14.05, "viable_onion_potting_agent_1_min": 6, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [525.0, 522.0, 522.0, 462.0, 446.0, 519.0, 573.0, 352.0, 473.0, 519.0, 507.0, 524.0, 519.0, 467.0, 513.0, 522.0, 504.0, 519.0, 456.0, 453.0, 168.0, 516.0, 476.0, 470.0, 507.0, 570.0, 516.0, 444.0, 128.0, 510.0, 579.0, 317.0, 456.0, 462.0, 513.0, 519.0, 441.0, 453.0, 479.0, 405.0, 504.0, 456.0, 525.0, 519.0, 390.0, 379.0, 465.0, 522.0, 530.0, 359.0, 473.0, 453.0, 524.0, 441.0, 407.0, 525.0, 530.0, 453.0, 464.0, 525.0, 422.0, 492.0, 465.0, 398.0, 519.0, 513.0, 464.0, 513.0, 447.0, 513.0, 487.0, 413.0, 348.0, 492.0, 421.0, 470.0, 476.0, 447.0, 504.0, 412.0, 470.0, 519.0, 516.0, 533.0, 404.0, 441.0, 447.0, 576.0, 459.0, 510.0, 408.0, 510.0, 329.0, 450.0, 510.0, 516.0, 525.0, 510.0, 450.0, 484.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [257.0, 268.0, 252.0, 270.0, 258.0, 264.0, 238.0, 224.0, 227.0, 219.0, 246.0, 273.0, 285.0, 288.0, 168.0, 184.0, 240.0, 233.0, 266.0, 253.0, 258.0, 249.0, 249.0, 275.0, 259.0, 260.0, 240.0, 227.0, 246.0, 267.0, 267.0, 255.0, 256.0, 248.0, 269.0, 250.0, 225.0, 231.0, 225.0, 228.0, 85.0, 83.0, 265.0, 251.0, 232.0, 244.0, 222.0, 248.0, 253.0, 254.0, 286.0, 284.0, 256.0, 260.0, 223.0, 221.0, 65.0, 63.0, 256.0, 254.0, 294.0, 285.0, 162.0, 155.0, 235.0, 221.0, 218.0, 244.0, 251.0, 262.0, 271.0, 248.0, 213.0, 228.0, 227.0, 226.0, 246.0, 233.0, 199.0, 206.0, 253.0, 251.0, 230.0, 226.0, 265.0, 260.0, 258.0, 261.0, 193.0, 197.0, 197.0, 182.0, 228.0, 237.0, 249.0, 273.0, 264.0, 
266.0, 183.0, 176.0, 242.0, 231.0, 205.0, 248.0, 262.0, 262.0, 234.0, 207.0, 192.0, 215.0, 264.0, 261.0, 270.0, 260.0, 232.0, 221.0, 236.0, 228.0, 266.0, 259.0, 195.0, 227.0, 257.0, 235.0, 230.0, 235.0, 212.0, 186.0, 264.0, 255.0, 250.0, 263.0, 229.0, 235.0, 253.0, 260.0, 217.0, 230.0, 252.0, 261.0, 246.0, 241.0, 204.0, 209.0, 166.0, 182.0, 240.0, 252.0, 210.0, 211.0, 230.0, 240.0, 228.0, 248.0, 232.0, 215.0, 258.0, 246.0, 224.0, 188.0, 228.0, 242.0, 265.0, 254.0, 249.0, 267.0, 267.0, 266.0, 202.0, 202.0, 235.0, 206.0, 212.0, 235.0, 288.0, 288.0, 225.0, 234.0, 248.0, 262.0, 219.0, 189.0, 257.0, 253.0, 177.0, 152.0, 229.0, 221.0, 259.0, 251.0, 254.0, 262.0, 261.0, 264.0, 267.0, 243.0, 221.0, 229.0, 244.0, 240.0]}, "sampler_perf": {"mean_env_wait_ms": 2.0104424851575393, "mean_processing_ms": 0.4692484587452608, "mean_inference_ms": 2.5596152283112645}, "off_policy_estimator": {}, "info": {"num_steps_trained": 2328000, "num_steps_sampled": 1241600, "sample_time_ms": 21335.822, "load_time_ms": 37.307, "grad_time_ms": 9680.502, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 2.524354934323057e-30, "cur_lr": 0.0010000000474974513, "total_loss": 0.001703931367956102, "policy_loss": -0.005316242575645447, "vf_loss": 76.20516204833984, "vf_explained_var": 0.7805307507514954, "kl": 0.002101513324305415, "entropy": 1.2007073163986206, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1241600, "episodes_total": 3104, "training_iteration": 97, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-33-18", "timestamp": 1660249998, "time_this_iter_s": 31.63303232192993, "time_total_s": 8417.973336696625, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 
1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": 
false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, 
"entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 8417.973336696625, "timesteps_since_restore": 1241600, "iterations_since_restore": 97, "perf": {"cpu_util_percent": 34.47777777777778, "ram_util_percent": 58.24}}
-{"episode_reward_max": 579.0, "episode_reward_min": 128.0, "episode_reward_mean": 478.77, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 63.0}, "policy_reward_max": {"ppo": 294.0}, "policy_reward_mean": {"ppo": 239.385}, "custom_metrics": {"sparse_reward_mean": 166.0, "sparse_reward_min": 40, "sparse_reward_max": 200, "shaped_reward_mean": 146.77, "shaped_reward_min": 48, "shaped_reward_max": 179, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.02, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 15.94, "onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 14.22, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 15.14, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 1.52, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 7, "onion_drop_agent_1_mean": 1.29, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 7, "useful_onion_drop_agent_0_mean": 0.7, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.74, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 13.12, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 14.32, "potting_onion_agent_1_min": 6, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.67, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.2, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 3.63, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 3.32, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.99, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 0.89, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.3, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.29, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.59, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.25, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 4.44, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.13, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.08, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.05, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 13.12, 
"optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 14.32, "optimal_onion_potting_agent_1_min": 6, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 13.12, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 14.32, "viable_onion_potting_agent_1_min": 6, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [473.0, 522.0, 510.0, 525.0, 513.0, 525.0, 459.0, 410.0, 504.0, 522.0, 522.0, 507.0, 237.0, 527.0, 525.0, 472.0, 501.0, 459.0, 450.0, 530.0, 519.0, 525.0, 530.0, 579.0, 522.0, 519.0, 479.0, 573.0, 530.0, 513.0, 344.0, 447.0, 519.0, 513.0, 464.0, 513.0, 447.0, 513.0, 487.0, 413.0, 348.0, 492.0, 421.0, 470.0, 476.0, 447.0, 504.0, 412.0, 470.0, 519.0, 516.0, 533.0, 404.0, 441.0, 447.0, 576.0, 459.0, 510.0, 408.0, 510.0, 329.0, 450.0, 510.0, 516.0, 525.0, 510.0, 450.0, 484.0, 525.0, 522.0, 522.0, 462.0, 446.0, 519.0, 573.0, 352.0, 473.0, 519.0, 507.0, 524.0, 519.0, 467.0, 513.0, 522.0, 504.0, 519.0, 456.0, 453.0, 168.0, 516.0, 476.0, 470.0, 507.0, 570.0, 516.0, 444.0, 128.0, 510.0, 579.0, 317.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [232.0, 241.0, 262.0, 260.0, 252.0, 258.0, 255.0, 270.0, 260.0, 253.0, 259.0, 266.0, 227.0, 232.0, 195.0, 215.0, 254.0, 250.0, 258.0, 264.0, 254.0, 268.0, 248.0, 259.0, 117.0, 120.0, 261.0, 266.0, 259.0, 266.0, 227.0, 245.0, 248.0, 253.0, 232.0, 227.0, 233.0, 217.0, 267.0, 263.0, 265.0, 254.0, 264.0, 261.0, 265.0, 265.0, 291.0, 288.0, 262.0, 260.0, 259.0, 260.0, 244.0, 235.0, 290.0, 283.0, 255.0, 275.0, 253.0, 260.0, 175.0, 169.0, 217.0, 230.0, 264.0, 255.0, 250.0, 263.0, 229.0, 235.0, 253.0, 260.0, 217.0, 230.0, 252.0, 261.0, 246.0, 241.0, 204.0, 209.0, 166.0, 182.0, 240.0, 252.0, 210.0, 211.0, 230.0, 240.0, 228.0, 248.0, 232.0, 215.0, 258.0, 246.0, 224.0, 188.0, 228.0, 
242.0, 265.0, 254.0, 249.0, 267.0, 267.0, 266.0, 202.0, 202.0, 235.0, 206.0, 212.0, 235.0, 288.0, 288.0, 225.0, 234.0, 248.0, 262.0, 219.0, 189.0, 257.0, 253.0, 177.0, 152.0, 229.0, 221.0, 259.0, 251.0, 254.0, 262.0, 261.0, 264.0, 267.0, 243.0, 221.0, 229.0, 244.0, 240.0, 257.0, 268.0, 252.0, 270.0, 258.0, 264.0, 238.0, 224.0, 227.0, 219.0, 246.0, 273.0, 285.0, 288.0, 168.0, 184.0, 240.0, 233.0, 266.0, 253.0, 258.0, 249.0, 249.0, 275.0, 259.0, 260.0, 240.0, 227.0, 246.0, 267.0, 267.0, 255.0, 256.0, 248.0, 269.0, 250.0, 225.0, 231.0, 225.0, 228.0, 85.0, 83.0, 265.0, 251.0, 232.0, 244.0, 222.0, 248.0, 253.0, 254.0, 286.0, 284.0, 256.0, 260.0, 223.0, 221.0, 65.0, 63.0, 256.0, 254.0, 294.0, 285.0, 162.0, 155.0]}, "sampler_perf": {"mean_env_wait_ms": 1.9943851070346994, "mean_processing_ms": 0.4660638204377501, "mean_inference_ms": 2.5438091293770433}, "off_policy_estimator": {}, "info": {"num_steps_trained": 2352000, "num_steps_sampled": 1254400, "sample_time_ms": 21313.715, "load_time_ms": 36.778, "grad_time_ms": 9499.638, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 1.2621774671615285e-30, "cur_lr": 0.0010000000474974513, "total_loss": 0.0019398670410737395, "policy_loss": -0.005300960969179869, "vf_loss": 78.3524398803711, "vf_explained_var": 0.7676915526390076, "kl": 0.0015995064750313759, "entropy": 1.188806414604187, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1254400, "episodes_total": 3136, "training_iteration": 98, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-33-47", "timestamp": 1660250027, "time_this_iter_s": 29.219672203063965, "time_total_s": 8447.193008899689, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 8447.193008899689, "timesteps_since_restore": 1254400, "iterations_since_restore": 98, "perf": {"cpu_util_percent": 33.91190476190476, "ram_util_percent": 58.29285714285714}}
-{"episode_reward_max": 579.0, "episode_reward_min": 128.0, "episode_reward_mean": 485.45, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 63.0}, "policy_reward_max": {"ppo": 294.0}, "policy_reward_mean": {"ppo": 242.725}, "custom_metrics": {"sparse_reward_mean": 168.4, "sparse_reward_min": 40, "sparse_reward_max": 200, "shaped_reward_mean": 148.65, "shaped_reward_min": 48, "shaped_reward_max": 179, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.54, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 15.76, "onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 14.85, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 15.04, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 1.67, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 7, "onion_drop_agent_1_mean": 1.33, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.69, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.74, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 13.5, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 14.11, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.43, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.34, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 3.61, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 3.63, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.9, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.76, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.3, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.25, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.41, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 4.54, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.28, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.37, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.08, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.07, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 13.5, 
"optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 14.11, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 13.5, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 14.11, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [458.0, 473.0, 516.0, 522.0, 524.0, 570.0, 453.0, 476.0, 458.0, 522.0, 576.0, 498.0, 519.0, 406.0, 516.0, 533.0, 450.0, 473.0, 525.0, 525.0, 464.0, 447.0, 441.0, 479.0, 467.0, 504.0, 507.0, 513.0, 519.0, 406.0, 467.0, 498.0, 525.0, 510.0, 450.0, 484.0, 525.0, 522.0, 522.0, 462.0, 446.0, 519.0, 573.0, 352.0, 473.0, 519.0, 507.0, 524.0, 519.0, 467.0, 513.0, 522.0, 504.0, 519.0, 456.0, 453.0, 168.0, 516.0, 476.0, 470.0, 507.0, 570.0, 516.0, 444.0, 128.0, 510.0, 579.0, 317.0, 473.0, 522.0, 510.0, 525.0, 513.0, 525.0, 459.0, 410.0, 504.0, 522.0, 522.0, 507.0, 237.0, 527.0, 525.0, 472.0, 501.0, 459.0, 450.0, 530.0, 519.0, 525.0, 530.0, 579.0, 522.0, 519.0, 479.0, 573.0, 530.0, 513.0, 344.0, 447.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [217.0, 241.0, 237.0, 236.0, 268.0, 248.0, 250.0, 272.0, 256.0, 268.0, 285.0, 285.0, 233.0, 220.0, 231.0, 245.0, 227.0, 231.0, 263.0, 259.0, 288.0, 288.0, 245.0, 253.0, 257.0, 262.0, 197.0, 209.0, 256.0, 260.0, 251.0, 282.0, 246.0, 204.0, 232.0, 241.0, 261.0, 264.0, 263.0, 262.0, 223.0, 241.0, 221.0, 226.0, 224.0, 217.0, 241.0, 238.0, 233.0, 234.0, 253.0, 251.0, 259.0, 248.0, 256.0, 257.0, 254.0, 265.0, 185.0, 221.0, 229.0, 238.0, 234.0, 264.0, 261.0, 264.0, 267.0, 243.0, 221.0, 229.0, 244.0, 240.0, 257.0, 268.0, 252.0, 270.0, 258.0, 264.0, 238.0, 224.0, 227.0, 219.0, 246.0, 273.0, 285.0, 288.0, 168.0, 184.0, 240.0, 233.0, 266.0, 253.0, 258.0, 249.0, 249.0, 275.0, 259.0, 
260.0, 240.0, 227.0, 246.0, 267.0, 267.0, 255.0, 256.0, 248.0, 269.0, 250.0, 225.0, 231.0, 225.0, 228.0, 85.0, 83.0, 265.0, 251.0, 232.0, 244.0, 222.0, 248.0, 253.0, 254.0, 286.0, 284.0, 256.0, 260.0, 223.0, 221.0, 65.0, 63.0, 256.0, 254.0, 294.0, 285.0, 162.0, 155.0, 232.0, 241.0, 262.0, 260.0, 252.0, 258.0, 255.0, 270.0, 260.0, 253.0, 259.0, 266.0, 227.0, 232.0, 195.0, 215.0, 254.0, 250.0, 258.0, 264.0, 254.0, 268.0, 248.0, 259.0, 117.0, 120.0, 261.0, 266.0, 259.0, 266.0, 227.0, 245.0, 248.0, 253.0, 232.0, 227.0, 233.0, 217.0, 267.0, 263.0, 265.0, 254.0, 264.0, 261.0, 265.0, 265.0, 291.0, 288.0, 262.0, 260.0, 259.0, 260.0, 244.0, 235.0, 290.0, 283.0, 255.0, 275.0, 253.0, 260.0, 175.0, 169.0, 217.0, 230.0]}, "sampler_perf": {"mean_env_wait_ms": 1.978664058751599, "mean_processing_ms": 0.4629441310230788, "mean_inference_ms": 2.5282732021268366}, "off_policy_estimator": {}, "info": {"num_steps_trained": 2376000, "num_steps_sampled": 1267200, "sample_time_ms": 21031.143, "load_time_ms": 36.859, "grad_time_ms": 9358.331, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 6.3108873358076425e-31, "cur_lr": 0.0010000000474974513, "total_loss": 0.00523378886282444, "policy_loss": -0.0017726494697853923, "vf_loss": 76.06880950927734, "vf_explained_var": 0.753373920917511, "kl": 0.001648509525693953, "entropy": 1.2008789777755737, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1267200, "episodes_total": 3168, "training_iteration": 99, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-34-16", "timestamp": 1660250056, "time_this_iter_s": 28.433568000793457, "time_total_s": 8475.626576900482, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 8475.626576900482, "timesteps_since_restore": 1267200, "iterations_since_restore": 99, "perf": {"cpu_util_percent": 33.795, "ram_util_percent": 58.2625}}
-{"episode_reward_max": 582.0, "episode_reward_min": 128.0, "episode_reward_mean": 480.94, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 63.0}, "policy_reward_max": {"ppo": 298.0}, "policy_reward_mean": {"ppo": 240.47}, "custom_metrics": {"sparse_reward_mean": 166.8, "sparse_reward_min": 40, "sparse_reward_max": 200, "shaped_reward_mean": 147.34, "shaped_reward_min": 48, "shaped_reward_max": 182, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.37, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 15.81, "onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 14.59, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 15.1, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 1.73, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 7, "onion_drop_agent_1_mean": 1.32, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.69, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.7, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 13.32, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 14.21, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 20, "dish_pickup_agent_0_mean": 5.5, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.56, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 3.5, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 3.45, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.96, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.92, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 6, "useful_dish_drop_agent_0_mean": 0.27, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.32, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 4.36, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 4.58, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.23, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.39, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.07, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.12, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 5, "optimal_onion_potting_agent_0_mean": 13.32, 
"optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 14.21, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 20, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 13.32, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 14.21, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 20, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [525.0, 504.0, 524.0, 513.0, 347.0, 522.0, 355.0, 336.0, 533.0, 459.0, 522.0, 384.0, 576.0, 567.0, 177.0, 582.0, 467.0, 470.0, 519.0, 533.0, 398.0, 510.0, 513.0, 441.0, 398.0, 516.0, 409.0, 507.0, 525.0, 516.0, 530.0, 404.0, 128.0, 510.0, 579.0, 317.0, 473.0, 522.0, 510.0, 525.0, 513.0, 525.0, 459.0, 410.0, 504.0, 522.0, 522.0, 507.0, 237.0, 527.0, 525.0, 472.0, 501.0, 459.0, 450.0, 530.0, 519.0, 525.0, 530.0, 579.0, 522.0, 519.0, 479.0, 573.0, 530.0, 513.0, 344.0, 447.0, 458.0, 473.0, 516.0, 522.0, 524.0, 570.0, 453.0, 476.0, 458.0, 522.0, 576.0, 498.0, 519.0, 406.0, 516.0, 533.0, 450.0, 473.0, 525.0, 525.0, 464.0, 447.0, 441.0, 479.0, 467.0, 504.0, 507.0, 513.0, 519.0, 406.0, 467.0, 498.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [252.0, 273.0, 265.0, 239.0, 251.0, 273.0, 254.0, 259.0, 188.0, 159.0, 260.0, 262.0, 168.0, 187.0, 151.0, 185.0, 275.0, 258.0, 239.0, 220.0, 264.0, 258.0, 187.0, 197.0, 288.0, 288.0, 270.0, 297.0, 89.0, 88.0, 284.0, 298.0, 241.0, 226.0, 241.0, 229.0, 259.0, 260.0, 254.0, 279.0, 194.0, 204.0, 259.0, 251.0, 256.0, 257.0, 227.0, 214.0, 186.0, 212.0, 240.0, 276.0, 193.0, 216.0, 242.0, 265.0, 259.0, 266.0, 262.0, 254.0, 276.0, 254.0, 205.0, 199.0, 65.0, 63.0, 256.0, 254.0, 294.0, 285.0, 162.0, 155.0, 232.0, 241.0, 262.0, 260.0, 252.0, 258.0, 255.0, 270.0, 260.0, 253.0, 259.0, 266.0, 227.0, 232.0, 195.0, 215.0, 254.0, 250.0, 258.0, 264.0, 254.0, 268.0, 248.0, 259.0, 117.0, 
120.0, 261.0, 266.0, 259.0, 266.0, 227.0, 245.0, 248.0, 253.0, 232.0, 227.0, 233.0, 217.0, 267.0, 263.0, 265.0, 254.0, 264.0, 261.0, 265.0, 265.0, 291.0, 288.0, 262.0, 260.0, 259.0, 260.0, 244.0, 235.0, 290.0, 283.0, 255.0, 275.0, 253.0, 260.0, 175.0, 169.0, 217.0, 230.0, 217.0, 241.0, 237.0, 236.0, 268.0, 248.0, 250.0, 272.0, 256.0, 268.0, 285.0, 285.0, 233.0, 220.0, 231.0, 245.0, 227.0, 231.0, 263.0, 259.0, 288.0, 288.0, 245.0, 253.0, 257.0, 262.0, 197.0, 209.0, 256.0, 260.0, 251.0, 282.0, 246.0, 204.0, 232.0, 241.0, 261.0, 264.0, 263.0, 262.0, 223.0, 241.0, 221.0, 226.0, 224.0, 217.0, 241.0, 238.0, 233.0, 234.0, 253.0, 251.0, 259.0, 248.0, 256.0, 257.0, 254.0, 265.0, 185.0, 221.0, 229.0, 238.0, 234.0, 264.0]}, "sampler_perf": {"mean_env_wait_ms": 1.9632342892537746, "mean_processing_ms": 0.4598755283783044, "mean_inference_ms": 2.5128753018749035}, "off_policy_estimator": {}, "info": {"num_steps_trained": 2400000, "num_steps_sampled": 1280000, "sample_time_ms": 20819.102, "load_time_ms": 36.792, "grad_time_ms": 9258.115, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 3.1554436679038213e-31, "cur_lr": 0.0010000000474974513, "total_loss": -0.0015751657774671912, "policy_loss": -0.008594638668000698, "vf_loss": 76.2179183959961, "vf_explained_var": 0.7723303437232971, "kl": 0.002320505678653717, "entropy": 1.2046717405319214, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1280000, "episodes_total": 3200, "training_iteration": 100, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-34-45", "timestamp": 1660250085, "time_this_iter_s": 29.270292043685913, "time_total_s": 8504.896868944168, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", 
"num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, 
"in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, 
"entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 8504.896868944168, "timesteps_since_restore": 1280000, "iterations_since_restore": 100, "perf": {"cpu_util_percent": 32.380487804878044, "ram_util_percent": 58.27560975609755}}
-{"episode_reward_max": 582.0, "episode_reward_min": 177.0, "episode_reward_mean": 476.37, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 86.0}, "policy_reward_max": {"ppo": 298.0}, "policy_reward_mean": {"ppo": 238.185}, "custom_metrics": {"sparse_reward_mean": 165.2, "sparse_reward_min": 60, "sparse_reward_max": 200, "shaped_reward_mean": 145.97, "shaped_reward_min": 57, "shaped_reward_max": 182, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.43, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 15.61, "onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 14.55, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 14.93, "useful_onion_pickup_agent_1_min": 6, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 1.79, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 9, "onion_drop_agent_1_mean": 1.36, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.83, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 0.7, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 13.36, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 13.91, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.54, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 13, "dish_pickup_agent_1_mean": 5.72, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 3.34, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 3.53, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 1.11, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 1.04, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 6, "useful_dish_drop_agent_0_mean": 0.3, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.31, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 4.26, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 4.58, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.16, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.37, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.06, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.13, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 5, "optimal_onion_potting_agent_0_mean": 13.36, 
"optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 13.91, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 13.36, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 13.91, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [447.0, 450.0, 522.0, 513.0, 519.0, 351.0, 573.0, 425.0, 473.0, 530.0, 479.0, 404.0, 470.0, 522.0, 525.0, 516.0, 197.0, 458.0, 519.0, 464.0, 180.0, 299.0, 519.0, 527.0, 506.0, 516.0, 522.0, 473.0, 570.0, 504.0, 530.0, 513.0, 530.0, 513.0, 344.0, 447.0, 458.0, 473.0, 516.0, 522.0, 524.0, 570.0, 453.0, 476.0, 458.0, 522.0, 576.0, 498.0, 519.0, 406.0, 516.0, 533.0, 450.0, 473.0, 525.0, 525.0, 464.0, 447.0, 441.0, 479.0, 467.0, 504.0, 507.0, 513.0, 519.0, 406.0, 467.0, 498.0, 525.0, 504.0, 524.0, 513.0, 347.0, 522.0, 355.0, 336.0, 533.0, 459.0, 522.0, 384.0, 576.0, 567.0, 177.0, 582.0, 467.0, 470.0, 519.0, 533.0, 398.0, 510.0, 513.0, 441.0, 398.0, 516.0, 409.0, 507.0, 525.0, 516.0, 530.0, 404.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [225.0, 222.0, 232.0, 218.0, 254.0, 268.0, 254.0, 259.0, 258.0, 261.0, 180.0, 171.0, 288.0, 285.0, 221.0, 204.0, 256.0, 217.0, 269.0, 261.0, 236.0, 243.0, 197.0, 207.0, 242.0, 228.0, 260.0, 262.0, 262.0, 263.0, 270.0, 246.0, 102.0, 95.0, 217.0, 241.0, 268.0, 251.0, 217.0, 247.0, 86.0, 94.0, 143.0, 156.0, 262.0, 257.0, 259.0, 268.0, 261.0, 245.0, 241.0, 275.0, 260.0, 262.0, 220.0, 253.0, 285.0, 285.0, 240.0, 264.0, 272.0, 258.0, 244.0, 269.0, 255.0, 275.0, 253.0, 260.0, 175.0, 169.0, 217.0, 230.0, 217.0, 241.0, 237.0, 236.0, 268.0, 248.0, 250.0, 272.0, 256.0, 268.0, 285.0, 285.0, 233.0, 220.0, 231.0, 245.0, 227.0, 231.0, 263.0, 259.0, 288.0, 288.0, 245.0, 253.0, 257.0, 
262.0, 197.0, 209.0, 256.0, 260.0, 251.0, 282.0, 246.0, 204.0, 232.0, 241.0, 261.0, 264.0, 263.0, 262.0, 223.0, 241.0, 221.0, 226.0, 224.0, 217.0, 241.0, 238.0, 233.0, 234.0, 253.0, 251.0, 259.0, 248.0, 256.0, 257.0, 254.0, 265.0, 185.0, 221.0, 229.0, 238.0, 234.0, 264.0, 252.0, 273.0, 265.0, 239.0, 251.0, 273.0, 254.0, 259.0, 188.0, 159.0, 260.0, 262.0, 168.0, 187.0, 151.0, 185.0, 275.0, 258.0, 239.0, 220.0, 264.0, 258.0, 187.0, 197.0, 288.0, 288.0, 270.0, 297.0, 89.0, 88.0, 284.0, 298.0, 241.0, 226.0, 241.0, 229.0, 259.0, 260.0, 254.0, 279.0, 194.0, 204.0, 259.0, 251.0, 256.0, 257.0, 227.0, 214.0, 186.0, 212.0, 240.0, 276.0, 193.0, 216.0, 242.0, 265.0, 259.0, 266.0, 262.0, 254.0, 276.0, 254.0, 205.0, 199.0]}, "sampler_perf": {"mean_env_wait_ms": 1.9480966486925166, "mean_processing_ms": 0.45685958478274097, "mean_inference_ms": 2.497748516500124}, "off_policy_estimator": {}, "info": {"num_steps_trained": 2424000, "num_steps_sampled": 1292800, "sample_time_ms": 20962.686, "load_time_ms": 36.873, "grad_time_ms": 9154.171, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 1.5777218339519106e-31, "cur_lr": 0.0010000000474974513, "total_loss": 0.00011571295181056485, "policy_loss": -0.006676681339740753, "vf_loss": 73.92855834960938, "vf_explained_var": 0.7952176928520203, "kl": 0.0016933353617787361, "entropy": 1.2009243965148926, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1292800, "episodes_total": 3232, "training_iteration": 101, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-35-15", "timestamp": 1660250115, "time_this_iter_s": 29.051042795181274, "time_total_s": 8533.94791173935, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", 
"num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, 
"in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, 
"entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 8533.94791173935, "timesteps_since_restore": 1292800, "iterations_since_restore": 101, "perf": {"cpu_util_percent": 29.43571428571429, "ram_util_percent": 58.27142857142857}}
-{"episode_reward_max": 582.0, "episode_reward_min": 177.0, "episode_reward_mean": 479.09, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 86.0}, "policy_reward_max": {"ppo": 298.0}, "policy_reward_mean": {"ppo": 239.545}, "custom_metrics": {"sparse_reward_mean": 166.0, "sparse_reward_min": 60, "sparse_reward_max": 200, "shaped_reward_mean": 147.09, "shaped_reward_min": 57, "shaped_reward_max": 182, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.17, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 15.98, "onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 14.22, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 15.32, "useful_onion_pickup_agent_1_min": 6, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 1.87, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 10, "onion_drop_agent_1_mean": 1.25, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.88, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 0.56, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 13.04, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 14.4, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.68, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 13, "dish_pickup_agent_1_mean": 5.72, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 3.51, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 3.53, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 1.1, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 1.12, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 6, "useful_dish_drop_agent_0_mean": 0.26, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.32, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 4.4, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 4.45, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.29, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.25, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.06, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.13, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 5, "optimal_onion_potting_agent_0_mean": 13.04, 
"optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 14.4, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 13.04, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 14.4, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [530.0, 487.0, 513.0, 570.0, 513.0, 570.0, 522.0, 476.0, 446.0, 453.0, 459.0, 522.0, 504.0, 468.0, 516.0, 479.0, 464.0, 522.0, 516.0, 467.0, 444.0, 401.0, 522.0, 510.0, 530.0, 513.0, 482.0, 507.0, 516.0, 516.0, 513.0, 470.0, 519.0, 406.0, 467.0, 498.0, 525.0, 504.0, 524.0, 513.0, 347.0, 522.0, 355.0, 336.0, 533.0, 459.0, 522.0, 384.0, 576.0, 567.0, 177.0, 582.0, 467.0, 470.0, 519.0, 533.0, 398.0, 510.0, 513.0, 441.0, 398.0, 516.0, 409.0, 507.0, 525.0, 516.0, 530.0, 404.0, 447.0, 450.0, 522.0, 513.0, 519.0, 351.0, 573.0, 425.0, 473.0, 530.0, 479.0, 404.0, 470.0, 522.0, 525.0, 516.0, 197.0, 458.0, 519.0, 464.0, 180.0, 299.0, 519.0, 527.0, 506.0, 516.0, 522.0, 473.0, 570.0, 504.0, 530.0, 513.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [261.0, 269.0, 234.0, 253.0, 261.0, 252.0, 282.0, 288.0, 251.0, 262.0, 287.0, 283.0, 265.0, 257.0, 243.0, 233.0, 212.0, 234.0, 244.0, 209.0, 226.0, 233.0, 254.0, 268.0, 247.0, 257.0, 236.0, 232.0, 265.0, 251.0, 240.0, 239.0, 226.0, 238.0, 259.0, 263.0, 237.0, 279.0, 233.0, 234.0, 213.0, 231.0, 196.0, 205.0, 246.0, 276.0, 251.0, 259.0, 272.0, 258.0, 256.0, 257.0, 226.0, 256.0, 244.0, 263.0, 267.0, 249.0, 260.0, 256.0, 251.0, 262.0, 233.0, 237.0, 254.0, 265.0, 185.0, 221.0, 229.0, 238.0, 234.0, 264.0, 252.0, 273.0, 265.0, 239.0, 251.0, 273.0, 254.0, 259.0, 188.0, 159.0, 260.0, 262.0, 168.0, 187.0, 151.0, 185.0, 275.0, 258.0, 239.0, 220.0, 264.0, 258.0, 187.0, 197.0, 288.0, 
288.0, 270.0, 297.0, 89.0, 88.0, 284.0, 298.0, 241.0, 226.0, 241.0, 229.0, 259.0, 260.0, 254.0, 279.0, 194.0, 204.0, 259.0, 251.0, 256.0, 257.0, 227.0, 214.0, 186.0, 212.0, 240.0, 276.0, 193.0, 216.0, 242.0, 265.0, 259.0, 266.0, 262.0, 254.0, 276.0, 254.0, 205.0, 199.0, 225.0, 222.0, 232.0, 218.0, 254.0, 268.0, 254.0, 259.0, 258.0, 261.0, 180.0, 171.0, 288.0, 285.0, 221.0, 204.0, 256.0, 217.0, 269.0, 261.0, 236.0, 243.0, 197.0, 207.0, 242.0, 228.0, 260.0, 262.0, 262.0, 263.0, 270.0, 246.0, 102.0, 95.0, 217.0, 241.0, 268.0, 251.0, 217.0, 247.0, 86.0, 94.0, 143.0, 156.0, 262.0, 257.0, 259.0, 268.0, 261.0, 245.0, 241.0, 275.0, 260.0, 262.0, 220.0, 253.0, 285.0, 285.0, 240.0, 264.0, 272.0, 258.0, 244.0, 269.0]}, "sampler_perf": {"mean_env_wait_ms": 1.9332518889221049, "mean_processing_ms": 0.4539033753742656, "mean_inference_ms": 2.483090315311066}, "off_policy_estimator": {}, "info": {"num_steps_trained": 2448000, "num_steps_sampled": 1305600, "sample_time_ms": 21004.371, "load_time_ms": 37.153, "grad_time_ms": 9035.152, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 7.888609169759553e-32, "cur_lr": 0.0010000000474974513, "total_loss": 0.00245770625770092, "policy_loss": -0.003976076375693083, "vf_loss": 70.27108764648438, "vf_explained_var": 0.7766384482383728, "kl": 0.001931712031364441, "entropy": 1.1866337060928345, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1305600, "episodes_total": 3264, "training_iteration": 102, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-35-45", "timestamp": 1660250145, "time_this_iter_s": 29.56961703300476, "time_total_s": 8563.517528772354, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 8563.517528772354, "timesteps_since_restore": 1305600, "iterations_since_restore": 102, "perf": {"cpu_util_percent": 29.607142857142858, "ram_util_percent": 58.27380952380951}}
-{"episode_reward_max": 579.0, "episode_reward_min": 180.0, "episode_reward_mean": 488.16, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 86.0}, "policy_reward_max": {"ppo": 296.0}, "policy_reward_mean": {"ppo": 244.08}, "custom_metrics": {"sparse_reward_mean": 169.2, "sparse_reward_min": 60, "sparse_reward_max": 200, "shaped_reward_mean": 149.76, "shaped_reward_min": 60, "shaped_reward_max": 179, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.36, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 16.19, "onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 14.44, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 15.55, "useful_onion_pickup_agent_1_min": 6, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 1.71, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 10, "onion_drop_agent_1_mean": 1.22, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.85, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 0.56, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 13.33, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 14.56, "potting_onion_agent_1_min": 6, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.68, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 13, "dish_pickup_agent_1_mean": 5.48, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 3.73, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 3.6, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 1.02, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 0.96, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.2, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.21, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.54, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 4.38, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.43, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.22, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.07, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.1, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 5, "optimal_onion_potting_agent_0_mean": 13.33, 
"optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 14.56, "optimal_onion_potting_agent_1_min": 6, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 13.33, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 14.56, "viable_onion_potting_agent_1_min": 6, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [519.0, 525.0, 396.0, 513.0, 479.0, 455.0, 525.0, 518.0, 519.0, 350.0, 579.0, 516.0, 519.0, 504.0, 455.0, 510.0, 384.0, 570.0, 498.0, 473.0, 519.0, 576.0, 516.0, 573.0, 465.0, 510.0, 533.0, 504.0, 525.0, 450.0, 482.0, 444.0, 525.0, 516.0, 530.0, 404.0, 447.0, 450.0, 522.0, 513.0, 519.0, 351.0, 573.0, 425.0, 473.0, 530.0, 479.0, 404.0, 470.0, 522.0, 525.0, 516.0, 197.0, 458.0, 519.0, 464.0, 180.0, 299.0, 519.0, 527.0, 506.0, 516.0, 522.0, 473.0, 570.0, 504.0, 530.0, 513.0, 530.0, 487.0, 513.0, 570.0, 513.0, 570.0, 522.0, 476.0, 446.0, 453.0, 459.0, 522.0, 504.0, 468.0, 516.0, 479.0, 464.0, 522.0, 516.0, 467.0, 444.0, 401.0, 522.0, 510.0, 530.0, 513.0, 482.0, 507.0, 516.0, 516.0, 513.0, 470.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [249.0, 270.0, 262.0, 263.0, 196.0, 200.0, 259.0, 254.0, 234.0, 245.0, 226.0, 229.0, 260.0, 265.0, 267.0, 251.0, 251.0, 268.0, 176.0, 174.0, 285.0, 294.0, 259.0, 257.0, 262.0, 257.0, 260.0, 244.0, 219.0, 236.0, 251.0, 259.0, 179.0, 205.0, 277.0, 293.0, 250.0, 248.0, 244.0, 229.0, 256.0, 263.0, 280.0, 296.0, 254.0, 262.0, 290.0, 283.0, 242.0, 223.0, 264.0, 246.0, 260.0, 273.0, 245.0, 259.0, 267.0, 258.0, 222.0, 228.0, 236.0, 246.0, 240.0, 204.0, 259.0, 266.0, 262.0, 254.0, 276.0, 254.0, 205.0, 199.0, 225.0, 222.0, 232.0, 218.0, 254.0, 268.0, 254.0, 259.0, 258.0, 261.0, 180.0, 171.0, 288.0, 285.0, 221.0, 204.0, 256.0, 217.0, 269.0, 261.0, 236.0, 243.0, 197.0, 207.0, 242.0, 
228.0, 260.0, 262.0, 262.0, 263.0, 270.0, 246.0, 102.0, 95.0, 217.0, 241.0, 268.0, 251.0, 217.0, 247.0, 86.0, 94.0, 143.0, 156.0, 262.0, 257.0, 259.0, 268.0, 261.0, 245.0, 241.0, 275.0, 260.0, 262.0, 220.0, 253.0, 285.0, 285.0, 240.0, 264.0, 272.0, 258.0, 244.0, 269.0, 261.0, 269.0, 234.0, 253.0, 261.0, 252.0, 282.0, 288.0, 251.0, 262.0, 287.0, 283.0, 265.0, 257.0, 243.0, 233.0, 212.0, 234.0, 244.0, 209.0, 226.0, 233.0, 254.0, 268.0, 247.0, 257.0, 236.0, 232.0, 265.0, 251.0, 240.0, 239.0, 226.0, 238.0, 259.0, 263.0, 237.0, 279.0, 233.0, 234.0, 213.0, 231.0, 196.0, 205.0, 246.0, 276.0, 251.0, 259.0, 272.0, 258.0, 256.0, 257.0, 226.0, 256.0, 244.0, 263.0, 267.0, 249.0, 260.0, 256.0, 251.0, 262.0, 233.0, 237.0]}, "sampler_perf": {"mean_env_wait_ms": 1.9186906528346697, "mean_processing_ms": 0.45100660003140314, "mean_inference_ms": 2.4688879649561444}, "off_policy_estimator": {}, "info": {"num_steps_trained": 2472000, "num_steps_sampled": 1318400, "sample_time_ms": 21037.799, "load_time_ms": 37.038, "grad_time_ms": 8875.986, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 3.9443045848797766e-32, "cur_lr": 0.0010000000474974513, "total_loss": 0.0011967640602961183, "policy_loss": -0.0056260935962200165, "vf_loss": 74.17142486572266, "vf_explained_var": 0.7644608616828918, "kl": 0.0018772757612168789, "entropy": 1.188565731048584, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1318400, "episodes_total": 3296, "training_iteration": 103, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-36-14", "timestamp": 1660250174, "time_this_iter_s": 29.66763973236084, "time_total_s": 8593.185168504715, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", 
"num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, 
"in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, 
"entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 8593.185168504715, "timesteps_since_restore": 1318400, "iterations_since_restore": 103, "perf": {"cpu_util_percent": 31.43809523809524, "ram_util_percent": 58.23095238095237}}
-{"episode_reward_max": 579.0, "episode_reward_min": 350.0, "episode_reward_mean": 503.97, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 174.0}, "policy_reward_max": {"ppo": 298.0}, "policy_reward_mean": {"ppo": 251.985}, "custom_metrics": {"sparse_reward_mean": 175.4, "sparse_reward_min": 120, "sparse_reward_max": 200, "shaped_reward_mean": 153.17, "shaped_reward_min": 104, "shaped_reward_max": 179, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.41, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 16.39, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 14.67, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 15.84, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 1.53, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 10, "onion_drop_agent_1_mean": 1.01, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.76, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 0.51, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 13.58, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 14.95, "potting_onion_agent_1_min": 6, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.84, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.4, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 3.79, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 3.62, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.96, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.85, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.21, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.24, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.71, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 4.41, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.62, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.34, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.05, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.04, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 13.58, 
"optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 14.95, "optimal_onion_potting_agent_1_min": 6, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 13.58, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 14.95, "viable_onion_potting_agent_1_min": 6, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [573.0, 573.0, 513.0, 519.0, 393.0, 564.0, 570.0, 570.0, 513.0, 513.0, 522.0, 492.0, 519.0, 413.0, 570.0, 482.0, 570.0, 510.0, 513.0, 462.0, 522.0, 498.0, 570.0, 498.0, 518.0, 510.0, 522.0, 504.0, 524.0, 536.0, 447.0, 452.0, 570.0, 504.0, 530.0, 513.0, 530.0, 487.0, 513.0, 570.0, 513.0, 570.0, 522.0, 476.0, 446.0, 453.0, 459.0, 522.0, 504.0, 468.0, 516.0, 479.0, 464.0, 522.0, 516.0, 467.0, 444.0, 401.0, 522.0, 510.0, 530.0, 513.0, 482.0, 507.0, 516.0, 516.0, 513.0, 470.0, 519.0, 525.0, 396.0, 513.0, 479.0, 455.0, 525.0, 518.0, 519.0, 350.0, 579.0, 516.0, 519.0, 504.0, 455.0, 510.0, 384.0, 570.0, 498.0, 473.0, 519.0, 576.0, 516.0, 573.0, 465.0, 510.0, 533.0, 504.0, 525.0, 450.0, 482.0, 444.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [293.0, 280.0, 293.0, 280.0, 255.0, 258.0, 259.0, 260.0, 194.0, 199.0, 272.0, 292.0, 295.0, 275.0, 278.0, 292.0, 250.0, 263.0, 265.0, 248.0, 264.0, 258.0, 246.0, 246.0, 259.0, 260.0, 217.0, 196.0, 276.0, 294.0, 249.0, 233.0, 272.0, 298.0, 243.0, 267.0, 259.0, 254.0, 237.0, 225.0, 249.0, 273.0, 255.0, 243.0, 293.0, 277.0, 250.0, 248.0, 248.0, 270.0, 259.0, 251.0, 260.0, 262.0, 251.0, 253.0, 269.0, 255.0, 281.0, 255.0, 227.0, 220.0, 227.0, 225.0, 285.0, 285.0, 240.0, 264.0, 272.0, 258.0, 244.0, 269.0, 261.0, 269.0, 234.0, 253.0, 261.0, 252.0, 282.0, 288.0, 251.0, 262.0, 287.0, 283.0, 265.0, 257.0, 243.0, 233.0, 212.0, 234.0, 244.0, 209.0, 226.0, 233.0, 254.0, 268.0, 247.0, 
257.0, 236.0, 232.0, 265.0, 251.0, 240.0, 239.0, 226.0, 238.0, 259.0, 263.0, 237.0, 279.0, 233.0, 234.0, 213.0, 231.0, 196.0, 205.0, 246.0, 276.0, 251.0, 259.0, 272.0, 258.0, 256.0, 257.0, 226.0, 256.0, 244.0, 263.0, 267.0, 249.0, 260.0, 256.0, 251.0, 262.0, 233.0, 237.0, 249.0, 270.0, 262.0, 263.0, 196.0, 200.0, 259.0, 254.0, 234.0, 245.0, 226.0, 229.0, 260.0, 265.0, 267.0, 251.0, 251.0, 268.0, 176.0, 174.0, 285.0, 294.0, 259.0, 257.0, 262.0, 257.0, 260.0, 244.0, 219.0, 236.0, 251.0, 259.0, 179.0, 205.0, 277.0, 293.0, 250.0, 248.0, 244.0, 229.0, 256.0, 263.0, 280.0, 296.0, 254.0, 262.0, 290.0, 283.0, 242.0, 223.0, 264.0, 246.0, 260.0, 273.0, 245.0, 259.0, 267.0, 258.0, 222.0, 228.0, 236.0, 246.0, 240.0, 204.0]}, "sampler_perf": {"mean_env_wait_ms": 1.9044104614216666, "mean_processing_ms": 0.44816608164827715, "mean_inference_ms": 2.4550939840364316}, "off_policy_estimator": {}, "info": {"num_steps_trained": 2496000, "num_steps_sampled": 1331200, "sample_time_ms": 21059.641, "load_time_ms": 36.972, "grad_time_ms": 8872.978, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 1.9721522924398883e-32, "cur_lr": 0.0010000000474974513, "total_loss": 0.002359784208238125, "policy_loss": -0.004356598015874624, "vf_loss": 73.04959106445312, "vf_explained_var": 0.7670376896858215, "kl": 0.0017897128127515316, "entropy": 1.1771515607833862, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1331200, "episodes_total": 3328, "training_iteration": 104, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-36-45", "timestamp": 1660250205, "time_this_iter_s": 30.991883993148804, "time_total_s": 8624.177052497864, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", 
"num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, 
"in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, 
"entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 8624.177052497864, "timesteps_since_restore": 1331200, "iterations_since_restore": 104, "perf": {"cpu_util_percent": 35.49545454545455, "ram_util_percent": 58.338636363636354}}
-{"episode_reward_max": 579.0, "episode_reward_min": 350.0, "episode_reward_mean": 506.67, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 174.0}, "policy_reward_max": {"ppo": 298.0}, "policy_reward_mean": {"ppo": 253.335}, "custom_metrics": {"sparse_reward_mean": 176.4, "sparse_reward_min": 120, "sparse_reward_max": 200, "shaped_reward_mean": 153.87, "shaped_reward_min": 104, "shaped_reward_max": 179, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.18, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 16.08, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 14.67, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 15.52, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 1.13, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 0.93, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.56, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, "useful_onion_drop_agent_1_mean": 0.47, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 13.78, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 14.76, "potting_onion_agent_1_min": 6, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.8, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.3, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 3.93, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 3.67, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.9, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.76, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.18, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.19, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.73, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 4.38, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.64, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.31, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.03, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.04, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 13.78, 
"optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 14.76, "optimal_onion_potting_agent_1_min": 6, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 13.78, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 14.76, "viable_onion_potting_agent_1_min": 6, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [467.0, 525.0, 527.0, 464.0, 516.0, 459.0, 516.0, 570.0, 519.0, 522.0, 416.0, 504.0, 507.0, 513.0, 458.0, 489.0, 522.0, 519.0, 510.0, 519.0, 525.0, 576.0, 476.0, 522.0, 513.0, 530.0, 530.0, 459.0, 516.0, 498.0, 530.0, 576.0, 516.0, 516.0, 513.0, 470.0, 519.0, 525.0, 396.0, 513.0, 479.0, 455.0, 525.0, 518.0, 519.0, 350.0, 579.0, 516.0, 519.0, 504.0, 455.0, 510.0, 384.0, 570.0, 498.0, 473.0, 519.0, 576.0, 516.0, 573.0, 465.0, 510.0, 533.0, 504.0, 525.0, 450.0, 482.0, 444.0, 573.0, 573.0, 513.0, 519.0, 393.0, 564.0, 570.0, 570.0, 513.0, 513.0, 522.0, 492.0, 519.0, 413.0, 570.0, 482.0, 570.0, 510.0, 513.0, 462.0, 522.0, 498.0, 570.0, 498.0, 518.0, 510.0, 522.0, 504.0, 524.0, 536.0, 447.0, 452.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [237.0, 230.0, 259.0, 266.0, 271.0, 256.0, 211.0, 253.0, 256.0, 260.0, 232.0, 227.0, 267.0, 249.0, 280.0, 290.0, 262.0, 257.0, 259.0, 263.0, 210.0, 206.0, 247.0, 257.0, 254.0, 253.0, 252.0, 261.0, 231.0, 227.0, 240.0, 249.0, 273.0, 249.0, 259.0, 260.0, 253.0, 257.0, 270.0, 249.0, 255.0, 270.0, 284.0, 292.0, 247.0, 229.0, 265.0, 257.0, 245.0, 268.0, 260.0, 270.0, 273.0, 257.0, 214.0, 245.0, 260.0, 256.0, 251.0, 247.0, 262.0, 268.0, 291.0, 285.0, 267.0, 249.0, 260.0, 256.0, 251.0, 262.0, 233.0, 237.0, 249.0, 270.0, 262.0, 263.0, 196.0, 200.0, 259.0, 254.0, 234.0, 245.0, 226.0, 229.0, 260.0, 265.0, 267.0, 251.0, 251.0, 268.0, 176.0, 174.0, 285.0, 294.0, 259.0, 257.0, 262.0, 
257.0, 260.0, 244.0, 219.0, 236.0, 251.0, 259.0, 179.0, 205.0, 277.0, 293.0, 250.0, 248.0, 244.0, 229.0, 256.0, 263.0, 280.0, 296.0, 254.0, 262.0, 290.0, 283.0, 242.0, 223.0, 264.0, 246.0, 260.0, 273.0, 245.0, 259.0, 267.0, 258.0, 222.0, 228.0, 236.0, 246.0, 240.0, 204.0, 293.0, 280.0, 293.0, 280.0, 255.0, 258.0, 259.0, 260.0, 194.0, 199.0, 272.0, 292.0, 295.0, 275.0, 278.0, 292.0, 250.0, 263.0, 265.0, 248.0, 264.0, 258.0, 246.0, 246.0, 259.0, 260.0, 217.0, 196.0, 276.0, 294.0, 249.0, 233.0, 272.0, 298.0, 243.0, 267.0, 259.0, 254.0, 237.0, 225.0, 249.0, 273.0, 255.0, 243.0, 293.0, 277.0, 250.0, 248.0, 248.0, 270.0, 259.0, 251.0, 260.0, 262.0, 251.0, 253.0, 269.0, 255.0, 281.0, 255.0, 227.0, 220.0, 227.0, 225.0]}, "sampler_perf": {"mean_env_wait_ms": 1.890394604549175, "mean_processing_ms": 0.4453764179105544, "mean_inference_ms": 2.441389523770417}, "off_policy_estimator": {}, "info": {"num_steps_trained": 2520000, "num_steps_sampled": 1344000, "sample_time_ms": 20963.704, "load_time_ms": 37.317, "grad_time_ms": 8768.329, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 9.860761462199441e-33, "cur_lr": 0.0010000000474974513, "total_loss": 0.002573954639956355, "policy_loss": -0.004460552707314491, "vf_loss": 76.1985855102539, "vf_explained_var": 0.7691051959991455, "kl": 0.002485529985278845, "entropy": 1.1707229614257812, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1344000, "episodes_total": 3360, "training_iteration": 105, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-37-15", "timestamp": 1660250235, "time_this_iter_s": 29.730670928955078, "time_total_s": 8653.907723426819, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 
1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": 
false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, 
"entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 8653.907723426819, "timesteps_since_restore": 1344000, "iterations_since_restore": 105, "perf": {"cpu_util_percent": 34.530952380952385, "ram_util_percent": 58.22619047619047}}
-{"episode_reward_max": 576.0, "episode_reward_min": 390.0, "episode_reward_mean": 508.94, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 191.0}, "policy_reward_max": {"ppo": 298.0}, "policy_reward_mean": {"ppo": 254.47}, "custom_metrics": {"sparse_reward_mean": 177.0, "sparse_reward_min": 120, "sparse_reward_max": 200, "shaped_reward_mean": 154.94, "shaped_reward_min": 110, "shaped_reward_max": 176, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.12, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 16.03, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 14.67, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 15.45, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.97, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 0.95, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.46, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, "useful_onion_drop_agent_1_mean": 0.54, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 13.88, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 14.73, "potting_onion_agent_1_min": 6, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.86, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.17, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 4.11, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 3.71, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.79, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.7, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.15, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.19, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.88, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 4.27, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.76, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.19, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.03, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.05, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 13.88, 
"optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 14.73, "optimal_onion_potting_agent_1_min": 6, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 13.88, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 14.73, "viable_onion_potting_agent_1_min": 6, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [462.0, 533.0, 546.0, 518.0, 522.0, 513.0, 492.0, 501.0, 525.0, 522.0, 467.0, 390.0, 522.0, 527.0, 473.0, 525.0, 519.0, 519.0, 468.0, 504.0, 564.0, 470.0, 513.0, 516.0, 522.0, 421.0, 525.0, 573.0, 525.0, 527.0, 525.0, 516.0, 525.0, 450.0, 482.0, 444.0, 573.0, 573.0, 513.0, 519.0, 393.0, 564.0, 570.0, 570.0, 513.0, 513.0, 522.0, 492.0, 519.0, 413.0, 570.0, 482.0, 570.0, 510.0, 513.0, 462.0, 522.0, 498.0, 570.0, 498.0, 518.0, 510.0, 522.0, 504.0, 524.0, 536.0, 447.0, 452.0, 467.0, 525.0, 527.0, 464.0, 516.0, 459.0, 516.0, 570.0, 519.0, 522.0, 416.0, 504.0, 507.0, 513.0, 458.0, 489.0, 522.0, 519.0, 510.0, 519.0, 525.0, 576.0, 476.0, 522.0, 513.0, 530.0, 530.0, 459.0, 516.0, 498.0, 530.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [238.0, 224.0, 270.0, 263.0, 275.0, 271.0, 267.0, 251.0, 254.0, 268.0, 245.0, 268.0, 256.0, 236.0, 270.0, 231.0, 273.0, 252.0, 269.0, 253.0, 231.0, 236.0, 191.0, 199.0, 267.0, 255.0, 272.0, 255.0, 241.0, 232.0, 260.0, 265.0, 266.0, 253.0, 259.0, 260.0, 238.0, 230.0, 260.0, 244.0, 281.0, 283.0, 231.0, 239.0, 259.0, 254.0, 254.0, 262.0, 268.0, 254.0, 210.0, 211.0, 263.0, 262.0, 285.0, 288.0, 259.0, 266.0, 248.0, 279.0, 268.0, 257.0, 264.0, 252.0, 267.0, 258.0, 222.0, 228.0, 236.0, 246.0, 240.0, 204.0, 293.0, 280.0, 293.0, 280.0, 255.0, 258.0, 259.0, 260.0, 194.0, 199.0, 272.0, 292.0, 295.0, 275.0, 278.0, 292.0, 250.0, 263.0, 265.0, 248.0, 264.0, 258.0, 246.0, 246.0, 259.0, 
260.0, 217.0, 196.0, 276.0, 294.0, 249.0, 233.0, 272.0, 298.0, 243.0, 267.0, 259.0, 254.0, 237.0, 225.0, 249.0, 273.0, 255.0, 243.0, 293.0, 277.0, 250.0, 248.0, 248.0, 270.0, 259.0, 251.0, 260.0, 262.0, 251.0, 253.0, 269.0, 255.0, 281.0, 255.0, 227.0, 220.0, 227.0, 225.0, 237.0, 230.0, 259.0, 266.0, 271.0, 256.0, 211.0, 253.0, 256.0, 260.0, 232.0, 227.0, 267.0, 249.0, 280.0, 290.0, 262.0, 257.0, 259.0, 263.0, 210.0, 206.0, 247.0, 257.0, 254.0, 253.0, 252.0, 261.0, 231.0, 227.0, 240.0, 249.0, 273.0, 249.0, 259.0, 260.0, 253.0, 257.0, 270.0, 249.0, 255.0, 270.0, 284.0, 292.0, 247.0, 229.0, 265.0, 257.0, 245.0, 268.0, 260.0, 270.0, 273.0, 257.0, 214.0, 245.0, 260.0, 256.0, 251.0, 247.0, 262.0, 268.0, 291.0, 285.0]}, "sampler_perf": {"mean_env_wait_ms": 1.8766403613441696, "mean_processing_ms": 0.44263891296008706, "mean_inference_ms": 2.427842279334728}, "off_policy_estimator": {}, "info": {"num_steps_trained": 2544000, "num_steps_sampled": 1356800, "sample_time_ms": 20984.654, "load_time_ms": 37.36, "grad_time_ms": 8798.762, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 4.930380731099721e-33, "cur_lr": 0.0010000000474974513, "total_loss": 0.0012900071451440454, "policy_loss": -0.00579724321141839, "vf_loss": 76.69783782958984, "vf_explained_var": 0.7642709612846375, "kl": 0.0020595567766577005, "entropy": 1.1650750637054443, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1356800, "episodes_total": 3392, "training_iteration": 106, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-37-46", "timestamp": 1660250266, "time_this_iter_s": 30.89556574821472, "time_total_s": 8684.803289175034, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 
1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": 
false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, 
"entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 8684.803289175034, "timesteps_since_restore": 1356800, "iterations_since_restore": 106, "perf": {"cpu_util_percent": 35.19772727272727, "ram_util_percent": 58.284090909090885}}
-{"episode_reward_max": 576.0, "episode_reward_min": 390.0, "episode_reward_mean": 509.21, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 182.0}, "policy_reward_max": {"ppo": 293.0}, "policy_reward_mean": {"ppo": 254.605}, "custom_metrics": {"sparse_reward_mean": 176.8, "sparse_reward_min": 140, "sparse_reward_max": 200, "shaped_reward_mean": 155.61, "shaped_reward_min": 110, "shaped_reward_max": 176, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.26, "onion_pickup_agent_0_min": 10, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 16.32, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 14.78, "useful_onion_pickup_agent_0_min": 9, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 15.59, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 1.0, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 7, "onion_drop_agent_1_mean": 1.17, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.47, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 7, "useful_onion_drop_agent_1_mean": 0.62, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 13.97, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 14.83, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 20, "dish_pickup_agent_0_mean": 5.65, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.16, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 12, "useful_dish_pickup_agent_0_mean": 4.17, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 3.7, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.62, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.7, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.09, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.17, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.9, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 4.24, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 4.76, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.15, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.03, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.04, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 13.97, 
"optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 14.83, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 20, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 13.97, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 14.83, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 20, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [525.0, 576.0, 468.0, 510.0, 522.0, 510.0, 497.0, 453.0, 462.0, 519.0, 530.0, 522.0, 530.0, 527.0, 561.0, 525.0, 507.0, 522.0, 462.0, 501.0, 522.0, 522.0, 522.0, 530.0, 525.0, 516.0, 522.0, 522.0, 515.0, 573.0, 522.0, 404.0, 524.0, 536.0, 447.0, 452.0, 467.0, 525.0, 527.0, 464.0, 516.0, 459.0, 516.0, 570.0, 519.0, 522.0, 416.0, 504.0, 507.0, 513.0, 458.0, 489.0, 522.0, 519.0, 510.0, 519.0, 525.0, 576.0, 476.0, 522.0, 513.0, 530.0, 530.0, 459.0, 516.0, 498.0, 530.0, 576.0, 462.0, 533.0, 546.0, 518.0, 522.0, 513.0, 492.0, 501.0, 525.0, 522.0, 467.0, 390.0, 522.0, 527.0, 473.0, 525.0, 519.0, 519.0, 468.0, 504.0, 564.0, 470.0, 513.0, 516.0, 522.0, 421.0, 525.0, 573.0, 525.0, 527.0, 525.0, 516.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [263.0, 262.0, 290.0, 286.0, 236.0, 232.0, 242.0, 268.0, 268.0, 254.0, 271.0, 239.0, 248.0, 249.0, 214.0, 239.0, 230.0, 232.0, 258.0, 261.0, 264.0, 266.0, 267.0, 255.0, 273.0, 257.0, 251.0, 276.0, 293.0, 268.0, 264.0, 261.0, 244.0, 263.0, 267.0, 255.0, 239.0, 223.0, 244.0, 257.0, 273.0, 249.0, 256.0, 266.0, 262.0, 260.0, 252.0, 278.0, 262.0, 263.0, 263.0, 253.0, 251.0, 271.0, 254.0, 268.0, 270.0, 245.0, 288.0, 285.0, 262.0, 260.0, 222.0, 182.0, 269.0, 255.0, 281.0, 255.0, 227.0, 220.0, 227.0, 225.0, 237.0, 230.0, 259.0, 266.0, 271.0, 256.0, 211.0, 253.0, 256.0, 260.0, 232.0, 227.0, 267.0, 249.0, 280.0, 290.0, 262.0, 257.0, 259.0, 263.0, 210.0, 206.0, 247.0, 257.0, 254.0, 
253.0, 252.0, 261.0, 231.0, 227.0, 240.0, 249.0, 273.0, 249.0, 259.0, 260.0, 253.0, 257.0, 270.0, 249.0, 255.0, 270.0, 284.0, 292.0, 247.0, 229.0, 265.0, 257.0, 245.0, 268.0, 260.0, 270.0, 273.0, 257.0, 214.0, 245.0, 260.0, 256.0, 251.0, 247.0, 262.0, 268.0, 291.0, 285.0, 238.0, 224.0, 270.0, 263.0, 275.0, 271.0, 267.0, 251.0, 254.0, 268.0, 245.0, 268.0, 256.0, 236.0, 270.0, 231.0, 273.0, 252.0, 269.0, 253.0, 231.0, 236.0, 191.0, 199.0, 267.0, 255.0, 272.0, 255.0, 241.0, 232.0, 260.0, 265.0, 266.0, 253.0, 259.0, 260.0, 238.0, 230.0, 260.0, 244.0, 281.0, 283.0, 231.0, 239.0, 259.0, 254.0, 254.0, 262.0, 268.0, 254.0, 210.0, 211.0, 263.0, 262.0, 285.0, 288.0, 259.0, 266.0, 248.0, 279.0, 268.0, 257.0, 264.0, 252.0]}, "sampler_perf": {"mean_env_wait_ms": 1.863128513341346, "mean_processing_ms": 0.43994753351318544, "mean_inference_ms": 2.414282806471956}, "off_policy_estimator": {}, "info": {"num_steps_trained": 2568000, "num_steps_sampled": 1369600, "sample_time_ms": 20842.684, "load_time_ms": 37.196, "grad_time_ms": 8837.48, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 2.4651903655498604e-33, "cur_lr": 0.0010000000474974513, "total_loss": 0.0005029598250985146, "policy_loss": -0.0072616818360984325, "vf_loss": 83.45578002929688, "vf_explained_var": 0.7516160011291504, "kl": 0.0016769097419455647, "entropy": 1.1618729829788208, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1369600, "episodes_total": 3424, "training_iteration": 107, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-38-17", "timestamp": 1660250297, "time_this_iter_s": 30.596415996551514, "time_total_s": 8715.399705171585, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", 
"num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, 
"in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, 
"entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 8715.399705171585, "timesteps_since_restore": 1369600, "iterations_since_restore": 107, "perf": {"cpu_util_percent": 35.45581395348837, "ram_util_percent": 58.237209302325574}}
-{"episode_reward_max": 582.0, "episode_reward_min": 390.0, "episode_reward_mean": 513.81, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 182.0}, "policy_reward_max": {"ppo": 297.0}, "policy_reward_mean": {"ppo": 256.905}, "custom_metrics": {"sparse_reward_mean": 178.2, "sparse_reward_min": 140, "sparse_reward_max": 200, "shaped_reward_mean": 157.41, "shaped_reward_min": 110, "shaped_reward_max": 182, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.26, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 16.75, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 14.79, "useful_onion_pickup_agent_0_min": 9, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 15.94, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 1.0, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 7, "onion_drop_agent_1_mean": 1.23, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.44, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 7, "useful_onion_drop_agent_1_mean": 0.6, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 13.95, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 15.15, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 20, "dish_pickup_agent_0_mean": 5.64, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.19, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 12, "useful_dish_pickup_agent_0_mean": 4.38, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 3.74, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.56, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.71, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.07, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.18, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.93, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 4.25, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 4.81, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.18, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.04, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 13.95, 
"optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 15.15, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 20, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 13.95, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 15.15, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 20, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [495.0, 525.0, 579.0, 513.0, 465.0, 519.0, 525.0, 525.0, 503.0, 522.0, 522.0, 533.0, 522.0, 468.0, 522.0, 525.0, 582.0, 573.0, 527.0, 519.0, 519.0, 570.0, 501.0, 519.0, 510.0, 467.0, 522.0, 522.0, 527.0, 533.0, 473.0, 465.0, 516.0, 498.0, 530.0, 576.0, 462.0, 533.0, 546.0, 518.0, 522.0, 513.0, 492.0, 501.0, 525.0, 522.0, 467.0, 390.0, 522.0, 527.0, 473.0, 525.0, 519.0, 519.0, 468.0, 504.0, 564.0, 470.0, 513.0, 516.0, 522.0, 421.0, 525.0, 573.0, 525.0, 527.0, 525.0, 516.0, 525.0, 576.0, 468.0, 510.0, 522.0, 510.0, 497.0, 453.0, 462.0, 519.0, 530.0, 522.0, 530.0, 527.0, 561.0, 525.0, 507.0, 522.0, 462.0, 501.0, 522.0, 522.0, 522.0, 530.0, 525.0, 516.0, 522.0, 522.0, 515.0, 573.0, 522.0, 404.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [243.0, 252.0, 252.0, 273.0, 293.0, 286.0, 267.0, 246.0, 232.0, 233.0, 260.0, 259.0, 264.0, 261.0, 272.0, 253.0, 250.0, 253.0, 266.0, 256.0, 272.0, 250.0, 269.0, 264.0, 261.0, 261.0, 223.0, 245.0, 249.0, 273.0, 252.0, 273.0, 285.0, 297.0, 287.0, 286.0, 268.0, 259.0, 267.0, 252.0, 260.0, 259.0, 280.0, 290.0, 239.0, 262.0, 267.0, 252.0, 263.0, 247.0, 246.0, 221.0, 249.0, 273.0, 257.0, 265.0, 262.0, 265.0, 264.0, 269.0, 234.0, 239.0, 229.0, 236.0, 260.0, 256.0, 251.0, 247.0, 262.0, 268.0, 291.0, 285.0, 238.0, 224.0, 270.0, 263.0, 275.0, 271.0, 267.0, 251.0, 254.0, 268.0, 245.0, 268.0, 256.0, 236.0, 270.0, 231.0, 273.0, 252.0, 269.0, 253.0, 231.0, 236.0, 191.0, 199.0, 267.0, 
255.0, 272.0, 255.0, 241.0, 232.0, 260.0, 265.0, 266.0, 253.0, 259.0, 260.0, 238.0, 230.0, 260.0, 244.0, 281.0, 283.0, 231.0, 239.0, 259.0, 254.0, 254.0, 262.0, 268.0, 254.0, 210.0, 211.0, 263.0, 262.0, 285.0, 288.0, 259.0, 266.0, 248.0, 279.0, 268.0, 257.0, 264.0, 252.0, 263.0, 262.0, 290.0, 286.0, 236.0, 232.0, 242.0, 268.0, 268.0, 254.0, 271.0, 239.0, 248.0, 249.0, 214.0, 239.0, 230.0, 232.0, 258.0, 261.0, 264.0, 266.0, 267.0, 255.0, 273.0, 257.0, 251.0, 276.0, 293.0, 268.0, 264.0, 261.0, 244.0, 263.0, 267.0, 255.0, 239.0, 223.0, 244.0, 257.0, 273.0, 249.0, 256.0, 266.0, 262.0, 260.0, 252.0, 278.0, 262.0, 263.0, 263.0, 253.0, 251.0, 271.0, 254.0, 268.0, 270.0, 245.0, 288.0, 285.0, 262.0, 260.0, 222.0, 182.0]}, "sampler_perf": {"mean_env_wait_ms": 1.8498555104022234, "mean_processing_ms": 0.43730168549860937, "mean_inference_ms": 2.400809449110995}, "off_policy_estimator": {}, "info": {"num_steps_trained": 2592000, "num_steps_sampled": 1382400, "sample_time_ms": 20702.738, "load_time_ms": 37.249, "grad_time_ms": 9000.45, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 1.2325951827749302e-33, "cur_lr": 0.0010000000474974513, "total_loss": 0.004501763265579939, "policy_loss": -0.002659810474142432, "vf_loss": 77.439453125, "vf_explained_var": 0.7766797542572021, "kl": 0.002080060075968504, "entropy": 1.1647237539291382, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1382400, "episodes_total": 3456, "training_iteration": 108, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-38-46", "timestamp": 1660250326, "time_this_iter_s": 29.44796586036682, "time_total_s": 8744.847671031952, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 8744.847671031952, "timesteps_since_restore": 1382400, "iterations_since_restore": 108, "perf": {"cpu_util_percent": 35.6452380952381, "ram_util_percent": 58.221428571428575}}
-{"episode_reward_max": 582.0, "episode_reward_min": 404.0, "episode_reward_mean": 517.15, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 182.0}, "policy_reward_max": {"ppo": 297.0}, "policy_reward_mean": {"ppo": 258.575}, "custom_metrics": {"sparse_reward_mean": 179.2, "sparse_reward_min": 140, "sparse_reward_max": 200, "shaped_reward_mean": 158.75, "shaped_reward_min": 121, "shaped_reward_max": 182, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.45, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 16.7, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 14.93, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 15.97, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 1.08, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 7, "onion_drop_agent_1_mean": 1.14, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.5, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 7, "useful_onion_drop_agent_1_mean": 0.45, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 14.06, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 15.24, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.65, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.26, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 12, "useful_dish_pickup_agent_0_mean": 4.4, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 3.9, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.61, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 0.68, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.12, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.14, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.88, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.36, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.75, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.26, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.04, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 14.06, 
"optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 15.24, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.06, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 15.24, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [498.0, 525.0, 519.0, 522.0, 522.0, 516.0, 452.0, 530.0, 530.0, 522.0, 519.0, 473.0, 453.0, 570.0, 522.0, 441.0, 576.0, 476.0, 516.0, 507.0, 573.0, 530.0, 519.0, 579.0, 525.0, 473.0, 510.0, 567.0, 576.0, 516.0, 525.0, 524.0, 525.0, 527.0, 525.0, 516.0, 525.0, 576.0, 468.0, 510.0, 522.0, 510.0, 497.0, 453.0, 462.0, 519.0, 530.0, 522.0, 530.0, 527.0, 561.0, 525.0, 507.0, 522.0, 462.0, 501.0, 522.0, 522.0, 522.0, 530.0, 525.0, 516.0, 522.0, 522.0, 515.0, 573.0, 522.0, 404.0, 495.0, 525.0, 579.0, 513.0, 465.0, 519.0, 525.0, 525.0, 503.0, 522.0, 522.0, 533.0, 522.0, 468.0, 522.0, 525.0, 582.0, 573.0, 527.0, 519.0, 519.0, 570.0, 501.0, 519.0, 510.0, 467.0, 522.0, 522.0, 527.0, 533.0, 473.0, 465.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [246.0, 252.0, 261.0, 264.0, 269.0, 250.0, 258.0, 264.0, 264.0, 258.0, 272.0, 244.0, 239.0, 213.0, 263.0, 267.0, 263.0, 267.0, 268.0, 254.0, 267.0, 252.0, 240.0, 233.0, 221.0, 232.0, 284.0, 286.0, 264.0, 258.0, 225.0, 216.0, 292.0, 284.0, 245.0, 231.0, 245.0, 271.0, 258.0, 249.0, 290.0, 283.0, 266.0, 264.0, 251.0, 268.0, 296.0, 283.0, 263.0, 262.0, 219.0, 254.0, 264.0, 246.0, 291.0, 276.0, 282.0, 294.0, 248.0, 268.0, 252.0, 273.0, 265.0, 259.0, 259.0, 266.0, 248.0, 279.0, 268.0, 257.0, 264.0, 252.0, 263.0, 262.0, 290.0, 286.0, 236.0, 232.0, 242.0, 268.0, 268.0, 254.0, 271.0, 239.0, 248.0, 249.0, 214.0, 239.0, 230.0, 232.0, 258.0, 261.0, 264.0, 266.0, 267.0, 255.0, 273.0, 
257.0, 251.0, 276.0, 293.0, 268.0, 264.0, 261.0, 244.0, 263.0, 267.0, 255.0, 239.0, 223.0, 244.0, 257.0, 273.0, 249.0, 256.0, 266.0, 262.0, 260.0, 252.0, 278.0, 262.0, 263.0, 263.0, 253.0, 251.0, 271.0, 254.0, 268.0, 270.0, 245.0, 288.0, 285.0, 262.0, 260.0, 222.0, 182.0, 243.0, 252.0, 252.0, 273.0, 293.0, 286.0, 267.0, 246.0, 232.0, 233.0, 260.0, 259.0, 264.0, 261.0, 272.0, 253.0, 250.0, 253.0, 266.0, 256.0, 272.0, 250.0, 269.0, 264.0, 261.0, 261.0, 223.0, 245.0, 249.0, 273.0, 252.0, 273.0, 285.0, 297.0, 287.0, 286.0, 268.0, 259.0, 267.0, 252.0, 260.0, 259.0, 280.0, 290.0, 239.0, 262.0, 267.0, 252.0, 263.0, 247.0, 246.0, 221.0, 249.0, 273.0, 257.0, 265.0, 262.0, 265.0, 264.0, 269.0, 234.0, 239.0, 229.0, 236.0]}, "sampler_perf": {"mean_env_wait_ms": 1.8368123627881316, "mean_processing_ms": 0.4346999202219803, "mean_inference_ms": 2.3873597355001146}, "off_policy_estimator": {}, "info": {"num_steps_trained": 2616000, "num_steps_sampled": 1395200, "sample_time_ms": 20622.092, "load_time_ms": 37.297, "grad_time_ms": 9168.951, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 6.162975913874651e-34, "cur_lr": 0.0010000000474974513, "total_loss": -0.0004981299280188978, "policy_loss": -0.007735797669738531, "vf_loss": 78.24005889892578, "vf_explained_var": 0.7600134015083313, "kl": 0.0021366437431424856, "entropy": 1.1726828813552856, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1395200, "episodes_total": 3488, "training_iteration": 109, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-39-16", "timestamp": 1660250356, "time_this_iter_s": 29.312750816345215, "time_total_s": 8774.160421848297, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", 
"num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, 
"in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, 
"entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 8774.160421848297, "timesteps_since_restore": 1395200, "iterations_since_restore": 109, "perf": {"cpu_util_percent": 34.91219512195122, "ram_util_percent": 58.29999999999999}}
-{"episode_reward_max": 582.0, "episode_reward_min": 390.0, "episode_reward_mean": 513.77, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 182.0}, "policy_reward_max": {"ppo": 298.0}, "policy_reward_mean": {"ppo": 256.885}, "custom_metrics": {"sparse_reward_mean": 177.8, "sparse_reward_min": 120, "sparse_reward_max": 200, "shaped_reward_mean": 158.17, "shaped_reward_min": 116, "shaped_reward_max": 182, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.38, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 16.47, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 14.76, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 15.68, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 1.07, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 7, "onion_drop_agent_1_mean": 1.06, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.53, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 0.52, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 6, "potting_onion_agent_0_mean": 14.01, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 15.08, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.76, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.25, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.52, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 3.83, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.67, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 0.69, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.13, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.12, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.89, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.33, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.78, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.22, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.04, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 14.01, 
"optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 15.08, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.01, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 15.08, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [396.0, 516.0, 522.0, 465.0, 390.0, 522.0, 530.0, 570.0, 487.0, 519.0, 525.0, 527.0, 507.0, 522.0, 473.0, 519.0, 519.0, 573.0, 519.0, 422.0, 495.0, 525.0, 519.0, 473.0, 522.0, 530.0, 522.0, 510.0, 498.0, 510.0, 516.0, 522.0, 515.0, 573.0, 522.0, 404.0, 495.0, 525.0, 579.0, 513.0, 465.0, 519.0, 525.0, 525.0, 503.0, 522.0, 522.0, 533.0, 522.0, 468.0, 522.0, 525.0, 582.0, 573.0, 527.0, 519.0, 519.0, 570.0, 501.0, 519.0, 510.0, 467.0, 522.0, 522.0, 527.0, 533.0, 473.0, 465.0, 498.0, 525.0, 519.0, 522.0, 522.0, 516.0, 452.0, 530.0, 530.0, 522.0, 519.0, 473.0, 453.0, 570.0, 522.0, 441.0, 576.0, 476.0, 516.0, 507.0, 573.0, 530.0, 519.0, 579.0, 525.0, 473.0, 510.0, 567.0, 576.0, 516.0, 525.0, 524.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [203.0, 193.0, 260.0, 256.0, 268.0, 254.0, 236.0, 229.0, 203.0, 187.0, 261.0, 261.0, 271.0, 259.0, 272.0, 298.0, 236.0, 251.0, 260.0, 259.0, 269.0, 256.0, 258.0, 269.0, 256.0, 251.0, 270.0, 252.0, 252.0, 221.0, 270.0, 249.0, 256.0, 263.0, 285.0, 288.0, 270.0, 249.0, 203.0, 219.0, 254.0, 241.0, 250.0, 275.0, 273.0, 246.0, 236.0, 237.0, 250.0, 272.0, 270.0, 260.0, 262.0, 260.0, 243.0, 267.0, 243.0, 255.0, 266.0, 244.0, 269.0, 247.0, 249.0, 273.0, 270.0, 245.0, 288.0, 285.0, 262.0, 260.0, 222.0, 182.0, 243.0, 252.0, 252.0, 273.0, 293.0, 286.0, 267.0, 246.0, 232.0, 233.0, 260.0, 259.0, 264.0, 261.0, 272.0, 253.0, 250.0, 253.0, 266.0, 256.0, 272.0, 250.0, 269.0, 264.0, 261.0, 
261.0, 223.0, 245.0, 249.0, 273.0, 252.0, 273.0, 285.0, 297.0, 287.0, 286.0, 268.0, 259.0, 267.0, 252.0, 260.0, 259.0, 280.0, 290.0, 239.0, 262.0, 267.0, 252.0, 263.0, 247.0, 246.0, 221.0, 249.0, 273.0, 257.0, 265.0, 262.0, 265.0, 264.0, 269.0, 234.0, 239.0, 229.0, 236.0, 246.0, 252.0, 261.0, 264.0, 269.0, 250.0, 258.0, 264.0, 264.0, 258.0, 272.0, 244.0, 239.0, 213.0, 263.0, 267.0, 263.0, 267.0, 268.0, 254.0, 267.0, 252.0, 240.0, 233.0, 221.0, 232.0, 284.0, 286.0, 264.0, 258.0, 225.0, 216.0, 292.0, 284.0, 245.0, 231.0, 245.0, 271.0, 258.0, 249.0, 290.0, 283.0, 266.0, 264.0, 251.0, 268.0, 296.0, 283.0, 263.0, 262.0, 219.0, 254.0, 264.0, 246.0, 291.0, 276.0, 282.0, 294.0, 248.0, 268.0, 252.0, 273.0, 265.0, 259.0]}, "sampler_perf": {"mean_env_wait_ms": 1.8240069603143507, "mean_processing_ms": 0.43214623654293904, "mean_inference_ms": 2.374213479802633}, "off_policy_estimator": {}, "info": {"num_steps_trained": 2640000, "num_steps_sampled": 1408000, "sample_time_ms": 20611.05, "load_time_ms": 37.227, "grad_time_ms": 9292.4, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 3.0814879569373254e-34, "cur_lr": 0.0010000000474974513, "total_loss": 0.006001986563205719, "policy_loss": -0.0016462085768580437, "vf_loss": 82.35639953613281, "vf_explained_var": 0.7567899823188782, "kl": 0.0018497154815122485, "entropy": 1.174903154373169, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1408000, "episodes_total": 3520, "training_iteration": 110, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-39-46", "timestamp": 1660250386, "time_this_iter_s": 30.394602060317993, "time_total_s": 8804.555023908615, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 
1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": 
false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, 
"entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 8804.555023908615, "timesteps_since_restore": 1408000, "iterations_since_restore": 110, "perf": {"cpu_util_percent": 34.141860465116274, "ram_util_percent": 58.19302325581395}}
-{"episode_reward_max": 579.0, "episode_reward_min": 390.0, "episode_reward_mean": 511.01, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 187.0}, "policy_reward_max": {"ppo": 299.0}, "policy_reward_mean": {"ppo": 255.505}, "custom_metrics": {"sparse_reward_mean": 177.0, "sparse_reward_min": 120, "sparse_reward_max": 200, "shaped_reward_mean": 157.01, "shaped_reward_min": 116, "shaped_reward_max": 179, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.38, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 16.44, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 14.74, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 15.65, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 1.11, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 7, "onion_drop_agent_1_mean": 1.08, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.53, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 0.59, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 6, "potting_onion_agent_0_mean": 13.96, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 15.02, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.87, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.18, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.37, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 3.77, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.83, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 0.67, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.24, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.12, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.85, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.33, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.75, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.22, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.04, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 13.96, 
"optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 15.02, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 13.96, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 15.02, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [576.0, 507.0, 513.0, 504.0, 462.0, 522.0, 498.0, 576.0, 516.0, 519.0, 519.0, 564.0, 519.0, 561.0, 401.0, 522.0, 519.0, 570.0, 501.0, 484.0, 519.0, 522.0, 507.0, 413.0, 516.0, 573.0, 516.0, 522.0, 522.0, 422.0, 465.0, 482.0, 527.0, 533.0, 473.0, 465.0, 498.0, 525.0, 519.0, 522.0, 522.0, 516.0, 452.0, 530.0, 530.0, 522.0, 519.0, 473.0, 453.0, 570.0, 522.0, 441.0, 576.0, 476.0, 516.0, 507.0, 573.0, 530.0, 519.0, 579.0, 525.0, 473.0, 510.0, 567.0, 576.0, 516.0, 525.0, 524.0, 396.0, 516.0, 522.0, 465.0, 390.0, 522.0, 530.0, 570.0, 487.0, 519.0, 525.0, 527.0, 507.0, 522.0, 473.0, 519.0, 519.0, 573.0, 519.0, 422.0, 495.0, 525.0, 519.0, 473.0, 522.0, 530.0, 522.0, 510.0, 498.0, 510.0, 516.0, 522.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [289.0, 287.0, 250.0, 257.0, 251.0, 262.0, 242.0, 262.0, 240.0, 222.0, 250.0, 272.0, 244.0, 254.0, 283.0, 293.0, 256.0, 260.0, 261.0, 258.0, 263.0, 256.0, 265.0, 299.0, 261.0, 258.0, 276.0, 285.0, 212.0, 189.0, 259.0, 263.0, 262.0, 257.0, 282.0, 288.0, 259.0, 242.0, 250.0, 234.0, 257.0, 262.0, 259.0, 263.0, 258.0, 249.0, 203.0, 210.0, 268.0, 248.0, 298.0, 275.0, 265.0, 251.0, 260.0, 262.0, 267.0, 255.0, 203.0, 219.0, 230.0, 235.0, 247.0, 235.0, 262.0, 265.0, 264.0, 269.0, 234.0, 239.0, 229.0, 236.0, 246.0, 252.0, 261.0, 264.0, 269.0, 250.0, 258.0, 264.0, 264.0, 258.0, 272.0, 244.0, 239.0, 213.0, 263.0, 267.0, 263.0, 267.0, 268.0, 254.0, 267.0, 252.0, 240.0, 233.0, 221.0, 
232.0, 284.0, 286.0, 264.0, 258.0, 225.0, 216.0, 292.0, 284.0, 245.0, 231.0, 245.0, 271.0, 258.0, 249.0, 290.0, 283.0, 266.0, 264.0, 251.0, 268.0, 296.0, 283.0, 263.0, 262.0, 219.0, 254.0, 264.0, 246.0, 291.0, 276.0, 282.0, 294.0, 248.0, 268.0, 252.0, 273.0, 265.0, 259.0, 203.0, 193.0, 260.0, 256.0, 268.0, 254.0, 236.0, 229.0, 203.0, 187.0, 261.0, 261.0, 271.0, 259.0, 272.0, 298.0, 236.0, 251.0, 260.0, 259.0, 269.0, 256.0, 258.0, 269.0, 256.0, 251.0, 270.0, 252.0, 252.0, 221.0, 270.0, 249.0, 256.0, 263.0, 285.0, 288.0, 270.0, 249.0, 203.0, 219.0, 254.0, 241.0, 250.0, 275.0, 273.0, 246.0, 236.0, 237.0, 250.0, 272.0, 270.0, 260.0, 262.0, 260.0, 243.0, 267.0, 243.0, 255.0, 266.0, 244.0, 269.0, 247.0, 249.0, 273.0]}, "sampler_perf": {"mean_env_wait_ms": 1.8114390530893243, "mean_processing_ms": 0.4296416576287852, "mean_inference_ms": 2.361419613419488}, "off_policy_estimator": {}, "info": {"num_steps_trained": 2664000, "num_steps_sampled": 1420800, "sample_time_ms": 20548.052, "load_time_ms": 37.151, "grad_time_ms": 9411.24, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 1.5407439784686627e-34, "cur_lr": 0.0010000000474974513, "total_loss": 0.0037929911632090807, "policy_loss": -0.003666130593046546, "vf_loss": 80.49629974365234, "vf_explained_var": 0.7547799944877625, "kl": 0.002083237050101161, "entropy": 1.1810179948806763, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1420800, "episodes_total": 3552, "training_iteration": 111, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-40-16", "timestamp": 1660250416, "time_this_iter_s": 29.606700897216797, "time_total_s": 8834.161724805832, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", 
"num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, 
"in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, 
"entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 8834.161724805832, "timesteps_since_restore": 1420800, "iterations_since_restore": 111, "perf": {"cpu_util_percent": 34.73809523809524, "ram_util_percent": 58.2547619047619}}
-{"episode_reward_max": 576.0, "episode_reward_min": 390.0, "episode_reward_mean": 510.8, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 187.0}, "policy_reward_max": {"ppo": 299.0}, "policy_reward_mean": {"ppo": 255.4}, "custom_metrics": {"sparse_reward_mean": 177.0, "sparse_reward_min": 120, "sparse_reward_max": 200, "shaped_reward_mean": 156.8, "shaped_reward_min": 115, "shaped_reward_max": 176, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.15, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 16.34, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 14.57, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 15.58, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 1.0, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 7, "onion_drop_agent_1_mean": 1.02, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.52, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 0.57, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 6, "potting_onion_agent_0_mean": 13.89, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 15.0, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.96, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.25, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.41, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 3.8, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.97, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.67, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.31, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.13, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.78, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.37, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.73, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.28, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.04, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 13.89, 
"optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 15.0, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 13.89, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 15.0, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [507.0, 527.0, 513.0, 516.0, 479.0, 516.0, 441.0, 573.0, 522.0, 516.0, 513.0, 395.0, 525.0, 519.0, 521.0, 522.0, 522.0, 525.0, 530.0, 525.0, 462.0, 522.0, 441.0, 530.0, 573.0, 525.0, 527.0, 525.0, 522.0, 513.0, 570.0, 525.0, 576.0, 516.0, 525.0, 524.0, 396.0, 516.0, 522.0, 465.0, 390.0, 522.0, 530.0, 570.0, 487.0, 519.0, 525.0, 527.0, 507.0, 522.0, 473.0, 519.0, 519.0, 573.0, 519.0, 422.0, 495.0, 525.0, 519.0, 473.0, 522.0, 530.0, 522.0, 510.0, 498.0, 510.0, 516.0, 522.0, 576.0, 507.0, 513.0, 504.0, 462.0, 522.0, 498.0, 576.0, 516.0, 519.0, 519.0, 564.0, 519.0, 561.0, 401.0, 522.0, 519.0, 570.0, 501.0, 484.0, 519.0, 522.0, 507.0, 413.0, 516.0, 573.0, 516.0, 522.0, 522.0, 422.0, 465.0, 482.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [262.0, 245.0, 263.0, 264.0, 258.0, 255.0, 262.0, 254.0, 235.0, 244.0, 256.0, 260.0, 221.0, 220.0, 293.0, 280.0, 256.0, 266.0, 273.0, 243.0, 262.0, 251.0, 195.0, 200.0, 272.0, 253.0, 253.0, 266.0, 258.0, 263.0, 263.0, 259.0, 262.0, 260.0, 262.0, 263.0, 255.0, 275.0, 252.0, 273.0, 226.0, 236.0, 274.0, 248.0, 233.0, 208.0, 270.0, 260.0, 288.0, 285.0, 249.0, 276.0, 265.0, 262.0, 259.0, 266.0, 261.0, 261.0, 254.0, 259.0, 277.0, 293.0, 255.0, 270.0, 282.0, 294.0, 248.0, 268.0, 252.0, 273.0, 265.0, 259.0, 203.0, 193.0, 260.0, 256.0, 268.0, 254.0, 236.0, 229.0, 203.0, 187.0, 261.0, 261.0, 271.0, 259.0, 272.0, 298.0, 236.0, 251.0, 260.0, 259.0, 269.0, 256.0, 258.0, 269.0, 256.0, 
251.0, 270.0, 252.0, 252.0, 221.0, 270.0, 249.0, 256.0, 263.0, 285.0, 288.0, 270.0, 249.0, 203.0, 219.0, 254.0, 241.0, 250.0, 275.0, 273.0, 246.0, 236.0, 237.0, 250.0, 272.0, 270.0, 260.0, 262.0, 260.0, 243.0, 267.0, 243.0, 255.0, 266.0, 244.0, 269.0, 247.0, 249.0, 273.0, 289.0, 287.0, 250.0, 257.0, 251.0, 262.0, 242.0, 262.0, 240.0, 222.0, 250.0, 272.0, 244.0, 254.0, 283.0, 293.0, 256.0, 260.0, 261.0, 258.0, 263.0, 256.0, 265.0, 299.0, 261.0, 258.0, 276.0, 285.0, 212.0, 189.0, 259.0, 263.0, 262.0, 257.0, 282.0, 288.0, 259.0, 242.0, 250.0, 234.0, 257.0, 262.0, 259.0, 263.0, 258.0, 249.0, 203.0, 210.0, 268.0, 248.0, 298.0, 275.0, 265.0, 251.0, 260.0, 262.0, 267.0, 255.0, 203.0, 219.0, 230.0, 235.0, 247.0, 235.0]}, "sampler_perf": {"mean_env_wait_ms": 1.7991027412203113, "mean_processing_ms": 0.4271859275092463, "mean_inference_ms": 2.3490714276117277}, "off_policy_estimator": {}, "info": {"num_steps_trained": 2688000, "num_steps_sampled": 1433600, "sample_time_ms": 20492.657, "load_time_ms": 37.09, "grad_time_ms": 9588.563, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 7.703719892343314e-35, "cur_lr": 0.0010000000474974513, "total_loss": 0.0006233404856175184, "policy_loss": -0.00705617293715477, "vf_loss": 82.67167663574219, "vf_explained_var": 0.7619187235832214, "kl": 0.0019442345947027206, "entropy": 1.175291895866394, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1433600, "episodes_total": 3584, "training_iteration": 112, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-40-46", "timestamp": 1660250446, "time_this_iter_s": 30.788507223129272, "time_total_s": 8864.950232028961, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 
1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": 
false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, 
"entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 8864.950232028961, "timesteps_since_restore": 1433600, "iterations_since_restore": 112, "perf": {"cpu_util_percent": 33.402325581395345, "ram_util_percent": 58.2720930232558}}
-{"episode_reward_max": 576.0, "episode_reward_min": 174.0, "episode_reward_mean": 509.8, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 85.0}, "policy_reward_max": {"ppo": 305.0}, "policy_reward_mean": {"ppo": 254.9}, "custom_metrics": {"sparse_reward_mean": 177.0, "sparse_reward_min": 60, "sparse_reward_max": 200, "shaped_reward_mean": 155.8, "shaped_reward_min": 54, "shaped_reward_max": 176, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.12, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 16.21, "onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 14.57, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 15.54, "useful_onion_pickup_agent_1_min": 6, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.93, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.99, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.47, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.49, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 13.92, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 14.89, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.15, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.18, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.34, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 3.7, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 1.19, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 0.66, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.35, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.14, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.78, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.32, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 4.7, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.28, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.06, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 13.92, 
"optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 14.89, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 13.92, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 14.89, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [570.0, 525.0, 361.0, 525.0, 524.0, 465.0, 461.0, 525.0, 174.0, 524.0, 570.0, 522.0, 519.0, 522.0, 444.0, 570.0, 570.0, 525.0, 519.0, 525.0, 516.0, 479.0, 522.0, 525.0, 513.0, 516.0, 570.0, 462.0, 573.0, 530.0, 492.0, 522.0, 498.0, 510.0, 516.0, 522.0, 576.0, 507.0, 513.0, 504.0, 462.0, 522.0, 498.0, 576.0, 516.0, 519.0, 519.0, 564.0, 519.0, 561.0, 401.0, 522.0, 519.0, 570.0, 501.0, 484.0, 519.0, 522.0, 507.0, 413.0, 516.0, 573.0, 516.0, 522.0, 522.0, 422.0, 465.0, 482.0, 507.0, 527.0, 513.0, 516.0, 479.0, 516.0, 441.0, 573.0, 522.0, 516.0, 513.0, 395.0, 525.0, 519.0, 521.0, 522.0, 522.0, 525.0, 530.0, 525.0, 462.0, 522.0, 441.0, 530.0, 573.0, 525.0, 527.0, 525.0, 522.0, 513.0, 570.0, 525.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [288.0, 282.0, 263.0, 262.0, 187.0, 174.0, 246.0, 279.0, 256.0, 268.0, 230.0, 235.0, 234.0, 227.0, 265.0, 260.0, 89.0, 85.0, 262.0, 262.0, 288.0, 282.0, 262.0, 260.0, 258.0, 261.0, 257.0, 265.0, 224.0, 220.0, 287.0, 283.0, 305.0, 265.0, 264.0, 261.0, 259.0, 260.0, 267.0, 258.0, 259.0, 257.0, 236.0, 243.0, 278.0, 244.0, 262.0, 263.0, 248.0, 265.0, 258.0, 258.0, 285.0, 285.0, 237.0, 225.0, 286.0, 287.0, 272.0, 258.0, 248.0, 244.0, 267.0, 255.0, 243.0, 255.0, 266.0, 244.0, 269.0, 247.0, 249.0, 273.0, 289.0, 287.0, 250.0, 257.0, 251.0, 262.0, 242.0, 262.0, 240.0, 222.0, 250.0, 272.0, 244.0, 254.0, 283.0, 293.0, 256.0, 260.0, 261.0, 258.0, 263.0, 256.0, 265.0, 299.0, 261.0, 
258.0, 276.0, 285.0, 212.0, 189.0, 259.0, 263.0, 262.0, 257.0, 282.0, 288.0, 259.0, 242.0, 250.0, 234.0, 257.0, 262.0, 259.0, 263.0, 258.0, 249.0, 203.0, 210.0, 268.0, 248.0, 298.0, 275.0, 265.0, 251.0, 260.0, 262.0, 267.0, 255.0, 203.0, 219.0, 230.0, 235.0, 247.0, 235.0, 262.0, 245.0, 263.0, 264.0, 258.0, 255.0, 262.0, 254.0, 235.0, 244.0, 256.0, 260.0, 221.0, 220.0, 293.0, 280.0, 256.0, 266.0, 273.0, 243.0, 262.0, 251.0, 195.0, 200.0, 272.0, 253.0, 253.0, 266.0, 258.0, 263.0, 263.0, 259.0, 262.0, 260.0, 262.0, 263.0, 255.0, 275.0, 252.0, 273.0, 226.0, 236.0, 274.0, 248.0, 233.0, 208.0, 270.0, 260.0, 288.0, 285.0, 249.0, 276.0, 265.0, 262.0, 259.0, 266.0, 261.0, 261.0, 254.0, 259.0, 277.0, 293.0, 255.0, 270.0]}, "sampler_perf": {"mean_env_wait_ms": 1.7869849940199092, "mean_processing_ms": 0.4247741951637306, "mean_inference_ms": 2.3369101082045303}, "off_policy_estimator": {}, "info": {"num_steps_trained": 2712000, "num_steps_sampled": 1446400, "sample_time_ms": 20390.667, "load_time_ms": 37.347, "grad_time_ms": 9843.106, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 3.851859946171657e-35, "cur_lr": 0.0010000000474974513, "total_loss": 0.0046621630899608135, "policy_loss": -0.0027472442016005516, "vf_loss": 80.03414916992188, "vf_explained_var": 0.7775616645812988, "kl": 0.0022294942755252123, "entropy": 1.1880191564559937, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1446400, "episodes_total": 3616, "training_iteration": 113, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-41-18", "timestamp": 1660250478, "time_this_iter_s": 31.195298194885254, "time_total_s": 8896.145530223846, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", 
"num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, 
"in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, 
"entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 8896.145530223846, "timesteps_since_restore": 1446400, "iterations_since_restore": 113, "perf": {"cpu_util_percent": 34.73181818181818, "ram_util_percent": 58.20681818181818}}
-{"episode_reward_max": 576.0, "episode_reward_min": 174.0, "episode_reward_mean": 507.9, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 85.0}, "policy_reward_max": {"ppo": 305.0}, "policy_reward_mean": {"ppo": 253.95}, "custom_metrics": {"sparse_reward_mean": 175.8, "sparse_reward_min": 60, "sparse_reward_max": 200, "shaped_reward_mean": 156.3, "shaped_reward_min": 54, "shaped_reward_max": 176, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.0, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 16.27, "onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 14.44, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 15.7, "useful_onion_pickup_agent_1_min": 6, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 1.01, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.94, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.51, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.38, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 13.77, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 14.97, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.04, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.09, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.44, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 3.77, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 1.08, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 0.6, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.25, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.1, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 4.86, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.31, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.72, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.26, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.1, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 13.77, 
"optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 14.97, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 13.77, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 14.97, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [519.0, 479.0, 516.0, 449.0, 507.0, 573.0, 504.0, 492.0, 424.0, 536.0, 573.0, 478.0, 576.0, 522.0, 522.0, 519.0, 567.0, 401.0, 490.0, 522.0, 513.0, 522.0, 245.0, 521.0, 533.0, 570.0, 573.0, 516.0, 464.0, 576.0, 573.0, 522.0, 522.0, 422.0, 465.0, 482.0, 507.0, 527.0, 513.0, 516.0, 479.0, 516.0, 441.0, 573.0, 522.0, 516.0, 513.0, 395.0, 525.0, 519.0, 521.0, 522.0, 522.0, 525.0, 530.0, 525.0, 462.0, 522.0, 441.0, 530.0, 573.0, 525.0, 527.0, 525.0, 522.0, 513.0, 570.0, 525.0, 570.0, 525.0, 361.0, 525.0, 524.0, 465.0, 461.0, 525.0, 174.0, 524.0, 570.0, 522.0, 519.0, 522.0, 444.0, 570.0, 570.0, 525.0, 519.0, 525.0, 516.0, 479.0, 522.0, 525.0, 513.0, 516.0, 570.0, 462.0, 573.0, 530.0, 492.0, 522.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [254.0, 265.0, 241.0, 238.0, 257.0, 259.0, 219.0, 230.0, 263.0, 244.0, 289.0, 284.0, 241.0, 263.0, 247.0, 245.0, 214.0, 210.0, 263.0, 273.0, 272.0, 301.0, 230.0, 248.0, 291.0, 285.0, 275.0, 247.0, 260.0, 262.0, 259.0, 260.0, 279.0, 288.0, 209.0, 192.0, 244.0, 246.0, 254.0, 268.0, 241.0, 272.0, 267.0, 255.0, 122.0, 123.0, 264.0, 257.0, 270.0, 263.0, 303.0, 267.0, 287.0, 286.0, 249.0, 267.0, 234.0, 230.0, 294.0, 282.0, 292.0, 281.0, 256.0, 266.0, 267.0, 255.0, 203.0, 219.0, 230.0, 235.0, 247.0, 235.0, 262.0, 245.0, 263.0, 264.0, 258.0, 255.0, 262.0, 254.0, 235.0, 244.0, 256.0, 260.0, 221.0, 220.0, 293.0, 280.0, 256.0, 266.0, 273.0, 243.0, 262.0, 251.0, 195.0, 200.0, 272.0, 
253.0, 253.0, 266.0, 258.0, 263.0, 263.0, 259.0, 262.0, 260.0, 262.0, 263.0, 255.0, 275.0, 252.0, 273.0, 226.0, 236.0, 274.0, 248.0, 233.0, 208.0, 270.0, 260.0, 288.0, 285.0, 249.0, 276.0, 265.0, 262.0, 259.0, 266.0, 261.0, 261.0, 254.0, 259.0, 277.0, 293.0, 255.0, 270.0, 288.0, 282.0, 263.0, 262.0, 187.0, 174.0, 246.0, 279.0, 256.0, 268.0, 230.0, 235.0, 234.0, 227.0, 265.0, 260.0, 89.0, 85.0, 262.0, 262.0, 288.0, 282.0, 262.0, 260.0, 258.0, 261.0, 257.0, 265.0, 224.0, 220.0, 287.0, 283.0, 305.0, 265.0, 264.0, 261.0, 259.0, 260.0, 267.0, 258.0, 259.0, 257.0, 236.0, 243.0, 278.0, 244.0, 262.0, 263.0, 248.0, 265.0, 258.0, 258.0, 285.0, 285.0, 237.0, 225.0, 286.0, 287.0, 272.0, 258.0, 248.0, 244.0, 267.0, 255.0]}, "sampler_perf": {"mean_env_wait_ms": 1.775081398673085, "mean_processing_ms": 0.4224038975736352, "mean_inference_ms": 2.3249390744039835}, "off_policy_estimator": {}, "info": {"num_steps_trained": 2736000, "num_steps_sampled": 1459200, "sample_time_ms": 20266.804, "load_time_ms": 37.384, "grad_time_ms": 9843.514, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 1.9259299730858284e-35, "cur_lr": 0.0010000000474974513, "total_loss": -0.0005095542292110622, "policy_loss": -0.007585855200886726, "vf_loss": 76.6465072631836, "vf_explained_var": 0.7633175849914551, "kl": 0.0019467826932668686, "entropy": 1.1766948699951172, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1459200, "episodes_total": 3648, "training_iteration": 114, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-41-47", "timestamp": 1660250507, "time_this_iter_s": 29.75877094268799, "time_total_s": 8925.904301166534, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 
1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": 
false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, 
"entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 8925.904301166534, "timesteps_since_restore": 1459200, "iterations_since_restore": 114, "perf": {"cpu_util_percent": 34.199999999999996, "ram_util_percent": 58.228571428571435}}
-{"episode_reward_max": 576.0, "episode_reward_min": 174.0, "episode_reward_mean": 512.69, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 85.0}, "policy_reward_max": {"ppo": 305.0}, "policy_reward_mean": {"ppo": 256.345}, "custom_metrics": {"sparse_reward_mean": 177.8, "sparse_reward_min": 60, "sparse_reward_max": 200, "shaped_reward_mean": 157.09, "shaped_reward_min": 54, "shaped_reward_max": 176, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.25, "onion_pickup_agent_0_min": 10, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 16.43, "onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 14.59, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 15.87, "useful_onion_pickup_agent_1_min": 6, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 1.07, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.95, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.56, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.43, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 13.9, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 15.09, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 20, "dish_pickup_agent_0_mean": 6.0, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.11, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.37, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 3.72, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 1.03, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 0.61, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.18, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.13, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.88, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.36, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.71, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.33, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.1, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 13.9, 
"optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 15.09, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 20, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 13.9, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 15.09, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 20, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [522.0, 516.0, 467.0, 530.0, 507.0, 513.0, 519.0, 558.0, 504.0, 522.0, 501.0, 525.0, 525.0, 524.0, 519.0, 513.0, 513.0, 525.0, 516.0, 567.0, 510.0, 521.0, 513.0, 522.0, 518.0, 576.0, 530.0, 527.0, 459.0, 525.0, 570.0, 525.0, 522.0, 513.0, 570.0, 525.0, 570.0, 525.0, 361.0, 525.0, 524.0, 465.0, 461.0, 525.0, 174.0, 524.0, 570.0, 522.0, 519.0, 522.0, 444.0, 570.0, 570.0, 525.0, 519.0, 525.0, 516.0, 479.0, 522.0, 525.0, 513.0, 516.0, 570.0, 462.0, 573.0, 530.0, 492.0, 522.0, 519.0, 479.0, 516.0, 449.0, 507.0, 573.0, 504.0, 492.0, 424.0, 536.0, 573.0, 478.0, 576.0, 522.0, 522.0, 519.0, 567.0, 401.0, 490.0, 522.0, 513.0, 522.0, 245.0, 521.0, 533.0, 570.0, 573.0, 516.0, 464.0, 576.0, 573.0, 522.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [262.0, 260.0, 251.0, 265.0, 239.0, 228.0, 266.0, 264.0, 255.0, 252.0, 253.0, 260.0, 254.0, 265.0, 285.0, 273.0, 244.0, 260.0, 270.0, 252.0, 247.0, 254.0, 270.0, 255.0, 260.0, 265.0, 265.0, 259.0, 258.0, 261.0, 264.0, 249.0, 268.0, 245.0, 268.0, 257.0, 251.0, 265.0, 276.0, 291.0, 253.0, 257.0, 263.0, 258.0, 264.0, 249.0, 264.0, 258.0, 264.0, 254.0, 288.0, 288.0, 266.0, 264.0, 262.0, 265.0, 241.0, 218.0, 258.0, 267.0, 279.0, 291.0, 249.0, 276.0, 261.0, 261.0, 254.0, 259.0, 277.0, 293.0, 255.0, 270.0, 288.0, 282.0, 263.0, 262.0, 187.0, 174.0, 246.0, 279.0, 256.0, 268.0, 230.0, 235.0, 234.0, 227.0, 265.0, 260.0, 89.0, 85.0, 262.0, 262.0, 288.0, 282.0, 262.0, 260.0, 258.0, 
261.0, 257.0, 265.0, 224.0, 220.0, 287.0, 283.0, 305.0, 265.0, 264.0, 261.0, 259.0, 260.0, 267.0, 258.0, 259.0, 257.0, 236.0, 243.0, 278.0, 244.0, 262.0, 263.0, 248.0, 265.0, 258.0, 258.0, 285.0, 285.0, 237.0, 225.0, 286.0, 287.0, 272.0, 258.0, 248.0, 244.0, 267.0, 255.0, 254.0, 265.0, 241.0, 238.0, 257.0, 259.0, 219.0, 230.0, 263.0, 244.0, 289.0, 284.0, 241.0, 263.0, 247.0, 245.0, 214.0, 210.0, 263.0, 273.0, 272.0, 301.0, 230.0, 248.0, 291.0, 285.0, 275.0, 247.0, 260.0, 262.0, 259.0, 260.0, 279.0, 288.0, 209.0, 192.0, 244.0, 246.0, 254.0, 268.0, 241.0, 272.0, 267.0, 255.0, 122.0, 123.0, 264.0, 257.0, 270.0, 263.0, 303.0, 267.0, 287.0, 286.0, 249.0, 267.0, 234.0, 230.0, 294.0, 282.0, 292.0, 281.0, 256.0, 266.0]}, "sampler_perf": {"mean_env_wait_ms": 1.7633797791154415, "mean_processing_ms": 0.42007059281459463, "mean_inference_ms": 2.3128736955847145}, "off_policy_estimator": {}, "info": {"num_steps_trained": 2760000, "num_steps_sampled": 1472000, "sample_time_ms": 20145.685, "load_time_ms": 37.355, "grad_time_ms": 9802.708, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 9.629649865429142e-36, "cur_lr": 0.0010000000474974513, "total_loss": -0.001834428054280579, "policy_loss": -0.008199676871299744, "vf_loss": 69.51961517333984, "vf_explained_var": 0.7745820879936218, "kl": 0.002087961183860898, "entropy": 1.1734023094177246, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1472000, "episodes_total": 3680, "training_iteration": 115, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-42-15", "timestamp": 1660250535, "time_this_iter_s": 28.112826824188232, "time_total_s": 8954.017127990723, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", 
"num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, 
"in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, 
"entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 8954.017127990723, "timesteps_since_restore": 1472000, "iterations_since_restore": 115, "perf": {"cpu_util_percent": 35.5225, "ram_util_percent": 58.23499999999999}}
-{"episode_reward_max": 582.0, "episode_reward_min": 180.0, "episode_reward_mean": 512.87, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 75.0}, "policy_reward_max": {"ppo": 303.0}, "policy_reward_mean": {"ppo": 256.435}, "custom_metrics": {"sparse_reward_mean": 178.2, "sparse_reward_min": 60, "sparse_reward_max": 200, "shaped_reward_mean": 156.47, "shaped_reward_min": 60, "shaped_reward_max": 182, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.4, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 16.32, "onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 14.79, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 15.8, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 1.06, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.94, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.47, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, "useful_onion_drop_agent_1_mean": 0.4, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 14.04, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 14.97, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.86, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.34, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 4.18, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 3.65, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.95, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 0.76, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 6, "useful_dish_drop_agent_0_mean": 0.24, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.15, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.8, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.44, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.65, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.4, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.09, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.04, 
"optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 14.97, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.04, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 14.97, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [573.0, 504.0, 522.0, 339.0, 465.0, 416.0, 516.0, 476.0, 573.0, 519.0, 530.0, 180.0, 570.0, 570.0, 470.0, 573.0, 456.0, 573.0, 513.0, 501.0, 567.0, 579.0, 519.0, 518.0, 582.0, 519.0, 564.0, 504.0, 582.0, 461.0, 456.0, 501.0, 573.0, 530.0, 492.0, 522.0, 519.0, 479.0, 516.0, 449.0, 507.0, 573.0, 504.0, 492.0, 424.0, 536.0, 573.0, 478.0, 576.0, 522.0, 522.0, 519.0, 567.0, 401.0, 490.0, 522.0, 513.0, 522.0, 245.0, 521.0, 533.0, 570.0, 573.0, 516.0, 464.0, 576.0, 573.0, 522.0, 522.0, 516.0, 467.0, 530.0, 507.0, 513.0, 519.0, 558.0, 504.0, 522.0, 501.0, 525.0, 525.0, 524.0, 519.0, 513.0, 513.0, 525.0, 516.0, 567.0, 510.0, 521.0, 513.0, 522.0, 518.0, 576.0, 530.0, 527.0, 459.0, 525.0, 570.0, 525.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [283.0, 290.0, 254.0, 250.0, 251.0, 271.0, 161.0, 178.0, 228.0, 237.0, 211.0, 205.0, 248.0, 268.0, 241.0, 235.0, 300.0, 273.0, 271.0, 248.0, 269.0, 261.0, 105.0, 75.0, 287.0, 283.0, 289.0, 281.0, 248.0, 222.0, 283.0, 290.0, 216.0, 240.0, 289.0, 284.0, 261.0, 252.0, 248.0, 253.0, 266.0, 301.0, 284.0, 295.0, 268.0, 251.0, 245.0, 273.0, 291.0, 291.0, 257.0, 262.0, 277.0, 287.0, 258.0, 246.0, 287.0, 295.0, 234.0, 227.0, 233.0, 223.0, 248.0, 253.0, 286.0, 287.0, 272.0, 258.0, 248.0, 244.0, 267.0, 255.0, 254.0, 265.0, 241.0, 238.0, 257.0, 259.0, 219.0, 230.0, 263.0, 244.0, 289.0, 284.0, 241.0, 263.0, 247.0, 245.0, 214.0, 210.0, 263.0, 273.0, 272.0, 301.0, 230.0, 248.0, 291.0, 
285.0, 275.0, 247.0, 260.0, 262.0, 259.0, 260.0, 279.0, 288.0, 209.0, 192.0, 244.0, 246.0, 254.0, 268.0, 241.0, 272.0, 267.0, 255.0, 122.0, 123.0, 264.0, 257.0, 270.0, 263.0, 303.0, 267.0, 287.0, 286.0, 249.0, 267.0, 234.0, 230.0, 294.0, 282.0, 292.0, 281.0, 256.0, 266.0, 262.0, 260.0, 251.0, 265.0, 239.0, 228.0, 266.0, 264.0, 255.0, 252.0, 253.0, 260.0, 254.0, 265.0, 285.0, 273.0, 244.0, 260.0, 270.0, 252.0, 247.0, 254.0, 270.0, 255.0, 260.0, 265.0, 265.0, 259.0, 258.0, 261.0, 264.0, 249.0, 268.0, 245.0, 268.0, 257.0, 251.0, 265.0, 276.0, 291.0, 253.0, 257.0, 263.0, 258.0, 264.0, 249.0, 264.0, 258.0, 264.0, 254.0, 288.0, 288.0, 266.0, 264.0, 262.0, 265.0, 241.0, 218.0, 258.0, 267.0, 279.0, 291.0, 249.0, 276.0]}, "sampler_perf": {"mean_env_wait_ms": 1.7518867216658078, "mean_processing_ms": 0.4177743622386954, "mean_inference_ms": 2.300854372548392}, "off_policy_estimator": {}, "info": {"num_steps_trained": 2784000, "num_steps_sampled": 1484800, "sample_time_ms": 20021.401, "load_time_ms": 37.489, "grad_time_ms": 9776.788, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 4.814824932714571e-36, "cur_lr": 0.0010000000474974513, "total_loss": 0.00206244015134871, "policy_loss": -0.005156705155968666, "vf_loss": 78.12344360351562, "vf_explained_var": 0.7715883851051331, "kl": 0.0026745833456516266, "entropy": 1.1864006519317627, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1484800, "episodes_total": 3712, "training_iteration": 116, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-42-45", "timestamp": 1660250565, "time_this_iter_s": 29.3955659866333, "time_total_s": 8983.412693977356, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 
1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": 
false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, 
"entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 8983.412693977356, "timesteps_since_restore": 1484800, "iterations_since_restore": 116, "perf": {"cpu_util_percent": 35.66904761904762, "ram_util_percent": 58.25}}
-{"episode_reward_max": 582.0, "episode_reward_min": 180.0, "episode_reward_mean": 513.52, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 75.0}, "policy_reward_max": {"ppo": 301.0}, "policy_reward_mean": {"ppo": 256.76}, "custom_metrics": {"sparse_reward_mean": 178.4, "sparse_reward_min": 60, "sparse_reward_max": 200, "shaped_reward_mean": 156.72, "shaped_reward_min": 60, "shaped_reward_max": 182, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.57, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 16.34, "onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 14.93, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 15.72, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 1.13, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 1.0, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.55, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.54, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 14.11, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 15.0, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.81, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.48, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 4.12, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 3.76, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.96, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 0.79, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 6, "useful_dish_drop_agent_0_mean": 0.19, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.19, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.67, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.52, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.53, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.49, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.07, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.11, 
"optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 15.0, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.11, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 15.0, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [519.0, 530.0, 519.0, 455.0, 222.0, 524.0, 519.0, 525.0, 519.0, 576.0, 467.0, 522.0, 579.0, 570.0, 522.0, 512.0, 510.0, 495.0, 522.0, 573.0, 522.0, 573.0, 525.0, 576.0, 482.0, 522.0, 484.0, 525.0, 456.0, 461.0, 516.0, 522.0, 464.0, 576.0, 573.0, 522.0, 522.0, 516.0, 467.0, 530.0, 507.0, 513.0, 519.0, 558.0, 504.0, 522.0, 501.0, 525.0, 525.0, 524.0, 519.0, 513.0, 513.0, 525.0, 516.0, 567.0, 510.0, 521.0, 513.0, 522.0, 518.0, 576.0, 530.0, 527.0, 459.0, 525.0, 570.0, 525.0, 573.0, 504.0, 522.0, 339.0, 465.0, 416.0, 516.0, 476.0, 573.0, 519.0, 530.0, 180.0, 570.0, 570.0, 470.0, 573.0, 456.0, 573.0, 513.0, 501.0, 567.0, 579.0, 519.0, 518.0, 582.0, 519.0, 564.0, 504.0, 582.0, 461.0, 456.0, 501.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [254.0, 265.0, 273.0, 257.0, 254.0, 265.0, 220.0, 235.0, 105.0, 117.0, 259.0, 265.0, 265.0, 254.0, 260.0, 265.0, 261.0, 258.0, 278.0, 298.0, 227.0, 240.0, 253.0, 269.0, 286.0, 293.0, 290.0, 280.0, 266.0, 256.0, 252.0, 260.0, 252.0, 258.0, 237.0, 258.0, 264.0, 258.0, 277.0, 296.0, 268.0, 254.0, 285.0, 288.0, 270.0, 255.0, 279.0, 297.0, 231.0, 251.0, 257.0, 265.0, 237.0, 247.0, 263.0, 262.0, 221.0, 235.0, 234.0, 227.0, 260.0, 256.0, 262.0, 260.0, 234.0, 230.0, 294.0, 282.0, 292.0, 281.0, 256.0, 266.0, 262.0, 260.0, 251.0, 265.0, 239.0, 228.0, 266.0, 264.0, 255.0, 252.0, 253.0, 260.0, 254.0, 265.0, 285.0, 273.0, 244.0, 260.0, 270.0, 252.0, 247.0, 254.0, 270.0, 255.0, 260.0, 
265.0, 265.0, 259.0, 258.0, 261.0, 264.0, 249.0, 268.0, 245.0, 268.0, 257.0, 251.0, 265.0, 276.0, 291.0, 253.0, 257.0, 263.0, 258.0, 264.0, 249.0, 264.0, 258.0, 264.0, 254.0, 288.0, 288.0, 266.0, 264.0, 262.0, 265.0, 241.0, 218.0, 258.0, 267.0, 279.0, 291.0, 249.0, 276.0, 283.0, 290.0, 254.0, 250.0, 251.0, 271.0, 161.0, 178.0, 228.0, 237.0, 211.0, 205.0, 248.0, 268.0, 241.0, 235.0, 300.0, 273.0, 271.0, 248.0, 269.0, 261.0, 105.0, 75.0, 287.0, 283.0, 289.0, 281.0, 248.0, 222.0, 283.0, 290.0, 216.0, 240.0, 289.0, 284.0, 261.0, 252.0, 248.0, 253.0, 266.0, 301.0, 284.0, 295.0, 268.0, 251.0, 245.0, 273.0, 291.0, 291.0, 257.0, 262.0, 277.0, 287.0, 258.0, 246.0, 287.0, 295.0, 234.0, 227.0, 233.0, 223.0, 248.0, 253.0]}, "sampler_perf": {"mean_env_wait_ms": 1.7406015862798738, "mean_processing_ms": 0.4155246745297136, "mean_inference_ms": 2.2890629120226706}, "off_policy_estimator": {}, "info": {"num_steps_trained": 2808000, "num_steps_sampled": 1497600, "sample_time_ms": 20042.808, "load_time_ms": 37.283, "grad_time_ms": 9721.955, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 2.4074124663572855e-36, "cur_lr": 0.0010000000474974513, "total_loss": 0.00018632395949680358, "policy_loss": -0.007065422832965851, "vf_loss": 78.37664794921875, "vf_explained_var": 0.7705362439155579, "kl": 0.0016083299415186048, "entropy": 1.171847939491272, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1497600, "episodes_total": 3744, "training_iteration": 117, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-43-15", "timestamp": 1660250595, "time_this_iter_s": 30.259077787399292, "time_total_s": 9013.671771764755, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", 
"num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, 
"in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, 
"entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 9013.671771764755, "timesteps_since_restore": 1497600, "iterations_since_restore": 117, "perf": {"cpu_util_percent": 36.71627906976744, "ram_util_percent": 58.1906976744186}}
-{"episode_reward_max": 582.0, "episode_reward_min": 180.0, "episode_reward_mean": 512.86, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 75.0}, "policy_reward_max": {"ppo": 301.0}, "policy_reward_mean": {"ppo": 256.43}, "custom_metrics": {"sparse_reward_mean": 178.0, "sparse_reward_min": 60, "sparse_reward_max": 200, "shaped_reward_mean": 156.86, "shaped_reward_min": 60, "shaped_reward_max": 182, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.66, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 16.32, "onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 14.99, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 15.63, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 1.18, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 0.98, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.6, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.55, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 14.1, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 15.01, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.63, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.57, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 4.07, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 3.89, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.88, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 0.76, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 6, "useful_dish_drop_agent_0_mean": 0.16, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.16, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.58, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.59, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.45, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.53, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.05, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.1, 
"optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 15.01, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.1, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 15.01, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [530.0, 516.0, 510.0, 525.0, 570.0, 468.0, 495.0, 525.0, 519.0, 519.0, 522.0, 579.0, 579.0, 501.0, 527.0, 530.0, 525.0, 573.0, 525.0, 522.0, 522.0, 527.0, 444.0, 515.0, 462.0, 533.0, 525.0, 519.0, 519.0, 527.0, 504.0, 515.0, 459.0, 525.0, 570.0, 525.0, 573.0, 504.0, 522.0, 339.0, 465.0, 416.0, 516.0, 476.0, 573.0, 519.0, 530.0, 180.0, 570.0, 570.0, 470.0, 573.0, 456.0, 573.0, 513.0, 501.0, 567.0, 579.0, 519.0, 518.0, 582.0, 519.0, 564.0, 504.0, 582.0, 461.0, 456.0, 501.0, 519.0, 530.0, 519.0, 455.0, 222.0, 524.0, 519.0, 525.0, 519.0, 576.0, 467.0, 522.0, 579.0, 570.0, 522.0, 512.0, 510.0, 495.0, 522.0, 573.0, 522.0, 573.0, 525.0, 576.0, 482.0, 522.0, 484.0, 525.0, 456.0, 461.0, 516.0, 522.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [263.0, 267.0, 265.0, 251.0, 249.0, 261.0, 265.0, 260.0, 283.0, 287.0, 227.0, 241.0, 239.0, 256.0, 260.0, 265.0, 256.0, 263.0, 269.0, 250.0, 263.0, 259.0, 282.0, 297.0, 288.0, 291.0, 249.0, 252.0, 273.0, 254.0, 275.0, 255.0, 276.0, 249.0, 281.0, 292.0, 256.0, 269.0, 267.0, 255.0, 253.0, 269.0, 265.0, 262.0, 220.0, 224.0, 258.0, 257.0, 218.0, 244.0, 263.0, 270.0, 262.0, 263.0, 251.0, 268.0, 267.0, 252.0, 261.0, 266.0, 253.0, 251.0, 251.0, 264.0, 241.0, 218.0, 258.0, 267.0, 279.0, 291.0, 249.0, 276.0, 283.0, 290.0, 254.0, 250.0, 251.0, 271.0, 161.0, 178.0, 228.0, 237.0, 211.0, 205.0, 248.0, 268.0, 241.0, 235.0, 300.0, 273.0, 271.0, 248.0, 269.0, 261.0, 105.0, 75.0, 287.0, 
283.0, 289.0, 281.0, 248.0, 222.0, 283.0, 290.0, 216.0, 240.0, 289.0, 284.0, 261.0, 252.0, 248.0, 253.0, 266.0, 301.0, 284.0, 295.0, 268.0, 251.0, 245.0, 273.0, 291.0, 291.0, 257.0, 262.0, 277.0, 287.0, 258.0, 246.0, 287.0, 295.0, 234.0, 227.0, 233.0, 223.0, 248.0, 253.0, 254.0, 265.0, 273.0, 257.0, 254.0, 265.0, 220.0, 235.0, 105.0, 117.0, 259.0, 265.0, 265.0, 254.0, 260.0, 265.0, 261.0, 258.0, 278.0, 298.0, 227.0, 240.0, 253.0, 269.0, 286.0, 293.0, 290.0, 280.0, 266.0, 256.0, 252.0, 260.0, 252.0, 258.0, 237.0, 258.0, 264.0, 258.0, 277.0, 296.0, 268.0, 254.0, 285.0, 288.0, 270.0, 255.0, 279.0, 297.0, 231.0, 251.0, 257.0, 265.0, 237.0, 247.0, 263.0, 262.0, 221.0, 235.0, 234.0, 227.0, 260.0, 256.0, 262.0, 260.0]}, "sampler_perf": {"mean_env_wait_ms": 1.7295179891011523, "mean_processing_ms": 0.4133177002223833, "mean_inference_ms": 2.2776485040760637}, "off_policy_estimator": {}, "info": {"num_steps_trained": 2832000, "num_steps_sampled": 1510400, "sample_time_ms": 20089.612, "load_time_ms": 37.29, "grad_time_ms": 9589.898, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 1.2037062331786428e-36, "cur_lr": 0.0010000000474974513, "total_loss": 0.0031413733959198, "policy_loss": -0.00419240677729249, "vf_loss": 79.22246551513672, "vf_explained_var": 0.7654686570167542, "kl": 0.0017640552250668406, "entropy": 1.1769217252731323, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1510400, "episodes_total": 3776, "training_iteration": 118, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-43-44", "timestamp": 1660250624, "time_this_iter_s": 28.596869230270386, "time_total_s": 9042.268640995026, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 
1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": 
false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, 
"entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 9042.268640995026, "timesteps_since_restore": 1510400, "iterations_since_restore": 118, "perf": {"cpu_util_percent": 34.160000000000004, "ram_util_percent": 58.245000000000005}}
-{"episode_reward_max": 582.0, "episode_reward_min": 222.0, "episode_reward_mean": 517.81, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 105.0}, "policy_reward_max": {"ppo": 303.0}, "policy_reward_mean": {"ppo": 258.905}, "custom_metrics": {"sparse_reward_mean": 179.4, "sparse_reward_min": 80, "sparse_reward_max": 200, "shaped_reward_mean": 159.01, "shaped_reward_min": 62, "shaped_reward_max": 182, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.57, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 16.89, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 14.83, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 16.07, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 1.34, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 1.04, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.64, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.6, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 13.8, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 15.55, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.76, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.27, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 4.4, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 3.87, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.77, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.61, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.1, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.12, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.84, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.46, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.7, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.36, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.06, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 13.8, 
"optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 15.55, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 13.8, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 15.55, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [570.0, 573.0, 570.0, 525.0, 507.0, 558.0, 522.0, 465.0, 525.0, 522.0, 519.0, 507.0, 519.0, 525.0, 482.0, 576.0, 522.0, 484.0, 530.0, 507.0, 516.0, 462.0, 530.0, 525.0, 561.0, 519.0, 573.0, 530.0, 519.0, 464.0, 533.0, 525.0, 582.0, 461.0, 456.0, 501.0, 519.0, 530.0, 519.0, 455.0, 222.0, 524.0, 519.0, 525.0, 519.0, 576.0, 467.0, 522.0, 579.0, 570.0, 522.0, 512.0, 510.0, 495.0, 522.0, 573.0, 522.0, 573.0, 525.0, 576.0, 482.0, 522.0, 484.0, 525.0, 456.0, 461.0, 516.0, 522.0, 530.0, 516.0, 510.0, 525.0, 570.0, 468.0, 495.0, 525.0, 519.0, 519.0, 522.0, 579.0, 579.0, 501.0, 527.0, 530.0, 525.0, 573.0, 525.0, 522.0, 522.0, 527.0, 444.0, 515.0, 462.0, 533.0, 525.0, 519.0, 519.0, 527.0, 504.0, 515.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [276.0, 294.0, 293.0, 280.0, 284.0, 286.0, 275.0, 250.0, 246.0, 261.0, 279.0, 279.0, 281.0, 241.0, 223.0, 242.0, 255.0, 270.0, 258.0, 264.0, 268.0, 251.0, 244.0, 263.0, 256.0, 263.0, 255.0, 270.0, 253.0, 229.0, 282.0, 294.0, 258.0, 264.0, 238.0, 246.0, 264.0, 266.0, 259.0, 248.0, 263.0, 253.0, 232.0, 230.0, 270.0, 260.0, 256.0, 269.0, 276.0, 285.0, 261.0, 258.0, 303.0, 270.0, 270.0, 260.0, 254.0, 265.0, 231.0, 233.0, 262.0, 271.0, 260.0, 265.0, 287.0, 295.0, 234.0, 227.0, 233.0, 223.0, 248.0, 253.0, 254.0, 265.0, 273.0, 257.0, 254.0, 265.0, 220.0, 235.0, 105.0, 117.0, 259.0, 265.0, 265.0, 254.0, 260.0, 265.0, 261.0, 258.0, 278.0, 298.0, 227.0, 240.0, 253.0, 269.0, 286.0, 
293.0, 290.0, 280.0, 266.0, 256.0, 252.0, 260.0, 252.0, 258.0, 237.0, 258.0, 264.0, 258.0, 277.0, 296.0, 268.0, 254.0, 285.0, 288.0, 270.0, 255.0, 279.0, 297.0, 231.0, 251.0, 257.0, 265.0, 237.0, 247.0, 263.0, 262.0, 221.0, 235.0, 234.0, 227.0, 260.0, 256.0, 262.0, 260.0, 263.0, 267.0, 265.0, 251.0, 249.0, 261.0, 265.0, 260.0, 283.0, 287.0, 227.0, 241.0, 239.0, 256.0, 260.0, 265.0, 256.0, 263.0, 269.0, 250.0, 263.0, 259.0, 282.0, 297.0, 288.0, 291.0, 249.0, 252.0, 273.0, 254.0, 275.0, 255.0, 276.0, 249.0, 281.0, 292.0, 256.0, 269.0, 267.0, 255.0, 253.0, 269.0, 265.0, 262.0, 220.0, 224.0, 258.0, 257.0, 218.0, 244.0, 263.0, 270.0, 262.0, 263.0, 251.0, 268.0, 267.0, 252.0, 261.0, 266.0, 253.0, 251.0, 251.0, 264.0]}, "sampler_perf": {"mean_env_wait_ms": 1.7186253925564765, "mean_processing_ms": 0.4111520953158576, "mean_inference_ms": 2.2666653584244876}, "off_policy_estimator": {}, "info": {"num_steps_trained": 2856000, "num_steps_sampled": 1523200, "sample_time_ms": 20187.204, "load_time_ms": 37.338, "grad_time_ms": 9575.103, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 6.018531165893214e-37, "cur_lr": 0.0010000000474974513, "total_loss": 0.002520867856219411, "policy_loss": -0.0053411815315485, "vf_loss": 84.43938446044922, "vf_explained_var": 0.7439851760864258, "kl": 0.0023064902052283287, "entropy": 1.1637717485427856, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1523200, "episodes_total": 3808, "training_iteration": 119, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-44-14", "timestamp": 1660250654, "time_this_iter_s": 30.140514850616455, "time_total_s": 9072.409155845642, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 
1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": 
false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, 
"entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 9072.409155845642, "timesteps_since_restore": 1523200, "iterations_since_restore": 119, "perf": {"cpu_util_percent": 32.737209302325574, "ram_util_percent": 58.15116279069766}}
-{"episode_reward_max": 582.0, "episode_reward_min": 398.0, "episode_reward_mean": 521.43, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 191.0}, "policy_reward_max": {"ppo": 303.0}, "policy_reward_mean": {"ppo": 260.715}, "custom_metrics": {"sparse_reward_mean": 180.8, "sparse_reward_min": 140, "sparse_reward_max": 200, "shaped_reward_mean": 159.83, "shaped_reward_min": 118, "shaped_reward_max": 182, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.57, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 17.01, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 14.86, "useful_onion_pickup_agent_0_min": 9, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 16.2, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 1.29, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 1.05, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.62, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.6, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 13.87, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 15.65, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.0, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.07, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 4.5, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 3.74, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.78, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.59, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.16, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.13, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.07, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.31, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 4.95, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.19, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.04, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 13.87, 
"optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 15.65, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 13.87, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 15.65, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [510.0, 576.0, 507.0, 527.0, 570.0, 510.0, 501.0, 522.0, 570.0, 516.0, 467.0, 518.0, 507.0, 579.0, 530.0, 570.0, 398.0, 582.0, 573.0, 433.0, 530.0, 570.0, 516.0, 524.0, 399.0, 570.0, 533.0, 573.0, 522.0, 525.0, 501.0, 522.0, 456.0, 461.0, 516.0, 522.0, 530.0, 516.0, 510.0, 525.0, 570.0, 468.0, 495.0, 525.0, 519.0, 519.0, 522.0, 579.0, 579.0, 501.0, 527.0, 530.0, 525.0, 573.0, 525.0, 522.0, 522.0, 527.0, 444.0, 515.0, 462.0, 533.0, 525.0, 519.0, 519.0, 527.0, 504.0, 515.0, 570.0, 573.0, 570.0, 525.0, 507.0, 558.0, 522.0, 465.0, 525.0, 522.0, 519.0, 507.0, 519.0, 525.0, 482.0, 576.0, 522.0, 484.0, 530.0, 507.0, 516.0, 462.0, 530.0, 525.0, 561.0, 519.0, 573.0, 530.0, 519.0, 464.0, 533.0, 525.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [249.0, 261.0, 286.0, 290.0, 251.0, 256.0, 280.0, 247.0, 287.0, 283.0, 259.0, 251.0, 249.0, 252.0, 257.0, 265.0, 286.0, 284.0, 256.0, 260.0, 236.0, 231.0, 265.0, 253.0, 262.0, 245.0, 280.0, 299.0, 276.0, 254.0, 301.0, 269.0, 205.0, 193.0, 291.0, 291.0, 292.0, 281.0, 216.0, 217.0, 283.0, 247.0, 292.0, 278.0, 272.0, 244.0, 255.0, 269.0, 208.0, 191.0, 274.0, 296.0, 271.0, 262.0, 278.0, 295.0, 252.0, 270.0, 263.0, 262.0, 255.0, 246.0, 249.0, 273.0, 221.0, 235.0, 234.0, 227.0, 260.0, 256.0, 262.0, 260.0, 263.0, 267.0, 265.0, 251.0, 249.0, 261.0, 265.0, 260.0, 283.0, 287.0, 227.0, 241.0, 239.0, 256.0, 260.0, 265.0, 256.0, 263.0, 269.0, 250.0, 263.0, 259.0, 282.0, 297.0, 288.0, 
291.0, 249.0, 252.0, 273.0, 254.0, 275.0, 255.0, 276.0, 249.0, 281.0, 292.0, 256.0, 269.0, 267.0, 255.0, 253.0, 269.0, 265.0, 262.0, 220.0, 224.0, 258.0, 257.0, 218.0, 244.0, 263.0, 270.0, 262.0, 263.0, 251.0, 268.0, 267.0, 252.0, 261.0, 266.0, 253.0, 251.0, 251.0, 264.0, 276.0, 294.0, 293.0, 280.0, 284.0, 286.0, 275.0, 250.0, 246.0, 261.0, 279.0, 279.0, 281.0, 241.0, 223.0, 242.0, 255.0, 270.0, 258.0, 264.0, 268.0, 251.0, 244.0, 263.0, 256.0, 263.0, 255.0, 270.0, 253.0, 229.0, 282.0, 294.0, 258.0, 264.0, 238.0, 246.0, 264.0, 266.0, 259.0, 248.0, 263.0, 253.0, 232.0, 230.0, 270.0, 260.0, 256.0, 269.0, 276.0, 285.0, 261.0, 258.0, 303.0, 270.0, 270.0, 260.0, 254.0, 265.0, 231.0, 233.0, 262.0, 271.0, 260.0, 265.0]}, "sampler_perf": {"mean_env_wait_ms": 1.7079238505690706, "mean_processing_ms": 0.40902061791269845, "mean_inference_ms": 2.2560343134520804}, "off_policy_estimator": {}, "info": {"num_steps_trained": 2880000, "num_steps_sampled": 1536000, "sample_time_ms": 20247.355, "load_time_ms": 37.42, "grad_time_ms": 9479.74, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 3.009265582946607e-37, "cur_lr": 0.0010000000474974513, "total_loss": 0.0019442923367023468, "policy_loss": -0.005335395690053701, "vf_loss": 78.6177749633789, "vf_explained_var": 0.7651795744895935, "kl": 0.0017719753086566925, "entropy": 1.164175033569336, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1536000, "episodes_total": 3840, "training_iteration": 120, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-44-44", "timestamp": 1660250684, "time_this_iter_s": 30.042346954345703, "time_total_s": 9102.451502799988, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 
1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": 
false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, 
"entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 9102.451502799988, "timesteps_since_restore": 1536000, "iterations_since_restore": 120, "perf": {"cpu_util_percent": 33.52142857142857, "ram_util_percent": 58.276190476190465}}
-{"episode_reward_max": 582.0, "episode_reward_min": 365.0, "episode_reward_mean": 523.61, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 180.0}, "policy_reward_max": {"ppo": 303.0}, "policy_reward_mean": {"ppo": 261.805}, "custom_metrics": {"sparse_reward_mean": 181.6, "sparse_reward_min": 120, "sparse_reward_max": 200, "shaped_reward_mean": 160.41, "shaped_reward_min": 118, "shaped_reward_max": 182, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.47, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 17.16, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 14.88, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 16.39, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 1.17, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 1.14, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.52, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.64, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 13.93, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 15.7, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 6.21, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 13, "dish_pickup_agent_1_mean": 5.02, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.61, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 3.63, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.93, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 0.61, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.25, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.12, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.13, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.28, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.06, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.16, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.03, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 13.93, 
"optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 15.7, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 13.93, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 15.7, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [564.0, 579.0, 507.0, 530.0, 525.0, 365.0, 518.0, 522.0, 501.0, 525.0, 515.0, 576.0, 582.0, 530.0, 507.0, 522.0, 530.0, 522.0, 456.0, 504.0, 582.0, 579.0, 519.0, 533.0, 573.0, 525.0, 498.0, 567.0, 573.0, 450.0, 479.0, 522.0, 519.0, 527.0, 504.0, 515.0, 570.0, 573.0, 570.0, 525.0, 507.0, 558.0, 522.0, 465.0, 525.0, 522.0, 519.0, 507.0, 519.0, 525.0, 482.0, 576.0, 522.0, 484.0, 530.0, 507.0, 516.0, 462.0, 530.0, 525.0, 561.0, 519.0, 573.0, 530.0, 519.0, 464.0, 533.0, 525.0, 510.0, 576.0, 507.0, 527.0, 570.0, 510.0, 501.0, 522.0, 570.0, 516.0, 467.0, 518.0, 507.0, 579.0, 530.0, 570.0, 398.0, 582.0, 573.0, 433.0, 530.0, 570.0, 516.0, 524.0, 399.0, 570.0, 533.0, 573.0, 522.0, 525.0, 501.0, 522.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [292.0, 272.0, 290.0, 289.0, 251.0, 256.0, 252.0, 278.0, 263.0, 262.0, 185.0, 180.0, 260.0, 258.0, 254.0, 268.0, 247.0, 254.0, 265.0, 260.0, 262.0, 253.0, 290.0, 286.0, 296.0, 286.0, 257.0, 273.0, 255.0, 252.0, 265.0, 257.0, 265.0, 265.0, 274.0, 248.0, 218.0, 238.0, 260.0, 244.0, 291.0, 291.0, 293.0, 286.0, 260.0, 259.0, 270.0, 263.0, 285.0, 288.0, 279.0, 246.0, 252.0, 246.0, 275.0, 292.0, 287.0, 286.0, 218.0, 232.0, 248.0, 231.0, 260.0, 262.0, 267.0, 252.0, 261.0, 266.0, 253.0, 251.0, 251.0, 264.0, 276.0, 294.0, 293.0, 280.0, 284.0, 286.0, 275.0, 250.0, 246.0, 261.0, 279.0, 279.0, 281.0, 241.0, 223.0, 242.0, 255.0, 270.0, 258.0, 264.0, 268.0, 251.0, 244.0, 263.0, 256.0, 
263.0, 255.0, 270.0, 253.0, 229.0, 282.0, 294.0, 258.0, 264.0, 238.0, 246.0, 264.0, 266.0, 259.0, 248.0, 263.0, 253.0, 232.0, 230.0, 270.0, 260.0, 256.0, 269.0, 276.0, 285.0, 261.0, 258.0, 303.0, 270.0, 270.0, 260.0, 254.0, 265.0, 231.0, 233.0, 262.0, 271.0, 260.0, 265.0, 249.0, 261.0, 286.0, 290.0, 251.0, 256.0, 280.0, 247.0, 287.0, 283.0, 259.0, 251.0, 249.0, 252.0, 257.0, 265.0, 286.0, 284.0, 256.0, 260.0, 236.0, 231.0, 265.0, 253.0, 262.0, 245.0, 280.0, 299.0, 276.0, 254.0, 301.0, 269.0, 205.0, 193.0, 291.0, 291.0, 292.0, 281.0, 216.0, 217.0, 283.0, 247.0, 292.0, 278.0, 272.0, 244.0, 255.0, 269.0, 208.0, 191.0, 274.0, 296.0, 271.0, 262.0, 278.0, 295.0, 252.0, 270.0, 263.0, 262.0, 255.0, 246.0, 249.0, 273.0]}, "sampler_perf": {"mean_env_wait_ms": 1.6974210688663596, "mean_processing_ms": 0.40692675131976414, "mean_inference_ms": 2.2458724940047134}, "off_policy_estimator": {}, "info": {"num_steps_trained": 2904000, "num_steps_sampled": 1548800, "sample_time_ms": 20389.537, "load_time_ms": 37.524, "grad_time_ms": 9428.918, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 1.5046327914733034e-37, "cur_lr": 0.0010000000474974513, "total_loss": 0.0008685672655701637, "policy_loss": -0.007173554971814156, "vf_loss": 86.26753997802734, "vf_explained_var": 0.7487472891807556, "kl": 0.001581608667038381, "entropy": 1.1692520380020142, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1548800, "episodes_total": 3872, "training_iteration": 121, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-45-15", "timestamp": 1660250715, "time_this_iter_s": 30.52119469642639, "time_total_s": 9132.972697496414, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", 
"num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, 
"in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, 
"entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 9132.972697496414, "timesteps_since_restore": 1548800, "iterations_since_restore": 121, "perf": {"cpu_util_percent": 33.260465116279065, "ram_util_percent": 58.28837209302325}}
-{"episode_reward_max": 582.0, "episode_reward_min": 123.0, "episode_reward_mean": 521.78, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 58.0}, "policy_reward_max": {"ppo": 302.0}, "policy_reward_mean": {"ppo": 260.89}, "custom_metrics": {"sparse_reward_mean": 180.8, "sparse_reward_min": 40, "sparse_reward_max": 200, "shaped_reward_mean": 160.18, "shaped_reward_min": 43, "shaped_reward_max": 182, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.75, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 16.91, "onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 15.02, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 16.03, "useful_onion_pickup_agent_1_min": 3, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 1.32, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 8, "onion_drop_agent_1_mean": 1.14, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 9, "useful_onion_drop_agent_0_mean": 0.7, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.67, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 9, "potting_onion_agent_0_mean": 14.12, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 15.46, "potting_onion_agent_1_min": 3, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 6.01, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 13, "dish_pickup_agent_1_mean": 5.11, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.55, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 3.78, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.83, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 0.66, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.21, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.17, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.0, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.35, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.94, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.25, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.03, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.04, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 14.12, 
"optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 15.46, "optimal_onion_potting_agent_1_min": 3, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.12, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 15.46, "viable_onion_potting_agent_1_min": 3, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [576.0, 522.0, 522.0, 482.0, 522.0, 579.0, 573.0, 527.0, 530.0, 522.0, 123.0, 519.0, 570.0, 525.0, 470.0, 522.0, 573.0, 492.0, 570.0, 576.0, 465.0, 411.0, 525.0, 525.0, 579.0, 510.0, 516.0, 573.0, 525.0, 536.0, 573.0, 573.0, 519.0, 464.0, 533.0, 525.0, 510.0, 576.0, 507.0, 527.0, 570.0, 510.0, 501.0, 522.0, 570.0, 516.0, 467.0, 518.0, 507.0, 579.0, 530.0, 570.0, 398.0, 582.0, 573.0, 433.0, 530.0, 570.0, 516.0, 524.0, 399.0, 570.0, 533.0, 573.0, 522.0, 525.0, 501.0, 522.0, 564.0, 579.0, 507.0, 530.0, 525.0, 365.0, 518.0, 522.0, 501.0, 525.0, 515.0, 576.0, 582.0, 530.0, 507.0, 522.0, 530.0, 522.0, 456.0, 504.0, 582.0, 579.0, 519.0, 533.0, 573.0, 525.0, 498.0, 567.0, 573.0, 450.0, 479.0, 522.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [285.0, 291.0, 267.0, 255.0, 272.0, 250.0, 246.0, 236.0, 273.0, 249.0, 283.0, 296.0, 298.0, 275.0, 264.0, 263.0, 266.0, 264.0, 255.0, 267.0, 58.0, 65.0, 249.0, 270.0, 298.0, 272.0, 261.0, 264.0, 245.0, 225.0, 252.0, 270.0, 276.0, 297.0, 243.0, 249.0, 273.0, 297.0, 292.0, 284.0, 231.0, 234.0, 201.0, 210.0, 269.0, 256.0, 262.0, 263.0, 287.0, 292.0, 265.0, 245.0, 248.0, 268.0, 292.0, 281.0, 259.0, 266.0, 273.0, 263.0, 271.0, 302.0, 295.0, 278.0, 254.0, 265.0, 231.0, 233.0, 262.0, 271.0, 260.0, 265.0, 249.0, 261.0, 286.0, 290.0, 251.0, 256.0, 280.0, 247.0, 287.0, 283.0, 259.0, 251.0, 249.0, 252.0, 257.0, 265.0, 286.0, 284.0, 256.0, 260.0, 236.0, 231.0, 265.0, 253.0, 262.0, 
245.0, 280.0, 299.0, 276.0, 254.0, 301.0, 269.0, 205.0, 193.0, 291.0, 291.0, 292.0, 281.0, 216.0, 217.0, 283.0, 247.0, 292.0, 278.0, 272.0, 244.0, 255.0, 269.0, 208.0, 191.0, 274.0, 296.0, 271.0, 262.0, 278.0, 295.0, 252.0, 270.0, 263.0, 262.0, 255.0, 246.0, 249.0, 273.0, 292.0, 272.0, 290.0, 289.0, 251.0, 256.0, 252.0, 278.0, 263.0, 262.0, 185.0, 180.0, 260.0, 258.0, 254.0, 268.0, 247.0, 254.0, 265.0, 260.0, 262.0, 253.0, 290.0, 286.0, 296.0, 286.0, 257.0, 273.0, 255.0, 252.0, 265.0, 257.0, 265.0, 265.0, 274.0, 248.0, 218.0, 238.0, 260.0, 244.0, 291.0, 291.0, 293.0, 286.0, 260.0, 259.0, 270.0, 263.0, 285.0, 288.0, 279.0, 246.0, 252.0, 246.0, 275.0, 292.0, 287.0, 286.0, 218.0, 232.0, 248.0, 231.0, 260.0, 262.0]}, "sampler_perf": {"mean_env_wait_ms": 1.6870995124483497, "mean_processing_ms": 0.4048687165306798, "mean_inference_ms": 2.2359032456104564}, "off_policy_estimator": {}, "info": {"num_steps_trained": 2928000, "num_steps_sampled": 1561600, "sample_time_ms": 20397.642, "load_time_ms": 37.125, "grad_time_ms": 9297.193, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 7.523163957366517e-38, "cur_lr": 0.0010000000474974513, "total_loss": 0.0016971243312582374, "policy_loss": -0.006050370167940855, "vf_loss": 83.2496109008789, "vf_explained_var": 0.7647652626037598, "kl": 0.0023221501614898443, "entropy": 1.154932975769043, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1561600, "episodes_total": 3904, "training_iteration": 122, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-45-44", "timestamp": 1660250744, "time_this_iter_s": 29.548327922821045, "time_total_s": 9162.521025419235, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", 
"num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, 
"in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, 
"entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 9162.521025419235, "timesteps_since_restore": 1561600, "iterations_since_restore": 122, "perf": {"cpu_util_percent": 34.20476190476191, "ram_util_percent": 58.335714285714296}}
-{"episode_reward_max": 582.0, "episode_reward_min": 123.0, "episode_reward_mean": 527.77, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 58.0}, "policy_reward_max": {"ppo": 305.0}, "policy_reward_mean": {"ppo": 263.885}, "custom_metrics": {"sparse_reward_mean": 182.6, "sparse_reward_min": 40, "sparse_reward_max": 200, "shaped_reward_mean": 162.57, "shaped_reward_min": 43, "shaped_reward_max": 182, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.77, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 16.84, "onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 14.97, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 16.03, "useful_onion_pickup_agent_1_min": 3, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 1.21, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 8, "onion_drop_agent_1_mean": 0.97, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 9, "useful_onion_drop_agent_0_mean": 0.64, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.53, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 9, "potting_onion_agent_0_mean": 14.24, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 15.6, "potting_onion_agent_1_min": 3, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 5.86, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 13, "dish_pickup_agent_1_mean": 5.15, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.63, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.07, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.73, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 0.55, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.14, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.12, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.98, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.48, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.91, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.38, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.04, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.06, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 14.24, 
"optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 15.6, "optimal_onion_potting_agent_1_min": 3, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.24, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 15.6, "viable_onion_potting_agent_1_min": 3, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [536.0, 573.0, 564.0, 532.0, 527.0, 579.0, 573.0, 519.0, 530.0, 576.0, 576.0, 576.0, 573.0, 576.0, 476.0, 516.0, 501.0, 522.0, 498.0, 576.0, 527.0, 482.0, 519.0, 576.0, 533.0, 570.0, 522.0, 527.0, 530.0, 536.0, 527.0, 573.0, 522.0, 525.0, 501.0, 522.0, 564.0, 579.0, 507.0, 530.0, 525.0, 365.0, 518.0, 522.0, 501.0, 525.0, 515.0, 576.0, 582.0, 530.0, 507.0, 522.0, 530.0, 522.0, 456.0, 504.0, 582.0, 579.0, 519.0, 533.0, 573.0, 525.0, 498.0, 567.0, 573.0, 450.0, 479.0, 522.0, 576.0, 522.0, 522.0, 482.0, 522.0, 579.0, 573.0, 527.0, 530.0, 522.0, 123.0, 519.0, 570.0, 525.0, 470.0, 522.0, 573.0, 492.0, 570.0, 576.0, 465.0, 411.0, 525.0, 525.0, 579.0, 510.0, 516.0, 573.0, 525.0, 536.0, 573.0, 573.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [278.0, 258.0, 292.0, 281.0, 269.0, 295.0, 260.0, 272.0, 253.0, 274.0, 305.0, 274.0, 287.0, 286.0, 256.0, 263.0, 262.0, 268.0, 287.0, 289.0, 281.0, 295.0, 285.0, 291.0, 278.0, 295.0, 293.0, 283.0, 248.0, 228.0, 251.0, 265.0, 249.0, 252.0, 268.0, 254.0, 251.0, 247.0, 293.0, 283.0, 270.0, 257.0, 235.0, 247.0, 258.0, 261.0, 294.0, 282.0, 265.0, 268.0, 285.0, 285.0, 271.0, 251.0, 259.0, 268.0, 257.0, 273.0, 270.0, 266.0, 259.0, 268.0, 285.0, 288.0, 252.0, 270.0, 263.0, 262.0, 255.0, 246.0, 249.0, 273.0, 292.0, 272.0, 290.0, 289.0, 251.0, 256.0, 252.0, 278.0, 263.0, 262.0, 185.0, 180.0, 260.0, 258.0, 254.0, 268.0, 247.0, 254.0, 265.0, 260.0, 262.0, 253.0, 290.0, 286.0, 296.0, 
286.0, 257.0, 273.0, 255.0, 252.0, 265.0, 257.0, 265.0, 265.0, 274.0, 248.0, 218.0, 238.0, 260.0, 244.0, 291.0, 291.0, 293.0, 286.0, 260.0, 259.0, 270.0, 263.0, 285.0, 288.0, 279.0, 246.0, 252.0, 246.0, 275.0, 292.0, 287.0, 286.0, 218.0, 232.0, 248.0, 231.0, 260.0, 262.0, 285.0, 291.0, 267.0, 255.0, 272.0, 250.0, 246.0, 236.0, 273.0, 249.0, 283.0, 296.0, 298.0, 275.0, 264.0, 263.0, 266.0, 264.0, 255.0, 267.0, 58.0, 65.0, 249.0, 270.0, 298.0, 272.0, 261.0, 264.0, 245.0, 225.0, 252.0, 270.0, 276.0, 297.0, 243.0, 249.0, 273.0, 297.0, 292.0, 284.0, 231.0, 234.0, 201.0, 210.0, 269.0, 256.0, 262.0, 263.0, 287.0, 292.0, 265.0, 245.0, 248.0, 268.0, 292.0, 281.0, 259.0, 266.0, 273.0, 263.0, 271.0, 302.0, 295.0, 278.0]}, "sampler_perf": {"mean_env_wait_ms": 1.676944844024091, "mean_processing_ms": 0.4028482080008365, "mean_inference_ms": 2.2261191245028336}, "off_policy_estimator": {}, "info": {"num_steps_trained": 2952000, "num_steps_sampled": 1574400, "sample_time_ms": 20489.857, "load_time_ms": 37.388, "grad_time_ms": 9147.038, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 3.7615819786832586e-38, "cur_lr": 0.0010000000474974513, "total_loss": 0.004776147659868002, "policy_loss": -0.0032110288739204407, "vf_loss": 85.63726806640625, "vf_explained_var": 0.7386021614074707, "kl": 0.0019908936228603125, "entropy": 1.1530929803848267, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1574400, "episodes_total": 3936, "training_iteration": 123, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-46-15", "timestamp": 1660250775, "time_this_iter_s": 30.61848020553589, "time_total_s": 9193.139505624771, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 
1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": 
false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, 
"entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 9193.139505624771, "timesteps_since_restore": 1574400, "iterations_since_restore": 123, "perf": {"cpu_util_percent": 33.09302325581395, "ram_util_percent": 58.90232558139535}}
-{"episode_reward_max": 582.0, "episode_reward_min": 123.0, "episode_reward_mean": 532.17, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 58.0}, "policy_reward_max": {"ppo": 306.0}, "policy_reward_mean": {"ppo": 266.085}, "custom_metrics": {"sparse_reward_mean": 184.2, "sparse_reward_min": 40, "sparse_reward_max": 200, "shaped_reward_mean": 163.77, "shaped_reward_min": 43, "shaped_reward_max": 182, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.6, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 16.9, "onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 14.81, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 16.11, "useful_onion_pickup_agent_1_min": 3, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 1.06, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 8, "onion_drop_agent_1_mean": 0.84, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 9, "useful_onion_drop_agent_0_mean": 0.59, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.53, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 9, "potting_onion_agent_0_mean": 14.18, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 15.78, "potting_onion_agent_1_min": 3, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.98, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 13, "dish_pickup_agent_1_mean": 5.01, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 4.81, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.07, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.69, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 0.48, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.15, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.11, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.09, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.42, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.03, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.33, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.03, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.07, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 14.18, 
"optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 15.78, "optimal_onion_potting_agent_1_min": 3, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.18, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 15.78, "viable_onion_potting_agent_1_min": 3, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [525.0, 501.0, 519.0, 573.0, 579.0, 527.0, 567.0, 519.0, 579.0, 522.0, 519.0, 524.0, 570.0, 519.0, 573.0, 521.0, 576.0, 507.0, 576.0, 573.0, 455.0, 533.0, 525.0, 582.0, 579.0, 576.0, 516.0, 510.0, 522.0, 513.0, 513.0, 573.0, 573.0, 450.0, 479.0, 522.0, 576.0, 522.0, 522.0, 482.0, 522.0, 579.0, 573.0, 527.0, 530.0, 522.0, 123.0, 519.0, 570.0, 525.0, 470.0, 522.0, 573.0, 492.0, 570.0, 576.0, 465.0, 411.0, 525.0, 525.0, 579.0, 510.0, 516.0, 573.0, 525.0, 536.0, 573.0, 573.0, 536.0, 573.0, 564.0, 532.0, 527.0, 579.0, 573.0, 519.0, 530.0, 576.0, 576.0, 576.0, 573.0, 576.0, 476.0, 516.0, 501.0, 522.0, 498.0, 576.0, 527.0, 482.0, 519.0, 576.0, 533.0, 570.0, 522.0, 527.0, 530.0, 536.0, 527.0, 573.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [270.0, 255.0, 250.0, 251.0, 260.0, 259.0, 289.0, 284.0, 296.0, 283.0, 279.0, 248.0, 288.0, 279.0, 256.0, 263.0, 298.0, 281.0, 264.0, 258.0, 267.0, 252.0, 260.0, 264.0, 272.0, 298.0, 253.0, 266.0, 289.0, 284.0, 271.0, 250.0, 297.0, 279.0, 243.0, 264.0, 290.0, 286.0, 288.0, 285.0, 219.0, 236.0, 270.0, 263.0, 280.0, 245.0, 293.0, 289.0, 294.0, 285.0, 270.0, 306.0, 255.0, 261.0, 263.0, 247.0, 251.0, 271.0, 255.0, 258.0, 267.0, 246.0, 284.0, 289.0, 287.0, 286.0, 218.0, 232.0, 248.0, 231.0, 260.0, 262.0, 285.0, 291.0, 267.0, 255.0, 272.0, 250.0, 246.0, 236.0, 273.0, 249.0, 283.0, 296.0, 298.0, 275.0, 264.0, 263.0, 266.0, 264.0, 255.0, 267.0, 58.0, 65.0, 249.0, 270.0, 298.0, 
272.0, 261.0, 264.0, 245.0, 225.0, 252.0, 270.0, 276.0, 297.0, 243.0, 249.0, 273.0, 297.0, 292.0, 284.0, 231.0, 234.0, 201.0, 210.0, 269.0, 256.0, 262.0, 263.0, 287.0, 292.0, 265.0, 245.0, 248.0, 268.0, 292.0, 281.0, 259.0, 266.0, 273.0, 263.0, 271.0, 302.0, 295.0, 278.0, 278.0, 258.0, 292.0, 281.0, 269.0, 295.0, 260.0, 272.0, 253.0, 274.0, 305.0, 274.0, 287.0, 286.0, 256.0, 263.0, 262.0, 268.0, 287.0, 289.0, 281.0, 295.0, 285.0, 291.0, 278.0, 295.0, 293.0, 283.0, 248.0, 228.0, 251.0, 265.0, 249.0, 252.0, 268.0, 254.0, 251.0, 247.0, 293.0, 283.0, 270.0, 257.0, 235.0, 247.0, 258.0, 261.0, 294.0, 282.0, 265.0, 268.0, 285.0, 285.0, 271.0, 251.0, 259.0, 268.0, 257.0, 273.0, 270.0, 266.0, 259.0, 268.0, 285.0, 288.0]}, "sampler_perf": {"mean_env_wait_ms": 1.666936267539323, "mean_processing_ms": 0.40085873681350664, "mean_inference_ms": 2.216239678267129}, "off_policy_estimator": {}, "info": {"num_steps_trained": 2976000, "num_steps_sampled": 1587200, "sample_time_ms": 20498.249, "load_time_ms": 37.366, "grad_time_ms": 8983.735, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 1.8807909893416293e-38, "cur_lr": 0.0010000000474974513, "total_loss": 0.001341886818408966, "policy_loss": -0.006108943372964859, "vf_loss": 80.26326751708984, "vf_explained_var": 0.763457715511322, "kl": 0.0015635616146028042, "entropy": 1.1509909629821777, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1587200, "episodes_total": 3968, "training_iteration": 124, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-46-43", "timestamp": 1660250803, "time_this_iter_s": 28.20863699913025, "time_total_s": 9221.348142623901, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 
1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": 
false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, 
"entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 9221.348142623901, "timesteps_since_restore": 1587200, "iterations_since_restore": 124, "perf": {"cpu_util_percent": 33.417500000000004, "ram_util_percent": 58.46}}
-{"episode_reward_max": 582.0, "episode_reward_min": 450.0, "episode_reward_mean": 544.24, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 212.0}, "policy_reward_max": {"ppo": 306.0}, "policy_reward_mean": {"ppo": 272.12}, "custom_metrics": {"sparse_reward_mean": 188.6, "sparse_reward_min": 160, "sparse_reward_max": 200, "shaped_reward_mean": 167.04, "shaped_reward_min": 130, "shaped_reward_max": 182, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.41, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 17.16, "onion_pickup_agent_1_min": 12, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 14.82, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 16.58, "useful_onion_pickup_agent_1_min": 12, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.78, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 0.59, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.42, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.37, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 14.29, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 16.25, "potting_onion_agent_1_min": 11, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.09, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.06, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 4.94, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.1, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.64, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.48, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.1, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.11, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.31, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.41, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.23, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.33, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.03, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.06, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.29, 
"optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 16.25, "optimal_onion_potting_agent_1_min": 11, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.29, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 16.25, "viable_onion_potting_agent_1_min": 11, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 533.0, 573.0, 579.0, 564.0, 579.0, 579.0, 570.0, 567.0, 524.0, 573.0, 573.0, 516.0, 525.0, 522.0, 450.0, 576.0, 513.0, 476.0, 573.0, 576.0, 576.0, 579.0, 573.0, 522.0, 519.0, 579.0, 539.0, 570.0, 573.0, 507.0, 573.0, 525.0, 536.0, 573.0, 573.0, 536.0, 573.0, 564.0, 532.0, 527.0, 579.0, 573.0, 519.0, 530.0, 576.0, 576.0, 576.0, 573.0, 576.0, 476.0, 516.0, 501.0, 522.0, 498.0, 576.0, 527.0, 482.0, 519.0, 576.0, 533.0, 570.0, 522.0, 527.0, 530.0, 536.0, 527.0, 573.0, 525.0, 501.0, 519.0, 573.0, 579.0, 527.0, 567.0, 519.0, 579.0, 522.0, 519.0, 524.0, 570.0, 519.0, 573.0, 521.0, 576.0, 507.0, 576.0, 573.0, 455.0, 533.0, 525.0, 582.0, 579.0, 576.0, 516.0, 510.0, 522.0, 513.0, 513.0, 573.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [284.0, 295.0, 270.0, 263.0, 304.0, 269.0, 285.0, 294.0, 286.0, 278.0, 279.0, 300.0, 290.0, 289.0, 284.0, 286.0, 297.0, 270.0, 281.0, 243.0, 283.0, 290.0, 288.0, 285.0, 261.0, 255.0, 257.0, 268.0, 258.0, 264.0, 212.0, 238.0, 292.0, 284.0, 259.0, 254.0, 239.0, 237.0, 290.0, 283.0, 279.0, 297.0, 288.0, 288.0, 280.0, 299.0, 289.0, 284.0, 259.0, 263.0, 274.0, 245.0, 299.0, 280.0, 261.0, 278.0, 295.0, 275.0, 285.0, 288.0, 240.0, 267.0, 288.0, 285.0, 259.0, 266.0, 273.0, 263.0, 271.0, 302.0, 295.0, 278.0, 278.0, 258.0, 292.0, 281.0, 269.0, 295.0, 260.0, 272.0, 253.0, 274.0, 305.0, 274.0, 287.0, 286.0, 256.0, 263.0, 262.0, 268.0, 287.0, 289.0, 281.0, 295.0, 285.0, 291.0, 278.0, 
295.0, 293.0, 283.0, 248.0, 228.0, 251.0, 265.0, 249.0, 252.0, 268.0, 254.0, 251.0, 247.0, 293.0, 283.0, 270.0, 257.0, 235.0, 247.0, 258.0, 261.0, 294.0, 282.0, 265.0, 268.0, 285.0, 285.0, 271.0, 251.0, 259.0, 268.0, 257.0, 273.0, 270.0, 266.0, 259.0, 268.0, 285.0, 288.0, 270.0, 255.0, 250.0, 251.0, 260.0, 259.0, 289.0, 284.0, 296.0, 283.0, 279.0, 248.0, 288.0, 279.0, 256.0, 263.0, 298.0, 281.0, 264.0, 258.0, 267.0, 252.0, 260.0, 264.0, 272.0, 298.0, 253.0, 266.0, 289.0, 284.0, 271.0, 250.0, 297.0, 279.0, 243.0, 264.0, 290.0, 286.0, 288.0, 285.0, 219.0, 236.0, 270.0, 263.0, 280.0, 245.0, 293.0, 289.0, 294.0, 285.0, 270.0, 306.0, 255.0, 261.0, 263.0, 247.0, 251.0, 271.0, 255.0, 258.0, 267.0, 246.0, 284.0, 289.0]}, "sampler_perf": {"mean_env_wait_ms": 1.6570861935641006, "mean_processing_ms": 0.3989029759372638, "mean_inference_ms": 2.206484585731059}, "off_policy_estimator": {}, "info": {"num_steps_trained": 3000000, "num_steps_sampled": 1600000, "sample_time_ms": 20649.468, "load_time_ms": 37.298, "grad_time_ms": 8980.547, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.001723404973745346, "policy_loss": -0.005563261453062296, "vf_loss": 78.65084075927734, "vf_explained_var": 0.7562505602836609, "kl": 0.00201344583183527, "entropy": 1.1568351984024048, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1600000, "episodes_total": 4000, "training_iteration": 125, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-47-13", "timestamp": 1660250833, "time_this_iter_s": 29.59022808074951, "time_total_s": 9250.93837070465, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 9250.93837070465, "timesteps_since_restore": 1600000, "iterations_since_restore": 125, "perf": {"cpu_util_percent": 29.699999999999996, "ram_util_percent": 58.414285714285725}}
-{"episode_reward_max": 627.0, "episode_reward_min": 288.0, "episode_reward_mean": 541.01, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 136.0}, "policy_reward_max": {"ppo": 316.0}, "policy_reward_mean": {"ppo": 270.505}, "custom_metrics": {"sparse_reward_mean": 187.6, "sparse_reward_min": 100, "sparse_reward_max": 220, "shaped_reward_mean": 165.81, "shaped_reward_min": 88, "shaped_reward_max": 187, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.39, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 17.05, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 14.86, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 16.42, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.73, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 0.65, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.41, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.46, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 14.28, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 16.03, "potting_onion_agent_1_min": 4, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.99, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.13, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 4.86, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.15, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.64, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.54, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.12, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.13, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.16, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.46, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.09, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.38, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.05, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.28, 
"optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 16.03, "optimal_onion_potting_agent_1_min": 4, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.28, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 16.03, "viable_onion_potting_agent_1_min": 4, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [470.0, 522.0, 519.0, 570.0, 522.0, 522.0, 522.0, 627.0, 461.0, 484.0, 573.0, 573.0, 288.0, 561.0, 513.0, 576.0, 573.0, 576.0, 522.0, 525.0, 576.0, 519.0, 510.0, 576.0, 576.0, 522.0, 533.0, 530.0, 527.0, 579.0, 570.0, 522.0, 530.0, 536.0, 527.0, 573.0, 525.0, 501.0, 519.0, 573.0, 579.0, 527.0, 567.0, 519.0, 579.0, 522.0, 519.0, 524.0, 570.0, 519.0, 573.0, 521.0, 576.0, 507.0, 576.0, 573.0, 455.0, 533.0, 525.0, 582.0, 579.0, 576.0, 516.0, 510.0, 522.0, 513.0, 513.0, 573.0, 579.0, 533.0, 573.0, 579.0, 564.0, 579.0, 579.0, 570.0, 567.0, 524.0, 573.0, 573.0, 516.0, 525.0, 522.0, 450.0, 576.0, 513.0, 476.0, 573.0, 576.0, 576.0, 579.0, 573.0, 522.0, 519.0, 579.0, 539.0, 570.0, 573.0, 507.0, 573.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [234.0, 236.0, 268.0, 254.0, 259.0, 260.0, 285.0, 285.0, 260.0, 262.0, 261.0, 261.0, 253.0, 269.0, 316.0, 311.0, 216.0, 245.0, 240.0, 244.0, 301.0, 272.0, 287.0, 286.0, 136.0, 152.0, 282.0, 279.0, 255.0, 258.0, 282.0, 294.0, 284.0, 289.0, 275.0, 301.0, 249.0, 273.0, 274.0, 251.0, 299.0, 277.0, 271.0, 248.0, 258.0, 252.0, 285.0, 291.0, 282.0, 294.0, 252.0, 270.0, 256.0, 277.0, 265.0, 265.0, 248.0, 279.0, 296.0, 283.0, 277.0, 293.0, 278.0, 244.0, 257.0, 273.0, 270.0, 266.0, 259.0, 268.0, 285.0, 288.0, 270.0, 255.0, 250.0, 251.0, 260.0, 259.0, 289.0, 284.0, 296.0, 283.0, 279.0, 248.0, 288.0, 279.0, 256.0, 263.0, 298.0, 281.0, 264.0, 258.0, 267.0, 252.0, 260.0, 264.0, 272.0, 
298.0, 253.0, 266.0, 289.0, 284.0, 271.0, 250.0, 297.0, 279.0, 243.0, 264.0, 290.0, 286.0, 288.0, 285.0, 219.0, 236.0, 270.0, 263.0, 280.0, 245.0, 293.0, 289.0, 294.0, 285.0, 270.0, 306.0, 255.0, 261.0, 263.0, 247.0, 251.0, 271.0, 255.0, 258.0, 267.0, 246.0, 284.0, 289.0, 284.0, 295.0, 270.0, 263.0, 304.0, 269.0, 285.0, 294.0, 286.0, 278.0, 279.0, 300.0, 290.0, 289.0, 284.0, 286.0, 297.0, 270.0, 281.0, 243.0, 283.0, 290.0, 288.0, 285.0, 261.0, 255.0, 257.0, 268.0, 258.0, 264.0, 212.0, 238.0, 292.0, 284.0, 259.0, 254.0, 239.0, 237.0, 290.0, 283.0, 279.0, 297.0, 288.0, 288.0, 280.0, 299.0, 289.0, 284.0, 259.0, 263.0, 274.0, 245.0, 299.0, 280.0, 261.0, 278.0, 295.0, 275.0, 285.0, 288.0, 240.0, 267.0, 288.0, 285.0]}, "sampler_perf": {"mean_env_wait_ms": 1.6473988294860027, "mean_processing_ms": 0.3969770866429698, "mean_inference_ms": 2.1969066690858874}, "off_policy_estimator": {}, "info": {"num_steps_trained": 3024000, "num_steps_sampled": 1612800, "sample_time_ms": 20818.492, "load_time_ms": 37.263, "grad_time_ms": 8921.308, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.004004280548542738, "policy_loss": -0.004071192815899849, "vf_loss": 86.5199966430664, "vf_explained_var": 0.7602561116218567, "kl": 0.0020587241742759943, "entropy": 1.153051495552063, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1612800, "episodes_total": 4032, "training_iteration": 126, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-47-43", "timestamp": 1660250863, "time_this_iter_s": 30.492609977722168, "time_total_s": 9281.430980682373, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 9281.430980682373, "timesteps_since_restore": 1612800, "iterations_since_restore": 126, "perf": {"cpu_util_percent": 30.204651162790697, "ram_util_percent": 58.4372093023256}}
-{"episode_reward_max": 630.0, "episode_reward_min": 288.0, "episode_reward_mean": 546.14, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 136.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 273.07}, "custom_metrics": {"sparse_reward_mean": 190.0, "sparse_reward_min": 100, "sparse_reward_max": 220, "shaped_reward_mean": 166.14, "shaped_reward_min": 88, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.74, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 17.0, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 15.13, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 16.38, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.83, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 0.67, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.43, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.45, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 14.54, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 15.95, "potting_onion_agent_1_min": 4, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.88, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.4, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.55, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 4.29, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.74, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 0.56, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.11, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.14, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.97, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.72, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 4.9, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.64, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.05, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.54, 
"optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 15.95, "optimal_onion_potting_agent_1_min": 4, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.54, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 15.95, "viable_onion_potting_agent_1_min": 4, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [576.0, 570.0, 576.0, 573.0, 510.0, 573.0, 570.0, 527.0, 543.0, 570.0, 546.0, 525.0, 573.0, 579.0, 573.0, 495.0, 576.0, 579.0, 576.0, 522.0, 579.0, 519.0, 506.0, 576.0, 579.0, 579.0, 498.0, 567.0, 576.0, 504.0, 630.0, 579.0, 522.0, 513.0, 513.0, 573.0, 579.0, 533.0, 573.0, 579.0, 564.0, 579.0, 579.0, 570.0, 567.0, 524.0, 573.0, 573.0, 516.0, 525.0, 522.0, 450.0, 576.0, 513.0, 476.0, 573.0, 576.0, 576.0, 579.0, 573.0, 522.0, 519.0, 579.0, 539.0, 570.0, 573.0, 507.0, 573.0, 470.0, 522.0, 519.0, 570.0, 522.0, 522.0, 522.0, 627.0, 461.0, 484.0, 573.0, 573.0, 288.0, 561.0, 513.0, 576.0, 573.0, 576.0, 522.0, 525.0, 576.0, 519.0, 510.0, 576.0, 576.0, 522.0, 533.0, 530.0, 527.0, 579.0, 570.0, 522.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [286.0, 290.0, 274.0, 296.0, 290.0, 286.0, 283.0, 290.0, 264.0, 246.0, 290.0, 283.0, 277.0, 293.0, 248.0, 279.0, 273.0, 270.0, 279.0, 291.0, 272.0, 274.0, 273.0, 252.0, 289.0, 284.0, 290.0, 289.0, 282.0, 291.0, 239.0, 256.0, 287.0, 289.0, 284.0, 295.0, 281.0, 295.0, 262.0, 260.0, 299.0, 280.0, 244.0, 275.0, 253.0, 253.0, 287.0, 289.0, 283.0, 296.0, 280.0, 299.0, 253.0, 245.0, 291.0, 276.0, 291.0, 285.0, 244.0, 260.0, 303.0, 327.0, 274.0, 305.0, 251.0, 271.0, 255.0, 258.0, 267.0, 246.0, 284.0, 289.0, 284.0, 295.0, 270.0, 263.0, 304.0, 269.0, 285.0, 294.0, 286.0, 278.0, 279.0, 300.0, 290.0, 289.0, 284.0, 286.0, 297.0, 270.0, 281.0, 243.0, 283.0, 290.0, 288.0, 285.0, 261.0, 
255.0, 257.0, 268.0, 258.0, 264.0, 212.0, 238.0, 292.0, 284.0, 259.0, 254.0, 239.0, 237.0, 290.0, 283.0, 279.0, 297.0, 288.0, 288.0, 280.0, 299.0, 289.0, 284.0, 259.0, 263.0, 274.0, 245.0, 299.0, 280.0, 261.0, 278.0, 295.0, 275.0, 285.0, 288.0, 240.0, 267.0, 288.0, 285.0, 234.0, 236.0, 268.0, 254.0, 259.0, 260.0, 285.0, 285.0, 260.0, 262.0, 261.0, 261.0, 253.0, 269.0, 316.0, 311.0, 216.0, 245.0, 240.0, 244.0, 301.0, 272.0, 287.0, 286.0, 136.0, 152.0, 282.0, 279.0, 255.0, 258.0, 282.0, 294.0, 284.0, 289.0, 275.0, 301.0, 249.0, 273.0, 274.0, 251.0, 299.0, 277.0, 271.0, 248.0, 258.0, 252.0, 285.0, 291.0, 282.0, 294.0, 252.0, 270.0, 256.0, 277.0, 265.0, 265.0, 248.0, 279.0, 296.0, 283.0, 277.0, 293.0, 278.0, 244.0]}, "sampler_perf": {"mean_env_wait_ms": 1.637864403277332, "mean_processing_ms": 0.39508190605522825, "mean_inference_ms": 2.1874563334987878}, "off_policy_estimator": {}, "info": {"num_steps_trained": 3048000, "num_steps_sampled": 1625600, "sample_time_ms": 20804.416, "load_time_ms": 37.185, "grad_time_ms": 8880.278, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.00042137285345233977, "policy_loss": -0.007074173539876938, "vf_loss": 72.28662872314453, "vf_explained_var": 0.7638903260231018, "kl": 0.0020576624665409327, "entropy": 1.1517143249511719, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1625600, "episodes_total": 4064, "training_iteration": 127, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-48-13", "timestamp": 1660250893, "time_this_iter_s": 29.709146738052368, "time_total_s": 9311.140127420425, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 9311.140127420425, "timesteps_since_restore": 1625600, "iterations_since_restore": 127, "perf": {"cpu_util_percent": 33.61904761904762, "ram_util_percent": 58.37380952380953}}
-{"episode_reward_max": 630.0, "episode_reward_min": 288.0, "episode_reward_mean": 546.4, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 136.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 273.2}, "custom_metrics": {"sparse_reward_mean": 190.0, "sparse_reward_min": 100, "sparse_reward_max": 220, "shaped_reward_mean": 166.4, "shaped_reward_min": 88, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.49, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 17.0, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 14.98, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 16.33, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.68, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 0.71, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.33, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, "useful_onion_drop_agent_1_mean": 0.42, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 14.44, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 15.96, "potting_onion_agent_1_min": 4, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.97, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.45, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.72, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.23, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.7, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 0.65, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.1, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.16, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.09, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 10, "soup_pickup_agent_1_mean": 4.64, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 4.99, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.57, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.04, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.04, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.44, 
"optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 15.96, "optimal_onion_potting_agent_1_min": 4, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.44, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 15.96, "viable_onion_potting_agent_1_min": 4, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [573.0, 576.0, 567.0, 567.0, 558.0, 530.0, 573.0, 582.0, 582.0, 521.0, 576.0, 510.0, 579.0, 579.0, 522.0, 573.0, 573.0, 576.0, 473.0, 419.0, 539.0, 573.0, 576.0, 570.0, 573.0, 484.0, 582.0, 576.0, 533.0, 396.0, 573.0, 570.0, 570.0, 573.0, 507.0, 573.0, 470.0, 522.0, 519.0, 570.0, 522.0, 522.0, 522.0, 627.0, 461.0, 484.0, 573.0, 573.0, 288.0, 561.0, 513.0, 576.0, 573.0, 576.0, 522.0, 525.0, 576.0, 519.0, 510.0, 576.0, 576.0, 522.0, 533.0, 530.0, 527.0, 579.0, 570.0, 522.0, 576.0, 570.0, 576.0, 573.0, 510.0, 573.0, 570.0, 527.0, 543.0, 570.0, 546.0, 525.0, 573.0, 579.0, 573.0, 495.0, 576.0, 579.0, 576.0, 522.0, 579.0, 519.0, 506.0, 576.0, 579.0, 579.0, 498.0, 567.0, 576.0, 504.0, 630.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [291.0, 282.0, 285.0, 291.0, 281.0, 286.0, 282.0, 285.0, 289.0, 269.0, 264.0, 266.0, 303.0, 270.0, 307.0, 275.0, 285.0, 297.0, 253.0, 268.0, 304.0, 272.0, 231.0, 279.0, 299.0, 280.0, 289.0, 290.0, 271.0, 251.0, 300.0, 273.0, 288.0, 285.0, 290.0, 286.0, 238.0, 235.0, 212.0, 207.0, 265.0, 274.0, 288.0, 285.0, 304.0, 272.0, 277.0, 293.0, 285.0, 288.0, 261.0, 223.0, 298.0, 284.0, 289.0, 287.0, 281.0, 252.0, 196.0, 200.0, 285.0, 288.0, 280.0, 290.0, 295.0, 275.0, 285.0, 288.0, 240.0, 267.0, 288.0, 285.0, 234.0, 236.0, 268.0, 254.0, 259.0, 260.0, 285.0, 285.0, 260.0, 262.0, 261.0, 261.0, 253.0, 269.0, 316.0, 311.0, 216.0, 245.0, 240.0, 244.0, 301.0, 272.0, 287.0, 286.0, 136.0, 
152.0, 282.0, 279.0, 255.0, 258.0, 282.0, 294.0, 284.0, 289.0, 275.0, 301.0, 249.0, 273.0, 274.0, 251.0, 299.0, 277.0, 271.0, 248.0, 258.0, 252.0, 285.0, 291.0, 282.0, 294.0, 252.0, 270.0, 256.0, 277.0, 265.0, 265.0, 248.0, 279.0, 296.0, 283.0, 277.0, 293.0, 278.0, 244.0, 286.0, 290.0, 274.0, 296.0, 290.0, 286.0, 283.0, 290.0, 264.0, 246.0, 290.0, 283.0, 277.0, 293.0, 248.0, 279.0, 273.0, 270.0, 279.0, 291.0, 272.0, 274.0, 273.0, 252.0, 289.0, 284.0, 290.0, 289.0, 282.0, 291.0, 239.0, 256.0, 287.0, 289.0, 284.0, 295.0, 281.0, 295.0, 262.0, 260.0, 299.0, 280.0, 244.0, 275.0, 253.0, 253.0, 287.0, 289.0, 283.0, 296.0, 280.0, 299.0, 253.0, 245.0, 291.0, 276.0, 291.0, 285.0, 244.0, 260.0, 303.0, 327.0, 274.0, 305.0]}, "sampler_perf": {"mean_env_wait_ms": 1.6284902770333296, "mean_processing_ms": 0.39321693792454526, "mean_inference_ms": 2.178397267354796}, "off_policy_estimator": {}, "info": {"num_steps_trained": 3072000, "num_steps_sampled": 1638400, "sample_time_ms": 20978.899, "load_time_ms": 37.247, "grad_time_ms": 8964.602, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0014722892083227634, "policy_loss": -0.0057091922499239445, "vf_loss": 77.60167694091797, "vf_explained_var": 0.7587153315544128, "kl": 0.0015954332193359733, "entropy": 1.1573811769485474, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1638400, "episodes_total": 4096, "training_iteration": 128, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-48-44", "timestamp": 1660250924, "time_this_iter_s": 31.189378023147583, "time_total_s": 9342.329505443573, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 9342.329505443573, "timesteps_since_restore": 1638400, "iterations_since_restore": 128, "perf": {"cpu_util_percent": 28.313636363636366, "ram_util_percent": 58.377272727272725}}
-{"episode_reward_max": 630.0, "episode_reward_min": 396.0, "episode_reward_mean": 548.71, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 196.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 274.355}, "custom_metrics": {"sparse_reward_mean": 191.0, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 166.71, "shaped_reward_min": 116, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.57, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 17.06, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 15.11, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 16.38, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.77, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 0.72, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.33, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, "useful_onion_drop_agent_1_mean": 0.37, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 14.46, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 16.05, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.02, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.75, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 4.61, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.2, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.77, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 0.86, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 7, "useful_dish_drop_agent_0_mean": 0.14, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.19, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.17, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 11, "soup_pickup_agent_1_mean": 4.7, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.02, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.61, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.1, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 5, "soup_drop_agent_1_mean": 0.04, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.46, 
"optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 16.05, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.46, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 16.05, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [561.0, 510.0, 525.0, 576.0, 582.0, 573.0, 564.0, 516.0, 501.0, 567.0, 524.0, 570.0, 573.0, 530.0, 501.0, 444.0, 576.0, 476.0, 576.0, 507.0, 573.0, 519.0, 570.0, 530.0, 573.0, 512.0, 527.0, 570.0, 570.0, 487.0, 530.0, 582.0, 527.0, 579.0, 570.0, 522.0, 576.0, 570.0, 576.0, 573.0, 510.0, 573.0, 570.0, 527.0, 543.0, 570.0, 546.0, 525.0, 573.0, 579.0, 573.0, 495.0, 576.0, 579.0, 576.0, 522.0, 579.0, 519.0, 506.0, 576.0, 579.0, 579.0, 498.0, 567.0, 576.0, 504.0, 630.0, 579.0, 573.0, 576.0, 567.0, 567.0, 558.0, 530.0, 573.0, 582.0, 582.0, 521.0, 576.0, 510.0, 579.0, 579.0, 522.0, 573.0, 573.0, 576.0, 473.0, 419.0, 539.0, 573.0, 576.0, 570.0, 573.0, 484.0, 582.0, 576.0, 533.0, 396.0, 573.0, 570.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [281.0, 280.0, 253.0, 257.0, 267.0, 258.0, 296.0, 280.0, 283.0, 299.0, 275.0, 298.0, 287.0, 277.0, 267.0, 249.0, 247.0, 254.0, 284.0, 283.0, 267.0, 257.0, 279.0, 291.0, 287.0, 286.0, 272.0, 258.0, 234.0, 267.0, 218.0, 226.0, 275.0, 301.0, 242.0, 234.0, 289.0, 287.0, 240.0, 267.0, 274.0, 299.0, 263.0, 256.0, 270.0, 300.0, 268.0, 262.0, 274.0, 299.0, 255.0, 257.0, 271.0, 256.0, 286.0, 284.0, 269.0, 301.0, 232.0, 255.0, 270.0, 260.0, 296.0, 286.0, 248.0, 279.0, 296.0, 283.0, 277.0, 293.0, 278.0, 244.0, 286.0, 290.0, 274.0, 296.0, 290.0, 286.0, 283.0, 290.0, 264.0, 246.0, 290.0, 283.0, 277.0, 293.0, 248.0, 279.0, 273.0, 270.0, 279.0, 291.0, 272.0, 274.0, 273.0, 252.0, 289.0, 
284.0, 290.0, 289.0, 282.0, 291.0, 239.0, 256.0, 287.0, 289.0, 284.0, 295.0, 281.0, 295.0, 262.0, 260.0, 299.0, 280.0, 244.0, 275.0, 253.0, 253.0, 287.0, 289.0, 283.0, 296.0, 280.0, 299.0, 253.0, 245.0, 291.0, 276.0, 291.0, 285.0, 244.0, 260.0, 303.0, 327.0, 274.0, 305.0, 291.0, 282.0, 285.0, 291.0, 281.0, 286.0, 282.0, 285.0, 289.0, 269.0, 264.0, 266.0, 303.0, 270.0, 307.0, 275.0, 285.0, 297.0, 253.0, 268.0, 304.0, 272.0, 231.0, 279.0, 299.0, 280.0, 289.0, 290.0, 271.0, 251.0, 300.0, 273.0, 288.0, 285.0, 290.0, 286.0, 238.0, 235.0, 212.0, 207.0, 265.0, 274.0, 288.0, 285.0, 304.0, 272.0, 277.0, 293.0, 285.0, 288.0, 261.0, 223.0, 298.0, 284.0, 289.0, 287.0, 281.0, 252.0, 196.0, 200.0, 285.0, 288.0, 280.0, 290.0]}, "sampler_perf": {"mean_env_wait_ms": 1.6192641289400398, "mean_processing_ms": 0.39137958658922545, "mean_inference_ms": 2.169420054882037}, "off_policy_estimator": {}, "info": {"num_steps_trained": 3096000, "num_steps_sampled": 1651200, "sample_time_ms": 21000.262, "load_time_ms": 37.071, "grad_time_ms": 9066.202, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.002854668302461505, "policy_loss": -0.004409888293594122, "vf_loss": 78.45098114013672, "vf_explained_var": 0.7681138515472412, "kl": 0.0020372606813907623, "entropy": 1.1610809564590454, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1651200, "episodes_total": 4128, "training_iteration": 129, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-49-15", "timestamp": 1660250955, "time_this_iter_s": 31.373005151748657, "time_total_s": 9373.702510595322, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 9373.702510595322, "timesteps_since_restore": 1651200, "iterations_since_restore": 129, "perf": {"cpu_util_percent": 30.084444444444443, "ram_util_percent": 58.27111111111109}}
-{"episode_reward_max": 630.0, "episode_reward_min": 396.0, "episode_reward_mean": 544.49, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 196.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 272.245}, "custom_metrics": {"sparse_reward_mean": 189.2, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 166.09, "shaped_reward_min": 116, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.51, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 17.16, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 15.0, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 16.38, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.85, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 7, "onion_drop_agent_1_mean": 0.79, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.37, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, "useful_onion_drop_agent_1_mean": 0.39, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 14.37, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 16.02, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.07, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.62, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 4.72, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.12, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.75, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.83, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 7, "useful_dish_drop_agent_0_mean": 0.16, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.21, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.21, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 11, "soup_pickup_agent_1_mean": 4.59, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.05, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.52, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.11, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 5, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.37, 
"optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 16.02, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.37, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 16.02, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 576.0, 567.0, 516.0, 522.0, 573.0, 510.0, 479.0, 524.0, 501.0, 512.0, 573.0, 522.0, 582.0, 579.0, 576.0, 576.0, 513.0, 570.0, 456.0, 567.0, 539.0, 567.0, 570.0, 576.0, 522.0, 519.0, 525.0, 570.0, 579.0, 453.0, 515.0, 576.0, 504.0, 630.0, 579.0, 573.0, 576.0, 567.0, 567.0, 558.0, 530.0, 573.0, 582.0, 582.0, 521.0, 576.0, 510.0, 579.0, 579.0, 522.0, 573.0, 573.0, 576.0, 473.0, 419.0, 539.0, 573.0, 576.0, 570.0, 573.0, 484.0, 582.0, 576.0, 533.0, 396.0, 573.0, 570.0, 561.0, 510.0, 525.0, 576.0, 582.0, 573.0, 564.0, 516.0, 501.0, 567.0, 524.0, 570.0, 573.0, 530.0, 501.0, 444.0, 576.0, 476.0, 576.0, 507.0, 573.0, 519.0, 570.0, 530.0, 573.0, 512.0, 527.0, 570.0, 570.0, 487.0, 530.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [294.0, 288.0, 288.0, 288.0, 299.0, 268.0, 272.0, 244.0, 252.0, 270.0, 282.0, 291.0, 256.0, 254.0, 250.0, 229.0, 266.0, 258.0, 236.0, 265.0, 238.0, 274.0, 267.0, 306.0, 262.0, 260.0, 288.0, 294.0, 291.0, 288.0, 295.0, 281.0, 277.0, 299.0, 272.0, 241.0, 285.0, 285.0, 226.0, 230.0, 281.0, 286.0, 265.0, 274.0, 287.0, 280.0, 290.0, 280.0, 293.0, 283.0, 256.0, 266.0, 253.0, 266.0, 266.0, 259.0, 287.0, 283.0, 280.0, 299.0, 236.0, 217.0, 268.0, 247.0, 291.0, 285.0, 244.0, 260.0, 303.0, 327.0, 274.0, 305.0, 291.0, 282.0, 285.0, 291.0, 281.0, 286.0, 282.0, 285.0, 289.0, 269.0, 264.0, 266.0, 303.0, 270.0, 307.0, 275.0, 285.0, 297.0, 253.0, 268.0, 304.0, 272.0, 231.0, 279.0, 299.0, 
280.0, 289.0, 290.0, 271.0, 251.0, 300.0, 273.0, 288.0, 285.0, 290.0, 286.0, 238.0, 235.0, 212.0, 207.0, 265.0, 274.0, 288.0, 285.0, 304.0, 272.0, 277.0, 293.0, 285.0, 288.0, 261.0, 223.0, 298.0, 284.0, 289.0, 287.0, 281.0, 252.0, 196.0, 200.0, 285.0, 288.0, 280.0, 290.0, 281.0, 280.0, 253.0, 257.0, 267.0, 258.0, 296.0, 280.0, 283.0, 299.0, 275.0, 298.0, 287.0, 277.0, 267.0, 249.0, 247.0, 254.0, 284.0, 283.0, 267.0, 257.0, 279.0, 291.0, 287.0, 286.0, 272.0, 258.0, 234.0, 267.0, 218.0, 226.0, 275.0, 301.0, 242.0, 234.0, 289.0, 287.0, 240.0, 267.0, 274.0, 299.0, 263.0, 256.0, 270.0, 300.0, 268.0, 262.0, 274.0, 299.0, 255.0, 257.0, 271.0, 256.0, 286.0, 284.0, 269.0, 301.0, 232.0, 255.0, 270.0, 260.0, 296.0, 286.0]}, "sampler_perf": {"mean_env_wait_ms": 1.6101955338743283, "mean_processing_ms": 0.3895722397312522, "mean_inference_ms": 2.1607416335063014}, "off_policy_estimator": {}, "info": {"num_steps_trained": 3120000, "num_steps_sampled": 1664000, "sample_time_ms": 21017.48, "load_time_ms": 36.902, "grad_time_ms": 9228.931, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.00922582671046257, "policy_loss": 0.0016869133105501533, "vf_loss": 81.20984649658203, "vf_explained_var": 0.7594642043113708, "kl": 0.003354247659444809, "entropy": 1.164129376411438, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1664000, "episodes_total": 4160, "training_iteration": 130, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-49-47", "timestamp": 1660250987, "time_this_iter_s": 31.841378211975098, "time_total_s": 9405.543888807297, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 9405.543888807297, "timesteps_since_restore": 1664000, "iterations_since_restore": 130, "perf": {"cpu_util_percent": 32.67111111111111, "ram_util_percent": 58.35999999999998}}
-{"episode_reward_max": 582.0, "episode_reward_min": 396.0, "episode_reward_mean": 538.49, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 191.0}, "policy_reward_max": {"ppo": 306.0}, "policy_reward_mean": {"ppo": 269.245}, "custom_metrics": {"sparse_reward_mean": 187.2, "sparse_reward_min": 140, "sparse_reward_max": 200, "shaped_reward_mean": 164.09, "shaped_reward_min": 116, "shaped_reward_max": 182, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.79, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 16.75, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 15.29, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 15.98, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.94, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 7, "onion_drop_agent_1_mean": 0.81, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.39, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, "useful_onion_drop_agent_1_mean": 0.41, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 14.59, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 15.61, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.01, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.72, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 4.4, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.08, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.85, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.86, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 7, "useful_dish_drop_agent_0_mean": 0.17, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.21, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.04, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 11, "soup_pickup_agent_1_mean": 4.67, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.92, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.58, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.08, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 5, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.59, 
"optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 15.61, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.59, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 15.61, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [573.0, 570.0, 516.0, 573.0, 570.0, 473.0, 573.0, 521.0, 570.0, 530.0, 582.0, 525.0, 579.0, 533.0, 564.0, 582.0, 461.0, 573.0, 524.0, 576.0, 516.0, 573.0, 573.0, 396.0, 576.0, 507.0, 467.0, 549.0, 527.0, 522.0, 418.0, 579.0, 533.0, 396.0, 573.0, 570.0, 561.0, 510.0, 525.0, 576.0, 582.0, 573.0, 564.0, 516.0, 501.0, 567.0, 524.0, 570.0, 573.0, 530.0, 501.0, 444.0, 576.0, 476.0, 576.0, 507.0, 573.0, 519.0, 570.0, 530.0, 573.0, 512.0, 527.0, 570.0, 570.0, 487.0, 530.0, 582.0, 582.0, 576.0, 567.0, 516.0, 522.0, 573.0, 510.0, 479.0, 524.0, 501.0, 512.0, 573.0, 522.0, 582.0, 579.0, 576.0, 576.0, 513.0, 570.0, 456.0, 567.0, 539.0, 567.0, 570.0, 576.0, 522.0, 519.0, 525.0, 570.0, 579.0, 453.0, 515.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [279.0, 294.0, 280.0, 290.0, 253.0, 263.0, 288.0, 285.0, 298.0, 272.0, 235.0, 238.0, 282.0, 291.0, 265.0, 256.0, 293.0, 277.0, 264.0, 266.0, 291.0, 291.0, 271.0, 254.0, 287.0, 292.0, 274.0, 259.0, 286.0, 278.0, 287.0, 295.0, 215.0, 246.0, 288.0, 285.0, 268.0, 256.0, 288.0, 288.0, 276.0, 240.0, 292.0, 281.0, 300.0, 273.0, 191.0, 205.0, 290.0, 286.0, 250.0, 257.0, 241.0, 226.0, 274.0, 275.0, 272.0, 255.0, 262.0, 260.0, 207.0, 211.0, 284.0, 295.0, 281.0, 252.0, 196.0, 200.0, 285.0, 288.0, 280.0, 290.0, 281.0, 280.0, 253.0, 257.0, 267.0, 258.0, 296.0, 280.0, 283.0, 299.0, 275.0, 298.0, 287.0, 277.0, 267.0, 249.0, 247.0, 254.0, 284.0, 283.0, 267.0, 257.0, 279.0, 291.0, 287.0, 
286.0, 272.0, 258.0, 234.0, 267.0, 218.0, 226.0, 275.0, 301.0, 242.0, 234.0, 289.0, 287.0, 240.0, 267.0, 274.0, 299.0, 263.0, 256.0, 270.0, 300.0, 268.0, 262.0, 274.0, 299.0, 255.0, 257.0, 271.0, 256.0, 286.0, 284.0, 269.0, 301.0, 232.0, 255.0, 270.0, 260.0, 296.0, 286.0, 294.0, 288.0, 288.0, 288.0, 299.0, 268.0, 272.0, 244.0, 252.0, 270.0, 282.0, 291.0, 256.0, 254.0, 250.0, 229.0, 266.0, 258.0, 236.0, 265.0, 238.0, 274.0, 267.0, 306.0, 262.0, 260.0, 288.0, 294.0, 291.0, 288.0, 295.0, 281.0, 277.0, 299.0, 272.0, 241.0, 285.0, 285.0, 226.0, 230.0, 281.0, 286.0, 265.0, 274.0, 287.0, 280.0, 290.0, 280.0, 293.0, 283.0, 256.0, 266.0, 253.0, 266.0, 266.0, 259.0, 287.0, 283.0, 280.0, 299.0, 236.0, 217.0, 268.0, 247.0]}, "sampler_perf": {"mean_env_wait_ms": 1.601265947983415, "mean_processing_ms": 0.38779407949911077, "mean_inference_ms": 2.1521646972676964}, "off_policy_estimator": {}, "info": {"num_steps_trained": 3144000, "num_steps_sampled": 1676800, "sample_time_ms": 21002.089, "load_time_ms": 36.986, "grad_time_ms": 9370.108, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.003206493565812707, "policy_loss": -0.00454886956140399, "vf_loss": 83.29342651367188, "vf_explained_var": 0.7723144888877869, "kl": 0.0017231384990736842, "entropy": 1.1479605436325073, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1676800, "episodes_total": 4192, "training_iteration": 131, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-50-19", "timestamp": 1660251019, "time_this_iter_s": 31.782477855682373, "time_total_s": 9437.32636666298, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 9437.32636666298, "timesteps_since_restore": 1676800, "iterations_since_restore": 131, "perf": {"cpu_util_percent": 30.406818181818174, "ram_util_percent": 58.26818181818181}}
-{"episode_reward_max": 587.0, "episode_reward_min": 396.0, "episode_reward_mean": 544.22, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 191.0}, "policy_reward_max": {"ppo": 306.0}, "policy_reward_mean": {"ppo": 272.11}, "custom_metrics": {"sparse_reward_mean": 189.0, "sparse_reward_min": 140, "sparse_reward_max": 200, "shaped_reward_mean": 166.22, "shaped_reward_min": 116, "shaped_reward_max": 187, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.9, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 16.98, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 15.5, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 16.29, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.95, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 7, "onion_drop_agent_1_mean": 0.82, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.38, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, "useful_onion_drop_agent_1_mean": 0.4, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 14.73, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 15.85, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.04, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.46, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.49, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.17, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.8, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.63, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.1, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.15, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.08, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.68, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.96, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.61, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.05, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.73, 
"optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 15.85, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.73, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 15.85, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [570.0, 504.0, 573.0, 561.0, 576.0, 582.0, 530.0, 498.0, 573.0, 573.0, 579.0, 579.0, 533.0, 522.0, 579.0, 576.0, 539.0, 519.0, 573.0, 573.0, 576.0, 573.0, 573.0, 573.0, 573.0, 495.0, 587.0, 570.0, 524.0, 579.0, 509.0, 527.0, 570.0, 487.0, 530.0, 582.0, 582.0, 576.0, 567.0, 516.0, 522.0, 573.0, 510.0, 479.0, 524.0, 501.0, 512.0, 573.0, 522.0, 582.0, 579.0, 576.0, 576.0, 513.0, 570.0, 456.0, 567.0, 539.0, 567.0, 570.0, 576.0, 522.0, 519.0, 525.0, 570.0, 579.0, 453.0, 515.0, 573.0, 570.0, 516.0, 573.0, 570.0, 473.0, 573.0, 521.0, 570.0, 530.0, 582.0, 525.0, 579.0, 533.0, 564.0, 582.0, 461.0, 573.0, 524.0, 576.0, 516.0, 573.0, 573.0, 396.0, 576.0, 507.0, 467.0, 549.0, 527.0, 522.0, 418.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [292.0, 278.0, 232.0, 272.0, 287.0, 286.0, 289.0, 272.0, 282.0, 294.0, 286.0, 296.0, 268.0, 262.0, 253.0, 245.0, 290.0, 283.0, 278.0, 295.0, 273.0, 306.0, 290.0, 289.0, 260.0, 273.0, 267.0, 255.0, 285.0, 294.0, 293.0, 283.0, 272.0, 267.0, 248.0, 271.0, 287.0, 286.0, 288.0, 285.0, 291.0, 285.0, 289.0, 284.0, 274.0, 299.0, 282.0, 291.0, 296.0, 277.0, 250.0, 245.0, 288.0, 299.0, 289.0, 281.0, 260.0, 264.0, 283.0, 296.0, 246.0, 263.0, 267.0, 260.0, 269.0, 301.0, 232.0, 255.0, 270.0, 260.0, 296.0, 286.0, 294.0, 288.0, 288.0, 288.0, 299.0, 268.0, 272.0, 244.0, 252.0, 270.0, 282.0, 291.0, 256.0, 254.0, 250.0, 229.0, 266.0, 258.0, 236.0, 265.0, 238.0, 274.0, 267.0, 306.0, 262.0, 
260.0, 288.0, 294.0, 291.0, 288.0, 295.0, 281.0, 277.0, 299.0, 272.0, 241.0, 285.0, 285.0, 226.0, 230.0, 281.0, 286.0, 265.0, 274.0, 287.0, 280.0, 290.0, 280.0, 293.0, 283.0, 256.0, 266.0, 253.0, 266.0, 266.0, 259.0, 287.0, 283.0, 280.0, 299.0, 236.0, 217.0, 268.0, 247.0, 279.0, 294.0, 280.0, 290.0, 253.0, 263.0, 288.0, 285.0, 298.0, 272.0, 235.0, 238.0, 282.0, 291.0, 265.0, 256.0, 293.0, 277.0, 264.0, 266.0, 291.0, 291.0, 271.0, 254.0, 287.0, 292.0, 274.0, 259.0, 286.0, 278.0, 287.0, 295.0, 215.0, 246.0, 288.0, 285.0, 268.0, 256.0, 288.0, 288.0, 276.0, 240.0, 292.0, 281.0, 300.0, 273.0, 191.0, 205.0, 290.0, 286.0, 250.0, 257.0, 241.0, 226.0, 274.0, 275.0, 272.0, 255.0, 262.0, 260.0, 207.0, 211.0, 284.0, 295.0]}, "sampler_perf": {"mean_env_wait_ms": 1.5924734608627003, "mean_processing_ms": 0.38604375166496974, "mean_inference_ms": 2.1438413106785164}, "off_policy_estimator": {}, "info": {"num_steps_trained": 3168000, "num_steps_sampled": 1689600, "sample_time_ms": 21092.562, "load_time_ms": 37.447, "grad_time_ms": 9515.71, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0031676997896283865, "policy_loss": -0.004229032900184393, "vf_loss": 79.702880859375, "vf_explained_var": 0.7654879093170166, "kl": 0.0019305540481582284, "entropy": 1.1470965147018433, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1689600, "episodes_total": 4224, "training_iteration": 132, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-50-51", "timestamp": 1660251051, "time_this_iter_s": 31.913390159606934, "time_total_s": 9469.239756822586, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 9469.239756822586, "timesteps_since_restore": 1689600, "iterations_since_restore": 132, "perf": {"cpu_util_percent": 33.193478260869554, "ram_util_percent": 58.276086956521716}}
-{"episode_reward_max": 587.0, "episode_reward_min": 123.0, "episode_reward_mean": 540.32, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 60.0}, "policy_reward_max": {"ppo": 306.0}, "policy_reward_mean": {"ppo": 270.16}, "custom_metrics": {"sparse_reward_mean": 187.6, "sparse_reward_min": 40, "sparse_reward_max": 200, "shaped_reward_mean": 165.12, "shaped_reward_min": 43, "shaped_reward_max": 187, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.9, "onion_pickup_agent_0_min": 5, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 16.8, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 15.56, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 16.01, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.83, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 0.89, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 9, "useful_onion_drop_agent_0_mean": 0.35, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.41, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 9, "potting_onion_agent_0_mean": 14.81, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 15.58, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.98, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 5.42, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.46, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 4.19, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.76, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 0.68, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.08, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.15, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.08, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.65, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.93, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.54, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.08, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.08, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 3, "optimal_onion_potting_agent_0_mean": 14.81, 
"optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 15.58, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.81, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 15.58, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [396.0, 576.0, 519.0, 461.0, 522.0, 570.0, 576.0, 501.0, 570.0, 536.0, 579.0, 576.0, 522.0, 573.0, 573.0, 530.0, 519.0, 573.0, 525.0, 576.0, 582.0, 576.0, 570.0, 573.0, 522.0, 419.0, 123.0, 582.0, 576.0, 522.0, 579.0, 576.0, 570.0, 579.0, 453.0, 515.0, 573.0, 570.0, 516.0, 573.0, 570.0, 473.0, 573.0, 521.0, 570.0, 530.0, 582.0, 525.0, 579.0, 533.0, 564.0, 582.0, 461.0, 573.0, 524.0, 576.0, 516.0, 573.0, 573.0, 396.0, 576.0, 507.0, 467.0, 549.0, 527.0, 522.0, 418.0, 579.0, 570.0, 504.0, 573.0, 561.0, 576.0, 582.0, 530.0, 498.0, 573.0, 573.0, 579.0, 579.0, 533.0, 522.0, 579.0, 576.0, 539.0, 519.0, 573.0, 573.0, 576.0, 573.0, 573.0, 573.0, 573.0, 495.0, 587.0, 570.0, 524.0, 579.0, 509.0, 527.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [203.0, 193.0, 298.0, 278.0, 254.0, 265.0, 231.0, 230.0, 259.0, 263.0, 283.0, 287.0, 287.0, 289.0, 244.0, 257.0, 282.0, 288.0, 271.0, 265.0, 287.0, 292.0, 281.0, 295.0, 263.0, 259.0, 293.0, 280.0, 281.0, 292.0, 273.0, 257.0, 252.0, 267.0, 290.0, 283.0, 265.0, 260.0, 296.0, 280.0, 297.0, 285.0, 298.0, 278.0, 298.0, 272.0, 298.0, 275.0, 256.0, 266.0, 215.0, 204.0, 60.0, 63.0, 287.0, 295.0, 281.0, 295.0, 256.0, 266.0, 287.0, 292.0, 288.0, 288.0, 287.0, 283.0, 280.0, 299.0, 236.0, 217.0, 268.0, 247.0, 279.0, 294.0, 280.0, 290.0, 253.0, 263.0, 288.0, 285.0, 298.0, 272.0, 235.0, 238.0, 282.0, 291.0, 265.0, 256.0, 293.0, 277.0, 264.0, 266.0, 291.0, 291.0, 271.0, 254.0, 287.0, 
292.0, 274.0, 259.0, 286.0, 278.0, 287.0, 295.0, 215.0, 246.0, 288.0, 285.0, 268.0, 256.0, 288.0, 288.0, 276.0, 240.0, 292.0, 281.0, 300.0, 273.0, 191.0, 205.0, 290.0, 286.0, 250.0, 257.0, 241.0, 226.0, 274.0, 275.0, 272.0, 255.0, 262.0, 260.0, 207.0, 211.0, 284.0, 295.0, 292.0, 278.0, 232.0, 272.0, 287.0, 286.0, 289.0, 272.0, 282.0, 294.0, 286.0, 296.0, 268.0, 262.0, 253.0, 245.0, 290.0, 283.0, 278.0, 295.0, 273.0, 306.0, 290.0, 289.0, 260.0, 273.0, 267.0, 255.0, 285.0, 294.0, 293.0, 283.0, 272.0, 267.0, 248.0, 271.0, 287.0, 286.0, 288.0, 285.0, 291.0, 285.0, 289.0, 284.0, 274.0, 299.0, 282.0, 291.0, 296.0, 277.0, 250.0, 245.0, 288.0, 299.0, 289.0, 281.0, 260.0, 264.0, 283.0, 296.0, 246.0, 263.0, 267.0, 260.0]}, "sampler_perf": {"mean_env_wait_ms": 1.583806400101539, "mean_processing_ms": 0.3843212568870559, "mean_inference_ms": 2.1355512648653474}, "off_policy_estimator": {}, "info": {"num_steps_trained": 3192000, "num_steps_sampled": 1702400, "sample_time_ms": 21045.575, "load_time_ms": 37.092, "grad_time_ms": 9547.761, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.004417246673256159, "policy_loss": -0.0036684710066765547, "vf_loss": 86.54926300048828, "vf_explained_var": 0.7708062529563904, "kl": 0.0019647751469165087, "entropy": 1.1384211778640747, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1702400, "episodes_total": 4256, "training_iteration": 133, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-51-22", "timestamp": 1660251082, "time_this_iter_s": 30.465492963790894, "time_total_s": 9499.705249786377, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 9499.705249786377, "timesteps_since_restore": 1702400, "iterations_since_restore": 133, "perf": {"cpu_util_percent": 27.94883720930233, "ram_util_percent": 58.35813953488371}}
-{"episode_reward_max": 587.0, "episode_reward_min": 123.0, "episode_reward_mean": 547.7, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 60.0}, "policy_reward_max": {"ppo": 306.0}, "policy_reward_mean": {"ppo": 273.85}, "custom_metrics": {"sparse_reward_mean": 189.8, "sparse_reward_min": 40, "sparse_reward_max": 200, "shaped_reward_mean": 168.1, "shaped_reward_min": 43, "shaped_reward_max": 187, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.85, "onion_pickup_agent_0_min": 5, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 17.27, "onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 15.49, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 16.46, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.86, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 0.85, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 9, "useful_onion_drop_agent_0_mean": 0.35, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.42, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 9, "potting_onion_agent_0_mean": 14.74, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 16.03, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.0, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 5.16, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.74, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.44, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.69, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 0.43, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.06, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.09, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.12, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.66, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 4.95, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.58, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.09, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.08, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 3, "optimal_onion_potting_agent_0_mean": 14.74, 
"optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 16.03, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.74, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 16.03, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [519.0, 582.0, 582.0, 579.0, 579.0, 507.0, 576.0, 522.0, 516.0, 579.0, 582.0, 573.0, 576.0, 579.0, 530.0, 582.0, 522.0, 579.0, 576.0, 525.0, 519.0, 539.0, 570.0, 567.0, 570.0, 576.0, 579.0, 582.0, 573.0, 579.0, 582.0, 579.0, 527.0, 522.0, 418.0, 579.0, 570.0, 504.0, 573.0, 561.0, 576.0, 582.0, 530.0, 498.0, 573.0, 573.0, 579.0, 579.0, 533.0, 522.0, 579.0, 576.0, 539.0, 519.0, 573.0, 573.0, 576.0, 573.0, 573.0, 573.0, 573.0, 495.0, 587.0, 570.0, 524.0, 579.0, 509.0, 527.0, 396.0, 576.0, 519.0, 461.0, 522.0, 570.0, 576.0, 501.0, 570.0, 536.0, 579.0, 576.0, 522.0, 573.0, 573.0, 530.0, 519.0, 573.0, 525.0, 576.0, 582.0, 576.0, 570.0, 573.0, 522.0, 419.0, 123.0, 582.0, 576.0, 522.0, 579.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [256.0, 263.0, 277.0, 305.0, 296.0, 286.0, 288.0, 291.0, 306.0, 273.0, 258.0, 249.0, 277.0, 299.0, 278.0, 244.0, 245.0, 271.0, 279.0, 300.0, 291.0, 291.0, 288.0, 285.0, 290.0, 286.0, 286.0, 293.0, 270.0, 260.0, 293.0, 289.0, 263.0, 259.0, 297.0, 282.0, 282.0, 294.0, 257.0, 268.0, 264.0, 255.0, 270.0, 269.0, 280.0, 290.0, 263.0, 304.0, 285.0, 285.0, 290.0, 286.0, 301.0, 278.0, 296.0, 286.0, 296.0, 277.0, 284.0, 295.0, 280.0, 302.0, 289.0, 290.0, 272.0, 255.0, 262.0, 260.0, 207.0, 211.0, 284.0, 295.0, 292.0, 278.0, 232.0, 272.0, 287.0, 286.0, 289.0, 272.0, 282.0, 294.0, 286.0, 296.0, 268.0, 262.0, 253.0, 245.0, 290.0, 283.0, 278.0, 295.0, 273.0, 306.0, 290.0, 289.0, 260.0, 
273.0, 267.0, 255.0, 285.0, 294.0, 293.0, 283.0, 272.0, 267.0, 248.0, 271.0, 287.0, 286.0, 288.0, 285.0, 291.0, 285.0, 289.0, 284.0, 274.0, 299.0, 282.0, 291.0, 296.0, 277.0, 250.0, 245.0, 288.0, 299.0, 289.0, 281.0, 260.0, 264.0, 283.0, 296.0, 246.0, 263.0, 267.0, 260.0, 203.0, 193.0, 298.0, 278.0, 254.0, 265.0, 231.0, 230.0, 259.0, 263.0, 283.0, 287.0, 287.0, 289.0, 244.0, 257.0, 282.0, 288.0, 271.0, 265.0, 287.0, 292.0, 281.0, 295.0, 263.0, 259.0, 293.0, 280.0, 281.0, 292.0, 273.0, 257.0, 252.0, 267.0, 290.0, 283.0, 265.0, 260.0, 296.0, 280.0, 297.0, 285.0, 298.0, 278.0, 298.0, 272.0, 298.0, 275.0, 256.0, 266.0, 215.0, 204.0, 60.0, 63.0, 287.0, 295.0, 281.0, 295.0, 256.0, 266.0, 287.0, 292.0, 288.0, 288.0]}, "sampler_perf": {"mean_env_wait_ms": 1.5752673474082683, "mean_processing_ms": 0.382627028657334, "mean_inference_ms": 2.127290316097115}, "off_policy_estimator": {}, "info": {"num_steps_trained": 3216000, "num_steps_sampled": 1715200, "sample_time_ms": 21118.905, "load_time_ms": 37.128, "grad_time_ms": 9783.423, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.003744603367522359, "policy_loss": -0.004061851184815168, "vf_loss": 83.74505615234375, "vf_explained_var": 0.7541170120239258, "kl": 0.001809759414754808, "entropy": 1.1361082792282104, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1715200, "episodes_total": 4288, "training_iteration": 134, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-51-53", "timestamp": 1660251113, "time_this_iter_s": 31.301603078842163, "time_total_s": 9531.00685286522, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 9531.00685286522, "timesteps_since_restore": 1715200, "iterations_since_restore": 134, "perf": {"cpu_util_percent": 35.08181818181818, "ram_util_percent": 58.252272727272725}}
-{"episode_reward_max": 582.0, "episode_reward_min": 123.0, "episode_reward_mean": 546.98, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 60.0}, "policy_reward_max": {"ppo": 306.0}, "policy_reward_mean": {"ppo": 273.49}, "custom_metrics": {"sparse_reward_mean": 189.4, "sparse_reward_min": 40, "sparse_reward_max": 200, "shaped_reward_mean": 168.18, "shaped_reward_min": 43, "shaped_reward_max": 182, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.7, "onion_pickup_agent_0_min": 5, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 17.28, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 15.23, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 16.45, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.75, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 0.82, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 9, "useful_onion_drop_agent_0_mean": 0.32, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.4, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 9, "potting_onion_agent_0_mean": 14.66, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 16.09, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.94, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 5.17, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.91, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.4, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.62, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 0.47, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.07, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.06, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.12, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.59, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.01, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.49, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.07, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.08, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 3, "optimal_onion_potting_agent_0_mean": 14.66, 
"optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 16.09, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.66, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 16.09, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 561.0, 533.0, 582.0, 573.0, 533.0, 581.0, 459.0, 570.0, 522.0, 579.0, 570.0, 573.0, 582.0, 576.0, 582.0, 579.0, 576.0, 576.0, 567.0, 582.0, 579.0, 564.0, 582.0, 570.0, 228.0, 479.0, 573.0, 516.0, 576.0, 582.0, 522.0, 524.0, 579.0, 509.0, 527.0, 396.0, 576.0, 519.0, 461.0, 522.0, 570.0, 576.0, 501.0, 570.0, 536.0, 579.0, 576.0, 522.0, 573.0, 573.0, 530.0, 519.0, 573.0, 525.0, 576.0, 582.0, 576.0, 570.0, 573.0, 522.0, 419.0, 123.0, 582.0, 576.0, 522.0, 579.0, 576.0, 519.0, 582.0, 582.0, 579.0, 579.0, 507.0, 576.0, 522.0, 516.0, 579.0, 582.0, 573.0, 576.0, 579.0, 530.0, 582.0, 522.0, 579.0, 576.0, 525.0, 519.0, 539.0, 570.0, 567.0, 570.0, 576.0, 579.0, 582.0, 573.0, 579.0, 582.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [301.0, 278.0, 274.0, 287.0, 266.0, 267.0, 284.0, 298.0, 292.0, 281.0, 273.0, 260.0, 288.0, 293.0, 233.0, 226.0, 287.0, 283.0, 253.0, 269.0, 290.0, 289.0, 287.0, 283.0, 279.0, 294.0, 289.0, 293.0, 297.0, 279.0, 291.0, 291.0, 277.0, 302.0, 299.0, 277.0, 299.0, 277.0, 265.0, 302.0, 290.0, 292.0, 288.0, 291.0, 289.0, 275.0, 285.0, 297.0, 277.0, 293.0, 114.0, 114.0, 248.0, 231.0, 285.0, 288.0, 256.0, 260.0, 292.0, 284.0, 300.0, 282.0, 253.0, 269.0, 260.0, 264.0, 283.0, 296.0, 246.0, 263.0, 267.0, 260.0, 203.0, 193.0, 298.0, 278.0, 254.0, 265.0, 231.0, 230.0, 259.0, 263.0, 283.0, 287.0, 287.0, 289.0, 244.0, 257.0, 282.0, 288.0, 271.0, 265.0, 287.0, 292.0, 281.0, 295.0, 263.0, 
259.0, 293.0, 280.0, 281.0, 292.0, 273.0, 257.0, 252.0, 267.0, 290.0, 283.0, 265.0, 260.0, 296.0, 280.0, 297.0, 285.0, 298.0, 278.0, 298.0, 272.0, 298.0, 275.0, 256.0, 266.0, 215.0, 204.0, 60.0, 63.0, 287.0, 295.0, 281.0, 295.0, 256.0, 266.0, 287.0, 292.0, 288.0, 288.0, 256.0, 263.0, 277.0, 305.0, 296.0, 286.0, 288.0, 291.0, 306.0, 273.0, 258.0, 249.0, 277.0, 299.0, 278.0, 244.0, 245.0, 271.0, 279.0, 300.0, 291.0, 291.0, 288.0, 285.0, 290.0, 286.0, 286.0, 293.0, 270.0, 260.0, 293.0, 289.0, 263.0, 259.0, 297.0, 282.0, 282.0, 294.0, 257.0, 268.0, 264.0, 255.0, 270.0, 269.0, 280.0, 290.0, 263.0, 304.0, 285.0, 285.0, 290.0, 286.0, 301.0, 278.0, 296.0, 286.0, 296.0, 277.0, 284.0, 295.0, 280.0, 302.0, 289.0, 290.0]}, "sampler_perf": {"mean_env_wait_ms": 1.5668312042090953, "mean_processing_ms": 0.38095382690757534, "mean_inference_ms": 2.118642826517617}, "off_policy_estimator": {}, "info": {"num_steps_trained": 3240000, "num_steps_sampled": 1728000, "sample_time_ms": 20924.032, "load_time_ms": 37.391, "grad_time_ms": 9937.539, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0035265153273940086, "policy_loss": -0.004777689930051565, "vf_loss": 88.75411224365234, "vf_explained_var": 0.7641527056694031, "kl": 0.002029512310400605, "entropy": 1.1424118280410767, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1728000, "episodes_total": 4320, "training_iteration": 135, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-52-22", "timestamp": 1660251142, "time_this_iter_s": 29.18880271911621, "time_total_s": 9560.195655584335, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 9560.195655584335, "timesteps_since_restore": 1728000, "iterations_since_restore": 135, "perf": {"cpu_util_percent": 35.34146341463415, "ram_util_percent": 58.2390243902439}}
-{"episode_reward_max": 582.0, "episode_reward_min": 228.0, "episode_reward_mean": 550.69, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 114.0}, "policy_reward_max": {"ppo": 309.0}, "policy_reward_mean": {"ppo": 275.345}, "custom_metrics": {"sparse_reward_mean": 190.6, "sparse_reward_min": 80, "sparse_reward_max": 200, "shaped_reward_mean": 169.49, "shaped_reward_min": 68, "shaped_reward_max": 182, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.86, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 17.31, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 15.28, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 16.55, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.85, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 0.78, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.35, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.44, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 14.71, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 16.22, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.7, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.29, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.94, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.51, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.49, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.43, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.05, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.05, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.01, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.68, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 4.96, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.63, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.03, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.71, 
"optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 16.22, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.71, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 16.22, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [570.0, 579.0, 510.0, 582.0, 573.0, 573.0, 470.0, 579.0, 530.0, 567.0, 573.0, 533.0, 582.0, 461.0, 496.0, 510.0, 513.0, 579.0, 530.0, 570.0, 525.0, 525.0, 573.0, 294.0, 576.0, 539.0, 522.0, 498.0, 573.0, 576.0, 579.0, 570.0, 576.0, 522.0, 579.0, 576.0, 519.0, 582.0, 582.0, 579.0, 579.0, 507.0, 576.0, 522.0, 516.0, 579.0, 582.0, 573.0, 576.0, 579.0, 530.0, 582.0, 522.0, 579.0, 576.0, 525.0, 519.0, 539.0, 570.0, 567.0, 570.0, 576.0, 579.0, 582.0, 573.0, 579.0, 582.0, 579.0, 579.0, 561.0, 533.0, 582.0, 573.0, 533.0, 581.0, 459.0, 570.0, 522.0, 579.0, 570.0, 573.0, 582.0, 576.0, 582.0, 579.0, 576.0, 576.0, 567.0, 582.0, 579.0, 564.0, 582.0, 570.0, 228.0, 479.0, 573.0, 516.0, 576.0, 582.0, 522.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [291.0, 279.0, 270.0, 309.0, 255.0, 255.0, 282.0, 300.0, 275.0, 298.0, 293.0, 280.0, 239.0, 231.0, 293.0, 286.0, 260.0, 270.0, 287.0, 280.0, 273.0, 300.0, 275.0, 258.0, 285.0, 297.0, 229.0, 232.0, 252.0, 244.0, 270.0, 240.0, 241.0, 272.0, 288.0, 291.0, 249.0, 281.0, 287.0, 283.0, 257.0, 268.0, 258.0, 267.0, 279.0, 294.0, 142.0, 152.0, 285.0, 291.0, 281.0, 258.0, 262.0, 260.0, 261.0, 237.0, 292.0, 281.0, 287.0, 289.0, 285.0, 294.0, 279.0, 291.0, 281.0, 295.0, 256.0, 266.0, 287.0, 292.0, 288.0, 288.0, 256.0, 263.0, 277.0, 305.0, 296.0, 286.0, 288.0, 291.0, 306.0, 273.0, 258.0, 249.0, 277.0, 299.0, 278.0, 244.0, 245.0, 271.0, 279.0, 300.0, 291.0, 291.0, 288.0, 285.0, 290.0, 
286.0, 286.0, 293.0, 270.0, 260.0, 293.0, 289.0, 263.0, 259.0, 297.0, 282.0, 282.0, 294.0, 257.0, 268.0, 264.0, 255.0, 270.0, 269.0, 280.0, 290.0, 263.0, 304.0, 285.0, 285.0, 290.0, 286.0, 301.0, 278.0, 296.0, 286.0, 296.0, 277.0, 284.0, 295.0, 280.0, 302.0, 289.0, 290.0, 301.0, 278.0, 274.0, 287.0, 266.0, 267.0, 284.0, 298.0, 292.0, 281.0, 273.0, 260.0, 288.0, 293.0, 233.0, 226.0, 287.0, 283.0, 253.0, 269.0, 290.0, 289.0, 287.0, 283.0, 279.0, 294.0, 289.0, 293.0, 297.0, 279.0, 291.0, 291.0, 277.0, 302.0, 299.0, 277.0, 299.0, 277.0, 265.0, 302.0, 290.0, 292.0, 288.0, 291.0, 289.0, 275.0, 285.0, 297.0, 277.0, 293.0, 114.0, 114.0, 248.0, 231.0, 285.0, 288.0, 256.0, 260.0, 292.0, 284.0, 300.0, 282.0, 253.0, 269.0]}, "sampler_perf": {"mean_env_wait_ms": 1.5585142836902617, "mean_processing_ms": 0.37930323172658476, "mean_inference_ms": 2.109956620242284}, "off_policy_estimator": {}, "info": {"num_steps_trained": 3264000, "num_steps_sampled": 1740800, "sample_time_ms": 20787.416, "load_time_ms": 37.15, "grad_time_ms": 10003.018, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.003316950984299183, "policy_loss": -0.004835940897464752, "vf_loss": 87.2677993774414, "vf_explained_var": 0.7657222151756287, "kl": 0.0019325317116454244, "entropy": 1.1477751731872559, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1740800, "episodes_total": 4352, "training_iteration": 136, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-52-52", "timestamp": 1660251172, "time_this_iter_s": 29.77871298789978, "time_total_s": 9589.974368572235, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 9589.974368572235, "timesteps_since_restore": 1740800, "iterations_since_restore": 136, "perf": {"cpu_util_percent": 32.416666666666664, "ram_util_percent": 58.35476190476191}}
-{"episode_reward_max": 630.0, "episode_reward_min": 228.0, "episode_reward_mean": 546.21, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 114.0}, "policy_reward_max": {"ppo": 316.0}, "policy_reward_mean": {"ppo": 273.105}, "custom_metrics": {"sparse_reward_mean": 189.2, "sparse_reward_min": 80, "sparse_reward_max": 220, "shaped_reward_mean": 167.81, "shaped_reward_min": 68, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.06, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 17.14, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 28, "useful_onion_pickup_agent_0_mean": 15.48, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 16.26, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.89, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 0.95, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 7, "useful_onion_drop_agent_0_mean": 0.37, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.48, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 14.82, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 15.88, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.67, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.37, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.85, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.32, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.51, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.55, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.06, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.08, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.99, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.67, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.93, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.61, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.03, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.82, 
"optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 15.88, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.82, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 15.88, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [525.0, 576.0, 479.0, 576.0, 570.0, 342.0, 470.0, 573.0, 510.0, 518.0, 570.0, 582.0, 539.0, 570.0, 510.0, 587.0, 579.0, 516.0, 516.0, 582.0, 576.0, 576.0, 570.0, 576.0, 519.0, 570.0, 581.0, 630.0, 573.0, 522.0, 573.0, 516.0, 573.0, 579.0, 582.0, 579.0, 579.0, 561.0, 533.0, 582.0, 573.0, 533.0, 581.0, 459.0, 570.0, 522.0, 579.0, 570.0, 573.0, 582.0, 576.0, 582.0, 579.0, 576.0, 576.0, 567.0, 582.0, 579.0, 564.0, 582.0, 570.0, 228.0, 479.0, 573.0, 516.0, 576.0, 582.0, 522.0, 570.0, 579.0, 510.0, 582.0, 573.0, 573.0, 470.0, 579.0, 530.0, 567.0, 573.0, 533.0, 582.0, 461.0, 496.0, 510.0, 513.0, 579.0, 530.0, 570.0, 525.0, 525.0, 573.0, 294.0, 576.0, 539.0, 522.0, 498.0, 573.0, 576.0, 579.0, 570.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [274.0, 251.0, 298.0, 278.0, 254.0, 225.0, 297.0, 279.0, 286.0, 284.0, 166.0, 176.0, 221.0, 249.0, 296.0, 277.0, 259.0, 251.0, 259.0, 259.0, 298.0, 272.0, 283.0, 299.0, 271.0, 268.0, 293.0, 277.0, 264.0, 246.0, 285.0, 302.0, 291.0, 288.0, 286.0, 230.0, 249.0, 267.0, 284.0, 298.0, 293.0, 283.0, 288.0, 288.0, 283.0, 287.0, 285.0, 291.0, 257.0, 262.0, 286.0, 284.0, 291.0, 290.0, 316.0, 314.0, 277.0, 296.0, 262.0, 260.0, 283.0, 290.0, 264.0, 252.0, 296.0, 277.0, 284.0, 295.0, 280.0, 302.0, 289.0, 290.0, 301.0, 278.0, 274.0, 287.0, 266.0, 267.0, 284.0, 298.0, 292.0, 281.0, 273.0, 260.0, 288.0, 293.0, 233.0, 226.0, 287.0, 283.0, 253.0, 269.0, 290.0, 289.0, 287.0, 283.0, 279.0, 
294.0, 289.0, 293.0, 297.0, 279.0, 291.0, 291.0, 277.0, 302.0, 299.0, 277.0, 299.0, 277.0, 265.0, 302.0, 290.0, 292.0, 288.0, 291.0, 289.0, 275.0, 285.0, 297.0, 277.0, 293.0, 114.0, 114.0, 248.0, 231.0, 285.0, 288.0, 256.0, 260.0, 292.0, 284.0, 300.0, 282.0, 253.0, 269.0, 291.0, 279.0, 270.0, 309.0, 255.0, 255.0, 282.0, 300.0, 275.0, 298.0, 293.0, 280.0, 239.0, 231.0, 293.0, 286.0, 260.0, 270.0, 287.0, 280.0, 273.0, 300.0, 275.0, 258.0, 285.0, 297.0, 229.0, 232.0, 252.0, 244.0, 270.0, 240.0, 241.0, 272.0, 288.0, 291.0, 249.0, 281.0, 287.0, 283.0, 257.0, 268.0, 258.0, 267.0, 279.0, 294.0, 142.0, 152.0, 285.0, 291.0, 281.0, 258.0, 262.0, 260.0, 261.0, 237.0, 292.0, 281.0, 287.0, 289.0, 285.0, 294.0, 279.0, 291.0]}, "sampler_perf": {"mean_env_wait_ms": 1.5503391624395824, "mean_processing_ms": 0.37768347850915746, "mean_inference_ms": 2.101391542622976}, "off_policy_estimator": {}, "info": {"num_steps_trained": 3288000, "num_steps_sampled": 1753600, "sample_time_ms": 20872.885, "load_time_ms": 37.097, "grad_time_ms": 9999.872, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0012538364389911294, "policy_loss": -0.005918627139180899, "vf_loss": 77.47673797607422, "vf_explained_var": 0.7781977653503418, "kl": 0.0019029680406674743, "entropy": 1.1504276990890503, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1753600, "episodes_total": 4384, "training_iteration": 137, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-53-22", "timestamp": 1660251202, "time_this_iter_s": 30.53275179862976, "time_total_s": 9620.507120370865, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 9620.507120370865, "timesteps_since_restore": 1753600, "iterations_since_restore": 137, "perf": {"cpu_util_percent": 31.753488372093024, "ram_util_percent": 58.406976744186025}}
-{"episode_reward_max": 630.0, "episode_reward_min": 294.0, "episode_reward_mean": 546.11, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 142.0}, "policy_reward_max": {"ppo": 317.0}, "policy_reward_mean": {"ppo": 273.055}, "custom_metrics": {"sparse_reward_mean": 189.0, "sparse_reward_min": 100, "sparse_reward_max": 220, "shaped_reward_mean": 168.11, "shaped_reward_min": 94, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.28, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 17.22, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 28, "useful_onion_pickup_agent_0_mean": 15.76, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 16.22, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 1.01, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 1.04, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 7, "useful_onion_drop_agent_0_mean": 0.4, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.6, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 14.95, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 15.87, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.67, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.37, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 4.72, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.38, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.53, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.49, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.05, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.12, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 4.94, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.77, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.84, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.7, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.05, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.95, 
"optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 15.87, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.95, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 15.87, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [630.0, 459.0, 573.0, 579.0, 527.0, 519.0, 579.0, 525.0, 516.0, 579.0, 579.0, 579.0, 579.0, 573.0, 449.0, 579.0, 579.0, 576.0, 441.0, 576.0, 533.0, 516.0, 582.0, 516.0, 579.0, 579.0, 567.0, 587.0, 527.0, 579.0, 573.0, 579.0, 516.0, 576.0, 582.0, 522.0, 570.0, 579.0, 510.0, 582.0, 573.0, 573.0, 470.0, 579.0, 530.0, 567.0, 573.0, 533.0, 582.0, 461.0, 496.0, 510.0, 513.0, 579.0, 530.0, 570.0, 525.0, 525.0, 573.0, 294.0, 576.0, 539.0, 522.0, 498.0, 573.0, 576.0, 579.0, 570.0, 525.0, 576.0, 479.0, 576.0, 570.0, 342.0, 470.0, 573.0, 510.0, 518.0, 570.0, 582.0, 539.0, 570.0, 510.0, 587.0, 579.0, 516.0, 516.0, 582.0, 576.0, 576.0, 570.0, 576.0, 519.0, 570.0, 581.0, 630.0, 573.0, 522.0, 573.0, 516.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [313.0, 317.0, 230.0, 229.0, 295.0, 278.0, 295.0, 284.0, 275.0, 252.0, 266.0, 253.0, 285.0, 294.0, 260.0, 265.0, 261.0, 255.0, 285.0, 294.0, 272.0, 307.0, 284.0, 295.0, 273.0, 306.0, 290.0, 283.0, 228.0, 221.0, 296.0, 283.0, 279.0, 300.0, 299.0, 277.0, 226.0, 215.0, 280.0, 296.0, 262.0, 271.0, 261.0, 255.0, 288.0, 294.0, 267.0, 249.0, 289.0, 290.0, 281.0, 298.0, 279.0, 288.0, 286.0, 301.0, 256.0, 271.0, 280.0, 299.0, 275.0, 298.0, 283.0, 296.0, 256.0, 260.0, 292.0, 284.0, 300.0, 282.0, 253.0, 269.0, 291.0, 279.0, 270.0, 309.0, 255.0, 255.0, 282.0, 300.0, 275.0, 298.0, 293.0, 280.0, 239.0, 231.0, 293.0, 286.0, 260.0, 270.0, 287.0, 280.0, 273.0, 300.0, 275.0, 258.0, 285.0, 
297.0, 229.0, 232.0, 252.0, 244.0, 270.0, 240.0, 241.0, 272.0, 288.0, 291.0, 249.0, 281.0, 287.0, 283.0, 257.0, 268.0, 258.0, 267.0, 279.0, 294.0, 142.0, 152.0, 285.0, 291.0, 281.0, 258.0, 262.0, 260.0, 261.0, 237.0, 292.0, 281.0, 287.0, 289.0, 285.0, 294.0, 279.0, 291.0, 274.0, 251.0, 298.0, 278.0, 254.0, 225.0, 297.0, 279.0, 286.0, 284.0, 166.0, 176.0, 221.0, 249.0, 296.0, 277.0, 259.0, 251.0, 259.0, 259.0, 298.0, 272.0, 283.0, 299.0, 271.0, 268.0, 293.0, 277.0, 264.0, 246.0, 285.0, 302.0, 291.0, 288.0, 286.0, 230.0, 249.0, 267.0, 284.0, 298.0, 293.0, 283.0, 288.0, 288.0, 283.0, 287.0, 285.0, 291.0, 257.0, 262.0, 286.0, 284.0, 291.0, 290.0, 316.0, 314.0, 277.0, 296.0, 262.0, 260.0, 283.0, 290.0, 264.0, 252.0]}, "sampler_perf": {"mean_env_wait_ms": 1.5422943636827329, "mean_processing_ms": 0.376087334740523, "mean_inference_ms": 2.0930739405664296}, "off_policy_estimator": {}, "info": {"num_steps_trained": 3312000, "num_steps_sampled": 1766400, "sample_time_ms": 20654.49, "load_time_ms": 37.188, "grad_time_ms": 9982.936, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0023388864938169718, "policy_loss": -0.0055216290056705475, "vf_loss": 84.28978729248047, "vf_explained_var": 0.7621362209320068, "kl": 0.0017433507600799203, "entropy": 1.136921763420105, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1766400, "episodes_total": 4416, "training_iteration": 138, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-53-51", "timestamp": 1660251231, "time_this_iter_s": 28.834796905517578, "time_total_s": 9649.341917276382, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 9649.341917276382, "timesteps_since_restore": 1766400, "iterations_since_restore": 138, "perf": {"cpu_util_percent": 31.565853658536582, "ram_util_percent": 58.34634146341463}}
-{"episode_reward_max": 630.0, "episode_reward_min": 342.0, "episode_reward_mean": 553.65, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 166.0}, "policy_reward_max": {"ppo": 323.0}, "policy_reward_mean": {"ppo": 276.825}, "custom_metrics": {"sparse_reward_mean": 191.8, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 170.05, "shaped_reward_min": 102, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.36, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 17.1, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 28, "useful_onion_pickup_agent_0_mean": 15.92, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 16.18, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.85, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.93, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 7, "useful_onion_drop_agent_0_mean": 0.38, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.44, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 15.22, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 15.88, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.69, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.45, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.73, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.57, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.52, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.49, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.06, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.1, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 4.95, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.86, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.86, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.8, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.04, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.22, 
"optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 15.88, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.22, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 15.88, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [576.0, 345.0, 465.0, 558.0, 570.0, 573.0, 582.0, 573.0, 576.0, 573.0, 573.0, 573.0, 582.0, 579.0, 582.0, 582.0, 567.0, 582.0, 576.0, 579.0, 587.0, 519.0, 570.0, 567.0, 579.0, 582.0, 533.0, 522.0, 587.0, 504.0, 630.0, 536.0, 573.0, 576.0, 579.0, 570.0, 525.0, 576.0, 479.0, 576.0, 570.0, 342.0, 470.0, 573.0, 510.0, 518.0, 570.0, 582.0, 539.0, 570.0, 510.0, 587.0, 579.0, 516.0, 516.0, 582.0, 576.0, 576.0, 570.0, 576.0, 519.0, 570.0, 581.0, 630.0, 573.0, 522.0, 573.0, 516.0, 630.0, 459.0, 573.0, 579.0, 527.0, 519.0, 579.0, 525.0, 516.0, 579.0, 579.0, 579.0, 579.0, 573.0, 449.0, 579.0, 579.0, 576.0, 441.0, 576.0, 533.0, 516.0, 582.0, 516.0, 579.0, 579.0, 567.0, 587.0, 527.0, 579.0, 573.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [276.0, 300.0, 177.0, 168.0, 229.0, 236.0, 275.0, 283.0, 271.0, 299.0, 292.0, 281.0, 286.0, 296.0, 287.0, 286.0, 283.0, 293.0, 293.0, 280.0, 280.0, 293.0, 296.0, 277.0, 293.0, 289.0, 272.0, 307.0, 294.0, 288.0, 298.0, 284.0, 272.0, 295.0, 293.0, 289.0, 287.0, 289.0, 287.0, 292.0, 291.0, 296.0, 257.0, 262.0, 273.0, 297.0, 281.0, 286.0, 298.0, 281.0, 282.0, 300.0, 273.0, 260.0, 272.0, 250.0, 295.0, 292.0, 252.0, 252.0, 307.0, 323.0, 267.0, 269.0, 292.0, 281.0, 287.0, 289.0, 285.0, 294.0, 279.0, 291.0, 274.0, 251.0, 298.0, 278.0, 254.0, 225.0, 297.0, 279.0, 286.0, 284.0, 166.0, 176.0, 221.0, 249.0, 296.0, 277.0, 259.0, 251.0, 259.0, 259.0, 298.0, 272.0, 283.0, 299.0, 271.0, 
268.0, 293.0, 277.0, 264.0, 246.0, 285.0, 302.0, 291.0, 288.0, 286.0, 230.0, 249.0, 267.0, 284.0, 298.0, 293.0, 283.0, 288.0, 288.0, 283.0, 287.0, 285.0, 291.0, 257.0, 262.0, 286.0, 284.0, 291.0, 290.0, 316.0, 314.0, 277.0, 296.0, 262.0, 260.0, 283.0, 290.0, 264.0, 252.0, 313.0, 317.0, 230.0, 229.0, 295.0, 278.0, 295.0, 284.0, 275.0, 252.0, 266.0, 253.0, 285.0, 294.0, 260.0, 265.0, 261.0, 255.0, 285.0, 294.0, 272.0, 307.0, 284.0, 295.0, 273.0, 306.0, 290.0, 283.0, 228.0, 221.0, 296.0, 283.0, 279.0, 300.0, 299.0, 277.0, 226.0, 215.0, 280.0, 296.0, 262.0, 271.0, 261.0, 255.0, 288.0, 294.0, 267.0, 249.0, 289.0, 290.0, 281.0, 298.0, 279.0, 288.0, 286.0, 301.0, 256.0, 271.0, 280.0, 299.0, 275.0, 298.0, 283.0, 296.0]}, "sampler_perf": {"mean_env_wait_ms": 1.534348979817542, "mean_processing_ms": 0.37451084317148714, "mean_inference_ms": 2.0846763834338202}, "off_policy_estimator": {}, "info": {"num_steps_trained": 3336000, "num_steps_sampled": 1779200, "sample_time_ms": 20417.117, "load_time_ms": 37.045, "grad_time_ms": 9852.38, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.006708970759063959, "policy_loss": -0.0014362437650561333, "vf_loss": 87.18399810791016, "vf_explained_var": 0.7458827495574951, "kl": 0.0019282657885923982, "entropy": 1.1463767290115356, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1779200, "episodes_total": 4448, "training_iteration": 139, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-54-19", "timestamp": 1660251259, "time_this_iter_s": 27.688152074813843, "time_total_s": 9677.030069351196, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 9677.030069351196, "timesteps_since_restore": 1779200, "iterations_since_restore": 139, "perf": {"cpu_util_percent": 30.3025641025641, "ram_util_percent": 58.341025641025624}}
-{"episode_reward_max": 630.0, "episode_reward_min": 345.0, "episode_reward_mean": 556.32, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 168.0}, "policy_reward_max": {"ppo": 323.0}, "policy_reward_mean": {"ppo": 278.16}, "custom_metrics": {"sparse_reward_mean": 192.8, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 170.72, "shaped_reward_min": 105, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.18, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 17.21, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 15.77, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 16.37, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.72, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.8, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.31, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.38, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 15.19, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 16.1, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.56, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.53, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.62, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.68, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.47, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.49, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.06, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.1, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 4.91, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.92, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.85, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.84, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 15.19, 
"optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 16.1, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.19, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 16.1, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [530.0, 567.0, 530.0, 573.0, 576.0, 521.0, 570.0, 579.0, 501.0, 573.0, 522.0, 579.0, 630.0, 576.0, 570.0, 576.0, 579.0, 522.0, 576.0, 525.0, 576.0, 579.0, 567.0, 579.0, 576.0, 579.0, 582.0, 576.0, 504.0, 579.0, 573.0, 408.0, 573.0, 522.0, 573.0, 516.0, 630.0, 459.0, 573.0, 579.0, 527.0, 519.0, 579.0, 525.0, 516.0, 579.0, 579.0, 579.0, 579.0, 573.0, 449.0, 579.0, 579.0, 576.0, 441.0, 576.0, 533.0, 516.0, 582.0, 516.0, 579.0, 579.0, 567.0, 587.0, 527.0, 579.0, 573.0, 579.0, 576.0, 345.0, 465.0, 558.0, 570.0, 573.0, 582.0, 573.0, 576.0, 573.0, 573.0, 573.0, 582.0, 579.0, 582.0, 582.0, 567.0, 582.0, 576.0, 579.0, 587.0, 519.0, 570.0, 567.0, 579.0, 582.0, 533.0, 522.0, 587.0, 504.0, 630.0, 536.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [269.0, 261.0, 280.0, 287.0, 249.0, 281.0, 293.0, 280.0, 288.0, 288.0, 251.0, 270.0, 280.0, 290.0, 277.0, 302.0, 242.0, 259.0, 295.0, 278.0, 258.0, 264.0, 285.0, 294.0, 320.0, 310.0, 284.0, 292.0, 287.0, 283.0, 296.0, 280.0, 285.0, 294.0, 257.0, 265.0, 297.0, 279.0, 262.0, 263.0, 288.0, 288.0, 298.0, 281.0, 293.0, 274.0, 301.0, 278.0, 293.0, 283.0, 298.0, 281.0, 291.0, 291.0, 266.0, 310.0, 236.0, 268.0, 285.0, 294.0, 290.0, 283.0, 205.0, 203.0, 277.0, 296.0, 262.0, 260.0, 283.0, 290.0, 264.0, 252.0, 313.0, 317.0, 230.0, 229.0, 295.0, 278.0, 295.0, 284.0, 275.0, 252.0, 266.0, 253.0, 285.0, 294.0, 260.0, 265.0, 261.0, 255.0, 285.0, 294.0, 272.0, 307.0, 284.0, 295.0, 273.0, 
306.0, 290.0, 283.0, 228.0, 221.0, 296.0, 283.0, 279.0, 300.0, 299.0, 277.0, 226.0, 215.0, 280.0, 296.0, 262.0, 271.0, 261.0, 255.0, 288.0, 294.0, 267.0, 249.0, 289.0, 290.0, 281.0, 298.0, 279.0, 288.0, 286.0, 301.0, 256.0, 271.0, 280.0, 299.0, 275.0, 298.0, 283.0, 296.0, 276.0, 300.0, 177.0, 168.0, 229.0, 236.0, 275.0, 283.0, 271.0, 299.0, 292.0, 281.0, 286.0, 296.0, 287.0, 286.0, 283.0, 293.0, 293.0, 280.0, 280.0, 293.0, 296.0, 277.0, 293.0, 289.0, 272.0, 307.0, 294.0, 288.0, 298.0, 284.0, 272.0, 295.0, 293.0, 289.0, 287.0, 289.0, 287.0, 292.0, 291.0, 296.0, 257.0, 262.0, 273.0, 297.0, 281.0, 286.0, 298.0, 281.0, 282.0, 300.0, 273.0, 260.0, 272.0, 250.0, 295.0, 292.0, 252.0, 252.0, 307.0, 323.0, 267.0, 269.0]}, "sampler_perf": {"mean_env_wait_ms": 1.5264986068464634, "mean_processing_ms": 0.37294987476110114, "mean_inference_ms": 2.0764572301568647}, "off_policy_estimator": {}, "info": {"num_steps_trained": 3360000, "num_steps_sampled": 1792000, "sample_time_ms": 20405.098, "load_time_ms": 37.005, "grad_time_ms": 9490.101, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0018680343637242913, "policy_loss": -0.005901841446757317, "vf_loss": 83.4326400756836, "vf_explained_var": 0.7634987831115723, "kl": 0.002031019888818264, "entropy": 1.1467581987380981, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1792000, "episodes_total": 4480, "training_iteration": 140, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-54-47", "timestamp": 1660251287, "time_this_iter_s": 28.096507787704468, "time_total_s": 9705.1265771389, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 9705.1265771389, "timesteps_since_restore": 1792000, "iterations_since_restore": 140, "perf": {"cpu_util_percent": 30.9875, "ram_util_percent": 58.3925}}
-{"episode_reward_max": 630.0, "episode_reward_min": 345.0, "episode_reward_mean": 560.21, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 168.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 280.105}, "custom_metrics": {"sparse_reward_mean": 194.2, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 171.81, "shaped_reward_min": 105, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.15, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 17.17, "onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 15.73, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 16.41, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.63, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.64, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.26, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.28, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 15.21, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 16.23, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.67, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.53, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.75, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.68, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.46, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.55, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.09, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.11, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.02, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.86, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.98, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.77, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 15.21, 
"optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 16.23, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.21, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 16.23, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [573.0, 522.0, 504.0, 573.0, 579.0, 530.0, 582.0, 576.0, 579.0, 579.0, 570.0, 525.0, 573.0, 582.0, 630.0, 558.0, 576.0, 576.0, 576.0, 533.0, 582.0, 582.0, 530.0, 530.0, 630.0, 576.0, 516.0, 476.0, 579.0, 582.0, 573.0, 576.0, 527.0, 579.0, 573.0, 579.0, 576.0, 345.0, 465.0, 558.0, 570.0, 573.0, 582.0, 573.0, 576.0, 573.0, 573.0, 573.0, 582.0, 579.0, 582.0, 582.0, 567.0, 582.0, 576.0, 579.0, 587.0, 519.0, 570.0, 567.0, 579.0, 582.0, 533.0, 522.0, 587.0, 504.0, 630.0, 536.0, 530.0, 567.0, 530.0, 573.0, 576.0, 521.0, 570.0, 579.0, 501.0, 573.0, 522.0, 579.0, 630.0, 576.0, 570.0, 576.0, 579.0, 522.0, 576.0, 525.0, 576.0, 579.0, 567.0, 579.0, 576.0, 579.0, 582.0, 576.0, 504.0, 579.0, 573.0, 408.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [293.0, 280.0, 262.0, 260.0, 260.0, 244.0, 277.0, 296.0, 296.0, 283.0, 264.0, 266.0, 301.0, 281.0, 285.0, 291.0, 289.0, 290.0, 291.0, 288.0, 287.0, 283.0, 263.0, 262.0, 292.0, 281.0, 301.0, 281.0, 303.0, 327.0, 272.0, 286.0, 296.0, 280.0, 282.0, 294.0, 282.0, 294.0, 268.0, 265.0, 298.0, 284.0, 298.0, 284.0, 255.0, 275.0, 253.0, 277.0, 311.0, 319.0, 288.0, 288.0, 250.0, 266.0, 238.0, 238.0, 292.0, 287.0, 309.0, 273.0, 286.0, 287.0, 289.0, 287.0, 256.0, 271.0, 280.0, 299.0, 275.0, 298.0, 283.0, 296.0, 276.0, 300.0, 177.0, 168.0, 229.0, 236.0, 275.0, 283.0, 271.0, 299.0, 292.0, 281.0, 286.0, 296.0, 287.0, 286.0, 283.0, 293.0, 293.0, 280.0, 280.0, 293.0, 296.0, 277.0, 293.0, 
289.0, 272.0, 307.0, 294.0, 288.0, 298.0, 284.0, 272.0, 295.0, 293.0, 289.0, 287.0, 289.0, 287.0, 292.0, 291.0, 296.0, 257.0, 262.0, 273.0, 297.0, 281.0, 286.0, 298.0, 281.0, 282.0, 300.0, 273.0, 260.0, 272.0, 250.0, 295.0, 292.0, 252.0, 252.0, 307.0, 323.0, 267.0, 269.0, 269.0, 261.0, 280.0, 287.0, 249.0, 281.0, 293.0, 280.0, 288.0, 288.0, 251.0, 270.0, 280.0, 290.0, 277.0, 302.0, 242.0, 259.0, 295.0, 278.0, 258.0, 264.0, 285.0, 294.0, 320.0, 310.0, 284.0, 292.0, 287.0, 283.0, 296.0, 280.0, 285.0, 294.0, 257.0, 265.0, 297.0, 279.0, 262.0, 263.0, 288.0, 288.0, 298.0, 281.0, 293.0, 274.0, 301.0, 278.0, 293.0, 283.0, 298.0, 281.0, 291.0, 291.0, 266.0, 310.0, 236.0, 268.0, 285.0, 294.0, 290.0, 283.0, 205.0, 203.0]}, "sampler_perf": {"mean_env_wait_ms": 1.518751324463327, "mean_processing_ms": 0.37140910407762817, "mean_inference_ms": 2.0683253134575508}, "off_policy_estimator": {}, "info": {"num_steps_trained": 3384000, "num_steps_sampled": 1804800, "sample_time_ms": 20217.154, "load_time_ms": 36.826, "grad_time_ms": 9453.413, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.004852355923503637, "policy_loss": -0.003466278314590454, "vf_loss": 88.89630126953125, "vf_explained_var": 0.7491546273231506, "kl": 0.0020531185436993837, "entropy": 1.1419917345046997, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1804800, "episodes_total": 4512, "training_iteration": 141, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-55-17", "timestamp": 1660251317, "time_this_iter_s": 29.532893180847168, "time_total_s": 9734.659470319748, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 9734.659470319748, "timesteps_since_restore": 1804800, "iterations_since_restore": 141, "perf": {"cpu_util_percent": 29.842857142857145, "ram_util_percent": 58.3642857142857}}
-{"episode_reward_max": 630.0, "episode_reward_min": 123.0, "episode_reward_mean": 554.68, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 60.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 277.34}, "custom_metrics": {"sparse_reward_mean": 192.0, "sparse_reward_min": 40, "sparse_reward_max": 220, "shaped_reward_mean": 170.68, "shaped_reward_min": 43, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.13, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 17.18, "onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 15.63, "useful_onion_pickup_agent_0_min": 4, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 16.41, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.74, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 0.63, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.33, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.32, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 15.06, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 16.23, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.71, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.42, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.82, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.5, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.46, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.59, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.1, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.12, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.15, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.74, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.06, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.59, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.06, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 3, "soup_drop_agent_1_mean": 0.07, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 15.06, 
"optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 16.23, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.06, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 16.23, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [522.0, 455.0, 582.0, 123.0, 476.0, 527.0, 579.0, 582.0, 567.0, 579.0, 579.0, 579.0, 579.0, 481.0, 576.0, 587.0, 582.0, 576.0, 579.0, 570.0, 579.0, 582.0, 573.0, 579.0, 579.0, 413.0, 579.0, 570.0, 465.0, 579.0, 579.0, 573.0, 587.0, 504.0, 630.0, 536.0, 530.0, 567.0, 530.0, 573.0, 576.0, 521.0, 570.0, 579.0, 501.0, 573.0, 522.0, 579.0, 630.0, 576.0, 570.0, 576.0, 579.0, 522.0, 576.0, 525.0, 576.0, 579.0, 567.0, 579.0, 576.0, 579.0, 582.0, 576.0, 504.0, 579.0, 573.0, 408.0, 573.0, 522.0, 504.0, 573.0, 579.0, 530.0, 582.0, 576.0, 579.0, 579.0, 570.0, 525.0, 573.0, 582.0, 630.0, 558.0, 576.0, 576.0, 576.0, 533.0, 582.0, 582.0, 530.0, 530.0, 630.0, 576.0, 516.0, 476.0, 579.0, 582.0, 573.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [248.0, 274.0, 248.0, 207.0, 280.0, 302.0, 60.0, 63.0, 249.0, 227.0, 275.0, 252.0, 284.0, 295.0, 297.0, 285.0, 282.0, 285.0, 301.0, 278.0, 291.0, 288.0, 278.0, 301.0, 286.0, 293.0, 236.0, 245.0, 277.0, 299.0, 294.0, 293.0, 293.0, 289.0, 279.0, 297.0, 286.0, 293.0, 283.0, 287.0, 288.0, 291.0, 273.0, 309.0, 287.0, 286.0, 301.0, 278.0, 294.0, 285.0, 211.0, 202.0, 295.0, 284.0, 285.0, 285.0, 233.0, 232.0, 292.0, 287.0, 289.0, 290.0, 288.0, 285.0, 295.0, 292.0, 252.0, 252.0, 307.0, 323.0, 267.0, 269.0, 269.0, 261.0, 280.0, 287.0, 249.0, 281.0, 293.0, 280.0, 288.0, 288.0, 251.0, 270.0, 280.0, 290.0, 277.0, 302.0, 242.0, 259.0, 295.0, 278.0, 258.0, 264.0, 285.0, 294.0, 320.0, 
310.0, 284.0, 292.0, 287.0, 283.0, 296.0, 280.0, 285.0, 294.0, 257.0, 265.0, 297.0, 279.0, 262.0, 263.0, 288.0, 288.0, 298.0, 281.0, 293.0, 274.0, 301.0, 278.0, 293.0, 283.0, 298.0, 281.0, 291.0, 291.0, 266.0, 310.0, 236.0, 268.0, 285.0, 294.0, 290.0, 283.0, 205.0, 203.0, 293.0, 280.0, 262.0, 260.0, 260.0, 244.0, 277.0, 296.0, 296.0, 283.0, 264.0, 266.0, 301.0, 281.0, 285.0, 291.0, 289.0, 290.0, 291.0, 288.0, 287.0, 283.0, 263.0, 262.0, 292.0, 281.0, 301.0, 281.0, 303.0, 327.0, 272.0, 286.0, 296.0, 280.0, 282.0, 294.0, 282.0, 294.0, 268.0, 265.0, 298.0, 284.0, 298.0, 284.0, 255.0, 275.0, 253.0, 277.0, 311.0, 319.0, 288.0, 288.0, 250.0, 266.0, 238.0, 238.0, 292.0, 287.0, 309.0, 273.0, 286.0, 287.0, 289.0, 287.0]}, "sampler_perf": {"mean_env_wait_ms": 1.5111310718572366, "mean_processing_ms": 0.36989117514475767, "mean_inference_ms": 2.0605337474583503}, "off_policy_estimator": {}, "info": {"num_steps_trained": 3408000, "num_steps_sampled": 1817600, "sample_time_ms": 20052.563, "load_time_ms": 36.402, "grad_time_ms": 9487.641, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.00461258739233017, "policy_loss": -0.0034613541793078184, "vf_loss": 86.5114974975586, "vf_explained_var": 0.770569384098053, "kl": 0.0022539596538990736, "entropy": 1.154403805732727, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1817600, "episodes_total": 4544, "training_iteration": 142, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-55-47", "timestamp": 1660251347, "time_this_iter_s": 30.608631134033203, "time_total_s": 9765.268101453781, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 9765.268101453781, "timesteps_since_restore": 1817600, "iterations_since_restore": 142, "perf": {"cpu_util_percent": 30.76046511627907, "ram_util_percent": 58.4186046511628}}
-{"episode_reward_max": 633.0, "episode_reward_min": 123.0, "episode_reward_mean": 553.69, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 60.0}, "policy_reward_max": {"ppo": 332.0}, "policy_reward_mean": {"ppo": 276.845}, "custom_metrics": {"sparse_reward_mean": 191.6, "sparse_reward_min": 40, "sparse_reward_max": 220, "shaped_reward_mean": 170.49, "shaped_reward_min": 43, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.23, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 16.87, "onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 15.68, "useful_onion_pickup_agent_0_min": 4, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 16.13, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.76, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 0.56, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.37, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.29, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 15.14, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 16.01, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.6, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.61, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.73, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.7, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.52, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 0.59, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.11, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.07, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.98, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.88, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 4.88, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.74, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.06, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 3, "soup_drop_agent_1_mean": 0.06, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 15.14, 
"optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 16.01, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.14, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 16.01, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 533.0, 570.0, 633.0, 582.0, 579.0, 573.0, 582.0, 123.0, 573.0, 579.0, 576.0, 579.0, 582.0, 576.0, 524.0, 573.0, 584.0, 573.0, 582.0, 579.0, 518.0, 573.0, 573.0, 564.0, 576.0, 573.0, 567.0, 633.0, 527.0, 579.0, 630.0, 504.0, 579.0, 573.0, 408.0, 573.0, 522.0, 504.0, 573.0, 579.0, 530.0, 582.0, 576.0, 579.0, 579.0, 570.0, 525.0, 573.0, 582.0, 630.0, 558.0, 576.0, 576.0, 576.0, 533.0, 582.0, 582.0, 530.0, 530.0, 630.0, 576.0, 516.0, 476.0, 579.0, 582.0, 573.0, 576.0, 522.0, 455.0, 582.0, 123.0, 476.0, 527.0, 579.0, 582.0, 567.0, 579.0, 579.0, 579.0, 579.0, 481.0, 576.0, 587.0, 582.0, 576.0, 579.0, 570.0, 579.0, 582.0, 573.0, 579.0, 579.0, 413.0, 579.0, 570.0, 465.0, 579.0, 579.0, 573.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [281.0, 298.0, 259.0, 274.0, 285.0, 285.0, 302.0, 331.0, 280.0, 302.0, 281.0, 298.0, 286.0, 287.0, 286.0, 296.0, 60.0, 63.0, 288.0, 285.0, 295.0, 284.0, 275.0, 301.0, 280.0, 299.0, 303.0, 279.0, 289.0, 287.0, 275.0, 249.0, 292.0, 281.0, 291.0, 293.0, 292.0, 281.0, 291.0, 291.0, 288.0, 291.0, 235.0, 283.0, 293.0, 280.0, 284.0, 289.0, 276.0, 288.0, 284.0, 292.0, 294.0, 279.0, 278.0, 289.0, 301.0, 332.0, 260.0, 267.0, 278.0, 301.0, 314.0, 316.0, 236.0, 268.0, 285.0, 294.0, 290.0, 283.0, 205.0, 203.0, 293.0, 280.0, 262.0, 260.0, 260.0, 244.0, 277.0, 296.0, 296.0, 283.0, 264.0, 266.0, 301.0, 281.0, 285.0, 291.0, 289.0, 290.0, 291.0, 288.0, 287.0, 283.0, 263.0, 262.0, 292.0, 
281.0, 301.0, 281.0, 303.0, 327.0, 272.0, 286.0, 296.0, 280.0, 282.0, 294.0, 282.0, 294.0, 268.0, 265.0, 298.0, 284.0, 298.0, 284.0, 255.0, 275.0, 253.0, 277.0, 311.0, 319.0, 288.0, 288.0, 250.0, 266.0, 238.0, 238.0, 292.0, 287.0, 309.0, 273.0, 286.0, 287.0, 289.0, 287.0, 248.0, 274.0, 248.0, 207.0, 280.0, 302.0, 60.0, 63.0, 249.0, 227.0, 275.0, 252.0, 284.0, 295.0, 297.0, 285.0, 282.0, 285.0, 301.0, 278.0, 291.0, 288.0, 278.0, 301.0, 286.0, 293.0, 236.0, 245.0, 277.0, 299.0, 294.0, 293.0, 293.0, 289.0, 279.0, 297.0, 286.0, 293.0, 283.0, 287.0, 288.0, 291.0, 273.0, 309.0, 287.0, 286.0, 301.0, 278.0, 294.0, 285.0, 211.0, 202.0, 295.0, 284.0, 285.0, 285.0, 233.0, 232.0, 292.0, 287.0, 289.0, 290.0, 288.0, 285.0]}, "sampler_perf": {"mean_env_wait_ms": 1.5036560426122845, "mean_processing_ms": 0.36840756428267724, "mean_inference_ms": 2.053955603003225}, "off_policy_estimator": {}, "info": {"num_steps_trained": 3432000, "num_steps_sampled": 1830400, "sample_time_ms": 20726.741, "load_time_ms": 36.338, "grad_time_ms": 9326.632, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.004940376617014408, "policy_loss": -0.002967018634080887, "vf_loss": 84.7812271118164, "vf_explained_var": 0.7767437100410461, "kl": 0.0015952900284901261, "entropy": 1.1414709091186523, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1830400, "episodes_total": 4576, "training_iteration": 143, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-56-23", "timestamp": 1660251383, "time_this_iter_s": 35.59740996360779, "time_total_s": 9800.865511417389, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 9800.865511417389, "timesteps_since_restore": 1830400, "iterations_since_restore": 143, "perf": {"cpu_util_percent": 28.452, "ram_util_percent": 58.38199999999999}}
-{"episode_reward_max": 636.0, "episode_reward_min": 123.0, "episode_reward_mean": 555.8, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 60.0}, "policy_reward_max": {"ppo": 332.0}, "policy_reward_mean": {"ppo": 277.9}, "custom_metrics": {"sparse_reward_mean": 192.2, "sparse_reward_min": 40, "sparse_reward_max": 220, "shaped_reward_mean": 171.4, "shaped_reward_min": 43, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.55, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 16.67, "onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.01, "useful_onion_pickup_agent_0_min": 4, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 15.97, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.76, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 0.59, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.36, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.27, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 15.5, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 15.79, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.54, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.61, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.68, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.83, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.58, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 0.47, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.11, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.05, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.9, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 5.01, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 4.79, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.85, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.05, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 3, "soup_drop_agent_1_mean": 0.08, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 15.5, 
"optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 15.79, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.5, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 15.79, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 576.0, 570.0, 582.0, 579.0, 582.0, 533.0, 495.0, 573.0, 533.0, 582.0, 582.0, 582.0, 522.0, 518.0, 524.0, 587.0, 579.0, 579.0, 579.0, 582.0, 579.0, 636.0, 510.0, 419.0, 570.0, 576.0, 579.0, 576.0, 573.0, 570.0, 587.0, 579.0, 582.0, 573.0, 576.0, 522.0, 455.0, 582.0, 123.0, 476.0, 527.0, 579.0, 582.0, 567.0, 579.0, 579.0, 579.0, 579.0, 481.0, 576.0, 587.0, 582.0, 576.0, 579.0, 570.0, 579.0, 582.0, 573.0, 579.0, 579.0, 413.0, 579.0, 570.0, 465.0, 579.0, 579.0, 573.0, 579.0, 533.0, 570.0, 633.0, 582.0, 579.0, 573.0, 582.0, 123.0, 573.0, 579.0, 576.0, 579.0, 582.0, 576.0, 524.0, 573.0, 584.0, 573.0, 582.0, 579.0, 518.0, 573.0, 573.0, 564.0, 576.0, 573.0, 567.0, 633.0, 527.0, 579.0, 630.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [283.0, 296.0, 282.0, 294.0, 273.0, 297.0, 286.0, 296.0, 296.0, 283.0, 280.0, 302.0, 268.0, 265.0, 235.0, 260.0, 279.0, 294.0, 266.0, 267.0, 299.0, 283.0, 292.0, 290.0, 290.0, 292.0, 263.0, 259.0, 256.0, 262.0, 261.0, 263.0, 314.0, 273.0, 298.0, 281.0, 288.0, 291.0, 297.0, 282.0, 289.0, 293.0, 277.0, 302.0, 320.0, 316.0, 261.0, 249.0, 211.0, 208.0, 299.0, 271.0, 283.0, 293.0, 280.0, 299.0, 296.0, 280.0, 288.0, 285.0, 301.0, 269.0, 278.0, 309.0, 292.0, 287.0, 309.0, 273.0, 286.0, 287.0, 289.0, 287.0, 248.0, 274.0, 248.0, 207.0, 280.0, 302.0, 60.0, 63.0, 249.0, 227.0, 275.0, 252.0, 284.0, 295.0, 297.0, 285.0, 282.0, 285.0, 301.0, 278.0, 291.0, 288.0, 278.0, 301.0, 286.0, 
293.0, 236.0, 245.0, 277.0, 299.0, 294.0, 293.0, 293.0, 289.0, 279.0, 297.0, 286.0, 293.0, 283.0, 287.0, 288.0, 291.0, 273.0, 309.0, 287.0, 286.0, 301.0, 278.0, 294.0, 285.0, 211.0, 202.0, 295.0, 284.0, 285.0, 285.0, 233.0, 232.0, 292.0, 287.0, 289.0, 290.0, 288.0, 285.0, 281.0, 298.0, 259.0, 274.0, 285.0, 285.0, 302.0, 331.0, 280.0, 302.0, 281.0, 298.0, 286.0, 287.0, 286.0, 296.0, 60.0, 63.0, 288.0, 285.0, 295.0, 284.0, 275.0, 301.0, 280.0, 299.0, 303.0, 279.0, 289.0, 287.0, 275.0, 249.0, 292.0, 281.0, 291.0, 293.0, 292.0, 281.0, 291.0, 291.0, 288.0, 291.0, 235.0, 283.0, 293.0, 280.0, 284.0, 289.0, 276.0, 288.0, 284.0, 292.0, 294.0, 279.0, 278.0, 289.0, 301.0, 332.0, 260.0, 267.0, 278.0, 301.0, 314.0, 316.0]}, "sampler_perf": {"mean_env_wait_ms": 1.4962976590401342, "mean_processing_ms": 0.3669482669309259, "mean_inference_ms": 2.047611703358859}, "off_policy_estimator": {}, "info": {"num_steps_trained": 3456000, "num_steps_sampled": 1843200, "sample_time_ms": 20697.837, "load_time_ms": 36.511, "grad_time_ms": 9260.983, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.007136467844247818, "policy_loss": -0.0009602725622244179, "vf_loss": 86.6334457397461, "vf_explained_var": 0.7632217407226562, "kl": 0.0016821371391415596, "entropy": 1.1331907510757446, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1843200, "episodes_total": 4608, "training_iteration": 144, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-56-53", "timestamp": 1660251413, "time_this_iter_s": 30.354671239852905, "time_total_s": 9831.220182657242, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 9831.220182657242, "timesteps_since_restore": 1843200, "iterations_since_restore": 144, "perf": {"cpu_util_percent": 32.744186046511636, "ram_util_percent": 58.41860465116278}}
-{"episode_reward_max": 636.0, "episode_reward_min": 123.0, "episode_reward_mean": 559.84, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 60.0}, "policy_reward_max": {"ppo": 332.0}, "policy_reward_mean": {"ppo": 279.92}, "custom_metrics": {"sparse_reward_mean": 194.0, "sparse_reward_min": 40, "sparse_reward_max": 220, "shaped_reward_mean": 171.84, "shaped_reward_min": 43, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.68, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 16.68, "onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.06, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 16.03, "useful_onion_pickup_agent_1_min": 6, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.75, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.6, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.39, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.27, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 15.61, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 15.8, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.43, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.8, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.52, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.95, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.57, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 0.54, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.11, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.1, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.78, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 5.1, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 4.69, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 5.02, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.04, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.61, 
"optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 15.8, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.61, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 15.8, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 576.0, 501.0, 582.0, 573.0, 576.0, 582.0, 570.0, 530.0, 579.0, 234.0, 576.0, 567.0, 630.0, 573.0, 582.0, 530.0, 573.0, 579.0, 576.0, 570.0, 576.0, 579.0, 522.0, 570.0, 522.0, 552.0, 576.0, 576.0, 579.0, 579.0, 579.0, 465.0, 579.0, 579.0, 573.0, 579.0, 533.0, 570.0, 633.0, 582.0, 579.0, 573.0, 582.0, 123.0, 573.0, 579.0, 576.0, 579.0, 582.0, 576.0, 524.0, 573.0, 584.0, 573.0, 582.0, 579.0, 518.0, 573.0, 573.0, 564.0, 576.0, 573.0, 567.0, 633.0, 527.0, 579.0, 630.0, 579.0, 576.0, 570.0, 582.0, 579.0, 582.0, 533.0, 495.0, 573.0, 533.0, 582.0, 582.0, 582.0, 522.0, 518.0, 524.0, 587.0, 579.0, 579.0, 579.0, 582.0, 579.0, 636.0, 510.0, 419.0, 570.0, 576.0, 579.0, 576.0, 573.0, 570.0, 587.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [273.0, 306.0, 290.0, 286.0, 255.0, 246.0, 283.0, 299.0, 280.0, 293.0, 297.0, 279.0, 292.0, 290.0, 280.0, 290.0, 277.0, 253.0, 283.0, 296.0, 112.0, 122.0, 283.0, 293.0, 285.0, 282.0, 316.0, 314.0, 292.0, 281.0, 282.0, 300.0, 271.0, 259.0, 280.0, 293.0, 279.0, 300.0, 291.0, 285.0, 272.0, 298.0, 294.0, 282.0, 289.0, 290.0, 259.0, 263.0, 284.0, 286.0, 252.0, 270.0, 285.0, 267.0, 298.0, 278.0, 277.0, 299.0, 283.0, 296.0, 282.0, 297.0, 276.0, 303.0, 233.0, 232.0, 292.0, 287.0, 289.0, 290.0, 288.0, 285.0, 281.0, 298.0, 259.0, 274.0, 285.0, 285.0, 302.0, 331.0, 280.0, 302.0, 281.0, 298.0, 286.0, 287.0, 286.0, 296.0, 60.0, 63.0, 288.0, 285.0, 295.0, 284.0, 275.0, 301.0, 280.0, 
299.0, 303.0, 279.0, 289.0, 287.0, 275.0, 249.0, 292.0, 281.0, 291.0, 293.0, 292.0, 281.0, 291.0, 291.0, 288.0, 291.0, 235.0, 283.0, 293.0, 280.0, 284.0, 289.0, 276.0, 288.0, 284.0, 292.0, 294.0, 279.0, 278.0, 289.0, 301.0, 332.0, 260.0, 267.0, 278.0, 301.0, 314.0, 316.0, 283.0, 296.0, 282.0, 294.0, 273.0, 297.0, 286.0, 296.0, 296.0, 283.0, 280.0, 302.0, 268.0, 265.0, 235.0, 260.0, 279.0, 294.0, 266.0, 267.0, 299.0, 283.0, 292.0, 290.0, 290.0, 292.0, 263.0, 259.0, 256.0, 262.0, 261.0, 263.0, 314.0, 273.0, 298.0, 281.0, 288.0, 291.0, 297.0, 282.0, 289.0, 293.0, 277.0, 302.0, 320.0, 316.0, 261.0, 249.0, 211.0, 208.0, 299.0, 271.0, 283.0, 293.0, 280.0, 299.0, 296.0, 280.0, 288.0, 285.0, 301.0, 269.0, 278.0, 309.0]}, "sampler_perf": {"mean_env_wait_ms": 1.489071157531513, "mean_processing_ms": 0.36552392885765655, "mean_inference_ms": 2.041836182574639}, "off_policy_estimator": {}, "info": {"num_steps_trained": 3480000, "num_steps_sampled": 1856000, "sample_time_ms": 21121.64, "load_time_ms": 36.034, "grad_time_ms": 9412.787, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.004381807986646891, "policy_loss": -0.0034314494114369154, "vf_loss": 83.83314514160156, "vf_explained_var": 0.7805802226066589, "kl": 0.0022449749521911144, "entropy": 1.1401251554489136, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1856000, "episodes_total": 4640, "training_iteration": 145, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-57-28", "timestamp": 1660251448, "time_this_iter_s": 34.93755006790161, "time_total_s": 9866.157732725143, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 9866.157732725143, "timesteps_since_restore": 1856000, "iterations_since_restore": 145, "perf": {"cpu_util_percent": 34.077999999999996, "ram_util_percent": 58.46000000000001}}
-{"episode_reward_max": 636.0, "episode_reward_min": 234.0, "episode_reward_mean": 565.79, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 112.0}, "policy_reward_max": {"ppo": 332.0}, "policy_reward_mean": {"ppo": 282.895}, "custom_metrics": {"sparse_reward_mean": 195.8, "sparse_reward_min": 80, "sparse_reward_max": 220, "shaped_reward_mean": 174.19, "shaped_reward_min": 74, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.04, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 16.64, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.49, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 16.05, "useful_onion_pickup_agent_1_min": 6, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.67, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.62, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.32, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.24, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 16.05, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 15.75, "potting_onion_agent_1_min": 6, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.44, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.83, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.68, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 5.05, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.48, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.53, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.08, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.12, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.78, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 5.17, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 4.7, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 5.1, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.04, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.05, 
"optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 15.75, "optimal_onion_potting_agent_1_min": 6, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.05, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 15.75, "viable_onion_potting_agent_1_min": 6, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 579.0, 579.0, 582.0, 579.0, 582.0, 579.0, 582.0, 522.0, 522.0, 579.0, 627.0, 582.0, 525.0, 627.0, 576.0, 579.0, 579.0, 530.0, 582.0, 582.0, 579.0, 582.0, 582.0, 519.0, 575.0, 576.0, 579.0, 582.0, 576.0, 587.0, 579.0, 633.0, 527.0, 579.0, 630.0, 579.0, 576.0, 570.0, 582.0, 579.0, 582.0, 533.0, 495.0, 573.0, 533.0, 582.0, 582.0, 582.0, 522.0, 518.0, 524.0, 587.0, 579.0, 579.0, 579.0, 582.0, 579.0, 636.0, 510.0, 419.0, 570.0, 576.0, 579.0, 576.0, 573.0, 570.0, 587.0, 579.0, 576.0, 501.0, 582.0, 573.0, 576.0, 582.0, 570.0, 530.0, 579.0, 234.0, 576.0, 567.0, 630.0, 573.0, 582.0, 530.0, 573.0, 579.0, 576.0, 570.0, 576.0, 579.0, 522.0, 570.0, 522.0, 552.0, 576.0, 576.0, 579.0, 579.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [291.0, 288.0, 296.0, 283.0, 274.0, 305.0, 286.0, 296.0, 293.0, 286.0, 285.0, 297.0, 296.0, 283.0, 280.0, 302.0, 269.0, 253.0, 273.0, 249.0, 281.0, 298.0, 321.0, 306.0, 283.0, 299.0, 249.0, 276.0, 320.0, 307.0, 293.0, 283.0, 299.0, 280.0, 283.0, 296.0, 270.0, 260.0, 296.0, 286.0, 297.0, 285.0, 294.0, 285.0, 289.0, 293.0, 279.0, 303.0, 258.0, 261.0, 296.0, 279.0, 293.0, 283.0, 292.0, 287.0, 279.0, 303.0, 292.0, 284.0, 287.0, 300.0, 291.0, 288.0, 301.0, 332.0, 260.0, 267.0, 278.0, 301.0, 314.0, 316.0, 283.0, 296.0, 282.0, 294.0, 273.0, 297.0, 286.0, 296.0, 296.0, 283.0, 280.0, 302.0, 268.0, 265.0, 235.0, 260.0, 279.0, 294.0, 266.0, 267.0, 299.0, 283.0, 292.0, 290.0, 290.0, 
292.0, 263.0, 259.0, 256.0, 262.0, 261.0, 263.0, 314.0, 273.0, 298.0, 281.0, 288.0, 291.0, 297.0, 282.0, 289.0, 293.0, 277.0, 302.0, 320.0, 316.0, 261.0, 249.0, 211.0, 208.0, 299.0, 271.0, 283.0, 293.0, 280.0, 299.0, 296.0, 280.0, 288.0, 285.0, 301.0, 269.0, 278.0, 309.0, 273.0, 306.0, 290.0, 286.0, 255.0, 246.0, 283.0, 299.0, 280.0, 293.0, 297.0, 279.0, 292.0, 290.0, 280.0, 290.0, 277.0, 253.0, 283.0, 296.0, 112.0, 122.0, 283.0, 293.0, 285.0, 282.0, 316.0, 314.0, 292.0, 281.0, 282.0, 300.0, 271.0, 259.0, 280.0, 293.0, 279.0, 300.0, 291.0, 285.0, 272.0, 298.0, 294.0, 282.0, 289.0, 290.0, 259.0, 263.0, 284.0, 286.0, 252.0, 270.0, 285.0, 267.0, 298.0, 278.0, 277.0, 299.0, 283.0, 296.0, 282.0, 297.0, 276.0, 303.0]}, "sampler_perf": {"mean_env_wait_ms": 1.4819236352558276, "mean_processing_ms": 0.3641093342877964, "mean_inference_ms": 2.0353369545862554}, "off_policy_estimator": {}, "info": {"num_steps_trained": 3504000, "num_steps_sampled": 1868800, "sample_time_ms": 21304.847, "load_time_ms": 36.48, "grad_time_ms": 9579.852, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.003786050481721759, "policy_loss": -0.004469693172723055, "vf_loss": 88.19132232666016, "vf_explained_var": 0.7629249095916748, "kl": 0.0019031836418434978, "entropy": 1.1267800331115723, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1868800, "episodes_total": 4672, "training_iteration": 146, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-58-02", "timestamp": 1660251482, "time_this_iter_s": 33.29244089126587, "time_total_s": 9899.45017361641, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 9899.45017361641, "timesteps_since_restore": 1868800, "iterations_since_restore": 146, "perf": {"cpu_util_percent": 34.12765957446809, "ram_util_percent": 58.40212765957448}}
-{"episode_reward_max": 630.0, "episode_reward_min": 234.0, "episode_reward_mean": 569.9, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 112.0}, "policy_reward_max": {"ppo": 321.0}, "policy_reward_mean": {"ppo": 284.95}, "custom_metrics": {"sparse_reward_mean": 197.2, "sparse_reward_min": 80, "sparse_reward_max": 220, "shaped_reward_mean": 175.5, "shaped_reward_min": 74, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.1, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 16.88, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.51, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 16.15, "useful_onion_pickup_agent_1_min": 6, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.78, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.62, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.33, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.28, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 16.03, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 15.93, "potting_onion_agent_1_min": 6, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.44, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.76, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 4.89, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.05, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.36, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.46, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.05, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.12, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.85, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 5.17, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 4.78, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.09, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.04, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 16.03, 
"optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 15.93, "optimal_onion_potting_agent_1_min": 6, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.03, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 15.93, "viable_onion_potting_agent_1_min": 6, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [627.0, 579.0, 579.0, 582.0, 582.0, 579.0, 582.0, 582.0, 576.0, 579.0, 587.0, 581.0, 573.0, 579.0, 576.0, 579.0, 576.0, 582.0, 576.0, 536.0, 579.0, 587.0, 510.0, 582.0, 582.0, 579.0, 582.0, 570.0, 573.0, 579.0, 576.0, 576.0, 576.0, 573.0, 570.0, 587.0, 579.0, 576.0, 501.0, 582.0, 573.0, 576.0, 582.0, 570.0, 530.0, 579.0, 234.0, 576.0, 567.0, 630.0, 573.0, 582.0, 530.0, 573.0, 579.0, 576.0, 570.0, 576.0, 579.0, 522.0, 570.0, 522.0, 552.0, 576.0, 576.0, 579.0, 579.0, 579.0, 579.0, 579.0, 579.0, 582.0, 579.0, 582.0, 579.0, 582.0, 522.0, 522.0, 579.0, 627.0, 582.0, 525.0, 627.0, 576.0, 579.0, 579.0, 530.0, 582.0, 582.0, 579.0, 582.0, 582.0, 519.0, 575.0, 576.0, 579.0, 582.0, 576.0, 587.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [312.0, 315.0, 280.0, 299.0, 291.0, 288.0, 293.0, 289.0, 285.0, 297.0, 288.0, 291.0, 283.0, 299.0, 288.0, 294.0, 294.0, 282.0, 288.0, 291.0, 304.0, 283.0, 282.0, 299.0, 270.0, 303.0, 289.0, 290.0, 278.0, 298.0, 289.0, 290.0, 288.0, 288.0, 278.0, 304.0, 296.0, 280.0, 279.0, 257.0, 297.0, 282.0, 288.0, 299.0, 240.0, 270.0, 302.0, 280.0, 293.0, 289.0, 286.0, 293.0, 301.0, 281.0, 285.0, 285.0, 296.0, 277.0, 280.0, 299.0, 288.0, 288.0, 290.0, 286.0, 296.0, 280.0, 288.0, 285.0, 301.0, 269.0, 278.0, 309.0, 273.0, 306.0, 290.0, 286.0, 255.0, 246.0, 283.0, 299.0, 280.0, 293.0, 297.0, 279.0, 292.0, 290.0, 280.0, 290.0, 277.0, 253.0, 283.0, 296.0, 112.0, 122.0, 283.0, 293.0, 285.0, 
282.0, 316.0, 314.0, 292.0, 281.0, 282.0, 300.0, 271.0, 259.0, 280.0, 293.0, 279.0, 300.0, 291.0, 285.0, 272.0, 298.0, 294.0, 282.0, 289.0, 290.0, 259.0, 263.0, 284.0, 286.0, 252.0, 270.0, 285.0, 267.0, 298.0, 278.0, 277.0, 299.0, 283.0, 296.0, 282.0, 297.0, 276.0, 303.0, 291.0, 288.0, 296.0, 283.0, 274.0, 305.0, 286.0, 296.0, 293.0, 286.0, 285.0, 297.0, 296.0, 283.0, 280.0, 302.0, 269.0, 253.0, 273.0, 249.0, 281.0, 298.0, 321.0, 306.0, 283.0, 299.0, 249.0, 276.0, 320.0, 307.0, 293.0, 283.0, 299.0, 280.0, 283.0, 296.0, 270.0, 260.0, 296.0, 286.0, 297.0, 285.0, 294.0, 285.0, 289.0, 293.0, 279.0, 303.0, 258.0, 261.0, 296.0, 279.0, 293.0, 283.0, 292.0, 287.0, 279.0, 303.0, 292.0, 284.0, 287.0, 300.0, 291.0, 288.0]}, "sampler_perf": {"mean_env_wait_ms": 1.4748545298111457, "mean_processing_ms": 0.362707314353448, "mean_inference_ms": 2.028542543914196}, "off_policy_estimator": {}, "info": {"num_steps_trained": 3528000, "num_steps_sampled": 1881600, "sample_time_ms": 21110.787, "load_time_ms": 36.619, "grad_time_ms": 9537.52, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.004449079744517803, "policy_loss": -0.0040388829074800014, "vf_loss": 90.50656127929688, "vf_explained_var": 0.7546594142913818, "kl": 0.0021286073606461287, "entropy": 1.1253728866577148, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1881600, "episodes_total": 4704, "training_iteration": 147, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-58-30", "timestamp": 1660251510, "time_this_iter_s": 28.167391061782837, "time_total_s": 9927.617564678192, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 9927.617564678192, "timesteps_since_restore": 1881600, "iterations_since_restore": 147, "perf": {"cpu_util_percent": 35.05, "ram_util_percent": 58.895}}
-{"episode_reward_max": 630.0, "episode_reward_min": 419.0, "episode_reward_mean": 573.52, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 206.0}, "policy_reward_max": {"ppo": 328.0}, "policy_reward_mean": {"ppo": 286.76}, "custom_metrics": {"sparse_reward_mean": 198.2, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 177.12, "shaped_reward_min": 139, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.82, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 17.32, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.36, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 16.36, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.73, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.63, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.24, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.31, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 15.89, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 16.37, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.86, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.57, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.19, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.89, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.46, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.45, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.05, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.09, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.11, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.96, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.07, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.85, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.05, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 15.89, 
"optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 16.37, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.89, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 16.37, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 582.0, 570.0, 579.0, 419.0, 567.0, 579.0, 579.0, 582.0, 579.0, 519.0, 582.0, 510.0, 582.0, 579.0, 582.0, 573.0, 630.0, 579.0, 579.0, 579.0, 624.0, 518.0, 579.0, 519.0, 579.0, 579.0, 573.0, 581.0, 576.0, 587.0, 576.0, 576.0, 579.0, 579.0, 579.0, 579.0, 579.0, 579.0, 582.0, 579.0, 582.0, 579.0, 582.0, 522.0, 522.0, 579.0, 627.0, 582.0, 525.0, 627.0, 576.0, 579.0, 579.0, 530.0, 582.0, 582.0, 579.0, 582.0, 582.0, 519.0, 575.0, 576.0, 579.0, 582.0, 576.0, 587.0, 579.0, 627.0, 579.0, 579.0, 582.0, 582.0, 579.0, 582.0, 582.0, 576.0, 579.0, 587.0, 581.0, 573.0, 579.0, 576.0, 579.0, 576.0, 582.0, 576.0, 536.0, 579.0, 587.0, 510.0, 582.0, 582.0, 579.0, 582.0, 570.0, 573.0, 579.0, 576.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [300.0, 282.0, 289.0, 293.0, 290.0, 280.0, 291.0, 288.0, 206.0, 213.0, 283.0, 284.0, 292.0, 287.0, 291.0, 288.0, 284.0, 298.0, 289.0, 290.0, 265.0, 254.0, 283.0, 299.0, 259.0, 251.0, 299.0, 283.0, 290.0, 289.0, 285.0, 297.0, 280.0, 293.0, 328.0, 302.0, 286.0, 293.0, 296.0, 283.0, 293.0, 286.0, 320.0, 304.0, 282.0, 236.0, 306.0, 273.0, 262.0, 257.0, 287.0, 292.0, 295.0, 284.0, 293.0, 280.0, 292.0, 289.0, 291.0, 285.0, 303.0, 284.0, 275.0, 301.0, 277.0, 299.0, 283.0, 296.0, 282.0, 297.0, 276.0, 303.0, 291.0, 288.0, 296.0, 283.0, 274.0, 305.0, 286.0, 296.0, 293.0, 286.0, 285.0, 297.0, 296.0, 283.0, 280.0, 302.0, 269.0, 253.0, 273.0, 249.0, 281.0, 298.0, 321.0, 306.0, 283.0, 
299.0, 249.0, 276.0, 320.0, 307.0, 293.0, 283.0, 299.0, 280.0, 283.0, 296.0, 270.0, 260.0, 296.0, 286.0, 297.0, 285.0, 294.0, 285.0, 289.0, 293.0, 279.0, 303.0, 258.0, 261.0, 296.0, 279.0, 293.0, 283.0, 292.0, 287.0, 279.0, 303.0, 292.0, 284.0, 287.0, 300.0, 291.0, 288.0, 312.0, 315.0, 280.0, 299.0, 291.0, 288.0, 293.0, 289.0, 285.0, 297.0, 288.0, 291.0, 283.0, 299.0, 288.0, 294.0, 294.0, 282.0, 288.0, 291.0, 304.0, 283.0, 282.0, 299.0, 270.0, 303.0, 289.0, 290.0, 278.0, 298.0, 289.0, 290.0, 288.0, 288.0, 278.0, 304.0, 296.0, 280.0, 279.0, 257.0, 297.0, 282.0, 288.0, 299.0, 240.0, 270.0, 302.0, 280.0, 293.0, 289.0, 286.0, 293.0, 301.0, 281.0, 285.0, 285.0, 296.0, 277.0, 280.0, 299.0, 288.0, 288.0, 290.0, 286.0]}, "sampler_perf": {"mean_env_wait_ms": 1.4678650225470864, "mean_processing_ms": 0.36131377085229416, "mean_inference_ms": 2.021530568417585}, "off_policy_estimator": {}, "info": {"num_steps_trained": 3552000, "num_steps_sampled": 1894400, "sample_time_ms": 21206.603, "load_time_ms": 36.564, "grad_time_ms": 9559.344, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.005005656275898218, "policy_loss": -0.0032577281817793846, "vf_loss": 88.27960205078125, "vf_explained_var": 0.7724118232727051, "kl": 0.001774882897734642, "entropy": 1.1291688680648804, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1894400, "episodes_total": 4736, "training_iteration": 148, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-59-00", "timestamp": 1660251540, "time_this_iter_s": 30.007760047912598, "time_total_s": 9957.625324726105, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 9957.625324726105, "timesteps_since_restore": 1894400, "iterations_since_restore": 148, "perf": {"cpu_util_percent": 34.11190476190476, "ram_util_percent": 58.61190476190477}}
-{"episode_reward_max": 630.0, "episode_reward_min": 180.0, "episode_reward_mean": 568.89, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 86.0}, "policy_reward_max": {"ppo": 328.0}, "policy_reward_mean": {"ppo": 284.445}, "custom_metrics": {"sparse_reward_mean": 196.8, "sparse_reward_min": 60, "sparse_reward_max": 220, "shaped_reward_mean": 175.29, "shaped_reward_min": 60, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.47, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 17.64, "onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.01, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 16.62, "useful_onion_pickup_agent_1_min": 6, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.74, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.79, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.27, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.42, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 15.5, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 16.55, "potting_onion_agent_1_min": 6, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.0, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.54, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 5.1, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.68, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.6, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.55, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 6, "useful_dish_drop_agent_0_mean": 0.09, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.12, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.19, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.82, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.16, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.7, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.05, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 15.5, 
"optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 16.55, "optimal_onion_potting_agent_1_min": 6, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.5, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 16.55, "viable_onion_potting_agent_1_min": 6, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 579.0, 579.0, 582.0, 576.0, 582.0, 582.0, 462.0, 573.0, 564.0, 582.0, 579.0, 573.0, 180.0, 582.0, 579.0, 564.0, 579.0, 564.0, 579.0, 530.0, 579.0, 576.0, 579.0, 630.0, 582.0, 579.0, 582.0, 579.0, 560.0, 524.0, 576.0, 582.0, 576.0, 587.0, 579.0, 627.0, 579.0, 579.0, 582.0, 582.0, 579.0, 582.0, 582.0, 576.0, 579.0, 587.0, 581.0, 573.0, 579.0, 576.0, 579.0, 576.0, 582.0, 576.0, 536.0, 579.0, 587.0, 510.0, 582.0, 582.0, 579.0, 582.0, 570.0, 573.0, 579.0, 576.0, 576.0, 582.0, 582.0, 570.0, 579.0, 419.0, 567.0, 579.0, 579.0, 582.0, 579.0, 519.0, 582.0, 510.0, 582.0, 579.0, 582.0, 573.0, 630.0, 579.0, 579.0, 579.0, 624.0, 518.0, 579.0, 519.0, 579.0, 579.0, 573.0, 581.0, 576.0, 587.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [287.0, 292.0, 298.0, 281.0, 280.0, 299.0, 291.0, 291.0, 284.0, 292.0, 288.0, 294.0, 295.0, 287.0, 230.0, 232.0, 280.0, 293.0, 267.0, 297.0, 296.0, 286.0, 291.0, 288.0, 305.0, 268.0, 86.0, 94.0, 296.0, 286.0, 288.0, 291.0, 285.0, 279.0, 281.0, 298.0, 277.0, 287.0, 293.0, 286.0, 276.0, 254.0, 274.0, 305.0, 285.0, 291.0, 299.0, 280.0, 312.0, 318.0, 288.0, 294.0, 293.0, 286.0, 294.0, 288.0, 273.0, 306.0, 282.0, 278.0, 256.0, 268.0, 280.0, 296.0, 279.0, 303.0, 292.0, 284.0, 287.0, 300.0, 291.0, 288.0, 312.0, 315.0, 280.0, 299.0, 291.0, 288.0, 293.0, 289.0, 285.0, 297.0, 288.0, 291.0, 283.0, 299.0, 288.0, 294.0, 294.0, 282.0, 288.0, 291.0, 304.0, 283.0, 282.0, 299.0, 270.0, 
303.0, 289.0, 290.0, 278.0, 298.0, 289.0, 290.0, 288.0, 288.0, 278.0, 304.0, 296.0, 280.0, 279.0, 257.0, 297.0, 282.0, 288.0, 299.0, 240.0, 270.0, 302.0, 280.0, 293.0, 289.0, 286.0, 293.0, 301.0, 281.0, 285.0, 285.0, 296.0, 277.0, 280.0, 299.0, 288.0, 288.0, 290.0, 286.0, 300.0, 282.0, 289.0, 293.0, 290.0, 280.0, 291.0, 288.0, 206.0, 213.0, 283.0, 284.0, 292.0, 287.0, 291.0, 288.0, 284.0, 298.0, 289.0, 290.0, 265.0, 254.0, 283.0, 299.0, 259.0, 251.0, 299.0, 283.0, 290.0, 289.0, 285.0, 297.0, 280.0, 293.0, 328.0, 302.0, 286.0, 293.0, 296.0, 283.0, 293.0, 286.0, 320.0, 304.0, 282.0, 236.0, 306.0, 273.0, 262.0, 257.0, 287.0, 292.0, 295.0, 284.0, 293.0, 280.0, 292.0, 289.0, 291.0, 285.0, 303.0, 284.0, 275.0, 301.0]}, "sampler_perf": {"mean_env_wait_ms": 1.4609623899935302, "mean_processing_ms": 0.3599358567345953, "mean_inference_ms": 2.0144329368432397}, "off_policy_estimator": {}, "info": {"num_steps_trained": 3576000, "num_steps_sampled": 1907200, "sample_time_ms": 21431.224, "load_time_ms": 36.685, "grad_time_ms": 9588.21, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.004353505093604326, "policy_loss": -0.003862809156998992, "vf_loss": 87.85071563720703, "vf_explained_var": 0.7780687212944031, "kl": 0.002437218790873885, "entropy": 1.137519359588623, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1907200, "episodes_total": 4768, "training_iteration": 149, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-59-30", "timestamp": 1660251570, "time_this_iter_s": 30.225661993026733, "time_total_s": 9987.850986719131, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 9987.850986719131, "timesteps_since_restore": 1907200, "iterations_since_restore": 149, "perf": {"cpu_util_percent": 31.69069767441861, "ram_util_percent": 58.481395348837225}}
-{"episode_reward_max": 630.0, "episode_reward_min": 180.0, "episode_reward_mean": 565.93, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 86.0}, "policy_reward_max": {"ppo": 328.0}, "policy_reward_mean": {"ppo": 282.965}, "custom_metrics": {"sparse_reward_mean": 196.0, "sparse_reward_min": 60, "sparse_reward_max": 220, "shaped_reward_mean": 173.93, "shaped_reward_min": 60, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.19, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 17.63, "onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 15.86, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 16.68, "useful_onion_pickup_agent_1_min": 6, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.67, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.79, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.3, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.39, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 15.26, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 16.57, "potting_onion_agent_1_min": 6, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.08, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 5.44, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 5.06, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.57, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.68, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.53, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 6, "useful_dish_drop_agent_0_mean": 0.11, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.13, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.26, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.68, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.21, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.61, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.26, 
"optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 16.57, "optimal_onion_potting_agent_1_min": 6, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.26, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 16.57, "viable_onion_potting_agent_1_min": 6, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [567.0, 587.0, 573.0, 579.0, 522.0, 573.0, 579.0, 582.0, 570.0, 525.0, 579.0, 567.0, 576.0, 570.0, 576.0, 582.0, 582.0, 587.0, 576.0, 530.0, 630.0, 582.0, 579.0, 456.0, 579.0, 581.0, 573.0, 576.0, 516.0, 579.0, 582.0, 576.0, 573.0, 579.0, 576.0, 576.0, 582.0, 582.0, 570.0, 579.0, 419.0, 567.0, 579.0, 579.0, 582.0, 579.0, 519.0, 582.0, 510.0, 582.0, 579.0, 582.0, 573.0, 630.0, 579.0, 579.0, 579.0, 624.0, 518.0, 579.0, 519.0, 579.0, 579.0, 573.0, 581.0, 576.0, 587.0, 576.0, 579.0, 579.0, 579.0, 582.0, 576.0, 582.0, 582.0, 462.0, 573.0, 564.0, 582.0, 579.0, 573.0, 180.0, 582.0, 579.0, 564.0, 579.0, 564.0, 579.0, 530.0, 579.0, 576.0, 579.0, 630.0, 582.0, 579.0, 582.0, 579.0, 560.0, 524.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [292.0, 275.0, 291.0, 296.0, 296.0, 277.0, 301.0, 278.0, 269.0, 253.0, 288.0, 285.0, 293.0, 286.0, 283.0, 299.0, 275.0, 295.0, 260.0, 265.0, 292.0, 287.0, 278.0, 289.0, 281.0, 295.0, 272.0, 298.0, 292.0, 284.0, 288.0, 294.0, 285.0, 297.0, 292.0, 295.0, 291.0, 285.0, 276.0, 254.0, 319.0, 311.0, 308.0, 274.0, 277.0, 302.0, 225.0, 231.0, 295.0, 284.0, 285.0, 296.0, 294.0, 279.0, 277.0, 299.0, 249.0, 267.0, 290.0, 289.0, 276.0, 306.0, 282.0, 294.0, 296.0, 277.0, 280.0, 299.0, 288.0, 288.0, 290.0, 286.0, 300.0, 282.0, 289.0, 293.0, 290.0, 280.0, 291.0, 288.0, 206.0, 213.0, 283.0, 284.0, 292.0, 287.0, 291.0, 288.0, 284.0, 298.0, 289.0, 290.0, 265.0, 254.0, 283.0, 299.0, 259.0, 
251.0, 299.0, 283.0, 290.0, 289.0, 285.0, 297.0, 280.0, 293.0, 328.0, 302.0, 286.0, 293.0, 296.0, 283.0, 293.0, 286.0, 320.0, 304.0, 282.0, 236.0, 306.0, 273.0, 262.0, 257.0, 287.0, 292.0, 295.0, 284.0, 293.0, 280.0, 292.0, 289.0, 291.0, 285.0, 303.0, 284.0, 275.0, 301.0, 287.0, 292.0, 298.0, 281.0, 280.0, 299.0, 291.0, 291.0, 284.0, 292.0, 288.0, 294.0, 295.0, 287.0, 230.0, 232.0, 280.0, 293.0, 267.0, 297.0, 296.0, 286.0, 291.0, 288.0, 305.0, 268.0, 86.0, 94.0, 296.0, 286.0, 288.0, 291.0, 285.0, 279.0, 281.0, 298.0, 277.0, 287.0, 293.0, 286.0, 276.0, 254.0, 274.0, 305.0, 285.0, 291.0, 299.0, 280.0, 312.0, 318.0, 288.0, 294.0, 293.0, 286.0, 294.0, 288.0, 273.0, 306.0, 282.0, 278.0, 256.0, 268.0, 280.0, 296.0]}, "sampler_perf": {"mean_env_wait_ms": 1.4541632476370934, "mean_processing_ms": 0.35857867420290873, "mean_inference_ms": 2.0077131328660243}, "off_policy_estimator": {}, "info": {"num_steps_trained": 3600000, "num_steps_sampled": 1920000, "sample_time_ms": 21437.009, "load_time_ms": 37.013, "grad_time_ms": 9883.239, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.004526351112872362, "policy_loss": -0.0033871959894895554, "vf_loss": 84.81644439697266, "vf_explained_var": 0.7658727169036865, "kl": 0.002766131656244397, "entropy": 1.1361898183822632, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1920000, "episodes_total": 4800, "training_iteration": 150, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-00-01", "timestamp": 1660251601, "time_this_iter_s": 31.108325004577637, "time_total_s": 10018.95931172371, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 10018.95931172371, "timesteps_since_restore": 1920000, "iterations_since_restore": 150, "perf": {"cpu_util_percent": 34.15227272727273, "ram_util_percent": 58.540909090909096}}
-{"episode_reward_max": 630.0, "episode_reward_min": 180.0, "episode_reward_mean": 565.47, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 86.0}, "policy_reward_max": {"ppo": 319.0}, "policy_reward_mean": {"ppo": 282.735}, "custom_metrics": {"sparse_reward_mean": 195.8, "sparse_reward_min": 60, "sparse_reward_max": 220, "shaped_reward_mean": 173.87, "shaped_reward_min": 60, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.15, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 17.46, "onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 15.82, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 16.73, "useful_onion_pickup_agent_1_min": 6, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.58, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.74, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.22, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.27, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 15.28, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 16.45, "potting_onion_agent_1_min": 6, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.86, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 5.47, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 4.98, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.73, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.57, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.46, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 6, "useful_dish_drop_agent_0_mean": 0.13, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.12, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.14, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.79, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.09, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.74, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.28, 
"optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 16.45, "optimal_onion_potting_agent_1_min": 6, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.28, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 16.45, "viable_onion_potting_agent_1_min": 6, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 570.0, 516.0, 425.0, 570.0, 584.0, 570.0, 573.0, 579.0, 579.0, 576.0, 579.0, 567.0, 579.0, 573.0, 579.0, 582.0, 576.0, 579.0, 576.0, 579.0, 582.0, 576.0, 582.0, 579.0, 464.0, 582.0, 582.0, 570.0, 579.0, 573.0, 579.0, 581.0, 576.0, 587.0, 576.0, 579.0, 579.0, 579.0, 582.0, 576.0, 582.0, 582.0, 462.0, 573.0, 564.0, 582.0, 579.0, 573.0, 180.0, 582.0, 579.0, 564.0, 579.0, 564.0, 579.0, 530.0, 579.0, 576.0, 579.0, 630.0, 582.0, 579.0, 582.0, 579.0, 560.0, 524.0, 576.0, 567.0, 587.0, 573.0, 579.0, 522.0, 573.0, 579.0, 582.0, 570.0, 525.0, 579.0, 567.0, 576.0, 570.0, 576.0, 582.0, 582.0, 587.0, 576.0, 530.0, 630.0, 582.0, 579.0, 456.0, 579.0, 581.0, 573.0, 576.0, 516.0, 579.0, 582.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [285.0, 297.0, 295.0, 275.0, 268.0, 248.0, 220.0, 205.0, 285.0, 285.0, 301.0, 283.0, 285.0, 285.0, 283.0, 290.0, 282.0, 297.0, 299.0, 280.0, 268.0, 308.0, 288.0, 291.0, 270.0, 297.0, 294.0, 285.0, 282.0, 291.0, 280.0, 299.0, 289.0, 293.0, 290.0, 286.0, 292.0, 287.0, 288.0, 288.0, 300.0, 279.0, 302.0, 280.0, 286.0, 290.0, 286.0, 296.0, 283.0, 296.0, 232.0, 232.0, 285.0, 297.0, 302.0, 280.0, 278.0, 292.0, 291.0, 288.0, 292.0, 281.0, 299.0, 280.0, 292.0, 289.0, 291.0, 285.0, 303.0, 284.0, 275.0, 301.0, 287.0, 292.0, 298.0, 281.0, 280.0, 299.0, 291.0, 291.0, 284.0, 292.0, 288.0, 294.0, 295.0, 287.0, 230.0, 232.0, 280.0, 293.0, 267.0, 297.0, 296.0, 286.0, 291.0, 288.0, 305.0, 
268.0, 86.0, 94.0, 296.0, 286.0, 288.0, 291.0, 285.0, 279.0, 281.0, 298.0, 277.0, 287.0, 293.0, 286.0, 276.0, 254.0, 274.0, 305.0, 285.0, 291.0, 299.0, 280.0, 312.0, 318.0, 288.0, 294.0, 293.0, 286.0, 294.0, 288.0, 273.0, 306.0, 282.0, 278.0, 256.0, 268.0, 280.0, 296.0, 292.0, 275.0, 291.0, 296.0, 296.0, 277.0, 301.0, 278.0, 269.0, 253.0, 288.0, 285.0, 293.0, 286.0, 283.0, 299.0, 275.0, 295.0, 260.0, 265.0, 292.0, 287.0, 278.0, 289.0, 281.0, 295.0, 272.0, 298.0, 292.0, 284.0, 288.0, 294.0, 285.0, 297.0, 292.0, 295.0, 291.0, 285.0, 276.0, 254.0, 319.0, 311.0, 308.0, 274.0, 277.0, 302.0, 225.0, 231.0, 295.0, 284.0, 285.0, 296.0, 294.0, 279.0, 277.0, 299.0, 249.0, 267.0, 290.0, 289.0, 276.0, 306.0, 282.0, 294.0]}, "sampler_perf": {"mean_env_wait_ms": 1.4474584223491644, "mean_processing_ms": 0.3572431217885297, "mean_inference_ms": 2.0013634824095012}, "off_policy_estimator": {}, "info": {"num_steps_trained": 3624000, "num_steps_sampled": 1932800, "sample_time_ms": 21691.813, "load_time_ms": 37.111, "grad_time_ms": 9921.937, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0037641674280166626, "policy_loss": -0.004076274111866951, "vf_loss": 84.02509307861328, "vf_explained_var": 0.7596387267112732, "kl": 0.001788324792869389, "entropy": 1.124145746231079, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1932800, "episodes_total": 4832, "training_iteration": 151, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-00-34", "timestamp": 1660251634, "time_this_iter_s": 32.471389293670654, "time_total_s": 10051.43070101738, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 10051.43070101738, "timesteps_since_restore": 1932800, "iterations_since_restore": 151, "perf": {"cpu_util_percent": 34.02391304347826, "ram_util_percent": 58.56739130434782}}
-{"episode_reward_max": 630.0, "episode_reward_min": 402.0, "episode_reward_mean": 568.07, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 189.0}, "policy_reward_max": {"ppo": 321.0}, "policy_reward_mean": {"ppo": 284.035}, "custom_metrics": {"sparse_reward_mean": 196.4, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 175.27, "shaped_reward_min": 122, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.39, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 17.35, "onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 16.12, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 16.7, "useful_onion_pickup_agent_1_min": 11, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.59, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.67, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.2, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.23, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.51, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 16.4, "potting_onion_agent_1_min": 11, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.59, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 5.56, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.97, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.91, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.37, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.4, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.07, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.08, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.05, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.96, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 4.97, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.89, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.03, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.51, 
"optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 16.4, "optimal_onion_potting_agent_1_min": 11, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.51, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 16.4, "viable_onion_potting_agent_1_min": 11, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 579.0, 579.0, 582.0, 576.0, 576.0, 582.0, 587.0, 402.0, 564.0, 582.0, 533.0, 525.0, 579.0, 576.0, 587.0, 579.0, 539.0, 582.0, 582.0, 582.0, 533.0, 570.0, 584.0, 579.0, 579.0, 582.0, 630.0, 579.0, 587.0, 582.0, 579.0, 579.0, 560.0, 524.0, 576.0, 567.0, 587.0, 573.0, 579.0, 522.0, 573.0, 579.0, 582.0, 570.0, 525.0, 579.0, 567.0, 576.0, 570.0, 576.0, 582.0, 582.0, 587.0, 576.0, 530.0, 630.0, 582.0, 579.0, 456.0, 579.0, 581.0, 573.0, 576.0, 516.0, 579.0, 582.0, 576.0, 582.0, 570.0, 516.0, 425.0, 570.0, 584.0, 570.0, 573.0, 579.0, 579.0, 576.0, 579.0, 567.0, 579.0, 573.0, 579.0, 582.0, 576.0, 579.0, 576.0, 579.0, 582.0, 576.0, 582.0, 579.0, 464.0, 582.0, 582.0, 570.0, 579.0, 573.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [283.0, 296.0, 293.0, 286.0, 282.0, 297.0, 276.0, 306.0, 285.0, 291.0, 285.0, 291.0, 291.0, 291.0, 289.0, 298.0, 189.0, 213.0, 290.0, 274.0, 293.0, 289.0, 272.0, 261.0, 262.0, 263.0, 293.0, 286.0, 275.0, 301.0, 294.0, 293.0, 280.0, 299.0, 264.0, 275.0, 281.0, 301.0, 299.0, 283.0, 281.0, 301.0, 263.0, 270.0, 276.0, 294.0, 301.0, 283.0, 296.0, 283.0, 295.0, 284.0, 288.0, 294.0, 321.0, 309.0, 288.0, 291.0, 286.0, 301.0, 286.0, 296.0, 304.0, 275.0, 273.0, 306.0, 282.0, 278.0, 256.0, 268.0, 280.0, 296.0, 292.0, 275.0, 291.0, 296.0, 296.0, 277.0, 301.0, 278.0, 269.0, 253.0, 288.0, 285.0, 293.0, 286.0, 283.0, 299.0, 275.0, 295.0, 260.0, 265.0, 292.0, 287.0, 278.0, 289.0, 281.0, 
295.0, 272.0, 298.0, 292.0, 284.0, 288.0, 294.0, 285.0, 297.0, 292.0, 295.0, 291.0, 285.0, 276.0, 254.0, 319.0, 311.0, 308.0, 274.0, 277.0, 302.0, 225.0, 231.0, 295.0, 284.0, 285.0, 296.0, 294.0, 279.0, 277.0, 299.0, 249.0, 267.0, 290.0, 289.0, 276.0, 306.0, 282.0, 294.0, 285.0, 297.0, 295.0, 275.0, 268.0, 248.0, 220.0, 205.0, 285.0, 285.0, 301.0, 283.0, 285.0, 285.0, 283.0, 290.0, 282.0, 297.0, 299.0, 280.0, 268.0, 308.0, 288.0, 291.0, 270.0, 297.0, 294.0, 285.0, 282.0, 291.0, 280.0, 299.0, 289.0, 293.0, 290.0, 286.0, 292.0, 287.0, 288.0, 288.0, 300.0, 279.0, 302.0, 280.0, 286.0, 290.0, 286.0, 296.0, 283.0, 296.0, 232.0, 232.0, 285.0, 297.0, 302.0, 280.0, 278.0, 292.0, 291.0, 288.0, 292.0, 281.0, 299.0, 280.0]}, "sampler_perf": {"mean_env_wait_ms": 1.4408300434942674, "mean_processing_ms": 0.35592594931553234, "mean_inference_ms": 1.9949418939108405}, "off_policy_estimator": {}, "info": {"num_steps_trained": 3648000, "num_steps_sampled": 1945600, "sample_time_ms": 21661.173, "load_time_ms": 37.278, "grad_time_ms": 9874.5, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.006686341017484665, "policy_loss": -0.0018611648119986057, "vf_loss": 91.119873046875, "vf_explained_var": 0.7503556609153748, "kl": 0.002358483849093318, "entropy": 1.128965973854065, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1945600, "episodes_total": 4864, "training_iteration": 152, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-01-03", "timestamp": 1660251663, "time_this_iter_s": 29.826536893844604, "time_total_s": 10081.257237911224, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 10081.257237911224, "timesteps_since_restore": 1945600, "iterations_since_restore": 152, "perf": {"cpu_util_percent": 34.71904761904763, "ram_util_percent": 58.37619047619047}}
-{"episode_reward_max": 630.0, "episode_reward_min": 402.0, "episode_reward_mean": 569.92, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 189.0}, "policy_reward_max": {"ppo": 321.0}, "policy_reward_mean": {"ppo": 284.96}, "custom_metrics": {"sparse_reward_mean": 196.8, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 176.32, "shaped_reward_min": 122, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.59, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 17.27, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 16.3, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 16.7, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.57, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.63, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.19, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.21, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.76, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 16.38, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.49, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.77, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 4.93, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.07, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.35, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.46, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.05, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.08, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.94, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 5.05, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 4.88, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 5.0, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.76, 
"optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 16.38, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.76, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 16.38, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 582.0, 582.0, 510.0, 579.0, 579.0, 587.0, 576.0, 582.0, 579.0, 573.0, 582.0, 573.0, 582.0, 579.0, 576.0, 522.0, 582.0, 587.0, 579.0, 576.0, 512.0, 579.0, 582.0, 587.0, 576.0, 573.0, 582.0, 582.0, 582.0, 582.0, 579.0, 516.0, 579.0, 582.0, 576.0, 582.0, 570.0, 516.0, 425.0, 570.0, 584.0, 570.0, 573.0, 579.0, 579.0, 576.0, 579.0, 567.0, 579.0, 573.0, 579.0, 582.0, 576.0, 579.0, 576.0, 579.0, 582.0, 576.0, 582.0, 579.0, 464.0, 582.0, 582.0, 570.0, 579.0, 573.0, 579.0, 579.0, 579.0, 579.0, 582.0, 576.0, 576.0, 582.0, 587.0, 402.0, 564.0, 582.0, 533.0, 525.0, 579.0, 576.0, 587.0, 579.0, 539.0, 582.0, 582.0, 582.0, 533.0, 570.0, 584.0, 579.0, 579.0, 582.0, 630.0, 579.0, 587.0, 582.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [289.0, 290.0, 293.0, 289.0, 285.0, 297.0, 244.0, 266.0, 290.0, 289.0, 282.0, 297.0, 309.0, 278.0, 287.0, 289.0, 293.0, 289.0, 273.0, 306.0, 294.0, 279.0, 289.0, 293.0, 280.0, 293.0, 279.0, 303.0, 281.0, 298.0, 291.0, 285.0, 262.0, 260.0, 286.0, 296.0, 293.0, 294.0, 288.0, 291.0, 283.0, 293.0, 262.0, 250.0, 285.0, 294.0, 280.0, 302.0, 296.0, 291.0, 285.0, 291.0, 290.0, 283.0, 288.0, 294.0, 291.0, 291.0, 285.0, 297.0, 288.0, 294.0, 298.0, 281.0, 249.0, 267.0, 290.0, 289.0, 276.0, 306.0, 282.0, 294.0, 285.0, 297.0, 295.0, 275.0, 268.0, 248.0, 220.0, 205.0, 285.0, 285.0, 301.0, 283.0, 285.0, 285.0, 283.0, 290.0, 282.0, 297.0, 299.0, 280.0, 268.0, 308.0, 288.0, 291.0, 270.0, 
297.0, 294.0, 285.0, 282.0, 291.0, 280.0, 299.0, 289.0, 293.0, 290.0, 286.0, 292.0, 287.0, 288.0, 288.0, 300.0, 279.0, 302.0, 280.0, 286.0, 290.0, 286.0, 296.0, 283.0, 296.0, 232.0, 232.0, 285.0, 297.0, 302.0, 280.0, 278.0, 292.0, 291.0, 288.0, 292.0, 281.0, 299.0, 280.0, 283.0, 296.0, 293.0, 286.0, 282.0, 297.0, 276.0, 306.0, 285.0, 291.0, 285.0, 291.0, 291.0, 291.0, 289.0, 298.0, 189.0, 213.0, 290.0, 274.0, 293.0, 289.0, 272.0, 261.0, 262.0, 263.0, 293.0, 286.0, 275.0, 301.0, 294.0, 293.0, 280.0, 299.0, 264.0, 275.0, 281.0, 301.0, 299.0, 283.0, 281.0, 301.0, 263.0, 270.0, 276.0, 294.0, 301.0, 283.0, 296.0, 283.0, 295.0, 284.0, 288.0, 294.0, 321.0, 309.0, 288.0, 291.0, 286.0, 301.0, 286.0, 296.0, 304.0, 275.0]}, "sampler_perf": {"mean_env_wait_ms": 1.4342721716334546, "mean_processing_ms": 0.3546223616494384, "mean_inference_ms": 1.9881963342076971}, "off_policy_estimator": {}, "info": {"num_steps_trained": 3672000, "num_steps_sampled": 1958400, "sample_time_ms": 20770.751, "load_time_ms": 37.491, "grad_time_ms": 9746.757, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0023278051521629095, "policy_loss": -0.0060347276739776134, "vf_loss": 89.3071060180664, "vf_explained_var": 0.7670709490776062, "kl": 0.0017067408189177513, "entropy": 1.1363595724105835, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1958400, "episodes_total": 4896, "training_iteration": 153, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-01-29", "timestamp": 1660251689, "time_this_iter_s": 25.417139053344727, "time_total_s": 10106.67437696457, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 10106.67437696457, "timesteps_since_restore": 1958400, "iterations_since_restore": 153, "perf": {"cpu_util_percent": 33.84166666666667, "ram_util_percent": 58.383333333333326}}
-{"episode_reward_max": 630.0, "episode_reward_min": 402.0, "episode_reward_mean": 571.56, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 189.0}, "policy_reward_max": {"ppo": 321.0}, "policy_reward_mean": {"ppo": 285.78}, "custom_metrics": {"sparse_reward_mean": 197.4, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 176.76, "shaped_reward_min": 122, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.4, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 17.56, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 16.14, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 16.91, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.58, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.69, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.23, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.26, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 15.58, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 16.6, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.69, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.66, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 12, "useful_dish_pickup_agent_0_mean": 5.1, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.94, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.38, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.5, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 6, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.1, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 5, "soup_pickup_agent_0_mean": 5.09, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.95, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.02, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.88, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.58, 
"optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 16.6, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.58, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 16.6, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [570.0, 582.0, 576.0, 576.0, 579.0, 582.0, 579.0, 573.0, 576.0, 582.0, 579.0, 582.0, 579.0, 522.0, 576.0, 581.0, 510.0, 527.0, 584.0, 576.0, 573.0, 582.0, 576.0, 522.0, 576.0, 561.0, 579.0, 579.0, 579.0, 579.0, 581.0, 579.0, 570.0, 579.0, 573.0, 579.0, 579.0, 579.0, 579.0, 582.0, 576.0, 576.0, 582.0, 587.0, 402.0, 564.0, 582.0, 533.0, 525.0, 579.0, 576.0, 587.0, 579.0, 539.0, 582.0, 582.0, 582.0, 533.0, 570.0, 584.0, 579.0, 579.0, 582.0, 630.0, 579.0, 587.0, 582.0, 579.0, 579.0, 582.0, 582.0, 510.0, 579.0, 579.0, 587.0, 576.0, 582.0, 579.0, 573.0, 582.0, 573.0, 582.0, 579.0, 576.0, 522.0, 582.0, 587.0, 579.0, 576.0, 512.0, 579.0, 582.0, 587.0, 576.0, 573.0, 582.0, 582.0, 582.0, 582.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [292.0, 278.0, 273.0, 309.0, 290.0, 286.0, 298.0, 278.0, 282.0, 297.0, 285.0, 297.0, 293.0, 286.0, 283.0, 290.0, 294.0, 282.0, 298.0, 284.0, 294.0, 285.0, 284.0, 298.0, 285.0, 294.0, 280.0, 242.0, 292.0, 284.0, 303.0, 278.0, 262.0, 248.0, 281.0, 246.0, 290.0, 294.0, 299.0, 277.0, 282.0, 291.0, 290.0, 292.0, 282.0, 294.0, 270.0, 252.0, 277.0, 299.0, 283.0, 278.0, 280.0, 299.0, 279.0, 300.0, 282.0, 297.0, 278.0, 301.0, 297.0, 284.0, 291.0, 288.0, 278.0, 292.0, 291.0, 288.0, 292.0, 281.0, 299.0, 280.0, 283.0, 296.0, 293.0, 286.0, 282.0, 297.0, 276.0, 306.0, 285.0, 291.0, 285.0, 291.0, 291.0, 291.0, 289.0, 298.0, 189.0, 213.0, 290.0, 274.0, 293.0, 289.0, 272.0, 261.0, 262.0, 
263.0, 293.0, 286.0, 275.0, 301.0, 294.0, 293.0, 280.0, 299.0, 264.0, 275.0, 281.0, 301.0, 299.0, 283.0, 281.0, 301.0, 263.0, 270.0, 276.0, 294.0, 301.0, 283.0, 296.0, 283.0, 295.0, 284.0, 288.0, 294.0, 321.0, 309.0, 288.0, 291.0, 286.0, 301.0, 286.0, 296.0, 304.0, 275.0, 289.0, 290.0, 293.0, 289.0, 285.0, 297.0, 244.0, 266.0, 290.0, 289.0, 282.0, 297.0, 309.0, 278.0, 287.0, 289.0, 293.0, 289.0, 273.0, 306.0, 294.0, 279.0, 289.0, 293.0, 280.0, 293.0, 279.0, 303.0, 281.0, 298.0, 291.0, 285.0, 262.0, 260.0, 286.0, 296.0, 293.0, 294.0, 288.0, 291.0, 283.0, 293.0, 262.0, 250.0, 285.0, 294.0, 280.0, 302.0, 296.0, 291.0, 285.0, 291.0, 290.0, 283.0, 288.0, 294.0, 291.0, 291.0, 285.0, 297.0, 288.0, 294.0, 298.0, 281.0]}, "sampler_perf": {"mean_env_wait_ms": 1.4277901540826492, "mean_processing_ms": 0.3533349618976105, "mean_inference_ms": 1.9812328755781439}, "off_policy_estimator": {}, "info": {"num_steps_trained": 3696000, "num_steps_sampled": 1971200, "sample_time_ms": 20717.538, "load_time_ms": 37.296, "grad_time_ms": 9558.366, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0045716362074017525, "policy_loss": -0.004014193546026945, "vf_loss": 91.47116088867188, "vf_explained_var": 0.753397524356842, "kl": 0.001791521324776113, "entropy": 1.1225804090499878, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1971200, "episodes_total": 4928, "training_iteration": 154, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-01-57", "timestamp": 1660251717, "time_this_iter_s": 27.938206911087036, "time_total_s": 10134.612583875656, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 10134.612583875656, "timesteps_since_restore": 1971200, "iterations_since_restore": 154, "perf": {"cpu_util_percent": 33.69230769230769, "ram_util_percent": 58.38717948717951}}
-{"episode_reward_max": 627.0, "episode_reward_min": 510.0, "episode_reward_mean": 573.23, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 242.0}, "policy_reward_max": {"ppo": 321.0}, "policy_reward_mean": {"ppo": 286.615}, "custom_metrics": {"sparse_reward_mean": 198.2, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 176.83, "shaped_reward_min": 150, "shaped_reward_max": 187, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.36, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 17.54, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 16.06, "useful_onion_pickup_agent_0_min": 9, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 16.93, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.61, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.61, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.22, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.23, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 15.53, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 16.7, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.76, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.62, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 12, "useful_dish_pickup_agent_0_mean": 5.16, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.82, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.34, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.53, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 6, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.1, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 5, "soup_pickup_agent_0_mean": 5.21, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.84, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.15, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.78, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.53, 
"optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 16.7, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.53, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 16.7, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [573.0, 576.0, 582.0, 582.0, 579.0, 576.0, 579.0, 567.0, 576.0, 579.0, 579.0, 579.0, 579.0, 579.0, 570.0, 579.0, 576.0, 627.0, 587.0, 582.0, 512.0, 564.0, 582.0, 579.0, 582.0, 527.0, 570.0, 579.0, 525.0, 582.0, 582.0, 587.0, 579.0, 587.0, 582.0, 579.0, 579.0, 582.0, 582.0, 510.0, 579.0, 579.0, 587.0, 576.0, 582.0, 579.0, 573.0, 582.0, 573.0, 582.0, 579.0, 576.0, 522.0, 582.0, 587.0, 579.0, 576.0, 512.0, 579.0, 582.0, 587.0, 576.0, 573.0, 582.0, 582.0, 582.0, 582.0, 579.0, 570.0, 582.0, 576.0, 576.0, 579.0, 582.0, 579.0, 573.0, 576.0, 582.0, 579.0, 582.0, 579.0, 522.0, 576.0, 581.0, 510.0, 527.0, 584.0, 576.0, 573.0, 582.0, 576.0, 522.0, 576.0, 561.0, 579.0, 579.0, 579.0, 579.0, 581.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [296.0, 277.0, 298.0, 278.0, 287.0, 295.0, 287.0, 295.0, 295.0, 284.0, 290.0, 286.0, 285.0, 294.0, 286.0, 281.0, 293.0, 283.0, 298.0, 281.0, 288.0, 291.0, 296.0, 283.0, 293.0, 286.0, 288.0, 291.0, 287.0, 283.0, 281.0, 298.0, 294.0, 282.0, 306.0, 321.0, 301.0, 286.0, 288.0, 294.0, 260.0, 252.0, 285.0, 279.0, 288.0, 294.0, 284.0, 295.0, 281.0, 301.0, 256.0, 271.0, 290.0, 280.0, 293.0, 286.0, 260.0, 265.0, 283.0, 299.0, 289.0, 293.0, 294.0, 293.0, 288.0, 291.0, 286.0, 301.0, 286.0, 296.0, 304.0, 275.0, 289.0, 290.0, 293.0, 289.0, 285.0, 297.0, 244.0, 266.0, 290.0, 289.0, 282.0, 297.0, 309.0, 278.0, 287.0, 289.0, 293.0, 289.0, 273.0, 306.0, 294.0, 279.0, 289.0, 293.0, 280.0, 
293.0, 279.0, 303.0, 281.0, 298.0, 291.0, 285.0, 262.0, 260.0, 286.0, 296.0, 293.0, 294.0, 288.0, 291.0, 283.0, 293.0, 262.0, 250.0, 285.0, 294.0, 280.0, 302.0, 296.0, 291.0, 285.0, 291.0, 290.0, 283.0, 288.0, 294.0, 291.0, 291.0, 285.0, 297.0, 288.0, 294.0, 298.0, 281.0, 292.0, 278.0, 273.0, 309.0, 290.0, 286.0, 298.0, 278.0, 282.0, 297.0, 285.0, 297.0, 293.0, 286.0, 283.0, 290.0, 294.0, 282.0, 298.0, 284.0, 294.0, 285.0, 284.0, 298.0, 285.0, 294.0, 280.0, 242.0, 292.0, 284.0, 303.0, 278.0, 262.0, 248.0, 281.0, 246.0, 290.0, 294.0, 299.0, 277.0, 282.0, 291.0, 290.0, 292.0, 282.0, 294.0, 270.0, 252.0, 277.0, 299.0, 283.0, 278.0, 280.0, 299.0, 279.0, 300.0, 282.0, 297.0, 278.0, 301.0, 297.0, 284.0, 291.0, 288.0]}, "sampler_perf": {"mean_env_wait_ms": 1.4213938109933664, "mean_processing_ms": 0.35206327984916896, "mean_inference_ms": 1.974322466189253}, "off_policy_estimator": {}, "info": {"num_steps_trained": 3720000, "num_steps_sampled": 1984000, "sample_time_ms": 20399.728, "load_time_ms": 37.504, "grad_time_ms": 9171.651, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0077364686876535416, "policy_loss": -0.000512867234647274, "vf_loss": 88.10808563232422, "vf_explained_var": 0.7624195218086243, "kl": 0.0021189304534345865, "entropy": 1.1229437589645386, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1984000, "episodes_total": 4960, "training_iteration": 155, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-02-25", "timestamp": 1660251745, "time_this_iter_s": 27.89369297027588, "time_total_s": 10162.506276845932, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 10162.506276845932, "timesteps_since_restore": 1984000, "iterations_since_restore": 155, "perf": {"cpu_util_percent": 34.404999999999994, "ram_util_percent": 58.395}}
-{"episode_reward_max": 627.0, "episode_reward_min": 462.0, "episode_reward_mean": 570.6, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 226.0}, "policy_reward_max": {"ppo": 321.0}, "policy_reward_mean": {"ppo": 285.3}, "custom_metrics": {"sparse_reward_mean": 197.6, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 175.4, "shaped_reward_min": 142, "shaped_reward_max": 187, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.04, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 17.7, "onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 15.71, "useful_onion_pickup_agent_0_min": 9, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 16.95, "useful_onion_pickup_agent_1_min": 11, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.58, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.55, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.2, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.21, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 15.17, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 16.87, "potting_onion_agent_1_min": 11, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 6.02, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.54, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 12, "useful_dish_pickup_agent_0_mean": 5.14, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.62, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.44, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.64, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 6, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.12, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 5, "soup_pickup_agent_0_mean": 5.35, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.67, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.3, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.61, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.03, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.17, 
"optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 16.87, "optimal_onion_potting_agent_1_min": 11, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.17, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 16.87, "viable_onion_potting_agent_1_min": 11, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [587.0, 462.0, 578.0, 579.0, 519.0, 579.0, 582.0, 567.0, 576.0, 573.0, 573.0, 579.0, 579.0, 576.0, 579.0, 564.0, 519.0, 567.0, 579.0, 570.0, 582.0, 573.0, 579.0, 527.0, 582.0, 576.0, 573.0, 570.0, 515.0, 576.0, 582.0, 579.0, 582.0, 582.0, 582.0, 579.0, 570.0, 582.0, 576.0, 576.0, 579.0, 582.0, 579.0, 573.0, 576.0, 582.0, 579.0, 582.0, 579.0, 522.0, 576.0, 581.0, 510.0, 527.0, 584.0, 576.0, 573.0, 582.0, 576.0, 522.0, 576.0, 561.0, 579.0, 579.0, 579.0, 579.0, 581.0, 579.0, 573.0, 576.0, 582.0, 582.0, 579.0, 576.0, 579.0, 567.0, 576.0, 579.0, 579.0, 579.0, 579.0, 579.0, 570.0, 579.0, 576.0, 627.0, 587.0, 582.0, 512.0, 564.0, 582.0, 579.0, 582.0, 527.0, 570.0, 579.0, 525.0, 582.0, 582.0, 587.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [293.0, 294.0, 236.0, 226.0, 295.0, 283.0, 287.0, 292.0, 268.0, 251.0, 295.0, 284.0, 285.0, 297.0, 281.0, 286.0, 293.0, 283.0, 289.0, 284.0, 285.0, 288.0, 290.0, 289.0, 280.0, 299.0, 286.0, 290.0, 295.0, 284.0, 285.0, 279.0, 264.0, 255.0, 279.0, 288.0, 295.0, 284.0, 273.0, 297.0, 286.0, 296.0, 290.0, 283.0, 284.0, 295.0, 249.0, 278.0, 288.0, 294.0, 277.0, 299.0, 282.0, 291.0, 290.0, 280.0, 260.0, 255.0, 278.0, 298.0, 280.0, 302.0, 294.0, 285.0, 291.0, 291.0, 285.0, 297.0, 288.0, 294.0, 298.0, 281.0, 292.0, 278.0, 273.0, 309.0, 290.0, 286.0, 298.0, 278.0, 282.0, 297.0, 285.0, 297.0, 293.0, 286.0, 283.0, 290.0, 294.0, 282.0, 298.0, 284.0, 294.0, 285.0, 284.0, 298.0, 285.0, 
294.0, 280.0, 242.0, 292.0, 284.0, 303.0, 278.0, 262.0, 248.0, 281.0, 246.0, 290.0, 294.0, 299.0, 277.0, 282.0, 291.0, 290.0, 292.0, 282.0, 294.0, 270.0, 252.0, 277.0, 299.0, 283.0, 278.0, 280.0, 299.0, 279.0, 300.0, 282.0, 297.0, 278.0, 301.0, 297.0, 284.0, 291.0, 288.0, 296.0, 277.0, 298.0, 278.0, 287.0, 295.0, 287.0, 295.0, 295.0, 284.0, 290.0, 286.0, 285.0, 294.0, 286.0, 281.0, 293.0, 283.0, 298.0, 281.0, 288.0, 291.0, 296.0, 283.0, 293.0, 286.0, 288.0, 291.0, 287.0, 283.0, 281.0, 298.0, 294.0, 282.0, 306.0, 321.0, 301.0, 286.0, 288.0, 294.0, 260.0, 252.0, 285.0, 279.0, 288.0, 294.0, 284.0, 295.0, 281.0, 301.0, 256.0, 271.0, 290.0, 280.0, 293.0, 286.0, 260.0, 265.0, 283.0, 299.0, 289.0, 293.0, 294.0, 293.0]}, "sampler_perf": {"mean_env_wait_ms": 1.4150911429857618, "mean_processing_ms": 0.3508138897349896, "mean_inference_ms": 1.9677135371948458}, "off_policy_estimator": {}, "info": {"num_steps_trained": 3744000, "num_steps_sampled": 1996800, "sample_time_ms": 20238.789, "load_time_ms": 37.088, "grad_time_ms": 8942.988, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.004439468961209059, "policy_loss": -0.0041490718722343445, "vf_loss": 91.5320053100586, "vf_explained_var": 0.7567749619483948, "kl": 0.001588103943504393, "entropy": 1.1293169260025024, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1996800, "episodes_total": 4992, "training_iteration": 156, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-02-54", "timestamp": 1660251774, "time_this_iter_s": 29.386072158813477, "time_total_s": 10191.892349004745, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 10191.892349004745, "timesteps_since_restore": 1996800, "iterations_since_restore": 156, "perf": {"cpu_util_percent": 34.03658536585366, "ram_util_percent": 58.353658536585364}}
-{"episode_reward_max": 630.0, "episode_reward_min": 462.0, "episode_reward_mean": 569.64, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 226.0}, "policy_reward_max": {"ppo": 321.0}, "policy_reward_mean": {"ppo": 284.82}, "custom_metrics": {"sparse_reward_mean": 197.4, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 174.84, "shaped_reward_min": 141, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.09, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 17.54, "onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 15.8, "useful_onion_pickup_agent_0_min": 9, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 16.79, "useful_onion_pickup_agent_1_min": 11, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.54, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.38, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.12, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.14, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.2, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 16.86, "potting_onion_agent_1_min": 11, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.98, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.78, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 12, "useful_dish_pickup_agent_0_mean": 4.91, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.61, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.52, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.76, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.04, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.11, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.25, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.78, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.21, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.69, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.03, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.2, 
"optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 16.86, "optimal_onion_potting_agent_1_min": 11, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.2, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 16.86, "viable_onion_potting_agent_1_min": 11, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 533.0, 564.0, 630.0, 579.0, 587.0, 579.0, 593.0, 582.0, 582.0, 584.0, 582.0, 573.0, 513.0, 570.0, 587.0, 507.0, 579.0, 564.0, 516.0, 579.0, 579.0, 576.0, 579.0, 579.0, 576.0, 579.0, 530.0, 573.0, 549.0, 501.0, 582.0, 579.0, 579.0, 581.0, 579.0, 573.0, 576.0, 582.0, 582.0, 579.0, 576.0, 579.0, 567.0, 576.0, 579.0, 579.0, 579.0, 579.0, 579.0, 570.0, 579.0, 576.0, 627.0, 587.0, 582.0, 512.0, 564.0, 582.0, 579.0, 582.0, 527.0, 570.0, 579.0, 525.0, 582.0, 582.0, 587.0, 587.0, 462.0, 578.0, 579.0, 519.0, 579.0, 582.0, 567.0, 576.0, 573.0, 573.0, 579.0, 579.0, 576.0, 579.0, 564.0, 519.0, 567.0, 579.0, 570.0, 582.0, 573.0, 579.0, 527.0, 582.0, 576.0, 573.0, 570.0, 515.0, 576.0, 582.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [299.0, 283.0, 271.0, 262.0, 284.0, 280.0, 309.0, 321.0, 294.0, 285.0, 291.0, 296.0, 283.0, 296.0, 289.0, 304.0, 294.0, 288.0, 286.0, 296.0, 295.0, 289.0, 283.0, 299.0, 280.0, 293.0, 263.0, 250.0, 274.0, 296.0, 293.0, 294.0, 256.0, 251.0, 295.0, 284.0, 285.0, 279.0, 245.0, 271.0, 288.0, 291.0, 284.0, 295.0, 284.0, 292.0, 289.0, 290.0, 293.0, 286.0, 276.0, 300.0, 299.0, 280.0, 253.0, 277.0, 278.0, 295.0, 270.0, 279.0, 264.0, 237.0, 286.0, 296.0, 282.0, 297.0, 278.0, 301.0, 297.0, 284.0, 291.0, 288.0, 296.0, 277.0, 298.0, 278.0, 287.0, 295.0, 287.0, 295.0, 295.0, 284.0, 290.0, 286.0, 285.0, 294.0, 286.0, 281.0, 293.0, 283.0, 298.0, 281.0, 288.0, 291.0, 296.0, 283.0, 293.0, 
286.0, 288.0, 291.0, 287.0, 283.0, 281.0, 298.0, 294.0, 282.0, 306.0, 321.0, 301.0, 286.0, 288.0, 294.0, 260.0, 252.0, 285.0, 279.0, 288.0, 294.0, 284.0, 295.0, 281.0, 301.0, 256.0, 271.0, 290.0, 280.0, 293.0, 286.0, 260.0, 265.0, 283.0, 299.0, 289.0, 293.0, 294.0, 293.0, 293.0, 294.0, 236.0, 226.0, 295.0, 283.0, 287.0, 292.0, 268.0, 251.0, 295.0, 284.0, 285.0, 297.0, 281.0, 286.0, 293.0, 283.0, 289.0, 284.0, 285.0, 288.0, 290.0, 289.0, 280.0, 299.0, 286.0, 290.0, 295.0, 284.0, 285.0, 279.0, 264.0, 255.0, 279.0, 288.0, 295.0, 284.0, 273.0, 297.0, 286.0, 296.0, 290.0, 283.0, 284.0, 295.0, 249.0, 278.0, 288.0, 294.0, 277.0, 299.0, 282.0, 291.0, 290.0, 280.0, 260.0, 255.0, 278.0, 298.0, 280.0, 302.0, 294.0, 285.0]}, "sampler_perf": {"mean_env_wait_ms": 1.4088928730851003, "mean_processing_ms": 0.34958857715576797, "mean_inference_ms": 1.9613751884714845}, "off_policy_estimator": {}, "info": {"num_steps_trained": 3768000, "num_steps_sampled": 2009600, "sample_time_ms": 20457.246, "load_time_ms": 36.907, "grad_time_ms": 8867.46, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.003602199489250779, "policy_loss": -0.004857169929891825, "vf_loss": 90.2380599975586, "vf_explained_var": 0.7651500105857849, "kl": 0.0019707006867974997, "entropy": 1.1288973093032837, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2009600, "episodes_total": 5024, "training_iteration": 157, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-03-24", "timestamp": 1660251804, "time_this_iter_s": 29.596789121627808, "time_total_s": 10221.489138126373, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 10221.489138126373, "timesteps_since_restore": 2009600, "iterations_since_restore": 157, "perf": {"cpu_util_percent": 35.76428571428571, "ram_util_percent": 58.59285714285714}}
-{"episode_reward_max": 630.0, "episode_reward_min": 185.0, "episode_reward_mean": 563.17, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 89.0}, "policy_reward_max": {"ppo": 321.0}, "policy_reward_mean": {"ppo": 281.585}, "custom_metrics": {"sparse_reward_mean": 195.2, "sparse_reward_min": 60, "sparse_reward_max": 220, "shaped_reward_mean": 172.77, "shaped_reward_min": 65, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.97, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 17.41, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 15.66, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 16.69, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.52, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.42, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.2, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.16, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.12, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 16.65, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 6.07, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.8, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.77, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.5, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.66, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.85, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.07, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.14, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.2, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.74, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.17, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.67, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.12, 
"optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 16.65, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.12, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 16.65, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [576.0, 573.0, 495.0, 579.0, 579.0, 582.0, 570.0, 576.0, 567.0, 579.0, 582.0, 582.0, 579.0, 530.0, 573.0, 536.0, 533.0, 579.0, 185.0, 576.0, 582.0, 582.0, 544.0, 579.0, 576.0, 552.0, 570.0, 573.0, 630.0, 579.0, 495.0, 579.0, 525.0, 582.0, 582.0, 587.0, 587.0, 462.0, 578.0, 579.0, 519.0, 579.0, 582.0, 567.0, 576.0, 573.0, 573.0, 579.0, 579.0, 576.0, 579.0, 564.0, 519.0, 567.0, 579.0, 570.0, 582.0, 573.0, 579.0, 527.0, 582.0, 576.0, 573.0, 570.0, 515.0, 576.0, 582.0, 579.0, 582.0, 533.0, 564.0, 630.0, 579.0, 587.0, 579.0, 593.0, 582.0, 582.0, 584.0, 582.0, 573.0, 513.0, 570.0, 587.0, 507.0, 579.0, 564.0, 516.0, 579.0, 579.0, 576.0, 579.0, 579.0, 576.0, 579.0, 530.0, 573.0, 549.0, 501.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [276.0, 300.0, 280.0, 293.0, 248.0, 247.0, 295.0, 284.0, 282.0, 297.0, 278.0, 304.0, 276.0, 294.0, 288.0, 288.0, 299.0, 268.0, 299.0, 280.0, 285.0, 297.0, 294.0, 288.0, 291.0, 288.0, 277.0, 253.0, 297.0, 276.0, 273.0, 263.0, 275.0, 258.0, 288.0, 291.0, 89.0, 96.0, 301.0, 275.0, 290.0, 292.0, 288.0, 294.0, 276.0, 268.0, 282.0, 297.0, 290.0, 286.0, 281.0, 271.0, 293.0, 277.0, 275.0, 298.0, 316.0, 314.0, 286.0, 293.0, 256.0, 239.0, 287.0, 292.0, 260.0, 265.0, 283.0, 299.0, 289.0, 293.0, 294.0, 293.0, 293.0, 294.0, 236.0, 226.0, 295.0, 283.0, 287.0, 292.0, 268.0, 251.0, 295.0, 284.0, 285.0, 297.0, 281.0, 286.0, 293.0, 283.0, 289.0, 284.0, 285.0, 288.0, 290.0, 289.0, 280.0, 
299.0, 286.0, 290.0, 295.0, 284.0, 285.0, 279.0, 264.0, 255.0, 279.0, 288.0, 295.0, 284.0, 273.0, 297.0, 286.0, 296.0, 290.0, 283.0, 284.0, 295.0, 249.0, 278.0, 288.0, 294.0, 277.0, 299.0, 282.0, 291.0, 290.0, 280.0, 260.0, 255.0, 278.0, 298.0, 280.0, 302.0, 294.0, 285.0, 299.0, 283.0, 271.0, 262.0, 284.0, 280.0, 309.0, 321.0, 294.0, 285.0, 291.0, 296.0, 283.0, 296.0, 289.0, 304.0, 294.0, 288.0, 286.0, 296.0, 295.0, 289.0, 283.0, 299.0, 280.0, 293.0, 263.0, 250.0, 274.0, 296.0, 293.0, 294.0, 256.0, 251.0, 295.0, 284.0, 285.0, 279.0, 245.0, 271.0, 288.0, 291.0, 284.0, 295.0, 284.0, 292.0, 289.0, 290.0, 293.0, 286.0, 276.0, 300.0, 299.0, 280.0, 253.0, 277.0, 278.0, 295.0, 270.0, 279.0, 264.0, 237.0, 286.0, 296.0]}, "sampler_perf": {"mean_env_wait_ms": 1.4027851249897816, "mean_processing_ms": 0.3483803806446401, "mean_inference_ms": 1.9551724322503146}, "off_policy_estimator": {}, "info": {"num_steps_trained": 3792000, "num_steps_sampled": 2022400, "sample_time_ms": 20413.446, "load_time_ms": 37.245, "grad_time_ms": 8954.782, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.005850760731846094, "policy_loss": -0.002336603356525302, "vf_loss": 87.48675537109375, "vf_explained_var": 0.7656591534614563, "kl": 0.0021419422701001167, "entropy": 1.1226191520690918, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2022400, "episodes_total": 5056, "training_iteration": 158, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-03-54", "timestamp": 1660251834, "time_this_iter_s": 30.44686508178711, "time_total_s": 10251.93600320816, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 10251.93600320816, "timesteps_since_restore": 2022400, "iterations_since_restore": 158, "perf": {"cpu_util_percent": 37.25348837209302, "ram_util_percent": 58.44883720930233}}
-{"episode_reward_max": 630.0, "episode_reward_min": 185.0, "episode_reward_mean": 564.23, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 89.0}, "policy_reward_max": {"ppo": 326.0}, "policy_reward_mean": {"ppo": 282.115}, "custom_metrics": {"sparse_reward_mean": 195.6, "sparse_reward_min": 60, "sparse_reward_max": 220, "shaped_reward_mean": 173.03, "shaped_reward_min": 65, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.88, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 26, "onion_pickup_agent_1_mean": 17.41, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 15.6, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 16.77, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.49, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.4, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.25, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.18, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.08, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 16.67, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 6.19, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.59, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.95, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.36, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.65, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.68, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.09, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.16, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.31, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.66, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.28, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.59, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.08, 
"optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 16.67, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.08, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 16.67, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [587.0, 582.0, 573.0, 561.0, 584.0, 489.0, 573.0, 512.0, 582.0, 582.0, 576.0, 539.0, 570.0, 519.0, 627.0, 579.0, 518.0, 516.0, 576.0, 582.0, 582.0, 587.0, 582.0, 582.0, 576.0, 582.0, 573.0, 579.0, 579.0, 582.0, 570.0, 630.0, 515.0, 576.0, 582.0, 579.0, 582.0, 533.0, 564.0, 630.0, 579.0, 587.0, 579.0, 593.0, 582.0, 582.0, 584.0, 582.0, 573.0, 513.0, 570.0, 587.0, 507.0, 579.0, 564.0, 516.0, 579.0, 579.0, 576.0, 579.0, 579.0, 576.0, 579.0, 530.0, 573.0, 549.0, 501.0, 582.0, 576.0, 573.0, 495.0, 579.0, 579.0, 582.0, 570.0, 576.0, 567.0, 579.0, 582.0, 582.0, 579.0, 530.0, 573.0, 536.0, 533.0, 579.0, 185.0, 576.0, 582.0, 582.0, 544.0, 579.0, 576.0, 552.0, 570.0, 573.0, 630.0, 579.0, 495.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [309.0, 278.0, 294.0, 288.0, 287.0, 286.0, 297.0, 264.0, 291.0, 293.0, 240.0, 249.0, 292.0, 281.0, 262.0, 250.0, 297.0, 285.0, 285.0, 297.0, 279.0, 297.0, 262.0, 277.0, 287.0, 283.0, 266.0, 253.0, 321.0, 306.0, 286.0, 293.0, 272.0, 246.0, 274.0, 242.0, 282.0, 294.0, 284.0, 298.0, 288.0, 294.0, 285.0, 302.0, 295.0, 287.0, 288.0, 294.0, 279.0, 297.0, 289.0, 293.0, 277.0, 296.0, 286.0, 293.0, 304.0, 275.0, 290.0, 292.0, 294.0, 276.0, 326.0, 304.0, 260.0, 255.0, 278.0, 298.0, 280.0, 302.0, 294.0, 285.0, 299.0, 283.0, 271.0, 262.0, 284.0, 280.0, 309.0, 321.0, 294.0, 285.0, 291.0, 296.0, 283.0, 296.0, 289.0, 304.0, 294.0, 288.0, 286.0, 296.0, 295.0, 289.0, 283.0, 299.0, 280.0, 
293.0, 263.0, 250.0, 274.0, 296.0, 293.0, 294.0, 256.0, 251.0, 295.0, 284.0, 285.0, 279.0, 245.0, 271.0, 288.0, 291.0, 284.0, 295.0, 284.0, 292.0, 289.0, 290.0, 293.0, 286.0, 276.0, 300.0, 299.0, 280.0, 253.0, 277.0, 278.0, 295.0, 270.0, 279.0, 264.0, 237.0, 286.0, 296.0, 276.0, 300.0, 280.0, 293.0, 248.0, 247.0, 295.0, 284.0, 282.0, 297.0, 278.0, 304.0, 276.0, 294.0, 288.0, 288.0, 299.0, 268.0, 299.0, 280.0, 285.0, 297.0, 294.0, 288.0, 291.0, 288.0, 277.0, 253.0, 297.0, 276.0, 273.0, 263.0, 275.0, 258.0, 288.0, 291.0, 89.0, 96.0, 301.0, 275.0, 290.0, 292.0, 288.0, 294.0, 276.0, 268.0, 282.0, 297.0, 290.0, 286.0, 281.0, 271.0, 293.0, 277.0, 275.0, 298.0, 316.0, 314.0, 286.0, 293.0, 256.0, 239.0, 287.0, 292.0]}, "sampler_perf": {"mean_env_wait_ms": 1.3967794054069242, "mean_processing_ms": 0.3471925853842631, "mean_inference_ms": 1.9496805791927851}, "off_policy_estimator": {}, "info": {"num_steps_trained": 3816000, "num_steps_sampled": 2035200, "sample_time_ms": 20765.512, "load_time_ms": 37.179, "grad_time_ms": 8845.352, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.005116061773151159, "policy_loss": -0.0030946088954806328, "vf_loss": 87.75751495361328, "vf_explained_var": 0.7570715546607971, "kl": 0.0022622861433774233, "entropy": 1.1301772594451904, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2035200, "episodes_total": 5088, "training_iteration": 159, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-04-27", "timestamp": 1660251867, "time_this_iter_s": 32.650943994522095, "time_total_s": 10284.586947202682, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 10284.586947202682, "timesteps_since_restore": 2035200, "iterations_since_restore": 159, "perf": {"cpu_util_percent": 33.56739130434783, "ram_util_percent": 58.49565217391306}}
-{"episode_reward_max": 630.0, "episode_reward_min": 185.0, "episode_reward_mean": 562.92, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 89.0}, "policy_reward_max": {"ppo": 326.0}, "policy_reward_mean": {"ppo": 281.46}, "custom_metrics": {"sparse_reward_mean": 195.0, "sparse_reward_min": 60, "sparse_reward_max": 220, "shaped_reward_mean": 172.92, "shaped_reward_min": 65, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.87, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 26, "onion_pickup_agent_1_mean": 17.29, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 15.57, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 16.7, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.54, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.39, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.31, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.17, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.07, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 16.62, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.11, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 13, "dish_pickup_agent_1_mean": 5.42, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.05, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.4, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.54, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.51, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.08, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.14, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.29, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.61, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.27, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.58, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.07, 
"optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 16.62, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.07, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 16.62, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [237.0, 573.0, 590.0, 573.0, 582.0, 576.0, 579.0, 576.0, 587.0, 582.0, 582.0, 579.0, 576.0, 579.0, 521.0, 582.0, 570.0, 576.0, 582.0, 582.0, 582.0, 582.0, 579.0, 579.0, 582.0, 579.0, 513.0, 582.0, 582.0, 582.0, 576.0, 582.0, 573.0, 549.0, 501.0, 582.0, 576.0, 573.0, 495.0, 579.0, 579.0, 582.0, 570.0, 576.0, 567.0, 579.0, 582.0, 582.0, 579.0, 530.0, 573.0, 536.0, 533.0, 579.0, 185.0, 576.0, 582.0, 582.0, 544.0, 579.0, 576.0, 552.0, 570.0, 573.0, 630.0, 579.0, 495.0, 579.0, 587.0, 582.0, 573.0, 561.0, 584.0, 489.0, 573.0, 512.0, 582.0, 582.0, 576.0, 539.0, 570.0, 519.0, 627.0, 579.0, 518.0, 516.0, 576.0, 582.0, 582.0, 587.0, 582.0, 582.0, 576.0, 582.0, 573.0, 579.0, 579.0, 582.0, 570.0, 630.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [122.0, 115.0, 280.0, 293.0, 291.0, 299.0, 293.0, 280.0, 291.0, 291.0, 291.0, 285.0, 292.0, 287.0, 287.0, 289.0, 285.0, 302.0, 299.0, 283.0, 285.0, 297.0, 283.0, 296.0, 289.0, 287.0, 291.0, 288.0, 269.0, 252.0, 287.0, 295.0, 269.0, 301.0, 282.0, 294.0, 293.0, 289.0, 293.0, 289.0, 293.0, 289.0, 289.0, 293.0, 291.0, 288.0, 281.0, 298.0, 275.0, 307.0, 294.0, 285.0, 256.0, 257.0, 295.0, 287.0, 280.0, 302.0, 285.0, 297.0, 291.0, 285.0, 302.0, 280.0, 278.0, 295.0, 270.0, 279.0, 264.0, 237.0, 286.0, 296.0, 276.0, 300.0, 280.0, 293.0, 248.0, 247.0, 295.0, 284.0, 282.0, 297.0, 278.0, 304.0, 276.0, 294.0, 288.0, 288.0, 299.0, 268.0, 299.0, 280.0, 285.0, 297.0, 294.0, 288.0, 291.0, 
288.0, 277.0, 253.0, 297.0, 276.0, 273.0, 263.0, 275.0, 258.0, 288.0, 291.0, 89.0, 96.0, 301.0, 275.0, 290.0, 292.0, 288.0, 294.0, 276.0, 268.0, 282.0, 297.0, 290.0, 286.0, 281.0, 271.0, 293.0, 277.0, 275.0, 298.0, 316.0, 314.0, 286.0, 293.0, 256.0, 239.0, 287.0, 292.0, 309.0, 278.0, 294.0, 288.0, 287.0, 286.0, 297.0, 264.0, 291.0, 293.0, 240.0, 249.0, 292.0, 281.0, 262.0, 250.0, 297.0, 285.0, 285.0, 297.0, 279.0, 297.0, 262.0, 277.0, 287.0, 283.0, 266.0, 253.0, 321.0, 306.0, 286.0, 293.0, 272.0, 246.0, 274.0, 242.0, 282.0, 294.0, 284.0, 298.0, 288.0, 294.0, 285.0, 302.0, 295.0, 287.0, 288.0, 294.0, 279.0, 297.0, 289.0, 293.0, 277.0, 296.0, 286.0, 293.0, 304.0, 275.0, 290.0, 292.0, 294.0, 276.0, 326.0, 304.0]}, "sampler_perf": {"mean_env_wait_ms": 1.3908344800853478, "mean_processing_ms": 0.3460119024024818, "mean_inference_ms": 1.9441766821864475}, "off_policy_estimator": {}, "info": {"num_steps_trained": 3840000, "num_steps_sampled": 2048000, "sample_time_ms": 20695.053, "load_time_ms": 36.846, "grad_time_ms": 8667.722, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0033892595674842596, "policy_loss": -0.004953155294060707, "vf_loss": 89.03093719482422, "vf_explained_var": 0.7680574059486389, "kl": 0.0018749010050669312, "entropy": 1.1213653087615967, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2048000, "episodes_total": 5120, "training_iteration": 160, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-04-56", "timestamp": 1660251896, "time_this_iter_s": 28.625488996505737, "time_total_s": 10313.212436199188, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 10313.212436199188, "timesteps_since_restore": 2048000, "iterations_since_restore": 160, "perf": {"cpu_util_percent": 34.958536585365856, "ram_util_percent": 58.548780487804876}}
-{"episode_reward_max": 630.0, "episode_reward_min": 237.0, "episode_reward_mean": 567.29, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 115.0}, "policy_reward_max": {"ppo": 331.0}, "policy_reward_mean": {"ppo": 283.645}, "custom_metrics": {"sparse_reward_mean": 196.4, "sparse_reward_min": 80, "sparse_reward_max": 220, "shaped_reward_mean": 174.49, "shaped_reward_min": 77, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.94, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 26, "onion_pickup_agent_1_mean": 17.55, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 15.56, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 16.93, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 25, "onion_drop_agent_0_mean": 0.61, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.48, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.34, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.2, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.03, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 16.83, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 6.22, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 13, "dish_pickup_agent_1_mean": 5.34, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.23, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.49, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.57, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.4, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 7, "useful_dish_drop_agent_0_mean": 0.1, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.11, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.35, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.61, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.29, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.58, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.03, 
"optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 16.83, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.03, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 16.83, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [567.0, 567.0, 582.0, 582.0, 579.0, 576.0, 587.0, 504.0, 579.0, 522.0, 516.0, 579.0, 579.0, 584.0, 587.0, 525.0, 630.0, 582.0, 408.0, 582.0, 530.0, 630.0, 570.0, 579.0, 582.0, 576.0, 582.0, 573.0, 561.0, 582.0, 576.0, 573.0, 630.0, 579.0, 495.0, 579.0, 587.0, 582.0, 573.0, 561.0, 584.0, 489.0, 573.0, 512.0, 582.0, 582.0, 576.0, 539.0, 570.0, 519.0, 627.0, 579.0, 518.0, 516.0, 576.0, 582.0, 582.0, 587.0, 582.0, 582.0, 576.0, 582.0, 573.0, 579.0, 579.0, 582.0, 570.0, 630.0, 237.0, 573.0, 590.0, 573.0, 582.0, 576.0, 579.0, 576.0, 587.0, 582.0, 582.0, 579.0, 576.0, 579.0, 521.0, 582.0, 570.0, 576.0, 582.0, 582.0, 582.0, 582.0, 579.0, 579.0, 582.0, 579.0, 513.0, 582.0, 582.0, 582.0, 576.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [278.0, 289.0, 277.0, 290.0, 291.0, 291.0, 279.0, 303.0, 295.0, 284.0, 294.0, 282.0, 295.0, 292.0, 257.0, 247.0, 289.0, 290.0, 250.0, 272.0, 249.0, 267.0, 290.0, 289.0, 302.0, 277.0, 299.0, 285.0, 304.0, 283.0, 267.0, 258.0, 299.0, 331.0, 285.0, 297.0, 205.0, 203.0, 291.0, 291.0, 269.0, 261.0, 313.0, 317.0, 288.0, 282.0, 289.0, 290.0, 288.0, 294.0, 293.0, 283.0, 298.0, 284.0, 288.0, 285.0, 284.0, 277.0, 296.0, 286.0, 283.0, 293.0, 291.0, 282.0, 316.0, 314.0, 286.0, 293.0, 256.0, 239.0, 287.0, 292.0, 309.0, 278.0, 294.0, 288.0, 287.0, 286.0, 297.0, 264.0, 291.0, 293.0, 240.0, 249.0, 292.0, 281.0, 262.0, 250.0, 297.0, 285.0, 285.0, 297.0, 279.0, 297.0, 262.0, 277.0, 287.0, 
283.0, 266.0, 253.0, 321.0, 306.0, 286.0, 293.0, 272.0, 246.0, 274.0, 242.0, 282.0, 294.0, 284.0, 298.0, 288.0, 294.0, 285.0, 302.0, 295.0, 287.0, 288.0, 294.0, 279.0, 297.0, 289.0, 293.0, 277.0, 296.0, 286.0, 293.0, 304.0, 275.0, 290.0, 292.0, 294.0, 276.0, 326.0, 304.0, 122.0, 115.0, 280.0, 293.0, 291.0, 299.0, 293.0, 280.0, 291.0, 291.0, 291.0, 285.0, 292.0, 287.0, 287.0, 289.0, 285.0, 302.0, 299.0, 283.0, 285.0, 297.0, 283.0, 296.0, 289.0, 287.0, 291.0, 288.0, 269.0, 252.0, 287.0, 295.0, 269.0, 301.0, 282.0, 294.0, 293.0, 289.0, 293.0, 289.0, 293.0, 289.0, 289.0, 293.0, 291.0, 288.0, 281.0, 298.0, 275.0, 307.0, 294.0, 285.0, 256.0, 257.0, 295.0, 287.0, 280.0, 302.0, 285.0, 297.0, 291.0, 285.0, 302.0, 280.0]}, "sampler_perf": {"mean_env_wait_ms": 1.3849427098912996, "mean_processing_ms": 0.3448405344667729, "mean_inference_ms": 1.9385368397952782}, "off_policy_estimator": {}, "info": {"num_steps_trained": 3864000, "num_steps_sampled": 2060800, "sample_time_ms": 20359.83, "load_time_ms": 36.714, "grad_time_ms": 8574.991, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.003171335905790329, "policy_loss": -0.005784957204014063, "vf_loss": 95.20501708984375, "vf_explained_var": 0.7632928490638733, "kl": 0.001863123499788344, "entropy": 1.1284128427505493, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2060800, "episodes_total": 5152, "training_iteration": 161, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-05-24", "timestamp": 1660251924, "time_this_iter_s": 28.188406705856323, "time_total_s": 10341.400842905045, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 10341.400842905045, "timesteps_since_restore": 2060800, "iterations_since_restore": 161, "perf": {"cpu_util_percent": 36.3875, "ram_util_percent": 58.585}}
-{"episode_reward_max": 630.0, "episode_reward_min": 237.0, "episode_reward_mean": 568.36, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 115.0}, "policy_reward_max": {"ppo": 331.0}, "policy_reward_mean": {"ppo": 284.18}, "custom_metrics": {"sparse_reward_mean": 196.6, "sparse_reward_min": 80, "sparse_reward_max": 220, "shaped_reward_mean": 175.16, "shaped_reward_min": 77, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.88, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 17.91, "onion_pickup_agent_1_min": 12, "onion_pickup_agent_1_max": 27, "useful_onion_pickup_agent_0_mean": 15.43, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 17.21, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 25, "onion_drop_agent_0_mean": 0.7, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.57, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.36, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.26, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 14.85, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.1, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 6.3, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 13, "dish_pickup_agent_1_mean": 5.37, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.32, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.5, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.62, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.55, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 7, "useful_dish_drop_agent_0_mean": 0.09, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.12, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.42, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.56, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.35, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.52, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.85, 
"optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.1, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.85, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.1, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [576.0, 579.0, 582.0, 587.0, 582.0, 573.0, 576.0, 579.0, 573.0, 627.0, 558.0, 570.0, 579.0, 579.0, 573.0, 581.0, 582.0, 539.0, 582.0, 579.0, 582.0, 579.0, 570.0, 567.0, 522.0, 587.0, 564.0, 584.0, 507.0, 493.0, 570.0, 579.0, 579.0, 582.0, 570.0, 630.0, 237.0, 573.0, 590.0, 573.0, 582.0, 576.0, 579.0, 576.0, 587.0, 582.0, 582.0, 579.0, 576.0, 579.0, 521.0, 582.0, 570.0, 576.0, 582.0, 582.0, 582.0, 582.0, 579.0, 579.0, 582.0, 579.0, 513.0, 582.0, 582.0, 582.0, 576.0, 582.0, 567.0, 567.0, 582.0, 582.0, 579.0, 576.0, 587.0, 504.0, 579.0, 522.0, 516.0, 579.0, 579.0, 584.0, 587.0, 525.0, 630.0, 582.0, 408.0, 582.0, 530.0, 630.0, 570.0, 579.0, 582.0, 576.0, 582.0, 573.0, 561.0, 582.0, 576.0, 573.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [291.0, 285.0, 291.0, 288.0, 296.0, 286.0, 293.0, 294.0, 291.0, 291.0, 287.0, 286.0, 282.0, 294.0, 290.0, 289.0, 283.0, 290.0, 313.0, 314.0, 289.0, 269.0, 284.0, 286.0, 295.0, 284.0, 283.0, 296.0, 284.0, 289.0, 284.0, 297.0, 283.0, 299.0, 268.0, 271.0, 302.0, 280.0, 294.0, 285.0, 297.0, 285.0, 293.0, 286.0, 282.0, 288.0, 302.0, 265.0, 252.0, 270.0, 291.0, 296.0, 284.0, 280.0, 285.0, 299.0, 249.0, 258.0, 250.0, 243.0, 279.0, 291.0, 280.0, 299.0, 304.0, 275.0, 290.0, 292.0, 294.0, 276.0, 326.0, 304.0, 122.0, 115.0, 280.0, 293.0, 291.0, 299.0, 293.0, 280.0, 291.0, 291.0, 291.0, 285.0, 292.0, 287.0, 287.0, 289.0, 285.0, 302.0, 299.0, 283.0, 285.0, 297.0, 283.0, 296.0, 289.0, 
287.0, 291.0, 288.0, 269.0, 252.0, 287.0, 295.0, 269.0, 301.0, 282.0, 294.0, 293.0, 289.0, 293.0, 289.0, 293.0, 289.0, 289.0, 293.0, 291.0, 288.0, 281.0, 298.0, 275.0, 307.0, 294.0, 285.0, 256.0, 257.0, 295.0, 287.0, 280.0, 302.0, 285.0, 297.0, 291.0, 285.0, 302.0, 280.0, 278.0, 289.0, 277.0, 290.0, 291.0, 291.0, 279.0, 303.0, 295.0, 284.0, 294.0, 282.0, 295.0, 292.0, 257.0, 247.0, 289.0, 290.0, 250.0, 272.0, 249.0, 267.0, 290.0, 289.0, 302.0, 277.0, 299.0, 285.0, 304.0, 283.0, 267.0, 258.0, 299.0, 331.0, 285.0, 297.0, 205.0, 203.0, 291.0, 291.0, 269.0, 261.0, 313.0, 317.0, 288.0, 282.0, 289.0, 290.0, 288.0, 294.0, 293.0, 283.0, 298.0, 284.0, 288.0, 285.0, 284.0, 277.0, 296.0, 286.0, 283.0, 293.0, 291.0, 282.0]}, "sampler_perf": {"mean_env_wait_ms": 1.3791010078296764, "mean_processing_ms": 0.34367607505559905, "mean_inference_ms": 1.932410583140313}, "off_policy_estimator": {}, "info": {"num_steps_trained": 3888000, "num_steps_sampled": 2073600, "sample_time_ms": 20389.495, "load_time_ms": 36.868, "grad_time_ms": 8516.869, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.007552561815828085, "policy_loss": -0.0015357719967141747, "vf_loss": 96.43359375, "vf_explained_var": 0.7504541277885437, "kl": 0.0026693844702094793, "entropy": 1.110058307647705, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2073600, "episodes_total": 5184, "training_iteration": 162, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-05-53", "timestamp": 1660251953, "time_this_iter_s": 29.546289205551147, "time_total_s": 10370.947132110596, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 10370.947132110596, "timesteps_since_restore": 2073600, "iterations_since_restore": 162, "perf": {"cpu_util_percent": 34.892682926829266, "ram_util_percent": 58.55365853658536}}
-{"episode_reward_max": 630.0, "episode_reward_min": 345.0, "episode_reward_mean": 568.79, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 172.0}, "policy_reward_max": {"ppo": 331.0}, "policy_reward_mean": {"ppo": 284.395}, "custom_metrics": {"sparse_reward_mean": 197.0, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 174.79, "shaped_reward_min": 105, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.0, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 17.92, "onion_pickup_agent_1_min": 12, "onion_pickup_agent_1_max": 27, "useful_onion_pickup_agent_0_mean": 15.5, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 17.25, "useful_onion_pickup_agent_1_min": 11, "useful_onion_pickup_agent_1_max": 25, "onion_drop_agent_0_mean": 0.74, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.59, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.41, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.26, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 14.93, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.01, "potting_onion_agent_1_min": 12, "potting_onion_agent_1_max": 25, "dish_pickup_agent_0_mean": 6.21, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 13, "dish_pickup_agent_1_mean": 5.51, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.11, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.58, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.69, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.6, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 7, "useful_dish_drop_agent_0_mean": 0.09, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.12, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.3, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.69, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.23, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.65, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.93, 
"optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.01, "optimal_onion_potting_agent_1_min": 12, "optimal_onion_potting_agent_1_max": 25, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.93, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.01, "viable_onion_potting_agent_1_min": 12, "viable_onion_potting_agent_1_max": 25, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 579.0, 582.0, 470.0, 564.0, 576.0, 576.0, 576.0, 582.0, 587.0, 579.0, 582.0, 579.0, 345.0, 570.0, 576.0, 582.0, 576.0, 576.0, 579.0, 576.0, 567.0, 567.0, 582.0, 579.0, 564.0, 570.0, 579.0, 582.0, 579.0, 576.0, 627.0, 582.0, 582.0, 576.0, 582.0, 567.0, 567.0, 582.0, 582.0, 579.0, 576.0, 587.0, 504.0, 579.0, 522.0, 516.0, 579.0, 579.0, 584.0, 587.0, 525.0, 630.0, 582.0, 408.0, 582.0, 530.0, 630.0, 570.0, 579.0, 582.0, 576.0, 582.0, 573.0, 561.0, 582.0, 576.0, 573.0, 576.0, 579.0, 582.0, 587.0, 582.0, 573.0, 576.0, 579.0, 573.0, 627.0, 558.0, 570.0, 579.0, 579.0, 573.0, 581.0, 582.0, 539.0, 582.0, 579.0, 582.0, 579.0, 570.0, 567.0, 522.0, 587.0, 564.0, 584.0, 507.0, 493.0, 570.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [281.0, 301.0, 285.0, 294.0, 279.0, 303.0, 227.0, 243.0, 263.0, 301.0, 280.0, 296.0, 276.0, 300.0, 283.0, 293.0, 286.0, 296.0, 301.0, 286.0, 288.0, 291.0, 288.0, 294.0, 302.0, 277.0, 173.0, 172.0, 289.0, 281.0, 285.0, 291.0, 306.0, 276.0, 280.0, 296.0, 290.0, 286.0, 293.0, 286.0, 283.0, 293.0, 280.0, 287.0, 289.0, 278.0, 298.0, 284.0, 285.0, 294.0, 280.0, 284.0, 282.0, 288.0, 281.0, 298.0, 295.0, 287.0, 282.0, 297.0, 285.0, 291.0, 306.0, 321.0, 280.0, 302.0, 285.0, 297.0, 291.0, 285.0, 302.0, 280.0, 278.0, 289.0, 277.0, 290.0, 291.0, 291.0, 279.0, 303.0, 295.0, 284.0, 294.0, 282.0, 295.0, 292.0, 257.0, 247.0, 289.0, 290.0, 250.0, 272.0, 249.0, 267.0, 290.0, 289.0, 302.0, 
277.0, 299.0, 285.0, 304.0, 283.0, 267.0, 258.0, 299.0, 331.0, 285.0, 297.0, 205.0, 203.0, 291.0, 291.0, 269.0, 261.0, 313.0, 317.0, 288.0, 282.0, 289.0, 290.0, 288.0, 294.0, 293.0, 283.0, 298.0, 284.0, 288.0, 285.0, 284.0, 277.0, 296.0, 286.0, 283.0, 293.0, 291.0, 282.0, 291.0, 285.0, 291.0, 288.0, 296.0, 286.0, 293.0, 294.0, 291.0, 291.0, 287.0, 286.0, 282.0, 294.0, 290.0, 289.0, 283.0, 290.0, 313.0, 314.0, 289.0, 269.0, 284.0, 286.0, 295.0, 284.0, 283.0, 296.0, 284.0, 289.0, 284.0, 297.0, 283.0, 299.0, 268.0, 271.0, 302.0, 280.0, 294.0, 285.0, 297.0, 285.0, 293.0, 286.0, 282.0, 288.0, 302.0, 265.0, 252.0, 270.0, 291.0, 296.0, 284.0, 280.0, 285.0, 299.0, 249.0, 258.0, 250.0, 243.0, 279.0, 291.0, 280.0, 299.0]}, "sampler_perf": {"mean_env_wait_ms": 1.3733429258954066, "mean_processing_ms": 0.3425317863430243, "mean_inference_ms": 1.9266299653449164}, "off_policy_estimator": {}, "info": {"num_steps_trained": 3912000, "num_steps_sampled": 2086400, "sample_time_ms": 20824.632, "load_time_ms": 36.596, "grad_time_ms": 8758.37, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.004362883511930704, "policy_loss": -0.003907353617250919, "vf_loss": 88.24420166015625, "vf_explained_var": 0.7741295695304871, "kl": 0.002105970401316881, "entropy": 1.1083542108535767, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2086400, "episodes_total": 5216, "training_iteration": 163, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-06-26", "timestamp": 1660251986, "time_this_iter_s": 32.18382000923157, "time_total_s": 10403.130952119827, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 10403.130952119827, "timesteps_since_restore": 2086400, "iterations_since_restore": 163, "perf": {"cpu_util_percent": 33.87608695652174, "ram_util_percent": 58.582608695652176}}
-{"episode_reward_max": 630.0, "episode_reward_min": 336.0, "episode_reward_mean": 565.98, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 162.0}, "policy_reward_max": {"ppo": 321.0}, "policy_reward_mean": {"ppo": 282.99}, "custom_metrics": {"sparse_reward_mean": 196.0, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 173.98, "shaped_reward_min": 96, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.87, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 17.89, "onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 27, "useful_onion_pickup_agent_0_mean": 15.41, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 17.2, "useful_onion_pickup_agent_1_min": 11, "useful_onion_pickup_agent_1_max": 25, "onion_drop_agent_0_mean": 0.72, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.58, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.36, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.25, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 14.83, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.0, "potting_onion_agent_1_min": 11, "potting_onion_agent_1_max": 25, "dish_pickup_agent_0_mean": 6.22, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 13, "dish_pickup_agent_1_mean": 5.46, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.12, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.46, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.65, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.63, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.08, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.15, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.31, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.64, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.26, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.6, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.83, 
"optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.0, "optimal_onion_potting_agent_1_min": 11, "optimal_onion_potting_agent_1_max": 25, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.83, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.0, "viable_onion_potting_agent_1_min": 11, "viable_onion_potting_agent_1_max": 25, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 587.0, 501.0, 579.0, 573.0, 558.0, 582.0, 525.0, 579.0, 430.0, 576.0, 530.0, 519.0, 579.0, 587.0, 582.0, 336.0, 570.0, 465.0, 582.0, 582.0, 582.0, 573.0, 582.0, 576.0, 582.0, 582.0, 627.0, 587.0, 630.0, 579.0, 579.0, 561.0, 582.0, 576.0, 573.0, 576.0, 579.0, 582.0, 587.0, 582.0, 573.0, 576.0, 579.0, 573.0, 627.0, 558.0, 570.0, 579.0, 579.0, 573.0, 581.0, 582.0, 539.0, 582.0, 579.0, 582.0, 579.0, 570.0, 567.0, 522.0, 587.0, 564.0, 584.0, 507.0, 493.0, 570.0, 579.0, 582.0, 579.0, 582.0, 470.0, 564.0, 576.0, 576.0, 576.0, 582.0, 587.0, 579.0, 582.0, 579.0, 345.0, 570.0, 576.0, 582.0, 576.0, 576.0, 579.0, 576.0, 567.0, 567.0, 582.0, 579.0, 564.0, 570.0, 579.0, 582.0, 579.0, 576.0, 627.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [287.0, 292.0, 295.0, 292.0, 255.0, 246.0, 279.0, 300.0, 283.0, 290.0, 285.0, 273.0, 301.0, 281.0, 264.0, 261.0, 285.0, 294.0, 218.0, 212.0, 286.0, 290.0, 263.0, 267.0, 272.0, 247.0, 300.0, 279.0, 304.0, 283.0, 301.0, 281.0, 162.0, 174.0, 279.0, 291.0, 231.0, 234.0, 276.0, 306.0, 286.0, 296.0, 281.0, 301.0, 288.0, 285.0, 290.0, 292.0, 291.0, 285.0, 298.0, 284.0, 296.0, 286.0, 311.0, 316.0, 290.0, 297.0, 319.0, 311.0, 289.0, 290.0, 293.0, 286.0, 284.0, 277.0, 296.0, 286.0, 283.0, 293.0, 291.0, 282.0, 291.0, 285.0, 291.0, 288.0, 296.0, 286.0, 293.0, 294.0, 291.0, 291.0, 287.0, 286.0, 282.0, 294.0, 290.0, 289.0, 283.0, 290.0, 313.0, 314.0, 289.0, 269.0, 284.0, 286.0, 295.0, 
284.0, 283.0, 296.0, 284.0, 289.0, 284.0, 297.0, 283.0, 299.0, 268.0, 271.0, 302.0, 280.0, 294.0, 285.0, 297.0, 285.0, 293.0, 286.0, 282.0, 288.0, 302.0, 265.0, 252.0, 270.0, 291.0, 296.0, 284.0, 280.0, 285.0, 299.0, 249.0, 258.0, 250.0, 243.0, 279.0, 291.0, 280.0, 299.0, 281.0, 301.0, 285.0, 294.0, 279.0, 303.0, 227.0, 243.0, 263.0, 301.0, 280.0, 296.0, 276.0, 300.0, 283.0, 293.0, 286.0, 296.0, 301.0, 286.0, 288.0, 291.0, 288.0, 294.0, 302.0, 277.0, 173.0, 172.0, 289.0, 281.0, 285.0, 291.0, 306.0, 276.0, 280.0, 296.0, 290.0, 286.0, 293.0, 286.0, 283.0, 293.0, 280.0, 287.0, 289.0, 278.0, 298.0, 284.0, 285.0, 294.0, 280.0, 284.0, 282.0, 288.0, 281.0, 298.0, 295.0, 287.0, 282.0, 297.0, 285.0, 291.0, 306.0, 321.0]}, "sampler_perf": {"mean_env_wait_ms": 1.3676747484724832, "mean_processing_ms": 0.341408599904806, "mean_inference_ms": 1.9212242933819834}, "off_policy_estimator": {}, "info": {"num_steps_trained": 3936000, "num_steps_sampled": 2099200, "sample_time_ms": 20928.906, "load_time_ms": 37.45, "grad_time_ms": 9167.09, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0035528007429093122, "policy_loss": -0.005154869984835386, "vf_loss": 92.63870239257812, "vf_explained_var": 0.7672746181488037, "kl": 0.0020837958436459303, "entropy": 1.1124038696289062, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2099200, "episodes_total": 5248, "training_iteration": 164, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-06-59", "timestamp": 1660252019, "time_this_iter_s": 33.07875204086304, "time_total_s": 10436.20970416069, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 10436.20970416069, "timesteps_since_restore": 2099200, "iterations_since_restore": 164, "perf": {"cpu_util_percent": 35.41276595744681, "ram_util_percent": 58.536170212765946}}
-{"episode_reward_max": 633.0, "episode_reward_min": 336.0, "episode_reward_mean": 565.62, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 162.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 282.81}, "custom_metrics": {"sparse_reward_mean": 195.8, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 174.02, "shaped_reward_min": 96, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.68, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 17.95, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 15.35, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 17.25, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.63, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.59, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.29, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.23, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 14.78, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 17.06, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 25, "dish_pickup_agent_0_mean": 6.3, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 13, "dish_pickup_agent_1_mean": 5.36, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.28, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.37, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.64, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.6, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.08, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.16, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.36, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.55, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.32, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.52, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.78, 
"optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 17.06, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 25, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.78, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 17.06, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 25, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [576.0, 579.0, 576.0, 573.0, 587.0, 573.0, 633.0, 630.0, 630.0, 573.0, 582.0, 582.0, 582.0, 516.0, 579.0, 582.0, 627.0, 576.0, 416.0, 582.0, 582.0, 579.0, 558.0, 582.0, 576.0, 576.0, 525.0, 579.0, 579.0, 513.0, 582.0, 582.0, 507.0, 493.0, 570.0, 579.0, 582.0, 579.0, 582.0, 470.0, 564.0, 576.0, 576.0, 576.0, 582.0, 587.0, 579.0, 582.0, 579.0, 345.0, 570.0, 576.0, 582.0, 576.0, 576.0, 579.0, 576.0, 567.0, 567.0, 582.0, 579.0, 564.0, 570.0, 579.0, 582.0, 579.0, 576.0, 627.0, 579.0, 587.0, 501.0, 579.0, 573.0, 558.0, 582.0, 525.0, 579.0, 430.0, 576.0, 530.0, 519.0, 579.0, 587.0, 582.0, 336.0, 570.0, 465.0, 582.0, 582.0, 582.0, 573.0, 582.0, 576.0, 582.0, 582.0, 627.0, 587.0, 630.0, 579.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [286.0, 290.0, 283.0, 296.0, 295.0, 281.0, 305.0, 268.0, 297.0, 290.0, 296.0, 277.0, 314.0, 319.0, 319.0, 311.0, 308.0, 322.0, 291.0, 282.0, 296.0, 286.0, 297.0, 285.0, 285.0, 297.0, 266.0, 250.0, 295.0, 284.0, 291.0, 291.0, 324.0, 303.0, 298.0, 278.0, 213.0, 203.0, 287.0, 295.0, 296.0, 286.0, 283.0, 296.0, 279.0, 279.0, 285.0, 297.0, 282.0, 294.0, 285.0, 291.0, 274.0, 251.0, 293.0, 286.0, 289.0, 290.0, 267.0, 246.0, 283.0, 299.0, 299.0, 283.0, 249.0, 258.0, 250.0, 243.0, 279.0, 291.0, 280.0, 299.0, 281.0, 301.0, 285.0, 294.0, 279.0, 303.0, 227.0, 243.0, 263.0, 301.0, 280.0, 296.0, 276.0, 300.0, 283.0, 293.0, 286.0, 296.0, 301.0, 286.0, 288.0, 291.0, 288.0, 294.0, 302.0, 
277.0, 173.0, 172.0, 289.0, 281.0, 285.0, 291.0, 306.0, 276.0, 280.0, 296.0, 290.0, 286.0, 293.0, 286.0, 283.0, 293.0, 280.0, 287.0, 289.0, 278.0, 298.0, 284.0, 285.0, 294.0, 280.0, 284.0, 282.0, 288.0, 281.0, 298.0, 295.0, 287.0, 282.0, 297.0, 285.0, 291.0, 306.0, 321.0, 287.0, 292.0, 295.0, 292.0, 255.0, 246.0, 279.0, 300.0, 283.0, 290.0, 285.0, 273.0, 301.0, 281.0, 264.0, 261.0, 285.0, 294.0, 218.0, 212.0, 286.0, 290.0, 263.0, 267.0, 272.0, 247.0, 300.0, 279.0, 304.0, 283.0, 301.0, 281.0, 162.0, 174.0, 279.0, 291.0, 231.0, 234.0, 276.0, 306.0, 286.0, 296.0, 281.0, 301.0, 288.0, 285.0, 290.0, 292.0, 291.0, 285.0, 298.0, 284.0, 296.0, 286.0, 311.0, 316.0, 290.0, 297.0, 319.0, 311.0, 289.0, 290.0, 293.0, 286.0]}, "sampler_perf": {"mean_env_wait_ms": 1.3620936512767547, "mean_processing_ms": 0.34030575499793914, "mean_inference_ms": 1.9161448666876886}, "off_policy_estimator": {}, "info": {"num_steps_trained": 3960000, "num_steps_sampled": 2112000, "sample_time_ms": 21120.168, "load_time_ms": 37.608, "grad_time_ms": 9339.122, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0019619378726929426, "policy_loss": -0.006335819140076637, "vf_loss": 88.54428100585938, "vf_explained_var": 0.7676218152046204, "kl": 0.0017338074976578355, "entropy": 1.1133431196212769, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2112000, "episodes_total": 5280, "training_iteration": 165, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-07-30", "timestamp": 1660252050, "time_this_iter_s": 31.535957098007202, "time_total_s": 10467.745661258698, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 10467.745661258698, "timesteps_since_restore": 2112000, "iterations_since_restore": 165, "perf": {"cpu_util_percent": 34.425000000000004, "ram_util_percent": 58.65909090909092}}
-{"episode_reward_max": 633.0, "episode_reward_min": 336.0, "episode_reward_mean": 569.62, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 162.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 284.81}, "custom_metrics": {"sparse_reward_mean": 197.2, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 175.22, "shaped_reward_min": 96, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.54, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 18.19, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 15.24, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 17.44, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.58, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.61, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.24, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.24, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 14.67, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 17.3, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 6.43, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 13, "dish_pickup_agent_1_mean": 5.11, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.57, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.25, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.56, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.52, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.06, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.12, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.59, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.38, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.56, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.34, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.67, 
"optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 17.3, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.67, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 17.3, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [573.0, 567.0, 536.0, 579.0, 582.0, 582.0, 567.0, 587.0, 522.0, 582.0, 579.0, 578.0, 570.0, 582.0, 582.0, 582.0, 582.0, 582.0, 582.0, 570.0, 573.0, 573.0, 564.0, 576.0, 576.0, 573.0, 570.0, 579.0, 573.0, 579.0, 570.0, 579.0, 582.0, 579.0, 576.0, 627.0, 579.0, 587.0, 501.0, 579.0, 573.0, 558.0, 582.0, 525.0, 579.0, 430.0, 576.0, 530.0, 519.0, 579.0, 587.0, 582.0, 336.0, 570.0, 465.0, 582.0, 582.0, 582.0, 573.0, 582.0, 576.0, 582.0, 582.0, 627.0, 587.0, 630.0, 579.0, 579.0, 576.0, 579.0, 576.0, 573.0, 587.0, 573.0, 633.0, 630.0, 630.0, 573.0, 582.0, 582.0, 582.0, 516.0, 579.0, 582.0, 627.0, 576.0, 416.0, 582.0, 582.0, 579.0, 558.0, 582.0, 576.0, 576.0, 525.0, 579.0, 579.0, 513.0, 582.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [284.0, 289.0, 284.0, 283.0, 261.0, 275.0, 289.0, 290.0, 283.0, 299.0, 299.0, 283.0, 275.0, 292.0, 298.0, 289.0, 252.0, 270.0, 293.0, 289.0, 303.0, 276.0, 290.0, 288.0, 293.0, 277.0, 278.0, 304.0, 285.0, 297.0, 301.0, 281.0, 297.0, 285.0, 281.0, 301.0, 283.0, 299.0, 283.0, 287.0, 292.0, 281.0, 287.0, 286.0, 273.0, 291.0, 296.0, 280.0, 281.0, 295.0, 290.0, 283.0, 296.0, 274.0, 297.0, 282.0, 297.0, 276.0, 303.0, 276.0, 292.0, 278.0, 284.0, 295.0, 295.0, 287.0, 282.0, 297.0, 285.0, 291.0, 306.0, 321.0, 287.0, 292.0, 295.0, 292.0, 255.0, 246.0, 279.0, 300.0, 283.0, 290.0, 285.0, 273.0, 301.0, 281.0, 264.0, 261.0, 285.0, 294.0, 218.0, 212.0, 286.0, 290.0, 263.0, 267.0, 272.0, 
247.0, 300.0, 279.0, 304.0, 283.0, 301.0, 281.0, 162.0, 174.0, 279.0, 291.0, 231.0, 234.0, 276.0, 306.0, 286.0, 296.0, 281.0, 301.0, 288.0, 285.0, 290.0, 292.0, 291.0, 285.0, 298.0, 284.0, 296.0, 286.0, 311.0, 316.0, 290.0, 297.0, 319.0, 311.0, 289.0, 290.0, 293.0, 286.0, 286.0, 290.0, 283.0, 296.0, 295.0, 281.0, 305.0, 268.0, 297.0, 290.0, 296.0, 277.0, 314.0, 319.0, 319.0, 311.0, 308.0, 322.0, 291.0, 282.0, 296.0, 286.0, 297.0, 285.0, 285.0, 297.0, 266.0, 250.0, 295.0, 284.0, 291.0, 291.0, 324.0, 303.0, 298.0, 278.0, 213.0, 203.0, 287.0, 295.0, 296.0, 286.0, 283.0, 296.0, 279.0, 279.0, 285.0, 297.0, 282.0, 294.0, 285.0, 291.0, 274.0, 251.0, 293.0, 286.0, 289.0, 290.0, 267.0, 246.0, 283.0, 299.0, 299.0, 283.0]}, "sampler_perf": {"mean_env_wait_ms": 1.3565665278792982, "mean_processing_ms": 0.33921520895760066, "mean_inference_ms": 1.9109392629722073}, "off_policy_estimator": {}, "info": {"num_steps_trained": 3984000, "num_steps_sampled": 2124800, "sample_time_ms": 21230.051, "load_time_ms": 38.109, "grad_time_ms": 9623.189, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.006461843382567167, "policy_loss": -0.0018003573641180992, "vf_loss": 88.1545181274414, "vf_explained_var": 0.7546200752258301, "kl": 0.00197615590877831, "entropy": 1.106500267982483, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2124800, "episodes_total": 5312, "training_iteration": 166, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-08-04", "timestamp": 1660252084, "time_this_iter_s": 33.33124303817749, "time_total_s": 10501.076904296875, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 10501.076904296875, "timesteps_since_restore": 2124800, "iterations_since_restore": 166, "perf": {"cpu_util_percent": 33.48510638297872, "ram_util_percent": 58.49574468085109}}
-{"episode_reward_max": 633.0, "episode_reward_min": 416.0, "episode_reward_mean": 574.79, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 203.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 287.395}, "custom_metrics": {"sparse_reward_mean": 198.8, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 177.19, "shaped_reward_min": 136, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.54, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 18.23, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 15.32, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 17.6, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.41, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.51, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.17, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.22, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 14.86, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 17.42, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 6.5, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 13, "dish_pickup_agent_1_mean": 5.02, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.78, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.32, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.53, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.49, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.06, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.09, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.71, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.3, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.7, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.25, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.86, 
"optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 17.42, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.86, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 17.42, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 582.0, 576.0, 582.0, 573.0, 576.0, 576.0, 576.0, 582.0, 579.0, 579.0, 582.0, 582.0, 582.0, 579.0, 579.0, 582.0, 530.0, 579.0, 582.0, 579.0, 579.0, 525.0, 582.0, 579.0, 582.0, 582.0, 576.0, 582.0, 582.0, 522.0, 579.0, 587.0, 630.0, 579.0, 579.0, 576.0, 579.0, 576.0, 573.0, 587.0, 573.0, 633.0, 630.0, 630.0, 573.0, 582.0, 582.0, 582.0, 516.0, 579.0, 582.0, 627.0, 576.0, 416.0, 582.0, 582.0, 579.0, 558.0, 582.0, 576.0, 576.0, 525.0, 579.0, 579.0, 513.0, 582.0, 582.0, 573.0, 567.0, 536.0, 579.0, 582.0, 582.0, 567.0, 587.0, 522.0, 582.0, 579.0, 578.0, 570.0, 582.0, 582.0, 582.0, 582.0, 582.0, 582.0, 570.0, 573.0, 573.0, 564.0, 576.0, 576.0, 573.0, 570.0, 579.0, 573.0, 579.0, 570.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [283.0, 296.0, 301.0, 281.0, 293.0, 283.0, 294.0, 288.0, 288.0, 285.0, 292.0, 284.0, 282.0, 294.0, 291.0, 285.0, 293.0, 289.0, 292.0, 287.0, 288.0, 291.0, 301.0, 281.0, 295.0, 287.0, 291.0, 291.0, 297.0, 282.0, 294.0, 285.0, 293.0, 289.0, 269.0, 261.0, 279.0, 300.0, 298.0, 284.0, 270.0, 309.0, 288.0, 291.0, 263.0, 262.0, 293.0, 289.0, 303.0, 276.0, 286.0, 296.0, 301.0, 281.0, 286.0, 290.0, 296.0, 286.0, 300.0, 282.0, 272.0, 250.0, 303.0, 276.0, 290.0, 297.0, 319.0, 311.0, 289.0, 290.0, 293.0, 286.0, 286.0, 290.0, 283.0, 296.0, 295.0, 281.0, 305.0, 268.0, 297.0, 290.0, 296.0, 277.0, 314.0, 319.0, 319.0, 311.0, 308.0, 322.0, 291.0, 282.0, 296.0, 286.0, 297.0, 285.0, 285.0, 
297.0, 266.0, 250.0, 295.0, 284.0, 291.0, 291.0, 324.0, 303.0, 298.0, 278.0, 213.0, 203.0, 287.0, 295.0, 296.0, 286.0, 283.0, 296.0, 279.0, 279.0, 285.0, 297.0, 282.0, 294.0, 285.0, 291.0, 274.0, 251.0, 293.0, 286.0, 289.0, 290.0, 267.0, 246.0, 283.0, 299.0, 299.0, 283.0, 284.0, 289.0, 284.0, 283.0, 261.0, 275.0, 289.0, 290.0, 283.0, 299.0, 299.0, 283.0, 275.0, 292.0, 298.0, 289.0, 252.0, 270.0, 293.0, 289.0, 303.0, 276.0, 290.0, 288.0, 293.0, 277.0, 278.0, 304.0, 285.0, 297.0, 301.0, 281.0, 297.0, 285.0, 281.0, 301.0, 283.0, 299.0, 283.0, 287.0, 292.0, 281.0, 287.0, 286.0, 273.0, 291.0, 296.0, 280.0, 281.0, 295.0, 290.0, 283.0, 296.0, 274.0, 297.0, 282.0, 297.0, 276.0, 303.0, 276.0, 292.0, 278.0, 284.0, 295.0]}, "sampler_perf": {"mean_env_wait_ms": 1.351083451607247, "mean_processing_ms": 0.3381304952990823, "mean_inference_ms": 1.9054854328203157}, "off_policy_estimator": {}, "info": {"num_steps_trained": 4008000, "num_steps_sampled": 2137600, "sample_time_ms": 21012.721, "load_time_ms": 38.367, "grad_time_ms": 10014.737, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.006168690975755453, "policy_loss": -0.002181840827688575, "vf_loss": 88.96065521240234, "vf_explained_var": 0.762434184551239, "kl": 0.0017693521222099662, "entropy": 1.0910512208938599, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2137600, "episodes_total": 5344, "training_iteration": 167, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-08-35", "timestamp": 1660252115, "time_this_iter_s": 31.34629511833191, "time_total_s": 10532.423199415207, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 10532.423199415207, "timesteps_since_restore": 2137600, "iterations_since_restore": 167, "perf": {"cpu_util_percent": 33.334090909090904, "ram_util_percent": 58.479545454545466}}
-{"episode_reward_max": 633.0, "episode_reward_min": 452.0, "episode_reward_mean": 571.81, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 223.0}, "policy_reward_max": {"ppo": 326.0}, "policy_reward_mean": {"ppo": 285.905}, "custom_metrics": {"sparse_reward_mean": 197.8, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 176.21, "shaped_reward_min": 132, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.71, "onion_pickup_agent_0_min": 10, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 17.73, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 15.4, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 17.13, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.41, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.35, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.21, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.14, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.01, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 17.11, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.25, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.09, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.59, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.41, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.48, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.4, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.05, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.54, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.43, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.53, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.39, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.01, 
"optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 17.11, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.01, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 17.11, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 570.0, 525.0, 582.0, 579.0, 452.0, 579.0, 567.0, 530.0, 576.0, 582.0, 582.0, 525.0, 576.0, 627.0, 573.0, 573.0, 527.0, 576.0, 579.0, 564.0, 633.0, 582.0, 567.0, 536.0, 579.0, 579.0, 579.0, 579.0, 576.0, 576.0, 576.0, 579.0, 513.0, 582.0, 582.0, 573.0, 567.0, 536.0, 579.0, 582.0, 582.0, 567.0, 587.0, 522.0, 582.0, 579.0, 578.0, 570.0, 582.0, 582.0, 582.0, 582.0, 582.0, 582.0, 570.0, 573.0, 573.0, 564.0, 576.0, 576.0, 573.0, 570.0, 579.0, 573.0, 579.0, 570.0, 579.0, 579.0, 582.0, 576.0, 582.0, 573.0, 576.0, 576.0, 576.0, 582.0, 579.0, 579.0, 582.0, 582.0, 582.0, 579.0, 579.0, 582.0, 530.0, 579.0, 582.0, 579.0, 579.0, 525.0, 582.0, 579.0, 582.0, 582.0, 576.0, 582.0, 582.0, 522.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [283.0, 299.0, 292.0, 278.0, 255.0, 270.0, 287.0, 295.0, 292.0, 287.0, 223.0, 229.0, 274.0, 305.0, 272.0, 295.0, 272.0, 258.0, 280.0, 296.0, 291.0, 291.0, 288.0, 294.0, 269.0, 256.0, 294.0, 282.0, 312.0, 315.0, 291.0, 282.0, 302.0, 271.0, 261.0, 266.0, 281.0, 295.0, 298.0, 281.0, 280.0, 284.0, 326.0, 307.0, 296.0, 286.0, 288.0, 279.0, 267.0, 269.0, 298.0, 281.0, 288.0, 291.0, 291.0, 288.0, 294.0, 285.0, 275.0, 301.0, 294.0, 282.0, 303.0, 273.0, 289.0, 290.0, 267.0, 246.0, 283.0, 299.0, 299.0, 283.0, 284.0, 289.0, 284.0, 283.0, 261.0, 275.0, 289.0, 290.0, 283.0, 299.0, 299.0, 283.0, 275.0, 292.0, 298.0, 289.0, 252.0, 270.0, 293.0, 289.0, 303.0, 276.0, 290.0, 288.0, 293.0, 
277.0, 278.0, 304.0, 285.0, 297.0, 301.0, 281.0, 297.0, 285.0, 281.0, 301.0, 283.0, 299.0, 283.0, 287.0, 292.0, 281.0, 287.0, 286.0, 273.0, 291.0, 296.0, 280.0, 281.0, 295.0, 290.0, 283.0, 296.0, 274.0, 297.0, 282.0, 297.0, 276.0, 303.0, 276.0, 292.0, 278.0, 284.0, 295.0, 283.0, 296.0, 301.0, 281.0, 293.0, 283.0, 294.0, 288.0, 288.0, 285.0, 292.0, 284.0, 282.0, 294.0, 291.0, 285.0, 293.0, 289.0, 292.0, 287.0, 288.0, 291.0, 301.0, 281.0, 295.0, 287.0, 291.0, 291.0, 297.0, 282.0, 294.0, 285.0, 293.0, 289.0, 269.0, 261.0, 279.0, 300.0, 298.0, 284.0, 270.0, 309.0, 288.0, 291.0, 263.0, 262.0, 293.0, 289.0, 303.0, 276.0, 286.0, 296.0, 301.0, 281.0, 286.0, 290.0, 296.0, 286.0, 300.0, 282.0, 272.0, 250.0, 303.0, 276.0]}, "sampler_perf": {"mean_env_wait_ms": 1.3456449831196562, "mean_processing_ms": 0.3370529700799381, "mean_inference_ms": 1.8997987412977424}, "off_policy_estimator": {}, "info": {"num_steps_trained": 4032000, "num_steps_sampled": 2150400, "sample_time_ms": 20935.463, "load_time_ms": 38.009, "grad_time_ms": 9985.412, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0029837340116500854, "policy_loss": -0.005580740049481392, "vf_loss": 91.1910629272461, "vf_explained_var": 0.7490768432617188, "kl": 0.0017398769268766046, "entropy": 1.1092572212219238, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2150400, "episodes_total": 5376, "training_iteration": 168, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-09-04", "timestamp": 1660252144, "time_this_iter_s": 29.37734818458557, "time_total_s": 10561.800547599792, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 10561.800547599792, "timesteps_since_restore": 2150400, "iterations_since_restore": 168, "perf": {"cpu_util_percent": 32.31428571428572, "ram_util_percent": 58.38571428571428}}
-{"episode_reward_max": 633.0, "episode_reward_min": 452.0, "episode_reward_mean": 570.71, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 223.0}, "policy_reward_max": {"ppo": 326.0}, "policy_reward_mean": {"ppo": 285.355}, "custom_metrics": {"sparse_reward_mean": 197.4, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 175.91, "shaped_reward_min": 132, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.72, "onion_pickup_agent_0_min": 10, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 17.74, "onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 15.45, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 17.19, "useful_onion_pickup_agent_1_min": 11, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.44, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.3, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.17, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.11, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.94, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.14, "potting_onion_agent_1_min": 11, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 6.07, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.09, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.47, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.52, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.41, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.35, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.04, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.5, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.5, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.42, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.48, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.06, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 5, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.94, 
"optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.14, "optimal_onion_potting_agent_1_min": 11, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.94, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.14, "viable_onion_potting_agent_1_min": 11, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 579.0, 522.0, 576.0, 576.0, 582.0, 533.0, 536.0, 573.0, 570.0, 576.0, 579.0, 582.0, 576.0, 516.0, 579.0, 579.0, 579.0, 504.0, 582.0, 558.0, 579.0, 579.0, 576.0, 582.0, 582.0, 573.0, 573.0, 582.0, 576.0, 579.0, 579.0, 573.0, 579.0, 570.0, 579.0, 579.0, 582.0, 576.0, 582.0, 573.0, 576.0, 576.0, 576.0, 582.0, 579.0, 579.0, 582.0, 582.0, 582.0, 579.0, 579.0, 582.0, 530.0, 579.0, 582.0, 579.0, 579.0, 525.0, 582.0, 579.0, 582.0, 582.0, 576.0, 582.0, 582.0, 522.0, 579.0, 582.0, 570.0, 525.0, 582.0, 579.0, 452.0, 579.0, 567.0, 530.0, 576.0, 582.0, 582.0, 525.0, 576.0, 627.0, 573.0, 573.0, 527.0, 576.0, 579.0, 564.0, 633.0, 582.0, 567.0, 536.0, 579.0, 579.0, 579.0, 579.0, 576.0, 576.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [291.0, 288.0, 280.0, 299.0, 262.0, 260.0, 283.0, 293.0, 284.0, 292.0, 293.0, 289.0, 249.0, 284.0, 273.0, 263.0, 274.0, 299.0, 288.0, 282.0, 297.0, 279.0, 296.0, 283.0, 273.0, 309.0, 285.0, 291.0, 251.0, 265.0, 285.0, 294.0, 283.0, 296.0, 280.0, 299.0, 249.0, 255.0, 296.0, 286.0, 284.0, 274.0, 293.0, 286.0, 287.0, 292.0, 292.0, 284.0, 283.0, 299.0, 289.0, 293.0, 275.0, 298.0, 290.0, 283.0, 285.0, 297.0, 290.0, 286.0, 289.0, 290.0, 286.0, 293.0, 297.0, 276.0, 303.0, 276.0, 292.0, 278.0, 284.0, 295.0, 283.0, 296.0, 301.0, 281.0, 293.0, 283.0, 294.0, 288.0, 288.0, 285.0, 292.0, 284.0, 282.0, 294.0, 291.0, 285.0, 293.0, 289.0, 292.0, 287.0, 288.0, 291.0, 301.0, 281.0, 295.0, 
287.0, 291.0, 291.0, 297.0, 282.0, 294.0, 285.0, 293.0, 289.0, 269.0, 261.0, 279.0, 300.0, 298.0, 284.0, 270.0, 309.0, 288.0, 291.0, 263.0, 262.0, 293.0, 289.0, 303.0, 276.0, 286.0, 296.0, 301.0, 281.0, 286.0, 290.0, 296.0, 286.0, 300.0, 282.0, 272.0, 250.0, 303.0, 276.0, 283.0, 299.0, 292.0, 278.0, 255.0, 270.0, 287.0, 295.0, 292.0, 287.0, 223.0, 229.0, 274.0, 305.0, 272.0, 295.0, 272.0, 258.0, 280.0, 296.0, 291.0, 291.0, 288.0, 294.0, 269.0, 256.0, 294.0, 282.0, 312.0, 315.0, 291.0, 282.0, 302.0, 271.0, 261.0, 266.0, 281.0, 295.0, 298.0, 281.0, 280.0, 284.0, 326.0, 307.0, 296.0, 286.0, 288.0, 279.0, 267.0, 269.0, 298.0, 281.0, 288.0, 291.0, 291.0, 288.0, 294.0, 285.0, 275.0, 301.0, 294.0, 282.0, 303.0, 273.0]}, "sampler_perf": {"mean_env_wait_ms": 1.3402643108658208, "mean_processing_ms": 0.3359831351985412, "mean_inference_ms": 1.8939334033513233}, "off_policy_estimator": {}, "info": {"num_steps_trained": 4056000, "num_steps_sampled": 2163200, "sample_time_ms": 20463.428, "load_time_ms": 38.087, "grad_time_ms": 10025.502, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.005885738879442215, "policy_loss": -0.001977432519197464, "vf_loss": 84.17040252685547, "vf_explained_var": 0.7570996880531311, "kl": 0.0022582625970244408, "entropy": 1.1077399253845215, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2163200, "episodes_total": 5408, "training_iteration": 169, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-09-33", "timestamp": 1660252173, "time_this_iter_s": 28.335352182388306, "time_total_s": 10590.13589978218, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 10590.13589978218, "timesteps_since_restore": 2163200, "iterations_since_restore": 169, "perf": {"cpu_util_percent": 29.0625, "ram_util_percent": 58.379999999999995}}
-{"episode_reward_max": 633.0, "episode_reward_min": 422.0, "episode_reward_mean": 569.21, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 209.0}, "policy_reward_max": {"ppo": 326.0}, "policy_reward_mean": {"ppo": 284.605}, "custom_metrics": {"sparse_reward_mean": 196.8, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 175.61, "shaped_reward_min": 132, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.25, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 17.33, "onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.01, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 16.65, "useful_onion_pickup_agent_1_min": 11, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.51, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.4, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.17, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.17, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.4, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 16.63, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.65, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.4, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.02, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.87, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.39, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.31, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.14, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.89, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.03, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.87, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.06, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 5, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.4, 
"optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 16.63, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.4, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 16.63, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 576.0, 536.0, 561.0, 576.0, 570.0, 587.0, 582.0, 573.0, 582.0, 576.0, 579.0, 576.0, 573.0, 582.0, 579.0, 579.0, 582.0, 576.0, 579.0, 630.0, 536.0, 582.0, 587.0, 579.0, 582.0, 422.0, 587.0, 579.0, 576.0, 536.0, 573.0, 582.0, 582.0, 522.0, 579.0, 582.0, 570.0, 525.0, 582.0, 579.0, 452.0, 579.0, 567.0, 530.0, 576.0, 582.0, 582.0, 525.0, 576.0, 627.0, 573.0, 573.0, 527.0, 576.0, 579.0, 564.0, 633.0, 582.0, 567.0, 536.0, 579.0, 579.0, 579.0, 579.0, 576.0, 576.0, 576.0, 579.0, 579.0, 522.0, 576.0, 576.0, 582.0, 533.0, 536.0, 573.0, 570.0, 576.0, 579.0, 582.0, 576.0, 516.0, 579.0, 579.0, 579.0, 504.0, 582.0, 558.0, 579.0, 579.0, 576.0, 582.0, 582.0, 573.0, 573.0, 582.0, 576.0, 579.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [293.0, 286.0, 293.0, 283.0, 250.0, 286.0, 275.0, 286.0, 282.0, 294.0, 276.0, 294.0, 296.0, 291.0, 283.0, 299.0, 280.0, 293.0, 286.0, 296.0, 295.0, 281.0, 288.0, 291.0, 293.0, 283.0, 292.0, 281.0, 292.0, 290.0, 281.0, 298.0, 299.0, 280.0, 293.0, 289.0, 286.0, 290.0, 293.0, 286.0, 311.0, 319.0, 254.0, 282.0, 279.0, 303.0, 296.0, 291.0, 278.0, 301.0, 294.0, 288.0, 209.0, 213.0, 282.0, 305.0, 287.0, 292.0, 291.0, 285.0, 258.0, 278.0, 278.0, 295.0, 296.0, 286.0, 300.0, 282.0, 272.0, 250.0, 303.0, 276.0, 283.0, 299.0, 292.0, 278.0, 255.0, 270.0, 287.0, 295.0, 292.0, 287.0, 223.0, 229.0, 274.0, 305.0, 272.0, 295.0, 272.0, 258.0, 280.0, 296.0, 291.0, 291.0, 288.0, 294.0, 269.0, 
256.0, 294.0, 282.0, 312.0, 315.0, 291.0, 282.0, 302.0, 271.0, 261.0, 266.0, 281.0, 295.0, 298.0, 281.0, 280.0, 284.0, 326.0, 307.0, 296.0, 286.0, 288.0, 279.0, 267.0, 269.0, 298.0, 281.0, 288.0, 291.0, 291.0, 288.0, 294.0, 285.0, 275.0, 301.0, 294.0, 282.0, 303.0, 273.0, 291.0, 288.0, 280.0, 299.0, 262.0, 260.0, 283.0, 293.0, 284.0, 292.0, 293.0, 289.0, 249.0, 284.0, 273.0, 263.0, 274.0, 299.0, 288.0, 282.0, 297.0, 279.0, 296.0, 283.0, 273.0, 309.0, 285.0, 291.0, 251.0, 265.0, 285.0, 294.0, 283.0, 296.0, 280.0, 299.0, 249.0, 255.0, 296.0, 286.0, 284.0, 274.0, 293.0, 286.0, 287.0, 292.0, 292.0, 284.0, 283.0, 299.0, 289.0, 293.0, 275.0, 298.0, 290.0, 283.0, 285.0, 297.0, 290.0, 286.0, 289.0, 290.0, 286.0, 293.0]}, "sampler_perf": {"mean_env_wait_ms": 1.334950887862658, "mean_processing_ms": 0.3349244818889894, "mean_inference_ms": 1.8881606558059565}, "off_policy_estimator": {}, "info": {"num_steps_trained": 4080000, "num_steps_sampled": 2176000, "sample_time_ms": 20370.044, "load_time_ms": 38.206, "grad_time_ms": 10005.991, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.00501619465649128, "policy_loss": -0.0036706894170492887, "vf_loss": 92.4554214477539, "vf_explained_var": 0.7515974044799805, "kl": 0.0018303836695849895, "entropy": 1.1173133850097656, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2176000, "episodes_total": 5440, "training_iteration": 170, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-10-00", "timestamp": 1660252200, "time_this_iter_s": 27.505138874053955, "time_total_s": 10617.641038656235, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 10617.641038656235, "timesteps_since_restore": 2176000, "iterations_since_restore": 170, "perf": {"cpu_util_percent": 30.13076923076923, "ram_util_percent": 58.446153846153834}}
-{"episode_reward_max": 630.0, "episode_reward_min": 422.0, "episode_reward_mean": 571.29, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 209.0}, "policy_reward_max": {"ppo": 319.0}, "policy_reward_mean": {"ppo": 285.645}, "custom_metrics": {"sparse_reward_mean": 197.2, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 176.89, "shaped_reward_min": 141, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.63, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 17.26, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.44, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 16.6, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.58, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 0.46, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.11, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.2, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.74, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 16.52, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.52, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.5, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 4.97, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 5.05, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.35, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.29, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.06, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 5.02, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 5.05, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 4.89, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 5.03, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.06, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 5, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.74, 
"optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 16.52, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.74, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 16.52, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [587.0, 579.0, 573.0, 587.0, 579.0, 576.0, 582.0, 587.0, 582.0, 582.0, 579.0, 579.0, 536.0, 582.0, 579.0, 576.0, 582.0, 579.0, 582.0, 582.0, 573.0, 501.0, 576.0, 522.0, 582.0, 582.0, 579.0, 576.0, 590.0, 539.0, 582.0, 582.0, 579.0, 576.0, 576.0, 576.0, 579.0, 579.0, 522.0, 576.0, 576.0, 582.0, 533.0, 536.0, 573.0, 570.0, 576.0, 579.0, 582.0, 576.0, 516.0, 579.0, 579.0, 579.0, 504.0, 582.0, 558.0, 579.0, 579.0, 576.0, 582.0, 582.0, 573.0, 573.0, 582.0, 576.0, 579.0, 579.0, 579.0, 576.0, 536.0, 561.0, 576.0, 570.0, 587.0, 582.0, 573.0, 582.0, 576.0, 579.0, 576.0, 573.0, 582.0, 579.0, 579.0, 582.0, 576.0, 579.0, 630.0, 536.0, 582.0, 587.0, 579.0, 582.0, 422.0, 587.0, 579.0, 576.0, 536.0, 573.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [300.0, 287.0, 303.0, 276.0, 280.0, 293.0, 294.0, 293.0, 295.0, 284.0, 305.0, 271.0, 293.0, 289.0, 292.0, 295.0, 287.0, 295.0, 283.0, 299.0, 296.0, 283.0, 288.0, 291.0, 271.0, 265.0, 295.0, 287.0, 288.0, 291.0, 278.0, 298.0, 286.0, 296.0, 289.0, 290.0, 287.0, 295.0, 297.0, 285.0, 279.0, 294.0, 269.0, 232.0, 287.0, 289.0, 254.0, 268.0, 291.0, 291.0, 276.0, 306.0, 295.0, 284.0, 278.0, 298.0, 293.0, 297.0, 271.0, 268.0, 299.0, 283.0, 290.0, 292.0, 294.0, 285.0, 275.0, 301.0, 294.0, 282.0, 303.0, 273.0, 291.0, 288.0, 280.0, 299.0, 262.0, 260.0, 283.0, 293.0, 284.0, 292.0, 293.0, 289.0, 249.0, 284.0, 273.0, 263.0, 274.0, 299.0, 288.0, 282.0, 297.0, 279.0, 296.0, 283.0, 273.0, 
309.0, 285.0, 291.0, 251.0, 265.0, 285.0, 294.0, 283.0, 296.0, 280.0, 299.0, 249.0, 255.0, 296.0, 286.0, 284.0, 274.0, 293.0, 286.0, 287.0, 292.0, 292.0, 284.0, 283.0, 299.0, 289.0, 293.0, 275.0, 298.0, 290.0, 283.0, 285.0, 297.0, 290.0, 286.0, 289.0, 290.0, 286.0, 293.0, 293.0, 286.0, 293.0, 283.0, 250.0, 286.0, 275.0, 286.0, 282.0, 294.0, 276.0, 294.0, 296.0, 291.0, 283.0, 299.0, 280.0, 293.0, 286.0, 296.0, 295.0, 281.0, 288.0, 291.0, 293.0, 283.0, 292.0, 281.0, 292.0, 290.0, 281.0, 298.0, 299.0, 280.0, 293.0, 289.0, 286.0, 290.0, 293.0, 286.0, 311.0, 319.0, 254.0, 282.0, 279.0, 303.0, 296.0, 291.0, 278.0, 301.0, 294.0, 288.0, 209.0, 213.0, 282.0, 305.0, 287.0, 292.0, 291.0, 285.0, 258.0, 278.0, 278.0, 295.0]}, "sampler_perf": {"mean_env_wait_ms": 1.3296971139486637, "mean_processing_ms": 0.333876638718542, "mean_inference_ms": 1.8823863210387035}, "off_policy_estimator": {}, "info": {"num_steps_trained": 4104000, "num_steps_sampled": 2188800, "sample_time_ms": 20376.689, "load_time_ms": 38.395, "grad_time_ms": 9898.441, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.001497833989560604, "policy_loss": -0.006948364432901144, "vf_loss": 90.00249481201172, "vf_explained_var": 0.7635095119476318, "kl": 0.0017910072347149253, "entropy": 1.1081151962280273, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2188800, "episodes_total": 5472, "training_iteration": 171, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-10-27", "timestamp": 1660252227, "time_this_iter_s": 27.183032989501953, "time_total_s": 10644.824071645737, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 10644.824071645737, "timesteps_since_restore": 2188800, "iterations_since_restore": 171, "perf": {"cpu_util_percent": 32.57105263157895, "ram_util_percent": 58.3842105263158}}
-{"episode_reward_max": 630.0, "episode_reward_min": 422.0, "episode_reward_mean": 573.69, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 209.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 286.845}, "custom_metrics": {"sparse_reward_mean": 197.8, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 178.09, "shaped_reward_min": 141, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.79, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 17.25, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.54, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 16.64, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.63, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 0.48, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.13, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.2, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.86, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 16.51, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.61, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.45, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 5.16, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 5.05, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.32, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.28, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.07, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 5.09, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.98, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.01, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.96, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.86, 
"optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 16.51, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.86, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 16.51, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 579.0, 576.0, 582.0, 582.0, 630.0, 573.0, 579.0, 530.0, 576.0, 627.0, 533.0, 530.0, 579.0, 579.0, 579.0, 525.0, 630.0, 519.0, 530.0, 582.0, 627.0, 576.0, 587.0, 582.0, 582.0, 579.0, 582.0, 582.0, 582.0, 567.0, 579.0, 582.0, 576.0, 579.0, 579.0, 579.0, 576.0, 536.0, 561.0, 576.0, 570.0, 587.0, 582.0, 573.0, 582.0, 576.0, 579.0, 576.0, 573.0, 582.0, 579.0, 579.0, 582.0, 576.0, 579.0, 630.0, 536.0, 582.0, 587.0, 579.0, 582.0, 422.0, 587.0, 579.0, 576.0, 536.0, 573.0, 587.0, 579.0, 573.0, 587.0, 579.0, 576.0, 582.0, 587.0, 582.0, 582.0, 579.0, 579.0, 536.0, 582.0, 579.0, 576.0, 582.0, 579.0, 582.0, 582.0, 573.0, 501.0, 576.0, 522.0, 582.0, 582.0, 579.0, 576.0, 590.0, 539.0, 582.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [278.0, 304.0, 286.0, 293.0, 290.0, 286.0, 296.0, 286.0, 289.0, 293.0, 316.0, 314.0, 298.0, 275.0, 286.0, 293.0, 261.0, 269.0, 278.0, 298.0, 308.0, 319.0, 265.0, 268.0, 272.0, 258.0, 283.0, 296.0, 294.0, 285.0, 292.0, 287.0, 263.0, 262.0, 305.0, 325.0, 280.0, 239.0, 272.0, 258.0, 306.0, 276.0, 314.0, 313.0, 288.0, 288.0, 296.0, 291.0, 292.0, 290.0, 289.0, 293.0, 275.0, 304.0, 294.0, 288.0, 305.0, 277.0, 297.0, 285.0, 288.0, 279.0, 293.0, 286.0, 285.0, 297.0, 290.0, 286.0, 289.0, 290.0, 286.0, 293.0, 293.0, 286.0, 293.0, 283.0, 250.0, 286.0, 275.0, 286.0, 282.0, 294.0, 276.0, 294.0, 296.0, 291.0, 283.0, 299.0, 280.0, 293.0, 286.0, 296.0, 295.0, 281.0, 288.0, 291.0, 293.0, 
283.0, 292.0, 281.0, 292.0, 290.0, 281.0, 298.0, 299.0, 280.0, 293.0, 289.0, 286.0, 290.0, 293.0, 286.0, 311.0, 319.0, 254.0, 282.0, 279.0, 303.0, 296.0, 291.0, 278.0, 301.0, 294.0, 288.0, 209.0, 213.0, 282.0, 305.0, 287.0, 292.0, 291.0, 285.0, 258.0, 278.0, 278.0, 295.0, 300.0, 287.0, 303.0, 276.0, 280.0, 293.0, 294.0, 293.0, 295.0, 284.0, 305.0, 271.0, 293.0, 289.0, 292.0, 295.0, 287.0, 295.0, 283.0, 299.0, 296.0, 283.0, 288.0, 291.0, 271.0, 265.0, 295.0, 287.0, 288.0, 291.0, 278.0, 298.0, 286.0, 296.0, 289.0, 290.0, 287.0, 295.0, 297.0, 285.0, 279.0, 294.0, 269.0, 232.0, 287.0, 289.0, 254.0, 268.0, 291.0, 291.0, 276.0, 306.0, 295.0, 284.0, 278.0, 298.0, 293.0, 297.0, 271.0, 268.0, 299.0, 283.0, 290.0, 292.0]}, "sampler_perf": {"mean_env_wait_ms": 1.3245227944112767, "mean_processing_ms": 0.33284763340426393, "mean_inference_ms": 1.876875864691374}, "off_policy_estimator": {}, "info": {"num_steps_trained": 4128000, "num_steps_sampled": 2201600, "sample_time_ms": 20481.131, "load_time_ms": 38.136, "grad_time_ms": 9697.559, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0045767915435135365, "policy_loss": -0.0035035184118896723, "vf_loss": 86.42507934570312, "vf_explained_var": 0.7563931345939636, "kl": 0.002320564817637205, "entropy": 1.1244043111801147, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2201600, "episodes_total": 5504, "training_iteration": 172, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-10-56", "timestamp": 1660252256, "time_this_iter_s": 28.577091932296753, "time_total_s": 10673.401163578033, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 10673.401163578033, "timesteps_since_restore": 2201600, "iterations_since_restore": 172, "perf": {"cpu_util_percent": 35.19024390243903, "ram_util_percent": 58.548780487804876}}
-{"episode_reward_max": 630.0, "episode_reward_min": 496.0, "episode_reward_mean": 574.03, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 232.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 287.015}, "custom_metrics": {"sparse_reward_mean": 198.2, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 177.63, "shaped_reward_min": 138, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.68, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 17.48, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 16.35, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 16.83, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.68, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 0.53, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.14, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.19, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.69, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 16.68, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.78, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.34, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 5.28, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.76, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.35, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.31, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.06, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 5.24, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.85, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.17, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.8, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 15.69, 
"optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 16.68, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.69, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 16.68, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 582.0, 579.0, 579.0, 582.0, 498.0, 579.0, 579.0, 576.0, 561.0, 587.0, 570.0, 552.0, 587.0, 576.0, 579.0, 582.0, 576.0, 587.0, 576.0, 579.0, 579.0, 579.0, 582.0, 573.0, 570.0, 496.0, 579.0, 630.0, 576.0, 567.0, 582.0, 579.0, 576.0, 536.0, 573.0, 587.0, 579.0, 573.0, 587.0, 579.0, 576.0, 582.0, 587.0, 582.0, 582.0, 579.0, 579.0, 536.0, 582.0, 579.0, 576.0, 582.0, 579.0, 582.0, 582.0, 573.0, 501.0, 576.0, 522.0, 582.0, 582.0, 579.0, 576.0, 590.0, 539.0, 582.0, 582.0, 582.0, 579.0, 576.0, 582.0, 582.0, 630.0, 573.0, 579.0, 530.0, 576.0, 627.0, 533.0, 530.0, 579.0, 579.0, 579.0, 525.0, 630.0, 519.0, 530.0, 582.0, 627.0, 576.0, 587.0, 582.0, 582.0, 579.0, 582.0, 582.0, 582.0, 567.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [286.0, 293.0, 290.0, 292.0, 292.0, 287.0, 282.0, 297.0, 291.0, 291.0, 241.0, 257.0, 289.0, 290.0, 290.0, 289.0, 280.0, 296.0, 275.0, 286.0, 301.0, 286.0, 281.0, 289.0, 271.0, 281.0, 302.0, 285.0, 276.0, 300.0, 295.0, 284.0, 292.0, 290.0, 291.0, 285.0, 295.0, 292.0, 299.0, 277.0, 289.0, 290.0, 293.0, 286.0, 284.0, 295.0, 290.0, 292.0, 285.0, 288.0, 291.0, 279.0, 253.0, 243.0, 297.0, 282.0, 318.0, 312.0, 283.0, 293.0, 291.0, 276.0, 286.0, 296.0, 287.0, 292.0, 291.0, 285.0, 258.0, 278.0, 278.0, 295.0, 300.0, 287.0, 303.0, 276.0, 280.0, 293.0, 294.0, 293.0, 295.0, 284.0, 305.0, 271.0, 293.0, 289.0, 292.0, 295.0, 287.0, 295.0, 283.0, 299.0, 296.0, 283.0, 288.0, 291.0, 271.0, 
265.0, 295.0, 287.0, 288.0, 291.0, 278.0, 298.0, 286.0, 296.0, 289.0, 290.0, 287.0, 295.0, 297.0, 285.0, 279.0, 294.0, 269.0, 232.0, 287.0, 289.0, 254.0, 268.0, 291.0, 291.0, 276.0, 306.0, 295.0, 284.0, 278.0, 298.0, 293.0, 297.0, 271.0, 268.0, 299.0, 283.0, 290.0, 292.0, 278.0, 304.0, 286.0, 293.0, 290.0, 286.0, 296.0, 286.0, 289.0, 293.0, 316.0, 314.0, 298.0, 275.0, 286.0, 293.0, 261.0, 269.0, 278.0, 298.0, 308.0, 319.0, 265.0, 268.0, 272.0, 258.0, 283.0, 296.0, 294.0, 285.0, 292.0, 287.0, 263.0, 262.0, 305.0, 325.0, 280.0, 239.0, 272.0, 258.0, 306.0, 276.0, 314.0, 313.0, 288.0, 288.0, 296.0, 291.0, 292.0, 290.0, 289.0, 293.0, 275.0, 304.0, 294.0, 288.0, 305.0, 277.0, 297.0, 285.0, 288.0, 279.0, 293.0, 286.0]}, "sampler_perf": {"mean_env_wait_ms": 1.3194261704658106, "mean_processing_ms": 0.3318357537689677, "mean_inference_ms": 1.8715822466645085}, "off_policy_estimator": {}, "info": {"num_steps_trained": 4152000, "num_steps_sampled": 2214400, "sample_time_ms": 20271.412, "load_time_ms": 38.227, "grad_time_ms": 9546.44, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0062603577971458435, "policy_loss": -0.0018654250307008624, "vf_loss": 86.83306121826172, "vf_explained_var": 0.7576972842216492, "kl": 0.0021647585090249777, "entropy": 1.1150306463241577, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2214400, "episodes_total": 5536, "training_iteration": 173, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-11-25", "timestamp": 1660252285, "time_this_iter_s": 28.57458209991455, "time_total_s": 10701.975745677948, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 10701.975745677948, "timesteps_since_restore": 2214400, "iterations_since_restore": 173, "perf": {"cpu_util_percent": 30.26, "ram_util_percent": 58.657500000000006}}
-{"episode_reward_max": 633.0, "episode_reward_min": 496.0, "episode_reward_mean": 576.58, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 239.0}, "policy_reward_max": {"ppo": 329.0}, "policy_reward_mean": {"ppo": 288.29}, "custom_metrics": {"sparse_reward_mean": 199.4, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 177.78, "shaped_reward_min": 138, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.4, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 17.65, "onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 16.13, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 17.01, "useful_onion_pickup_agent_1_min": 11, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.57, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.53, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.16, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.15, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.53, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 16.86, "potting_onion_agent_1_min": 11, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.94, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.26, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.38, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.64, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.33, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.33, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.04, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.05, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.38, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.74, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.32, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.7, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 15.53, 
"optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 16.86, "optimal_onion_potting_agent_1_min": 11, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.53, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 16.86, "viable_onion_potting_agent_1_min": 11, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [567.0, 579.0, 627.0, 573.0, 633.0, 582.0, 567.0, 530.0, 573.0, 564.0, 582.0, 573.0, 579.0, 582.0, 579.0, 558.0, 582.0, 582.0, 582.0, 576.0, 573.0, 582.0, 627.0, 630.0, 582.0, 582.0, 582.0, 573.0, 536.0, 582.0, 579.0, 582.0, 590.0, 539.0, 582.0, 582.0, 582.0, 579.0, 576.0, 582.0, 582.0, 630.0, 573.0, 579.0, 530.0, 576.0, 627.0, 533.0, 530.0, 579.0, 579.0, 579.0, 525.0, 630.0, 519.0, 530.0, 582.0, 627.0, 576.0, 587.0, 582.0, 582.0, 579.0, 582.0, 582.0, 582.0, 567.0, 579.0, 579.0, 582.0, 579.0, 579.0, 582.0, 498.0, 579.0, 579.0, 576.0, 561.0, 587.0, 570.0, 552.0, 587.0, 576.0, 579.0, 582.0, 576.0, 587.0, 576.0, 579.0, 579.0, 579.0, 582.0, 573.0, 570.0, 496.0, 579.0, 630.0, 576.0, 567.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [282.0, 285.0, 290.0, 289.0, 298.0, 329.0, 270.0, 303.0, 308.0, 325.0, 301.0, 281.0, 279.0, 288.0, 268.0, 262.0, 296.0, 277.0, 286.0, 278.0, 289.0, 293.0, 285.0, 288.0, 294.0, 285.0, 301.0, 281.0, 295.0, 284.0, 281.0, 277.0, 291.0, 291.0, 308.0, 274.0, 286.0, 296.0, 300.0, 276.0, 284.0, 289.0, 289.0, 293.0, 316.0, 311.0, 321.0, 309.0, 286.0, 296.0, 297.0, 285.0, 298.0, 284.0, 283.0, 290.0, 270.0, 266.0, 285.0, 297.0, 292.0, 287.0, 293.0, 289.0, 293.0, 297.0, 271.0, 268.0, 299.0, 283.0, 290.0, 292.0, 278.0, 304.0, 286.0, 293.0, 290.0, 286.0, 296.0, 286.0, 289.0, 293.0, 316.0, 314.0, 298.0, 275.0, 286.0, 293.0, 261.0, 269.0, 278.0, 298.0, 308.0, 319.0, 265.0, 268.0, 272.0, 
258.0, 283.0, 296.0, 294.0, 285.0, 292.0, 287.0, 263.0, 262.0, 305.0, 325.0, 280.0, 239.0, 272.0, 258.0, 306.0, 276.0, 314.0, 313.0, 288.0, 288.0, 296.0, 291.0, 292.0, 290.0, 289.0, 293.0, 275.0, 304.0, 294.0, 288.0, 305.0, 277.0, 297.0, 285.0, 288.0, 279.0, 293.0, 286.0, 286.0, 293.0, 290.0, 292.0, 292.0, 287.0, 282.0, 297.0, 291.0, 291.0, 241.0, 257.0, 289.0, 290.0, 290.0, 289.0, 280.0, 296.0, 275.0, 286.0, 301.0, 286.0, 281.0, 289.0, 271.0, 281.0, 302.0, 285.0, 276.0, 300.0, 295.0, 284.0, 292.0, 290.0, 291.0, 285.0, 295.0, 292.0, 299.0, 277.0, 289.0, 290.0, 293.0, 286.0, 284.0, 295.0, 290.0, 292.0, 285.0, 288.0, 291.0, 279.0, 253.0, 243.0, 297.0, 282.0, 318.0, 312.0, 283.0, 293.0, 291.0, 276.0, 286.0, 296.0]}, "sampler_perf": {"mean_env_wait_ms": 1.3143927753552085, "mean_processing_ms": 0.33083733112110686, "mean_inference_ms": 1.8663998879774686}, "off_policy_estimator": {}, "info": {"num_steps_trained": 4176000, "num_steps_sampled": 2227200, "sample_time_ms": 20085.918, "load_time_ms": 37.439, "grad_time_ms": 9177.291, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0001482805237174034, "policy_loss": -0.00769606651738286, "vf_loss": 81.13143920898438, "vf_explained_var": 0.764965295791626, "kl": 0.0018476974219083786, "entropy": 1.1307072639465332, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2227200, "episodes_total": 5568, "training_iteration": 174, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-11-52", "timestamp": 1660252312, "time_this_iter_s": 27.522704124450684, "time_total_s": 10729.498449802399, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 10729.498449802399, "timesteps_since_restore": 2227200, "iterations_since_restore": 174, "perf": {"cpu_util_percent": 34.294871794871796, "ram_util_percent": 58.587179487179476}}
-{"episode_reward_max": 633.0, "episode_reward_min": 393.0, "episode_reward_mean": 573.02, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 196.0}, "policy_reward_max": {"ppo": 329.0}, "policy_reward_mean": {"ppo": 286.51}, "custom_metrics": {"sparse_reward_mean": 198.6, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 175.82, "shaped_reward_min": 113, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.37, "onion_pickup_agent_0_min": 10, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 17.56, "onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 16.07, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 16.82, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.62, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 0.59, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.22, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.24, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.43, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 16.68, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.99, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.37, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.2, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.58, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.48, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 0.47, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.07, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.08, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.3, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.74, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.26, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.69, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 15.43, 
"optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 16.68, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.43, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 16.68, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [507.0, 579.0, 582.0, 576.0, 582.0, 393.0, 465.0, 579.0, 582.0, 570.0, 627.0, 579.0, 573.0, 576.0, 579.0, 504.0, 579.0, 579.0, 576.0, 527.0, 579.0, 519.0, 579.0, 587.0, 576.0, 633.0, 579.0, 576.0, 582.0, 579.0, 524.0, 627.0, 582.0, 582.0, 567.0, 579.0, 579.0, 582.0, 579.0, 579.0, 582.0, 498.0, 579.0, 579.0, 576.0, 561.0, 587.0, 570.0, 552.0, 587.0, 576.0, 579.0, 582.0, 576.0, 587.0, 576.0, 579.0, 579.0, 579.0, 582.0, 573.0, 570.0, 496.0, 579.0, 630.0, 576.0, 567.0, 582.0, 567.0, 579.0, 627.0, 573.0, 633.0, 582.0, 567.0, 530.0, 573.0, 564.0, 582.0, 573.0, 579.0, 582.0, 579.0, 558.0, 582.0, 582.0, 582.0, 576.0, 573.0, 582.0, 627.0, 630.0, 582.0, 582.0, 582.0, 573.0, 536.0, 582.0, 579.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [239.0, 268.0, 286.0, 293.0, 283.0, 299.0, 284.0, 292.0, 297.0, 285.0, 197.0, 196.0, 229.0, 236.0, 294.0, 285.0, 285.0, 297.0, 289.0, 281.0, 317.0, 310.0, 301.0, 278.0, 278.0, 295.0, 282.0, 294.0, 284.0, 295.0, 262.0, 242.0, 297.0, 282.0, 290.0, 289.0, 295.0, 281.0, 272.0, 255.0, 288.0, 291.0, 260.0, 259.0, 287.0, 292.0, 294.0, 293.0, 286.0, 290.0, 309.0, 324.0, 285.0, 294.0, 285.0, 291.0, 297.0, 285.0, 288.0, 291.0, 270.0, 254.0, 305.0, 322.0, 305.0, 277.0, 297.0, 285.0, 288.0, 279.0, 293.0, 286.0, 286.0, 293.0, 290.0, 292.0, 292.0, 287.0, 282.0, 297.0, 291.0, 291.0, 241.0, 257.0, 289.0, 290.0, 290.0, 289.0, 280.0, 296.0, 275.0, 286.0, 301.0, 286.0, 281.0, 289.0, 271.0, 
281.0, 302.0, 285.0, 276.0, 300.0, 295.0, 284.0, 292.0, 290.0, 291.0, 285.0, 295.0, 292.0, 299.0, 277.0, 289.0, 290.0, 293.0, 286.0, 284.0, 295.0, 290.0, 292.0, 285.0, 288.0, 291.0, 279.0, 253.0, 243.0, 297.0, 282.0, 318.0, 312.0, 283.0, 293.0, 291.0, 276.0, 286.0, 296.0, 282.0, 285.0, 290.0, 289.0, 298.0, 329.0, 270.0, 303.0, 308.0, 325.0, 301.0, 281.0, 279.0, 288.0, 268.0, 262.0, 296.0, 277.0, 286.0, 278.0, 289.0, 293.0, 285.0, 288.0, 294.0, 285.0, 301.0, 281.0, 295.0, 284.0, 281.0, 277.0, 291.0, 291.0, 308.0, 274.0, 286.0, 296.0, 300.0, 276.0, 284.0, 289.0, 289.0, 293.0, 316.0, 311.0, 321.0, 309.0, 286.0, 296.0, 297.0, 285.0, 298.0, 284.0, 283.0, 290.0, 270.0, 266.0, 285.0, 297.0, 292.0, 287.0, 293.0, 289.0]}, "sampler_perf": {"mean_env_wait_ms": 1.3094165014431445, "mean_processing_ms": 0.3298474823059415, "mean_inference_ms": 1.8613034748518011}, "off_policy_estimator": {}, "info": {"num_steps_trained": 4200000, "num_steps_sampled": 2240000, "sample_time_ms": 20009.535, "load_time_ms": 37.081, "grad_time_ms": 9049.935, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0033403884153813124, "policy_loss": -0.004778089467436075, "vf_loss": 86.8664779663086, "vf_explained_var": 0.7622640132904053, "kl": 0.0018111681565642357, "entropy": 1.1363428831100464, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2240000, "episodes_total": 5600, "training_iteration": 175, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-12-22", "timestamp": 1660252342, "time_this_iter_s": 29.488188982009888, "time_total_s": 10758.986638784409, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 10758.986638784409, "timesteps_since_restore": 2240000, "iterations_since_restore": 175, "perf": {"cpu_util_percent": 31.97380952380952, "ram_util_percent": 58.61666666666667}}
-{"episode_reward_max": 633.0, "episode_reward_min": 393.0, "episode_reward_mean": 573.67, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 196.0}, "policy_reward_max": {"ppo": 329.0}, "policy_reward_mean": {"ppo": 286.835}, "custom_metrics": {"sparse_reward_mean": 198.6, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 176.47, "shaped_reward_min": 113, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.58, "onion_pickup_agent_0_min": 10, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 17.4, "onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 16.24, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 16.7, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.68, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 0.58, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.26, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.29, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 15.57, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 16.55, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.82, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.52, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.17, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.85, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.39, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 0.48, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.06, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.07, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.2, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.82, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.18, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.77, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.57, 
"optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 16.55, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.57, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 16.55, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 582.0, 582.0, 627.0, 582.0, 582.0, 587.0, 579.0, 579.0, 582.0, 579.0, 579.0, 582.0, 576.0, 582.0, 579.0, 582.0, 579.0, 573.0, 576.0, 579.0, 530.0, 576.0, 579.0, 576.0, 525.0, 576.0, 519.0, 530.0, 576.0, 579.0, 582.0, 630.0, 576.0, 567.0, 582.0, 567.0, 579.0, 627.0, 573.0, 633.0, 582.0, 567.0, 530.0, 573.0, 564.0, 582.0, 573.0, 579.0, 582.0, 579.0, 558.0, 582.0, 582.0, 582.0, 576.0, 573.0, 582.0, 627.0, 630.0, 582.0, 582.0, 582.0, 573.0, 536.0, 582.0, 579.0, 582.0, 507.0, 579.0, 582.0, 576.0, 582.0, 393.0, 465.0, 579.0, 582.0, 570.0, 627.0, 579.0, 573.0, 576.0, 579.0, 504.0, 579.0, 579.0, 576.0, 527.0, 579.0, 519.0, 579.0, 587.0, 576.0, 633.0, 579.0, 576.0, 582.0, 579.0, 524.0, 627.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [280.0, 302.0, 293.0, 289.0, 294.0, 288.0, 310.0, 317.0, 294.0, 288.0, 294.0, 288.0, 276.0, 311.0, 282.0, 297.0, 291.0, 288.0, 296.0, 286.0, 280.0, 299.0, 298.0, 281.0, 296.0, 286.0, 277.0, 299.0, 303.0, 279.0, 291.0, 288.0, 283.0, 299.0, 280.0, 299.0, 295.0, 278.0, 293.0, 283.0, 296.0, 283.0, 266.0, 264.0, 277.0, 299.0, 283.0, 296.0, 291.0, 285.0, 264.0, 261.0, 286.0, 290.0, 267.0, 252.0, 272.0, 258.0, 278.0, 298.0, 290.0, 289.0, 291.0, 291.0, 318.0, 312.0, 283.0, 293.0, 291.0, 276.0, 286.0, 296.0, 282.0, 285.0, 290.0, 289.0, 298.0, 329.0, 270.0, 303.0, 308.0, 325.0, 301.0, 281.0, 279.0, 288.0, 268.0, 262.0, 296.0, 277.0, 286.0, 278.0, 289.0, 293.0, 285.0, 288.0, 294.0, 
285.0, 301.0, 281.0, 295.0, 284.0, 281.0, 277.0, 291.0, 291.0, 308.0, 274.0, 286.0, 296.0, 300.0, 276.0, 284.0, 289.0, 289.0, 293.0, 316.0, 311.0, 321.0, 309.0, 286.0, 296.0, 297.0, 285.0, 298.0, 284.0, 283.0, 290.0, 270.0, 266.0, 285.0, 297.0, 292.0, 287.0, 293.0, 289.0, 239.0, 268.0, 286.0, 293.0, 283.0, 299.0, 284.0, 292.0, 297.0, 285.0, 197.0, 196.0, 229.0, 236.0, 294.0, 285.0, 285.0, 297.0, 289.0, 281.0, 317.0, 310.0, 301.0, 278.0, 278.0, 295.0, 282.0, 294.0, 284.0, 295.0, 262.0, 242.0, 297.0, 282.0, 290.0, 289.0, 295.0, 281.0, 272.0, 255.0, 288.0, 291.0, 260.0, 259.0, 287.0, 292.0, 294.0, 293.0, 286.0, 290.0, 309.0, 324.0, 285.0, 294.0, 285.0, 291.0, 297.0, 285.0, 288.0, 291.0, 270.0, 254.0, 305.0, 322.0]}, "sampler_perf": {"mean_env_wait_ms": 1.304511305303639, "mean_processing_ms": 0.3288745301367266, "mean_inference_ms": 1.8566250484766516}, "off_policy_estimator": {}, "info": {"num_steps_trained": 4224000, "num_steps_sampled": 2252800, "sample_time_ms": 20239.261, "load_time_ms": 36.617, "grad_time_ms": 8693.262, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0060581061989068985, "policy_loss": -0.0023995088413357735, "vf_loss": 90.20238494873047, "vf_explained_var": 0.7652048468589783, "kl": 0.0019277030369266868, "entropy": 1.1252202987670898, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2252800, "episodes_total": 5632, "training_iteration": 176, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-12-54", "timestamp": 1660252374, "time_this_iter_s": 32.0580530166626, "time_total_s": 10791.044691801071, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 10791.044691801071, "timesteps_since_restore": 2252800, "iterations_since_restore": 176, "perf": {"cpu_util_percent": 31.34666666666667, "ram_util_percent": 58.57333333333334}}
-{"episode_reward_max": 633.0, "episode_reward_min": 393.0, "episode_reward_mean": 571.4, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 196.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 285.7}, "custom_metrics": {"sparse_reward_mean": 197.6, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 176.2, "shaped_reward_min": 113, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.68, "onion_pickup_agent_0_min": 10, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 17.35, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 16.28, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 16.61, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.67, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 0.62, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.25, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.36, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 15.59, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 16.44, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.84, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.47, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.24, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.83, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.43, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 0.43, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.05, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.05, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.2, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.79, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.18, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.72, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.59, 
"optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 16.44, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.59, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 16.44, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [630.0, 582.0, 525.0, 627.0, 570.0, 504.0, 579.0, 582.0, 579.0, 576.0, 570.0, 579.0, 579.0, 576.0, 582.0, 519.0, 573.0, 579.0, 573.0, 582.0, 579.0, 582.0, 582.0, 587.0, 519.0, 573.0, 579.0, 633.0, 590.0, 579.0, 573.0, 587.0, 536.0, 582.0, 579.0, 582.0, 507.0, 579.0, 582.0, 576.0, 582.0, 393.0, 465.0, 579.0, 582.0, 570.0, 627.0, 579.0, 573.0, 576.0, 579.0, 504.0, 579.0, 579.0, 576.0, 527.0, 579.0, 519.0, 579.0, 587.0, 576.0, 633.0, 579.0, 576.0, 582.0, 579.0, 524.0, 627.0, 582.0, 582.0, 582.0, 627.0, 582.0, 582.0, 587.0, 579.0, 579.0, 582.0, 579.0, 579.0, 582.0, 576.0, 582.0, 579.0, 582.0, 579.0, 573.0, 576.0, 579.0, 530.0, 576.0, 579.0, 576.0, 525.0, 576.0, 519.0, 530.0, 576.0, 579.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [312.0, 318.0, 290.0, 292.0, 272.0, 253.0, 314.0, 313.0, 275.0, 295.0, 261.0, 243.0, 298.0, 281.0, 291.0, 291.0, 299.0, 280.0, 294.0, 282.0, 287.0, 283.0, 288.0, 291.0, 288.0, 291.0, 288.0, 288.0, 302.0, 280.0, 267.0, 252.0, 286.0, 287.0, 288.0, 291.0, 285.0, 288.0, 283.0, 299.0, 290.0, 289.0, 298.0, 284.0, 289.0, 293.0, 296.0, 291.0, 276.0, 243.0, 300.0, 273.0, 286.0, 293.0, 309.0, 324.0, 302.0, 288.0, 292.0, 287.0, 289.0, 284.0, 292.0, 295.0, 270.0, 266.0, 285.0, 297.0, 292.0, 287.0, 293.0, 289.0, 239.0, 268.0, 286.0, 293.0, 283.0, 299.0, 284.0, 292.0, 297.0, 285.0, 197.0, 196.0, 229.0, 236.0, 294.0, 285.0, 285.0, 297.0, 289.0, 281.0, 317.0, 310.0, 301.0, 278.0, 278.0, 
295.0, 282.0, 294.0, 284.0, 295.0, 262.0, 242.0, 297.0, 282.0, 290.0, 289.0, 295.0, 281.0, 272.0, 255.0, 288.0, 291.0, 260.0, 259.0, 287.0, 292.0, 294.0, 293.0, 286.0, 290.0, 309.0, 324.0, 285.0, 294.0, 285.0, 291.0, 297.0, 285.0, 288.0, 291.0, 270.0, 254.0, 305.0, 322.0, 280.0, 302.0, 293.0, 289.0, 294.0, 288.0, 310.0, 317.0, 294.0, 288.0, 294.0, 288.0, 276.0, 311.0, 282.0, 297.0, 291.0, 288.0, 296.0, 286.0, 280.0, 299.0, 298.0, 281.0, 296.0, 286.0, 277.0, 299.0, 303.0, 279.0, 291.0, 288.0, 283.0, 299.0, 280.0, 299.0, 295.0, 278.0, 293.0, 283.0, 296.0, 283.0, 266.0, 264.0, 277.0, 299.0, 283.0, 296.0, 291.0, 285.0, 264.0, 261.0, 286.0, 290.0, 267.0, 252.0, 272.0, 258.0, 278.0, 298.0, 290.0, 289.0, 291.0, 291.0]}, "sampler_perf": {"mean_env_wait_ms": 1.2996756400527443, "mean_processing_ms": 0.3279171800323935, "mean_inference_ms": 1.852052219769451}, "off_policy_estimator": {}, "info": {"num_steps_trained": 4248000, "num_steps_sampled": 2265600, "sample_time_ms": 20324.791, "load_time_ms": 36.647, "grad_time_ms": 8435.096, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0008986306493170559, "policy_loss": -0.007334645837545395, "vf_loss": 87.94988250732422, "vf_explained_var": 0.7740858197212219, "kl": 0.001811654889024794, "entropy": 1.123410940170288, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2265600, "episodes_total": 5664, "training_iteration": 177, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-13-23", "timestamp": 1660252403, "time_this_iter_s": 29.61364197731018, "time_total_s": 10820.658333778381, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 10820.658333778381, "timesteps_since_restore": 2265600, "iterations_since_restore": 177, "perf": {"cpu_util_percent": 35.069047619047616, "ram_util_percent": 58.67619047619048}}
-{"episode_reward_max": 633.0, "episode_reward_min": 504.0, "episode_reward_mean": 576.0, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 243.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 288.0}, "custom_metrics": {"sparse_reward_mean": 199.0, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 178.0, "shaped_reward_min": 144, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.81, "onion_pickup_agent_0_min": 10, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 17.32, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 16.41, "useful_onion_pickup_agent_0_min": 9, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 16.63, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.63, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.52, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.27, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, "useful_onion_drop_agent_1_mean": 0.25, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 15.75, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 16.54, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.85, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.43, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.33, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.93, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.41, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.27, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.04, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.22, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.87, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.19, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.76, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.75, 
"optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 16.54, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.75, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 16.54, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 570.0, 519.0, 579.0, 582.0, 627.0, 576.0, 582.0, 579.0, 573.0, 582.0, 582.0, 527.0, 582.0, 579.0, 587.0, 630.0, 582.0, 576.0, 579.0, 570.0, 582.0, 582.0, 579.0, 525.0, 579.0, 576.0, 587.0, 584.0, 582.0, 573.0, 587.0, 582.0, 579.0, 524.0, 627.0, 582.0, 582.0, 582.0, 627.0, 582.0, 582.0, 587.0, 579.0, 579.0, 582.0, 579.0, 579.0, 582.0, 576.0, 582.0, 579.0, 582.0, 579.0, 573.0, 576.0, 579.0, 530.0, 576.0, 579.0, 576.0, 525.0, 576.0, 519.0, 530.0, 576.0, 579.0, 582.0, 630.0, 582.0, 525.0, 627.0, 570.0, 504.0, 579.0, 582.0, 579.0, 576.0, 570.0, 579.0, 579.0, 576.0, 582.0, 519.0, 573.0, 579.0, 573.0, 582.0, 579.0, 582.0, 582.0, 587.0, 519.0, 573.0, 579.0, 633.0, 590.0, 579.0, 573.0, 587.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [289.0, 293.0, 281.0, 289.0, 270.0, 249.0, 292.0, 287.0, 286.0, 296.0, 305.0, 322.0, 289.0, 287.0, 295.0, 287.0, 300.0, 279.0, 290.0, 283.0, 285.0, 297.0, 289.0, 293.0, 254.0, 273.0, 294.0, 288.0, 291.0, 288.0, 301.0, 286.0, 314.0, 316.0, 295.0, 287.0, 289.0, 287.0, 277.0, 302.0, 292.0, 278.0, 278.0, 304.0, 288.0, 294.0, 291.0, 288.0, 259.0, 266.0, 275.0, 304.0, 299.0, 277.0, 296.0, 291.0, 290.0, 294.0, 288.0, 294.0, 288.0, 285.0, 290.0, 297.0, 297.0, 285.0, 288.0, 291.0, 270.0, 254.0, 305.0, 322.0, 280.0, 302.0, 293.0, 289.0, 294.0, 288.0, 310.0, 317.0, 294.0, 288.0, 294.0, 288.0, 276.0, 311.0, 282.0, 297.0, 291.0, 288.0, 296.0, 286.0, 280.0, 299.0, 298.0, 281.0, 296.0, 
286.0, 277.0, 299.0, 303.0, 279.0, 291.0, 288.0, 283.0, 299.0, 280.0, 299.0, 295.0, 278.0, 293.0, 283.0, 296.0, 283.0, 266.0, 264.0, 277.0, 299.0, 283.0, 296.0, 291.0, 285.0, 264.0, 261.0, 286.0, 290.0, 267.0, 252.0, 272.0, 258.0, 278.0, 298.0, 290.0, 289.0, 291.0, 291.0, 312.0, 318.0, 290.0, 292.0, 272.0, 253.0, 314.0, 313.0, 275.0, 295.0, 261.0, 243.0, 298.0, 281.0, 291.0, 291.0, 299.0, 280.0, 294.0, 282.0, 287.0, 283.0, 288.0, 291.0, 288.0, 291.0, 288.0, 288.0, 302.0, 280.0, 267.0, 252.0, 286.0, 287.0, 288.0, 291.0, 285.0, 288.0, 283.0, 299.0, 290.0, 289.0, 298.0, 284.0, 289.0, 293.0, 296.0, 291.0, 276.0, 243.0, 300.0, 273.0, 286.0, 293.0, 309.0, 324.0, 302.0, 288.0, 292.0, 287.0, 289.0, 284.0, 292.0, 295.0]}, "sampler_perf": {"mean_env_wait_ms": 1.2948917928576587, "mean_processing_ms": 0.3269695972321587, "mean_inference_ms": 1.8475779345693215}, "off_policy_estimator": {}, "info": {"num_steps_trained": 4272000, "num_steps_sampled": 2278400, "sample_time_ms": 20480.726, "load_time_ms": 37.228, "grad_time_ms": 8437.297, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.004881067667156458, "policy_loss": -0.003187847323715687, "vf_loss": 86.31526947021484, "vf_explained_var": 0.7646486163139343, "kl": 0.0018008003244176507, "entropy": 1.125217080116272, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2278400, "episodes_total": 5696, "training_iteration": 178, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-13-54", "timestamp": 1660252434, "time_this_iter_s": 30.965723037719727, "time_total_s": 10851.624056816101, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 10851.624056816101, "timesteps_since_restore": 2278400, "iterations_since_restore": 178, "perf": {"cpu_util_percent": 34.43636363636364, "ram_util_percent": 58.54318181818183}}
-{"episode_reward_max": 633.0, "episode_reward_min": 504.0, "episode_reward_mean": 577.98, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 243.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 288.99}, "custom_metrics": {"sparse_reward_mean": 199.8, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 178.38, "shaped_reward_min": 144, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.7, "onion_pickup_agent_0_min": 10, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 17.35, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 16.38, "useful_onion_pickup_agent_0_min": 9, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 16.65, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.57, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.48, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.28, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.25, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 15.73, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 16.62, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.95, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.4, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.38, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.88, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.48, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.28, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.06, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.28, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.84, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.26, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.74, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.73, 
"optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 16.62, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.73, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 16.62, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [587.0, 561.0, 579.0, 582.0, 627.0, 527.0, 579.0, 576.0, 582.0, 582.0, 582.0, 579.0, 579.0, 579.0, 582.0, 579.0, 561.0, 576.0, 587.0, 630.0, 582.0, 627.0, 579.0, 582.0, 579.0, 582.0, 582.0, 576.0, 582.0, 579.0, 579.0, 576.0, 530.0, 576.0, 579.0, 582.0, 630.0, 582.0, 525.0, 627.0, 570.0, 504.0, 579.0, 582.0, 579.0, 576.0, 570.0, 579.0, 579.0, 576.0, 582.0, 519.0, 573.0, 579.0, 573.0, 582.0, 579.0, 582.0, 582.0, 587.0, 519.0, 573.0, 579.0, 633.0, 590.0, 579.0, 573.0, 587.0, 582.0, 570.0, 519.0, 579.0, 582.0, 627.0, 576.0, 582.0, 579.0, 573.0, 582.0, 582.0, 527.0, 582.0, 579.0, 587.0, 630.0, 582.0, 576.0, 579.0, 570.0, 582.0, 582.0, 579.0, 525.0, 579.0, 576.0, 587.0, 584.0, 582.0, 573.0, 587.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [291.0, 296.0, 273.0, 288.0, 294.0, 285.0, 285.0, 297.0, 318.0, 309.0, 264.0, 263.0, 287.0, 292.0, 280.0, 296.0, 288.0, 294.0, 293.0, 289.0, 288.0, 294.0, 300.0, 279.0, 294.0, 285.0, 288.0, 291.0, 294.0, 288.0, 278.0, 301.0, 279.0, 282.0, 290.0, 286.0, 302.0, 285.0, 320.0, 310.0, 297.0, 285.0, 315.0, 312.0, 295.0, 284.0, 304.0, 278.0, 278.0, 301.0, 289.0, 293.0, 290.0, 292.0, 275.0, 301.0, 293.0, 289.0, 289.0, 290.0, 290.0, 289.0, 294.0, 282.0, 272.0, 258.0, 278.0, 298.0, 290.0, 289.0, 291.0, 291.0, 312.0, 318.0, 290.0, 292.0, 272.0, 253.0, 314.0, 313.0, 275.0, 295.0, 261.0, 243.0, 298.0, 281.0, 291.0, 291.0, 299.0, 280.0, 294.0, 282.0, 287.0, 283.0, 288.0, 291.0, 288.0, 
291.0, 288.0, 288.0, 302.0, 280.0, 267.0, 252.0, 286.0, 287.0, 288.0, 291.0, 285.0, 288.0, 283.0, 299.0, 290.0, 289.0, 298.0, 284.0, 289.0, 293.0, 296.0, 291.0, 276.0, 243.0, 300.0, 273.0, 286.0, 293.0, 309.0, 324.0, 302.0, 288.0, 292.0, 287.0, 289.0, 284.0, 292.0, 295.0, 289.0, 293.0, 281.0, 289.0, 270.0, 249.0, 292.0, 287.0, 286.0, 296.0, 305.0, 322.0, 289.0, 287.0, 295.0, 287.0, 300.0, 279.0, 290.0, 283.0, 285.0, 297.0, 289.0, 293.0, 254.0, 273.0, 294.0, 288.0, 291.0, 288.0, 301.0, 286.0, 314.0, 316.0, 295.0, 287.0, 289.0, 287.0, 277.0, 302.0, 292.0, 278.0, 278.0, 304.0, 288.0, 294.0, 291.0, 288.0, 259.0, 266.0, 275.0, 304.0, 299.0, 277.0, 296.0, 291.0, 290.0, 294.0, 288.0, 294.0, 288.0, 285.0, 290.0, 297.0]}, "sampler_perf": {"mean_env_wait_ms": 1.2901493032290514, "mean_processing_ms": 0.3260293499521716, "mean_inference_ms": 1.842905806043276}, "off_policy_estimator": {}, "info": {"num_steps_trained": 4296000, "num_steps_sampled": 2291200, "sample_time_ms": 20658.749, "load_time_ms": 37.127, "grad_time_ms": 8627.523, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.005556942895054817, "policy_loss": -0.0025492331478744745, "vf_loss": 86.67485809326172, "vf_explained_var": 0.7664775848388672, "kl": 0.0018904004245996475, "entropy": 1.1226133108139038, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2291200, "episodes_total": 5728, "training_iteration": 179, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-14-26", "timestamp": 1660252466, "time_this_iter_s": 32.01629400253296, "time_total_s": 10883.640350818634, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 10883.640350818634, "timesteps_since_restore": 2291200, "iterations_since_restore": 179, "perf": {"cpu_util_percent": 31.479999999999997, "ram_util_percent": 58.526666666666664}}
-{"episode_reward_max": 630.0, "episode_reward_min": 516.0, "episode_reward_mean": 578.14, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 249.0}, "policy_reward_max": {"ppo": 326.0}, "policy_reward_mean": {"ppo": 289.07}, "custom_metrics": {"sparse_reward_mean": 199.8, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 178.54, "shaped_reward_min": 156, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.79, "onion_pickup_agent_0_min": 10, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 17.34, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.42, "useful_onion_pickup_agent_0_min": 9, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 16.69, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.62, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.47, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.3, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.17, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.82, "potting_onion_agent_0_min": 9, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 16.62, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.88, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.52, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.28, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.91, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.5, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.38, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.06, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.04, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.21, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.93, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.17, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.84, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.82, 
"optimal_onion_potting_agent_0_min": 9, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 16.62, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.82, "viable_onion_potting_agent_0_min": 9, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 16.62, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 576.0, 567.0, 530.0, 627.0, 582.0, 579.0, 587.0, 579.0, 579.0, 579.0, 579.0, 582.0, 582.0, 576.0, 576.0, 516.0, 579.0, 587.0, 582.0, 570.0, 570.0, 519.0, 573.0, 582.0, 576.0, 579.0, 579.0, 576.0, 530.0, 582.0, 624.0, 590.0, 579.0, 573.0, 587.0, 582.0, 570.0, 519.0, 579.0, 582.0, 627.0, 576.0, 582.0, 579.0, 573.0, 582.0, 582.0, 527.0, 582.0, 579.0, 587.0, 630.0, 582.0, 576.0, 579.0, 570.0, 582.0, 582.0, 579.0, 525.0, 579.0, 576.0, 587.0, 584.0, 582.0, 573.0, 587.0, 587.0, 561.0, 579.0, 582.0, 627.0, 527.0, 579.0, 576.0, 582.0, 582.0, 582.0, 579.0, 579.0, 579.0, 582.0, 579.0, 561.0, 576.0, 587.0, 630.0, 582.0, 627.0, 579.0, 582.0, 579.0, 582.0, 582.0, 576.0, 582.0, 579.0, 579.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [296.0, 283.0, 289.0, 287.0, 288.0, 279.0, 270.0, 260.0, 309.0, 318.0, 298.0, 284.0, 276.0, 303.0, 291.0, 296.0, 298.0, 281.0, 283.0, 296.0, 284.0, 295.0, 277.0, 302.0, 297.0, 285.0, 294.0, 288.0, 293.0, 283.0, 284.0, 292.0, 263.0, 253.0, 290.0, 289.0, 302.0, 285.0, 297.0, 285.0, 291.0, 279.0, 294.0, 276.0, 261.0, 258.0, 282.0, 291.0, 293.0, 289.0, 293.0, 283.0, 280.0, 299.0, 282.0, 297.0, 298.0, 278.0, 269.0, 261.0, 285.0, 297.0, 298.0, 326.0, 302.0, 288.0, 292.0, 287.0, 289.0, 284.0, 292.0, 295.0, 289.0, 293.0, 281.0, 289.0, 270.0, 249.0, 292.0, 287.0, 286.0, 296.0, 305.0, 322.0, 289.0, 287.0, 295.0, 287.0, 300.0, 279.0, 290.0, 283.0, 285.0, 297.0, 289.0, 293.0, 254.0, 
273.0, 294.0, 288.0, 291.0, 288.0, 301.0, 286.0, 314.0, 316.0, 295.0, 287.0, 289.0, 287.0, 277.0, 302.0, 292.0, 278.0, 278.0, 304.0, 288.0, 294.0, 291.0, 288.0, 259.0, 266.0, 275.0, 304.0, 299.0, 277.0, 296.0, 291.0, 290.0, 294.0, 288.0, 294.0, 288.0, 285.0, 290.0, 297.0, 291.0, 296.0, 273.0, 288.0, 294.0, 285.0, 285.0, 297.0, 318.0, 309.0, 264.0, 263.0, 287.0, 292.0, 280.0, 296.0, 288.0, 294.0, 293.0, 289.0, 288.0, 294.0, 300.0, 279.0, 294.0, 285.0, 288.0, 291.0, 294.0, 288.0, 278.0, 301.0, 279.0, 282.0, 290.0, 286.0, 302.0, 285.0, 320.0, 310.0, 297.0, 285.0, 315.0, 312.0, 295.0, 284.0, 304.0, 278.0, 278.0, 301.0, 289.0, 293.0, 290.0, 292.0, 275.0, 301.0, 293.0, 289.0, 289.0, 290.0, 290.0, 289.0, 294.0, 282.0]}, "sampler_perf": {"mean_env_wait_ms": 1.2854556266765977, "mean_processing_ms": 0.3250985864852822, "mean_inference_ms": 1.838409331377913}, "off_policy_estimator": {}, "info": {"num_steps_trained": 4320000, "num_steps_sampled": 2304000, "sample_time_ms": 20831.137, "load_time_ms": 37.325, "grad_time_ms": 8787.85, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.003588956082239747, "policy_loss": -0.004645919892936945, "vf_loss": 88.00481414794922, "vf_explained_var": 0.7581232190132141, "kl": 0.0017625847831368446, "entropy": 1.131211280822754, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2304000, "episodes_total": 5760, "training_iteration": 180, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-14-57", "timestamp": 1660252497, "time_this_iter_s": 30.824997186660767, "time_total_s": 10914.465348005295, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 10914.465348005295, "timesteps_since_restore": 2304000, "iterations_since_restore": 180, "perf": {"cpu_util_percent": 31.02954545454545, "ram_util_percent": 58.488636363636374}}
-{"episode_reward_max": 630.0, "episode_reward_min": 501.0, "episode_reward_mean": 575.78, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 246.0}, "policy_reward_max": {"ppo": 326.0}, "policy_reward_mean": {"ppo": 287.89}, "custom_metrics": {"sparse_reward_mean": 199.2, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 177.38, "shaped_reward_min": 141, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.23, "onion_pickup_agent_0_min": 10, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 17.73, "onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 15.91, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 17.04, "useful_onion_pickup_agent_1_min": 11, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.5, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.55, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.2, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.2, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 15.39, "potting_onion_agent_0_min": 9, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 16.93, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 6.11, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.34, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.35, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.59, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.52, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.43, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.07, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.05, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.39, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.74, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.35, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.65, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.39, 
"optimal_onion_potting_agent_0_min": 9, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 16.93, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.39, "viable_onion_potting_agent_0_min": 9, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 16.93, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [521.0, 576.0, 573.0, 539.0, 579.0, 582.0, 536.0, 630.0, 579.0, 582.0, 582.0, 579.0, 576.0, 582.0, 558.0, 579.0, 552.0, 576.0, 579.0, 587.0, 579.0, 579.0, 587.0, 567.0, 582.0, 576.0, 501.0, 504.0, 573.0, 587.0, 587.0, 579.0, 584.0, 582.0, 573.0, 587.0, 587.0, 561.0, 579.0, 582.0, 627.0, 527.0, 579.0, 576.0, 582.0, 582.0, 582.0, 579.0, 579.0, 579.0, 582.0, 579.0, 561.0, 576.0, 587.0, 630.0, 582.0, 627.0, 579.0, 582.0, 579.0, 582.0, 582.0, 576.0, 582.0, 579.0, 579.0, 576.0, 579.0, 576.0, 567.0, 530.0, 627.0, 582.0, 579.0, 587.0, 579.0, 579.0, 579.0, 579.0, 582.0, 582.0, 576.0, 576.0, 516.0, 579.0, 587.0, 582.0, 570.0, 570.0, 519.0, 573.0, 582.0, 576.0, 579.0, 579.0, 576.0, 530.0, 582.0, 624.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [267.0, 254.0, 283.0, 293.0, 285.0, 288.0, 275.0, 264.0, 288.0, 291.0, 304.0, 278.0, 267.0, 269.0, 323.0, 307.0, 298.0, 281.0, 303.0, 279.0, 297.0, 285.0, 280.0, 299.0, 277.0, 299.0, 286.0, 296.0, 286.0, 272.0, 308.0, 271.0, 275.0, 277.0, 292.0, 284.0, 276.0, 303.0, 286.0, 301.0, 291.0, 288.0, 301.0, 278.0, 292.0, 295.0, 291.0, 276.0, 293.0, 289.0, 293.0, 283.0, 254.0, 247.0, 246.0, 258.0, 281.0, 292.0, 286.0, 301.0, 295.0, 292.0, 282.0, 297.0, 290.0, 294.0, 288.0, 294.0, 288.0, 285.0, 290.0, 297.0, 291.0, 296.0, 273.0, 288.0, 294.0, 285.0, 285.0, 297.0, 318.0, 309.0, 264.0, 263.0, 287.0, 292.0, 280.0, 296.0, 288.0, 294.0, 293.0, 289.0, 288.0, 294.0, 300.0, 279.0, 294.0, 
285.0, 288.0, 291.0, 294.0, 288.0, 278.0, 301.0, 279.0, 282.0, 290.0, 286.0, 302.0, 285.0, 320.0, 310.0, 297.0, 285.0, 315.0, 312.0, 295.0, 284.0, 304.0, 278.0, 278.0, 301.0, 289.0, 293.0, 290.0, 292.0, 275.0, 301.0, 293.0, 289.0, 289.0, 290.0, 290.0, 289.0, 294.0, 282.0, 296.0, 283.0, 289.0, 287.0, 288.0, 279.0, 270.0, 260.0, 309.0, 318.0, 298.0, 284.0, 276.0, 303.0, 291.0, 296.0, 298.0, 281.0, 283.0, 296.0, 284.0, 295.0, 277.0, 302.0, 297.0, 285.0, 294.0, 288.0, 293.0, 283.0, 284.0, 292.0, 263.0, 253.0, 290.0, 289.0, 302.0, 285.0, 297.0, 285.0, 291.0, 279.0, 294.0, 276.0, 261.0, 258.0, 282.0, 291.0, 293.0, 289.0, 293.0, 283.0, 280.0, 299.0, 282.0, 297.0, 298.0, 278.0, 269.0, 261.0, 285.0, 297.0, 298.0, 326.0]}, "sampler_perf": {"mean_env_wait_ms": 1.280816050662255, "mean_processing_ms": 0.3241780547884837, "mean_inference_ms": 1.833999332912814}, "off_policy_estimator": {}, "info": {"num_steps_trained": 4344000, "num_steps_sampled": 2316800, "sample_time_ms": 21054.918, "load_time_ms": 37.113, "grad_time_ms": 8943.853, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0029653788078576326, "policy_loss": -0.005187256261706352, "vf_loss": 87.18419647216797, "vf_explained_var": 0.7553746104240417, "kl": 0.0017378958873450756, "entropy": 1.13156259059906, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2316800, "episodes_total": 5792, "training_iteration": 181, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-15-28", "timestamp": 1660252528, "time_this_iter_s": 30.97549271583557, "time_total_s": 10945.44084072113, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 10945.44084072113, "timesteps_since_restore": 2316800, "iterations_since_restore": 181, "perf": {"cpu_util_percent": 34.48409090909092, "ram_util_percent": 58.6068181818182}}
-{"episode_reward_max": 630.0, "episode_reward_min": 465.0, "episode_reward_mean": 573.12, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 227.0}, "policy_reward_max": {"ppo": 326.0}, "policy_reward_mean": {"ppo": 286.56}, "custom_metrics": {"sparse_reward_mean": 198.2, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 176.72, "shaped_reward_min": 141, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.09, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 17.86, "onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 28, "useful_onion_pickup_agent_0_mean": 15.71, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 17.03, "useful_onion_pickup_agent_1_min": 11, "useful_onion_pickup_agent_1_max": 27, "onion_drop_agent_0_mean": 0.52, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.58, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.22, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.2, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 15.22, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 16.99, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 26, "dish_pickup_agent_0_mean": 6.14, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.35, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.35, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.53, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.5, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.48, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.06, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.06, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.43, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 10, "soup_pickup_agent_1_mean": 4.66, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.36, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.6, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.22, 
"optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 16.99, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 26, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.22, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 16.99, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 26, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [576.0, 530.0, 530.0, 627.0, 582.0, 579.0, 538.0, 576.0, 582.0, 579.0, 519.0, 582.0, 582.0, 582.0, 630.0, 579.0, 579.0, 576.0, 567.0, 576.0, 465.0, 587.0, 582.0, 587.0, 570.0, 582.0, 579.0, 573.0, 582.0, 584.0, 627.0, 576.0, 582.0, 579.0, 579.0, 576.0, 579.0, 576.0, 567.0, 530.0, 627.0, 582.0, 579.0, 587.0, 579.0, 579.0, 579.0, 579.0, 582.0, 582.0, 576.0, 576.0, 516.0, 579.0, 587.0, 582.0, 570.0, 570.0, 519.0, 573.0, 582.0, 576.0, 579.0, 579.0, 576.0, 530.0, 582.0, 624.0, 521.0, 576.0, 573.0, 539.0, 579.0, 582.0, 536.0, 630.0, 579.0, 582.0, 582.0, 579.0, 576.0, 582.0, 558.0, 579.0, 552.0, 576.0, 579.0, 587.0, 579.0, 579.0, 587.0, 567.0, 582.0, 576.0, 501.0, 504.0, 573.0, 587.0, 587.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [285.0, 291.0, 272.0, 258.0, 270.0, 260.0, 313.0, 314.0, 281.0, 301.0, 285.0, 294.0, 263.0, 275.0, 280.0, 296.0, 292.0, 290.0, 294.0, 285.0, 270.0, 249.0, 296.0, 286.0, 286.0, 296.0, 282.0, 300.0, 318.0, 312.0, 295.0, 284.0, 291.0, 288.0, 288.0, 288.0, 291.0, 276.0, 278.0, 298.0, 227.0, 238.0, 308.0, 279.0, 298.0, 284.0, 313.0, 274.0, 284.0, 286.0, 285.0, 297.0, 270.0, 309.0, 282.0, 291.0, 284.0, 298.0, 295.0, 289.0, 313.0, 314.0, 285.0, 291.0, 293.0, 289.0, 289.0, 290.0, 290.0, 289.0, 294.0, 282.0, 296.0, 283.0, 289.0, 287.0, 288.0, 279.0, 270.0, 260.0, 309.0, 318.0, 298.0, 284.0, 276.0, 303.0, 291.0, 296.0, 298.0, 281.0, 283.0, 296.0, 284.0, 295.0, 277.0, 302.0, 297.0, 
285.0, 294.0, 288.0, 293.0, 283.0, 284.0, 292.0, 263.0, 253.0, 290.0, 289.0, 302.0, 285.0, 297.0, 285.0, 291.0, 279.0, 294.0, 276.0, 261.0, 258.0, 282.0, 291.0, 293.0, 289.0, 293.0, 283.0, 280.0, 299.0, 282.0, 297.0, 298.0, 278.0, 269.0, 261.0, 285.0, 297.0, 298.0, 326.0, 267.0, 254.0, 283.0, 293.0, 285.0, 288.0, 275.0, 264.0, 288.0, 291.0, 304.0, 278.0, 267.0, 269.0, 323.0, 307.0, 298.0, 281.0, 303.0, 279.0, 297.0, 285.0, 280.0, 299.0, 277.0, 299.0, 286.0, 296.0, 286.0, 272.0, 308.0, 271.0, 275.0, 277.0, 292.0, 284.0, 276.0, 303.0, 286.0, 301.0, 291.0, 288.0, 301.0, 278.0, 292.0, 295.0, 291.0, 276.0, 293.0, 289.0, 293.0, 283.0, 254.0, 247.0, 246.0, 258.0, 281.0, 292.0, 286.0, 301.0, 295.0, 292.0, 282.0, 297.0]}, "sampler_perf": {"mean_env_wait_ms": 1.2762174298521924, "mean_processing_ms": 0.3232647451131093, "mean_inference_ms": 1.8295321417191508}, "off_policy_estimator": {}, "info": {"num_steps_trained": 4368000, "num_steps_sampled": 2329600, "sample_time_ms": 20991.973, "load_time_ms": 37.186, "grad_time_ms": 9226.22, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.002576154889538884, "policy_loss": -0.005821262951940298, "vf_loss": 89.62581634521484, "vf_explained_var": 0.7608991265296936, "kl": 0.002179400995373726, "entropy": 1.1303036212921143, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2329600, "episodes_total": 5824, "training_iteration": 182, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-15-59", "timestamp": 1660252559, "time_this_iter_s": 30.775686264038086, "time_total_s": 10976.216526985168, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 10976.216526985168, "timesteps_since_restore": 2329600, "iterations_since_restore": 182, "perf": {"cpu_util_percent": 31.2, "ram_util_percent": 58.5}}
-{"episode_reward_max": 633.0, "episode_reward_min": 465.0, "episode_reward_mean": 575.61, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 227.0}, "policy_reward_max": {"ppo": 326.0}, "policy_reward_mean": {"ppo": 287.805}, "custom_metrics": {"sparse_reward_mean": 199.0, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 177.61, "shaped_reward_min": 141, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.48, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 18.45, "onion_pickup_agent_1_min": 12, "onion_pickup_agent_1_max": 28, "useful_onion_pickup_agent_0_mean": 15.18, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 17.58, "useful_onion_pickup_agent_1_min": 12, "useful_onion_pickup_agent_1_max": 27, "onion_drop_agent_0_mean": 0.42, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.59, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.2, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.2, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 14.72, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.58, "potting_onion_agent_1_min": 12, "potting_onion_agent_1_max": 26, "dish_pickup_agent_0_mean": 6.31, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.01, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.6, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.42, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.44, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.33, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.07, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.66, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 10, "soup_pickup_agent_1_mean": 4.47, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.61, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.39, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.72, 
"optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.58, "optimal_onion_potting_agent_1_min": 12, "optimal_onion_potting_agent_1_max": 26, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.72, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.58, "viable_onion_potting_agent_1_min": 12, "viable_onion_potting_agent_1_max": 26, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 582.0, 587.0, 627.0, 579.0, 576.0, 582.0, 576.0, 573.0, 576.0, 633.0, 525.0, 576.0, 579.0, 579.0, 582.0, 579.0, 536.0, 576.0, 627.0, 579.0, 576.0, 582.0, 582.0, 579.0, 573.0, 587.0, 576.0, 576.0, 630.0, 582.0, 582.0, 576.0, 530.0, 582.0, 624.0, 521.0, 576.0, 573.0, 539.0, 579.0, 582.0, 536.0, 630.0, 579.0, 582.0, 582.0, 579.0, 576.0, 582.0, 558.0, 579.0, 552.0, 576.0, 579.0, 587.0, 579.0, 579.0, 587.0, 567.0, 582.0, 576.0, 501.0, 504.0, 573.0, 587.0, 587.0, 579.0, 576.0, 530.0, 530.0, 627.0, 582.0, 579.0, 538.0, 576.0, 582.0, 579.0, 519.0, 582.0, 582.0, 582.0, 630.0, 579.0, 579.0, 576.0, 567.0, 576.0, 465.0, 587.0, 582.0, 587.0, 570.0, 582.0, 579.0, 573.0, 582.0, 584.0, 627.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [293.0, 289.0, 299.0, 283.0, 290.0, 297.0, 313.0, 314.0, 292.0, 287.0, 284.0, 292.0, 291.0, 291.0, 291.0, 285.0, 291.0, 282.0, 272.0, 304.0, 311.0, 322.0, 255.0, 270.0, 283.0, 293.0, 289.0, 290.0, 294.0, 285.0, 284.0, 298.0, 297.0, 282.0, 261.0, 275.0, 288.0, 288.0, 305.0, 322.0, 294.0, 285.0, 290.0, 286.0, 290.0, 292.0, 295.0, 287.0, 300.0, 279.0, 293.0, 280.0, 293.0, 294.0, 293.0, 283.0, 296.0, 280.0, 321.0, 309.0, 305.0, 277.0, 288.0, 294.0, 298.0, 278.0, 269.0, 261.0, 285.0, 297.0, 298.0, 326.0, 267.0, 254.0, 283.0, 293.0, 285.0, 288.0, 275.0, 264.0, 288.0, 291.0, 304.0, 278.0, 267.0, 269.0, 323.0, 307.0, 298.0, 281.0, 303.0, 279.0, 297.0, 285.0, 280.0, 299.0, 277.0, 
299.0, 286.0, 296.0, 286.0, 272.0, 308.0, 271.0, 275.0, 277.0, 292.0, 284.0, 276.0, 303.0, 286.0, 301.0, 291.0, 288.0, 301.0, 278.0, 292.0, 295.0, 291.0, 276.0, 293.0, 289.0, 293.0, 283.0, 254.0, 247.0, 246.0, 258.0, 281.0, 292.0, 286.0, 301.0, 295.0, 292.0, 282.0, 297.0, 285.0, 291.0, 272.0, 258.0, 270.0, 260.0, 313.0, 314.0, 281.0, 301.0, 285.0, 294.0, 263.0, 275.0, 280.0, 296.0, 292.0, 290.0, 294.0, 285.0, 270.0, 249.0, 296.0, 286.0, 286.0, 296.0, 282.0, 300.0, 318.0, 312.0, 295.0, 284.0, 291.0, 288.0, 288.0, 288.0, 291.0, 276.0, 278.0, 298.0, 227.0, 238.0, 308.0, 279.0, 298.0, 284.0, 313.0, 274.0, 284.0, 286.0, 285.0, 297.0, 270.0, 309.0, 282.0, 291.0, 284.0, 298.0, 295.0, 289.0, 313.0, 314.0, 285.0, 291.0]}, "sampler_perf": {"mean_env_wait_ms": 1.2716706020385649, "mean_processing_ms": 0.32236021091809197, "mean_inference_ms": 1.8252783373393515}, "off_policy_estimator": {}, "info": {"num_steps_trained": 4392000, "num_steps_sampled": 2342400, "sample_time_ms": 21166.824, "load_time_ms": 37.427, "grad_time_ms": 9604.389, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.001810177811421454, "policy_loss": -0.006374426186084747, "vf_loss": 87.48321533203125, "vf_explained_var": 0.7590639591217041, "kl": 0.00198071519844234, "entropy": 1.1274290084838867, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2342400, "episodes_total": 5856, "training_iteration": 183, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-16-33", "timestamp": 1660252593, "time_this_iter_s": 34.10594201087952, "time_total_s": 11010.322468996048, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 11010.322468996048, "timesteps_since_restore": 2342400, "iterations_since_restore": 183, "perf": {"cpu_util_percent": 32.16041666666667, "ram_util_percent": 58.54791666666666}}
-{"episode_reward_max": 636.0, "episode_reward_min": 465.0, "episode_reward_mean": 581.25, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 227.0}, "policy_reward_max": {"ppo": 322.0}, "policy_reward_mean": {"ppo": 290.625}, "custom_metrics": {"sparse_reward_mean": 200.8, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 179.65, "shaped_reward_min": 145, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.68, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 18.48, "onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 28, "useful_onion_pickup_agent_0_mean": 15.43, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 17.67, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 27, "onion_drop_agent_0_mean": 0.43, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.45, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.18, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.14, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 14.93, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 17.68, "potting_onion_agent_1_min": 11, "potting_onion_agent_1_max": 26, "dish_pickup_agent_0_mean": 6.26, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.03, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.72, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.57, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.37, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.32, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.06, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.65, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 10, "soup_pickup_agent_1_mean": 4.54, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.6, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.48, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.93, 
"optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 17.68, "optimal_onion_potting_agent_1_min": 11, "optimal_onion_potting_agent_1_max": 26, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.93, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 17.68, "viable_onion_potting_agent_1_min": 11, "viable_onion_potting_agent_1_max": 26, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [573.0, 579.0, 630.0, 579.0, 636.0, 582.0, 579.0, 587.0, 582.0, 582.0, 630.0, 576.0, 582.0, 573.0, 630.0, 582.0, 570.0, 576.0, 582.0, 582.0, 582.0, 579.0, 582.0, 576.0, 587.0, 579.0, 582.0, 630.0, 590.0, 579.0, 561.0, 579.0, 573.0, 587.0, 587.0, 579.0, 576.0, 530.0, 530.0, 627.0, 582.0, 579.0, 538.0, 576.0, 582.0, 579.0, 519.0, 582.0, 582.0, 582.0, 630.0, 579.0, 579.0, 576.0, 567.0, 576.0, 465.0, 587.0, 582.0, 587.0, 570.0, 582.0, 579.0, 573.0, 582.0, 584.0, 627.0, 576.0, 582.0, 582.0, 587.0, 627.0, 579.0, 576.0, 582.0, 576.0, 573.0, 576.0, 633.0, 525.0, 576.0, 579.0, 579.0, 582.0, 579.0, 536.0, 576.0, 627.0, 579.0, 576.0, 582.0, 582.0, 579.0, 573.0, 587.0, 576.0, 576.0, 630.0, 582.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [288.0, 285.0, 288.0, 291.0, 318.0, 312.0, 289.0, 290.0, 316.0, 320.0, 296.0, 286.0, 290.0, 289.0, 290.0, 297.0, 286.0, 296.0, 293.0, 289.0, 314.0, 316.0, 301.0, 275.0, 293.0, 289.0, 278.0, 295.0, 318.0, 312.0, 286.0, 296.0, 276.0, 294.0, 298.0, 278.0, 295.0, 287.0, 296.0, 286.0, 293.0, 289.0, 292.0, 287.0, 296.0, 286.0, 282.0, 294.0, 307.0, 280.0, 288.0, 291.0, 290.0, 292.0, 316.0, 314.0, 298.0, 292.0, 293.0, 286.0, 278.0, 283.0, 299.0, 280.0, 281.0, 292.0, 286.0, 301.0, 295.0, 292.0, 282.0, 297.0, 285.0, 291.0, 272.0, 258.0, 270.0, 260.0, 313.0, 314.0, 281.0, 301.0, 285.0, 294.0, 263.0, 275.0, 280.0, 296.0, 292.0, 290.0, 294.0, 285.0, 270.0, 249.0, 296.0, 286.0, 286.0, 
296.0, 282.0, 300.0, 318.0, 312.0, 295.0, 284.0, 291.0, 288.0, 288.0, 288.0, 291.0, 276.0, 278.0, 298.0, 227.0, 238.0, 308.0, 279.0, 298.0, 284.0, 313.0, 274.0, 284.0, 286.0, 285.0, 297.0, 270.0, 309.0, 282.0, 291.0, 284.0, 298.0, 295.0, 289.0, 313.0, 314.0, 285.0, 291.0, 293.0, 289.0, 299.0, 283.0, 290.0, 297.0, 313.0, 314.0, 292.0, 287.0, 284.0, 292.0, 291.0, 291.0, 291.0, 285.0, 291.0, 282.0, 272.0, 304.0, 311.0, 322.0, 255.0, 270.0, 283.0, 293.0, 289.0, 290.0, 294.0, 285.0, 284.0, 298.0, 297.0, 282.0, 261.0, 275.0, 288.0, 288.0, 305.0, 322.0, 294.0, 285.0, 290.0, 286.0, 290.0, 292.0, 295.0, 287.0, 300.0, 279.0, 293.0, 280.0, 293.0, 294.0, 293.0, 283.0, 296.0, 280.0, 321.0, 309.0, 305.0, 277.0, 288.0, 294.0]}, "sampler_perf": {"mean_env_wait_ms": 1.26716887743788, "mean_processing_ms": 0.3214640615198408, "mean_inference_ms": 1.8211055910807965}, "off_policy_estimator": {}, "info": {"num_steps_trained": 4416000, "num_steps_sampled": 2355200, "sample_time_ms": 21369.68, "load_time_ms": 37.437, "grad_time_ms": 9828.237, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0049454327672719955, "policy_loss": -0.002732283202931285, "vf_loss": 82.44231414794922, "vf_explained_var": 0.771254301071167, "kl": 0.0019334623357281089, "entropy": 1.1330214738845825, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2355200, "episodes_total": 5888, "training_iteration": 184, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-17-05", "timestamp": 1660252625, "time_this_iter_s": 31.787577867507935, "time_total_s": 11042.110046863556, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 11042.110046863556, "timesteps_since_restore": 2355200, "iterations_since_restore": 184, "perf": {"cpu_util_percent": 30.99777777777778, "ram_util_percent": 58.44666666666665}}
-{"episode_reward_max": 636.0, "episode_reward_min": 194.0, "episode_reward_mean": 578.19, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 93.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 289.095}, "custom_metrics": {"sparse_reward_mean": 199.8, "sparse_reward_min": 60, "sparse_reward_max": 220, "shaped_reward_mean": 178.59, "shaped_reward_min": 74, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.6, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 18.39, "onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 28, "useful_onion_pickup_agent_0_mean": 15.44, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 17.57, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 27, "onion_drop_agent_0_mean": 0.38, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.48, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.08, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.15, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 14.92, "potting_onion_agent_0_min": 3, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 17.61, "potting_onion_agent_1_min": 11, "potting_onion_agent_1_max": 26, "dish_pickup_agent_0_mean": 6.32, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 5.05, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.55, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.55, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.52, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.3, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.09, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.51, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.63, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.48, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.56, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.92, 
"optimal_onion_potting_agent_0_min": 3, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 17.61, "optimal_onion_potting_agent_1_min": 11, "optimal_onion_potting_agent_1_max": 26, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.92, "viable_onion_potting_agent_0_min": 3, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 17.61, "viable_onion_potting_agent_1_min": 11, "viable_onion_potting_agent_1_max": 26, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 582.0, 584.0, 576.0, 573.0, 579.0, 515.0, 576.0, 582.0, 576.0, 582.0, 587.0, 522.0, 573.0, 582.0, 519.0, 576.0, 582.0, 518.0, 570.0, 633.0, 582.0, 194.0, 582.0, 630.0, 587.0, 576.0, 579.0, 527.0, 576.0, 630.0, 587.0, 582.0, 584.0, 627.0, 576.0, 582.0, 582.0, 587.0, 627.0, 579.0, 576.0, 582.0, 576.0, 573.0, 576.0, 633.0, 525.0, 576.0, 579.0, 579.0, 582.0, 579.0, 536.0, 576.0, 627.0, 579.0, 576.0, 582.0, 582.0, 579.0, 573.0, 587.0, 576.0, 576.0, 630.0, 582.0, 582.0, 573.0, 579.0, 630.0, 579.0, 636.0, 582.0, 579.0, 587.0, 582.0, 582.0, 630.0, 576.0, 582.0, 573.0, 630.0, 582.0, 570.0, 576.0, 582.0, 582.0, 582.0, 579.0, 582.0, 576.0, 587.0, 579.0, 582.0, 630.0, 590.0, 579.0, 561.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [298.0, 281.0, 286.0, 296.0, 283.0, 301.0, 286.0, 290.0, 282.0, 291.0, 292.0, 287.0, 260.0, 255.0, 275.0, 301.0, 294.0, 288.0, 280.0, 296.0, 291.0, 291.0, 288.0, 299.0, 259.0, 263.0, 272.0, 301.0, 303.0, 279.0, 261.0, 258.0, 278.0, 298.0, 286.0, 296.0, 268.0, 250.0, 287.0, 283.0, 313.0, 320.0, 283.0, 299.0, 93.0, 101.0, 287.0, 295.0, 315.0, 315.0, 300.0, 287.0, 279.0, 297.0, 288.0, 291.0, 262.0, 265.0, 277.0, 299.0, 306.0, 324.0, 285.0, 302.0, 284.0, 298.0, 295.0, 289.0, 313.0, 314.0, 285.0, 291.0, 293.0, 289.0, 299.0, 283.0, 290.0, 297.0, 313.0, 314.0, 292.0, 287.0, 284.0, 292.0, 291.0, 291.0, 291.0, 285.0, 291.0, 282.0, 272.0, 304.0, 311.0, 322.0, 255.0, 270.0, 283.0, 
293.0, 289.0, 290.0, 294.0, 285.0, 284.0, 298.0, 297.0, 282.0, 261.0, 275.0, 288.0, 288.0, 305.0, 322.0, 294.0, 285.0, 290.0, 286.0, 290.0, 292.0, 295.0, 287.0, 300.0, 279.0, 293.0, 280.0, 293.0, 294.0, 293.0, 283.0, 296.0, 280.0, 321.0, 309.0, 305.0, 277.0, 288.0, 294.0, 288.0, 285.0, 288.0, 291.0, 318.0, 312.0, 289.0, 290.0, 316.0, 320.0, 296.0, 286.0, 290.0, 289.0, 290.0, 297.0, 286.0, 296.0, 293.0, 289.0, 314.0, 316.0, 301.0, 275.0, 293.0, 289.0, 278.0, 295.0, 318.0, 312.0, 286.0, 296.0, 276.0, 294.0, 298.0, 278.0, 295.0, 287.0, 296.0, 286.0, 293.0, 289.0, 292.0, 287.0, 296.0, 286.0, 282.0, 294.0, 307.0, 280.0, 288.0, 291.0, 290.0, 292.0, 316.0, 314.0, 298.0, 292.0, 293.0, 286.0, 278.0, 283.0, 299.0, 280.0]}, "sampler_perf": {"mean_env_wait_ms": 1.2627159762487399, "mean_processing_ms": 0.320576860334056, "mean_inference_ms": 1.8169666521005257}, "off_policy_estimator": {}, "info": {"num_steps_trained": 4440000, "num_steps_sampled": 2368000, "sample_time_ms": 21316.526, "load_time_ms": 37.828, "grad_time_ms": 10200.15, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.004557406529784203, "policy_loss": -0.004057899583131075, "vf_loss": 91.82827758789062, "vf_explained_var": 0.7658367156982422, "kl": 0.001969862962141633, "entropy": 1.135046362876892, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2368000, "episodes_total": 5920, "training_iteration": 185, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-17-38", "timestamp": 1660252658, "time_this_iter_s": 32.679043769836426, "time_total_s": 11074.789090633392, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 11074.789090633392, "timesteps_since_restore": 2368000, "iterations_since_restore": 185, "perf": {"cpu_util_percent": 33.50425531914893, "ram_util_percent": 58.438297872340435}}
-{"episode_reward_max": 636.0, "episode_reward_min": 194.0, "episode_reward_mean": 576.21, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 93.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 288.105}, "custom_metrics": {"sparse_reward_mean": 199.2, "sparse_reward_min": 60, "sparse_reward_max": 220, "shaped_reward_mean": 177.81, "shaped_reward_min": 74, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.66, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 18.58, "onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 28, "useful_onion_pickup_agent_0_mean": 15.42, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 17.81, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 27, "onion_drop_agent_0_mean": 0.58, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.61, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.16, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.21, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.75, "potting_onion_agent_0_min": 3, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 17.65, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 6.44, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 5.01, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.52, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.45, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.56, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.31, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.08, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.58, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.56, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.51, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.49, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.75, 
"optimal_onion_potting_agent_0_min": 3, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 17.65, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.75, "viable_onion_potting_agent_0_min": 3, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 17.65, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [570.0, 627.0, 527.0, 582.0, 624.0, 581.0, 576.0, 576.0, 584.0, 576.0, 587.0, 527.0, 582.0, 576.0, 498.0, 579.0, 582.0, 518.0, 630.0, 582.0, 576.0, 587.0, 582.0, 582.0, 582.0, 627.0, 582.0, 468.0, 627.0, 576.0, 582.0, 582.0, 576.0, 630.0, 582.0, 582.0, 573.0, 579.0, 630.0, 579.0, 636.0, 582.0, 579.0, 587.0, 582.0, 582.0, 630.0, 576.0, 582.0, 573.0, 630.0, 582.0, 570.0, 576.0, 582.0, 582.0, 582.0, 579.0, 582.0, 576.0, 587.0, 579.0, 582.0, 630.0, 590.0, 579.0, 561.0, 579.0, 579.0, 582.0, 584.0, 576.0, 573.0, 579.0, 515.0, 576.0, 582.0, 576.0, 582.0, 587.0, 522.0, 573.0, 582.0, 519.0, 576.0, 582.0, 518.0, 570.0, 633.0, 582.0, 194.0, 582.0, 630.0, 587.0, 576.0, 579.0, 527.0, 576.0, 630.0, 587.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [286.0, 284.0, 310.0, 317.0, 277.0, 250.0, 281.0, 301.0, 316.0, 308.0, 289.0, 292.0, 288.0, 288.0, 283.0, 293.0, 286.0, 298.0, 285.0, 291.0, 302.0, 285.0, 270.0, 257.0, 298.0, 284.0, 277.0, 299.0, 255.0, 243.0, 287.0, 292.0, 293.0, 289.0, 250.0, 268.0, 316.0, 314.0, 290.0, 292.0, 305.0, 271.0, 278.0, 309.0, 300.0, 282.0, 294.0, 288.0, 284.0, 298.0, 313.0, 314.0, 297.0, 285.0, 227.0, 241.0, 310.0, 317.0, 289.0, 287.0, 294.0, 288.0, 293.0, 289.0, 296.0, 280.0, 321.0, 309.0, 305.0, 277.0, 288.0, 294.0, 288.0, 285.0, 288.0, 291.0, 318.0, 312.0, 289.0, 290.0, 316.0, 320.0, 296.0, 286.0, 290.0, 289.0, 290.0, 297.0, 286.0, 296.0, 293.0, 289.0, 314.0, 316.0, 301.0, 275.0, 293.0, 
289.0, 278.0, 295.0, 318.0, 312.0, 286.0, 296.0, 276.0, 294.0, 298.0, 278.0, 295.0, 287.0, 296.0, 286.0, 293.0, 289.0, 292.0, 287.0, 296.0, 286.0, 282.0, 294.0, 307.0, 280.0, 288.0, 291.0, 290.0, 292.0, 316.0, 314.0, 298.0, 292.0, 293.0, 286.0, 278.0, 283.0, 299.0, 280.0, 298.0, 281.0, 286.0, 296.0, 283.0, 301.0, 286.0, 290.0, 282.0, 291.0, 292.0, 287.0, 260.0, 255.0, 275.0, 301.0, 294.0, 288.0, 280.0, 296.0, 291.0, 291.0, 288.0, 299.0, 259.0, 263.0, 272.0, 301.0, 303.0, 279.0, 261.0, 258.0, 278.0, 298.0, 286.0, 296.0, 268.0, 250.0, 287.0, 283.0, 313.0, 320.0, 283.0, 299.0, 93.0, 101.0, 287.0, 295.0, 315.0, 315.0, 300.0, 287.0, 279.0, 297.0, 288.0, 291.0, 262.0, 265.0, 277.0, 299.0, 306.0, 324.0, 285.0, 302.0]}, "sampler_perf": {"mean_env_wait_ms": 1.258340305403844, "mean_processing_ms": 0.31970797918049665, "mean_inference_ms": 1.8136714986418816}, "off_policy_estimator": {}, "info": {"num_steps_trained": 4464000, "num_steps_sampled": 2380800, "sample_time_ms": 21817.869, "load_time_ms": 37.793, "grad_time_ms": 10598.659, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.005312865134328604, "policy_loss": -0.0029478278011083603, "vf_loss": 88.26638793945312, "vf_explained_var": 0.762065589427948, "kl": 0.0017753179417923093, "entropy": 1.1319037675857544, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2380800, "episodes_total": 5952, "training_iteration": 186, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-18-19", "timestamp": 1660252699, "time_this_iter_s": 41.059054136276245, "time_total_s": 11115.848144769669, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 11115.848144769669, "timesteps_since_restore": 2380800, "iterations_since_restore": 186, "perf": {"cpu_util_percent": 27.889655172413793, "ram_util_percent": 58.474137931034484}}
-{"episode_reward_max": 633.0, "episode_reward_min": 194.0, "episode_reward_mean": 572.75, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 93.0}, "policy_reward_max": {"ppo": 326.0}, "policy_reward_mean": {"ppo": 286.375}, "custom_metrics": {"sparse_reward_mean": 198.0, "sparse_reward_min": 60, "sparse_reward_max": 220, "shaped_reward_mean": 176.75, "shaped_reward_min": 74, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.75, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 18.23, "onion_pickup_agent_1_min": 12, "onion_pickup_agent_1_max": 28, "useful_onion_pickup_agent_0_mean": 15.43, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 17.46, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 27, "onion_drop_agent_0_mean": 0.6, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.51, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.25, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.23, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.81, "potting_onion_agent_0_min": 3, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.41, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 6.17, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 5.16, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.27, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.56, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.49, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.32, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.07, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.43, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.69, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.35, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.6, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.81, 
"optimal_onion_potting_agent_0_min": 3, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.41, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.81, "viable_onion_potting_agent_0_min": 3, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.41, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [576.0, 582.0, 510.0, 579.0, 587.0, 582.0, 579.0, 627.0, 576.0, 576.0, 530.0, 579.0, 587.0, 582.0, 587.0, 461.0, 576.0, 582.0, 573.0, 630.0, 587.0, 582.0, 582.0, 627.0, 579.0, 573.0, 582.0, 630.0, 579.0, 576.0, 579.0, 576.0, 590.0, 579.0, 561.0, 579.0, 579.0, 582.0, 584.0, 576.0, 573.0, 579.0, 515.0, 576.0, 582.0, 576.0, 582.0, 587.0, 522.0, 573.0, 582.0, 519.0, 576.0, 582.0, 518.0, 570.0, 633.0, 582.0, 194.0, 582.0, 630.0, 587.0, 576.0, 579.0, 527.0, 576.0, 630.0, 587.0, 570.0, 627.0, 527.0, 582.0, 624.0, 581.0, 576.0, 576.0, 584.0, 576.0, 587.0, 527.0, 582.0, 576.0, 498.0, 579.0, 582.0, 518.0, 630.0, 582.0, 576.0, 587.0, 582.0, 582.0, 582.0, 627.0, 582.0, 468.0, 627.0, 576.0, 582.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [283.0, 293.0, 287.0, 295.0, 258.0, 252.0, 291.0, 288.0, 296.0, 291.0, 298.0, 284.0, 285.0, 294.0, 315.0, 312.0, 289.0, 287.0, 285.0, 291.0, 270.0, 260.0, 285.0, 294.0, 282.0, 305.0, 297.0, 285.0, 301.0, 286.0, 226.0, 235.0, 292.0, 284.0, 287.0, 295.0, 285.0, 288.0, 308.0, 322.0, 287.0, 300.0, 295.0, 287.0, 292.0, 290.0, 304.0, 323.0, 286.0, 293.0, 289.0, 284.0, 284.0, 298.0, 326.0, 304.0, 297.0, 282.0, 285.0, 291.0, 281.0, 298.0, 285.0, 291.0, 298.0, 292.0, 293.0, 286.0, 278.0, 283.0, 299.0, 280.0, 298.0, 281.0, 286.0, 296.0, 283.0, 301.0, 286.0, 290.0, 282.0, 291.0, 292.0, 287.0, 260.0, 255.0, 275.0, 301.0, 294.0, 288.0, 280.0, 296.0, 291.0, 291.0, 288.0, 299.0, 259.0, 
263.0, 272.0, 301.0, 303.0, 279.0, 261.0, 258.0, 278.0, 298.0, 286.0, 296.0, 268.0, 250.0, 287.0, 283.0, 313.0, 320.0, 283.0, 299.0, 93.0, 101.0, 287.0, 295.0, 315.0, 315.0, 300.0, 287.0, 279.0, 297.0, 288.0, 291.0, 262.0, 265.0, 277.0, 299.0, 306.0, 324.0, 285.0, 302.0, 286.0, 284.0, 310.0, 317.0, 277.0, 250.0, 281.0, 301.0, 316.0, 308.0, 289.0, 292.0, 288.0, 288.0, 283.0, 293.0, 286.0, 298.0, 285.0, 291.0, 302.0, 285.0, 270.0, 257.0, 298.0, 284.0, 277.0, 299.0, 255.0, 243.0, 287.0, 292.0, 293.0, 289.0, 250.0, 268.0, 316.0, 314.0, 290.0, 292.0, 305.0, 271.0, 278.0, 309.0, 300.0, 282.0, 294.0, 288.0, 284.0, 298.0, 313.0, 314.0, 297.0, 285.0, 227.0, 241.0, 310.0, 317.0, 289.0, 287.0, 294.0, 288.0, 293.0, 289.0]}, "sampler_perf": {"mean_env_wait_ms": 1.2540029262486032, "mean_processing_ms": 0.31884364369781465, "mean_inference_ms": 1.8101035219779944}, "off_policy_estimator": {}, "info": {"num_steps_trained": 4488000, "num_steps_sampled": 2393600, "sample_time_ms": 21725.743, "load_time_ms": 37.383, "grad_time_ms": 10590.941, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.005064256023615599, "policy_loss": -0.0036211840342730284, "vf_loss": 92.49484252929688, "vf_explained_var": 0.7542417645454407, "kl": 0.001856558839790523, "entropy": 1.1280810832977295, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2393600, "episodes_total": 5984, "training_iteration": 187, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-18-47", "timestamp": 1660252727, "time_this_iter_s": 28.611520051956177, "time_total_s": 11144.459664821625, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 11144.459664821625, "timesteps_since_restore": 2393600, "iterations_since_restore": 187, "perf": {"cpu_util_percent": 32.7725, "ram_util_percent": 58.567499999999995}}
-{"episode_reward_max": 636.0, "episode_reward_min": 461.0, "episode_reward_mean": 580.3, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 226.0}, "policy_reward_max": {"ppo": 337.0}, "policy_reward_mean": {"ppo": 290.15}, "custom_metrics": {"sparse_reward_mean": 200.6, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 179.1, "shaped_reward_min": 138, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.13, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 18.34, "onion_pickup_agent_1_min": 12, "onion_pickup_agent_1_max": 28, "useful_onion_pickup_agent_0_mean": 15.74, "useful_onion_pickup_agent_0_min": 9, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 17.64, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 27, "onion_drop_agent_0_mean": 0.69, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.55, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.31, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.23, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.08, "potting_onion_agent_0_min": 9, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 17.48, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 6.14, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.32, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.39, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.7, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.44, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.36, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.04, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.46, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.77, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.38, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.67, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.08, 
"optimal_onion_potting_agent_0_min": 9, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 17.48, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.08, "viable_onion_potting_agent_0_min": 9, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 17.48, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [576.0, 570.0, 582.0, 570.0, 579.0, 576.0, 593.0, 587.0, 582.0, 582.0, 579.0, 576.0, 633.0, 636.0, 636.0, 587.0, 579.0, 582.0, 582.0, 519.0, 582.0, 570.0, 633.0, 579.0, 587.0, 581.0, 579.0, 582.0, 582.0, 630.0, 582.0, 567.0, 527.0, 576.0, 630.0, 587.0, 570.0, 627.0, 527.0, 582.0, 624.0, 581.0, 576.0, 576.0, 584.0, 576.0, 587.0, 527.0, 582.0, 576.0, 498.0, 579.0, 582.0, 518.0, 630.0, 582.0, 576.0, 587.0, 582.0, 582.0, 582.0, 627.0, 582.0, 468.0, 627.0, 576.0, 582.0, 582.0, 576.0, 582.0, 510.0, 579.0, 587.0, 582.0, 579.0, 627.0, 576.0, 576.0, 530.0, 579.0, 587.0, 582.0, 587.0, 461.0, 576.0, 582.0, 573.0, 630.0, 587.0, 582.0, 582.0, 627.0, 579.0, 573.0, 582.0, 630.0, 579.0, 576.0, 579.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [295.0, 281.0, 294.0, 276.0, 279.0, 303.0, 289.0, 281.0, 298.0, 281.0, 278.0, 298.0, 295.0, 298.0, 292.0, 295.0, 286.0, 296.0, 297.0, 285.0, 289.0, 290.0, 275.0, 301.0, 325.0, 308.0, 317.0, 319.0, 310.0, 326.0, 291.0, 296.0, 290.0, 289.0, 288.0, 294.0, 301.0, 281.0, 259.0, 260.0, 292.0, 290.0, 279.0, 291.0, 296.0, 337.0, 283.0, 296.0, 289.0, 298.0, 295.0, 286.0, 292.0, 287.0, 281.0, 301.0, 306.0, 276.0, 308.0, 322.0, 288.0, 294.0, 290.0, 277.0, 262.0, 265.0, 277.0, 299.0, 306.0, 324.0, 285.0, 302.0, 286.0, 284.0, 310.0, 317.0, 277.0, 250.0, 281.0, 301.0, 316.0, 308.0, 289.0, 292.0, 288.0, 288.0, 283.0, 293.0, 286.0, 298.0, 285.0, 291.0, 302.0, 285.0, 270.0, 257.0, 298.0, 
284.0, 277.0, 299.0, 255.0, 243.0, 287.0, 292.0, 293.0, 289.0, 250.0, 268.0, 316.0, 314.0, 290.0, 292.0, 305.0, 271.0, 278.0, 309.0, 300.0, 282.0, 294.0, 288.0, 284.0, 298.0, 313.0, 314.0, 297.0, 285.0, 227.0, 241.0, 310.0, 317.0, 289.0, 287.0, 294.0, 288.0, 293.0, 289.0, 283.0, 293.0, 287.0, 295.0, 258.0, 252.0, 291.0, 288.0, 296.0, 291.0, 298.0, 284.0, 285.0, 294.0, 315.0, 312.0, 289.0, 287.0, 285.0, 291.0, 270.0, 260.0, 285.0, 294.0, 282.0, 305.0, 297.0, 285.0, 301.0, 286.0, 226.0, 235.0, 292.0, 284.0, 287.0, 295.0, 285.0, 288.0, 308.0, 322.0, 287.0, 300.0, 295.0, 287.0, 292.0, 290.0, 304.0, 323.0, 286.0, 293.0, 289.0, 284.0, 284.0, 298.0, 326.0, 304.0, 297.0, 282.0, 285.0, 291.0, 281.0, 298.0, 285.0, 291.0]}, "sampler_perf": {"mean_env_wait_ms": 1.2497077213230872, "mean_processing_ms": 0.3179872495448108, "mean_inference_ms": 1.8063396156782892}, "off_policy_estimator": {}, "info": {"num_steps_trained": 4512000, "num_steps_sampled": 2406400, "sample_time_ms": 21491.638, "load_time_ms": 36.743, "grad_time_ms": 10320.581, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.005896018352359533, "policy_loss": -0.002354246797040105, "vf_loss": 88.0772933959961, "vf_explained_var": 0.767683744430542, "kl": 0.0020883409306406975, "entropy": 1.1149283647537231, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2406400, "episodes_total": 6016, "training_iteration": 188, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-19-13", "timestamp": 1660252753, "time_this_iter_s": 25.91284203529358, "time_total_s": 11170.372506856918, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 11170.372506856918, "timesteps_since_restore": 2406400, "iterations_since_restore": 188, "perf": {"cpu_util_percent": 33.778378378378385, "ram_util_percent": 58.56486486486485}}
-{"episode_reward_max": 636.0, "episode_reward_min": 458.0, "episode_reward_mean": 582.29, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 226.0}, "policy_reward_max": {"ppo": 337.0}, "policy_reward_mean": {"ppo": 291.145}, "custom_metrics": {"sparse_reward_mean": 201.2, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 179.89, "shaped_reward_min": 138, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.36, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 18.03, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 16.0, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 25, "useful_onion_pickup_agent_1_mean": 17.36, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 25, "onion_drop_agent_0_mean": 0.62, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 0.48, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.22, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.2, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.4, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 17.32, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 5.97, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.45, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 5.36, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.8, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.33, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.38, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.04, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.4, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.86, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.33, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.76, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.4, 
"optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 17.32, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.4, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 17.32, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 458.0, 587.0, 579.0, 516.0, 582.0, 579.0, 590.0, 587.0, 582.0, 630.0, 533.0, 579.0, 630.0, 582.0, 630.0, 576.0, 582.0, 582.0, 587.0, 582.0, 582.0, 576.0, 584.0, 567.0, 582.0, 584.0, 576.0, 564.0, 582.0, 627.0, 633.0, 627.0, 576.0, 582.0, 582.0, 576.0, 582.0, 510.0, 579.0, 587.0, 582.0, 579.0, 627.0, 576.0, 576.0, 530.0, 579.0, 587.0, 582.0, 587.0, 461.0, 576.0, 582.0, 573.0, 630.0, 587.0, 582.0, 582.0, 627.0, 579.0, 573.0, 582.0, 630.0, 579.0, 576.0, 579.0, 576.0, 576.0, 570.0, 582.0, 570.0, 579.0, 576.0, 593.0, 587.0, 582.0, 582.0, 579.0, 576.0, 633.0, 636.0, 636.0, 587.0, 579.0, 582.0, 582.0, 519.0, 582.0, 570.0, 633.0, 579.0, 587.0, 581.0, 579.0, 582.0, 582.0, 630.0, 582.0, 567.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [287.0, 292.0, 226.0, 232.0, 298.0, 289.0, 290.0, 289.0, 249.0, 267.0, 297.0, 285.0, 303.0, 276.0, 301.0, 289.0, 283.0, 304.0, 293.0, 289.0, 319.0, 311.0, 268.0, 265.0, 283.0, 296.0, 322.0, 308.0, 288.0, 294.0, 311.0, 319.0, 286.0, 290.0, 283.0, 299.0, 283.0, 299.0, 295.0, 292.0, 286.0, 296.0, 278.0, 304.0, 289.0, 287.0, 284.0, 300.0, 290.0, 277.0, 283.0, 299.0, 309.0, 275.0, 282.0, 294.0, 285.0, 279.0, 296.0, 286.0, 318.0, 309.0, 319.0, 314.0, 310.0, 317.0, 289.0, 287.0, 294.0, 288.0, 293.0, 289.0, 283.0, 293.0, 287.0, 295.0, 258.0, 252.0, 291.0, 288.0, 296.0, 291.0, 298.0, 284.0, 285.0, 294.0, 315.0, 312.0, 289.0, 287.0, 285.0, 291.0, 270.0, 260.0, 285.0, 294.0, 282.0, 
305.0, 297.0, 285.0, 301.0, 286.0, 226.0, 235.0, 292.0, 284.0, 287.0, 295.0, 285.0, 288.0, 308.0, 322.0, 287.0, 300.0, 295.0, 287.0, 292.0, 290.0, 304.0, 323.0, 286.0, 293.0, 289.0, 284.0, 284.0, 298.0, 326.0, 304.0, 297.0, 282.0, 285.0, 291.0, 281.0, 298.0, 285.0, 291.0, 295.0, 281.0, 294.0, 276.0, 279.0, 303.0, 289.0, 281.0, 298.0, 281.0, 278.0, 298.0, 295.0, 298.0, 292.0, 295.0, 286.0, 296.0, 297.0, 285.0, 289.0, 290.0, 275.0, 301.0, 325.0, 308.0, 317.0, 319.0, 310.0, 326.0, 291.0, 296.0, 290.0, 289.0, 288.0, 294.0, 301.0, 281.0, 259.0, 260.0, 292.0, 290.0, 279.0, 291.0, 296.0, 337.0, 283.0, 296.0, 289.0, 298.0, 295.0, 286.0, 292.0, 287.0, 281.0, 301.0, 306.0, 276.0, 308.0, 322.0, 288.0, 294.0, 290.0, 277.0]}, "sampler_perf": {"mean_env_wait_ms": 1.245424462248642, "mean_processing_ms": 0.3171301897785842, "mean_inference_ms": 1.801636761317335}, "off_policy_estimator": {}, "info": {"num_steps_trained": 4536000, "num_steps_sampled": 2419200, "sample_time_ms": 21345.619, "load_time_ms": 36.86, "grad_time_ms": 10234.708, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0035058397334069014, "policy_loss": -0.0047208876349031925, "vf_loss": 87.84651947021484, "vf_explained_var": 0.7590529918670654, "kl": 0.0018027568003162742, "entropy": 1.1158560514450073, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2419200, "episodes_total": 6048, "training_iteration": 189, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-19-43", "timestamp": 1660252783, "time_this_iter_s": 29.704707860946655, "time_total_s": 11200.077214717865, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 11200.077214717865, "timesteps_since_restore": 2419200, "iterations_since_restore": 189, "perf": {"cpu_util_percent": 33.13571428571428, "ram_util_percent": 58.52142857142859}}
-{"episode_reward_max": 636.0, "episode_reward_min": 458.0, "episode_reward_mean": 581.49, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 226.0}, "policy_reward_max": {"ppo": 337.0}, "policy_reward_mean": {"ppo": 290.745}, "custom_metrics": {"sparse_reward_mean": 200.8, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 179.89, "shaped_reward_min": 138, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.59, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 17.76, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 16.32, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 25, "useful_onion_pickup_agent_1_mean": 17.06, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 25, "onion_drop_agent_0_mean": 0.61, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 0.52, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.16, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.19, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.65, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 17.04, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 5.93, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.5, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 5.37, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.87, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.3, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.39, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.04, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.37, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.87, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.31, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.76, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.65, 
"optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 17.04, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.65, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 17.04, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 579.0, 579.0, 582.0, 582.0, 582.0, 582.0, 579.0, 582.0, 521.0, 582.0, 579.0, 582.0, 522.0, 582.0, 527.0, 576.0, 522.0, 582.0, 582.0, 627.0, 576.0, 587.0, 627.0, 582.0, 579.0, 587.0, 573.0, 584.0, 630.0, 576.0, 579.0, 579.0, 576.0, 579.0, 576.0, 576.0, 570.0, 582.0, 570.0, 579.0, 576.0, 593.0, 587.0, 582.0, 582.0, 579.0, 576.0, 633.0, 636.0, 636.0, 587.0, 579.0, 582.0, 582.0, 519.0, 582.0, 570.0, 633.0, 579.0, 587.0, 581.0, 579.0, 582.0, 582.0, 630.0, 582.0, 567.0, 579.0, 458.0, 587.0, 579.0, 516.0, 582.0, 579.0, 590.0, 587.0, 582.0, 630.0, 533.0, 579.0, 630.0, 582.0, 630.0, 576.0, 582.0, 582.0, 587.0, 582.0, 582.0, 576.0, 584.0, 567.0, 582.0, 584.0, 576.0, 564.0, 582.0, 627.0, 633.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [290.0, 289.0, 288.0, 291.0, 291.0, 288.0, 294.0, 288.0, 294.0, 288.0, 286.0, 296.0, 297.0, 285.0, 282.0, 297.0, 293.0, 289.0, 270.0, 251.0, 295.0, 287.0, 288.0, 291.0, 285.0, 297.0, 263.0, 259.0, 285.0, 297.0, 275.0, 252.0, 290.0, 286.0, 279.0, 243.0, 294.0, 288.0, 290.0, 292.0, 311.0, 316.0, 289.0, 287.0, 295.0, 292.0, 314.0, 313.0, 291.0, 291.0, 288.0, 291.0, 304.0, 283.0, 292.0, 281.0, 290.0, 294.0, 320.0, 310.0, 286.0, 290.0, 283.0, 296.0, 297.0, 282.0, 285.0, 291.0, 281.0, 298.0, 285.0, 291.0, 295.0, 281.0, 294.0, 276.0, 279.0, 303.0, 289.0, 281.0, 298.0, 281.0, 278.0, 298.0, 295.0, 298.0, 292.0, 295.0, 286.0, 296.0, 297.0, 285.0, 289.0, 290.0, 275.0, 301.0, 325.0, 
308.0, 317.0, 319.0, 310.0, 326.0, 291.0, 296.0, 290.0, 289.0, 288.0, 294.0, 301.0, 281.0, 259.0, 260.0, 292.0, 290.0, 279.0, 291.0, 296.0, 337.0, 283.0, 296.0, 289.0, 298.0, 295.0, 286.0, 292.0, 287.0, 281.0, 301.0, 306.0, 276.0, 308.0, 322.0, 288.0, 294.0, 290.0, 277.0, 287.0, 292.0, 226.0, 232.0, 298.0, 289.0, 290.0, 289.0, 249.0, 267.0, 297.0, 285.0, 303.0, 276.0, 301.0, 289.0, 283.0, 304.0, 293.0, 289.0, 319.0, 311.0, 268.0, 265.0, 283.0, 296.0, 322.0, 308.0, 288.0, 294.0, 311.0, 319.0, 286.0, 290.0, 283.0, 299.0, 283.0, 299.0, 295.0, 292.0, 286.0, 296.0, 278.0, 304.0, 289.0, 287.0, 284.0, 300.0, 290.0, 277.0, 283.0, 299.0, 309.0, 275.0, 282.0, 294.0, 285.0, 279.0, 296.0, 286.0, 318.0, 309.0, 319.0, 314.0]}, "sampler_perf": {"mean_env_wait_ms": 1.2411902050042325, "mean_processing_ms": 0.3162859757027142, "mean_inference_ms": 1.7969666432132458}, "off_policy_estimator": {}, "info": {"num_steps_trained": 4560000, "num_steps_sampled": 2432000, "sample_time_ms": 21237.531, "load_time_ms": 36.84, "grad_time_ms": 10068.85, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.006991778966039419, "policy_loss": -0.0012481998419389129, "vf_loss": 87.9997329711914, "vf_explained_var": 0.7513763904571533, "kl": 0.0021018313709646463, "entropy": 1.119996428489685, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2432000, "episodes_total": 6080, "training_iteration": 190, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-20-11", "timestamp": 1660252811, "time_this_iter_s": 28.08810520172119, "time_total_s": 11228.165319919586, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 11228.165319919586, "timesteps_since_restore": 2432000, "iterations_since_restore": 190, "perf": {"cpu_util_percent": 33.77, "ram_util_percent": 58.345000000000006}}
-{"episode_reward_max": 636.0, "episode_reward_min": 441.0, "episode_reward_mean": 580.12, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 220.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 290.06}, "custom_metrics": {"sparse_reward_mean": 200.6, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 178.92, "shaped_reward_min": 121, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.13, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 18.02, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 15.84, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 25, "useful_onion_pickup_agent_1_mean": 17.29, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.49, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 0.62, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.15, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.21, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 15.37, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 17.17, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 6.21, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.33, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 12, "useful_dish_pickup_agent_0_mean": 5.5, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.6, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.4, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.43, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.06, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.57, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.65, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.48, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.57, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.37, 
"optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 17.17, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.37, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 17.17, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 579.0, 441.0, 582.0, 636.0, 582.0, 576.0, 582.0, 579.0, 621.0, 567.0, 579.0, 576.0, 567.0, 579.0, 582.0, 587.0, 573.0, 522.0, 581.0, 633.0, 627.0, 579.0, 587.0, 573.0, 579.0, 627.0, 587.0, 579.0, 582.0, 627.0, 522.0, 582.0, 630.0, 582.0, 567.0, 579.0, 458.0, 587.0, 579.0, 516.0, 582.0, 579.0, 590.0, 587.0, 582.0, 630.0, 533.0, 579.0, 630.0, 582.0, 630.0, 576.0, 582.0, 582.0, 587.0, 582.0, 582.0, 576.0, 584.0, 567.0, 582.0, 584.0, 576.0, 564.0, 582.0, 627.0, 633.0, 579.0, 579.0, 579.0, 582.0, 582.0, 582.0, 582.0, 579.0, 582.0, 521.0, 582.0, 579.0, 582.0, 522.0, 582.0, 527.0, 576.0, 522.0, 582.0, 582.0, 627.0, 576.0, 587.0, 627.0, 582.0, 579.0, 587.0, 573.0, 584.0, 630.0, 576.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [292.0, 287.0, 295.0, 284.0, 220.0, 221.0, 286.0, 296.0, 320.0, 316.0, 294.0, 288.0, 298.0, 278.0, 293.0, 289.0, 288.0, 291.0, 319.0, 302.0, 292.0, 275.0, 296.0, 283.0, 289.0, 287.0, 298.0, 269.0, 289.0, 290.0, 288.0, 294.0, 287.0, 300.0, 290.0, 283.0, 256.0, 266.0, 299.0, 282.0, 321.0, 312.0, 311.0, 316.0, 296.0, 283.0, 303.0, 284.0, 282.0, 291.0, 286.0, 293.0, 321.0, 306.0, 295.0, 292.0, 285.0, 294.0, 285.0, 297.0, 303.0, 324.0, 261.0, 261.0, 306.0, 276.0, 308.0, 322.0, 288.0, 294.0, 290.0, 277.0, 287.0, 292.0, 226.0, 232.0, 298.0, 289.0, 290.0, 289.0, 249.0, 267.0, 297.0, 285.0, 303.0, 276.0, 301.0, 289.0, 283.0, 304.0, 293.0, 289.0, 319.0, 311.0, 268.0, 265.0, 283.0, 
296.0, 322.0, 308.0, 288.0, 294.0, 311.0, 319.0, 286.0, 290.0, 283.0, 299.0, 283.0, 299.0, 295.0, 292.0, 286.0, 296.0, 278.0, 304.0, 289.0, 287.0, 284.0, 300.0, 290.0, 277.0, 283.0, 299.0, 309.0, 275.0, 282.0, 294.0, 285.0, 279.0, 296.0, 286.0, 318.0, 309.0, 319.0, 314.0, 290.0, 289.0, 288.0, 291.0, 291.0, 288.0, 294.0, 288.0, 294.0, 288.0, 286.0, 296.0, 297.0, 285.0, 282.0, 297.0, 293.0, 289.0, 270.0, 251.0, 295.0, 287.0, 288.0, 291.0, 285.0, 297.0, 263.0, 259.0, 285.0, 297.0, 275.0, 252.0, 290.0, 286.0, 279.0, 243.0, 294.0, 288.0, 290.0, 292.0, 311.0, 316.0, 289.0, 287.0, 295.0, 292.0, 314.0, 313.0, 291.0, 291.0, 288.0, 291.0, 304.0, 283.0, 292.0, 281.0, 290.0, 294.0, 320.0, 310.0, 286.0, 290.0, 283.0, 296.0]}, "sampler_perf": {"mean_env_wait_ms": 1.237012566934364, "mean_processing_ms": 0.3154539706719903, "mean_inference_ms": 1.7926176479402052}, "off_policy_estimator": {}, "info": {"num_steps_trained": 4584000, "num_steps_sampled": 2444800, "sample_time_ms": 21212.772, "load_time_ms": 37.115, "grad_time_ms": 9943.576, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.002480272436514497, "policy_loss": -0.005884131882339716, "vf_loss": 89.30957794189453, "vf_explained_var": 0.7648332118988037, "kl": 0.0016885297372937202, "entropy": 1.1330945491790771, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2444800, "episodes_total": 6112, "training_iteration": 191, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-20-41", "timestamp": 1660252841, "time_this_iter_s": 29.47701120376587, "time_total_s": 11257.642331123352, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 11257.642331123352, "timesteps_since_restore": 2444800, "iterations_since_restore": 191, "perf": {"cpu_util_percent": 32.38536585365854, "ram_util_percent": 58.368292682926814}}
-{"episode_reward_max": 636.0, "episode_reward_min": 441.0, "episode_reward_mean": 583.23, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 220.0}, "policy_reward_max": {"ppo": 331.0}, "policy_reward_mean": {"ppo": 291.615}, "custom_metrics": {"sparse_reward_mean": 201.8, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 179.63, "shaped_reward_min": 121, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.94, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 18.16, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 15.69, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 25, "useful_onion_pickup_agent_1_mean": 17.48, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.36, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.59, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.13, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.2, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 15.33, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 17.3, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 6.3, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.19, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 12, "useful_dish_pickup_agent_0_mean": 5.67, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.51, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.37, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.42, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.05, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.73, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.53, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.64, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.46, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.33, 
"optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 17.3, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.33, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 17.3, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 579.0, 587.0, 582.0, 630.0, 584.0, 579.0, 587.0, 584.0, 630.0, 522.0, 573.0, 630.0, 582.0, 570.0, 627.0, 627.0, 573.0, 579.0, 582.0, 582.0, 582.0, 633.0, 582.0, 576.0, 630.0, 582.0, 573.0, 579.0, 544.0, 579.0, 627.0, 564.0, 582.0, 627.0, 633.0, 579.0, 579.0, 579.0, 582.0, 582.0, 582.0, 582.0, 579.0, 582.0, 521.0, 582.0, 579.0, 582.0, 522.0, 582.0, 527.0, 576.0, 522.0, 582.0, 582.0, 627.0, 576.0, 587.0, 627.0, 582.0, 579.0, 587.0, 573.0, 584.0, 630.0, 576.0, 579.0, 579.0, 579.0, 441.0, 582.0, 636.0, 582.0, 576.0, 582.0, 579.0, 621.0, 567.0, 579.0, 576.0, 567.0, 579.0, 582.0, 587.0, 573.0, 522.0, 581.0, 633.0, 627.0, 579.0, 587.0, 573.0, 579.0, 627.0, 587.0, 579.0, 582.0, 627.0, 522.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [291.0, 288.0, 288.0, 291.0, 289.0, 298.0, 296.0, 286.0, 311.0, 319.0, 301.0, 283.0, 298.0, 281.0, 295.0, 292.0, 295.0, 289.0, 311.0, 319.0, 260.0, 262.0, 289.0, 284.0, 331.0, 299.0, 296.0, 286.0, 288.0, 282.0, 328.0, 299.0, 315.0, 312.0, 289.0, 284.0, 290.0, 289.0, 284.0, 298.0, 300.0, 282.0, 299.0, 283.0, 318.0, 315.0, 291.0, 291.0, 285.0, 291.0, 317.0, 313.0, 281.0, 301.0, 290.0, 283.0, 290.0, 289.0, 269.0, 275.0, 291.0, 288.0, 316.0, 311.0, 285.0, 279.0, 296.0, 286.0, 318.0, 309.0, 319.0, 314.0, 290.0, 289.0, 288.0, 291.0, 291.0, 288.0, 294.0, 288.0, 294.0, 288.0, 286.0, 296.0, 297.0, 285.0, 282.0, 297.0, 293.0, 289.0, 270.0, 251.0, 295.0, 287.0, 288.0, 291.0, 285.0, 
297.0, 263.0, 259.0, 285.0, 297.0, 275.0, 252.0, 290.0, 286.0, 279.0, 243.0, 294.0, 288.0, 290.0, 292.0, 311.0, 316.0, 289.0, 287.0, 295.0, 292.0, 314.0, 313.0, 291.0, 291.0, 288.0, 291.0, 304.0, 283.0, 292.0, 281.0, 290.0, 294.0, 320.0, 310.0, 286.0, 290.0, 283.0, 296.0, 292.0, 287.0, 295.0, 284.0, 220.0, 221.0, 286.0, 296.0, 320.0, 316.0, 294.0, 288.0, 298.0, 278.0, 293.0, 289.0, 288.0, 291.0, 319.0, 302.0, 292.0, 275.0, 296.0, 283.0, 289.0, 287.0, 298.0, 269.0, 289.0, 290.0, 288.0, 294.0, 287.0, 300.0, 290.0, 283.0, 256.0, 266.0, 299.0, 282.0, 321.0, 312.0, 311.0, 316.0, 296.0, 283.0, 303.0, 284.0, 282.0, 291.0, 286.0, 293.0, 321.0, 306.0, 295.0, 292.0, 285.0, 294.0, 285.0, 297.0, 303.0, 324.0, 261.0, 261.0]}, "sampler_perf": {"mean_env_wait_ms": 1.2328827841512373, "mean_processing_ms": 0.3146313961274523, "mean_inference_ms": 1.788385259276164}, "off_policy_estimator": {}, "info": {"num_steps_trained": 4608000, "num_steps_sampled": 2457600, "sample_time_ms": 21198.373, "load_time_ms": 37.126, "grad_time_ms": 9801.163, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.001665265765041113, "policy_loss": -0.006540585309267044, "vf_loss": 87.64542388916016, "vf_explained_var": 0.7604849338531494, "kl": 0.0022042018827050924, "entropy": 1.1173783540725708, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2457600, "episodes_total": 6144, "training_iteration": 192, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-21-10", "timestamp": 1660252870, "time_this_iter_s": 29.205125331878662, "time_total_s": 11286.84745645523, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 11286.84745645523, "timesteps_since_restore": 2457600, "iterations_since_restore": 192, "perf": {"cpu_util_percent": 31.859523809523814, "ram_util_percent": 58.37619047619048}}
-{"episode_reward_max": 636.0, "episode_reward_min": 441.0, "episode_reward_mean": 586.11, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 220.0}, "policy_reward_max": {"ppo": 331.0}, "policy_reward_mean": {"ppo": 293.055}, "custom_metrics": {"sparse_reward_mean": 202.8, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 180.51, "shaped_reward_min": 121, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.32, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 18.17, "onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 15.98, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 17.5, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.5, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.64, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.15, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.17, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 15.55, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 17.24, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 6.14, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.48, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 12, "useful_dish_pickup_agent_0_mean": 5.49, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.69, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 10, "dish_drop_agent_0_mean": 0.44, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.49, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.04, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.57, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.76, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.48, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.66, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.55, 
"optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 17.24, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.55, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 17.24, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [587.0, 587.0, 582.0, 636.0, 579.0, 570.0, 513.0, 579.0, 582.0, 587.0, 579.0, 582.0, 564.0, 633.0, 587.0, 630.0, 627.0, 579.0, 636.0, 582.0, 587.0, 584.0, 582.0, 582.0, 627.0, 582.0, 579.0, 579.0, 579.0, 587.0, 579.0, 567.0, 584.0, 630.0, 576.0, 579.0, 579.0, 579.0, 441.0, 582.0, 636.0, 582.0, 576.0, 582.0, 579.0, 621.0, 567.0, 579.0, 576.0, 567.0, 579.0, 582.0, 587.0, 573.0, 522.0, 581.0, 633.0, 627.0, 579.0, 587.0, 573.0, 579.0, 627.0, 587.0, 579.0, 582.0, 627.0, 522.0, 579.0, 579.0, 587.0, 582.0, 630.0, 584.0, 579.0, 587.0, 584.0, 630.0, 522.0, 573.0, 630.0, 582.0, 570.0, 627.0, 627.0, 573.0, 579.0, 582.0, 582.0, 582.0, 633.0, 582.0, 576.0, 630.0, 582.0, 573.0, 579.0, 544.0, 579.0, 627.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [291.0, 296.0, 292.0, 295.0, 274.0, 308.0, 312.0, 324.0, 304.0, 275.0, 287.0, 283.0, 261.0, 252.0, 277.0, 302.0, 298.0, 284.0, 291.0, 296.0, 278.0, 301.0, 285.0, 297.0, 285.0, 279.0, 323.0, 310.0, 278.0, 309.0, 308.0, 322.0, 311.0, 316.0, 288.0, 291.0, 314.0, 322.0, 286.0, 296.0, 294.0, 293.0, 286.0, 298.0, 293.0, 289.0, 294.0, 288.0, 312.0, 315.0, 287.0, 295.0, 295.0, 284.0, 293.0, 286.0, 296.0, 283.0, 298.0, 289.0, 284.0, 295.0, 288.0, 279.0, 290.0, 294.0, 320.0, 310.0, 286.0, 290.0, 283.0, 296.0, 292.0, 287.0, 295.0, 284.0, 220.0, 221.0, 286.0, 296.0, 320.0, 316.0, 294.0, 288.0, 298.0, 278.0, 293.0, 289.0, 288.0, 291.0, 319.0, 302.0, 292.0, 275.0, 296.0, 283.0, 289.0, 
287.0, 298.0, 269.0, 289.0, 290.0, 288.0, 294.0, 287.0, 300.0, 290.0, 283.0, 256.0, 266.0, 299.0, 282.0, 321.0, 312.0, 311.0, 316.0, 296.0, 283.0, 303.0, 284.0, 282.0, 291.0, 286.0, 293.0, 321.0, 306.0, 295.0, 292.0, 285.0, 294.0, 285.0, 297.0, 303.0, 324.0, 261.0, 261.0, 291.0, 288.0, 288.0, 291.0, 289.0, 298.0, 296.0, 286.0, 311.0, 319.0, 301.0, 283.0, 298.0, 281.0, 295.0, 292.0, 295.0, 289.0, 311.0, 319.0, 260.0, 262.0, 289.0, 284.0, 331.0, 299.0, 296.0, 286.0, 288.0, 282.0, 328.0, 299.0, 315.0, 312.0, 289.0, 284.0, 290.0, 289.0, 284.0, 298.0, 300.0, 282.0, 299.0, 283.0, 318.0, 315.0, 291.0, 291.0, 285.0, 291.0, 317.0, 313.0, 281.0, 301.0, 290.0, 283.0, 290.0, 289.0, 269.0, 275.0, 291.0, 288.0, 316.0, 311.0]}, "sampler_perf": {"mean_env_wait_ms": 1.2287946124429, "mean_processing_ms": 0.3138148366539807, "mean_inference_ms": 1.7841696712783897}, "off_policy_estimator": {}, "info": {"num_steps_trained": 4632000, "num_steps_sampled": 2470400, "sample_time_ms": 20938.204, "load_time_ms": 37.066, "grad_time_ms": 9514.831, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.005852494388818741, "policy_loss": -0.0018877206603065133, "vf_loss": 83.014892578125, "vf_explained_var": 0.7724275588989258, "kl": 0.0019637763034552336, "entropy": 1.1225537061691284, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2470400, "episodes_total": 6176, "training_iteration": 193, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-21-39", "timestamp": 1660252899, "time_this_iter_s": 28.640799045562744, "time_total_s": 11315.488255500793, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 11315.488255500793, "timesteps_since_restore": 2470400, "iterations_since_restore": 193, "perf": {"cpu_util_percent": 32.46, "ram_util_percent": 58.379999999999995}}
-{"episode_reward_max": 636.0, "episode_reward_min": 513.0, "episode_reward_mean": 587.29, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 252.0}, "policy_reward_max": {"ppo": 331.0}, "policy_reward_mean": {"ppo": 293.645}, "custom_metrics": {"sparse_reward_mean": 202.8, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 181.69, "shaped_reward_min": 153, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.56, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 18.12, "onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.21, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 17.44, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.57, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.52, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.18, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.09, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.71, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 17.3, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.99, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.64, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 5.45, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.87, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 10, "dish_drop_agent_0_mean": 0.33, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.53, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.04, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.43, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.92, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.34, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.8, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.71, 
"optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 17.3, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.71, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 17.3, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [633.0, 576.0, 582.0, 579.0, 636.0, 576.0, 582.0, 590.0, 584.0, 582.0, 573.0, 582.0, 525.0, 573.0, 587.0, 624.0, 576.0, 579.0, 582.0, 636.0, 582.0, 579.0, 581.0, 576.0, 587.0, 582.0, 582.0, 582.0, 582.0, 590.0, 582.0, 587.0, 579.0, 582.0, 627.0, 522.0, 579.0, 579.0, 587.0, 582.0, 630.0, 584.0, 579.0, 587.0, 584.0, 630.0, 522.0, 573.0, 630.0, 582.0, 570.0, 627.0, 627.0, 573.0, 579.0, 582.0, 582.0, 582.0, 633.0, 582.0, 576.0, 630.0, 582.0, 573.0, 579.0, 544.0, 579.0, 627.0, 587.0, 587.0, 582.0, 636.0, 579.0, 570.0, 513.0, 579.0, 582.0, 587.0, 579.0, 582.0, 564.0, 633.0, 587.0, 630.0, 627.0, 579.0, 636.0, 582.0, 587.0, 584.0, 582.0, 582.0, 627.0, 582.0, 579.0, 579.0, 579.0, 587.0, 579.0, 567.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [322.0, 311.0, 295.0, 281.0, 288.0, 294.0, 303.0, 276.0, 317.0, 319.0, 288.0, 288.0, 290.0, 292.0, 291.0, 299.0, 292.0, 292.0, 290.0, 292.0, 284.0, 289.0, 293.0, 289.0, 260.0, 265.0, 272.0, 301.0, 286.0, 301.0, 314.0, 310.0, 280.0, 296.0, 275.0, 304.0, 281.0, 301.0, 316.0, 320.0, 292.0, 290.0, 300.0, 279.0, 297.0, 284.0, 285.0, 291.0, 294.0, 293.0, 291.0, 291.0, 293.0, 289.0, 287.0, 295.0, 292.0, 290.0, 301.0, 289.0, 286.0, 296.0, 294.0, 293.0, 285.0, 294.0, 285.0, 297.0, 303.0, 324.0, 261.0, 261.0, 291.0, 288.0, 288.0, 291.0, 289.0, 298.0, 296.0, 286.0, 311.0, 319.0, 301.0, 283.0, 298.0, 281.0, 295.0, 292.0, 295.0, 289.0, 311.0, 319.0, 260.0, 262.0, 289.0, 284.0, 331.0, 
299.0, 296.0, 286.0, 288.0, 282.0, 328.0, 299.0, 315.0, 312.0, 289.0, 284.0, 290.0, 289.0, 284.0, 298.0, 300.0, 282.0, 299.0, 283.0, 318.0, 315.0, 291.0, 291.0, 285.0, 291.0, 317.0, 313.0, 281.0, 301.0, 290.0, 283.0, 290.0, 289.0, 269.0, 275.0, 291.0, 288.0, 316.0, 311.0, 291.0, 296.0, 292.0, 295.0, 274.0, 308.0, 312.0, 324.0, 304.0, 275.0, 287.0, 283.0, 261.0, 252.0, 277.0, 302.0, 298.0, 284.0, 291.0, 296.0, 278.0, 301.0, 285.0, 297.0, 285.0, 279.0, 323.0, 310.0, 278.0, 309.0, 308.0, 322.0, 311.0, 316.0, 288.0, 291.0, 314.0, 322.0, 286.0, 296.0, 294.0, 293.0, 286.0, 298.0, 293.0, 289.0, 294.0, 288.0, 312.0, 315.0, 287.0, 295.0, 295.0, 284.0, 293.0, 286.0, 296.0, 283.0, 298.0, 289.0, 284.0, 295.0, 288.0, 279.0]}, "sampler_perf": {"mean_env_wait_ms": 1.2247614188456033, "mean_processing_ms": 0.3130098403023273, "mean_inference_ms": 1.7801660461855682}, "off_policy_estimator": {}, "info": {"num_steps_trained": 4656000, "num_steps_sampled": 2483200, "sample_time_ms": 21025.315, "load_time_ms": 37.175, "grad_time_ms": 9483.469, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.005179767496883869, "policy_loss": -0.003016052069142461, "vf_loss": 87.5873031616211, "vf_explained_var": 0.7668092250823975, "kl": 0.0019739444833248854, "entropy": 1.125815987586975, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2483200, "episodes_total": 6208, "training_iteration": 194, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-22-11", "timestamp": 1660252931, "time_this_iter_s": 32.347792863845825, "time_total_s": 11347.83604836464, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 11347.83604836464, "timesteps_since_restore": 2483200, "iterations_since_restore": 194, "perf": {"cpu_util_percent": 33.310869565217395, "ram_util_percent": 58.36521739130432}}
-{"episode_reward_max": 636.0, "episode_reward_min": 460.0, "episode_reward_mean": 584.17, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 229.0}, "policy_reward_max": {"ppo": 326.0}, "policy_reward_mean": {"ppo": 292.085}, "custom_metrics": {"sparse_reward_mean": 201.6, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 180.97, "shaped_reward_min": 140, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.83, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 18.02, "onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.33, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 17.24, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.69, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.57, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.24, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.15, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 15.83, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 17.18, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.94, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.74, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 5.31, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.82, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 10, "dish_drop_agent_0_mean": 0.34, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.59, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.05, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.37, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.96, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.27, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.83, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.83, 
"optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 17.18, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.83, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 17.18, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [573.0, 582.0, 579.0, 579.0, 579.0, 536.0, 636.0, 576.0, 579.0, 582.0, 590.0, 587.0, 512.0, 582.0, 576.0, 567.0, 579.0, 579.0, 576.0, 579.0, 582.0, 460.0, 582.0, 633.0, 582.0, 573.0, 630.0, 582.0, 576.0, 587.0, 579.0, 630.0, 579.0, 544.0, 579.0, 627.0, 587.0, 587.0, 582.0, 636.0, 579.0, 570.0, 513.0, 579.0, 582.0, 587.0, 579.0, 582.0, 564.0, 633.0, 587.0, 630.0, 627.0, 579.0, 636.0, 582.0, 587.0, 584.0, 582.0, 582.0, 627.0, 582.0, 579.0, 579.0, 579.0, 587.0, 579.0, 567.0, 633.0, 576.0, 582.0, 579.0, 636.0, 576.0, 582.0, 590.0, 584.0, 582.0, 573.0, 582.0, 525.0, 573.0, 587.0, 624.0, 576.0, 579.0, 582.0, 636.0, 582.0, 579.0, 581.0, 576.0, 587.0, 582.0, 582.0, 582.0, 582.0, 590.0, 582.0, 587.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [296.0, 277.0, 290.0, 292.0, 286.0, 293.0, 288.0, 291.0, 285.0, 294.0, 266.0, 270.0, 326.0, 310.0, 290.0, 286.0, 293.0, 286.0, 291.0, 291.0, 303.0, 287.0, 295.0, 292.0, 247.0, 265.0, 283.0, 299.0, 298.0, 278.0, 289.0, 278.0, 283.0, 296.0, 293.0, 286.0, 301.0, 275.0, 287.0, 292.0, 291.0, 291.0, 229.0, 231.0, 281.0, 301.0, 313.0, 320.0, 290.0, 292.0, 281.0, 292.0, 322.0, 308.0, 301.0, 281.0, 288.0, 288.0, 306.0, 281.0, 290.0, 289.0, 322.0, 308.0, 290.0, 289.0, 269.0, 275.0, 291.0, 288.0, 316.0, 311.0, 291.0, 296.0, 292.0, 295.0, 274.0, 308.0, 312.0, 324.0, 304.0, 275.0, 287.0, 283.0, 261.0, 252.0, 277.0, 302.0, 298.0, 284.0, 291.0, 296.0, 278.0, 301.0, 285.0, 297.0, 285.0, 
279.0, 323.0, 310.0, 278.0, 309.0, 308.0, 322.0, 311.0, 316.0, 288.0, 291.0, 314.0, 322.0, 286.0, 296.0, 294.0, 293.0, 286.0, 298.0, 293.0, 289.0, 294.0, 288.0, 312.0, 315.0, 287.0, 295.0, 295.0, 284.0, 293.0, 286.0, 296.0, 283.0, 298.0, 289.0, 284.0, 295.0, 288.0, 279.0, 322.0, 311.0, 295.0, 281.0, 288.0, 294.0, 303.0, 276.0, 317.0, 319.0, 288.0, 288.0, 290.0, 292.0, 291.0, 299.0, 292.0, 292.0, 290.0, 292.0, 284.0, 289.0, 293.0, 289.0, 260.0, 265.0, 272.0, 301.0, 286.0, 301.0, 314.0, 310.0, 280.0, 296.0, 275.0, 304.0, 281.0, 301.0, 316.0, 320.0, 292.0, 290.0, 300.0, 279.0, 297.0, 284.0, 285.0, 291.0, 294.0, 293.0, 291.0, 291.0, 293.0, 289.0, 287.0, 295.0, 292.0, 290.0, 301.0, 289.0, 286.0, 296.0, 294.0, 293.0]}, "sampler_perf": {"mean_env_wait_ms": 1.2207711535428367, "mean_processing_ms": 0.31221340698687855, "mean_inference_ms": 1.7762098630677763}, "off_policy_estimator": {}, "info": {"num_steps_trained": 4680000, "num_steps_sampled": 2496000, "sample_time_ms": 21023.805, "load_time_ms": 36.814, "grad_time_ms": 9071.865, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.004175250884145498, "policy_loss": -0.004759882111102343, "vf_loss": 95.0163803100586, "vf_explained_var": 0.7534318566322327, "kl": 0.0021568441297858953, "entropy": 1.1329950094223022, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2496000, "episodes_total": 6240, "training_iteration": 195, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-22-40", "timestamp": 1660252960, "time_this_iter_s": 28.54381275177002, "time_total_s": 11376.37986111641, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 11376.37986111641, "timesteps_since_restore": 2496000, "iterations_since_restore": 195, "perf": {"cpu_util_percent": 32.82000000000001, "ram_util_percent": 58.44}}
-{"episode_reward_max": 636.0, "episode_reward_min": 460.0, "episode_reward_mean": 583.03, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 229.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 291.515}, "custom_metrics": {"sparse_reward_mean": 201.4, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 180.23, "shaped_reward_min": 140, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.29, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 18.47, "onion_pickup_agent_1_min": 12, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 15.81, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 17.61, "useful_onion_pickup_agent_1_min": 11, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.6, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.6, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.26, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.18, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 15.38, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 17.52, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 6.17, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.5, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.41, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.65, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.42, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.56, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.06, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.49, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.79, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.41, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.68, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.38, 
"optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 17.52, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.38, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 17.52, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 519.0, 510.0, 584.0, 587.0, 630.0, 587.0, 582.0, 587.0, 579.0, 573.0, 579.0, 633.0, 579.0, 587.0, 633.0, 579.0, 582.0, 579.0, 570.0, 630.0, 630.0, 579.0, 573.0, 582.0, 573.0, 579.0, 567.0, 582.0, 573.0, 630.0, 582.0, 579.0, 587.0, 579.0, 567.0, 633.0, 576.0, 582.0, 579.0, 636.0, 576.0, 582.0, 590.0, 584.0, 582.0, 573.0, 582.0, 525.0, 573.0, 587.0, 624.0, 576.0, 579.0, 582.0, 636.0, 582.0, 579.0, 581.0, 576.0, 587.0, 582.0, 582.0, 582.0, 582.0, 590.0, 582.0, 587.0, 573.0, 582.0, 579.0, 579.0, 579.0, 536.0, 636.0, 576.0, 579.0, 582.0, 590.0, 587.0, 512.0, 582.0, 576.0, 567.0, 579.0, 579.0, 576.0, 579.0, 582.0, 460.0, 582.0, 633.0, 582.0, 573.0, 630.0, 582.0, 576.0, 587.0, 579.0, 630.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [298.0, 281.0, 257.0, 262.0, 242.0, 268.0, 289.0, 295.0, 298.0, 289.0, 313.0, 317.0, 285.0, 302.0, 286.0, 296.0, 285.0, 302.0, 283.0, 296.0, 289.0, 284.0, 296.0, 283.0, 314.0, 319.0, 285.0, 294.0, 293.0, 294.0, 306.0, 327.0, 287.0, 292.0, 298.0, 284.0, 298.0, 281.0, 281.0, 289.0, 316.0, 314.0, 311.0, 319.0, 293.0, 286.0, 282.0, 291.0, 292.0, 290.0, 288.0, 285.0, 285.0, 294.0, 274.0, 293.0, 293.0, 289.0, 277.0, 296.0, 321.0, 309.0, 288.0, 294.0, 296.0, 283.0, 298.0, 289.0, 284.0, 295.0, 288.0, 279.0, 322.0, 311.0, 295.0, 281.0, 288.0, 294.0, 303.0, 276.0, 317.0, 319.0, 288.0, 288.0, 290.0, 292.0, 291.0, 299.0, 292.0, 292.0, 290.0, 292.0, 284.0, 289.0, 293.0, 289.0, 260.0, 
265.0, 272.0, 301.0, 286.0, 301.0, 314.0, 310.0, 280.0, 296.0, 275.0, 304.0, 281.0, 301.0, 316.0, 320.0, 292.0, 290.0, 300.0, 279.0, 297.0, 284.0, 285.0, 291.0, 294.0, 293.0, 291.0, 291.0, 293.0, 289.0, 287.0, 295.0, 292.0, 290.0, 301.0, 289.0, 286.0, 296.0, 294.0, 293.0, 296.0, 277.0, 290.0, 292.0, 286.0, 293.0, 288.0, 291.0, 285.0, 294.0, 266.0, 270.0, 326.0, 310.0, 290.0, 286.0, 293.0, 286.0, 291.0, 291.0, 303.0, 287.0, 295.0, 292.0, 247.0, 265.0, 283.0, 299.0, 298.0, 278.0, 289.0, 278.0, 283.0, 296.0, 293.0, 286.0, 301.0, 275.0, 287.0, 292.0, 291.0, 291.0, 229.0, 231.0, 281.0, 301.0, 313.0, 320.0, 290.0, 292.0, 281.0, 292.0, 322.0, 308.0, 301.0, 281.0, 288.0, 288.0, 306.0, 281.0, 290.0, 289.0, 322.0, 308.0]}, "sampler_perf": {"mean_env_wait_ms": 1.2168243946409252, "mean_processing_ms": 0.31142476273286085, "mean_inference_ms": 1.7722672001307933}, "off_policy_estimator": {}, "info": {"num_steps_trained": 4704000, "num_steps_sampled": 2508800, "sample_time_ms": 20149.8, "load_time_ms": 36.819, "grad_time_ms": 8683.766, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.005854760762304068, "policy_loss": -0.0029166024178266525, "vf_loss": 93.378173828125, "vf_explained_var": 0.7535201907157898, "kl": 0.00207762373611331, "entropy": 1.1329069137573242, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2508800, "episodes_total": 6272, "training_iteration": 196, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-23-08", "timestamp": 1660252988, "time_this_iter_s": 28.434014320373535, "time_total_s": 11404.813875436783, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 11404.813875436783, "timesteps_since_restore": 2508800, "iterations_since_restore": 196, "perf": {"cpu_util_percent": 34.197500000000005, "ram_util_percent": 58.575}}
-{"episode_reward_max": 636.0, "episode_reward_min": 460.0, "episode_reward_mean": 581.72, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 229.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 290.86}, "custom_metrics": {"sparse_reward_mean": 201.0, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 179.72, "shaped_reward_min": 140, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.38, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 18.2, "onion_pickup_agent_1_min": 12, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 15.95, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 17.32, "useful_onion_pickup_agent_1_min": 11, "useful_onion_pickup_agent_1_max": 25, "onion_drop_agent_0_mean": 0.6, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.64, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.24, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.24, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 15.52, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 17.23, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 6.05, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.52, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.31, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.73, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.43, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.51, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.06, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.42, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.87, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.34, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.74, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 15.52, 
"optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 17.23, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.52, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 17.23, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [587.0, 587.0, 587.0, 570.0, 579.0, 564.0, 582.0, 627.0, 539.0, 582.0, 630.0, 582.0, 579.0, 587.0, 573.0, 579.0, 579.0, 633.0, 582.0, 584.0, 582.0, 582.0, 582.0, 587.0, 579.0, 582.0, 579.0, 627.0, 576.0, 582.0, 516.0, 503.0, 582.0, 590.0, 582.0, 587.0, 573.0, 582.0, 579.0, 579.0, 579.0, 536.0, 636.0, 576.0, 579.0, 582.0, 590.0, 587.0, 512.0, 582.0, 576.0, 567.0, 579.0, 579.0, 576.0, 579.0, 582.0, 460.0, 582.0, 633.0, 582.0, 573.0, 630.0, 582.0, 576.0, 587.0, 579.0, 630.0, 579.0, 519.0, 510.0, 584.0, 587.0, 630.0, 587.0, 582.0, 587.0, 579.0, 573.0, 579.0, 633.0, 579.0, 587.0, 633.0, 579.0, 582.0, 579.0, 570.0, 630.0, 630.0, 579.0, 573.0, 582.0, 573.0, 579.0, 567.0, 582.0, 573.0, 630.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [296.0, 291.0, 297.0, 290.0, 287.0, 300.0, 289.0, 281.0, 290.0, 289.0, 273.0, 291.0, 291.0, 291.0, 327.0, 300.0, 272.0, 267.0, 283.0, 299.0, 319.0, 311.0, 292.0, 290.0, 289.0, 290.0, 294.0, 293.0, 288.0, 285.0, 290.0, 289.0, 276.0, 303.0, 310.0, 323.0, 286.0, 296.0, 299.0, 285.0, 295.0, 287.0, 281.0, 301.0, 288.0, 294.0, 289.0, 298.0, 293.0, 286.0, 288.0, 294.0, 281.0, 298.0, 321.0, 306.0, 296.0, 280.0, 293.0, 289.0, 250.0, 266.0, 256.0, 247.0, 292.0, 290.0, 301.0, 289.0, 286.0, 296.0, 294.0, 293.0, 296.0, 277.0, 290.0, 292.0, 286.0, 293.0, 288.0, 291.0, 285.0, 294.0, 266.0, 270.0, 326.0, 310.0, 290.0, 286.0, 293.0, 286.0, 291.0, 291.0, 303.0, 287.0, 295.0, 292.0, 247.0, 
265.0, 283.0, 299.0, 298.0, 278.0, 289.0, 278.0, 283.0, 296.0, 293.0, 286.0, 301.0, 275.0, 287.0, 292.0, 291.0, 291.0, 229.0, 231.0, 281.0, 301.0, 313.0, 320.0, 290.0, 292.0, 281.0, 292.0, 322.0, 308.0, 301.0, 281.0, 288.0, 288.0, 306.0, 281.0, 290.0, 289.0, 322.0, 308.0, 298.0, 281.0, 257.0, 262.0, 242.0, 268.0, 289.0, 295.0, 298.0, 289.0, 313.0, 317.0, 285.0, 302.0, 286.0, 296.0, 285.0, 302.0, 283.0, 296.0, 289.0, 284.0, 296.0, 283.0, 314.0, 319.0, 285.0, 294.0, 293.0, 294.0, 306.0, 327.0, 287.0, 292.0, 298.0, 284.0, 298.0, 281.0, 281.0, 289.0, 316.0, 314.0, 311.0, 319.0, 293.0, 286.0, 282.0, 291.0, 292.0, 290.0, 288.0, 285.0, 285.0, 294.0, 274.0, 293.0, 293.0, 289.0, 277.0, 296.0, 321.0, 309.0, 288.0, 294.0]}, "sampler_perf": {"mean_env_wait_ms": 1.2129132177848776, "mean_processing_ms": 0.31064427839869574, "mean_inference_ms": 1.768252985066466}, "off_policy_estimator": {}, "info": {"num_steps_trained": 4728000, "num_steps_sampled": 2521600, "sample_time_ms": 20346.302, "load_time_ms": 37.457, "grad_time_ms": 8681.875, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.004015960264950991, "policy_loss": -0.004478786140680313, "vf_loss": 90.65137481689453, "vf_explained_var": 0.7576496601104736, "kl": 0.0018428467446938157, "entropy": 1.140787959098816, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2521600, "episodes_total": 6304, "training_iteration": 197, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-23-39", "timestamp": 1660253019, "time_this_iter_s": 30.565216064453125, "time_total_s": 11435.379091501236, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 11435.379091501236, "timesteps_since_restore": 2521600, "iterations_since_restore": 197, "perf": {"cpu_util_percent": 35.77272727272727, "ram_util_percent": 59.17272727272728}}
-{"episode_reward_max": 633.0, "episode_reward_min": 498.0, "episode_reward_mean": 580.88, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 242.0}, "policy_reward_max": {"ppo": 331.0}, "policy_reward_mean": {"ppo": 290.44}, "custom_metrics": {"sparse_reward_mean": 200.8, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 179.28, "shaped_reward_min": 138, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.22, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 18.24, "onion_pickup_agent_1_min": 12, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 15.92, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 17.42, "useful_onion_pickup_agent_1_min": 11, "useful_onion_pickup_agent_1_max": 25, "onion_drop_agent_0_mean": 0.51, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.7, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.2, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.24, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.43, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 17.19, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.94, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.52, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.32, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.77, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.37, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.54, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.06, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.41, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.83, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.35, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.73, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 15.43, 
"optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 17.19, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.43, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 17.19, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [630.0, 498.0, 587.0, 573.0, 576.0, 587.0, 582.0, 587.0, 525.0, 576.0, 533.0, 630.0, 576.0, 573.0, 582.0, 582.0, 579.0, 582.0, 527.0, 573.0, 576.0, 582.0, 579.0, 579.0, 582.0, 579.0, 576.0, 579.0, 579.0, 579.0, 579.0, 582.0, 576.0, 587.0, 579.0, 630.0, 579.0, 519.0, 510.0, 584.0, 587.0, 630.0, 587.0, 582.0, 587.0, 579.0, 573.0, 579.0, 633.0, 579.0, 587.0, 633.0, 579.0, 582.0, 579.0, 570.0, 630.0, 630.0, 579.0, 573.0, 582.0, 573.0, 579.0, 567.0, 582.0, 573.0, 630.0, 582.0, 587.0, 587.0, 587.0, 570.0, 579.0, 564.0, 582.0, 627.0, 539.0, 582.0, 630.0, 582.0, 579.0, 587.0, 573.0, 579.0, 579.0, 633.0, 582.0, 584.0, 582.0, 582.0, 582.0, 587.0, 579.0, 582.0, 579.0, 627.0, 576.0, 582.0, 516.0, 503.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [309.0, 321.0, 253.0, 245.0, 287.0, 300.0, 281.0, 292.0, 288.0, 288.0, 294.0, 293.0, 285.0, 297.0, 288.0, 299.0, 266.0, 259.0, 283.0, 293.0, 265.0, 268.0, 299.0, 331.0, 293.0, 283.0, 293.0, 280.0, 286.0, 296.0, 285.0, 297.0, 299.0, 280.0, 303.0, 279.0, 267.0, 260.0, 293.0, 280.0, 282.0, 294.0, 299.0, 283.0, 295.0, 284.0, 286.0, 293.0, 298.0, 284.0, 293.0, 286.0, 292.0, 284.0, 287.0, 292.0, 293.0, 286.0, 288.0, 291.0, 294.0, 285.0, 298.0, 284.0, 288.0, 288.0, 306.0, 281.0, 290.0, 289.0, 322.0, 308.0, 298.0, 281.0, 257.0, 262.0, 242.0, 268.0, 289.0, 295.0, 298.0, 289.0, 313.0, 317.0, 285.0, 302.0, 286.0, 296.0, 285.0, 302.0, 283.0, 296.0, 289.0, 284.0, 296.0, 283.0, 314.0, 
319.0, 285.0, 294.0, 293.0, 294.0, 306.0, 327.0, 287.0, 292.0, 298.0, 284.0, 298.0, 281.0, 281.0, 289.0, 316.0, 314.0, 311.0, 319.0, 293.0, 286.0, 282.0, 291.0, 292.0, 290.0, 288.0, 285.0, 285.0, 294.0, 274.0, 293.0, 293.0, 289.0, 277.0, 296.0, 321.0, 309.0, 288.0, 294.0, 296.0, 291.0, 297.0, 290.0, 287.0, 300.0, 289.0, 281.0, 290.0, 289.0, 273.0, 291.0, 291.0, 291.0, 327.0, 300.0, 272.0, 267.0, 283.0, 299.0, 319.0, 311.0, 292.0, 290.0, 289.0, 290.0, 294.0, 293.0, 288.0, 285.0, 290.0, 289.0, 276.0, 303.0, 310.0, 323.0, 286.0, 296.0, 299.0, 285.0, 295.0, 287.0, 281.0, 301.0, 288.0, 294.0, 289.0, 298.0, 293.0, 286.0, 288.0, 294.0, 281.0, 298.0, 321.0, 306.0, 296.0, 280.0, 293.0, 289.0, 250.0, 266.0, 256.0, 247.0]}, "sampler_perf": {"mean_env_wait_ms": 1.2090434460421848, "mean_processing_ms": 0.3098731145088783, "mean_inference_ms": 1.7643973758778697}, "off_policy_estimator": {}, "info": {"num_steps_trained": 4752000, "num_steps_sampled": 2534400, "sample_time_ms": 20635.573, "load_time_ms": 37.478, "grad_time_ms": 8930.84, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.008007452823221684, "policy_loss": -0.00045695496373809874, "vf_loss": 90.38675689697266, "vf_explained_var": 0.7534659504890442, "kl": 0.0025916944723576307, "entropy": 1.1485199928283691, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2534400, "episodes_total": 6336, "training_iteration": 198, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-24-10", "timestamp": 1660253050, "time_this_iter_s": 31.295607089996338, "time_total_s": 11466.674698591232, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 11466.674698591232, "timesteps_since_restore": 2534400, "iterations_since_restore": 198, "perf": {"cpu_util_percent": 30.265909090909087, "ram_util_percent": 58.70227272727273}}
-{"episode_reward_max": 633.0, "episode_reward_min": 498.0, "episode_reward_mean": 579.56, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 245.0}, "policy_reward_max": {"ppo": 331.0}, "policy_reward_mean": {"ppo": 289.78}, "custom_metrics": {"sparse_reward_mean": 200.2, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 179.16, "shaped_reward_min": 138, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.35, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 18.22, "onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 16.02, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 17.45, "useful_onion_pickup_agent_1_min": 11, "useful_onion_pickup_agent_1_max": 25, "onion_drop_agent_0_mean": 0.6, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.77, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.25, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.3, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 15.48, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 17.14, "potting_onion_agent_1_min": 11, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.99, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.46, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.39, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.76, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.39, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.53, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.05, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.43, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.77, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.37, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.7, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 15.48, 
"optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 17.14, "optimal_onion_potting_agent_1_min": 11, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.48, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 17.14, "viable_onion_potting_agent_1_min": 11, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 582.0, 576.0, 576.0, 582.0, 627.0, 582.0, 630.0, 579.0, 579.0, 576.0, 516.0, 579.0, 590.0, 573.0, 582.0, 587.0, 579.0, 576.0, 576.0, 582.0, 536.0, 582.0, 582.0, 633.0, 582.0, 582.0, 582.0, 579.0, 564.0, 579.0, 582.0, 582.0, 573.0, 630.0, 582.0, 587.0, 587.0, 587.0, 570.0, 579.0, 564.0, 582.0, 627.0, 539.0, 582.0, 630.0, 582.0, 579.0, 587.0, 573.0, 579.0, 579.0, 633.0, 582.0, 584.0, 582.0, 582.0, 582.0, 587.0, 579.0, 582.0, 579.0, 627.0, 576.0, 582.0, 516.0, 503.0, 630.0, 498.0, 587.0, 573.0, 576.0, 587.0, 582.0, 587.0, 525.0, 576.0, 533.0, 630.0, 576.0, 573.0, 582.0, 582.0, 579.0, 582.0, 527.0, 573.0, 576.0, 582.0, 579.0, 579.0, 582.0, 579.0, 576.0, 579.0, 579.0, 579.0, 579.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [301.0, 278.0, 285.0, 297.0, 287.0, 289.0, 284.0, 292.0, 290.0, 292.0, 316.0, 311.0, 280.0, 302.0, 314.0, 316.0, 296.0, 283.0, 293.0, 286.0, 283.0, 293.0, 265.0, 251.0, 288.0, 291.0, 296.0, 294.0, 287.0, 286.0, 291.0, 291.0, 294.0, 293.0, 283.0, 296.0, 298.0, 278.0, 290.0, 286.0, 293.0, 289.0, 266.0, 270.0, 298.0, 284.0, 295.0, 287.0, 318.0, 315.0, 296.0, 286.0, 280.0, 302.0, 288.0, 294.0, 293.0, 286.0, 287.0, 277.0, 280.0, 299.0, 296.0, 286.0, 293.0, 289.0, 277.0, 296.0, 321.0, 309.0, 288.0, 294.0, 296.0, 291.0, 297.0, 290.0, 287.0, 300.0, 289.0, 281.0, 290.0, 289.0, 273.0, 291.0, 291.0, 291.0, 327.0, 300.0, 272.0, 267.0, 283.0, 299.0, 319.0, 311.0, 292.0, 290.0, 289.0, 
290.0, 294.0, 293.0, 288.0, 285.0, 290.0, 289.0, 276.0, 303.0, 310.0, 323.0, 286.0, 296.0, 299.0, 285.0, 295.0, 287.0, 281.0, 301.0, 288.0, 294.0, 289.0, 298.0, 293.0, 286.0, 288.0, 294.0, 281.0, 298.0, 321.0, 306.0, 296.0, 280.0, 293.0, 289.0, 250.0, 266.0, 256.0, 247.0, 309.0, 321.0, 253.0, 245.0, 287.0, 300.0, 281.0, 292.0, 288.0, 288.0, 294.0, 293.0, 285.0, 297.0, 288.0, 299.0, 266.0, 259.0, 283.0, 293.0, 265.0, 268.0, 299.0, 331.0, 293.0, 283.0, 293.0, 280.0, 286.0, 296.0, 285.0, 297.0, 299.0, 280.0, 303.0, 279.0, 267.0, 260.0, 293.0, 280.0, 282.0, 294.0, 299.0, 283.0, 295.0, 284.0, 286.0, 293.0, 298.0, 284.0, 293.0, 286.0, 292.0, 284.0, 287.0, 292.0, 293.0, 286.0, 288.0, 291.0, 294.0, 285.0, 298.0, 284.0]}, "sampler_perf": {"mean_env_wait_ms": 1.2052169809302002, "mean_processing_ms": 0.3091122486533884, "mean_inference_ms": 1.7607399677301792}, "off_policy_estimator": {}, "info": {"num_steps_trained": 4776000, "num_steps_sampled": 2547200, "sample_time_ms": 20764.539, "load_time_ms": 37.37, "grad_time_ms": 8925.603, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.002967844484373927, "policy_loss": -0.005320979747921228, "vf_loss": 88.59487915039062, "vf_explained_var": 0.7679054141044617, "kl": 0.0019444593926891685, "entropy": 1.141340732574463, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2547200, "episodes_total": 6368, "training_iteration": 199, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-24-41", "timestamp": 1660253081, "time_this_iter_s": 30.933609008789062, "time_total_s": 11497.608307600021, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 11497.608307600021, "timesteps_since_restore": 2547200, "iterations_since_restore": 199, "perf": {"cpu_util_percent": 31.409090909090903, "ram_util_percent": 58.724999999999994}}
-{"episode_reward_max": 633.0, "episode_reward_min": 368.0, "episode_reward_mean": 574.48, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 179.0}, "policy_reward_max": {"ppo": 331.0}, "policy_reward_mean": {"ppo": 287.24}, "custom_metrics": {"sparse_reward_mean": 198.6, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 177.28, "shaped_reward_min": 128, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.35, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 17.95, "onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 15.96, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 17.14, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.54, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.7, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.25, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.31, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 15.48, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 16.93, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.05, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.43, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 5.24, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.61, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.54, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.48, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.06, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.38, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.76, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.28, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.7, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.03, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.48, 
"optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 16.93, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.48, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 16.93, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [630.0, 576.0, 587.0, 579.0, 579.0, 582.0, 576.0, 489.0, 530.0, 579.0, 570.0, 630.0, 582.0, 587.0, 587.0, 567.0, 587.0, 368.0, 579.0, 498.0, 587.0, 573.0, 582.0, 579.0, 633.0, 587.0, 525.0, 579.0, 567.0, 633.0, 582.0, 582.0, 576.0, 582.0, 516.0, 503.0, 630.0, 498.0, 587.0, 573.0, 576.0, 587.0, 582.0, 587.0, 525.0, 576.0, 533.0, 630.0, 576.0, 573.0, 582.0, 582.0, 579.0, 582.0, 527.0, 573.0, 576.0, 582.0, 579.0, 579.0, 582.0, 579.0, 576.0, 579.0, 579.0, 579.0, 579.0, 582.0, 579.0, 582.0, 576.0, 576.0, 582.0, 627.0, 582.0, 630.0, 579.0, 579.0, 576.0, 516.0, 579.0, 590.0, 573.0, 582.0, 587.0, 579.0, 576.0, 576.0, 582.0, 536.0, 582.0, 582.0, 633.0, 582.0, 582.0, 582.0, 579.0, 564.0, 579.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [316.0, 314.0, 279.0, 297.0, 309.0, 278.0, 286.0, 293.0, 285.0, 294.0, 295.0, 287.0, 284.0, 292.0, 249.0, 240.0, 264.0, 266.0, 273.0, 306.0, 281.0, 289.0, 316.0, 314.0, 283.0, 299.0, 292.0, 295.0, 285.0, 302.0, 289.0, 278.0, 299.0, 288.0, 189.0, 179.0, 293.0, 286.0, 253.0, 245.0, 296.0, 291.0, 291.0, 282.0, 293.0, 289.0, 290.0, 289.0, 322.0, 311.0, 293.0, 294.0, 256.0, 269.0, 280.0, 299.0, 284.0, 283.0, 316.0, 317.0, 291.0, 291.0, 291.0, 291.0, 296.0, 280.0, 293.0, 289.0, 250.0, 266.0, 256.0, 247.0, 309.0, 321.0, 253.0, 245.0, 287.0, 300.0, 281.0, 292.0, 288.0, 288.0, 294.0, 293.0, 285.0, 297.0, 288.0, 299.0, 266.0, 259.0, 283.0, 293.0, 265.0, 268.0, 299.0, 331.0, 293.0, 
283.0, 293.0, 280.0, 286.0, 296.0, 285.0, 297.0, 299.0, 280.0, 303.0, 279.0, 267.0, 260.0, 293.0, 280.0, 282.0, 294.0, 299.0, 283.0, 295.0, 284.0, 286.0, 293.0, 298.0, 284.0, 293.0, 286.0, 292.0, 284.0, 287.0, 292.0, 293.0, 286.0, 288.0, 291.0, 294.0, 285.0, 298.0, 284.0, 301.0, 278.0, 285.0, 297.0, 287.0, 289.0, 284.0, 292.0, 290.0, 292.0, 316.0, 311.0, 280.0, 302.0, 314.0, 316.0, 296.0, 283.0, 293.0, 286.0, 283.0, 293.0, 265.0, 251.0, 288.0, 291.0, 296.0, 294.0, 287.0, 286.0, 291.0, 291.0, 294.0, 293.0, 283.0, 296.0, 298.0, 278.0, 290.0, 286.0, 293.0, 289.0, 266.0, 270.0, 298.0, 284.0, 295.0, 287.0, 318.0, 315.0, 296.0, 286.0, 280.0, 302.0, 288.0, 294.0, 293.0, 286.0, 287.0, 277.0, 280.0, 299.0, 296.0, 286.0]}, "sampler_perf": {"mean_env_wait_ms": 1.2014273105628746, "mean_processing_ms": 0.30835462294201915, "mean_inference_ms": 1.7571376095037237}, "off_policy_estimator": {}, "info": {"num_steps_trained": 4800000, "num_steps_sampled": 2560000, "sample_time_ms": 20842.97, "load_time_ms": 37.195, "grad_time_ms": 9109.166, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.005231037735939026, "policy_loss": -0.0033684810623526573, "vf_loss": 91.692626953125, "vf_explained_var": 0.7593931555747986, "kl": 0.002331085503101349, "entropy": 1.1394835710525513, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2560000, "episodes_total": 6400, "training_iteration": 200, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-25-12", "timestamp": 1660253112, "time_this_iter_s": 30.703901290893555, "time_total_s": 11528.312208890915, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 11528.312208890915, "timesteps_since_restore": 2560000, "iterations_since_restore": 200, "perf": {"cpu_util_percent": 32.688372093023254, "ram_util_percent": 58.67906976744187}}
-{"episode_reward_max": 633.0, "episode_reward_min": 299.0, "episode_reward_mean": 576.94, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 139.0}, "policy_reward_max": {"ppo": 326.0}, "policy_reward_mean": {"ppo": 288.47}, "custom_metrics": {"sparse_reward_mean": 199.6, "sparse_reward_min": 100, "sparse_reward_max": 220, "shaped_reward_mean": 177.74, "shaped_reward_min": 99, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.58, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 17.98, "onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 16.07, "useful_onion_pickup_agent_0_min": 4, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 17.25, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.66, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.75, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.32, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, "useful_onion_drop_agent_1_mean": 0.35, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 15.63, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 16.87, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.08, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.59, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 5.2, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.68, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.65, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.52, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.04, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.05, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 5.26, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.9, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.16, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.86, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.03, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.63, 
"optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 16.87, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.63, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 16.87, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [576.0, 587.0, 627.0, 579.0, 627.0, 579.0, 630.0, 630.0, 630.0, 579.0, 579.0, 299.0, 633.0, 582.0, 582.0, 522.0, 630.0, 582.0, 627.0, 567.0, 564.0, 582.0, 555.0, 582.0, 579.0, 576.0, 579.0, 533.0, 576.0, 576.0, 582.0, 582.0, 579.0, 579.0, 579.0, 582.0, 579.0, 582.0, 576.0, 576.0, 582.0, 627.0, 582.0, 630.0, 579.0, 579.0, 576.0, 516.0, 579.0, 590.0, 573.0, 582.0, 587.0, 579.0, 576.0, 576.0, 582.0, 536.0, 582.0, 582.0, 633.0, 582.0, 582.0, 582.0, 579.0, 564.0, 579.0, 582.0, 630.0, 576.0, 587.0, 579.0, 579.0, 582.0, 576.0, 489.0, 530.0, 579.0, 570.0, 630.0, 582.0, 587.0, 587.0, 567.0, 587.0, 368.0, 579.0, 498.0, 587.0, 573.0, 582.0, 579.0, 633.0, 587.0, 525.0, 579.0, 567.0, 633.0, 582.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [283.0, 293.0, 287.0, 300.0, 309.0, 318.0, 298.0, 281.0, 304.0, 323.0, 297.0, 282.0, 322.0, 308.0, 305.0, 325.0, 308.0, 322.0, 277.0, 302.0, 284.0, 295.0, 160.0, 139.0, 322.0, 311.0, 289.0, 293.0, 304.0, 278.0, 253.0, 269.0, 304.0, 326.0, 302.0, 280.0, 303.0, 324.0, 280.0, 287.0, 285.0, 279.0, 286.0, 296.0, 279.0, 276.0, 295.0, 287.0, 283.0, 296.0, 283.0, 293.0, 282.0, 297.0, 265.0, 268.0, 293.0, 283.0, 289.0, 287.0, 291.0, 291.0, 295.0, 287.0, 293.0, 286.0, 288.0, 291.0, 294.0, 285.0, 298.0, 284.0, 301.0, 278.0, 285.0, 297.0, 287.0, 289.0, 284.0, 292.0, 290.0, 292.0, 316.0, 311.0, 280.0, 302.0, 314.0, 316.0, 296.0, 283.0, 293.0, 286.0, 283.0, 293.0, 265.0, 251.0, 288.0, 
291.0, 296.0, 294.0, 287.0, 286.0, 291.0, 291.0, 294.0, 293.0, 283.0, 296.0, 298.0, 278.0, 290.0, 286.0, 293.0, 289.0, 266.0, 270.0, 298.0, 284.0, 295.0, 287.0, 318.0, 315.0, 296.0, 286.0, 280.0, 302.0, 288.0, 294.0, 293.0, 286.0, 287.0, 277.0, 280.0, 299.0, 296.0, 286.0, 316.0, 314.0, 279.0, 297.0, 309.0, 278.0, 286.0, 293.0, 285.0, 294.0, 295.0, 287.0, 284.0, 292.0, 249.0, 240.0, 264.0, 266.0, 273.0, 306.0, 281.0, 289.0, 316.0, 314.0, 283.0, 299.0, 292.0, 295.0, 285.0, 302.0, 289.0, 278.0, 299.0, 288.0, 189.0, 179.0, 293.0, 286.0, 253.0, 245.0, 296.0, 291.0, 291.0, 282.0, 293.0, 289.0, 290.0, 289.0, 322.0, 311.0, 293.0, 294.0, 256.0, 269.0, 280.0, 299.0, 284.0, 283.0, 316.0, 317.0, 291.0, 291.0, 291.0, 291.0]}, "sampler_perf": {"mean_env_wait_ms": 1.1976757528000228, "mean_processing_ms": 0.3076033986967843, "mean_inference_ms": 1.7536061115922081}, "off_policy_estimator": {}, "info": {"num_steps_trained": 4824000, "num_steps_sampled": 2572800, "sample_time_ms": 20929.774, "load_time_ms": 36.844, "grad_time_ms": 9259.169, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0019615469500422478, "policy_loss": -0.006233252584934235, "vf_loss": 87.63289642333984, "vf_explained_var": 0.7635285258293152, "kl": 0.0017622611485421658, "entropy": 1.1369844675064087, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2572800, "episodes_total": 6432, "training_iteration": 201, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-25-44", "timestamp": 1660253144, "time_this_iter_s": 31.842552185058594, "time_total_s": 11560.154761075974, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 11560.154761075974, "timesteps_since_restore": 2572800, "iterations_since_restore": 201, "perf": {"cpu_util_percent": 24.447826086956525, "ram_util_percent": 58.667391304347845}}
-{"episode_reward_max": 633.0, "episode_reward_min": 299.0, "episode_reward_mean": 573.45, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 139.0}, "policy_reward_max": {"ppo": 326.0}, "policy_reward_mean": {"ppo": 286.725}, "custom_metrics": {"sparse_reward_mean": 198.2, "sparse_reward_min": 100, "sparse_reward_max": 220, "shaped_reward_mean": 177.05, "shaped_reward_min": 99, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.26, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 18.02, "onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 15.77, "useful_onion_pickup_agent_0_min": 4, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 17.2, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.56, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.76, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.27, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, "useful_onion_drop_agent_1_mean": 0.33, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 15.46, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 16.88, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 6.18, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 5.5, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 5.24, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.62, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.71, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.45, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.04, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.05, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 5.28, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.83, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.17, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.8, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.46, 
"optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 16.88, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.46, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 16.88, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 533.0, 576.0, 582.0, 579.0, 582.0, 579.0, 582.0, 582.0, 582.0, 479.0, 576.0, 519.0, 525.0, 627.0, 522.0, 582.0, 587.0, 587.0, 627.0, 582.0, 582.0, 582.0, 579.0, 582.0, 579.0, 579.0, 582.0, 475.0, 587.0, 582.0, 579.0, 579.0, 564.0, 579.0, 582.0, 630.0, 576.0, 587.0, 579.0, 579.0, 582.0, 576.0, 489.0, 530.0, 579.0, 570.0, 630.0, 582.0, 587.0, 587.0, 567.0, 587.0, 368.0, 579.0, 498.0, 587.0, 573.0, 582.0, 579.0, 633.0, 587.0, 525.0, 579.0, 567.0, 633.0, 582.0, 582.0, 576.0, 587.0, 627.0, 579.0, 627.0, 579.0, 630.0, 630.0, 630.0, 579.0, 579.0, 299.0, 633.0, 582.0, 582.0, 522.0, 630.0, 582.0, 627.0, 567.0, 564.0, 582.0, 555.0, 582.0, 579.0, 576.0, 579.0, 533.0, 576.0, 576.0, 582.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [294.0, 285.0, 261.0, 272.0, 283.0, 293.0, 297.0, 285.0, 288.0, 291.0, 283.0, 299.0, 291.0, 288.0, 281.0, 301.0, 283.0, 299.0, 299.0, 283.0, 248.0, 231.0, 295.0, 281.0, 264.0, 255.0, 260.0, 265.0, 309.0, 318.0, 274.0, 248.0, 294.0, 288.0, 283.0, 304.0, 301.0, 286.0, 318.0, 309.0, 288.0, 294.0, 293.0, 289.0, 296.0, 286.0, 299.0, 280.0, 281.0, 301.0, 300.0, 279.0, 288.0, 291.0, 289.0, 293.0, 243.0, 232.0, 300.0, 287.0, 297.0, 285.0, 284.0, 295.0, 293.0, 286.0, 287.0, 277.0, 280.0, 299.0, 296.0, 286.0, 316.0, 314.0, 279.0, 297.0, 309.0, 278.0, 286.0, 293.0, 285.0, 294.0, 295.0, 287.0, 284.0, 292.0, 249.0, 240.0, 264.0, 266.0, 273.0, 306.0, 281.0, 289.0, 316.0, 314.0, 283.0, 
299.0, 292.0, 295.0, 285.0, 302.0, 289.0, 278.0, 299.0, 288.0, 189.0, 179.0, 293.0, 286.0, 253.0, 245.0, 296.0, 291.0, 291.0, 282.0, 293.0, 289.0, 290.0, 289.0, 322.0, 311.0, 293.0, 294.0, 256.0, 269.0, 280.0, 299.0, 284.0, 283.0, 316.0, 317.0, 291.0, 291.0, 291.0, 291.0, 283.0, 293.0, 287.0, 300.0, 309.0, 318.0, 298.0, 281.0, 304.0, 323.0, 297.0, 282.0, 322.0, 308.0, 305.0, 325.0, 308.0, 322.0, 277.0, 302.0, 284.0, 295.0, 160.0, 139.0, 322.0, 311.0, 289.0, 293.0, 304.0, 278.0, 253.0, 269.0, 304.0, 326.0, 302.0, 280.0, 303.0, 324.0, 280.0, 287.0, 285.0, 279.0, 286.0, 296.0, 279.0, 276.0, 295.0, 287.0, 283.0, 296.0, 283.0, 293.0, 282.0, 297.0, 265.0, 268.0, 293.0, 283.0, 289.0, 287.0, 291.0, 291.0, 295.0, 287.0]}, "sampler_perf": {"mean_env_wait_ms": 1.1939639805139826, "mean_processing_ms": 0.3068617868060299, "mean_inference_ms": 1.750206276185966}, "off_policy_estimator": {}, "info": {"num_steps_trained": 4848000, "num_steps_sampled": 2585600, "sample_time_ms": 21104.737, "load_time_ms": 36.737, "grad_time_ms": 9324.388, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0035205576568841934, "policy_loss": -0.004760665353387594, "vf_loss": 88.47342681884766, "vf_explained_var": 0.7671054005622864, "kl": 0.0017035487107932568, "entropy": 1.1322449445724487, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2585600, "episodes_total": 6464, "training_iteration": 202, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-26-16", "timestamp": 1660253176, "time_this_iter_s": 31.605774879455566, "time_total_s": 11591.760535955429, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 11591.760535955429, "timesteps_since_restore": 2585600, "iterations_since_restore": 202, "perf": {"cpu_util_percent": 31.240000000000006, "ram_util_percent": 58.77555555555557}}
-{"episode_reward_max": 633.0, "episode_reward_min": 299.0, "episode_reward_mean": 580.31, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 139.0}, "policy_reward_max": {"ppo": 326.0}, "policy_reward_mean": {"ppo": 290.155}, "custom_metrics": {"sparse_reward_mean": 200.4, "sparse_reward_min": 100, "sparse_reward_max": 220, "shaped_reward_mean": 179.51, "shaped_reward_min": 99, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.11, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 18.4, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 15.71, "useful_onion_pickup_agent_0_min": 4, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 17.64, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.53, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.71, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.24, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, "useful_onion_drop_agent_1_mean": 0.26, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 15.34, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 17.27, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 6.36, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 5.33, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 5.57, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.69, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.58, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.44, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.06, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 5.48, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.7, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.44, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.66, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.34, 
"optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 17.27, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.34, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 17.27, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 587.0, 564.0, 579.0, 579.0, 582.0, 582.0, 582.0, 582.0, 582.0, 630.0, 633.0, 579.0, 539.0, 536.0, 582.0, 582.0, 587.0, 627.0, 587.0, 579.0, 630.0, 584.0, 579.0, 579.0, 579.0, 579.0, 630.0, 633.0, 630.0, 582.0, 630.0, 567.0, 633.0, 582.0, 582.0, 576.0, 587.0, 627.0, 579.0, 627.0, 579.0, 630.0, 630.0, 630.0, 579.0, 579.0, 299.0, 633.0, 582.0, 582.0, 522.0, 630.0, 582.0, 627.0, 567.0, 564.0, 582.0, 555.0, 582.0, 579.0, 576.0, 579.0, 533.0, 576.0, 576.0, 582.0, 582.0, 579.0, 533.0, 576.0, 582.0, 579.0, 582.0, 579.0, 582.0, 582.0, 582.0, 479.0, 576.0, 519.0, 525.0, 627.0, 522.0, 582.0, 587.0, 587.0, 627.0, 582.0, 582.0, 582.0, 579.0, 582.0, 579.0, 579.0, 582.0, 475.0, 587.0, 582.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [285.0, 297.0, 286.0, 301.0, 280.0, 284.0, 288.0, 291.0, 291.0, 288.0, 299.0, 283.0, 301.0, 281.0, 292.0, 290.0, 291.0, 291.0, 293.0, 289.0, 307.0, 323.0, 318.0, 315.0, 294.0, 285.0, 270.0, 269.0, 266.0, 270.0, 291.0, 291.0, 302.0, 280.0, 290.0, 297.0, 316.0, 311.0, 302.0, 285.0, 291.0, 288.0, 321.0, 309.0, 291.0, 293.0, 293.0, 286.0, 286.0, 293.0, 294.0, 285.0, 290.0, 289.0, 320.0, 310.0, 313.0, 320.0, 311.0, 319.0, 305.0, 277.0, 313.0, 317.0, 284.0, 283.0, 316.0, 317.0, 291.0, 291.0, 291.0, 291.0, 283.0, 293.0, 287.0, 300.0, 309.0, 318.0, 298.0, 281.0, 304.0, 323.0, 297.0, 282.0, 322.0, 308.0, 305.0, 325.0, 308.0, 322.0, 277.0, 302.0, 284.0, 295.0, 160.0, 139.0, 322.0, 
311.0, 289.0, 293.0, 304.0, 278.0, 253.0, 269.0, 304.0, 326.0, 302.0, 280.0, 303.0, 324.0, 280.0, 287.0, 285.0, 279.0, 286.0, 296.0, 279.0, 276.0, 295.0, 287.0, 283.0, 296.0, 283.0, 293.0, 282.0, 297.0, 265.0, 268.0, 293.0, 283.0, 289.0, 287.0, 291.0, 291.0, 295.0, 287.0, 294.0, 285.0, 261.0, 272.0, 283.0, 293.0, 297.0, 285.0, 288.0, 291.0, 283.0, 299.0, 291.0, 288.0, 281.0, 301.0, 283.0, 299.0, 299.0, 283.0, 248.0, 231.0, 295.0, 281.0, 264.0, 255.0, 260.0, 265.0, 309.0, 318.0, 274.0, 248.0, 294.0, 288.0, 283.0, 304.0, 301.0, 286.0, 318.0, 309.0, 288.0, 294.0, 293.0, 289.0, 296.0, 286.0, 299.0, 280.0, 281.0, 301.0, 300.0, 279.0, 288.0, 291.0, 289.0, 293.0, 243.0, 232.0, 300.0, 287.0, 297.0, 285.0, 284.0, 295.0]}, "sampler_perf": {"mean_env_wait_ms": 1.190287620141578, "mean_processing_ms": 0.3061308463803027, "mean_inference_ms": 1.7468323528445506}, "off_policy_estimator": {}, "info": {"num_steps_trained": 4872000, "num_steps_sampled": 2598400, "sample_time_ms": 21208.335, "load_time_ms": 36.421, "grad_time_ms": 9429.96, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0062708244659006596, "policy_loss": -0.0019446747610345483, "vf_loss": 87.80118560791016, "vf_explained_var": 0.7648043632507324, "kl": 0.001872226013801992, "entropy": 1.1292202472686768, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2598400, "episodes_total": 6496, "training_iteration": 203, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-26-47", "timestamp": 1660253207, "time_this_iter_s": 30.729102849960327, "time_total_s": 11622.48963880539, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 11622.48963880539, "timesteps_since_restore": 2598400, "iterations_since_restore": 203, "perf": {"cpu_util_percent": 31.509302325581398, "ram_util_percent": 58.665116279069764}}
-{"episode_reward_max": 633.0, "episode_reward_min": 475.0, "episode_reward_mean": 581.57, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 231.0}, "policy_reward_max": {"ppo": 326.0}, "policy_reward_mean": {"ppo": 290.785}, "custom_metrics": {"sparse_reward_mean": 200.6, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 180.37, "shaped_reward_min": 155, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.37, "onion_pickup_agent_0_min": 10, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 18.35, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.05, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 17.54, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.57, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.78, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.26, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.27, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 15.55, "potting_onion_agent_0_min": 9, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 17.19, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 6.45, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 5.25, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 5.58, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.77, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.6, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.36, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.54, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.68, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.48, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.63, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.55, 
"optimal_onion_potting_agent_0_min": 9, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 17.19, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.55, "viable_onion_potting_agent_0_min": 9, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 17.19, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [576.0, 587.0, 579.0, 633.0, 582.0, 576.0, 564.0, 633.0, 525.0, 582.0, 579.0, 630.0, 579.0, 573.0, 573.0, 579.0, 587.0, 579.0, 582.0, 579.0, 521.0, 576.0, 530.0, 584.0, 630.0, 587.0, 582.0, 579.0, 582.0, 630.0, 627.0, 582.0, 576.0, 576.0, 582.0, 582.0, 579.0, 533.0, 576.0, 582.0, 579.0, 582.0, 579.0, 582.0, 582.0, 582.0, 479.0, 576.0, 519.0, 525.0, 627.0, 522.0, 582.0, 587.0, 587.0, 627.0, 582.0, 582.0, 582.0, 579.0, 582.0, 579.0, 579.0, 582.0, 475.0, 587.0, 582.0, 579.0, 582.0, 587.0, 564.0, 579.0, 579.0, 582.0, 582.0, 582.0, 582.0, 582.0, 630.0, 633.0, 579.0, 539.0, 536.0, 582.0, 582.0, 587.0, 627.0, 587.0, 579.0, 630.0, 584.0, 579.0, 579.0, 579.0, 579.0, 630.0, 633.0, 630.0, 582.0, 630.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [291.0, 285.0, 291.0, 296.0, 290.0, 289.0, 314.0, 319.0, 288.0, 294.0, 280.0, 296.0, 278.0, 286.0, 316.0, 317.0, 260.0, 265.0, 292.0, 290.0, 292.0, 287.0, 326.0, 304.0, 294.0, 285.0, 293.0, 280.0, 287.0, 286.0, 288.0, 291.0, 298.0, 289.0, 303.0, 276.0, 291.0, 291.0, 277.0, 302.0, 253.0, 268.0, 288.0, 288.0, 267.0, 263.0, 290.0, 294.0, 307.0, 323.0, 297.0, 290.0, 296.0, 286.0, 293.0, 286.0, 285.0, 297.0, 317.0, 313.0, 317.0, 310.0, 288.0, 294.0, 293.0, 283.0, 289.0, 287.0, 291.0, 291.0, 295.0, 287.0, 294.0, 285.0, 261.0, 272.0, 283.0, 293.0, 297.0, 285.0, 288.0, 291.0, 283.0, 299.0, 291.0, 288.0, 281.0, 301.0, 283.0, 299.0, 299.0, 283.0, 248.0, 231.0, 295.0, 281.0, 264.0, 
255.0, 260.0, 265.0, 309.0, 318.0, 274.0, 248.0, 294.0, 288.0, 283.0, 304.0, 301.0, 286.0, 318.0, 309.0, 288.0, 294.0, 293.0, 289.0, 296.0, 286.0, 299.0, 280.0, 281.0, 301.0, 300.0, 279.0, 288.0, 291.0, 289.0, 293.0, 243.0, 232.0, 300.0, 287.0, 297.0, 285.0, 284.0, 295.0, 285.0, 297.0, 286.0, 301.0, 280.0, 284.0, 288.0, 291.0, 291.0, 288.0, 299.0, 283.0, 301.0, 281.0, 292.0, 290.0, 291.0, 291.0, 293.0, 289.0, 307.0, 323.0, 318.0, 315.0, 294.0, 285.0, 270.0, 269.0, 266.0, 270.0, 291.0, 291.0, 302.0, 280.0, 290.0, 297.0, 316.0, 311.0, 302.0, 285.0, 291.0, 288.0, 321.0, 309.0, 291.0, 293.0, 293.0, 286.0, 286.0, 293.0, 294.0, 285.0, 290.0, 289.0, 320.0, 310.0, 313.0, 320.0, 311.0, 319.0, 305.0, 277.0, 313.0, 317.0]}, "sampler_perf": {"mean_env_wait_ms": 1.1866467197845827, "mean_processing_ms": 0.3054065028212096, "mean_inference_ms": 1.7433019705968333}, "off_policy_estimator": {}, "info": {"num_steps_trained": 4896000, "num_steps_sampled": 2611200, "sample_time_ms": 20995.947, "load_time_ms": 36.336, "grad_time_ms": 9492.439, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 3.609728810261004e-05, "policy_loss": -0.0082255182787776, "vf_loss": 88.31702423095703, "vf_explained_var": 0.7638809680938721, "kl": 0.0019561152439564466, "entropy": 1.140177845954895, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2611200, "episodes_total": 6528, "training_iteration": 204, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-27-18", "timestamp": 1660253238, "time_this_iter_s": 30.846153020858765, "time_total_s": 11653.335791826248, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 11653.335791826248, "timesteps_since_restore": 2611200, "iterations_since_restore": 204, "perf": {"cpu_util_percent": 30.906818181818178, "ram_util_percent": 58.60227272727274}}
-{"episode_reward_max": 633.0, "episode_reward_min": 237.0, "episode_reward_mean": 583.97, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 115.0}, "policy_reward_max": {"ppo": 326.0}, "policy_reward_mean": {"ppo": 291.985}, "custom_metrics": {"sparse_reward_mean": 201.8, "sparse_reward_min": 80, "sparse_reward_max": 220, "shaped_reward_mean": 180.37, "shaped_reward_min": 77, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.57, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 18.23, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.28, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 17.45, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.67, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.69, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.28, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.27, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 15.56, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 17.19, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 6.23, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.41, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 5.4, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.84, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.52, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.45, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.05, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.45, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.83, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.38, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.78, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.56, 
"optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 17.19, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.56, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 17.19, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [237.0, 576.0, 542.0, 573.0, 630.0, 624.0, 633.0, 582.0, 630.0, 579.0, 630.0, 582.0, 582.0, 567.0, 579.0, 618.0, 579.0, 627.0, 630.0, 582.0, 582.0, 576.0, 633.0, 627.0, 624.0, 582.0, 539.0, 567.0, 579.0, 582.0, 544.0, 573.0, 475.0, 587.0, 582.0, 579.0, 582.0, 587.0, 564.0, 579.0, 579.0, 582.0, 582.0, 582.0, 582.0, 582.0, 630.0, 633.0, 579.0, 539.0, 536.0, 582.0, 582.0, 587.0, 627.0, 587.0, 579.0, 630.0, 584.0, 579.0, 579.0, 579.0, 579.0, 630.0, 633.0, 630.0, 582.0, 630.0, 576.0, 587.0, 579.0, 633.0, 582.0, 576.0, 564.0, 633.0, 525.0, 582.0, 579.0, 630.0, 579.0, 573.0, 573.0, 579.0, 587.0, 579.0, 582.0, 579.0, 521.0, 576.0, 530.0, 584.0, 630.0, 587.0, 582.0, 579.0, 582.0, 630.0, 627.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [122.0, 115.0, 285.0, 291.0, 284.0, 258.0, 281.0, 292.0, 314.0, 316.0, 312.0, 312.0, 320.0, 313.0, 293.0, 289.0, 314.0, 316.0, 283.0, 296.0, 316.0, 314.0, 284.0, 298.0, 284.0, 298.0, 278.0, 289.0, 280.0, 299.0, 305.0, 313.0, 291.0, 288.0, 316.0, 311.0, 304.0, 326.0, 288.0, 294.0, 291.0, 291.0, 295.0, 281.0, 315.0, 318.0, 302.0, 325.0, 313.0, 311.0, 290.0, 292.0, 259.0, 280.0, 278.0, 289.0, 292.0, 287.0, 282.0, 300.0, 275.0, 269.0, 286.0, 287.0, 243.0, 232.0, 300.0, 287.0, 297.0, 285.0, 284.0, 295.0, 285.0, 297.0, 286.0, 301.0, 280.0, 284.0, 288.0, 291.0, 291.0, 288.0, 299.0, 283.0, 301.0, 281.0, 292.0, 290.0, 291.0, 291.0, 293.0, 289.0, 307.0, 323.0, 318.0, 315.0, 294.0, 
285.0, 270.0, 269.0, 266.0, 270.0, 291.0, 291.0, 302.0, 280.0, 290.0, 297.0, 316.0, 311.0, 302.0, 285.0, 291.0, 288.0, 321.0, 309.0, 291.0, 293.0, 293.0, 286.0, 286.0, 293.0, 294.0, 285.0, 290.0, 289.0, 320.0, 310.0, 313.0, 320.0, 311.0, 319.0, 305.0, 277.0, 313.0, 317.0, 291.0, 285.0, 291.0, 296.0, 290.0, 289.0, 314.0, 319.0, 288.0, 294.0, 280.0, 296.0, 278.0, 286.0, 316.0, 317.0, 260.0, 265.0, 292.0, 290.0, 292.0, 287.0, 326.0, 304.0, 294.0, 285.0, 293.0, 280.0, 287.0, 286.0, 288.0, 291.0, 298.0, 289.0, 303.0, 276.0, 291.0, 291.0, 277.0, 302.0, 253.0, 268.0, 288.0, 288.0, 267.0, 263.0, 290.0, 294.0, 307.0, 323.0, 297.0, 290.0, 296.0, 286.0, 293.0, 286.0, 285.0, 297.0, 317.0, 313.0, 317.0, 310.0, 288.0, 294.0]}, "sampler_perf": {"mean_env_wait_ms": 1.1830353872295725, "mean_processing_ms": 0.3046858000734139, "mean_inference_ms": 1.7395956173502736}, "off_policy_estimator": {}, "info": {"num_steps_trained": 4920000, "num_steps_sampled": 2624000, "sample_time_ms": 20976.168, "load_time_ms": 36.272, "grad_time_ms": 9605.806, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.005292419344186783, "policy_loss": -0.002614696277305484, "vf_loss": 84.73992156982422, "vf_explained_var": 0.7728293538093567, "kl": 0.0027176842559129, "entropy": 1.1337858438491821, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2624000, "episodes_total": 6560, "training_iteration": 205, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-27-47", "timestamp": 1660253267, "time_this_iter_s": 29.478952169418335, "time_total_s": 11682.814743995667, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 11682.814743995667, "timesteps_since_restore": 2624000, "iterations_since_restore": 205, "perf": {"cpu_util_percent": 32.5452380952381, "ram_util_percent": 58.56666666666667}}
-{"episode_reward_max": 636.0, "episode_reward_min": 237.0, "episode_reward_mean": 586.12, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 115.0}, "policy_reward_max": {"ppo": 326.0}, "policy_reward_mean": {"ppo": 293.06}, "custom_metrics": {"sparse_reward_mean": 203.0, "sparse_reward_min": 80, "sparse_reward_max": 220, "shaped_reward_mean": 180.12, "shaped_reward_min": 77, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.63, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 18.28, "onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.3, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 17.48, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.75, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.71, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.32, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.29, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 15.56, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.23, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 6.23, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.57, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.25, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.85, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.59, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.58, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.05, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.41, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.89, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.34, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.85, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.56, 
"optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.23, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.56, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.23, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [621.0, 570.0, 587.0, 630.0, 633.0, 579.0, 630.0, 587.0, 582.0, 624.0, 624.0, 579.0, 636.0, 579.0, 576.0, 624.0, 525.0, 627.0, 579.0, 579.0, 630.0, 581.0, 465.0, 579.0, 582.0, 579.0, 573.0, 579.0, 582.0, 573.0, 579.0, 587.0, 633.0, 630.0, 582.0, 630.0, 576.0, 587.0, 579.0, 633.0, 582.0, 576.0, 564.0, 633.0, 525.0, 582.0, 579.0, 630.0, 579.0, 573.0, 573.0, 579.0, 587.0, 579.0, 582.0, 579.0, 521.0, 576.0, 530.0, 584.0, 630.0, 587.0, 582.0, 579.0, 582.0, 630.0, 627.0, 582.0, 237.0, 576.0, 542.0, 573.0, 630.0, 624.0, 633.0, 582.0, 630.0, 579.0, 630.0, 582.0, 582.0, 567.0, 579.0, 618.0, 579.0, 627.0, 630.0, 582.0, 582.0, 576.0, 633.0, 627.0, 624.0, 582.0, 539.0, 567.0, 579.0, 582.0, 544.0, 573.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [310.0, 311.0, 271.0, 299.0, 298.0, 289.0, 324.0, 306.0, 316.0, 317.0, 299.0, 280.0, 322.0, 308.0, 290.0, 297.0, 295.0, 287.0, 307.0, 317.0, 315.0, 309.0, 291.0, 288.0, 322.0, 314.0, 288.0, 291.0, 285.0, 291.0, 305.0, 319.0, 260.0, 265.0, 313.0, 314.0, 277.0, 302.0, 285.0, 294.0, 309.0, 321.0, 285.0, 296.0, 234.0, 231.0, 288.0, 291.0, 296.0, 286.0, 285.0, 294.0, 285.0, 288.0, 290.0, 289.0, 296.0, 286.0, 293.0, 280.0, 298.0, 281.0, 295.0, 292.0, 313.0, 320.0, 311.0, 319.0, 305.0, 277.0, 313.0, 317.0, 291.0, 285.0, 291.0, 296.0, 290.0, 289.0, 314.0, 319.0, 288.0, 294.0, 280.0, 296.0, 278.0, 286.0, 316.0, 317.0, 260.0, 265.0, 292.0, 290.0, 292.0, 287.0, 326.0, 304.0, 294.0, 
285.0, 293.0, 280.0, 287.0, 286.0, 288.0, 291.0, 298.0, 289.0, 303.0, 276.0, 291.0, 291.0, 277.0, 302.0, 253.0, 268.0, 288.0, 288.0, 267.0, 263.0, 290.0, 294.0, 307.0, 323.0, 297.0, 290.0, 296.0, 286.0, 293.0, 286.0, 285.0, 297.0, 317.0, 313.0, 317.0, 310.0, 288.0, 294.0, 122.0, 115.0, 285.0, 291.0, 284.0, 258.0, 281.0, 292.0, 314.0, 316.0, 312.0, 312.0, 320.0, 313.0, 293.0, 289.0, 314.0, 316.0, 283.0, 296.0, 316.0, 314.0, 284.0, 298.0, 284.0, 298.0, 278.0, 289.0, 280.0, 299.0, 305.0, 313.0, 291.0, 288.0, 316.0, 311.0, 304.0, 326.0, 288.0, 294.0, 291.0, 291.0, 295.0, 281.0, 315.0, 318.0, 302.0, 325.0, 313.0, 311.0, 290.0, 292.0, 259.0, 280.0, 278.0, 289.0, 292.0, 287.0, 282.0, 300.0, 275.0, 269.0, 286.0, 287.0]}, "sampler_perf": {"mean_env_wait_ms": 1.1794594867485337, "mean_processing_ms": 0.3039706521282891, "mean_inference_ms": 1.735821597361437}, "off_policy_estimator": {}, "info": {"num_steps_trained": 4944000, "num_steps_sampled": 2636800, "sample_time_ms": 21024.303, "load_time_ms": 36.234, "grad_time_ms": 9682.83, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.00302000530064106, "policy_loss": -0.005335552152246237, "vf_loss": 89.21270751953125, "vf_explained_var": 0.7561216354370117, "kl": 0.0017618268029764295, "entropy": 1.131414532661438, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2636800, "episodes_total": 6592, "training_iteration": 206, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-28-17", "timestamp": 1660253297, "time_this_iter_s": 29.685957193374634, "time_total_s": 11712.500701189041, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 11712.500701189041, "timesteps_since_restore": 2636800, "iterations_since_restore": 206, "perf": {"cpu_util_percent": 30.95714285714286, "ram_util_percent": 58.669047619047625}}
-{"episode_reward_max": 636.0, "episode_reward_min": 237.0, "episode_reward_mean": 589.54, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 115.0}, "policy_reward_max": {"ppo": 326.0}, "policy_reward_mean": {"ppo": 294.77}, "custom_metrics": {"sparse_reward_mean": 204.6, "sparse_reward_min": 80, "sparse_reward_max": 220, "shaped_reward_mean": 180.34, "shaped_reward_min": 77, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.51, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 18.3, "onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.12, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 17.5, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.68, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.71, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.28, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.28, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 15.54, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.25, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 6.27, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.62, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.25, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.84, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.62, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.6, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.05, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.42, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.93, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.37, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.88, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.54, 
"optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.25, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.54, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.25, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [573.0, 530.0, 582.0, 630.0, 624.0, 633.0, 558.0, 570.0, 627.0, 624.0, 573.0, 590.0, 576.0, 573.0, 587.0, 627.0, 582.0, 576.0, 582.0, 579.0, 582.0, 564.0, 576.0, 627.0, 627.0, 579.0, 633.0, 630.0, 633.0, 579.0, 630.0, 627.0, 582.0, 630.0, 627.0, 582.0, 237.0, 576.0, 542.0, 573.0, 630.0, 624.0, 633.0, 582.0, 630.0, 579.0, 630.0, 582.0, 582.0, 567.0, 579.0, 618.0, 579.0, 627.0, 630.0, 582.0, 582.0, 576.0, 633.0, 627.0, 624.0, 582.0, 539.0, 567.0, 579.0, 582.0, 544.0, 573.0, 621.0, 570.0, 587.0, 630.0, 633.0, 579.0, 630.0, 587.0, 582.0, 624.0, 624.0, 579.0, 636.0, 579.0, 576.0, 624.0, 525.0, 627.0, 579.0, 579.0, 630.0, 581.0, 465.0, 579.0, 582.0, 579.0, 573.0, 579.0, 582.0, 573.0, 579.0, 587.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [274.0, 299.0, 263.0, 267.0, 289.0, 293.0, 321.0, 309.0, 309.0, 315.0, 309.0, 324.0, 271.0, 287.0, 293.0, 277.0, 318.0, 309.0, 313.0, 311.0, 290.0, 283.0, 291.0, 299.0, 288.0, 288.0, 300.0, 273.0, 286.0, 301.0, 308.0, 319.0, 296.0, 286.0, 285.0, 291.0, 293.0, 289.0, 288.0, 291.0, 292.0, 290.0, 281.0, 283.0, 289.0, 287.0, 314.0, 313.0, 313.0, 314.0, 289.0, 290.0, 324.0, 309.0, 313.0, 317.0, 316.0, 317.0, 288.0, 291.0, 311.0, 319.0, 326.0, 301.0, 285.0, 297.0, 317.0, 313.0, 317.0, 310.0, 288.0, 294.0, 122.0, 115.0, 285.0, 291.0, 284.0, 258.0, 281.0, 292.0, 314.0, 316.0, 312.0, 312.0, 320.0, 313.0, 293.0, 289.0, 314.0, 316.0, 283.0, 296.0, 316.0, 314.0, 284.0, 298.0, 284.0, 
298.0, 278.0, 289.0, 280.0, 299.0, 305.0, 313.0, 291.0, 288.0, 316.0, 311.0, 304.0, 326.0, 288.0, 294.0, 291.0, 291.0, 295.0, 281.0, 315.0, 318.0, 302.0, 325.0, 313.0, 311.0, 290.0, 292.0, 259.0, 280.0, 278.0, 289.0, 292.0, 287.0, 282.0, 300.0, 275.0, 269.0, 286.0, 287.0, 310.0, 311.0, 271.0, 299.0, 298.0, 289.0, 324.0, 306.0, 316.0, 317.0, 299.0, 280.0, 322.0, 308.0, 290.0, 297.0, 295.0, 287.0, 307.0, 317.0, 315.0, 309.0, 291.0, 288.0, 322.0, 314.0, 288.0, 291.0, 285.0, 291.0, 305.0, 319.0, 260.0, 265.0, 313.0, 314.0, 277.0, 302.0, 285.0, 294.0, 309.0, 321.0, 285.0, 296.0, 234.0, 231.0, 288.0, 291.0, 296.0, 286.0, 285.0, 294.0, 285.0, 288.0, 290.0, 289.0, 296.0, 286.0, 293.0, 280.0, 298.0, 281.0, 295.0, 292.0]}, "sampler_perf": {"mean_env_wait_ms": 1.1759167909615367, "mean_processing_ms": 0.30326280237978454, "mean_inference_ms": 1.7321279401839695}, "off_policy_estimator": {}, "info": {"num_steps_trained": 4968000, "num_steps_sampled": 2649600, "sample_time_ms": 20977.446, "load_time_ms": 35.842, "grad_time_ms": 9697.985, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0015924535691738129, "policy_loss": -0.005817517638206482, "vf_loss": 79.77727508544922, "vf_explained_var": 0.7645978927612305, "kl": 0.001973592210561037, "entropy": 1.1355053186416626, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2649600, "episodes_total": 6624, "training_iteration": 207, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-28-47", "timestamp": 1660253327, "time_this_iter_s": 30.242400884628296, "time_total_s": 11742.74310207367, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 11742.74310207367, "timesteps_since_restore": 2649600, "iterations_since_restore": 207, "perf": {"cpu_util_percent": 33.359523809523814, "ram_util_percent": 58.971428571428575}}
-{"episode_reward_max": 636.0, "episode_reward_min": 465.0, "episode_reward_mean": 592.22, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 231.0}, "policy_reward_max": {"ppo": 326.0}, "policy_reward_mean": {"ppo": 296.11}, "custom_metrics": {"sparse_reward_mean": 205.6, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 181.02, "shaped_reward_min": 145, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.49, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 18.37, "onion_pickup_agent_1_min": 12, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.04, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 17.65, "useful_onion_pickup_agent_1_min": 12, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.6, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.73, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.28, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.3, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 15.62, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.32, "potting_onion_agent_1_min": 12, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.27, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 5.68, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.22, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.83, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.57, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.63, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.04, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.48, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.97, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.43, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.86, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.04, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 3, "optimal_onion_potting_agent_0_mean": 15.62, 
"optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.32, "optimal_onion_potting_agent_1_min": 12, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.62, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.32, "viable_onion_potting_agent_1_min": 12, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [584.0, 582.0, 630.0, 630.0, 582.0, 564.0, 576.0, 582.0, 630.0, 561.0, 633.0, 587.0, 579.0, 579.0, 627.0, 627.0, 627.0, 518.0, 633.0, 633.0, 582.0, 630.0, 582.0, 582.0, 587.0, 573.0, 627.0, 582.0, 573.0, 582.0, 564.0, 573.0, 579.0, 582.0, 544.0, 573.0, 621.0, 570.0, 587.0, 630.0, 633.0, 579.0, 630.0, 587.0, 582.0, 624.0, 624.0, 579.0, 636.0, 579.0, 576.0, 624.0, 525.0, 627.0, 579.0, 579.0, 630.0, 581.0, 465.0, 579.0, 582.0, 579.0, 573.0, 579.0, 582.0, 573.0, 579.0, 587.0, 573.0, 530.0, 582.0, 630.0, 624.0, 633.0, 558.0, 570.0, 627.0, 624.0, 573.0, 590.0, 576.0, 573.0, 587.0, 627.0, 582.0, 576.0, 582.0, 579.0, 582.0, 564.0, 576.0, 627.0, 627.0, 579.0, 633.0, 630.0, 633.0, 579.0, 630.0, 627.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [285.0, 299.0, 296.0, 286.0, 311.0, 319.0, 306.0, 324.0, 291.0, 291.0, 283.0, 281.0, 289.0, 287.0, 286.0, 296.0, 315.0, 315.0, 277.0, 284.0, 321.0, 312.0, 290.0, 297.0, 296.0, 283.0, 284.0, 295.0, 310.0, 317.0, 313.0, 314.0, 326.0, 301.0, 252.0, 266.0, 319.0, 314.0, 323.0, 310.0, 296.0, 286.0, 315.0, 315.0, 290.0, 292.0, 283.0, 299.0, 292.0, 295.0, 283.0, 290.0, 308.0, 319.0, 279.0, 303.0, 279.0, 294.0, 290.0, 292.0, 283.0, 281.0, 278.0, 295.0, 292.0, 287.0, 282.0, 300.0, 275.0, 269.0, 286.0, 287.0, 310.0, 311.0, 271.0, 299.0, 298.0, 289.0, 324.0, 306.0, 316.0, 317.0, 299.0, 280.0, 322.0, 308.0, 290.0, 297.0, 295.0, 287.0, 307.0, 317.0, 315.0, 309.0, 291.0, 288.0, 322.0, 
314.0, 288.0, 291.0, 285.0, 291.0, 305.0, 319.0, 260.0, 265.0, 313.0, 314.0, 277.0, 302.0, 285.0, 294.0, 309.0, 321.0, 285.0, 296.0, 234.0, 231.0, 288.0, 291.0, 296.0, 286.0, 285.0, 294.0, 285.0, 288.0, 290.0, 289.0, 296.0, 286.0, 293.0, 280.0, 298.0, 281.0, 295.0, 292.0, 274.0, 299.0, 263.0, 267.0, 289.0, 293.0, 321.0, 309.0, 309.0, 315.0, 309.0, 324.0, 271.0, 287.0, 293.0, 277.0, 318.0, 309.0, 313.0, 311.0, 290.0, 283.0, 291.0, 299.0, 288.0, 288.0, 300.0, 273.0, 286.0, 301.0, 308.0, 319.0, 296.0, 286.0, 285.0, 291.0, 293.0, 289.0, 288.0, 291.0, 292.0, 290.0, 281.0, 283.0, 289.0, 287.0, 314.0, 313.0, 313.0, 314.0, 289.0, 290.0, 324.0, 309.0, 313.0, 317.0, 316.0, 317.0, 288.0, 291.0, 311.0, 319.0, 326.0, 301.0]}, "sampler_perf": {"mean_env_wait_ms": 1.1724058892059666, "mean_processing_ms": 0.3025608135721768, "mean_inference_ms": 1.7284261128230096}, "off_policy_estimator": {}, "info": {"num_steps_trained": 4992000, "num_steps_sampled": 2662400, "sample_time_ms": 20797.656, "load_time_ms": 35.752, "grad_time_ms": 9645.805, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0059862625785171986, "policy_loss": -0.00204761722125113, "vf_loss": 86.01973724365234, "vf_explained_var": 0.7589619755744934, "kl": 0.0022174532059580088, "entropy": 1.136189579963684, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2662400, "episodes_total": 6656, "training_iteration": 208, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-29-16", "timestamp": 1660253356, "time_this_iter_s": 28.974893808364868, "time_total_s": 11771.717995882034, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 11771.717995882034, "timesteps_since_restore": 2662400, "iterations_since_restore": 208, "perf": {"cpu_util_percent": 35.22682926829268, "ram_util_percent": 58.67073170731708}}
-{"episode_reward_max": 639.0, "episode_reward_min": 518.0, "episode_reward_mean": 593.05, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 252.0}, "policy_reward_max": {"ppo": 326.0}, "policy_reward_mean": {"ppo": 296.525}, "custom_metrics": {"sparse_reward_mean": 205.8, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 181.45, "shaped_reward_min": 158, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.55, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 18.33, "onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.15, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 17.62, "useful_onion_pickup_agent_1_min": 11, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.54, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.74, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.2, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.34, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 15.76, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.22, "potting_onion_agent_1_min": 11, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.22, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 5.67, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.27, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.85, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.59, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.51, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.04, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.47, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.0, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.38, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.91, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 3, "optimal_onion_potting_agent_0_mean": 15.76, 
"optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.22, "optimal_onion_potting_agent_1_min": 11, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.76, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.22, "viable_onion_potting_agent_1_min": 11, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [630.0, 627.0, 579.0, 582.0, 579.0, 618.0, 522.0, 581.0, 590.0, 587.0, 582.0, 582.0, 579.0, 564.0, 630.0, 582.0, 587.0, 639.0, 570.0, 576.0, 567.0, 576.0, 582.0, 630.0, 584.0, 587.0, 633.0, 582.0, 630.0, 582.0, 579.0, 582.0, 582.0, 573.0, 579.0, 587.0, 573.0, 530.0, 582.0, 630.0, 624.0, 633.0, 558.0, 570.0, 627.0, 624.0, 573.0, 590.0, 576.0, 573.0, 587.0, 627.0, 582.0, 576.0, 582.0, 579.0, 582.0, 564.0, 576.0, 627.0, 627.0, 579.0, 633.0, 630.0, 633.0, 579.0, 630.0, 627.0, 584.0, 582.0, 630.0, 630.0, 582.0, 564.0, 576.0, 582.0, 630.0, 561.0, 633.0, 587.0, 579.0, 579.0, 627.0, 627.0, 627.0, 518.0, 633.0, 633.0, 582.0, 630.0, 582.0, 582.0, 587.0, 573.0, 627.0, 582.0, 573.0, 582.0, 564.0, 573.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [317.0, 313.0, 315.0, 312.0, 290.0, 289.0, 297.0, 285.0, 281.0, 298.0, 313.0, 305.0, 255.0, 267.0, 287.0, 294.0, 304.0, 286.0, 298.0, 289.0, 287.0, 295.0, 288.0, 294.0, 289.0, 290.0, 286.0, 278.0, 314.0, 316.0, 285.0, 297.0, 300.0, 287.0, 316.0, 323.0, 273.0, 297.0, 286.0, 290.0, 278.0, 289.0, 291.0, 285.0, 293.0, 289.0, 321.0, 309.0, 287.0, 297.0, 286.0, 301.0, 321.0, 312.0, 291.0, 291.0, 316.0, 314.0, 286.0, 296.0, 302.0, 277.0, 294.0, 288.0, 296.0, 286.0, 293.0, 280.0, 298.0, 281.0, 295.0, 292.0, 274.0, 299.0, 263.0, 267.0, 289.0, 293.0, 321.0, 309.0, 309.0, 315.0, 309.0, 324.0, 271.0, 287.0, 293.0, 277.0, 318.0, 309.0, 313.0, 311.0, 290.0, 283.0, 291.0, 299.0, 288.0, 
288.0, 300.0, 273.0, 286.0, 301.0, 308.0, 319.0, 296.0, 286.0, 285.0, 291.0, 293.0, 289.0, 288.0, 291.0, 292.0, 290.0, 281.0, 283.0, 289.0, 287.0, 314.0, 313.0, 313.0, 314.0, 289.0, 290.0, 324.0, 309.0, 313.0, 317.0, 316.0, 317.0, 288.0, 291.0, 311.0, 319.0, 326.0, 301.0, 285.0, 299.0, 296.0, 286.0, 311.0, 319.0, 306.0, 324.0, 291.0, 291.0, 283.0, 281.0, 289.0, 287.0, 286.0, 296.0, 315.0, 315.0, 277.0, 284.0, 321.0, 312.0, 290.0, 297.0, 296.0, 283.0, 284.0, 295.0, 310.0, 317.0, 313.0, 314.0, 326.0, 301.0, 252.0, 266.0, 319.0, 314.0, 323.0, 310.0, 296.0, 286.0, 315.0, 315.0, 290.0, 292.0, 283.0, 299.0, 292.0, 295.0, 283.0, 290.0, 308.0, 319.0, 279.0, 303.0, 279.0, 294.0, 290.0, 292.0, 283.0, 281.0, 278.0, 295.0]}, "sampler_perf": {"mean_env_wait_ms": 1.168925444559228, "mean_processing_ms": 0.30186423192914913, "mean_inference_ms": 1.7247612604215892}, "off_policy_estimator": {}, "info": {"num_steps_trained": 5016000, "num_steps_sampled": 2675200, "sample_time_ms": 20704.168, "load_time_ms": 35.714, "grad_time_ms": 9573.901, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0030685996171087027, "policy_loss": -0.005047030281275511, "vf_loss": 86.80921173095703, "vf_explained_var": 0.7612468600273132, "kl": 0.0021123213227838278, "entropy": 1.1305813789367676, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2675200, "episodes_total": 6688, "training_iteration": 209, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-29-45", "timestamp": 1660253385, "time_this_iter_s": 29.278584241867065, "time_total_s": 11800.996580123901, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 11800.996580123901, "timesteps_since_restore": 2675200, "iterations_since_restore": 209, "perf": {"cpu_util_percent": 32.47857142857143, "ram_util_percent": 58.55952380952381}}
-{"episode_reward_max": 639.0, "episode_reward_min": 513.0, "episode_reward_mean": 592.98, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 252.0}, "policy_reward_max": {"ppo": 326.0}, "policy_reward_mean": {"ppo": 296.49}, "custom_metrics": {"sparse_reward_mean": 205.4, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 182.18, "shaped_reward_min": 153, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.55, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 18.36, "onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.12, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 17.75, "useful_onion_pickup_agent_1_min": 11, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.58, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.62, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.17, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.27, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 15.66, "potting_onion_agent_0_min": 9, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.39, "potting_onion_agent_1_min": 11, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 6.16, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 5.56, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.45, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.86, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.47, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.45, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.51, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.95, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.43, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.85, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.04, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 3, "optimal_onion_potting_agent_0_mean": 15.66, 
"optimal_onion_potting_agent_0_min": 9, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.39, "optimal_onion_potting_agent_1_min": 11, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.66, "viable_onion_potting_agent_0_min": 9, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.39, "viable_onion_potting_agent_1_min": 11, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [573.0, 579.0, 570.0, 582.0, 576.0, 633.0, 582.0, 582.0, 582.0, 627.0, 633.0, 587.0, 579.0, 587.0, 587.0, 579.0, 582.0, 584.0, 513.0, 630.0, 582.0, 630.0, 630.0, 633.0, 590.0, 582.0, 582.0, 582.0, 579.0, 630.0, 582.0, 579.0, 633.0, 579.0, 630.0, 627.0, 584.0, 582.0, 630.0, 630.0, 582.0, 564.0, 576.0, 582.0, 630.0, 561.0, 633.0, 587.0, 579.0, 579.0, 627.0, 627.0, 627.0, 518.0, 633.0, 633.0, 582.0, 630.0, 582.0, 582.0, 587.0, 573.0, 627.0, 582.0, 573.0, 582.0, 564.0, 573.0, 630.0, 627.0, 579.0, 582.0, 579.0, 618.0, 522.0, 581.0, 590.0, 587.0, 582.0, 582.0, 579.0, 564.0, 630.0, 582.0, 587.0, 639.0, 570.0, 576.0, 567.0, 576.0, 582.0, 630.0, 584.0, 587.0, 633.0, 582.0, 630.0, 582.0, 579.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [288.0, 285.0, 287.0, 292.0, 288.0, 282.0, 295.0, 287.0, 283.0, 293.0, 319.0, 314.0, 282.0, 300.0, 291.0, 291.0, 303.0, 279.0, 306.0, 321.0, 314.0, 319.0, 290.0, 297.0, 278.0, 301.0, 300.0, 287.0, 304.0, 283.0, 298.0, 281.0, 299.0, 283.0, 289.0, 295.0, 259.0, 254.0, 321.0, 309.0, 285.0, 297.0, 311.0, 319.0, 318.0, 312.0, 318.0, 315.0, 293.0, 297.0, 296.0, 286.0, 294.0, 288.0, 297.0, 285.0, 295.0, 284.0, 306.0, 324.0, 293.0, 289.0, 286.0, 293.0, 316.0, 317.0, 288.0, 291.0, 311.0, 319.0, 326.0, 301.0, 285.0, 299.0, 296.0, 286.0, 311.0, 319.0, 306.0, 324.0, 291.0, 291.0, 283.0, 281.0, 289.0, 287.0, 286.0, 296.0, 315.0, 315.0, 277.0, 284.0, 321.0, 312.0, 290.0, 297.0, 296.0, 
283.0, 284.0, 295.0, 310.0, 317.0, 313.0, 314.0, 326.0, 301.0, 252.0, 266.0, 319.0, 314.0, 323.0, 310.0, 296.0, 286.0, 315.0, 315.0, 290.0, 292.0, 283.0, 299.0, 292.0, 295.0, 283.0, 290.0, 308.0, 319.0, 279.0, 303.0, 279.0, 294.0, 290.0, 292.0, 283.0, 281.0, 278.0, 295.0, 317.0, 313.0, 315.0, 312.0, 290.0, 289.0, 297.0, 285.0, 281.0, 298.0, 313.0, 305.0, 255.0, 267.0, 287.0, 294.0, 304.0, 286.0, 298.0, 289.0, 287.0, 295.0, 288.0, 294.0, 289.0, 290.0, 286.0, 278.0, 314.0, 316.0, 285.0, 297.0, 300.0, 287.0, 316.0, 323.0, 273.0, 297.0, 286.0, 290.0, 278.0, 289.0, 291.0, 285.0, 293.0, 289.0, 321.0, 309.0, 287.0, 297.0, 286.0, 301.0, 321.0, 312.0, 291.0, 291.0, 316.0, 314.0, 286.0, 296.0, 302.0, 277.0, 294.0, 288.0]}, "sampler_perf": {"mean_env_wait_ms": 1.165477981572783, "mean_processing_ms": 0.3011732242601824, "mean_inference_ms": 1.7211386547427134}, "off_policy_estimator": {}, "info": {"num_steps_trained": 5040000, "num_steps_sampled": 2688000, "sample_time_ms": 20664.196, "load_time_ms": 35.919, "grad_time_ms": 9521.919, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.002112786052748561, "policy_loss": -0.006137066055089235, "vf_loss": 88.17215728759766, "vf_explained_var": 0.7567508220672607, "kl": 0.0019861103501170874, "entropy": 1.1347342729568481, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2688000, "episodes_total": 6720, "training_iteration": 210, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-30-15", "timestamp": 1660253415, "time_this_iter_s": 29.789448976516724, "time_total_s": 11830.786029100418, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 11830.786029100418, "timesteps_since_restore": 2688000, "iterations_since_restore": 210, "perf": {"cpu_util_percent": 30.638095238095236, "ram_util_percent": 58.67142857142858}}
-{"episode_reward_max": 639.0, "episode_reward_min": 465.0, "episode_reward_mean": 586.56, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 231.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 293.28}, "custom_metrics": {"sparse_reward_mean": 202.6, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 181.36, "shaped_reward_min": 145, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.32, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 18.57, "onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 15.89, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 17.78, "useful_onion_pickup_agent_1_min": 11, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.7, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.67, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.18, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.35, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 15.33, "potting_onion_agent_0_min": 9, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 17.52, "potting_onion_agent_1_min": 11, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 6.34, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 5.31, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 5.64, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.73, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.53, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.4, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.6, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.75, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.51, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.67, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.33, 
"optimal_onion_potting_agent_0_min": 9, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 17.52, "optimal_onion_potting_agent_1_min": 11, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.33, "viable_onion_potting_agent_0_min": 9, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 17.52, "viable_onion_potting_agent_1_min": 11, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [576.0, 573.0, 579.0, 465.0, 579.0, 630.0, 582.0, 582.0, 536.0, 587.0, 587.0, 630.0, 579.0, 630.0, 579.0, 582.0, 579.0, 579.0, 576.0, 582.0, 530.0, 484.0, 627.0, 582.0, 579.0, 584.0, 579.0, 590.0, 587.0, 587.0, 579.0, 636.0, 573.0, 582.0, 564.0, 573.0, 630.0, 627.0, 579.0, 582.0, 579.0, 618.0, 522.0, 581.0, 590.0, 587.0, 582.0, 582.0, 579.0, 564.0, 630.0, 582.0, 587.0, 639.0, 570.0, 576.0, 567.0, 576.0, 582.0, 630.0, 584.0, 587.0, 633.0, 582.0, 630.0, 582.0, 579.0, 582.0, 573.0, 579.0, 570.0, 582.0, 576.0, 633.0, 582.0, 582.0, 582.0, 627.0, 633.0, 587.0, 579.0, 587.0, 587.0, 579.0, 582.0, 584.0, 513.0, 630.0, 582.0, 630.0, 630.0, 633.0, 590.0, 582.0, 582.0, 582.0, 579.0, 630.0, 582.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [278.0, 298.0, 284.0, 289.0, 288.0, 291.0, 231.0, 234.0, 279.0, 300.0, 313.0, 317.0, 297.0, 285.0, 291.0, 291.0, 271.0, 265.0, 302.0, 285.0, 290.0, 297.0, 316.0, 314.0, 286.0, 293.0, 313.0, 317.0, 294.0, 285.0, 296.0, 286.0, 287.0, 292.0, 288.0, 291.0, 290.0, 286.0, 293.0, 289.0, 260.0, 270.0, 234.0, 250.0, 308.0, 319.0, 294.0, 288.0, 290.0, 289.0, 296.0, 288.0, 292.0, 287.0, 301.0, 289.0, 301.0, 286.0, 300.0, 287.0, 300.0, 279.0, 320.0, 316.0, 279.0, 294.0, 290.0, 292.0, 283.0, 281.0, 278.0, 295.0, 317.0, 313.0, 315.0, 312.0, 290.0, 289.0, 297.0, 285.0, 281.0, 298.0, 313.0, 305.0, 255.0, 267.0, 287.0, 294.0, 304.0, 286.0, 298.0, 289.0, 287.0, 295.0, 288.0, 294.0, 289.0, 
290.0, 286.0, 278.0, 314.0, 316.0, 285.0, 297.0, 300.0, 287.0, 316.0, 323.0, 273.0, 297.0, 286.0, 290.0, 278.0, 289.0, 291.0, 285.0, 293.0, 289.0, 321.0, 309.0, 287.0, 297.0, 286.0, 301.0, 321.0, 312.0, 291.0, 291.0, 316.0, 314.0, 286.0, 296.0, 302.0, 277.0, 294.0, 288.0, 288.0, 285.0, 287.0, 292.0, 288.0, 282.0, 295.0, 287.0, 283.0, 293.0, 319.0, 314.0, 282.0, 300.0, 291.0, 291.0, 303.0, 279.0, 306.0, 321.0, 314.0, 319.0, 290.0, 297.0, 278.0, 301.0, 300.0, 287.0, 304.0, 283.0, 298.0, 281.0, 299.0, 283.0, 289.0, 295.0, 259.0, 254.0, 321.0, 309.0, 285.0, 297.0, 311.0, 319.0, 318.0, 312.0, 318.0, 315.0, 293.0, 297.0, 296.0, 286.0, 294.0, 288.0, 297.0, 285.0, 295.0, 284.0, 306.0, 324.0, 293.0, 289.0, 286.0, 293.0]}, "sampler_perf": {"mean_env_wait_ms": 1.162072675470566, "mean_processing_ms": 0.30049261071423955, "mean_inference_ms": 1.7176923877441694}, "off_policy_estimator": {}, "info": {"num_steps_trained": 5064000, "num_steps_sampled": 2700800, "sample_time_ms": 20604.341, "load_time_ms": 36.396, "grad_time_ms": 9451.079, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0014451188035309315, "policy_loss": -0.0075116343796253204, "vf_loss": 95.30281829833984, "vf_explained_var": 0.7530279755592346, "kl": 0.001810736837796867, "entropy": 1.147046446800232, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2700800, "episodes_total": 6752, "training_iteration": 211, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-30-46", "timestamp": 1660253446, "time_this_iter_s": 30.540673971176147, "time_total_s": 11861.326703071594, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 11861.326703071594, "timesteps_since_restore": 2700800, "iterations_since_restore": 211, "perf": {"cpu_util_percent": 31.16511627906976, "ram_util_percent": 58.63023255813955}}
-{"episode_reward_max": 636.0, "episode_reward_min": 465.0, "episode_reward_mean": 585.95, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 231.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 292.975}, "custom_metrics": {"sparse_reward_mean": 202.2, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 181.55, "shaped_reward_min": 138, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.63, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 18.41, "onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 16.19, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 17.45, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.82, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.7, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.25, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.39, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 15.48, "potting_onion_agent_0_min": 9, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 17.36, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 6.19, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.3, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 5.65, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.81, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.4, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.4, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.55, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.78, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.48, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.68, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.48, 
"optimal_onion_potting_agent_0_min": 9, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 17.36, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.48, "viable_onion_potting_agent_0_min": 9, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 17.36, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [627.0, 627.0, 582.0, 542.0, 582.0, 579.0, 468.0, 539.0, 584.0, 587.0, 582.0, 633.0, 587.0, 579.0, 498.0, 627.0, 579.0, 630.0, 582.0, 630.0, 630.0, 582.0, 576.0, 587.0, 633.0, 582.0, 621.0, 627.0, 582.0, 587.0, 582.0, 525.0, 630.0, 582.0, 579.0, 582.0, 573.0, 579.0, 570.0, 582.0, 576.0, 633.0, 582.0, 582.0, 582.0, 627.0, 633.0, 587.0, 579.0, 587.0, 587.0, 579.0, 582.0, 584.0, 513.0, 630.0, 582.0, 630.0, 630.0, 633.0, 590.0, 582.0, 582.0, 582.0, 579.0, 630.0, 582.0, 579.0, 576.0, 573.0, 579.0, 465.0, 579.0, 630.0, 582.0, 582.0, 536.0, 587.0, 587.0, 630.0, 579.0, 630.0, 579.0, 582.0, 579.0, 579.0, 576.0, 582.0, 530.0, 484.0, 627.0, 582.0, 579.0, 584.0, 579.0, 590.0, 587.0, 587.0, 579.0, 636.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [303.0, 324.0, 318.0, 309.0, 294.0, 288.0, 267.0, 275.0, 288.0, 294.0, 293.0, 286.0, 236.0, 232.0, 277.0, 262.0, 288.0, 296.0, 302.0, 285.0, 285.0, 297.0, 314.0, 319.0, 296.0, 291.0, 293.0, 286.0, 245.0, 253.0, 318.0, 309.0, 295.0, 284.0, 318.0, 312.0, 293.0, 289.0, 309.0, 321.0, 310.0, 320.0, 280.0, 302.0, 293.0, 283.0, 286.0, 301.0, 320.0, 313.0, 286.0, 296.0, 314.0, 307.0, 319.0, 308.0, 301.0, 281.0, 282.0, 305.0, 288.0, 294.0, 256.0, 269.0, 316.0, 314.0, 286.0, 296.0, 302.0, 277.0, 294.0, 288.0, 288.0, 285.0, 287.0, 292.0, 288.0, 282.0, 295.0, 287.0, 283.0, 293.0, 319.0, 314.0, 282.0, 300.0, 291.0, 291.0, 303.0, 279.0, 306.0, 321.0, 314.0, 319.0, 290.0, 297.0, 278.0, 
301.0, 300.0, 287.0, 304.0, 283.0, 298.0, 281.0, 299.0, 283.0, 289.0, 295.0, 259.0, 254.0, 321.0, 309.0, 285.0, 297.0, 311.0, 319.0, 318.0, 312.0, 318.0, 315.0, 293.0, 297.0, 296.0, 286.0, 294.0, 288.0, 297.0, 285.0, 295.0, 284.0, 306.0, 324.0, 293.0, 289.0, 286.0, 293.0, 278.0, 298.0, 284.0, 289.0, 288.0, 291.0, 231.0, 234.0, 279.0, 300.0, 313.0, 317.0, 297.0, 285.0, 291.0, 291.0, 271.0, 265.0, 302.0, 285.0, 290.0, 297.0, 316.0, 314.0, 286.0, 293.0, 313.0, 317.0, 294.0, 285.0, 296.0, 286.0, 287.0, 292.0, 288.0, 291.0, 290.0, 286.0, 293.0, 289.0, 260.0, 270.0, 234.0, 250.0, 308.0, 319.0, 294.0, 288.0, 290.0, 289.0, 296.0, 288.0, 292.0, 287.0, 301.0, 289.0, 301.0, 286.0, 300.0, 287.0, 300.0, 279.0, 320.0, 316.0]}, "sampler_perf": {"mean_env_wait_ms": 1.15872347920077, "mean_processing_ms": 0.2998198878857747, "mean_inference_ms": 1.7145174243808747}, "off_policy_estimator": {}, "info": {"num_steps_trained": 5088000, "num_steps_sampled": 2713600, "sample_time_ms": 20641.235, "load_time_ms": 36.613, "grad_time_ms": 9485.245, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.010028759017586708, "policy_loss": 0.0009867753833532333, "vf_loss": 96.11052703857422, "vf_explained_var": 0.7489395141601562, "kl": 0.0021745015401393175, "entropy": 1.1381220817565918, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2713600, "episodes_total": 6784, "training_iteration": 212, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-31-18", "timestamp": 1660253478, "time_this_iter_s": 32.31651592254639, "time_total_s": 11893.64321899414, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 11893.64321899414, "timesteps_since_restore": 2713600, "iterations_since_restore": 212, "perf": {"cpu_util_percent": 33.958695652173915, "ram_util_percent": 58.643478260869585}}
-{"episode_reward_max": 636.0, "episode_reward_min": 465.0, "episode_reward_mean": 583.81, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 231.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 291.905}, "custom_metrics": {"sparse_reward_mean": 201.4, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 181.01, "shaped_reward_min": 138, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.71, "onion_pickup_agent_0_min": 10, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 18.22, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 16.33, "useful_onion_pickup_agent_0_min": 9, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 17.27, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.69, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.79, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.23, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.43, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 15.72, "potting_onion_agent_0_min": 9, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 17.08, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 6.13, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.41, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 5.49, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.88, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.47, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.44, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.04, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.43, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.88, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.34, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.79, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.72, 
"optimal_onion_potting_agent_0_min": 9, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 17.08, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.72, "viable_onion_potting_agent_0_min": 9, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 17.08, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [478.0, 582.0, 582.0, 582.0, 576.0, 576.0, 627.0, 579.0, 630.0, 536.0, 579.0, 630.0, 582.0, 582.0, 579.0, 582.0, 570.0, 576.0, 630.0, 627.0, 536.0, 579.0, 582.0, 579.0, 587.0, 584.0, 630.0, 582.0, 630.0, 582.0, 582.0, 579.0, 579.0, 630.0, 582.0, 579.0, 576.0, 573.0, 579.0, 465.0, 579.0, 630.0, 582.0, 582.0, 536.0, 587.0, 587.0, 630.0, 579.0, 630.0, 579.0, 582.0, 579.0, 579.0, 576.0, 582.0, 530.0, 484.0, 627.0, 582.0, 579.0, 584.0, 579.0, 590.0, 587.0, 587.0, 579.0, 636.0, 627.0, 627.0, 582.0, 542.0, 582.0, 579.0, 468.0, 539.0, 584.0, 587.0, 582.0, 633.0, 587.0, 579.0, 498.0, 627.0, 579.0, 630.0, 582.0, 630.0, 630.0, 582.0, 576.0, 587.0, 633.0, 582.0, 621.0, 627.0, 582.0, 587.0, 582.0, 525.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [246.0, 232.0, 301.0, 281.0, 298.0, 284.0, 291.0, 291.0, 303.0, 273.0, 282.0, 294.0, 316.0, 311.0, 293.0, 286.0, 308.0, 322.0, 270.0, 266.0, 288.0, 291.0, 324.0, 306.0, 291.0, 291.0, 291.0, 291.0, 288.0, 291.0, 293.0, 289.0, 296.0, 274.0, 297.0, 279.0, 315.0, 315.0, 308.0, 319.0, 266.0, 270.0, 283.0, 296.0, 290.0, 292.0, 283.0, 296.0, 296.0, 291.0, 293.0, 291.0, 316.0, 314.0, 281.0, 301.0, 316.0, 314.0, 290.0, 292.0, 289.0, 293.0, 286.0, 293.0, 295.0, 284.0, 306.0, 324.0, 293.0, 289.0, 286.0, 293.0, 278.0, 298.0, 284.0, 289.0, 288.0, 291.0, 231.0, 234.0, 279.0, 300.0, 313.0, 317.0, 297.0, 285.0, 291.0, 291.0, 271.0, 265.0, 302.0, 285.0, 290.0, 297.0, 316.0, 314.0, 286.0, 
293.0, 313.0, 317.0, 294.0, 285.0, 296.0, 286.0, 287.0, 292.0, 288.0, 291.0, 290.0, 286.0, 293.0, 289.0, 260.0, 270.0, 234.0, 250.0, 308.0, 319.0, 294.0, 288.0, 290.0, 289.0, 296.0, 288.0, 292.0, 287.0, 301.0, 289.0, 301.0, 286.0, 300.0, 287.0, 300.0, 279.0, 320.0, 316.0, 303.0, 324.0, 318.0, 309.0, 294.0, 288.0, 267.0, 275.0, 288.0, 294.0, 293.0, 286.0, 236.0, 232.0, 277.0, 262.0, 288.0, 296.0, 302.0, 285.0, 285.0, 297.0, 314.0, 319.0, 296.0, 291.0, 293.0, 286.0, 245.0, 253.0, 318.0, 309.0, 295.0, 284.0, 318.0, 312.0, 293.0, 289.0, 309.0, 321.0, 310.0, 320.0, 280.0, 302.0, 293.0, 283.0, 286.0, 301.0, 320.0, 313.0, 286.0, 296.0, 314.0, 307.0, 319.0, 308.0, 301.0, 281.0, 282.0, 305.0, 288.0, 294.0, 256.0, 269.0]}, "sampler_perf": {"mean_env_wait_ms": 1.1554138563428238, "mean_processing_ms": 0.299155513024685, "mean_inference_ms": 1.7115208998476246}, "off_policy_estimator": {}, "info": {"num_steps_trained": 5112000, "num_steps_sampled": 2726400, "sample_time_ms": 20734.364, "load_time_ms": 36.691, "grad_time_ms": 9415.374, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0028692474588751793, "policy_loss": -0.0050502982921898365, "vf_loss": 84.87030029296875, "vf_explained_var": 0.7659473419189453, "kl": 0.0017100750701501966, "entropy": 1.134959101676941, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2726400, "episodes_total": 6816, "training_iteration": 213, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-31-49", "timestamp": 1660253509, "time_this_iter_s": 30.962037086486816, "time_total_s": 11924.605256080627, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 11924.605256080627, "timesteps_since_restore": 2726400, "iterations_since_restore": 213, "perf": {"cpu_util_percent": 30.947727272727267, "ram_util_percent": 58.58863636363639}}
-{"episode_reward_max": 636.0, "episode_reward_min": 345.0, "episode_reward_mean": 584.19, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 172.0}, "policy_reward_max": {"ppo": 326.0}, "policy_reward_mean": {"ppo": 292.095}, "custom_metrics": {"sparse_reward_mean": 201.8, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 180.59, "shaped_reward_min": 105, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.49, "onion_pickup_agent_0_min": 10, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 18.4, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 16.08, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 17.48, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 25, "onion_drop_agent_0_mean": 0.65, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.89, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 7, "useful_onion_drop_agent_0_mean": 0.27, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, "useful_onion_drop_agent_1_mean": 0.44, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 15.56, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 17.14, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.25, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.3, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.5, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.78, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.56, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.36, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.55, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 10, "soup_pickup_agent_1_mean": 4.85, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.36, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.75, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.08, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 4, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.56, 
"optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 17.14, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.56, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 17.14, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 630.0, 587.0, 576.0, 582.0, 587.0, 630.0, 587.0, 630.0, 476.0, 579.0, 587.0, 584.0, 587.0, 627.0, 579.0, 558.0, 479.0, 630.0, 579.0, 630.0, 579.0, 630.0, 584.0, 576.0, 627.0, 627.0, 345.0, 579.0, 621.0, 582.0, 519.0, 587.0, 587.0, 579.0, 636.0, 627.0, 627.0, 582.0, 542.0, 582.0, 579.0, 468.0, 539.0, 584.0, 587.0, 582.0, 633.0, 587.0, 579.0, 498.0, 627.0, 579.0, 630.0, 582.0, 630.0, 630.0, 582.0, 576.0, 587.0, 633.0, 582.0, 621.0, 627.0, 582.0, 587.0, 582.0, 525.0, 478.0, 582.0, 582.0, 582.0, 576.0, 576.0, 627.0, 579.0, 630.0, 536.0, 579.0, 630.0, 582.0, 582.0, 579.0, 582.0, 570.0, 576.0, 630.0, 627.0, 536.0, 579.0, 582.0, 579.0, 587.0, 584.0, 630.0, 582.0, 630.0, 582.0, 582.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [299.0, 283.0, 309.0, 321.0, 287.0, 300.0, 290.0, 286.0, 289.0, 293.0, 296.0, 291.0, 314.0, 316.0, 303.0, 284.0, 314.0, 316.0, 243.0, 233.0, 273.0, 306.0, 296.0, 291.0, 291.0, 293.0, 298.0, 289.0, 307.0, 320.0, 284.0, 295.0, 264.0, 294.0, 238.0, 241.0, 326.0, 304.0, 288.0, 291.0, 319.0, 311.0, 289.0, 290.0, 311.0, 319.0, 292.0, 292.0, 295.0, 281.0, 321.0, 306.0, 316.0, 311.0, 173.0, 172.0, 295.0, 284.0, 302.0, 319.0, 291.0, 291.0, 259.0, 260.0, 301.0, 286.0, 300.0, 287.0, 300.0, 279.0, 320.0, 316.0, 303.0, 324.0, 318.0, 309.0, 294.0, 288.0, 267.0, 275.0, 288.0, 294.0, 293.0, 286.0, 236.0, 232.0, 277.0, 262.0, 288.0, 296.0, 302.0, 285.0, 285.0, 297.0, 314.0, 319.0, 296.0, 
291.0, 293.0, 286.0, 245.0, 253.0, 318.0, 309.0, 295.0, 284.0, 318.0, 312.0, 293.0, 289.0, 309.0, 321.0, 310.0, 320.0, 280.0, 302.0, 293.0, 283.0, 286.0, 301.0, 320.0, 313.0, 286.0, 296.0, 314.0, 307.0, 319.0, 308.0, 301.0, 281.0, 282.0, 305.0, 288.0, 294.0, 256.0, 269.0, 246.0, 232.0, 301.0, 281.0, 298.0, 284.0, 291.0, 291.0, 303.0, 273.0, 282.0, 294.0, 316.0, 311.0, 293.0, 286.0, 308.0, 322.0, 270.0, 266.0, 288.0, 291.0, 324.0, 306.0, 291.0, 291.0, 291.0, 291.0, 288.0, 291.0, 293.0, 289.0, 296.0, 274.0, 297.0, 279.0, 315.0, 315.0, 308.0, 319.0, 266.0, 270.0, 283.0, 296.0, 290.0, 292.0, 283.0, 296.0, 296.0, 291.0, 293.0, 291.0, 316.0, 314.0, 281.0, 301.0, 316.0, 314.0, 290.0, 292.0, 289.0, 293.0, 286.0, 293.0]}, "sampler_perf": {"mean_env_wait_ms": 1.1521297133023998, "mean_processing_ms": 0.2984947227408811, "mean_inference_ms": 1.7084618155808986}, "off_policy_estimator": {}, "info": {"num_steps_trained": 5136000, "num_steps_sampled": 2739200, "sample_time_ms": 20745.898, "load_time_ms": 36.566, "grad_time_ms": 9296.474, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.004741498734802008, "policy_loss": -0.003847965970635414, "vf_loss": 91.54241943359375, "vf_explained_var": 0.7623968124389648, "kl": 0.00236759171821177, "entropy": 1.1295729875564575, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2739200, "episodes_total": 6848, "training_iteration": 214, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-32-19", "timestamp": 1660253539, "time_this_iter_s": 29.774744749069214, "time_total_s": 11954.380000829697, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 11954.380000829697, "timesteps_since_restore": 2739200, "iterations_since_restore": 214, "perf": {"cpu_util_percent": 27.035714285714292, "ram_util_percent": 58.526190476190486}}
-{"episode_reward_max": 636.0, "episode_reward_min": 345.0, "episode_reward_mean": 584.13, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 172.0}, "policy_reward_max": {"ppo": 326.0}, "policy_reward_mean": {"ppo": 292.065}, "custom_metrics": {"sparse_reward_mean": 202.0, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 180.13, "shaped_reward_min": 105, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.09, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 18.77, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 15.7, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 17.86, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 25, "onion_drop_agent_0_mean": 0.6, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.95, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 7, "useful_onion_drop_agent_0_mean": 0.23, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, "useful_onion_drop_agent_1_mean": 0.43, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 15.22, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 17.38, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.39, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.22, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.6, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.61, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.56, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.39, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.04, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.68, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 10, "soup_pickup_agent_1_mean": 4.73, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.52, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.63, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.07, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 4, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.22, 
"optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 17.38, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.22, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 17.38, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [636.0, 558.0, 579.0, 576.0, 579.0, 536.0, 633.0, 584.0, 587.0, 576.0, 536.0, 627.0, 582.0, 627.0, 456.0, 584.0, 579.0, 582.0, 587.0, 621.0, 518.0, 567.0, 633.0, 627.0, 624.0, 582.0, 590.0, 621.0, 630.0, 630.0, 582.0, 636.0, 582.0, 587.0, 582.0, 525.0, 478.0, 582.0, 582.0, 582.0, 576.0, 576.0, 627.0, 579.0, 630.0, 536.0, 579.0, 630.0, 582.0, 582.0, 579.0, 582.0, 570.0, 576.0, 630.0, 627.0, 536.0, 579.0, 582.0, 579.0, 587.0, 584.0, 630.0, 582.0, 630.0, 582.0, 582.0, 579.0, 582.0, 630.0, 587.0, 576.0, 582.0, 587.0, 630.0, 587.0, 630.0, 476.0, 579.0, 587.0, 584.0, 587.0, 627.0, 579.0, 558.0, 479.0, 630.0, 579.0, 630.0, 579.0, 630.0, 584.0, 576.0, 627.0, 627.0, 345.0, 579.0, 621.0, 582.0, 519.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [314.0, 322.0, 281.0, 277.0, 292.0, 287.0, 287.0, 289.0, 286.0, 293.0, 262.0, 274.0, 319.0, 314.0, 297.0, 287.0, 288.0, 299.0, 285.0, 291.0, 278.0, 258.0, 308.0, 319.0, 304.0, 278.0, 326.0, 301.0, 225.0, 231.0, 295.0, 289.0, 300.0, 279.0, 301.0, 281.0, 293.0, 294.0, 315.0, 306.0, 247.0, 271.0, 280.0, 287.0, 326.0, 307.0, 311.0, 316.0, 317.0, 307.0, 286.0, 296.0, 290.0, 300.0, 313.0, 308.0, 313.0, 317.0, 318.0, 312.0, 296.0, 286.0, 321.0, 315.0, 301.0, 281.0, 282.0, 305.0, 288.0, 294.0, 256.0, 269.0, 246.0, 232.0, 301.0, 281.0, 298.0, 284.0, 291.0, 291.0, 303.0, 273.0, 282.0, 294.0, 316.0, 311.0, 293.0, 286.0, 308.0, 322.0, 270.0, 266.0, 288.0, 291.0, 324.0, 306.0, 291.0, 
291.0, 291.0, 291.0, 288.0, 291.0, 293.0, 289.0, 296.0, 274.0, 297.0, 279.0, 315.0, 315.0, 308.0, 319.0, 266.0, 270.0, 283.0, 296.0, 290.0, 292.0, 283.0, 296.0, 296.0, 291.0, 293.0, 291.0, 316.0, 314.0, 281.0, 301.0, 316.0, 314.0, 290.0, 292.0, 289.0, 293.0, 286.0, 293.0, 299.0, 283.0, 309.0, 321.0, 287.0, 300.0, 290.0, 286.0, 289.0, 293.0, 296.0, 291.0, 314.0, 316.0, 303.0, 284.0, 314.0, 316.0, 243.0, 233.0, 273.0, 306.0, 296.0, 291.0, 291.0, 293.0, 298.0, 289.0, 307.0, 320.0, 284.0, 295.0, 264.0, 294.0, 238.0, 241.0, 326.0, 304.0, 288.0, 291.0, 319.0, 311.0, 289.0, 290.0, 311.0, 319.0, 292.0, 292.0, 295.0, 281.0, 321.0, 306.0, 316.0, 311.0, 173.0, 172.0, 295.0, 284.0, 302.0, 319.0, 291.0, 291.0, 259.0, 260.0]}, "sampler_perf": {"mean_env_wait_ms": 1.1488626119910377, "mean_processing_ms": 0.297841305997747, "mean_inference_ms": 1.7053304969218863}, "off_policy_estimator": {}, "info": {"num_steps_trained": 5160000, "num_steps_sampled": 2752000, "sample_time_ms": 20853.401, "load_time_ms": 36.595, "grad_time_ms": 9300.168, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.004025696776807308, "policy_loss": -0.0038255956023931503, "vf_loss": 84.18643951416016, "vf_explained_var": 0.7665885090827942, "kl": 0.0019039264880120754, "entropy": 1.1346958875656128, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2752000, "episodes_total": 6880, "training_iteration": 215, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-32-49", "timestamp": 1660253569, "time_this_iter_s": 30.592424869537354, "time_total_s": 11984.972425699234, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 11984.972425699234, "timesteps_since_restore": 2752000, "iterations_since_restore": 215, "perf": {"cpu_util_percent": 30.46511627906977, "ram_util_percent": 58.576744186046504}}
-{"episode_reward_max": 636.0, "episode_reward_min": 345.0, "episode_reward_mean": 589.1, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 172.0}, "policy_reward_max": {"ppo": 326.0}, "policy_reward_mean": {"ppo": 294.55}, "custom_metrics": {"sparse_reward_mean": 204.0, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 181.1, "shaped_reward_min": 105, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.05, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 18.94, "onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 15.63, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 18.02, "useful_onion_pickup_agent_1_min": 11, "useful_onion_pickup_agent_1_max": 25, "onion_drop_agent_0_mean": 0.63, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.88, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 7, "useful_onion_drop_agent_0_mean": 0.25, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, "useful_onion_drop_agent_1_mean": 0.38, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 15.09, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 17.64, "potting_onion_agent_1_min": 11, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 6.49, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.19, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.66, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.61, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.56, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.36, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.8, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 10, "soup_pickup_agent_1_mean": 4.68, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.63, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.61, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.08, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 4, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.09, 
"optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 17.64, "optimal_onion_potting_agent_1_min": 11, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.09, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 17.64, "viable_onion_potting_agent_1_min": 11, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [576.0, 630.0, 627.0, 582.0, 627.0, 579.0, 579.0, 567.0, 587.0, 587.0, 630.0, 579.0, 582.0, 576.0, 582.0, 630.0, 570.0, 582.0, 627.0, 587.0, 582.0, 582.0, 630.0, 582.0, 627.0, 582.0, 627.0, 587.0, 633.0, 587.0, 582.0, 630.0, 630.0, 582.0, 582.0, 579.0, 582.0, 630.0, 587.0, 576.0, 582.0, 587.0, 630.0, 587.0, 630.0, 476.0, 579.0, 587.0, 584.0, 587.0, 627.0, 579.0, 558.0, 479.0, 630.0, 579.0, 630.0, 579.0, 630.0, 584.0, 576.0, 627.0, 627.0, 345.0, 579.0, 621.0, 582.0, 519.0, 636.0, 558.0, 579.0, 576.0, 579.0, 536.0, 633.0, 584.0, 587.0, 576.0, 536.0, 627.0, 582.0, 627.0, 456.0, 584.0, 579.0, 582.0, 587.0, 621.0, 518.0, 567.0, 633.0, 627.0, 624.0, 582.0, 590.0, 621.0, 630.0, 630.0, 582.0, 636.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [294.0, 282.0, 317.0, 313.0, 309.0, 318.0, 296.0, 286.0, 316.0, 311.0, 288.0, 291.0, 285.0, 294.0, 286.0, 281.0, 298.0, 289.0, 298.0, 289.0, 314.0, 316.0, 290.0, 289.0, 289.0, 293.0, 291.0, 285.0, 285.0, 297.0, 306.0, 324.0, 298.0, 272.0, 283.0, 299.0, 311.0, 316.0, 293.0, 294.0, 300.0, 282.0, 293.0, 289.0, 319.0, 311.0, 285.0, 297.0, 310.0, 317.0, 293.0, 289.0, 310.0, 317.0, 300.0, 287.0, 316.0, 317.0, 298.0, 289.0, 283.0, 299.0, 319.0, 311.0, 316.0, 314.0, 290.0, 292.0, 289.0, 293.0, 286.0, 293.0, 299.0, 283.0, 309.0, 321.0, 287.0, 300.0, 290.0, 286.0, 289.0, 293.0, 296.0, 291.0, 314.0, 316.0, 303.0, 284.0, 314.0, 316.0, 243.0, 233.0, 273.0, 306.0, 296.0, 291.0, 291.0, 
293.0, 298.0, 289.0, 307.0, 320.0, 284.0, 295.0, 264.0, 294.0, 238.0, 241.0, 326.0, 304.0, 288.0, 291.0, 319.0, 311.0, 289.0, 290.0, 311.0, 319.0, 292.0, 292.0, 295.0, 281.0, 321.0, 306.0, 316.0, 311.0, 173.0, 172.0, 295.0, 284.0, 302.0, 319.0, 291.0, 291.0, 259.0, 260.0, 314.0, 322.0, 281.0, 277.0, 292.0, 287.0, 287.0, 289.0, 286.0, 293.0, 262.0, 274.0, 319.0, 314.0, 297.0, 287.0, 288.0, 299.0, 285.0, 291.0, 278.0, 258.0, 308.0, 319.0, 304.0, 278.0, 326.0, 301.0, 225.0, 231.0, 295.0, 289.0, 300.0, 279.0, 301.0, 281.0, 293.0, 294.0, 315.0, 306.0, 247.0, 271.0, 280.0, 287.0, 326.0, 307.0, 311.0, 316.0, 317.0, 307.0, 286.0, 296.0, 290.0, 300.0, 313.0, 308.0, 313.0, 317.0, 318.0, 312.0, 296.0, 286.0, 321.0, 315.0]}, "sampler_perf": {"mean_env_wait_ms": 1.1456158536618073, "mean_processing_ms": 0.29719304474995795, "mean_inference_ms": 1.7020409366400755}, "off_policy_estimator": {}, "info": {"num_steps_trained": 5184000, "num_steps_sampled": 2764800, "sample_time_ms": 20854.663, "load_time_ms": 36.839, "grad_time_ms": 9330.064, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0055513703264296055, "policy_loss": -0.0025626528076827526, "vf_loss": 86.77967071533203, "vf_explained_var": 0.7667043805122375, "kl": 0.00211916770786047, "entropy": 1.1278961896896362, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2764800, "episodes_total": 6912, "training_iteration": 216, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-33-19", "timestamp": 1660253599, "time_this_iter_s": 29.99899387359619, "time_total_s": 12014.97141957283, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 12014.97141957283, "timesteps_since_restore": 2764800, "iterations_since_restore": 216, "perf": {"cpu_util_percent": 32.32380952380952, "ram_util_percent": 58.607142857142854}}
-{"episode_reward_max": 639.0, "episode_reward_min": 456.0, "episode_reward_mean": 596.37, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 225.0}, "policy_reward_max": {"ppo": 330.0}, "policy_reward_mean": {"ppo": 298.185}, "custom_metrics": {"sparse_reward_mean": 206.6, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 183.17, "shaped_reward_min": 136, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.25, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 18.9, "onion_pickup_agent_1_min": 12, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 15.82, "useful_onion_pickup_agent_0_min": 9, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 18.07, "useful_onion_pickup_agent_1_min": 11, "useful_onion_pickup_agent_1_max": 25, "onion_drop_agent_0_mean": 0.6, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.64, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.22, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.24, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 15.31, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.83, "potting_onion_agent_1_min": 12, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 6.48, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.32, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.71, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.69, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.53, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.44, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.06, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.77, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.74, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.7, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.67, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.31, 
"optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.83, "optimal_onion_potting_agent_1_min": 12, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.31, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.83, "viable_onion_potting_agent_1_min": 12, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [627.0, 630.0, 636.0, 582.0, 584.0, 587.0, 636.0, 630.0, 573.0, 573.0, 630.0, 582.0, 627.0, 576.0, 633.0, 630.0, 633.0, 587.0, 633.0, 579.0, 579.0, 587.0, 582.0, 627.0, 630.0, 639.0, 587.0, 582.0, 579.0, 582.0, 576.0, 636.0, 579.0, 621.0, 582.0, 519.0, 636.0, 558.0, 579.0, 576.0, 579.0, 536.0, 633.0, 584.0, 587.0, 576.0, 536.0, 627.0, 582.0, 627.0, 456.0, 584.0, 579.0, 582.0, 587.0, 621.0, 518.0, 567.0, 633.0, 627.0, 624.0, 582.0, 590.0, 621.0, 630.0, 630.0, 582.0, 636.0, 576.0, 630.0, 627.0, 582.0, 627.0, 579.0, 579.0, 567.0, 587.0, 587.0, 630.0, 579.0, 582.0, 576.0, 582.0, 630.0, 570.0, 582.0, 627.0, 587.0, 582.0, 582.0, 630.0, 582.0, 627.0, 582.0, 627.0, 587.0, 633.0, 587.0, 582.0, 630.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [298.0, 329.0, 321.0, 309.0, 321.0, 315.0, 289.0, 293.0, 294.0, 290.0, 281.0, 306.0, 306.0, 330.0, 313.0, 317.0, 279.0, 294.0, 288.0, 285.0, 316.0, 314.0, 293.0, 289.0, 319.0, 308.0, 288.0, 288.0, 324.0, 309.0, 322.0, 308.0, 311.0, 322.0, 295.0, 292.0, 316.0, 317.0, 283.0, 296.0, 287.0, 292.0, 293.0, 294.0, 278.0, 304.0, 316.0, 311.0, 321.0, 309.0, 315.0, 324.0, 306.0, 281.0, 293.0, 289.0, 293.0, 286.0, 293.0, 289.0, 291.0, 285.0, 314.0, 322.0, 295.0, 284.0, 302.0, 319.0, 291.0, 291.0, 259.0, 260.0, 314.0, 322.0, 281.0, 277.0, 292.0, 287.0, 287.0, 289.0, 286.0, 293.0, 262.0, 274.0, 319.0, 314.0, 297.0, 287.0, 288.0, 299.0, 285.0, 291.0, 278.0, 258.0, 308.0, 319.0, 304.0, 
278.0, 326.0, 301.0, 225.0, 231.0, 295.0, 289.0, 300.0, 279.0, 301.0, 281.0, 293.0, 294.0, 315.0, 306.0, 247.0, 271.0, 280.0, 287.0, 326.0, 307.0, 311.0, 316.0, 317.0, 307.0, 286.0, 296.0, 290.0, 300.0, 313.0, 308.0, 313.0, 317.0, 318.0, 312.0, 296.0, 286.0, 321.0, 315.0, 294.0, 282.0, 317.0, 313.0, 309.0, 318.0, 296.0, 286.0, 316.0, 311.0, 288.0, 291.0, 285.0, 294.0, 286.0, 281.0, 298.0, 289.0, 298.0, 289.0, 314.0, 316.0, 290.0, 289.0, 289.0, 293.0, 291.0, 285.0, 285.0, 297.0, 306.0, 324.0, 298.0, 272.0, 283.0, 299.0, 311.0, 316.0, 293.0, 294.0, 300.0, 282.0, 293.0, 289.0, 319.0, 311.0, 285.0, 297.0, 310.0, 317.0, 293.0, 289.0, 310.0, 317.0, 300.0, 287.0, 316.0, 317.0, 298.0, 289.0, 283.0, 299.0, 319.0, 311.0]}, "sampler_perf": {"mean_env_wait_ms": 1.1424035037874523, "mean_processing_ms": 0.29655443936404674, "mean_inference_ms": 1.6989240589354977}, "off_policy_estimator": {}, "info": {"num_steps_trained": 5208000, "num_steps_sampled": 2777600, "sample_time_ms": 20971.783, "load_time_ms": 36.769, "grad_time_ms": 9428.242, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.002736276714131236, "policy_loss": -0.004988871049135923, "vf_loss": 82.89418029785156, "vf_explained_var": 0.7724503874778748, "kl": 0.00226503680460155, "entropy": 1.1285419464111328, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2777600, "episodes_total": 6944, "training_iteration": 217, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-33-52", "timestamp": 1660253632, "time_this_iter_s": 32.39657115936279, "time_total_s": 12047.367990732193, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 12047.367990732193, "timesteps_since_restore": 2777600, "iterations_since_restore": 217, "perf": {"cpu_util_percent": 33.11304347826087, "ram_util_percent": 58.56521739130436}}
-{"episode_reward_max": 639.0, "episode_reward_min": 291.0, "episode_reward_mean": 593.94, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 145.0}, "policy_reward_max": {"ppo": 330.0}, "policy_reward_mean": {"ppo": 296.97}, "custom_metrics": {"sparse_reward_mean": 205.4, "sparse_reward_min": 100, "sparse_reward_max": 220, "shaped_reward_mean": 183.14, "shaped_reward_min": 91, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.68, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 18.36, "onion_pickup_agent_1_min": 12, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.16, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 17.67, "useful_onion_pickup_agent_1_min": 11, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.69, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 0.52, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.28, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.17, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.59, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.53, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 6.2, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.46, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.6, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.91, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.47, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.42, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.05, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.05, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.6, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.86, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.52, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.78, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.59, 
"optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.53, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.59, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.53, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 579.0, 579.0, 587.0, 582.0, 630.0, 636.0, 579.0, 633.0, 579.0, 584.0, 582.0, 579.0, 579.0, 579.0, 630.0, 630.0, 587.0, 587.0, 518.0, 633.0, 587.0, 582.0, 530.0, 582.0, 462.0, 582.0, 627.0, 291.0, 587.0, 579.0, 582.0, 630.0, 630.0, 582.0, 636.0, 576.0, 630.0, 627.0, 582.0, 627.0, 579.0, 579.0, 567.0, 587.0, 587.0, 630.0, 579.0, 582.0, 576.0, 582.0, 630.0, 570.0, 582.0, 627.0, 587.0, 582.0, 582.0, 630.0, 582.0, 627.0, 582.0, 627.0, 587.0, 633.0, 587.0, 582.0, 630.0, 627.0, 630.0, 636.0, 582.0, 584.0, 587.0, 636.0, 630.0, 573.0, 573.0, 630.0, 582.0, 627.0, 576.0, 633.0, 630.0, 633.0, 587.0, 633.0, 579.0, 579.0, 587.0, 582.0, 627.0, 630.0, 639.0, 587.0, 582.0, 579.0, 582.0, 576.0, 636.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [276.0, 306.0, 282.0, 297.0, 298.0, 281.0, 301.0, 286.0, 283.0, 299.0, 319.0, 311.0, 321.0, 315.0, 299.0, 280.0, 315.0, 318.0, 294.0, 285.0, 298.0, 286.0, 288.0, 294.0, 297.0, 282.0, 282.0, 297.0, 290.0, 289.0, 311.0, 319.0, 309.0, 321.0, 296.0, 291.0, 291.0, 296.0, 262.0, 256.0, 317.0, 316.0, 294.0, 293.0, 296.0, 286.0, 262.0, 268.0, 289.0, 293.0, 228.0, 234.0, 288.0, 294.0, 313.0, 314.0, 145.0, 146.0, 291.0, 296.0, 293.0, 286.0, 286.0, 296.0, 313.0, 317.0, 318.0, 312.0, 296.0, 286.0, 321.0, 315.0, 294.0, 282.0, 317.0, 313.0, 309.0, 318.0, 296.0, 286.0, 316.0, 311.0, 288.0, 291.0, 285.0, 294.0, 286.0, 281.0, 298.0, 289.0, 298.0, 289.0, 314.0, 316.0, 290.0, 289.0, 289.0, 
293.0, 291.0, 285.0, 285.0, 297.0, 306.0, 324.0, 298.0, 272.0, 283.0, 299.0, 311.0, 316.0, 293.0, 294.0, 300.0, 282.0, 293.0, 289.0, 319.0, 311.0, 285.0, 297.0, 310.0, 317.0, 293.0, 289.0, 310.0, 317.0, 300.0, 287.0, 316.0, 317.0, 298.0, 289.0, 283.0, 299.0, 319.0, 311.0, 298.0, 329.0, 321.0, 309.0, 321.0, 315.0, 289.0, 293.0, 294.0, 290.0, 281.0, 306.0, 306.0, 330.0, 313.0, 317.0, 279.0, 294.0, 288.0, 285.0, 316.0, 314.0, 293.0, 289.0, 319.0, 308.0, 288.0, 288.0, 324.0, 309.0, 322.0, 308.0, 311.0, 322.0, 295.0, 292.0, 316.0, 317.0, 283.0, 296.0, 287.0, 292.0, 293.0, 294.0, 278.0, 304.0, 316.0, 311.0, 321.0, 309.0, 315.0, 324.0, 306.0, 281.0, 293.0, 289.0, 293.0, 286.0, 293.0, 289.0, 291.0, 285.0, 314.0, 322.0]}, "sampler_perf": {"mean_env_wait_ms": 1.1392367943050357, "mean_processing_ms": 0.29592632211468906, "mean_inference_ms": 1.6959597907664128}, "off_policy_estimator": {}, "info": {"num_steps_trained": 5232000, "num_steps_sampled": 2790400, "sample_time_ms": 21233.48, "load_time_ms": 36.919, "grad_time_ms": 9592.49, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.006457938347011805, "policy_loss": -0.0026744985952973366, "vf_loss": 97.0146713256836, "vf_explained_var": 0.7470273375511169, "kl": 0.0016420072643086314, "entropy": 1.1380563974380493, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2790400, "episodes_total": 6976, "training_iteration": 218, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-34-25", "timestamp": 1660253665, "time_this_iter_s": 33.2370343208313, "time_total_s": 12080.605025053024, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 12080.605025053024, "timesteps_since_restore": 2790400, "iterations_since_restore": 218, "perf": {"cpu_util_percent": 35.75531914893618, "ram_util_percent": 58.63191489361703}}
-{"episode_reward_max": 639.0, "episode_reward_min": 291.0, "episode_reward_mean": 593.58, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 145.0}, "policy_reward_max": {"ppo": 332.0}, "policy_reward_mean": {"ppo": 296.79}, "custom_metrics": {"sparse_reward_mean": 205.4, "sparse_reward_min": 100, "sparse_reward_max": 220, "shaped_reward_mean": 182.78, "shaped_reward_min": 91, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.95, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 18.17, "onion_pickup_agent_1_min": 12, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.42, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 17.44, "useful_onion_pickup_agent_1_min": 11, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.72, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 7, "onion_drop_agent_1_mean": 0.56, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.25, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.18, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.81, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 17.31, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 6.21, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.59, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.48, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.91, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.6, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.47, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.05, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.05, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.49, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.99, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.41, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.89, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.81, 
"optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 17.31, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.81, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 17.31, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [636.0, 627.0, 555.0, 636.0, 627.0, 582.0, 630.0, 408.0, 582.0, 587.0, 570.0, 573.0, 633.0, 639.0, 633.0, 630.0, 630.0, 587.0, 573.0, 630.0, 587.0, 576.0, 630.0, 576.0, 630.0, 630.0, 584.0, 582.0, 582.0, 567.0, 633.0, 582.0, 633.0, 587.0, 582.0, 630.0, 627.0, 630.0, 636.0, 582.0, 584.0, 587.0, 636.0, 630.0, 573.0, 573.0, 630.0, 582.0, 627.0, 576.0, 633.0, 630.0, 633.0, 587.0, 633.0, 579.0, 579.0, 587.0, 582.0, 627.0, 630.0, 639.0, 587.0, 582.0, 579.0, 582.0, 576.0, 636.0, 582.0, 579.0, 579.0, 587.0, 582.0, 630.0, 636.0, 579.0, 633.0, 579.0, 584.0, 582.0, 579.0, 579.0, 579.0, 630.0, 630.0, 587.0, 587.0, 518.0, 633.0, 587.0, 582.0, 530.0, 582.0, 462.0, 582.0, 627.0, 291.0, 587.0, 579.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [319.0, 317.0, 318.0, 309.0, 269.0, 286.0, 324.0, 312.0, 319.0, 308.0, 291.0, 291.0, 318.0, 312.0, 202.0, 206.0, 292.0, 290.0, 293.0, 294.0, 278.0, 292.0, 289.0, 284.0, 332.0, 301.0, 322.0, 317.0, 311.0, 322.0, 326.0, 304.0, 317.0, 313.0, 301.0, 286.0, 277.0, 296.0, 316.0, 314.0, 296.0, 291.0, 272.0, 304.0, 319.0, 311.0, 291.0, 285.0, 313.0, 317.0, 316.0, 314.0, 290.0, 294.0, 299.0, 283.0, 285.0, 297.0, 281.0, 286.0, 317.0, 316.0, 287.0, 295.0, 316.0, 317.0, 298.0, 289.0, 283.0, 299.0, 319.0, 311.0, 298.0, 329.0, 321.0, 309.0, 321.0, 315.0, 289.0, 293.0, 294.0, 290.0, 281.0, 306.0, 306.0, 330.0, 313.0, 317.0, 279.0, 294.0, 288.0, 285.0, 316.0, 314.0, 293.0, 289.0, 319.0, 
308.0, 288.0, 288.0, 324.0, 309.0, 322.0, 308.0, 311.0, 322.0, 295.0, 292.0, 316.0, 317.0, 283.0, 296.0, 287.0, 292.0, 293.0, 294.0, 278.0, 304.0, 316.0, 311.0, 321.0, 309.0, 315.0, 324.0, 306.0, 281.0, 293.0, 289.0, 293.0, 286.0, 293.0, 289.0, 291.0, 285.0, 314.0, 322.0, 276.0, 306.0, 282.0, 297.0, 298.0, 281.0, 301.0, 286.0, 283.0, 299.0, 319.0, 311.0, 321.0, 315.0, 299.0, 280.0, 315.0, 318.0, 294.0, 285.0, 298.0, 286.0, 288.0, 294.0, 297.0, 282.0, 282.0, 297.0, 290.0, 289.0, 311.0, 319.0, 309.0, 321.0, 296.0, 291.0, 291.0, 296.0, 262.0, 256.0, 317.0, 316.0, 294.0, 293.0, 296.0, 286.0, 262.0, 268.0, 289.0, 293.0, 228.0, 234.0, 288.0, 294.0, 313.0, 314.0, 145.0, 146.0, 291.0, 296.0, 293.0, 286.0, 286.0, 296.0]}, "sampler_perf": {"mean_env_wait_ms": 1.1361092001504756, "mean_processing_ms": 0.29530600936138574, "mean_inference_ms": 1.6931104739373604}, "off_policy_estimator": {}, "info": {"num_steps_trained": 5256000, "num_steps_sampled": 2803200, "sample_time_ms": 21336.415, "load_time_ms": 37.483, "grad_time_ms": 9561.229, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0005191893433220685, "policy_loss": -0.0074623264372348785, "vf_loss": 85.4925765991211, "vf_explained_var": 0.7601101994514465, "kl": 0.0019686671439558268, "entropy": 1.1354910135269165, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2803200, "episodes_total": 7008, "training_iteration": 219, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-34-55", "timestamp": 1660253695, "time_this_iter_s": 30.000843048095703, "time_total_s": 12110.60586810112, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 12110.60586810112, "timesteps_since_restore": 2803200, "iterations_since_restore": 219, "perf": {"cpu_util_percent": 30.46666666666667, "ram_util_percent": 58.68571428571429}}
-{"episode_reward_max": 639.0, "episode_reward_min": 291.0, "episode_reward_mean": 588.02, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 145.0}, "policy_reward_max": {"ppo": 332.0}, "policy_reward_mean": {"ppo": 294.01}, "custom_metrics": {"sparse_reward_mean": 203.2, "sparse_reward_min": 100, "sparse_reward_max": 220, "shaped_reward_mean": 181.62, "shaped_reward_min": 91, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.07, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 17.97, "onion_pickup_agent_1_min": 12, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.42, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 17.25, "useful_onion_pickup_agent_1_min": 11, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.77, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 7, "onion_drop_agent_1_mean": 0.68, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.26, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.28, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.87, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 17.03, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 6.15, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.71, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.4, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.94, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.59, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.54, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.04, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.04, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.43, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.99, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.35, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.88, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.87, 
"optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 17.03, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.87, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 17.03, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [630.0, 582.0, 624.0, 582.0, 518.0, 582.0, 630.0, 576.0, 633.0, 587.0, 582.0, 587.0, 582.0, 584.0, 582.0, 627.0, 544.0, 579.0, 576.0, 487.0, 582.0, 582.0, 584.0, 636.0, 582.0, 633.0, 630.0, 539.0, 579.0, 579.0, 627.0, 630.0, 579.0, 582.0, 576.0, 636.0, 582.0, 579.0, 579.0, 587.0, 582.0, 630.0, 636.0, 579.0, 633.0, 579.0, 584.0, 582.0, 579.0, 579.0, 579.0, 630.0, 630.0, 587.0, 587.0, 518.0, 633.0, 587.0, 582.0, 530.0, 582.0, 462.0, 582.0, 627.0, 291.0, 587.0, 579.0, 582.0, 636.0, 627.0, 555.0, 636.0, 627.0, 582.0, 630.0, 408.0, 582.0, 587.0, 570.0, 573.0, 633.0, 639.0, 633.0, 630.0, 630.0, 587.0, 573.0, 630.0, 587.0, 576.0, 630.0, 576.0, 630.0, 630.0, 584.0, 582.0, 582.0, 567.0, 633.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [308.0, 322.0, 292.0, 290.0, 321.0, 303.0, 292.0, 290.0, 252.0, 266.0, 297.0, 285.0, 313.0, 317.0, 298.0, 278.0, 324.0, 309.0, 285.0, 302.0, 291.0, 291.0, 291.0, 296.0, 291.0, 291.0, 287.0, 297.0, 291.0, 291.0, 310.0, 317.0, 272.0, 272.0, 290.0, 289.0, 293.0, 283.0, 257.0, 230.0, 285.0, 297.0, 285.0, 297.0, 300.0, 284.0, 312.0, 324.0, 277.0, 305.0, 313.0, 320.0, 321.0, 309.0, 265.0, 274.0, 294.0, 285.0, 299.0, 280.0, 311.0, 316.0, 309.0, 321.0, 293.0, 286.0, 293.0, 289.0, 291.0, 285.0, 314.0, 322.0, 276.0, 306.0, 282.0, 297.0, 298.0, 281.0, 301.0, 286.0, 283.0, 299.0, 319.0, 311.0, 321.0, 315.0, 299.0, 280.0, 315.0, 318.0, 294.0, 285.0, 298.0, 286.0, 288.0, 294.0, 297.0, 
282.0, 282.0, 297.0, 290.0, 289.0, 311.0, 319.0, 309.0, 321.0, 296.0, 291.0, 291.0, 296.0, 262.0, 256.0, 317.0, 316.0, 294.0, 293.0, 296.0, 286.0, 262.0, 268.0, 289.0, 293.0, 228.0, 234.0, 288.0, 294.0, 313.0, 314.0, 145.0, 146.0, 291.0, 296.0, 293.0, 286.0, 286.0, 296.0, 319.0, 317.0, 318.0, 309.0, 269.0, 286.0, 324.0, 312.0, 319.0, 308.0, 291.0, 291.0, 318.0, 312.0, 202.0, 206.0, 292.0, 290.0, 293.0, 294.0, 278.0, 292.0, 289.0, 284.0, 332.0, 301.0, 322.0, 317.0, 311.0, 322.0, 326.0, 304.0, 317.0, 313.0, 301.0, 286.0, 277.0, 296.0, 316.0, 314.0, 296.0, 291.0, 272.0, 304.0, 319.0, 311.0, 291.0, 285.0, 313.0, 317.0, 316.0, 314.0, 290.0, 294.0, 299.0, 283.0, 285.0, 297.0, 281.0, 286.0, 317.0, 316.0, 287.0, 295.0]}, "sampler_perf": {"mean_env_wait_ms": 1.133009827763421, "mean_processing_ms": 0.29469029655090995, "mean_inference_ms": 1.6902501086005228}, "off_policy_estimator": {}, "info": {"num_steps_trained": 5280000, "num_steps_sampled": 2816000, "sample_time_ms": 21400.362, "load_time_ms": 37.346, "grad_time_ms": 9606.188, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.002275400562211871, "policy_loss": -0.0062116296030581, "vf_loss": 90.5528793334961, "vf_explained_var": 0.7516798973083496, "kl": 0.0019114302704110742, "entropy": 1.1365100145339966, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2816000, "episodes_total": 7040, "training_iteration": 220, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-35-26", "timestamp": 1660253726, "time_this_iter_s": 30.873941659927368, "time_total_s": 12141.479809761047, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 12141.479809761047, "timesteps_since_restore": 2816000, "iterations_since_restore": 220, "perf": {"cpu_util_percent": 31.518181818181816, "ram_util_percent": 58.6159090909091}}
-{"episode_reward_max": 639.0, "episode_reward_min": 291.0, "episode_reward_mean": 590.33, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 145.0}, "policy_reward_max": {"ppo": 332.0}, "policy_reward_mean": {"ppo": 295.165}, "custom_metrics": {"sparse_reward_mean": 204.2, "sparse_reward_min": 100, "sparse_reward_max": 220, "shaped_reward_mean": 181.93, "shaped_reward_min": 91, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.2, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 17.88, "onion_pickup_agent_1_min": 12, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.64, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 17.09, "useful_onion_pickup_agent_1_min": 11, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.71, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 7, "onion_drop_agent_1_mean": 0.69, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.23, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.32, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 16.1, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 16.9, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 6.16, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.89, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.29, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.02, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.63, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.64, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.05, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.34, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.1, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.29, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.99, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.04, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.1, 
"optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 16.9, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.1, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 16.9, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 630.0, 579.0, 573.0, 579.0, 627.0, 630.0, 582.0, 587.0, 627.0, 630.0, 636.0, 573.0, 582.0, 582.0, 579.0, 582.0, 627.0, 570.0, 582.0, 573.0, 593.0, 590.0, 582.0, 579.0, 555.0, 627.0, 539.0, 636.0, 582.0, 633.0, 582.0, 291.0, 587.0, 579.0, 582.0, 636.0, 627.0, 555.0, 636.0, 627.0, 582.0, 630.0, 408.0, 582.0, 587.0, 570.0, 573.0, 633.0, 639.0, 633.0, 630.0, 630.0, 587.0, 573.0, 630.0, 587.0, 576.0, 630.0, 576.0, 630.0, 630.0, 584.0, 582.0, 582.0, 567.0, 633.0, 582.0, 630.0, 582.0, 624.0, 582.0, 518.0, 582.0, 630.0, 576.0, 633.0, 587.0, 582.0, 587.0, 582.0, 584.0, 582.0, 627.0, 544.0, 579.0, 576.0, 487.0, 582.0, 582.0, 584.0, 636.0, 582.0, 633.0, 630.0, 539.0, 579.0, 579.0, 627.0, 630.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [290.0, 292.0, 306.0, 324.0, 302.0, 277.0, 288.0, 285.0, 284.0, 295.0, 322.0, 305.0, 313.0, 317.0, 289.0, 293.0, 287.0, 300.0, 310.0, 317.0, 308.0, 322.0, 314.0, 322.0, 291.0, 282.0, 298.0, 284.0, 301.0, 281.0, 285.0, 294.0, 300.0, 282.0, 299.0, 328.0, 284.0, 286.0, 288.0, 294.0, 283.0, 290.0, 289.0, 304.0, 291.0, 299.0, 294.0, 288.0, 283.0, 296.0, 285.0, 270.0, 311.0, 316.0, 260.0, 279.0, 327.0, 309.0, 285.0, 297.0, 324.0, 309.0, 290.0, 292.0, 145.0, 146.0, 291.0, 296.0, 293.0, 286.0, 286.0, 296.0, 319.0, 317.0, 318.0, 309.0, 269.0, 286.0, 324.0, 312.0, 319.0, 308.0, 291.0, 291.0, 318.0, 312.0, 202.0, 206.0, 292.0, 290.0, 293.0, 294.0, 278.0, 292.0, 289.0, 284.0, 332.0, 
301.0, 322.0, 317.0, 311.0, 322.0, 326.0, 304.0, 317.0, 313.0, 301.0, 286.0, 277.0, 296.0, 316.0, 314.0, 296.0, 291.0, 272.0, 304.0, 319.0, 311.0, 291.0, 285.0, 313.0, 317.0, 316.0, 314.0, 290.0, 294.0, 299.0, 283.0, 285.0, 297.0, 281.0, 286.0, 317.0, 316.0, 287.0, 295.0, 308.0, 322.0, 292.0, 290.0, 321.0, 303.0, 292.0, 290.0, 252.0, 266.0, 297.0, 285.0, 313.0, 317.0, 298.0, 278.0, 324.0, 309.0, 285.0, 302.0, 291.0, 291.0, 291.0, 296.0, 291.0, 291.0, 287.0, 297.0, 291.0, 291.0, 310.0, 317.0, 272.0, 272.0, 290.0, 289.0, 293.0, 283.0, 257.0, 230.0, 285.0, 297.0, 285.0, 297.0, 300.0, 284.0, 312.0, 324.0, 277.0, 305.0, 313.0, 320.0, 321.0, 309.0, 265.0, 274.0, 294.0, 285.0, 299.0, 280.0, 311.0, 316.0, 309.0, 321.0]}, "sampler_perf": {"mean_env_wait_ms": 1.1299217967931974, "mean_processing_ms": 0.29407401911825776, "mean_inference_ms": 1.6873151851132406}, "off_policy_estimator": {}, "info": {"num_steps_trained": 5304000, "num_steps_sampled": 2828800, "sample_time_ms": 21404.207, "load_time_ms": 36.943, "grad_time_ms": 9639.523, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.004844650160521269, "policy_loss": -0.004174739122390747, "vf_loss": 95.8445816040039, "vf_explained_var": 0.7459821701049805, "kl": 0.0019909220281988382, "entropy": 1.1301350593566895, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2828800, "episodes_total": 7072, "training_iteration": 221, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-35-57", "timestamp": 1660253757, "time_this_iter_s": 30.906293869018555, "time_total_s": 12172.386103630066, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 12172.386103630066, "timesteps_since_restore": 2828800, "iterations_since_restore": 221, "perf": {"cpu_util_percent": 27.49545454545455, "ram_util_percent": 58.63636363636363}}
-{"episode_reward_max": 639.0, "episode_reward_min": 487.0, "episode_reward_mean": 596.02, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 230.0}, "policy_reward_max": {"ppo": 329.0}, "policy_reward_mean": {"ppo": 298.01}, "custom_metrics": {"sparse_reward_mean": 206.2, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 183.62, "shaped_reward_min": 155, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.36, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 26, "onion_pickup_agent_1_mean": 17.93, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 16.85, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 26, "useful_onion_pickup_agent_1_mean": 17.1, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.68, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.63, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.24, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, "useful_onion_drop_agent_1_mean": 0.34, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 16.23, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 17.04, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 6.0, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.99, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.25, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.22, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.55, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.58, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.06, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.27, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.23, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.25, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.12, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.23, 
"optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 17.04, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.23, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 17.04, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 579.0, 579.0, 630.0, 630.0, 627.0, 587.0, 639.0, 587.0, 630.0, 582.0, 633.0, 587.0, 627.0, 579.0, 576.0, 633.0, 582.0, 570.0, 630.0, 627.0, 579.0, 627.0, 582.0, 633.0, 633.0, 630.0, 579.0, 582.0, 627.0, 627.0, 576.0, 582.0, 567.0, 633.0, 582.0, 630.0, 582.0, 624.0, 582.0, 518.0, 582.0, 630.0, 576.0, 633.0, 587.0, 582.0, 587.0, 582.0, 584.0, 582.0, 627.0, 544.0, 579.0, 576.0, 487.0, 582.0, 582.0, 584.0, 636.0, 582.0, 633.0, 630.0, 539.0, 579.0, 579.0, 627.0, 630.0, 582.0, 630.0, 579.0, 573.0, 579.0, 627.0, 630.0, 582.0, 587.0, 627.0, 630.0, 636.0, 573.0, 582.0, 582.0, 579.0, 582.0, 627.0, 570.0, 582.0, 573.0, 593.0, 590.0, 582.0, 579.0, 555.0, 627.0, 539.0, 636.0, 582.0, 633.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [296.0, 286.0, 283.0, 296.0, 293.0, 286.0, 310.0, 320.0, 318.0, 312.0, 318.0, 309.0, 290.0, 297.0, 325.0, 314.0, 290.0, 297.0, 319.0, 311.0, 291.0, 291.0, 309.0, 324.0, 295.0, 292.0, 298.0, 329.0, 280.0, 299.0, 280.0, 296.0, 318.0, 315.0, 293.0, 289.0, 283.0, 287.0, 319.0, 311.0, 305.0, 322.0, 285.0, 294.0, 309.0, 318.0, 285.0, 297.0, 320.0, 313.0, 311.0, 322.0, 313.0, 317.0, 296.0, 283.0, 286.0, 296.0, 305.0, 322.0, 310.0, 317.0, 289.0, 287.0, 285.0, 297.0, 281.0, 286.0, 317.0, 316.0, 287.0, 295.0, 308.0, 322.0, 292.0, 290.0, 321.0, 303.0, 292.0, 290.0, 252.0, 266.0, 297.0, 285.0, 313.0, 317.0, 298.0, 278.0, 324.0, 309.0, 285.0, 302.0, 291.0, 291.0, 291.0, 296.0, 291.0, 
291.0, 287.0, 297.0, 291.0, 291.0, 310.0, 317.0, 272.0, 272.0, 290.0, 289.0, 293.0, 283.0, 257.0, 230.0, 285.0, 297.0, 285.0, 297.0, 300.0, 284.0, 312.0, 324.0, 277.0, 305.0, 313.0, 320.0, 321.0, 309.0, 265.0, 274.0, 294.0, 285.0, 299.0, 280.0, 311.0, 316.0, 309.0, 321.0, 290.0, 292.0, 306.0, 324.0, 302.0, 277.0, 288.0, 285.0, 284.0, 295.0, 322.0, 305.0, 313.0, 317.0, 289.0, 293.0, 287.0, 300.0, 310.0, 317.0, 308.0, 322.0, 314.0, 322.0, 291.0, 282.0, 298.0, 284.0, 301.0, 281.0, 285.0, 294.0, 300.0, 282.0, 299.0, 328.0, 284.0, 286.0, 288.0, 294.0, 283.0, 290.0, 289.0, 304.0, 291.0, 299.0, 294.0, 288.0, 283.0, 296.0, 285.0, 270.0, 311.0, 316.0, 260.0, 279.0, 327.0, 309.0, 285.0, 297.0, 324.0, 309.0, 290.0, 292.0]}, "sampler_perf": {"mean_env_wait_ms": 1.1268569615111614, "mean_processing_ms": 0.2934624000398477, "mean_inference_ms": 1.6844698808001166}, "off_policy_estimator": {}, "info": {"num_steps_trained": 5328000, "num_steps_sampled": 2841600, "sample_time_ms": 21356.225, "load_time_ms": 36.973, "grad_time_ms": 9654.508, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0037463188637048006, "policy_loss": -0.0037510646507143974, "vf_loss": 80.60189056396484, "vf_explained_var": 0.7646245360374451, "kl": 0.002355078933760524, "entropy": 1.125628113746643, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2841600, "episodes_total": 7104, "training_iteration": 222, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-36-29", "timestamp": 1660253789, "time_this_iter_s": 31.9894540309906, "time_total_s": 12204.375557661057, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 12204.375557661057, "timesteps_since_restore": 2841600, "iterations_since_restore": 222, "perf": {"cpu_util_percent": 30.18222222222223, "ram_util_percent": 58.70444444444445}}
-{"episode_reward_max": 639.0, "episode_reward_min": 519.0, "episode_reward_mean": 601.33, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 258.0}, "policy_reward_max": {"ppo": 329.0}, "policy_reward_mean": {"ppo": 300.665}, "custom_metrics": {"sparse_reward_mean": 208.6, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 184.13, "shaped_reward_min": 152, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.2, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 26, "onion_pickup_agent_1_mean": 18.12, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 16.83, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 26, "useful_onion_pickup_agent_1_mean": 17.26, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.58, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.64, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.24, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, "useful_onion_drop_agent_1_mean": 0.3, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 16.17, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 17.21, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 6.06, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.79, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.34, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.14, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.52, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.46, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.37, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.16, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.37, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.09, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.17, 
"optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 17.21, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.17, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 17.21, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 633.0, 579.0, 582.0, 582.0, 633.0, 627.0, 630.0, 630.0, 636.0, 633.0, 579.0, 519.0, 552.0, 627.0, 582.0, 582.0, 627.0, 630.0, 582.0, 582.0, 627.0, 630.0, 582.0, 587.0, 630.0, 627.0, 582.0, 627.0, 576.0, 630.0, 630.0, 579.0, 579.0, 627.0, 630.0, 582.0, 630.0, 579.0, 573.0, 579.0, 627.0, 630.0, 582.0, 587.0, 627.0, 630.0, 636.0, 573.0, 582.0, 582.0, 579.0, 582.0, 627.0, 570.0, 582.0, 573.0, 593.0, 590.0, 582.0, 579.0, 555.0, 627.0, 539.0, 636.0, 582.0, 633.0, 582.0, 582.0, 579.0, 579.0, 630.0, 630.0, 627.0, 587.0, 639.0, 587.0, 630.0, 582.0, 633.0, 587.0, 627.0, 579.0, 576.0, 633.0, 582.0, 570.0, 630.0, 627.0, 579.0, 627.0, 582.0, 633.0, 633.0, 630.0, 579.0, 582.0, 627.0, 627.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [290.0, 292.0, 327.0, 306.0, 292.0, 287.0, 280.0, 302.0, 282.0, 300.0, 318.0, 315.0, 305.0, 322.0, 314.0, 316.0, 323.0, 307.0, 319.0, 317.0, 322.0, 311.0, 297.0, 282.0, 261.0, 258.0, 274.0, 278.0, 308.0, 319.0, 290.0, 292.0, 296.0, 286.0, 308.0, 319.0, 322.0, 308.0, 288.0, 294.0, 290.0, 292.0, 324.0, 303.0, 309.0, 321.0, 290.0, 292.0, 298.0, 289.0, 314.0, 316.0, 319.0, 308.0, 288.0, 294.0, 313.0, 314.0, 288.0, 288.0, 309.0, 321.0, 324.0, 306.0, 294.0, 285.0, 299.0, 280.0, 311.0, 316.0, 309.0, 321.0, 290.0, 292.0, 306.0, 324.0, 302.0, 277.0, 288.0, 285.0, 284.0, 295.0, 322.0, 305.0, 313.0, 317.0, 289.0, 293.0, 287.0, 300.0, 310.0, 317.0, 308.0, 322.0, 314.0, 322.0, 291.0, 
282.0, 298.0, 284.0, 301.0, 281.0, 285.0, 294.0, 300.0, 282.0, 299.0, 328.0, 284.0, 286.0, 288.0, 294.0, 283.0, 290.0, 289.0, 304.0, 291.0, 299.0, 294.0, 288.0, 283.0, 296.0, 285.0, 270.0, 311.0, 316.0, 260.0, 279.0, 327.0, 309.0, 285.0, 297.0, 324.0, 309.0, 290.0, 292.0, 296.0, 286.0, 283.0, 296.0, 293.0, 286.0, 310.0, 320.0, 318.0, 312.0, 318.0, 309.0, 290.0, 297.0, 325.0, 314.0, 290.0, 297.0, 319.0, 311.0, 291.0, 291.0, 309.0, 324.0, 295.0, 292.0, 298.0, 329.0, 280.0, 299.0, 280.0, 296.0, 318.0, 315.0, 293.0, 289.0, 283.0, 287.0, 319.0, 311.0, 305.0, 322.0, 285.0, 294.0, 309.0, 318.0, 285.0, 297.0, 320.0, 313.0, 311.0, 322.0, 313.0, 317.0, 296.0, 283.0, 286.0, 296.0, 305.0, 322.0, 310.0, 317.0, 289.0, 287.0]}, "sampler_perf": {"mean_env_wait_ms": 1.123810088157581, "mean_processing_ms": 0.2928524721479363, "mean_inference_ms": 1.6814399707435803}, "off_policy_estimator": {}, "info": {"num_steps_trained": 5352000, "num_steps_sampled": 2854400, "sample_time_ms": 21091.047, "load_time_ms": 36.918, "grad_time_ms": 9784.155, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -5.6165892601711676e-05, "policy_loss": -0.007852478884160519, "vf_loss": 83.60053253173828, "vf_explained_var": 0.7575058937072754, "kl": 0.001709200325421989, "entropy": 1.1274746656417847, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2854400, "episodes_total": 7136, "training_iteration": 223, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-36-59", "timestamp": 1660253819, "time_this_iter_s": 29.606478929519653, "time_total_s": 12233.982036590576, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 12233.982036590576, "timesteps_since_restore": 2854400, "iterations_since_restore": 223, "perf": {"cpu_util_percent": 31.040476190476188, "ram_util_percent": 58.538095238095245}}
-{"episode_reward_max": 639.0, "episode_reward_min": 66.0, "episode_reward_mean": 595.49, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 32.0}, "policy_reward_max": {"ppo": 332.0}, "policy_reward_mean": {"ppo": 297.745}, "custom_metrics": {"sparse_reward_mean": 206.4, "sparse_reward_min": 20, "sparse_reward_max": 220, "shaped_reward_mean": 182.69, "shaped_reward_min": 26, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.76, "onion_pickup_agent_0_min": 4, "onion_pickup_agent_0_max": 26, "onion_pickup_agent_1_mean": 18.12, "onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 16.37, "useful_onion_pickup_agent_0_min": 2, "useful_onion_pickup_agent_0_max": 26, "useful_onion_pickup_agent_1_mean": 17.27, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.52, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.53, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.19, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.26, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 15.82, "potting_onion_agent_0_min": 2, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 17.3, "potting_onion_agent_1_min": 4, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 6.15, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.55, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.4, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.01, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.58, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.34, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.07, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.05, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.41, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.02, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.39, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.95, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.82, 
"optimal_onion_potting_agent_0_min": 2, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 17.3, "optimal_onion_potting_agent_1_min": 4, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.82, "viable_onion_potting_agent_0_min": 2, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 17.3, "viable_onion_potting_agent_1_min": 4, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 630.0, 582.0, 636.0, 630.0, 66.0, 573.0, 633.0, 573.0, 570.0, 579.0, 582.0, 630.0, 633.0, 582.0, 582.0, 579.0, 572.0, 579.0, 582.0, 587.0, 587.0, 630.0, 630.0, 630.0, 582.0, 582.0, 579.0, 419.0, 587.0, 633.0, 587.0, 636.0, 582.0, 633.0, 582.0, 582.0, 579.0, 579.0, 630.0, 630.0, 627.0, 587.0, 639.0, 587.0, 630.0, 582.0, 633.0, 587.0, 627.0, 579.0, 576.0, 633.0, 582.0, 570.0, 630.0, 627.0, 579.0, 627.0, 582.0, 633.0, 633.0, 630.0, 579.0, 582.0, 627.0, 627.0, 576.0, 582.0, 633.0, 579.0, 582.0, 582.0, 633.0, 627.0, 630.0, 630.0, 636.0, 633.0, 579.0, 519.0, 552.0, 627.0, 582.0, 582.0, 627.0, 630.0, 582.0, 582.0, 627.0, 630.0, 582.0, 587.0, 630.0, 627.0, 582.0, 627.0, 576.0, 630.0, 630.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [287.0, 295.0, 321.0, 309.0, 298.0, 284.0, 316.0, 320.0, 316.0, 314.0, 34.0, 32.0, 277.0, 296.0, 309.0, 324.0, 279.0, 294.0, 285.0, 285.0, 285.0, 294.0, 285.0, 297.0, 332.0, 298.0, 316.0, 317.0, 296.0, 286.0, 296.0, 286.0, 288.0, 291.0, 274.0, 298.0, 290.0, 289.0, 283.0, 299.0, 291.0, 296.0, 305.0, 282.0, 311.0, 319.0, 303.0, 327.0, 312.0, 318.0, 296.0, 286.0, 291.0, 291.0, 293.0, 286.0, 214.0, 205.0, 287.0, 300.0, 323.0, 310.0, 288.0, 299.0, 327.0, 309.0, 285.0, 297.0, 324.0, 309.0, 290.0, 292.0, 296.0, 286.0, 283.0, 296.0, 293.0, 286.0, 310.0, 320.0, 318.0, 312.0, 318.0, 309.0, 290.0, 297.0, 325.0, 314.0, 290.0, 297.0, 319.0, 311.0, 291.0, 291.0, 309.0, 324.0, 295.0, 
292.0, 298.0, 329.0, 280.0, 299.0, 280.0, 296.0, 318.0, 315.0, 293.0, 289.0, 283.0, 287.0, 319.0, 311.0, 305.0, 322.0, 285.0, 294.0, 309.0, 318.0, 285.0, 297.0, 320.0, 313.0, 311.0, 322.0, 313.0, 317.0, 296.0, 283.0, 286.0, 296.0, 305.0, 322.0, 310.0, 317.0, 289.0, 287.0, 290.0, 292.0, 327.0, 306.0, 292.0, 287.0, 280.0, 302.0, 282.0, 300.0, 318.0, 315.0, 305.0, 322.0, 314.0, 316.0, 323.0, 307.0, 319.0, 317.0, 322.0, 311.0, 297.0, 282.0, 261.0, 258.0, 274.0, 278.0, 308.0, 319.0, 290.0, 292.0, 296.0, 286.0, 308.0, 319.0, 322.0, 308.0, 288.0, 294.0, 290.0, 292.0, 324.0, 303.0, 309.0, 321.0, 290.0, 292.0, 298.0, 289.0, 314.0, 316.0, 319.0, 308.0, 288.0, 294.0, 313.0, 314.0, 288.0, 288.0, 309.0, 321.0, 324.0, 306.0]}, "sampler_perf": {"mean_env_wait_ms": 1.1207907910306545, "mean_processing_ms": 0.2922476172198019, "mean_inference_ms": 1.6784104187428721}, "off_policy_estimator": {}, "info": {"num_steps_trained": 5376000, "num_steps_sampled": 2867200, "sample_time_ms": 21165.857, "load_time_ms": 36.996, "grad_time_ms": 9704.336, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.003032231703400612, "policy_loss": -0.005307988729327917, "vf_loss": 89.09744262695312, "vf_explained_var": 0.7809851765632629, "kl": 0.0017985772574320436, "entropy": 1.139058232307434, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2867200, "episodes_total": 7168, "training_iteration": 224, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-37-28", "timestamp": 1660253848, "time_this_iter_s": 29.72331213951111, "time_total_s": 12263.705348730087, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 12263.705348730087, "timesteps_since_restore": 2867200, "iterations_since_restore": 224, "perf": {"cpu_util_percent": 31.121428571428574, "ram_util_percent": 58.642857142857146}}
-{"episode_reward_max": 636.0, "episode_reward_min": 66.0, "episode_reward_mean": 594.3, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 32.0}, "policy_reward_max": {"ppo": 332.0}, "policy_reward_mean": {"ppo": 297.15}, "custom_metrics": {"sparse_reward_mean": 206.0, "sparse_reward_min": 20, "sparse_reward_max": 220, "shaped_reward_mean": 182.3, "shaped_reward_min": 26, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.79, "onion_pickup_agent_0_min": 4, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 18.08, "onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.41, "useful_onion_pickup_agent_0_min": 2, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 17.31, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.58, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.5, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.17, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.2, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 15.89, "potting_onion_agent_0_min": 2, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.24, "potting_onion_agent_1_min": 4, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.16, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.47, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.42, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.9, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.52, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.33, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.06, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.06, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.46, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.93, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.42, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.9, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.89, 
"optimal_onion_potting_agent_0_min": 2, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.24, "optimal_onion_potting_agent_1_min": 4, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.89, "viable_onion_potting_agent_0_min": 2, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.24, "viable_onion_potting_agent_1_min": 4, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 582.0, 582.0, 630.0, 587.0, 627.0, 579.0, 633.0, 582.0, 624.0, 582.0, 579.0, 630.0, 579.0, 636.0, 582.0, 630.0, 576.0, 621.0, 633.0, 582.0, 584.0, 636.0, 579.0, 576.0, 579.0, 636.0, 630.0, 633.0, 582.0, 570.0, 630.0, 582.0, 627.0, 627.0, 576.0, 582.0, 633.0, 579.0, 582.0, 582.0, 633.0, 627.0, 630.0, 630.0, 636.0, 633.0, 579.0, 519.0, 552.0, 627.0, 582.0, 582.0, 627.0, 630.0, 582.0, 582.0, 627.0, 630.0, 582.0, 587.0, 630.0, 627.0, 582.0, 627.0, 576.0, 630.0, 630.0, 582.0, 630.0, 582.0, 636.0, 630.0, 66.0, 573.0, 633.0, 573.0, 570.0, 579.0, 582.0, 630.0, 633.0, 582.0, 582.0, 579.0, 572.0, 579.0, 582.0, 587.0, 587.0, 630.0, 630.0, 630.0, 582.0, 582.0, 579.0, 419.0, 587.0, 633.0, 587.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [294.0, 288.0, 289.0, 293.0, 283.0, 299.0, 306.0, 324.0, 293.0, 294.0, 323.0, 304.0, 284.0, 295.0, 319.0, 314.0, 283.0, 299.0, 322.0, 302.0, 294.0, 288.0, 284.0, 295.0, 319.0, 311.0, 291.0, 288.0, 316.0, 320.0, 294.0, 288.0, 326.0, 304.0, 292.0, 284.0, 314.0, 307.0, 319.0, 314.0, 286.0, 296.0, 285.0, 299.0, 324.0, 312.0, 297.0, 282.0, 291.0, 285.0, 287.0, 292.0, 321.0, 315.0, 313.0, 317.0, 317.0, 316.0, 296.0, 286.0, 292.0, 278.0, 316.0, 314.0, 286.0, 296.0, 305.0, 322.0, 310.0, 317.0, 289.0, 287.0, 290.0, 292.0, 327.0, 306.0, 292.0, 287.0, 280.0, 302.0, 282.0, 300.0, 318.0, 315.0, 305.0, 322.0, 314.0, 316.0, 323.0, 307.0, 319.0, 317.0, 322.0, 311.0, 297.0, 282.0, 261.0, 
258.0, 274.0, 278.0, 308.0, 319.0, 290.0, 292.0, 296.0, 286.0, 308.0, 319.0, 322.0, 308.0, 288.0, 294.0, 290.0, 292.0, 324.0, 303.0, 309.0, 321.0, 290.0, 292.0, 298.0, 289.0, 314.0, 316.0, 319.0, 308.0, 288.0, 294.0, 313.0, 314.0, 288.0, 288.0, 309.0, 321.0, 324.0, 306.0, 287.0, 295.0, 321.0, 309.0, 298.0, 284.0, 316.0, 320.0, 316.0, 314.0, 34.0, 32.0, 277.0, 296.0, 309.0, 324.0, 279.0, 294.0, 285.0, 285.0, 285.0, 294.0, 285.0, 297.0, 332.0, 298.0, 316.0, 317.0, 296.0, 286.0, 296.0, 286.0, 288.0, 291.0, 274.0, 298.0, 290.0, 289.0, 283.0, 299.0, 291.0, 296.0, 305.0, 282.0, 311.0, 319.0, 303.0, 327.0, 312.0, 318.0, 296.0, 286.0, 291.0, 291.0, 293.0, 286.0, 214.0, 205.0, 287.0, 300.0, 323.0, 310.0, 288.0, 299.0]}, "sampler_perf": {"mean_env_wait_ms": 1.117796938264296, "mean_processing_ms": 0.29164906507723115, "mean_inference_ms": 1.675395869901085}, "off_policy_estimator": {}, "info": {"num_steps_trained": 5400000, "num_steps_sampled": 2880000, "sample_time_ms": 21210.508, "load_time_ms": 36.928, "grad_time_ms": 9651.029, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.003380303969606757, "policy_loss": -0.0046376134268939495, "vf_loss": 85.82404327392578, "vf_explained_var": 0.7595102190971375, "kl": 0.0017190409125760198, "entropy": 1.1289840936660767, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2880000, "episodes_total": 7200, "training_iteration": 225, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-37-59", "timestamp": 1660253879, "time_this_iter_s": 30.507438898086548, "time_total_s": 12294.212787628174, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 12294.212787628174, "timesteps_since_restore": 2880000, "iterations_since_restore": 225, "perf": {"cpu_util_percent": 29.595348837209304, "ram_util_percent": 58.604651162790695}}
-{"episode_reward_max": 639.0, "episode_reward_min": 66.0, "episode_reward_mean": 592.69, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 32.0}, "policy_reward_max": {"ppo": 332.0}, "policy_reward_mean": {"ppo": 296.345}, "custom_metrics": {"sparse_reward_mean": 205.2, "sparse_reward_min": 20, "sparse_reward_max": 220, "shaped_reward_mean": 182.29, "shaped_reward_min": 26, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.96, "onion_pickup_agent_0_min": 4, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 17.88, "onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 16.49, "useful_onion_pickup_agent_0_min": 2, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 17.08, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.7, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 0.47, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.18, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, "useful_onion_drop_agent_1_mean": 0.17, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 16.0, "potting_onion_agent_0_min": 2, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 17.1, "potting_onion_agent_1_min": 4, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.09, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.64, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 5.39, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.94, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.45, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.49, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.06, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.07, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.44, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.96, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.36, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.91, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.0, 
"optimal_onion_potting_agent_0_min": 2, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 17.1, "optimal_onion_potting_agent_1_min": 4, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.0, "viable_onion_potting_agent_0_min": 2, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 17.1, "viable_onion_potting_agent_1_min": 4, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [630.0, 582.0, 575.0, 582.0, 627.0, 630.0, 627.0, 579.0, 579.0, 630.0, 630.0, 579.0, 633.0, 584.0, 567.0, 627.0, 627.0, 582.0, 627.0, 587.0, 587.0, 582.0, 576.0, 579.0, 633.0, 639.0, 582.0, 587.0, 579.0, 582.0, 533.0, 582.0, 627.0, 576.0, 630.0, 630.0, 582.0, 630.0, 582.0, 636.0, 630.0, 66.0, 573.0, 633.0, 573.0, 570.0, 579.0, 582.0, 630.0, 633.0, 582.0, 582.0, 579.0, 572.0, 579.0, 582.0, 587.0, 587.0, 630.0, 630.0, 630.0, 582.0, 582.0, 579.0, 419.0, 587.0, 633.0, 587.0, 582.0, 582.0, 582.0, 630.0, 587.0, 627.0, 579.0, 633.0, 582.0, 624.0, 582.0, 579.0, 630.0, 579.0, 636.0, 582.0, 630.0, 576.0, 621.0, 633.0, 582.0, 584.0, 636.0, 579.0, 576.0, 579.0, 636.0, 630.0, 633.0, 582.0, 570.0, 630.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [321.0, 309.0, 288.0, 294.0, 284.0, 291.0, 293.0, 289.0, 311.0, 316.0, 314.0, 316.0, 313.0, 314.0, 290.0, 289.0, 290.0, 289.0, 324.0, 306.0, 311.0, 319.0, 292.0, 287.0, 319.0, 314.0, 291.0, 293.0, 296.0, 271.0, 323.0, 304.0, 309.0, 318.0, 284.0, 298.0, 309.0, 318.0, 282.0, 305.0, 297.0, 290.0, 287.0, 295.0, 291.0, 285.0, 287.0, 292.0, 316.0, 317.0, 317.0, 322.0, 294.0, 288.0, 301.0, 286.0, 282.0, 297.0, 298.0, 284.0, 270.0, 263.0, 288.0, 294.0, 313.0, 314.0, 288.0, 288.0, 309.0, 321.0, 324.0, 306.0, 287.0, 295.0, 321.0, 309.0, 298.0, 284.0, 316.0, 320.0, 316.0, 314.0, 34.0, 32.0, 277.0, 296.0, 309.0, 324.0, 279.0, 294.0, 285.0, 285.0, 285.0, 294.0, 285.0, 297.0, 332.0, 
298.0, 316.0, 317.0, 296.0, 286.0, 296.0, 286.0, 288.0, 291.0, 274.0, 298.0, 290.0, 289.0, 283.0, 299.0, 291.0, 296.0, 305.0, 282.0, 311.0, 319.0, 303.0, 327.0, 312.0, 318.0, 296.0, 286.0, 291.0, 291.0, 293.0, 286.0, 214.0, 205.0, 287.0, 300.0, 323.0, 310.0, 288.0, 299.0, 294.0, 288.0, 289.0, 293.0, 283.0, 299.0, 306.0, 324.0, 293.0, 294.0, 323.0, 304.0, 284.0, 295.0, 319.0, 314.0, 283.0, 299.0, 322.0, 302.0, 294.0, 288.0, 284.0, 295.0, 319.0, 311.0, 291.0, 288.0, 316.0, 320.0, 294.0, 288.0, 326.0, 304.0, 292.0, 284.0, 314.0, 307.0, 319.0, 314.0, 286.0, 296.0, 285.0, 299.0, 324.0, 312.0, 297.0, 282.0, 291.0, 285.0, 287.0, 292.0, 321.0, 315.0, 313.0, 317.0, 317.0, 316.0, 296.0, 286.0, 292.0, 278.0, 316.0, 314.0]}, "sampler_perf": {"mean_env_wait_ms": 1.1148293138411964, "mean_processing_ms": 0.29105729699863353, "mean_inference_ms": 1.6724660361311725}, "off_policy_estimator": {}, "info": {"num_steps_trained": 5424000, "num_steps_sampled": 2892800, "sample_time_ms": 21194.238, "load_time_ms": 37.016, "grad_time_ms": 9566.171, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.004350067116320133, "policy_loss": -0.004312645178288221, "vf_loss": 92.26403045654297, "vf_explained_var": 0.7493538856506348, "kl": 0.0016388074727728963, "entropy": 1.1273828744888306, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2892800, "episodes_total": 7232, "training_iteration": 226, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-38-28", "timestamp": 1660253908, "time_this_iter_s": 28.989330291748047, "time_total_s": 12323.202117919922, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 12323.202117919922, "timesteps_since_restore": 2892800, "iterations_since_restore": 226, "perf": {"cpu_util_percent": 31.509756097560977, "ram_util_percent": 58.60975609756099}}
-{"episode_reward_max": 639.0, "episode_reward_min": 419.0, "episode_reward_mean": 596.7, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 205.0}, "policy_reward_max": {"ppo": 326.0}, "policy_reward_mean": {"ppo": 298.35}, "custom_metrics": {"sparse_reward_mean": 206.4, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 183.9, "shaped_reward_min": 139, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.96, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 18.09, "onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 16.54, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 17.38, "useful_onion_pickup_agent_1_min": 11, "useful_onion_pickup_agent_1_max": 25, "onion_drop_agent_0_mean": 0.64, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 0.51, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.17, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, "useful_onion_drop_agent_1_mean": 0.19, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 16.04, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 17.3, "potting_onion_agent_1_min": 11, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.11, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.6, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 5.55, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.96, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.33, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.47, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.08, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.56, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.91, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.47, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.87, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.04, 
"optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 17.3, "optimal_onion_potting_agent_1_min": 11, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.04, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 17.3, "viable_onion_potting_agent_1_min": 11, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 579.0, 582.0, 587.0, 624.0, 582.0, 618.0, 582.0, 539.0, 633.0, 582.0, 582.0, 582.0, 582.0, 636.0, 633.0, 582.0, 582.0, 579.0, 630.0, 630.0, 587.0, 630.0, 582.0, 576.0, 627.0, 579.0, 573.0, 582.0, 582.0, 587.0, 636.0, 419.0, 587.0, 633.0, 587.0, 582.0, 582.0, 582.0, 630.0, 587.0, 627.0, 579.0, 633.0, 582.0, 624.0, 582.0, 579.0, 630.0, 579.0, 636.0, 582.0, 630.0, 576.0, 621.0, 633.0, 582.0, 584.0, 636.0, 579.0, 576.0, 579.0, 636.0, 630.0, 633.0, 582.0, 570.0, 630.0, 630.0, 582.0, 575.0, 582.0, 627.0, 630.0, 627.0, 579.0, 579.0, 630.0, 630.0, 579.0, 633.0, 584.0, 567.0, 627.0, 627.0, 582.0, 627.0, 587.0, 587.0, 582.0, 576.0, 579.0, 633.0, 639.0, 582.0, 587.0, 579.0, 582.0, 533.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [287.0, 292.0, 287.0, 292.0, 295.0, 287.0, 292.0, 295.0, 321.0, 303.0, 291.0, 291.0, 305.0, 313.0, 283.0, 299.0, 270.0, 269.0, 316.0, 317.0, 288.0, 294.0, 290.0, 292.0, 298.0, 284.0, 283.0, 299.0, 319.0, 317.0, 309.0, 324.0, 291.0, 291.0, 287.0, 295.0, 293.0, 286.0, 312.0, 318.0, 314.0, 316.0, 309.0, 278.0, 324.0, 306.0, 291.0, 291.0, 297.0, 279.0, 317.0, 310.0, 295.0, 284.0, 282.0, 291.0, 288.0, 294.0, 290.0, 292.0, 294.0, 293.0, 325.0, 311.0, 214.0, 205.0, 287.0, 300.0, 323.0, 310.0, 288.0, 299.0, 294.0, 288.0, 289.0, 293.0, 283.0, 299.0, 306.0, 324.0, 293.0, 294.0, 323.0, 304.0, 284.0, 295.0, 319.0, 314.0, 283.0, 299.0, 322.0, 302.0, 294.0, 288.0, 284.0, 295.0, 319.0, 
311.0, 291.0, 288.0, 316.0, 320.0, 294.0, 288.0, 326.0, 304.0, 292.0, 284.0, 314.0, 307.0, 319.0, 314.0, 286.0, 296.0, 285.0, 299.0, 324.0, 312.0, 297.0, 282.0, 291.0, 285.0, 287.0, 292.0, 321.0, 315.0, 313.0, 317.0, 317.0, 316.0, 296.0, 286.0, 292.0, 278.0, 316.0, 314.0, 321.0, 309.0, 288.0, 294.0, 284.0, 291.0, 293.0, 289.0, 311.0, 316.0, 314.0, 316.0, 313.0, 314.0, 290.0, 289.0, 290.0, 289.0, 324.0, 306.0, 311.0, 319.0, 292.0, 287.0, 319.0, 314.0, 291.0, 293.0, 296.0, 271.0, 323.0, 304.0, 309.0, 318.0, 284.0, 298.0, 309.0, 318.0, 282.0, 305.0, 297.0, 290.0, 287.0, 295.0, 291.0, 285.0, 287.0, 292.0, 316.0, 317.0, 317.0, 322.0, 294.0, 288.0, 301.0, 286.0, 282.0, 297.0, 298.0, 284.0, 270.0, 263.0, 288.0, 294.0]}, "sampler_perf": {"mean_env_wait_ms": 1.111882925643259, "mean_processing_ms": 0.2904711783343595, "mean_inference_ms": 1.6695128259184024}, "off_policy_estimator": {}, "info": {"num_steps_trained": 5448000, "num_steps_sampled": 2905600, "sample_time_ms": 21046.741, "load_time_ms": 36.918, "grad_time_ms": 9303.678, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.001480274717323482, "policy_loss": -0.006882220506668091, "vf_loss": 89.27208709716797, "vf_explained_var": 0.7621426582336426, "kl": 0.0023567674215883017, "entropy": 1.1294348239898682, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2905600, "episodes_total": 7264, "training_iteration": 227, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-38-56", "timestamp": 1660253936, "time_this_iter_s": 28.29434609413147, "time_total_s": 12351.496464014053, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 12351.496464014053, "timesteps_since_restore": 2905600, "iterations_since_restore": 227, "perf": {"cpu_util_percent": 34.097500000000004, "ram_util_percent": 58.625000000000014}}
-{"episode_reward_max": 639.0, "episode_reward_min": 533.0, "episode_reward_mean": 598.73, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 263.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 299.365}, "custom_metrics": {"sparse_reward_mean": 207.2, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 184.33, "shaped_reward_min": 167, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.04, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 18.07, "onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 16.63, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 17.32, "useful_onion_pickup_agent_1_min": 11, "useful_onion_pickup_agent_1_max": 25, "onion_drop_agent_0_mean": 0.61, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 0.55, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.13, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, "useful_onion_drop_agent_1_mean": 0.21, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 16.08, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 17.27, "potting_onion_agent_1_min": 11, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.9, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.83, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 5.46, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.15, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.28, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.51, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.06, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.45, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.06, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.36, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.02, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.03, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.08, 
"optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 17.27, "optimal_onion_potting_agent_1_min": 11, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.08, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 17.27, "viable_onion_potting_agent_1_min": 11, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [576.0, 633.0, 582.0, 630.0, 630.0, 579.0, 630.0, 587.0, 630.0, 582.0, 627.0, 582.0, 627.0, 633.0, 541.0, 579.0, 582.0, 633.0, 630.0, 624.0, 582.0, 582.0, 582.0, 582.0, 582.0, 579.0, 630.0, 630.0, 630.0, 633.0, 576.0, 582.0, 633.0, 582.0, 570.0, 630.0, 630.0, 582.0, 575.0, 582.0, 627.0, 630.0, 627.0, 579.0, 579.0, 630.0, 630.0, 579.0, 633.0, 584.0, 567.0, 627.0, 627.0, 582.0, 627.0, 587.0, 587.0, 582.0, 576.0, 579.0, 633.0, 639.0, 582.0, 587.0, 579.0, 582.0, 533.0, 582.0, 579.0, 579.0, 582.0, 587.0, 624.0, 582.0, 618.0, 582.0, 539.0, 633.0, 582.0, 582.0, 582.0, 582.0, 636.0, 633.0, 582.0, 582.0, 579.0, 630.0, 630.0, 587.0, 630.0, 582.0, 576.0, 627.0, 579.0, 573.0, 582.0, 582.0, 587.0, 636.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [279.0, 297.0, 316.0, 317.0, 293.0, 289.0, 306.0, 324.0, 311.0, 319.0, 292.0, 287.0, 314.0, 316.0, 290.0, 297.0, 312.0, 318.0, 291.0, 291.0, 315.0, 312.0, 291.0, 291.0, 313.0, 314.0, 317.0, 316.0, 266.0, 275.0, 289.0, 290.0, 279.0, 303.0, 322.0, 311.0, 303.0, 327.0, 308.0, 316.0, 291.0, 291.0, 288.0, 294.0, 285.0, 297.0, 280.0, 302.0, 289.0, 293.0, 291.0, 288.0, 317.0, 313.0, 317.0, 313.0, 319.0, 311.0, 317.0, 316.0, 266.0, 310.0, 294.0, 288.0, 317.0, 316.0, 296.0, 286.0, 292.0, 278.0, 316.0, 314.0, 321.0, 309.0, 288.0, 294.0, 284.0, 291.0, 293.0, 289.0, 311.0, 316.0, 314.0, 316.0, 313.0, 314.0, 290.0, 289.0, 290.0, 289.0, 324.0, 306.0, 311.0, 319.0, 292.0, 287.0, 319.0, 
314.0, 291.0, 293.0, 296.0, 271.0, 323.0, 304.0, 309.0, 318.0, 284.0, 298.0, 309.0, 318.0, 282.0, 305.0, 297.0, 290.0, 287.0, 295.0, 291.0, 285.0, 287.0, 292.0, 316.0, 317.0, 317.0, 322.0, 294.0, 288.0, 301.0, 286.0, 282.0, 297.0, 298.0, 284.0, 270.0, 263.0, 288.0, 294.0, 287.0, 292.0, 287.0, 292.0, 295.0, 287.0, 292.0, 295.0, 321.0, 303.0, 291.0, 291.0, 305.0, 313.0, 283.0, 299.0, 270.0, 269.0, 316.0, 317.0, 288.0, 294.0, 290.0, 292.0, 298.0, 284.0, 283.0, 299.0, 319.0, 317.0, 309.0, 324.0, 291.0, 291.0, 287.0, 295.0, 293.0, 286.0, 312.0, 318.0, 314.0, 316.0, 309.0, 278.0, 324.0, 306.0, 291.0, 291.0, 297.0, 279.0, 317.0, 310.0, 295.0, 284.0, 282.0, 291.0, 288.0, 294.0, 290.0, 292.0, 294.0, 293.0, 325.0, 311.0]}, "sampler_perf": {"mean_env_wait_ms": 1.108955063453815, "mean_processing_ms": 0.2898914011168066, "mean_inference_ms": 1.6664528501755567}, "off_policy_estimator": {}, "info": {"num_steps_trained": 5472000, "num_steps_sampled": 2918400, "sample_time_ms": 20842.466, "load_time_ms": 36.811, "grad_time_ms": 9088.977, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0036609917879104614, "policy_loss": -0.0044582299888134, "vf_loss": 86.8133316040039, "vf_explained_var": 0.7590463161468506, "kl": 0.0019074537558481097, "entropy": 1.124218463897705, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2918400, "episodes_total": 7296, "training_iteration": 228, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-39-25", "timestamp": 1660253965, "time_this_iter_s": 29.044671058654785, "time_total_s": 12380.541135072708, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 12380.541135072708, "timesteps_since_restore": 2918400, "iterations_since_restore": 228, "perf": {"cpu_util_percent": 34.02195121951219, "ram_util_percent": 58.739024390243905}}
-{"episode_reward_max": 636.0, "episode_reward_min": 524.0, "episode_reward_mean": 595.88, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 253.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 297.94}, "custom_metrics": {"sparse_reward_mean": 206.0, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 183.88, "shaped_reward_min": 164, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.76, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 18.27, "onion_pickup_agent_1_min": 13, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 16.42, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 17.52, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 25, "onion_drop_agent_0_mean": 0.52, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.65, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.11, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.28, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 15.9, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.34, "potting_onion_agent_1_min": 13, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.9, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.75, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.43, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.19, "useful_dish_pickup_agent_1_min": 3, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.26, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.47, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 6, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.04, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.49, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 5.02, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.39, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.95, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.06, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.9, 
"optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.34, "optimal_onion_potting_agent_1_min": 13, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.9, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.34, "viable_onion_potting_agent_1_min": 13, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [636.0, 576.0, 627.0, 630.0, 582.0, 579.0, 582.0, 633.0, 525.0, 587.0, 573.0, 627.0, 587.0, 624.0, 524.0, 587.0, 633.0, 579.0, 536.0, 582.0, 587.0, 627.0, 579.0, 533.0, 582.0, 627.0, 587.0, 633.0, 627.0, 582.0, 573.0, 633.0, 579.0, 582.0, 533.0, 582.0, 579.0, 579.0, 582.0, 587.0, 624.0, 582.0, 618.0, 582.0, 539.0, 633.0, 582.0, 582.0, 582.0, 582.0, 636.0, 633.0, 582.0, 582.0, 579.0, 630.0, 630.0, 587.0, 630.0, 582.0, 576.0, 627.0, 579.0, 573.0, 582.0, 582.0, 587.0, 636.0, 576.0, 633.0, 582.0, 630.0, 630.0, 579.0, 630.0, 587.0, 630.0, 582.0, 627.0, 582.0, 627.0, 633.0, 541.0, 579.0, 582.0, 633.0, 630.0, 624.0, 582.0, 582.0, 582.0, 582.0, 582.0, 579.0, 630.0, 630.0, 630.0, 633.0, 576.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [320.0, 316.0, 287.0, 289.0, 312.0, 315.0, 316.0, 314.0, 296.0, 286.0, 286.0, 293.0, 285.0, 297.0, 320.0, 313.0, 259.0, 266.0, 291.0, 296.0, 292.0, 281.0, 310.0, 317.0, 297.0, 290.0, 317.0, 307.0, 253.0, 271.0, 280.0, 307.0, 319.0, 314.0, 284.0, 295.0, 267.0, 269.0, 290.0, 292.0, 298.0, 289.0, 310.0, 317.0, 288.0, 291.0, 275.0, 258.0, 294.0, 288.0, 313.0, 314.0, 283.0, 304.0, 322.0, 311.0, 316.0, 311.0, 300.0, 282.0, 278.0, 295.0, 319.0, 314.0, 282.0, 297.0, 298.0, 284.0, 270.0, 263.0, 288.0, 294.0, 287.0, 292.0, 287.0, 292.0, 295.0, 287.0, 292.0, 295.0, 321.0, 303.0, 291.0, 291.0, 305.0, 313.0, 283.0, 299.0, 270.0, 269.0, 316.0, 317.0, 288.0, 294.0, 290.0, 292.0, 298.0, 
284.0, 283.0, 299.0, 319.0, 317.0, 309.0, 324.0, 291.0, 291.0, 287.0, 295.0, 293.0, 286.0, 312.0, 318.0, 314.0, 316.0, 309.0, 278.0, 324.0, 306.0, 291.0, 291.0, 297.0, 279.0, 317.0, 310.0, 295.0, 284.0, 282.0, 291.0, 288.0, 294.0, 290.0, 292.0, 294.0, 293.0, 325.0, 311.0, 279.0, 297.0, 316.0, 317.0, 293.0, 289.0, 306.0, 324.0, 311.0, 319.0, 292.0, 287.0, 314.0, 316.0, 290.0, 297.0, 312.0, 318.0, 291.0, 291.0, 315.0, 312.0, 291.0, 291.0, 313.0, 314.0, 317.0, 316.0, 266.0, 275.0, 289.0, 290.0, 279.0, 303.0, 322.0, 311.0, 303.0, 327.0, 308.0, 316.0, 291.0, 291.0, 288.0, 294.0, 285.0, 297.0, 280.0, 302.0, 289.0, 293.0, 291.0, 288.0, 317.0, 313.0, 317.0, 313.0, 319.0, 311.0, 317.0, 316.0, 266.0, 310.0, 294.0, 288.0]}, "sampler_perf": {"mean_env_wait_ms": 1.1060616527900746, "mean_processing_ms": 0.28931749831274756, "mean_inference_ms": 1.663577876904456}, "off_policy_estimator": {}, "info": {"num_steps_trained": 5496000, "num_steps_sampled": 2931200, "sample_time_ms": 20902.121, "load_time_ms": 36.225, "grad_time_ms": 9147.239, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.004833377432078123, "policy_loss": -0.003439890220761299, "vf_loss": 88.37776947021484, "vf_explained_var": 0.7585814595222473, "kl": 0.0015477427514269948, "entropy": 1.1290167570114136, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2931200, "episodes_total": 7328, "training_iteration": 229, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-39-56", "timestamp": 1660253996, "time_this_iter_s": 31.170966863632202, "time_total_s": 12411.71210193634, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 12411.71210193634, "timesteps_since_restore": 2931200, "iterations_since_restore": 229, "perf": {"cpu_util_percent": 34.31136363636364, "ram_util_percent": 58.67272727272726}}
-{"episode_reward_max": 636.0, "episode_reward_min": 524.0, "episode_reward_mean": 598.57, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 253.0}, "policy_reward_max": {"ppo": 331.0}, "policy_reward_mean": {"ppo": 299.285}, "custom_metrics": {"sparse_reward_mean": 207.0, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 184.57, "shaped_reward_min": 164, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.94, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 18.23, "onion_pickup_agent_1_min": 13, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 16.55, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 17.43, "useful_onion_pickup_agent_1_min": 12, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.56, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 7, "onion_drop_agent_1_mean": 0.6, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.19, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 0.2, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 16.03, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.33, "potting_onion_agent_1_min": 13, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.95, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.71, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.43, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.2, "useful_dish_pickup_agent_1_min": 3, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.32, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.42, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 6, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.52, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 5.07, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.39, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.0, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.07, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.03, 
"optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.33, "optimal_onion_potting_agent_1_min": 13, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.03, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.33, "viable_onion_potting_agent_1_min": 13, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [630.0, 633.0, 582.0, 587.0, 630.0, 630.0, 627.0, 582.0, 630.0, 579.0, 627.0, 579.0, 579.0, 587.0, 576.0, 579.0, 576.0, 587.0, 582.0, 582.0, 576.0, 582.0, 539.0, 633.0, 587.0, 579.0, 630.0, 633.0, 633.0, 633.0, 636.0, 579.0, 582.0, 582.0, 587.0, 636.0, 576.0, 633.0, 582.0, 630.0, 630.0, 579.0, 630.0, 587.0, 630.0, 582.0, 627.0, 582.0, 627.0, 633.0, 541.0, 579.0, 582.0, 633.0, 630.0, 624.0, 582.0, 582.0, 582.0, 582.0, 582.0, 579.0, 630.0, 630.0, 630.0, 633.0, 576.0, 582.0, 636.0, 576.0, 627.0, 630.0, 582.0, 579.0, 582.0, 633.0, 525.0, 587.0, 573.0, 627.0, 587.0, 624.0, 524.0, 587.0, 633.0, 579.0, 536.0, 582.0, 587.0, 627.0, 579.0, 533.0, 582.0, 627.0, 587.0, 633.0, 627.0, 582.0, 573.0, 633.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [319.0, 311.0, 316.0, 317.0, 296.0, 286.0, 303.0, 284.0, 316.0, 314.0, 313.0, 317.0, 317.0, 310.0, 293.0, 289.0, 314.0, 316.0, 285.0, 294.0, 316.0, 311.0, 286.0, 293.0, 283.0, 296.0, 289.0, 298.0, 285.0, 291.0, 287.0, 292.0, 297.0, 279.0, 294.0, 293.0, 288.0, 294.0, 284.0, 298.0, 287.0, 289.0, 290.0, 292.0, 273.0, 266.0, 310.0, 323.0, 295.0, 292.0, 294.0, 285.0, 321.0, 309.0, 321.0, 312.0, 313.0, 320.0, 331.0, 302.0, 321.0, 315.0, 296.0, 283.0, 288.0, 294.0, 290.0, 292.0, 294.0, 293.0, 325.0, 311.0, 279.0, 297.0, 316.0, 317.0, 293.0, 289.0, 306.0, 324.0, 311.0, 319.0, 292.0, 287.0, 314.0, 316.0, 290.0, 297.0, 312.0, 318.0, 291.0, 291.0, 315.0, 312.0, 291.0, 291.0, 313.0, 
314.0, 317.0, 316.0, 266.0, 275.0, 289.0, 290.0, 279.0, 303.0, 322.0, 311.0, 303.0, 327.0, 308.0, 316.0, 291.0, 291.0, 288.0, 294.0, 285.0, 297.0, 280.0, 302.0, 289.0, 293.0, 291.0, 288.0, 317.0, 313.0, 317.0, 313.0, 319.0, 311.0, 317.0, 316.0, 266.0, 310.0, 294.0, 288.0, 320.0, 316.0, 287.0, 289.0, 312.0, 315.0, 316.0, 314.0, 296.0, 286.0, 286.0, 293.0, 285.0, 297.0, 320.0, 313.0, 259.0, 266.0, 291.0, 296.0, 292.0, 281.0, 310.0, 317.0, 297.0, 290.0, 317.0, 307.0, 253.0, 271.0, 280.0, 307.0, 319.0, 314.0, 284.0, 295.0, 267.0, 269.0, 290.0, 292.0, 298.0, 289.0, 310.0, 317.0, 288.0, 291.0, 275.0, 258.0, 294.0, 288.0, 313.0, 314.0, 283.0, 304.0, 322.0, 311.0, 316.0, 311.0, 300.0, 282.0, 278.0, 295.0, 319.0, 314.0]}, "sampler_perf": {"mean_env_wait_ms": 1.1032059640461922, "mean_processing_ms": 0.28875135486760906, "mean_inference_ms": 1.6608895336787544}, "off_policy_estimator": {}, "info": {"num_steps_trained": 5520000, "num_steps_sampled": 2944000, "sample_time_ms": 20966.765, "load_time_ms": 36.396, "grad_time_ms": 9172.251, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.005542902275919914, "policy_loss": -0.0032051329035311937, "vf_loss": 93.1218490600586, "vf_explained_var": 0.7535824775695801, "kl": 0.0018033984815701842, "entropy": 1.1283119916915894, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2944000, "episodes_total": 7360, "training_iteration": 230, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-40-28", "timestamp": 1660254028, "time_this_iter_s": 31.772056102752686, "time_total_s": 12443.484158039093, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 12443.484158039093, "timesteps_since_restore": 2944000, "iterations_since_restore": 230, "perf": {"cpu_util_percent": 32.88666666666666, "ram_util_percent": 58.577777777777776}}
-{"episode_reward_max": 636.0, "episode_reward_min": 444.0, "episode_reward_mean": 595.39, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 215.0}, "policy_reward_max": {"ppo": 331.0}, "policy_reward_mean": {"ppo": 297.695}, "custom_metrics": {"sparse_reward_mean": 205.8, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 183.79, "shaped_reward_min": 124, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.74, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 18.2, "onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 16.35, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 17.46, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.49, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 7, "onion_drop_agent_1_mean": 0.61, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.23, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 0.2, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 15.96, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.34, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 6.14, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.67, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.46, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.02, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.45, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.47, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 6, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.55, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.99, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.43, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.91, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.05, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.96, 
"optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.34, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.96, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.34, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [527.0, 582.0, 576.0, 587.0, 582.0, 633.0, 444.0, 630.0, 590.0, 633.0, 522.0, 636.0, 579.0, 587.0, 633.0, 627.0, 587.0, 630.0, 636.0, 587.0, 582.0, 582.0, 582.0, 627.0, 636.0, 582.0, 627.0, 582.0, 579.0, 579.0, 633.0, 536.0, 630.0, 633.0, 576.0, 582.0, 636.0, 576.0, 627.0, 630.0, 582.0, 579.0, 582.0, 633.0, 525.0, 587.0, 573.0, 627.0, 587.0, 624.0, 524.0, 587.0, 633.0, 579.0, 536.0, 582.0, 587.0, 627.0, 579.0, 533.0, 582.0, 627.0, 587.0, 633.0, 627.0, 582.0, 573.0, 633.0, 630.0, 633.0, 582.0, 587.0, 630.0, 630.0, 627.0, 582.0, 630.0, 579.0, 627.0, 579.0, 579.0, 587.0, 576.0, 579.0, 576.0, 587.0, 582.0, 582.0, 576.0, 582.0, 539.0, 633.0, 587.0, 579.0, 630.0, 633.0, 633.0, 633.0, 636.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [262.0, 265.0, 287.0, 295.0, 277.0, 299.0, 285.0, 302.0, 293.0, 289.0, 313.0, 320.0, 229.0, 215.0, 321.0, 309.0, 292.0, 298.0, 311.0, 322.0, 259.0, 263.0, 309.0, 327.0, 288.0, 291.0, 292.0, 295.0, 315.0, 318.0, 321.0, 306.0, 295.0, 292.0, 318.0, 312.0, 313.0, 323.0, 293.0, 294.0, 294.0, 288.0, 285.0, 297.0, 291.0, 291.0, 322.0, 305.0, 329.0, 307.0, 291.0, 291.0, 313.0, 314.0, 281.0, 301.0, 296.0, 283.0, 291.0, 288.0, 316.0, 317.0, 273.0, 263.0, 319.0, 311.0, 317.0, 316.0, 266.0, 310.0, 294.0, 288.0, 320.0, 316.0, 287.0, 289.0, 312.0, 315.0, 316.0, 314.0, 296.0, 286.0, 286.0, 293.0, 285.0, 297.0, 320.0, 313.0, 259.0, 266.0, 291.0, 296.0, 292.0, 281.0, 310.0, 317.0, 297.0, 
290.0, 317.0, 307.0, 253.0, 271.0, 280.0, 307.0, 319.0, 314.0, 284.0, 295.0, 267.0, 269.0, 290.0, 292.0, 298.0, 289.0, 310.0, 317.0, 288.0, 291.0, 275.0, 258.0, 294.0, 288.0, 313.0, 314.0, 283.0, 304.0, 322.0, 311.0, 316.0, 311.0, 300.0, 282.0, 278.0, 295.0, 319.0, 314.0, 319.0, 311.0, 316.0, 317.0, 296.0, 286.0, 303.0, 284.0, 316.0, 314.0, 313.0, 317.0, 317.0, 310.0, 293.0, 289.0, 314.0, 316.0, 285.0, 294.0, 316.0, 311.0, 286.0, 293.0, 283.0, 296.0, 289.0, 298.0, 285.0, 291.0, 287.0, 292.0, 297.0, 279.0, 294.0, 293.0, 288.0, 294.0, 284.0, 298.0, 287.0, 289.0, 290.0, 292.0, 273.0, 266.0, 310.0, 323.0, 295.0, 292.0, 294.0, 285.0, 321.0, 309.0, 321.0, 312.0, 313.0, 320.0, 331.0, 302.0, 321.0, 315.0, 296.0, 283.0]}, "sampler_perf": {"mean_env_wait_ms": 1.100385875299439, "mean_processing_ms": 0.2881910582639845, "mean_inference_ms": 1.6584370926312206}, "off_policy_estimator": {}, "info": {"num_steps_trained": 5544000, "num_steps_sampled": 2956800, "sample_time_ms": 21086.396, "load_time_ms": 36.806, "grad_time_ms": 9224.029, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0032783111091703176, "policy_loss": -0.005397517699748278, "vf_loss": 92.44506072998047, "vf_explained_var": 0.7564309239387512, "kl": 0.001717855571769178, "entropy": 1.137366771697998, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2956800, "episodes_total": 7392, "training_iteration": 231, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-41-01", "timestamp": 1660254061, "time_this_iter_s": 32.62551975250244, "time_total_s": 12476.109677791595, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 12476.109677791595, "timesteps_since_restore": 2956800, "iterations_since_restore": 231, "perf": {"cpu_util_percent": 34.25652173913044, "ram_util_percent": 58.589130434782625}}
-{"episode_reward_max": 639.0, "episode_reward_min": 444.0, "episode_reward_mean": 597.27, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 215.0}, "policy_reward_max": {"ppo": 331.0}, "policy_reward_mean": {"ppo": 298.635}, "custom_metrics": {"sparse_reward_mean": 206.6, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 184.07, "shaped_reward_min": 124, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.76, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 18.1, "onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 16.31, "useful_onion_pickup_agent_0_min": 9, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 17.42, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.49, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 7, "onion_drop_agent_1_mean": 0.44, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.24, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 0.12, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.96, "potting_onion_agent_0_min": 9, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.42, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 6.24, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.63, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.53, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.98, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.54, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.46, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.06, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.04, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.52, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.98, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.44, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.92, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.96, 
"optimal_onion_potting_agent_0_min": 9, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.42, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.96, "viable_onion_potting_agent_0_min": 9, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.42, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [627.0, 630.0, 579.0, 639.0, 582.0, 633.0, 582.0, 587.0, 630.0, 573.0, 627.0, 579.0, 576.0, 582.0, 630.0, 587.0, 590.0, 582.0, 630.0, 627.0, 582.0, 582.0, 582.0, 573.0, 576.0, 570.0, 582.0, 582.0, 579.0, 633.0, 627.0, 633.0, 627.0, 582.0, 573.0, 633.0, 630.0, 633.0, 582.0, 587.0, 630.0, 630.0, 627.0, 582.0, 630.0, 579.0, 627.0, 579.0, 579.0, 587.0, 576.0, 579.0, 576.0, 587.0, 582.0, 582.0, 576.0, 582.0, 539.0, 633.0, 587.0, 579.0, 630.0, 633.0, 633.0, 633.0, 636.0, 579.0, 527.0, 582.0, 576.0, 587.0, 582.0, 633.0, 444.0, 630.0, 590.0, 633.0, 522.0, 636.0, 579.0, 587.0, 633.0, 627.0, 587.0, 630.0, 636.0, 587.0, 582.0, 582.0, 582.0, 627.0, 636.0, 582.0, 627.0, 582.0, 579.0, 579.0, 633.0, 536.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [321.0, 306.0, 316.0, 314.0, 277.0, 302.0, 318.0, 321.0, 286.0, 296.0, 303.0, 330.0, 286.0, 296.0, 288.0, 299.0, 324.0, 306.0, 274.0, 299.0, 311.0, 316.0, 291.0, 288.0, 292.0, 284.0, 294.0, 288.0, 318.0, 312.0, 300.0, 287.0, 296.0, 294.0, 290.0, 292.0, 304.0, 326.0, 315.0, 312.0, 294.0, 288.0, 299.0, 283.0, 291.0, 291.0, 282.0, 291.0, 290.0, 286.0, 282.0, 288.0, 294.0, 288.0, 293.0, 289.0, 297.0, 282.0, 314.0, 319.0, 308.0, 319.0, 313.0, 320.0, 316.0, 311.0, 300.0, 282.0, 278.0, 295.0, 319.0, 314.0, 319.0, 311.0, 316.0, 317.0, 296.0, 286.0, 303.0, 284.0, 316.0, 314.0, 313.0, 317.0, 317.0, 310.0, 293.0, 289.0, 314.0, 316.0, 285.0, 294.0, 316.0, 311.0, 286.0, 293.0, 283.0, 
296.0, 289.0, 298.0, 285.0, 291.0, 287.0, 292.0, 297.0, 279.0, 294.0, 293.0, 288.0, 294.0, 284.0, 298.0, 287.0, 289.0, 290.0, 292.0, 273.0, 266.0, 310.0, 323.0, 295.0, 292.0, 294.0, 285.0, 321.0, 309.0, 321.0, 312.0, 313.0, 320.0, 331.0, 302.0, 321.0, 315.0, 296.0, 283.0, 262.0, 265.0, 287.0, 295.0, 277.0, 299.0, 285.0, 302.0, 293.0, 289.0, 313.0, 320.0, 229.0, 215.0, 321.0, 309.0, 292.0, 298.0, 311.0, 322.0, 259.0, 263.0, 309.0, 327.0, 288.0, 291.0, 292.0, 295.0, 315.0, 318.0, 321.0, 306.0, 295.0, 292.0, 318.0, 312.0, 313.0, 323.0, 293.0, 294.0, 294.0, 288.0, 285.0, 297.0, 291.0, 291.0, 322.0, 305.0, 329.0, 307.0, 291.0, 291.0, 313.0, 314.0, 281.0, 301.0, 296.0, 283.0, 291.0, 288.0, 316.0, 317.0, 273.0, 263.0]}, "sampler_perf": {"mean_env_wait_ms": 1.0975911945131436, "mean_processing_ms": 0.28763548597333904, "mean_inference_ms": 1.656000718644699}, "off_policy_estimator": {}, "info": {"num_steps_trained": 5568000, "num_steps_sampled": 2969600, "sample_time_ms": 21056.358, "load_time_ms": 36.577, "grad_time_ms": 9266.344, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.002132798545062542, "policy_loss": -0.0064245969988405704, "vf_loss": 91.22052001953125, "vf_explained_var": 0.7570000290870667, "kl": 0.002030483214184642, "entropy": 1.129306674003601, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2969600, "episodes_total": 7424, "training_iteration": 232, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-41-33", "timestamp": 1660254093, "time_this_iter_s": 32.109358072280884, "time_total_s": 12508.219035863876, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 12508.219035863876, "timesteps_since_restore": 2969600, "iterations_since_restore": 232, "perf": {"cpu_util_percent": 33.97777777777779, "ram_util_percent": 58.6088888888889}}
-{"episode_reward_max": 639.0, "episode_reward_min": 444.0, "episode_reward_mean": 596.55, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 215.0}, "policy_reward_max": {"ppo": 332.0}, "policy_reward_mean": {"ppo": 298.275}, "custom_metrics": {"sparse_reward_mean": 206.4, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 183.75, "shaped_reward_min": 124, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.95, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 17.95, "onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.46, "useful_onion_pickup_agent_0_min": 9, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 17.26, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.57, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 7, "onion_drop_agent_1_mean": 0.55, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.18, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.2, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 16.11, "potting_onion_agent_0_min": 9, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 17.17, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 6.2, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.77, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.51, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.01, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.53, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.52, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.06, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.06, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.46, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.02, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.41, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.95, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.11, 
"optimal_onion_potting_agent_0_min": 9, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 17.17, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.11, "viable_onion_potting_agent_0_min": 9, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 17.17, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 582.0, 576.0, 579.0, 573.0, 627.0, 579.0, 582.0, 630.0, 530.0, 630.0, 636.0, 627.0, 570.0, 582.0, 627.0, 582.0, 584.0, 582.0, 587.0, 587.0, 582.0, 576.0, 636.0, 579.0, 582.0, 579.0, 584.0, 630.0, 627.0, 630.0, 630.0, 633.0, 633.0, 636.0, 579.0, 527.0, 582.0, 576.0, 587.0, 582.0, 633.0, 444.0, 630.0, 590.0, 633.0, 522.0, 636.0, 579.0, 587.0, 633.0, 627.0, 587.0, 630.0, 636.0, 587.0, 582.0, 582.0, 582.0, 627.0, 636.0, 582.0, 627.0, 582.0, 579.0, 579.0, 633.0, 536.0, 627.0, 630.0, 579.0, 639.0, 582.0, 633.0, 582.0, 587.0, 630.0, 573.0, 627.0, 579.0, 576.0, 582.0, 630.0, 587.0, 590.0, 582.0, 630.0, 627.0, 582.0, 582.0, 582.0, 573.0, 576.0, 570.0, 582.0, 582.0, 579.0, 633.0, 627.0, 633.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [295.0, 284.0, 296.0, 286.0, 288.0, 288.0, 286.0, 293.0, 278.0, 295.0, 318.0, 309.0, 285.0, 294.0, 295.0, 287.0, 308.0, 322.0, 268.0, 262.0, 314.0, 316.0, 314.0, 322.0, 305.0, 322.0, 287.0, 283.0, 296.0, 286.0, 322.0, 305.0, 286.0, 296.0, 295.0, 289.0, 298.0, 284.0, 301.0, 286.0, 284.0, 303.0, 288.0, 294.0, 300.0, 276.0, 332.0, 304.0, 282.0, 297.0, 304.0, 278.0, 277.0, 302.0, 301.0, 283.0, 316.0, 314.0, 310.0, 317.0, 324.0, 306.0, 321.0, 309.0, 313.0, 320.0, 331.0, 302.0, 321.0, 315.0, 296.0, 283.0, 262.0, 265.0, 287.0, 295.0, 277.0, 299.0, 285.0, 302.0, 293.0, 289.0, 313.0, 320.0, 229.0, 215.0, 321.0, 309.0, 292.0, 298.0, 311.0, 322.0, 259.0, 263.0, 309.0, 327.0, 288.0, 
291.0, 292.0, 295.0, 315.0, 318.0, 321.0, 306.0, 295.0, 292.0, 318.0, 312.0, 313.0, 323.0, 293.0, 294.0, 294.0, 288.0, 285.0, 297.0, 291.0, 291.0, 322.0, 305.0, 329.0, 307.0, 291.0, 291.0, 313.0, 314.0, 281.0, 301.0, 296.0, 283.0, 291.0, 288.0, 316.0, 317.0, 273.0, 263.0, 321.0, 306.0, 316.0, 314.0, 277.0, 302.0, 318.0, 321.0, 286.0, 296.0, 303.0, 330.0, 286.0, 296.0, 288.0, 299.0, 324.0, 306.0, 274.0, 299.0, 311.0, 316.0, 291.0, 288.0, 292.0, 284.0, 294.0, 288.0, 318.0, 312.0, 300.0, 287.0, 296.0, 294.0, 290.0, 292.0, 304.0, 326.0, 315.0, 312.0, 294.0, 288.0, 299.0, 283.0, 291.0, 291.0, 282.0, 291.0, 290.0, 286.0, 282.0, 288.0, 294.0, 288.0, 293.0, 289.0, 297.0, 282.0, 314.0, 319.0, 308.0, 319.0, 313.0, 320.0]}, "sampler_perf": {"mean_env_wait_ms": 1.0948199723537093, "mean_processing_ms": 0.2870853418047666, "mean_inference_ms": 1.653643049405544}, "off_policy_estimator": {}, "info": {"num_steps_trained": 5592000, "num_steps_sampled": 2982400, "sample_time_ms": 21374.201, "load_time_ms": 36.582, "grad_time_ms": 9304.523, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0033536478877067566, "policy_loss": -0.004937517922371626, "vf_loss": 88.6025161743164, "vf_explained_var": 0.7515634894371033, "kl": 0.0023627106565982103, "entropy": 1.138161540031433, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2982400, "episodes_total": 7456, "training_iteration": 233, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-42-06", "timestamp": 1660254126, "time_this_iter_s": 33.16590905189514, "time_total_s": 12541.384944915771, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 12541.384944915771, "timesteps_since_restore": 2982400, "iterations_since_restore": 233, "perf": {"cpu_util_percent": 33.06170212765958, "ram_util_percent": 58.5808510638298}}
-{"episode_reward_max": 639.0, "episode_reward_min": 530.0, "episode_reward_mean": 598.58, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 262.0}, "policy_reward_max": {"ppo": 332.0}, "policy_reward_mean": {"ppo": 299.29}, "custom_metrics": {"sparse_reward_mean": 207.2, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 184.18, "shaped_reward_min": 170, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.81, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 18.21, "onion_pickup_agent_1_min": 13, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.31, "useful_onion_pickup_agent_0_min": 9, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 17.49, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.64, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 7, "onion_drop_agent_1_mean": 0.51, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.2, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 0.16, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 15.91, "potting_onion_agent_0_min": 9, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 17.39, "potting_onion_agent_1_min": 11, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 6.36, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.51, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.75, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.86, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.47, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.47, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.05, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.05, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.64, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.86, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.6, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.8, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.91, 
"optimal_onion_potting_agent_0_min": 9, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 17.39, "optimal_onion_potting_agent_1_min": 11, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.91, "viable_onion_potting_agent_0_min": 9, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 17.39, "viable_onion_potting_agent_1_min": 11, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [630.0, 582.0, 582.0, 630.0, 582.0, 627.0, 576.0, 576.0, 636.0, 630.0, 579.0, 630.0, 582.0, 582.0, 587.0, 636.0, 579.0, 581.0, 587.0, 582.0, 630.0, 639.0, 627.0, 639.0, 630.0, 630.0, 627.0, 570.0, 587.0, 579.0, 582.0, 576.0, 579.0, 579.0, 633.0, 536.0, 627.0, 630.0, 579.0, 639.0, 582.0, 633.0, 582.0, 587.0, 630.0, 573.0, 627.0, 579.0, 576.0, 582.0, 630.0, 587.0, 590.0, 582.0, 630.0, 627.0, 582.0, 582.0, 582.0, 573.0, 576.0, 570.0, 582.0, 582.0, 579.0, 633.0, 627.0, 633.0, 579.0, 582.0, 576.0, 579.0, 573.0, 627.0, 579.0, 582.0, 630.0, 530.0, 630.0, 636.0, 627.0, 570.0, 582.0, 627.0, 582.0, 584.0, 582.0, 587.0, 587.0, 582.0, 576.0, 636.0, 579.0, 582.0, 579.0, 584.0, 630.0, 627.0, 630.0, 630.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [328.0, 302.0, 284.0, 298.0, 283.0, 299.0, 308.0, 322.0, 292.0, 290.0, 300.0, 327.0, 291.0, 285.0, 295.0, 281.0, 326.0, 310.0, 313.0, 317.0, 302.0, 277.0, 318.0, 312.0, 293.0, 289.0, 298.0, 284.0, 296.0, 291.0, 332.0, 304.0, 293.0, 286.0, 285.0, 296.0, 303.0, 284.0, 299.0, 283.0, 324.0, 306.0, 327.0, 312.0, 316.0, 311.0, 319.0, 320.0, 317.0, 313.0, 319.0, 311.0, 309.0, 318.0, 293.0, 277.0, 291.0, 296.0, 290.0, 289.0, 288.0, 294.0, 290.0, 286.0, 296.0, 283.0, 291.0, 288.0, 316.0, 317.0, 273.0, 263.0, 321.0, 306.0, 316.0, 314.0, 277.0, 302.0, 318.0, 321.0, 286.0, 296.0, 303.0, 330.0, 286.0, 296.0, 288.0, 299.0, 324.0, 306.0, 274.0, 299.0, 311.0, 316.0, 291.0, 288.0, 292.0, 
284.0, 294.0, 288.0, 318.0, 312.0, 300.0, 287.0, 296.0, 294.0, 290.0, 292.0, 304.0, 326.0, 315.0, 312.0, 294.0, 288.0, 299.0, 283.0, 291.0, 291.0, 282.0, 291.0, 290.0, 286.0, 282.0, 288.0, 294.0, 288.0, 293.0, 289.0, 297.0, 282.0, 314.0, 319.0, 308.0, 319.0, 313.0, 320.0, 295.0, 284.0, 296.0, 286.0, 288.0, 288.0, 286.0, 293.0, 278.0, 295.0, 318.0, 309.0, 285.0, 294.0, 295.0, 287.0, 308.0, 322.0, 268.0, 262.0, 314.0, 316.0, 314.0, 322.0, 305.0, 322.0, 287.0, 283.0, 296.0, 286.0, 322.0, 305.0, 286.0, 296.0, 295.0, 289.0, 298.0, 284.0, 301.0, 286.0, 284.0, 303.0, 288.0, 294.0, 300.0, 276.0, 332.0, 304.0, 282.0, 297.0, 304.0, 278.0, 277.0, 302.0, 301.0, 283.0, 316.0, 314.0, 310.0, 317.0, 324.0, 306.0, 321.0, 309.0]}, "sampler_perf": {"mean_env_wait_ms": 1.0920719973611583, "mean_processing_ms": 0.2865396383505603, "mean_inference_ms": 1.6512949554166665}, "off_policy_estimator": {}, "info": {"num_steps_trained": 5616000, "num_steps_sampled": 2995200, "sample_time_ms": 21497.979, "load_time_ms": 36.457, "grad_time_ms": 9378.106, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0021230385173112154, "policy_loss": -0.0060439333319664, "vf_loss": 87.32781982421875, "vf_explained_var": 0.7546737194061279, "kl": 0.0017831752775236964, "entropy": 1.1316334009170532, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2995200, "episodes_total": 7488, "training_iteration": 234, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-42-38", "timestamp": 1660254158, "time_this_iter_s": 31.695109128952026, "time_total_s": 12573.080054044724, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 12573.080054044724, "timesteps_since_restore": 2995200, "iterations_since_restore": 234, "perf": {"cpu_util_percent": 33.72888888888888, "ram_util_percent": 58.67111111111112}}
-{"episode_reward_max": 639.0, "episode_reward_min": 530.0, "episode_reward_mean": 599.17, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 261.0}, "policy_reward_max": {"ppo": 332.0}, "policy_reward_mean": {"ppo": 299.585}, "custom_metrics": {"sparse_reward_mean": 207.4, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 184.37, "shaped_reward_min": 161, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.78, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 18.34, "onion_pickup_agent_1_min": 12, "onion_pickup_agent_1_max": 29, "useful_onion_pickup_agent_0_mean": 16.24, "useful_onion_pickup_agent_0_min": 9, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 17.52, "useful_onion_pickup_agent_1_min": 12, "useful_onion_pickup_agent_1_max": 27, "onion_drop_agent_0_mean": 0.57, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 7, "onion_drop_agent_1_mean": 0.66, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.17, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 0.23, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 15.93, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 17.37, "potting_onion_agent_1_min": 11, "potting_onion_agent_1_max": 25, "dish_pickup_agent_0_mean": 6.37, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 5.47, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.73, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.91, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.5, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.41, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.05, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.64, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.88, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.59, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.83, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.93, 
"optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 17.37, "optimal_onion_potting_agent_1_min": 11, "optimal_onion_potting_agent_1_max": 25, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.93, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 17.37, "viable_onion_potting_agent_1_min": 11, "viable_onion_potting_agent_1_max": 25, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 579.0, 582.0, 627.0, 630.0, 582.0, 582.0, 579.0, 582.0, 582.0, 590.0, 627.0, 579.0, 627.0, 576.0, 636.0, 579.0, 579.0, 582.0, 627.0, 561.0, 630.0, 587.0, 630.0, 582.0, 579.0, 639.0, 630.0, 579.0, 636.0, 584.0, 541.0, 579.0, 633.0, 627.0, 633.0, 579.0, 582.0, 576.0, 579.0, 573.0, 627.0, 579.0, 582.0, 630.0, 530.0, 630.0, 636.0, 627.0, 570.0, 582.0, 627.0, 582.0, 584.0, 582.0, 587.0, 587.0, 582.0, 576.0, 636.0, 579.0, 582.0, 579.0, 584.0, 630.0, 627.0, 630.0, 630.0, 630.0, 582.0, 582.0, 630.0, 582.0, 627.0, 576.0, 576.0, 636.0, 630.0, 579.0, 630.0, 582.0, 582.0, 587.0, 636.0, 579.0, 581.0, 587.0, 582.0, 630.0, 639.0, 627.0, 639.0, 630.0, 630.0, 627.0, 570.0, 587.0, 579.0, 582.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [284.0, 298.0, 295.0, 284.0, 285.0, 297.0, 305.0, 322.0, 323.0, 307.0, 283.0, 299.0, 294.0, 288.0, 287.0, 292.0, 305.0, 277.0, 293.0, 289.0, 299.0, 291.0, 308.0, 319.0, 301.0, 278.0, 322.0, 305.0, 283.0, 293.0, 319.0, 317.0, 288.0, 291.0, 294.0, 285.0, 286.0, 296.0, 306.0, 321.0, 291.0, 270.0, 306.0, 324.0, 286.0, 301.0, 303.0, 327.0, 291.0, 291.0, 290.0, 289.0, 312.0, 327.0, 321.0, 309.0, 283.0, 296.0, 319.0, 317.0, 283.0, 301.0, 280.0, 261.0, 297.0, 282.0, 314.0, 319.0, 308.0, 319.0, 313.0, 320.0, 295.0, 284.0, 296.0, 286.0, 288.0, 288.0, 286.0, 293.0, 278.0, 295.0, 318.0, 309.0, 285.0, 294.0, 295.0, 287.0, 308.0, 322.0, 268.0, 262.0, 314.0, 316.0, 314.0, 322.0, 305.0, 
322.0, 287.0, 283.0, 296.0, 286.0, 322.0, 305.0, 286.0, 296.0, 295.0, 289.0, 298.0, 284.0, 301.0, 286.0, 284.0, 303.0, 288.0, 294.0, 300.0, 276.0, 332.0, 304.0, 282.0, 297.0, 304.0, 278.0, 277.0, 302.0, 301.0, 283.0, 316.0, 314.0, 310.0, 317.0, 324.0, 306.0, 321.0, 309.0, 328.0, 302.0, 284.0, 298.0, 283.0, 299.0, 308.0, 322.0, 292.0, 290.0, 300.0, 327.0, 291.0, 285.0, 295.0, 281.0, 326.0, 310.0, 313.0, 317.0, 302.0, 277.0, 318.0, 312.0, 293.0, 289.0, 298.0, 284.0, 296.0, 291.0, 332.0, 304.0, 293.0, 286.0, 285.0, 296.0, 303.0, 284.0, 299.0, 283.0, 324.0, 306.0, 327.0, 312.0, 316.0, 311.0, 319.0, 320.0, 317.0, 313.0, 319.0, 311.0, 309.0, 318.0, 293.0, 277.0, 291.0, 296.0, 290.0, 289.0, 288.0, 294.0, 290.0, 286.0]}, "sampler_perf": {"mean_env_wait_ms": 1.0893523875509898, "mean_processing_ms": 0.2860034188911333, "mean_inference_ms": 1.6490498343131736}, "off_policy_estimator": {}, "info": {"num_steps_trained": 5640000, "num_steps_sampled": 3008000, "sample_time_ms": 21585.027, "load_time_ms": 36.709, "grad_time_ms": 9663.091, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0033673776779323816, "policy_loss": -0.0045895627699792385, "vf_loss": 85.23816680908203, "vf_explained_var": 0.7584102749824524, "kl": 0.0018025357276201248, "entropy": 1.1337394714355469, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3008000, "episodes_total": 7520, "training_iteration": 235, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-43-12", "timestamp": 1660254192, "time_this_iter_s": 34.23338508605957, "time_total_s": 12607.313439130783, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 12607.313439130783, "timesteps_since_restore": 3008000, "iterations_since_restore": 235, "perf": {"cpu_util_percent": 33.239583333333336, "ram_util_percent": 58.65}}
-{"episode_reward_max": 639.0, "episode_reward_min": 541.0, "episode_reward_mean": 603.16, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 261.0}, "policy_reward_max": {"ppo": 332.0}, "policy_reward_mean": {"ppo": 301.58}, "custom_metrics": {"sparse_reward_mean": 208.8, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 185.56, "shaped_reward_min": 161, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.48, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 18.55, "onion_pickup_agent_1_min": 12, "onion_pickup_agent_1_max": 29, "useful_onion_pickup_agent_0_mean": 15.97, "useful_onion_pickup_agent_0_min": 9, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 17.73, "useful_onion_pickup_agent_1_min": 12, "useful_onion_pickup_agent_1_max": 27, "onion_drop_agent_0_mean": 0.4, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 0.52, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.16, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 0.13, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.76, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.71, "potting_onion_agent_1_min": 12, "potting_onion_agent_1_max": 25, "dish_pickup_agent_0_mean": 6.49, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 5.39, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.86, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.89, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.49, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.41, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.04, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.07, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.76, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.82, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.71, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.76, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.76, 
"optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.71, "optimal_onion_potting_agent_1_min": 12, "optimal_onion_potting_agent_1_max": 25, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.76, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.71, "viable_onion_potting_agent_1_min": 12, "viable_onion_potting_agent_1_max": 25, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [633.0, 630.0, 582.0, 627.0, 584.0, 633.0, 579.0, 630.0, 582.0, 627.0, 582.0, 584.0, 587.0, 630.0, 582.0, 627.0, 630.0, 627.0, 587.0, 582.0, 639.0, 582.0, 582.0, 633.0, 582.0, 587.0, 627.0, 633.0, 582.0, 639.0, 579.0, 630.0, 630.0, 627.0, 630.0, 630.0, 630.0, 582.0, 582.0, 630.0, 582.0, 627.0, 576.0, 576.0, 636.0, 630.0, 579.0, 630.0, 582.0, 582.0, 587.0, 636.0, 579.0, 581.0, 587.0, 582.0, 630.0, 639.0, 627.0, 639.0, 630.0, 630.0, 627.0, 570.0, 587.0, 579.0, 582.0, 576.0, 582.0, 579.0, 582.0, 627.0, 630.0, 582.0, 582.0, 579.0, 582.0, 582.0, 590.0, 627.0, 579.0, 627.0, 576.0, 636.0, 579.0, 579.0, 582.0, 627.0, 561.0, 630.0, 587.0, 630.0, 582.0, 579.0, 639.0, 630.0, 579.0, 636.0, 584.0, 541.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [321.0, 312.0, 324.0, 306.0, 285.0, 297.0, 316.0, 311.0, 283.0, 301.0, 311.0, 322.0, 288.0, 291.0, 321.0, 309.0, 293.0, 289.0, 315.0, 312.0, 301.0, 281.0, 292.0, 292.0, 295.0, 292.0, 316.0, 314.0, 291.0, 291.0, 309.0, 318.0, 321.0, 309.0, 313.0, 314.0, 301.0, 286.0, 291.0, 291.0, 322.0, 317.0, 294.0, 288.0, 290.0, 292.0, 311.0, 322.0, 288.0, 294.0, 288.0, 299.0, 316.0, 311.0, 316.0, 317.0, 281.0, 301.0, 324.0, 315.0, 294.0, 285.0, 317.0, 313.0, 316.0, 314.0, 310.0, 317.0, 324.0, 306.0, 321.0, 309.0, 328.0, 302.0, 284.0, 298.0, 283.0, 299.0, 308.0, 322.0, 292.0, 290.0, 300.0, 327.0, 291.0, 285.0, 295.0, 281.0, 326.0, 310.0, 313.0, 317.0, 302.0, 277.0, 318.0, 312.0, 293.0, 
289.0, 298.0, 284.0, 296.0, 291.0, 332.0, 304.0, 293.0, 286.0, 285.0, 296.0, 303.0, 284.0, 299.0, 283.0, 324.0, 306.0, 327.0, 312.0, 316.0, 311.0, 319.0, 320.0, 317.0, 313.0, 319.0, 311.0, 309.0, 318.0, 293.0, 277.0, 291.0, 296.0, 290.0, 289.0, 288.0, 294.0, 290.0, 286.0, 284.0, 298.0, 295.0, 284.0, 285.0, 297.0, 305.0, 322.0, 323.0, 307.0, 283.0, 299.0, 294.0, 288.0, 287.0, 292.0, 305.0, 277.0, 293.0, 289.0, 299.0, 291.0, 308.0, 319.0, 301.0, 278.0, 322.0, 305.0, 283.0, 293.0, 319.0, 317.0, 288.0, 291.0, 294.0, 285.0, 286.0, 296.0, 306.0, 321.0, 291.0, 270.0, 306.0, 324.0, 286.0, 301.0, 303.0, 327.0, 291.0, 291.0, 290.0, 289.0, 312.0, 327.0, 321.0, 309.0, 283.0, 296.0, 319.0, 317.0, 283.0, 301.0, 280.0, 261.0]}, "sampler_perf": {"mean_env_wait_ms": 1.0866480049346467, "mean_processing_ms": 0.2854697984995614, "mean_inference_ms": 1.6467374423655963}, "off_policy_estimator": {}, "info": {"num_steps_trained": 5664000, "num_steps_sampled": 3020800, "sample_time_ms": 21716.507, "load_time_ms": 36.498, "grad_time_ms": 9682.814, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.004189230967313051, "policy_loss": -0.003748750314116478, "vf_loss": 85.03255462646484, "vf_explained_var": 0.76678067445755, "kl": 0.001733882469125092, "entropy": 1.130557656288147, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3020800, "episodes_total": 7552, "training_iteration": 236, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-43-43", "timestamp": 1660254223, "time_this_iter_s": 30.502008199691772, "time_total_s": 12637.815447330475, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 12637.815447330475, "timesteps_since_restore": 3020800, "iterations_since_restore": 236, "perf": {"cpu_util_percent": 34.461363636363636, "ram_util_percent": 59.190909090909095}}
-{"episode_reward_max": 639.0, "episode_reward_min": 515.0, "episode_reward_mean": 596.33, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 257.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 298.165}, "custom_metrics": {"sparse_reward_mean": 206.4, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 183.53, "shaped_reward_min": 155, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.6, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 18.58, "onion_pickup_agent_1_min": 12, "onion_pickup_agent_1_max": 29, "useful_onion_pickup_agent_0_mean": 15.98, "useful_onion_pickup_agent_0_min": 9, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 17.74, "useful_onion_pickup_agent_1_min": 12, "useful_onion_pickup_agent_1_max": 27, "onion_drop_agent_0_mean": 0.49, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.73, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.22, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.24, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 15.76, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.51, "potting_onion_agent_1_min": 12, "potting_onion_agent_1_max": 25, "dish_pickup_agent_0_mean": 6.41, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 14, "dish_pickup_agent_1_mean": 5.52, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.5, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.89, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.61, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 0.44, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.1, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.62, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 10, "soup_pickup_agent_1_mean": 4.93, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.52, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.86, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.05, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 5, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.76, 
"optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.51, "optimal_onion_potting_agent_1_min": 12, "optimal_onion_potting_agent_1_max": 25, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.76, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.51, "viable_onion_potting_agent_1_min": 12, "viable_onion_potting_agent_1_max": 25, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 579.0, 539.0, 630.0, 587.0, 630.0, 579.0, 579.0, 582.0, 627.0, 521.0, 636.0, 582.0, 582.0, 587.0, 576.0, 627.0, 527.0, 627.0, 567.0, 576.0, 570.0, 627.0, 538.0, 630.0, 636.0, 561.0, 515.0, 582.0, 630.0, 633.0, 558.0, 587.0, 579.0, 582.0, 576.0, 582.0, 579.0, 582.0, 627.0, 630.0, 582.0, 582.0, 579.0, 582.0, 582.0, 590.0, 627.0, 579.0, 627.0, 576.0, 636.0, 579.0, 579.0, 582.0, 627.0, 561.0, 630.0, 587.0, 630.0, 582.0, 579.0, 639.0, 630.0, 579.0, 636.0, 584.0, 541.0, 633.0, 630.0, 582.0, 627.0, 584.0, 633.0, 579.0, 630.0, 582.0, 627.0, 582.0, 584.0, 587.0, 630.0, 582.0, 627.0, 630.0, 627.0, 587.0, 582.0, 639.0, 582.0, 582.0, 633.0, 582.0, 587.0, 627.0, 633.0, 582.0, 639.0, 579.0, 630.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [287.0, 295.0, 297.0, 282.0, 265.0, 274.0, 316.0, 314.0, 294.0, 293.0, 319.0, 311.0, 293.0, 286.0, 293.0, 286.0, 294.0, 288.0, 310.0, 317.0, 260.0, 261.0, 324.0, 312.0, 303.0, 279.0, 293.0, 289.0, 290.0, 297.0, 282.0, 294.0, 318.0, 309.0, 268.0, 259.0, 311.0, 316.0, 289.0, 278.0, 282.0, 294.0, 276.0, 294.0, 306.0, 321.0, 273.0, 265.0, 306.0, 324.0, 317.0, 319.0, 293.0, 268.0, 258.0, 257.0, 291.0, 291.0, 316.0, 314.0, 308.0, 325.0, 264.0, 294.0, 291.0, 296.0, 290.0, 289.0, 288.0, 294.0, 290.0, 286.0, 284.0, 298.0, 295.0, 284.0, 285.0, 297.0, 305.0, 322.0, 323.0, 307.0, 283.0, 299.0, 294.0, 288.0, 287.0, 292.0, 305.0, 277.0, 293.0, 289.0, 299.0, 291.0, 308.0, 319.0, 301.0, 
278.0, 322.0, 305.0, 283.0, 293.0, 319.0, 317.0, 288.0, 291.0, 294.0, 285.0, 286.0, 296.0, 306.0, 321.0, 291.0, 270.0, 306.0, 324.0, 286.0, 301.0, 303.0, 327.0, 291.0, 291.0, 290.0, 289.0, 312.0, 327.0, 321.0, 309.0, 283.0, 296.0, 319.0, 317.0, 283.0, 301.0, 280.0, 261.0, 321.0, 312.0, 324.0, 306.0, 285.0, 297.0, 316.0, 311.0, 283.0, 301.0, 311.0, 322.0, 288.0, 291.0, 321.0, 309.0, 293.0, 289.0, 315.0, 312.0, 301.0, 281.0, 292.0, 292.0, 295.0, 292.0, 316.0, 314.0, 291.0, 291.0, 309.0, 318.0, 321.0, 309.0, 313.0, 314.0, 301.0, 286.0, 291.0, 291.0, 322.0, 317.0, 294.0, 288.0, 290.0, 292.0, 311.0, 322.0, 288.0, 294.0, 288.0, 299.0, 316.0, 311.0, 316.0, 317.0, 281.0, 301.0, 324.0, 315.0, 294.0, 285.0, 317.0, 313.0]}, "sampler_perf": {"mean_env_wait_ms": 1.0839571838028972, "mean_processing_ms": 0.28493791430015475, "mean_inference_ms": 1.6442666845730367}, "off_policy_estimator": {}, "info": {"num_steps_trained": 5688000, "num_steps_sampled": 3033600, "sample_time_ms": 21735.725, "load_time_ms": 37.102, "grad_time_ms": 9981.103, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.00033502434962429106, "policy_loss": -0.007877787575125694, "vf_loss": 87.85860443115234, "vf_explained_var": 0.7610828280448914, "kl": 0.0018075080588459969, "entropy": 1.1460970640182495, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3033600, "episodes_total": 7584, "training_iteration": 237, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-44-14", "timestamp": 1660254254, "time_this_iter_s": 31.47483992576599, "time_total_s": 12669.29028725624, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 12669.29028725624, "timesteps_since_restore": 3033600, "iterations_since_restore": 237, "perf": {"cpu_util_percent": 33.54772727272728, "ram_util_percent": 58.545454545454554}}
-{"episode_reward_max": 639.0, "episode_reward_min": 515.0, "episode_reward_mean": 598.41, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 257.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 299.205}, "custom_metrics": {"sparse_reward_mean": 207.0, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 184.41, "shaped_reward_min": 155, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.85, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 18.61, "onion_pickup_agent_1_min": 13, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.21, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 17.64, "useful_onion_pickup_agent_1_min": 12, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.69, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 0.68, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.32, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.26, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 15.82, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.56, "potting_onion_agent_1_min": 12, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.39, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 14, "dish_pickup_agent_1_mean": 5.56, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.56, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.88, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.58, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 0.48, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.09, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.69, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 10, "soup_pickup_agent_1_mean": 4.94, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.57, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.87, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.05, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 5, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.82, 
"optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.56, "optimal_onion_potting_agent_1_min": 12, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.82, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.56, "viable_onion_potting_agent_1_min": 12, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [633.0, 579.0, 627.0, 582.0, 582.0, 544.0, 630.0, 587.0, 630.0, 584.0, 636.0, 624.0, 582.0, 627.0, 579.0, 630.0, 633.0, 590.0, 633.0, 630.0, 636.0, 576.0, 633.0, 582.0, 579.0, 582.0, 544.0, 630.0, 636.0, 579.0, 587.0, 573.0, 579.0, 636.0, 584.0, 541.0, 633.0, 630.0, 582.0, 627.0, 584.0, 633.0, 579.0, 630.0, 582.0, 627.0, 582.0, 584.0, 587.0, 630.0, 582.0, 627.0, 630.0, 627.0, 587.0, 582.0, 639.0, 582.0, 582.0, 633.0, 582.0, 587.0, 627.0, 633.0, 582.0, 639.0, 579.0, 630.0, 582.0, 579.0, 539.0, 630.0, 587.0, 630.0, 579.0, 579.0, 582.0, 627.0, 521.0, 636.0, 582.0, 582.0, 587.0, 576.0, 627.0, 527.0, 627.0, 567.0, 576.0, 570.0, 627.0, 538.0, 630.0, 636.0, 561.0, 515.0, 582.0, 630.0, 633.0, 558.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [321.0, 312.0, 298.0, 281.0, 313.0, 314.0, 293.0, 289.0, 285.0, 297.0, 270.0, 274.0, 321.0, 309.0, 286.0, 301.0, 310.0, 320.0, 290.0, 294.0, 322.0, 314.0, 327.0, 297.0, 288.0, 294.0, 310.0, 317.0, 288.0, 291.0, 311.0, 319.0, 313.0, 320.0, 288.0, 302.0, 313.0, 320.0, 324.0, 306.0, 312.0, 324.0, 289.0, 287.0, 311.0, 322.0, 288.0, 294.0, 283.0, 296.0, 301.0, 281.0, 273.0, 271.0, 311.0, 319.0, 319.0, 317.0, 291.0, 288.0, 302.0, 285.0, 299.0, 274.0, 283.0, 296.0, 319.0, 317.0, 283.0, 301.0, 280.0, 261.0, 321.0, 312.0, 324.0, 306.0, 285.0, 297.0, 316.0, 311.0, 283.0, 301.0, 311.0, 322.0, 288.0, 291.0, 321.0, 309.0, 293.0, 289.0, 315.0, 312.0, 301.0, 281.0, 292.0, 292.0, 295.0, 
292.0, 316.0, 314.0, 291.0, 291.0, 309.0, 318.0, 321.0, 309.0, 313.0, 314.0, 301.0, 286.0, 291.0, 291.0, 322.0, 317.0, 294.0, 288.0, 290.0, 292.0, 311.0, 322.0, 288.0, 294.0, 288.0, 299.0, 316.0, 311.0, 316.0, 317.0, 281.0, 301.0, 324.0, 315.0, 294.0, 285.0, 317.0, 313.0, 287.0, 295.0, 297.0, 282.0, 265.0, 274.0, 316.0, 314.0, 294.0, 293.0, 319.0, 311.0, 293.0, 286.0, 293.0, 286.0, 294.0, 288.0, 310.0, 317.0, 260.0, 261.0, 324.0, 312.0, 303.0, 279.0, 293.0, 289.0, 290.0, 297.0, 282.0, 294.0, 318.0, 309.0, 268.0, 259.0, 311.0, 316.0, 289.0, 278.0, 282.0, 294.0, 276.0, 294.0, 306.0, 321.0, 273.0, 265.0, 306.0, 324.0, 317.0, 319.0, 293.0, 268.0, 258.0, 257.0, 291.0, 291.0, 316.0, 314.0, 308.0, 325.0, 264.0, 294.0]}, "sampler_perf": {"mean_env_wait_ms": 1.0812829517018399, "mean_processing_ms": 0.2844054156337277, "mean_inference_ms": 1.641726673758305}, "off_policy_estimator": {}, "info": {"num_steps_trained": 5712000, "num_steps_sampled": 3046400, "sample_time_ms": 21856.898, "load_time_ms": 37.298, "grad_time_ms": 10099.958, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0027887988835573196, "policy_loss": -0.005770063493400812, "vf_loss": 91.25625610351562, "vf_explained_var": 0.7579948306083679, "kl": 0.001784983091056347, "entropy": 1.1335158348083496, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3046400, "episodes_total": 7616, "training_iteration": 238, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-44-46", "timestamp": 1660254286, "time_this_iter_s": 31.44696879386902, "time_total_s": 12700.73725605011, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 12700.73725605011, "timesteps_since_restore": 3046400, "iterations_since_restore": 238, "perf": {"cpu_util_percent": 33.757777777777775, "ram_util_percent": 58.49555555555556}}
-{"episode_reward_max": 639.0, "episode_reward_min": 242.0, "episode_reward_mean": 593.31, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 117.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 296.655}, "custom_metrics": {"sparse_reward_mean": 205.2, "sparse_reward_min": 80, "sparse_reward_max": 220, "shaped_reward_mean": 182.91, "shaped_reward_min": 82, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.1, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 18.31, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.37, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 17.34, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.76, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 0.73, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.34, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.31, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 16.02, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.22, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.1, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 14, "dish_pickup_agent_1_mean": 5.63, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.37, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.91, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.47, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 0.47, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 6, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.05, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.52, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 10, "soup_pickup_agent_1_mean": 4.99, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.42, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.95, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.05, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 5, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.02, 
"optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.22, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.02, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.22, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 524.0, 582.0, 636.0, 582.0, 630.0, 582.0, 633.0, 630.0, 630.0, 582.0, 582.0, 539.0, 579.0, 639.0, 579.0, 576.0, 582.0, 579.0, 636.0, 579.0, 633.0, 630.0, 587.0, 242.0, 573.0, 627.0, 582.0, 636.0, 587.0, 627.0, 636.0, 582.0, 639.0, 579.0, 630.0, 582.0, 579.0, 539.0, 630.0, 587.0, 630.0, 579.0, 579.0, 582.0, 627.0, 521.0, 636.0, 582.0, 582.0, 587.0, 576.0, 627.0, 527.0, 627.0, 567.0, 576.0, 570.0, 627.0, 538.0, 630.0, 636.0, 561.0, 515.0, 582.0, 630.0, 633.0, 558.0, 633.0, 579.0, 627.0, 582.0, 582.0, 544.0, 630.0, 587.0, 630.0, 584.0, 636.0, 624.0, 582.0, 627.0, 579.0, 630.0, 633.0, 590.0, 633.0, 630.0, 636.0, 576.0, 633.0, 582.0, 579.0, 582.0, 544.0, 630.0, 636.0, 579.0, 587.0, 573.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [288.0, 291.0, 259.0, 265.0, 288.0, 294.0, 314.0, 322.0, 290.0, 292.0, 319.0, 311.0, 295.0, 287.0, 317.0, 316.0, 326.0, 304.0, 309.0, 321.0, 293.0, 289.0, 298.0, 284.0, 263.0, 276.0, 286.0, 293.0, 320.0, 319.0, 295.0, 284.0, 290.0, 286.0, 284.0, 298.0, 278.0, 301.0, 319.0, 317.0, 288.0, 291.0, 313.0, 320.0, 322.0, 308.0, 299.0, 288.0, 117.0, 125.0, 290.0, 283.0, 319.0, 308.0, 286.0, 296.0, 324.0, 312.0, 301.0, 286.0, 311.0, 316.0, 314.0, 322.0, 281.0, 301.0, 324.0, 315.0, 294.0, 285.0, 317.0, 313.0, 287.0, 295.0, 297.0, 282.0, 265.0, 274.0, 316.0, 314.0, 294.0, 293.0, 319.0, 311.0, 293.0, 286.0, 293.0, 286.0, 294.0, 288.0, 310.0, 317.0, 260.0, 261.0, 324.0, 312.0, 303.0, 
279.0, 293.0, 289.0, 290.0, 297.0, 282.0, 294.0, 318.0, 309.0, 268.0, 259.0, 311.0, 316.0, 289.0, 278.0, 282.0, 294.0, 276.0, 294.0, 306.0, 321.0, 273.0, 265.0, 306.0, 324.0, 317.0, 319.0, 293.0, 268.0, 258.0, 257.0, 291.0, 291.0, 316.0, 314.0, 308.0, 325.0, 264.0, 294.0, 321.0, 312.0, 298.0, 281.0, 313.0, 314.0, 293.0, 289.0, 285.0, 297.0, 270.0, 274.0, 321.0, 309.0, 286.0, 301.0, 310.0, 320.0, 290.0, 294.0, 322.0, 314.0, 327.0, 297.0, 288.0, 294.0, 310.0, 317.0, 288.0, 291.0, 311.0, 319.0, 313.0, 320.0, 288.0, 302.0, 313.0, 320.0, 324.0, 306.0, 312.0, 324.0, 289.0, 287.0, 311.0, 322.0, 288.0, 294.0, 283.0, 296.0, 301.0, 281.0, 273.0, 271.0, 311.0, 319.0, 319.0, 317.0, 291.0, 288.0, 302.0, 285.0, 299.0, 274.0]}, "sampler_perf": {"mean_env_wait_ms": 1.078628112981745, "mean_processing_ms": 0.2838764625844994, "mean_inference_ms": 1.6391328483371586}, "off_policy_estimator": {}, "info": {"num_steps_trained": 5736000, "num_steps_sampled": 3059200, "sample_time_ms": 21753.38, "load_time_ms": 37.651, "grad_time_ms": 10111.988, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.005386353936046362, "policy_loss": -0.003314490430057049, "vf_loss": 92.68680572509766, "vf_explained_var": 0.7602830529212952, "kl": 0.0021554683335125446, "entropy": 1.135677456855774, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3059200, "episodes_total": 7648, "training_iteration": 239, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-45-16", "timestamp": 1660254316, "time_this_iter_s": 30.261106967926025, "time_total_s": 12730.998363018036, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 12730.998363018036, "timesteps_since_restore": 3059200, "iterations_since_restore": 239, "perf": {"cpu_util_percent": 33.04761904761905, "ram_util_percent": 58.58571428571428}}
-{"episode_reward_max": 639.0, "episode_reward_min": 242.0, "episode_reward_mean": 598.58, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 117.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 299.29}, "custom_metrics": {"sparse_reward_mean": 207.0, "sparse_reward_min": 80, "sparse_reward_max": 220, "shaped_reward_mean": 184.58, "shaped_reward_min": 82, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.27, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 18.23, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.47, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.3, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.7, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 0.64, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.31, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.26, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 16.22, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.24, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.1, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.68, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.46, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.04, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.49, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 0.46, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 6, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.47, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.07, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.41, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.03, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.22, 
"optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.24, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.22, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.24, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 636.0, 630.0, 587.0, 567.0, 587.0, 582.0, 624.0, 539.0, 633.0, 633.0, 630.0, 636.0, 633.0, 633.0, 630.0, 636.0, 630.0, 630.0, 582.0, 576.0, 636.0, 587.0, 630.0, 576.0, 584.0, 579.0, 579.0, 570.0, 630.0, 587.0, 582.0, 582.0, 630.0, 633.0, 558.0, 633.0, 579.0, 627.0, 582.0, 582.0, 544.0, 630.0, 587.0, 630.0, 584.0, 636.0, 624.0, 582.0, 627.0, 579.0, 630.0, 633.0, 590.0, 633.0, 630.0, 636.0, 576.0, 633.0, 582.0, 579.0, 582.0, 544.0, 630.0, 636.0, 579.0, 587.0, 573.0, 579.0, 524.0, 582.0, 636.0, 582.0, 630.0, 582.0, 633.0, 630.0, 630.0, 582.0, 582.0, 539.0, 579.0, 639.0, 579.0, 576.0, 582.0, 579.0, 636.0, 579.0, 633.0, 630.0, 587.0, 242.0, 573.0, 627.0, 582.0, 636.0, 587.0, 627.0, 636.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [284.0, 298.0, 327.0, 309.0, 319.0, 311.0, 301.0, 286.0, 284.0, 283.0, 286.0, 301.0, 289.0, 293.0, 311.0, 313.0, 273.0, 266.0, 317.0, 316.0, 306.0, 327.0, 313.0, 317.0, 324.0, 312.0, 309.0, 324.0, 319.0, 314.0, 313.0, 317.0, 327.0, 309.0, 309.0, 321.0, 321.0, 309.0, 292.0, 290.0, 293.0, 283.0, 322.0, 314.0, 283.0, 304.0, 322.0, 308.0, 283.0, 293.0, 291.0, 293.0, 285.0, 294.0, 296.0, 283.0, 282.0, 288.0, 315.0, 315.0, 298.0, 289.0, 301.0, 281.0, 291.0, 291.0, 316.0, 314.0, 308.0, 325.0, 264.0, 294.0, 321.0, 312.0, 298.0, 281.0, 313.0, 314.0, 293.0, 289.0, 285.0, 297.0, 270.0, 274.0, 321.0, 309.0, 286.0, 301.0, 310.0, 320.0, 290.0, 294.0, 322.0, 314.0, 327.0, 297.0, 288.0, 
294.0, 310.0, 317.0, 288.0, 291.0, 311.0, 319.0, 313.0, 320.0, 288.0, 302.0, 313.0, 320.0, 324.0, 306.0, 312.0, 324.0, 289.0, 287.0, 311.0, 322.0, 288.0, 294.0, 283.0, 296.0, 301.0, 281.0, 273.0, 271.0, 311.0, 319.0, 319.0, 317.0, 291.0, 288.0, 302.0, 285.0, 299.0, 274.0, 288.0, 291.0, 259.0, 265.0, 288.0, 294.0, 314.0, 322.0, 290.0, 292.0, 319.0, 311.0, 295.0, 287.0, 317.0, 316.0, 326.0, 304.0, 309.0, 321.0, 293.0, 289.0, 298.0, 284.0, 263.0, 276.0, 286.0, 293.0, 320.0, 319.0, 295.0, 284.0, 290.0, 286.0, 284.0, 298.0, 278.0, 301.0, 319.0, 317.0, 288.0, 291.0, 313.0, 320.0, 322.0, 308.0, 299.0, 288.0, 117.0, 125.0, 290.0, 283.0, 319.0, 308.0, 286.0, 296.0, 324.0, 312.0, 301.0, 286.0, 311.0, 316.0, 314.0, 322.0]}, "sampler_perf": {"mean_env_wait_ms": 1.0759937679443485, "mean_processing_ms": 0.28335070451542615, "mean_inference_ms": 1.6365163711120108}, "off_policy_estimator": {}, "info": {"num_steps_trained": 5760000, "num_steps_sampled": 3072000, "sample_time_ms": 21577.613, "load_time_ms": 37.508, "grad_time_ms": 10049.353, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0017478683730587363, "policy_loss": -0.006376888602972031, "vf_loss": 86.9516372680664, "vf_explained_var": 0.7652549743652344, "kl": 0.0021124929189682007, "entropy": 1.1408079862594604, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3072000, "episodes_total": 7680, "training_iteration": 240, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-45-45", "timestamp": 1660254345, "time_this_iter_s": 29.390948057174683, "time_total_s": 12760.38931107521, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 12760.38931107521, "timesteps_since_restore": 3072000, "iterations_since_restore": 240, "perf": {"cpu_util_percent": 32.80238095238095, "ram_util_percent": 58.55238095238095}}
-{"episode_reward_max": 639.0, "episode_reward_min": 242.0, "episode_reward_mean": 595.23, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 117.0}, "policy_reward_max": {"ppo": 328.0}, "policy_reward_mean": {"ppo": 297.615}, "custom_metrics": {"sparse_reward_mean": 205.8, "sparse_reward_min": 80, "sparse_reward_max": 220, "shaped_reward_mean": 183.63, "shaped_reward_min": 82, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.09, "onion_pickup_agent_0_min": 10, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 18.26, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.38, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 17.45, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.65, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.72, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.2, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, "useful_onion_drop_agent_1_mean": 0.24, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 16.14, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.17, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.18, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.63, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.46, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.89, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.48, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 0.52, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 6, "useful_dish_drop_agent_0_mean": 0.04, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.54, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 5.0, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.44, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.96, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.14, 
"optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.17, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.14, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.17, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [627.0, 630.0, 590.0, 627.0, 582.0, 639.0, 630.0, 576.0, 630.0, 458.0, 630.0, 550.0, 630.0, 587.0, 630.0, 582.0, 627.0, 570.0, 587.0, 630.0, 579.0, 558.0, 584.0, 538.0, 587.0, 587.0, 564.0, 630.0, 582.0, 633.0, 579.0, 539.0, 636.0, 579.0, 587.0, 573.0, 579.0, 524.0, 582.0, 636.0, 582.0, 630.0, 582.0, 633.0, 630.0, 630.0, 582.0, 582.0, 539.0, 579.0, 639.0, 579.0, 576.0, 582.0, 579.0, 636.0, 579.0, 633.0, 630.0, 587.0, 242.0, 573.0, 627.0, 582.0, 636.0, 587.0, 627.0, 636.0, 582.0, 636.0, 630.0, 587.0, 567.0, 587.0, 582.0, 624.0, 539.0, 633.0, 633.0, 630.0, 636.0, 633.0, 633.0, 630.0, 636.0, 630.0, 630.0, 582.0, 576.0, 636.0, 587.0, 630.0, 576.0, 584.0, 579.0, 579.0, 570.0, 630.0, 587.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [318.0, 309.0, 305.0, 325.0, 294.0, 296.0, 316.0, 311.0, 292.0, 290.0, 317.0, 322.0, 311.0, 319.0, 276.0, 300.0, 314.0, 316.0, 230.0, 228.0, 326.0, 304.0, 276.0, 274.0, 328.0, 302.0, 294.0, 293.0, 306.0, 324.0, 286.0, 296.0, 310.0, 317.0, 294.0, 276.0, 286.0, 301.0, 321.0, 309.0, 293.0, 286.0, 286.0, 272.0, 293.0, 291.0, 267.0, 271.0, 302.0, 285.0, 296.0, 291.0, 278.0, 286.0, 311.0, 319.0, 295.0, 287.0, 316.0, 317.0, 299.0, 280.0, 268.0, 271.0, 319.0, 317.0, 291.0, 288.0, 302.0, 285.0, 299.0, 274.0, 288.0, 291.0, 259.0, 265.0, 288.0, 294.0, 314.0, 322.0, 290.0, 292.0, 319.0, 311.0, 295.0, 287.0, 317.0, 316.0, 326.0, 304.0, 309.0, 321.0, 293.0, 289.0, 298.0, 284.0, 263.0, 
276.0, 286.0, 293.0, 320.0, 319.0, 295.0, 284.0, 290.0, 286.0, 284.0, 298.0, 278.0, 301.0, 319.0, 317.0, 288.0, 291.0, 313.0, 320.0, 322.0, 308.0, 299.0, 288.0, 117.0, 125.0, 290.0, 283.0, 319.0, 308.0, 286.0, 296.0, 324.0, 312.0, 301.0, 286.0, 311.0, 316.0, 314.0, 322.0, 284.0, 298.0, 327.0, 309.0, 319.0, 311.0, 301.0, 286.0, 284.0, 283.0, 286.0, 301.0, 289.0, 293.0, 311.0, 313.0, 273.0, 266.0, 317.0, 316.0, 306.0, 327.0, 313.0, 317.0, 324.0, 312.0, 309.0, 324.0, 319.0, 314.0, 313.0, 317.0, 327.0, 309.0, 309.0, 321.0, 321.0, 309.0, 292.0, 290.0, 293.0, 283.0, 322.0, 314.0, 283.0, 304.0, 322.0, 308.0, 283.0, 293.0, 291.0, 293.0, 285.0, 294.0, 296.0, 283.0, 282.0, 288.0, 315.0, 315.0, 298.0, 289.0, 301.0, 281.0]}, "sampler_perf": {"mean_env_wait_ms": 1.0733850104445417, "mean_processing_ms": 0.2828300453202057, "mean_inference_ms": 1.6339069456399316}, "off_policy_estimator": {}, "info": {"num_steps_trained": 5784000, "num_steps_sampled": 3084800, "sample_time_ms": 21470.585, "load_time_ms": 37.079, "grad_time_ms": 10121.915, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0020955076906830072, "policy_loss": -0.006229180842638016, "vf_loss": 88.9510269165039, "vf_explained_var": 0.7567486763000488, "kl": 0.0017531089251860976, "entropy": 1.140811562538147, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3084800, "episodes_total": 7712, "training_iteration": 241, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-46-18", "timestamp": 1660254378, "time_this_iter_s": 32.28085994720459, "time_total_s": 12792.670171022415, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 12792.670171022415, "timesteps_since_restore": 3084800, "iterations_since_restore": 241, "perf": {"cpu_util_percent": 31.733333333333334, "ram_util_percent": 58.655555555555544}}
-{"episode_reward_max": 639.0, "episode_reward_min": 458.0, "episode_reward_mean": 603.13, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 228.0}, "policy_reward_max": {"ppo": 328.0}, "policy_reward_mean": {"ppo": 301.565}, "custom_metrics": {"sparse_reward_mean": 208.8, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 185.53, "shaped_reward_min": 138, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.04, "onion_pickup_agent_0_min": 10, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 18.74, "onion_pickup_agent_1_min": 13, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.4, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 17.95, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.68, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.84, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.2, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, "useful_onion_drop_agent_1_mean": 0.22, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 16.07, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.53, "potting_onion_agent_1_min": 13, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.32, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.7, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.57, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.89, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.51, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 0.58, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.04, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.64, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 5.05, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.52, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 5.0, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.07, 
"optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.53, "optimal_onion_potting_agent_1_min": 13, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.07, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.53, "viable_onion_potting_agent_1_min": 13, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [627.0, 590.0, 639.0, 639.0, 627.0, 570.0, 582.0, 633.0, 590.0, 627.0, 633.0, 576.0, 576.0, 627.0, 582.0, 624.0, 630.0, 633.0, 630.0, 576.0, 633.0, 633.0, 590.0, 587.0, 639.0, 579.0, 582.0, 570.0, 630.0, 582.0, 636.0, 627.0, 636.0, 587.0, 627.0, 636.0, 582.0, 636.0, 630.0, 587.0, 567.0, 587.0, 582.0, 624.0, 539.0, 633.0, 633.0, 630.0, 636.0, 633.0, 633.0, 630.0, 636.0, 630.0, 630.0, 582.0, 576.0, 636.0, 587.0, 630.0, 576.0, 584.0, 579.0, 579.0, 570.0, 630.0, 587.0, 582.0, 627.0, 630.0, 590.0, 627.0, 582.0, 639.0, 630.0, 576.0, 630.0, 458.0, 630.0, 550.0, 630.0, 587.0, 630.0, 582.0, 627.0, 570.0, 587.0, 630.0, 579.0, 558.0, 584.0, 538.0, 587.0, 587.0, 564.0, 630.0, 582.0, 633.0, 579.0, 539.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [321.0, 306.0, 294.0, 296.0, 322.0, 317.0, 317.0, 322.0, 316.0, 311.0, 275.0, 295.0, 300.0, 282.0, 326.0, 307.0, 294.0, 296.0, 313.0, 314.0, 311.0, 322.0, 291.0, 285.0, 272.0, 304.0, 313.0, 314.0, 294.0, 288.0, 315.0, 309.0, 313.0, 317.0, 314.0, 319.0, 316.0, 314.0, 280.0, 296.0, 308.0, 325.0, 321.0, 312.0, 293.0, 297.0, 296.0, 291.0, 317.0, 322.0, 291.0, 288.0, 292.0, 290.0, 294.0, 276.0, 311.0, 319.0, 288.0, 294.0, 314.0, 322.0, 316.0, 311.0, 324.0, 312.0, 301.0, 286.0, 311.0, 316.0, 314.0, 322.0, 284.0, 298.0, 327.0, 309.0, 319.0, 311.0, 301.0, 286.0, 284.0, 283.0, 286.0, 301.0, 289.0, 293.0, 311.0, 313.0, 273.0, 266.0, 317.0, 316.0, 306.0, 327.0, 313.0, 317.0, 324.0, 
312.0, 309.0, 324.0, 319.0, 314.0, 313.0, 317.0, 327.0, 309.0, 309.0, 321.0, 321.0, 309.0, 292.0, 290.0, 293.0, 283.0, 322.0, 314.0, 283.0, 304.0, 322.0, 308.0, 283.0, 293.0, 291.0, 293.0, 285.0, 294.0, 296.0, 283.0, 282.0, 288.0, 315.0, 315.0, 298.0, 289.0, 301.0, 281.0, 318.0, 309.0, 305.0, 325.0, 294.0, 296.0, 316.0, 311.0, 292.0, 290.0, 317.0, 322.0, 311.0, 319.0, 276.0, 300.0, 314.0, 316.0, 230.0, 228.0, 326.0, 304.0, 276.0, 274.0, 328.0, 302.0, 294.0, 293.0, 306.0, 324.0, 286.0, 296.0, 310.0, 317.0, 294.0, 276.0, 286.0, 301.0, 321.0, 309.0, 293.0, 286.0, 286.0, 272.0, 293.0, 291.0, 267.0, 271.0, 302.0, 285.0, 296.0, 291.0, 278.0, 286.0, 311.0, 319.0, 295.0, 287.0, 316.0, 317.0, 299.0, 280.0, 268.0, 271.0]}, "sampler_perf": {"mean_env_wait_ms": 1.0708107094165102, "mean_processing_ms": 0.2823168700385721, "mean_inference_ms": 1.631412939127947}, "off_policy_estimator": {}, "info": {"num_steps_trained": 5808000, "num_steps_sampled": 3097600, "sample_time_ms": 21506.392, "load_time_ms": 37.292, "grad_time_ms": 9971.815, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.004354400560259819, "policy_loss": -0.0035108765587210655, "vf_loss": 84.29744720458984, "vf_explained_var": 0.7617435455322266, "kl": 0.0018548279767856002, "entropy": 1.1289268732070923, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3097600, "episodes_total": 7744, "training_iteration": 242, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-46-49", "timestamp": 1660254409, "time_this_iter_s": 30.967852115631104, "time_total_s": 12823.638023138046, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 12823.638023138046, "timesteps_since_restore": 3097600, "iterations_since_restore": 242, "perf": {"cpu_util_percent": 31.486363636363638, "ram_util_percent": 58.63863636363636}}
-{"episode_reward_max": 639.0, "episode_reward_min": 458.0, "episode_reward_mean": 602.75, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 228.0}, "policy_reward_max": {"ppo": 329.0}, "policy_reward_mean": {"ppo": 301.375}, "custom_metrics": {"sparse_reward_mean": 208.8, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 185.15, "shaped_reward_min": 138, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.81, "onion_pickup_agent_0_min": 10, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 18.82, "onion_pickup_agent_1_min": 13, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 16.32, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 18.0, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.64, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.89, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.13, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.22, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 15.93, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.57, "potting_onion_agent_1_min": 13, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.2, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.78, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.53, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.92, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.43, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.55, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.05, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.58, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 5.1, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.48, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 5.05, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.93, 
"optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.57, "optimal_onion_potting_agent_1_min": 13, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.93, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.57, "viable_onion_potting_agent_1_min": 13, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [630.0, 633.0, 630.0, 627.0, 627.0, 627.0, 582.0, 633.0, 630.0, 636.0, 584.0, 627.0, 582.0, 633.0, 573.0, 587.0, 627.0, 587.0, 630.0, 582.0, 576.0, 627.0, 579.0, 639.0, 570.0, 579.0, 582.0, 633.0, 579.0, 633.0, 627.0, 544.0, 570.0, 630.0, 587.0, 582.0, 627.0, 630.0, 590.0, 627.0, 582.0, 639.0, 630.0, 576.0, 630.0, 458.0, 630.0, 550.0, 630.0, 587.0, 630.0, 582.0, 627.0, 570.0, 587.0, 630.0, 579.0, 558.0, 584.0, 538.0, 587.0, 587.0, 564.0, 630.0, 582.0, 633.0, 579.0, 539.0, 627.0, 590.0, 639.0, 639.0, 627.0, 570.0, 582.0, 633.0, 590.0, 627.0, 633.0, 576.0, 576.0, 627.0, 582.0, 624.0, 630.0, 633.0, 630.0, 576.0, 633.0, 633.0, 590.0, 587.0, 639.0, 579.0, 582.0, 570.0, 630.0, 582.0, 636.0, 627.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [313.0, 317.0, 316.0, 317.0, 301.0, 329.0, 313.0, 314.0, 311.0, 316.0, 311.0, 316.0, 286.0, 296.0, 314.0, 319.0, 311.0, 319.0, 324.0, 312.0, 286.0, 298.0, 324.0, 303.0, 278.0, 304.0, 307.0, 326.0, 288.0, 285.0, 298.0, 289.0, 308.0, 319.0, 293.0, 294.0, 326.0, 304.0, 295.0, 287.0, 280.0, 296.0, 315.0, 312.0, 289.0, 290.0, 322.0, 317.0, 288.0, 282.0, 283.0, 296.0, 299.0, 283.0, 309.0, 324.0, 282.0, 297.0, 312.0, 321.0, 305.0, 322.0, 278.0, 266.0, 282.0, 288.0, 315.0, 315.0, 298.0, 289.0, 301.0, 281.0, 318.0, 309.0, 305.0, 325.0, 294.0, 296.0, 316.0, 311.0, 292.0, 290.0, 317.0, 322.0, 311.0, 319.0, 276.0, 300.0, 314.0, 316.0, 230.0, 228.0, 326.0, 304.0, 276.0, 274.0, 328.0, 
302.0, 294.0, 293.0, 306.0, 324.0, 286.0, 296.0, 310.0, 317.0, 294.0, 276.0, 286.0, 301.0, 321.0, 309.0, 293.0, 286.0, 286.0, 272.0, 293.0, 291.0, 267.0, 271.0, 302.0, 285.0, 296.0, 291.0, 278.0, 286.0, 311.0, 319.0, 295.0, 287.0, 316.0, 317.0, 299.0, 280.0, 268.0, 271.0, 321.0, 306.0, 294.0, 296.0, 322.0, 317.0, 317.0, 322.0, 316.0, 311.0, 275.0, 295.0, 300.0, 282.0, 326.0, 307.0, 294.0, 296.0, 313.0, 314.0, 311.0, 322.0, 291.0, 285.0, 272.0, 304.0, 313.0, 314.0, 294.0, 288.0, 315.0, 309.0, 313.0, 317.0, 314.0, 319.0, 316.0, 314.0, 280.0, 296.0, 308.0, 325.0, 321.0, 312.0, 293.0, 297.0, 296.0, 291.0, 317.0, 322.0, 291.0, 288.0, 292.0, 290.0, 294.0, 276.0, 311.0, 319.0, 288.0, 294.0, 314.0, 322.0, 316.0, 311.0]}, "sampler_perf": {"mean_env_wait_ms": 1.068272032402952, "mean_processing_ms": 0.2818129859025947, "mean_inference_ms": 1.6291161273108918}, "off_policy_estimator": {}, "info": {"num_steps_trained": 5832000, "num_steps_sampled": 3110400, "sample_time_ms": 21475.079, "load_time_ms": 37.422, "grad_time_ms": 9892.717, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0034763356670737267, "policy_loss": -0.004455787595361471, "vf_loss": 84.99886322021484, "vf_explained_var": 0.7575659155845642, "kl": 0.0017217934364452958, "entropy": 1.135510802268982, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3110400, "episodes_total": 7776, "training_iteration": 243, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-47-21", "timestamp": 1660254441, "time_this_iter_s": 32.067052125930786, "time_total_s": 12855.705075263977, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 12855.705075263977, "timesteps_since_restore": 3110400, "iterations_since_restore": 243, "perf": {"cpu_util_percent": 31.317777777777778, "ram_util_percent": 58.61555555555556}}
-{"episode_reward_max": 639.0, "episode_reward_min": 468.0, "episode_reward_mean": 602.3, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 229.0}, "policy_reward_max": {"ppo": 329.0}, "policy_reward_mean": {"ppo": 301.15}, "custom_metrics": {"sparse_reward_mean": 208.8, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 184.7, "shaped_reward_min": 144, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.09, "onion_pickup_agent_0_min": 10, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 18.47, "onion_pickup_agent_1_min": 13, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 16.57, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 17.6, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.73, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.84, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.14, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.26, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 16.07, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.35, "potting_onion_agent_1_min": 13, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.02, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.95, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 5.37, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.11, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.39, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.56, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.05, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.43, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.18, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.37, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.14, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.07, 
"optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.35, "optimal_onion_potting_agent_1_min": 13, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.07, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.35, "viable_onion_potting_agent_1_min": 13, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [627.0, 539.0, 630.0, 549.0, 627.0, 564.0, 539.0, 636.0, 627.0, 587.0, 582.0, 636.0, 582.0, 630.0, 627.0, 576.0, 581.0, 627.0, 630.0, 582.0, 576.0, 579.0, 582.0, 627.0, 587.0, 587.0, 627.0, 582.0, 468.0, 636.0, 630.0, 504.0, 582.0, 633.0, 579.0, 539.0, 627.0, 590.0, 639.0, 639.0, 627.0, 570.0, 582.0, 633.0, 590.0, 627.0, 633.0, 576.0, 576.0, 627.0, 582.0, 624.0, 630.0, 633.0, 630.0, 576.0, 633.0, 633.0, 590.0, 587.0, 639.0, 579.0, 582.0, 570.0, 630.0, 582.0, 636.0, 627.0, 630.0, 633.0, 630.0, 627.0, 627.0, 627.0, 582.0, 633.0, 630.0, 636.0, 584.0, 627.0, 582.0, 633.0, 573.0, 587.0, 627.0, 587.0, 630.0, 582.0, 576.0, 627.0, 579.0, 639.0, 570.0, 579.0, 582.0, 633.0, 579.0, 633.0, 627.0, 544.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [316.0, 311.0, 269.0, 270.0, 311.0, 319.0, 275.0, 274.0, 308.0, 319.0, 292.0, 272.0, 271.0, 268.0, 324.0, 312.0, 313.0, 314.0, 302.0, 285.0, 299.0, 283.0, 322.0, 314.0, 293.0, 289.0, 310.0, 320.0, 319.0, 308.0, 281.0, 295.0, 282.0, 299.0, 307.0, 320.0, 321.0, 309.0, 283.0, 299.0, 277.0, 299.0, 293.0, 286.0, 275.0, 307.0, 311.0, 316.0, 285.0, 302.0, 299.0, 288.0, 316.0, 311.0, 299.0, 283.0, 229.0, 239.0, 318.0, 318.0, 308.0, 322.0, 238.0, 266.0, 295.0, 287.0, 316.0, 317.0, 299.0, 280.0, 268.0, 271.0, 321.0, 306.0, 294.0, 296.0, 322.0, 317.0, 317.0, 322.0, 316.0, 311.0, 275.0, 295.0, 300.0, 282.0, 326.0, 307.0, 294.0, 296.0, 313.0, 314.0, 311.0, 322.0, 291.0, 285.0, 272.0, 
304.0, 313.0, 314.0, 294.0, 288.0, 315.0, 309.0, 313.0, 317.0, 314.0, 319.0, 316.0, 314.0, 280.0, 296.0, 308.0, 325.0, 321.0, 312.0, 293.0, 297.0, 296.0, 291.0, 317.0, 322.0, 291.0, 288.0, 292.0, 290.0, 294.0, 276.0, 311.0, 319.0, 288.0, 294.0, 314.0, 322.0, 316.0, 311.0, 313.0, 317.0, 316.0, 317.0, 301.0, 329.0, 313.0, 314.0, 311.0, 316.0, 311.0, 316.0, 286.0, 296.0, 314.0, 319.0, 311.0, 319.0, 324.0, 312.0, 286.0, 298.0, 324.0, 303.0, 278.0, 304.0, 307.0, 326.0, 288.0, 285.0, 298.0, 289.0, 308.0, 319.0, 293.0, 294.0, 326.0, 304.0, 295.0, 287.0, 280.0, 296.0, 315.0, 312.0, 289.0, 290.0, 322.0, 317.0, 288.0, 282.0, 283.0, 296.0, 299.0, 283.0, 309.0, 324.0, 282.0, 297.0, 312.0, 321.0, 305.0, 322.0, 278.0, 266.0]}, "sampler_perf": {"mean_env_wait_ms": 1.065748558737823, "mean_processing_ms": 0.28131319823148404, "mean_inference_ms": 1.6267302844305909}, "off_policy_estimator": {}, "info": {"num_steps_trained": 5856000, "num_steps_sampled": 3123200, "sample_time_ms": 21258.847, "load_time_ms": 37.597, "grad_time_ms": 9965.955, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.00222679297439754, "policy_loss": -0.005516994744539261, "vf_loss": 83.11483764648438, "vf_explained_var": 0.7694733142852783, "kl": 0.002387256594374776, "entropy": 1.135390281677246, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3123200, "episodes_total": 7808, "training_iteration": 244, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-47-51", "timestamp": 1660254471, "time_this_iter_s": 30.266911029815674, "time_total_s": 12885.971986293793, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 12885.971986293793, "timesteps_since_restore": 3123200, "iterations_since_restore": 244, "perf": {"cpu_util_percent": 32.02093023255814, "ram_util_percent": 58.54186046511629}}
-{"episode_reward_max": 639.0, "episode_reward_min": 468.0, "episode_reward_mean": 599.44, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 229.0}, "policy_reward_max": {"ppo": 329.0}, "policy_reward_mean": {"ppo": 299.72}, "custom_metrics": {"sparse_reward_mean": 207.6, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 184.24, "shaped_reward_min": 144, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.11, "onion_pickup_agent_0_min": 10, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 18.39, "onion_pickup_agent_1_min": 12, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 16.54, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 17.45, "useful_onion_pickup_agent_1_min": 11, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.78, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.87, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.22, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.28, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 15.97, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.28, "potting_onion_agent_1_min": 12, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.04, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.85, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 5.4, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.08, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.42, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.5, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.04, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.48, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.14, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.42, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.06, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.97, 
"optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.28, "optimal_onion_potting_agent_1_min": 12, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.97, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.28, "viable_onion_potting_agent_1_min": 12, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [627.0, 633.0, 633.0, 570.0, 582.0, 593.0, 633.0, 636.0, 587.0, 579.0, 627.0, 579.0, 539.0, 587.0, 630.0, 636.0, 584.0, 579.0, 582.0, 630.0, 544.0, 584.0, 630.0, 587.0, 587.0, 573.0, 582.0, 573.0, 627.0, 527.0, 624.0, 587.0, 630.0, 582.0, 636.0, 627.0, 630.0, 633.0, 630.0, 627.0, 627.0, 627.0, 582.0, 633.0, 630.0, 636.0, 584.0, 627.0, 582.0, 633.0, 573.0, 587.0, 627.0, 587.0, 630.0, 582.0, 576.0, 627.0, 579.0, 639.0, 570.0, 579.0, 582.0, 633.0, 579.0, 633.0, 627.0, 544.0, 627.0, 539.0, 630.0, 549.0, 627.0, 564.0, 539.0, 636.0, 627.0, 587.0, 582.0, 636.0, 582.0, 630.0, 627.0, 576.0, 581.0, 627.0, 630.0, 582.0, 576.0, 579.0, 582.0, 627.0, 587.0, 587.0, 627.0, 582.0, 468.0, 636.0, 630.0, 504.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [318.0, 309.0, 318.0, 315.0, 322.0, 311.0, 290.0, 280.0, 285.0, 297.0, 302.0, 291.0, 322.0, 311.0, 324.0, 312.0, 304.0, 283.0, 293.0, 286.0, 321.0, 306.0, 285.0, 294.0, 275.0, 264.0, 293.0, 294.0, 318.0, 312.0, 319.0, 317.0, 277.0, 307.0, 291.0, 288.0, 288.0, 294.0, 319.0, 311.0, 268.0, 276.0, 300.0, 284.0, 314.0, 316.0, 290.0, 297.0, 299.0, 288.0, 291.0, 282.0, 287.0, 295.0, 287.0, 286.0, 313.0, 314.0, 259.0, 268.0, 311.0, 313.0, 288.0, 299.0, 311.0, 319.0, 288.0, 294.0, 314.0, 322.0, 316.0, 311.0, 313.0, 317.0, 316.0, 317.0, 301.0, 329.0, 313.0, 314.0, 311.0, 316.0, 311.0, 316.0, 286.0, 296.0, 314.0, 319.0, 311.0, 319.0, 324.0, 312.0, 286.0, 298.0, 324.0, 303.0, 278.0, 
304.0, 307.0, 326.0, 288.0, 285.0, 298.0, 289.0, 308.0, 319.0, 293.0, 294.0, 326.0, 304.0, 295.0, 287.0, 280.0, 296.0, 315.0, 312.0, 289.0, 290.0, 322.0, 317.0, 288.0, 282.0, 283.0, 296.0, 299.0, 283.0, 309.0, 324.0, 282.0, 297.0, 312.0, 321.0, 305.0, 322.0, 278.0, 266.0, 316.0, 311.0, 269.0, 270.0, 311.0, 319.0, 275.0, 274.0, 308.0, 319.0, 292.0, 272.0, 271.0, 268.0, 324.0, 312.0, 313.0, 314.0, 302.0, 285.0, 299.0, 283.0, 322.0, 314.0, 293.0, 289.0, 310.0, 320.0, 319.0, 308.0, 281.0, 295.0, 282.0, 299.0, 307.0, 320.0, 321.0, 309.0, 283.0, 299.0, 277.0, 299.0, 293.0, 286.0, 275.0, 307.0, 311.0, 316.0, 285.0, 302.0, 299.0, 288.0, 316.0, 311.0, 299.0, 283.0, 229.0, 239.0, 318.0, 318.0, 308.0, 322.0, 238.0, 266.0]}, "sampler_perf": {"mean_env_wait_ms": 1.0632381583217039, "mean_processing_ms": 0.2808155253638906, "mean_inference_ms": 1.6242198083388235}, "off_policy_estimator": {}, "info": {"num_steps_trained": 5880000, "num_steps_sampled": 3136000, "sample_time_ms": 21038.899, "load_time_ms": 37.35, "grad_time_ms": 9776.148, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.004664632026106119, "policy_loss": -0.003766902955248952, "vf_loss": 90.03823852539062, "vf_explained_var": 0.7575922012329102, "kl": 0.002137060509994626, "entropy": 1.1445802450180054, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3136000, "episodes_total": 7840, "training_iteration": 245, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-48-21", "timestamp": 1660254501, "time_this_iter_s": 30.129722118377686, "time_total_s": 12916.10170841217, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 12916.10170841217, "timesteps_since_restore": 3136000, "iterations_since_restore": 245, "perf": {"cpu_util_percent": 34.127906976744185, "ram_util_percent": 58.56744186046512}}
-{"episode_reward_max": 639.0, "episode_reward_min": 468.0, "episode_reward_mean": 600.05, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 229.0}, "policy_reward_max": {"ppo": 329.0}, "policy_reward_mean": {"ppo": 300.025}, "custom_metrics": {"sparse_reward_mean": 207.8, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 184.45, "shaped_reward_min": 144, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.85, "onion_pickup_agent_0_min": 10, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 18.75, "onion_pickup_agent_1_min": 12, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 16.28, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 17.79, "useful_onion_pickup_agent_1_min": 11, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.85, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.87, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.34, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, "useful_onion_drop_agent_1_mean": 0.32, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 15.61, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.64, "potting_onion_agent_1_min": 12, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.2, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.54, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 5.64, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.91, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.35, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.42, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.06, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.7, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.92, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.65, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.84, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.61, 
"optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.64, "optimal_onion_potting_agent_1_min": 12, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.61, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.64, "viable_onion_potting_agent_1_min": 12, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [627.0, 630.0, 579.0, 630.0, 627.0, 633.0, 630.0, 630.0, 582.0, 639.0, 627.0, 587.0, 639.0, 636.0, 582.0, 627.0, 582.0, 630.0, 633.0, 636.0, 513.0, 582.0, 630.0, 570.0, 584.0, 630.0, 633.0, 582.0, 630.0, 582.0, 636.0, 630.0, 579.0, 633.0, 627.0, 544.0, 627.0, 539.0, 630.0, 549.0, 627.0, 564.0, 539.0, 636.0, 627.0, 587.0, 582.0, 636.0, 582.0, 630.0, 627.0, 576.0, 581.0, 627.0, 630.0, 582.0, 576.0, 579.0, 582.0, 627.0, 587.0, 587.0, 627.0, 582.0, 468.0, 636.0, 630.0, 504.0, 627.0, 633.0, 633.0, 570.0, 582.0, 593.0, 633.0, 636.0, 587.0, 579.0, 627.0, 579.0, 539.0, 587.0, 630.0, 636.0, 584.0, 579.0, 582.0, 630.0, 544.0, 584.0, 630.0, 587.0, 587.0, 573.0, 582.0, 573.0, 627.0, 527.0, 624.0, 587.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [313.0, 314.0, 311.0, 319.0, 290.0, 289.0, 321.0, 309.0, 309.0, 318.0, 329.0, 304.0, 323.0, 307.0, 318.0, 312.0, 293.0, 289.0, 314.0, 325.0, 314.0, 313.0, 297.0, 290.0, 314.0, 325.0, 311.0, 325.0, 290.0, 292.0, 303.0, 324.0, 296.0, 286.0, 314.0, 316.0, 315.0, 318.0, 324.0, 312.0, 261.0, 252.0, 293.0, 289.0, 321.0, 309.0, 288.0, 282.0, 285.0, 299.0, 315.0, 315.0, 315.0, 318.0, 285.0, 297.0, 319.0, 311.0, 292.0, 290.0, 319.0, 317.0, 319.0, 311.0, 282.0, 297.0, 312.0, 321.0, 305.0, 322.0, 278.0, 266.0, 316.0, 311.0, 269.0, 270.0, 311.0, 319.0, 275.0, 274.0, 308.0, 319.0, 292.0, 272.0, 271.0, 268.0, 324.0, 312.0, 313.0, 314.0, 302.0, 285.0, 299.0, 283.0, 322.0, 314.0, 293.0, 
289.0, 310.0, 320.0, 319.0, 308.0, 281.0, 295.0, 282.0, 299.0, 307.0, 320.0, 321.0, 309.0, 283.0, 299.0, 277.0, 299.0, 293.0, 286.0, 275.0, 307.0, 311.0, 316.0, 285.0, 302.0, 299.0, 288.0, 316.0, 311.0, 299.0, 283.0, 229.0, 239.0, 318.0, 318.0, 308.0, 322.0, 238.0, 266.0, 318.0, 309.0, 318.0, 315.0, 322.0, 311.0, 290.0, 280.0, 285.0, 297.0, 302.0, 291.0, 322.0, 311.0, 324.0, 312.0, 304.0, 283.0, 293.0, 286.0, 321.0, 306.0, 285.0, 294.0, 275.0, 264.0, 293.0, 294.0, 318.0, 312.0, 319.0, 317.0, 277.0, 307.0, 291.0, 288.0, 288.0, 294.0, 319.0, 311.0, 268.0, 276.0, 300.0, 284.0, 314.0, 316.0, 290.0, 297.0, 299.0, 288.0, 291.0, 282.0, 287.0, 295.0, 287.0, 286.0, 313.0, 314.0, 259.0, 268.0, 311.0, 313.0, 288.0, 299.0]}, "sampler_perf": {"mean_env_wait_ms": 1.0607418368814732, "mean_processing_ms": 0.280319937755019, "mean_inference_ms": 1.6216711984881527}, "off_policy_estimator": {}, "info": {"num_steps_trained": 5904000, "num_steps_sampled": 3148800, "sample_time_ms": 21042.313, "load_time_ms": 37.258, "grad_time_ms": 9784.882, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.003808257170021534, "policy_loss": -0.0040723783895373344, "vf_loss": 84.46407318115234, "vf_explained_var": 0.7558939456939697, "kl": 0.0020272734109312296, "entropy": 1.1315315961837769, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3148800, "episodes_total": 7872, "training_iteration": 246, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-48-52", "timestamp": 1660254532, "time_this_iter_s": 30.6191668510437, "time_total_s": 12946.720875263214, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 12946.720875263214, "timesteps_since_restore": 3148800, "iterations_since_restore": 246, "perf": {"cpu_util_percent": 29.048837209302324, "ram_util_percent": 58.57906976744185}}
-{"episode_reward_max": 639.0, "episode_reward_min": 468.0, "episode_reward_mean": 601.42, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 229.0}, "policy_reward_max": {"ppo": 329.0}, "policy_reward_mean": {"ppo": 300.71}, "custom_metrics": {"sparse_reward_mean": 208.2, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 185.02, "shaped_reward_min": 144, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.83, "onion_pickup_agent_0_min": 10, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 18.97, "onion_pickup_agent_1_min": 12, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 16.31, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 18.06, "useful_onion_pickup_agent_1_min": 11, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.85, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.91, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.35, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, "useful_onion_drop_agent_1_mean": 0.36, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 15.59, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.77, "potting_onion_agent_1_min": 12, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.24, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.4, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.74, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.89, "useful_dish_pickup_agent_1_min": 3, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.36, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.36, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.74, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.88, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.7, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.8, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.59, 
"optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.77, "optimal_onion_potting_agent_1_min": 12, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.59, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.77, "viable_onion_potting_agent_1_min": 12, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [633.0, 582.0, 630.0, 576.0, 582.0, 633.0, 573.0, 587.0, 582.0, 630.0, 633.0, 582.0, 633.0, 530.0, 636.0, 582.0, 636.0, 584.0, 630.0, 633.0, 630.0, 582.0, 630.0, 521.0, 587.0, 627.0, 576.0, 630.0, 579.0, 630.0, 582.0, 584.0, 468.0, 636.0, 630.0, 504.0, 627.0, 633.0, 633.0, 570.0, 582.0, 593.0, 633.0, 636.0, 587.0, 579.0, 627.0, 579.0, 539.0, 587.0, 630.0, 636.0, 584.0, 579.0, 582.0, 630.0, 544.0, 584.0, 630.0, 587.0, 587.0, 573.0, 582.0, 573.0, 627.0, 527.0, 624.0, 587.0, 627.0, 630.0, 579.0, 630.0, 627.0, 633.0, 630.0, 630.0, 582.0, 639.0, 627.0, 587.0, 639.0, 636.0, 582.0, 627.0, 582.0, 630.0, 633.0, 636.0, 513.0, 582.0, 630.0, 570.0, 584.0, 630.0, 633.0, 582.0, 630.0, 582.0, 636.0, 630.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [319.0, 314.0, 295.0, 287.0, 314.0, 316.0, 291.0, 285.0, 288.0, 294.0, 319.0, 314.0, 279.0, 294.0, 290.0, 297.0, 288.0, 294.0, 303.0, 327.0, 324.0, 309.0, 286.0, 296.0, 316.0, 317.0, 261.0, 269.0, 322.0, 314.0, 288.0, 294.0, 317.0, 319.0, 296.0, 288.0, 316.0, 314.0, 321.0, 312.0, 301.0, 329.0, 293.0, 289.0, 313.0, 317.0, 259.0, 262.0, 293.0, 294.0, 308.0, 319.0, 283.0, 293.0, 308.0, 322.0, 297.0, 282.0, 321.0, 309.0, 303.0, 279.0, 302.0, 282.0, 229.0, 239.0, 318.0, 318.0, 308.0, 322.0, 238.0, 266.0, 318.0, 309.0, 318.0, 315.0, 322.0, 311.0, 290.0, 280.0, 285.0, 297.0, 302.0, 291.0, 322.0, 311.0, 324.0, 312.0, 304.0, 283.0, 293.0, 286.0, 321.0, 306.0, 285.0, 294.0, 275.0, 
264.0, 293.0, 294.0, 318.0, 312.0, 319.0, 317.0, 277.0, 307.0, 291.0, 288.0, 288.0, 294.0, 319.0, 311.0, 268.0, 276.0, 300.0, 284.0, 314.0, 316.0, 290.0, 297.0, 299.0, 288.0, 291.0, 282.0, 287.0, 295.0, 287.0, 286.0, 313.0, 314.0, 259.0, 268.0, 311.0, 313.0, 288.0, 299.0, 313.0, 314.0, 311.0, 319.0, 290.0, 289.0, 321.0, 309.0, 309.0, 318.0, 329.0, 304.0, 323.0, 307.0, 318.0, 312.0, 293.0, 289.0, 314.0, 325.0, 314.0, 313.0, 297.0, 290.0, 314.0, 325.0, 311.0, 325.0, 290.0, 292.0, 303.0, 324.0, 296.0, 286.0, 314.0, 316.0, 315.0, 318.0, 324.0, 312.0, 261.0, 252.0, 293.0, 289.0, 321.0, 309.0, 288.0, 282.0, 285.0, 299.0, 315.0, 315.0, 315.0, 318.0, 285.0, 297.0, 319.0, 311.0, 292.0, 290.0, 319.0, 317.0, 319.0, 311.0]}, "sampler_perf": {"mean_env_wait_ms": 1.0582836436059244, "mean_processing_ms": 0.2798326074687889, "mean_inference_ms": 1.6195165404601743}, "off_policy_estimator": {}, "info": {"num_steps_trained": 5928000, "num_steps_sampled": 3161600, "sample_time_ms": 21417.89, "load_time_ms": 36.65, "grad_time_ms": 9717.385, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.007093754131346941, "policy_loss": -0.0009618126205168664, "vf_loss": 86.259033203125, "vf_explained_var": 0.7558541893959045, "kl": 0.001976242521777749, "entropy": 1.140650749206543, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3161600, "episodes_total": 7904, "training_iteration": 247, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-49-26", "timestamp": 1660254566, "time_this_iter_s": 34.550382137298584, "time_total_s": 12981.271257400513, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 12981.271257400513, "timesteps_since_restore": 3161600, "iterations_since_restore": 247, "perf": {"cpu_util_percent": 29.197959183673472, "ram_util_percent": 58.64285714285715}}
-{"episode_reward_max": 639.0, "episode_reward_min": 513.0, "episode_reward_mean": 608.15, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 252.0}, "policy_reward_max": {"ppo": 329.0}, "policy_reward_mean": {"ppo": 304.075}, "custom_metrics": {"sparse_reward_mean": 210.8, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 186.55, "shaped_reward_min": 153, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.04, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 18.87, "onion_pickup_agent_1_min": 14, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.44, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 18.14, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.85, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.7, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.29, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, "useful_onion_drop_agent_1_mean": 0.27, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 15.82, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.88, "potting_onion_agent_1_min": 13, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.17, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.41, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.73, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.97, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.32, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.27, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.68, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.01, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.67, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.93, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 15.82, 
"optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.88, "optimal_onion_potting_agent_1_min": 13, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.82, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.88, "viable_onion_potting_agent_1_min": 13, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [636.0, 636.0, 576.0, 633.0, 587.0, 633.0, 630.0, 636.0, 596.0, 576.0, 636.0, 633.0, 630.0, 582.0, 582.0, 627.0, 630.0, 627.0, 639.0, 573.0, 627.0, 633.0, 582.0, 582.0, 582.0, 561.0, 627.0, 636.0, 636.0, 636.0, 584.0, 633.0, 627.0, 527.0, 624.0, 587.0, 627.0, 630.0, 579.0, 630.0, 627.0, 633.0, 630.0, 630.0, 582.0, 639.0, 627.0, 587.0, 639.0, 636.0, 582.0, 627.0, 582.0, 630.0, 633.0, 636.0, 513.0, 582.0, 630.0, 570.0, 584.0, 630.0, 633.0, 582.0, 630.0, 582.0, 636.0, 630.0, 633.0, 582.0, 630.0, 576.0, 582.0, 633.0, 573.0, 587.0, 582.0, 630.0, 633.0, 582.0, 633.0, 530.0, 636.0, 582.0, 636.0, 584.0, 630.0, 633.0, 630.0, 582.0, 630.0, 521.0, 587.0, 627.0, 576.0, 630.0, 579.0, 630.0, 582.0, 584.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [319.0, 317.0, 309.0, 327.0, 270.0, 306.0, 305.0, 328.0, 286.0, 301.0, 322.0, 311.0, 308.0, 322.0, 314.0, 322.0, 299.0, 297.0, 293.0, 283.0, 316.0, 320.0, 319.0, 314.0, 311.0, 319.0, 288.0, 294.0, 298.0, 284.0, 312.0, 315.0, 324.0, 306.0, 311.0, 316.0, 322.0, 317.0, 297.0, 276.0, 307.0, 320.0, 321.0, 312.0, 280.0, 302.0, 304.0, 278.0, 300.0, 282.0, 266.0, 295.0, 303.0, 324.0, 324.0, 312.0, 324.0, 312.0, 318.0, 318.0, 296.0, 288.0, 323.0, 310.0, 313.0, 314.0, 259.0, 268.0, 311.0, 313.0, 288.0, 299.0, 313.0, 314.0, 311.0, 319.0, 290.0, 289.0, 321.0, 309.0, 309.0, 318.0, 329.0, 304.0, 323.0, 307.0, 318.0, 312.0, 293.0, 289.0, 314.0, 325.0, 314.0, 313.0, 297.0, 290.0, 314.0, 
325.0, 311.0, 325.0, 290.0, 292.0, 303.0, 324.0, 296.0, 286.0, 314.0, 316.0, 315.0, 318.0, 324.0, 312.0, 261.0, 252.0, 293.0, 289.0, 321.0, 309.0, 288.0, 282.0, 285.0, 299.0, 315.0, 315.0, 315.0, 318.0, 285.0, 297.0, 319.0, 311.0, 292.0, 290.0, 319.0, 317.0, 319.0, 311.0, 319.0, 314.0, 295.0, 287.0, 314.0, 316.0, 291.0, 285.0, 288.0, 294.0, 319.0, 314.0, 279.0, 294.0, 290.0, 297.0, 288.0, 294.0, 303.0, 327.0, 324.0, 309.0, 286.0, 296.0, 316.0, 317.0, 261.0, 269.0, 322.0, 314.0, 288.0, 294.0, 317.0, 319.0, 296.0, 288.0, 316.0, 314.0, 321.0, 312.0, 301.0, 329.0, 293.0, 289.0, 313.0, 317.0, 259.0, 262.0, 293.0, 294.0, 308.0, 319.0, 283.0, 293.0, 308.0, 322.0, 297.0, 282.0, 321.0, 309.0, 303.0, 279.0, 302.0, 282.0]}, "sampler_perf": {"mean_env_wait_ms": 1.055854669948681, "mean_processing_ms": 0.27935130516970164, "mean_inference_ms": 1.6177570824217435}, "off_policy_estimator": {}, "info": {"num_steps_trained": 5952000, "num_steps_sampled": 3174400, "sample_time_ms": 21678.331, "load_time_ms": 37.099, "grad_time_ms": 9688.848, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0009431429207324982, "policy_loss": -0.008797372691333294, "vf_loss": 84.26534271240234, "vf_explained_var": 0.7609202265739441, "kl": 0.001977160107344389, "entropy": 1.14460289478302, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3174400, "episodes_total": 7936, "training_iteration": 248, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-50-00", "timestamp": 1660254600, "time_this_iter_s": 33.773277044296265, "time_total_s": 13015.044534444809, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 13015.044534444809, "timesteps_since_restore": 3174400, "iterations_since_restore": 248, "perf": {"cpu_util_percent": 27.302083333333332, "ram_util_percent": 58.68958333333333}}
-{"episode_reward_max": 639.0, "episode_reward_min": 425.0, "episode_reward_mean": 606.43, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 211.0}, "policy_reward_max": {"ppo": 330.0}, "policy_reward_mean": {"ppo": 303.215}, "custom_metrics": {"sparse_reward_mean": 210.0, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 186.43, "shaped_reward_min": 145, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.3, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 18.58, "onion_pickup_agent_1_min": 14, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.63, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 17.93, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.82, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.67, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.28, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, "useful_onion_drop_agent_1_mean": 0.26, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.14, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.6, "potting_onion_agent_1_min": 12, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.08, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.46, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.63, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.04, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.32, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.27, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.62, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.06, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.57, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.99, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 16.14, 
"optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.6, "optimal_onion_potting_agent_1_min": 12, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.14, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.6, "viable_onion_potting_agent_1_min": 12, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [633.0, 627.0, 636.0, 627.0, 582.0, 582.0, 587.0, 630.0, 630.0, 636.0, 633.0, 633.0, 582.0, 633.0, 630.0, 582.0, 425.0, 576.0, 578.0, 627.0, 587.0, 639.0, 636.0, 582.0, 630.0, 630.0, 525.0, 630.0, 579.0, 579.0, 630.0, 587.0, 630.0, 582.0, 636.0, 630.0, 633.0, 582.0, 630.0, 576.0, 582.0, 633.0, 573.0, 587.0, 582.0, 630.0, 633.0, 582.0, 633.0, 530.0, 636.0, 582.0, 636.0, 584.0, 630.0, 633.0, 630.0, 582.0, 630.0, 521.0, 587.0, 627.0, 576.0, 630.0, 579.0, 630.0, 582.0, 584.0, 636.0, 636.0, 576.0, 633.0, 587.0, 633.0, 630.0, 636.0, 596.0, 576.0, 636.0, 633.0, 630.0, 582.0, 582.0, 627.0, 630.0, 627.0, 639.0, 573.0, 627.0, 633.0, 582.0, 582.0, 582.0, 561.0, 627.0, 636.0, 636.0, 636.0, 584.0, 633.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [319.0, 314.0, 306.0, 321.0, 317.0, 319.0, 308.0, 319.0, 286.0, 296.0, 296.0, 286.0, 301.0, 286.0, 309.0, 321.0, 313.0, 317.0, 327.0, 309.0, 320.0, 313.0, 319.0, 314.0, 293.0, 289.0, 319.0, 314.0, 314.0, 316.0, 293.0, 289.0, 214.0, 211.0, 299.0, 277.0, 295.0, 283.0, 308.0, 319.0, 298.0, 289.0, 330.0, 309.0, 316.0, 320.0, 291.0, 291.0, 314.0, 316.0, 311.0, 319.0, 260.0, 265.0, 318.0, 312.0, 291.0, 288.0, 295.0, 284.0, 309.0, 321.0, 295.0, 292.0, 319.0, 311.0, 292.0, 290.0, 319.0, 317.0, 319.0, 311.0, 319.0, 314.0, 295.0, 287.0, 314.0, 316.0, 291.0, 285.0, 288.0, 294.0, 319.0, 314.0, 279.0, 294.0, 290.0, 297.0, 288.0, 294.0, 303.0, 327.0, 324.0, 309.0, 286.0, 296.0, 316.0, 
317.0, 261.0, 269.0, 322.0, 314.0, 288.0, 294.0, 317.0, 319.0, 296.0, 288.0, 316.0, 314.0, 321.0, 312.0, 301.0, 329.0, 293.0, 289.0, 313.0, 317.0, 259.0, 262.0, 293.0, 294.0, 308.0, 319.0, 283.0, 293.0, 308.0, 322.0, 297.0, 282.0, 321.0, 309.0, 303.0, 279.0, 302.0, 282.0, 319.0, 317.0, 309.0, 327.0, 270.0, 306.0, 305.0, 328.0, 286.0, 301.0, 322.0, 311.0, 308.0, 322.0, 314.0, 322.0, 299.0, 297.0, 293.0, 283.0, 316.0, 320.0, 319.0, 314.0, 311.0, 319.0, 288.0, 294.0, 298.0, 284.0, 312.0, 315.0, 324.0, 306.0, 311.0, 316.0, 322.0, 317.0, 297.0, 276.0, 307.0, 320.0, 321.0, 312.0, 280.0, 302.0, 304.0, 278.0, 300.0, 282.0, 266.0, 295.0, 303.0, 324.0, 324.0, 312.0, 324.0, 312.0, 318.0, 318.0, 296.0, 288.0, 323.0, 310.0]}, "sampler_perf": {"mean_env_wait_ms": 1.053454756557531, "mean_processing_ms": 0.27887402151447716, "mean_inference_ms": 1.6161016017922192}, "off_policy_estimator": {}, "info": {"num_steps_trained": 5976000, "num_steps_sampled": 3187200, "sample_time_ms": 21826.606, "load_time_ms": 36.816, "grad_time_ms": 9749.499, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0024596769362688065, "policy_loss": -0.005453174468129873, "vf_loss": 84.88723754882812, "vf_explained_var": 0.7672951221466064, "kl": 0.0021601892076432705, "entropy": 1.1517353057861328, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3187200, "episodes_total": 7968, "training_iteration": 249, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-50-33", "timestamp": 1660254633, "time_this_iter_s": 32.3484160900116, "time_total_s": 13047.39295053482, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 13047.39295053482, "timesteps_since_restore": 3187200, "iterations_since_restore": 249, "perf": {"cpu_util_percent": 29.615217391304352, "ram_util_percent": 58.70434782608695}}
-{"episode_reward_max": 639.0, "episode_reward_min": 425.0, "episode_reward_mean": 604.57, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 211.0}, "policy_reward_max": {"ppo": 330.0}, "policy_reward_mean": {"ppo": 302.285}, "custom_metrics": {"sparse_reward_mean": 209.4, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 185.77, "shaped_reward_min": 145, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.29, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 18.5, "onion_pickup_agent_1_min": 14, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.63, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 17.89, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.78, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.67, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.19, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.16, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.14, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.47, "potting_onion_agent_1_min": 12, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.13, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.48, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.61, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.97, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.34, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.29, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.04, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.66, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.03, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.6, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.94, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.04, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 16.14, 
"optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.47, "optimal_onion_potting_agent_1_min": 12, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.14, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.47, "viable_onion_potting_agent_1_min": 12, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [627.0, 584.0, 624.0, 570.0, 579.0, 587.0, 567.0, 627.0, 587.0, 576.0, 636.0, 630.0, 539.0, 627.0, 639.0, 636.0, 587.0, 630.0, 570.0, 570.0, 584.0, 621.0, 582.0, 627.0, 582.0, 627.0, 579.0, 533.0, 630.0, 630.0, 579.0, 596.0, 579.0, 630.0, 582.0, 584.0, 636.0, 636.0, 576.0, 633.0, 587.0, 633.0, 630.0, 636.0, 596.0, 576.0, 636.0, 633.0, 630.0, 582.0, 582.0, 627.0, 630.0, 627.0, 639.0, 573.0, 627.0, 633.0, 582.0, 582.0, 582.0, 561.0, 627.0, 636.0, 636.0, 636.0, 584.0, 633.0, 633.0, 627.0, 636.0, 627.0, 582.0, 582.0, 587.0, 630.0, 630.0, 636.0, 633.0, 633.0, 582.0, 633.0, 630.0, 582.0, 425.0, 576.0, 578.0, 627.0, 587.0, 639.0, 636.0, 582.0, 630.0, 630.0, 525.0, 630.0, 579.0, 579.0, 630.0, 587.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [321.0, 306.0, 291.0, 293.0, 310.0, 314.0, 280.0, 290.0, 296.0, 283.0, 285.0, 302.0, 287.0, 280.0, 318.0, 309.0, 277.0, 310.0, 292.0, 284.0, 314.0, 322.0, 306.0, 324.0, 278.0, 261.0, 309.0, 318.0, 319.0, 320.0, 322.0, 314.0, 286.0, 301.0, 326.0, 304.0, 295.0, 275.0, 292.0, 278.0, 295.0, 289.0, 308.0, 313.0, 290.0, 292.0, 305.0, 322.0, 291.0, 291.0, 327.0, 300.0, 279.0, 300.0, 263.0, 270.0, 316.0, 314.0, 313.0, 317.0, 296.0, 283.0, 302.0, 294.0, 297.0, 282.0, 321.0, 309.0, 303.0, 279.0, 302.0, 282.0, 319.0, 317.0, 309.0, 327.0, 270.0, 306.0, 305.0, 328.0, 286.0, 301.0, 322.0, 311.0, 308.0, 322.0, 314.0, 322.0, 299.0, 297.0, 293.0, 283.0, 316.0, 320.0, 319.0, 314.0, 311.0, 
319.0, 288.0, 294.0, 298.0, 284.0, 312.0, 315.0, 324.0, 306.0, 311.0, 316.0, 322.0, 317.0, 297.0, 276.0, 307.0, 320.0, 321.0, 312.0, 280.0, 302.0, 304.0, 278.0, 300.0, 282.0, 266.0, 295.0, 303.0, 324.0, 324.0, 312.0, 324.0, 312.0, 318.0, 318.0, 296.0, 288.0, 323.0, 310.0, 319.0, 314.0, 306.0, 321.0, 317.0, 319.0, 308.0, 319.0, 286.0, 296.0, 296.0, 286.0, 301.0, 286.0, 309.0, 321.0, 313.0, 317.0, 327.0, 309.0, 320.0, 313.0, 319.0, 314.0, 293.0, 289.0, 319.0, 314.0, 314.0, 316.0, 293.0, 289.0, 214.0, 211.0, 299.0, 277.0, 295.0, 283.0, 308.0, 319.0, 298.0, 289.0, 330.0, 309.0, 316.0, 320.0, 291.0, 291.0, 314.0, 316.0, 311.0, 319.0, 260.0, 265.0, 318.0, 312.0, 291.0, 288.0, 295.0, 284.0, 309.0, 321.0, 295.0, 292.0]}, "sampler_perf": {"mean_env_wait_ms": 1.0510667862182812, "mean_processing_ms": 0.27840025138599184, "mean_inference_ms": 1.6143808795038155}, "off_policy_estimator": {}, "info": {"num_steps_trained": 6000000, "num_steps_sampled": 3200000, "sample_time_ms": 22115.848, "load_time_ms": 36.609, "grad_time_ms": 9825.426, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0038353295531123877, "policy_loss": -0.004546869080513716, "vf_loss": 89.5432357788086, "vf_explained_var": 0.7639234662055969, "kl": 0.002313032979145646, "entropy": 1.144262671470642, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3200000, "episodes_total": 8000, "training_iteration": 250, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-51-06", "timestamp": 1660254666, "time_this_iter_s": 33.03909492492676, "time_total_s": 13080.432045459747, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 13080.432045459747, "timesteps_since_restore": 3200000, "iterations_since_restore": 250, "perf": {"cpu_util_percent": 30.089130434782607, "ram_util_percent": 58.62826086956523}}
-{"episode_reward_max": 639.0, "episode_reward_min": 425.0, "episode_reward_mean": 605.69, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 211.0}, "policy_reward_max": {"ppo": 330.0}, "policy_reward_mean": {"ppo": 302.845}, "custom_metrics": {"sparse_reward_mean": 209.8, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 186.09, "shaped_reward_min": 145, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.03, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 18.8, "onion_pickup_agent_1_min": 12, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.39, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 18.08, "useful_onion_pickup_agent_1_min": 11, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.73, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.77, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.17, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.22, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 15.94, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.66, "potting_onion_agent_1_min": 12, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 6.11, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.55, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.62, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.01, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.27, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.36, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.7, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.01, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.62, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.94, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.94, 
"optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.66, "optimal_onion_potting_agent_1_min": 12, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.94, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.66, "viable_onion_potting_agent_1_min": 12, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [630.0, 579.0, 627.0, 630.0, 633.0, 633.0, 587.0, 639.0, 582.0, 581.0, 587.0, 624.0, 633.0, 621.0, 582.0, 639.0, 636.0, 582.0, 587.0, 627.0, 582.0, 582.0, 627.0, 639.0, 630.0, 630.0, 587.0, 630.0, 627.0, 630.0, 630.0, 582.0, 636.0, 636.0, 584.0, 633.0, 633.0, 627.0, 636.0, 627.0, 582.0, 582.0, 587.0, 630.0, 630.0, 636.0, 633.0, 633.0, 582.0, 633.0, 630.0, 582.0, 425.0, 576.0, 578.0, 627.0, 587.0, 639.0, 636.0, 582.0, 630.0, 630.0, 525.0, 630.0, 579.0, 579.0, 630.0, 587.0, 627.0, 584.0, 624.0, 570.0, 579.0, 587.0, 567.0, 627.0, 587.0, 576.0, 636.0, 630.0, 539.0, 627.0, 639.0, 636.0, 587.0, 630.0, 570.0, 570.0, 584.0, 621.0, 582.0, 627.0, 582.0, 627.0, 579.0, 533.0, 630.0, 630.0, 579.0, 596.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [311.0, 319.0, 304.0, 275.0, 320.0, 307.0, 316.0, 314.0, 312.0, 321.0, 316.0, 317.0, 295.0, 292.0, 312.0, 327.0, 294.0, 288.0, 284.0, 297.0, 295.0, 292.0, 302.0, 322.0, 316.0, 317.0, 308.0, 313.0, 291.0, 291.0, 322.0, 317.0, 319.0, 317.0, 291.0, 291.0, 290.0, 297.0, 307.0, 320.0, 290.0, 292.0, 280.0, 302.0, 319.0, 308.0, 316.0, 323.0, 313.0, 317.0, 305.0, 325.0, 296.0, 291.0, 313.0, 317.0, 309.0, 318.0, 311.0, 319.0, 308.0, 322.0, 289.0, 293.0, 324.0, 312.0, 318.0, 318.0, 296.0, 288.0, 323.0, 310.0, 319.0, 314.0, 306.0, 321.0, 317.0, 319.0, 308.0, 319.0, 286.0, 296.0, 296.0, 286.0, 301.0, 286.0, 309.0, 321.0, 313.0, 317.0, 327.0, 309.0, 320.0, 313.0, 319.0, 314.0, 293.0, 
289.0, 319.0, 314.0, 314.0, 316.0, 293.0, 289.0, 214.0, 211.0, 299.0, 277.0, 295.0, 283.0, 308.0, 319.0, 298.0, 289.0, 330.0, 309.0, 316.0, 320.0, 291.0, 291.0, 314.0, 316.0, 311.0, 319.0, 260.0, 265.0, 318.0, 312.0, 291.0, 288.0, 295.0, 284.0, 309.0, 321.0, 295.0, 292.0, 321.0, 306.0, 291.0, 293.0, 310.0, 314.0, 280.0, 290.0, 296.0, 283.0, 285.0, 302.0, 287.0, 280.0, 318.0, 309.0, 277.0, 310.0, 292.0, 284.0, 314.0, 322.0, 306.0, 324.0, 278.0, 261.0, 309.0, 318.0, 319.0, 320.0, 322.0, 314.0, 286.0, 301.0, 326.0, 304.0, 295.0, 275.0, 292.0, 278.0, 295.0, 289.0, 308.0, 313.0, 290.0, 292.0, 305.0, 322.0, 291.0, 291.0, 327.0, 300.0, 279.0, 300.0, 263.0, 270.0, 316.0, 314.0, 313.0, 317.0, 296.0, 283.0, 302.0, 294.0]}, "sampler_perf": {"mean_env_wait_ms": 1.0486945411620672, "mean_processing_ms": 0.2779294488920563, "mean_inference_ms": 1.612487194421866}, "off_policy_estimator": {}, "info": {"num_steps_trained": 6024000, "num_steps_sampled": 3212800, "sample_time_ms": 22207.762, "load_time_ms": 36.674, "grad_time_ms": 9691.515, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.005205323453992605, "policy_loss": -0.0029822138603776693, "vf_loss": 87.57022857666016, "vf_explained_var": 0.7586490511894226, "kl": 0.0020639507565647364, "entropy": 1.1389611959457397, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3212800, "episodes_total": 8032, "training_iteration": 251, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-51-37", "timestamp": 1660254697, "time_this_iter_s": 31.857529878616333, "time_total_s": 13112.289575338364, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 13112.289575338364, "timesteps_since_restore": 3212800, "iterations_since_restore": 251, "perf": {"cpu_util_percent": 30.984444444444442, "ram_util_percent": 58.69777777777778}}
-{"episode_reward_max": 639.0, "episode_reward_min": 518.0, "episode_reward_mean": 606.65, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 258.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 303.325}, "custom_metrics": {"sparse_reward_mean": 210.2, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 186.25, "shaped_reward_min": 158, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.82, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 18.86, "onion_pickup_agent_1_min": 12, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.27, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 18.22, "useful_onion_pickup_agent_1_min": 11, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.68, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.66, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.1, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.17, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 15.8, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.89, "potting_onion_agent_1_min": 12, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 6.14, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.56, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.6, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.96, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.29, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.4, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.04, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.73, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.99, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.62, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.94, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.8, 
"optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.89, "optimal_onion_potting_agent_1_min": 12, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.8, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.89, "viable_onion_potting_agent_1_min": 12, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [636.0, 630.0, 630.0, 633.0, 633.0, 627.0, 582.0, 579.0, 630.0, 633.0, 633.0, 587.0, 633.0, 633.0, 636.0, 582.0, 570.0, 590.0, 579.0, 576.0, 630.0, 581.0, 579.0, 518.0, 636.0, 636.0, 633.0, 576.0, 590.0, 633.0, 636.0, 633.0, 579.0, 579.0, 630.0, 587.0, 627.0, 584.0, 624.0, 570.0, 579.0, 587.0, 567.0, 627.0, 587.0, 576.0, 636.0, 630.0, 539.0, 627.0, 639.0, 636.0, 587.0, 630.0, 570.0, 570.0, 584.0, 621.0, 582.0, 627.0, 582.0, 627.0, 579.0, 533.0, 630.0, 630.0, 579.0, 596.0, 630.0, 579.0, 627.0, 630.0, 633.0, 633.0, 587.0, 639.0, 582.0, 581.0, 587.0, 624.0, 633.0, 621.0, 582.0, 639.0, 636.0, 582.0, 587.0, 627.0, 582.0, 582.0, 627.0, 639.0, 630.0, 630.0, 587.0, 630.0, 627.0, 630.0, 630.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [319.0, 317.0, 317.0, 313.0, 312.0, 318.0, 314.0, 319.0, 317.0, 316.0, 323.0, 304.0, 292.0, 290.0, 289.0, 290.0, 316.0, 314.0, 306.0, 327.0, 316.0, 317.0, 299.0, 288.0, 311.0, 322.0, 319.0, 314.0, 322.0, 314.0, 296.0, 286.0, 291.0, 279.0, 296.0, 294.0, 291.0, 288.0, 288.0, 288.0, 313.0, 317.0, 290.0, 291.0, 294.0, 285.0, 258.0, 260.0, 311.0, 325.0, 316.0, 320.0, 315.0, 318.0, 282.0, 294.0, 301.0, 289.0, 318.0, 315.0, 314.0, 322.0, 316.0, 317.0, 291.0, 288.0, 295.0, 284.0, 309.0, 321.0, 295.0, 292.0, 321.0, 306.0, 291.0, 293.0, 310.0, 314.0, 280.0, 290.0, 296.0, 283.0, 285.0, 302.0, 287.0, 280.0, 318.0, 309.0, 277.0, 310.0, 292.0, 284.0, 314.0, 322.0, 306.0, 324.0, 278.0, 
261.0, 309.0, 318.0, 319.0, 320.0, 322.0, 314.0, 286.0, 301.0, 326.0, 304.0, 295.0, 275.0, 292.0, 278.0, 295.0, 289.0, 308.0, 313.0, 290.0, 292.0, 305.0, 322.0, 291.0, 291.0, 327.0, 300.0, 279.0, 300.0, 263.0, 270.0, 316.0, 314.0, 313.0, 317.0, 296.0, 283.0, 302.0, 294.0, 311.0, 319.0, 304.0, 275.0, 320.0, 307.0, 316.0, 314.0, 312.0, 321.0, 316.0, 317.0, 295.0, 292.0, 312.0, 327.0, 294.0, 288.0, 284.0, 297.0, 295.0, 292.0, 302.0, 322.0, 316.0, 317.0, 308.0, 313.0, 291.0, 291.0, 322.0, 317.0, 319.0, 317.0, 291.0, 291.0, 290.0, 297.0, 307.0, 320.0, 290.0, 292.0, 280.0, 302.0, 319.0, 308.0, 316.0, 323.0, 313.0, 317.0, 305.0, 325.0, 296.0, 291.0, 313.0, 317.0, 309.0, 318.0, 311.0, 319.0, 308.0, 322.0, 289.0, 293.0]}, "sampler_perf": {"mean_env_wait_ms": 1.0463435116537838, "mean_processing_ms": 0.277465645143399, "mean_inference_ms": 1.6108149086882515}, "off_policy_estimator": {}, "info": {"num_steps_trained": 6048000, "num_steps_sampled": 3225600, "sample_time_ms": 22477.661, "load_time_ms": 36.527, "grad_time_ms": 9715.891, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.003005747450515628, "policy_loss": -0.005394397769123316, "vf_loss": 89.67745208740234, "vf_explained_var": 0.7541216015815735, "kl": 0.0018617714522406459, "entropy": 1.1351839303970337, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3225600, "episodes_total": 8064, "training_iteration": 252, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-52-11", "timestamp": 1660254731, "time_this_iter_s": 33.91162323951721, "time_total_s": 13146.20119857788, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 13146.20119857788, "timesteps_since_restore": 3225600, "iterations_since_restore": 252, "perf": {"cpu_util_percent": 29.32083333333333, "ram_util_percent": 58.725}}
-{"episode_reward_max": 639.0, "episode_reward_min": 473.0, "episode_reward_mean": 603.56, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 231.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 301.78}, "custom_metrics": {"sparse_reward_mean": 208.8, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 185.96, "shaped_reward_min": 153, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.81, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 18.78, "onion_pickup_agent_1_min": 12, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.23, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 18.06, "useful_onion_pickup_agent_1_min": 11, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.71, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.66, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.16, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.21, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 15.78, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.85, "potting_onion_agent_1_min": 12, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 6.14, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.51, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.62, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.97, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.28, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.38, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.04, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.72, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.96, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.56, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.94, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.03, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.78, 
"optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.85, "optimal_onion_potting_agent_1_min": 12, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.78, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.85, "viable_onion_potting_agent_1_min": 12, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 567.0, 636.0, 570.0, 633.0, 633.0, 582.0, 582.0, 587.0, 579.0, 536.0, 587.0, 587.0, 567.0, 630.0, 587.0, 539.0, 582.0, 570.0, 473.0, 627.0, 627.0, 587.0, 587.0, 633.0, 581.0, 579.0, 576.0, 587.0, 636.0, 582.0, 582.0, 630.0, 630.0, 579.0, 596.0, 630.0, 579.0, 627.0, 630.0, 633.0, 633.0, 587.0, 639.0, 582.0, 581.0, 587.0, 624.0, 633.0, 621.0, 582.0, 639.0, 636.0, 582.0, 587.0, 627.0, 582.0, 582.0, 627.0, 639.0, 630.0, 630.0, 587.0, 630.0, 627.0, 630.0, 630.0, 582.0, 636.0, 630.0, 630.0, 633.0, 633.0, 627.0, 582.0, 579.0, 630.0, 633.0, 633.0, 587.0, 633.0, 633.0, 636.0, 582.0, 570.0, 590.0, 579.0, 576.0, 630.0, 581.0, 579.0, 518.0, 636.0, 636.0, 633.0, 576.0, 590.0, 633.0, 636.0, 633.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [288.0, 294.0, 285.0, 282.0, 319.0, 317.0, 278.0, 292.0, 321.0, 312.0, 318.0, 315.0, 288.0, 294.0, 290.0, 292.0, 303.0, 284.0, 299.0, 280.0, 268.0, 268.0, 288.0, 299.0, 291.0, 296.0, 278.0, 289.0, 315.0, 315.0, 295.0, 292.0, 265.0, 274.0, 294.0, 288.0, 288.0, 282.0, 242.0, 231.0, 316.0, 311.0, 319.0, 308.0, 288.0, 299.0, 303.0, 284.0, 317.0, 316.0, 289.0, 292.0, 283.0, 296.0, 287.0, 289.0, 293.0, 294.0, 314.0, 322.0, 289.0, 293.0, 296.0, 286.0, 316.0, 314.0, 313.0, 317.0, 296.0, 283.0, 302.0, 294.0, 311.0, 319.0, 304.0, 275.0, 320.0, 307.0, 316.0, 314.0, 312.0, 321.0, 316.0, 317.0, 295.0, 292.0, 312.0, 327.0, 294.0, 288.0, 284.0, 297.0, 295.0, 292.0, 302.0, 322.0, 316.0, 
317.0, 308.0, 313.0, 291.0, 291.0, 322.0, 317.0, 319.0, 317.0, 291.0, 291.0, 290.0, 297.0, 307.0, 320.0, 290.0, 292.0, 280.0, 302.0, 319.0, 308.0, 316.0, 323.0, 313.0, 317.0, 305.0, 325.0, 296.0, 291.0, 313.0, 317.0, 309.0, 318.0, 311.0, 319.0, 308.0, 322.0, 289.0, 293.0, 319.0, 317.0, 317.0, 313.0, 312.0, 318.0, 314.0, 319.0, 317.0, 316.0, 323.0, 304.0, 292.0, 290.0, 289.0, 290.0, 316.0, 314.0, 306.0, 327.0, 316.0, 317.0, 299.0, 288.0, 311.0, 322.0, 319.0, 314.0, 322.0, 314.0, 296.0, 286.0, 291.0, 279.0, 296.0, 294.0, 291.0, 288.0, 288.0, 288.0, 313.0, 317.0, 290.0, 291.0, 294.0, 285.0, 258.0, 260.0, 311.0, 325.0, 316.0, 320.0, 315.0, 318.0, 282.0, 294.0, 301.0, 289.0, 318.0, 315.0, 314.0, 322.0, 316.0, 317.0]}, "sampler_perf": {"mean_env_wait_ms": 1.0439919117658958, "mean_processing_ms": 0.2769978233028824, "mean_inference_ms": 1.608730530277712}, "off_policy_estimator": {}, "info": {"num_steps_trained": 6072000, "num_steps_sampled": 3238400, "sample_time_ms": 22102.374, "load_time_ms": 36.62, "grad_time_ms": 9528.927, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.007198518142104149, "policy_loss": -0.002070576651021838, "vf_loss": 98.38677215576172, "vf_explained_var": 0.7492752075195312, "kl": 0.0017245132476091385, "entropy": 1.1391605138778687, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3238400, "episodes_total": 8096, "training_iteration": 253, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-52-38", "timestamp": 1660254758, "time_this_iter_s": 26.443045139312744, "time_total_s": 13172.644243717194, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 13172.644243717194, "timesteps_since_restore": 3238400, "iterations_since_restore": 253, "perf": {"cpu_util_percent": 30.831578947368424, "ram_util_percent": 58.665789473684214}}
-{"episode_reward_max": 639.0, "episode_reward_min": 473.0, "episode_reward_mean": 603.54, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 231.0}, "policy_reward_max": {"ppo": 332.0}, "policy_reward_mean": {"ppo": 301.77}, "custom_metrics": {"sparse_reward_mean": 209.0, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 185.54, "shaped_reward_min": 153, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.9, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 18.7, "onion_pickup_agent_1_min": 13, "onion_pickup_agent_1_max": 27, "useful_onion_pickup_agent_0_mean": 16.32, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 17.96, "useful_onion_pickup_agent_1_min": 12, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.71, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.64, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.17, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.18, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.87, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.74, "potting_onion_agent_1_min": 12, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.23, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.4, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.62, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.9, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.41, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.28, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.04, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.73, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.93, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.57, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.92, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.04, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.87, 
"optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.74, "optimal_onion_potting_agent_1_min": 12, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.87, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.74, "viable_onion_potting_agent_1_min": 12, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [633.0, 584.0, 636.0, 630.0, 627.0, 630.0, 561.0, 633.0, 579.0, 627.0, 582.0, 579.0, 582.0, 627.0, 582.0, 636.0, 630.0, 630.0, 582.0, 633.0, 582.0, 636.0, 633.0, 627.0, 582.0, 630.0, 573.0, 587.0, 627.0, 633.0, 627.0, 639.0, 627.0, 630.0, 630.0, 582.0, 636.0, 630.0, 630.0, 633.0, 633.0, 627.0, 582.0, 579.0, 630.0, 633.0, 633.0, 587.0, 633.0, 633.0, 636.0, 582.0, 570.0, 590.0, 579.0, 576.0, 630.0, 581.0, 579.0, 518.0, 636.0, 636.0, 633.0, 576.0, 590.0, 633.0, 636.0, 633.0, 582.0, 567.0, 636.0, 570.0, 633.0, 633.0, 582.0, 582.0, 587.0, 579.0, 536.0, 587.0, 587.0, 567.0, 630.0, 587.0, 539.0, 582.0, 570.0, 473.0, 627.0, 627.0, 587.0, 587.0, 633.0, 581.0, 579.0, 576.0, 587.0, 636.0, 582.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [314.0, 319.0, 304.0, 280.0, 327.0, 309.0, 316.0, 314.0, 316.0, 311.0, 316.0, 314.0, 269.0, 292.0, 316.0, 317.0, 285.0, 294.0, 313.0, 314.0, 288.0, 294.0, 298.0, 281.0, 293.0, 289.0, 324.0, 303.0, 286.0, 296.0, 316.0, 320.0, 316.0, 314.0, 313.0, 317.0, 290.0, 292.0, 314.0, 319.0, 285.0, 297.0, 317.0, 319.0, 324.0, 309.0, 310.0, 317.0, 299.0, 283.0, 323.0, 307.0, 282.0, 291.0, 294.0, 293.0, 308.0, 319.0, 332.0, 301.0, 309.0, 318.0, 314.0, 325.0, 309.0, 318.0, 311.0, 319.0, 308.0, 322.0, 289.0, 293.0, 319.0, 317.0, 317.0, 313.0, 312.0, 318.0, 314.0, 319.0, 317.0, 316.0, 323.0, 304.0, 292.0, 290.0, 289.0, 290.0, 316.0, 314.0, 306.0, 327.0, 316.0, 317.0, 299.0, 288.0, 311.0, 
322.0, 319.0, 314.0, 322.0, 314.0, 296.0, 286.0, 291.0, 279.0, 296.0, 294.0, 291.0, 288.0, 288.0, 288.0, 313.0, 317.0, 290.0, 291.0, 294.0, 285.0, 258.0, 260.0, 311.0, 325.0, 316.0, 320.0, 315.0, 318.0, 282.0, 294.0, 301.0, 289.0, 318.0, 315.0, 314.0, 322.0, 316.0, 317.0, 288.0, 294.0, 285.0, 282.0, 319.0, 317.0, 278.0, 292.0, 321.0, 312.0, 318.0, 315.0, 288.0, 294.0, 290.0, 292.0, 303.0, 284.0, 299.0, 280.0, 268.0, 268.0, 288.0, 299.0, 291.0, 296.0, 278.0, 289.0, 315.0, 315.0, 295.0, 292.0, 265.0, 274.0, 294.0, 288.0, 288.0, 282.0, 242.0, 231.0, 316.0, 311.0, 319.0, 308.0, 288.0, 299.0, 303.0, 284.0, 317.0, 316.0, 289.0, 292.0, 283.0, 296.0, 287.0, 289.0, 293.0, 294.0, 314.0, 322.0, 289.0, 293.0, 296.0, 286.0]}, "sampler_perf": {"mean_env_wait_ms": 1.041648317929554, "mean_processing_ms": 0.27652981742042726, "mean_inference_ms": 1.6063886219680592}, "off_policy_estimator": {}, "info": {"num_steps_trained": 6096000, "num_steps_sampled": 3251200, "sample_time_ms": 22038.921, "load_time_ms": 36.872, "grad_time_ms": 9318.447, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0028869707603007555, "policy_loss": -0.005314534064382315, "vf_loss": 87.72003936767578, "vf_explained_var": 0.7518091797828674, "kl": 0.0020599865820258856, "entropy": 1.1409815549850464, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3251200, "episodes_total": 8128, "training_iteration": 254, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-53-05", "timestamp": 1660254785, "time_this_iter_s": 27.532819986343384, "time_total_s": 13200.177063703537, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 13200.177063703537, "timesteps_since_restore": 3251200, "iterations_since_restore": 254, "perf": {"cpu_util_percent": 32.46923076923077, "ram_util_percent": 58.67179487179486}}
-{"episode_reward_max": 639.0, "episode_reward_min": 473.0, "episode_reward_mean": 606.86, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 231.0}, "policy_reward_max": {"ppo": 332.0}, "policy_reward_mean": {"ppo": 303.43}, "custom_metrics": {"sparse_reward_mean": 210.4, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 186.06, "shaped_reward_min": 153, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.02, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 18.74, "onion_pickup_agent_1_min": 13, "onion_pickup_agent_1_max": 27, "useful_onion_pickup_agent_0_mean": 16.44, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 17.94, "useful_onion_pickup_agent_1_min": 12, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.72, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.69, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.17, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.22, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.0, "potting_onion_agent_0_min": 13, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.69, "potting_onion_agent_1_min": 12, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 6.43, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.49, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.63, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.9, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.56, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.33, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.05, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.05, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.74, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.96, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.62, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.94, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.03, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.0, 
"optimal_onion_potting_agent_0_min": 13, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.69, "optimal_onion_potting_agent_1_min": 12, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.0, "viable_onion_potting_agent_0_min": 13, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.69, "viable_onion_potting_agent_1_min": 12, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [576.0, 639.0, 630.0, 570.0, 636.0, 584.0, 636.0, 633.0, 624.0, 579.0, 627.0, 630.0, 582.0, 639.0, 627.0, 630.0, 627.0, 627.0, 630.0, 630.0, 624.0, 587.0, 636.0, 633.0, 567.0, 630.0, 627.0, 630.0, 630.0, 636.0, 630.0, 636.0, 590.0, 633.0, 636.0, 633.0, 582.0, 567.0, 636.0, 570.0, 633.0, 633.0, 582.0, 582.0, 587.0, 579.0, 536.0, 587.0, 587.0, 567.0, 630.0, 587.0, 539.0, 582.0, 570.0, 473.0, 627.0, 627.0, 587.0, 587.0, 633.0, 581.0, 579.0, 576.0, 587.0, 636.0, 582.0, 582.0, 633.0, 584.0, 636.0, 630.0, 627.0, 630.0, 561.0, 633.0, 579.0, 627.0, 582.0, 579.0, 582.0, 627.0, 582.0, 636.0, 630.0, 630.0, 582.0, 633.0, 582.0, 636.0, 633.0, 627.0, 582.0, 630.0, 573.0, 587.0, 627.0, 633.0, 627.0, 639.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [278.0, 298.0, 314.0, 325.0, 306.0, 324.0, 291.0, 279.0, 327.0, 309.0, 296.0, 288.0, 323.0, 313.0, 316.0, 317.0, 313.0, 311.0, 298.0, 281.0, 316.0, 311.0, 316.0, 314.0, 294.0, 288.0, 322.0, 317.0, 316.0, 311.0, 319.0, 311.0, 320.0, 307.0, 307.0, 320.0, 313.0, 317.0, 317.0, 313.0, 307.0, 317.0, 294.0, 293.0, 319.0, 317.0, 310.0, 323.0, 285.0, 282.0, 314.0, 316.0, 314.0, 313.0, 309.0, 321.0, 313.0, 317.0, 319.0, 317.0, 321.0, 309.0, 315.0, 321.0, 301.0, 289.0, 318.0, 315.0, 314.0, 322.0, 316.0, 317.0, 288.0, 294.0, 285.0, 282.0, 319.0, 317.0, 278.0, 292.0, 321.0, 312.0, 318.0, 315.0, 288.0, 294.0, 290.0, 292.0, 303.0, 284.0, 299.0, 280.0, 268.0, 268.0, 288.0, 299.0, 291.0, 
296.0, 278.0, 289.0, 315.0, 315.0, 295.0, 292.0, 265.0, 274.0, 294.0, 288.0, 288.0, 282.0, 242.0, 231.0, 316.0, 311.0, 319.0, 308.0, 288.0, 299.0, 303.0, 284.0, 317.0, 316.0, 289.0, 292.0, 283.0, 296.0, 287.0, 289.0, 293.0, 294.0, 314.0, 322.0, 289.0, 293.0, 296.0, 286.0, 314.0, 319.0, 304.0, 280.0, 327.0, 309.0, 316.0, 314.0, 316.0, 311.0, 316.0, 314.0, 269.0, 292.0, 316.0, 317.0, 285.0, 294.0, 313.0, 314.0, 288.0, 294.0, 298.0, 281.0, 293.0, 289.0, 324.0, 303.0, 286.0, 296.0, 316.0, 320.0, 316.0, 314.0, 313.0, 317.0, 290.0, 292.0, 314.0, 319.0, 285.0, 297.0, 317.0, 319.0, 324.0, 309.0, 310.0, 317.0, 299.0, 283.0, 323.0, 307.0, 282.0, 291.0, 294.0, 293.0, 308.0, 319.0, 332.0, 301.0, 309.0, 318.0, 314.0, 325.0]}, "sampler_perf": {"mean_env_wait_ms": 1.0393196321657538, "mean_processing_ms": 0.2760628271968967, "mean_inference_ms": 1.603750642060521}, "off_policy_estimator": {}, "info": {"num_steps_trained": 6120000, "num_steps_sampled": 3264000, "sample_time_ms": 22112.846, "load_time_ms": 37.19, "grad_time_ms": 9333.009, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.005571722984313965, "policy_loss": -0.0022253356873989105, "vf_loss": 83.61035919189453, "vf_explained_var": 0.7589413523674011, "kl": 0.0018155118450522423, "entropy": 1.1279449462890625, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3264000, "episodes_total": 8160, "training_iteration": 255, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-53-36", "timestamp": 1660254816, "time_this_iter_s": 31.015226125717163, "time_total_s": 13231.192289829254, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 13231.192289829254, "timesteps_since_restore": 3264000, "iterations_since_restore": 255, "perf": {"cpu_util_percent": 32.91162790697674, "ram_util_percent": 59.26976744186045}}
-{"episode_reward_max": 639.0, "episode_reward_min": 530.0, "episode_reward_mean": 612.37, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 265.0}, "policy_reward_max": {"ppo": 332.0}, "policy_reward_mean": {"ppo": 306.185}, "custom_metrics": {"sparse_reward_mean": 212.6, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 187.17, "shaped_reward_min": 152, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.01, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 18.92, "onion_pickup_agent_1_min": 14, "onion_pickup_agent_1_max": 27, "useful_onion_pickup_agent_0_mean": 16.43, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 18.1, "useful_onion_pickup_agent_1_min": 12, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.59, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.76, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.17, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.25, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.12, "potting_onion_agent_0_min": 13, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.81, "potting_onion_agent_1_min": 14, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 6.41, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.53, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.62, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.94, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.54, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.32, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.04, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.72, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.04, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.66, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.99, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.12, 
"optimal_onion_potting_agent_0_min": 13, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.81, "optimal_onion_potting_agent_1_min": 14, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.12, "viable_onion_potting_agent_0_min": 13, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.81, "viable_onion_potting_agent_1_min": 14, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [530.0, 639.0, 630.0, 639.0, 636.0, 569.0, 630.0, 579.0, 633.0, 576.0, 636.0, 579.0, 587.0, 627.0, 630.0, 579.0, 587.0, 633.0, 639.0, 630.0, 633.0, 636.0, 630.0, 630.0, 633.0, 576.0, 539.0, 630.0, 552.0, 590.0, 582.0, 630.0, 587.0, 636.0, 582.0, 582.0, 633.0, 584.0, 636.0, 630.0, 627.0, 630.0, 561.0, 633.0, 579.0, 627.0, 582.0, 579.0, 582.0, 627.0, 582.0, 636.0, 630.0, 630.0, 582.0, 633.0, 582.0, 636.0, 633.0, 627.0, 582.0, 630.0, 573.0, 587.0, 627.0, 633.0, 627.0, 639.0, 576.0, 639.0, 630.0, 570.0, 636.0, 584.0, 636.0, 633.0, 624.0, 579.0, 627.0, 630.0, 582.0, 639.0, 627.0, 630.0, 627.0, 627.0, 630.0, 630.0, 624.0, 587.0, 636.0, 633.0, 567.0, 630.0, 627.0, 630.0, 630.0, 636.0, 630.0, 636.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [265.0, 265.0, 319.0, 320.0, 319.0, 311.0, 317.0, 322.0, 319.0, 317.0, 282.0, 287.0, 311.0, 319.0, 285.0, 294.0, 321.0, 312.0, 285.0, 291.0, 312.0, 324.0, 290.0, 289.0, 296.0, 291.0, 316.0, 311.0, 322.0, 308.0, 293.0, 286.0, 285.0, 302.0, 319.0, 314.0, 320.0, 319.0, 310.0, 320.0, 313.0, 320.0, 317.0, 319.0, 319.0, 311.0, 308.0, 322.0, 310.0, 323.0, 288.0, 288.0, 266.0, 273.0, 310.0, 320.0, 278.0, 274.0, 299.0, 291.0, 295.0, 287.0, 323.0, 307.0, 293.0, 294.0, 314.0, 322.0, 289.0, 293.0, 296.0, 286.0, 314.0, 319.0, 304.0, 280.0, 327.0, 309.0, 316.0, 314.0, 316.0, 311.0, 316.0, 314.0, 269.0, 292.0, 316.0, 317.0, 285.0, 294.0, 313.0, 314.0, 288.0, 294.0, 298.0, 281.0, 293.0, 
289.0, 324.0, 303.0, 286.0, 296.0, 316.0, 320.0, 316.0, 314.0, 313.0, 317.0, 290.0, 292.0, 314.0, 319.0, 285.0, 297.0, 317.0, 319.0, 324.0, 309.0, 310.0, 317.0, 299.0, 283.0, 323.0, 307.0, 282.0, 291.0, 294.0, 293.0, 308.0, 319.0, 332.0, 301.0, 309.0, 318.0, 314.0, 325.0, 278.0, 298.0, 314.0, 325.0, 306.0, 324.0, 291.0, 279.0, 327.0, 309.0, 296.0, 288.0, 323.0, 313.0, 316.0, 317.0, 313.0, 311.0, 298.0, 281.0, 316.0, 311.0, 316.0, 314.0, 294.0, 288.0, 322.0, 317.0, 316.0, 311.0, 319.0, 311.0, 320.0, 307.0, 307.0, 320.0, 313.0, 317.0, 317.0, 313.0, 307.0, 317.0, 294.0, 293.0, 319.0, 317.0, 310.0, 323.0, 285.0, 282.0, 314.0, 316.0, 314.0, 313.0, 309.0, 321.0, 313.0, 317.0, 319.0, 317.0, 321.0, 309.0, 315.0, 321.0]}, "sampler_perf": {"mean_env_wait_ms": 1.0370215932262006, "mean_processing_ms": 0.275602527422642, "mean_inference_ms": 1.6014130194901954}, "off_policy_estimator": {}, "info": {"num_steps_trained": 6144000, "num_steps_sampled": 3276800, "sample_time_ms": 22185.118, "load_time_ms": 37.192, "grad_time_ms": 9265.444, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.001450125128030777, "policy_loss": -0.006554553750902414, "vf_loss": 85.70040893554688, "vf_explained_var": 0.7625378966331482, "kl": 0.0019486347446218133, "entropy": 1.1307319402694702, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3276800, "episodes_total": 8192, "training_iteration": 256, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-54-07", "timestamp": 1660254847, "time_this_iter_s": 30.66650390625, "time_total_s": 13261.858793735504, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 13261.858793735504, "timesteps_since_restore": 3276800, "iterations_since_restore": 256, "perf": {"cpu_util_percent": 32.206818181818186, "ram_util_percent": 58.78863636363636}}
-{"episode_reward_max": 639.0, "episode_reward_min": 476.0, "episode_reward_mean": 611.49, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 236.0}, "policy_reward_max": {"ppo": 332.0}, "policy_reward_mean": {"ppo": 305.745}, "custom_metrics": {"sparse_reward_mean": 212.2, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 187.09, "shaped_reward_min": 152, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.22, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 18.76, "onion_pickup_agent_1_min": 13, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.67, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 18.0, "useful_onion_pickup_agent_1_min": 12, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.63, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.78, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.14, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.28, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.27, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.66, "potting_onion_agent_1_min": 13, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 6.27, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.63, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.54, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.01, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.53, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.34, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.05, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.67, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 10, "soup_pickup_agent_1_mean": 5.13, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.58, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.04, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.05, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 5, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.27, 
"optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.66, "optimal_onion_potting_agent_1_min": 13, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.27, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.66, "viable_onion_potting_agent_1_min": 13, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [630.0, 627.0, 579.0, 633.0, 627.0, 582.0, 579.0, 582.0, 576.0, 630.0, 579.0, 587.0, 630.0, 630.0, 582.0, 587.0, 576.0, 587.0, 476.0, 636.0, 633.0, 636.0, 636.0, 630.0, 630.0, 639.0, 636.0, 576.0, 627.0, 579.0, 582.0, 633.0, 627.0, 633.0, 627.0, 639.0, 576.0, 639.0, 630.0, 570.0, 636.0, 584.0, 636.0, 633.0, 624.0, 579.0, 627.0, 630.0, 582.0, 639.0, 627.0, 630.0, 627.0, 627.0, 630.0, 630.0, 624.0, 587.0, 636.0, 633.0, 567.0, 630.0, 627.0, 630.0, 630.0, 636.0, 630.0, 636.0, 530.0, 639.0, 630.0, 639.0, 636.0, 569.0, 630.0, 579.0, 633.0, 576.0, 636.0, 579.0, 587.0, 627.0, 630.0, 579.0, 587.0, 633.0, 639.0, 630.0, 633.0, 636.0, 630.0, 630.0, 633.0, 576.0, 539.0, 630.0, 552.0, 590.0, 582.0, 630.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [321.0, 309.0, 313.0, 314.0, 290.0, 289.0, 322.0, 311.0, 310.0, 317.0, 294.0, 288.0, 294.0, 285.0, 292.0, 290.0, 288.0, 288.0, 319.0, 311.0, 284.0, 295.0, 291.0, 296.0, 314.0, 316.0, 311.0, 319.0, 291.0, 291.0, 297.0, 290.0, 292.0, 284.0, 287.0, 300.0, 236.0, 240.0, 316.0, 320.0, 310.0, 323.0, 317.0, 319.0, 324.0, 312.0, 311.0, 319.0, 316.0, 314.0, 319.0, 320.0, 316.0, 320.0, 285.0, 291.0, 313.0, 314.0, 289.0, 290.0, 290.0, 292.0, 319.0, 314.0, 308.0, 319.0, 332.0, 301.0, 309.0, 318.0, 314.0, 325.0, 278.0, 298.0, 314.0, 325.0, 306.0, 324.0, 291.0, 279.0, 327.0, 309.0, 296.0, 288.0, 323.0, 313.0, 316.0, 317.0, 313.0, 311.0, 298.0, 281.0, 316.0, 311.0, 316.0, 314.0, 294.0, 
288.0, 322.0, 317.0, 316.0, 311.0, 319.0, 311.0, 320.0, 307.0, 307.0, 320.0, 313.0, 317.0, 317.0, 313.0, 307.0, 317.0, 294.0, 293.0, 319.0, 317.0, 310.0, 323.0, 285.0, 282.0, 314.0, 316.0, 314.0, 313.0, 309.0, 321.0, 313.0, 317.0, 319.0, 317.0, 321.0, 309.0, 315.0, 321.0, 265.0, 265.0, 319.0, 320.0, 319.0, 311.0, 317.0, 322.0, 319.0, 317.0, 282.0, 287.0, 311.0, 319.0, 285.0, 294.0, 321.0, 312.0, 285.0, 291.0, 312.0, 324.0, 290.0, 289.0, 296.0, 291.0, 316.0, 311.0, 322.0, 308.0, 293.0, 286.0, 285.0, 302.0, 319.0, 314.0, 320.0, 319.0, 310.0, 320.0, 313.0, 320.0, 317.0, 319.0, 319.0, 311.0, 308.0, 322.0, 310.0, 323.0, 288.0, 288.0, 266.0, 273.0, 310.0, 320.0, 278.0, 274.0, 299.0, 291.0, 295.0, 287.0, 323.0, 307.0]}, "sampler_perf": {"mean_env_wait_ms": 1.0347705534764586, "mean_processing_ms": 0.2751542992936445, "mean_inference_ms": 1.5997483422105416}, "off_policy_estimator": {}, "info": {"num_steps_trained": 6168000, "num_steps_sampled": 3289600, "sample_time_ms": 22403.591, "load_time_ms": 37.585, "grad_time_ms": 9159.695, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.004127854947000742, "policy_loss": -0.004435718059539795, "vf_loss": 91.31246185302734, "vf_explained_var": 0.7648020386695862, "kl": 0.0019896463491022587, "entropy": 1.1353529691696167, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3289600, "episodes_total": 8224, "training_iteration": 257, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-54-43", "timestamp": 1660254883, "time_this_iter_s": 35.68418622016907, "time_total_s": 13297.542979955673, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 13297.542979955673, "timesteps_since_restore": 3289600, "iterations_since_restore": 257, "perf": {"cpu_util_percent": 28.105999999999998, "ram_util_percent": 58.784}}
-{"episode_reward_max": 639.0, "episode_reward_min": 476.0, "episode_reward_mean": 605.11, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 236.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 302.555}, "custom_metrics": {"sparse_reward_mean": 209.8, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 185.51, "shaped_reward_min": 152, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.34, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 18.45, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.68, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 25, "useful_onion_pickup_agent_1_mean": 17.63, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.67, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.82, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.15, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.23, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.32, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.33, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 6.06, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.77, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.44, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.03, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.48, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.41, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.04, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.51, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 10, "soup_pickup_agent_1_mean": 5.2, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.42, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.09, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.05, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 5, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.32, 
"optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.33, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.32, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.33, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [533.0, 627.0, 633.0, 573.0, 627.0, 630.0, 636.0, 633.0, 582.0, 633.0, 527.0, 633.0, 582.0, 627.0, 582.0, 636.0, 627.0, 582.0, 587.0, 582.0, 630.0, 570.0, 630.0, 573.0, 630.0, 627.0, 633.0, 530.0, 630.0, 513.0, 582.0, 558.0, 630.0, 636.0, 630.0, 636.0, 530.0, 639.0, 630.0, 639.0, 636.0, 569.0, 630.0, 579.0, 633.0, 576.0, 636.0, 579.0, 587.0, 627.0, 630.0, 579.0, 587.0, 633.0, 639.0, 630.0, 633.0, 636.0, 630.0, 630.0, 633.0, 576.0, 539.0, 630.0, 552.0, 590.0, 582.0, 630.0, 630.0, 627.0, 579.0, 633.0, 627.0, 582.0, 579.0, 582.0, 576.0, 630.0, 579.0, 587.0, 630.0, 630.0, 582.0, 587.0, 576.0, 587.0, 476.0, 636.0, 633.0, 636.0, 636.0, 630.0, 630.0, 639.0, 636.0, 576.0, 627.0, 579.0, 582.0, 633.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [266.0, 267.0, 308.0, 319.0, 309.0, 324.0, 288.0, 285.0, 313.0, 314.0, 311.0, 319.0, 322.0, 314.0, 314.0, 319.0, 298.0, 284.0, 319.0, 314.0, 265.0, 262.0, 316.0, 317.0, 284.0, 298.0, 316.0, 311.0, 284.0, 298.0, 309.0, 327.0, 313.0, 314.0, 299.0, 283.0, 287.0, 300.0, 290.0, 292.0, 316.0, 314.0, 289.0, 281.0, 316.0, 314.0, 281.0, 292.0, 310.0, 320.0, 306.0, 321.0, 326.0, 307.0, 270.0, 260.0, 319.0, 311.0, 260.0, 253.0, 289.0, 293.0, 289.0, 269.0, 313.0, 317.0, 319.0, 317.0, 321.0, 309.0, 315.0, 321.0, 265.0, 265.0, 319.0, 320.0, 319.0, 311.0, 317.0, 322.0, 319.0, 317.0, 282.0, 287.0, 311.0, 319.0, 285.0, 294.0, 321.0, 312.0, 285.0, 291.0, 312.0, 324.0, 290.0, 289.0, 296.0, 
291.0, 316.0, 311.0, 322.0, 308.0, 293.0, 286.0, 285.0, 302.0, 319.0, 314.0, 320.0, 319.0, 310.0, 320.0, 313.0, 320.0, 317.0, 319.0, 319.0, 311.0, 308.0, 322.0, 310.0, 323.0, 288.0, 288.0, 266.0, 273.0, 310.0, 320.0, 278.0, 274.0, 299.0, 291.0, 295.0, 287.0, 323.0, 307.0, 321.0, 309.0, 313.0, 314.0, 290.0, 289.0, 322.0, 311.0, 310.0, 317.0, 294.0, 288.0, 294.0, 285.0, 292.0, 290.0, 288.0, 288.0, 319.0, 311.0, 284.0, 295.0, 291.0, 296.0, 314.0, 316.0, 311.0, 319.0, 291.0, 291.0, 297.0, 290.0, 292.0, 284.0, 287.0, 300.0, 236.0, 240.0, 316.0, 320.0, 310.0, 323.0, 317.0, 319.0, 324.0, 312.0, 311.0, 319.0, 316.0, 314.0, 319.0, 320.0, 316.0, 320.0, 285.0, 291.0, 313.0, 314.0, 289.0, 290.0, 290.0, 292.0, 319.0, 314.0]}, "sampler_perf": {"mean_env_wait_ms": 1.032534462731249, "mean_processing_ms": 0.2747100807108193, "mean_inference_ms": 1.5981869464429628}, "off_policy_estimator": {}, "info": {"num_steps_trained": 6192000, "num_steps_sampled": 3302400, "sample_time_ms": 22164.935, "load_time_ms": 37.303, "grad_time_ms": 9157.709, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.004157478455454111, "policy_loss": -0.004339639097452164, "vf_loss": 90.65621948242188, "vf_explained_var": 0.7486104965209961, "kl": 0.0018090683734044433, "entropy": 1.1370199918746948, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3302400, "episodes_total": 8256, "training_iteration": 258, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-55-14", "timestamp": 1660254914, "time_this_iter_s": 31.362817764282227, "time_total_s": 13328.905797719955, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 13328.905797719955, "timesteps_since_restore": 3302400, "iterations_since_restore": 258, "perf": {"cpu_util_percent": 31.451111111111114, "ram_util_percent": 58.77555555555555}}
-{"episode_reward_max": 639.0, "episode_reward_min": 465.0, "episode_reward_mean": 602.46, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 231.0}, "policy_reward_max": {"ppo": 328.0}, "policy_reward_mean": {"ppo": 301.23}, "custom_metrics": {"sparse_reward_mean": 209.0, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 184.46, "shaped_reward_min": 145, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.44, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 18.34, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.78, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 25, "useful_onion_pickup_agent_1_mean": 17.62, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.73, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.83, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.21, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.27, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 16.35, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.12, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.99, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.79, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.38, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.97, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.44, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.48, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.06, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.52, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 10, "soup_pickup_agent_1_mean": 5.18, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.4, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.09, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.07, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 5, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.35, 
"optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.12, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.35, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.12, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [587.0, 587.0, 573.0, 465.0, 581.0, 627.0, 633.0, 627.0, 576.0, 627.0, 639.0, 576.0, 636.0, 633.0, 555.0, 630.0, 636.0, 636.0, 627.0, 633.0, 582.0, 584.0, 584.0, 579.0, 633.0, 582.0, 627.0, 636.0, 627.0, 627.0, 587.0, 630.0, 552.0, 590.0, 582.0, 630.0, 630.0, 627.0, 579.0, 633.0, 627.0, 582.0, 579.0, 582.0, 576.0, 630.0, 579.0, 587.0, 630.0, 630.0, 582.0, 587.0, 576.0, 587.0, 476.0, 636.0, 633.0, 636.0, 636.0, 630.0, 630.0, 639.0, 636.0, 576.0, 627.0, 579.0, 582.0, 633.0, 533.0, 627.0, 633.0, 573.0, 627.0, 630.0, 636.0, 633.0, 582.0, 633.0, 527.0, 633.0, 582.0, 627.0, 582.0, 636.0, 627.0, 582.0, 587.0, 582.0, 630.0, 570.0, 630.0, 573.0, 630.0, 627.0, 633.0, 530.0, 630.0, 513.0, 582.0, 558.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [295.0, 292.0, 296.0, 291.0, 283.0, 290.0, 234.0, 231.0, 290.0, 291.0, 321.0, 306.0, 305.0, 328.0, 316.0, 311.0, 271.0, 305.0, 311.0, 316.0, 311.0, 328.0, 287.0, 289.0, 316.0, 320.0, 313.0, 320.0, 281.0, 274.0, 316.0, 314.0, 324.0, 312.0, 325.0, 311.0, 313.0, 314.0, 314.0, 319.0, 287.0, 295.0, 303.0, 281.0, 290.0, 294.0, 293.0, 286.0, 321.0, 312.0, 297.0, 285.0, 320.0, 307.0, 314.0, 322.0, 313.0, 314.0, 311.0, 316.0, 301.0, 286.0, 319.0, 311.0, 278.0, 274.0, 299.0, 291.0, 295.0, 287.0, 323.0, 307.0, 321.0, 309.0, 313.0, 314.0, 290.0, 289.0, 322.0, 311.0, 310.0, 317.0, 294.0, 288.0, 294.0, 285.0, 292.0, 290.0, 288.0, 288.0, 319.0, 311.0, 284.0, 295.0, 291.0, 296.0, 314.0, 
316.0, 311.0, 319.0, 291.0, 291.0, 297.0, 290.0, 292.0, 284.0, 287.0, 300.0, 236.0, 240.0, 316.0, 320.0, 310.0, 323.0, 317.0, 319.0, 324.0, 312.0, 311.0, 319.0, 316.0, 314.0, 319.0, 320.0, 316.0, 320.0, 285.0, 291.0, 313.0, 314.0, 289.0, 290.0, 290.0, 292.0, 319.0, 314.0, 266.0, 267.0, 308.0, 319.0, 309.0, 324.0, 288.0, 285.0, 313.0, 314.0, 311.0, 319.0, 322.0, 314.0, 314.0, 319.0, 298.0, 284.0, 319.0, 314.0, 265.0, 262.0, 316.0, 317.0, 284.0, 298.0, 316.0, 311.0, 284.0, 298.0, 309.0, 327.0, 313.0, 314.0, 299.0, 283.0, 287.0, 300.0, 290.0, 292.0, 316.0, 314.0, 289.0, 281.0, 316.0, 314.0, 281.0, 292.0, 310.0, 320.0, 306.0, 321.0, 326.0, 307.0, 270.0, 260.0, 319.0, 311.0, 260.0, 253.0, 289.0, 293.0, 289.0, 269.0]}, "sampler_perf": {"mean_env_wait_ms": 1.0303222005081312, "mean_processing_ms": 0.27427243552224245, "mean_inference_ms": 1.5968410183934156}, "off_policy_estimator": {}, "info": {"num_steps_trained": 6216000, "num_steps_sampled": 3315200, "sample_time_ms": 22358.82, "load_time_ms": 37.268, "grad_time_ms": 9112.824, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0012977579608559608, "policy_loss": -0.0070681399665772915, "vf_loss": 89.34113311767578, "vf_explained_var": 0.751798152923584, "kl": 0.0021080432925373316, "entropy": 1.1364187002182007, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3315200, "episodes_total": 8288, "training_iteration": 259, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-55-48", "timestamp": 1660254948, "time_this_iter_s": 33.836853981018066, "time_total_s": 13362.742651700974, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 13362.742651700974, "timesteps_since_restore": 3315200, "iterations_since_restore": 259, "perf": {"cpu_util_percent": 29.470212765957445, "ram_util_percent": 58.776595744680854}}
-{"episode_reward_max": 639.0, "episode_reward_min": 465.0, "episode_reward_mean": 603.78, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 231.0}, "policy_reward_max": {"ppo": 328.0}, "policy_reward_mean": {"ppo": 301.89}, "custom_metrics": {"sparse_reward_mean": 209.4, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 184.98, "shaped_reward_min": 145, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.42, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 18.27, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.89, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 25, "useful_onion_pickup_agent_1_mean": 17.55, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.7, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.74, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.19, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.22, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 16.37, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.14, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 6.06, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.95, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.33, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.07, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.48, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.59, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.05, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.05, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.43, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 5.26, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.35, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.19, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.37, 
"optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.14, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.37, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.14, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [627.0, 633.0, 633.0, 544.0, 587.0, 587.0, 639.0, 587.0, 636.0, 633.0, 573.0, 587.0, 636.0, 567.0, 587.0, 633.0, 630.0, 633.0, 579.0, 627.0, 624.0, 630.0, 582.0, 579.0, 627.0, 582.0, 579.0, 590.0, 630.0, 627.0, 630.0, 579.0, 627.0, 579.0, 582.0, 633.0, 533.0, 627.0, 633.0, 573.0, 627.0, 630.0, 636.0, 633.0, 582.0, 633.0, 527.0, 633.0, 582.0, 627.0, 582.0, 636.0, 627.0, 582.0, 587.0, 582.0, 630.0, 570.0, 630.0, 573.0, 630.0, 627.0, 633.0, 530.0, 630.0, 513.0, 582.0, 558.0, 587.0, 587.0, 573.0, 465.0, 581.0, 627.0, 633.0, 627.0, 576.0, 627.0, 639.0, 576.0, 636.0, 633.0, 555.0, 630.0, 636.0, 636.0, 627.0, 633.0, 582.0, 584.0, 584.0, 579.0, 633.0, 582.0, 627.0, 636.0, 627.0, 627.0, 587.0, 630.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [316.0, 311.0, 317.0, 316.0, 309.0, 324.0, 268.0, 276.0, 288.0, 299.0, 299.0, 288.0, 322.0, 317.0, 288.0, 299.0, 319.0, 317.0, 319.0, 314.0, 290.0, 283.0, 287.0, 300.0, 311.0, 325.0, 285.0, 282.0, 290.0, 297.0, 319.0, 314.0, 311.0, 319.0, 312.0, 321.0, 277.0, 302.0, 320.0, 307.0, 319.0, 305.0, 314.0, 316.0, 295.0, 287.0, 296.0, 283.0, 313.0, 314.0, 284.0, 298.0, 289.0, 290.0, 294.0, 296.0, 311.0, 319.0, 303.0, 324.0, 316.0, 314.0, 290.0, 289.0, 313.0, 314.0, 289.0, 290.0, 290.0, 292.0, 319.0, 314.0, 266.0, 267.0, 308.0, 319.0, 309.0, 324.0, 288.0, 285.0, 313.0, 314.0, 311.0, 319.0, 322.0, 314.0, 314.0, 319.0, 298.0, 284.0, 319.0, 314.0, 265.0, 262.0, 316.0, 317.0, 284.0, 
298.0, 316.0, 311.0, 284.0, 298.0, 309.0, 327.0, 313.0, 314.0, 299.0, 283.0, 287.0, 300.0, 290.0, 292.0, 316.0, 314.0, 289.0, 281.0, 316.0, 314.0, 281.0, 292.0, 310.0, 320.0, 306.0, 321.0, 326.0, 307.0, 270.0, 260.0, 319.0, 311.0, 260.0, 253.0, 289.0, 293.0, 289.0, 269.0, 295.0, 292.0, 296.0, 291.0, 283.0, 290.0, 234.0, 231.0, 290.0, 291.0, 321.0, 306.0, 305.0, 328.0, 316.0, 311.0, 271.0, 305.0, 311.0, 316.0, 311.0, 328.0, 287.0, 289.0, 316.0, 320.0, 313.0, 320.0, 281.0, 274.0, 316.0, 314.0, 324.0, 312.0, 325.0, 311.0, 313.0, 314.0, 314.0, 319.0, 287.0, 295.0, 303.0, 281.0, 290.0, 294.0, 293.0, 286.0, 321.0, 312.0, 297.0, 285.0, 320.0, 307.0, 314.0, 322.0, 313.0, 314.0, 311.0, 316.0, 301.0, 286.0, 319.0, 311.0]}, "sampler_perf": {"mean_env_wait_ms": 1.0281105190585473, "mean_processing_ms": 0.27383411395901525, "mean_inference_ms": 1.5952417170076567}, "off_policy_estimator": {}, "info": {"num_steps_trained": 6240000, "num_steps_sampled": 3328000, "sample_time_ms": 22352.291, "load_time_ms": 37.439, "grad_time_ms": 9262.711, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.002568518975749612, "policy_loss": -0.005389755126088858, "vf_loss": 85.3111801147461, "vf_explained_var": 0.7668444514274597, "kl": 0.0014818129129707813, "entropy": 1.145686149597168, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3328000, "episodes_total": 8320, "training_iteration": 260, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-56-23", "timestamp": 1660254983, "time_this_iter_s": 34.4713191986084, "time_total_s": 13397.213970899582, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 13397.213970899582, "timesteps_since_restore": 3328000, "iterations_since_restore": 260, "perf": {"cpu_util_percent": 30.669387755102036, "ram_util_percent": 58.697959183673476}}
-{"episode_reward_max": 639.0, "episode_reward_min": 351.0, "episode_reward_mean": 602.97, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 168.0}, "policy_reward_max": {"ppo": 328.0}, "policy_reward_mean": {"ppo": 301.485}, "custom_metrics": {"sparse_reward_mean": 209.2, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 184.57, "shaped_reward_min": 111, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.18, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 18.34, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.62, "useful_onion_pickup_agent_0_min": 9, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 17.71, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.74, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.56, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.2, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.22, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 16.11, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.37, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 6.09, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.86, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.32, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.99, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.45, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.56, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.04, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.5, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 5.17, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.43, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.11, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.11, 
"optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.37, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.11, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.37, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [639.0, 630.0, 630.0, 582.0, 567.0, 576.0, 633.0, 639.0, 579.0, 624.0, 633.0, 627.0, 627.0, 630.0, 633.0, 630.0, 582.0, 567.0, 351.0, 584.0, 558.0, 639.0, 584.0, 627.0, 633.0, 587.0, 633.0, 587.0, 630.0, 630.0, 582.0, 582.0, 630.0, 513.0, 582.0, 558.0, 587.0, 587.0, 573.0, 465.0, 581.0, 627.0, 633.0, 627.0, 576.0, 627.0, 639.0, 576.0, 636.0, 633.0, 555.0, 630.0, 636.0, 636.0, 627.0, 633.0, 582.0, 584.0, 584.0, 579.0, 633.0, 582.0, 627.0, 636.0, 627.0, 627.0, 587.0, 630.0, 627.0, 633.0, 633.0, 544.0, 587.0, 587.0, 639.0, 587.0, 636.0, 633.0, 573.0, 587.0, 636.0, 567.0, 587.0, 633.0, 630.0, 633.0, 579.0, 627.0, 624.0, 630.0, 582.0, 579.0, 627.0, 582.0, 579.0, 590.0, 630.0, 627.0, 630.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [324.0, 315.0, 314.0, 316.0, 313.0, 317.0, 280.0, 302.0, 284.0, 283.0, 286.0, 290.0, 316.0, 317.0, 311.0, 328.0, 287.0, 292.0, 312.0, 312.0, 311.0, 322.0, 313.0, 314.0, 313.0, 314.0, 306.0, 324.0, 319.0, 314.0, 317.0, 313.0, 293.0, 289.0, 285.0, 282.0, 168.0, 183.0, 289.0, 295.0, 277.0, 281.0, 327.0, 312.0, 285.0, 299.0, 318.0, 309.0, 313.0, 320.0, 288.0, 299.0, 319.0, 314.0, 294.0, 293.0, 316.0, 314.0, 316.0, 314.0, 298.0, 284.0, 278.0, 304.0, 319.0, 311.0, 260.0, 253.0, 289.0, 293.0, 289.0, 269.0, 295.0, 292.0, 296.0, 291.0, 283.0, 290.0, 234.0, 231.0, 290.0, 291.0, 321.0, 306.0, 305.0, 328.0, 316.0, 311.0, 271.0, 305.0, 311.0, 316.0, 311.0, 328.0, 287.0, 289.0, 316.0, 
320.0, 313.0, 320.0, 281.0, 274.0, 316.0, 314.0, 324.0, 312.0, 325.0, 311.0, 313.0, 314.0, 314.0, 319.0, 287.0, 295.0, 303.0, 281.0, 290.0, 294.0, 293.0, 286.0, 321.0, 312.0, 297.0, 285.0, 320.0, 307.0, 314.0, 322.0, 313.0, 314.0, 311.0, 316.0, 301.0, 286.0, 319.0, 311.0, 316.0, 311.0, 317.0, 316.0, 309.0, 324.0, 268.0, 276.0, 288.0, 299.0, 299.0, 288.0, 322.0, 317.0, 288.0, 299.0, 319.0, 317.0, 319.0, 314.0, 290.0, 283.0, 287.0, 300.0, 311.0, 325.0, 285.0, 282.0, 290.0, 297.0, 319.0, 314.0, 311.0, 319.0, 312.0, 321.0, 277.0, 302.0, 320.0, 307.0, 319.0, 305.0, 314.0, 316.0, 295.0, 287.0, 296.0, 283.0, 313.0, 314.0, 284.0, 298.0, 289.0, 290.0, 294.0, 296.0, 311.0, 319.0, 303.0, 324.0, 316.0, 314.0, 290.0, 289.0]}, "sampler_perf": {"mean_env_wait_ms": 1.0259226733727673, "mean_processing_ms": 0.27340278893355835, "mean_inference_ms": 1.5937276386512644}, "off_policy_estimator": {}, "info": {"num_steps_trained": 6264000, "num_steps_sampled": 3340800, "sample_time_ms": 22448.254, "load_time_ms": 37.569, "grad_time_ms": 9264.987, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.003604738973081112, "policy_loss": -0.004238134250044823, "vf_loss": 84.2165298461914, "vf_explained_var": 0.770367443561554, "kl": 0.002041497267782688, "entropy": 1.1575653553009033, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3340800, "episodes_total": 8352, "training_iteration": 261, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-56-55", "timestamp": 1660255015, "time_this_iter_s": 32.84105324745178, "time_total_s": 13430.055024147034, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 13430.055024147034, "timesteps_since_restore": 3340800, "iterations_since_restore": 261, "perf": {"cpu_util_percent": 32.742553191489364, "ram_util_percent": 58.7659574468085}}
-{"episode_reward_max": 639.0, "episode_reward_min": 351.0, "episode_reward_mean": 605.62, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 168.0}, "policy_reward_max": {"ppo": 328.0}, "policy_reward_mean": {"ppo": 302.81}, "custom_metrics": {"sparse_reward_mean": 210.0, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 185.62, "shaped_reward_min": 111, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.95, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 18.58, "onion_pickup_agent_1_min": 13, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 16.41, "useful_onion_pickup_agent_0_min": 9, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 17.79, "useful_onion_pickup_agent_1_min": 12, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.67, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.59, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.2, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.15, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.03, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.63, "potting_onion_agent_1_min": 13, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 6.26, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.76, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.45, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.98, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.46, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.56, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.62, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 5.05, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.56, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.0, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.03, 
"optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.63, "optimal_onion_potting_agent_1_min": 13, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.03, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.63, "viable_onion_potting_agent_1_min": 13, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [633.0, 633.0, 579.0, 582.0, 633.0, 633.0, 630.0, 636.0, 633.0, 576.0, 582.0, 627.0, 587.0, 587.0, 582.0, 633.0, 639.0, 579.0, 582.0, 576.0, 630.0, 630.0, 615.0, 570.0, 633.0, 579.0, 630.0, 633.0, 587.0, 581.0, 627.0, 582.0, 627.0, 627.0, 587.0, 630.0, 627.0, 633.0, 633.0, 544.0, 587.0, 587.0, 639.0, 587.0, 636.0, 633.0, 573.0, 587.0, 636.0, 567.0, 587.0, 633.0, 630.0, 633.0, 579.0, 627.0, 624.0, 630.0, 582.0, 579.0, 627.0, 582.0, 579.0, 590.0, 630.0, 627.0, 630.0, 579.0, 639.0, 630.0, 630.0, 582.0, 567.0, 576.0, 633.0, 639.0, 579.0, 624.0, 633.0, 627.0, 627.0, 630.0, 633.0, 630.0, 582.0, 567.0, 351.0, 584.0, 558.0, 639.0, 584.0, 627.0, 633.0, 587.0, 633.0, 587.0, 630.0, 630.0, 582.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [323.0, 310.0, 316.0, 317.0, 285.0, 294.0, 286.0, 296.0, 315.0, 318.0, 326.0, 307.0, 316.0, 314.0, 315.0, 321.0, 314.0, 319.0, 301.0, 275.0, 291.0, 291.0, 308.0, 319.0, 308.0, 279.0, 302.0, 285.0, 291.0, 291.0, 314.0, 319.0, 314.0, 325.0, 301.0, 278.0, 300.0, 282.0, 296.0, 280.0, 313.0, 317.0, 316.0, 314.0, 304.0, 311.0, 290.0, 280.0, 313.0, 320.0, 296.0, 283.0, 308.0, 322.0, 319.0, 314.0, 291.0, 296.0, 295.0, 286.0, 316.0, 311.0, 286.0, 296.0, 313.0, 314.0, 311.0, 316.0, 301.0, 286.0, 319.0, 311.0, 316.0, 311.0, 317.0, 316.0, 309.0, 324.0, 268.0, 276.0, 288.0, 299.0, 299.0, 288.0, 322.0, 317.0, 288.0, 299.0, 319.0, 317.0, 319.0, 314.0, 290.0, 283.0, 287.0, 300.0, 311.0, 
325.0, 285.0, 282.0, 290.0, 297.0, 319.0, 314.0, 311.0, 319.0, 312.0, 321.0, 277.0, 302.0, 320.0, 307.0, 319.0, 305.0, 314.0, 316.0, 295.0, 287.0, 296.0, 283.0, 313.0, 314.0, 284.0, 298.0, 289.0, 290.0, 294.0, 296.0, 311.0, 319.0, 303.0, 324.0, 316.0, 314.0, 290.0, 289.0, 324.0, 315.0, 314.0, 316.0, 313.0, 317.0, 280.0, 302.0, 284.0, 283.0, 286.0, 290.0, 316.0, 317.0, 311.0, 328.0, 287.0, 292.0, 312.0, 312.0, 311.0, 322.0, 313.0, 314.0, 313.0, 314.0, 306.0, 324.0, 319.0, 314.0, 317.0, 313.0, 293.0, 289.0, 285.0, 282.0, 168.0, 183.0, 289.0, 295.0, 277.0, 281.0, 327.0, 312.0, 285.0, 299.0, 318.0, 309.0, 313.0, 320.0, 288.0, 299.0, 319.0, 314.0, 294.0, 293.0, 316.0, 314.0, 316.0, 314.0, 298.0, 284.0, 278.0, 304.0]}, "sampler_perf": {"mean_env_wait_ms": 1.0237598370369554, "mean_processing_ms": 0.2729750841964574, "mean_inference_ms": 1.5924306371043695}, "off_policy_estimator": {}, "info": {"num_steps_trained": 6288000, "num_steps_sampled": 3353600, "sample_time_ms": 22598.435, "load_time_ms": 37.639, "grad_time_ms": 9270.4, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0002879177627619356, "policy_loss": -0.007559783756732941, "vf_loss": 84.22747802734375, "vf_explained_var": 0.7608786225318909, "kl": 0.0017919730162248015, "entropy": 1.1500838994979858, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3353600, "episodes_total": 8384, "training_iteration": 262, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-57-31", "timestamp": 1660255051, "time_this_iter_s": 35.47017812728882, "time_total_s": 13465.525202274323, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 13465.525202274323, "timesteps_since_restore": 3353600, "iterations_since_restore": 262, "perf": {"cpu_util_percent": 25.712, "ram_util_percent": 58.78}}
-{"episode_reward_max": 639.0, "episode_reward_min": 351.0, "episode_reward_mean": 606.83, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 168.0}, "policy_reward_max": {"ppo": 328.0}, "policy_reward_mean": {"ppo": 303.415}, "custom_metrics": {"sparse_reward_mean": 210.8, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 185.23, "shaped_reward_min": 111, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.88, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 18.69, "onion_pickup_agent_1_min": 14, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 16.26, "useful_onion_pickup_agent_0_min": 9, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 17.81, "useful_onion_pickup_agent_1_min": 12, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.62, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.58, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.2, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.15, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.89, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.76, "potting_onion_agent_1_min": 13, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 6.18, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.71, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 5.47, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.89, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.42, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.56, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.66, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.99, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.61, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.96, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.89, 
"optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.76, "optimal_onion_potting_agent_1_min": 13, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.89, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.76, "viable_onion_potting_agent_1_min": 13, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [630.0, 630.0, 636.0, 581.0, 624.0, 633.0, 581.0, 579.0, 459.0, 630.0, 573.0, 633.0, 630.0, 633.0, 633.0, 627.0, 630.0, 627.0, 630.0, 627.0, 633.0, 636.0, 573.0, 579.0, 570.0, 624.0, 630.0, 636.0, 582.0, 636.0, 582.0, 636.0, 630.0, 627.0, 630.0, 579.0, 639.0, 630.0, 630.0, 582.0, 567.0, 576.0, 633.0, 639.0, 579.0, 624.0, 633.0, 627.0, 627.0, 630.0, 633.0, 630.0, 582.0, 567.0, 351.0, 584.0, 558.0, 639.0, 584.0, 627.0, 633.0, 587.0, 633.0, 587.0, 630.0, 630.0, 582.0, 582.0, 633.0, 633.0, 579.0, 582.0, 633.0, 633.0, 630.0, 636.0, 633.0, 576.0, 582.0, 627.0, 587.0, 587.0, 582.0, 633.0, 639.0, 579.0, 582.0, 576.0, 630.0, 630.0, 615.0, 570.0, 633.0, 579.0, 630.0, 633.0, 587.0, 581.0, 627.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [313.0, 317.0, 316.0, 314.0, 322.0, 314.0, 285.0, 296.0, 306.0, 318.0, 317.0, 316.0, 288.0, 293.0, 288.0, 291.0, 235.0, 224.0, 324.0, 306.0, 285.0, 288.0, 313.0, 320.0, 312.0, 318.0, 315.0, 318.0, 310.0, 323.0, 311.0, 316.0, 308.0, 322.0, 306.0, 321.0, 315.0, 315.0, 321.0, 306.0, 314.0, 319.0, 321.0, 315.0, 283.0, 290.0, 301.0, 278.0, 282.0, 288.0, 305.0, 319.0, 310.0, 320.0, 327.0, 309.0, 288.0, 294.0, 319.0, 317.0, 298.0, 284.0, 316.0, 320.0, 311.0, 319.0, 303.0, 324.0, 316.0, 314.0, 290.0, 289.0, 324.0, 315.0, 314.0, 316.0, 313.0, 317.0, 280.0, 302.0, 284.0, 283.0, 286.0, 290.0, 316.0, 317.0, 311.0, 328.0, 287.0, 292.0, 312.0, 312.0, 311.0, 322.0, 313.0, 314.0, 313.0, 
314.0, 306.0, 324.0, 319.0, 314.0, 317.0, 313.0, 293.0, 289.0, 285.0, 282.0, 168.0, 183.0, 289.0, 295.0, 277.0, 281.0, 327.0, 312.0, 285.0, 299.0, 318.0, 309.0, 313.0, 320.0, 288.0, 299.0, 319.0, 314.0, 294.0, 293.0, 316.0, 314.0, 316.0, 314.0, 298.0, 284.0, 278.0, 304.0, 323.0, 310.0, 316.0, 317.0, 285.0, 294.0, 286.0, 296.0, 315.0, 318.0, 326.0, 307.0, 316.0, 314.0, 315.0, 321.0, 314.0, 319.0, 301.0, 275.0, 291.0, 291.0, 308.0, 319.0, 308.0, 279.0, 302.0, 285.0, 291.0, 291.0, 314.0, 319.0, 314.0, 325.0, 301.0, 278.0, 300.0, 282.0, 296.0, 280.0, 313.0, 317.0, 316.0, 314.0, 304.0, 311.0, 290.0, 280.0, 313.0, 320.0, 296.0, 283.0, 308.0, 322.0, 319.0, 314.0, 291.0, 296.0, 295.0, 286.0, 316.0, 311.0, 286.0, 296.0]}, "sampler_perf": {"mean_env_wait_ms": 1.0216145913316899, "mean_processing_ms": 0.27255014515744136, "mean_inference_ms": 1.591234908963895}, "off_policy_estimator": {}, "info": {"num_steps_trained": 6312000, "num_steps_sampled": 3366400, "sample_time_ms": 23169.124, "load_time_ms": 37.393, "grad_time_ms": 9419.779, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.005937855690717697, "policy_loss": -0.0020803138613700867, "vf_loss": 85.89215087890625, "vf_explained_var": 0.7588068842887878, "kl": 0.001875289366580546, "entropy": 1.142077088356018, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3366400, "episodes_total": 8416, "training_iteration": 263, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-58-05", "timestamp": 1660255085, "time_this_iter_s": 33.64332914352417, "time_total_s": 13499.168531417847, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 13499.168531417847, "timesteps_since_restore": 3366400, "iterations_since_restore": 263, "perf": {"cpu_util_percent": 28.2468085106383, "ram_util_percent": 58.74042553191488}}
-{"episode_reward_max": 639.0, "episode_reward_min": 459.0, "episode_reward_mean": 605.68, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 224.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 302.84}, "custom_metrics": {"sparse_reward_mean": 210.2, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 185.28, "shaped_reward_min": 139, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.04, "onion_pickup_agent_0_min": 10, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 18.71, "onion_pickup_agent_1_min": 15, "onion_pickup_agent_1_max": 27, "useful_onion_pickup_agent_0_mean": 16.44, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 17.72, "useful_onion_pickup_agent_1_min": 14, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.64, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.78, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.22, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.26, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 6, "potting_onion_agent_0_mean": 16.06, "potting_onion_agent_0_min": 9, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.61, "potting_onion_agent_1_min": 14, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.12, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.72, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 5.45, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.99, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.4, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.55, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.62, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.98, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.57, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.95, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.06, 
"optimal_onion_potting_agent_0_min": 9, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.61, "optimal_onion_potting_agent_1_min": 14, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.06, "viable_onion_potting_agent_0_min": 9, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.61, "viable_onion_potting_agent_1_min": 14, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 633.0, 627.0, 579.0, 573.0, 627.0, 579.0, 582.0, 587.0, 582.0, 579.0, 627.0, 582.0, 627.0, 579.0, 627.0, 525.0, 630.0, 587.0, 576.0, 633.0, 636.0, 519.0, 582.0, 576.0, 636.0, 636.0, 582.0, 630.0, 579.0, 633.0, 633.0, 630.0, 630.0, 582.0, 582.0, 633.0, 633.0, 579.0, 582.0, 633.0, 633.0, 630.0, 636.0, 633.0, 576.0, 582.0, 627.0, 587.0, 587.0, 582.0, 633.0, 639.0, 579.0, 582.0, 576.0, 630.0, 630.0, 615.0, 570.0, 633.0, 579.0, 630.0, 633.0, 587.0, 581.0, 627.0, 582.0, 630.0, 630.0, 636.0, 581.0, 624.0, 633.0, 581.0, 579.0, 459.0, 630.0, 573.0, 633.0, 630.0, 633.0, 633.0, 627.0, 630.0, 627.0, 630.0, 627.0, 633.0, 636.0, 573.0, 579.0, 570.0, 624.0, 630.0, 636.0, 582.0, 636.0, 582.0, 636.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [290.0, 289.0, 319.0, 314.0, 308.0, 319.0, 283.0, 296.0, 290.0, 283.0, 316.0, 311.0, 291.0, 288.0, 291.0, 291.0, 294.0, 293.0, 291.0, 291.0, 295.0, 284.0, 313.0, 314.0, 295.0, 287.0, 313.0, 314.0, 291.0, 288.0, 313.0, 314.0, 264.0, 261.0, 304.0, 326.0, 283.0, 304.0, 292.0, 284.0, 309.0, 324.0, 314.0, 322.0, 242.0, 277.0, 294.0, 288.0, 287.0, 289.0, 317.0, 319.0, 320.0, 316.0, 291.0, 291.0, 316.0, 314.0, 283.0, 296.0, 319.0, 314.0, 319.0, 314.0, 316.0, 314.0, 316.0, 314.0, 298.0, 284.0, 278.0, 304.0, 323.0, 310.0, 316.0, 317.0, 285.0, 294.0, 286.0, 296.0, 315.0, 318.0, 326.0, 307.0, 316.0, 314.0, 315.0, 321.0, 314.0, 319.0, 301.0, 275.0, 291.0, 291.0, 308.0, 319.0, 308.0, 
279.0, 302.0, 285.0, 291.0, 291.0, 314.0, 319.0, 314.0, 325.0, 301.0, 278.0, 300.0, 282.0, 296.0, 280.0, 313.0, 317.0, 316.0, 314.0, 304.0, 311.0, 290.0, 280.0, 313.0, 320.0, 296.0, 283.0, 308.0, 322.0, 319.0, 314.0, 291.0, 296.0, 295.0, 286.0, 316.0, 311.0, 286.0, 296.0, 313.0, 317.0, 316.0, 314.0, 322.0, 314.0, 285.0, 296.0, 306.0, 318.0, 317.0, 316.0, 288.0, 293.0, 288.0, 291.0, 235.0, 224.0, 324.0, 306.0, 285.0, 288.0, 313.0, 320.0, 312.0, 318.0, 315.0, 318.0, 310.0, 323.0, 311.0, 316.0, 308.0, 322.0, 306.0, 321.0, 315.0, 315.0, 321.0, 306.0, 314.0, 319.0, 321.0, 315.0, 283.0, 290.0, 301.0, 278.0, 282.0, 288.0, 305.0, 319.0, 310.0, 320.0, 327.0, 309.0, 288.0, 294.0, 319.0, 317.0, 298.0, 284.0, 316.0, 320.0]}, "sampler_perf": {"mean_env_wait_ms": 1.0194825436603638, "mean_processing_ms": 0.27212618264491645, "mean_inference_ms": 1.590112958914433}, "off_policy_estimator": {}, "info": {"num_steps_trained": 6336000, "num_steps_sampled": 3379200, "sample_time_ms": 23605.349, "load_time_ms": 37.251, "grad_time_ms": 9861.879, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0006588895921595395, "policy_loss": -0.008630036376416683, "vf_loss": 85.43579864501953, "vf_explained_var": 0.770875871181488, "kl": 0.0017466336721554399, "entropy": 1.1448642015457153, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3379200, "episodes_total": 8448, "training_iteration": 264, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-58-41", "timestamp": 1660255121, "time_this_iter_s": 36.31201386451721, "time_total_s": 13535.480545282364, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 13535.480545282364, "timesteps_since_restore": 3379200, "iterations_since_restore": 264, "perf": {"cpu_util_percent": 29.313461538461542, "ram_util_percent": 59.09038461538463}}
-{"episode_reward_max": 639.0, "episode_reward_min": 459.0, "episode_reward_mean": 605.44, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 224.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 302.72}, "custom_metrics": {"sparse_reward_mean": 210.2, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 185.04, "shaped_reward_min": 139, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.23, "onion_pickup_agent_0_min": 10, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 18.36, "onion_pickup_agent_1_min": 13, "onion_pickup_agent_1_max": 27, "useful_onion_pickup_agent_0_mean": 16.74, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 17.48, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.66, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.62, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.17, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.25, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 6, "potting_onion_agent_0_mean": 16.21, "potting_onion_agent_0_min": 9, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.4, "potting_onion_agent_1_min": 13, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.09, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.74, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 5.41, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.01, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.44, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.47, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.51, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.1, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.47, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.05, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.21, 
"optimal_onion_potting_agent_0_min": 9, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.4, "optimal_onion_potting_agent_1_min": 13, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.21, "viable_onion_potting_agent_0_min": 9, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.4, "viable_onion_potting_agent_1_min": 13, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 582.0, 633.0, 636.0, 639.0, 633.0, 630.0, 630.0, 633.0, 582.0, 576.0, 579.0, 587.0, 587.0, 582.0, 582.0, 633.0, 582.0, 630.0, 630.0, 621.0, 621.0, 582.0, 630.0, 630.0, 630.0, 564.0, 624.0, 582.0, 627.0, 573.0, 630.0, 587.0, 581.0, 627.0, 582.0, 630.0, 630.0, 636.0, 581.0, 624.0, 633.0, 581.0, 579.0, 459.0, 630.0, 573.0, 633.0, 630.0, 633.0, 633.0, 627.0, 630.0, 627.0, 630.0, 627.0, 633.0, 636.0, 573.0, 579.0, 570.0, 624.0, 630.0, 636.0, 582.0, 636.0, 582.0, 636.0, 579.0, 633.0, 627.0, 579.0, 573.0, 627.0, 579.0, 582.0, 587.0, 582.0, 579.0, 627.0, 582.0, 627.0, 579.0, 627.0, 525.0, 630.0, 587.0, 576.0, 633.0, 636.0, 519.0, 582.0, 576.0, 636.0, 636.0, 582.0, 630.0, 579.0, 633.0, 633.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [293.0, 289.0, 283.0, 299.0, 316.0, 317.0, 316.0, 320.0, 319.0, 320.0, 324.0, 309.0, 321.0, 309.0, 309.0, 321.0, 324.0, 309.0, 289.0, 293.0, 292.0, 284.0, 291.0, 288.0, 293.0, 294.0, 294.0, 293.0, 289.0, 293.0, 289.0, 293.0, 316.0, 317.0, 291.0, 291.0, 313.0, 317.0, 316.0, 314.0, 313.0, 308.0, 316.0, 305.0, 292.0, 290.0, 312.0, 318.0, 316.0, 314.0, 314.0, 316.0, 284.0, 280.0, 321.0, 303.0, 301.0, 281.0, 313.0, 314.0, 288.0, 285.0, 314.0, 316.0, 291.0, 296.0, 295.0, 286.0, 316.0, 311.0, 286.0, 296.0, 313.0, 317.0, 316.0, 314.0, 322.0, 314.0, 285.0, 296.0, 306.0, 318.0, 317.0, 316.0, 288.0, 293.0, 288.0, 291.0, 235.0, 224.0, 324.0, 306.0, 285.0, 288.0, 313.0, 320.0, 312.0, 
318.0, 315.0, 318.0, 310.0, 323.0, 311.0, 316.0, 308.0, 322.0, 306.0, 321.0, 315.0, 315.0, 321.0, 306.0, 314.0, 319.0, 321.0, 315.0, 283.0, 290.0, 301.0, 278.0, 282.0, 288.0, 305.0, 319.0, 310.0, 320.0, 327.0, 309.0, 288.0, 294.0, 319.0, 317.0, 298.0, 284.0, 316.0, 320.0, 290.0, 289.0, 319.0, 314.0, 308.0, 319.0, 283.0, 296.0, 290.0, 283.0, 316.0, 311.0, 291.0, 288.0, 291.0, 291.0, 294.0, 293.0, 291.0, 291.0, 295.0, 284.0, 313.0, 314.0, 295.0, 287.0, 313.0, 314.0, 291.0, 288.0, 313.0, 314.0, 264.0, 261.0, 304.0, 326.0, 283.0, 304.0, 292.0, 284.0, 309.0, 324.0, 314.0, 322.0, 242.0, 277.0, 294.0, 288.0, 287.0, 289.0, 317.0, 319.0, 320.0, 316.0, 291.0, 291.0, 316.0, 314.0, 283.0, 296.0, 319.0, 314.0, 319.0, 314.0]}, "sampler_perf": {"mean_env_wait_ms": 1.0173660844031607, "mean_processing_ms": 0.27170594360174505, "mean_inference_ms": 1.5890528923225553}, "off_policy_estimator": {}, "info": {"num_steps_trained": 6360000, "num_steps_sampled": 3392000, "sample_time_ms": 24130.635, "load_time_ms": 37.048, "grad_time_ms": 9999.028, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0024043002631515265, "policy_loss": -0.005360407754778862, "vf_loss": 83.37548828125, "vf_explained_var": 0.7662093043327332, "kl": 0.001953211845830083, "entropy": 1.145652174949646, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3392000, "episodes_total": 8480, "training_iteration": 265, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-59-19", "timestamp": 1660255159, "time_this_iter_s": 37.638370990753174, "time_total_s": 13573.118916273117, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 13573.118916273117, "timesteps_since_restore": 3392000, "iterations_since_restore": 265, "perf": {"cpu_util_percent": 30.500000000000004, "ram_util_percent": 58.69433962264149}}
-{"episode_reward_max": 639.0, "episode_reward_min": 519.0, "episode_reward_mean": 604.81, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 242.0}, "policy_reward_max": {"ppo": 330.0}, "policy_reward_mean": {"ppo": 302.405}, "custom_metrics": {"sparse_reward_mean": 209.8, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 185.21, "shaped_reward_min": 159, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.11, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 18.52, "onion_pickup_agent_1_min": 13, "onion_pickup_agent_1_max": 27, "useful_onion_pickup_agent_0_mean": 16.61, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 17.64, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 25, "onion_drop_agent_0_mean": 0.66, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.67, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.15, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.28, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 6, "potting_onion_agent_0_mean": 16.19, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.46, "potting_onion_agent_1_min": 13, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.23, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.57, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.53, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.99, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.49, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.38, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.55, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.01, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.52, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.97, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.19, 
"optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.46, "optimal_onion_potting_agent_1_min": 13, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.19, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.46, "viable_onion_potting_agent_1_min": 13, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [630.0, 627.0, 582.0, 582.0, 627.0, 630.0, 630.0, 636.0, 579.0, 624.0, 576.0, 633.0, 630.0, 630.0, 576.0, 633.0, 636.0, 627.0, 582.0, 576.0, 621.0, 639.0, 582.0, 582.0, 630.0, 582.0, 579.0, 533.0, 579.0, 582.0, 630.0, 636.0, 582.0, 636.0, 582.0, 636.0, 579.0, 633.0, 627.0, 579.0, 573.0, 627.0, 579.0, 582.0, 587.0, 582.0, 579.0, 627.0, 582.0, 627.0, 579.0, 627.0, 525.0, 630.0, 587.0, 576.0, 633.0, 636.0, 519.0, 582.0, 576.0, 636.0, 636.0, 582.0, 630.0, 579.0, 633.0, 633.0, 582.0, 582.0, 633.0, 636.0, 639.0, 633.0, 630.0, 630.0, 633.0, 582.0, 576.0, 579.0, 587.0, 587.0, 582.0, 582.0, 633.0, 582.0, 630.0, 630.0, 621.0, 621.0, 582.0, 630.0, 630.0, 630.0, 564.0, 624.0, 582.0, 627.0, 573.0, 630.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [313.0, 317.0, 323.0, 304.0, 288.0, 294.0, 285.0, 297.0, 330.0, 297.0, 321.0, 309.0, 315.0, 315.0, 312.0, 324.0, 295.0, 284.0, 315.0, 309.0, 293.0, 283.0, 322.0, 311.0, 324.0, 306.0, 316.0, 314.0, 288.0, 288.0, 314.0, 319.0, 314.0, 322.0, 313.0, 314.0, 287.0, 295.0, 290.0, 286.0, 313.0, 308.0, 314.0, 325.0, 291.0, 291.0, 286.0, 296.0, 309.0, 321.0, 288.0, 294.0, 285.0, 294.0, 267.0, 266.0, 293.0, 286.0, 296.0, 286.0, 314.0, 316.0, 317.0, 319.0, 288.0, 294.0, 319.0, 317.0, 298.0, 284.0, 316.0, 320.0, 290.0, 289.0, 319.0, 314.0, 308.0, 319.0, 283.0, 296.0, 290.0, 283.0, 316.0, 311.0, 291.0, 288.0, 291.0, 291.0, 294.0, 293.0, 291.0, 291.0, 295.0, 284.0, 313.0, 314.0, 295.0, 
287.0, 313.0, 314.0, 291.0, 288.0, 313.0, 314.0, 264.0, 261.0, 304.0, 326.0, 283.0, 304.0, 292.0, 284.0, 309.0, 324.0, 314.0, 322.0, 242.0, 277.0, 294.0, 288.0, 287.0, 289.0, 317.0, 319.0, 320.0, 316.0, 291.0, 291.0, 316.0, 314.0, 283.0, 296.0, 319.0, 314.0, 319.0, 314.0, 293.0, 289.0, 283.0, 299.0, 316.0, 317.0, 316.0, 320.0, 319.0, 320.0, 324.0, 309.0, 321.0, 309.0, 309.0, 321.0, 324.0, 309.0, 289.0, 293.0, 292.0, 284.0, 291.0, 288.0, 293.0, 294.0, 294.0, 293.0, 289.0, 293.0, 289.0, 293.0, 316.0, 317.0, 291.0, 291.0, 313.0, 317.0, 316.0, 314.0, 313.0, 308.0, 316.0, 305.0, 292.0, 290.0, 312.0, 318.0, 316.0, 314.0, 314.0, 316.0, 284.0, 280.0, 321.0, 303.0, 301.0, 281.0, 313.0, 314.0, 288.0, 285.0, 314.0, 316.0]}, "sampler_perf": {"mean_env_wait_ms": 1.0152699162864507, "mean_processing_ms": 0.27129002186513274, "mean_inference_ms": 1.588063531007753}, "off_policy_estimator": {}, "info": {"num_steps_trained": 6384000, "num_steps_sampled": 3404800, "sample_time_ms": 24421.033, "load_time_ms": 37.381, "grad_time_ms": 10403.492, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0023645039182156324, "policy_loss": -0.005783146247267723, "vf_loss": 87.20269775390625, "vf_explained_var": 0.7547242045402527, "kl": 0.0018250799039378762, "entropy": 1.1452516317367554, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3404800, "episodes_total": 8512, "training_iteration": 266, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-59-56", "timestamp": 1660255196, "time_this_iter_s": 37.62380003929138, "time_total_s": 13610.742716312408, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 13610.742716312408, "timesteps_since_restore": 3404800, "iterations_since_restore": 266, "perf": {"cpu_util_percent": 28.747169811320756, "ram_util_percent": 58.75471698113208}}
-{"episode_reward_max": 639.0, "episode_reward_min": 522.0, "episode_reward_mean": 608.64, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 260.0}, "policy_reward_max": {"ppo": 330.0}, "policy_reward_mean": {"ppo": 304.32}, "custom_metrics": {"sparse_reward_mean": 211.2, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 186.24, "shaped_reward_min": 162, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.18, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 18.56, "onion_pickup_agent_1_min": 13, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 16.66, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 17.71, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 25, "onion_drop_agent_0_mean": 0.65, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.6, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.17, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.25, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 16.25, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.59, "potting_onion_agent_1_min": 13, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.29, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.71, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 13, "useful_dish_pickup_agent_0_mean": 5.56, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.93, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.54, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.49, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.05, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.62, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.07, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.54, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.02, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.05, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.25, 
"optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.59, "optimal_onion_potting_agent_1_min": 13, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.25, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.59, "viable_onion_potting_agent_1_min": 13, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [630.0, 573.0, 542.0, 630.0, 630.0, 630.0, 636.0, 633.0, 582.0, 522.0, 576.0, 636.0, 630.0, 587.0, 630.0, 581.0, 633.0, 587.0, 569.0, 627.0, 636.0, 582.0, 633.0, 576.0, 584.0, 633.0, 636.0, 636.0, 624.0, 627.0, 639.0, 636.0, 630.0, 579.0, 633.0, 633.0, 582.0, 582.0, 633.0, 636.0, 639.0, 633.0, 630.0, 630.0, 633.0, 582.0, 576.0, 579.0, 587.0, 587.0, 582.0, 582.0, 633.0, 582.0, 630.0, 630.0, 621.0, 621.0, 582.0, 630.0, 630.0, 630.0, 564.0, 624.0, 582.0, 627.0, 573.0, 630.0, 630.0, 627.0, 582.0, 582.0, 627.0, 630.0, 630.0, 636.0, 579.0, 624.0, 576.0, 633.0, 630.0, 630.0, 576.0, 633.0, 636.0, 627.0, 582.0, 576.0, 621.0, 639.0, 582.0, 582.0, 630.0, 582.0, 579.0, 533.0, 579.0, 582.0, 630.0, 636.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [317.0, 313.0, 274.0, 299.0, 276.0, 266.0, 306.0, 324.0, 321.0, 309.0, 314.0, 316.0, 324.0, 312.0, 319.0, 314.0, 283.0, 299.0, 262.0, 260.0, 293.0, 283.0, 308.0, 328.0, 311.0, 319.0, 287.0, 300.0, 313.0, 317.0, 285.0, 296.0, 315.0, 318.0, 302.0, 285.0, 282.0, 287.0, 319.0, 308.0, 317.0, 319.0, 291.0, 291.0, 313.0, 320.0, 276.0, 300.0, 295.0, 289.0, 314.0, 319.0, 316.0, 320.0, 320.0, 316.0, 319.0, 305.0, 313.0, 314.0, 324.0, 315.0, 314.0, 322.0, 316.0, 314.0, 283.0, 296.0, 319.0, 314.0, 319.0, 314.0, 293.0, 289.0, 283.0, 299.0, 316.0, 317.0, 316.0, 320.0, 319.0, 320.0, 324.0, 309.0, 321.0, 309.0, 309.0, 321.0, 324.0, 309.0, 289.0, 293.0, 292.0, 284.0, 291.0, 288.0, 293.0, 
294.0, 294.0, 293.0, 289.0, 293.0, 289.0, 293.0, 316.0, 317.0, 291.0, 291.0, 313.0, 317.0, 316.0, 314.0, 313.0, 308.0, 316.0, 305.0, 292.0, 290.0, 312.0, 318.0, 316.0, 314.0, 314.0, 316.0, 284.0, 280.0, 321.0, 303.0, 301.0, 281.0, 313.0, 314.0, 288.0, 285.0, 314.0, 316.0, 313.0, 317.0, 323.0, 304.0, 288.0, 294.0, 285.0, 297.0, 330.0, 297.0, 321.0, 309.0, 315.0, 315.0, 312.0, 324.0, 295.0, 284.0, 315.0, 309.0, 293.0, 283.0, 322.0, 311.0, 324.0, 306.0, 316.0, 314.0, 288.0, 288.0, 314.0, 319.0, 314.0, 322.0, 313.0, 314.0, 287.0, 295.0, 290.0, 286.0, 313.0, 308.0, 314.0, 325.0, 291.0, 291.0, 286.0, 296.0, 309.0, 321.0, 288.0, 294.0, 285.0, 294.0, 267.0, 266.0, 293.0, 286.0, 296.0, 286.0, 314.0, 316.0, 317.0, 319.0]}, "sampler_perf": {"mean_env_wait_ms": 1.0132060848493245, "mean_processing_ms": 0.27088101809255155, "mean_inference_ms": 1.5871323468376288}, "off_policy_estimator": {}, "info": {"num_steps_trained": 6408000, "num_steps_sampled": 3417600, "sample_time_ms": 24236.974, "load_time_ms": 37.027, "grad_time_ms": 10553.07, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.002214438281953335, "policy_loss": -0.00573044503107667, "vf_loss": 85.19269561767578, "vf_explained_var": 0.7630549073219299, "kl": 0.0019984643440693617, "entropy": 1.1487520933151245, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3417600, "episodes_total": 8544, "training_iteration": 267, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-00-32", "timestamp": 1660255232, "time_this_iter_s": 35.333903789520264, "time_total_s": 13646.076620101929, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 13646.076620101929, "timesteps_since_restore": 3417600, "iterations_since_restore": 267, "perf": {"cpu_util_percent": 32.378, "ram_util_percent": 58.867999999999995}}
-{"episode_reward_max": 639.0, "episode_reward_min": 402.0, "episode_reward_mean": 603.9, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 200.0}, "policy_reward_max": {"ppo": 331.0}, "policy_reward_mean": {"ppo": 301.95}, "custom_metrics": {"sparse_reward_mean": 209.4, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 185.1, "shaped_reward_min": 122, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.06, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 18.73, "onion_pickup_agent_1_min": 13, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 16.28, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 17.89, "useful_onion_pickup_agent_1_min": 12, "useful_onion_pickup_agent_1_max": 25, "onion_drop_agent_0_mean": 0.69, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 0.79, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.27, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.28, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 16.05, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.54, "potting_onion_agent_1_min": 12, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.31, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.68, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 13, "useful_dish_pickup_agent_0_mean": 5.58, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.88, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.57, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.57, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.05, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.05, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.64, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.98, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.54, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.93, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.04, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.05, 
"optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.54, "optimal_onion_potting_agent_1_min": 12, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.05, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.54, "viable_onion_potting_agent_1_min": 12, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 630.0, 576.0, 630.0, 636.0, 582.0, 522.0, 639.0, 587.0, 582.0, 473.0, 633.0, 630.0, 584.0, 581.0, 636.0, 587.0, 579.0, 402.0, 627.0, 630.0, 630.0, 636.0, 627.0, 633.0, 636.0, 582.0, 627.0, 462.0, 630.0, 630.0, 633.0, 582.0, 627.0, 573.0, 630.0, 630.0, 627.0, 582.0, 582.0, 627.0, 630.0, 630.0, 636.0, 579.0, 624.0, 576.0, 633.0, 630.0, 630.0, 576.0, 633.0, 636.0, 627.0, 582.0, 576.0, 621.0, 639.0, 582.0, 582.0, 630.0, 582.0, 579.0, 533.0, 579.0, 582.0, 630.0, 636.0, 630.0, 573.0, 542.0, 630.0, 630.0, 630.0, 636.0, 633.0, 582.0, 522.0, 576.0, 636.0, 630.0, 587.0, 630.0, 581.0, 633.0, 587.0, 569.0, 627.0, 636.0, 582.0, 633.0, 576.0, 584.0, 633.0, 636.0, 636.0, 624.0, 627.0, 639.0, 636.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [288.0, 291.0, 308.0, 322.0, 285.0, 291.0, 321.0, 309.0, 321.0, 315.0, 294.0, 288.0, 266.0, 256.0, 322.0, 317.0, 295.0, 292.0, 296.0, 286.0, 229.0, 244.0, 331.0, 302.0, 316.0, 314.0, 287.0, 297.0, 296.0, 285.0, 327.0, 309.0, 291.0, 296.0, 284.0, 295.0, 202.0, 200.0, 308.0, 319.0, 311.0, 319.0, 322.0, 308.0, 316.0, 320.0, 306.0, 321.0, 317.0, 316.0, 314.0, 322.0, 294.0, 288.0, 316.0, 311.0, 242.0, 220.0, 313.0, 317.0, 324.0, 306.0, 322.0, 311.0, 301.0, 281.0, 313.0, 314.0, 288.0, 285.0, 314.0, 316.0, 313.0, 317.0, 323.0, 304.0, 288.0, 294.0, 285.0, 297.0, 330.0, 297.0, 321.0, 309.0, 315.0, 315.0, 312.0, 324.0, 295.0, 284.0, 315.0, 309.0, 293.0, 283.0, 322.0, 311.0, 324.0, 
306.0, 316.0, 314.0, 288.0, 288.0, 314.0, 319.0, 314.0, 322.0, 313.0, 314.0, 287.0, 295.0, 290.0, 286.0, 313.0, 308.0, 314.0, 325.0, 291.0, 291.0, 286.0, 296.0, 309.0, 321.0, 288.0, 294.0, 285.0, 294.0, 267.0, 266.0, 293.0, 286.0, 296.0, 286.0, 314.0, 316.0, 317.0, 319.0, 317.0, 313.0, 274.0, 299.0, 276.0, 266.0, 306.0, 324.0, 321.0, 309.0, 314.0, 316.0, 324.0, 312.0, 319.0, 314.0, 283.0, 299.0, 262.0, 260.0, 293.0, 283.0, 308.0, 328.0, 311.0, 319.0, 287.0, 300.0, 313.0, 317.0, 285.0, 296.0, 315.0, 318.0, 302.0, 285.0, 282.0, 287.0, 319.0, 308.0, 317.0, 319.0, 291.0, 291.0, 313.0, 320.0, 276.0, 300.0, 295.0, 289.0, 314.0, 319.0, 316.0, 320.0, 320.0, 316.0, 319.0, 305.0, 313.0, 314.0, 324.0, 315.0, 314.0, 322.0]}, "sampler_perf": {"mean_env_wait_ms": 1.0111539095910416, "mean_processing_ms": 0.2704732512620114, "mean_inference_ms": 1.5859630571947259}, "off_policy_estimator": {}, "info": {"num_steps_trained": 6432000, "num_steps_sampled": 3430400, "sample_time_ms": 24395.085, "load_time_ms": 36.597, "grad_time_ms": 10568.153, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.002145373960956931, "policy_loss": -0.006650958210229874, "vf_loss": 93.7173843383789, "vf_explained_var": 0.7514896392822266, "kl": 0.0022997509222477674, "entropy": 1.150799036026001, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3430400, "episodes_total": 8576, "training_iteration": 268, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-01-05", "timestamp": 1660255265, "time_this_iter_s": 33.08881878852844, "time_total_s": 13679.165438890457, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 13679.165438890457, "timesteps_since_restore": 3430400, "iterations_since_restore": 268, "perf": {"cpu_util_percent": 29.461702127659574, "ram_util_percent": 58.71702127659575}}
-{"episode_reward_max": 639.0, "episode_reward_min": 402.0, "episode_reward_mean": 603.37, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 200.0}, "policy_reward_max": {"ppo": 331.0}, "policy_reward_mean": {"ppo": 301.685}, "custom_metrics": {"sparse_reward_mean": 209.2, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 184.97, "shaped_reward_min": 122, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.28, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 18.71, "onion_pickup_agent_1_min": 13, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 16.49, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 17.93, "useful_onion_pickup_agent_1_min": 12, "useful_onion_pickup_agent_1_max": 26, "onion_drop_agent_0_mean": 0.77, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 0.87, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.31, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.34, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 16.16, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 17.47, "potting_onion_agent_1_min": 12, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 6.16, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.85, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 13, "useful_dish_pickup_agent_0_mean": 5.43, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.93, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.55, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.58, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.06, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.05, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.5, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 5.12, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.42, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 5.05, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.03, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.16, 
"optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 17.47, "optimal_onion_potting_agent_1_min": 12, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.16, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 17.47, "viable_onion_potting_agent_1_min": 12, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [633.0, 561.0, 582.0, 627.0, 627.0, 633.0, 630.0, 636.0, 633.0, 558.0, 582.0, 579.0, 639.0, 636.0, 570.0, 582.0, 587.0, 639.0, 627.0, 584.0, 587.0, 579.0, 582.0, 624.0, 579.0, 636.0, 633.0, 633.0, 582.0, 627.0, 576.0, 570.0, 579.0, 582.0, 630.0, 636.0, 630.0, 573.0, 542.0, 630.0, 630.0, 630.0, 636.0, 633.0, 582.0, 522.0, 576.0, 636.0, 630.0, 587.0, 630.0, 581.0, 633.0, 587.0, 569.0, 627.0, 636.0, 582.0, 633.0, 576.0, 584.0, 633.0, 636.0, 636.0, 624.0, 627.0, 639.0, 636.0, 579.0, 630.0, 576.0, 630.0, 636.0, 582.0, 522.0, 639.0, 587.0, 582.0, 473.0, 633.0, 630.0, 584.0, 581.0, 636.0, 587.0, 579.0, 402.0, 627.0, 630.0, 630.0, 636.0, 627.0, 633.0, 636.0, 582.0, 627.0, 462.0, 630.0, 630.0, 633.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [321.0, 312.0, 289.0, 272.0, 286.0, 296.0, 317.0, 310.0, 313.0, 314.0, 324.0, 309.0, 313.0, 317.0, 312.0, 324.0, 311.0, 322.0, 276.0, 282.0, 293.0, 289.0, 293.0, 286.0, 316.0, 323.0, 305.0, 331.0, 291.0, 279.0, 290.0, 292.0, 292.0, 295.0, 317.0, 322.0, 303.0, 324.0, 298.0, 286.0, 293.0, 294.0, 293.0, 286.0, 290.0, 292.0, 308.0, 316.0, 278.0, 301.0, 314.0, 322.0, 312.0, 321.0, 316.0, 317.0, 285.0, 297.0, 313.0, 314.0, 280.0, 296.0, 288.0, 282.0, 293.0, 286.0, 296.0, 286.0, 314.0, 316.0, 317.0, 319.0, 317.0, 313.0, 274.0, 299.0, 276.0, 266.0, 306.0, 324.0, 321.0, 309.0, 314.0, 316.0, 324.0, 312.0, 319.0, 314.0, 283.0, 299.0, 262.0, 260.0, 293.0, 283.0, 308.0, 328.0, 311.0, 
319.0, 287.0, 300.0, 313.0, 317.0, 285.0, 296.0, 315.0, 318.0, 302.0, 285.0, 282.0, 287.0, 319.0, 308.0, 317.0, 319.0, 291.0, 291.0, 313.0, 320.0, 276.0, 300.0, 295.0, 289.0, 314.0, 319.0, 316.0, 320.0, 320.0, 316.0, 319.0, 305.0, 313.0, 314.0, 324.0, 315.0, 314.0, 322.0, 288.0, 291.0, 308.0, 322.0, 285.0, 291.0, 321.0, 309.0, 321.0, 315.0, 294.0, 288.0, 266.0, 256.0, 322.0, 317.0, 295.0, 292.0, 296.0, 286.0, 229.0, 244.0, 331.0, 302.0, 316.0, 314.0, 287.0, 297.0, 296.0, 285.0, 327.0, 309.0, 291.0, 296.0, 284.0, 295.0, 202.0, 200.0, 308.0, 319.0, 311.0, 319.0, 322.0, 308.0, 316.0, 320.0, 306.0, 321.0, 317.0, 316.0, 314.0, 322.0, 294.0, 288.0, 316.0, 311.0, 242.0, 220.0, 313.0, 317.0, 324.0, 306.0, 322.0, 311.0]}, "sampler_perf": {"mean_env_wait_ms": 1.0091026125530362, "mean_processing_ms": 0.27006446196399875, "mean_inference_ms": 1.584328225519923}, "off_policy_estimator": {}, "info": {"num_steps_trained": 6456000, "num_steps_sampled": 3443200, "sample_time_ms": 23964.302, "load_time_ms": 37.107, "grad_time_ms": 10520.809, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.005307864397764206, "policy_loss": -0.0031857620924711227, "vf_loss": 90.64007568359375, "vf_explained_var": 0.7599647641181946, "kl": 0.0021453702356666327, "entropy": 1.14076566696167, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3443200, "episodes_total": 8608, "training_iteration": 269, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-01-34", "timestamp": 1660255294, "time_this_iter_s": 29.06058406829834, "time_total_s": 13708.226022958755, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 13708.226022958755, "timesteps_since_restore": 3443200, "iterations_since_restore": 269, "perf": {"cpu_util_percent": 32.897560975609764, "ram_util_percent": 58.64878048780488}}
-{"episode_reward_max": 639.0, "episode_reward_min": 402.0, "episode_reward_mean": 601.92, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 200.0}, "policy_reward_max": {"ppo": 331.0}, "policy_reward_mean": {"ppo": 300.96}, "custom_metrics": {"sparse_reward_mean": 208.4, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 185.12, "shaped_reward_min": 122, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.36, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 18.66, "onion_pickup_agent_1_min": 13, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 16.57, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 17.89, "useful_onion_pickup_agent_1_min": 12, "useful_onion_pickup_agent_1_max": 26, "onion_drop_agent_0_mean": 0.73, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 0.97, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.27, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.32, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 16.24, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 17.33, "potting_onion_agent_1_min": 12, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 6.08, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.88, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 12, "useful_dish_pickup_agent_0_mean": 5.46, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 5.01, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.45, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.59, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.04, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.5, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 5.11, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.45, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 5.01, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.24, 
"optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 17.33, "optimal_onion_potting_agent_1_min": 12, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.24, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 17.33, "viable_onion_potting_agent_1_min": 12, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 576.0, 584.0, 587.0, 636.0, 564.0, 630.0, 587.0, 582.0, 639.0, 630.0, 587.0, 579.0, 639.0, 587.0, 636.0, 530.0, 636.0, 630.0, 633.0, 530.0, 633.0, 639.0, 636.0, 582.0, 587.0, 582.0, 627.0, 587.0, 576.0, 639.0, 590.0, 624.0, 627.0, 639.0, 636.0, 579.0, 630.0, 576.0, 630.0, 636.0, 582.0, 522.0, 639.0, 587.0, 582.0, 473.0, 633.0, 630.0, 584.0, 581.0, 636.0, 587.0, 579.0, 402.0, 627.0, 630.0, 630.0, 636.0, 627.0, 633.0, 636.0, 582.0, 627.0, 462.0, 630.0, 630.0, 633.0, 633.0, 561.0, 582.0, 627.0, 627.0, 633.0, 630.0, 636.0, 633.0, 558.0, 582.0, 579.0, 639.0, 636.0, 570.0, 582.0, 587.0, 639.0, 627.0, 584.0, 587.0, 579.0, 582.0, 624.0, 579.0, 636.0, 633.0, 633.0, 582.0, 627.0, 576.0, 570.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [293.0, 289.0, 294.0, 282.0, 293.0, 291.0, 296.0, 291.0, 315.0, 321.0, 282.0, 282.0, 322.0, 308.0, 308.0, 279.0, 288.0, 294.0, 314.0, 325.0, 311.0, 319.0, 293.0, 294.0, 288.0, 291.0, 319.0, 320.0, 288.0, 299.0, 319.0, 317.0, 268.0, 262.0, 324.0, 312.0, 316.0, 314.0, 314.0, 319.0, 258.0, 272.0, 314.0, 319.0, 325.0, 314.0, 321.0, 315.0, 283.0, 299.0, 293.0, 294.0, 288.0, 294.0, 316.0, 311.0, 290.0, 297.0, 285.0, 291.0, 322.0, 317.0, 291.0, 299.0, 319.0, 305.0, 313.0, 314.0, 324.0, 315.0, 314.0, 322.0, 288.0, 291.0, 308.0, 322.0, 285.0, 291.0, 321.0, 309.0, 321.0, 315.0, 294.0, 288.0, 266.0, 256.0, 322.0, 317.0, 295.0, 292.0, 296.0, 286.0, 229.0, 244.0, 331.0, 302.0, 316.0, 
314.0, 287.0, 297.0, 296.0, 285.0, 327.0, 309.0, 291.0, 296.0, 284.0, 295.0, 202.0, 200.0, 308.0, 319.0, 311.0, 319.0, 322.0, 308.0, 316.0, 320.0, 306.0, 321.0, 317.0, 316.0, 314.0, 322.0, 294.0, 288.0, 316.0, 311.0, 242.0, 220.0, 313.0, 317.0, 324.0, 306.0, 322.0, 311.0, 321.0, 312.0, 289.0, 272.0, 286.0, 296.0, 317.0, 310.0, 313.0, 314.0, 324.0, 309.0, 313.0, 317.0, 312.0, 324.0, 311.0, 322.0, 276.0, 282.0, 293.0, 289.0, 293.0, 286.0, 316.0, 323.0, 305.0, 331.0, 291.0, 279.0, 290.0, 292.0, 292.0, 295.0, 317.0, 322.0, 303.0, 324.0, 298.0, 286.0, 293.0, 294.0, 293.0, 286.0, 290.0, 292.0, 308.0, 316.0, 278.0, 301.0, 314.0, 322.0, 312.0, 321.0, 316.0, 317.0, 285.0, 297.0, 313.0, 314.0, 280.0, 296.0, 288.0, 282.0]}, "sampler_perf": {"mean_env_wait_ms": 1.0070340183284676, "mean_processing_ms": 0.26964921546752857, "mean_inference_ms": 1.5821965593270972}, "off_policy_estimator": {}, "info": {"num_steps_trained": 6480000, "num_steps_sampled": 3456000, "sample_time_ms": 23540.277, "load_time_ms": 37.062, "grad_time_ms": 10283.314, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.001998053165152669, "policy_loss": -0.00615869602188468, "vf_loss": 87.24394989013672, "vf_explained_var": 0.7729328274726868, "kl": 0.00186056864913553, "entropy": 1.1353095769882202, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3456000, "episodes_total": 8640, "training_iteration": 270, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-02-02", "timestamp": 1660255322, "time_this_iter_s": 27.856099128723145, "time_total_s": 13736.082122087479, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 13736.082122087479, "timesteps_since_restore": 3456000, "iterations_since_restore": 270, "perf": {"cpu_util_percent": 34.1025641025641, "ram_util_percent": 58.69999999999998}}
-{"episode_reward_max": 639.0, "episode_reward_min": 436.0, "episode_reward_mean": 604.7, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 211.0}, "policy_reward_max": {"ppo": 331.0}, "policy_reward_mean": {"ppo": 302.35}, "custom_metrics": {"sparse_reward_mean": 209.2, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 186.3, "shaped_reward_min": 142, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.38, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 18.65, "onion_pickup_agent_1_min": 13, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 16.67, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 17.82, "useful_onion_pickup_agent_1_min": 12, "useful_onion_pickup_agent_1_max": 26, "onion_drop_agent_0_mean": 0.68, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.91, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.23, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, "useful_onion_drop_agent_1_mean": 0.32, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 16.36, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 17.47, "potting_onion_agent_1_min": 12, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 6.14, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 13, "dish_pickup_agent_1_mean": 5.95, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 12, "useful_dish_pickup_agent_0_mean": 5.42, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 5.1, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.54, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 0.55, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.49, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 5.19, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.43, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 5.1, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.03, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.36, 
"optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 17.47, "optimal_onion_potting_agent_1_min": 12, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.36, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 17.47, "viable_onion_potting_agent_1_min": 12, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 587.0, 627.0, 636.0, 633.0, 636.0, 627.0, 633.0, 579.0, 582.0, 573.0, 633.0, 570.0, 624.0, 627.0, 639.0, 636.0, 633.0, 636.0, 630.0, 579.0, 576.0, 436.0, 639.0, 587.0, 636.0, 636.0, 639.0, 587.0, 636.0, 587.0, 639.0, 462.0, 630.0, 630.0, 633.0, 633.0, 561.0, 582.0, 627.0, 627.0, 633.0, 630.0, 636.0, 633.0, 558.0, 582.0, 579.0, 639.0, 636.0, 570.0, 582.0, 587.0, 639.0, 627.0, 584.0, 587.0, 579.0, 582.0, 624.0, 579.0, 636.0, 633.0, 633.0, 582.0, 627.0, 576.0, 570.0, 582.0, 576.0, 584.0, 587.0, 636.0, 564.0, 630.0, 587.0, 582.0, 639.0, 630.0, 587.0, 579.0, 639.0, 587.0, 636.0, 530.0, 636.0, 630.0, 633.0, 530.0, 633.0, 639.0, 636.0, 582.0, 587.0, 582.0, 627.0, 587.0, 576.0, 639.0, 590.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [294.0, 288.0, 284.0, 303.0, 315.0, 312.0, 314.0, 322.0, 311.0, 322.0, 319.0, 317.0, 310.0, 317.0, 324.0, 309.0, 289.0, 290.0, 294.0, 288.0, 284.0, 289.0, 303.0, 330.0, 277.0, 293.0, 316.0, 308.0, 321.0, 306.0, 314.0, 325.0, 319.0, 317.0, 314.0, 319.0, 331.0, 305.0, 321.0, 309.0, 293.0, 286.0, 288.0, 288.0, 211.0, 225.0, 312.0, 327.0, 293.0, 294.0, 314.0, 322.0, 317.0, 319.0, 314.0, 325.0, 302.0, 285.0, 314.0, 322.0, 293.0, 294.0, 317.0, 322.0, 242.0, 220.0, 313.0, 317.0, 324.0, 306.0, 322.0, 311.0, 321.0, 312.0, 289.0, 272.0, 286.0, 296.0, 317.0, 310.0, 313.0, 314.0, 324.0, 309.0, 313.0, 317.0, 312.0, 324.0, 311.0, 322.0, 276.0, 282.0, 293.0, 289.0, 293.0, 286.0, 316.0, 
323.0, 305.0, 331.0, 291.0, 279.0, 290.0, 292.0, 292.0, 295.0, 317.0, 322.0, 303.0, 324.0, 298.0, 286.0, 293.0, 294.0, 293.0, 286.0, 290.0, 292.0, 308.0, 316.0, 278.0, 301.0, 314.0, 322.0, 312.0, 321.0, 316.0, 317.0, 285.0, 297.0, 313.0, 314.0, 280.0, 296.0, 288.0, 282.0, 293.0, 289.0, 294.0, 282.0, 293.0, 291.0, 296.0, 291.0, 315.0, 321.0, 282.0, 282.0, 322.0, 308.0, 308.0, 279.0, 288.0, 294.0, 314.0, 325.0, 311.0, 319.0, 293.0, 294.0, 288.0, 291.0, 319.0, 320.0, 288.0, 299.0, 319.0, 317.0, 268.0, 262.0, 324.0, 312.0, 316.0, 314.0, 314.0, 319.0, 258.0, 272.0, 314.0, 319.0, 325.0, 314.0, 321.0, 315.0, 283.0, 299.0, 293.0, 294.0, 288.0, 294.0, 316.0, 311.0, 290.0, 297.0, 285.0, 291.0, 322.0, 317.0, 291.0, 299.0]}, "sampler_perf": {"mean_env_wait_ms": 1.0049654393783378, "mean_processing_ms": 0.26923341324594424, "mean_inference_ms": 1.5798214340992636}, "off_policy_estimator": {}, "info": {"num_steps_trained": 6504000, "num_steps_sampled": 3468800, "sample_time_ms": 23269.747, "load_time_ms": 36.886, "grad_time_ms": 10264.548, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.00183187669608742, "policy_loss": -0.006337564438581467, "vf_loss": 87.34710693359375, "vf_explained_var": 0.7639560103416443, "kl": 0.0018626012606546283, "entropy": 1.1305490732192993, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3468800, "episodes_total": 8672, "training_iteration": 271, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-02-32", "timestamp": 1660255352, "time_this_iter_s": 29.946385145187378, "time_total_s": 13766.028507232666, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 13766.028507232666, "timesteps_since_restore": 3468800, "iterations_since_restore": 271, "perf": {"cpu_util_percent": 32.345238095238095, "ram_util_percent": 58.68095238095239}}
-{"episode_reward_max": 639.0, "episode_reward_min": 371.0, "episode_reward_mean": 604.34, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 173.0}, "policy_reward_max": {"ppo": 331.0}, "policy_reward_mean": {"ppo": 302.17}, "custom_metrics": {"sparse_reward_mean": 208.6, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 187.14, "shaped_reward_min": 131, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.28, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 18.6, "onion_pickup_agent_1_min": 13, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 16.6, "useful_onion_pickup_agent_0_min": 9, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 17.78, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 25, "onion_drop_agent_0_mean": 0.61, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.77, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.2, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.22, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 16.38, "potting_onion_agent_0_min": 9, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.58, "potting_onion_agent_1_min": 12, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 6.14, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 13, "dish_pickup_agent_1_mean": 5.98, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 12, "useful_dish_pickup_agent_0_mean": 5.53, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 5.19, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.51, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 0.6, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.51, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 5.15, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.42, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 5.07, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.05, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.38, 
"optimal_onion_potting_agent_0_min": 9, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.58, "optimal_onion_potting_agent_1_min": 12, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.38, "viable_onion_potting_agent_0_min": 9, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.58, "viable_onion_potting_agent_1_min": 12, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [633.0, 627.0, 636.0, 636.0, 633.0, 582.0, 582.0, 639.0, 639.0, 582.0, 639.0, 425.0, 587.0, 630.0, 636.0, 582.0, 636.0, 582.0, 371.0, 579.0, 582.0, 636.0, 636.0, 636.0, 636.0, 636.0, 627.0, 579.0, 639.0, 636.0, 582.0, 636.0, 582.0, 627.0, 576.0, 570.0, 582.0, 576.0, 584.0, 587.0, 636.0, 564.0, 630.0, 587.0, 582.0, 639.0, 630.0, 587.0, 579.0, 639.0, 587.0, 636.0, 530.0, 636.0, 630.0, 633.0, 530.0, 633.0, 639.0, 636.0, 582.0, 587.0, 582.0, 627.0, 587.0, 576.0, 639.0, 590.0, 582.0, 587.0, 627.0, 636.0, 633.0, 636.0, 627.0, 633.0, 579.0, 582.0, 573.0, 633.0, 570.0, 624.0, 627.0, 639.0, 636.0, 633.0, 636.0, 630.0, 579.0, 576.0, 436.0, 639.0, 587.0, 636.0, 636.0, 639.0, 587.0, 636.0, 587.0, 639.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [311.0, 322.0, 321.0, 306.0, 319.0, 317.0, 324.0, 312.0, 309.0, 324.0, 293.0, 289.0, 291.0, 291.0, 317.0, 322.0, 319.0, 320.0, 293.0, 289.0, 319.0, 320.0, 206.0, 219.0, 301.0, 286.0, 314.0, 316.0, 314.0, 322.0, 285.0, 297.0, 316.0, 320.0, 288.0, 294.0, 198.0, 173.0, 291.0, 288.0, 295.0, 287.0, 321.0, 315.0, 311.0, 325.0, 314.0, 322.0, 322.0, 314.0, 326.0, 310.0, 314.0, 313.0, 286.0, 293.0, 319.0, 320.0, 317.0, 319.0, 291.0, 291.0, 309.0, 327.0, 285.0, 297.0, 313.0, 314.0, 280.0, 296.0, 288.0, 282.0, 293.0, 289.0, 294.0, 282.0, 293.0, 291.0, 296.0, 291.0, 315.0, 321.0, 282.0, 282.0, 322.0, 308.0, 308.0, 279.0, 288.0, 294.0, 314.0, 325.0, 311.0, 319.0, 293.0, 294.0, 288.0, 
291.0, 319.0, 320.0, 288.0, 299.0, 319.0, 317.0, 268.0, 262.0, 324.0, 312.0, 316.0, 314.0, 314.0, 319.0, 258.0, 272.0, 314.0, 319.0, 325.0, 314.0, 321.0, 315.0, 283.0, 299.0, 293.0, 294.0, 288.0, 294.0, 316.0, 311.0, 290.0, 297.0, 285.0, 291.0, 322.0, 317.0, 291.0, 299.0, 294.0, 288.0, 284.0, 303.0, 315.0, 312.0, 314.0, 322.0, 311.0, 322.0, 319.0, 317.0, 310.0, 317.0, 324.0, 309.0, 289.0, 290.0, 294.0, 288.0, 284.0, 289.0, 303.0, 330.0, 277.0, 293.0, 316.0, 308.0, 321.0, 306.0, 314.0, 325.0, 319.0, 317.0, 314.0, 319.0, 331.0, 305.0, 321.0, 309.0, 293.0, 286.0, 288.0, 288.0, 211.0, 225.0, 312.0, 327.0, 293.0, 294.0, 314.0, 322.0, 317.0, 319.0, 314.0, 325.0, 302.0, 285.0, 314.0, 322.0, 293.0, 294.0, 317.0, 322.0]}, "sampler_perf": {"mean_env_wait_ms": 1.002917566902111, "mean_processing_ms": 0.26882194776394586, "mean_inference_ms": 1.577540173061489}, "off_policy_estimator": {}, "info": {"num_steps_trained": 6528000, "num_steps_sampled": 3481600, "sample_time_ms": 22780.285, "load_time_ms": 36.737, "grad_time_ms": 10254.225, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0052925958298146725, "policy_loss": -0.002937593497335911, "vf_loss": 88.02587890625, "vf_explained_var": 0.7725896835327148, "kl": 0.0019184405682608485, "entropy": 1.144766926765442, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3481600, "episodes_total": 8704, "training_iteration": 272, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-03-02", "timestamp": 1660255382, "time_this_iter_s": 30.47255301475525, "time_total_s": 13796.501060247421, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 13796.501060247421, "timesteps_since_restore": 3481600, "iterations_since_restore": 272, "perf": {"cpu_util_percent": 32.334090909090904, "ram_util_percent": 58.665909090909096}}
-{"episode_reward_max": 639.0, "episode_reward_min": 371.0, "episode_reward_mean": 611.97, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 173.0}, "policy_reward_max": {"ppo": 331.0}, "policy_reward_mean": {"ppo": 305.985}, "custom_metrics": {"sparse_reward_mean": 211.6, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 188.77, "shaped_reward_min": 131, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.15, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 18.89, "onion_pickup_agent_1_min": 13, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 16.49, "useful_onion_pickup_agent_0_min": 9, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 17.97, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 25, "onion_drop_agent_0_mean": 0.55, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.72, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.27, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.26, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 16.36, "potting_onion_agent_0_min": 9, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.91, "potting_onion_agent_1_min": 13, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 6.38, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 13, "dish_pickup_agent_1_mean": 5.69, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.73, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 5.09, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.56, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 0.47, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.7, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 5.03, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.63, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.98, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.04, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.36, 
"optimal_onion_potting_agent_0_min": 9, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.91, "optimal_onion_potting_agent_1_min": 13, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.36, "viable_onion_potting_agent_0_min": 9, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.91, "viable_onion_potting_agent_1_min": 13, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [587.0, 630.0, 630.0, 633.0, 636.0, 636.0, 636.0, 636.0, 633.0, 636.0, 615.0, 636.0, 630.0, 636.0, 576.0, 633.0, 627.0, 639.0, 582.0, 633.0, 582.0, 633.0, 627.0, 630.0, 636.0, 582.0, 633.0, 627.0, 639.0, 639.0, 627.0, 633.0, 587.0, 576.0, 639.0, 590.0, 582.0, 587.0, 627.0, 636.0, 633.0, 636.0, 627.0, 633.0, 579.0, 582.0, 573.0, 633.0, 570.0, 624.0, 627.0, 639.0, 636.0, 633.0, 636.0, 630.0, 579.0, 576.0, 436.0, 639.0, 587.0, 636.0, 636.0, 639.0, 587.0, 636.0, 587.0, 639.0, 633.0, 627.0, 636.0, 636.0, 633.0, 582.0, 582.0, 639.0, 639.0, 582.0, 639.0, 425.0, 587.0, 630.0, 636.0, 582.0, 636.0, 582.0, 371.0, 579.0, 582.0, 636.0, 636.0, 636.0, 636.0, 636.0, 627.0, 579.0, 639.0, 636.0, 582.0, 636.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [289.0, 298.0, 318.0, 312.0, 318.0, 312.0, 324.0, 309.0, 319.0, 317.0, 314.0, 322.0, 324.0, 312.0, 314.0, 322.0, 316.0, 317.0, 322.0, 314.0, 305.0, 310.0, 325.0, 311.0, 321.0, 309.0, 319.0, 317.0, 296.0, 280.0, 317.0, 316.0, 321.0, 306.0, 324.0, 315.0, 296.0, 286.0, 321.0, 312.0, 288.0, 294.0, 318.0, 315.0, 316.0, 311.0, 320.0, 310.0, 317.0, 319.0, 294.0, 288.0, 318.0, 315.0, 316.0, 311.0, 322.0, 317.0, 327.0, 312.0, 316.0, 311.0, 308.0, 325.0, 290.0, 297.0, 285.0, 291.0, 322.0, 317.0, 291.0, 299.0, 294.0, 288.0, 284.0, 303.0, 315.0, 312.0, 314.0, 322.0, 311.0, 322.0, 319.0, 317.0, 310.0, 317.0, 324.0, 309.0, 289.0, 290.0, 294.0, 288.0, 284.0, 289.0, 303.0, 330.0, 277.0, 
293.0, 316.0, 308.0, 321.0, 306.0, 314.0, 325.0, 319.0, 317.0, 314.0, 319.0, 331.0, 305.0, 321.0, 309.0, 293.0, 286.0, 288.0, 288.0, 211.0, 225.0, 312.0, 327.0, 293.0, 294.0, 314.0, 322.0, 317.0, 319.0, 314.0, 325.0, 302.0, 285.0, 314.0, 322.0, 293.0, 294.0, 317.0, 322.0, 311.0, 322.0, 321.0, 306.0, 319.0, 317.0, 324.0, 312.0, 309.0, 324.0, 293.0, 289.0, 291.0, 291.0, 317.0, 322.0, 319.0, 320.0, 293.0, 289.0, 319.0, 320.0, 206.0, 219.0, 301.0, 286.0, 314.0, 316.0, 314.0, 322.0, 285.0, 297.0, 316.0, 320.0, 288.0, 294.0, 198.0, 173.0, 291.0, 288.0, 295.0, 287.0, 321.0, 315.0, 311.0, 325.0, 314.0, 322.0, 322.0, 314.0, 326.0, 310.0, 314.0, 313.0, 286.0, 293.0, 319.0, 320.0, 317.0, 319.0, 291.0, 291.0, 309.0, 327.0]}, "sampler_perf": {"mean_env_wait_ms": 1.0008984560803753, "mean_processing_ms": 0.26841904280245515, "mean_inference_ms": 1.5754578335271856}, "off_policy_estimator": {}, "info": {"num_steps_trained": 6552000, "num_steps_sampled": 3494400, "sample_time_ms": 22479.799, "load_time_ms": 36.968, "grad_time_ms": 10345.078, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.007351151201874018, "policy_loss": -0.0004584121925290674, "vf_loss": 83.76141357421875, "vf_explained_var": 0.7616392970085144, "kl": 0.0025154289323836565, "entropy": 1.1331415176391602, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3494400, "episodes_total": 8736, "training_iteration": 273, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-03-34", "timestamp": 1660255414, "time_this_iter_s": 31.54677987098694, "time_total_s": 13828.047840118408, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 13828.047840118408, "timesteps_since_restore": 3494400, "iterations_since_restore": 273, "perf": {"cpu_util_percent": 36.990909090909085, "ram_util_percent": 59.28409090909091}}
-{"episode_reward_max": 639.0, "episode_reward_min": 371.0, "episode_reward_mean": 613.53, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 173.0}, "policy_reward_max": {"ppo": 330.0}, "policy_reward_mean": {"ppo": 306.765}, "custom_metrics": {"sparse_reward_mean": 212.4, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 188.73, "shaped_reward_min": 131, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.2, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 18.81, "onion_pickup_agent_1_min": 14, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.5, "useful_onion_pickup_agent_0_min": 9, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 17.98, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.57, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.64, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.28, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.26, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 16.36, "potting_onion_agent_0_min": 9, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.89, "potting_onion_agent_1_min": 12, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.34, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.64, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.74, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.07, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.5, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.48, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.71, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.02, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.65, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.97, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.36, 
"optimal_onion_potting_agent_0_min": 9, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.89, "optimal_onion_potting_agent_1_min": 12, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.36, "viable_onion_potting_agent_0_min": 9, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.89, "viable_onion_potting_agent_1_min": 12, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [633.0, 636.0, 630.0, 630.0, 582.0, 522.0, 587.0, 630.0, 627.0, 633.0, 633.0, 630.0, 587.0, 582.0, 633.0, 636.0, 636.0, 621.0, 627.0, 587.0, 573.0, 630.0, 639.0, 630.0, 630.0, 636.0, 567.0, 579.0, 582.0, 579.0, 633.0, 639.0, 587.0, 636.0, 587.0, 639.0, 633.0, 627.0, 636.0, 636.0, 633.0, 582.0, 582.0, 639.0, 639.0, 582.0, 639.0, 425.0, 587.0, 630.0, 636.0, 582.0, 636.0, 582.0, 371.0, 579.0, 582.0, 636.0, 636.0, 636.0, 636.0, 636.0, 627.0, 579.0, 639.0, 636.0, 582.0, 636.0, 587.0, 630.0, 630.0, 633.0, 636.0, 636.0, 636.0, 636.0, 633.0, 636.0, 615.0, 636.0, 630.0, 636.0, 576.0, 633.0, 627.0, 639.0, 582.0, 633.0, 582.0, 633.0, 627.0, 630.0, 636.0, 582.0, 633.0, 627.0, 639.0, 639.0, 627.0, 633.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [316.0, 317.0, 320.0, 316.0, 314.0, 316.0, 313.0, 317.0, 286.0, 296.0, 260.0, 262.0, 293.0, 294.0, 311.0, 319.0, 321.0, 306.0, 318.0, 315.0, 314.0, 319.0, 319.0, 311.0, 301.0, 286.0, 286.0, 296.0, 311.0, 322.0, 319.0, 317.0, 322.0, 314.0, 305.0, 316.0, 316.0, 311.0, 298.0, 289.0, 279.0, 294.0, 324.0, 306.0, 321.0, 318.0, 319.0, 311.0, 316.0, 314.0, 321.0, 315.0, 281.0, 286.0, 282.0, 297.0, 280.0, 302.0, 291.0, 288.0, 303.0, 330.0, 322.0, 317.0, 302.0, 285.0, 314.0, 322.0, 293.0, 294.0, 317.0, 322.0, 311.0, 322.0, 321.0, 306.0, 319.0, 317.0, 324.0, 312.0, 309.0, 324.0, 293.0, 289.0, 291.0, 291.0, 317.0, 322.0, 319.0, 320.0, 293.0, 289.0, 319.0, 320.0, 206.0, 219.0, 301.0, 
286.0, 314.0, 316.0, 314.0, 322.0, 285.0, 297.0, 316.0, 320.0, 288.0, 294.0, 198.0, 173.0, 291.0, 288.0, 295.0, 287.0, 321.0, 315.0, 311.0, 325.0, 314.0, 322.0, 322.0, 314.0, 326.0, 310.0, 314.0, 313.0, 286.0, 293.0, 319.0, 320.0, 317.0, 319.0, 291.0, 291.0, 309.0, 327.0, 289.0, 298.0, 318.0, 312.0, 318.0, 312.0, 324.0, 309.0, 319.0, 317.0, 314.0, 322.0, 324.0, 312.0, 314.0, 322.0, 316.0, 317.0, 322.0, 314.0, 305.0, 310.0, 325.0, 311.0, 321.0, 309.0, 319.0, 317.0, 296.0, 280.0, 317.0, 316.0, 321.0, 306.0, 324.0, 315.0, 296.0, 286.0, 321.0, 312.0, 288.0, 294.0, 318.0, 315.0, 316.0, 311.0, 320.0, 310.0, 317.0, 319.0, 294.0, 288.0, 318.0, 315.0, 316.0, 311.0, 322.0, 317.0, 327.0, 312.0, 316.0, 311.0, 308.0, 325.0]}, "sampler_perf": {"mean_env_wait_ms": 0.9989007894762186, "mean_processing_ms": 0.2680204764201923, "mean_inference_ms": 1.5734861760997552}, "off_policy_estimator": {}, "info": {"num_steps_trained": 6576000, "num_steps_sampled": 3507200, "sample_time_ms": 22242.32, "load_time_ms": 36.671, "grad_time_ms": 10040.843, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0004968150169588625, "policy_loss": -0.007466705050319433, "vf_loss": 85.29949188232422, "vf_explained_var": 0.7543535232543945, "kl": 0.0017724571516737342, "entropy": 1.1328660249710083, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3507200, "episodes_total": 8768, "training_iteration": 274, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-04-05", "timestamp": 1660255445, "time_this_iter_s": 30.891488075256348, "time_total_s": 13858.939328193665, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 13858.939328193665, "timesteps_since_restore": 3507200, "iterations_since_restore": 274, "perf": {"cpu_util_percent": 32.32954545454545, "ram_util_percent": 58.86590909090909}}
-{"episode_reward_max": 639.0, "episode_reward_min": 362.0, "episode_reward_mean": 610.69, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 180.0}, "policy_reward_max": {"ppo": 330.0}, "policy_reward_mean": {"ppo": 305.345}, "custom_metrics": {"sparse_reward_mean": 211.8, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 187.09, "shaped_reward_min": 110, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.19, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 18.76, "onion_pickup_agent_1_min": 14, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.45, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 17.88, "useful_onion_pickup_agent_1_min": 12, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.57, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.74, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.31, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, "useful_onion_drop_agent_1_mean": 0.31, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 16.31, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.72, "potting_onion_agent_1_min": 11, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.41, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.65, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.55, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.0, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.65, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.45, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.59, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.1, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.56, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.04, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.31, 
"optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.72, "optimal_onion_potting_agent_1_min": 11, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.31, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.72, "viable_onion_potting_agent_1_min": 11, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [633.0, 636.0, 522.0, 390.0, 582.0, 516.0, 630.0, 627.0, 587.0, 633.0, 582.0, 636.0, 582.0, 447.0, 630.0, 639.0, 636.0, 636.0, 633.0, 362.0, 627.0, 636.0, 630.0, 639.0, 579.0, 621.0, 639.0, 639.0, 587.0, 636.0, 587.0, 630.0, 639.0, 636.0, 582.0, 636.0, 587.0, 630.0, 630.0, 633.0, 636.0, 636.0, 636.0, 636.0, 633.0, 636.0, 615.0, 636.0, 630.0, 636.0, 576.0, 633.0, 627.0, 639.0, 582.0, 633.0, 582.0, 633.0, 627.0, 630.0, 636.0, 582.0, 633.0, 627.0, 639.0, 639.0, 627.0, 633.0, 633.0, 636.0, 630.0, 630.0, 582.0, 522.0, 587.0, 630.0, 627.0, 633.0, 633.0, 630.0, 587.0, 582.0, 633.0, 636.0, 636.0, 621.0, 627.0, 587.0, 573.0, 630.0, 639.0, 630.0, 630.0, 636.0, 567.0, 579.0, 582.0, 579.0, 633.0, 639.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [314.0, 319.0, 314.0, 322.0, 260.0, 262.0, 197.0, 193.0, 291.0, 291.0, 256.0, 260.0, 314.0, 316.0, 308.0, 319.0, 286.0, 301.0, 311.0, 322.0, 289.0, 293.0, 322.0, 314.0, 288.0, 294.0, 221.0, 226.0, 318.0, 312.0, 317.0, 322.0, 317.0, 319.0, 318.0, 318.0, 309.0, 324.0, 182.0, 180.0, 316.0, 311.0, 316.0, 320.0, 319.0, 311.0, 319.0, 320.0, 294.0, 285.0, 301.0, 320.0, 327.0, 312.0, 314.0, 325.0, 281.0, 306.0, 316.0, 320.0, 301.0, 286.0, 316.0, 314.0, 319.0, 320.0, 317.0, 319.0, 291.0, 291.0, 309.0, 327.0, 289.0, 298.0, 318.0, 312.0, 318.0, 312.0, 324.0, 309.0, 319.0, 317.0, 314.0, 322.0, 324.0, 312.0, 314.0, 322.0, 316.0, 317.0, 322.0, 314.0, 305.0, 310.0, 325.0, 311.0, 321.0, 
309.0, 319.0, 317.0, 296.0, 280.0, 317.0, 316.0, 321.0, 306.0, 324.0, 315.0, 296.0, 286.0, 321.0, 312.0, 288.0, 294.0, 318.0, 315.0, 316.0, 311.0, 320.0, 310.0, 317.0, 319.0, 294.0, 288.0, 318.0, 315.0, 316.0, 311.0, 322.0, 317.0, 327.0, 312.0, 316.0, 311.0, 308.0, 325.0, 316.0, 317.0, 320.0, 316.0, 314.0, 316.0, 313.0, 317.0, 286.0, 296.0, 260.0, 262.0, 293.0, 294.0, 311.0, 319.0, 321.0, 306.0, 318.0, 315.0, 314.0, 319.0, 319.0, 311.0, 301.0, 286.0, 286.0, 296.0, 311.0, 322.0, 319.0, 317.0, 322.0, 314.0, 305.0, 316.0, 316.0, 311.0, 298.0, 289.0, 279.0, 294.0, 324.0, 306.0, 321.0, 318.0, 319.0, 311.0, 316.0, 314.0, 321.0, 315.0, 281.0, 286.0, 282.0, 297.0, 280.0, 302.0, 291.0, 288.0, 303.0, 330.0, 322.0, 317.0]}, "sampler_perf": {"mean_env_wait_ms": 0.9969205996800542, "mean_processing_ms": 0.26762558358937055, "mean_inference_ms": 1.571468951953135}, "off_policy_estimator": {}, "info": {"num_steps_trained": 6600000, "num_steps_sampled": 3520000, "sample_time_ms": 21659.121, "load_time_ms": 36.576, "grad_time_ms": 9832.58, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.005525531247258186, "policy_loss": -0.002899330807849765, "vf_loss": 89.90011596679688, "vf_explained_var": 0.7821382880210876, "kl": 0.002336545381695032, "entropy": 1.130285382270813, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3520000, "episodes_total": 8800, "training_iteration": 275, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-04-34", "timestamp": 1660255474, "time_this_iter_s": 29.723124265670776, "time_total_s": 13888.662452459335, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 13888.662452459335, "timesteps_since_restore": 3520000, "iterations_since_restore": 275, "perf": {"cpu_util_percent": 34.31666666666666, "ram_util_percent": 58.82619047619047}}
-{"episode_reward_max": 639.0, "episode_reward_min": 362.0, "episode_reward_mean": 609.39, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 180.0}, "policy_reward_max": {"ppo": 330.0}, "policy_reward_mean": {"ppo": 304.695}, "custom_metrics": {"sparse_reward_mean": 211.4, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 186.59, "shaped_reward_min": 110, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.44, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 18.64, "onion_pickup_agent_1_min": 14, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 16.65, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 17.77, "useful_onion_pickup_agent_1_min": 12, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.68, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.84, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.32, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, "useful_onion_drop_agent_1_mean": 0.34, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 16.43, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.5, "potting_onion_agent_1_min": 11, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.21, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.79, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.37, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.13, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.64, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.38, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.37, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.31, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.34, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.25, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.43, 
"optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.5, "optimal_onion_potting_agent_1_min": 11, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.43, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.5, "viable_onion_potting_agent_1_min": 11, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [639.0, 630.0, 639.0, 630.0, 587.0, 582.0, 639.0, 636.0, 639.0, 582.0, 630.0, 627.0, 627.0, 633.0, 639.0, 570.0, 579.0, 633.0, 627.0, 627.0, 636.0, 636.0, 630.0, 633.0, 633.0, 621.0, 573.0, 636.0, 627.0, 530.0, 627.0, 636.0, 639.0, 639.0, 627.0, 633.0, 633.0, 636.0, 630.0, 630.0, 582.0, 522.0, 587.0, 630.0, 627.0, 633.0, 633.0, 630.0, 587.0, 582.0, 633.0, 636.0, 636.0, 621.0, 627.0, 587.0, 573.0, 630.0, 639.0, 630.0, 630.0, 636.0, 567.0, 579.0, 582.0, 579.0, 633.0, 639.0, 633.0, 636.0, 522.0, 390.0, 582.0, 516.0, 630.0, 627.0, 587.0, 633.0, 582.0, 636.0, 582.0, 447.0, 630.0, 639.0, 636.0, 636.0, 633.0, 362.0, 627.0, 636.0, 630.0, 639.0, 579.0, 621.0, 639.0, 639.0, 587.0, 636.0, 587.0, 630.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [325.0, 314.0, 309.0, 321.0, 315.0, 324.0, 310.0, 320.0, 297.0, 290.0, 284.0, 298.0, 322.0, 317.0, 319.0, 317.0, 319.0, 320.0, 283.0, 299.0, 314.0, 316.0, 314.0, 313.0, 308.0, 319.0, 322.0, 311.0, 321.0, 318.0, 287.0, 283.0, 283.0, 296.0, 319.0, 314.0, 311.0, 316.0, 314.0, 313.0, 322.0, 314.0, 317.0, 319.0, 318.0, 312.0, 309.0, 324.0, 319.0, 314.0, 297.0, 324.0, 279.0, 294.0, 316.0, 320.0, 318.0, 309.0, 251.0, 279.0, 317.0, 310.0, 319.0, 317.0, 322.0, 317.0, 327.0, 312.0, 316.0, 311.0, 308.0, 325.0, 316.0, 317.0, 320.0, 316.0, 314.0, 316.0, 313.0, 317.0, 286.0, 296.0, 260.0, 262.0, 293.0, 294.0, 311.0, 319.0, 321.0, 306.0, 318.0, 315.0, 314.0, 319.0, 319.0, 311.0, 301.0, 
286.0, 286.0, 296.0, 311.0, 322.0, 319.0, 317.0, 322.0, 314.0, 305.0, 316.0, 316.0, 311.0, 298.0, 289.0, 279.0, 294.0, 324.0, 306.0, 321.0, 318.0, 319.0, 311.0, 316.0, 314.0, 321.0, 315.0, 281.0, 286.0, 282.0, 297.0, 280.0, 302.0, 291.0, 288.0, 303.0, 330.0, 322.0, 317.0, 314.0, 319.0, 314.0, 322.0, 260.0, 262.0, 197.0, 193.0, 291.0, 291.0, 256.0, 260.0, 314.0, 316.0, 308.0, 319.0, 286.0, 301.0, 311.0, 322.0, 289.0, 293.0, 322.0, 314.0, 288.0, 294.0, 221.0, 226.0, 318.0, 312.0, 317.0, 322.0, 317.0, 319.0, 318.0, 318.0, 309.0, 324.0, 182.0, 180.0, 316.0, 311.0, 316.0, 320.0, 319.0, 311.0, 319.0, 320.0, 294.0, 285.0, 301.0, 320.0, 327.0, 312.0, 314.0, 325.0, 281.0, 306.0, 316.0, 320.0, 301.0, 286.0, 316.0, 314.0]}, "sampler_perf": {"mean_env_wait_ms": 0.9949570051845532, "mean_processing_ms": 0.2672332587743119, "mean_inference_ms": 1.569562529998314}, "off_policy_estimator": {}, "info": {"num_steps_trained": 6624000, "num_steps_sampled": 3532800, "sample_time_ms": 21368.312, "load_time_ms": 36.431, "grad_time_ms": 9398.907, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0024656467139720917, "policy_loss": -0.005234332289546728, "vf_loss": 82.6478500366211, "vf_explained_var": 0.7628920078277588, "kl": 0.001980842323973775, "entropy": 1.1296080350875854, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3532800, "episodes_total": 8832, "training_iteration": 276, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-05-05", "timestamp": 1660255505, "time_this_iter_s": 30.375401973724365, "time_total_s": 13919.03785443306, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 13919.03785443306, "timesteps_since_restore": 3532800, "iterations_since_restore": 276, "perf": {"cpu_util_percent": 33.07209302325582, "ram_util_percent": 58.767441860465105}}
-{"episode_reward_max": 639.0, "episode_reward_min": 362.0, "episode_reward_mean": 610.72, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 180.0}, "policy_reward_max": {"ppo": 330.0}, "policy_reward_mean": {"ppo": 305.36}, "custom_metrics": {"sparse_reward_mean": 211.8, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 187.12, "shaped_reward_min": 110, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.44, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 18.89, "onion_pickup_agent_1_min": 15, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 16.62, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 17.9, "useful_onion_pickup_agent_1_min": 12, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.76, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.85, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.36, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.31, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 16.36, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.67, "potting_onion_agent_1_min": 11, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 6.17, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.84, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.41, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 5.1, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.51, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.48, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.45, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.26, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.43, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.2, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.36, 
"optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.67, "optimal_onion_potting_agent_1_min": 11, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.36, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.67, "viable_onion_potting_agent_1_min": 11, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [584.0, 639.0, 636.0, 633.0, 633.0, 636.0, 627.0, 627.0, 587.0, 636.0, 552.0, 636.0, 636.0, 636.0, 639.0, 636.0, 587.0, 636.0, 621.0, 630.0, 636.0, 633.0, 527.0, 636.0, 633.0, 630.0, 587.0, 630.0, 627.0, 582.0, 630.0, 639.0, 582.0, 579.0, 633.0, 639.0, 633.0, 636.0, 522.0, 390.0, 582.0, 516.0, 630.0, 627.0, 587.0, 633.0, 582.0, 636.0, 582.0, 447.0, 630.0, 639.0, 636.0, 636.0, 633.0, 362.0, 627.0, 636.0, 630.0, 639.0, 579.0, 621.0, 639.0, 639.0, 587.0, 636.0, 587.0, 630.0, 639.0, 630.0, 639.0, 630.0, 587.0, 582.0, 639.0, 636.0, 639.0, 582.0, 630.0, 627.0, 627.0, 633.0, 639.0, 570.0, 579.0, 633.0, 627.0, 627.0, 636.0, 636.0, 630.0, 633.0, 633.0, 621.0, 573.0, 636.0, 627.0, 530.0, 627.0, 636.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [287.0, 297.0, 319.0, 320.0, 321.0, 315.0, 311.0, 322.0, 317.0, 316.0, 316.0, 320.0, 323.0, 304.0, 316.0, 311.0, 291.0, 296.0, 324.0, 312.0, 274.0, 278.0, 324.0, 312.0, 325.0, 311.0, 311.0, 325.0, 314.0, 325.0, 323.0, 313.0, 293.0, 294.0, 309.0, 327.0, 310.0, 311.0, 318.0, 312.0, 322.0, 314.0, 304.0, 329.0, 266.0, 261.0, 329.0, 307.0, 311.0, 322.0, 316.0, 314.0, 294.0, 293.0, 316.0, 314.0, 314.0, 313.0, 296.0, 286.0, 314.0, 316.0, 322.0, 317.0, 280.0, 302.0, 291.0, 288.0, 303.0, 330.0, 322.0, 317.0, 314.0, 319.0, 314.0, 322.0, 260.0, 262.0, 197.0, 193.0, 291.0, 291.0, 256.0, 260.0, 314.0, 316.0, 308.0, 319.0, 286.0, 301.0, 311.0, 322.0, 289.0, 293.0, 322.0, 314.0, 288.0, 
294.0, 221.0, 226.0, 318.0, 312.0, 317.0, 322.0, 317.0, 319.0, 318.0, 318.0, 309.0, 324.0, 182.0, 180.0, 316.0, 311.0, 316.0, 320.0, 319.0, 311.0, 319.0, 320.0, 294.0, 285.0, 301.0, 320.0, 327.0, 312.0, 314.0, 325.0, 281.0, 306.0, 316.0, 320.0, 301.0, 286.0, 316.0, 314.0, 325.0, 314.0, 309.0, 321.0, 315.0, 324.0, 310.0, 320.0, 297.0, 290.0, 284.0, 298.0, 322.0, 317.0, 319.0, 317.0, 319.0, 320.0, 283.0, 299.0, 314.0, 316.0, 314.0, 313.0, 308.0, 319.0, 322.0, 311.0, 321.0, 318.0, 287.0, 283.0, 283.0, 296.0, 319.0, 314.0, 311.0, 316.0, 314.0, 313.0, 322.0, 314.0, 317.0, 319.0, 318.0, 312.0, 309.0, 324.0, 319.0, 314.0, 297.0, 324.0, 279.0, 294.0, 316.0, 320.0, 318.0, 309.0, 251.0, 279.0, 317.0, 310.0, 319.0, 317.0]}, "sampler_perf": {"mean_env_wait_ms": 0.993008865355136, "mean_processing_ms": 0.26684329670316986, "mean_inference_ms": 1.5676868637222179}, "off_policy_estimator": {}, "info": {"num_steps_trained": 6648000, "num_steps_sampled": 3545600, "sample_time_ms": 21068.026, "load_time_ms": 36.686, "grad_time_ms": 9153.242, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.004478854592889547, "policy_loss": -0.0033003378193825483, "vf_loss": 83.42573547363281, "vf_explained_var": 0.7645106911659241, "kl": 0.002364285057410598, "entropy": 1.1267634630203247, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3545600, "episodes_total": 8864, "training_iteration": 277, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-05-35", "timestamp": 1660255535, "time_this_iter_s": 29.876389980316162, "time_total_s": 13948.914244413376, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 13948.914244413376, "timesteps_since_restore": 3545600, "iterations_since_restore": 277, "perf": {"cpu_util_percent": 33.73571428571429, "ram_util_percent": 58.82857142857141}}
-{"episode_reward_max": 639.0, "episode_reward_min": 516.0, "episode_reward_mean": 617.78, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 243.0}, "policy_reward_max": {"ppo": 329.0}, "policy_reward_mean": {"ppo": 308.89}, "custom_metrics": {"sparse_reward_mean": 214.2, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 189.38, "shaped_reward_min": 152, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.68, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 19.09, "onion_pickup_agent_1_min": 15, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 16.87, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 18.09, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.92, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.85, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.3, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.35, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 16.47, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.87, "potting_onion_agent_1_min": 14, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 6.23, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.89, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.58, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 5.14, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.47, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.54, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.59, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.25, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.57, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.18, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.47, 
"optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.87, "optimal_onion_potting_agent_1_min": 14, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.47, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.87, "viable_onion_potting_agent_1_min": 14, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [633.0, 587.0, 627.0, 576.0, 544.0, 636.0, 630.0, 633.0, 633.0, 615.0, 582.0, 636.0, 639.0, 636.0, 633.0, 579.0, 630.0, 633.0, 633.0, 636.0, 639.0, 579.0, 636.0, 630.0, 639.0, 633.0, 582.0, 630.0, 627.0, 516.0, 587.0, 639.0, 587.0, 636.0, 587.0, 630.0, 639.0, 630.0, 639.0, 630.0, 587.0, 582.0, 639.0, 636.0, 639.0, 582.0, 630.0, 627.0, 627.0, 633.0, 639.0, 570.0, 579.0, 633.0, 627.0, 627.0, 636.0, 636.0, 630.0, 633.0, 633.0, 621.0, 573.0, 636.0, 627.0, 530.0, 627.0, 636.0, 584.0, 639.0, 636.0, 633.0, 633.0, 636.0, 627.0, 627.0, 587.0, 636.0, 552.0, 636.0, 636.0, 636.0, 639.0, 636.0, 587.0, 636.0, 621.0, 630.0, 636.0, 633.0, 527.0, 636.0, 633.0, 630.0, 587.0, 630.0, 627.0, 582.0, 630.0, 639.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [319.0, 314.0, 291.0, 296.0, 321.0, 306.0, 288.0, 288.0, 266.0, 278.0, 320.0, 316.0, 316.0, 314.0, 319.0, 314.0, 316.0, 317.0, 310.0, 305.0, 298.0, 284.0, 316.0, 320.0, 327.0, 312.0, 319.0, 317.0, 319.0, 314.0, 282.0, 297.0, 310.0, 320.0, 316.0, 317.0, 322.0, 311.0, 308.0, 328.0, 325.0, 314.0, 296.0, 283.0, 314.0, 322.0, 320.0, 310.0, 317.0, 322.0, 308.0, 325.0, 290.0, 292.0, 313.0, 317.0, 308.0, 319.0, 243.0, 273.0, 291.0, 296.0, 320.0, 319.0, 281.0, 306.0, 316.0, 320.0, 301.0, 286.0, 316.0, 314.0, 325.0, 314.0, 309.0, 321.0, 315.0, 324.0, 310.0, 320.0, 297.0, 290.0, 284.0, 298.0, 322.0, 317.0, 319.0, 317.0, 319.0, 320.0, 283.0, 299.0, 314.0, 316.0, 314.0, 313.0, 308.0, 
319.0, 322.0, 311.0, 321.0, 318.0, 287.0, 283.0, 283.0, 296.0, 319.0, 314.0, 311.0, 316.0, 314.0, 313.0, 322.0, 314.0, 317.0, 319.0, 318.0, 312.0, 309.0, 324.0, 319.0, 314.0, 297.0, 324.0, 279.0, 294.0, 316.0, 320.0, 318.0, 309.0, 251.0, 279.0, 317.0, 310.0, 319.0, 317.0, 287.0, 297.0, 319.0, 320.0, 321.0, 315.0, 311.0, 322.0, 317.0, 316.0, 316.0, 320.0, 323.0, 304.0, 316.0, 311.0, 291.0, 296.0, 324.0, 312.0, 274.0, 278.0, 324.0, 312.0, 325.0, 311.0, 311.0, 325.0, 314.0, 325.0, 323.0, 313.0, 293.0, 294.0, 309.0, 327.0, 310.0, 311.0, 318.0, 312.0, 322.0, 314.0, 304.0, 329.0, 266.0, 261.0, 329.0, 307.0, 311.0, 322.0, 316.0, 314.0, 294.0, 293.0, 316.0, 314.0, 314.0, 313.0, 296.0, 286.0, 314.0, 316.0, 322.0, 317.0]}, "sampler_perf": {"mean_env_wait_ms": 0.9910719747703968, "mean_processing_ms": 0.26645500095607105, "mean_inference_ms": 1.5658690640351203}, "off_policy_estimator": {}, "info": {"num_steps_trained": 6672000, "num_steps_sampled": 3558400, "sample_time_ms": 20841.044, "load_time_ms": 37.005, "grad_time_ms": 9278.753, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.004964211490005255, "policy_loss": -0.0031354122329503298, "vf_loss": 86.61837768554688, "vf_explained_var": 0.7653247714042664, "kl": 0.0020841285586357117, "entropy": 1.1244021654129028, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3558400, "episodes_total": 8896, "training_iteration": 278, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-06-07", "timestamp": 1660255567, "time_this_iter_s": 32.07894992828369, "time_total_s": 13980.99319434166, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 13980.99319434166, "timesteps_since_restore": 3558400, "iterations_since_restore": 278, "perf": {"cpu_util_percent": 33.79111111111111, "ram_util_percent": 58.81111111111109}}
-{"episode_reward_max": 639.0, "episode_reward_min": 516.0, "episode_reward_mean": 617.81, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 243.0}, "policy_reward_max": {"ppo": 329.0}, "policy_reward_mean": {"ppo": 308.905}, "custom_metrics": {"sparse_reward_mean": 214.2, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 189.41, "shaped_reward_min": 152, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.73, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 18.95, "onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.9, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 18.11, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.86, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.76, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.29, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.28, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 16.6, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 17.75, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 6.26, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.84, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.6, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 5.12, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.47, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.54, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.64, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.22, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.59, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.15, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.6, 
"optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 17.75, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.6, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 17.75, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [633.0, 630.0, 567.0, 627.0, 633.0, 584.0, 639.0, 582.0, 633.0, 636.0, 579.0, 636.0, 627.0, 630.0, 582.0, 633.0, 639.0, 627.0, 582.0, 630.0, 633.0, 633.0, 636.0, 578.0, 633.0, 633.0, 639.0, 633.0, 630.0, 587.0, 633.0, 639.0, 627.0, 530.0, 627.0, 636.0, 584.0, 639.0, 636.0, 633.0, 633.0, 636.0, 627.0, 627.0, 587.0, 636.0, 552.0, 636.0, 636.0, 636.0, 639.0, 636.0, 587.0, 636.0, 621.0, 630.0, 636.0, 633.0, 527.0, 636.0, 633.0, 630.0, 587.0, 630.0, 627.0, 582.0, 630.0, 639.0, 633.0, 587.0, 627.0, 576.0, 544.0, 636.0, 630.0, 633.0, 633.0, 615.0, 582.0, 636.0, 639.0, 636.0, 633.0, 579.0, 630.0, 633.0, 633.0, 636.0, 639.0, 579.0, 636.0, 630.0, 639.0, 633.0, 582.0, 630.0, 627.0, 516.0, 587.0, 639.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [307.0, 326.0, 304.0, 326.0, 293.0, 274.0, 305.0, 322.0, 319.0, 314.0, 293.0, 291.0, 319.0, 320.0, 292.0, 290.0, 324.0, 309.0, 319.0, 317.0, 293.0, 286.0, 320.0, 316.0, 306.0, 321.0, 319.0, 311.0, 291.0, 291.0, 309.0, 324.0, 325.0, 314.0, 314.0, 313.0, 301.0, 281.0, 306.0, 324.0, 306.0, 327.0, 319.0, 314.0, 324.0, 312.0, 287.0, 291.0, 319.0, 314.0, 319.0, 314.0, 324.0, 315.0, 317.0, 316.0, 311.0, 319.0, 305.0, 282.0, 315.0, 318.0, 322.0, 317.0, 318.0, 309.0, 251.0, 279.0, 317.0, 310.0, 319.0, 317.0, 287.0, 297.0, 319.0, 320.0, 321.0, 315.0, 311.0, 322.0, 317.0, 316.0, 316.0, 320.0, 323.0, 304.0, 316.0, 311.0, 291.0, 296.0, 324.0, 312.0, 274.0, 278.0, 324.0, 312.0, 325.0, 
311.0, 311.0, 325.0, 314.0, 325.0, 323.0, 313.0, 293.0, 294.0, 309.0, 327.0, 310.0, 311.0, 318.0, 312.0, 322.0, 314.0, 304.0, 329.0, 266.0, 261.0, 329.0, 307.0, 311.0, 322.0, 316.0, 314.0, 294.0, 293.0, 316.0, 314.0, 314.0, 313.0, 296.0, 286.0, 314.0, 316.0, 322.0, 317.0, 319.0, 314.0, 291.0, 296.0, 321.0, 306.0, 288.0, 288.0, 266.0, 278.0, 320.0, 316.0, 316.0, 314.0, 319.0, 314.0, 316.0, 317.0, 310.0, 305.0, 298.0, 284.0, 316.0, 320.0, 327.0, 312.0, 319.0, 317.0, 319.0, 314.0, 282.0, 297.0, 310.0, 320.0, 316.0, 317.0, 322.0, 311.0, 308.0, 328.0, 325.0, 314.0, 296.0, 283.0, 314.0, 322.0, 320.0, 310.0, 317.0, 322.0, 308.0, 325.0, 290.0, 292.0, 313.0, 317.0, 308.0, 319.0, 243.0, 273.0, 291.0, 296.0, 320.0, 319.0]}, "sampler_perf": {"mean_env_wait_ms": 0.9891622022532315, "mean_processing_ms": 0.26607283169040735, "mean_inference_ms": 1.56436366595401}, "off_policy_estimator": {}, "info": {"num_steps_trained": 6696000, "num_steps_sampled": 3571200, "sample_time_ms": 21399.6, "load_time_ms": 36.9, "grad_time_ms": 9533.999, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0006114859133958817, "policy_loss": -0.007230747956782579, "vf_loss": 84.06954956054688, "vf_explained_var": 0.7658140063285828, "kl": 0.0017542889108881354, "entropy": 1.129442572593689, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3571200, "episodes_total": 8928, "training_iteration": 279, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-06-44", "timestamp": 1660255604, "time_this_iter_s": 37.19960618019104, "time_total_s": 14018.19280052185, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 14018.19280052185, "timesteps_since_restore": 3571200, "iterations_since_restore": 279, "perf": {"cpu_util_percent": 33.281132075471696, "ram_util_percent": 58.9301886792453}}
-{"episode_reward_max": 639.0, "episode_reward_min": 294.0, "episode_reward_mean": 612.19, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 146.0}, "policy_reward_max": {"ppo": 328.0}, "policy_reward_mean": {"ppo": 306.095}, "custom_metrics": {"sparse_reward_mean": 212.2, "sparse_reward_min": 100, "sparse_reward_max": 220, "shaped_reward_mean": 187.79, "shaped_reward_min": 94, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.72, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 18.58, "onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.89, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 17.73, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.85, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.65, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.29, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.29, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 16.6, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 17.56, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 6.3, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.99, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 18, "useful_dish_pickup_agent_0_mean": 5.51, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.11, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.59, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.66, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 14, "useful_dish_drop_agent_0_mean": 0.04, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.53, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.19, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.47, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.15, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.03, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.6, 
"optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 17.56, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.6, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 17.56, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [636.0, 639.0, 630.0, 633.0, 633.0, 639.0, 639.0, 582.0, 294.0, 636.0, 587.0, 579.0, 636.0, 450.0, 579.0, 573.0, 582.0, 582.0, 582.0, 630.0, 636.0, 636.0, 636.0, 579.0, 561.0, 633.0, 636.0, 630.0, 630.0, 630.0, 639.0, 630.0, 627.0, 582.0, 630.0, 639.0, 633.0, 587.0, 627.0, 576.0, 544.0, 636.0, 630.0, 633.0, 633.0, 615.0, 582.0, 636.0, 639.0, 636.0, 633.0, 579.0, 630.0, 633.0, 633.0, 636.0, 639.0, 579.0, 636.0, 630.0, 639.0, 633.0, 582.0, 630.0, 627.0, 516.0, 587.0, 639.0, 633.0, 630.0, 567.0, 627.0, 633.0, 584.0, 639.0, 582.0, 633.0, 636.0, 579.0, 636.0, 627.0, 630.0, 582.0, 633.0, 639.0, 627.0, 582.0, 630.0, 633.0, 633.0, 636.0, 578.0, 633.0, 633.0, 639.0, 633.0, 630.0, 587.0, 633.0, 639.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [317.0, 319.0, 324.0, 315.0, 311.0, 319.0, 308.0, 325.0, 311.0, 322.0, 319.0, 320.0, 319.0, 320.0, 286.0, 296.0, 148.0, 146.0, 321.0, 315.0, 294.0, 293.0, 288.0, 291.0, 323.0, 313.0, 224.0, 226.0, 296.0, 283.0, 290.0, 283.0, 289.0, 293.0, 290.0, 292.0, 293.0, 289.0, 316.0, 314.0, 314.0, 322.0, 324.0, 312.0, 319.0, 317.0, 290.0, 289.0, 279.0, 282.0, 313.0, 320.0, 319.0, 317.0, 316.0, 314.0, 311.0, 319.0, 317.0, 313.0, 322.0, 317.0, 311.0, 319.0, 314.0, 313.0, 296.0, 286.0, 314.0, 316.0, 322.0, 317.0, 319.0, 314.0, 291.0, 296.0, 321.0, 306.0, 288.0, 288.0, 266.0, 278.0, 320.0, 316.0, 316.0, 314.0, 319.0, 314.0, 316.0, 317.0, 310.0, 305.0, 298.0, 284.0, 316.0, 320.0, 327.0, 
312.0, 319.0, 317.0, 319.0, 314.0, 282.0, 297.0, 310.0, 320.0, 316.0, 317.0, 322.0, 311.0, 308.0, 328.0, 325.0, 314.0, 296.0, 283.0, 314.0, 322.0, 320.0, 310.0, 317.0, 322.0, 308.0, 325.0, 290.0, 292.0, 313.0, 317.0, 308.0, 319.0, 243.0, 273.0, 291.0, 296.0, 320.0, 319.0, 307.0, 326.0, 304.0, 326.0, 293.0, 274.0, 305.0, 322.0, 319.0, 314.0, 293.0, 291.0, 319.0, 320.0, 292.0, 290.0, 324.0, 309.0, 319.0, 317.0, 293.0, 286.0, 320.0, 316.0, 306.0, 321.0, 319.0, 311.0, 291.0, 291.0, 309.0, 324.0, 325.0, 314.0, 314.0, 313.0, 301.0, 281.0, 306.0, 324.0, 306.0, 327.0, 319.0, 314.0, 324.0, 312.0, 287.0, 291.0, 319.0, 314.0, 319.0, 314.0, 324.0, 315.0, 317.0, 316.0, 311.0, 319.0, 305.0, 282.0, 315.0, 318.0, 322.0, 317.0]}, "sampler_perf": {"mean_env_wait_ms": 0.987279141463972, "mean_processing_ms": 0.26569893386655014, "mean_inference_ms": 1.5631172104354278}, "off_policy_estimator": {}, "info": {"num_steps_trained": 6720000, "num_steps_sampled": 3584000, "sample_time_ms": 21992.586, "load_time_ms": 36.83, "grad_time_ms": 9822.342, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0033407146111130714, "policy_loss": -0.005221154540777206, "vf_loss": 91.33563232421875, "vf_explained_var": 0.7713200449943542, "kl": 0.001954694977030158, "entropy": 1.143385887145996, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3584000, "episodes_total": 8960, "training_iteration": 280, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-07-21", "timestamp": 1660255641, "time_this_iter_s": 36.67114806175232, "time_total_s": 14054.863948583603, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 14054.863948583603, "timesteps_since_restore": 3584000, "iterations_since_restore": 280, "perf": {"cpu_util_percent": 33.917307692307695, "ram_util_percent": 58.82115384615383}}
-{"episode_reward_max": 639.0, "episode_reward_min": 180.0, "episode_reward_mean": 606.73, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 89.0}, "policy_reward_max": {"ppo": 328.0}, "policy_reward_mean": {"ppo": 303.365}, "custom_metrics": {"sparse_reward_mean": 210.2, "sparse_reward_min": 60, "sparse_reward_max": 220, "shaped_reward_mean": 186.33, "shaped_reward_min": 60, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.42, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 18.59, "onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.51, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 17.66, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.78, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 0.66, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.35, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 0.26, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 16.38, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 17.56, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 6.37, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.86, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 18, "useful_dish_pickup_agent_0_mean": 5.49, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.03, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.66, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.6, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 14, "useful_dish_drop_agent_0_mean": 0.04, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.56, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.09, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.46, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.06, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.05, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.38, 
"optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 17.56, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.38, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 17.56, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [636.0, 639.0, 636.0, 633.0, 636.0, 579.0, 579.0, 573.0, 180.0, 587.0, 636.0, 630.0, 579.0, 639.0, 570.0, 639.0, 633.0, 636.0, 582.0, 582.0, 633.0, 627.0, 630.0, 624.0, 624.0, 536.0, 636.0, 636.0, 636.0, 587.0, 639.0, 639.0, 627.0, 516.0, 587.0, 639.0, 633.0, 630.0, 567.0, 627.0, 633.0, 584.0, 639.0, 582.0, 633.0, 636.0, 579.0, 636.0, 627.0, 630.0, 582.0, 633.0, 639.0, 627.0, 582.0, 630.0, 633.0, 633.0, 636.0, 578.0, 633.0, 633.0, 639.0, 633.0, 630.0, 587.0, 633.0, 639.0, 636.0, 639.0, 630.0, 633.0, 633.0, 639.0, 639.0, 582.0, 294.0, 636.0, 587.0, 579.0, 636.0, 450.0, 579.0, 573.0, 582.0, 582.0, 582.0, 630.0, 636.0, 636.0, 636.0, 579.0, 561.0, 633.0, 636.0, 630.0, 630.0, 630.0, 639.0, 630.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [317.0, 319.0, 322.0, 317.0, 319.0, 317.0, 321.0, 312.0, 313.0, 323.0, 277.0, 302.0, 290.0, 289.0, 291.0, 282.0, 91.0, 89.0, 296.0, 291.0, 322.0, 314.0, 316.0, 314.0, 298.0, 281.0, 314.0, 325.0, 284.0, 286.0, 311.0, 328.0, 308.0, 325.0, 311.0, 325.0, 294.0, 288.0, 293.0, 289.0, 321.0, 312.0, 316.0, 311.0, 317.0, 313.0, 327.0, 297.0, 321.0, 303.0, 275.0, 261.0, 314.0, 322.0, 314.0, 322.0, 316.0, 320.0, 303.0, 284.0, 314.0, 325.0, 319.0, 320.0, 308.0, 319.0, 243.0, 273.0, 291.0, 296.0, 320.0, 319.0, 307.0, 326.0, 304.0, 326.0, 293.0, 274.0, 305.0, 322.0, 319.0, 314.0, 293.0, 291.0, 319.0, 320.0, 292.0, 290.0, 324.0, 309.0, 319.0, 317.0, 293.0, 286.0, 320.0, 316.0, 306.0, 
321.0, 319.0, 311.0, 291.0, 291.0, 309.0, 324.0, 325.0, 314.0, 314.0, 313.0, 301.0, 281.0, 306.0, 324.0, 306.0, 327.0, 319.0, 314.0, 324.0, 312.0, 287.0, 291.0, 319.0, 314.0, 319.0, 314.0, 324.0, 315.0, 317.0, 316.0, 311.0, 319.0, 305.0, 282.0, 315.0, 318.0, 322.0, 317.0, 317.0, 319.0, 324.0, 315.0, 311.0, 319.0, 308.0, 325.0, 311.0, 322.0, 319.0, 320.0, 319.0, 320.0, 286.0, 296.0, 148.0, 146.0, 321.0, 315.0, 294.0, 293.0, 288.0, 291.0, 323.0, 313.0, 224.0, 226.0, 296.0, 283.0, 290.0, 283.0, 289.0, 293.0, 290.0, 292.0, 293.0, 289.0, 316.0, 314.0, 314.0, 322.0, 324.0, 312.0, 319.0, 317.0, 290.0, 289.0, 279.0, 282.0, 313.0, 320.0, 319.0, 317.0, 316.0, 314.0, 311.0, 319.0, 317.0, 313.0, 322.0, 317.0, 311.0, 319.0]}, "sampler_perf": {"mean_env_wait_ms": 0.985427132070836, "mean_processing_ms": 0.26533447466026966, "mean_inference_ms": 1.5623431092422566}, "off_policy_estimator": {}, "info": {"num_steps_trained": 6744000, "num_steps_sampled": 3596800, "sample_time_ms": 22602.144, "load_time_ms": 37.106, "grad_time_ms": 10160.693, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.006504642311483622, "policy_loss": -0.002157183364033699, "vf_loss": 92.3310546875, "vf_explained_var": 0.768465518951416, "kl": 0.002224028343334794, "entropy": 1.1425694227218628, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3596800, "episodes_total": 8992, "training_iteration": 281, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-08-00", "timestamp": 1660255680, "time_this_iter_s": 39.430299043655396, "time_total_s": 14094.294247627258, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 14094.294247627258, "timesteps_since_restore": 3596800, "iterations_since_restore": 281, "perf": {"cpu_util_percent": 32.93571428571428, "ram_util_percent": 58.800000000000004}}
-{"episode_reward_max": 639.0, "episode_reward_min": 180.0, "episode_reward_mean": 606.8, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 89.0}, "policy_reward_max": {"ppo": 329.0}, "policy_reward_mean": {"ppo": 303.4}, "custom_metrics": {"sparse_reward_mean": 210.0, "sparse_reward_min": 60, "sparse_reward_max": 220, "shaped_reward_mean": 186.8, "shaped_reward_min": 60, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.25, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 18.74, "onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.33, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.76, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.72, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 0.61, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.34, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 0.19, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 16.28, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.8, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 6.44, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.9, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 18, "useful_dish_pickup_agent_0_mean": 5.57, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.95, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.67, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.73, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 14, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.65, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.99, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.57, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.95, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.03, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.28, 
"optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.8, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.28, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.8, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 596.0, 587.0, 627.0, 633.0, 636.0, 636.0, 636.0, 636.0, 636.0, 582.0, 630.0, 633.0, 636.0, 630.0, 590.0, 636.0, 633.0, 573.0, 558.0, 636.0, 636.0, 636.0, 630.0, 639.0, 636.0, 522.0, 587.0, 636.0, 636.0, 587.0, 639.0, 630.0, 587.0, 633.0, 639.0, 636.0, 639.0, 630.0, 633.0, 633.0, 639.0, 639.0, 582.0, 294.0, 636.0, 587.0, 579.0, 636.0, 450.0, 579.0, 573.0, 582.0, 582.0, 582.0, 630.0, 636.0, 636.0, 636.0, 579.0, 561.0, 633.0, 636.0, 630.0, 630.0, 630.0, 639.0, 630.0, 636.0, 639.0, 636.0, 633.0, 636.0, 579.0, 579.0, 573.0, 180.0, 587.0, 636.0, 630.0, 579.0, 639.0, 570.0, 639.0, 633.0, 636.0, 582.0, 582.0, 633.0, 627.0, 630.0, 624.0, 624.0, 536.0, 636.0, 636.0, 636.0, 587.0, 639.0, 639.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [283.0, 296.0, 300.0, 296.0, 297.0, 290.0, 315.0, 312.0, 314.0, 319.0, 319.0, 317.0, 317.0, 319.0, 314.0, 322.0, 322.0, 314.0, 317.0, 319.0, 294.0, 288.0, 306.0, 324.0, 311.0, 322.0, 316.0, 320.0, 316.0, 314.0, 304.0, 286.0, 324.0, 312.0, 317.0, 316.0, 286.0, 287.0, 278.0, 280.0, 319.0, 317.0, 329.0, 307.0, 311.0, 325.0, 318.0, 312.0, 312.0, 327.0, 319.0, 317.0, 262.0, 260.0, 294.0, 293.0, 319.0, 317.0, 322.0, 314.0, 288.0, 299.0, 322.0, 317.0, 311.0, 319.0, 305.0, 282.0, 315.0, 318.0, 322.0, 317.0, 317.0, 319.0, 324.0, 315.0, 311.0, 319.0, 308.0, 325.0, 311.0, 322.0, 319.0, 320.0, 319.0, 320.0, 286.0, 296.0, 148.0, 146.0, 321.0, 315.0, 294.0, 293.0, 288.0, 291.0, 323.0, 
313.0, 224.0, 226.0, 296.0, 283.0, 290.0, 283.0, 289.0, 293.0, 290.0, 292.0, 293.0, 289.0, 316.0, 314.0, 314.0, 322.0, 324.0, 312.0, 319.0, 317.0, 290.0, 289.0, 279.0, 282.0, 313.0, 320.0, 319.0, 317.0, 316.0, 314.0, 311.0, 319.0, 317.0, 313.0, 322.0, 317.0, 311.0, 319.0, 317.0, 319.0, 322.0, 317.0, 319.0, 317.0, 321.0, 312.0, 313.0, 323.0, 277.0, 302.0, 290.0, 289.0, 291.0, 282.0, 91.0, 89.0, 296.0, 291.0, 322.0, 314.0, 316.0, 314.0, 298.0, 281.0, 314.0, 325.0, 284.0, 286.0, 311.0, 328.0, 308.0, 325.0, 311.0, 325.0, 294.0, 288.0, 293.0, 289.0, 321.0, 312.0, 316.0, 311.0, 317.0, 313.0, 327.0, 297.0, 321.0, 303.0, 275.0, 261.0, 314.0, 322.0, 314.0, 322.0, 316.0, 320.0, 303.0, 284.0, 314.0, 325.0, 319.0, 320.0]}, "sampler_perf": {"mean_env_wait_ms": 0.9835752103024169, "mean_processing_ms": 0.26496930123552576, "mean_inference_ms": 1.5613527088644699}, "off_policy_estimator": {}, "info": {"num_steps_trained": 6768000, "num_steps_sampled": 3609600, "sample_time_ms": 22700.667, "load_time_ms": 36.992, "grad_time_ms": 10562.273, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.004271908197551966, "policy_loss": -0.0035006285179406404, "vf_loss": 83.40963745117188, "vf_explained_var": 0.7725582718849182, "kl": 0.0017563734436407685, "entropy": 1.1368495225906372, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3609600, "episodes_total": 9024, "training_iteration": 282, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-08-36", "timestamp": 1660255716, "time_this_iter_s": 35.46651792526245, "time_total_s": 14129.76076555252, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 14129.76076555252, "timesteps_since_restore": 3609600, "iterations_since_restore": 282, "perf": {"cpu_util_percent": 33.821999999999996, "ram_util_percent": 59.328}}
-{"episode_reward_max": 639.0, "episode_reward_min": 180.0, "episode_reward_mean": 614.07, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 89.0}, "policy_reward_max": {"ppo": 329.0}, "policy_reward_mean": {"ppo": 307.035}, "custom_metrics": {"sparse_reward_mean": 212.6, "sparse_reward_min": 60, "sparse_reward_max": 220, "shaped_reward_mean": 188.87, "shaped_reward_min": 60, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.29, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 18.89, "onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 16.45, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 18.0, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.6, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 0.63, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.2, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 0.2, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 16.45, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.95, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.41, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.88, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.61, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.08, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.65, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.62, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.64, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.11, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.58, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.07, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.45, 
"optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.95, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.45, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.95, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [639.0, 573.0, 636.0, 630.0, 636.0, 630.0, 633.0, 579.0, 636.0, 639.0, 630.0, 633.0, 639.0, 639.0, 582.0, 636.0, 633.0, 630.0, 567.0, 633.0, 627.0, 627.0, 582.0, 639.0, 633.0, 636.0, 579.0, 630.0, 636.0, 636.0, 587.0, 639.0, 630.0, 630.0, 639.0, 630.0, 636.0, 639.0, 636.0, 633.0, 636.0, 579.0, 579.0, 573.0, 180.0, 587.0, 636.0, 630.0, 579.0, 639.0, 570.0, 639.0, 633.0, 636.0, 582.0, 582.0, 633.0, 627.0, 630.0, 624.0, 624.0, 536.0, 636.0, 636.0, 636.0, 587.0, 639.0, 639.0, 579.0, 596.0, 587.0, 627.0, 633.0, 636.0, 636.0, 636.0, 636.0, 636.0, 582.0, 630.0, 633.0, 636.0, 630.0, 590.0, 636.0, 633.0, 573.0, 558.0, 636.0, 636.0, 636.0, 630.0, 639.0, 636.0, 522.0, 587.0, 636.0, 636.0, 587.0, 639.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [317.0, 322.0, 292.0, 281.0, 308.0, 328.0, 304.0, 326.0, 317.0, 319.0, 319.0, 311.0, 311.0, 322.0, 284.0, 295.0, 313.0, 323.0, 319.0, 320.0, 319.0, 311.0, 316.0, 317.0, 319.0, 320.0, 321.0, 318.0, 283.0, 299.0, 316.0, 320.0, 316.0, 317.0, 316.0, 314.0, 279.0, 288.0, 313.0, 320.0, 313.0, 314.0, 311.0, 316.0, 288.0, 294.0, 319.0, 320.0, 314.0, 319.0, 319.0, 317.0, 294.0, 285.0, 318.0, 312.0, 317.0, 319.0, 322.0, 314.0, 298.0, 289.0, 322.0, 317.0, 311.0, 319.0, 317.0, 313.0, 322.0, 317.0, 311.0, 319.0, 317.0, 319.0, 322.0, 317.0, 319.0, 317.0, 321.0, 312.0, 313.0, 323.0, 277.0, 302.0, 290.0, 289.0, 291.0, 282.0, 91.0, 89.0, 296.0, 291.0, 322.0, 314.0, 316.0, 314.0, 298.0, 
281.0, 314.0, 325.0, 284.0, 286.0, 311.0, 328.0, 308.0, 325.0, 311.0, 325.0, 294.0, 288.0, 293.0, 289.0, 321.0, 312.0, 316.0, 311.0, 317.0, 313.0, 327.0, 297.0, 321.0, 303.0, 275.0, 261.0, 314.0, 322.0, 314.0, 322.0, 316.0, 320.0, 303.0, 284.0, 314.0, 325.0, 319.0, 320.0, 283.0, 296.0, 300.0, 296.0, 297.0, 290.0, 315.0, 312.0, 314.0, 319.0, 319.0, 317.0, 317.0, 319.0, 314.0, 322.0, 322.0, 314.0, 317.0, 319.0, 294.0, 288.0, 306.0, 324.0, 311.0, 322.0, 316.0, 320.0, 316.0, 314.0, 304.0, 286.0, 324.0, 312.0, 317.0, 316.0, 286.0, 287.0, 278.0, 280.0, 319.0, 317.0, 329.0, 307.0, 311.0, 325.0, 318.0, 312.0, 312.0, 327.0, 319.0, 317.0, 262.0, 260.0, 294.0, 293.0, 319.0, 317.0, 322.0, 314.0, 288.0, 299.0, 322.0, 317.0]}, "sampler_perf": {"mean_env_wait_ms": 0.9817221783710357, "mean_processing_ms": 0.2646010611587108, "mean_inference_ms": 1.5601389392518195}, "off_policy_estimator": {}, "info": {"num_steps_trained": 6792000, "num_steps_sampled": 3622400, "sample_time_ms": 22788.728, "load_time_ms": 36.79, "grad_time_ms": 10687.472, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0030612878035753965, "policy_loss": -0.0045290542766451836, "vf_loss": 81.5626449584961, "vf_explained_var": 0.7761082053184509, "kl": 0.0021392148919403553, "entropy": 1.131847858428955, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3622400, "episodes_total": 9056, "training_iteration": 283, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-09-09", "timestamp": 1660255749, "time_this_iter_s": 33.67844009399414, "time_total_s": 14163.439205646515, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 14163.439205646515, "timesteps_since_restore": 3622400, "iterations_since_restore": 283, "perf": {"cpu_util_percent": 34.11489361702128, "ram_util_percent": 59.04893617021279}}
-{"episode_reward_max": 639.0, "episode_reward_min": 522.0, "episode_reward_mean": 617.97, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 260.0}, "policy_reward_max": {"ppo": 329.0}, "policy_reward_mean": {"ppo": 308.985}, "custom_metrics": {"sparse_reward_mean": 214.0, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 189.97, "shaped_reward_min": 158, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.28, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 18.9, "onion_pickup_agent_1_min": 14, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 16.58, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 18.13, "useful_onion_pickup_agent_1_min": 14, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.53, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.56, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.19, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.21, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 16.56, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.97, "potting_onion_agent_1_min": 14, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.22, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.85, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.61, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.15, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.47, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.55, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.04, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.6, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.23, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.54, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.19, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.56, 
"optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.97, "optimal_onion_potting_agent_1_min": 14, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.56, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.97, "viable_onion_potting_agent_1_min": 14, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [633.0, 630.0, 579.0, 582.0, 636.0, 575.0, 587.0, 582.0, 630.0, 630.0, 636.0, 633.0, 627.0, 627.0, 633.0, 593.0, 587.0, 636.0, 630.0, 627.0, 630.0, 636.0, 633.0, 630.0, 630.0, 573.0, 627.0, 579.0, 596.0, 579.0, 630.0, 633.0, 636.0, 587.0, 639.0, 639.0, 579.0, 596.0, 587.0, 627.0, 633.0, 636.0, 636.0, 636.0, 636.0, 636.0, 582.0, 630.0, 633.0, 636.0, 630.0, 590.0, 636.0, 633.0, 573.0, 558.0, 636.0, 636.0, 636.0, 630.0, 639.0, 636.0, 522.0, 587.0, 636.0, 636.0, 587.0, 639.0, 639.0, 573.0, 636.0, 630.0, 636.0, 630.0, 633.0, 579.0, 636.0, 639.0, 630.0, 633.0, 639.0, 639.0, 582.0, 636.0, 633.0, 630.0, 567.0, 633.0, 627.0, 627.0, 582.0, 639.0, 633.0, 636.0, 579.0, 630.0, 636.0, 636.0, 587.0, 639.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [321.0, 312.0, 311.0, 319.0, 288.0, 291.0, 286.0, 296.0, 311.0, 325.0, 291.0, 284.0, 306.0, 281.0, 294.0, 288.0, 314.0, 316.0, 311.0, 319.0, 319.0, 317.0, 316.0, 317.0, 305.0, 322.0, 308.0, 319.0, 314.0, 319.0, 288.0, 305.0, 299.0, 288.0, 319.0, 317.0, 316.0, 314.0, 318.0, 309.0, 311.0, 319.0, 319.0, 317.0, 321.0, 312.0, 316.0, 314.0, 311.0, 319.0, 287.0, 286.0, 311.0, 316.0, 291.0, 288.0, 294.0, 302.0, 291.0, 288.0, 314.0, 316.0, 311.0, 322.0, 316.0, 320.0, 303.0, 284.0, 314.0, 325.0, 319.0, 320.0, 283.0, 296.0, 300.0, 296.0, 297.0, 290.0, 315.0, 312.0, 314.0, 319.0, 319.0, 317.0, 317.0, 319.0, 314.0, 322.0, 322.0, 314.0, 317.0, 319.0, 294.0, 288.0, 306.0, 324.0, 311.0, 
322.0, 316.0, 320.0, 316.0, 314.0, 304.0, 286.0, 324.0, 312.0, 317.0, 316.0, 286.0, 287.0, 278.0, 280.0, 319.0, 317.0, 329.0, 307.0, 311.0, 325.0, 318.0, 312.0, 312.0, 327.0, 319.0, 317.0, 262.0, 260.0, 294.0, 293.0, 319.0, 317.0, 322.0, 314.0, 288.0, 299.0, 322.0, 317.0, 317.0, 322.0, 292.0, 281.0, 308.0, 328.0, 304.0, 326.0, 317.0, 319.0, 319.0, 311.0, 311.0, 322.0, 284.0, 295.0, 313.0, 323.0, 319.0, 320.0, 319.0, 311.0, 316.0, 317.0, 319.0, 320.0, 321.0, 318.0, 283.0, 299.0, 316.0, 320.0, 316.0, 317.0, 316.0, 314.0, 279.0, 288.0, 313.0, 320.0, 313.0, 314.0, 311.0, 316.0, 288.0, 294.0, 319.0, 320.0, 314.0, 319.0, 319.0, 317.0, 294.0, 285.0, 318.0, 312.0, 317.0, 319.0, 322.0, 314.0, 298.0, 289.0, 322.0, 317.0]}, "sampler_perf": {"mean_env_wait_ms": 0.9798702213008988, "mean_processing_ms": 0.26423169692571163, "mean_inference_ms": 1.5586086454060646}, "off_policy_estimator": {}, "info": {"num_steps_trained": 6816000, "num_steps_sampled": 3635200, "sample_time_ms": 22897.149, "load_time_ms": 36.929, "grad_time_ms": 10771.125, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.005141934845596552, "policy_loss": -0.003231912851333618, "vf_loss": 89.45598602294922, "vf_explained_var": 0.7527138590812683, "kl": 0.0021111962851136923, "entropy": 1.1434991359710693, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3635200, "episodes_total": 9088, "training_iteration": 284, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-09-42", "timestamp": 1660255782, "time_this_iter_s": 32.81455707550049, "time_total_s": 14196.253762722015, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 14196.253762722015, "timesteps_since_restore": 3635200, "iterations_since_restore": 284, "perf": {"cpu_util_percent": 32.710638297872336, "ram_util_percent": 58.93617021276594}}
-{"episode_reward_max": 639.0, "episode_reward_min": 180.0, "episode_reward_mean": 616.71, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 89.0}, "policy_reward_max": {"ppo": 330.0}, "policy_reward_mean": {"ppo": 308.355}, "custom_metrics": {"sparse_reward_mean": 213.6, "sparse_reward_min": 60, "sparse_reward_max": 220, "shaped_reward_mean": 189.51, "shaped_reward_min": 60, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.26, "onion_pickup_agent_0_min": 5, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 18.86, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 16.6, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 18.07, "useful_onion_pickup_agent_1_min": 6, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.56, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 8, "onion_drop_agent_1_mean": 0.54, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.26, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 7, "useful_onion_drop_agent_1_mean": 0.2, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 16.46, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.94, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.19, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.65, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.66, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.16, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.43, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.4, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.04, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.61, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.17, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.57, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.14, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.46, 
"optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.94, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.46, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.94, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 636.0, 633.0, 639.0, 630.0, 636.0, 636.0, 636.0, 627.0, 633.0, 582.0, 636.0, 627.0, 180.0, 639.0, 633.0, 639.0, 636.0, 636.0, 633.0, 633.0, 539.0, 630.0, 636.0, 639.0, 636.0, 630.0, 587.0, 633.0, 636.0, 636.0, 636.0, 636.0, 636.0, 587.0, 639.0, 639.0, 573.0, 636.0, 630.0, 636.0, 630.0, 633.0, 579.0, 636.0, 639.0, 630.0, 633.0, 639.0, 639.0, 582.0, 636.0, 633.0, 630.0, 567.0, 633.0, 627.0, 627.0, 582.0, 639.0, 633.0, 636.0, 579.0, 630.0, 636.0, 636.0, 587.0, 639.0, 633.0, 630.0, 579.0, 582.0, 636.0, 575.0, 587.0, 582.0, 630.0, 630.0, 636.0, 633.0, 627.0, 627.0, 633.0, 593.0, 587.0, 636.0, 630.0, 627.0, 630.0, 636.0, 633.0, 630.0, 630.0, 573.0, 627.0, 579.0, 596.0, 579.0, 630.0, 633.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [288.0, 294.0, 317.0, 319.0, 317.0, 316.0, 322.0, 317.0, 316.0, 314.0, 327.0, 309.0, 316.0, 320.0, 326.0, 310.0, 316.0, 311.0, 326.0, 307.0, 288.0, 294.0, 311.0, 325.0, 308.0, 319.0, 91.0, 89.0, 317.0, 322.0, 324.0, 309.0, 324.0, 315.0, 317.0, 319.0, 317.0, 319.0, 314.0, 319.0, 314.0, 319.0, 268.0, 271.0, 311.0, 319.0, 314.0, 322.0, 322.0, 317.0, 322.0, 314.0, 329.0, 301.0, 296.0, 291.0, 314.0, 319.0, 306.0, 330.0, 314.0, 322.0, 324.0, 312.0, 319.0, 317.0, 322.0, 314.0, 288.0, 299.0, 322.0, 317.0, 317.0, 322.0, 292.0, 281.0, 308.0, 328.0, 304.0, 326.0, 317.0, 319.0, 319.0, 311.0, 311.0, 322.0, 284.0, 295.0, 313.0, 323.0, 319.0, 320.0, 319.0, 311.0, 316.0, 317.0, 319.0, 
320.0, 321.0, 318.0, 283.0, 299.0, 316.0, 320.0, 316.0, 317.0, 316.0, 314.0, 279.0, 288.0, 313.0, 320.0, 313.0, 314.0, 311.0, 316.0, 288.0, 294.0, 319.0, 320.0, 314.0, 319.0, 319.0, 317.0, 294.0, 285.0, 318.0, 312.0, 317.0, 319.0, 322.0, 314.0, 298.0, 289.0, 322.0, 317.0, 321.0, 312.0, 311.0, 319.0, 288.0, 291.0, 286.0, 296.0, 311.0, 325.0, 291.0, 284.0, 306.0, 281.0, 294.0, 288.0, 314.0, 316.0, 311.0, 319.0, 319.0, 317.0, 316.0, 317.0, 305.0, 322.0, 308.0, 319.0, 314.0, 319.0, 288.0, 305.0, 299.0, 288.0, 319.0, 317.0, 316.0, 314.0, 318.0, 309.0, 311.0, 319.0, 319.0, 317.0, 321.0, 312.0, 316.0, 314.0, 311.0, 319.0, 287.0, 286.0, 311.0, 316.0, 291.0, 288.0, 294.0, 302.0, 291.0, 288.0, 314.0, 316.0, 311.0, 322.0]}, "sampler_perf": {"mean_env_wait_ms": 0.978026720054701, "mean_processing_ms": 0.2638645692972578, "mean_inference_ms": 1.5570587102664553}, "off_policy_estimator": {}, "info": {"num_steps_trained": 6840000, "num_steps_sampled": 3648000, "sample_time_ms": 23082.722, "load_time_ms": 36.933, "grad_time_ms": 10916.497, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.002097133779898286, "policy_loss": -0.005807385314255953, "vf_loss": 84.70693969726562, "vf_explained_var": 0.7814067006111145, "kl": 0.0015371787594631314, "entropy": 1.1323403120040894, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3648000, "episodes_total": 9120, "training_iteration": 285, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-10-15", "timestamp": 1660255815, "time_this_iter_s": 33.031522035598755, "time_total_s": 14229.285284757614, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 14229.285284757614, "timesteps_since_restore": 3648000, "iterations_since_restore": 285, "perf": {"cpu_util_percent": 32.80434782608696, "ram_util_percent": 58.88260869565216}}
-{"episode_reward_max": 639.0, "episode_reward_min": 180.0, "episode_reward_mean": 616.27, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 89.0}, "policy_reward_max": {"ppo": 332.0}, "policy_reward_mean": {"ppo": 308.135}, "custom_metrics": {"sparse_reward_mean": 213.4, "sparse_reward_min": 60, "sparse_reward_max": 220, "shaped_reward_mean": 189.47, "shaped_reward_min": 60, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.33, "onion_pickup_agent_0_min": 5, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 18.63, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.67, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 17.8, "useful_onion_pickup_agent_1_min": 6, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.55, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 8, "onion_drop_agent_1_mean": 0.44, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.29, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 7, "useful_onion_drop_agent_1_mean": 0.12, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.5, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.84, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.14, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.5, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.75, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.1, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.32, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.3, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.69, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.1, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.64, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.07, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.5, "optimal_onion_potting_agent_0_min": 
5, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.84, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.5, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.84, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": 
{"episode_reward": [633.0, 582.0, 624.0, 636.0, 630.0, 630.0, 633.0, 636.0, 636.0, 636.0, 630.0, 633.0, 630.0, 630.0, 639.0, 639.0, 630.0, 639.0, 579.0, 633.0, 582.0, 587.0, 636.0, 639.0, 587.0, 636.0, 578.0, 639.0, 630.0, 576.0, 630.0, 582.0, 636.0, 636.0, 587.0, 639.0, 633.0, 630.0, 579.0, 582.0, 636.0, 575.0, 587.0, 582.0, 630.0, 630.0, 636.0, 633.0, 627.0, 627.0, 633.0, 593.0, 587.0, 636.0, 630.0, 627.0, 630.0, 636.0, 633.0, 630.0, 630.0, 573.0, 627.0, 579.0, 596.0, 579.0, 630.0, 633.0, 582.0, 636.0, 633.0, 639.0, 630.0, 636.0, 636.0, 636.0, 627.0, 633.0, 582.0, 636.0, 627.0, 180.0, 639.0, 633.0, 639.0, 636.0, 636.0, 633.0, 633.0, 539.0, 630.0, 636.0, 639.0, 636.0, 630.0, 587.0, 633.0, 636.0, 636.0, 636.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [322.0, 311.0, 296.0, 286.0, 313.0, 311.0, 314.0, 322.0, 309.0, 321.0, 319.0, 311.0, 318.0, 315.0, 325.0, 311.0, 314.0, 322.0, 312.0, 324.0, 319.0, 311.0, 313.0, 320.0, 325.0, 305.0, 324.0, 306.0, 319.0, 320.0, 317.0, 322.0, 313.0, 317.0, 316.0, 323.0, 288.0, 291.0, 319.0, 314.0, 296.0, 286.0, 295.0, 292.0, 332.0, 304.0, 319.0, 320.0, 296.0, 291.0, 316.0, 320.0, 282.0, 296.0, 314.0, 325.0, 316.0, 314.0, 293.0, 283.0, 323.0, 307.0, 291.0, 291.0, 317.0, 319.0, 322.0, 314.0, 298.0, 289.0, 322.0, 317.0, 321.0, 312.0, 311.0, 319.0, 288.0, 291.0, 286.0, 296.0, 311.0, 325.0, 291.0, 284.0, 306.0, 281.0, 294.0, 288.0, 314.0, 316.0, 311.0, 319.0, 319.0, 317.0, 316.0, 317.0, 305.0, 322.0, 308.0, 319.0, 314.0, 319.0, 288.0, 305.0, 299.0, 
288.0, 319.0, 317.0, 316.0, 314.0, 318.0, 309.0, 311.0, 319.0, 319.0, 317.0, 321.0, 312.0, 316.0, 314.0, 311.0, 319.0, 287.0, 286.0, 311.0, 316.0, 291.0, 288.0, 294.0, 302.0, 291.0, 288.0, 314.0, 316.0, 311.0, 322.0, 288.0, 294.0, 317.0, 319.0, 317.0, 316.0, 322.0, 317.0, 316.0, 314.0, 327.0, 309.0, 316.0, 320.0, 326.0, 310.0, 316.0, 311.0, 326.0, 307.0, 288.0, 294.0, 311.0, 325.0, 308.0, 319.0, 91.0, 89.0, 317.0, 322.0, 324.0, 309.0, 324.0, 315.0, 317.0, 319.0, 317.0, 319.0, 314.0, 319.0, 314.0, 319.0, 268.0, 271.0, 311.0, 319.0, 314.0, 322.0, 322.0, 317.0, 322.0, 314.0, 329.0, 301.0, 296.0, 291.0, 314.0, 319.0, 306.0, 330.0, 314.0, 322.0, 324.0, 312.0]}, "sampler_perf": {"mean_env_wait_ms": 0.9761954337234573, "mean_processing_ms": 0.26349990327404404, "mean_inference_ms": 1.5554986152813894}, "off_policy_estimator": {}, "info": {"num_steps_trained": 6864000, "num_steps_sampled": 3660800, "sample_time_ms": 23037.067, "load_time_ms": 36.944, "grad_time_ms": 11090.889, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0016592548927292228, "policy_loss": -0.006142645608633757, "vf_loss": 83.6804428100586, "vf_explained_var": 0.7674832344055176, "kl": 0.0020798875484615564, "entropy": 1.1322760581970215, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3660800, "episodes_total": 9152, "training_iteration": 286, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-10-47", "timestamp": 1660255847, "time_this_iter_s": 31.660379886627197, "time_total_s": 14260.945664644241, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, 
"conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, 
"custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, 
"vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 14260.945664644241, "timesteps_since_restore": 3660800, "iterations_since_restore": 286, "perf": {"cpu_util_percent": 33.97111111111111, "ram_util_percent": 58.86666666666669}}
-{"episode_reward_max": 639.0, "episode_reward_min": 180.0, "episode_reward_mean": 618.23, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 89.0}, "policy_reward_max": {"ppo": 332.0}, "policy_reward_mean": {"ppo": 309.115}, "custom_metrics": {"sparse_reward_mean": 214.2, "sparse_reward_min": 60, "sparse_reward_max": 220, "shaped_reward_mean": 189.83, "shaped_reward_min": 60, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.54, "onion_pickup_agent_0_min": 5, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 18.68, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.87, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 17.89, "useful_onion_pickup_agent_1_min": 6, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.59, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 8, "onion_drop_agent_1_mean": 0.53, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.27, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 7, "useful_onion_drop_agent_1_mean": 0.1, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.63, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.86, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 6.14, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.56, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.65, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.17, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.32, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.29, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.66, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.12, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.65, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.1, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.63, 
"optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.86, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.63, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.86, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [636.0, 579.0, 627.0, 630.0, 636.0, 639.0, 633.0, 633.0, 636.0, 627.0, 636.0, 630.0, 582.0, 627.0, 636.0, 636.0, 636.0, 587.0, 636.0, 633.0, 636.0, 633.0, 633.0, 630.0, 633.0, 636.0, 627.0, 579.0, 639.0, 582.0, 546.0, 636.0, 596.0, 579.0, 630.0, 633.0, 582.0, 636.0, 633.0, 639.0, 630.0, 636.0, 636.0, 636.0, 627.0, 633.0, 582.0, 636.0, 627.0, 180.0, 639.0, 633.0, 639.0, 636.0, 636.0, 633.0, 633.0, 539.0, 630.0, 636.0, 639.0, 636.0, 630.0, 587.0, 633.0, 636.0, 636.0, 636.0, 633.0, 582.0, 624.0, 636.0, 630.0, 630.0, 633.0, 636.0, 636.0, 636.0, 630.0, 633.0, 630.0, 630.0, 639.0, 639.0, 630.0, 639.0, 579.0, 633.0, 582.0, 587.0, 636.0, 639.0, 587.0, 636.0, 578.0, 639.0, 630.0, 576.0, 630.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [313.0, 323.0, 299.0, 280.0, 313.0, 314.0, 316.0, 314.0, 319.0, 317.0, 320.0, 319.0, 319.0, 314.0, 316.0, 317.0, 314.0, 322.0, 324.0, 303.0, 307.0, 329.0, 307.0, 323.0, 293.0, 289.0, 311.0, 316.0, 314.0, 322.0, 319.0, 317.0, 313.0, 323.0, 283.0, 304.0, 319.0, 317.0, 319.0, 314.0, 314.0, 322.0, 311.0, 322.0, 311.0, 322.0, 314.0, 316.0, 322.0, 311.0, 319.0, 317.0, 313.0, 314.0, 282.0, 297.0, 317.0, 322.0, 293.0, 289.0, 283.0, 263.0, 323.0, 313.0, 294.0, 302.0, 291.0, 288.0, 314.0, 316.0, 311.0, 322.0, 288.0, 294.0, 317.0, 319.0, 317.0, 316.0, 322.0, 317.0, 316.0, 314.0, 327.0, 309.0, 316.0, 320.0, 326.0, 310.0, 316.0, 311.0, 326.0, 307.0, 288.0, 294.0, 311.0, 325.0, 308.0, 
319.0, 91.0, 89.0, 317.0, 322.0, 324.0, 309.0, 324.0, 315.0, 317.0, 319.0, 317.0, 319.0, 314.0, 319.0, 314.0, 319.0, 268.0, 271.0, 311.0, 319.0, 314.0, 322.0, 322.0, 317.0, 322.0, 314.0, 329.0, 301.0, 296.0, 291.0, 314.0, 319.0, 306.0, 330.0, 314.0, 322.0, 324.0, 312.0, 322.0, 311.0, 296.0, 286.0, 313.0, 311.0, 314.0, 322.0, 309.0, 321.0, 319.0, 311.0, 318.0, 315.0, 325.0, 311.0, 314.0, 322.0, 312.0, 324.0, 319.0, 311.0, 313.0, 320.0, 325.0, 305.0, 324.0, 306.0, 319.0, 320.0, 317.0, 322.0, 313.0, 317.0, 316.0, 323.0, 288.0, 291.0, 319.0, 314.0, 296.0, 286.0, 295.0, 292.0, 332.0, 304.0, 319.0, 320.0, 296.0, 291.0, 316.0, 320.0, 282.0, 296.0, 314.0, 325.0, 316.0, 314.0, 293.0, 283.0, 323.0, 307.0, 291.0, 291.0]}, "sampler_perf": {"mean_env_wait_ms": 0.9743700047220856, "mean_processing_ms": 0.2631349992390798, "mean_inference_ms": 1.553816906350355}, "off_policy_estimator": {}, "info": {"num_steps_trained": 6888000, "num_steps_sampled": 3673600, "sample_time_ms": 22960.493, "load_time_ms": 36.753, "grad_time_ms": 11340.647, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0018661068752408028, "policy_loss": -0.005918534938246012, "vf_loss": 83.52860260009766, "vf_explained_var": 0.7654721140861511, "kl": 0.0018988008378073573, "entropy": 1.136439561843872, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3673600, "episodes_total": 9184, "training_iteration": 287, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-11-18", "timestamp": 1660255878, "time_this_iter_s": 31.607279777526855, "time_total_s": 14292.552944421768, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 14292.552944421768, "timesteps_since_restore": 3673600, "iterations_since_restore": 287, "perf": {"cpu_util_percent": 34.857777777777784, "ram_util_percent": 58.80666666666665}}
-{"episode_reward_max": 639.0, "episode_reward_min": 546.0, "episode_reward_mean": 622.01, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 263.0}, "policy_reward_max": {"ppo": 332.0}, "policy_reward_mean": {"ppo": 311.005}, "custom_metrics": {"sparse_reward_mean": 215.6, "sparse_reward_min": 200, "sparse_reward_max": 220, "shaped_reward_mean": 190.81, "shaped_reward_min": 146, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.43, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 18.79, "onion_pickup_agent_1_min": 13, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.78, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 18.05, "useful_onion_pickup_agent_1_min": 12, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.52, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.47, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.18, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.08, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.61, "potting_onion_agent_0_min": 13, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 18.06, "potting_onion_agent_1_min": 13, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 6.19, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.7, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.63, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.17, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.31, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.38, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.72, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.16, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.7, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.1, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.61, 
"optimal_onion_potting_agent_0_min": 13, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 18.06, "optimal_onion_potting_agent_1_min": 13, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.61, "viable_onion_potting_agent_0_min": 13, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 18.06, "viable_onion_potting_agent_1_min": 13, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [584.0, 627.0, 633.0, 627.0, 584.0, 633.0, 636.0, 639.0, 639.0, 630.0, 587.0, 630.0, 582.0, 582.0, 584.0, 630.0, 636.0, 630.0, 624.0, 636.0, 633.0, 587.0, 639.0, 630.0, 639.0, 587.0, 639.0, 636.0, 630.0, 630.0, 636.0, 636.0, 633.0, 636.0, 636.0, 636.0, 633.0, 582.0, 624.0, 636.0, 630.0, 630.0, 633.0, 636.0, 636.0, 636.0, 630.0, 633.0, 630.0, 630.0, 639.0, 639.0, 630.0, 639.0, 579.0, 633.0, 582.0, 587.0, 636.0, 639.0, 587.0, 636.0, 578.0, 639.0, 630.0, 576.0, 630.0, 582.0, 636.0, 579.0, 627.0, 630.0, 636.0, 639.0, 633.0, 633.0, 636.0, 627.0, 636.0, 630.0, 582.0, 627.0, 636.0, 636.0, 636.0, 587.0, 636.0, 633.0, 636.0, 633.0, 633.0, 630.0, 633.0, 636.0, 627.0, 579.0, 639.0, 582.0, 546.0, 636.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [285.0, 299.0, 316.0, 311.0, 324.0, 309.0, 318.0, 309.0, 289.0, 295.0, 314.0, 319.0, 316.0, 320.0, 319.0, 320.0, 317.0, 322.0, 314.0, 316.0, 293.0, 294.0, 318.0, 312.0, 288.0, 294.0, 289.0, 293.0, 292.0, 292.0, 315.0, 315.0, 324.0, 312.0, 324.0, 306.0, 313.0, 311.0, 314.0, 322.0, 311.0, 322.0, 295.0, 292.0, 322.0, 317.0, 308.0, 322.0, 314.0, 325.0, 293.0, 294.0, 317.0, 322.0, 318.0, 318.0, 319.0, 311.0, 314.0, 316.0, 319.0, 317.0, 314.0, 322.0, 314.0, 319.0, 306.0, 330.0, 314.0, 322.0, 324.0, 312.0, 322.0, 311.0, 296.0, 286.0, 313.0, 311.0, 314.0, 322.0, 309.0, 321.0, 319.0, 311.0, 318.0, 315.0, 325.0, 311.0, 314.0, 322.0, 312.0, 324.0, 319.0, 311.0, 313.0, 320.0, 325.0, 
305.0, 324.0, 306.0, 319.0, 320.0, 317.0, 322.0, 313.0, 317.0, 316.0, 323.0, 288.0, 291.0, 319.0, 314.0, 296.0, 286.0, 295.0, 292.0, 332.0, 304.0, 319.0, 320.0, 296.0, 291.0, 316.0, 320.0, 282.0, 296.0, 314.0, 325.0, 316.0, 314.0, 293.0, 283.0, 323.0, 307.0, 291.0, 291.0, 313.0, 323.0, 299.0, 280.0, 313.0, 314.0, 316.0, 314.0, 319.0, 317.0, 320.0, 319.0, 319.0, 314.0, 316.0, 317.0, 314.0, 322.0, 324.0, 303.0, 307.0, 329.0, 307.0, 323.0, 293.0, 289.0, 311.0, 316.0, 314.0, 322.0, 319.0, 317.0, 313.0, 323.0, 283.0, 304.0, 319.0, 317.0, 319.0, 314.0, 314.0, 322.0, 311.0, 322.0, 311.0, 322.0, 314.0, 316.0, 322.0, 311.0, 319.0, 317.0, 313.0, 314.0, 282.0, 297.0, 317.0, 322.0, 293.0, 289.0, 283.0, 263.0, 323.0, 313.0]}, "sampler_perf": {"mean_env_wait_ms": 0.9725594290722099, "mean_processing_ms": 0.2627719670023304, "mean_inference_ms": 1.5521036278405136}, "off_policy_estimator": {}, "info": {"num_steps_trained": 6912000, "num_steps_sampled": 3686400, "sample_time_ms": 23043.972, "load_time_ms": 36.653, "grad_time_ms": 11285.002, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0023610887583345175, "policy_loss": -0.005447230767458677, "vf_loss": 83.72765350341797, "vf_explained_var": 0.7662909030914307, "kl": 0.001831754925660789, "entropy": 1.128881573677063, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3686400, "episodes_total": 9216, "training_iteration": 288, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-11-51", "timestamp": 1660255911, "time_this_iter_s": 32.35726475715637, "time_total_s": 14324.910209178925, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 14324.910209178925, "timesteps_since_restore": 3686400, "iterations_since_restore": 288, "perf": {"cpu_util_percent": 32.42, "ram_util_percent": 58.875555555555565}}
-{"episode_reward_max": 639.0, "episode_reward_min": 546.0, "episode_reward_mean": 621.91, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 263.0}, "policy_reward_max": {"ppo": 329.0}, "policy_reward_mean": {"ppo": 310.955}, "custom_metrics": {"sparse_reward_mean": 215.6, "sparse_reward_min": 200, "sparse_reward_max": 220, "shaped_reward_mean": 190.71, "shaped_reward_min": 146, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.17, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 18.98, "onion_pickup_agent_1_min": 13, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 16.55, "useful_onion_pickup_agent_0_min": 9, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 18.24, "useful_onion_pickup_agent_1_min": 12, "useful_onion_pickup_agent_1_max": 26, "onion_drop_agent_0_mean": 0.49, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.49, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.2, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.13, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.38, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 18.24, "potting_onion_agent_1_min": 13, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 6.18, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.68, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.64, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.21, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.3, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.36, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.73, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.14, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.71, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.08, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.38, 
"optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 18.24, "optimal_onion_potting_agent_1_min": 13, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.38, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 18.24, "viable_onion_potting_agent_1_min": 13, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [636.0, 636.0, 633.0, 636.0, 639.0, 627.0, 627.0, 639.0, 639.0, 636.0, 630.0, 636.0, 630.0, 633.0, 639.0, 579.0, 587.0, 630.0, 633.0, 630.0, 639.0, 582.0, 630.0, 639.0, 636.0, 630.0, 573.0, 582.0, 630.0, 582.0, 636.0, 639.0, 630.0, 576.0, 630.0, 582.0, 636.0, 579.0, 627.0, 630.0, 636.0, 639.0, 633.0, 633.0, 636.0, 627.0, 636.0, 630.0, 582.0, 627.0, 636.0, 636.0, 636.0, 587.0, 636.0, 633.0, 636.0, 633.0, 633.0, 630.0, 633.0, 636.0, 627.0, 579.0, 639.0, 582.0, 546.0, 636.0, 584.0, 627.0, 633.0, 627.0, 584.0, 633.0, 636.0, 639.0, 639.0, 630.0, 587.0, 630.0, 582.0, 582.0, 584.0, 630.0, 636.0, 630.0, 624.0, 636.0, 633.0, 587.0, 639.0, 630.0, 639.0, 587.0, 639.0, 636.0, 630.0, 630.0, 636.0, 636.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [324.0, 312.0, 319.0, 317.0, 321.0, 312.0, 318.0, 318.0, 319.0, 320.0, 305.0, 322.0, 310.0, 317.0, 319.0, 320.0, 314.0, 325.0, 321.0, 315.0, 311.0, 319.0, 311.0, 325.0, 311.0, 319.0, 311.0, 322.0, 319.0, 320.0, 285.0, 294.0, 298.0, 289.0, 319.0, 311.0, 313.0, 320.0, 308.0, 322.0, 319.0, 320.0, 293.0, 289.0, 319.0, 311.0, 324.0, 315.0, 314.0, 322.0, 311.0, 319.0, 276.0, 297.0, 286.0, 296.0, 308.0, 322.0, 286.0, 296.0, 324.0, 312.0, 321.0, 318.0, 316.0, 314.0, 293.0, 283.0, 323.0, 307.0, 291.0, 291.0, 313.0, 323.0, 299.0, 280.0, 313.0, 314.0, 316.0, 314.0, 319.0, 317.0, 320.0, 319.0, 319.0, 314.0, 316.0, 317.0, 314.0, 322.0, 324.0, 303.0, 307.0, 329.0, 307.0, 323.0, 293.0, 
289.0, 311.0, 316.0, 314.0, 322.0, 319.0, 317.0, 313.0, 323.0, 283.0, 304.0, 319.0, 317.0, 319.0, 314.0, 314.0, 322.0, 311.0, 322.0, 311.0, 322.0, 314.0, 316.0, 322.0, 311.0, 319.0, 317.0, 313.0, 314.0, 282.0, 297.0, 317.0, 322.0, 293.0, 289.0, 283.0, 263.0, 323.0, 313.0, 285.0, 299.0, 316.0, 311.0, 324.0, 309.0, 318.0, 309.0, 289.0, 295.0, 314.0, 319.0, 316.0, 320.0, 319.0, 320.0, 317.0, 322.0, 314.0, 316.0, 293.0, 294.0, 318.0, 312.0, 288.0, 294.0, 289.0, 293.0, 292.0, 292.0, 315.0, 315.0, 324.0, 312.0, 324.0, 306.0, 313.0, 311.0, 314.0, 322.0, 311.0, 322.0, 295.0, 292.0, 322.0, 317.0, 308.0, 322.0, 314.0, 325.0, 293.0, 294.0, 317.0, 322.0, 318.0, 318.0, 319.0, 311.0, 314.0, 316.0, 319.0, 317.0, 314.0, 322.0]}, "sampler_perf": {"mean_env_wait_ms": 0.9707646787589033, "mean_processing_ms": 0.26241176618575823, "mean_inference_ms": 1.5504455690384487}, "off_policy_estimator": {}, "info": {"num_steps_trained": 6936000, "num_steps_sampled": 3699200, "sample_time_ms": 22728.739, "load_time_ms": 36.546, "grad_time_ms": 11195.454, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0004752982931677252, "policy_loss": -0.00697875814512372, "vf_loss": 80.24703979492188, "vf_explained_var": 0.7700864672660828, "kl": 0.001980138709768653, "entropy": 1.1412941217422485, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3699200, "episodes_total": 9248, "training_iteration": 289, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-12-24", "timestamp": 1660255944, "time_this_iter_s": 33.15079879760742, "time_total_s": 14358.061007976532, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 14358.061007976532, "timesteps_since_restore": 3699200, "iterations_since_restore": 289, "perf": {"cpu_util_percent": 32.6936170212766, "ram_util_percent": 58.93829787234045}}
-{"episode_reward_max": 639.0, "episode_reward_min": 405.0, "episode_reward_mean": 617.63, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 198.0}, "policy_reward_max": {"ppo": 326.0}, "policy_reward_mean": {"ppo": 308.815}, "custom_metrics": {"sparse_reward_mean": 214.0, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 189.63, "shaped_reward_min": 125, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.85, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 19.11, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 16.2, "useful_onion_pickup_agent_0_min": 9, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 18.35, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 26, "onion_drop_agent_0_mean": 0.45, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.51, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.15, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.13, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.05, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 18.37, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 6.34, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.54, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.69, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.05, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.36, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.36, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.81, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.03, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.76, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.94, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.05, 
"optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 18.37, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.05, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 18.37, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [590.0, 633.0, 584.0, 636.0, 630.0, 630.0, 633.0, 587.0, 636.0, 630.0, 582.0, 405.0, 633.0, 630.0, 627.0, 579.0, 576.0, 587.0, 582.0, 636.0, 636.0, 618.0, 636.0, 630.0, 636.0, 624.0, 639.0, 633.0, 630.0, 587.0, 587.0, 630.0, 639.0, 582.0, 546.0, 636.0, 584.0, 627.0, 633.0, 627.0, 584.0, 633.0, 636.0, 639.0, 639.0, 630.0, 587.0, 630.0, 582.0, 582.0, 584.0, 630.0, 636.0, 630.0, 624.0, 636.0, 633.0, 587.0, 639.0, 630.0, 639.0, 587.0, 639.0, 636.0, 630.0, 630.0, 636.0, 636.0, 636.0, 636.0, 633.0, 636.0, 639.0, 627.0, 627.0, 639.0, 639.0, 636.0, 630.0, 636.0, 630.0, 633.0, 639.0, 579.0, 587.0, 630.0, 633.0, 630.0, 639.0, 582.0, 630.0, 639.0, 636.0, 630.0, 573.0, 582.0, 630.0, 582.0, 636.0, 639.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [293.0, 297.0, 316.0, 317.0, 290.0, 294.0, 317.0, 319.0, 311.0, 319.0, 323.0, 307.0, 316.0, 317.0, 292.0, 295.0, 319.0, 317.0, 313.0, 317.0, 281.0, 301.0, 198.0, 207.0, 313.0, 320.0, 310.0, 320.0, 311.0, 316.0, 282.0, 297.0, 285.0, 291.0, 309.0, 278.0, 293.0, 289.0, 311.0, 325.0, 321.0, 315.0, 302.0, 316.0, 314.0, 322.0, 311.0, 319.0, 319.0, 317.0, 319.0, 305.0, 319.0, 320.0, 322.0, 311.0, 326.0, 304.0, 301.0, 286.0, 293.0, 294.0, 313.0, 317.0, 317.0, 322.0, 293.0, 289.0, 283.0, 263.0, 323.0, 313.0, 285.0, 299.0, 316.0, 311.0, 324.0, 309.0, 318.0, 309.0, 289.0, 295.0, 314.0, 319.0, 316.0, 320.0, 319.0, 320.0, 317.0, 322.0, 314.0, 316.0, 293.0, 294.0, 318.0, 312.0, 288.0, 
294.0, 289.0, 293.0, 292.0, 292.0, 315.0, 315.0, 324.0, 312.0, 324.0, 306.0, 313.0, 311.0, 314.0, 322.0, 311.0, 322.0, 295.0, 292.0, 322.0, 317.0, 308.0, 322.0, 314.0, 325.0, 293.0, 294.0, 317.0, 322.0, 318.0, 318.0, 319.0, 311.0, 314.0, 316.0, 319.0, 317.0, 314.0, 322.0, 324.0, 312.0, 319.0, 317.0, 321.0, 312.0, 318.0, 318.0, 319.0, 320.0, 305.0, 322.0, 310.0, 317.0, 319.0, 320.0, 314.0, 325.0, 321.0, 315.0, 311.0, 319.0, 311.0, 325.0, 311.0, 319.0, 311.0, 322.0, 319.0, 320.0, 285.0, 294.0, 298.0, 289.0, 319.0, 311.0, 313.0, 320.0, 308.0, 322.0, 319.0, 320.0, 293.0, 289.0, 319.0, 311.0, 324.0, 315.0, 314.0, 322.0, 311.0, 319.0, 276.0, 297.0, 286.0, 296.0, 308.0, 322.0, 286.0, 296.0, 324.0, 312.0, 321.0, 318.0]}, "sampler_perf": {"mean_env_wait_ms": 0.9689852254621897, "mean_processing_ms": 0.26205467993702586, "mean_inference_ms": 1.548820818018191}, "off_policy_estimator": {}, "info": {"num_steps_trained": 6960000, "num_steps_sampled": 3712000, "sample_time_ms": 22386.717, "load_time_ms": 37.444, "grad_time_ms": 11069.639, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0020440255757421255, "policy_loss": -0.0058852084912359715, "vf_loss": 84.9912338256836, "vf_explained_var": 0.7650973200798035, "kl": 0.0021299307700246572, "entropy": 1.1397589445114136, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3712000, "episodes_total": 9280, "training_iteration": 290, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-12-56", "timestamp": 1660255976, "time_this_iter_s": 32.003324031829834, "time_total_s": 14390.064332008362, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 14390.064332008362, "timesteps_since_restore": 3712000, "iterations_since_restore": 290, "perf": {"cpu_util_percent": 30.039130434782606, "ram_util_percent": 58.817391304347815}}
-{"episode_reward_max": 639.0, "episode_reward_min": 405.0, "episode_reward_mean": 614.76, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 198.0}, "policy_reward_max": {"ppo": 328.0}, "policy_reward_mean": {"ppo": 307.38}, "custom_metrics": {"sparse_reward_mean": 213.0, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 188.76, "shaped_reward_min": 125, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.96, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 19.15, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 16.26, "useful_onion_pickup_agent_0_min": 9, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 18.34, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 26, "onion_drop_agent_0_mean": 0.5, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.72, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.15, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.18, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.09, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 18.2, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.35, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.58, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.58, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.05, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.48, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.36, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.76, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.06, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.69, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.99, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.03, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.09, 
"optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 18.2, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.09, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 18.2, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [544.0, 633.0, 627.0, 618.0, 636.0, 627.0, 581.0, 582.0, 639.0, 587.0, 639.0, 573.0, 633.0, 636.0, 587.0, 639.0, 564.0, 639.0, 630.0, 633.0, 558.0, 582.0, 630.0, 630.0, 587.0, 624.0, 636.0, 582.0, 587.0, 633.0, 587.0, 576.0, 630.0, 630.0, 636.0, 636.0, 636.0, 636.0, 633.0, 636.0, 639.0, 627.0, 627.0, 639.0, 639.0, 636.0, 630.0, 636.0, 630.0, 633.0, 639.0, 579.0, 587.0, 630.0, 633.0, 630.0, 639.0, 582.0, 630.0, 639.0, 636.0, 630.0, 573.0, 582.0, 630.0, 582.0, 636.0, 639.0, 590.0, 633.0, 584.0, 636.0, 630.0, 630.0, 633.0, 587.0, 636.0, 630.0, 582.0, 405.0, 633.0, 630.0, 627.0, 579.0, 576.0, 587.0, 582.0, 636.0, 636.0, 618.0, 636.0, 630.0, 636.0, 624.0, 639.0, 633.0, 630.0, 587.0, 587.0, 630.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [267.0, 277.0, 319.0, 314.0, 311.0, 316.0, 315.0, 303.0, 309.0, 327.0, 305.0, 322.0, 284.0, 297.0, 290.0, 292.0, 311.0, 328.0, 288.0, 299.0, 317.0, 322.0, 292.0, 281.0, 316.0, 317.0, 324.0, 312.0, 293.0, 294.0, 322.0, 317.0, 287.0, 277.0, 319.0, 320.0, 318.0, 312.0, 322.0, 311.0, 292.0, 266.0, 288.0, 294.0, 310.0, 320.0, 314.0, 316.0, 293.0, 294.0, 311.0, 313.0, 319.0, 317.0, 289.0, 293.0, 295.0, 292.0, 316.0, 317.0, 306.0, 281.0, 283.0, 293.0, 319.0, 311.0, 314.0, 316.0, 319.0, 317.0, 314.0, 322.0, 324.0, 312.0, 319.0, 317.0, 321.0, 312.0, 318.0, 318.0, 319.0, 320.0, 305.0, 322.0, 310.0, 317.0, 319.0, 320.0, 314.0, 325.0, 321.0, 315.0, 311.0, 319.0, 311.0, 325.0, 311.0, 
319.0, 311.0, 322.0, 319.0, 320.0, 285.0, 294.0, 298.0, 289.0, 319.0, 311.0, 313.0, 320.0, 308.0, 322.0, 319.0, 320.0, 293.0, 289.0, 319.0, 311.0, 324.0, 315.0, 314.0, 322.0, 311.0, 319.0, 276.0, 297.0, 286.0, 296.0, 308.0, 322.0, 286.0, 296.0, 324.0, 312.0, 321.0, 318.0, 293.0, 297.0, 316.0, 317.0, 290.0, 294.0, 317.0, 319.0, 311.0, 319.0, 323.0, 307.0, 316.0, 317.0, 292.0, 295.0, 319.0, 317.0, 313.0, 317.0, 281.0, 301.0, 198.0, 207.0, 313.0, 320.0, 310.0, 320.0, 311.0, 316.0, 282.0, 297.0, 285.0, 291.0, 309.0, 278.0, 293.0, 289.0, 311.0, 325.0, 321.0, 315.0, 302.0, 316.0, 314.0, 322.0, 311.0, 319.0, 319.0, 317.0, 319.0, 305.0, 319.0, 320.0, 322.0, 311.0, 326.0, 304.0, 301.0, 286.0, 293.0, 294.0, 313.0, 317.0]}, "sampler_perf": {"mean_env_wait_ms": 0.967235186119729, "mean_processing_ms": 0.26170596959071135, "mean_inference_ms": 1.5471921568344904}, "off_policy_estimator": {}, "info": {"num_steps_trained": 6984000, "num_steps_sampled": 3724800, "sample_time_ms": 21912.426, "load_time_ms": 37.292, "grad_time_ms": 10653.236, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.003775561461225152, "policy_loss": -0.004369485657662153, "vf_loss": 87.11421966552734, "vf_explained_var": 0.7634318470954895, "kl": 0.0017795447492972016, "entropy": 1.1327377557754517, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3724800, "episodes_total": 9312, "training_iteration": 291, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-13-27", "timestamp": 1660256007, "time_this_iter_s": 30.522319793701172, "time_total_s": 14420.586651802063, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 14420.586651802063, "timesteps_since_restore": 3724800, "iterations_since_restore": 291, "perf": {"cpu_util_percent": 38.19767441860465, "ram_util_percent": 59.255813953488385}}
-{"episode_reward_max": 639.0, "episode_reward_min": 405.0, "episode_reward_mean": 610.93, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 198.0}, "policy_reward_max": {"ppo": 328.0}, "policy_reward_mean": {"ppo": 305.465}, "custom_metrics": {"sparse_reward_mean": 211.6, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 187.73, "shaped_reward_min": 125, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.16, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 19.04, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.5, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 18.25, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.62, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.77, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.13, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.18, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 16.23, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.98, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.46, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.59, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.5, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.9, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.62, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.41, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.73, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.08, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.64, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.99, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.03, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.23, 
"optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.98, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.23, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.98, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 639.0, 633.0, 633.0, 633.0, 567.0, 636.0, 633.0, 633.0, 636.0, 636.0, 558.0, 584.0, 639.0, 639.0, 633.0, 567.0, 590.0, 636.0, 633.0, 636.0, 630.0, 582.0, 630.0, 636.0, 558.0, 639.0, 584.0, 587.0, 544.0, 636.0, 633.0, 630.0, 582.0, 636.0, 639.0, 590.0, 633.0, 584.0, 636.0, 630.0, 630.0, 633.0, 587.0, 636.0, 630.0, 582.0, 405.0, 633.0, 630.0, 627.0, 579.0, 576.0, 587.0, 582.0, 636.0, 636.0, 618.0, 636.0, 630.0, 636.0, 624.0, 639.0, 633.0, 630.0, 587.0, 587.0, 630.0, 544.0, 633.0, 627.0, 618.0, 636.0, 627.0, 581.0, 582.0, 639.0, 587.0, 639.0, 573.0, 633.0, 636.0, 587.0, 639.0, 564.0, 639.0, 630.0, 633.0, 558.0, 582.0, 630.0, 630.0, 587.0, 624.0, 636.0, 582.0, 587.0, 633.0, 587.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [289.0, 293.0, 325.0, 314.0, 319.0, 314.0, 309.0, 324.0, 311.0, 322.0, 277.0, 290.0, 316.0, 320.0, 325.0, 308.0, 312.0, 321.0, 327.0, 309.0, 318.0, 318.0, 274.0, 284.0, 288.0, 296.0, 324.0, 315.0, 319.0, 320.0, 324.0, 309.0, 279.0, 288.0, 301.0, 289.0, 312.0, 324.0, 311.0, 322.0, 319.0, 317.0, 321.0, 309.0, 291.0, 291.0, 319.0, 311.0, 317.0, 319.0, 273.0, 285.0, 322.0, 317.0, 299.0, 285.0, 295.0, 292.0, 275.0, 269.0, 319.0, 317.0, 317.0, 316.0, 308.0, 322.0, 286.0, 296.0, 324.0, 312.0, 321.0, 318.0, 293.0, 297.0, 316.0, 317.0, 290.0, 294.0, 317.0, 319.0, 311.0, 319.0, 323.0, 307.0, 316.0, 317.0, 292.0, 295.0, 319.0, 317.0, 313.0, 317.0, 281.0, 301.0, 198.0, 207.0, 313.0, 
320.0, 310.0, 320.0, 311.0, 316.0, 282.0, 297.0, 285.0, 291.0, 309.0, 278.0, 293.0, 289.0, 311.0, 325.0, 321.0, 315.0, 302.0, 316.0, 314.0, 322.0, 311.0, 319.0, 319.0, 317.0, 319.0, 305.0, 319.0, 320.0, 322.0, 311.0, 326.0, 304.0, 301.0, 286.0, 293.0, 294.0, 313.0, 317.0, 267.0, 277.0, 319.0, 314.0, 311.0, 316.0, 315.0, 303.0, 309.0, 327.0, 305.0, 322.0, 284.0, 297.0, 290.0, 292.0, 311.0, 328.0, 288.0, 299.0, 317.0, 322.0, 292.0, 281.0, 316.0, 317.0, 324.0, 312.0, 293.0, 294.0, 322.0, 317.0, 287.0, 277.0, 319.0, 320.0, 318.0, 312.0, 322.0, 311.0, 292.0, 266.0, 288.0, 294.0, 310.0, 320.0, 314.0, 316.0, 293.0, 294.0, 311.0, 313.0, 319.0, 317.0, 289.0, 293.0, 295.0, 292.0, 316.0, 317.0, 306.0, 281.0, 283.0, 293.0]}, "sampler_perf": {"mean_env_wait_ms": 0.9654876642180179, "mean_processing_ms": 0.26135729355980103, "mean_inference_ms": 1.545357165029807}, "off_policy_estimator": {}, "info": {"num_steps_trained": 7008000, "num_steps_sampled": 3737600, "sample_time_ms": 21647.86, "load_time_ms": 37.443, "grad_time_ms": 10160.016, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0012383932480588555, "policy_loss": -0.006961038801819086, "vf_loss": 87.61873626708984, "vf_explained_var": 0.757759153842926, "kl": 0.001912236213684082, "entropy": 1.1248730421066284, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3737600, "episodes_total": 9344, "training_iteration": 292, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-13-55", "timestamp": 1660256035, "time_this_iter_s": 27.889997720718384, "time_total_s": 14448.476649522781, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 14448.476649522781, "timesteps_since_restore": 3737600, "iterations_since_restore": 292, "perf": {"cpu_util_percent": 31.551282051282044, "ram_util_percent": 59.09743589743588}}
-{"episode_reward_max": 639.0, "episode_reward_min": 339.0, "episode_reward_mean": 610.87, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 167.0}, "policy_reward_max": {"ppo": 330.0}, "policy_reward_mean": {"ppo": 305.435}, "custom_metrics": {"sparse_reward_mean": 211.6, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 187.67, "shaped_reward_min": 99, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.2, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 19.18, "onion_pickup_agent_1_min": 13, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.62, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 18.39, "useful_onion_pickup_agent_1_min": 12, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.64, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.87, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.16, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.23, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 16.29, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.96, "potting_onion_agent_1_min": 12, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.5, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.65, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.47, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.87, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.67, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.44, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.04, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.72, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.09, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.63, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.01, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.03, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.29, 
"optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.96, "optimal_onion_potting_agent_1_min": 12, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.29, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.96, "viable_onion_potting_agent_1_min": 12, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [555.0, 633.0, 564.0, 639.0, 581.0, 636.0, 630.0, 639.0, 630.0, 590.0, 639.0, 636.0, 639.0, 627.0, 630.0, 639.0, 639.0, 636.0, 633.0, 630.0, 636.0, 639.0, 630.0, 639.0, 584.0, 633.0, 633.0, 636.0, 339.0, 579.0, 579.0, 587.0, 630.0, 587.0, 587.0, 630.0, 544.0, 633.0, 627.0, 618.0, 636.0, 627.0, 581.0, 582.0, 639.0, 587.0, 639.0, 573.0, 633.0, 636.0, 587.0, 639.0, 564.0, 639.0, 630.0, 633.0, 558.0, 582.0, 630.0, 630.0, 587.0, 624.0, 636.0, 582.0, 587.0, 633.0, 587.0, 576.0, 582.0, 639.0, 633.0, 633.0, 633.0, 567.0, 636.0, 633.0, 633.0, 636.0, 636.0, 558.0, 584.0, 639.0, 639.0, 633.0, 567.0, 590.0, 636.0, 633.0, 636.0, 630.0, 582.0, 630.0, 636.0, 558.0, 639.0, 584.0, 587.0, 544.0, 636.0, 633.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [285.0, 270.0, 316.0, 317.0, 293.0, 271.0, 311.0, 328.0, 295.0, 286.0, 324.0, 312.0, 321.0, 309.0, 316.0, 323.0, 311.0, 319.0, 299.0, 291.0, 319.0, 320.0, 314.0, 322.0, 321.0, 318.0, 316.0, 311.0, 311.0, 319.0, 315.0, 324.0, 314.0, 325.0, 316.0, 320.0, 311.0, 322.0, 300.0, 330.0, 319.0, 317.0, 319.0, 320.0, 319.0, 311.0, 314.0, 325.0, 288.0, 296.0, 318.0, 315.0, 319.0, 314.0, 316.0, 320.0, 167.0, 172.0, 288.0, 291.0, 285.0, 294.0, 293.0, 294.0, 326.0, 304.0, 301.0, 286.0, 293.0, 294.0, 313.0, 317.0, 267.0, 277.0, 319.0, 314.0, 311.0, 316.0, 315.0, 303.0, 309.0, 327.0, 305.0, 322.0, 284.0, 297.0, 290.0, 292.0, 311.0, 328.0, 288.0, 299.0, 317.0, 322.0, 292.0, 281.0, 316.0, 
317.0, 324.0, 312.0, 293.0, 294.0, 322.0, 317.0, 287.0, 277.0, 319.0, 320.0, 318.0, 312.0, 322.0, 311.0, 292.0, 266.0, 288.0, 294.0, 310.0, 320.0, 314.0, 316.0, 293.0, 294.0, 311.0, 313.0, 319.0, 317.0, 289.0, 293.0, 295.0, 292.0, 316.0, 317.0, 306.0, 281.0, 283.0, 293.0, 289.0, 293.0, 325.0, 314.0, 319.0, 314.0, 309.0, 324.0, 311.0, 322.0, 277.0, 290.0, 316.0, 320.0, 325.0, 308.0, 312.0, 321.0, 327.0, 309.0, 318.0, 318.0, 274.0, 284.0, 288.0, 296.0, 324.0, 315.0, 319.0, 320.0, 324.0, 309.0, 279.0, 288.0, 301.0, 289.0, 312.0, 324.0, 311.0, 322.0, 319.0, 317.0, 321.0, 309.0, 291.0, 291.0, 319.0, 311.0, 317.0, 319.0, 273.0, 285.0, 322.0, 317.0, 299.0, 285.0, 295.0, 292.0, 275.0, 269.0, 319.0, 317.0, 317.0, 316.0]}, "sampler_perf": {"mean_env_wait_ms": 0.9637668687735891, "mean_processing_ms": 0.2610164264718474, "mean_inference_ms": 1.5436541723929016}, "off_policy_estimator": {}, "info": {"num_steps_trained": 7032000, "num_steps_sampled": 3750400, "sample_time_ms": 21753.951, "load_time_ms": 37.441, "grad_time_ms": 10065.741, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0023420238867402077, "policy_loss": -0.005699212197214365, "vf_loss": 86.0804214477539, "vf_explained_var": 0.7711854577064514, "kl": 0.0016376747516915202, "entropy": 1.1336089372634888, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3750400, "episodes_total": 9376, "training_iteration": 293, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-14-28", "timestamp": 1660256068, "time_this_iter_s": 33.79537034034729, "time_total_s": 14482.272019863129, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 14482.272019863129, "timesteps_since_restore": 3750400, "iterations_since_restore": 293, "perf": {"cpu_util_percent": 29.666666666666668, "ram_util_percent": 58.67083333333335}}
-{"episode_reward_max": 639.0, "episode_reward_min": 339.0, "episode_reward_mean": 615.32, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 167.0}, "policy_reward_max": {"ppo": 330.0}, "policy_reward_mean": {"ppo": 307.66}, "custom_metrics": {"sparse_reward_mean": 213.4, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 188.52, "shaped_reward_min": 99, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.26, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 19.3, "onion_pickup_agent_1_min": 15, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.68, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 18.56, "useful_onion_pickup_agent_1_min": 14, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.62, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.84, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.15, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.24, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 16.36, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 18.12, "potting_onion_agent_1_min": 13, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 6.5, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.56, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.54, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.82, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.66, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.37, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.74, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.08, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.68, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.02, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.36, 
"optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 18.12, "optimal_onion_potting_agent_1_min": 13, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.36, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 18.12, "viable_onion_potting_agent_1_min": 13, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [630.0, 636.0, 633.0, 636.0, 636.0, 630.0, 636.0, 639.0, 630.0, 630.0, 630.0, 633.0, 630.0, 630.0, 582.0, 627.0, 633.0, 636.0, 636.0, 576.0, 630.0, 627.0, 627.0, 636.0, 561.0, 579.0, 636.0, 639.0, 639.0, 633.0, 630.0, 569.0, 587.0, 633.0, 587.0, 576.0, 582.0, 639.0, 633.0, 633.0, 633.0, 567.0, 636.0, 633.0, 633.0, 636.0, 636.0, 558.0, 584.0, 639.0, 639.0, 633.0, 567.0, 590.0, 636.0, 633.0, 636.0, 630.0, 582.0, 630.0, 636.0, 558.0, 639.0, 584.0, 587.0, 544.0, 636.0, 633.0, 555.0, 633.0, 564.0, 639.0, 581.0, 636.0, 630.0, 639.0, 630.0, 590.0, 639.0, 636.0, 639.0, 627.0, 630.0, 639.0, 639.0, 636.0, 633.0, 630.0, 636.0, 639.0, 630.0, 639.0, 584.0, 633.0, 633.0, 636.0, 339.0, 579.0, 579.0, 587.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [316.0, 314.0, 306.0, 330.0, 327.0, 306.0, 319.0, 317.0, 324.0, 312.0, 318.0, 312.0, 314.0, 322.0, 314.0, 325.0, 319.0, 311.0, 313.0, 317.0, 316.0, 314.0, 313.0, 320.0, 313.0, 317.0, 316.0, 314.0, 299.0, 283.0, 320.0, 307.0, 312.0, 321.0, 322.0, 314.0, 319.0, 317.0, 288.0, 288.0, 312.0, 318.0, 316.0, 311.0, 313.0, 314.0, 314.0, 322.0, 279.0, 282.0, 299.0, 280.0, 314.0, 322.0, 319.0, 320.0, 319.0, 320.0, 316.0, 317.0, 318.0, 312.0, 288.0, 281.0, 295.0, 292.0, 316.0, 317.0, 306.0, 281.0, 283.0, 293.0, 289.0, 293.0, 325.0, 314.0, 319.0, 314.0, 309.0, 324.0, 311.0, 322.0, 277.0, 290.0, 316.0, 320.0, 325.0, 308.0, 312.0, 321.0, 327.0, 309.0, 318.0, 318.0, 274.0, 284.0, 288.0, 
296.0, 324.0, 315.0, 319.0, 320.0, 324.0, 309.0, 279.0, 288.0, 301.0, 289.0, 312.0, 324.0, 311.0, 322.0, 319.0, 317.0, 321.0, 309.0, 291.0, 291.0, 319.0, 311.0, 317.0, 319.0, 273.0, 285.0, 322.0, 317.0, 299.0, 285.0, 295.0, 292.0, 275.0, 269.0, 319.0, 317.0, 317.0, 316.0, 285.0, 270.0, 316.0, 317.0, 293.0, 271.0, 311.0, 328.0, 295.0, 286.0, 324.0, 312.0, 321.0, 309.0, 316.0, 323.0, 311.0, 319.0, 299.0, 291.0, 319.0, 320.0, 314.0, 322.0, 321.0, 318.0, 316.0, 311.0, 311.0, 319.0, 315.0, 324.0, 314.0, 325.0, 316.0, 320.0, 311.0, 322.0, 300.0, 330.0, 319.0, 317.0, 319.0, 320.0, 319.0, 311.0, 314.0, 325.0, 288.0, 296.0, 318.0, 315.0, 319.0, 314.0, 316.0, 320.0, 167.0, 172.0, 288.0, 291.0, 285.0, 294.0, 293.0, 294.0]}, "sampler_perf": {"mean_env_wait_ms": 0.9620401402665292, "mean_processing_ms": 0.2606729789750365, "mean_inference_ms": 1.5418645190919325}, "off_policy_estimator": {}, "info": {"num_steps_trained": 7056000, "num_steps_sampled": 3763200, "sample_time_ms": 21541.312, "load_time_ms": 37.573, "grad_time_ms": 10164.498, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0031338699627667665, "policy_loss": -0.004722007550299168, "vf_loss": 84.24658966064453, "vf_explained_var": 0.7650328278541565, "kl": 0.0023102371487766504, "entropy": 1.1375713348388672, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3763200, "episodes_total": 9408, "training_iteration": 294, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-15-00", "timestamp": 1660256100, "time_this_iter_s": 31.67550492286682, "time_total_s": 14513.947524785995, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 14513.947524785995, "timesteps_since_restore": 3763200, "iterations_since_restore": 294, "perf": {"cpu_util_percent": 30.09333333333334, "ram_util_percent": 58.68444444444443}}
-{"episode_reward_max": 639.0, "episode_reward_min": 339.0, "episode_reward_mean": 616.39, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 167.0}, "policy_reward_max": {"ppo": 330.0}, "policy_reward_mean": {"ppo": 308.195}, "custom_metrics": {"sparse_reward_mean": 213.6, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 189.19, "shaped_reward_min": 99, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.11, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 19.41, "onion_pickup_agent_1_min": 15, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 16.56, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 18.64, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.63, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.79, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.17, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.21, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 16.18, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 18.3, "potting_onion_agent_1_min": 13, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 6.48, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.46, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.71, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.84, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.54, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.34, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.85, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.98, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.79, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.94, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.18, 
"optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 18.3, "optimal_onion_potting_agent_1_min": 13, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.18, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 18.3, "viable_onion_potting_agent_1_min": 13, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [639.0, 633.0, 573.0, 596.0, 636.0, 633.0, 630.0, 627.0, 636.0, 630.0, 639.0, 630.0, 539.0, 633.0, 633.0, 630.0, 639.0, 630.0, 587.0, 587.0, 633.0, 633.0, 639.0, 639.0, 413.0, 633.0, 636.0, 636.0, 587.0, 636.0, 627.0, 633.0, 587.0, 544.0, 636.0, 633.0, 555.0, 633.0, 564.0, 639.0, 581.0, 636.0, 630.0, 639.0, 630.0, 590.0, 639.0, 636.0, 639.0, 627.0, 630.0, 639.0, 639.0, 636.0, 633.0, 630.0, 636.0, 639.0, 630.0, 639.0, 584.0, 633.0, 633.0, 636.0, 339.0, 579.0, 579.0, 587.0, 630.0, 636.0, 633.0, 636.0, 636.0, 630.0, 636.0, 639.0, 630.0, 630.0, 630.0, 633.0, 630.0, 630.0, 582.0, 627.0, 633.0, 636.0, 636.0, 576.0, 630.0, 627.0, 627.0, 636.0, 561.0, 579.0, 636.0, 639.0, 639.0, 633.0, 630.0, 569.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [319.0, 320.0, 317.0, 316.0, 288.0, 285.0, 293.0, 303.0, 324.0, 312.0, 321.0, 312.0, 322.0, 308.0, 313.0, 314.0, 316.0, 320.0, 321.0, 309.0, 325.0, 314.0, 317.0, 313.0, 265.0, 274.0, 313.0, 320.0, 319.0, 314.0, 317.0, 313.0, 317.0, 322.0, 321.0, 309.0, 306.0, 281.0, 292.0, 295.0, 310.0, 323.0, 319.0, 314.0, 319.0, 320.0, 319.0, 320.0, 199.0, 214.0, 321.0, 312.0, 319.0, 317.0, 319.0, 317.0, 295.0, 292.0, 322.0, 314.0, 308.0, 319.0, 317.0, 316.0, 295.0, 292.0, 275.0, 269.0, 319.0, 317.0, 317.0, 316.0, 285.0, 270.0, 316.0, 317.0, 293.0, 271.0, 311.0, 328.0, 295.0, 286.0, 324.0, 312.0, 321.0, 309.0, 316.0, 323.0, 311.0, 319.0, 299.0, 291.0, 319.0, 320.0, 314.0, 322.0, 321.0, 
318.0, 316.0, 311.0, 311.0, 319.0, 315.0, 324.0, 314.0, 325.0, 316.0, 320.0, 311.0, 322.0, 300.0, 330.0, 319.0, 317.0, 319.0, 320.0, 319.0, 311.0, 314.0, 325.0, 288.0, 296.0, 318.0, 315.0, 319.0, 314.0, 316.0, 320.0, 167.0, 172.0, 288.0, 291.0, 285.0, 294.0, 293.0, 294.0, 316.0, 314.0, 306.0, 330.0, 327.0, 306.0, 319.0, 317.0, 324.0, 312.0, 318.0, 312.0, 314.0, 322.0, 314.0, 325.0, 319.0, 311.0, 313.0, 317.0, 316.0, 314.0, 313.0, 320.0, 313.0, 317.0, 316.0, 314.0, 299.0, 283.0, 320.0, 307.0, 312.0, 321.0, 322.0, 314.0, 319.0, 317.0, 288.0, 288.0, 312.0, 318.0, 316.0, 311.0, 313.0, 314.0, 314.0, 322.0, 279.0, 282.0, 299.0, 280.0, 314.0, 322.0, 319.0, 320.0, 319.0, 320.0, 316.0, 317.0, 318.0, 312.0, 288.0, 281.0]}, "sampler_perf": {"mean_env_wait_ms": 0.9603539643629071, "mean_processing_ms": 0.26034177347662363, "mean_inference_ms": 1.5408082131746255}, "off_policy_estimator": {}, "info": {"num_steps_trained": 7080000, "num_steps_sampled": 3776000, "sample_time_ms": 22187.522, "load_time_ms": 38.247, "grad_time_ms": 10375.626, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0001668116747168824, "policy_loss": -0.007916351780295372, "vf_loss": 83.1385726928711, "vf_explained_var": 0.7759819626808167, "kl": 0.0019673772621899843, "entropy": 1.1286202669143677, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3776000, "episodes_total": 9440, "training_iteration": 295, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-15-42", "timestamp": 1660256142, "time_this_iter_s": 41.61074709892273, "time_total_s": 14555.558271884918, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 14555.558271884918, "timesteps_since_restore": 3776000, "iterations_since_restore": 295, "perf": {"cpu_util_percent": 32.182758620689654, "ram_util_percent": 58.76206896551724}}
-{"episode_reward_max": 639.0, "episode_reward_min": 339.0, "episode_reward_mean": 614.17, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 167.0}, "policy_reward_max": {"ppo": 333.0}, "policy_reward_mean": {"ppo": 307.085}, "custom_metrics": {"sparse_reward_mean": 212.8, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 188.57, "shaped_reward_min": 99, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.06, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 19.21, "onion_pickup_agent_1_min": 13, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 16.56, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 18.39, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.6, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.67, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.15, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.13, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.18, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 18.14, "potting_onion_agent_1_min": 12, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 6.47, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.46, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.71, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.86, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.56, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.35, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.83, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 5.0, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.72, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.96, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.04, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 3, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.18, 
"optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 18.14, "optimal_onion_potting_agent_1_min": 12, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.18, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 18.14, "viable_onion_potting_agent_1_min": 12, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [618.0, 639.0, 633.0, 630.0, 639.0, 630.0, 636.0, 587.0, 633.0, 639.0, 587.0, 630.0, 575.0, 473.0, 633.0, 627.0, 636.0, 639.0, 636.0, 636.0, 633.0, 633.0, 639.0, 630.0, 433.0, 630.0, 633.0, 636.0, 633.0, 639.0, 576.0, 582.0, 339.0, 579.0, 579.0, 587.0, 630.0, 636.0, 633.0, 636.0, 636.0, 630.0, 636.0, 639.0, 630.0, 630.0, 630.0, 633.0, 630.0, 630.0, 582.0, 627.0, 633.0, 636.0, 636.0, 576.0, 630.0, 627.0, 627.0, 636.0, 561.0, 579.0, 636.0, 639.0, 639.0, 633.0, 630.0, 569.0, 639.0, 633.0, 573.0, 596.0, 636.0, 633.0, 630.0, 627.0, 636.0, 630.0, 639.0, 630.0, 539.0, 633.0, 633.0, 630.0, 639.0, 630.0, 587.0, 587.0, 633.0, 633.0, 639.0, 639.0, 413.0, 633.0, 636.0, 636.0, 587.0, 636.0, 627.0, 633.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [300.0, 318.0, 317.0, 322.0, 324.0, 309.0, 316.0, 314.0, 322.0, 317.0, 316.0, 314.0, 320.0, 316.0, 290.0, 297.0, 319.0, 314.0, 317.0, 322.0, 293.0, 294.0, 312.0, 318.0, 284.0, 291.0, 237.0, 236.0, 319.0, 314.0, 317.0, 310.0, 322.0, 314.0, 306.0, 333.0, 319.0, 317.0, 316.0, 320.0, 316.0, 317.0, 318.0, 315.0, 317.0, 322.0, 318.0, 312.0, 218.0, 215.0, 319.0, 311.0, 314.0, 319.0, 316.0, 320.0, 319.0, 314.0, 319.0, 320.0, 286.0, 290.0, 295.0, 287.0, 167.0, 172.0, 288.0, 291.0, 285.0, 294.0, 293.0, 294.0, 316.0, 314.0, 306.0, 330.0, 327.0, 306.0, 319.0, 317.0, 324.0, 312.0, 318.0, 312.0, 314.0, 322.0, 314.0, 325.0, 319.0, 311.0, 313.0, 317.0, 316.0, 314.0, 313.0, 320.0, 313.0, 
317.0, 316.0, 314.0, 299.0, 283.0, 320.0, 307.0, 312.0, 321.0, 322.0, 314.0, 319.0, 317.0, 288.0, 288.0, 312.0, 318.0, 316.0, 311.0, 313.0, 314.0, 314.0, 322.0, 279.0, 282.0, 299.0, 280.0, 314.0, 322.0, 319.0, 320.0, 319.0, 320.0, 316.0, 317.0, 318.0, 312.0, 288.0, 281.0, 319.0, 320.0, 317.0, 316.0, 288.0, 285.0, 293.0, 303.0, 324.0, 312.0, 321.0, 312.0, 322.0, 308.0, 313.0, 314.0, 316.0, 320.0, 321.0, 309.0, 325.0, 314.0, 317.0, 313.0, 265.0, 274.0, 313.0, 320.0, 319.0, 314.0, 317.0, 313.0, 317.0, 322.0, 321.0, 309.0, 306.0, 281.0, 292.0, 295.0, 310.0, 323.0, 319.0, 314.0, 319.0, 320.0, 319.0, 320.0, 199.0, 214.0, 321.0, 312.0, 319.0, 317.0, 319.0, 317.0, 295.0, 292.0, 322.0, 314.0, 308.0, 319.0, 317.0, 316.0]}, "sampler_perf": {"mean_env_wait_ms": 0.9586694910648496, "mean_processing_ms": 0.26000987158476824, "mean_inference_ms": 1.539770678675762}, "off_policy_estimator": {}, "info": {"num_steps_trained": 7104000, "num_steps_sampled": 3788800, "sample_time_ms": 22271.193, "load_time_ms": 38.04, "grad_time_ms": 10225.976, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.004963720217347145, "policy_loss": -0.003322723088786006, "vf_loss": 88.4856948852539, "vf_explained_var": 0.7634937167167664, "kl": 0.0021246925462037325, "entropy": 1.1242562532424927, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3788800, "episodes_total": 9472, "training_iteration": 296, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-16-13", "timestamp": 1660256173, "time_this_iter_s": 30.998157024383545, "time_total_s": 14586.556428909302, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 14586.556428909302, "timesteps_since_restore": 3788800, "iterations_since_restore": 296, "perf": {"cpu_util_percent": 31.388636363636365, "ram_util_percent": 58.774999999999984}}
-{"episode_reward_max": 639.0, "episode_reward_min": 413.0, "episode_reward_mean": 616.79, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 199.0}, "policy_reward_max": {"ppo": 333.0}, "policy_reward_mean": {"ppo": 308.395}, "custom_metrics": {"sparse_reward_mean": 213.4, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 189.99, "shaped_reward_min": 133, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.23, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 19.2, "onion_pickup_agent_1_min": 13, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 16.8, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 18.4, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.64, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.67, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.15, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.14, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.34, "potting_onion_agent_0_min": 9, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 18.16, "potting_onion_agent_1_min": 12, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.5, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.59, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.77, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.01, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.55, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.38, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.78, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 5.1, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.67, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.04, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.04, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 3, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.34, 
"optimal_onion_potting_agent_0_min": 9, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 18.16, "optimal_onion_potting_agent_1_min": 12, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.34, "viable_onion_potting_agent_0_min": 9, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 18.16, "viable_onion_potting_agent_1_min": 12, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [587.0, 579.0, 639.0, 636.0, 633.0, 630.0, 582.0, 636.0, 633.0, 636.0, 636.0, 639.0, 579.0, 584.0, 630.0, 639.0, 627.0, 527.0, 630.0, 582.0, 630.0, 639.0, 636.0, 633.0, 639.0, 639.0, 627.0, 579.0, 627.0, 639.0, 639.0, 639.0, 639.0, 633.0, 630.0, 569.0, 639.0, 633.0, 573.0, 596.0, 636.0, 633.0, 630.0, 627.0, 636.0, 630.0, 639.0, 630.0, 539.0, 633.0, 633.0, 630.0, 639.0, 630.0, 587.0, 587.0, 633.0, 633.0, 639.0, 639.0, 413.0, 633.0, 636.0, 636.0, 587.0, 636.0, 627.0, 633.0, 618.0, 639.0, 633.0, 630.0, 639.0, 630.0, 636.0, 587.0, 633.0, 639.0, 587.0, 630.0, 575.0, 473.0, 633.0, 627.0, 636.0, 639.0, 636.0, 636.0, 633.0, 633.0, 639.0, 630.0, 433.0, 630.0, 633.0, 636.0, 633.0, 639.0, 576.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [293.0, 294.0, 278.0, 301.0, 316.0, 323.0, 317.0, 319.0, 307.0, 326.0, 315.0, 315.0, 290.0, 292.0, 325.0, 311.0, 316.0, 317.0, 314.0, 322.0, 316.0, 320.0, 313.0, 326.0, 291.0, 288.0, 294.0, 290.0, 310.0, 320.0, 320.0, 319.0, 314.0, 313.0, 258.0, 269.0, 317.0, 313.0, 296.0, 286.0, 321.0, 309.0, 312.0, 327.0, 317.0, 319.0, 321.0, 312.0, 314.0, 325.0, 314.0, 325.0, 316.0, 311.0, 298.0, 281.0, 322.0, 305.0, 317.0, 322.0, 319.0, 320.0, 317.0, 322.0, 319.0, 320.0, 316.0, 317.0, 318.0, 312.0, 288.0, 281.0, 319.0, 320.0, 317.0, 316.0, 288.0, 285.0, 293.0, 303.0, 324.0, 312.0, 321.0, 312.0, 322.0, 308.0, 313.0, 314.0, 316.0, 320.0, 321.0, 309.0, 325.0, 314.0, 317.0, 313.0, 265.0, 
274.0, 313.0, 320.0, 319.0, 314.0, 317.0, 313.0, 317.0, 322.0, 321.0, 309.0, 306.0, 281.0, 292.0, 295.0, 310.0, 323.0, 319.0, 314.0, 319.0, 320.0, 319.0, 320.0, 199.0, 214.0, 321.0, 312.0, 319.0, 317.0, 319.0, 317.0, 295.0, 292.0, 322.0, 314.0, 308.0, 319.0, 317.0, 316.0, 300.0, 318.0, 317.0, 322.0, 324.0, 309.0, 316.0, 314.0, 322.0, 317.0, 316.0, 314.0, 320.0, 316.0, 290.0, 297.0, 319.0, 314.0, 317.0, 322.0, 293.0, 294.0, 312.0, 318.0, 284.0, 291.0, 237.0, 236.0, 319.0, 314.0, 317.0, 310.0, 322.0, 314.0, 306.0, 333.0, 319.0, 317.0, 316.0, 320.0, 316.0, 317.0, 318.0, 315.0, 317.0, 322.0, 318.0, 312.0, 218.0, 215.0, 319.0, 311.0, 314.0, 319.0, 316.0, 320.0, 319.0, 314.0, 319.0, 320.0, 286.0, 290.0, 295.0, 287.0]}, "sampler_perf": {"mean_env_wait_ms": 0.9570006029896723, "mean_processing_ms": 0.25968048709196123, "mean_inference_ms": 1.5389220446317904}, "off_policy_estimator": {}, "info": {"num_steps_trained": 7128000, "num_steps_sampled": 3801600, "sample_time_ms": 22480.496, "load_time_ms": 38.114, "grad_time_ms": 10354.328, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.004928060807287693, "policy_loss": -0.0034768336918205023, "vf_loss": 89.6873550415039, "vf_explained_var": 0.7655234336853027, "kl": 0.0019178093643859029, "entropy": 1.1276906728744507, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3801600, "episodes_total": 9504, "training_iteration": 297, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-16-48", "timestamp": 1660256208, "time_this_iter_s": 34.984565019607544, "time_total_s": 14621.54099392891, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 14621.54099392891, "timesteps_since_restore": 3801600, "iterations_since_restore": 297, "perf": {"cpu_util_percent": 30.266, "ram_util_percent": 58.788000000000004}}
-{"episode_reward_max": 639.0, "episode_reward_min": 433.0, "episode_reward_mean": 615.99, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 215.0}, "policy_reward_max": {"ppo": 333.0}, "policy_reward_mean": {"ppo": 307.995}, "custom_metrics": {"sparse_reward_mean": 213.2, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 189.59, "shaped_reward_min": 153, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.45, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 18.92, "onion_pickup_agent_1_min": 13, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.96, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 18.16, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.69, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.63, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 7, "useful_onion_drop_agent_0_mean": 0.12, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.17, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 16.47, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.99, "potting_onion_agent_1_min": 12, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.32, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.76, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.6, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.12, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.49, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.42, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.67, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 5.2, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.55, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.13, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.04, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 3, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.47, 
"optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.99, "optimal_onion_potting_agent_1_min": 12, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.47, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.99, "viable_onion_potting_agent_1_min": 12, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [639.0, 624.0, 576.0, 639.0, 636.0, 630.0, 522.0, 587.0, 639.0, 578.0, 636.0, 584.0, 639.0, 522.0, 630.0, 633.0, 633.0, 576.0, 567.0, 633.0, 636.0, 636.0, 587.0, 627.0, 636.0, 627.0, 639.0, 636.0, 587.0, 630.0, 633.0, 636.0, 587.0, 636.0, 627.0, 633.0, 618.0, 639.0, 633.0, 630.0, 639.0, 630.0, 636.0, 587.0, 633.0, 639.0, 587.0, 630.0, 575.0, 473.0, 633.0, 627.0, 636.0, 639.0, 636.0, 636.0, 633.0, 633.0, 639.0, 630.0, 433.0, 630.0, 633.0, 636.0, 633.0, 639.0, 576.0, 582.0, 587.0, 579.0, 639.0, 636.0, 633.0, 630.0, 582.0, 636.0, 633.0, 636.0, 636.0, 639.0, 579.0, 584.0, 630.0, 639.0, 627.0, 527.0, 630.0, 582.0, 630.0, 639.0, 636.0, 633.0, 639.0, 639.0, 627.0, 579.0, 627.0, 639.0, 639.0, 639.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [319.0, 320.0, 312.0, 312.0, 296.0, 280.0, 317.0, 322.0, 313.0, 323.0, 311.0, 319.0, 269.0, 253.0, 296.0, 291.0, 319.0, 320.0, 313.0, 265.0, 319.0, 317.0, 293.0, 291.0, 314.0, 325.0, 254.0, 268.0, 321.0, 309.0, 309.0, 324.0, 314.0, 319.0, 279.0, 297.0, 286.0, 281.0, 311.0, 322.0, 317.0, 319.0, 317.0, 319.0, 286.0, 301.0, 313.0, 314.0, 319.0, 317.0, 316.0, 311.0, 321.0, 318.0, 317.0, 319.0, 291.0, 296.0, 307.0, 323.0, 313.0, 320.0, 323.0, 313.0, 295.0, 292.0, 322.0, 314.0, 308.0, 319.0, 317.0, 316.0, 300.0, 318.0, 317.0, 322.0, 324.0, 309.0, 316.0, 314.0, 322.0, 317.0, 316.0, 314.0, 320.0, 316.0, 290.0, 297.0, 319.0, 314.0, 317.0, 322.0, 293.0, 294.0, 312.0, 318.0, 284.0, 
291.0, 237.0, 236.0, 319.0, 314.0, 317.0, 310.0, 322.0, 314.0, 306.0, 333.0, 319.0, 317.0, 316.0, 320.0, 316.0, 317.0, 318.0, 315.0, 317.0, 322.0, 318.0, 312.0, 218.0, 215.0, 319.0, 311.0, 314.0, 319.0, 316.0, 320.0, 319.0, 314.0, 319.0, 320.0, 286.0, 290.0, 295.0, 287.0, 293.0, 294.0, 278.0, 301.0, 316.0, 323.0, 317.0, 319.0, 307.0, 326.0, 315.0, 315.0, 290.0, 292.0, 325.0, 311.0, 316.0, 317.0, 314.0, 322.0, 316.0, 320.0, 313.0, 326.0, 291.0, 288.0, 294.0, 290.0, 310.0, 320.0, 320.0, 319.0, 314.0, 313.0, 258.0, 269.0, 317.0, 313.0, 296.0, 286.0, 321.0, 309.0, 312.0, 327.0, 317.0, 319.0, 321.0, 312.0, 314.0, 325.0, 314.0, 325.0, 316.0, 311.0, 298.0, 281.0, 322.0, 305.0, 317.0, 322.0, 319.0, 320.0, 317.0, 322.0]}, "sampler_perf": {"mean_env_wait_ms": 0.9553216188480552, "mean_processing_ms": 0.25934597014735267, "mean_inference_ms": 1.5375890964227674}, "off_policy_estimator": {}, "info": {"num_steps_trained": 7152000, "num_steps_sampled": 3814400, "sample_time_ms": 22450.487, "load_time_ms": 38.169, "grad_time_ms": 10228.148, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.004324051551520824, "policy_loss": -0.003937617409974337, "vf_loss": 88.24027252197266, "vf_explained_var": 0.7693286538124084, "kl": 0.00227510672993958, "entropy": 1.1247196197509766, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3814400, "episodes_total": 9536, "training_iteration": 298, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-17-18", "timestamp": 1660256238, "time_this_iter_s": 30.79404616355896, "time_total_s": 14652.335040092468, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 14652.335040092468, "timesteps_since_restore": 3814400, "iterations_since_restore": 298, "perf": {"cpu_util_percent": 31.34418604651162, "ram_util_percent": 58.76279069767441}}
-{"episode_reward_max": 639.0, "episode_reward_min": 522.0, "episode_reward_mean": 615.53, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 253.0}, "policy_reward_max": {"ppo": 329.0}, "policy_reward_mean": {"ppo": 307.765}, "custom_metrics": {"sparse_reward_mean": 213.0, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 189.53, "shaped_reward_min": 162, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.58, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 19.1, "onion_pickup_agent_1_min": 14, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.95, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 18.27, "useful_onion_pickup_agent_1_min": 14, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.85, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.8, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 7, "useful_onion_drop_agent_0_mean": 0.2, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.25, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 16.43, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 18.0, "potting_onion_agent_1_min": 13, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.28, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.73, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.61, "useful_dish_pickup_agent_0_min": 4, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.17, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.45, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.39, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.04, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.66, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.17, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.57, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.12, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.03, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.43, 
"optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 18.0, "optimal_onion_potting_agent_1_min": 13, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.43, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 18.0, "viable_onion_potting_agent_1_min": 13, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [630.0, 636.0, 630.0, 636.0, 573.0, 630.0, 587.0, 639.0, 582.0, 624.0, 630.0, 639.0, 587.0, 590.0, 639.0, 639.0, 636.0, 633.0, 579.0, 633.0, 633.0, 587.0, 630.0, 630.0, 525.0, 633.0, 636.0, 582.0, 633.0, 581.0, 582.0, 636.0, 633.0, 639.0, 576.0, 582.0, 587.0, 579.0, 639.0, 636.0, 633.0, 630.0, 582.0, 636.0, 633.0, 636.0, 636.0, 639.0, 579.0, 584.0, 630.0, 639.0, 627.0, 527.0, 630.0, 582.0, 630.0, 639.0, 636.0, 633.0, 639.0, 639.0, 627.0, 579.0, 627.0, 639.0, 639.0, 639.0, 639.0, 624.0, 576.0, 639.0, 636.0, 630.0, 522.0, 587.0, 639.0, 578.0, 636.0, 584.0, 639.0, 522.0, 630.0, 633.0, 633.0, 576.0, 567.0, 633.0, 636.0, 636.0, 587.0, 627.0, 636.0, 627.0, 639.0, 636.0, 587.0, 630.0, 633.0, 636.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [316.0, 314.0, 313.0, 323.0, 313.0, 317.0, 319.0, 317.0, 293.0, 280.0, 316.0, 314.0, 289.0, 298.0, 319.0, 320.0, 288.0, 294.0, 313.0, 311.0, 310.0, 320.0, 321.0, 318.0, 291.0, 296.0, 296.0, 294.0, 312.0, 327.0, 317.0, 322.0, 329.0, 307.0, 319.0, 314.0, 285.0, 294.0, 316.0, 317.0, 316.0, 317.0, 291.0, 296.0, 316.0, 314.0, 313.0, 317.0, 258.0, 267.0, 311.0, 322.0, 314.0, 322.0, 291.0, 291.0, 324.0, 309.0, 291.0, 290.0, 291.0, 291.0, 319.0, 317.0, 319.0, 314.0, 319.0, 320.0, 286.0, 290.0, 295.0, 287.0, 293.0, 294.0, 278.0, 301.0, 316.0, 323.0, 317.0, 319.0, 307.0, 326.0, 315.0, 315.0, 290.0, 292.0, 325.0, 311.0, 316.0, 317.0, 314.0, 322.0, 316.0, 320.0, 313.0, 326.0, 291.0, 
288.0, 294.0, 290.0, 310.0, 320.0, 320.0, 319.0, 314.0, 313.0, 258.0, 269.0, 317.0, 313.0, 296.0, 286.0, 321.0, 309.0, 312.0, 327.0, 317.0, 319.0, 321.0, 312.0, 314.0, 325.0, 314.0, 325.0, 316.0, 311.0, 298.0, 281.0, 322.0, 305.0, 317.0, 322.0, 319.0, 320.0, 317.0, 322.0, 319.0, 320.0, 312.0, 312.0, 296.0, 280.0, 317.0, 322.0, 313.0, 323.0, 311.0, 319.0, 269.0, 253.0, 296.0, 291.0, 319.0, 320.0, 313.0, 265.0, 319.0, 317.0, 293.0, 291.0, 314.0, 325.0, 254.0, 268.0, 321.0, 309.0, 309.0, 324.0, 314.0, 319.0, 279.0, 297.0, 286.0, 281.0, 311.0, 322.0, 317.0, 319.0, 317.0, 319.0, 286.0, 301.0, 313.0, 314.0, 319.0, 317.0, 316.0, 311.0, 321.0, 318.0, 317.0, 319.0, 291.0, 296.0, 307.0, 323.0, 313.0, 320.0, 323.0, 313.0]}, "sampler_perf": {"mean_env_wait_ms": 0.9536494258770276, "mean_processing_ms": 0.25901133252507974, "mean_inference_ms": 1.536150189883149}, "off_policy_estimator": {}, "info": {"num_steps_trained": 7176000, "num_steps_sampled": 3827200, "sample_time_ms": 22404.117, "load_time_ms": 38.01, "grad_time_ms": 10081.123, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0014618774875998497, "policy_loss": -0.00924667902290821, "vf_loss": 83.49740600585938, "vf_explained_var": 0.7685635685920715, "kl": 0.0018500644946470857, "entropy": 1.12986421585083, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3827200, "episodes_total": 9568, "training_iteration": 299, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-17-50", "timestamp": 1660256270, "time_this_iter_s": 31.213135242462158, "time_total_s": 14683.54817533493, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 14683.54817533493, "timesteps_since_restore": 3827200, "iterations_since_restore": 299, "perf": {"cpu_util_percent": 30.386363636363637, "ram_util_percent": 58.77272727272726}}
-{"episode_reward_max": 639.0, "episode_reward_min": 522.0, "episode_reward_mean": 616.85, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 253.0}, "policy_reward_max": {"ppo": 329.0}, "policy_reward_mean": {"ppo": 308.425}, "custom_metrics": {"sparse_reward_mean": 213.6, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 189.65, "shaped_reward_min": 160, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.79, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 18.78, "onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 17.19, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 25, "useful_onion_pickup_agent_1_mean": 17.87, "useful_onion_pickup_agent_1_min": 11, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.8, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.68, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 7, "useful_onion_drop_agent_0_mean": 0.16, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.3, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 16.69, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 25, "potting_onion_agent_1_mean": 17.77, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.07, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.88, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.46, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.28, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.38, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.38, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.04, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.54, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.33, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.46, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.27, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.03, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.69, 
"optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 25, "optimal_onion_potting_agent_1_mean": 17.77, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.69, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 25, "viable_onion_potting_agent_1_mean": 17.77, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [587.0, 636.0, 636.0, 579.0, 636.0, 636.0, 582.0, 633.0, 639.0, 582.0, 587.0, 639.0, 630.0, 639.0, 636.0, 627.0, 600.0, 639.0, 630.0, 630.0, 627.0, 639.0, 582.0, 627.0, 587.0, 636.0, 633.0, 636.0, 636.0, 633.0, 630.0, 579.0, 627.0, 639.0, 639.0, 639.0, 639.0, 624.0, 576.0, 639.0, 636.0, 630.0, 522.0, 587.0, 639.0, 578.0, 636.0, 584.0, 639.0, 522.0, 630.0, 633.0, 633.0, 576.0, 567.0, 633.0, 636.0, 636.0, 587.0, 627.0, 636.0, 627.0, 639.0, 636.0, 587.0, 630.0, 633.0, 636.0, 630.0, 636.0, 630.0, 636.0, 573.0, 630.0, 587.0, 639.0, 582.0, 624.0, 630.0, 639.0, 587.0, 590.0, 639.0, 639.0, 636.0, 633.0, 579.0, 633.0, 633.0, 587.0, 630.0, 630.0, 525.0, 633.0, 636.0, 582.0, 633.0, 581.0, 582.0, 636.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [296.0, 291.0, 314.0, 322.0, 314.0, 322.0, 278.0, 301.0, 314.0, 322.0, 316.0, 320.0, 291.0, 291.0, 314.0, 319.0, 319.0, 320.0, 286.0, 296.0, 283.0, 304.0, 320.0, 319.0, 324.0, 306.0, 322.0, 317.0, 318.0, 318.0, 308.0, 319.0, 299.0, 301.0, 321.0, 318.0, 308.0, 322.0, 319.0, 311.0, 308.0, 319.0, 320.0, 319.0, 288.0, 294.0, 319.0, 308.0, 289.0, 298.0, 314.0, 322.0, 317.0, 316.0, 316.0, 320.0, 312.0, 324.0, 319.0, 314.0, 311.0, 319.0, 283.0, 296.0, 322.0, 305.0, 317.0, 322.0, 319.0, 320.0, 317.0, 322.0, 319.0, 320.0, 312.0, 312.0, 296.0, 280.0, 317.0, 322.0, 313.0, 323.0, 311.0, 319.0, 269.0, 253.0, 296.0, 291.0, 319.0, 320.0, 313.0, 265.0, 319.0, 317.0, 293.0, 291.0, 314.0, 
325.0, 254.0, 268.0, 321.0, 309.0, 309.0, 324.0, 314.0, 319.0, 279.0, 297.0, 286.0, 281.0, 311.0, 322.0, 317.0, 319.0, 317.0, 319.0, 286.0, 301.0, 313.0, 314.0, 319.0, 317.0, 316.0, 311.0, 321.0, 318.0, 317.0, 319.0, 291.0, 296.0, 307.0, 323.0, 313.0, 320.0, 323.0, 313.0, 316.0, 314.0, 313.0, 323.0, 313.0, 317.0, 319.0, 317.0, 293.0, 280.0, 316.0, 314.0, 289.0, 298.0, 319.0, 320.0, 288.0, 294.0, 313.0, 311.0, 310.0, 320.0, 321.0, 318.0, 291.0, 296.0, 296.0, 294.0, 312.0, 327.0, 317.0, 322.0, 329.0, 307.0, 319.0, 314.0, 285.0, 294.0, 316.0, 317.0, 316.0, 317.0, 291.0, 296.0, 316.0, 314.0, 313.0, 317.0, 258.0, 267.0, 311.0, 322.0, 314.0, 322.0, 291.0, 291.0, 324.0, 309.0, 291.0, 290.0, 291.0, 291.0, 319.0, 317.0]}, "sampler_perf": {"mean_env_wait_ms": 0.9519895892120465, "mean_processing_ms": 0.2586797182005827, "mean_inference_ms": 1.5346818190110434}, "off_policy_estimator": {}, "info": {"num_steps_trained": 7200000, "num_steps_sampled": 3840000, "sample_time_ms": 22522.727, "load_time_ms": 37.151, "grad_time_ms": 10013.326, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0025208042934536934, "policy_loss": -0.005239995662122965, "vf_loss": 83.27434539794922, "vf_explained_var": 0.7729237675666809, "kl": 0.0018271300941705704, "entropy": 1.133251667022705, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3840000, "episodes_total": 9600, "training_iteration": 300, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-18-22", "timestamp": 1660256302, "time_this_iter_s": 32.498526096343994, "time_total_s": 14716.046701431274, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 14716.046701431274, "timesteps_since_restore": 3840000, "iterations_since_restore": 300, "perf": {"cpu_util_percent": 33.44782608695652, "ram_util_percent": 58.9717391304348}}
-{"episode_reward_max": 639.0, "episode_reward_min": 525.0, "episode_reward_mean": 617.85, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 258.0}, "policy_reward_max": {"ppo": 329.0}, "policy_reward_mean": {"ppo": 308.925}, "custom_metrics": {"sparse_reward_mean": 214.0, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 189.85, "shaped_reward_min": 160, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.53, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 18.86, "onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 17.02, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 25, "useful_onion_pickup_agent_1_mean": 18.0, "useful_onion_pickup_agent_1_min": 11, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.7, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.6, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.18, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.28, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 16.58, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 25, "potting_onion_agent_1_mean": 17.89, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 6.1, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.86, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.52, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.26, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.37, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.39, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.55, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.31, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.49, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.25, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.58, 
"optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 25, "optimal_onion_potting_agent_1_mean": 17.89, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.58, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 25, "viable_onion_potting_agent_1_mean": 17.89, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [630.0, 639.0, 630.0, 587.0, 627.0, 639.0, 633.0, 573.0, 627.0, 633.0, 576.0, 639.0, 630.0, 564.0, 633.0, 627.0, 636.0, 636.0, 627.0, 633.0, 579.0, 639.0, 636.0, 639.0, 576.0, 639.0, 636.0, 639.0, 587.0, 581.0, 582.0, 639.0, 587.0, 630.0, 633.0, 636.0, 630.0, 636.0, 630.0, 636.0, 573.0, 630.0, 587.0, 639.0, 582.0, 624.0, 630.0, 639.0, 587.0, 590.0, 639.0, 639.0, 636.0, 633.0, 579.0, 633.0, 633.0, 587.0, 630.0, 630.0, 525.0, 633.0, 636.0, 582.0, 633.0, 581.0, 582.0, 636.0, 587.0, 636.0, 636.0, 579.0, 636.0, 636.0, 582.0, 633.0, 639.0, 582.0, 587.0, 639.0, 630.0, 639.0, 636.0, 627.0, 600.0, 639.0, 630.0, 630.0, 627.0, 639.0, 582.0, 627.0, 587.0, 636.0, 633.0, 636.0, 636.0, 633.0, 630.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [313.0, 317.0, 322.0, 317.0, 316.0, 314.0, 288.0, 299.0, 305.0, 322.0, 319.0, 320.0, 329.0, 304.0, 278.0, 295.0, 311.0, 316.0, 316.0, 317.0, 290.0, 286.0, 317.0, 322.0, 321.0, 309.0, 280.0, 284.0, 322.0, 311.0, 314.0, 313.0, 326.0, 310.0, 314.0, 322.0, 311.0, 316.0, 316.0, 317.0, 288.0, 291.0, 311.0, 328.0, 317.0, 319.0, 319.0, 320.0, 289.0, 287.0, 317.0, 322.0, 320.0, 316.0, 327.0, 312.0, 277.0, 310.0, 298.0, 283.0, 293.0, 289.0, 317.0, 322.0, 291.0, 296.0, 307.0, 323.0, 313.0, 320.0, 323.0, 313.0, 316.0, 314.0, 313.0, 323.0, 313.0, 317.0, 319.0, 317.0, 293.0, 280.0, 316.0, 314.0, 289.0, 298.0, 319.0, 320.0, 288.0, 294.0, 313.0, 311.0, 310.0, 320.0, 321.0, 318.0, 291.0, 
296.0, 296.0, 294.0, 312.0, 327.0, 317.0, 322.0, 329.0, 307.0, 319.0, 314.0, 285.0, 294.0, 316.0, 317.0, 316.0, 317.0, 291.0, 296.0, 316.0, 314.0, 313.0, 317.0, 258.0, 267.0, 311.0, 322.0, 314.0, 322.0, 291.0, 291.0, 324.0, 309.0, 291.0, 290.0, 291.0, 291.0, 319.0, 317.0, 296.0, 291.0, 314.0, 322.0, 314.0, 322.0, 278.0, 301.0, 314.0, 322.0, 316.0, 320.0, 291.0, 291.0, 314.0, 319.0, 319.0, 320.0, 286.0, 296.0, 283.0, 304.0, 320.0, 319.0, 324.0, 306.0, 322.0, 317.0, 318.0, 318.0, 308.0, 319.0, 299.0, 301.0, 321.0, 318.0, 308.0, 322.0, 319.0, 311.0, 308.0, 319.0, 320.0, 319.0, 288.0, 294.0, 319.0, 308.0, 289.0, 298.0, 314.0, 322.0, 317.0, 316.0, 316.0, 320.0, 312.0, 324.0, 319.0, 314.0, 311.0, 319.0, 283.0, 296.0]}, "sampler_perf": {"mean_env_wait_ms": 0.9503424273012522, "mean_processing_ms": 0.25835032935285485, "mean_inference_ms": 1.5332766289322552}, "off_policy_estimator": {}, "info": {"num_steps_trained": 7224000, "num_steps_sampled": 3852800, "sample_time_ms": 22579.338, "load_time_ms": 37.118, "grad_time_ms": 10147.106, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0018557598814368248, "policy_loss": -0.005808284040540457, "vf_loss": 82.31928253173828, "vf_explained_var": 0.7716462016105652, "kl": 0.001915976870805025, "entropy": 1.1357669830322266, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3852800, "episodes_total": 9632, "training_iteration": 301, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-18-56", "timestamp": 1660256336, "time_this_iter_s": 32.42415189743042, "time_total_s": 14748.470853328705, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 14748.470853328705, "timesteps_since_restore": 3852800, "iterations_since_restore": 301, "perf": {"cpu_util_percent": 32.14468085106383, "ram_util_percent": 59.210638297872315}}
-{"episode_reward_max": 639.0, "episode_reward_min": 465.0, "episode_reward_mean": 616.11, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 232.0}, "policy_reward_max": {"ppo": 332.0}, "policy_reward_mean": {"ppo": 308.055}, "custom_metrics": {"sparse_reward_mean": 213.8, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 188.51, "shaped_reward_min": 145, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.56, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 18.65, "onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 17.05, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 25, "useful_onion_pickup_agent_1_mean": 17.78, "useful_onion_pickup_agent_1_min": 11, "useful_onion_pickup_agent_1_max": 25, "onion_drop_agent_0_mean": 0.59, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.61, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.13, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.31, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 16.71, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 25, "potting_onion_agent_1_mean": 17.69, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 6.0, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 6.01, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 13, "useful_dish_pickup_agent_0_mean": 5.28, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.19, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.41, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.45, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 7, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.04, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.43, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.41, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.38, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.33, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.03, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.71, 
"optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 25, "optimal_onion_potting_agent_1_mean": 17.69, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.71, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 25, "viable_onion_potting_agent_1_mean": 17.69, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [630.0, 621.0, 582.0, 636.0, 630.0, 633.0, 527.0, 627.0, 582.0, 633.0, 603.0, 630.0, 639.0, 624.0, 630.0, 636.0, 639.0, 465.0, 639.0, 636.0, 570.0, 630.0, 567.0, 633.0, 582.0, 630.0, 633.0, 636.0, 563.0, 636.0, 582.0, 636.0, 633.0, 581.0, 582.0, 636.0, 587.0, 636.0, 636.0, 579.0, 636.0, 636.0, 582.0, 633.0, 639.0, 582.0, 587.0, 639.0, 630.0, 639.0, 636.0, 627.0, 600.0, 639.0, 630.0, 630.0, 627.0, 639.0, 582.0, 627.0, 587.0, 636.0, 633.0, 636.0, 636.0, 633.0, 630.0, 579.0, 630.0, 639.0, 630.0, 587.0, 627.0, 639.0, 633.0, 573.0, 627.0, 633.0, 576.0, 639.0, 630.0, 564.0, 633.0, 627.0, 636.0, 636.0, 627.0, 633.0, 579.0, 639.0, 636.0, 639.0, 576.0, 639.0, 636.0, 639.0, 587.0, 581.0, 582.0, 639.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [306.0, 324.0, 318.0, 303.0, 290.0, 292.0, 314.0, 322.0, 317.0, 313.0, 314.0, 319.0, 259.0, 268.0, 305.0, 322.0, 281.0, 301.0, 314.0, 319.0, 302.0, 301.0, 309.0, 321.0, 319.0, 320.0, 311.0, 313.0, 310.0, 320.0, 304.0, 332.0, 325.0, 314.0, 232.0, 233.0, 314.0, 325.0, 311.0, 325.0, 291.0, 279.0, 318.0, 312.0, 277.0, 290.0, 315.0, 318.0, 298.0, 284.0, 309.0, 321.0, 311.0, 322.0, 319.0, 317.0, 279.0, 284.0, 319.0, 317.0, 291.0, 291.0, 324.0, 312.0, 324.0, 309.0, 291.0, 290.0, 291.0, 291.0, 319.0, 317.0, 296.0, 291.0, 314.0, 322.0, 314.0, 322.0, 278.0, 301.0, 314.0, 322.0, 316.0, 320.0, 291.0, 291.0, 314.0, 319.0, 319.0, 320.0, 286.0, 296.0, 283.0, 304.0, 320.0, 319.0, 324.0, 
306.0, 322.0, 317.0, 318.0, 318.0, 308.0, 319.0, 299.0, 301.0, 321.0, 318.0, 308.0, 322.0, 319.0, 311.0, 308.0, 319.0, 320.0, 319.0, 288.0, 294.0, 319.0, 308.0, 289.0, 298.0, 314.0, 322.0, 317.0, 316.0, 316.0, 320.0, 312.0, 324.0, 319.0, 314.0, 311.0, 319.0, 283.0, 296.0, 313.0, 317.0, 322.0, 317.0, 316.0, 314.0, 288.0, 299.0, 305.0, 322.0, 319.0, 320.0, 329.0, 304.0, 278.0, 295.0, 311.0, 316.0, 316.0, 317.0, 290.0, 286.0, 317.0, 322.0, 321.0, 309.0, 280.0, 284.0, 322.0, 311.0, 314.0, 313.0, 326.0, 310.0, 314.0, 322.0, 311.0, 316.0, 316.0, 317.0, 288.0, 291.0, 311.0, 328.0, 317.0, 319.0, 319.0, 320.0, 289.0, 287.0, 317.0, 322.0, 320.0, 316.0, 327.0, 312.0, 277.0, 310.0, 298.0, 283.0, 293.0, 289.0, 317.0, 322.0]}, "sampler_perf": {"mean_env_wait_ms": 0.9487147885298458, "mean_processing_ms": 0.25802520052917743, "mean_inference_ms": 1.5320224616406188}, "off_policy_estimator": {}, "info": {"num_steps_trained": 7248000, "num_steps_sampled": 3865600, "sample_time_ms": 22988.548, "load_time_ms": 36.983, "grad_time_ms": 10329.984, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.00113882205914706, "policy_loss": -0.0069201975129544735, "vf_loss": 86.32308959960938, "vf_explained_var": 0.7628341317176819, "kl": 0.0021745546255260706, "entropy": 1.146581768989563, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3865600, "episodes_total": 9664, "training_iteration": 302, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-19-29", "timestamp": 1660256369, "time_this_iter_s": 33.80998110771179, "time_total_s": 14782.280834436417, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 14782.280834436417, "timesteps_since_restore": 3865600, "iterations_since_restore": 302, "perf": {"cpu_util_percent": 31.58125, "ram_util_percent": 58.88958333333333}}
-{"episode_reward_max": 639.0, "episode_reward_min": 123.0, "episode_reward_mean": 610.18, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 60.0}, "policy_reward_max": {"ppo": 332.0}, "policy_reward_mean": {"ppo": 305.09}, "custom_metrics": {"sparse_reward_mean": 211.8, "sparse_reward_min": 40, "sparse_reward_max": 220, "shaped_reward_mean": 186.58, "shaped_reward_min": 43, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.47, "onion_pickup_agent_0_min": 5, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 18.38, "onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 17.0, "useful_onion_pickup_agent_0_min": 4, "useful_onion_pickup_agent_0_max": 25, "useful_onion_pickup_agent_1_mean": 17.63, "useful_onion_pickup_agent_1_min": 6, "useful_onion_pickup_agent_1_max": 25, "onion_drop_agent_0_mean": 0.54, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.57, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.14, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.29, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 16.61, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 25, "potting_onion_agent_1_mean": 17.46, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 5.97, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 6.0, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 13, "useful_dish_pickup_agent_0_mean": 5.2, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.14, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.47, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.5, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 7, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.05, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.37, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.34, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.32, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.28, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.61, 
"optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 25, "optimal_onion_potting_agent_1_mean": 17.46, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.61, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 25, "viable_onion_potting_agent_1_mean": 17.46, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [639.0, 630.0, 633.0, 576.0, 630.0, 633.0, 621.0, 633.0, 633.0, 582.0, 123.0, 636.0, 633.0, 582.0, 627.0, 639.0, 633.0, 639.0, 587.0, 633.0, 630.0, 633.0, 621.0, 633.0, 630.0, 630.0, 633.0, 376.0, 630.0, 636.0, 582.0, 633.0, 636.0, 633.0, 630.0, 579.0, 630.0, 639.0, 630.0, 587.0, 627.0, 639.0, 633.0, 573.0, 627.0, 633.0, 576.0, 639.0, 630.0, 564.0, 633.0, 627.0, 636.0, 636.0, 627.0, 633.0, 579.0, 639.0, 636.0, 639.0, 576.0, 639.0, 636.0, 639.0, 587.0, 581.0, 582.0, 639.0, 630.0, 621.0, 582.0, 636.0, 630.0, 633.0, 527.0, 627.0, 582.0, 633.0, 603.0, 630.0, 639.0, 624.0, 630.0, 636.0, 639.0, 465.0, 639.0, 636.0, 570.0, 630.0, 567.0, 633.0, 582.0, 630.0, 633.0, 636.0, 563.0, 636.0, 582.0, 636.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [314.0, 325.0, 309.0, 321.0, 315.0, 318.0, 280.0, 296.0, 311.0, 319.0, 321.0, 312.0, 308.0, 313.0, 317.0, 316.0, 316.0, 317.0, 292.0, 290.0, 60.0, 63.0, 314.0, 322.0, 324.0, 309.0, 291.0, 291.0, 316.0, 311.0, 319.0, 320.0, 304.0, 329.0, 319.0, 320.0, 283.0, 304.0, 318.0, 315.0, 321.0, 309.0, 321.0, 312.0, 321.0, 300.0, 314.0, 319.0, 319.0, 311.0, 311.0, 319.0, 316.0, 317.0, 182.0, 194.0, 319.0, 311.0, 324.0, 312.0, 291.0, 291.0, 316.0, 317.0, 312.0, 324.0, 319.0, 314.0, 311.0, 319.0, 283.0, 296.0, 313.0, 317.0, 322.0, 317.0, 316.0, 314.0, 288.0, 299.0, 305.0, 322.0, 319.0, 320.0, 329.0, 304.0, 278.0, 295.0, 311.0, 316.0, 316.0, 317.0, 290.0, 286.0, 317.0, 322.0, 321.0, 
309.0, 280.0, 284.0, 322.0, 311.0, 314.0, 313.0, 326.0, 310.0, 314.0, 322.0, 311.0, 316.0, 316.0, 317.0, 288.0, 291.0, 311.0, 328.0, 317.0, 319.0, 319.0, 320.0, 289.0, 287.0, 317.0, 322.0, 320.0, 316.0, 327.0, 312.0, 277.0, 310.0, 298.0, 283.0, 293.0, 289.0, 317.0, 322.0, 306.0, 324.0, 318.0, 303.0, 290.0, 292.0, 314.0, 322.0, 317.0, 313.0, 314.0, 319.0, 259.0, 268.0, 305.0, 322.0, 281.0, 301.0, 314.0, 319.0, 302.0, 301.0, 309.0, 321.0, 319.0, 320.0, 311.0, 313.0, 310.0, 320.0, 304.0, 332.0, 325.0, 314.0, 232.0, 233.0, 314.0, 325.0, 311.0, 325.0, 291.0, 279.0, 318.0, 312.0, 277.0, 290.0, 315.0, 318.0, 298.0, 284.0, 309.0, 321.0, 311.0, 322.0, 319.0, 317.0, 279.0, 284.0, 319.0, 317.0, 291.0, 291.0, 324.0, 312.0]}, "sampler_perf": {"mean_env_wait_ms": 0.9470934663079638, "mean_processing_ms": 0.2577011854235747, "mean_inference_ms": 1.5308098128397853}, "off_policy_estimator": {}, "info": {"num_steps_trained": 7272000, "num_steps_sampled": 3878400, "sample_time_ms": 22956.932, "load_time_ms": 37.245, "grad_time_ms": 10308.821, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0009572577546350658, "policy_loss": -0.00890685711055994, "vf_loss": 85.18466186523438, "vf_explained_var": 0.7909882068634033, "kl": 0.00206771120429039, "entropy": 1.137712836265564, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3878400, "episodes_total": 9696, "training_iteration": 303, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-20-03", "timestamp": 1660256403, "time_this_iter_s": 33.27155518531799, "time_total_s": 14815.552389621735, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 14815.552389621735, "timesteps_since_restore": 3878400, "iterations_since_restore": 303, "perf": {"cpu_util_percent": 32.59574468085106, "ram_util_percent": 58.840425531914875}}
-{"episode_reward_max": 639.0, "episode_reward_min": 123.0, "episode_reward_mean": 606.56, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 60.0}, "policy_reward_max": {"ppo": 332.0}, "policy_reward_mean": {"ppo": 303.28}, "custom_metrics": {"sparse_reward_mean": 210.4, "sparse_reward_min": 40, "sparse_reward_max": 220, "shaped_reward_mean": 185.76, "shaped_reward_min": 43, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.34, "onion_pickup_agent_0_min": 5, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 18.4, "onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 16.78, "useful_onion_pickup_agent_0_min": 4, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 17.58, "useful_onion_pickup_agent_1_min": 6, "useful_onion_pickup_agent_1_max": 25, "onion_drop_agent_0_mean": 0.45, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.6, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.11, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.31, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 16.5, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 17.42, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 6.04, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.89, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 13, "useful_dish_pickup_agent_0_mean": 5.19, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.06, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.57, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.44, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 7, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.39, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.32, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.29, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.25, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.04, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.5, 
"optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 17.42, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.5, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 17.42, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [630.0, 510.0, 587.0, 564.0, 579.0, 587.0, 582.0, 587.0, 633.0, 630.0, 630.0, 636.0, 630.0, 569.0, 633.0, 639.0, 582.0, 633.0, 633.0, 636.0, 587.0, 633.0, 636.0, 639.0, 633.0, 530.0, 636.0, 627.0, 582.0, 633.0, 636.0, 636.0, 587.0, 581.0, 582.0, 639.0, 630.0, 621.0, 582.0, 636.0, 630.0, 633.0, 527.0, 627.0, 582.0, 633.0, 603.0, 630.0, 639.0, 624.0, 630.0, 636.0, 639.0, 465.0, 639.0, 636.0, 570.0, 630.0, 567.0, 633.0, 582.0, 630.0, 633.0, 636.0, 563.0, 636.0, 582.0, 636.0, 639.0, 630.0, 633.0, 576.0, 630.0, 633.0, 621.0, 633.0, 633.0, 582.0, 123.0, 636.0, 633.0, 582.0, 627.0, 639.0, 633.0, 639.0, 587.0, 633.0, 630.0, 633.0, 621.0, 633.0, 630.0, 630.0, 633.0, 376.0, 630.0, 636.0, 582.0, 633.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [316.0, 314.0, 256.0, 254.0, 286.0, 301.0, 276.0, 288.0, 290.0, 289.0, 289.0, 298.0, 296.0, 286.0, 295.0, 292.0, 313.0, 320.0, 318.0, 312.0, 311.0, 319.0, 314.0, 322.0, 313.0, 317.0, 283.0, 286.0, 319.0, 314.0, 319.0, 320.0, 294.0, 288.0, 324.0, 309.0, 315.0, 318.0, 325.0, 311.0, 294.0, 293.0, 313.0, 320.0, 320.0, 316.0, 316.0, 323.0, 316.0, 317.0, 265.0, 265.0, 317.0, 319.0, 317.0, 310.0, 288.0, 294.0, 316.0, 317.0, 322.0, 314.0, 314.0, 322.0, 277.0, 310.0, 298.0, 283.0, 293.0, 289.0, 317.0, 322.0, 306.0, 324.0, 318.0, 303.0, 290.0, 292.0, 314.0, 322.0, 317.0, 313.0, 314.0, 319.0, 259.0, 268.0, 305.0, 322.0, 281.0, 301.0, 314.0, 319.0, 302.0, 301.0, 309.0, 321.0, 319.0, 
320.0, 311.0, 313.0, 310.0, 320.0, 304.0, 332.0, 325.0, 314.0, 232.0, 233.0, 314.0, 325.0, 311.0, 325.0, 291.0, 279.0, 318.0, 312.0, 277.0, 290.0, 315.0, 318.0, 298.0, 284.0, 309.0, 321.0, 311.0, 322.0, 319.0, 317.0, 279.0, 284.0, 319.0, 317.0, 291.0, 291.0, 324.0, 312.0, 314.0, 325.0, 309.0, 321.0, 315.0, 318.0, 280.0, 296.0, 311.0, 319.0, 321.0, 312.0, 308.0, 313.0, 317.0, 316.0, 316.0, 317.0, 292.0, 290.0, 60.0, 63.0, 314.0, 322.0, 324.0, 309.0, 291.0, 291.0, 316.0, 311.0, 319.0, 320.0, 304.0, 329.0, 319.0, 320.0, 283.0, 304.0, 318.0, 315.0, 321.0, 309.0, 321.0, 312.0, 321.0, 300.0, 314.0, 319.0, 319.0, 311.0, 311.0, 319.0, 316.0, 317.0, 182.0, 194.0, 319.0, 311.0, 324.0, 312.0, 291.0, 291.0, 316.0, 317.0]}, "sampler_perf": {"mean_env_wait_ms": 0.9454753487433492, "mean_processing_ms": 0.2573778744956284, "mean_inference_ms": 1.529523880785963}, "off_policy_estimator": {}, "info": {"num_steps_trained": 7296000, "num_steps_sampled": 3891200, "sample_time_ms": 23052.875, "load_time_ms": 37.015, "grad_time_ms": 10279.094, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0008152422960847616, "policy_loss": -0.008779828436672688, "vf_loss": 85.31393432617188, "vf_explained_var": 0.7709566950798035, "kl": 0.0019413350382819772, "entropy": 1.1336184740066528, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3891200, "episodes_total": 9728, "training_iteration": 304, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-20-35", "timestamp": 1660256435, "time_this_iter_s": 32.335684061050415, "time_total_s": 14847.888073682785, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 14847.888073682785, "timesteps_since_restore": 3891200, "iterations_since_restore": 304, "perf": {"cpu_util_percent": 32.43260869565216, "ram_util_percent": 58.8478260869565}}
-{"episode_reward_max": 639.0, "episode_reward_min": 123.0, "episode_reward_mean": 608.06, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 60.0}, "policy_reward_max": {"ppo": 329.0}, "policy_reward_mean": {"ppo": 304.03}, "custom_metrics": {"sparse_reward_mean": 210.6, "sparse_reward_min": 40, "sparse_reward_max": 220, "shaped_reward_mean": 186.86, "shaped_reward_min": 43, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.32, "onion_pickup_agent_0_min": 5, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 18.35, "onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.81, "useful_onion_pickup_agent_0_min": 4, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 17.52, "useful_onion_pickup_agent_1_min": 6, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.43, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.51, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.11, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.2, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.5, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 17.5, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 6.16, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 13, "dish_pickup_agent_1_mean": 5.79, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.39, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.03, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.55, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.43, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.04, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.57, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.19, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.45, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.11, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.03, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.5, 
"optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 17.5, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.5, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 17.5, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [587.0, 633.0, 633.0, 639.0, 636.0, 587.0, 587.0, 636.0, 633.0, 636.0, 587.0, 587.0, 630.0, 587.0, 582.0, 630.0, 636.0, 581.0, 630.0, 630.0, 639.0, 590.0, 618.0, 627.0, 633.0, 575.0, 627.0, 630.0, 636.0, 636.0, 582.0, 582.0, 563.0, 636.0, 582.0, 636.0, 639.0, 630.0, 633.0, 576.0, 630.0, 633.0, 621.0, 633.0, 633.0, 582.0, 123.0, 636.0, 633.0, 582.0, 627.0, 639.0, 633.0, 639.0, 587.0, 633.0, 630.0, 633.0, 621.0, 633.0, 630.0, 630.0, 633.0, 376.0, 630.0, 636.0, 582.0, 633.0, 630.0, 510.0, 587.0, 564.0, 579.0, 587.0, 582.0, 587.0, 633.0, 630.0, 630.0, 636.0, 630.0, 569.0, 633.0, 639.0, 582.0, 633.0, 633.0, 636.0, 587.0, 633.0, 636.0, 639.0, 633.0, 530.0, 636.0, 627.0, 582.0, 633.0, 636.0, 636.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [299.0, 288.0, 309.0, 324.0, 315.0, 318.0, 312.0, 327.0, 317.0, 319.0, 290.0, 297.0, 298.0, 289.0, 320.0, 316.0, 313.0, 320.0, 321.0, 315.0, 288.0, 299.0, 293.0, 294.0, 318.0, 312.0, 294.0, 293.0, 286.0, 296.0, 318.0, 312.0, 319.0, 317.0, 290.0, 291.0, 320.0, 310.0, 314.0, 316.0, 324.0, 315.0, 295.0, 295.0, 314.0, 304.0, 320.0, 307.0, 319.0, 314.0, 285.0, 290.0, 316.0, 311.0, 313.0, 317.0, 320.0, 316.0, 327.0, 309.0, 288.0, 294.0, 293.0, 289.0, 279.0, 284.0, 319.0, 317.0, 291.0, 291.0, 324.0, 312.0, 314.0, 325.0, 309.0, 321.0, 315.0, 318.0, 280.0, 296.0, 311.0, 319.0, 321.0, 312.0, 308.0, 313.0, 317.0, 316.0, 316.0, 317.0, 292.0, 290.0, 60.0, 63.0, 314.0, 322.0, 324.0, 
309.0, 291.0, 291.0, 316.0, 311.0, 319.0, 320.0, 304.0, 329.0, 319.0, 320.0, 283.0, 304.0, 318.0, 315.0, 321.0, 309.0, 321.0, 312.0, 321.0, 300.0, 314.0, 319.0, 319.0, 311.0, 311.0, 319.0, 316.0, 317.0, 182.0, 194.0, 319.0, 311.0, 324.0, 312.0, 291.0, 291.0, 316.0, 317.0, 316.0, 314.0, 256.0, 254.0, 286.0, 301.0, 276.0, 288.0, 290.0, 289.0, 289.0, 298.0, 296.0, 286.0, 295.0, 292.0, 313.0, 320.0, 318.0, 312.0, 311.0, 319.0, 314.0, 322.0, 313.0, 317.0, 283.0, 286.0, 319.0, 314.0, 319.0, 320.0, 294.0, 288.0, 324.0, 309.0, 315.0, 318.0, 325.0, 311.0, 294.0, 293.0, 313.0, 320.0, 320.0, 316.0, 316.0, 323.0, 316.0, 317.0, 265.0, 265.0, 317.0, 319.0, 317.0, 310.0, 288.0, 294.0, 316.0, 317.0, 322.0, 314.0, 314.0, 322.0]}, "sampler_perf": {"mean_env_wait_ms": 0.9438635758880244, "mean_processing_ms": 0.25705717388077914, "mean_inference_ms": 1.528223444830069}, "off_policy_estimator": {}, "info": {"num_steps_trained": 7320000, "num_steps_sampled": 3904000, "sample_time_ms": 22489.17, "load_time_ms": 36.377, "grad_time_ms": 10072.238, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.005805303808301687, "policy_loss": -0.002536727814003825, "vf_loss": 89.14191436767578, "vf_explained_var": 0.7592394948005676, "kl": 0.0022071560379117727, "entropy": 1.1443239450454712, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3904000, "episodes_total": 9760, "training_iteration": 305, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-21-09", "timestamp": 1660256469, "time_this_iter_s": 33.905731201171875, "time_total_s": 14881.793804883957, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 14881.793804883957, "timesteps_since_restore": 3904000, "iterations_since_restore": 305, "perf": {"cpu_util_percent": 30.185416666666665, "ram_util_percent": 58.73750000000001}}
-{"episode_reward_max": 639.0, "episode_reward_min": 351.0, "episode_reward_mean": 612.03, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 172.0}, "policy_reward_max": {"ppo": 332.0}, "policy_reward_mean": {"ppo": 306.015}, "custom_metrics": {"sparse_reward_mean": 211.8, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 188.43, "shaped_reward_min": 111, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.14, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 18.82, "onion_pickup_agent_1_min": 12, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.57, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 18.05, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.54, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 0.52, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.15, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 0.17, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.25, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.96, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 6.36, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 13, "dish_pickup_agent_1_mean": 5.66, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.64, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.01, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.56, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.41, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.7, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.11, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.6, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.02, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.25, 
"optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.96, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.25, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.96, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [636.0, 351.0, 633.0, 582.0, 633.0, 636.0, 639.0, 630.0, 639.0, 639.0, 582.0, 579.0, 627.0, 636.0, 639.0, 633.0, 627.0, 630.0, 579.0, 636.0, 587.0, 587.0, 636.0, 579.0, 633.0, 630.0, 627.0, 639.0, 636.0, 636.0, 630.0, 536.0, 630.0, 636.0, 582.0, 633.0, 630.0, 510.0, 587.0, 564.0, 579.0, 587.0, 582.0, 587.0, 633.0, 630.0, 630.0, 636.0, 630.0, 569.0, 633.0, 639.0, 582.0, 633.0, 633.0, 636.0, 587.0, 633.0, 636.0, 639.0, 633.0, 530.0, 636.0, 627.0, 582.0, 633.0, 636.0, 636.0, 587.0, 633.0, 633.0, 639.0, 636.0, 587.0, 587.0, 636.0, 633.0, 636.0, 587.0, 587.0, 630.0, 587.0, 582.0, 630.0, 636.0, 581.0, 630.0, 630.0, 639.0, 590.0, 618.0, 627.0, 633.0, 575.0, 627.0, 630.0, 636.0, 636.0, 582.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [314.0, 322.0, 172.0, 179.0, 313.0, 320.0, 280.0, 302.0, 318.0, 315.0, 322.0, 314.0, 324.0, 315.0, 316.0, 314.0, 322.0, 317.0, 319.0, 320.0, 294.0, 288.0, 290.0, 289.0, 310.0, 317.0, 319.0, 317.0, 314.0, 325.0, 316.0, 317.0, 316.0, 311.0, 315.0, 315.0, 285.0, 294.0, 319.0, 317.0, 283.0, 304.0, 285.0, 302.0, 332.0, 304.0, 284.0, 295.0, 316.0, 317.0, 311.0, 319.0, 323.0, 304.0, 319.0, 320.0, 317.0, 319.0, 320.0, 316.0, 308.0, 322.0, 263.0, 273.0, 319.0, 311.0, 324.0, 312.0, 291.0, 291.0, 316.0, 317.0, 316.0, 314.0, 256.0, 254.0, 286.0, 301.0, 276.0, 288.0, 290.0, 289.0, 289.0, 298.0, 296.0, 286.0, 295.0, 292.0, 313.0, 320.0, 318.0, 312.0, 311.0, 319.0, 314.0, 322.0, 313.0, 
317.0, 283.0, 286.0, 319.0, 314.0, 319.0, 320.0, 294.0, 288.0, 324.0, 309.0, 315.0, 318.0, 325.0, 311.0, 294.0, 293.0, 313.0, 320.0, 320.0, 316.0, 316.0, 323.0, 316.0, 317.0, 265.0, 265.0, 317.0, 319.0, 317.0, 310.0, 288.0, 294.0, 316.0, 317.0, 322.0, 314.0, 314.0, 322.0, 299.0, 288.0, 309.0, 324.0, 315.0, 318.0, 312.0, 327.0, 317.0, 319.0, 290.0, 297.0, 298.0, 289.0, 320.0, 316.0, 313.0, 320.0, 321.0, 315.0, 288.0, 299.0, 293.0, 294.0, 318.0, 312.0, 294.0, 293.0, 286.0, 296.0, 318.0, 312.0, 319.0, 317.0, 290.0, 291.0, 320.0, 310.0, 314.0, 316.0, 324.0, 315.0, 295.0, 295.0, 314.0, 304.0, 320.0, 307.0, 319.0, 314.0, 285.0, 290.0, 316.0, 311.0, 313.0, 317.0, 320.0, 316.0, 327.0, 309.0, 288.0, 294.0, 293.0, 289.0]}, "sampler_perf": {"mean_env_wait_ms": 0.9422548001581954, "mean_processing_ms": 0.2567378512099078, "mean_inference_ms": 1.5267428709470499}, "off_policy_estimator": {}, "info": {"num_steps_trained": 7344000, "num_steps_sampled": 3916800, "sample_time_ms": 22275.217, "load_time_ms": 36.758, "grad_time_ms": 10257.278, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0026395271997898817, "policy_loss": -0.005618779454380274, "vf_loss": 88.24600219726562, "vf_explained_var": 0.7727122902870178, "kl": 0.0020911165047436953, "entropy": 1.1326097249984741, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3916800, "episodes_total": 9792, "training_iteration": 306, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-21-40", "timestamp": 1660256500, "time_this_iter_s": 30.71598792076111, "time_total_s": 14912.509792804718, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 14912.509792804718, "timesteps_since_restore": 3916800, "iterations_since_restore": 306, "perf": {"cpu_util_percent": 32.81395348837209, "ram_util_percent": 58.667441860465125}}
-{"episode_reward_max": 639.0, "episode_reward_min": 351.0, "episode_reward_mean": 617.69, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 172.0}, "policy_reward_max": {"ppo": 332.0}, "policy_reward_mean": {"ppo": 308.845}, "custom_metrics": {"sparse_reward_mean": 213.6, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 190.49, "shaped_reward_min": 111, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.18, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 18.9, "onion_pickup_agent_1_min": 12, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.65, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 18.12, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.51, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 0.42, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.14, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 0.1, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.41, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 18.16, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 6.3, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 13, "dish_pickup_agent_1_mean": 5.79, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.68, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.18, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.46, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.47, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.69, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.17, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.63, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.09, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.41, 
"optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 18.16, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.41, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 18.16, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [636.0, 639.0, 633.0, 587.0, 627.0, 639.0, 633.0, 639.0, 633.0, 636.0, 639.0, 587.0, 636.0, 596.0, 639.0, 630.0, 596.0, 630.0, 639.0, 624.0, 633.0, 639.0, 636.0, 579.0, 636.0, 627.0, 636.0, 636.0, 639.0, 636.0, 624.0, 639.0, 582.0, 633.0, 636.0, 636.0, 587.0, 633.0, 633.0, 639.0, 636.0, 587.0, 587.0, 636.0, 633.0, 636.0, 587.0, 587.0, 630.0, 587.0, 582.0, 630.0, 636.0, 581.0, 630.0, 630.0, 639.0, 590.0, 618.0, 627.0, 633.0, 575.0, 627.0, 630.0, 636.0, 636.0, 582.0, 582.0, 636.0, 351.0, 633.0, 582.0, 633.0, 636.0, 639.0, 630.0, 639.0, 639.0, 582.0, 579.0, 627.0, 636.0, 639.0, 633.0, 627.0, 630.0, 579.0, 636.0, 587.0, 587.0, 636.0, 579.0, 633.0, 630.0, 627.0, 639.0, 636.0, 636.0, 630.0, 536.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [317.0, 319.0, 315.0, 324.0, 318.0, 315.0, 292.0, 295.0, 302.0, 325.0, 327.0, 312.0, 311.0, 322.0, 327.0, 312.0, 311.0, 322.0, 314.0, 322.0, 319.0, 320.0, 296.0, 291.0, 315.0, 321.0, 294.0, 302.0, 319.0, 320.0, 310.0, 320.0, 304.0, 292.0, 316.0, 314.0, 317.0, 322.0, 313.0, 311.0, 316.0, 317.0, 314.0, 325.0, 319.0, 317.0, 297.0, 282.0, 319.0, 317.0, 304.0, 323.0, 321.0, 315.0, 314.0, 322.0, 322.0, 317.0, 314.0, 322.0, 308.0, 316.0, 320.0, 319.0, 288.0, 294.0, 316.0, 317.0, 322.0, 314.0, 314.0, 322.0, 299.0, 288.0, 309.0, 324.0, 315.0, 318.0, 312.0, 327.0, 317.0, 319.0, 290.0, 297.0, 298.0, 289.0, 320.0, 316.0, 313.0, 320.0, 321.0, 315.0, 288.0, 299.0, 293.0, 294.0, 318.0, 
312.0, 294.0, 293.0, 286.0, 296.0, 318.0, 312.0, 319.0, 317.0, 290.0, 291.0, 320.0, 310.0, 314.0, 316.0, 324.0, 315.0, 295.0, 295.0, 314.0, 304.0, 320.0, 307.0, 319.0, 314.0, 285.0, 290.0, 316.0, 311.0, 313.0, 317.0, 320.0, 316.0, 327.0, 309.0, 288.0, 294.0, 293.0, 289.0, 314.0, 322.0, 172.0, 179.0, 313.0, 320.0, 280.0, 302.0, 318.0, 315.0, 322.0, 314.0, 324.0, 315.0, 316.0, 314.0, 322.0, 317.0, 319.0, 320.0, 294.0, 288.0, 290.0, 289.0, 310.0, 317.0, 319.0, 317.0, 314.0, 325.0, 316.0, 317.0, 316.0, 311.0, 315.0, 315.0, 285.0, 294.0, 319.0, 317.0, 283.0, 304.0, 285.0, 302.0, 332.0, 304.0, 284.0, 295.0, 316.0, 317.0, 311.0, 319.0, 323.0, 304.0, 319.0, 320.0, 317.0, 319.0, 320.0, 316.0, 308.0, 322.0, 263.0, 273.0]}, "sampler_perf": {"mean_env_wait_ms": 0.9406496030307164, "mean_processing_ms": 0.25641787950901196, "mean_inference_ms": 1.5250430030640023}, "off_policy_estimator": {}, "info": {"num_steps_trained": 7368000, "num_steps_sampled": 3929600, "sample_time_ms": 21842.439, "load_time_ms": 36.991, "grad_time_ms": 10104.518, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.00218362919986248, "policy_loss": -0.005569128785282373, "vf_loss": 83.15591430664062, "vf_explained_var": 0.7728936076164246, "kl": 0.0017335275188088417, "entropy": 1.1256619691848755, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3929600, "episodes_total": 9824, "training_iteration": 307, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-22-09", "timestamp": 1660256529, "time_this_iter_s": 29.1308012008667, "time_total_s": 14941.640594005585, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 14941.640594005585, "timesteps_since_restore": 3929600, "iterations_since_restore": 307, "perf": {"cpu_util_percent": 35.333333333333336, "ram_util_percent": 58.726190476190474}}
-{"episode_reward_max": 639.0, "episode_reward_min": 351.0, "episode_reward_mean": 620.18, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 172.0}, "policy_reward_max": {"ppo": 332.0}, "policy_reward_mean": {"ppo": 310.09}, "custom_metrics": {"sparse_reward_mean": 214.8, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 190.58, "shaped_reward_min": 111, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.9, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 19.12, "onion_pickup_agent_1_min": 12, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.43, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 18.45, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.49, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 0.36, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.11, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 0.08, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.19, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 18.43, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 6.42, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.64, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.78, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.11, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.5, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.37, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.7, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.14, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.68, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.09, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.19, 
"optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 18.43, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.19, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 18.43, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [633.0, 636.0, 639.0, 630.0, 627.0, 636.0, 633.0, 630.0, 630.0, 630.0, 630.0, 561.0, 627.0, 627.0, 639.0, 633.0, 579.0, 633.0, 633.0, 582.0, 630.0, 636.0, 636.0, 627.0, 639.0, 639.0, 630.0, 633.0, 576.0, 579.0, 636.0, 633.0, 636.0, 636.0, 582.0, 582.0, 636.0, 351.0, 633.0, 582.0, 633.0, 636.0, 639.0, 630.0, 639.0, 639.0, 582.0, 579.0, 627.0, 636.0, 639.0, 633.0, 627.0, 630.0, 579.0, 636.0, 587.0, 587.0, 636.0, 579.0, 633.0, 630.0, 627.0, 639.0, 636.0, 636.0, 630.0, 536.0, 636.0, 639.0, 633.0, 587.0, 627.0, 639.0, 633.0, 639.0, 633.0, 636.0, 639.0, 587.0, 636.0, 596.0, 639.0, 630.0, 596.0, 630.0, 639.0, 624.0, 633.0, 639.0, 636.0, 579.0, 636.0, 627.0, 636.0, 636.0, 639.0, 636.0, 624.0, 639.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [310.0, 323.0, 316.0, 320.0, 317.0, 322.0, 310.0, 320.0, 311.0, 316.0, 319.0, 317.0, 306.0, 327.0, 321.0, 309.0, 313.0, 317.0, 311.0, 319.0, 308.0, 322.0, 278.0, 283.0, 318.0, 309.0, 315.0, 312.0, 327.0, 312.0, 316.0, 317.0, 282.0, 297.0, 314.0, 319.0, 316.0, 317.0, 290.0, 292.0, 314.0, 316.0, 313.0, 323.0, 316.0, 320.0, 313.0, 314.0, 319.0, 320.0, 323.0, 316.0, 316.0, 314.0, 314.0, 319.0, 293.0, 283.0, 296.0, 283.0, 316.0, 320.0, 313.0, 320.0, 320.0, 316.0, 327.0, 309.0, 288.0, 294.0, 293.0, 289.0, 314.0, 322.0, 172.0, 179.0, 313.0, 320.0, 280.0, 302.0, 318.0, 315.0, 322.0, 314.0, 324.0, 315.0, 316.0, 314.0, 322.0, 317.0, 319.0, 320.0, 294.0, 288.0, 290.0, 289.0, 310.0, 
317.0, 319.0, 317.0, 314.0, 325.0, 316.0, 317.0, 316.0, 311.0, 315.0, 315.0, 285.0, 294.0, 319.0, 317.0, 283.0, 304.0, 285.0, 302.0, 332.0, 304.0, 284.0, 295.0, 316.0, 317.0, 311.0, 319.0, 323.0, 304.0, 319.0, 320.0, 317.0, 319.0, 320.0, 316.0, 308.0, 322.0, 263.0, 273.0, 317.0, 319.0, 315.0, 324.0, 318.0, 315.0, 292.0, 295.0, 302.0, 325.0, 327.0, 312.0, 311.0, 322.0, 327.0, 312.0, 311.0, 322.0, 314.0, 322.0, 319.0, 320.0, 296.0, 291.0, 315.0, 321.0, 294.0, 302.0, 319.0, 320.0, 310.0, 320.0, 304.0, 292.0, 316.0, 314.0, 317.0, 322.0, 313.0, 311.0, 316.0, 317.0, 314.0, 325.0, 319.0, 317.0, 297.0, 282.0, 319.0, 317.0, 304.0, 323.0, 321.0, 315.0, 314.0, 322.0, 322.0, 317.0, 314.0, 322.0, 308.0, 316.0, 320.0, 319.0]}, "sampler_perf": {"mean_env_wait_ms": 0.9390469484848887, "mean_processing_ms": 0.2560959206110981, "mean_inference_ms": 1.5230623769962466}, "off_policy_estimator": {}, "info": {"num_steps_trained": 7392000, "num_steps_sampled": 3942400, "sample_time_ms": 21606.28, "load_time_ms": 36.86, "grad_time_ms": 10018.576, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0012893896782770753, "policy_loss": -0.006317433435469866, "vf_loss": 81.66983795166016, "vf_explained_var": 0.7689216732978821, "kl": 0.0018363837152719498, "entropy": 1.1203217506408691, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3942400, "episodes_total": 9856, "training_iteration": 308, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-22-36", "timestamp": 1660256556, "time_this_iter_s": 27.574139833450317, "time_total_s": 14969.214733839035, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 14969.214733839035, "timesteps_since_restore": 3942400, "iterations_since_restore": 308, "perf": {"cpu_util_percent": 34.44871794871795, "ram_util_percent": 58.748717948717946}}
-{"episode_reward_max": 639.0, "episode_reward_min": 533.0, "episode_reward_mean": 625.21, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 260.0}, "policy_reward_max": {"ppo": 330.0}, "policy_reward_mean": {"ppo": 312.605}, "custom_metrics": {"sparse_reward_mean": 216.8, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 191.61, "shaped_reward_min": 161, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.26, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 18.85, "onion_pickup_agent_1_min": 12, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 16.83, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 18.25, "useful_onion_pickup_agent_1_min": 12, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.41, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 0.35, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.06, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.1, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 16.64, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 18.2, "potting_onion_agent_1_min": 12, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 6.32, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.96, "dish_pickup_agent_1_min": 4, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.6, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.23, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.49, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.51, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.63, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.31, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.61, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.28, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.64, 
"optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 18.2, "optimal_onion_potting_agent_1_min": 12, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.64, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 18.2, "viable_onion_potting_agent_1_min": 12, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [636.0, 633.0, 639.0, 630.0, 587.0, 639.0, 624.0, 636.0, 639.0, 639.0, 630.0, 630.0, 633.0, 606.0, 627.0, 627.0, 636.0, 630.0, 633.0, 627.0, 587.0, 639.0, 627.0, 639.0, 636.0, 639.0, 636.0, 636.0, 627.0, 636.0, 627.0, 533.0, 636.0, 636.0, 630.0, 536.0, 636.0, 639.0, 633.0, 587.0, 627.0, 639.0, 633.0, 639.0, 633.0, 636.0, 639.0, 587.0, 636.0, 596.0, 639.0, 630.0, 596.0, 630.0, 639.0, 624.0, 633.0, 639.0, 636.0, 579.0, 636.0, 627.0, 636.0, 636.0, 639.0, 636.0, 624.0, 639.0, 633.0, 636.0, 639.0, 630.0, 627.0, 636.0, 633.0, 630.0, 630.0, 630.0, 630.0, 561.0, 627.0, 627.0, 639.0, 633.0, 579.0, 633.0, 633.0, 582.0, 630.0, 636.0, 636.0, 627.0, 639.0, 639.0, 630.0, 633.0, 576.0, 579.0, 636.0, 633.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [314.0, 322.0, 322.0, 311.0, 322.0, 317.0, 311.0, 319.0, 288.0, 299.0, 319.0, 320.0, 311.0, 313.0, 319.0, 317.0, 330.0, 309.0, 317.0, 322.0, 314.0, 316.0, 311.0, 319.0, 319.0, 314.0, 294.0, 312.0, 310.0, 317.0, 309.0, 318.0, 320.0, 316.0, 314.0, 316.0, 309.0, 324.0, 313.0, 314.0, 291.0, 296.0, 329.0, 310.0, 321.0, 306.0, 323.0, 316.0, 319.0, 317.0, 319.0, 320.0, 319.0, 317.0, 319.0, 317.0, 314.0, 313.0, 312.0, 324.0, 313.0, 314.0, 260.0, 273.0, 317.0, 319.0, 320.0, 316.0, 308.0, 322.0, 263.0, 273.0, 317.0, 319.0, 315.0, 324.0, 318.0, 315.0, 292.0, 295.0, 302.0, 325.0, 327.0, 312.0, 311.0, 322.0, 327.0, 312.0, 311.0, 322.0, 314.0, 322.0, 319.0, 320.0, 296.0, 291.0, 315.0, 
321.0, 294.0, 302.0, 319.0, 320.0, 310.0, 320.0, 304.0, 292.0, 316.0, 314.0, 317.0, 322.0, 313.0, 311.0, 316.0, 317.0, 314.0, 325.0, 319.0, 317.0, 297.0, 282.0, 319.0, 317.0, 304.0, 323.0, 321.0, 315.0, 314.0, 322.0, 322.0, 317.0, 314.0, 322.0, 308.0, 316.0, 320.0, 319.0, 310.0, 323.0, 316.0, 320.0, 317.0, 322.0, 310.0, 320.0, 311.0, 316.0, 319.0, 317.0, 306.0, 327.0, 321.0, 309.0, 313.0, 317.0, 311.0, 319.0, 308.0, 322.0, 278.0, 283.0, 318.0, 309.0, 315.0, 312.0, 327.0, 312.0, 316.0, 317.0, 282.0, 297.0, 314.0, 319.0, 316.0, 317.0, 290.0, 292.0, 314.0, 316.0, 313.0, 323.0, 316.0, 320.0, 313.0, 314.0, 319.0, 320.0, 323.0, 316.0, 316.0, 314.0, 314.0, 319.0, 293.0, 283.0, 296.0, 283.0, 316.0, 320.0, 313.0, 320.0]}, "sampler_perf": {"mean_env_wait_ms": 0.9374628711409474, "mean_processing_ms": 0.2557781242683051, "mean_inference_ms": 1.5211277740560474}, "off_policy_estimator": {}, "info": {"num_steps_trained": 7416000, "num_steps_sampled": 3955200, "sample_time_ms": 21548.086, "load_time_ms": 36.789, "grad_time_ms": 9960.253, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0017820480279624462, "policy_loss": -0.005541125778108835, "vf_loss": 78.87618255615234, "vf_explained_var": 0.777707040309906, "kl": 0.0019577995408326387, "entropy": 1.1288975477218628, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3955200, "episodes_total": 9888, "training_iteration": 309, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-23-06", "timestamp": 1660256586, "time_this_iter_s": 30.049942016601562, "time_total_s": 14999.264675855637, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 14999.264675855637, "timesteps_since_restore": 3955200, "iterations_since_restore": 309, "perf": {"cpu_util_percent": 34.733333333333334, "ram_util_percent": 58.76428571428571}}
-{"episode_reward_max": 639.0, "episode_reward_min": 533.0, "episode_reward_mean": 625.2, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 260.0}, "policy_reward_max": {"ppo": 330.0}, "policy_reward_mean": {"ppo": 312.6}, "custom_metrics": {"sparse_reward_mean": 217.0, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 191.2, "shaped_reward_min": 161, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.32, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 18.98, "onion_pickup_agent_1_min": 12, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 16.83, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 18.36, "useful_onion_pickup_agent_1_min": 12, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.43, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 0.59, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.09, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.17, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 16.65, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 18.07, "potting_onion_agent_1_min": 12, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.39, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.84, "dish_pickup_agent_1_min": 4, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.68, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.15, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.49, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.45, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.67, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.26, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.64, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.24, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.65, 
"optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 18.07, "optimal_onion_potting_agent_1_min": 12, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.65, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 18.07, "viable_onion_potting_agent_1_min": 12, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [630.0, 582.0, 639.0, 587.0, 636.0, 593.0, 633.0, 627.0, 639.0, 627.0, 639.0, 633.0, 636.0, 630.0, 630.0, 630.0, 636.0, 633.0, 636.0, 582.0, 627.0, 639.0, 639.0, 582.0, 636.0, 627.0, 587.0, 636.0, 633.0, 633.0, 624.0, 636.0, 639.0, 636.0, 624.0, 639.0, 633.0, 636.0, 639.0, 630.0, 627.0, 636.0, 633.0, 630.0, 630.0, 630.0, 630.0, 561.0, 627.0, 627.0, 639.0, 633.0, 579.0, 633.0, 633.0, 582.0, 630.0, 636.0, 636.0, 627.0, 639.0, 639.0, 630.0, 633.0, 576.0, 579.0, 636.0, 633.0, 636.0, 633.0, 639.0, 630.0, 587.0, 639.0, 624.0, 636.0, 639.0, 639.0, 630.0, 630.0, 633.0, 606.0, 627.0, 627.0, 636.0, 630.0, 633.0, 627.0, 587.0, 639.0, 627.0, 639.0, 636.0, 639.0, 636.0, 636.0, 627.0, 636.0, 627.0, 533.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [316.0, 314.0, 296.0, 286.0, 317.0, 322.0, 296.0, 291.0, 321.0, 315.0, 302.0, 291.0, 308.0, 325.0, 318.0, 309.0, 322.0, 317.0, 315.0, 312.0, 322.0, 317.0, 309.0, 324.0, 321.0, 315.0, 322.0, 308.0, 313.0, 317.0, 317.0, 313.0, 322.0, 314.0, 320.0, 313.0, 319.0, 317.0, 293.0, 289.0, 316.0, 311.0, 322.0, 317.0, 317.0, 322.0, 291.0, 291.0, 318.0, 318.0, 305.0, 322.0, 296.0, 291.0, 311.0, 325.0, 309.0, 324.0, 319.0, 314.0, 326.0, 298.0, 317.0, 319.0, 322.0, 317.0, 314.0, 322.0, 308.0, 316.0, 320.0, 319.0, 310.0, 323.0, 316.0, 320.0, 317.0, 322.0, 310.0, 320.0, 311.0, 316.0, 319.0, 317.0, 306.0, 327.0, 321.0, 309.0, 313.0, 317.0, 311.0, 319.0, 308.0, 322.0, 278.0, 283.0, 318.0, 
309.0, 315.0, 312.0, 327.0, 312.0, 316.0, 317.0, 282.0, 297.0, 314.0, 319.0, 316.0, 317.0, 290.0, 292.0, 314.0, 316.0, 313.0, 323.0, 316.0, 320.0, 313.0, 314.0, 319.0, 320.0, 323.0, 316.0, 316.0, 314.0, 314.0, 319.0, 293.0, 283.0, 296.0, 283.0, 316.0, 320.0, 313.0, 320.0, 314.0, 322.0, 322.0, 311.0, 322.0, 317.0, 311.0, 319.0, 288.0, 299.0, 319.0, 320.0, 311.0, 313.0, 319.0, 317.0, 330.0, 309.0, 317.0, 322.0, 314.0, 316.0, 311.0, 319.0, 319.0, 314.0, 294.0, 312.0, 310.0, 317.0, 309.0, 318.0, 320.0, 316.0, 314.0, 316.0, 309.0, 324.0, 313.0, 314.0, 291.0, 296.0, 329.0, 310.0, 321.0, 306.0, 323.0, 316.0, 319.0, 317.0, 319.0, 320.0, 319.0, 317.0, 319.0, 317.0, 314.0, 313.0, 312.0, 324.0, 313.0, 314.0, 260.0, 273.0]}, "sampler_perf": {"mean_env_wait_ms": 0.9359034441976002, "mean_processing_ms": 0.2554667399859708, "mean_inference_ms": 1.5193431419939385}, "off_policy_estimator": {}, "info": {"num_steps_trained": 7440000, "num_steps_sampled": 3968000, "sample_time_ms": 21381.604, "load_time_ms": 36.734, "grad_time_ms": 9849.343, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0021060302387923002, "policy_loss": -0.005640763323754072, "vf_loss": 83.09170532226562, "vf_explained_var": 0.7722363471984863, "kl": 0.0021093024406582117, "entropy": 1.12474524974823, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3968000, "episodes_total": 9920, "training_iteration": 310, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-23-36", "timestamp": 1660256616, "time_this_iter_s": 29.72802186012268, "time_total_s": 15028.99269771576, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 15028.99269771576, "timesteps_since_restore": 3968000, "iterations_since_restore": 310, "perf": {"cpu_util_percent": 37.352380952380955, "ram_util_percent": 59.30714285714285}}
-{"episode_reward_max": 639.0, "episode_reward_min": 530.0, "episode_reward_mean": 623.21, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 258.0}, "policy_reward_max": {"ppo": 330.0}, "policy_reward_mean": {"ppo": 311.605}, "custom_metrics": {"sparse_reward_mean": 216.0, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 191.21, "shaped_reward_min": 166, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.42, "onion_pickup_agent_0_min": 10, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 18.99, "onion_pickup_agent_1_min": 14, "onion_pickup_agent_1_max": 27, "useful_onion_pickup_agent_0_mean": 16.88, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 18.26, "useful_onion_pickup_agent_1_min": 14, "useful_onion_pickup_agent_1_max": 26, "onion_drop_agent_0_mean": 0.39, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.78, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.1, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.28, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 6, "potting_onion_agent_0_mean": 16.78, "potting_onion_agent_0_min": 9, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.89, "potting_onion_agent_1_min": 13, "potting_onion_agent_1_max": 25, "dish_pickup_agent_0_mean": 6.25, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.88, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.64, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 5.26, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.43, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.45, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.63, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 5.29, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.58, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 5.26, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.78, 
"optimal_onion_potting_agent_0_min": 9, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.89, "optimal_onion_potting_agent_1_min": 13, "optimal_onion_potting_agent_1_max": 25, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.78, "viable_onion_potting_agent_0_min": 9, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.89, "viable_onion_potting_agent_1_min": 13, "viable_onion_potting_agent_1_max": 25, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [636.0, 633.0, 582.0, 630.0, 573.0, 636.0, 630.0, 639.0, 639.0, 636.0, 639.0, 630.0, 587.0, 630.0, 627.0, 633.0, 630.0, 636.0, 636.0, 587.0, 639.0, 633.0, 639.0, 587.0, 630.0, 633.0, 530.0, 630.0, 639.0, 633.0, 582.0, 633.0, 576.0, 579.0, 636.0, 633.0, 636.0, 633.0, 639.0, 630.0, 587.0, 639.0, 624.0, 636.0, 639.0, 639.0, 630.0, 630.0, 633.0, 606.0, 627.0, 627.0, 636.0, 630.0, 633.0, 627.0, 587.0, 639.0, 627.0, 639.0, 636.0, 639.0, 636.0, 636.0, 627.0, 636.0, 627.0, 533.0, 630.0, 582.0, 639.0, 587.0, 636.0, 593.0, 633.0, 627.0, 639.0, 627.0, 639.0, 633.0, 636.0, 630.0, 630.0, 630.0, 636.0, 633.0, 636.0, 582.0, 627.0, 639.0, 639.0, 582.0, 636.0, 627.0, 587.0, 636.0, 633.0, 633.0, 624.0, 636.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [311.0, 325.0, 316.0, 317.0, 288.0, 294.0, 319.0, 311.0, 285.0, 288.0, 322.0, 314.0, 311.0, 319.0, 322.0, 317.0, 320.0, 319.0, 309.0, 327.0, 319.0, 320.0, 313.0, 317.0, 296.0, 291.0, 311.0, 319.0, 308.0, 319.0, 314.0, 319.0, 319.0, 311.0, 315.0, 321.0, 312.0, 324.0, 296.0, 291.0, 322.0, 317.0, 319.0, 314.0, 319.0, 320.0, 296.0, 291.0, 303.0, 327.0, 309.0, 324.0, 258.0, 272.0, 321.0, 309.0, 319.0, 320.0, 324.0, 309.0, 287.0, 295.0, 319.0, 314.0, 293.0, 283.0, 296.0, 283.0, 316.0, 320.0, 313.0, 320.0, 314.0, 322.0, 322.0, 311.0, 322.0, 317.0, 311.0, 319.0, 288.0, 299.0, 319.0, 320.0, 311.0, 313.0, 319.0, 317.0, 330.0, 309.0, 317.0, 322.0, 314.0, 316.0, 311.0, 319.0, 319.0, 
314.0, 294.0, 312.0, 310.0, 317.0, 309.0, 318.0, 320.0, 316.0, 314.0, 316.0, 309.0, 324.0, 313.0, 314.0, 291.0, 296.0, 329.0, 310.0, 321.0, 306.0, 323.0, 316.0, 319.0, 317.0, 319.0, 320.0, 319.0, 317.0, 319.0, 317.0, 314.0, 313.0, 312.0, 324.0, 313.0, 314.0, 260.0, 273.0, 316.0, 314.0, 296.0, 286.0, 317.0, 322.0, 296.0, 291.0, 321.0, 315.0, 302.0, 291.0, 308.0, 325.0, 318.0, 309.0, 322.0, 317.0, 315.0, 312.0, 322.0, 317.0, 309.0, 324.0, 321.0, 315.0, 322.0, 308.0, 313.0, 317.0, 317.0, 313.0, 322.0, 314.0, 320.0, 313.0, 319.0, 317.0, 293.0, 289.0, 316.0, 311.0, 322.0, 317.0, 317.0, 322.0, 291.0, 291.0, 318.0, 318.0, 305.0, 322.0, 296.0, 291.0, 311.0, 325.0, 309.0, 324.0, 319.0, 314.0, 326.0, 298.0, 317.0, 319.0]}, "sampler_perf": {"mean_env_wait_ms": 0.9343560450317893, "mean_processing_ms": 0.25515871155662695, "mean_inference_ms": 1.5176487497740194}, "off_policy_estimator": {}, "info": {"num_steps_trained": 7464000, "num_steps_sampled": 3980800, "sample_time_ms": 21156.178, "load_time_ms": 36.97, "grad_time_ms": 9869.74, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0005363213713280857, "policy_loss": -0.0070701222866773605, "vf_loss": 81.7235336303711, "vf_explained_var": 0.7686123847961426, "kl": 0.0019356707343831658, "entropy": 1.131825566291809, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3980800, "episodes_total": 9952, "training_iteration": 311, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-24-07", "timestamp": 1660256647, "time_this_iter_s": 30.375731229782104, "time_total_s": 15059.368428945541, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 15059.368428945541, "timesteps_since_restore": 3980800, "iterations_since_restore": 311, "perf": {"cpu_util_percent": 34.15581395348838, "ram_util_percent": 58.86046511627907}}
-{"episode_reward_max": 639.0, "episode_reward_min": 530.0, "episode_reward_mean": 621.63, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 258.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 310.815}, "custom_metrics": {"sparse_reward_mean": 215.6, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 190.43, "shaped_reward_min": 161, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.35, "onion_pickup_agent_0_min": 10, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 18.84, "onion_pickup_agent_1_min": 14, "onion_pickup_agent_1_max": 27, "useful_onion_pickup_agent_0_mean": 16.8, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 18.14, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 26, "onion_drop_agent_0_mean": 0.4, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.71, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.11, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.23, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 6, "potting_onion_agent_0_mean": 16.65, "potting_onion_agent_0_min": 9, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.82, "potting_onion_agent_1_min": 13, "potting_onion_agent_1_max": 25, "dish_pickup_agent_0_mean": 6.15, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.83, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.59, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 5.3, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.45, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.36, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.56, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 5.34, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.5, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 5.31, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.65, 
"optimal_onion_potting_agent_0_min": 9, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.82, "optimal_onion_potting_agent_1_min": 13, "optimal_onion_potting_agent_1_max": 25, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.65, "viable_onion_potting_agent_0_min": 9, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.82, "viable_onion_potting_agent_1_min": 13, "viable_onion_potting_agent_1_max": 25, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [636.0, 624.0, 630.0, 633.0, 636.0, 636.0, 618.0, 636.0, 627.0, 567.0, 636.0, 636.0, 582.0, 633.0, 561.0, 582.0, 582.0, 630.0, 627.0, 587.0, 630.0, 639.0, 630.0, 633.0, 633.0, 633.0, 633.0, 639.0, 630.0, 630.0, 630.0, 627.0, 627.0, 636.0, 627.0, 533.0, 630.0, 582.0, 639.0, 587.0, 636.0, 593.0, 633.0, 627.0, 639.0, 627.0, 639.0, 633.0, 636.0, 630.0, 630.0, 630.0, 636.0, 633.0, 636.0, 582.0, 627.0, 639.0, 639.0, 582.0, 636.0, 627.0, 587.0, 636.0, 633.0, 633.0, 624.0, 636.0, 636.0, 633.0, 582.0, 630.0, 573.0, 636.0, 630.0, 639.0, 639.0, 636.0, 639.0, 630.0, 587.0, 630.0, 627.0, 633.0, 630.0, 636.0, 636.0, 587.0, 639.0, 633.0, 639.0, 587.0, 630.0, 633.0, 530.0, 630.0, 639.0, 633.0, 582.0, 633.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [319.0, 317.0, 313.0, 311.0, 316.0, 314.0, 319.0, 314.0, 312.0, 324.0, 316.0, 320.0, 305.0, 313.0, 309.0, 327.0, 309.0, 318.0, 273.0, 294.0, 319.0, 317.0, 317.0, 319.0, 294.0, 288.0, 311.0, 322.0, 280.0, 281.0, 290.0, 292.0, 291.0, 291.0, 311.0, 319.0, 313.0, 314.0, 296.0, 291.0, 308.0, 322.0, 322.0, 317.0, 320.0, 310.0, 318.0, 315.0, 319.0, 314.0, 317.0, 316.0, 316.0, 317.0, 317.0, 322.0, 311.0, 319.0, 313.0, 317.0, 311.0, 319.0, 310.0, 317.0, 314.0, 313.0, 312.0, 324.0, 313.0, 314.0, 260.0, 273.0, 316.0, 314.0, 296.0, 286.0, 317.0, 322.0, 296.0, 291.0, 321.0, 315.0, 302.0, 291.0, 308.0, 325.0, 318.0, 309.0, 322.0, 317.0, 315.0, 312.0, 322.0, 317.0, 309.0, 324.0, 321.0, 
315.0, 322.0, 308.0, 313.0, 317.0, 317.0, 313.0, 322.0, 314.0, 320.0, 313.0, 319.0, 317.0, 293.0, 289.0, 316.0, 311.0, 322.0, 317.0, 317.0, 322.0, 291.0, 291.0, 318.0, 318.0, 305.0, 322.0, 296.0, 291.0, 311.0, 325.0, 309.0, 324.0, 319.0, 314.0, 326.0, 298.0, 317.0, 319.0, 311.0, 325.0, 316.0, 317.0, 288.0, 294.0, 319.0, 311.0, 285.0, 288.0, 322.0, 314.0, 311.0, 319.0, 322.0, 317.0, 320.0, 319.0, 309.0, 327.0, 319.0, 320.0, 313.0, 317.0, 296.0, 291.0, 311.0, 319.0, 308.0, 319.0, 314.0, 319.0, 319.0, 311.0, 315.0, 321.0, 312.0, 324.0, 296.0, 291.0, 322.0, 317.0, 319.0, 314.0, 319.0, 320.0, 296.0, 291.0, 303.0, 327.0, 309.0, 324.0, 258.0, 272.0, 321.0, 309.0, 319.0, 320.0, 324.0, 309.0, 287.0, 295.0, 319.0, 314.0]}, "sampler_perf": {"mean_env_wait_ms": 0.9328126111792946, "mean_processing_ms": 0.25484999330507013, "mean_inference_ms": 1.5158702163945572}, "off_policy_estimator": {}, "info": {"num_steps_trained": 7488000, "num_steps_sampled": 3993600, "sample_time_ms": 20762.299, "load_time_ms": 37.78, "grad_time_ms": 9801.464, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0010485065868124366, "policy_loss": -0.00627841567620635, "vf_loss": 78.91202545166016, "vf_explained_var": 0.7650337219238281, "kl": 0.0021341259125620127, "entropy": 1.1285619735717773, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3993600, "episodes_total": 9984, "training_iteration": 312, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-24-36", "timestamp": 1660256676, "time_this_iter_s": 29.196868896484375, "time_total_s": 15088.565297842026, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 15088.565297842026, "timesteps_since_restore": 3993600, "iterations_since_restore": 312, "perf": {"cpu_util_percent": 34.93170731707317, "ram_util_percent": 58.778048780487794}}
-{"episode_reward_max": 639.0, "episode_reward_min": 516.0, "episode_reward_mean": 620.09, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 249.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 310.045}, "custom_metrics": {"sparse_reward_mean": 215.2, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 189.69, "shaped_reward_min": 156, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.28, "onion_pickup_agent_0_min": 10, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 18.75, "onion_pickup_agent_1_min": 14, "onion_pickup_agent_1_max": 27, "useful_onion_pickup_agent_0_mean": 16.86, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 17.98, "useful_onion_pickup_agent_1_min": 12, "useful_onion_pickup_agent_1_max": 26, "onion_drop_agent_0_mean": 0.39, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.6, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.1, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.22, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 6, "potting_onion_agent_0_mean": 16.63, "potting_onion_agent_0_min": 9, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.77, "potting_onion_agent_1_min": 13, "potting_onion_agent_1_max": 25, "dish_pickup_agent_0_mean": 6.09, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.85, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.51, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 5.27, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.44, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.36, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.5, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 5.36, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.44, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 5.34, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.63, 
"optimal_onion_potting_agent_0_min": 9, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.77, "optimal_onion_potting_agent_1_min": 13, "optimal_onion_potting_agent_1_max": 25, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.63, "viable_onion_potting_agent_0_min": 9, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.77, "viable_onion_potting_agent_1_min": 13, "viable_onion_potting_agent_1_max": 25, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [630.0, 636.0, 639.0, 630.0, 570.0, 633.0, 639.0, 636.0, 582.0, 576.0, 630.0, 639.0, 636.0, 639.0, 636.0, 630.0, 590.0, 639.0, 582.0, 621.0, 573.0, 627.0, 516.0, 639.0, 572.0, 639.0, 579.0, 636.0, 630.0, 639.0, 627.0, 630.0, 633.0, 633.0, 624.0, 636.0, 636.0, 633.0, 582.0, 630.0, 573.0, 636.0, 630.0, 639.0, 639.0, 636.0, 639.0, 630.0, 587.0, 630.0, 627.0, 633.0, 630.0, 636.0, 636.0, 587.0, 639.0, 633.0, 639.0, 587.0, 630.0, 633.0, 530.0, 630.0, 639.0, 633.0, 582.0, 633.0, 636.0, 624.0, 630.0, 633.0, 636.0, 636.0, 618.0, 636.0, 627.0, 567.0, 636.0, 636.0, 582.0, 633.0, 561.0, 582.0, 582.0, 630.0, 627.0, 587.0, 630.0, 639.0, 630.0, 633.0, 633.0, 633.0, 633.0, 639.0, 630.0, 630.0, 630.0, 627.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [316.0, 314.0, 319.0, 317.0, 320.0, 319.0, 323.0, 307.0, 282.0, 288.0, 319.0, 314.0, 317.0, 322.0, 319.0, 317.0, 294.0, 288.0, 298.0, 278.0, 310.0, 320.0, 324.0, 315.0, 317.0, 319.0, 322.0, 317.0, 316.0, 320.0, 313.0, 317.0, 291.0, 299.0, 317.0, 322.0, 271.0, 311.0, 316.0, 305.0, 278.0, 295.0, 315.0, 312.0, 249.0, 267.0, 317.0, 322.0, 277.0, 295.0, 319.0, 320.0, 297.0, 282.0, 317.0, 319.0, 313.0, 317.0, 316.0, 323.0, 316.0, 311.0, 321.0, 309.0, 309.0, 324.0, 319.0, 314.0, 326.0, 298.0, 317.0, 319.0, 311.0, 325.0, 316.0, 317.0, 288.0, 294.0, 319.0, 311.0, 285.0, 288.0, 322.0, 314.0, 311.0, 319.0, 322.0, 317.0, 320.0, 319.0, 309.0, 327.0, 319.0, 320.0, 313.0, 317.0, 296.0, 
291.0, 311.0, 319.0, 308.0, 319.0, 314.0, 319.0, 319.0, 311.0, 315.0, 321.0, 312.0, 324.0, 296.0, 291.0, 322.0, 317.0, 319.0, 314.0, 319.0, 320.0, 296.0, 291.0, 303.0, 327.0, 309.0, 324.0, 258.0, 272.0, 321.0, 309.0, 319.0, 320.0, 324.0, 309.0, 287.0, 295.0, 319.0, 314.0, 319.0, 317.0, 313.0, 311.0, 316.0, 314.0, 319.0, 314.0, 312.0, 324.0, 316.0, 320.0, 305.0, 313.0, 309.0, 327.0, 309.0, 318.0, 273.0, 294.0, 319.0, 317.0, 317.0, 319.0, 294.0, 288.0, 311.0, 322.0, 280.0, 281.0, 290.0, 292.0, 291.0, 291.0, 311.0, 319.0, 313.0, 314.0, 296.0, 291.0, 308.0, 322.0, 322.0, 317.0, 320.0, 310.0, 318.0, 315.0, 319.0, 314.0, 317.0, 316.0, 316.0, 317.0, 317.0, 322.0, 311.0, 319.0, 313.0, 317.0, 311.0, 319.0, 310.0, 317.0]}, "sampler_perf": {"mean_env_wait_ms": 0.9312771580791189, "mean_processing_ms": 0.25454145444446286, "mean_inference_ms": 1.514166938443501}, "off_policy_estimator": {}, "info": {"num_steps_trained": 7512000, "num_steps_sampled": 4006400, "sample_time_ms": 20653.913, "load_time_ms": 37.661, "grad_time_ms": 9796.127, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0016649666940793395, "policy_loss": -0.005544379819184542, "vf_loss": 77.78628540039062, "vf_explained_var": 0.7735397815704346, "kl": 0.0018068948993459344, "entropy": 1.1385550498962402, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4006400, "episodes_total": 10016, "training_iteration": 313, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-25-08", "timestamp": 1660256708, "time_this_iter_s": 32.13484477996826, "time_total_s": 15120.700142621994, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 15120.700142621994, "timesteps_since_restore": 4006400, "iterations_since_restore": 313, "perf": {"cpu_util_percent": 34.30434782608695, "ram_util_percent": 58.8478260869565}}
-{"episode_reward_max": 639.0, "episode_reward_min": 516.0, "episode_reward_mean": 617.82, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 249.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 308.91}, "custom_metrics": {"sparse_reward_mean": 214.2, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 189.42, "shaped_reward_min": 156, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.24, "onion_pickup_agent_0_min": 10, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 18.59, "onion_pickup_agent_1_min": 13, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.8, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 17.92, "useful_onion_pickup_agent_1_min": 12, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.43, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.44, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.11, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.14, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.53, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.8, "potting_onion_agent_1_min": 13, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.14, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.77, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.59, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.22, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.43, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.34, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.54, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.28, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.47, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.26, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.53, 
"optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.8, "optimal_onion_potting_agent_1_min": 13, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.53, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.8, "viable_onion_potting_agent_1_min": 13, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [630.0, 587.0, 639.0, 587.0, 630.0, 582.0, 636.0, 636.0, 627.0, 639.0, 639.0, 584.0, 587.0, 639.0, 582.0, 630.0, 579.0, 633.0, 636.0, 633.0, 636.0, 636.0, 582.0, 582.0, 633.0, 587.0, 630.0, 633.0, 630.0, 630.0, 593.0, 582.0, 639.0, 633.0, 582.0, 633.0, 636.0, 624.0, 630.0, 633.0, 636.0, 636.0, 618.0, 636.0, 627.0, 567.0, 636.0, 636.0, 582.0, 633.0, 561.0, 582.0, 582.0, 630.0, 627.0, 587.0, 630.0, 639.0, 630.0, 633.0, 633.0, 633.0, 633.0, 639.0, 630.0, 630.0, 630.0, 627.0, 630.0, 636.0, 639.0, 630.0, 570.0, 633.0, 639.0, 636.0, 582.0, 576.0, 630.0, 639.0, 636.0, 639.0, 636.0, 630.0, 590.0, 639.0, 582.0, 621.0, 573.0, 627.0, 516.0, 639.0, 572.0, 639.0, 579.0, 636.0, 630.0, 639.0, 627.0, 630.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [316.0, 314.0, 292.0, 295.0, 314.0, 325.0, 301.0, 286.0, 311.0, 319.0, 295.0, 287.0, 319.0, 317.0, 317.0, 319.0, 305.0, 322.0, 315.0, 324.0, 317.0, 322.0, 298.0, 286.0, 298.0, 289.0, 319.0, 320.0, 286.0, 296.0, 309.0, 321.0, 296.0, 283.0, 318.0, 315.0, 314.0, 322.0, 314.0, 319.0, 316.0, 320.0, 314.0, 322.0, 291.0, 291.0, 285.0, 297.0, 319.0, 314.0, 298.0, 289.0, 319.0, 311.0, 316.0, 317.0, 323.0, 307.0, 315.0, 315.0, 297.0, 296.0, 291.0, 291.0, 319.0, 320.0, 324.0, 309.0, 287.0, 295.0, 319.0, 314.0, 319.0, 317.0, 313.0, 311.0, 316.0, 314.0, 319.0, 314.0, 312.0, 324.0, 316.0, 320.0, 305.0, 313.0, 309.0, 327.0, 309.0, 318.0, 273.0, 294.0, 319.0, 317.0, 317.0, 319.0, 294.0, 
288.0, 311.0, 322.0, 280.0, 281.0, 290.0, 292.0, 291.0, 291.0, 311.0, 319.0, 313.0, 314.0, 296.0, 291.0, 308.0, 322.0, 322.0, 317.0, 320.0, 310.0, 318.0, 315.0, 319.0, 314.0, 317.0, 316.0, 316.0, 317.0, 317.0, 322.0, 311.0, 319.0, 313.0, 317.0, 311.0, 319.0, 310.0, 317.0, 316.0, 314.0, 319.0, 317.0, 320.0, 319.0, 323.0, 307.0, 282.0, 288.0, 319.0, 314.0, 317.0, 322.0, 319.0, 317.0, 294.0, 288.0, 298.0, 278.0, 310.0, 320.0, 324.0, 315.0, 317.0, 319.0, 322.0, 317.0, 316.0, 320.0, 313.0, 317.0, 291.0, 299.0, 317.0, 322.0, 271.0, 311.0, 316.0, 305.0, 278.0, 295.0, 315.0, 312.0, 249.0, 267.0, 317.0, 322.0, 277.0, 295.0, 319.0, 320.0, 297.0, 282.0, 317.0, 319.0, 313.0, 317.0, 316.0, 323.0, 316.0, 311.0, 321.0, 309.0]}, "sampler_perf": {"mean_env_wait_ms": 0.929753206610253, "mean_processing_ms": 0.25423624145174695, "mean_inference_ms": 1.5125268663026497}, "off_policy_estimator": {}, "info": {"num_steps_trained": 7536000, "num_steps_sampled": 4019200, "sample_time_ms": 20614.803, "load_time_ms": 37.576, "grad_time_ms": 9935.143, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.005392418708652258, "policy_loss": -0.002403073711320758, "vf_loss": 83.61144256591797, "vf_explained_var": 0.7692582011222839, "kl": 0.0021780512761324644, "entropy": 1.131287932395935, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4019200, "episodes_total": 10048, "training_iteration": 314, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-25-41", "timestamp": 1660256741, "time_this_iter_s": 33.338226318359375, "time_total_s": 15154.038368940353, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 15154.038368940353, "timesteps_since_restore": 4019200, "iterations_since_restore": 314, "perf": {"cpu_util_percent": 34.01914893617022, "ram_util_percent": 58.87021276595746}}
-{"episode_reward_max": 639.0, "episode_reward_min": 459.0, "episode_reward_mean": 614.82, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 228.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 307.41}, "custom_metrics": {"sparse_reward_mean": 213.0, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 188.82, "shaped_reward_min": 139, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.1, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 18.53, "onion_pickup_agent_1_min": 13, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.65, "useful_onion_pickup_agent_0_min": 13, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 17.87, "useful_onion_pickup_agent_1_min": 12, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.42, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.44, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.09, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.14, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.46, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.7, "potting_onion_agent_1_min": 13, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.03, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.76, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.59, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.24, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.33, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.36, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.53, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.26, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.45, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.24, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.46, 
"optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.7, "optimal_onion_potting_agent_1_min": 13, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.46, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.7, "viable_onion_potting_agent_1_min": 13, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [587.0, 590.0, 633.0, 633.0, 630.0, 630.0, 627.0, 582.0, 573.0, 630.0, 630.0, 582.0, 636.0, 639.0, 630.0, 587.0, 639.0, 630.0, 627.0, 630.0, 630.0, 639.0, 459.0, 582.0, 627.0, 582.0, 627.0, 636.0, 627.0, 633.0, 530.0, 639.0, 630.0, 630.0, 630.0, 627.0, 630.0, 636.0, 639.0, 630.0, 570.0, 633.0, 639.0, 636.0, 582.0, 576.0, 630.0, 639.0, 636.0, 639.0, 636.0, 630.0, 590.0, 639.0, 582.0, 621.0, 573.0, 627.0, 516.0, 639.0, 572.0, 639.0, 579.0, 636.0, 630.0, 639.0, 627.0, 630.0, 630.0, 587.0, 639.0, 587.0, 630.0, 582.0, 636.0, 636.0, 627.0, 639.0, 639.0, 584.0, 587.0, 639.0, 582.0, 630.0, 579.0, 633.0, 636.0, 633.0, 636.0, 636.0, 582.0, 582.0, 633.0, 587.0, 630.0, 633.0, 630.0, 630.0, 593.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [290.0, 297.0, 294.0, 296.0, 327.0, 306.0, 314.0, 319.0, 308.0, 322.0, 319.0, 311.0, 311.0, 316.0, 294.0, 288.0, 282.0, 291.0, 318.0, 312.0, 309.0, 321.0, 291.0, 291.0, 318.0, 318.0, 317.0, 322.0, 316.0, 314.0, 296.0, 291.0, 318.0, 321.0, 313.0, 317.0, 318.0, 309.0, 311.0, 319.0, 306.0, 324.0, 317.0, 322.0, 231.0, 228.0, 288.0, 294.0, 315.0, 312.0, 294.0, 288.0, 308.0, 319.0, 316.0, 320.0, 324.0, 303.0, 311.0, 322.0, 265.0, 265.0, 314.0, 325.0, 311.0, 319.0, 313.0, 317.0, 311.0, 319.0, 310.0, 317.0, 316.0, 314.0, 319.0, 317.0, 320.0, 319.0, 323.0, 307.0, 282.0, 288.0, 319.0, 314.0, 317.0, 322.0, 319.0, 317.0, 294.0, 288.0, 298.0, 278.0, 310.0, 320.0, 324.0, 315.0, 317.0, 
319.0, 322.0, 317.0, 316.0, 320.0, 313.0, 317.0, 291.0, 299.0, 317.0, 322.0, 271.0, 311.0, 316.0, 305.0, 278.0, 295.0, 315.0, 312.0, 249.0, 267.0, 317.0, 322.0, 277.0, 295.0, 319.0, 320.0, 297.0, 282.0, 317.0, 319.0, 313.0, 317.0, 316.0, 323.0, 316.0, 311.0, 321.0, 309.0, 316.0, 314.0, 292.0, 295.0, 314.0, 325.0, 301.0, 286.0, 311.0, 319.0, 295.0, 287.0, 319.0, 317.0, 317.0, 319.0, 305.0, 322.0, 315.0, 324.0, 317.0, 322.0, 298.0, 286.0, 298.0, 289.0, 319.0, 320.0, 286.0, 296.0, 309.0, 321.0, 296.0, 283.0, 318.0, 315.0, 314.0, 322.0, 314.0, 319.0, 316.0, 320.0, 314.0, 322.0, 291.0, 291.0, 285.0, 297.0, 319.0, 314.0, 298.0, 289.0, 319.0, 311.0, 316.0, 317.0, 323.0, 307.0, 315.0, 315.0, 297.0, 296.0, 291.0, 291.0]}, "sampler_perf": {"mean_env_wait_ms": 0.9282592182519096, "mean_processing_ms": 0.253938832905362, "mean_inference_ms": 1.511735993488809}, "off_policy_estimator": {}, "info": {"num_steps_trained": 7560000, "num_steps_sampled": 4032000, "sample_time_ms": 21363.9, "load_time_ms": 37.576, "grad_time_ms": 10037.47, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0003922081959899515, "policy_loss": -0.007403677329421043, "vf_loss": 83.57759857177734, "vf_explained_var": 0.7612032294273376, "kl": 0.001659790868870914, "entropy": 1.12375009059906, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4032000, "episodes_total": 10080, "training_iteration": 315, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-26-24", "timestamp": 1660256784, "time_this_iter_s": 42.419737100601196, "time_total_s": 15196.458106040955, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 15196.458106040955, "timesteps_since_restore": 4032000, "iterations_since_restore": 315, "perf": {"cpu_util_percent": 30.92, "ram_util_percent": 58.89833333333333}}
-{"episode_reward_max": 639.0, "episode_reward_min": 459.0, "episode_reward_mean": 612.79, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 228.0}, "policy_reward_max": {"ppo": 328.0}, "policy_reward_mean": {"ppo": 306.395}, "custom_metrics": {"sparse_reward_mean": 212.2, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 188.39, "shaped_reward_min": 139, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.82, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 18.71, "onion_pickup_agent_1_min": 13, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.34, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 18.12, "useful_onion_pickup_agent_1_min": 12, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.43, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.45, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.09, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.16, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.14, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.92, "potting_onion_agent_1_min": 13, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.09, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.67, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.65, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.12, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.3, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.41, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.68, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 5.13, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.56, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.09, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.03, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.14, 
"optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.92, "optimal_onion_potting_agent_1_min": 13, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.14, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.92, "viable_onion_potting_agent_1_min": 13, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [630.0, 633.0, 630.0, 587.0, 630.0, 587.0, 564.0, 576.0, 627.0, 636.0, 630.0, 526.0, 587.0, 582.0, 627.0, 633.0, 633.0, 630.0, 621.0, 579.0, 587.0, 582.0, 633.0, 636.0, 633.0, 576.0, 636.0, 633.0, 587.0, 639.0, 582.0, 636.0, 630.0, 639.0, 627.0, 630.0, 630.0, 587.0, 639.0, 587.0, 630.0, 582.0, 636.0, 636.0, 627.0, 639.0, 639.0, 584.0, 587.0, 639.0, 582.0, 630.0, 579.0, 633.0, 636.0, 633.0, 636.0, 636.0, 582.0, 582.0, 633.0, 587.0, 630.0, 633.0, 630.0, 630.0, 593.0, 582.0, 587.0, 590.0, 633.0, 633.0, 630.0, 630.0, 627.0, 582.0, 573.0, 630.0, 630.0, 582.0, 636.0, 639.0, 630.0, 587.0, 639.0, 630.0, 627.0, 630.0, 630.0, 639.0, 459.0, 582.0, 627.0, 582.0, 627.0, 636.0, 627.0, 633.0, 530.0, 639.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [315.0, 315.0, 316.0, 317.0, 306.0, 324.0, 290.0, 297.0, 316.0, 314.0, 298.0, 289.0, 286.0, 278.0, 287.0, 289.0, 299.0, 328.0, 313.0, 323.0, 321.0, 309.0, 261.0, 265.0, 280.0, 307.0, 291.0, 291.0, 313.0, 314.0, 314.0, 319.0, 318.0, 315.0, 311.0, 319.0, 318.0, 303.0, 291.0, 288.0, 293.0, 294.0, 286.0, 296.0, 321.0, 312.0, 316.0, 320.0, 311.0, 322.0, 288.0, 288.0, 324.0, 312.0, 309.0, 324.0, 300.0, 287.0, 322.0, 317.0, 294.0, 288.0, 322.0, 314.0, 313.0, 317.0, 316.0, 323.0, 316.0, 311.0, 321.0, 309.0, 316.0, 314.0, 292.0, 295.0, 314.0, 325.0, 301.0, 286.0, 311.0, 319.0, 295.0, 287.0, 319.0, 317.0, 317.0, 319.0, 305.0, 322.0, 315.0, 324.0, 317.0, 322.0, 298.0, 286.0, 298.0, 
289.0, 319.0, 320.0, 286.0, 296.0, 309.0, 321.0, 296.0, 283.0, 318.0, 315.0, 314.0, 322.0, 314.0, 319.0, 316.0, 320.0, 314.0, 322.0, 291.0, 291.0, 285.0, 297.0, 319.0, 314.0, 298.0, 289.0, 319.0, 311.0, 316.0, 317.0, 323.0, 307.0, 315.0, 315.0, 297.0, 296.0, 291.0, 291.0, 290.0, 297.0, 294.0, 296.0, 327.0, 306.0, 314.0, 319.0, 308.0, 322.0, 319.0, 311.0, 311.0, 316.0, 294.0, 288.0, 282.0, 291.0, 318.0, 312.0, 309.0, 321.0, 291.0, 291.0, 318.0, 318.0, 317.0, 322.0, 316.0, 314.0, 296.0, 291.0, 318.0, 321.0, 313.0, 317.0, 318.0, 309.0, 311.0, 319.0, 306.0, 324.0, 317.0, 322.0, 231.0, 228.0, 288.0, 294.0, 315.0, 312.0, 294.0, 288.0, 308.0, 319.0, 316.0, 320.0, 324.0, 303.0, 311.0, 322.0, 265.0, 265.0, 314.0, 325.0]}, "sampler_perf": {"mean_env_wait_ms": 0.9267730740231102, "mean_processing_ms": 0.2536434066530759, "mean_inference_ms": 1.510930494877483}, "off_policy_estimator": {}, "info": {"num_steps_trained": 7584000, "num_steps_sampled": 4044800, "sample_time_ms": 21428.224, "load_time_ms": 37.331, "grad_time_ms": 10025.054, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0037510674446821213, "policy_loss": -0.004000961780548096, "vf_loss": 83.20941925048828, "vf_explained_var": 0.7631545066833496, "kl": 0.0021077950950711966, "entropy": 1.1378254890441895, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4044800, "episodes_total": 10112, "training_iteration": 316, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-26-55", "timestamp": 1660256815, "time_this_iter_s": 31.23423171043396, "time_total_s": 15227.692337751389, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 15227.692337751389, "timesteps_since_restore": 4044800, "iterations_since_restore": 316, "perf": {"cpu_util_percent": 34.638636363636365, "ram_util_percent": 58.979545454545466}}
-{"episode_reward_max": 639.0, "episode_reward_min": 342.0, "episode_reward_mean": 610.27, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 166.0}, "policy_reward_max": {"ppo": 329.0}, "policy_reward_mean": {"ppo": 305.135}, "custom_metrics": {"sparse_reward_mean": 211.6, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 187.07, "shaped_reward_min": 102, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.65, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 18.45, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.26, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 17.92, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.36, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.35, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.04, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.12, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.02, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.76, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.18, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.67, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.6, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.06, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.41, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.42, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.67, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 5.11, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.57, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.06, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.03, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.02, 
"optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.76, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.02, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.76, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [633.0, 587.0, 630.0, 627.0, 627.0, 630.0, 624.0, 587.0, 627.0, 639.0, 627.0, 630.0, 582.0, 633.0, 593.0, 633.0, 639.0, 639.0, 630.0, 639.0, 582.0, 342.0, 630.0, 564.0, 639.0, 587.0, 587.0, 627.0, 633.0, 627.0, 627.0, 627.0, 630.0, 630.0, 593.0, 582.0, 587.0, 590.0, 633.0, 633.0, 630.0, 630.0, 627.0, 582.0, 573.0, 630.0, 630.0, 582.0, 636.0, 639.0, 630.0, 587.0, 639.0, 630.0, 627.0, 630.0, 630.0, 639.0, 459.0, 582.0, 627.0, 582.0, 627.0, 636.0, 627.0, 633.0, 530.0, 639.0, 630.0, 633.0, 630.0, 587.0, 630.0, 587.0, 564.0, 576.0, 627.0, 636.0, 630.0, 526.0, 587.0, 582.0, 627.0, 633.0, 633.0, 630.0, 621.0, 579.0, 587.0, 582.0, 633.0, 636.0, 633.0, 576.0, 636.0, 633.0, 587.0, 639.0, 582.0, 636.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [319.0, 314.0, 295.0, 292.0, 311.0, 319.0, 311.0, 316.0, 308.0, 319.0, 322.0, 308.0, 319.0, 305.0, 285.0, 302.0, 306.0, 321.0, 319.0, 320.0, 321.0, 306.0, 321.0, 309.0, 295.0, 287.0, 317.0, 316.0, 299.0, 294.0, 319.0, 314.0, 329.0, 310.0, 319.0, 320.0, 324.0, 306.0, 314.0, 325.0, 292.0, 290.0, 166.0, 176.0, 311.0, 319.0, 265.0, 299.0, 322.0, 317.0, 294.0, 293.0, 287.0, 300.0, 313.0, 314.0, 309.0, 324.0, 316.0, 311.0, 315.0, 312.0, 319.0, 308.0, 323.0, 307.0, 315.0, 315.0, 297.0, 296.0, 291.0, 291.0, 290.0, 297.0, 294.0, 296.0, 327.0, 306.0, 314.0, 319.0, 308.0, 322.0, 319.0, 311.0, 311.0, 316.0, 294.0, 288.0, 282.0, 291.0, 318.0, 312.0, 309.0, 321.0, 291.0, 291.0, 318.0, 
318.0, 317.0, 322.0, 316.0, 314.0, 296.0, 291.0, 318.0, 321.0, 313.0, 317.0, 318.0, 309.0, 311.0, 319.0, 306.0, 324.0, 317.0, 322.0, 231.0, 228.0, 288.0, 294.0, 315.0, 312.0, 294.0, 288.0, 308.0, 319.0, 316.0, 320.0, 324.0, 303.0, 311.0, 322.0, 265.0, 265.0, 314.0, 325.0, 315.0, 315.0, 316.0, 317.0, 306.0, 324.0, 290.0, 297.0, 316.0, 314.0, 298.0, 289.0, 286.0, 278.0, 287.0, 289.0, 299.0, 328.0, 313.0, 323.0, 321.0, 309.0, 261.0, 265.0, 280.0, 307.0, 291.0, 291.0, 313.0, 314.0, 314.0, 319.0, 318.0, 315.0, 311.0, 319.0, 318.0, 303.0, 291.0, 288.0, 293.0, 294.0, 286.0, 296.0, 321.0, 312.0, 316.0, 320.0, 311.0, 322.0, 288.0, 288.0, 324.0, 312.0, 309.0, 324.0, 300.0, 287.0, 322.0, 317.0, 294.0, 288.0, 322.0, 314.0]}, "sampler_perf": {"mean_env_wait_ms": 0.9252951052976854, "mean_processing_ms": 0.2533493735803219, "mean_inference_ms": 1.5101286664706006}, "off_policy_estimator": {}, "info": {"num_steps_trained": 7608000, "num_steps_sampled": 4057600, "sample_time_ms": 21664.809, "load_time_ms": 37.646, "grad_time_ms": 10004.469, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0035793918650597334, "policy_loss": -0.004444916266947985, "vf_loss": 85.9527359008789, "vf_explained_var": 0.7614016532897949, "kl": 0.0019710592459887266, "entropy": 1.1419222354888916, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4057600, "episodes_total": 10144, "training_iteration": 317, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-27-26", "timestamp": 1660256846, "time_this_iter_s": 31.29483914375305, "time_total_s": 15258.987176895142, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 15258.987176895142, "timesteps_since_restore": 4057600, "iterations_since_restore": 317, "perf": {"cpu_util_percent": 35.325, "ram_util_percent": 58.888636363636344}}
-{"episode_reward_max": 639.0, "episode_reward_min": 342.0, "episode_reward_mean": 610.21, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 166.0}, "policy_reward_max": {"ppo": 329.0}, "policy_reward_mean": {"ppo": 305.105}, "custom_metrics": {"sparse_reward_mean": 211.4, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 187.41, "shaped_reward_min": 102, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.61, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 18.6, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.26, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 17.99, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.31, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.4, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.04, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.12, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.99, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.89, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.29, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.67, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.65, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.94, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.44, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.4, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.75, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 5.08, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.65, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.98, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.99, 
"optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.89, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.99, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.89, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 587.0, 639.0, 636.0, 639.0, 630.0, 639.0, 566.0, 639.0, 541.0, 590.0, 587.0, 627.0, 584.0, 639.0, 639.0, 518.0, 582.0, 590.0, 633.0, 630.0, 633.0, 627.0, 636.0, 630.0, 587.0, 633.0, 564.0, 639.0, 630.0, 633.0, 630.0, 627.0, 633.0, 530.0, 639.0, 630.0, 633.0, 630.0, 587.0, 630.0, 587.0, 564.0, 576.0, 627.0, 636.0, 630.0, 526.0, 587.0, 582.0, 627.0, 633.0, 633.0, 630.0, 621.0, 579.0, 587.0, 582.0, 633.0, 636.0, 633.0, 576.0, 636.0, 633.0, 587.0, 639.0, 582.0, 636.0, 633.0, 587.0, 630.0, 627.0, 627.0, 630.0, 624.0, 587.0, 627.0, 639.0, 627.0, 630.0, 582.0, 633.0, 593.0, 633.0, 639.0, 639.0, 630.0, 639.0, 582.0, 342.0, 630.0, 564.0, 639.0, 587.0, 587.0, 627.0, 633.0, 627.0, 627.0, 627.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [297.0, 282.0, 291.0, 296.0, 319.0, 320.0, 319.0, 317.0, 317.0, 322.0, 308.0, 322.0, 325.0, 314.0, 290.0, 276.0, 319.0, 320.0, 277.0, 264.0, 306.0, 284.0, 285.0, 302.0, 318.0, 309.0, 294.0, 290.0, 320.0, 319.0, 327.0, 312.0, 246.0, 272.0, 291.0, 291.0, 296.0, 294.0, 311.0, 322.0, 316.0, 314.0, 326.0, 307.0, 308.0, 319.0, 322.0, 314.0, 310.0, 320.0, 293.0, 294.0, 319.0, 314.0, 277.0, 287.0, 319.0, 320.0, 316.0, 314.0, 321.0, 312.0, 308.0, 322.0, 324.0, 303.0, 311.0, 322.0, 265.0, 265.0, 314.0, 325.0, 315.0, 315.0, 316.0, 317.0, 306.0, 324.0, 290.0, 297.0, 316.0, 314.0, 298.0, 289.0, 286.0, 278.0, 287.0, 289.0, 299.0, 328.0, 313.0, 323.0, 321.0, 309.0, 261.0, 265.0, 280.0, 
307.0, 291.0, 291.0, 313.0, 314.0, 314.0, 319.0, 318.0, 315.0, 311.0, 319.0, 318.0, 303.0, 291.0, 288.0, 293.0, 294.0, 286.0, 296.0, 321.0, 312.0, 316.0, 320.0, 311.0, 322.0, 288.0, 288.0, 324.0, 312.0, 309.0, 324.0, 300.0, 287.0, 322.0, 317.0, 294.0, 288.0, 322.0, 314.0, 319.0, 314.0, 295.0, 292.0, 311.0, 319.0, 311.0, 316.0, 308.0, 319.0, 322.0, 308.0, 319.0, 305.0, 285.0, 302.0, 306.0, 321.0, 319.0, 320.0, 321.0, 306.0, 321.0, 309.0, 295.0, 287.0, 317.0, 316.0, 299.0, 294.0, 319.0, 314.0, 329.0, 310.0, 319.0, 320.0, 324.0, 306.0, 314.0, 325.0, 292.0, 290.0, 166.0, 176.0, 311.0, 319.0, 265.0, 299.0, 322.0, 317.0, 294.0, 293.0, 287.0, 300.0, 313.0, 314.0, 309.0, 324.0, 316.0, 311.0, 315.0, 312.0, 319.0, 308.0]}, "sampler_perf": {"mean_env_wait_ms": 0.9238104630131461, "mean_processing_ms": 0.2530528359146936, "mean_inference_ms": 1.5086822690810806}, "off_policy_estimator": {}, "info": {"num_steps_trained": 7632000, "num_steps_sampled": 4070400, "sample_time_ms": 21837.173, "load_time_ms": 37.475, "grad_time_ms": 10172.214, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0033606337383389473, "policy_loss": -0.004180160816758871, "vf_loss": 81.06964111328125, "vf_explained_var": 0.7688854336738586, "kl": 0.0033983252942562103, "entropy": 1.1323426961898804, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4070400, "episodes_total": 10176, "training_iteration": 318, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-27-57", "timestamp": 1660256877, "time_this_iter_s": 30.971107959747314, "time_total_s": 15289.958284854889, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 15289.958284854889, "timesteps_since_restore": 4070400, "iterations_since_restore": 318, "perf": {"cpu_util_percent": 34.67727272727273, "ram_util_percent": 58.75454545454544}}
-{"episode_reward_max": 639.0, "episode_reward_min": 342.0, "episode_reward_mean": 610.4, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 166.0}, "policy_reward_max": {"ppo": 329.0}, "policy_reward_mean": {"ppo": 305.2}, "custom_metrics": {"sparse_reward_mean": 211.4, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 187.6, "shaped_reward_min": 102, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.79, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 18.4, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.42, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 17.75, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.34, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.37, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.02, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.13, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.13, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.74, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.39, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.79, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.67, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.01, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.54, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.47, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.69, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 5.12, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.62, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.01, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.13, 
"optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.74, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.13, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.74, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 630.0, 627.0, 630.0, 633.0, 579.0, 530.0, 587.0, 570.0, 636.0, 627.0, 579.0, 636.0, 633.0, 630.0, 636.0, 624.0, 587.0, 621.0, 630.0, 582.0, 636.0, 630.0, 633.0, 596.0, 630.0, 541.0, 636.0, 630.0, 627.0, 582.0, 582.0, 587.0, 639.0, 582.0, 636.0, 633.0, 587.0, 630.0, 627.0, 627.0, 630.0, 624.0, 587.0, 627.0, 639.0, 627.0, 630.0, 582.0, 633.0, 593.0, 633.0, 639.0, 639.0, 630.0, 639.0, 582.0, 342.0, 630.0, 564.0, 639.0, 587.0, 587.0, 627.0, 633.0, 627.0, 627.0, 627.0, 579.0, 587.0, 639.0, 636.0, 639.0, 630.0, 639.0, 566.0, 639.0, 541.0, 590.0, 587.0, 627.0, 584.0, 639.0, 639.0, 518.0, 582.0, 590.0, 633.0, 630.0, 633.0, 627.0, 636.0, 630.0, 587.0, 633.0, 564.0, 639.0, 630.0, 633.0, 630.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [292.0, 290.0, 320.0, 310.0, 313.0, 314.0, 316.0, 314.0, 319.0, 314.0, 291.0, 288.0, 256.0, 274.0, 283.0, 304.0, 286.0, 284.0, 319.0, 317.0, 312.0, 315.0, 291.0, 288.0, 319.0, 317.0, 319.0, 314.0, 308.0, 322.0, 317.0, 319.0, 314.0, 310.0, 293.0, 294.0, 307.0, 314.0, 313.0, 317.0, 291.0, 291.0, 312.0, 324.0, 313.0, 317.0, 324.0, 309.0, 304.0, 292.0, 308.0, 322.0, 272.0, 269.0, 309.0, 327.0, 306.0, 324.0, 318.0, 309.0, 293.0, 289.0, 291.0, 291.0, 300.0, 287.0, 322.0, 317.0, 294.0, 288.0, 322.0, 314.0, 319.0, 314.0, 295.0, 292.0, 311.0, 319.0, 311.0, 316.0, 308.0, 319.0, 322.0, 308.0, 319.0, 305.0, 285.0, 302.0, 306.0, 321.0, 319.0, 320.0, 321.0, 306.0, 321.0, 309.0, 295.0, 
287.0, 317.0, 316.0, 299.0, 294.0, 319.0, 314.0, 329.0, 310.0, 319.0, 320.0, 324.0, 306.0, 314.0, 325.0, 292.0, 290.0, 166.0, 176.0, 311.0, 319.0, 265.0, 299.0, 322.0, 317.0, 294.0, 293.0, 287.0, 300.0, 313.0, 314.0, 309.0, 324.0, 316.0, 311.0, 315.0, 312.0, 319.0, 308.0, 297.0, 282.0, 291.0, 296.0, 319.0, 320.0, 319.0, 317.0, 317.0, 322.0, 308.0, 322.0, 325.0, 314.0, 290.0, 276.0, 319.0, 320.0, 277.0, 264.0, 306.0, 284.0, 285.0, 302.0, 318.0, 309.0, 294.0, 290.0, 320.0, 319.0, 327.0, 312.0, 246.0, 272.0, 291.0, 291.0, 296.0, 294.0, 311.0, 322.0, 316.0, 314.0, 326.0, 307.0, 308.0, 319.0, 322.0, 314.0, 310.0, 320.0, 293.0, 294.0, 319.0, 314.0, 277.0, 287.0, 319.0, 320.0, 316.0, 314.0, 321.0, 312.0, 308.0, 322.0]}, "sampler_perf": {"mean_env_wait_ms": 0.9223351501134641, "mean_processing_ms": 0.2527584584336856, "mean_inference_ms": 1.5072340124708836}, "off_policy_estimator": {}, "info": {"num_steps_trained": 7656000, "num_steps_sampled": 4083200, "sample_time_ms": 21920.847, "load_time_ms": 37.336, "grad_time_ms": 10402.485, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0003446048649493605, "policy_loss": -0.007382390554994345, "vf_loss": 82.95357513427734, "vf_explained_var": 0.759884774684906, "kl": 0.0017484420677646995, "entropy": 1.1367279291152954, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4083200, "episodes_total": 10208, "training_iteration": 319, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-28-31", "timestamp": 1660256911, "time_this_iter_s": 33.19297218322754, "time_total_s": 15323.151257038116, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 15323.151257038116, "timesteps_since_restore": 4083200, "iterations_since_restore": 319, "perf": {"cpu_util_percent": 34.074468085106375, "ram_util_percent": 59.221276595744676}}
-{"episode_reward_max": 639.0, "episode_reward_min": 518.0, "episode_reward_mean": 612.17, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 246.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 306.085}, "custom_metrics": {"sparse_reward_mean": 212.0, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 188.17, "shaped_reward_min": 158, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.9, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 18.56, "onion_pickup_agent_1_min": 14, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.53, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 17.91, "useful_onion_pickup_agent_1_min": 14, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.4, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.41, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.03, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.13, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.13, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.85, "potting_onion_agent_1_min": 14, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.38, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.95, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.61, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.05, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.57, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.58, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.67, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.19, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.6, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.07, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.13, 
"optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.85, "optimal_onion_potting_agent_1_min": 14, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.13, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.85, "viable_onion_potting_agent_1_min": 14, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [627.0, 630.0, 630.0, 639.0, 633.0, 627.0, 590.0, 587.0, 633.0, 630.0, 536.0, 624.0, 630.0, 575.0, 636.0, 587.0, 590.0, 630.0, 587.0, 633.0, 587.0, 633.0, 633.0, 630.0, 570.0, 630.0, 587.0, 633.0, 582.0, 627.0, 633.0, 636.0, 633.0, 627.0, 627.0, 627.0, 579.0, 587.0, 639.0, 636.0, 639.0, 630.0, 639.0, 566.0, 639.0, 541.0, 590.0, 587.0, 627.0, 584.0, 639.0, 639.0, 518.0, 582.0, 590.0, 633.0, 630.0, 633.0, 627.0, 636.0, 630.0, 587.0, 633.0, 564.0, 639.0, 630.0, 633.0, 630.0, 582.0, 630.0, 627.0, 630.0, 633.0, 579.0, 530.0, 587.0, 570.0, 636.0, 627.0, 579.0, 636.0, 633.0, 630.0, 636.0, 624.0, 587.0, 621.0, 630.0, 582.0, 636.0, 630.0, 633.0, 596.0, 630.0, 541.0, 636.0, 630.0, 627.0, 582.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [313.0, 314.0, 311.0, 319.0, 313.0, 317.0, 316.0, 323.0, 314.0, 319.0, 311.0, 316.0, 299.0, 291.0, 296.0, 291.0, 311.0, 322.0, 313.0, 317.0, 257.0, 279.0, 318.0, 306.0, 308.0, 322.0, 281.0, 294.0, 314.0, 322.0, 291.0, 296.0, 298.0, 292.0, 309.0, 321.0, 290.0, 297.0, 313.0, 320.0, 293.0, 294.0, 321.0, 312.0, 319.0, 314.0, 314.0, 316.0, 288.0, 282.0, 315.0, 315.0, 291.0, 296.0, 319.0, 314.0, 302.0, 280.0, 313.0, 314.0, 314.0, 319.0, 313.0, 323.0, 309.0, 324.0, 316.0, 311.0, 315.0, 312.0, 319.0, 308.0, 297.0, 282.0, 291.0, 296.0, 319.0, 320.0, 319.0, 317.0, 317.0, 322.0, 308.0, 322.0, 325.0, 314.0, 290.0, 276.0, 319.0, 320.0, 277.0, 264.0, 306.0, 284.0, 285.0, 302.0, 318.0, 
309.0, 294.0, 290.0, 320.0, 319.0, 327.0, 312.0, 246.0, 272.0, 291.0, 291.0, 296.0, 294.0, 311.0, 322.0, 316.0, 314.0, 326.0, 307.0, 308.0, 319.0, 322.0, 314.0, 310.0, 320.0, 293.0, 294.0, 319.0, 314.0, 277.0, 287.0, 319.0, 320.0, 316.0, 314.0, 321.0, 312.0, 308.0, 322.0, 292.0, 290.0, 320.0, 310.0, 313.0, 314.0, 316.0, 314.0, 319.0, 314.0, 291.0, 288.0, 256.0, 274.0, 283.0, 304.0, 286.0, 284.0, 319.0, 317.0, 312.0, 315.0, 291.0, 288.0, 319.0, 317.0, 319.0, 314.0, 308.0, 322.0, 317.0, 319.0, 314.0, 310.0, 293.0, 294.0, 307.0, 314.0, 313.0, 317.0, 291.0, 291.0, 312.0, 324.0, 313.0, 317.0, 324.0, 309.0, 304.0, 292.0, 308.0, 322.0, 272.0, 269.0, 309.0, 327.0, 306.0, 324.0, 318.0, 309.0, 293.0, 289.0, 291.0, 291.0]}, "sampler_perf": {"mean_env_wait_ms": 0.9208740877938348, "mean_processing_ms": 0.25246719663876194, "mean_inference_ms": 1.5059256221319566}, "off_policy_estimator": {}, "info": {"num_steps_trained": 7680000, "num_steps_sampled": 4096000, "sample_time_ms": 22117.687, "load_time_ms": 37.357, "grad_time_ms": 10501.528, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0030171778053045273, "policy_loss": -0.004927590023726225, "vf_loss": 85.14810943603516, "vf_explained_var": 0.76070237159729, "kl": 0.002144080586731434, "entropy": 1.140079379081726, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4096000, "episodes_total": 10240, "training_iteration": 320, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-29-03", "timestamp": 1660256943, "time_this_iter_s": 32.6832230091095, "time_total_s": 15355.834480047226, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 15355.834480047226, "timesteps_since_restore": 4096000, "iterations_since_restore": 320, "perf": {"cpu_util_percent": 32.95652173913044, "ram_util_percent": 59.06739130434784}}
-{"episode_reward_max": 639.0, "episode_reward_min": 530.0, "episode_reward_mean": 613.64, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 256.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 306.82}, "custom_metrics": {"sparse_reward_mean": 212.6, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 188.44, "shaped_reward_min": 167, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.75, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 18.81, "onion_pickup_agent_1_min": 14, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 16.39, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 18.11, "useful_onion_pickup_agent_1_min": 14, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.37, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.47, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.03, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.15, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.06, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 18.04, "potting_onion_agent_1_min": 14, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.47, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.85, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.68, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.0, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.64, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.59, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.7, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.12, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.62, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.05, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.06, 
"optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 18.04, "optimal_onion_potting_agent_1_min": 14, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.06, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 18.04, "viable_onion_potting_agent_1_min": 14, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [567.0, 582.0, 576.0, 587.0, 630.0, 636.0, 630.0, 636.0, 630.0, 636.0, 624.0, 639.0, 582.0, 624.0, 582.0, 633.0, 587.0, 636.0, 633.0, 636.0, 587.0, 636.0, 633.0, 633.0, 636.0, 633.0, 633.0, 627.0, 587.0, 633.0, 582.0, 579.0, 639.0, 630.0, 633.0, 630.0, 582.0, 630.0, 627.0, 630.0, 633.0, 579.0, 530.0, 587.0, 570.0, 636.0, 627.0, 579.0, 636.0, 633.0, 630.0, 636.0, 624.0, 587.0, 621.0, 630.0, 582.0, 636.0, 630.0, 633.0, 596.0, 630.0, 541.0, 636.0, 630.0, 627.0, 582.0, 582.0, 627.0, 630.0, 630.0, 639.0, 633.0, 627.0, 590.0, 587.0, 633.0, 630.0, 536.0, 624.0, 630.0, 575.0, 636.0, 587.0, 590.0, 630.0, 587.0, 633.0, 587.0, 633.0, 633.0, 630.0, 570.0, 630.0, 587.0, 633.0, 582.0, 627.0, 633.0, 636.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [285.0, 282.0, 287.0, 295.0, 289.0, 287.0, 299.0, 288.0, 326.0, 304.0, 321.0, 315.0, 311.0, 319.0, 314.0, 322.0, 317.0, 313.0, 322.0, 314.0, 311.0, 313.0, 317.0, 322.0, 287.0, 295.0, 313.0, 311.0, 302.0, 280.0, 314.0, 319.0, 295.0, 292.0, 322.0, 314.0, 316.0, 317.0, 324.0, 312.0, 294.0, 293.0, 314.0, 322.0, 311.0, 322.0, 324.0, 309.0, 316.0, 320.0, 318.0, 315.0, 315.0, 318.0, 307.0, 320.0, 296.0, 291.0, 313.0, 320.0, 293.0, 289.0, 288.0, 291.0, 319.0, 320.0, 316.0, 314.0, 321.0, 312.0, 308.0, 322.0, 292.0, 290.0, 320.0, 310.0, 313.0, 314.0, 316.0, 314.0, 319.0, 314.0, 291.0, 288.0, 256.0, 274.0, 283.0, 304.0, 286.0, 284.0, 319.0, 317.0, 312.0, 315.0, 291.0, 288.0, 319.0, 
317.0, 319.0, 314.0, 308.0, 322.0, 317.0, 319.0, 314.0, 310.0, 293.0, 294.0, 307.0, 314.0, 313.0, 317.0, 291.0, 291.0, 312.0, 324.0, 313.0, 317.0, 324.0, 309.0, 304.0, 292.0, 308.0, 322.0, 272.0, 269.0, 309.0, 327.0, 306.0, 324.0, 318.0, 309.0, 293.0, 289.0, 291.0, 291.0, 313.0, 314.0, 311.0, 319.0, 313.0, 317.0, 316.0, 323.0, 314.0, 319.0, 311.0, 316.0, 299.0, 291.0, 296.0, 291.0, 311.0, 322.0, 313.0, 317.0, 257.0, 279.0, 318.0, 306.0, 308.0, 322.0, 281.0, 294.0, 314.0, 322.0, 291.0, 296.0, 298.0, 292.0, 309.0, 321.0, 290.0, 297.0, 313.0, 320.0, 293.0, 294.0, 321.0, 312.0, 319.0, 314.0, 314.0, 316.0, 288.0, 282.0, 315.0, 315.0, 291.0, 296.0, 319.0, 314.0, 302.0, 280.0, 313.0, 314.0, 314.0, 319.0, 313.0, 323.0]}, "sampler_perf": {"mean_env_wait_ms": 0.9194369912164846, "mean_processing_ms": 0.25218136491616727, "mean_inference_ms": 1.5049782377407859}, "off_policy_estimator": {}, "info": {"num_steps_trained": 7704000, "num_steps_sampled": 4108800, "sample_time_ms": 22673.048, "load_time_ms": 37.017, "grad_time_ms": 10506.62, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.004381106700748205, "policy_loss": -0.0034678278025239706, "vf_loss": 84.1359634399414, "vf_explained_var": 0.762717068195343, "kl": 0.0020634233951568604, "entropy": 1.12932288646698, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4108800, "episodes_total": 10272, "training_iteration": 321, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-29-39", "timestamp": 1660256979, "time_this_iter_s": 35.97740912437439, "time_total_s": 15391.8118891716, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 15391.8118891716, "timesteps_since_restore": 4108800, "iterations_since_restore": 321, "perf": {"cpu_util_percent": 32.44705882352941, "ram_util_percent": 58.78039215686273}}
-{"episode_reward_max": 639.0, "episode_reward_min": 536.0, "episode_reward_mean": 611.56, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 257.0}, "policy_reward_max": {"ppo": 328.0}, "policy_reward_mean": {"ppo": 305.78}, "custom_metrics": {"sparse_reward_mean": 211.8, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 187.96, "shaped_reward_min": 164, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.75, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 18.77, "onion_pickup_agent_1_min": 13, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 16.41, "useful_onion_pickup_agent_0_min": 13, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 18.12, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.33, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.5, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.03, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.11, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.12, "potting_onion_agent_0_min": 13, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.98, "potting_onion_agent_1_min": 13, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.43, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.82, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.59, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.98, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.6, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.57, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.68, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.11, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.6, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.02, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.12, 
"optimal_onion_potting_agent_0_min": 13, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.98, "optimal_onion_potting_agent_1_min": 13, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.12, "viable_onion_potting_agent_0_min": 13, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.98, "viable_onion_potting_agent_1_min": 13, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [587.0, 587.0, 636.0, 627.0, 582.0, 582.0, 579.0, 630.0, 636.0, 573.0, 639.0, 633.0, 578.0, 564.0, 579.0, 630.0, 639.0, 587.0, 636.0, 627.0, 636.0, 633.0, 579.0, 636.0, 536.0, 630.0, 633.0, 584.0, 587.0, 567.0, 627.0, 636.0, 630.0, 627.0, 582.0, 582.0, 627.0, 630.0, 630.0, 639.0, 633.0, 627.0, 590.0, 587.0, 633.0, 630.0, 536.0, 624.0, 630.0, 575.0, 636.0, 587.0, 590.0, 630.0, 587.0, 633.0, 587.0, 633.0, 633.0, 630.0, 570.0, 630.0, 587.0, 633.0, 582.0, 627.0, 633.0, 636.0, 567.0, 582.0, 576.0, 587.0, 630.0, 636.0, 630.0, 636.0, 630.0, 636.0, 624.0, 639.0, 582.0, 624.0, 582.0, 633.0, 587.0, 636.0, 633.0, 636.0, 587.0, 636.0, 633.0, 633.0, 636.0, 633.0, 633.0, 627.0, 587.0, 633.0, 582.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [292.0, 295.0, 285.0, 302.0, 316.0, 320.0, 321.0, 306.0, 292.0, 290.0, 285.0, 297.0, 289.0, 290.0, 319.0, 311.0, 322.0, 314.0, 284.0, 289.0, 322.0, 317.0, 305.0, 328.0, 301.0, 277.0, 287.0, 277.0, 295.0, 284.0, 306.0, 324.0, 324.0, 315.0, 285.0, 302.0, 322.0, 314.0, 308.0, 319.0, 316.0, 320.0, 314.0, 319.0, 291.0, 288.0, 319.0, 317.0, 262.0, 274.0, 308.0, 322.0, 316.0, 317.0, 292.0, 292.0, 290.0, 297.0, 285.0, 282.0, 314.0, 313.0, 319.0, 317.0, 306.0, 324.0, 318.0, 309.0, 293.0, 289.0, 291.0, 291.0, 313.0, 314.0, 311.0, 319.0, 313.0, 317.0, 316.0, 323.0, 314.0, 319.0, 311.0, 316.0, 299.0, 291.0, 296.0, 291.0, 311.0, 322.0, 313.0, 317.0, 257.0, 279.0, 318.0, 306.0, 308.0, 
322.0, 281.0, 294.0, 314.0, 322.0, 291.0, 296.0, 298.0, 292.0, 309.0, 321.0, 290.0, 297.0, 313.0, 320.0, 293.0, 294.0, 321.0, 312.0, 319.0, 314.0, 314.0, 316.0, 288.0, 282.0, 315.0, 315.0, 291.0, 296.0, 319.0, 314.0, 302.0, 280.0, 313.0, 314.0, 314.0, 319.0, 313.0, 323.0, 285.0, 282.0, 287.0, 295.0, 289.0, 287.0, 299.0, 288.0, 326.0, 304.0, 321.0, 315.0, 311.0, 319.0, 314.0, 322.0, 317.0, 313.0, 322.0, 314.0, 311.0, 313.0, 317.0, 322.0, 287.0, 295.0, 313.0, 311.0, 302.0, 280.0, 314.0, 319.0, 295.0, 292.0, 322.0, 314.0, 316.0, 317.0, 324.0, 312.0, 294.0, 293.0, 314.0, 322.0, 311.0, 322.0, 324.0, 309.0, 316.0, 320.0, 318.0, 315.0, 315.0, 318.0, 307.0, 320.0, 296.0, 291.0, 313.0, 320.0, 293.0, 289.0, 288.0, 291.0]}, "sampler_perf": {"mean_env_wait_ms": 0.9180140863586184, "mean_processing_ms": 0.2518987033904737, "mean_inference_ms": 1.5041907922787607}, "off_policy_estimator": {}, "info": {"num_steps_trained": 7728000, "num_steps_sampled": 4121600, "sample_time_ms": 23101.321, "load_time_ms": 36.599, "grad_time_ms": 10589.639, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.00461611757054925, "policy_loss": -0.003138140542432666, "vf_loss": 83.23612213134766, "vf_explained_var": 0.7696110606193542, "kl": 0.0018815431976690888, "entropy": 1.1387158632278442, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4121600, "episodes_total": 10304, "training_iteration": 322, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-30-14", "timestamp": 1660257014, "time_this_iter_s": 34.30680704116821, "time_total_s": 15426.118696212769, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 15426.118696212769, "timesteps_since_restore": 4121600, "iterations_since_restore": 322, "perf": {"cpu_util_percent": 32.239583333333336, "ram_util_percent": 58.845833333333324}}
-{"episode_reward_max": 639.0, "episode_reward_min": 536.0, "episode_reward_mean": 612.88, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 262.0}, "policy_reward_max": {"ppo": 328.0}, "policy_reward_mean": {"ppo": 306.44}, "custom_metrics": {"sparse_reward_mean": 212.4, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 188.08, "shaped_reward_min": 161, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.92, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 18.77, "onion_pickup_agent_1_min": 13, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 16.56, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 18.03, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.33, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.51, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.06, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.13, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.28, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 18.01, "potting_onion_agent_1_min": 13, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.51, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.68, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.57, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.9, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.65, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.46, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.72, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.04, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.68, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.96, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.28, 
"optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 18.01, "optimal_onion_potting_agent_1_min": 13, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.28, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 18.01, "viable_onion_potting_agent_1_min": 13, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [633.0, 636.0, 630.0, 630.0, 627.0, 630.0, 636.0, 636.0, 636.0, 639.0, 536.0, 627.0, 630.0, 633.0, 639.0, 573.0, 636.0, 636.0, 561.0, 636.0, 582.0, 587.0, 633.0, 561.0, 639.0, 587.0, 633.0, 627.0, 570.0, 633.0, 579.0, 639.0, 582.0, 627.0, 633.0, 636.0, 567.0, 582.0, 576.0, 587.0, 630.0, 636.0, 630.0, 636.0, 630.0, 636.0, 624.0, 639.0, 582.0, 624.0, 582.0, 633.0, 587.0, 636.0, 633.0, 636.0, 587.0, 636.0, 633.0, 633.0, 636.0, 633.0, 633.0, 627.0, 587.0, 633.0, 582.0, 579.0, 587.0, 587.0, 636.0, 627.0, 582.0, 582.0, 579.0, 630.0, 636.0, 573.0, 639.0, 633.0, 578.0, 564.0, 579.0, 630.0, 639.0, 587.0, 636.0, 627.0, 636.0, 633.0, 579.0, 636.0, 536.0, 630.0, 633.0, 584.0, 587.0, 567.0, 627.0, 636.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [316.0, 317.0, 314.0, 322.0, 316.0, 314.0, 319.0, 311.0, 308.0, 319.0, 318.0, 312.0, 320.0, 316.0, 319.0, 317.0, 317.0, 319.0, 319.0, 320.0, 262.0, 274.0, 311.0, 316.0, 319.0, 311.0, 322.0, 311.0, 319.0, 320.0, 291.0, 282.0, 318.0, 318.0, 316.0, 320.0, 279.0, 282.0, 324.0, 312.0, 295.0, 287.0, 293.0, 294.0, 319.0, 314.0, 283.0, 278.0, 319.0, 320.0, 288.0, 299.0, 319.0, 314.0, 316.0, 311.0, 276.0, 294.0, 324.0, 309.0, 288.0, 291.0, 319.0, 320.0, 302.0, 280.0, 313.0, 314.0, 314.0, 319.0, 313.0, 323.0, 285.0, 282.0, 287.0, 295.0, 289.0, 287.0, 299.0, 288.0, 326.0, 304.0, 321.0, 315.0, 311.0, 319.0, 314.0, 322.0, 317.0, 313.0, 322.0, 314.0, 311.0, 313.0, 317.0, 322.0, 287.0, 
295.0, 313.0, 311.0, 302.0, 280.0, 314.0, 319.0, 295.0, 292.0, 322.0, 314.0, 316.0, 317.0, 324.0, 312.0, 294.0, 293.0, 314.0, 322.0, 311.0, 322.0, 324.0, 309.0, 316.0, 320.0, 318.0, 315.0, 315.0, 318.0, 307.0, 320.0, 296.0, 291.0, 313.0, 320.0, 293.0, 289.0, 288.0, 291.0, 292.0, 295.0, 285.0, 302.0, 316.0, 320.0, 321.0, 306.0, 292.0, 290.0, 285.0, 297.0, 289.0, 290.0, 319.0, 311.0, 322.0, 314.0, 284.0, 289.0, 322.0, 317.0, 305.0, 328.0, 301.0, 277.0, 287.0, 277.0, 295.0, 284.0, 306.0, 324.0, 324.0, 315.0, 285.0, 302.0, 322.0, 314.0, 308.0, 319.0, 316.0, 320.0, 314.0, 319.0, 291.0, 288.0, 319.0, 317.0, 262.0, 274.0, 308.0, 322.0, 316.0, 317.0, 292.0, 292.0, 290.0, 297.0, 285.0, 282.0, 314.0, 313.0, 319.0, 317.0]}, "sampler_perf": {"mean_env_wait_ms": 0.9165951119530592, "mean_processing_ms": 0.25161700044619856, "mean_inference_ms": 1.5033026848004383}, "off_policy_estimator": {}, "info": {"num_steps_trained": 7752000, "num_steps_sampled": 4134400, "sample_time_ms": 23061.265, "load_time_ms": 36.453, "grad_time_ms": 10583.338, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.006471332162618637, "policy_loss": -0.0015162205090746284, "vf_loss": 85.53211212158203, "vf_explained_var": 0.7684184908866882, "kl": 0.002009378978982568, "entropy": 1.1313238143920898, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4134400, "episodes_total": 10336, "training_iteration": 323, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-30-45", "timestamp": 1660257045, "time_this_iter_s": 31.667726039886475, "time_total_s": 15457.786422252655, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 15457.786422252655, "timesteps_since_restore": 4134400, "iterations_since_restore": 323, "perf": {"cpu_util_percent": 32.73111111111111, "ram_util_percent": 58.83555555555553}}
-{"episode_reward_max": 639.0, "episode_reward_min": 536.0, "episode_reward_mean": 610.56, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 262.0}, "policy_reward_max": {"ppo": 328.0}, "policy_reward_mean": {"ppo": 305.28}, "custom_metrics": {"sparse_reward_mean": 211.6, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 187.36, "shaped_reward_min": 158, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.28, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 18.39, "onion_pickup_agent_1_min": 12, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.82, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 17.62, "useful_onion_pickup_agent_1_min": 12, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.34, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.48, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.09, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.16, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 16.57, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.65, "potting_onion_agent_1_min": 11, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.38, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.98, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 5.37, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.98, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.65, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.6, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.05, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.56, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.17, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.54, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.07, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.57, 
"optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.65, "optimal_onion_potting_agent_1_min": 11, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.57, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.65, "viable_onion_potting_agent_1_min": 11, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [596.0, 636.0, 639.0, 633.0, 633.0, 584.0, 582.0, 579.0, 636.0, 639.0, 587.0, 579.0, 558.0, 627.0, 639.0, 573.0, 630.0, 627.0, 581.0, 639.0, 639.0, 639.0, 636.0, 636.0, 567.0, 576.0, 573.0, 570.0, 621.0, 636.0, 630.0, 630.0, 587.0, 633.0, 582.0, 579.0, 587.0, 587.0, 636.0, 627.0, 582.0, 582.0, 579.0, 630.0, 636.0, 573.0, 639.0, 633.0, 578.0, 564.0, 579.0, 630.0, 639.0, 587.0, 636.0, 627.0, 636.0, 633.0, 579.0, 636.0, 536.0, 630.0, 633.0, 584.0, 587.0, 567.0, 627.0, 636.0, 633.0, 636.0, 630.0, 630.0, 627.0, 630.0, 636.0, 636.0, 636.0, 639.0, 536.0, 627.0, 630.0, 633.0, 639.0, 573.0, 636.0, 636.0, 561.0, 636.0, 582.0, 587.0, 633.0, 561.0, 639.0, 587.0, 633.0, 627.0, 570.0, 633.0, 579.0, 639.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [297.0, 299.0, 319.0, 317.0, 319.0, 320.0, 317.0, 316.0, 317.0, 316.0, 304.0, 280.0, 288.0, 294.0, 289.0, 290.0, 322.0, 314.0, 317.0, 322.0, 288.0, 299.0, 290.0, 289.0, 274.0, 284.0, 316.0, 311.0, 322.0, 317.0, 290.0, 283.0, 319.0, 311.0, 314.0, 313.0, 288.0, 293.0, 319.0, 320.0, 322.0, 317.0, 325.0, 314.0, 314.0, 322.0, 319.0, 317.0, 296.0, 271.0, 281.0, 295.0, 277.0, 296.0, 287.0, 283.0, 307.0, 314.0, 314.0, 322.0, 313.0, 317.0, 316.0, 314.0, 296.0, 291.0, 313.0, 320.0, 293.0, 289.0, 288.0, 291.0, 292.0, 295.0, 285.0, 302.0, 316.0, 320.0, 321.0, 306.0, 292.0, 290.0, 285.0, 297.0, 289.0, 290.0, 319.0, 311.0, 322.0, 314.0, 284.0, 289.0, 322.0, 317.0, 305.0, 328.0, 301.0, 
277.0, 287.0, 277.0, 295.0, 284.0, 306.0, 324.0, 324.0, 315.0, 285.0, 302.0, 322.0, 314.0, 308.0, 319.0, 316.0, 320.0, 314.0, 319.0, 291.0, 288.0, 319.0, 317.0, 262.0, 274.0, 308.0, 322.0, 316.0, 317.0, 292.0, 292.0, 290.0, 297.0, 285.0, 282.0, 314.0, 313.0, 319.0, 317.0, 316.0, 317.0, 314.0, 322.0, 316.0, 314.0, 319.0, 311.0, 308.0, 319.0, 318.0, 312.0, 320.0, 316.0, 319.0, 317.0, 317.0, 319.0, 319.0, 320.0, 262.0, 274.0, 311.0, 316.0, 319.0, 311.0, 322.0, 311.0, 319.0, 320.0, 291.0, 282.0, 318.0, 318.0, 316.0, 320.0, 279.0, 282.0, 324.0, 312.0, 295.0, 287.0, 293.0, 294.0, 319.0, 314.0, 283.0, 278.0, 319.0, 320.0, 288.0, 299.0, 319.0, 314.0, 316.0, 311.0, 276.0, 294.0, 324.0, 309.0, 288.0, 291.0, 319.0, 320.0]}, "sampler_perf": {"mean_env_wait_ms": 0.9151684979449644, "mean_processing_ms": 0.2513316747000567, "mean_inference_ms": 1.5020585872568248}, "off_policy_estimator": {}, "info": {"num_steps_trained": 7776000, "num_steps_sampled": 4147200, "sample_time_ms": 23007.183, "load_time_ms": 36.594, "grad_time_ms": 10309.706, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.001403640490025282, "policy_loss": -0.009379498660564423, "vf_loss": 85.44359588623047, "vf_explained_var": 0.7652726769447327, "kl": 0.0018997077131643891, "entropy": 1.136988639831543, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4147200, "episodes_total": 10368, "training_iteration": 324, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-31-15", "timestamp": 1660257075, "time_this_iter_s": 30.05816674232483, "time_total_s": 15487.84458899498, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 15487.84458899498, "timesteps_since_restore": 4147200, "iterations_since_restore": 324, "perf": {"cpu_util_percent": 31.88372093023256, "ram_util_percent": 58.74418604651163}}
-{"episode_reward_max": 639.0, "episode_reward_min": 465.0, "episode_reward_mean": 611.97, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 229.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 305.985}, "custom_metrics": {"sparse_reward_mean": 212.4, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 187.17, "shaped_reward_min": 138, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.44, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 18.37, "onion_pickup_agent_1_min": 12, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.97, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 17.6, "useful_onion_pickup_agent_1_min": 12, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.46, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.48, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.14, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.17, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 16.63, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.55, "potting_onion_agent_1_min": 11, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.38, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 6.07, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 5.34, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.97, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.72, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.67, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.04, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.52, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.24, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.51, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.15, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.63, 
"optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.55, "optimal_onion_potting_agent_1_min": 11, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.63, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.55, "viable_onion_potting_agent_1_min": 11, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [630.0, 582.0, 630.0, 630.0, 630.0, 579.0, 636.0, 633.0, 593.0, 630.0, 587.0, 575.0, 633.0, 470.0, 633.0, 465.0, 633.0, 498.0, 633.0, 627.0, 636.0, 639.0, 627.0, 630.0, 630.0, 636.0, 627.0, 636.0, 636.0, 627.0, 630.0, 639.0, 587.0, 567.0, 627.0, 636.0, 633.0, 636.0, 630.0, 630.0, 627.0, 630.0, 636.0, 636.0, 636.0, 639.0, 536.0, 627.0, 630.0, 633.0, 639.0, 573.0, 636.0, 636.0, 561.0, 636.0, 582.0, 587.0, 633.0, 561.0, 639.0, 587.0, 633.0, 627.0, 570.0, 633.0, 579.0, 639.0, 596.0, 636.0, 639.0, 633.0, 633.0, 584.0, 582.0, 579.0, 636.0, 639.0, 587.0, 579.0, 558.0, 627.0, 639.0, 573.0, 630.0, 627.0, 581.0, 639.0, 639.0, 639.0, 636.0, 636.0, 567.0, 576.0, 573.0, 570.0, 621.0, 636.0, 630.0, 630.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [313.0, 317.0, 296.0, 286.0, 321.0, 309.0, 307.0, 323.0, 321.0, 309.0, 285.0, 294.0, 327.0, 309.0, 314.0, 319.0, 293.0, 300.0, 309.0, 321.0, 299.0, 288.0, 285.0, 290.0, 323.0, 310.0, 231.0, 239.0, 309.0, 324.0, 236.0, 229.0, 307.0, 326.0, 250.0, 248.0, 319.0, 314.0, 308.0, 319.0, 319.0, 317.0, 319.0, 320.0, 312.0, 315.0, 313.0, 317.0, 316.0, 314.0, 319.0, 317.0, 305.0, 322.0, 321.0, 315.0, 313.0, 323.0, 316.0, 311.0, 312.0, 318.0, 319.0, 320.0, 290.0, 297.0, 285.0, 282.0, 314.0, 313.0, 319.0, 317.0, 316.0, 317.0, 314.0, 322.0, 316.0, 314.0, 319.0, 311.0, 308.0, 319.0, 318.0, 312.0, 320.0, 316.0, 319.0, 317.0, 317.0, 319.0, 319.0, 320.0, 262.0, 274.0, 311.0, 316.0, 319.0, 
311.0, 322.0, 311.0, 319.0, 320.0, 291.0, 282.0, 318.0, 318.0, 316.0, 320.0, 279.0, 282.0, 324.0, 312.0, 295.0, 287.0, 293.0, 294.0, 319.0, 314.0, 283.0, 278.0, 319.0, 320.0, 288.0, 299.0, 319.0, 314.0, 316.0, 311.0, 276.0, 294.0, 324.0, 309.0, 288.0, 291.0, 319.0, 320.0, 297.0, 299.0, 319.0, 317.0, 319.0, 320.0, 317.0, 316.0, 317.0, 316.0, 304.0, 280.0, 288.0, 294.0, 289.0, 290.0, 322.0, 314.0, 317.0, 322.0, 288.0, 299.0, 290.0, 289.0, 274.0, 284.0, 316.0, 311.0, 322.0, 317.0, 290.0, 283.0, 319.0, 311.0, 314.0, 313.0, 288.0, 293.0, 319.0, 320.0, 322.0, 317.0, 325.0, 314.0, 314.0, 322.0, 319.0, 317.0, 296.0, 271.0, 281.0, 295.0, 277.0, 296.0, 287.0, 283.0, 307.0, 314.0, 314.0, 322.0, 313.0, 317.0, 316.0, 314.0]}, "sampler_perf": {"mean_env_wait_ms": 0.9137344731192653, "mean_processing_ms": 0.2510432909241615, "mean_inference_ms": 1.5004598778963754}, "off_policy_estimator": {}, "info": {"num_steps_trained": 7800000, "num_steps_sampled": 4160000, "sample_time_ms": 21864.49, "load_time_ms": 36.474, "grad_time_ms": 10036.502, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0015124385245144367, "policy_loss": -0.006040909793227911, "vf_loss": 81.20950317382812, "vf_explained_var": 0.7680754661560059, "kl": 0.0019407202489674091, "entropy": 1.135194182395935, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4160000, "episodes_total": 10400, "training_iteration": 325, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-31-44", "timestamp": 1660257104, "time_this_iter_s": 28.25439429283142, "time_total_s": 15516.098983287811, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 15516.098983287811, "timesteps_since_restore": 4160000, "iterations_since_restore": 325, "perf": {"cpu_util_percent": 34.2675, "ram_util_percent": 58.785000000000004}}
-{"episode_reward_max": 639.0, "episode_reward_min": 465.0, "episode_reward_mean": 613.63, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 229.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 306.815}, "custom_metrics": {"sparse_reward_mean": 213.0, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 187.63, "shaped_reward_min": 138, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.51, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 18.3, "onion_pickup_agent_1_min": 12, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 17.01, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 17.65, "useful_onion_pickup_agent_1_min": 12, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.55, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.39, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.15, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.12, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 16.64, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.54, "potting_onion_agent_1_min": 11, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.29, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 6.15, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 5.32, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.11, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.7, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.69, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.04, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.47, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.31, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.44, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.24, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.64, 
"optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.54, "optimal_onion_potting_agent_1_min": 11, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.64, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.54, "viable_onion_potting_agent_1_min": 11, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [636.0, 636.0, 630.0, 587.0, 639.0, 587.0, 633.0, 630.0, 639.0, 633.0, 582.0, 567.0, 630.0, 630.0, 633.0, 636.0, 576.0, 633.0, 587.0, 624.0, 636.0, 639.0, 639.0, 636.0, 630.0, 627.0, 612.0, 633.0, 570.0, 636.0, 630.0, 636.0, 570.0, 633.0, 579.0, 639.0, 596.0, 636.0, 639.0, 633.0, 633.0, 584.0, 582.0, 579.0, 636.0, 639.0, 587.0, 579.0, 558.0, 627.0, 639.0, 573.0, 630.0, 627.0, 581.0, 639.0, 639.0, 639.0, 636.0, 636.0, 567.0, 576.0, 573.0, 570.0, 621.0, 636.0, 630.0, 630.0, 630.0, 582.0, 630.0, 630.0, 630.0, 579.0, 636.0, 633.0, 593.0, 630.0, 587.0, 575.0, 633.0, 470.0, 633.0, 465.0, 633.0, 498.0, 633.0, 627.0, 636.0, 639.0, 627.0, 630.0, 630.0, 636.0, 627.0, 636.0, 636.0, 627.0, 630.0, 639.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [324.0, 312.0, 322.0, 314.0, 310.0, 320.0, 286.0, 301.0, 319.0, 320.0, 290.0, 297.0, 314.0, 319.0, 311.0, 319.0, 321.0, 318.0, 314.0, 319.0, 289.0, 293.0, 291.0, 276.0, 311.0, 319.0, 318.0, 312.0, 319.0, 314.0, 314.0, 322.0, 288.0, 288.0, 317.0, 316.0, 298.0, 289.0, 316.0, 308.0, 319.0, 317.0, 316.0, 323.0, 317.0, 322.0, 311.0, 325.0, 303.0, 327.0, 305.0, 322.0, 310.0, 302.0, 316.0, 317.0, 285.0, 285.0, 319.0, 317.0, 312.0, 318.0, 314.0, 322.0, 276.0, 294.0, 324.0, 309.0, 288.0, 291.0, 319.0, 320.0, 297.0, 299.0, 319.0, 317.0, 319.0, 320.0, 317.0, 316.0, 317.0, 316.0, 304.0, 280.0, 288.0, 294.0, 289.0, 290.0, 322.0, 314.0, 317.0, 322.0, 288.0, 299.0, 290.0, 289.0, 274.0, 
284.0, 316.0, 311.0, 322.0, 317.0, 290.0, 283.0, 319.0, 311.0, 314.0, 313.0, 288.0, 293.0, 319.0, 320.0, 322.0, 317.0, 325.0, 314.0, 314.0, 322.0, 319.0, 317.0, 296.0, 271.0, 281.0, 295.0, 277.0, 296.0, 287.0, 283.0, 307.0, 314.0, 314.0, 322.0, 313.0, 317.0, 316.0, 314.0, 313.0, 317.0, 296.0, 286.0, 321.0, 309.0, 307.0, 323.0, 321.0, 309.0, 285.0, 294.0, 327.0, 309.0, 314.0, 319.0, 293.0, 300.0, 309.0, 321.0, 299.0, 288.0, 285.0, 290.0, 323.0, 310.0, 231.0, 239.0, 309.0, 324.0, 236.0, 229.0, 307.0, 326.0, 250.0, 248.0, 319.0, 314.0, 308.0, 319.0, 319.0, 317.0, 319.0, 320.0, 312.0, 315.0, 313.0, 317.0, 316.0, 314.0, 319.0, 317.0, 305.0, 322.0, 321.0, 315.0, 313.0, 323.0, 316.0, 311.0, 312.0, 318.0, 319.0, 320.0]}, "sampler_perf": {"mean_env_wait_ms": 0.9123094574385331, "mean_processing_ms": 0.25075636823694536, "mean_inference_ms": 1.49889087709384}, "off_policy_estimator": {}, "info": {"num_steps_trained": 7824000, "num_steps_sampled": 4172800, "sample_time_ms": 21942.597, "load_time_ms": 36.578, "grad_time_ms": 9969.611, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.004511403385549784, "policy_loss": -0.0029741593170911074, "vf_loss": 80.4991683959961, "vf_explained_var": 0.7677297592163086, "kl": 0.002298202132806182, "entropy": 1.1287130117416382, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4172800, "episodes_total": 10432, "training_iteration": 326, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-32-15", "timestamp": 1660257135, "time_this_iter_s": 31.342418909072876, "time_total_s": 15547.441402196884, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 15547.441402196884, "timesteps_since_restore": 4172800, "iterations_since_restore": 326, "perf": {"cpu_util_percent": 35.46363636363637, "ram_util_percent": 58.74999999999999}}
-{"episode_reward_max": 644.0, "episode_reward_min": 354.0, "episode_reward_mean": 615.31, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 175.0}, "policy_reward_max": {"ppo": 330.0}, "policy_reward_mean": {"ppo": 307.655}, "custom_metrics": {"sparse_reward_mean": 213.6, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 188.11, "shaped_reward_min": 114, "shaped_reward_max": 204, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.28, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 18.47, "onion_pickup_agent_1_min": 14, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.8, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 17.84, "useful_onion_pickup_agent_1_min": 12, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.59, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.34, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.16, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.11, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.42, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.75, "potting_onion_agent_1_min": 13, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.35, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.85, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.5, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.05, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.6, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.58, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.67, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.15, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.6, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.1, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.42, 
"optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.75, "optimal_onion_potting_agent_1_min": 13, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.42, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.75, "viable_onion_potting_agent_1_min": 13, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [573.0, 579.0, 627.0, 354.0, 644.0, 639.0, 636.0, 627.0, 630.0, 630.0, 630.0, 579.0, 627.0, 636.0, 636.0, 636.0, 633.0, 587.0, 636.0, 627.0, 636.0, 633.0, 630.0, 636.0, 587.0, 633.0, 636.0, 576.0, 590.0, 627.0, 636.0, 636.0, 621.0, 636.0, 630.0, 630.0, 630.0, 582.0, 630.0, 630.0, 630.0, 579.0, 636.0, 633.0, 593.0, 630.0, 587.0, 575.0, 633.0, 470.0, 633.0, 465.0, 633.0, 498.0, 633.0, 627.0, 636.0, 639.0, 627.0, 630.0, 630.0, 636.0, 627.0, 636.0, 636.0, 627.0, 630.0, 639.0, 636.0, 636.0, 630.0, 587.0, 639.0, 587.0, 633.0, 630.0, 639.0, 633.0, 582.0, 567.0, 630.0, 630.0, 633.0, 636.0, 576.0, 633.0, 587.0, 624.0, 636.0, 639.0, 639.0, 636.0, 630.0, 627.0, 612.0, 633.0, 570.0, 636.0, 630.0, 636.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [288.0, 285.0, 297.0, 282.0, 316.0, 311.0, 179.0, 175.0, 330.0, 314.0, 317.0, 322.0, 317.0, 319.0, 311.0, 316.0, 318.0, 312.0, 316.0, 314.0, 319.0, 311.0, 293.0, 286.0, 308.0, 319.0, 316.0, 320.0, 330.0, 306.0, 317.0, 319.0, 319.0, 314.0, 299.0, 288.0, 319.0, 317.0, 318.0, 309.0, 324.0, 312.0, 319.0, 314.0, 314.0, 316.0, 324.0, 312.0, 304.0, 283.0, 321.0, 312.0, 319.0, 317.0, 279.0, 297.0, 294.0, 296.0, 318.0, 309.0, 321.0, 315.0, 316.0, 320.0, 307.0, 314.0, 314.0, 322.0, 313.0, 317.0, 316.0, 314.0, 313.0, 317.0, 296.0, 286.0, 321.0, 309.0, 307.0, 323.0, 321.0, 309.0, 285.0, 294.0, 327.0, 309.0, 314.0, 319.0, 293.0, 300.0, 309.0, 321.0, 299.0, 288.0, 285.0, 290.0, 323.0, 
310.0, 231.0, 239.0, 309.0, 324.0, 236.0, 229.0, 307.0, 326.0, 250.0, 248.0, 319.0, 314.0, 308.0, 319.0, 319.0, 317.0, 319.0, 320.0, 312.0, 315.0, 313.0, 317.0, 316.0, 314.0, 319.0, 317.0, 305.0, 322.0, 321.0, 315.0, 313.0, 323.0, 316.0, 311.0, 312.0, 318.0, 319.0, 320.0, 324.0, 312.0, 322.0, 314.0, 310.0, 320.0, 286.0, 301.0, 319.0, 320.0, 290.0, 297.0, 314.0, 319.0, 311.0, 319.0, 321.0, 318.0, 314.0, 319.0, 289.0, 293.0, 291.0, 276.0, 311.0, 319.0, 318.0, 312.0, 319.0, 314.0, 314.0, 322.0, 288.0, 288.0, 317.0, 316.0, 298.0, 289.0, 316.0, 308.0, 319.0, 317.0, 316.0, 323.0, 317.0, 322.0, 311.0, 325.0, 303.0, 327.0, 305.0, 322.0, 310.0, 302.0, 316.0, 317.0, 285.0, 285.0, 319.0, 317.0, 312.0, 318.0, 314.0, 322.0]}, "sampler_perf": {"mean_env_wait_ms": 0.9108943505628629, "mean_processing_ms": 0.25047292677413735, "mean_inference_ms": 1.4973202589318924}, "off_policy_estimator": {}, "info": {"num_steps_trained": 7848000, "num_steps_sampled": 4185600, "sample_time_ms": 21889.076, "load_time_ms": 35.811, "grad_time_ms": 9803.849, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.004534369800239801, "policy_loss": -0.0032402947545051575, "vf_loss": 83.42310333251953, "vf_explained_var": 0.7677843570709229, "kl": 0.0018213322618976235, "entropy": 1.135262131690979, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4185600, "episodes_total": 10464, "training_iteration": 327, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-32-44", "timestamp": 1660257164, "time_this_iter_s": 29.095314025878906, "time_total_s": 15576.536716222763, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 15576.536716222763, "timesteps_since_restore": 4185600, "iterations_since_restore": 327, "perf": {"cpu_util_percent": 33.5219512195122, "ram_util_percent": 58.87073170731708}}
-{"episode_reward_max": 644.0, "episode_reward_min": 354.0, "episode_reward_mean": 615.14, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 175.0}, "policy_reward_max": {"ppo": 330.0}, "policy_reward_mean": {"ppo": 307.57}, "custom_metrics": {"sparse_reward_mean": 213.0, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 189.14, "shaped_reward_min": 114, "shaped_reward_max": 204, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.23, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 18.35, "onion_pickup_agent_1_min": 14, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.78, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 17.78, "useful_onion_pickup_agent_1_min": 12, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.42, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.3, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.11, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.08, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.52, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.75, "potting_onion_agent_1_min": 13, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.16, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.82, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.56, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.25, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.41, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.44, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.61, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.19, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.52, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.14, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.52, 
"optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.75, "optimal_onion_potting_agent_1_min": 13, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.52, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.75, "viable_onion_potting_agent_1_min": 13, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [630.0, 630.0, 630.0, 636.0, 465.0, 639.0, 639.0, 587.0, 639.0, 633.0, 582.0, 587.0, 639.0, 633.0, 636.0, 636.0, 596.0, 587.0, 639.0, 630.0, 633.0, 639.0, 582.0, 636.0, 479.0, 587.0, 630.0, 639.0, 639.0, 516.0, 582.0, 633.0, 636.0, 627.0, 630.0, 639.0, 636.0, 636.0, 630.0, 587.0, 639.0, 587.0, 633.0, 630.0, 639.0, 633.0, 582.0, 567.0, 630.0, 630.0, 633.0, 636.0, 576.0, 633.0, 587.0, 624.0, 636.0, 639.0, 639.0, 636.0, 630.0, 627.0, 612.0, 633.0, 570.0, 636.0, 630.0, 636.0, 573.0, 579.0, 627.0, 354.0, 644.0, 639.0, 636.0, 627.0, 630.0, 630.0, 630.0, 579.0, 627.0, 636.0, 636.0, 636.0, 633.0, 587.0, 636.0, 627.0, 636.0, 633.0, 630.0, 636.0, 587.0, 633.0, 636.0, 576.0, 590.0, 627.0, 636.0, 636.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [303.0, 327.0, 312.0, 318.0, 317.0, 313.0, 314.0, 322.0, 234.0, 231.0, 322.0, 317.0, 322.0, 317.0, 288.0, 299.0, 317.0, 322.0, 307.0, 326.0, 290.0, 292.0, 295.0, 292.0, 311.0, 328.0, 306.0, 327.0, 322.0, 314.0, 314.0, 322.0, 294.0, 302.0, 289.0, 298.0, 321.0, 318.0, 313.0, 317.0, 310.0, 323.0, 316.0, 323.0, 291.0, 291.0, 316.0, 320.0, 234.0, 245.0, 293.0, 294.0, 316.0, 314.0, 327.0, 312.0, 324.0, 315.0, 265.0, 251.0, 288.0, 294.0, 314.0, 319.0, 313.0, 323.0, 316.0, 311.0, 312.0, 318.0, 319.0, 320.0, 324.0, 312.0, 322.0, 314.0, 310.0, 320.0, 286.0, 301.0, 319.0, 320.0, 290.0, 297.0, 314.0, 319.0, 311.0, 319.0, 321.0, 318.0, 314.0, 319.0, 289.0, 293.0, 291.0, 276.0, 311.0, 
319.0, 318.0, 312.0, 319.0, 314.0, 314.0, 322.0, 288.0, 288.0, 317.0, 316.0, 298.0, 289.0, 316.0, 308.0, 319.0, 317.0, 316.0, 323.0, 317.0, 322.0, 311.0, 325.0, 303.0, 327.0, 305.0, 322.0, 310.0, 302.0, 316.0, 317.0, 285.0, 285.0, 319.0, 317.0, 312.0, 318.0, 314.0, 322.0, 288.0, 285.0, 297.0, 282.0, 316.0, 311.0, 179.0, 175.0, 330.0, 314.0, 317.0, 322.0, 317.0, 319.0, 311.0, 316.0, 318.0, 312.0, 316.0, 314.0, 319.0, 311.0, 293.0, 286.0, 308.0, 319.0, 316.0, 320.0, 330.0, 306.0, 317.0, 319.0, 319.0, 314.0, 299.0, 288.0, 319.0, 317.0, 318.0, 309.0, 324.0, 312.0, 319.0, 314.0, 314.0, 316.0, 324.0, 312.0, 304.0, 283.0, 321.0, 312.0, 319.0, 317.0, 279.0, 297.0, 294.0, 296.0, 318.0, 309.0, 321.0, 315.0, 316.0, 320.0]}, "sampler_perf": {"mean_env_wait_ms": 0.9094951926222233, "mean_processing_ms": 0.25019344496392953, "mean_inference_ms": 1.495850505323708}, "off_policy_estimator": {}, "info": {"num_steps_trained": 7872000, "num_steps_sampled": 4198400, "sample_time_ms": 21863.329, "load_time_ms": 36.113, "grad_time_ms": 9649.438, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0027417896781116724, "policy_loss": -0.00593235669657588, "vf_loss": 92.42369842529297, "vf_explained_var": 0.7659929394721985, "kl": 0.0018632843857631087, "entropy": 1.1364426612854004, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4198400, "episodes_total": 10496, "training_iteration": 328, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-33-13", "timestamp": 1660257193, "time_this_iter_s": 29.17238187789917, "time_total_s": 15605.709098100662, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 15605.709098100662, "timesteps_since_restore": 4198400, "iterations_since_restore": 328, "perf": {"cpu_util_percent": 35.3780487804878, "ram_util_percent": 58.91219512195122}}
-{"episode_reward_max": 644.0, "episode_reward_min": 354.0, "episode_reward_mean": 609.54, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 175.0}, "policy_reward_max": {"ppo": 330.0}, "policy_reward_mean": {"ppo": 304.77}, "custom_metrics": {"sparse_reward_mean": 211.0, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 187.54, "shaped_reward_min": 114, "shaped_reward_max": 204, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.91, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 18.45, "onion_pickup_agent_1_min": 14, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.43, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 17.78, "useful_onion_pickup_agent_1_min": 12, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.35, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.36, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.09, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.14, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.25, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.73, "potting_onion_agent_1_min": 13, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.27, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.53, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.68, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.02, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.39, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.36, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.7, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.02, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.61, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.96, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.03, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.25, 
"optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.73, "optimal_onion_potting_agent_1_min": 13, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.25, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.73, "viable_onion_potting_agent_1_min": 13, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [587.0, 633.0, 582.0, 627.0, 582.0, 639.0, 582.0, 582.0, 630.0, 630.0, 633.0, 636.0, 630.0, 584.0, 530.0, 587.0, 636.0, 573.0, 630.0, 624.0, 627.0, 627.0, 630.0, 549.0, 633.0, 633.0, 482.0, 630.0, 579.0, 636.0, 579.0, 630.0, 570.0, 636.0, 630.0, 636.0, 573.0, 579.0, 627.0, 354.0, 644.0, 639.0, 636.0, 627.0, 630.0, 630.0, 630.0, 579.0, 627.0, 636.0, 636.0, 636.0, 633.0, 587.0, 636.0, 627.0, 636.0, 633.0, 630.0, 636.0, 587.0, 633.0, 636.0, 576.0, 590.0, 627.0, 636.0, 636.0, 630.0, 630.0, 630.0, 636.0, 465.0, 639.0, 639.0, 587.0, 639.0, 633.0, 582.0, 587.0, 639.0, 633.0, 636.0, 636.0, 596.0, 587.0, 639.0, 630.0, 633.0, 639.0, 582.0, 636.0, 479.0, 587.0, 630.0, 639.0, 639.0, 516.0, 582.0, 633.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [288.0, 299.0, 317.0, 316.0, 291.0, 291.0, 321.0, 306.0, 296.0, 286.0, 316.0, 323.0, 293.0, 289.0, 295.0, 287.0, 319.0, 311.0, 319.0, 311.0, 319.0, 314.0, 319.0, 317.0, 314.0, 316.0, 288.0, 296.0, 265.0, 265.0, 292.0, 295.0, 313.0, 323.0, 291.0, 282.0, 323.0, 307.0, 310.0, 314.0, 309.0, 318.0, 324.0, 303.0, 316.0, 314.0, 275.0, 274.0, 318.0, 315.0, 322.0, 311.0, 245.0, 237.0, 313.0, 317.0, 293.0, 286.0, 314.0, 322.0, 290.0, 289.0, 316.0, 314.0, 285.0, 285.0, 319.0, 317.0, 312.0, 318.0, 314.0, 322.0, 288.0, 285.0, 297.0, 282.0, 316.0, 311.0, 179.0, 175.0, 330.0, 314.0, 317.0, 322.0, 317.0, 319.0, 311.0, 316.0, 318.0, 312.0, 316.0, 314.0, 319.0, 311.0, 293.0, 286.0, 308.0, 
319.0, 316.0, 320.0, 330.0, 306.0, 317.0, 319.0, 319.0, 314.0, 299.0, 288.0, 319.0, 317.0, 318.0, 309.0, 324.0, 312.0, 319.0, 314.0, 314.0, 316.0, 324.0, 312.0, 304.0, 283.0, 321.0, 312.0, 319.0, 317.0, 279.0, 297.0, 294.0, 296.0, 318.0, 309.0, 321.0, 315.0, 316.0, 320.0, 303.0, 327.0, 312.0, 318.0, 317.0, 313.0, 314.0, 322.0, 234.0, 231.0, 322.0, 317.0, 322.0, 317.0, 288.0, 299.0, 317.0, 322.0, 307.0, 326.0, 290.0, 292.0, 295.0, 292.0, 311.0, 328.0, 306.0, 327.0, 322.0, 314.0, 314.0, 322.0, 294.0, 302.0, 289.0, 298.0, 321.0, 318.0, 313.0, 317.0, 310.0, 323.0, 316.0, 323.0, 291.0, 291.0, 316.0, 320.0, 234.0, 245.0, 293.0, 294.0, 316.0, 314.0, 327.0, 312.0, 324.0, 315.0, 265.0, 251.0, 288.0, 294.0, 314.0, 319.0]}, "sampler_perf": {"mean_env_wait_ms": 0.908108829511899, "mean_processing_ms": 0.24991601885712256, "mean_inference_ms": 1.4943968227370834}, "off_policy_estimator": {}, "info": {"num_steps_trained": 7896000, "num_steps_sampled": 4211200, "sample_time_ms": 21816.04, "load_time_ms": 36.245, "grad_time_ms": 9396.983, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.00012729612353723496, "policy_loss": -0.007717677857726812, "vf_loss": 81.6099853515625, "vf_explained_var": 0.7742553353309631, "kl": 0.0021955876145511866, "entropy": 1.1412299871444702, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4211200, "episodes_total": 10528, "training_iteration": 329, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-33-44", "timestamp": 1660257224, "time_this_iter_s": 30.190826892852783, "time_total_s": 15635.899924993515, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 15635.899924993515, "timesteps_since_restore": 4211200, "iterations_since_restore": 329, "perf": {"cpu_util_percent": 32.737209302325574, "ram_util_percent": 59.3720930232558}}
-{"episode_reward_max": 639.0, "episode_reward_min": 465.0, "episode_reward_mean": 611.61, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 231.0}, "policy_reward_max": {"ppo": 328.0}, "policy_reward_mean": {"ppo": 305.805}, "custom_metrics": {"sparse_reward_mean": 211.8, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 188.01, "shaped_reward_min": 145, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.06, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 18.46, "onion_pickup_agent_1_min": 14, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.61, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 17.78, "useful_onion_pickup_agent_1_min": 12, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.34, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.39, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.07, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.13, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.35, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.71, "potting_onion_agent_1_min": 13, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.2, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.64, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.58, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.13, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.41, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.33, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.6, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.15, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.55, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.06, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.35, 
"optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.71, "optimal_onion_potting_agent_1_min": 13, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.35, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.71, "viable_onion_potting_agent_1_min": 13, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [639.0, 633.0, 525.0, 630.0, 587.0, 630.0, 630.0, 615.0, 633.0, 630.0, 636.0, 627.0, 584.0, 633.0, 639.0, 582.0, 630.0, 587.0, 630.0, 630.0, 579.0, 630.0, 633.0, 621.0, 630.0, 636.0, 639.0, 636.0, 639.0, 582.0, 627.0, 630.0, 590.0, 627.0, 636.0, 636.0, 630.0, 630.0, 630.0, 636.0, 465.0, 639.0, 639.0, 587.0, 639.0, 633.0, 582.0, 587.0, 639.0, 633.0, 636.0, 636.0, 596.0, 587.0, 639.0, 630.0, 633.0, 639.0, 582.0, 636.0, 479.0, 587.0, 630.0, 639.0, 639.0, 516.0, 582.0, 633.0, 587.0, 633.0, 582.0, 627.0, 582.0, 639.0, 582.0, 582.0, 630.0, 630.0, 633.0, 636.0, 630.0, 584.0, 530.0, 587.0, 636.0, 573.0, 630.0, 624.0, 627.0, 627.0, 630.0, 549.0, 633.0, 633.0, 482.0, 630.0, 579.0, 636.0, 579.0, 630.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [316.0, 323.0, 309.0, 324.0, 258.0, 267.0, 314.0, 316.0, 291.0, 296.0, 314.0, 316.0, 313.0, 317.0, 307.0, 308.0, 322.0, 311.0, 321.0, 309.0, 317.0, 319.0, 310.0, 317.0, 285.0, 299.0, 319.0, 314.0, 322.0, 317.0, 294.0, 288.0, 321.0, 309.0, 286.0, 301.0, 319.0, 311.0, 314.0, 316.0, 291.0, 288.0, 319.0, 311.0, 316.0, 317.0, 311.0, 310.0, 316.0, 314.0, 319.0, 317.0, 324.0, 315.0, 319.0, 317.0, 319.0, 320.0, 278.0, 304.0, 316.0, 311.0, 316.0, 314.0, 294.0, 296.0, 318.0, 309.0, 321.0, 315.0, 316.0, 320.0, 303.0, 327.0, 312.0, 318.0, 317.0, 313.0, 314.0, 322.0, 234.0, 231.0, 322.0, 317.0, 322.0, 317.0, 288.0, 299.0, 317.0, 322.0, 307.0, 326.0, 290.0, 292.0, 295.0, 292.0, 311.0, 
328.0, 306.0, 327.0, 322.0, 314.0, 314.0, 322.0, 294.0, 302.0, 289.0, 298.0, 321.0, 318.0, 313.0, 317.0, 310.0, 323.0, 316.0, 323.0, 291.0, 291.0, 316.0, 320.0, 234.0, 245.0, 293.0, 294.0, 316.0, 314.0, 327.0, 312.0, 324.0, 315.0, 265.0, 251.0, 288.0, 294.0, 314.0, 319.0, 288.0, 299.0, 317.0, 316.0, 291.0, 291.0, 321.0, 306.0, 296.0, 286.0, 316.0, 323.0, 293.0, 289.0, 295.0, 287.0, 319.0, 311.0, 319.0, 311.0, 319.0, 314.0, 319.0, 317.0, 314.0, 316.0, 288.0, 296.0, 265.0, 265.0, 292.0, 295.0, 313.0, 323.0, 291.0, 282.0, 323.0, 307.0, 310.0, 314.0, 309.0, 318.0, 324.0, 303.0, 316.0, 314.0, 275.0, 274.0, 318.0, 315.0, 322.0, 311.0, 245.0, 237.0, 313.0, 317.0, 293.0, 286.0, 314.0, 322.0, 290.0, 289.0, 316.0, 314.0]}, "sampler_perf": {"mean_env_wait_ms": 0.9067267612642047, "mean_processing_ms": 0.2496377464704088, "mean_inference_ms": 1.4929051187388651}, "off_policy_estimator": {}, "info": {"num_steps_trained": 7920000, "num_steps_sampled": 4224000, "sample_time_ms": 21531.777, "load_time_ms": 36.36, "grad_time_ms": 9360.92, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0021365683060139418, "policy_loss": -0.005692864302545786, "vf_loss": 83.99735260009766, "vf_explained_var": 0.7644996643066406, "kl": 0.0020622028969228268, "entropy": 1.140602469444275, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4224000, "episodes_total": 10560, "training_iteration": 330, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-34-13", "timestamp": 1660257253, "time_this_iter_s": 29.480799913406372, "time_total_s": 15665.380724906921, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 15665.380724906921, "timesteps_since_restore": 4224000, "iterations_since_restore": 330, "perf": {"cpu_util_percent": 35.38095238095239, "ram_util_percent": 58.940476190476204}}
-{"episode_reward_max": 639.0, "episode_reward_min": 482.0, "episode_reward_mean": 614.36, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 237.0}, "policy_reward_max": {"ppo": 328.0}, "policy_reward_mean": {"ppo": 307.18}, "custom_metrics": {"sparse_reward_mean": 213.2, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 187.96, "shaped_reward_min": 149, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.19, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 18.43, "onion_pickup_agent_1_min": 14, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.72, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 17.79, "useful_onion_pickup_agent_1_min": 12, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.46, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.37, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.09, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.11, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.4, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.73, "potting_onion_agent_1_min": 13, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.36, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.63, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.6, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.99, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.52, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.41, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.66, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.11, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.64, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.03, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.4, 
"optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.73, "optimal_onion_potting_agent_1_min": 13, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.4, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.73, "viable_onion_potting_agent_1_min": 13, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 567.0, 587.0, 639.0, 627.0, 627.0, 639.0, 636.0, 636.0, 630.0, 636.0, 636.0, 627.0, 636.0, 584.0, 630.0, 627.0, 630.0, 633.0, 630.0, 579.0, 633.0, 633.0, 627.0, 639.0, 633.0, 633.0, 639.0, 633.0, 576.0, 636.0, 582.0, 639.0, 516.0, 582.0, 633.0, 587.0, 633.0, 582.0, 627.0, 582.0, 639.0, 582.0, 582.0, 630.0, 630.0, 633.0, 636.0, 630.0, 584.0, 530.0, 587.0, 636.0, 573.0, 630.0, 624.0, 627.0, 627.0, 630.0, 549.0, 633.0, 633.0, 482.0, 630.0, 579.0, 636.0, 579.0, 630.0, 639.0, 633.0, 525.0, 630.0, 587.0, 630.0, 630.0, 615.0, 633.0, 630.0, 636.0, 627.0, 584.0, 633.0, 639.0, 582.0, 630.0, 587.0, 630.0, 630.0, 579.0, 630.0, 633.0, 621.0, 630.0, 636.0, 639.0, 636.0, 639.0, 582.0, 627.0, 630.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [297.0, 285.0, 285.0, 282.0, 289.0, 298.0, 317.0, 322.0, 315.0, 312.0, 321.0, 306.0, 314.0, 325.0, 314.0, 322.0, 308.0, 328.0, 319.0, 311.0, 322.0, 314.0, 319.0, 317.0, 308.0, 319.0, 316.0, 320.0, 295.0, 289.0, 319.0, 311.0, 327.0, 300.0, 318.0, 312.0, 311.0, 322.0, 321.0, 309.0, 280.0, 299.0, 310.0, 323.0, 312.0, 321.0, 308.0, 319.0, 318.0, 321.0, 315.0, 318.0, 321.0, 312.0, 318.0, 321.0, 317.0, 316.0, 291.0, 285.0, 319.0, 317.0, 290.0, 292.0, 324.0, 315.0, 265.0, 251.0, 288.0, 294.0, 314.0, 319.0, 288.0, 299.0, 317.0, 316.0, 291.0, 291.0, 321.0, 306.0, 296.0, 286.0, 316.0, 323.0, 293.0, 289.0, 295.0, 287.0, 319.0, 311.0, 319.0, 311.0, 319.0, 314.0, 319.0, 317.0, 314.0, 
316.0, 288.0, 296.0, 265.0, 265.0, 292.0, 295.0, 313.0, 323.0, 291.0, 282.0, 323.0, 307.0, 310.0, 314.0, 309.0, 318.0, 324.0, 303.0, 316.0, 314.0, 275.0, 274.0, 318.0, 315.0, 322.0, 311.0, 245.0, 237.0, 313.0, 317.0, 293.0, 286.0, 314.0, 322.0, 290.0, 289.0, 316.0, 314.0, 316.0, 323.0, 309.0, 324.0, 258.0, 267.0, 314.0, 316.0, 291.0, 296.0, 314.0, 316.0, 313.0, 317.0, 307.0, 308.0, 322.0, 311.0, 321.0, 309.0, 317.0, 319.0, 310.0, 317.0, 285.0, 299.0, 319.0, 314.0, 322.0, 317.0, 294.0, 288.0, 321.0, 309.0, 286.0, 301.0, 319.0, 311.0, 314.0, 316.0, 291.0, 288.0, 319.0, 311.0, 316.0, 317.0, 311.0, 310.0, 316.0, 314.0, 319.0, 317.0, 324.0, 315.0, 319.0, 317.0, 319.0, 320.0, 278.0, 304.0, 316.0, 311.0, 316.0, 314.0]}, "sampler_perf": {"mean_env_wait_ms": 0.9053530084533031, "mean_processing_ms": 0.24936183583486593, "mean_inference_ms": 1.491518263590993}, "off_policy_estimator": {}, "info": {"num_steps_trained": 7944000, "num_steps_sampled": 4236800, "sample_time_ms": 21154.165, "load_time_ms": 36.748, "grad_time_ms": 9274.795, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -8.082172280410305e-05, "policy_loss": -0.00758820166811347, "vf_loss": 80.76020050048828, "vf_explained_var": 0.765857994556427, "kl": 0.001791672664694488, "entropy": 1.137281060218811, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4236800, "episodes_total": 10592, "training_iteration": 331, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-34-44", "timestamp": 1660257284, "time_this_iter_s": 31.342971086502075, "time_total_s": 15696.723695993423, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 15696.723695993423, "timesteps_since_restore": 4236800, "iterations_since_restore": 331, "perf": {"cpu_util_percent": 33.804545454545455, "ram_util_percent": 58.92272727272726}}
-{"episode_reward_max": 639.0, "episode_reward_min": 525.0, "episode_reward_mean": 616.01, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 258.0}, "policy_reward_max": {"ppo": 328.0}, "policy_reward_mean": {"ppo": 308.005}, "custom_metrics": {"sparse_reward_mean": 213.6, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 188.81, "shaped_reward_min": 165, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.56, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 18.07, "onion_pickup_agent_1_min": 12, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 17.05, "useful_onion_pickup_agent_0_min": 13, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 17.34, "useful_onion_pickup_agent_1_min": 12, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.49, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.31, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.13, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.06, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.74, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.52, "potting_onion_agent_1_min": 12, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.03, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.9, "dish_pickup_agent_1_min": 4, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.44, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.27, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.42, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.46, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.46, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.33, "soup_pickup_agent_1_min": 4, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.42, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.26, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.74, 
"optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.52, "optimal_onion_potting_agent_1_min": 12, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.74, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.52, "viable_onion_potting_agent_1_min": 12, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [633.0, 573.0, 633.0, 567.0, 630.0, 576.0, 639.0, 587.0, 630.0, 582.0, 636.0, 639.0, 633.0, 630.0, 636.0, 579.0, 576.0, 627.0, 587.0, 587.0, 579.0, 582.0, 636.0, 636.0, 579.0, 639.0, 587.0, 633.0, 630.0, 582.0, 633.0, 587.0, 579.0, 636.0, 579.0, 630.0, 639.0, 633.0, 525.0, 630.0, 587.0, 630.0, 630.0, 615.0, 633.0, 630.0, 636.0, 627.0, 584.0, 633.0, 639.0, 582.0, 630.0, 587.0, 630.0, 630.0, 579.0, 630.0, 633.0, 621.0, 630.0, 636.0, 639.0, 636.0, 639.0, 582.0, 627.0, 630.0, 582.0, 567.0, 587.0, 639.0, 627.0, 627.0, 639.0, 636.0, 636.0, 630.0, 636.0, 636.0, 627.0, 636.0, 584.0, 630.0, 627.0, 630.0, 633.0, 630.0, 579.0, 633.0, 633.0, 627.0, 639.0, 633.0, 633.0, 639.0, 633.0, 576.0, 636.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [319.0, 314.0, 287.0, 286.0, 316.0, 317.0, 280.0, 287.0, 313.0, 317.0, 281.0, 295.0, 319.0, 320.0, 295.0, 292.0, 311.0, 319.0, 284.0, 298.0, 320.0, 316.0, 316.0, 323.0, 317.0, 316.0, 319.0, 311.0, 324.0, 312.0, 281.0, 298.0, 285.0, 291.0, 311.0, 316.0, 288.0, 299.0, 297.0, 290.0, 288.0, 291.0, 291.0, 291.0, 319.0, 317.0, 314.0, 322.0, 280.0, 299.0, 317.0, 322.0, 298.0, 289.0, 314.0, 319.0, 316.0, 314.0, 291.0, 291.0, 312.0, 321.0, 298.0, 289.0, 293.0, 286.0, 314.0, 322.0, 290.0, 289.0, 316.0, 314.0, 316.0, 323.0, 309.0, 324.0, 258.0, 267.0, 314.0, 316.0, 291.0, 296.0, 314.0, 316.0, 313.0, 317.0, 307.0, 308.0, 322.0, 311.0, 321.0, 309.0, 317.0, 319.0, 310.0, 317.0, 285.0, 
299.0, 319.0, 314.0, 322.0, 317.0, 294.0, 288.0, 321.0, 309.0, 286.0, 301.0, 319.0, 311.0, 314.0, 316.0, 291.0, 288.0, 319.0, 311.0, 316.0, 317.0, 311.0, 310.0, 316.0, 314.0, 319.0, 317.0, 324.0, 315.0, 319.0, 317.0, 319.0, 320.0, 278.0, 304.0, 316.0, 311.0, 316.0, 314.0, 297.0, 285.0, 285.0, 282.0, 289.0, 298.0, 317.0, 322.0, 315.0, 312.0, 321.0, 306.0, 314.0, 325.0, 314.0, 322.0, 308.0, 328.0, 319.0, 311.0, 322.0, 314.0, 319.0, 317.0, 308.0, 319.0, 316.0, 320.0, 295.0, 289.0, 319.0, 311.0, 327.0, 300.0, 318.0, 312.0, 311.0, 322.0, 321.0, 309.0, 280.0, 299.0, 310.0, 323.0, 312.0, 321.0, 308.0, 319.0, 318.0, 321.0, 315.0, 318.0, 321.0, 312.0, 318.0, 321.0, 317.0, 316.0, 291.0, 285.0, 319.0, 317.0, 290.0, 292.0]}, "sampler_perf": {"mean_env_wait_ms": 0.9039942028756937, "mean_processing_ms": 0.24909087688920636, "mean_inference_ms": 1.490273663500262}, "off_policy_estimator": {}, "info": {"num_steps_trained": 7968000, "num_steps_sampled": 4249600, "sample_time_ms": 21096.339, "load_time_ms": 36.622, "grad_time_ms": 9224.002, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.002680680714547634, "policy_loss": -0.005275225732475519, "vf_loss": 85.20003509521484, "vf_explained_var": 0.7707304954528809, "kl": 0.00193729845341295, "entropy": 1.128203272819519, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4249600, "episodes_total": 10624, "training_iteration": 332, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-35-18", "timestamp": 1660257318, "time_this_iter_s": 33.216859102249146, "time_total_s": 15729.940555095673, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 15729.940555095673, "timesteps_since_restore": 4249600, "iterations_since_restore": 332, "perf": {"cpu_util_percent": 34.92765957446809, "ram_util_percent": 58.93617021276598}}
-{"episode_reward_max": 639.0, "episode_reward_min": 354.0, "episode_reward_mean": 613.94, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 174.0}, "policy_reward_max": {"ppo": 328.0}, "policy_reward_mean": {"ppo": 306.97}, "custom_metrics": {"sparse_reward_mean": 212.6, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 188.74, "shaped_reward_min": 114, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.39, "onion_pickup_agent_0_min": 10, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 18.05, "onion_pickup_agent_1_min": 12, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.95, "useful_onion_pickup_agent_0_min": 9, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 17.29, "useful_onion_pickup_agent_1_min": 12, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.46, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.23, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.17, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.06, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.6, "potting_onion_agent_0_min": 9, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.63, "potting_onion_agent_1_min": 12, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.04, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.89, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.54, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.26, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.4, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.5, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.48, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.26, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.43, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.21, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.6, 
"optimal_onion_potting_agent_0_min": 9, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.63, "optimal_onion_potting_agent_1_min": 12, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.6, "viable_onion_potting_agent_0_min": 9, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.63, "viable_onion_potting_agent_1_min": 12, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [636.0, 354.0, 587.0, 636.0, 579.0, 636.0, 639.0, 587.0, 468.0, 636.0, 624.0, 630.0, 579.0, 630.0, 584.0, 633.0, 636.0, 636.0, 630.0, 639.0, 639.0, 639.0, 636.0, 639.0, 639.0, 633.0, 636.0, 630.0, 582.0, 636.0, 630.0, 633.0, 639.0, 582.0, 627.0, 630.0, 582.0, 567.0, 587.0, 639.0, 627.0, 627.0, 639.0, 636.0, 636.0, 630.0, 636.0, 636.0, 627.0, 636.0, 584.0, 630.0, 627.0, 630.0, 633.0, 630.0, 579.0, 633.0, 633.0, 627.0, 639.0, 633.0, 633.0, 639.0, 633.0, 576.0, 636.0, 582.0, 633.0, 573.0, 633.0, 567.0, 630.0, 576.0, 639.0, 587.0, 630.0, 582.0, 636.0, 639.0, 633.0, 630.0, 636.0, 579.0, 576.0, 627.0, 587.0, 587.0, 579.0, 582.0, 636.0, 636.0, 579.0, 639.0, 587.0, 633.0, 630.0, 582.0, 633.0, 587.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [322.0, 314.0, 174.0, 180.0, 288.0, 299.0, 316.0, 320.0, 291.0, 288.0, 326.0, 310.0, 322.0, 317.0, 290.0, 297.0, 225.0, 243.0, 314.0, 322.0, 321.0, 303.0, 311.0, 319.0, 293.0, 286.0, 317.0, 313.0, 301.0, 283.0, 319.0, 314.0, 314.0, 322.0, 324.0, 312.0, 316.0, 314.0, 319.0, 320.0, 317.0, 322.0, 312.0, 327.0, 319.0, 317.0, 320.0, 319.0, 322.0, 317.0, 314.0, 319.0, 314.0, 322.0, 313.0, 317.0, 290.0, 292.0, 324.0, 312.0, 316.0, 314.0, 314.0, 319.0, 319.0, 320.0, 278.0, 304.0, 316.0, 311.0, 316.0, 314.0, 297.0, 285.0, 285.0, 282.0, 289.0, 298.0, 317.0, 322.0, 315.0, 312.0, 321.0, 306.0, 314.0, 325.0, 314.0, 322.0, 308.0, 328.0, 319.0, 311.0, 322.0, 314.0, 319.0, 317.0, 308.0, 
319.0, 316.0, 320.0, 295.0, 289.0, 319.0, 311.0, 327.0, 300.0, 318.0, 312.0, 311.0, 322.0, 321.0, 309.0, 280.0, 299.0, 310.0, 323.0, 312.0, 321.0, 308.0, 319.0, 318.0, 321.0, 315.0, 318.0, 321.0, 312.0, 318.0, 321.0, 317.0, 316.0, 291.0, 285.0, 319.0, 317.0, 290.0, 292.0, 319.0, 314.0, 287.0, 286.0, 316.0, 317.0, 280.0, 287.0, 313.0, 317.0, 281.0, 295.0, 319.0, 320.0, 295.0, 292.0, 311.0, 319.0, 284.0, 298.0, 320.0, 316.0, 316.0, 323.0, 317.0, 316.0, 319.0, 311.0, 324.0, 312.0, 281.0, 298.0, 285.0, 291.0, 311.0, 316.0, 288.0, 299.0, 297.0, 290.0, 288.0, 291.0, 291.0, 291.0, 319.0, 317.0, 314.0, 322.0, 280.0, 299.0, 317.0, 322.0, 298.0, 289.0, 314.0, 319.0, 316.0, 314.0, 291.0, 291.0, 312.0, 321.0, 298.0, 289.0]}, "sampler_perf": {"mean_env_wait_ms": 0.9026482821594658, "mean_processing_ms": 0.24882351646773487, "mean_inference_ms": 1.4891609969323358}, "off_policy_estimator": {}, "info": {"num_steps_trained": 7992000, "num_steps_sampled": 4262400, "sample_time_ms": 21161.346, "load_time_ms": 36.685, "grad_time_ms": 9083.544, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.006233640480786562, "policy_loss": -0.0020334760192781687, "vf_loss": 88.32830047607422, "vf_explained_var": 0.7596514821052551, "kl": 0.0019263379508629441, "entropy": 1.1314295530319214, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4262400, "episodes_total": 10656, "training_iteration": 333, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-35-49", "timestamp": 1660257349, "time_this_iter_s": 30.911512851715088, "time_total_s": 15760.852067947388, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 15760.852067947388, "timesteps_since_restore": 4262400, "iterations_since_restore": 333, "perf": {"cpu_util_percent": 33.54318181818183, "ram_util_percent": 59.031818181818196}}
-{"episode_reward_max": 639.0, "episode_reward_min": 354.0, "episode_reward_mean": 609.4, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 174.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 304.7}, "custom_metrics": {"sparse_reward_mean": 211.0, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 187.4, "shaped_reward_min": 114, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.43, "onion_pickup_agent_0_min": 10, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 17.91, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.98, "useful_onion_pickup_agent_0_min": 9, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 17.13, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.39, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.27, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.17, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.1, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.66, "potting_onion_agent_0_min": 9, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.4, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.89, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.94, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.37, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.27, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.36, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.51, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.38, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.29, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.32, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.25, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.66, 
"optimal_onion_potting_agent_0_min": 9, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.4, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.66, "viable_onion_potting_agent_0_min": 9, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.4, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [633.0, 582.0, 633.0, 582.0, 365.0, 627.0, 633.0, 639.0, 639.0, 630.0, 630.0, 633.0, 630.0, 579.0, 630.0, 576.0, 579.0, 633.0, 582.0, 633.0, 633.0, 636.0, 630.0, 587.0, 618.0, 633.0, 636.0, 627.0, 561.0, 639.0, 624.0, 587.0, 633.0, 576.0, 636.0, 582.0, 633.0, 573.0, 633.0, 567.0, 630.0, 576.0, 639.0, 587.0, 630.0, 582.0, 636.0, 639.0, 633.0, 630.0, 636.0, 579.0, 576.0, 627.0, 587.0, 587.0, 579.0, 582.0, 636.0, 636.0, 579.0, 639.0, 587.0, 633.0, 630.0, 582.0, 633.0, 587.0, 636.0, 354.0, 587.0, 636.0, 579.0, 636.0, 639.0, 587.0, 468.0, 636.0, 624.0, 630.0, 579.0, 630.0, 584.0, 633.0, 636.0, 636.0, 630.0, 639.0, 639.0, 639.0, 636.0, 639.0, 639.0, 633.0, 636.0, 630.0, 582.0, 636.0, 630.0, 633.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [314.0, 319.0, 286.0, 296.0, 313.0, 320.0, 288.0, 294.0, 186.0, 179.0, 308.0, 319.0, 310.0, 323.0, 317.0, 322.0, 319.0, 320.0, 314.0, 316.0, 319.0, 311.0, 314.0, 319.0, 311.0, 319.0, 290.0, 289.0, 311.0, 319.0, 302.0, 274.0, 285.0, 294.0, 319.0, 314.0, 283.0, 299.0, 314.0, 319.0, 316.0, 317.0, 319.0, 317.0, 311.0, 319.0, 286.0, 301.0, 313.0, 305.0, 317.0, 316.0, 325.0, 311.0, 308.0, 319.0, 287.0, 274.0, 322.0, 317.0, 300.0, 324.0, 296.0, 291.0, 317.0, 316.0, 291.0, 285.0, 319.0, 317.0, 290.0, 292.0, 319.0, 314.0, 287.0, 286.0, 316.0, 317.0, 280.0, 287.0, 313.0, 317.0, 281.0, 295.0, 319.0, 320.0, 295.0, 292.0, 311.0, 319.0, 284.0, 298.0, 320.0, 316.0, 316.0, 323.0, 317.0, 
316.0, 319.0, 311.0, 324.0, 312.0, 281.0, 298.0, 285.0, 291.0, 311.0, 316.0, 288.0, 299.0, 297.0, 290.0, 288.0, 291.0, 291.0, 291.0, 319.0, 317.0, 314.0, 322.0, 280.0, 299.0, 317.0, 322.0, 298.0, 289.0, 314.0, 319.0, 316.0, 314.0, 291.0, 291.0, 312.0, 321.0, 298.0, 289.0, 322.0, 314.0, 174.0, 180.0, 288.0, 299.0, 316.0, 320.0, 291.0, 288.0, 326.0, 310.0, 322.0, 317.0, 290.0, 297.0, 225.0, 243.0, 314.0, 322.0, 321.0, 303.0, 311.0, 319.0, 293.0, 286.0, 317.0, 313.0, 301.0, 283.0, 319.0, 314.0, 314.0, 322.0, 324.0, 312.0, 316.0, 314.0, 319.0, 320.0, 317.0, 322.0, 312.0, 327.0, 319.0, 317.0, 320.0, 319.0, 322.0, 317.0, 314.0, 319.0, 314.0, 322.0, 313.0, 317.0, 290.0, 292.0, 324.0, 312.0, 316.0, 314.0, 314.0, 319.0]}, "sampler_perf": {"mean_env_wait_ms": 0.9013131460094013, "mean_processing_ms": 0.24855772824532033, "mean_inference_ms": 1.488151121252854}, "off_policy_estimator": {}, "info": {"num_steps_trained": 8016000, "num_steps_sampled": 4275200, "sample_time_ms": 21421.637, "load_time_ms": 36.594, "grad_time_ms": 9205.096, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0013225991278886795, "policy_loss": -0.00687911594286561, "vf_loss": 87.6913833618164, "vf_explained_var": 0.7619670033454895, "kl": 0.0022256800439208746, "entropy": 1.1348274946212769, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4275200, "episodes_total": 10688, "training_iteration": 334, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-36-22", "timestamp": 1660257382, "time_this_iter_s": 33.877387046813965, "time_total_s": 15794.729454994202, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 15794.729454994202, "timesteps_since_restore": 4275200, "iterations_since_restore": 334, "perf": {"cpu_util_percent": 34.24791666666667, "ram_util_percent": 59.02708333333334}}
-{"episode_reward_max": 639.0, "episode_reward_min": 354.0, "episode_reward_mean": 608.6, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 174.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 304.3}, "custom_metrics": {"sparse_reward_mean": 211.0, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 186.6, "shaped_reward_min": 114, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.21, "onion_pickup_agent_0_min": 10, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 18.16, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.75, "useful_onion_pickup_agent_0_min": 9, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 17.48, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.4, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.35, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.13, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.13, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.44, "potting_onion_agent_0_min": 9, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.5, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.08, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.8, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.4, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.11, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.44, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.46, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.5, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.15, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.44, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.13, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.44, "optimal_onion_potting_agent_0_min": 
9, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.5, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.44, "viable_onion_potting_agent_0_min": 9, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.5, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": 
{"episode_reward": [633.0, 627.0, 582.0, 636.0, 627.0, 633.0, 518.0, 587.0, 633.0, 582.0, 582.0, 582.0, 558.0, 561.0, 633.0, 582.0, 579.0, 579.0, 633.0, 639.0, 633.0, 630.0, 624.0, 633.0, 579.0, 636.0, 564.0, 627.0, 630.0, 639.0, 633.0, 584.0, 630.0, 582.0, 633.0, 587.0, 636.0, 354.0, 587.0, 636.0, 579.0, 636.0, 639.0, 587.0, 468.0, 636.0, 624.0, 630.0, 579.0, 630.0, 584.0, 633.0, 636.0, 636.0, 630.0, 639.0, 639.0, 639.0, 636.0, 639.0, 639.0, 633.0, 636.0, 630.0, 582.0, 636.0, 630.0, 633.0, 633.0, 582.0, 633.0, 582.0, 365.0, 627.0, 633.0, 639.0, 639.0, 630.0, 630.0, 633.0, 630.0, 579.0, 630.0, 576.0, 579.0, 633.0, 582.0, 633.0, 633.0, 636.0, 630.0, 587.0, 618.0, 633.0, 636.0, 627.0, 561.0, 639.0, 624.0, 587.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [316.0, 317.0, 308.0, 319.0, 293.0, 289.0, 317.0, 319.0, 316.0, 311.0, 319.0, 314.0, 266.0, 252.0, 299.0, 288.0, 321.0, 312.0, 295.0, 287.0, 279.0, 303.0, 285.0, 297.0, 287.0, 271.0, 284.0, 277.0, 319.0, 314.0, 301.0, 281.0, 294.0, 285.0, 290.0, 289.0, 318.0, 315.0, 314.0, 325.0, 314.0, 319.0, 319.0, 311.0, 305.0, 319.0, 316.0, 317.0, 291.0, 288.0, 320.0, 316.0, 277.0, 287.0, 306.0, 321.0, 317.0, 313.0, 314.0, 325.0, 317.0, 316.0, 287.0, 297.0, 316.0, 314.0, 291.0, 291.0, 312.0, 321.0, 298.0, 289.0, 322.0, 314.0, 174.0, 180.0, 288.0, 299.0, 316.0, 320.0, 291.0, 288.0, 326.0, 310.0, 322.0, 317.0, 290.0, 297.0, 225.0, 243.0, 314.0, 322.0, 321.0, 303.0, 311.0, 319.0, 293.0, 286.0, 317.0, 313.0, 301.0, 283.0, 319.0, 314.0, 314.0, 
322.0, 324.0, 312.0, 316.0, 314.0, 319.0, 320.0, 317.0, 322.0, 312.0, 327.0, 319.0, 317.0, 320.0, 319.0, 322.0, 317.0, 314.0, 319.0, 314.0, 322.0, 313.0, 317.0, 290.0, 292.0, 324.0, 312.0, 316.0, 314.0, 314.0, 319.0, 314.0, 319.0, 286.0, 296.0, 313.0, 320.0, 288.0, 294.0, 186.0, 179.0, 308.0, 319.0, 310.0, 323.0, 317.0, 322.0, 319.0, 320.0, 314.0, 316.0, 319.0, 311.0, 314.0, 319.0, 311.0, 319.0, 290.0, 289.0, 311.0, 319.0, 302.0, 274.0, 285.0, 294.0, 319.0, 314.0, 283.0, 299.0, 314.0, 319.0, 316.0, 317.0, 319.0, 317.0, 311.0, 319.0, 286.0, 301.0, 313.0, 305.0, 317.0, 316.0, 325.0, 311.0, 308.0, 319.0, 287.0, 274.0, 322.0, 317.0, 300.0, 324.0, 296.0, 291.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8999851577327678, "mean_processing_ms": 0.24829403649185813, "mean_inference_ms": 1.4872182879909173}, "off_policy_estimator": {}, "info": {"num_steps_trained": 8040000, "num_steps_sampled": 4288000, "sample_time_ms": 21915.303, "load_time_ms": 37.24, "grad_time_ms": 9393.89, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0024422004353255033, "policy_loss": -0.005287020932883024, "vf_loss": 82.97665405273438, "vf_explained_var": 0.7636620402336121, "kl": 0.001807666034437716, "entropy": 1.136885643005371, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4288000, "episodes_total": 10720, "training_iteration": 335, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-36-58", "timestamp": 1660257418, "time_this_iter_s": 35.08472490310669, "time_total_s": 15829.814179897308, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, 
"conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, 
"custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, 
"vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 15829.814179897308, "timesteps_since_restore": 4288000, "iterations_since_restore": 335, "perf": {"cpu_util_percent": 30.30408163265306, "ram_util_percent": 58.94081632653061}}
-{"episode_reward_max": 639.0, "episode_reward_min": 365.0, "episode_reward_mean": 608.33, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 179.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 304.165}, "custom_metrics": {"sparse_reward_mean": 211.2, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 185.93, "shaped_reward_min": 125, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.2, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 18.14, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.68, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 17.58, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.41, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.37, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.1, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.11, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.4, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.45, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.09, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.81, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.36, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.05, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.45, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.49, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.5, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.13, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.46, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.11, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.4, 
"optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.45, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.4, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.45, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [630.0, 465.0, 636.0, 633.0, 582.0, 579.0, 582.0, 633.0, 516.0, 582.0, 587.0, 630.0, 630.0, 633.0, 630.0, 624.0, 633.0, 630.0, 627.0, 630.0, 576.0, 582.0, 630.0, 639.0, 639.0, 633.0, 576.0, 633.0, 633.0, 576.0, 633.0, 633.0, 582.0, 636.0, 630.0, 633.0, 633.0, 582.0, 633.0, 582.0, 365.0, 627.0, 633.0, 639.0, 639.0, 630.0, 630.0, 633.0, 630.0, 579.0, 630.0, 576.0, 579.0, 633.0, 582.0, 633.0, 633.0, 636.0, 630.0, 587.0, 618.0, 633.0, 636.0, 627.0, 561.0, 639.0, 624.0, 587.0, 633.0, 627.0, 582.0, 636.0, 627.0, 633.0, 518.0, 587.0, 633.0, 582.0, 582.0, 582.0, 558.0, 561.0, 633.0, 582.0, 579.0, 579.0, 633.0, 639.0, 633.0, 630.0, 624.0, 633.0, 579.0, 636.0, 564.0, 627.0, 630.0, 639.0, 633.0, 584.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [318.0, 312.0, 221.0, 244.0, 316.0, 320.0, 319.0, 314.0, 290.0, 292.0, 288.0, 291.0, 283.0, 299.0, 314.0, 319.0, 256.0, 260.0, 283.0, 299.0, 291.0, 296.0, 317.0, 313.0, 311.0, 319.0, 314.0, 319.0, 326.0, 304.0, 308.0, 316.0, 321.0, 312.0, 321.0, 309.0, 314.0, 313.0, 317.0, 313.0, 288.0, 288.0, 286.0, 296.0, 318.0, 312.0, 322.0, 317.0, 327.0, 312.0, 316.0, 317.0, 291.0, 285.0, 319.0, 314.0, 318.0, 315.0, 288.0, 288.0, 324.0, 309.0, 322.0, 311.0, 290.0, 292.0, 324.0, 312.0, 316.0, 314.0, 314.0, 319.0, 314.0, 319.0, 286.0, 296.0, 313.0, 320.0, 288.0, 294.0, 186.0, 179.0, 308.0, 319.0, 310.0, 323.0, 317.0, 322.0, 319.0, 320.0, 314.0, 316.0, 319.0, 311.0, 314.0, 319.0, 311.0, 
319.0, 290.0, 289.0, 311.0, 319.0, 302.0, 274.0, 285.0, 294.0, 319.0, 314.0, 283.0, 299.0, 314.0, 319.0, 316.0, 317.0, 319.0, 317.0, 311.0, 319.0, 286.0, 301.0, 313.0, 305.0, 317.0, 316.0, 325.0, 311.0, 308.0, 319.0, 287.0, 274.0, 322.0, 317.0, 300.0, 324.0, 296.0, 291.0, 316.0, 317.0, 308.0, 319.0, 293.0, 289.0, 317.0, 319.0, 316.0, 311.0, 319.0, 314.0, 266.0, 252.0, 299.0, 288.0, 321.0, 312.0, 295.0, 287.0, 279.0, 303.0, 285.0, 297.0, 287.0, 271.0, 284.0, 277.0, 319.0, 314.0, 301.0, 281.0, 294.0, 285.0, 290.0, 289.0, 318.0, 315.0, 314.0, 325.0, 314.0, 319.0, 319.0, 311.0, 305.0, 319.0, 316.0, 317.0, 291.0, 288.0, 320.0, 316.0, 277.0, 287.0, 306.0, 321.0, 317.0, 313.0, 314.0, 325.0, 317.0, 316.0, 287.0, 297.0]}, "sampler_perf": {"mean_env_wait_ms": 0.898665270354117, "mean_processing_ms": 0.24803277399210055, "mean_inference_ms": 1.4863570559836363}, "off_policy_estimator": {}, "info": {"num_steps_trained": 8064000, "num_steps_sampled": 4300800, "sample_time_ms": 22027.266, "load_time_ms": 37.353, "grad_time_ms": 9612.106, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0008330469136126339, "policy_loss": -0.006751233246177435, "vf_loss": 81.52507781982422, "vf_explained_var": 0.7658050656318665, "kl": 0.001944715972058475, "entropy": 1.1364573240280151, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4300800, "episodes_total": 10752, "training_iteration": 336, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-37-32", "timestamp": 1660257452, "time_this_iter_s": 34.64702320098877, "time_total_s": 15864.461203098297, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 15864.461203098297, "timesteps_since_restore": 4300800, "iterations_since_restore": 336, "perf": {"cpu_util_percent": 29.189795918367345, "ram_util_percent": 58.936734693877554}}
-{"episode_reward_max": 639.0, "episode_reward_min": 465.0, "episode_reward_mean": 608.41, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 221.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 304.205}, "custom_metrics": {"sparse_reward_mean": 211.0, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 186.41, "shaped_reward_min": 145, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.94, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 18.31, "onion_pickup_agent_1_min": 12, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.46, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 17.76, "useful_onion_pickup_agent_1_min": 12, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.36, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.36, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.08, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.08, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.24, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.65, "potting_onion_agent_1_min": 12, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.31, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.67, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.48, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.95, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.53, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.43, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.64, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.05, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.54, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.01, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.24, 
"optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.65, "optimal_onion_potting_agent_1_min": 12, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.24, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.65, "viable_onion_potting_agent_1_min": 12, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [587.0, 636.0, 524.0, 630.0, 636.0, 636.0, 633.0, 639.0, 584.0, 636.0, 587.0, 636.0, 587.0, 639.0, 636.0, 587.0, 639.0, 627.0, 630.0, 636.0, 582.0, 590.0, 573.0, 636.0, 587.0, 633.0, 633.0, 578.0, 630.0, 573.0, 573.0, 624.0, 561.0, 639.0, 624.0, 587.0, 633.0, 627.0, 582.0, 636.0, 627.0, 633.0, 518.0, 587.0, 633.0, 582.0, 582.0, 582.0, 558.0, 561.0, 633.0, 582.0, 579.0, 579.0, 633.0, 639.0, 633.0, 630.0, 624.0, 633.0, 579.0, 636.0, 564.0, 627.0, 630.0, 639.0, 633.0, 584.0, 630.0, 465.0, 636.0, 633.0, 582.0, 579.0, 582.0, 633.0, 516.0, 582.0, 587.0, 630.0, 630.0, 633.0, 630.0, 624.0, 633.0, 630.0, 627.0, 630.0, 576.0, 582.0, 630.0, 639.0, 639.0, 633.0, 576.0, 633.0, 633.0, 576.0, 633.0, 633.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [301.0, 286.0, 314.0, 322.0, 264.0, 260.0, 316.0, 314.0, 319.0, 317.0, 314.0, 322.0, 316.0, 317.0, 319.0, 320.0, 288.0, 296.0, 319.0, 317.0, 306.0, 281.0, 316.0, 320.0, 299.0, 288.0, 325.0, 314.0, 317.0, 319.0, 293.0, 294.0, 319.0, 320.0, 318.0, 309.0, 316.0, 314.0, 319.0, 317.0, 296.0, 286.0, 296.0, 294.0, 293.0, 280.0, 319.0, 317.0, 283.0, 304.0, 309.0, 324.0, 311.0, 322.0, 282.0, 296.0, 316.0, 314.0, 280.0, 293.0, 285.0, 288.0, 312.0, 312.0, 287.0, 274.0, 322.0, 317.0, 300.0, 324.0, 296.0, 291.0, 316.0, 317.0, 308.0, 319.0, 293.0, 289.0, 317.0, 319.0, 316.0, 311.0, 319.0, 314.0, 266.0, 252.0, 299.0, 288.0, 321.0, 312.0, 295.0, 287.0, 279.0, 303.0, 285.0, 297.0, 287.0, 
271.0, 284.0, 277.0, 319.0, 314.0, 301.0, 281.0, 294.0, 285.0, 290.0, 289.0, 318.0, 315.0, 314.0, 325.0, 314.0, 319.0, 319.0, 311.0, 305.0, 319.0, 316.0, 317.0, 291.0, 288.0, 320.0, 316.0, 277.0, 287.0, 306.0, 321.0, 317.0, 313.0, 314.0, 325.0, 317.0, 316.0, 287.0, 297.0, 318.0, 312.0, 221.0, 244.0, 316.0, 320.0, 319.0, 314.0, 290.0, 292.0, 288.0, 291.0, 283.0, 299.0, 314.0, 319.0, 256.0, 260.0, 283.0, 299.0, 291.0, 296.0, 317.0, 313.0, 311.0, 319.0, 314.0, 319.0, 326.0, 304.0, 308.0, 316.0, 321.0, 312.0, 321.0, 309.0, 314.0, 313.0, 317.0, 313.0, 288.0, 288.0, 286.0, 296.0, 318.0, 312.0, 322.0, 317.0, 327.0, 312.0, 316.0, 317.0, 291.0, 285.0, 319.0, 314.0, 318.0, 315.0, 288.0, 288.0, 324.0, 309.0, 322.0, 311.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8973618509446918, "mean_processing_ms": 0.2477754616951261, "mean_inference_ms": 1.4857111837613974}, "off_policy_estimator": {}, "info": {"num_steps_trained": 8088000, "num_steps_sampled": 4313600, "sample_time_ms": 22572.125, "load_time_ms": 37.728, "grad_time_ms": 9903.43, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0030143249314278364, "policy_loss": -0.004624274093657732, "vf_loss": 82.12947845458984, "vf_explained_var": 0.7718231081962585, "kl": 0.0020513928029686213, "entropy": 1.1487096548080444, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4313600, "episodes_total": 10784, "training_iteration": 337, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-38-10", "timestamp": 1660257490, "time_this_iter_s": 37.458003759384155, "time_total_s": 15901.919206857681, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 15901.919206857681, "timesteps_since_restore": 4313600, "iterations_since_restore": 337, "perf": {"cpu_util_percent": 29.675471698113206, "ram_util_percent": 58.94905660377358}}
-{"episode_reward_max": 639.0, "episode_reward_min": 465.0, "episode_reward_mean": 610.69, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 221.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 305.345}, "custom_metrics": {"sparse_reward_mean": 211.6, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 187.49, "shaped_reward_min": 145, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.77, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 18.44, "onion_pickup_agent_1_min": 12, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.34, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 17.93, "useful_onion_pickup_agent_1_min": 12, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.27, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.29, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.07, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.11, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 16.19, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.81, "potting_onion_agent_1_min": 12, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.32, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.6, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.62, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.01, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.53, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.4, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.66, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.06, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.58, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.0, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.19, 
"optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.81, "optimal_onion_potting_agent_1_min": 12, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.19, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.81, "viable_onion_potting_agent_1_min": 12, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [633.0, 630.0, 630.0, 630.0, 636.0, 633.0, 582.0, 570.0, 633.0, 639.0, 582.0, 630.0, 630.0, 530.0, 633.0, 516.0, 633.0, 636.0, 627.0, 633.0, 587.0, 636.0, 633.0, 573.0, 579.0, 582.0, 633.0, 636.0, 627.0, 587.0, 582.0, 630.0, 630.0, 639.0, 633.0, 584.0, 630.0, 465.0, 636.0, 633.0, 582.0, 579.0, 582.0, 633.0, 516.0, 582.0, 587.0, 630.0, 630.0, 633.0, 630.0, 624.0, 633.0, 630.0, 627.0, 630.0, 576.0, 582.0, 630.0, 639.0, 639.0, 633.0, 576.0, 633.0, 633.0, 576.0, 633.0, 633.0, 587.0, 636.0, 524.0, 630.0, 636.0, 636.0, 633.0, 639.0, 584.0, 636.0, 587.0, 636.0, 587.0, 639.0, 636.0, 587.0, 639.0, 627.0, 630.0, 636.0, 582.0, 590.0, 573.0, 636.0, 587.0, 633.0, 633.0, 578.0, 630.0, 573.0, 573.0, 624.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [319.0, 314.0, 319.0, 311.0, 316.0, 314.0, 321.0, 309.0, 317.0, 319.0, 317.0, 316.0, 298.0, 284.0, 277.0, 293.0, 319.0, 314.0, 322.0, 317.0, 291.0, 291.0, 321.0, 309.0, 316.0, 314.0, 257.0, 273.0, 321.0, 312.0, 256.0, 260.0, 311.0, 322.0, 316.0, 320.0, 311.0, 316.0, 316.0, 317.0, 293.0, 294.0, 314.0, 322.0, 316.0, 317.0, 276.0, 297.0, 288.0, 291.0, 288.0, 294.0, 318.0, 315.0, 324.0, 312.0, 313.0, 314.0, 293.0, 294.0, 291.0, 291.0, 316.0, 314.0, 317.0, 313.0, 314.0, 325.0, 317.0, 316.0, 287.0, 297.0, 318.0, 312.0, 221.0, 244.0, 316.0, 320.0, 319.0, 314.0, 290.0, 292.0, 288.0, 291.0, 283.0, 299.0, 314.0, 319.0, 256.0, 260.0, 283.0, 299.0, 291.0, 296.0, 317.0, 313.0, 311.0, 
319.0, 314.0, 319.0, 326.0, 304.0, 308.0, 316.0, 321.0, 312.0, 321.0, 309.0, 314.0, 313.0, 317.0, 313.0, 288.0, 288.0, 286.0, 296.0, 318.0, 312.0, 322.0, 317.0, 327.0, 312.0, 316.0, 317.0, 291.0, 285.0, 319.0, 314.0, 318.0, 315.0, 288.0, 288.0, 324.0, 309.0, 322.0, 311.0, 301.0, 286.0, 314.0, 322.0, 264.0, 260.0, 316.0, 314.0, 319.0, 317.0, 314.0, 322.0, 316.0, 317.0, 319.0, 320.0, 288.0, 296.0, 319.0, 317.0, 306.0, 281.0, 316.0, 320.0, 299.0, 288.0, 325.0, 314.0, 317.0, 319.0, 293.0, 294.0, 319.0, 320.0, 318.0, 309.0, 316.0, 314.0, 319.0, 317.0, 296.0, 286.0, 296.0, 294.0, 293.0, 280.0, 319.0, 317.0, 283.0, 304.0, 309.0, 324.0, 311.0, 322.0, 282.0, 296.0, 316.0, 314.0, 280.0, 293.0, 285.0, 288.0, 312.0, 312.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8960540337881886, "mean_processing_ms": 0.24751510966747803, "mean_inference_ms": 1.4847704330515064}, "off_policy_estimator": {}, "info": {"num_steps_trained": 8112000, "num_steps_sampled": 4326400, "sample_time_ms": 22480.697, "load_time_ms": 37.651, "grad_time_ms": 10202.093, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.001484702923335135, "policy_loss": -0.006018726620823145, "vf_loss": 80.70446014404297, "vf_explained_var": 0.7642549872398376, "kl": 0.0017236651619896293, "entropy": 1.1340447664260864, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4326400, "episodes_total": 10816, "training_iteration": 338, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-38-41", "timestamp": 1660257521, "time_this_iter_s": 31.244572162628174, "time_total_s": 15933.16377902031, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 15933.16377902031, "timesteps_since_restore": 4326400, "iterations_since_restore": 338, "perf": {"cpu_util_percent": 27.328888888888894, "ram_util_percent": 59.27555555555555}}
-{"episode_reward_max": 639.0, "episode_reward_min": 516.0, "episode_reward_mean": 612.0, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 256.0}, "policy_reward_max": {"ppo": 326.0}, "policy_reward_mean": {"ppo": 306.0}, "custom_metrics": {"sparse_reward_mean": 211.8, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 188.4, "shaped_reward_min": 156, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.58, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.62, "onion_pickup_agent_1_min": 15, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.22, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 18.03, "useful_onion_pickup_agent_1_min": 15, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.17, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.27, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.06, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.1, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 16.16, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.95, "potting_onion_agent_1_min": 14, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.34, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.49, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.74, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.0, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.45, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.36, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.75, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.03, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.64, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.96, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.16, 
"optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.95, "optimal_onion_potting_agent_1_min": 14, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.16, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.95, "viable_onion_potting_agent_1_min": 14, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [587.0, 636.0, 630.0, 633.0, 639.0, 630.0, 587.0, 636.0, 582.0, 636.0, 576.0, 584.0, 630.0, 639.0, 639.0, 633.0, 627.0, 582.0, 582.0, 630.0, 630.0, 633.0, 522.0, 587.0, 593.0, 633.0, 630.0, 582.0, 636.0, 630.0, 587.0, 636.0, 633.0, 576.0, 633.0, 633.0, 587.0, 636.0, 524.0, 630.0, 636.0, 636.0, 633.0, 639.0, 584.0, 636.0, 587.0, 636.0, 587.0, 639.0, 636.0, 587.0, 639.0, 627.0, 630.0, 636.0, 582.0, 590.0, 573.0, 636.0, 587.0, 633.0, 633.0, 578.0, 630.0, 573.0, 573.0, 624.0, 633.0, 630.0, 630.0, 630.0, 636.0, 633.0, 582.0, 570.0, 633.0, 639.0, 582.0, 630.0, 630.0, 530.0, 633.0, 516.0, 633.0, 636.0, 627.0, 633.0, 587.0, 636.0, 633.0, 573.0, 579.0, 582.0, 633.0, 636.0, 627.0, 587.0, 582.0, 630.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [288.0, 299.0, 322.0, 314.0, 316.0, 314.0, 319.0, 314.0, 322.0, 317.0, 316.0, 314.0, 291.0, 296.0, 314.0, 322.0, 285.0, 297.0, 325.0, 311.0, 288.0, 288.0, 295.0, 289.0, 326.0, 304.0, 317.0, 322.0, 322.0, 317.0, 316.0, 317.0, 308.0, 319.0, 296.0, 286.0, 288.0, 294.0, 306.0, 324.0, 316.0, 314.0, 311.0, 322.0, 262.0, 260.0, 289.0, 298.0, 296.0, 297.0, 322.0, 311.0, 313.0, 317.0, 293.0, 289.0, 319.0, 317.0, 313.0, 317.0, 309.0, 278.0, 314.0, 322.0, 318.0, 315.0, 288.0, 288.0, 324.0, 309.0, 322.0, 311.0, 301.0, 286.0, 314.0, 322.0, 264.0, 260.0, 316.0, 314.0, 319.0, 317.0, 314.0, 322.0, 316.0, 317.0, 319.0, 320.0, 288.0, 296.0, 319.0, 317.0, 306.0, 281.0, 316.0, 320.0, 299.0, 
288.0, 325.0, 314.0, 317.0, 319.0, 293.0, 294.0, 319.0, 320.0, 318.0, 309.0, 316.0, 314.0, 319.0, 317.0, 296.0, 286.0, 296.0, 294.0, 293.0, 280.0, 319.0, 317.0, 283.0, 304.0, 309.0, 324.0, 311.0, 322.0, 282.0, 296.0, 316.0, 314.0, 280.0, 293.0, 285.0, 288.0, 312.0, 312.0, 319.0, 314.0, 319.0, 311.0, 316.0, 314.0, 321.0, 309.0, 317.0, 319.0, 317.0, 316.0, 298.0, 284.0, 277.0, 293.0, 319.0, 314.0, 322.0, 317.0, 291.0, 291.0, 321.0, 309.0, 316.0, 314.0, 257.0, 273.0, 321.0, 312.0, 256.0, 260.0, 311.0, 322.0, 316.0, 320.0, 311.0, 316.0, 316.0, 317.0, 293.0, 294.0, 314.0, 322.0, 316.0, 317.0, 276.0, 297.0, 288.0, 291.0, 288.0, 294.0, 318.0, 315.0, 324.0, 312.0, 313.0, 314.0, 293.0, 294.0, 291.0, 291.0, 316.0, 314.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8947470159821566, "mean_processing_ms": 0.2472537044161699, "mean_inference_ms": 1.483669994240604}, "off_policy_estimator": {}, "info": {"num_steps_trained": 8136000, "num_steps_sampled": 4339200, "sample_time_ms": 22385.99, "load_time_ms": 37.99, "grad_time_ms": 10282.952, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0002800325455609709, "policy_loss": -0.007263503968715668, "vf_loss": 81.1025161743164, "vf_explained_var": 0.7635498642921448, "kl": 0.0021122132893651724, "entropy": 1.1334240436553955, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4339200, "episodes_total": 10848, "training_iteration": 339, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-39-11", "timestamp": 1660257551, "time_this_iter_s": 30.060129165649414, "time_total_s": 15963.223908185959, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 15963.223908185959, "timesteps_since_restore": 4339200, "iterations_since_restore": 339, "perf": {"cpu_util_percent": 32.38333333333334, "ram_util_percent": 58.776190476190465}}
-{"episode_reward_max": 639.0, "episode_reward_min": 516.0, "episode_reward_mean": 611.61, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 256.0}, "policy_reward_max": {"ppo": 326.0}, "policy_reward_mean": {"ppo": 305.805}, "custom_metrics": {"sparse_reward_mean": 211.8, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 188.01, "shaped_reward_min": 156, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.45, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 18.7, "onion_pickup_agent_1_min": 15, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.11, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 18.06, "useful_onion_pickup_agent_1_min": 15, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.16, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.25, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.05, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.12, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 16.07, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 18.02, "potting_onion_agent_1_min": 14, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.4, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.51, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.73, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.95, "useful_dish_pickup_agent_1_min": 3, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.48, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.43, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.75, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.0, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.64, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.96, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.07, 
"optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 18.02, "optimal_onion_potting_agent_1_min": 14, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.07, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 18.02, "viable_onion_potting_agent_1_min": 14, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [630.0, 590.0, 639.0, 636.0, 582.0, 624.0, 633.0, 587.0, 630.0, 582.0, 630.0, 630.0, 630.0, 636.0, 636.0, 579.0, 587.0, 587.0, 636.0, 584.0, 578.0, 630.0, 636.0, 630.0, 582.0, 579.0, 636.0, 627.0, 582.0, 633.0, 582.0, 630.0, 630.0, 573.0, 573.0, 624.0, 633.0, 630.0, 630.0, 630.0, 636.0, 633.0, 582.0, 570.0, 633.0, 639.0, 582.0, 630.0, 630.0, 530.0, 633.0, 516.0, 633.0, 636.0, 627.0, 633.0, 587.0, 636.0, 633.0, 573.0, 579.0, 582.0, 633.0, 636.0, 627.0, 587.0, 582.0, 630.0, 587.0, 636.0, 630.0, 633.0, 639.0, 630.0, 587.0, 636.0, 582.0, 636.0, 576.0, 584.0, 630.0, 639.0, 639.0, 633.0, 627.0, 582.0, 582.0, 630.0, 630.0, 633.0, 522.0, 587.0, 593.0, 633.0, 630.0, 582.0, 636.0, 630.0, 587.0, 636.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [323.0, 307.0, 304.0, 286.0, 320.0, 319.0, 316.0, 320.0, 296.0, 286.0, 313.0, 311.0, 311.0, 322.0, 291.0, 296.0, 314.0, 316.0, 296.0, 286.0, 311.0, 319.0, 321.0, 309.0, 316.0, 314.0, 319.0, 317.0, 317.0, 319.0, 293.0, 286.0, 301.0, 286.0, 298.0, 289.0, 314.0, 322.0, 288.0, 296.0, 286.0, 292.0, 318.0, 312.0, 312.0, 324.0, 313.0, 317.0, 288.0, 294.0, 295.0, 284.0, 314.0, 322.0, 316.0, 311.0, 291.0, 291.0, 316.0, 317.0, 285.0, 297.0, 313.0, 317.0, 316.0, 314.0, 280.0, 293.0, 285.0, 288.0, 312.0, 312.0, 319.0, 314.0, 319.0, 311.0, 316.0, 314.0, 321.0, 309.0, 317.0, 319.0, 317.0, 316.0, 298.0, 284.0, 277.0, 293.0, 319.0, 314.0, 322.0, 317.0, 291.0, 291.0, 321.0, 309.0, 316.0, 
314.0, 257.0, 273.0, 321.0, 312.0, 256.0, 260.0, 311.0, 322.0, 316.0, 320.0, 311.0, 316.0, 316.0, 317.0, 293.0, 294.0, 314.0, 322.0, 316.0, 317.0, 276.0, 297.0, 288.0, 291.0, 288.0, 294.0, 318.0, 315.0, 324.0, 312.0, 313.0, 314.0, 293.0, 294.0, 291.0, 291.0, 316.0, 314.0, 288.0, 299.0, 322.0, 314.0, 316.0, 314.0, 319.0, 314.0, 322.0, 317.0, 316.0, 314.0, 291.0, 296.0, 314.0, 322.0, 285.0, 297.0, 325.0, 311.0, 288.0, 288.0, 295.0, 289.0, 326.0, 304.0, 317.0, 322.0, 322.0, 317.0, 316.0, 317.0, 308.0, 319.0, 296.0, 286.0, 288.0, 294.0, 306.0, 324.0, 316.0, 314.0, 311.0, 322.0, 262.0, 260.0, 289.0, 298.0, 296.0, 297.0, 322.0, 311.0, 313.0, 317.0, 293.0, 289.0, 319.0, 317.0, 313.0, 317.0, 309.0, 278.0, 314.0, 322.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8934318351724546, "mean_processing_ms": 0.24699024834277958, "mean_inference_ms": 1.4821846503211202}, "off_policy_estimator": {}, "info": {"num_steps_trained": 8160000, "num_steps_sampled": 4352000, "sample_time_ms": 22391.649, "load_time_ms": 38.471, "grad_time_ms": 10443.746, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.004414581228047609, "policy_loss": -0.003194813383743167, "vf_loss": 81.79281616210938, "vf_explained_var": 0.764918863773346, "kl": 0.0018889306811615825, "entropy": 1.1397979259490967, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4352000, "episodes_total": 10880, "training_iteration": 340, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-39-42", "timestamp": 1660257582, "time_this_iter_s": 31.150686979293823, "time_total_s": 15994.374595165253, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 15994.374595165253, "timesteps_since_restore": 4352000, "iterations_since_restore": 340, "perf": {"cpu_util_percent": 27.049999999999997, "ram_util_percent": 58.795454545454525}}
-{"episode_reward_max": 639.0, "episode_reward_min": 462.0, "episode_reward_mean": 610.54, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 228.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 305.27}, "custom_metrics": {"sparse_reward_mean": 211.2, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 188.14, "shaped_reward_min": 142, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.57, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 18.6, "onion_pickup_agent_1_min": 12, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.28, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 17.97, "useful_onion_pickup_agent_1_min": 12, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.18, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.24, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.05, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.08, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.13, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.94, "potting_onion_agent_1_min": 12, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.31, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.5, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.79, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.97, "useful_dish_pickup_agent_1_min": 3, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.36, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.42, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.75, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.99, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.64, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.94, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.13, 
"optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.94, "optimal_onion_potting_agent_1_min": 12, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.13, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.94, "viable_onion_potting_agent_1_min": 12, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [633.0, 462.0, 636.0, 639.0, 587.0, 576.0, 639.0, 633.0, 633.0, 633.0, 633.0, 630.0, 579.0, 633.0, 636.0, 633.0, 636.0, 582.0, 573.0, 630.0, 582.0, 570.0, 587.0, 573.0, 544.0, 582.0, 630.0, 630.0, 582.0, 630.0, 633.0, 639.0, 627.0, 587.0, 582.0, 630.0, 587.0, 636.0, 630.0, 633.0, 639.0, 630.0, 587.0, 636.0, 582.0, 636.0, 576.0, 584.0, 630.0, 639.0, 639.0, 633.0, 627.0, 582.0, 582.0, 630.0, 630.0, 633.0, 522.0, 587.0, 593.0, 633.0, 630.0, 582.0, 636.0, 630.0, 587.0, 636.0, 630.0, 590.0, 639.0, 636.0, 582.0, 624.0, 633.0, 587.0, 630.0, 582.0, 630.0, 630.0, 630.0, 636.0, 636.0, 579.0, 587.0, 587.0, 636.0, 584.0, 578.0, 630.0, 636.0, 630.0, 582.0, 579.0, 636.0, 627.0, 582.0, 633.0, 582.0, 630.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [321.0, 312.0, 228.0, 234.0, 314.0, 322.0, 327.0, 312.0, 293.0, 294.0, 290.0, 286.0, 320.0, 319.0, 322.0, 311.0, 324.0, 309.0, 314.0, 319.0, 317.0, 316.0, 308.0, 322.0, 287.0, 292.0, 314.0, 319.0, 319.0, 317.0, 314.0, 319.0, 314.0, 322.0, 296.0, 286.0, 296.0, 277.0, 321.0, 309.0, 288.0, 294.0, 285.0, 285.0, 285.0, 302.0, 287.0, 286.0, 275.0, 269.0, 289.0, 293.0, 318.0, 312.0, 311.0, 319.0, 301.0, 281.0, 315.0, 315.0, 318.0, 315.0, 316.0, 323.0, 313.0, 314.0, 293.0, 294.0, 291.0, 291.0, 316.0, 314.0, 288.0, 299.0, 322.0, 314.0, 316.0, 314.0, 319.0, 314.0, 322.0, 317.0, 316.0, 314.0, 291.0, 296.0, 314.0, 322.0, 285.0, 297.0, 325.0, 311.0, 288.0, 288.0, 295.0, 289.0, 326.0, 
304.0, 317.0, 322.0, 322.0, 317.0, 316.0, 317.0, 308.0, 319.0, 296.0, 286.0, 288.0, 294.0, 306.0, 324.0, 316.0, 314.0, 311.0, 322.0, 262.0, 260.0, 289.0, 298.0, 296.0, 297.0, 322.0, 311.0, 313.0, 317.0, 293.0, 289.0, 319.0, 317.0, 313.0, 317.0, 309.0, 278.0, 314.0, 322.0, 323.0, 307.0, 304.0, 286.0, 320.0, 319.0, 316.0, 320.0, 296.0, 286.0, 313.0, 311.0, 311.0, 322.0, 291.0, 296.0, 314.0, 316.0, 296.0, 286.0, 311.0, 319.0, 321.0, 309.0, 316.0, 314.0, 319.0, 317.0, 317.0, 319.0, 293.0, 286.0, 301.0, 286.0, 298.0, 289.0, 314.0, 322.0, 288.0, 296.0, 286.0, 292.0, 318.0, 312.0, 312.0, 324.0, 313.0, 317.0, 288.0, 294.0, 295.0, 284.0, 314.0, 322.0, 316.0, 311.0, 291.0, 291.0, 316.0, 317.0, 285.0, 297.0, 313.0, 317.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8921345690140595, "mean_processing_ms": 0.24673048959961144, "mean_inference_ms": 1.4810833291212553}, "off_policy_estimator": {}, "info": {"num_steps_trained": 8184000, "num_steps_sampled": 4364800, "sample_time_ms": 22778.758, "load_time_ms": 38.164, "grad_time_ms": 10656.478, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.005775378551334143, "policy_loss": -0.00215825904160738, "vf_loss": 85.0276870727539, "vf_explained_var": 0.7658646106719971, "kl": 0.0019542332738637924, "entropy": 1.1382619142532349, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4364800, "episodes_total": 10912, "training_iteration": 341, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-40-20", "timestamp": 1660257620, "time_this_iter_s": 37.338398933410645, "time_total_s": 16031.712994098663, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 16031.712994098663, "timesteps_since_restore": 4364800, "iterations_since_restore": 341, "perf": {"cpu_util_percent": 27.592452830188673, "ram_util_percent": 58.783018867924525}}
-{"episode_reward_max": 639.0, "episode_reward_min": 462.0, "episode_reward_mean": 610.44, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 228.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 305.22}, "custom_metrics": {"sparse_reward_mean": 211.4, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 187.64, "shaped_reward_min": 142, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.65, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 18.62, "onion_pickup_agent_1_min": 12, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.35, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 17.96, "useful_onion_pickup_agent_1_min": 12, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.19, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.27, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.04, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.06, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.19, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.93, "potting_onion_agent_1_min": 12, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.32, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.58, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.65, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.91, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.43, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.47, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.68, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.04, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.59, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.01, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.19, 
"optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.93, "optimal_onion_potting_agent_1_min": 12, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.19, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.93, "viable_onion_potting_agent_1_min": 12, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [630.0, 573.0, 627.0, 630.0, 636.0, 636.0, 636.0, 582.0, 624.0, 576.0, 575.0, 633.0, 587.0, 630.0, 630.0, 636.0, 624.0, 633.0, 636.0, 590.0, 630.0, 584.0, 576.0, 582.0, 633.0, 633.0, 636.0, 582.0, 582.0, 639.0, 579.0, 564.0, 636.0, 630.0, 587.0, 636.0, 630.0, 590.0, 639.0, 636.0, 582.0, 624.0, 633.0, 587.0, 630.0, 582.0, 630.0, 630.0, 630.0, 636.0, 636.0, 579.0, 587.0, 587.0, 636.0, 584.0, 578.0, 630.0, 636.0, 630.0, 582.0, 579.0, 636.0, 627.0, 582.0, 633.0, 582.0, 630.0, 633.0, 462.0, 636.0, 639.0, 587.0, 576.0, 639.0, 633.0, 633.0, 633.0, 633.0, 630.0, 579.0, 633.0, 636.0, 633.0, 636.0, 582.0, 573.0, 630.0, 582.0, 570.0, 587.0, 573.0, 544.0, 582.0, 630.0, 630.0, 582.0, 630.0, 633.0, 639.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [314.0, 316.0, 288.0, 285.0, 305.0, 322.0, 319.0, 311.0, 314.0, 322.0, 319.0, 317.0, 317.0, 319.0, 287.0, 295.0, 308.0, 316.0, 283.0, 293.0, 290.0, 285.0, 322.0, 311.0, 285.0, 302.0, 318.0, 312.0, 314.0, 316.0, 314.0, 322.0, 311.0, 313.0, 311.0, 322.0, 314.0, 322.0, 285.0, 305.0, 316.0, 314.0, 301.0, 283.0, 295.0, 281.0, 290.0, 292.0, 314.0, 319.0, 311.0, 322.0, 317.0, 319.0, 291.0, 291.0, 286.0, 296.0, 322.0, 317.0, 299.0, 280.0, 281.0, 283.0, 319.0, 317.0, 313.0, 317.0, 309.0, 278.0, 314.0, 322.0, 323.0, 307.0, 304.0, 286.0, 320.0, 319.0, 316.0, 320.0, 296.0, 286.0, 313.0, 311.0, 311.0, 322.0, 291.0, 296.0, 314.0, 316.0, 296.0, 286.0, 311.0, 319.0, 321.0, 309.0, 316.0, 
314.0, 319.0, 317.0, 317.0, 319.0, 293.0, 286.0, 301.0, 286.0, 298.0, 289.0, 314.0, 322.0, 288.0, 296.0, 286.0, 292.0, 318.0, 312.0, 312.0, 324.0, 313.0, 317.0, 288.0, 294.0, 295.0, 284.0, 314.0, 322.0, 316.0, 311.0, 291.0, 291.0, 316.0, 317.0, 285.0, 297.0, 313.0, 317.0, 321.0, 312.0, 228.0, 234.0, 314.0, 322.0, 327.0, 312.0, 293.0, 294.0, 290.0, 286.0, 320.0, 319.0, 322.0, 311.0, 324.0, 309.0, 314.0, 319.0, 317.0, 316.0, 308.0, 322.0, 287.0, 292.0, 314.0, 319.0, 319.0, 317.0, 314.0, 319.0, 314.0, 322.0, 296.0, 286.0, 296.0, 277.0, 321.0, 309.0, 288.0, 294.0, 285.0, 285.0, 285.0, 302.0, 287.0, 286.0, 275.0, 269.0, 289.0, 293.0, 318.0, 312.0, 311.0, 319.0, 301.0, 281.0, 315.0, 315.0, 318.0, 315.0, 316.0, 323.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8908455849362553, "mean_processing_ms": 0.24647214238020762, "mean_inference_ms": 1.4799101322874493}, "off_policy_estimator": {}, "info": {"num_steps_trained": 8208000, "num_steps_sampled": 4377600, "sample_time_ms": 22385.575, "load_time_ms": 38.145, "grad_time_ms": 10547.897, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.005002778489142656, "policy_loss": -0.002570929704234004, "vf_loss": 81.44794464111328, "vf_explained_var": 0.765848696231842, "kl": 0.002198006259277463, "entropy": 1.1421762704849243, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4377600, "episodes_total": 10944, "training_iteration": 342, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-40-48", "timestamp": 1660257648, "time_this_iter_s": 28.1991069316864, "time_total_s": 16059.91210103035, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 16059.91210103035, "timesteps_since_restore": 4377600, "iterations_since_restore": 342, "perf": {"cpu_util_percent": 30.8525, "ram_util_percent": 58.825}}
-{"episode_reward_max": 639.0, "episode_reward_min": 462.0, "episode_reward_mean": 608.72, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 228.0}, "policy_reward_max": {"ppo": 329.0}, "policy_reward_mean": {"ppo": 304.36}, "custom_metrics": {"sparse_reward_mean": 210.8, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 187.12, "shaped_reward_min": 142, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.7, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 18.5, "onion_pickup_agent_1_min": 12, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.47, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 17.92, "useful_onion_pickup_agent_1_min": 12, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.24, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.33, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.06, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.06, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.19, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.77, "potting_onion_agent_1_min": 12, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.25, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.57, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.59, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.04, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.45, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.37, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.58, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.09, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.54, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.04, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.19, 
"optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.77, "optimal_onion_potting_agent_1_min": 12, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.19, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.77, "viable_onion_potting_agent_1_min": 12, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [627.0, 633.0, 630.0, 582.0, 587.0, 582.0, 582.0, 630.0, 530.0, 633.0, 633.0, 639.0, 579.0, 627.0, 633.0, 636.0, 633.0, 587.0, 627.0, 627.0, 630.0, 630.0, 587.0, 582.0, 582.0, 582.0, 582.0, 596.0, 636.0, 633.0, 630.0, 576.0, 582.0, 633.0, 582.0, 630.0, 633.0, 462.0, 636.0, 639.0, 587.0, 576.0, 639.0, 633.0, 633.0, 633.0, 633.0, 630.0, 579.0, 633.0, 636.0, 633.0, 636.0, 582.0, 573.0, 630.0, 582.0, 570.0, 587.0, 573.0, 544.0, 582.0, 630.0, 630.0, 582.0, 630.0, 633.0, 639.0, 630.0, 573.0, 627.0, 630.0, 636.0, 636.0, 636.0, 582.0, 624.0, 576.0, 575.0, 633.0, 587.0, 630.0, 630.0, 636.0, 624.0, 633.0, 636.0, 590.0, 630.0, 584.0, 576.0, 582.0, 633.0, 633.0, 636.0, 582.0, 582.0, 639.0, 579.0, 564.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [308.0, 319.0, 324.0, 309.0, 311.0, 319.0, 296.0, 286.0, 296.0, 291.0, 288.0, 294.0, 293.0, 289.0, 321.0, 309.0, 259.0, 271.0, 319.0, 314.0, 329.0, 304.0, 314.0, 325.0, 283.0, 296.0, 308.0, 319.0, 314.0, 319.0, 319.0, 317.0, 317.0, 316.0, 288.0, 299.0, 308.0, 319.0, 311.0, 316.0, 316.0, 314.0, 308.0, 322.0, 296.0, 291.0, 288.0, 294.0, 296.0, 286.0, 286.0, 296.0, 282.0, 300.0, 301.0, 295.0, 321.0, 315.0, 319.0, 314.0, 316.0, 314.0, 296.0, 280.0, 291.0, 291.0, 316.0, 317.0, 285.0, 297.0, 313.0, 317.0, 321.0, 312.0, 228.0, 234.0, 314.0, 322.0, 327.0, 312.0, 293.0, 294.0, 290.0, 286.0, 320.0, 319.0, 322.0, 311.0, 324.0, 309.0, 314.0, 319.0, 317.0, 316.0, 308.0, 322.0, 287.0, 
292.0, 314.0, 319.0, 319.0, 317.0, 314.0, 319.0, 314.0, 322.0, 296.0, 286.0, 296.0, 277.0, 321.0, 309.0, 288.0, 294.0, 285.0, 285.0, 285.0, 302.0, 287.0, 286.0, 275.0, 269.0, 289.0, 293.0, 318.0, 312.0, 311.0, 319.0, 301.0, 281.0, 315.0, 315.0, 318.0, 315.0, 316.0, 323.0, 314.0, 316.0, 288.0, 285.0, 305.0, 322.0, 319.0, 311.0, 314.0, 322.0, 319.0, 317.0, 317.0, 319.0, 287.0, 295.0, 308.0, 316.0, 283.0, 293.0, 290.0, 285.0, 322.0, 311.0, 285.0, 302.0, 318.0, 312.0, 314.0, 316.0, 314.0, 322.0, 311.0, 313.0, 311.0, 322.0, 314.0, 322.0, 285.0, 305.0, 316.0, 314.0, 301.0, 283.0, 295.0, 281.0, 290.0, 292.0, 314.0, 319.0, 311.0, 322.0, 317.0, 319.0, 291.0, 291.0, 286.0, 296.0, 322.0, 317.0, 299.0, 280.0, 281.0, 283.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8895732427757973, "mean_processing_ms": 0.24621711936805896, "mean_inference_ms": 1.4789333750052633}, "off_policy_estimator": {}, "info": {"num_steps_trained": 8232000, "num_steps_sampled": 4390400, "sample_time_ms": 22479.914, "load_time_ms": 38.369, "grad_time_ms": 10420.997, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0025722135324031115, "policy_loss": -0.00497409887611866, "vf_loss": 81.19109344482422, "vf_explained_var": 0.7659382820129395, "kl": 0.0019239649409428239, "entropy": 1.1455968618392944, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4390400, "episodes_total": 10976, "training_iteration": 343, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-41-18", "timestamp": 1660257678, "time_this_iter_s": 30.595246076583862, "time_total_s": 16090.507347106934, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 16090.507347106934, "timesteps_since_restore": 4390400, "iterations_since_restore": 343, "perf": {"cpu_util_percent": 28.927906976744183, "ram_util_percent": 58.81860465116278}}
-{"episode_reward_max": 639.0, "episode_reward_min": 521.0, "episode_reward_mean": 607.91, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 259.0}, "policy_reward_max": {"ppo": 329.0}, "policy_reward_mean": {"ppo": 303.955}, "custom_metrics": {"sparse_reward_mean": 210.4, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 187.11, "shaped_reward_min": 161, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.38, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.68, "onion_pickup_agent_1_min": 15, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.19, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 18.04, "useful_onion_pickup_agent_1_min": 15, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.22, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.31, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.06, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.05, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.9, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 18.0, "potting_onion_agent_1_min": 15, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.34, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.56, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.64, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.98, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.44, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.42, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.7, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.01, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.63, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.94, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.9, 
"optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 18.0, "optimal_onion_potting_agent_1_min": 15, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.9, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 18.0, "viable_onion_potting_agent_1_min": 15, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [567.0, 582.0, 630.0, 587.0, 630.0, 587.0, 579.0, 581.0, 630.0, 587.0, 576.0, 521.0, 587.0, 639.0, 587.0, 630.0, 630.0, 639.0, 630.0, 630.0, 587.0, 582.0, 582.0, 630.0, 587.0, 639.0, 582.0, 582.0, 630.0, 630.0, 590.0, 630.0, 582.0, 630.0, 633.0, 639.0, 630.0, 573.0, 627.0, 630.0, 636.0, 636.0, 636.0, 582.0, 624.0, 576.0, 575.0, 633.0, 587.0, 630.0, 630.0, 636.0, 624.0, 633.0, 636.0, 590.0, 630.0, 584.0, 576.0, 582.0, 633.0, 633.0, 636.0, 582.0, 582.0, 639.0, 579.0, 564.0, 627.0, 633.0, 630.0, 582.0, 587.0, 582.0, 582.0, 630.0, 530.0, 633.0, 633.0, 639.0, 579.0, 627.0, 633.0, 636.0, 633.0, 587.0, 627.0, 627.0, 630.0, 630.0, 587.0, 582.0, 582.0, 582.0, 582.0, 596.0, 636.0, 633.0, 630.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [278.0, 289.0, 290.0, 292.0, 321.0, 309.0, 288.0, 299.0, 318.0, 312.0, 288.0, 299.0, 287.0, 292.0, 284.0, 297.0, 323.0, 307.0, 288.0, 299.0, 287.0, 289.0, 260.0, 261.0, 296.0, 291.0, 317.0, 322.0, 291.0, 296.0, 316.0, 314.0, 313.0, 317.0, 322.0, 317.0, 311.0, 319.0, 319.0, 311.0, 285.0, 302.0, 299.0, 283.0, 291.0, 291.0, 306.0, 324.0, 296.0, 291.0, 324.0, 315.0, 293.0, 289.0, 298.0, 284.0, 311.0, 319.0, 316.0, 314.0, 296.0, 294.0, 318.0, 312.0, 301.0, 281.0, 315.0, 315.0, 318.0, 315.0, 316.0, 323.0, 314.0, 316.0, 288.0, 285.0, 305.0, 322.0, 319.0, 311.0, 314.0, 322.0, 319.0, 317.0, 317.0, 319.0, 287.0, 295.0, 308.0, 316.0, 283.0, 293.0, 290.0, 285.0, 322.0, 311.0, 285.0, 
302.0, 318.0, 312.0, 314.0, 316.0, 314.0, 322.0, 311.0, 313.0, 311.0, 322.0, 314.0, 322.0, 285.0, 305.0, 316.0, 314.0, 301.0, 283.0, 295.0, 281.0, 290.0, 292.0, 314.0, 319.0, 311.0, 322.0, 317.0, 319.0, 291.0, 291.0, 286.0, 296.0, 322.0, 317.0, 299.0, 280.0, 281.0, 283.0, 308.0, 319.0, 324.0, 309.0, 311.0, 319.0, 296.0, 286.0, 296.0, 291.0, 288.0, 294.0, 293.0, 289.0, 321.0, 309.0, 259.0, 271.0, 319.0, 314.0, 329.0, 304.0, 314.0, 325.0, 283.0, 296.0, 308.0, 319.0, 314.0, 319.0, 319.0, 317.0, 317.0, 316.0, 288.0, 299.0, 308.0, 319.0, 311.0, 316.0, 316.0, 314.0, 308.0, 322.0, 296.0, 291.0, 288.0, 294.0, 296.0, 286.0, 286.0, 296.0, 282.0, 300.0, 301.0, 295.0, 321.0, 315.0, 319.0, 314.0, 316.0, 314.0, 296.0, 280.0]}, "sampler_perf": {"mean_env_wait_ms": 0.888300023019413, "mean_processing_ms": 0.24596248593117787, "mean_inference_ms": 1.4777042667426168}, "off_policy_estimator": {}, "info": {"num_steps_trained": 8256000, "num_steps_sampled": 4403200, "sample_time_ms": 22324.862, "load_time_ms": 38.863, "grad_time_ms": 10570.078, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.001806688029319048, "policy_loss": -0.005986546631902456, "vf_loss": 83.65050506591797, "vf_explained_var": 0.7647177577018738, "kl": 0.002452569780871272, "entropy": 1.1436399221420288, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4403200, "episodes_total": 11008, "training_iteration": 344, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-41-52", "timestamp": 1660257712, "time_this_iter_s": 33.82224774360657, "time_total_s": 16124.32959485054, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 16124.32959485054, "timesteps_since_restore": 4403200, "iterations_since_restore": 344, "perf": {"cpu_util_percent": 29.40625, "ram_util_percent": 58.83958333333334}}
-{"episode_reward_max": 639.0, "episode_reward_min": 521.0, "episode_reward_mean": 603.86, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 255.0}, "policy_reward_max": {"ppo": 329.0}, "policy_reward_mean": {"ppo": 301.93}, "custom_metrics": {"sparse_reward_mean": 208.8, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 186.26, "shaped_reward_min": 161, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.41, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.31, "onion_pickup_agent_1_min": 14, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.22, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.77, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.22, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.26, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.06, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.04, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.95, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.74, "potting_onion_agent_1_min": 14, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.28, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.59, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.65, "useful_dish_pickup_agent_0_min": 4, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.98, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.4, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.41, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.66, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.0, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.58, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.91, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.95, 
"optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.74, "optimal_onion_potting_agent_1_min": 14, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.95, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.74, "viable_onion_potting_agent_1_min": 14, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 627.0, 633.0, 636.0, 582.0, 633.0, 535.0, 621.0, 579.0, 624.0, 573.0, 579.0, 633.0, 587.0, 636.0, 579.0, 576.0, 636.0, 630.0, 587.0, 522.0, 582.0, 579.0, 627.0, 639.0, 627.0, 627.0, 541.0, 639.0, 639.0, 587.0, 582.0, 582.0, 639.0, 579.0, 564.0, 627.0, 633.0, 630.0, 582.0, 587.0, 582.0, 582.0, 630.0, 530.0, 633.0, 633.0, 639.0, 579.0, 627.0, 633.0, 636.0, 633.0, 587.0, 627.0, 627.0, 630.0, 630.0, 587.0, 582.0, 582.0, 582.0, 582.0, 596.0, 636.0, 633.0, 630.0, 576.0, 567.0, 582.0, 630.0, 587.0, 630.0, 587.0, 579.0, 581.0, 630.0, 587.0, 576.0, 521.0, 587.0, 639.0, 587.0, 630.0, 630.0, 639.0, 630.0, 630.0, 587.0, 582.0, 582.0, 630.0, 587.0, 639.0, 582.0, 582.0, 630.0, 630.0, 590.0, 630.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [289.0, 293.0, 309.0, 318.0, 316.0, 317.0, 319.0, 317.0, 294.0, 288.0, 323.0, 310.0, 273.0, 262.0, 310.0, 311.0, 285.0, 294.0, 322.0, 302.0, 292.0, 281.0, 283.0, 296.0, 317.0, 316.0, 288.0, 299.0, 316.0, 320.0, 288.0, 291.0, 280.0, 296.0, 319.0, 317.0, 314.0, 316.0, 283.0, 304.0, 267.0, 255.0, 289.0, 293.0, 294.0, 285.0, 313.0, 314.0, 319.0, 320.0, 321.0, 306.0, 313.0, 314.0, 271.0, 270.0, 314.0, 325.0, 324.0, 315.0, 297.0, 290.0, 299.0, 283.0, 286.0, 296.0, 322.0, 317.0, 299.0, 280.0, 281.0, 283.0, 308.0, 319.0, 324.0, 309.0, 311.0, 319.0, 296.0, 286.0, 296.0, 291.0, 288.0, 294.0, 293.0, 289.0, 321.0, 309.0, 259.0, 271.0, 319.0, 314.0, 329.0, 304.0, 314.0, 325.0, 283.0, 
296.0, 308.0, 319.0, 314.0, 319.0, 319.0, 317.0, 317.0, 316.0, 288.0, 299.0, 308.0, 319.0, 311.0, 316.0, 316.0, 314.0, 308.0, 322.0, 296.0, 291.0, 288.0, 294.0, 296.0, 286.0, 286.0, 296.0, 282.0, 300.0, 301.0, 295.0, 321.0, 315.0, 319.0, 314.0, 316.0, 314.0, 296.0, 280.0, 278.0, 289.0, 290.0, 292.0, 321.0, 309.0, 288.0, 299.0, 318.0, 312.0, 288.0, 299.0, 287.0, 292.0, 284.0, 297.0, 323.0, 307.0, 288.0, 299.0, 287.0, 289.0, 260.0, 261.0, 296.0, 291.0, 317.0, 322.0, 291.0, 296.0, 316.0, 314.0, 313.0, 317.0, 322.0, 317.0, 311.0, 319.0, 319.0, 311.0, 285.0, 302.0, 299.0, 283.0, 291.0, 291.0, 306.0, 324.0, 296.0, 291.0, 324.0, 315.0, 293.0, 289.0, 298.0, 284.0, 311.0, 319.0, 316.0, 314.0, 296.0, 294.0, 318.0, 312.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8870510518721875, "mean_processing_ms": 0.24571562902687222, "mean_inference_ms": 1.477060351190489}, "off_policy_estimator": {}, "info": {"num_steps_trained": 8280000, "num_steps_sampled": 4416000, "sample_time_ms": 22762.706, "load_time_ms": 38.824, "grad_time_ms": 10571.457, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.005129000172019005, "policy_loss": -0.0031147233676165342, "vf_loss": 88.14037322998047, "vf_explained_var": 0.763336181640625, "kl": 0.0019052595598623157, "entropy": 1.1406329870224, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4416000, "episodes_total": 11040, "training_iteration": 345, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-42-32", "timestamp": 1660257752, "time_this_iter_s": 39.47760009765625, "time_total_s": 16163.807194948196, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 16163.807194948196, "timesteps_since_restore": 4416000, "iterations_since_restore": 345, "perf": {"cpu_util_percent": 25.21272727272727, "ram_util_percent": 58.801818181818156}}
-{"episode_reward_max": 639.0, "episode_reward_min": 521.0, "episode_reward_mean": 602.37, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 255.0}, "policy_reward_max": {"ppo": 326.0}, "policy_reward_mean": {"ppo": 301.185}, "custom_metrics": {"sparse_reward_mean": 208.4, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 185.57, "shaped_reward_min": 155, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.45, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.19, "onion_pickup_agent_1_min": 14, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.22, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.65, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.22, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.28, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.04, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.08, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 15.95, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.67, "potting_onion_agent_1_min": 14, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.4, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.72, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 5.6, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.87, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.46, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.58, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.72, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.94, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.63, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.85, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.95, 
"optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.67, "optimal_onion_potting_agent_1_min": 14, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.95, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.67, "viable_onion_potting_agent_1_min": 14, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [627.0, 587.0, 569.0, 555.0, 570.0, 639.0, 522.0, 633.0, 579.0, 639.0, 582.0, 627.0, 639.0, 579.0, 639.0, 630.0, 636.0, 639.0, 630.0, 627.0, 579.0, 579.0, 630.0, 573.0, 570.0, 579.0, 630.0, 581.0, 639.0, 587.0, 587.0, 541.0, 636.0, 633.0, 630.0, 576.0, 567.0, 582.0, 630.0, 587.0, 630.0, 587.0, 579.0, 581.0, 630.0, 587.0, 576.0, 521.0, 587.0, 639.0, 587.0, 630.0, 630.0, 639.0, 630.0, 630.0, 587.0, 582.0, 582.0, 630.0, 587.0, 639.0, 582.0, 582.0, 630.0, 630.0, 590.0, 630.0, 582.0, 627.0, 633.0, 636.0, 582.0, 633.0, 535.0, 621.0, 579.0, 624.0, 573.0, 579.0, 633.0, 587.0, 636.0, 579.0, 576.0, 636.0, 630.0, 587.0, 522.0, 582.0, 579.0, 627.0, 639.0, 627.0, 627.0, 541.0, 639.0, 639.0, 587.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [315.0, 312.0, 299.0, 288.0, 285.0, 284.0, 279.0, 276.0, 287.0, 283.0, 322.0, 317.0, 257.0, 265.0, 319.0, 314.0, 285.0, 294.0, 319.0, 320.0, 291.0, 291.0, 316.0, 311.0, 320.0, 319.0, 298.0, 281.0, 317.0, 322.0, 308.0, 322.0, 319.0, 317.0, 319.0, 320.0, 326.0, 304.0, 318.0, 309.0, 293.0, 286.0, 285.0, 294.0, 321.0, 309.0, 294.0, 279.0, 291.0, 279.0, 283.0, 296.0, 314.0, 316.0, 294.0, 287.0, 322.0, 317.0, 292.0, 295.0, 294.0, 293.0, 260.0, 281.0, 321.0, 315.0, 319.0, 314.0, 316.0, 314.0, 296.0, 280.0, 278.0, 289.0, 290.0, 292.0, 321.0, 309.0, 288.0, 299.0, 318.0, 312.0, 288.0, 299.0, 287.0, 292.0, 284.0, 297.0, 323.0, 307.0, 288.0, 299.0, 287.0, 289.0, 260.0, 261.0, 296.0, 
291.0, 317.0, 322.0, 291.0, 296.0, 316.0, 314.0, 313.0, 317.0, 322.0, 317.0, 311.0, 319.0, 319.0, 311.0, 285.0, 302.0, 299.0, 283.0, 291.0, 291.0, 306.0, 324.0, 296.0, 291.0, 324.0, 315.0, 293.0, 289.0, 298.0, 284.0, 311.0, 319.0, 316.0, 314.0, 296.0, 294.0, 318.0, 312.0, 289.0, 293.0, 309.0, 318.0, 316.0, 317.0, 319.0, 317.0, 294.0, 288.0, 323.0, 310.0, 273.0, 262.0, 310.0, 311.0, 285.0, 294.0, 322.0, 302.0, 292.0, 281.0, 283.0, 296.0, 317.0, 316.0, 288.0, 299.0, 316.0, 320.0, 288.0, 291.0, 280.0, 296.0, 319.0, 317.0, 314.0, 316.0, 283.0, 304.0, 267.0, 255.0, 289.0, 293.0, 294.0, 285.0, 313.0, 314.0, 319.0, 320.0, 321.0, 306.0, 313.0, 314.0, 271.0, 270.0, 314.0, 325.0, 324.0, 315.0, 297.0, 290.0, 299.0, 283.0]}, "sampler_perf": {"mean_env_wait_ms": 0.885814344606481, "mean_processing_ms": 0.24547265426163356, "mean_inference_ms": 1.476598213105171}, "off_policy_estimator": {}, "info": {"num_steps_trained": 8304000, "num_steps_sampled": 4428800, "sample_time_ms": 22961.91, "load_time_ms": 38.412, "grad_time_ms": 10610.867, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0004017222672700882, "policy_loss": -0.007523353677242994, "vf_loss": 84.93380737304688, "vf_explained_var": 0.7618634104728699, "kl": 0.0019265868468210101, "entropy": 1.1366103887557983, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4428800, "episodes_total": 11072, "training_iteration": 346, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-43-09", "timestamp": 1660257789, "time_this_iter_s": 37.03333592414856, "time_total_s": 16200.840530872345, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 16200.840530872345, "timesteps_since_restore": 4428800, "iterations_since_restore": 346, "perf": {"cpu_util_percent": 30.683018867924527, "ram_util_percent": 58.82264150943394}}
-{"episode_reward_max": 639.0, "episode_reward_min": 519.0, "episode_reward_mean": 600.56, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 249.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 300.28}, "custom_metrics": {"sparse_reward_mean": 207.8, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 184.96, "shaped_reward_min": 155, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.59, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 17.88, "onion_pickup_agent_1_min": 14, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.34, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 17.36, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.18, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.3, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.03, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.08, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 16.08, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.39, "potting_onion_agent_1_min": 14, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.22, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.9, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 5.48, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.02, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.48, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.64, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.56, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.06, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.5, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.95, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.08, 
"optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.39, "optimal_onion_potting_agent_1_min": 14, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.08, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.39, "viable_onion_potting_agent_1_min": 14, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [630.0, 587.0, 630.0, 579.0, 582.0, 519.0, 587.0, 630.0, 582.0, 582.0, 582.0, 630.0, 582.0, 570.0, 587.0, 636.0, 630.0, 587.0, 579.0, 530.0, 587.0, 630.0, 630.0, 582.0, 527.0, 582.0, 630.0, 627.0, 576.0, 636.0, 636.0, 630.0, 630.0, 630.0, 590.0, 630.0, 582.0, 627.0, 633.0, 636.0, 582.0, 633.0, 535.0, 621.0, 579.0, 624.0, 573.0, 579.0, 633.0, 587.0, 636.0, 579.0, 576.0, 636.0, 630.0, 587.0, 522.0, 582.0, 579.0, 627.0, 639.0, 627.0, 627.0, 541.0, 639.0, 639.0, 587.0, 582.0, 627.0, 587.0, 569.0, 555.0, 570.0, 639.0, 522.0, 633.0, 579.0, 639.0, 582.0, 627.0, 639.0, 579.0, 639.0, 630.0, 636.0, 639.0, 630.0, 627.0, 579.0, 579.0, 630.0, 573.0, 570.0, 579.0, 630.0, 581.0, 639.0, 587.0, 587.0, 541.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [312.0, 318.0, 287.0, 300.0, 316.0, 314.0, 283.0, 296.0, 285.0, 297.0, 270.0, 249.0, 287.0, 300.0, 319.0, 311.0, 285.0, 297.0, 291.0, 291.0, 288.0, 294.0, 311.0, 319.0, 291.0, 291.0, 295.0, 275.0, 285.0, 302.0, 309.0, 327.0, 311.0, 319.0, 296.0, 291.0, 288.0, 291.0, 262.0, 268.0, 293.0, 294.0, 316.0, 314.0, 311.0, 319.0, 286.0, 296.0, 265.0, 262.0, 287.0, 295.0, 316.0, 314.0, 321.0, 306.0, 287.0, 289.0, 316.0, 320.0, 322.0, 314.0, 313.0, 317.0, 311.0, 319.0, 316.0, 314.0, 296.0, 294.0, 318.0, 312.0, 289.0, 293.0, 309.0, 318.0, 316.0, 317.0, 319.0, 317.0, 294.0, 288.0, 323.0, 310.0, 273.0, 262.0, 310.0, 311.0, 285.0, 294.0, 322.0, 302.0, 292.0, 281.0, 283.0, 296.0, 317.0, 
316.0, 288.0, 299.0, 316.0, 320.0, 288.0, 291.0, 280.0, 296.0, 319.0, 317.0, 314.0, 316.0, 283.0, 304.0, 267.0, 255.0, 289.0, 293.0, 294.0, 285.0, 313.0, 314.0, 319.0, 320.0, 321.0, 306.0, 313.0, 314.0, 271.0, 270.0, 314.0, 325.0, 324.0, 315.0, 297.0, 290.0, 299.0, 283.0, 315.0, 312.0, 299.0, 288.0, 285.0, 284.0, 279.0, 276.0, 287.0, 283.0, 322.0, 317.0, 257.0, 265.0, 319.0, 314.0, 285.0, 294.0, 319.0, 320.0, 291.0, 291.0, 316.0, 311.0, 320.0, 319.0, 298.0, 281.0, 317.0, 322.0, 308.0, 322.0, 319.0, 317.0, 319.0, 320.0, 326.0, 304.0, 318.0, 309.0, 293.0, 286.0, 285.0, 294.0, 321.0, 309.0, 294.0, 279.0, 291.0, 279.0, 283.0, 296.0, 314.0, 316.0, 294.0, 287.0, 322.0, 317.0, 292.0, 295.0, 294.0, 293.0, 260.0, 281.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8845895698087, "mean_processing_ms": 0.24523366351665968, "mean_inference_ms": 1.4761306525958986}, "off_policy_estimator": {}, "info": {"num_steps_trained": 8328000, "num_steps_sampled": 4441600, "sample_time_ms": 22533.553, "load_time_ms": 38.187, "grad_time_ms": 10390.855, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0007659171824343503, "policy_loss": -0.008885729126632214, "vf_loss": 86.87432861328125, "vf_explained_var": 0.7518642544746399, "kl": 0.0019218157976865768, "entropy": 1.1352366209030151, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4441600, "episodes_total": 11104, "training_iteration": 347, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-43-40", "timestamp": 1660257820, "time_this_iter_s": 30.97221803665161, "time_total_s": 16231.812748908997, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 16231.812748908997, "timesteps_since_restore": 4441600, "iterations_since_restore": 347, "perf": {"cpu_util_percent": 33.49999999999999, "ram_util_percent": 59.33636363636363}}
-{"episode_reward_max": 639.0, "episode_reward_min": 519.0, "episode_reward_mean": 602.52, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 249.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 301.26}, "custom_metrics": {"sparse_reward_mean": 208.6, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 185.32, "shaped_reward_min": 155, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.59, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 18.02, "onion_pickup_agent_1_min": 14, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.35, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 17.5, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.19, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.29, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.02, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.08, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 16.04, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.48, "potting_onion_agent_1_min": 14, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.3, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.77, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 5.57, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.0, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.49, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.6, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.6, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.03, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.54, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.93, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.04, 
"optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.48, "optimal_onion_potting_agent_1_min": 14, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.04, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.48, "viable_onion_potting_agent_1_min": 14, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 579.0, 633.0, 627.0, 633.0, 627.0, 576.0, 590.0, 633.0, 582.0, 633.0, 573.0, 576.0, 633.0, 582.0, 633.0, 630.0, 630.0, 587.0, 630.0, 633.0, 630.0, 582.0, 627.0, 582.0, 627.0, 636.0, 587.0, 633.0, 573.0, 633.0, 576.0, 639.0, 639.0, 587.0, 582.0, 627.0, 587.0, 569.0, 555.0, 570.0, 639.0, 522.0, 633.0, 579.0, 639.0, 582.0, 627.0, 639.0, 579.0, 639.0, 630.0, 636.0, 639.0, 630.0, 627.0, 579.0, 579.0, 630.0, 573.0, 570.0, 579.0, 630.0, 581.0, 639.0, 587.0, 587.0, 541.0, 630.0, 587.0, 630.0, 579.0, 582.0, 519.0, 587.0, 630.0, 582.0, 582.0, 582.0, 630.0, 582.0, 570.0, 587.0, 636.0, 630.0, 587.0, 579.0, 530.0, 587.0, 630.0, 630.0, 582.0, 527.0, 582.0, 630.0, 627.0, 576.0, 636.0, 636.0, 630.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [294.0, 288.0, 286.0, 293.0, 316.0, 317.0, 308.0, 319.0, 316.0, 317.0, 308.0, 319.0, 299.0, 277.0, 288.0, 302.0, 324.0, 309.0, 291.0, 291.0, 319.0, 314.0, 291.0, 282.0, 290.0, 286.0, 322.0, 311.0, 293.0, 289.0, 321.0, 312.0, 316.0, 314.0, 311.0, 319.0, 294.0, 293.0, 316.0, 314.0, 311.0, 322.0, 322.0, 308.0, 288.0, 294.0, 313.0, 314.0, 297.0, 285.0, 313.0, 314.0, 319.0, 317.0, 290.0, 297.0, 316.0, 317.0, 285.0, 288.0, 316.0, 317.0, 285.0, 291.0, 314.0, 325.0, 324.0, 315.0, 297.0, 290.0, 299.0, 283.0, 315.0, 312.0, 299.0, 288.0, 285.0, 284.0, 279.0, 276.0, 287.0, 283.0, 322.0, 317.0, 257.0, 265.0, 319.0, 314.0, 285.0, 294.0, 319.0, 320.0, 291.0, 291.0, 316.0, 311.0, 320.0, 
319.0, 298.0, 281.0, 317.0, 322.0, 308.0, 322.0, 319.0, 317.0, 319.0, 320.0, 326.0, 304.0, 318.0, 309.0, 293.0, 286.0, 285.0, 294.0, 321.0, 309.0, 294.0, 279.0, 291.0, 279.0, 283.0, 296.0, 314.0, 316.0, 294.0, 287.0, 322.0, 317.0, 292.0, 295.0, 294.0, 293.0, 260.0, 281.0, 312.0, 318.0, 287.0, 300.0, 316.0, 314.0, 283.0, 296.0, 285.0, 297.0, 270.0, 249.0, 287.0, 300.0, 319.0, 311.0, 285.0, 297.0, 291.0, 291.0, 288.0, 294.0, 311.0, 319.0, 291.0, 291.0, 295.0, 275.0, 285.0, 302.0, 309.0, 327.0, 311.0, 319.0, 296.0, 291.0, 288.0, 291.0, 262.0, 268.0, 293.0, 294.0, 316.0, 314.0, 311.0, 319.0, 286.0, 296.0, 265.0, 262.0, 287.0, 295.0, 316.0, 314.0, 321.0, 306.0, 287.0, 289.0, 316.0, 320.0, 322.0, 314.0, 313.0, 317.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8833591807666914, "mean_processing_ms": 0.24499142247523886, "mean_inference_ms": 1.4752658321565872}, "off_policy_estimator": {}, "info": {"num_steps_trained": 8352000, "num_steps_sampled": 4454400, "sample_time_ms": 22743.076, "load_time_ms": 38.058, "grad_time_ms": 10274.124, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.003061985597014427, "policy_loss": -0.004540739115327597, "vf_loss": 81.69985961914062, "vf_explained_var": 0.7570112347602844, "kl": 0.0020776980090886354, "entropy": 1.1345181465148926, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4454400, "episodes_total": 11136, "training_iteration": 348, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-44-12", "timestamp": 1660257852, "time_this_iter_s": 32.17093515396118, "time_total_s": 16263.983684062958, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 16263.983684062958, "timesteps_since_restore": 4454400, "iterations_since_restore": 348, "perf": {"cpu_util_percent": 33.459999999999994, "ram_util_percent": 58.76888888888889}}
-{"episode_reward_max": 639.0, "episode_reward_min": 519.0, "episode_reward_mean": 601.71, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 249.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 300.855}, "custom_metrics": {"sparse_reward_mean": 208.0, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 185.71, "shaped_reward_min": 159, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.56, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 17.99, "onion_pickup_agent_1_min": 14, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.35, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 17.46, "useful_onion_pickup_agent_1_min": 14, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.19, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.24, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.02, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.06, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.05, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.46, "potting_onion_agent_1_min": 14, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.23, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.73, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.59, "useful_dish_pickup_agent_0_min": 4, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.12, "useful_dish_pickup_agent_1_min": 3, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.51, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.49, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.49, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.15, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.41, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.04, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.05, 
"optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.46, "optimal_onion_potting_agent_1_min": 14, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.05, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.46, "viable_onion_potting_agent_1_min": 14, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [627.0, 639.0, 636.0, 579.0, 587.0, 587.0, 581.0, 639.0, 576.0, 630.0, 630.0, 582.0, 582.0, 584.0, 630.0, 636.0, 582.0, 576.0, 630.0, 590.0, 582.0, 582.0, 587.0, 582.0, 627.0, 582.0, 582.0, 527.0, 630.0, 639.0, 630.0, 582.0, 639.0, 587.0, 587.0, 541.0, 630.0, 587.0, 630.0, 579.0, 582.0, 519.0, 587.0, 630.0, 582.0, 582.0, 582.0, 630.0, 582.0, 570.0, 587.0, 636.0, 630.0, 587.0, 579.0, 530.0, 587.0, 630.0, 630.0, 582.0, 527.0, 582.0, 630.0, 627.0, 576.0, 636.0, 636.0, 630.0, 582.0, 579.0, 633.0, 627.0, 633.0, 627.0, 576.0, 590.0, 633.0, 582.0, 633.0, 573.0, 576.0, 633.0, 582.0, 633.0, 630.0, 630.0, 587.0, 630.0, 633.0, 630.0, 582.0, 627.0, 582.0, 627.0, 636.0, 587.0, 633.0, 573.0, 633.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [314.0, 313.0, 322.0, 317.0, 317.0, 319.0, 291.0, 288.0, 291.0, 296.0, 299.0, 288.0, 296.0, 285.0, 317.0, 322.0, 291.0, 285.0, 319.0, 311.0, 311.0, 319.0, 283.0, 299.0, 293.0, 289.0, 288.0, 296.0, 314.0, 316.0, 319.0, 317.0, 291.0, 291.0, 285.0, 291.0, 310.0, 320.0, 291.0, 299.0, 290.0, 292.0, 283.0, 299.0, 293.0, 294.0, 298.0, 284.0, 320.0, 307.0, 291.0, 291.0, 288.0, 294.0, 256.0, 271.0, 319.0, 311.0, 315.0, 324.0, 316.0, 314.0, 291.0, 291.0, 322.0, 317.0, 292.0, 295.0, 294.0, 293.0, 260.0, 281.0, 312.0, 318.0, 287.0, 300.0, 316.0, 314.0, 283.0, 296.0, 285.0, 297.0, 270.0, 249.0, 287.0, 300.0, 319.0, 311.0, 285.0, 297.0, 291.0, 291.0, 288.0, 294.0, 311.0, 319.0, 291.0, 
291.0, 295.0, 275.0, 285.0, 302.0, 309.0, 327.0, 311.0, 319.0, 296.0, 291.0, 288.0, 291.0, 262.0, 268.0, 293.0, 294.0, 316.0, 314.0, 311.0, 319.0, 286.0, 296.0, 265.0, 262.0, 287.0, 295.0, 316.0, 314.0, 321.0, 306.0, 287.0, 289.0, 316.0, 320.0, 322.0, 314.0, 313.0, 317.0, 294.0, 288.0, 286.0, 293.0, 316.0, 317.0, 308.0, 319.0, 316.0, 317.0, 308.0, 319.0, 299.0, 277.0, 288.0, 302.0, 324.0, 309.0, 291.0, 291.0, 319.0, 314.0, 291.0, 282.0, 290.0, 286.0, 322.0, 311.0, 293.0, 289.0, 321.0, 312.0, 316.0, 314.0, 311.0, 319.0, 294.0, 293.0, 316.0, 314.0, 311.0, 322.0, 322.0, 308.0, 288.0, 294.0, 313.0, 314.0, 297.0, 285.0, 313.0, 314.0, 319.0, 317.0, 290.0, 297.0, 316.0, 317.0, 285.0, 288.0, 316.0, 317.0, 285.0, 291.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8821320498213592, "mean_processing_ms": 0.24474905703796943, "mean_inference_ms": 1.4742298243357532}, "off_policy_estimator": {}, "info": {"num_steps_trained": 8376000, "num_steps_sampled": 4467200, "sample_time_ms": 22958.396, "load_time_ms": 37.77, "grad_time_ms": 10335.342, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.004589398857206106, "policy_loss": -0.003508263034746051, "vf_loss": 86.6531982421875, "vf_explained_var": 0.7629675269126892, "kl": 0.0021643172949552536, "entropy": 1.1353298425674438, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4467200, "episodes_total": 11168, "training_iteration": 349, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-44-45", "timestamp": 1660257885, "time_this_iter_s": 32.81572699546814, "time_total_s": 16296.799411058426, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 16296.799411058426, "timesteps_since_restore": 4467200, "iterations_since_restore": 349, "perf": {"cpu_util_percent": 33.702173913043474, "ram_util_percent": 58.7978260869565}}
-{"episode_reward_max": 639.0, "episode_reward_min": 527.0, "episode_reward_mean": 611.41, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 256.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 305.705}, "custom_metrics": {"sparse_reward_mean": 211.8, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 187.81, "shaped_reward_min": 167, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.79, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.28, "onion_pickup_agent_1_min": 15, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.54, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.79, "useful_onion_pickup_agent_1_min": 14, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.23, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.17, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.04, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.05, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.23, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.78, "potting_onion_agent_1_min": 15, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.39, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.61, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.7, "useful_dish_pickup_agent_0_min": 4, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.01, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.57, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.44, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.64, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.11, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.56, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.06, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.23, 
"optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.78, "optimal_onion_potting_agent_1_min": 15, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.23, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.78, "viable_onion_potting_agent_1_min": 15, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [636.0, 627.0, 633.0, 579.0, 633.0, 633.0, 636.0, 630.0, 627.0, 633.0, 630.0, 630.0, 630.0, 630.0, 636.0, 630.0, 587.0, 627.0, 633.0, 630.0, 587.0, 633.0, 581.0, 630.0, 633.0, 593.0, 579.0, 636.0, 633.0, 633.0, 636.0, 636.0, 576.0, 636.0, 636.0, 630.0, 582.0, 579.0, 633.0, 627.0, 633.0, 627.0, 576.0, 590.0, 633.0, 582.0, 633.0, 573.0, 576.0, 633.0, 582.0, 633.0, 630.0, 630.0, 587.0, 630.0, 633.0, 630.0, 582.0, 627.0, 582.0, 627.0, 636.0, 587.0, 633.0, 573.0, 633.0, 576.0, 627.0, 639.0, 636.0, 579.0, 587.0, 587.0, 581.0, 639.0, 576.0, 630.0, 630.0, 582.0, 582.0, 584.0, 630.0, 636.0, 582.0, 576.0, 630.0, 590.0, 582.0, 582.0, 587.0, 582.0, 627.0, 582.0, 582.0, 527.0, 630.0, 639.0, 630.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [316.0, 320.0, 313.0, 314.0, 319.0, 314.0, 280.0, 299.0, 317.0, 316.0, 322.0, 311.0, 321.0, 315.0, 316.0, 314.0, 318.0, 309.0, 319.0, 314.0, 321.0, 309.0, 319.0, 311.0, 314.0, 316.0, 318.0, 312.0, 314.0, 322.0, 316.0, 314.0, 299.0, 288.0, 308.0, 319.0, 316.0, 317.0, 306.0, 324.0, 296.0, 291.0, 314.0, 319.0, 295.0, 286.0, 308.0, 322.0, 319.0, 314.0, 296.0, 297.0, 297.0, 282.0, 311.0, 325.0, 316.0, 317.0, 319.0, 314.0, 319.0, 317.0, 314.0, 322.0, 287.0, 289.0, 316.0, 320.0, 322.0, 314.0, 313.0, 317.0, 294.0, 288.0, 286.0, 293.0, 316.0, 317.0, 308.0, 319.0, 316.0, 317.0, 308.0, 319.0, 299.0, 277.0, 288.0, 302.0, 324.0, 309.0, 291.0, 291.0, 319.0, 314.0, 291.0, 282.0, 290.0, 
286.0, 322.0, 311.0, 293.0, 289.0, 321.0, 312.0, 316.0, 314.0, 311.0, 319.0, 294.0, 293.0, 316.0, 314.0, 311.0, 322.0, 322.0, 308.0, 288.0, 294.0, 313.0, 314.0, 297.0, 285.0, 313.0, 314.0, 319.0, 317.0, 290.0, 297.0, 316.0, 317.0, 285.0, 288.0, 316.0, 317.0, 285.0, 291.0, 314.0, 313.0, 322.0, 317.0, 317.0, 319.0, 291.0, 288.0, 291.0, 296.0, 299.0, 288.0, 296.0, 285.0, 317.0, 322.0, 291.0, 285.0, 319.0, 311.0, 311.0, 319.0, 283.0, 299.0, 293.0, 289.0, 288.0, 296.0, 314.0, 316.0, 319.0, 317.0, 291.0, 291.0, 285.0, 291.0, 310.0, 320.0, 291.0, 299.0, 290.0, 292.0, 283.0, 299.0, 293.0, 294.0, 298.0, 284.0, 320.0, 307.0, 291.0, 291.0, 288.0, 294.0, 256.0, 271.0, 319.0, 311.0, 315.0, 324.0, 316.0, 314.0, 291.0, 291.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8809197598148512, "mean_processing_ms": 0.2445094292995116, "mean_inference_ms": 1.4734182783968888}, "off_policy_estimator": {}, "info": {"num_steps_trained": 8400000, "num_steps_sampled": 4480000, "sample_time_ms": 23484.152, "load_time_ms": 37.181, "grad_time_ms": 10284.073, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0043442933820188046, "policy_loss": -0.002717310329899192, "vf_loss": 76.19442749023438, "vf_explained_var": 0.7705621719360352, "kl": 0.0019369550282135606, "entropy": 1.1156750917434692, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4480000, "episodes_total": 11200, "training_iteration": 350, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-45-21", "timestamp": 1660257921, "time_this_iter_s": 35.891582012176514, "time_total_s": 16332.690993070602, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 16332.690993070602, "timesteps_since_restore": 4480000, "iterations_since_restore": 350, "perf": {"cpu_util_percent": 33.81372549019608, "ram_util_percent": 58.90000000000001}}
-{"episode_reward_max": 639.0, "episode_reward_min": 524.0, "episode_reward_mean": 609.57, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 253.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 304.785}, "custom_metrics": {"sparse_reward_mean": 211.0, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 187.57, "shaped_reward_min": 164, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.75, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.34, "onion_pickup_agent_1_min": 15, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.42, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.79, "useful_onion_pickup_agent_1_min": 14, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.27, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.21, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.07, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.1, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 16.19, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.79, "potting_onion_agent_1_min": 14, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.38, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.7, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.62, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.04, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.6, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.48, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.6, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.14, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.5, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.09, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.19, 
"optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.79, "optimal_onion_potting_agent_1_min": 14, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.19, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.79, "viable_onion_potting_agent_1_min": 14, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [636.0, 587.0, 587.0, 593.0, 630.0, 627.0, 524.0, 630.0, 582.0, 576.0, 630.0, 579.0, 567.0, 579.0, 590.0, 630.0, 630.0, 627.0, 633.0, 636.0, 582.0, 582.0, 633.0, 636.0, 587.0, 636.0, 633.0, 633.0, 587.0, 573.0, 630.0, 582.0, 633.0, 573.0, 633.0, 576.0, 627.0, 639.0, 636.0, 579.0, 587.0, 587.0, 581.0, 639.0, 576.0, 630.0, 630.0, 582.0, 582.0, 584.0, 630.0, 636.0, 582.0, 576.0, 630.0, 590.0, 582.0, 582.0, 587.0, 582.0, 627.0, 582.0, 582.0, 527.0, 630.0, 639.0, 630.0, 582.0, 636.0, 627.0, 633.0, 579.0, 633.0, 633.0, 636.0, 630.0, 627.0, 633.0, 630.0, 630.0, 630.0, 630.0, 636.0, 630.0, 587.0, 627.0, 633.0, 630.0, 587.0, 633.0, 581.0, 630.0, 633.0, 593.0, 579.0, 636.0, 633.0, 633.0, 636.0, 636.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [314.0, 322.0, 299.0, 288.0, 301.0, 286.0, 302.0, 291.0, 314.0, 316.0, 319.0, 308.0, 253.0, 271.0, 313.0, 317.0, 283.0, 299.0, 277.0, 299.0, 318.0, 312.0, 288.0, 291.0, 274.0, 293.0, 291.0, 288.0, 290.0, 300.0, 318.0, 312.0, 311.0, 319.0, 313.0, 314.0, 316.0, 317.0, 314.0, 322.0, 290.0, 292.0, 300.0, 282.0, 319.0, 314.0, 319.0, 317.0, 291.0, 296.0, 324.0, 312.0, 316.0, 317.0, 316.0, 317.0, 299.0, 288.0, 279.0, 294.0, 311.0, 319.0, 291.0, 291.0, 316.0, 317.0, 285.0, 288.0, 316.0, 317.0, 285.0, 291.0, 314.0, 313.0, 322.0, 317.0, 317.0, 319.0, 291.0, 288.0, 291.0, 296.0, 299.0, 288.0, 296.0, 285.0, 317.0, 322.0, 291.0, 285.0, 319.0, 311.0, 311.0, 319.0, 283.0, 299.0, 293.0, 
289.0, 288.0, 296.0, 314.0, 316.0, 319.0, 317.0, 291.0, 291.0, 285.0, 291.0, 310.0, 320.0, 291.0, 299.0, 290.0, 292.0, 283.0, 299.0, 293.0, 294.0, 298.0, 284.0, 320.0, 307.0, 291.0, 291.0, 288.0, 294.0, 256.0, 271.0, 319.0, 311.0, 315.0, 324.0, 316.0, 314.0, 291.0, 291.0, 316.0, 320.0, 313.0, 314.0, 319.0, 314.0, 280.0, 299.0, 317.0, 316.0, 322.0, 311.0, 321.0, 315.0, 316.0, 314.0, 318.0, 309.0, 319.0, 314.0, 321.0, 309.0, 319.0, 311.0, 314.0, 316.0, 318.0, 312.0, 314.0, 322.0, 316.0, 314.0, 299.0, 288.0, 308.0, 319.0, 316.0, 317.0, 306.0, 324.0, 296.0, 291.0, 314.0, 319.0, 295.0, 286.0, 308.0, 322.0, 319.0, 314.0, 296.0, 297.0, 297.0, 282.0, 311.0, 325.0, 316.0, 317.0, 319.0, 314.0, 319.0, 317.0, 314.0, 322.0]}, "sampler_perf": {"mean_env_wait_ms": 0.879725637823849, "mean_processing_ms": 0.24427457411119732, "mean_inference_ms": 1.472868024343041}, "off_policy_estimator": {}, "info": {"num_steps_trained": 8424000, "num_steps_sampled": 4492800, "sample_time_ms": 23466.741, "load_time_ms": 37.693, "grad_time_ms": 10196.154, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0034591767471283674, "policy_loss": -0.0040799533016979694, "vf_loss": 81.06632232666016, "vf_explained_var": 0.7659358978271484, "kl": 0.0018826290033757687, "entropy": 1.135020136833191, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4492800, "episodes_total": 11232, "training_iteration": 351, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-45-57", "timestamp": 1660257957, "time_this_iter_s": 36.290544748306274, "time_total_s": 16368.981537818909, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 16368.981537818909, "timesteps_since_restore": 4492800, "iterations_since_restore": 351, "perf": {"cpu_util_percent": 32.60196078431373, "ram_util_percent": 58.86274509803921}}
-{"episode_reward_max": 639.0, "episode_reward_min": 516.0, "episode_reward_mean": 613.18, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 247.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 306.59}, "custom_metrics": {"sparse_reward_mean": 212.6, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 187.98, "shaped_reward_min": 156, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.77, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 18.5, "onion_pickup_agent_1_min": 14, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.43, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.86, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.28, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.21, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.1, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.08, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 16.21, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.92, "potting_onion_agent_1_min": 14, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.49, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.7, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 5.61, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.97, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.65, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.55, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.71, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.07, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.64, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.02, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.21, 
"optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.92, "optimal_onion_potting_agent_1_min": 14, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.21, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.92, "viable_onion_potting_agent_1_min": 14, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [567.0, 633.0, 587.0, 516.0, 587.0, 633.0, 609.0, 633.0, 582.0, 633.0, 570.0, 633.0, 579.0, 639.0, 627.0, 636.0, 533.0, 579.0, 633.0, 630.0, 633.0, 582.0, 627.0, 630.0, 630.0, 639.0, 636.0, 636.0, 636.0, 630.0, 582.0, 630.0, 630.0, 639.0, 630.0, 582.0, 636.0, 627.0, 633.0, 579.0, 633.0, 633.0, 636.0, 630.0, 627.0, 633.0, 630.0, 630.0, 630.0, 630.0, 636.0, 630.0, 587.0, 627.0, 633.0, 630.0, 587.0, 633.0, 581.0, 630.0, 633.0, 593.0, 579.0, 636.0, 633.0, 633.0, 636.0, 636.0, 636.0, 587.0, 587.0, 593.0, 630.0, 627.0, 524.0, 630.0, 582.0, 576.0, 630.0, 579.0, 567.0, 579.0, 590.0, 630.0, 630.0, 627.0, 633.0, 636.0, 582.0, 582.0, 633.0, 636.0, 587.0, 636.0, 633.0, 633.0, 587.0, 573.0, 630.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [288.0, 279.0, 318.0, 315.0, 292.0, 295.0, 269.0, 247.0, 286.0, 301.0, 319.0, 314.0, 304.0, 305.0, 306.0, 327.0, 291.0, 291.0, 309.0, 324.0, 282.0, 288.0, 319.0, 314.0, 288.0, 291.0, 317.0, 322.0, 321.0, 306.0, 319.0, 317.0, 267.0, 266.0, 295.0, 284.0, 326.0, 307.0, 311.0, 319.0, 319.0, 314.0, 288.0, 294.0, 313.0, 314.0, 316.0, 314.0, 321.0, 309.0, 316.0, 323.0, 319.0, 317.0, 314.0, 322.0, 319.0, 317.0, 314.0, 316.0, 296.0, 286.0, 319.0, 311.0, 319.0, 311.0, 315.0, 324.0, 316.0, 314.0, 291.0, 291.0, 316.0, 320.0, 313.0, 314.0, 319.0, 314.0, 280.0, 299.0, 317.0, 316.0, 322.0, 311.0, 321.0, 315.0, 316.0, 314.0, 318.0, 309.0, 319.0, 314.0, 321.0, 309.0, 319.0, 311.0, 314.0, 
316.0, 318.0, 312.0, 314.0, 322.0, 316.0, 314.0, 299.0, 288.0, 308.0, 319.0, 316.0, 317.0, 306.0, 324.0, 296.0, 291.0, 314.0, 319.0, 295.0, 286.0, 308.0, 322.0, 319.0, 314.0, 296.0, 297.0, 297.0, 282.0, 311.0, 325.0, 316.0, 317.0, 319.0, 314.0, 319.0, 317.0, 314.0, 322.0, 314.0, 322.0, 299.0, 288.0, 301.0, 286.0, 302.0, 291.0, 314.0, 316.0, 319.0, 308.0, 253.0, 271.0, 313.0, 317.0, 283.0, 299.0, 277.0, 299.0, 318.0, 312.0, 288.0, 291.0, 274.0, 293.0, 291.0, 288.0, 290.0, 300.0, 318.0, 312.0, 311.0, 319.0, 313.0, 314.0, 316.0, 317.0, 314.0, 322.0, 290.0, 292.0, 300.0, 282.0, 319.0, 314.0, 319.0, 317.0, 291.0, 296.0, 324.0, 312.0, 316.0, 317.0, 316.0, 317.0, 299.0, 288.0, 279.0, 294.0, 311.0, 319.0, 291.0, 291.0]}, "sampler_perf": {"mean_env_wait_ms": 0.878542017918733, "mean_processing_ms": 0.2440427958629624, "mean_inference_ms": 1.4723767990503938}, "off_policy_estimator": {}, "info": {"num_steps_trained": 8448000, "num_steps_sampled": 4505600, "sample_time_ms": 23873.275, "load_time_ms": 37.543, "grad_time_ms": 10389.923, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0013407707447186112, "policy_loss": -0.006131558213382959, "vf_loss": 80.36180877685547, "vf_explained_var": 0.7696139812469482, "kl": 0.0018947357311844826, "entropy": 1.127698540687561, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4505600, "episodes_total": 11264, "training_iteration": 352, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-46-31", "timestamp": 1660257991, "time_this_iter_s": 34.20055317878723, "time_total_s": 16403.182090997696, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 16403.182090997696, "timesteps_since_restore": 4505600, "iterations_since_restore": 352, "perf": {"cpu_util_percent": 33.57142857142857, "ram_util_percent": 58.8061224489796}}
-{"episode_reward_max": 639.0, "episode_reward_min": 459.0, "episode_reward_mean": 602.27, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 222.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 301.135}, "custom_metrics": {"sparse_reward_mean": 208.6, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 185.07, "shaped_reward_min": 139, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.58, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 18.27, "onion_pickup_agent_1_min": 12, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.25, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 19, "useful_onion_pickup_agent_1_mean": 17.53, "useful_onion_pickup_agent_1_min": 11, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.26, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.28, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.11, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.11, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 16.04, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.66, "potting_onion_agent_1_min": 11, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.43, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.7, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 5.48, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.91, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.64, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.57, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.65, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.92, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.58, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.88, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.04, 
"optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.66, "optimal_onion_potting_agent_1_min": 11, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.04, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.66, "viable_onion_potting_agent_1_min": 11, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [519.0, 582.0, 630.0, 582.0, 582.0, 576.0, 582.0, 582.0, 582.0, 579.0, 567.0, 633.0, 459.0, 630.0, 582.0, 579.0, 582.0, 636.0, 584.0, 587.0, 627.0, 579.0, 630.0, 639.0, 530.0, 582.0, 630.0, 582.0, 636.0, 513.0, 630.0, 579.0, 633.0, 633.0, 636.0, 636.0, 636.0, 587.0, 587.0, 593.0, 630.0, 627.0, 524.0, 630.0, 582.0, 576.0, 630.0, 579.0, 567.0, 579.0, 590.0, 630.0, 630.0, 627.0, 633.0, 636.0, 582.0, 582.0, 633.0, 636.0, 587.0, 636.0, 633.0, 633.0, 587.0, 573.0, 630.0, 582.0, 567.0, 633.0, 587.0, 516.0, 587.0, 633.0, 609.0, 633.0, 582.0, 633.0, 570.0, 633.0, 579.0, 639.0, 627.0, 636.0, 533.0, 579.0, 633.0, 630.0, 633.0, 582.0, 627.0, 630.0, 630.0, 639.0, 636.0, 636.0, 636.0, 630.0, 582.0, 630.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [264.0, 255.0, 289.0, 293.0, 314.0, 316.0, 296.0, 286.0, 294.0, 288.0, 283.0, 293.0, 286.0, 296.0, 295.0, 287.0, 296.0, 286.0, 285.0, 294.0, 287.0, 280.0, 311.0, 322.0, 237.0, 222.0, 311.0, 319.0, 285.0, 297.0, 295.0, 284.0, 296.0, 286.0, 316.0, 320.0, 298.0, 286.0, 298.0, 289.0, 305.0, 322.0, 285.0, 294.0, 320.0, 310.0, 322.0, 317.0, 270.0, 260.0, 291.0, 291.0, 316.0, 314.0, 298.0, 284.0, 311.0, 325.0, 259.0, 254.0, 314.0, 316.0, 302.0, 277.0, 316.0, 317.0, 319.0, 314.0, 319.0, 317.0, 314.0, 322.0, 314.0, 322.0, 299.0, 288.0, 301.0, 286.0, 302.0, 291.0, 314.0, 316.0, 319.0, 308.0, 253.0, 271.0, 313.0, 317.0, 283.0, 299.0, 277.0, 299.0, 318.0, 312.0, 288.0, 291.0, 274.0, 
293.0, 291.0, 288.0, 290.0, 300.0, 318.0, 312.0, 311.0, 319.0, 313.0, 314.0, 316.0, 317.0, 314.0, 322.0, 290.0, 292.0, 300.0, 282.0, 319.0, 314.0, 319.0, 317.0, 291.0, 296.0, 324.0, 312.0, 316.0, 317.0, 316.0, 317.0, 299.0, 288.0, 279.0, 294.0, 311.0, 319.0, 291.0, 291.0, 288.0, 279.0, 318.0, 315.0, 292.0, 295.0, 269.0, 247.0, 286.0, 301.0, 319.0, 314.0, 304.0, 305.0, 306.0, 327.0, 291.0, 291.0, 309.0, 324.0, 282.0, 288.0, 319.0, 314.0, 288.0, 291.0, 317.0, 322.0, 321.0, 306.0, 319.0, 317.0, 267.0, 266.0, 295.0, 284.0, 326.0, 307.0, 311.0, 319.0, 319.0, 314.0, 288.0, 294.0, 313.0, 314.0, 316.0, 314.0, 321.0, 309.0, 316.0, 323.0, 319.0, 317.0, 314.0, 322.0, 319.0, 317.0, 314.0, 316.0, 296.0, 286.0, 319.0, 311.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8773547258582282, "mean_processing_ms": 0.24380845402332824, "mean_inference_ms": 1.471635610090532}, "off_policy_estimator": {}, "info": {"num_steps_trained": 8472000, "num_steps_sampled": 4518400, "sample_time_ms": 23706.56, "load_time_ms": 37.425, "grad_time_ms": 10690.015, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0011104041477665305, "policy_loss": -0.00902702659368515, "vf_loss": 84.78372955322266, "vf_explained_var": 0.7670722007751465, "kl": 0.0019426337676122785, "entropy": 1.1235073804855347, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4518400, "episodes_total": 11296, "training_iteration": 353, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-47-03", "timestamp": 1660258023, "time_this_iter_s": 31.92423105239868, "time_total_s": 16435.106322050095, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 16435.106322050095, "timesteps_since_restore": 4518400, "iterations_since_restore": 353, "perf": {"cpu_util_percent": 34.76222222222222, "ram_util_percent": 59.38222222222222}}
-{"episode_reward_max": 639.0, "episode_reward_min": 459.0, "episode_reward_mean": 600.11, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 222.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 300.055}, "custom_metrics": {"sparse_reward_mean": 207.8, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 184.51, "shaped_reward_min": 139, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.56, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 18.08, "onion_pickup_agent_1_min": 12, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.23, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 19, "useful_onion_pickup_agent_1_mean": 17.3, "useful_onion_pickup_agent_1_min": 11, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.26, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.2, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.1, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.06, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.02, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.56, "potting_onion_agent_1_min": 11, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.54, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 5.68, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 5.54, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.9, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.71, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.55, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.62, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.89, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.56, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.85, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.02, 
"optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.56, "optimal_onion_potting_agent_1_min": 11, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.02, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.56, "viable_onion_potting_agent_1_min": 11, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [587.0, 627.0, 579.0, 582.0, 579.0, 582.0, 633.0, 582.0, 573.0, 633.0, 630.0, 582.0, 639.0, 576.0, 582.0, 639.0, 633.0, 587.0, 627.0, 636.0, 636.0, 630.0, 627.0, 579.0, 536.0, 636.0, 627.0, 582.0, 633.0, 579.0, 582.0, 582.0, 587.0, 573.0, 630.0, 582.0, 567.0, 633.0, 587.0, 516.0, 587.0, 633.0, 609.0, 633.0, 582.0, 633.0, 570.0, 633.0, 579.0, 639.0, 627.0, 636.0, 533.0, 579.0, 633.0, 630.0, 633.0, 582.0, 627.0, 630.0, 630.0, 639.0, 636.0, 636.0, 636.0, 630.0, 582.0, 630.0, 519.0, 582.0, 630.0, 582.0, 582.0, 576.0, 582.0, 582.0, 582.0, 579.0, 567.0, 633.0, 459.0, 630.0, 582.0, 579.0, 582.0, 636.0, 584.0, 587.0, 627.0, 579.0, 630.0, 639.0, 530.0, 582.0, 630.0, 582.0, 636.0, 513.0, 630.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [294.0, 293.0, 313.0, 314.0, 287.0, 292.0, 291.0, 291.0, 285.0, 294.0, 296.0, 286.0, 321.0, 312.0, 293.0, 289.0, 280.0, 293.0, 324.0, 309.0, 314.0, 316.0, 298.0, 284.0, 317.0, 322.0, 283.0, 293.0, 301.0, 281.0, 322.0, 317.0, 311.0, 322.0, 293.0, 294.0, 311.0, 316.0, 319.0, 317.0, 322.0, 314.0, 318.0, 312.0, 313.0, 314.0, 288.0, 291.0, 274.0, 262.0, 316.0, 320.0, 311.0, 316.0, 286.0, 296.0, 314.0, 319.0, 289.0, 290.0, 288.0, 294.0, 291.0, 291.0, 299.0, 288.0, 279.0, 294.0, 311.0, 319.0, 291.0, 291.0, 288.0, 279.0, 318.0, 315.0, 292.0, 295.0, 269.0, 247.0, 286.0, 301.0, 319.0, 314.0, 304.0, 305.0, 306.0, 327.0, 291.0, 291.0, 309.0, 324.0, 282.0, 288.0, 319.0, 314.0, 288.0, 
291.0, 317.0, 322.0, 321.0, 306.0, 319.0, 317.0, 267.0, 266.0, 295.0, 284.0, 326.0, 307.0, 311.0, 319.0, 319.0, 314.0, 288.0, 294.0, 313.0, 314.0, 316.0, 314.0, 321.0, 309.0, 316.0, 323.0, 319.0, 317.0, 314.0, 322.0, 319.0, 317.0, 314.0, 316.0, 296.0, 286.0, 319.0, 311.0, 264.0, 255.0, 289.0, 293.0, 314.0, 316.0, 296.0, 286.0, 294.0, 288.0, 283.0, 293.0, 286.0, 296.0, 295.0, 287.0, 296.0, 286.0, 285.0, 294.0, 287.0, 280.0, 311.0, 322.0, 237.0, 222.0, 311.0, 319.0, 285.0, 297.0, 295.0, 284.0, 296.0, 286.0, 316.0, 320.0, 298.0, 286.0, 298.0, 289.0, 305.0, 322.0, 285.0, 294.0, 320.0, 310.0, 322.0, 317.0, 270.0, 260.0, 291.0, 291.0, 316.0, 314.0, 298.0, 284.0, 311.0, 325.0, 259.0, 254.0, 314.0, 316.0, 302.0, 277.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8761658335288303, "mean_processing_ms": 0.24357312011055882, "mean_inference_ms": 1.4706370891819096}, "off_policy_estimator": {}, "info": {"num_steps_trained": 8496000, "num_steps_sampled": 4531200, "sample_time_ms": 23737.686, "load_time_ms": 37.309, "grad_time_ms": 10476.005, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0014189122011885047, "policy_loss": -0.006124518811702728, "vf_loss": 81.1131591796875, "vf_explained_var": 0.7619540095329285, "kl": 0.002155100228264928, "entropy": 1.1357545852661133, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4531200, "episodes_total": 11328, "training_iteration": 354, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-47-35", "timestamp": 1660258055, "time_this_iter_s": 31.991327047348022, "time_total_s": 16467.097649097443, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 16467.097649097443, "timesteps_since_restore": 4531200, "iterations_since_restore": 354, "perf": {"cpu_util_percent": 34.031111111111116, "ram_util_percent": 59.01333333333334}}
-{"episode_reward_max": 639.0, "episode_reward_min": 459.0, "episode_reward_mean": 601.69, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 222.0}, "policy_reward_max": {"ppo": 326.0}, "policy_reward_mean": {"ppo": 300.845}, "custom_metrics": {"sparse_reward_mean": 208.2, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 185.29, "shaped_reward_min": 139, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.61, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 18.08, "onion_pickup_agent_1_min": 12, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.33, "useful_onion_pickup_agent_0_min": 13, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 17.41, "useful_onion_pickup_agent_1_min": 11, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.26, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.17, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.08, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.07, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.02, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.67, "potting_onion_agent_1_min": 11, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.52, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 5.64, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.62, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.92, "useful_dish_pickup_agent_1_min": 3, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.65, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.53, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.7, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.83, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.64, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.79, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.02, 
"optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.67, "optimal_onion_potting_agent_1_min": 11, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.02, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.67, "viable_onion_potting_agent_1_min": 11, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 636.0, 567.0, 570.0, 639.0, 636.0, 587.0, 630.0, 630.0, 636.0, 633.0, 587.0, 639.0, 570.0, 630.0, 636.0, 582.0, 587.0, 633.0, 639.0, 630.0, 633.0, 582.0, 639.0, 579.0, 587.0, 630.0, 587.0, 633.0, 627.0, 576.0, 630.0, 636.0, 630.0, 582.0, 630.0, 519.0, 582.0, 630.0, 582.0, 582.0, 576.0, 582.0, 582.0, 582.0, 579.0, 567.0, 633.0, 459.0, 630.0, 582.0, 579.0, 582.0, 636.0, 584.0, 587.0, 627.0, 579.0, 630.0, 639.0, 530.0, 582.0, 630.0, 582.0, 636.0, 513.0, 630.0, 579.0, 587.0, 627.0, 579.0, 582.0, 579.0, 582.0, 633.0, 582.0, 573.0, 633.0, 630.0, 582.0, 639.0, 576.0, 582.0, 639.0, 633.0, 587.0, 627.0, 636.0, 636.0, 630.0, 627.0, 579.0, 536.0, 636.0, 627.0, 582.0, 633.0, 579.0, 582.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [299.0, 283.0, 316.0, 320.0, 284.0, 283.0, 278.0, 292.0, 317.0, 322.0, 316.0, 320.0, 293.0, 294.0, 311.0, 319.0, 323.0, 307.0, 311.0, 325.0, 319.0, 314.0, 296.0, 291.0, 324.0, 315.0, 285.0, 285.0, 323.0, 307.0, 324.0, 312.0, 296.0, 286.0, 289.0, 298.0, 319.0, 314.0, 322.0, 317.0, 326.0, 304.0, 316.0, 317.0, 288.0, 294.0, 319.0, 320.0, 288.0, 291.0, 291.0, 296.0, 314.0, 316.0, 299.0, 288.0, 308.0, 325.0, 313.0, 314.0, 286.0, 290.0, 324.0, 306.0, 319.0, 317.0, 314.0, 316.0, 296.0, 286.0, 319.0, 311.0, 264.0, 255.0, 289.0, 293.0, 314.0, 316.0, 296.0, 286.0, 294.0, 288.0, 283.0, 293.0, 286.0, 296.0, 295.0, 287.0, 296.0, 286.0, 285.0, 294.0, 287.0, 280.0, 311.0, 322.0, 237.0, 
222.0, 311.0, 319.0, 285.0, 297.0, 295.0, 284.0, 296.0, 286.0, 316.0, 320.0, 298.0, 286.0, 298.0, 289.0, 305.0, 322.0, 285.0, 294.0, 320.0, 310.0, 322.0, 317.0, 270.0, 260.0, 291.0, 291.0, 316.0, 314.0, 298.0, 284.0, 311.0, 325.0, 259.0, 254.0, 314.0, 316.0, 302.0, 277.0, 294.0, 293.0, 313.0, 314.0, 287.0, 292.0, 291.0, 291.0, 285.0, 294.0, 296.0, 286.0, 321.0, 312.0, 293.0, 289.0, 280.0, 293.0, 324.0, 309.0, 314.0, 316.0, 298.0, 284.0, 317.0, 322.0, 283.0, 293.0, 301.0, 281.0, 322.0, 317.0, 311.0, 322.0, 293.0, 294.0, 311.0, 316.0, 319.0, 317.0, 322.0, 314.0, 318.0, 312.0, 313.0, 314.0, 288.0, 291.0, 274.0, 262.0, 316.0, 320.0, 311.0, 316.0, 286.0, 296.0, 314.0, 319.0, 289.0, 290.0, 288.0, 294.0, 291.0, 291.0]}, "sampler_perf": {"mean_env_wait_ms": 0.874971949450553, "mean_processing_ms": 0.24333505417716514, "mean_inference_ms": 1.4694074728869129}, "off_policy_estimator": {}, "info": {"num_steps_trained": 8520000, "num_steps_sampled": 4544000, "sample_time_ms": 22906.057, "load_time_ms": 36.777, "grad_time_ms": 10398.662, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.002145820064470172, "policy_loss": -0.005216358229517937, "vf_loss": 79.28690338134766, "vf_explained_var": 0.7675671577453613, "kl": 0.0018057804554700851, "entropy": 1.1330245733261108, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4544000, "episodes_total": 11360, "training_iteration": 355, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-48-06", "timestamp": 1660258086, "time_this_iter_s": 30.381834983825684, "time_total_s": 16497.47948408127, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 16497.47948408127, "timesteps_since_restore": 4544000, "iterations_since_restore": 355, "perf": {"cpu_util_percent": 34.25348837209302, "ram_util_percent": 58.85813953488373}}
-{"episode_reward_max": 639.0, "episode_reward_min": 513.0, "episode_reward_mean": 605.33, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 254.0}, "policy_reward_max": {"ppo": 326.0}, "policy_reward_mean": {"ppo": 302.665}, "custom_metrics": {"sparse_reward_mean": 209.4, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 186.53, "shaped_reward_min": 153, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.51, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 18.29, "onion_pickup_agent_1_min": 14, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 16.22, "useful_onion_pickup_agent_0_min": 13, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 17.74, "useful_onion_pickup_agent_1_min": 14, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.26, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.18, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.06, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.05, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.0, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.87, "potting_onion_agent_1_min": 14, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.54, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 13, "dish_pickup_agent_1_mean": 5.68, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.72, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.92, "useful_dish_pickup_agent_1_min": 3, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.6, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 0.59, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.71, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.9, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.65, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.86, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.0, 
"optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.87, "optimal_onion_potting_agent_1_min": 14, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.0, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.87, "viable_onion_potting_agent_1_min": 14, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [630.0, 582.0, 633.0, 582.0, 590.0, 636.0, 627.0, 587.0, 630.0, 636.0, 630.0, 630.0, 630.0, 582.0, 582.0, 539.0, 582.0, 582.0, 576.0, 590.0, 627.0, 582.0, 627.0, 630.0, 579.0, 639.0, 582.0, 633.0, 576.0, 579.0, 582.0, 584.0, 636.0, 513.0, 630.0, 579.0, 587.0, 627.0, 579.0, 582.0, 579.0, 582.0, 633.0, 582.0, 573.0, 633.0, 630.0, 582.0, 639.0, 576.0, 582.0, 639.0, 633.0, 587.0, 627.0, 636.0, 636.0, 630.0, 627.0, 579.0, 536.0, 636.0, 627.0, 582.0, 633.0, 579.0, 582.0, 582.0, 582.0, 636.0, 567.0, 570.0, 639.0, 636.0, 587.0, 630.0, 630.0, 636.0, 633.0, 587.0, 639.0, 570.0, 630.0, 636.0, 582.0, 587.0, 633.0, 639.0, 630.0, 633.0, 582.0, 639.0, 579.0, 587.0, 630.0, 587.0, 633.0, 627.0, 576.0, 630.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [313.0, 317.0, 294.0, 288.0, 316.0, 317.0, 293.0, 289.0, 288.0, 302.0, 319.0, 317.0, 311.0, 316.0, 301.0, 286.0, 318.0, 312.0, 316.0, 320.0, 316.0, 314.0, 316.0, 314.0, 316.0, 314.0, 289.0, 293.0, 288.0, 294.0, 265.0, 274.0, 283.0, 299.0, 288.0, 294.0, 274.0, 302.0, 296.0, 294.0, 321.0, 306.0, 294.0, 288.0, 308.0, 319.0, 321.0, 309.0, 287.0, 292.0, 320.0, 319.0, 301.0, 281.0, 318.0, 315.0, 293.0, 283.0, 290.0, 289.0, 291.0, 291.0, 296.0, 288.0, 311.0, 325.0, 259.0, 254.0, 314.0, 316.0, 302.0, 277.0, 294.0, 293.0, 313.0, 314.0, 287.0, 292.0, 291.0, 291.0, 285.0, 294.0, 296.0, 286.0, 321.0, 312.0, 293.0, 289.0, 280.0, 293.0, 324.0, 309.0, 314.0, 316.0, 298.0, 284.0, 317.0, 
322.0, 283.0, 293.0, 301.0, 281.0, 322.0, 317.0, 311.0, 322.0, 293.0, 294.0, 311.0, 316.0, 319.0, 317.0, 322.0, 314.0, 318.0, 312.0, 313.0, 314.0, 288.0, 291.0, 274.0, 262.0, 316.0, 320.0, 311.0, 316.0, 286.0, 296.0, 314.0, 319.0, 289.0, 290.0, 288.0, 294.0, 291.0, 291.0, 299.0, 283.0, 316.0, 320.0, 284.0, 283.0, 278.0, 292.0, 317.0, 322.0, 316.0, 320.0, 293.0, 294.0, 311.0, 319.0, 323.0, 307.0, 311.0, 325.0, 319.0, 314.0, 296.0, 291.0, 324.0, 315.0, 285.0, 285.0, 323.0, 307.0, 324.0, 312.0, 296.0, 286.0, 289.0, 298.0, 319.0, 314.0, 322.0, 317.0, 326.0, 304.0, 316.0, 317.0, 288.0, 294.0, 319.0, 320.0, 288.0, 291.0, 291.0, 296.0, 314.0, 316.0, 299.0, 288.0, 308.0, 325.0, 313.0, 314.0, 286.0, 290.0, 324.0, 306.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8737833276000079, "mean_processing_ms": 0.243097780431969, "mean_inference_ms": 1.4681762334073296}, "off_policy_estimator": {}, "info": {"num_steps_trained": 8544000, "num_steps_sampled": 4556800, "sample_time_ms": 22541.516, "load_time_ms": 36.891, "grad_time_ms": 10138.825, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.00407541124150157, "policy_loss": -0.0034555860329419374, "vf_loss": 80.97249603271484, "vf_explained_var": 0.7684476375579834, "kl": 0.001921386457979679, "entropy": 1.1324900388717651, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4556800, "episodes_total": 11392, "training_iteration": 356, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-48-36", "timestamp": 1660258116, "time_this_iter_s": 30.78407096862793, "time_total_s": 16528.263555049896, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 16528.263555049896, "timesteps_since_restore": 4556800, "iterations_since_restore": 356, "perf": {"cpu_util_percent": 32.61818181818182, "ram_util_percent": 59.45681818181819}}
-{"episode_reward_max": 639.0, "episode_reward_min": 539.0, "episode_reward_mean": 609.69, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 265.0}, "policy_reward_max": {"ppo": 326.0}, "policy_reward_mean": {"ppo": 304.845}, "custom_metrics": {"sparse_reward_mean": 211.0, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 187.69, "shaped_reward_min": 167, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.62, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 18.37, "onion_pickup_agent_1_min": 13, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 16.4, "useful_onion_pickup_agent_0_min": 13, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 17.91, "useful_onion_pickup_agent_1_min": 12, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.23, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.18, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.03, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.07, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.16, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.96, "potting_onion_agent_1_min": 13, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.46, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 13, "dish_pickup_agent_1_mean": 5.7, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.69, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.97, "useful_dish_pickup_agent_1_min": 3, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.56, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 0.55, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.71, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.96, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.67, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.91, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.16, 
"optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.96, "optimal_onion_potting_agent_1_min": 13, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.16, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.96, "viable_onion_potting_agent_1_min": 13, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [636.0, 633.0, 636.0, 639.0, 627.0, 630.0, 582.0, 633.0, 639.0, 573.0, 636.0, 579.0, 633.0, 636.0, 633.0, 636.0, 579.0, 582.0, 579.0, 630.0, 587.0, 636.0, 582.0, 636.0, 633.0, 627.0, 630.0, 639.0, 584.0, 627.0, 627.0, 576.0, 633.0, 579.0, 582.0, 582.0, 582.0, 636.0, 567.0, 570.0, 639.0, 636.0, 587.0, 630.0, 630.0, 636.0, 633.0, 587.0, 639.0, 570.0, 630.0, 636.0, 582.0, 587.0, 633.0, 639.0, 630.0, 633.0, 582.0, 639.0, 579.0, 587.0, 630.0, 587.0, 633.0, 627.0, 576.0, 630.0, 630.0, 582.0, 633.0, 582.0, 590.0, 636.0, 627.0, 587.0, 630.0, 636.0, 630.0, 630.0, 630.0, 582.0, 582.0, 539.0, 582.0, 582.0, 576.0, 590.0, 627.0, 582.0, 627.0, 630.0, 579.0, 639.0, 582.0, 633.0, 576.0, 579.0, 582.0, 584.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [319.0, 317.0, 316.0, 317.0, 319.0, 317.0, 325.0, 314.0, 313.0, 314.0, 321.0, 309.0, 296.0, 286.0, 313.0, 320.0, 317.0, 322.0, 287.0, 286.0, 312.0, 324.0, 286.0, 293.0, 321.0, 312.0, 314.0, 322.0, 314.0, 319.0, 316.0, 320.0, 291.0, 288.0, 293.0, 289.0, 292.0, 287.0, 316.0, 314.0, 288.0, 299.0, 322.0, 314.0, 286.0, 296.0, 316.0, 320.0, 314.0, 319.0, 313.0, 314.0, 316.0, 314.0, 322.0, 317.0, 290.0, 294.0, 311.0, 316.0, 319.0, 308.0, 293.0, 283.0, 314.0, 319.0, 289.0, 290.0, 288.0, 294.0, 291.0, 291.0, 299.0, 283.0, 316.0, 320.0, 284.0, 283.0, 278.0, 292.0, 317.0, 322.0, 316.0, 320.0, 293.0, 294.0, 311.0, 319.0, 323.0, 307.0, 311.0, 325.0, 319.0, 314.0, 296.0, 291.0, 324.0, 
315.0, 285.0, 285.0, 323.0, 307.0, 324.0, 312.0, 296.0, 286.0, 289.0, 298.0, 319.0, 314.0, 322.0, 317.0, 326.0, 304.0, 316.0, 317.0, 288.0, 294.0, 319.0, 320.0, 288.0, 291.0, 291.0, 296.0, 314.0, 316.0, 299.0, 288.0, 308.0, 325.0, 313.0, 314.0, 286.0, 290.0, 324.0, 306.0, 313.0, 317.0, 294.0, 288.0, 316.0, 317.0, 293.0, 289.0, 288.0, 302.0, 319.0, 317.0, 311.0, 316.0, 301.0, 286.0, 318.0, 312.0, 316.0, 320.0, 316.0, 314.0, 316.0, 314.0, 316.0, 314.0, 289.0, 293.0, 288.0, 294.0, 265.0, 274.0, 283.0, 299.0, 288.0, 294.0, 274.0, 302.0, 296.0, 294.0, 321.0, 306.0, 294.0, 288.0, 308.0, 319.0, 321.0, 309.0, 287.0, 292.0, 320.0, 319.0, 301.0, 281.0, 318.0, 315.0, 293.0, 283.0, 290.0, 289.0, 291.0, 291.0, 296.0, 288.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8725992979631204, "mean_processing_ms": 0.24286049780551075, "mean_inference_ms": 1.4669091441555409}, "off_policy_estimator": {}, "info": {"num_steps_trained": 8568000, "num_steps_sampled": 4569600, "sample_time_ms": 22487.797, "load_time_ms": 37.05, "grad_time_ms": 10111.337, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0030587399378418922, "policy_loss": -0.004343332722783089, "vf_loss": 79.63693237304688, "vf_explained_var": 0.7643921971321106, "kl": 0.0021028893534094095, "entropy": 1.1232417821884155, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4569600, "episodes_total": 11424, "training_iteration": 357, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-49-07", "timestamp": 1660258147, "time_this_iter_s": 30.161853790283203, "time_total_s": 16558.42540884018, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 16558.42540884018, "timesteps_since_restore": 4569600, "iterations_since_restore": 357, "perf": {"cpu_util_percent": 33.550000000000004, "ram_util_percent": 58.85714285714285}}
-{"episode_reward_max": 639.0, "episode_reward_min": 467.0, "episode_reward_mean": 606.19, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 223.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 303.095}, "custom_metrics": {"sparse_reward_mean": 210.0, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 186.19, "shaped_reward_min": 147, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.4, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 18.42, "onion_pickup_agent_1_min": 13, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 16.17, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 17.86, "useful_onion_pickup_agent_1_min": 12, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.19, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.25, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.04, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.09, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.0, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.9, "potting_onion_agent_1_min": 13, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.52, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 13, "dish_pickup_agent_1_mean": 5.76, "dish_pickup_agent_1_min": 4, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.59, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.84, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.67, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 0.61, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.67, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 4.98, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.61, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.94, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.0, 
"optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.9, "optimal_onion_potting_agent_1_min": 13, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.0, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.9, "viable_onion_potting_agent_1_min": 13, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [570.0, 582.0, 627.0, 639.0, 579.0, 579.0, 587.0, 630.0, 579.0, 627.0, 582.0, 636.0, 630.0, 624.0, 518.0, 570.0, 636.0, 576.0, 587.0, 624.0, 636.0, 587.0, 627.0, 630.0, 630.0, 584.0, 579.0, 636.0, 576.0, 630.0, 578.0, 467.0, 633.0, 627.0, 576.0, 630.0, 630.0, 582.0, 633.0, 582.0, 590.0, 636.0, 627.0, 587.0, 630.0, 636.0, 630.0, 630.0, 630.0, 582.0, 582.0, 539.0, 582.0, 582.0, 576.0, 590.0, 627.0, 582.0, 627.0, 630.0, 579.0, 639.0, 582.0, 633.0, 576.0, 579.0, 582.0, 584.0, 636.0, 633.0, 636.0, 639.0, 627.0, 630.0, 582.0, 633.0, 639.0, 573.0, 636.0, 579.0, 633.0, 636.0, 633.0, 636.0, 579.0, 582.0, 579.0, 630.0, 587.0, 636.0, 582.0, 636.0, 633.0, 627.0, 630.0, 639.0, 584.0, 627.0, 627.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [282.0, 288.0, 293.0, 289.0, 313.0, 314.0, 322.0, 317.0, 296.0, 283.0, 290.0, 289.0, 288.0, 299.0, 316.0, 314.0, 291.0, 288.0, 318.0, 309.0, 288.0, 294.0, 324.0, 312.0, 316.0, 314.0, 302.0, 322.0, 247.0, 271.0, 285.0, 285.0, 319.0, 317.0, 291.0, 285.0, 296.0, 291.0, 313.0, 311.0, 321.0, 315.0, 301.0, 286.0, 308.0, 319.0, 322.0, 308.0, 312.0, 318.0, 286.0, 298.0, 288.0, 291.0, 319.0, 317.0, 285.0, 291.0, 318.0, 312.0, 292.0, 286.0, 223.0, 244.0, 308.0, 325.0, 313.0, 314.0, 286.0, 290.0, 324.0, 306.0, 313.0, 317.0, 294.0, 288.0, 316.0, 317.0, 293.0, 289.0, 288.0, 302.0, 319.0, 317.0, 311.0, 316.0, 301.0, 286.0, 318.0, 312.0, 316.0, 320.0, 316.0, 314.0, 316.0, 314.0, 316.0, 
314.0, 289.0, 293.0, 288.0, 294.0, 265.0, 274.0, 283.0, 299.0, 288.0, 294.0, 274.0, 302.0, 296.0, 294.0, 321.0, 306.0, 294.0, 288.0, 308.0, 319.0, 321.0, 309.0, 287.0, 292.0, 320.0, 319.0, 301.0, 281.0, 318.0, 315.0, 293.0, 283.0, 290.0, 289.0, 291.0, 291.0, 296.0, 288.0, 319.0, 317.0, 316.0, 317.0, 319.0, 317.0, 325.0, 314.0, 313.0, 314.0, 321.0, 309.0, 296.0, 286.0, 313.0, 320.0, 317.0, 322.0, 287.0, 286.0, 312.0, 324.0, 286.0, 293.0, 321.0, 312.0, 314.0, 322.0, 314.0, 319.0, 316.0, 320.0, 291.0, 288.0, 293.0, 289.0, 292.0, 287.0, 316.0, 314.0, 288.0, 299.0, 322.0, 314.0, 286.0, 296.0, 316.0, 320.0, 314.0, 319.0, 313.0, 314.0, 316.0, 314.0, 322.0, 317.0, 290.0, 294.0, 311.0, 316.0, 319.0, 308.0, 293.0, 283.0]}, "sampler_perf": {"mean_env_wait_ms": 0.871423269952275, "mean_processing_ms": 0.24262389296401352, "mean_inference_ms": 1.4655941462374882}, "off_policy_estimator": {}, "info": {"num_steps_trained": 8592000, "num_steps_sampled": 4582400, "sample_time_ms": 22265.267, "load_time_ms": 37.124, "grad_time_ms": 9998.565, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.00020107599266339093, "policy_loss": -0.007807094603776932, "vf_loss": 81.7130355834961, "vf_explained_var": 0.7695291638374329, "kl": 0.0018338669324293733, "entropy": 1.1305631399154663, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4582400, "episodes_total": 11456, "training_iteration": 358, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-49-36", "timestamp": 1660258176, "time_this_iter_s": 28.821206092834473, "time_total_s": 16587.246614933014, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 16587.246614933014, "timesteps_since_restore": 4582400, "iterations_since_restore": 358, "perf": {"cpu_util_percent": 32.69024390243902, "ram_util_percent": 58.856097560975606}}
-{"episode_reward_max": 639.0, "episode_reward_min": 467.0, "episode_reward_mean": 604.94, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 223.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 302.47}, "custom_metrics": {"sparse_reward_mean": 209.6, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 185.74, "shaped_reward_min": 147, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.77, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 18.18, "onion_pickup_agent_1_min": 13, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 16.56, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 17.46, "useful_onion_pickup_agent_1_min": 12, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.2, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.3, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.03, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.12, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.29, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.59, "potting_onion_agent_1_min": 13, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.37, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.81, "dish_pickup_agent_1_min": 4, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.41, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.89, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.73, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.55, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.52, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.13, "soup_pickup_agent_1_min": 4, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.45, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.06, "soup_delivery_agent_1_min": 4, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.29, 
"optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.59, "optimal_onion_potting_agent_1_min": 13, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.29, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.59, "viable_onion_potting_agent_1_min": 13, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 630.0, 630.0, 615.0, 579.0, 579.0, 587.0, 633.0, 630.0, 636.0, 584.0, 582.0, 636.0, 633.0, 576.0, 630.0, 581.0, 579.0, 579.0, 636.0, 630.0, 587.0, 576.0, 584.0, 630.0, 587.0, 576.0, 633.0, 582.0, 636.0, 579.0, 582.0, 576.0, 579.0, 582.0, 584.0, 636.0, 633.0, 636.0, 639.0, 627.0, 630.0, 582.0, 633.0, 639.0, 573.0, 636.0, 579.0, 633.0, 636.0, 633.0, 636.0, 579.0, 582.0, 579.0, 630.0, 587.0, 636.0, 582.0, 636.0, 633.0, 627.0, 630.0, 639.0, 584.0, 627.0, 627.0, 576.0, 570.0, 582.0, 627.0, 639.0, 579.0, 579.0, 587.0, 630.0, 579.0, 627.0, 582.0, 636.0, 630.0, 624.0, 518.0, 570.0, 636.0, 576.0, 587.0, 624.0, 636.0, 587.0, 627.0, 630.0, 630.0, 584.0, 579.0, 636.0, 576.0, 630.0, 578.0, 467.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [299.0, 280.0, 306.0, 324.0, 311.0, 319.0, 308.0, 307.0, 288.0, 291.0, 288.0, 291.0, 288.0, 299.0, 308.0, 325.0, 314.0, 316.0, 319.0, 317.0, 290.0, 294.0, 291.0, 291.0, 321.0, 315.0, 319.0, 314.0, 286.0, 290.0, 306.0, 324.0, 288.0, 293.0, 290.0, 289.0, 299.0, 280.0, 322.0, 314.0, 313.0, 317.0, 283.0, 304.0, 285.0, 291.0, 293.0, 291.0, 324.0, 306.0, 288.0, 299.0, 280.0, 296.0, 314.0, 319.0, 291.0, 291.0, 317.0, 319.0, 294.0, 285.0, 296.0, 286.0, 293.0, 283.0, 290.0, 289.0, 291.0, 291.0, 296.0, 288.0, 319.0, 317.0, 316.0, 317.0, 319.0, 317.0, 325.0, 314.0, 313.0, 314.0, 321.0, 309.0, 296.0, 286.0, 313.0, 320.0, 317.0, 322.0, 287.0, 286.0, 312.0, 324.0, 286.0, 293.0, 321.0, 
312.0, 314.0, 322.0, 314.0, 319.0, 316.0, 320.0, 291.0, 288.0, 293.0, 289.0, 292.0, 287.0, 316.0, 314.0, 288.0, 299.0, 322.0, 314.0, 286.0, 296.0, 316.0, 320.0, 314.0, 319.0, 313.0, 314.0, 316.0, 314.0, 322.0, 317.0, 290.0, 294.0, 311.0, 316.0, 319.0, 308.0, 293.0, 283.0, 282.0, 288.0, 293.0, 289.0, 313.0, 314.0, 322.0, 317.0, 296.0, 283.0, 290.0, 289.0, 288.0, 299.0, 316.0, 314.0, 291.0, 288.0, 318.0, 309.0, 288.0, 294.0, 324.0, 312.0, 316.0, 314.0, 302.0, 322.0, 247.0, 271.0, 285.0, 285.0, 319.0, 317.0, 291.0, 285.0, 296.0, 291.0, 313.0, 311.0, 321.0, 315.0, 301.0, 286.0, 308.0, 319.0, 322.0, 308.0, 312.0, 318.0, 286.0, 298.0, 288.0, 291.0, 319.0, 317.0, 285.0, 291.0, 318.0, 312.0, 292.0, 286.0, 223.0, 244.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8702563301594608, "mean_processing_ms": 0.24238978711918313, "mean_inference_ms": 1.4642680017402931}, "off_policy_estimator": {}, "info": {"num_steps_trained": 8616000, "num_steps_sampled": 4595200, "sample_time_ms": 22073.451, "load_time_ms": 37.129, "grad_time_ms": 9911.472, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0011308585526421666, "policy_loss": -0.006429137196391821, "vf_loss": 81.29705047607422, "vf_explained_var": 0.7726876139640808, "kl": 0.0017355438321828842, "entropy": 1.139426350593567, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4595200, "episodes_total": 11488, "training_iteration": 359, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-50-06", "timestamp": 1660258206, "time_this_iter_s": 30.0307719707489, "time_total_s": 16617.277386903763, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 16617.277386903763, "timesteps_since_restore": 4595200, "iterations_since_restore": 359, "perf": {"cpu_util_percent": 36.002325581395354, "ram_util_percent": 58.82325581395349}}
-{"episode_reward_max": 639.0, "episode_reward_min": 467.0, "episode_reward_mean": 605.46, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 223.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 302.73}, "custom_metrics": {"sparse_reward_mean": 209.8, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 185.86, "shaped_reward_min": 147, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.85, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 18.09, "onion_pickup_agent_1_min": 14, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 16.61, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 17.37, "useful_onion_pickup_agent_1_min": 12, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.22, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.21, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.05, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.09, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.33, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.59, "potting_onion_agent_1_min": 13, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.39, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.91, "dish_pickup_agent_1_min": 4, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.31, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.94, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.77, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.6, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.51, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.17, "soup_pickup_agent_1_min": 4, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.44, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.1, "soup_delivery_agent_1_min": 4, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.33, 
"optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.59, "optimal_onion_potting_agent_1_min": 13, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.33, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.59, "viable_onion_potting_agent_1_min": 13, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [636.0, 593.0, 633.0, 639.0, 627.0, 582.0, 587.0, 587.0, 639.0, 636.0, 582.0, 630.0, 633.0, 633.0, 630.0, 582.0, 627.0, 633.0, 639.0, 636.0, 639.0, 521.0, 633.0, 636.0, 630.0, 582.0, 558.0, 576.0, 639.0, 636.0, 624.0, 636.0, 584.0, 627.0, 627.0, 576.0, 570.0, 582.0, 627.0, 639.0, 579.0, 579.0, 587.0, 630.0, 579.0, 627.0, 582.0, 636.0, 630.0, 624.0, 518.0, 570.0, 636.0, 576.0, 587.0, 624.0, 636.0, 587.0, 627.0, 630.0, 630.0, 584.0, 579.0, 636.0, 576.0, 630.0, 578.0, 467.0, 579.0, 630.0, 630.0, 615.0, 579.0, 579.0, 587.0, 633.0, 630.0, 636.0, 584.0, 582.0, 636.0, 633.0, 576.0, 630.0, 581.0, 579.0, 579.0, 636.0, 630.0, 587.0, 576.0, 584.0, 630.0, 587.0, 576.0, 633.0, 582.0, 636.0, 579.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [314.0, 322.0, 293.0, 300.0, 311.0, 322.0, 322.0, 317.0, 316.0, 311.0, 289.0, 293.0, 294.0, 293.0, 288.0, 299.0, 324.0, 315.0, 319.0, 317.0, 296.0, 286.0, 309.0, 321.0, 319.0, 314.0, 320.0, 313.0, 319.0, 311.0, 293.0, 289.0, 318.0, 309.0, 317.0, 316.0, 324.0, 315.0, 314.0, 322.0, 319.0, 320.0, 256.0, 265.0, 316.0, 317.0, 314.0, 322.0, 311.0, 319.0, 294.0, 288.0, 281.0, 277.0, 276.0, 300.0, 319.0, 320.0, 314.0, 322.0, 310.0, 314.0, 316.0, 320.0, 290.0, 294.0, 311.0, 316.0, 319.0, 308.0, 293.0, 283.0, 282.0, 288.0, 293.0, 289.0, 313.0, 314.0, 322.0, 317.0, 296.0, 283.0, 290.0, 289.0, 288.0, 299.0, 316.0, 314.0, 291.0, 288.0, 318.0, 309.0, 288.0, 294.0, 324.0, 312.0, 316.0, 
314.0, 302.0, 322.0, 247.0, 271.0, 285.0, 285.0, 319.0, 317.0, 291.0, 285.0, 296.0, 291.0, 313.0, 311.0, 321.0, 315.0, 301.0, 286.0, 308.0, 319.0, 322.0, 308.0, 312.0, 318.0, 286.0, 298.0, 288.0, 291.0, 319.0, 317.0, 285.0, 291.0, 318.0, 312.0, 292.0, 286.0, 223.0, 244.0, 299.0, 280.0, 306.0, 324.0, 311.0, 319.0, 308.0, 307.0, 288.0, 291.0, 288.0, 291.0, 288.0, 299.0, 308.0, 325.0, 314.0, 316.0, 319.0, 317.0, 290.0, 294.0, 291.0, 291.0, 321.0, 315.0, 319.0, 314.0, 286.0, 290.0, 306.0, 324.0, 288.0, 293.0, 290.0, 289.0, 299.0, 280.0, 322.0, 314.0, 313.0, 317.0, 283.0, 304.0, 285.0, 291.0, 293.0, 291.0, 324.0, 306.0, 288.0, 299.0, 280.0, 296.0, 314.0, 319.0, 291.0, 291.0, 317.0, 319.0, 294.0, 285.0, 296.0, 286.0]}, "sampler_perf": {"mean_env_wait_ms": 0.86909953447402, "mean_processing_ms": 0.24215862097735263, "mean_inference_ms": 1.4630909533739367}, "off_policy_estimator": {}, "info": {"num_steps_trained": 8640000, "num_steps_sampled": 4608000, "sample_time_ms": 21874.868, "load_time_ms": 37.48, "grad_time_ms": 9746.645, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0013312319060787559, "policy_loss": -0.005779942963272333, "vf_loss": 76.8069839477539, "vf_explained_var": 0.7698413729667664, "kl": 0.001875889953225851, "entropy": 1.1390520334243774, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4608000, "episodes_total": 11520, "training_iteration": 360, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-50-38", "timestamp": 1660258238, "time_this_iter_s": 32.26046180725098, "time_total_s": 16649.537848711014, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 16649.537848711014, "timesteps_since_restore": 4608000, "iterations_since_restore": 360, "perf": {"cpu_util_percent": 32.559999999999995, "ram_util_percent": 58.84666666666664}}
-{"episode_reward_max": 639.0, "episode_reward_min": 467.0, "episode_reward_mean": 605.93, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 223.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 302.965}, "custom_metrics": {"sparse_reward_mean": 209.8, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 186.33, "shaped_reward_min": 147, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.79, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 18.2, "onion_pickup_agent_1_min": 14, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 16.6, "useful_onion_pickup_agent_0_min": 13, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.5, "useful_onion_pickup_agent_1_min": 12, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.2, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.23, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.02, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.09, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.25, "potting_onion_agent_0_min": 13, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.66, "potting_onion_agent_1_min": 13, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.42, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.82, "dish_pickup_agent_1_min": 4, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.4, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.05, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.75, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.55, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.54, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.11, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.48, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.05, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.25, 
"optimal_onion_potting_agent_0_min": 13, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.66, "optimal_onion_potting_agent_1_min": 13, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.25, "viable_onion_potting_agent_0_min": 13, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.66, "viable_onion_potting_agent_1_min": 13, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 579.0, 630.0, 633.0, 582.0, 636.0, 590.0, 630.0, 587.0, 636.0, 576.0, 630.0, 630.0, 582.0, 576.0, 630.0, 582.0, 587.0, 633.0, 582.0, 636.0, 630.0, 582.0, 587.0, 630.0, 573.0, 630.0, 630.0, 525.0, 579.0, 627.0, 633.0, 576.0, 630.0, 578.0, 467.0, 579.0, 630.0, 630.0, 615.0, 579.0, 579.0, 587.0, 633.0, 630.0, 636.0, 584.0, 582.0, 636.0, 633.0, 576.0, 630.0, 581.0, 579.0, 579.0, 636.0, 630.0, 587.0, 576.0, 584.0, 630.0, 587.0, 576.0, 633.0, 582.0, 636.0, 579.0, 582.0, 636.0, 593.0, 633.0, 639.0, 627.0, 582.0, 587.0, 587.0, 639.0, 636.0, 582.0, 630.0, 633.0, 633.0, 630.0, 582.0, 627.0, 633.0, 639.0, 636.0, 639.0, 521.0, 633.0, 636.0, 630.0, 582.0, 558.0, 576.0, 639.0, 636.0, 624.0, 636.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [288.0, 291.0, 282.0, 297.0, 319.0, 311.0, 321.0, 312.0, 286.0, 296.0, 314.0, 322.0, 296.0, 294.0, 311.0, 319.0, 298.0, 289.0, 322.0, 314.0, 285.0, 291.0, 313.0, 317.0, 316.0, 314.0, 291.0, 291.0, 298.0, 278.0, 308.0, 322.0, 293.0, 289.0, 293.0, 294.0, 316.0, 317.0, 291.0, 291.0, 319.0, 317.0, 321.0, 309.0, 290.0, 292.0, 296.0, 291.0, 316.0, 314.0, 287.0, 286.0, 313.0, 317.0, 318.0, 312.0, 265.0, 260.0, 287.0, 292.0, 314.0, 313.0, 316.0, 317.0, 285.0, 291.0, 318.0, 312.0, 292.0, 286.0, 223.0, 244.0, 299.0, 280.0, 306.0, 324.0, 311.0, 319.0, 308.0, 307.0, 288.0, 291.0, 288.0, 291.0, 288.0, 299.0, 308.0, 325.0, 314.0, 316.0, 319.0, 317.0, 290.0, 294.0, 291.0, 291.0, 321.0, 
315.0, 319.0, 314.0, 286.0, 290.0, 306.0, 324.0, 288.0, 293.0, 290.0, 289.0, 299.0, 280.0, 322.0, 314.0, 313.0, 317.0, 283.0, 304.0, 285.0, 291.0, 293.0, 291.0, 324.0, 306.0, 288.0, 299.0, 280.0, 296.0, 314.0, 319.0, 291.0, 291.0, 317.0, 319.0, 294.0, 285.0, 296.0, 286.0, 314.0, 322.0, 293.0, 300.0, 311.0, 322.0, 322.0, 317.0, 316.0, 311.0, 289.0, 293.0, 294.0, 293.0, 288.0, 299.0, 324.0, 315.0, 319.0, 317.0, 296.0, 286.0, 309.0, 321.0, 319.0, 314.0, 320.0, 313.0, 319.0, 311.0, 293.0, 289.0, 318.0, 309.0, 317.0, 316.0, 324.0, 315.0, 314.0, 322.0, 319.0, 320.0, 256.0, 265.0, 316.0, 317.0, 314.0, 322.0, 311.0, 319.0, 294.0, 288.0, 281.0, 277.0, 276.0, 300.0, 319.0, 320.0, 314.0, 322.0, 310.0, 314.0, 316.0, 320.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8679613450602781, "mean_processing_ms": 0.24193358188716718, "mean_inference_ms": 1.4622622611677922}, "off_policy_estimator": {}, "info": {"num_steps_trained": 8664000, "num_steps_sampled": 4620800, "sample_time_ms": 21811.818, "load_time_ms": 37.251, "grad_time_ms": 9734.507, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0007378067821264267, "policy_loss": -0.006473819259554148, "vf_loss": 77.80099487304688, "vf_explained_var": 0.7715656161308289, "kl": 0.0017216805135831237, "entropy": 1.1369411945343018, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4620800, "episodes_total": 11552, "training_iteration": 361, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-51-13", "timestamp": 1660258273, "time_this_iter_s": 35.53581404685974, "time_total_s": 16685.073662757874, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 16685.073662757874, "timesteps_since_restore": 4620800, "iterations_since_restore": 361, "perf": {"cpu_util_percent": 32.418, "ram_util_percent": 58.85999999999999}}
-{"episode_reward_max": 639.0, "episode_reward_min": 521.0, "episode_reward_mean": 608.25, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 256.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 304.125}, "custom_metrics": {"sparse_reward_mean": 210.6, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 187.05, "shaped_reward_min": 158, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.76, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 18.23, "onion_pickup_agent_1_min": 14, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.58, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.71, "useful_onion_pickup_agent_1_min": 14, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.2, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.13, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.05, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.04, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.21, "potting_onion_agent_0_min": 13, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.81, "potting_onion_agent_1_min": 14, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.6, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.74, "dish_pickup_agent_1_min": 4, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.5, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.03, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.8, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.51, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.64, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.04, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.59, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.96, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.21, 
"optimal_onion_potting_agent_0_min": 13, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.81, "optimal_onion_potting_agent_1_min": 14, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.21, "viable_onion_potting_agent_0_min": 13, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.81, "viable_onion_potting_agent_1_min": 14, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [624.0, 587.0, 582.0, 582.0, 579.0, 636.0, 630.0, 567.0, 630.0, 633.0, 636.0, 582.0, 621.0, 630.0, 587.0, 587.0, 639.0, 630.0, 587.0, 587.0, 587.0, 630.0, 636.0, 627.0, 561.0, 627.0, 630.0, 636.0, 582.0, 582.0, 579.0, 587.0, 582.0, 636.0, 579.0, 582.0, 636.0, 593.0, 633.0, 639.0, 627.0, 582.0, 587.0, 587.0, 639.0, 636.0, 582.0, 630.0, 633.0, 633.0, 630.0, 582.0, 627.0, 633.0, 639.0, 636.0, 639.0, 521.0, 633.0, 636.0, 630.0, 582.0, 558.0, 576.0, 639.0, 636.0, 624.0, 636.0, 579.0, 579.0, 630.0, 633.0, 582.0, 636.0, 590.0, 630.0, 587.0, 636.0, 576.0, 630.0, 630.0, 582.0, 576.0, 630.0, 582.0, 587.0, 633.0, 582.0, 636.0, 630.0, 582.0, 587.0, 630.0, 573.0, 630.0, 630.0, 525.0, 579.0, 627.0, 633.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [306.0, 318.0, 296.0, 291.0, 288.0, 294.0, 287.0, 295.0, 288.0, 291.0, 319.0, 317.0, 316.0, 314.0, 281.0, 286.0, 321.0, 309.0, 316.0, 317.0, 319.0, 317.0, 294.0, 288.0, 307.0, 314.0, 308.0, 322.0, 291.0, 296.0, 285.0, 302.0, 314.0, 325.0, 313.0, 317.0, 295.0, 292.0, 293.0, 294.0, 294.0, 293.0, 311.0, 319.0, 321.0, 315.0, 316.0, 311.0, 284.0, 277.0, 316.0, 311.0, 311.0, 319.0, 320.0, 316.0, 294.0, 288.0, 297.0, 285.0, 282.0, 297.0, 304.0, 283.0, 291.0, 291.0, 317.0, 319.0, 294.0, 285.0, 296.0, 286.0, 314.0, 322.0, 293.0, 300.0, 311.0, 322.0, 322.0, 317.0, 316.0, 311.0, 289.0, 293.0, 294.0, 293.0, 288.0, 299.0, 324.0, 315.0, 319.0, 317.0, 296.0, 286.0, 309.0, 321.0, 319.0, 
314.0, 320.0, 313.0, 319.0, 311.0, 293.0, 289.0, 318.0, 309.0, 317.0, 316.0, 324.0, 315.0, 314.0, 322.0, 319.0, 320.0, 256.0, 265.0, 316.0, 317.0, 314.0, 322.0, 311.0, 319.0, 294.0, 288.0, 281.0, 277.0, 276.0, 300.0, 319.0, 320.0, 314.0, 322.0, 310.0, 314.0, 316.0, 320.0, 288.0, 291.0, 282.0, 297.0, 319.0, 311.0, 321.0, 312.0, 286.0, 296.0, 314.0, 322.0, 296.0, 294.0, 311.0, 319.0, 298.0, 289.0, 322.0, 314.0, 285.0, 291.0, 313.0, 317.0, 316.0, 314.0, 291.0, 291.0, 298.0, 278.0, 308.0, 322.0, 293.0, 289.0, 293.0, 294.0, 316.0, 317.0, 291.0, 291.0, 319.0, 317.0, 321.0, 309.0, 290.0, 292.0, 296.0, 291.0, 316.0, 314.0, 287.0, 286.0, 313.0, 317.0, 318.0, 312.0, 265.0, 260.0, 287.0, 292.0, 314.0, 313.0, 316.0, 317.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8668385855769685, "mean_processing_ms": 0.2417133493697533, "mean_inference_ms": 1.4618269528034153}, "off_policy_estimator": {}, "info": {"num_steps_trained": 8688000, "num_steps_sampled": 4633600, "sample_time_ms": 22112.322, "load_time_ms": 37.291, "grad_time_ms": 9542.038, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0006689311121590436, "policy_loss": -0.006603518966585398, "vf_loss": 78.373291015625, "vf_explained_var": 0.7716686129570007, "kl": 0.0016899490728974342, "entropy": 1.1297602653503418, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4633600, "episodes_total": 11584, "training_iteration": 362, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-51-49", "timestamp": 1660258309, "time_this_iter_s": 35.281026124954224, "time_total_s": 16720.354688882828, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 16720.354688882828, "timesteps_since_restore": 4633600, "iterations_since_restore": 362, "perf": {"cpu_util_percent": 29.87, "ram_util_percent": 58.83999999999998}}
-{"episode_reward_max": 639.0, "episode_reward_min": 525.0, "episode_reward_mean": 605.21, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 260.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 302.605}, "custom_metrics": {"sparse_reward_mean": 209.6, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 186.01, "shaped_reward_min": 146, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.54, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.47, "onion_pickup_agent_1_min": 15, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.27, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.87, "useful_onion_pickup_agent_1_min": 14, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.26, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.25, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.08, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.09, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 15.91, "potting_onion_agent_0_min": 13, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.88, "potting_onion_agent_1_min": 15, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.69, "dish_pickup_agent_0_min": 5, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.68, "dish_pickup_agent_1_min": 4, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.57, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 4.91, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.84, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.52, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.67, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 4.97, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.59, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.89, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.91, 
"optimal_onion_potting_agent_0_min": 13, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.88, "optimal_onion_potting_agent_1_min": 15, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.91, "viable_onion_potting_agent_0_min": 13, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.88, "viable_onion_potting_agent_1_min": 15, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 630.0, 582.0, 579.0, 633.0, 579.0, 582.0, 582.0, 582.0, 627.0, 582.0, 584.0, 633.0, 630.0, 587.0, 546.0, 587.0, 576.0, 630.0, 636.0, 630.0, 630.0, 573.0, 587.0, 636.0, 590.0, 579.0, 582.0, 633.0, 639.0, 576.0, 630.0, 639.0, 636.0, 624.0, 636.0, 579.0, 579.0, 630.0, 633.0, 582.0, 636.0, 590.0, 630.0, 587.0, 636.0, 576.0, 630.0, 630.0, 582.0, 576.0, 630.0, 582.0, 587.0, 633.0, 582.0, 636.0, 630.0, 582.0, 587.0, 630.0, 573.0, 630.0, 630.0, 525.0, 579.0, 627.0, 633.0, 624.0, 587.0, 582.0, 582.0, 579.0, 636.0, 630.0, 567.0, 630.0, 633.0, 636.0, 582.0, 621.0, 630.0, 587.0, 587.0, 639.0, 630.0, 587.0, 587.0, 587.0, 630.0, 636.0, 627.0, 561.0, 627.0, 630.0, 636.0, 582.0, 582.0, 579.0, 587.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [296.0, 286.0, 313.0, 317.0, 291.0, 291.0, 282.0, 297.0, 314.0, 319.0, 285.0, 294.0, 291.0, 291.0, 288.0, 294.0, 291.0, 291.0, 313.0, 314.0, 293.0, 289.0, 285.0, 299.0, 316.0, 317.0, 316.0, 314.0, 293.0, 294.0, 273.0, 273.0, 296.0, 291.0, 287.0, 289.0, 316.0, 314.0, 319.0, 317.0, 318.0, 312.0, 316.0, 314.0, 283.0, 290.0, 299.0, 288.0, 321.0, 315.0, 302.0, 288.0, 286.0, 293.0, 288.0, 294.0, 314.0, 319.0, 316.0, 323.0, 288.0, 288.0, 321.0, 309.0, 319.0, 320.0, 314.0, 322.0, 310.0, 314.0, 316.0, 320.0, 288.0, 291.0, 282.0, 297.0, 319.0, 311.0, 321.0, 312.0, 286.0, 296.0, 314.0, 322.0, 296.0, 294.0, 311.0, 319.0, 298.0, 289.0, 322.0, 314.0, 285.0, 291.0, 313.0, 317.0, 316.0, 
314.0, 291.0, 291.0, 298.0, 278.0, 308.0, 322.0, 293.0, 289.0, 293.0, 294.0, 316.0, 317.0, 291.0, 291.0, 319.0, 317.0, 321.0, 309.0, 290.0, 292.0, 296.0, 291.0, 316.0, 314.0, 287.0, 286.0, 313.0, 317.0, 318.0, 312.0, 265.0, 260.0, 287.0, 292.0, 314.0, 313.0, 316.0, 317.0, 306.0, 318.0, 296.0, 291.0, 288.0, 294.0, 287.0, 295.0, 288.0, 291.0, 319.0, 317.0, 316.0, 314.0, 281.0, 286.0, 321.0, 309.0, 316.0, 317.0, 319.0, 317.0, 294.0, 288.0, 307.0, 314.0, 308.0, 322.0, 291.0, 296.0, 285.0, 302.0, 314.0, 325.0, 313.0, 317.0, 295.0, 292.0, 293.0, 294.0, 294.0, 293.0, 311.0, 319.0, 321.0, 315.0, 316.0, 311.0, 284.0, 277.0, 316.0, 311.0, 311.0, 319.0, 320.0, 316.0, 294.0, 288.0, 297.0, 285.0, 282.0, 297.0, 304.0, 283.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8657202120154665, "mean_processing_ms": 0.2414933019778603, "mean_inference_ms": 1.4613549029197088}, "off_policy_estimator": {}, "info": {"num_steps_trained": 8712000, "num_steps_sampled": 4646400, "sample_time_ms": 22230.157, "load_time_ms": 37.429, "grad_time_ms": 9536.697, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.00019748850900214165, "policy_loss": -0.00744326738640666, "vf_loss": 82.0882568359375, "vf_explained_var": 0.7681159377098083, "kl": 0.0019463537028059363, "entropy": 1.1361408233642578, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4646400, "episodes_total": 11616, "training_iteration": 363, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-52-22", "timestamp": 1660258342, "time_this_iter_s": 33.05159020423889, "time_total_s": 16753.406279087067, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 16753.406279087067, "timesteps_since_restore": 4646400, "iterations_since_restore": 363, "perf": {"cpu_util_percent": 32.35531914893617, "ram_util_percent": 58.848936170212795}}
-{"episode_reward_max": 639.0, "episode_reward_min": 353.0, "episode_reward_mean": 605.42, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 175.0}, "policy_reward_max": {"ppo": 330.0}, "policy_reward_mean": {"ppo": 302.71}, "custom_metrics": {"sparse_reward_mean": 209.6, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 186.22, "shaped_reward_min": 113, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.62, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.37, "onion_pickup_agent_1_min": 15, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.31, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.68, "useful_onion_pickup_agent_1_min": 14, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.22, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.25, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.08, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.08, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 16.08, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.82, "potting_onion_agent_1_min": 15, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.6, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 16, "dish_pickup_agent_1_mean": 5.75, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.46, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.98, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.85, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 11, "dish_drop_agent_1_mean": 0.51, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.04, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.56, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.08, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.48, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.0, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.08, 
"optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.82, "optimal_onion_potting_agent_1_min": 15, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.08, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.82, "viable_onion_potting_agent_1_min": 15, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [633.0, 587.0, 582.0, 636.0, 630.0, 636.0, 636.0, 579.0, 630.0, 639.0, 579.0, 636.0, 353.0, 639.0, 587.0, 630.0, 633.0, 630.0, 587.0, 582.0, 636.0, 633.0, 639.0, 636.0, 582.0, 636.0, 639.0, 630.0, 621.0, 582.0, 630.0, 636.0, 525.0, 579.0, 627.0, 633.0, 624.0, 587.0, 582.0, 582.0, 579.0, 636.0, 630.0, 567.0, 630.0, 633.0, 636.0, 582.0, 621.0, 630.0, 587.0, 587.0, 639.0, 630.0, 587.0, 587.0, 587.0, 630.0, 636.0, 627.0, 561.0, 627.0, 630.0, 636.0, 582.0, 582.0, 579.0, 587.0, 582.0, 630.0, 582.0, 579.0, 633.0, 579.0, 582.0, 582.0, 582.0, 627.0, 582.0, 584.0, 633.0, 630.0, 587.0, 546.0, 587.0, 576.0, 630.0, 636.0, 630.0, 630.0, 573.0, 587.0, 636.0, 590.0, 579.0, 582.0, 633.0, 639.0, 576.0, 630.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [324.0, 309.0, 283.0, 304.0, 293.0, 289.0, 317.0, 319.0, 311.0, 319.0, 322.0, 314.0, 325.0, 311.0, 290.0, 289.0, 308.0, 322.0, 314.0, 325.0, 288.0, 291.0, 322.0, 314.0, 175.0, 178.0, 327.0, 312.0, 290.0, 297.0, 313.0, 317.0, 316.0, 317.0, 311.0, 319.0, 298.0, 289.0, 286.0, 296.0, 316.0, 320.0, 319.0, 314.0, 312.0, 327.0, 313.0, 323.0, 280.0, 302.0, 319.0, 317.0, 309.0, 330.0, 318.0, 312.0, 302.0, 319.0, 294.0, 288.0, 309.0, 321.0, 319.0, 317.0, 265.0, 260.0, 287.0, 292.0, 314.0, 313.0, 316.0, 317.0, 306.0, 318.0, 296.0, 291.0, 288.0, 294.0, 287.0, 295.0, 288.0, 291.0, 319.0, 317.0, 316.0, 314.0, 281.0, 286.0, 321.0, 309.0, 316.0, 317.0, 319.0, 317.0, 294.0, 288.0, 307.0, 
314.0, 308.0, 322.0, 291.0, 296.0, 285.0, 302.0, 314.0, 325.0, 313.0, 317.0, 295.0, 292.0, 293.0, 294.0, 294.0, 293.0, 311.0, 319.0, 321.0, 315.0, 316.0, 311.0, 284.0, 277.0, 316.0, 311.0, 311.0, 319.0, 320.0, 316.0, 294.0, 288.0, 297.0, 285.0, 282.0, 297.0, 304.0, 283.0, 296.0, 286.0, 313.0, 317.0, 291.0, 291.0, 282.0, 297.0, 314.0, 319.0, 285.0, 294.0, 291.0, 291.0, 288.0, 294.0, 291.0, 291.0, 313.0, 314.0, 293.0, 289.0, 285.0, 299.0, 316.0, 317.0, 316.0, 314.0, 293.0, 294.0, 273.0, 273.0, 296.0, 291.0, 287.0, 289.0, 316.0, 314.0, 319.0, 317.0, 318.0, 312.0, 316.0, 314.0, 283.0, 290.0, 299.0, 288.0, 321.0, 315.0, 302.0, 288.0, 286.0, 293.0, 288.0, 294.0, 314.0, 319.0, 316.0, 323.0, 288.0, 288.0, 321.0, 309.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8646062243279308, "mean_processing_ms": 0.24127430142106637, "mean_inference_ms": 1.4608685672633468}, "off_policy_estimator": {}, "info": {"num_steps_trained": 8736000, "num_steps_sampled": 4659200, "sample_time_ms": 22504.157, "load_time_ms": 37.127, "grad_time_ms": 9578.876, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.00030447044991888106, "policy_loss": -0.007189334835857153, "vf_loss": 80.5628433227539, "vf_explained_var": 0.7757663130760193, "kl": 0.002137100091204047, "entropy": 1.1249442100524902, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4659200, "episodes_total": 11648, "training_iteration": 364, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-52-57", "timestamp": 1660258377, "time_this_iter_s": 35.15313506126404, "time_total_s": 16788.55941414833, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 16788.55941414833, "timesteps_since_restore": 4659200, "iterations_since_restore": 364, "perf": {"cpu_util_percent": 30.822448979591837, "ram_util_percent": 58.930612244897965}}
-{"episode_reward_max": 639.0, "episode_reward_min": 294.0, "episode_reward_mean": 600.91, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 138.0}, "policy_reward_max": {"ppo": 330.0}, "policy_reward_mean": {"ppo": 300.455}, "custom_metrics": {"sparse_reward_mean": 208.0, "sparse_reward_min": 100, "sparse_reward_max": 220, "shaped_reward_mean": 184.91, "shaped_reward_min": 94, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.5, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.13, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.16, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.5, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.19, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.25, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.07, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.09, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 16.09, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.54, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.53, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 16, "dish_pickup_agent_1_mean": 5.7, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.49, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.95, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.82, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 11, "dish_drop_agent_1_mean": 0.48, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.46, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.09, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.38, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.04, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.09, 
"optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.54, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.09, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.54, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [627.0, 582.0, 579.0, 633.0, 579.0, 582.0, 627.0, 582.0, 636.0, 590.0, 294.0, 584.0, 630.0, 621.0, 576.0, 630.0, 582.0, 582.0, 584.0, 582.0, 627.0, 579.0, 576.0, 633.0, 590.0, 630.0, 630.0, 579.0, 627.0, 579.0, 630.0, 621.0, 582.0, 582.0, 579.0, 587.0, 582.0, 630.0, 582.0, 579.0, 633.0, 579.0, 582.0, 582.0, 582.0, 627.0, 582.0, 584.0, 633.0, 630.0, 587.0, 546.0, 587.0, 576.0, 630.0, 636.0, 630.0, 630.0, 573.0, 587.0, 636.0, 590.0, 579.0, 582.0, 633.0, 639.0, 576.0, 630.0, 633.0, 587.0, 582.0, 636.0, 630.0, 636.0, 636.0, 579.0, 630.0, 639.0, 579.0, 636.0, 353.0, 639.0, 587.0, 630.0, 633.0, 630.0, 587.0, 582.0, 636.0, 633.0, 639.0, 636.0, 582.0, 636.0, 639.0, 630.0, 621.0, 582.0, 630.0, 636.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [308.0, 319.0, 291.0, 291.0, 288.0, 291.0, 319.0, 314.0, 285.0, 294.0, 285.0, 297.0, 319.0, 308.0, 292.0, 290.0, 319.0, 317.0, 291.0, 299.0, 138.0, 156.0, 291.0, 293.0, 311.0, 319.0, 313.0, 308.0, 294.0, 282.0, 316.0, 314.0, 291.0, 291.0, 299.0, 283.0, 293.0, 291.0, 298.0, 284.0, 317.0, 310.0, 296.0, 283.0, 286.0, 290.0, 316.0, 317.0, 296.0, 294.0, 314.0, 316.0, 319.0, 311.0, 291.0, 288.0, 313.0, 314.0, 285.0, 294.0, 311.0, 319.0, 304.0, 317.0, 294.0, 288.0, 297.0, 285.0, 282.0, 297.0, 304.0, 283.0, 296.0, 286.0, 313.0, 317.0, 291.0, 291.0, 282.0, 297.0, 314.0, 319.0, 285.0, 294.0, 291.0, 291.0, 288.0, 294.0, 291.0, 291.0, 313.0, 314.0, 293.0, 289.0, 285.0, 299.0, 316.0, 
317.0, 316.0, 314.0, 293.0, 294.0, 273.0, 273.0, 296.0, 291.0, 287.0, 289.0, 316.0, 314.0, 319.0, 317.0, 318.0, 312.0, 316.0, 314.0, 283.0, 290.0, 299.0, 288.0, 321.0, 315.0, 302.0, 288.0, 286.0, 293.0, 288.0, 294.0, 314.0, 319.0, 316.0, 323.0, 288.0, 288.0, 321.0, 309.0, 324.0, 309.0, 283.0, 304.0, 293.0, 289.0, 317.0, 319.0, 311.0, 319.0, 322.0, 314.0, 325.0, 311.0, 290.0, 289.0, 308.0, 322.0, 314.0, 325.0, 288.0, 291.0, 322.0, 314.0, 175.0, 178.0, 327.0, 312.0, 290.0, 297.0, 313.0, 317.0, 316.0, 317.0, 311.0, 319.0, 298.0, 289.0, 286.0, 296.0, 316.0, 320.0, 319.0, 314.0, 312.0, 327.0, 313.0, 323.0, 280.0, 302.0, 319.0, 317.0, 309.0, 330.0, 318.0, 312.0, 302.0, 319.0, 294.0, 288.0, 309.0, 321.0, 319.0, 317.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8635010378078678, "mean_processing_ms": 0.24105756914616358, "mean_inference_ms": 1.4603867065067317}, "off_policy_estimator": {}, "info": {"num_steps_trained": 8760000, "num_steps_sampled": 4672000, "sample_time_ms": 23125.247, "load_time_ms": 37.852, "grad_time_ms": 9835.319, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.003108972916379571, "policy_loss": -0.004532767925411463, "vf_loss": 82.0846176147461, "vf_explained_var": 0.7727766036987305, "kl": 0.00231738924048841, "entropy": 1.1334295272827148, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4672000, "episodes_total": 11680, "training_iteration": 365, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-53-36", "timestamp": 1660258416, "time_this_iter_s": 39.16720676422119, "time_total_s": 16827.726620912552, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 16827.726620912552, "timesteps_since_restore": 4672000, "iterations_since_restore": 365, "perf": {"cpu_util_percent": 31.412499999999998, "ram_util_percent": 59.01071428571428}}
-{"episode_reward_max": 639.0, "episode_reward_min": 294.0, "episode_reward_mean": 599.9, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 138.0}, "policy_reward_max": {"ppo": 330.0}, "policy_reward_mean": {"ppo": 299.95}, "custom_metrics": {"sparse_reward_mean": 207.8, "sparse_reward_min": 100, "sparse_reward_max": 220, "shaped_reward_mean": 184.3, "shaped_reward_min": 94, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.52, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 17.92, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.17, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.32, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.15, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.15, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.05, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.06, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.08, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.46, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.29, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 16, "dish_pickup_agent_1_mean": 5.71, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.43, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.98, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.67, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 11, "dish_drop_agent_1_mean": 0.5, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.4, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.1, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.37, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.04, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.08, 
"optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.46, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.08, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.46, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [630.0, 636.0, 582.0, 582.0, 639.0, 579.0, 630.0, 576.0, 630.0, 402.0, 639.0, 633.0, 627.0, 587.0, 630.0, 465.0, 582.0, 581.0, 576.0, 582.0, 561.0, 633.0, 465.0, 633.0, 633.0, 633.0, 582.0, 627.0, 636.0, 630.0, 582.0, 582.0, 633.0, 639.0, 576.0, 630.0, 633.0, 587.0, 582.0, 636.0, 630.0, 636.0, 636.0, 579.0, 630.0, 639.0, 579.0, 636.0, 353.0, 639.0, 587.0, 630.0, 633.0, 630.0, 587.0, 582.0, 636.0, 633.0, 639.0, 636.0, 582.0, 636.0, 639.0, 630.0, 621.0, 582.0, 630.0, 636.0, 627.0, 582.0, 579.0, 633.0, 579.0, 582.0, 627.0, 582.0, 636.0, 590.0, 294.0, 584.0, 630.0, 621.0, 576.0, 630.0, 582.0, 582.0, 584.0, 582.0, 627.0, 579.0, 576.0, 633.0, 590.0, 630.0, 630.0, 579.0, 627.0, 579.0, 630.0, 621.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [311.0, 319.0, 319.0, 317.0, 291.0, 291.0, 291.0, 291.0, 319.0, 320.0, 296.0, 283.0, 316.0, 314.0, 287.0, 289.0, 315.0, 315.0, 198.0, 204.0, 324.0, 315.0, 316.0, 317.0, 311.0, 316.0, 288.0, 299.0, 314.0, 316.0, 231.0, 234.0, 291.0, 291.0, 279.0, 302.0, 296.0, 280.0, 288.0, 294.0, 288.0, 273.0, 321.0, 312.0, 231.0, 234.0, 316.0, 317.0, 313.0, 320.0, 313.0, 320.0, 288.0, 294.0, 308.0, 319.0, 320.0, 316.0, 311.0, 319.0, 296.0, 286.0, 291.0, 291.0, 314.0, 319.0, 316.0, 323.0, 288.0, 288.0, 321.0, 309.0, 324.0, 309.0, 283.0, 304.0, 293.0, 289.0, 317.0, 319.0, 311.0, 319.0, 322.0, 314.0, 325.0, 311.0, 290.0, 289.0, 308.0, 322.0, 314.0, 325.0, 288.0, 291.0, 322.0, 314.0, 175.0, 
178.0, 327.0, 312.0, 290.0, 297.0, 313.0, 317.0, 316.0, 317.0, 311.0, 319.0, 298.0, 289.0, 286.0, 296.0, 316.0, 320.0, 319.0, 314.0, 312.0, 327.0, 313.0, 323.0, 280.0, 302.0, 319.0, 317.0, 309.0, 330.0, 318.0, 312.0, 302.0, 319.0, 294.0, 288.0, 309.0, 321.0, 319.0, 317.0, 308.0, 319.0, 291.0, 291.0, 288.0, 291.0, 319.0, 314.0, 285.0, 294.0, 285.0, 297.0, 319.0, 308.0, 292.0, 290.0, 319.0, 317.0, 291.0, 299.0, 138.0, 156.0, 291.0, 293.0, 311.0, 319.0, 313.0, 308.0, 294.0, 282.0, 316.0, 314.0, 291.0, 291.0, 299.0, 283.0, 293.0, 291.0, 298.0, 284.0, 317.0, 310.0, 296.0, 283.0, 286.0, 290.0, 316.0, 317.0, 296.0, 294.0, 314.0, 316.0, 319.0, 311.0, 291.0, 288.0, 313.0, 314.0, 285.0, 294.0, 311.0, 319.0, 304.0, 317.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8623994673017462, "mean_processing_ms": 0.24084175116520762, "mean_inference_ms": 1.4598463496186935}, "off_policy_estimator": {}, "info": {"num_steps_trained": 8784000, "num_steps_sampled": 4684800, "sample_time_ms": 23187.668, "load_time_ms": 38.066, "grad_time_ms": 10176.48, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0020015325862914324, "policy_loss": -0.005890776868909597, "vf_loss": 84.59882354736328, "vf_explained_var": 0.7721861004829407, "kl": 0.002045721048489213, "entropy": 1.1351399421691895, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4684800, "episodes_total": 11712, "training_iteration": 366, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-54-11", "timestamp": 1660258451, "time_this_iter_s": 34.82252907752991, "time_total_s": 16862.54914999008, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 16862.54914999008, "timesteps_since_restore": 4684800, "iterations_since_restore": 366, "perf": {"cpu_util_percent": 28.122448979591837, "ram_util_percent": 58.88775510204081}}
-{"episode_reward_max": 639.0, "episode_reward_min": 294.0, "episode_reward_mean": 598.83, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 138.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 299.415}, "custom_metrics": {"sparse_reward_mean": 207.6, "sparse_reward_min": 100, "sparse_reward_max": 220, "shaped_reward_mean": 183.63, "shaped_reward_min": 94, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.68, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 17.72, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.37, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.1, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.21, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.15, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.06, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.07, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.08, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.24, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.11, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.71, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.39, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.0, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.56, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.48, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.36, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.15, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.34, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.06, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.08, 
"optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.24, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.08, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.24, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [519.0, 633.0, 636.0, 627.0, 630.0, 627.0, 630.0, 627.0, 587.0, 630.0, 633.0, 630.0, 582.0, 582.0, 587.0, 582.0, 627.0, 627.0, 587.0, 576.0, 633.0, 573.0, 636.0, 630.0, 587.0, 582.0, 636.0, 587.0, 573.0, 627.0, 636.0, 587.0, 621.0, 582.0, 630.0, 636.0, 627.0, 582.0, 579.0, 633.0, 579.0, 582.0, 627.0, 582.0, 636.0, 590.0, 294.0, 584.0, 630.0, 621.0, 576.0, 630.0, 582.0, 582.0, 584.0, 582.0, 627.0, 579.0, 576.0, 633.0, 590.0, 630.0, 630.0, 579.0, 627.0, 579.0, 630.0, 621.0, 630.0, 636.0, 582.0, 582.0, 639.0, 579.0, 630.0, 576.0, 630.0, 402.0, 639.0, 633.0, 627.0, 587.0, 630.0, 465.0, 582.0, 581.0, 576.0, 582.0, 561.0, 633.0, 465.0, 633.0, 633.0, 633.0, 582.0, 627.0, 636.0, 630.0, 582.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [265.0, 254.0, 314.0, 319.0, 322.0, 314.0, 311.0, 316.0, 312.0, 318.0, 324.0, 303.0, 321.0, 309.0, 313.0, 314.0, 288.0, 299.0, 311.0, 319.0, 310.0, 323.0, 306.0, 324.0, 293.0, 289.0, 288.0, 294.0, 282.0, 305.0, 294.0, 288.0, 309.0, 318.0, 313.0, 314.0, 291.0, 296.0, 295.0, 281.0, 316.0, 317.0, 287.0, 286.0, 318.0, 318.0, 311.0, 319.0, 288.0, 299.0, 294.0, 288.0, 319.0, 317.0, 296.0, 291.0, 277.0, 296.0, 311.0, 316.0, 319.0, 317.0, 288.0, 299.0, 302.0, 319.0, 294.0, 288.0, 309.0, 321.0, 319.0, 317.0, 308.0, 319.0, 291.0, 291.0, 288.0, 291.0, 319.0, 314.0, 285.0, 294.0, 285.0, 297.0, 319.0, 308.0, 292.0, 290.0, 319.0, 317.0, 291.0, 299.0, 138.0, 156.0, 291.0, 293.0, 311.0, 
319.0, 313.0, 308.0, 294.0, 282.0, 316.0, 314.0, 291.0, 291.0, 299.0, 283.0, 293.0, 291.0, 298.0, 284.0, 317.0, 310.0, 296.0, 283.0, 286.0, 290.0, 316.0, 317.0, 296.0, 294.0, 314.0, 316.0, 319.0, 311.0, 291.0, 288.0, 313.0, 314.0, 285.0, 294.0, 311.0, 319.0, 304.0, 317.0, 311.0, 319.0, 319.0, 317.0, 291.0, 291.0, 291.0, 291.0, 319.0, 320.0, 296.0, 283.0, 316.0, 314.0, 287.0, 289.0, 315.0, 315.0, 198.0, 204.0, 324.0, 315.0, 316.0, 317.0, 311.0, 316.0, 288.0, 299.0, 314.0, 316.0, 231.0, 234.0, 291.0, 291.0, 279.0, 302.0, 296.0, 280.0, 288.0, 294.0, 288.0, 273.0, 321.0, 312.0, 231.0, 234.0, 316.0, 317.0, 313.0, 320.0, 313.0, 320.0, 288.0, 294.0, 308.0, 319.0, 320.0, 316.0, 311.0, 319.0, 296.0, 286.0, 291.0, 291.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8612908936403926, "mean_processing_ms": 0.24062264484082838, "mean_inference_ms": 1.4590268461349842}, "off_policy_estimator": {}, "info": {"num_steps_trained": 8808000, "num_steps_sampled": 4697600, "sample_time_ms": 23071.295, "load_time_ms": 37.913, "grad_time_ms": 10615.56, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0038227650802582502, "policy_loss": -0.003672233084216714, "vf_loss": 80.5904312133789, "vf_explained_var": 0.7626190185546875, "kl": 0.0024631840642541647, "entropy": 1.128088116645813, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4697600, "episodes_total": 11744, "training_iteration": 367, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-54-44", "timestamp": 1660258484, "time_this_iter_s": 33.39541292190552, "time_total_s": 16895.944562911987, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 16895.944562911987, "timesteps_since_restore": 4697600, "iterations_since_restore": 367, "perf": {"cpu_util_percent": 29.602127659574467, "ram_util_percent": 58.99148936170216}}
-{"episode_reward_max": 639.0, "episode_reward_min": 402.0, "episode_reward_mean": 603.84, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 198.0}, "policy_reward_max": {"ppo": 326.0}, "policy_reward_mean": {"ppo": 301.92}, "custom_metrics": {"sparse_reward_mean": 209.2, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 185.44, "shaped_reward_min": 122, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.7, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 17.99, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.32, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 17.28, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.25, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.15, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.08, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.06, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.07, "potting_onion_agent_0_min": 13, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.57, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.09, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.85, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.43, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.11, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.49, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.58, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.43, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.15, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.41, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.06, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.07, 
"optimal_onion_potting_agent_0_min": 13, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.57, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.07, "viable_onion_potting_agent_0_min": 13, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.57, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [633.0, 636.0, 639.0, 579.0, 639.0, 636.0, 525.0, 581.0, 639.0, 573.0, 582.0, 587.0, 630.0, 636.0, 639.0, 633.0, 587.0, 582.0, 636.0, 636.0, 582.0, 636.0, 633.0, 627.0, 587.0, 516.0, 630.0, 633.0, 630.0, 639.0, 573.0, 582.0, 627.0, 579.0, 630.0, 621.0, 630.0, 636.0, 582.0, 582.0, 639.0, 579.0, 630.0, 576.0, 630.0, 402.0, 639.0, 633.0, 627.0, 587.0, 630.0, 465.0, 582.0, 581.0, 576.0, 582.0, 561.0, 633.0, 465.0, 633.0, 633.0, 633.0, 582.0, 627.0, 636.0, 630.0, 582.0, 582.0, 519.0, 633.0, 636.0, 627.0, 630.0, 627.0, 630.0, 627.0, 587.0, 630.0, 633.0, 630.0, 582.0, 582.0, 587.0, 582.0, 627.0, 627.0, 587.0, 576.0, 633.0, 573.0, 636.0, 630.0, 587.0, 582.0, 636.0, 587.0, 573.0, 627.0, 636.0, 587.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [324.0, 309.0, 316.0, 320.0, 319.0, 320.0, 286.0, 293.0, 319.0, 320.0, 322.0, 314.0, 260.0, 265.0, 288.0, 293.0, 322.0, 317.0, 285.0, 288.0, 293.0, 289.0, 282.0, 305.0, 306.0, 324.0, 314.0, 322.0, 322.0, 317.0, 323.0, 310.0, 293.0, 294.0, 290.0, 292.0, 319.0, 317.0, 319.0, 317.0, 288.0, 294.0, 314.0, 322.0, 316.0, 317.0, 313.0, 314.0, 290.0, 297.0, 259.0, 257.0, 316.0, 314.0, 307.0, 326.0, 321.0, 309.0, 321.0, 318.0, 277.0, 296.0, 285.0, 297.0, 313.0, 314.0, 285.0, 294.0, 311.0, 319.0, 304.0, 317.0, 311.0, 319.0, 319.0, 317.0, 291.0, 291.0, 291.0, 291.0, 319.0, 320.0, 296.0, 283.0, 316.0, 314.0, 287.0, 289.0, 315.0, 315.0, 198.0, 204.0, 324.0, 315.0, 316.0, 317.0, 311.0, 
316.0, 288.0, 299.0, 314.0, 316.0, 231.0, 234.0, 291.0, 291.0, 279.0, 302.0, 296.0, 280.0, 288.0, 294.0, 288.0, 273.0, 321.0, 312.0, 231.0, 234.0, 316.0, 317.0, 313.0, 320.0, 313.0, 320.0, 288.0, 294.0, 308.0, 319.0, 320.0, 316.0, 311.0, 319.0, 296.0, 286.0, 291.0, 291.0, 265.0, 254.0, 314.0, 319.0, 322.0, 314.0, 311.0, 316.0, 312.0, 318.0, 324.0, 303.0, 321.0, 309.0, 313.0, 314.0, 288.0, 299.0, 311.0, 319.0, 310.0, 323.0, 306.0, 324.0, 293.0, 289.0, 288.0, 294.0, 282.0, 305.0, 294.0, 288.0, 309.0, 318.0, 313.0, 314.0, 291.0, 296.0, 295.0, 281.0, 316.0, 317.0, 287.0, 286.0, 318.0, 318.0, 311.0, 319.0, 288.0, 299.0, 294.0, 288.0, 319.0, 317.0, 296.0, 291.0, 277.0, 296.0, 311.0, 316.0, 319.0, 317.0, 288.0, 299.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8601760204307758, "mean_processing_ms": 0.2404010276903208, "mean_inference_ms": 1.4579011173262801}, "off_policy_estimator": {}, "info": {"num_steps_trained": 8832000, "num_steps_sampled": 4710400, "sample_time_ms": 23247.48, "load_time_ms": 37.926, "grad_time_ms": 10791.679, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.004444511607289314, "policy_loss": -0.0034118040930479765, "vf_loss": 84.17324829101562, "vf_explained_var": 0.7645478844642639, "kl": 0.0020590554922819138, "entropy": 1.1220086812973022, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4710400, "episodes_total": 11776, "training_iteration": 368, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-55-17", "timestamp": 1660258517, "time_this_iter_s": 32.34189581871033, "time_total_s": 16928.286458730698, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 16928.286458730698, "timesteps_since_restore": 4710400, "iterations_since_restore": 368, "perf": {"cpu_util_percent": 31.186956521739134, "ram_util_percent": 58.830434782608684}}
-{"episode_reward_max": 639.0, "episode_reward_min": 513.0, "episode_reward_mean": 607.11, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 251.0}, "policy_reward_max": {"ppo": 326.0}, "policy_reward_mean": {"ppo": 303.555}, "custom_metrics": {"sparse_reward_mean": 210.2, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 186.71, "shaped_reward_min": 153, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.76, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 18.11, "onion_pickup_agent_1_min": 15, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.33, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 17.37, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.3, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.17, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.12, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.07, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.13, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.67, "potting_onion_agent_1_min": 14, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.3, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.83, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.54, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.08, "useful_dish_pickup_agent_1_min": 3, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.61, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.57, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.54, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.15, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.49, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.05, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.13, 
"optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.67, "optimal_onion_potting_agent_1_min": 14, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.13, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.67, "viable_onion_potting_agent_1_min": 14, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [587.0, 579.0, 584.0, 636.0, 627.0, 624.0, 636.0, 587.0, 582.0, 636.0, 581.0, 587.0, 630.0, 636.0, 633.0, 630.0, 630.0, 636.0, 630.0, 584.0, 516.0, 587.0, 630.0, 513.0, 587.0, 630.0, 633.0, 627.0, 633.0, 582.0, 519.0, 627.0, 636.0, 630.0, 582.0, 582.0, 519.0, 633.0, 636.0, 627.0, 630.0, 627.0, 630.0, 627.0, 587.0, 630.0, 633.0, 630.0, 582.0, 582.0, 587.0, 582.0, 627.0, 627.0, 587.0, 576.0, 633.0, 573.0, 636.0, 630.0, 587.0, 582.0, 636.0, 587.0, 573.0, 627.0, 636.0, 587.0, 633.0, 636.0, 639.0, 579.0, 639.0, 636.0, 525.0, 581.0, 639.0, 573.0, 582.0, 587.0, 630.0, 636.0, 639.0, 633.0, 587.0, 582.0, 636.0, 636.0, 582.0, 636.0, 633.0, 627.0, 587.0, 516.0, 630.0, 633.0, 630.0, 639.0, 573.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [298.0, 289.0, 285.0, 294.0, 288.0, 296.0, 316.0, 320.0, 316.0, 311.0, 308.0, 316.0, 316.0, 320.0, 294.0, 293.0, 301.0, 281.0, 314.0, 322.0, 297.0, 284.0, 299.0, 288.0, 313.0, 317.0, 314.0, 322.0, 316.0, 317.0, 324.0, 306.0, 318.0, 312.0, 319.0, 317.0, 319.0, 311.0, 282.0, 302.0, 256.0, 260.0, 288.0, 299.0, 316.0, 314.0, 255.0, 258.0, 293.0, 294.0, 316.0, 314.0, 318.0, 315.0, 311.0, 316.0, 321.0, 312.0, 288.0, 294.0, 268.0, 251.0, 311.0, 316.0, 320.0, 316.0, 311.0, 319.0, 296.0, 286.0, 291.0, 291.0, 265.0, 254.0, 314.0, 319.0, 322.0, 314.0, 311.0, 316.0, 312.0, 318.0, 324.0, 303.0, 321.0, 309.0, 313.0, 314.0, 288.0, 299.0, 311.0, 319.0, 310.0, 323.0, 306.0, 324.0, 293.0, 
289.0, 288.0, 294.0, 282.0, 305.0, 294.0, 288.0, 309.0, 318.0, 313.0, 314.0, 291.0, 296.0, 295.0, 281.0, 316.0, 317.0, 287.0, 286.0, 318.0, 318.0, 311.0, 319.0, 288.0, 299.0, 294.0, 288.0, 319.0, 317.0, 296.0, 291.0, 277.0, 296.0, 311.0, 316.0, 319.0, 317.0, 288.0, 299.0, 324.0, 309.0, 316.0, 320.0, 319.0, 320.0, 286.0, 293.0, 319.0, 320.0, 322.0, 314.0, 260.0, 265.0, 288.0, 293.0, 322.0, 317.0, 285.0, 288.0, 293.0, 289.0, 282.0, 305.0, 306.0, 324.0, 314.0, 322.0, 322.0, 317.0, 323.0, 310.0, 293.0, 294.0, 290.0, 292.0, 319.0, 317.0, 319.0, 317.0, 288.0, 294.0, 314.0, 322.0, 316.0, 317.0, 313.0, 314.0, 290.0, 297.0, 259.0, 257.0, 316.0, 314.0, 307.0, 326.0, 321.0, 309.0, 321.0, 318.0, 277.0, 296.0, 285.0, 297.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8590666727700321, "mean_processing_ms": 0.24018037596280067, "mean_inference_ms": 1.4567772377738835}, "off_policy_estimator": {}, "info": {"num_steps_trained": 8856000, "num_steps_sampled": 4723200, "sample_time_ms": 23393.671, "load_time_ms": 38.324, "grad_time_ms": 11132.048, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0011790187563747168, "policy_loss": -0.008629883639514446, "vf_loss": 80.15734100341797, "vf_explained_var": 0.7653172016143799, "kl": 0.001749455346725881, "entropy": 1.129709243774414, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4723200, "episodes_total": 11808, "training_iteration": 369, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-55-52", "timestamp": 1660258552, "time_this_iter_s": 34.8981990814209, "time_total_s": 16963.18465781212, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 16963.18465781212, "timesteps_since_restore": 4723200, "iterations_since_restore": 369, "perf": {"cpu_util_percent": 32.48979591836735, "ram_util_percent": 58.86734693877551}}
-{"episode_reward_max": 639.0, "episode_reward_min": 453.0, "episode_reward_mean": 604.05, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 220.0}, "policy_reward_max": {"ppo": 326.0}, "policy_reward_mean": {"ppo": 302.025}, "custom_metrics": {"sparse_reward_mean": 209.2, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 185.65, "shaped_reward_min": 133, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.52, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 18.13, "onion_pickup_agent_1_min": 15, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.16, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 17.45, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.24, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.15, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.1, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.06, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.0, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.68, "potting_onion_agent_1_min": 14, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.32, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.72, "dish_pickup_agent_1_min": 4, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.52, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 4.95, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.6, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.54, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.57, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.07, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.5, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.0, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.0, "optimal_onion_potting_agent_0_min": 
10, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.68, "optimal_onion_potting_agent_1_min": 14, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.0, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.68, "viable_onion_potting_agent_1_min": 14, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": 
{"episode_reward": [579.0, 636.0, 630.0, 584.0, 636.0, 465.0, 627.0, 633.0, 624.0, 633.0, 465.0, 582.0, 587.0, 630.0, 627.0, 639.0, 630.0, 633.0, 453.0, 639.0, 630.0, 630.0, 627.0, 636.0, 579.0, 587.0, 636.0, 510.0, 587.0, 624.0, 630.0, 539.0, 573.0, 627.0, 636.0, 587.0, 633.0, 636.0, 639.0, 579.0, 639.0, 636.0, 525.0, 581.0, 639.0, 573.0, 582.0, 587.0, 630.0, 636.0, 639.0, 633.0, 587.0, 582.0, 636.0, 636.0, 582.0, 636.0, 633.0, 627.0, 587.0, 516.0, 630.0, 633.0, 630.0, 639.0, 573.0, 582.0, 587.0, 579.0, 584.0, 636.0, 627.0, 624.0, 636.0, 587.0, 582.0, 636.0, 581.0, 587.0, 630.0, 636.0, 633.0, 630.0, 630.0, 636.0, 630.0, 584.0, 516.0, 587.0, 630.0, 513.0, 587.0, 630.0, 633.0, 627.0, 633.0, 582.0, 519.0, 627.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [283.0, 296.0, 322.0, 314.0, 316.0, 314.0, 298.0, 286.0, 313.0, 323.0, 222.0, 243.0, 318.0, 309.0, 321.0, 312.0, 310.0, 314.0, 319.0, 314.0, 220.0, 245.0, 297.0, 285.0, 294.0, 293.0, 318.0, 312.0, 311.0, 316.0, 317.0, 322.0, 316.0, 314.0, 319.0, 314.0, 222.0, 231.0, 319.0, 320.0, 322.0, 308.0, 322.0, 308.0, 311.0, 316.0, 319.0, 317.0, 294.0, 285.0, 283.0, 304.0, 324.0, 312.0, 259.0, 251.0, 301.0, 286.0, 305.0, 319.0, 313.0, 317.0, 271.0, 268.0, 277.0, 296.0, 311.0, 316.0, 319.0, 317.0, 288.0, 299.0, 324.0, 309.0, 316.0, 320.0, 319.0, 320.0, 286.0, 293.0, 319.0, 320.0, 322.0, 314.0, 260.0, 265.0, 288.0, 293.0, 322.0, 317.0, 285.0, 288.0, 293.0, 289.0, 282.0, 305.0, 306.0, 324.0, 314.0, 322.0, 322.0, 317.0, 323.0, 310.0, 293.0, 
294.0, 290.0, 292.0, 319.0, 317.0, 319.0, 317.0, 288.0, 294.0, 314.0, 322.0, 316.0, 317.0, 313.0, 314.0, 290.0, 297.0, 259.0, 257.0, 316.0, 314.0, 307.0, 326.0, 321.0, 309.0, 321.0, 318.0, 277.0, 296.0, 285.0, 297.0, 298.0, 289.0, 285.0, 294.0, 288.0, 296.0, 316.0, 320.0, 316.0, 311.0, 308.0, 316.0, 316.0, 320.0, 294.0, 293.0, 301.0, 281.0, 314.0, 322.0, 297.0, 284.0, 299.0, 288.0, 313.0, 317.0, 314.0, 322.0, 316.0, 317.0, 324.0, 306.0, 318.0, 312.0, 319.0, 317.0, 319.0, 311.0, 282.0, 302.0, 256.0, 260.0, 288.0, 299.0, 316.0, 314.0, 255.0, 258.0, 293.0, 294.0, 316.0, 314.0, 318.0, 315.0, 311.0, 316.0, 321.0, 312.0, 288.0, 294.0, 268.0, 251.0, 311.0, 316.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8579708589489354, "mean_processing_ms": 0.23996389125057788, "mean_inference_ms": 1.455828460743175}, "off_policy_estimator": {}, "info": {"num_steps_trained": 8880000, "num_steps_sampled": 4736000, "sample_time_ms": 23349.429, "load_time_ms": 38.506, "grad_time_ms": 11333.701, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.00012880750000476837, "policy_loss": -0.007648926693946123, "vf_loss": 83.42855072021484, "vf_explained_var": 0.7715353965759277, "kl": 0.0017624356551095843, "entropy": 1.1302567720413208, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4736000, "episodes_total": 11840, "training_iteration": 370, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-56-26", "timestamp": 1660258586, "time_this_iter_s": 33.842254877090454, "time_total_s": 16997.02691268921, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, 
"conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, 
"custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, 
"vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 16997.02691268921, "timesteps_since_restore": 4736000, "iterations_since_restore": 370, "perf": {"cpu_util_percent": 32.75416666666667, "ram_util_percent": 58.89374999999999}}
-{"episode_reward_max": 639.0, "episode_reward_min": 453.0, "episode_reward_mean": 602.85, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 220.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 301.425}, "custom_metrics": {"sparse_reward_mean": 209.0, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 184.85, "shaped_reward_min": 133, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.57, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 17.99, "onion_pickup_agent_1_min": 14, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.24, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 17.37, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.26, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.15, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.11, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.06, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.99, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.5, "potting_onion_agent_1_min": 14, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.39, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.67, "dish_pickup_agent_1_min": 4, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.5, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 4.91, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.66, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.53, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.59, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.04, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.5, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.98, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.99, 
"optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.5, "optimal_onion_potting_agent_1_min": 14, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.99, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.5, "viable_onion_potting_agent_1_min": 14, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [630.0, 579.0, 633.0, 630.0, 582.0, 582.0, 630.0, 627.0, 633.0, 630.0, 627.0, 579.0, 633.0, 587.0, 579.0, 582.0, 576.0, 579.0, 627.0, 627.0, 636.0, 639.0, 630.0, 630.0, 582.0, 618.0, 630.0, 587.0, 473.0, 587.0, 624.0, 587.0, 630.0, 639.0, 573.0, 582.0, 587.0, 579.0, 584.0, 636.0, 627.0, 624.0, 636.0, 587.0, 582.0, 636.0, 581.0, 587.0, 630.0, 636.0, 633.0, 630.0, 630.0, 636.0, 630.0, 584.0, 516.0, 587.0, 630.0, 513.0, 587.0, 630.0, 633.0, 627.0, 633.0, 582.0, 519.0, 627.0, 579.0, 636.0, 630.0, 584.0, 636.0, 465.0, 627.0, 633.0, 624.0, 633.0, 465.0, 582.0, 587.0, 630.0, 627.0, 639.0, 630.0, 633.0, 453.0, 639.0, 630.0, 630.0, 627.0, 636.0, 579.0, 587.0, 636.0, 510.0, 587.0, 624.0, 630.0, 539.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [313.0, 317.0, 288.0, 291.0, 317.0, 316.0, 311.0, 319.0, 289.0, 293.0, 290.0, 292.0, 314.0, 316.0, 319.0, 308.0, 311.0, 322.0, 314.0, 316.0, 310.0, 317.0, 302.0, 277.0, 319.0, 314.0, 299.0, 288.0, 293.0, 286.0, 283.0, 299.0, 297.0, 279.0, 291.0, 288.0, 319.0, 308.0, 314.0, 313.0, 313.0, 323.0, 319.0, 320.0, 310.0, 320.0, 319.0, 311.0, 286.0, 296.0, 314.0, 304.0, 311.0, 319.0, 293.0, 294.0, 231.0, 242.0, 291.0, 296.0, 308.0, 316.0, 293.0, 294.0, 321.0, 309.0, 321.0, 318.0, 277.0, 296.0, 285.0, 297.0, 298.0, 289.0, 285.0, 294.0, 288.0, 296.0, 316.0, 320.0, 316.0, 311.0, 308.0, 316.0, 316.0, 320.0, 294.0, 293.0, 301.0, 281.0, 314.0, 322.0, 297.0, 284.0, 299.0, 288.0, 313.0, 
317.0, 314.0, 322.0, 316.0, 317.0, 324.0, 306.0, 318.0, 312.0, 319.0, 317.0, 319.0, 311.0, 282.0, 302.0, 256.0, 260.0, 288.0, 299.0, 316.0, 314.0, 255.0, 258.0, 293.0, 294.0, 316.0, 314.0, 318.0, 315.0, 311.0, 316.0, 321.0, 312.0, 288.0, 294.0, 268.0, 251.0, 311.0, 316.0, 283.0, 296.0, 322.0, 314.0, 316.0, 314.0, 298.0, 286.0, 313.0, 323.0, 222.0, 243.0, 318.0, 309.0, 321.0, 312.0, 310.0, 314.0, 319.0, 314.0, 220.0, 245.0, 297.0, 285.0, 294.0, 293.0, 318.0, 312.0, 311.0, 316.0, 317.0, 322.0, 316.0, 314.0, 319.0, 314.0, 222.0, 231.0, 319.0, 320.0, 322.0, 308.0, 322.0, 308.0, 311.0, 316.0, 319.0, 317.0, 294.0, 285.0, 283.0, 304.0, 324.0, 312.0, 259.0, 251.0, 301.0, 286.0, 305.0, 319.0, 313.0, 317.0, 271.0, 268.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8568823921280486, "mean_processing_ms": 0.23974756126791116, "mean_inference_ms": 1.4548800404150943}, "off_policy_estimator": {}, "info": {"num_steps_trained": 8904000, "num_steps_sampled": 4748800, "sample_time_ms": 22963.635, "load_time_ms": 38.114, "grad_time_ms": 11201.711, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0006220227223820984, "policy_loss": -0.007045889273285866, "vf_loss": 82.31112670898438, "vf_explained_var": 0.7558401226997375, "kl": 0.002209648722782731, "entropy": 1.1263946294784546, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4748800, "episodes_total": 11872, "training_iteration": 371, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-56-56", "timestamp": 1660258616, "time_this_iter_s": 30.35482382774353, "time_total_s": 17027.381736516953, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 17027.381736516953, "timesteps_since_restore": 4748800, "iterations_since_restore": 371, "perf": {"cpu_util_percent": 31.702325581395346, "ram_util_percent": 58.86744186046512}}
-{"episode_reward_max": 639.0, "episode_reward_min": 453.0, "episode_reward_mean": 601.27, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 220.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 300.635}, "custom_metrics": {"sparse_reward_mean": 208.4, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 184.47, "shaped_reward_min": 133, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.51, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 18.03, "onion_pickup_agent_1_min": 14, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.21, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 17.41, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.19, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.19, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.06, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.07, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.04, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.47, "potting_onion_agent_1_min": 14, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.46, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.75, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.49, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.84, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.67, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.62, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.59, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.01, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.49, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.94, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.04, 
"optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.47, "optimal_onion_potting_agent_1_min": 14, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.04, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.47, "viable_onion_potting_agent_1_min": 14, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 587.0, 633.0, 579.0, 633.0, 584.0, 633.0, 636.0, 561.0, 573.0, 570.0, 582.0, 627.0, 633.0, 587.0, 636.0, 579.0, 630.0, 627.0, 525.0, 576.0, 636.0, 636.0, 587.0, 587.0, 633.0, 633.0, 636.0, 630.0, 519.0, 587.0, 587.0, 633.0, 582.0, 519.0, 627.0, 579.0, 636.0, 630.0, 584.0, 636.0, 465.0, 627.0, 633.0, 624.0, 633.0, 465.0, 582.0, 587.0, 630.0, 627.0, 639.0, 630.0, 633.0, 453.0, 639.0, 630.0, 630.0, 627.0, 636.0, 579.0, 587.0, 636.0, 510.0, 587.0, 624.0, 630.0, 539.0, 630.0, 579.0, 633.0, 630.0, 582.0, 582.0, 630.0, 627.0, 633.0, 630.0, 627.0, 579.0, 633.0, 587.0, 579.0, 582.0, 576.0, 579.0, 627.0, 627.0, 636.0, 639.0, 630.0, 630.0, 582.0, 618.0, 630.0, 587.0, 473.0, 587.0, 624.0, 587.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [290.0, 292.0, 298.0, 289.0, 311.0, 322.0, 296.0, 283.0, 319.0, 314.0, 295.0, 289.0, 317.0, 316.0, 314.0, 322.0, 281.0, 280.0, 285.0, 288.0, 287.0, 283.0, 290.0, 292.0, 313.0, 314.0, 314.0, 319.0, 298.0, 289.0, 319.0, 317.0, 286.0, 293.0, 311.0, 319.0, 320.0, 307.0, 262.0, 263.0, 285.0, 291.0, 320.0, 316.0, 319.0, 317.0, 288.0, 299.0, 301.0, 286.0, 322.0, 311.0, 317.0, 316.0, 319.0, 317.0, 319.0, 311.0, 264.0, 255.0, 293.0, 294.0, 293.0, 294.0, 321.0, 312.0, 288.0, 294.0, 268.0, 251.0, 311.0, 316.0, 283.0, 296.0, 322.0, 314.0, 316.0, 314.0, 298.0, 286.0, 313.0, 323.0, 222.0, 243.0, 318.0, 309.0, 321.0, 312.0, 310.0, 314.0, 319.0, 314.0, 220.0, 245.0, 297.0, 285.0, 294.0, 
293.0, 318.0, 312.0, 311.0, 316.0, 317.0, 322.0, 316.0, 314.0, 319.0, 314.0, 222.0, 231.0, 319.0, 320.0, 322.0, 308.0, 322.0, 308.0, 311.0, 316.0, 319.0, 317.0, 294.0, 285.0, 283.0, 304.0, 324.0, 312.0, 259.0, 251.0, 301.0, 286.0, 305.0, 319.0, 313.0, 317.0, 271.0, 268.0, 313.0, 317.0, 288.0, 291.0, 317.0, 316.0, 311.0, 319.0, 289.0, 293.0, 290.0, 292.0, 314.0, 316.0, 319.0, 308.0, 311.0, 322.0, 314.0, 316.0, 310.0, 317.0, 302.0, 277.0, 319.0, 314.0, 299.0, 288.0, 293.0, 286.0, 283.0, 299.0, 297.0, 279.0, 291.0, 288.0, 319.0, 308.0, 314.0, 313.0, 313.0, 323.0, 319.0, 320.0, 310.0, 320.0, 319.0, 311.0, 286.0, 296.0, 314.0, 304.0, 311.0, 319.0, 293.0, 294.0, 231.0, 242.0, 291.0, 296.0, 308.0, 316.0, 293.0, 294.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8557975282130863, "mean_processing_ms": 0.23953222980731334, "mean_inference_ms": 1.453823461548284}, "off_policy_estimator": {}, "info": {"num_steps_trained": 8928000, "num_steps_sampled": 4761600, "sample_time_ms": 22359.762, "load_time_ms": 38.161, "grad_time_ms": 11242.342, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.00044215377420186996, "policy_loss": -0.007962713949382305, "vf_loss": 80.8259506225586, "vf_explained_var": 0.7670674920082092, "kl": 0.0015741548268124461, "entropy": 1.1240602731704712, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4761600, "episodes_total": 11904, "training_iteration": 372, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-57-26", "timestamp": 1660258646, "time_this_iter_s": 29.648212909698486, "time_total_s": 17057.02994942665, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 17057.02994942665, "timesteps_since_restore": 4761600, "iterations_since_restore": 372, "perf": {"cpu_util_percent": 34.21904761904762, "ram_util_percent": 58.84523809523809}}
-{"episode_reward_max": 639.0, "episode_reward_min": 390.0, "episode_reward_mean": 602.14, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 193.0}, "policy_reward_max": {"ppo": 326.0}, "policy_reward_mean": {"ppo": 301.07}, "custom_metrics": {"sparse_reward_mean": 208.6, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 184.94, "shaped_reward_min": 110, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.54, "onion_pickup_agent_0_min": 10, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 18.13, "onion_pickup_agent_1_min": 14, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.2, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 17.48, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.22, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.26, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.08, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.07, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.01, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.54, "potting_onion_agent_1_min": 13, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.46, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.82, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.46, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.92, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.65, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.65, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.63, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.02, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.51, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.94, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.01, 
"optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.54, "optimal_onion_potting_agent_1_min": 13, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.01, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.54, "viable_onion_potting_agent_1_min": 13, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [587.0, 630.0, 582.0, 630.0, 630.0, 636.0, 633.0, 584.0, 630.0, 633.0, 408.0, 582.0, 630.0, 584.0, 630.0, 639.0, 630.0, 561.0, 630.0, 633.0, 587.0, 633.0, 630.0, 587.0, 587.0, 636.0, 636.0, 633.0, 390.0, 636.0, 579.0, 579.0, 587.0, 624.0, 630.0, 539.0, 630.0, 579.0, 633.0, 630.0, 582.0, 582.0, 630.0, 627.0, 633.0, 630.0, 627.0, 579.0, 633.0, 587.0, 579.0, 582.0, 576.0, 579.0, 627.0, 627.0, 636.0, 639.0, 630.0, 630.0, 582.0, 618.0, 630.0, 587.0, 473.0, 587.0, 624.0, 587.0, 582.0, 587.0, 633.0, 579.0, 633.0, 584.0, 633.0, 636.0, 561.0, 573.0, 570.0, 582.0, 627.0, 633.0, 587.0, 636.0, 579.0, 630.0, 627.0, 525.0, 576.0, 636.0, 636.0, 587.0, 587.0, 633.0, 633.0, 636.0, 630.0, 519.0, 587.0, 587.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [296.0, 291.0, 316.0, 314.0, 280.0, 302.0, 319.0, 311.0, 319.0, 311.0, 314.0, 322.0, 316.0, 317.0, 293.0, 291.0, 318.0, 312.0, 319.0, 314.0, 205.0, 203.0, 285.0, 297.0, 308.0, 322.0, 292.0, 292.0, 306.0, 324.0, 319.0, 320.0, 326.0, 304.0, 285.0, 276.0, 311.0, 319.0, 313.0, 320.0, 301.0, 286.0, 311.0, 322.0, 313.0, 317.0, 293.0, 294.0, 301.0, 286.0, 316.0, 320.0, 319.0, 317.0, 316.0, 317.0, 193.0, 197.0, 322.0, 314.0, 288.0, 291.0, 281.0, 298.0, 301.0, 286.0, 305.0, 319.0, 313.0, 317.0, 271.0, 268.0, 313.0, 317.0, 288.0, 291.0, 317.0, 316.0, 311.0, 319.0, 289.0, 293.0, 290.0, 292.0, 314.0, 316.0, 319.0, 308.0, 311.0, 322.0, 314.0, 316.0, 310.0, 317.0, 302.0, 277.0, 319.0, 
314.0, 299.0, 288.0, 293.0, 286.0, 283.0, 299.0, 297.0, 279.0, 291.0, 288.0, 319.0, 308.0, 314.0, 313.0, 313.0, 323.0, 319.0, 320.0, 310.0, 320.0, 319.0, 311.0, 286.0, 296.0, 314.0, 304.0, 311.0, 319.0, 293.0, 294.0, 231.0, 242.0, 291.0, 296.0, 308.0, 316.0, 293.0, 294.0, 290.0, 292.0, 298.0, 289.0, 311.0, 322.0, 296.0, 283.0, 319.0, 314.0, 295.0, 289.0, 317.0, 316.0, 314.0, 322.0, 281.0, 280.0, 285.0, 288.0, 287.0, 283.0, 290.0, 292.0, 313.0, 314.0, 314.0, 319.0, 298.0, 289.0, 319.0, 317.0, 286.0, 293.0, 311.0, 319.0, 320.0, 307.0, 262.0, 263.0, 285.0, 291.0, 320.0, 316.0, 319.0, 317.0, 288.0, 299.0, 301.0, 286.0, 322.0, 311.0, 317.0, 316.0, 319.0, 317.0, 319.0, 311.0, 264.0, 255.0, 293.0, 294.0, 293.0, 294.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8547135741769734, "mean_processing_ms": 0.23931513380752317, "mean_inference_ms": 1.4526421752907723}, "off_policy_estimator": {}, "info": {"num_steps_trained": 8952000, "num_steps_sampled": 4774400, "sample_time_ms": 22196.797, "load_time_ms": 37.846, "grad_time_ms": 11312.767, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.003103644819930196, "policy_loss": -0.004869487602263689, "vf_loss": 85.35115814208984, "vf_explained_var": 0.7750833630561829, "kl": 0.0021017238032072783, "entropy": 1.123950481414795, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4774400, "episodes_total": 11936, "training_iteration": 373, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-57-58", "timestamp": 1660258678, "time_this_iter_s": 32.121092796325684, "time_total_s": 17089.151042222977, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 17089.151042222977, "timesteps_since_restore": 4774400, "iterations_since_restore": 373, "perf": {"cpu_util_percent": 33.684444444444445, "ram_util_percent": 58.78888888888888}}
-{"episode_reward_max": 639.0, "episode_reward_min": 390.0, "episode_reward_mean": 602.95, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 193.0}, "policy_reward_max": {"ppo": 329.0}, "policy_reward_mean": {"ppo": 301.475}, "custom_metrics": {"sparse_reward_mean": 208.6, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 185.75, "shaped_reward_min": 110, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.57, "onion_pickup_agent_0_min": 10, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 18.3, "onion_pickup_agent_1_min": 14, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.21, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 17.66, "useful_onion_pickup_agent_1_min": 14, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.18, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.29, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.06, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.07, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.09, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.66, "potting_onion_agent_1_min": 13, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.43, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.85, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.52, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.88, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.59, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.7, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.7, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.98, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.58, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.89, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.09, 
"optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.66, "optimal_onion_potting_agent_1_min": 13, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.09, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.66, "viable_onion_potting_agent_1_min": 13, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 636.0, 636.0, 576.0, 636.0, 639.0, 590.0, 633.0, 636.0, 522.0, 633.0, 578.0, 544.0, 636.0, 630.0, 636.0, 636.0, 627.0, 621.0, 636.0, 582.0, 582.0, 636.0, 587.0, 587.0, 636.0, 636.0, 582.0, 630.0, 636.0, 627.0, 581.0, 473.0, 587.0, 624.0, 587.0, 582.0, 587.0, 633.0, 579.0, 633.0, 584.0, 633.0, 636.0, 561.0, 573.0, 570.0, 582.0, 627.0, 633.0, 587.0, 636.0, 579.0, 630.0, 627.0, 525.0, 576.0, 636.0, 636.0, 587.0, 587.0, 633.0, 633.0, 636.0, 630.0, 519.0, 587.0, 587.0, 587.0, 630.0, 582.0, 630.0, 630.0, 636.0, 633.0, 584.0, 630.0, 633.0, 408.0, 582.0, 630.0, 584.0, 630.0, 639.0, 630.0, 561.0, 630.0, 633.0, 587.0, 633.0, 630.0, 587.0, 587.0, 636.0, 636.0, 633.0, 390.0, 636.0, 579.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [296.0, 286.0, 319.0, 317.0, 320.0, 316.0, 288.0, 288.0, 329.0, 307.0, 324.0, 315.0, 296.0, 294.0, 316.0, 317.0, 319.0, 317.0, 268.0, 254.0, 315.0, 318.0, 285.0, 293.0, 270.0, 274.0, 320.0, 316.0, 315.0, 315.0, 322.0, 314.0, 327.0, 309.0, 314.0, 313.0, 313.0, 308.0, 317.0, 319.0, 296.0, 286.0, 293.0, 289.0, 311.0, 325.0, 291.0, 296.0, 295.0, 292.0, 324.0, 312.0, 319.0, 317.0, 294.0, 288.0, 319.0, 311.0, 314.0, 322.0, 314.0, 313.0, 282.0, 299.0, 231.0, 242.0, 291.0, 296.0, 308.0, 316.0, 293.0, 294.0, 290.0, 292.0, 298.0, 289.0, 311.0, 322.0, 296.0, 283.0, 319.0, 314.0, 295.0, 289.0, 317.0, 316.0, 314.0, 322.0, 281.0, 280.0, 285.0, 288.0, 287.0, 283.0, 290.0, 292.0, 313.0, 
314.0, 314.0, 319.0, 298.0, 289.0, 319.0, 317.0, 286.0, 293.0, 311.0, 319.0, 320.0, 307.0, 262.0, 263.0, 285.0, 291.0, 320.0, 316.0, 319.0, 317.0, 288.0, 299.0, 301.0, 286.0, 322.0, 311.0, 317.0, 316.0, 319.0, 317.0, 319.0, 311.0, 264.0, 255.0, 293.0, 294.0, 293.0, 294.0, 296.0, 291.0, 316.0, 314.0, 280.0, 302.0, 319.0, 311.0, 319.0, 311.0, 314.0, 322.0, 316.0, 317.0, 293.0, 291.0, 318.0, 312.0, 319.0, 314.0, 205.0, 203.0, 285.0, 297.0, 308.0, 322.0, 292.0, 292.0, 306.0, 324.0, 319.0, 320.0, 326.0, 304.0, 285.0, 276.0, 311.0, 319.0, 313.0, 320.0, 301.0, 286.0, 311.0, 322.0, 313.0, 317.0, 293.0, 294.0, 301.0, 286.0, 316.0, 320.0, 319.0, 317.0, 316.0, 317.0, 193.0, 197.0, 322.0, 314.0, 288.0, 291.0, 281.0, 298.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8536367311955897, "mean_processing_ms": 0.23909908370976882, "mean_inference_ms": 1.4514727184055203}, "off_policy_estimator": {}, "info": {"num_steps_trained": 8976000, "num_steps_sampled": 4787200, "sample_time_ms": 21894.527, "load_time_ms": 38.299, "grad_time_ms": 11412.728, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0030573883559554815, "policy_loss": -0.004609658382833004, "vf_loss": 82.26570892333984, "vf_explained_var": 0.7665780186653137, "kl": 0.002119669923558831, "entropy": 1.119057059288025, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4787200, "episodes_total": 11968, "training_iteration": 374, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-58-31", "timestamp": 1660258711, "time_this_iter_s": 33.1311149597168, "time_total_s": 17122.282157182693, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 17122.282157182693, "timesteps_since_restore": 4787200, "iterations_since_restore": 374, "perf": {"cpu_util_percent": 34.03404255319149, "ram_util_percent": 59.29999999999999}}
-{"episode_reward_max": 639.0, "episode_reward_min": 390.0, "episode_reward_mean": 608.18, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 193.0}, "policy_reward_max": {"ppo": 329.0}, "policy_reward_mean": {"ppo": 304.09}, "custom_metrics": {"sparse_reward_mean": 210.6, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 186.98, "shaped_reward_min": 110, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.79, "onion_pickup_agent_0_min": 10, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 18.27, "onion_pickup_agent_1_min": 14, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.45, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 17.7, "useful_onion_pickup_agent_1_min": 14, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.21, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.27, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.06, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.06, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.24, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.68, "potting_onion_agent_1_min": 13, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.44, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.87, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.48, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.06, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.66, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.61, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.63, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.1, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.55, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.03, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.24, 
"optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.68, "optimal_onion_potting_agent_1_min": 13, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.24, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.68, "viable_onion_potting_agent_1_min": 13, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [636.0, 630.0, 630.0, 636.0, 630.0, 636.0, 564.0, 633.0, 633.0, 627.0, 630.0, 587.0, 627.0, 621.0, 576.0, 582.0, 630.0, 582.0, 639.0, 636.0, 636.0, 582.0, 630.0, 633.0, 575.0, 630.0, 587.0, 582.0, 639.0, 639.0, 587.0, 630.0, 630.0, 519.0, 587.0, 587.0, 587.0, 630.0, 582.0, 630.0, 630.0, 636.0, 633.0, 584.0, 630.0, 633.0, 408.0, 582.0, 630.0, 584.0, 630.0, 639.0, 630.0, 561.0, 630.0, 633.0, 587.0, 633.0, 630.0, 587.0, 587.0, 636.0, 636.0, 633.0, 390.0, 636.0, 579.0, 579.0, 582.0, 636.0, 636.0, 576.0, 636.0, 639.0, 590.0, 633.0, 636.0, 522.0, 633.0, 578.0, 544.0, 636.0, 630.0, 636.0, 636.0, 627.0, 621.0, 636.0, 582.0, 582.0, 636.0, 587.0, 587.0, 636.0, 636.0, 582.0, 630.0, 636.0, 627.0, 581.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [314.0, 322.0, 314.0, 316.0, 316.0, 314.0, 316.0, 320.0, 313.0, 317.0, 314.0, 322.0, 275.0, 289.0, 313.0, 320.0, 321.0, 312.0, 308.0, 319.0, 316.0, 314.0, 294.0, 293.0, 314.0, 313.0, 302.0, 319.0, 285.0, 291.0, 293.0, 289.0, 311.0, 319.0, 288.0, 294.0, 317.0, 322.0, 319.0, 317.0, 314.0, 322.0, 288.0, 294.0, 316.0, 314.0, 319.0, 314.0, 287.0, 288.0, 314.0, 316.0, 288.0, 299.0, 288.0, 294.0, 317.0, 322.0, 317.0, 322.0, 296.0, 291.0, 311.0, 319.0, 319.0, 311.0, 264.0, 255.0, 293.0, 294.0, 293.0, 294.0, 296.0, 291.0, 316.0, 314.0, 280.0, 302.0, 319.0, 311.0, 319.0, 311.0, 314.0, 322.0, 316.0, 317.0, 293.0, 291.0, 318.0, 312.0, 319.0, 314.0, 205.0, 203.0, 285.0, 297.0, 308.0, 
322.0, 292.0, 292.0, 306.0, 324.0, 319.0, 320.0, 326.0, 304.0, 285.0, 276.0, 311.0, 319.0, 313.0, 320.0, 301.0, 286.0, 311.0, 322.0, 313.0, 317.0, 293.0, 294.0, 301.0, 286.0, 316.0, 320.0, 319.0, 317.0, 316.0, 317.0, 193.0, 197.0, 322.0, 314.0, 288.0, 291.0, 281.0, 298.0, 296.0, 286.0, 319.0, 317.0, 320.0, 316.0, 288.0, 288.0, 329.0, 307.0, 324.0, 315.0, 296.0, 294.0, 316.0, 317.0, 319.0, 317.0, 268.0, 254.0, 315.0, 318.0, 285.0, 293.0, 270.0, 274.0, 320.0, 316.0, 315.0, 315.0, 322.0, 314.0, 327.0, 309.0, 314.0, 313.0, 313.0, 308.0, 317.0, 319.0, 296.0, 286.0, 293.0, 289.0, 311.0, 325.0, 291.0, 296.0, 295.0, 292.0, 324.0, 312.0, 319.0, 317.0, 294.0, 288.0, 319.0, 311.0, 314.0, 322.0, 314.0, 313.0, 282.0, 299.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8525659073078762, "mean_processing_ms": 0.23888337042997632, "mean_inference_ms": 1.4503574621583197}, "off_policy_estimator": {}, "info": {"num_steps_trained": 9000000, "num_steps_sampled": 4800000, "sample_time_ms": 21378.834, "load_time_ms": 37.98, "grad_time_ms": 11184.622, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0034743063151836395, "policy_loss": -0.004098345525562763, "vf_loss": 81.30432891845703, "vf_explained_var": 0.7632368206977844, "kl": 0.0018746949499472976, "entropy": 1.1155738830566406, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4800000, "episodes_total": 12000, "training_iteration": 375, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-59-03", "timestamp": 1660258743, "time_this_iter_s": 31.725862979888916, "time_total_s": 17154.008020162582, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 17154.008020162582, "timesteps_since_restore": 4800000, "iterations_since_restore": 375, "perf": {"cpu_util_percent": 34.54888888888888, "ram_util_percent": 59.05333333333331}}
-{"episode_reward_max": 639.0, "episode_reward_min": 390.0, "episode_reward_mean": 611.67, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 193.0}, "policy_reward_max": {"ppo": 329.0}, "policy_reward_mean": {"ppo": 305.835}, "custom_metrics": {"sparse_reward_mean": 211.8, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 188.07, "shaped_reward_min": 110, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.08, "onion_pickup_agent_0_min": 10, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 18.11, "onion_pickup_agent_1_min": 14, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.71, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 17.66, "useful_onion_pickup_agent_1_min": 14, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.19, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.25, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.05, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.03, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.58, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.57, "potting_onion_agent_1_min": 13, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.22, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 6.07, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.4, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.24, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.54, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.64, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.51, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.23, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.46, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.17, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.58, 
"optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.57, "optimal_onion_potting_agent_1_min": 13, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.58, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.57, "viable_onion_potting_agent_1_min": 13, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 630.0, 639.0, 636.0, 636.0, 636.0, 633.0, 633.0, 636.0, 590.0, 639.0, 582.0, 587.0, 627.0, 636.0, 581.0, 579.0, 582.0, 627.0, 639.0, 576.0, 630.0, 633.0, 633.0, 570.0, 630.0, 639.0, 639.0, 630.0, 582.0, 581.0, 630.0, 390.0, 636.0, 579.0, 579.0, 582.0, 636.0, 636.0, 576.0, 636.0, 639.0, 590.0, 633.0, 636.0, 522.0, 633.0, 578.0, 544.0, 636.0, 630.0, 636.0, 636.0, 627.0, 621.0, 636.0, 582.0, 582.0, 636.0, 587.0, 587.0, 636.0, 636.0, 582.0, 630.0, 636.0, 627.0, 581.0, 636.0, 630.0, 630.0, 636.0, 630.0, 636.0, 564.0, 633.0, 633.0, 627.0, 630.0, 587.0, 627.0, 621.0, 576.0, 582.0, 630.0, 582.0, 639.0, 636.0, 636.0, 582.0, 630.0, 633.0, 575.0, 630.0, 587.0, 582.0, 639.0, 639.0, 587.0, 630.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [281.0, 301.0, 321.0, 309.0, 317.0, 322.0, 311.0, 325.0, 317.0, 319.0, 319.0, 317.0, 319.0, 314.0, 321.0, 312.0, 311.0, 325.0, 293.0, 297.0, 320.0, 319.0, 288.0, 294.0, 291.0, 296.0, 316.0, 311.0, 314.0, 322.0, 281.0, 300.0, 296.0, 283.0, 288.0, 294.0, 319.0, 308.0, 314.0, 325.0, 288.0, 288.0, 316.0, 314.0, 309.0, 324.0, 316.0, 317.0, 283.0, 287.0, 314.0, 316.0, 314.0, 325.0, 319.0, 320.0, 318.0, 312.0, 293.0, 289.0, 299.0, 282.0, 316.0, 314.0, 193.0, 197.0, 322.0, 314.0, 288.0, 291.0, 281.0, 298.0, 296.0, 286.0, 319.0, 317.0, 320.0, 316.0, 288.0, 288.0, 329.0, 307.0, 324.0, 315.0, 296.0, 294.0, 316.0, 317.0, 319.0, 317.0, 268.0, 254.0, 315.0, 318.0, 285.0, 293.0, 270.0, 
274.0, 320.0, 316.0, 315.0, 315.0, 322.0, 314.0, 327.0, 309.0, 314.0, 313.0, 313.0, 308.0, 317.0, 319.0, 296.0, 286.0, 293.0, 289.0, 311.0, 325.0, 291.0, 296.0, 295.0, 292.0, 324.0, 312.0, 319.0, 317.0, 294.0, 288.0, 319.0, 311.0, 314.0, 322.0, 314.0, 313.0, 282.0, 299.0, 314.0, 322.0, 314.0, 316.0, 316.0, 314.0, 316.0, 320.0, 313.0, 317.0, 314.0, 322.0, 275.0, 289.0, 313.0, 320.0, 321.0, 312.0, 308.0, 319.0, 316.0, 314.0, 294.0, 293.0, 314.0, 313.0, 302.0, 319.0, 285.0, 291.0, 293.0, 289.0, 311.0, 319.0, 288.0, 294.0, 317.0, 322.0, 319.0, 317.0, 314.0, 322.0, 288.0, 294.0, 316.0, 314.0, 319.0, 314.0, 287.0, 288.0, 314.0, 316.0, 288.0, 299.0, 288.0, 294.0, 317.0, 322.0, 317.0, 322.0, 296.0, 291.0, 311.0, 319.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8515001170833599, "mean_processing_ms": 0.2386703929555994, "mean_inference_ms": 1.4492475264918965}, "off_policy_estimator": {}, "info": {"num_steps_trained": 9024000, "num_steps_sampled": 4812800, "sample_time_ms": 21252.385, "load_time_ms": 37.854, "grad_time_ms": 10740.605, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0007883608341217041, "policy_loss": -0.006749347317963839, "vf_loss": 80.9527359008789, "vf_explained_var": 0.7635239958763123, "kl": 0.0017555366503074765, "entropy": 1.115132212638855, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4812800, "episodes_total": 12032, "training_iteration": 376, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-59-32", "timestamp": 1660258772, "time_this_iter_s": 29.115790128707886, "time_total_s": 17183.12381029129, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 17183.12381029129, "timesteps_since_restore": 4812800, "iterations_since_restore": 376, "perf": {"cpu_util_percent": 34.046341463414635, "ram_util_percent": 58.78536585365854}}
-{"episode_reward_max": 639.0, "episode_reward_min": 558.0, "episode_reward_mean": 615.94, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 273.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 307.97}, "custom_metrics": {"sparse_reward_mean": 213.4, "sparse_reward_min": 200, "sparse_reward_max": 220, "shaped_reward_mean": 189.14, "shaped_reward_min": 158, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.19, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 18.07, "onion_pickup_agent_1_min": 14, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.83, "useful_onion_pickup_agent_0_min": 13, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.58, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.2, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.21, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.03, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.07, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.69, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.65, "potting_onion_agent_1_min": 13, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.18, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 6.1, "dish_pickup_agent_1_min": 4, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.4, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.29, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.54, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.55, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.04, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.48, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.33, "soup_pickup_agent_1_min": 4, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.42, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.26, "soup_delivery_agent_1_min": 4, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.69, 
"optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.65, "optimal_onion_potting_agent_1_min": 13, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.69, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.65, "viable_onion_potting_agent_1_min": 13, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [630.0, 581.0, 630.0, 630.0, 587.0, 636.0, 639.0, 558.0, 633.0, 582.0, 636.0, 582.0, 639.0, 633.0, 636.0, 587.0, 636.0, 636.0, 582.0, 630.0, 584.0, 636.0, 587.0, 633.0, 639.0, 564.0, 639.0, 630.0, 636.0, 582.0, 630.0, 639.0, 630.0, 636.0, 627.0, 581.0, 636.0, 630.0, 630.0, 636.0, 630.0, 636.0, 564.0, 633.0, 633.0, 627.0, 630.0, 587.0, 627.0, 621.0, 576.0, 582.0, 630.0, 582.0, 639.0, 636.0, 636.0, 582.0, 630.0, 633.0, 575.0, 630.0, 587.0, 582.0, 639.0, 639.0, 587.0, 630.0, 582.0, 630.0, 639.0, 636.0, 636.0, 636.0, 633.0, 633.0, 636.0, 590.0, 639.0, 582.0, 587.0, 627.0, 636.0, 581.0, 579.0, 582.0, 627.0, 639.0, 576.0, 630.0, 633.0, 633.0, 570.0, 630.0, 639.0, 639.0, 630.0, 582.0, 581.0, 630.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [316.0, 314.0, 298.0, 283.0, 305.0, 325.0, 314.0, 316.0, 292.0, 295.0, 319.0, 317.0, 319.0, 320.0, 279.0, 279.0, 316.0, 317.0, 293.0, 289.0, 317.0, 319.0, 296.0, 286.0, 320.0, 319.0, 311.0, 322.0, 317.0, 319.0, 280.0, 307.0, 314.0, 322.0, 317.0, 319.0, 293.0, 289.0, 316.0, 314.0, 302.0, 282.0, 316.0, 320.0, 285.0, 302.0, 317.0, 316.0, 322.0, 317.0, 291.0, 273.0, 325.0, 314.0, 319.0, 311.0, 324.0, 312.0, 296.0, 286.0, 315.0, 315.0, 314.0, 325.0, 319.0, 311.0, 314.0, 322.0, 314.0, 313.0, 282.0, 299.0, 314.0, 322.0, 314.0, 316.0, 316.0, 314.0, 316.0, 320.0, 313.0, 317.0, 314.0, 322.0, 275.0, 289.0, 313.0, 320.0, 321.0, 312.0, 308.0, 319.0, 316.0, 314.0, 294.0, 293.0, 314.0, 
313.0, 302.0, 319.0, 285.0, 291.0, 293.0, 289.0, 311.0, 319.0, 288.0, 294.0, 317.0, 322.0, 319.0, 317.0, 314.0, 322.0, 288.0, 294.0, 316.0, 314.0, 319.0, 314.0, 287.0, 288.0, 314.0, 316.0, 288.0, 299.0, 288.0, 294.0, 317.0, 322.0, 317.0, 322.0, 296.0, 291.0, 311.0, 319.0, 281.0, 301.0, 321.0, 309.0, 317.0, 322.0, 311.0, 325.0, 317.0, 319.0, 319.0, 317.0, 319.0, 314.0, 321.0, 312.0, 311.0, 325.0, 293.0, 297.0, 320.0, 319.0, 288.0, 294.0, 291.0, 296.0, 316.0, 311.0, 314.0, 322.0, 281.0, 300.0, 296.0, 283.0, 288.0, 294.0, 319.0, 308.0, 314.0, 325.0, 288.0, 288.0, 316.0, 314.0, 309.0, 324.0, 316.0, 317.0, 283.0, 287.0, 314.0, 316.0, 314.0, 325.0, 319.0, 320.0, 318.0, 312.0, 293.0, 289.0, 299.0, 282.0, 316.0, 314.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8504378248511929, "mean_processing_ms": 0.23845860870627447, "mean_inference_ms": 1.448146633207985}, "off_policy_estimator": {}, "info": {"num_steps_trained": 9048000, "num_steps_sampled": 4825600, "sample_time_ms": 21404.603, "load_time_ms": 37.763, "grad_time_ms": 10323.133, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.002214103704318404, "policy_loss": -0.005506592337042093, "vf_loss": 82.8126449584961, "vf_explained_var": 0.766756534576416, "kl": 0.0020635148975998163, "entropy": 1.1211366653442383, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4825600, "episodes_total": 12064, "training_iteration": 377, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-00-03", "timestamp": 1660258803, "time_this_iter_s": 30.737117767333984, "time_total_s": 17213.860928058624, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 17213.860928058624, "timesteps_since_restore": 4825600, "iterations_since_restore": 377, "perf": {"cpu_util_percent": 36.85227272727273, "ram_util_percent": 58.872727272727275}}
-{"episode_reward_max": 639.0, "episode_reward_min": 546.0, "episode_reward_mean": 616.56, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 264.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 308.28}, "custom_metrics": {"sparse_reward_mean": 213.4, "sparse_reward_min": 200, "sparse_reward_max": 220, "shaped_reward_mean": 189.76, "shaped_reward_min": 146, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.2, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 18.23, "onion_pickup_agent_1_min": 14, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.81, "useful_onion_pickup_agent_0_min": 13, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 17.71, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.2, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.23, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.06, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.07, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.7, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.8, "potting_onion_agent_1_min": 13, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.23, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 6.06, "dish_pickup_agent_1_min": 4, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.49, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.23, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.57, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.54, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.05, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.52, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.3, "soup_pickup_agent_1_min": 4, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.45, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.24, "soup_delivery_agent_1_min": 4, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.7, 
"optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.8, "optimal_onion_potting_agent_1_min": 13, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.7, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.8, "viable_onion_potting_agent_1_min": 13, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [633.0, 630.0, 587.0, 639.0, 576.0, 639.0, 633.0, 639.0, 636.0, 630.0, 636.0, 633.0, 633.0, 587.0, 636.0, 582.0, 593.0, 546.0, 579.0, 633.0, 639.0, 573.0, 630.0, 636.0, 633.0, 587.0, 590.0, 627.0, 630.0, 639.0, 636.0, 636.0, 639.0, 639.0, 587.0, 630.0, 582.0, 630.0, 639.0, 636.0, 636.0, 636.0, 633.0, 633.0, 636.0, 590.0, 639.0, 582.0, 587.0, 627.0, 636.0, 581.0, 579.0, 582.0, 627.0, 639.0, 576.0, 630.0, 633.0, 633.0, 570.0, 630.0, 639.0, 639.0, 630.0, 582.0, 581.0, 630.0, 630.0, 581.0, 630.0, 630.0, 587.0, 636.0, 639.0, 558.0, 633.0, 582.0, 636.0, 582.0, 639.0, 633.0, 636.0, 587.0, 636.0, 636.0, 582.0, 630.0, 584.0, 636.0, 587.0, 633.0, 639.0, 564.0, 639.0, 630.0, 636.0, 582.0, 630.0, 639.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [316.0, 317.0, 308.0, 322.0, 290.0, 297.0, 322.0, 317.0, 288.0, 288.0, 320.0, 319.0, 314.0, 319.0, 325.0, 314.0, 312.0, 324.0, 316.0, 314.0, 316.0, 320.0, 311.0, 322.0, 319.0, 314.0, 291.0, 296.0, 319.0, 317.0, 288.0, 294.0, 294.0, 299.0, 264.0, 282.0, 288.0, 291.0, 319.0, 314.0, 316.0, 323.0, 279.0, 294.0, 321.0, 309.0, 317.0, 319.0, 316.0, 317.0, 293.0, 294.0, 296.0, 294.0, 310.0, 317.0, 314.0, 316.0, 319.0, 320.0, 319.0, 317.0, 316.0, 320.0, 317.0, 322.0, 317.0, 322.0, 296.0, 291.0, 311.0, 319.0, 281.0, 301.0, 321.0, 309.0, 317.0, 322.0, 311.0, 325.0, 317.0, 319.0, 319.0, 317.0, 319.0, 314.0, 321.0, 312.0, 311.0, 325.0, 293.0, 297.0, 320.0, 319.0, 288.0, 294.0, 291.0, 
296.0, 316.0, 311.0, 314.0, 322.0, 281.0, 300.0, 296.0, 283.0, 288.0, 294.0, 319.0, 308.0, 314.0, 325.0, 288.0, 288.0, 316.0, 314.0, 309.0, 324.0, 316.0, 317.0, 283.0, 287.0, 314.0, 316.0, 314.0, 325.0, 319.0, 320.0, 318.0, 312.0, 293.0, 289.0, 299.0, 282.0, 316.0, 314.0, 316.0, 314.0, 298.0, 283.0, 305.0, 325.0, 314.0, 316.0, 292.0, 295.0, 319.0, 317.0, 319.0, 320.0, 279.0, 279.0, 316.0, 317.0, 293.0, 289.0, 317.0, 319.0, 296.0, 286.0, 320.0, 319.0, 311.0, 322.0, 317.0, 319.0, 280.0, 307.0, 314.0, 322.0, 317.0, 319.0, 293.0, 289.0, 316.0, 314.0, 302.0, 282.0, 316.0, 320.0, 285.0, 302.0, 317.0, 316.0, 322.0, 317.0, 291.0, 273.0, 325.0, 314.0, 319.0, 311.0, 324.0, 312.0, 296.0, 286.0, 315.0, 315.0, 314.0, 325.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8493908771025369, "mean_processing_ms": 0.23825289023894292, "mean_inference_ms": 1.447125693575985}, "off_policy_estimator": {}, "info": {"num_steps_trained": 9072000, "num_steps_sampled": 4838400, "sample_time_ms": 21529.609, "load_time_ms": 37.587, "grad_time_ms": 10211.527, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0015308427391573787, "policy_loss": -0.006051002535969019, "vf_loss": 81.36373901367188, "vf_explained_var": 0.7675411701202393, "kl": 0.00216054730117321, "entropy": 1.1090576648712158, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4838400, "episodes_total": 12096, "training_iteration": 378, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-00-35", "timestamp": 1660258835, "time_this_iter_s": 32.476667165756226, "time_total_s": 17246.33759522438, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 17246.33759522438, "timesteps_since_restore": 4838400, "iterations_since_restore": 378, "perf": {"cpu_util_percent": 34.55434782608695, "ram_util_percent": 59.1304347826087}}
-{"episode_reward_max": 639.0, "episode_reward_min": 468.0, "episode_reward_mean": 614.53, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 231.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 307.265}, "custom_metrics": {"sparse_reward_mean": 212.8, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 188.93, "shaped_reward_min": 146, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.14, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 18.21, "onion_pickup_agent_1_min": 13, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.82, "useful_onion_pickup_agent_0_min": 13, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 17.65, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.21, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.26, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.08, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.1, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.64, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.72, "potting_onion_agent_1_min": 13, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.2, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.91, "dish_pickup_agent_1_min": 4, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.52, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.13, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.55, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.5, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.06, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.53, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.25, "soup_pickup_agent_1_min": 4, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.48, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.2, "soup_delivery_agent_1_min": 4, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.64, 
"optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.72, "optimal_onion_potting_agent_1_min": 13, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.64, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.72, "viable_onion_potting_agent_1_min": 13, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [573.0, 639.0, 633.0, 636.0, 630.0, 570.0, 630.0, 633.0, 579.0, 630.0, 639.0, 593.0, 633.0, 636.0, 627.0, 579.0, 590.0, 636.0, 468.0, 587.0, 576.0, 582.0, 582.0, 630.0, 636.0, 636.0, 633.0, 633.0, 630.0, 633.0, 630.0, 630.0, 630.0, 582.0, 581.0, 630.0, 630.0, 581.0, 630.0, 630.0, 587.0, 636.0, 639.0, 558.0, 633.0, 582.0, 636.0, 582.0, 639.0, 633.0, 636.0, 587.0, 636.0, 636.0, 582.0, 630.0, 584.0, 636.0, 587.0, 633.0, 639.0, 564.0, 639.0, 630.0, 636.0, 582.0, 630.0, 639.0, 633.0, 630.0, 587.0, 639.0, 576.0, 639.0, 633.0, 639.0, 636.0, 630.0, 636.0, 633.0, 633.0, 587.0, 636.0, 582.0, 593.0, 546.0, 579.0, 633.0, 639.0, 573.0, 630.0, 636.0, 633.0, 587.0, 590.0, 627.0, 630.0, 639.0, 636.0, 636.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [285.0, 288.0, 322.0, 317.0, 316.0, 317.0, 316.0, 320.0, 311.0, 319.0, 282.0, 288.0, 316.0, 314.0, 319.0, 314.0, 294.0, 285.0, 314.0, 316.0, 319.0, 320.0, 307.0, 286.0, 314.0, 319.0, 314.0, 322.0, 313.0, 314.0, 287.0, 292.0, 288.0, 302.0, 324.0, 312.0, 237.0, 231.0, 291.0, 296.0, 282.0, 294.0, 291.0, 291.0, 289.0, 293.0, 319.0, 311.0, 314.0, 322.0, 324.0, 312.0, 314.0, 319.0, 314.0, 319.0, 314.0, 316.0, 316.0, 317.0, 308.0, 322.0, 319.0, 311.0, 318.0, 312.0, 293.0, 289.0, 299.0, 282.0, 316.0, 314.0, 316.0, 314.0, 298.0, 283.0, 305.0, 325.0, 314.0, 316.0, 292.0, 295.0, 319.0, 317.0, 319.0, 320.0, 279.0, 279.0, 316.0, 317.0, 293.0, 289.0, 317.0, 319.0, 296.0, 286.0, 320.0, 
319.0, 311.0, 322.0, 317.0, 319.0, 280.0, 307.0, 314.0, 322.0, 317.0, 319.0, 293.0, 289.0, 316.0, 314.0, 302.0, 282.0, 316.0, 320.0, 285.0, 302.0, 317.0, 316.0, 322.0, 317.0, 291.0, 273.0, 325.0, 314.0, 319.0, 311.0, 324.0, 312.0, 296.0, 286.0, 315.0, 315.0, 314.0, 325.0, 316.0, 317.0, 308.0, 322.0, 290.0, 297.0, 322.0, 317.0, 288.0, 288.0, 320.0, 319.0, 314.0, 319.0, 325.0, 314.0, 312.0, 324.0, 316.0, 314.0, 316.0, 320.0, 311.0, 322.0, 319.0, 314.0, 291.0, 296.0, 319.0, 317.0, 288.0, 294.0, 294.0, 299.0, 264.0, 282.0, 288.0, 291.0, 319.0, 314.0, 316.0, 323.0, 279.0, 294.0, 321.0, 309.0, 317.0, 319.0, 316.0, 317.0, 293.0, 294.0, 296.0, 294.0, 310.0, 317.0, 314.0, 316.0, 319.0, 320.0, 319.0, 317.0, 316.0, 320.0]}, "sampler_perf": {"mean_env_wait_ms": 0.84835648025371, "mean_processing_ms": 0.23804887644488537, "mean_inference_ms": 1.446224605883411}, "off_policy_estimator": {}, "info": {"num_steps_trained": 9096000, "num_steps_sampled": 4851200, "sample_time_ms": 21545.942, "load_time_ms": 37.194, "grad_time_ms": 9930.811, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0008399917860515416, "policy_loss": -0.0065034665167331696, "vf_loss": 79.03890991210938, "vf_explained_var": 0.7710984349250793, "kl": 0.0017613372765481472, "entropy": 1.1208573579788208, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4851200, "episodes_total": 12128, "training_iteration": 379, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-01-07", "timestamp": 1660258867, "time_this_iter_s": 32.251976013183594, "time_total_s": 17278.589571237564, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 17278.589571237564, "timesteps_since_restore": 4851200, "iterations_since_restore": 379, "perf": {"cpu_util_percent": 33.40222222222222, "ram_util_percent": 58.955555555555534}}
-{"episode_reward_max": 639.0, "episode_reward_min": 467.0, "episode_reward_mean": 614.36, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 230.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 307.18}, "custom_metrics": {"sparse_reward_mean": 212.8, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 188.76, "shaped_reward_min": 146, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.93, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 18.33, "onion_pickup_agent_1_min": 13, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 16.65, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 17.81, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.19, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.21, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.06, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.08, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.41, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.88, "potting_onion_agent_1_min": 13, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.28, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.8, "dish_pickup_agent_1_min": 4, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.57, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.16, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.58, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 0.49, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.55, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.2, "soup_pickup_agent_1_min": 4, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.53, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.15, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.41, 
"optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.88, "optimal_onion_potting_agent_1_min": 13, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.41, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.88, "viable_onion_potting_agent_1_min": 13, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [627.0, 582.0, 636.0, 467.0, 576.0, 639.0, 582.0, 582.0, 627.0, 636.0, 639.0, 627.0, 579.0, 587.0, 636.0, 579.0, 633.0, 636.0, 582.0, 639.0, 630.0, 633.0, 633.0, 633.0, 630.0, 627.0, 633.0, 627.0, 582.0, 633.0, 633.0, 636.0, 636.0, 582.0, 630.0, 639.0, 633.0, 630.0, 587.0, 639.0, 576.0, 639.0, 633.0, 639.0, 636.0, 630.0, 636.0, 633.0, 633.0, 587.0, 636.0, 582.0, 593.0, 546.0, 579.0, 633.0, 639.0, 573.0, 630.0, 636.0, 633.0, 587.0, 590.0, 627.0, 630.0, 639.0, 636.0, 636.0, 573.0, 639.0, 633.0, 636.0, 630.0, 570.0, 630.0, 633.0, 579.0, 630.0, 639.0, 593.0, 633.0, 636.0, 627.0, 579.0, 590.0, 636.0, 468.0, 587.0, 576.0, 582.0, 582.0, 630.0, 636.0, 636.0, 633.0, 633.0, 630.0, 633.0, 630.0, 630.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [308.0, 319.0, 286.0, 296.0, 319.0, 317.0, 230.0, 237.0, 287.0, 289.0, 317.0, 322.0, 291.0, 291.0, 288.0, 294.0, 310.0, 317.0, 314.0, 322.0, 322.0, 317.0, 308.0, 319.0, 290.0, 289.0, 298.0, 289.0, 324.0, 312.0, 291.0, 288.0, 316.0, 317.0, 319.0, 317.0, 296.0, 286.0, 319.0, 320.0, 311.0, 319.0, 311.0, 322.0, 319.0, 314.0, 318.0, 315.0, 316.0, 314.0, 316.0, 311.0, 317.0, 316.0, 308.0, 319.0, 291.0, 291.0, 311.0, 322.0, 321.0, 312.0, 319.0, 317.0, 324.0, 312.0, 296.0, 286.0, 315.0, 315.0, 314.0, 325.0, 316.0, 317.0, 308.0, 322.0, 290.0, 297.0, 322.0, 317.0, 288.0, 288.0, 320.0, 319.0, 314.0, 319.0, 325.0, 314.0, 312.0, 324.0, 316.0, 314.0, 316.0, 320.0, 311.0, 322.0, 319.0, 
314.0, 291.0, 296.0, 319.0, 317.0, 288.0, 294.0, 294.0, 299.0, 264.0, 282.0, 288.0, 291.0, 319.0, 314.0, 316.0, 323.0, 279.0, 294.0, 321.0, 309.0, 317.0, 319.0, 316.0, 317.0, 293.0, 294.0, 296.0, 294.0, 310.0, 317.0, 314.0, 316.0, 319.0, 320.0, 319.0, 317.0, 316.0, 320.0, 285.0, 288.0, 322.0, 317.0, 316.0, 317.0, 316.0, 320.0, 311.0, 319.0, 282.0, 288.0, 316.0, 314.0, 319.0, 314.0, 294.0, 285.0, 314.0, 316.0, 319.0, 320.0, 307.0, 286.0, 314.0, 319.0, 314.0, 322.0, 313.0, 314.0, 287.0, 292.0, 288.0, 302.0, 324.0, 312.0, 237.0, 231.0, 291.0, 296.0, 282.0, 294.0, 291.0, 291.0, 289.0, 293.0, 319.0, 311.0, 314.0, 322.0, 324.0, 312.0, 314.0, 319.0, 314.0, 319.0, 314.0, 316.0, 316.0, 317.0, 308.0, 322.0, 319.0, 311.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8473261545871941, "mean_processing_ms": 0.23784601732362667, "mean_inference_ms": 1.4452831057564066}, "off_policy_estimator": {}, "info": {"num_steps_trained": 9120000, "num_steps_sampled": 4864000, "sample_time_ms": 21329.654, "load_time_ms": 36.87, "grad_time_ms": 9824.642, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0010081265354529023, "policy_loss": -0.006387272384017706, "vf_loss": 79.55323028564453, "vf_explained_var": 0.7746841311454773, "kl": 0.001845820457674563, "entropy": 1.1198536157608032, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4864000, "episodes_total": 12160, "training_iteration": 380, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-01-38", "timestamp": 1660258898, "time_this_iter_s": 30.608419179916382, "time_total_s": 17309.19799041748, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 17309.19799041748, "timesteps_since_restore": 4864000, "iterations_since_restore": 380, "perf": {"cpu_util_percent": 34.48139534883721, "ram_util_percent": 58.923255813953475}}
-{"episode_reward_max": 639.0, "episode_reward_min": 467.0, "episode_reward_mean": 615.9, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 230.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 307.95}, "custom_metrics": {"sparse_reward_mean": 213.4, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 189.1, "shaped_reward_min": 147, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.11, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.34, "onion_pickup_agent_1_min": 13, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 16.87, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 19, "useful_onion_pickup_agent_1_mean": 17.83, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.19, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.28, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.04, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.12, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.55, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.81, "potting_onion_agent_1_min": 13, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.08, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.75, "dish_pickup_agent_1_min": 4, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.54, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.2, "useful_dish_pickup_agent_1_min": 3, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.4, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 0.44, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.57, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.2, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.54, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.15, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.55, 
"optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.81, "optimal_onion_potting_agent_1_min": 13, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.55, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.81, "viable_onion_potting_agent_1_min": 13, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 587.0, 636.0, 639.0, 639.0, 633.0, 630.0, 587.0, 636.0, 636.0, 627.0, 639.0, 636.0, 633.0, 569.0, 633.0, 522.0, 630.0, 636.0, 633.0, 633.0, 630.0, 633.0, 579.0, 636.0, 633.0, 633.0, 639.0, 584.0, 633.0, 633.0, 630.0, 630.0, 639.0, 636.0, 636.0, 573.0, 639.0, 633.0, 636.0, 630.0, 570.0, 630.0, 633.0, 579.0, 630.0, 639.0, 593.0, 633.0, 636.0, 627.0, 579.0, 590.0, 636.0, 468.0, 587.0, 576.0, 582.0, 582.0, 630.0, 636.0, 636.0, 633.0, 633.0, 630.0, 633.0, 630.0, 630.0, 627.0, 582.0, 636.0, 467.0, 576.0, 639.0, 582.0, 582.0, 627.0, 636.0, 639.0, 627.0, 579.0, 587.0, 636.0, 579.0, 633.0, 636.0, 582.0, 639.0, 630.0, 633.0, 633.0, 633.0, 630.0, 627.0, 633.0, 627.0, 582.0, 633.0, 633.0, 636.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [288.0, 291.0, 293.0, 294.0, 313.0, 323.0, 319.0, 320.0, 319.0, 320.0, 319.0, 314.0, 316.0, 314.0, 293.0, 294.0, 316.0, 320.0, 314.0, 322.0, 311.0, 316.0, 319.0, 320.0, 314.0, 322.0, 314.0, 319.0, 287.0, 282.0, 311.0, 322.0, 264.0, 258.0, 311.0, 319.0, 319.0, 317.0, 319.0, 314.0, 317.0, 316.0, 321.0, 309.0, 319.0, 314.0, 288.0, 291.0, 319.0, 317.0, 311.0, 322.0, 319.0, 314.0, 322.0, 317.0, 294.0, 290.0, 321.0, 312.0, 319.0, 314.0, 311.0, 319.0, 314.0, 316.0, 319.0, 320.0, 319.0, 317.0, 316.0, 320.0, 285.0, 288.0, 322.0, 317.0, 316.0, 317.0, 316.0, 320.0, 311.0, 319.0, 282.0, 288.0, 316.0, 314.0, 319.0, 314.0, 294.0, 285.0, 314.0, 316.0, 319.0, 320.0, 307.0, 286.0, 314.0, 
319.0, 314.0, 322.0, 313.0, 314.0, 287.0, 292.0, 288.0, 302.0, 324.0, 312.0, 237.0, 231.0, 291.0, 296.0, 282.0, 294.0, 291.0, 291.0, 289.0, 293.0, 319.0, 311.0, 314.0, 322.0, 324.0, 312.0, 314.0, 319.0, 314.0, 319.0, 314.0, 316.0, 316.0, 317.0, 308.0, 322.0, 319.0, 311.0, 308.0, 319.0, 286.0, 296.0, 319.0, 317.0, 230.0, 237.0, 287.0, 289.0, 317.0, 322.0, 291.0, 291.0, 288.0, 294.0, 310.0, 317.0, 314.0, 322.0, 322.0, 317.0, 308.0, 319.0, 290.0, 289.0, 298.0, 289.0, 324.0, 312.0, 291.0, 288.0, 316.0, 317.0, 319.0, 317.0, 296.0, 286.0, 319.0, 320.0, 311.0, 319.0, 311.0, 322.0, 319.0, 314.0, 318.0, 315.0, 316.0, 314.0, 316.0, 311.0, 317.0, 316.0, 308.0, 319.0, 291.0, 291.0, 311.0, 322.0, 321.0, 312.0, 319.0, 317.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8462923346422779, "mean_processing_ms": 0.23764003789944027, "mean_inference_ms": 1.4442668898213196}, "off_policy_estimator": {}, "info": {"num_steps_trained": 9144000, "num_steps_sampled": 4876800, "sample_time_ms": 21323.81, "load_time_ms": 36.975, "grad_time_ms": 9811.843, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.005420349538326263, "policy_loss": -0.0019105566898360848, "vf_loss": 78.93695068359375, "vf_explained_var": 0.772759735584259, "kl": 0.0018517466960474849, "entropy": 1.1255789995193481, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4876800, "episodes_total": 12192, "training_iteration": 381, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-02-08", "timestamp": 1660258928, "time_this_iter_s": 30.169427156448364, "time_total_s": 17339.36741757393, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 17339.36741757393, "timesteps_since_restore": 4876800, "iterations_since_restore": 381, "perf": {"cpu_util_percent": 34.1, "ram_util_percent": 58.95116279069769}}
-{"episode_reward_max": 639.0, "episode_reward_min": 467.0, "episode_reward_mean": 615.47, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 230.0}, "policy_reward_max": {"ppo": 329.0}, "policy_reward_mean": {"ppo": 307.735}, "custom_metrics": {"sparse_reward_mean": 213.4, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 188.67, "shaped_reward_min": 147, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.92, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.58, "onion_pickup_agent_1_min": 15, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.69, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 19, "useful_onion_pickup_agent_1_mean": 18.01, "useful_onion_pickup_agent_1_min": 14, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.18, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.28, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.05, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.12, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.36, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.93, "potting_onion_agent_1_min": 14, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.19, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.63, "dish_pickup_agent_1_min": 4, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.57, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.08, "useful_dish_pickup_agent_1_min": 3, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.42, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 0.41, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.67, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.12, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.61, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.07, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.36, 
"optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.93, "optimal_onion_potting_agent_1_min": 14, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.36, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.93, "viable_onion_potting_agent_1_min": 14, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [633.0, 633.0, 633.0, 630.0, 633.0, 636.0, 567.0, 627.0, 582.0, 630.0, 630.0, 582.0, 633.0, 636.0, 579.0, 636.0, 636.0, 630.0, 630.0, 630.0, 582.0, 630.0, 633.0, 630.0, 578.0, 587.0, 530.0, 587.0, 630.0, 630.0, 522.0, 582.0, 630.0, 633.0, 630.0, 630.0, 627.0, 582.0, 636.0, 467.0, 576.0, 639.0, 582.0, 582.0, 627.0, 636.0, 639.0, 627.0, 579.0, 587.0, 636.0, 579.0, 633.0, 636.0, 582.0, 639.0, 630.0, 633.0, 633.0, 633.0, 630.0, 627.0, 633.0, 627.0, 582.0, 633.0, 633.0, 636.0, 579.0, 587.0, 636.0, 639.0, 639.0, 633.0, 630.0, 587.0, 636.0, 636.0, 627.0, 639.0, 636.0, 633.0, 569.0, 633.0, 522.0, 630.0, 636.0, 633.0, 633.0, 630.0, 633.0, 579.0, 636.0, 633.0, 633.0, 639.0, 584.0, 633.0, 633.0, 630.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [309.0, 324.0, 311.0, 322.0, 319.0, 314.0, 308.0, 322.0, 316.0, 317.0, 319.0, 317.0, 282.0, 285.0, 329.0, 298.0, 291.0, 291.0, 308.0, 322.0, 308.0, 322.0, 291.0, 291.0, 309.0, 324.0, 319.0, 317.0, 290.0, 289.0, 319.0, 317.0, 316.0, 320.0, 326.0, 304.0, 316.0, 314.0, 314.0, 316.0, 290.0, 292.0, 314.0, 316.0, 319.0, 314.0, 318.0, 312.0, 293.0, 285.0, 296.0, 291.0, 271.0, 259.0, 293.0, 294.0, 316.0, 314.0, 313.0, 317.0, 265.0, 257.0, 293.0, 289.0, 314.0, 316.0, 316.0, 317.0, 308.0, 322.0, 319.0, 311.0, 308.0, 319.0, 286.0, 296.0, 319.0, 317.0, 230.0, 237.0, 287.0, 289.0, 317.0, 322.0, 291.0, 291.0, 288.0, 294.0, 310.0, 317.0, 314.0, 322.0, 322.0, 317.0, 308.0, 319.0, 290.0, 
289.0, 298.0, 289.0, 324.0, 312.0, 291.0, 288.0, 316.0, 317.0, 319.0, 317.0, 296.0, 286.0, 319.0, 320.0, 311.0, 319.0, 311.0, 322.0, 319.0, 314.0, 318.0, 315.0, 316.0, 314.0, 316.0, 311.0, 317.0, 316.0, 308.0, 319.0, 291.0, 291.0, 311.0, 322.0, 321.0, 312.0, 319.0, 317.0, 288.0, 291.0, 293.0, 294.0, 313.0, 323.0, 319.0, 320.0, 319.0, 320.0, 319.0, 314.0, 316.0, 314.0, 293.0, 294.0, 316.0, 320.0, 314.0, 322.0, 311.0, 316.0, 319.0, 320.0, 314.0, 322.0, 314.0, 319.0, 287.0, 282.0, 311.0, 322.0, 264.0, 258.0, 311.0, 319.0, 319.0, 317.0, 319.0, 314.0, 317.0, 316.0, 321.0, 309.0, 319.0, 314.0, 288.0, 291.0, 319.0, 317.0, 311.0, 322.0, 319.0, 314.0, 322.0, 317.0, 294.0, 290.0, 321.0, 312.0, 319.0, 314.0, 311.0, 319.0]}, "sampler_perf": {"mean_env_wait_ms": 0.845260231721656, "mean_processing_ms": 0.23743439147057216, "mean_inference_ms": 1.443180349457874}, "off_policy_estimator": {}, "info": {"num_steps_trained": 9168000, "num_steps_sampled": 4889600, "sample_time_ms": 21404.999, "load_time_ms": 36.99, "grad_time_ms": 9811.226, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.002549513941630721, "policy_loss": -0.004884073510766029, "vf_loss": 79.93880462646484, "vf_explained_var": 0.7685417532920837, "kl": 0.002187439240515232, "entropy": 1.1205859184265137, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4889600, "episodes_total": 12224, "training_iteration": 382, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-02-39", "timestamp": 1660258959, "time_this_iter_s": 30.454362154006958, "time_total_s": 17369.821779727936, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 17369.821779727936, "timesteps_since_restore": 4889600, "iterations_since_restore": 382, "perf": {"cpu_util_percent": 33.88139534883721, "ram_util_percent": 58.944186046511625}}
-{"episode_reward_max": 639.0, "episode_reward_min": 522.0, "episode_reward_mean": 617.11, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 257.0}, "policy_reward_max": {"ppo": 329.0}, "policy_reward_mean": {"ppo": 308.555}, "custom_metrics": {"sparse_reward_mean": 214.0, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 189.11, "shaped_reward_min": 162, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.09, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.61, "onion_pickup_agent_1_min": 16, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.82, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 19, "useful_onion_pickup_agent_1_mean": 18.05, "useful_onion_pickup_agent_1_min": 14, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.21, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.34, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.06, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.13, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.46, "potting_onion_agent_0_min": 13, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.88, "potting_onion_agent_1_min": 14, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.14, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.7, "dish_pickup_agent_1_min": 4, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.55, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.13, "useful_dish_pickup_agent_1_min": 3, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.41, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.42, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.66, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.16, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.59, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.12, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.46, 
"optimal_onion_potting_agent_0_min": 13, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.88, "optimal_onion_potting_agent_1_min": 14, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.46, "viable_onion_potting_agent_0_min": 13, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.88, "viable_onion_potting_agent_1_min": 14, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [630.0, 633.0, 587.0, 630.0, 582.0, 582.0, 633.0, 627.0, 576.0, 587.0, 624.0, 636.0, 633.0, 633.0, 636.0, 630.0, 630.0, 633.0, 630.0, 636.0, 584.0, 570.0, 630.0, 639.0, 630.0, 630.0, 630.0, 639.0, 582.0, 633.0, 636.0, 633.0, 582.0, 633.0, 633.0, 636.0, 579.0, 587.0, 636.0, 639.0, 639.0, 633.0, 630.0, 587.0, 636.0, 636.0, 627.0, 639.0, 636.0, 633.0, 569.0, 633.0, 522.0, 630.0, 636.0, 633.0, 633.0, 630.0, 633.0, 579.0, 636.0, 633.0, 633.0, 639.0, 584.0, 633.0, 633.0, 630.0, 633.0, 633.0, 633.0, 630.0, 633.0, 636.0, 567.0, 627.0, 582.0, 630.0, 630.0, 582.0, 633.0, 636.0, 579.0, 636.0, 636.0, 630.0, 630.0, 630.0, 582.0, 630.0, 633.0, 630.0, 578.0, 587.0, 530.0, 587.0, 630.0, 630.0, 522.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [316.0, 314.0, 319.0, 314.0, 291.0, 296.0, 313.0, 317.0, 291.0, 291.0, 285.0, 297.0, 319.0, 314.0, 314.0, 313.0, 291.0, 285.0, 296.0, 291.0, 304.0, 320.0, 314.0, 322.0, 316.0, 317.0, 318.0, 315.0, 319.0, 317.0, 311.0, 319.0, 311.0, 319.0, 317.0, 316.0, 311.0, 319.0, 317.0, 319.0, 299.0, 285.0, 285.0, 285.0, 308.0, 322.0, 319.0, 320.0, 316.0, 314.0, 316.0, 314.0, 308.0, 322.0, 319.0, 320.0, 288.0, 294.0, 317.0, 316.0, 318.0, 318.0, 319.0, 314.0, 291.0, 291.0, 311.0, 322.0, 321.0, 312.0, 319.0, 317.0, 288.0, 291.0, 293.0, 294.0, 313.0, 323.0, 319.0, 320.0, 319.0, 320.0, 319.0, 314.0, 316.0, 314.0, 293.0, 294.0, 316.0, 320.0, 314.0, 322.0, 311.0, 316.0, 319.0, 320.0, 314.0, 
322.0, 314.0, 319.0, 287.0, 282.0, 311.0, 322.0, 264.0, 258.0, 311.0, 319.0, 319.0, 317.0, 319.0, 314.0, 317.0, 316.0, 321.0, 309.0, 319.0, 314.0, 288.0, 291.0, 319.0, 317.0, 311.0, 322.0, 319.0, 314.0, 322.0, 317.0, 294.0, 290.0, 321.0, 312.0, 319.0, 314.0, 311.0, 319.0, 309.0, 324.0, 311.0, 322.0, 319.0, 314.0, 308.0, 322.0, 316.0, 317.0, 319.0, 317.0, 282.0, 285.0, 329.0, 298.0, 291.0, 291.0, 308.0, 322.0, 308.0, 322.0, 291.0, 291.0, 309.0, 324.0, 319.0, 317.0, 290.0, 289.0, 319.0, 317.0, 316.0, 320.0, 326.0, 304.0, 316.0, 314.0, 314.0, 316.0, 290.0, 292.0, 314.0, 316.0, 319.0, 314.0, 318.0, 312.0, 293.0, 285.0, 296.0, 291.0, 271.0, 259.0, 293.0, 294.0, 316.0, 314.0, 313.0, 317.0, 265.0, 257.0, 293.0, 289.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8442347187033086, "mean_processing_ms": 0.23722937400390215, "mean_inference_ms": 1.442092522505549}, "off_policy_estimator": {}, "info": {"num_steps_trained": 9192000, "num_steps_sampled": 4902400, "sample_time_ms": 21412.011, "load_time_ms": 37.029, "grad_time_ms": 9601.556, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0016393003752455115, "policy_loss": -0.005780236795544624, "vf_loss": 79.79308319091797, "vf_explained_var": 0.7686330676078796, "kl": 0.001640369649976492, "entropy": 1.1195478439331055, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4902400, "episodes_total": 12256, "training_iteration": 383, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-03-09", "timestamp": 1660258989, "time_this_iter_s": 30.096380949020386, "time_total_s": 17399.918160676956, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 17399.918160676956, "timesteps_since_restore": 4902400, "iterations_since_restore": 383, "perf": {"cpu_util_percent": 34.86279069767443, "ram_util_percent": 58.95348837209304}}
-{"episode_reward_max": 639.0, "episode_reward_min": 507.0, "episode_reward_mean": 614.16, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 253.0}, "policy_reward_max": {"ppo": 329.0}, "policy_reward_mean": {"ppo": 307.08}, "custom_metrics": {"sparse_reward_mean": 213.0, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 188.16, "shaped_reward_min": 147, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.96, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.46, "onion_pickup_agent_1_min": 16, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.63, "useful_onion_pickup_agent_0_min": 13, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 18.0, "useful_onion_pickup_agent_1_min": 14, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.24, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.24, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.07, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.07, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.37, "potting_onion_agent_0_min": 13, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.83, "potting_onion_agent_1_min": 14, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.3, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.84, "dish_pickup_agent_1_min": 4, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.53, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.09, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.58, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.52, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.6, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.16, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.53, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.13, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.37, 
"optimal_onion_potting_agent_0_min": 13, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.83, "optimal_onion_potting_agent_1_min": 14, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.37, "viable_onion_potting_agent_0_min": 13, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.83, "viable_onion_potting_agent_1_min": 14, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [633.0, 630.0, 639.0, 584.0, 507.0, 573.0, 630.0, 582.0, 630.0, 633.0, 579.0, 582.0, 564.0, 633.0, 636.0, 582.0, 639.0, 582.0, 621.0, 636.0, 630.0, 633.0, 639.0, 579.0, 639.0, 639.0, 639.0, 639.0, 579.0, 639.0, 579.0, 636.0, 584.0, 633.0, 633.0, 630.0, 633.0, 633.0, 633.0, 630.0, 633.0, 636.0, 567.0, 627.0, 582.0, 630.0, 630.0, 582.0, 633.0, 636.0, 579.0, 636.0, 636.0, 630.0, 630.0, 630.0, 582.0, 630.0, 633.0, 630.0, 578.0, 587.0, 530.0, 587.0, 630.0, 630.0, 522.0, 582.0, 630.0, 633.0, 587.0, 630.0, 582.0, 582.0, 633.0, 627.0, 576.0, 587.0, 624.0, 636.0, 633.0, 633.0, 636.0, 630.0, 630.0, 633.0, 630.0, 636.0, 584.0, 570.0, 630.0, 639.0, 630.0, 630.0, 630.0, 639.0, 582.0, 633.0, 636.0, 633.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [317.0, 316.0, 306.0, 324.0, 314.0, 325.0, 293.0, 291.0, 253.0, 254.0, 291.0, 282.0, 314.0, 316.0, 288.0, 294.0, 319.0, 311.0, 324.0, 309.0, 287.0, 292.0, 293.0, 289.0, 274.0, 290.0, 322.0, 311.0, 316.0, 320.0, 288.0, 294.0, 316.0, 323.0, 293.0, 289.0, 308.0, 313.0, 319.0, 317.0, 311.0, 319.0, 316.0, 317.0, 317.0, 322.0, 293.0, 286.0, 319.0, 320.0, 317.0, 322.0, 319.0, 320.0, 319.0, 320.0, 290.0, 289.0, 317.0, 322.0, 279.0, 300.0, 322.0, 314.0, 294.0, 290.0, 321.0, 312.0, 319.0, 314.0, 311.0, 319.0, 309.0, 324.0, 311.0, 322.0, 319.0, 314.0, 308.0, 322.0, 316.0, 317.0, 319.0, 317.0, 282.0, 285.0, 329.0, 298.0, 291.0, 291.0, 308.0, 322.0, 308.0, 322.0, 291.0, 291.0, 309.0, 
324.0, 319.0, 317.0, 290.0, 289.0, 319.0, 317.0, 316.0, 320.0, 326.0, 304.0, 316.0, 314.0, 314.0, 316.0, 290.0, 292.0, 314.0, 316.0, 319.0, 314.0, 318.0, 312.0, 293.0, 285.0, 296.0, 291.0, 271.0, 259.0, 293.0, 294.0, 316.0, 314.0, 313.0, 317.0, 265.0, 257.0, 293.0, 289.0, 316.0, 314.0, 319.0, 314.0, 291.0, 296.0, 313.0, 317.0, 291.0, 291.0, 285.0, 297.0, 319.0, 314.0, 314.0, 313.0, 291.0, 285.0, 296.0, 291.0, 304.0, 320.0, 314.0, 322.0, 316.0, 317.0, 318.0, 315.0, 319.0, 317.0, 311.0, 319.0, 311.0, 319.0, 317.0, 316.0, 311.0, 319.0, 317.0, 319.0, 299.0, 285.0, 285.0, 285.0, 308.0, 322.0, 319.0, 320.0, 316.0, 314.0, 316.0, 314.0, 308.0, 322.0, 319.0, 320.0, 288.0, 294.0, 317.0, 316.0, 318.0, 318.0, 319.0, 314.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8432201147992054, "mean_processing_ms": 0.23702861990301977, "mean_inference_ms": 1.4411244641965177}, "off_policy_estimator": {}, "info": {"num_steps_trained": 9216000, "num_steps_sampled": 4915200, "sample_time_ms": 21556.377, "load_time_ms": 37.038, "grad_time_ms": 9524.16, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.002193765016272664, "policy_loss": -0.0053141750395298, "vf_loss": 80.70391082763672, "vf_explained_var": 0.7705557942390442, "kl": 0.0018228074768558145, "entropy": 1.124890685081482, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4915200, "episodes_total": 12288, "training_iteration": 384, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-03-43", "timestamp": 1660259023, "time_this_iter_s": 33.80204796791077, "time_total_s": 17433.720208644867, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 17433.720208644867, "timesteps_since_restore": 4915200, "iterations_since_restore": 384, "perf": {"cpu_util_percent": 36.197872340425526, "ram_util_percent": 59.73617021276596}}
-{"episode_reward_max": 639.0, "episode_reward_min": 507.0, "episode_reward_mean": 611.52, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 253.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 305.76}, "custom_metrics": {"sparse_reward_mean": 212.0, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 187.52, "shaped_reward_min": 147, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.93, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 18.3, "onion_pickup_agent_1_min": 13, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.54, "useful_onion_pickup_agent_0_min": 13, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.88, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.25, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.26, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.08, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.07, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.34, "potting_onion_agent_0_min": 13, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.77, "potting_onion_agent_1_min": 13, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.22, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 6.0, "dish_pickup_agent_1_min": 4, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.44, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.19, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.57, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.62, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.04, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.5, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.17, "soup_pickup_agent_1_min": 4, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.47, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.13, "soup_delivery_agent_1_min": 4, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.34, 
"optimal_onion_potting_agent_0_min": 13, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.77, "optimal_onion_potting_agent_1_min": 13, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.34, "viable_onion_potting_agent_0_min": 13, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.77, "viable_onion_potting_agent_1_min": 13, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [573.0, 630.0, 582.0, 633.0, 639.0, 630.0, 633.0, 630.0, 587.0, 567.0, 633.0, 582.0, 639.0, 582.0, 630.0, 582.0, 582.0, 630.0, 582.0, 582.0, 630.0, 630.0, 576.0, 636.0, 573.0, 573.0, 633.0, 582.0, 587.0, 636.0, 636.0, 579.0, 630.0, 630.0, 522.0, 582.0, 630.0, 633.0, 587.0, 630.0, 582.0, 582.0, 633.0, 627.0, 576.0, 587.0, 624.0, 636.0, 633.0, 633.0, 636.0, 630.0, 630.0, 633.0, 630.0, 636.0, 584.0, 570.0, 630.0, 639.0, 630.0, 630.0, 630.0, 639.0, 582.0, 633.0, 636.0, 633.0, 633.0, 630.0, 639.0, 584.0, 507.0, 573.0, 630.0, 582.0, 630.0, 633.0, 579.0, 582.0, 564.0, 633.0, 636.0, 582.0, 639.0, 582.0, 621.0, 636.0, 630.0, 633.0, 639.0, 579.0, 639.0, 639.0, 639.0, 639.0, 579.0, 639.0, 579.0, 636.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [285.0, 288.0, 316.0, 314.0, 283.0, 299.0, 316.0, 317.0, 319.0, 320.0, 316.0, 314.0, 316.0, 317.0, 316.0, 314.0, 293.0, 294.0, 282.0, 285.0, 316.0, 317.0, 288.0, 294.0, 319.0, 320.0, 288.0, 294.0, 317.0, 313.0, 290.0, 292.0, 288.0, 294.0, 308.0, 322.0, 292.0, 290.0, 297.0, 285.0, 313.0, 317.0, 316.0, 314.0, 285.0, 291.0, 322.0, 314.0, 288.0, 285.0, 284.0, 289.0, 319.0, 314.0, 282.0, 300.0, 283.0, 304.0, 319.0, 317.0, 314.0, 322.0, 288.0, 291.0, 316.0, 314.0, 313.0, 317.0, 265.0, 257.0, 293.0, 289.0, 316.0, 314.0, 319.0, 314.0, 291.0, 296.0, 313.0, 317.0, 291.0, 291.0, 285.0, 297.0, 319.0, 314.0, 314.0, 313.0, 291.0, 285.0, 296.0, 291.0, 304.0, 320.0, 314.0, 322.0, 316.0, 
317.0, 318.0, 315.0, 319.0, 317.0, 311.0, 319.0, 311.0, 319.0, 317.0, 316.0, 311.0, 319.0, 317.0, 319.0, 299.0, 285.0, 285.0, 285.0, 308.0, 322.0, 319.0, 320.0, 316.0, 314.0, 316.0, 314.0, 308.0, 322.0, 319.0, 320.0, 288.0, 294.0, 317.0, 316.0, 318.0, 318.0, 319.0, 314.0, 317.0, 316.0, 306.0, 324.0, 314.0, 325.0, 293.0, 291.0, 253.0, 254.0, 291.0, 282.0, 314.0, 316.0, 288.0, 294.0, 319.0, 311.0, 324.0, 309.0, 287.0, 292.0, 293.0, 289.0, 274.0, 290.0, 322.0, 311.0, 316.0, 320.0, 288.0, 294.0, 316.0, 323.0, 293.0, 289.0, 308.0, 313.0, 319.0, 317.0, 311.0, 319.0, 316.0, 317.0, 317.0, 322.0, 293.0, 286.0, 319.0, 320.0, 317.0, 322.0, 319.0, 320.0, 319.0, 320.0, 290.0, 289.0, 317.0, 322.0, 279.0, 300.0, 322.0, 314.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8422101834912255, "mean_processing_ms": 0.23682867408475425, "mean_inference_ms": 1.4402525240623634}, "off_policy_estimator": {}, "info": {"num_steps_trained": 9240000, "num_steps_sampled": 4928000, "sample_time_ms": 21695.699, "load_time_ms": 36.564, "grad_time_ms": 9521.463, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.004186244681477547, "policy_loss": -0.0031982522923499346, "vf_loss": 79.4544906616211, "vf_explained_var": 0.7711065411567688, "kl": 0.002083372324705124, "entropy": 1.1219121217727661, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4928000, "episodes_total": 12320, "training_iteration": 385, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-04-16", "timestamp": 1660259056, "time_this_iter_s": 33.08577585220337, "time_total_s": 17466.80598449707, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 17466.80598449707, "timesteps_since_restore": 4928000, "iterations_since_restore": 385, "perf": {"cpu_util_percent": 33.6468085106383, "ram_util_percent": 59.0808510638298}}
-{"episode_reward_max": 639.0, "episode_reward_min": 507.0, "episode_reward_mean": 609.69, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 249.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 304.845}, "custom_metrics": {"sparse_reward_mean": 211.2, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 187.29, "shaped_reward_min": 147, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.92, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 18.12, "onion_pickup_agent_1_min": 13, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.56, "useful_onion_pickup_agent_0_min": 13, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.76, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.24, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.23, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.07, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.07, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.41, "potting_onion_agent_0_min": 13, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.66, "potting_onion_agent_1_min": 13, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.23, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.94, "dish_pickup_agent_1_min": 4, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.46, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.2, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.58, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 0.55, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.04, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.47, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.16, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.44, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.12, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.41, 
"optimal_onion_potting_agent_0_min": 13, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.66, "optimal_onion_potting_agent_1_min": 13, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.41, "viable_onion_potting_agent_0_min": 13, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.66, "viable_onion_potting_agent_1_min": 13, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [636.0, 590.0, 630.0, 636.0, 525.0, 530.0, 582.0, 587.0, 633.0, 627.0, 639.0, 624.0, 582.0, 630.0, 639.0, 630.0, 582.0, 636.0, 582.0, 522.0, 636.0, 633.0, 630.0, 627.0, 519.0, 639.0, 627.0, 636.0, 633.0, 636.0, 636.0, 627.0, 582.0, 633.0, 636.0, 633.0, 633.0, 630.0, 639.0, 584.0, 507.0, 573.0, 630.0, 582.0, 630.0, 633.0, 579.0, 582.0, 564.0, 633.0, 636.0, 582.0, 639.0, 582.0, 621.0, 636.0, 630.0, 633.0, 639.0, 579.0, 639.0, 639.0, 639.0, 639.0, 579.0, 639.0, 579.0, 636.0, 573.0, 630.0, 582.0, 633.0, 639.0, 630.0, 633.0, 630.0, 587.0, 567.0, 633.0, 582.0, 639.0, 582.0, 630.0, 582.0, 582.0, 630.0, 582.0, 582.0, 630.0, 630.0, 576.0, 636.0, 573.0, 573.0, 633.0, 582.0, 587.0, 636.0, 636.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [319.0, 317.0, 296.0, 294.0, 314.0, 316.0, 322.0, 314.0, 262.0, 263.0, 281.0, 249.0, 288.0, 294.0, 294.0, 293.0, 316.0, 317.0, 316.0, 311.0, 319.0, 320.0, 308.0, 316.0, 291.0, 291.0, 308.0, 322.0, 319.0, 320.0, 322.0, 308.0, 288.0, 294.0, 317.0, 319.0, 296.0, 286.0, 249.0, 273.0, 314.0, 322.0, 316.0, 317.0, 308.0, 322.0, 311.0, 316.0, 268.0, 251.0, 322.0, 317.0, 316.0, 311.0, 317.0, 319.0, 311.0, 322.0, 319.0, 317.0, 319.0, 317.0, 316.0, 311.0, 288.0, 294.0, 317.0, 316.0, 318.0, 318.0, 319.0, 314.0, 317.0, 316.0, 306.0, 324.0, 314.0, 325.0, 293.0, 291.0, 253.0, 254.0, 291.0, 282.0, 314.0, 316.0, 288.0, 294.0, 319.0, 311.0, 324.0, 309.0, 287.0, 292.0, 293.0, 289.0, 274.0, 
290.0, 322.0, 311.0, 316.0, 320.0, 288.0, 294.0, 316.0, 323.0, 293.0, 289.0, 308.0, 313.0, 319.0, 317.0, 311.0, 319.0, 316.0, 317.0, 317.0, 322.0, 293.0, 286.0, 319.0, 320.0, 317.0, 322.0, 319.0, 320.0, 319.0, 320.0, 290.0, 289.0, 317.0, 322.0, 279.0, 300.0, 322.0, 314.0, 285.0, 288.0, 316.0, 314.0, 283.0, 299.0, 316.0, 317.0, 319.0, 320.0, 316.0, 314.0, 316.0, 317.0, 316.0, 314.0, 293.0, 294.0, 282.0, 285.0, 316.0, 317.0, 288.0, 294.0, 319.0, 320.0, 288.0, 294.0, 317.0, 313.0, 290.0, 292.0, 288.0, 294.0, 308.0, 322.0, 292.0, 290.0, 297.0, 285.0, 313.0, 317.0, 316.0, 314.0, 285.0, 291.0, 322.0, 314.0, 288.0, 285.0, 284.0, 289.0, 319.0, 314.0, 282.0, 300.0, 283.0, 304.0, 319.0, 317.0, 314.0, 322.0, 288.0, 291.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8412065546272243, "mean_processing_ms": 0.23663028686685655, "mean_inference_ms": 1.4395227599407487}, "off_policy_estimator": {}, "info": {"num_steps_trained": 9264000, "num_steps_sampled": 4940800, "sample_time_ms": 21927.826, "load_time_ms": 36.438, "grad_time_ms": 9711.085, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.002509244019165635, "policy_loss": -0.005174743477255106, "vf_loss": 82.38806915283203, "vf_explained_var": 0.7595655918121338, "kl": 0.0020332231651991606, "entropy": 1.1096264123916626, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4940800, "episodes_total": 12352, "training_iteration": 386, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-04-49", "timestamp": 1660259089, "time_this_iter_s": 33.33205199241638, "time_total_s": 17500.138036489487, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 17500.138036489487, "timesteps_since_restore": 4940800, "iterations_since_restore": 386, "perf": {"cpu_util_percent": 33.47234042553192, "ram_util_percent": 59.114893617021295}}
-{"episode_reward_max": 639.0, "episode_reward_min": 365.0, "episode_reward_mean": 607.03, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 182.0}, "policy_reward_max": {"ppo": 328.0}, "policy_reward_mean": {"ppo": 303.515}, "custom_metrics": {"sparse_reward_mean": 210.2, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 186.63, "shaped_reward_min": 125, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.05, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 18.03, "onion_pickup_agent_1_min": 12, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 16.71, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.58, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.34, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.25, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.12, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.1, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.39, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.57, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.09, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.87, "dish_pickup_agent_1_min": 4, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.37, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.18, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.45, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 0.49, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.45, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.17, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.38, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.14, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.39, 
"optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.57, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.39, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.57, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [630.0, 582.0, 639.0, 633.0, 636.0, 639.0, 558.0, 582.0, 633.0, 633.0, 630.0, 582.0, 627.0, 630.0, 636.0, 506.0, 630.0, 639.0, 630.0, 582.0, 630.0, 590.0, 584.0, 584.0, 365.0, 639.0, 582.0, 639.0, 587.0, 633.0, 633.0, 627.0, 579.0, 639.0, 579.0, 636.0, 573.0, 630.0, 582.0, 633.0, 639.0, 630.0, 633.0, 630.0, 587.0, 567.0, 633.0, 582.0, 639.0, 582.0, 630.0, 582.0, 582.0, 630.0, 582.0, 582.0, 630.0, 630.0, 576.0, 636.0, 573.0, 573.0, 633.0, 582.0, 587.0, 636.0, 636.0, 579.0, 636.0, 590.0, 630.0, 636.0, 525.0, 530.0, 582.0, 587.0, 633.0, 627.0, 639.0, 624.0, 582.0, 630.0, 639.0, 630.0, 582.0, 636.0, 582.0, 522.0, 636.0, 633.0, 630.0, 627.0, 519.0, 639.0, 627.0, 636.0, 633.0, 636.0, 636.0, 627.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [316.0, 314.0, 285.0, 297.0, 315.0, 324.0, 313.0, 320.0, 319.0, 317.0, 322.0, 317.0, 282.0, 276.0, 296.0, 286.0, 319.0, 314.0, 317.0, 316.0, 302.0, 328.0, 291.0, 291.0, 312.0, 315.0, 303.0, 327.0, 314.0, 322.0, 249.0, 257.0, 316.0, 314.0, 317.0, 322.0, 316.0, 314.0, 287.0, 295.0, 319.0, 311.0, 299.0, 291.0, 293.0, 291.0, 296.0, 288.0, 183.0, 182.0, 320.0, 319.0, 288.0, 294.0, 319.0, 320.0, 296.0, 291.0, 316.0, 317.0, 317.0, 316.0, 308.0, 319.0, 290.0, 289.0, 317.0, 322.0, 279.0, 300.0, 322.0, 314.0, 285.0, 288.0, 316.0, 314.0, 283.0, 299.0, 316.0, 317.0, 319.0, 320.0, 316.0, 314.0, 316.0, 317.0, 316.0, 314.0, 293.0, 294.0, 282.0, 285.0, 316.0, 317.0, 288.0, 294.0, 319.0, 
320.0, 288.0, 294.0, 317.0, 313.0, 290.0, 292.0, 288.0, 294.0, 308.0, 322.0, 292.0, 290.0, 297.0, 285.0, 313.0, 317.0, 316.0, 314.0, 285.0, 291.0, 322.0, 314.0, 288.0, 285.0, 284.0, 289.0, 319.0, 314.0, 282.0, 300.0, 283.0, 304.0, 319.0, 317.0, 314.0, 322.0, 288.0, 291.0, 319.0, 317.0, 296.0, 294.0, 314.0, 316.0, 322.0, 314.0, 262.0, 263.0, 281.0, 249.0, 288.0, 294.0, 294.0, 293.0, 316.0, 317.0, 316.0, 311.0, 319.0, 320.0, 308.0, 316.0, 291.0, 291.0, 308.0, 322.0, 319.0, 320.0, 322.0, 308.0, 288.0, 294.0, 317.0, 319.0, 296.0, 286.0, 249.0, 273.0, 314.0, 322.0, 316.0, 317.0, 308.0, 322.0, 311.0, 316.0, 268.0, 251.0, 322.0, 317.0, 316.0, 311.0, 317.0, 319.0, 311.0, 322.0, 319.0, 317.0, 319.0, 317.0, 316.0, 311.0]}, "sampler_perf": {"mean_env_wait_ms": 0.840202535823751, "mean_processing_ms": 0.2364293047857562, "mean_inference_ms": 1.4386641797535789}, "off_policy_estimator": {}, "info": {"num_steps_trained": 9288000, "num_steps_sampled": 4953600, "sample_time_ms": 21835.055, "load_time_ms": 36.557, "grad_time_ms": 9742.403, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.00015397991228383034, "policy_loss": -0.007473704870790243, "vf_loss": 81.87383270263672, "vf_explained_var": 0.7745316028594971, "kl": 0.0020445811096578836, "entropy": 1.1193923950195312, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4953600, "episodes_total": 12384, "training_iteration": 387, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-05-19", "timestamp": 1660259119, "time_this_iter_s": 30.124536752700806, "time_total_s": 17530.262573242188, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 17530.262573242188, "timesteps_since_restore": 4953600, "iterations_since_restore": 387, "perf": {"cpu_util_percent": 31.948837209302326, "ram_util_percent": 58.95581395348838}}
-{"episode_reward_max": 639.0, "episode_reward_min": 365.0, "episode_reward_mean": 611.64, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 182.0}, "policy_reward_max": {"ppo": 328.0}, "policy_reward_mean": {"ppo": 305.82}, "custom_metrics": {"sparse_reward_mean": 212.0, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 187.64, "shaped_reward_min": 125, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.2, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 17.96, "onion_pickup_agent_1_min": 12, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.89, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.48, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.31, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.21, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.11, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.07, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.55, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.51, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.09, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.77, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.44, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.18, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.44, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 0.41, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.49, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.23, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.42, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.19, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.55, 
"optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.51, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.55, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.51, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 639.0, 639.0, 627.0, 627.0, 633.0, 627.0, 639.0, 636.0, 636.0, 584.0, 573.0, 636.0, 630.0, 573.0, 630.0, 633.0, 630.0, 627.0, 636.0, 630.0, 636.0, 630.0, 630.0, 627.0, 636.0, 630.0, 630.0, 587.0, 579.0, 570.0, 633.0, 587.0, 636.0, 636.0, 579.0, 636.0, 590.0, 630.0, 636.0, 525.0, 530.0, 582.0, 587.0, 633.0, 627.0, 639.0, 624.0, 582.0, 630.0, 639.0, 630.0, 582.0, 636.0, 582.0, 522.0, 636.0, 633.0, 630.0, 627.0, 519.0, 639.0, 627.0, 636.0, 633.0, 636.0, 636.0, 627.0, 630.0, 582.0, 639.0, 633.0, 636.0, 639.0, 558.0, 582.0, 633.0, 633.0, 630.0, 582.0, 627.0, 630.0, 636.0, 506.0, 630.0, 639.0, 630.0, 582.0, 630.0, 590.0, 584.0, 584.0, 365.0, 639.0, 582.0, 639.0, 587.0, 633.0, 633.0, 627.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [291.0, 291.0, 317.0, 322.0, 317.0, 322.0, 311.0, 316.0, 308.0, 319.0, 319.0, 314.0, 314.0, 313.0, 316.0, 323.0, 314.0, 322.0, 319.0, 317.0, 291.0, 293.0, 298.0, 275.0, 324.0, 312.0, 323.0, 307.0, 281.0, 292.0, 308.0, 322.0, 317.0, 316.0, 311.0, 319.0, 313.0, 314.0, 319.0, 317.0, 314.0, 316.0, 319.0, 317.0, 316.0, 314.0, 316.0, 314.0, 316.0, 311.0, 314.0, 322.0, 314.0, 316.0, 316.0, 314.0, 293.0, 294.0, 288.0, 291.0, 295.0, 275.0, 319.0, 314.0, 283.0, 304.0, 319.0, 317.0, 314.0, 322.0, 288.0, 291.0, 319.0, 317.0, 296.0, 294.0, 314.0, 316.0, 322.0, 314.0, 262.0, 263.0, 281.0, 249.0, 288.0, 294.0, 294.0, 293.0, 316.0, 317.0, 316.0, 311.0, 319.0, 320.0, 308.0, 316.0, 291.0, 
291.0, 308.0, 322.0, 319.0, 320.0, 322.0, 308.0, 288.0, 294.0, 317.0, 319.0, 296.0, 286.0, 249.0, 273.0, 314.0, 322.0, 316.0, 317.0, 308.0, 322.0, 311.0, 316.0, 268.0, 251.0, 322.0, 317.0, 316.0, 311.0, 317.0, 319.0, 311.0, 322.0, 319.0, 317.0, 319.0, 317.0, 316.0, 311.0, 316.0, 314.0, 285.0, 297.0, 315.0, 324.0, 313.0, 320.0, 319.0, 317.0, 322.0, 317.0, 282.0, 276.0, 296.0, 286.0, 319.0, 314.0, 317.0, 316.0, 302.0, 328.0, 291.0, 291.0, 312.0, 315.0, 303.0, 327.0, 314.0, 322.0, 249.0, 257.0, 316.0, 314.0, 317.0, 322.0, 316.0, 314.0, 287.0, 295.0, 319.0, 311.0, 299.0, 291.0, 293.0, 291.0, 296.0, 288.0, 183.0, 182.0, 320.0, 319.0, 288.0, 294.0, 319.0, 320.0, 296.0, 291.0, 316.0, 317.0, 317.0, 316.0, 308.0, 319.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8391970766904484, "mean_processing_ms": 0.23622675772030782, "mean_inference_ms": 1.4376630323224628}, "off_policy_estimator": {}, "info": {"num_steps_trained": 9312000, "num_steps_sampled": 4966400, "sample_time_ms": 21599.042, "load_time_ms": 36.591, "grad_time_ms": 9790.428, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0022788590285927057, "policy_loss": -0.00476012472063303, "vf_loss": 75.92620849609375, "vf_explained_var": 0.7665655016899109, "kl": 0.0023904216941446066, "entropy": 1.1072710752487183, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4966400, "episodes_total": 12416, "training_iteration": 388, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-05-50", "timestamp": 1660259150, "time_this_iter_s": 30.593504667282104, "time_total_s": 17560.85607790947, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 17560.85607790947, "timesteps_since_restore": 4966400, "iterations_since_restore": 388, "perf": {"cpu_util_percent": 33.502325581395354, "ram_util_percent": 59.08372093023256}}
-{"episode_reward_max": 639.0, "episode_reward_min": 365.0, "episode_reward_mean": 614.92, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 182.0}, "policy_reward_max": {"ppo": 328.0}, "policy_reward_mean": {"ppo": 307.46}, "custom_metrics": {"sparse_reward_mean": 213.4, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 188.12, "shaped_reward_min": 125, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.31, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 18.12, "onion_pickup_agent_1_min": 12, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 17.01, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.65, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.31, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.2, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.1, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.05, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.61, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.68, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.12, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.84, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.38, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.12, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.44, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.51, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.52, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.23, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.47, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.21, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.61, 
"optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.68, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.61, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.68, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [633.0, 630.0, 639.0, 579.0, 582.0, 633.0, 579.0, 633.0, 636.0, 630.0, 633.0, 609.0, 582.0, 636.0, 630.0, 636.0, 576.0, 636.0, 633.0, 630.0, 636.0, 582.0, 582.0, 636.0, 630.0, 633.0, 630.0, 636.0, 579.0, 633.0, 570.0, 633.0, 633.0, 636.0, 636.0, 627.0, 630.0, 582.0, 639.0, 633.0, 636.0, 639.0, 558.0, 582.0, 633.0, 633.0, 630.0, 582.0, 627.0, 630.0, 636.0, 506.0, 630.0, 639.0, 630.0, 582.0, 630.0, 590.0, 584.0, 584.0, 365.0, 639.0, 582.0, 639.0, 587.0, 633.0, 633.0, 627.0, 582.0, 639.0, 639.0, 627.0, 627.0, 633.0, 627.0, 639.0, 636.0, 636.0, 584.0, 573.0, 636.0, 630.0, 573.0, 630.0, 633.0, 630.0, 627.0, 636.0, 630.0, 636.0, 630.0, 630.0, 627.0, 636.0, 630.0, 630.0, 587.0, 579.0, 570.0, 633.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [311.0, 322.0, 319.0, 311.0, 317.0, 322.0, 282.0, 297.0, 294.0, 288.0, 316.0, 317.0, 288.0, 291.0, 311.0, 322.0, 314.0, 322.0, 313.0, 317.0, 317.0, 316.0, 310.0, 299.0, 293.0, 289.0, 317.0, 319.0, 322.0, 308.0, 319.0, 317.0, 288.0, 288.0, 314.0, 322.0, 316.0, 317.0, 314.0, 316.0, 322.0, 314.0, 285.0, 297.0, 294.0, 288.0, 314.0, 322.0, 321.0, 309.0, 316.0, 317.0, 311.0, 319.0, 311.0, 325.0, 282.0, 297.0, 319.0, 314.0, 287.0, 283.0, 319.0, 314.0, 311.0, 322.0, 319.0, 317.0, 319.0, 317.0, 316.0, 311.0, 316.0, 314.0, 285.0, 297.0, 315.0, 324.0, 313.0, 320.0, 319.0, 317.0, 322.0, 317.0, 282.0, 276.0, 296.0, 286.0, 319.0, 314.0, 317.0, 316.0, 302.0, 328.0, 291.0, 291.0, 312.0, 
315.0, 303.0, 327.0, 314.0, 322.0, 249.0, 257.0, 316.0, 314.0, 317.0, 322.0, 316.0, 314.0, 287.0, 295.0, 319.0, 311.0, 299.0, 291.0, 293.0, 291.0, 296.0, 288.0, 183.0, 182.0, 320.0, 319.0, 288.0, 294.0, 319.0, 320.0, 296.0, 291.0, 316.0, 317.0, 317.0, 316.0, 308.0, 319.0, 291.0, 291.0, 317.0, 322.0, 317.0, 322.0, 311.0, 316.0, 308.0, 319.0, 319.0, 314.0, 314.0, 313.0, 316.0, 323.0, 314.0, 322.0, 319.0, 317.0, 291.0, 293.0, 298.0, 275.0, 324.0, 312.0, 323.0, 307.0, 281.0, 292.0, 308.0, 322.0, 317.0, 316.0, 311.0, 319.0, 313.0, 314.0, 319.0, 317.0, 314.0, 316.0, 319.0, 317.0, 316.0, 314.0, 316.0, 314.0, 316.0, 311.0, 314.0, 322.0, 314.0, 316.0, 316.0, 314.0, 293.0, 294.0, 288.0, 291.0, 295.0, 275.0, 319.0, 314.0]}, "sampler_perf": {"mean_env_wait_ms": 0.838196919665678, "mean_processing_ms": 0.23602508803676062, "mean_inference_ms": 1.4365150313753652}, "off_policy_estimator": {}, "info": {"num_steps_trained": 9336000, "num_steps_sampled": 4979200, "sample_time_ms": 21400.03, "load_time_ms": 36.799, "grad_time_ms": 9763.193, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0015943764010444283, "policy_loss": -0.005665285978466272, "vf_loss": 78.14656829833984, "vf_explained_var": 0.7691711783409119, "kl": 0.0016816608840599656, "entropy": 1.1099879741668701, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4979200, "episodes_total": 12448, "training_iteration": 389, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-06-20", "timestamp": 1660259180, "time_this_iter_s": 29.990082025527954, "time_total_s": 17590.846159934998, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 17590.846159934998, "timesteps_since_restore": 4979200, "iterations_since_restore": 389, "perf": {"cpu_util_percent": 36.06279069767442, "ram_util_percent": 59.181395348837206}}
-{"episode_reward_max": 639.0, "episode_reward_min": 567.0, "episode_reward_mean": 617.61, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 275.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 308.805}, "custom_metrics": {"sparse_reward_mean": 214.4, "sparse_reward_min": 200, "sparse_reward_max": 220, "shaped_reward_mean": 188.81, "shaped_reward_min": 167, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.3, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 18.21, "onion_pickup_agent_1_min": 13, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.97, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 17.76, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.25, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.19, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.09, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.03, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.68, "potting_onion_agent_0_min": 13, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.73, "potting_onion_agent_1_min": 12, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.18, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.87, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.43, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.18, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.52, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.54, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.51, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.24, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.5, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.22, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.68, 
"optimal_onion_potting_agent_0_min": 13, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.73, "optimal_onion_potting_agent_1_min": 12, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.68, "viable_onion_potting_agent_0_min": 13, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.73, "viable_onion_potting_agent_1_min": 12, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 633.0, 633.0, 582.0, 579.0, 582.0, 633.0, 579.0, 633.0, 633.0, 576.0, 567.0, 636.0, 576.0, 636.0, 633.0, 639.0, 636.0, 579.0, 636.0, 627.0, 636.0, 636.0, 630.0, 630.0, 633.0, 633.0, 633.0, 579.0, 639.0, 633.0, 579.0, 587.0, 633.0, 633.0, 627.0, 582.0, 639.0, 639.0, 627.0, 627.0, 633.0, 627.0, 639.0, 636.0, 636.0, 584.0, 573.0, 636.0, 630.0, 573.0, 630.0, 633.0, 630.0, 627.0, 636.0, 630.0, 636.0, 630.0, 630.0, 627.0, 636.0, 630.0, 630.0, 587.0, 579.0, 570.0, 633.0, 633.0, 630.0, 639.0, 579.0, 582.0, 633.0, 579.0, 633.0, 636.0, 630.0, 633.0, 609.0, 582.0, 636.0, 630.0, 636.0, 576.0, 636.0, 633.0, 630.0, 636.0, 582.0, 582.0, 636.0, 630.0, 633.0, 630.0, 636.0, 579.0, 633.0, 570.0, 633.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [296.0, 286.0, 319.0, 314.0, 311.0, 322.0, 285.0, 297.0, 287.0, 292.0, 291.0, 291.0, 316.0, 317.0, 288.0, 291.0, 310.0, 323.0, 317.0, 316.0, 298.0, 278.0, 282.0, 285.0, 314.0, 322.0, 292.0, 284.0, 316.0, 320.0, 318.0, 315.0, 317.0, 322.0, 319.0, 317.0, 288.0, 291.0, 321.0, 315.0, 308.0, 319.0, 314.0, 322.0, 314.0, 322.0, 319.0, 311.0, 312.0, 318.0, 316.0, 317.0, 316.0, 317.0, 319.0, 314.0, 285.0, 294.0, 317.0, 322.0, 311.0, 322.0, 288.0, 291.0, 296.0, 291.0, 316.0, 317.0, 317.0, 316.0, 308.0, 319.0, 291.0, 291.0, 317.0, 322.0, 317.0, 322.0, 311.0, 316.0, 308.0, 319.0, 319.0, 314.0, 314.0, 313.0, 316.0, 323.0, 314.0, 322.0, 319.0, 317.0, 291.0, 293.0, 298.0, 275.0, 324.0, 
312.0, 323.0, 307.0, 281.0, 292.0, 308.0, 322.0, 317.0, 316.0, 311.0, 319.0, 313.0, 314.0, 319.0, 317.0, 314.0, 316.0, 319.0, 317.0, 316.0, 314.0, 316.0, 314.0, 316.0, 311.0, 314.0, 322.0, 314.0, 316.0, 316.0, 314.0, 293.0, 294.0, 288.0, 291.0, 295.0, 275.0, 319.0, 314.0, 311.0, 322.0, 319.0, 311.0, 317.0, 322.0, 282.0, 297.0, 294.0, 288.0, 316.0, 317.0, 288.0, 291.0, 311.0, 322.0, 314.0, 322.0, 313.0, 317.0, 317.0, 316.0, 310.0, 299.0, 293.0, 289.0, 317.0, 319.0, 322.0, 308.0, 319.0, 317.0, 288.0, 288.0, 314.0, 322.0, 316.0, 317.0, 314.0, 316.0, 322.0, 314.0, 285.0, 297.0, 294.0, 288.0, 314.0, 322.0, 321.0, 309.0, 316.0, 317.0, 311.0, 319.0, 311.0, 325.0, 282.0, 297.0, 319.0, 314.0, 287.0, 283.0, 319.0, 314.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8371997037946857, "mean_processing_ms": 0.23582575562090372, "mean_inference_ms": 1.4353288532965658}, "off_policy_estimator": {}, "info": {"num_steps_trained": 9360000, "num_steps_sampled": 4992000, "sample_time_ms": 21348.738, "load_time_ms": 36.486, "grad_time_ms": 9719.814, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0004423701611813158, "policy_loss": -0.007186357397586107, "vf_loss": 81.85875701904297, "vf_explained_var": 0.7682639956474304, "kl": 0.002242224058136344, "entropy": 1.114312767982483, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4992000, "episodes_total": 12480, "training_iteration": 390, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-06-49", "timestamp": 1660259209, "time_this_iter_s": 29.65726089477539, "time_total_s": 17620.503420829773, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 17620.503420829773, "timesteps_since_restore": 4992000, "iterations_since_restore": 390, "perf": {"cpu_util_percent": 34.275609756097566, "ram_util_percent": 59.390243902439025}}
-{"episode_reward_max": 639.0, "episode_reward_min": 408.0, "episode_reward_mean": 614.77, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 198.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 307.385}, "custom_metrics": {"sparse_reward_mean": 213.2, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 188.37, "shaped_reward_min": 128, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.97, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 18.52, "onion_pickup_agent_1_min": 13, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.62, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 18.03, "useful_onion_pickup_agent_1_min": 12, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.2, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.25, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.05, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.1, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 16.45, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 18.02, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.37, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.84, "dish_pickup_agent_1_min": 4, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.44, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.08, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.67, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.56, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.05, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.54, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.15, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.54, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.13, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.45, 
"optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 18.02, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.45, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 18.02, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [593.0, 636.0, 633.0, 630.0, 630.0, 573.0, 630.0, 630.0, 630.0, 516.0, 636.0, 630.0, 633.0, 639.0, 630.0, 582.0, 633.0, 636.0, 636.0, 639.0, 639.0, 636.0, 567.0, 636.0, 636.0, 582.0, 636.0, 639.0, 636.0, 408.0, 636.0, 636.0, 587.0, 579.0, 570.0, 633.0, 633.0, 630.0, 639.0, 579.0, 582.0, 633.0, 579.0, 633.0, 636.0, 630.0, 633.0, 609.0, 582.0, 636.0, 630.0, 636.0, 576.0, 636.0, 633.0, 630.0, 636.0, 582.0, 582.0, 636.0, 630.0, 633.0, 630.0, 636.0, 579.0, 633.0, 570.0, 633.0, 582.0, 633.0, 633.0, 582.0, 579.0, 582.0, 633.0, 579.0, 633.0, 633.0, 576.0, 567.0, 636.0, 576.0, 636.0, 633.0, 639.0, 636.0, 579.0, 636.0, 627.0, 636.0, 636.0, 630.0, 630.0, 633.0, 633.0, 633.0, 579.0, 639.0, 633.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [296.0, 297.0, 322.0, 314.0, 316.0, 317.0, 316.0, 314.0, 312.0, 318.0, 287.0, 286.0, 313.0, 317.0, 318.0, 312.0, 308.0, 322.0, 254.0, 262.0, 324.0, 312.0, 310.0, 320.0, 313.0, 320.0, 319.0, 320.0, 321.0, 309.0, 291.0, 291.0, 316.0, 317.0, 319.0, 317.0, 314.0, 322.0, 320.0, 319.0, 319.0, 320.0, 319.0, 317.0, 274.0, 293.0, 309.0, 327.0, 319.0, 317.0, 288.0, 294.0, 322.0, 314.0, 319.0, 320.0, 319.0, 317.0, 198.0, 210.0, 314.0, 322.0, 313.0, 323.0, 293.0, 294.0, 288.0, 291.0, 295.0, 275.0, 319.0, 314.0, 311.0, 322.0, 319.0, 311.0, 317.0, 322.0, 282.0, 297.0, 294.0, 288.0, 316.0, 317.0, 288.0, 291.0, 311.0, 322.0, 314.0, 322.0, 313.0, 317.0, 317.0, 316.0, 310.0, 299.0, 293.0, 
289.0, 317.0, 319.0, 322.0, 308.0, 319.0, 317.0, 288.0, 288.0, 314.0, 322.0, 316.0, 317.0, 314.0, 316.0, 322.0, 314.0, 285.0, 297.0, 294.0, 288.0, 314.0, 322.0, 321.0, 309.0, 316.0, 317.0, 311.0, 319.0, 311.0, 325.0, 282.0, 297.0, 319.0, 314.0, 287.0, 283.0, 319.0, 314.0, 296.0, 286.0, 319.0, 314.0, 311.0, 322.0, 285.0, 297.0, 287.0, 292.0, 291.0, 291.0, 316.0, 317.0, 288.0, 291.0, 310.0, 323.0, 317.0, 316.0, 298.0, 278.0, 282.0, 285.0, 314.0, 322.0, 292.0, 284.0, 316.0, 320.0, 318.0, 315.0, 317.0, 322.0, 319.0, 317.0, 288.0, 291.0, 321.0, 315.0, 308.0, 319.0, 314.0, 322.0, 314.0, 322.0, 319.0, 311.0, 312.0, 318.0, 316.0, 317.0, 316.0, 317.0, 319.0, 314.0, 285.0, 294.0, 317.0, 322.0, 311.0, 322.0, 288.0, 291.0]}, "sampler_perf": {"mean_env_wait_ms": 0.83620884479323, "mean_processing_ms": 0.23562873648902904, "mean_inference_ms": 1.4341204983718234}, "off_policy_estimator": {}, "info": {"num_steps_trained": 9384000, "num_steps_sampled": 5004800, "sample_time_ms": 21231.946, "load_time_ms": 36.725, "grad_time_ms": 9739.689, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.004321941174566746, "policy_loss": -0.0030343374237418175, "vf_loss": 79.1146011352539, "vf_explained_var": 0.7782495021820068, "kl": 0.0022527193650603294, "entropy": 1.1103630065917969, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5004800, "episodes_total": 12512, "training_iteration": 391, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-07-19", "timestamp": 1660259239, "time_this_iter_s": 29.202332973480225, "time_total_s": 17649.705753803253, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 17649.705753803253, "timesteps_since_restore": 5004800, "iterations_since_restore": 391, "perf": {"cpu_util_percent": 32.102380952380955, "ram_util_percent": 59.785714285714285}}
-{"episode_reward_max": 639.0, "episode_reward_min": 408.0, "episode_reward_mean": 613.61, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 198.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 306.805}, "custom_metrics": {"sparse_reward_mean": 212.6, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 188.41, "shaped_reward_min": 128, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.84, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 18.63, "onion_pickup_agent_1_min": 13, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.4, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 18.06, "useful_onion_pickup_agent_1_min": 12, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.19, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.28, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.09, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.13, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 16.31, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 18.1, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.45, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.8, "dish_pickup_agent_1_min": 4, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.54, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.07, "useful_dish_pickup_agent_1_min": 3, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.71, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.54, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.07, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.58, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.1, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.56, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.08, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.31, 
"optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 18.1, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.31, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 18.1, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 576.0, 636.0, 639.0, 587.0, 636.0, 630.0, 633.0, 639.0, 639.0, 639.0, 587.0, 582.0, 525.0, 579.0, 636.0, 582.0, 636.0, 627.0, 630.0, 630.0, 587.0, 639.0, 633.0, 570.0, 630.0, 630.0, 639.0, 633.0, 627.0, 582.0, 576.0, 579.0, 633.0, 570.0, 633.0, 582.0, 633.0, 633.0, 582.0, 579.0, 582.0, 633.0, 579.0, 633.0, 633.0, 576.0, 567.0, 636.0, 576.0, 636.0, 633.0, 639.0, 636.0, 579.0, 636.0, 627.0, 636.0, 636.0, 630.0, 630.0, 633.0, 633.0, 633.0, 579.0, 639.0, 633.0, 579.0, 593.0, 636.0, 633.0, 630.0, 630.0, 573.0, 630.0, 630.0, 630.0, 516.0, 636.0, 630.0, 633.0, 639.0, 630.0, 582.0, 633.0, 636.0, 636.0, 639.0, 639.0, 636.0, 567.0, 636.0, 636.0, 582.0, 636.0, 639.0, 636.0, 408.0, 636.0, 636.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [291.0, 288.0, 290.0, 286.0, 322.0, 314.0, 322.0, 317.0, 299.0, 288.0, 314.0, 322.0, 313.0, 317.0, 321.0, 312.0, 314.0, 325.0, 319.0, 320.0, 317.0, 322.0, 293.0, 294.0, 288.0, 294.0, 270.0, 255.0, 289.0, 290.0, 322.0, 314.0, 286.0, 296.0, 319.0, 317.0, 313.0, 314.0, 316.0, 314.0, 316.0, 314.0, 290.0, 297.0, 319.0, 320.0, 319.0, 314.0, 279.0, 291.0, 315.0, 315.0, 316.0, 314.0, 317.0, 322.0, 317.0, 316.0, 300.0, 327.0, 301.0, 281.0, 285.0, 291.0, 282.0, 297.0, 319.0, 314.0, 287.0, 283.0, 319.0, 314.0, 296.0, 286.0, 319.0, 314.0, 311.0, 322.0, 285.0, 297.0, 287.0, 292.0, 291.0, 291.0, 316.0, 317.0, 288.0, 291.0, 310.0, 323.0, 317.0, 316.0, 298.0, 278.0, 282.0, 285.0, 314.0, 
322.0, 292.0, 284.0, 316.0, 320.0, 318.0, 315.0, 317.0, 322.0, 319.0, 317.0, 288.0, 291.0, 321.0, 315.0, 308.0, 319.0, 314.0, 322.0, 314.0, 322.0, 319.0, 311.0, 312.0, 318.0, 316.0, 317.0, 316.0, 317.0, 319.0, 314.0, 285.0, 294.0, 317.0, 322.0, 311.0, 322.0, 288.0, 291.0, 296.0, 297.0, 322.0, 314.0, 316.0, 317.0, 316.0, 314.0, 312.0, 318.0, 287.0, 286.0, 313.0, 317.0, 318.0, 312.0, 308.0, 322.0, 254.0, 262.0, 324.0, 312.0, 310.0, 320.0, 313.0, 320.0, 319.0, 320.0, 321.0, 309.0, 291.0, 291.0, 316.0, 317.0, 319.0, 317.0, 314.0, 322.0, 320.0, 319.0, 319.0, 320.0, 319.0, 317.0, 274.0, 293.0, 309.0, 327.0, 319.0, 317.0, 288.0, 294.0, 322.0, 314.0, 319.0, 320.0, 319.0, 317.0, 198.0, 210.0, 314.0, 322.0, 313.0, 323.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8352162722416216, "mean_processing_ms": 0.2354315996686975, "mean_inference_ms": 1.4328297296809587}, "off_policy_estimator": {}, "info": {"num_steps_trained": 9408000, "num_steps_sampled": 5017600, "sample_time_ms": 20999.824, "load_time_ms": 36.668, "grad_time_ms": 9737.715, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0033923883456736803, "policy_loss": -0.004631926771253347, "vf_loss": 85.770751953125, "vf_explained_var": 0.7699734568595886, "kl": 0.0020241406746208668, "entropy": 1.1055186986923218, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5017600, "episodes_total": 12544, "training_iteration": 392, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-07-47", "timestamp": 1660259267, "time_this_iter_s": 28.112272024154663, "time_total_s": 17677.818025827408, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 17677.818025827408, "timesteps_since_restore": 5017600, "iterations_since_restore": 392, "perf": {"cpu_util_percent": 32.03076923076923, "ram_util_percent": 59.29230769230767}}
-{"episode_reward_max": 639.0, "episode_reward_min": 408.0, "episode_reward_mean": 615.62, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 198.0}, "policy_reward_max": {"ppo": 329.0}, "policy_reward_mean": {"ppo": 307.81}, "custom_metrics": {"sparse_reward_mean": 213.2, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 189.22, "shaped_reward_min": 128, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.94, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.58, "onion_pickup_agent_1_min": 14, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.53, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 18.04, "useful_onion_pickup_agent_1_min": 12, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.18, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.3, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.07, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.15, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 16.42, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 18.01, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.47, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.83, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.57, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.19, "useful_dish_pickup_agent_1_min": 3, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.77, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.48, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.06, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.56, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.18, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.53, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.14, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.42, 
"optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 18.01, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.42, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 18.01, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [633.0, 633.0, 639.0, 582.0, 630.0, 633.0, 630.0, 579.0, 633.0, 630.0, 633.0, 630.0, 633.0, 630.0, 636.0, 633.0, 582.0, 633.0, 636.0, 636.0, 633.0, 582.0, 579.0, 627.0, 633.0, 587.0, 587.0, 581.0, 639.0, 630.0, 636.0, 639.0, 579.0, 639.0, 633.0, 579.0, 593.0, 636.0, 633.0, 630.0, 630.0, 573.0, 630.0, 630.0, 630.0, 516.0, 636.0, 630.0, 633.0, 639.0, 630.0, 582.0, 633.0, 636.0, 636.0, 639.0, 639.0, 636.0, 567.0, 636.0, 636.0, 582.0, 636.0, 639.0, 636.0, 408.0, 636.0, 636.0, 579.0, 576.0, 636.0, 639.0, 587.0, 636.0, 630.0, 633.0, 639.0, 639.0, 639.0, 587.0, 582.0, 525.0, 579.0, 636.0, 582.0, 636.0, 627.0, 630.0, 630.0, 587.0, 639.0, 633.0, 570.0, 630.0, 630.0, 639.0, 633.0, 627.0, 582.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [319.0, 314.0, 319.0, 314.0, 319.0, 320.0, 288.0, 294.0, 313.0, 317.0, 314.0, 319.0, 319.0, 311.0, 288.0, 291.0, 309.0, 324.0, 309.0, 321.0, 316.0, 317.0, 313.0, 317.0, 314.0, 319.0, 314.0, 316.0, 329.0, 307.0, 314.0, 319.0, 285.0, 297.0, 314.0, 319.0, 319.0, 317.0, 322.0, 314.0, 321.0, 312.0, 291.0, 291.0, 287.0, 292.0, 308.0, 319.0, 314.0, 319.0, 283.0, 304.0, 293.0, 294.0, 291.0, 290.0, 322.0, 317.0, 316.0, 314.0, 322.0, 314.0, 317.0, 322.0, 285.0, 294.0, 317.0, 322.0, 311.0, 322.0, 288.0, 291.0, 296.0, 297.0, 322.0, 314.0, 316.0, 317.0, 316.0, 314.0, 312.0, 318.0, 287.0, 286.0, 313.0, 317.0, 318.0, 312.0, 308.0, 322.0, 254.0, 262.0, 324.0, 312.0, 310.0, 320.0, 313.0, 
320.0, 319.0, 320.0, 321.0, 309.0, 291.0, 291.0, 316.0, 317.0, 319.0, 317.0, 314.0, 322.0, 320.0, 319.0, 319.0, 320.0, 319.0, 317.0, 274.0, 293.0, 309.0, 327.0, 319.0, 317.0, 288.0, 294.0, 322.0, 314.0, 319.0, 320.0, 319.0, 317.0, 198.0, 210.0, 314.0, 322.0, 313.0, 323.0, 291.0, 288.0, 290.0, 286.0, 322.0, 314.0, 322.0, 317.0, 299.0, 288.0, 314.0, 322.0, 313.0, 317.0, 321.0, 312.0, 314.0, 325.0, 319.0, 320.0, 317.0, 322.0, 293.0, 294.0, 288.0, 294.0, 270.0, 255.0, 289.0, 290.0, 322.0, 314.0, 286.0, 296.0, 319.0, 317.0, 313.0, 314.0, 316.0, 314.0, 316.0, 314.0, 290.0, 297.0, 319.0, 320.0, 319.0, 314.0, 279.0, 291.0, 315.0, 315.0, 316.0, 314.0, 317.0, 322.0, 317.0, 316.0, 300.0, 327.0, 301.0, 281.0, 285.0, 291.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8342365209622892, "mean_processing_ms": 0.23523671051615616, "mean_inference_ms": 1.4317637574949895}, "off_policy_estimator": {}, "info": {"num_steps_trained": 9432000, "num_steps_sampled": 5030400, "sample_time_ms": 21299.857, "load_time_ms": 37.172, "grad_time_ms": 9869.636, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 4.354631528258324e-05, "policy_loss": -0.0072962199337780476, "vf_loss": 78.87313079833984, "vf_explained_var": 0.7646682262420654, "kl": 0.0020736621227115393, "entropy": 1.0950974225997925, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5030400, "episodes_total": 12576, "training_iteration": 393, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-08-21", "timestamp": 1660259301, "time_this_iter_s": 34.420122146606445, "time_total_s": 17712.238147974014, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 17712.238147974014, "timesteps_since_restore": 5030400, "iterations_since_restore": 393, "perf": {"cpu_util_percent": 32.710204081632654, "ram_util_percent": 59.30816326530613}}
-{"episode_reward_max": 639.0, "episode_reward_min": 408.0, "episode_reward_mean": 614.45, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 198.0}, "policy_reward_max": {"ppo": 329.0}, "policy_reward_mean": {"ppo": 307.225}, "custom_metrics": {"sparse_reward_mean": 212.8, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 188.85, "shaped_reward_min": 128, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.08, "onion_pickup_agent_0_min": 14, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.29, "onion_pickup_agent_1_min": 14, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.72, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 19, "useful_onion_pickup_agent_1_mean": 17.77, "useful_onion_pickup_agent_1_min": 12, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.16, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.26, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.06, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.11, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 16.59, "potting_onion_agent_0_min": 13, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.73, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.39, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.9, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.45, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.28, "useful_dish_pickup_agent_1_min": 3, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.76, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.5, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.52, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.23, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.47, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.18, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.59, 
"optimal_onion_potting_agent_0_min": 13, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.73, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.59, "viable_onion_potting_agent_0_min": 13, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.73, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 624.0, 576.0, 576.0, 590.0, 590.0, 630.0, 633.0, 582.0, 636.0, 639.0, 633.0, 639.0, 639.0, 630.0, 627.0, 636.0, 636.0, 636.0, 587.0, 581.0, 636.0, 621.0, 639.0, 630.0, 633.0, 621.0, 636.0, 573.0, 636.0, 633.0, 522.0, 636.0, 408.0, 636.0, 636.0, 579.0, 576.0, 636.0, 639.0, 587.0, 636.0, 630.0, 633.0, 639.0, 639.0, 639.0, 587.0, 582.0, 525.0, 579.0, 636.0, 582.0, 636.0, 627.0, 630.0, 630.0, 587.0, 639.0, 633.0, 570.0, 630.0, 630.0, 639.0, 633.0, 627.0, 582.0, 576.0, 633.0, 633.0, 639.0, 582.0, 630.0, 633.0, 630.0, 579.0, 633.0, 630.0, 633.0, 630.0, 633.0, 630.0, 636.0, 633.0, 582.0, 633.0, 636.0, 636.0, 633.0, 582.0, 579.0, 627.0, 633.0, 587.0, 587.0, 581.0, 639.0, 630.0, 636.0, 639.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [288.0, 291.0, 312.0, 312.0, 290.0, 286.0, 287.0, 289.0, 297.0, 293.0, 294.0, 296.0, 316.0, 314.0, 319.0, 314.0, 286.0, 296.0, 311.0, 325.0, 319.0, 320.0, 314.0, 319.0, 320.0, 319.0, 322.0, 317.0, 316.0, 314.0, 308.0, 319.0, 322.0, 314.0, 314.0, 322.0, 309.0, 327.0, 296.0, 291.0, 290.0, 291.0, 316.0, 320.0, 309.0, 312.0, 317.0, 322.0, 316.0, 314.0, 316.0, 317.0, 313.0, 308.0, 319.0, 317.0, 280.0, 293.0, 317.0, 319.0, 314.0, 319.0, 262.0, 260.0, 319.0, 317.0, 198.0, 210.0, 314.0, 322.0, 313.0, 323.0, 291.0, 288.0, 290.0, 286.0, 322.0, 314.0, 322.0, 317.0, 299.0, 288.0, 314.0, 322.0, 313.0, 317.0, 321.0, 312.0, 314.0, 325.0, 319.0, 320.0, 317.0, 322.0, 293.0, 294.0, 288.0, 
294.0, 270.0, 255.0, 289.0, 290.0, 322.0, 314.0, 286.0, 296.0, 319.0, 317.0, 313.0, 314.0, 316.0, 314.0, 316.0, 314.0, 290.0, 297.0, 319.0, 320.0, 319.0, 314.0, 279.0, 291.0, 315.0, 315.0, 316.0, 314.0, 317.0, 322.0, 317.0, 316.0, 300.0, 327.0, 301.0, 281.0, 285.0, 291.0, 319.0, 314.0, 319.0, 314.0, 319.0, 320.0, 288.0, 294.0, 313.0, 317.0, 314.0, 319.0, 319.0, 311.0, 288.0, 291.0, 309.0, 324.0, 309.0, 321.0, 316.0, 317.0, 313.0, 317.0, 314.0, 319.0, 314.0, 316.0, 329.0, 307.0, 314.0, 319.0, 285.0, 297.0, 314.0, 319.0, 319.0, 317.0, 322.0, 314.0, 321.0, 312.0, 291.0, 291.0, 287.0, 292.0, 308.0, 319.0, 314.0, 319.0, 283.0, 304.0, 293.0, 294.0, 291.0, 290.0, 322.0, 317.0, 316.0, 314.0, 322.0, 314.0, 317.0, 322.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8332639627657553, "mean_processing_ms": 0.23504271675970742, "mean_inference_ms": 1.430763529891338}, "off_policy_estimator": {}, "info": {"num_steps_trained": 9456000, "num_steps_sampled": 5043200, "sample_time_ms": 21101.644, "load_time_ms": 36.639, "grad_time_ms": 9913.059, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0006912912358529866, "policy_loss": -0.006923032458871603, "vf_loss": 81.60092163085938, "vf_explained_var": 0.7633647918701172, "kl": 0.001780605292879045, "entropy": 1.0915051698684692, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5043200, "episodes_total": 12608, "training_iteration": 394, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-08-53", "timestamp": 1660259333, "time_this_iter_s": 32.24967384338379, "time_total_s": 17744.487821817398, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 17744.487821817398, "timesteps_since_restore": 5043200, "iterations_since_restore": 394, "perf": {"cpu_util_percent": 33.69347826086956, "ram_util_percent": 59.52391304347825}}
-{"episode_reward_max": 639.0, "episode_reward_min": 522.0, "episode_reward_mean": 617.82, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 260.0}, "policy_reward_max": {"ppo": 329.0}, "policy_reward_mean": {"ppo": 308.91}, "custom_metrics": {"sparse_reward_mean": 214.0, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 189.82, "shaped_reward_min": 162, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.15, "onion_pickup_agent_0_min": 14, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 18.27, "onion_pickup_agent_1_min": 14, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.88, "useful_onion_pickup_agent_0_min": 13, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 17.78, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.13, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.24, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.02, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.05, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.74, "potting_onion_agent_0_min": 13, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.69, "potting_onion_agent_1_min": 14, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.39, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.9, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.48, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.33, "useful_dish_pickup_agent_1_min": 3, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.74, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.48, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.53, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.32, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.46, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.25, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.74, 
"optimal_onion_potting_agent_0_min": 13, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.69, "optimal_onion_potting_agent_1_min": 14, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.74, "viable_onion_potting_agent_0_min": 13, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.69, "viable_onion_potting_agent_1_min": 14, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [636.0, 633.0, 633.0, 630.0, 576.0, 639.0, 639.0, 627.0, 636.0, 630.0, 636.0, 633.0, 587.0, 633.0, 579.0, 587.0, 630.0, 579.0, 633.0, 587.0, 636.0, 639.0, 627.0, 639.0, 630.0, 633.0, 639.0, 584.0, 590.0, 630.0, 636.0, 582.0, 633.0, 627.0, 582.0, 576.0, 633.0, 633.0, 639.0, 582.0, 630.0, 633.0, 630.0, 579.0, 633.0, 630.0, 633.0, 630.0, 633.0, 630.0, 636.0, 633.0, 582.0, 633.0, 636.0, 636.0, 633.0, 582.0, 579.0, 627.0, 633.0, 587.0, 587.0, 581.0, 639.0, 630.0, 636.0, 639.0, 579.0, 624.0, 576.0, 576.0, 590.0, 590.0, 630.0, 633.0, 582.0, 636.0, 639.0, 633.0, 639.0, 639.0, 630.0, 627.0, 636.0, 636.0, 636.0, 587.0, 581.0, 636.0, 621.0, 639.0, 630.0, 633.0, 621.0, 636.0, 573.0, 636.0, 633.0, 522.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [324.0, 312.0, 316.0, 317.0, 317.0, 316.0, 316.0, 314.0, 285.0, 291.0, 322.0, 317.0, 319.0, 320.0, 316.0, 311.0, 317.0, 319.0, 308.0, 322.0, 317.0, 319.0, 314.0, 319.0, 296.0, 291.0, 317.0, 316.0, 281.0, 298.0, 293.0, 294.0, 311.0, 319.0, 290.0, 289.0, 316.0, 317.0, 299.0, 288.0, 319.0, 317.0, 319.0, 320.0, 308.0, 319.0, 319.0, 320.0, 311.0, 319.0, 314.0, 319.0, 322.0, 317.0, 293.0, 291.0, 294.0, 296.0, 316.0, 314.0, 319.0, 317.0, 299.0, 283.0, 317.0, 316.0, 300.0, 327.0, 301.0, 281.0, 285.0, 291.0, 319.0, 314.0, 319.0, 314.0, 319.0, 320.0, 288.0, 294.0, 313.0, 317.0, 314.0, 319.0, 319.0, 311.0, 288.0, 291.0, 309.0, 324.0, 309.0, 321.0, 316.0, 317.0, 313.0, 317.0, 314.0, 
319.0, 314.0, 316.0, 329.0, 307.0, 314.0, 319.0, 285.0, 297.0, 314.0, 319.0, 319.0, 317.0, 322.0, 314.0, 321.0, 312.0, 291.0, 291.0, 287.0, 292.0, 308.0, 319.0, 314.0, 319.0, 283.0, 304.0, 293.0, 294.0, 291.0, 290.0, 322.0, 317.0, 316.0, 314.0, 322.0, 314.0, 317.0, 322.0, 288.0, 291.0, 312.0, 312.0, 290.0, 286.0, 287.0, 289.0, 297.0, 293.0, 294.0, 296.0, 316.0, 314.0, 319.0, 314.0, 286.0, 296.0, 311.0, 325.0, 319.0, 320.0, 314.0, 319.0, 320.0, 319.0, 322.0, 317.0, 316.0, 314.0, 308.0, 319.0, 322.0, 314.0, 314.0, 322.0, 309.0, 327.0, 296.0, 291.0, 290.0, 291.0, 316.0, 320.0, 309.0, 312.0, 317.0, 322.0, 316.0, 314.0, 316.0, 317.0, 313.0, 308.0, 319.0, 317.0, 280.0, 293.0, 317.0, 319.0, 314.0, 319.0, 262.0, 260.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8323023893129786, "mean_processing_ms": 0.23485230859207035, "mean_inference_ms": 1.4300119711867751}, "off_policy_estimator": {}, "info": {"num_steps_trained": 9480000, "num_steps_sampled": 5056000, "sample_time_ms": 21142.709, "load_time_ms": 37.292, "grad_time_ms": 10082.816, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.00028009479865431786, "policy_loss": -0.007545720785856247, "vf_loss": 83.69845581054688, "vf_explained_var": 0.7653185725212097, "kl": 0.001969197066500783, "entropy": 1.0880564451217651, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5056000, "episodes_total": 12640, "training_iteration": 395, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-09-29", "timestamp": 1660259369, "time_this_iter_s": 35.19951057434082, "time_total_s": 17779.68733239174, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 17779.68733239174, "timesteps_since_restore": 5056000, "iterations_since_restore": 395, "perf": {"cpu_util_percent": 30.6265306122449, "ram_util_percent": 59.11836734693878}}
-{"episode_reward_max": 639.0, "episode_reward_min": 522.0, "episode_reward_mean": 618.88, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 260.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 309.44}, "custom_metrics": {"sparse_reward_mean": 214.4, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 190.08, "shaped_reward_min": 160, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.01, "onion_pickup_agent_0_min": 14, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 18.44, "onion_pickup_agent_1_min": 14, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.79, "useful_onion_pickup_agent_0_min": 13, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 17.88, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.11, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.23, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.03, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.05, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.7, "potting_onion_agent_0_min": 13, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.92, "potting_onion_agent_1_min": 14, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.41, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.91, "dish_pickup_agent_1_min": 4, "dish_pickup_agent_1_max": 12, "useful_dish_pickup_agent_0_mean": 5.52, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.17, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.66, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.56, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.04, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.61, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.27, "soup_pickup_agent_1_min": 4, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.53, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.2, "soup_delivery_agent_1_min": 4, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.03, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.7, 
"optimal_onion_potting_agent_0_min": 13, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.92, "optimal_onion_potting_agent_1_min": 14, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.7, "viable_onion_potting_agent_0_min": 13, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.92, "viable_onion_potting_agent_1_min": 14, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [636.0, 630.0, 633.0, 633.0, 636.0, 582.0, 633.0, 630.0, 633.0, 630.0, 636.0, 636.0, 633.0, 576.0, 633.0, 582.0, 582.0, 627.0, 639.0, 639.0, 633.0, 636.0, 630.0, 560.0, 636.0, 636.0, 636.0, 582.0, 633.0, 636.0, 582.0, 578.0, 639.0, 630.0, 636.0, 639.0, 579.0, 624.0, 576.0, 576.0, 590.0, 590.0, 630.0, 633.0, 582.0, 636.0, 639.0, 633.0, 639.0, 639.0, 630.0, 627.0, 636.0, 636.0, 636.0, 587.0, 581.0, 636.0, 621.0, 639.0, 630.0, 633.0, 621.0, 636.0, 573.0, 636.0, 633.0, 522.0, 636.0, 633.0, 633.0, 630.0, 576.0, 639.0, 639.0, 627.0, 636.0, 630.0, 636.0, 633.0, 587.0, 633.0, 579.0, 587.0, 630.0, 579.0, 633.0, 587.0, 636.0, 639.0, 627.0, 639.0, 630.0, 633.0, 639.0, 584.0, 590.0, 630.0, 636.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [311.0, 325.0, 316.0, 314.0, 317.0, 316.0, 314.0, 319.0, 319.0, 317.0, 289.0, 293.0, 321.0, 312.0, 311.0, 319.0, 313.0, 320.0, 313.0, 317.0, 321.0, 315.0, 319.0, 317.0, 319.0, 314.0, 285.0, 291.0, 311.0, 322.0, 285.0, 297.0, 291.0, 291.0, 313.0, 314.0, 327.0, 312.0, 324.0, 315.0, 319.0, 314.0, 314.0, 322.0, 321.0, 309.0, 279.0, 281.0, 319.0, 317.0, 316.0, 320.0, 314.0, 322.0, 291.0, 291.0, 311.0, 322.0, 319.0, 317.0, 291.0, 291.0, 290.0, 288.0, 322.0, 317.0, 316.0, 314.0, 322.0, 314.0, 317.0, 322.0, 288.0, 291.0, 312.0, 312.0, 290.0, 286.0, 287.0, 289.0, 297.0, 293.0, 294.0, 296.0, 316.0, 314.0, 319.0, 314.0, 286.0, 296.0, 311.0, 325.0, 319.0, 320.0, 314.0, 319.0, 320.0, 
319.0, 322.0, 317.0, 316.0, 314.0, 308.0, 319.0, 322.0, 314.0, 314.0, 322.0, 309.0, 327.0, 296.0, 291.0, 290.0, 291.0, 316.0, 320.0, 309.0, 312.0, 317.0, 322.0, 316.0, 314.0, 316.0, 317.0, 313.0, 308.0, 319.0, 317.0, 280.0, 293.0, 317.0, 319.0, 314.0, 319.0, 262.0, 260.0, 324.0, 312.0, 316.0, 317.0, 317.0, 316.0, 316.0, 314.0, 285.0, 291.0, 322.0, 317.0, 319.0, 320.0, 316.0, 311.0, 317.0, 319.0, 308.0, 322.0, 317.0, 319.0, 314.0, 319.0, 296.0, 291.0, 317.0, 316.0, 281.0, 298.0, 293.0, 294.0, 311.0, 319.0, 290.0, 289.0, 316.0, 317.0, 299.0, 288.0, 319.0, 317.0, 319.0, 320.0, 308.0, 319.0, 319.0, 320.0, 311.0, 319.0, 314.0, 319.0, 322.0, 317.0, 293.0, 291.0, 294.0, 296.0, 316.0, 314.0, 319.0, 317.0, 299.0, 283.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8313395238935175, "mean_processing_ms": 0.2346607427713282, "mean_inference_ms": 1.4290797727914242}, "off_policy_estimator": {}, "info": {"num_steps_trained": 9504000, "num_steps_sampled": 5068800, "sample_time_ms": 20854.832, "load_time_ms": 37.297, "grad_time_ms": 9880.508, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0009566675289534032, "policy_loss": -0.0062157814390957355, "vf_loss": 77.21820068359375, "vf_explained_var": 0.7768221497535706, "kl": 0.0020733082201331854, "entropy": 1.0987348556518555, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5068800, "episodes_total": 12672, "training_iteration": 396, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-09-57", "timestamp": 1660259397, "time_this_iter_s": 28.42993927001953, "time_total_s": 17808.11727166176, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 17808.11727166176, "timesteps_since_restore": 5068800, "iterations_since_restore": 396, "perf": {"cpu_util_percent": 34.480487804878045, "ram_util_percent": 59.09024390243902}}
-{"episode_reward_max": 639.0, "episode_reward_min": 522.0, "episode_reward_mean": 616.53, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 260.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 308.265}, "custom_metrics": {"sparse_reward_mean": 213.6, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 189.33, "shaped_reward_min": 160, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.89, "onion_pickup_agent_0_min": 14, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 18.64, "onion_pickup_agent_1_min": 14, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.64, "useful_onion_pickup_agent_0_min": 13, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 17.93, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.12, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.3, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.05, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.08, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 16.48, "potting_onion_agent_0_min": 13, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 18.03, "potting_onion_agent_1_min": 14, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.38, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.93, "dish_pickup_agent_1_min": 4, "dish_pickup_agent_1_max": 12, "useful_dish_pickup_agent_0_mean": 5.57, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.08, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.62, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.63, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.63, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.18, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.57, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.11, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.48, 
"optimal_onion_potting_agent_0_min": 13, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 18.03, "optimal_onion_potting_agent_1_min": 14, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.48, "viable_onion_potting_agent_0_min": 13, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 18.03, "viable_onion_potting_agent_1_min": 14, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [636.0, 633.0, 639.0, 627.0, 582.0, 636.0, 630.0, 630.0, 636.0, 582.0, 587.0, 579.0, 639.0, 582.0, 578.0, 618.0, 570.0, 636.0, 567.0, 630.0, 582.0, 630.0, 582.0, 636.0, 639.0, 633.0, 582.0, 636.0, 636.0, 633.0, 636.0, 582.0, 573.0, 636.0, 633.0, 522.0, 636.0, 633.0, 633.0, 630.0, 576.0, 639.0, 639.0, 627.0, 636.0, 630.0, 636.0, 633.0, 587.0, 633.0, 579.0, 587.0, 630.0, 579.0, 633.0, 587.0, 636.0, 639.0, 627.0, 639.0, 630.0, 633.0, 639.0, 584.0, 590.0, 630.0, 636.0, 582.0, 636.0, 630.0, 633.0, 633.0, 636.0, 582.0, 633.0, 630.0, 633.0, 630.0, 636.0, 636.0, 633.0, 576.0, 633.0, 582.0, 582.0, 627.0, 639.0, 639.0, 633.0, 636.0, 630.0, 560.0, 636.0, 636.0, 636.0, 582.0, 633.0, 636.0, 582.0, 578.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [319.0, 317.0, 316.0, 317.0, 322.0, 317.0, 313.0, 314.0, 291.0, 291.0, 321.0, 315.0, 321.0, 309.0, 316.0, 314.0, 316.0, 320.0, 285.0, 297.0, 294.0, 293.0, 288.0, 291.0, 319.0, 320.0, 289.0, 293.0, 293.0, 285.0, 313.0, 305.0, 276.0, 294.0, 319.0, 317.0, 279.0, 288.0, 316.0, 314.0, 291.0, 291.0, 313.0, 317.0, 295.0, 287.0, 319.0, 317.0, 324.0, 315.0, 313.0, 320.0, 293.0, 289.0, 316.0, 320.0, 320.0, 316.0, 308.0, 325.0, 314.0, 322.0, 285.0, 297.0, 280.0, 293.0, 317.0, 319.0, 314.0, 319.0, 262.0, 260.0, 324.0, 312.0, 316.0, 317.0, 317.0, 316.0, 316.0, 314.0, 285.0, 291.0, 322.0, 317.0, 319.0, 320.0, 316.0, 311.0, 317.0, 319.0, 308.0, 322.0, 317.0, 319.0, 314.0, 319.0, 296.0, 
291.0, 317.0, 316.0, 281.0, 298.0, 293.0, 294.0, 311.0, 319.0, 290.0, 289.0, 316.0, 317.0, 299.0, 288.0, 319.0, 317.0, 319.0, 320.0, 308.0, 319.0, 319.0, 320.0, 311.0, 319.0, 314.0, 319.0, 322.0, 317.0, 293.0, 291.0, 294.0, 296.0, 316.0, 314.0, 319.0, 317.0, 299.0, 283.0, 311.0, 325.0, 316.0, 314.0, 317.0, 316.0, 314.0, 319.0, 319.0, 317.0, 289.0, 293.0, 321.0, 312.0, 311.0, 319.0, 313.0, 320.0, 313.0, 317.0, 321.0, 315.0, 319.0, 317.0, 319.0, 314.0, 285.0, 291.0, 311.0, 322.0, 285.0, 297.0, 291.0, 291.0, 313.0, 314.0, 327.0, 312.0, 324.0, 315.0, 319.0, 314.0, 314.0, 322.0, 321.0, 309.0, 279.0, 281.0, 319.0, 317.0, 316.0, 320.0, 314.0, 322.0, 291.0, 291.0, 311.0, 322.0, 319.0, 317.0, 291.0, 291.0, 290.0, 288.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8303852563726812, "mean_processing_ms": 0.2344708309042675, "mean_inference_ms": 1.4281787126468246}, "off_policy_estimator": {}, "info": {"num_steps_trained": 9528000, "num_steps_sampled": 5081600, "sample_time_ms": 20985.415, "load_time_ms": 37.239, "grad_time_ms": 9808.735, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.003967406693845987, "policy_loss": -0.0038024026434868574, "vf_loss": 83.1785888671875, "vf_explained_var": 0.769153892993927, "kl": 0.0019865171052515507, "entropy": 1.0961049795150757, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5081600, "episodes_total": 12704, "training_iteration": 397, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-10-28", "timestamp": 1660259428, "time_this_iter_s": 30.70757508277893, "time_total_s": 17838.824846744537, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 17838.824846744537, "timesteps_since_restore": 5081600, "iterations_since_restore": 397, "perf": {"cpu_util_percent": 33.08837209302326, "ram_util_percent": 59.13023255813955}}
-{"episode_reward_max": 639.0, "episode_reward_min": 522.0, "episode_reward_mean": 617.12, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 257.0}, "policy_reward_max": {"ppo": 328.0}, "policy_reward_mean": {"ppo": 308.56}, "custom_metrics": {"sparse_reward_mean": 213.8, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 189.52, "shaped_reward_min": 160, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.79, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 18.65, "onion_pickup_agent_1_min": 14, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.54, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.89, "useful_onion_pickup_agent_1_min": 14, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.08, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.22, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.03, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.1, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 16.42, "potting_onion_agent_0_min": 13, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 18.17, "potting_onion_agent_1_min": 14, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.33, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.87, "dish_pickup_agent_1_min": 4, "dish_pickup_agent_1_max": 12, "useful_dish_pickup_agent_0_mean": 5.59, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.06, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.56, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.59, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.67, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.13, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.62, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.08, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.03, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.42, 
"optimal_onion_potting_agent_0_min": 13, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 18.17, "optimal_onion_potting_agent_1_min": 14, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.42, "viable_onion_potting_agent_0_min": 13, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 18.17, "viable_onion_potting_agent_1_min": 14, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [627.0, 576.0, 636.0, 636.0, 630.0, 582.0, 582.0, 639.0, 593.0, 639.0, 630.0, 630.0, 636.0, 633.0, 636.0, 630.0, 636.0, 636.0, 522.0, 633.0, 636.0, 636.0, 633.0, 636.0, 582.0, 576.0, 636.0, 627.0, 633.0, 636.0, 587.0, 633.0, 590.0, 630.0, 636.0, 582.0, 636.0, 630.0, 633.0, 633.0, 636.0, 582.0, 633.0, 630.0, 633.0, 630.0, 636.0, 636.0, 633.0, 576.0, 633.0, 582.0, 582.0, 627.0, 639.0, 639.0, 633.0, 636.0, 630.0, 560.0, 636.0, 636.0, 636.0, 582.0, 633.0, 636.0, 582.0, 578.0, 636.0, 633.0, 639.0, 627.0, 582.0, 636.0, 630.0, 630.0, 636.0, 582.0, 587.0, 579.0, 639.0, 582.0, 578.0, 618.0, 570.0, 636.0, 567.0, 630.0, 582.0, 630.0, 582.0, 636.0, 639.0, 633.0, 582.0, 636.0, 636.0, 633.0, 636.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [313.0, 314.0, 282.0, 294.0, 308.0, 328.0, 314.0, 322.0, 318.0, 312.0, 287.0, 295.0, 291.0, 291.0, 317.0, 322.0, 299.0, 294.0, 317.0, 322.0, 311.0, 319.0, 317.0, 313.0, 319.0, 317.0, 316.0, 317.0, 321.0, 315.0, 316.0, 314.0, 318.0, 318.0, 322.0, 314.0, 257.0, 265.0, 319.0, 314.0, 317.0, 319.0, 322.0, 314.0, 314.0, 319.0, 314.0, 322.0, 289.0, 293.0, 282.0, 294.0, 316.0, 320.0, 311.0, 316.0, 314.0, 319.0, 319.0, 317.0, 299.0, 288.0, 316.0, 317.0, 294.0, 296.0, 316.0, 314.0, 319.0, 317.0, 299.0, 283.0, 311.0, 325.0, 316.0, 314.0, 317.0, 316.0, 314.0, 319.0, 319.0, 317.0, 289.0, 293.0, 321.0, 312.0, 311.0, 319.0, 313.0, 320.0, 313.0, 317.0, 321.0, 315.0, 319.0, 317.0, 319.0, 
314.0, 285.0, 291.0, 311.0, 322.0, 285.0, 297.0, 291.0, 291.0, 313.0, 314.0, 327.0, 312.0, 324.0, 315.0, 319.0, 314.0, 314.0, 322.0, 321.0, 309.0, 279.0, 281.0, 319.0, 317.0, 316.0, 320.0, 314.0, 322.0, 291.0, 291.0, 311.0, 322.0, 319.0, 317.0, 291.0, 291.0, 290.0, 288.0, 319.0, 317.0, 316.0, 317.0, 322.0, 317.0, 313.0, 314.0, 291.0, 291.0, 321.0, 315.0, 321.0, 309.0, 316.0, 314.0, 316.0, 320.0, 285.0, 297.0, 294.0, 293.0, 288.0, 291.0, 319.0, 320.0, 289.0, 293.0, 293.0, 285.0, 313.0, 305.0, 276.0, 294.0, 319.0, 317.0, 279.0, 288.0, 316.0, 314.0, 291.0, 291.0, 313.0, 317.0, 295.0, 287.0, 319.0, 317.0, 324.0, 315.0, 313.0, 320.0, 293.0, 289.0, 316.0, 320.0, 320.0, 316.0, 308.0, 325.0, 314.0, 322.0, 285.0, 297.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8294402136883693, "mean_processing_ms": 0.23428178262037597, "mean_inference_ms": 1.4273260562333872}, "off_policy_estimator": {}, "info": {"num_steps_trained": 9552000, "num_steps_sampled": 5094400, "sample_time_ms": 21312.43, "load_time_ms": 37.532, "grad_time_ms": 9807.592, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.002816990716382861, "policy_loss": -0.004787659738212824, "vf_loss": 81.49095916748047, "vf_explained_var": 0.7696583867073059, "kl": 0.0025824178010225296, "entropy": 1.0888774394989014, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5094400, "episodes_total": 12736, "training_iteration": 398, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-11-02", "timestamp": 1660259462, "time_this_iter_s": 33.86050295829773, "time_total_s": 17872.685349702835, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 17872.685349702835, "timesteps_since_restore": 5094400, "iterations_since_restore": 398, "perf": {"cpu_util_percent": 30.977083333333336, "ram_util_percent": 59.083333333333336}}
-{"episode_reward_max": 639.0, "episode_reward_min": 197.0, "episode_reward_mean": 609.19, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 97.0}, "policy_reward_max": {"ppo": 328.0}, "policy_reward_mean": {"ppo": 304.595}, "custom_metrics": {"sparse_reward_mean": 210.8, "sparse_reward_min": 60, "sparse_reward_max": 220, "shaped_reward_mean": 187.59, "shaped_reward_min": 77, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.74, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 18.39, "onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.43, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.71, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.11, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.22, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.03, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.08, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 16.27, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.89, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.32, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.82, "dish_pickup_agent_1_min": 4, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.52, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.1, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.63, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.53, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.56, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.1, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 6, "soup_delivery_agent_0_mean": 5.53, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.05, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.27, 
"optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.89, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.27, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.89, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [197.0, 582.0, 636.0, 630.0, 636.0, 579.0, 636.0, 636.0, 639.0, 630.0, 582.0, 582.0, 630.0, 633.0, 633.0, 399.0, 636.0, 639.0, 636.0, 587.0, 576.0, 630.0, 582.0, 630.0, 587.0, 633.0, 636.0, 544.0, 624.0, 636.0, 581.0, 636.0, 633.0, 636.0, 582.0, 578.0, 636.0, 633.0, 639.0, 627.0, 582.0, 636.0, 630.0, 630.0, 636.0, 582.0, 587.0, 579.0, 639.0, 582.0, 578.0, 618.0, 570.0, 636.0, 567.0, 630.0, 582.0, 630.0, 582.0, 636.0, 639.0, 633.0, 582.0, 636.0, 636.0, 633.0, 636.0, 582.0, 627.0, 576.0, 636.0, 636.0, 630.0, 582.0, 582.0, 639.0, 593.0, 639.0, 630.0, 630.0, 636.0, 633.0, 636.0, 630.0, 636.0, 636.0, 522.0, 633.0, 636.0, 636.0, 633.0, 636.0, 582.0, 576.0, 636.0, 627.0, 633.0, 636.0, 587.0, 633.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [100.0, 97.0, 296.0, 286.0, 319.0, 317.0, 311.0, 319.0, 316.0, 320.0, 291.0, 288.0, 317.0, 319.0, 319.0, 317.0, 317.0, 322.0, 311.0, 319.0, 296.0, 286.0, 291.0, 291.0, 313.0, 317.0, 311.0, 322.0, 308.0, 325.0, 193.0, 206.0, 321.0, 315.0, 322.0, 317.0, 322.0, 314.0, 288.0, 299.0, 284.0, 292.0, 311.0, 319.0, 291.0, 291.0, 319.0, 311.0, 288.0, 299.0, 324.0, 309.0, 319.0, 317.0, 279.0, 265.0, 311.0, 313.0, 313.0, 323.0, 294.0, 287.0, 314.0, 322.0, 311.0, 322.0, 319.0, 317.0, 291.0, 291.0, 290.0, 288.0, 319.0, 317.0, 316.0, 317.0, 322.0, 317.0, 313.0, 314.0, 291.0, 291.0, 321.0, 315.0, 321.0, 309.0, 316.0, 314.0, 316.0, 320.0, 285.0, 297.0, 294.0, 293.0, 288.0, 291.0, 319.0, 
320.0, 289.0, 293.0, 293.0, 285.0, 313.0, 305.0, 276.0, 294.0, 319.0, 317.0, 279.0, 288.0, 316.0, 314.0, 291.0, 291.0, 313.0, 317.0, 295.0, 287.0, 319.0, 317.0, 324.0, 315.0, 313.0, 320.0, 293.0, 289.0, 316.0, 320.0, 320.0, 316.0, 308.0, 325.0, 314.0, 322.0, 285.0, 297.0, 313.0, 314.0, 282.0, 294.0, 308.0, 328.0, 314.0, 322.0, 318.0, 312.0, 287.0, 295.0, 291.0, 291.0, 317.0, 322.0, 299.0, 294.0, 317.0, 322.0, 311.0, 319.0, 317.0, 313.0, 319.0, 317.0, 316.0, 317.0, 321.0, 315.0, 316.0, 314.0, 318.0, 318.0, 322.0, 314.0, 257.0, 265.0, 319.0, 314.0, 317.0, 319.0, 322.0, 314.0, 314.0, 319.0, 314.0, 322.0, 289.0, 293.0, 282.0, 294.0, 316.0, 320.0, 311.0, 316.0, 314.0, 319.0, 319.0, 317.0, 299.0, 288.0, 316.0, 317.0]}, "sampler_perf": {"mean_env_wait_ms": 0.82850652599667, "mean_processing_ms": 0.23409592110050972, "mean_inference_ms": 1.426659038222931}, "off_policy_estimator": {}, "info": {"num_steps_trained": 9576000, "num_steps_sampled": 5107200, "sample_time_ms": 21603.638, "load_time_ms": 37.362, "grad_time_ms": 9958.009, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.00230390764772892, "policy_loss": -0.00582013139501214, "vf_loss": 86.7406005859375, "vf_explained_var": 0.7753866314888, "kl": 0.0019396115094423294, "entropy": 1.1000421047210693, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5107200, "episodes_total": 12768, "training_iteration": 399, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-11-36", "timestamp": 1660259496, "time_this_iter_s": 34.40714716911316, "time_total_s": 17907.09249687195, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 
12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": 
{}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 
0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 17907.09249687195, "timesteps_since_restore": 5107200, "iterations_since_restore": 399, "perf": {"cpu_util_percent": 33.638775510204084, "ram_util_percent": 59.13265306122449}}
-{"episode_reward_max": 639.0, "episode_reward_min": 197.0, "episode_reward_mean": 612.14, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 97.0}, "policy_reward_max": {"ppo": 328.0}, "policy_reward_mean": {"ppo": 306.07}, "custom_metrics": {"sparse_reward_mean": 211.8, "sparse_reward_min": 60, "sparse_reward_max": 220, "shaped_reward_mean": 188.54, "shaped_reward_min": 77, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.01, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 18.18, "onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.72, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.58, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.16, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.19, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.02, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.06, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.55, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.72, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.32, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.92, "dish_pickup_agent_1_min": 4, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.55, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.21, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.69, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.58, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.5, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.2, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 6, "soup_delivery_agent_0_mean": 5.47, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.17, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.55, 
"optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.72, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.55, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.72, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [636.0, 630.0, 587.0, 636.0, 636.0, 633.0, 579.0, 633.0, 582.0, 627.0, 630.0, 590.0, 639.0, 633.0, 579.0, 630.0, 630.0, 636.0, 639.0, 633.0, 630.0, 630.0, 630.0, 630.0, 639.0, 636.0, 633.0, 636.0, 582.0, 636.0, 582.0, 579.0, 636.0, 633.0, 636.0, 582.0, 627.0, 576.0, 636.0, 636.0, 630.0, 582.0, 582.0, 639.0, 593.0, 639.0, 630.0, 630.0, 636.0, 633.0, 636.0, 630.0, 636.0, 636.0, 522.0, 633.0, 636.0, 636.0, 633.0, 636.0, 582.0, 576.0, 636.0, 627.0, 633.0, 636.0, 587.0, 633.0, 197.0, 582.0, 636.0, 630.0, 636.0, 579.0, 636.0, 636.0, 639.0, 630.0, 582.0, 582.0, 630.0, 633.0, 633.0, 399.0, 636.0, 639.0, 636.0, 587.0, 576.0, 630.0, 582.0, 630.0, 587.0, 633.0, 636.0, 544.0, 624.0, 636.0, 581.0, 636.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [314.0, 322.0, 314.0, 316.0, 294.0, 293.0, 316.0, 320.0, 319.0, 317.0, 317.0, 316.0, 289.0, 290.0, 319.0, 314.0, 293.0, 289.0, 319.0, 308.0, 316.0, 314.0, 293.0, 297.0, 322.0, 317.0, 319.0, 314.0, 294.0, 285.0, 321.0, 309.0, 319.0, 311.0, 319.0, 317.0, 317.0, 322.0, 322.0, 311.0, 319.0, 311.0, 311.0, 319.0, 308.0, 322.0, 313.0, 317.0, 314.0, 325.0, 317.0, 319.0, 316.0, 317.0, 319.0, 317.0, 296.0, 286.0, 314.0, 322.0, 291.0, 291.0, 290.0, 289.0, 320.0, 316.0, 308.0, 325.0, 314.0, 322.0, 285.0, 297.0, 313.0, 314.0, 282.0, 294.0, 308.0, 328.0, 314.0, 322.0, 318.0, 312.0, 287.0, 295.0, 291.0, 291.0, 317.0, 322.0, 299.0, 294.0, 317.0, 322.0, 311.0, 319.0, 317.0, 313.0, 319.0, 
317.0, 316.0, 317.0, 321.0, 315.0, 316.0, 314.0, 318.0, 318.0, 322.0, 314.0, 257.0, 265.0, 319.0, 314.0, 317.0, 319.0, 322.0, 314.0, 314.0, 319.0, 314.0, 322.0, 289.0, 293.0, 282.0, 294.0, 316.0, 320.0, 311.0, 316.0, 314.0, 319.0, 319.0, 317.0, 299.0, 288.0, 316.0, 317.0, 100.0, 97.0, 296.0, 286.0, 319.0, 317.0, 311.0, 319.0, 316.0, 320.0, 291.0, 288.0, 317.0, 319.0, 319.0, 317.0, 317.0, 322.0, 311.0, 319.0, 296.0, 286.0, 291.0, 291.0, 313.0, 317.0, 311.0, 322.0, 308.0, 325.0, 193.0, 206.0, 321.0, 315.0, 322.0, 317.0, 322.0, 314.0, 288.0, 299.0, 284.0, 292.0, 311.0, 319.0, 291.0, 291.0, 319.0, 311.0, 288.0, 299.0, 324.0, 309.0, 319.0, 317.0, 279.0, 265.0, 311.0, 313.0, 313.0, 323.0, 294.0, 287.0, 314.0, 322.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8275880756644836, "mean_processing_ms": 0.23391670126994718, "mean_inference_ms": 1.4262180371248092}, "off_policy_estimator": {}, "info": {"num_steps_trained": 9600000, "num_steps_sampled": 5120000, "sample_time_ms": 22135.485, "load_time_ms": 37.429, "grad_time_ms": 9991.096, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0019203064730390906, "policy_loss": -0.005054800305515528, "vf_loss": 75.28291320800781, "vf_explained_var": 0.7728467583656311, "kl": 0.00209710281342268, "entropy": 1.106364130973816, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5120000, "episodes_total": 12800, "training_iteration": 400, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-12-12", "timestamp": 1660259532, "time_this_iter_s": 35.30730485916138, "time_total_s": 17942.39980173111, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 17942.39980173111, "timesteps_since_restore": 5120000, "iterations_since_restore": 400, "perf": {"cpu_util_percent": 33.525999999999996, "ram_util_percent": 59.168}}
-{"episode_reward_max": 639.0, "episode_reward_min": 197.0, "episode_reward_mean": 613.27, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 97.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 306.635}, "custom_metrics": {"sparse_reward_mean": 212.4, "sparse_reward_min": 60, "sparse_reward_max": 220, "shaped_reward_mean": 188.47, "shaped_reward_min": 77, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.0, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 18.22, "onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.71, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 17.7, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.15, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.21, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.02, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.04, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.48, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.76, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.44, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.92, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.61, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.09, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.72, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.63, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.58, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.15, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.56, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.11, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.48, 
"optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.76, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.48, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.76, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [630.0, 636.0, 636.0, 633.0, 627.0, 582.0, 633.0, 639.0, 624.0, 633.0, 630.0, 633.0, 636.0, 639.0, 582.0, 630.0, 627.0, 633.0, 630.0, 633.0, 630.0, 567.0, 630.0, 539.0, 630.0, 630.0, 633.0, 630.0, 633.0, 630.0, 639.0, 587.0, 633.0, 636.0, 587.0, 633.0, 197.0, 582.0, 636.0, 630.0, 636.0, 579.0, 636.0, 636.0, 639.0, 630.0, 582.0, 582.0, 630.0, 633.0, 633.0, 399.0, 636.0, 639.0, 636.0, 587.0, 576.0, 630.0, 582.0, 630.0, 587.0, 633.0, 636.0, 544.0, 624.0, 636.0, 581.0, 636.0, 636.0, 630.0, 587.0, 636.0, 636.0, 633.0, 579.0, 633.0, 582.0, 627.0, 630.0, 590.0, 639.0, 633.0, 579.0, 630.0, 630.0, 636.0, 639.0, 633.0, 630.0, 630.0, 630.0, 630.0, 639.0, 636.0, 633.0, 636.0, 582.0, 636.0, 582.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [311.0, 319.0, 319.0, 317.0, 319.0, 317.0, 316.0, 317.0, 318.0, 309.0, 294.0, 288.0, 316.0, 317.0, 319.0, 320.0, 305.0, 319.0, 319.0, 314.0, 307.0, 323.0, 310.0, 323.0, 311.0, 325.0, 316.0, 323.0, 285.0, 297.0, 323.0, 307.0, 316.0, 311.0, 319.0, 314.0, 318.0, 312.0, 314.0, 319.0, 322.0, 308.0, 283.0, 284.0, 312.0, 318.0, 277.0, 262.0, 319.0, 311.0, 319.0, 311.0, 319.0, 314.0, 318.0, 312.0, 311.0, 322.0, 311.0, 319.0, 319.0, 320.0, 291.0, 296.0, 314.0, 319.0, 319.0, 317.0, 299.0, 288.0, 316.0, 317.0, 100.0, 97.0, 296.0, 286.0, 319.0, 317.0, 311.0, 319.0, 316.0, 320.0, 291.0, 288.0, 317.0, 319.0, 319.0, 317.0, 317.0, 322.0, 311.0, 319.0, 296.0, 286.0, 291.0, 291.0, 313.0, 
317.0, 311.0, 322.0, 308.0, 325.0, 193.0, 206.0, 321.0, 315.0, 322.0, 317.0, 322.0, 314.0, 288.0, 299.0, 284.0, 292.0, 311.0, 319.0, 291.0, 291.0, 319.0, 311.0, 288.0, 299.0, 324.0, 309.0, 319.0, 317.0, 279.0, 265.0, 311.0, 313.0, 313.0, 323.0, 294.0, 287.0, 314.0, 322.0, 314.0, 322.0, 314.0, 316.0, 294.0, 293.0, 316.0, 320.0, 319.0, 317.0, 317.0, 316.0, 289.0, 290.0, 319.0, 314.0, 293.0, 289.0, 319.0, 308.0, 316.0, 314.0, 293.0, 297.0, 322.0, 317.0, 319.0, 314.0, 294.0, 285.0, 321.0, 309.0, 319.0, 311.0, 319.0, 317.0, 317.0, 322.0, 322.0, 311.0, 319.0, 311.0, 311.0, 319.0, 308.0, 322.0, 313.0, 317.0, 314.0, 325.0, 317.0, 319.0, 316.0, 317.0, 319.0, 317.0, 296.0, 286.0, 314.0, 322.0, 291.0, 291.0, 290.0, 289.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8266799330355084, "mean_processing_ms": 0.23374167551235864, "mean_inference_ms": 1.4257962598910456}, "off_policy_estimator": {}, "info": {"num_steps_trained": 9624000, "num_steps_sampled": 5132800, "sample_time_ms": 22522.212, "load_time_ms": 37.519, "grad_time_ms": 10359.995, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.000927128829061985, "policy_loss": -0.006055487785488367, "vf_loss": 75.37408447265625, "vf_explained_var": 0.7751708030700684, "kl": 0.0019053876167163253, "entropy": 1.1095930337905884, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5132800, "episodes_total": 12832, "training_iteration": 401, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-12-49", "timestamp": 1660259569, "time_this_iter_s": 36.76053810119629, "time_total_s": 17979.160339832306, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 17979.160339832306, "timesteps_since_restore": 5132800, "iterations_since_restore": 401, "perf": {"cpu_util_percent": 34.715094339622645, "ram_util_percent": 59.21886792452831}}
-{"episode_reward_max": 639.0, "episode_reward_min": 527.0, "episode_reward_mean": 619.65, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 254.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 309.825}, "custom_metrics": {"sparse_reward_mean": 214.8, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 190.05, "shaped_reward_min": 167, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.07, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 18.47, "onion_pickup_agent_1_min": 14, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.77, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 17.95, "useful_onion_pickup_agent_1_min": 12, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.16, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.23, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.02, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.04, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.6, "potting_onion_agent_0_min": 13, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.89, "potting_onion_agent_1_min": 14, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.53, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.82, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.66, "useful_dish_pickup_agent_0_min": 4, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.1, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.73, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.54, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.04, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.7, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.16, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.62, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.15, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.6, 
"optimal_onion_potting_agent_0_min": 13, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.89, "optimal_onion_potting_agent_1_min": 14, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.6, "viable_onion_potting_agent_0_min": 13, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.89, "viable_onion_potting_agent_1_min": 14, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [527.0, 636.0, 633.0, 636.0, 633.0, 582.0, 587.0, 630.0, 630.0, 627.0, 582.0, 587.0, 627.0, 624.0, 636.0, 633.0, 587.0, 630.0, 639.0, 636.0, 578.0, 633.0, 639.0, 630.0, 639.0, 633.0, 636.0, 582.0, 636.0, 584.0, 630.0, 581.0, 624.0, 636.0, 581.0, 636.0, 636.0, 630.0, 587.0, 636.0, 636.0, 633.0, 579.0, 633.0, 582.0, 627.0, 630.0, 590.0, 639.0, 633.0, 579.0, 630.0, 630.0, 636.0, 639.0, 633.0, 630.0, 630.0, 630.0, 630.0, 639.0, 636.0, 633.0, 636.0, 582.0, 636.0, 582.0, 579.0, 630.0, 636.0, 636.0, 633.0, 627.0, 582.0, 633.0, 639.0, 624.0, 633.0, 630.0, 633.0, 636.0, 639.0, 582.0, 630.0, 627.0, 633.0, 630.0, 633.0, 630.0, 567.0, 630.0, 539.0, 630.0, 630.0, 633.0, 630.0, 633.0, 630.0, 639.0, 587.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [273.0, 254.0, 316.0, 320.0, 316.0, 317.0, 316.0, 320.0, 319.0, 314.0, 291.0, 291.0, 301.0, 286.0, 314.0, 316.0, 321.0, 309.0, 318.0, 309.0, 296.0, 286.0, 291.0, 296.0, 318.0, 309.0, 308.0, 316.0, 316.0, 320.0, 316.0, 317.0, 301.0, 286.0, 314.0, 316.0, 319.0, 320.0, 320.0, 316.0, 295.0, 283.0, 314.0, 319.0, 319.0, 320.0, 311.0, 319.0, 322.0, 317.0, 314.0, 319.0, 319.0, 317.0, 288.0, 294.0, 311.0, 325.0, 290.0, 294.0, 313.0, 317.0, 296.0, 285.0, 311.0, 313.0, 313.0, 323.0, 294.0, 287.0, 314.0, 322.0, 314.0, 322.0, 314.0, 316.0, 294.0, 293.0, 316.0, 320.0, 319.0, 317.0, 317.0, 316.0, 289.0, 290.0, 319.0, 314.0, 293.0, 289.0, 319.0, 308.0, 316.0, 314.0, 293.0, 297.0, 322.0, 
317.0, 319.0, 314.0, 294.0, 285.0, 321.0, 309.0, 319.0, 311.0, 319.0, 317.0, 317.0, 322.0, 322.0, 311.0, 319.0, 311.0, 311.0, 319.0, 308.0, 322.0, 313.0, 317.0, 314.0, 325.0, 317.0, 319.0, 316.0, 317.0, 319.0, 317.0, 296.0, 286.0, 314.0, 322.0, 291.0, 291.0, 290.0, 289.0, 311.0, 319.0, 319.0, 317.0, 319.0, 317.0, 316.0, 317.0, 318.0, 309.0, 294.0, 288.0, 316.0, 317.0, 319.0, 320.0, 305.0, 319.0, 319.0, 314.0, 307.0, 323.0, 310.0, 323.0, 311.0, 325.0, 316.0, 323.0, 285.0, 297.0, 323.0, 307.0, 316.0, 311.0, 319.0, 314.0, 318.0, 312.0, 314.0, 319.0, 322.0, 308.0, 283.0, 284.0, 312.0, 318.0, 277.0, 262.0, 319.0, 311.0, 319.0, 311.0, 319.0, 314.0, 318.0, 312.0, 311.0, 322.0, 311.0, 319.0, 319.0, 320.0, 291.0, 296.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8257902245780139, "mean_processing_ms": 0.23357282477056074, "mean_inference_ms": 1.425400068641748}, "off_policy_estimator": {}, "info": {"num_steps_trained": 9648000, "num_steps_sampled": 5145600, "sample_time_ms": 23036.82, "load_time_ms": 37.721, "grad_time_ms": 10540.081, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0013805682538077235, "policy_loss": -0.006049450021237135, "vf_loss": 79.8260269165039, "vf_explained_var": 0.7674198746681213, "kl": 0.002044239779934287, "entropy": 1.1051733493804932, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5145600, "episodes_total": 12864, "training_iteration": 402, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-13-24", "timestamp": 1660259604, "time_this_iter_s": 35.064194202423096, "time_total_s": 18014.22453403473, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 18014.22453403473, "timesteps_since_restore": 5145600, "iterations_since_restore": 402, "perf": {"cpu_util_percent": 38.665306122448975, "ram_util_percent": 59.40408163265307}}
-{"episode_reward_max": 639.0, "episode_reward_min": 522.0, "episode_reward_mean": 614.54, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 254.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 307.27}, "custom_metrics": {"sparse_reward_mean": 213.2, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 188.14, "shaped_reward_min": 162, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.78, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 18.59, "onion_pickup_agent_1_min": 14, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.44, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 18.08, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.16, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.23, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.05, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.04, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.28, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 18.02, "potting_onion_agent_1_min": 14, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.51, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.72, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.58, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.9, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.65, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.56, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.05, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.72, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.05, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.65, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.03, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.28, 
"optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 18.02, "optimal_onion_potting_agent_1_min": 14, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.28, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 18.02, "viable_onion_potting_agent_1_min": 14, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [527.0, 582.0, 630.0, 639.0, 630.0, 582.0, 522.0, 621.0, 582.0, 564.0, 636.0, 630.0, 630.0, 630.0, 579.0, 633.0, 627.0, 636.0, 630.0, 633.0, 567.0, 582.0, 630.0, 639.0, 636.0, 636.0, 633.0, 567.0, 582.0, 627.0, 630.0, 576.0, 582.0, 636.0, 582.0, 579.0, 630.0, 636.0, 636.0, 633.0, 627.0, 582.0, 633.0, 639.0, 624.0, 633.0, 630.0, 633.0, 636.0, 639.0, 582.0, 630.0, 627.0, 633.0, 630.0, 633.0, 630.0, 567.0, 630.0, 539.0, 630.0, 630.0, 633.0, 630.0, 633.0, 630.0, 639.0, 587.0, 527.0, 636.0, 633.0, 636.0, 633.0, 582.0, 587.0, 630.0, 630.0, 627.0, 582.0, 587.0, 627.0, 624.0, 636.0, 633.0, 587.0, 630.0, 639.0, 636.0, 578.0, 633.0, 639.0, 630.0, 639.0, 633.0, 636.0, 582.0, 636.0, 584.0, 630.0, 581.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [258.0, 269.0, 291.0, 291.0, 322.0, 308.0, 319.0, 320.0, 313.0, 317.0, 288.0, 294.0, 268.0, 254.0, 310.0, 311.0, 288.0, 294.0, 268.0, 296.0, 313.0, 323.0, 318.0, 312.0, 315.0, 315.0, 313.0, 317.0, 285.0, 294.0, 318.0, 315.0, 321.0, 306.0, 317.0, 319.0, 313.0, 317.0, 314.0, 319.0, 282.0, 285.0, 294.0, 288.0, 314.0, 316.0, 322.0, 317.0, 312.0, 324.0, 319.0, 317.0, 313.0, 320.0, 290.0, 277.0, 299.0, 283.0, 308.0, 319.0, 319.0, 311.0, 291.0, 285.0, 296.0, 286.0, 314.0, 322.0, 291.0, 291.0, 290.0, 289.0, 311.0, 319.0, 319.0, 317.0, 319.0, 317.0, 316.0, 317.0, 318.0, 309.0, 294.0, 288.0, 316.0, 317.0, 319.0, 320.0, 305.0, 319.0, 319.0, 314.0, 307.0, 323.0, 310.0, 323.0, 311.0, 
325.0, 316.0, 323.0, 285.0, 297.0, 323.0, 307.0, 316.0, 311.0, 319.0, 314.0, 318.0, 312.0, 314.0, 319.0, 322.0, 308.0, 283.0, 284.0, 312.0, 318.0, 277.0, 262.0, 319.0, 311.0, 319.0, 311.0, 319.0, 314.0, 318.0, 312.0, 311.0, 322.0, 311.0, 319.0, 319.0, 320.0, 291.0, 296.0, 273.0, 254.0, 316.0, 320.0, 316.0, 317.0, 316.0, 320.0, 319.0, 314.0, 291.0, 291.0, 301.0, 286.0, 314.0, 316.0, 321.0, 309.0, 318.0, 309.0, 296.0, 286.0, 291.0, 296.0, 318.0, 309.0, 308.0, 316.0, 316.0, 320.0, 316.0, 317.0, 301.0, 286.0, 314.0, 316.0, 319.0, 320.0, 320.0, 316.0, 295.0, 283.0, 314.0, 319.0, 319.0, 320.0, 311.0, 319.0, 322.0, 317.0, 314.0, 319.0, 319.0, 317.0, 288.0, 294.0, 311.0, 325.0, 290.0, 294.0, 313.0, 317.0, 296.0, 285.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8249056294785048, "mean_processing_ms": 0.2334031065407326, "mean_inference_ms": 1.4249795896358415}, "off_policy_estimator": {}, "info": {"num_steps_trained": 9672000, "num_steps_sampled": 5158400, "sample_time_ms": 23134.408, "load_time_ms": 37.402, "grad_time_ms": 10502.988, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0036110735964030027, "policy_loss": -0.003584003308787942, "vf_loss": 77.51012420654297, "vf_explained_var": 0.770778238773346, "kl": 0.00190709566231817, "entropy": 1.1118710041046143, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5158400, "episodes_total": 12896, "training_iteration": 403, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-13-59", "timestamp": 1660259639, "time_this_iter_s": 35.02220106124878, "time_total_s": 18049.246735095978, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 18049.246735095978, "timesteps_since_restore": 5158400, "iterations_since_restore": 403, "perf": {"cpu_util_percent": 34.604, "ram_util_percent": 59.326}}
-{"episode_reward_max": 639.0, "episode_reward_min": 522.0, "episode_reward_mean": 614.25, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 254.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 307.125}, "custom_metrics": {"sparse_reward_mean": 213.0, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 188.25, "shaped_reward_min": 162, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.79, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 18.51, "onion_pickup_agent_1_min": 15, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.38, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 17.91, "useful_onion_pickup_agent_1_min": 14, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.14, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.22, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.06, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.05, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.37, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.95, "potting_onion_agent_1_min": 14, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.61, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.84, "dish_pickup_agent_1_min": 4, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.54, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 4.94, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.75, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.65, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.05, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.69, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.09, "soup_pickup_agent_1_min": 4, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.61, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.06, "soup_delivery_agent_1_min": 4, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.37, 
"optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.95, "optimal_onion_potting_agent_1_min": 14, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.37, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.95, "viable_onion_potting_agent_1_min": 14, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [636.0, 630.0, 587.0, 636.0, 633.0, 582.0, 578.0, 612.0, 630.0, 633.0, 633.0, 582.0, 633.0, 630.0, 636.0, 630.0, 630.0, 582.0, 636.0, 627.0, 636.0, 630.0, 633.0, 636.0, 536.0, 582.0, 636.0, 630.0, 639.0, 630.0, 639.0, 582.0, 633.0, 630.0, 639.0, 587.0, 527.0, 636.0, 633.0, 636.0, 633.0, 582.0, 587.0, 630.0, 630.0, 627.0, 582.0, 587.0, 627.0, 624.0, 636.0, 633.0, 587.0, 630.0, 639.0, 636.0, 578.0, 633.0, 639.0, 630.0, 639.0, 633.0, 636.0, 582.0, 636.0, 584.0, 630.0, 581.0, 527.0, 582.0, 630.0, 639.0, 630.0, 582.0, 522.0, 621.0, 582.0, 564.0, 636.0, 630.0, 630.0, 630.0, 579.0, 633.0, 627.0, 636.0, 630.0, 633.0, 567.0, 582.0, 630.0, 639.0, 636.0, 636.0, 633.0, 567.0, 582.0, 627.0, 630.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [319.0, 317.0, 310.0, 320.0, 293.0, 294.0, 324.0, 312.0, 319.0, 314.0, 288.0, 294.0, 301.0, 277.0, 307.0, 305.0, 321.0, 309.0, 317.0, 316.0, 319.0, 314.0, 283.0, 299.0, 314.0, 319.0, 313.0, 317.0, 319.0, 317.0, 314.0, 316.0, 314.0, 316.0, 283.0, 299.0, 319.0, 317.0, 305.0, 322.0, 317.0, 319.0, 319.0, 311.0, 323.0, 310.0, 324.0, 312.0, 268.0, 268.0, 287.0, 295.0, 319.0, 317.0, 316.0, 314.0, 319.0, 320.0, 316.0, 314.0, 317.0, 322.0, 288.0, 294.0, 311.0, 322.0, 311.0, 319.0, 319.0, 320.0, 291.0, 296.0, 273.0, 254.0, 316.0, 320.0, 316.0, 317.0, 316.0, 320.0, 319.0, 314.0, 291.0, 291.0, 301.0, 286.0, 314.0, 316.0, 321.0, 309.0, 318.0, 309.0, 296.0, 286.0, 291.0, 296.0, 318.0, 
309.0, 308.0, 316.0, 316.0, 320.0, 316.0, 317.0, 301.0, 286.0, 314.0, 316.0, 319.0, 320.0, 320.0, 316.0, 295.0, 283.0, 314.0, 319.0, 319.0, 320.0, 311.0, 319.0, 322.0, 317.0, 314.0, 319.0, 319.0, 317.0, 288.0, 294.0, 311.0, 325.0, 290.0, 294.0, 313.0, 317.0, 296.0, 285.0, 258.0, 269.0, 291.0, 291.0, 322.0, 308.0, 319.0, 320.0, 313.0, 317.0, 288.0, 294.0, 268.0, 254.0, 310.0, 311.0, 288.0, 294.0, 268.0, 296.0, 313.0, 323.0, 318.0, 312.0, 315.0, 315.0, 313.0, 317.0, 285.0, 294.0, 318.0, 315.0, 321.0, 306.0, 317.0, 319.0, 313.0, 317.0, 314.0, 319.0, 282.0, 285.0, 294.0, 288.0, 314.0, 316.0, 322.0, 317.0, 312.0, 324.0, 319.0, 317.0, 313.0, 320.0, 290.0, 277.0, 299.0, 283.0, 308.0, 319.0, 319.0, 311.0, 291.0, 285.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8240164523926264, "mean_processing_ms": 0.23323028854357722, "mean_inference_ms": 1.4244623501474682}, "off_policy_estimator": {}, "info": {"num_steps_trained": 9696000, "num_steps_sampled": 5171200, "sample_time_ms": 23257.709, "load_time_ms": 37.644, "grad_time_ms": 10547.246, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0014424080727621913, "policy_loss": -0.00573391281068325, "vf_loss": 77.33064270019531, "vf_explained_var": 0.7716807723045349, "kl": 0.0015154121210798621, "entropy": 1.1135029792785645, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5171200, "episodes_total": 12928, "training_iteration": 404, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-14-33", "timestamp": 1660259673, "time_this_iter_s": 33.92467999458313, "time_total_s": 18083.17141509056, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 18083.17141509056, "timesteps_since_restore": 5171200, "iterations_since_restore": 404, "perf": {"cpu_util_percent": 34.637499999999996, "ram_util_percent": 58.86041666666667}}
-{"episode_reward_max": 639.0, "episode_reward_min": 522.0, "episode_reward_mean": 612.47, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 254.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 306.235}, "custom_metrics": {"sparse_reward_mean": 212.2, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 188.07, "shaped_reward_min": 162, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.66, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 18.56, "onion_pickup_agent_1_min": 15, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.23, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 17.83, "useful_onion_pickup_agent_1_min": 14, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.18, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.21, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.1, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.06, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 16.21, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 18.09, "potting_onion_agent_1_min": 15, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.6, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.88, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.61, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 4.88, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.68, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.77, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.73, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.03, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.68, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.96, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.21, 
"optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 18.09, "optimal_onion_potting_agent_1_min": 15, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.21, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 18.09, "viable_onion_potting_agent_1_min": 15, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 633.0, 587.0, 582.0, 627.0, 539.0, 636.0, 633.0, 639.0, 630.0, 582.0, 579.0, 639.0, 636.0, 590.0, 627.0, 633.0, 639.0, 579.0, 587.0, 579.0, 636.0, 636.0, 630.0, 636.0, 633.0, 636.0, 584.0, 584.0, 633.0, 590.0, 630.0, 636.0, 584.0, 630.0, 581.0, 527.0, 582.0, 630.0, 639.0, 630.0, 582.0, 522.0, 621.0, 582.0, 564.0, 636.0, 630.0, 630.0, 630.0, 579.0, 633.0, 627.0, 636.0, 630.0, 633.0, 567.0, 582.0, 630.0, 639.0, 636.0, 636.0, 633.0, 567.0, 582.0, 627.0, 630.0, 576.0, 636.0, 630.0, 587.0, 636.0, 633.0, 582.0, 578.0, 612.0, 630.0, 633.0, 633.0, 582.0, 633.0, 630.0, 636.0, 630.0, 630.0, 582.0, 636.0, 627.0, 636.0, 630.0, 633.0, 636.0, 536.0, 582.0, 636.0, 630.0, 639.0, 630.0, 639.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [291.0, 288.0, 319.0, 314.0, 302.0, 285.0, 290.0, 292.0, 313.0, 314.0, 265.0, 274.0, 322.0, 314.0, 316.0, 317.0, 319.0, 320.0, 316.0, 314.0, 288.0, 294.0, 292.0, 287.0, 322.0, 317.0, 324.0, 312.0, 299.0, 291.0, 308.0, 319.0, 315.0, 318.0, 319.0, 320.0, 285.0, 294.0, 293.0, 294.0, 293.0, 286.0, 313.0, 323.0, 316.0, 320.0, 316.0, 314.0, 316.0, 320.0, 316.0, 317.0, 319.0, 317.0, 291.0, 293.0, 299.0, 285.0, 319.0, 314.0, 296.0, 294.0, 319.0, 311.0, 311.0, 325.0, 290.0, 294.0, 313.0, 317.0, 296.0, 285.0, 258.0, 269.0, 291.0, 291.0, 322.0, 308.0, 319.0, 320.0, 313.0, 317.0, 288.0, 294.0, 268.0, 254.0, 310.0, 311.0, 288.0, 294.0, 268.0, 296.0, 313.0, 323.0, 318.0, 312.0, 315.0, 
315.0, 313.0, 317.0, 285.0, 294.0, 318.0, 315.0, 321.0, 306.0, 317.0, 319.0, 313.0, 317.0, 314.0, 319.0, 282.0, 285.0, 294.0, 288.0, 314.0, 316.0, 322.0, 317.0, 312.0, 324.0, 319.0, 317.0, 313.0, 320.0, 290.0, 277.0, 299.0, 283.0, 308.0, 319.0, 319.0, 311.0, 291.0, 285.0, 319.0, 317.0, 310.0, 320.0, 293.0, 294.0, 324.0, 312.0, 319.0, 314.0, 288.0, 294.0, 301.0, 277.0, 307.0, 305.0, 321.0, 309.0, 317.0, 316.0, 319.0, 314.0, 283.0, 299.0, 314.0, 319.0, 313.0, 317.0, 319.0, 317.0, 314.0, 316.0, 314.0, 316.0, 283.0, 299.0, 319.0, 317.0, 305.0, 322.0, 317.0, 319.0, 319.0, 311.0, 323.0, 310.0, 324.0, 312.0, 268.0, 268.0, 287.0, 295.0, 319.0, 317.0, 316.0, 314.0, 319.0, 320.0, 316.0, 314.0, 317.0, 322.0, 288.0, 294.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8231192050910455, "mean_processing_ms": 0.23305365514326026, "mean_inference_ms": 1.4238788752206395}, "off_policy_estimator": {}, "info": {"num_steps_trained": 9720000, "num_steps_sampled": 5184000, "sample_time_ms": 23218.954, "load_time_ms": 37.026, "grad_time_ms": 10357.169, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.004148914944380522, "policy_loss": -0.003635302884504199, "vf_loss": 83.42072296142578, "vf_explained_var": 0.7650599479675293, "kl": 0.001778147299773991, "entropy": 1.115702509880066, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5184000, "episodes_total": 12960, "training_iteration": 405, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-15-06", "timestamp": 1660259706, "time_this_iter_s": 32.90920972824097, "time_total_s": 18116.080624818802, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 18116.080624818802, "timesteps_since_restore": 5184000, "iterations_since_restore": 405, "perf": {"cpu_util_percent": 30.800000000000004, "ram_util_percent": 58.806521739130446}}
-{"episode_reward_max": 639.0, "episode_reward_min": 536.0, "episode_reward_mean": 615.11, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 265.0}, "policy_reward_max": {"ppo": 328.0}, "policy_reward_mean": {"ppo": 307.555}, "custom_metrics": {"sparse_reward_mean": 213.0, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 189.11, "shaped_reward_min": 172, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.86, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 18.51, "onion_pickup_agent_1_min": 15, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.4, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 17.7, "useful_onion_pickup_agent_1_min": 14, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.24, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.24, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.13, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, "useful_onion_drop_agent_1_mean": 0.07, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 16.31, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 18.03, "potting_onion_agent_1_min": 15, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.6, "dish_pickup_agent_0_min": 5, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.96, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.75, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 4.98, "useful_dish_pickup_agent_1_min": 3, "useful_dish_pickup_agent_1_max": 6, "dish_drop_agent_0_mean": 0.66, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.78, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.78, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.01, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 6, "soup_delivery_agent_0_mean": 5.74, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.94, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.31, 
"optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 18.03, "optimal_onion_potting_agent_1_min": 15, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.31, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 18.03, "viable_onion_potting_agent_1_min": 15, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [627.0, 633.0, 636.0, 579.0, 630.0, 630.0, 630.0, 630.0, 582.0, 630.0, 576.0, 636.0, 624.0, 630.0, 639.0, 624.0, 636.0, 576.0, 587.0, 639.0, 630.0, 633.0, 630.0, 593.0, 633.0, 579.0, 630.0, 639.0, 587.0, 582.0, 636.0, 582.0, 582.0, 627.0, 630.0, 576.0, 636.0, 630.0, 587.0, 636.0, 633.0, 582.0, 578.0, 612.0, 630.0, 633.0, 633.0, 582.0, 633.0, 630.0, 636.0, 630.0, 630.0, 582.0, 636.0, 627.0, 636.0, 630.0, 633.0, 636.0, 536.0, 582.0, 636.0, 630.0, 639.0, 630.0, 639.0, 582.0, 579.0, 633.0, 587.0, 582.0, 627.0, 539.0, 636.0, 633.0, 639.0, 630.0, 582.0, 579.0, 639.0, 636.0, 590.0, 627.0, 633.0, 639.0, 579.0, 587.0, 579.0, 636.0, 636.0, 630.0, 636.0, 633.0, 636.0, 584.0, 584.0, 633.0, 590.0, 630.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [319.0, 308.0, 319.0, 314.0, 314.0, 322.0, 285.0, 294.0, 316.0, 314.0, 313.0, 317.0, 316.0, 314.0, 313.0, 317.0, 298.0, 284.0, 314.0, 316.0, 290.0, 286.0, 316.0, 320.0, 314.0, 310.0, 316.0, 314.0, 322.0, 317.0, 310.0, 314.0, 319.0, 317.0, 291.0, 285.0, 293.0, 294.0, 319.0, 320.0, 316.0, 314.0, 316.0, 317.0, 316.0, 314.0, 302.0, 291.0, 316.0, 317.0, 299.0, 280.0, 316.0, 314.0, 319.0, 320.0, 296.0, 291.0, 291.0, 291.0, 308.0, 328.0, 293.0, 289.0, 299.0, 283.0, 308.0, 319.0, 319.0, 311.0, 291.0, 285.0, 319.0, 317.0, 310.0, 320.0, 293.0, 294.0, 324.0, 312.0, 319.0, 314.0, 288.0, 294.0, 301.0, 277.0, 307.0, 305.0, 321.0, 309.0, 317.0, 316.0, 319.0, 314.0, 283.0, 299.0, 314.0, 
319.0, 313.0, 317.0, 319.0, 317.0, 314.0, 316.0, 314.0, 316.0, 283.0, 299.0, 319.0, 317.0, 305.0, 322.0, 317.0, 319.0, 319.0, 311.0, 323.0, 310.0, 324.0, 312.0, 268.0, 268.0, 287.0, 295.0, 319.0, 317.0, 316.0, 314.0, 319.0, 320.0, 316.0, 314.0, 317.0, 322.0, 288.0, 294.0, 291.0, 288.0, 319.0, 314.0, 302.0, 285.0, 290.0, 292.0, 313.0, 314.0, 265.0, 274.0, 322.0, 314.0, 316.0, 317.0, 319.0, 320.0, 316.0, 314.0, 288.0, 294.0, 292.0, 287.0, 322.0, 317.0, 324.0, 312.0, 299.0, 291.0, 308.0, 319.0, 315.0, 318.0, 319.0, 320.0, 285.0, 294.0, 293.0, 294.0, 293.0, 286.0, 313.0, 323.0, 316.0, 320.0, 316.0, 314.0, 316.0, 320.0, 316.0, 317.0, 319.0, 317.0, 291.0, 293.0, 299.0, 285.0, 319.0, 314.0, 296.0, 294.0, 319.0, 311.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8222123786920608, "mean_processing_ms": 0.23287368102080933, "mean_inference_ms": 1.4230607054783406}, "off_policy_estimator": {}, "info": {"num_steps_trained": 9744000, "num_steps_sampled": 5196800, "sample_time_ms": 23260.688, "load_time_ms": 37.047, "grad_time_ms": 10547.131, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0006928029470145702, "policy_loss": -0.006872573401778936, "vf_loss": 81.25198364257812, "vf_explained_var": 0.7684532999992371, "kl": 0.0019740292336791754, "entropy": 1.1196430921554565, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5196800, "episodes_total": 12992, "training_iteration": 406, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-15-37", "timestamp": 1660259737, "time_this_iter_s": 30.75086998939514, "time_total_s": 18146.831494808197, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 18146.831494808197, "timesteps_since_restore": 5196800, "iterations_since_restore": 406, "perf": {"cpu_util_percent": 34.53863636363637, "ram_util_percent": 58.81590909090908}}
-{"episode_reward_max": 639.0, "episode_reward_min": 3.0, "episode_reward_mean": 605.82, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 328.0}, "policy_reward_mean": {"ppo": 302.91}, "custom_metrics": {"sparse_reward_mean": 209.4, "sparse_reward_min": 0, "sparse_reward_max": 220, "shaped_reward_mean": 187.02, "shaped_reward_min": 3, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.56, "onion_pickup_agent_0_min": 3, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.46, "onion_pickup_agent_1_min": 5, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.1, "useful_onion_pickup_agent_0_min": 1, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.67, "useful_onion_pickup_agent_1_min": 1, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.24, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.31, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.12, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, "useful_onion_drop_agent_1_mean": 0.12, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 15.96, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.95, "potting_onion_agent_1_min": 1, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.49, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.8, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.75, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.93, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.61, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.73, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.8, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 4.88, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.72, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.79, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.96, 
"optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.95, "optimal_onion_potting_agent_1_min": 1, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.96, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.95, "viable_onion_potting_agent_1_min": 1, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [633.0, 633.0, 633.0, 587.0, 544.0, 630.0, 587.0, 587.0, 582.0, 633.0, 3.0, 579.0, 582.0, 587.0, 579.0, 627.0, 633.0, 633.0, 582.0, 582.0, 636.0, 567.0, 587.0, 636.0, 630.0, 636.0, 576.0, 636.0, 582.0, 587.0, 636.0, 636.0, 639.0, 630.0, 639.0, 582.0, 579.0, 633.0, 587.0, 582.0, 627.0, 539.0, 636.0, 633.0, 639.0, 630.0, 582.0, 579.0, 639.0, 636.0, 590.0, 627.0, 633.0, 639.0, 579.0, 587.0, 579.0, 636.0, 636.0, 630.0, 636.0, 633.0, 636.0, 584.0, 584.0, 633.0, 590.0, 630.0, 627.0, 633.0, 636.0, 579.0, 630.0, 630.0, 630.0, 630.0, 582.0, 630.0, 576.0, 636.0, 624.0, 630.0, 639.0, 624.0, 636.0, 576.0, 587.0, 639.0, 630.0, 633.0, 630.0, 593.0, 633.0, 579.0, 630.0, 639.0, 587.0, 582.0, 636.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [309.0, 324.0, 316.0, 317.0, 319.0, 314.0, 296.0, 291.0, 270.0, 274.0, 306.0, 324.0, 299.0, 288.0, 296.0, 291.0, 298.0, 284.0, 316.0, 317.0, 0.0, 3.0, 287.0, 292.0, 292.0, 290.0, 293.0, 294.0, 299.0, 280.0, 316.0, 311.0, 319.0, 314.0, 322.0, 311.0, 293.0, 289.0, 291.0, 291.0, 319.0, 317.0, 281.0, 286.0, 289.0, 298.0, 316.0, 320.0, 316.0, 314.0, 324.0, 312.0, 280.0, 296.0, 316.0, 320.0, 301.0, 281.0, 290.0, 297.0, 319.0, 317.0, 319.0, 317.0, 319.0, 320.0, 316.0, 314.0, 317.0, 322.0, 288.0, 294.0, 291.0, 288.0, 319.0, 314.0, 302.0, 285.0, 290.0, 292.0, 313.0, 314.0, 265.0, 274.0, 322.0, 314.0, 316.0, 317.0, 319.0, 320.0, 316.0, 314.0, 288.0, 294.0, 292.0, 287.0, 322.0, 317.0, 
324.0, 312.0, 299.0, 291.0, 308.0, 319.0, 315.0, 318.0, 319.0, 320.0, 285.0, 294.0, 293.0, 294.0, 293.0, 286.0, 313.0, 323.0, 316.0, 320.0, 316.0, 314.0, 316.0, 320.0, 316.0, 317.0, 319.0, 317.0, 291.0, 293.0, 299.0, 285.0, 319.0, 314.0, 296.0, 294.0, 319.0, 311.0, 319.0, 308.0, 319.0, 314.0, 314.0, 322.0, 285.0, 294.0, 316.0, 314.0, 313.0, 317.0, 316.0, 314.0, 313.0, 317.0, 298.0, 284.0, 314.0, 316.0, 290.0, 286.0, 316.0, 320.0, 314.0, 310.0, 316.0, 314.0, 322.0, 317.0, 310.0, 314.0, 319.0, 317.0, 291.0, 285.0, 293.0, 294.0, 319.0, 320.0, 316.0, 314.0, 316.0, 317.0, 316.0, 314.0, 302.0, 291.0, 316.0, 317.0, 299.0, 280.0, 316.0, 314.0, 319.0, 320.0, 296.0, 291.0, 291.0, 291.0, 308.0, 328.0, 293.0, 289.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8213047734882832, "mean_processing_ms": 0.2326926130887054, "mean_inference_ms": 1.4220964273978254}, "off_policy_estimator": {}, "info": {"num_steps_trained": 9768000, "num_steps_sampled": 5209600, "sample_time_ms": 23078.335, "load_time_ms": 37.045, "grad_time_ms": 10614.832, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0037456925492733717, "policy_loss": -0.004354165401309729, "vf_loss": 86.5316162109375, "vf_explained_var": 0.7801554799079895, "kl": 0.0024353403132408857, "entropy": 1.1066083908081055, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5209600, "episodes_total": 13024, "training_iteration": 407, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-16-07", "timestamp": 1660259767, "time_this_iter_s": 29.56272530555725, "time_total_s": 18176.394220113754, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 18176.394220113754, "timesteps_since_restore": 5209600, "iterations_since_restore": 407, "perf": {"cpu_util_percent": 33.98809523809524, "ram_util_percent": 58.745238095238086}}
-{"episode_reward_max": 639.0, "episode_reward_min": 3.0, "episode_reward_mean": 607.88, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 328.0}, "policy_reward_mean": {"ppo": 303.94}, "custom_metrics": {"sparse_reward_mean": 210.4, "sparse_reward_min": 0, "sparse_reward_max": 220, "shaped_reward_mean": 187.08, "shaped_reward_min": 3, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.79, "onion_pickup_agent_0_min": 3, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 18.31, "onion_pickup_agent_1_min": 5, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.36, "useful_onion_pickup_agent_0_min": 1, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.58, "useful_onion_pickup_agent_1_min": 1, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.22, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.31, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.11, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, "useful_onion_drop_agent_1_mean": 0.12, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 16.2, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.8, "potting_onion_agent_1_min": 1, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.52, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.75, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.68, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.93, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.72, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.63, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.71, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 4.96, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.65, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.9, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.2, 
"optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.8, "optimal_onion_potting_agent_1_min": 1, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.2, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.8, "viable_onion_potting_agent_1_min": 1, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [636.0, 636.0, 636.0, 636.0, 633.0, 618.0, 582.0, 630.0, 636.0, 636.0, 633.0, 636.0, 639.0, 630.0, 576.0, 582.0, 633.0, 639.0, 579.0, 636.0, 636.0, 639.0, 579.0, 636.0, 630.0, 633.0, 636.0, 579.0, 630.0, 633.0, 516.0, 633.0, 584.0, 633.0, 590.0, 630.0, 627.0, 633.0, 636.0, 579.0, 630.0, 630.0, 630.0, 630.0, 582.0, 630.0, 576.0, 636.0, 624.0, 630.0, 639.0, 624.0, 636.0, 576.0, 587.0, 639.0, 630.0, 633.0, 630.0, 593.0, 633.0, 579.0, 630.0, 639.0, 587.0, 582.0, 636.0, 582.0, 633.0, 633.0, 633.0, 587.0, 544.0, 630.0, 587.0, 587.0, 582.0, 633.0, 3.0, 579.0, 582.0, 587.0, 579.0, 627.0, 633.0, 633.0, 582.0, 582.0, 636.0, 567.0, 587.0, 636.0, 630.0, 636.0, 576.0, 636.0, 582.0, 587.0, 636.0, 636.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [322.0, 314.0, 315.0, 321.0, 314.0, 322.0, 319.0, 317.0, 311.0, 322.0, 312.0, 306.0, 294.0, 288.0, 310.0, 320.0, 314.0, 322.0, 314.0, 322.0, 316.0, 317.0, 316.0, 320.0, 317.0, 322.0, 319.0, 311.0, 281.0, 295.0, 293.0, 289.0, 316.0, 317.0, 322.0, 317.0, 293.0, 286.0, 316.0, 320.0, 325.0, 311.0, 319.0, 320.0, 293.0, 286.0, 319.0, 317.0, 319.0, 311.0, 317.0, 316.0, 314.0, 322.0, 291.0, 288.0, 314.0, 316.0, 313.0, 320.0, 265.0, 251.0, 321.0, 312.0, 299.0, 285.0, 319.0, 314.0, 296.0, 294.0, 319.0, 311.0, 319.0, 308.0, 319.0, 314.0, 314.0, 322.0, 285.0, 294.0, 316.0, 314.0, 313.0, 317.0, 316.0, 314.0, 313.0, 317.0, 298.0, 284.0, 314.0, 316.0, 290.0, 286.0, 316.0, 320.0, 314.0, 
310.0, 316.0, 314.0, 322.0, 317.0, 310.0, 314.0, 319.0, 317.0, 291.0, 285.0, 293.0, 294.0, 319.0, 320.0, 316.0, 314.0, 316.0, 317.0, 316.0, 314.0, 302.0, 291.0, 316.0, 317.0, 299.0, 280.0, 316.0, 314.0, 319.0, 320.0, 296.0, 291.0, 291.0, 291.0, 308.0, 328.0, 293.0, 289.0, 309.0, 324.0, 316.0, 317.0, 319.0, 314.0, 296.0, 291.0, 270.0, 274.0, 306.0, 324.0, 299.0, 288.0, 296.0, 291.0, 298.0, 284.0, 316.0, 317.0, 0.0, 3.0, 287.0, 292.0, 292.0, 290.0, 293.0, 294.0, 299.0, 280.0, 316.0, 311.0, 319.0, 314.0, 322.0, 311.0, 293.0, 289.0, 291.0, 291.0, 319.0, 317.0, 281.0, 286.0, 289.0, 298.0, 316.0, 320.0, 316.0, 314.0, 324.0, 312.0, 280.0, 296.0, 316.0, 320.0, 301.0, 281.0, 290.0, 297.0, 319.0, 317.0, 319.0, 317.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8203957464749945, "mean_processing_ms": 0.2325105755388932, "mean_inference_ms": 1.4210101321453532}, "off_policy_estimator": {}, "info": {"num_steps_trained": 9792000, "num_steps_sampled": 5222400, "sample_time_ms": 22766.104, "load_time_ms": 36.963, "grad_time_ms": 10377.417, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0032091455068439245, "policy_loss": -0.004078669007867575, "vf_loss": 78.43866729736328, "vf_explained_var": 0.7684862613677979, "kl": 0.00216904329136014, "entropy": 1.1121129989624023, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5222400, "episodes_total": 13056, "training_iteration": 408, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-16-35", "timestamp": 1660259795, "time_this_iter_s": 28.361918210983276, "time_total_s": 18204.756138324738, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 18204.756138324738, "timesteps_since_restore": 5222400, "iterations_since_restore": 408, "perf": {"cpu_util_percent": 30.642500000000002, "ram_util_percent": 58.74749999999999}}
-{"episode_reward_max": 639.0, "episode_reward_min": 3.0, "episode_reward_mean": 606.24, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 328.0}, "policy_reward_mean": {"ppo": 303.12}, "custom_metrics": {"sparse_reward_mean": 209.8, "sparse_reward_min": 0, "sparse_reward_max": 220, "shaped_reward_mean": 186.64, "shaped_reward_min": 3, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.61, "onion_pickup_agent_0_min": 3, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 18.4, "onion_pickup_agent_1_min": 5, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.16, "useful_onion_pickup_agent_0_min": 1, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.69, "useful_onion_pickup_agent_1_min": 1, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.18, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.31, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.08, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.12, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 16.15, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.81, "potting_onion_agent_1_min": 1, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.53, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.67, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.6, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.92, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.76, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.57, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.67, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 4.99, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.56, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.95, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.15, 
"optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.81, "optimal_onion_potting_agent_1_min": 1, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.15, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.81, "viable_onion_potting_agent_1_min": 1, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [633.0, 630.0, 570.0, 633.0, 633.0, 584.0, 621.0, 636.0, 584.0, 630.0, 576.0, 587.0, 630.0, 630.0, 630.0, 636.0, 587.0, 582.0, 630.0, 639.0, 633.0, 633.0, 465.0, 636.0, 587.0, 630.0, 630.0, 587.0, 636.0, 630.0, 630.0, 636.0, 587.0, 582.0, 636.0, 582.0, 633.0, 633.0, 633.0, 587.0, 544.0, 630.0, 587.0, 587.0, 582.0, 633.0, 3.0, 579.0, 582.0, 587.0, 579.0, 627.0, 633.0, 633.0, 582.0, 582.0, 636.0, 567.0, 587.0, 636.0, 630.0, 636.0, 576.0, 636.0, 582.0, 587.0, 636.0, 636.0, 636.0, 636.0, 636.0, 636.0, 633.0, 618.0, 582.0, 630.0, 636.0, 636.0, 633.0, 636.0, 639.0, 630.0, 576.0, 582.0, 633.0, 639.0, 579.0, 636.0, 636.0, 639.0, 579.0, 636.0, 630.0, 633.0, 636.0, 579.0, 630.0, 633.0, 516.0, 633.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [311.0, 322.0, 305.0, 325.0, 279.0, 291.0, 319.0, 314.0, 316.0, 317.0, 287.0, 297.0, 311.0, 310.0, 321.0, 315.0, 299.0, 285.0, 314.0, 316.0, 282.0, 294.0, 296.0, 291.0, 318.0, 312.0, 316.0, 314.0, 314.0, 316.0, 316.0, 320.0, 299.0, 288.0, 294.0, 288.0, 313.0, 317.0, 319.0, 320.0, 314.0, 319.0, 317.0, 316.0, 239.0, 226.0, 319.0, 317.0, 301.0, 286.0, 321.0, 309.0, 316.0, 314.0, 293.0, 294.0, 317.0, 319.0, 313.0, 317.0, 311.0, 319.0, 321.0, 315.0, 296.0, 291.0, 291.0, 291.0, 308.0, 328.0, 293.0, 289.0, 309.0, 324.0, 316.0, 317.0, 319.0, 314.0, 296.0, 291.0, 270.0, 274.0, 306.0, 324.0, 299.0, 288.0, 296.0, 291.0, 298.0, 284.0, 316.0, 317.0, 0.0, 3.0, 287.0, 292.0, 292.0, 290.0, 
293.0, 294.0, 299.0, 280.0, 316.0, 311.0, 319.0, 314.0, 322.0, 311.0, 293.0, 289.0, 291.0, 291.0, 319.0, 317.0, 281.0, 286.0, 289.0, 298.0, 316.0, 320.0, 316.0, 314.0, 324.0, 312.0, 280.0, 296.0, 316.0, 320.0, 301.0, 281.0, 290.0, 297.0, 319.0, 317.0, 319.0, 317.0, 322.0, 314.0, 315.0, 321.0, 314.0, 322.0, 319.0, 317.0, 311.0, 322.0, 312.0, 306.0, 294.0, 288.0, 310.0, 320.0, 314.0, 322.0, 314.0, 322.0, 316.0, 317.0, 316.0, 320.0, 317.0, 322.0, 319.0, 311.0, 281.0, 295.0, 293.0, 289.0, 316.0, 317.0, 322.0, 317.0, 293.0, 286.0, 316.0, 320.0, 325.0, 311.0, 319.0, 320.0, 293.0, 286.0, 319.0, 317.0, 319.0, 311.0, 317.0, 316.0, 314.0, 322.0, 291.0, 288.0, 314.0, 316.0, 313.0, 320.0, 265.0, 251.0, 321.0, 312.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8194928769561236, "mean_processing_ms": 0.23232969366347853, "mean_inference_ms": 1.4199784153489992}, "off_policy_estimator": {}, "info": {"num_steps_trained": 9816000, "num_steps_sampled": 5235200, "sample_time_ms": 22579.389, "load_time_ms": 36.726, "grad_time_ms": 10063.447, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0015366753796115518, "policy_loss": -0.006077593192458153, "vf_loss": 81.69181060791016, "vf_explained_var": 0.7707114219665527, "kl": 0.001978269312530756, "entropy": 1.1098326444625854, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5235200, "episodes_total": 13088, "training_iteration": 409, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-17-04", "timestamp": 1660259824, "time_this_iter_s": 29.396647930145264, "time_total_s": 18234.152786254883, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 18234.152786254883, "timesteps_since_restore": 5235200, "iterations_since_restore": 409, "perf": {"cpu_util_percent": 30.553658536585367, "ram_util_percent": 58.824390243902435}}
-{"episode_reward_max": 639.0, "episode_reward_min": 465.0, "episode_reward_mean": 616.3, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 226.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 308.15}, "custom_metrics": {"sparse_reward_mean": 213.6, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 189.1, "shaped_reward_min": 145, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.79, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 18.64, "onion_pickup_agent_1_min": 15, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.48, "useful_onion_pickup_agent_0_min": 13, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.97, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.14, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.29, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.05, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.07, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 16.4, "potting_onion_agent_0_min": 13, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 18.03, "potting_onion_agent_1_min": 14, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.53, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.8, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 12, "useful_dish_pickup_agent_0_mean": 5.6, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 4.97, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.68, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.61, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.73, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.09, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.66, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.03, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.4, 
"optimal_onion_potting_agent_0_min": 13, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 18.03, "optimal_onion_potting_agent_1_min": 14, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.4, "viable_onion_potting_agent_0_min": 13, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 18.03, "viable_onion_potting_agent_1_min": 14, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [587.0, 624.0, 636.0, 630.0, 627.0, 633.0, 587.0, 633.0, 639.0, 581.0, 630.0, 570.0, 636.0, 636.0, 587.0, 633.0, 627.0, 587.0, 633.0, 627.0, 582.0, 636.0, 633.0, 627.0, 587.0, 572.0, 582.0, 630.0, 630.0, 639.0, 636.0, 636.0, 582.0, 587.0, 636.0, 636.0, 636.0, 636.0, 636.0, 636.0, 633.0, 618.0, 582.0, 630.0, 636.0, 636.0, 633.0, 636.0, 639.0, 630.0, 576.0, 582.0, 633.0, 639.0, 579.0, 636.0, 636.0, 639.0, 579.0, 636.0, 630.0, 633.0, 636.0, 579.0, 630.0, 633.0, 516.0, 633.0, 633.0, 630.0, 570.0, 633.0, 633.0, 584.0, 621.0, 636.0, 584.0, 630.0, 576.0, 587.0, 630.0, 630.0, 630.0, 636.0, 587.0, 582.0, 630.0, 639.0, 633.0, 633.0, 465.0, 636.0, 587.0, 630.0, 630.0, 587.0, 636.0, 630.0, 630.0, 636.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [293.0, 294.0, 310.0, 314.0, 321.0, 315.0, 316.0, 314.0, 305.0, 322.0, 322.0, 311.0, 298.0, 289.0, 316.0, 317.0, 312.0, 327.0, 287.0, 294.0, 316.0, 314.0, 299.0, 271.0, 312.0, 324.0, 314.0, 322.0, 301.0, 286.0, 321.0, 312.0, 316.0, 311.0, 293.0, 294.0, 313.0, 320.0, 315.0, 312.0, 286.0, 296.0, 317.0, 319.0, 319.0, 314.0, 310.0, 317.0, 293.0, 294.0, 284.0, 288.0, 287.0, 295.0, 316.0, 314.0, 319.0, 311.0, 314.0, 325.0, 319.0, 317.0, 317.0, 319.0, 301.0, 281.0, 290.0, 297.0, 319.0, 317.0, 319.0, 317.0, 322.0, 314.0, 315.0, 321.0, 314.0, 322.0, 319.0, 317.0, 311.0, 322.0, 312.0, 306.0, 294.0, 288.0, 310.0, 320.0, 314.0, 322.0, 314.0, 322.0, 316.0, 317.0, 316.0, 320.0, 317.0, 
322.0, 319.0, 311.0, 281.0, 295.0, 293.0, 289.0, 316.0, 317.0, 322.0, 317.0, 293.0, 286.0, 316.0, 320.0, 325.0, 311.0, 319.0, 320.0, 293.0, 286.0, 319.0, 317.0, 319.0, 311.0, 317.0, 316.0, 314.0, 322.0, 291.0, 288.0, 314.0, 316.0, 313.0, 320.0, 265.0, 251.0, 321.0, 312.0, 311.0, 322.0, 305.0, 325.0, 279.0, 291.0, 319.0, 314.0, 316.0, 317.0, 287.0, 297.0, 311.0, 310.0, 321.0, 315.0, 299.0, 285.0, 314.0, 316.0, 282.0, 294.0, 296.0, 291.0, 318.0, 312.0, 316.0, 314.0, 314.0, 316.0, 316.0, 320.0, 299.0, 288.0, 294.0, 288.0, 313.0, 317.0, 319.0, 320.0, 314.0, 319.0, 317.0, 316.0, 239.0, 226.0, 319.0, 317.0, 301.0, 286.0, 321.0, 309.0, 316.0, 314.0, 293.0, 294.0, 317.0, 319.0, 313.0, 317.0, 311.0, 319.0, 321.0, 315.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8185939281976542, "mean_processing_ms": 0.23214957034967199, "mean_inference_ms": 1.4189514044158715}, "off_policy_estimator": {}, "info": {"num_steps_trained": 9840000, "num_steps_sampled": 5248000, "sample_time_ms": 22039.582, "load_time_ms": 37.045, "grad_time_ms": 9893.172, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0014178849523887038, "policy_loss": -0.008465434424579144, "vf_loss": 76.02017974853516, "vf_explained_var": 0.7725793719291687, "kl": 0.0019942354410886765, "entropy": 1.1089389324188232, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5248000, "episodes_total": 13120, "training_iteration": 410, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-17-33", "timestamp": 1660259853, "time_this_iter_s": 28.213119983673096, "time_total_s": 18262.365906238556, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 18262.365906238556, "timesteps_since_restore": 5248000, "iterations_since_restore": 410, "perf": {"cpu_util_percent": 35.04, "ram_util_percent": 58.745000000000005}}
-{"episode_reward_max": 639.0, "episode_reward_min": 465.0, "episode_reward_mean": 615.25, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 226.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 307.625}, "custom_metrics": {"sparse_reward_mean": 213.4, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 188.45, "shaped_reward_min": 145, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.63, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 18.79, "onion_pickup_agent_1_min": 15, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.29, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 18.07, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.15, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.3, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.04, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.08, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 16.17, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 18.11, "potting_onion_agent_1_min": 14, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.55, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.82, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 12, "useful_dish_pickup_agent_0_mean": 5.6, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.87, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.64, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.71, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.84, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.0, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.77, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.94, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.17, 
"optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 18.11, "optimal_onion_potting_agent_1_min": 14, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.17, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 18.11, "viable_onion_potting_agent_1_min": 14, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 633.0, 576.0, 630.0, 587.0, 630.0, 639.0, 627.0, 636.0, 627.0, 630.0, 630.0, 521.0, 636.0, 576.0, 633.0, 633.0, 636.0, 584.0, 624.0, 633.0, 630.0, 633.0, 584.0, 633.0, 636.0, 633.0, 633.0, 636.0, 636.0, 630.0, 579.0, 630.0, 633.0, 516.0, 633.0, 633.0, 630.0, 570.0, 633.0, 633.0, 584.0, 621.0, 636.0, 584.0, 630.0, 576.0, 587.0, 630.0, 630.0, 630.0, 636.0, 587.0, 582.0, 630.0, 639.0, 633.0, 633.0, 465.0, 636.0, 587.0, 630.0, 630.0, 587.0, 636.0, 630.0, 630.0, 636.0, 587.0, 624.0, 636.0, 630.0, 627.0, 633.0, 587.0, 633.0, 639.0, 581.0, 630.0, 570.0, 636.0, 636.0, 587.0, 633.0, 627.0, 587.0, 633.0, 627.0, 582.0, 636.0, 633.0, 627.0, 587.0, 572.0, 582.0, 630.0, 630.0, 639.0, 636.0, 636.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [294.0, 288.0, 314.0, 319.0, 286.0, 290.0, 319.0, 311.0, 293.0, 294.0, 316.0, 314.0, 319.0, 320.0, 310.0, 317.0, 316.0, 320.0, 313.0, 314.0, 324.0, 306.0, 320.0, 310.0, 254.0, 267.0, 319.0, 317.0, 295.0, 281.0, 319.0, 314.0, 316.0, 317.0, 319.0, 317.0, 291.0, 293.0, 316.0, 308.0, 316.0, 317.0, 311.0, 319.0, 313.0, 320.0, 293.0, 291.0, 317.0, 316.0, 314.0, 322.0, 314.0, 319.0, 326.0, 307.0, 321.0, 315.0, 319.0, 317.0, 319.0, 311.0, 290.0, 289.0, 314.0, 316.0, 313.0, 320.0, 265.0, 251.0, 321.0, 312.0, 311.0, 322.0, 305.0, 325.0, 279.0, 291.0, 319.0, 314.0, 316.0, 317.0, 287.0, 297.0, 311.0, 310.0, 321.0, 315.0, 299.0, 285.0, 314.0, 316.0, 282.0, 294.0, 296.0, 291.0, 318.0, 
312.0, 316.0, 314.0, 314.0, 316.0, 316.0, 320.0, 299.0, 288.0, 294.0, 288.0, 313.0, 317.0, 319.0, 320.0, 314.0, 319.0, 317.0, 316.0, 239.0, 226.0, 319.0, 317.0, 301.0, 286.0, 321.0, 309.0, 316.0, 314.0, 293.0, 294.0, 317.0, 319.0, 313.0, 317.0, 311.0, 319.0, 321.0, 315.0, 293.0, 294.0, 310.0, 314.0, 321.0, 315.0, 316.0, 314.0, 305.0, 322.0, 322.0, 311.0, 298.0, 289.0, 316.0, 317.0, 312.0, 327.0, 287.0, 294.0, 316.0, 314.0, 299.0, 271.0, 312.0, 324.0, 314.0, 322.0, 301.0, 286.0, 321.0, 312.0, 316.0, 311.0, 293.0, 294.0, 313.0, 320.0, 315.0, 312.0, 286.0, 296.0, 317.0, 319.0, 319.0, 314.0, 310.0, 317.0, 293.0, 294.0, 284.0, 288.0, 287.0, 295.0, 316.0, 314.0, 319.0, 311.0, 314.0, 325.0, 319.0, 317.0, 317.0, 319.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8176992911646718, "mean_processing_ms": 0.23196962818643072, "mean_inference_ms": 1.417972483148229}, "off_policy_estimator": {}, "info": {"num_steps_trained": 9864000, "num_steps_sampled": 5260800, "sample_time_ms": 21770.07, "load_time_ms": 36.824, "grad_time_ms": 9519.356, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.001577894203364849, "policy_loss": -0.005276820156723261, "vf_loss": 74.05913543701172, "vf_explained_var": 0.7708218693733215, "kl": 0.002156370086595416, "entropy": 1.1023942232131958, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5260800, "episodes_total": 13152, "training_iteration": 411, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-18-03", "timestamp": 1660259883, "time_this_iter_s": 30.32603693008423, "time_total_s": 18292.69194316864, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 18292.69194316864, "timesteps_since_restore": 5260800, "iterations_since_restore": 411, "perf": {"cpu_util_percent": 34.49999999999999, "ram_util_percent": 58.81162790697674}}
-{"episode_reward_max": 639.0, "episode_reward_min": 197.0, "episode_reward_mean": 608.93, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 97.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 304.465}, "custom_metrics": {"sparse_reward_mean": 211.0, "sparse_reward_min": 60, "sparse_reward_max": 220, "shaped_reward_mean": 186.93, "shaped_reward_min": 77, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.53, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 18.57, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.15, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.81, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.14, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.3, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.02, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.08, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 16.04, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.92, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.39, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.95, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 12, "useful_dish_pickup_agent_0_mean": 5.56, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.94, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.57, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.83, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.72, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.99, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.69, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.9, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.04, 
"optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.92, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.04, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.92, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [630.0, 630.0, 582.0, 633.0, 633.0, 639.0, 584.0, 639.0, 197.0, 579.0, 465.0, 579.0, 636.0, 630.0, 639.0, 630.0, 630.0, 413.0, 582.0, 630.0, 576.0, 633.0, 639.0, 582.0, 633.0, 630.0, 630.0, 584.0, 587.0, 522.0, 636.0, 630.0, 636.0, 630.0, 630.0, 636.0, 587.0, 624.0, 636.0, 630.0, 627.0, 633.0, 587.0, 633.0, 639.0, 581.0, 630.0, 570.0, 636.0, 636.0, 587.0, 633.0, 627.0, 587.0, 633.0, 627.0, 582.0, 636.0, 633.0, 627.0, 587.0, 572.0, 582.0, 630.0, 630.0, 639.0, 636.0, 636.0, 582.0, 633.0, 576.0, 630.0, 587.0, 630.0, 639.0, 627.0, 636.0, 627.0, 630.0, 630.0, 521.0, 636.0, 576.0, 633.0, 633.0, 636.0, 584.0, 624.0, 633.0, 630.0, 633.0, 584.0, 633.0, 636.0, 633.0, 633.0, 636.0, 636.0, 630.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [314.0, 316.0, 311.0, 319.0, 291.0, 291.0, 314.0, 319.0, 311.0, 322.0, 317.0, 322.0, 293.0, 291.0, 316.0, 323.0, 100.0, 97.0, 294.0, 285.0, 229.0, 236.0, 288.0, 291.0, 321.0, 315.0, 311.0, 319.0, 317.0, 322.0, 319.0, 311.0, 311.0, 319.0, 207.0, 206.0, 293.0, 289.0, 311.0, 319.0, 285.0, 291.0, 311.0, 322.0, 319.0, 320.0, 296.0, 286.0, 318.0, 315.0, 321.0, 309.0, 316.0, 314.0, 287.0, 297.0, 296.0, 291.0, 262.0, 260.0, 319.0, 317.0, 313.0, 317.0, 317.0, 319.0, 313.0, 317.0, 311.0, 319.0, 321.0, 315.0, 293.0, 294.0, 310.0, 314.0, 321.0, 315.0, 316.0, 314.0, 305.0, 322.0, 322.0, 311.0, 298.0, 289.0, 316.0, 317.0, 312.0, 327.0, 287.0, 294.0, 316.0, 314.0, 299.0, 271.0, 312.0, 
324.0, 314.0, 322.0, 301.0, 286.0, 321.0, 312.0, 316.0, 311.0, 293.0, 294.0, 313.0, 320.0, 315.0, 312.0, 286.0, 296.0, 317.0, 319.0, 319.0, 314.0, 310.0, 317.0, 293.0, 294.0, 284.0, 288.0, 287.0, 295.0, 316.0, 314.0, 319.0, 311.0, 314.0, 325.0, 319.0, 317.0, 317.0, 319.0, 294.0, 288.0, 314.0, 319.0, 286.0, 290.0, 319.0, 311.0, 293.0, 294.0, 316.0, 314.0, 319.0, 320.0, 310.0, 317.0, 316.0, 320.0, 313.0, 314.0, 324.0, 306.0, 320.0, 310.0, 254.0, 267.0, 319.0, 317.0, 295.0, 281.0, 319.0, 314.0, 316.0, 317.0, 319.0, 317.0, 291.0, 293.0, 316.0, 308.0, 316.0, 317.0, 311.0, 319.0, 313.0, 320.0, 293.0, 291.0, 317.0, 316.0, 314.0, 322.0, 314.0, 319.0, 326.0, 307.0, 321.0, 315.0, 319.0, 317.0, 319.0, 311.0, 290.0, 289.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8168045018063097, "mean_processing_ms": 0.23178953609537822, "mean_inference_ms": 1.4169011715931346}, "off_policy_estimator": {}, "info": {"num_steps_trained": 9888000, "num_steps_sampled": 5273600, "sample_time_ms": 21329.9, "load_time_ms": 36.743, "grad_time_ms": 9279.775, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0034279574174433947, "policy_loss": -0.004527573008090258, "vf_loss": 85.10655975341797, "vf_explained_var": 0.7758853435516357, "kl": 0.0018181651830673218, "entropy": 1.1102546453475952, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5273600, "episodes_total": 13184, "training_iteration": 412, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-18-31", "timestamp": 1660259911, "time_this_iter_s": 28.26536202430725, "time_total_s": 18320.957305192947, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 18320.957305192947, "timesteps_since_restore": 5273600, "iterations_since_restore": 412, "perf": {"cpu_util_percent": 35.269999999999996, "ram_util_percent": 59.315}}
-{"episode_reward_max": 639.0, "episode_reward_min": 197.0, "episode_reward_mean": 608.16, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 97.0}, "policy_reward_max": {"ppo": 326.0}, "policy_reward_mean": {"ppo": 304.08}, "custom_metrics": {"sparse_reward_mean": 210.6, "sparse_reward_min": 60, "sparse_reward_max": 220, "shaped_reward_mean": 186.96, "shaped_reward_min": 77, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.73, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 18.35, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.28, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 17.58, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.14, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.29, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.02, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.1, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 16.19, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.7, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.52, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 6.07, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 13, "useful_dish_pickup_agent_0_mean": 5.55, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.03, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.8, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.86, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 6, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.63, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.08, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 6, "soup_delivery_agent_0_mean": 5.59, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.98, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.19, 
"optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.7, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.19, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.7, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [627.0, 636.0, 587.0, 621.0, 630.0, 582.0, 636.0, 587.0, 633.0, 630.0, 633.0, 627.0, 587.0, 633.0, 627.0, 630.0, 633.0, 587.0, 587.0, 633.0, 587.0, 587.0, 587.0, 636.0, 579.0, 627.0, 633.0, 582.0, 630.0, 630.0, 590.0, 633.0, 630.0, 639.0, 636.0, 636.0, 582.0, 633.0, 576.0, 630.0, 587.0, 630.0, 639.0, 627.0, 636.0, 627.0, 630.0, 630.0, 521.0, 636.0, 576.0, 633.0, 633.0, 636.0, 584.0, 624.0, 633.0, 630.0, 633.0, 584.0, 633.0, 636.0, 633.0, 633.0, 636.0, 636.0, 630.0, 579.0, 630.0, 630.0, 582.0, 633.0, 633.0, 639.0, 584.0, 639.0, 197.0, 579.0, 465.0, 579.0, 636.0, 630.0, 639.0, 630.0, 630.0, 413.0, 582.0, 630.0, 576.0, 633.0, 639.0, 582.0, 633.0, 630.0, 630.0, 584.0, 587.0, 522.0, 636.0, 630.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [313.0, 314.0, 319.0, 317.0, 298.0, 289.0, 317.0, 304.0, 317.0, 313.0, 286.0, 296.0, 319.0, 317.0, 288.0, 299.0, 320.0, 313.0, 316.0, 314.0, 316.0, 317.0, 322.0, 305.0, 285.0, 302.0, 316.0, 317.0, 314.0, 313.0, 314.0, 316.0, 317.0, 316.0, 290.0, 297.0, 299.0, 288.0, 316.0, 317.0, 288.0, 299.0, 288.0, 299.0, 288.0, 299.0, 322.0, 314.0, 284.0, 295.0, 316.0, 311.0, 317.0, 316.0, 285.0, 297.0, 313.0, 317.0, 311.0, 319.0, 299.0, 291.0, 318.0, 315.0, 319.0, 311.0, 314.0, 325.0, 319.0, 317.0, 317.0, 319.0, 294.0, 288.0, 314.0, 319.0, 286.0, 290.0, 319.0, 311.0, 293.0, 294.0, 316.0, 314.0, 319.0, 320.0, 310.0, 317.0, 316.0, 320.0, 313.0, 314.0, 324.0, 306.0, 320.0, 310.0, 254.0, 
267.0, 319.0, 317.0, 295.0, 281.0, 319.0, 314.0, 316.0, 317.0, 319.0, 317.0, 291.0, 293.0, 316.0, 308.0, 316.0, 317.0, 311.0, 319.0, 313.0, 320.0, 293.0, 291.0, 317.0, 316.0, 314.0, 322.0, 314.0, 319.0, 326.0, 307.0, 321.0, 315.0, 319.0, 317.0, 319.0, 311.0, 290.0, 289.0, 314.0, 316.0, 311.0, 319.0, 291.0, 291.0, 314.0, 319.0, 311.0, 322.0, 317.0, 322.0, 293.0, 291.0, 316.0, 323.0, 100.0, 97.0, 294.0, 285.0, 229.0, 236.0, 288.0, 291.0, 321.0, 315.0, 311.0, 319.0, 317.0, 322.0, 319.0, 311.0, 311.0, 319.0, 207.0, 206.0, 293.0, 289.0, 311.0, 319.0, 285.0, 291.0, 311.0, 322.0, 319.0, 320.0, 296.0, 286.0, 318.0, 315.0, 321.0, 309.0, 316.0, 314.0, 287.0, 297.0, 296.0, 291.0, 262.0, 260.0, 319.0, 317.0, 313.0, 317.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8159183949251133, "mean_processing_ms": 0.23161113375748543, "mean_inference_ms": 1.4159039621746354}, "off_policy_estimator": {}, "info": {"num_steps_trained": 9912000, "num_steps_sampled": 5286400, "sample_time_ms": 21002.373, "load_time_ms": 36.818, "grad_time_ms": 9265.226, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.002250772900879383, "policy_loss": -0.005318752024322748, "vf_loss": 81.240478515625, "vf_explained_var": 0.7617523074150085, "kl": 0.0018393909558653831, "entropy": 1.1090354919433594, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5286400, "episodes_total": 13216, "training_iteration": 413, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-19-03", "timestamp": 1660259943, "time_this_iter_s": 31.600411891937256, "time_total_s": 18352.557717084885, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 18352.557717084885, "timesteps_since_restore": 5286400, "iterations_since_restore": 413, "perf": {"cpu_util_percent": 31.806666666666665, "ram_util_percent": 58.973333333333315}}
-{"episode_reward_max": 639.0, "episode_reward_min": 197.0, "episode_reward_mean": 608.81, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 97.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 304.405}, "custom_metrics": {"sparse_reward_mean": 210.8, "sparse_reward_min": 60, "sparse_reward_max": 220, "shaped_reward_mean": 187.21, "shaped_reward_min": 77, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.78, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 18.38, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.31, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 17.52, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.21, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.34, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.03, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.11, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 16.2, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.7, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.56, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 6.13, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 13, "useful_dish_pickup_agent_0_mean": 5.53, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.14, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.86, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.87, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 6, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.55, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.15, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 6, "soup_delivery_agent_0_mean": 5.51, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.04, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.2, 
"optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.7, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.2, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.7, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [627.0, 633.0, 639.0, 530.0, 630.0, 582.0, 582.0, 624.0, 582.0, 630.0, 630.0, 630.0, 630.0, 587.0, 636.0, 639.0, 636.0, 627.0, 633.0, 627.0, 633.0, 636.0, 630.0, 630.0, 630.0, 630.0, 636.0, 639.0, 582.0, 636.0, 639.0, 636.0, 636.0, 636.0, 630.0, 579.0, 630.0, 630.0, 582.0, 633.0, 633.0, 639.0, 584.0, 639.0, 197.0, 579.0, 465.0, 579.0, 636.0, 630.0, 639.0, 630.0, 630.0, 413.0, 582.0, 630.0, 576.0, 633.0, 639.0, 582.0, 633.0, 630.0, 630.0, 584.0, 587.0, 522.0, 636.0, 630.0, 627.0, 636.0, 587.0, 621.0, 630.0, 582.0, 636.0, 587.0, 633.0, 630.0, 633.0, 627.0, 587.0, 633.0, 627.0, 630.0, 633.0, 587.0, 587.0, 633.0, 587.0, 587.0, 587.0, 636.0, 579.0, 627.0, 633.0, 582.0, 630.0, 630.0, 590.0, 633.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [311.0, 316.0, 314.0, 319.0, 324.0, 315.0, 260.0, 270.0, 313.0, 317.0, 290.0, 292.0, 286.0, 296.0, 313.0, 311.0, 291.0, 291.0, 316.0, 314.0, 316.0, 314.0, 314.0, 316.0, 310.0, 320.0, 288.0, 299.0, 319.0, 317.0, 319.0, 320.0, 314.0, 322.0, 307.0, 320.0, 316.0, 317.0, 311.0, 316.0, 316.0, 317.0, 314.0, 322.0, 311.0, 319.0, 313.0, 317.0, 316.0, 314.0, 316.0, 314.0, 314.0, 322.0, 319.0, 320.0, 290.0, 292.0, 319.0, 317.0, 322.0, 317.0, 321.0, 315.0, 321.0, 315.0, 319.0, 317.0, 319.0, 311.0, 290.0, 289.0, 314.0, 316.0, 311.0, 319.0, 291.0, 291.0, 314.0, 319.0, 311.0, 322.0, 317.0, 322.0, 293.0, 291.0, 316.0, 323.0, 100.0, 97.0, 294.0, 285.0, 229.0, 236.0, 288.0, 291.0, 321.0, 
315.0, 311.0, 319.0, 317.0, 322.0, 319.0, 311.0, 311.0, 319.0, 207.0, 206.0, 293.0, 289.0, 311.0, 319.0, 285.0, 291.0, 311.0, 322.0, 319.0, 320.0, 296.0, 286.0, 318.0, 315.0, 321.0, 309.0, 316.0, 314.0, 287.0, 297.0, 296.0, 291.0, 262.0, 260.0, 319.0, 317.0, 313.0, 317.0, 313.0, 314.0, 319.0, 317.0, 298.0, 289.0, 317.0, 304.0, 317.0, 313.0, 286.0, 296.0, 319.0, 317.0, 288.0, 299.0, 320.0, 313.0, 316.0, 314.0, 316.0, 317.0, 322.0, 305.0, 285.0, 302.0, 316.0, 317.0, 314.0, 313.0, 314.0, 316.0, 317.0, 316.0, 290.0, 297.0, 299.0, 288.0, 316.0, 317.0, 288.0, 299.0, 288.0, 299.0, 288.0, 299.0, 322.0, 314.0, 284.0, 295.0, 316.0, 311.0, 317.0, 316.0, 285.0, 297.0, 313.0, 317.0, 311.0, 319.0, 299.0, 291.0, 318.0, 315.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8150462375742643, "mean_processing_ms": 0.23143722383890097, "mean_inference_ms": 1.4151601741062898}, "off_policy_estimator": {}, "info": {"num_steps_trained": 9936000, "num_steps_sampled": 5299200, "sample_time_ms": 21315.468, "load_time_ms": 36.748, "grad_time_ms": 9192.863, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0032192638609558344, "policy_loss": -0.004382268991321325, "vf_loss": 81.57144927978516, "vf_explained_var": 0.7626829147338867, "kl": 0.001976991770789027, "entropy": 1.1112231016159058, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5299200, "episodes_total": 13248, "training_iteration": 414, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-19-39", "timestamp": 1660259979, "time_this_iter_s": 36.33256697654724, "time_total_s": 18388.890284061432, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 18388.890284061432, "timesteps_since_restore": 5299200, "iterations_since_restore": 414, "perf": {"cpu_util_percent": 31.756862745098033, "ram_util_percent": 59.57450980392157}}
-{"episode_reward_max": 639.0, "episode_reward_min": 522.0, "episode_reward_mean": 614.8, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 260.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 307.4}, "custom_metrics": {"sparse_reward_mean": 213.2, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 188.4, "shaped_reward_min": 161, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.04, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 18.55, "onion_pickup_agent_1_min": 14, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.62, "useful_onion_pickup_agent_0_min": 13, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 17.79, "useful_onion_pickup_agent_1_min": 14, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.35, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.43, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.11, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, "useful_onion_drop_agent_1_mean": 0.15, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 16.33, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.82, "potting_onion_agent_1_min": 14, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.69, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 6.1, "dish_pickup_agent_1_min": 4, "dish_pickup_agent_1_max": 13, "useful_dish_pickup_agent_0_mean": 5.51, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.14, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.95, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.79, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 6, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.61, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.2, "soup_pickup_agent_1_min": 4, "soup_pickup_agent_1_max": 6, "soup_delivery_agent_0_mean": 5.57, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.1, "soup_delivery_agent_1_min": 4, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.33, 
"optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.82, "optimal_onion_potting_agent_1_min": 14, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.33, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.82, "viable_onion_potting_agent_1_min": 14, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [630.0, 633.0, 570.0, 633.0, 582.0, 636.0, 630.0, 639.0, 639.0, 584.0, 570.0, 636.0, 627.0, 627.0, 630.0, 630.0, 573.0, 630.0, 582.0, 587.0, 633.0, 582.0, 633.0, 633.0, 630.0, 525.0, 582.0, 633.0, 627.0, 633.0, 627.0, 561.0, 587.0, 522.0, 636.0, 630.0, 627.0, 636.0, 587.0, 621.0, 630.0, 582.0, 636.0, 587.0, 633.0, 630.0, 633.0, 627.0, 587.0, 633.0, 627.0, 630.0, 633.0, 587.0, 587.0, 633.0, 587.0, 587.0, 587.0, 636.0, 579.0, 627.0, 633.0, 582.0, 630.0, 630.0, 590.0, 633.0, 627.0, 633.0, 639.0, 530.0, 630.0, 582.0, 582.0, 624.0, 582.0, 630.0, 630.0, 630.0, 630.0, 587.0, 636.0, 639.0, 636.0, 627.0, 633.0, 627.0, 633.0, 636.0, 630.0, 630.0, 630.0, 630.0, 636.0, 639.0, 582.0, 636.0, 639.0, 636.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [316.0, 314.0, 317.0, 316.0, 284.0, 286.0, 316.0, 317.0, 291.0, 291.0, 314.0, 322.0, 316.0, 314.0, 324.0, 315.0, 319.0, 320.0, 287.0, 297.0, 287.0, 283.0, 321.0, 315.0, 313.0, 314.0, 316.0, 311.0, 319.0, 311.0, 311.0, 319.0, 285.0, 288.0, 318.0, 312.0, 286.0, 296.0, 291.0, 296.0, 314.0, 319.0, 280.0, 302.0, 314.0, 319.0, 316.0, 317.0, 319.0, 311.0, 265.0, 260.0, 293.0, 289.0, 314.0, 319.0, 310.0, 317.0, 314.0, 319.0, 318.0, 309.0, 285.0, 276.0, 296.0, 291.0, 262.0, 260.0, 319.0, 317.0, 313.0, 317.0, 313.0, 314.0, 319.0, 317.0, 298.0, 289.0, 317.0, 304.0, 317.0, 313.0, 286.0, 296.0, 319.0, 317.0, 288.0, 299.0, 320.0, 313.0, 316.0, 314.0, 316.0, 317.0, 322.0, 305.0, 285.0, 
302.0, 316.0, 317.0, 314.0, 313.0, 314.0, 316.0, 317.0, 316.0, 290.0, 297.0, 299.0, 288.0, 316.0, 317.0, 288.0, 299.0, 288.0, 299.0, 288.0, 299.0, 322.0, 314.0, 284.0, 295.0, 316.0, 311.0, 317.0, 316.0, 285.0, 297.0, 313.0, 317.0, 311.0, 319.0, 299.0, 291.0, 318.0, 315.0, 311.0, 316.0, 314.0, 319.0, 324.0, 315.0, 260.0, 270.0, 313.0, 317.0, 290.0, 292.0, 286.0, 296.0, 313.0, 311.0, 291.0, 291.0, 316.0, 314.0, 316.0, 314.0, 314.0, 316.0, 310.0, 320.0, 288.0, 299.0, 319.0, 317.0, 319.0, 320.0, 314.0, 322.0, 307.0, 320.0, 316.0, 317.0, 311.0, 316.0, 316.0, 317.0, 314.0, 322.0, 311.0, 319.0, 313.0, 317.0, 316.0, 314.0, 316.0, 314.0, 314.0, 322.0, 319.0, 320.0, 290.0, 292.0, 319.0, 317.0, 322.0, 317.0, 321.0, 315.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8141797334624801, "mean_processing_ms": 0.23126474228719665, "mean_inference_ms": 1.4144802295158576}, "off_policy_estimator": {}, "info": {"num_steps_trained": 9960000, "num_steps_sampled": 5312000, "sample_time_ms": 21138.109, "load_time_ms": 36.708, "grad_time_ms": 9062.52, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.00037816105759702623, "policy_loss": -0.006607938092201948, "vf_loss": 75.41075897216797, "vf_explained_var": 0.7763264775276184, "kl": 0.0018363663693889976, "entropy": 1.1099668741226196, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5312000, "episodes_total": 13280, "training_iteration": 415, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-20-09", "timestamp": 1660260009, "time_this_iter_s": 29.82709288597107, "time_total_s": 18418.717376947403, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 18418.717376947403, "timesteps_since_restore": 5312000, "iterations_since_restore": 415, "perf": {"cpu_util_percent": 32.99761904761905, "ram_util_percent": 59.095238095238095}}
-{"episode_reward_max": 639.0, "episode_reward_min": 525.0, "episode_reward_mean": 615.97, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 260.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 307.985}, "custom_metrics": {"sparse_reward_mean": 213.8, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 188.37, "shaped_reward_min": 152, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.97, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 18.66, "onion_pickup_agent_1_min": 15, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.62, "useful_onion_pickup_agent_0_min": 13, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.96, "useful_onion_pickup_agent_1_min": 14, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.41, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.37, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.13, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, "useful_onion_drop_agent_1_mean": 0.11, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.25, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.99, "potting_onion_agent_1_min": 14, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.6, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.94, "dish_pickup_agent_1_min": 4, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.49, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.11, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.83, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.69, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.61, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.17, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.59, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.1, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.25, 
"optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.99, "optimal_onion_potting_agent_1_min": 14, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.25, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.99, "viable_onion_potting_agent_1_min": 14, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [636.0, 552.0, 630.0, 633.0, 579.0, 636.0, 587.0, 579.0, 630.0, 630.0, 633.0, 579.0, 630.0, 636.0, 636.0, 633.0, 582.0, 630.0, 636.0, 636.0, 627.0, 582.0, 633.0, 633.0, 587.0, 636.0, 584.0, 579.0, 630.0, 582.0, 627.0, 633.0, 630.0, 630.0, 590.0, 633.0, 627.0, 633.0, 639.0, 530.0, 630.0, 582.0, 582.0, 624.0, 582.0, 630.0, 630.0, 630.0, 630.0, 587.0, 636.0, 639.0, 636.0, 627.0, 633.0, 627.0, 633.0, 636.0, 630.0, 630.0, 630.0, 630.0, 636.0, 639.0, 582.0, 636.0, 639.0, 636.0, 630.0, 633.0, 570.0, 633.0, 582.0, 636.0, 630.0, 639.0, 639.0, 584.0, 570.0, 636.0, 627.0, 627.0, 630.0, 630.0, 573.0, 630.0, 582.0, 587.0, 633.0, 582.0, 633.0, 633.0, 630.0, 525.0, 582.0, 633.0, 627.0, 633.0, 627.0, 561.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [322.0, 314.0, 281.0, 271.0, 311.0, 319.0, 322.0, 311.0, 288.0, 291.0, 319.0, 317.0, 296.0, 291.0, 279.0, 300.0, 313.0, 317.0, 316.0, 314.0, 319.0, 314.0, 282.0, 297.0, 311.0, 319.0, 319.0, 317.0, 319.0, 317.0, 313.0, 320.0, 296.0, 286.0, 319.0, 311.0, 314.0, 322.0, 314.0, 322.0, 306.0, 321.0, 289.0, 293.0, 311.0, 322.0, 319.0, 314.0, 285.0, 302.0, 319.0, 317.0, 290.0, 294.0, 288.0, 291.0, 311.0, 319.0, 295.0, 287.0, 308.0, 319.0, 316.0, 317.0, 313.0, 317.0, 311.0, 319.0, 299.0, 291.0, 318.0, 315.0, 311.0, 316.0, 314.0, 319.0, 324.0, 315.0, 260.0, 270.0, 313.0, 317.0, 290.0, 292.0, 286.0, 296.0, 313.0, 311.0, 291.0, 291.0, 316.0, 314.0, 316.0, 314.0, 314.0, 316.0, 310.0, 
320.0, 288.0, 299.0, 319.0, 317.0, 319.0, 320.0, 314.0, 322.0, 307.0, 320.0, 316.0, 317.0, 311.0, 316.0, 316.0, 317.0, 314.0, 322.0, 311.0, 319.0, 313.0, 317.0, 316.0, 314.0, 316.0, 314.0, 314.0, 322.0, 319.0, 320.0, 290.0, 292.0, 319.0, 317.0, 322.0, 317.0, 321.0, 315.0, 316.0, 314.0, 317.0, 316.0, 284.0, 286.0, 316.0, 317.0, 291.0, 291.0, 314.0, 322.0, 316.0, 314.0, 324.0, 315.0, 319.0, 320.0, 287.0, 297.0, 287.0, 283.0, 321.0, 315.0, 313.0, 314.0, 316.0, 311.0, 319.0, 311.0, 311.0, 319.0, 285.0, 288.0, 318.0, 312.0, 286.0, 296.0, 291.0, 296.0, 314.0, 319.0, 280.0, 302.0, 314.0, 319.0, 316.0, 317.0, 319.0, 311.0, 265.0, 260.0, 293.0, 289.0, 314.0, 319.0, 310.0, 317.0, 314.0, 319.0, 318.0, 309.0, 285.0, 276.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8133221505395263, "mean_processing_ms": 0.23109594102314795, "mean_inference_ms": 1.413916507245515}, "off_policy_estimator": {}, "info": {"num_steps_trained": 9984000, "num_steps_sampled": 5324800, "sample_time_ms": 21409.91, "load_time_ms": 36.506, "grad_time_ms": 8913.404, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.003510029288008809, "policy_loss": -0.0037950894329696894, "vf_loss": 78.6290054321289, "vf_explained_var": 0.7686605453491211, "kl": 0.0018828777829185128, "entropy": 1.1155847311019897, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5324800, "episodes_total": 13312, "training_iteration": 416, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-20-41", "timestamp": 1660260041, "time_this_iter_s": 31.975250005722046, "time_total_s": 18450.692626953125, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 18450.692626953125, "timesteps_since_restore": 5324800, "iterations_since_restore": 416, "perf": {"cpu_util_percent": 31.96888888888889, "ram_util_percent": 59.13111111111111}}
-{"episode_reward_max": 639.0, "episode_reward_min": 525.0, "episode_reward_mean": 613.49, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 260.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 306.745}, "custom_metrics": {"sparse_reward_mean": 213.0, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 187.49, "shaped_reward_min": 152, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.1, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 18.4, "onion_pickup_agent_1_min": 15, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.76, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.81, "useful_onion_pickup_agent_1_min": 14, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.46, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.23, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.12, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, "useful_onion_drop_agent_1_mean": 0.08, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.33, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.86, "potting_onion_agent_1_min": 15, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.57, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 6.07, "dish_pickup_agent_1_min": 4, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.44, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.0, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.83, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.82, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.59, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.16, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.58, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.07, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.33, 
"optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.86, "optimal_onion_potting_agent_1_min": 15, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.33, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.86, "viable_onion_potting_agent_1_min": 15, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [567.0, 636.0, 636.0, 636.0, 630.0, 633.0, 630.0, 630.0, 627.0, 584.0, 633.0, 630.0, 573.0, 636.0, 582.0, 579.0, 633.0, 630.0, 636.0, 633.0, 573.0, 587.0, 579.0, 627.0, 630.0, 564.0, 636.0, 582.0, 636.0, 633.0, 579.0, 633.0, 582.0, 636.0, 639.0, 636.0, 630.0, 633.0, 570.0, 633.0, 582.0, 636.0, 630.0, 639.0, 639.0, 584.0, 570.0, 636.0, 627.0, 627.0, 630.0, 630.0, 573.0, 630.0, 582.0, 587.0, 633.0, 582.0, 633.0, 633.0, 630.0, 525.0, 582.0, 633.0, 627.0, 633.0, 627.0, 561.0, 636.0, 552.0, 630.0, 633.0, 579.0, 636.0, 587.0, 579.0, 630.0, 630.0, 633.0, 579.0, 630.0, 636.0, 636.0, 633.0, 582.0, 630.0, 636.0, 636.0, 627.0, 582.0, 633.0, 633.0, 587.0, 636.0, 584.0, 579.0, 630.0, 582.0, 627.0, 633.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [278.0, 289.0, 319.0, 317.0, 319.0, 317.0, 314.0, 322.0, 313.0, 317.0, 311.0, 322.0, 314.0, 316.0, 319.0, 311.0, 313.0, 314.0, 290.0, 294.0, 316.0, 317.0, 314.0, 316.0, 283.0, 290.0, 322.0, 314.0, 297.0, 285.0, 291.0, 288.0, 318.0, 315.0, 314.0, 316.0, 322.0, 314.0, 314.0, 319.0, 296.0, 277.0, 293.0, 294.0, 291.0, 288.0, 305.0, 322.0, 314.0, 316.0, 279.0, 285.0, 314.0, 322.0, 288.0, 294.0, 319.0, 317.0, 321.0, 312.0, 291.0, 288.0, 319.0, 314.0, 290.0, 292.0, 319.0, 317.0, 322.0, 317.0, 321.0, 315.0, 316.0, 314.0, 317.0, 316.0, 284.0, 286.0, 316.0, 317.0, 291.0, 291.0, 314.0, 322.0, 316.0, 314.0, 324.0, 315.0, 319.0, 320.0, 287.0, 297.0, 287.0, 283.0, 321.0, 315.0, 313.0, 
314.0, 316.0, 311.0, 319.0, 311.0, 311.0, 319.0, 285.0, 288.0, 318.0, 312.0, 286.0, 296.0, 291.0, 296.0, 314.0, 319.0, 280.0, 302.0, 314.0, 319.0, 316.0, 317.0, 319.0, 311.0, 265.0, 260.0, 293.0, 289.0, 314.0, 319.0, 310.0, 317.0, 314.0, 319.0, 318.0, 309.0, 285.0, 276.0, 322.0, 314.0, 281.0, 271.0, 311.0, 319.0, 322.0, 311.0, 288.0, 291.0, 319.0, 317.0, 296.0, 291.0, 279.0, 300.0, 313.0, 317.0, 316.0, 314.0, 319.0, 314.0, 282.0, 297.0, 311.0, 319.0, 319.0, 317.0, 319.0, 317.0, 313.0, 320.0, 296.0, 286.0, 319.0, 311.0, 314.0, 322.0, 314.0, 322.0, 306.0, 321.0, 289.0, 293.0, 311.0, 322.0, 319.0, 314.0, 285.0, 302.0, 319.0, 317.0, 290.0, 294.0, 288.0, 291.0, 311.0, 319.0, 295.0, 287.0, 308.0, 319.0, 316.0, 317.0]}, "sampler_perf": {"mean_env_wait_ms": 0.812458332690278, "mean_processing_ms": 0.23092571272245643, "mean_inference_ms": 1.413108766021267}, "off_policy_estimator": {}, "info": {"num_steps_trained": 10008000, "num_steps_sampled": 5337600, "sample_time_ms": 21466.731, "load_time_ms": 36.538, "grad_time_ms": 8937.935, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0011007506400346756, "policy_loss": -0.005807527806609869, "vf_loss": 74.63658905029297, "vf_explained_var": 0.7761281132698059, "kl": 0.0020840545184910297, "entropy": 1.110751986503601, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5337600, "episodes_total": 13344, "training_iteration": 417, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-21-11", "timestamp": 1660260071, "time_this_iter_s": 30.376654863357544, "time_total_s": 18481.069281816483, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 18481.069281816483, "timesteps_since_restore": 5337600, "iterations_since_restore": 417, "perf": {"cpu_util_percent": 32.944186046511625, "ram_util_percent": 59.151162790697676}}
-{"episode_reward_max": 639.0, "episode_reward_min": 536.0, "episode_reward_mean": 613.55, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 261.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 306.775}, "custom_metrics": {"sparse_reward_mean": 213.0, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 187.55, "shaped_reward_min": 152, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.0, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.39, "onion_pickup_agent_1_min": 15, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.74, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.77, "useful_onion_pickup_agent_1_min": 14, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.31, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.2, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.04, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.06, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.42, "potting_onion_agent_0_min": 13, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.84, "potting_onion_agent_1_min": 15, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.55, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 6.04, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.39, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 4.95, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.8, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.81, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.04, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.59, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.19, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.56, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.1, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.42, 
"optimal_onion_potting_agent_0_min": 13, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.84, "optimal_onion_potting_agent_1_min": 15, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.42, "viable_onion_potting_agent_0_min": 13, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.84, "viable_onion_potting_agent_1_min": 15, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [587.0, 630.0, 639.0, 576.0, 630.0, 633.0, 582.0, 587.0, 618.0, 633.0, 582.0, 621.0, 636.0, 587.0, 636.0, 582.0, 636.0, 630.0, 582.0, 633.0, 581.0, 630.0, 636.0, 630.0, 627.0, 633.0, 636.0, 582.0, 627.0, 536.0, 633.0, 627.0, 627.0, 633.0, 627.0, 561.0, 636.0, 552.0, 630.0, 633.0, 579.0, 636.0, 587.0, 579.0, 630.0, 630.0, 633.0, 579.0, 630.0, 636.0, 636.0, 633.0, 582.0, 630.0, 636.0, 636.0, 627.0, 582.0, 633.0, 633.0, 587.0, 636.0, 584.0, 579.0, 630.0, 582.0, 627.0, 633.0, 567.0, 636.0, 636.0, 636.0, 630.0, 633.0, 630.0, 630.0, 627.0, 584.0, 633.0, 630.0, 573.0, 636.0, 582.0, 579.0, 633.0, 630.0, 636.0, 633.0, 573.0, 587.0, 579.0, 627.0, 630.0, 564.0, 636.0, 582.0, 636.0, 633.0, 579.0, 633.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [293.0, 294.0, 316.0, 314.0, 319.0, 320.0, 288.0, 288.0, 316.0, 314.0, 316.0, 317.0, 291.0, 291.0, 293.0, 294.0, 308.0, 310.0, 314.0, 319.0, 289.0, 293.0, 318.0, 303.0, 314.0, 322.0, 298.0, 289.0, 324.0, 312.0, 290.0, 292.0, 314.0, 322.0, 313.0, 317.0, 288.0, 294.0, 322.0, 311.0, 289.0, 292.0, 314.0, 316.0, 314.0, 322.0, 314.0, 316.0, 312.0, 315.0, 311.0, 322.0, 314.0, 322.0, 294.0, 288.0, 316.0, 311.0, 275.0, 261.0, 319.0, 314.0, 314.0, 313.0, 310.0, 317.0, 314.0, 319.0, 318.0, 309.0, 285.0, 276.0, 322.0, 314.0, 281.0, 271.0, 311.0, 319.0, 322.0, 311.0, 288.0, 291.0, 319.0, 317.0, 296.0, 291.0, 279.0, 300.0, 313.0, 317.0, 316.0, 314.0, 319.0, 314.0, 282.0, 297.0, 311.0, 
319.0, 319.0, 317.0, 319.0, 317.0, 313.0, 320.0, 296.0, 286.0, 319.0, 311.0, 314.0, 322.0, 314.0, 322.0, 306.0, 321.0, 289.0, 293.0, 311.0, 322.0, 319.0, 314.0, 285.0, 302.0, 319.0, 317.0, 290.0, 294.0, 288.0, 291.0, 311.0, 319.0, 295.0, 287.0, 308.0, 319.0, 316.0, 317.0, 278.0, 289.0, 319.0, 317.0, 319.0, 317.0, 314.0, 322.0, 313.0, 317.0, 311.0, 322.0, 314.0, 316.0, 319.0, 311.0, 313.0, 314.0, 290.0, 294.0, 316.0, 317.0, 314.0, 316.0, 283.0, 290.0, 322.0, 314.0, 297.0, 285.0, 291.0, 288.0, 318.0, 315.0, 314.0, 316.0, 322.0, 314.0, 314.0, 319.0, 296.0, 277.0, 293.0, 294.0, 291.0, 288.0, 305.0, 322.0, 314.0, 316.0, 279.0, 285.0, 314.0, 322.0, 288.0, 294.0, 319.0, 317.0, 321.0, 312.0, 291.0, 288.0, 319.0, 314.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8116024056297, "mean_processing_ms": 0.23075773519114987, "mean_inference_ms": 1.4123586541833584}, "off_policy_estimator": {}, "info": {"num_steps_trained": 10032000, "num_steps_sampled": 5350400, "sample_time_ms": 21668.296, "load_time_ms": 36.478, "grad_time_ms": 9333.309, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0028228743467479944, "policy_loss": -0.004008984658867121, "vf_loss": 73.87229919433594, "vf_explained_var": 0.7751579284667969, "kl": 0.0019005200592800975, "entropy": 1.110758900642395, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5350400, "episodes_total": 13376, "training_iteration": 418, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-21-46", "timestamp": 1660260106, "time_this_iter_s": 34.32990908622742, "time_total_s": 18515.39919090271, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 18515.39919090271, "timesteps_since_restore": 5350400, "iterations_since_restore": 418, "perf": {"cpu_util_percent": 29.667346938775513, "ram_util_percent": 59.18979591836735}}
-{"episode_reward_max": 639.0, "episode_reward_min": 180.0, "episode_reward_mean": 609.48, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 88.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 304.74}, "custom_metrics": {"sparse_reward_mean": 211.4, "sparse_reward_min": 60, "sparse_reward_max": 220, "shaped_reward_mean": 186.68, "shaped_reward_min": 60, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.87, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.16, "onion_pickup_agent_1_min": 5, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 16.55, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.45, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.26, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.16, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.01, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.07, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.37, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.67, "potting_onion_agent_1_min": 4, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.72, "dish_pickup_agent_0_min": 5, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 6.0, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.46, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.96, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 6, "dish_drop_agent_0_mean": 0.97, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 0.79, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.54, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.14, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 6, "soup_delivery_agent_0_mean": 5.51, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.07, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.37, 
"optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.67, "optimal_onion_potting_agent_1_min": 4, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.37, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.67, "viable_onion_potting_agent_1_min": 4, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 630.0, 639.0, 630.0, 579.0, 582.0, 636.0, 180.0, 582.0, 579.0, 636.0, 582.0, 573.0, 633.0, 639.0, 633.0, 630.0, 630.0, 633.0, 630.0, 630.0, 630.0, 579.0, 636.0, 630.0, 582.0, 633.0, 587.0, 587.0, 630.0, 627.0, 639.0, 630.0, 582.0, 627.0, 633.0, 567.0, 636.0, 636.0, 636.0, 630.0, 633.0, 630.0, 630.0, 627.0, 584.0, 633.0, 630.0, 573.0, 636.0, 582.0, 579.0, 633.0, 630.0, 636.0, 633.0, 573.0, 587.0, 579.0, 627.0, 630.0, 564.0, 636.0, 582.0, 636.0, 633.0, 579.0, 633.0, 587.0, 630.0, 639.0, 576.0, 630.0, 633.0, 582.0, 587.0, 618.0, 633.0, 582.0, 621.0, 636.0, 587.0, 636.0, 582.0, 636.0, 630.0, 582.0, 633.0, 581.0, 630.0, 636.0, 630.0, 627.0, 633.0, 636.0, 582.0, 627.0, 536.0, 633.0, 627.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [291.0, 288.0, 316.0, 314.0, 324.0, 315.0, 311.0, 319.0, 288.0, 291.0, 294.0, 288.0, 319.0, 317.0, 92.0, 88.0, 293.0, 289.0, 287.0, 292.0, 319.0, 317.0, 289.0, 293.0, 284.0, 289.0, 316.0, 317.0, 322.0, 317.0, 311.0, 322.0, 316.0, 314.0, 316.0, 314.0, 322.0, 311.0, 319.0, 311.0, 311.0, 319.0, 317.0, 313.0, 288.0, 291.0, 319.0, 317.0, 311.0, 319.0, 289.0, 293.0, 319.0, 314.0, 296.0, 291.0, 291.0, 296.0, 313.0, 317.0, 311.0, 316.0, 314.0, 325.0, 311.0, 319.0, 295.0, 287.0, 308.0, 319.0, 316.0, 317.0, 278.0, 289.0, 319.0, 317.0, 319.0, 317.0, 314.0, 322.0, 313.0, 317.0, 311.0, 322.0, 314.0, 316.0, 319.0, 311.0, 313.0, 314.0, 290.0, 294.0, 316.0, 317.0, 314.0, 316.0, 283.0, 
290.0, 322.0, 314.0, 297.0, 285.0, 291.0, 288.0, 318.0, 315.0, 314.0, 316.0, 322.0, 314.0, 314.0, 319.0, 296.0, 277.0, 293.0, 294.0, 291.0, 288.0, 305.0, 322.0, 314.0, 316.0, 279.0, 285.0, 314.0, 322.0, 288.0, 294.0, 319.0, 317.0, 321.0, 312.0, 291.0, 288.0, 319.0, 314.0, 293.0, 294.0, 316.0, 314.0, 319.0, 320.0, 288.0, 288.0, 316.0, 314.0, 316.0, 317.0, 291.0, 291.0, 293.0, 294.0, 308.0, 310.0, 314.0, 319.0, 289.0, 293.0, 318.0, 303.0, 314.0, 322.0, 298.0, 289.0, 324.0, 312.0, 290.0, 292.0, 314.0, 322.0, 313.0, 317.0, 288.0, 294.0, 322.0, 311.0, 289.0, 292.0, 314.0, 316.0, 314.0, 322.0, 314.0, 316.0, 312.0, 315.0, 311.0, 322.0, 314.0, 322.0, 294.0, 288.0, 316.0, 311.0, 275.0, 261.0, 319.0, 314.0, 314.0, 313.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8107503957777891, "mean_processing_ms": 0.2305898995792267, "mean_inference_ms": 1.4116530949413433}, "off_policy_estimator": {}, "info": {"num_steps_trained": 10056000, "num_steps_sampled": 5363200, "sample_time_ms": 21868.435, "load_time_ms": 36.628, "grad_time_ms": 9569.244, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.002508052857592702, "policy_loss": -0.004472339991480112, "vf_loss": 75.3826904296875, "vf_explained_var": 0.7911410927772522, "kl": 0.0020311845000833273, "entropy": 1.1157482862472534, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5363200, "episodes_total": 13408, "training_iteration": 419, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-22-20", "timestamp": 1660260140, "time_this_iter_s": 33.75737500190735, "time_total_s": 18549.156565904617, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 18549.156565904617, "timesteps_since_restore": 5363200, "iterations_since_restore": 419, "perf": {"cpu_util_percent": 32.68936170212766, "ram_util_percent": 59.19574468085105}}
-{"episode_reward_max": 639.0, "episode_reward_min": 180.0, "episode_reward_mean": 609.61, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 88.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 304.805}, "custom_metrics": {"sparse_reward_mean": 211.4, "sparse_reward_min": 60, "sparse_reward_max": 220, "shaped_reward_mean": 186.81, "shaped_reward_min": 60, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.63, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.23, "onion_pickup_agent_1_min": 5, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.29, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 19, "useful_onion_pickup_agent_1_mean": 17.55, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.13, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.17, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.01, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.06, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.28, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.73, "potting_onion_agent_1_min": 4, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.6, "dish_pickup_agent_0_min": 5, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 6.0, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.47, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.04, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 6, "dish_drop_agent_0_mean": 0.88, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 0.78, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.52, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.13, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 6, "soup_delivery_agent_0_mean": 5.49, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.09, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.28, 
"optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.73, "optimal_onion_potting_agent_1_min": 4, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.28, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.73, "viable_onion_potting_agent_1_min": 4, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [576.0, 630.0, 576.0, 630.0, 516.0, 630.0, 636.0, 639.0, 630.0, 582.0, 633.0, 576.0, 582.0, 633.0, 587.0, 627.0, 630.0, 633.0, 630.0, 582.0, 582.0, 633.0, 633.0, 633.0, 630.0, 627.0, 636.0, 579.0, 630.0, 636.0, 633.0, 627.0, 636.0, 633.0, 579.0, 633.0, 587.0, 630.0, 639.0, 576.0, 630.0, 633.0, 582.0, 587.0, 618.0, 633.0, 582.0, 621.0, 636.0, 587.0, 636.0, 582.0, 636.0, 630.0, 582.0, 633.0, 581.0, 630.0, 636.0, 630.0, 627.0, 633.0, 636.0, 582.0, 627.0, 536.0, 633.0, 627.0, 579.0, 630.0, 639.0, 630.0, 579.0, 582.0, 636.0, 180.0, 582.0, 579.0, 636.0, 582.0, 573.0, 633.0, 639.0, 633.0, 630.0, 630.0, 633.0, 630.0, 630.0, 630.0, 579.0, 636.0, 630.0, 582.0, 633.0, 587.0, 587.0, 630.0, 627.0, 639.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [276.0, 300.0, 317.0, 313.0, 285.0, 291.0, 314.0, 316.0, 254.0, 262.0, 319.0, 311.0, 317.0, 319.0, 319.0, 320.0, 316.0, 314.0, 290.0, 292.0, 314.0, 319.0, 285.0, 291.0, 290.0, 292.0, 316.0, 317.0, 291.0, 296.0, 310.0, 317.0, 316.0, 314.0, 316.0, 317.0, 314.0, 316.0, 288.0, 294.0, 291.0, 291.0, 311.0, 322.0, 313.0, 320.0, 316.0, 317.0, 311.0, 319.0, 315.0, 312.0, 316.0, 320.0, 291.0, 288.0, 308.0, 322.0, 311.0, 325.0, 314.0, 319.0, 313.0, 314.0, 319.0, 317.0, 321.0, 312.0, 291.0, 288.0, 319.0, 314.0, 293.0, 294.0, 316.0, 314.0, 319.0, 320.0, 288.0, 288.0, 316.0, 314.0, 316.0, 317.0, 291.0, 291.0, 293.0, 294.0, 308.0, 310.0, 314.0, 319.0, 289.0, 293.0, 318.0, 303.0, 314.0, 
322.0, 298.0, 289.0, 324.0, 312.0, 290.0, 292.0, 314.0, 322.0, 313.0, 317.0, 288.0, 294.0, 322.0, 311.0, 289.0, 292.0, 314.0, 316.0, 314.0, 322.0, 314.0, 316.0, 312.0, 315.0, 311.0, 322.0, 314.0, 322.0, 294.0, 288.0, 316.0, 311.0, 275.0, 261.0, 319.0, 314.0, 314.0, 313.0, 291.0, 288.0, 316.0, 314.0, 324.0, 315.0, 311.0, 319.0, 288.0, 291.0, 294.0, 288.0, 319.0, 317.0, 92.0, 88.0, 293.0, 289.0, 287.0, 292.0, 319.0, 317.0, 289.0, 293.0, 284.0, 289.0, 316.0, 317.0, 322.0, 317.0, 311.0, 322.0, 316.0, 314.0, 316.0, 314.0, 322.0, 311.0, 319.0, 311.0, 311.0, 319.0, 317.0, 313.0, 288.0, 291.0, 319.0, 317.0, 311.0, 319.0, 289.0, 293.0, 319.0, 314.0, 296.0, 291.0, 291.0, 296.0, 313.0, 317.0, 311.0, 316.0, 314.0, 325.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8099141458256162, "mean_processing_ms": 0.23042638477560978, "mean_inference_ms": 1.411223574078033}, "off_policy_estimator": {}, "info": {"num_steps_trained": 10080000, "num_steps_sampled": 5376000, "sample_time_ms": 22436.969, "load_time_ms": 36.407, "grad_time_ms": 9832.43, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0009076216374523938, "policy_loss": -0.0059606158174574375, "vf_loss": 74.23489379882812, "vf_explained_var": 0.7686769366264343, "kl": 0.00233254861086607, "entropy": 1.1105002164840698, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5376000, "episodes_total": 13440, "training_iteration": 420, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-22-56", "timestamp": 1660260176, "time_this_iter_s": 36.52545118331909, "time_total_s": 18585.682017087936, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 18585.682017087936, "timesteps_since_restore": 5376000, "iterations_since_restore": 420, "perf": {"cpu_util_percent": 30.815384615384616, "ram_util_percent": 59.175}}
-{"episode_reward_max": 639.0, "episode_reward_min": 180.0, "episode_reward_mean": 604.33, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 88.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 302.165}, "custom_metrics": {"sparse_reward_mean": 209.6, "sparse_reward_min": 60, "sparse_reward_max": 220, "shaped_reward_mean": 185.13, "shaped_reward_min": 60, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.5, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 18.15, "onion_pickup_agent_1_min": 5, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.15, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 17.41, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.13, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.22, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.02, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.06, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.08, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.55, "potting_onion_agent_1_min": 4, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.58, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 6.15, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 17, "useful_dish_pickup_agent_0_mean": 5.45, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.03, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.93, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 0.94, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 11, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 5.47, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.09, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.44, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.06, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.08, 
"optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.55, "optimal_onion_potting_agent_1_min": 4, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.08, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.55, "viable_onion_potting_agent_1_min": 4, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [627.0, 630.0, 579.0, 576.0, 582.0, 639.0, 630.0, 518.0, 582.0, 582.0, 624.0, 624.0, 633.0, 630.0, 584.0, 633.0, 633.0, 627.0, 630.0, 633.0, 579.0, 633.0, 582.0, 582.0, 627.0, 633.0, 519.0, 582.0, 579.0, 470.0, 587.0, 579.0, 627.0, 536.0, 633.0, 627.0, 579.0, 630.0, 639.0, 630.0, 579.0, 582.0, 636.0, 180.0, 582.0, 579.0, 636.0, 582.0, 573.0, 633.0, 639.0, 633.0, 630.0, 630.0, 633.0, 630.0, 630.0, 630.0, 579.0, 636.0, 630.0, 582.0, 633.0, 587.0, 587.0, 630.0, 627.0, 639.0, 576.0, 630.0, 576.0, 630.0, 516.0, 630.0, 636.0, 639.0, 630.0, 582.0, 633.0, 576.0, 582.0, 633.0, 587.0, 627.0, 630.0, 633.0, 630.0, 582.0, 582.0, 633.0, 633.0, 633.0, 630.0, 627.0, 636.0, 579.0, 630.0, 636.0, 633.0, 627.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [313.0, 314.0, 316.0, 314.0, 288.0, 291.0, 283.0, 293.0, 289.0, 293.0, 317.0, 322.0, 316.0, 314.0, 279.0, 239.0, 293.0, 289.0, 293.0, 289.0, 310.0, 314.0, 304.0, 320.0, 311.0, 322.0, 313.0, 317.0, 295.0, 289.0, 314.0, 319.0, 317.0, 316.0, 313.0, 314.0, 316.0, 314.0, 321.0, 312.0, 281.0, 298.0, 316.0, 317.0, 293.0, 289.0, 287.0, 295.0, 316.0, 311.0, 316.0, 317.0, 261.0, 258.0, 285.0, 297.0, 287.0, 292.0, 226.0, 244.0, 299.0, 288.0, 297.0, 282.0, 316.0, 311.0, 275.0, 261.0, 319.0, 314.0, 314.0, 313.0, 291.0, 288.0, 316.0, 314.0, 324.0, 315.0, 311.0, 319.0, 288.0, 291.0, 294.0, 288.0, 319.0, 317.0, 92.0, 88.0, 293.0, 289.0, 287.0, 292.0, 319.0, 317.0, 289.0, 293.0, 284.0, 
289.0, 316.0, 317.0, 322.0, 317.0, 311.0, 322.0, 316.0, 314.0, 316.0, 314.0, 322.0, 311.0, 319.0, 311.0, 311.0, 319.0, 317.0, 313.0, 288.0, 291.0, 319.0, 317.0, 311.0, 319.0, 289.0, 293.0, 319.0, 314.0, 296.0, 291.0, 291.0, 296.0, 313.0, 317.0, 311.0, 316.0, 314.0, 325.0, 276.0, 300.0, 317.0, 313.0, 285.0, 291.0, 314.0, 316.0, 254.0, 262.0, 319.0, 311.0, 317.0, 319.0, 319.0, 320.0, 316.0, 314.0, 290.0, 292.0, 314.0, 319.0, 285.0, 291.0, 290.0, 292.0, 316.0, 317.0, 291.0, 296.0, 310.0, 317.0, 316.0, 314.0, 316.0, 317.0, 314.0, 316.0, 288.0, 294.0, 291.0, 291.0, 311.0, 322.0, 313.0, 320.0, 316.0, 317.0, 311.0, 319.0, 315.0, 312.0, 316.0, 320.0, 291.0, 288.0, 308.0, 322.0, 311.0, 325.0, 314.0, 319.0, 313.0, 314.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8090875663620912, "mean_processing_ms": 0.23026522516611045, "mean_inference_ms": 1.4109073671228203}, "off_policy_estimator": {}, "info": {"num_steps_trained": 10104000, "num_steps_sampled": 5388800, "sample_time_ms": 22748.439, "load_time_ms": 36.384, "grad_time_ms": 10256.981, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0004364584165159613, "policy_loss": -0.007198403123766184, "vf_loss": 81.89620208740234, "vf_explained_var": 0.7658551335334778, "kl": 0.0018362547270953655, "entropy": 1.1095339059829712, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5388800, "episodes_total": 13472, "training_iteration": 421, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-23-34", "timestamp": 1660260214, "time_this_iter_s": 37.69177174568176, "time_total_s": 18623.373788833618, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 18623.373788833618, "timesteps_since_restore": 5388800, "iterations_since_restore": 421, "perf": {"cpu_util_percent": 34.76037735849056, "ram_util_percent": 59.533962264150944}}
-{"episode_reward_max": 639.0, "episode_reward_min": 470.0, "episode_reward_mean": 605.67, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 226.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 302.835}, "custom_metrics": {"sparse_reward_mean": 210.0, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 185.67, "shaped_reward_min": 150, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.52, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 18.17, "onion_pickup_agent_1_min": 12, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.15, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 17.61, "useful_onion_pickup_agent_1_min": 11, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.13, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.17, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.05, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.03, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.06, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.69, "potting_onion_agent_1_min": 12, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.4, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 6.3, "dish_pickup_agent_1_min": 4, "dish_pickup_agent_1_max": 17, "useful_dish_pickup_agent_0_mean": 5.44, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.05, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.78, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 1.06, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 11, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 5.48, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.12, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.46, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.05, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.06, 
"optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.69, "optimal_onion_potting_agent_1_min": 12, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.06, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.69, "viable_onion_potting_agent_1_min": 12, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [639.0, 587.0, 630.0, 579.0, 633.0, 582.0, 579.0, 576.0, 630.0, 624.0, 630.0, 579.0, 636.0, 522.0, 636.0, 630.0, 587.0, 633.0, 582.0, 630.0, 630.0, 579.0, 573.0, 579.0, 582.0, 582.0, 636.0, 636.0, 633.0, 579.0, 587.0, 579.0, 587.0, 630.0, 627.0, 639.0, 576.0, 630.0, 576.0, 630.0, 516.0, 630.0, 636.0, 639.0, 630.0, 582.0, 633.0, 576.0, 582.0, 633.0, 587.0, 627.0, 630.0, 633.0, 630.0, 582.0, 582.0, 633.0, 633.0, 633.0, 630.0, 627.0, 636.0, 579.0, 630.0, 636.0, 633.0, 627.0, 627.0, 630.0, 579.0, 576.0, 582.0, 639.0, 630.0, 518.0, 582.0, 582.0, 624.0, 624.0, 633.0, 630.0, 584.0, 633.0, 633.0, 627.0, 630.0, 633.0, 579.0, 633.0, 582.0, 582.0, 627.0, 633.0, 519.0, 582.0, 579.0, 470.0, 587.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [317.0, 322.0, 286.0, 301.0, 316.0, 314.0, 290.0, 289.0, 316.0, 317.0, 293.0, 289.0, 288.0, 291.0, 293.0, 283.0, 313.0, 317.0, 308.0, 316.0, 319.0, 311.0, 286.0, 293.0, 316.0, 320.0, 262.0, 260.0, 319.0, 317.0, 316.0, 314.0, 288.0, 299.0, 314.0, 319.0, 298.0, 284.0, 316.0, 314.0, 321.0, 309.0, 287.0, 292.0, 285.0, 288.0, 290.0, 289.0, 286.0, 296.0, 289.0, 293.0, 316.0, 320.0, 319.0, 317.0, 322.0, 311.0, 291.0, 288.0, 291.0, 296.0, 288.0, 291.0, 291.0, 296.0, 313.0, 317.0, 311.0, 316.0, 314.0, 325.0, 276.0, 300.0, 317.0, 313.0, 285.0, 291.0, 314.0, 316.0, 254.0, 262.0, 319.0, 311.0, 317.0, 319.0, 319.0, 320.0, 316.0, 314.0, 290.0, 292.0, 314.0, 319.0, 285.0, 291.0, 290.0, 
292.0, 316.0, 317.0, 291.0, 296.0, 310.0, 317.0, 316.0, 314.0, 316.0, 317.0, 314.0, 316.0, 288.0, 294.0, 291.0, 291.0, 311.0, 322.0, 313.0, 320.0, 316.0, 317.0, 311.0, 319.0, 315.0, 312.0, 316.0, 320.0, 291.0, 288.0, 308.0, 322.0, 311.0, 325.0, 314.0, 319.0, 313.0, 314.0, 313.0, 314.0, 316.0, 314.0, 288.0, 291.0, 283.0, 293.0, 289.0, 293.0, 317.0, 322.0, 316.0, 314.0, 279.0, 239.0, 293.0, 289.0, 293.0, 289.0, 310.0, 314.0, 304.0, 320.0, 311.0, 322.0, 313.0, 317.0, 295.0, 289.0, 314.0, 319.0, 317.0, 316.0, 313.0, 314.0, 316.0, 314.0, 321.0, 312.0, 281.0, 298.0, 316.0, 317.0, 293.0, 289.0, 287.0, 295.0, 316.0, 311.0, 316.0, 317.0, 261.0, 258.0, 285.0, 297.0, 287.0, 292.0, 226.0, 244.0, 299.0, 288.0, 297.0, 282.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8082635107475633, "mean_processing_ms": 0.23010486784203785, "mean_inference_ms": 1.4106251231358269}, "off_policy_estimator": {}, "info": {"num_steps_trained": 10128000, "num_steps_sampled": 5401600, "sample_time_ms": 23169.092, "load_time_ms": 36.151, "grad_time_ms": 10580.278, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0029069948941469193, "policy_loss": -0.004262510221451521, "vf_loss": 77.2344970703125, "vf_explained_var": 0.7713862061500549, "kl": 0.001992677804082632, "entropy": 1.1078964471817017, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5401600, "episodes_total": 13504, "training_iteration": 422, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-24-10", "timestamp": 1660260250, "time_this_iter_s": 35.70268106460571, "time_total_s": 18659.076469898224, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 18659.076469898224, "timesteps_since_restore": 5401600, "iterations_since_restore": 422, "perf": {"cpu_util_percent": 33.46078431372549, "ram_util_percent": 59.009803921568626}}
-{"episode_reward_max": 639.0, "episode_reward_min": 470.0, "episode_reward_mean": 603.43, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 226.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 301.715}, "custom_metrics": {"sparse_reward_mean": 209.0, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 185.43, "shaped_reward_min": 150, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.55, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 18.12, "onion_pickup_agent_1_min": 12, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.19, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 17.57, "useful_onion_pickup_agent_1_min": 11, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.15, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.21, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.06, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.06, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 16.05, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.63, "potting_onion_agent_1_min": 12, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.49, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 6.22, "dish_pickup_agent_1_min": 4, "dish_pickup_agent_1_max": 17, "useful_dish_pickup_agent_0_mean": 5.48, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.05, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.81, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 1.05, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 11, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.04, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 5.53, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.04, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.49, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.97, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.05, 
"optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.63, "optimal_onion_potting_agent_1_min": 12, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.05, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.63, "viable_onion_potting_agent_1_min": 12, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 576.0, 636.0, 576.0, 639.0, 579.0, 636.0, 630.0, 579.0, 582.0, 582.0, 624.0, 581.0, 636.0, 630.0, 582.0, 582.0, 633.0, 582.0, 636.0, 633.0, 587.0, 630.0, 627.0, 633.0, 633.0, 587.0, 582.0, 582.0, 530.0, 633.0, 630.0, 630.0, 636.0, 633.0, 627.0, 627.0, 630.0, 579.0, 576.0, 582.0, 639.0, 630.0, 518.0, 582.0, 582.0, 624.0, 624.0, 633.0, 630.0, 584.0, 633.0, 633.0, 627.0, 630.0, 633.0, 579.0, 633.0, 582.0, 582.0, 627.0, 633.0, 519.0, 582.0, 579.0, 470.0, 587.0, 579.0, 639.0, 587.0, 630.0, 579.0, 633.0, 582.0, 579.0, 576.0, 630.0, 624.0, 630.0, 579.0, 636.0, 522.0, 636.0, 630.0, 587.0, 633.0, 582.0, 630.0, 630.0, 579.0, 573.0, 579.0, 582.0, 582.0, 636.0, 636.0, 633.0, 579.0, 587.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [296.0, 286.0, 282.0, 294.0, 309.0, 327.0, 288.0, 288.0, 322.0, 317.0, 282.0, 297.0, 319.0, 317.0, 316.0, 314.0, 288.0, 291.0, 291.0, 291.0, 288.0, 294.0, 313.0, 311.0, 292.0, 289.0, 319.0, 317.0, 316.0, 314.0, 288.0, 294.0, 292.0, 290.0, 314.0, 319.0, 302.0, 280.0, 316.0, 320.0, 311.0, 322.0, 293.0, 294.0, 316.0, 314.0, 313.0, 314.0, 322.0, 311.0, 316.0, 317.0, 285.0, 302.0, 286.0, 296.0, 297.0, 285.0, 270.0, 260.0, 316.0, 317.0, 316.0, 314.0, 308.0, 322.0, 311.0, 325.0, 314.0, 319.0, 313.0, 314.0, 313.0, 314.0, 316.0, 314.0, 288.0, 291.0, 283.0, 293.0, 289.0, 293.0, 317.0, 322.0, 316.0, 314.0, 279.0, 239.0, 293.0, 289.0, 293.0, 289.0, 310.0, 314.0, 304.0, 320.0, 311.0, 
322.0, 313.0, 317.0, 295.0, 289.0, 314.0, 319.0, 317.0, 316.0, 313.0, 314.0, 316.0, 314.0, 321.0, 312.0, 281.0, 298.0, 316.0, 317.0, 293.0, 289.0, 287.0, 295.0, 316.0, 311.0, 316.0, 317.0, 261.0, 258.0, 285.0, 297.0, 287.0, 292.0, 226.0, 244.0, 299.0, 288.0, 297.0, 282.0, 317.0, 322.0, 286.0, 301.0, 316.0, 314.0, 290.0, 289.0, 316.0, 317.0, 293.0, 289.0, 288.0, 291.0, 293.0, 283.0, 313.0, 317.0, 308.0, 316.0, 319.0, 311.0, 286.0, 293.0, 316.0, 320.0, 262.0, 260.0, 319.0, 317.0, 316.0, 314.0, 288.0, 299.0, 314.0, 319.0, 298.0, 284.0, 316.0, 314.0, 321.0, 309.0, 287.0, 292.0, 285.0, 288.0, 290.0, 289.0, 286.0, 296.0, 289.0, 293.0, 316.0, 320.0, 319.0, 317.0, 322.0, 311.0, 291.0, 288.0, 291.0, 296.0, 288.0, 291.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8074351528225807, "mean_processing_ms": 0.2299430227686211, "mean_inference_ms": 1.4101585420796834}, "off_policy_estimator": {}, "info": {"num_steps_trained": 10152000, "num_steps_sampled": 5414400, "sample_time_ms": 23238.003, "load_time_ms": 36.154, "grad_time_ms": 10699.531, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.004093436989933252, "policy_loss": -0.003705031471326947, "vf_loss": 83.51854705810547, "vf_explained_var": 0.7628346085548401, "kl": 0.001839231583289802, "entropy": 1.1067644357681274, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5414400, "episodes_total": 13536, "training_iteration": 423, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-24-43", "timestamp": 1660260283, "time_this_iter_s": 33.482574224472046, "time_total_s": 18692.559044122696, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 18692.559044122696, "timesteps_since_restore": 5414400, "iterations_since_restore": 423, "perf": {"cpu_util_percent": 33.295744680851065, "ram_util_percent": 58.97021276595746}}
-{"episode_reward_max": 639.0, "episode_reward_min": 470.0, "episode_reward_mean": 602.74, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 226.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 301.37}, "custom_metrics": {"sparse_reward_mean": 208.4, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 185.94, "shaped_reward_min": 150, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.58, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.04, "onion_pickup_agent_1_min": 14, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.22, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.51, "useful_onion_pickup_agent_1_min": 12, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.18, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.18, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.06, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.06, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 16.07, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.65, "potting_onion_agent_1_min": 12, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.71, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 6.04, "dish_pickup_agent_1_min": 4, "dish_pickup_agent_1_max": 17, "useful_dish_pickup_agent_0_mean": 5.61, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.05, "useful_dish_pickup_agent_1_min": 3, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.94, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.89, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 11, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.59, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 4.98, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.53, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.89, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.07, 
"optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.65, "optimal_onion_potting_agent_1_min": 12, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.07, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.65, "viable_onion_potting_agent_1_min": 12, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 627.0, 630.0, 582.0, 636.0, 582.0, 587.0, 636.0, 627.0, 636.0, 633.0, 633.0, 630.0, 579.0, 579.0, 587.0, 636.0, 582.0, 639.0, 630.0, 627.0, 582.0, 582.0, 584.0, 639.0, 582.0, 639.0, 630.0, 522.0, 587.0, 584.0, 579.0, 579.0, 470.0, 587.0, 579.0, 639.0, 587.0, 630.0, 579.0, 633.0, 582.0, 579.0, 576.0, 630.0, 624.0, 630.0, 579.0, 636.0, 522.0, 636.0, 630.0, 587.0, 633.0, 582.0, 630.0, 630.0, 579.0, 573.0, 579.0, 582.0, 582.0, 636.0, 636.0, 633.0, 579.0, 587.0, 579.0, 582.0, 576.0, 636.0, 576.0, 639.0, 579.0, 636.0, 630.0, 579.0, 582.0, 582.0, 624.0, 581.0, 636.0, 630.0, 582.0, 582.0, 633.0, 582.0, 636.0, 633.0, 587.0, 630.0, 627.0, 633.0, 633.0, 587.0, 582.0, 582.0, 530.0, 633.0, 630.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [293.0, 289.0, 313.0, 314.0, 316.0, 314.0, 291.0, 291.0, 317.0, 319.0, 293.0, 289.0, 290.0, 297.0, 319.0, 317.0, 311.0, 316.0, 314.0, 322.0, 316.0, 317.0, 321.0, 312.0, 316.0, 314.0, 286.0, 293.0, 288.0, 291.0, 304.0, 283.0, 319.0, 317.0, 293.0, 289.0, 319.0, 320.0, 314.0, 316.0, 304.0, 323.0, 291.0, 291.0, 291.0, 291.0, 293.0, 291.0, 319.0, 320.0, 291.0, 291.0, 324.0, 315.0, 319.0, 311.0, 268.0, 254.0, 298.0, 289.0, 293.0, 291.0, 293.0, 286.0, 287.0, 292.0, 226.0, 244.0, 299.0, 288.0, 297.0, 282.0, 317.0, 322.0, 286.0, 301.0, 316.0, 314.0, 290.0, 289.0, 316.0, 317.0, 293.0, 289.0, 288.0, 291.0, 293.0, 283.0, 313.0, 317.0, 308.0, 316.0, 319.0, 311.0, 286.0, 293.0, 316.0, 
320.0, 262.0, 260.0, 319.0, 317.0, 316.0, 314.0, 288.0, 299.0, 314.0, 319.0, 298.0, 284.0, 316.0, 314.0, 321.0, 309.0, 287.0, 292.0, 285.0, 288.0, 290.0, 289.0, 286.0, 296.0, 289.0, 293.0, 316.0, 320.0, 319.0, 317.0, 322.0, 311.0, 291.0, 288.0, 291.0, 296.0, 288.0, 291.0, 296.0, 286.0, 282.0, 294.0, 309.0, 327.0, 288.0, 288.0, 322.0, 317.0, 282.0, 297.0, 319.0, 317.0, 316.0, 314.0, 288.0, 291.0, 291.0, 291.0, 288.0, 294.0, 313.0, 311.0, 292.0, 289.0, 319.0, 317.0, 316.0, 314.0, 288.0, 294.0, 292.0, 290.0, 314.0, 319.0, 302.0, 280.0, 316.0, 320.0, 311.0, 322.0, 293.0, 294.0, 316.0, 314.0, 313.0, 314.0, 322.0, 311.0, 316.0, 317.0, 285.0, 302.0, 286.0, 296.0, 297.0, 285.0, 270.0, 260.0, 316.0, 317.0, 316.0, 314.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8066043021110226, "mean_processing_ms": 0.22977916427475648, "mean_inference_ms": 1.4095576278285673}, "off_policy_estimator": {}, "info": {"num_steps_trained": 10176000, "num_steps_sampled": 5427200, "sample_time_ms": 22903.737, "load_time_ms": 36.042, "grad_time_ms": 10790.567, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0005737189203500748, "policy_loss": -0.006916053593158722, "vf_loss": 80.42852783203125, "vf_explained_var": 0.7650584578514099, "kl": 0.0017220000736415386, "entropy": 1.1061476469039917, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5427200, "episodes_total": 13568, "training_iteration": 424, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-25-17", "timestamp": 1660260317, "time_this_iter_s": 33.90529203414917, "time_total_s": 18726.464336156845, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 18726.464336156845, "timesteps_since_restore": 5427200, "iterations_since_restore": 424, "perf": {"cpu_util_percent": 32.64166666666667, "ram_util_percent": 58.96875}}
-{"episode_reward_max": 639.0, "episode_reward_min": 522.0, "episode_reward_mean": 606.18, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 254.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 303.09}, "custom_metrics": {"sparse_reward_mean": 209.8, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 186.58, "shaped_reward_min": 161, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.74, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.13, "onion_pickup_agent_1_min": 15, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.41, "useful_onion_pickup_agent_0_min": 13, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.52, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.2, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.15, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.05, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.04, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 16.17, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.71, "potting_onion_agent_1_min": 14, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.77, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.99, "dish_pickup_agent_1_min": 4, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.59, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.04, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.97, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.81, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.63, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 4.98, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.58, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.91, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.17, 
"optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.71, "optimal_onion_potting_agent_1_min": 14, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.17, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.71, "viable_onion_potting_agent_1_min": 14, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [633.0, 633.0, 630.0, 639.0, 582.0, 582.0, 633.0, 630.0, 633.0, 579.0, 636.0, 582.0, 579.0, 636.0, 630.0, 579.0, 582.0, 587.0, 633.0, 630.0, 633.0, 587.0, 636.0, 582.0, 579.0, 576.0, 630.0, 561.0, 627.0, 627.0, 630.0, 564.0, 633.0, 579.0, 587.0, 579.0, 582.0, 576.0, 636.0, 576.0, 639.0, 579.0, 636.0, 630.0, 579.0, 582.0, 582.0, 624.0, 581.0, 636.0, 630.0, 582.0, 582.0, 633.0, 582.0, 636.0, 633.0, 587.0, 630.0, 627.0, 633.0, 633.0, 587.0, 582.0, 582.0, 530.0, 633.0, 630.0, 582.0, 627.0, 630.0, 582.0, 636.0, 582.0, 587.0, 636.0, 627.0, 636.0, 633.0, 633.0, 630.0, 579.0, 579.0, 587.0, 636.0, 582.0, 639.0, 630.0, 627.0, 582.0, 582.0, 584.0, 639.0, 582.0, 639.0, 630.0, 522.0, 587.0, 584.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [314.0, 319.0, 317.0, 316.0, 313.0, 317.0, 319.0, 320.0, 293.0, 289.0, 293.0, 289.0, 314.0, 319.0, 319.0, 311.0, 316.0, 317.0, 286.0, 293.0, 319.0, 317.0, 293.0, 289.0, 290.0, 289.0, 317.0, 319.0, 319.0, 311.0, 291.0, 288.0, 290.0, 292.0, 293.0, 294.0, 316.0, 317.0, 319.0, 311.0, 316.0, 317.0, 288.0, 299.0, 319.0, 317.0, 291.0, 291.0, 288.0, 291.0, 288.0, 288.0, 309.0, 321.0, 278.0, 283.0, 316.0, 311.0, 313.0, 314.0, 319.0, 311.0, 284.0, 280.0, 322.0, 311.0, 291.0, 288.0, 291.0, 296.0, 288.0, 291.0, 296.0, 286.0, 282.0, 294.0, 309.0, 327.0, 288.0, 288.0, 322.0, 317.0, 282.0, 297.0, 319.0, 317.0, 316.0, 314.0, 288.0, 291.0, 291.0, 291.0, 288.0, 294.0, 313.0, 311.0, 292.0, 
289.0, 319.0, 317.0, 316.0, 314.0, 288.0, 294.0, 292.0, 290.0, 314.0, 319.0, 302.0, 280.0, 316.0, 320.0, 311.0, 322.0, 293.0, 294.0, 316.0, 314.0, 313.0, 314.0, 322.0, 311.0, 316.0, 317.0, 285.0, 302.0, 286.0, 296.0, 297.0, 285.0, 270.0, 260.0, 316.0, 317.0, 316.0, 314.0, 293.0, 289.0, 313.0, 314.0, 316.0, 314.0, 291.0, 291.0, 317.0, 319.0, 293.0, 289.0, 290.0, 297.0, 319.0, 317.0, 311.0, 316.0, 314.0, 322.0, 316.0, 317.0, 321.0, 312.0, 316.0, 314.0, 286.0, 293.0, 288.0, 291.0, 304.0, 283.0, 319.0, 317.0, 293.0, 289.0, 319.0, 320.0, 314.0, 316.0, 304.0, 323.0, 291.0, 291.0, 291.0, 291.0, 293.0, 291.0, 319.0, 320.0, 291.0, 291.0, 324.0, 315.0, 319.0, 311.0, 268.0, 254.0, 298.0, 289.0, 293.0, 291.0, 293.0, 286.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8057716704864768, "mean_processing_ms": 0.22961462225140888, "mean_inference_ms": 1.4087586857000423}, "off_policy_estimator": {}, "info": {"num_steps_trained": 10200000, "num_steps_sampled": 5440000, "sample_time_ms": 22855.818, "load_time_ms": 36.111, "grad_time_ms": 10847.011, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.001967804506421089, "policy_loss": -0.00539380731061101, "vf_loss": 79.134033203125, "vf_explained_var": 0.7726359963417053, "kl": 0.0021053599193692207, "entropy": 1.1035689115524292, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5440000, "episodes_total": 13600, "training_iteration": 425, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-25-47", "timestamp": 1660260347, "time_this_iter_s": 29.913795948028564, "time_total_s": 18756.378132104874, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 18756.378132104874, "timesteps_since_restore": 5440000, "iterations_since_restore": 425, "perf": {"cpu_util_percent": 31.83333333333333, "ram_util_percent": 58.778571428571425}}
-{"episode_reward_max": 639.0, "episode_reward_min": 522.0, "episode_reward_mean": 610.57, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 254.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 305.285}, "custom_metrics": {"sparse_reward_mean": 211.6, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 187.37, "shaped_reward_min": 161, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.51, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.46, "onion_pickup_agent_1_min": 15, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.2, "useful_onion_pickup_agent_0_min": 13, "useful_onion_pickup_agent_0_max": 19, "useful_onion_pickup_agent_1_mean": 17.87, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.15, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.15, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.03, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.04, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 16.05, "potting_onion_agent_0_min": 13, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.96, "potting_onion_agent_1_min": 14, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.87, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 6.01, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.7, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.93, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.96, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.89, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.74, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.95, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.68, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.9, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.05, 
"optimal_onion_potting_agent_0_min": 13, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.96, "optimal_onion_potting_agent_1_min": 14, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.05, "viable_onion_potting_agent_0_min": 13, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.96, "viable_onion_potting_agent_1_min": 14, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [630.0, 624.0, 636.0, 627.0, 582.0, 636.0, 636.0, 630.0, 573.0, 630.0, 630.0, 579.0, 633.0, 636.0, 630.0, 639.0, 627.0, 582.0, 639.0, 587.0, 630.0, 636.0, 584.0, 633.0, 630.0, 579.0, 618.0, 630.0, 627.0, 636.0, 587.0, 636.0, 582.0, 530.0, 633.0, 630.0, 582.0, 627.0, 630.0, 582.0, 636.0, 582.0, 587.0, 636.0, 627.0, 636.0, 633.0, 633.0, 630.0, 579.0, 579.0, 587.0, 636.0, 582.0, 639.0, 630.0, 627.0, 582.0, 582.0, 584.0, 639.0, 582.0, 639.0, 630.0, 522.0, 587.0, 584.0, 579.0, 633.0, 633.0, 630.0, 639.0, 582.0, 582.0, 633.0, 630.0, 633.0, 579.0, 636.0, 582.0, 579.0, 636.0, 630.0, 579.0, 582.0, 587.0, 633.0, 630.0, 633.0, 587.0, 636.0, 582.0, 579.0, 576.0, 630.0, 561.0, 627.0, 627.0, 630.0, 564.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [313.0, 317.0, 320.0, 304.0, 319.0, 317.0, 318.0, 309.0, 296.0, 286.0, 317.0, 319.0, 319.0, 317.0, 313.0, 317.0, 284.0, 289.0, 323.0, 307.0, 311.0, 319.0, 298.0, 281.0, 316.0, 317.0, 319.0, 317.0, 316.0, 314.0, 319.0, 320.0, 313.0, 314.0, 286.0, 296.0, 322.0, 317.0, 283.0, 304.0, 316.0, 314.0, 314.0, 322.0, 295.0, 289.0, 319.0, 314.0, 314.0, 316.0, 288.0, 291.0, 299.0, 319.0, 316.0, 314.0, 311.0, 316.0, 313.0, 323.0, 293.0, 294.0, 316.0, 320.0, 297.0, 285.0, 270.0, 260.0, 316.0, 317.0, 316.0, 314.0, 293.0, 289.0, 313.0, 314.0, 316.0, 314.0, 291.0, 291.0, 317.0, 319.0, 293.0, 289.0, 290.0, 297.0, 319.0, 317.0, 311.0, 316.0, 314.0, 322.0, 316.0, 317.0, 321.0, 312.0, 316.0, 
314.0, 286.0, 293.0, 288.0, 291.0, 304.0, 283.0, 319.0, 317.0, 293.0, 289.0, 319.0, 320.0, 314.0, 316.0, 304.0, 323.0, 291.0, 291.0, 291.0, 291.0, 293.0, 291.0, 319.0, 320.0, 291.0, 291.0, 324.0, 315.0, 319.0, 311.0, 268.0, 254.0, 298.0, 289.0, 293.0, 291.0, 293.0, 286.0, 314.0, 319.0, 317.0, 316.0, 313.0, 317.0, 319.0, 320.0, 293.0, 289.0, 293.0, 289.0, 314.0, 319.0, 319.0, 311.0, 316.0, 317.0, 286.0, 293.0, 319.0, 317.0, 293.0, 289.0, 290.0, 289.0, 317.0, 319.0, 319.0, 311.0, 291.0, 288.0, 290.0, 292.0, 293.0, 294.0, 316.0, 317.0, 319.0, 311.0, 316.0, 317.0, 288.0, 299.0, 319.0, 317.0, 291.0, 291.0, 288.0, 291.0, 288.0, 288.0, 309.0, 321.0, 278.0, 283.0, 316.0, 311.0, 313.0, 314.0, 319.0, 311.0, 284.0, 280.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8049370656544826, "mean_processing_ms": 0.22944824169786282, "mean_inference_ms": 1.4078260577315087}, "off_policy_estimator": {}, "info": {"num_steps_trained": 10224000, "num_steps_sampled": 5452800, "sample_time_ms": 22520.554, "load_time_ms": 36.416, "grad_time_ms": 10937.296, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 9.403874173585791e-06, "policy_loss": -0.006973860785365105, "vf_loss": 75.33930969238281, "vf_explained_var": 0.7694594264030457, "kl": 0.00176583684515208, "entropy": 1.1013368368148804, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5452800, "episodes_total": 13632, "training_iteration": 426, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-26-16", "timestamp": 1660260376, "time_this_iter_s": 29.526015043258667, "time_total_s": 18785.904147148132, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 18785.904147148132, "timesteps_since_restore": 5452800, "iterations_since_restore": 426, "perf": {"cpu_util_percent": 31.057142857142853, "ram_util_percent": 58.84047619047618}}
-{"episode_reward_max": 639.0, "episode_reward_min": 522.0, "episode_reward_mean": 614.23, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 254.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 307.115}, "custom_metrics": {"sparse_reward_mean": 213.2, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 187.83, "shaped_reward_min": 161, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.73, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.36, "onion_pickup_agent_1_min": 15, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.44, "useful_onion_pickup_agent_0_min": 13, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.74, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.15, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.13, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.02, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.04, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.35, "potting_onion_agent_0_min": 13, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.84, "potting_onion_agent_1_min": 14, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.7, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 6.04, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.58, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.94, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.88, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.85, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.67, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.09, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.64, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.02, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 16.35, 
"optimal_onion_potting_agent_0_min": 13, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.84, "optimal_onion_potting_agent_1_min": 14, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.35, "viable_onion_potting_agent_0_min": 13, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.84, "viable_onion_potting_agent_1_min": 14, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [630.0, 633.0, 573.0, 582.0, 579.0, 639.0, 636.0, 636.0, 633.0, 579.0, 630.0, 630.0, 630.0, 582.0, 627.0, 582.0, 630.0, 621.0, 630.0, 630.0, 633.0, 636.0, 627.0, 636.0, 584.0, 636.0, 633.0, 636.0, 630.0, 627.0, 633.0, 636.0, 522.0, 587.0, 584.0, 579.0, 633.0, 633.0, 630.0, 639.0, 582.0, 582.0, 633.0, 630.0, 633.0, 579.0, 636.0, 582.0, 579.0, 636.0, 630.0, 579.0, 582.0, 587.0, 633.0, 630.0, 633.0, 587.0, 636.0, 582.0, 579.0, 576.0, 630.0, 561.0, 627.0, 627.0, 630.0, 564.0, 630.0, 624.0, 636.0, 627.0, 582.0, 636.0, 636.0, 630.0, 573.0, 630.0, 630.0, 579.0, 633.0, 636.0, 630.0, 639.0, 627.0, 582.0, 639.0, 587.0, 630.0, 636.0, 584.0, 633.0, 630.0, 579.0, 618.0, 630.0, 627.0, 636.0, 587.0, 636.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [311.0, 319.0, 317.0, 316.0, 289.0, 284.0, 293.0, 289.0, 287.0, 292.0, 317.0, 322.0, 314.0, 322.0, 316.0, 320.0, 316.0, 317.0, 297.0, 282.0, 317.0, 313.0, 314.0, 316.0, 324.0, 306.0, 286.0, 296.0, 318.0, 309.0, 297.0, 285.0, 311.0, 319.0, 319.0, 302.0, 316.0, 314.0, 316.0, 314.0, 316.0, 317.0, 316.0, 320.0, 319.0, 308.0, 319.0, 317.0, 290.0, 294.0, 314.0, 322.0, 314.0, 319.0, 314.0, 322.0, 314.0, 316.0, 316.0, 311.0, 317.0, 316.0, 319.0, 317.0, 268.0, 254.0, 298.0, 289.0, 293.0, 291.0, 293.0, 286.0, 314.0, 319.0, 317.0, 316.0, 313.0, 317.0, 319.0, 320.0, 293.0, 289.0, 293.0, 289.0, 314.0, 319.0, 319.0, 311.0, 316.0, 317.0, 286.0, 293.0, 319.0, 317.0, 293.0, 289.0, 290.0, 
289.0, 317.0, 319.0, 319.0, 311.0, 291.0, 288.0, 290.0, 292.0, 293.0, 294.0, 316.0, 317.0, 319.0, 311.0, 316.0, 317.0, 288.0, 299.0, 319.0, 317.0, 291.0, 291.0, 288.0, 291.0, 288.0, 288.0, 309.0, 321.0, 278.0, 283.0, 316.0, 311.0, 313.0, 314.0, 319.0, 311.0, 284.0, 280.0, 313.0, 317.0, 320.0, 304.0, 319.0, 317.0, 318.0, 309.0, 296.0, 286.0, 317.0, 319.0, 319.0, 317.0, 313.0, 317.0, 284.0, 289.0, 323.0, 307.0, 311.0, 319.0, 298.0, 281.0, 316.0, 317.0, 319.0, 317.0, 316.0, 314.0, 319.0, 320.0, 313.0, 314.0, 286.0, 296.0, 322.0, 317.0, 283.0, 304.0, 316.0, 314.0, 314.0, 322.0, 295.0, 289.0, 319.0, 314.0, 314.0, 316.0, 288.0, 291.0, 299.0, 319.0, 316.0, 314.0, 311.0, 316.0, 313.0, 323.0, 293.0, 294.0, 316.0, 320.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8041036336722438, "mean_processing_ms": 0.2292823831802886, "mean_inference_ms": 1.4068110858832141}, "off_policy_estimator": {}, "info": {"num_steps_trained": 10248000, "num_steps_sampled": 5465600, "sample_time_ms": 22514.038, "load_time_ms": 36.353, "grad_time_ms": 10948.965, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.00023957279336173087, "policy_loss": -0.006577346473932266, "vf_loss": 73.66693878173828, "vf_explained_var": 0.7691845297813416, "kl": 0.001824389211833477, "entropy": 1.099536418914795, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5465600, "episodes_total": 13664, "training_iteration": 427, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-26-47", "timestamp": 1660260407, "time_this_iter_s": 30.428364992141724, "time_total_s": 18816.332512140274, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 18816.332512140274, "timesteps_since_restore": 5465600, "iterations_since_restore": 427, "perf": {"cpu_util_percent": 33.06976744186046, "ram_util_percent": 59.05581395348838}}
-{"episode_reward_max": 639.0, "episode_reward_min": 462.0, "episode_reward_mean": 614.26, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 230.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 307.13}, "custom_metrics": {"sparse_reward_mean": 213.2, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 187.86, "shaped_reward_min": 142, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.72, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.42, "onion_pickup_agent_1_min": 14, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.39, "useful_onion_pickup_agent_0_min": 13, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.78, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.18, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.19, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.07, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.07, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.32, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.88, "potting_onion_agent_1_min": 14, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.79, "dish_pickup_agent_0_min": 5, "dish_pickup_agent_0_max": 13, "dish_pickup_agent_1_mean": 6.03, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.59, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.93, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.98, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 0.84, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.05, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.67, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.11, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.64, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.03, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.04, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 16.32, 
"optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.88, "optimal_onion_potting_agent_1_min": 14, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.32, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.88, "viable_onion_potting_agent_1_min": 14, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [636.0, 582.0, 582.0, 633.0, 639.0, 633.0, 636.0, 636.0, 633.0, 465.0, 630.0, 584.0, 570.0, 587.0, 627.0, 633.0, 630.0, 587.0, 636.0, 639.0, 462.0, 639.0, 633.0, 567.0, 582.0, 576.0, 582.0, 636.0, 633.0, 590.0, 630.0, 579.0, 627.0, 627.0, 630.0, 564.0, 630.0, 624.0, 636.0, 627.0, 582.0, 636.0, 636.0, 630.0, 573.0, 630.0, 630.0, 579.0, 633.0, 636.0, 630.0, 639.0, 627.0, 582.0, 639.0, 587.0, 630.0, 636.0, 584.0, 633.0, 630.0, 579.0, 618.0, 630.0, 627.0, 636.0, 587.0, 636.0, 630.0, 633.0, 573.0, 582.0, 579.0, 639.0, 636.0, 636.0, 633.0, 579.0, 630.0, 630.0, 630.0, 582.0, 627.0, 582.0, 630.0, 621.0, 630.0, 630.0, 633.0, 636.0, 627.0, 636.0, 584.0, 636.0, 633.0, 636.0, 630.0, 627.0, 633.0, 636.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [319.0, 317.0, 296.0, 286.0, 289.0, 293.0, 316.0, 317.0, 322.0, 317.0, 321.0, 312.0, 316.0, 320.0, 319.0, 317.0, 314.0, 319.0, 231.0, 234.0, 317.0, 313.0, 288.0, 296.0, 294.0, 276.0, 293.0, 294.0, 321.0, 306.0, 314.0, 319.0, 316.0, 314.0, 288.0, 299.0, 319.0, 317.0, 316.0, 323.0, 230.0, 232.0, 319.0, 320.0, 311.0, 322.0, 276.0, 291.0, 288.0, 294.0, 293.0, 283.0, 280.0, 302.0, 317.0, 319.0, 314.0, 319.0, 301.0, 289.0, 316.0, 314.0, 290.0, 289.0, 316.0, 311.0, 313.0, 314.0, 319.0, 311.0, 284.0, 280.0, 313.0, 317.0, 320.0, 304.0, 319.0, 317.0, 318.0, 309.0, 296.0, 286.0, 317.0, 319.0, 319.0, 317.0, 313.0, 317.0, 284.0, 289.0, 323.0, 307.0, 311.0, 319.0, 298.0, 281.0, 316.0, 
317.0, 319.0, 317.0, 316.0, 314.0, 319.0, 320.0, 313.0, 314.0, 286.0, 296.0, 322.0, 317.0, 283.0, 304.0, 316.0, 314.0, 314.0, 322.0, 295.0, 289.0, 319.0, 314.0, 314.0, 316.0, 288.0, 291.0, 299.0, 319.0, 316.0, 314.0, 311.0, 316.0, 313.0, 323.0, 293.0, 294.0, 316.0, 320.0, 311.0, 319.0, 317.0, 316.0, 289.0, 284.0, 293.0, 289.0, 287.0, 292.0, 317.0, 322.0, 314.0, 322.0, 316.0, 320.0, 316.0, 317.0, 297.0, 282.0, 317.0, 313.0, 314.0, 316.0, 324.0, 306.0, 286.0, 296.0, 318.0, 309.0, 297.0, 285.0, 311.0, 319.0, 319.0, 302.0, 316.0, 314.0, 316.0, 314.0, 316.0, 317.0, 316.0, 320.0, 319.0, 308.0, 319.0, 317.0, 290.0, 294.0, 314.0, 322.0, 314.0, 319.0, 314.0, 322.0, 314.0, 316.0, 316.0, 311.0, 317.0, 316.0, 319.0, 317.0]}, "sampler_perf": {"mean_env_wait_ms": 0.803277493605749, "mean_processing_ms": 0.22911824330865546, "mean_inference_ms": 1.4058271454809743}, "off_policy_estimator": {}, "info": {"num_steps_trained": 10272000, "num_steps_sampled": 5478400, "sample_time_ms": 22384.398, "load_time_ms": 36.404, "grad_time_ms": 10797.09, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0016918530454859138, "policy_loss": -0.005912030581384897, "vf_loss": 81.57828521728516, "vf_explained_var": 0.7692078948020935, "kl": 0.0022940493654459715, "entropy": 1.1078943014144897, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5478400, "episodes_total": 13696, "training_iteration": 428, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-27-18", "timestamp": 1660260438, "time_this_iter_s": 31.51498508453369, "time_total_s": 18847.847497224808, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 18847.847497224808, "timesteps_since_restore": 5478400, "iterations_since_restore": 428, "perf": {"cpu_util_percent": 32.73555555555556, "ram_util_percent": 59.61555555555556}}
-{"episode_reward_max": 639.0, "episode_reward_min": 462.0, "episode_reward_mean": 610.95, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 230.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 305.475}, "custom_metrics": {"sparse_reward_mean": 211.8, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 187.35, "shaped_reward_min": 142, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.98, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.21, "onion_pickup_agent_1_min": 14, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.55, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.37, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.26, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.26, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.1, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.09, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.44, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.66, "potting_onion_agent_1_min": 14, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.73, "dish_pickup_agent_0_min": 5, "dish_pickup_agent_0_max": 13, "dish_pickup_agent_1_mean": 5.91, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.59, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.96, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.99, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 0.69, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.06, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.61, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.11, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.56, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.04, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.04, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 16.44, 
"optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.66, "optimal_onion_potting_agent_1_min": 14, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.44, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.66, "viable_onion_potting_agent_1_min": 14, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [636.0, 639.0, 570.0, 639.0, 590.0, 579.0, 579.0, 639.0, 636.0, 584.0, 522.0, 582.0, 627.0, 633.0, 618.0, 627.0, 584.0, 633.0, 639.0, 576.0, 582.0, 636.0, 630.0, 522.0, 627.0, 636.0, 630.0, 630.0, 525.0, 630.0, 630.0, 633.0, 627.0, 636.0, 587.0, 636.0, 630.0, 633.0, 573.0, 582.0, 579.0, 639.0, 636.0, 636.0, 633.0, 579.0, 630.0, 630.0, 630.0, 582.0, 627.0, 582.0, 630.0, 621.0, 630.0, 630.0, 633.0, 636.0, 627.0, 636.0, 584.0, 636.0, 633.0, 636.0, 630.0, 627.0, 633.0, 636.0, 636.0, 582.0, 582.0, 633.0, 639.0, 633.0, 636.0, 636.0, 633.0, 465.0, 630.0, 584.0, 570.0, 587.0, 627.0, 633.0, 630.0, 587.0, 636.0, 639.0, 462.0, 639.0, 633.0, 567.0, 582.0, 576.0, 582.0, 636.0, 633.0, 590.0, 630.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [316.0, 320.0, 319.0, 320.0, 290.0, 280.0, 324.0, 315.0, 293.0, 297.0, 297.0, 282.0, 285.0, 294.0, 319.0, 320.0, 314.0, 322.0, 293.0, 291.0, 265.0, 257.0, 297.0, 285.0, 316.0, 311.0, 317.0, 316.0, 307.0, 311.0, 323.0, 304.0, 299.0, 285.0, 319.0, 314.0, 319.0, 320.0, 285.0, 291.0, 291.0, 291.0, 322.0, 314.0, 314.0, 316.0, 267.0, 255.0, 313.0, 314.0, 319.0, 317.0, 319.0, 311.0, 313.0, 317.0, 265.0, 260.0, 311.0, 319.0, 319.0, 311.0, 314.0, 319.0, 311.0, 316.0, 313.0, 323.0, 293.0, 294.0, 316.0, 320.0, 311.0, 319.0, 317.0, 316.0, 289.0, 284.0, 293.0, 289.0, 287.0, 292.0, 317.0, 322.0, 314.0, 322.0, 316.0, 320.0, 316.0, 317.0, 297.0, 282.0, 317.0, 313.0, 314.0, 316.0, 324.0, 
306.0, 286.0, 296.0, 318.0, 309.0, 297.0, 285.0, 311.0, 319.0, 319.0, 302.0, 316.0, 314.0, 316.0, 314.0, 316.0, 317.0, 316.0, 320.0, 319.0, 308.0, 319.0, 317.0, 290.0, 294.0, 314.0, 322.0, 314.0, 319.0, 314.0, 322.0, 314.0, 316.0, 316.0, 311.0, 317.0, 316.0, 319.0, 317.0, 319.0, 317.0, 296.0, 286.0, 289.0, 293.0, 316.0, 317.0, 322.0, 317.0, 321.0, 312.0, 316.0, 320.0, 319.0, 317.0, 314.0, 319.0, 231.0, 234.0, 317.0, 313.0, 288.0, 296.0, 294.0, 276.0, 293.0, 294.0, 321.0, 306.0, 314.0, 319.0, 316.0, 314.0, 288.0, 299.0, 319.0, 317.0, 316.0, 323.0, 230.0, 232.0, 319.0, 320.0, 311.0, 322.0, 276.0, 291.0, 288.0, 294.0, 293.0, 283.0, 280.0, 302.0, 317.0, 319.0, 314.0, 319.0, 301.0, 289.0, 316.0, 314.0, 290.0, 289.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8024594853265918, "mean_processing_ms": 0.2289562714759904, "mean_inference_ms": 1.4049375994877125}, "off_policy_estimator": {}, "info": {"num_steps_trained": 10296000, "num_steps_sampled": 5491200, "sample_time_ms": 22179.532, "load_time_ms": 37.018, "grad_time_ms": 10788.772, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0015958556905388832, "policy_loss": -0.010051627643406391, "vf_loss": 90.0771713256836, "vf_explained_var": 0.7485197186470032, "kl": 0.0020946140866726637, "entropy": 1.1038951873779297, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5491200, "episodes_total": 13728, "training_iteration": 429, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-27-50", "timestamp": 1660260470, "time_this_iter_s": 31.638920783996582, "time_total_s": 18879.486418008804, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 18879.486418008804, "timesteps_since_restore": 5491200, "iterations_since_restore": 429, "perf": {"cpu_util_percent": 30.328888888888887, "ram_util_percent": 59.14666666666665}}
-{"episode_reward_max": 639.0, "episode_reward_min": 351.0, "episode_reward_mean": 605.17, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 174.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 302.585}, "custom_metrics": {"sparse_reward_mean": 209.6, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 185.97, "shaped_reward_min": 111, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.86, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.12, "onion_pickup_agent_1_min": 13, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.3, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.35, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.27, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.23, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.1, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.08, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.24, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.61, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 20, "dish_pickup_agent_0_mean": 6.68, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 13, "dish_pickup_agent_1_mean": 6.17, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.52, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.97, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.99, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 0.95, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.06, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.58, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.04, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 6, "soup_delivery_agent_0_mean": 5.52, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.98, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.24, 
"optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.61, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 20, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.24, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.61, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 20, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [630.0, 579.0, 630.0, 630.0, 351.0, 633.0, 636.0, 582.0, 582.0, 587.0, 576.0, 582.0, 633.0, 633.0, 633.0, 633.0, 544.0, 633.0, 579.0, 582.0, 639.0, 633.0, 636.0, 639.0, 558.0, 576.0, 636.0, 627.0, 576.0, 636.0, 587.0, 630.0, 630.0, 627.0, 633.0, 636.0, 636.0, 582.0, 582.0, 633.0, 639.0, 633.0, 636.0, 636.0, 633.0, 465.0, 630.0, 584.0, 570.0, 587.0, 627.0, 633.0, 630.0, 587.0, 636.0, 639.0, 462.0, 639.0, 633.0, 567.0, 582.0, 576.0, 582.0, 636.0, 633.0, 590.0, 630.0, 579.0, 636.0, 639.0, 570.0, 639.0, 590.0, 579.0, 579.0, 639.0, 636.0, 584.0, 522.0, 582.0, 627.0, 633.0, 618.0, 627.0, 584.0, 633.0, 639.0, 576.0, 582.0, 636.0, 630.0, 522.0, 627.0, 636.0, 630.0, 630.0, 525.0, 630.0, 630.0, 633.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [311.0, 319.0, 293.0, 286.0, 311.0, 319.0, 313.0, 317.0, 177.0, 174.0, 314.0, 319.0, 319.0, 317.0, 294.0, 288.0, 286.0, 296.0, 293.0, 294.0, 282.0, 294.0, 288.0, 294.0, 316.0, 317.0, 316.0, 317.0, 324.0, 309.0, 316.0, 317.0, 270.0, 274.0, 311.0, 322.0, 288.0, 291.0, 286.0, 296.0, 317.0, 322.0, 319.0, 314.0, 319.0, 317.0, 322.0, 317.0, 276.0, 282.0, 285.0, 291.0, 319.0, 317.0, 313.0, 314.0, 288.0, 288.0, 319.0, 317.0, 293.0, 294.0, 311.0, 319.0, 314.0, 316.0, 316.0, 311.0, 317.0, 316.0, 319.0, 317.0, 319.0, 317.0, 296.0, 286.0, 289.0, 293.0, 316.0, 317.0, 322.0, 317.0, 321.0, 312.0, 316.0, 320.0, 319.0, 317.0, 314.0, 319.0, 231.0, 234.0, 317.0, 313.0, 288.0, 296.0, 294.0, 
276.0, 293.0, 294.0, 321.0, 306.0, 314.0, 319.0, 316.0, 314.0, 288.0, 299.0, 319.0, 317.0, 316.0, 323.0, 230.0, 232.0, 319.0, 320.0, 311.0, 322.0, 276.0, 291.0, 288.0, 294.0, 293.0, 283.0, 280.0, 302.0, 317.0, 319.0, 314.0, 319.0, 301.0, 289.0, 316.0, 314.0, 290.0, 289.0, 316.0, 320.0, 319.0, 320.0, 290.0, 280.0, 324.0, 315.0, 293.0, 297.0, 297.0, 282.0, 285.0, 294.0, 319.0, 320.0, 314.0, 322.0, 293.0, 291.0, 265.0, 257.0, 297.0, 285.0, 316.0, 311.0, 317.0, 316.0, 307.0, 311.0, 323.0, 304.0, 299.0, 285.0, 319.0, 314.0, 319.0, 320.0, 285.0, 291.0, 291.0, 291.0, 322.0, 314.0, 314.0, 316.0, 267.0, 255.0, 313.0, 314.0, 319.0, 317.0, 319.0, 311.0, 313.0, 317.0, 265.0, 260.0, 311.0, 319.0, 319.0, 311.0, 314.0, 319.0]}, "sampler_perf": {"mean_env_wait_ms": 0.801647414418135, "mean_processing_ms": 0.22879516876474576, "mean_inference_ms": 1.404093752736684}, "off_policy_estimator": {}, "info": {"num_steps_trained": 10320000, "num_steps_sampled": 5504000, "sample_time_ms": 21723.978, "load_time_ms": 37.011, "grad_time_ms": 10801.836, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -4.737731069326401e-05, "policy_loss": -0.00790297333151102, "vf_loss": 84.07501220703125, "vf_explained_var": 0.7714950442314148, "kl": 0.0018305158009752631, "entropy": 1.1038156747817993, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5504000, "episodes_total": 13760, "training_iteration": 430, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-28-22", "timestamp": 1660260502, "time_this_iter_s": 32.1021990776062, "time_total_s": 18911.58861708641, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 18911.58861708641, "timesteps_since_restore": 5504000, "iterations_since_restore": 430, "perf": {"cpu_util_percent": 34.26, "ram_util_percent": 59.419999999999995}}
-{"episode_reward_max": 639.0, "episode_reward_min": 351.0, "episode_reward_mean": 607.03, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 174.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 303.515}, "custom_metrics": {"sparse_reward_mean": 210.4, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 186.23, "shaped_reward_min": 111, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.97, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 17.95, "onion_pickup_agent_1_min": 13, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 16.4, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.25, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.24, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.16, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.07, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.05, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.4, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.54, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 20, "dish_pickup_agent_0_mean": 6.77, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 6.44, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 5.43, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.94, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 1.12, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 1.13, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.07, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.54, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.13, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.46, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.09, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.4, 
"optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.54, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 20, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.4, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.54, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 20, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [630.0, 636.0, 624.0, 630.0, 630.0, 633.0, 587.0, 630.0, 633.0, 630.0, 639.0, 639.0, 630.0, 584.0, 564.0, 533.0, 636.0, 561.0, 630.0, 627.0, 522.0, 630.0, 636.0, 636.0, 624.0, 630.0, 587.0, 582.0, 636.0, 587.0, 587.0, 624.0, 633.0, 590.0, 630.0, 579.0, 636.0, 639.0, 570.0, 639.0, 590.0, 579.0, 579.0, 639.0, 636.0, 584.0, 522.0, 582.0, 627.0, 633.0, 618.0, 627.0, 584.0, 633.0, 639.0, 576.0, 582.0, 636.0, 630.0, 522.0, 627.0, 636.0, 630.0, 630.0, 525.0, 630.0, 630.0, 633.0, 630.0, 579.0, 630.0, 630.0, 351.0, 633.0, 636.0, 582.0, 582.0, 587.0, 576.0, 582.0, 633.0, 633.0, 633.0, 633.0, 544.0, 633.0, 579.0, 582.0, 639.0, 633.0, 636.0, 639.0, 558.0, 576.0, 636.0, 627.0, 576.0, 636.0, 587.0, 630.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [311.0, 319.0, 314.0, 322.0, 306.0, 318.0, 319.0, 311.0, 321.0, 309.0, 319.0, 314.0, 296.0, 291.0, 319.0, 311.0, 314.0, 319.0, 314.0, 316.0, 319.0, 320.0, 325.0, 314.0, 316.0, 314.0, 293.0, 291.0, 288.0, 276.0, 259.0, 274.0, 314.0, 322.0, 279.0, 282.0, 316.0, 314.0, 311.0, 316.0, 257.0, 265.0, 317.0, 313.0, 314.0, 322.0, 317.0, 319.0, 316.0, 308.0, 311.0, 319.0, 288.0, 299.0, 288.0, 294.0, 324.0, 312.0, 296.0, 291.0, 286.0, 301.0, 313.0, 311.0, 314.0, 319.0, 301.0, 289.0, 316.0, 314.0, 290.0, 289.0, 316.0, 320.0, 319.0, 320.0, 290.0, 280.0, 324.0, 315.0, 293.0, 297.0, 297.0, 282.0, 285.0, 294.0, 319.0, 320.0, 314.0, 322.0, 293.0, 291.0, 265.0, 257.0, 297.0, 285.0, 316.0, 
311.0, 317.0, 316.0, 307.0, 311.0, 323.0, 304.0, 299.0, 285.0, 319.0, 314.0, 319.0, 320.0, 285.0, 291.0, 291.0, 291.0, 322.0, 314.0, 314.0, 316.0, 267.0, 255.0, 313.0, 314.0, 319.0, 317.0, 319.0, 311.0, 313.0, 317.0, 265.0, 260.0, 311.0, 319.0, 319.0, 311.0, 314.0, 319.0, 311.0, 319.0, 293.0, 286.0, 311.0, 319.0, 313.0, 317.0, 177.0, 174.0, 314.0, 319.0, 319.0, 317.0, 294.0, 288.0, 286.0, 296.0, 293.0, 294.0, 282.0, 294.0, 288.0, 294.0, 316.0, 317.0, 316.0, 317.0, 324.0, 309.0, 316.0, 317.0, 270.0, 274.0, 311.0, 322.0, 288.0, 291.0, 286.0, 296.0, 317.0, 322.0, 319.0, 314.0, 319.0, 317.0, 322.0, 317.0, 276.0, 282.0, 285.0, 291.0, 319.0, 317.0, 313.0, 314.0, 288.0, 288.0, 319.0, 317.0, 293.0, 294.0, 311.0, 319.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8008314445541891, "mean_processing_ms": 0.2286322337318132, "mean_inference_ms": 1.4031615335106213}, "off_policy_estimator": {}, "info": {"num_steps_trained": 10344000, "num_steps_sampled": 5516800, "sample_time_ms": 21234.37, "load_time_ms": 37.138, "grad_time_ms": 10446.294, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.004487487021833658, "policy_loss": -0.002838247222825885, "vf_loss": 78.8043441772461, "vf_explained_var": 0.7659228444099426, "kl": 0.0018056267872452736, "entropy": 1.109397053718567, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5516800, "episodes_total": 13792, "training_iteration": 431, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-28-51", "timestamp": 1660260531, "time_this_iter_s": 29.235426902770996, "time_total_s": 18940.82404398918, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 18940.82404398918, "timesteps_since_restore": 5516800, "iterations_since_restore": 431, "perf": {"cpu_util_percent": 33.543902439024386, "ram_util_percent": 59.482926829268294}}
-{"episode_reward_max": 639.0, "episode_reward_min": 351.0, "episode_reward_mean": 606.48, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 174.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 303.24}, "custom_metrics": {"sparse_reward_mean": 210.2, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 186.08, "shaped_reward_min": 111, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.91, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 17.95, "onion_pickup_agent_1_min": 13, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.39, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.27, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.22, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.14, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.08, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.07, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.37, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.53, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.75, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 6.45, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 5.41, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.0, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 1.16, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 1.11, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.48, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.16, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.42, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.11, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.37, 
"optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.53, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.37, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.53, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 633.0, 630.0, 636.0, 587.0, 579.0, 633.0, 630.0, 587.0, 630.0, 576.0, 582.0, 630.0, 627.0, 525.0, 582.0, 630.0, 582.0, 582.0, 582.0, 633.0, 624.0, 582.0, 636.0, 522.0, 633.0, 636.0, 636.0, 636.0, 630.0, 582.0, 630.0, 525.0, 630.0, 630.0, 633.0, 630.0, 579.0, 630.0, 630.0, 351.0, 633.0, 636.0, 582.0, 582.0, 587.0, 576.0, 582.0, 633.0, 633.0, 633.0, 633.0, 544.0, 633.0, 579.0, 582.0, 639.0, 633.0, 636.0, 639.0, 558.0, 576.0, 636.0, 627.0, 576.0, 636.0, 587.0, 630.0, 630.0, 636.0, 624.0, 630.0, 630.0, 633.0, 587.0, 630.0, 633.0, 630.0, 639.0, 639.0, 630.0, 584.0, 564.0, 533.0, 636.0, 561.0, 630.0, 627.0, 522.0, 630.0, 636.0, 636.0, 624.0, 630.0, 587.0, 582.0, 636.0, 587.0, 587.0, 624.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [290.0, 289.0, 316.0, 317.0, 311.0, 319.0, 314.0, 322.0, 288.0, 299.0, 288.0, 291.0, 311.0, 322.0, 316.0, 314.0, 301.0, 286.0, 311.0, 319.0, 305.0, 271.0, 286.0, 296.0, 313.0, 317.0, 316.0, 311.0, 265.0, 260.0, 291.0, 291.0, 313.0, 317.0, 287.0, 295.0, 285.0, 297.0, 293.0, 289.0, 319.0, 314.0, 316.0, 308.0, 291.0, 291.0, 319.0, 317.0, 267.0, 255.0, 321.0, 312.0, 314.0, 322.0, 317.0, 319.0, 317.0, 319.0, 316.0, 314.0, 298.0, 284.0, 316.0, 314.0, 265.0, 260.0, 311.0, 319.0, 319.0, 311.0, 314.0, 319.0, 311.0, 319.0, 293.0, 286.0, 311.0, 319.0, 313.0, 317.0, 177.0, 174.0, 314.0, 319.0, 319.0, 317.0, 294.0, 288.0, 286.0, 296.0, 293.0, 294.0, 282.0, 294.0, 288.0, 294.0, 316.0, 
317.0, 316.0, 317.0, 324.0, 309.0, 316.0, 317.0, 270.0, 274.0, 311.0, 322.0, 288.0, 291.0, 286.0, 296.0, 317.0, 322.0, 319.0, 314.0, 319.0, 317.0, 322.0, 317.0, 276.0, 282.0, 285.0, 291.0, 319.0, 317.0, 313.0, 314.0, 288.0, 288.0, 319.0, 317.0, 293.0, 294.0, 311.0, 319.0, 311.0, 319.0, 314.0, 322.0, 306.0, 318.0, 319.0, 311.0, 321.0, 309.0, 319.0, 314.0, 296.0, 291.0, 319.0, 311.0, 314.0, 319.0, 314.0, 316.0, 319.0, 320.0, 325.0, 314.0, 316.0, 314.0, 293.0, 291.0, 288.0, 276.0, 259.0, 274.0, 314.0, 322.0, 279.0, 282.0, 316.0, 314.0, 311.0, 316.0, 257.0, 265.0, 317.0, 313.0, 314.0, 322.0, 317.0, 319.0, 316.0, 308.0, 311.0, 319.0, 288.0, 299.0, 288.0, 294.0, 324.0, 312.0, 296.0, 291.0, 286.0, 301.0, 313.0, 311.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8000283085438272, "mean_processing_ms": 0.22847315169555785, "mean_inference_ms": 1.4024220813317556}, "off_policy_estimator": {}, "info": {"num_steps_trained": 10368000, "num_steps_sampled": 5529600, "sample_time_ms": 21335.175, "load_time_ms": 37.375, "grad_time_ms": 10351.537, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.001511982991360128, "policy_loss": -0.006039683241397142, "vf_loss": 81.0816650390625, "vf_explained_var": 0.766996443271637, "kl": 0.0019059469923377037, "entropy": 1.1129895448684692, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5529600, "episodes_total": 13824, "training_iteration": 432, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-29-27", "timestamp": 1660260567, "time_this_iter_s": 35.764232873916626, "time_total_s": 18976.588276863098, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 18976.588276863098, "timesteps_since_restore": 5529600, "iterations_since_restore": 432, "perf": {"cpu_util_percent": 31.023529411764706, "ram_util_percent": 59.011764705882364}}
-{"episode_reward_max": 639.0, "episode_reward_min": 522.0, "episode_reward_mean": 609.94, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 255.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 304.97}, "custom_metrics": {"sparse_reward_mean": 211.4, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 187.14, "shaped_reward_min": 161, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.04, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 17.89, "onion_pickup_agent_1_min": 14, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.55, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.31, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.23, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.12, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.08, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.04, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.51, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.53, "potting_onion_agent_1_min": 14, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.86, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 6.37, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 5.47, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.07, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 1.24, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 1.01, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.5, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.18, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.45, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.13, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.51, 
"optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.53, "optimal_onion_potting_agent_1_min": 14, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.51, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.53, "viable_onion_potting_agent_1_min": 14, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [633.0, 582.0, 579.0, 636.0, 579.0, 582.0, 576.0, 587.0, 627.0, 630.0, 587.0, 630.0, 636.0, 633.0, 636.0, 636.0, 624.0, 630.0, 582.0, 633.0, 630.0, 633.0, 582.0, 636.0, 582.0, 630.0, 630.0, 633.0, 636.0, 582.0, 582.0, 582.0, 576.0, 636.0, 587.0, 630.0, 630.0, 636.0, 624.0, 630.0, 630.0, 633.0, 587.0, 630.0, 633.0, 630.0, 639.0, 639.0, 630.0, 584.0, 564.0, 533.0, 636.0, 561.0, 630.0, 627.0, 522.0, 630.0, 636.0, 636.0, 624.0, 630.0, 587.0, 582.0, 636.0, 587.0, 587.0, 624.0, 579.0, 633.0, 630.0, 636.0, 587.0, 579.0, 633.0, 630.0, 587.0, 630.0, 576.0, 582.0, 630.0, 627.0, 525.0, 582.0, 630.0, 582.0, 582.0, 582.0, 633.0, 624.0, 582.0, 636.0, 522.0, 633.0, 636.0, 636.0, 636.0, 630.0, 582.0, 630.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [314.0, 319.0, 286.0, 296.0, 288.0, 291.0, 319.0, 317.0, 293.0, 286.0, 286.0, 296.0, 292.0, 284.0, 296.0, 291.0, 313.0, 314.0, 316.0, 314.0, 293.0, 294.0, 314.0, 316.0, 324.0, 312.0, 319.0, 314.0, 319.0, 317.0, 317.0, 319.0, 310.0, 314.0, 322.0, 308.0, 290.0, 292.0, 316.0, 317.0, 316.0, 314.0, 317.0, 316.0, 293.0, 289.0, 317.0, 319.0, 289.0, 293.0, 316.0, 314.0, 306.0, 324.0, 314.0, 319.0, 317.0, 319.0, 293.0, 289.0, 289.0, 293.0, 288.0, 294.0, 288.0, 288.0, 319.0, 317.0, 293.0, 294.0, 311.0, 319.0, 311.0, 319.0, 314.0, 322.0, 306.0, 318.0, 319.0, 311.0, 321.0, 309.0, 319.0, 314.0, 296.0, 291.0, 319.0, 311.0, 314.0, 319.0, 314.0, 316.0, 319.0, 320.0, 325.0, 314.0, 316.0, 
314.0, 293.0, 291.0, 288.0, 276.0, 259.0, 274.0, 314.0, 322.0, 279.0, 282.0, 316.0, 314.0, 311.0, 316.0, 257.0, 265.0, 317.0, 313.0, 314.0, 322.0, 317.0, 319.0, 316.0, 308.0, 311.0, 319.0, 288.0, 299.0, 288.0, 294.0, 324.0, 312.0, 296.0, 291.0, 286.0, 301.0, 313.0, 311.0, 290.0, 289.0, 316.0, 317.0, 311.0, 319.0, 314.0, 322.0, 288.0, 299.0, 288.0, 291.0, 311.0, 322.0, 316.0, 314.0, 301.0, 286.0, 311.0, 319.0, 305.0, 271.0, 286.0, 296.0, 313.0, 317.0, 316.0, 311.0, 265.0, 260.0, 291.0, 291.0, 313.0, 317.0, 287.0, 295.0, 285.0, 297.0, 293.0, 289.0, 319.0, 314.0, 316.0, 308.0, 291.0, 291.0, 319.0, 317.0, 267.0, 255.0, 321.0, 312.0, 314.0, 322.0, 317.0, 319.0, 317.0, 319.0, 316.0, 314.0, 298.0, 284.0, 316.0, 314.0]}, "sampler_perf": {"mean_env_wait_ms": 0.7992234752910505, "mean_processing_ms": 0.2283129313200136, "mean_inference_ms": 1.4016145546737357}, "off_policy_estimator": {}, "info": {"num_steps_trained": 10392000, "num_steps_sampled": 5542400, "sample_time_ms": 21105.654, "load_time_ms": 37.195, "grad_time_ms": 10237.421, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.00017731667321640998, "policy_loss": -0.007162818219512701, "vf_loss": 79.00240325927734, "vf_explained_var": 0.7636518478393555, "kl": 0.0019576705526560545, "entropy": 1.1202179193496704, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5542400, "episodes_total": 13856, "training_iteration": 433, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-29-57", "timestamp": 1660260597, "time_this_iter_s": 30.04263925552368, "time_total_s": 19006.630916118622, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 19006.630916118622, "timesteps_since_restore": 5542400, "iterations_since_restore": 433, "perf": {"cpu_util_percent": 28.414285714285718, "ram_util_percent": 59.08571428571428}}
-{"episode_reward_max": 636.0, "episode_reward_min": 522.0, "episode_reward_mean": 608.07, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 255.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 304.035}, "custom_metrics": {"sparse_reward_mean": 210.4, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 187.27, "shaped_reward_min": 162, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.87, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.01, "onion_pickup_agent_1_min": 14, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.44, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 19, "useful_onion_pickup_agent_1_mean": 17.48, "useful_onion_pickup_agent_1_min": 12, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.19, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.15, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.06, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.06, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.38, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.62, "potting_onion_agent_1_min": 14, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.75, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 6.1, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.6, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.09, "useful_dish_pickup_agent_1_min": 3, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 1.01, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.87, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.61, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.03, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.56, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.98, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.38, 
"optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.62, "optimal_onion_potting_agent_1_min": 14, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.38, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.62, "viable_onion_potting_agent_1_min": 14, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [587.0, 636.0, 582.0, 582.0, 633.0, 636.0, 582.0, 630.0, 630.0, 636.0, 587.0, 633.0, 633.0, 636.0, 636.0, 570.0, 576.0, 624.0, 587.0, 636.0, 582.0, 587.0, 582.0, 576.0, 579.0, 582.0, 630.0, 627.0, 633.0, 590.0, 636.0, 539.0, 636.0, 587.0, 587.0, 624.0, 579.0, 633.0, 630.0, 636.0, 587.0, 579.0, 633.0, 630.0, 587.0, 630.0, 576.0, 582.0, 630.0, 627.0, 525.0, 582.0, 630.0, 582.0, 582.0, 582.0, 633.0, 624.0, 582.0, 636.0, 522.0, 633.0, 636.0, 636.0, 636.0, 630.0, 582.0, 630.0, 633.0, 582.0, 579.0, 636.0, 579.0, 582.0, 576.0, 587.0, 627.0, 630.0, 587.0, 630.0, 636.0, 633.0, 636.0, 636.0, 624.0, 630.0, 582.0, 633.0, 630.0, 633.0, 582.0, 636.0, 582.0, 630.0, 630.0, 633.0, 636.0, 582.0, 582.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [296.0, 291.0, 322.0, 314.0, 293.0, 289.0, 288.0, 294.0, 319.0, 314.0, 317.0, 319.0, 294.0, 288.0, 313.0, 317.0, 316.0, 314.0, 319.0, 317.0, 293.0, 294.0, 319.0, 314.0, 319.0, 314.0, 319.0, 317.0, 317.0, 319.0, 277.0, 293.0, 295.0, 281.0, 308.0, 316.0, 296.0, 291.0, 319.0, 317.0, 296.0, 286.0, 291.0, 296.0, 288.0, 294.0, 290.0, 286.0, 293.0, 286.0, 296.0, 286.0, 316.0, 314.0, 313.0, 314.0, 319.0, 314.0, 299.0, 291.0, 319.0, 317.0, 268.0, 271.0, 324.0, 312.0, 296.0, 291.0, 286.0, 301.0, 313.0, 311.0, 290.0, 289.0, 316.0, 317.0, 311.0, 319.0, 314.0, 322.0, 288.0, 299.0, 288.0, 291.0, 311.0, 322.0, 316.0, 314.0, 301.0, 286.0, 311.0, 319.0, 305.0, 271.0, 286.0, 296.0, 313.0, 
317.0, 316.0, 311.0, 265.0, 260.0, 291.0, 291.0, 313.0, 317.0, 287.0, 295.0, 285.0, 297.0, 293.0, 289.0, 319.0, 314.0, 316.0, 308.0, 291.0, 291.0, 319.0, 317.0, 267.0, 255.0, 321.0, 312.0, 314.0, 322.0, 317.0, 319.0, 317.0, 319.0, 316.0, 314.0, 298.0, 284.0, 316.0, 314.0, 314.0, 319.0, 286.0, 296.0, 288.0, 291.0, 319.0, 317.0, 293.0, 286.0, 286.0, 296.0, 292.0, 284.0, 296.0, 291.0, 313.0, 314.0, 316.0, 314.0, 293.0, 294.0, 314.0, 316.0, 324.0, 312.0, 319.0, 314.0, 319.0, 317.0, 317.0, 319.0, 310.0, 314.0, 322.0, 308.0, 290.0, 292.0, 316.0, 317.0, 316.0, 314.0, 317.0, 316.0, 293.0, 289.0, 317.0, 319.0, 289.0, 293.0, 316.0, 314.0, 306.0, 324.0, 314.0, 319.0, 317.0, 319.0, 293.0, 289.0, 289.0, 293.0, 288.0, 294.0]}, "sampler_perf": {"mean_env_wait_ms": 0.7984267196562377, "mean_processing_ms": 0.2281543074598729, "mean_inference_ms": 1.4008634634421164}, "off_policy_estimator": {}, "info": {"num_steps_trained": 10416000, "num_steps_sampled": 5555200, "sample_time_ms": 20963.356, "load_time_ms": 37.144, "grad_time_ms": 9878.857, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0020503299310803413, "policy_loss": -0.0053678578697144985, "vf_loss": 79.79612731933594, "vf_explained_var": 0.7709012627601624, "kl": 0.0022744529414922, "entropy": 1.1228529214859009, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5555200, "episodes_total": 13888, "training_iteration": 434, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-30-26", "timestamp": 1660260626, "time_this_iter_s": 28.891623735427856, "time_total_s": 19035.52253985405, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 19035.52253985405, "timesteps_since_restore": 5555200, "iterations_since_restore": 434, "perf": {"cpu_util_percent": 35.10975609756097, "ram_util_percent": 59.18292682926829}}
-{"episode_reward_max": 639.0, "episode_reward_min": 539.0, "episode_reward_mean": 609.01, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 268.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 304.505}, "custom_metrics": {"sparse_reward_mean": 210.6, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 187.81, "shaped_reward_min": 170, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.86, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 17.97, "onion_pickup_agent_1_min": 14, "onion_pickup_agent_1_max": 20, "useful_onion_pickup_agent_0_mean": 16.39, "useful_onion_pickup_agent_0_min": 13, "useful_onion_pickup_agent_0_max": 19, "useful_onion_pickup_agent_1_mean": 17.51, "useful_onion_pickup_agent_1_min": 12, "useful_onion_pickup_agent_1_max": 20, "onion_drop_agent_0_mean": 0.16, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.12, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.05, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.04, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.39, "potting_onion_agent_0_min": 13, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.58, "potting_onion_agent_1_min": 14, "potting_onion_agent_1_max": 20, "dish_pickup_agent_0_mean": 6.62, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 6.07, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.64, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.16, "useful_dish_pickup_agent_1_min": 3, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.87, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.8, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.62, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.08, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.54, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.01, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.39, 
"optimal_onion_potting_agent_0_min": 13, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.58, "optimal_onion_potting_agent_1_min": 14, "optimal_onion_potting_agent_1_max": 20, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.39, "viable_onion_potting_agent_0_min": 13, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.58, "viable_onion_potting_agent_1_min": 14, "viable_onion_potting_agent_1_max": 20, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 630.0, 630.0, 587.0, 582.0, 639.0, 587.0, 584.0, 584.0, 633.0, 633.0, 582.0, 587.0, 630.0, 633.0, 633.0, 582.0, 582.0, 630.0, 630.0, 630.0, 633.0, 590.0, 633.0, 630.0, 630.0, 587.0, 630.0, 584.0, 587.0, 576.0, 582.0, 636.0, 630.0, 582.0, 630.0, 633.0, 582.0, 579.0, 636.0, 579.0, 582.0, 576.0, 587.0, 627.0, 630.0, 587.0, 630.0, 636.0, 633.0, 636.0, 636.0, 624.0, 630.0, 582.0, 633.0, 630.0, 633.0, 582.0, 636.0, 582.0, 630.0, 630.0, 633.0, 636.0, 582.0, 582.0, 582.0, 587.0, 636.0, 582.0, 582.0, 633.0, 636.0, 582.0, 630.0, 630.0, 636.0, 587.0, 633.0, 633.0, 636.0, 636.0, 570.0, 576.0, 624.0, 587.0, 636.0, 582.0, 587.0, 582.0, 576.0, 579.0, 582.0, 630.0, 627.0, 633.0, 590.0, 636.0, 539.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [288.0, 294.0, 311.0, 319.0, 321.0, 309.0, 291.0, 296.0, 301.0, 281.0, 319.0, 320.0, 293.0, 294.0, 286.0, 298.0, 288.0, 296.0, 314.0, 319.0, 319.0, 314.0, 293.0, 289.0, 293.0, 294.0, 314.0, 316.0, 313.0, 320.0, 316.0, 317.0, 291.0, 291.0, 288.0, 294.0, 316.0, 314.0, 316.0, 314.0, 319.0, 311.0, 324.0, 309.0, 296.0, 294.0, 314.0, 319.0, 316.0, 314.0, 311.0, 319.0, 301.0, 286.0, 319.0, 311.0, 293.0, 291.0, 299.0, 288.0, 274.0, 302.0, 286.0, 296.0, 317.0, 319.0, 316.0, 314.0, 298.0, 284.0, 316.0, 314.0, 314.0, 319.0, 286.0, 296.0, 288.0, 291.0, 319.0, 317.0, 293.0, 286.0, 286.0, 296.0, 292.0, 284.0, 296.0, 291.0, 313.0, 314.0, 316.0, 314.0, 293.0, 294.0, 314.0, 316.0, 324.0, 
312.0, 319.0, 314.0, 319.0, 317.0, 317.0, 319.0, 310.0, 314.0, 322.0, 308.0, 290.0, 292.0, 316.0, 317.0, 316.0, 314.0, 317.0, 316.0, 293.0, 289.0, 317.0, 319.0, 289.0, 293.0, 316.0, 314.0, 306.0, 324.0, 314.0, 319.0, 317.0, 319.0, 293.0, 289.0, 289.0, 293.0, 288.0, 294.0, 296.0, 291.0, 322.0, 314.0, 293.0, 289.0, 288.0, 294.0, 319.0, 314.0, 317.0, 319.0, 294.0, 288.0, 313.0, 317.0, 316.0, 314.0, 319.0, 317.0, 293.0, 294.0, 319.0, 314.0, 319.0, 314.0, 319.0, 317.0, 317.0, 319.0, 277.0, 293.0, 295.0, 281.0, 308.0, 316.0, 296.0, 291.0, 319.0, 317.0, 296.0, 286.0, 291.0, 296.0, 288.0, 294.0, 290.0, 286.0, 293.0, 286.0, 296.0, 286.0, 316.0, 314.0, 313.0, 314.0, 319.0, 314.0, 299.0, 291.0, 319.0, 317.0, 268.0, 271.0]}, "sampler_perf": {"mean_env_wait_ms": 0.7976245186877899, "mean_processing_ms": 0.22799297194787038, "mean_inference_ms": 1.3999329365484723}, "off_policy_estimator": {}, "info": {"num_steps_trained": 10440000, "num_steps_sampled": 5568000, "sample_time_ms": 21006.175, "load_time_ms": 37.426, "grad_time_ms": 9977.404, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0013511897996068, "policy_loss": -0.006047597620636225, "vf_loss": 79.5729751586914, "vf_explained_var": 0.7702791094779968, "kl": 0.001890461309812963, "entropy": 1.1170209646224976, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5568000, "episodes_total": 13920, "training_iteration": 435, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-30-58", "timestamp": 1660260658, "time_this_iter_s": 31.331193923950195, "time_total_s": 19066.853733778, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 19066.853733778, "timesteps_since_restore": 5568000, "iterations_since_restore": 435, "perf": {"cpu_util_percent": 33.626666666666665, "ram_util_percent": 58.966666666666676}}
-{"episode_reward_max": 639.0, "episode_reward_min": 522.0, "episode_reward_mean": 605.86, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 250.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 302.93}, "custom_metrics": {"sparse_reward_mean": 209.2, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 187.46, "shaped_reward_min": 162, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.81, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.0, "onion_pickup_agent_1_min": 15, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 16.36, "useful_onion_pickup_agent_0_min": 13, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.42, "useful_onion_pickup_agent_1_min": 12, "useful_onion_pickup_agent_1_max": 20, "onion_drop_agent_0_mean": 0.18, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.11, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.12, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.05, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.31, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.61, "potting_onion_agent_1_min": 15, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.51, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.86, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.68, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.12, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.74, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.67, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.63, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.04, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.54, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.95, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.31, 
"optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.61, "optimal_onion_potting_agent_1_min": 15, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.31, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.61, "viable_onion_potting_agent_1_min": 15, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [630.0, 579.0, 630.0, 582.0, 630.0, 587.0, 630.0, 522.0, 636.0, 590.0, 633.0, 582.0, 636.0, 636.0, 639.0, 582.0, 582.0, 633.0, 579.0, 579.0, 536.0, 582.0, 636.0, 630.0, 636.0, 587.0, 587.0, 639.0, 630.0, 633.0, 582.0, 582.0, 636.0, 582.0, 582.0, 582.0, 587.0, 636.0, 582.0, 582.0, 633.0, 636.0, 582.0, 630.0, 630.0, 636.0, 587.0, 633.0, 633.0, 636.0, 636.0, 570.0, 576.0, 624.0, 587.0, 636.0, 582.0, 587.0, 582.0, 576.0, 579.0, 582.0, 630.0, 627.0, 633.0, 590.0, 636.0, 539.0, 582.0, 630.0, 630.0, 587.0, 582.0, 639.0, 587.0, 584.0, 584.0, 633.0, 633.0, 582.0, 587.0, 630.0, 633.0, 633.0, 582.0, 582.0, 630.0, 630.0, 630.0, 633.0, 590.0, 633.0, 630.0, 630.0, 587.0, 630.0, 584.0, 587.0, 576.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [314.0, 316.0, 296.0, 283.0, 316.0, 314.0, 293.0, 289.0, 316.0, 314.0, 301.0, 286.0, 314.0, 316.0, 272.0, 250.0, 319.0, 317.0, 296.0, 294.0, 311.0, 322.0, 299.0, 283.0, 319.0, 317.0, 322.0, 314.0, 319.0, 320.0, 294.0, 288.0, 289.0, 293.0, 321.0, 312.0, 286.0, 293.0, 288.0, 291.0, 267.0, 269.0, 293.0, 289.0, 319.0, 317.0, 311.0, 319.0, 319.0, 317.0, 285.0, 302.0, 288.0, 299.0, 321.0, 318.0, 316.0, 314.0, 314.0, 319.0, 294.0, 288.0, 283.0, 299.0, 317.0, 319.0, 293.0, 289.0, 289.0, 293.0, 288.0, 294.0, 296.0, 291.0, 322.0, 314.0, 293.0, 289.0, 288.0, 294.0, 319.0, 314.0, 317.0, 319.0, 294.0, 288.0, 313.0, 317.0, 316.0, 314.0, 319.0, 317.0, 293.0, 294.0, 319.0, 314.0, 319.0, 
314.0, 319.0, 317.0, 317.0, 319.0, 277.0, 293.0, 295.0, 281.0, 308.0, 316.0, 296.0, 291.0, 319.0, 317.0, 296.0, 286.0, 291.0, 296.0, 288.0, 294.0, 290.0, 286.0, 293.0, 286.0, 296.0, 286.0, 316.0, 314.0, 313.0, 314.0, 319.0, 314.0, 299.0, 291.0, 319.0, 317.0, 268.0, 271.0, 288.0, 294.0, 311.0, 319.0, 321.0, 309.0, 291.0, 296.0, 301.0, 281.0, 319.0, 320.0, 293.0, 294.0, 286.0, 298.0, 288.0, 296.0, 314.0, 319.0, 319.0, 314.0, 293.0, 289.0, 293.0, 294.0, 314.0, 316.0, 313.0, 320.0, 316.0, 317.0, 291.0, 291.0, 288.0, 294.0, 316.0, 314.0, 316.0, 314.0, 319.0, 311.0, 324.0, 309.0, 296.0, 294.0, 314.0, 319.0, 316.0, 314.0, 311.0, 319.0, 301.0, 286.0, 319.0, 311.0, 293.0, 291.0, 299.0, 288.0, 274.0, 302.0, 286.0, 296.0]}, "sampler_perf": {"mean_env_wait_ms": 0.7968373127766308, "mean_processing_ms": 0.22783629500024907, "mean_inference_ms": 1.3993492052434942}, "off_policy_estimator": {}, "info": {"num_steps_trained": 10464000, "num_steps_sampled": 5580800, "sample_time_ms": 21691.27, "load_time_ms": 37.308, "grad_time_ms": 10128.337, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0022821128368377686, "policy_loss": -0.005102970637381077, "vf_loss": 79.3483657836914, "vf_explained_var": 0.7713219523429871, "kl": 0.0023417342454195023, "entropy": 1.0995064973831177, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5580800, "episodes_total": 13952, "training_iteration": 436, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-31-35", "timestamp": 1660260695, "time_this_iter_s": 37.88511109352112, "time_total_s": 19104.73884487152, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 19104.73884487152, "timesteps_since_restore": 5580800, "iterations_since_restore": 436, "perf": {"cpu_util_percent": 29.92452830188679, "ram_util_percent": 59.107547169811326}}
-{"episode_reward_max": 639.0, "episode_reward_min": 311.0, "episode_reward_mean": 607.55, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 151.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 303.775}, "custom_metrics": {"sparse_reward_mean": 209.8, "sparse_reward_min": 100, "sparse_reward_max": 220, "shaped_reward_mean": 187.95, "shaped_reward_min": 111, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.68, "onion_pickup_agent_0_min": 10, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.23, "onion_pickup_agent_1_min": 15, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 16.21, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.55, "useful_onion_pickup_agent_1_min": 14, "useful_onion_pickup_agent_1_max": 20, "onion_drop_agent_0_mean": 0.2, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.12, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.14, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.03, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.17, "potting_onion_agent_0_min": 9, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.8, "potting_onion_agent_1_min": 12, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.66, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 5.83, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.75, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.08, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.84, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.68, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.69, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.03, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.59, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.92, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.17, 
"optimal_onion_potting_agent_0_min": 9, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.8, "optimal_onion_potting_agent_1_min": 12, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.17, "viable_onion_potting_agent_0_min": 9, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.8, "viable_onion_potting_agent_1_min": 12, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [627.0, 639.0, 627.0, 639.0, 633.0, 630.0, 636.0, 587.0, 587.0, 639.0, 630.0, 633.0, 630.0, 582.0, 573.0, 639.0, 587.0, 636.0, 633.0, 630.0, 581.0, 633.0, 582.0, 633.0, 311.0, 636.0, 630.0, 633.0, 636.0, 584.0, 636.0, 636.0, 633.0, 590.0, 636.0, 539.0, 582.0, 630.0, 630.0, 587.0, 582.0, 639.0, 587.0, 584.0, 584.0, 633.0, 633.0, 582.0, 587.0, 630.0, 633.0, 633.0, 582.0, 582.0, 630.0, 630.0, 630.0, 633.0, 590.0, 633.0, 630.0, 630.0, 587.0, 630.0, 584.0, 587.0, 576.0, 582.0, 630.0, 579.0, 630.0, 582.0, 630.0, 587.0, 630.0, 522.0, 636.0, 590.0, 633.0, 582.0, 636.0, 636.0, 639.0, 582.0, 582.0, 633.0, 579.0, 579.0, 536.0, 582.0, 636.0, 630.0, 636.0, 587.0, 587.0, 639.0, 630.0, 633.0, 582.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [316.0, 311.0, 322.0, 317.0, 313.0, 314.0, 325.0, 314.0, 314.0, 319.0, 321.0, 309.0, 316.0, 320.0, 288.0, 299.0, 301.0, 286.0, 317.0, 322.0, 319.0, 311.0, 316.0, 317.0, 316.0, 314.0, 279.0, 303.0, 289.0, 284.0, 319.0, 320.0, 291.0, 296.0, 321.0, 315.0, 316.0, 317.0, 316.0, 314.0, 293.0, 288.0, 316.0, 317.0, 293.0, 289.0, 316.0, 317.0, 151.0, 160.0, 319.0, 317.0, 316.0, 314.0, 319.0, 314.0, 319.0, 317.0, 291.0, 293.0, 311.0, 325.0, 319.0, 317.0, 319.0, 314.0, 299.0, 291.0, 319.0, 317.0, 268.0, 271.0, 288.0, 294.0, 311.0, 319.0, 321.0, 309.0, 291.0, 296.0, 301.0, 281.0, 319.0, 320.0, 293.0, 294.0, 286.0, 298.0, 288.0, 296.0, 314.0, 319.0, 319.0, 314.0, 293.0, 289.0, 293.0, 
294.0, 314.0, 316.0, 313.0, 320.0, 316.0, 317.0, 291.0, 291.0, 288.0, 294.0, 316.0, 314.0, 316.0, 314.0, 319.0, 311.0, 324.0, 309.0, 296.0, 294.0, 314.0, 319.0, 316.0, 314.0, 311.0, 319.0, 301.0, 286.0, 319.0, 311.0, 293.0, 291.0, 299.0, 288.0, 274.0, 302.0, 286.0, 296.0, 314.0, 316.0, 296.0, 283.0, 316.0, 314.0, 293.0, 289.0, 316.0, 314.0, 301.0, 286.0, 314.0, 316.0, 272.0, 250.0, 319.0, 317.0, 296.0, 294.0, 311.0, 322.0, 299.0, 283.0, 319.0, 317.0, 322.0, 314.0, 319.0, 320.0, 294.0, 288.0, 289.0, 293.0, 321.0, 312.0, 286.0, 293.0, 288.0, 291.0, 267.0, 269.0, 293.0, 289.0, 319.0, 317.0, 311.0, 319.0, 319.0, 317.0, 285.0, 302.0, 288.0, 299.0, 321.0, 318.0, 316.0, 314.0, 314.0, 319.0, 294.0, 288.0, 283.0, 299.0]}, "sampler_perf": {"mean_env_wait_ms": 0.796054313311972, "mean_processing_ms": 0.22768002877826965, "mean_inference_ms": 1.3988094341608432}, "off_policy_estimator": {}, "info": {"num_steps_trained": 10488000, "num_steps_sampled": 5593600, "sample_time_ms": 21763.453, "load_time_ms": 37.236, "grad_time_ms": 10078.856, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.00570017471909523, "policy_loss": -0.002029874362051487, "vf_loss": 82.7793960571289, "vf_explained_var": 0.7683680653572083, "kl": 0.002793658524751663, "entropy": 1.095770001411438, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5593600, "episodes_total": 13984, "training_iteration": 437, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-32-06", "timestamp": 1660260726, "time_this_iter_s": 30.656537771224976, "time_total_s": 19135.395382642746, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 19135.395382642746, "timesteps_since_restore": 5593600, "iterations_since_restore": 437, "perf": {"cpu_util_percent": 32.4046511627907, "ram_util_percent": 59.06744186046511}}
-{"episode_reward_max": 639.0, "episode_reward_min": 311.0, "episode_reward_mean": 609.49, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 151.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 304.745}, "custom_metrics": {"sparse_reward_mean": 210.8, "sparse_reward_min": 100, "sparse_reward_max": 220, "shaped_reward_mean": 187.89, "shaped_reward_min": 111, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.58, "onion_pickup_agent_0_min": 10, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.41, "onion_pickup_agent_1_min": 14, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.23, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.77, "useful_onion_pickup_agent_1_min": 14, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.2, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.13, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.1, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.02, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.1, "potting_onion_agent_0_min": 9, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 18.04, "potting_onion_agent_1_min": 12, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.75, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 5.84, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 5.66, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.03, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.94, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.65, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.68, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.01, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 6, "soup_delivery_agent_0_mean": 5.63, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.92, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.1, 
"optimal_onion_potting_agent_0_min": 9, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 18.04, "optimal_onion_potting_agent_1_min": 12, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.1, "viable_onion_potting_agent_0_min": 9, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 18.04, "viable_onion_potting_agent_1_min": 12, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [627.0, 639.0, 630.0, 636.0, 633.0, 636.0, 587.0, 639.0, 579.0, 636.0, 630.0, 627.0, 633.0, 639.0, 636.0, 564.0, 630.0, 630.0, 636.0, 636.0, 636.0, 639.0, 636.0, 633.0, 630.0, 633.0, 579.0, 627.0, 408.0, 579.0, 630.0, 582.0, 584.0, 587.0, 576.0, 582.0, 630.0, 579.0, 630.0, 582.0, 630.0, 587.0, 630.0, 522.0, 636.0, 590.0, 633.0, 582.0, 636.0, 636.0, 639.0, 582.0, 582.0, 633.0, 579.0, 579.0, 536.0, 582.0, 636.0, 630.0, 636.0, 587.0, 587.0, 639.0, 630.0, 633.0, 582.0, 582.0, 627.0, 639.0, 627.0, 639.0, 633.0, 630.0, 636.0, 587.0, 587.0, 639.0, 630.0, 633.0, 630.0, 582.0, 573.0, 639.0, 587.0, 636.0, 633.0, 630.0, 581.0, 633.0, 582.0, 633.0, 311.0, 636.0, 630.0, 633.0, 636.0, 584.0, 636.0, 636.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [309.0, 318.0, 319.0, 320.0, 316.0, 314.0, 314.0, 322.0, 314.0, 319.0, 314.0, 322.0, 285.0, 302.0, 319.0, 320.0, 285.0, 294.0, 319.0, 317.0, 311.0, 319.0, 313.0, 314.0, 313.0, 320.0, 319.0, 320.0, 324.0, 312.0, 274.0, 290.0, 315.0, 315.0, 316.0, 314.0, 319.0, 317.0, 319.0, 317.0, 319.0, 317.0, 319.0, 320.0, 319.0, 317.0, 314.0, 319.0, 316.0, 314.0, 319.0, 314.0, 285.0, 294.0, 316.0, 311.0, 210.0, 198.0, 283.0, 296.0, 316.0, 314.0, 290.0, 292.0, 293.0, 291.0, 299.0, 288.0, 274.0, 302.0, 286.0, 296.0, 314.0, 316.0, 296.0, 283.0, 316.0, 314.0, 293.0, 289.0, 316.0, 314.0, 301.0, 286.0, 314.0, 316.0, 272.0, 250.0, 319.0, 317.0, 296.0, 294.0, 311.0, 322.0, 299.0, 283.0, 319.0, 
317.0, 322.0, 314.0, 319.0, 320.0, 294.0, 288.0, 289.0, 293.0, 321.0, 312.0, 286.0, 293.0, 288.0, 291.0, 267.0, 269.0, 293.0, 289.0, 319.0, 317.0, 311.0, 319.0, 319.0, 317.0, 285.0, 302.0, 288.0, 299.0, 321.0, 318.0, 316.0, 314.0, 314.0, 319.0, 294.0, 288.0, 283.0, 299.0, 316.0, 311.0, 322.0, 317.0, 313.0, 314.0, 325.0, 314.0, 314.0, 319.0, 321.0, 309.0, 316.0, 320.0, 288.0, 299.0, 301.0, 286.0, 317.0, 322.0, 319.0, 311.0, 316.0, 317.0, 316.0, 314.0, 279.0, 303.0, 289.0, 284.0, 319.0, 320.0, 291.0, 296.0, 321.0, 315.0, 316.0, 317.0, 316.0, 314.0, 293.0, 288.0, 316.0, 317.0, 293.0, 289.0, 316.0, 317.0, 151.0, 160.0, 319.0, 317.0, 316.0, 314.0, 319.0, 314.0, 319.0, 317.0, 291.0, 293.0, 311.0, 325.0, 319.0, 317.0]}, "sampler_perf": {"mean_env_wait_ms": 0.7952795152199544, "mean_processing_ms": 0.22752629484006134, "mean_inference_ms": 1.3983582965229155}, "off_policy_estimator": {}, "info": {"num_steps_trained": 10512000, "num_steps_sampled": 5606400, "sample_time_ms": 21892.052, "load_time_ms": 37.192, "grad_time_ms": 10267.0, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0008096967940218747, "policy_loss": -0.008205131627619267, "vf_loss": 79.47277069091797, "vf_explained_var": 0.777022659778595, "kl": 0.002324033295735717, "entropy": 1.1036995649337769, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5606400, "episodes_total": 14016, "training_iteration": 438, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-32-41", "timestamp": 1660260761, "time_this_iter_s": 34.68048119544983, "time_total_s": 19170.075863838196, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 19170.075863838196, "timesteps_since_restore": 5606400, "iterations_since_restore": 438, "perf": {"cpu_util_percent": 33.92857142857143, "ram_util_percent": 59.06734693877551}}
-{"episode_reward_max": 639.0, "episode_reward_min": 311.0, "episode_reward_mean": 616.53, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 151.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 308.265}, "custom_metrics": {"sparse_reward_mean": 213.6, "sparse_reward_min": 100, "sparse_reward_max": 220, "shaped_reward_mean": 189.33, "shaped_reward_min": 111, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.54, "onion_pickup_agent_0_min": 10, "onion_pickup_agent_0_max": 19, "onion_pickup_agent_1_mean": 18.7, "onion_pickup_agent_1_min": 14, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.24, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 19, "useful_onion_pickup_agent_1_mean": 18.11, "useful_onion_pickup_agent_1_min": 14, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.15, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.11, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.06, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.0, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 0, "potting_onion_agent_0_mean": 16.12, "potting_onion_agent_0_min": 9, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 18.34, "potting_onion_agent_1_min": 12, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.91, "dish_pickup_agent_0_min": 5, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 5.89, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 5.71, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 4.99, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 1.03, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.72, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.77, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.0, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 6, "soup_delivery_agent_0_mean": 5.74, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.94, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.12, 
"optimal_onion_potting_agent_0_min": 9, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 18.34, "optimal_onion_potting_agent_1_min": 12, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.12, "viable_onion_potting_agent_0_min": 9, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 18.34, "viable_onion_potting_agent_1_min": 12, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [627.0, 639.0, 636.0, 630.0, 579.0, 587.0, 627.0, 636.0, 633.0, 633.0, 636.0, 636.0, 636.0, 630.0, 630.0, 630.0, 633.0, 636.0, 636.0, 636.0, 639.0, 630.0, 627.0, 636.0, 579.0, 636.0, 590.0, 636.0, 570.0, 636.0, 579.0, 639.0, 630.0, 633.0, 582.0, 582.0, 627.0, 639.0, 627.0, 639.0, 633.0, 630.0, 636.0, 587.0, 587.0, 639.0, 630.0, 633.0, 630.0, 582.0, 573.0, 639.0, 587.0, 636.0, 633.0, 630.0, 581.0, 633.0, 582.0, 633.0, 311.0, 636.0, 630.0, 633.0, 636.0, 584.0, 636.0, 636.0, 627.0, 639.0, 630.0, 636.0, 633.0, 636.0, 587.0, 639.0, 579.0, 636.0, 630.0, 627.0, 633.0, 639.0, 636.0, 564.0, 630.0, 630.0, 636.0, 636.0, 636.0, 639.0, 636.0, 633.0, 630.0, 633.0, 579.0, 627.0, 408.0, 579.0, 630.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [316.0, 311.0, 322.0, 317.0, 319.0, 317.0, 318.0, 312.0, 277.0, 302.0, 291.0, 296.0, 313.0, 314.0, 319.0, 317.0, 315.0, 318.0, 314.0, 319.0, 316.0, 320.0, 324.0, 312.0, 319.0, 317.0, 321.0, 309.0, 316.0, 314.0, 311.0, 319.0, 316.0, 317.0, 319.0, 317.0, 324.0, 312.0, 316.0, 320.0, 319.0, 320.0, 319.0, 311.0, 310.0, 317.0, 319.0, 317.0, 293.0, 286.0, 322.0, 314.0, 296.0, 294.0, 314.0, 322.0, 290.0, 280.0, 311.0, 325.0, 288.0, 291.0, 319.0, 320.0, 316.0, 314.0, 314.0, 319.0, 294.0, 288.0, 283.0, 299.0, 316.0, 311.0, 322.0, 317.0, 313.0, 314.0, 325.0, 314.0, 314.0, 319.0, 321.0, 309.0, 316.0, 320.0, 288.0, 299.0, 301.0, 286.0, 317.0, 322.0, 319.0, 311.0, 316.0, 317.0, 316.0, 
314.0, 279.0, 303.0, 289.0, 284.0, 319.0, 320.0, 291.0, 296.0, 321.0, 315.0, 316.0, 317.0, 316.0, 314.0, 293.0, 288.0, 316.0, 317.0, 293.0, 289.0, 316.0, 317.0, 151.0, 160.0, 319.0, 317.0, 316.0, 314.0, 319.0, 314.0, 319.0, 317.0, 291.0, 293.0, 311.0, 325.0, 319.0, 317.0, 309.0, 318.0, 319.0, 320.0, 316.0, 314.0, 314.0, 322.0, 314.0, 319.0, 314.0, 322.0, 285.0, 302.0, 319.0, 320.0, 285.0, 294.0, 319.0, 317.0, 311.0, 319.0, 313.0, 314.0, 313.0, 320.0, 319.0, 320.0, 324.0, 312.0, 274.0, 290.0, 315.0, 315.0, 316.0, 314.0, 319.0, 317.0, 319.0, 317.0, 319.0, 317.0, 319.0, 320.0, 319.0, 317.0, 314.0, 319.0, 316.0, 314.0, 319.0, 314.0, 285.0, 294.0, 316.0, 311.0, 210.0, 198.0, 283.0, 296.0, 316.0, 314.0, 290.0, 292.0]}, "sampler_perf": {"mean_env_wait_ms": 0.7945004798552335, "mean_processing_ms": 0.22737090000954274, "mean_inference_ms": 1.3975837411593142}, "off_policy_estimator": {}, "info": {"num_steps_trained": 10536000, "num_steps_sampled": 5619200, "sample_time_ms": 21744.649, "load_time_ms": 36.586, "grad_time_ms": 10113.951, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0006775472429580986, "policy_loss": -0.006301699206233025, "vf_loss": 75.32054138183594, "vf_explained_var": 0.7757834792137146, "kl": 0.0017814143793657422, "entropy": 1.1056231260299683, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5619200, "episodes_total": 14048, "training_iteration": 439, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-33-09", "timestamp": 1660260789, "time_this_iter_s": 28.622015953063965, "time_total_s": 19198.69787979126, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 19198.69787979126, "timesteps_since_restore": 5619200, "iterations_since_restore": 439, "perf": {"cpu_util_percent": 31.939024390243897, "ram_util_percent": 59.1219512195122}}
-{"episode_reward_max": 639.0, "episode_reward_min": 408.0, "episode_reward_mean": 620.15, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 198.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 310.075}, "custom_metrics": {"sparse_reward_mean": 215.0, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 190.15, "shaped_reward_min": 128, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.59, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 19, "onion_pickup_agent_1_mean": 18.78, "onion_pickup_agent_1_min": 14, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.36, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 19, "useful_onion_pickup_agent_1_mean": 18.29, "useful_onion_pickup_agent_1_min": 14, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.12, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.09, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.02, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.0, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 0, "potting_onion_agent_0_mean": 16.24, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 18.46, "potting_onion_agent_1_min": 14, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.77, "dish_pickup_agent_0_min": 5, "dish_pickup_agent_0_max": 13, "dish_pickup_agent_1_mean": 5.98, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 5.64, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.01, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 6, "dish_drop_agent_0_mean": 0.93, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 0.74, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.74, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.08, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.72, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.03, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.24, 
"optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 18.46, "optimal_onion_potting_agent_1_min": 14, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.24, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 18.46, "viable_onion_potting_agent_1_min": 14, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [584.0, 633.0, 627.0, 582.0, 630.0, 636.0, 636.0, 630.0, 639.0, 627.0, 579.0, 624.0, 636.0, 630.0, 582.0, 636.0, 639.0, 633.0, 633.0, 639.0, 576.0, 636.0, 573.0, 630.0, 587.0, 633.0, 630.0, 636.0, 639.0, 633.0, 630.0, 587.0, 636.0, 584.0, 636.0, 636.0, 627.0, 639.0, 630.0, 636.0, 633.0, 636.0, 587.0, 639.0, 579.0, 636.0, 630.0, 627.0, 633.0, 639.0, 636.0, 564.0, 630.0, 630.0, 636.0, 636.0, 636.0, 639.0, 636.0, 633.0, 630.0, 633.0, 579.0, 627.0, 408.0, 579.0, 630.0, 582.0, 627.0, 639.0, 636.0, 630.0, 579.0, 587.0, 627.0, 636.0, 633.0, 633.0, 636.0, 636.0, 636.0, 630.0, 630.0, 630.0, 633.0, 636.0, 636.0, 636.0, 639.0, 630.0, 627.0, 636.0, 579.0, 636.0, 590.0, 636.0, 570.0, 636.0, 579.0, 639.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [290.0, 294.0, 319.0, 314.0, 311.0, 316.0, 288.0, 294.0, 314.0, 316.0, 319.0, 317.0, 319.0, 317.0, 316.0, 314.0, 319.0, 320.0, 306.0, 321.0, 293.0, 286.0, 307.0, 317.0, 319.0, 317.0, 311.0, 319.0, 293.0, 289.0, 321.0, 315.0, 322.0, 317.0, 317.0, 316.0, 311.0, 322.0, 322.0, 317.0, 293.0, 283.0, 319.0, 317.0, 279.0, 294.0, 308.0, 322.0, 285.0, 302.0, 316.0, 317.0, 305.0, 325.0, 319.0, 317.0, 319.0, 320.0, 319.0, 314.0, 318.0, 312.0, 288.0, 299.0, 319.0, 317.0, 291.0, 293.0, 311.0, 325.0, 319.0, 317.0, 309.0, 318.0, 319.0, 320.0, 316.0, 314.0, 314.0, 322.0, 314.0, 319.0, 314.0, 322.0, 285.0, 302.0, 319.0, 320.0, 285.0, 294.0, 319.0, 317.0, 311.0, 319.0, 313.0, 314.0, 313.0, 
320.0, 319.0, 320.0, 324.0, 312.0, 274.0, 290.0, 315.0, 315.0, 316.0, 314.0, 319.0, 317.0, 319.0, 317.0, 319.0, 317.0, 319.0, 320.0, 319.0, 317.0, 314.0, 319.0, 316.0, 314.0, 319.0, 314.0, 285.0, 294.0, 316.0, 311.0, 210.0, 198.0, 283.0, 296.0, 316.0, 314.0, 290.0, 292.0, 316.0, 311.0, 322.0, 317.0, 319.0, 317.0, 318.0, 312.0, 277.0, 302.0, 291.0, 296.0, 313.0, 314.0, 319.0, 317.0, 315.0, 318.0, 314.0, 319.0, 316.0, 320.0, 324.0, 312.0, 319.0, 317.0, 321.0, 309.0, 316.0, 314.0, 311.0, 319.0, 316.0, 317.0, 319.0, 317.0, 324.0, 312.0, 316.0, 320.0, 319.0, 320.0, 319.0, 311.0, 310.0, 317.0, 319.0, 317.0, 293.0, 286.0, 322.0, 314.0, 296.0, 294.0, 314.0, 322.0, 290.0, 280.0, 311.0, 325.0, 288.0, 291.0, 319.0, 320.0]}, "sampler_perf": {"mean_env_wait_ms": 0.7937258148671773, "mean_processing_ms": 0.22721751341177562, "mean_inference_ms": 1.3967610737623508}, "off_policy_estimator": {}, "info": {"num_steps_trained": 10560000, "num_steps_sampled": 5632000, "sample_time_ms": 21732.229, "load_time_ms": 36.475, "grad_time_ms": 9880.218, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.00015868060290813446, "policy_loss": -0.007246671710163355, "vf_loss": 76.3777847290039, "vf_explained_var": 0.7677585482597351, "kl": 0.0021524711046367884, "entropy": 1.099584698677063, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5632000, "episodes_total": 14080, "training_iteration": 440, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-33-39", "timestamp": 1660260819, "time_this_iter_s": 29.637184143066406, "time_total_s": 19228.335063934326, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 19228.335063934326, "timesteps_since_restore": 5632000, "iterations_since_restore": 440, "perf": {"cpu_util_percent": 33.75, "ram_util_percent": 59.899999999999984}}
-{"episode_reward_max": 639.0, "episode_reward_min": 408.0, "episode_reward_mean": 617.76, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 198.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 308.88}, "custom_metrics": {"sparse_reward_mean": 214.2, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 189.36, "shaped_reward_min": 128, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.58, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 19, "onion_pickup_agent_1_mean": 18.6, "onion_pickup_agent_1_min": 14, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.35, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 19, "useful_onion_pickup_agent_1_mean": 18.01, "useful_onion_pickup_agent_1_min": 14, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.1, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.09, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.03, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.02, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.25, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 18.27, "potting_onion_agent_1_min": 14, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.84, "dish_pickup_agent_0_min": 5, "dish_pickup_agent_0_max": 13, "dish_pickup_agent_1_mean": 6.16, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 13, "useful_dish_pickup_agent_0_mean": 5.69, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.96, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 6, "dish_drop_agent_0_mean": 0.97, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 0.95, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.75, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.02, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.73, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.98, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.25, 
"optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 18.27, "optimal_onion_potting_agent_1_min": 14, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.25, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 18.27, "viable_onion_potting_agent_1_min": 14, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [633.0, 633.0, 627.0, 627.0, 636.0, 582.0, 630.0, 633.0, 639.0, 570.0, 582.0, 576.0, 636.0, 639.0, 636.0, 582.0, 630.0, 627.0, 624.0, 587.0, 630.0, 633.0, 582.0, 630.0, 636.0, 582.0, 576.0, 633.0, 633.0, 630.0, 639.0, 636.0, 408.0, 579.0, 630.0, 582.0, 627.0, 639.0, 636.0, 630.0, 579.0, 587.0, 627.0, 636.0, 633.0, 633.0, 636.0, 636.0, 636.0, 630.0, 630.0, 630.0, 633.0, 636.0, 636.0, 636.0, 639.0, 630.0, 627.0, 636.0, 579.0, 636.0, 590.0, 636.0, 570.0, 636.0, 579.0, 639.0, 584.0, 633.0, 627.0, 582.0, 630.0, 636.0, 636.0, 630.0, 639.0, 627.0, 579.0, 624.0, 636.0, 630.0, 582.0, 636.0, 639.0, 633.0, 633.0, 639.0, 576.0, 636.0, 573.0, 630.0, 587.0, 633.0, 630.0, 636.0, 639.0, 633.0, 630.0, 587.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [319.0, 314.0, 319.0, 314.0, 314.0, 313.0, 316.0, 311.0, 320.0, 316.0, 288.0, 294.0, 311.0, 319.0, 319.0, 314.0, 319.0, 320.0, 289.0, 281.0, 291.0, 291.0, 288.0, 288.0, 316.0, 320.0, 319.0, 320.0, 319.0, 317.0, 295.0, 287.0, 313.0, 317.0, 319.0, 308.0, 310.0, 314.0, 296.0, 291.0, 314.0, 316.0, 313.0, 320.0, 301.0, 281.0, 316.0, 314.0, 314.0, 322.0, 291.0, 291.0, 288.0, 288.0, 316.0, 317.0, 319.0, 314.0, 316.0, 314.0, 320.0, 319.0, 319.0, 317.0, 210.0, 198.0, 283.0, 296.0, 316.0, 314.0, 290.0, 292.0, 316.0, 311.0, 322.0, 317.0, 319.0, 317.0, 318.0, 312.0, 277.0, 302.0, 291.0, 296.0, 313.0, 314.0, 319.0, 317.0, 315.0, 318.0, 314.0, 319.0, 316.0, 320.0, 324.0, 312.0, 319.0, 
317.0, 321.0, 309.0, 316.0, 314.0, 311.0, 319.0, 316.0, 317.0, 319.0, 317.0, 324.0, 312.0, 316.0, 320.0, 319.0, 320.0, 319.0, 311.0, 310.0, 317.0, 319.0, 317.0, 293.0, 286.0, 322.0, 314.0, 296.0, 294.0, 314.0, 322.0, 290.0, 280.0, 311.0, 325.0, 288.0, 291.0, 319.0, 320.0, 290.0, 294.0, 319.0, 314.0, 311.0, 316.0, 288.0, 294.0, 314.0, 316.0, 319.0, 317.0, 319.0, 317.0, 316.0, 314.0, 319.0, 320.0, 306.0, 321.0, 293.0, 286.0, 307.0, 317.0, 319.0, 317.0, 311.0, 319.0, 293.0, 289.0, 321.0, 315.0, 322.0, 317.0, 317.0, 316.0, 311.0, 322.0, 322.0, 317.0, 293.0, 283.0, 319.0, 317.0, 279.0, 294.0, 308.0, 322.0, 285.0, 302.0, 316.0, 317.0, 305.0, 325.0, 319.0, 317.0, 319.0, 320.0, 319.0, 314.0, 318.0, 312.0, 288.0, 299.0]}, "sampler_perf": {"mean_env_wait_ms": 0.7929580463765498, "mean_processing_ms": 0.22706572704499173, "mean_inference_ms": 1.3960311893008097}, "off_policy_estimator": {}, "info": {"num_steps_trained": 10584000, "num_steps_sampled": 5644800, "sample_time_ms": 22196.121, "load_time_ms": 36.22, "grad_time_ms": 10110.176, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0019730820786207914, "policy_loss": -0.005001601297408342, "vf_loss": 75.2809066772461, "vf_explained_var": 0.770819902420044, "kl": 0.0019049126422032714, "entropy": 1.106797695159912, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5644800, "episodes_total": 14112, "training_iteration": 441, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-34-15", "timestamp": 1660260855, "time_this_iter_s": 36.16889190673828, "time_total_s": 19264.503955841064, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 19264.503955841064, "timesteps_since_restore": 5644800, "iterations_since_restore": 441, "perf": {"cpu_util_percent": 30.368627450980394, "ram_util_percent": 59.160784313725486}}
-{"episode_reward_max": 639.0, "episode_reward_min": 564.0, "episode_reward_mean": 618.3, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 279.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 309.15}, "custom_metrics": {"sparse_reward_mean": 214.6, "sparse_reward_min": 200, "sparse_reward_max": 220, "shaped_reward_mean": 189.1, "shaped_reward_min": 164, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.65, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 19, "onion_pickup_agent_1_mean": 18.51, "onion_pickup_agent_1_min": 15, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 16.37, "useful_onion_pickup_agent_0_min": 13, "useful_onion_pickup_agent_0_max": 19, "useful_onion_pickup_agent_1_mean": 17.85, "useful_onion_pickup_agent_1_min": 14, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.08, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.15, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.03, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.05, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.36, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 18.09, "potting_onion_agent_1_min": 15, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.74, "dish_pickup_agent_0_min": 5, "dish_pickup_agent_0_max": 13, "dish_pickup_agent_1_mean": 6.31, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 13, "useful_dish_pickup_agent_0_mean": 5.58, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.02, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 6, "dish_drop_agent_0_mean": 0.91, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 1.04, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.69, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.1, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.67, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.06, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.36, 
"optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 18.09, "optimal_onion_potting_agent_1_min": 15, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.36, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 18.09, "viable_onion_potting_agent_1_min": 15, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [627.0, 624.0, 582.0, 636.0, 630.0, 630.0, 633.0, 587.0, 567.0, 630.0, 639.0, 630.0, 636.0, 633.0, 633.0, 630.0, 636.0, 639.0, 636.0, 630.0, 630.0, 630.0, 627.0, 630.0, 582.0, 582.0, 587.0, 630.0, 630.0, 564.0, 576.0, 636.0, 570.0, 636.0, 579.0, 639.0, 584.0, 633.0, 627.0, 582.0, 630.0, 636.0, 636.0, 630.0, 639.0, 627.0, 579.0, 624.0, 636.0, 630.0, 582.0, 636.0, 639.0, 633.0, 633.0, 639.0, 576.0, 636.0, 573.0, 630.0, 587.0, 633.0, 630.0, 636.0, 639.0, 633.0, 630.0, 587.0, 633.0, 633.0, 627.0, 627.0, 636.0, 582.0, 630.0, 633.0, 639.0, 570.0, 582.0, 576.0, 636.0, 639.0, 636.0, 582.0, 630.0, 627.0, 624.0, 587.0, 630.0, 633.0, 582.0, 630.0, 636.0, 582.0, 576.0, 633.0, 633.0, 630.0, 639.0, 636.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [311.0, 316.0, 313.0, 311.0, 291.0, 291.0, 319.0, 317.0, 316.0, 314.0, 313.0, 317.0, 316.0, 317.0, 290.0, 297.0, 282.0, 285.0, 319.0, 311.0, 319.0, 320.0, 316.0, 314.0, 314.0, 322.0, 319.0, 314.0, 311.0, 322.0, 314.0, 316.0, 314.0, 322.0, 322.0, 317.0, 319.0, 317.0, 319.0, 311.0, 316.0, 314.0, 311.0, 319.0, 308.0, 319.0, 316.0, 314.0, 291.0, 291.0, 290.0, 292.0, 296.0, 291.0, 316.0, 314.0, 316.0, 314.0, 281.0, 283.0, 290.0, 286.0, 319.0, 317.0, 290.0, 280.0, 311.0, 325.0, 288.0, 291.0, 319.0, 320.0, 290.0, 294.0, 319.0, 314.0, 311.0, 316.0, 288.0, 294.0, 314.0, 316.0, 319.0, 317.0, 319.0, 317.0, 316.0, 314.0, 319.0, 320.0, 306.0, 321.0, 293.0, 286.0, 307.0, 317.0, 319.0, 
317.0, 311.0, 319.0, 293.0, 289.0, 321.0, 315.0, 322.0, 317.0, 317.0, 316.0, 311.0, 322.0, 322.0, 317.0, 293.0, 283.0, 319.0, 317.0, 279.0, 294.0, 308.0, 322.0, 285.0, 302.0, 316.0, 317.0, 305.0, 325.0, 319.0, 317.0, 319.0, 320.0, 319.0, 314.0, 318.0, 312.0, 288.0, 299.0, 319.0, 314.0, 319.0, 314.0, 314.0, 313.0, 316.0, 311.0, 320.0, 316.0, 288.0, 294.0, 311.0, 319.0, 319.0, 314.0, 319.0, 320.0, 289.0, 281.0, 291.0, 291.0, 288.0, 288.0, 316.0, 320.0, 319.0, 320.0, 319.0, 317.0, 295.0, 287.0, 313.0, 317.0, 319.0, 308.0, 310.0, 314.0, 296.0, 291.0, 314.0, 316.0, 313.0, 320.0, 301.0, 281.0, 316.0, 314.0, 314.0, 322.0, 291.0, 291.0, 288.0, 288.0, 316.0, 317.0, 319.0, 314.0, 316.0, 314.0, 320.0, 319.0, 319.0, 317.0]}, "sampler_perf": {"mean_env_wait_ms": 0.7921926702018212, "mean_processing_ms": 0.22691451487922393, "mean_inference_ms": 1.3953653520568468}, "off_policy_estimator": {}, "info": {"num_steps_trained": 10608000, "num_steps_sampled": 5657600, "sample_time_ms": 21841.972, "load_time_ms": 35.995, "grad_time_ms": 10215.543, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0027040394488722086, "policy_loss": -0.004097369499504566, "vf_loss": 73.54324340820312, "vf_explained_var": 0.7729549407958984, "kl": 0.0019481302006170154, "entropy": 1.105837106704712, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5657600, "episodes_total": 14144, "training_iteration": 442, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-34-49", "timestamp": 1660260889, "time_this_iter_s": 33.27770400047302, "time_total_s": 19297.781659841537, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 19297.781659841537, "timesteps_since_restore": 5657600, "iterations_since_restore": 442, "perf": {"cpu_util_percent": 29.43404255319148, "ram_util_percent": 59.22553191489361}}
-{"episode_reward_max": 639.0, "episode_reward_min": 564.0, "episode_reward_mean": 615.91, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 280.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 307.955}, "custom_metrics": {"sparse_reward_mean": 213.6, "sparse_reward_min": 200, "sparse_reward_max": 220, "shaped_reward_mean": 188.71, "shaped_reward_min": 164, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.74, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 19, "onion_pickup_agent_1_mean": 18.35, "onion_pickup_agent_1_min": 15, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 16.39, "useful_onion_pickup_agent_0_min": 13, "useful_onion_pickup_agent_0_max": 19, "useful_onion_pickup_agent_1_mean": 17.58, "useful_onion_pickup_agent_1_min": 14, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.09, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.18, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.04, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.07, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.42, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.85, "potting_onion_agent_1_min": 15, "potting_onion_agent_1_max": 20, "dish_pickup_agent_0_mean": 6.7, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 13, "dish_pickup_agent_1_mean": 6.2, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 13, "useful_dish_pickup_agent_0_mean": 5.58, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.07, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.92, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 0.95, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.04, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.68, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.12, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 6, "soup_delivery_agent_0_mean": 5.65, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.03, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.42, 
"optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.85, "optimal_onion_potting_agent_1_min": 15, "optimal_onion_potting_agent_1_max": 20, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.42, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.85, "viable_onion_potting_agent_1_min": 15, "viable_onion_potting_agent_1_max": 20, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [633.0, 579.0, 633.0, 636.0, 636.0, 636.0, 567.0, 630.0, 587.0, 587.0, 587.0, 630.0, 639.0, 579.0, 636.0, 630.0, 630.0, 587.0, 633.0, 579.0, 587.0, 587.0, 582.0, 630.0, 630.0, 579.0, 636.0, 633.0, 627.0, 630.0, 587.0, 579.0, 639.0, 633.0, 630.0, 587.0, 633.0, 633.0, 627.0, 627.0, 636.0, 582.0, 630.0, 633.0, 639.0, 570.0, 582.0, 576.0, 636.0, 639.0, 636.0, 582.0, 630.0, 627.0, 624.0, 587.0, 630.0, 633.0, 582.0, 630.0, 636.0, 582.0, 576.0, 633.0, 633.0, 630.0, 639.0, 636.0, 627.0, 624.0, 582.0, 636.0, 630.0, 630.0, 633.0, 587.0, 567.0, 630.0, 639.0, 630.0, 636.0, 633.0, 633.0, 630.0, 636.0, 639.0, 636.0, 630.0, 630.0, 630.0, 627.0, 630.0, 582.0, 582.0, 587.0, 630.0, 630.0, 564.0, 576.0, 636.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [314.0, 319.0, 296.0, 283.0, 314.0, 319.0, 319.0, 317.0, 317.0, 319.0, 319.0, 317.0, 280.0, 287.0, 311.0, 319.0, 288.0, 299.0, 291.0, 296.0, 296.0, 291.0, 316.0, 314.0, 314.0, 325.0, 285.0, 294.0, 319.0, 317.0, 314.0, 316.0, 319.0, 311.0, 293.0, 294.0, 314.0, 319.0, 283.0, 296.0, 288.0, 299.0, 301.0, 286.0, 296.0, 286.0, 316.0, 314.0, 316.0, 314.0, 298.0, 281.0, 319.0, 317.0, 317.0, 316.0, 308.0, 319.0, 311.0, 319.0, 295.0, 292.0, 290.0, 289.0, 319.0, 320.0, 319.0, 314.0, 318.0, 312.0, 288.0, 299.0, 319.0, 314.0, 319.0, 314.0, 314.0, 313.0, 316.0, 311.0, 320.0, 316.0, 288.0, 294.0, 311.0, 319.0, 319.0, 314.0, 319.0, 320.0, 289.0, 281.0, 291.0, 291.0, 288.0, 288.0, 316.0, 
320.0, 319.0, 320.0, 319.0, 317.0, 295.0, 287.0, 313.0, 317.0, 319.0, 308.0, 310.0, 314.0, 296.0, 291.0, 314.0, 316.0, 313.0, 320.0, 301.0, 281.0, 316.0, 314.0, 314.0, 322.0, 291.0, 291.0, 288.0, 288.0, 316.0, 317.0, 319.0, 314.0, 316.0, 314.0, 320.0, 319.0, 319.0, 317.0, 311.0, 316.0, 313.0, 311.0, 291.0, 291.0, 319.0, 317.0, 316.0, 314.0, 313.0, 317.0, 316.0, 317.0, 290.0, 297.0, 282.0, 285.0, 319.0, 311.0, 319.0, 320.0, 316.0, 314.0, 314.0, 322.0, 319.0, 314.0, 311.0, 322.0, 314.0, 316.0, 314.0, 322.0, 322.0, 317.0, 319.0, 317.0, 319.0, 311.0, 316.0, 314.0, 311.0, 319.0, 308.0, 319.0, 316.0, 314.0, 291.0, 291.0, 290.0, 292.0, 296.0, 291.0, 316.0, 314.0, 316.0, 314.0, 281.0, 283.0, 290.0, 286.0, 319.0, 317.0]}, "sampler_perf": {"mean_env_wait_ms": 0.7914282474248366, "mean_processing_ms": 0.22676310167632485, "mean_inference_ms": 1.3947257800951314}, "off_policy_estimator": {}, "info": {"num_steps_trained": 10632000, "num_steps_sampled": 5670400, "sample_time_ms": 21982.654, "load_time_ms": 36.099, "grad_time_ms": 10196.436, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0026817016769200563, "policy_loss": -0.004366503097116947, "vf_loss": 76.00869750976562, "vf_explained_var": 0.7792714238166809, "kl": 0.0014469980960711837, "entropy": 1.1053307056427002, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5670400, "episodes_total": 14176, "training_iteration": 443, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-35-20", "timestamp": 1660260920, "time_this_iter_s": 31.264520168304443, "time_total_s": 19329.046180009842, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 19329.046180009842, "timesteps_since_restore": 5670400, "iterations_since_restore": 443, "perf": {"cpu_util_percent": 30.41136363636364, "ram_util_percent": 59.12272727272728}}
-{"episode_reward_max": 639.0, "episode_reward_min": 479.0, "episode_reward_mean": 614.54, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 239.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 307.27}, "custom_metrics": {"sparse_reward_mean": 213.0, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 188.54, "shaped_reward_min": 159, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.78, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 19, "onion_pickup_agent_1_mean": 18.25, "onion_pickup_agent_1_min": 16, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 16.3, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 19, "useful_onion_pickup_agent_1_mean": 17.54, "useful_onion_pickup_agent_1_min": 14, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.08, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.18, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.02, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.06, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.47, "potting_onion_agent_0_min": 13, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.69, "potting_onion_agent_1_min": 14, "potting_onion_agent_1_max": 20, "dish_pickup_agent_0_mean": 6.5, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 6.11, "dish_pickup_agent_1_min": 4, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.55, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.12, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.8, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.86, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.61, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.22, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 6, "soup_delivery_agent_0_mean": 5.53, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.13, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.47, 
"optimal_onion_potting_agent_0_min": 13, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.69, "optimal_onion_potting_agent_1_min": 14, "optimal_onion_potting_agent_1_max": 20, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.47, "viable_onion_potting_agent_0_min": 13, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.69, "viable_onion_potting_agent_1_min": 14, "viable_onion_potting_agent_1_max": 20, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [636.0, 630.0, 636.0, 633.0, 624.0, 636.0, 630.0, 633.0, 639.0, 570.0, 636.0, 630.0, 636.0, 582.0, 587.0, 579.0, 584.0, 633.0, 582.0, 587.0, 636.0, 633.0, 621.0, 630.0, 633.0, 584.0, 627.0, 630.0, 633.0, 479.0, 587.0, 587.0, 633.0, 630.0, 639.0, 636.0, 627.0, 624.0, 582.0, 636.0, 630.0, 630.0, 633.0, 587.0, 567.0, 630.0, 639.0, 630.0, 636.0, 633.0, 633.0, 630.0, 636.0, 639.0, 636.0, 630.0, 630.0, 630.0, 627.0, 630.0, 582.0, 582.0, 587.0, 630.0, 630.0, 564.0, 576.0, 636.0, 633.0, 579.0, 633.0, 636.0, 636.0, 636.0, 567.0, 630.0, 587.0, 587.0, 587.0, 630.0, 639.0, 579.0, 636.0, 630.0, 630.0, 587.0, 633.0, 579.0, 587.0, 587.0, 582.0, 630.0, 630.0, 579.0, 636.0, 633.0, 627.0, 630.0, 587.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [319.0, 317.0, 311.0, 319.0, 314.0, 322.0, 314.0, 319.0, 314.0, 310.0, 314.0, 322.0, 311.0, 319.0, 316.0, 317.0, 317.0, 322.0, 288.0, 282.0, 319.0, 317.0, 311.0, 319.0, 319.0, 317.0, 291.0, 291.0, 298.0, 289.0, 294.0, 285.0, 288.0, 296.0, 314.0, 319.0, 296.0, 286.0, 299.0, 288.0, 319.0, 317.0, 316.0, 317.0, 310.0, 311.0, 319.0, 311.0, 322.0, 311.0, 288.0, 296.0, 310.0, 317.0, 313.0, 317.0, 316.0, 317.0, 239.0, 240.0, 293.0, 294.0, 296.0, 291.0, 319.0, 314.0, 316.0, 314.0, 320.0, 319.0, 319.0, 317.0, 311.0, 316.0, 313.0, 311.0, 291.0, 291.0, 319.0, 317.0, 316.0, 314.0, 313.0, 317.0, 316.0, 317.0, 290.0, 297.0, 282.0, 285.0, 319.0, 311.0, 319.0, 320.0, 316.0, 314.0, 314.0, 
322.0, 319.0, 314.0, 311.0, 322.0, 314.0, 316.0, 314.0, 322.0, 322.0, 317.0, 319.0, 317.0, 319.0, 311.0, 316.0, 314.0, 311.0, 319.0, 308.0, 319.0, 316.0, 314.0, 291.0, 291.0, 290.0, 292.0, 296.0, 291.0, 316.0, 314.0, 316.0, 314.0, 281.0, 283.0, 290.0, 286.0, 319.0, 317.0, 314.0, 319.0, 296.0, 283.0, 314.0, 319.0, 319.0, 317.0, 317.0, 319.0, 319.0, 317.0, 280.0, 287.0, 311.0, 319.0, 288.0, 299.0, 291.0, 296.0, 296.0, 291.0, 316.0, 314.0, 314.0, 325.0, 285.0, 294.0, 319.0, 317.0, 314.0, 316.0, 319.0, 311.0, 293.0, 294.0, 314.0, 319.0, 283.0, 296.0, 288.0, 299.0, 301.0, 286.0, 296.0, 286.0, 316.0, 314.0, 316.0, 314.0, 298.0, 281.0, 319.0, 317.0, 317.0, 316.0, 308.0, 319.0, 311.0, 319.0, 295.0, 292.0, 290.0, 289.0]}, "sampler_perf": {"mean_env_wait_ms": 0.7906650471868468, "mean_processing_ms": 0.2266115620582618, "mean_inference_ms": 1.3940369234530334}, "off_policy_estimator": {}, "info": {"num_steps_trained": 10656000, "num_steps_sampled": 5683200, "sample_time_ms": 22188.83, "load_time_ms": 36.224, "grad_time_ms": 10331.481, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0037986973766237497, "policy_loss": -0.0035794072318822145, "vf_loss": 79.28974151611328, "vf_explained_var": 0.7626357078552246, "kl": 0.0019579820800572634, "entropy": 1.1017413139343262, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5683200, "episodes_total": 14208, "training_iteration": 444, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-35-52", "timestamp": 1660260952, "time_this_iter_s": 32.303210973739624, "time_total_s": 19361.34939098358, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 19361.34939098358, "timesteps_since_restore": 5683200, "iterations_since_restore": 444, "perf": {"cpu_util_percent": 30.893478260869564, "ram_util_percent": 59.16521739130435}}
-{"episode_reward_max": 639.0, "episode_reward_min": 479.0, "episode_reward_mean": 610.92, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 239.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 305.46}, "custom_metrics": {"sparse_reward_mean": 211.4, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 188.12, "shaped_reward_min": 159, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.7, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 19, "onion_pickup_agent_1_mean": 18.18, "onion_pickup_agent_1_min": 16, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 16.15, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 19, "useful_onion_pickup_agent_1_mean": 17.48, "useful_onion_pickup_agent_1_min": 14, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.07, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.16, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.02, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.05, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.37, "potting_onion_agent_0_min": 13, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.71, "potting_onion_agent_1_min": 14, "potting_onion_agent_1_max": 20, "dish_pickup_agent_0_mean": 6.42, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 6.0, "dish_pickup_agent_1_min": 4, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.55, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.11, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.74, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.76, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.04, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.6, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.2, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.51, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.07, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.37, 
"optimal_onion_potting_agent_0_min": 13, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.71, "optimal_onion_potting_agent_1_min": 14, "optimal_onion_potting_agent_1_max": 20, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.37, "viable_onion_potting_agent_0_min": 13, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.71, "viable_onion_potting_agent_1_min": 14, "viable_onion_potting_agent_1_max": 20, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [587.0, 582.0, 582.0, 582.0, 630.0, 578.0, 576.0, 587.0, 587.0, 630.0, 636.0, 630.0, 639.0, 636.0, 636.0, 579.0, 630.0, 636.0, 627.0, 636.0, 578.0, 633.0, 633.0, 582.0, 636.0, 636.0, 587.0, 590.0, 639.0, 630.0, 630.0, 582.0, 630.0, 564.0, 576.0, 636.0, 633.0, 579.0, 633.0, 636.0, 636.0, 636.0, 567.0, 630.0, 587.0, 587.0, 587.0, 630.0, 639.0, 579.0, 636.0, 630.0, 630.0, 587.0, 633.0, 579.0, 587.0, 587.0, 582.0, 630.0, 630.0, 579.0, 636.0, 633.0, 627.0, 630.0, 587.0, 579.0, 636.0, 630.0, 636.0, 633.0, 624.0, 636.0, 630.0, 633.0, 639.0, 570.0, 636.0, 630.0, 636.0, 582.0, 587.0, 579.0, 584.0, 633.0, 582.0, 587.0, 636.0, 633.0, 621.0, 630.0, 633.0, 584.0, 627.0, 630.0, 633.0, 479.0, 587.0, 587.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [282.0, 305.0, 291.0, 291.0, 291.0, 291.0, 294.0, 288.0, 316.0, 314.0, 284.0, 294.0, 295.0, 281.0, 299.0, 288.0, 288.0, 299.0, 308.0, 322.0, 319.0, 317.0, 311.0, 319.0, 319.0, 320.0, 319.0, 317.0, 317.0, 319.0, 288.0, 291.0, 316.0, 314.0, 327.0, 309.0, 315.0, 312.0, 319.0, 317.0, 280.0, 298.0, 311.0, 322.0, 319.0, 314.0, 293.0, 289.0, 319.0, 317.0, 319.0, 317.0, 293.0, 294.0, 291.0, 299.0, 319.0, 320.0, 316.0, 314.0, 311.0, 319.0, 291.0, 291.0, 316.0, 314.0, 281.0, 283.0, 290.0, 286.0, 319.0, 317.0, 314.0, 319.0, 296.0, 283.0, 314.0, 319.0, 319.0, 317.0, 317.0, 319.0, 319.0, 317.0, 280.0, 287.0, 311.0, 319.0, 288.0, 299.0, 291.0, 296.0, 296.0, 291.0, 316.0, 314.0, 314.0, 
325.0, 285.0, 294.0, 319.0, 317.0, 314.0, 316.0, 319.0, 311.0, 293.0, 294.0, 314.0, 319.0, 283.0, 296.0, 288.0, 299.0, 301.0, 286.0, 296.0, 286.0, 316.0, 314.0, 316.0, 314.0, 298.0, 281.0, 319.0, 317.0, 317.0, 316.0, 308.0, 319.0, 311.0, 319.0, 295.0, 292.0, 290.0, 289.0, 319.0, 317.0, 311.0, 319.0, 314.0, 322.0, 314.0, 319.0, 314.0, 310.0, 314.0, 322.0, 311.0, 319.0, 316.0, 317.0, 317.0, 322.0, 288.0, 282.0, 319.0, 317.0, 311.0, 319.0, 319.0, 317.0, 291.0, 291.0, 298.0, 289.0, 294.0, 285.0, 288.0, 296.0, 314.0, 319.0, 296.0, 286.0, 299.0, 288.0, 319.0, 317.0, 316.0, 317.0, 310.0, 311.0, 319.0, 311.0, 322.0, 311.0, 288.0, 296.0, 310.0, 317.0, 313.0, 317.0, 316.0, 317.0, 239.0, 240.0, 293.0, 294.0, 296.0, 291.0]}, "sampler_perf": {"mean_env_wait_ms": 0.7899054631660849, "mean_processing_ms": 0.22646096684642672, "mean_inference_ms": 1.3933342141949396}, "off_policy_estimator": {}, "info": {"num_steps_trained": 10680000, "num_steps_sampled": 5696000, "sample_time_ms": 22242.608, "load_time_ms": 35.979, "grad_time_ms": 10328.703, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0013677343958988786, "policy_loss": -0.006408216897398233, "vf_loss": 83.29845428466797, "vf_explained_var": 0.7579674124717712, "kl": 0.001941792550496757, "entropy": 1.1077739000320435, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5696000, "episodes_total": 14240, "training_iteration": 445, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-36-24", "timestamp": 1660260984, "time_this_iter_s": 31.836724996566772, "time_total_s": 19393.18611598015, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 19393.18611598015, "timesteps_since_restore": 5696000, "iterations_since_restore": 445, "perf": {"cpu_util_percent": 30.162222222222226, "ram_util_percent": 59.18000000000001}}
-{"episode_reward_max": 639.0, "episode_reward_min": 456.0, "episode_reward_mean": 608.82, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 228.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 304.41}, "custom_metrics": {"sparse_reward_mean": 210.8, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 187.22, "shaped_reward_min": 136, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.57, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 19, "onion_pickup_agent_1_mean": 18.19, "onion_pickup_agent_1_min": 14, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 15.99, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 19, "useful_onion_pickup_agent_1_mean": 17.41, "useful_onion_pickup_agent_1_min": 12, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.11, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.17, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.05, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.07, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.17, "potting_onion_agent_0_min": 13, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.71, "potting_onion_agent_1_min": 13, "potting_onion_agent_1_max": 20, "dish_pickup_agent_0_mean": 6.52, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.84, "dish_pickup_agent_1_min": 4, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.62, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.04, "useful_dish_pickup_agent_1_min": 3, "useful_dish_pickup_agent_1_max": 6, "dish_drop_agent_0_mean": 0.77, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.68, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.65, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.08, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.55, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.0, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.17, 
"optimal_onion_potting_agent_0_min": 13, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.71, "optimal_onion_potting_agent_1_min": 13, "optimal_onion_potting_agent_1_max": 20, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.17, "viable_onion_potting_agent_0_min": 13, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.71, "viable_onion_potting_agent_1_min": 13, "viable_onion_potting_agent_1_max": 20, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [627.0, 579.0, 570.0, 582.0, 636.0, 636.0, 630.0, 587.0, 630.0, 630.0, 456.0, 573.0, 630.0, 579.0, 576.0, 633.0, 587.0, 633.0, 630.0, 630.0, 630.0, 633.0, 582.0, 579.0, 630.0, 639.0, 630.0, 639.0, 587.0, 627.0, 522.0, 582.0, 627.0, 630.0, 587.0, 579.0, 636.0, 630.0, 636.0, 633.0, 624.0, 636.0, 630.0, 633.0, 639.0, 570.0, 636.0, 630.0, 636.0, 582.0, 587.0, 579.0, 584.0, 633.0, 582.0, 587.0, 636.0, 633.0, 621.0, 630.0, 633.0, 584.0, 627.0, 630.0, 633.0, 479.0, 587.0, 587.0, 587.0, 582.0, 582.0, 582.0, 630.0, 578.0, 576.0, 587.0, 587.0, 630.0, 636.0, 630.0, 639.0, 636.0, 636.0, 579.0, 630.0, 636.0, 627.0, 636.0, 578.0, 633.0, 633.0, 582.0, 636.0, 636.0, 587.0, 590.0, 639.0, 630.0, 630.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [310.0, 317.0, 290.0, 289.0, 288.0, 282.0, 291.0, 291.0, 324.0, 312.0, 314.0, 322.0, 314.0, 316.0, 299.0, 288.0, 316.0, 314.0, 316.0, 314.0, 228.0, 228.0, 281.0, 292.0, 316.0, 314.0, 293.0, 286.0, 288.0, 288.0, 316.0, 317.0, 293.0, 294.0, 314.0, 319.0, 321.0, 309.0, 316.0, 314.0, 322.0, 308.0, 319.0, 314.0, 293.0, 289.0, 288.0, 291.0, 316.0, 314.0, 319.0, 320.0, 321.0, 309.0, 319.0, 320.0, 299.0, 288.0, 319.0, 308.0, 251.0, 271.0, 296.0, 286.0, 308.0, 319.0, 311.0, 319.0, 295.0, 292.0, 290.0, 289.0, 319.0, 317.0, 311.0, 319.0, 314.0, 322.0, 314.0, 319.0, 314.0, 310.0, 314.0, 322.0, 311.0, 319.0, 316.0, 317.0, 317.0, 322.0, 288.0, 282.0, 319.0, 317.0, 311.0, 319.0, 319.0, 
317.0, 291.0, 291.0, 298.0, 289.0, 294.0, 285.0, 288.0, 296.0, 314.0, 319.0, 296.0, 286.0, 299.0, 288.0, 319.0, 317.0, 316.0, 317.0, 310.0, 311.0, 319.0, 311.0, 322.0, 311.0, 288.0, 296.0, 310.0, 317.0, 313.0, 317.0, 316.0, 317.0, 239.0, 240.0, 293.0, 294.0, 296.0, 291.0, 282.0, 305.0, 291.0, 291.0, 291.0, 291.0, 294.0, 288.0, 316.0, 314.0, 284.0, 294.0, 295.0, 281.0, 299.0, 288.0, 288.0, 299.0, 308.0, 322.0, 319.0, 317.0, 311.0, 319.0, 319.0, 320.0, 319.0, 317.0, 317.0, 319.0, 288.0, 291.0, 316.0, 314.0, 327.0, 309.0, 315.0, 312.0, 319.0, 317.0, 280.0, 298.0, 311.0, 322.0, 319.0, 314.0, 293.0, 289.0, 319.0, 317.0, 319.0, 317.0, 293.0, 294.0, 291.0, 299.0, 319.0, 320.0, 316.0, 314.0, 311.0, 319.0, 291.0, 291.0]}, "sampler_perf": {"mean_env_wait_ms": 0.7891523423128755, "mean_processing_ms": 0.22631170679234558, "mean_inference_ms": 1.3926725732710878}, "off_policy_estimator": {}, "info": {"num_steps_trained": 10704000, "num_steps_sampled": 5708800, "sample_time_ms": 21771.335, "load_time_ms": 36.027, "grad_time_ms": 10385.96, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0026401570066809654, "policy_loss": -0.0047949193976819515, "vf_loss": 79.88745880126953, "vf_explained_var": 0.7707352638244629, "kl": 0.0022178192157298326, "entropy": 1.107340693473816, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5708800, "episodes_total": 14272, "training_iteration": 446, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-36-58", "timestamp": 1660261018, "time_this_iter_s": 33.74682116508484, "time_total_s": 19426.932937145233, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 19426.932937145233, "timesteps_since_restore": 5708800, "iterations_since_restore": 446, "perf": {"cpu_util_percent": 32.958333333333336, "ram_util_percent": 59.18541666666667}}
-{"episode_reward_max": 639.0, "episode_reward_min": 408.0, "episode_reward_mean": 602.99, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 202.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 301.495}, "custom_metrics": {"sparse_reward_mean": 208.6, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 185.79, "shaped_reward_min": 128, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.65, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.04, "onion_pickup_agent_1_min": 12, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.17, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 19, "useful_onion_pickup_agent_1_mean": 17.2, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.21, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.18, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.09, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.08, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.09, "potting_onion_agent_0_min": 13, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.62, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 20, "dish_pickup_agent_0_mean": 6.44, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 13, "dish_pickup_agent_1_mean": 5.81, "dish_pickup_agent_1_min": 4, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 5.51, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 4.96, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.76, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 0.65, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.61, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.05, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.5, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.94, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.09, 
"optimal_onion_potting_agent_0_min": 13, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.62, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 20, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.09, "viable_onion_potting_agent_0_min": 13, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.62, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 20, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [636.0, 627.0, 587.0, 582.0, 636.0, 587.0, 627.0, 636.0, 630.0, 630.0, 587.0, 621.0, 575.0, 639.0, 633.0, 636.0, 584.0, 582.0, 573.0, 498.0, 587.0, 579.0, 408.0, 636.0, 587.0, 587.0, 633.0, 573.0, 636.0, 587.0, 636.0, 582.0, 633.0, 479.0, 587.0, 587.0, 587.0, 582.0, 582.0, 582.0, 630.0, 578.0, 576.0, 587.0, 587.0, 630.0, 636.0, 630.0, 639.0, 636.0, 636.0, 579.0, 630.0, 636.0, 627.0, 636.0, 578.0, 633.0, 633.0, 582.0, 636.0, 636.0, 587.0, 590.0, 639.0, 630.0, 630.0, 582.0, 627.0, 579.0, 570.0, 582.0, 636.0, 636.0, 630.0, 587.0, 630.0, 630.0, 456.0, 573.0, 630.0, 579.0, 576.0, 633.0, 587.0, 633.0, 630.0, 630.0, 630.0, 633.0, 582.0, 579.0, 630.0, 639.0, 630.0, 639.0, 587.0, 627.0, 522.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [319.0, 317.0, 308.0, 319.0, 286.0, 301.0, 293.0, 289.0, 316.0, 320.0, 291.0, 296.0, 313.0, 314.0, 314.0, 322.0, 311.0, 319.0, 314.0, 316.0, 288.0, 299.0, 310.0, 311.0, 288.0, 287.0, 317.0, 322.0, 321.0, 312.0, 319.0, 317.0, 302.0, 282.0, 291.0, 291.0, 282.0, 291.0, 253.0, 245.0, 290.0, 297.0, 288.0, 291.0, 206.0, 202.0, 319.0, 317.0, 293.0, 294.0, 288.0, 299.0, 316.0, 317.0, 285.0, 288.0, 319.0, 317.0, 299.0, 288.0, 319.0, 317.0, 293.0, 289.0, 316.0, 317.0, 239.0, 240.0, 293.0, 294.0, 296.0, 291.0, 282.0, 305.0, 291.0, 291.0, 291.0, 291.0, 294.0, 288.0, 316.0, 314.0, 284.0, 294.0, 295.0, 281.0, 299.0, 288.0, 288.0, 299.0, 308.0, 322.0, 319.0, 317.0, 311.0, 319.0, 319.0, 
320.0, 319.0, 317.0, 317.0, 319.0, 288.0, 291.0, 316.0, 314.0, 327.0, 309.0, 315.0, 312.0, 319.0, 317.0, 280.0, 298.0, 311.0, 322.0, 319.0, 314.0, 293.0, 289.0, 319.0, 317.0, 319.0, 317.0, 293.0, 294.0, 291.0, 299.0, 319.0, 320.0, 316.0, 314.0, 311.0, 319.0, 291.0, 291.0, 310.0, 317.0, 290.0, 289.0, 288.0, 282.0, 291.0, 291.0, 324.0, 312.0, 314.0, 322.0, 314.0, 316.0, 299.0, 288.0, 316.0, 314.0, 316.0, 314.0, 228.0, 228.0, 281.0, 292.0, 316.0, 314.0, 293.0, 286.0, 288.0, 288.0, 316.0, 317.0, 293.0, 294.0, 314.0, 319.0, 321.0, 309.0, 316.0, 314.0, 322.0, 308.0, 319.0, 314.0, 293.0, 289.0, 288.0, 291.0, 316.0, 314.0, 319.0, 320.0, 321.0, 309.0, 319.0, 320.0, 299.0, 288.0, 319.0, 308.0, 251.0, 271.0, 296.0, 286.0]}, "sampler_perf": {"mean_env_wait_ms": 0.7883964067869212, "mean_processing_ms": 0.22616136742936413, "mean_inference_ms": 1.39185520221877}, "off_policy_estimator": {}, "info": {"num_steps_trained": 10728000, "num_steps_sampled": 5721600, "sample_time_ms": 21601.13, "load_time_ms": 36.066, "grad_time_ms": 10530.811, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.004505176562815905, "policy_loss": -0.00313469092361629, "vf_loss": 81.9230728149414, "vf_explained_var": 0.7751343250274658, "kl": 0.002040610648691654, "entropy": 1.1048672199249268, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5721600, "episodes_total": 14304, "training_iteration": 447, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-37-28", "timestamp": 1660261048, "time_this_iter_s": 30.400289058685303, "time_total_s": 19457.33322620392, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 19457.33322620392, "timesteps_since_restore": 5721600, "iterations_since_restore": 447, "perf": {"cpu_util_percent": 36.12558139534883, "ram_util_percent": 59.20697674418605}}
-{"episode_reward_max": 639.0, "episode_reward_min": 408.0, "episode_reward_mean": 604.98, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 202.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 302.49}, "custom_metrics": {"sparse_reward_mean": 209.8, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 185.38, "shaped_reward_min": 128, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.58, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.19, "onion_pickup_agent_1_min": 12, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.14, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 19, "useful_onion_pickup_agent_1_mean": 17.39, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.25, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.25, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.12, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.13, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.0, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 18, "potting_onion_agent_1_mean": 17.67, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.53, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 13, "dish_pickup_agent_1_mean": 5.85, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 5.49, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 4.9, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.83, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 0.72, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.63, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.01, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.57, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.92, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.0, 
"optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 18, "optimal_onion_potting_agent_1_mean": 17.67, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.0, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 18, "viable_onion_potting_agent_1_mean": 17.67, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [587.0, 630.0, 630.0, 630.0, 630.0, 627.0, 630.0, 606.0, 633.0, 519.0, 633.0, 636.0, 573.0, 630.0, 636.0, 636.0, 584.0, 636.0, 627.0, 630.0, 636.0, 633.0, 633.0, 579.0, 633.0, 582.0, 582.0, 590.0, 627.0, 576.0, 570.0, 582.0, 639.0, 630.0, 630.0, 582.0, 627.0, 579.0, 570.0, 582.0, 636.0, 636.0, 630.0, 587.0, 630.0, 630.0, 456.0, 573.0, 630.0, 579.0, 576.0, 633.0, 587.0, 633.0, 630.0, 630.0, 630.0, 633.0, 582.0, 579.0, 630.0, 639.0, 630.0, 639.0, 587.0, 627.0, 522.0, 582.0, 636.0, 627.0, 587.0, 582.0, 636.0, 587.0, 627.0, 636.0, 630.0, 630.0, 587.0, 621.0, 575.0, 639.0, 633.0, 636.0, 584.0, 582.0, 573.0, 498.0, 587.0, 579.0, 408.0, 636.0, 587.0, 587.0, 633.0, 573.0, 636.0, 587.0, 636.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [290.0, 297.0, 316.0, 314.0, 318.0, 312.0, 316.0, 314.0, 319.0, 311.0, 313.0, 314.0, 316.0, 314.0, 304.0, 302.0, 316.0, 317.0, 267.0, 252.0, 322.0, 311.0, 313.0, 323.0, 278.0, 295.0, 316.0, 314.0, 314.0, 322.0, 319.0, 317.0, 298.0, 286.0, 316.0, 320.0, 313.0, 314.0, 316.0, 314.0, 319.0, 317.0, 316.0, 317.0, 314.0, 319.0, 299.0, 280.0, 319.0, 314.0, 286.0, 296.0, 288.0, 294.0, 296.0, 294.0, 308.0, 319.0, 285.0, 291.0, 272.0, 298.0, 282.0, 300.0, 319.0, 320.0, 316.0, 314.0, 311.0, 319.0, 291.0, 291.0, 310.0, 317.0, 290.0, 289.0, 288.0, 282.0, 291.0, 291.0, 324.0, 312.0, 314.0, 322.0, 314.0, 316.0, 299.0, 288.0, 316.0, 314.0, 316.0, 314.0, 228.0, 228.0, 281.0, 292.0, 316.0, 
314.0, 293.0, 286.0, 288.0, 288.0, 316.0, 317.0, 293.0, 294.0, 314.0, 319.0, 321.0, 309.0, 316.0, 314.0, 322.0, 308.0, 319.0, 314.0, 293.0, 289.0, 288.0, 291.0, 316.0, 314.0, 319.0, 320.0, 321.0, 309.0, 319.0, 320.0, 299.0, 288.0, 319.0, 308.0, 251.0, 271.0, 296.0, 286.0, 319.0, 317.0, 308.0, 319.0, 286.0, 301.0, 293.0, 289.0, 316.0, 320.0, 291.0, 296.0, 313.0, 314.0, 314.0, 322.0, 311.0, 319.0, 314.0, 316.0, 288.0, 299.0, 310.0, 311.0, 288.0, 287.0, 317.0, 322.0, 321.0, 312.0, 319.0, 317.0, 302.0, 282.0, 291.0, 291.0, 282.0, 291.0, 253.0, 245.0, 290.0, 297.0, 288.0, 291.0, 206.0, 202.0, 319.0, 317.0, 293.0, 294.0, 288.0, 299.0, 316.0, 317.0, 285.0, 288.0, 319.0, 317.0, 299.0, 288.0, 319.0, 317.0, 293.0, 289.0]}, "sampler_perf": {"mean_env_wait_ms": 0.7876488216946843, "mean_processing_ms": 0.22601276823059913, "mean_inference_ms": 1.3911503009579902}, "off_policy_estimator": {}, "info": {"num_steps_trained": 10752000, "num_steps_sampled": 5734400, "sample_time_ms": 21718.783, "load_time_ms": 35.962, "grad_time_ms": 10435.936, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0016340842703357339, "policy_loss": -0.0055215489119291306, "vf_loss": 77.1473388671875, "vf_explained_var": 0.7692286968231201, "kl": 0.001808720058761537, "entropy": 1.1181970834732056, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5734400, "episodes_total": 14336, "training_iteration": 448, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-38-03", "timestamp": 1660261083, "time_this_iter_s": 34.906923055648804, "time_total_s": 19492.240149259567, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 19492.240149259567, "timesteps_since_restore": 5734400, "iterations_since_restore": 448, "perf": {"cpu_util_percent": 33.91428571428571, "ram_util_percent": 59.09591836734693}}
-{"episode_reward_max": 639.0, "episode_reward_min": 408.0, "episode_reward_mean": 605.22, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 202.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 302.61}, "custom_metrics": {"sparse_reward_mean": 209.8, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 185.62, "shaped_reward_min": 128, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.6, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.2, "onion_pickup_agent_1_min": 12, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.22, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 19, "useful_onion_pickup_agent_1_mean": 17.46, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.22, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.23, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.08, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.12, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.09, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.73, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.59, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 13, "dish_pickup_agent_1_mean": 5.89, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 5.42, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.85, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.88, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 0.74, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.62, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.05, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.55, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.94, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.09, 
"optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.73, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.09, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.73, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 636.0, 587.0, 579.0, 636.0, 582.0, 606.0, 636.0, 587.0, 633.0, 630.0, 630.0, 636.0, 633.0, 633.0, 624.0, 587.0, 627.0, 576.0, 584.0, 570.0, 582.0, 630.0, 579.0, 636.0, 633.0, 576.0, 587.0, 636.0, 582.0, 630.0, 636.0, 587.0, 627.0, 522.0, 582.0, 636.0, 627.0, 587.0, 582.0, 636.0, 587.0, 627.0, 636.0, 630.0, 630.0, 587.0, 621.0, 575.0, 639.0, 633.0, 636.0, 584.0, 582.0, 573.0, 498.0, 587.0, 579.0, 408.0, 636.0, 587.0, 587.0, 633.0, 573.0, 636.0, 587.0, 636.0, 582.0, 587.0, 630.0, 630.0, 630.0, 630.0, 627.0, 630.0, 606.0, 633.0, 519.0, 633.0, 636.0, 573.0, 630.0, 636.0, 636.0, 584.0, 636.0, 627.0, 630.0, 636.0, 633.0, 633.0, 579.0, 633.0, 582.0, 582.0, 590.0, 627.0, 576.0, 570.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [288.0, 294.0, 322.0, 314.0, 290.0, 297.0, 283.0, 296.0, 324.0, 312.0, 285.0, 297.0, 302.0, 304.0, 319.0, 317.0, 287.0, 300.0, 314.0, 319.0, 316.0, 314.0, 311.0, 319.0, 319.0, 317.0, 317.0, 316.0, 316.0, 317.0, 313.0, 311.0, 298.0, 289.0, 313.0, 314.0, 294.0, 282.0, 296.0, 288.0, 279.0, 291.0, 301.0, 281.0, 316.0, 314.0, 293.0, 286.0, 319.0, 317.0, 316.0, 317.0, 285.0, 291.0, 302.0, 285.0, 319.0, 317.0, 288.0, 294.0, 319.0, 311.0, 319.0, 317.0, 299.0, 288.0, 319.0, 308.0, 251.0, 271.0, 296.0, 286.0, 319.0, 317.0, 308.0, 319.0, 286.0, 301.0, 293.0, 289.0, 316.0, 320.0, 291.0, 296.0, 313.0, 314.0, 314.0, 322.0, 311.0, 319.0, 314.0, 316.0, 288.0, 299.0, 310.0, 311.0, 288.0, 
287.0, 317.0, 322.0, 321.0, 312.0, 319.0, 317.0, 302.0, 282.0, 291.0, 291.0, 282.0, 291.0, 253.0, 245.0, 290.0, 297.0, 288.0, 291.0, 206.0, 202.0, 319.0, 317.0, 293.0, 294.0, 288.0, 299.0, 316.0, 317.0, 285.0, 288.0, 319.0, 317.0, 299.0, 288.0, 319.0, 317.0, 293.0, 289.0, 290.0, 297.0, 316.0, 314.0, 318.0, 312.0, 316.0, 314.0, 319.0, 311.0, 313.0, 314.0, 316.0, 314.0, 304.0, 302.0, 316.0, 317.0, 267.0, 252.0, 322.0, 311.0, 313.0, 323.0, 278.0, 295.0, 316.0, 314.0, 314.0, 322.0, 319.0, 317.0, 298.0, 286.0, 316.0, 320.0, 313.0, 314.0, 316.0, 314.0, 319.0, 317.0, 316.0, 317.0, 314.0, 319.0, 299.0, 280.0, 319.0, 314.0, 286.0, 296.0, 288.0, 294.0, 296.0, 294.0, 308.0, 319.0, 285.0, 291.0, 272.0, 298.0, 282.0, 300.0]}, "sampler_perf": {"mean_env_wait_ms": 0.7869063801847372, "mean_processing_ms": 0.22586438145730278, "mean_inference_ms": 1.3903532948966142}, "off_policy_estimator": {}, "info": {"num_steps_trained": 10776000, "num_steps_sampled": 5747200, "sample_time_ms": 21734.341, "load_time_ms": 36.357, "grad_time_ms": 10633.962, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0015590289840474725, "policy_loss": -0.005610723048448563, "vf_loss": 77.2466812133789, "vf_explained_var": 0.7718032002449036, "kl": 0.0017093941569328308, "entropy": 1.1098432540893555, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5747200, "episodes_total": 14368, "training_iteration": 449, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-38-34", "timestamp": 1660261114, "time_this_iter_s": 30.762639045715332, "time_total_s": 19523.002788305283, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 19523.002788305283, "timesteps_since_restore": 5747200, "iterations_since_restore": 449, "perf": {"cpu_util_percent": 34.67441860465116, "ram_util_percent": 60.09767441860465}}
-{"episode_reward_max": 639.0, "episode_reward_min": 519.0, "episode_reward_mean": 607.51, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 249.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 303.755}, "custom_metrics": {"sparse_reward_mean": 210.6, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 186.31, "shaped_reward_min": 159, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.27, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 19, "onion_pickup_agent_1_mean": 18.5, "onion_pickup_agent_1_min": 15, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 15.83, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 19, "useful_onion_pickup_agent_1_mean": 17.75, "useful_onion_pickup_agent_1_min": 14, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.16, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.2, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.07, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.1, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.87, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 18.0, "potting_onion_agent_1_min": 14, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.74, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 5.65, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.68, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.77, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.85, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.62, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.77, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 4.9, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.7, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.83, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.87, 
"optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 18.0, "optimal_onion_potting_agent_1_min": 14, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.87, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 18.0, "viable_onion_potting_agent_1_min": 14, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [573.0, 582.0, 579.0, 633.0, 630.0, 587.0, 639.0, 582.0, 582.0, 522.0, 633.0, 630.0, 582.0, 633.0, 630.0, 579.0, 519.0, 627.0, 636.0, 627.0, 587.0, 587.0, 630.0, 573.0, 639.0, 630.0, 582.0, 627.0, 627.0, 587.0, 582.0, 587.0, 636.0, 587.0, 636.0, 582.0, 587.0, 630.0, 630.0, 630.0, 630.0, 627.0, 630.0, 606.0, 633.0, 519.0, 633.0, 636.0, 573.0, 630.0, 636.0, 636.0, 584.0, 636.0, 627.0, 630.0, 636.0, 633.0, 633.0, 579.0, 633.0, 582.0, 582.0, 590.0, 627.0, 576.0, 570.0, 582.0, 582.0, 636.0, 587.0, 579.0, 636.0, 582.0, 606.0, 636.0, 587.0, 633.0, 630.0, 630.0, 636.0, 633.0, 633.0, 624.0, 587.0, 627.0, 576.0, 584.0, 570.0, 582.0, 630.0, 579.0, 636.0, 633.0, 576.0, 587.0, 636.0, 582.0, 630.0, 636.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [282.0, 291.0, 289.0, 293.0, 287.0, 292.0, 319.0, 314.0, 311.0, 319.0, 290.0, 297.0, 319.0, 320.0, 299.0, 283.0, 290.0, 292.0, 264.0, 258.0, 316.0, 317.0, 321.0, 309.0, 291.0, 291.0, 314.0, 319.0, 316.0, 314.0, 296.0, 283.0, 270.0, 249.0, 316.0, 311.0, 311.0, 325.0, 315.0, 312.0, 290.0, 297.0, 295.0, 292.0, 321.0, 309.0, 293.0, 280.0, 322.0, 317.0, 316.0, 314.0, 293.0, 289.0, 308.0, 319.0, 311.0, 316.0, 296.0, 291.0, 291.0, 291.0, 301.0, 286.0, 319.0, 317.0, 299.0, 288.0, 319.0, 317.0, 293.0, 289.0, 290.0, 297.0, 316.0, 314.0, 318.0, 312.0, 316.0, 314.0, 319.0, 311.0, 313.0, 314.0, 316.0, 314.0, 304.0, 302.0, 316.0, 317.0, 267.0, 252.0, 322.0, 311.0, 313.0, 323.0, 278.0, 
295.0, 316.0, 314.0, 314.0, 322.0, 319.0, 317.0, 298.0, 286.0, 316.0, 320.0, 313.0, 314.0, 316.0, 314.0, 319.0, 317.0, 316.0, 317.0, 314.0, 319.0, 299.0, 280.0, 319.0, 314.0, 286.0, 296.0, 288.0, 294.0, 296.0, 294.0, 308.0, 319.0, 285.0, 291.0, 272.0, 298.0, 282.0, 300.0, 288.0, 294.0, 322.0, 314.0, 290.0, 297.0, 283.0, 296.0, 324.0, 312.0, 285.0, 297.0, 302.0, 304.0, 319.0, 317.0, 287.0, 300.0, 314.0, 319.0, 316.0, 314.0, 311.0, 319.0, 319.0, 317.0, 317.0, 316.0, 316.0, 317.0, 313.0, 311.0, 298.0, 289.0, 313.0, 314.0, 294.0, 282.0, 296.0, 288.0, 279.0, 291.0, 301.0, 281.0, 316.0, 314.0, 293.0, 286.0, 319.0, 317.0, 316.0, 317.0, 285.0, 291.0, 302.0, 285.0, 319.0, 317.0, 288.0, 294.0, 319.0, 311.0, 319.0, 317.0]}, "sampler_perf": {"mean_env_wait_ms": 0.7861726618280127, "mean_processing_ms": 0.22571819485718037, "mean_inference_ms": 1.389515005345911}, "off_policy_estimator": {}, "info": {"num_steps_trained": 10800000, "num_steps_sampled": 5760000, "sample_time_ms": 21545.479, "load_time_ms": 37.135, "grad_time_ms": 10871.751, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.00040180576615966856, "policy_loss": -0.007139734923839569, "vf_loss": 80.9995346069336, "vf_explained_var": 0.7637953758239746, "kl": 0.0017641382291913033, "entropy": 1.1168159246444702, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5760000, "episodes_total": 14400, "training_iteration": 450, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-39-04", "timestamp": 1660261144, "time_this_iter_s": 30.14027214050293, "time_total_s": 19553.143060445786, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 19553.143060445786, "timesteps_since_restore": 5760000, "iterations_since_restore": 450, "perf": {"cpu_util_percent": 35.460465116279074, "ram_util_percent": 59.4627906976744}}
-{"episode_reward_max": 639.0, "episode_reward_min": 180.0, "episode_reward_mean": 599.51, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 89.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 299.755}, "custom_metrics": {"sparse_reward_mean": 207.8, "sparse_reward_min": 60, "sparse_reward_max": 220, "shaped_reward_mean": 183.91, "shaped_reward_min": 60, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.25, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 19, "onion_pickup_agent_1_mean": 18.17, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 15.82, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 19, "useful_onion_pickup_agent_1_mean": 17.36, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.15, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.21, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.05, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.14, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 15.82, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.65, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.59, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 14, "dish_pickup_agent_1_mean": 5.68, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.53, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.82, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.87, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 0.6, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.57, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 4.92, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.51, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.88, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.82, 
"optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.65, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.82, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.65, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [627.0, 576.0, 630.0, 408.0, 582.0, 582.0, 636.0, 636.0, 582.0, 627.0, 582.0, 630.0, 630.0, 633.0, 582.0, 630.0, 630.0, 636.0, 180.0, 630.0, 510.0, 633.0, 582.0, 630.0, 579.0, 636.0, 582.0, 582.0, 630.0, 639.0, 630.0, 570.0, 627.0, 576.0, 570.0, 582.0, 582.0, 636.0, 587.0, 579.0, 636.0, 582.0, 606.0, 636.0, 587.0, 633.0, 630.0, 630.0, 636.0, 633.0, 633.0, 624.0, 587.0, 627.0, 576.0, 584.0, 570.0, 582.0, 630.0, 579.0, 636.0, 633.0, 576.0, 587.0, 636.0, 582.0, 630.0, 636.0, 573.0, 582.0, 579.0, 633.0, 630.0, 587.0, 639.0, 582.0, 582.0, 522.0, 633.0, 630.0, 582.0, 633.0, 630.0, 579.0, 519.0, 627.0, 636.0, 627.0, 587.0, 587.0, 630.0, 573.0, 639.0, 630.0, 582.0, 627.0, 627.0, 587.0, 582.0, 587.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [319.0, 308.0, 285.0, 291.0, 311.0, 319.0, 213.0, 195.0, 288.0, 294.0, 294.0, 288.0, 321.0, 315.0, 319.0, 317.0, 291.0, 291.0, 311.0, 316.0, 301.0, 281.0, 316.0, 314.0, 321.0, 309.0, 327.0, 306.0, 291.0, 291.0, 311.0, 319.0, 311.0, 319.0, 311.0, 325.0, 91.0, 89.0, 311.0, 319.0, 247.0, 263.0, 311.0, 322.0, 283.0, 299.0, 316.0, 314.0, 288.0, 291.0, 316.0, 320.0, 293.0, 289.0, 291.0, 291.0, 306.0, 324.0, 317.0, 322.0, 314.0, 316.0, 282.0, 288.0, 308.0, 319.0, 285.0, 291.0, 272.0, 298.0, 282.0, 300.0, 288.0, 294.0, 322.0, 314.0, 290.0, 297.0, 283.0, 296.0, 324.0, 312.0, 285.0, 297.0, 302.0, 304.0, 319.0, 317.0, 287.0, 300.0, 314.0, 319.0, 316.0, 314.0, 311.0, 319.0, 319.0, 
317.0, 317.0, 316.0, 316.0, 317.0, 313.0, 311.0, 298.0, 289.0, 313.0, 314.0, 294.0, 282.0, 296.0, 288.0, 279.0, 291.0, 301.0, 281.0, 316.0, 314.0, 293.0, 286.0, 319.0, 317.0, 316.0, 317.0, 285.0, 291.0, 302.0, 285.0, 319.0, 317.0, 288.0, 294.0, 319.0, 311.0, 319.0, 317.0, 282.0, 291.0, 289.0, 293.0, 287.0, 292.0, 319.0, 314.0, 311.0, 319.0, 290.0, 297.0, 319.0, 320.0, 299.0, 283.0, 290.0, 292.0, 264.0, 258.0, 316.0, 317.0, 321.0, 309.0, 291.0, 291.0, 314.0, 319.0, 316.0, 314.0, 296.0, 283.0, 270.0, 249.0, 316.0, 311.0, 311.0, 325.0, 315.0, 312.0, 290.0, 297.0, 295.0, 292.0, 321.0, 309.0, 293.0, 280.0, 322.0, 317.0, 316.0, 314.0, 293.0, 289.0, 308.0, 319.0, 311.0, 316.0, 296.0, 291.0, 291.0, 291.0, 301.0, 286.0]}, "sampler_perf": {"mean_env_wait_ms": 0.7854523352273275, "mean_processing_ms": 0.22557596598023225, "mean_inference_ms": 1.3886629869315275}, "off_policy_estimator": {}, "info": {"num_steps_trained": 10824000, "num_steps_sampled": 5772800, "sample_time_ms": 21458.474, "load_time_ms": 37.866, "grad_time_ms": 10821.902, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.004755727481096983, "policy_loss": -0.00296382955275476, "vf_loss": 82.76275634765625, "vf_explained_var": 0.7805452942848206, "kl": 0.0020347917452454567, "entropy": 1.1134214401245117, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5772800, "episodes_total": 14432, "training_iteration": 451, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-39-39", "timestamp": 1660261179, "time_this_iter_s": 34.819623947143555, "time_total_s": 19587.96268439293, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 19587.96268439293, "timesteps_since_restore": 5772800, "iterations_since_restore": 451, "perf": {"cpu_util_percent": 35.726, "ram_util_percent": 59.13399999999999}}
-{"episode_reward_max": 639.0, "episode_reward_min": 180.0, "episode_reward_mean": 600.39, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 89.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 300.195}, "custom_metrics": {"sparse_reward_mean": 208.0, "sparse_reward_min": 60, "sparse_reward_max": 220, "shaped_reward_mean": 184.39, "shaped_reward_min": 60, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.35, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 19, "onion_pickup_agent_1_mean": 18.11, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 15.9, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 19, "useful_onion_pickup_agent_1_mean": 17.4, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.15, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.15, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.05, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.1, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 15.87, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.63, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.51, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 14, "dish_pickup_agent_1_mean": 5.71, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.56, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.92, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.84, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 0.58, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.52, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 4.97, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.49, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.91, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.87, 
"optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.63, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.87, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.63, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [630.0, 582.0, 582.0, 576.0, 609.0, 582.0, 630.0, 636.0, 630.0, 630.0, 633.0, 587.0, 584.0, 582.0, 633.0, 587.0, 636.0, 639.0, 630.0, 630.0, 522.0, 636.0, 587.0, 633.0, 579.0, 633.0, 582.0, 630.0, 582.0, 633.0, 633.0, 582.0, 636.0, 582.0, 630.0, 636.0, 573.0, 582.0, 579.0, 633.0, 630.0, 587.0, 639.0, 582.0, 582.0, 522.0, 633.0, 630.0, 582.0, 633.0, 630.0, 579.0, 519.0, 627.0, 636.0, 627.0, 587.0, 587.0, 630.0, 573.0, 639.0, 630.0, 582.0, 627.0, 627.0, 587.0, 582.0, 587.0, 627.0, 576.0, 630.0, 408.0, 582.0, 582.0, 636.0, 636.0, 582.0, 627.0, 582.0, 630.0, 630.0, 633.0, 582.0, 630.0, 630.0, 636.0, 180.0, 630.0, 510.0, 633.0, 582.0, 630.0, 579.0, 636.0, 582.0, 582.0, 630.0, 639.0, 630.0, 570.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [321.0, 309.0, 288.0, 294.0, 294.0, 288.0, 290.0, 286.0, 302.0, 307.0, 291.0, 291.0, 311.0, 319.0, 324.0, 312.0, 313.0, 317.0, 311.0, 319.0, 311.0, 322.0, 288.0, 299.0, 283.0, 301.0, 293.0, 289.0, 319.0, 314.0, 283.0, 304.0, 314.0, 322.0, 319.0, 320.0, 316.0, 314.0, 321.0, 309.0, 257.0, 265.0, 321.0, 315.0, 288.0, 299.0, 311.0, 322.0, 291.0, 288.0, 309.0, 324.0, 285.0, 297.0, 316.0, 314.0, 293.0, 289.0, 319.0, 314.0, 316.0, 317.0, 286.0, 296.0, 319.0, 317.0, 288.0, 294.0, 319.0, 311.0, 319.0, 317.0, 282.0, 291.0, 289.0, 293.0, 287.0, 292.0, 319.0, 314.0, 311.0, 319.0, 290.0, 297.0, 319.0, 320.0, 299.0, 283.0, 290.0, 292.0, 264.0, 258.0, 316.0, 317.0, 321.0, 309.0, 291.0, 
291.0, 314.0, 319.0, 316.0, 314.0, 296.0, 283.0, 270.0, 249.0, 316.0, 311.0, 311.0, 325.0, 315.0, 312.0, 290.0, 297.0, 295.0, 292.0, 321.0, 309.0, 293.0, 280.0, 322.0, 317.0, 316.0, 314.0, 293.0, 289.0, 308.0, 319.0, 311.0, 316.0, 296.0, 291.0, 291.0, 291.0, 301.0, 286.0, 319.0, 308.0, 285.0, 291.0, 311.0, 319.0, 213.0, 195.0, 288.0, 294.0, 294.0, 288.0, 321.0, 315.0, 319.0, 317.0, 291.0, 291.0, 311.0, 316.0, 301.0, 281.0, 316.0, 314.0, 321.0, 309.0, 327.0, 306.0, 291.0, 291.0, 311.0, 319.0, 311.0, 319.0, 311.0, 325.0, 91.0, 89.0, 311.0, 319.0, 247.0, 263.0, 311.0, 322.0, 283.0, 299.0, 316.0, 314.0, 288.0, 291.0, 316.0, 320.0, 293.0, 289.0, 291.0, 291.0, 306.0, 324.0, 317.0, 322.0, 314.0, 316.0, 282.0, 288.0]}, "sampler_perf": {"mean_env_wait_ms": 0.7847405370697927, "mean_processing_ms": 0.22543724156360243, "mean_inference_ms": 1.3878004584531793}, "off_policy_estimator": {}, "info": {"num_steps_trained": 10848000, "num_steps_sampled": 5785600, "sample_time_ms": 21368.51, "load_time_ms": 38.205, "grad_time_ms": 10677.018, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0004785877245012671, "policy_loss": -0.0067210569977760315, "vf_loss": 77.5384750366211, "vf_explained_var": 0.777080774307251, "kl": 0.0022153640165925026, "entropy": 1.108397126197815, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5785600, "episodes_total": 14464, "training_iteration": 452, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-40-10", "timestamp": 1660261210, "time_this_iter_s": 30.929455280303955, "time_total_s": 19618.892139673233, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 19618.892139673233, "timesteps_since_restore": 5785600, "iterations_since_restore": 452, "perf": {"cpu_util_percent": 34.12045454545454, "ram_util_percent": 59.23863636363635}}
-{"episode_reward_max": 639.0, "episode_reward_min": 180.0, "episode_reward_mean": 601.45, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 89.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 300.725}, "custom_metrics": {"sparse_reward_mean": 208.2, "sparse_reward_min": 60, "sparse_reward_max": 220, "shaped_reward_mean": 185.05, "shaped_reward_min": 60, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.52, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 19, "onion_pickup_agent_1_mean": 17.97, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.17, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 19, "useful_onion_pickup_agent_1_mean": 17.3, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.11, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.12, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.01, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.06, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 16.1, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.54, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.32, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 14, "dish_pickup_agent_1_mean": 5.98, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.43, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.08, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.73, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 0.75, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.45, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.07, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.41, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.01, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.1, 
"optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.54, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.1, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.54, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 582.0, 636.0, 636.0, 636.0, 636.0, 582.0, 633.0, 587.0, 581.0, 633.0, 582.0, 630.0, 630.0, 587.0, 582.0, 636.0, 582.0, 633.0, 536.0, 582.0, 636.0, 639.0, 627.0, 582.0, 624.0, 579.0, 573.0, 633.0, 633.0, 587.0, 633.0, 627.0, 587.0, 582.0, 587.0, 627.0, 576.0, 630.0, 408.0, 582.0, 582.0, 636.0, 636.0, 582.0, 627.0, 582.0, 630.0, 630.0, 633.0, 582.0, 630.0, 630.0, 636.0, 180.0, 630.0, 510.0, 633.0, 582.0, 630.0, 579.0, 636.0, 582.0, 582.0, 630.0, 639.0, 630.0, 570.0, 630.0, 582.0, 582.0, 576.0, 609.0, 582.0, 630.0, 636.0, 630.0, 630.0, 633.0, 587.0, 584.0, 582.0, 633.0, 587.0, 636.0, 639.0, 630.0, 630.0, 522.0, 636.0, 587.0, 633.0, 579.0, 633.0, 582.0, 630.0, 582.0, 633.0, 633.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [293.0, 289.0, 291.0, 291.0, 319.0, 317.0, 314.0, 322.0, 319.0, 317.0, 314.0, 322.0, 288.0, 294.0, 319.0, 314.0, 288.0, 299.0, 290.0, 291.0, 314.0, 319.0, 294.0, 288.0, 316.0, 314.0, 319.0, 311.0, 296.0, 291.0, 291.0, 291.0, 322.0, 314.0, 290.0, 292.0, 316.0, 317.0, 257.0, 279.0, 288.0, 294.0, 311.0, 325.0, 322.0, 317.0, 319.0, 308.0, 293.0, 289.0, 312.0, 312.0, 296.0, 283.0, 287.0, 286.0, 316.0, 317.0, 316.0, 317.0, 290.0, 297.0, 314.0, 319.0, 311.0, 316.0, 296.0, 291.0, 291.0, 291.0, 301.0, 286.0, 319.0, 308.0, 285.0, 291.0, 311.0, 319.0, 213.0, 195.0, 288.0, 294.0, 294.0, 288.0, 321.0, 315.0, 319.0, 317.0, 291.0, 291.0, 311.0, 316.0, 301.0, 281.0, 316.0, 314.0, 321.0, 
309.0, 327.0, 306.0, 291.0, 291.0, 311.0, 319.0, 311.0, 319.0, 311.0, 325.0, 91.0, 89.0, 311.0, 319.0, 247.0, 263.0, 311.0, 322.0, 283.0, 299.0, 316.0, 314.0, 288.0, 291.0, 316.0, 320.0, 293.0, 289.0, 291.0, 291.0, 306.0, 324.0, 317.0, 322.0, 314.0, 316.0, 282.0, 288.0, 321.0, 309.0, 288.0, 294.0, 294.0, 288.0, 290.0, 286.0, 302.0, 307.0, 291.0, 291.0, 311.0, 319.0, 324.0, 312.0, 313.0, 317.0, 311.0, 319.0, 311.0, 322.0, 288.0, 299.0, 283.0, 301.0, 293.0, 289.0, 319.0, 314.0, 283.0, 304.0, 314.0, 322.0, 319.0, 320.0, 316.0, 314.0, 321.0, 309.0, 257.0, 265.0, 321.0, 315.0, 288.0, 299.0, 311.0, 322.0, 291.0, 288.0, 309.0, 324.0, 285.0, 297.0, 316.0, 314.0, 293.0, 289.0, 319.0, 314.0, 316.0, 317.0, 286.0, 296.0]}, "sampler_perf": {"mean_env_wait_ms": 0.7840324712041147, "mean_processing_ms": 0.22529998529372405, "mean_inference_ms": 1.3869181064814406}, "off_policy_estimator": {}, "info": {"num_steps_trained": 10872000, "num_steps_sampled": 5798400, "sample_time_ms": 21127.393, "load_time_ms": 38.987, "grad_time_ms": 10639.287, "update_time_ms": 0.003, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0022095281165093184, "policy_loss": -0.005497789476066828, "vf_loss": 82.6261215209961, "vf_explained_var": 0.7598109245300293, "kl": 0.0015994912246242166, "entropy": 1.110588550567627, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5798400, "episodes_total": 14496, "training_iteration": 453, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-40-38", "timestamp": 1660261238, "time_this_iter_s": 28.482766151428223, "time_total_s": 19647.37490582466, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 19647.37490582466, "timesteps_since_restore": 5798400, "iterations_since_restore": 453, "perf": {"cpu_util_percent": 34.01, "ram_util_percent": 58.98499999999999}}
-{"episode_reward_max": 639.0, "episode_reward_min": 522.0, "episode_reward_mean": 609.06, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 257.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 304.53}, "custom_metrics": {"sparse_reward_mean": 210.8, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 187.46, "shaped_reward_min": 162, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.76, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 19, "onion_pickup_agent_1_mean": 18.14, "onion_pickup_agent_1_min": 15, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.39, "useful_onion_pickup_agent_0_min": 13, "useful_onion_pickup_agent_0_max": 19, "useful_onion_pickup_agent_1_mean": 17.62, "useful_onion_pickup_agent_1_min": 14, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.11, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.11, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.0, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 0, "useful_onion_drop_agent_1_mean": 0.01, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.31, "potting_onion_agent_0_min": 13, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.73, "potting_onion_agent_1_min": 15, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.29, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 6.14, "dish_pickup_agent_1_min": 4, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.49, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.19, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.63, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.82, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.5, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.16, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.46, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.09, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.31, 
"optimal_onion_potting_agent_0_min": 13, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.73, "optimal_onion_potting_agent_1_min": 15, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.31, "viable_onion_potting_agent_0_min": 13, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.73, "viable_onion_potting_agent_1_min": 15, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [639.0, 587.0, 627.0, 630.0, 630.0, 627.0, 582.0, 633.0, 582.0, 582.0, 636.0, 630.0, 579.0, 627.0, 627.0, 579.0, 587.0, 633.0, 633.0, 582.0, 582.0, 582.0, 636.0, 633.0, 587.0, 633.0, 579.0, 636.0, 582.0, 582.0, 627.0, 636.0, 630.0, 639.0, 630.0, 570.0, 630.0, 582.0, 582.0, 576.0, 609.0, 582.0, 630.0, 636.0, 630.0, 630.0, 633.0, 587.0, 584.0, 582.0, 633.0, 587.0, 636.0, 639.0, 630.0, 630.0, 522.0, 636.0, 587.0, 633.0, 579.0, 633.0, 582.0, 630.0, 582.0, 633.0, 633.0, 582.0, 582.0, 582.0, 636.0, 636.0, 636.0, 636.0, 582.0, 633.0, 587.0, 581.0, 633.0, 582.0, 630.0, 630.0, 587.0, 582.0, 636.0, 582.0, 633.0, 536.0, 582.0, 636.0, 639.0, 627.0, 582.0, 624.0, 579.0, 573.0, 633.0, 633.0, 587.0, 633.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [319.0, 320.0, 288.0, 299.0, 313.0, 314.0, 306.0, 324.0, 311.0, 319.0, 316.0, 311.0, 296.0, 286.0, 322.0, 311.0, 296.0, 286.0, 298.0, 284.0, 314.0, 322.0, 311.0, 319.0, 277.0, 302.0, 319.0, 308.0, 316.0, 311.0, 290.0, 289.0, 298.0, 289.0, 316.0, 317.0, 314.0, 319.0, 289.0, 293.0, 299.0, 283.0, 286.0, 296.0, 314.0, 322.0, 318.0, 315.0, 291.0, 296.0, 316.0, 317.0, 287.0, 292.0, 314.0, 322.0, 293.0, 289.0, 291.0, 291.0, 316.0, 311.0, 319.0, 317.0, 306.0, 324.0, 317.0, 322.0, 314.0, 316.0, 282.0, 288.0, 321.0, 309.0, 288.0, 294.0, 294.0, 288.0, 290.0, 286.0, 302.0, 307.0, 291.0, 291.0, 311.0, 319.0, 324.0, 312.0, 313.0, 317.0, 311.0, 319.0, 311.0, 322.0, 288.0, 299.0, 283.0, 
301.0, 293.0, 289.0, 319.0, 314.0, 283.0, 304.0, 314.0, 322.0, 319.0, 320.0, 316.0, 314.0, 321.0, 309.0, 257.0, 265.0, 321.0, 315.0, 288.0, 299.0, 311.0, 322.0, 291.0, 288.0, 309.0, 324.0, 285.0, 297.0, 316.0, 314.0, 293.0, 289.0, 319.0, 314.0, 316.0, 317.0, 286.0, 296.0, 293.0, 289.0, 291.0, 291.0, 319.0, 317.0, 314.0, 322.0, 319.0, 317.0, 314.0, 322.0, 288.0, 294.0, 319.0, 314.0, 288.0, 299.0, 290.0, 291.0, 314.0, 319.0, 294.0, 288.0, 316.0, 314.0, 319.0, 311.0, 296.0, 291.0, 291.0, 291.0, 322.0, 314.0, 290.0, 292.0, 316.0, 317.0, 257.0, 279.0, 288.0, 294.0, 311.0, 325.0, 322.0, 317.0, 319.0, 308.0, 293.0, 289.0, 312.0, 312.0, 296.0, 283.0, 287.0, 286.0, 316.0, 317.0, 316.0, 317.0, 290.0, 297.0, 314.0, 319.0]}, "sampler_perf": {"mean_env_wait_ms": 0.783318071764331, "mean_processing_ms": 0.22516025845999352, "mean_inference_ms": 1.3858608320735286}, "off_policy_estimator": {}, "info": {"num_steps_trained": 10896000, "num_steps_sampled": 5811200, "sample_time_ms": 20778.477, "load_time_ms": 39.252, "grad_time_ms": 10489.319, "update_time_ms": 0.004, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0007634037174284458, "policy_loss": -0.006680456455796957, "vf_loss": 80.01913452148438, "vf_explained_var": 0.7667891383171082, "kl": 0.0017371875001117587, "entropy": 1.1161128282546997, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5811200, "episodes_total": 14528, "training_iteration": 454, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-41-06", "timestamp": 1660261266, "time_this_iter_s": 27.318589210510254, "time_total_s": 19674.69349503517, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 19674.69349503517, "timesteps_since_restore": 5811200, "iterations_since_restore": 454, "perf": {"cpu_util_percent": 37.051282051282044, "ram_util_percent": 58.9923076923077}}
-{"episode_reward_max": 639.0, "episode_reward_min": 462.0, "episode_reward_mean": 606.28, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 228.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 303.14}, "custom_metrics": {"sparse_reward_mean": 209.8, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 186.68, "shaped_reward_min": 142, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.68, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.2, "onion_pickup_agent_1_min": 15, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.32, "useful_onion_pickup_agent_0_min": 13, "useful_onion_pickup_agent_0_max": 19, "useful_onion_pickup_agent_1_mean": 17.57, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.13, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.17, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.02, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.03, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.27, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.68, "potting_onion_agent_1_min": 14, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.39, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 6.07, "dish_pickup_agent_1_min": 4, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.52, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.09, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.68, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.83, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.53, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.07, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.48, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.02, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.27, 
"optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.68, "optimal_onion_potting_agent_1_min": 14, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.27, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.68, "viable_onion_potting_agent_1_min": 14, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [633.0, 582.0, 582.0, 579.0, 582.0, 633.0, 630.0, 606.0, 639.0, 630.0, 639.0, 633.0, 636.0, 587.0, 582.0, 582.0, 582.0, 579.0, 630.0, 462.0, 573.0, 579.0, 582.0, 624.0, 636.0, 587.0, 579.0, 579.0, 636.0, 627.0, 587.0, 624.0, 582.0, 633.0, 633.0, 582.0, 582.0, 582.0, 636.0, 636.0, 636.0, 636.0, 582.0, 633.0, 587.0, 581.0, 633.0, 582.0, 630.0, 630.0, 587.0, 582.0, 636.0, 582.0, 633.0, 536.0, 582.0, 636.0, 639.0, 627.0, 582.0, 624.0, 579.0, 573.0, 633.0, 633.0, 587.0, 633.0, 639.0, 587.0, 627.0, 630.0, 630.0, 627.0, 582.0, 633.0, 582.0, 582.0, 636.0, 630.0, 579.0, 627.0, 627.0, 579.0, 587.0, 633.0, 633.0, 582.0, 582.0, 582.0, 636.0, 633.0, 587.0, 633.0, 579.0, 636.0, 582.0, 582.0, 627.0, 636.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [316.0, 317.0, 291.0, 291.0, 291.0, 291.0, 296.0, 283.0, 286.0, 296.0, 311.0, 322.0, 316.0, 314.0, 300.0, 306.0, 319.0, 320.0, 319.0, 311.0, 319.0, 320.0, 316.0, 317.0, 319.0, 317.0, 296.0, 291.0, 299.0, 283.0, 291.0, 291.0, 291.0, 291.0, 291.0, 288.0, 308.0, 322.0, 228.0, 234.0, 279.0, 294.0, 283.0, 296.0, 288.0, 294.0, 313.0, 311.0, 319.0, 317.0, 291.0, 296.0, 291.0, 288.0, 287.0, 292.0, 319.0, 317.0, 316.0, 311.0, 296.0, 291.0, 316.0, 308.0, 293.0, 289.0, 319.0, 314.0, 316.0, 317.0, 286.0, 296.0, 293.0, 289.0, 291.0, 291.0, 319.0, 317.0, 314.0, 322.0, 319.0, 317.0, 314.0, 322.0, 288.0, 294.0, 319.0, 314.0, 288.0, 299.0, 290.0, 291.0, 314.0, 319.0, 294.0, 288.0, 316.0, 
314.0, 319.0, 311.0, 296.0, 291.0, 291.0, 291.0, 322.0, 314.0, 290.0, 292.0, 316.0, 317.0, 257.0, 279.0, 288.0, 294.0, 311.0, 325.0, 322.0, 317.0, 319.0, 308.0, 293.0, 289.0, 312.0, 312.0, 296.0, 283.0, 287.0, 286.0, 316.0, 317.0, 316.0, 317.0, 290.0, 297.0, 314.0, 319.0, 319.0, 320.0, 288.0, 299.0, 313.0, 314.0, 306.0, 324.0, 311.0, 319.0, 316.0, 311.0, 296.0, 286.0, 322.0, 311.0, 296.0, 286.0, 298.0, 284.0, 314.0, 322.0, 311.0, 319.0, 277.0, 302.0, 319.0, 308.0, 316.0, 311.0, 290.0, 289.0, 298.0, 289.0, 316.0, 317.0, 314.0, 319.0, 289.0, 293.0, 299.0, 283.0, 286.0, 296.0, 314.0, 322.0, 318.0, 315.0, 291.0, 296.0, 316.0, 317.0, 287.0, 292.0, 314.0, 322.0, 293.0, 289.0, 291.0, 291.0, 316.0, 311.0, 319.0, 317.0]}, "sampler_perf": {"mean_env_wait_ms": 0.7826053116086107, "mean_processing_ms": 0.22502024237804114, "mean_inference_ms": 1.384772842121544}, "off_policy_estimator": {}, "info": {"num_steps_trained": 10920000, "num_steps_sampled": 5824000, "sample_time_ms": 20652.138, "load_time_ms": 39.437, "grad_time_ms": 10376.522, "update_time_ms": 0.004, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0003411610086914152, "policy_loss": -0.0072668264620006084, "vf_loss": 81.6093978881836, "vf_explained_var": 0.7669034600257874, "kl": 0.0018620697082951665, "entropy": 1.1059015989303589, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5824000, "episodes_total": 14560, "training_iteration": 455, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-41-35", "timestamp": 1660261295, "time_this_iter_s": 29.449601650238037, "time_total_s": 19704.14309668541, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 19704.14309668541, "timesteps_since_restore": 5824000, "iterations_since_restore": 455, "perf": {"cpu_util_percent": 37.61666666666666, "ram_util_percent": 59.06190476190477}}
-{"episode_reward_max": 639.0, "episode_reward_min": 462.0, "episode_reward_mean": 607.34, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 228.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 303.67}, "custom_metrics": {"sparse_reward_mean": 210.4, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 186.54, "shaped_reward_min": 142, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.63, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.3, "onion_pickup_agent_1_min": 15, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.26, "useful_onion_pickup_agent_0_min": 13, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.61, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.12, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.2, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.03, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.06, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.2, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.75, "potting_onion_agent_1_min": 14, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.51, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.87, "dish_pickup_agent_1_min": 4, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.55, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.98, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.73, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.7, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.55, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.07, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.51, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.02, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.2, 
"optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.75, "optimal_onion_potting_agent_1_min": 14, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.2, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.75, "viable_onion_potting_agent_1_min": 14, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [636.0, 630.0, 624.0, 587.0, 573.0, 633.0, 636.0, 582.0, 630.0, 633.0, 639.0, 633.0, 633.0, 582.0, 630.0, 633.0, 630.0, 630.0, 633.0, 582.0, 630.0, 630.0, 587.0, 573.0, 576.0, 582.0, 636.0, 525.0, 633.0, 579.0, 633.0, 527.0, 633.0, 633.0, 587.0, 633.0, 639.0, 587.0, 627.0, 630.0, 630.0, 627.0, 582.0, 633.0, 582.0, 582.0, 636.0, 630.0, 579.0, 627.0, 627.0, 579.0, 587.0, 633.0, 633.0, 582.0, 582.0, 582.0, 636.0, 633.0, 587.0, 633.0, 579.0, 636.0, 582.0, 582.0, 627.0, 636.0, 633.0, 582.0, 582.0, 579.0, 582.0, 633.0, 630.0, 606.0, 639.0, 630.0, 639.0, 633.0, 636.0, 587.0, 582.0, 582.0, 582.0, 579.0, 630.0, 462.0, 573.0, 579.0, 582.0, 624.0, 636.0, 587.0, 579.0, 579.0, 636.0, 627.0, 587.0, 624.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [319.0, 317.0, 313.0, 317.0, 316.0, 308.0, 301.0, 286.0, 290.0, 283.0, 319.0, 314.0, 324.0, 312.0, 291.0, 291.0, 315.0, 315.0, 317.0, 316.0, 314.0, 325.0, 316.0, 317.0, 316.0, 317.0, 293.0, 289.0, 316.0, 314.0, 321.0, 312.0, 316.0, 314.0, 319.0, 311.0, 316.0, 317.0, 288.0, 294.0, 313.0, 317.0, 311.0, 319.0, 288.0, 299.0, 282.0, 291.0, 285.0, 291.0, 286.0, 296.0, 311.0, 325.0, 268.0, 257.0, 316.0, 317.0, 296.0, 283.0, 320.0, 313.0, 254.0, 273.0, 316.0, 317.0, 316.0, 317.0, 290.0, 297.0, 314.0, 319.0, 319.0, 320.0, 288.0, 299.0, 313.0, 314.0, 306.0, 324.0, 311.0, 319.0, 316.0, 311.0, 296.0, 286.0, 322.0, 311.0, 296.0, 286.0, 298.0, 284.0, 314.0, 322.0, 311.0, 319.0, 277.0, 
302.0, 319.0, 308.0, 316.0, 311.0, 290.0, 289.0, 298.0, 289.0, 316.0, 317.0, 314.0, 319.0, 289.0, 293.0, 299.0, 283.0, 286.0, 296.0, 314.0, 322.0, 318.0, 315.0, 291.0, 296.0, 316.0, 317.0, 287.0, 292.0, 314.0, 322.0, 293.0, 289.0, 291.0, 291.0, 316.0, 311.0, 319.0, 317.0, 316.0, 317.0, 291.0, 291.0, 291.0, 291.0, 296.0, 283.0, 286.0, 296.0, 311.0, 322.0, 316.0, 314.0, 300.0, 306.0, 319.0, 320.0, 319.0, 311.0, 319.0, 320.0, 316.0, 317.0, 319.0, 317.0, 296.0, 291.0, 299.0, 283.0, 291.0, 291.0, 291.0, 291.0, 291.0, 288.0, 308.0, 322.0, 228.0, 234.0, 279.0, 294.0, 283.0, 296.0, 288.0, 294.0, 313.0, 311.0, 319.0, 317.0, 291.0, 296.0, 291.0, 288.0, 287.0, 292.0, 319.0, 317.0, 316.0, 311.0, 296.0, 291.0, 316.0, 308.0]}, "sampler_perf": {"mean_env_wait_ms": 0.781893848476972, "mean_processing_ms": 0.22487934380632985, "mean_inference_ms": 1.3836766382532326}, "off_policy_estimator": {}, "info": {"num_steps_trained": 10944000, "num_steps_sampled": 5836800, "sample_time_ms": 20326.683, "load_time_ms": 39.601, "grad_time_ms": 10224.263, "update_time_ms": 0.004, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0009388479520566761, "policy_loss": -0.008330571465194225, "vf_loss": 79.4411849975586, "vf_explained_var": 0.7653481960296631, "kl": 0.0017687659710645676, "entropy": 1.1048110723495483, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5836800, "episodes_total": 14592, "training_iteration": 456, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-42-04", "timestamp": 1660261324, "time_this_iter_s": 28.971395254135132, "time_total_s": 19733.114491939545, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 19733.114491939545, "timesteps_since_restore": 5836800, "iterations_since_restore": 456, "perf": {"cpu_util_percent": 36.80731707317073, "ram_util_percent": 59.075609756097556}}
-{"episode_reward_max": 639.0, "episode_reward_min": 197.0, "episode_reward_mean": 598.66, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 98.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 299.33}, "custom_metrics": {"sparse_reward_mean": 207.2, "sparse_reward_min": 60, "sparse_reward_max": 220, "shaped_reward_mean": 184.26, "shaped_reward_min": 77, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.33, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.14, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 15.96, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.36, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.13, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.16, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.05, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.07, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.96, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.64, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.54, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.74, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.48, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 4.89, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.8, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.64, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.5, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 4.97, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.46, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.91, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.96, 
"optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.64, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.96, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.64, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [519.0, 630.0, 579.0, 579.0, 576.0, 575.0, 582.0, 579.0, 197.0, 639.0, 582.0, 636.0, 587.0, 579.0, 582.0, 636.0, 582.0, 582.0, 630.0, 582.0, 582.0, 570.0, 633.0, 627.0, 582.0, 587.0, 630.0, 639.0, 590.0, 633.0, 582.0, 630.0, 582.0, 582.0, 627.0, 636.0, 633.0, 582.0, 582.0, 579.0, 582.0, 633.0, 630.0, 606.0, 639.0, 630.0, 639.0, 633.0, 636.0, 587.0, 582.0, 582.0, 582.0, 579.0, 630.0, 462.0, 573.0, 579.0, 582.0, 624.0, 636.0, 587.0, 579.0, 579.0, 636.0, 627.0, 587.0, 624.0, 636.0, 630.0, 624.0, 587.0, 573.0, 633.0, 636.0, 582.0, 630.0, 633.0, 639.0, 633.0, 633.0, 582.0, 630.0, 633.0, 630.0, 630.0, 633.0, 582.0, 630.0, 630.0, 587.0, 573.0, 576.0, 582.0, 636.0, 525.0, 633.0, 579.0, 633.0, 527.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [248.0, 271.0, 311.0, 319.0, 288.0, 291.0, 285.0, 294.0, 290.0, 286.0, 289.0, 286.0, 288.0, 294.0, 285.0, 294.0, 99.0, 98.0, 322.0, 317.0, 288.0, 294.0, 319.0, 317.0, 298.0, 289.0, 288.0, 291.0, 299.0, 283.0, 314.0, 322.0, 296.0, 286.0, 291.0, 291.0, 311.0, 319.0, 290.0, 292.0, 288.0, 294.0, 277.0, 293.0, 316.0, 317.0, 316.0, 311.0, 294.0, 288.0, 290.0, 297.0, 316.0, 314.0, 319.0, 320.0, 294.0, 296.0, 316.0, 317.0, 296.0, 286.0, 311.0, 319.0, 293.0, 289.0, 291.0, 291.0, 316.0, 311.0, 319.0, 317.0, 316.0, 317.0, 291.0, 291.0, 291.0, 291.0, 296.0, 283.0, 286.0, 296.0, 311.0, 322.0, 316.0, 314.0, 300.0, 306.0, 319.0, 320.0, 319.0, 311.0, 319.0, 320.0, 316.0, 317.0, 319.0, 
317.0, 296.0, 291.0, 299.0, 283.0, 291.0, 291.0, 291.0, 291.0, 291.0, 288.0, 308.0, 322.0, 228.0, 234.0, 279.0, 294.0, 283.0, 296.0, 288.0, 294.0, 313.0, 311.0, 319.0, 317.0, 291.0, 296.0, 291.0, 288.0, 287.0, 292.0, 319.0, 317.0, 316.0, 311.0, 296.0, 291.0, 316.0, 308.0, 319.0, 317.0, 313.0, 317.0, 316.0, 308.0, 301.0, 286.0, 290.0, 283.0, 319.0, 314.0, 324.0, 312.0, 291.0, 291.0, 315.0, 315.0, 317.0, 316.0, 314.0, 325.0, 316.0, 317.0, 316.0, 317.0, 293.0, 289.0, 316.0, 314.0, 321.0, 312.0, 316.0, 314.0, 319.0, 311.0, 316.0, 317.0, 288.0, 294.0, 313.0, 317.0, 311.0, 319.0, 288.0, 299.0, 282.0, 291.0, 285.0, 291.0, 286.0, 296.0, 311.0, 325.0, 268.0, 257.0, 316.0, 317.0, 296.0, 283.0, 320.0, 313.0, 254.0, 273.0]}, "sampler_perf": {"mean_env_wait_ms": 0.7811987617860328, "mean_processing_ms": 0.2247433395983137, "mean_inference_ms": 1.3827621285902887}, "off_policy_estimator": {}, "info": {"num_steps_trained": 10968000, "num_steps_sampled": 5849600, "sample_time_ms": 20643.82, "load_time_ms": 39.721, "grad_time_ms": 10128.234, "update_time_ms": 0.004, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.00031386129558086395, "policy_loss": -0.007833792828023434, "vf_loss": 87.0155258178711, "vf_explained_var": 0.759077787399292, "kl": 0.0023228460922837257, "entropy": 1.10780668258667, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5849600, "episodes_total": 14624, "training_iteration": 457, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-42-37", "timestamp": 1660261357, "time_this_iter_s": 32.612699031829834, "time_total_s": 19765.727190971375, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 19765.727190971375, "timesteps_since_restore": 5849600, "iterations_since_restore": 457, "perf": {"cpu_util_percent": 35.49347826086956, "ram_util_percent": 59.11739130434784}}
-{"episode_reward_max": 639.0, "episode_reward_min": 197.0, "episode_reward_mean": 600.58, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 98.0}, "policy_reward_max": {"ppo": 326.0}, "policy_reward_mean": {"ppo": 300.29}, "custom_metrics": {"sparse_reward_mean": 207.8, "sparse_reward_min": 60, "sparse_reward_max": 220, "shaped_reward_mean": 184.98, "shaped_reward_min": 77, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.3, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.21, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 15.91, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.48, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.14, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.16, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.05, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.05, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.9, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.71, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.66, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.88, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 5.57, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.88, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 6, "dish_drop_agent_0_mean": 0.86, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.78, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.6, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.96, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 6, "soup_delivery_agent_0_mean": 5.53, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.87, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.9, 
"optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.71, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.9, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.71, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 587.0, 606.0, 582.0, 633.0, 579.0, 587.0, 639.0, 567.0, 630.0, 630.0, 582.0, 627.0, 636.0, 587.0, 636.0, 639.0, 582.0, 587.0, 587.0, 630.0, 633.0, 630.0, 579.0, 587.0, 576.0, 633.0, 587.0, 587.0, 630.0, 579.0, 630.0, 636.0, 627.0, 587.0, 624.0, 636.0, 630.0, 624.0, 587.0, 573.0, 633.0, 636.0, 582.0, 630.0, 633.0, 639.0, 633.0, 633.0, 582.0, 630.0, 633.0, 630.0, 630.0, 633.0, 582.0, 630.0, 630.0, 587.0, 573.0, 576.0, 582.0, 636.0, 525.0, 633.0, 579.0, 633.0, 527.0, 519.0, 630.0, 579.0, 579.0, 576.0, 575.0, 582.0, 579.0, 197.0, 639.0, 582.0, 636.0, 587.0, 579.0, 582.0, 636.0, 582.0, 582.0, 630.0, 582.0, 582.0, 570.0, 633.0, 627.0, 582.0, 587.0, 630.0, 639.0, 590.0, 633.0, 582.0, 630.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [289.0, 293.0, 291.0, 296.0, 301.0, 305.0, 286.0, 296.0, 319.0, 314.0, 290.0, 289.0, 290.0, 297.0, 319.0, 320.0, 282.0, 285.0, 310.0, 320.0, 319.0, 311.0, 288.0, 294.0, 313.0, 314.0, 319.0, 317.0, 293.0, 294.0, 319.0, 317.0, 319.0, 320.0, 293.0, 289.0, 299.0, 288.0, 290.0, 297.0, 310.0, 320.0, 324.0, 309.0, 311.0, 319.0, 291.0, 288.0, 299.0, 288.0, 286.0, 290.0, 317.0, 316.0, 296.0, 291.0, 293.0, 294.0, 316.0, 314.0, 294.0, 285.0, 326.0, 304.0, 319.0, 317.0, 316.0, 311.0, 296.0, 291.0, 316.0, 308.0, 319.0, 317.0, 313.0, 317.0, 316.0, 308.0, 301.0, 286.0, 290.0, 283.0, 319.0, 314.0, 324.0, 312.0, 291.0, 291.0, 315.0, 315.0, 317.0, 316.0, 314.0, 325.0, 316.0, 317.0, 316.0, 
317.0, 293.0, 289.0, 316.0, 314.0, 321.0, 312.0, 316.0, 314.0, 319.0, 311.0, 316.0, 317.0, 288.0, 294.0, 313.0, 317.0, 311.0, 319.0, 288.0, 299.0, 282.0, 291.0, 285.0, 291.0, 286.0, 296.0, 311.0, 325.0, 268.0, 257.0, 316.0, 317.0, 296.0, 283.0, 320.0, 313.0, 254.0, 273.0, 248.0, 271.0, 311.0, 319.0, 288.0, 291.0, 285.0, 294.0, 290.0, 286.0, 289.0, 286.0, 288.0, 294.0, 285.0, 294.0, 99.0, 98.0, 322.0, 317.0, 288.0, 294.0, 319.0, 317.0, 298.0, 289.0, 288.0, 291.0, 299.0, 283.0, 314.0, 322.0, 296.0, 286.0, 291.0, 291.0, 311.0, 319.0, 290.0, 292.0, 288.0, 294.0, 277.0, 293.0, 316.0, 317.0, 316.0, 311.0, 294.0, 288.0, 290.0, 297.0, 316.0, 314.0, 319.0, 320.0, 294.0, 296.0, 316.0, 317.0, 296.0, 286.0, 311.0, 319.0]}, "sampler_perf": {"mean_env_wait_ms": 0.7805088940621119, "mean_processing_ms": 0.22460838488183463, "mean_inference_ms": 1.3819062694297763}, "off_policy_estimator": {}, "info": {"num_steps_trained": 10992000, "num_steps_sampled": 5862400, "sample_time_ms": 20388.555, "load_time_ms": 39.901, "grad_time_ms": 10047.414, "update_time_ms": 0.004, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.00028213454061187804, "policy_loss": -0.007530031260102987, "vf_loss": 78.06029510498047, "vf_explained_var": 0.7737483382225037, "kl": 0.0017934959614649415, "entropy": 1.1162586212158203, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5862400, "episodes_total": 14656, "training_iteration": 458, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-43-08", "timestamp": 1660261388, "time_this_iter_s": 31.54933786392212, "time_total_s": 19797.276528835297, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 19797.276528835297, "timesteps_since_restore": 5862400, "iterations_since_restore": 458, "perf": {"cpu_util_percent": 36.01111111111111, "ram_util_percent": 59.15777777777779}}
-{"episode_reward_max": 639.0, "episode_reward_min": 197.0, "episode_reward_mean": 599.73, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 98.0}, "policy_reward_max": {"ppo": 326.0}, "policy_reward_mean": {"ppo": 299.865}, "custom_metrics": {"sparse_reward_mean": 207.4, "sparse_reward_min": 60, "sparse_reward_max": 220, "shaped_reward_mean": 184.93, "shaped_reward_min": 77, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.45, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.03, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.0, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.36, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.14, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.19, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.04, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.08, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 16.04, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.53, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 20, "dish_pickup_agent_0_mean": 6.54, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 6.05, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 5.47, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.02, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 6, "dish_drop_agent_0_mean": 0.88, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.8, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.49, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.06, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 6, "soup_delivery_agent_0_mean": 5.41, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.97, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.04, 
"optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.53, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 20, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.04, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.53, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 20, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 636.0, 639.0, 630.0, 630.0, 582.0, 636.0, 581.0, 636.0, 630.0, 630.0, 590.0, 633.0, 636.0, 636.0, 630.0, 582.0, 582.0, 630.0, 584.0, 579.0, 627.0, 504.0, 630.0, 582.0, 633.0, 630.0, 633.0, 587.0, 582.0, 582.0, 633.0, 633.0, 579.0, 633.0, 527.0, 519.0, 630.0, 579.0, 579.0, 576.0, 575.0, 582.0, 579.0, 197.0, 639.0, 582.0, 636.0, 587.0, 579.0, 582.0, 636.0, 582.0, 582.0, 630.0, 582.0, 582.0, 570.0, 633.0, 627.0, 582.0, 587.0, 630.0, 639.0, 590.0, 633.0, 582.0, 630.0, 582.0, 587.0, 606.0, 582.0, 633.0, 579.0, 587.0, 639.0, 567.0, 630.0, 630.0, 582.0, 627.0, 636.0, 587.0, 636.0, 639.0, 582.0, 587.0, 587.0, 630.0, 633.0, 630.0, 579.0, 587.0, 576.0, 633.0, 587.0, 587.0, 630.0, 579.0, 630.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [282.0, 300.0, 319.0, 317.0, 319.0, 320.0, 319.0, 311.0, 311.0, 319.0, 293.0, 289.0, 319.0, 317.0, 290.0, 291.0, 319.0, 317.0, 316.0, 314.0, 311.0, 319.0, 296.0, 294.0, 317.0, 316.0, 319.0, 317.0, 319.0, 317.0, 311.0, 319.0, 288.0, 294.0, 288.0, 294.0, 314.0, 316.0, 298.0, 286.0, 288.0, 291.0, 308.0, 319.0, 254.0, 250.0, 311.0, 319.0, 288.0, 294.0, 313.0, 320.0, 314.0, 316.0, 314.0, 319.0, 291.0, 296.0, 293.0, 289.0, 289.0, 293.0, 317.0, 316.0, 316.0, 317.0, 296.0, 283.0, 320.0, 313.0, 254.0, 273.0, 248.0, 271.0, 311.0, 319.0, 288.0, 291.0, 285.0, 294.0, 290.0, 286.0, 289.0, 286.0, 288.0, 294.0, 285.0, 294.0, 99.0, 98.0, 322.0, 317.0, 288.0, 294.0, 319.0, 317.0, 298.0, 
289.0, 288.0, 291.0, 299.0, 283.0, 314.0, 322.0, 296.0, 286.0, 291.0, 291.0, 311.0, 319.0, 290.0, 292.0, 288.0, 294.0, 277.0, 293.0, 316.0, 317.0, 316.0, 311.0, 294.0, 288.0, 290.0, 297.0, 316.0, 314.0, 319.0, 320.0, 294.0, 296.0, 316.0, 317.0, 296.0, 286.0, 311.0, 319.0, 289.0, 293.0, 291.0, 296.0, 301.0, 305.0, 286.0, 296.0, 319.0, 314.0, 290.0, 289.0, 290.0, 297.0, 319.0, 320.0, 282.0, 285.0, 310.0, 320.0, 319.0, 311.0, 288.0, 294.0, 313.0, 314.0, 319.0, 317.0, 293.0, 294.0, 319.0, 317.0, 319.0, 320.0, 293.0, 289.0, 299.0, 288.0, 290.0, 297.0, 310.0, 320.0, 324.0, 309.0, 311.0, 319.0, 291.0, 288.0, 299.0, 288.0, 286.0, 290.0, 317.0, 316.0, 296.0, 291.0, 293.0, 294.0, 316.0, 314.0, 294.0, 285.0, 326.0, 304.0]}, "sampler_perf": {"mean_env_wait_ms": 0.7798432151982481, "mean_processing_ms": 0.22448068931407403, "mean_inference_ms": 1.3813562407452884}, "off_policy_estimator": {}, "info": {"num_steps_trained": 11016000, "num_steps_sampled": 5875200, "sample_time_ms": 20860.599, "load_time_ms": 39.533, "grad_time_ms": 10085.729, "update_time_ms": 0.004, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -4.9240032240049914e-05, "policy_loss": -0.007299743592739105, "vf_loss": 78.08226776123047, "vf_explained_var": 0.7718666195869446, "kl": 0.001796315424144268, "entropy": 1.1154268980026245, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5875200, "episodes_total": 14688, "training_iteration": 459, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-43-44", "timestamp": 1660261424, "time_this_iter_s": 35.86376190185547, "time_total_s": 19833.140290737152, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 19833.140290737152, "timesteps_since_restore": 5875200, "iterations_since_restore": 459, "perf": {"cpu_util_percent": 35.09411764705882, "ram_util_percent": 59.11176470588236}}
-{"episode_reward_max": 639.0, "episode_reward_min": 504.0, "episode_reward_mean": 606.33, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 250.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 303.165}, "custom_metrics": {"sparse_reward_mean": 209.8, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 186.73, "shaped_reward_min": 144, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.61, "onion_pickup_agent_0_min": 14, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.27, "onion_pickup_agent_1_min": 15, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.11, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.51, "useful_onion_pickup_agent_1_min": 15, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.14, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.29, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.05, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.12, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 16.18, "potting_onion_agent_0_min": 13, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.65, "potting_onion_agent_1_min": 14, "potting_onion_agent_1_max": 20, "dish_pickup_agent_0_mean": 6.58, "dish_pickup_agent_0_min": 5, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 6.13, "dish_pickup_agent_1_min": 4, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 5.59, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.09, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 6, "dish_drop_agent_0_mean": 0.87, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.84, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.58, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.06, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 6, "soup_delivery_agent_0_mean": 5.5, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.99, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.18, 
"optimal_onion_potting_agent_0_min": 13, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.65, "optimal_onion_potting_agent_1_min": 14, "optimal_onion_potting_agent_1_max": 20, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.18, "viable_onion_potting_agent_0_min": 13, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.65, "viable_onion_potting_agent_1_min": 14, "viable_onion_potting_agent_1_max": 20, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [627.0, 582.0, 582.0, 639.0, 582.0, 636.0, 582.0, 627.0, 582.0, 582.0, 582.0, 582.0, 579.0, 636.0, 587.0, 636.0, 630.0, 633.0, 627.0, 582.0, 639.0, 627.0, 570.0, 633.0, 582.0, 579.0, 636.0, 630.0, 587.0, 582.0, 582.0, 573.0, 590.0, 633.0, 582.0, 630.0, 582.0, 587.0, 606.0, 582.0, 633.0, 579.0, 587.0, 639.0, 567.0, 630.0, 630.0, 582.0, 627.0, 636.0, 587.0, 636.0, 639.0, 582.0, 587.0, 587.0, 630.0, 633.0, 630.0, 579.0, 587.0, 576.0, 633.0, 587.0, 587.0, 630.0, 579.0, 630.0, 582.0, 636.0, 639.0, 630.0, 630.0, 582.0, 636.0, 581.0, 636.0, 630.0, 630.0, 590.0, 633.0, 636.0, 636.0, 630.0, 582.0, 582.0, 630.0, 584.0, 579.0, 627.0, 504.0, 630.0, 582.0, 633.0, 630.0, 633.0, 587.0, 582.0, 582.0, 633.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [308.0, 319.0, 296.0, 286.0, 288.0, 294.0, 319.0, 320.0, 288.0, 294.0, 314.0, 322.0, 291.0, 291.0, 316.0, 311.0, 291.0, 291.0, 293.0, 289.0, 282.0, 300.0, 296.0, 286.0, 288.0, 291.0, 319.0, 317.0, 296.0, 291.0, 319.0, 317.0, 319.0, 311.0, 311.0, 322.0, 316.0, 311.0, 287.0, 295.0, 322.0, 317.0, 316.0, 311.0, 282.0, 288.0, 319.0, 314.0, 295.0, 287.0, 288.0, 291.0, 327.0, 309.0, 316.0, 314.0, 293.0, 294.0, 294.0, 288.0, 290.0, 292.0, 285.0, 288.0, 294.0, 296.0, 316.0, 317.0, 296.0, 286.0, 311.0, 319.0, 289.0, 293.0, 291.0, 296.0, 301.0, 305.0, 286.0, 296.0, 319.0, 314.0, 290.0, 289.0, 290.0, 297.0, 319.0, 320.0, 282.0, 285.0, 310.0, 320.0, 319.0, 311.0, 288.0, 294.0, 313.0, 
314.0, 319.0, 317.0, 293.0, 294.0, 319.0, 317.0, 319.0, 320.0, 293.0, 289.0, 299.0, 288.0, 290.0, 297.0, 310.0, 320.0, 324.0, 309.0, 311.0, 319.0, 291.0, 288.0, 299.0, 288.0, 286.0, 290.0, 317.0, 316.0, 296.0, 291.0, 293.0, 294.0, 316.0, 314.0, 294.0, 285.0, 326.0, 304.0, 282.0, 300.0, 319.0, 317.0, 319.0, 320.0, 319.0, 311.0, 311.0, 319.0, 293.0, 289.0, 319.0, 317.0, 290.0, 291.0, 319.0, 317.0, 316.0, 314.0, 311.0, 319.0, 296.0, 294.0, 317.0, 316.0, 319.0, 317.0, 319.0, 317.0, 311.0, 319.0, 288.0, 294.0, 288.0, 294.0, 314.0, 316.0, 298.0, 286.0, 288.0, 291.0, 308.0, 319.0, 254.0, 250.0, 311.0, 319.0, 288.0, 294.0, 313.0, 320.0, 314.0, 316.0, 314.0, 319.0, 291.0, 296.0, 293.0, 289.0, 289.0, 293.0, 317.0, 316.0]}, "sampler_perf": {"mean_env_wait_ms": 0.7791820626361985, "mean_processing_ms": 0.22435411593063023, "mean_inference_ms": 1.3809254829785487}, "off_policy_estimator": {}, "info": {"num_steps_trained": 11040000, "num_steps_sampled": 5888000, "sample_time_ms": 21384.075, "load_time_ms": 38.923, "grad_time_ms": 10181.416, "update_time_ms": 0.004, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0033431891351938248, "policy_loss": -0.0041669332422316074, "vf_loss": 80.67221069335938, "vf_explained_var": 0.7671453356742859, "kl": 0.0021624856162816286, "entropy": 1.1142171621322632, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5888000, "episodes_total": 14720, "training_iteration": 460, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-44-21", "timestamp": 1660261461, "time_this_iter_s": 36.32121300697327, "time_total_s": 19869.461503744125, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 19869.461503744125, "timesteps_since_restore": 5888000, "iterations_since_restore": 460, "perf": {"cpu_util_percent": 35.05294117647058, "ram_util_percent": 59.078431372549026}}
-{"episode_reward_max": 639.0, "episode_reward_min": 504.0, "episode_reward_mean": 604.21, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 250.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 302.105}, "custom_metrics": {"sparse_reward_mean": 209.0, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 186.21, "shaped_reward_min": 144, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.49, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.27, "onion_pickup_agent_1_min": 15, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 15.93, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.41, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.15, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.32, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.06, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.14, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 16.03, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.71, "potting_onion_agent_1_min": 14, "potting_onion_agent_1_max": 20, "dish_pickup_agent_0_mean": 6.59, "dish_pickup_agent_0_min": 5, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.99, "dish_pickup_agent_1_min": 4, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.62, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.06, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 6, "dish_drop_agent_0_mean": 0.87, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.8, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.6, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.99, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 6, "soup_delivery_agent_0_mean": 5.54, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.91, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.03, 
"optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.71, "optimal_onion_potting_agent_1_min": 14, "optimal_onion_potting_agent_1_max": 20, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.03, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.71, "viable_onion_potting_agent_1_min": 14, "viable_onion_potting_agent_1_max": 20, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [630.0, 582.0, 579.0, 630.0, 527.0, 587.0, 636.0, 584.0, 587.0, 579.0, 570.0, 582.0, 579.0, 636.0, 627.0, 573.0, 633.0, 582.0, 630.0, 582.0, 582.0, 630.0, 636.0, 582.0, 579.0, 582.0, 587.0, 636.0, 587.0, 627.0, 587.0, 633.0, 587.0, 630.0, 579.0, 630.0, 582.0, 636.0, 639.0, 630.0, 630.0, 582.0, 636.0, 581.0, 636.0, 630.0, 630.0, 590.0, 633.0, 636.0, 636.0, 630.0, 582.0, 582.0, 630.0, 584.0, 579.0, 627.0, 504.0, 630.0, 582.0, 633.0, 630.0, 633.0, 587.0, 582.0, 582.0, 633.0, 627.0, 582.0, 582.0, 639.0, 582.0, 636.0, 582.0, 627.0, 582.0, 582.0, 582.0, 582.0, 579.0, 636.0, 587.0, 636.0, 630.0, 633.0, 627.0, 582.0, 639.0, 627.0, 570.0, 633.0, 582.0, 579.0, 636.0, 630.0, 587.0, 582.0, 582.0, 573.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [316.0, 314.0, 293.0, 289.0, 288.0, 291.0, 316.0, 314.0, 273.0, 254.0, 291.0, 296.0, 316.0, 320.0, 290.0, 294.0, 290.0, 297.0, 293.0, 286.0, 285.0, 285.0, 294.0, 288.0, 290.0, 289.0, 314.0, 322.0, 313.0, 314.0, 273.0, 300.0, 319.0, 314.0, 299.0, 283.0, 311.0, 319.0, 288.0, 294.0, 288.0, 294.0, 316.0, 314.0, 319.0, 317.0, 293.0, 289.0, 293.0, 286.0, 285.0, 297.0, 293.0, 294.0, 319.0, 317.0, 296.0, 291.0, 313.0, 314.0, 296.0, 291.0, 311.0, 322.0, 293.0, 294.0, 316.0, 314.0, 294.0, 285.0, 326.0, 304.0, 282.0, 300.0, 319.0, 317.0, 319.0, 320.0, 319.0, 311.0, 311.0, 319.0, 293.0, 289.0, 319.0, 317.0, 290.0, 291.0, 319.0, 317.0, 316.0, 314.0, 311.0, 319.0, 296.0, 294.0, 317.0, 
316.0, 319.0, 317.0, 319.0, 317.0, 311.0, 319.0, 288.0, 294.0, 288.0, 294.0, 314.0, 316.0, 298.0, 286.0, 288.0, 291.0, 308.0, 319.0, 254.0, 250.0, 311.0, 319.0, 288.0, 294.0, 313.0, 320.0, 314.0, 316.0, 314.0, 319.0, 291.0, 296.0, 293.0, 289.0, 289.0, 293.0, 317.0, 316.0, 308.0, 319.0, 296.0, 286.0, 288.0, 294.0, 319.0, 320.0, 288.0, 294.0, 314.0, 322.0, 291.0, 291.0, 316.0, 311.0, 291.0, 291.0, 293.0, 289.0, 282.0, 300.0, 296.0, 286.0, 288.0, 291.0, 319.0, 317.0, 296.0, 291.0, 319.0, 317.0, 319.0, 311.0, 311.0, 322.0, 316.0, 311.0, 287.0, 295.0, 322.0, 317.0, 316.0, 311.0, 282.0, 288.0, 319.0, 314.0, 295.0, 287.0, 288.0, 291.0, 327.0, 309.0, 316.0, 314.0, 293.0, 294.0, 294.0, 288.0, 290.0, 292.0, 285.0, 288.0]}, "sampler_perf": {"mean_env_wait_ms": 0.7785286320058347, "mean_processing_ms": 0.2242302416319241, "mean_inference_ms": 1.3806320563992234}, "off_policy_estimator": {}, "info": {"num_steps_trained": 11064000, "num_steps_sampled": 5900800, "sample_time_ms": 21464.8, "load_time_ms": 38.628, "grad_time_ms": 10142.381, "update_time_ms": 0.004, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0011879469966515899, "policy_loss": -0.00630860636010766, "vf_loss": 80.56136322021484, "vf_explained_var": 0.7605991363525391, "kl": 0.002013101242482662, "entropy": 1.1191506385803223, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5900800, "episodes_total": 14752, "training_iteration": 461, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-44-56", "timestamp": 1660261496, "time_this_iter_s": 35.2242169380188, "time_total_s": 19904.685720682144, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 19904.685720682144, "timesteps_since_restore": 5900800, "iterations_since_restore": 461, "perf": {"cpu_util_percent": 34.286, "ram_util_percent": 59.076}}
-{"episode_reward_max": 639.0, "episode_reward_min": 527.0, "episode_reward_mean": 606.08, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 254.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 303.04}, "custom_metrics": {"sparse_reward_mean": 209.6, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 186.88, "shaped_reward_min": 167, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.68, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.23, "onion_pickup_agent_1_min": 15, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.08, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.41, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 20, "onion_drop_agent_0_mean": 0.27, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.27, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.12, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.09, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.13, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.72, "potting_onion_agent_1_min": 15, "potting_onion_agent_1_max": 20, "dish_pickup_agent_0_mean": 6.53, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 6.02, "dish_pickup_agent_1_min": 4, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.62, "useful_dish_pickup_agent_0_min": 4, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.14, "useful_dish_pickup_agent_1_min": 3, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.83, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.8, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.57, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.04, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.52, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.96, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.13, 
"optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.72, "optimal_onion_potting_agent_1_min": 15, "optimal_onion_potting_agent_1_max": 20, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.13, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.72, "viable_onion_potting_agent_1_min": 15, "viable_onion_potting_agent_1_max": 20, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 630.0, 636.0, 639.0, 639.0, 582.0, 627.0, 576.0, 587.0, 636.0, 633.0, 636.0, 630.0, 587.0, 633.0, 582.0, 579.0, 573.0, 636.0, 627.0, 633.0, 636.0, 633.0, 587.0, 573.0, 636.0, 630.0, 636.0, 633.0, 636.0, 630.0, 633.0, 587.0, 582.0, 582.0, 633.0, 627.0, 582.0, 582.0, 639.0, 582.0, 636.0, 582.0, 627.0, 582.0, 582.0, 582.0, 582.0, 579.0, 636.0, 587.0, 636.0, 630.0, 633.0, 627.0, 582.0, 639.0, 627.0, 570.0, 633.0, 582.0, 579.0, 636.0, 630.0, 587.0, 582.0, 582.0, 573.0, 630.0, 582.0, 579.0, 630.0, 527.0, 587.0, 636.0, 584.0, 587.0, 579.0, 570.0, 582.0, 579.0, 636.0, 627.0, 573.0, 633.0, 582.0, 630.0, 582.0, 582.0, 630.0, 636.0, 582.0, 579.0, 582.0, 587.0, 636.0, 587.0, 627.0, 587.0, 633.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [291.0, 291.0, 316.0, 314.0, 316.0, 320.0, 317.0, 322.0, 324.0, 315.0, 291.0, 291.0, 313.0, 314.0, 296.0, 280.0, 296.0, 291.0, 314.0, 322.0, 313.0, 320.0, 314.0, 322.0, 319.0, 311.0, 291.0, 296.0, 316.0, 317.0, 291.0, 291.0, 290.0, 289.0, 288.0, 285.0, 309.0, 327.0, 313.0, 314.0, 314.0, 319.0, 319.0, 317.0, 316.0, 317.0, 288.0, 299.0, 288.0, 285.0, 314.0, 322.0, 313.0, 317.0, 314.0, 322.0, 314.0, 319.0, 314.0, 322.0, 311.0, 319.0, 319.0, 314.0, 291.0, 296.0, 293.0, 289.0, 289.0, 293.0, 317.0, 316.0, 308.0, 319.0, 296.0, 286.0, 288.0, 294.0, 319.0, 320.0, 288.0, 294.0, 314.0, 322.0, 291.0, 291.0, 316.0, 311.0, 291.0, 291.0, 293.0, 289.0, 282.0, 300.0, 296.0, 286.0, 288.0, 
291.0, 319.0, 317.0, 296.0, 291.0, 319.0, 317.0, 319.0, 311.0, 311.0, 322.0, 316.0, 311.0, 287.0, 295.0, 322.0, 317.0, 316.0, 311.0, 282.0, 288.0, 319.0, 314.0, 295.0, 287.0, 288.0, 291.0, 327.0, 309.0, 316.0, 314.0, 293.0, 294.0, 294.0, 288.0, 290.0, 292.0, 285.0, 288.0, 316.0, 314.0, 293.0, 289.0, 288.0, 291.0, 316.0, 314.0, 273.0, 254.0, 291.0, 296.0, 316.0, 320.0, 290.0, 294.0, 290.0, 297.0, 293.0, 286.0, 285.0, 285.0, 294.0, 288.0, 290.0, 289.0, 314.0, 322.0, 313.0, 314.0, 273.0, 300.0, 319.0, 314.0, 299.0, 283.0, 311.0, 319.0, 288.0, 294.0, 288.0, 294.0, 316.0, 314.0, 319.0, 317.0, 293.0, 289.0, 293.0, 286.0, 285.0, 297.0, 293.0, 294.0, 319.0, 317.0, 296.0, 291.0, 313.0, 314.0, 296.0, 291.0, 311.0, 322.0]}, "sampler_perf": {"mean_env_wait_ms": 0.7778691140112235, "mean_processing_ms": 0.22410468367497743, "mean_inference_ms": 1.3802370398093593}, "off_policy_estimator": {}, "info": {"num_steps_trained": 11088000, "num_steps_sampled": 5913600, "sample_time_ms": 21647.909, "load_time_ms": 38.304, "grad_time_ms": 10153.272, "update_time_ms": 0.004, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.001676362007856369, "policy_loss": -0.005163781810551882, "vf_loss": 73.99588012695312, "vf_explained_var": 0.7706634402275085, "kl": 0.00203719618730247, "entropy": 1.1188966035842896, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5913600, "episodes_total": 14784, "training_iteration": 462, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-45-29", "timestamp": 1660261529, "time_this_iter_s": 32.866820096969604, "time_total_s": 19937.552540779114, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 19937.552540779114, "timesteps_since_restore": 5913600, "iterations_since_restore": 462, "perf": {"cpu_util_percent": 36.0304347826087, "ram_util_percent": 59.14782608695653}}
-{"episode_reward_max": 639.0, "episode_reward_min": 527.0, "episode_reward_mean": 606.55, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 254.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 303.275}, "custom_metrics": {"sparse_reward_mean": 209.8, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 186.95, "shaped_reward_min": 164, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.76, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.11, "onion_pickup_agent_1_min": 14, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.1, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 19, "useful_onion_pickup_agent_1_mean": 17.36, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.27, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.19, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.13, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.05, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.21, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.71, "potting_onion_agent_1_min": 14, "potting_onion_agent_1_max": 20, "dish_pickup_agent_0_mean": 6.52, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 6.07, "dish_pickup_agent_1_min": 4, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.52, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.11, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.84, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 0.82, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.56, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.1, "soup_pickup_agent_1_min": 4, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.51, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.98, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.21, 
"optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.71, "optimal_onion_potting_agent_1_min": 14, "optimal_onion_potting_agent_1_max": 20, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.21, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.71, "viable_onion_potting_agent_1_min": 14, "viable_onion_potting_agent_1_max": 20, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [636.0, 630.0, 636.0, 576.0, 587.0, 579.0, 633.0, 579.0, 630.0, 636.0, 636.0, 569.0, 582.0, 584.0, 633.0, 633.0, 627.0, 582.0, 582.0, 587.0, 564.0, 582.0, 639.0, 587.0, 582.0, 627.0, 636.0, 587.0, 630.0, 582.0, 630.0, 639.0, 587.0, 582.0, 582.0, 573.0, 630.0, 582.0, 579.0, 630.0, 527.0, 587.0, 636.0, 584.0, 587.0, 579.0, 570.0, 582.0, 579.0, 636.0, 627.0, 573.0, 633.0, 582.0, 630.0, 582.0, 582.0, 630.0, 636.0, 582.0, 579.0, 582.0, 587.0, 636.0, 587.0, 627.0, 587.0, 633.0, 582.0, 630.0, 636.0, 639.0, 639.0, 582.0, 627.0, 576.0, 587.0, 636.0, 633.0, 636.0, 630.0, 587.0, 633.0, 582.0, 579.0, 573.0, 636.0, 627.0, 633.0, 636.0, 633.0, 587.0, 573.0, 636.0, 630.0, 636.0, 633.0, 636.0, 630.0, 633.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [319.0, 317.0, 319.0, 311.0, 319.0, 317.0, 282.0, 294.0, 291.0, 296.0, 294.0, 285.0, 314.0, 319.0, 291.0, 288.0, 316.0, 314.0, 314.0, 322.0, 322.0, 314.0, 281.0, 288.0, 292.0, 290.0, 285.0, 299.0, 321.0, 312.0, 319.0, 314.0, 305.0, 322.0, 293.0, 289.0, 288.0, 294.0, 293.0, 294.0, 276.0, 288.0, 296.0, 286.0, 317.0, 322.0, 293.0, 294.0, 296.0, 286.0, 319.0, 308.0, 316.0, 320.0, 286.0, 301.0, 316.0, 314.0, 293.0, 289.0, 319.0, 311.0, 316.0, 323.0, 293.0, 294.0, 294.0, 288.0, 290.0, 292.0, 285.0, 288.0, 316.0, 314.0, 293.0, 289.0, 288.0, 291.0, 316.0, 314.0, 273.0, 254.0, 291.0, 296.0, 316.0, 320.0, 290.0, 294.0, 290.0, 297.0, 293.0, 286.0, 285.0, 285.0, 294.0, 288.0, 290.0, 
289.0, 314.0, 322.0, 313.0, 314.0, 273.0, 300.0, 319.0, 314.0, 299.0, 283.0, 311.0, 319.0, 288.0, 294.0, 288.0, 294.0, 316.0, 314.0, 319.0, 317.0, 293.0, 289.0, 293.0, 286.0, 285.0, 297.0, 293.0, 294.0, 319.0, 317.0, 296.0, 291.0, 313.0, 314.0, 296.0, 291.0, 311.0, 322.0, 291.0, 291.0, 316.0, 314.0, 316.0, 320.0, 317.0, 322.0, 324.0, 315.0, 291.0, 291.0, 313.0, 314.0, 296.0, 280.0, 296.0, 291.0, 314.0, 322.0, 313.0, 320.0, 314.0, 322.0, 319.0, 311.0, 291.0, 296.0, 316.0, 317.0, 291.0, 291.0, 290.0, 289.0, 288.0, 285.0, 309.0, 327.0, 313.0, 314.0, 314.0, 319.0, 319.0, 317.0, 316.0, 317.0, 288.0, 299.0, 288.0, 285.0, 314.0, 322.0, 313.0, 317.0, 314.0, 322.0, 314.0, 319.0, 314.0, 322.0, 311.0, 319.0, 319.0, 314.0]}, "sampler_perf": {"mean_env_wait_ms": 0.7772074101048227, "mean_processing_ms": 0.22397937610682028, "mean_inference_ms": 1.379755174295214}, "off_policy_estimator": {}, "info": {"num_steps_trained": 11112000, "num_steps_sampled": 5926400, "sample_time_ms": 22034.25, "load_time_ms": 37.344, "grad_time_ms": 10407.822, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.004309405107051134, "policy_loss": -0.0030093893874436617, "vf_loss": 78.81652069091797, "vf_explained_var": 0.7744302749633789, "kl": 0.0022333713714033365, "entropy": 1.1256990432739258, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5926400, "episodes_total": 14816, "training_iteration": 463, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-46-04", "timestamp": 1660261564, "time_this_iter_s": 34.88473105430603, "time_total_s": 19972.43727183342, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 19972.43727183342, "timesteps_since_restore": 5926400, "iterations_since_restore": 463, "perf": {"cpu_util_percent": 36.355999999999995, "ram_util_percent": 59.1}}
-{"episode_reward_max": 639.0, "episode_reward_min": 522.0, "episode_reward_mean": 613.15, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 257.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 306.575}, "custom_metrics": {"sparse_reward_mean": 212.4, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 188.35, "shaped_reward_min": 162, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.95, "onion_pickup_agent_0_min": 14, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.16, "onion_pickup_agent_1_min": 14, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.24, "useful_onion_pickup_agent_0_min": 14, "useful_onion_pickup_agent_0_max": 19, "useful_onion_pickup_agent_1_mean": 17.47, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.25, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.11, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.1, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.02, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.4, "potting_onion_agent_0_min": 14, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.73, "potting_onion_agent_1_min": 14, "potting_onion_agent_1_max": 20, "dish_pickup_agent_0_mean": 6.5, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 6.19, "dish_pickup_agent_1_min": 4, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.51, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.21, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.84, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 0.86, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.56, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.2, "soup_pickup_agent_1_min": 4, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.51, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.11, "soup_delivery_agent_1_min": 4, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.4, 
"optimal_onion_potting_agent_0_min": 14, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.73, "optimal_onion_potting_agent_1_min": 14, "optimal_onion_potting_agent_1_max": 20, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.4, "viable_onion_potting_agent_0_min": 14, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.73, "viable_onion_potting_agent_1_min": 14, "viable_onion_potting_agent_1_max": 20, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [633.0, 630.0, 633.0, 633.0, 630.0, 630.0, 630.0, 630.0, 627.0, 587.0, 630.0, 576.0, 627.0, 587.0, 630.0, 630.0, 633.0, 582.0, 522.0, 630.0, 633.0, 587.0, 582.0, 639.0, 582.0, 630.0, 633.0, 582.0, 630.0, 633.0, 636.0, 636.0, 587.0, 627.0, 587.0, 633.0, 582.0, 630.0, 636.0, 639.0, 639.0, 582.0, 627.0, 576.0, 587.0, 636.0, 633.0, 636.0, 630.0, 587.0, 633.0, 582.0, 579.0, 573.0, 636.0, 627.0, 633.0, 636.0, 633.0, 587.0, 573.0, 636.0, 630.0, 636.0, 633.0, 636.0, 630.0, 633.0, 636.0, 630.0, 636.0, 576.0, 587.0, 579.0, 633.0, 579.0, 630.0, 636.0, 636.0, 569.0, 582.0, 584.0, 633.0, 633.0, 627.0, 582.0, 582.0, 587.0, 564.0, 582.0, 639.0, 587.0, 582.0, 627.0, 636.0, 587.0, 630.0, 582.0, 630.0, 639.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [316.0, 317.0, 309.0, 321.0, 319.0, 314.0, 319.0, 314.0, 316.0, 314.0, 321.0, 309.0, 316.0, 314.0, 316.0, 314.0, 308.0, 319.0, 293.0, 294.0, 311.0, 319.0, 288.0, 288.0, 313.0, 314.0, 291.0, 296.0, 311.0, 319.0, 313.0, 317.0, 319.0, 314.0, 291.0, 291.0, 257.0, 265.0, 316.0, 314.0, 324.0, 309.0, 291.0, 296.0, 288.0, 294.0, 319.0, 320.0, 290.0, 292.0, 313.0, 317.0, 319.0, 314.0, 290.0, 292.0, 311.0, 319.0, 316.0, 317.0, 311.0, 325.0, 319.0, 317.0, 296.0, 291.0, 313.0, 314.0, 296.0, 291.0, 311.0, 322.0, 291.0, 291.0, 316.0, 314.0, 316.0, 320.0, 317.0, 322.0, 324.0, 315.0, 291.0, 291.0, 313.0, 314.0, 296.0, 280.0, 296.0, 291.0, 314.0, 322.0, 313.0, 320.0, 314.0, 322.0, 319.0, 
311.0, 291.0, 296.0, 316.0, 317.0, 291.0, 291.0, 290.0, 289.0, 288.0, 285.0, 309.0, 327.0, 313.0, 314.0, 314.0, 319.0, 319.0, 317.0, 316.0, 317.0, 288.0, 299.0, 288.0, 285.0, 314.0, 322.0, 313.0, 317.0, 314.0, 322.0, 314.0, 319.0, 314.0, 322.0, 311.0, 319.0, 319.0, 314.0, 319.0, 317.0, 319.0, 311.0, 319.0, 317.0, 282.0, 294.0, 291.0, 296.0, 294.0, 285.0, 314.0, 319.0, 291.0, 288.0, 316.0, 314.0, 314.0, 322.0, 322.0, 314.0, 281.0, 288.0, 292.0, 290.0, 285.0, 299.0, 321.0, 312.0, 319.0, 314.0, 305.0, 322.0, 293.0, 289.0, 288.0, 294.0, 293.0, 294.0, 276.0, 288.0, 296.0, 286.0, 317.0, 322.0, 293.0, 294.0, 296.0, 286.0, 319.0, 308.0, 316.0, 320.0, 286.0, 301.0, 316.0, 314.0, 293.0, 289.0, 319.0, 311.0, 316.0, 323.0]}, "sampler_perf": {"mean_env_wait_ms": 0.7765494210947501, "mean_processing_ms": 0.2238566721049911, "mean_inference_ms": 1.3793130627015395}, "off_policy_estimator": {}, "info": {"num_steps_trained": 11136000, "num_steps_sampled": 5939200, "sample_time_ms": 22597.793, "load_time_ms": 37.078, "grad_time_ms": 10833.136, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.004432214889675379, "policy_loss": -0.0028620418161153793, "vf_loss": 78.4912338256836, "vf_explained_var": 0.7597255110740662, "kl": 0.0021421227138489485, "entropy": 1.10971999168396, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5939200, "episodes_total": 14848, "training_iteration": 464, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-46-41", "timestamp": 1660261601, "time_this_iter_s": 37.22005105018616, "time_total_s": 20009.657322883606, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 20009.657322883606, "timesteps_since_restore": 5939200, "iterations_since_restore": 464, "perf": {"cpu_util_percent": 33.75576923076923, "ram_util_percent": 59.192307692307686}}
-{"episode_reward_max": 639.0, "episode_reward_min": 462.0, "episode_reward_mean": 609.39, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 223.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 304.695}, "custom_metrics": {"sparse_reward_mean": 211.0, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 187.39, "shaped_reward_min": 142, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.73, "onion_pickup_agent_0_min": 14, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.17, "onion_pickup_agent_1_min": 13, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.12, "useful_onion_pickup_agent_0_min": 13, "useful_onion_pickup_agent_0_max": 19, "useful_onion_pickup_agent_1_mean": 17.45, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.18, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.08, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.06, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.01, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.2, "potting_onion_agent_0_min": 14, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.75, "potting_onion_agent_1_min": 13, "potting_onion_agent_1_max": 20, "dish_pickup_agent_0_mean": 6.6, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 6.12, "dish_pickup_agent_1_min": 4, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.56, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.12, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.87, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 0.87, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.63, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.07, "soup_pickup_agent_1_min": 4, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.57, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.98, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.2, 
"optimal_onion_potting_agent_0_min": 14, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.75, "optimal_onion_potting_agent_1_min": 13, "optimal_onion_potting_agent_1_max": 20, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.2, "viable_onion_potting_agent_0_min": 14, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.75, "viable_onion_potting_agent_1_min": 13, "viable_onion_potting_agent_1_max": 20, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [639.0, 630.0, 522.0, 636.0, 573.0, 636.0, 636.0, 582.0, 582.0, 633.0, 633.0, 587.0, 633.0, 633.0, 582.0, 590.0, 582.0, 579.0, 630.0, 627.0, 587.0, 462.0, 639.0, 630.0, 579.0, 584.0, 630.0, 587.0, 579.0, 587.0, 630.0, 633.0, 633.0, 636.0, 630.0, 633.0, 636.0, 630.0, 636.0, 576.0, 587.0, 579.0, 633.0, 579.0, 630.0, 636.0, 636.0, 569.0, 582.0, 584.0, 633.0, 633.0, 627.0, 582.0, 582.0, 587.0, 564.0, 582.0, 639.0, 587.0, 582.0, 627.0, 636.0, 587.0, 630.0, 582.0, 630.0, 639.0, 633.0, 630.0, 633.0, 633.0, 630.0, 630.0, 630.0, 630.0, 627.0, 587.0, 630.0, 576.0, 627.0, 587.0, 630.0, 630.0, 633.0, 582.0, 522.0, 630.0, 633.0, 587.0, 582.0, 639.0, 582.0, 630.0, 633.0, 582.0, 630.0, 633.0, 636.0, 636.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [317.0, 322.0, 316.0, 314.0, 268.0, 254.0, 319.0, 317.0, 299.0, 274.0, 314.0, 322.0, 313.0, 323.0, 288.0, 294.0, 293.0, 289.0, 324.0, 309.0, 316.0, 317.0, 298.0, 289.0, 319.0, 314.0, 316.0, 317.0, 293.0, 289.0, 296.0, 294.0, 288.0, 294.0, 288.0, 291.0, 316.0, 314.0, 307.0, 320.0, 296.0, 291.0, 223.0, 239.0, 324.0, 315.0, 313.0, 317.0, 290.0, 289.0, 293.0, 291.0, 311.0, 319.0, 296.0, 291.0, 288.0, 291.0, 298.0, 289.0, 316.0, 314.0, 314.0, 319.0, 314.0, 319.0, 314.0, 322.0, 311.0, 319.0, 319.0, 314.0, 319.0, 317.0, 319.0, 311.0, 319.0, 317.0, 282.0, 294.0, 291.0, 296.0, 294.0, 285.0, 314.0, 319.0, 291.0, 288.0, 316.0, 314.0, 314.0, 322.0, 322.0, 314.0, 281.0, 288.0, 292.0, 
290.0, 285.0, 299.0, 321.0, 312.0, 319.0, 314.0, 305.0, 322.0, 293.0, 289.0, 288.0, 294.0, 293.0, 294.0, 276.0, 288.0, 296.0, 286.0, 317.0, 322.0, 293.0, 294.0, 296.0, 286.0, 319.0, 308.0, 316.0, 320.0, 286.0, 301.0, 316.0, 314.0, 293.0, 289.0, 319.0, 311.0, 316.0, 323.0, 316.0, 317.0, 309.0, 321.0, 319.0, 314.0, 319.0, 314.0, 316.0, 314.0, 321.0, 309.0, 316.0, 314.0, 316.0, 314.0, 308.0, 319.0, 293.0, 294.0, 311.0, 319.0, 288.0, 288.0, 313.0, 314.0, 291.0, 296.0, 311.0, 319.0, 313.0, 317.0, 319.0, 314.0, 291.0, 291.0, 257.0, 265.0, 316.0, 314.0, 324.0, 309.0, 291.0, 296.0, 288.0, 294.0, 319.0, 320.0, 290.0, 292.0, 313.0, 317.0, 319.0, 314.0, 290.0, 292.0, 311.0, 319.0, 316.0, 317.0, 311.0, 325.0, 319.0, 317.0]}, "sampler_perf": {"mean_env_wait_ms": 0.7758921958877883, "mean_processing_ms": 0.22373444703892312, "mean_inference_ms": 1.3788740058001947}, "off_policy_estimator": {}, "info": {"num_steps_trained": 11160000, "num_steps_sampled": 5952000, "sample_time_ms": 22835.117, "load_time_ms": 36.936, "grad_time_ms": 10943.452, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0008901534602046013, "policy_loss": -0.006549746263772249, "vf_loss": 80.0201416015625, "vf_explained_var": 0.7684802412986755, "kl": 0.0019470960833132267, "entropy": 1.1242157220840454, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5952000, "episodes_total": 14880, "training_iteration": 465, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-47-14", "timestamp": 1660261634, "time_this_iter_s": 32.92273998260498, "time_total_s": 20042.58006286621, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 20042.58006286621, "timesteps_since_restore": 5952000, "iterations_since_restore": 465, "perf": {"cpu_util_percent": 33.19787234042553, "ram_util_percent": 58.98936170212765}}
-{"episode_reward_max": 639.0, "episode_reward_min": 462.0, "episode_reward_mean": 611.62, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 223.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 305.81}, "custom_metrics": {"sparse_reward_mean": 212.0, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 187.62, "shaped_reward_min": 142, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.66, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.35, "onion_pickup_agent_1_min": 13, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.01, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.52, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.2, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.16, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.07, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.02, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.09, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.89, "potting_onion_agent_1_min": 13, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.63, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 6.13, "dish_pickup_agent_1_min": 4, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.59, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.12, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.86, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.9, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.66, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.05, "soup_pickup_agent_1_min": 4, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.61, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.99, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.09, 
"optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.89, "optimal_onion_potting_agent_1_min": 13, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.09, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.89, "viable_onion_potting_agent_1_min": 13, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 582.0, 630.0, 579.0, 639.0, 636.0, 630.0, 636.0, 633.0, 636.0, 582.0, 587.0, 624.0, 639.0, 630.0, 582.0, 636.0, 633.0, 633.0, 630.0, 630.0, 579.0, 555.0, 630.0, 576.0, 630.0, 627.0, 587.0, 639.0, 627.0, 627.0, 633.0, 630.0, 582.0, 630.0, 639.0, 633.0, 630.0, 633.0, 633.0, 630.0, 630.0, 630.0, 630.0, 627.0, 587.0, 630.0, 576.0, 627.0, 587.0, 630.0, 630.0, 633.0, 582.0, 522.0, 630.0, 633.0, 587.0, 582.0, 639.0, 582.0, 630.0, 633.0, 582.0, 630.0, 633.0, 636.0, 636.0, 639.0, 630.0, 522.0, 636.0, 573.0, 636.0, 636.0, 582.0, 582.0, 633.0, 633.0, 587.0, 633.0, 633.0, 582.0, 590.0, 582.0, 579.0, 630.0, 627.0, 587.0, 462.0, 639.0, 630.0, 579.0, 584.0, 630.0, 587.0, 579.0, 587.0, 630.0, 633.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [291.0, 288.0, 288.0, 294.0, 316.0, 314.0, 288.0, 291.0, 317.0, 322.0, 319.0, 317.0, 313.0, 317.0, 314.0, 322.0, 316.0, 317.0, 314.0, 322.0, 291.0, 291.0, 280.0, 307.0, 316.0, 308.0, 324.0, 315.0, 317.0, 313.0, 283.0, 299.0, 321.0, 315.0, 316.0, 317.0, 316.0, 317.0, 311.0, 319.0, 311.0, 319.0, 294.0, 285.0, 270.0, 285.0, 316.0, 314.0, 281.0, 295.0, 310.0, 320.0, 313.0, 314.0, 288.0, 299.0, 322.0, 317.0, 313.0, 314.0, 321.0, 306.0, 316.0, 317.0, 316.0, 314.0, 293.0, 289.0, 319.0, 311.0, 316.0, 323.0, 316.0, 317.0, 309.0, 321.0, 319.0, 314.0, 319.0, 314.0, 316.0, 314.0, 321.0, 309.0, 316.0, 314.0, 316.0, 314.0, 308.0, 319.0, 293.0, 294.0, 311.0, 319.0, 288.0, 288.0, 313.0, 
314.0, 291.0, 296.0, 311.0, 319.0, 313.0, 317.0, 319.0, 314.0, 291.0, 291.0, 257.0, 265.0, 316.0, 314.0, 324.0, 309.0, 291.0, 296.0, 288.0, 294.0, 319.0, 320.0, 290.0, 292.0, 313.0, 317.0, 319.0, 314.0, 290.0, 292.0, 311.0, 319.0, 316.0, 317.0, 311.0, 325.0, 319.0, 317.0, 317.0, 322.0, 316.0, 314.0, 268.0, 254.0, 319.0, 317.0, 299.0, 274.0, 314.0, 322.0, 313.0, 323.0, 288.0, 294.0, 293.0, 289.0, 324.0, 309.0, 316.0, 317.0, 298.0, 289.0, 319.0, 314.0, 316.0, 317.0, 293.0, 289.0, 296.0, 294.0, 288.0, 294.0, 288.0, 291.0, 316.0, 314.0, 307.0, 320.0, 296.0, 291.0, 223.0, 239.0, 324.0, 315.0, 313.0, 317.0, 290.0, 289.0, 293.0, 291.0, 311.0, 319.0, 296.0, 291.0, 288.0, 291.0, 298.0, 289.0, 316.0, 314.0, 314.0, 319.0]}, "sampler_perf": {"mean_env_wait_ms": 0.7752302665514498, "mean_processing_ms": 0.22360935089938724, "mean_inference_ms": 1.378262918190576}, "off_policy_estimator": {}, "info": {"num_steps_trained": 11184000, "num_steps_sampled": 5964800, "sample_time_ms": 22879.712, "load_time_ms": 37.04, "grad_time_ms": 10842.802, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0008535028900951147, "policy_loss": -0.007580641657114029, "vf_loss": 72.87356567382812, "vf_explained_var": 0.7752940058708191, "kl": 0.0019255572697147727, "entropy": 1.12042236328125, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5964800, "episodes_total": 14912, "training_iteration": 466, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-47-42", "timestamp": 1660261662, "time_this_iter_s": 28.411120176315308, "time_total_s": 20070.991183042526, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 20070.991183042526, "timesteps_since_restore": 5964800, "iterations_since_restore": 466, "perf": {"cpu_util_percent": 30.5725, "ram_util_percent": 58.955}}
-{"episode_reward_max": 639.0, "episode_reward_min": 462.0, "episode_reward_mean": 607.06, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 223.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 303.53}, "custom_metrics": {"sparse_reward_mean": 210.4, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 186.26, "shaped_reward_min": 142, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.71, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.14, "onion_pickup_agent_1_min": 13, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 16.11, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.33, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.22, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.16, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.08, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.03, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.14, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.73, "potting_onion_agent_1_min": 13, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.67, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 6.24, "dish_pickup_agent_1_min": 4, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.44, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.06, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.96, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 0.98, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.52, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.11, "soup_pickup_agent_1_min": 4, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.48, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.05, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.14, 
"optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.73, "optimal_onion_potting_agent_1_min": 13, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.14, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.73, "viable_onion_potting_agent_1_min": 13, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 584.0, 579.0, 582.0, 630.0, 627.0, 573.0, 582.0, 636.0, 587.0, 639.0, 636.0, 539.0, 615.0, 579.0, 582.0, 633.0, 630.0, 582.0, 582.0, 576.0, 636.0, 627.0, 630.0, 507.0, 633.0, 564.0, 627.0, 582.0, 630.0, 630.0, 582.0, 630.0, 633.0, 636.0, 636.0, 639.0, 630.0, 522.0, 636.0, 573.0, 636.0, 636.0, 582.0, 582.0, 633.0, 633.0, 587.0, 633.0, 633.0, 582.0, 590.0, 582.0, 579.0, 630.0, 627.0, 587.0, 462.0, 639.0, 630.0, 579.0, 584.0, 630.0, 587.0, 579.0, 587.0, 630.0, 633.0, 579.0, 582.0, 630.0, 579.0, 639.0, 636.0, 630.0, 636.0, 633.0, 636.0, 582.0, 587.0, 624.0, 639.0, 630.0, 582.0, 636.0, 633.0, 633.0, 630.0, 630.0, 579.0, 555.0, 630.0, 576.0, 630.0, 627.0, 587.0, 639.0, 627.0, 627.0, 633.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [286.0, 296.0, 290.0, 294.0, 288.0, 291.0, 291.0, 291.0, 316.0, 314.0, 321.0, 306.0, 288.0, 285.0, 288.0, 294.0, 322.0, 314.0, 287.0, 300.0, 314.0, 325.0, 314.0, 322.0, 271.0, 268.0, 302.0, 313.0, 285.0, 294.0, 288.0, 294.0, 314.0, 319.0, 316.0, 314.0, 288.0, 294.0, 291.0, 291.0, 288.0, 288.0, 317.0, 319.0, 316.0, 311.0, 311.0, 319.0, 251.0, 256.0, 314.0, 319.0, 281.0, 283.0, 308.0, 319.0, 289.0, 293.0, 318.0, 312.0, 311.0, 319.0, 297.0, 285.0, 311.0, 319.0, 316.0, 317.0, 311.0, 325.0, 319.0, 317.0, 317.0, 322.0, 316.0, 314.0, 268.0, 254.0, 319.0, 317.0, 299.0, 274.0, 314.0, 322.0, 313.0, 323.0, 288.0, 294.0, 293.0, 289.0, 324.0, 309.0, 316.0, 317.0, 298.0, 289.0, 319.0, 
314.0, 316.0, 317.0, 293.0, 289.0, 296.0, 294.0, 288.0, 294.0, 288.0, 291.0, 316.0, 314.0, 307.0, 320.0, 296.0, 291.0, 223.0, 239.0, 324.0, 315.0, 313.0, 317.0, 290.0, 289.0, 293.0, 291.0, 311.0, 319.0, 296.0, 291.0, 288.0, 291.0, 298.0, 289.0, 316.0, 314.0, 314.0, 319.0, 291.0, 288.0, 288.0, 294.0, 316.0, 314.0, 288.0, 291.0, 317.0, 322.0, 319.0, 317.0, 313.0, 317.0, 314.0, 322.0, 316.0, 317.0, 314.0, 322.0, 291.0, 291.0, 280.0, 307.0, 316.0, 308.0, 324.0, 315.0, 317.0, 313.0, 283.0, 299.0, 321.0, 315.0, 316.0, 317.0, 316.0, 317.0, 311.0, 319.0, 311.0, 319.0, 294.0, 285.0, 270.0, 285.0, 316.0, 314.0, 281.0, 295.0, 310.0, 320.0, 313.0, 314.0, 288.0, 299.0, 322.0, 317.0, 313.0, 314.0, 321.0, 306.0, 316.0, 317.0]}, "sampler_perf": {"mean_env_wait_ms": 0.7745646022660153, "mean_processing_ms": 0.2234810440216946, "mean_inference_ms": 1.3774520581026746}, "off_policy_estimator": {}, "info": {"num_steps_trained": 11208000, "num_steps_sampled": 5977600, "sample_time_ms": 22667.166, "load_time_ms": 37.563, "grad_time_ms": 10782.505, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0003179629857186228, "policy_loss": -0.007398936897516251, "vf_loss": 76.44898986816406, "vf_explained_var": 0.7757420539855957, "kl": 0.0019861727487295866, "entropy": 1.127841830253601, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5977600, "episodes_total": 14944, "training_iteration": 467, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-48-12", "timestamp": 1660261692, "time_this_iter_s": 29.89157724380493, "time_total_s": 20100.88276028633, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 20100.88276028633, "timesteps_since_restore": 5977600, "iterations_since_restore": 467, "perf": {"cpu_util_percent": 33.63333333333334, "ram_util_percent": 58.976190476190474}}
-{"episode_reward_max": 639.0, "episode_reward_min": 507.0, "episode_reward_mean": 606.84, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 251.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 303.42}, "custom_metrics": {"sparse_reward_mean": 210.4, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 186.04, "shaped_reward_min": 147, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.77, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.12, "onion_pickup_agent_1_min": 15, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.18, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.31, "useful_onion_pickup_agent_1_min": 14, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.2, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.23, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.08, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.07, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.24, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.58, "potting_onion_agent_1_min": 14, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.7, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 6.42, "dish_pickup_agent_1_min": 4, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.38, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.08, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 1.0, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 1.11, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.48, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.16, "soup_pickup_agent_1_min": 4, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.44, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.09, "soup_delivery_agent_1_min": 4, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.24, 
"optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.58, "optimal_onion_potting_agent_1_min": 14, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.24, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.58, "viable_onion_potting_agent_1_min": 14, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [630.0, 581.0, 582.0, 582.0, 636.0, 633.0, 633.0, 561.0, 630.0, 578.0, 633.0, 582.0, 582.0, 582.0, 579.0, 584.0, 639.0, 587.0, 582.0, 587.0, 633.0, 630.0, 630.0, 567.0, 587.0, 636.0, 630.0, 582.0, 639.0, 633.0, 630.0, 576.0, 579.0, 587.0, 630.0, 633.0, 579.0, 582.0, 630.0, 579.0, 639.0, 636.0, 630.0, 636.0, 633.0, 636.0, 582.0, 587.0, 624.0, 639.0, 630.0, 582.0, 636.0, 633.0, 633.0, 630.0, 630.0, 579.0, 555.0, 630.0, 576.0, 630.0, 627.0, 587.0, 639.0, 627.0, 627.0, 633.0, 582.0, 584.0, 579.0, 582.0, 630.0, 627.0, 573.0, 582.0, 636.0, 587.0, 639.0, 636.0, 539.0, 615.0, 579.0, 582.0, 633.0, 630.0, 582.0, 582.0, 576.0, 636.0, 627.0, 630.0, 507.0, 633.0, 564.0, 627.0, 582.0, 630.0, 630.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [316.0, 314.0, 290.0, 291.0, 293.0, 289.0, 296.0, 286.0, 321.0, 315.0, 319.0, 314.0, 319.0, 314.0, 285.0, 276.0, 316.0, 314.0, 295.0, 283.0, 319.0, 314.0, 288.0, 294.0, 294.0, 288.0, 283.0, 299.0, 281.0, 298.0, 288.0, 296.0, 317.0, 322.0, 296.0, 291.0, 289.0, 293.0, 288.0, 299.0, 321.0, 312.0, 313.0, 317.0, 319.0, 311.0, 284.0, 283.0, 302.0, 285.0, 314.0, 322.0, 313.0, 317.0, 291.0, 291.0, 317.0, 322.0, 309.0, 324.0, 316.0, 314.0, 291.0, 285.0, 288.0, 291.0, 298.0, 289.0, 316.0, 314.0, 314.0, 319.0, 291.0, 288.0, 288.0, 294.0, 316.0, 314.0, 288.0, 291.0, 317.0, 322.0, 319.0, 317.0, 313.0, 317.0, 314.0, 322.0, 316.0, 317.0, 314.0, 322.0, 291.0, 291.0, 280.0, 307.0, 316.0, 
308.0, 324.0, 315.0, 317.0, 313.0, 283.0, 299.0, 321.0, 315.0, 316.0, 317.0, 316.0, 317.0, 311.0, 319.0, 311.0, 319.0, 294.0, 285.0, 270.0, 285.0, 316.0, 314.0, 281.0, 295.0, 310.0, 320.0, 313.0, 314.0, 288.0, 299.0, 322.0, 317.0, 313.0, 314.0, 321.0, 306.0, 316.0, 317.0, 286.0, 296.0, 290.0, 294.0, 288.0, 291.0, 291.0, 291.0, 316.0, 314.0, 321.0, 306.0, 288.0, 285.0, 288.0, 294.0, 322.0, 314.0, 287.0, 300.0, 314.0, 325.0, 314.0, 322.0, 271.0, 268.0, 302.0, 313.0, 285.0, 294.0, 288.0, 294.0, 314.0, 319.0, 316.0, 314.0, 288.0, 294.0, 291.0, 291.0, 288.0, 288.0, 317.0, 319.0, 316.0, 311.0, 311.0, 319.0, 251.0, 256.0, 314.0, 319.0, 281.0, 283.0, 308.0, 319.0, 289.0, 293.0, 318.0, 312.0, 311.0, 319.0, 297.0, 285.0]}, "sampler_perf": {"mean_env_wait_ms": 0.7738952247411988, "mean_processing_ms": 0.22335063238074299, "mean_inference_ms": 1.3764749095367632}, "off_policy_estimator": {}, "info": {"num_steps_trained": 11232000, "num_steps_sampled": 5990400, "sample_time_ms": 22503.528, "load_time_ms": 37.786, "grad_time_ms": 10902.652, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0012515783309936523, "policy_loss": -0.006079933140426874, "vf_loss": 78.91991424560547, "vf_explained_var": 0.7658045887947083, "kl": 0.0020609761122614145, "entropy": 1.1209732294082642, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5990400, "episodes_total": 14976, "training_iteration": 468, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-48-43", "timestamp": 1660261723, "time_this_iter_s": 31.115761756896973, "time_total_s": 20131.998522043228, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 20131.998522043228, "timesteps_since_restore": 5990400, "iterations_since_restore": 468, "perf": {"cpu_util_percent": 34.11818181818182, "ram_util_percent": 59.265909090909076}}
-{"episode_reward_max": 639.0, "episode_reward_min": 507.0, "episode_reward_mean": 609.25, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 251.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 304.625}, "custom_metrics": {"sparse_reward_mean": 211.4, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 186.45, "shaped_reward_min": 147, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.76, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.16, "onion_pickup_agent_1_min": 15, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.2, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.39, "useful_onion_pickup_agent_1_min": 14, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.23, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.17, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.05, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.07, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.24, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.63, "potting_onion_agent_1_min": 14, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.84, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 6.35, "dish_pickup_agent_1_min": 4, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.5, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 4.98, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 1.06, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 1.08, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.59, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.1, "soup_pickup_agent_1_min": 4, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.54, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.04, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.24, 
"optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.63, "optimal_onion_potting_agent_1_min": 14, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.24, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.63, "viable_onion_potting_agent_1_min": 14, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [630.0, 624.0, 627.0, 639.0, 636.0, 636.0, 636.0, 587.0, 633.0, 582.0, 630.0, 636.0, 633.0, 624.0, 630.0, 624.0, 582.0, 633.0, 570.0, 579.0, 633.0, 630.0, 633.0, 630.0, 587.0, 570.0, 633.0, 633.0, 633.0, 627.0, 633.0, 627.0, 639.0, 627.0, 627.0, 633.0, 582.0, 584.0, 579.0, 582.0, 630.0, 627.0, 573.0, 582.0, 636.0, 587.0, 639.0, 636.0, 539.0, 615.0, 579.0, 582.0, 633.0, 630.0, 582.0, 582.0, 576.0, 636.0, 627.0, 630.0, 507.0, 633.0, 564.0, 627.0, 582.0, 630.0, 630.0, 582.0, 630.0, 581.0, 582.0, 582.0, 636.0, 633.0, 633.0, 561.0, 630.0, 578.0, 633.0, 582.0, 582.0, 582.0, 579.0, 584.0, 639.0, 587.0, 582.0, 587.0, 633.0, 630.0, 630.0, 567.0, 587.0, 636.0, 630.0, 582.0, 639.0, 633.0, 630.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [316.0, 314.0, 313.0, 311.0, 316.0, 311.0, 319.0, 320.0, 319.0, 317.0, 314.0, 322.0, 319.0, 317.0, 295.0, 292.0, 316.0, 317.0, 288.0, 294.0, 316.0, 314.0, 319.0, 317.0, 316.0, 317.0, 316.0, 308.0, 311.0, 319.0, 315.0, 309.0, 288.0, 294.0, 316.0, 317.0, 282.0, 288.0, 294.0, 285.0, 319.0, 314.0, 316.0, 314.0, 313.0, 320.0, 316.0, 314.0, 291.0, 296.0, 296.0, 274.0, 321.0, 312.0, 316.0, 317.0, 317.0, 316.0, 316.0, 311.0, 316.0, 317.0, 311.0, 316.0, 322.0, 317.0, 313.0, 314.0, 321.0, 306.0, 316.0, 317.0, 286.0, 296.0, 290.0, 294.0, 288.0, 291.0, 291.0, 291.0, 316.0, 314.0, 321.0, 306.0, 288.0, 285.0, 288.0, 294.0, 322.0, 314.0, 287.0, 300.0, 314.0, 325.0, 314.0, 322.0, 271.0, 
268.0, 302.0, 313.0, 285.0, 294.0, 288.0, 294.0, 314.0, 319.0, 316.0, 314.0, 288.0, 294.0, 291.0, 291.0, 288.0, 288.0, 317.0, 319.0, 316.0, 311.0, 311.0, 319.0, 251.0, 256.0, 314.0, 319.0, 281.0, 283.0, 308.0, 319.0, 289.0, 293.0, 318.0, 312.0, 311.0, 319.0, 297.0, 285.0, 316.0, 314.0, 290.0, 291.0, 293.0, 289.0, 296.0, 286.0, 321.0, 315.0, 319.0, 314.0, 319.0, 314.0, 285.0, 276.0, 316.0, 314.0, 295.0, 283.0, 319.0, 314.0, 288.0, 294.0, 294.0, 288.0, 283.0, 299.0, 281.0, 298.0, 288.0, 296.0, 317.0, 322.0, 296.0, 291.0, 289.0, 293.0, 288.0, 299.0, 321.0, 312.0, 313.0, 317.0, 319.0, 311.0, 284.0, 283.0, 302.0, 285.0, 314.0, 322.0, 313.0, 317.0, 291.0, 291.0, 317.0, 322.0, 309.0, 324.0, 316.0, 314.0, 291.0, 285.0]}, "sampler_perf": {"mean_env_wait_ms": 0.7732342684064082, "mean_processing_ms": 0.22322351749716166, "mean_inference_ms": 1.3755872244816174}, "off_policy_estimator": {}, "info": {"num_steps_trained": 11256000, "num_steps_sampled": 6003200, "sample_time_ms": 22143.958, "load_time_ms": 38.351, "grad_time_ms": 11066.072, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.00040481146425008774, "policy_loss": -0.006932735443115234, "vf_loss": 70.86636352539062, "vf_explained_var": 0.7707180976867676, "kl": 0.0017913728952407837, "entropy": 1.1174226999282837, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 6003200, "episodes_total": 15008, "training_iteration": 469, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-49-17", "timestamp": 1660261757, "time_this_iter_s": 33.90879726409912, "time_total_s": 20165.907319307327, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 20165.907319307327, "timesteps_since_restore": 6003200, "iterations_since_restore": 469, "perf": {"cpu_util_percent": 34.28125, "ram_util_percent": 58.99583333333334}}
-{"episode_reward_max": 639.0, "episode_reward_min": 552.0, "episode_reward_mean": 612.77, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 274.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 306.385}, "custom_metrics": {"sparse_reward_mean": 212.8, "sparse_reward_min": 200, "sparse_reward_max": 220, "shaped_reward_mean": 187.17, "shaped_reward_min": 152, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.85, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.26, "onion_pickup_agent_1_min": 15, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.2, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.48, "useful_onion_pickup_agent_1_min": 14, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.23, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.18, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.05, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.09, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.28, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.74, "potting_onion_agent_1_min": 14, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.73, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 6.28, "dish_pickup_agent_1_min": 4, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.53, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 4.94, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.92, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 1.01, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.67, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.08, "soup_pickup_agent_1_min": 4, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.61, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.03, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.28, 
"optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.74, "optimal_onion_potting_agent_1_min": 14, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.28, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.74, "viable_onion_potting_agent_1_min": 14, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 633.0, 582.0, 639.0, 579.0, 579.0, 630.0, 630.0, 624.0, 636.0, 582.0, 630.0, 630.0, 633.0, 615.0, 627.0, 630.0, 587.0, 630.0, 630.0, 630.0, 636.0, 630.0, 636.0, 621.0, 587.0, 633.0, 582.0, 579.0, 633.0, 630.0, 552.0, 582.0, 630.0, 630.0, 582.0, 630.0, 581.0, 582.0, 582.0, 636.0, 633.0, 633.0, 561.0, 630.0, 578.0, 633.0, 582.0, 582.0, 582.0, 579.0, 584.0, 639.0, 587.0, 582.0, 587.0, 633.0, 630.0, 630.0, 567.0, 587.0, 636.0, 630.0, 582.0, 639.0, 633.0, 630.0, 576.0, 630.0, 624.0, 627.0, 639.0, 636.0, 636.0, 636.0, 587.0, 633.0, 582.0, 630.0, 636.0, 633.0, 624.0, 630.0, 624.0, 582.0, 633.0, 570.0, 579.0, 633.0, 630.0, 633.0, 630.0, 587.0, 570.0, 633.0, 633.0, 633.0, 627.0, 633.0, 627.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [284.0, 298.0, 314.0, 319.0, 294.0, 288.0, 317.0, 322.0, 297.0, 282.0, 285.0, 294.0, 311.0, 319.0, 319.0, 311.0, 315.0, 309.0, 319.0, 317.0, 291.0, 291.0, 316.0, 314.0, 319.0, 311.0, 316.0, 317.0, 307.0, 308.0, 319.0, 308.0, 316.0, 314.0, 296.0, 291.0, 311.0, 319.0, 313.0, 317.0, 311.0, 319.0, 324.0, 312.0, 316.0, 314.0, 319.0, 317.0, 313.0, 308.0, 291.0, 296.0, 311.0, 322.0, 293.0, 289.0, 279.0, 300.0, 311.0, 322.0, 316.0, 314.0, 276.0, 276.0, 289.0, 293.0, 318.0, 312.0, 311.0, 319.0, 297.0, 285.0, 316.0, 314.0, 290.0, 291.0, 293.0, 289.0, 296.0, 286.0, 321.0, 315.0, 319.0, 314.0, 319.0, 314.0, 285.0, 276.0, 316.0, 314.0, 295.0, 283.0, 319.0, 314.0, 288.0, 294.0, 294.0, 
288.0, 283.0, 299.0, 281.0, 298.0, 288.0, 296.0, 317.0, 322.0, 296.0, 291.0, 289.0, 293.0, 288.0, 299.0, 321.0, 312.0, 313.0, 317.0, 319.0, 311.0, 284.0, 283.0, 302.0, 285.0, 314.0, 322.0, 313.0, 317.0, 291.0, 291.0, 317.0, 322.0, 309.0, 324.0, 316.0, 314.0, 291.0, 285.0, 316.0, 314.0, 313.0, 311.0, 316.0, 311.0, 319.0, 320.0, 319.0, 317.0, 314.0, 322.0, 319.0, 317.0, 295.0, 292.0, 316.0, 317.0, 288.0, 294.0, 316.0, 314.0, 319.0, 317.0, 316.0, 317.0, 316.0, 308.0, 311.0, 319.0, 315.0, 309.0, 288.0, 294.0, 316.0, 317.0, 282.0, 288.0, 294.0, 285.0, 319.0, 314.0, 316.0, 314.0, 313.0, 320.0, 316.0, 314.0, 291.0, 296.0, 296.0, 274.0, 321.0, 312.0, 316.0, 317.0, 317.0, 316.0, 316.0, 311.0, 316.0, 317.0, 311.0, 316.0]}, "sampler_perf": {"mean_env_wait_ms": 0.7725761312773625, "mean_processing_ms": 0.2230967942471684, "mean_inference_ms": 1.3747759311690726}, "off_policy_estimator": {}, "info": {"num_steps_trained": 11280000, "num_steps_sampled": 6016000, "sample_time_ms": 21879.975, "load_time_ms": 38.961, "grad_time_ms": 10890.541, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0007060763309709728, "policy_loss": -0.0072075664065778255, "vf_loss": 70.60037231445312, "vf_explained_var": 0.7745871543884277, "kl": 0.0018414078513160348, "entropy": 1.1170852184295654, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 6016000, "episodes_total": 15040, "training_iteration": 470, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-49-49", "timestamp": 1660261789, "time_this_iter_s": 31.930355072021484, "time_total_s": 20197.83767437935, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 20197.83767437935, "timesteps_since_restore": 6016000, "iterations_since_restore": 470, "perf": {"cpu_util_percent": 29.486666666666665, "ram_util_percent": 59.02666666666667}}
-{"episode_reward_max": 639.0, "episode_reward_min": 552.0, "episode_reward_mean": 617.61, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 274.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 308.805}, "custom_metrics": {"sparse_reward_mean": 214.6, "sparse_reward_min": 200, "sparse_reward_max": 220, "shaped_reward_mean": 188.41, "shaped_reward_min": 152, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.03, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.39, "onion_pickup_agent_1_min": 15, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 16.33, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.58, "useful_onion_pickup_agent_1_min": 14, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.27, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.16, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.05, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.05, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.42, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.89, "potting_onion_agent_1_min": 14, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.8, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 6.14, "dish_pickup_agent_1_min": 4, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.56, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 4.95, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 1.01, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.86, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.67, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.13, "soup_pickup_agent_1_min": 4, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.63, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.1, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.42, 
"optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.89, "optimal_onion_potting_agent_1_min": 14, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.42, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.89, "viable_onion_potting_agent_1_min": 14, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 630.0, 587.0, 624.0, 633.0, 636.0, 630.0, 633.0, 633.0, 639.0, 633.0, 636.0, 636.0, 633.0, 627.0, 582.0, 633.0, 639.0, 584.0, 582.0, 636.0, 618.0, 636.0, 633.0, 627.0, 582.0, 582.0, 582.0, 633.0, 636.0, 630.0, 582.0, 639.0, 633.0, 630.0, 576.0, 630.0, 624.0, 627.0, 639.0, 636.0, 636.0, 636.0, 587.0, 633.0, 582.0, 630.0, 636.0, 633.0, 624.0, 630.0, 624.0, 582.0, 633.0, 570.0, 579.0, 633.0, 630.0, 633.0, 630.0, 587.0, 570.0, 633.0, 633.0, 633.0, 627.0, 633.0, 627.0, 582.0, 633.0, 582.0, 639.0, 579.0, 579.0, 630.0, 630.0, 624.0, 636.0, 582.0, 630.0, 630.0, 633.0, 615.0, 627.0, 630.0, 587.0, 630.0, 630.0, 630.0, 636.0, 630.0, 636.0, 621.0, 587.0, 633.0, 582.0, 579.0, 633.0, 630.0, 552.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [286.0, 293.0, 319.0, 311.0, 288.0, 299.0, 311.0, 313.0, 311.0, 322.0, 317.0, 319.0, 313.0, 317.0, 316.0, 317.0, 321.0, 312.0, 317.0, 322.0, 314.0, 319.0, 324.0, 312.0, 324.0, 312.0, 321.0, 312.0, 308.0, 319.0, 296.0, 286.0, 314.0, 319.0, 319.0, 320.0, 296.0, 288.0, 286.0, 296.0, 319.0, 317.0, 308.0, 310.0, 317.0, 319.0, 319.0, 314.0, 321.0, 306.0, 291.0, 291.0, 288.0, 294.0, 288.0, 294.0, 321.0, 312.0, 319.0, 317.0, 313.0, 317.0, 291.0, 291.0, 317.0, 322.0, 309.0, 324.0, 316.0, 314.0, 291.0, 285.0, 316.0, 314.0, 313.0, 311.0, 316.0, 311.0, 319.0, 320.0, 319.0, 317.0, 314.0, 322.0, 319.0, 317.0, 295.0, 292.0, 316.0, 317.0, 288.0, 294.0, 316.0, 314.0, 319.0, 317.0, 316.0, 
317.0, 316.0, 308.0, 311.0, 319.0, 315.0, 309.0, 288.0, 294.0, 316.0, 317.0, 282.0, 288.0, 294.0, 285.0, 319.0, 314.0, 316.0, 314.0, 313.0, 320.0, 316.0, 314.0, 291.0, 296.0, 296.0, 274.0, 321.0, 312.0, 316.0, 317.0, 317.0, 316.0, 316.0, 311.0, 316.0, 317.0, 311.0, 316.0, 284.0, 298.0, 314.0, 319.0, 294.0, 288.0, 317.0, 322.0, 297.0, 282.0, 285.0, 294.0, 311.0, 319.0, 319.0, 311.0, 315.0, 309.0, 319.0, 317.0, 291.0, 291.0, 316.0, 314.0, 319.0, 311.0, 316.0, 317.0, 307.0, 308.0, 319.0, 308.0, 316.0, 314.0, 296.0, 291.0, 311.0, 319.0, 313.0, 317.0, 311.0, 319.0, 324.0, 312.0, 316.0, 314.0, 319.0, 317.0, 313.0, 308.0, 291.0, 296.0, 311.0, 322.0, 293.0, 289.0, 279.0, 300.0, 311.0, 322.0, 316.0, 314.0, 276.0, 276.0]}, "sampler_perf": {"mean_env_wait_ms": 0.7719288658166613, "mean_processing_ms": 0.22297329438160504, "mean_inference_ms": 1.3741316094375031}, "off_policy_estimator": {}, "info": {"num_steps_trained": 11304000, "num_steps_sampled": 6028800, "sample_time_ms": 21780.977, "load_time_ms": 38.854, "grad_time_ms": 10976.386, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0016910168342292309, "policy_loss": -0.008838978596031666, "vf_loss": 77.08248901367188, "vf_explained_var": 0.7691299319267273, "kl": 0.0020619730930775404, "entropy": 1.1205838918685913, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 6028800, "episodes_total": 15072, "training_iteration": 471, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-50-24", "timestamp": 1660261824, "time_this_iter_s": 35.09460806846619, "time_total_s": 20232.932282447815, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 20232.932282447815, "timesteps_since_restore": 6028800, "iterations_since_restore": 471, "perf": {"cpu_util_percent": 30.86, "ram_util_percent": 59.02799999999999}}
-{"episode_reward_max": 639.0, "episode_reward_min": 552.0, "episode_reward_mean": 617.82, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 276.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 308.91}, "custom_metrics": {"sparse_reward_mean": 214.6, "sparse_reward_min": 200, "sparse_reward_max": 220, "shaped_reward_mean": 188.62, "shaped_reward_min": 152, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.21, "onion_pickup_agent_0_min": 14, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.26, "onion_pickup_agent_1_min": 15, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 16.51, "useful_onion_pickup_agent_0_min": 13, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.49, "useful_onion_pickup_agent_1_min": 14, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.26, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.16, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.06, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.03, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.59, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.78, "potting_onion_agent_1_min": 15, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.62, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 6.19, "dish_pickup_agent_1_min": 4, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.47, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.05, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.93, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.81, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.57, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.22, "soup_pickup_agent_1_min": 4, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.53, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.2, "soup_delivery_agent_1_min": 4, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.59, 
"optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.78, "optimal_onion_potting_agent_1_min": 15, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.59, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.78, "viable_onion_potting_agent_1_min": 15, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [633.0, 633.0, 582.0, 630.0, 624.0, 615.0, 582.0, 582.0, 633.0, 630.0, 633.0, 639.0, 582.0, 633.0, 621.0, 633.0, 636.0, 636.0, 636.0, 633.0, 633.0, 581.0, 633.0, 630.0, 582.0, 627.0, 584.0, 636.0, 582.0, 630.0, 636.0, 639.0, 633.0, 627.0, 633.0, 627.0, 582.0, 633.0, 582.0, 639.0, 579.0, 579.0, 630.0, 630.0, 624.0, 636.0, 582.0, 630.0, 630.0, 633.0, 615.0, 627.0, 630.0, 587.0, 630.0, 630.0, 630.0, 636.0, 630.0, 636.0, 621.0, 587.0, 633.0, 582.0, 579.0, 633.0, 630.0, 552.0, 579.0, 630.0, 587.0, 624.0, 633.0, 636.0, 630.0, 633.0, 633.0, 639.0, 633.0, 636.0, 636.0, 633.0, 627.0, 582.0, 633.0, 639.0, 584.0, 582.0, 636.0, 618.0, 636.0, 633.0, 627.0, 582.0, 582.0, 582.0, 633.0, 636.0, 630.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [314.0, 319.0, 314.0, 319.0, 294.0, 288.0, 316.0, 314.0, 316.0, 308.0, 305.0, 310.0, 291.0, 291.0, 287.0, 295.0, 311.0, 322.0, 316.0, 314.0, 317.0, 316.0, 319.0, 320.0, 293.0, 289.0, 316.0, 317.0, 310.0, 311.0, 314.0, 319.0, 309.0, 327.0, 319.0, 317.0, 314.0, 322.0, 311.0, 322.0, 319.0, 314.0, 293.0, 288.0, 317.0, 316.0, 309.0, 321.0, 296.0, 286.0, 314.0, 313.0, 298.0, 286.0, 317.0, 319.0, 288.0, 294.0, 316.0, 314.0, 319.0, 317.0, 316.0, 323.0, 317.0, 316.0, 316.0, 311.0, 316.0, 317.0, 311.0, 316.0, 284.0, 298.0, 314.0, 319.0, 294.0, 288.0, 317.0, 322.0, 297.0, 282.0, 285.0, 294.0, 311.0, 319.0, 319.0, 311.0, 315.0, 309.0, 319.0, 317.0, 291.0, 291.0, 316.0, 314.0, 319.0, 
311.0, 316.0, 317.0, 307.0, 308.0, 319.0, 308.0, 316.0, 314.0, 296.0, 291.0, 311.0, 319.0, 313.0, 317.0, 311.0, 319.0, 324.0, 312.0, 316.0, 314.0, 319.0, 317.0, 313.0, 308.0, 291.0, 296.0, 311.0, 322.0, 293.0, 289.0, 279.0, 300.0, 311.0, 322.0, 316.0, 314.0, 276.0, 276.0, 286.0, 293.0, 319.0, 311.0, 288.0, 299.0, 311.0, 313.0, 311.0, 322.0, 317.0, 319.0, 313.0, 317.0, 316.0, 317.0, 321.0, 312.0, 317.0, 322.0, 314.0, 319.0, 324.0, 312.0, 324.0, 312.0, 321.0, 312.0, 308.0, 319.0, 296.0, 286.0, 314.0, 319.0, 319.0, 320.0, 296.0, 288.0, 286.0, 296.0, 319.0, 317.0, 308.0, 310.0, 317.0, 319.0, 319.0, 314.0, 321.0, 306.0, 291.0, 291.0, 288.0, 294.0, 288.0, 294.0, 321.0, 312.0, 319.0, 317.0, 313.0, 317.0, 291.0, 291.0]}, "sampler_perf": {"mean_env_wait_ms": 0.7712881392644176, "mean_processing_ms": 0.2228504081573419, "mean_inference_ms": 1.3735625457302265}, "off_policy_estimator": {}, "info": {"num_steps_trained": 11328000, "num_steps_sampled": 6041600, "sample_time_ms": 21798.833, "load_time_ms": 39.052, "grad_time_ms": 10918.523, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0015959719894453883, "policy_loss": -0.005134529899805784, "vf_loss": 72.90253448486328, "vf_explained_var": 0.7736382484436035, "kl": 0.00227816472761333, "entropy": 1.1195167303085327, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 6041600, "episodes_total": 15104, "training_iteration": 472, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-50-57", "timestamp": 1660261857, "time_this_iter_s": 32.47214722633362, "time_total_s": 20265.40442967415, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 20265.40442967415, "timesteps_since_restore": 6041600, "iterations_since_restore": 472, "perf": {"cpu_util_percent": 31.686956521739138, "ram_util_percent": 58.99347826086958}}
-{"episode_reward_max": 639.0, "episode_reward_min": 498.0, "episode_reward_mean": 616.47, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 242.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 308.235}, "custom_metrics": {"sparse_reward_mean": 214.2, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 188.07, "shaped_reward_min": 138, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.18, "onion_pickup_agent_0_min": 14, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.27, "onion_pickup_agent_1_min": 15, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 16.56, "useful_onion_pickup_agent_0_min": 13, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.48, "useful_onion_pickup_agent_1_min": 15, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.28, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.18, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.07, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.01, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.6, "potting_onion_agent_0_min": 13, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.78, "potting_onion_agent_1_min": 15, "potting_onion_agent_1_max": 20, "dish_pickup_agent_0_mean": 6.59, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 6.16, "dish_pickup_agent_1_min": 4, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.4, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 4.96, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.91, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.78, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.57, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.21, "soup_pickup_agent_1_min": 4, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.51, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.2, "soup_delivery_agent_1_min": 4, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.6, 
"optimal_onion_potting_agent_0_min": 13, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.78, "optimal_onion_potting_agent_1_min": 15, "optimal_onion_potting_agent_1_max": 20, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.6, "viable_onion_potting_agent_0_min": 13, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.78, "viable_onion_potting_agent_1_min": 15, "viable_onion_potting_agent_1_max": 20, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [630.0, 582.0, 582.0, 630.0, 609.0, 630.0, 636.0, 582.0, 636.0, 633.0, 633.0, 558.0, 630.0, 633.0, 630.0, 630.0, 636.0, 630.0, 636.0, 582.0, 587.0, 627.0, 639.0, 636.0, 587.0, 630.0, 627.0, 630.0, 498.0, 579.0, 630.0, 630.0, 579.0, 633.0, 630.0, 552.0, 579.0, 630.0, 587.0, 624.0, 633.0, 636.0, 630.0, 633.0, 633.0, 639.0, 633.0, 636.0, 636.0, 633.0, 627.0, 582.0, 633.0, 639.0, 584.0, 582.0, 636.0, 618.0, 636.0, 633.0, 627.0, 582.0, 582.0, 582.0, 633.0, 636.0, 630.0, 582.0, 633.0, 633.0, 582.0, 630.0, 624.0, 615.0, 582.0, 582.0, 633.0, 630.0, 633.0, 639.0, 582.0, 633.0, 621.0, 633.0, 636.0, 636.0, 636.0, 633.0, 633.0, 581.0, 633.0, 630.0, 582.0, 627.0, 584.0, 636.0, 582.0, 630.0, 636.0, 639.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [319.0, 311.0, 291.0, 291.0, 294.0, 288.0, 316.0, 314.0, 304.0, 305.0, 316.0, 314.0, 319.0, 317.0, 286.0, 296.0, 319.0, 317.0, 319.0, 314.0, 316.0, 317.0, 276.0, 282.0, 316.0, 314.0, 314.0, 319.0, 324.0, 306.0, 319.0, 311.0, 319.0, 317.0, 316.0, 314.0, 314.0, 322.0, 294.0, 288.0, 296.0, 291.0, 308.0, 319.0, 319.0, 320.0, 319.0, 317.0, 293.0, 294.0, 311.0, 319.0, 322.0, 305.0, 316.0, 314.0, 242.0, 256.0, 288.0, 291.0, 313.0, 317.0, 316.0, 314.0, 279.0, 300.0, 311.0, 322.0, 316.0, 314.0, 276.0, 276.0, 286.0, 293.0, 319.0, 311.0, 288.0, 299.0, 311.0, 313.0, 311.0, 322.0, 317.0, 319.0, 313.0, 317.0, 316.0, 317.0, 321.0, 312.0, 317.0, 322.0, 314.0, 319.0, 324.0, 312.0, 324.0, 
312.0, 321.0, 312.0, 308.0, 319.0, 296.0, 286.0, 314.0, 319.0, 319.0, 320.0, 296.0, 288.0, 286.0, 296.0, 319.0, 317.0, 308.0, 310.0, 317.0, 319.0, 319.0, 314.0, 321.0, 306.0, 291.0, 291.0, 288.0, 294.0, 288.0, 294.0, 321.0, 312.0, 319.0, 317.0, 313.0, 317.0, 291.0, 291.0, 314.0, 319.0, 314.0, 319.0, 294.0, 288.0, 316.0, 314.0, 316.0, 308.0, 305.0, 310.0, 291.0, 291.0, 287.0, 295.0, 311.0, 322.0, 316.0, 314.0, 317.0, 316.0, 319.0, 320.0, 293.0, 289.0, 316.0, 317.0, 310.0, 311.0, 314.0, 319.0, 309.0, 327.0, 319.0, 317.0, 314.0, 322.0, 311.0, 322.0, 319.0, 314.0, 293.0, 288.0, 317.0, 316.0, 309.0, 321.0, 296.0, 286.0, 314.0, 313.0, 298.0, 286.0, 317.0, 319.0, 288.0, 294.0, 316.0, 314.0, 319.0, 317.0, 316.0, 323.0]}, "sampler_perf": {"mean_env_wait_ms": 0.7706549385151754, "mean_processing_ms": 0.22272985988411015, "mean_inference_ms": 1.3730067691447254}, "off_policy_estimator": {}, "info": {"num_steps_trained": 11352000, "num_steps_sampled": 6054400, "sample_time_ms": 21729.717, "load_time_ms": 39.098, "grad_time_ms": 10717.617, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0005160535220056772, "policy_loss": -0.007616788614541292, "vf_loss": 76.61554718017578, "vf_explained_var": 0.7677715420722961, "kl": 0.0017990797059610486, "entropy": 1.1216602325439453, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 6054400, "episodes_total": 15136, "training_iteration": 473, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-51-29", "timestamp": 1660261889, "time_this_iter_s": 32.178860902786255, "time_total_s": 20297.583290576935, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 20297.583290576935, "timesteps_since_restore": 6054400, "iterations_since_restore": 473, "perf": {"cpu_util_percent": 29.955555555555556, "ram_util_percent": 59.05111111111111}}
-{"episode_reward_max": 639.0, "episode_reward_min": 498.0, "episode_reward_mean": 616.45, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 242.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 308.225}, "custom_metrics": {"sparse_reward_mean": 214.2, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 188.05, "shaped_reward_min": 138, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.93, "onion_pickup_agent_0_min": 14, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.38, "onion_pickup_agent_1_min": 15, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 16.31, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.6, "useful_onion_pickup_agent_1_min": 15, "useful_onion_pickup_agent_1_max": 20, "onion_drop_agent_0_mean": 0.23, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.14, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.06, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.01, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.41, "potting_onion_agent_0_min": 13, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.94, "potting_onion_agent_1_min": 15, "potting_onion_agent_1_max": 20, "dish_pickup_agent_0_mean": 6.67, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 6.07, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.48, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.92, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.89, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.78, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.64, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.13, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.59, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.12, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.41, 
"optimal_onion_potting_agent_0_min": 13, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.94, "optimal_onion_potting_agent_1_min": 15, "optimal_onion_potting_agent_1_max": 20, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.41, "viable_onion_potting_agent_0_min": 13, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.94, "viable_onion_potting_agent_1_min": 15, "viable_onion_potting_agent_1_max": 20, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [630.0, 579.0, 636.0, 639.0, 633.0, 582.0, 579.0, 627.0, 636.0, 639.0, 636.0, 630.0, 573.0, 558.0, 573.0, 636.0, 579.0, 630.0, 633.0, 630.0, 633.0, 579.0, 636.0, 630.0, 630.0, 630.0, 587.0, 624.0, 633.0, 639.0, 582.0, 636.0, 633.0, 636.0, 630.0, 582.0, 633.0, 633.0, 582.0, 630.0, 624.0, 615.0, 582.0, 582.0, 633.0, 630.0, 633.0, 639.0, 582.0, 633.0, 621.0, 633.0, 636.0, 636.0, 636.0, 633.0, 633.0, 581.0, 633.0, 630.0, 582.0, 627.0, 584.0, 636.0, 582.0, 630.0, 636.0, 639.0, 630.0, 582.0, 582.0, 630.0, 609.0, 630.0, 636.0, 582.0, 636.0, 633.0, 633.0, 558.0, 630.0, 633.0, 630.0, 630.0, 636.0, 630.0, 636.0, 582.0, 587.0, 627.0, 639.0, 636.0, 587.0, 630.0, 627.0, 630.0, 498.0, 579.0, 630.0, 630.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [321.0, 309.0, 285.0, 294.0, 311.0, 325.0, 319.0, 320.0, 319.0, 314.0, 291.0, 291.0, 287.0, 292.0, 313.0, 314.0, 324.0, 312.0, 324.0, 315.0, 319.0, 317.0, 311.0, 319.0, 282.0, 291.0, 278.0, 280.0, 280.0, 293.0, 319.0, 317.0, 288.0, 291.0, 311.0, 319.0, 319.0, 314.0, 317.0, 313.0, 316.0, 317.0, 293.0, 286.0, 314.0, 322.0, 321.0, 309.0, 326.0, 304.0, 316.0, 314.0, 293.0, 294.0, 305.0, 319.0, 316.0, 317.0, 314.0, 325.0, 285.0, 297.0, 319.0, 317.0, 321.0, 312.0, 319.0, 317.0, 313.0, 317.0, 291.0, 291.0, 314.0, 319.0, 314.0, 319.0, 294.0, 288.0, 316.0, 314.0, 316.0, 308.0, 305.0, 310.0, 291.0, 291.0, 287.0, 295.0, 311.0, 322.0, 316.0, 314.0, 317.0, 316.0, 319.0, 320.0, 293.0, 
289.0, 316.0, 317.0, 310.0, 311.0, 314.0, 319.0, 309.0, 327.0, 319.0, 317.0, 314.0, 322.0, 311.0, 322.0, 319.0, 314.0, 293.0, 288.0, 317.0, 316.0, 309.0, 321.0, 296.0, 286.0, 314.0, 313.0, 298.0, 286.0, 317.0, 319.0, 288.0, 294.0, 316.0, 314.0, 319.0, 317.0, 316.0, 323.0, 319.0, 311.0, 291.0, 291.0, 294.0, 288.0, 316.0, 314.0, 304.0, 305.0, 316.0, 314.0, 319.0, 317.0, 286.0, 296.0, 319.0, 317.0, 319.0, 314.0, 316.0, 317.0, 276.0, 282.0, 316.0, 314.0, 314.0, 319.0, 324.0, 306.0, 319.0, 311.0, 319.0, 317.0, 316.0, 314.0, 314.0, 322.0, 294.0, 288.0, 296.0, 291.0, 308.0, 319.0, 319.0, 320.0, 319.0, 317.0, 293.0, 294.0, 311.0, 319.0, 322.0, 305.0, 316.0, 314.0, 242.0, 256.0, 288.0, 291.0, 313.0, 317.0, 316.0, 314.0]}, "sampler_perf": {"mean_env_wait_ms": 0.7700230204234435, "mean_processing_ms": 0.22260903764962753, "mean_inference_ms": 1.3724136807505178}, "off_policy_estimator": {}, "info": {"num_steps_trained": 11376000, "num_steps_sampled": 6067200, "sample_time_ms": 21448.766, "load_time_ms": 39.222, "grad_time_ms": 10552.154, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.001221023383550346, "policy_loss": -0.005727085750550032, "vf_loss": 75.09713745117188, "vf_explained_var": 0.7705094218254089, "kl": 0.002081832615658641, "entropy": 1.1232246160507202, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 6067200, "episodes_total": 15168, "training_iteration": 474, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-52-02", "timestamp": 1660261922, "time_this_iter_s": 32.74073004722595, "time_total_s": 20330.32402062416, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 20330.32402062416, "timesteps_since_restore": 6067200, "iterations_since_restore": 474, "perf": {"cpu_util_percent": 29.70652173913044, "ram_util_percent": 59.01521739130436}}
-{"episode_reward_max": 639.0, "episode_reward_min": 408.0, "episode_reward_mean": 616.32, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 200.0}, "policy_reward_max": {"ppo": 326.0}, "policy_reward_mean": {"ppo": 308.16}, "custom_metrics": {"sparse_reward_mean": 214.0, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 188.32, "shaped_reward_min": 128, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.74, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.52, "onion_pickup_agent_1_min": 13, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.14, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.67, "useful_onion_pickup_agent_1_min": 12, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.19, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.17, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.04, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.04, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.22, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 18.12, "potting_onion_agent_1_min": 12, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.83, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.86, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.62, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.88, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 6, "dish_drop_agent_0_mean": 0.95, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.68, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.73, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.04, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 6, "soup_delivery_agent_0_mean": 5.7, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.0, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.22, 
"optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 18.12, "optimal_onion_potting_agent_1_min": 12, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.22, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 18.12, "viable_onion_potting_agent_1_min": 12, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [633.0, 587.0, 633.0, 633.0, 630.0, 639.0, 639.0, 624.0, 630.0, 582.0, 408.0, 636.0, 579.0, 636.0, 633.0, 633.0, 636.0, 639.0, 627.0, 633.0, 636.0, 630.0, 639.0, 630.0, 587.0, 630.0, 636.0, 587.0, 639.0, 630.0, 633.0, 633.0, 582.0, 630.0, 636.0, 639.0, 630.0, 582.0, 582.0, 630.0, 609.0, 630.0, 636.0, 582.0, 636.0, 633.0, 633.0, 558.0, 630.0, 633.0, 630.0, 630.0, 636.0, 630.0, 636.0, 582.0, 587.0, 627.0, 639.0, 636.0, 587.0, 630.0, 627.0, 630.0, 498.0, 579.0, 630.0, 630.0, 630.0, 579.0, 636.0, 639.0, 633.0, 582.0, 579.0, 627.0, 636.0, 639.0, 636.0, 630.0, 573.0, 558.0, 573.0, 636.0, 579.0, 630.0, 633.0, 630.0, 633.0, 579.0, 636.0, 630.0, 630.0, 630.0, 587.0, 624.0, 633.0, 639.0, 582.0, 636.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [316.0, 317.0, 293.0, 294.0, 313.0, 320.0, 319.0, 314.0, 316.0, 314.0, 322.0, 317.0, 322.0, 317.0, 313.0, 311.0, 311.0, 319.0, 291.0, 291.0, 208.0, 200.0, 314.0, 322.0, 284.0, 295.0, 319.0, 317.0, 313.0, 320.0, 311.0, 322.0, 319.0, 317.0, 322.0, 317.0, 313.0, 314.0, 322.0, 311.0, 319.0, 317.0, 321.0, 309.0, 319.0, 320.0, 318.0, 312.0, 293.0, 294.0, 311.0, 319.0, 319.0, 317.0, 288.0, 299.0, 322.0, 317.0, 316.0, 314.0, 317.0, 316.0, 319.0, 314.0, 288.0, 294.0, 316.0, 314.0, 319.0, 317.0, 316.0, 323.0, 319.0, 311.0, 291.0, 291.0, 294.0, 288.0, 316.0, 314.0, 304.0, 305.0, 316.0, 314.0, 319.0, 317.0, 286.0, 296.0, 319.0, 317.0, 319.0, 314.0, 316.0, 317.0, 276.0, 282.0, 316.0, 
314.0, 314.0, 319.0, 324.0, 306.0, 319.0, 311.0, 319.0, 317.0, 316.0, 314.0, 314.0, 322.0, 294.0, 288.0, 296.0, 291.0, 308.0, 319.0, 319.0, 320.0, 319.0, 317.0, 293.0, 294.0, 311.0, 319.0, 322.0, 305.0, 316.0, 314.0, 242.0, 256.0, 288.0, 291.0, 313.0, 317.0, 316.0, 314.0, 321.0, 309.0, 285.0, 294.0, 311.0, 325.0, 319.0, 320.0, 319.0, 314.0, 291.0, 291.0, 287.0, 292.0, 313.0, 314.0, 324.0, 312.0, 324.0, 315.0, 319.0, 317.0, 311.0, 319.0, 282.0, 291.0, 278.0, 280.0, 280.0, 293.0, 319.0, 317.0, 288.0, 291.0, 311.0, 319.0, 319.0, 314.0, 317.0, 313.0, 316.0, 317.0, 293.0, 286.0, 314.0, 322.0, 321.0, 309.0, 326.0, 304.0, 316.0, 314.0, 293.0, 294.0, 305.0, 319.0, 316.0, 317.0, 314.0, 325.0, 285.0, 297.0, 319.0, 317.0]}, "sampler_perf": {"mean_env_wait_ms": 0.7693923762268673, "mean_processing_ms": 0.22248851172311768, "mean_inference_ms": 1.3718337576179396}, "off_policy_estimator": {}, "info": {"num_steps_trained": 11400000, "num_steps_sampled": 6080000, "sample_time_ms": 21471.253, "load_time_ms": 39.181, "grad_time_ms": 10674.814, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.001056800247170031, "policy_loss": -0.005863674450665712, "vf_loss": 74.7898178100586, "vf_explained_var": 0.7796471118927002, "kl": 0.002407137770205736, "entropy": 1.1170334815979004, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 6080000, "episodes_total": 15200, "training_iteration": 475, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-52-36", "timestamp": 1660261956, "time_this_iter_s": 34.37432289123535, "time_total_s": 20364.698343515396, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 20364.698343515396, "timesteps_since_restore": 6080000, "iterations_since_restore": 475, "perf": {"cpu_util_percent": 34.573469387755104, "ram_util_percent": 58.995918367346945}}
-{"episode_reward_max": 639.0, "episode_reward_min": 291.0, "episode_reward_mean": 611.12, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 145.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 305.56}, "custom_metrics": {"sparse_reward_mean": 212.0, "sparse_reward_min": 100, "sparse_reward_max": 220, "shaped_reward_mean": 187.12, "shaped_reward_min": 91, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.5, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.45, "onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 15.97, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.55, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.12, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.15, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.03, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.07, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.07, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 18.06, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.82, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.66, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.59, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.87, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 6, "dish_drop_agent_0_mean": 0.98, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.57, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.72, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.95, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 6, "soup_delivery_agent_0_mean": 5.69, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.91, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.07, 
"optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 18.06, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.07, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 18.06, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [630.0, 636.0, 582.0, 636.0, 633.0, 636.0, 291.0, 639.0, 576.0, 587.0, 630.0, 587.0, 633.0, 621.0, 582.0, 630.0, 584.0, 465.0, 636.0, 582.0, 630.0, 636.0, 636.0, 630.0, 636.0, 624.0, 579.0, 624.0, 633.0, 636.0, 582.0, 636.0, 498.0, 579.0, 630.0, 630.0, 630.0, 579.0, 636.0, 639.0, 633.0, 582.0, 579.0, 627.0, 636.0, 639.0, 636.0, 630.0, 573.0, 558.0, 573.0, 636.0, 579.0, 630.0, 633.0, 630.0, 633.0, 579.0, 636.0, 630.0, 630.0, 630.0, 587.0, 624.0, 633.0, 639.0, 582.0, 636.0, 633.0, 587.0, 633.0, 633.0, 630.0, 639.0, 639.0, 624.0, 630.0, 582.0, 408.0, 636.0, 579.0, 636.0, 633.0, 633.0, 636.0, 639.0, 627.0, 633.0, 636.0, 630.0, 639.0, 630.0, 587.0, 630.0, 636.0, 587.0, 639.0, 630.0, 633.0, 633.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [311.0, 319.0, 324.0, 312.0, 294.0, 288.0, 319.0, 317.0, 314.0, 319.0, 314.0, 322.0, 145.0, 146.0, 319.0, 320.0, 292.0, 284.0, 293.0, 294.0, 313.0, 317.0, 296.0, 291.0, 318.0, 315.0, 313.0, 308.0, 296.0, 286.0, 314.0, 316.0, 298.0, 286.0, 228.0, 237.0, 319.0, 317.0, 296.0, 286.0, 313.0, 317.0, 327.0, 309.0, 322.0, 314.0, 316.0, 314.0, 319.0, 317.0, 311.0, 313.0, 283.0, 296.0, 310.0, 314.0, 316.0, 317.0, 319.0, 317.0, 289.0, 293.0, 319.0, 317.0, 242.0, 256.0, 288.0, 291.0, 313.0, 317.0, 316.0, 314.0, 321.0, 309.0, 285.0, 294.0, 311.0, 325.0, 319.0, 320.0, 319.0, 314.0, 291.0, 291.0, 287.0, 292.0, 313.0, 314.0, 324.0, 312.0, 324.0, 315.0, 319.0, 317.0, 311.0, 319.0, 282.0, 
291.0, 278.0, 280.0, 280.0, 293.0, 319.0, 317.0, 288.0, 291.0, 311.0, 319.0, 319.0, 314.0, 317.0, 313.0, 316.0, 317.0, 293.0, 286.0, 314.0, 322.0, 321.0, 309.0, 326.0, 304.0, 316.0, 314.0, 293.0, 294.0, 305.0, 319.0, 316.0, 317.0, 314.0, 325.0, 285.0, 297.0, 319.0, 317.0, 316.0, 317.0, 293.0, 294.0, 313.0, 320.0, 319.0, 314.0, 316.0, 314.0, 322.0, 317.0, 322.0, 317.0, 313.0, 311.0, 311.0, 319.0, 291.0, 291.0, 208.0, 200.0, 314.0, 322.0, 284.0, 295.0, 319.0, 317.0, 313.0, 320.0, 311.0, 322.0, 319.0, 317.0, 322.0, 317.0, 313.0, 314.0, 322.0, 311.0, 319.0, 317.0, 321.0, 309.0, 319.0, 320.0, 318.0, 312.0, 293.0, 294.0, 311.0, 319.0, 319.0, 317.0, 288.0, 299.0, 322.0, 317.0, 316.0, 314.0, 317.0, 316.0, 319.0, 314.0]}, "sampler_perf": {"mean_env_wait_ms": 0.7687649833136294, "mean_processing_ms": 0.2223678201524863, "mean_inference_ms": 1.3712901278888552}, "off_policy_estimator": {}, "info": {"num_steps_trained": 11424000, "num_steps_sampled": 6092800, "sample_time_ms": 21822.996, "load_time_ms": 39.16, "grad_time_ms": 10774.782, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0006356429657898843, "policy_loss": -0.006720335688441992, "vf_loss": 79.15064239501953, "vf_explained_var": 0.7751259207725525, "kl": 0.0025446319486945868, "entropy": 1.1181851625442505, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 6092800, "episodes_total": 15232, "training_iteration": 476, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-53-09", "timestamp": 1660261989, "time_this_iter_s": 32.92729115486145, "time_total_s": 20397.625634670258, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 20397.625634670258, "timesteps_since_restore": 6092800, "iterations_since_restore": 476, "perf": {"cpu_util_percent": 31.27608695652173, "ram_util_percent": 59.04347826086958}}
-{"episode_reward_max": 639.0, "episode_reward_min": 180.0, "episode_reward_mean": 607.23, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 89.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 303.615}, "custom_metrics": {"sparse_reward_mean": 210.2, "sparse_reward_min": 60, "sparse_reward_max": 220, "shaped_reward_mean": 186.83, "shaped_reward_min": 60, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.48, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.44, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 15.99, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.43, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.15, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.19, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.06, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.09, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.02, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.98, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.76, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.64, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.62, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.94, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 6, "dish_drop_agent_0_mean": 0.98, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.55, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.69, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.94, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 6, "soup_delivery_agent_0_mean": 5.64, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.89, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.02, 
"optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.98, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.02, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.98, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [630.0, 636.0, 636.0, 578.0, 639.0, 582.0, 576.0, 587.0, 636.0, 582.0, 533.0, 633.0, 636.0, 630.0, 630.0, 579.0, 587.0, 630.0, 639.0, 639.0, 579.0, 636.0, 180.0, 587.0, 633.0, 630.0, 627.0, 587.0, 633.0, 633.0, 582.0, 630.0, 633.0, 639.0, 582.0, 636.0, 633.0, 587.0, 633.0, 633.0, 630.0, 639.0, 639.0, 624.0, 630.0, 582.0, 408.0, 636.0, 579.0, 636.0, 633.0, 633.0, 636.0, 639.0, 627.0, 633.0, 636.0, 630.0, 639.0, 630.0, 587.0, 630.0, 636.0, 587.0, 639.0, 630.0, 633.0, 633.0, 630.0, 636.0, 582.0, 636.0, 633.0, 636.0, 291.0, 639.0, 576.0, 587.0, 630.0, 587.0, 633.0, 621.0, 582.0, 630.0, 584.0, 465.0, 636.0, 582.0, 630.0, 636.0, 636.0, 630.0, 636.0, 624.0, 579.0, 624.0, 633.0, 636.0, 582.0, 636.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [313.0, 317.0, 319.0, 317.0, 316.0, 320.0, 279.0, 299.0, 319.0, 320.0, 293.0, 289.0, 291.0, 285.0, 293.0, 294.0, 314.0, 322.0, 288.0, 294.0, 254.0, 279.0, 319.0, 314.0, 319.0, 317.0, 317.0, 313.0, 311.0, 319.0, 282.0, 297.0, 295.0, 292.0, 316.0, 314.0, 319.0, 320.0, 316.0, 323.0, 288.0, 291.0, 324.0, 312.0, 91.0, 89.0, 296.0, 291.0, 319.0, 314.0, 311.0, 319.0, 316.0, 311.0, 306.0, 281.0, 321.0, 312.0, 316.0, 317.0, 288.0, 294.0, 311.0, 319.0, 316.0, 317.0, 314.0, 325.0, 285.0, 297.0, 319.0, 317.0, 316.0, 317.0, 293.0, 294.0, 313.0, 320.0, 319.0, 314.0, 316.0, 314.0, 322.0, 317.0, 322.0, 317.0, 313.0, 311.0, 311.0, 319.0, 291.0, 291.0, 208.0, 200.0, 314.0, 322.0, 284.0, 
295.0, 319.0, 317.0, 313.0, 320.0, 311.0, 322.0, 319.0, 317.0, 322.0, 317.0, 313.0, 314.0, 322.0, 311.0, 319.0, 317.0, 321.0, 309.0, 319.0, 320.0, 318.0, 312.0, 293.0, 294.0, 311.0, 319.0, 319.0, 317.0, 288.0, 299.0, 322.0, 317.0, 316.0, 314.0, 317.0, 316.0, 319.0, 314.0, 311.0, 319.0, 324.0, 312.0, 294.0, 288.0, 319.0, 317.0, 314.0, 319.0, 314.0, 322.0, 145.0, 146.0, 319.0, 320.0, 292.0, 284.0, 293.0, 294.0, 313.0, 317.0, 296.0, 291.0, 318.0, 315.0, 313.0, 308.0, 296.0, 286.0, 314.0, 316.0, 298.0, 286.0, 228.0, 237.0, 319.0, 317.0, 296.0, 286.0, 313.0, 317.0, 327.0, 309.0, 322.0, 314.0, 316.0, 314.0, 319.0, 317.0, 311.0, 313.0, 283.0, 296.0, 310.0, 314.0, 316.0, 317.0, 319.0, 317.0, 289.0, 293.0, 319.0, 317.0]}, "sampler_perf": {"mean_env_wait_ms": 0.7681470637203944, "mean_processing_ms": 0.22224947461581387, "mean_inference_ms": 1.3708820792528533}, "off_policy_estimator": {}, "info": {"num_steps_trained": 11448000, "num_steps_sampled": 6105600, "sample_time_ms": 22237.355, "load_time_ms": 38.878, "grad_time_ms": 10934.933, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.003096085973083973, "policy_loss": -0.004263754468411207, "vf_loss": 79.16039276123047, "vf_explained_var": 0.7912160754203796, "kl": 0.001874853391200304, "entropy": 1.1124080419540405, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 6105600, "episodes_total": 15264, "training_iteration": 477, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-53-45", "timestamp": 1660262025, "time_this_iter_s": 35.63289189338684, "time_total_s": 20433.258526563644, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 20433.258526563644, "timesteps_since_restore": 6105600, "iterations_since_restore": 477, "perf": {"cpu_util_percent": 34.068627450980394, "ram_util_percent": 59.11960784313726}}
-{"episode_reward_max": 639.0, "episode_reward_min": 180.0, "episode_reward_mean": 606.68, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 89.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 303.34}, "custom_metrics": {"sparse_reward_mean": 210.0, "sparse_reward_min": 60, "sparse_reward_max": 220, "shaped_reward_mean": 186.68, "shaped_reward_min": 60, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.64, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.34, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.09, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.37, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.22, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.19, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.1, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.09, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.17, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.87, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.63, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.84, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.52, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.95, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.9, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.69, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.63, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.0, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 6, "soup_delivery_agent_0_mean": 5.57, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.96, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.17, "optimal_onion_potting_agent_0_min": 
5, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.87, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.17, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.87, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": 
{"episode_reward": [630.0, 627.0, 633.0, 561.0, 636.0, 630.0, 525.0, 633.0, 636.0, 636.0, 582.0, 636.0, 636.0, 630.0, 633.0, 636.0, 639.0, 630.0, 636.0, 587.0, 582.0, 636.0, 544.0, 633.0, 636.0, 630.0, 639.0, 584.0, 636.0, 579.0, 576.0, 633.0, 639.0, 630.0, 633.0, 633.0, 630.0, 636.0, 582.0, 636.0, 633.0, 636.0, 291.0, 639.0, 576.0, 587.0, 630.0, 587.0, 633.0, 621.0, 582.0, 630.0, 584.0, 465.0, 636.0, 582.0, 630.0, 636.0, 636.0, 630.0, 636.0, 624.0, 579.0, 624.0, 633.0, 636.0, 582.0, 636.0, 630.0, 636.0, 636.0, 578.0, 639.0, 582.0, 576.0, 587.0, 636.0, 582.0, 533.0, 633.0, 636.0, 630.0, 630.0, 579.0, 587.0, 630.0, 639.0, 639.0, 579.0, 636.0, 180.0, 587.0, 633.0, 630.0, 627.0, 587.0, 633.0, 633.0, 582.0, 630.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [316.0, 314.0, 319.0, 308.0, 316.0, 317.0, 279.0, 282.0, 316.0, 320.0, 316.0, 314.0, 259.0, 266.0, 314.0, 319.0, 319.0, 317.0, 322.0, 314.0, 291.0, 291.0, 319.0, 317.0, 319.0, 317.0, 319.0, 311.0, 313.0, 320.0, 316.0, 320.0, 317.0, 322.0, 305.0, 325.0, 313.0, 323.0, 294.0, 293.0, 291.0, 291.0, 319.0, 317.0, 271.0, 273.0, 311.0, 322.0, 314.0, 322.0, 316.0, 314.0, 320.0, 319.0, 285.0, 299.0, 321.0, 315.0, 294.0, 285.0, 285.0, 291.0, 319.0, 314.0, 322.0, 317.0, 316.0, 314.0, 317.0, 316.0, 319.0, 314.0, 311.0, 319.0, 324.0, 312.0, 294.0, 288.0, 319.0, 317.0, 314.0, 319.0, 314.0, 322.0, 145.0, 146.0, 319.0, 320.0, 292.0, 284.0, 293.0, 294.0, 313.0, 317.0, 296.0, 291.0, 318.0, 315.0, 313.0, 308.0, 296.0, 286.0, 314.0, 316.0, 298.0, 
286.0, 228.0, 237.0, 319.0, 317.0, 296.0, 286.0, 313.0, 317.0, 327.0, 309.0, 322.0, 314.0, 316.0, 314.0, 319.0, 317.0, 311.0, 313.0, 283.0, 296.0, 310.0, 314.0, 316.0, 317.0, 319.0, 317.0, 289.0, 293.0, 319.0, 317.0, 313.0, 317.0, 319.0, 317.0, 316.0, 320.0, 279.0, 299.0, 319.0, 320.0, 293.0, 289.0, 291.0, 285.0, 293.0, 294.0, 314.0, 322.0, 288.0, 294.0, 254.0, 279.0, 319.0, 314.0, 319.0, 317.0, 317.0, 313.0, 311.0, 319.0, 282.0, 297.0, 295.0, 292.0, 316.0, 314.0, 319.0, 320.0, 316.0, 323.0, 288.0, 291.0, 324.0, 312.0, 91.0, 89.0, 296.0, 291.0, 319.0, 314.0, 311.0, 319.0, 316.0, 311.0, 306.0, 281.0, 321.0, 312.0, 316.0, 317.0, 288.0, 294.0, 311.0, 319.0]}, "sampler_perf": {"mean_env_wait_ms": 0.7675359476043465, "mean_processing_ms": 0.22213291684157827, "mean_inference_ms": 1.3705566142110346}, "off_policy_estimator": {}, "info": {"num_steps_trained": 11472000, "num_steps_sampled": 6118400, "sample_time_ms": 22722.857, "load_time_ms": 39.067, "grad_time_ms": 10783.352, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0003133426944259554, "policy_loss": -0.007279651705175638, "vf_loss": 75.2169418334961, "vf_explained_var": 0.7742903232574463, "kl": 0.0020874079782515764, "entropy": 1.110769271850586, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 6118400, "episodes_total": 15296, "training_iteration": 478, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-54-19", "timestamp": 1660262059, "time_this_iter_s": 34.45740509033203, "time_total_s": 20467.715931653976, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, 
"conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, 
"custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, 
"vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 20467.715931653976, "timesteps_since_restore": 6118400, "iterations_since_restore": 478, "perf": {"cpu_util_percent": 28.777083333333334, "ram_util_percent": 59.04374999999999}}
-{"episode_reward_max": 639.0, "episode_reward_min": 180.0, "episode_reward_mean": 608.24, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 89.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 304.12}, "custom_metrics": {"sparse_reward_mean": 210.8, "sparse_reward_min": 60, "sparse_reward_max": 220, "shaped_reward_mean": 186.64, "shaped_reward_min": 60, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.8, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.38, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.19, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.45, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.27, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.23, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.12, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.08, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.25, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.84, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.51, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.96, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.45, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.89, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.78, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.76, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.6, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.07, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 6, "soup_delivery_agent_0_mean": 5.55, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.02, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.25, 
"optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.84, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.25, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.84, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [606.0, 582.0, 636.0, 563.0, 615.0, 627.0, 582.0, 633.0, 587.0, 627.0, 630.0, 579.0, 630.0, 633.0, 462.0, 630.0, 582.0, 633.0, 630.0, 633.0, 636.0, 636.0, 584.0, 633.0, 582.0, 636.0, 633.0, 630.0, 582.0, 630.0, 573.0, 627.0, 633.0, 636.0, 582.0, 636.0, 630.0, 636.0, 636.0, 578.0, 639.0, 582.0, 576.0, 587.0, 636.0, 582.0, 533.0, 633.0, 636.0, 630.0, 630.0, 579.0, 587.0, 630.0, 639.0, 639.0, 579.0, 636.0, 180.0, 587.0, 633.0, 630.0, 627.0, 587.0, 633.0, 633.0, 582.0, 630.0, 630.0, 627.0, 633.0, 561.0, 636.0, 630.0, 525.0, 633.0, 636.0, 636.0, 582.0, 636.0, 636.0, 630.0, 633.0, 636.0, 639.0, 630.0, 636.0, 587.0, 582.0, 636.0, 544.0, 633.0, 636.0, 630.0, 639.0, 584.0, 636.0, 579.0, 576.0, 633.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [302.0, 304.0, 289.0, 293.0, 319.0, 317.0, 284.0, 279.0, 305.0, 310.0, 319.0, 308.0, 294.0, 288.0, 319.0, 314.0, 293.0, 294.0, 316.0, 311.0, 321.0, 309.0, 283.0, 296.0, 311.0, 319.0, 314.0, 319.0, 239.0, 223.0, 316.0, 314.0, 288.0, 294.0, 319.0, 314.0, 319.0, 311.0, 311.0, 322.0, 316.0, 320.0, 314.0, 322.0, 307.0, 277.0, 314.0, 319.0, 288.0, 294.0, 319.0, 317.0, 316.0, 317.0, 313.0, 317.0, 288.0, 294.0, 310.0, 320.0, 292.0, 281.0, 321.0, 306.0, 316.0, 317.0, 319.0, 317.0, 289.0, 293.0, 319.0, 317.0, 313.0, 317.0, 319.0, 317.0, 316.0, 320.0, 279.0, 299.0, 319.0, 320.0, 293.0, 289.0, 291.0, 285.0, 293.0, 294.0, 314.0, 322.0, 288.0, 294.0, 254.0, 279.0, 319.0, 314.0, 319.0, 
317.0, 317.0, 313.0, 311.0, 319.0, 282.0, 297.0, 295.0, 292.0, 316.0, 314.0, 319.0, 320.0, 316.0, 323.0, 288.0, 291.0, 324.0, 312.0, 91.0, 89.0, 296.0, 291.0, 319.0, 314.0, 311.0, 319.0, 316.0, 311.0, 306.0, 281.0, 321.0, 312.0, 316.0, 317.0, 288.0, 294.0, 311.0, 319.0, 316.0, 314.0, 319.0, 308.0, 316.0, 317.0, 279.0, 282.0, 316.0, 320.0, 316.0, 314.0, 259.0, 266.0, 314.0, 319.0, 319.0, 317.0, 322.0, 314.0, 291.0, 291.0, 319.0, 317.0, 319.0, 317.0, 319.0, 311.0, 313.0, 320.0, 316.0, 320.0, 317.0, 322.0, 305.0, 325.0, 313.0, 323.0, 294.0, 293.0, 291.0, 291.0, 319.0, 317.0, 271.0, 273.0, 311.0, 322.0, 314.0, 322.0, 316.0, 314.0, 320.0, 319.0, 285.0, 299.0, 321.0, 315.0, 294.0, 285.0, 285.0, 291.0, 319.0, 314.0]}, "sampler_perf": {"mean_env_wait_ms": 0.7669199085026867, "mean_processing_ms": 0.22201572097443972, "mean_inference_ms": 1.3701586168616826}, "off_policy_estimator": {}, "info": {"num_steps_trained": 11496000, "num_steps_sampled": 6131200, "sample_time_ms": 22699.68, "load_time_ms": 39.016, "grad_time_ms": 10734.597, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0008129358175210655, "policy_loss": -0.006242450326681137, "vf_loss": 76.11907196044922, "vf_explained_var": 0.7632293701171875, "kl": 0.0021639217156916857, "entropy": 1.1130343675613403, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 6131200, "episodes_total": 15328, "training_iteration": 479, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-54-53", "timestamp": 1660262093, "time_this_iter_s": 33.185157775878906, "time_total_s": 20500.901089429855, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 20500.901089429855, "timesteps_since_restore": 6131200, "iterations_since_restore": 479, "perf": {"cpu_util_percent": 30.472340425531915, "ram_util_percent": 58.97021276595746}}
-{"episode_reward_max": 639.0, "episode_reward_min": 462.0, "episode_reward_mean": 613.67, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 223.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 306.835}, "custom_metrics": {"sparse_reward_mean": 212.8, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 188.07, "shaped_reward_min": 142, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.9, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 19, "onion_pickup_agent_1_mean": 18.41, "onion_pickup_agent_1_min": 14, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 16.34, "useful_onion_pickup_agent_0_min": 13, "useful_onion_pickup_agent_0_max": 19, "useful_onion_pickup_agent_1_mean": 17.56, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.22, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.23, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.1, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.11, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.41, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.9, "potting_onion_agent_1_min": 14, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.5, "dish_pickup_agent_0_min": 5, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 6.05, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.5, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 4.98, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.74, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.78, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.6, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.14, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 6, "soup_delivery_agent_0_mean": 5.56, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.1, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.41, 
"optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.9, "optimal_onion_potting_agent_1_min": 14, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.41, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.9, "viable_onion_potting_agent_1_min": 14, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [633.0, 633.0, 630.0, 579.0, 639.0, 636.0, 639.0, 639.0, 587.0, 636.0, 587.0, 627.0, 633.0, 576.0, 582.0, 633.0, 636.0, 630.0, 630.0, 633.0, 636.0, 525.0, 630.0, 582.0, 587.0, 639.0, 627.0, 582.0, 582.0, 633.0, 636.0, 630.0, 633.0, 633.0, 582.0, 630.0, 630.0, 627.0, 633.0, 561.0, 636.0, 630.0, 525.0, 633.0, 636.0, 636.0, 582.0, 636.0, 636.0, 630.0, 633.0, 636.0, 639.0, 630.0, 636.0, 587.0, 582.0, 636.0, 544.0, 633.0, 636.0, 630.0, 639.0, 584.0, 636.0, 579.0, 576.0, 633.0, 606.0, 582.0, 636.0, 563.0, 615.0, 627.0, 582.0, 633.0, 587.0, 627.0, 630.0, 579.0, 630.0, 633.0, 462.0, 630.0, 582.0, 633.0, 630.0, 633.0, 636.0, 636.0, 584.0, 633.0, 582.0, 636.0, 633.0, 630.0, 582.0, 630.0, 573.0, 627.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [311.0, 322.0, 319.0, 314.0, 308.0, 322.0, 296.0, 283.0, 319.0, 320.0, 316.0, 320.0, 317.0, 322.0, 319.0, 320.0, 288.0, 299.0, 316.0, 320.0, 301.0, 286.0, 313.0, 314.0, 317.0, 316.0, 291.0, 285.0, 286.0, 296.0, 319.0, 314.0, 319.0, 317.0, 316.0, 314.0, 313.0, 317.0, 316.0, 317.0, 314.0, 322.0, 257.0, 268.0, 316.0, 314.0, 296.0, 286.0, 296.0, 291.0, 319.0, 320.0, 305.0, 322.0, 293.0, 289.0, 294.0, 288.0, 308.0, 325.0, 319.0, 317.0, 313.0, 317.0, 321.0, 312.0, 316.0, 317.0, 288.0, 294.0, 311.0, 319.0, 316.0, 314.0, 319.0, 308.0, 316.0, 317.0, 279.0, 282.0, 316.0, 320.0, 316.0, 314.0, 259.0, 266.0, 314.0, 319.0, 319.0, 317.0, 322.0, 314.0, 291.0, 291.0, 319.0, 317.0, 319.0, 
317.0, 319.0, 311.0, 313.0, 320.0, 316.0, 320.0, 317.0, 322.0, 305.0, 325.0, 313.0, 323.0, 294.0, 293.0, 291.0, 291.0, 319.0, 317.0, 271.0, 273.0, 311.0, 322.0, 314.0, 322.0, 316.0, 314.0, 320.0, 319.0, 285.0, 299.0, 321.0, 315.0, 294.0, 285.0, 285.0, 291.0, 319.0, 314.0, 302.0, 304.0, 289.0, 293.0, 319.0, 317.0, 284.0, 279.0, 305.0, 310.0, 319.0, 308.0, 294.0, 288.0, 319.0, 314.0, 293.0, 294.0, 316.0, 311.0, 321.0, 309.0, 283.0, 296.0, 311.0, 319.0, 314.0, 319.0, 239.0, 223.0, 316.0, 314.0, 288.0, 294.0, 319.0, 314.0, 319.0, 311.0, 311.0, 322.0, 316.0, 320.0, 314.0, 322.0, 307.0, 277.0, 314.0, 319.0, 288.0, 294.0, 319.0, 317.0, 316.0, 317.0, 313.0, 317.0, 288.0, 294.0, 310.0, 320.0, 292.0, 281.0, 321.0, 306.0]}, "sampler_perf": {"mean_env_wait_ms": 0.7662995807176626, "mean_processing_ms": 0.22189831707550936, "mean_inference_ms": 1.3696437835161013}, "off_policy_estimator": {}, "info": {"num_steps_trained": 11520000, "num_steps_sampled": 6144000, "sample_time_ms": 22732.007, "load_time_ms": 38.46, "grad_time_ms": 10636.502, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0033598102163523436, "policy_loss": -0.003906731028109789, "vf_loss": 78.25418090820312, "vf_explained_var": 0.768868625164032, "kl": 0.0016973327146843076, "entropy": 1.117727279663086, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 6144000, "episodes_total": 15360, "training_iteration": 480, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-55-24", "timestamp": 1660262124, "time_this_iter_s": 31.27005410194397, "time_total_s": 20532.1711435318, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 20532.1711435318, "timesteps_since_restore": 6144000, "iterations_since_restore": 480, "perf": {"cpu_util_percent": 31.795555555555556, "ram_util_percent": 59.01333333333335}}
-{"episode_reward_max": 639.0, "episode_reward_min": 462.0, "episode_reward_mean": 611.12, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 223.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 305.56}, "custom_metrics": {"sparse_reward_mean": 212.2, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 186.72, "shaped_reward_min": 142, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.73, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.38, "onion_pickup_agent_1_min": 14, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.18, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 19, "useful_onion_pickup_agent_1_mean": 17.57, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.17, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.2, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.07, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.11, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.29, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.89, "potting_onion_agent_1_min": 14, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.52, "dish_pickup_agent_0_min": 5, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.88, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.4, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 4.76, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 6, "dish_drop_agent_0_mean": 0.72, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.67, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.67, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.07, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 6, "soup_delivery_agent_0_mean": 5.61, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.01, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.29, 
"optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.89, "optimal_onion_potting_agent_1_min": 14, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.29, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.89, "viable_onion_potting_agent_1_min": 14, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [578.0, 582.0, 630.0, 587.0, 570.0, 587.0, 630.0, 612.0, 624.0, 633.0, 570.0, 587.0, 582.0, 630.0, 587.0, 636.0, 636.0, 582.0, 552.0, 636.0, 584.0, 633.0, 590.0, 624.0, 630.0, 630.0, 636.0, 609.0, 630.0, 633.0, 633.0, 636.0, 636.0, 579.0, 576.0, 633.0, 606.0, 582.0, 636.0, 563.0, 615.0, 627.0, 582.0, 633.0, 587.0, 627.0, 630.0, 579.0, 630.0, 633.0, 462.0, 630.0, 582.0, 633.0, 630.0, 633.0, 636.0, 636.0, 584.0, 633.0, 582.0, 636.0, 633.0, 630.0, 582.0, 630.0, 573.0, 627.0, 633.0, 633.0, 630.0, 579.0, 639.0, 636.0, 639.0, 639.0, 587.0, 636.0, 587.0, 627.0, 633.0, 576.0, 582.0, 633.0, 636.0, 630.0, 630.0, 633.0, 636.0, 525.0, 630.0, 582.0, 587.0, 639.0, 627.0, 582.0, 582.0, 633.0, 636.0, 630.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [295.0, 283.0, 288.0, 294.0, 313.0, 317.0, 290.0, 297.0, 284.0, 286.0, 299.0, 288.0, 315.0, 315.0, 304.0, 308.0, 317.0, 307.0, 311.0, 322.0, 295.0, 275.0, 296.0, 291.0, 293.0, 289.0, 316.0, 314.0, 288.0, 299.0, 319.0, 317.0, 314.0, 322.0, 291.0, 291.0, 279.0, 273.0, 324.0, 312.0, 291.0, 293.0, 319.0, 314.0, 296.0, 294.0, 311.0, 313.0, 311.0, 319.0, 313.0, 317.0, 319.0, 317.0, 301.0, 308.0, 316.0, 314.0, 311.0, 322.0, 316.0, 317.0, 319.0, 317.0, 321.0, 315.0, 294.0, 285.0, 285.0, 291.0, 319.0, 314.0, 302.0, 304.0, 289.0, 293.0, 319.0, 317.0, 284.0, 279.0, 305.0, 310.0, 319.0, 308.0, 294.0, 288.0, 319.0, 314.0, 293.0, 294.0, 316.0, 311.0, 321.0, 309.0, 283.0, 296.0, 311.0, 
319.0, 314.0, 319.0, 239.0, 223.0, 316.0, 314.0, 288.0, 294.0, 319.0, 314.0, 319.0, 311.0, 311.0, 322.0, 316.0, 320.0, 314.0, 322.0, 307.0, 277.0, 314.0, 319.0, 288.0, 294.0, 319.0, 317.0, 316.0, 317.0, 313.0, 317.0, 288.0, 294.0, 310.0, 320.0, 292.0, 281.0, 321.0, 306.0, 311.0, 322.0, 319.0, 314.0, 308.0, 322.0, 296.0, 283.0, 319.0, 320.0, 316.0, 320.0, 317.0, 322.0, 319.0, 320.0, 288.0, 299.0, 316.0, 320.0, 301.0, 286.0, 313.0, 314.0, 317.0, 316.0, 291.0, 285.0, 286.0, 296.0, 319.0, 314.0, 319.0, 317.0, 316.0, 314.0, 313.0, 317.0, 316.0, 317.0, 314.0, 322.0, 257.0, 268.0, 316.0, 314.0, 296.0, 286.0, 296.0, 291.0, 319.0, 320.0, 305.0, 322.0, 293.0, 289.0, 294.0, 288.0, 308.0, 325.0, 319.0, 317.0, 313.0, 317.0]}, "sampler_perf": {"mean_env_wait_ms": 0.7656785099973358, "mean_processing_ms": 0.2217805887765104, "mean_inference_ms": 1.3690072686883668}, "off_policy_estimator": {}, "info": {"num_steps_trained": 11544000, "num_steps_sampled": 6156800, "sample_time_ms": 22631.205, "load_time_ms": 38.401, "grad_time_ms": 10438.368, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -1.2905181392852683e-05, "policy_loss": -0.007116043474525213, "vf_loss": 76.6054458618164, "vf_explained_var": 0.7700133323669434, "kl": 0.0019200993701815605, "entropy": 1.1147748231887817, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 6156800, "episodes_total": 15392, "training_iteration": 481, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-55-56", "timestamp": 1660262156, "time_this_iter_s": 32.10103392601013, "time_total_s": 20564.27217745781, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 20564.27217745781, "timesteps_since_restore": 6156800, "iterations_since_restore": 481, "perf": {"cpu_util_percent": 31.702222222222222, "ram_util_percent": 59.035555555555575}}
-{"episode_reward_max": 639.0, "episode_reward_min": 513.0, "episode_reward_mean": 613.16, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 249.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 306.58}, "custom_metrics": {"sparse_reward_mean": 212.8, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 187.56, "shaped_reward_min": 152, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.59, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.53, "onion_pickup_agent_1_min": 15, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.04, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 19, "useful_onion_pickup_agent_1_mean": 17.63, "useful_onion_pickup_agent_1_min": 14, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.1, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.2, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.03, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.09, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.17, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 18.08, "potting_onion_agent_1_min": 15, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.7, "dish_pickup_agent_0_min": 5, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.88, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.47, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 4.85, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 6, "dish_drop_agent_0_mean": 0.9, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.63, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.66, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.12, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 6, "soup_delivery_agent_0_mean": 5.61, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.05, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.17, 
"optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 18.08, "optimal_onion_potting_agent_1_min": 15, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.17, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 18.08, "viable_onion_potting_agent_1_min": 15, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [636.0, 630.0, 587.0, 630.0, 621.0, 639.0, 636.0, 630.0, 630.0, 639.0, 561.0, 633.0, 630.0, 627.0, 582.0, 582.0, 636.0, 587.0, 636.0, 630.0, 636.0, 513.0, 576.0, 633.0, 633.0, 630.0, 636.0, 582.0, 587.0, 633.0, 624.0, 633.0, 582.0, 630.0, 573.0, 627.0, 633.0, 633.0, 630.0, 579.0, 639.0, 636.0, 639.0, 639.0, 587.0, 636.0, 587.0, 627.0, 633.0, 576.0, 582.0, 633.0, 636.0, 630.0, 630.0, 633.0, 636.0, 525.0, 630.0, 582.0, 587.0, 639.0, 627.0, 582.0, 582.0, 633.0, 636.0, 630.0, 578.0, 582.0, 630.0, 587.0, 570.0, 587.0, 630.0, 612.0, 624.0, 633.0, 570.0, 587.0, 582.0, 630.0, 587.0, 636.0, 636.0, 582.0, 552.0, 636.0, 584.0, 633.0, 590.0, 624.0, 630.0, 630.0, 636.0, 609.0, 630.0, 633.0, 633.0, 636.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [313.0, 323.0, 313.0, 317.0, 293.0, 294.0, 319.0, 311.0, 308.0, 313.0, 319.0, 320.0, 314.0, 322.0, 316.0, 314.0, 311.0, 319.0, 317.0, 322.0, 273.0, 288.0, 316.0, 317.0, 316.0, 314.0, 313.0, 314.0, 291.0, 291.0, 291.0, 291.0, 314.0, 322.0, 298.0, 289.0, 314.0, 322.0, 316.0, 314.0, 324.0, 312.0, 249.0, 264.0, 280.0, 296.0, 316.0, 317.0, 316.0, 317.0, 311.0, 319.0, 319.0, 317.0, 288.0, 294.0, 296.0, 291.0, 316.0, 317.0, 308.0, 316.0, 319.0, 314.0, 288.0, 294.0, 310.0, 320.0, 292.0, 281.0, 321.0, 306.0, 311.0, 322.0, 319.0, 314.0, 308.0, 322.0, 296.0, 283.0, 319.0, 320.0, 316.0, 320.0, 317.0, 322.0, 319.0, 320.0, 288.0, 299.0, 316.0, 320.0, 301.0, 286.0, 313.0, 314.0, 317.0, 
316.0, 291.0, 285.0, 286.0, 296.0, 319.0, 314.0, 319.0, 317.0, 316.0, 314.0, 313.0, 317.0, 316.0, 317.0, 314.0, 322.0, 257.0, 268.0, 316.0, 314.0, 296.0, 286.0, 296.0, 291.0, 319.0, 320.0, 305.0, 322.0, 293.0, 289.0, 294.0, 288.0, 308.0, 325.0, 319.0, 317.0, 313.0, 317.0, 295.0, 283.0, 288.0, 294.0, 313.0, 317.0, 290.0, 297.0, 284.0, 286.0, 299.0, 288.0, 315.0, 315.0, 304.0, 308.0, 317.0, 307.0, 311.0, 322.0, 295.0, 275.0, 296.0, 291.0, 293.0, 289.0, 316.0, 314.0, 288.0, 299.0, 319.0, 317.0, 314.0, 322.0, 291.0, 291.0, 279.0, 273.0, 324.0, 312.0, 291.0, 293.0, 319.0, 314.0, 296.0, 294.0, 311.0, 313.0, 311.0, 319.0, 313.0, 317.0, 319.0, 317.0, 301.0, 308.0, 316.0, 314.0, 311.0, 322.0, 316.0, 317.0, 319.0, 317.0]}, "sampler_perf": {"mean_env_wait_ms": 0.7650611031190503, "mean_processing_ms": 0.2216638185152556, "mean_inference_ms": 1.3683447229242562}, "off_policy_estimator": {}, "info": {"num_steps_trained": 11568000, "num_steps_sampled": 6169600, "sample_time_ms": 22476.544, "load_time_ms": 38.386, "grad_time_ms": 10500.152, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 5.255569703876972e-05, "policy_loss": -0.006667418871074915, "vf_loss": 72.75797271728516, "vf_explained_var": 0.775852620601654, "kl": 0.0019747635815292597, "entropy": 1.111660122871399, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 6169600, "episodes_total": 15424, "training_iteration": 482, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-56-28", "timestamp": 1660262188, "time_this_iter_s": 31.53903889656067, "time_total_s": 20595.81121635437, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 20595.81121635437, "timesteps_since_restore": 6169600, "iterations_since_restore": 482, "perf": {"cpu_util_percent": 34.43555555555556, "ram_util_percent": 58.98222222222224}}
-{"episode_reward_max": 639.0, "episode_reward_min": 513.0, "episode_reward_mean": 614.53, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 249.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 307.265}, "custom_metrics": {"sparse_reward_mean": 213.4, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 187.73, "shaped_reward_min": 152, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.81, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.41, "onion_pickup_agent_1_min": 15, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.24, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.57, "useful_onion_pickup_agent_1_min": 14, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.11, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.18, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.02, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.03, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.31, "potting_onion_agent_0_min": 13, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.98, "potting_onion_agent_1_min": 15, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.68, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.83, "dish_pickup_agent_1_min": 4, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.41, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 4.86, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.93, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.56, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.63, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.19, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.57, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.12, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.31, 
"optimal_onion_potting_agent_0_min": 13, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.98, "optimal_onion_potting_agent_1_min": 15, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.31, "viable_onion_potting_agent_0_min": 13, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.98, "viable_onion_potting_agent_1_min": 15, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [636.0, 582.0, 627.0, 633.0, 636.0, 630.0, 582.0, 627.0, 630.0, 579.0, 630.0, 630.0, 633.0, 630.0, 630.0, 633.0, 530.0, 636.0, 633.0, 633.0, 576.0, 633.0, 636.0, 630.0, 587.0, 633.0, 633.0, 581.0, 587.0, 633.0, 630.0, 636.0, 582.0, 633.0, 636.0, 630.0, 578.0, 582.0, 630.0, 587.0, 570.0, 587.0, 630.0, 612.0, 624.0, 633.0, 570.0, 587.0, 582.0, 630.0, 587.0, 636.0, 636.0, 582.0, 552.0, 636.0, 584.0, 633.0, 590.0, 624.0, 630.0, 630.0, 636.0, 609.0, 630.0, 633.0, 633.0, 636.0, 636.0, 630.0, 587.0, 630.0, 621.0, 639.0, 636.0, 630.0, 630.0, 639.0, 561.0, 633.0, 630.0, 627.0, 582.0, 582.0, 636.0, 587.0, 636.0, 630.0, 636.0, 513.0, 576.0, 633.0, 633.0, 630.0, 636.0, 582.0, 587.0, 633.0, 624.0, 633.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [322.0, 314.0, 293.0, 289.0, 313.0, 314.0, 314.0, 319.0, 319.0, 317.0, 316.0, 314.0, 281.0, 301.0, 313.0, 314.0, 322.0, 308.0, 291.0, 288.0, 311.0, 319.0, 318.0, 312.0, 319.0, 314.0, 311.0, 319.0, 316.0, 314.0, 316.0, 317.0, 268.0, 262.0, 316.0, 320.0, 316.0, 317.0, 309.0, 324.0, 288.0, 288.0, 314.0, 319.0, 314.0, 322.0, 311.0, 319.0, 296.0, 291.0, 318.0, 315.0, 318.0, 315.0, 296.0, 285.0, 293.0, 294.0, 309.0, 324.0, 316.0, 314.0, 319.0, 317.0, 294.0, 288.0, 308.0, 325.0, 319.0, 317.0, 313.0, 317.0, 295.0, 283.0, 288.0, 294.0, 313.0, 317.0, 290.0, 297.0, 284.0, 286.0, 299.0, 288.0, 315.0, 315.0, 304.0, 308.0, 317.0, 307.0, 311.0, 322.0, 295.0, 275.0, 296.0, 291.0, 293.0, 
289.0, 316.0, 314.0, 288.0, 299.0, 319.0, 317.0, 314.0, 322.0, 291.0, 291.0, 279.0, 273.0, 324.0, 312.0, 291.0, 293.0, 319.0, 314.0, 296.0, 294.0, 311.0, 313.0, 311.0, 319.0, 313.0, 317.0, 319.0, 317.0, 301.0, 308.0, 316.0, 314.0, 311.0, 322.0, 316.0, 317.0, 319.0, 317.0, 313.0, 323.0, 313.0, 317.0, 293.0, 294.0, 319.0, 311.0, 308.0, 313.0, 319.0, 320.0, 314.0, 322.0, 316.0, 314.0, 311.0, 319.0, 317.0, 322.0, 273.0, 288.0, 316.0, 317.0, 316.0, 314.0, 313.0, 314.0, 291.0, 291.0, 291.0, 291.0, 314.0, 322.0, 298.0, 289.0, 314.0, 322.0, 316.0, 314.0, 324.0, 312.0, 249.0, 264.0, 280.0, 296.0, 316.0, 317.0, 316.0, 317.0, 311.0, 319.0, 319.0, 317.0, 288.0, 294.0, 296.0, 291.0, 316.0, 317.0, 308.0, 316.0, 319.0, 314.0]}, "sampler_perf": {"mean_env_wait_ms": 0.764446149993761, "mean_processing_ms": 0.22154654084728279, "mean_inference_ms": 1.3676326972645958}, "off_policy_estimator": {}, "info": {"num_steps_trained": 11592000, "num_steps_sampled": 6182400, "sample_time_ms": 22364.392, "load_time_ms": 38.739, "grad_time_ms": 10348.578, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.000403035432100296, "policy_loss": -0.006678896490484476, "vf_loss": 76.43026733398438, "vf_explained_var": 0.76324862241745, "kl": 0.0020988413598388433, "entropy": 1.1221919059753418, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 6182400, "episodes_total": 15456, "training_iteration": 483, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-56-57", "timestamp": 1660262217, "time_this_iter_s": 29.545005083084106, "time_total_s": 20625.356221437454, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 20625.356221437454, "timesteps_since_restore": 6182400, "iterations_since_restore": 483, "perf": {"cpu_util_percent": 31.23658536585366, "ram_util_percent": 59.02682926829268}}
-{"episode_reward_max": 639.0, "episode_reward_min": 120.0, "episode_reward_mean": 614.09, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 54.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 307.045}, "custom_metrics": {"sparse_reward_mean": 213.0, "sparse_reward_min": 40, "sparse_reward_max": 220, "shaped_reward_mean": 188.09, "shaped_reward_min": 40, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.92, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.26, "onion_pickup_agent_1_min": 4, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.39, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.35, "useful_onion_pickup_agent_1_min": 4, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.15, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.18, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.01, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.03, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.38, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.83, "potting_onion_agent_1_min": 3, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.59, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 6.0, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.51, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.06, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.91, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 0.73, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.55, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.21, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.51, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.16, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.38, 
"optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.83, "optimal_onion_potting_agent_1_min": 3, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.38, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.83, "viable_onion_potting_agent_1_min": 3, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 587.0, 636.0, 582.0, 582.0, 633.0, 630.0, 636.0, 627.0, 633.0, 587.0, 633.0, 639.0, 630.0, 587.0, 582.0, 636.0, 630.0, 636.0, 636.0, 630.0, 636.0, 120.0, 636.0, 630.0, 633.0, 633.0, 633.0, 630.0, 630.0, 630.0, 639.0, 630.0, 633.0, 633.0, 636.0, 636.0, 630.0, 587.0, 630.0, 621.0, 639.0, 636.0, 630.0, 630.0, 639.0, 561.0, 633.0, 630.0, 627.0, 582.0, 582.0, 636.0, 587.0, 636.0, 630.0, 636.0, 513.0, 576.0, 633.0, 633.0, 630.0, 636.0, 582.0, 587.0, 633.0, 624.0, 633.0, 636.0, 582.0, 627.0, 633.0, 636.0, 630.0, 582.0, 627.0, 630.0, 579.0, 630.0, 630.0, 633.0, 630.0, 630.0, 633.0, 530.0, 636.0, 633.0, 633.0, 576.0, 633.0, 636.0, 630.0, 587.0, 633.0, 633.0, 581.0, 587.0, 633.0, 630.0, 636.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [290.0, 292.0, 301.0, 286.0, 324.0, 312.0, 289.0, 293.0, 291.0, 291.0, 313.0, 320.0, 321.0, 309.0, 319.0, 317.0, 316.0, 311.0, 319.0, 314.0, 291.0, 296.0, 319.0, 314.0, 319.0, 320.0, 316.0, 314.0, 291.0, 296.0, 293.0, 289.0, 314.0, 322.0, 311.0, 319.0, 316.0, 320.0, 314.0, 322.0, 319.0, 311.0, 319.0, 317.0, 66.0, 54.0, 314.0, 322.0, 310.0, 320.0, 314.0, 319.0, 319.0, 314.0, 322.0, 311.0, 308.0, 322.0, 311.0, 319.0, 311.0, 319.0, 317.0, 322.0, 316.0, 314.0, 311.0, 322.0, 316.0, 317.0, 319.0, 317.0, 313.0, 323.0, 313.0, 317.0, 293.0, 294.0, 319.0, 311.0, 308.0, 313.0, 319.0, 320.0, 314.0, 322.0, 316.0, 314.0, 311.0, 319.0, 317.0, 322.0, 273.0, 288.0, 316.0, 317.0, 316.0, 
314.0, 313.0, 314.0, 291.0, 291.0, 291.0, 291.0, 314.0, 322.0, 298.0, 289.0, 314.0, 322.0, 316.0, 314.0, 324.0, 312.0, 249.0, 264.0, 280.0, 296.0, 316.0, 317.0, 316.0, 317.0, 311.0, 319.0, 319.0, 317.0, 288.0, 294.0, 296.0, 291.0, 316.0, 317.0, 308.0, 316.0, 319.0, 314.0, 322.0, 314.0, 293.0, 289.0, 313.0, 314.0, 314.0, 319.0, 319.0, 317.0, 316.0, 314.0, 281.0, 301.0, 313.0, 314.0, 322.0, 308.0, 291.0, 288.0, 311.0, 319.0, 318.0, 312.0, 319.0, 314.0, 311.0, 319.0, 316.0, 314.0, 316.0, 317.0, 268.0, 262.0, 316.0, 320.0, 316.0, 317.0, 309.0, 324.0, 288.0, 288.0, 314.0, 319.0, 314.0, 322.0, 311.0, 319.0, 296.0, 291.0, 318.0, 315.0, 318.0, 315.0, 296.0, 285.0, 293.0, 294.0, 309.0, 324.0, 316.0, 314.0, 319.0, 317.0]}, "sampler_perf": {"mean_env_wait_ms": 0.7638319008211926, "mean_processing_ms": 0.22142837816064478, "mean_inference_ms": 1.3669160139559398}, "off_policy_estimator": {}, "info": {"num_steps_trained": 11616000, "num_steps_sampled": 6195200, "sample_time_ms": 22342.562, "load_time_ms": 38.969, "grad_time_ms": 10165.472, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.003674185834825039, "policy_loss": -0.0030232470016926527, "vf_loss": 72.50147247314453, "vf_explained_var": 0.7972453236579895, "kl": 0.002131336135789752, "entropy": 1.1054468154907227, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 6195200, "episodes_total": 15488, "training_iteration": 484, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-57-28", "timestamp": 1660262248, "time_this_iter_s": 30.70011305809021, "time_total_s": 20656.056334495544, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 20656.056334495544, "timesteps_since_restore": 6195200, "iterations_since_restore": 484, "perf": {"cpu_util_percent": 29.970454545454547, "ram_util_percent": 59.07727272727273}}
-{"episode_reward_max": 639.0, "episode_reward_min": 120.0, "episode_reward_mean": 616.19, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 54.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 308.095}, "custom_metrics": {"sparse_reward_mean": 213.6, "sparse_reward_min": 40, "sparse_reward_max": 220, "shaped_reward_mean": 188.99, "shaped_reward_min": 40, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.99, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.25, "onion_pickup_agent_1_min": 4, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.43, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.31, "useful_onion_pickup_agent_1_min": 4, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.17, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.2, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.03, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.09, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 16.54, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.8, "potting_onion_agent_1_min": 3, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.5, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 6.01, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.59, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.1, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.78, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 0.76, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.61, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.17, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.57, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.13, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.54, "optimal_onion_potting_agent_0_min": 
6, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.8, "optimal_onion_potting_agent_1_min": 3, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.54, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.8, "viable_onion_potting_agent_1_min": 3, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": 
{"episode_reward": [579.0, 636.0, 639.0, 590.0, 630.0, 639.0, 639.0, 630.0, 636.0, 636.0, 639.0, 633.0, 636.0, 582.0, 636.0, 621.0, 630.0, 633.0, 630.0, 633.0, 576.0, 582.0, 636.0, 630.0, 630.0, 587.0, 630.0, 630.0, 636.0, 633.0, 633.0, 633.0, 587.0, 633.0, 624.0, 633.0, 636.0, 582.0, 627.0, 633.0, 636.0, 630.0, 582.0, 627.0, 630.0, 579.0, 630.0, 630.0, 633.0, 630.0, 630.0, 633.0, 530.0, 636.0, 633.0, 633.0, 576.0, 633.0, 636.0, 630.0, 587.0, 633.0, 633.0, 581.0, 587.0, 633.0, 630.0, 636.0, 582.0, 587.0, 636.0, 582.0, 582.0, 633.0, 630.0, 636.0, 627.0, 633.0, 587.0, 633.0, 639.0, 630.0, 587.0, 582.0, 636.0, 630.0, 636.0, 636.0, 630.0, 636.0, 120.0, 636.0, 630.0, 633.0, 633.0, 633.0, 630.0, 630.0, 630.0, 639.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [285.0, 294.0, 319.0, 317.0, 319.0, 320.0, 299.0, 291.0, 316.0, 314.0, 319.0, 320.0, 324.0, 315.0, 316.0, 314.0, 314.0, 322.0, 324.0, 312.0, 317.0, 322.0, 314.0, 319.0, 314.0, 322.0, 286.0, 296.0, 317.0, 319.0, 308.0, 313.0, 313.0, 317.0, 319.0, 314.0, 321.0, 309.0, 319.0, 314.0, 299.0, 277.0, 293.0, 289.0, 314.0, 322.0, 311.0, 319.0, 316.0, 314.0, 288.0, 299.0, 311.0, 319.0, 316.0, 314.0, 319.0, 317.0, 319.0, 314.0, 316.0, 317.0, 327.0, 306.0, 296.0, 291.0, 316.0, 317.0, 308.0, 316.0, 319.0, 314.0, 322.0, 314.0, 293.0, 289.0, 313.0, 314.0, 314.0, 319.0, 319.0, 317.0, 316.0, 314.0, 281.0, 301.0, 313.0, 314.0, 322.0, 308.0, 291.0, 288.0, 311.0, 319.0, 318.0, 312.0, 319.0, 314.0, 311.0, 319.0, 316.0, 314.0, 316.0, 317.0, 268.0, 
262.0, 316.0, 320.0, 316.0, 317.0, 309.0, 324.0, 288.0, 288.0, 314.0, 319.0, 314.0, 322.0, 311.0, 319.0, 296.0, 291.0, 318.0, 315.0, 318.0, 315.0, 296.0, 285.0, 293.0, 294.0, 309.0, 324.0, 316.0, 314.0, 319.0, 317.0, 290.0, 292.0, 301.0, 286.0, 324.0, 312.0, 289.0, 293.0, 291.0, 291.0, 313.0, 320.0, 321.0, 309.0, 319.0, 317.0, 316.0, 311.0, 319.0, 314.0, 291.0, 296.0, 319.0, 314.0, 319.0, 320.0, 316.0, 314.0, 291.0, 296.0, 293.0, 289.0, 314.0, 322.0, 311.0, 319.0, 316.0, 320.0, 314.0, 322.0, 319.0, 311.0, 319.0, 317.0, 66.0, 54.0, 314.0, 322.0, 310.0, 320.0, 314.0, 319.0, 319.0, 314.0, 322.0, 311.0, 308.0, 322.0, 311.0, 319.0, 311.0, 319.0, 317.0, 322.0]}, "sampler_perf": {"mean_env_wait_ms": 0.7632241989034774, "mean_processing_ms": 0.22131123162111455, "mean_inference_ms": 1.3662700284091551}, "off_policy_estimator": {}, "info": {"num_steps_trained": 11640000, "num_steps_sampled": 6208000, "sample_time_ms": 22316.513, "load_time_ms": 39.137, "grad_time_ms": 10131.169, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0029844159726053476, "policy_loss": -0.003889852436259389, "vf_loss": 74.29019165039062, "vf_explained_var": 0.7697036862373352, "kl": 0.0019323105225339532, "entropy": 1.1094969511032104, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 6208000, "episodes_total": 15520, "training_iteration": 485, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-58-02", "timestamp": 1660262282, "time_this_iter_s": 33.77160096168518, "time_total_s": 20689.82793545723, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, 
"conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, 
"custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, 
"vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 20689.82793545723, "timesteps_since_restore": 6208000, "iterations_since_restore": 485, "perf": {"cpu_util_percent": 30.51875, "ram_util_percent": 58.97291666666666}}
-{"episode_reward_max": 639.0, "episode_reward_min": 120.0, "episode_reward_mean": 613.81, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 54.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 306.905}, "custom_metrics": {"sparse_reward_mean": 212.6, "sparse_reward_min": 40, "sparse_reward_max": 220, "shaped_reward_mean": 188.61, "shaped_reward_min": 40, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.93, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.16, "onion_pickup_agent_1_min": 4, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.43, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.21, "useful_onion_pickup_agent_1_min": 4, "useful_onion_pickup_agent_1_max": 20, "onion_drop_agent_0_mean": 0.15, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.16, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.03, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.1, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 16.61, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.73, "potting_onion_agent_1_min": 3, "potting_onion_agent_1_max": 20, "dish_pickup_agent_0_mean": 6.46, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 6.02, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.52, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.16, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.79, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 0.72, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.5, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.21, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.47, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.16, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.61, 
"optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.73, "optimal_onion_potting_agent_1_min": 3, "optimal_onion_potting_agent_1_max": 20, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.61, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.73, "viable_onion_potting_agent_1_min": 3, "viable_onion_potting_agent_1_max": 20, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [630.0, 576.0, 636.0, 621.0, 636.0, 636.0, 636.0, 576.0, 587.0, 633.0, 633.0, 582.0, 522.0, 633.0, 579.0, 636.0, 639.0, 582.0, 633.0, 576.0, 627.0, 636.0, 587.0, 639.0, 630.0, 636.0, 579.0, 582.0, 636.0, 582.0, 579.0, 633.0, 587.0, 633.0, 630.0, 636.0, 582.0, 587.0, 636.0, 582.0, 582.0, 633.0, 630.0, 636.0, 627.0, 633.0, 587.0, 633.0, 639.0, 630.0, 587.0, 582.0, 636.0, 630.0, 636.0, 636.0, 630.0, 636.0, 120.0, 636.0, 630.0, 633.0, 633.0, 633.0, 630.0, 630.0, 630.0, 639.0, 579.0, 636.0, 639.0, 590.0, 630.0, 639.0, 639.0, 630.0, 636.0, 636.0, 639.0, 633.0, 636.0, 582.0, 636.0, 621.0, 630.0, 633.0, 630.0, 633.0, 576.0, 582.0, 636.0, 630.0, 630.0, 587.0, 630.0, 630.0, 636.0, 633.0, 633.0, 633.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [313.0, 317.0, 294.0, 282.0, 317.0, 319.0, 310.0, 311.0, 319.0, 317.0, 319.0, 317.0, 309.0, 327.0, 293.0, 283.0, 293.0, 294.0, 311.0, 322.0, 314.0, 319.0, 290.0, 292.0, 262.0, 260.0, 316.0, 317.0, 286.0, 293.0, 317.0, 319.0, 322.0, 317.0, 291.0, 291.0, 319.0, 314.0, 282.0, 294.0, 313.0, 314.0, 320.0, 316.0, 296.0, 291.0, 322.0, 317.0, 306.0, 324.0, 314.0, 322.0, 285.0, 294.0, 289.0, 293.0, 319.0, 317.0, 285.0, 297.0, 283.0, 296.0, 314.0, 319.0, 293.0, 294.0, 309.0, 324.0, 316.0, 314.0, 319.0, 317.0, 290.0, 292.0, 301.0, 286.0, 324.0, 312.0, 289.0, 293.0, 291.0, 291.0, 313.0, 320.0, 321.0, 309.0, 319.0, 317.0, 316.0, 311.0, 319.0, 314.0, 291.0, 296.0, 319.0, 314.0, 319.0, 
320.0, 316.0, 314.0, 291.0, 296.0, 293.0, 289.0, 314.0, 322.0, 311.0, 319.0, 316.0, 320.0, 314.0, 322.0, 319.0, 311.0, 319.0, 317.0, 66.0, 54.0, 314.0, 322.0, 310.0, 320.0, 314.0, 319.0, 319.0, 314.0, 322.0, 311.0, 308.0, 322.0, 311.0, 319.0, 311.0, 319.0, 317.0, 322.0, 285.0, 294.0, 319.0, 317.0, 319.0, 320.0, 299.0, 291.0, 316.0, 314.0, 319.0, 320.0, 324.0, 315.0, 316.0, 314.0, 314.0, 322.0, 324.0, 312.0, 317.0, 322.0, 314.0, 319.0, 314.0, 322.0, 286.0, 296.0, 317.0, 319.0, 308.0, 313.0, 313.0, 317.0, 319.0, 314.0, 321.0, 309.0, 319.0, 314.0, 299.0, 277.0, 293.0, 289.0, 314.0, 322.0, 311.0, 319.0, 316.0, 314.0, 288.0, 299.0, 311.0, 319.0, 316.0, 314.0, 319.0, 317.0, 319.0, 314.0, 316.0, 317.0, 327.0, 306.0]}, "sampler_perf": {"mean_env_wait_ms": 0.762623780459851, "mean_processing_ms": 0.2211964092332032, "mean_inference_ms": 1.3657033505370475}, "off_policy_estimator": {}, "info": {"num_steps_trained": 11664000, "num_steps_sampled": 6220800, "sample_time_ms": 22297.013, "load_time_ms": 39.012, "grad_time_ms": 10179.524, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.004206617828458548, "policy_loss": -0.003324081189930439, "vf_loss": 80.87010192871094, "vf_explained_var": 0.7655022740364075, "kl": 0.001813961542211473, "entropy": 1.1126155853271484, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 6220800, "episodes_total": 15552, "training_iteration": 486, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-58-35", "timestamp": 1660262315, "time_this_iter_s": 33.220837116241455, "time_total_s": 20723.04877257347, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 20723.04877257347, "timesteps_since_restore": 6220800, "iterations_since_restore": 486, "perf": {"cpu_util_percent": 32.0404255319149, "ram_util_percent": 59.24255319148937}}
-{"episode_reward_max": 639.0, "episode_reward_min": 522.0, "episode_reward_mean": 618.92, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 260.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 309.46}, "custom_metrics": {"sparse_reward_mean": 214.6, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 189.72, "shaped_reward_min": 161, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.92, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.48, "onion_pickup_agent_1_min": 15, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.4, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.58, "useful_onion_pickup_agent_1_min": 14, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.15, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.16, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.06, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.08, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 16.57, "potting_onion_agent_0_min": 13, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 18.0, "potting_onion_agent_1_min": 15, "potting_onion_agent_1_max": 20, "dish_pickup_agent_0_mean": 6.42, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 6.07, "dish_pickup_agent_1_min": 4, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.53, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.14, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.7, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.76, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.58, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.22, "soup_pickup_agent_1_min": 4, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.53, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.2, "soup_delivery_agent_1_min": 4, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.57, 
"optimal_onion_potting_agent_0_min": 13, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 18.0, "optimal_onion_potting_agent_1_min": 15, "optimal_onion_potting_agent_1_max": 20, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.57, "viable_onion_potting_agent_0_min": 13, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 18.0, "viable_onion_potting_agent_1_min": 15, "viable_onion_potting_agent_1_max": 20, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [636.0, 639.0, 633.0, 636.0, 630.0, 587.0, 633.0, 630.0, 630.0, 636.0, 636.0, 587.0, 636.0, 630.0, 633.0, 582.0, 633.0, 633.0, 636.0, 561.0, 630.0, 630.0, 567.0, 639.0, 630.0, 627.0, 627.0, 630.0, 587.0, 633.0, 633.0, 582.0, 630.0, 630.0, 630.0, 639.0, 579.0, 636.0, 639.0, 590.0, 630.0, 639.0, 639.0, 630.0, 636.0, 636.0, 639.0, 633.0, 636.0, 582.0, 636.0, 621.0, 630.0, 633.0, 630.0, 633.0, 576.0, 582.0, 636.0, 630.0, 630.0, 587.0, 630.0, 630.0, 636.0, 633.0, 633.0, 633.0, 630.0, 576.0, 636.0, 621.0, 636.0, 636.0, 636.0, 576.0, 587.0, 633.0, 633.0, 582.0, 522.0, 633.0, 579.0, 636.0, 639.0, 582.0, 633.0, 576.0, 627.0, 636.0, 587.0, 639.0, 630.0, 636.0, 579.0, 582.0, 636.0, 582.0, 579.0, 633.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [319.0, 317.0, 319.0, 320.0, 316.0, 317.0, 322.0, 314.0, 318.0, 312.0, 296.0, 291.0, 316.0, 317.0, 316.0, 314.0, 308.0, 322.0, 319.0, 317.0, 314.0, 322.0, 299.0, 288.0, 324.0, 312.0, 313.0, 317.0, 308.0, 325.0, 288.0, 294.0, 316.0, 317.0, 319.0, 314.0, 319.0, 317.0, 277.0, 284.0, 316.0, 314.0, 316.0, 314.0, 278.0, 289.0, 319.0, 320.0, 316.0, 314.0, 305.0, 322.0, 310.0, 317.0, 316.0, 314.0, 296.0, 291.0, 316.0, 317.0, 316.0, 317.0, 288.0, 294.0, 308.0, 322.0, 311.0, 319.0, 311.0, 319.0, 317.0, 322.0, 285.0, 294.0, 319.0, 317.0, 319.0, 320.0, 299.0, 291.0, 316.0, 314.0, 319.0, 320.0, 324.0, 315.0, 316.0, 314.0, 314.0, 322.0, 324.0, 312.0, 317.0, 322.0, 314.0, 319.0, 314.0, 
322.0, 286.0, 296.0, 317.0, 319.0, 308.0, 313.0, 313.0, 317.0, 319.0, 314.0, 321.0, 309.0, 319.0, 314.0, 299.0, 277.0, 293.0, 289.0, 314.0, 322.0, 311.0, 319.0, 316.0, 314.0, 288.0, 299.0, 311.0, 319.0, 316.0, 314.0, 319.0, 317.0, 319.0, 314.0, 316.0, 317.0, 327.0, 306.0, 313.0, 317.0, 294.0, 282.0, 317.0, 319.0, 310.0, 311.0, 319.0, 317.0, 319.0, 317.0, 309.0, 327.0, 293.0, 283.0, 293.0, 294.0, 311.0, 322.0, 314.0, 319.0, 290.0, 292.0, 262.0, 260.0, 316.0, 317.0, 286.0, 293.0, 317.0, 319.0, 322.0, 317.0, 291.0, 291.0, 319.0, 314.0, 282.0, 294.0, 313.0, 314.0, 320.0, 316.0, 296.0, 291.0, 322.0, 317.0, 306.0, 324.0, 314.0, 322.0, 285.0, 294.0, 289.0, 293.0, 319.0, 317.0, 285.0, 297.0, 283.0, 296.0, 314.0, 319.0]}, "sampler_perf": {"mean_env_wait_ms": 0.762033130729778, "mean_processing_ms": 0.22108464880455408, "mean_inference_ms": 1.3652218358269517}, "off_policy_estimator": {}, "info": {"num_steps_trained": 11688000, "num_steps_sampled": 6233600, "sample_time_ms": 22173.711, "load_time_ms": 39.002, "grad_time_ms": 10239.525, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0017484220443293452, "policy_loss": -0.005304198246449232, "vf_loss": 76.0898666381836, "vf_explained_var": 0.767227828502655, "kl": 0.0019641267135739326, "entropy": 1.1127411127090454, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 6233600, "episodes_total": 15584, "training_iteration": 487, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-59-10", "timestamp": 1660262350, "time_this_iter_s": 34.99830985069275, "time_total_s": 20758.047082424164, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 20758.047082424164, "timesteps_since_restore": 6233600, "iterations_since_restore": 487, "perf": {"cpu_util_percent": 33.78367346938776, "ram_util_percent": 59.60612244897959}}
-{"episode_reward_max": 644.0, "episode_reward_min": 522.0, "episode_reward_mean": 619.8, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 260.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 309.9}, "custom_metrics": {"sparse_reward_mean": 215.0, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 189.8, "shaped_reward_min": 160, "shaped_reward_max": 204, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.95, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.48, "onion_pickup_agent_1_min": 15, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 16.5, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.75, "useful_onion_pickup_agent_1_min": 14, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.11, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.13, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.05, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.03, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.57, "potting_onion_agent_0_min": 13, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 18.03, "potting_onion_agent_1_min": 15, "potting_onion_agent_1_max": 20, "dish_pickup_agent_0_mean": 6.38, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 6.09, "dish_pickup_agent_1_min": 4, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.5, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.15, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.63, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.75, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.6, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.21, "soup_pickup_agent_1_min": 4, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.56, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.19, "soup_delivery_agent_1_min": 4, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.57, 
"optimal_onion_potting_agent_0_min": 13, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 18.03, "optimal_onion_potting_agent_1_min": 15, "optimal_onion_potting_agent_1_max": 20, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.57, "viable_onion_potting_agent_0_min": 13, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 18.03, "viable_onion_potting_agent_1_min": 15, "viable_onion_potting_agent_1_max": 20, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 639.0, 639.0, 600.0, 630.0, 639.0, 644.0, 639.0, 633.0, 630.0, 639.0, 630.0, 636.0, 582.0, 630.0, 633.0, 633.0, 633.0, 636.0, 630.0, 630.0, 633.0, 582.0, 636.0, 630.0, 633.0, 630.0, 627.0, 639.0, 582.0, 633.0, 636.0, 636.0, 633.0, 633.0, 633.0, 630.0, 576.0, 636.0, 621.0, 636.0, 636.0, 636.0, 576.0, 587.0, 633.0, 633.0, 582.0, 522.0, 633.0, 579.0, 636.0, 639.0, 582.0, 633.0, 576.0, 627.0, 636.0, 587.0, 639.0, 630.0, 636.0, 579.0, 582.0, 636.0, 582.0, 579.0, 633.0, 636.0, 639.0, 633.0, 636.0, 630.0, 587.0, 633.0, 630.0, 630.0, 636.0, 636.0, 587.0, 636.0, 630.0, 633.0, 582.0, 633.0, 633.0, 636.0, 561.0, 630.0, 630.0, 567.0, 639.0, 630.0, 627.0, 627.0, 630.0, 587.0, 633.0, 633.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [293.0, 286.0, 322.0, 317.0, 319.0, 320.0, 301.0, 299.0, 321.0, 309.0, 317.0, 322.0, 319.0, 325.0, 319.0, 320.0, 316.0, 317.0, 316.0, 314.0, 314.0, 325.0, 314.0, 316.0, 314.0, 322.0, 291.0, 291.0, 313.0, 317.0, 314.0, 319.0, 314.0, 319.0, 316.0, 317.0, 319.0, 317.0, 311.0, 319.0, 313.0, 317.0, 319.0, 314.0, 291.0, 291.0, 319.0, 317.0, 311.0, 319.0, 314.0, 319.0, 316.0, 314.0, 310.0, 317.0, 322.0, 317.0, 288.0, 294.0, 316.0, 317.0, 317.0, 319.0, 319.0, 317.0, 319.0, 314.0, 316.0, 317.0, 327.0, 306.0, 313.0, 317.0, 294.0, 282.0, 317.0, 319.0, 310.0, 311.0, 319.0, 317.0, 319.0, 317.0, 309.0, 327.0, 293.0, 283.0, 293.0, 294.0, 311.0, 322.0, 314.0, 319.0, 290.0, 292.0, 262.0, 
260.0, 316.0, 317.0, 286.0, 293.0, 317.0, 319.0, 322.0, 317.0, 291.0, 291.0, 319.0, 314.0, 282.0, 294.0, 313.0, 314.0, 320.0, 316.0, 296.0, 291.0, 322.0, 317.0, 306.0, 324.0, 314.0, 322.0, 285.0, 294.0, 289.0, 293.0, 319.0, 317.0, 285.0, 297.0, 283.0, 296.0, 314.0, 319.0, 319.0, 317.0, 319.0, 320.0, 316.0, 317.0, 322.0, 314.0, 318.0, 312.0, 296.0, 291.0, 316.0, 317.0, 316.0, 314.0, 308.0, 322.0, 319.0, 317.0, 314.0, 322.0, 299.0, 288.0, 324.0, 312.0, 313.0, 317.0, 308.0, 325.0, 288.0, 294.0, 316.0, 317.0, 319.0, 314.0, 319.0, 317.0, 277.0, 284.0, 316.0, 314.0, 316.0, 314.0, 278.0, 289.0, 319.0, 320.0, 316.0, 314.0, 305.0, 322.0, 310.0, 317.0, 316.0, 314.0, 296.0, 291.0, 316.0, 317.0, 316.0, 317.0, 288.0, 294.0]}, "sampler_perf": {"mean_env_wait_ms": 0.7614456124606471, "mean_processing_ms": 0.22097346860872114, "mean_inference_ms": 1.3647307081670101}, "off_policy_estimator": {}, "info": {"num_steps_trained": 11712000, "num_steps_sampled": 6246400, "sample_time_ms": 21961.201, "load_time_ms": 38.731, "grad_time_ms": 10224.967, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.00313456985168159, "policy_loss": -0.003917869180440903, "vf_loss": 76.04095458984375, "vf_explained_var": 0.7662898898124695, "kl": 0.0023237813729792833, "entropy": 1.1033259630203247, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 6246400, "episodes_total": 15616, "training_iteration": 488, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-59-42", "timestamp": 1660262382, "time_this_iter_s": 32.1832549571991, "time_total_s": 20790.230337381363, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 20790.230337381363, "timesteps_since_restore": 6246400, "iterations_since_restore": 488, "perf": {"cpu_util_percent": 31.12391304347826, "ram_util_percent": 59.10434782608694}}
-{"episode_reward_max": 644.0, "episode_reward_min": 561.0, "episode_reward_mean": 624.65, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 277.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 312.325}, "custom_metrics": {"sparse_reward_mean": 216.8, "sparse_reward_min": 200, "sparse_reward_max": 220, "shaped_reward_mean": 191.05, "shaped_reward_min": 160, "shaped_reward_max": 204, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.97, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 19, "onion_pickup_agent_1_mean": 18.65, "onion_pickup_agent_1_min": 16, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.52, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 18, "useful_onion_pickup_agent_1_mean": 17.98, "useful_onion_pickup_agent_1_min": 14, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.11, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.16, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.05, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.04, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.54, "potting_onion_agent_0_min": 13, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 18.21, "potting_onion_agent_1_min": 16, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.33, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 6.0, "dish_pickup_agent_1_min": 4, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.63, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.17, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.5, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.68, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.71, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.17, "soup_pickup_agent_1_min": 4, "soup_pickup_agent_1_max": 6, "soup_delivery_agent_0_mean": 5.68, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.16, "soup_delivery_agent_1_min": 4, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.54, 
"optimal_onion_potting_agent_0_min": 13, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 18.21, "optimal_onion_potting_agent_1_min": 16, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.54, "viable_onion_potting_agent_0_min": 13, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 18.21, "viable_onion_potting_agent_1_min": 16, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [633.0, 633.0, 636.0, 636.0, 639.0, 639.0, 639.0, 633.0, 639.0, 624.0, 624.0, 636.0, 630.0, 582.0, 636.0, 582.0, 576.0, 639.0, 630.0, 627.0, 630.0, 627.0, 630.0, 630.0, 639.0, 636.0, 636.0, 639.0, 636.0, 636.0, 636.0, 630.0, 636.0, 582.0, 579.0, 633.0, 636.0, 639.0, 633.0, 636.0, 630.0, 587.0, 633.0, 630.0, 630.0, 636.0, 636.0, 587.0, 636.0, 630.0, 633.0, 582.0, 633.0, 633.0, 636.0, 561.0, 630.0, 630.0, 567.0, 639.0, 630.0, 627.0, 627.0, 630.0, 587.0, 633.0, 633.0, 582.0, 579.0, 639.0, 639.0, 600.0, 630.0, 639.0, 644.0, 639.0, 633.0, 630.0, 639.0, 630.0, 636.0, 582.0, 630.0, 633.0, 633.0, 633.0, 636.0, 630.0, 630.0, 633.0, 582.0, 636.0, 630.0, 633.0, 630.0, 627.0, 639.0, 582.0, 633.0, 636.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [314.0, 319.0, 318.0, 315.0, 319.0, 317.0, 314.0, 322.0, 322.0, 317.0, 319.0, 320.0, 319.0, 320.0, 316.0, 317.0, 327.0, 312.0, 314.0, 310.0, 316.0, 308.0, 316.0, 320.0, 308.0, 322.0, 291.0, 291.0, 319.0, 317.0, 294.0, 288.0, 285.0, 291.0, 319.0, 320.0, 311.0, 319.0, 308.0, 319.0, 319.0, 311.0, 313.0, 314.0, 319.0, 311.0, 316.0, 314.0, 322.0, 317.0, 319.0, 317.0, 316.0, 320.0, 319.0, 320.0, 317.0, 319.0, 324.0, 312.0, 319.0, 317.0, 316.0, 314.0, 319.0, 317.0, 285.0, 297.0, 283.0, 296.0, 314.0, 319.0, 319.0, 317.0, 319.0, 320.0, 316.0, 317.0, 322.0, 314.0, 318.0, 312.0, 296.0, 291.0, 316.0, 317.0, 316.0, 314.0, 308.0, 322.0, 319.0, 317.0, 314.0, 322.0, 299.0, 288.0, 324.0, 
312.0, 313.0, 317.0, 308.0, 325.0, 288.0, 294.0, 316.0, 317.0, 319.0, 314.0, 319.0, 317.0, 277.0, 284.0, 316.0, 314.0, 316.0, 314.0, 278.0, 289.0, 319.0, 320.0, 316.0, 314.0, 305.0, 322.0, 310.0, 317.0, 316.0, 314.0, 296.0, 291.0, 316.0, 317.0, 316.0, 317.0, 288.0, 294.0, 293.0, 286.0, 322.0, 317.0, 319.0, 320.0, 301.0, 299.0, 321.0, 309.0, 317.0, 322.0, 319.0, 325.0, 319.0, 320.0, 316.0, 317.0, 316.0, 314.0, 314.0, 325.0, 314.0, 316.0, 314.0, 322.0, 291.0, 291.0, 313.0, 317.0, 314.0, 319.0, 314.0, 319.0, 316.0, 317.0, 319.0, 317.0, 311.0, 319.0, 313.0, 317.0, 319.0, 314.0, 291.0, 291.0, 319.0, 317.0, 311.0, 319.0, 314.0, 319.0, 316.0, 314.0, 310.0, 317.0, 322.0, 317.0, 288.0, 294.0, 316.0, 317.0, 317.0, 319.0]}, "sampler_perf": {"mean_env_wait_ms": 0.7608592858442179, "mean_processing_ms": 0.22086239768906624, "mean_inference_ms": 1.3642056926460657}, "off_policy_estimator": {}, "info": {"num_steps_trained": 11736000, "num_steps_sampled": 6259200, "sample_time_ms": 22026.8, "load_time_ms": 38.825, "grad_time_ms": 10224.17, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0021181919146329165, "policy_loss": -0.004830162972211838, "vf_loss": 74.9923324584961, "vf_explained_var": 0.7754970192909241, "kl": 0.002056455472484231, "entropy": 1.1017627716064453, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 6259200, "episodes_total": 15648, "training_iteration": 489, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_01-00-16", "timestamp": 1660262416, "time_this_iter_s": 33.8461229801178, "time_total_s": 20824.07646036148, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 20824.07646036148, "timesteps_since_restore": 6259200, "iterations_since_restore": 489, "perf": {"cpu_util_percent": 34.53541666666667, "ram_util_percent": 59.26458333333333}}
-{"episode_reward_max": 644.0, "episode_reward_min": 575.0, "episode_reward_mean": 626.29, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 285.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 313.145}, "custom_metrics": {"sparse_reward_mean": 217.2, "sparse_reward_min": 200, "sparse_reward_max": 220, "shaped_reward_mean": 191.89, "shaped_reward_min": 160, "shaped_reward_max": 204, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.1, "onion_pickup_agent_0_min": 14, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.53, "onion_pickup_agent_1_min": 16, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.67, "useful_onion_pickup_agent_0_min": 13, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.99, "useful_onion_pickup_agent_1_min": 14, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.13, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.14, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.03, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.05, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.72, "potting_onion_agent_0_min": 14, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 18.17, "potting_onion_agent_1_min": 16, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.43, "dish_pickup_agent_0_min": 5, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 5.97, "dish_pickup_agent_1_min": 4, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.65, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.24, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.61, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.56, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.67, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.24, "soup_pickup_agent_1_min": 4, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.66, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.21, "soup_delivery_agent_1_min": 4, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.72, 
"optimal_onion_potting_agent_0_min": 14, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 18.17, "optimal_onion_potting_agent_1_min": 16, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.72, "viable_onion_potting_agent_0_min": 14, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 18.17, "viable_onion_potting_agent_1_min": 16, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [636.0, 636.0, 633.0, 575.0, 636.0, 633.0, 639.0, 636.0, 582.0, 636.0, 639.0, 636.0, 639.0, 579.0, 636.0, 636.0, 624.0, 630.0, 624.0, 639.0, 630.0, 633.0, 633.0, 639.0, 630.0, 633.0, 639.0, 630.0, 587.0, 630.0, 636.0, 587.0, 587.0, 633.0, 633.0, 582.0, 579.0, 639.0, 639.0, 600.0, 630.0, 639.0, 644.0, 639.0, 633.0, 630.0, 639.0, 630.0, 636.0, 582.0, 630.0, 633.0, 633.0, 633.0, 636.0, 630.0, 630.0, 633.0, 582.0, 636.0, 630.0, 633.0, 630.0, 627.0, 639.0, 582.0, 633.0, 636.0, 633.0, 633.0, 636.0, 636.0, 639.0, 639.0, 639.0, 633.0, 639.0, 624.0, 624.0, 636.0, 630.0, 582.0, 636.0, 582.0, 576.0, 639.0, 630.0, 627.0, 630.0, 627.0, 630.0, 630.0, 639.0, 636.0, 636.0, 639.0, 636.0, 636.0, 636.0, 630.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [319.0, 317.0, 319.0, 317.0, 316.0, 317.0, 288.0, 287.0, 319.0, 317.0, 319.0, 314.0, 319.0, 320.0, 316.0, 320.0, 285.0, 297.0, 319.0, 317.0, 317.0, 322.0, 314.0, 322.0, 317.0, 322.0, 288.0, 291.0, 317.0, 319.0, 314.0, 322.0, 310.0, 314.0, 314.0, 316.0, 316.0, 308.0, 319.0, 320.0, 316.0, 314.0, 316.0, 317.0, 314.0, 319.0, 319.0, 320.0, 318.0, 312.0, 313.0, 320.0, 315.0, 324.0, 319.0, 311.0, 288.0, 299.0, 316.0, 314.0, 314.0, 322.0, 290.0, 297.0, 296.0, 291.0, 316.0, 317.0, 316.0, 317.0, 288.0, 294.0, 293.0, 286.0, 322.0, 317.0, 319.0, 320.0, 301.0, 299.0, 321.0, 309.0, 317.0, 322.0, 319.0, 325.0, 319.0, 320.0, 316.0, 317.0, 316.0, 314.0, 314.0, 325.0, 314.0, 316.0, 314.0, 
322.0, 291.0, 291.0, 313.0, 317.0, 314.0, 319.0, 314.0, 319.0, 316.0, 317.0, 319.0, 317.0, 311.0, 319.0, 313.0, 317.0, 319.0, 314.0, 291.0, 291.0, 319.0, 317.0, 311.0, 319.0, 314.0, 319.0, 316.0, 314.0, 310.0, 317.0, 322.0, 317.0, 288.0, 294.0, 316.0, 317.0, 317.0, 319.0, 314.0, 319.0, 318.0, 315.0, 319.0, 317.0, 314.0, 322.0, 322.0, 317.0, 319.0, 320.0, 319.0, 320.0, 316.0, 317.0, 327.0, 312.0, 314.0, 310.0, 316.0, 308.0, 316.0, 320.0, 308.0, 322.0, 291.0, 291.0, 319.0, 317.0, 294.0, 288.0, 285.0, 291.0, 319.0, 320.0, 311.0, 319.0, 308.0, 319.0, 319.0, 311.0, 313.0, 314.0, 319.0, 311.0, 316.0, 314.0, 322.0, 317.0, 319.0, 317.0, 316.0, 320.0, 319.0, 320.0, 317.0, 319.0, 324.0, 312.0, 319.0, 317.0, 316.0, 314.0]}, "sampler_perf": {"mean_env_wait_ms": 0.760275311275054, "mean_processing_ms": 0.22075194950197713, "mean_inference_ms": 1.36361755561513}, "off_policy_estimator": {}, "info": {"num_steps_trained": 11760000, "num_steps_sampled": 6272000, "sample_time_ms": 22027.209, "load_time_ms": 38.894, "grad_time_ms": 10417.749, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0028506640810519457, "policy_loss": -0.003879321739077568, "vf_loss": 72.80782318115234, "vf_explained_var": 0.7767069935798645, "kl": 0.0016769763315096498, "entropy": 1.101601243019104, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 6272000, "episodes_total": 15680, "training_iteration": 490, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_01-00-49", "timestamp": 1660262449, "time_this_iter_s": 33.20848989486694, "time_total_s": 20857.284950256348, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 20857.284950256348, "timesteps_since_restore": 6272000, "iterations_since_restore": 490, "perf": {"cpu_util_percent": 36.26808510638298, "ram_util_percent": 59.0851063829787}}
-{"episode_reward_max": 639.0, "episode_reward_min": 444.0, "episode_reward_mean": 624.84, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 222.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 312.42}, "custom_metrics": {"sparse_reward_mean": 216.8, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 191.24, "shaped_reward_min": 124, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.13, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 18.45, "onion_pickup_agent_1_min": 15, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.74, "useful_onion_pickup_agent_0_min": 13, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.83, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.15, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.14, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.03, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.06, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.73, "potting_onion_agent_0_min": 13, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 18.06, "potting_onion_agent_1_min": 14, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.39, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.98, "dish_pickup_agent_1_min": 4, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.59, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.25, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.65, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.52, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.59, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.28, "soup_pickup_agent_1_min": 4, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.59, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.26, "soup_delivery_agent_1_min": 4, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.73, 
"optimal_onion_potting_agent_0_min": 13, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 18.06, "optimal_onion_potting_agent_1_min": 14, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.73, "viable_onion_potting_agent_0_min": 13, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 18.06, "viable_onion_potting_agent_1_min": 14, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [639.0, 627.0, 633.0, 636.0, 627.0, 633.0, 630.0, 627.0, 579.0, 630.0, 636.0, 639.0, 633.0, 630.0, 579.0, 639.0, 633.0, 615.0, 636.0, 630.0, 579.0, 582.0, 639.0, 636.0, 639.0, 444.0, 630.0, 633.0, 624.0, 630.0, 639.0, 639.0, 639.0, 582.0, 633.0, 636.0, 633.0, 633.0, 636.0, 636.0, 639.0, 639.0, 639.0, 633.0, 639.0, 624.0, 624.0, 636.0, 630.0, 582.0, 636.0, 582.0, 576.0, 639.0, 630.0, 627.0, 630.0, 627.0, 630.0, 630.0, 639.0, 636.0, 636.0, 639.0, 636.0, 636.0, 636.0, 630.0, 636.0, 636.0, 633.0, 575.0, 636.0, 633.0, 639.0, 636.0, 582.0, 636.0, 639.0, 636.0, 639.0, 579.0, 636.0, 636.0, 624.0, 630.0, 624.0, 639.0, 630.0, 633.0, 633.0, 639.0, 630.0, 633.0, 639.0, 630.0, 587.0, 630.0, 636.0, 587.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [318.0, 321.0, 311.0, 316.0, 316.0, 317.0, 319.0, 317.0, 311.0, 316.0, 311.0, 322.0, 311.0, 319.0, 307.0, 320.0, 288.0, 291.0, 317.0, 313.0, 319.0, 317.0, 319.0, 320.0, 314.0, 319.0, 314.0, 316.0, 291.0, 288.0, 319.0, 320.0, 316.0, 317.0, 307.0, 308.0, 314.0, 322.0, 311.0, 319.0, 289.0, 290.0, 291.0, 291.0, 316.0, 323.0, 314.0, 322.0, 322.0, 317.0, 222.0, 222.0, 311.0, 319.0, 319.0, 314.0, 313.0, 311.0, 311.0, 319.0, 322.0, 317.0, 314.0, 325.0, 322.0, 317.0, 288.0, 294.0, 316.0, 317.0, 317.0, 319.0, 314.0, 319.0, 318.0, 315.0, 319.0, 317.0, 314.0, 322.0, 322.0, 317.0, 319.0, 320.0, 319.0, 320.0, 316.0, 317.0, 327.0, 312.0, 314.0, 310.0, 316.0, 308.0, 316.0, 320.0, 308.0, 
322.0, 291.0, 291.0, 319.0, 317.0, 294.0, 288.0, 285.0, 291.0, 319.0, 320.0, 311.0, 319.0, 308.0, 319.0, 319.0, 311.0, 313.0, 314.0, 319.0, 311.0, 316.0, 314.0, 322.0, 317.0, 319.0, 317.0, 316.0, 320.0, 319.0, 320.0, 317.0, 319.0, 324.0, 312.0, 319.0, 317.0, 316.0, 314.0, 319.0, 317.0, 319.0, 317.0, 316.0, 317.0, 288.0, 287.0, 319.0, 317.0, 319.0, 314.0, 319.0, 320.0, 316.0, 320.0, 285.0, 297.0, 319.0, 317.0, 317.0, 322.0, 314.0, 322.0, 317.0, 322.0, 288.0, 291.0, 317.0, 319.0, 314.0, 322.0, 310.0, 314.0, 314.0, 316.0, 316.0, 308.0, 319.0, 320.0, 316.0, 314.0, 316.0, 317.0, 314.0, 319.0, 319.0, 320.0, 318.0, 312.0, 313.0, 320.0, 315.0, 324.0, 319.0, 311.0, 288.0, 299.0, 316.0, 314.0, 314.0, 322.0, 290.0, 297.0]}, "sampler_perf": {"mean_env_wait_ms": 0.7596883332679625, "mean_processing_ms": 0.22064007492809667, "mean_inference_ms": 1.3629331607275919}, "off_policy_estimator": {}, "info": {"num_steps_trained": 11784000, "num_steps_sampled": 6284800, "sample_time_ms": 21863.417, "load_time_ms": 38.973, "grad_time_ms": 10519.041, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0017596340039744973, "policy_loss": -0.008380659855902195, "vf_loss": 71.76502227783203, "vf_explained_var": 0.787438690662384, "kl": 0.0020715922582894564, "entropy": 1.110949158668518, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 6284800, "episodes_total": 15712, "training_iteration": 491, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_01-01-21", "timestamp": 1660262481, "time_this_iter_s": 31.476083278656006, "time_total_s": 20888.761033535004, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 20888.761033535004, "timesteps_since_restore": 6284800, "iterations_since_restore": 491, "perf": {"cpu_util_percent": 35.37777777777777, "ram_util_percent": 59.10222222222222}}
-{"episode_reward_max": 639.0, "episode_reward_min": 444.0, "episode_reward_mean": 619.3, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 222.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 309.65}, "custom_metrics": {"sparse_reward_mean": 215.0, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 189.3, "shaped_reward_min": 124, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.17, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 18.3, "onion_pickup_agent_1_min": 15, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.72, "useful_onion_pickup_agent_0_min": 13, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.53, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.21, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.17, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.08, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.08, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.68, "potting_onion_agent_0_min": 13, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.82, "potting_onion_agent_1_min": 13, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.3, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 6.1, "dish_pickup_agent_1_min": 4, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.37, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.23, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.69, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.6, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.51, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.29, "soup_pickup_agent_1_min": 4, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.51, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.26, "soup_delivery_agent_1_min": 4, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.68, 
"optimal_onion_potting_agent_0_min": 13, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.82, "optimal_onion_potting_agent_1_min": 13, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.68, "viable_onion_potting_agent_0_min": 13, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.82, "viable_onion_potting_agent_1_min": 13, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [573.0, 570.0, 582.0, 636.0, 627.0, 636.0, 630.0, 633.0, 636.0, 630.0, 576.0, 587.0, 579.0, 633.0, 636.0, 615.0, 633.0, 630.0, 633.0, 573.0, 579.0, 633.0, 582.0, 636.0, 525.0, 621.0, 627.0, 587.0, 630.0, 639.0, 630.0, 579.0, 636.0, 636.0, 636.0, 630.0, 636.0, 636.0, 633.0, 575.0, 636.0, 633.0, 639.0, 636.0, 582.0, 636.0, 639.0, 636.0, 639.0, 579.0, 636.0, 636.0, 624.0, 630.0, 624.0, 639.0, 630.0, 633.0, 633.0, 639.0, 630.0, 633.0, 639.0, 630.0, 587.0, 630.0, 636.0, 587.0, 639.0, 627.0, 633.0, 636.0, 627.0, 633.0, 630.0, 627.0, 579.0, 630.0, 636.0, 639.0, 633.0, 630.0, 579.0, 639.0, 633.0, 615.0, 636.0, 630.0, 579.0, 582.0, 639.0, 636.0, 639.0, 444.0, 630.0, 633.0, 624.0, 630.0, 639.0, 639.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [285.0, 288.0, 293.0, 277.0, 290.0, 292.0, 317.0, 319.0, 314.0, 313.0, 311.0, 325.0, 313.0, 317.0, 321.0, 312.0, 319.0, 317.0, 313.0, 317.0, 296.0, 280.0, 293.0, 294.0, 293.0, 286.0, 316.0, 317.0, 314.0, 322.0, 307.0, 308.0, 314.0, 319.0, 316.0, 314.0, 319.0, 314.0, 282.0, 291.0, 288.0, 291.0, 319.0, 314.0, 291.0, 291.0, 316.0, 320.0, 263.0, 262.0, 306.0, 315.0, 311.0, 316.0, 293.0, 294.0, 311.0, 319.0, 319.0, 320.0, 311.0, 319.0, 280.0, 299.0, 317.0, 319.0, 324.0, 312.0, 319.0, 317.0, 316.0, 314.0, 319.0, 317.0, 319.0, 317.0, 316.0, 317.0, 288.0, 287.0, 319.0, 317.0, 319.0, 314.0, 319.0, 320.0, 316.0, 320.0, 285.0, 297.0, 319.0, 317.0, 317.0, 322.0, 314.0, 322.0, 317.0, 
322.0, 288.0, 291.0, 317.0, 319.0, 314.0, 322.0, 310.0, 314.0, 314.0, 316.0, 316.0, 308.0, 319.0, 320.0, 316.0, 314.0, 316.0, 317.0, 314.0, 319.0, 319.0, 320.0, 318.0, 312.0, 313.0, 320.0, 315.0, 324.0, 319.0, 311.0, 288.0, 299.0, 316.0, 314.0, 314.0, 322.0, 290.0, 297.0, 318.0, 321.0, 311.0, 316.0, 316.0, 317.0, 319.0, 317.0, 311.0, 316.0, 311.0, 322.0, 311.0, 319.0, 307.0, 320.0, 288.0, 291.0, 317.0, 313.0, 319.0, 317.0, 319.0, 320.0, 314.0, 319.0, 314.0, 316.0, 291.0, 288.0, 319.0, 320.0, 316.0, 317.0, 307.0, 308.0, 314.0, 322.0, 311.0, 319.0, 289.0, 290.0, 291.0, 291.0, 316.0, 323.0, 314.0, 322.0, 322.0, 317.0, 222.0, 222.0, 311.0, 319.0, 319.0, 314.0, 313.0, 311.0, 311.0, 319.0, 322.0, 317.0, 314.0, 325.0]}, "sampler_perf": {"mean_env_wait_ms": 0.7590978232117643, "mean_processing_ms": 0.22052625115278437, "mean_inference_ms": 1.3621520810826262}, "off_policy_estimator": {}, "info": {"num_steps_trained": 11808000, "num_steps_sampled": 6297600, "sample_time_ms": 21732.6, "load_time_ms": 39.061, "grad_time_ms": 10636.833, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0013283310690894723, "policy_loss": -0.00562013266608119, "vf_loss": 75.04083251953125, "vf_explained_var": 0.7633475661277771, "kl": 0.002308204537257552, "entropy": 1.1112407445907593, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 6297600, "episodes_total": 15744, "training_iteration": 492, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_01-01-52", "timestamp": 1660262512, "time_this_iter_s": 31.412389039993286, "time_total_s": 20920.173422574997, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 20920.173422574997, "timesteps_since_restore": 6297600, "iterations_since_restore": 492, "perf": {"cpu_util_percent": 32.93999999999999, "ram_util_percent": 59.11333333333331}}
-{"episode_reward_max": 639.0, "episode_reward_min": 444.0, "episode_reward_mean": 615.98, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 222.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 307.99}, "custom_metrics": {"sparse_reward_mean": 213.8, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 188.38, "shaped_reward_min": 124, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.08, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 18.39, "onion_pickup_agent_1_min": 15, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.55, "useful_onion_pickup_agent_0_min": 13, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.42, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.26, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 7, "onion_drop_agent_1_mean": 0.19, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.17, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 7, "useful_onion_drop_agent_1_mean": 0.08, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.52, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.86, "potting_onion_agent_1_min": 13, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.27, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 6.05, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.33, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.15, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.62, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.64, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.55, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.21, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.55, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.17, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.52, 
"optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.86, "optimal_onion_potting_agent_1_min": 13, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.52, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.86, "viable_onion_potting_agent_1_min": 13, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [639.0, 630.0, 639.0, 627.0, 636.0, 582.0, 630.0, 593.0, 582.0, 627.0, 630.0, 530.0, 639.0, 636.0, 636.0, 519.0, 627.0, 636.0, 636.0, 633.0, 630.0, 627.0, 581.0, 636.0, 633.0, 633.0, 633.0, 633.0, 633.0, 582.0, 636.0, 633.0, 587.0, 630.0, 636.0, 587.0, 639.0, 627.0, 633.0, 636.0, 627.0, 633.0, 630.0, 627.0, 579.0, 630.0, 636.0, 639.0, 633.0, 630.0, 579.0, 639.0, 633.0, 615.0, 636.0, 630.0, 579.0, 582.0, 639.0, 636.0, 639.0, 444.0, 630.0, 633.0, 624.0, 630.0, 639.0, 639.0, 573.0, 570.0, 582.0, 636.0, 627.0, 636.0, 630.0, 633.0, 636.0, 630.0, 576.0, 587.0, 579.0, 633.0, 636.0, 615.0, 633.0, 630.0, 633.0, 573.0, 579.0, 633.0, 582.0, 636.0, 525.0, 621.0, 627.0, 587.0, 630.0, 639.0, 630.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [322.0, 317.0, 311.0, 319.0, 314.0, 325.0, 308.0, 319.0, 316.0, 320.0, 291.0, 291.0, 311.0, 319.0, 291.0, 302.0, 296.0, 286.0, 316.0, 311.0, 318.0, 312.0, 253.0, 277.0, 322.0, 317.0, 319.0, 317.0, 319.0, 317.0, 264.0, 255.0, 314.0, 313.0, 319.0, 317.0, 319.0, 317.0, 315.0, 318.0, 311.0, 319.0, 316.0, 311.0, 295.0, 286.0, 319.0, 317.0, 316.0, 317.0, 317.0, 316.0, 319.0, 314.0, 319.0, 314.0, 321.0, 312.0, 293.0, 289.0, 311.0, 325.0, 316.0, 317.0, 288.0, 299.0, 316.0, 314.0, 314.0, 322.0, 290.0, 297.0, 318.0, 321.0, 311.0, 316.0, 316.0, 317.0, 319.0, 317.0, 311.0, 316.0, 311.0, 322.0, 311.0, 319.0, 307.0, 320.0, 288.0, 291.0, 317.0, 313.0, 319.0, 317.0, 319.0, 320.0, 314.0, 
319.0, 314.0, 316.0, 291.0, 288.0, 319.0, 320.0, 316.0, 317.0, 307.0, 308.0, 314.0, 322.0, 311.0, 319.0, 289.0, 290.0, 291.0, 291.0, 316.0, 323.0, 314.0, 322.0, 322.0, 317.0, 222.0, 222.0, 311.0, 319.0, 319.0, 314.0, 313.0, 311.0, 311.0, 319.0, 322.0, 317.0, 314.0, 325.0, 285.0, 288.0, 293.0, 277.0, 290.0, 292.0, 317.0, 319.0, 314.0, 313.0, 311.0, 325.0, 313.0, 317.0, 321.0, 312.0, 319.0, 317.0, 313.0, 317.0, 296.0, 280.0, 293.0, 294.0, 293.0, 286.0, 316.0, 317.0, 314.0, 322.0, 307.0, 308.0, 314.0, 319.0, 316.0, 314.0, 319.0, 314.0, 282.0, 291.0, 288.0, 291.0, 319.0, 314.0, 291.0, 291.0, 316.0, 320.0, 263.0, 262.0, 306.0, 315.0, 311.0, 316.0, 293.0, 294.0, 311.0, 319.0, 319.0, 320.0, 311.0, 319.0, 280.0, 299.0]}, "sampler_perf": {"mean_env_wait_ms": 0.7584994121289265, "mean_processing_ms": 0.22040925813735274, "mean_inference_ms": 1.3613199972214534}, "off_policy_estimator": {}, "info": {"num_steps_trained": 11832000, "num_steps_sampled": 6310400, "sample_time_ms": 21713.634, "load_time_ms": 39.62, "grad_time_ms": 11039.119, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0016831206157803535, "policy_loss": -0.005174629390239716, "vf_loss": 74.1271743774414, "vf_explained_var": 0.7761192321777344, "kl": 0.002214068779721856, "entropy": 1.1099259853363037, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 6310400, "episodes_total": 15776, "training_iteration": 493, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_01-02-26", "timestamp": 1660262546, "time_this_iter_s": 33.39047908782959, "time_total_s": 20953.563901662827, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 20953.563901662827, "timesteps_since_restore": 6310400, "iterations_since_restore": 493, "perf": {"cpu_util_percent": 34.62978723404255, "ram_util_percent": 59.10212765957445}}
-{"episode_reward_max": 639.0, "episode_reward_min": 519.0, "episode_reward_mean": 617.75, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 253.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 308.875}, "custom_metrics": {"sparse_reward_mean": 214.2, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 189.35, "shaped_reward_min": 159, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.12, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 18.53, "onion_pickup_agent_1_min": 15, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.51, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 17.61, "useful_onion_pickup_agent_1_min": 14, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.32, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 7, "onion_drop_agent_1_mean": 0.16, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.17, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 7, "useful_onion_drop_agent_1_mean": 0.06, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.5, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 18.07, "potting_onion_agent_1_min": 13, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.41, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 5.99, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.46, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.12, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.64, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 0.66, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.65, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.13, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.64, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.1, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.5, 
"optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 18.07, "optimal_onion_potting_agent_1_min": 13, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.5, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 18.07, "viable_onion_potting_agent_1_min": 13, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 567.0, 639.0, 630.0, 639.0, 587.0, 627.0, 639.0, 636.0, 639.0, 630.0, 636.0, 639.0, 636.0, 633.0, 639.0, 564.0, 636.0, 636.0, 633.0, 587.0, 636.0, 636.0, 633.0, 582.0, 630.0, 636.0, 633.0, 582.0, 639.0, 636.0, 633.0, 624.0, 630.0, 639.0, 639.0, 573.0, 570.0, 582.0, 636.0, 627.0, 636.0, 630.0, 633.0, 636.0, 630.0, 576.0, 587.0, 579.0, 633.0, 636.0, 615.0, 633.0, 630.0, 633.0, 573.0, 579.0, 633.0, 582.0, 636.0, 525.0, 621.0, 627.0, 587.0, 630.0, 639.0, 630.0, 579.0, 639.0, 630.0, 639.0, 627.0, 636.0, 582.0, 630.0, 593.0, 582.0, 627.0, 630.0, 530.0, 639.0, 636.0, 636.0, 519.0, 627.0, 636.0, 636.0, 633.0, 630.0, 627.0, 581.0, 636.0, 633.0, 633.0, 633.0, 633.0, 633.0, 582.0, 636.0, 633.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [288.0, 294.0, 284.0, 283.0, 319.0, 320.0, 316.0, 314.0, 322.0, 317.0, 294.0, 293.0, 313.0, 314.0, 319.0, 320.0, 314.0, 322.0, 319.0, 320.0, 314.0, 316.0, 319.0, 317.0, 322.0, 317.0, 309.0, 327.0, 316.0, 317.0, 319.0, 320.0, 275.0, 289.0, 319.0, 317.0, 321.0, 315.0, 316.0, 317.0, 286.0, 301.0, 319.0, 317.0, 319.0, 317.0, 319.0, 314.0, 296.0, 286.0, 316.0, 314.0, 319.0, 317.0, 316.0, 317.0, 288.0, 294.0, 317.0, 322.0, 316.0, 320.0, 319.0, 314.0, 313.0, 311.0, 311.0, 319.0, 322.0, 317.0, 314.0, 325.0, 285.0, 288.0, 293.0, 277.0, 290.0, 292.0, 317.0, 319.0, 314.0, 313.0, 311.0, 325.0, 313.0, 317.0, 321.0, 312.0, 319.0, 317.0, 313.0, 317.0, 296.0, 280.0, 293.0, 294.0, 293.0, 
286.0, 316.0, 317.0, 314.0, 322.0, 307.0, 308.0, 314.0, 319.0, 316.0, 314.0, 319.0, 314.0, 282.0, 291.0, 288.0, 291.0, 319.0, 314.0, 291.0, 291.0, 316.0, 320.0, 263.0, 262.0, 306.0, 315.0, 311.0, 316.0, 293.0, 294.0, 311.0, 319.0, 319.0, 320.0, 311.0, 319.0, 280.0, 299.0, 322.0, 317.0, 311.0, 319.0, 314.0, 325.0, 308.0, 319.0, 316.0, 320.0, 291.0, 291.0, 311.0, 319.0, 291.0, 302.0, 296.0, 286.0, 316.0, 311.0, 318.0, 312.0, 253.0, 277.0, 322.0, 317.0, 319.0, 317.0, 319.0, 317.0, 264.0, 255.0, 314.0, 313.0, 319.0, 317.0, 319.0, 317.0, 315.0, 318.0, 311.0, 319.0, 316.0, 311.0, 295.0, 286.0, 319.0, 317.0, 316.0, 317.0, 317.0, 316.0, 319.0, 314.0, 319.0, 314.0, 321.0, 312.0, 293.0, 289.0, 311.0, 325.0, 316.0, 317.0]}, "sampler_perf": {"mean_env_wait_ms": 0.7579059857165342, "mean_processing_ms": 0.22029379571210359, "mean_inference_ms": 1.3605505316111535}, "off_policy_estimator": {}, "info": {"num_steps_trained": 11856000, "num_steps_sampled": 6323200, "sample_time_ms": 21716.733, "load_time_ms": 39.394, "grad_time_ms": 11172.934, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0015474725514650345, "policy_loss": -0.005591364111751318, "vf_loss": 76.93659210205078, "vf_explained_var": 0.7724745869636536, "kl": 0.002216791734099388, "entropy": 1.1096433401107788, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 6323200, "episodes_total": 15808, "training_iteration": 494, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_01-02-58", "timestamp": 1660262578, "time_this_iter_s": 32.0608389377594, "time_total_s": 20985.624740600586, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 20985.624740600586, "timesteps_since_restore": 6323200, "iterations_since_restore": 494, "perf": {"cpu_util_percent": 31.900000000000002, "ram_util_percent": 59.13260869565216}}
-{"episode_reward_max": 639.0, "episode_reward_min": 180.0, "episode_reward_mean": 615.83, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 89.0}, "policy_reward_max": {"ppo": 332.0}, "policy_reward_mean": {"ppo": 307.915}, "custom_metrics": {"sparse_reward_mean": 213.4, "sparse_reward_min": 60, "sparse_reward_max": 220, "shaped_reward_mean": 189.03, "shaped_reward_min": 60, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.02, "onion_pickup_agent_0_min": 5, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 18.42, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.43, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 17.54, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.27, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 7, "onion_drop_agent_1_mean": 0.14, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.12, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 7, "useful_onion_drop_agent_1_mean": 0.05, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.45, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 18.04, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.49, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 5.95, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 5.52, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.1, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.72, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 0.66, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 6, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.62, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.12, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.6, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.09, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.45, 
"optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 18.04, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.45, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 18.04, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [630.0, 579.0, 636.0, 636.0, 636.0, 627.0, 636.0, 579.0, 639.0, 633.0, 639.0, 576.0, 630.0, 573.0, 630.0, 636.0, 630.0, 519.0, 633.0, 633.0, 636.0, 636.0, 630.0, 639.0, 630.0, 639.0, 587.0, 584.0, 180.0, 630.0, 630.0, 627.0, 630.0, 639.0, 630.0, 579.0, 639.0, 630.0, 639.0, 627.0, 636.0, 582.0, 630.0, 593.0, 582.0, 627.0, 630.0, 530.0, 639.0, 636.0, 636.0, 519.0, 627.0, 636.0, 636.0, 633.0, 630.0, 627.0, 581.0, 636.0, 633.0, 633.0, 633.0, 633.0, 633.0, 582.0, 636.0, 633.0, 582.0, 567.0, 639.0, 630.0, 639.0, 587.0, 627.0, 639.0, 636.0, 639.0, 630.0, 636.0, 639.0, 636.0, 633.0, 639.0, 564.0, 636.0, 636.0, 633.0, 587.0, 636.0, 636.0, 633.0, 582.0, 630.0, 636.0, 633.0, 582.0, 639.0, 636.0, 633.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [319.0, 311.0, 291.0, 288.0, 319.0, 317.0, 304.0, 332.0, 324.0, 312.0, 321.0, 306.0, 324.0, 312.0, 285.0, 294.0, 322.0, 317.0, 316.0, 317.0, 320.0, 319.0, 288.0, 288.0, 321.0, 309.0, 288.0, 285.0, 311.0, 319.0, 314.0, 322.0, 313.0, 317.0, 256.0, 263.0, 314.0, 319.0, 311.0, 322.0, 319.0, 317.0, 319.0, 317.0, 314.0, 316.0, 319.0, 320.0, 319.0, 311.0, 312.0, 327.0, 296.0, 291.0, 296.0, 288.0, 91.0, 89.0, 316.0, 314.0, 311.0, 319.0, 308.0, 319.0, 311.0, 319.0, 319.0, 320.0, 311.0, 319.0, 280.0, 299.0, 322.0, 317.0, 311.0, 319.0, 314.0, 325.0, 308.0, 319.0, 316.0, 320.0, 291.0, 291.0, 311.0, 319.0, 291.0, 302.0, 296.0, 286.0, 316.0, 311.0, 318.0, 312.0, 253.0, 277.0, 322.0, 
317.0, 319.0, 317.0, 319.0, 317.0, 264.0, 255.0, 314.0, 313.0, 319.0, 317.0, 319.0, 317.0, 315.0, 318.0, 311.0, 319.0, 316.0, 311.0, 295.0, 286.0, 319.0, 317.0, 316.0, 317.0, 317.0, 316.0, 319.0, 314.0, 319.0, 314.0, 321.0, 312.0, 293.0, 289.0, 311.0, 325.0, 316.0, 317.0, 288.0, 294.0, 284.0, 283.0, 319.0, 320.0, 316.0, 314.0, 322.0, 317.0, 294.0, 293.0, 313.0, 314.0, 319.0, 320.0, 314.0, 322.0, 319.0, 320.0, 314.0, 316.0, 319.0, 317.0, 322.0, 317.0, 309.0, 327.0, 316.0, 317.0, 319.0, 320.0, 275.0, 289.0, 319.0, 317.0, 321.0, 315.0, 316.0, 317.0, 286.0, 301.0, 319.0, 317.0, 319.0, 317.0, 319.0, 314.0, 296.0, 286.0, 316.0, 314.0, 319.0, 317.0, 316.0, 317.0, 288.0, 294.0, 317.0, 322.0, 316.0, 320.0, 319.0, 314.0]}, "sampler_perf": {"mean_env_wait_ms": 0.7573280240404361, "mean_processing_ms": 0.22018399055339372, "mean_inference_ms": 1.359923906771843}, "off_policy_estimator": {}, "info": {"num_steps_trained": 11880000, "num_steps_sampled": 6336000, "sample_time_ms": 21753.166, "load_time_ms": 39.587, "grad_time_ms": 11166.874, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.002439265139400959, "policy_loss": -0.004945265594869852, "vf_loss": 79.36029815673828, "vf_explained_var": 0.7895925045013428, "kl": 0.0015944234328344464, "entropy": 1.1029914617538452, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 6336000, "episodes_total": 15840, "training_iteration": 495, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_01-03-32", "timestamp": 1660262612, "time_this_iter_s": 34.07752990722656, "time_total_s": 21019.702270507812, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 21019.702270507812, "timesteps_since_restore": 6336000, "iterations_since_restore": 495, "perf": {"cpu_util_percent": 32.84375, "ram_util_percent": 59.13125}}
-{"episode_reward_max": 639.0, "episode_reward_min": 180.0, "episode_reward_mean": 614.45, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 89.0}, "policy_reward_max": {"ppo": 332.0}, "policy_reward_mean": {"ppo": 307.225}, "custom_metrics": {"sparse_reward_mean": 212.8, "sparse_reward_min": 60, "sparse_reward_max": 220, "shaped_reward_mean": 188.85, "shaped_reward_min": 60, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.78, "onion_pickup_agent_0_min": 5, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 18.45, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.32, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 17.65, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.17, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.13, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.02, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.05, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.3, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 18.06, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.65, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 5.87, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 5.67, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.07, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.8, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 0.63, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 6, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.67, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.04, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.63, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.01, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.3, 
"optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 18.06, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.3, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 18.06, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [587.0, 633.0, 636.0, 627.0, 579.0, 630.0, 636.0, 633.0, 582.0, 639.0, 624.0, 630.0, 587.0, 582.0, 582.0, 639.0, 522.0, 639.0, 639.0, 639.0, 636.0, 630.0, 624.0, 639.0, 587.0, 633.0, 636.0, 582.0, 630.0, 579.0, 582.0, 630.0, 633.0, 582.0, 636.0, 633.0, 582.0, 567.0, 639.0, 630.0, 639.0, 587.0, 627.0, 639.0, 636.0, 639.0, 630.0, 636.0, 639.0, 636.0, 633.0, 639.0, 564.0, 636.0, 636.0, 633.0, 587.0, 636.0, 636.0, 633.0, 582.0, 630.0, 636.0, 633.0, 582.0, 639.0, 636.0, 633.0, 630.0, 579.0, 636.0, 636.0, 636.0, 627.0, 636.0, 579.0, 639.0, 633.0, 639.0, 576.0, 630.0, 573.0, 630.0, 636.0, 630.0, 519.0, 633.0, 633.0, 636.0, 636.0, 630.0, 639.0, 630.0, 639.0, 587.0, 584.0, 180.0, 630.0, 630.0, 627.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [293.0, 294.0, 314.0, 319.0, 319.0, 317.0, 318.0, 309.0, 294.0, 285.0, 311.0, 319.0, 319.0, 317.0, 314.0, 319.0, 290.0, 292.0, 319.0, 320.0, 307.0, 317.0, 318.0, 312.0, 296.0, 291.0, 291.0, 291.0, 296.0, 286.0, 319.0, 320.0, 260.0, 262.0, 316.0, 323.0, 319.0, 320.0, 319.0, 320.0, 319.0, 317.0, 316.0, 314.0, 315.0, 309.0, 322.0, 317.0, 296.0, 291.0, 316.0, 317.0, 324.0, 312.0, 294.0, 288.0, 316.0, 314.0, 282.0, 297.0, 288.0, 294.0, 316.0, 314.0, 321.0, 312.0, 293.0, 289.0, 311.0, 325.0, 316.0, 317.0, 288.0, 294.0, 284.0, 283.0, 319.0, 320.0, 316.0, 314.0, 322.0, 317.0, 294.0, 293.0, 313.0, 314.0, 319.0, 320.0, 314.0, 322.0, 319.0, 320.0, 314.0, 316.0, 319.0, 317.0, 322.0, 
317.0, 309.0, 327.0, 316.0, 317.0, 319.0, 320.0, 275.0, 289.0, 319.0, 317.0, 321.0, 315.0, 316.0, 317.0, 286.0, 301.0, 319.0, 317.0, 319.0, 317.0, 319.0, 314.0, 296.0, 286.0, 316.0, 314.0, 319.0, 317.0, 316.0, 317.0, 288.0, 294.0, 317.0, 322.0, 316.0, 320.0, 319.0, 314.0, 319.0, 311.0, 291.0, 288.0, 319.0, 317.0, 304.0, 332.0, 324.0, 312.0, 321.0, 306.0, 324.0, 312.0, 285.0, 294.0, 322.0, 317.0, 316.0, 317.0, 320.0, 319.0, 288.0, 288.0, 321.0, 309.0, 288.0, 285.0, 311.0, 319.0, 314.0, 322.0, 313.0, 317.0, 256.0, 263.0, 314.0, 319.0, 311.0, 322.0, 319.0, 317.0, 319.0, 317.0, 314.0, 316.0, 319.0, 320.0, 319.0, 311.0, 312.0, 327.0, 296.0, 291.0, 296.0, 288.0, 91.0, 89.0, 316.0, 314.0, 311.0, 319.0, 308.0, 319.0]}, "sampler_perf": {"mean_env_wait_ms": 0.7567700204261155, "mean_processing_ms": 0.22008117197400467, "mean_inference_ms": 1.3596748332774586}, "off_policy_estimator": {}, "info": {"num_steps_trained": 11904000, "num_steps_sampled": 6348800, "sample_time_ms": 22347.681, "load_time_ms": 39.524, "grad_time_ms": 11120.722, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.003682489274069667, "policy_loss": -0.0037913068663328886, "vf_loss": 80.29010772705078, "vf_explained_var": 0.7666907906532288, "kl": 0.0018009584164246917, "entropy": 1.1104191541671753, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 6348800, "episodes_total": 15872, "training_iteration": 496, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_01-04-10", "timestamp": 1660262650, "time_this_iter_s": 38.697832107543945, "time_total_s": 21058.400102615356, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 21058.400102615356, "timesteps_since_restore": 6348800, "iterations_since_restore": 496, "perf": {"cpu_util_percent": 33.66909090909091, "ram_util_percent": 59.103636363636355}}
-{"episode_reward_max": 639.0, "episode_reward_min": 180.0, "episode_reward_mean": 612.68, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 89.0}, "policy_reward_max": {"ppo": 332.0}, "policy_reward_mean": {"ppo": 306.34}, "custom_metrics": {"sparse_reward_mean": 212.2, "sparse_reward_min": 60, "sparse_reward_max": 220, "shaped_reward_mean": 188.28, "shaped_reward_min": 60, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.76, "onion_pickup_agent_0_min": 5, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.26, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.26, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.46, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.11, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.14, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.03, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.06, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.32, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.84, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 20, "dish_pickup_agent_0_mean": 6.5, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.77, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 5.66, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.14, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.73, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.53, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 6, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.61, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.1, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.58, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.03, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 3, "optimal_onion_potting_agent_0_mean": 16.32, 
"optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.84, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 20, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.32, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.84, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 20, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [636.0, 633.0, 636.0, 633.0, 636.0, 633.0, 630.0, 630.0, 627.0, 582.0, 630.0, 630.0, 633.0, 636.0, 579.0, 587.0, 633.0, 582.0, 582.0, 636.0, 579.0, 633.0, 633.0, 630.0, 630.0, 582.0, 630.0, 639.0, 579.0, 587.0, 582.0, 639.0, 582.0, 639.0, 636.0, 633.0, 630.0, 579.0, 636.0, 636.0, 636.0, 627.0, 636.0, 579.0, 639.0, 633.0, 639.0, 576.0, 630.0, 573.0, 630.0, 636.0, 630.0, 519.0, 633.0, 633.0, 636.0, 636.0, 630.0, 639.0, 630.0, 639.0, 587.0, 584.0, 180.0, 630.0, 630.0, 627.0, 587.0, 633.0, 636.0, 627.0, 579.0, 630.0, 636.0, 633.0, 582.0, 639.0, 624.0, 630.0, 587.0, 582.0, 582.0, 639.0, 522.0, 639.0, 639.0, 639.0, 636.0, 630.0, 624.0, 639.0, 587.0, 633.0, 636.0, 582.0, 630.0, 579.0, 582.0, 630.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [319.0, 317.0, 314.0, 319.0, 314.0, 322.0, 316.0, 317.0, 314.0, 322.0, 316.0, 317.0, 316.0, 314.0, 316.0, 314.0, 311.0, 316.0, 291.0, 291.0, 316.0, 314.0, 316.0, 314.0, 316.0, 317.0, 312.0, 324.0, 288.0, 291.0, 290.0, 297.0, 314.0, 319.0, 291.0, 291.0, 293.0, 289.0, 319.0, 317.0, 291.0, 288.0, 314.0, 319.0, 319.0, 314.0, 316.0, 314.0, 313.0, 317.0, 293.0, 289.0, 321.0, 309.0, 317.0, 322.0, 293.0, 286.0, 291.0, 296.0, 290.0, 292.0, 322.0, 317.0, 288.0, 294.0, 317.0, 322.0, 316.0, 320.0, 319.0, 314.0, 319.0, 311.0, 291.0, 288.0, 319.0, 317.0, 304.0, 332.0, 324.0, 312.0, 321.0, 306.0, 324.0, 312.0, 285.0, 294.0, 322.0, 317.0, 316.0, 317.0, 320.0, 319.0, 288.0, 288.0, 321.0, 
309.0, 288.0, 285.0, 311.0, 319.0, 314.0, 322.0, 313.0, 317.0, 256.0, 263.0, 314.0, 319.0, 311.0, 322.0, 319.0, 317.0, 319.0, 317.0, 314.0, 316.0, 319.0, 320.0, 319.0, 311.0, 312.0, 327.0, 296.0, 291.0, 296.0, 288.0, 91.0, 89.0, 316.0, 314.0, 311.0, 319.0, 308.0, 319.0, 293.0, 294.0, 314.0, 319.0, 319.0, 317.0, 318.0, 309.0, 294.0, 285.0, 311.0, 319.0, 319.0, 317.0, 314.0, 319.0, 290.0, 292.0, 319.0, 320.0, 307.0, 317.0, 318.0, 312.0, 296.0, 291.0, 291.0, 291.0, 296.0, 286.0, 319.0, 320.0, 260.0, 262.0, 316.0, 323.0, 319.0, 320.0, 319.0, 320.0, 319.0, 317.0, 316.0, 314.0, 315.0, 309.0, 322.0, 317.0, 296.0, 291.0, 316.0, 317.0, 324.0, 312.0, 294.0, 288.0, 316.0, 314.0, 282.0, 297.0, 288.0, 294.0, 316.0, 314.0]}, "sampler_perf": {"mean_env_wait_ms": 0.7562172148453271, "mean_processing_ms": 0.21998071761796992, "mean_inference_ms": 1.359487549617304}, "off_policy_estimator": {}, "info": {"num_steps_trained": 11928000, "num_steps_sampled": 6361600, "sample_time_ms": 22295.567, "load_time_ms": 39.346, "grad_time_ms": 11042.076, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0008341053617186844, "policy_loss": -0.007899199612438679, "vf_loss": 76.18367767333984, "vf_explained_var": 0.7670480608940125, "kl": 0.0021500647999346256, "entropy": 1.106536865234375, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 6361600, "episodes_total": 15904, "training_iteration": 497, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_01-04-44", "timestamp": 1660262684, "time_this_iter_s": 33.68950605392456, "time_total_s": 21092.08960866928, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 21092.08960866928, "timesteps_since_restore": 6361600, "iterations_since_restore": 497, "perf": {"cpu_util_percent": 35.32553191489361, "ram_util_percent": 59.131914893617}}
-{"episode_reward_max": 639.0, "episode_reward_min": 180.0, "episode_reward_mean": 612.64, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 89.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 306.32}, "custom_metrics": {"sparse_reward_mean": 212.2, "sparse_reward_min": 60, "sparse_reward_max": 220, "shaped_reward_mean": 188.24, "shaped_reward_min": 60, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.69, "onion_pickup_agent_0_min": 5, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.39, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.22, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.63, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.09, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.15, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.02, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.07, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.23, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.91, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 20, "dish_pickup_agent_0_mean": 6.5, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.7, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.66, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.13, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.7, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.51, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.66, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.06, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.63, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.98, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 3, "optimal_onion_potting_agent_0_mean": 16.23, 
"optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.91, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 20, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.23, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.91, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 20, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [636.0, 587.0, 630.0, 624.0, 582.0, 636.0, 582.0, 579.0, 636.0, 582.0, 636.0, 636.0, 636.0, 633.0, 630.0, 587.0, 630.0, 587.0, 582.0, 582.0, 630.0, 633.0, 630.0, 624.0, 639.0, 630.0, 630.0, 636.0, 633.0, 639.0, 624.0, 636.0, 180.0, 630.0, 630.0, 627.0, 587.0, 633.0, 636.0, 627.0, 579.0, 630.0, 636.0, 633.0, 582.0, 639.0, 624.0, 630.0, 587.0, 582.0, 582.0, 639.0, 522.0, 639.0, 639.0, 639.0, 636.0, 630.0, 624.0, 639.0, 587.0, 633.0, 636.0, 582.0, 630.0, 579.0, 582.0, 630.0, 636.0, 633.0, 636.0, 633.0, 636.0, 633.0, 630.0, 630.0, 627.0, 582.0, 630.0, 630.0, 633.0, 636.0, 579.0, 587.0, 633.0, 582.0, 582.0, 636.0, 579.0, 633.0, 633.0, 630.0, 630.0, 582.0, 630.0, 639.0, 579.0, 587.0, 582.0, 639.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [314.0, 322.0, 296.0, 291.0, 316.0, 314.0, 308.0, 316.0, 288.0, 294.0, 317.0, 319.0, 291.0, 291.0, 290.0, 289.0, 324.0, 312.0, 288.0, 294.0, 314.0, 322.0, 319.0, 317.0, 319.0, 317.0, 321.0, 312.0, 311.0, 319.0, 290.0, 297.0, 316.0, 314.0, 293.0, 294.0, 288.0, 294.0, 296.0, 286.0, 311.0, 319.0, 316.0, 317.0, 316.0, 314.0, 324.0, 300.0, 322.0, 317.0, 311.0, 319.0, 316.0, 314.0, 319.0, 317.0, 316.0, 317.0, 319.0, 320.0, 310.0, 314.0, 314.0, 322.0, 91.0, 89.0, 316.0, 314.0, 311.0, 319.0, 308.0, 319.0, 293.0, 294.0, 314.0, 319.0, 319.0, 317.0, 318.0, 309.0, 294.0, 285.0, 311.0, 319.0, 319.0, 317.0, 314.0, 319.0, 290.0, 292.0, 319.0, 320.0, 307.0, 317.0, 318.0, 312.0, 296.0, 
291.0, 291.0, 291.0, 296.0, 286.0, 319.0, 320.0, 260.0, 262.0, 316.0, 323.0, 319.0, 320.0, 319.0, 320.0, 319.0, 317.0, 316.0, 314.0, 315.0, 309.0, 322.0, 317.0, 296.0, 291.0, 316.0, 317.0, 324.0, 312.0, 294.0, 288.0, 316.0, 314.0, 282.0, 297.0, 288.0, 294.0, 316.0, 314.0, 319.0, 317.0, 314.0, 319.0, 314.0, 322.0, 316.0, 317.0, 314.0, 322.0, 316.0, 317.0, 316.0, 314.0, 316.0, 314.0, 311.0, 316.0, 291.0, 291.0, 316.0, 314.0, 316.0, 314.0, 316.0, 317.0, 312.0, 324.0, 288.0, 291.0, 290.0, 297.0, 314.0, 319.0, 291.0, 291.0, 293.0, 289.0, 319.0, 317.0, 291.0, 288.0, 314.0, 319.0, 319.0, 314.0, 316.0, 314.0, 313.0, 317.0, 293.0, 289.0, 321.0, 309.0, 317.0, 322.0, 293.0, 286.0, 291.0, 296.0, 290.0, 292.0, 322.0, 317.0]}, "sampler_perf": {"mean_env_wait_ms": 0.7556558584426459, "mean_processing_ms": 0.21987743266667745, "mean_inference_ms": 1.3592501447797594}, "off_policy_estimator": {}, "info": {"num_steps_trained": 11952000, "num_steps_sampled": 6374400, "sample_time_ms": 22219.899, "load_time_ms": 39.602, "grad_time_ms": 11084.562, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0010325837647542357, "policy_loss": -0.007863701321184635, "vf_loss": 73.90652465820312, "vf_explained_var": 0.7726984024047852, "kl": 0.0017767212120816112, "entropy": 1.1190696954727173, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 6374400, "episodes_total": 15936, "training_iteration": 498, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_01-05-16", "timestamp": 1660262716, "time_this_iter_s": 31.8541898727417, "time_total_s": 21123.943798542023, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 21123.943798542023, "timesteps_since_restore": 6374400, "iterations_since_restore": 498, "perf": {"cpu_util_percent": 35.686666666666675, "ram_util_percent": 59.20666666666665}}
-{"episode_reward_max": 639.0, "episode_reward_min": 444.0, "episode_reward_mean": 613.47, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 221.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 306.735}, "custom_metrics": {"sparse_reward_mean": 212.6, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 188.27, "shaped_reward_min": 124, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.05, "onion_pickup_agent_0_min": 14, "onion_pickup_agent_0_max": 26, "onion_pickup_agent_1_mean": 18.31, "onion_pickup_agent_1_min": 16, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.31, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.53, "useful_onion_pickup_agent_1_min": 15, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.29, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 14, "onion_drop_agent_1_mean": 0.13, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.18, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 14, "useful_onion_drop_agent_1_mean": 0.06, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.37, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.85, "potting_onion_agent_1_min": 16, "potting_onion_agent_1_max": 20, "dish_pickup_agent_0_mean": 6.37, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.83, "dish_pickup_agent_1_min": 4, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.53, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.09, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.65, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.57, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.61, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.17, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.57, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.06, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 3, "optimal_onion_potting_agent_0_mean": 16.37, 
"optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.85, "optimal_onion_potting_agent_1_min": 16, "optimal_onion_potting_agent_1_max": 20, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.37, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.85, "viable_onion_potting_agent_1_min": 16, "viable_onion_potting_agent_1_max": 20, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [636.0, 578.0, 587.0, 570.0, 587.0, 582.0, 587.0, 582.0, 573.0, 636.0, 630.0, 636.0, 633.0, 600.0, 633.0, 444.0, 630.0, 636.0, 636.0, 633.0, 636.0, 630.0, 582.0, 584.0, 639.0, 633.0, 630.0, 587.0, 587.0, 582.0, 633.0, 630.0, 630.0, 579.0, 582.0, 630.0, 636.0, 633.0, 636.0, 633.0, 636.0, 633.0, 630.0, 630.0, 627.0, 582.0, 630.0, 630.0, 633.0, 636.0, 579.0, 587.0, 633.0, 582.0, 582.0, 636.0, 579.0, 633.0, 633.0, 630.0, 630.0, 582.0, 630.0, 639.0, 579.0, 587.0, 582.0, 639.0, 636.0, 587.0, 630.0, 624.0, 582.0, 636.0, 582.0, 579.0, 636.0, 582.0, 636.0, 636.0, 636.0, 633.0, 630.0, 587.0, 630.0, 587.0, 582.0, 582.0, 630.0, 633.0, 630.0, 624.0, 639.0, 630.0, 630.0, 636.0, 633.0, 639.0, 624.0, 636.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [319.0, 317.0, 290.0, 288.0, 288.0, 299.0, 285.0, 285.0, 296.0, 291.0, 291.0, 291.0, 288.0, 299.0, 291.0, 291.0, 285.0, 288.0, 324.0, 312.0, 316.0, 314.0, 319.0, 317.0, 316.0, 317.0, 301.0, 299.0, 311.0, 322.0, 221.0, 223.0, 308.0, 322.0, 319.0, 317.0, 317.0, 319.0, 319.0, 314.0, 309.0, 327.0, 314.0, 316.0, 286.0, 296.0, 293.0, 291.0, 324.0, 315.0, 319.0, 314.0, 314.0, 316.0, 296.0, 291.0, 293.0, 294.0, 296.0, 286.0, 314.0, 319.0, 316.0, 314.0, 316.0, 314.0, 282.0, 297.0, 288.0, 294.0, 316.0, 314.0, 319.0, 317.0, 314.0, 319.0, 314.0, 322.0, 316.0, 317.0, 314.0, 322.0, 316.0, 317.0, 316.0, 314.0, 316.0, 314.0, 311.0, 316.0, 291.0, 291.0, 316.0, 314.0, 316.0, 314.0, 316.0, 
317.0, 312.0, 324.0, 288.0, 291.0, 290.0, 297.0, 314.0, 319.0, 291.0, 291.0, 293.0, 289.0, 319.0, 317.0, 291.0, 288.0, 314.0, 319.0, 319.0, 314.0, 316.0, 314.0, 313.0, 317.0, 293.0, 289.0, 321.0, 309.0, 317.0, 322.0, 293.0, 286.0, 291.0, 296.0, 290.0, 292.0, 322.0, 317.0, 314.0, 322.0, 296.0, 291.0, 316.0, 314.0, 308.0, 316.0, 288.0, 294.0, 317.0, 319.0, 291.0, 291.0, 290.0, 289.0, 324.0, 312.0, 288.0, 294.0, 314.0, 322.0, 319.0, 317.0, 319.0, 317.0, 321.0, 312.0, 311.0, 319.0, 290.0, 297.0, 316.0, 314.0, 293.0, 294.0, 288.0, 294.0, 296.0, 286.0, 311.0, 319.0, 316.0, 317.0, 316.0, 314.0, 324.0, 300.0, 322.0, 317.0, 311.0, 319.0, 316.0, 314.0, 319.0, 317.0, 316.0, 317.0, 319.0, 320.0, 310.0, 314.0, 314.0, 322.0]}, "sampler_perf": {"mean_env_wait_ms": 0.7550828949863245, "mean_processing_ms": 0.21976950142528295, "mean_inference_ms": 1.358698286827804}, "off_policy_estimator": {}, "info": {"num_steps_trained": 11976000, "num_steps_sampled": 6387200, "sample_time_ms": 22150.153, "load_time_ms": 39.223, "grad_time_ms": 10918.039, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0016517750918865204, "policy_loss": -0.00551184406504035, "vf_loss": 77.22379302978516, "vf_explained_var": 0.7703518271446228, "kl": 0.002163690747693181, "entropy": 1.1175265312194824, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 6387200, "episodes_total": 15968, "training_iteration": 499, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_01-05-48", "timestamp": 1660262748, "time_this_iter_s": 31.469547986984253, "time_total_s": 21155.413346529007, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 21155.413346529007, "timesteps_since_restore": 6387200, "iterations_since_restore": 499, "perf": {"cpu_util_percent": 36.49555555555556, "ram_util_percent": 59.12888888888887}}
-{"episode_reward_max": 639.0, "episode_reward_min": 66.0, "episode_reward_mean": 605.68, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 32.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 302.84}, "custom_metrics": {"sparse_reward_mean": 209.8, "sparse_reward_min": 20, "sparse_reward_max": 220, "shaped_reward_mean": 186.08, "shaped_reward_min": 26, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.84, "onion_pickup_agent_0_min": 2, "onion_pickup_agent_0_max": 26, "onion_pickup_agent_1_mean": 18.11, "onion_pickup_agent_1_min": 5, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.05, "useful_onion_pickup_agent_0_min": 2, "useful_onion_pickup_agent_0_max": 19, "useful_onion_pickup_agent_1_mean": 17.31, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.27, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 14, "onion_drop_agent_1_mean": 0.15, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.19, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 14, "useful_onion_drop_agent_1_mean": 0.08, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.21, "potting_onion_agent_0_min": 2, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.69, "potting_onion_agent_1_min": 4, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.45, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.87, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.43, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 4.98, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.81, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 0.63, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.52, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.11, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.47, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.03, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.21, 
"optimal_onion_potting_agent_0_min": 2, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.69, "optimal_onion_potting_agent_1_min": 4, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.21, "viable_onion_potting_agent_0_min": 2, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.69, "viable_onion_potting_agent_1_min": 4, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [636.0, 633.0, 639.0, 582.0, 636.0, 633.0, 66.0, 576.0, 630.0, 636.0, 582.0, 582.0, 627.0, 636.0, 639.0, 627.0, 421.0, 587.0, 582.0, 633.0, 627.0, 636.0, 582.0, 627.0, 630.0, 630.0, 633.0, 633.0, 636.0, 582.0, 633.0, 570.0, 579.0, 587.0, 582.0, 639.0, 636.0, 587.0, 630.0, 624.0, 582.0, 636.0, 582.0, 579.0, 636.0, 582.0, 636.0, 636.0, 636.0, 633.0, 630.0, 587.0, 630.0, 587.0, 582.0, 582.0, 630.0, 633.0, 630.0, 624.0, 639.0, 630.0, 630.0, 636.0, 633.0, 639.0, 624.0, 636.0, 636.0, 578.0, 587.0, 570.0, 587.0, 582.0, 587.0, 582.0, 573.0, 636.0, 630.0, 636.0, 633.0, 600.0, 633.0, 444.0, 630.0, 636.0, 636.0, 633.0, 636.0, 630.0, 582.0, 584.0, 639.0, 633.0, 630.0, 587.0, 587.0, 582.0, 633.0, 630.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [311.0, 325.0, 314.0, 319.0, 317.0, 322.0, 290.0, 292.0, 319.0, 317.0, 312.0, 321.0, 34.0, 32.0, 288.0, 288.0, 316.0, 314.0, 322.0, 314.0, 293.0, 289.0, 293.0, 289.0, 316.0, 311.0, 316.0, 320.0, 317.0, 322.0, 304.0, 323.0, 202.0, 219.0, 294.0, 293.0, 288.0, 294.0, 319.0, 314.0, 322.0, 305.0, 319.0, 317.0, 291.0, 291.0, 308.0, 319.0, 314.0, 316.0, 311.0, 319.0, 316.0, 317.0, 316.0, 317.0, 319.0, 317.0, 288.0, 294.0, 316.0, 317.0, 280.0, 290.0, 293.0, 286.0, 291.0, 296.0, 290.0, 292.0, 322.0, 317.0, 314.0, 322.0, 296.0, 291.0, 316.0, 314.0, 308.0, 316.0, 288.0, 294.0, 317.0, 319.0, 291.0, 291.0, 290.0, 289.0, 324.0, 312.0, 288.0, 294.0, 314.0, 322.0, 319.0, 317.0, 319.0, 
317.0, 321.0, 312.0, 311.0, 319.0, 290.0, 297.0, 316.0, 314.0, 293.0, 294.0, 288.0, 294.0, 296.0, 286.0, 311.0, 319.0, 316.0, 317.0, 316.0, 314.0, 324.0, 300.0, 322.0, 317.0, 311.0, 319.0, 316.0, 314.0, 319.0, 317.0, 316.0, 317.0, 319.0, 320.0, 310.0, 314.0, 314.0, 322.0, 319.0, 317.0, 290.0, 288.0, 288.0, 299.0, 285.0, 285.0, 296.0, 291.0, 291.0, 291.0, 288.0, 299.0, 291.0, 291.0, 285.0, 288.0, 324.0, 312.0, 316.0, 314.0, 319.0, 317.0, 316.0, 317.0, 301.0, 299.0, 311.0, 322.0, 221.0, 223.0, 308.0, 322.0, 319.0, 317.0, 317.0, 319.0, 319.0, 314.0, 309.0, 327.0, 314.0, 316.0, 286.0, 296.0, 293.0, 291.0, 324.0, 315.0, 319.0, 314.0, 314.0, 316.0, 296.0, 291.0, 293.0, 294.0, 296.0, 286.0, 314.0, 319.0, 316.0, 314.0]}, "sampler_perf": {"mean_env_wait_ms": 0.7545081908133792, "mean_processing_ms": 0.21966006909098515, "mean_inference_ms": 1.3581228643420677}, "off_policy_estimator": {}, "info": {"num_steps_trained": 12000000, "num_steps_sampled": 6400000, "sample_time_ms": 22245.607, "load_time_ms": 39.734, "grad_time_ms": 10935.257, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0013599375961348414, "policy_loss": -0.008364356122910976, "vf_loss": 75.6484375, "vf_explained_var": 0.7971202731132507, "kl": 0.002114498522132635, "entropy": 1.1208560466766357, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 6400000, "episodes_total": 16000, "training_iteration": 500, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_01-06-22", "timestamp": 1660262782, "time_this_iter_s": 34.34099221229553, "time_total_s": 21189.754338741302, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": 
[[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 21189.754338741302, "timesteps_since_restore": 6400000, "iterations_since_restore": 500, "perf": {"cpu_util_percent": 34.074999999999996, "ram_util_percent": 59.19375}}
diff --git a/human_aware_rl/ppo/trained_example/checkpoint_000500/.is_checkpoint b/human_aware_rl/ppo/trained_example/checkpoint_000500/.is_checkpoint
new file mode 100644
index 00000000..e69de29b
diff --git a/human_aware_rl/ppo/trained_example/checkpoint_000500/.tune_metadata b/human_aware_rl/ppo/trained_example/checkpoint_000500/.tune_metadata
new file mode 100644
index 0000000000000000000000000000000000000000..1cb609180e0541d45df640dfe4b966df758b3d89
GIT binary patch
literal 34495
zcmeHwdw68WRcE)}uhB@F_ly(Eb{sT@ku|a>*u=4A<gsjP)E1t}6Nl@o?%V1v^-K5O
zp0ONhGk!}>1y9HY5=g>?Kp+Vi10f^^J|<3xlaC}gA#9cp0yccGe4G8S_uGWc1{U^r
z>UHmx`nG1g`(Ik$%&FI@s#B*<Rh_Cjb^C1R&wlc;l=xc<#`q5B{+i<!%TC3&in)M~
zX7e{3yYW~)d;PH+C$7Ifd)<vUIt#}p?HjWcZQpP#;JroPac#d?tps!5N%NgPa(lj0
z^DMvW+od4G`zT6<W~M&)(Z6~g_3=TcR`jYlN8`<%y_xryY|pn`$E%n8favLSPTQrr
zES<u8?9-@h(XqUm?RpMS&)6>L+*7HSYb)c~s_TrGk-2EQuDufQE-!1BY&Upsz&iyB
z+?>Bsb10@Qe?zdb{&;XtFgDxH(>Dc~Oy;itz@GrKDL+`4?vcDHP6dwvkMBYuR>84L
zeqqHrQ+1ad*9&G-qJ9vQbsgJxa*>oeAI#Q0zgo7+j_($;o@mxcjC>^7m7p+K*gf5I
z9sT`zK~*=r39l&7hIv~tLV00NBf7aHJSFMgT#$V~)pzxD&tJ#TM!bSu1O1Y%QM#Ud
z_Pb;E?#mmx;&_rS6t@;DZg^91rq<_TXa~U_+pk)+Vs@!sv(UYezKLk<3SBkkKmxv@
z;^NQ^HF5NY)EvD;YJh(!)Brt!nk(v_ldqRBdM=4Yqs_A=(6&vkS3qJDzBQ{XnSI;j
zN+5B+Vqea!UW9z@Ylxk|v&P65Pe&6DH7GinF^!QE=(s#C9v8=!S2xR)z@4UfE}6I{
zyh^SpnPpA$BoKF~R`va2Wl;~vZ6~XPNf{D2mWY2z1U2DQ;|h|wl!%`|(1B_Nx<Hf3
z{Bu}Io_uIREhYOKqZ{t@$Cio&Y(vVv7<MC4Vw`zH-#0%}dqXo)qI~I#72b#xU>j2Q
z#<2Cg)ws3c`v2d5;?u8wbV*aB$7$nShDUM2nrAZf#aY#az8J?I{Wb{fya@V|8P|lq
zDF61v__t58AZez0$L^<QUVPwJZjsu~-bToVTdG{8Of<wbOiFJ<M2t7T@P7LHSRxTo
zmTX5vloN3x^sLpWjiDUH%YT)cx*O$5;7hYChCT6G5-E#uNp{sEic5*rBr~g77DHK_
zXJ!tQ3}`TEi@rOc<@$i0v^U`!Ud3mnBy>Yfd@g86O^iok7WnqHYKm-Fe3U=&@t=fl
zsELoP{kfu7Xfj=6KEjyat!UZT5ZmyjJw9F<A{#o?-bxL`26V)Da`;}_gX~GBBg&C&
z=>T#99eR3dNP9TI<kF$pqD8-9rUdRZ&2!1b#dsxlYfo-ZT$E*Pv93!dE}3(CMeCEM
zXemb@QsaJ416;$W__Wdp)vzZ%&bLqy<H|M^M47P-1yLTvDbSNsgXV>@6EFQTH90rP
zlE9HBISgasbtI7$<4-(GQYFc3YLX*?to>dU8~)^d|Gz*}eD=G$^q6mmZFsb4%r`_f
zbZnY?wjwv+j2=(>8e(5O9Z~7&R!o<Q%Ry0c&M6gf%(c?!B;>^hNYl|AIgo&#KuZ#O
z0#_PR6XOeM&wbx)N=;N;;yu)mnq=PWm2JJK9sHgKxP~WjF>i!w_z_3lLP3ldaojD%
zM)|NkY61mUR%<v*wM&-NOSG1<tv++bHu(&1;(E<?*$kC&{iZ3EF`mWsoTk-9`4`uB
zw##OyjO#sDX`bk#HMOyCz|44mndT*BG~B$ze1_0?zUG9+_!-aGoX{vkFEL*Np;w$P
z+6yJ;QiXH0c{an%xNL5d%a9ls&1NLVI2o7BW|c+x7#GZKav2iia(Sg<N0M|UsS&zq
zN%;&P<DI=-HbZ4RTT?1yoQ&sdT3wWtmzFJo${)(wK5mq-W)zCq?d39F=W8z}a{^Z{
zD(@>Mx(R=`uQ-{>7nL`G=&=~Hl4LMRcH{k&lskdf7nk{!)7^yI7g2RGyDu(t0^L_)
z(@=6e?^1=D?vtA3OW^T#*<J~iP59chx@3-Smo0(Ht29rN1TRT!O(!H}OyK6W`CbvB
zP59Zo!eoYSo3AmUb=VQe%Y9s%=887%ybFhLOWske<g4NWyu0MsZpCpC;O(^<Me-dg
z&hmWQm-plY*}7{jm8@*l$>-G#d3=)YB8-w7roAw?Ft5%S`h)?3RjPU(rQ_N>e|>o2
z-|*VJ<D;K`*E4c2)?cfZirE#No&6dPNTxeV!_TVp?$h~AY4=|HeYH~^LK-Y7N_Me=
z+w{{gKqw5V0!}RtD)QQvN@4WWhmO8p*4vHSZ!B0VVOlDElghF~rEO*i8a<^@I?J%B
zD1o1K&y!{ePUSRMN(LF8U9Od!TyR(F*KK995GMMrpjIXBBSQzN3UAIk|G5<3k{sqn
za-Hv#Rk_NRLs>m&q0r_9ke->ThrcI!l#gsq#o(ElTA7!ryEdnWV^W;Nz4V}Fy+$lK
zm93ZdD-w(S3_6p&7r;~tGn7Knv&fhwm>uQAs`oXiAV!O-G}I7eomYk$-WGf`Z7pDb
zi5ep46TYn8<0G4jr`E*qWuulAe&Mpht9T#i^CN>p8_;MYn70RGGgA-k7lh4BJ@`+`
zV`3f>5h>tI=p^kqE4hrMw`@)_*&lwNN-`y%Eofw2sX-2_&`A$IuaF;pT6w}sx8mr!
z&dPI|2+gMUtyMIjM>o(^8d9h$H8Cdp*-_mtll^--M!4YN>RA=@@E5g5x68zY6zW{2
zuF#{aFtsa*$G~_&m25gLB<w9JDddK(BFv>5c=mo>j;?F<1r=jRHJxi1Ws<^<(=9!F
zgU<dw<p~SaOn9iKk_;2JcK6wH3TJI-vnS+_NFkv$o(Y9)a15!2_^{GZjjZVj$<XD5
zJk&8Jdswn=yQwtHrTMIKIecei3R?6FUVxD~cP`!fcaa=$%xntHAU3-#KdrjcJ*zy+
z7j(>!_Ka%JmYC%iRFd(Sa@_lLF5yDpYFZj<HJ$5ajdMYJ?$aLOBA07CI=erlaJ*6F
zF*wW5tC*Tjsy(e@mS5J$Q`$4HlU~p<6I(p@sTfo8vW{`5RFbDl_G&7odR}>I&nu5R
zp*_MyyOz)Cm?7oyrnKi}<*D7LJXP%p%P|FN&+>VNY)Ey_>6jOk$DLQ6nog=s>7-HZ
z8Pc99?HN@b?}GN6SDug_5z(-}Oz#-vny#XDL6u|ZY0^@ER3UrME6?&-<uOuM(lN$k
zIwj2hvdU$WOo66>roiPF6mmE&rc{ipJw~*apVmodwP#d&rj*C@X|1MWm~g?m!g&=F
z(xXXrhg6cOs{zM!Ij$jNROd1=_vx7P%Hs<cwTEpVRY|5&Q<ov$bfxi>UQoG=Cv3$9
zm1L?2IX|zGeBD6f@y@Cw)8u8{imfHro>$1#Ddq9bY0r7(F(Of(QZfF7@_5=~`aPV!
zh7^uFuRL4lF?UqsXyn=jjeMW-7`B+&%O>f9!ZFM=L&mh-^o1eAa3joRTA@oeQ-Sxg
zs=_<3Jr}e`$84=4oX2Vkc`JX+fUW5&8qhPOvYSC<2y@TrlJC<V;YI~A>~39!X+=o$
zsLCFetjpPovzbDt&Ik1V3fB60>>S4UK{6t@T>FewUGS`$Tg~Ehhzu3WV)F~r@^4gc
zf8vU<e^mR#VS$egx#K#OtP`TWd2nQU*M<M7&<+SRJ4^Qc4NHCI19RuU)%jV4J|xg-
zQ+dWN`eC8pd*r8YeapxHo5C6~Sg1SaxFMcoLMkP;!gXnGpSNMqzhWt+ip=fn?913#
znXM!HiU=vinSB)@_B8@uC-4^p{xiVrDElV){|kX{5%?Pde@Ec&3H(<A|3u&=0xuKj
z#GU1As&H_2h<y>^yHeQRvM(9m7laSHY4)7<@yluJ9PX>traQ8|oJ&n^MpE17I8vcQ
zZqyG^f$B+x+@#0x@G3%>%x`{qA&BK&6pdF3LP$hCBo*e-h9w?nKuBh$KJm~0y!(L{
z?@$LZk%V(Ub@$)CKKC|0Bu;2hxm&F-7HV}LnV$RBsbBonzdyzIcq^4`!8+p<7YjZ%
z)AZy+lzPqLOCS85BZGC`ldaY(*j@`#SOd;A;&7dJ<f;`X*eG<-1Cf!f4G^zRdhoUa
z*P`@fr90;ci3tknLK|}9K?dG!OioNr9yvO3Wb!&|a^lE!lShtSkEbh=V>lHF?#g$+
zDa(fh+2$a?BCcF^mk>%mAk*kgi-_0nSlVl-^{%TvbtuJq3RTaq*kx3>$2(mfU$Ber
z_=1C1A|+>K+{x8JT?NN8R<VZTuA`GTj9)i7e)LC=jUS!BQ<ZGBk}ocbej0V|M?<aJ
zieJF6gDe%@YQ?kiRoBY7WE%i1-ULa$Od+HmlAXm`9m6Iqhf@{-3)6;rq+;R;DHfmN
zeOBIHa%A=men>rJ!SkGI-Np10#YR;lIPc0j;^`2sT)_54JizIgzjb;RjddWRu7&6<
zq%YXOv5Sa+5f&7DigrNZZoD4KTOLhZXtwPxN~yi9h!Zi}g6TdU{n(`v%p<%qNugus
zkgJUo0!mLt=ttv_Op_QD$p=tM^^CRXR?ql_VBO=pOVvdy=bSELCN#|enFA_Lh%V)9
zpPqjsScV+32(Oy=BRSUVd{3#0H%C^3hOkMROnTUL!m0CibZo%)R?(l?YIO-Gh2rUw
zJa!B4_6gHyL@4^oImrPGD;msrEVWXt`<4{AfvgKTrnfuQY8jn@gDZ=k-_T$uwy8|J
z=TF=^eSGeC=9G13#+twTly%SXHwSqBG;-UWC-1Q4@0^}}<2=RPde3bl`sCdRZ;I~Z
zdj&SYyajOT*39&ASx|}(QK6e7hea`)V(WF;#t3=cg{oV8fS!__OY{D6$u38UAHm76
z>svs<IjDFP=20JsrABlI&Nesr4mHnMi&eWsn{Pg#W5_m*A_jGYG(DoIVa(H*v8rOB
zlR4Kx;7FuPqNMCm2ZNf$c`T7tH<-CauK6jxcN2nc*vvaMMmMe<EL=>2<o<=M<z)+)
zAxaLN7U173mK_JK!AB4ae?NL0qAng*;c4AYdLL&+1cOaOxaxcuN`T{@7CfT2a#;t+
z18N!9;eHY@+>b6<`3gPLQ;RW4CAgX`6>Dnq?ICrzQgvZ@QhdO%WHDZ_u8?$igYSk7
zn=k<oqFZpXOLDaL(=243%MHni2sp7R1QXPdTr45=80CVY{21Qxjk~2%abY}s3y6n)
z3c(xqigc)LVNUR)`F5QY@h1>Ht&!=`pVFKxo7O9|bJ(c}iZReCqDm5}eJ--l*vjLv
zp_tKdnvA0yA(nj$wvUyL&e<V0zFV*0?$ZhN6B>JvBS$rurf({9zltT+$P%=QgqxA8
z(2Ye}sx+4@J5T)#sxX(<h57}4?=3E(8#EGT=VGaj=~z&#9v_NGOaq$R8~k!$h;c*c
zNb%|mdP$+CQSQUkKU9~Kt@~JCH0inmyml$!-6izIU^d6*A!3jwG4*M&-r$3kszpn+
zW&0?e2I6qdb(A8Cg$sc^)}x%`)bKVA3-w4;M)SoI+Kco|^>*dmB{T{wd?a%+on&rL
zwpvD6boCj^xL|?mNk4xSmoHA%5=%^J1(Y0$s>|{S-Ezt{nhcsUymQemmu(E*m!5(G
zH1ia1!;R9+E%OU=U;iH;`!{F>q;;uI^FU1i4UT;t3Lj0w2Q3=MO6kC4EchobVTh;$
zmL3u@W?)`M;z5(BMrOWb`@U0=gJwrQTXB4=P|W2VEL)fh=2B_=bNG-<p-gg!W1aQw
zN&(W1PCRXxh512*V~DO(f*yhk2#lA_HSj%==twU1K6E)B$h!^(8dgfrr~ciE9?usm
zn1D*S=fWiyqHytrxd|eQ#fenC4c<fSDS3Vw{(f5TJ!jd%aH8dYkm9YZQ-Vq*`I^R3
zOmSjvbe+>`vO(TW@p5{lGL^1rGt9r3{VAv5qIi-F@izgBq#S)&@t{aSC${R9BE%o_
z&W;c#S1bp!Q+!t#L%HzemsE{39|_|r4jl;eQGPItE@SY+cGK*vg4s!aU`w(PDe7mW
zMkoaPi(&;jQ$#b$rJAnbvap0AsD|oNw-#x>SSZ@0rK*h+kKhp_XwPPthz6t*5F|GR
zLLcTldckdHmoVDeMFUUq-bKvA^q3l|IPZPHajWujjn)A5zFFKmX85jXm?jme)bbG(
zuS6Zg!pG3l*wN8}=tGT!KpzE*=h=1MD-8G`HbNWjru$aRji7l%dgbC563`*uee;o%
zel=TwRZuZ|lI}yP6FfdptYAMAA@`7^6E}ht{G&9p0FvyB@uMa;nNG64%PZ9Ld8|eY
zaA=t(p$5~tu$ItvDZ_V2U!<aaCPqzbSljAlIpBFa<~Z6kK$#<-!*?re8c3KE@erP>
zpe;%V-xV&9APLhc78T6bvG`nFz15%X<NG7{I$n$kOv7fk%@0Me)bdO_A<!YVLHV;n
z^!ViwG_9s%*`V<TffYLca0JD4%NV;7a|`Z1Un8B@!kJC8ACk@(IVpA+&IY>%5Qskl
zhaDj>E~2jmoZZbP;9x$MN_Lx6vfJ48B9^@l&br5b6wU^_LB{<U9BA_F^^{g;H;Gu4
z-ze$qW<;}F2uvx2A6M>e6nUJ$?JDLDxU=o-PU+l5PA8isC&f-kXHGcvI?KRA<kDga
zWtaVg3ZI0#!A=SIM!>fR?9I}7t8{)64m4ACFI+0%r{Eg3lUWqfDki%Rp$%pOV)ekk
z#!4k~fRJH%K=wBPb+#xX3IY}dWPlrNN%%{E>#QtdDgsskaY9lfC(V}OFrPpjV1u0&
z@C+al?}x*OX$>zqQLTwRAh6y+SShv&XPvEysQ)D3I|V!os5KDwpon@%z=r|ZBLp5L
z@EC!|0oK_%;0D_e@Cm>Sdl#Tt>lqgzwbrwDBNSQP*^>xwuyX>wN5G!}1S{2A%mM`2
z&l31KfDHS2^3rn2-VcD@`$ZG+0r)o92L=3)fWIW*c|Zv8hvA@9_RAvjBLe=4fWIo>
zM*+dWUn6IV{kjZ&jGR&SadIZvC#3TmGV(X!n3aJ&MUg4?The(N&N};T3Y=oU180N%
zu88_1V21r3AcorSi@>J^{0yL+dD$~E^$+2!vp<r7KZe6D0C?=PGUjt|HrVF{{1ZS_
z{slN0_8cGz{}LQ9lKm-M_GJKveFad=W$df)p`-tdoIdtE9QHK=Uk8|(Qj-q*bA&e7
zHvq9Ue^oV~x0GE4)-Tw}i<K1L-6&MCcmrc^WCDuqNK$;%E}g*wN1B5T<vxN4RTJlr
zG`sJ>QV=F!p*c(%s@Aq(iHE(n+%_oXTWBVFqRhx8nqR&I4OkqV=xbuVV5$#<Z1C5p
zZ-wYR1y&2O-Jw&F=-jJ|yi(&+qx8%zPK&&9wTj~`(lc^=KlE;?EXjSBIRnJHbcIM0
zdM?!<PaEl!C`7_Kb|qm&7J&q`LFkBd+#^1RQgvzB(=k#V$naiLBTLR{m@4rO95!Ug
z{);qA^g-Rj*$~(}AWol@>JI*Qcn&uBv;F*PI$cpcU9c-T7&Kx#2Bp(S2>n1xDr~-D
z<CY&X$~J8du+0H8anyrzCg|ep*m}x+WT;fE^X@V>AGEC(a?kgLoi9{L&1sd46L|53
z7t1i+^o1@UiM6u)oy1+@x9TLFOZ7e!E?K@30hQbz)iq>6BQ>{(LlPgV0e#B_tz1#a
zJH-hu<bgIpE~Hv!7?w!UEsjjUDF)>xoxLuu3DiC~!}o8k!?H`Yf~~1;kiOvfI(ct{
zWSFR_%$p?@R8(F1?1D;9l-A9N$pX6>b4V(AVHc~a`b$vb^wA6sNl2^C5^u8*8gYcP
zXp^w^7&FD0TfQV7g8u>^@fLBW2lYqp_vIF6>T*6n<CM~<vxC_l96l+%QS+HLx;vDC
zAx1x0KhE58@O6Lp?aQW*unX9E&dAvWYw6yYO<+i?Uhxe%;xy^jk-LS$Hgn6A+}%13
z?-NHo;uH?Gd`uBf)khPiUZW;3+6v@E60kp`uzS&bNYtW(JAqpsp-uGecQ9Jy)_E6B
z@fLDodwuWNcb|RoUfwBq4ZT*6ZQdt8+C<kF;%u`=jP4?iHn32<5ADT~j5!CABY#{v
zI47MKhMFg9P8J6|haVIL49HZFX=5hfuMKnCr9~PWg|ase^Ah?W&L#mLn8)$u?MG#i
zC{CzCd~k84JT8vSyzzzV{Wu2xaX?*JVd!FPOZkok+<)XSA{QJSK4S|GDbU}nf)}6N
z$_CiY=G|&p4rx>{_H#XGJpH9xe=QDx0|Cj}!qL!<nWtA9e7J13bGBpl;Nt;LPX|7I
zxy(qPy?_gcfZfrNN`=xTWvm5%&^ZvW6OaonJZv5TAw6B{#zCj>cACPJ5RLZyptG&j
z{uT#X9B6T%#eo(FS{z8?Kp>vLX%!+=E;<UZ8QZE%oPB31)%xG!K}!Z&GSHHNmJGCH
zpd|yL3?wNNJ!II6!+qG2wNH%0uDX}B3mpTi92^*16<9Xn-iQokhv{e+w?QRX65@J^
z?il#h_lU$yy$s{+83*4fz~|}RaSO-O(rXFZL|7Ac&QH(0?)u<SoUpCI7Hr1?ndWFL
zlgvAD+X6FmpfDFlDo+g0Up@5g7r$}xYpb7{_~6ic-u3+yZ!f|e@rj{-{mi|~ul?$~
zIo~+*fj2$$-VT_^!dAtq!=B^1NirqrA+LkmHF^iJP+wqlOV4*j4ya*SgNrw^dhfYS
zd-YK@Ps8r2OVH!`D1p5N<Q$d<*xUW404o4DgBnN0eXBNrQ+LH<Ok8I3;mjN7tUK<!
z?Y83?>%^^ZJ$}zgX-0hK+m3IJp5pz4eEXdzP6@OFd?$sc@6Nm_lC^jG_=ywNN#uvw
zU7B~?HhIs9V583HqMG;3t>D0ZPM99mVc6*LUFzV!Y}c%;4a?V}ze1OvL$P>V=-^gT
zKk^5zu#<)vEI&jy1z8w>=Eyd<<V5{AMTKP#%oFJzRsD!W-wAQMaf*T@24n0!U=#0|
z22Ek)Y&%RZIskuidUQ2)K9x&<IPDJR(kD|Q)P~Syxwez3T<T;h$jBRywuNkf1Kd7&
zQ+Ebe7N}s1_sGW{;;DnWfyCnhW%19umhgiZu$%V-{)TG{)v|LfgCUn!t*qFUYYX)<
z-FE^5znoFp927>j*Gf}ce13xOpfAN_VcjmSSAw}WwN3Y=(tYU;{0|NvrgK4(`F;y_
z(b*+XWmy5=9j`-L?m`kRVWv(P2OD8T@Pt*96|i3h8|(OG3vvE9+N4)t@d{HiK4Rrz
zi7T#kJ;#>=4)-@tp_nx>@;(k9`v`T;fMqS$?GhQz6Gh_~vC>WnSFd#aq71#qZxW{b
z-p!a9qMYD6Ez7P{@DhY-^H2nA3kEG~>5Q@hr6?H4TG}d9hWjkC8HR;7QrRbnj&5L7
zvjQ1hgwT1g<`uzx6w)F{7P~U1ux1D9VyU_S+jgMhJ0Fh)kKHq&WEnSR#^@?|62eG#
zXXp4}#8g%qd-r3Cq>EiLRK=|m404?sy~V=)o3f5qFOC)<3B6>qEbjUPnBBn$u(Uuz
zrCKq3N||^~n*_{^yX9}8kfkwMMyO{iFymC#4s_9KdMsc8_%X(Qp5Sa7#!CNe-|VjG
zXV$yj0Ux8~;dDB+o?dTTY+Gyd)2!`$`zppvI<?w%Z`)e?S_ei^$7=hB(`)U**8yKg
zF4d7*>s;%~wXL=-b-C+)=X%F_`@?N(-K*WKYa{h|Dxc1^=Q<wgS?gJC%XJ<|<=U3o
z-1}F1);baHQsH%kyZr8T%H6fvzS^<cx!Sefo$G#g&suLWwbtjS&-bkM=6VjKKAc|d
z%k|!rTJ6Iq9?S3_o`#td)|SfRsIm+ht>#DZ#Tr=J`=i1#&dH6c@x|Vc#w!k_!GSbp
zi(S)w>zFN8Q8LX7kK!Ry9=|YC#~TS){?VF3w*QzG#Igd11`Y4IN#3~L9L#+^J)Oen
zTd;9!jh+Ce7`OZ;T-0I;fK|P)XfYiTioCXTal$B5EK$e1@oZ(G>d~x+XB_lY0Cd4@
zvC!{hR_Mh2KFLyB1y7y2^kUns?zxeJ2v9BO{S**1;woKO+`#}5@_`@zfpB)vYe7tC
zW4v3k3`_e+cu#LE-PuuU{Ggm3Crsfg;;4z(sWFI^G#a>yIck}hZcPo}iKiJM&k4I*
zTLi&AM49aPiu1jOYr))S)6-)qOqM<EJ?R0!zI0z(Pr3(_XBWZ)NWuS({p}s;ku=&i
zqN!5r6wLn7Oc|+ZN0+;SupAV|dc0o@BK3SmSn;vH1`j~ZH{m=Ap4|gA3EoU*j6F%*
z8Nz3;@uJHq*YKi}?5^n&8%Y0-#7l5}S@drs7whSaO|ajgxKC1D*Ic}e1?*GEfvf&c
z!^yDE2>1-3m?zmEi0~f*s%G@bW~c^;NsiWDHV9hcAyT>Kp((r)6H8Mh74rXN?HMfz
zvuTa&-}Q5nUNQ#30|a^ihfxLXP1x(e${If#h2<buBm=5`6)ztQ)lGqz6I8p`$%n50
zBe3c1f$Z*HtQ;_Fg6$0YS(u%`;6M#VLbur;1Dl%nB3yO^>?`aPCH@1#2}`w5mIFo@
zSYQ8<GSX@$eD4kFYCY-|X5^%nU=r}5b&H?Q5|0lf&lqRZb@m;pPW}rVD5U=j&IbFQ
zfd3nCW=dJPvG0qxe~rcc8$vJ>{6BCq?B6MnV*eo>%>IZ=3)n{VwG-$7SZAGp8>|a(
zwx4y2K-9#uM<DeI*as+Vp;*814H(}J;~O--oyIq0e8a{!Vtl)dZ#R4yw#S6`8s9$S
z+i!dajPIcF9WuVl;LEUA0j{%G3wSx8F#=+v2+d7h&aQxqw}@8?l&b_vL;E838l=5l
zSr40Hl5C0Cm?%lv5V6A)(uPCq8j+?fhS(2_P|Q?_{fLNrNty|<BM7l^0@nhNl+p~Z
z41(A(_@%KAyMb_CPk;=U+07>9;w*F6ampGuh-lfGv^YQ<NHB<CC%1}B(zQn`{s)T#
zNg_i>E$poX-WCc^%Xa1mOE_AsZE>K*ffff^9B6UiQaF%s-Nmd%szi88*19ol@uDRI
zEg5LZKuZQ%GSHF%l7S>uB1fu(Lm*FJkpP)|uqA*^mM*Loi4&KPbUd42+roSxH!y8s
zb;RSq{<Lt6F$-HkSe@*yQc@RNlZbSJ^8eFVg*{5;wzJ2`nPQKVbAYYO$PGC3Iff_T
zDsvO|F8GC+3FCy6WABEOVLuHh3=P<`@R@6O_IZ)|CzSdH0?!fnB0!yeiM)SG;L89T
z>?`D@t2A~4fg1_@7yvFv@X<^9m?l2vijNc0x4dvGgfIT+?@#UvAC<>ZU@WxXyxzx;
zf5h_7eFxvygCR&6zrRTZpulUsvO=Gx;~nOkvO~D8vFN&oK39+>+W`4v7V@`m$K{Vx
z@F9FJs1erWb@X?&#KNca`eDOi;nJ8s?g-PU$#G$xh0mhlqgV9JG1%l}mugj98^itr
g#xAf!Ffm7tO~90Ehi9M0`&rmS<%(G!MnL2L7yE@Q^8f$<

literal 0
HcmV?d00001

diff --git a/human_aware_rl/ppo/trained_example/checkpoint_000500/checkpoint-500 b/human_aware_rl/ppo/trained_example/checkpoint_000500/checkpoint-500
new file mode 100644
index 0000000000000000000000000000000000000000..9e9c1bf4e4a6ddd53cf47ac2da80f4f464ce6504
GIT binary patch
literal 568778
zcmX7Pd0bD=_kWxA4IxQ8vZWHL*PKzbi&DxGDJ@Ebijq)i-=rccBqduUq<YPnLM0--
zEy*6TXNe?)-|O@J{dJeeow;)#cg{S|^Ld^VE4gi>*b-?mF|o+?VIcuw8^!0GsgM<m
z9kOTFVsX)3F|loVh8Z$}!D}M|!ou^m<z+~2*swk?V`P}`7NfAWYlBxAZHfq98*cRf
zpUBIQU9jHn{~pfE&6^mP7oIUBJi<33K-4lw^8dPu3<zEw6cL`6q2RZE?b-mph~V|1
z|GRPt^A89M@V5!}i}+ttH_>ZHiC#6$Z++<ImBFFG5y8G|r-?=w8n8AmV_4{>bsM%A
z`K=ENFcO^xga!MFx>y+)wtn5JO@V;{VWKV!_rCq}e_tRLxAH8igw3VT1vi<5%33~P
zu^XN)69dx_9e9028h*BzkVY#faK8TlVlDpF%?mW5&GN_KZreuC%6ko_;m_F7pEX1?
zY9GeBg@I$y61vw~o=T322iXi0E~}kMd7c9NoqiZk*z~X`WslMOt0tV3DZ!kdW$e!U
z2sj_}iajZQj1w2hKuNVW_*VMi(>{OxHslFjdD#j1N_Xot6&TUmT+2MQM)S$<zoM6=
zGIxq_=l)*~l6{e;%+6pG)*n%zcgDw%k?(U*X`UYK+L=eJGvn#Y_3?aMW;ZlCZ=eSX
zj>F%tjNL2|QqRu|_y|?t`}f@w+-+V9OaAH56|(y+Q<Y~xRp(H8d#49YztWBu-^60j
z(PU7bxP`w?Jtc^ixq?FvJYyHC=ksUN&f#n$+4@H(?x2zF5v*9BLtJmKr7Jfl&@8zk
ze!27^YoC_Dzn|NQ7ec;c?x7mN1@&1Fv58QPrhAz7B@YK$GiYGsW?o=$g`UYhz~_#t
zfF&%Be?P0ohab8`GgftyR3YF+#jW(<crofITf`Go4C#n&d;ZKUh^O}3QT?q&aHP?m
z#u{dFsXkLa_Ma9%a9|%qSX<HCr$*C%O+onW^=8PqB0+8~1DagZ23>bAz@yc+e2Yss
zmdokWs?(BOGS!Ptntl)ilooJz$<I*RMWCx%l{Sjaqep%$qDKep=+yP|z;nJ37bth2
zWmf~r%jwa`?p*Gw_!@R)Z{=r47+{HVFT_S~gkRemgkAD+=v>fB#(Eb(Qc62{uyGrY
z+vrbxEDuBG3vbNJ_km@<l7KWNVsHBu)QG%^X3_^>eBcK9XLu$Ej3l_ep}1gWqCB6$
zU3o&nGVIVS#akME;Q#P3=)Ek*=@rLOa@2H~mtMwC`y228i3L<Icq2cYJ(0Wfg><E7
zAiUl+4_!(E@bthuo{%_!-}f0$4cfk8fP6ZinQ@z~8GBPup>0C_v=-q}OLeM|c^vb0
z9fZkmz6xb+o50@cBY4On7{))t?!I2(@XRDS$Ht64Ye>SL*Gq9tQxl3GRY9i<yLe7$
zCpgHv(t`_%am2VpPGkoy_5XO$=zLdZ75#SB(6OUvXKD^EGHN0V`r7d3l$qRoc(X7i
zua=zG^CSy9ipkBo%jhbYM_>A1CPSCc;eEz8;McLyG)uz<m!vo2l#uPBZ~Fni^EBwp
zXff{p$(pwc|3eFWi(v_;*ngKx_-n;K&>Qbaaqm*g+0yTb_GTr{&jc|k=UwdElZ7B{
z`iI25e+gTc$l_R^O=QT&QdGJ%8E(y9#cr#dK&834M9ZZTqP5nc>(zV2xbQZfSCSN-
zm?qCl)AYf^s~1Uc0A7&EfTKcp;#`!3f1)P9htql3up^sH6(0hwGTJ0QKcAeOluZoV
z``Pd*_XV-b_Og}5^<?JvFdQ4}ia$m@A^#Tb#B)LRxUNMBgefNIwn~}l-uO)}FD|Tm
z=6+F-d>{hnXYa<+wFFyljfaz#c6ePXiVR!+k=0J@K<%8nn5D6vRLqwIt*m!2M{h6N
z@0=}It?>x8roUzW7mkog<2BfY(I=qg;9dy5@<_1xZ+rEvar0{{<41zgr5U&!CgSjq
zzPQiRkD%BVY}X<1?Xx$!9;pzn%Du&Si8+>J{9qe8>PYd#%Yw!^QS4Zk6^eb;Az@u<
z==x$RY3P;}+6;!m-l?~k%B`iCw<;LE=)VzsJGdTI>v{zZ4%Nb*U(Ku}NeR?{uZH3S
zXIb~E9qj3dv&?wm2GEQ9%ZNu6QB`t<u76sjp(Yxu?<T<B7B^^kCJU2PYFLJ(fGzrn
zq?AWPY(bQ8_U~;d=I>V<@FN-P`u8%+QUko-YL7D(8lsr@)H?EKDiledXKPmrKy_~_
zi8Y*K8C&y^bvI^_V$TJTy(Iwy=M6*4%vk8V_>e@safgBXDd4U@7DwvH0(4m5o?LHq
z@M|SGmhwm@=`x>HcLZg3Q*hM1G{hE7UTKIp+q4UPsy{>B$UN3<q0Oac4M4_UG3xeu
zC~wcNWgk!8!X>8WeDw=LV_%G*YCVNe^QeXWla}CVBh=7glNlYpD}a8~%clK3Cufa`
zs26AyPlng?@3X<T@v!k(0<Ogkm}D^=eJ(@`4eNdhvj;|Fzf2UyE<PfNdY=dpUmKa_
zb|;k1p8**TNo@BoWz^C+&W@!gg4?p8;Fu+cvFZBg@H7VOtlUAhbPJoHEDLX&9z)sp
zhnPFqgS{UG*e$*S|GAi<=bd(__l-vNHb*||%p;h*MGs^WyYTA=DfVgcBlMi9!jtZv
zz<u)-Xs5FQzVK1R(yhAi@r)LAp4tdaatry^1SP(9*?KJ9yAE~cT_D6-4=PQEU}K*x
zzT7}i3R1x{S`NCdtcB<+6k<~i*i4fl*x*(ybe*w^$WHepZ90?S;I|a83z>szCTEDD
zc?NNt<%;pTGLUvA6qo;W!g-~hLW61<)Qgc61RTxBbF+p)d}JYZ6qX6;@)PLtWDjO+
zyKQ-<G7zWzC}6A9q%hOwAn{JKA`g9}iG0w1?6+7v)47)gwtrp<pKpI>xo^EDJAZu~
z{CWBtuH86*PV&du+fz(1s<VW9Rd3)YZ)LHnDL;s2&P;x)KMUmlbHk2h8a(%_GHia7
zN7>O?_-M^ix_!3^m&^+0X9ecMb;HKf!RL|uK}H2w*SE2RZ92Fvrync^$M6;D-_XR}
zhyH!;0^aAQVCvNs^x%&V{P502Ft_9=wpJ9uMx#2k-IGka^cA50t0BL6YdYRslFIK{
zsesA^BRGF8japgBQkmp7^6SPP{A0L*E1VJV^xPf%<_R^J8CS|wpH@JHeIg&3dX5*2
z+{0fD>mdtPyYmY>(!oY>79MWvVqtIQ@XZfr@}**5U};!7T`8f(R4%4-l4=5_Fb(V`
z#G$_3F5r`Hfbz24JSFb{TkTSV(y~+d((sd9tZkEJ-@4(DlWh+#tyQr*?IZIXKL!5k
zYT_XMK~znHpmvfpI$Vy2fZ-Gm4NgIyd@j6b7m01zVjwmcVqsO`D6~2gFI*_=j_2j>
zlSOLY==R(SqW1k`QSPN=Vzn-x?xew!pc@^0cf&ZxdWg*E29>f*%wMcd-~C#PL^q0W
zuRIQ$r1Z%CaRTJ!$@uD29GKoo7amZGC9(>4$?07iEyErJ!lJ4fFms_i^yN>23XcP9
z-IQn|t5I&LkTsM(^hkm~m1kh~>k?G4?}Fv2<9JfoScs^4MI75d!h`QVJY!U@;E}^>
z2r2mo5#wd)!<JzD*EXMy<V$#?x;(r8V<y*L)?PO~&Km}XHemVmZ}4_V7CKe-Ky611
zXBMNWANhmS@gQ+<A=u&m2WK4T<n6C<thH-^eLpILy+^`e=+(c_UVOgp!#OR|ZhIXs
z<*vfVzwe@c_H8oa{0wH2v7Ej8F_-UbImA*{slm#F8Zfu29!fts!(`9NeEMF5^Mb|v
z+>Zhrku(>5Rt%>qTfXAT`h|4e-6h=lp9$TyKL>rYYIyzce!S(*NzsH4U{ZS-lVdZf
zM29v11=>97rV*D6H0KKap9RBvz3C4bMc(|d5(b`h5~lnSeuqz^Wy_TDu;XTU9jZ!~
z{K{atccl0>J1MH>vJ*>s>cG>bAEsJ1V|<G<@2`4C#vVNbm(|=cv?rDuUo_(n)_-Ql
zbhj|2&=gwJdt7K~9}6{d0)A_A4WDpnEZ$VU3YS!k_@1C4ycC^SQeqG!WomJ=p+nF@
zRh*`7RHkzS=FwcQa-6g573fNEx}|(7y;beP!&f(paI1st>pe$<Cn(Y>@_}^b-FmL`
zXAyMz1k%Y>j`Vx?MbNnF!9(w_r>npC^Z8?9@o@Dz`fB`MqFa51d+Rpi*mafg_Jt$$
z*zua4|EI>?UmPck!;bUR=?C~O=T>-iB9Y23?g7SYQE5Xi_HKNNo8xBlrsWZQUVkU7
z{##C}itEAdy*vDT{sDbwB=f@P8>pW4kCdD=fai*T!O?h(C`Vt!rZp|4&lJS@#~43Y
zzuXU{{o-MR!Cfp5j^~wo4f)n9qxhxE-u#5eUCYn!-@udRUHHayE&C9-6)n3Jxx|kk
zZeX{YzrCP>n&Jw~a%>JfI5ZV>qFnG>kO9=CzK1mHL3XcH7U~)jkk3;`^^Unj#y^-}
z`;ZL>hVEpp-!BlKff-mOZcR5`+dwPUeZrwSTj<o0<*Z=;L|VCQ5`9yz1xL>1(2pS_
zX;9fU{^Xbmeo1Zt*%P~I_2vCgyfFyQJ&Gg2YL{Sn2jw;=)%fjmVO0LgGd%Wb68q`6
zgLQt+$9E|~yz#jfwIAIIwtLo4`|R1gYK<(@tT}_jW{jXxZndmqfeu}`YYytH*u;0J
zCkcmr&Vu@63-0YC#wFLS!jRVS@FnLOZmCRy(?eTeU!X0<#~0$&;kLwKMJ%lpugBup
zPeK=iAEdW75kl`);_X{|$i>H1IH+a;f3Htv>}e?pxp=L%=)NjGf4vh<30iSZ10`Km
zKIqk(4D0e8(Bx$r4jGex8o4$ozc?N0PT0e+ofpx!@G7JRj3VDW%P}PFG@CnP1MZeT
zLM~s4XHlQ`31&z33MO1UjP+&5;EL2iR8l|7Mrl_PzsTz-Ie$2y@Dn+j5hUpBT*$tp
zm9l#C>G<n-C0Qjt3J=G<CChS#Ku+o^@XE4hA@mxN1Pd&F`HMJR|H;<WJCmn*0c714
zE#@yV0fz=og(HF6S=RfVsJ!HPUC<F{jA_coB%A%HRjCQzzxZL^wL@^|k1qyF{v;<A
z&5-C<vMaL^$^N$^$h|^4_H<n`3^UZnx|DOw=TE87@6#C6{BHqE$kc<utbEpDodBLX
z=UJn2B<!ntDePD(2c{42FjoUZ@QHE}bYB&Ke%WIDP?vzGV&kx;D;^umV=yS#3|fZ|
zN9+3`7<kPXVr7QF-GL~WFOh-`COv}8joYwz%x-4<(*^D-cQV<`rEqYEBgEIqfU-2j
z;vwT;y!b~pQhPjZZv4hnUtJL_FE_%5$}F<^n;RDQjTU4NYa*uaWnr0x33&e-WZg?k
zSf}Y$Fg!AnJRPwU-k%M^o+-9q**OXtvOlwwt9E2j^?A}B)6HZXqOg5Lp5V0KAlth+
zff=-H1YJ8(?mO8T(tNbBIakPD97M+aUz5-a2B;X@O}hG<SSq$c`=n5Kr0+|=s(C{4
z(_8Sde;B`zc^NK=YfuR@doHH>M1+50tYq#hlIEKaigHn;dYe11nG}zc7RvDAzg0B<
z%XUb4_6vr-(WUM8R$x+R6Ed;yb&X{OMC!p_k~Pr|ebkcSXu}vR4VlZLWz_NRpfp4o
zyJ5_L9f&U<fq5I&g0Yh#ihY}4xuszXI@xp9B56j<uYDoLt)n6PvmEPMQYt9gDaNEr
zcEX}3b!7iLe;ih_1N_x4<Li!i((xf33lA8;?Cvudu3Uw2!6VSLyq)N5UkLTu7s<ps
zQf%_Q6fAhGg8aNHubTe}d{?*O6F3d~cQ!%ci;?8Z&vK@>G7pW@-{GFTMBXTOft8x;
z;c_`fY+WSK+t&)ZBE$qqH(W8LrIWatj$qmDxq=t65-|8A67I$>2Zy_cFwi}Pr8s;N
z4y&07=E26I`LTw@$iHT9r_MzkO;L=P$U*moV0Qm?0@`(H!{|emm`E!v7M4v$b<=j?
z;LjYUSCj~&)u&ioSE4u}B!k%vX~YGsp2Fsl>a58^i`_eUntj(xLHp`QSaX=LLs_q|
z%q|7p47OOR9NxkFKQa>d{g5!iJD;4^w8U52yx3K0#zVKh#uF0rSa41eEm=a~Me96X
z^v9j6u4;qR_UE9<UXA-royjwn+tU^2R>7Jf2l=u!SK-2tIZ!ZG7nEFIK%>bWG`D!p
zYRc`o^qU~=Gc}aI-{VU^rhY&}^;`UMU@s$EjOiplS?==2f%>FQ!Q6M>q4sA!d2Tw1
z?s45u#Ko@Q`lT^c*{^`eJXPS&ic;~5%t<hOsLk0Cd3xSkw9X%hCyxs!b4g1HI_=66
z%<52uIX~?A{J1_|+wRMAK81r^j}br6JDDDTnu|u|g@V8Co>XzY1(XM!!FL*xJS=Jl
zFSuNQ8?tBdy^=fl%JE^``b##B3@hSeR?ViJKaBYP!Rx$Fev{yc_Mh4zy`NBHUIa>>
z@J62lM@ekn1Yx?#a%^zzsB3mphM*6+%y=mhd70_pV=;ncB%UET^8%q~);hdwq6}6y
zG=&49xy1jM7n<qcBo9w`!Lip%NsoLR5xbpd*)?)126ZaK^ZF*3G)9_F6YBFf>dyGQ
zNtVudIRI%NFOY|dN7(h_V}w>}L-^On)4=e!CVW-u!RsSVl8xmRkQtW&;`d{5?X$(i
zWA<K@4jfKHLISWW>Ka@=Wei0n2O+Xb3a#CTakr=Y*fsCrZ2hWoJRrUvcK*yps-Qug
zu@ii5cYx}LT#}REj+w-T76u$537)_o6xi_mH`A%NRtBGcW<MMAz@4eBuEwDf8hr00
z8)5Q}09;-?3l9X(p`VQXIr;t_4qw-%Kf=Ze@7#GPpy_f{NshvyYj0WT_T%{Lj}4ks
zd_vd86HpSOKvyr+CC^sIqn^`I^hlWpLzL&if6sqlk;y1*4n9o6OjF_FiGIBHaS_Hm
z|Hfj<GGMZnI5B!F3yJl07@E}y4Z4T8gO4tq6MGvjj(t?8GQ1X@!q&jff6XvTahyPy
z?TS9PgW!YJbToILz*7w`LW}NPDx9f7H%(6DG^G+7zEr|Bzh%r=b0VeRr1^7OGt}}o
zVIRdgYDZVV4dY9~^9_e-Mz;+gdl)e~T#b%7vYMARBIqfW;Yqn8JgY#8U(WgpYvv6F
z+tic1RAD9$>N>&??rtMS{th_OHHM{^{6wh@uULK4M>Jh+gdUTJQ0WA3{&IsfR}KuI
z(uTLW{F4i`V%Tkznz)mmkJ6+k?@lCN-`nukEedpE^l9$=sS{+%$8)b!`uw=1HI;r`
zL-*P4;Wnq+iCsn^1dO%g?fR;GyOR?yw%r5UPj4Y}ow`AK-76TaG!<V&Ci1n<w5df)
zE}bYL$Ft*nVd>xf)a+slj2%e9Df-g9B=rU4Uk&B5wOi=(WA6OL^*HXlaS^Wn;Z7^|
z9AaAG#vmK{9xSI*Zq=+z&vlo>`<JyOGP<7JY7?VlruCzaZZbT7s7Ez%y<m2BI+Ugu
z(ux0Ffp@9VsN=q$+|z4d^W4rsvX>0;jF?B`gH-7^F9BJn?nb}g9tTtP&w_m58Q${a
z3=ZFR2X;ygrPV*Jp!-xEF;$SE`^F2IMXe9K(!K$U*JraEQhS)|=XAlujx}^wM+x{Z
zzXBIN2)NbAPSg>Nd)u0+JiE$~znXd&;@gMPPoX=ZaG(`OF1iSf^S_~2{z5DY+zI82
zj`581*Wrwv5*?Cj&iovAfyAUG+-P4Y_p*5p?#tHG%|ri#;jdNU-sL@5C_9xpw~nGc
z5nq}1(Z%SoMwyqjX5$?7$->Vr9ayhcAn;k(0XJk8qH>Wpb9rtL*+I+jcF04ld@TWH
zR-UvF8P+D*@nUU%98!25hgi69rx<hI8&V67E|S>OEP?luZ-T4aQT}vx15U7Wp(%xZ
z5MbtuN<5YipM6W9sbWpF-z$+@L0j3$RkF-i=QtSV$B}lMk8JO^Ulu*9{^EyYxooeW
zJ<CtpfQB*E!ex$9@G59Kln(z+(wg!y;BEl!UU`D7*s#7X!KxG;-yeV+rR!w4xh#yx
zdmz#-N-?eZ2vM501PvzIfXe)_?4+&~sMRgN+*xwaS(qn$n&Bh}+11PrR@Gzbx5-d@
zK#nyR%|csydmOCah6=44@T<frVwk0e9{C=yKRg1BG_tYah$e(QT>)oaI^n>d>DajV
z09ZWH0sH)5vcGi_di-*P+X3Y$EGZ+OlWyaz@!#2yQ|dS_I0YqtC8D&8NPAL#Pi&%N
zQKmfs)dC-|UBwenZBPkzZdXO+EBnxKVLG{8(ZVV_{NdDt268>>4jWvNL<T>;Vpdb^
z1bubGff%pAeM!Ty%E$;~-}_ja>u|CvAs&|Bp9Mkh{t7?M4?=@C-YiFbHuRP0Ve_BK
zkY#TSnhuIEkQq)syp6*=g9K*WtOL5ix#V+#gJ8b-X83Iti@8VUqha4yTzt2mC}n*o
zp9^=9+`I2!ooxYj9oULP)Gy-x|LR$OdI5XIrQ!PoWx<tY(RCXatj7!y-#O8(4B^*b
zGOb0=nVmhse*G{)6wk1n+83;A-(+D!c0nB<G8HR})-pB4Cq#CK4o>QoM)4dgY>iID
z_Me4}z8Q_DEZW(Zco+1{(u10(FW8qSexT~S9*UR7kw1|hDCxThrMw@K?ky%vEb@Y7
zp2;TkZmh!g5M|8vG-gR<;q=G+2)<Ff2(yH`RHO1CY)-tw-cOXHEpP7Q@wzF@$><-9
zl03_tim!s>+&C`3VFGW8`6`?_!Wt(oH~_BdBj`9U4KO!2%8u5)Le08k%(&+n``(s}
z8q+tTVWS3d=<E@8+3kSty)#%x=0e#1K@T3au0x;L3c;5nr&#PIRV)%qBj#h)!YOfM
zi0>bbkNvwyhm;pa2luhZ^P{2iP&}xL@T@sb1s~>xGpRY<mUEPk)Or2OfQ2qMaH`7#
z*xpftPi`6VefO+*osm5(fwj0-c{@z4r(hAl$=Y$BAS9|0qpuz#K{YK<cA=jXTstf1
zSN(-o9kVcWx)8(0>frD1I=tU?G4v^qhLZOa!0gUKX6WTZY`#Z>;)OV(R-_32HA&bR
zl>riG)8XvhZD{QBRnRyg4uTfRv$QZVj7bq^G53R5-?v9ZK30??pG_6T{~vPfrYzA8
z3PI-Yi$8R0$+|nQSmH%j@R<J_hsadG@@1=7T-`S;eYUYqIc*s``|Bi2e|<%G$X|g?
zS@cWjHm(fM4_n86ad)D6w2h^SwEXnvGtg0d8!Aewp^t42h?Z`_^lOKOlJTmjkr2ei
zboSvXZFxFTc{TfRtAyF=XVG)E=3KsMy@+>h<L1F5F?yW|Ex4u1%|5K8H0QfTs$eu-
zIdm;w@OL6rpQcYszV75dr%d8`F#?`{tAKlqIK}7c?ck4-P7}G1&D>)_9)8ojC$Q-(
zK)qsfYE_m4uL2kI^wQltx=9CL?pK0WU+>XHvw!lj+M~E@$r<Kuzksb1DAmu=0qn9<
zoBhLV8Q?lGHcsL>%D>qIGYW42X;b3Eqnl_!{#dS4E6I)1ZnI}Y_6sCE5~*R%0<hPZ
z4tJ#**rYCHF1aItu2>PuJLjF}rR%h)$GUa=$^$dLCiXbhpJNS%!x!_jLnhLUP)BGm
zT#wh*?%`ay9<MBLq>I8I@PxA-?9C8q%osBaDnDN)J~Cp$tWPeWY8wd}HxseV^9Iu)
zp=2O=BXc%N0ofx{5V{l5*x{d`GGYSh@>PLh`YXZV>I>l!7J}`kV^B=>kNIU6S=4#u
zE-;n|gh2D%@UFER#Vt6F+F34Ig9@4M`B;*}rsG5rzNXHy!fKYu#-Ed-#$9Xh&e(UT
zp1L0`sVZ$>dz_q{7Y?KR6d_<+Fb3%w!MFP(@YFFAa;JL|oVY(9lYYD5SyxBef8c~r
zzibxzZr_GwTO;_(?{}Egv`RQ4;x*$Y+(4z~D{$Iy7PX&q7Jt`u!SOwNFmh1I;t<O6
z3!CTB8)=kYT&P7)HkLuk=8JIrmofdVqe$#ieR=&~9q#CI2P(HUuvFr}bzJ}9+=Uyl
zK`Vr(P5q5?$3$U*<|5v<@G!10D!_Zc+aUFi1sL|K^3`06`^FZ)r$2+Jw$GO7oV$+C
ztyZ)9-(%Ttol8)mUI1R-U%{-m@#N`Fd#Y&v6uJYuF$^a1$mP=XXIld8GnhxME2h!7
zEpzGEm?EHYqiK}zI{XVSq)op$7t9R;pP5?Rd&OM3R^5x5`eqAduTm8EbK>;_*I+}K
zGkhFo%$w)U$8*=rXt!((SG?{-e`O5A$f;NPoazj|cD^dzwPPGgh`q)+GfzX9tq!_d
z8=;@K3-8hT!*-0+r-pYoVdIf#8vP*_r4mZ$)b<3r(If#P+*Lr?p$eCYYK!f^H}OKT
zk&q`h3k1{T=!{Rp=zq6!=;@y0INv4_mz4h};sUdI`}P}9elZ0|d>a2D%h;+vHLzf>
z8@I6PL2t8CT;#SF{vP*%k)vHu(y$0eEu2huHR<wKs)gv?M(Ix-bsGLllRc3c&1Ktf
zV`E4KukW;^b8T{YZq7+exTM8PmRR%b<PcgOeiqJ1--G8a?jSqSkTfNKhdb2^@I~)x
zn&4xKgUgN)C3ji+R^CCl;#?KZ9%;rM?|((ZQ5CqM;xdS(eu1fShw=R=G0P?CcDz1F
zir$PF$!{?$UKThUoi}&G;$sV$!Ff5ZA#s2={aipow<e>B|46zgyNX1#<PyJaMQotw
zEr#=AaEn|l66f!OU)@W5=b=R9N*f`{+=b8RYa;re8Hx9G=fR_YAg0Wt&K>gb^JD@V
zxm+f{O-27qRJRb0n;?)%5Y>nCmeA_f794){Hjd8P$z<Y=gQT>`m*998bX5;y_^@y^
zI4gt|4wvblZ3?t~U?Uc&i1HnOmHPX>uKbZ6<@X<)6<o|1$%nO{KpU5<;Cx+{f1Z{P
zYsd76avEEDMKFPHP!{Fr_hR{uiaJ56u!vN&@8U<Nt)X8xR|tv~m(#a{O_o!RX>tF_
z>0Hy>ggdU<&CBQYV#A+0U_JHB$!j~m@<xX@Jc!|gV-k4G<`II(Yd#b$N5Ra`zP#l>
zU+B@*<O63e(5vIE_@m_gFw0(prC4fnf8l2OQhX?#HbsugPW~Z0AK?zC#LQq=dkxDo
z(*x<MR9K?57Gk_Y!6xGa8B`P1T_l$R8S;lLZ%>69b0@4DVh^5XYS4Dp24X)KT8?f@
z$6>#Xa3EQV`P6u^IirnmFnJq`U79IaCfv;GX(r6+ZNe?9TZJJOmr!$6qh;lpQe5oY
z$)-;DUS|>MfVqZWh<L<Oq3)E=xc&D?Y?=E~FwF>Xm|#1TOgw=jBG!=C)8Yh8mMK`1
zyN{hY^GdKfeg@9BeNWP>f5A?jR{T?Y8Xip_O{yQ&;^pEPJo<1S`F!gy_W3Qu*7UWo
z!*xCzbrIpQ*iR-Gp@6rNgv_))5Z6>c74gj#Wbn^vGDt^5#>ls%r&0|whx*{V^>5+d
z=3ykd%mj5zBH4$VK`1u*L#@q`aL_!y2HtlWftBtka>~D(8Shg;F<E~?&i0X(p;w4b
zjyUdB^8_8akAk2Bxuh~74(rN(vr?TH5L=a1*Z27ZQ?T&E^L!6cwl+ftiN`o+bFMIP
zMH?G(!3Gm|pF;PYOJS>&4V!l`A3x@vWtRq|c%487%Qwiw>0l2i9&!mHbUWF4qxbAy
zpe!EPX-Kwc7qHU0ZZgt(GVxiS#riK5;1x}C7WAeBqNpz(^H~5<O_xdWjYt@3y%W>d
zI-$|4Huj|$Si6@Vm~P2Hn~YBOEKU*q+qXbA+b{IgHimthyNLOh<3uODU4X{^_*TmV
zz8w3==4kqWos0z5Jllem3HJn3a)*Jj(IPTnCWkc@JFy`<N1*CdD(L<Zh!KZ9MgPSK
zHkYYG>4aGF-)~Pe-)PA?<rjcK%xALZZ#z@8xX0=&Tp%YS9%p)lp}tu<7Ef>`%cmrR
zfvq!i*hzz7xF)-syc16BQGlrJZ<y+ZMc7p;5Cna5g^a3rc3vg~n<v&Wv)&0Hn=_2H
zw<QQpG%SGFlBPPk!5;?KxkKfmB(~tFG%Bl2!LaG$ux4Z=$tVZ}yT7yWEi8qoVk@5U
zVhmb;@}`HqX7H&+^XXk3dA`Hffc%JxMa8;{mfy!OrRh>me9D_OG(S3&Z;cquT1+%S
zXCFbc5M5rJo4~a{ED(yf{DTW7{#4WMDdt?Q0>z~D^rXsW%zix;4V^=vFJTp~&Gv(a
z{{2D~g(NgknFLl1mE=q0die30pv}UWtYP~rCU%DkbrSxO)7r`qSE!F#_OVdyy#~FP
zs-f)V64Jk04a}lvK~e7<RwVWxA?9z%;}?a*KskXdaTCWPze4i(>|Bh^GhxLQXM`74
zuE6(87sIE36=0?LTG%;Ghy0zV4Gs6U3RnK8k5Mj(WJ-cAJGyBqEPuTL#vh!F*KYyH
zj10sLb7rFR@pSAC83X+-AMoSY#rX2hO1kgY3LYalkFK524^QVGA)%{Q;LVe>xb0sR
zTKU$AD+asKNV<g2wrD561^FcV(F&B)DMO);4&QUp9P4jQpihTrP~-B8B5rdSRyxSg
z!oTA&H>(WCUl;~g&uO8I+dl}cAI(>a+P>Wv!oz%5(N?>e=sx5DS-$c)kaGgq6VT3m
z#8s(DYc>rwGT<K8$N21`v-ssl1oz!FfiKv92KWjqI&@<{?l>mFEmyg-`&TvS5&dR*
zFkX!g6raKF;hRa2Tpy~>+{~ND&f-h<2l35U#`3f3-(k0>BR}}66oz~&<qe&hf+*d)
zn9+gsv6vj6HS7!Q^(mtN%zTB@&T8>l^NtJ3X0PMvW06+J=)#nL$;{@6E~t(a1F4_K
zSbzw^G^VV8W6kf_oT6pex#KhIo1+TC$*H)UM8PphWiTyVg$Kh*S%RuHb2fZN45XWg
z7~jpJhfASOy#_q~YzZ4YdP&u4E$mT~r|G>EYp+XVo>>TYw%y0<imhPv-W@{Ymu}$G
z+e?nV5aTYZEpX*&RS2#rz*o1eK>gVQ=xvJvx0}rZqgU0n>uzZC(>>>z-2FmG-win7
zNf=sK%k#87S|H8xEbk~q!8zG2aO%xuD%Vj4(lXJs(r*L5ef=yRnCZq<i|&En-*eDw
za!g3<;(1Q&41Qwz9K6*j;1|3qz^l6j#(g}8Uc0CB7V*(&cxV&9BvuNg59GMonH3-#
z{+^tAkq?KDoWMxay?D8#iFD`3gY&UIVcwQ3!G&>0@$m24a9_cJIBd8MABE+jeDjKM
zrJ*%eNDaZk8S~-eat+#jdk-nPyc32!6{8(D^m*5lR`@z85uB@MQhC{0a0$}El7*gR
z@<}_M->b`e^VE5#)e^jOpbfn5{bZI=gY>c8J19QhhOadhx%P-k3_D-T3=HD9zd<&t
z-@1WfQ#ZlHSaH0twv>B%OYmJ^#?F5B;{z_+u?NpBvVj%76nty5A^*`TD%&eXU*3wO
z$6GA9TgG;-ojDxO&(J`HHP)>2xhg*x;>X_&r17#(pGa%#J5;UELC?9v=?uA5OugnK
zZc`pfhl%w5>q`9?q125RaUY$$sSccmJS06D{gAP{fn4)Tgy@(!I@5m-jPA){Cx5)a
ziG4r8@>l`O+c6X>%RB^qsfF~DzC7<$+s>7)Ns^G;l@R~<Fn@Y-F;7`Dlka^y3B^U)
zz22w>P@+ma%WyuA`Qu7!-1o81>t;jX##;VH<nidg@s>?kx08NdT+c5LorHR`1BmzN
zNpw)!552^XVt1xEkF6RbSpUs|wtds!+CAI&<A3qO&m)eay-X-R`DKbSvJu3$c@vK+
zHy{lqdDNsZ8Qbok;Mku{wa0B1%F|@J)<%jJj*F(F#)x8gMF}pKu%RkH10i+IFcu@F
z!3PuMF#WVLt<Jp1v}Jo?^P62X@`EfLsTahP|2T*=|0ufsz8Vj)IVmhF8-PpA{VZ0-
z5q8I%XGtgaS#<pJB#N({aQK1G%w~Q(-65Nd`NgqxhSzeKro0VKF8c%%3wvR&q?iyx
zF2W>J6)K`=_+(Q%WVOYhy5m`}E(j-n)nYVKvXU;$^yA5DCVcF=2duH=0w&m;p}~r~
z@LbCgnjqhgzSh3DXm}>Io!JQkUeQ#mScG*I3iUSqo;>v92T`p%fx?KHAYLOu7d%Sm
zQ)X}Ca~Dmft!`B`RU(_t{*(tJJjPMSnIovYiY?tc%Za{yRLNg%+=8A5T=`Y|aa`6s
zmOS?}p!*i<a0B%#81G!f_rEm}3@%&@hA;H_@*s1fWq5^DNq%Kkd*{)aTNs*j{|B3A
zAH!X)A}&6+l<5a3qw5GSB371&zZR>bPW}YpB<Tq#+t*BPk6I2^XZBm>HKf5)nMJVu
zW-K(A<kg+bc0ipIXNB!^8j0db2lO%fDQw^Rn+<45LF~*KV3qMqn72%XJBo1_)0YZj
zh1RGS>W4oU*5Kt9MK&Vdf)$Gcyex>u<Ngi!Sz;;v7xkNM-@Oy+6dyCeX28bbhjGEs
zvv~i%%WRcO1GC$2f?^M9Swzr165=-!m6E5Elk5MIm5J8S{$e$xM9G8dqX&X7f6d^*
zB5&*w#$)!VyKGAIJ0?5j6Unx17Mg#5#Rf?TWY^k)q2FwhaUzSz?vi7#;sbH;R3b<R
z-C)O3K9b_A^`ufi2#iM_BY*Rx!0ekmbNxLNt!{m-t32TYH&W-ItBo-l*r#Epw;QT1
zk%IE!Q!!R8xNgmYF&H)CC%eIuP$$|$pe)kmmh}H6m!(}m%wJnrYNdiHFD00W^OL#<
zOR>T8o^ZFe02P-gpkeb+EKC2(t}zST(y;<t`yDW3lAdt!uX1u|!U7WccqbgPD8TV<
zC!neGksyCYB2!e7WxCRG=z2UIRFfnHALT5ta@v5Ue$8$6v&{`=>#D=8wEIN+=2sY}
zGY9f6JR_ktQ!vW=2dTU#BdSHmf<gRV;{PoYly42N*hk0etR`KrvkDUFCrd{|{4pC`
z#;wt8TQaI%dq5sZ$6|5L4jdM#01L+YL)Yoqf>Zz9B^$j`vFk&cu;DXdL3a5febf(C
zYk#rJHP&cd7l_V!aget$1;@+lqUP}ta4<d!6<f5}*E4QVRK-by?G!=R2Nj`OyT~)B
zGyyh09}j)n-^iB=H#nwIN3P6D#wmW2pyikgTi&<?Z|vFtijs+7tgrzdU;oHVC(i<h
ziwd~*RRqM?CqRnXZ?a)wg5c}7W$5=R8AF=nvEhOmC^m|Nj_MRB)hZ%?z2CBi*l^MP
zG^`W({vLZhAv$#fg6jbTOe+61p|<0MVnd(csWcg?q&$cI{I-tX3*7`|3noyV{$PG$
z><tXHS-@+A;w&ySffha+!83opM%PVBH23^x$W_pzDg%Gmjps+n@E}Vz`kSbhHX{q9
zV{JuRi!&CzynzlP{c=@75H6k;iDI8GlGsD7f|v`Itfwmhj(4eHifIyQpY8{SmG4-1
z!~$%z*8n~5aTxBs47VQ~3dT2;VZpOtEC_Of2E8tUY>^k(-;hUj)n!n6=p5_0@|Z|0
zea5~T5V&)DI1c2k0YUm?Yz!+TErqTqyC#^F-dTapO3%qJGd~E7f6ZvC0R$cYL?m`4
zq2l#;h&PSJnjd4Z;d~UFAK{EO=6=|z@RcaWhQVv|FqAJzg6-WWgy~iHaF}8iM8}Py
zBQH7AfT{DLNYR0g9Ejz)yDYfgs11Aye+Ta-?S+snaZG;tJ$(0jJT>voB#Y!$(fj+;
z*h3=^=>2&LZd**m2QxkB9v^$KUmb(*=Jzpmk34wi+XDyY&VeIkTc}j<K6>W+Zv0+u
z$<H2n$ky3b^0G)(u(&Tzb&Ipf3h7Q}>QDmpA13oL(`0D*qC_^?F&Va&?qEm$%LP9V
zJ$@@)5fq%nsG&+Zc1$`AJr|J63bR4pqa5<0PN98AIO|Z_M&lN)=8|~@_-l#^ZD*Za
zIYEwhuX4aIW|r{%N;56p=gmit-@{#)KMg;mPRmA523Wn0fBIQThnBk2^D*Bcx>KC?
zIO;;f<L`CH$|(+5*t1)Sw&*kQAGx?P79YM(hGmWlaQo?QrnN;G_l7=XW@@o0wpN=-
zz1<4CKG|Z+s<Gr)fhyQEZvcJmp;-M%0o3voS(Id-u&*}?Pyd<$svA=UPHMyHSq}sJ
zeSQ=S^f>YxR}H}J>Kf4esDUnxewGJ4JBe@YaJp}H75h`1L%zgla<i^dwxrlgaHse^
z$;Sf_c=-fmxKyFI)i0)Iybbq%K1Vj5+|S-D-AU&7nDH=`e*9413@@}lW7YIXn7dXS
zD@JdGaoLl&SGqV~(dEbE(>lqUq*S=QXRlBtN{nkCh{Kmd%5l@i2AE@;08$2BP|%yt
zzm*)|$+Kh``SJ%%LqEWOy65qwbP_Q%a)K7;A{I5{8ir&i;l~aqG@VrdNAFs~zSqgH
zWzRoh+lcdUQNa=VW~E{EUpL;oBocPbFoqk-4~4Vkn@RiOi)bE3sZZY&XtzECyJBt;
ziO6t_^(;bpug`cuZ#VywHy#4C(!e&~m9z9^RN=-WaLaVzU-D=2Hk;e5RQ3S6Wa-gu
zBA<u#nk3$$sgJt10B@KfoPM-l<hwh@#`zrO$H(vEZB4&$jUBQ`^CQ%t?<Cy&(<q>3
zYIF)8L)ABiapRNQsnUBtUR<<-9JnVzJ;W!`jWhG1D*hO56ANJni;MB2;eK4zw3=&*
zaQdcFHdRrMq3amX5$k{9+YlXo=|}<}oB10*W=rz7ev&k>>J}E<9S8Ta9qFobS$x9o
z97z2Z!1dTYh%9v@w%UU*w__>yD$0Q;S5L7^hT(X%b2!X&e}SDIW2x~q6>9&?fX<q;
zhX%g83C@)(cyIAIsHy#gA2dat?GzP$k%aT0Bho~vbukK>Rj@yH9^CIp#qZ-?xy<`f
zywK_{>}oi~qh6R((+@N8zq99r=UvuAN9%HIZavG1#Rp6_TSG@3jU$;RPjGO41y&dg
zp*!!qz;OZ7_zksZIG8D*dsfTg(!%Gczr~QB-7CTec2p=QzZfM}hLP#x#HskjU2vtZ
z6(pB-u#>4>u<^n;B7UHS2tSm8&x2ins>M(*(vMXg57K<)T6}ozxxh)?o!A7%lIGoq
zK<iN_YWd{@Pb{Us$A|EYlAR=}J%+wF4yHjrDj-I549y7R#9f)jhjd_g{!~iuwV<U<
zhR`hkHEpVRD17j~kcY27j9cb7(c(87=-Ajxy!GE_(HK<mqk0$S&nm%)`;lZ@hZN0;
zv*vogJ^173y{KHWp5D~bB1I96z->~YK&=9oEL_Ax|9RmVsa|H*`T?bX{354Dc0k3|
zslpGX&+x*6mzZ)^m;X67jRy6<gyemtn7OYRZ_F~`&D%?ent86J`k~46vC&XSZ*=F`
zdq#1G&3;%awTo9>x&|2r)$p%5mezgB=7(+^<h$ox!o7FW{-+s$ZMzpw?@WaL-~`ex
zvkUe<tpY~tG0D*rPpV#G8rw$E*u-Fzn~?{u=K<RGD-p9hOR;i?71P&9VJol13w*y6
zTUKWe<8_V&c)YxgylPqk%5`7C+u{gzyXlhPmJE!Jcf>=F)gWp@3v1ryVSf8gH!)h}
zg6F+fgGPNR$jtC$E5+5Ju{svl&xjNFmAqqVBhNr$U<oO`YlzEMh%~M_vG~Z;9R;1Q
zMVOT<?7BNZR?HrPV3Le4-bkX(@2kW$$q8#g3w+*`qkN7z?sqSQyA$kK;)XWN-;xfq
zcZsk*t%TgD^}th7JK#d!c06nA3sXPdBUb5k<gP>@?w|XIJT#95KiLg}zeYu<Jgx?E
z9Y#RtmmTDU;yIkYJRW|%dW^X*Gnv4wzs{+48V;PR28~OWD4L<@d_@)=YI}r1b1TSJ
znFtIz_nWMJ&B*AR5vUe^o%wtoDeR`AyeoGrQ=1^#rgYc`+8-pM^HW0{xDo}%Q~QL?
ziesVCTx7A^VrID{x0h75C!^2qc(P%{5z@D69)9|8nw`>_0Uf0)@a{2ln6q>Qrbnk@
z_CSx|)JS)Dkd}hxM^i}A915>nHDTHBSa6%@jnXUJu<D;2#z%?4UpF@l`gDzT2hU(F
zd0y<)?-1}g-Y-x;Hy7T^Mq`&oBA!~O070jJlKscsaEd@4q`Gdh<*#-?S7IG8_#uly
zKUOjaj||*;dnXpW$zZg1I*3jEY{{NqAYxktLWhE5tkgve);|44d?ue4i0K@$YzcZs
z68zeU=Bs(&a7bThBUj9iw}|}FPND2haU+uNw?X<*6_wv-!Iy5Z2KC38pfS@Me8%sA
zn;U$2_#{0V<0K}S-6HZC{k_I+ZRMC7Cc#%;Ifdi=0zqSMJUTiwvi<EMJ}MuD%3D(5
zXmBbReA$X(^WO`qn<ZgHtpbMrPK2ObTj;$kszFRiK*fw1P_2{-hGpkS!<XNJW8Vjf
zdc6cO6ZvZAY9-_J|3WbB=u}W0A&t%d&4$>m*97MqL-G9i>15yEu{fqO7cTlQhVK^x
zaIBIp>9G4u+$Z0Gxuc)qns3T9>UJC4*mwYT1yq6S#t`t-D2EXt5>O?YFWdgTVM~wH
zz?q@j(0lG@oS$igTVMHd>F;V_+_95){Y!xteQ7YKEsv!H=!0R>exf?*h49ycLu~NS
z6LP)83w6Fpu?$l;h?1T`bcnSuUyy`tjhSroritit+@2|SYGOmJHECHH20cA@*u3{1
zqB)lU>T8z4j@8lVBGo4F(LBq(UTwexg%mKHw-Lk=KC`z96R}EE-`=S63E8Jx!W*$a
z$$IB@;y*A6FK5ojx=9Bt_MDX_iupQ%PfGtt!O?82!J+WiSPy>Sb<{U(B?Zn~QKiNX
z=X_kq?`ZwN=ZWUH<hTqUk)^=9nq!6RqA}In5ld52$8vAIN%VF550=_@n^{+f@Ubfn
z(xc89v}LR}-O)Q5-;UkNPwZSsRowglA5Ma}9$mD=&)Aeu0}|8o@T-m@ohttm(w;1(
z@!9HhtyLe1{+Uce<>a|(NFD5tyidkl$)FozN~q88(R@$!b9O<bhYek-M;Chx#S_6x
zxqW^(jog*aPMa9=f#xEzz5h7KsV>3j#nbq6dYBKr^o*?(?M)nSJd9Y6@t}Wa^q`a7
zBsOiM25k}TJAHmdn*T0M;1_k*@}dWU^swd-{&#~hZS432Cub%>=35zV(rH2W|0jhK
z@B7)i0@2=$vQqBnHweR<Jjqb)aTqmZHe`+(gRUO0gjZiAq2}gaq^4p#Y<@Zctb{)-
z2W9=i|I=v3N2^2IHxo!{H6;-@(@D?8Y9fF2G0Rps!j8GTB@Siph^zMlmKGuoDUxG|
z-60W&b8izUesqGbvxe}z%U6lDZ78hybq)SZdWN;uFJP*PGd^6D21_^1<qi}2ab}q(
ztjX{Y44-R+TH(4_dNdKXmcPITqh07fb0=h+e$OuM&j6#*S@>$UEZNlQjekX&SmQKj
z(tX*UAJy(dP^crXLu7f;fgbjDwLBG;<>S0vYjJk07Dm%xR;=1bri3N(`R|2L6gZ40
ziS`-gY1ZT6Q48T=<}Tc-ph7y%^fAS7!fWp*V3nyX$_0<3{X-71fZXG7{lP)@NA4_G
zjyn%#PZq=CiDN+W`90_bhU=aevi|Eo@WyQy(zMDBjAm`eU85=?YK;zEqFaT#U!2Bq
zwg&uGel?rhdk#A!ZE>CDd3@Hj8?8mLP}d-ZJFLu^PW2SBt$!S!@WqUuSh`8r&|M3r
z<K*E$<b0TR{w!e^m9X`y2h7S6;|pfR@w@A7S!!w?c=>8_t+#SK9>n;Lu%952kPmKK
z<oW3BraZ;^wGbK$nCieCesRwUyz_1k51sjqg^w=B{gYJLsUyaG*8G>E5b?v6%!N<X
zHL#p9#DII18}YNxDsZcYKL1{wDf}*Yf-Rl#aBb{p9&&Fiey??hVUgGIsONQfqUi_g
zdPmX=v1|AZcT17Kr2%9wT^Gz{C$LjqhpHQkQ>O=-oLTRo5;qC8iFpUryG`ph%AO+E
zw(O**tlnYV6E`#nm!_wGh|z=L+o=7MGPM1@PUOwhp-yhu_%>XTMBC_6JAYGpx8fj%
zJ|9QtZEk~`Rx<q7ldEX;(iMKroXlfC%kvfmRo*f<h)mp)ZnJmfDqG#?+wrq_cFss%
zvh)&0xFyrZ)p@vFeKI!-*5D>%J-N^0t=N<^7e>~+gNV)M)b;l~x@(Fd-9LB{{w>Xb
z(<xcBaMEI&cG81iw#(rPk$d3rwx{SG<c0ZpBHr(Jk<TNY81*5x#z3Qsefr7quc1Bt
zW499$En8vwYB^qTE)(8_%g~z@H(`mc0(EY=hc)`*eBne5^3Ep%Pvz|4DQ&Ch<y2F;
zsP>7_>Ut6?4KuZ@T)B%a7uC~eeR)`Sv04(!he^>5n{I=q*kK-h>;`I7U9)_6JOih_
zA4xYD{XdG%JFe&Ni{qs<G_@p6BPA+?>T}PPhLKPtA!KERkR(aFv==JTG)h83^||Mg
zgv?5ktn3sDDeKGc{rk5+d-VBq?m5rb^QFQSyR3MGwKrY(`~Y5!ZN@1^3;D6G$1u6Y
zjY?!IlaS8>FB>xpjd#9+eF33Zlr)8U6<X8Z9wvCe;|)|#o<~n-^umazmbj=uhHg{;
zhQ;=dv}I&6PI@#7OK>MDW*$JLuc>gRa0{Q67zQfZ0kE>-8b~Gdg0w4RJ1Sq|>}wO~
z=6Xs8mG+X++k?5cl{WJ{b{WnLF{YpE2hyMe{h<7#iI35gq(+fZToy<1Mcv2Q-eW&V
zdx|sPa#z^RJ__YFI-G5M;tkLJR<QG)!y)-`C^~vaVAYorQn5z^_a1zKPvq;M*!3Ll
zzVnP6QZvP|IkT$OhdIIP*rRYf#2TI`Dw5FFI#MInhv(vUQD&J9cDB4?-lMc|%edxh
z-lYaDJ8GEAt5m!S^T4><4VEd$VxznprlKBb=#4~+Fjby=x&RY=z0hM!0xLNy4Usj6
z*p4-KSx((Z@zXzD*tYf-HnneuyHoYxym%?L_>Y5xC+QIPbQU=?Di~{*9feOxUA7*X
zw)kOED%ldLk6&M>kQGX8;%|HFaZX6Q_{lH8H+!9Mi+3WEaxBAleSu8*vm;wIBN5$R
zgun;sN_aT;0Q1><fke#nV0}6>(bUTZvbrr`QgR|nX+>aX;6{|l(jnd(Q&~!v59w(;
zO8kB-LF$#jo@U=?5)YlKU;dE71iJv1Qp}mc$VgOGzE9qc8HuYlj)WI13Rk79fi1J6
zV1Sf0L|&Z+rDhGpB<mn^yK0B#q=wlguP0}ZRuS8Whgh%mZ8mvt1Ug=?Wf{iziC_0f
zQaTx!YN7&Bo~$6;7GuHE+5kS@en75Q=|k*r9aO%h0Yw`>lIL6vBwDZ99?8(en^M7`
zw(=+Iap@KXYwv}>1$iv=*|}<CZ6OCezJgtOVkB}OtOG5{@$e`11G!RbCvwb-2FumE
z$kP$|%+SjlM|p0?fRDG>+#8nAv%?>j9bbu)w|!#+6xDI;k1enzWIE3LvIXA6s=~aH
zlh7*MpI$4QOgD%VXnp=C(D*cs7w<X<lUHn@m51ZdSLG8NDK5jk%ZFm(cqw7`G=T)&
z(&fdz9c;;Q&RBa4Kk3s5fufDD!>kl+68eebsRs7QZ9dj+vF9q{A=tb(9-2-4m{0F_
z_9Ji@IX685C-sJ+-?BwSL&gccp6-N%$SQJ{?<5soD(r6MGBhy=!k|k6YcOpNj2L2r
zg{NfE@Ww`x>FbBf%_vA$U1fhQHK1dgIh-i+gRaVpP+tEQ7vzmcpOkp=GpUET*5!+i
z+lCXj4I6lJcZ6ucpm)T5i8oj-{)XRP-9V{y6Lhc|#C&)y9(XkX>N6i<`?4tf8#e+E
z9@c}19z{?!I)$~5WVqeWGI8^(B}~;MRBV6Q1~N@c$(p+1@Jl|HdCyn_;l+O$wbukS
zkF^jx#TGC9NAb_wgUl|n&z6Wa!EniB!KL8`V~ggplTY4>Pa0MeRf&mYqsIm;n!Fk&
zTnxwd9|26!Y9K89VFkbb{S;sQHU;!n$^p3u@Kt3HoGfDaQ0Fq6n5Br*R@joHTgH&T
zcmjEHli>WM-*}+<0W?+4hN=Zm*e8#8d|H%5E?!uSiP=Yqi{l{3Z#0A<w#9Uh_AMg0
z%!wxiE7EJtihS0+FT_A2ldF&<{$biIu6Fz<=0&u_zQZq2>T(V`PTRso3emi1p9RQv
zsqjgAx54An3;Dd$<$PrRZw%h-&0jkFCaR0Vxl8VKT)Q*?bWbM|=jnPZR6m!W5OxZR
zF#=0dI-jRk4T8f1^U*dz2TDeIb1j<&ei=>S#=>~e@mb49&1j@QKN#_2Wr@Ubgf87;
z|B;uDuE*m#vvK<+A~+~saLJWwyh<;F_AQLzinSKFJgN#uTr=jG8rfXSaS!(jokdkU
zM)QYfBj|1uOWIOi3rj;E;A)jN=(hbr>_24To|zf6ttk-2?|Q(=K8ZU~4IFQu!58Sh
zz^iwkz-jM5YPayT$nWPin6StI#!PNvE^ps6e@8QjkX1ngu`Jq+$R>vS^hD0NfWF1{
z=y%oz%Rk28O(i|j=A;TT8@wU7D+-yW74~I}gC9F)6I&B=j8thOZ1+g~b2W{Oniqw=
zH)U|dWfPpT?kVgl&R{>I&p_m~^SJ23DU$K_I@va;5yjt7^kc?$Y<rjti~TOKF}i11
z*YKNo%j}k@(qDqF{<#mgrAyO6`B%|u@IE&0#T;C$aSjGd{ZIT?`2`U_ju3L&<!~s+
z7B+qTf!|jDhsM&s!K%@O8<yTC-zSua$21a7ULE9x>zYXHo;j>R^*Ww+{J<{k3&K#{
zcy{kyJeJ&SBX%dcQEPE0K0T8P0h{uwEJL;Fcwtrzd>M-y6IJ<$iV{c^CxK<oO?-IN
z6#|y}z;@Z&I4VVxE>Z?`+BX!|JIcb(yZeBY?-iMMXyW+RXBaVWFrV*HOtjBGg}D6{
zEPJ9=m3!TL@Sm3sx38qY@LE}}pu8DxwG8C$kA~6LFJGgD#8+_Y7)uSTMO^8D@SW=O
z8RcS|xRXx+uhohci;F+P@!X3zHQ#~;nf%5Fnmf6Bhrm<?)Zo;6ui<%oHu`T}1TR9q
zz|1V+mVGUnzPs>%OxO}crFTg3-`DP7)4e?MYx-XzUayRK8TawU3N>oHY9S1diDt)V
z2z$egM|fJ(T(G_S0%s;3V+WR<q7rVUaQ6Kix>CWD&%$YZ=Dci{ciDo!jdi0fUYD!p
zE}TPMSKIJItKCVXYC9}F^h$iCtA==GO{Q+kM)K)bi@{Us3&N^0v^u+iE;2TPNwu4q
z(_mv%oID1f%INW_n@(WSh=n}beh&ZP9}Kk#Co!ct6<=R9WpQ)$*z@c}R&h7c_70tn
zTCXpYCFV!D>fX7$Zo~j?8s$%~TvwrC%|OfwqPTpA6Er;AhFRiWyk&wH=uC?v%gQDB
zfM*%Fdr1j~N$K<CjWVFcpF!al6<TOKhU)qLg+!&%+~rDa_05~pL|JPRL@Ip;Ky|=<
zh}a_u2dkyYqoIp&_TMy!|MXKd=#35^RcC-&yCtZmpPGo4X7K{)M*KGN2>wi8gLB3l
zBah$RCxr*}xy!e4I9+}=$V?kfQ-b%h*48CBGd_xTFTB9+P7Xt#>2g$IeGQDeR9L+!
z!;(4}j^Q@*Mnhv;Je>HFA?~_Wh*F2*MRNzg5s$D}MTcign5@Q1cz!gST+e?3Uip^v
zL_!o%JRk?%1H<`0pCD*ku!eutK8>Fh#4ziw7bwg&;^%!1iOh2D*lu_qM2*5q=(``S
zFi1w3H_mQ>G3_2WJmfrF>Dj@2ety9f=Dl!ld>Rj3{f(%EG{Ew=duheNJR92-FPL;h
zjXjqpv^H=HG0m{yV@FHUbA_^e{p|7d(aREXyUR3w<L-KT>{1L5?|v?F-jN5DYV)aD
zw+a4PwHdU99JS~8qj>mcp2%)S9EDpteDug{Zuq(l+JxKJ`U-bAz41TMM4pcsf>+>=
z#V&DJs4RfS5%H;i?npvTgX3ajC=dK08b2|Gy)Ii#nm?6_*YzL3>;exQz3V0%GSkO5
zU;9X(%HL}Lh9hY2)XknXYQT<^;qWa!msszPu>H$kFyqscVHiFYC0`l>bve_4?)!ir
z*NVu&=K1(<ge86-OOY-$!6&Yx@xuHQ>`H!yZJ(|_zTa$*rxM0N<JOz#ZD<Vh?X!gW
z;xL|QSb%AxQW?K`49@$F#=y+ssCDWC@pKCSQ_)qpv(y6=^5xiWdq4K@{898DI0+BD
zSU_6ZWbwDl4*Wg6hPZsGubwi$1*;6haDDd*wDvrV1v!7n*3a|dhx0&U7X6EiRu~6m
zn}*=+{>|XF+8<k+B$-vtJ{Wt%6_S<Wu}!Q5t$G*O`X9>V;}~riv_BO7pQX(UnkqW#
z>W-nbmw8=Wz-FEa5jehhap;>7;z2d>EKp`Sbk&T->bb&gerp&SuKPsh%XhK0bS|Dv
zTTbGJ=0RrmJM#HIdt7_il0<6dqr}D>)G^A38>5H7&Xg|FeQygM`|mXw3azN2da^n~
z^qRcpM~LS915D+l7&>xvG2QfzHBb0YymLShOSY{dYa}W2&`H31GgCoQ;V?jL1)Hjt
zj+u^Y$op>^V88wqW^9WCg)hOFyCsecTG}Pfy%`NbJ{#HkzjBxW#W=U+F8ENuu%&A-
zbmkwT=ursQ-%HTtQ*RNk`}vUe<}fsjo6hCr+ys8-8RjTz(2v`1!~OdEbYP<+7&kOq
zTW#6Ew5#;#(NpT+Sknd}o})nb&_=W>uY{>?32^FCFHGy)#%F&24J$UR#>??#*w$2x
z9+EnoMszT-Torn)+bwuw3W)iR703fLFxn^@t%OXG#4OF~f=)?XeQYHv?AVJ}g#44@
zd1n}-Tgjw9c!0Ho6I3qPgZg9Uft<rD(z;|W=53t^LS8{&qxO=5&RoWfj}o<^YoN18
zk7mBtB?-63(Ku-ty2tMp`OPPza+Cs(8#^96CiaWIc}mmh+Fh*a;aD`^d=WOg8$$al
zWm>*!A|I$}#5*pBh-+@X0M*|H+|XhTDYyQEr<f7HIW&!1nMm-E_7&ir;Rq7m1~&Hf
zq2OC9g(>-tB+2y&>j^bMdo_IsJGKKIr)LliQv&77L)o*;f!K>9n6<k(NNlmNeQz9t
zUxOlG<QqRAiD~533RPGzcr~WI)4?9UQRMBNL~K4$U7h}7J=tD90sU0<_(T_dXuX(%
z0XC;?qr68l-7+~oWyM9X8NLl4WTvw%^1I-o>{^>Rix|;ds>$M3Xo-y9#E6$j>l5EQ
zM)+9G04#nD;x=0O@G#ne%(t1umqqO3`IlluB@^nPvaB2Chb@ED`BMDC!YFohM?LX+
z;?IdN3s^oMi!SpFsk+@-y8UXJ*ydgsR};9fUowN~3BBuZ_(~HVx>18iO|AICkW>8d
zkmLNnye~L9x{mLFPS)Fb1Fmck(PP6qaKjN-`t?RQZ#`~`24BVK`m2Iy80FGYRjKH?
zUyhoL$^*0dQ9SDKGba5dnR^v|!Z~f<&}qXf9Cj^$FKbPQNEHjPsn|k)>=jW$!|2JO
zCDceRn?`m8(<b448M*5&?JP^?d&15LS(aNcb9E-XQIO;5!xzI6S$T3>)q##qp34_M
zdrH)#hB24#>q*V4Ef6mAgpc~S6D+pp;;-})u43)Se_V*5?dhALeT|CPdFcwlS2YE_
z4MsA(;uyTTqMW_FJ`?X<6nt<gPf7K+aOfCfNO}(z5dUd=;HsMptS?$iuI$bezl$tl
zYG&V<`;}S1?KD8*@jP3Jav^`8mP#z|ofFCIy@<62TWov3QM{L74#g^#C@IWzE8nQ#
z=P(M&#h)-<&J6d=6Qk^gVpI}qk|EL8(JSa8nM8B(^ZGGl$SP;FeLoHxE@**Xg9qLj
zqX*twZxgGAAuRuV8B-a{xtjb%d@{Kemwz#V__OQyuSX&(=`oZx1+N#Cci53Zn~sQM
zM_fU*jA3-flo$xr`35fg=3(tN8E$imv98cH^kLH)erIzWc-RT^wr-4V%F-yT{-(^n
zY8hg?*9Y>oX^zOJavM~Sd4!sGI)VSah7CJk;_)@Iv^`sw_KbEG8+Mjsx^@LR+}FpL
z3Kcr#h7>>Z;1PU|N`uqR`uymlR1hc5#Gy-VaY&XTb$AJ^RxD3P+twhRE5{GL`iC0@
zhL(sHQ19krJfug4MhrVm_g=n$mJjC9-6M5~jQ=kTHVvUaBPHQ(R1chqDuEYob@<}t
zPngCL4;r_mio1;~#x0G>xM^S@+`E;+i~Le>SW*%<^f<{2_73Gk`sDe$FHYzhUPIzL
zPO*@}PFyo^B<>g=&cB4a(9Xc?P;qKL7(XyV<5(wNQM!kU(%+!+k%g4MdW%VSJMiSJ
zhw$%2k+8qY!}GV>p-O!$O}A9w>g+VPey>c=eqI9Z9csL;ql{I4kwbC$0KV?D7frmp
z6sm82z(MWTq1e)xUW(Bpea9*MP%7jju8G-YVa71nSO<3-OnI%hJ)11tXP(^HfOW+-
zJa*tI+%=Dav+XS`I_iS#LMn_sCy!wrbMWAaFxvU}8ChL)4}KV>fsWrU@qqa~_{nN5
zKe}d#usf8a<rmDk<m|7kwXc*HDB07YDceAy)(wtaT*TAIOheh9jc_X^oeb2fr&E4s
z(PMHM@XTF@u6=hBgL3R>9(>`o7nDHHaREQSa11X?F9%J@r+EF+BqpC(!h<f1rX!X&
z!F=&dpoIkxsak;N4GmzJ@pznKI}~S>E7Ix7-c)}?7`gwo9v!M}pjpO{UmAP>XS*Lk
zvrDG%Bv<$x8CmdgPju;TWjT7{QW7!?TW}b;LcB6qi+a_JrkOh_@A~UXQ<chy=|@?r
z{NNzWqANjLvV^QvHh{vd8Y0uMVUXOug;y_0hVIe_5Huy2_I;>>v6~0b6)L85tlVCx
zFWpY(ZrcFCOKQQk`UEDcs$y=}JUY<9p5EWR35<u?au@xVxN&GXF3UMagYV|xviK3u
zeRKicmo3Bnt{Kw<uQlkLC)r%%vnqe6qf96ERzT~+Cva`Ree^ZZ;fq!~(fL*~^nyr{
zE6Y-hs#$`O57oKqt6Eg71#VuO%;s$wKreI+<&h5mP<C~+DBSO!C}6@cl(7lmSM{Um
zYhPpjRHu!F278lc?PYL9A(suf-NFL&q~Py_IZ%=Bz+5{nh(g4Q@Nsu3vCO|q?BvG5
z8+$V-gBbKT62a5vI5PZ}4|JULhHzaAs5rhGrk_#AWlr&EJVOG{*1aXFZlz?3Y%HnE
zd`EasD65W1#I{~%_Edf`@pVmRAFRtr#V3CZ4g10rAC#eL$^e8(mEy0}^|&N15AUy9
zguhdJ(dM)QW~nH{lD`?K=Nt_?5>|rQHv(B3vWaG!JRduq!Uca}``>1OodN$y=-hMU
zM*LXtE1fRf-wv^)-?P!mq89TkH88w>DLHtc2Hy$0xZ%sJV4Yt%)VWFH>Eu8dY_SX-
zbH|YY2X(k@@dckhl@)moUPZj67K>%aN231YldS#M0+M$yT9i3h7kbktfr-2m=^mDV
znPZ}e-YE$*%-ALN!g#RFjE5E>3w&{1A}bI4!X(b?z_0xgXzu0;*6KRs%7bY{BH0br
zhuXo#X_^>)X9TX7%R&>!05~0;gQOV*em$KP_lAPXY(sD>)L|1kK8SnL&oZCZGthAM
z2hr<NBNl(u$cV=i@lAg?hTNAWoBIwi3xRun*S8xDZWM`fSG*%?s~=Za+a}}BNrRdH
z$l)~Z)m9vlwjS@a?_x@GUz0Sgmu!LS4bpv}U0gqPtLV2<0$bL2%=YZIGG>4I3cI=N
z2aB@X%YKUOiIkE&em$gxS~b3;Qpih7xE&MKXa8Y*(P^B#?-*QPxdQT{ACZkOobi{H
z2Y5e{gx06`ShMm`67zi`_C5##mm3aXoqd5AOtQnC)1%l};l4Zm@M_df`^tiEd&Bjb
zV)o}^5Tv}kOIkI*u;RNrvHh?v_+B6o@bv@xQyYm_9ygGKaZzBn@`>Q{oCUsv-w{Q_
zd{W}&hki>pV5z4cObC|;i6iyZGuKLE(a&EbZ+xvdG~tEl&Nv^qqT4RIx>yw&Yxkpc
zjhv9@oQu^ilX30NL3s9o8Vk&<!6!NXY?<^JG`rZ01sTDt@W2{w_4FF7@y^Cvk5G6#
zN|}yQ+y^D@^2DX)ALd`M=g%*=!rjPzF#D{INq$*;`XE;*{A@*6@99Dw8G{oP`-uF`
z8T7a4m3YjcIM8Sv3vWCYg5kG)qC5;1c_(>d<mp`Ey<-@-UiBxWLrHLxtic}7v&2zb
z@c2zK#Jn?}7;<hj)_cg~$Nz4SoY*+9u^NJwhrWn=?gq2#lii{Hw<q{t_XpL@hehqB
z%OS<=lsMEw2~?{CASJ9u#DY@U^d%{*Jt$oCLrZWM3Esw{zxM3aocXv!lz=~y$AH8H
zw<`AlHzw#aP^o+o=5;v3W8cYG*(ZD#JXnG$feoTNe@$7zlC3=EsWcxzt8w+@X*|){
ziH`mJ5F4Toiq0R_z&B=bV6n2FTpl<Q+-%+xqZg;}Znh#%YX421|FWYWTjyfI5nq1)
zb~acp*~enS-r?nc=F~9h9XtN@3$xRh0@Ks>@JsD`=*DxS@a44~ym+TGh&5axvO$^O
zy!Q+aN~H5ENfYQ)#bQ`&wi^3R#?pON<KO~mfEVNY*tQO3p0M;DdTf@WN+!W{%%~Oo
z-lvnO+ZjdIo4W9tfw|Co?mHNC6yuK>v*1DfEnfP5uSmoB4ga}Y;2r|}z&t_<4kQ}$
zPktiqv(4M)u5>vr74W5`y_4z3u%~!px(%N(x<b6bWH~R|v7Z;@8c=<i9YX##fW5y{
z$;~p0h`ydGn3T;R+Oxm2=IIm-AMb>ex@qM8ViAs2nZrI@jmH@=6kW^RVEJclNQrvH
zR9}t3?Fs7Ot#VxyI^76MH*5vZ%24>RY!wi#^Q^#9gnz7qn0eJ03>`I$-I0{XT8Hrh
z*GHi;SA>~D-hca#aL5;Kv0Zk;TwEE!e(4q4#MG#BwZrA8v3VH|oBIc|3`1dN+Ue@l
zu_v(p#WJ`XED2Xyy5W+*?Ht_MNRCc8P(3gw6Pf=ncsBkfZm^xmpG{H5)~k!D<LN#y
z39{gUAu)VF(jl@<-5XbYQzn5y1NpCkW3VT8Gz;kuVm*HSaEzExUkPD<5;2iyM}@!*
zT@Bt&214rRsc>SU6O?`yoFU>-+&cd{(>^?k50X2N#|9R%{0BYIc>F7D8+0BPH;tf8
zZCk6w#v_Hd*IyDjAqeWlzu;)XbDW<n$<?BD=y?Y-_L>RrUe!?iDJp~Ab06aDslcv3
zcjw2ZbLfkI0vhu5up!TjuD80*()(AVhxuLZnKz10*m8xX_r%bdZ!gnZ>6>`9>Uut`
z`z~?qZ{n)$#gN<62Xzj0c*r1)jIK7~ho35OhoKYMPtEV-rh_@xsfdHEx|{gqgMgzX
z{<64*nf$$bD7F2(kDBhA1C6N*p;)&TyW4cRESbT3Z!Ly9hNp4Iko8bmS4FOF&SaTB
zji^vq4ECokp>)m#bY}vWKg$4DICtSY^<Dh!_!|&%$(+9~-i*`j*3%jK|KZHg0PgLx
zpDd6}V;|zDv&nWc-1WOZTyFkLmcEaouU<Fd%nx}2Cw~zx>{&(!KkK3a>nv#wH|HS>
z!uYsBD%4Vc1@}CCht<prr{B`d=w=07aGDnl;ql=-^+*={+*Hl;C%nci|7Z01n>2oF
zb`P?d=Rq=9f(}m7<T;j;c;h|9w={r`_we91UWb9kb_qKBPLwzz@d{5}SO{io1!jHk
zV~}hMrNRA%!kZ)mWX^m-mx6F~x0pm{ta-@a?9im2a58<gU5CPO6>e9RL~>*k@Y~c_
z*hcokn?+Ok>z9snU(I^DFsBp76gg128VkC1R|^y@ErMA$eEFM@EO;Z40HZ_i!@`QC
z=-KZ?b*hKrs(%r@D}65e;1CCEmMTG|`7QWv5eZHMl5kuJ!{QzBWY2AR>~I)CTb`|{
z-o0CoAKE<z_iVR=6GB#Y|L<bx={biwOLx#qa<%Y!)^hUY#0UI(cRsCHCWUclYDkdY
zFdh}Cayj$k9BgcvO;0w*&^N!#@wS%-%_=!TOHFf$dHHFSF<cIV_sP=UG+7?><_-^2
zeTy4S9|&2j7x?O0k<HN6>$pk38_o==fF6%mSY{cIY0pQ}leb%0Y0+(rHk?HNB{=g>
z?~-vy>0D~jd4~2J-$U;{)Wq33quFq&MKGkJg&nFp2mf@M@#yZU*eel1|7^(@UmE4d
z7gbKCmsDp_`|K7Tt8dCRzGiUC-(vD2^orQ!q#3v>X0oF-aoBxC1uG6KfXn+W5GXkV
z?|CVs_U%<L#6bg%J4PeXx<WqM`2cAjhrc3*!&NU=lz6#~ndIAJW$PBASkOtX++0BH
z&U?byzt5P7**_BOb)77$aDf`Hb?B`gh4WWti#7EEVWq|dq~|WO?xpY8)qihE@u1gu
z!8r>jzukn!4_2V%igu!Qq@9!x>n4>ow(MJsFGg&Y!+G^?;G5-vyWa*u%YMOk(&d1H
z0ULArCxQF&kEA2skeO{5gx6GqFlE&aXwKaU(Pv`e<N;OEsv<C=F9N{jV<E9CsUaUW
zZ9<8Jd+ds#uGp7;Bgc=<z=P`1a8KJ>@BqidukQU&(qf2mO|>NNV-&0K86kLlib&3b
z!Hn+|+|CQNu}C(XoPB42H3O88`)Xp2b2{<5z6)kvzfVHdD~Lp3v`DU^h6Mk&o^`xb
zLhaaW=JP&-dA$o}+MBmSru-3dZ=4j4*4lvgtpBjimTb{O=>iz*B)Dd-mR7sBrjh=o
zk8NL^5aV~XSh(w7f)n>3x*L6}8gQV4Y*T2&&vl*T)QUnF>^=ZDIa!F;=tZ(@>1$Z+
zEHFp|Tv(M!9G>-kPmY@IW_?nVz(G_&f&vOz{N?eWeC8pJ`W6N&BA?^dz}sxEk_$}E
z6|*@53~{E^BW54@jttRkB$2gU)i?j%z>k6R;lAZJ@@}((C}DOY>V6vtt$k-%V$WP?
z6{nzNLlXENl>+a~t?*IEofWAk;Bfbq!slo%NKRA6&^(iBpU465<%S!|TZN+PnR4;^
zdJX6qpiatS!%@vCm$e2i#FCw=VE3X|w7$X-C9aMVnSNW0+I3;<Ve=5QT7{%wxE-`U
zwnme2E3p2K23`uXz#^Y{pdWaFO~0>!Yg)5e=kfudHOK~CeJ-<^(+ptgBo*|$VGeBi
zcC?#B#B&Gxq0w+dTv4|e?Sg{Q@KiLL-xvWM=O%(o<Vd_|upXb>Riie(X4FHtwXQKe
z$6^%X$nTpkneIo<UT$e+uD=9UD%%2FEv0Q;u8868j8twiy#(v(?xOG1S4^=Zhe`KN
zr^hQPs)minmXk@~IpHt3y;P>-lJrQ2=_WGEEgVB9`iuWwih&7rmMHx|9j{qFCkAD*
zn0dqy&(5_b!LnbO%bgH-JT(e6TD<Y&4m)`IFr8SHe<hZy1+Sfao#@)F8Ztr16)%zA
zj;nSHJg}uQcy9>9B}13M&CN9==Ts!|HhE7}>z3jAW1E@DdvgqFoCBd#CxXeeO)%1D
zH&}O7u%|U2h;5)5D$WW5=LhkiyKf5GpYq0A%M#H0u^W^4Jq7Ge<cd2-Cb7RM1~}xU
zB7Ph<8CxxTKw4!WbbYpCpEgS2*Xe5fspUHyo{>aW*Zab?l{KW<`xqQuT#8XQE8$7<
zLjKJCEv|cL%~h}0f&YlrnC`L*`i4rdVdr{8C4P(Gi$osyo>LZFSW#$uMHar^BXpmu
z8=4$D$KCy=^ObgW*l}kvEQ_uppGuF?tI?9&PkJpq(5_9_jIXagZxRb<rzG()x^G~?
zkzC&K-$*JkM2YSxbD&*emNdC$C7&C78=Ei3^F3)_;PbdrSZk3XI+pzzt@^Lg*lEj1
z<N6t}V7Z8H%cy1l$sM)b{w|f?4Jv?G3FPuEZ&>*D5PG9^IL<6Y+*}kw^8zEOtHC@R
zWoF0iXQ}gdUD>3_`95R_=dc*37etUrgXO?;qS#fdNw0S)ON$r-f99?PuljYUmUoA_
z^a=j`LOs&yN6{;0GxIhxCFO60pR0{$MH}`Ayn;5l66__`P$JONKbQRpuO~iwlHhu8
z6s$MzWmhuhk;tB4JP`4QgkE&O2eUV_TasP)H%$gJr8HT8O(vUc>kV7oE|KjsjfA<%
z702w^gO_OlXcRxj2|tG8_jO|Y6C_E?k4+@&mbAe3QFY?V)}PF|uO1`*9f0{mG~sr-
z4C<%0;<z1aLHgb=a;ZU%dxu%TiJi$9*llAQx5JvoZ5U3Z=Tu@y^Kjm?s8TdXp0l!$
z_2NrbiDY5(I+D}nz<Ym{!LnW{t~TjA?rBva%deKAr^PJVd}09WSE<4peQoOJ$T6qv
z1Gt-=#Zk6H`5p6OGRJi)I2P>!dQ+RqAL<hYUJJr~KW4+?_{Z==@f<3I3ID?ko5=Nu
z8IW#K0t#DN*oRTxI9jC?GS7__cC(c*>0}0$SZjfVa~m49P2#H8PGffMaei<8T2|+C
z8~U6s!&f~IO4aj-fol#wbxM)1S$L5IxQInR9_!<iu8(-1nDaz0AB<hLf*;=b1pS3~
zh@D;@{+#dxjM@&O#f0l{H-9g=b!aW0r%_2ewGMGlMJIYLNSK@ceum`pI&{wAC7`=~
zIq!3C!R*iay!7Wt`f27#@eO+|?ml?~eS5}*TP6GPchEq+FP-I~aoHrI4EWl-vl#p(
zoR%qHw!I}|!?#w4vHe4aQ{92%=rP$iZpdRn>7Eo0C{IJQ-M|A%obmE3O6wM_hS-!q
z-t})6@ALD5UyjFksZuZuQu~2R>|KeTyD;OOljr7R8${9JpYWmnX?j{}Jl*o2H#z*k
z18nS)KyBx1_)kTS1}Yz6izd$Jrb@0bVEba)HSqv8?Y>L24a&&!;6flLovBq>GVV=3
zOS4AKrCnJ$f}5_C(x5Bk^z%6KpRp^bP08k;ox-U3FfXq5Jc6I|xF+y1S@=^agbnRJ
zfU>5manB!R%mj)N=i&rEyB#*_pAmX7v}p63`08iBBiWiPeXP@ejY(C`&={IS=1yLY
zMSfcZuY};%Hn-=ysWScX(T9AJpGsvsRrvE)L(wbpB`ork<*G-z@lwhYn0R3wK25W&
zifsv|N^e%d&b9y9?)guNesa7GOPsT4`coVJY(fR@T>BbCRgqLac_J)QlL3p@8)!gZ
z9v&Rk&(;N=gDY}-_~Cz<xV7Xy?9H;I?`Kb@%hvX@DZ4B9gQ;%(yy6L9@^aW(R7Ph?
z1)~mK&jUy7#1~cR*ts?vr2pH@9s0~*@|3~cQ6h|%&8`&f&Od|A1v&K2%UbZzu7Qy2
z)_m8tKk#CmIhnI=6n8rGO&rp=6ZhqOf$}Gp*n*<(Oif0L`W~iKW6KA8qASg5su2yo
z`4#Fzhj5QoE;L{AGIYKD0m*;uAvI6<+x=Gn9$hK?!Go9JCH!wLJ~)Dtk&Nz<E`m<I
zpXl*-KU{fsfJA7X7v<m9f%|=@K_@*08h!RLBO_fr6JH?g`;(z`at?WFX$bc+GjNVY
zn`mibH4B=_tG-RsgkfD?Z1xm}^R|Y=>Z0oqyTlfA_GjT{*~3_wbPyKj>q5!n?CNh;
z^;OZn4me*gl#KuL5pS68fY(hIn6q#Lla1Ua&h}Omsos$SeOWVn{&+Yn-k>Euyu5_X
z?)}DII}Aq`Zwaopbrz04UyfSO&yXoID5#&)0bP~(@b36omb&mbNQIS&p7fq!N4~Gd
zm{(_*_D56PwPREDh7Cmmq-ccpH*CoH?qYbeR{{K<zCmZ}EWoh~QN>3K>t&{6MT-W6
z2;RU0dwXm?$V>sgI18%3KVqKlAIXngee{#m1c^nT*q>7;i8<2*zrX~dyK^IStPUX?
z7k_3a*Y}dgBb3oF`XULp4aI}{1?=ReY1lDRib-gPTeoWj;>C!)m?!gyOi`GJg(gwx
z=kH7kCrrV)8S%hBJELJ`hN#2E0ws<Qw>2E40cCe}QM=F^%*RN=Aio%x-h6;vnY0t0
z-i&5HT)e^6Ya3X<x=8|OPr>=agxrkKC1usI8-j1fWBco!;z063aHuQd`#ElCBizz&
z7CvH%ZDC;7d8yjF&lc@MJ`;^j8*nLKjyjh#Av;kXir)>zv-=I8A+1E@->d@WvCr7p
zI$?HhoPz4tl`yA71$#b!7frTa46&bd;Nz!H#4{v_G#!}2Ur%LVRJe&xD)5ISTO*b@
z+y}L$*1)Xa3S4jZQ<S}V3Z8xt<Bsn}(0S(pyy@!_PYlcv8AD9<ngiW1a_??|1GoYw
z|8uHd*clBg&xi3ovofX>?TIC;mO|feQ)u`2Cr*1SgB5k_ps#o=CLMjwwAzHeim4f5
ziFwuH;TBeK)OR2H2Kqv{%@gKlVS;{>#bRdvoHT3~W~zPO_-Eh=vg(Kw+%=hmR~oe8
z_wWEzz9GU17p(F4+5Kp#ZbbQWMgBhVEBqU+#_dbT@~Nv=@Kdd&qH9wudF2FYto1!k
zI=-%h&M%4V;Dn)ENyuM+w3DRo|5Kpd2j+3xV;K-`zW|MY`QU}x^I&s-IUYEtgE4=d
z@lAdt44-9((Qa#@g~fwa@eAU5V*!c$I1Ga8>sY8{n61u&l@L5AhP2jgA{Trk(Iqq<
zZoXa%zVSE6#ZfE3yg(Cs>sPYa4bRCn&D$jJuP|46h2q(&%i`>)A=q=n0NA$S`1@!J
z8b_?c#hp{gu~SJT(RvW-8w??34IhNTxd{8G3;i3C!RX)`&j!cULT_J`?VI)%k^7|u
z7%y<G1(<~g_D#cA$p>IoS(3P1$hXbD^N0*7XdpqJnsn~Y6u269Q*?WWA#Kz6fKrF8
zc-xo-I8i=`Vx#aqCGgX?B46PJ!IAMMI8#)0g0f*rLug=iFt_&DKn>|9cB&zQez{o<
zNlj_cbp03`8gPmCue8CYwrJioRfY$DOog0BDZKDs0@LYqK%K(VpuS&==RH!V>BGlB
zgOEkAJhXzJH~kE{ZTj@~t=+aB-RHTVK`PdF{~@>T9fsiW1kSrg(weXVm%lsZa1|d7
z+NnAfW$UfP%OXwjtnf~s_d%2YJ)+AE2BgBL+C$Z5vj*~;U7>tu!co4iwGrl}q`<DQ
zaITtP!nMvE#?Y$`;#KRuu{-Ct!2Gw;JVRZJk7Kc<p}P{zevXAY#pPVy$b#ln<bqww
z8C&b}XW}a(hKeg`INtHBCx#`$K1?x_RNfs6XPu_8H})~0Eo2m8Kdd2ZvIjznahyo;
zdn#EIZwtYn_OM=`>G(N4h2`m3z?HY%#7omd_<gO$HMUvAGr|Ybe*MKS7gbUH>V8bs
z|3|bRiUmKXF<HN46|TCoLnNX00tcMU0=2kj5SO2VLC^PttezIRym%}86grN=-`v3^
z{g;LHfD;eb(5~LyE5|i-%h~msccM@$bG+|)4X36orE(K5;d0-fWXJs{_)*AN%Hb=}
zo$&?NSRTa<n>}D%;w|i{lMt^_(xh_Bzu=Ij9?%#(9VQyi!~tiNaAQsh82vR6xRsG~
zP(n1go?k=fPdbhH8V)$j*9sTx3&A$h!wTdt3Hj*3)wM62z%b`MYdI+Ff~F(d6)oo;
z<JY5q@h9*y*C)F3`>|`kG@p3-B|frx1K;9g2v?s-QfI59jG{42_E}8oLtepc_m^1F
zlw-SFL&%hOcv1!Hy<oYs0Iq8V@d5U>OxP~Kp)yk%EPQr<`2V)%FT!!ImORr|H{{!8
zrOE8Dq1^eF7SxW+$NRxC;F9N0V}@DKC{<scJ@*NY=-LnaZ4~JtlP#!mUtqQ_M$-F^
z&xq%F11dji7gSkkaidaINU_L57l}~xnt2qC3E@QNP&eB8Ih2|WI>0Rq^>OUe&2+(`
zz4&<WUN8(Xpb0tMFtx)P{u44`Z~LagPoD(74}?yRtv+1hPa1YD5?BpSb<n?1MI;=T
zqIZWQwmn~o<^7l8@UecdsC&nohY23-8^$#7nu*|6IZ3x38O}e*a-6qsI4+Lb%7a)t
z(>t9+cT}gqj3`Iyv@eco*eCOS%_C{;tf96uS`PAA5<5^&#TlnOY{BL3#~`gE0pFhr
z6Q5O0!6U2zRpx5(mfJVM(sdhm8L$Yac=@u?`;OD&GnPTji$nZRsXAZel25JMj^P7=
zXMUw_$M>9<q-w7%!EHto-5$S|wvOISmmV2NXYJPjr$BvLU#-vdCMu)V!22lIavNs+
zkfTF=kC1Gc!#E`790r7Fk>&3MX1O3jT>HS1x&>dui)lc2u6RW9Mr6W=tqb{{p)D*t
zu?3E9Q{)FqF2bsf*H}(pJoKHKKsT|mZ1SHd^crQ$Eu2Ep>3uvXr@mraYja`4P<igM
zV=%v&ro;<w7jm0%?Zl`%34`vxVrPvn;A+ijFstvn=y}{p`es-XzB)MtdL~WeUR%fW
z8S8DiQN9)L36OxZz5Dov?t47-%usHjtS(A@_zErDN5WrEA;ak@IC)2S@x3<3#T{c8
z(AeNJFe!H+ev;|Me>(LT|DqbRXHAEW3C5VSdnmVCs)YqVb@`)B23%R-f%mUH0!F3b
zeAnku{F1Fazxy&2lO8tX`~P}Lg4}Cxm^lQ0p9;dJ1#@Ae-BMoi$bgi*u0@x`2>$x$
zczSui36um%^Er_|bjBZbe(i=QhM_-?`t?k7C-)7zHgGcNtnC-8RzwQ^*+_C>(pcj3
zJPJ0S7PEWf0x{2N7(~Ku%#Dm>jZ2MTNd9*ASiKZK_zlCZEDKUDn#|^pixSwAp&;Hm
z5Qlu#g2t{=sO;Z~1+_=m;!Tp=mgGRr{^#Q1nu=KK?jcq^aEv&bBZl!scyz53-1b;4
zI5v{e?@uDT{T`s)>k)I+;^djPjmYNKUv_kVK8|o~5Gky+WZCO(+3wCVAZNplu_@Og
zNcfo;kiFLgLn9usYun?<Bd0_(&ihSHNvX4c*E`vRY3We2>pB>f++e#B$785vGj5EN
zgX@DAp@YK|9JV}+8RQDS>+VXnV|6~Rbvc4|pEjcTouOQF%wM+D%z$M|8iA#7M`>``
zFOF_rfr-M~<6coH$nIH&TTElwmCTB2!x%fFTx$%Wzq$BI<SG&zI)xP-sUupe60qdj
zYG6Yj5c7-Updf85tl8eg?8>6qy&=w!Iqf*}7V?318NaIAUygyNQx&1QdK{LY8ID)Y
zL=gJ@Y_-Xv{cvQJI=brQh?lGGXZi_4(C(NE{><2j*S&S&L(T~FxfF&^lLZG~#!XS*
zHBAVAEI4UjP84^%-c3$U-VCbWa@de=5A>Qm6<bFOULW0i#4KeHTJPP<dRMu#7l#((
z73ZTQ&|(wOj++nbzh$w~gxO3f$Q+g9hC)Dr2}o>Aut_?(5UvK9!xVoX^c-M;DcuRo
z#T4M9Ydf)XEUxAb3NW&<10R}JiNgOKA}2@PVNDaY@XuitIJtQdOnomV%@2hxnS}z=
zH!_Ppaeoh1&9;1-dLtJ2t+Oq;Y{YZWo+pcKaM>dd{CQf9{-+y5H@If7g^#ac2g-1-
z&PL)Ju?Kn#mf&6QIe5`G8v5$f+3}W1xGG`+oD9w({(b#~o|}Lj<woG=xSJeZ?Eo$1
zo{(}OQye!y;48^zrqQz*LRT2trd8i&dGBV5^ZK_!Pw-r(I=xnGZ<Y(i=^iMGkmG6Y
z5zzc@HU7Jn2~*?yN&bLDxcg`yc*^{N_l^k==Qn_VSnUms=XSx*;;*=QUpsyeoC%*A
z89VoBARG@e5<~MXxE0wXx;XJZeyZESs#=6j&b&b+*58s;_HH0~x$j75p&cn>8W4JX
zA%uFJt{xMphbh87IMTU>m_!YNj~BItJNggXo}3|spIV14!BKFiIv!8Y65bk?f_uGf
zJ9c)?7Cx=IXji?8oop*1Q5D|gt6LT(eo2Rqt4i?P_KEC`Pz=~PQ64YVY{HTrTRc>o
zz<w%Af#Uf*NVU)hwWMZ@8@P+;I}0<Z=VOviPQ#?<8^nsopRgaxg}abywAe-F5>EWR
z9{t`%S6}}fCuI5Jp(JY_&n#pZH-A5I9&;Te(+&8vC8ppiWG2Sn`yftIQH2{7sj%N{
z9a|hKqS88Iex~mt&dhj-J1>u+%|f?<-R-5+D?g2{cd_JE8I5>s{wyKqznK1w4B<QV
z#`1&ZNAN@GQcU(pM!Q81;Mcqa-rti$bLVJ6vDl5&Js8B}Wlr&7N|(`5xM_&q9T$a$
zAEsBo?*ha57x0TlA^+%=gAWF+q9MMLm#6KXf}7*5`Aiih+!q>y(_0JSx1&9`=#0ko
zQNOS|4teQABN}>Mjt}YdA?{B`(>9mUP%<!(pEZuB(oY9~BfHEV?6c>Rg7fw1;3mj$
zA9%T{H46ugpT*PfxpBMJ_b}_RCpKISC6-^lk?u(eIQ{-4oO#qn=q#Bg<QVpm^=~JD
zrD8kL)ZK#@L#BaV?gnJN4@rN$4h|UC#Jo%UL{ZON(eg(;<T;5%l|Fq$B7Tslbmcu(
zIWU}frvD(-#Zw^s=Q8|qRGx2p?u#DnXRzi@CA>eL1xkZIW6Fz2{F1o?4S!|}T--_6
zT=I)}uKU8`r@ked6^B9g*CRxBbQTVtm5X}ihcN!)7tB2&4?dS1dDNG7c-t>z)U2nW
zdHw@@xMKiaqOM4{7a8#J^KXI7iTSK;tu8OqX+@uLGx@)Z+xhFU1Ni=HO~M?yOSJKn
zKeopvv0Vmo)Ky-GEjeJr2X^(~)-hYKD~;f`=uP;cW*+NQ$cGU{DcCUS1j#>l6enMO
zh$nxBllR8wKx$?wdwB5>`KNCr_}26|SdXXEy;g`ktlyER{iFCjr5e#PsUjHiScPAC
zH3E_!WkRyxoJ}?x%ugmtk<^p&Ja<t9Hhg%DiAw85Tl0I#X)+P6PTb6N6)&>7qQ_8p
zNP=2U8_V%ZF{*UgaQCv?=#ih#9vcqBt&+3Qr8Av7JXYtL)~lJpjEi*YUZGbxTAM8z
zIi7utp2XYgu0ns8K2a!qjJ_`^RVa@aI>dgVv!5yLw=%)#^v~$^qqQneCzdNzPK9>|
zrt$L!boos^Pk!BW3c6nH#%&ujY>j6*aPiMb8asa{|8)KoP8c?tOZp0(CxHrF)I^2u
zlu;->wjUDB?jSwbfjWQggA`nXeLLmox}jmvmU;re$9I#k6cNP2H&Cb_ioTf-aOb~w
zAW<9(PA<WqY_Ww(SwzxZ;+0%Na2vP%`XDeY<N1buZJwBXm7gVtNqLGMH(K_B^Le4P
zLOYTkId}!nmQLptcZGfh+h~4KsR|sw3A}E`aI`xW$#2$;=4$&A@xQItajMN~`YGcc
zt9vB@ITw!L>+_dkRNW-urn!PgissNa-R~f)(3d{ld4?|daTN#Ug~0flJCLY)5F>3&
zV9s%0w2*mU{d%}M_0Sde!~&mkvo?kWc=y1-O$G3tJ44eUN*(^ZCO7_`z&`=Hc&vUu
zDvlk?4X+D#kl%%vTQ!U+DaP@?LE1EH<2+O_o)6_8BG{iiJ5Wmhiwkn<;kvF0k#$q1
zT@|;<{PXuvfwsYgN1oz*l{I3MFk^1yuEXUDbZ~CmFOg|s0V*wzhm4lJaJaA4c5aU{
zZBQG-<%%-F+vPNFm~RFT^52M7PP&Vyg??zYd16R@BV>CVCFt0xZsefW0xC189&fJ8
zqI2KhCmGA5asRPx{EWb^f3ExqkI#KZ-<55|t#cjG`S%?Lwts^ckL;-K@rSS|;22!U
zF5=JfD&cZPA!I3M;L8Lj?*6orxTL1gb<6=SE&GZC-hX3h1Fr%!32s`A5xh^K2IepI
zBGoBEe@Dk=_&Rb6`DZl)at^*0U&<34ezM2VcW*ZStybV_B#*5=z7<}oj-lV?45l;B
z)<9~|4zhld7fcyu4ZpIM;X$iKm@~c{dPx{I{9b@cS##lI@MTb1;tGEz-A5eifX99%
zV~VE@{tCSeRgul&@yn`k;H23gnY$Pj`(>D|ZXV9|S`5pjeQ~?_O>{L^$3MI6@#BpP
zV#O2xnEQ)d%=mngEIR86_oIZK_wrn*RGG!L?VpD)F6=_n!b7Mevlo9@D`3FuOE~DI
z9_cqz$1_bgP{Va1TlHBEjrv*j=NxOoz74?0bZs!m?-gslZG!h_6?oU&uOdlRN&Hk`
z3k&uwVD0nwz@5<x#P+h@M5WRTGIwkiP=^bmFEb=bXVM%foX>D^Z4S9~@iH#_as`R9
zGVY!+heeDDgT<M;5c<{t{>o^g`ehwdAF~Pl!-ldH|0F`RO(1YpBgxx3LhSu+D;zx~
zh3(V2h{H93Evn5H2Mk+|Kib!z>c98oN_MPBaZW1JDKG{95FJ<(@PRxKoQs3*nu6-B
zI9wy}l=arxMASb7^5pAm9y>;h{xmIM)=BfQ{YVuwJdFZ*<F(kI@Dm5+10IY10I@Nu
zpuDm{eEYWvnpIeW{XZ3v)7vOq5I&e_EhxhYP5#*OI7igf-9-l8I7b@XhOvc*0$|~r
zVmLUUj?8Hr#NLWcp)*sPj~IW7++HZ;9c=~lX!kg%6y>7kgul!nasj?}xXV-qN#X0g
z*|5cQ8N5AoM6@+!E6d!xf>E#6;O>|P7mh}eu1{0&R_ZZu*Z+#+tE{mpSe9ph{DK#a
zXHpcLcovoRT%lnL4UC!xt?kVuZTU+)6EJ`;R&7GBkOaE&u@PUjv6Z=~2>fg9en|bh
zfn5$dhNB+GGe2>kSeiAG?ypg-VftR0>%1R(1Dc3m!8Bs>-WC4tpN2-GB5;n9CYsNV
zgZ5#oMAFOs5d@w9X8U4r-+8i7V;O3{8wy@F4lr`MD$9uHq%7DTbDj2Mp6m{CD<uwG
zW}PH~<HnPEd3`jm&4Y1OGWbVEk-xaK4a;Y@lR&Qsxc6u!hGlHQ!l#o+zP${Qo^ca?
zf0#h821LW|m1}XolhENdLvSeWo&(Z>jo2LW75<xd3;HFyVESndT6|s+|2PKVyxZ@Y
zMnxyl`4fW@H#U)b#;UNUFcK8+=|J#~#q81EAS52=Nk;#B@;UJc)9@XS+1s3jUcASw
z_0SD=r#A*ezc`@d69Uf<j)U(ZiugvzntPr|A!BYt;>ddoz(2KwJnS^$;{6j~lj=!k
zsx=NQW}86K9e=oV@qZMZhhNR#AIEPRDwQHCv`d4A6xBWNQxSy<O$~%fLb5ZWNSmfq
zS`<QvGU}f95t)fd_V{KenIW=%pWlCQANPLl=bZQJ{dzrL?JVhV5!-wEGWm&hD6R91
zthx3PuSQprwi~s;4FU!K=V-XzyA_2ZGssd>C)dt(u^DG3k=7F}j&+L_VXRIg&VC{A
z&WyL=f76N}%J(9Md0b`JwvXj2og(S?^%wDbc_yCD+W-$|jO549+t3~Ng6NIyb7<qh
zW>~u9G{4rH1(W9=5#N693mX@XV>J)_d0&T^<g96CPvT>tB-@fNev(LHkJRyO>)#mI
zUx5z~kimT`GN{+Zvv}}&A@6W3$I#$CShC|1o|NfB%}4JsM@ET9cH7bvnE*<)bMa)Z
z&=)i0EHAmWk>6Vr$zQm*(}@RFc~<FN7`(%kuKqEa*1cT?e(hSqlh#i>X;@!FO!96z
zzJCL(Ri4BhwfzLYwBUofAnf&cAEYk4$2M5>LT1bWY8~^Lcbx9Sio2t^!jc;(ud^DK
zcdh1nhrIaR!Uv%Ft{TUVP@tjNI^4HCn|;1H2-jpDXKlgF><ZKnO%*TVthbTL|C$LK
zAFDveKo9J$+s<NAN=WLwB;28s1cPQ?V!bmLW6Gm5WW?28qG$U_JYV?=^KY7pMh@YS
z7vzqvg{m0yAr2g4rTO;<W!RqemsDN)%jS1mu#X!!a~D0w5{Vw$TrJDLZ&HTeK6>nH
zQa_Y2^g#zTfiE;Qhj@esVL;_CG9bR3oiRp9u7Vm2Y;!`d|4K2l>L=E`91b2?(db#c
z10yxR;RDS+cr#@rmk4YP{h3!mbEpiTI%FrVWKMjo?G2dRt3f+<7f9BQIfNE--O=y3
zI<I;rhPs4FRR3T!OI<w;>;Cpb>#;5DW8G718@30pbxPA4h2~&zXcWy^R0{fDS0rn%
z4kE+N8JgA$nqHqg+JG$y`CJC77t3JJZ4ExzJxP4tI|_MW6pkNPf+x-P@Qv5ZY0igJ
z<msvb;&4?%Ty$|JwHv>jMV;Ruo~LLiNxH30BJT{NKCzN|)8;~nQbaJ8UvER5_21CX
z{{Ye6t<P7e7-7P#CHT8{8%P&D0-5~xC{>n^8e&iJAjx*xE_8pzHaPNkNekieZ(Y{&
zUpDsbEpPZ3cp8-k42LqId(VIE6|wz%d%oUd0p0F$3d#TP^C#C)E}F&-9Jj!zZ9%;6
zb~bjaE`X_f&O_U|-!N}u5j?0c;DHn7!D#!tU{`+@%HM70sfXwCy6L*~-Ld`LcuF@E
z8LQH)17aH5y^Bw3vf#r$Qr=^;k?%hjhVr$_^!&gE$TqtQJ0>jP-KVdxr`0mFUx5cv
z+GfHp7`?(3GYy$jQx9fu9mK!f7)tjltl>w)vvK{Pr-Yx{P2IiD;kfmb9<g}D=3EQl
zz7wVCq4+qiVXi}OOfaOO=XZhW+ZbNyQ-t~+v#?d=0HxJb9DZ#c8N8~HSDh<_Dteo+
zB2B#0eI3?ZX~FJ(0dz#`Jwe~_=l2aQc=E6`uD&iE6DFzg6YB%HjzuDN&0G(MVKH_u
z^yIqfa{T8$S!z@KolVimVB0KnL40-#eRWt0yY3(5tIHJliZ_d4)X}y4NBV*Wt#3z&
z;VyNmvPpvnp3a7dhxriF{t@;HiQ3&o!OU!d1HJQTFMa(kk40CH#4mx3;PHH~c>eyS
z%wljb<la4qH8T%@+qf7W@wNlj=Y9qkqn&VKh9`dUU&A-)&w=fKN5konRZ!Wq6Ds@F
z!n)x@xLoZkJkTHq28jUE?%s!dbtl-N5)X-K#gb(aL&SbtoW$K9YawNoJuiOK0rC3f
zVE<GbUx>rm%Fh()RjP4h$!Qi5l8A{?-=TVOB1nsTSoMZnRI+LzX_n!9A{j!n+oh?z
zO)i>LSkvG4-Ld??P`Da47fY_4#g7$txuE%iaBhNIWezE6ct`4n?Zf=#7U+HRr$oHK
zo#+lV!RXwHWZEw?`YviVmuXAp$EVA2lOfLZ^`aXPnK6*xtt|q#3VABBcB=1Jodo4J
z&q%}&4_IoghGs$@<9z56^mShXuBXqCO-r9Mz2{t9EPaaD-Wr7iO*UX<(*yRhDGod@
zj>Y4{reNdS_YE=QOC?u6%wwNrqtG!R03P1l0xxGu3)+hzHie&Vn6=_AiO`#f8}oG7
z_bp@C@Ix<{dQUx*N%+fR?_XiMBLcAL@>P=epE26~r;Oyw4i>o4542vm;-<S9%w$;`
zyQL|^&w0N<(||&W7Zi}RF@o2+riDGy)q!aqx=iy&ELqsM5SB_>u%e}ly|6WbzQtjf
z@G=<UPE91HMuF^d(FxqL`U2VYMFA2w_Daqtj3s)<PO{Ria`d5A7-{@7j-Bfq%_2AI
zfc8^eXy@|KA|HhBR>q^dr4x8OSOJZSMGc=u{tz4cr(*naXJMDT-68+bavWNx4@W0m
zVj+dC<ov;2Rz2Y?``WC>%p$hp{gr1(`KwjLeBvNTxw}lzh*Bl~R34-MnL)Xw0ckrR
z<U&0E3H>7Z@HhK6k=?r(lKjF@&aRx*sC;FgSvDED<O7&myNd7mydkbOY3xYr0_NK~
zACIojV<hH2D}L%g>P#|8OoAfZ9;gLVnhi0rX$>1|8wvVn5=qjn7<S>Iz`!drgy@r5
zAfI^~XO(3!8}(V_NW#>H<f+A|ZaEM-tS+Ko+cZoX7K{9X6xc1DM(kr%NTjSId>=oU
z=l}PUE&9^HcKqiCY0_OxUwe+YNg)cy>du45N;d5ASraI25_<LW79llKgbAi9DAGAs
zA7<-@p~IqxujCJD8|nm&X`bS+zK_hkSb?cJ%m?QsZY);4l{6~caFlwZhNUm(z}~75
z2uah%H^rK`cJgv;@R<tnGddm7OdBsGB$Hoq1F`(D&>wMbH!GK)BC+hUfwQ0IVrjc2
zW;NPD<!)hLGd)L4HzlI@+CrQjH4#mfvYE)mM_g2yfD>;kVAzP`MD%<BM$Xh{u8))P
zl+Hw$l_s$2TzcSL|5RermVj<9$~?@l3#QIIMM5V$Bc25T5N(|&_DE^O3p3|n%g$!F
z8ZjD^{MSK~%@X)9BTZn<N|PwNA$-FCIqu$+2bvM}=$Ix0W<xIGyX-A|=r(&QDm>y)
zKI0Q9=x2yy(-iSWg*O;_hGGh}W@$&#LFb(zcs$kxQ>ijiK5q^hY5X8_C4LyQ`3z}K
zk%I4U>e$rTTJUmMGRBPjBk9aKMv~@sGr8(lOeIPm*7yqC<+n#jXa8-)?6NAxnZ;te
zk2XY}7)3JkJaI<56Vxn!#6kw?v#%zBaPwF^xqq~niS&BMw@hm#Dn0g4B$LaQI8B1`
z!GeeOyC=q6%#&ouXOn#4bGE43LWZD=J^DVFe;)jwq`}}Q1WP+`)yQU;|H=U310{IO
za}WM^+l!~ncEw$1p5R)o8({lzy5#Y+v1k*a2-%N|;cH<S8>A($2WK1dDarwKtiaJ(
zP~=Q~!~4PUGmH6v{Zpyllr*m2yo7%V+RT-1mh<ps7cpb!1?ta}Ve^O#YWZ|4oNoFq
zIdI@Sx4$`%lS92=HYSxu4XlNVZKibJz4g3gRVWOK+eIB!O=#RNm9vZXHes!W8ofV8
znmf;0L?_<b#fudLhL@=gpKO!@t*aHdcdh~*U~&nsD-6OClYCr#+6pTINAd`H6+WZ>
zC>0+_rQarm!FpltvkY~Eh;{quyrKXMHf`i8AG2wWPDO*7O(3@E-$l@#i`|QT@bEt~
zj8xNS=eBrb#=vy&FLFWAy?Al_YIE`V!MSXP^m>@F`iZ!&nXv-%cxaDHhGCcr;a^m-
zYp5y22lR)$YpLXeSrW9&od@xMwPCX0sfwT3OnhzR;q-1@sNAH)FXk8nduoL9Rth_h
z;Pp6Yz<96@y$N2zIlyN7qba$Cxv4eal>8Pq9B9IkS4zY^TP)%5(hOF=VhkR?LU8!1
z@%U%+ILW6S=fr31Ut(+i3tV|w$m8Wr!2-`FSllt1rgdlHnGZ{8``<Fs@~D-pUTDL{
zeYnW=bBxhKF`T;;DbdC+9uTl^A=dmerEwuI+2*(j%-p3E%X~92#9<0mlG(xStN!53
zE<JkH#2WnEQYEuJE{W>`&Vj6;PpA*mVDdA5!ieubh+Ob#s4E}AXC8Ecy(OODP!vvi
zpe8>uQv?0YsxUouFjo)Ez`5hq@a8TnY9?!sUowlZVQCb&L>|Vp&*S)k-cCtp;SnK!
zu?_Z(K1MC-Cei^xEzB|?iAH%F(OY*$!0FEh{CD#x{;$WAX4m(k{k;SlxscJ-UyHH%
z+i|hCw<&0A%JP3pdx^q?Gw9QjNJple;VbpC_)3Y0-!1)#6Wed{_^e4h>yj=D>u7?*
z!K1lf)e73#o5H`eNyMJ<N_>&#DPBCwo;Us%35G87>1!Ad_mroT&UO*~xJrx9RNM)t
z)stC8jW^sjpAR?8DkRZ|Z^P5e6`<6R0nMMbQu%5Z>U+-^0)zg8U5$h<(^uhvTK(xL
zg@O3qF_qjOKZpm$N7GN)!o1ez%R9|?(}}+t_~td)bnE+Ix^Lzf-ej?z?1@UluG1P+
z*)fw#|H|hoOX}hAg<U*0=`a>r-h%RgW%%><NHl*nmChtd_~WM(`SRP09{(?iRxF&v
z?Y%0=r3;DtwX$%wtyi|+5io!@e$(bfcl*)IjKqfK|5Uh1$zZPFHi|mg7V~%OU+{zf
zyusz337w|s%O(}hq361dx%*{ld_I0ZR9y4pZ};qhAwOQ=_c^uf)#yM>-7<~#Ot_Do
zxiNf3Oq;+KnFtNx2eCuC8v^@p#5p%hNF|Et*Un6u5VMr{nQ!9O8%o*iH}*7oz<b=}
zv>NPw*wU)^{dn7#O#E+mC|wt)E_mdlKx$nXPT0JUq^2yyw3pjxNwWfzU42BnHnu-E
zwVc3yrX|r-tHV&2EEYU6TkxFK9<+Oh%+%l|GgkY7a~E@rlY50n<py)P;8gK}gHOOV
zdmN3QeU?Y|+0d8sCeaOMp=`mjY-&-wmY&wwLjPnK;q4O(iP{iL%!zqH?DtRLKZfL>
zrDYOKyE>Rx?hSw#|I5s2&N^PRz>0?scSGMPh1_X+3fU$4g2v}|L1E8jJTqAu)_BH2
zpmRPqDeK2w3NGS@wk14I_7419@`C5uydzot6Cm~NBb1oSoZSxkR4=U$Vyb%4<&+bz
zn?De4S+{^?l{TC+E5^Z!fmAcf9<0MAqR~=4tdx4jW(_tWq2GUszoHA&q(70IkQonE
z%U`o~`{IQx=u=X8zKj?(v@@B@;b5pq&}L;5Q#I@&F~)XK=%$YM=_u%(xDNA!?C@bJ
zL9_mTI9ex+eA?U0rWoF2`@<K*(-w)uwgVZD6*OGq-;&|^VpP`LgthN#$l3et66?4z
zP<Y)O^B>rP>WR7dJk0?;1>VYl#Af!?bQp##h{remqA|Vg7FoXd0nv{ihK<G}BvA%w
zaPyPUy|VE<8J2O08C6^qI*8_AT5TWeP__~~Z-^7Kk>Oyf@sK^fvl<PWQsDcmQ0R)-
z0*?z!@x&`BgfH>}t8*Q$iWI|@ooT}FBHWJ)r?9S>=`icfA4%gJXR+wle?(+-%kj$o
zE|TT4f#~V=lZaZ6JIt7IiyYG3CUjnn#F*PNV0xuITI)vO@827ta*rJpmnMVi=2OhQ
z!iQB&3ud!ga)_|Q#@}(nnf1YHQlnu^MjQ(yx2|l$^N05_TMs?(9pMj$<~6V}IffV~
z|5Y+cG*MzN?}s&4^~ec>9T=y5ggkl_!p@oxz~8k75~<_@Ha?{uOa6J2>-dgMzg0rU
zhu9+vTmau6SCGG3M-ulh<?LPG3~WD|h~EMa6Wd|&EPBQkxUY7Mxe5*N6V!WPxRyH8
zsM_05YiUGAS{2|oDSy<i^&qFhp0eXc<3Ve^91MB$%fTy9@Xs6@4{QGhHasgGfnzci
zVdVKjn6W#BxH~Kp<^%^wl={b#YUaZ4TR(|)|1!2pVJ5SdZ<UBHJhD&xaGoSQ8-uE9
z>&a}Qg)K{bAx6m#b-a`y>rpzk%^83fYVVQ1YuusQEDeUK8-ZTSI<c>(7nI!8K-K+o
zNlk+>=#}}1Up|O|ntnHlbLL<|<N@_k)5WGE2ZHG)O(uFax<R!RCCheg#rVn<k~5t`
zH_og`$S}S^8hs5V9lPCG)`SHx%kn?Qx{+j+XpqNFLqIpY7wjwx(M`|}-skoMrR>*^
zSBCV$)h3ka>W`=Yy)%VVIZ@bYA4FelSLWX}M6_(~J$SBY#cCpFao561Fg{vFZcZ5s
zEwgiZvxy449+U=o8;^?3|7M}9f;P`k&LXa=cbHnf2F4c;5%OEw*x56cJ?-~`NeTMw
zrHM1JT<I+tw<r~!|1*K^i<g9%E)DaXt`XDKvCwSU%ck9)iG$|Kg5G8)cy9HZ>Fooe
zI_U=K9$|%frftl!*%<NbJj{AJ1TIB-p=0E5IBFnx-&V=PkB}~Mf3-i!%T<KfA~C5H
zGCvs#eXRXhES&M!%VM_IkqrI0xHHHN6@5;yc8kx%S)3&fm9>`S6$zP)lYON91YsR#
z-$Urpp=fYIg$D^)oB7JaN$^{F+T6PxPYu<gjo%aD<mGF)ZHPW~TK9|DxHXa8_NugY
zSRo918qC$-55j#4EAi3!1}4_<;Oe{+4A(S4cS9AnPgN2R{OHPOeU4!n%AAig2%}3j
z<wDu-o#bNeCZeSM0@Ym$_$loWTBDlJ&l>lKQ8BtWT(d}+s|4<Bqze!GWeXjT*V8eh
zmN2K!dA!Sl)9oIPe6Uvr4<oZ2>%W#$i`VM~4&`WmyYm=*cIqjqj&$Hg0dCxKj0N$q
zwW3oReq!|@5#9|3zOQfuU9<8R3|RUOmMLFAskv3u&{6Q0zmwwbd1I+)Q539_j-sWH
zEg*5kKyK)9Mch?9n6!>Cq_i^_oNJbd^?LmXo4k}&%nrry?+dYfs+uHZ&n#92SDD+a
ztytb9MPj=1nQ7@(cBm`>3UmQlo~wcH^W`u{%?+E^J7Gb3IZ;_Q6e@kC;D^A;+c+he
zShx^8uxC7^&S6YEbS|s$nTwZ?%)(JRBY3sj70fw022Aq&QGJhIgZA3#{HC@CDt>Qk
zu)AoBeO4iudvdL#AN~UOBMvCmuyfopN9dO~Hp1Km=NcZ09I$59JxF<a8f&hvfFlvd
zQF{0@@%rdVbk@O6G?DKXIO5f~?oBZ{wP*{-KC8fr%Yy$gcN;&nzKq#K$kDZ{6Zz%^
z0mL)pHvZ_epr*fLh4&bW<Ngkyi?yfoziGx?L(iMsl0A*y>eujVg(Ec?tc0JtC!kQi
z2ZJj{5;M(Lp!#zsyj8pbvm)~(D%IW0PGbO#UVcJ6s(LSq?>bU7_jzFRr3$*X>Pn=U
zZbM4v1@w<ng?XM|9JO7ZusvE8ID4fP?XXs&hc>Uk)8X@I^u0Y)ag`FEI7wA}E5J+A
zG*QsNJTk#P#S<knDw&;mGHNw#<}&#ybo7cGKHjH{ADW-Zf2mqfX$@`K(wYEfg*i0v
zd9~1q^bIfX)1x1Cq^a*vM>?iSmfzGY<@a89;Dx+U96a+Y{#2MmPq?>o<#F+Jh+7Pu
z5t~YfvswI>(B<ZNp^z)6yYc8Pg><6%6xf&NMqPXEb8E|q{8w8d54uA6?`)yprQ<Fx
zU9gRp9UZ}+eSL-XRU`Sm->KL;RSn;(j^Tz$*|<176sm%w9JM#3aM5xnF4bR(AE_8h
zH={QXogXi}S8qY4g9@GH?F3Uj%J7q}EAaF@fzgwMe{}P(ZH*f%Hebza6&B%^m;Gs@
z;1?e!JWGo21h%uT2lfnY;OcQD(0Oq=-o4n0j@kYAfMc<|a6&NMa=BL0K4YxlnX`hf
z$Z%LNbPem$Ux<gTcfif~QGBk&SgxgB0>5fM;c{{imR`7ui^h7=PNT~hRAP!#UX+9L
z+k>D~Fdf9oBXM1=EFaQl1{Ryg(T1B%%>CeQ82GCKwj2&eJ?TwQa_AWZ-4XE>9qM$t
z^?2@d*N1An2&dlXq<PGDXZ|YHfLc4Pq&*)VkklX}s&-GASH~y7xp$)>I{ybe+g?aX
zZI1X<qZ-Hb3esk_ly2L9ph2Ut43C_eP2HZY;p)lp^vm`!RBBBZPu9AM(T!92uNZmm
z@Mtg{w6s6Isq&&RTLO8qD4716^Msvrji<SD%HdtOH`JUH(GzCo)NxfVq^GVxotxR*
zps)l)N40pB(K9sOHk>|a{0(+v`$MAo2bjAtji;)4QS0KZe7n;Pvij6t(s5nTQne;h
zpS=#;Pw=MZgdK&H7vp%{Hd)D;c~SUma2gHrUPGU6K1LsRDxvq5$v8D)CA<}w<wgG{
z@#-Io>ESN|cinS1Z_e6-=6T6Pc5EGfwCE-;7yU)grlHhn^-bRVY%mY&`vcb>hmrjk
zx4_=Sxh$uomE7&<VK497;y1s92It<jtaB5A`<t>Fjz4)rwki+DR}(4m&;1X*96pkT
zcT^-_|E!1Lvr1$^;#abKvyiV$(_;7ZkHIYWR;V7>j$RQ@aQf4Y?AfXUh}@aY_8P6j
z$p#^kH@iNAt<Qd}n3^m8*%E@z&GQ=MO&F6G`NOI3f#Tiz!%(gum;_(@jhAJG{h?SL
zKi3SyF=kIBfBz}Ko9E9O_Kq{b88dE=pJ$&3Dpe8eYD}s)%ws3sdE<nSjRlY5#y#L8
z7YIYQ`@oRM60)8|;=YJ?_|8iO-=tn3;}ecCyOKuIU{eM8OO!A^$_gD%M6(YoFTtU)
zb|lrjp1oRV1t*@C6Oo*UBypP|Y`QWMuSEP|T<G{KJ!*rMJ6FKNT_0Jod^c&iF&Fp$
z@W9Xwt0m@>EWv!!9O!Qqiybj1iR~SET(vNlgf5IA*6v$aqsM;@FWWs)wR$OgbIlp$
zM%6O%r-sBYTQ06X6awWN3fUm@gDle4g2*&Pv6Efq=z8}7J8*p~%9lmrjHjExzrh=R
z>#T*B`IA62wnCf)8Ia^WRA99|WTJ7=pgzX{S#K=X)XGBR>kf7)VibHoeT|9S+Q|Lh
zL!{<`ArYxZlTT@HNl?#N=%^hp2^%{EZscf!O!{>a`r<2lNjW2)MQnDCkT+BMN!lHU
zL&zI3i1dy*bUvwLk;Bc&g0Z&v_s?RyzuF3YW7o2f@eeTMsXw)gSWEK%+EVAtW%QD@
z9^3hQGIw_gq0`4~=c7#aF=d@{tQRs}j+ffN%{K&HGIHpx-_CSr*D(z7*pEve<Pc|f
zTfSCHmA8cKV)cDae1g+a?6H|5d?){rK_e2GNMA<$Gk+m=8zzwg8cD{*t;S)8WnfE+
zI~0@!66JQGf7tmkdz)pBjjxi(d~YpSBJfII3UgY#34zseQ79udnngVJ#9xP0F{vON
z<G1=^l6NZZ>Kux`x(@tcuod4irqYouGQ^vmOHuN`o)v#Kq|<{wlKBHK;AXR7I8lI8
z#=jHl7m|X&`(!dI2>m73;(~<Uq@iTbzxlX0p@ekqE5?_9SHf`5w|MAsw&TS378X?#
z0;yBnu>Hh#)-^^4y(Qu3uIj}^=IXE`a13<sK1NiFY>2r_8t6^-AvUKASmY2pJf&a_
zu~*DlS8t*)LrbH2w<oN;5Q<ymYgqBusW7Y63_25@lMw~$A^(#lP6|yUZ)}v{?Orod
z{p%9B+brCBm+PRP+5k3j_fJ;-G)HVNWqd|y`%q@KJqF(-w=}f>Fd#<LfvFuz0hfe*
zq}A;p?!GgGP3zbOtJOcT(zo9lmOL`X9h(-C6onzIu3d^o?XAO{Un6OJl>*H_>`tZ}
zFF*xh-Wflwh8-!pi-jiAY}DjXnw+<SjlBFHd6_+$|97kk#)$M`*iA|!bVbZ*%6D?(
z>u5gA#~lXDTR`tO_o3cvP1>sL0w?Rd$=<USWbmvH^!4n?G(L3zKk&qqCk`40Rgac(
zSJ#bvB|naV={k6Ijt933>xOw}=TH}|OzL2u#AT&b=;UAW)IQ`2EL*0}HB(<=z&bas
zeq|zMch~XFry2wnp$7f7ArlQwR=~)Yw^+*^N{!OT!kJm}kPx<z*4)$ur^w}W)^0VL
z)V>uj?$V$Yg13Bl)(O&^`5iu$NQt#dhw<p*jiCB;0J91o!jxymp>C5Z$ECxF_fIFd
zIZN>Tj|(8yOJwnW-V*4z*DJ2MRW5d)vI@fjW<&J^0-}y6`-v9bD6>u%+nd8MD{d{m
zYo3ca3P$k7Fqg&r8O|<DjRYf=Cv4;U#gJ95f}xYG#N7=C81FX*6I;g+i_KzjbLdB6
z92)@oA@%Shxr&&MRp)Ho9b9BMg^hADpsOOZF->5UIW1Md%cswZMFqv|_zxrO?N*?;
z#E`ouit%rz1vdPxfT*u)nD&Eu{JFk>otD$2dkz>0y-|1Bi~&Q~*6e7ERUXR=lW$7i
zFREw6=R5qUjD>>G_vn5)1P&_?g&`)g)U0|oNZz#L-;D39#`gf<|9UyqcP@hr$5*J>
zd<R^J3O~Q3nuqtlkJgq6sFbjZy-FTU49FgcJ3o?Z-WJ%+LuJU)&(-3=H&l3Su`#`L
z{U7*W&jdSPS#%z-3nX(@=|L$|_IF<tx2!w^aVOrx9<@98+ESff+nNa1+z!F9hceJ&
zxC7HAxfm8~%QZD7QAL$6@L;7mpDxTDThAW`f5m0=(=TmUzb=5MSv`fejdt+s%4hOn
z@?a`mc@&j}j?V7uK~N!dk*H;*P-Rsm9yd9Z$2*quCn`o1A`bGada`uqpO56I?J)e~
z=mJ5Hrt){{3cU86IW2or$FFW%h<%~OU@rF$9D`2qdpw(Nn>13$F;_w2taNI7-H_Kj
zzC?z%qzYAU?dUeEl%EZ%Al^5_$<MVZeB@<Al#>c(Pt)ep(P~nZ1RUZ@DtjARymm0v
z-`B;~TNi=MkImdXv<})8`=Ni%RKD7LEl-|sOl;qwLqBLKaiu~fHm&~(p5&6kd!w6K
zlK^Id#(LcRcsc!;n~4KwJMo=r7W|Q{4PI23O$UnfxcAQhoV>w^eq5Qr2g=+4J1@dq
ztR8^pyzMaMpchG1i{tTM5IWwdLAdN(G<jLWebRnH`s*WtFD-#S-r@*-#ZaHpr%ugY
zn(<-!Qrtvh#&=#2m_%oj@y&rEX0K+CabG#^Z#auNwF|k&-A)Md+X)s2(`ib#F8|l$
z3NC%d>~o(C?>@Lz;<O|dbPp%-Iy*huwjzq>yf5Mdo=l{Zf@NUd$TaHxPY3_bl&8TF
z<8b7yMO;2inogG8ib8@0tz>n$>w!7!-oqhu_;to#_Kc-}{)u6J*<7ePph<6q8PZEp
zE`akVz%Rc%zVY%0wqM<wihX(nKe;uYIbTPX3*E7LhwZRwmkG@<d`jY+OJS~JGir`9
zqW`W8#Ed=npnkL*C8e&o*hR#bCp^YSc_*mF<da~u>N#%jE&-?a+RUhD9&34Af!=pI
zLHzbBaU0YlWco&9y76S%<YUB-?B?*<zL2k)y_$GdZl#NlRf28WIda3T3ndl%!6(s`
z8rSUP*5smO;puI>)p#BqYg)#=V)wwxS5qXD|LvyV_$V}v&Lt1-oyL&v@w98*P@ZXV
z8J6v@#NW?~Y4rA+<hbiy{>NPzO^>?5-c$3Tz3?)$*sTHfHi*1U-YF?mJA&(GN8ux%
z1K8K?PR89%W!m-9aKyfZ-EsYd!whX;*co|rEI7wvrFM}kr%a*ky9)l!9}h~}&%wc0
z@Hg)L$_Dx+oSEWWBL3#Im$=puLYj++*V7$1spSC5ja-BJixZf|m_pRtT1})D&BAy3
z3fxGpkSskg1m@43hYQtBk-Z*;?>DKF-yM?(xKD+;Gc(B5!HYrZ@J=Qp(E=J?f#pK}
zDOkCOee-yNIV--SopAnTZ4QHBUzFJ51Cz+#12uwIV+*_fY93i;r3CA5v_NI`8@Baj
z9@x6xLak;_WR#N8OG6jmxlf1P&z<1-*7Nmy6|C!J5-))L)(-N?_P6+F`ylka5{B*#
z%_Q`tz}eQ>1fqn!4IPKNC3olOL$hcc7+)3ov!?B3drt~lZJ;ZJ3h#IK{+=wf<DS@7
z%MX)J2ztwOEfl?qB#z}F@ciLwbdTOoPHa5RWM=EbD$`_eZrd)|+jE|Eb<aW7YYwDF
z>#kU}Ta$z?OOn`Dydtgw7c9l!6({Hkn%Nx(!ZsVSoK?EecsrQIE*-}jtsgmlGgrkK
zUz1?|*PpC=@mg51zliAtN3)`@&LAo+ca*Ce4E@r#vnx?{EMw#ftU2_GoiXudvo4vF
ztVRNr1^#&d;A_(8*N?P)AkZFrmAO}4B+lcPh>u%r!6ns8AV*{(^nRtomk(+ndVkTO
z<E;wGZdIe}yvt~4*=|sBtLC-oe{kB13E*$j!0$&_QT208c<bLt)SEQ{Zq+*Di5_P@
z{KhWsdSV(a3)oJKgT!!Xzdh9c>qo!cu9kQmT!<swwn(aV4S2ysVOP9Fj)lw0V)4B3
zpy#3^*6BUQ9#(lk<*T7k<*Y~a@<ZYGm`)OV^EElUdnO)|=-|VA3VgQ%v}tFv89Iwu
z=t)&@T(J_ZGd2^G8d;2&vLWFPVsyT8L+m)m8g`iXuoJmE;jo_~JygFD8>C%Gp1(H#
z>NOoL1Q)=6=T6~`*T(4MH%jnzG!iy7gm#i@awO>l1oW&#(S&44%$;Nyv)c*-N^JPG
z0s1`FBp%+5v*okbKEhIWIc7527e;tk!koif!FsX|iUvM#Sdq3K`!!!;t|~KdkdqiM
zoVRDrTM|G|y^JLFrou~KRmi^-&qVVNIHVjEd>rouALa6yLMB}eZPP9Ae!3jX$Sfhg
zKLRjWXE5eX{X-U)>?dL6cO@E&+UoW8MPpo<IgXZTBZgt$iCV1!2|ggjGjsOBvoZjO
z6|>kCTM^8a&ti@La>aLN?GqbKwI=aPYT@^f67uTzUef6Ffb`qYC>dQL#WxPRS}$AX
z!<1iL#n{zJ4ILME^1QjbD4#HZ4LfZ@^RMlOyWL9Upsg(2Rg%W$-9qN97pTRCAo`>M
zXr0X}uHZf!YOd&_jBf|%mX-1D+0#JLbQ&+VJc0>ITX^w1cky?1XPCB(@P>+Vs=T-u
zcGoYZNB-=_yFS+iul7xDE-)XGUt96TZ~OS`_PgjgrG!V;?xC@VPm--K*6{ref&7=#
zNfOtLBwn!O`^SlB$}0`NQ1c~=l+xsji_`JE$|IqhxSi(&bzyK%gOG>U=J)-*sou}w
zoL)IC9%cE6{8e$K(;X9Vn$%u$-Oq^z{E)y<8DHul-UzElcfpBqe!PJj@b1dTymtQ%
zex_D~KN|H7?&>OnrfDn7NRXvNzX}|cw|9xg$9QC0&k=V8Ihd_B2-lAv0eVVtU^>W}
z#Xd5H(e*z_kc=g0Dh|PBm7(A-6^iW<`v_T`2&JDVg8S<Z$zE;%IRmy}NbfG<|G)zp
z3+yCBbd0UqWzG*|f5zUv4qV#(nW!Df0MAGE{Oj^AqA5YFPxc|fYh#hRWWl)6DFTDi
z3rZjN!I0b=<PN=z#c|SPaaa!ilS#q@v$G_JmYQ*)p$@*SdycdA&1ksuWFqv5HMr>K
zRC;&PMmp?;B6SN^rQ6&qxZEgHnzuiSnSJo1xwihaw7eJ}M`_S4tHqKBAM`QY&>S>B
zt>G@yf^bG#2H2f?FR?4y#iM@rpyd2YV)r5iN`hkOl<Bd={8&3ay7dWT8ujpUq6be$
zS6tpviK9%9p<4Y3vi9LY@?=mTS1Z4S(Q#+7b%6)B>Z|5oSFMNEqgnVodoNM@eji=E
za&d<2UFcflA!swYe8J|OxSk6cuQU-&oZiYh9(%&o{wt{B-!53US(o<Pb&$86`bV;N
z72$E?!{qDGeQ>|<7F2vvh6$g`q25WE{@e2awXCvv!PrTdwL60{Wdpi)=uBo2>5j+!
zHVeL`42<r2Llj9O&3L&JB9lb)u!{y1emh746e?lnwMhQx=mNTPcQJi=?l?GrCazfF
zg*~6o!{cZFaE8l3x_|I~y5m$fc{1LW^e-F4OV<tJy*o<z6ycl;`8AjAP)y}RB1h9x
zi@Y6U2Qs`RF{dA39kq?N;16W%>AIfT;FDemw=No^^}Zz9|A7as?DgZXvWEbxjHI!z
z*ReOEQhrFth$qdJr~B62LW7(Axbvtw-e7VK6&l9S*3>QZjoW&<?vpqFu9hG^)RO}(
zT-b}BFQ(T{&cntwAuIi1JRNg(8=ro0Blb8fq!oMIaAELuNWH!otH&(Dh025RP~tFt
z$3RDDUa4fwpTkK=&o3NgCB;X&8S>f7bx7H%uVTmBTkv_73t4;Y84flaK>IJSWIJ<~
zv%EWV@b7_vsOP3ho2@>vun9tDX{j8KIC&eSEwlNtizoQ{50mIE)m%8yGn58zI?Y$%
zGgP|TO%_^SK$*PB{ONFgNVi@_*M6Qu$4!^zd!1KvrQ(^~f-d6fRyQO;eG=TXH53}=
zE#~%1+DQ2o1AOzj4IZAofX9@_;J%hKc;wPs@&2pXWX%zOJYkj39Y2qRW*vKI2wTIo
zugBopY;~&II)ZOBv4N-W^T`Iyi{Kt)iTe}Yu_R1}D7813I41`NJe1+)QcGb{@<P;7
zt>V-Ad-91#*T4|bRU*}A0QH}@@#h;duz2Khv?)Exwkep=S3-8i>z5a<F)hHWcL)|Y
z*~273o7uVTBsyG8<KMU(o(cJhz5B01_?8$hYj_W3Pu#}o>qNZ7cmvFvEbOWxhw=m(
z%qQMT#r8oDuqx#vp0SCg7Y!8HSF1ndKgi%yZV%%Yf=8!#XM*rt-$27@Uf9;Jgs`@^
z5`&_R&>gyitUCWp%=cNcPe+fFR_So~?cvO1toz}Nb(W-~GeGk8!YX_+(-Q`-DQ3;b
z&y$FP6ed}l2Y0oF`}l^XaClJ)ewx|H<~}?C(J?}1UAPBs&}xyS^zMYFg}JySVKSQ5
z975Z|1u(E892aGn5!II$Fm#28OnQ^givE~_S@0Mb8#n~apU)E8bp;XQ-*fG!PLoAl
zQ9illIp6W`v<jxIxt-Y_Sqk||?s#(TIJoXXiS&*v%qh+m>xX=0BL{6`?d98Xu|_#o
zZru(S;)H(K<=5fUovlpTN*A9sOeZRI8cghGNM=si4-Lth_;RZQ@zZ}r?0bwLvF8o3
z)!4xPcHf6x7sX8VLK2EPZrjU;E+(wz66v>l4k-D~hQ@_Sz(3A`RdsW~e@`8|am@xI
zUtJ?fp&wb|ssT`tbC2v>AB1|^rQ)02*NA9eA6d0%7kPZm7xvyt#;SZ}m=U^@*dN*m
zq4jr|<-SXJVf=YWmMjMk!-2T@feb`yUcrJFD)3@QqT{PIYd}RU52`<^h%5F+z_-*#
zu>I6@xGH-G&pFHEx@Vc}q_8t3=ckDie`$%OU-UzJuRr4E^(XN6yiqW6^FzGgewMYa
ziiOjwWy#H$QruBHfQh19iTd{gWaB3da_v$DF)vF6*`%3pUf`R_58FkS{P@nUuPw)A
z;}u{}wE@1haf0!w+nHgeH?9gwWW_P}9Es}$d>y)2T(aj240#&?+Xq}j3yTHpqsD8=
z<k8!)qb?H4N^Zm2J}qh!nn<-3-(XxuCw5syVRLajHCuEO%qRHqz=BEGT&V&kirJX(
z<08|ar~>B?$?~utO5zZeOOni@o7gWS1ni@=;m5%UlGY-y-itEGm5gY}8$S_suIZzw
zcM%MVN(CC}2RCmQ5|7kvB=+<){5mxP<BxecuBo1iuD4Q2d_oB6XZC{?pZ8~9ZqG+i
zLbc=3fyk0aZNu;Goowil4is<Nj(?X9pqs4}ct`Fj$?DVsyl1}<hn`qYnokd=2D1a;
zO?)VZy@|jJ_0nX|_l=l!KN;#yjKbw#Re5oW47Z$g8ncBv*zAp3V5X$XW6+HI8U>@$
z)?}i0$5!l{EM&!N`iQe_w)pY2{<tzP6-Nvi!|rdJ1MVFoNqPBt*wYe<8Qtpa$CQIC
zHrk2(=o-j6XIQa}<ZVRHpjCWhb|T*Yl0^;|Ct=?Cc$Ts>k7&-NIB-J|-02+-j!z8X
zXk`zZFf$B_g}n8{KFZEHJtoO+`fU0IS&Zp5f!Tug??tQ7XWUj6;~$3Bmj&<aKSNlu
zXg1coszE2=jJR%Og4N_29+x^u!fSr8x+~#q`sg0emOY4Oqcr*a4h8Bj#fXi-j{NcQ
zIBy;Gkoiciu?btIQx!jB{{5XLm3uuDXDHk~bKW3;4=pt0_OC4Xp#^pP(dh#2^Kb%R
z7@S6r9A3lXwRL#*yjHGvvkV4(9}K2y$N4tvbR04~gI{}4L_<scn4U)ev!73TFnZOW
zP1@1Nd+x`=!@@yy=(X!i-6w?JdDWjQ4p|0!XN`h2zX$PyZuyw*B!;dtN;q8KgZ*7T
z6}~+Rq~}~s>BjbSrnj>UQue&(W-^=TT&qrg4o1*E<q5oIR}if@vsK9Hgz);~+i2|+
zU#|M<KIo4-4VOLYF|;IEoRT+;EL9oD*MwhZ2ZkH+AhiONznG8v-p29`eR5}?#MR(U
zughds!8OuRpHEtbY=Y2gYsnS$?<}VMo;ZGLcEgPC0(&UT9~whk1P=5hxH12dpy@`S
zwPqTW2W3h+3m&tf>8-5M(^5QKQ-o2+?BS{ZV2qbjVzweDJYK7XM~fx6!J<UyUsDJ3
zu3S9Z?>m_`Q<g1|K8CaIDe=zM%!VuR$4{@dT8#OPMHuatO}u)g9Oj36kPGT^D4tcw
z-Uu0yl<<6(Q)fmBYZq}XJ1I;UtWMg5yukj^KUu9=4Tz`YaS2<>15NUwv-uw$zj^~L
z)#ACkQx+LCM4b-)&hYEzRCsT;8%vAypy}Zvep+uZI?NeMql&7~Df};}&IqRIElR?D
z=m^u?ErDeH7@-GQmd;B$iAQKBw#o`z%z5RQw(ld(zvE3-$t%%6PMXxLcsR@Y^dIKl
zxdEFBC60kM?%YLx2Ucnfr|wQkIHP|i>x?|h?!bBcH82awl|+yqXN^HwcD(qPG})>A
z6|Xvtq;jwRKv}<Bp8aeij$6cFvbh88`DefnwRrLk@{dtev>5JR@I!l7dAe~d!nd`t
ze63L$FI!+k_E)&j6#+7|aMx<OMf(E1lM%vAM_SNTr^|`$yz{uN<PogeDHd10KMg+l
ziO2=NkRQ6^(!zm!nSCkuKa(uvt5S)dk3Ma@pGZy>)<Bmif@g<V(NlB0`3SWqOnqG{
z9lE3++D03olh+zze^Ow$G+6T$vHxItzgZCQv;o+PO6-4U6`tMw0qYNLLc4K-$3XTJ
zz80SMrm>H3@d!&gRy>~?X_xYSt?z&-$qGDy22|40;x+19`Gqnm`aw~PuQ=ZZI-9nW
z`{AYZk%}$8&`y(BS&ZRVa~0{O;Yoary#!tIR`X>!T4ztaGo~wrUHTZ&W*V>aQKFSI
zo<6f|!Uq#Slg*&U<zDMi1%a3S-Tpe6_ec{WG*tM@my38w<Uw+9+Dp(f7|9jQI1JkZ
z^?w8_e2{AukN6n}GG#S5;HVk@qh$kELInLy+KUgWJj0TD-+{(KSuXmrfotyl0*NQ(
zsNi+sx3>lGLGhMUrgQ_?`&6OwOm%K~LLOcZ>%c)RzBEa*3Qz1S#e$1@tn&!M@2*04
zb5e@_uzl~?HSP$Kf&?z-7Yt@!E$Io@9%f*h3R9F4KxDZPEyPQxrn?<~Uh$NS_PPNs
zM+fj{|E`0==TSK8*>&u<b~4`<A%=XP@z4-^6^1A5f}<<F#21>p=(n!luz!~TOK^OS
zH~I@X`3;J2T6+;)w7nng3T(ra$fLMcW(hs5xdUe%DT8CV97a6o&!e}UVn2S2qjC#!
zxxw!Tc;}@vHQ5kGOiTiJR>n1Qdw40{*&fcHI;4~BE#pa*>rE(g-wjW8xbn(LJ>dUs
zDdvdM$TqL(^o-B)hK<syP!+L_pDGF9x<!=srG?XBh9>m0c@j!iG}1l$`_o=eSq#e@
zh~1O!!!r5L_<N*;4Rw3K#;tmXzg4d=-!^Yd`<MhHrS3DGFG=_^TNOH%Y=Y<?HlQd-
z&+!2PSf!>%>S~sN>YPlm^S<#Ax@QTf`;KDv??1Abx9v<6Y5;YWiy-Dtvn0alCJS0A
z`0@NyVbLI0bm(x!iv8hib-Eh_bTjO&No8d+x{?IZIpRMxQ(&5G5l2s6hTj~sNYJ8q
zwx(BxI6Rn$POFP?e}pT{F0*2<6bd8>IFDFqYQd_%yKvHeB?!v>$1+XlKzG$t98tcE
z<aa9z9Q1UIpL5dj=Fi^^@g^m>Eud07=*2DQOd5@&-5(IIc0qO<|A77I)WRp)324|R
zLYcC^FzCfU$^J!m$b8uk;#G04Noee8IOCmz=WO1KXW#wEddJCwD36J4cQvti{XDVl
z%ZGv|>NPnuQW4hogyIEZrrKXKAI^M8h7QtBL`#LPjF-a%-`qv9|AH~lcws(sFA*~7
zdAg{+!WYjt>65Fcld&=6msl@jmAEF%2-j<l#{ZTML&c<@Z25vpIIGesHi5M`JM5>V
zK<XOY%~cUHH0dm5b{fh0S1K@xny|mHFB!Cds95|yk?cJ{Nd81e?4N&_u$R-JpWi{u
zw$Fl^E$1X-HEU6+aUe7wOn|;S@oexnU*f)aAW8Av%63n=Px@Z^LF$=*tiPce91grh
z#y?MoV@`m{5n@T1VW2q0+aA)D{qgXW3|w(j!VdJC2)(xnY4I-<|D;P<tX2*b3y$~)
z3wGn8ly5A0vV>iln}9Mi)WG?Mt~hI75m9nhgPN#ucuwF6oBpCuCw9O?ZwSP$ZkEU_
z4#1*XD^L+;uE|G#GLd?|W94CYHb1|VmB%QG=V*vw#Gc8};Z-B~(q9f^Rpp@Qsv#6S
zo=J+$C8Tm>AZxD}hlzpn;OKKdSf48V-W#pacS96>J-rNTzB>?)o+sqT#9K`LC;^=w
zFSw#JnUwcl631V@<+wk|3N8&B03vx$`$%^mto*)`$%w`YUVzDb4!s83H(KG`32B%(
z;0l<FL`=hd4le%unf$IB#)q!`MXtPe1hw2k)RGUT{wLlezokG+29$ukzKr8E!Ncup
zd<GwC^}+TtPuPKcRa__tcW>{eL)y7Wl$kdHE(rcGQS0@FO;et-crRZT?<VEgIea;(
zu|F%mX}VQX<+q-QdR~hUf04!?;(7S$pfhIFK4N#H6LI-td7L#s4piA(V%jV09&egr
zgoX*8dDEZ8$R3l3GQGv!hKnF-_B!;>Y+;3;2B4_?LH+%1SyE09kb1=+jPG}Z)Lh78
z1*<n<^sBAHc{G>x<rT7mgBuBbG7}5}2SfCt%~+$+D<L{&=s$l1KDK+vERTt>IQ$Ds
z9H+r1U$)~$_d6tqORDk1c2A)@B@`}yZ9x1i@LA@o(Nn5oUb`;^Bd-Wu8`8z(oUs$#
z7nXr?6UtG0%13aiNfeKf%Oyp<He8j~LAuW)$)@v<FzUM<uF)}oeQK$gsp1b~wMX!m
z0Ve#O!xRV=bTOYWDc=9S3Y~Im3)MF%<Mz{}`LiYZT+P~-miVgSr(1e_)Q!2k>#YHo
zU;Gr7zZ*@fy846+ga>_aYb^1L^28<u6|VHanDP^M=<~~&IKgfbJDgt%t!EPGqW(3K
z`8I?;$_R#@7yaq&-2y-Hav?~b?;^)mr_#?Ck3hgtS-M2miSK$JNv+dcd7hli*;VdO
z$lfYHa%ZtFojvX(O%K}7j|{)h)t{fl&;?7xr`C+Y!8{hDR|y=@wfiJ3DeA1tI0;U)
zicmFo94WY+#XMB~VAbvl(sIllCVF||hvif8W~Le05+itNhq__>)2)u)#~v}I=Wci<
zbuJ8&(ZzVXJB}9uPZABmD-$wWp5;H8D0w90h`>&Sp?A(WDhv9H&dc%mTQ{A2on42E
z|BJ)te%g5cfdkeKDZzxVn(!olk2v8`Do&a21%Hz*$(nOvY>wEI{f$&ax<>>5)W_nE
zQ8nN=@UXbi`4nClJsf|=O~Bw^lb~j56&bmGDhw}P0$*0XhtDHM@U6oW$-`3*;ZnUl
z_mV4smA{`0|FNAk@Z3c()<Tw5TY%GMIFj@~`gD1xvC#Y3A2-<+VY|@(?drRL+t>bN
zL9#x$bKfLz_Y%A`^{-i8)+Ly3xCw{<N+sQLioEpT2rfF`k6s?N9V6LgQYm*5@6HIq
z!lT}NUf)i5GNY5!3YyKg2TAzs;9i_|KmlKSyv9jI@o>vz7v{tpz-GyAe&}U3`ff~w
zgMoV>W~&das`)@WXLN(E?oK|Tbtfy`pG0d544|p>28ajLqxV&FXdV>B_r-<s!&!<{
z)$0iSn7xf!%-c!te4Ec-C>3!d4_R*UsSNL*eZleKW$b@F4ue%p!Q|?Cv`H<&r2M5^
zB!7o&&1ry|Z6fL&w;$sKuUS@!6y$q9<C#K!u5b4cK1X9491m?GuKAOxv)4Enyub(B
zWnMtBn!dP`I&cpYJL)qqnI<17h8vD^>81J6^rqPxlpVZ~R^D%<|8mW#0pEb`2B$#w
zVKO~&H5MWBAUIgIQfUWUT(V>h)lsTOHIHlv-_wd2Nv<^J?KX_h^5$=Hf8e9D)+qMe
z%nV(YgL-$5xOQzEelqd}?eKK4&L4_zMqL4eL9+$^^+cXOcm<Eu%%mPS-r}UEG33BG
zb!d{?LzlHhQe&Z~+4gD}PTJDS^d$3OtDYiqYi+*q$abg;SK<@&$KpdzbueBZ#7B&_
zr0HhUQ2kgPe|P3Ef7jQKUmrJ@dxW_0X4@M4^;MY@>quxi84dRg2h+Fuv*?)gGCsdN
z1Dj{-fX>)gxTP@#o}3sWF1|jGzpQk|{6in%W<M`JVBuhHv1lCMu!7S6@pR_VRDNOq
zrjU@a3{goEilhi<-<uT497O|?CQV3^gv>I8BvcYoX+}cMzBd|Z`j!TzQfXFc)}-n8
zyzjf#?^ug}tmRq9InUYq-k;BPDX&@p-{jvx!=xd+Z1)+gRU0f?A?1Ucg~ESDs<6hr
zV$Vlyw&P;MFy8Wgp}^5f<vEu^>F5SeUi7q(yBrtJfiIQdQ^j^sVdfY<k8j8KTK4=`
zg(SUs{sFViTg6ZMI^yi)>p1@PaXM0ODwp;8MM_>2;o^m<L_Io#HXCHa?qdm{Sod9E
z-rMuIjqA9v)(C#EP8}BJwy?RQ9zpOoBf2Xm4aQE$<2P<xg?z8)a3sVLOX_yfn+3|;
z9WLO8hG*<}+EhB`VF5f>n}I`4jfQHeWB5)xiyGfpjZ0^Zgldf<INdc5ealp7aLifk
zs4}FUb=vIfpGdyrOb*{Vpak~M_$%sg*P_>Er3hyY-gvPrgfyR6BG!pMMH&|mfNsSB
z;^w8*HoDJ4Nc@?HWTV3>>~%|k$hl`&#O#yIOD~758KMUjBbK8{ybBIcm51IwAAuzo
zEzCV4Sft(~_D6Xn&T({S(x-cvLv*%i;$J(wnqN+w4jzE&O~bMObD1c8%WhF~i3Fr6
zwvxq;Zur4dIExd`I@3*tW08k7Sx|Nb-ujl~|Ku3AgtW2~(z4LCG7k&fQp8JleHK-}
zF()Hm&&Db5CPA^>Ez;>>POPK@An0ZgJnuF{*HfpMljLQwZe5{h#)#3(&-n=nw6<ko
z{-?!;hW=2hFoJbw{SYy?i|lZI2KEJoLU-d#vZ_7_?6Oc?aqSLio~}afoK1pVQ<maz
ziwR&^AnZwXMVJMynEcxV^aY1c?%+8j{-1MW<CCRm=bYEreP<i7o}hxIDN&--xw6=O
zsZSL6-+fW?;RulN834a!vf%z(;Vki4q&UcA60{pUV5L3@A|vCIByny(d%2}a9Jq8p
zF`QmOzMY<iwrMt)*<i*dZHR~O$`+7rE@VjWW{cJ?O9YjenfT#Ei>P(jNc8tM=1H$}
zSy^xvSrX8IAEW*Q3;At$PJbDx3_8jZFQ^K+_tAK8#cy(=bOyRiKijy;OJF}}kH&cm
z3Q_-)1FHYb!sX@1S;4XY*b#>dYz>`>`aU60={69LxO)(f(gu>WX%E^xjwO+i&Cnd7
z%ACuVVyEd#rfliYWNa)@;$({0>Y%`ho}de!t8Wv(0tcL79s>8jD}kT?T5Rq(MotWn
z2G4&3h{TU_Q9+*pR9Z`u-@OiaK65GSy%qyk&!mtV>r!_0=xnU;*2f9XS4h~l6jAAi
z?W|_(V<!4J7_$;DlUX4u_-pnkJaJ&O(3|;88iVbyvek#_=4Oc>c__frggDT=u>iWx
z<T16s%GmDqhy-3YVai$8MHWxfP%UvP25OYCu)#7|nl%jPC4@ltK0nc4H(MMr`~n_0
zph(Xanq#DklW6j%Rm>rDKS-_A<@WzZ@aRknsy)++MPF}1)pcDUWg>JX4c_5EtNjpK
zHiOo=KSYJZ960{p5E_15gML=;hmR-a=$i@k#Mjh>f1Q#9A8R*b?ssQ+sbdC-2_xa+
zzf(+Cb`vR5y-6zhS8~ikf=v<Fgt2Xt@Yrk%7@E2X{>K3>oD#E&-1nsNXe^1bkHpDm
zx8kq+257lI5$yUCNXA@a5P{Hl*<}v~p%G}8_n7F1x<Ru|wAd;~2YA&dmhv$Y+u!YC
zJ(ULVSvLZ6_mzlbU&p}Uq^WpNVHL*j*dPuo2n8*VMATjFAnJF`g8yOQ1(;I^hRxWs
zN|%V|B?$AIWw38w8M&f<nhc7l!wC=k;NuAy{ybZomZW*|O$MbX7o$iYHeZDSGFp6{
z^cAQc^-Ww$>qM8|OA-~Y7Fd1t37f!{k=1hjY)d^*!}EC{;WU=!91Ig}@HgZOBp1M(
zyXthUkPr8`kVf`s=kup_sZd`nbkR0`#{EkQiR6efuD`YkpEs1jwY2T1ZZG8a%=RL7
zo<f^9A8>K;2of>Qp4$0&amBJ6d@W-jzUCE3)O!a(&%tH1_}4nT5fT7HYsR97!e?mO
ztx2<<NC++lO)^h1&!((=5uZPLB$u)*1?o`EkJs%*PxmoIPd1*qYu?4QS-~jh7r_Sz
z^EA~KQ|`O6278**s7jO>uRr{SZJM(j7p*j>$7A&cChbO%^14G}%PkU^|1J!z79lBd
z6<84;FEO*EL#%RdD#!+ZBPp+DqWj(gfn)0n!{er7B69_&ifcso!$r}1$;HSvW|Aoe
zpNM0J7EX!xVGT}y3GL7YRh=*5<Ec~lhd<4N(>uTM>AQEhV*Wu?XxGNL{yk^xa>|9S
z{3qrhJ&9lbJpp`g_u~CMm+*R_kXx7E#~!$z0Qn(zS+L(du;`fpHhT6vK6DA|h}<Tc
z_UHio%~_0Ll?r6Avx<;a&<5hY2mbVag$c{1&~s^v;9%cn&=h(l3g))NVy6RDxT?>?
zkAEYF`3Vr^6P#>%iR}Hm9YZsBir1JPC;1cfcxBHRVfM(Vxp@)`tZyZ2WGS8P_Z2L6
zs&f}<IojSkm5)_xgHo;ikp2E0j#&5wI}DuZQIj$V=u+hWqE%3<;uvOY24dXWQyj*P
z5`2Hs{K~8i@bFV4xpE@RW>fN6X1^{QUkJ0JJ0sT6ABSUE_aGneQ5;5fI-_~Q&@J?;
z<!c!K<fVA!kbQXmq9?PCzJzCw{ejgNhtPQ*N4V34jr5Plbh>fpD4K7!mPWOY=Q`{)
zdY~_N54J>2>%|-foxv%NseGiWA6LSoY+=Y*-r(&(MU$TKdi%kALBuTXWn^f(Vu&r5
z8(2dlKUMMd?zV7#zc0P?W*9ZmTFVPU^DtBIwb)6cfR5BirQY6(wnrvM^QW+p*KYq#
zcNWUiFD-FGXU2}kzG{H*=rFF*r^4ys2k;~IrZ~At3^S8v!0wiKrl~xUu3xW8t(w<x
zY02GGadXO<TOu*uShkGc*RbXz%?@%;lWmN-eaG2@Uc+tc53KsQh_+Y$q!v=kSe0ud
zoNINX>AO0>aJ+%Y@v)fNUfqJ3v4Za{OA+jf-Dw}(g%2j$w9`m}e_8$#OXqKAUZ1tu
zpfo)&4eMvW)y?S+k1_O0=yZDd@<D3V*u@1Zo;Y;89hHzgOrKlbr~FebtXMlp{Cu@4
zPp*#_i$6&5QG36^y6FYrEzIO~Ca%X-aS>eqq%Jjny9NL2p2q%rn?!z1a>My5?)>P#
zU@Ru1c(ZsrY>OU94O*3WP>vHn7q$*p1aeWfTq-r{y)AYa6hc=nddsGmE#@oE{ACl<
z3TaUATCO-EonI6G2erRH3mGL<`orfOY;e=HUF+}}6e5Coq@ERjYrd2|(aoV3XOH18
z7uM5R@6N%yvoh30u>!^)X1u6tCZ9OlnO<FFL&w`^<F7IWDmoU$D+icy`#I$>H1I5h
zrZ=GR#BDTK?=wms^y5=kP2kTv*P}(_PKX@mz*YZdvTKbGNQ8R{Iq9iHEj}k=bmK#a
z+95~A34HUq6ZPO#Gm?C$4#o7-E?ie|_}ur4WJAV}CzD19Zuc97v?wqcmrSalN!yh0
z`|kHFX^$yR=<mewH#7LHu2=XgJAjWYQ^AxkW<0s|0+-vrjZXTdAe<(h#p2X|<WW(+
zsC3mUl#G@nt!EFhE9(bB?5vmUgL^P8c-V?3&byPFtB&FDcSm4rx`@o_i)WIKlktfL
z6)lEg*mm$Dq~y(p`=2y%;g~Hz+kT7JyA0u`zeTX@>v^myxrFoQKVv<E!RD>ePqAS~
zIW|?wgMoE6D#)H;lMPmb$MJMxyh8(qKRyI+tB;Yku?uj)C?%+2Q}KjmIv#(&6Ca1I
zz%&nmGgW^SroXVky_0W~k~cnN5fs6^$6Lugk2PZV#7<N!S7)2oQgYz%6;aUFl^DC=
z11qcSB|BQjk?qsFn6+#nn-d?vMEo8!6`HYdu^o&J?qJ&60*K>|YHXEz&&0#0Le2O@
zs9UT96Z+~fUau5S_>F=BRS!5aa5pSCYyiXMjG(1%H8y8hGM!loFuWiLPCN>N?(K8P
z-&}!vmS4r%Q+2>z!X5Kkcd_zSGay4v1D4-Q6<p$j(fOkn#7`9Xxa&1pY-JMGdVXW9
zx`sqqd$PQJdsvm02Mbs?8;AbAL%8Qqh%R}-EFalm(r2M#Gk%fCa%4Ofk!{50hc}pC
zRE5+7R$$k-k<5_@WOLUAp$z-Q?o@^04BNFVd5sm8>#2iTijXn<Jqq=1-(gfE3#0^I
z;*oj{Q0Uac1&<bi>s2cjXulE7q$XfLnTxUx*Vs}6f4mtXc)*HA6B#!}kT{pX+WTGM
znBqA2U8)J4pNd#_VI#Zq+z^fO6R}eCnRU-~WaLT`>K9IelRpo_$0d^7L@ytn*;S)$
zWCZPfaF$(>(&5Xd*>bh1eQ<I^F9|fNhCmHBD&FZxJ62fn84F{ux4@cfe7_>r=--0w
zBU9Mu87}-s(J*Sid?t@wr+}pfl{iXM*z*_2LRR5F_R+$KX>MKy%e5@=>L5Lgzaq(c
z;_s5cpMsyJ?=eYkor0b(0-40-+Qym(j~VSv#H|MTq$tA+hrSJhzJwG^T%QiE3Sr<g
z-x{1p37&Fq7kFxCi2t5{5Sg0`9>oJYSlzEg(AAQpUFz~Yw*MC!F3ctx^8EM(S0i4(
za3}tm@f=;;>d2c|RXAIJ1WQIu=WWHmpxJ0AS|wkE9A-y{U2nnPO;0iBh!@^H*iF9e
zlcIe~{v+XkQ{i^^2-wq+h~XwbnX9B9>m0Wg>^&_ZZ1NDMA3ha74j4_^TQ7-&<p&dG
zH8)b#e^m7O@<|q`y;0o1APL&%7mDoSo)cy5u^9BafmPnDCY|nbBv&GwOnDngTK4Ji
z$htoKvRMsZ%rN2$un1lriNoP@CNRYyOD0lg7%<fbii$?zf#(XK&|?Y~xg}s_VhEpR
zUlD(}Tt+%V>&Z+X4VZM%6s>lYqR|gkNHcO0U#S-Q;8Fv?KhYc=wQDizRe^Y1y%c>l
zS^>XwN0N66I*?{{y>Y9nFFxFD&aN7N!~X5o{KT{8Sa-CEALIh}=<7(dF&jtAnvUU#
z8=BN7`U_T@EanTpcfgVj&%to}ME=HmB3@kMPBs^)gNs=3m1-C9)Uh3?eaeSByh?@s
z%4m2nbUV-8ozLSW#`A$n19|q@Z33t628%r`bmf@PmkKqd%I_1!KLmb-q|gQO7M+HQ
zfGu2ho-x&Y@d7=ED$;$O_eioBK%z~C8(%Xg)@BiO*zuA4#=+UtpumvMO3M|ug+4>i
zaW_cT0ZowHFpXQ*Kfx(RC&fo2$MG*qa_FGQ`(Uc;E3i5|sOqv3&@5TU^-su9Z*e)s
z%u@w35SZF3ePr@>b#$Njj}=|Ffq6FX$+46uxHeb<z1k9(gwrahYEBlBmcJz6i7vRu
zWU}8DS{ORb4XvC2Pdv{6bJbQBlJkRgX+^R-ljFh8&s3zh*#;7mb#d#>8IV~mbns`l
zvl`!W*r2UUGwxTiZO1iWcj*O`{%<cVe(p`dRSBq1E~)Zr29LEv$ti<VjhbTzphLkB
zx^|i-UwTfK4cYpQT~B^18Z>4OKQ>B+^B1bTQ+hI=H1q?$&u_xg*JqgOpeZ!###pGi
zJ(wO^aaXJuD&o_g4xk(E90I-QQ8>#xgijXyZ#A=Y`5l$xU?tfOHTSi+!QNc_*tiYb
z{7qO)jx@hxT7>&N22xwWZQsA85y~tF(pc}~kb2VzR+xLk?ch|nwA!0%O#4L2Gk0@K
zw|bb_yBYTXFkq)<kAjtlrMN<7hDc>>H+0yFS?3jVI=0>o>=QOX|KTg(p{a@TPud|{
zLI%e5J|Nqz+sVf{|6$3CUyyVtlfM$WvGae_@zpv9sZZ1%Hh!x+Omowu#;6FLli%@0
zg(G;++nv<6qzN@?5)@Xo3Tu@${B_|6R2KT$1z%pUMROYAodvVe`uL8uL=@l_4<}sg
zK8^o9zJcG4d<#Vq()59Ip6Jw>WQfj-MY-f+zE(n$emfXX?VD7o|2tQ1(7TV0dzOtS
z*EQkOq!?24tN~N@jibK~1&L!e`_g~sZeXXq!0q7i;HW1_$N8u7ML!16T}f4<k!!PP
zq-!j!OZ1}Ar)P4<oRw5|{9RN(tc{+sTd+6$Askux6c@C{ai^vOtZTK9`$22^>V_Kp
zHMxr`b?)MZ=Nj}>Z8GMzC2*hMCP-@<gD2z@h;{1%K457oH>fuPmGKLCa@i!R=B~(N
z9-ian7NJ~jVhtFFb&;&K>!Ex7G-~ual`F>7!SiWb#b%{CG~=@}pJqFQ_fPGkOZvuh
z>6|?DYhMZpwX<k!oWR#;thX7pOz6*y|IE1CAV|yFiz075qAIx5-PPP_$Dh}H?E9xy
zIywHl!?g=iTx~JfU>#4f9mowOvgxQD-$4{LliwUZ7}X3{;?h0i=uiJ!D6eh<K{<o?
zi>w3W-KI1&o_vYJ#J^C{uvjEL;}Te#kE5kUQT)$G5uXwDN#yVAjYidG)VudFR#@zW
z;}vr-?y^07VCzWwmkuDAlTXp-;j-MW$(k-@MzrJBUC`18ZoS@wJ_;AskoKc!O~W-f
zX>k)`_g}(k<~d}m+!&Z7>~-p#W?1>onad~NxCjrYY=MVL4yf2<NQ*ELFIV3qD}^QV
z%hlJw{CYlB7#$7mSIuerjW@W~QI7^FtJ3?80yon@S`b>Q(E5>r&$_Y*j_RF)@C~b3
z-$ysvm6O8@?Vh6XRKZXB+>D-(kSEoiPF#J65*^{ThR(Hiq=tTH$jUYQ_y~P}5sZEa
zX0wO!|2}jAzfs5aJSXvyrQ>02)qXlKy$x@^9f+y|17@1@7IY{)gL90k@%n-lqT7$S
zI9PKAB<~*qE1Z>a&fQe@{#%JCprVQ;8E9aSYa(m-p~NrAR<On2``H)wV9~!eFL0e%
zCU65zit~CzsQ9~_-CdqXYNmc<?MJNGn5-tTshtMdd@CRRdX?kf*K!!0^o3mg8HGPP
zh7j8GhlO9wMTsY3vgx2T+HaI#$Bsn_e6dm#?^GumGb==X0v|pvt(M4ay#WgfM&YjQ
zOYqd}HX^C}AL@^&7R%f0!_V`ch_}0}Wk<GD3fa2xSQwVf{0qjB=;hDZJNHk_KBJGQ
z{(j5mr;Ngz3112SHJoWK8ZW9kGKJiGZwSR|61eBuF0lW09sUYApI)D2QZ*CUyoa~N
zfgz?udEY9w;AcIHcXT90$}gE`uOyt95f4i@%wrL;7TB}vAo0xbCY4I7Am4F3Og>|Y
z_5oh#(`tevQlhcQzntjR3NGFKQ}K?5CLZ-P#to+|(5`))sM%6p%!1R%aR=f3*?pS)
zJ~tGOG!B56_K~b~n1MJe&>WJL!_Z)}0nRCEAb~xxBClJ*{NB(M9_k6apJXSzm}`$_
zpDIXat~xQB&&8c;=SV?I5_=;RgptV^&^A63!(8)43hSoAyquNfwwk7JpYXw=CA-NT
z<3zNdVhueXVp*%NF7&nQF-!fO%uMYBJ0BJVriB@-<jXatAoLLw+I83sJ5~J1rZciC
zlcd$!VZ^>#vP?;tt~uDprpr3BV{U(Nv~DsTA6f^$ZyCUiCVxzS9*eTIgJDs91MHir
z#4n!Z*w>s32g2>Bhk5{CopFO|hiH)vqH?%emMhNB7)kw)C(~sz8F=)*7nTgD!6Gvo
zR_Y!vN?frR3>D50m*vsuyww!cqclJ@F`bd-Tf}Q(1@qZ^oJ}abPX1QzWCza<g2uji
zFrs5Q_Il^B_M|N&TV*B`gt$ZLj3BTqpN*dXSrOU8{)A5S6nf%Y*(k}qIDOwYGzv)P
zqZ_-xa?Lgfa_xp$g9g$Jlfl$RZxBB^_dl@x@Cno&9)v5>r(jD-ruc|-Eq?zPFJwal
z$SH++7^*#%|6X5?&ulYLVfj!D>6!s?r{(CJi~53FC!2k2Y$q~zLhz<A$4u^tMV|vg
zKFn|-#*Za7y2(i_YtAbYbyn!T&RM`@l@hSjN|{WtW5n#MkPkUwhCL5<k*EI7pniKU
z=x&(LBC2~>{6im7?xX}hBRj-5{+YsD%L;bn;urDU=CwGde*$WGXOiIb-6&ss8O1L7
zxNXWFVtr78ZP(nx!p8l=lkWzhr;5Pu^v{R4rHe_%c{71G;YucIA7No#qeYdrgK>ZE
zUUA{se^~L+1x%(GLZ_o5ESq{6hifgPXGaCW7MD2q?=vUsk4)ohGO{2vbtrD0BSi}f
zCeUN4CoyiLGaZ~J<WVfnpknPN+~vnX?!s_3|Mh4-CipbO%*uw3EoOA_w7sz7paJ%X
z^l{po2K>>qjvuV{g!Z#KbbI(ZIG(?n9N8#EuX>!~pEjPuCOP1K>M5{ikw1HIz=><0
zo6KWw#-PTL8K`}H6i$pXhFh;s(<kre^4aP6JU-zjzUbLaml_%3baPW2Uv?aaD7S(6
z&wOY;TtwI0IY3S2MfB<yFP7|h7GfUcaC?>0+(Ij!>R-E!F}ak!lgWWq-{;dm?`p~1
zW$FBz-bk9OF_5Nj9?chi%*9P*!zirK<=tn(!TIwX>~@bPb?2h7{dgeW-!=g%x_%L^
zWP!ge2Vn8+VzN0=aLol7120Qsmi{{MhB$)x-W}}x`9kL7YX`Ni(QrGdm=*SU2<vuf
z!RKd(%J~x^+&K=nX8Q2=VGH5k3mf<mb_5poZ3V5kMz*4)f`vuv;M^aZu*&TXC_MB9
zqXK#QC`KIyeR8F~>keQ{iWc6UT!ZJ*5RQLn$Mg|b$vd$IA6{b#E*JFq)r;2rq1g_U
zRDT1K*A9^Cn0(P=<9-O3yg+cd=fLtM?l9ipiW+>>0QY(6#AWmo@;pYE!=BTI@_XlS
z2jOC;-NhKKuEZ_ZU4T%RqgWcHMkAF?socEV*dH{WR_KOP>GA78lRUv8#=_dgYb72#
z>j#Gdgn4vT9qx6$3D1=(@awr{Xdv?9hZK$y-IhSn+(pS;{CNcMTj|*5Dflv9E+au2
z572VLOLl$nR#DKxzgV&O2f5|)5?1Rx#Pi`<+@+@t7pt1n_#LfGwyPc5OrGI<^FEws
zmWBdJhD)le^Vdt2Vg0!bcGp6S{5)hsYc?Fljcq0zuBC|cE?l<Jwz`MQM;=1)?Q|SD
zatQSbKFvckU8vEe!L-`|VOf(L?{2!p#>|zXUnl#so#oOzWn2cnvlvfbP8iOEt*YUW
zKZh~3T21gpjttt`fL_DX`Ag*{Q0dOXee06={<M7BeSHYOeBv~{Ggek`@1=1m?WcJ4
zk~dVvETqR`Jm|mzX_(oT%`a9y!LsS0JnPSTIzLT=W<9%3-JD!$$m_|CORsLEg?5|x
zn_E8k;)*sMC|?4@HQ$Q!?<WfEzBY1p?stJPmkcqT@^om>Ec%viqibvv;CGs^*D(#j
zhHLYv&e#?rRi!`;im%iC$`N#QiIA&PT1W?K?V^6ASxmk(m?}JU;C<gs>CR)r`PXN{
z`tgA<n>dyQ@2mB}wpfOLz9ECpjPqIPu|MKx-G;ntRT|h-Pp2g#S6IJZIEALUNWjne
zGx7V3F7o=RCjU>e7na{m0a^bX!ngO3$Ko3###&$j*{<L>7fR8K-ov>Qabnr6RY>>8
zaP{9uz<fwOs(;JI*QG@yC~g|t|4*5`>j~kfjMaHDG3O)X$B12YjD*h{6xT0a%ZgnB
z=*K)c-e;!|(^4{E>Fs#j?X`)Hsq=z+uj}CZT7c(WTiF5ovv6{JG%wki&YZs3kkoIp
zXlU#T(bW|LY0Y0#{-0+zCOfZ$vD1FAlI#^Y&2$%ZwdM00zkfi|0&nq>gMxG8_6%M#
zZY-C3tH4_aTG8>}WVwFJPpoR&i|%hEsM~IJz_J)Jc3};!(3PcQzOBZ*$7jXeKSqd*
z!p3p_OOt;J+QZ#%&!sV9TjICPmOefB8P{x*ZrrJo%?3{^09AbrNGNpTVO5jqLZ5I5
z+_*~U?=(?IPYs&qdXld0?ZT!OQ~IO$BzBcQ7u~q$ffWJc`8uiDv_4=sT$v{@n0&;<
z`+yXW(Z9@`kBHDx(}8y^e<{q65}={Knf$f60q5rJW$6J&8XsJrD4O?LxG%p7ASvy}
z@ZzNy-aZ%D{(I%<rb1=#T4VyE*}_?(OD#ESmJiwCLcjE`1p6~$FWEaa(nfoiDlXaR
zNpgRlLACU9^78Fz_Ghd!bCZsRdCzi5o84Px>L!P!X{jt@?pE}f^n?u>Jr<oW*O6tr
z2MKJp8!Yg{IGk}Ifw=B^MNF4mA#o3doOg*Kmir#ZTN-nq`hy*Krw=FV1P0-J_cHRR
zxDva|_ku>^0@1`xPsoF%M(8r>2f06^4sni9lk`0g0YS>_?S>g7(!GlaFh;Uk<r2B*
zUBW7i_ObQ0`^cP_JmJjXD2ehkVWnwK?Dzf{F#9@;G$_r5RUW6<gyJQ5^TapiA@G@7
zyi_r((3|zVs3bw((((PEEO@T9l4<mZ3p1@t<eH-`hS&UHxn4}%+cE=s&!rQEd5@VZ
zeMQ1|t-#t<yGhsc1QOLDc(vAVB)WfJv-Wk7q;cacC`zv-0cKm_@O}@x{O1)jma@f4
znKdkb<^fjwa5(I~<^tE^;(?#YM8mDo#PE(BDC_(XUzNATV^17e#SbqqoVpR}=9a)l
zX9pa1KN8kEOor1dPl;|SX5v1JaRSHG2R3P`q1C8gcvd$XUQJblNpk|pal1NprPmKK
z@A$zO_kp-wd;ry^)`5=ZV|=_n0h^Ab!PCPfqGeV4*t=3?=#;k=J$jTVzG-;?bV6gW
z{>}z)ADO@(9o2^UW=WtLTrPTi@G-eEXd^wp_7Pee_u!EAfqbs%DB4rAT$tnd(O$n6
zvE-%k5c)erv@TC@ZU!EJR<Q)SjZ&v7uUBDyOgzLtj1{@ZErwikLc+eUVHcjS#~D+E
z8Lfx4ke}HI?lb+s?)+x9aPStK;gZjqweE|rg?d5JlT6TU%z*R0A=qtso|x&?i2YtD
zz>nGlBGINH3jA}F1l=45IwJ=1;L3$y`SBed-ujk}HG2y)mTS-lZX$lX?+PsbdIUZ8
zx^ta>5_G<&C;7yd;b&QIo<2|k=X(Y6fjj`)CwX9`P79M9atw1jp5nBhqv=tpSiBK6
zhR4=jgb9{g;mFM)U^O=mbUFi|=Z_niVBblyEH}WgZyWH)B7LZweNk+mRlts&oyM+N
zszB{mOR)9}Li^A$pt!^vRAp5##Ki&rs-F^!!Xcnn+rnNNOXKe;8=?KkW0LyJ6Fp)h
zVOR78$eW&lF+Y3Qm)Ganl^d^cX!mgbeQG!EcxVBZ2PiHmo&eK|@3Jd1H!xb&K{mI<
zL7w{#m~lEwG+^Q-wmo$L`+SM9izg-dmNiQ-_Hw)Eb7MOBY-kUOO7_&~<TdnNwTl0A
z^Q0F{R)UI}AO9`A17?YD;DC}GZloDbTwx8LRh|g{gr4JtorbVBstna84a59aX<nt-
z1o`LXXzh`kqBdJ2s%u=pVD<!TNPWS6ZCJ|XCn(VRY#E+h>_DHcD&ekVJNoYy&LF1^
zr#_!1(3F|F{DSH~{Ir|%k(qWh`(hM*Y3a=W?8t)&)fN2v5`Q|fM!1$M18`MO2OLVW
z1K-yix`N~2`>81YE`ZRwM+0!u-4Q&l^%2a^Z6v<7)kJ#YbS|B;lexT}#ltl?(Y&>T
z-YZ;0Z!9W+f{T_Uu|k{flPUv`%zoq}UlGS~)8PAxi>NT`BB>EOlIP8myumk*Z?Aod
z|8~n_X<M53jjk#v)J8C!mUYk-kOfmbkFwip!a8wVCMfK&#H!qjq*$kt*>7_NE6gLM
zDKVnlE5>kp+B>3fqlJ{Jd6U*f|CpxtLNxQufCg(*Fw4sk#qW-=vDloCx@(gmW9~6<
z(pkcms^7!;v&O@THMhhMX9j?su>Sivw+ODj(4wDG;z3K;Q(IRfvC*tXt?pp-UJ%MU
ze0*Tz*9vq>+JQ>)i*WLQOU&-U7}Wi|0q?rhVexMTswVJUwEODOwey}ZBiR8v{<M%#
zCY(S%IgDGZ`(Vf4XGE#xG92DkiTOjN`I?|<<n%x@IKEI7=RRr1HCLDNSHJ&*kl|zb
zISWas+LOa8S486cB?s}g;&k@mjTC|DHKKFB>tW29n`Fzb0=PFNoq0t|;gZL4^j7Fh
zl4rG>)hhj9OXlCfmy2CV<GZ({+bl!ipZp{7(&khw3Wt5aDj;aCFz2$f;G5T6h2itx
z0vw7bt*4dnepe+giOi;xCO>B{b{2tDiIcc)T^BQ6GKu@hO7VGd(tMVJJbaw|kaWi0
z#_WQ(I5tp*rk*;;XNOPao!M7FvdWj+e$oMjAHi&-%?=(>xqy!z=mJvUK???tqM1EQ
zXv8~XTI4lHJW-+wG%T0UwY#nOmTR);By$;rwk$$q0<RK0-K*UtsdDNw?EW=^&V3EE
z>#HT-f2kZ_8<}9z%m6x!493(?j!<H(M{*_%;`{O^@Lv-LatW)AbmaSGyxz=TILmgS
zCf&36uKK++Y|JA1`^*7$+O33EI0uXQiFAI`q6)7?$#AKZcjV@kGJs#*VE%M1moClX
ztA_o7?gOE`Z}}PU_Ne9JBpdo<gb#i^Fb&VEpNDeuoA8eq@__>jG3&%%=wDgEcAPh-
zS7W4T)Wd%9XHcZE%U4jdWwW5Q2pb2_UV}5M#jrr+LVYGx!<p1<NNHi1n3=%6XN;l0
zR~`q0hHd=bwhegpd#y0<G2u1#ABjncF%Nj{$D^+g<#HXnvCmhP7M>ePFFNYd^shUy
zPEQr&_XYFf22)!6I|vH;3t4^ZRBjiQ0*kE^;N<tWxM9B<f37792Z|=b4<i{mW`-$#
zo|;6Qn*8AEvsrx66B*Hy#znL<?hP6BFPMg%Ga(<N1-5YgWqis<am_wY?zeL>@4h$^
zmIw}&=a##<oyBNsa&7>|Puu}6f;3RaIS00Bv|(R}8Gz|}tb4K@oaiDrGT4Z_?l}r;
z8VbNL;1M>#Gz!x@Nc6aBUgo9?!N+^iYp#j7_vJ%4Dx*eQ>oZ`zFlPzwI?U`YoD}c&
zRHr$w*MsfoNP1YJ2h?|L5j@1zV(nq(IM?7j#t5BdFS3trDARx|k5y=!Y$HgHcEe#)
z{J7_^1@z0*Y#MmkkIFmyK)CciHraOoIyGnUKZOISXIGEN$B8nHvHjqB_b^Bv`^aMF
z1<~@vElgcH6Q^lCfzTr+;4*U}U)MPwORvj{J<Df+(jIAib7u?2A21Z>NBY2X{+U(v
z6pM-*)dUyuV`7*SO*+>LUXDJ&)js`<DC*~L7Ud=nSsQfGu-u;XL~TIQx(W1tIl(I7
zWk*WV;e^#SmLA;AaEBskxGDqL(w?~b{ZP<#{YGA;>9M)F&7y7At(b7c7q$v>Nt3YY
z%<arM9B{Isan+=g@TVk;#hi-4%v(>`%TE9c7fm30U1y^9pKgJfUkSP|w~*G_>*C#6
zS7BA`7P7de3VP2jWa9_kB8e~6FjHX%z`=iP^_+)n={`@ARg(w<qWVOxukYaWiaOGv
zHiG?glY#!7_n~&|4E)gQj1e*`p^40ed4H79MEMvFY)yyUvob7r|7Oy?(3DA?OJR=9
z8&Nhf6YO>^1+mR4G|Rgy3dldkhFa|<KTI|<pM_H3H8l?VLW`K<K~IQMP-S*YZ`m03
zH!#JUzQE7fLHs|LMrAdzSmMMhn}FGA7%Aj9b<H%y4W;Ewy&?%LeKz1?{bY1MxDb-f
z#tJgMGvx1qTP(t4Bd~`VOrduk>QCGN=SPLJWzQTi!!Zq1mGhatmKNCUUjkRnFB7{H
z39N<;z}yor$aEpU7QZ7w9M$;(!gmjWS?1ed(8@0MY}6xiDB&>s{nQm@uB)ShX(kz>
zW(^CTgh9{EZ|tbkT{17%6)JxZ6`1ea*yC^{?USF0CJq}*TtW`R$(~l+osvNcuFZ!R
zcTHhV+XqHdrLfbwkSw_9gLd;Lh%5Flg^Rn)A?9Nc#FhPH21TY&^NiwFFG<uLwn|hr
zegIUBelNN`C<>B?>EXeZ!@(jV9c%Y#vTM;(ad~qhC`K!R>+6}UHsCV*IOecOb(rwK
zOfF=#qcgC_yn`fm>*LE`Mi}Te17a$>i0aoSR(T_iJQC)y@ds_8&!T`hZ=MP5l6EBU
zhXjVbUd=RZouRVLj!ga&h+U6{qQpld>w{kfPUU7PI3J$@?#f?bw38Qhos}aSch$il
zBf&wte>Ac`inMOFEgu#U!z<)uaQ=!+9A$oh4JfT4MT0(LfX8UmEVYF9CtHYL>PXx+
zt%ma!RrGA0iL2@@K{iW;2XrRDq}9G+-OjUY%33=p^>{1J_OORSOL>%QPKKb>5^%!o
zAG?;QA^0bREMTq!6s_qZ7&r&@|Jn&Y?r-Gc#!)0hVIB<oz8ZD6%E0};Nw{k9ap8Sw
zfnnhW=qKc|yqr_m)|?gi==@eRyjDW8_br3ZQ!3bBj}<84X#@pIe(<}%p8cKvj(xPs
zMB^4kIQGqv-Mlam9xXM7jJ#c}dwc}6m!yb)v}wSa<jrtwxrkj{vyk=YB;v4p321nE
zIeR&<7#fql;HFWPFyFZtC(4clhg)X^md;NyLuUs+{V;*su2sQ<cmLx2=wketzn}d1
z^Bz@-RD|rFDw<EAbR0<FqH@MpJbDLv)?2eB;UV}lq7rusY>uUd$uMkg6xn7pfGS#N
z!{^hKK6&&V_D*QP=F)sPkh_cO-BY6#;{-4Gk-_-uZ48ZDn9Gvy?z3sS)XeUe_Oc<4
zQuN-%BsyYq7+3rzFp>RC+1L4#`Lk<z)bWZQKj`<9X1qNF6Xe6`&-X1j16;Uxk`~`C
zl}NKRBso_!0SU1ipKaC%i^|4>*wv3|8+8hQe?GNK^`|-KrNLQc7TC0>qDY-W;H&^X
zNTWmOTuIW0_au1SVJEt7)1aonu`=SSRWo7eqZG(~Si*`Rh<whngVb%ap!Ay1>0W07
zt>y&6Vk*h8UBfYc^(KhEvJBq*o`!Z0Zizd4Z1BzQb8L4-Jd1yIM-&w+T-O{MR`PEl
z1pH?K5muh~IeZ6G%DV`OM+U*a{u{(s=_s_VV?^g@Jt@&^hnx{J_^Cg!@cD`q4_<!?
zc6*Fq)jLO^$m*2%%Dz%j{_fpuv)mByoje*G1)rPuEMtr}eabG+t-~)bR4}vj1qRRB
zKqS9a;n!996b0wQ$+n&|lHqYsZnuu#3>ib`kKQBN{&+J!y;hF-gHH2y9di&DyO906
z+OTNh8S-$mGcUDJqMvhhnA?w)c+b)pe8#>Mxoy=4(eStEFIR?6eY^Olv^4lID~Ug?
z-9Z$cO2mF%2K3l49oiP1D0=2p4g;pI;%;60$fSb+TOzLGzT;&uYwZNO=CD5vh*joG
z60XCdJ3H9O<Icj~PJ`EVXhJz#4daR@!T4pRXc!X>9xH&}e>;j^-+YSQa0}zL$%2=$
z;5@JUHIgd%AEleu$H0$E>D=K~GB4|W1P-$|L)-b8^o@cS*o@_H{>yPJx)V;f`JN`r
z&zK1e+fwXa$N9RREqvmrxjc@pgwGE$c(VO4x@c1%&g-n<4?3KAA1UO@AEj`}f0-av
zm4+(IrD1_V4<9=3J2@^M%3DSVIa?PguCk(pe-RyHx!L;U(VcFX@i3P<HaXJ_g+N%6
zpANG3v)G~O<(%3Np?JN7=I`Im6T^Sd()vks+xuL)VZ%(S_v9HKZkkL}o+z^duEM7E
z+M=4zX?nS>qVf2sJSx_ArP6;_(Klbt;pC#Fd_(dhzIA?*cw^)TNJ{rZQP}|Q5LN<L
zy`spmS*z))^l0ujp@uFgoyi}+4#$czQ)>EbB#(XBjEj7#$h$=g#p7o0hBPMR;5I!)
zW&Nitxu_l|u3tuvmF^eSd)v_yqm+ei{A*(Ubvz#$-vBqKz7_V)x5OqlGN{A;46v`E
z^vJoN&?XfKp+7!ifMqZ=Uq8i1@87}V%6G%dVja{PA>=fN6k^XPd3t71BC$%=pj-2U
zVX*dVqOseE-yZjn%P$$(v?Od4@Ae%;i*nB4HLdq(>pU7_bS7|TnYYYj{V8H?wH;q7
zwTl}sdh<OO{($MfbYULLpkde~elocL_BI>C$m{0Rc-K^FWZ#DG!w10UMHR5&+(g<e
zir|B)Ou+K{V^ql<iuwmvh~ySU;dawzG|p`2X<gQM;Y%|Mi7=##rZ1%zmgLeY#+q!d
z>2#J<@&QM4d#bPq(2Kd?k((ak?c3(lE9zm~Kr0!Pv|HI0-{->f=s**@RjHw_91Sfx
zigGvq;=#%+o>XYad%E|DZW+q(8%-0b@3V2VtK|mlHA)iwlN(4veEs<H(BU|~ekd8U
zWDW*i$fO}hU$fcPDG*yefNPaHz;T(+P+Oftj@K#C2T!I_|Ji!f_56PrGu)ik+^uD2
z<q}}4zdRY;FZ3Kwxk0>Enn)ry@665FBXG?dHEi}h-6*@y0z&Q&hJxo>Xy3U3bKeeR
zzpq!56<&VW@;L$HTi!QDw+m+(4RYvr`#X~@G$x9x1P<(wg(Ul}Eh-+@!c`N}MULfj
z8Xr|B-~p{OWX3sTyfyASyRH0BJm>RvynS;t(Jp$xEH=gB&soyY_i_kESl=Ul7E911
zJCjVuUNoM!l}JDH!C%`QQ98F7u4t!$$3kCJ7|vO?(0~1HJrv42{;;?cv%t%L3cA1W
zgFU;9(QHpX(><pto-yqh2`f8Ebf&I_d||<(_#~fo{`^ULo2_y1rd6o<r(HZuehE5u
zJ4015lJSp62%QcB-yT|F)t40*)h|c-zP1T`K5sI^Xc5NmY$axCFPN8?CQ8UTh|b>y
zFe_IBw!#yq_6cy6(;b8f&Vr6sbByo!EcV*01;zGCux9WvVtRiQ5=U8f{e>D{yQG7P
z-FmP!PDfy4i%>b`xmc!09=hIUfT!Ub=CxoVzR?~A`KMlzr{l&z##mj*yD$LGYn#LD
z&}a;tuSQ&JipczBb5U>4VvH{m+r-y~5UW>xq%=Jah8^34OFPDb!hfOg*?K#f|2Gq<
z<ONYguPnGNNr9*;KlUoMoJbQh^2Kg0szzmENB%}!cu1ZN33b4Cf5PC-`QPkIycS+7
znu)od4{@4kBRloT3yu`&z=Fppek6S#rffXSZryEQOZ)G$EU#1yRt#cC8r@NLIsvnZ
zdql(4(!goNOBS`th+KRq5B-)d@I%gxbh(8==QDSr>|94?XdWYGJL|<mgWd?fTSsx9
z*<*75*f0n_WC&BVmSd?nQB*bEhiMA#(H~t2P%LE&0ncRdd7cFxzPu1m1ZKjWfAQEj
zSr=dKP6Hi_5zxGEjkx!gkoC%NWZkPKi=xY9!E%Z=NG(f*vA1OLV%;|;<(`gdHj%hh
z|B^UY+m2kw7d(Gy-*E1SR}gyLgR9UM)OlMBzrq`E$ZKW%WqUz1Q)V$+cCs4uZzRC6
ziL#J#Q;9FVFrJ)98clyaR-^Bl<oP<iGXB2wq}bq-CLel7kG>Anpr6iVv#V~d+&a<*
zZfcb?PpPdSmt%)|_Rbg;k_^q+xg^)t4my*T6T7;mMyG<Mq`fdp?EfeY->yi*?(DJR
zxse;NxNZYPbpK>d=7Y({1-nG=61=f+U;s*#EQ7G_U1X7pA7)2KVNCfyCgJu(v|K3`
z+xv>iqUj0f+<TctH3qYl!HLLT93c8DqCiI}4V~}4XH}PFNcILH*YHLaJh#b1yn7Zp
zP1hq57x&xzzOaWZT``zRgk5hq(QS<lGEFS_vkG+Gw1B6#22qzjZ7!df!5ZzQ>GsTl
zIMb#81B*VQrpbP0ch?spC+vpyrLE-AsTQnu_JnxhIW50$O1q}chtRKv{MWS<R4=yU
z+gkb|;h`Qpkh_gv_Q>+E`gXE<`T?{GItuFVs^D*LFs!_8E822+AdHVG#vvyG4=%{3
zUAM)sx$Qf<>8Ami?&+e}`#H{ky`JBS)8etA8$mIk1AEp*!KWsDoN}%S9y{*hscCgQ
zf&s0mpnSxm0BX=130EDKQS2B^{TDhxgv@cSveJW2i9yKDf6vdX9wxB#fGbwZ@NVTQ
zK4eN6%KsP)r4@SYn1%~>?>I01+c}pX(N{*V-pWQvQzXk1+@MX;s?ljh9dA1za61Rg
zg$Wr$A?sNbNa&4eywk0OJ%{69UWAw6HW-VOveYq9WX!hqYmpP)GGMl~MO1p(g#-v$
zo)E!R6*oc)e%(;OlnH_x-2RS@SC%|}oO6+lUN#T=XX=A%%_&SNJqXlI2@l-#L_dew
z{NtsqsFJcBD+<HO$(;lFF@sTP;FFErpPs_%#aFP#crxJFljMWXaQ4r3jd*n45*E8+
z7M@%zc;}UjdB&srY?H(QJbh6Gd73)rXGw=_SR1xU__;J<Q`oZn9r;-UmQTq~-<p>1
zm2c=WFiVo;ySuwP{}Uen`^`lnB0tE`MOye8?xGcupCvgxKSKE3tYKkcp%a&chD0t}
z8L~2B<>ECHgH|qHpC7DdSg=&${~j&9zu3rmV{mH1b)iN$6ywNDl(G!QEsj>~+7fF_
zoVbGx*}Dk!9_+!W^lQwtDH7F>PQ#v46S05Md-hDT2n`JXvL|o6Sm2{|SRP2xG-M}R
zlq4zEew0UOTN}#{7A`9#KX~N-UX;rJzbLbbtCxj`EL-!x7xdis|1N0dt`;&y^oA7P
zd`5c5>+<JPF7Pf$1P`tWHQDzPjrLE6VX=S#_p1wq`ZaZEWjK_ae%VRJ{d1+A<tNzH
zLFeG-`n~w4uNwqRBY4kCg1T87#TDPRX!yg8IIm|BD14eh6&=*5Ly0+GIs85;z9K;-
z+=FOIq7M(-+0U-cyvp`>xsnZ1t9Xgg6wqqefhscfFzJXsj8`d#znyZ8Dc%W~ZXC&H
zY0IFa-zfowv5DqBx5fjLuE3TgE$;tSm1-J|;ZYR<JW|U+<TqWOFIuBc$1Cdae<xP)
zRIx1Gy~T@^#ror{dJj&$hf#x_3Us2G4gC>jK+D!|$9Xw+e8Uw#jIWf$Z7bri-f0SL
ztQMh^i3GQ3wWAw@+QBzA30q9Yk!>?iV)&^+bY0#A(EE~t?XymZRgXom+cA3lLQ(|O
zzRzVZN7t|^5wF<LOPQ$bs}5I0wzT7eD$#pvMqlsphel!9Y(F5Iw{#z77edCsHU6F@
zUhl&CxKbGQ&WSgKFM!)44IrrFmDSF#gTUraHs*ZN0<WKl>8q8gk&Z1dvfIoz4x%*8
zR5)HgPy_!)T@~xt-^a@d|5#bLEuVU9H_mI!0&7VnxNu&dx(=gMX`nRkm7a^r8PYs`
znJkQ5(~NiSZGddKYFsXCt0LAOBI(7cB&qraoK$VX8+o3{t{;OH-x8R^*D+|&L--nT
zE{hM@PvZT}sDwBOe}8nr3m2pXd`lI(IY*hbns`C(<iny;zpd!+w+p;&2II6TH*DV2
zNzlm-Ueus{DqXpH0NpF#EC!Ad)bH2SNEA`0BNiUVOKYY0s+ZbaFIEBj@29Z;#>xsG
zO?%<hiZ%G9WiZ}bdqp(*{ZQH!V9oV*T8XQ-J!j;3INlr+C;FlypxSB%!1&P$H0AxG
z#_hjUxj66zG|nH+|5ZA|#nvu(KiHpZuS*x|&|_&v_I$eINdx4}SD+Pc0!;0p6UumG
zqWz_p;$ez|d9I}tSK4gO+g1yTyp_guVPUPHPWXxX|IMQG*LmjoL<^7G4B&|YNsu1o
zfV%$AaiF^@S4-Ijae4P}&o?`kHP{t~R@qazFhA(bZz6$nHR##xEf6+)2)^l_Kzm0f
zP=yCZfXz)X<<}pwZ_*&42DS`Zvleh^wF^ufxlX{x9fsCRMpXLUXBLzJqOKQ#Fy=&M
z<CtqRx#Wc<^j*FsA0)g+)|G6;`J<z`%=zaSE#!!bJN0SH>^!!8TPnWqCUnrdrF{CK
zhfFQe8t-ru)M`k@lNSH*j?pTjHJC$aju|}8*QN#!N-^lrel|(%EW5S1N5Eq*<RJ@Y
z;|;@}GvPY^{N$Yz@E~U&yHzDi_Sz;x#Lp!>CNT=nJkaMut`?z0kEZy`QlVZsWdd5<
zx1n=s9)D1N0wnJIVi)!2bKRgpT>iuzRGTBo=WH2^YujXK!<7`s+reO9nKu{?JdE#0
zDDsgK^1Ni~LzsS4nQu5FPkzoR#dXc{FkAT$C{MX8j9uQbU+V~el{SY23(C*Sxyd-Z
zbq;u37)Kk!XR(@b57GB>AxY?1iKlZ<3rq0pqRzMl<W==SqMg2%G({llKlZ1#{%N>w
zgfK2p$VBY7qGM)m!~?OvAie)3Ia`t@QdD?^A^ml*`=%aLtI5)Y-{rh=dIrSYEN076
z=fLq=1z31-DE}LG6{BnJqg=2zFMT_L95t7u@~sgx+o2GiiO2ER`EuNa58?Cf>(SP{
zAbR%cO5!pjmyNMZ7O%VTKs@NMHV^L`$FHrKCkli#8c_5e>+)Lh$L595Z8nQ6QBuW!
zTP?Wb95+_=ZUe44{1fIDS)<`mMfyVNy7=I`UFh(i4PE-ylc`*xuu3nNUB0!FTc|nk
zWB~!|+mM6r8U(!SJ6m+EmErz&r<uCVSmZ4y;jrN&bV|I<0uM>j%h|UYhiwWHYT<*Z
z=fA1+vg>3hnD|t*qGBBXX4{Lcch?A-BLbFqNc3f!0`_bwwwe0e8~!ZJ#1=tmqO0r4
z58E4Z_cKHJojYD!^XW)9zwQj_>r$b&7dQzTwXLXr*@!-!e;z}|?#9-A4qW&|M2SN?
zM5iOqlb;=0bZXmHh_f-F4Y||!^t@h}<ZH^VCt1*6BSiRRVFG<Ib_DOZKzQow04Vt7
ziS?0B$hLn%y!{V__*=K(e&86We7%;<5*72Ia)n^G^dHXps!ZQUX5q|xe}sFi2L7xl
zVy<60U{d;9R(h=hYzI1WuZ~d=*4u;j%?UVmbu4MH9n2@W9A(x2kD~Jor1Jg4xK&2T
zmKm}_QlvP~_fAEKG$<8?q@_V=NA`-cOPPt3q(R1czIPD~g-S{Kl@WysEorF#^M7?-
zd2u}Fx$p0|uFn-Y1j#7^_>YS=FAyn!eZqR=syZJl_qH-(hu*^FucqX=S3cAIy9#&M
zSr7}0Ma0rRo%dza08W1XoJlIuAWeU~AXw=$9(0*N)4rdiHHkyG!=8_?)Ng{f{&~z8
zWFWD9F0IhtiT5P`!xnl8A1Pb00cw;jyLbW%N+{en;1Gu2V<7ieChV5bCzd%^*|UL@
z>CEegSP-fLkyruT=f8?=i*CbZH{#&v_hn@Bjs_5Q3Bxz9zOwJSeAs348PwX;4%g>*
zqfNd8-qx*PwDg{X`j%w~dw0<+DhCDpwb-cG$264=qm4G@SMKI4c5{}H39{m}b&Du5
zJD^9El0|Wv%3C;So`(TFv-qr-Eh-t0!9r&-8usKhwAjxjV=E@mS6Li}RP+-6)x|Jo
zEqCuWT5FK?u}ZW>U7AVk7AB6HYw*Jf#EAzkK<>yTY*-it!!8Ao7n8v3{vL{=6NRCy
z{403)FJRscs-m!NE`~?+a5nEyuqbGUi13LxWAqi1uJjOt-EH|zrv&h(XDREa$9+1M
zqM6kTNWs0hHUA^jd+mS;N)t)8Z#lLYJ!Cqr_u=u=OZXEOQrx&|H|X7I$HccA>1c-%
zIpi>xHVNC)#Ke1et!)cCYvC~}?{paBjIC-yY7F?f>{Irrg$xyQYDfFSLpUBZlVqr^
zAX*c;*#}j%V4ArER_&Lk3nuH3?Jrjmoh>=m-%EDEm9c8LxXPAIGfZO3W3;J*jT^h+
zlN6ZzIf*`dT!{BTEi7uugYT>)(JL1rZm!eln4uf3(m6t1_e*mto&dbQH62%E9bqqJ
z7r-;^Q%vlT7y0!fmv1=y5yW97N(5w81?FgRYrhZ7iDO*!`NU#aqAy2}%07aQcWTTd
zb1`D@wT$R>FQg59T{t|%(%~dy{O_?g&1MFyvu2o4-JH4P>2k^(daOzH)Xp#q?c?#?
zs%Px`!GoABr-P55<ufxY2GMX;EH%-Kfz93fXwPFMDsit8ZYK#7A$u*HurdoH9$v<&
zBV|ya*30~SD1ePh=i$PiYjDpX9*)KLvzLrEfYxj$WMaJVeTF#s>U<XhxOkb7(KMW_
zoKLOV&1sP92qs;>%M1TLm!|KUPIV)0GmV^dmA@ti$6F1F+B#1DHZKzI+{?!$f*W|5
z9Lm*vz5seSyR(NrNfFNl6vp=H(Nu0F<XL|N3(^#r7hKzQe)}3K)8Rv2%dG*v$0Pg_
zR*TI&Gl)#xIp9L%uzC7^rlV{zS#R%ZZTfNujm(D8Ip-ekYE+{ZVtQoBt&i5Tq#k3(
z4<G9Pwi~;~a_IJ@mslfpVe%wQ8Ao53(9_2a;4-9v(#2yqEw&X~s;qE)cPLIhS_B{c
zyjjO7D!jp-M0oC&$VI1T<E@@l2#9`xOs5*VbnzCZB~X{B9vuabGb+&TImoTLzQVK%
zDJ0=B#rCO2bolvf_!4&$i{1*-1JzAnE|P}VF6l7cJC2c|L)ozPdkZu~l>k+cA};rz
zvGX=v!WA#NaDL=AIPwdbCo_#<Sg{R!`kkm*>=I}?IhS?{#p8SNeQ4>hn7kg_L$636
zbF(OiIi1>#rrg=Kqm$2^DX=4&M&0;oRU;(Hr}9|IStPk#oa)$ipu*=qPHK7wFMN+D
z4~L8JN@FD>ZMBe^eNMtPRbe0=C=3QEGUVFH5b|qIHEdPeijnee&@+1v4SK$Y+G<r}
zd+<XxS6GMihNQ54E`F$Pu1MrWr*L(_bBO3YjlnW&h`-__I#>J~y18rsxdoC$RUjSQ
z55&P%kN3DoYc-p{-3FC-*YV?PT@wENFv7tnOvMIn4_tSgI=Wo8_KrRdwM*rQ@{VQ1
zWX3h@%eceT^(SDE@gd&8$Z4GT;soqI{DJAM(ZyMI<9H-o7h3*Jrp`hyncX`k6VWg^
z#{HiZ-~X%(*@H<CevTXQkDg#Q*Ofw>@&!EPbrwGrJ_AXu2XLS-4!mq@p|#ALo=lL$
z6VtfJgfA0GyU+&QFeRY6o~;6*J_5qeq_Mukh(^Xg$HTb_#7fwOR8Jabv<HOfF~t$E
zmR(P+r!FVE_!4kog(wwVEQmL?AK-hg|Ja{Co9Fw|n+9un(rd|=K=`%<op=8n<_UW;
z_Y63T@4k3?`p{?I(;dCI$mANUYI+3M?jxXJsX{AG-b9-pld#-hivH*-g!;F;F``@_
zuIbg`((7@|C;M@5+p>?AuYU<riV>`(Zw3@9okWqm8z35D%Q|*%W>+>e!&9-7y#7=N
zGFH;UnkyTEWTrmt9q)(N_4;(@WO3Tidx6R1@TZCy8sy2b9-Qg^nr|#9Mjw}-#q$Cd
zRQ~5-oa@vF6Vi3q^wVPaN%k}QdWbtiM@o4w%cs!|sd9wxX29}`_1V#rso;}miVJ_&
z@tqro;li&u^jpYMYRx+WgGX8+FySs2Q|S!h94<|?(U+6?tb)5YR>QTLE(|Da#kn8M
zsg#u%`LimRY_iqE$<Yh3Ub2jt5^xUQI!REOo5G-U;~Iy;S_7AM&!7kI7Sf8;!>p>H
zDEZH0HhD=RXyIN1D*oLMPuW+3OMd~{3GtYDc#@s-NP?HQ(GeqZp5WawHDZ@HABUQ~
z+3z<N(hFyA<Co^w>~FDt3~#m~MLPdLU>Vm_76_0>nbXM16()4Y&fRd@T979-Z3VTS
ztxeOP*24RCA=+%93GBsc3~7x92Yms0bX7jfMoW{S)8WKs)hVpE3C17N`b58I5EqKL
z(p&pPA<ABt|6i3W`CUDc!%<#<{Da&&FA8z5i)+oqJW(RuQOT2uv0*<<+lqcXE-E+5
z5~Exb@yKFL`cHc+{8spnbrm?x817dhZf^hKN&jQScjjYQH&Kz^yZ;>W%xCbQ7B5Ey
z7kB!6raGoZMUv?r>q(w#9;-e_0<&xL;9j&kb-l3{C#hYB5(ha{Z(L9A9oUAlo1)QO
zgTtfmKaW3GHZX_eL!mN8m|<5;WipG8utQ^_XuS0jyk!bOAXS-2O#OmKEk5DVkAl=H
zw1&}nzmZ9yHbm#!bvW9*giaZH0H+)Oa&fWyxF~=I=-d#4U+)=_sY35yUq=Ec9@#=4
zY}iO!b1$MtFZVakzM#X<W4L5AkG8w`(t9s%GE3Y&$o0xRSQ1&wZm-J2=boM5diW13
zxiF1aWxavUN=?Sq8=APsuo?E0rozJq{or7-gNwBI2ilu*nKe>v@Nn!Cj&r!j^9j$;
zJ-Pzs)V#qwB@r;ZHjN)WV9HdD4B>?*oou$DFcGky1Qk|(1ilQi<%99$=<4@;8*Kwv
zvLuq}d~XI4Jr10`Cl7B<{{#PRdJmg3tE!nbzN~cf5z^-?Ovftb5<#z6@~Pn~OtcpQ
zE%~po-&~7^h}Lp8m=$=GAC4bHm%zrD>-^JiN1$8b7|hu($=*mgifaV!faVH576+fg
zqJiriVzC@NH9j#O6BeQ2=nu?XasaBXwsR84m5jEXDfW)cAXe=x7G2)~_NDJ|=3fh3
zlrM(O|GFVkM}tgu;Pby}FQl_LjM<um8uWiq#cN*fLvvS#;;ny6>71N<uw=j&B-WgR
z1SN5*Sl~^QekEh9a~C%J+(CajAEvLqAHd~Xi-_RPH}Ga=G$f79q)Sq+Glmz!smDn*
z`gG?UR&Dqy4y-@M%nsiTb^USRW?zI`N<VX>y=?aI6*=Z{z8kfW{Dg9*mr?JK8LZlT
z0Q|Pfk^7uY_~~W^axyfUdhd<I%rOeGYKw{TUm^PQ`ctf$B}VciJkWfSANg;uBsFYY
zz(!AAK^7NYhK9f5^ryiM%=)E6YUfPBefzd?^73lt(Wei*Vcr&+FK3A%#s*Y+Uq0ll
z9%q8y{=*L<TiA#<6KF%*QTmRHsIdixC|8@~+Y5elebXx9dTgBk!B(0Y^~=+G-Aj;B
za0%zf3(>*Lm1ts}iP8txFbyFMOud)_S$bHB;>p>};$RipuCNt~zpWw$zXj-94*jgO
z^8yq<SEGl6H1N^YKw?X6Sl=!0p^c05IL%%~U#BjZm*EB1m(RhEuPhl0jn#DN!$Q<D
zh{aOQzO2>!7-NU+v23RV^gkKERgT|biTQ4FZ$=e+dzTEo_|O9r&mMy$@|lSi{*HS?
z>sc}Xat>u(g~ijC!P1BCp_W@OOzDyUJ>OJFkzR;{!nr*EBN|+^h6voBtjUbe>_E5o
zD$JiH?zlVE4D|}*VcukQIyg_AAFw5jF216UNp79k^On0?OOn7;dl+P%pJAXU6&p0y
zK$Ny5owe7VrrvP3K6Up4U-4NH9=vEw<r7t5ae53cvn*xuts^k(@JSS^OJhB!-$2#N
zf>_>F1MzuUxa{*z`tPhL-WsPU=IX@XdPg7r9Q+HL94Awc#%Q=HWdlBkj^aJPTFBeH
zoWANgiGSR*z{Io));ck4O!YQ;jrW25q~6Xfv|CI+wQ-UJ=Oj>&u!gpE>crq*C|xIM
z&$=gylcLmkW}}rWNjCRp&nTwTznr}y@p>CPb$yKPN&fiqxEchEXmc@!vF!G>IW$D)
zG}YpesOL(wL0ab$F8>n8i~41VX_a#1(NlABJUpDd5e&z%8M$Eo=ONsWh=*oRTin3m
zS4qHid{-$<`R+T|=#Me*-5`vXd%Mti&Pp^W_c<$ku>v3OYR3C`0SBxrU|)|o-L3Wn
zTsK=nj4O{!yI?`P6HilOc>`B8)G;fg-!MNeatO+Qsi3`wvyq=)O%`Q$!N0v5N%)jg
ztf+`8y!)j^r~W-j=ABiCs72!t7*Y#Kj<2j6);&P+XWdZf-Uy#+x!KO)P4ruzC^=MJ
zh+^COQ0|)snR!r%%<nwKTzFc5dxC#p%s&yTne>45+4h6$dz#qwo+qKs(tx%ZNx=TO
zV&tWcBC#=7zyK#V`g<EUH!wHhKe+J%mLC2C@gdyvPhL+&Pdbu6BFph|Rv|rd^*II~
zv!JW?Jpk#bcsAmqD@vWMhsCyM*;#f`Fxn?Zikfy~mGK;URhE-Ztat=f4p}f-##w16
z-T)^~T9P|olj3?~#%1zvymu-Ajz)?BwKk;I`^9NTPzBQ`B8Q{x>6r8>4o_!^@tx#C
zXy&8k@a?EQy1H*5v;Ab?Tt*|VmkcA%<hFs1ViG=972>|@GknNdP{b}hMG+Hw>Tv82
zZ22gPt9WWu{|+~YyVg@PPBU@uuNlN7PldF}Wx*Q%NQ`iefUb+BsOdGpUtOAulS+CZ
zr36{a^BKHZopl_7K!{3+BoltGIC*-&7jM4Fz!!=##9dE;-s@e>h_!eSA@iMZet#|e
zW+s!s=@NLF*6|NDR<gzq7ZZ`8Z01Yj5Su)AfNHLsK|Rm?!u5SpV45OJH|EQb$59$&
z$DkD5VR;dS_ayM;K9$3m%K#gmXG0DWU7YC=Hm{)i9h!=*z?od+C0S4liH~N1YV3Yy
z_UW(qX3|D_O`(|n{-Ojk)n*XCS*mnEF&MktCR5_C4%^P0Av0u#AX`X~9)B-OJ2qdz
z<m2T`6ZgD*B&U<f(S=m6d^=1}F{NyvAkX9F1Wd6E!+US8<3;ry^w_2~RIaoSEl<kO
zYTv17nWqRU=hxM=Ogj$v$eA)i7G!VaL@s)29<{nWmEJA70Z+e#vfDHU;pDdyR5?F{
zH7a_Kp;Hgjx)YWRboj!O776kv@DuiU<zVmQ7JfpI4OVYTf|>1t=pp7vqdEK0-sU2z
zldVEr(&9ng^#<^6sep`wKK%Oik-de*Y=&Di-aNDt+s7vm=8z?6Th~g{4?0l^6Dg27
z$DzvB2oafs+^nTJol%&46&y?aaowC?O8?5y#ZtR@lkb?(6I|Wz-g5`a4o)XoAAe)p
zZGR%^%$+HRUC1%k4L$0-Ntk94ky_sZDn1K9#NZ*WoY8LeV2Uoro6aMhFHFg_+!Ey1
zNO6&PZ&2FKkXZD4Kx$eIDEDQ9w2c+`EYTt&Rky&kbQ}J2Q6>*YPqFC@oP=%jS2%vU
z4n_)o;-vOT)FDp|!wg=*wHbRLRoRvLOr64)OliV<+FJ1TmnHlsR>F+^+fMrb+=hVT
z{cw7k4wERx#d#FP!ltip@vL(xp4;lizW3CoqQ=WPx5Z5i`}!GBHHzsfw?n&+&eYU2
zn(_N)0iOOvX#S}ki7_`zE}H}{CG+VGUI{K*w1gV=uEalcl<CHlK5Va5XEJkV(jwo3
z7|(=~_c;zYN%%iD{n!sIe9uYsr-_lMSS@nA-IQ)zx`^FmcNf;oodyFl6~R`!54sJ#
zc_yzyX+@0{=<qKx!UsG^aNHjz2d<#+j}d0M$4-3pw+rIK@57|(nZ!+=t4B%>;RIn*
zx>_!o6;{!rMx#>9VbvS#NSzG2#}~4XmBaY%9v`5m@q;zfu@O%^HzGyd;biZZM%a>h
z7OlPA*~gqEWZkeL{;T?iYXuz0uKDA*Je`vym<SRj$ud^sYC01a$MxdRTS0r*PmI6F
z*>If`!Hhg*O%!dJm=*hI%d1xS;A=wi!Y09?e`|<Y9pL(d8uZ_$#~kWYmtJ?2qP=4_
zBs%6gIPI^1iyr48;(-Dzc1d8i_;Co~oMe2vnmaEvFEHtG?Jy_d33Eku5hvNY3Iz}3
zp^UFX%LirYgtTNlvM?2YUfs;(YJP_mGdU^gLv2dzrHHSb6!rNlMQ?TLFyRNpU`}uw
z?n?Q=F1^K2&llt9qdcF9wOBxgq-I0MAwzPFZNSSHMR{ho-eA6uD^0$<2Ull@<C&pn
zEKgq;f>b&5KUYg+d}(EiUzWh$z5d*sPLy^yg|jLr0#J4344Ch76#tI^ma|T-4zvu1
zebWwdm=k}vGO-`l-<?SlISYZ{c3aw)SqS>PQaoR(M$Rpt1@U_X$PPyeU&~|(J3W=<
zYnT9Sn+=5ylS$9CFkCg_ifL2k5J!cMnhTBNoPB`H(!I6@K3`u6uMS5*R;>eBb5xyP
zdY6tGi|ipND1*=2HXqL})M6j{IMKM<dSqYy0(x(3Dpa~e;i-LP`00)?c_e-YoJ)ns
zhL{bkW9waZ8+RXU=g>P+9;Wm;vmftWQs!3Q;rt9C6}n1Zg{I2baTqrR{$H-PJ!hQ@
z>Puy~$i`FjqSj9Ka&ZWm^w|QPdzMinu_D$oZy}kx>plD9awnTG$q0>dmeW%MvXq|n
zW!^DWxaQnAt~wS8enWd`w^9Z-Pj{lP7ngxaK@@tltVOc634f)(U~V00z#~K(C!O*I
z&+ZCbZND1TElV+F#&I-p8-|Aab7-QZ0nynXPOHq4ShE`!(RQ~Hvu1`V{VM$dpFMht
z5@R2jvcvW`ep-*dS1(2XQwlhEUYG9QvW4VMQDoltIPe#-HP9#jf!`y&8$bWmK<TUE
zWSQA+IC@7Ex`Vnv-CKb~cP-$1r?xTy{>$hEzpF5C&j?T68o_Uy-m&rr98mKI%kEQX
zLuP6$j{2L?om$G|N`eekPP)Z7Z`Pm&afUboBGmtk4x7EJ0NMg-k^cbc-RxM1_FqSP
zyl1e}Me5iMDkpG(aXeVr-(i~^reM|O8fGSSL75FjxY*|)HvLl|_m#ql?6OB-x_K=o
z`48jKugc7vel9+yd>z?rWkq9OanZ+zM{volH_R^^4QgR~lK*Y68RC@taAMd))Z_X;
zhZ{9``(6#oG#zJ?EmuHo&}p*!@LJOMOp@O0NQR5KY1BzN0Ls3LQOhDOM)!dMv;WsR
zDt6U?c|Gid>ORw{ZS+m{L%cFsby%C&$?l?F4nM&{Q<aI^WdLj6OveYG&#~Jd5p)pw
zh%R0l^nUGT3=&xaFOG6Dn36Eux;lVtwF5LuOoW;*(@DcJ6JmE;2>xxn$n9kOaGqEo
zX>grLbkjLm$j*b*IcN%wP0^r_Utea2eFO3Ct8cuC4#wpEyB}y~Crz>h@_0kW;;4Dm
zk=}4J1*b#R7;$|fNeP%uyq23oPF)ea_mL;v3BeHgt{QF}o=9l12&64ZCe3$EL2^Kw
zd<*6A)xOwJvC(>{%(=rJEZsw%EiWR=gnjtC^;5VwTv4)?i{9L`e>&YNZba^wEP$BZ
zo7g=9L(H=svq{qscTV@2(kFkn@MZ4-Xlxl{?~rx;pR!x1kx>lwykSBGYGv@l<4(}(
zXh3F;Jhbg^fWO0wu|u#H|20X|r$+^GbZ#eWxxE>(-hTvFr5s#2+a9~_-{mzt4j{H$
z%*lz*Gw_v)1e`v0fw{8Y3^X^C@Jb{n(cXm*Vam$3nnPCkFg5cXWXXL7vpG|#*7zUH
zlM-Zf)h?l5P&w8u?_#!0A7?)r3p3K*$#BwmE3S@^qRm6G=&NN)KXJ9>KQDc{%fE%L
z9?wAM3+L&9IVo(&QXg7Y$<5B1mtkkobpVGPeD6%zPH_w1V782}*An)9;2&m(>OI`F
z^BtQR%vmbh+VRxES+w&aWxKwnV>~NG;=Z+jub(6_ifF=+o2JBHg1Zw0&*K9<1@`$Z
zE$S7>S-uLVL(SbP=Ag|UA{FzRzrcrkMk6n*^BccI*3M?8=~OecOghUZd&zN<r@PRk
zT+iFw;!EWE3xHW<Kx(Zn!{gCM7;!ZZ#%(V1&Fa!gPedIPWipAPZv^Z<;*N_B?_<xr
z8-`eWQ9RO^gx740Y5!I`lD6v;VwVBw81bRU#0ueDS0Rb46eMJ>G}Ygo%`VG52iu-_
z!sMLgMCt4iG@2F9?DFTVeoP|9JpPGqex-n=VJ|c>bBMhEX-s{RjJDU3a3?EG6kkeG
zXN}oZHIc(W%x?n)l?}9BzY1SvOH-dR8=~~)6jT=_VO^~S3F7Rr6S-K2$e;yK{MwvY
zcu~w}-!Kg%68g6^F|O=PvS3Fw(|S3CR(z<(t%;lr{p@Sl6e$Sy<Pfvuqb3PC@BxaY
zjKS+2vYu)S*&E&S;or&UnARSKw5|c>#|e-{hUw@n-U_J&>iA{94W*HaR1sV7Z>S>`
zlq^Kvlb!4w#~S46NfZD5W^~^>eR^0}orb?urwu<YVRu?9^6V|>$D0eN?G<<I-!X-F
zb9j`e&l+(9U!7h4<Sj2TUxIE3;&$CzElBqvWqi_>Lmn&&$LEz1SY27eGaLKF{Ikdg
zy~*?GN--&De9;1pGK9A0oX6~6A<T?1E*i|I4cn4i;K_h4*~c>^!}>2d8$lk^X&Or3
z%ZpHXrBE!Bb>fw3D3dnN@7B$Nv#7z)Y_ilTirvO7)_!lD3vYIB#^hP8&}+7c@~YMn
zJLe=6Gg$!-<68L+{v(W!M-4jWRDk_rWAc7OGqZl(7fjR?fax7FxQ^e;=6sn8^Zeqe
z_N^q=IB`1VT&&fWu{AKMrwlq*zh$>&+ytB7`|;v7J{o-1q<8xysQ!c#9Huc57AA5r
zQ3o@qSc@xtv`GhES8C#}QC+HBAV;;AMPkUwQl413Cyf}fpb0C=tbDHbLBe1$xNYQi
z2(Jj{x$Z!XJ9}#Mj5>J>9vD&WfAffk!yWd1$WwOS>SMs0SO;1cG~jUPC78SR5;$gA
zlMe-paq03(*1jhntcwlFsy(x4RpcMo9>1SaSJQ;!Qx8xr&mGwBKb_wHJc{dGdJ(iZ
z>8e%<c5E}`c2b97Jh>SAPt`KT5`^k0uZJJ>95L^lL4S;&V^-1-cocF5W(~Vxz^E2T
z?KI$G<x<!mj%jRA(^I%3-h{)K7gOnipYTrX3|8~&K;x|lZQK704|1r=Sy<0s`1T;4
z&9SB>&;FvyJSzyCCcvyaRSuGQCe$j^ghW~mV&8HR@?%pXmS#M$et$@o)Tt`h_$I4T
z+w3&F|F8qT2x}0%H+}eeo(qbf@ntu}y3if>2idHS2hjJ;hPykCLeH=a{p;Jyj}3Dq
zH%6tQc>gft^VO3Q>AB3YCtPGq%t!F-FT}I=8*8p#;_k>tbLlGoO;n@(B~~w#B^Mvd
zkXo)c#_(v8#UZZ0kL5G*r(58f<x%FK%t|_Q(q(3}RF;;hoyH*9>yY4g0?5}}7;vZp
zG~%3CrLCp>EteW_OeYAZlzMUan-cz;kC_<TdYgM@XGz_s1L!H30qId5lu=EC-#l^5
z-k8Szv6)G_)2n!=udXC+)@C$huRSBR{|psY+YhN;y2!hjjammn(9>5NzZ$(~ZcOuK
z`Uii&ae=wyx%L;BA#?<T#XVui7ipTrHzl!E%V?!^FUq}t09TiB7~|LJ%nt2VXzSRG
zEt(gxqVql*t}g|;=Q+t}rw1ASWCt5AC=u6`<CuQo4LdL52oc+ENeWi9z$N+TkeC)*
z)99}YiIVTFJZ)Yv=eNxwRnvR<_l(+rZPTW2-qnM_EiQ_C{S@MR!IjT0x(k2T>Cn7_
z7f`tdFz3Y#XtEGuhULZ3@aH4mq&-@+B6cE;SvtlD?RfxU`>sKM+kbGL(<ViZ3D(rO
zjA8$mH!yIrl0Q@?jr`y#m=T-L3J1L4Fwb6Srur9021fCvp9`kRUcl>D&f?sr1uVTA
z4xf^`87=o6(8Rw18)iRZ4o~XiT?)DZB_;Os$`t{a_s0{a-d;-Xt#yJQ`wxQ+pR@MP
z`N!N?m4KJy*U*0N5VVe~f(6Fq=pA|oAGFKxug<s#^OmXdN3H^O>NcS7;H<U7_;CcD
z9{o|yW0w1RSqJQY$ryMGlgiwF=2=-MocJ}3#3sd1>5ffE4&;G~vmiSE=ZSYLD^MhS
zA_22hsOElurgMVPE`2dx^!<kE15vcmp%n}}1?kb!V6@kJW3{?9hPZ_^L${zRZ+5hj
z^@^Za<Y)Wi-m7v%E%`h)J}t&0o3_A&m?=bFCjd(h%p|>EG~q#C40ZkQDK3~~h$X3i
z*z2#I>1MDdQt5)kLA0FZueL<lQ9}|pxQ1Sv&obcMivm}ynYUrq^iaG#(FiC6o~R>=
zdurnSqFtouiW%vN8o_8uXDaD%gxoQVqAv3{!t=wosa%8v&3&Rq-!@L5^{p|~D*qTg
zIgDH{=8k*1Vqv_b9v@w4#F%><m_1v>nFUL|nM+B@oQ6UfJH>n$pQ3)8|8FHZsJfrT
z_zz;N=>+nps~z^<OUDtVzZ`;Dh$h@?g&vnX>{OK*)b8srY{|7Gi8Vb8m%B*XUeBN@
zO+n<&zY)ma9?2~DJ&bKxE)e244CN|rpteyQoIfG_*OY`obw}|0q~-XJi-!p@DreFL
zW<%_YSW<Yc0?Rvpu%)NBg8L^oJW~4`Z&h(xm5?y@o})Hhm1+aMm#0uM$$G39noc*I
zH)6i8=a5(Nk5P4rIF%NX<zfxQ@ZphY@+!cDg!Rc#b*WxdGK#?qAKS2fy(~E)_#G?T
z&*04?iu4<I9{HZpX43L3F>P8E-2FMoPLVi=L2uKo<spf_T$xDc?EH@vRzC)kS6i@U
zr4-fps7#kO9z(O#2~@&u8@3D?(nEJGXo{N&f5zP`?A)}CYDRNN<kacVY3B~X9sf`)
zONpFLI|Jv^{i)uBKe*?mHLYF3X|Fc7VQum=C_nZF4+Jcs^#+D0y1*UmdZqA*sX4#O
z^(vtL6#Anu9Na54m;e(A^!d<_s`3j-la&EBoV28iDmcu+T2~BJIK${?$J6{RBG4DA
ziM^VEc&T;}o8~@-yf>Ag)N%<+{We4Hm@FyZ8bj4j%F*-{`b5C(CFu2@qRMs7)YIT4
z8W;$W=Sv^L#0~SvPr0v*_{&ttyeA0ep3||yEgVbf0?r;dg=WpmgvHf+Ft_0v90gf2
zWF7@SEq3zvS(#AxOH--k=tJzE5|2aNS1`~Ng5<!nFiXD>&4>tWJl4h37v5)oUa=xK
zySQ^`S_4zi`vd>uy<)AT+d+DFC^wG~AV#9+!9rJ(m{fB(5Y4|h_gxa4spM==cRS(S
zw^RJig-&$g<V_S8aWN&MSMagVDH3$;JcI^Mr24D@`6e{Pz?(u0A8^K{YhuA?gA%#r
zzmcBZWe!(%_p|N3a`ef7Hwn}hV-!=KaQ9Cu{ME_gr?l9ieC`&sSakpj#Ah=Xk0+v!
z*L>i;5GT4F3_B}*I`A8fsZQKR{A8ZaX1|Za4F4?5tnGj+j#A+8hI^k1>;FG5!$RS0
zG$K^jx+8Qd%?dT8Ni$s;NRTAk4!5xT9?U|4`)^r+<Ej`?(TB~^ljw_jNus#Kfr#z8
z3X4nD(PZmH=GK8#bl*f_a#pVpmvQInGyh3cpjVmBwd;rRU&3VL(MGt&Sp{~X3UxKR
z!W4ET<9$sBvY^3@rcB8Pn>CJPoO^e44eY7?FDu+#WI?a{{N`+5)5$i$)4UaXUD%}h
z641Iomz<MnWqv2L^6WO90>@e5P%bFV*q1+r?(>F3GkG>0U8YK<FD$03U#-WU@i_Lk
z%`_;{=0K&Tb@;3%f_3hUU>k&$s9oh>_WVf``m^T@?7y-O?#OPZnWw&C<gO}q<m-9%
z{pKJ>{(vp5E}M*7LM*K3*K;^mYYH_v^7NvU5G>9pLd$S%rsBI7%B|KXy`#5q#Az;k
zk4nJ<p7Xi-G7`<*mSWZLJHALl0C;$(GJ|_eapY$v34QI0uEB>`hp=FB{^=m_dKS^D
zoIm{NNx#9SPK6vx6oj0&*RUos3lij%aiHZ8>CIdO(wPkq?ks_UPCsDjHx5gGaXx9A
z5dk3ux%{YkiC`Vg?TMNPaqWN0*|$qf>6Qv(lBBMRcbwCZx1bdz7sxZK<@<1Fls;9;
zu!e4P0jlC(07d+*^!@M@>LQ;&)}4%I5=sE8HEQABe@pO2$9Fs(sLpoIdCA(IRiWyf
zF70807u^2v6P_Io0t;1f`k$-{xo+9QZ1xy{LoHKb|8WKQ_C$tCpXq^+Cl*vHeie>#
z_uAdG^N=nP3V{PRu&r8^zPzIeNjoLcgsb5e4&B2UA^;_Wzwz0rzt|V+L2nODAR0$G
z>uQ`WEj^k5g%7>(P*przdGa)rhAPsOpBeZyIEw0C;Os2i?rxTe4N*&#qM{yr=AYV1
z`XJ{Vg!Wr79a*bDUgreH<zzt6&{`A~r~!MM6U^@;+fmK21D2@ClR~}g5caVGk49dG
z(%<pW-KI#MTo<7QIe{p;?+glyC&1(H?d(foNh)Ad3A&4i;nBvm#Hzv(?My2`mYb{4
z&T(j}R|56<oK>p&B_<tjW3s<m6V0G$r0n@7RKH}8LX#8OM$<`rp%Y(Os>Zz|@_uC6
z2SFOy@QkPU!WZ_K?P1M{8H^-1qP*=ZMo_7rjrb!>w8S4kWF?^%c2%I(8V>&*nZW5Q
z)u_`_72@Hug44<V!q<NDm_s$IsF&F-2n(1>nkFSuucis`s;~uLtBk>cAL*cXii_@=
zsYOoD@qh~fS5RcnQ<TZSgCpx^vsN_^;FE{~(7C(F{sY;>so)NbEtyT%yZ*wPp^99c
z@PKLj&k5_hzOV=NSCgyD&cmIjMx4ddfTrfuKrx4$GuzG0D<TuI_JA5ydz_B<;Sc*J
zh<pEE$x_Ao6A;rgmA*EAh##LRauJoG_!>me`m!c_(PIE#-F1cTHR)Iv6^3oYlJrS^
z0?HXwfTp)2UDjwufB#X!nO2m2uBt&!t+pfXEX&2uQ|y|5gdJSl$js^0rj?HC*|EIY
zM17+>eOxR`>&jP=<;%X{<k4b&Lbx=&+HsRn9TcLQrWmmwg}A7Tjc1t+jS6VYMaS&T
zc?atoyD|IETxcqnWk)yOWagE7!!4%^yggEhSY)1!Z#nyPhw5bdP)Lv6^0t>l@z0`Z
z4_F*jDdn@%n&EC_yVcSQA3?%0fGWwwz_gwhXk#jZ5>q<x&{jv7wpRd>eSd+%H}1W7
zoDNZsX0rvfg>@IdhAGLlEPtXAvvl=1yHM>NM%mA!j;qcwb}9aJ=TbphH$#lLFgu|y
zB?i|oaAO-PFVL~s-(bpPTc%DU4aV=iM~TufxRg(rS&zJ7!m>|LTv&tWman0+JzhhP
zxH>f*4Q7qRmH{<ZBh1ixYlQ@B(mJJ{F<B7I>{{Cc+~5X3%v^-U9~GG`1@3GCFBdy>
z8zC~<9&-4aWUAjN$dn1u(b-b0;J#b9a(@b@>vki1ZUDOjC7B)0r>vK_|70~TZ-7lc
z_OP>Z8Ka=Nf^J%rK{xuJ1rK2ZRNXVe-E}rhc(EY!a8!aW&%F+{F8fG-uPFb5(I0Sa
z_kv%`#lcYF4>W%2goNai@FMsI`X!YyzYZ0FcugQ(dqs#1jc&#XwI=ZO_9?`W$<~MW
z?1i(D%UI2^ZvKafMeHx>=XhN<4Nb)_krQ>X<XN=_*?Lfg+&zAqKaej#TzSIeAzcci
zo3zMW-2&ov$dr`s4I_IymXIfECwR*~$`NZ19nzWFiI;@s$vQ27;8{nAr`RxiW0Ewz
zGBgbHW$%Ns?o1LE62^aUU^R2%nh{;q9Le*R$;G~Ti&2**LGX>Obgx`Ij=oI5{0ddl
zzj+adz!akk8*1659Okm@p*dzt2lAU<O4DU~7c#Zv9o&gp3yU;j;KHqMOnvfJvg8zk
z{nQJv?N2(hbe;|_F?h<@J70si{a@IS_Id1q$^@LhSB~xxRiOSoiy*mk671MBh5~yp
zfoZ1<Jyp%4r=+sLUR#TFx1MLlf3@SorBYb>q8wFzCV<9@MjWWx2j<*Wv8*kb#>a&7
zVsdsc`&6&PkFB;eZ(1@~R7b)34jD3QZ8~nXmB+gFdM@rYn2Te6#(aiNjIB!?6L&ci
zthfHc<39p%XNduu@5NwMas^uZ{YEpr2;R<%e7=nHKX{(wL3DN~(XOM$P@9;5*^_JW
zxu_SuxHcPkay}UOOqJg5&I3D93mOr<m&x`$fx(%YsF^QDzjnx@*WT|u3A^nu05upN
z_Y#gojYFSL2euzv2?<?O@j%;Q@~5+jN%T63PoiXL#o|V&<7|OH29Lx0;3}Arn2#TV
z4>2*(3rXd@+bnOQ2uZmeNIdV(A<qs)LC?AYm`62fOWh@izP1&@ZduW)kU&m3>rP1H
z4XRZ04BI|<lYBWN*s&#roOxh_`$jEEkYOR_{JjTdk3~s>>luvX=A6!U%gChC4W#f<
z63m?z2_tE;I4?n&Fxu~#7>SQ4Z4}G4?0nAE@A+IzNg(BKcjpDyIFJ$@Cn!BIo$H&I
z)6@yVsG<1*W@&`772~4hY=kK_(&lqJyF%DvQ4GxmM`+0FPQ)KU(DgR}4%`XH%I44b
zDXN*t&`jg*>?_b)=m0{Gg-N^lR>p^?L?>&@V0XPBjFf(X+=Or%Rk#_S%sPvA%4X8~
zvm!KWk}&b9ai_^6Z#Z0fE&>;08}M}|#`q<JVCHmc@t-{UZ7gQ$Iembgh&<Kt)}n{|
z%h*}Eci_RR8gvcV$O`n%BVY2AX;y_dbvCYoHAkYDx9)jNTZb$RcqD??-B4=Q7r>Yp
zXTi~>+3?BdD0TGv47<<SK~Rz{cr;V?VYDLsQvU#7<h+EJm*Yr7U_Y#QTuH=w#^J>}
zZxBAJMVhzR(~nW-z;J3Q+^VXiLSr-Wzg;5q*Wd>FMqHY_ye3Dc>n~s)YF%Jv2%1pa
z`a8^qbRBxxHwylauOT&?b9ufAV#K~F74z#|NwByIJ(b=8ZJ(Oa;`3K{Mjh#-*;9z-
zPGNGp%begEZua5E#TuM)B%Vz($=!?)GP*$#PKXbngI6`WzS>3(TM!zX{0K|t)Z;bF
zXtw@wE{ra40wb?QSQP8VR3=p5?}cknNyv?C><L8qAT!wV?FJnFH;9kZr_;zQPtk3?
z0(Dg1&F;Q7iZg68fth_4yxZ!TfW<GN<3uWRPW1$~9qDIM*1o_`({*8lv-b+TJ_vuq
zHjra`b;(Sbtza@Yj}MPdp*$CBx6Ab;`!Qr1Yqww+^#W~Z-SqFE$9en)SLA?Cs}UXA
z_XrZ3enM)UF~&v)VZ6n2>#j-Dsn4+@<hk?UoyKg4n@NbK_)|O;>O@<b+tFaQ3kezz
zU{C6C8l6NvdcyT9d^Z2X#lox6Z+7X-=`3k@rjpL6xviwbw=P5RG96}`@hxUAr^SeB
z(m<_F9o)+r(pY96vq#poW^>pB&?bvu|0{||xM#A#OrKhrTxZTSS>mSHl{6rb({vAu
z(m(spV$D)x^6~K}xD&Sk^*p-)I3z{y=x0=0zYBamFG9<8wqS5Mp3dkOp}zNOd53#=
zR4X<dB(fgCG2Il@6-&cUuP)<<sbl=yPqT=UvN&B@&WECieK`G%HEO!8!;JIeteNOb
z*myJy<&M3;bN_Pq-VJh$ShGEz|MHHD%@rhJo^Mc;!}>TzIAHRR3M}|F!W<b`%OpL`
zLg^WCC|qJm=h^+>t&LVBq0PtPqoXcseY^|g=ig+MIW3uxKngq1>IL7rnz6pL86H;&
z(2kQ<=$E;i37#fFngcYs{~b41)J@>HVVTgEe1&J<_ynF!+z$30Whi>%AzWGF4iX#R
zp@Uc~XkJn$o43S(^pEBAU0n%oU7F8!9R(U{C`Y0V7qga*42{qMdhmTDJ@9fhteE@=
zW2WoTgX*$qvrmN{pT3p(Gd+(@v~kB-%VJ@c(O#O_A&BlT_cAp)IwY)1pWaK8B_8ft
z=(k`etlRdPZ4jM;o*R%Ul==)oV_z5&%Fvdh3N$d{BtGT(!K{crHsIe(e(QGuvS8*M
zYBfO%x>ktcoxESDT>KELBv;dvTX)$0-bw6>g~>qEK7-7G2vTpmo<vuw(X75xoTrh;
zP7c2fR{kc`@SPwv^eCgcMKTz1&5PZqn}<)?Buo!mM7@3Fp?FR)BW7rVM}#sUR&oP|
zT=<0Vvc+Mr#1YOd)FKThEYLvAkVg1jVS*eEvXj~u5aB^P+BsDecGR9Awr^FaZ*?<>
zcJR<Il|vnBFXVReIcU8~kN&z9LbC#Q)4NAC=<9j2;pTz+;2d)j$Es?17n;oIlSNBv
z#A_HLblrl8t`+1j2TLoz2^v_{XGOA}E~ft3v%%rA3d)<^r=1=g{>3%{Zc_jQjZ*B5
zaD=ujKCHhaK!VdawC>IfWUl-{_hT{OQW*_n76~XGo(%^CTu7naXNWs4Mn^7hC#1g{
zjN@g9g1k5+ZyCjv+sbiWrZ0MlS<+!qW!~$o82okpF6YrZ2dCc|k;1=AIZf6t_V0!W
z+I9OI?rB&+G#p*A`cM`Y1Yg0=8ULVG;XQMx_XCqJQ_kG8ZDkjXC{wpMD{#=;j}6K{
zu{B){4QgT_`vce8aP{RPhiy=je*qQJHQ;KoA{D%6jecJKT<4#Q^XxLYXw{YU**S5F
zDm8fcu>g-hxro7|1E|(<g1NG$6f>eDnQ1L@D6~tN9vRJK#^)V?Pv>lL+{7D85)JU2
zWE-d9;Ua-{2jjOToNc~bmj6Ur4Ck3%1x1l>81?TdE?gLg*OCp1lTsIdC3(V5ZBS<e
zAIqcuTW?15U>S2S{UC23v5CKeaYNJbNG74*3U}mvW+uIU2S0<SLiHtoYPOm~pU;{=
z=P%9x!E+Ds{o_;2%V9$Z*|dmVsHjKYbQgjDhevFgSuWZn+LE)4FCZthls)byLvpOb
z8TLyghTWB>^_vT^vHBn${7?z<cG4s`v<Q<NOlZBWDXr|BK~7DN<>#t@Mbqd@7`{vi
zeb)t$P*Z|un*C_}6f1JY^BVIhd@7{XsiU2~2>dH;<cU3d%@UW(IDDrDmhIlnsK07q
zKD>|t>;4yLzhpg?xov{&+LxKZtY4Vt=}4N3qv1?Q4CvSOv-K|t)~}6Z%paU#^-qao
zkeV;Xto8%Xaz%VoR0(Bg9dVm}HNF;=Aiv5Jc+E@BGYb3O!NdF$B<*xHBx+jFnG1?Q
zZ-o&F`>IKM{l9`n*)rC<U>2sTMS(FoF?+<r@b&hIG;q5Jr)icY?vfjTP0nLuV&;N(
z$R{j+D@>+nnG?R>6y~Ot5M4N3nWm=3z?+Aspg3s@8>!9ExC~{`;Jfl;)Rf@!<i9ZC
zj0c&&?hqBX%ED^<sj$ZII2i3R!nbQG!M0kIc5PKhuYcU!x<?B(o228G2b?ZCNrZ@c
zYobuG5Qe3w62DC<^v>K9xW9WpFSaQZ&I>714asV}-&KN&3oM|zSc4AeyuiHqiuBpq
z39$Qz0Nwp4+d4Qyj;=XlN9Wkd(5D|PaJ%qws22Ooo>uB*tvGa+&&VYDdx;!A$yA|!
zD()z9TaNF3KL+ob51?`Z1HyJ6p(T<bS7NwG#VhlOB!2<S;UcoVjSVqVb0fXjB}6t)
z7pLOvHTKK)6+A14OSq=)KbZPWn>bj-(8a57;ri*GOj(~2QT!7NCYxrEkO%Q-?ve||
zdpa1u*D5rP^VDnH4})zfE9vijPK=A_9FpX<1H}S;t)D&%B3qvLFmuMcAY8|qI8;>f
zmP-~w-fT-+I$NF?xZHx}&fdJ!=YB%mWkLGak>T6?PNQ!Gr0JmE7#pGen>}W%%IcX+
zq45hiSWV_^CVUMS`Tctu?pb(-`68}J?q?>kzxHU5j%Hby<;ce|!5LJlQW7uKT!6?D
z3rgj$V1i)<zwB);8?2v(A;3_R#V_#pPc{1apbmY}vz>;>UxGt@b7;i(?;y}s!tPO4
z;t-yH!S~BvdihT}ymDEG(c2^#jqS^53nN3{{HIJGGsQO#F(%a0(U|Tzzmm2^Y=&&d
zgXs2H8xN(<q~-mW!6Nkq)A}=#-py4dlOw*t+Ly(k-{l9IGREv+Acd6o{<zS?h#EW^
zVr$y?uxY0xX`6T#=0|K~n|Ggtz{AQkc7q>J?kAtuCMQQc1^zKxZxo>Pe-Cj`EQ8l)
zw;W;vuEE_~Jd*izB2Ra_H1U}hgG0to7{lAr#Heu%-60&rygFb&GdKJ|8~;`Ow|Um|
zUH)&3o_q;PE-K*y%LFtkn9Mf+h)0dHV-Q(wiq{kB@u6E41}#^iEf0D4<bXVV=KLF1
zT+D}{dr{Dq{gmf>>>=yZUd6mxvj|T7_mOv{whuM;9iSimXR*I#rn5<lF5<LtG2*^!
zlsW%0hPi7s3oRF^QQfbZu>E`#{#DtA&hwmUnVBYO+Lwd3<)vxQK@E&~{s($T{4mm6
znf}Ya2c;*PV9DxGe#OQJ_!h}&g&9#gCwe-%-Bu+lE8}o`hZtG(^)!vUYE9+2nc)5W
z9W<)3gj)V?#jV`DW3KH)a_WgO1}=ESn$<qx{pPKMTLTD})znCha~u~5n?rg}4MU37
zReY^$52gaj#PCo)>wPB|52Wv;Th5)pfA?;Jn)qbMbo>wGZrefg>?3&L>10gmNQLX$
zr_j6XLtGOhKn6Z+q?2Wf&~+?>n1zqRWeG**X7w&ovciIH=i*ZJ3x&Bom@-}HaTuDn
ze}mQbF^pH?Uncg%B2w-rPJUc-p~qW8`JT%?`6JKG;CDs`WZVA0`VmE9f7^lVP3y)X
zPkoqLuR?Mwq@a9`09_d+OM(T3;is?^<*Ue1Z(~hD>io!O(~0Et>P_%rxd7GD<$MeN
zrD$8YgIak6^IyE2h_f#G^S><{XTFVAV8FLxSpH3fe7Ii(zm97{L>Gtrbe=?J8?VQU
z`hS^)O0S^j<a)^9B1J6<K4a}=DYE>_6=vVx3d~69!I+gd@wq}0J^7xiIa?NzaFtg4
zbK@?4ty@W}%~I&l#{$OKY$^;zg)piZiE&y1_{Z8DTf<!O8#~Vbt(44aZ4{)9&p1s<
z{zLXyunV)yqY1R{+tc-{L$Oh>9oDY<0KcCWz{`9=l6f@{+kAgA`mxFI+u|^Nc3S{e
zfftAs9OOK#lThn&J|j}M3a8E54<f5$;H}qO<lSI#XqP7L{*r(*1}wm}Rh4ArZG+|j
z3;e#9vOezqc#u11R&F}ZX%Euic)lrJS#L#$MZ&>i%z_Em$-tgtEC!hDU_z^MXp(y>
z3Ufa7)jCt@+R!W%*;NkB>l~rV#TpkGMN(I}F#6!O5$)W1grs>-#*@>7u|>KY$DR7Y
z#MX|^P*k9TZBk6G!e>@J^gHe_utd!fTN*g=1PpxdM13OzdNp5;kAI)?)Ze{EbFo|O
zfXruh|DKt6du1S*cJ~83aC0VeyayT8ns{uuvxRttJJRgMJf`gR3_=~=<KQ$Kl=>=(
zHD8Qrf-2`#DYRe%CJsV_mppZl;W!KvmRd_V{Q@@RETm1k%{we@3*YKKL#_4;;JxFc
zy<Y$<oA(m04*kJ(NBK2rvpM8=-V7X#y3adOmdzZ~v%@*xrEq2N1uRM9p~=}2<RDe#
ztJ)UOZtZ0QQa00fjzM(UBQBO@t~P{@WH1`5A}}Ga1a2SI15bS;qNQR>^6PGK8rP{b
zXi6*0?3ZT`4-Bz_ck?)w&U8}b8jC%}x-|Z!8*x&gY~SZev;*$Lyb>ciVJw~)pPfVQ
z>pH=INiSLNgD-GJL_doDipD#4ezAi~g-MT76B=*M;B9j@BGO@#@v}OgiP+SIZ;qOi
z<g86(%BP9gKVHam3%}*O!Q#Z`PBB>KmZJ4fRpj||z3&VYdh_8WEVAFkX8+J7mwf@(
zxcg#j@mKb(jv-a8bz_=VMl$|qhL{`&L7IP2kf<tkBjt7~TCq;VZ7d6KnrXmsiC54)
zV>z>ILMd;S+fDc~R1XPzT0u$WJ)WNx&whJf&8+$?NXtzA@H6hL#oOI{K6$E3Chjz+
z700JgFLzFRuNe!T%EGiRwY=uRqG|jvEnTb*vbN6L@fLX@QK-G{I}Qv8(2CQsSpWSQ
zyYSp!bh|eW6Xc^vOIt1Tjq#@kb<5cs8hRMF))20pm8V_Y?ynd)pU$J*w8-0r*u|Yf
z->wHttYr?4JrIW7<OHlmgzCeU?17kKn7Y0U*?;<!HMxj4f<t*0O3oluip=ST-EhKl
z0y5)yw06~b`i7gZzplQFFJ@$8w6-ex^+dzfc^TY1a}P`f4ukmh5y%-tS}SWhp_APv
zI-DGW3OO(E=>B4SA=ZkAZYv;g+=!AJPw;<qoq04?Ul{MriG<8UB17gOm3YsyizZ2i
zB$DQVRHRv>xkQBw6{3udN}9aqc~odJlm-nNXb{b%M5*6B_pbZLUF+Vv{O?`sUGLdv
z@BMtgpHJH;7YzJl3bIzGFwWQoH66~2S{qkkQBot$(Yp!T+sefQ&t2f}fFj#1;PK?V
zr{MVD|4`<{N`6|0pw6orSrxVDC>loSz{$v7_`P8T-(I{4ybCo2sFWivTN}p(i-PIb
z$Wlx`y$RL(3UL2>YjL-}C4XQ<nK&}>8s3#wVvFXU<Fb5TVSDjWIJELJu`k!LI5Lt~
znB0$17lk{~_mlC&fKoX5dK3kkZK8;Z3-Dug1loS<LJhrISTo=)KW~IDxYp{kjk?RB
z^uam&cHNyGDUN|j$E-1GpfQ^%)rC$cqEY3npg%9`#igHwUWD{aF6QG|sA-a5A;HdY
zWO%2jIpH-(H1Uw7+kw5CZD@ap0Zx0kN8A-U5n^U~(%ot&Fn62^cl@T4=Y@wnU9@4Y
z;r0}IrV+)yr+80`3U2o@#CwUt969&}TsyA7vM*SZLG5EavqO`9mPX?EdDC&p#^t=K
z>tLEJcMeTzz0g+05l+M@<EwU0vLEHf|It_nsqXV=+19C?ZTU&KWqK5KPA&lrhrKvg
z=P({gl;(4t`%xnzOTd)Jz{a(DV5}d57v~g$|0X@O^fq9&lUu-Gf)bP6>`oPMnS17N
z89ifqz|D0(NVv;@a=0Zq$?WIb<;<yaa3V}T?7}(5D>Hl_O^$z7urlGSY4YTP_?7M$
zmfA5Mg8L4kXX6XpT<XsLHo9^POlIMO<Sy(~(j&G-$eP}(!v{0k!66x#iQ0UwO-jJ<
zY0FXN*0;QB+)jwvJDN2Zf8c^F4#Qme1Gr|{5VQ@b0si|IE^eqHyW3sQz5e+E|J*Nx
ze-1fdynhqk%$`Zd_2rrBc}-Y5w2PajeGL^S3HOxaZNPqV8hbUWfb(Yy`J{C_Ajowr
z<-|zfl5x|SRn{|csizG4ZE}nir|c5e+5?c6tO&hJ#_|DC-T2At82B}9=9d3`h20k>
zk+DGzywnZEzDa%D1;1Z-uO<YZ8Y}Zq`k9-)qyqP+^LRwS=k-LSVL-Ds?GZQP!uvz1
zcR(R-GI<GE^@BiK?jU5W;`wbQkHEsANL<*W!K(}xX5gK7d7XT9+7UDnwXd#)Z7m8U
zs0i>y+D(3AizG?@Qe}0gB52`i4U8V4&R6!^vr6;Z(6gutvRduLi|76Z%eiW}J}ey;
z9h-p?0*hggRw~$!2!iM@xA3z4NcO->f}hi1L&xl*anx-E>bp{in}btuZ*LRo?YsdM
z!-RZe*){C)dj%J7hk>cR!2NjU%kFkI;#bFh&O)P*^L}|44i&x!o0fHG<@pD~lk%`}
z{S6GEs~B4%AoMsb*6?P8Ff0Cv`Iq9*xk`z~&yHdr77Ko)zSW>v`%yUPb>f9lO5E7n
z<CuiWTUdEalGl#YXS+&t=<>g9RPa}kNqK897n9Ez?|ue9RXYH42*AP*=OJ0>i8|bW
z0R?T0UsK)<gAI$tUJFOFx99WVmP#<pHjrS;6r0fKg#mof%g5SmE7)~oFt{E$Ob?na
z!OfRi<SsRuH73P!VVSM)_)#$|nmd(SHFpr{&9|W~kLR+I&Bt+b^XtngM$Mw0k-N!a
zOBhQS^a2zr4?syLk8GY7ERs}(C6SLr0rSmavz0yN{T@WRKE70aAQoFD8uQK-SNNs(
zo<VP01*Tp7i{tjjkeRI|^fuhU-A{_}WmYe2-}47@LLR_+(Pch-=Ucc^a1b*W3)pS<
zQ{07kp}*n07Cx+#6BqkO2`b1R$QcDg$o(PIUH6qAlUj%ORF`mxM#6J&;t)D4If(j;
z4IrU&FpP@H#qI~IvDo7vHm@7aLOq>um6I|>clk3@gGpk?lCdnT@*V6Ltja33hQsW}
z-4q}&DAq}P(f!EzAggb}1Vpx&mFm%mh!)(qVIq~Ax{=IX85Y*IkoXm;!u^mho0Rd6
z+r7;n;vZyG^_NSKY|?T5ay19Z<t}9QA(Z+T%0o`(WHM}hD_%b?71A4O;8@fN7?{z;
zCu_!XQw9MR5BC6WVjfM+J%nGSPr&J=rKqdnM~_#$<h$Qrz)RznVwV-r{NgnH<FCN#
z=N;jHo|=K*&e@aMP+c0ebRb5Z_k&dXS6uxTEoM779Zw`$Q|QIn@UOfX%{VCt)LFu-
z*nQ)k|7qi;H|w(+#dF-`Q_5`ofjVdsN3j2P%2U&%!~BkmLFlzyk$W@b4#+5KpzOgx
z?0~-wxAnzs9DUG^cF(94-Ojb+ds{kCCGs`Dt$eDmE-JDa3J+n)rw3p!C_9=$EBG0*
z`Pj5*BA&H0!)XHYJFe{}Y&c=g9Hm^U_RM^W+TT84o@X-zbgR+}aRMAT{S~c$j>p}J
zChXfgC0x}O!&KwtnU<je1t%8ZjD|F(E<1rab(h1JXl0xm?m+i)c)s!FNZR6Ljm0Lr
z>E)ynknszp%)WkZ5NyGj<{R+Jq#Ew!#z(w!W;)*-;7IBT&vC82552vO7`I|Qc6`4G
zg^Sgw;B`CrdM~CF&pfCUJ>oCA{16W}^`}u<QMl&PG^TMi8wy&r>C1a7PO~giK=r@l
z2CgjxH;J91z|U&zZ`(yQ8?+j&KDw|QUb7%PrV4rKDa_ILG+y}Y0JiPIUF>rUr|<4v
z;)**5pwA`~oh<#Z_Rbsb)0S$CXqIQsh4an>E(fN1$6)5<A(S)x96xmaR2r%O12lJS
zpo_j)aMJV^*zMgwOXd|rf8hkyl_0?mTU`)KXI<ptoWH<Nvo|pK&N7H>b7MJ9(R5=`
z9hY+Z2hW7Hdzr~Ml=}D$O<Wz=_*GBvqHjBlsoFx4<5xp=`X7jL4QKw-)?uT|SpIFG
zGpzEO!-5o6^U4-Jc>hK;yZ-MbHc1Ni#Fu6;orP}&p3np?BikAGZB%8c<2zxt`b%&L
z8%V3l_o3=Bf&Ekz#r8xV<Wu#8d_?m!P`zCaL4~=fzgmU<C=Q@#MR#VIdlAfx#Q44c
zIrLpLpsnhk#6wH=^EsO(*yedjTuS6g?!Xxt+9u?>#EZg3)4zY=9wujrJe7x&`!sc2
zH*guJ$+=V8HUqlwtGen!Py<98FNER8j>6Gw18~0ULpD{mu-U*<oZ!2V8VpZ!hBq4d
z8A88j_oGO@^+ywbs=|&I=N#lePl>~e)$tgyB_5{TREB08!aH@UVZX&5+P`uleXr6&
z)f0xK^Qw{$IT(jW6aMfqflBz?a4y+o+L9>52_>3)`R3i0@cWG^n>TeJ=sUTwI~kSu
zCV3(y3@a4hERf+GFEbD;DX_<nWB3orhHP-D5|uefvOV^9xTou7SO8=r-H4=dPO;oW
zi8d_GcO?%KcS@eiFub`C>V*8c>i!UvdTa`!qd`pWRR>m>OTgrxkyyKXAy+wY1nj&X
zhbxN}VCldAxG}XpoML$-H@{w$g+B-tFG?2nS|`gfX@VMBAG*t54Sc~j?eD>WG1Y9q
zI}ru^{f?@pLLT~`1AMw~%=&cwnYUR4e4g_Tb{7w0avO5dH~BU8_7tP@;~g06uSvDu
zQ@A}dH0YSzGJIPyfX#pCO0pAdKrwa>+rPOD&p#><U3)xLH0bg>exEp%b+1gq5ff&>
zk(NGC?^R<-mOPHExy)1Mbb4Z)fK%q&hm@z~@X_uj`t8)C6;~I4bD27(3iAs`{|vbM
zM7UWOGUk(NuE4PKQ)ttWBfNoQJTtH{g;{^R*@qE<Oz}e&#JhMiv#AX@Kf{|nxweNJ
zi0RDOe*@=qS;(lXoP~zxu1sf1HJ|e261;hAj{hngsiVAr4{4KOkLO<y2UKcPSD?^s
zk3EMyIpH{UUMPE9G6q|Xtk~BoSv>IOI}Gs}D0--U78||~fSc_nVW}6u*ta4uPaTIT
zYFDvB_9B$cTjTusVH~(!&?Up<*D%>%kMeKy3LT=`xMNK|Jk*_nCoHaT&U5>@3g0r^
zw>*zpd?hI3_H}OH4+%DHx(z+8)rYMyVi0}bDwa2h6Th|dBb9D*y28J}*G0m+;OB7W
zt=A6Q^mkEztiV!>97N0GDtO;BQCOru73auWvv2dHg=et~H=N03A@AhCc1tdv-26+t
z+N~2(J+@%=JAeL)|2vTR^o6@=<%pLK2s7u*TJ%#SuEZ-<%wIZ!T5-2gE_fIf1RlnF
zhc1D|aSIsl9?ly-{{xdZ^@|dA>ao(SomipK%GCs@&{`)8O2=l{w6PZ+CnUmkulpGG
zGKqgysm(6D^q`kssl3FvZTP3rk=<JzORmo%;I^X%jaR)2EAu}AH{~mKzL-D<OZ`#C
zSC$<NQKtE3+N^kgG=`N(QFU!Iez84<1M`N`T&Ean^Y(+!^CiKxiLo~UeHcwk@KD%O
z=$^0+lFv>?54!|>H1_}~m+hjVON31L*?s)vqsiR%&9CvYuW<J?$BX}*nU5{YKf?X{
z)~wF{1<o4T1v?kUQrf%*zUs?k{O{Uhe*S;^;7@Fo_>gQa9<WRz?l+^iJw<%}t0HQU
zKf}qQDKmIIlx1Yr*h~H|+;6C^05lf<bp9u|_<sX}mH!Vw@c+dt?lPH+I|jePd9G(&
zRQCMjbv~qE<)_*BwQ?DbADoA3K?!*2<pOLQ-;5dCRPfR3?->196&I}Ah_dpNaih{q
z?CgBaTTh$I4?VI7UFUn?thHOv<=P5dZhjaacNpXUk5}yQ|Kb&E6^6rG9Y>VfE{{EU
z1glSX^8a$a;*9D!G%4XHq)VB=GDk~>;{`pMp&33@5%!lhVQ|+!kl*DH%FfL@38gQh
zA#CdbymU~D0*-mI`H?YviheAdJ1hexXT5QcUb{$U#%(S!bu>m#p2kbfx`6`*??KJ^
z=OE6&jD2~cP5wEH+3AUX;81MAkM54fv!A}9x$z`;tUrobO6rsO^TpyHJvMY=VJHq5
z63h=&x-R5QvSE(;Zsubxbk~ZsDD&DK9CJf~m3{|s)|<eNIY!WM7a(t~tMK^CGk7a2
zXpmKU;gy*(_ee<zS3jCSZ%-O?ba(`fQa*`t_P;P>h&}Tcdd}5VQe182P|CGff*&NU
zIDwbP<km~j@&6XEK_{2;V|M&T*|QelvV9RhdgTCS=X4Adv{bl_*=9J<L*B)Bu&jW{
z{}1KVcCtow6LQ(&NTwP?sn5Efld50IN9+}}Bq6hevvV4|ANn0WuI@nBtFF|2R}=5;
z+Y8OLlIXE|AT4Y!290e7aG<mmUp7e6rk=O(yhej&Sjn)dzBbHe*;~wNc_j3sB52FS
zRGe6Li%(Sxpc_squvGdtO3BGnnZz+%@J@>?Z!Zx&iOa`x!u`V3S}(RVHHy--Z*$sa
zW^BgCEI2!5FbkHm!^)5oqJw>ZVTYO_D-lMVvNKPDOn)=i>?Oqxy*U6MCS1haUuU7&
zKTW)9`8ODL&Yi3pufn((Q+nO$B=p1IfMK9B3yS{=9|K3@0ACH(G<g!M=y?v+X4b?F
z)+FvlES&Od<j1_+LXD#%xs#2tShD>YH#sYVU-)7icj8Dhzczgqs&CbS>VWO^Z^khE
zXF8tgS9Wt3HyyxVPm|g7zE7y#w92Kn`#Cgu--WYvaiH6%K>vQNqaSlJ@rm3wT(`yp
zJ*LUAoOdI*J2eZa>6RiJcx5PcRA<uWRT2WGU50K~?S;((23^|Ikel~l7o7a;Dlos2
zaB(w(sUdm%9`nbjIaHm#F8#-A|I=f&Vc&ry&cpa~wOr`MKAsc5=9*5sVaxT^Ffm~e
zt(~F`$8D6@y=|dfY1I$7uDlM%2f0y!V8Qg>vjj{flQ8an0<S#u8-}JI<kWtgM{}bC
zRm(RCzwPCUczDMV$h@V6K^bP$SJB1W(+8}W>cr~Hgbex~OWJP2Lw&L>MF$GH!lOF;
z`%h9d`}s*ayjB$!KG$V4=S#5mx5`wJenUK^#GHO_ScQiYKjX-W)k1G`0ctJ##Vyh|
zr7gS4Ay!GS%~qt~sWpG_Z*vv*RA)W)4=Bca2I{QyN-|rhkVa~HWn!_Q%~_BWhaY+$
z!_*QRTK9bcTd|~1<mW@UaIHUV$jgHx4<AD2r2y*Ik!9~H<!NQlVA^#03q-s;iE|dK
zLATI1R#hqm!};1Qb(|)@%4IFI8u??v{DJts4$=1t@gG(!6dR0HBHeKd>3Z%{&aPuM
z47uLG|4|oiY1fN+&!A=W<>yfhvUgx3-l@Uxt;v*9Wdklh!`QRI`^6zE%$e(@>1=Ak
zLHy5|(bvNhMI|wB&@wZgtIh}~<w#l5+B%-L7>MXsZnvmEcn~wFe!wN_K6Tmqaup2r
zdxBrH4nm4ogUeO%L^NpJ!1~3j_)XiTsH;zrtdlCS>_G*5`4vpJ4lF@wS6z7ipbZ@J
z_u@b2!4N*D4p+@D#OxYbCavejGFPppZo3>P{A9y>`M<&4>vqG)Q-}DMb_)E<QI}!f
z?Rxa}`NA)su?u(1iw5nUXjGk5gp!8S(7h~;+4snh?u>Sf4K3$Z{R`)c%lh!#5ml0S
zDaBkHck}wc2GJ8{O!q_Yar0F?Y4z8+tk}v0N6Q*f&?h<iw*3jVn$N^rL9%?@cVR}k
zz>mCE<fFPoHD3Ak2^tG-^EJEoI_HeOgN|~;aG}5s3!NDTYlCgc;%*BbG1@J#z#Z}7
zhQGr3G=P3|-Vrz<vKYAiGuBl%Lc!*<ctd;;3tKd4+0lpCUhteV+JBA@ON)Slq3f8>
znzIo9DjU5ThQh0DCFnGCYL%o?JfH0EO-eH+iFS5f6B}z3<CXhz%t~3EV!zq(W^<$|
zDC{QezAMi{Wv2_A&u6^6{WtujaFSDZcZKJ3s&JOEh-I6^fX4#MDgo`q<-01=r9&y$
zl#HS)uU%>OXa!bfl>p}jE|7}UWq#1PoiM&S5$f$qIlF!Hpnvo#XyIz%Y0pcDO%9-<
z+HEjXn01;;+wr%ImavmH2eHGa9^5Zk)A9%ldg-D@_mTyU|7v?G>3f6KX;auB`GHil
zIi8CTY~=POiAZ~p8CZC{#<}*H@av<%p+E5zbp<B#aP1#(YM(sQdA<Yo4)YQhEq)7b
ziTUEOvM!<@v$J9NVlC!euz)T`*-_uM2XI)$4gaY*b8call<@Qoryee7NwkL1$QP{`
z=q&KSjWx-6RU)Z>)+4EiV9s+(5|^rb5|zIXU~}qTgK>v~OH$Ajka;A}rdy1Fg^up9
zWmYTKd1oli`<KIyn-PI7r~jddQjtA+2$Q7*MWu}y<NZ^Z@2MiReA2_~O-jN2GHy^7
z>@2YS1L=L)5{iBLgn!<m4s{bvSmU7Uurl10Vg=@S4(t@vM9Q$>Yc|YoYQnh}PvQJe
zqabAEZOB~u20li9M2}r(Vc)55+`6Ne_*DiHB>JF4?c>F8si9jG<(0=b57uSw52EqF
zyQyR)$-{kq2b%Sqg`FBD{P_`wNb6J>^?D^iQjoBRJy`(Zf}(zGfF);tJDNt7b2K>c
z7M|0{1;c{r7-6)EefNF}Thq4itv#jqV?h?&SJ@4FZ_NX>{BiWp;x^wpvzzlD`V1DP
zGw5IZAAhFmAcQ<w2&P*#VZoziaM-(((^=ujj{K+0<Z2&rJ?AR%ZP6|CzoSV?BeXI7
z&0+Mc4S~sp4QRb*D@7Xj!#TOHFmkLE^uG{;q}2=V>T(16ESXH9^1@xgg#omnbsCDw
zZeacaBWl<Y4kL8+>F*aU`jEeZJ8F9aGk0ZEYx8>C{3{R~F1DaW{B+U|7WAAUJv@C6
zLh1GKVDwp*8uVvTXvuE6qcxK9Ev(_gNN?6UG7Afg>Y&IeUTnIk$mP+v;oy;qU|ugJ
zu;hN@hsZI^+0vCJ-8~NVvxT$t)=ivu!vu(L-NU3P8)hwa#qR$a@lky{OwiiGw(3e?
z%hxy;>wS@|e%n$AxU>bI^{xZG>CX7pTZ!yG<Wta}0Py&74BPLmq_+m!A$)f{89#mp
zJ!aQYp>cuu<uG~nw#E&IZ5xX1G?EQ2i4op26KGz1Gbs0lu=lfO5nb5`>P`pYM%Eh)
zdiNT6?>{&q)SPC!AA(=j`&svHX=bY`Lz{SAtk}Wm$^BuZwxE$CZ;py`F2n4iPX5AT
z1#W=uNwDIU;bZq&eB5~f6;@`D1$&38|K+g0E>qF8+0_`oa|3@mu?#9KpF$w!^OaG3
zSkbRYzmETbDcujC*FFvBUTpw}h8XUQpqqJRoQlqNv#8R|5Y_dn@n^RxrX5Pf!A24=
zdyXpCvr>wkp4tI}GH&yJCaGX1c@m!=3nMV*QDM;yNb<D92~FM5RhWTMr=wjo*KNWh
z+vmYO;jZNO*<+a79E;(jO<>d*Lkz9!!$c29R{CoL+&wLvs}`Dx4OeT?G!qN*P=Cc4
zIcC8@kQZ`~mhi)WHz=Ik3XhEsf>YZ_?tZqQv)j`QEJ%*z?S63fH<Ib-syXZ$*9Th`
zeSq*y(=b7Q3?H_A4C!8O;vK$BqeQ<B&|h!QoK6W^?@V)w`D;%b$L-=Dm->?AC{Gww
zDxA~b{Q(<!G1xcF=bNRB=<|$q5IwhuQ#jiK#x3sDH}(YtTFhfx)TeO)5<i5`YC+cp
z4^YRE;|^>$!R4Y#jICY?i}s#_;zN~iIKl%rA2lSum+v5LOOlYAE9NSt6S#9Wb707v
zdC)%pDcmo$CY7B*U^!*Ic&(BLsyEMONiT<iYRhxfHr8VA6^+>Qqym966vz!58N@Z(
z&SAr@Ex=2Hv!ZisgV0NGBh9s|xlOHl<UjTVu8mmBM}#j$(S~#UR;y)V<8&ji)|H^$
zGS^}D3^&evR1Bx>zXOiV7(g$SOod!d22M@?B+jo(gIGN~7`mwpqBFIjP1TNS{2f7f
zEAV!m3ha1(J;IuykmRk!AKCbudpT%08}n))>}a1w*9G<W+h$98cew-Jms(QvFjML&
zItCUiJ8(>7BA?fH4*Vl(kY8&oC^6T=zz%)r@xI6hT}eS-*(2huUw>46NEyuSZha5m
zD>^~pT@>U!k)%Wub4bj7j+cb~@jgQv5Z7oj(_L=`PQErZyC#E!<6-E3L%8f&0OK4>
z;a<NCo_^uTf^YtZ4YgOGcbx-GFwcY1Ur#t%FYFio`~-OX7X3}HbH``?<^Q(6;XhB<
z$4v-O<VU)6@%QD1(c%6`_;0@r=_H=xLUu_*{6{JH6B<nS<z1Q7x2xE-V++o09Ygz9
zE3tKluEDMFL>M{Z6c@00DHI+!j@q}3*x!jGsQ<V+H#1wAWogu-lg=f6zfeMq`nDQB
zJwHg(8s;*->j5sju!^+vE$O?GA$`^97hA0V39)0d_;8mI6cj8EeT%E$m(*s_MwWzk
zJF_9zBn<Z|zQOXqX}DNgI4IlqfZ%h)b%JK;fW{xO-N(<MJK`Z1SY=E{CVoVzs9P{)
ztTI{7i=aWxihRe8bU_h&41%X#gC-vrRH%-{yA^A>mJ&<a=c_^6)*XQ1gXF1w!WO<I
z$V9B=mk;6IBIXye6g58Z@UUHhow&9YPA!t>Z^h?h-=H+S*gQ_ixJJPqfw@xJV8@?M
z9>cZPPlk!2Sy<8j3{NlmfJqxPSc1^mNc+zO^-rb2h0mMB*8)_TX4-G@Sm!0&lCEp8
zul5(T9F@bc_HX>pl?&ik#T+>6odSKvU(qwljK&AJ;_CyII9}*YWC^naW6~C9?Z3w#
z$akQCX|`-a$XxO0N5-tA83fPD4!oi{gkGt1;q}?-%r&G2Moo~XYUh`{_ht!PP<sr1
z6mrBo>bWcU6X#V_Vc;<@I8m32DT6dgQ7Q-Y95zGXbR{~RwT)!{X^14`GN3s`4ewet
zaJt{zv8#0m+gQIF)z4)^^MF(`w0y@03w_0wDO>rIA<u;U#Bba@ZUon~#RF^14{^^Y
zdf=sR;oN?+Y-q7hfviXeJTibI=LLy4ZCwry`}i5Hb!EtBjw&swZ{gQGSc4wtjVMdx
z2xD6%Xf@f9PO3AVp8E`xvIS0e$Z~kdk0A$<3eAsG;*5r7fl=odD6y6!H(5J2Zl|zr
zN=L%cW$tu#(FpQsXcA9al`Zm~aTn#C6``|uI(R+eVREi86PaMnem&R%Rf5N7vBoLr
z%8{k|v}z$oybaDAI)lfZ)A%=+4Kdq$2Hh68zvHXEVN`4#+*$cstd*h03ik$6;@;_~
zxxJpZZi>Mbmh*`znUb1q1~0o=9eVnQ(4<HOd}EObUUSO0sB;?hw!|7MmI`dTo-4Tj
z<{NAmx-ZGaW5ICqC9L;Z0+sWQVdds@eDf`c#w^R=2i#wQO*#8v!O{hIvu=^F=a3UR
zt~s#%-zo6lV@nI3T^5hIu1{OV4$QXOALqXhfbmZ4_>nVbF%}MVL~$Oa2sx-*(=8zN
z>|gZyDLfy8Wyna&5*3Bt*>rjjmwunXfz|m`d}Eq;@NqX<aY+^@gD#bc4#Uc6PI%I*
z5(k`!=el~0p<=TvxA}lA=TkC;*5rQ&t?qwV5w{ERXbH2id<AK|Ep`ie$Fm7R6fyb)
zC~9PI7q3l(<SuoRXe6#j^o*06y@AA<!}!W8J`~cso_0jv=L=rXM!OwpE?HrTcvUeO
zXPXRUBldTKO8!LV+dYGPHeG~=*0vOOUYart##4Q2iMT;h5!t8pG_<;dv&S1KvGFf&
zwEP)H4AT+(LqZ<^cRN0-m4NJzGPrTlBCc3$N>NuwvjKfSE~Q@ThNB1MUHUffp>HW_
zcveN2$6f2hnv+2^*tQ7YLJ|MeSd;y7%;4V1#d9C5Y+22eUDzPZ+II{Zjlb%o$n~Az
zywQ`Vn;WKLVcl}F{MCk*sRdYR`vT5;O2eIcfx8lzgmO>P#4ARr($d$X(Za+Gx?_f;
zwB<P5c&ClmHJE~XM{dN3DjqzCc<?m>|MJt~^Uezu2Qu9Z!Ig5+lDUg-fa(<qcJP)Z
zL`Kb|msb{1_L=#-r*tjYHm-u|c_C1+ubuPM97Sg~74pHC5k_2Vgkdi?@aKQ#V#w@N
z)YShjoaOA$YuRWzx_=(#KaIh;qrUL%?K>!WN*ZkOI>P6E@`Wp=J4ms07C7A;NR9T_
zF(^TanR{**c*~*WtvEu!#;4-b%s}AI#<IP)qhSA^dr+b{h_aGu_;gJ@xOz#Rt@YiH
zi;m935q>Aw#$FD!)tX>KQ3WqP=!fZxXJd$bF>P#?Wcg_(Twbys{dyb)`^>vx`|6R@
zXV;J4)kW~>fik=;&xPg<_1ufVIzH*2iAYsV$jv4i;knL7Q2u2veCpgptA$+W$%G};
zs+CGp?1O1wq9n?0FeM9((_kuf2y_z82@aD;;rnvNjdO3neOEhHn_@)`4b7;KdYs?d
zPY{`R7+X&aV`5q1*-Y60T}7X`?1d}1<+)aDN~SLO{@Kraw!MdiJL4eyrxu9UXTjK=
z6WRQbPLX)FANzhth4Sj}f#*2E$JMPs$$fiCx||8jE<>h~vWo%aV|#!BrQcbDBf`3c
zJNzMR*Nb@W$ow%d_Q4qPyY5dx2HF&R(jVKD)3M*O4l6$zG2NgVF2}5hdwu^zRk+h)
zy7^96pI)tDqKV$L-(+ExwZA<DCaj~#_FycX5D2|7o!CJc)Gs0ECIt;*GCzU!yf_Zu
zVx~duds$B1*MsSw644X+4czpAXqFN^mI9Yv$ALu^*l;C`|4Ko4Zv0yQP@4h$>a@oR
ze?pl~R|e!CT@7Khn+EppVH(8;F@m1*p>|qqr<4+PTov9ou8D&4AfDS(BHXc@x1x+c
zTlwR=c`RJC7L+84`Du&wVbkGS+_A!u%DlHR<ESt2?c8x(P;j09{d73Jd^Q5a1=1`?
zX#vRP&8Nn0S=9Wb&y)mqchS1eDvikD%<RZMytvAl!p934?V15>*6W!tCu=vitL;7?
zu_lymov6Vn7uTV&TRF6;Po+`C&Eoo=>HO|Ver(7R8J79!A74}2!he_=O#ck>vA1li
z_@F^B9a0@hyTs{mG<+M~%v&zDcQ{^EVVK4=_DyHI+P1(m2^rKFcM^A0t|XtF9FhKt
zZ162SEB0(}MT@oLap>JFQSnC+%RDb+Y*W&}{mnUkrIH_WtI3BINqceR0XKGdYo#!=
z(&KWb#k21I0Jg06AU{qzR@{AA$Ta0!b5fCc&~1MJ3;ISvdQ&v5_FsdC-X7yydk2!C
z{Z!Hvy{tMh^$6Y!SK?Ef<Js6<+u3$yX&hJ(0}(zpw4iS;tH!N3_Qqk(_PVX$L)gLo
zOwUIZkIB5`HeKpld=G*r?8L;a!}%xPW5MfF4A|xEqF;aHFkrF^dpvyzoyHCL-C!?z
z$%XM@Ul*{U?K<p-RUvPe@`QKKeaegTjls%C=yTWZB+(ctc3de6e>Uu6dF9IN%aLcG
z-yF_M>E&Qfk`k>}PzG`8S<v7msU$j^W^8(lSB+#q{8`9y+`9n(9-6W{!;{&NvR!Da
zDR4_a*2A&Swm9_c56Jv95b{<W#DIiEmoZKE(9&ZR-oN{S+vkx^-gC;Z-#kI=r11s^
z1jun7(n2QG%${4^u?$LgdEkxFo`5oiG&LrcTQ)ldIg`QUS>`|~m$h)mj6b}{>m<Z=
zR#c68?0~1)L>fEe4Vr$NMKdQ}<qJp5VBw#D)t@-Yl?@G{VCReA>M<Cf^q+y*j>;^4
z;aFC^t{TIBxe;e;Ez&hRfkz{h*#7?>!cjv<@+so5qel+zhj#GQi*@<e@8y_Gsxl-e
z#6XRoBrD7^2QN!&s?OD6*Irw*+Q-v~*6OmNCIgBxjbfKRpP<&T`FL^E8v3a(tfOmv
zVfch6kQ08LJ3qS%Qi~r#$M8n}-`)aHua{)DI$5IF<MPb@wF|_~xrz$2s_}d1NM5WB
z{Ppc?NOk)P@;>|yEwx+u>L5k>;?u`3IyDJp^1N92@*i+#*m2l2DTE!SHax5EOFDk1
zxcxE$C)&kDm=zD8!C3+yQOJsa%C<w<@z%6h@4d^KYu`}!$7kNs^P`XhvLGkFTNuu@
zWAg@WCcCW<f)$<6W@RJin7ERCA63bXn%#{x0-sA}vJ!O;yo5W~nv*e%#rHk3bbgc<
z<eaXfk1Hd2z0@=z*ZmPMRE-mjZM(^-WOm}Qz5v{_!H1a-k!8ml(n0BaC5%hUgEf{q
zFzw_BdNV(Q<fIB<2lqu_2=?=dt&7;+rvD&dvLSQn{)HnoU1;*tb#S*K6BC*}LHT+<
z$jYnWgB|-|{Jc$<kKK9#?IUYovHfqz`dy7NJ4W*tRfsdG*hzMtp_p597stafu~m^Q
zTcT71m(*U0jS4;3KAi)+^35k0EIe0dH`@WMT`!*a(}G=ZS&14OZJCCK6|#{vY*FuJ
z{%HDYW;)4%iT|d-!yoQAL#v&$e6t3v*P3wUmkhAZZ3H|_`o}#_60#CIAHl6*#&o<m
z7c<9&2tk7{@U>0`hF^V%f9|e_Rdv>E@0tl{e^8zI&19sp{uNx;eh4b@9OrYO9m6Cv
zsPMHjE891NQyexFoF>l($HE?vow^4@*ErG<i-}YfGMB`#844^uK(f<ku-Z|Mw-+g~
z{t0X8OLP>>)Yf3n?(D;w$~ojysRfma9{khtn_xfbA-7JU7gPkkz|+7KtXuF3Ms=J4
zgCX<ae(-)y<C_KM`VWM4Tb6O%AqD)28Kd!$<N<i^H-pWwGhj#KCXv*Xr(%ikpHctn
zG%j$~PG)kvj}Msr2+mIPhn^qzxP<A}yz$C;pry7Jhg&EyqY_^VarI*@6Xe)b``vtI
z(p6E8nh`qanZY{05>nmfNI%j{X}qQbXWf64yR@d8cM`flarIsJ!}k_0u4)C_xpFA3
zUg&)0QK@K!ObLu!UJUsG^*kSJKoz-~&|K98UJfl>)!C)+E4KrNEmOubhjMU1ybG+D
zuf%q47{X|t6MZvMp`(dQgt>VUJoq<)(z9OS{#J&vx1NcX8cwE>qqL~=st@XR7jsjJ
zo4APJ7tVv`*pSOo1Bm)Q5|@sSg8?_I_?mD@*t30`sN~!~kwTaj%YI>xsg>!tXhboN
z?CRj+x<!<)GaXa}PNn!;HH_7iU_UN7V9<|uoZQrY{>IS(<Y9js^V_26+4+e=KfeH@
zj-=zi(!uOVx)fCHH9@Och2s0ar1>}X2l2wINBpNFN=!b!3PfpPEV*PKsaYnYq?4==
z3b4e8-%>a%{2h9TblEo1H&pr~ct$efAYMKNlR6Esd$BW}HNGSA%8KQ3Ht^`<EAYQA
zFCrV&Ii!|85gxX>KxTOaYNo0Ro<(VDTcOO_Z)Wf|=XGG>w_O;2!3K_-l!0>lKYq-~
zRFU<TU}&5*nNOUm2PrAHxbXeC=$bN+fwLjI$vC)oVHl~Z3(h*}GuYsLTogOSoVFzl
zAno*g@Tt~<!#O>$$Tb(;h1syg32z!@9tJ71IX*MJitU?M%sGx+j3Gwtg7h~7D`!ll
z98jT&Quc7LFb+R0S|oaX{SbAXb!1^D5H92?(fKV6yk4>mxOblxwA6d({UI~X;nged
z!V?u%Ufay4J;>q%)mE^%mL9_T{TttmcOk#vKzw|D5Zk@63ciOO!q0yc>F%;8;Ar=q
zcPJYOCdn_V4tUD5-d8?UXKKf~d&~GxH8GCzZUT=@8}M{qns5elAO~Sy*SSs?Ysa3!
z@Xv#I=U5}=;$Y8e1olAY6(#C>VaSelETL;k94PHoqqG~P+&jO6P}2AjW&gW}$ByaZ
zQ(h8wUfD-cb?US;>p9wIlwr7mI@K_qR|+b|MsFYH#j^O*4~~luKYRpZ@04DiqZ7%z
zFEoigFCM{`jWaO)6r-BQhakFg0{zMQ0-@J5*^c+SXn`ijq;Cuo_%$u)8M%#uM~;IF
z0l_GxbesEZ*CRHn`~bf9N^tp9Q<}t|f%L~3EZfQyIKP`1@nM<Zy3NHg#~Z|_WI4)e
zjK`dMMhO#IMG@&ysPKI~j?B|xFLEvU3yDrlb9XLu*zCi&*e%SqEsL3cIELFsCIKfU
zVyg<T!r<|O+v%nuy*T@Vn^<}QKL)yC+c8EaibCJLsThKf?Et;m3`Eb{INN#t%pkLX
z?>|+}DNm^q|IHae6CoTnr(NM@ww3c^cD@lWS9gc>bG!NZ^>XY~k{JKG%Cl?yXR)uJ
zg^<hKLAMfx?6pQAMNIw&%IgZS$tOXi@@5=1w)Mae=Wvp0Nr0V;Rq@)}i{Nzk3T~-Q
zVtu1+nWc#l-U<8)ZLk<-UmfNmv)T>XE;_RrrOqUMPT*90YZgDhybN?bAK{z7r?B7D
zlXKfUOuRTb8I5izv*qJzz<6LC?|w=T@PjeLuF@vU&7R_VhvC!_^qv12p)J<R+loa8
zo4D_P2a*45LzY<@jn-HDVMEUnx@}?rJ$dGA>DD0rPUsn!W2nlCW@u9B+~K&#bUr&e
zrx~Bm{SK#x#?gOUPe5c+Vby?mZP;_UkXEOD7O%UcO_6DLxY6bNarMwR{&=-4b^WY>
zzoqf;dS@-HJ9iv{))<qndoo=QSd9}UEwQ+0BR+f42~Ep_&~oy7H2azWQPD|Y@70Dr
zYxlqcH7kfYZ^?GQJAeVxx_SQWNHWOKtSbBPNZi;nns@jxf)rmUlGMmg{7Q>KtSdv8
z`|)%KcT2HHyyUev`Ut(|CqpdohWRzvzODho!`s39>2w@(_bsUAm*RilKk!jMjad6p
zPnRI8`*7@yJq`Nu4(mp6tSIjk?9BD%X0P7MhRpT{?<<`+{n2skTK||Y(dE(lhAO>(
zBLiPYO@nEhJ)mYkaj%uC;ELKg&T>%@pPp2P1161w?9FD36gZms%AXQ9#N+L4JNWJz
z6K-GXdB{$+V|87QP&EB6FMK|>YYTkRg|c+rz)G0M9-#O#F<<+FQO+Ps?0m0}YtBkS
zYe+sPvpksT8;7woEkfQPd<E$p23#7n4d#tX=g;3zgtM2QLt^6<Xpl`p@w$~_2k$=2
z;+HU|j8cB1g(<yQCylNPJ@N57CFgO9X6(1bew<S{n%N~dFnO^Lo$$GfZmN^GYp)!^
z4}xi^ibd6;uXFhe-+gGTnKFHHvg5LA*VB+F!Q~>%K5J^UQ8eC_UCoxDxi1H^xCsZL
zIzNYBQf!Rb?Q7sp?*ew`_Asg%|BC;zSsCWo<-%`OZ5FAQjEad*x#~ZPEU8S1?nI~4
z(c;s%u~-v458AL2b9?sLJrl=zwP4C{NABJ_M|?kMp^!0j!jCt$atrMa!h;gAs7vV>
zMxR`Qla2FepY~C{^0zBnTk@Hczi<!rAIVdx|2rI9x`s2{9!NK$CDC%Z@V=70&41jl
zM<=}mmgM`xc=(_(dFdL%n5FB1+v<s@>W*Up4B;BA<*CjsRYdom;FopgG%chROYISB
zj*Wn*__YwVUYhnM#Zpy~DcYBtki_mOP;+Mhs~>j)`VH*ZQ2Xa%oSBc~_SnGAnk4A<
z>g7(yjU`PdD@gh>4aGMa@TgQAc+Z@SExw<@ZHfw|4;90n^~XV>s*`s${mxD4*^MVS
z3FbaZhhHgM2aOh0=vDS!`2R{Q{+lGq?0f||qzZd)tig{R=h3(IEFUt&1H<Zo&h$jU
zLyu^emob5K|7i-QY>;P;hxT)?LX%;aaWC94YQ>!ePa!NIf(!a1bYB!h>GDEPHq+mk
z{Hl*}U%ven&Y`pT#zPe_VT&dEe7phk!Y#l*loxAU6?*+P?x3#b%#tyQ;gD1OKraO<
z5<LX_GYa%tr4Clb>EMu~&76xt9}WstWm^KZ*b!F1`<w3Ke|6|$W>Oq>ESXP|j$`Pz
ziZSUN_NHTVV##H#8@|0~%?~&x<gH%H($d-x=A&^KKdkhntQt4&%b!nhq--T5r7Z!8
zeYZHiP7PC@d$2>5_xKBMWBA+WJ_x>K!CSoIIP}y%#w*3wq4|^;RFVoIaYH7gjJeCr
zzW!eDo)4n$#whfeP56%k3`pTqB0iBH!d>6MF&}jq__tS*-IY_PE!N>E-@g=Rg)YMc
z8D~D_YZxv6+ke@-T$XOnR-hXTf52-4f%o~qpI|^5d~(a6fz!k3<KDwqp1X>hx=6?m
zD*M73hyA$H|1fsWoP_iCc=9-GfN&nOVMd2%vjxL?`Hl5aRioZ0(y{Cq-tfXW&SU=y
z^m^|`9$!vl*aLO?l)9dRdpDEb7#H*|a|Ou{H({`UGKSd}!l0kVv{1;l{5FbV>NgCj
z{e(J=Y1U@<-uB|y+YV%LTm?UmJ4B(E4H+n^vKiNZ;`K3|n6prpmCmilpW!lWL|z`*
z{*r?p=Xs1Qhmqgv!Jry4mB~JEX2C6YxEqB*sBL3FDt(v4tCy*hteYE*UA!K4`P}1<
zm&SlryA+gWdseN>e#MnvXn}?w2DEfw0n`*u<0q_)hu}N@pc@wqcKdWdWzArb#u^0{
zJ)}T1bZIJt&K=Jd7+u3p(I@c1a8H_DG@1KsE-*yLzZcbJyy4>>Zo@rx!>DQeXlze>
z%^#9dWYd%y!O(LutXN@4i>uxFJ3?-#$;+N%s&b(=RSv%&90Q{iJn`-qMb6DLkGpwx
z7|Z?s4;nQN!JMaM&|V-#ALK*H%feRJ7jTp+bmm9;`g0ourk<O=lK9pyfW~VEuzcAF
ziW)MKw;k>-u!(>3Y1#e|v_qYLIz)#K_kQHI|4x7<!g9Ofssz<L?gozlP5Am)5;N8f
z=DFUxe6-djA<w#!8GM_By7u!#b@ey##(*%WxpjhfQrDr4t#!cGd!nh$G_;joDe9ef
zA2;6`ivefP;jQsve$dcjnCAEqn<5|b?xXW)@~h{t<3}bq;6z+(s{uQ{DzlA!9?+eA
z3~p{nC7)g?+$VHq{kn#tOi2wVJ?<tSEp5OqPrA)pZeAt2GAW+#2>AkG<u~wgnjYI8
zeHkXtS7z>_jY77p0{1<PB!e%1c?I`sJa)GjP7jz)lP<(_RSQgLOKK0i9M^_pctvLO
z{ueHr8Ur%ITwwIHc<Aa(gQl)5e&3O&xF=!-&UR7Zaxd6pQ21OtKggHKI?chw+R<2K
zB+vHLEaYP7D)h}hz*pWKOO4l!K-$#;ydJ8tf2$W_Q|}(GZa@TRTixXL+Md86`nz$%
zhZ}svW=RUo6FMfQ^58G*b0p^Yu=(TNSx$#Jchjx~ITt^MR!yjM!T56Am3&Y%*hYMj
zD@D#!V(XU~a;v_ofwoi=nAKIgXy%6Cl#TTec~%2gjjrPUTkr+)t+b*4oi?+(EW=hF
z$b>Z60^HLw79Oe}7ks|8FyFimf~IJ}soQ?6VpAUE4hI%H<{hNhH{-Sw^D%eQ1PI{?
zimC>)gmGmMBXF$J`~<$~h7=t9Uk5C57g64IHCFY(nB|NwgTTnaw9aD~u3V^0B^pbG
zyu)8^;EPgdxhlh&Vn$=%$ng|D<sz=`a$shg%t&jb&=IpuWV0qb#jIbRxMR^P%=A=*
z2Q{C#g17b9g2S0f@GCriZ3Oc!yTDBoOToCW^YOTwCg*c>KE%FA#!ZV7I7O>eaq~$t
zc3N!$<*a(nC#&A)jonnar3L=1_}fG1x0hsxw7%lmOH%Z1=}6cndja1i+{DWI5#aSk
znzR~pIF~<+W&32%rSg?%k*dgg6EiS5$dtc3&4vC`JIXK4{K|dy_T}Dwn@L#<H{jKn
zulVocMVw%^onjr{^Ia9Dlo$I1Rpqm}hxeUuUS>MX8gLDw^>`SbLC6XYLzd-hNRUY6
zKMAvpdlT1S&=nQ9d(Ipj#}@Kt9~`-5&%fcjG&wq^l>$DXMr#*MVMXo@VER&rC8WK=
z@{gsEBY5K`#teW@N2akq&<U2$=2OJ?W>J3q3?>nB4@cx=!@|$*-258?qtVlts_fe^
zRG1x!zVyMLib7aEP=X3f&){o!JBl32V{XM0yeWDKJL_}6?DJvBkzFjBdUYn=|MC+2
zq=!<`kzm&QxdL?rM$x@I4Ki&1h(W_rs|GhG;ZEL?t!s{jz{(;BQj`?j8pj~LIs>Ns
zm%(`*`i0+Xb-9kKg|OX6pFNOV$p#wL@n$E=xSF>&fq$IIt#Vw2OO8FlAlLEYBK<em
zF=8N-z9o;o#yj~VFOI<bkrz?vxGuB4Zp`ip9H6?#A=LYD8=YR^O{q=8VaY=g4V7-d
zw2@s%R)f)T^#wS)WIOlnh$9zJ`wGIZ4#GF%iqKi>7Yxg9K--(;+`m~%q37u)sL?He
z-iuW{J$50R*%5T(RvQ=2C&GjTJ65Bm&+3OK!=8t2`~#~@xHwvY)WVV=XX0f{@;?oG
ztk%=8)r0WBi4@eSsK<j-c(l^?W)De<3dib`>_vIvcb~#*nR&d4kp1ny(Jubji_j96
zjP8GjQ|-AZ&=T(Ihd*`T_5aqPjm}4y_*$M`4V*>uLN|l+;7e#-`=si?KtE1qiZUMA
zWJ!ZY4}<BGtzf~9EEudghW1oxlC`k6*?Q3&pUI!$AD<aRm(uD{P3N278_MQ}8m8l<
zyN}?Y+(-O9+@5tA&A>zIr7&-*88a6c<v*=F*rSZqa5XR)4=6NpSrjX*DMQ$y2}jvz
zk&x$+7zBfApTeV`CD`0vEUL9zEV>(z%2}T=V+%ztRI0Fsrc4<|b&9^IZ5l?IY$Zw6
zw!)VohAhEXU>p1~z}NS~=xFp+-r@3LoL;vN7hS0k&x(-5$oS79`Kjh$mUbD2>iltz
zGu?#`_rAp$SMpKf=Sm9kY=X7hyU}fS4DS*95-x0+h4z(ltd4xit-J{nZ~jEJcp-mu
zsSlP3T>2~Lg=g7Bg{?M>hp`*}ak09B3#gKjp79WzIPx6)CmzR{EwtgIJ7%KR=mPkY
zc15_aoy|wzD!`D7yHLhPkG?<7qt$WcXgbl3{_csugXQXEwQUmaADu*fZ39S7;}ni*
zZ|7YLh3}+h5pyhA18Nsd=-Oo!h!s$#Z@&or$Dkpw)+LCFmY;^h))}l)y$O$ZXwi(I
zcjDhuEnwz}uke0sKB@Y0tg>039{87_fGJ@!PDMlc7-zDVuq4->OL$vx8NSSrqQeKf
z&}~vUyjZmpH%>jw87#^s#e)Osq|HA5UD_Uelp4a?)||q~#^HSUW@pi+RvF;_0&nW_
zn|t{CHheu{iubH)VcD=;Cbh^HrS|END>sTBx;;gI^8w%)kPe~y6`-hC9;{2ou;1)F
zv<i6<&5)sNv(H-y3hAp-@KVC9cXq>>a~-_@DiiMD8yU*^U_)L(S^ULv4<VNsk45Qc
zG1E<sMXz(C%;#yeRx|+H<ecDnhZh>!IZ`uoAnwN!?3-qevCh}IncMVf^!q$KFnu_$
z=jD%yevA11XAg<DO4f6st8K`~Z5|oz)JJ)Vd(g2;jGyJIq5Odo3_F72MMm@S)+;?G
zqhiOh(qEua*D2ul#){hBe}%7Crm{nVi)Y_#JMvTP1>L8wVcCnDU|lT_3LRPe#J(Rc
zUI{WZ{D3yOoD89%OLx*@;hhvTq>uX@SdI|`%&6hqYVN|-3>0s)CYfyu*p`lb{QX%4
zN}rjbT6#Q$)~HZ^cqls=m<vsUA8^X8H~es+V-xIGEqDUV=tX%G7y8YRr3?8f*^;j)
zpdx6))Cug_53zX2D@*=|%Rc(tafyH4F`epOb>fGI58%v<L=pG-627@Pm#x{Lf^!C)
zhW4^kI6H0)FP(A|2bc@zl9(LS{-n%m=Ey^P*Ii-WnS-!LV4*v|#Hi$_e0L*Z-z6bW
znZ1(?UQDE1t9$5^afpxkxSO&LAK-VJt%Si(^|5A7D;LI}#&O3OKf`+%9ltCIeQDz{
zX4QONxY*!Q9Xz<>6p7I*?qO|*4L-=x#_QuJ;GJ8S&@WM&J=;HlR>$sOD{BN+-8DZL
zF=8*Y)XMUkhk9}Oo<i=;%oB#TG{MWVSn*wSWo@1-=!dE!)7Q4a$c4q2=~#p@hrHNC
zDOF0(jmFlFef-m`PcUxqYYb}i<J|9d@a3N#A{Yrgk^lY&UuPbb(;xl&=8{HAnvH2t
zDT;c|{*)qRC_~>2MIuo$lpzwD2a!l+YC?vP6rtzr9VHS;(ufR+Oi9KN_x$c(cip?z
z{lC@fX`Sbs^V$3Te!X<y=GFy#nMXJt-^lY(=DxI3HxnwNbZDWbB(r?-2u9p|CK^~=
zh&G8Qp~K5p_{@<IP8O^ThOnprQ`U3rF*d2c#H3wHP@opd8=7xpQX2%G#HWpzBjjUW
zOFf5zr98fAD;B*=6Mp|>HHr)?0hNH~m@#%Re$M)Z`6fTO@Le5vu2g}R9kvktSM!^{
zrRdDry^`f^pP0a$hP(Ks_zc{xd;(wX7C}<5B(?3B%5Fb0gV6Pg^f2x&q~vEqTdxjk
z`x(+)IZ4`n;UxE7m_O=&=s-W~aS%9Elgb~vvTs*~J#)VmYw0&*yQUVRky$zN?N9NG
z_b%ujKZP}!hC=R7VGcCFo-(E#;wB2`(S2Km{N}_~NUs=<;rf%oJo_K7ER)1VzcFRH
zTPlH>6rf^IlX%t>XNpvo2lYz_L=wLP$;DU}H+~;Xng8OUSyl&p6EZQYsSs+Gz5@AQ
z+iA4-BJynMK@G!F`Ye*<LIZ8tDTh4R`|c+id}i3&(~mzkCGlFyD*PDtP4uojlqRk4
zr-Pk?MXJq7V105n-K}<Eq8le*?V1;uuU^4r)a0`#M`Z<f??K+$=eqcI)C92UvSI<}
zzQXL4Ke=r}N1drC;#1dB40cH1J_nCu56sWwbfXTt7dc-1l0r`|E#ND<|FL3z>yttE
z^cgG|vfDl=XdQB0LSE?eY$ntKXj<BL{@wM_;tDfDJ%u*dA904O*cptrohJA`YNx%~
zWplDA8%PJU=TP?;SpiI=%uf0YrP@^{7=K=d4eqnSyzC0GM`96<>T#o^L20~9u@!G5
z<Q^P<KjUlGCGj(_#fcyO>cfA7hO>{~gpSPzS<+nRk6-rgfkWYnbnM0t_^4(;S)YCQ
z&6h<WpK=)Ogy*!CeZ;k?6P}=qQ#=U6eZH?Dg^!ef$bZ?8!zEleK!!nc;85)~jFx`D
zXU{nTZ)!%tUad^-yPcfCPYnZwuS?1BS_5RAw?iGbW029C184Y`@NiTHANO<v-B~Gx
zn_HIg-2%s_aLyue$7%^OZj$EZCOhH6#{yeh@WaYaQe$_{tmdDeDdfL?9Ka0Je?qI!
zL9_3_4+REIFst%~XzsNEeBh~*;<2}YY5WeNs?cKS?EDJrgtK^?wkeru9LIGF`Y^lO
zlizrg;P`)Mpq6B5n2{^%o@hWfqfe8|5;Kw+d>Xq?%TfNwF0O2OGgx!6l<}Ykhv{mN
z+WItDKj{ZF?>`2Xc$vGOW5PYG8jTkmA3^*sIS9vWe*N(kyrXrLko&S=KUTzvD;+{H
z{_9v+cuAjKNwcFLAzL}K7nAAYO?&ab5zFY<t?l$V?K)~FcwndQ1OBa<oRD41;@yMK
ziIzN?&bDM1L3q)3Y|%BLyxHModNhWl5+A@?yPsU7j3#h6m@Ph_&aDm`z;-zo@?l+`
zDBpJ;2j-W<L}zEJsB?mdU`4VnwiY<QdieQWI(*#Ijv6mKpf<>lU%Ck3{!C+Rtk{bo
z*Vc3MVwA)i`mXUU`;#em+I={4wG3ZLJ%JB_pYVd^VyO6_!}6zDz@pVBaATX0HClQU
zt!BJ{2P16Y_?Si<D_x26qnzlDt16WwkD#Ee0OoV>0fbK;D$LZciYi)VV9VA{$opbV
zEh%-}OVi7I^?NZ^mLBAG-IF56yh3r;;z-oI4HS|23??gF$2*O|wAelt$@m%`Fg}ax
z_Uh1&Q31T|?{T7xF$v;57c?QD*@7M<x-w^JCD!2Aj_De?xJ+jl^}ifJ(=76E&zW*A
zar_d#<HK{<@gWsc7q;?;m#fp-h<i9gm_>@DjVaIR8#alOP<~(@moRq^zqx-RO}_OK
zWaD*6x>bugo>QeswTN@qrenzXon&w$ob9&C=ACZOho_b<7*NpyYhDR?|7*SQ+DRK;
zTvnx>NB^T~*Ub3ulk71p(F<=ctirrp1OBDb2Kuuj7em&hz#aQb{H>qu+=E^n=offu
zd#c8gm!;rBT<gFtMMgvPg|FOcA?xa9Bsg9S9H=I-PRLKUK$!L!TrppVX0*z%S&t={
zw)JIPS-FtL%-jhVm<3ET`^z6#v6jxqE0Su)DbCg2ncGq74EiT`&=K($KD}ulSo(cO
z=fywJ*mMFJ#_a;LZAKL9aFq8rQiM{~MR>U{2~RcL7RS!Cq#crP@!Z&9Q2c8XKK(Gj
zzR5t9%GOBoxeB9LRJSaqM&#nnB3sdb0X?|u@EkV%PavosnvZ{+-B_QHdwvjnm$vlp
zhmSS+SUr6(Ys)QWV-1I~NzT1|%8&o}i!KG28rsOa$Nk|Shb`eP7D`b`+FVkev68bY
z+s3|KI}Bdk-J*ZZ`RM8U1Qj}bpmq9WR`OYzA33ZCq&sX_`pLWKs(Vh9GVedS|NRS&
zFL#8KLzN-qfde-`_LWF`domQUpZo+PSMhhhNuU#wh1cXh@yj=xGvA4iLHE-t>Uvy_
zW<3`naK?D@lG{WNh8fXAo5$$mnk+8yk)`Ti7W`rI0o}=upk%lzJ$Sc_W*?6gZT0&B
zbyME*aiOlfqwgSUJgm*`Hy7b=uO@8mHDr5_=Rw4<WH=mDhLOH8SiJEROjk?CYca%S
z6wPDbM#-S-v2y;OqBIpwILJ+N`-*SUN70erYH%Us9(j0a^9!1-X`s9<wGIgYr}Z*a
z*Eon$Q_kVHiMuF%{XEn%iG#348S)*I2+u-XX{GK^FwN@2HvhTMQ!yRgoMQ0s@@25X
zSq@i^pF>j~Ob5NQvq5`n9=EB`lG$E)4?dyB<eV{(ty8lG#Ur^`%8g}u$s&5*QX#gS
zKY|6VRAEt@ZoxkvbGCZHYk`3=oQA8npvi9=2o?Q>ud@=_A?JI%EB6o<r;HN4ye^Fm
zyFbIKexU;|rw;$h6<JAhG|rjO1Z`Swxb4za{OUNA-E^JJOYewBx!%`U+j0e0Zd(hN
z=WAeQfFye+{0|Dbc<xr(JiPlwcqiBU!)@3Tf&q_4P)}MU^YcY`84`+L9>#JX;$0}`
zpB`9oNz8i2H}Ff*!KtC)T+G`Mn4v!(6dVt7n>%KKwy_3=L?%PvgduG9lgapB!*e_u
z=7Ci|1h4kf(JWK{1-Rs&5Lem{=j%6o0n=Z{xqoTNm?e7}UeC;iFwIHg@^S;xnHB?A
zH(QbBK6%EcFJRNewSw>44@>%UV9J$KSTn8?Zr0~gcXkIiYJLyYE!Km?-XEB?M&LOd
zP=KPTv$@LVADH<)h!6d*2G&<(!Qv_ttVsLAMcD+S)n{R^o7Tu*UDgHqN917zW>WWR
zGg4oDg&k^`gW=gTP{G~=elC$=hIc+=v4jB&4^w29y<O;$-6PI5wGbV1oLR<q88&SF
z5nf!gj@)jB;3c(M{N_1~N@NC5Pl27_S~e%2;Mv><%cbyQ;V^o2V2QXl)(>uH=JCfD
z3})e$E&RR_UQFw^JZS~2#UXRF$yi{2+df*w_$L<PKl5kfQ_H{br+YXXyJ$J?IR670
zuSt;2x;hB^av#mb$&iw<8tO(J;Kt3lfWstI*}oJAX!0!JM{@D>OU8xC34Pwf)(FvZ
z2K3oDlFOeF43|R;D0h?^xA^@~mQyY%&WM;r9xI*!Z!1r+8-4iV7m2vJcQdz2;9o=~
z595|jDd1lxMsr>1hV0i-fjujv<#Zm&vcor}L}Pu|)7uI|nD^nVcynDe?mP7s=LLM`
zz6M*+>)UDkwefHH3xUlz^3Y(~yZH)SOUw{<=z7d}f*l3ArBXqgGJipEwYcp5i;Xwd
zz>sUt@U^NhrEc%SH<Rl*yNmnKv_r^;w9ex{2gmZ)ZNK2uWwxwt&pfz2ya(=ns7B?{
zV`<QzU05B^0$VI4xla~5G1W95o@q^kMB@~kIB^MlSu~3_nV$uv*AA#v8cEB|y5Lft
zA?3~8K#Gd}*tEVJAD8VQr%Xp$D3QpW-hB+eWV_L=tv|S(ohNZ;_9H$aai!n~U5`IP
zBe3amAKs2sq^jg=+*?^S`Xd`nQUYkPCqEgd9^D3x?n$CYRon5?vm^YRh(8!iRy2A=
z9`%+CVr%8b)8`d(0xM_<&77mlF4<fHSGzfslAgndOIPt<4%I+H>jY4E;=~je)bnlz
zJPautPJb60Qc=`vZdaoQj!OB8W1cpGjHD~}t4YwqPlKs>*-1W6elP4^pvdsjW{_OZ
zL9j<4JS+VQYv+v<L_8|2KR^oS?`wmu4IA;=j$nE`OP?Oe-^H|Nd$IIK23Paj4J`|@
zdGFLS7~j^5BZRJ_!aG-1vO$eB=gNvgf9$3DYlpDY%o`k}N72n!V<{~2EIJJI!ZFvp
zgq)!n$(`89mhA6`I1??b8#{?!-1~&P8tb?P8P`#iB1?Un2hsamXZSJRGenNV4cLvR
zqiJ!v6g6a?6Lwi+AbgJjpINe5bZN~lU?0wd_Tr_mS??i^2vO#{4mD$R&QEaOH66W`
zCy;MoD(V?r1*=Y3c6O!*+uqlOCyP#lm5#u)+1`w%a}L=3J3W{hm9=nce+|Nbx3DEd
z;AN>T5U-!;0;@vzqQAWbtl3x2t4(^r8EHPm_u2MvMAsYQ)7s!_<WWv`WehG|lL*F(
zWLS=MJBS{b2ptCvHg)lBl=R3)okU}*G2V^~bCbbS*OgW+`~y=Gb=c$lCHykoPVDhw
zG~7^8^n8vAtM0AAds`jg^^;ZX<sZAUif|*U`}LH2(K#J=oDmBSb4O}eU_x_OOJa4?
z4DkQ9k1}$U887Tfr$iP*M1=|$D!RzkKg$G-Y!~*ra|VW_T@f|x)gn8IrDXqgHJ7n}
z5%i8-N`V*JpdhIRH>#B4ydn9}I;NlRyLJ$KzPCZZ=@uMlHJJIPTmiKOE3sgy;ODcl
zW91=UZ~|}P=iINN(~j|QbMhM6Qy9w%I2*{RwZLw4!5M?3A#11yY2WD(y&bqw*wapd
zXQM`N8#k_E6+J@6?%8r~P)syVOdiE@YCm(O<%-<=VKdkRi5mWOuLXQG^q|IJ%1}Nw
z2kS)&jGZ4u2CZ9otAy)Zj<>LDB7m6xKER<ifulWiJZN)nkePoRJv|puT}Ln1b8s9j
z{Hj4;s}Dd(a}#%>h{um+YP2)NO6<Mm3SX}0BU&8x4xUa;f`~UiczVB9G&fF{{qD5p
z<Ogyz^kpbMYSyH4JvaH0bw8l|&?|Uk&;f^6$kVes+i_%Qfmm{$9OkUMi9gN_WzW72
zgjp@>l%M|rCF>t>OJ2^S);pczsdl5;mgITNVW$q6Yn!s^>$Ax|a0I2zoJM{J5_qkp
z6R`ilXplHofkRvZ$o}pE9Nqs3e+ths(fc_1x4%TlqC5rh#72~?-G^<HG$?50O2M0T
z0Y{p8(-Ko@kQ&zkjtX~h_@hDyxiB7XZiwejPF+jgjfL>o=^^ws*1`kxVRX2#PrO#S
z2S=Aa2mMJ`IAF=};*Srl*|QTI4DX6}O534Tun!tK082>b;lz*$tl^y&89ix$Lo&hq
z!@;}2V(=MW`rT;Sd*K9hy-9`osRyAp)RoLfPUef0?MX}U_^nzRLdgy>czJ#;KC=>d
z;9XK|LDdMZ=ITt^eFs?Oejk#&^aEdM`ci=HUHow;l2x5lL*La(Y{WoAj5w=?bIz%=
zoSXZ3!HI<nrVCwo84GHCvkRa7yAE|l%Jk!!z%42>VNrr_Sn<dV&|AEb&Sr(fjYeBg
zSL}oz*upta$i#Je198_m<c_cWDs&3!!Sho;7HaOLjpkRO=Y2MPR{a36kqy|La2|){
zI>TLAeagQa%DuA-W;>^BB(-K0TEjfKP%T&9Ju^X!j-SzahZ#&UJ51c)S$JS~K6Y+e
zExf15vqzJ{%92kxKy30|@Xs2}wBDK0jYLE2PkaU5Z_L@TlZP-mJsKXW4-n?UvXp#6
z=+${d^Nwd0<APoTGW<`(pVhgIW4$Md-p?3~HuhSu!1*eOguUtq0d!Nn&JTusDdH^e
zUlL_J&BDWeZy;uUHPqi#rGg7atYg$k{$|JyzI8~Z*r0hfYbp|UXqRL8k!scaqdZM&
zFHD2ipI>m=Yf-fR%?19#tvE>05_(YS(rkWaHfWX%WTOnK!Pe0am%p)Lf#ZanX-x$d
zt-JvR&w8MBLOZA9b^}9?4yN*_)!5pS2O|ZyN;~@^E?Jf;TKI1W8kIW;OqwQ``A3-!
zK6e$Fp0{R&^1Yn4MmF5k`h_X)nsJTZ6pFn1nLl?!jp9@5AY)D|XSS#ww*RMs6I(aI
z<!WbqwCEuWjB}t1i`sDF{z5!ELXLHe$%GoWOPDsmp0}&H1odN(kMfaXIlaW6G~5Mh
zo6YgI++cV*|33`-sUT`u`3koT`d}+5@X3m%39dQ)!{9VD1-0b#*y}@;ViPHQwq(l^
z-1pKDUJj22$23N@le!`G&N4JTbso<Md|&04m*8r$5)0-&aUH8h*xP(t$m+iOiZ;|v
zB9{Zwy!e<s>=(Lf|7xB>@xoV-z_oyrZX1p_JPbQbFNjw!Got)==A7b{YiKu68X9*m
zXYXRB;Zxr}<l-_>Rze+Hgn0`Xyus@~dqgX1h0Oe!V(!q3ZFI_6INRNh<~wzI;bZk=
znuS}iz34e^n&XIu&m35{)gzExJ6Z5{UPqI6uVBE`WsvpP2$jC>hD&C#U|ev8Hb!Ki
zhp8{KbPs|rTbpo$gA{uba)+z*<WTi%Ig~Axqr`uqnDEhvq#kQ=E(=vy`qDX6Dr7;{
zpEZXwwYT{hXHVkSTjgAc{B}(Al*emIyCA3D7KTTS2L&0yMI77=FZQ0pw&$j>{J1P-
z27JT~!!}bDuMbw$Avo=&wCL<TJ^H=*1t>SZ<2D(z@;Q|k;QNR<?Do@W{Cs^WL~ylm
z`luGCzIhp4sCtg2zFxF;=2+IbcpUE&se~KO%QMZ`Lhu)uh#A_a&~Jq^OJ*n0OJA5R
z9+RT$jw|uDaA8-<H3;XtTinxUefVPheAMjE;Tx<X@V)a2+|s&+W@?WFxoK-~=as#v
zWH1WTx2)#2drJ$w%oW_dSm34`R^X&7QcN{W9VNr+u~gV2&DAR8M%ibg!pl72olBaX
zKYJb)J|9Lan_@(Jx=o?#N-^A?UJQAslwgC94Y)n;8`vcWQ$uqF#-Ex*-v{;zU4mm!
ze#{Ji7R#aU2WQ+ujx1u6HJ;v~%EbGW$nd5)-Qt#`Q`bz9V#*6{T(&F=8yhcn7nl@r
zQLEsi?jpJ}^C<tes0kCBO=!&>X>v>ah8a;i=uSWm)aT8Cx6SJXR(Uh38V-Z<uw)z;
zzlQA3j3L9pvdm20g)V7HV3yi@_;~Xv?klZ8t)Le0RUX4W&(~uoU+u%Dr;FJ6_c4^3
z8N$12nF(CeYIKp(Cx=;yIOn7ci{GjYN=||=^yp3qT;m20M_z=jOVdR~9tr&4dx6w;
zM*>FXCfLgls^X_93Ei|0$I!gXgFTEx@OUf5KF@Zi<&}{%I#`Wb_X*C1)B8m64`i96
z^cfshd6)Y_d*I2rlbB{djaAE@M(;m4T$diBl1Gu4x#kz%i@c6Me8<wVkSQ?inKE8E
z-ic`^BgGQIf(K+~96CNNM(5a1P$2srls^(mtk;8%BWtPip(Gu#I*uLNTe-WR%K5sx
zLoj}6AoT`rqV|t>xZVa?{=gDToNK=m#@WlV;1%h(<kd1VSd&8gh6~)+uohTqI346?
z{lq`Rs`<BD<0#_mCEO%CkDZaKM^({QnA~qcng7Y5k*ESAO-!lw)DB4XRA6tm4J6xE
zAxA%J7FEWlU?cYzbMMZh==WEl%zgxo88elvU!8ytQl4PeWkbcM%y6Q05qwIxf{AO|
zp(!PTTQ)k4oAxS+AN0u<PW)K{%}3|br6qNEDlm%s(;Wu?#&pA!vu0fA&wZejG>~oF
zlaI0OJMnPBatz2^%1f<Fz^mH_Q{Q(HyZ3e;s$7((`?p?Vv7{=f={E6?BBHn?!$>k;
z?+)|VYOum@|KQ-$7ziw@!mi1~KzZ%}oGh&%cs<rrbFV$g`K8dYVe%|}n+&Oxuf$p0
zc<^ay#aBY7scLEv6u@RK^z}^Q?$mJahiWk0*@|}N!aKeH{Db`V<t8lscqSYz`T%JQ
zbm^_ePx$s{4y=w>qUF&;*&Odec&DaHVZ%jbFJ0nzmz{;!weg`4RV?MLPbg7{p{+>q
z;Tf0_m`|4as_etC0d#fo7^bpWa7#Me;XW@?Vvd_nau=Vf@^8d$a6U`$^%Ms}_lS3*
z?cNXZhW{jXZA~RO$f?q%C3>X4b{*<u-VkPAxv=WUZSYN3p-F856U*+oee#l6s{R~F
ze&h79FH>L)jTO2k*%xr>$CpsHd?VYX9F2qi?L(Ez6HsZd8uQt3045!q&V|j2gXJ62
zc#p|Xp#4ZZU7V(mpFTatTP^L7Mbhk`rY+?fwc>?Cr>RN5T;M-E=BJDs4mI@!_{v&}
zs^yMQqrEjMU-4!2T_X4z(IRjpwP54*GQ1`54f6J0z?df^_}wGS*?p_SxNNoHig<q$
z9m*%M%R{cg4Sf&xwrV+inrzN2SFhz@fi3woyP?-;ZB{nO8E=f4MQ*OrWHc~=RaxbM
zed;`xD&&D<PrIXek{AMhN-(>eZNw=zL6+HOde$3>^DFJxrD?BVwY4nNsmSr_k0-&K
zt*>Fiq9Xj$GX^bY9OUZD#8{{z$+^S`-tL2=sVi?Rjp^o4f4-2h#D4J8R-+*ES^Nu4
zFPPvGhOPn^;&xRWf5B)2<TeQz_&w36-|j#LgKXIpUBUPIYc!hiL3B_oK^mLVu`>QD
zKU3l#AE+^yW}do;@jYkYqDCpMdGiR=M#f^uD{D@5>JJQ5t%c$ZPSBs_L$^QJp=-oT
z{>r6T{?8-?_HPC7R|>FTK_y>x<^&wqJ`FoQDPOx8JR9A-hKhGojRL9m5zNm>fj+mK
zM7K>xY02<RzBBp}-s(u>tk<81`L8vxdvFlDd+Q{>Mdk+2u{KzxcO9~48lpjJBzH<n
z9S==6r`^uKLD%9qwtlgJ`Ips5-e0)aue}7Fmy7w;vpQkUu>%yBYA5iDbMaj8Thu#o
z2W}n~bH(RBp=PI$?W&Jp3p^9KF=MBqSJDu?*4mE0q&7i}@>Xu@;2E%SZzJX|8wN@f
z%$Uyk-8eI=4g6}CvmZvWF!om%mw5d?-Yb41_SyXlLgM@2gf>TIdJC|%RTVtf4q_1v
zaX2aI7QexC9#*XJL*7o2H3?^884U^Y`g;b{=NH4(1VwV&y$7?0^J4DmJ<PXl#(@tq
z#VRj9!A`NjJdB&bb7r;N#&ZWTZ?*{~s}B|zhbqE<Pa5%Y_jKqxWGz;`Un$y}qtDhB
zjDe{W1%|W2I{sDLS-v$^k4*?<RCCFawzybg(@G#GVK(hkew{0LEKO?Lq{Q27s^D?M
zAHjuXE}Z}6ar(hDSoE<1%!+krPRASjswY3OZ4sjf;{tJO*=Fvq&3qjEEgm+unz3zp
zax6@40v99PCma?$ff;Juc;ZVr&N#h>{by{>)Y2Y7ZXpl86FhOaQl9vUl#Tt;9mCN0
z<TC8&u%TSx+_B-pKw!&0K)uR2NSu&PelPDre5xYVcwPjRsA%5!w=bQ#+yUZW34-r^
z1>d?l7yoP8D=xR)1N-~VLbcx}G>%HbV~^MHk!|g8F?AHZ3LeT@Wpm)yP$#it?jf<R
zYYL1`p2D@f?GiO59b&F=&(TQpD4zW<jw~zs_`FtcHh<eIXjh6NpTn~`=?AM}rG*bP
z&dXx&SC3`a7kRL+S7uTD?ftlVWGQakas&4sw<g1$i#YhzcYzIG4|l@{u+(Se{IO3a
z>~Kjk9`E#_fGnQ>ru7=-hX$}e+9qIR7%hGlkinPV8_#w!YdTza1?odaQB&p>{`FSC
zSC`6h@Y*9}w!wsEr^<`7`cCi`8Z&60*(YwY(1Ti@tsuIsu$o(Q-<u?)B1t!;0@VLH
zq4i5+=9%h1JFf2rK6f{qm9(b6vdeJPffPt8(qs!Jo#N}&zjHHm;wXQ~XtFJtM*j^H
zQTs2!f1_v!u*C*nX|-d+(<vZTcn7Rfd%*qfeePV94)kmj(bE)57X8NtpWM&E7OhyY
zGBKg^G7;#rDHdE;gt9$@mGQ)Af^{RF;UJYdE`D$U_u-T~J?i%dgZ1&$Q6tTYh4X~B
zmnq35o&d`)8m#$cD6h8b8ie|eB!^{}#1(5r!rFNXYc{KdM@ygK>Z-qZ;Mfof)V;>f
z)3Al^1MQsB#$7ny{1z^i@8Rzye?;5He^@YgJF8x&Nmguz-~!VV+%!+%+S}{!=EVdS
zXr##8SM9{&2#$Sr=)|9cZs5KAD>zHhmc{yNGl}>VxH6@Y>wUnBrx@+W<vaCRhOrWx
zc}mDhA9CTI{Z=QtX=6mrs)&oH45#u__uyTN7fLl>;#wwM=lqHXkhyLl<_o-Xf3ImI
z^W6dKZ+X*wU15GMWCEV=&4U--l8j4y!Y#_&1w%BAz%f^mKY8>eyt&)JAMAR70e_V!
zGkhWTRcVo?@m!R=-HE@Ko`U~QOX9?(f}cP}p4CQ0(Qz&Zw~XHk8?@u_UhV<xf0zix
z8Ftu_X2K=qJ1`x`E07eZhZ5Ssf=BiYj63@sH`4@iOLb!>pO1pqD++PhXrPmG{=!FD
z7rKxTg~~HxDf<39)UwLKN-q(u(p)3bs8kmGFxj9!NP-^4$KZ|IGpO&TE1al$B1$W{
z46X}hAak)2S17n(eKrW*re&u=e=H+uS6*<*ujd=@k0(=OcYJAk8-I^Hi2o^T(~$Bf
zxIik1d%ocpn(d#(e*9M9W4>KR_xmL<(%y*HN8SRNkfBtzQRv>JcH>5cM|i@dg39H3
zpy@=qeP@Oa=Ew~PqqTB4CAS7NvU+j6hb>%?(Sxf!p<J}26BPzXlPJ@HQ|(p4eG39v
z%-cm|6!QjVTTZ2Oms17if+XA5;7BG<uOYgQ<fMBKaVf<epi-|Tsyi`={XM-B6FjuY
z`_EJE{U{aOT4c;BURbgt5?iTz?_rWJ)TER`XHo4_;@ulMK@yWN^WF{cDQd)0Hk~A%
z&&Bt;y~y6*;+$U&#FsAH(0807s|g$pDWA*B%szhNuN>P+7FTSU^e+wO=wi%gB}9pC
z9rA>!>2LY3-^Rkxm)7JzM~Ss%U&m$}KyB}RnA>i_B}|dvyI&P>19vuKLRKxG<2Qm>
zos)R+*2}zY!BA$?cmzX=CW1t>kTFVcf>%$6h+aO}34V1KAtT}j?0e#m?^HBljA$^O
zojjA%3miZa2`+GU_ZzOk{wZv0)1Z>xV_ec?SNs?0O+({yF>ClsG#=tWGY2%|(a)3c
z%$Ehs(P%iD<&J0HM?K`)dIhg$Qv&uWWMC&Zo9xFHalPvg{|^xO|A#5IviyHA#n)}l
z3N5zZc*<a!s4+|nj;u=Jf_B`tZ@+GX+aK-W$EZtUSy30a>~$R46sMrE%xvub=7d*g
z|KMFKW|W;>e2?$`T_g$+h2qzX&HT2s!{WMokz7q&I<J#{2uDk#{$H44wf~_e`>$BL
zVa5MRDVE6i-<V|y6Y&bjG;a~E_$v6-a2&h+|FdtIeL&o?A%+J3n}XN9%3<xFJP0n@
zO8Ogbf~wI;O5kEB+Fz2i`V8o2<pMnBd5F$85cdE4fnjeYd6&gI*qmdt#Bco+X@lo(
z3_L%SbjRybgl#HaYdA=QUgYpo?#ofV_IC9C@*Q21oMF#G1y=klA76Oq;&QKc;Bt0R
zdT1_}01H83*$Q%f{2p884k8VKG_#^pNI+iA!qF?Xu+v#jxeT@Aw8c_}&I)f4AMfUZ
z&8^9_W=As3Rhvx38Y3v+vOm-2(^%LY2`0JvF)AJpL#>;8P(C*o&cz;P@$%{LsCgJu
z9sU+#yGk%?x-9cpY{Cj6N6|gQw=lkaHQO-i6*ph_nKSDuAT}rhWpb|bN;!S_pI<FF
zB<+KkoNcf!w~#Bn*~E3~i}}}1?_q*>Hq2<+0hysU;b>$XKYq(Bk?n`$(73-7R=M=y
zL72vscSZ53&A|e>{utA>yvlvuvK?X$g+owyGQ>(Z;<e!;n9QDh80zE$S1bmR<`I3|
zoga@mf&oo=bPPA{r2_>Q_;9;DWcVe~#!%$+mrpai3Y`I7d_#Z~X?hE7j~P?he)*dq
zms}2(Rk!%vLfbiNs&GQQ8^Am&qwS?%4y5`Adsw&MHi|a?1>nCChF-I#bxqRr*h>c7
zja2B8|9i+?aSpC*+{F2+UxMx9&DawCR4)6W3Tv>N#YP+7N6UNJuupqCnCnWR$*>hP
zGI<nQ)~dr%j{|Vzz#vqvH=(Y*!A!?*4o<XFq-~ED(l9Mk7ME^^6S5P9Nkp4K8D5IB
z((B;d1_NxYKY~RYM>89zYv}r}2h~GE@R;>p=sdZAW^CC*e+<lF%l8GM56_eFbUsIB
z)AZ<*^Lf~-A;}Eq9fRpv!`SNWB{*|U7@oZT12=C?VSkmrfSX`wbk>^+I&)ib^D}Q@
z0vN&vu|?#cypE~@KH(wHS77Mj!(|+s!X!fzFmvKK+9+_8jt?+k+c&78u{f4o7dp{P
zf!vq8U_Da|yut@fm1D)Xs^OO~q4M64O|dWEV`_~74*B^VBMg-w(AJHXx?0fS`*KX#
z%o&#tNkQjfL3BM&iY`wP!Rl*ESz*mr*lNFn)i@V3Pa#R2I(7q7lorU?HujLl`*HpT
zVVwP-u@ID5EE+Yv2|t;8vDl;1R8=EGfzhehJ7^vq=y8GNvXaaw$c(~ll;}#PGMOJ(
zML)(2W`_&XX}B<XUHw0AqJMWW|7TzDSN1D7(o}*!x@$S>)dA%4_$D5nZpv04tj347
zAHnd!Cv-G0W~a|cQcIIENV!Y1oFK%UANDXo?KVkz{lRnR!ogCkN&Cv9*?ZdysGj<W
zFS8iVK8*b-T2=Z3Yw8TxdG##<-SY;#^Gu{WMal5lEe-E%U5zK*%hBU`D9b&`n69=V
z)qg+FYEN=3Id~BncYYEqll@510ug`1Lb~iRj_vpPkIXv`P|XKMNps!FY}*nTy<;aC
z1q^0)?FQg6$usP3lRONsh(n!ace$lDx#AM*N_<h7gBz#nfv@W!_+>SQ)~clNz0t1R
zx|&jP;N@(J#$<j`LmKSK3nPzHm7Li072FN8$H&_gVcwPh=;2ytl9*^pllI48+=X-8
zzb-GlWfqS2oTb^T9Y@h3avSV=x0`9*+fa7QelqDA9;V9neAwvbj0)#OEIZDbzO;@d
z(LMt)t=!C;_#eYNu?tvj!wDE9q@mW^?7?+Os+e`>Fe~09m?IoIc=lo$ND24Sch{|{
z`{6-!pXC7l72{#Mu@MaT`xBBzJcO11#*)SEk+3MsfPIaK5t}_f#IMV*gj8D}b|@wn
z&L(Xoe@At(WULupO;siRgHveVv@&>Gqr#StoyD#%Nr$6cCVkN{W)q9^xPh~`!|z`M
z==v$-&Erhzlc9)}&RhV_TP4_#i{>1wpTSREmcc7u$hY71Qj5wZrSVkB6CB@NO2>DM
zV_NIST^qJTmPQ2VbA4<A7}E<rO&raQSfIuZMg)VgoY2hpRDd^%kJEVmIn?;qf#<n6
zC~BPv34gocdPahfAh4owog#``TnbmR{V}++88yCFa#?TgLEOK0@cBU^>{5>7t1ryr
zq<tmXrY-iAeZ&hRZDOc2X+HaQWGMakBqXHO<N1xJ=HiZ}w#@s$VEDP?HYEN_!Yd<2
zF%W1Xe?0rK^l%}x-M!B*9cgY?ztWmiOILF3AId@H)Gk!`r${{!ek68^WCLcpljVz>
zFj&!vC2JL<R@Ymuv+o|~8gmT|g+%X&)FUD#p*<ivmBB4{O{B1&QZ!1SoGJS0Qfsd8
zef%6xbJq-@q`xn5$P;M_+au2o^VT?Z=UDbiK?`<vb4+8!5v&?0$#&Q~!|VM9H1hdy
zuxhnueUi0sLp=uf-x<SRobUqQSvsH;dW7l~*Pybc2aQ#ki*>*6L(tj#7^wdheP`#v
z{T$(QJ=ufHZ_Ps?&<BPmCF!Wt8Gip!bNFv{0=P^m=MIm*3#8M^7fl~Q+tsh5-YaE{
z5>16Yx;DJ~#7MYjG>++|7W1}EV@PsW5Odc&1&bk<U*8bI21>p_Yuml>qKAhg>ie)2
zqbNGD2r_n>GhMZKz9d(N2J}DYEl)MTv(8zf($f~GRl~8{^*N;RZ5UVxwy}Fh72unn
z1&&;tiMtF$$o(}$o*l%cD$!uIO`3^{JMr)7c*<5x<OA;|aCUMHqW7DA;`yxyA#0F6
zYdx@<ZJrR!J`}CPY5TvG1ui}VI#(7_`qWZbSJZ}P;qRevL=a1H8P8UEzXdZxSqvMH
z4wi14*yg!uH0ekbH0K@W$+Cb=R~bwJj*oGVsVN-_DChOhb@Dsb>C)@25b$Xy+`Ke|
z-MP7vubNmuO&tj=rpA-p+uOvxDM?_qNsspp?ckSgB(5(+nW^>kAi3@2U)^6LUVpm;
zs@!Vr9PjJ_m(lzAfqj2bMqe;vY0iPg2Oq-rkHY<=HAWoK!l*2wf%C6B3tz(9`TF)4
zCRw_esXmAj-?1A;UrM8ev_&MPR2Q;i^Q-vXx0OlWSImE0KM+l1Y}u@FQ{m|2447k{
zKu?szI3H@p8m)t3(_eL%w^EPk{|?4X^%(KO-=}C>WER_=F_D5-CE%dBO=8u7G0e2Z
zoDQZ+(VeVwV7Vv|Y9GZ>jdKO2?><9sRWGru`&ZDu%oF~_5KaYO_RgQ9ikWU?tTBz=
zi&7}7L6&Vh7R!!Lc}N#xhLNtU1k-!4P}piHbHzaxw723aoqjo=mG@uAfjTGH8GA8z
zENKrU%yt5&A%}!Uyc-VdR%BkOxlFS!n_oD634eb(qbjX|?1KIld_CNf3LYM39~<uD
z);H^5-A7NFuR53vmi)xbNAj$wejwblJS<MzCEVL@JjU<kJZgW;1S{e1eXcE{^Ru(4
zuyrcVwNj>Y$D8SK*~hYFzip{I=P1nGY|5w3HKaXRGici11#E+!BIm8_jndOqS&`LR
zRQu=1)B`_b^z-R(F=jk}{>V`F;B*iCc9NhGLwk4~^B6(mAx!fXwy?-=iIn(Gj#WQ4
zWQX!RS&zFCXpNF)1NEZF&!V5(l$wb<UhIdrW!G`$D#2_s-i?_{9b#vd>+$Y)55Y2T
zDkKn-;m_CCD6l)xzki890GmS#&pWURXAib`P9Wy*J<bi)n1xb8Q}UgJ6PbKFM)lQ6
z;^lGE>FCsI*zSG;8(!4Gym%dEA^R4T9;&iZ6%!UC>q(zh#lf*pLgV@UOeoMaU=5d6
zK=r%~@~WK3`OHkF0>iIdiP(qP6;B}bw<Ua`^AX-dbu6}-X*210j^w;=JNx%U1y<ho
zV+-F;q|(c(Y}LofbW?Os+<k2xud>dA3%Mh-nq1bC>E<JFaGEk(IzEKHzZpUzzuCNL
zX*2#&(n8sB_pohYHpsv0gNV1gQ0A)wZ{BkY-&+_njb$E~Z8cBqesCVnOmIj3dkNpS
z-j}!$fIF|=;<_&O;y7z}>U&g%UoG#0?;j_cQ|FFBa%!~1VmK-1jlij!=3v|lIog|+
zhtzxvQwr~k(zH(CtW=<b&l0ihizckn7D4tO5ygJcWrye7g`a!I)7p<w_}ptc8?$}_
zU4CXqp=vMC(anGv%yh#1BMz|N<pF<JeKz+X^SyY3(B5?0sKI7x?1!#huIy|}4%feX
zJXhzd#mp`ZA)A`BT>Z96EZ3bz6UVLN`Y!E;2rWhNt*PVLgFFkiu`QIHU!}>$t`nxZ
zUtMT*|5X?{RSt4&+OfRo3^!){P`<Q0M)c0LoubYjL9^}8;d$&#HuL*T=4$j7`%V^;
zfA%PLboxr>C-2Tu%Qc{=OAWe3I>6>nWwTBpEUlfxdcU6HOoHNxOB==|NPVWY-8-4S
zycOp(<0mw`Sg;_caJ)26n)>Cfsc?lFw<=nibVAqQMX9Oy;K^TzuTSE>WY3^Ob^?(x
z)Padch~Vh;H{!q5M<}}g8k}3<1(j3>-gRbdQgaLx8d|WS>POLLW(kCxz6P^y?BwbT
zcnq&xCB$q(=y*viOb<E&a(<^^q!1iw4zLz1)qT8*Oa*`RU$c11X=BzfU1-Jqp3k<+
z?`PNd4WR{LRuG_R%&K3e!twJ$dMs`>3p*c3x3#vz$&M`iPxq$Km~H@h4|UL9V+UoM
zEwRz8m|yg22O8ZvM*hRSz<5cjV4QpnE(Trjq&fNOWj6;pHhUd7>3qhC+r!XF`8}xQ
z{ShtHf5(3}cW2-FRcJKX(W<S&0HSgqjog~Yd<Iu>#)8q+p+A9z%Wfk7GjE`+*nzIf
zis5>SV9RkIK;6~yZ1XdLMA0${_O(o*Af0?XmF@<y+-As58BMdC*Fi_?HuC)~K~{rR
zz+sZmP~LPAf+Mzp#q^yl!~&R^kf_&PV}OoBjM+?u9Q*q>9&&|s%Ru&7IF?j<uq{8c
zSmiZUIJ?D+Y91O;{F0|w{P!Tu-q?>aWfSPaQ%8EdE`eoC90;d`d-P!4CotgAWeAzH
zm#xcY)NY;(Z`Z8Et3slAzV9HW^;MDiPLm~dEg6=Pun|fkN7AZsf*D=30Y1*;*t8|d
zxP0jrevtQah%z0^HWUQof|k=*a9f5<4@Sa=iBkAg(3zetccXR}PG~8Q#Wk}exkFn-
z{3nT6a%+4fau5>r_Znv7%H31o=`usu^jw#HnqbZRR!Pxniv)3&hcW){mt%%nkGZiQ
z7qb-8WAyN09asv+=`l)kd54xCoSOS*4AN7eiKQk~8$Aif4$HzJ&vRIRP$~Sp^o?)o
zZU!T*<HDKy9+xO2XkTV@;m$dSd4=U>FsXPj%Wyu*P24_(d06#=ia;w*niIx94)=$7
zK7Yho`d-v6p^2;ad$6r1YD7hzkMYOcCESAcP+ncB6}#`8Ksw;Vx=M%B%N0umONIm8
zinF0vQT^yzAkR#$9AW`7MAmOlQpd|#@MX_@c%v@KUP_qYqJR<n^{i*`!g(nk?p9~_
zP0Z+`aXPmzQiW+R(g7QFBhE02W4o$e!1Uk)nD{}Me0(gxmEK+~x<QluN|;N59`P`G
z_5j-A`i4(AuV-&-R7M@)HRNCV00#UHV!ajr(cBZ|_%7rlwixWCrhJ0HElXK&oiqjd
zDKM9<kKk-@7%j3|Ef5}b`MNa0!g9zIPTEW16hBYa%Z1Ri_s&r0vI>{|vSwW>gM<yK
z3obsUOrG-|aKBBv;HdvhwlsP@D4nuk+tt1JJ2oxowoZZe4IGH-6=&I<rWeqbUB$i2
zn?^YjKJ5OlS=7<k2Uo_cu&N_Ts98S`Jxs3P*4;vK`=TXk5AA}sn+w75LlE1Nau3e=
z?1!mU3Ap~>LUekk4AwSlIYX7xkaydQMmKt~8}Glsl&|XSy-p6A1=-@ttL3PgXwD)E
zN^#DqBwjD&9)D$sKks-|1@_-Ifvx{0vfatm_;jNhlXja;YrW5LdV(D_q%Rc4J9pvT
z4SR8;-!u4}z8`N1MrAph;WR+M7+unz;`3iltm#=C1SBca-HRpASGtX6?$D#{KJHwy
zaw(jN$ikVun(TOW4Xl|iLDOT6*xM;WyHKu_AHKc=?=;mRfAukVJsHIuE(T#tgbkZ9
zrVjKcsL`tC;p}tzMw~z8EQAHf;iThx@U-qF{IaMBt_#-V!f@f)R+0s~EH2pleSD7G
zF2S<?)R5^v34*Gxk=)x2mof2rIP4#Ik(p1i$8!~hFhWs}Yzsy~tyoCZpU%g&-{z3I
z&z9}GBtdqW<&ZQvU;O&hC@Q%xEo^qJ_=NL{xV>aJd-drG*sFV!r1e)m;?Z+dKHV+;
zEVGBD%9OzswS6qCM3)?HK7jU@Zmi9*Luh}Fq<_Z;!DF#BD>>5y^{;dZl$0>N204wb
zR_xDTLngIUpOt86(7c9Ye9wy+?AI}QRyI8lSJ#YSLsU0_?c+G^$%#~U!bqP4r7c&o
zH3+md?P!tbAolCV0x&z9$W2Qg#|)!F;gnnlud>SyCq5{~zc<p^xu8~jy=6Nc>h@-Q
zgYcaQ|B5>v??t`ewRrxu0uu@v0K>+CYD7Bi?-0@Py<TFk5^LtJ?an@=^m2TODN70!
zS{=p)EVwxxeK&PL@ApuAd36+a#O}sAjlt~w2t!nHQKih<C^U01XO3NUxWK{_TOT)Z
zG2@<LrR07LetwQycXBeV|1%eLY(8=ye|S-1u>wu_BVspWw~LHl8jvU~8(q_D`O=3+
zM5UH$Z2aWGut{hMjtU84YYq>Ci(dDjvkyTgb{PNc_8crdZbUi<N>G1-1sqihC!rZH
z{uZJ^)#}lBaA*Z*T``wT8>+cRsaRgrHIO+y)+UX~L)e#lWw<8P1f$GdSXf~-b`1^@
zt^ozs*0v0O>F#0PK{MdHuL3*xcmV&W{s`11+0ulkmpFskskBDVgt-*j(o{twmcHT)
zMwrc|RT7~P5TZc(@A~=qLd)UgzAs?=Xgn#H6yYn?%TN}RVOJpU1>O_)z{n6g%xjs>
z>sx=pT7ThvcHRf^$b0WV;gbt($_yi;bLOmkaWS~7YH~AUzJZ}YfK9ZEWcOc3L3&&h
z78SR{?)Da(yLAY~Ii%p94^7<7$Zf3UYC7DXT8>#P9I|@+am8v8z3qw@c`maB%R|R#
ztM41|o8rfm9$N8_YJ+i$WioRg*nl~uitI#3EM(8Jq~`YJwCwL~2tW3ne=C0gP6iET
z?v5*1%7+!4v6?x2aN)Uw#*N|!zSVF*Yd;Lwd5Qa^`5H#X2wU{sn<)t`1*)nkH|UuP
zEwH<Y)3W5*+M-?Td6hrB$k*Tvnv*fxdjz!nH~^9n)^soPf#^hZ9Og~ji8s%VXBEEp
z`Mo{u;;S1Pady>f9BEt)Mry+5aJtZf8Fx;!@9JpKeWxJ0A(*rmZ92id`L~%i+gyOp
z+<mNc$i$F3j>6t+u^~&oq2ygD<~}D3b##0%QD-Nsul6L@(>HP3jR)dmx`Nf_6%+p+
z>rWfyQZT4uCN)H6G8xqxjQKK76y7-+e<oG4)`9nN!0igO_R!=C+m*=kvNHx*0rk(|
znCkJh=yd-iZdTW$^vf@~A(QP$;gcMT(CmeZkTUpt=^Omc7D4o@@pNv@I`-CcJ&c%m
z8s~&*!?_o6a54QS^r03iZ*pN@!gIJK`3>;Rauq2jOcpH~ph(>t1ZwPW6M9ul_*TiA
z_P1w=)`lI2F*B`L_g*#pw=a!PH@9P}_T7T^-U3>BO9?M|jD^!LqR1)#9{!yY&C<`r
za#N&D@S5LMcq`BntG^!P(&OD}!uB=b;uZ@w8Y9p*wTRnf)y%bAIl{--gkVt0C6TWE
zNt~Y=4OdgPv#5`;xO@i3`ExR2y+h`>_Qz0E^jCz^*Wu`Qd<vica~|DY8ce36{psnn
z$@r_f2QrmC*ni*lfy^aualb($On3BTErYz+w^iPBWcMgm{(_@#Usk~uR|RtL(qccj
z1guVbjX92!$$FJHwqMX@#|?Gao}Y{1@)89$e_WM#XvJH>A}A@eMGa}m;$alqxfk5-
z_|v~vs_aUmH$LrEqG{o+T=+PS&Stiwe33S5=tmqPXUaMXw~2-uFT;$R#_U|F6pM2m
z#fExIvrk`qFgi7gA3X0o&Of9{dBsuqcfE-2*Jj}9pGiUjY&?Vu?}01cZf5_=#t0i{
zb21uv5KinjWfMM2gNot~zOuy~2i(nJclDODTOUH<oqh{mocv1c+BbmL{GLu`z5dK`
zhCAn$tH~mc%w_@e@)=$9g>`Q{$yy*}jH``c170+9Z<KC<@9#ROi*ymTnQ1)DR=}Ib
z^7+c_se+YR9sR!_;eXdzz`S{<>En=2@w7vuL}v39=*EgD+L1AW%_-RfcGiR8rWZ%n
zGbHHVka!&JqQEt}U*~Rh#jw19G(P(M6A1OJ<gE1$<E@;7OmtF<=4GrG?7#U`lWfLn
z&5Y@t=046iybAmUGx=6Lif_0BY}&^JGBB*d#aDN;ai>o3{TC#d%L-j|wUVHKj0P<2
zNCT(giR|1&XKVy{Zgs;Sest0-oZh2?v+DH7q)(2`D-m-8+&|&uv%|r|cNA<b@Mc$*
z4WzC8I`q6vxCfmX$d*dHhDOOmHh2FO=utFB7yEMYjOMXarTZItRHw004QXmWcn@L@
zmhvrsdFZ_O6QmcGKxw53yE{^XmXw(>V~HY^c~FTHr%h%ON=J}#^{7TyEG~5&4+EUl
ziBhAf?b2-=J#RgK=EEJncv}l+uvLxLOpy~>wA%dG?-!V6&?)Gz(W4t#Z_wlW2{@7b
zfKx2dVJq{xP=4ic%pXv~h5xH3n)4p|glj={co;o#<k-=+&-g6)GT&Gv$M1+3!Je&j
zfrH-4bm(Rv6baADkC%L4pZ+fxwaJDK7OdvhDsG{cJS%?cjeL~VKTH>#OR??Y0h*@N
zhAAssdD*c&y!zXJu-7~u9xtrp{BneI&cl4FygQ7$5oJKp9T&J@&oaEfb03Wz^dHsb
zUL#Gx_NDk^GXag`dP-br`i=o?{NR1~(bk-gzLdvjU0N^tIM_gFrWb%ziW+sF{)zp!
zt>|y9KI>e1hLec;h~bXmVyl+|Df-7nlq%RRzSE&h8vE5LnreC7gZo**muuXww0X2)
zj{~#*Z$Auec>?T_E@k%yup1r!GxysCV>~QqJalrEL1Xy#`E%%XtOF;hIhM(|PhvH{
zJz>0%VqfR4&wOrfB&Xx)s9YR?I#X_mJ{QCZb#*IpvY5#B8jqmmk2ay)j5qu#;Xe6J
zBMd)$J7%AA*qwg%q>@}&F=l@Ch5v@%z){C#*`!}`_^VY!<1O8AAOaP9s<y9NREkfh
zti<&#ra040j#m9Iiq89ytN)ARl$EkFGm1hAO-bJO9BCL4DI-#8N=i$6Q3wgiC{aWy
z+BA6Ib1F^kv_wNYl7>&E{k`Ac-haS--}`!<^L#!YLA%h@>M8^c9EpuhTD+>>4;SD5
zL02?x)6jx_V%?HB!K<?qmy~ByVd+evcPA^9N4J8Z>jN5=I*^atn-62#dO^I86<)Yk
zEbMz-O8pLbz+Uf*G^2JiUf7ocCh8k0-CP~?%^pI1P#GPcP$+zI&lFb1rgB!~JD?e{
zaC^^jS{-ajdN0&r>3}nEGV?W<U-ZS3TA!%+r6!O6{EZ?SHSv+#e}eLq31~A?gO?rf
zM*C7L+BEF~1WPU5<+^$Nc%DSYopn?+j91_-9m>J%*g>2U+zf`ZorN;F8rF>WhNByk
z!6PY;ZI<-|bIlXf?_*CIyf>OJd>%m_&QhXk&Q!{=cq$&!?ZgwL`Aq5XuKf03IlgL(
zfagiOIjra+g`BLU$>~ne4HR*pQz#_bz7)2meSxj>LwS&*6_E__iS=1weWeV277HZU
zf2H_$sr0<>5jwj-g^mZQuvf!bA!EQ%^7@wx3Za`IZFnJ1E;|4p<$8FAM#>{^%;$HB
z-=R&K^L<wCD{5aWp_wOkiKFLFLJ!MXJY1TGE@%!BKU}>e3tFc^o2*p%$KTo5xBG5!
zgtjub4T~dJ_kEPMrycBvpAnrihl@5^jbthj-L5rkK(*}de6_7vuygX|?3d3e=Yz3u
z&vJlR`O1SjUKr1hf|7(CZcnQ^74HYPeZBah?>z|j%;S+hLwMG(t}?gP6R_XZAnclY
zQdU~+FLwLo4n;Hj@TyniIeBtAtQ@d`=a~1x=1ITk=9n+Ezw1A6a8QA`g{tJYVIjN7
z9MSx1E8RA=5=(B`p`C3j>5tZs?pkrwedIvyES=SjmSs`jX^&~W(rPr{;l;~-tq@I>
z>S&c0@ZCZW{*2N*T%yKB9FK<1yT3sF-3GGRl>|439)`}=svJ2080YnXfUz}T(BF>j
zOqU54YMB<)J{NAf{eff4RJm@VJ-Sr?5xks-^2pSMLP@SKy_&TMEAHjPid~^>y<(l<
z*BT=Fe;Y5x<ZNWkyWMbUq?9cBs3t#Pu0wq^6U71A7lc1{xims+A5}~W6W2_dg<YzR
zxkLSM?$-PfMo1~7Rfi-)Z^>Tzu)j^VGkY74z4Td#bVi}GTN$Vx3SlpPFEtP<DP(a+
zbiIEFz8{IeUfmenY8Sz&PsY$gYR0TsBPBqeb>WkFFX_^sq4;6HG2I&@?f9*B!21p7
zsYNN0PM)?v%h7L1-qaP_7mQ((S<+m&i)6G~mI=PY>`B(}7{-^(<=J1Nx%PkzD^0u6
zEl>nMi(T~J#5Vc7otr5}q9Qt8wZk(Q#95bZa7TouJoRJ-dE|GZn*nP0xF`VszP~H%
z{<slcwg!n$>n;cdlVyyi<LL7B!F+k`32a|eAgUdf@!`NO81ZEQWiBY@-In?o{$vNg
zx%iYE-rM2a_|<TR`|{SZo?I@Gf!&8BNoTVPVAUE@?sS%No@i4KZF>wW>&D@Y!IYD!
z%$pV*;WhP7U{ll`>Lxcs&)^vpykaKDj<`-{r~C6x*X^)6KNTMp?__J29)kPXPTV}T
zR)}8yjULR6;xrQzUfQ)6HoV${<II>kD7~PL7JfK<L=rY{+DJ$4Nd}BaZ5+1znkfB!
zC^fo3+^D${=RO+79tCk|r)|sKo0rgI=^JcpS|~Uh?xj5&9+0}vU%|Ch87C|}gojNZ
z&=K?X(DlnR@z?L|tkL%^s3(M>`NtpP-dH#J#06_u?RT8mP^H0hC-#S0d+E;Ekcu}N
z5Am(2Y|PSfMfdtv@%Efn@yFqD{AScjN*Q#EwyNboSBYYFXVnjyr4x+lw$*TVh8Gs<
zPs7?f4Pw2s1MeCR{CTo1g)gwcI?oolzg8yQ-BDkaookCrXds2&EvMs|pDAMLAFy4V
zC+xDyq=+FVqM~(2{FQKB+99RF#77NqJUW=J{pyKr<6jD68_q%c--q-pXg(*N&*34T
zBhc>P1Gtb@$`?+a1hrS0eEexP_xt7vJ6o*z-0l|mFlS6<^|dZIxHO$K68FgNhIYXz
z`U7c6v>SG<+s^CWY+<FQ^J0fJKB%!{Hf`_Ng`(b!A=NKx*bvYNlYjf+t=+w_c&r*0
zY&=W$TTg?f^FjKRaa7#nT}HDLkBOGGn<3?sz?)Ti!0X`Ku<x%77sXG4(57j^+%W+{
zk=}5Az04b57o~!;WdZ%&ypM-FnG0HnvMBMzLe4kZ##b-AqdzO2k?ZwdFtDIEJ72v*
zv&>y+XZt3IR2wRL)6xmEr2A6JaDC3OKgI<&PSMa)6Qp)h1=LO7B%R?bxpqPtZM~pO
zxBE=vNf$hMla!(}3e(}r+Eywr-@sX^kKsb=d~BI066z{p{P29(+Bk^MY~4?z?`?yE
z;TG)G5=u9YBUZ1qCmr*xl%F<On0(oc|IG}eKAoZ{Xhbe4Ehv|NQ|b;MdsIn;y?!_>
z&X<q(v*lqnrre?Y6v4z8nY;c9x;LR6^e=V7gP)^eqwWbR?a~)#KA+FFURJEud`&1<
zsTP~x>Qe8s`vuR3qLcvEV8x}wIlHer&qy96{SK7KeQJ2DsRBB+De?PW8>m8>ksZA*
z?HuC<3#Bjo`P2LcuuISvmmN4K7Pe$^gYbup?+EC6ld0_ac!^lHK~#9A!p4gpil@~K
zxLGTfMjq_Sre+q{J9rc<o?*rceNrI*ivi7k8B2C&kAVHPPQ0_XyU-#v0b(R$Nap-}
zEcU-CzOU}gd9#y+*HS*E5CK!Fhd}u&GaR2AgDbjUhUHDQaOnF)Ve%MXetx(U<^73a
zrD|9DUc3}HDhBi3<2`uK?<71lD2kh!f>~RAKlmPthpI6K{PKY&Pi~K)8=W%5H{C|j
zkTh#-=n~9F8V7T2MK=7bGKQ<?1Nn2G7Fm1S1i|fyCM>2}Sn#?h+rHn<L$;5FlP&48
zeNq~EK;H*6+4>hH<SOD&$wKVgvX3m@4@CdACy=pGge9NL#rAjcc<R@6`9z-;u=ssX
zQh1oh#@_eo`}cTk4e!Ml+B)-PEoaKEEf>G8l~LupGV#r>m!h3Url=rM&7z-P1X<uA
zc23+xbN+asQ_y)*DA-FcE$>5fNN3di)(7m?IN;NfC2-=SBY70(!Pkrk9IRRhlO|`%
zSB9umr7a)I@9yN_%svZwcE{hOa9xcx(>iiNxi%|!ACl;RX|jQ16~Q#$1F+Lk@ncaZ
zTKo;g!Ed6)yV4$MQb>2c|D+W9m~ZCrh8uAGV=SIZS<PO`rg*-`ZFt+ekm7Gz;ZezO
z6`HX|%Fg(qt=V7dyQUleu)R+sx`xA)V<*Jw=N=L@RiD#+gLp}KA3P=5DU=o)@_@Gu
zRKZ2!i$RNc<N$k!cqG|q^%8~G{gnCcBMk`O{T1f+3dOtY>uJcDAYS|14nrQc5+ArH
zJx2y|^-jOxlAy`&Y#XKf)MT-3t0qn}zDn{7yM)~jrL_8-5=a@nQ`mW4;5S8f;B+?#
z_pdFYZ0ldb^}o_R)T>ycbZo@^@#U~N$OgBqC}b_eL`aAlkJU>9xa5EdFR%AyB^?*x
zu|!On@n)ku@s>BJ>@9{<T0>#$vg`2MaJk^#s~Wm!rHehzn~CmqGGVM~2fVi6G03A=
z2{P{h{Px9HZ0n!K4reb4%br%zsNXFRX<fjbH(n67Cr!ch>gl{#=^{NUazd-U5`|R9
zpBL)4QD3K0QKNYqytckZyL&tqoIkvQuPyy?o~9<Rg&tCyCmtL+xWOQcgEZBq1Qs9b
zNA;`pNy$}_zMNZ6`}}ffiS04!S9eQD88MrOFYf~%%A9$KQ@U9C?3w6HkKn<&a@an9
zlSJF?E6(t-!2!M_(86yxcir}Zoa8rzvc_Su>%%L-&nJ?MeEjL2;$zD8y-OqR<fGS{
z0sQ8-vfz0~kHh+25>i(FCAqLxe$e1EX#MvPo>l#(%^Nr3yJizKG1&r9HHPdFnF591
zr}O9H1z;T+$iX&pFiKqmO`<NAG#?k*ONX#9ZV(^8<;Ivb7rI;3K!{K;>-yb}8%zd6
zM34vHOq8r^{Wf4mugUD^{emt;*U-Te<Ai~at%T4Ksnmr!aK{QqeCVT%ljhuqLTMh`
z`%-t_{2&S6c%(?Jqg8k{Hy#?5zrcvat*|q&1I86dJNfR?GpcdHIjc_-gfzUM=U2DU
z(W!?)VcJ^(!<a5K%%#E4`e5`=4c5MIfPMF^;i}p+ur%v}2W~0J^lp8D`BOd7NlOR&
zxy8WuA=WT3m4!*2UeMD2Y*_r(Ox2lEgE=Jx&o^~J{q@K3?xZ{9e8!8@$9*U9$`*1-
zFQuAqVG#T=7#wUHAOO`l$^IXV8$L<isQUq`mdC=VJ)hy@eKYPE2r{Ete<X{&7f<#~
zz!Ps*;l`r}!QuX0vF^bu2(fAx3ZLb%+0>cB$@1;;70M)$3C~i~!vP$lvsoN7+k&60
zk!&j3ny9^D5}y0<94c~>1jBBHJa*tEGJX}$Gd#Uv&hvegsL+Fkm^EU+zGY}JDOLP1
zbD-!H)rEt<_UEi2_wW-)yKtQ(oO&<~|Czld=Ku%v`^w~Zbpx$=I8@BN-xEj86~xzd
zXK0H=<u2ARmaWb`jUlNiq`v7Wg|zyC>J%wBcyp|9PVW#}ZA=zQS{@2>h94Av+XQ2q
ze;h40P{o!0X+qbBbFg)s31?T1;A3EcYV-X0LfRSpwxc_*njXa^nI1g!%OzZ-X2q%Q
zCpg87og6++CY^)kVxlmX4^~J<O!;g~JeDMk>#c(B!HHtWl2?MqvnBNI$bLAV5Xjr3
zf}q>WNO5MuJ5b1oXN74O1)Yqq^nRi_PVoB#R;P8T)cG{^*`v<=B9rlKf)`I*G8BWV
zm*MV1vv}ad3hb2sR`l0T5DZ2x<hFXrM0Ls#pEj=LSSdq1PIVisT8%iPIuO2;_u!C{
zBLcfd(IbOxe9O6x+)i!>%Sq9=)2lxYh<^bU`M+sZt3T&zZGcJ>!hn7@qHWDtYOhH~
z@{Z=4vo68*w@0w;j1uw)$&69QU>em*^1<`ivF98ib>$?Ocy~X)ycJCLpI_4RDk;bC
zxIY`)+t50#ksMwXi7J2VX!+-<==kIaNB@z|#&0b6qEHHt7X@KcSyoj|y(Mc7=_S6f
zG(i8Y6Imry6q-BV7v5aX;%YTVOb$6gy~iwvIoHn!u3HqiSb8SY|4hg0?^pA2c?<j~
zvnP{+d02iZ4@=_{QL%Lu)yy{MyLGcM%y1*#_FN+9%N22#ZW6`X8A3$=5FvX>9nV(I
zz~fa(P;=8>Sm_$bpQc?W8_9|^aF80lD{G^B<=QyWa08w7T~w(g5q}5k)`E}20P5Ga
z3p?F8O<ku~P<K8F-;2LUJDp3g)o~S)<7~cq&4JI@1@p8&YHU7lu{f%xg5sM}<cBrR
zgIA#eW;h&$&ezmM;kYt9xm3(6BK)aWb{+WW4Z$OFZM-tvlIGWdv>*9RnRlvT!`fU(
zcv&ufzn6usJBJHn&g~FR9!kK-F5#Sc%@fCZ2MGRuRVZdn2H5zRa>e=>=&GDX3gh<+
zD!US~LMvYgshG~IzE6W;mVS`)$QtjwGUjh;6~fa4aWpjD23HIog4Zu+;;T>g7%UNU
zd+t0yWfN=VI<q$8qr*Akx0e&eA!_@a^{;K^390E==hQ&Xy4vjBHi&NOjA!5fG<o~j
zT~L%V2wR?Br9>|~{2JYbcgIY?$3K-Qe6%voC@mM8Qf+8T@NijHe~D~S>&i`07X*iB
zQ>>q$&pSt0al3yq<mt-z<1Za}nQIOA?I$_cl}7PW#g8yU%7%5-b;R$NnxLlLm@gNq
zqVKI&V)K$4FzeG#n7dySXAXNTUY@ZQ&wcR3k((l+!+Ax@>M~q7GJif)=qJmr8)f1f
zlPMHzeMWY)_9=NtE#mcSRdBZBJ@_-a5qw=wiD&i7c!GNeG))~%I{qGLGFY-CSk#Fq
zWWt;8a#6F(Fj_m&g;x~Zr^t`qbUHtPTUL9rTR+LP+v_u&F&l}~!pCC9g%999#6xB)
zkx#x_<no<MpUMBfGTJd8<;~|m($usq;!BfL0NQ!<)9VR+E1t^^w$oWyd6+hC^XB1C
zcaxP)7Yck-O0x29ta`C0Pq3TB+f*%RX1^9VHO>W_v`$gYnqqE$SqpA$mEzr*5_RdY
zKaa{LQo^BRR!~DD8h1hTcVjGA^bi6!XkpKq7Dyc)L%88Aos<Rh^Cc4f?B;SFnmv~S
z^+Wl?KNmvhDl#22AFD<s(*9v<=}Jdic=95VQgJ0)7<Xfrs6&v@G8rERorIU4k})e{
zjCAk4ENngMD9-Ppiv{bAI7_`JU$$AzON$Ovy?PM9cQdDBkDx*P^PxmN>3kA4y|<$k
zigCPIbsX+&o(w5xv*A%|ocJU1AJuqSfvrL`hCjH#V*@VI#cYw5JTn2Wk|HrIs{`(g
zE9CM(dtQ-yf+kAeP{S=-1j9?__;6*rC<~j6P#-QXF&RXMXX)~bIS1J&%@G<RP4KRL
zBwDz=0H>O>7&^F~N~~{zmh&lE5pB+MrpC~#pN4$k({?h=m&iWG$k93LQCqIdL))D&
z?vfgA{<aZqF7AVk)p>kym<DXM$d`z_2K+Wc4WsI&!-U-y;@K_hgwX|E@#XqNoEkS6
zXZRaHM$<Fkpng2wYk_dKzcw8EtBc=1S@XclXX#H!F+KUGjURadf4XFi4og+3XtO&%
z*mIXI|NTwobH9Q^mkXkOlQVp?djL-#Z5L)HdW#+PB!Xd~4@F31Q<eLdg;>_dPIr6b
z!N8H|)9VY&iC1(!cld}X*Y1GN{{dEvyy$E^<^~kD_hrvh#&|8O8q}8Fq#g<rXxZe&
zG^=F}mYx3yTVA^Hixp+ku3QZ-MKXj3*x~@sDRkxGWVRwLtbci)W={V{-E;@=%-_#w
z>&OJ|-WEV7`~@y7dr7y7XNb!>sS9r5n`xWhRbktvYqa!dfzY*l36EHOQYIR`r75<t
ztkGv6{=A+7XHQCWq5wS}Zm!FbBdY0qz!LFpz-M9np<}e>S|(gd94pPd?OEYaBY6cL
z6~lg6@<ibn{pT>7HW-ZOc86m;VD2=D>d}oS7dk_``zDgtmr(h#S_<p_oT^|8EYdUK
zr#}>UPxcC449Ka2AB%Qx4@h|{Gd8yyjmm!0;n(d`)c24pTEFj)QkRv>GfnxZPhUv;
z*<DzW6H58s=@fEi8<>sKMn{)^@*fWc^orTbjpjY^_g51-)-jdR@tB}}`XNC46MFb7
zntM72!RDkD;Awjj636S|PV4pJ=lj3tWa3MZ&F@SpvopksSqehy#7l7Lx+N!7{RH(G
zCpaq2vo@<{Q`-Oq^c=ew7XGjz3%h+1Sylxyds#|Wjy{|!+W`9;_tE#PFVt^!5+*;A
zY%9r2u)A7!tk|9l&c}1$%>xCoSvidUIxdC3Oa1Y0uz(A%#qovwIIuV}3Fd#wgMn%;
ze0%u`KDgowO_**$R>qgbbjjj=#_1EJpH8Abx*Mz9BbH<A84E~^S%gZjjBvx^BKGNS
zhe-;9`FrVHp47bqZqrsm|ErdmHmQeL*Sj1hh3*&sI}k<le`UdhxdLsB^B@;{5suc5
z!d_kUxaQadxOBXd9xJCv^ye@(o*W_XqkA8oUG~ATo<3;s@g@D6cO7K^_6kAzw)|Ey
zXoau(Pu%hLk1R01A5(A;u*p54yT={bbOjIWp0t26K0E<Glg@ndT_ETE8xEIu>G4zJ
zz2X|LLJYcl4BnMp76X@z<V*Zlc)Cvjy;WPtId}n#8#71<4Bt|<K4_OXs%K~1F-Y1M
ze{F-k@g9t+o9K(%3HrG1Cp{j!m=zy?7wjrYXz3&4b(K-{cF=#YZlZ{ud$jPakARNL
z9t#wa%ANI|!#-;*RCj0*?^JD~=DnTK`F1RhHz<XTU%a{fayT6bb>k_DWu#^ljn0Rk
z$%kS%ZVJ~z(<7NM)+$DPTNq5G#fH?&X*7PD<;y1E4xynL^uS;^++MMnLkuGY7u_MK
zANm%K>FDB$H<!uzCPQpuE+d|lDfUXiN=*%pe`|o=V+Mi7&p8|zJ5_96IfJI`&IP%2
zMnBsA2hR7X7T0B0)1TI2A^K?-e6j043Vd~){zyCi@_;G`zX5o+e^=~t+X+K%_XUl&
ze?`^vU!ldhnHmK-zcutE@9L4PJvIa09_#^O1`Bbox(g-l)uD0PmYi#!h)qL{$@NfY
zett}k4#$n+ISzC1vhO<3t3D;2c{_1uUjy70nj$uAy#rIFPQ}}_Kwe_baG>fB_4#^G
z{3+3>j^>)mehyRSNZn1iW%@DU$K^zE>{KJ3>8Xw%!VmDsZ_Buww*ennvXHXh%%tt^
zmN+%S9aICA=)<yNewMM5T?00dN_HwYKg{6YW$HZd-ZpB@xd%5scj1ps9k}zqJFvRz
zP<l|@1A|Vl7Y{Dq!y$v7i55Cd;@<72#1(_91*sE_>lbRH_kr)Q_-PK_)86BJ>C-dt
z_!-5$>zCsJr|z`5AerY+iUFJ(%L}G=fyp-MxYh0rea$(*f8*S_&flG{FE|2mBTfpd
zwod0|-#$Z{-()Or9e`N@qp<hIWmFYC8y8(OgrwsKXvhbtG5q!nAGqSf9c&R7mR|&2
z=QQ}<)ez>tSqM6kMZo*YI-zbM3f(PCaO?et;_=7MG|Hz){_6HbaJ+R^82-}~-}<@Z
z(o@6n(uz9J=zBzb_G&Km(fUHIpO$fs-DQfonl2RgZWVLPO!(*PJYiFW)U)XMUY2yc
zlX&Hl4feFOq!*j+i=(gHr2V=r6dI&LmVQ0p*~g)H=HF#-KZi8<_CP%STM4hJWPn$0
z6b83ibLT5-@ZHW)=)Pbf*8jLh3cc1!nbJfkh>|Rs2P|c47BmTO!miLzwG?r@X^iai
zk+(ux;BUd1rc>jhF=();iLT~*W39<w;eN(QZuMNnhr?HcfA}21Gix)Pi{1~lZrkMx
zOSeJBl5*<2<PMxHK1YQLJ=uQD4H_4zhjv4CV8_->92EXmu)mNG^9JTX%~1nZo0bF5
z7w;$czQcKje5;VC5rs42@<iFL03KR+5$gWifPoEbB~pC`b&Ad7oDNmw@HCTOSUdnh
zRsg29{#@Eu8?|U24(NAUwp8yF1ZrFpCmA)u@<m-3tz1~+dMfGJc$3*3Czx{Lx)|T{
ztPBTh3OzoCVa=Z++C1_-WEicbmOnvIziK-7aGHq&dWpj0jbV65JBr>lZiZgFjJQU3
zH@w=C%lk$Rq*L>ziN;YyP&P6TVse{Kdao6R1vG|2`u$w49J~Q6x=D63T@%cRH$+7f
zUryL4Wps+A%y&>8&aSD339=wOXdX&^^*hjLuYs6;p++41LzgSwJp>%&K}BAGT`7(X
zb^CLwmcOVI+Cmct+y|e7hUA=EflZo5c=?Y8Z#h{^iT}odq)Vrxt+Awcd@l`2xi6n6
z?N-e5%Y=KvI{J3urD(mN1EejF5uPPF<2<id^q>81`JWdh@OR2=`Z#6^A6P3<BBi^E
z{jiOKf0r-hGwUS0=p6(NwXP^@8ie*M^fCR)cQLNGSV-17ARO2&(&`QF!okxfI92q9
zu|o!<bBE`!|K4};c4l|1{n}qPYKI2an3&@6@`GTr(uGI=dQX#La^bs_^}Dh$171zr
z?VRvAfXz=xxraG@;dq=OuDW(us7PH(4HrKNmJWNR9>`WQHQE81Z%wGVa2IG)ONN?1
zlOec4pN$tNVae=&aNPPj)pb1$^#eB1f3YKQ`qF*y-@)#<SZ6YvwH=GMMpb~8(`Ipw
zqPh^M`d26oRK-hEKL`o`48%cZ`mA4kQ#9<7%@I=!L?tEQhj$*)Ki^2$>@=R|+f3mN
zb!DPWH)me<DofnyFjsh|ok*wR)?xpT(fq}JIkin|g_^JX;Ku2hJh<a@cE8yQ`?~F<
z?Bf!V_^~N<H;EKuZq3AFv(#k@SDe@+FN6~!`b!z|$y7emh_ux$*<v5kv66X$%;OOK
z^7;p^6WrML&1>P^mSOOI<P9>A{}9^aHq*qQWjIG8o`S>D#Y^cCn7huF&vtnYUn?!x
zwnIl8_pXY24w%NQxLyqM+|C~}Y~kagpU^lv3u<n}^SugFN~~KAbHiMCsEI37t=<9V
zTXu@G=O)s}i+Q}T*_K_6)OqQb5U7c1Cv}Oi{vzuI4c?=GUEABiaLg@=-*1m|m+H!U
z_WlWvi`J3aq4vtlZ`H(CmujhK{$lPwtS48_trw1WJ}6A;qm7H_*Na7yQsBs*=@dw*
zVBOG(l5I3F#wdx;ua)j3TmC^&yf3;rmBA6yYRdVY50UmQaQ#LCMC^CNrT21#Ubc!j
zX}Ue8ee5qDn2?6y(cRhR*)R}&)F7|-AU2lv9~PrJ^UW)>z}H9-_jdJSKlQg#&UGO@
zfLXBPKnQl9xJSywk;wDn@OEW5bPWF^Mvb0^*^dv>_qA>~t4)R9^m3<bR_&x28!N80
z$R+CsKu4dyqm_ds674TVc7kPmY0fRN($$rdT&=Kwe@(o)@EuHDF40l99u*Xewuy#=
zF2Kl59k6bOWc?_ajz+u#dT&s|M$p8fgPmDZYXqDMxCjq^8*{pypD^-^wq&A<q?x@t
zvVD30zZmohFx!AnOS7=FxzfAR_d3zWvDnjF7gStRsHL$C!n2k6<og>!Ra6@U`AbF@
zO%M7ky%US!CY&9cgl4B2M5XjwR26SWa~H42V|5{ve!vOeCLE_V(XVLG)%SGpswxB;
zt7A$~J-tYHPqOE|@C3(FQHZkarnDnFBjtvcU(S>0vZW9fb6@uUZ71yWc@+=6_XYlG
z`pV0uy%WX%mgD`~cj$oMQ`+-sAGs;!lS9B(*y68?!D*2|Q*H6{{ef_Ps?;LeSt>^5
z7UNl&BAY9DLf)9C@U>W%JGI^w+vESits9f*)O|P5{U}kn^BzFUlt6wnHy(z5_2en%
zXF!u`C+cdmky@*I3;bsryj<(gn+L|pir4C(Vp==E+&r@Cqzv}4Wi-p%kHhx$fX2Q{
zQB`>fZT8HCr|!L|`310%wK-)Nnu$H9PJvlTsT@%>g!ikCz{kNs-1q1}vg$E`Cl>0#
zhL(7|A$R4X^`lW|e<J^G9?Ir*4s>vKJlyG0A*?<bD9m%6M`!z*3xB2aZo`Ckf^jY2
z#f<Sh@sXo2ZE$bA61Y_CcQBiN?i?iUTc&{hzaN1!_eQhQh7aQAtTY<Aq%TaB@*=I)
z0m7sdhNiDl$9$PG9^7Tl-qyn~?xrf2=Gd{*j~S#bh*Yq)E8kuw<)c@4;FXoih^+^~
z#jPF|Y#fR@-_)@*yMnZLJ(cOc2oc`8F5?b+XNV?#CL9pkABK&4LfKN@SMQxZdzFqC
z9rU|!C&@;(^s)*1e+;COy(XCI-2m&3z9IW_0?yd869=eotr{=U7fom|Z#iu%IOd!H
zx|%BN9y^*89jrNOUKl3#2_cPfcKr42FkWY@f-^!TTY&n0avHN&%!vF1Rqnd{Wtg<P
zC^Hlq+${0@>?nDF!vD_zI+)q{3K@-d0qsy7uBk|YtFh~8-dJ0XD$|!~YYoHPwIS3)
zSw-gB)tK*Se<k&W7S#30sH$y#A<`KPXywXD*tOdpbX=cKGc5OuZMM%~{#`kB_nE>+
z$7I9Pu4m|Nt_$W&s+KEtmVuV7WKf%9#vSgbK-b8T+<&4HC-<4c!>nQ{WxpPf)Gau+
zDT~jzj>Y&(rZl{)m}b_#q#Nz6WR#Ich5zjl@@7^Den)<bzjO+O*1b&iK56jOdkd>L
zNqbI30rt;5fbWzd&{Ne@%9wiKq7n~sTWlw?nF0<?59FU}7a`1Wti1W1zvwXgD<pP}
zrede>!WQ3ov}^5tVZg<`94lGB8$vsx-Q*pD&a^RDcVZBj$VcOK%UfVom<z#5I<Q${
z3+y&7#hjY+;+V$1c;=%mHa6=*NQ*k2dd0NESrhZV{ua*Pi@_1&ZosP}i^2b)p3ry4
z4*6<(4K|(no#LPM!#>XxarFVoAe3_o2HeO+m(&zol{KDW;8SRbkmzJ3ooI@NoPGp%
z#{Mf3`EXjkaI*A1EYUNdz}X2rZQeEEP?MXWFmndp?O`o!w)+QXyCn11HPtZg<xIT0
zWi+m8*THi24T8@?Kg`>nKznjG(<;|%^nC7r&}!<BPhyTh$3AJW%}C03ICkQVw|mlr
zo-tUJzZA@*=Xi;K8^MV{E`RI|=kY5|76)SPi4zd@Fb%HdxMJuX2Z%Z%eYf{a#S4p6
z@KKE#++K50n7+V>w;NZ0O5Q2xKCK^oDtSqIid%4&V<@QIxCyf-|DbiQO`>uK75-te
z95g1cCu;|P{<}^C+iOO_Yu9u1iM8=T*-bdr%apvA&Y+3MdT?gHo~-}6h(G_<M8!BI
z7#(lK^^4w$Zd=?@FERnPj_-;dzoTIGi$S1kYCxWLx9HmLhZIbs1?%m{*gfbsb-X$N
zJjbT+sCSmA@4pyMpT8^g(=Fo45g|Nk<VP`gLniI-Yt0q?{?LzITIi4h{Ck))$V+43
z<lNUn)5COhIjTx0w%mZ-_>B(RpU2pP)8J8Zkz80>B(^2@=lfnt0?3nKhxTH=GU*?g
zK^QOj6;Hn#ma<~j9U*0m)KT1{jIS=+^UFbH!iM{b*wQ+VhfGn&5c3D(%kw3|tt;WQ
z;*uX^8fnWO<aEJN9e2^!MX}gA8YMPD6HLAlPd-6XC-Y;m^WzaIqGzxTPyXDQ|LyON
z**_|2O|lkW-5w*(`(lm?<5bb8hZD!HpT!fN?}7uDdSI@jbf&26NK5}n_Si$Qtn*2U
z=S{pUq^^<r+Q|$Gd8OpP>J7Xt@!<Hb1@PlrFVt!?!tJAm<Ge4`v}i{qc)#C99&bFV
z#$T18sP>7TVFIW}?t;$CS5oP_-l+A)3Kx61@bo(-sCd1R9@&;rtJQfi!7mkOq<6!$
z9!rFAC%k!5TLGz_D#S}Sq~4oPrEIK451d-1imA8@m8#DQ$K>(i^n^)V`LGbSHeZE0
zpGqNX<zpE1ya$wQ?}UNj$HmY&cSW&R4p~3_MC(<)QNf8q`kf!aeq)n)Tm2ZUO)}-q
zg`2U&qCd{EpNXbw=VY_;Msn1`INC(Jz%p$h8(i*;u@5uo&-%BbWW!_?c_DPz)g~;F
zZ{++CJ^Xj_Dd^5>64CS*_3>z=<Et0a^tKEx?D-VN{p|&-dq+Wad>o~IY8P|6Xj9X4
z59}eAP%r1=06*T9wd!`0XLZcN&V?4V;O$aK-jqOYRo3*eZVl)hm<+`!>a@XXJN+PK
ze7i0l2e?~`yC>PvZa*1EIQ)hi5pGa-+?>_I?u*YG7T_0@GA*Dd574X=s&fUtnXMy8
zJ{)Y{+>56xSz^DoPOz(8pNBSdgfT0Qf`0vNnv&F2>|1<`UW6GlD6f|YPRHTg?I(0~
zRW!YgxIyAUWzJMf6`~j9!tnRqsN>j`;ykM^Vu(=}R8uuVU5lYuxi&#GFY*zehxvoD
zZeRJ$=@C`0^N+*9U8~^3fOHIhUP-qu_23+bFZ5`ER4AUkU8q05mM<1Ar~lM@(x>~r
z<g8>vVK0<$=4mHx3rXh7pN^1?@)U~vF%Rno{~|@VQ1L*9KhO8EVmqVNv{2$1y>QB=
zu=d_8{}T%;QV&+w$Wb07o#&J?v`JZG4ko_{l<3pi-1SvYF7lGBv-LjQ8u3l&SwBqJ
z@YEHjYixj(EB?Utpb(zLYP_Xh866vXL&=q%*g5t-ZO@3|{w6vQ)p3`wF*ut}Kk(xF
zzc-MM$9LgL`C;MM-Cq1S_Mgn{T_~Qv*)HrFco_DbImBVzFUyR|ok%TV8~QFi38|qf
z`0x37*~zt?QNPRpUo>>bGT0&0`X!R{CtcJ_X%<cxb)=sM4bdmqn6-R%fcuhhnEp_K
z%Ov{v+S!lcakxa^oTLYvGy=F!Cx8CFeI@4Z`A!bu#S)F$Rem$ijc(hG61&Cq!9iap
za>>?eP>kM1*<1bjSYQBsyR%hRRnV2bm3X60@@a~ndx>Nk8dWmKg;YA-47b|rLgDSs
zbSPoG{N46C(R@e~m6a93e+i+OSfPrQ32oxiNNLa7{F4I1yYb@myL65M(Z%aKDC%4h
z%DY*LW2_C);l~a1tUN`7j&#CT?+e7Qm#RRvyMu7DwpQH!eF&_6yISn>s0*qW4CC^*
zo5d5-`95y(EXa${A@#Qt;P#09JRr%54-5(g7qfgQ4jzW?n>|FAkCo)SFd3(79|lFU
zjyTXr;Q#j8@_64jU~RXNw4QCiTc0I9L!%!|sQv-Lnty4uqblmz9)-M=0qp+Bnj3Zx
zhDY|&xAJ{|&gpzdR2cG;#w64WwTqjfYTs$>p%=$c=E-)~<gi!w2jSQ)Vg~~as@1gT
z3Ws|_^57J-G%F>IS4JG0W6xc7n{xb|>q19q@94DWG!5?%%l(dBqfji9-!mLVk$co}
z-Ax@{Kj4*+zPXOa4XG2a>wKW1^Uq+aiK6(XC`r8FoiA!P-+?jR=fPYH>6yv=LiZzE
z<hAOLgfw>}VUFe<q0{mHLhgu><jS`Au&V*|t~9_h!?~o?_yOLnj)sRz4dCQ*fhxbB
zqPsCVXy15(<~lu>-_140_Ai;xd-^0C93WXdbCZOO+4*!-A(Ac^XA<Ox^W#(Yd=Sm(
zcW-6vR#ZuamoL$$(tL5xA1}E3X*k~(_CdmjB%a_k2ev!8(}W2UQy?-FE8==`%koDs
z(l8Y^zt`f`9nzuC^;z)qeGx7BJd&?;9L!Vu1w-bN9X$2RK>SwNgYRe0$H*l%C@#;Y
zx=T4ive<=v_dcUr%Uw{XIUVA3rL+1o7j_Bqhfzi?u>JICd~<6YCg}x{`<wI9>|2A~
zZ4`v-Ix*roRX+?)aEAV|ejFp^dBr)gLO@JM_OjUr=4(dM=W#J$xhx8&t6dTE8oY6P
z=P9sz;|NN*w}>J;HGpBa4OL;=<alSw0rL1DS@$&Doxk{7;IlW2Fv%_+2IQMybJ%t;
zh_>PnD`M&V0S~IF%#h{~)>wJL8$A;?3G*PI+MikQ(DEo@!P&{^YcK<%j`d`jx+=!Y
z^1-pRlbAAHCfnDj!efF);@0y~r11P2t#;jmPSUq=a>Gn9*yaG()~(0iEALYI?*iyP
zBb@go{|81ZN1|q{H%(i)0bY|_j2P9CCLe!7rxm3C!Ou?kTH&Cq=BOhL*jFN1u69ah
zI}^CqcoC;dRQs?mCPJrR5j@%-)5g4Iy!U1)?*80|ysoJ52~S(T6Dz%IYgglhFa^1W
z)XQ?KNrIkXuc+K`Ab(3PqU%0cxZUFfD#cd`ZQhP#t~QPh_GrU^q(m;<Qb5YX3^{X`
z3mhx-hq1bS*w@<)eD5YxxMWz2P|FjC<r!nEcMRN^HiA!=zo2KcOnK_^zGV1|#LEZL
zv8eYU$-)(mz94W-VFInXorcRB=Wx%wJJ9?`iM{_;3KOMw#LGQFJZIe|IAWPiMr%jN
zKiXTtHQh>~=Tv>cqQ?|*%c9Bbeq|<Jm$LACefLOKQfX)EHkr>XmUcZ#zsTulU-~O`
zkX*iN(7ib!@*dKR?zNXGb5l9&c0VD^8n&BW&$~s-D$Mx$^m=%lwm`CJt^}73<1u4E
zEwrkyrn%=ILezCP$%63$E)^)UN68$hzC9dlKTSZtv}CUBJr=jzJVyJ!j^GM~K&+ph
zE+%g%q4=Bz`Gp_rx!{vN>djn6etimPX7xB;IQX*MYSadbvsQ)+HE-y{A31OS(;`b*
zdKOj<cL9Tr{<L_kWP?(@PU-S#X!pXHUB=HQZfmEDUK{DPv=3juW)e2;y#ghDB_>+U
zO4eIv4O<^p&}P``9Fw}7<JTp?-nUmEq`i=u?QRI4DvlEv?txQRSs3{ItSDd6O6IE7
zGRN9*>U421x`i%;%`OU}URxFPZ_&WIRl$5dD;p-b&Jh1|)PWQm7uK!N$BRc?dEAB)
zVPHs<n7eianN}_1l|N1Bc=ZJCwV?wWzurQ7RjxzgQiQ8EMm%AaC2zf?$d<27*!Is)
z_+S@C(?=bZAMpAol$s2Jnn&$0e9%QG43i#=hQ9E;^bn-a0Dg3?1s3(zrO_(M96Wpq
zFMYiMMVAoNtCR_I<_5EP$eN=&XrSN145?%KAFW)h0`-no{CS-NkKLLrsN^4pRaR3t
zHtYeM*sY6OyEM@rl6v`tAw0<8gk-fGh}1NY!~R-vxAe2H)$S<h7MZ}2jLmps?nvY<
z^KgP$62|twA!Tgi>EDAlkkG#b@}kc|`F3-haX6dCyC(`7y<NB|q9fN0DHT&1LSglO
z73%QB1Z{tw7BfPpvcE-))TNnB2hFV5cl2@jgpQV&zI_?)xEX{_+TLQ_WNAM(SF#{)
zn@6j3^YH7HYp~Hz1;@K8llQ3^bgrrwcFxhn)~64}<ievAbzd?o8c0m9G2=*~hb5cl
z?3U(Nfm|6~39BA2#(d9<&eJ^Jh<~S;^Ri95g|kP!z}j*VcaKm()6-+QK|$aPv);hX
z(iAfMwv3(~suLToWn*Axi4g3tTIy;i(9%ddELW1qzrNc+JhX}PXB-vg_(<liHwrwj
zVKkm{(S$x8>Y)53iMAhi5eBxZ;5sQ+*K+a*_0R}_-O~5rddfg9=xZlCeb^V{j{PS-
zXzu|Rc`W)Ty@q>9Teu`%3HPUaaGzdj+?Y@(-WW9>SAL7+QuP<mE9?oZJDDMG&WRUy
zM0Asln9`YNNwfIH?JC@)V1cs>^6}HK|G@QA7g>W^GJtbOe0=1kY*~kQbVo8J^cklK
z>Fd<--J=9}y5yM<zbl+_j%~!CIs@wETO?@AG@wk0dK>Jn3rQx-VamZ&rML|SZaFIc
z(3m8iY0jf#Z%Sm}B*w;)nxPV#VjXs!T|;a2a;W>;bXvKho<gk(z^L;=LFrePsA?Q4
z{_?azlQs1MlRADoXUzR3x(b;WB$ihvEmo+W%QmH#N%!bc4E2h_;u0<4z{hmxII|aP
z%`?CfZBe-Y%tPGM*dVkF60mT^L8y0G3qK!T6P#M&D97O^=%);q+Z(^5A7usjaEumd
z2d@@-nf-u%1txs?rbrqC963jQB3$X92r+pt=-k4+{3PN#WvsR5s9Nbgad`rK=r<L&
zG+!sRe(%8X`#9|H5G77Z9!%xhdm%PCoYZum3vYC!f5}B(D#AXjarQne?)Zf=t$g5N
zZ^_{B#GVt~^l;C*L-gdvUFg(1QM`O;0v4T-?um0X_~wo>%4m(Dc@4%KGQbI^DD8!`
zf;`FKq{`F!#?TP?F}kI0$OZLN=+}@TsClJGT$iJQ-FJIJezOvf^4&(KBYZf|y&n%5
zIE?@D8shCs$h+$snDJ);#!ZRf$1Y{?Ks}KSlTVR)uPnGhd06my7oGg-%b(8v5gu*1
zF4HNC$LS|_Q+T>7>qzIcs3)epZtN=Pupx<DT(Tiw-3O<fGw1CwgW&eH5IA&N7t{O)
z(i(@!V11x3`wbf`j=83aKF()o#Qu$Fo*aO+QVw%h;XT>TEIT~%FPks*?MmXi-*C@=
zq&UCkfp9pq2CD58@n!?S${VBMzqoZUV%}k~_e*W|`TCxMdyWGkGKYU|wZXxPo}8lH
z5j)o1gWWGY`P(;=HTz7%NTbm_&Ali8mM`UZ50YVDmn^up*cXeG7BZY31b3sO;9czw
zI9TR_%hHd+1v(2O9eeW9bp`PE-#O^0Z~`=^NNBmrqtyQR2Sh%!hK>$u*eU0ccx~WI
z!m_J^d9pRW5mmU~ja%?II-4I_PvT`MMby6Pp71bZDBK=33(hv(665yt#wFjEz_jLh
zH0ZM}I!4;Fm0>@ee^0X6rVhiVUlHP{RegAn#OZr{B8z(%4&qx+HCVY(AEti30Jnbz
zKvb3$zihC?S%<nyyT#Yk=u<*vdGD#iLZ+oNLqz?meCq#Wlwfnl162y%(4^>VG$A~T
z-~XOOS*a~#vG@}B^%@G-r#*lXp{k(&aSe=Ie4w(rFrOU1s$unnQepL1Bg~JU#eP#g
zL{E!pWN~D(Fyq=uD*qbDd#7%dIE|z5Ma@Zw(0C5<rCZQXUEpW>n(USOOq^ymj8?7h
zg!4v62vz4k$ybitLY@nIa>C}1kXwJ6@^5_<EY1Az+Uz_UVbKgrT+-=)>82_pD^)x>
zxf?1SUW32p4Tf_p84$nD$MPJB;BG8qr&@hX6H3MAjIMm}>@DF-T>_0+<w;)`OY9#l
zZ#-mP1oN&r;+2t^aAdp`hMT&Gr;V}T=pZMKIIPCo)elp`_`7t$Fd0)NDstK0=g^Yj
z%&ryp;a-X{d+c39r)zEtO06<(>(M0p6sIJuD(gj+kAkq;MKX1y7*fF1J~%)5Yi0Pz
zPF$y)0eQb#Aimv!3zW*K-D(pB#b}_}#vu6B<bvYP=d!QcRAogwuFz?j8M}8pO7dmW
zIcA8Ol$o>z&&Rf4<?V-+i5qCq>0s8mqsXtWOF7%2(;;s85}tKg;)h(kC5{{EO|QEJ
z!!gwY@-Rrk2c{RGEo>#V&g+3!tg`6-N)1|)^_Pxj8e;$4yRgY@rm*l~ID6m!3EOl=
z!Ph%Opk+Y|Wu^|MD=YWW^M-*mT5}dVTS?#PJDqWBr6>L>n1sep*2CKKo{&~K3~MC2
zTf_Mhc;%BIc+^j%556({Wq>ov9w_no5=U;(RpPjcesJhgqwqm(01qqM1h(T-IZEng
zMEBC<r=dQ;<^RC^z-X*prOo}P*9m@iRpH++Wi)tcgElRuI67;WIN#V1vd+8|>(?s4
zlCdkf%TgP*{nR2HsR<`5X$SQF;ZV-(+CnK27vcKKWIAM>AZkrX<!vFuz$&ki<{A*a
zNSn!CO<TmM@$HgD<g7U7Xn$;KQ{(yW&N$6X4-cIe1SOjbLSjjgu<Nif6!%UMjFczH
zReZE$Zi^)b)7S5?Vrm*VEKCp*JhgaD#dxx*JpnsTCDHL7L$JNnMO5(`0Y>lkfzM1^
ze7C(4RI2mo?Y&)M;i5kHdzC6T2k4+x)DWH*@LuZgjAHE{Q8-|Rvk=uhV%V69m*hI9
z5AJi9iTjGoP`fcsB81PAw?+37Yv=poQ=@@gyuS-8t~?71#`h(Qaqk7cJ0@&1EEf8X
z(Z?~prr_}_mUv*KGnxfXqynq)xWg@llh$6L8ztYNQ}Y>6cE}<v^DKPUwt;`XO05d}
zds-g*S5=s8x|=&BXTW>|iAVd|LQwu-0y<9=c|(~Wel{{k{R!EWzCWKIpIHv;c`5ik
zxI-O=riec-T%>+Ottq167o?25NS7W|P{X*HyrZ}Yw)#7Q*#={-SpEu%&-h~ZQN}dx
z!Fa|?mtf<kV9tmvp%EwVO16bU=~)}cje~8t|E5fC`cI$%@-xsj`!x78ZsddSPk}>|
zv3%LK4tU1lo;07=rsChHss85|3f8pY9-%*Bg+UTkjI9HcOA}=7>zw#>th9TKlWH3g
zhp|Y<j8A+^;g{~|kT~a$ynCPzZVpaHdq+cj*nAlJ1t#FcyVZ2dWE-7Q9w97Sew_l&
zjeu8U8|cE^L?PfsCma=3OV97`p&OHDbDxiI$Y6b%khrHi=5-&*+n)3loTTs05}^h%
zt6k9B@FQ#vYml7`FvHdPPBcq03GEJ$^1NO2aAZ?Anr$$Ijio!zajot6B6u|KE8Zx4
zFxF)9JxW^hR-)HDZFF6;k(8X4i<#Lv?E2S5?5b<eb4uO#!{%FX?93YoU|0T=<w^E#
zR<Jf-1T<NQ@NI;UyXm{&8LEy771a<sG8Zk!KPJ86$#_pO8*&T}Q=0!qd~mK5Z93}7
z{RUqXKFLeD`|Clt_v8XldJ)5!ed9%4i+%9;R+ZRwLnyzwZ3K>bWza|JrW_wK7fXHr
z3ZKF>Ie$SjC4Rm`>BB;}V05v3%91vCWu4?aJ!>oHmOQ6Dy$-<Z3AJL+4VqY6F%~=`
zo%s8!FdpsL4^poTrAL<9JnD)8U(}dS;aN5EnbAK*LvL&DdCVMpIc}mue^x@5LWy}g
z#+LSe(-#tET2R)ajeOSZ9=Q!rf@y=wXm!z18l`W*ug~p*wz(t7^L{cVo4uyi718u`
zx(au{wMh6r^{k+8nJ5&EUWT0$1pzkhh1B*G?y{sio0!}M?N!~$&DMf6%Jzv16wINA
zreys&X@V_}^>}@a5h}0I!fEdnNY{5b{A>)O9>p>UP&KJs-x!BR9Y>NlR*jsR=7<NH
z#mZjc-LQIF29HkeC*=2DOS=74Y3Qx}uuyp+HeA}m(lkeQq0L5EJ<y$<7N>%HZ#Ddq
z>yA<DZa`0!5<35+g6eyw!}6b3#rUqVpi_62zE0T>_0JBHx#QBRZcjFF4^smy5ADZx
zS35xd+-_2yYaji~R>$+{@oeXHh)%L(_j8<uz8{z2l4JA8(Qy|kYlmZ~!9-DSQw~-Z
z?<9OOn6HN&qLLpA@%^T4oZ7Ym|4aNS+`hVny)7JBJZ!@!GW~hI;u%5b-Y*z)@PSYs
z*@qLh-WPnVRB7&@Nw90_EBN?H;+1wNpc^x0idxDKV3mbGu8U0)qlQJ3pte)~baE{O
z4e-Q%bN11Pj{i~qpKH){o*vJS4-xjaDvKu$bjLQ8-uUXE1HL}%40GcL;LHWRCEn+I
zYIFt|{4E=fhw5{;@aa{u4|WpA`!khCcjdVk_3>IvFv}nWgU@th<*-w9?nF8@)do=U
z`!2(7?Ys}W9Qu=&^?3P>tR}ElkP-)q@zlCl8D}Ofk~g-P(ch~YxY4+T3eUe1|1?Tm
z;L(aa=yEqae=8MIpLHOE;@-GJ<Bssr{X1#?JO}hYp3cLc%Rl=2G9w~VQc8r1LS{bK
zIcC`^?Lk9Yw0AVfjD)1DHW_J2$>%z!l1l4KJ4tCL?Y-1}{qEo6zWoJU9*^sKzt4HS
zp3gOMm?AO2GL0tEgdZv5mp+p?G(`bDKNZ5gIRS!mz6qbHH^C*QI@nX<4!>(Q!ggT=
zqk^rVQRBmA{?4fX@F7G?cEtEQTKr(te(}rper&g*iaJHO3C$|wuwYmyYvmWh_>?Z(
zw&WdT_FshuzL?_>HAVJRoDHJabIRD5!|6Z0pnbPFb&o5Cd1QgBpKjv0vyajH|0ang
zRdX@8Uj~h=a|Kz9I>tBLfabROu&wYiC1ff<>D>oH>6CYLe6ItZ)jcb$`T(RLaid4B
z?Z;a`?MK6@A&?Sq5Xb2+!w;>`>HN^k;tkF2+`83Ae)hk0LUoM5<A+Y+lA09RM5B{n
zdGb4SEDVG`O3T<uV$r<LGviZ<ad@WsrVyWK43)}K*C%BGCB^FDdd=gIu`op#SsDvE
z`mg9yX^J?u@fo~c=K}6=#X^|a3*DwpkR~X}aHew%PdztNxOm7KyWLBIUF()|cWI6u
zuh9pOzjqfmPDp`XXRlC1SQb8-^G6K+FB9w^CgBPDBA95ffGv-_gb{1*Q}%Q%9QeJC
z^2;C6rm<(?^sp1S|N2ZRuQ-7HZ1%$R)FygY^PYy99>t(+TfQ*hIC+M*Qq++yd^7m8
zkn&#YsaXW^jstTc+-N2gOqq>AJ5AYdem#sFt`9|7DO9mLTsZez8Ah8wA;mFgY2_4K
z{9UDl$MaSA%Yz^c3YdUv!}~G~NuduWfYz53v88bfWi3!bdbC;8Nt#5KF)cJgVvAcy
zv+rVQF0iiI5f{Dh4(*dX$aC~J=xvZH>s6_Q2ZdaWzcZK~o$JB8_a&qUzNBvhmx5Pa
z1Vq2j<^c~o^XS2`Y#22Le-Dy&oozX?H|c$FcgZ+<Qu3ANJ$HwDF&%k$gfwqQ7vV;6
zcaA-#f~}>mV71{{+IFu3-RvE}prjjXJKlx3`LlU;k`jJzRin;+9xTr|4ay%CdHT@|
zQG&_ysW=s3kXkv6mcAj+YR-yDkxn!ten0*G+moKACy_9D6uQc%@bC>&`Thh865x=0
zy-F~y(V4-?r{;p*Nrt~eT47hU3Z`ugLWAd;m}sER^S_RTU*S#i#H_WP|4|t~j@^v4
z_8ss>bv8VDv;ZT{pA?)EBUwwF!5508{KC~Fj%+XIU*V60%t&8(*4(j})Jxh!4c5i#
z<DPVGSuD>y{)V=Ft)MiCjpwW4%0u`y)P}W*oxE(hi}YRX7u5zw_yWZQN;8^Crd;yq
z3+O56a+OA+#Nv(>Vt$O`)csfKQuSVnnD0z(8j^vkQi;Dc?-5Ok6)<#g67-WY*)^s|
z(D$mGlvQ?8*3->G^<7)}lBdS-Xa7pkcSbtQGP@*;c8$d=J+fGJfC+j!Jd<gAEW_S0
zzr>^YJLKi5FQ8#qE7kVSBDbgyv}=_Ex^%ik?m?B{d}zE&Kg&Cme|a1K9seHwsQ!@h
zd!2YdiyF_@+ayM~&Sc%pL1?hQ7Plm3^12bqIP{1nE?B$*GeYWN=THlt^5ILhS-mS3
z?lt4-3peAshc4WB!IO5E*}~b4Zgf^nn{sW`>Cx=IY&-ohUH3|5{W=@E*<lJZRzc#S
zai~-^5AI5t+95d|aop9J;;tGgZ~i+4`_3zdX6MOx=eIJpekv3;4jawsE53v0g>Tex
zSa%pJ^-EUU7;>Ru4P}JK3XLbO!WX@KXzpUcYpo1X%Po+Dp7n<%S_w4S)(ou<D{*lA
zQ~FZXL^W}iFyWFLW~Z9*cgrGi(9@1=I-rt1-vxfKMWAC!PCP%f12x4zfX5Z9=|J6f
zF>Hv6v|lc!N$;oe*(O^)<dwlK+PScHMOWIkMY7rror9OUTJz_gDdcmz3m#ZLnJbR#
z;eTb;>^m$Jx<Bp*>2oiW%)dX?w)A1Ov7vai>8HFTXc2~O3grEnhHU=hFsz)n8yX_M
z(VRKG`0Jh*)yr}YLtZyM`j=TvDyJ(c#(N}h_?ih$-6pU>n-7QS2)yBBy(l-Ih)d?_
z!2Y*msrMiy{#6_a;vUKAFg1~1CA3ickhAo%;I1rJ>mEgW#es|edny0r%CZ4vLWsI4
z54Q8ib>r<M+x=i{DDoFaZaBj)Mw$tRAy1*-5Nmo7s0}-fdSgUv8C*KtAQMwkIlbLp
zOo?#dj*-?dfYj;ByQwsPxefZ<aTmT^je#27VCqrnh-*$B;u}S}So3k0uw!^MMNBY+
z!>OY{=bI(=_Nkz=evNSL$69`r=mdwi$3n=-XX23;HXM=eDBTYi!<9o7^z3UcoJ(sH
z>dN%E-1mW46jcKT$J%Iopgp@Bn8Z~E_44crV|KnS8Ix1i6IqoBj!spe@jQo}f2^m?
z`X(^=aXxu3{0&|Ao)c>VZ{r=iqu_2n83Ozp<b!sk(Hqw-G|6leSDY&*`EDY|N4sET
z;~f~Nm<)@LU8eM=OuQNxg8klgK&$WGygPND;52Xp$o3aec66MyZmfWP!!p1zVl$Ob
zFvJG!sp7VPTJiFq6WDUhne9)$fI25#+!nJJ;<}f)EOf}J_AEU^x8LgG4KHsVblnY|
z&sYnqRFwG4kOKHplpu?3R|E5R5<4-V7f<kZl<#b+6OLN`f*H4Fi$$jD>?%~@kvBOM
zq+ALy6VrriR$Z|r;u<NgFk#K`8T{<t4Y<C<4qq5=!dW3>giX@DETqVv)B5$qk)GEC
zt2rMjZhwRLLbU>-Q<cfa#sywF1PY-`GEoRo5EsVx#qco_OX$x*Xt;b6x@#(<<pC$o
zc{&<ipC5u&TLzQHTx%THewZx!M+vz%&2aRJm27!3nKyp^C^1y7!uXc`kgAo)%Wg~N
zQnwNwYHP+;5gMXd-gF4s{+f0@(8Q|-yTH$JHC?$KCs#cy8QeQ>C(T<uct%2{*!Hmt
zFH6ql-3`q!fA4FWfbZdmRVn?elyP8LAdRx@kJ<I}q&{UlXO~MHw{|%kdxPkmwu3c$
z+!KfIcnn+q=Fy!%Rodazh4*&(492(K5?)cGzD<>|d}u7Vj!6{DvwMhIU+sj~^R@Vc
z#z9!LaG_*V-^aD`S3=%$CEor{4O`c>i}D*&dEKtN;)1(Y`1pGY-TbJ{KBKol`A=z1
zD)HnRf~QfeMU60JMJBw7O2B^RV`;`p6*yW~0!N<eQ~U7uApdlM{Ig{k5U#{EMgu^5
z><RgZ$)N~^%UQ#59FEpFPL<!+3!VPPNX`XqeqdR`BVHSc7aKcMcRz^(x9pqCko>Vi
z>8b02;{LO+<53Nzjj>}Ra{`T~L>lHBPm2xPDd);hDA#tTeoqJUtcz<TCRQ$J&gls;
z!+ydr*&*uv>LOT~tMdg3Z=QSVETxCd<U)%9m~^s;TwX6Di+*-=VNNnd95;iJX^P^{
z)>gXSty;=D93a(GCR`HukiM+e#ANF~RNvGce~mT97a!z8*t%#=y|$k|CmZvW8gou5
zdrrL~q&#T<-*hu|9WKzF0H2fILe~EKl2N%o+N*pNx`xM5*9li(u4E0?ezt?!*L|Vi
zYu2E4vl4C?-wXc*ccTrRG}tpvAFQPJqjUREEHzVxJ%d$%{S5eX=nb(o;4FVpx=P~)
zBX0ExMelk?&OI`LJsLWY%3Kc-dYzyluMXlup_ZCA)k&-yeS9+V8mal)v-$HgFsx=F
zajgXeP8o*pedgf3U0tzeWEOPnZjCv56LC?mL)0&E1*j=F;vsiMC{#1!6{Gr8d!6yY
zRdIiXpl<VpwpYr0VZtS1$2higkEekFI+$?xCuKh$?6RVy8_GsZ<TPCZ(P1z@)1N`l
z*8HPRN3N3kq9~kHvqj1|_7wEzZ-IhKIn_2F6{s*1cu-(0R(9QiBYyNIjc@jR=HO8F
zcxWU)Wod@X7P-*p<&osnEamT<G;o;pX|e5L9Gva&5YAWMg)83n{CTr2J<FfVyWcGn
zRnGQgoq0#Z8@o-hc19!J)Y=UvwU9R7TFifco)=ykPX||@VzD?Xoin#yl)b&0PdXOb
zq^9^6y6GuXyjg!9qiu}mkB@}={RTqq#t-y0ej>_;YoNV`z34Wl7`ygW<9?<e#V{i&
zV|z{F$`0(sSx@hSx3VgZmFAJRpWcGTFeNcNyodOA*c)21bsIjhwZsPNJ#_r_YJT*5
zIJ|myhT6t<<Bu`PkU8lUU3-|qlZPA<+WT7KnZbvlyseCeN#}s?o-1M2F?Z^<?rpW=
zvq`X|(iCUGJbq+q!tqAtvc=yjs_jO_!nN<ysNwlDa{r*ur>(d0FarVBsZ`O0j7_j$
z^*n4(pF?kET?K=4rub;@FKK7;nL1~TmER5-#gi9Wqw@VAiT^T<pEdOc?=A(nhxIx7
z`#QM#<RVNwyq-iAZ@AWd2X6hml21E^^TmCa$>{G`_8fEpx_`YxX$J?g=}#7WlzQXG
zP&3e&r^4wS&B*#!CY)St&bur9accWL+V`r7F86*2dEx)jcojW*)!hK19VSYw`koLw
z%$DEA`_Y+r5~pi<^2j`89zT0IzTEW`Ub<C~!qGo;=&2n>3~{C(%BRWbwK@MBeGGLn
zy5Vl;OAz1wik2KM;8T~GdN1|CM-NR!vl<5qQQ64<x*)G_dLr(8n#b!d{HB@<p1e@0
zJ0Ixg!X|~0I9aEXCL7Gcx>L!dv`m{zbdtGtoWwMLRxkYW-zyj|$b`ckE%}w+QAoI@
zElypt7-i41#Qgpm*k<5{5cLK+`P$&Aeni#bV|h!TL6UK>7kdAi$WanPBz%e*2JY<)
z7K@$=CHD2Ocy>oIqoyM??I?h&%AJw4-ouT~h8(Z(o0cE`3(oPMVA|bs`Ss`zq*S&{
zEa~1zwzhpyt=0{1ZhS#+gbY}?&WG>W*kDX#COz<$_ANzvVrPRraKtT$`#ifRyVQRL
z&&oqQvrK~qxp#t~O*Y`UM{-)c9?!+*XDEI|B0hDk6OQ@Lfv2s`qC=Wws~oWi<Y^nB
ztY=qVeZm7()>+}fUj;mSQyvWXDxKQ~uES0T!fD2ZbZT4g&WVNk*icglzH5`|xw`Z<
zF^#1D-<|pL<&R_)a1=(rmbh8|wUqRsKToKsq+c8NiK5B^?$@Cf+RYk8ZwFmYw&{j{
zmdDVKS*c*UeluA)_o9;J+OX|cnvlE3m5oR3Bt6Flfp;58ztdZ|=krx={HKn7Bw9(f
z&QD+{Me7nT<%#a0IusW@;*^!yKDd1Ux45RPgrk3k$v%CxLtR~MY(559*11JUI#<Gz
za0vg1D4`Lz)4*%`b9in!l0%fkxqi`X*ixSYy){cYY4UR6-oCD!Uv!sNjgJ#toJ^rU
zY&KbS$%kz>6>wR<lN7XSBp-jOE*!+kG^giP;n9Xqw4}C8KGx|3eLLO@=WU3m6Qlja
z(j#Tiyvq<H^z?9eLthv!j}g}2&5`;T&XVI{5S)&UqXFX$vCrp1I(MK241$+%*k+b{
zwcijnE$#|lr*{jRdig`ixgIphc{E2G=EB~11={}6MVzlagkAQy(W?Uj*hiUTr;F-*
zLxNN2HY~<lv6_6f2q7iz7$ztBL-yV0f~+K0EO_pXp$7-?g|`o=<CD$Mbh|5_Ti*!F
z`YH0_VfRFr@K~PPb(g4+o(pjnW3kL}6z8q{L_1A7iVaF%<%1>nf>V~X6OnR*b-s!8
zKwS;Tj{G8)FIrmd_TN)zJoipGR29NIovg*_o=$XY_Xc=5-IS+zTEbfYKA@BdSo~-Z
z$9i<<uiM+iReD=sWA9;7#?pyJ?<;gN0x&;k0C;^jrL`;b!KCeh7}UgK@KJ4ip((Kl
z^R~!lDs`caV>Dp0jww&|T0#Xw&G5ugJx<(K2Wp#t(azzvbVTagpLg&RZuR|3%_fOt
z?;3*N|Bhh_ca?W4FNFR5kBGaj-hi@m1G%Q`7N>q4NZB$0AHQGEhed7Zv_x`BBq~tG
zHdQ!iuE23RUhsReKAKb+p^v*aPJ569-CxGSuo31MJX$iYe;ErMuDL<&y@Pn8Vlj`_
zJ_Lmm>&d;x5;6YHEV39^4#wYdY1$%*9<H$sPiMV>uuo~UQkW;~3@iY*f5(MJYkySy
z?;#mCNgS~cT72?-5H{t+aj)Yusrim7`rOKdRCQCdAG8RzMW3gk!zEkqj@ew^Fou*5
zM~lXdIi#u(3Q?IGxnry*>Rca*V$m8l_tZn{6NowwB=+9HA$;R!v(UU@6uv?YHtus4
z4lP|rvAsR%#FuFBIF})xwEv&DO3GK(#vdS`b|VOO+Y4!{4d_taJU)D*k(6vKInC%9
z*t<25LiR}L^l%*v*H?zSlBw6r-CI1_^pPx;=do^?0^i;gM|$Hr%SNcg(~zOs_(v|8
z&&SHZp`S5W4!uV{S3}8ma4F?TM)C2PM8$^FIb_Ftm|#AHqtDoI_}(v|6z58bHUvX1
z{eVODyUBB-B{r>;x~Y>*Mc2a_*tfD(G+SH)YpS|I?xG?g@~jHp8Kwr8wUHG97qi+$
z4Yn=YBJJCIk#%_`KYJ+gjddMacl`#Ma<DhMDk*bpZaUBP_Ml7q6#4#iCEWHW1{P(;
zVC=Z5TrF`Q-)3l0M7J*3@~;NIC-ud<{|3>pm093_YaR?ZeU5}GM|k;nH5BxAfs98L
zg71nl*g1PYb#YiID1Lh^PAn^-?;7sX|70q?KN7+oR+8)Hx)z$&cjo?Q6yypIH}KQz
ziS(o0h0jYS;HA$bUQwz7M*Q;Pge-NuGxq@nI7N_8(_rfPN1Yc`d0?Sz0T1@P4%M>~
zWuVd<S4A5Or!OK7mHx+%MyzF<Z53k0%-MpiG6-==I$V5gCU1Cuy7I%e@ib?Ylrg=&
z1K)0X1;33x3M0F|B31bWp>*nLdhx6$o?hIC>!kPN?N$3IC{pURpOARVW9#Y1+G09X
z1MstZcX+?k6BRpnz>sy3@JROw4Y0|l>&-*ic*1(>{2>vn``3s|5;h92s{HWB+a2`m
z^B4M-6-(2NQ*dvHKj<VI<KFv)LVdXh&RzALQp<L*`KUyifB6md@aX_e?*Bn@NEG$B
zvY*1DH{gh&@vPl%vy@D{LP@Ljr7rh#aP?XQ=Sy#rMUQd()}|MHn>G`F_IJYFYSO;H
zu8Ml@Jw&NZ)9}paKQ0A#!+F-AtK#sfinwCtQ6W^`2jdsah6Sa{9HyX(IdKheWcC2s
zylS;*8Ee1`rJZYFa20)YE92j$AH>1tx#Aay#V%C?#JJ#A*mQXp-TrNc@jG4E^OG-M
zwC>44dx!F{hr@CA4_nMSHdA)g(wXCKei!;hbi?0LUuB+j4!U`NA)j2+DEjS7ps2bd
zVhi*~zgZ{6-LjjIeq|q2q!`GObJxO@;?dCA<Q)C}cT0G9IFL7p9XO~-n(N-14=J*z
z^q{xIfZy~{Fn&`GcTalrXt!ScG^7WA-Pi(KlPqz-hEmw_sY3EmiTG(o3YtVaWBkcP
z+S1jL%XjxBxN$?`m!v>RfaFhkc3ZHSn1a=PTzT=w!%*FAB>O#^1}5d&WH6xynpLNs
z+7Ouo4HmuWZI~)L3?~XQv&GdvG(gw91Mbj>mJsAtxNLeLJNDlxE)1yP)7R^TV_)2O
z>Ge}^4KLB~7)|(6en$xC?!hiIy0XWY2wvwo3xlk>@Utw*V=(s!G#Bj=3)dbLD|hO_
z1+x~}+C_c%=IJ!*ekhgN2hO5iO%<flO_Pr=@s@JzGbm@=M_AeX1`Kqr(f5c1cD*<r
ztftG*zil}9)@hK^2_rmHGne90mDp;X0qCTa)6K!3#qgmWd9Cgj!C|}+H#nw3okdUj
zFYK5w?YTBxuJYvLN3RLb&z>Ioxy+w=%>!}VsW@?R-byx@d`i^Xb_rS)o5)Vd8h%`;
zftlZo*}cq&f?+1k%K0IrNgk^0{Q~%vYA>#R@Kku8cMV#87{f5nJMijBHs8}PgRc#7
zeC|t6G}v5@E~=Ypiu){xiTfpGtDD77pX&v)uUW8aT9f#F@=Ypi?+fAQ2IJN~=2T!V
zjg5cZ61P=_^1Bp&w*GjL)D=xR?5+&Q7)5c4XoYv4t-<J*C!pZ08T!bQaESR%$kqQq
zT^9wR`My?~SkZ~2r{uC@=h4E}i}s{bItvmCw~6MnUP`Rg4$$+<OBi*(5V~be!I3BR
zY1`W{yg#o<bn#B+ACg<9*vb-*pD<+4&sX8?gQGCjHc!<2mM&9LPT<|&#tUKlK2!c_
z2fW!kj}}VYgNJ8^a_s0t8X5LYe(#Sf&AH~ptM~U47hCrd?R+<ryda#crx`)0v^PtO
z-zctd@4%T&ZDjSvjgLn9u$z?mYii#IPhI=4(#L<4lXC~o>x(dHa4V^pzYs>f9>GnG
zJ{)rXC>M8Gh?kZ2%k8ZKXyu<1lt2%}DNp}{aW991-PL$8y+;A)$hPo+XZ2K;W{A&E
zbm6FNN|0+)BLu%xg|lXjB;OkVsWJ^L9$hc5AEw78sDzIV+*tK@3H<in0l7U@q35&T
z;+G3UNXOZiv<i08bVX+<>v0&y@2wRK{RZKC^&GA{s=*Ugp2=6u9YOV^3ZXxhar>xI
zXuHA{N-GrbVdoIm|J+KSO4`MmD1EGp19@nmG;0ohLLbiA!Kqu}oRc+y_uQ8;Y>N}8
zZ0pG1hK;6z>InF6>SMa{d?_b=R>F>p8)%$w8T30Q&21J>fCa5KsM^|{uNPmUOyf~-
z*M24aYYBi(RUIj#W;q+gPJl4&iP-#WI_6|co4EAryy9^k?agkW84qO`U(y|`=OwfA
z{!zTkR*wwc%%{ogdUK7Wbm*e6Uc5JUvS{D%0$jZMv5vJij+xm)VY%6KyX?5orS_NL
z<|p-ZEv?uoaw>ny@)zqCs_@eh)A{G^pThCSH{s8j3|>{Wj3-{o6=SDM+1*3teCX;E
z@O$;0CbpK5&sGOqaM*;Eo4*PEf6wB>mj^Je`a0|`lQ_b4>GYqa<gXk(5JN%_Q%H0c
zMiu#^&!q7XV;e$8yZ<GfLKzqqnUd|0O6a%l3OrG(B0Jj+(5A6d*mL9zBo%t%tg3S~
zH$2BhK2uJ+c5Q*To0T|Zodtfij^L7j6b@ScfxP@f@ci_z;*Nm7U_Lhix0>4H(g)_4
z<2V7^?~dYJWfxpqe^Gu}G~>d14d9=p$w7Go+5Ds*_f>Gl1NQxB#Kjm;RJsE{W0wz2
z)6ry$4_<6_Gzl-kJ6K*2Eajv9Xv=g9>>8=ZVKKUNw>1HuY%PX>f1aGwIho%MD-eg8
zx`XSgjXcOe>J$%t4$W?-V6l2S^Hx(D*p|o_vOD1xEftvlsVmk8zNfu;7Od8Ds_@41
zcJ=y?F05wv2>zWZ1;>WV)PKk(v~V_o@MR<M!>aFK7y4aX`;%$FnLpsRL=PV~Z3Gh=
zKa#!tPu3-?8+QMg2I1kB<ZNd|&V3~J$5&1K@Gu)4H_d^74rgfA;ma`V*+`nwQccOe
zw<#{N5dEec7YZ`I3S%F+a{IceZ1_11EWb^XT*Nz3%Oe=a=9N*PP6kw2I)T>X&EhtX
zYB>HR8e|27vFz&@dJ}veJo}r-s=T5^$2RGA8f3{YB4=`nPzJh(JL00d&g`PANdX!!
z#h1B7)I7G-rKo*94D0y~?C(6GyH*Ej%(&UGq2LWIno&ln?_a={v^8*PwmM#MRH0L&
zTByf^7_`iG<Gt^$!ROyu;)(Ch<nJLdXSW}uW8u=>*45pm5VrA^=X#i=QYmQuI7wT#
z7@|W<e=evA;?ql~;^|wx@M>QL^1sz8G|no5E*lR~=<9Rh{IdfwqoEF_UmHq2m)r50
zuKTIxYd8AgC3PG#(>c=YyZlGu0dbi59a+#ZiQm3stoWsIIq$rE9d4i7!A++=$&F4Q
zq1=go;HZ@a*FKLI9@n{`>YO=vJJb}%thN!VGPI#Je>d!y`c<^d^W~b7d6eW`C!TCv
zfhLxZh2^3-KD9}eI$x@gWW5XC2tTQHo(*V>`7JNjpTO^e<7vXg(Hs${OE=4{@WYW>
zdEUy`;+1)lOEtU*c6Hq*9k@%#JK>f*{YEB_@KU62WQ?yKs`HW4=jp|W&l1~pisS_w
zfZ-!Q!uJJ#;9rq6BQDnzI?wwFqw6-1>KZNda0wBsq6qA|-H~~@$HBzChPb-94HhTP
zu2xk3EXVRYuy@4}VRDfYKY3$>=>>-&F*X(dOzpv)tUo}Z!A$fV=PsW*;s;!aI7_>H
zRe7u56-aXK$E~xZK7LR?d~G}fl~jZH>dn`}%e$el>EAEN4g5_y4)*A&(@akrdP3nN
zZRq>rteD?<7@xcI52CE!3y+TI!Bww~@T>U%l(@$5*K;Mb{?1zA<$@2iR&I@|5$}cK
zkpIM3-&klqSSl`EkW7xp15f3?y1?;WuR?UZ0_b1zLZ8q2_`)FyF5bL^fAs6cac(2H
zEq{^lr28JRY?$P6JZr*Hr;DZRn=+JY-4kRXgJf!N^w`__2Sh*87N<pQ;90NUgV~qo
zV6V3pPHE=T38}}jXVDTccRdf|Z}_3*g*7l;+Ff=yE_GX`*^#%z=Xo$@Ji2f0$jh!@
z7ZqbXX^vVSRvhMv`Ocl7A!VbO_vC+g374T`S$Bxt*jp&KZxp5(Z=u!J63f$u$>*;H
zO}=wT)G>S^EKcvttNlOHFZ0W!e%%nA^m?Jf^P}QoWnWD3orjOg&A9GU5X>JkU2+CG
zk+otYl!kq#Nzd}>w##!8JCusfiR!|pF`r<)&VGg|62I<QIb7Yi9rj#*L*7lxDC}4d
znb9#Fw(IF5^`Z;J$bXOE_>G&S81xwq4b7EBru4;;mv7NgZ3EbGxfr^SzX|`I=<?*3
z+I+=#HjGfY2yb$<I6^@i4qr6D@f%&SAjAsPHcr7c(#%k56N&F~d!q8`JRv|wg5?i=
z2M1O;(ARgmsJqrjkhkujq}12&&19*VesKrB(~n>iH9uZ!`&w8(Wi=OhtP+>$^aWk3
z1TuZB58kue$kk>hXObm0Ydj#ALx!Tt#fd!c#7`(wa6*ed<HVdfs@TC^%5G^DQdQ9w
zP`jsyw=yC)a@KJQpPMH*w)McP{p00b)UCiE<SiArvAFETei*pEf`^oOiJs;)u;ku0
zG5yYcdj4xYnvFcbzmzR-Mst?rxlcnAd&#?DH4pu{qu?gpq5jqslI|omd~#))kTCx;
zD8D`}cf2!%?e+U{iK`p$k~+3t3Nln{I|ldu1Yl9~Rahh+PT7Wlhc=u8mmjCZK3e<8
zYF!*ZoqSkO+UX86#)_c(QI$VVSPq>9H`LSE4Vf9MK-(ihT)M*yCYE`k%ga5yNQ}km
z;)Qf9H&@(idtRJ)=q7bj)`q8lAHg!cQ(|L52O9M5BjopSp_wV8C9m8d_HG!5){=8!
zR;CrW8S5~M2dRg9xm?{M5+a5Spsc$)+45c?>SV6s(((_KaOOSziSG<rHV-L3e;_XS
zHw}B=dL}iV?eIsBO=RW}BOJ3Cftgy?sHL`scQvcB!iZS1>KFye2EBNo`&#kH<&kLg
zMjMPLMsnc(c-~qTfMti3(Kj?sd|Rk5<z;NSefwZ+$$mha=4QZA*?z7uIY1?j=2Tp*
zh0a~#NPEm&Jax#PpQSX@_U#@x+q_DMa`wf(!B@#q*Mar^3*({#&M3%^lgnTucGKuY
z>j%7|#_O|i@Rc+vv$YdrcTVIXkMGOeN*r-+yfLXJtQJ@PI7I(B>cOl+O>|Z(g@T~1
z*cxF%ChAwHU`IF>RD7k068B6s#|N^12J!1aBRn0Z&rv<Y`Kbf|I{(@m9_~0zt0LoQ
zOzctmdZj-VUX!8BTNm$ll6=y;!Z69-9Fknlk;*b>_+6hNzA*iddaG8*9-mR+(1CYg
zSl2+z>l8z-tG+_=A0?dYb60lN#S(5in6ci;z3@CP5vQ#@z<DhLc*SW0p5D?0Rdx2T
zQRl@N7;8td@8z`gpgQ(SE}<1_8L0kjB<%L=FHY_y_0tZkp~BWnV&=_!ve+*|{hj~7
zfBJp!`P4@kXtokNdbo(LmBnPYQF5Z6+zj!PF4EU?acDhh8ve1*fn#ff*<{rt7}UQ=
zxY&0cwttfx9Wyns=fLT-V*D7&J79&g;vLZPMvLIvcAGZ+u7HkbJ!J!z`SJU|$Jpsc
zfq41IYU)3?Bej-PK=Kc{pnG^Y>TCCd8=3or(}{yQwyK$aN&U=yo15Ta)=x57tjjkf
zK1tuF|HR%tgD_^70S|w-OMH5?Gmp<`1t;IzARDDcEgD|@#oGyOhG?PYi#g~z+gosG
zma;$>jtS#`7r@B3eeh4kZSi303*m!(h!Alj8h%a<<x>+i`Q4%Js8vQVCo%+X`T6jH
z%xYovizATSSy7q=Y!~$}PUY5N;gIwUX#YVQadOOhcJAB<dGuQ7_UyiV&kjLiKzUG)
z|29#f^*xv|+#1gW)kE>nFI2h0nNRN7B96N{fN#c!fqtGRPkMWXVq;Fh#4j7D;Fmw%
zPSL{8tA5hpkNz~^R1(ksRVd8q+liNt+<=)~gK*_YReC53;L7R>=(!^v7OXr=8!nBb
z3#w}T4ys6=5sRqlBM&x+!HMk>KlfK3ju@tgC;VM8yU-Ul{qg4}Fyuc23~|ZK<KVS+
z2`u*Vz%vhgvFwB$TIQUhFD4W4<#sy=@zjFmfGGIAI8hMKFA@%)JVDS!3489z5GimD
z_St(y@Kn4YKfUV&H5s*t<8Ec4URTLY<S`bzD2>MD@uO)<un~8T>4{0xrjqZI*|>4o
zCOl?<oRysl`2i9)%*2I~6SZ;KzjPRP_l&&X{dMSfNCqAo^trE9Cp>w>ha45sY58jv
zFnQi8{#sfrxo{4ELpVaOHii?89=s%173N+`LtXElpt5f!b~teYa#p9w=iJ&zE_r5j
zzAzX1SHubVNAk(fZy4py$fD@MOe}rByYT^`e|aQcTs%!!IDWdYU%OE3)$Yjw^J9cF
zMoKWK*H^k~Y=@Hv*~0I#o$xIzo%#+<6DI^+k#Y%T^wC`hKPiu*--S=0b;KfEbFU*m
zNOXfilRH4TPXo-a*iLE3#-T6lp;w7z<oA0N97<J2yCIbn_2Lq(`ZbDEZYs(CCO3(6
z%tTlpY9=xB_tVy-Gw>*|7T)i2<J8)}!jnH6McZx)XmNWHlmr*T>!Z2!u;~^kjEaG<
zNs*lD;K)l<KEP+upO;G=-7~sn!eWmkHZIpkWg!R}I=_MR2RfqR%pw?Lsm9SMH|Ue<
z9tem};;pC8fL-o6vKabNxY**$PeO8}^Uz&kj9pi0C;3BWkZ3~sPhW^yl3&lcPdts<
z;fuYl4;O!c2?iOZa^Bx}l$ooH*AE%ti=lGr*xXDZ_5pCwY9ZN;u4F?!1-cvgR+z7_
zM!xrAGV7JJ(<(86L&kPT&D}|KW@$CuvFi?Jm2Ej^a4HSZxkwiVT8Ia)sbiAVgG-IG
z;?TjG6o0D~8t(hTZxv7el@KE+4|lJs^y<J1POj#lk1ayitBu0ZJIh%8m*i87w-ifX
zN<2R$4fwX(4egKI0h7xw>FCSl^eNPZ^J9<Dg)JIX*1d*0mtLb$v;A4QUZ1*XNb|R^
z$Hn=J6nIT&0DWza724Nz=8NiGad1^9rWu~lG-Vd;m6&A3JzU`Zs^idMT?w2_IV+YA
z$fdK+=@huV2l)Gp#{=PgafbC%QER&=InUlK=ALV(%+g&vGh;Z5dLL<PpdDVkn*v?)
zJ;44!E(DBqMBl&a*vHJ4-gHZ&)x$UOn}IQ8bgYBKSgD2Bk;7owbY;v{FO_}l_z2EN
zd*W8Xi4H$lPdjT{=&PC?Z<?ry*L&IV9Bmg!M}3-6a1b6G(Zy?LZ;@^JB5?Syi-$Em
zqG)+KzRXhK0axa;(N!PpJbs0+_+YBM4)#d%(oZnLEdx%Tp3A3)SA){6B$he4uxfjM
zJn2?1>J6O6Es1?-RG}ZPv|fVU(icMFk$1AR-F-P@b0=7|qZ{|QGansqD?(X=9!{v<
z2Hh4+$Bgnk@%ow7kRKDs)~jRa!HYQY%&%L*a*vfXc5fUPRMk_HFbj;NedP4P^T}j;
zPe>YNjxFDALe{DWbfEDhb$ELXb{1LU?Y+yvB16RTW1ac^M`f^Fr-J`UnYT9)8zo^E
z(8p&1qRU1N*cPe`Pk&A2txcQ2N*pC5WTasEbOk)LuZ{xD)%dcO4u825$}Y;2Fnhd!
zKh`;t5aWpY9cppi`Y`^d7D~$5J!!GTEn7eXMOC%$;%tiN-|ap$<G>)a@?3%1%SNMH
zurjo*vu5|VzVOKV1+7uKEKWRnMm%6rPdB9JbYR30P=DV_6<=Jipz|{@*mN4Kcbc(N
zSv;NUlgA-18Yn{+B_EQUMA^n8Fg|lT&-wRWEd1RO7aB)lUsV;fOdpG**Zw7ie7O)`
z`bB=Scs^PG`%NzAV#MWrqG{2rWw36{V$wA2#q&>%rzx{HQ}@~=d|E#Q>dFqG^0h;x
ze4rD*iii=54=VBS4myneDmnT_M|NBhiP!BL;bL(r|J!v^XiD;BzkXTt{8$pTAI%ZA
zb?t_iRB|}eOdB1%9*}*y5&R5N!-nd9XtK9A1dJ~cB7eoB@uJgo(zFAoOFY#pzG_(c
z(|~^lXh^fSH85o6L)kp9QDQ+CCH}3Z%}@UAq;bUuAf;nHR6L#m7iS))XR8L{cf%W^
z5WbTm-8;aQUM)0m{1O@(-hpfGeG=BlRoSk?Mi^uyii7Mrp;Cn<bjzNBy|N-<?foz?
z?tcVSDweaM?D)`*U;EL44L`;18w#ppT1WHeON;qvhijr@>;kxLSdKQ|7}lG5lA7}a
z;rM$^K0R^|O)8j=7i&Z4YH1n$-C7~H*OTVYpMr5(t23^i7bT9vL|nRY1RjmNB`A0q
zz|BGh>=bOlqZdr%sqTMiQu;P=!yRpIc`n_xcCO@_Kl4#Ttv|KQ*hERD1+efz5q7Mr
z6Vze~SjS%zPV9-1yh$otU)_-xUN&OkLY;hTGsE{?lPP^5V9KxyRKBYkdf!dO;{1U$
zd0H60K?fA}x6y%)B4s&+(cCxTd?a}Vf89NVZTKK;T5*tC!q&kx&sHdHx8YL<Bk^kN
zX}Bygv99Y+=h8(#>3F%ROr<do-uz5rvt?K0<D>%4o*7c#Ho=zP-|*upCD9y}selQw
zHI%UI9GFISfybYgf$e|SsJGTe{&jpTCnQMz$L0{2EfQsam?kq#8jD+BTk!C)RZuR?
zvuEh9z<JwGh)VQ@{^_c6|5i8HFLh*Qjt!yZL*9rhlG5?}?Ln9*q(NZ&F&JU}Qg)GN
z<C7T!X^3wKUt7F|E)7z_w%coYIZu;)>G)dEIAe{PD?AAsR$*TC7r13x#4pc>iQ<L|
zP!@i{fe%)g^VgpqK3N3g)eE3(kSluD&*P}55%AX|m~IU>6*E*TVc`F;;;*jcp(;fn
z|D%tej^+u|n<hYchp8}vC64_Tsh8L)&74-(%42e)`Lks<{IuwUt6uw&l~*Sm-X)EE
z59i{{BZuL5a3lsgn^4qasrv+yH=v^_-Eb4I^Mf(?z9~@LJUR%|Pbbm9LrSpFx0Yrq
z8)7F#DKG3XnmgV2N<DKXiAzpNj`EQb$L&4f;bjTXq1B1km(RkX_kUE+=%|Rb8@qF-
zTfR`=*_eL@ZiA3tS@2<Wq5RdgBh=L+nZmoO(9M8b^k|0yH{Z3yN(Bo}z4Sp;?JdIV
zl%aI0&J}+-Rltkguc*$f1qLpxrP#83eiu_JtbdchDe40-Ph#S^Y>UN7k`E)g@j3kP
zuSC`6Wb#<QwtCFX_jJ393orA04t67~rHtf2TypgjJrAvv&HojJ7v^Mw%8-0$({HBh
z&U0|w@L&k#?&Mf1;MCkPST3>M%D(=A%SQuAM{zcGdoD4{bNcYCZqoO;a2oee8^E9L
zC-J+h7wMi}xe%AzA{YfN#L!!l1XJl9y~jt1|2TO=Y=SoHcLw;q)k<uab_7FW=W*TO
zC}HUZ)zfE>juQ=9M4E3WF=dW_qwG`a^y%0DJayO%Y!t2wJ^`DA2OSpjin0~>$~28v
zDc6#N`%Lb6K7*n{l+oZw7!N(8#mmz#P^IE}_%PEQQx2}j)n)G7C(#_{^_U?<NIrwv
zDn{&=)g}`(C-KW^p|HRC9NgHyn|_bhpzwJ$m@Lgfo7N<Nw?QbDDO-Wcz*NCZP{sY0
z?tD9Ni=aJo4%aO?Cp+rug*T;L)$ljbvZ>i)xgm9=*fYnJBK>m2#;Ua(-`bU89zF)Q
zp7zkUawna=uEq@mnC?~ef(wd)cx$-|Xty>~{r6tjf3ge`K2{3$ZGT{Q^-_ikPwZu|
z9>&~irZU6d5D{XI+EqF@eo6w2&(Pya{nw(+&NM2NSau(0^uZmMvjyvZB3yZOh<ZH_
z;z<@AxPP-h^mj8R(+}yw^ie&b88^b*5t4)5z637c^Wz)cr2gv(C2IcN9Zg5vr!^bA
zB^Ij+K9=^De#KFIO6r=nRXbu_%UEi;5RQiNIdtn-5B3`Bi4V7Y<YIdjbh`9Ne82f8
zo!b{lMNt>U>x)Rp8R;SU`ZRFhqBr8LIkEieUL_>l?ab*9r_kP(X4$6p(xKU#dW%W*
zR`|f)PxMr|L`~j{xZjm)aCwFSCyqS;_NlQ@u}cvGFMXgsvMJ({sNJBG5sy!$*+y=(
z6Ds^JC6^6fh4b5zg|Kud!6<GJoqGOV%-9(No22aewNyuR-)=3v8+W1otz<0IC>A!>
z8&l}Z8C2?=ESu)A3^TvK5kI<GN;By}$h!~H9y=LtozV(KtAlvO`4k9Cd_#qQtl0hQ
zF!;Ppn-o5=Sf?VR(@Hl5+kv}8UuPe2Vw3|Xk2Vr6OrHkF%eP_BqGT9$?1u1il{@cz
zIe>je0C=7#$4!rC!{P9i>}=%9+YAcO@6SZanq%mn?eCV5HK+Hotl+HF&Wjc;w4NEh
zU}?DZjJea7X3f#-pRLeA;s5@m_}kNYuUo>PQ(f5i(phlNUO?fE7V=3ee~1btsnUMi
zRJ675f~T94M3=O5xN%^KOBV$NvA&xEDH|Hm1Gfg@!QAcg*6a$wGe3d;^l<>+{gFcB
zy$NzJuVQ-k?Xu|gv4O4?uCA_$y9<5Cxr=6f1j@6^6N>fs(RVFlsNJj%C(>Sv*DN<v
z%8MOh+-S+WQEdUCy=IBHR*@7lTt)B54Efwe+r`<wSH(NePKi&mSBjqA3NXE>Oza%J
zAC{fkA#MmcB<f2JA*KJ!WO=WaQr+Tgnvf<JCaw7;rnpLcrLbI4l`3I%r-x#3ZD+x+
zrcC&sXXO_E|DKfY|9?_bZLK51mn;a6{NJbL_P<YSTXLb;a<pDJ_p211&q!b|-C!|n
z+$`R_VLptJNT<H)z?aS&iw)NIp}zbYR7Y=z>__h*Kg>-y5}_yaZ|Hz}zbA_FFV3tR
zWzD!Rn*V27WIQN{SHF%!`^%Ee*`paw46YFL7o}s@_g96PS~K~TRWuu`ZsCy6*Q#&t
z>Bk3DM`M8eDA?591oOMcD2V#<cHto<r!3}QJ9Y_0LxRLO&E;J1Uo>{GT7$cKThcJ+
zJzzLZ69a!g6vyT*kf`((ut>=S!%_gnSxNLkb0D`o8h{P+ZQ1Nk3R#Xe0KYle(D$hm
zpIuW*<IOd3@WB3jA$A9baVp=&B6z;M2x=|d#T>`Y!uNlLyd~rTTvA<09j54EaGrn<
zW&}u+<ZJ9d;3w6eHDiOhk74NNy=-uDICuTFO{_`w#ZQ$Pe0Gm6cUpH*RD<q#GPnua
z9;m~S9ZBerpv0FJn$V-86`&P)kX{VmCZ0^)4P{p(>!pqw^NzjXnK=npUNmR@WixSp
zFNtPj)+#IA(-}QK$J6udcXF9(M?R;KC)9>+fHRLa^DHSv+xts8uXzY?Ql`z?%ZBmL
zzuoYyRu*rqvBeqNd-K-D3B0VsRhN(jYw(ACSKb$E#T)N+<p(cjq1gmO&YEq8k50U%
zr9Llc-fAt*k{VJ8FB5oj+C>_<Qz8&+#|v#YHsFy@i^=QvWNO&Fk>%m*DfZqH(qGyY
z5BJ^1+cub@^bU}Z+cJg+hpnb-6MNEEmt|bP<+AujP{O)ijl$T6FX)P%3y+QaNx}Yp
z_(k)sl*Ckr@lrFk&dL-&U%CmC?ewUw@)bBZ1+$YhjZ<88nqC>cqyhHFL3g|rpULqj
zAzc;qe}v-F4~D!qe=^VeFbZ!gI19taO~ZxrU0~=RW3K#qj<?It(Vxo}Shet%pyMwk
zlD;~?hf_|t;bIN-3BE@vvrMq5O&!0?{6%}muZM|;W=f>7k1%bdl<Hai0Ro(^2qP+d
z(5CldcF8xv)u|F~cftXoddviLZW+dx=4!(C4jHJcnvU;kwK!w#QQGm%1D}7C!~ML$
zbm>5wknAuTyz=@`Y~d7s=;99R--^PW6Qg*LL<)YHo{lZ)EwDMW3Kk@HVhv?I_-|D$
z7*0^fKN-`wWQHlH-qu79+jP!yT8=ixb8w5ek#m;!$I~7B@SMYY=yl&yI9hpFc=dh`
zbqdhnpnZL@qHaAjIK|Nw^?X_*nP=-0bHFBhE-tsKhvxl`G|xeq@7(Rri=*~(tJLJK
z4Sq~_zXpMyheVIq-9QO%e58bXCGL2-R3aBt(*AX8Xll6B5I(h+YnLYQ!2(AZlJX9Y
zOmh(4>ine7>&nH>1%j01+s$Xv4e>wc1a?}uP^hw<K$^Clz-QN0!OY$pYa+Ihr-dVX
zT&{;<E8Rr@Bdc&u>p{Axw*dSWucv&sWqfw3L_Iw*k>kf56r!g;p+k22<n0UI($0d#
z9K5RxAFR4a18YY}q<ekvbt`7cZzkN-9{@!mx8Ol?4(ZO-l=LyuO)J=$)%;e7?h6$$
z`22L9dps9*>098r+7807!G`>=F#_-Qn1U^a&tbR6Ui@M77siL27gt6u;UDwX^RK(R
zDBGe`h%uc;|MKUdalAL?a5m1GQiNywx-&&2L)faVlyq_m3DZuKZuWojqc(@|v(X9g
z{SyL3!H&WUzqx$3ERCOCSHL%tRr%&*Cs4H*A%+F2Q1CeuKJU8}?9wvOz~vf^eI(sX
zKg{Lyp8Z%;qFaxOcn_=YZ^gfVM$rD*XK|1D4b=JD9gCyhk`3tNf^m*`JHUyL8t&%1
z4NI|7cOkEi-5~z0*oJL4EZBMW8rl>sk<_wRq4|;xBJ^GhH(G3{A*rv_^2;P|Gaq!b
ze2evsK(}WcgkaN0)Y$R?{I^b~VF&bh`_EkL5*-B#t5V_C(<r`Kat)G(X3@eQr)cdS
zFFagZ0fAkV0nKNi?xIT6`qmFlOVg86v*IyEL)SIFY8||ljfS$wb=<et2Fa|wpSBN@
zEcF|2!N2~^VEx;k^R&*Q-l-en@+OI%;vt!(UX8$$m8rD<OCn}=vE~B@eYp5aB>E><
z2=XQ0sm{GSM8>Uw<Jec!ej3ERR(XK-3q@X4ycUKG43P}%4X}KW3(ma17&HooU}EiL
z-X$eBQXD2rbo*HNF+i1TyKSby{bIoXK~FYN*Wt2?8$yIni*W8!E^J)b8BdrVpp~9w
ze9Rkoj5vg=(j9r{iDsBs7mbFl%CvBNJn+%`q*I>2ae33wduK;Z7FBqtYa{GG>xh%o
zX27IYE1~-E25LJbQe%)eTb1k<|DE*0%(`G!Y@8tYq;KFU0XYz7Siow$9gigV!_KXf
z*}!-q=hZn=!?Xs{k-jBYmTnQ|NJ)Q}nyDxYz6<*+qy(WfsV$kKD0cid8ho4jqq;*T
zBwS9wYu_)@z9o_DIMtBfBq!s&rPCoUFCB#lFTSd)MX$F{;4%{jYJ023YXbWqyge+B
zK4-+&24-Q9oey%d4|UCzjD{<1xI}*xmB(suVfakgj$!alGS3D7PJqW*X;?nv0-Y&1
z1ylTn<9G8Q`1r3DURbukLH<Vu=gvUS*h-nNsx3wqDv_F-L@!b7$g10Vqvrip6t1~j
z>IBWkFEb)J;F*kH&%Htq&0o{}pBlLRRvH{w{9N$bI~ZB{4m>_O9K8agv2@Z?Sk!qs
zPEuCnNdG(H;vH(@rU|W-Gw?ArN?Y0C&pY#SkMY>0&4q7+4|Z?#z|M_h5c;k{&6m>O
zI-!_u{`@3{?3>Q&O(~LbQJvP8tK+WgYlMsAV&UdJBMOiGLn|AzdD`7|Jh^8Kw3j-7
znS2AH(Pmz~%muA%1Atw}(bgSD;nEBp+&BNUVBIN~l0CO_TdTKx&!s$m=NC(*qiZ0u
zG7d6(Ci3sq8F;xpkv3Y5h7BH9DCl@74S6z)zg^hKQ;R<e_(cvyCxWTmOPPbbd@<v5
zthjxpGIq0>%01t-vGOzNJ!GeeZ8ibaRfvH`-#0?)Xe&Gx?aY5fWB!%v#v2~p5jtqT
z6*c~Cg3U``i0YdRX{|>ne|9ey9(m-mZm+qF2hIzIj;96NRAruLAra6il=GvS#q`f<
z;>?c9*vUVPZv=Ydx>1wytIQcr^l(PyeS;wEM7(5e9LE>;4kWJw<7jEc2psF<!t<lj
zg;_i1((U`Eyrg{;j@DX(`R?C^3zpJ3Yl$zK%<B!^yFa9d`|pVZXP2{o`+Fh!k}cls
zS}Lx2QBIrBbwqE)@wk2909NcViE}%Zz|vwb+NJ#-+?2bsMraNU$moO>|8}ABtgbjw
zO1v3mrK5I@6~`7KV{bjU?Bv1Uf+b?CN)g$#p923?$A$7+z^lDia>}Ho)kCeFdG@<}
zVB=D-uIf$lanj)^){noo*OGG8I%=^@p)QNssQZfl;EqJl=;NmflW(U9M=no+1;1wT
zjuVO8WM70gf)i<zf)y?dH^)6YWD=RpA3`na!Eyd@DS_QYa|hewEQuC4sPGx-=S-my
z(#>_xv?%=0Yz&volAxR6XTfvvdLeVwYw&JZPGM=LxMI{U&h`<&aIYV%=wc29AqKqi
zef0mK>rBIP`lElXqFED-L@3HoLa1l2)gVG-C^BWrtdc^8(5OL_G$|sKh@zz7*=r%w
zFJxAdsff&yIm6li>%2MFb<TO!yXvZE@BQ6teLnY1mh(r81C}`9+Folo_{}*{)y@OE
z&W(g~yCp-kM-Jp=xl)g=DR|zwn8symabwzYo_;JJXFT}<lX9~uOYq`@UG_7{M{|44
zWeTY}B2kf2Iq_&Y89QvoHkAnYy5ADcv}HlToO+PYe+zCK6nOcItNdZT70#UZT99pi
zAiVB&f)8D=<M2}xuq7xO8`M9+;wPDKb-lE8JJ*+M<4-`>wVt^0KSS(REYWD5?8g43
zdQ|&yseC8?rSiZdaA@#7N_Kn-(-dxUrFv(Sf4fa4O^7l7lIVlX6h6JOgf~qaqzO(x
z463UUJZk#lr=2qdDEdvsci+)7eFb!|T+BJiHz7IaAmu*W4|(T`#EHMhle=OpeBBX&
z`Z1}{s^bEduOe9cV=gWGag+=vO@^g$KggiOiQ8-(7=AcaUfL{KrkD4Yu<z;6w_*n0
z9A?N(M!nf$T$|V$FbX$K%!EEii{Q1jWZAwrpKONZ!t%-;;s?7exTr%D(=;0%IMcAw
z+ItW^ebtqRDfHzzW8LYb^)*`fbSGbGixigCB#K3IOt`{Rjo%IXC_WHH5)xODhP+mY
zFg3-G^Md)~$C;#GXp3=k@4+j}c+oK>11I=oNoTTJ$TU!3VZ92K?_Y>57z*v1oblfD
zJyKFA8$FHAQIN|}wlFq?<135lLdi6k*R+Lt)mDnjw(q6m6C=gDk~M9ycNjfiohg`<
z$8-NnN1$bM1{_V^0;xCl3Fq4$i>G=o6#bjzG~(0*t{9cVF)sG7d||58XftD#+uOnG
znHv7YjePmzHVzzO&nd$Oz(|7}c%Qb_q3Qa4y0%ACI352^Tp2x={tD0G$xeyBc~_60
z584Y&Yyqzw8Ds-Ki|2NBq$7`yaO5;E*?EO+ynd?->l(-MC7m?3+t(_z_*#hy?NjMm
zVLFE?4uhY5KSb+AA84}19{LgZm_o*Q$z(^u1uxfiylYn{Hrb%V+V9u0+ut-!4N|Ne
zyzU|NSmQ|INhQ9fjJ*5uPfAh`;We%?pz!!0ot!&{V(q)*^y)oa7_1`;Ny|oE^Kh)u
zeJ7lXoy70k)oGqx6nFg746CnSq)`_S;tbsex-wS4{wMU%bH4=|*XZMR&3iOhbjK4x
z%lNUL5q5ad8-7do#;KBdd_vF=F=6g@_`4w<(taJ2-S6jvg;L6)ysnwDekjn_9c%d7
zA}6qyCaD8fEkaYZ{<t*Q2U*yHDR%qN!f!1+s_ep5b|&B#k%~uxgSfV<jMJtc;C<Hl
zSaUL#HtV^_cix-Mimkn{>ud#Xw%AFx?F-p9K7;+XFGm;IPAvSBjPgl}cy-$V_}lUn
zPL9kW`_vya?c^5R-@QoebfYhxx{)kRrox1e+q%MQ^&m_gV2=+UCP9{23KhIo=4tC^
z@vZ5mWTE1O5mRl^U`j6K&YH+S`c%Qn97`x()`fdgXKtI|#kD^wxGFS;4LfawopaYx
zy!Tujobt5t#LGF5?zxA(R=T6CVg}T$pMZ0YKNa`I74wPbYS?=z69y&2`oMWy-uEB;
zwpoM^trECsdM6Cp^&PD2TI8yqV}u|2f2pxYEOs<mjCCD<fcdSNe670+Z=Sf2GnT92
zyNDYwW0NA42h_pt%~I1d;*8wJTOx>_xj`ZAYV7dUiaSQ0mm0@)V0bQ@d)}IWDq#h5
zZIP6!uIn#truXx2RVQ|Gj^|lLEs!&N0+!8|NKo;;g<Uy^DY5n?7|i`pnYH#LyzlW+
z{2IFwg_obHc>N(RTq5x9TO-hA;d4sI6rrumjK{YZfzE~l{Cf6$apLNDD!O?Il>cid
z+i*?VYN|v}GCXkjv^ucZaRL9E?29XkBFMka6(2T_<wg56aiL>Jbi9-gs$b%{p~el<
zyb2_X@GTGybwv050>)1<!tY_7FrX=wH&>f*hJOr~v?hrCbscHru3+we?J3;6(St>!
z04Y`R9is0!l6ze*`nT^6-IwgXua^Qj|B2<l0~hmpqk%YbQXTZotgSrYx(S+1573I4
z6UcQ&S8klxmHT@Ha;ixv?|LTiW1Vd7tkRzYgA1|#R|@@E))lW8C*qBO9K7hQfwv+$
z<3R7P!p71%oZV`KgTLtT*9Sv*<rjOllP06-hDvPIzX(0_wt*<7%g^7xFY-Vi{#vz)
zhrAijpBC#&o8g{V`EV+SN<?ebPa81r#ZHIjn`T_#qsd)^_QIn5D=}{6J~EZn$i_A3
z@Y_lmH6;Y&nT8Cm=&1+)E*yg;-ztQ&hmV1v>Vw%eYal$-0?%Kz!C!9E#lQHCnoj1E
z?t%S0c-K{Woj8yTgT_EpL^&<CiN=3nI~@#qS>Xd|p0W0nCEt9rnVpr6h!=();hXAR
zXie=%w6%8QZOdNDQ#;<`i<M`o?WCOl69jxVXgSolEW}6J$?!ok^RD-oj3GCzc(v;s
zFe$Q=?8*z^R#F>z{%qh;n|D%RmaU+qG#59Vwc%c3knHXA+q8JfWLECr!tsTNshdwJ
zZIwO~lM`&<eRa4{-+7BL;?Hv4c_cwt^CE?>_R{0u;|FqCV;WY?>VemsUx<095w%97
z@x5S2-nY7e)J)W%_+y4}x7>!)_N?IM*M1!Qb2;B_x<Ie<kJG8*#T>6ch;ypcdEmnj
zv}R2usvP?WdV23f+4=8uro;%V_qEC2WtRvK@`}aRc?P^~n3hDviW0hR*}@7D`XmqY
zBfWoWc<R_xC@%U+l`->S%lv<$tHWWO(&dVf9D18{zmE`lo9p3U+dWjiZZHSf?Bosa
zo(UNe?aumQA3T!&N%We(5;F=lSpH$J?1L-~QZB8gF8;lEmAnV1?|DP3nx-P9E``CF
zZPc@)5-RRhBlWvdDs8|CuC`eWt2_$$dyNyP?$bt}Y)y*TNMctbfxp%*z}f$*;PdRo
zs4*%678{?Yy0)1#Ve2~1iaW=}cg<P$J_aKv?SZ~k*4#nbhIEz&hQ-ATIsV`_82k1j
zcnID!?vOoY)b(fMvJCQ0J`AHJB41|waL(PP!_IAY<$1s3M87rlvZS%SM3WCGZ2n;y
zFZ6Wes$2V@_G%xT?5-pmFl{__IBAc&*B?bRI4_U$_u&usS7Y$%3()V=Tu>@>#Okb9
zGLxzV@$SXr!oAoyPW|;rxD<CB@@6Y@^5Hl>b!ZOXxiO!&Ty29G%RltJrPv{)tceVZ
z4q#i)fjHFE2frSei|>Qt`M*O;MV+0FxTNU3_-%`1ll>8eoxIM&#VZGq16Dx6#Vm~5
zYXm#yUJxFc>;%)fy{TTY8J3)>61q?QN~O8MJkKN$mMNFg*_UzLWOf)%)Ob_A!Aocw
z-;dP_&(MRbr{H0QI#*A+4Bd^gX;{Jw!Bc9>`dDdJbmBC4ajy^O^B1vYqB=Dc-9R5}
zGu+hgB4#X!=7$~iQD$o-_K?U=7J~zMTTLHOhI`zeRYA`O$D-lxS+uAqS^Bsw#?YLN
z+!A|&lcaXD+jbk)sCp;({<<Q*U8#Z%G+7qa$(=(=rB-54JPw#^$J4a#LZ(gt{x*<W
za7An6sTYolrsulDzQr?z_&mvoCvB(aJ1KB)y&m|gN(~bpqwu}kc^n;+fIhp&(!2Wy
zgr;|ewBC3(-%Fmzi!Zms?g8s@>HXgHGf~<g9;k=ySt<OfNQ2*4)PiM<9>2S-gH1_V
zRA}ynb(hrHC21F2E?mPWG&T8el_y#cUyYp<FR|suze1#?9gaOTf+jD#2Wk&`@~j<6
z+|r~k#KhL{h{L_$L+}WCwqOCLF4E>#eo-78@)ADx8^t5*j`Ga8akSL_B9%5=B)6kq
zz~xzjpsuZmW23^*dFvj^J1*Ik*Y{wjP&p`i&E(Qn1=c&G$E!`HL}Ro>#4}RIz1=$U
zB3~7*oHUD1W`Bf&;h)I1j{+Wg6M$Ft<&wr83ry_eCC#(r@xsScT2VTTSIrKWzK*6>
zaWf$4x+;6G?!l|(oTEObDZJ`Y7>;S)F2;%@P)|(*OOw1fUAB?_IgP+E-FCsiBd*8^
z%Gm3M5$dXprM{!gd0Jh#%pyP$Lx=dln4}V_yp<0@8ROx^6&t?#Y!IzHHI=*8nn1K=
zSKf0-iTe+DB^1m|qKn2^T#i3z?%{OsnIa|p10!&tWFK_c*_T&k?B$h3C49%%82fkE
z;m?+fq&7w`+@fj-r|dVv#`AAs?}bXT_Ie~HOgT+ev99>hu1#pE7)d9MGsKoAD>j~L
z&6|QYK)`+zT9_TfUIzx_mSNF+!CHoS^+PLlJU76&mfJAR*o9-q4d+8gH}Kst+vvoz
zLF_#LHZ6U!iMC6ouBfE$=;`P{xBoTL@=KR#)!O0mhyN|&4Ob8G)$#L$<U3AyRW<`l
z?=Hawx7Fb2?#HxVZ!=Gy9u0O*^)M+dMqKtWNf`S^tKw&}2A>;bBDJ=4_@Xw+c2u08
z^2jI-8*R+pVpH&A+8KI&SOs$|lF-993(lRmSo!OED}3wv39NeGanRiwChi<K0^dAa
zE40SsKtk0X41KtSCnYaORf#?|)#w0bc<dA$%94d;k4C|`5#90IzhYVCvRb%kZ-ou1
z33w;j93AJUvxR9IP3}<6>b}x@@-YMM+09uTZ9JQ;>MVHm>>*fGKMkVIHsH*vMEV!j
zhwV?Qa)C!ME0p;|fctSOw=v)ieYW7xtKHD7dpQgU&xY@J2cq6(CEmGjE^ZGr<heZ@
zsCCq6iAaA8J`YsJrcNJeQ%ZM<?pH<0vSfCju#FDw_7RmXodx|BD{#4{DOhCOk=;02
z21l3LVo~R{T)!g>hV`hS4iY_P?o2bBc&H=gsfDBO>~<<VEfG8;98u%*3wrYI8??9W
z<uMax^YWfbXffMCN*L>-cNYzu?iPamYL|-j$J*)s6c=8s5hDNlx*RTtUX=GX9L)EB
z8FESZHg3&d0YPyIFzj0ybRVzB!)CeQvX(rGO)jS4ZK~3>p~mGZN%XD5W*X#rod#Vg
z2Hs=GD$NTxP*K1MO1{)<s1cm4-HK0(s_EJ7Ojy)uA@!(khuX$s@G0=Zh3=EFar}Dd
zD?Q_z-`nDZU*Y1fHhV1HY0o!R`fx<oF5JVrnu@a+Fxv(izF6QWGX?q(@>mQ!yMXFc
z^w|9AS$HB}fvWrez{7Jrg;UerIrg(QdP{TRPZFsx!G9bK3$TS^qb@w|)^>amz=F8o
zuZ%7%fHYG*T&6G`-!)&QBj<XGMrtF4br&9k@e*~(7F;5wDO$+dA{>4hN-4Lv^|)$n
zi%f5hw4a;v74U{5T==ZPALj+)4VUix<4y-^^Xh?O{Bdmld6uq7P4W?q_uz0J$(prG
zmmcou#xKw3;;Q)bVy9LSI*pny+!>OE8XprW@yJ}n>(S`!`&V3D&|W$8MHd*Yp(|aF
zui<E6BtCJ<!d~AcvQ5t%`+ssnIAhv|stWJqIW>#X$m0b)JmSpFf#JkTA-vH++RfZM
zLDq)baFtG?V0cd8lr1`JKO`Gw>Gh(Va|^^3Dp_=NRXP~ONJ+W0?yNj|GU)w&A(}tx
zK>-aJa7i;4+!fp)#Qq+9QMbisA-Y&qpCEMAwiCzNC(3Q->*MjZP1MiYMi%jmg?JlV
z^j^1&phGvIHu;h4L~m_r#`sxuRUOC*gSE)FRtp0*D&Uqqadh^P2CiRjz!%Jh^6e4U
zqQQ27<`u`mui;ymXZYZ^x*G6}9LD!Y6q8M#0Z^IHjl4=tq`lKX=yvsju)fO$VUxun
zR%%cbHPt>qgXtS_Z~v+I(#RX)UH##B@jFPEv={q~DHU8RKMH-P#S5`5lVS42e)7;y
z2@un|AF5vYVaSE+;=<rX+&6y+tbaE{c<<2_r`CKHCX_od96BTZn)QR;ZaxcN>L)|C
z*HdZh?FG5>G<fypap1pV5Kpu<#}5k{z;B-jHSPi)dNBu%CS4ZnV!MNU*k?F$<^nv8
zw#JW74~veICZo$oC0cSPf^&X&@}lj7v252%5~cLe0ghp9hj~QXRiRr>1Y8^LjMION
z;Dw#E=&fYUI*~Az)=uid)AI&Ggn1R*f31Mk#%p+fZ6v!JjfMj)`Dh@$cP+ma3EC&e
zqI_2temfk>d;j&qTN97cLY;7))vFs`xNj~T?C6G%mnldTn8QqU{#?1n2-lS>aPEQg
zeCSY5d|2m*DYH9pN2}W~EATz(tqkVXAB-^4_JBP4gcYmp&ZJ9zN9DctzM~VLB{K1n
zZ4_QCQNRMj_{~g-WNO?<z7iqbDeWPZtg3_Uor38=Y%js(v^(b=FQSs0Kgi{327KA=
z$ARY^(Y;tQxxPQZ(N0Gw>*_?jGjKN^Eqe%2vzOuM1KaV2yB#+9P36mjUHPa&F0GMh
zw6_obq^-xz8CFE{nCgkRW`smrw(BE|sryWNk_m23^=wwoUV)ERt;Z+(Jt3|0T6z2w
z9js4$Dl}<&^8+2G&`rTSD)BL$?KThM+$AGP{5?27#DJc~E92$VSJaqcN501wp!)A$
zU~Hu!l*(4(fv<~k{-jFU7rdX<o*Ln>+t%2<-Wu0#OAr%iI{xS(P=4q8y!P2UF|Xks
zDO88ho91Pp;+-cvy5E~`N83S4wJz31=CRT>XW?|U8GovY!8vP;*;d*)=uDYPDIdCX
z_e_Zfci9NLUpg$A?Si?_es^|#eFV-<4#d?}*P$}VR~Dw#3~i>C40?B{<){t!vkv1~
zp{wAK^-uV*`8(+}_243pSJ3S>z|wDT;9<xAXrJLx@!**L7~9xS$UVKE6mBH&zO+B0
zk)AC(^j(IpHa->4CAjd^ePxv7w_DJ7(GKTBI$+&kD-4oVQ2yG1l=*Eaqw87zqxcqH
z7Z2r$?V8wo{(a%wxV<o=awj*=JRwG3n<==Cb7R}G_u@h^hx)wnW4qE>s9o|-Tr+zk
zmyMc<Cj&=gZKu7kC2bfPNrVqa?PJ{SUm;bKHMfnuf?N8JU_HIFu&Q+ce~)+p&JXJ<
zdoJGw&JXOwad(zclwmP>4b9@0DOs@K!eg?|`68M4hSM>rLFavIp?u>o#P}JVcusu<
z`ur0F*WvSdgLfD4*XcwUYr9z@IW0$hWjB7{u?Bj6HRff?!+EJ=He5gPhSm=p$`4Fy
z(ZNB$$+#PL%v9jjjne%q=Q{0idnEXs_$$9wwqCq^yiNRIz7$IyC$N!i0=lc^P-m#8
z!Uw}i<-iVE&&Y5{Uv^Zec#sVh(z9pm^HwqF=RKk^dwJ6%6@IPz2R>KUfWL(weDRUm
zOv7jL?br*@b-Oj5nRWq|xt7uTHN&~{H$NyD&<S${5pvhNaKjXHq1J32hAOwx-j(s}
z`!<359h7KpH+;dQB2sE3Z-aLA3>eKGaAkrE7dLlAlaP9t9Xpx}_Z}ge$F^J%ZHSxg
zC9-*m2F@=^<mKDr#Yz36C}I}y^gS1)R#60bU)F>li&xO2^R_(qR45plny`UxSKJ`Y
z6AP+LsmrgPcw@kRI`F11WUXOYn8{i`DqWX_&7Jwum$ULa1|={${{Tk@r|``mtNCA}
zwAYuKP@yYo;8CYC3Q9XJRqtc@LC9_Lj=o0Coohh<%zDB0nkuPd7TG-2$DAvrXj3ko
z?;HP#np!f7tJk2A=S#RxwJWTd@L7zp9L{C8Ux81)6|G*^8E~@=Lu)3q>099R{RTYi
zl?9tTkZi5Hb6IK5QPH{6D!NniL!$IYuuqT0_-BbS_>HoGP_0}jK6#Z4M^wv3PfX;C
zzb9evklV1+f1qIDV<v34F4FMsSum~FV(ve8H2=Nf!NpTD!QDWM_eAaFBXv{xrH3&;
z4<E;h8+NjFbB0h*tHgagvN%Js%^rRxgNyxtLd|MD%zoq{uUW4MQE%*_ufkUvv&<a{
za`;Ym8~p6xgHExnP`b#5eZD#JX3b2A@_LXKnFL8zdN0n7u;$8PwqkQY6Mfh)hY$JZ
z@UZyK*x7TMWLo_z|8D+`Om0L0w#31A+o2HJcRJpT_TrMW5~=;rZCHGDDqZ>-A=z)V
z$=%h1j8(sbaJwJ>UD5$d6pzE#mucKI?Ii7aA5MWyJ#fp*D*Aii3Z>n?N3*kx_)l;Y
zd(E82t=q$3q}e!r9AbeX{UT}N=$Wkdx}D--8CJwypo=peL*3P(=%ewE*3Gyn-2C>N
zURq~D*w??{n`uouLam{*PaLo9T>&FHeW8Ny&xCL8l10TWMaa`k5!PKD4pUCm!qjzh
zVT$$LN`PdzMp9xjT$f)CtcT)>T7p@S)Lh;Z&BoWwQCHgquZ4%P`p+1?wMd3f3+-Xg
z;iWui!y*h&@4^3OB%_OOA*>tuO&l<|GwwR6L(dhm(4(>mzSsEEz}RTP?@6u9F~Wv#
zwW@J*>_yhFOa}X4H#T{!h&OZ#_zx<H<1{?5<2emF_r5QzUbFz+%T@_C9gbnOLYR=b
z`-Rxu(Fmg^NG3*CC3Lhp1+z~&2nUX@#E4FI*zjy0`Ru8oK`Ir{>+&F>tM7eCQ+^~_
z(Kdjpdm3*KorMD$#<BYbckUq@k8j(4fXRUyw7FOtF03DjlhZ`Xvogf-N7v)`O;waH
zkpgb8WV+2T$5+XjR5slTCv5o!<AWlxxsNw%6#aqZIyvlU2G*(yf^)4y;K3)v6^dGX
z;iEtJtQyNvw+*qt*o}KM24ja_*GX~gcz&dI3%*>L!*+8<as-d%$_W~HKPUkw)TpBO
zqGn-Z_6}??nIV=OQLo%LwTZU$)@PN;xx(@WXN>;VN-xS+u=n6J(N{MVAG+V6)7V`w
z_l}dy&r#4>Ax0Q7ttSKyEGO}?7T#2x0q%Q9XzZ&4QMs4k?%*9%rSZwWz~v3y|7AuF
zHnTZ(G02m(l|jC^PNLO$bLEe8Jm|O<Rd@Hmtu0$&^1Z&?vM--n6W8K`S<`tmh0&-@
z59Q-t>2po?6ly&BNHAZZ!JVG=;G3BbC}!DyPFk{43|H~te6z`DSu$9D*FuJt@^G%{
zwuL)f{ttZDeubmskHO+j-e~jVG>_Wji5A(ZoP`rn?bt;2%S&R%*V-6TyBuGy`zAei
zt~$)gIw{n7ZsR)rPoi049QzF{rJ1|uvBd~``1U`oTf=TJt1*_Yz34=F!-w;GrL$bE
zbd`?%_CPBwQ#L5os2o4XhNsQY$26;4zEzheOp3WiTM8bNdxZ|aT&c<KX}+MUqlYI#
zzKW;j`EtLZQ$cHI1zosPB$`=S<1t%*iZ<^F`L4TopO+n!22K-Jb$$tb)Sknr#<z4|
z_zUC8&rs-OOFUS`@MmbEWJe!_!6tJ0?QX^g{@fDOXP(2MFO$L3So&RZ=ZpRy^<Y$7
zZ{DhYTDH9JaTx9!0nP=6s3;|?JqNba8SBI7809S84=kkYfj*_5jKd?3YA9kr2X<RN
z2K!kZ6Z<L3a7E-uoM%!AZfd8&C+8)7)qBAaURq?M&;cvnYT%$1Q-!Dh`k-n?7*D*p
z6VYoUtlOIg>wBhSYxP5X`qKwb=G#DdauMaotI>9P7woQ|MsL@Qrx%}+rToiP`F)JV
zYeOUO>7fj2{p(F(rr*Vy_wS+HWH%YjRHh>uVRSd1gw@B|L0{jTmUW(mC40`%bf1&B
zb?ivC3K<33JRZ~E>2uU}A1n{}1MAMZ!s7$uA!_j)etcp$J}ETA6-hC|$Zcu7@M-|6
z8|TBxeG4#n{8Wx}?M1OKdU5wjTK0RpSV()JObIsGmtO}A#{*{3I66z64J>xU%FuhD
z^V5Lu<R9Tvo63c#K_z5alg*dUM8n1(%D6^17W@7_RM8yrTQ>Jsjrjhx2Q1fINQMi0
ziZ3O*l+pYp>|j2YGfo+CHF$Hz+&O$@^nB5=eh3D()PQpzf39B;$75Ccl9rdW>w34E
z)^*u0|7w&C;tfM=Hhm&~_-_mk*w6xFey$N>mu#ShYAs=}xQMRxen_}S;HyEM@rt!M
z4)5#8{<iO+pjT%e+x@=K*m(_4>UoS81c}0R$5dhbjQw1?cOhk+9>VR0!L-O=4POay
z!d<cUeCz88q0zgPj`}+DrjDn;qkk-u$32Qv^MI(8-O+7vRmDP`gR(HgRd_t}8Ej9|
z<a_Qt(Z<G&2l%bvla6n}$Knnojx`X<nojX4{RrNuaT3CYW{KioWlo79GMn=pj;r0L
zZms?>Dfqr@hLIm<uKQED-=;sz7#E1Y`h-K4(@jC;$v}MSB%R~^4~lpEUD0XHcfl%J
zgZK3s!2KGe7JNYp#A~Q<;n5HrG@}`+9hBM2xC_n-@4>JnOzarH95U0sL+^7V(LZwv
zYuebMgK-II59tUI29_XKS%fR^E~3C`N_eiR1OE51QS?vGW92P2uw(F3C|cWvU4FNN
zj&>@)SvDTE!!LnZ;39lo@5)Yhs>QSUQ&{De2JZQm#1rK@n13rAhp!sRgBNWTlp=z-
z!=@_X&pTgX!RLCi?pq1j9jBvjQ87)ZHlV!^!#H(UGxZ6w#w*&BaZaN;o(W!oCnD=;
zrNT<Sc|KZPJ$DL4=^Q3IMahPil?@7ACH04uE@#df&F@xRrR#$taNqdp__=He-IlAO
zN9GR@l07IPRRyb;e}Q9D?u%X;vvH809gg85e(`iJiF|~DJ}7a>pEe?w37iy?gj1FJ
zvBCZTq0PM)tX7wvMWeUjwC#cHU+@y_4TAZvX9K;wSqg(I9q?N6d!cx^9z>_x3KNnv
z`0{skw37ap^;dS#=jokUeTEJ0JyHepaus;zkU0MHc|W;b+koFLETm}oMmN%Ci1DHW
zx?WU6#}q@pFy$3ZJeEx@1^-~&4lQ9)<XL)CP=VjB?8n)<rt)R6Px-^!5P0_}kw@LN
z<PJwR;n|t{c+l^?kbdnHZt6P>>`MExL*`2|sXK#L{+#5|Cl=86Njh*!z8-&VnaNt4
z?Kr>d1L-^%i~m}T$nU}_uwRje2WxfVVen(|6`o=%vmEfamL=OAn82a+SII0QPVCdc
zoDVPV$WuO42*o|;@zhoGF>q`QjR^|nxVjY9>21dOmr{AC(^o<5q6-bFPQa=h5pQkj
z#^FOt$okGkKH;bZ=>vYszuocV-@`^h%H^?Km3oj-+G9GLKMIRq=1RLHE3TMzfECgb
zxKC0Fd-%(F<wG-^e9@8<js-&FF3Io_HxbYC81Zcw((YnCULXG)=8U>aA@{D)(#InG
z&GF&p_zdy$k_#}q#s~iz`thih1IT+%D^-{;g0R~&V7bu*+<7FNtAmUowCe!z!-d71
z-A|%uthx$`&g$Tn4_MY`Jxnznj;i$qSo>)mtyZcQ7ar-x^O8c~j(-{SGfxt;>?Lbq
zt3RjI#`Dmh)~wZ#20rcSIHgIGrcSotj54{ndb%+-FK}YLB~h?>O%GN$lPD_QI)L6P
z2jN5M1}M$H4fDblv$p3b>b7AYUC>X(IcCZ7Nj1ALCqIlj&z5KgfwS?RvpF`^YH|G1
z61M5Ik4A5LM&kpz!ux9kTL(=ci}VdF^wo#9uPv~=(+4W?3PFv2^ReiC80xhAgh}s)
zgY0_%+>zg;)z??@@I;B!dfOQ4V>NknuTgB(t_stKAa6IhMC9!)*~;qJtLHX~nzn?0
z??M<c$P&|U{UR%MO(|vjL`*T=gjfIG2Q8mTkmb3Z{9GOQz#bj$_CtYZU(Dcpf+4nc
zozGu(ujKAcQsY8n81MAGL*7z5;z58PPVtOnjk?)*?tCv?_v8rl?K_%wnVjZS%iUc5
zrCFwL>n72Qo{0)iVldou2i9I$k40LNvDVZOhE-psgJ=8E_=L4s6>$JQ?Ox9V=Oobm
zbS1P7-zqDa|3bR{uW|5uXFlfkL9`gXg7pWEWw(4KjtKe+<qJl#Ahkq-JIk>4OBb9I
zF$hD(?uS_`3L!M`JiH6K2<Gj&IN);-<(OQ8)9+rx9ra)w)O?b@-I<LqN*_XB=Z%;+
z?4|JS<3C7ivO|UM$+)6_BD{Rm>OkKj`D5Nwiu$R?y`|>g;Q3*6;M_#~FsB2heEv>o
zpbUq{3Ov7qDgNHthj-;Zfj$A6(rhsvt!IzmLnV_aHL{cgmU{5CisNK`E)lb4|EBb^
zq4=)J5-xNN6K7Z`aQC5FSZbq&PMea%fjyGZAx{wxKK8(8e)C0bM`uc`$st{-h3+P0
zq`FzO!Gi${IVR>K-B=mNS&eeMd3+*$xMB!5iuCZ(jUp)jt-vGIcEZn5^|EdkyQ1ga
z23ba+8l&0+Ds(T0oDDO$%d}M574A=3OM9X2e`o3Xax00nVk*&L*5YsDy=eG)5m@TR
z(ypT&u<Xb-jI`^>&84Z-#Y~s>_m-@ajvLtY<Xa)8{T@W_aKZc?0U);Rfw#5Nz1`KH
zW0b~Hr|(12%|n%=ZPj^6uqUf1TjQ_$p+dm@ZSn`VW{_QMl2|yuC+&>=E`;tK&Q`Ow
zk*A*m7n}{_W2cvK<Q3qa!b%L?H3W`lRnh0)cc5r*XD-i+BLA0LD$?sy#4E?9;!~re
z?5KB^3eumzoRRkY-XMztQatI_`z_>iJkddAfn>W)zC=E2a=61|M}E7z1HK9z%x8i_
zaQrtbd?L-CZy%3h7aI?}ySNPQM*=9Wn$Ksote~<b61Q!N;)z)y!q5}@d70TB#ES{E
zZJjY)d45yo<eJK7qcVkwcSkeS_re!@wz2L0*JQkX7e8GP#dmMy@|Vv}{D+L$?raiN
zzOcj4ZR7daphSA{ES^7KRizz6B84yg7xLl?si9|o7$VzsgkD;1&>EV`S8`TBDZByG
zlNEyMoiwO8kVYr_=YfKkGQL>tCd@Dc*~RZq$m(>Cu>N5WbQ^1l=TEBQo#15IU6gF6
z`Mt5<y9|<_vcR74_Tq*^gD^8MnK5Ztg?Vx>ICv_K)(um{e3iG*{!;o37g^)`e=D)w
z<A7katuuZ%_yS&vXV5M<9{=if!J^RTqSdoWXnr*rJ$4VEN!|D0j_%5^GIIjYY8=WM
z37R}2B92k35aE0d+T`uS+v7Te!p>j_e0hi5Y=$6S$z$82S}^QF1|44h3><GR$1hg-
zVAK3X?DFLS&7Zdoi$Au|%7>5W-lX%;-^H2ct^5Epp8p2Jm)lYKi!Ev2s~3V+AEDCb
z7s6`UVOqL2f!bSUW0%ewWDhbHqVj-!V&th0U{Y+#Hy$mP=G=G5&iV=YI-P@GQZC73
zW4`doc>(+W4#c&$obcMCn>6S_8osc|#$V6e#l4>=a#DdKWL>L=7sKvTkK81|Gus={
z@(`RgQ^cnQ9qG}M5yGA;ZScED2Q#PElVMyQ{jRwUb5D2R(sdSsQdzaE;>+vGZ+RU^
zwM3uR85|(H{Aqk;r8FlvY|33)hayi1h4lUcT4OHE?>(O`ZMZ^bWtrrwmI<A2&jZWJ
zp>RqvR;`=U7q2_{W9sEjym#3(stf-?k4K8s*0vOTUfhlumYwBItv=v3AxN?w?}lD0
zs>H$f)P)Jf#;9F7n6Hu-zV#`<8Cq)iRoje`*WD0|?kZ!8lv9)K?kFg3{3Co$E#rQR
z$I*=K+sN4JC3O3zj)qklsDI5?)|6&Nn%%!sR^do)nfhCHXHbG{%-n}`GtUS=?3P2X
zmI$F_Y#1J$k`4wD4MKy0BOe?xim$C$jH)@~=-V_cg4a7`nt>5K#J3+7b$?7{zYbGg
z=rmrk?TZk0_!VteY=qWHC&6*5l({~5SuoO+%n}byK)^gFR;m&~({w*)C`7a6j#9zX
z*M&o*cN!ZVO<J_GCubd~qou$4a?`T`oU<Z^6FzRnZf?_IOMO1*9(^GgkLb^nn^0IA
zrXkHZq<z$tog5gc;Lta;4L&t|Ba8AvvMCwCKX@0p<tMVQWakqXsN#S0P*9y0!b!o`
zpgq)qy`!1eM^2&R|5ovmCO69IVamPcc7ySg4Kdiq8T$n91Cz9#s1*H~hJ1<T=v`-c
z%KU2RdU7O17d;d{8hcULhcsNcY#yJC9mEUv<>Bcl1s*V=k~+?*!0Cxj82|Q;`0mAS
zxt+><^b=h9@<3%C@imN&JFDYk4P9OzE|F&+2XhyRfcI0{r#cnnQ-Io2VY@1WUEN9{
zN56z_yk91Ke`Jb958`3baRWYJC!p!bY%I?mO>>+SX{JQeFshfDu%X>K@59-OS#@XV
z?O^8I2xGhwW(<4!e1(gq(G=@!3;}rppWp1t9faN-KEi@tWIKu9H?F0Y01~I3RAG~m
z4y3G`4_{VJ#dU{!i++Q)u*%hX>a1~<3Z?7#)aL?OQ%kO7IP8x_qAL2umxz1x;`s8x
z{WS5hw{*Yw09hkPqVCiL7}&KVn^_g$i_}_S%G1%{S1r+?`?P_3pB!v5v|w4;B=(Y5
ziRt>PXnt`&p1v0&;=mg0aaj=)gYS@aV*-47mxkYl?5FV>!-d!Pj$upfOz~WwLK<Fe
zhm}?B!s9b{;N>${7!oSko+m1DY-tO1wW&n=QLEX%%ogh<LhJn$Rhm^Mon`%};?Qlr
z(CdRUz7X;u{oz@8P`@#FvhWlh=z<t}^&Hwx%Alarvq5?2Uomu%As4wE7O!h3po)SG
zu2mk$i#*lQvcv82f&@pHe%ls&gAelc->$r>$1}<e^pnWtuB<;@8*;nG3bDC|#eRQk
z#px+Q4howz1;62Ug#Y?gK=oi_>=EBYfurX0eWM0Yoqt0(t@;YO>^;t2CYLyO_-E)=
zc%O<!?goXEUDzZklKX@f(ePj;yj85r25srI>U%GC%sc}YJqN<WL<?Tnb^?UVe!PCx
zZ0^_Z9vGk{pK}=|s`~3NT(}E`8I#a#+g?bS<c~j38uJ<tC;E2rmVCyheZ2f-DKz+R
z!*j}x=#{qt`}c^Xy){xUfALswHZ0=H;sV@fo=AZg4A4Yv7&h!RMm<Xlv8T2fB*YCA
zMpq*wsXU@u<4DGL8JxbZoDAak^O(X4A%EN|K9=pv8yf=!FD)-_&san^W7Fxp<}P~H
zqYC~-j=-+f`Ut~<C9+c}$nPJ7vbb*0(_5cCb9=EoxRSD+%HX%*5cD3Lh(9Zbz{_M4
zS=VMm%E@aHc5mOo7cXbwMXw5|svN)>drNR*=XjoJc!=zh`w80i3G6ZZ7ai>@?a9s@
zlJY>VJn!x)2d@c*a9)~CeD2Z-=ZL--?skB++TA4sgAX>go<ie`mK1rWH?CckEGozS
zN5&GZST#hCC+ul}s3B7R;EFzvnix(_8x--#iAW5;l?zcDLIu0mxpdM+dKZ-lT@Rmh
zgU`2DgVlmdvh~B3Vy0p_%u1O_FB*+udj2liiZ|78@kum?)SZB(7DvT-@wxKGg&TNK
z+f?Xle?v_F+D?P(l~Fak15UWo5$n^Xy^W#<<<@Tzb!_xG^|}{)bC+hTrG{j^I2&#(
zUBL#o65)7|5m`PRfm3BCDE?OrT81idL)dsu9%IUli@!jITQ6xA^@ygb_vNW-`*?m*
zPijBi17*`EVo8XJcvV&-I{f(}rVWq4br)BOg)b$W!<(%<M9@N$)$W4TtFHJtNgdBF
ztA=rXJ`ue+#rG@j3yZRca;Ih6<TgpMTzRaPu~mr^mZZX#uueR_cP(iwkcdGyi>b9|
zI7f_oOxIhtVQs&1T>7Mz&Yayr*#S{x;#a_>x1=3Hqc%DpD2A6Vhau^05gZtjfaYDV
zQ@;`2`RT@!ti56xS$Q0WVXj-j%-0apla^8a2`6lhmS_T>rhrY#cKkQF9fpqI$5Sic
z2u(}7@zNfhV65de;r!&)Jm^BcIIg2V9{D^2Uc4U9ts#%V)S<hSi|U3;UFY%I<7;u3
zY5^bFUjVagRdM9fLEQZBI^8Hf2i*@0<IlSysXBKlhQ6zT)(kBep6iUeL`&@VW-e$B
zIw}t9=+9e>r=iF5iMY*Q0Z}4?ZQK1`IH%Z3IYkmNq(%?gK1JY~TSNKOco`RWk@1zS
z?GO~-AAfB)O@mb@)3kf4{JpF<e9ttbR~Gs5@)|2_FN{XN9}zgRUJnbjwb5;K9}c}c
z3jd3p$4ZXN(4$}kTTN5OX!!^nY10)~$RhCOMGJ6xDDBVmoVn!Ic+@Lj3Hw(Kg}h6%
zn1W}C?}xe34Z}`+>{k@eniWQ$JAV>;S*S?F=E*d7dMdq{kOrO7%b;s#V+^0M3q2=4
zrax;XQ|R(9RK+nA{o;U-WBGuTO9EkC%quz@7>y}$UqI0z6m_@Sl2L&t)+dL-fXXp=
z%5@a~J#(2fpWY`Om67;fcQxwP48+pVP^k9M!0cs5ag*vyKC)|@Z1wR2u>3(jw#7>G
z9h0-N&HagNhh;*iq&?z<heR3rh4kp}DBd*xnQ$d$jF=_N<;zJGpj9`HFMl#6t&l{t
zv=}Bdwe&zQt<L;AcDHb7WF#*+(<TP}mW<|^A}E+2hw}Ug%t$X4T5i{3+mU1Bn`(&V
zx?jllLSOW|=F9&3YvGwvGo;OWgDI&d<Nh}|;eW6J{|910=l=u;{9llT<sFS6|I`Bc
zLPcXCVpKHDR_caFw@-mdOI`|x=eNl0kEIH+UnS_?`@xj7MilO!nGByc{SiOTOa#l(
zQ%SFA51c!8MKFx67xqaJ+p&i->BqouU{;1-x=r$EokK9-_$8qO?vPEgD3za0^r0U~
zd5{#F592x-Lzv2N{J5VX&w4l9%>N<Qbh;p))o>7gE?5kC{-&}m-kv}S(ks^Wd?D<D
z3>p^4!}Vi#<Zd|^$#>yo{$e&wSg)oj>R5LZHWnnwCAlv>&)5iG_I{M@96yL=-phlu
zKWjuV@RcXE4i|8e8aOE=2zT%96(^f@hW{^QVaWdrvala)u==nko8^qayNYFW+}~WL
zIpUiX$Ti0<0qyYSNg17;`xzqR?Xl_hUr<rErKh7c_{wQNG8jC8^~P5T_U}&$Rc>8q
zQp0C5+JjvC@39ymTJpv*0#8`KmzF<sz)fC~-pGHNxOdevDZo4%^vxp4wPv-j{L(6(
zw9bYoR1I<-#8d6R)0~&4%$GOo^W+OT9A4~<iqe60Qhqp_Yn>CFkLvLAr}psSdKx{w
zJ``j39fzt^UmVo0jU0!mi)~()=xO~KG55AUK6{bI@oQ{&$DBygo0mo=T7jTDYBnYh
z4;S5n;>l-aC_H<fMhhjd(G*d7X_lwMh6OViXLiE)6UQjV)del~K9dL9yP)UDKpb~s
zG9SKjKnega#*X9Wk<F9{Y!NeQP4XhNuwISnLxO~#pQlTK>wUO(mo=#^kQPW?VnP3I
z3N-6{6v%rGzsm69HA&yWwN)K$JDH1fUml{eV;THBK>8a$Iz-LBWwgdC3gKS?8D!<r
z?7!nVwBsZmwG&}Z!EjzzB1Ib>I*7LHPBM*QSWxhSBFkdnZR;^=Y%+osy7M_<unzjk
z`thwV{TVvm66eKfV#)em=$4jFBQK8r9~>cNTa2gq+ohs$!5?_yITNdvB*LoHg`nB1
zTjg8bW(utJ!k%BZ^QHbrXt9LxoD!G~?TKG#&#pMyv3V>zXcmEe7egM=A(|De?9pJN
z1Fm&Y<Tkm7EcdBYoK@{h2c10d)FEx&=v7MBiXOnmMo+XI@I;)c>H>rIE~S}sI|@hh
z9I*eGOLVpQgD_w5I~Xhcqbv5EFfLRT`gd<2vjscFV^P*n8|#X;BUDjEc__bGogk*(
zpU5rpBuWpK3a6v5QqAmsQpipP4X}`AIQ^o+kh9>4J*YW!33gbmz(%s&Lhq6g+}yAf
zH+rjynHN{%)(b1>i4?l7UL1}0QvETpL5mFT?dLsHZ&5}{yxg;C2=naM&~!Q*&J^~f
z=H#t#T4@3{NL{JBMLoGlJ{%{+&SkAGW*E{KK`Wk)WM%gO+%;uCoz2$7OFvyC1fPb4
zMS4#kV)tS7Aa8Wjp3L%dB{<>TFNmq01&1DP#rx-@ATu;e$dpQ`?xp{zXlVe)^AYi6
zK^ab}&gN0la>;VhDmr7{5k0$2z!!?DkbOam3sW=c;pKnSVmVe&_c?)a3pVpV37bA9
z;0X0OzD?NUa+>!_OT`?G2#iZ#g%M-s@aZqg)N|l27;*bLY&a6Z=4L<XnMVNqX|{sq
zV~3!B)O&KA7LVJ$8lm5}7ozEt7&@#Jiwg{PQM<x8T$;8K$5-w^?Jvr>^5t4sp1TZ_
z9mZl8U8#6;E(JE+(3cHS>5t>a&BqUQ3sJYd3e<hm$Z3~4zOy<<V^;m9qh@)iA%)HY
zj+)?`&l7}&-h1fSbaT`RHNd%2u`^}yE;LS;id<K;@y`7uJe@EJKUQwVAyEc!?)Xn?
zdhdiPSB7HStR<KsI<Vr0o6s<P2(C$Z1w*c6h%d~ah`P(y3Lo<~(+6iA9Gl(|`c&nS
zef@iJ&mw<rJK9%R=AKKpXEj32$xcktvU!TWJ$I`A0mJGtgoCyZVCnJhykt@|sy8mg
zJX;gG+;1>OMMU$pv6rdDU=$tK41lzc0tVhnNADZc(4`MyTvs_ZUHT(7&iqfpZ=J;B
zGqZ(r&F3+)*a0j(_2A~a-5CDgZOT;8z^H~!xU2l8U|1c^b|=om&NnkKU<Sd$4Jx>0
z=rJ@tTSP^IFOK@LjZf*A(r?Eu`0UtwNH=t*mAe<vUk@n^K-WQikX)P_<jkMeM&Pcu
z{b7LfW>C(s!O~Z+V8H=@zOh!Fw;$et9W?|>O@2dN_tc2z!?m&dj#p$;GmWJS7dj3b
z&6VkI$s(}{9+&rp+mCP3t4*=s`DidzW-r3)?Ux}*?F4;pEv0iyB}A^>KWH$?<nM2z
z@X4>i;@wA<5b)m=jM$_MN=Zg|;!Y%m#h)Xa?^4mW-kyJ{DDx6;Wxm>H3TKDhfftPr
z$bNn`JqWL%D^p@5tnMOu_IU_KeD1{SoK>l8@O8@3*~}v@-k|%P?fFXZfARp`XP`d9
zLg=2Cjzi2$+4EwUXeJ$^^4@%a2l`8><E{{Vyv>b$b8d-K#~5S#AwRUR-HW?&li6qB
zUD&#7G&VaJ;FfuhU}Z`N{9zWvlLLN0`MprADVxjPB<PJ5YhiQHZ%8Ux$QPemvU|ib
zu8(OXuUqF~W${7`dp#a277U~AUypD@S{Q1#ZWo?B+Rjm%D@pTP02`f_o>9LBK#OG$
z_77KNt$hgJj%{$*A8Ltx5Bjld`4+CVnutmbX0YW;g*ZS8QC=N0SJYEoiUBbyq*}iP
z&EKX`OP?%GOcSI9;3n93uZe=?%LzRf$=1%6VcOG4{JVWEeu_85zawX(PuE=0v$l|q
ztxe=rb(ctWjU8#Mc}_nzTJoClr(lJz4X3)zl8TBAu-Vm|PrWolZK>2R?sVaP-M2yA
zca#bGy1Y=rA0CKXj+V+Tw5+8gcY5xC^*i-(ysAC9ojoU>u<HSSO;_pSe(B}6ScGxQ
z%&?#&3a9Uj=U(HkiNEfd;mv`Qc*C*%u;s)!@oL|CAtC$>Jz8prneFxTx?v9J>!|Ye
zIy?Tjbr5R(mV)q;Wcc}#gbrVEAB?_jp{1u&DZ*$IKb*aWjdef5`-D2tCHncu@4<eo
z-RUwUPRyXEH|==qn;WFD|1FtqkK#MdL-|DeKe+d8KK6H=hu7)~@Xdl$EOm<&BjUBO
zbI<Mg$wh`UigeK~ok?-_P}m~r8~>d=B+e_9%N>VrMw+XE&%lf>SL(BCcrMOy)F%u3
zr!?5%I&C-~298H+gths9p?7r#t}0I>vql3<T0Ih*BK>*#B4b?QoWO}C>3skGdYQ?a
z0XY4;4xbwOPyE!-0X?cDJ?E_*@ZEDbmyf?qqf{JGspB2Go&dsx!gPFJ^G(oQG7x`0
z*Q55$nb=MG-XFbn$ot@Q+^G3R$bzLfK=Yx{CD@gxEvrD;4G9h197oPm)o`Mjghx|1
zKvSE3yve<U=AH3|u6eOgsTxTS7OaMZrtuj0pqhSd>LK<rvB7-%&8T1An^sDNkdwPd
zvyD?{dUd6O4U6+>UA#!zO<LTV7YaHth0y9-BMuDffTs6Xa`<RvRv55|YpzKpV3$%d
zD$k@l9<l7|oGf;5c@NVE$%K8$X87aZM9y?nVmsXs+;Y4IoC;dt(~a-oakEVv+j^B0
z_nd`?L&t&Dpy4cf9;Am;^|<n_4tJluliR~Pv+=tsYLc{46^&Qn)aVX4BPLAJSo!fP
zV>fdCFqbA2=z+C+Uwol&!(Qn%)Nj)+?hvXY>*hI-FOMEUQU49#K|}VVSHo8P@3IdJ
z>D`<Ej+OLEjUB1_V*-he{n+X8NLuo(MKl~?&Ue*r$bMSg6P5jj!oNP#AVyn>g4c&)
zoz?}|*~c6ec4=Zt_5{dzW=Vc>W8ORd9+mDegjr()Sv_$DT`DZZ$ftkl*xhXUerSsX
zvFe74@H0h>na)a4w(O#r4mIHoRQ9AdSFSh-;~vcyZ%AeAsS{+pUML3PN(uZoyc_Pz
zS;6KZI=oq1Uo1MW#EUG~VR`#?^7v(ji-sC;kH1q;$a4UBAEa%H{pE4zq@QxB5srqO
zro3Ce^0XOBWcH?&9Hnz!S<Ne%M!*v=aUaiy)t$L0w?D7E-bB6ns)6>LSPVPer>xht
zOSJaFJ2>0ujblD8$C1)fq;O3?{40wVOTOQQFz1(~kx*T^?_3_tZGH{kB{XM`M`z&D
z^o7_-XA1qvUV;u2MH(m$1d9bz!9sB!8)fz8$PLP9^mQVl?51E6UJkOE58+6vHR+UJ
zrr62f;6Zl*@6Yz*mWk78=dHQy{!ie(@6+Kzi=?c&^3uV2<`#0Z{wS18@}_!cJGw@n
zgu|bQ$%Di*eCyM3Y#pmf>mpLANq3MqqpLt``|ZW0W3EzS-d$mkwFMjt3d5~wijFT<
zpZ;I$y?Hd1VfgQD$QY72(uh(Ng}vF&eIZduBc-S)W0_JaN}1=(nx#Za8c6nLKleq0
z5|w62g9Z&6HAqFh&+q(x=RNOP?|IicYn^{zYgyK^_I{qdpXt8u>-v5_Uy>g+4DR*D
z(=2xm@$AgN<#)GffCd*MU=YGC^5*dHE=ZIXuVCuk^=Z?!K)gHDBv8mmCj)05(t%@Y
zL}!#Tsk<&mYdLK3VU=g>!0qu+-e$pkoBxBvUwwr_Q%BGgbkRE>{OOfjPw42@5`jTn
zKN+jwjZVJpf;)QSa9+qR4%K{x>Pn=Ou!b=tf8hZrS^a>zy8UJLJlapD>8BCb=F{|t
z2Bo(`JLp*LLL#S>M(0dvBl`@_lAwQY=<cJYSR(Ext5OZ{jwu7f&Sb#C+>dPF>MqjG
znVrodW}`0`Yjn{l9hdJ3#$<6Sy1rR}Wk-g?=xN&2Yq>J9cb$!5pW8&DwHmIL&Vbb^
zl63Tk>#WxTRmwVzK|{l3DE%Q7elNI0`tOH9N%dTIoa<d?6%<mb=TF6}=Fg-r<z3Nu
z^hq+l;xtWKB!?fZ;-L3KCtWa?oAXb)&jw^GkqaCSb#GH7af^CN-c`P1vQ#6XIcYg6
z-;)7jNm;HjIGk>1{lc1@R)JzA4%E?fl}w&3g;C#E66-2wRNR*gx5jfOaLrQoR_0nV
zuOSNG8pQ$8sHeVW>+puU1ojCo(rPvt#($Vjo-7^?JMKi19<e2;eXz%uGw0FIoq9xS
z$4Dsr7fh-HXHvx(eMG;wgW0_16zNb3r$yHe(b`!P@T(!gGpk*Qp2Z3AGf#c2w?0mf
zMBgO$R_-PmOMI~G!Y*8Gw~;=Wc8-+_sG*K4Vo~X&4jN9_CU&wYq89eeWcY&_;_}%y
z8HxNzq7yli^nW=^1=>9Pt~?Se+&(csL-?don2DA0?(jHkG`{Vf1-0LI;@)ln-ne>_
zru~totu-HMWJN5avCkZ5kYD6py*>U)cub|<OohHbg{b#R5A5vxa8;Q+E`3=_jyro|
z@xQ0^(kUHQzwiXnJ}AIFj;EP)r(<;1{0ZQBF&@`Ot3Yzu6B1=8jjQ`)z~!AQzWni!
zN|tF7&skh-d**WZ8f*xw;RM4<SdrAC91?Oq47%?DG<Y~csNQT?x7`%Ye*R(}t+650
z%9gZuo1jY#A@g0k=wr?p;h|*AWHvu#G8acN@AV6r-x~QO$Wt9pj~au0ITFyg`!~De
z?sY1kZA4>?b6FC93fvmyX+YX^a`A=^%&yu^s``zwYd#00QP##;5_wEXs0Ow?lcsSF
z`8Zs2CdRFhp^EKtpm=f>L^W;z%}2YbrQ{oG@b?SJ=$Hj@(n`32i(x#XKAv^n+fEaw
z_i@I2eTZMBi54EluweaK=<lCK>MRe@X6Y1A{yYP&op2y9G(?A~rjVwbcEQLQt8tT3
z7JXbb1?G#CaomtA{kH_buHhH$@k__N{%dq7))l%76j8+_6uMN^;r@Ys_V!CjC|x=h
zx9(u^*E0t&9+84Wtu?gFshfnfF2oihi^UhF!lqIQcv*LteYN@m(>1*aB#%+{H3x(6
zc&7w1asbnIn#0$k9)=kmhH^^}(5VK08m@A&n^9Aiu$9Tvz<paK4Ps>R)jd@lvR)6e
z_G;*i?@8iGcWfB@oIG%wMxv9K!oI<GRF;>FX=du^R9A#5!tr3VG!bXbaD@AuiIo@r
zogQh<gV+xX=|E^FTRYqhqSy1t2Sa_R{p<!_4-PRCCdt58u0Y7tCyR^J9+ET5?NL%&
zm5I7HjlQz|L%U=2=@=tNxY;oUKf1Y)-MQcCxv9ETU(*dN%}w!-k~>U#JOk{DYH3iT
z8LIktz>z6kWWkPi^w9b^=zBF0T|FgW#)US>%Uuj9R%wVm^N5iaOLTD+N_m)rhx2G0
zY}kp~jzU^(v<vPZ)Q8Z3bz+=m0=HJ_V$VQ5ap2$+p$g$xrFI;W*Tm7#OK+&u3Oz`T
zTnXMzf9dNEGnm+wN0uiRF(*wqaNrb{Y*;u%tD|#>miKJ7Qt>IRe!zpZ$6RSeN;+(u
zbsh_%8p-iaQ!4u+56#lV?7I2}x;V=LPMCdXJEfLjXw3@P79lIn3n>(g=vfc_xnp6d
zJsRe#t1*FsaFU-f1ER#o$;IiD@we9-vgeW})ePFtL`IC_gl)^{ox#Oesjh-s>aR1M
z3Pbear(!g@VTR8wuaoCPT<rRfwV=*xA#LYogSKxeRlDCyep~3`$*C(a*0T;Dc3mL-
zZ_DT=E^cyFZ~#Wx`s48tSBZ(qQ&Q=z56zrVNjE?SB)7>UzpRiD#XscTUIo-PaV1vF
zZlY3rgVp)|kZH9kp;xAc&}$Ppr1L(mu@*BQKf)6lR-lD*lfDzb_#}*dxd{A=4x)fx
zMjgFv@o$nJUf3&5cN$4C7k=`Y@^BHO5y{Pe-&c_r#*@k9^+$-rg;KB>VUDMcPGw^5
zABAk6KV)i&16KNXQl7FZJ+JMDBOW-x`M4T#yWIz8&c8)(@0$#^s(Z<TB<`5uVs6|5
zVzjBA*7emf#RX59;5{OSXID$FtcYUN)zWb4xm+4r!>76pn{m4B9C}uJ7|uDo5{5F2
zX}NSM7cG~Du_qlFyY5_cQm@3N+x=L%S!+RD8IM7oi`e7I+KjAi64wBVqS9WIaM*7N
z45|4@CUF(w>}wgYE%yK;k~sp-fzqh=t&Q;850mr`8BAS1$XMMB#Ao^DAnR`n)_)3V
z)nYHGwW%jzYCo7I$r&g$b1`JaCergABXG$61=%Z*z_cSq1WM)MtG_$hK0ASEbaKt^
zXZOTD6OJ;9>&L;Wu(4#$tZsoR?<2cv_&d^E<^mqM5*&!LR=m080*Sd;!LUL4pp@Yc
z3Y$6Li2q)CalaDSPLLL?@O{l(-@pTXmnwR#-~f3c69g@H<M5z|C3sHxNj#@#kau@F
zNz2<Qknq!iym_WcPRJ<Z>8sMv>R$liEk#6EdNUY%xnlS755%)So5pwCqDR(Uq95bA
zII_xQs2o#5rQRCSfXn%C=frgAv%khjZu5kKZ|3A-b{R=}yBVKFO=J8kn(5%(P&Pay
z7H(TAz^2=G7_$x^+!inwg?Yua!ZVYaO1xq-9p)iSze|O0A3}?30dz}`#xQ+CXN=cn
z7CCuvke_jQ$cY2bc4?3)3s&M?8Feh5w+PBMAA#c!HqenT73rrION?|FBAE^Kg1O7;
z>9&pP(7K($oPqIFcTNOz^kx-!UVBce{yig@IT25+?4)eIJZj6C(9bVb#Qm?Lu%bDD
z_DH(p68BROy=DSDO7J08#^ab@n*zW{e;B;r#F+t0<>6bvQ97A31Z(#jU{@D+PS-ob
z-x>RGdbbk}c}!;$x)+e^#fz!n=?Y?LJp$y^Y;m9YMWQ<|mo!JL!k)U*0=-)kQGCaN
zs8`D3y-^9I?CU&8Iu=ap7TM58-IqyLhXg`LEp^_zi>$R4(Xf}UxRMilsMhegwRA9c
zu42gL`MtDjxdL4km`N5Wz9U_NT(G^cgF5U=1$O2mvhh<YIdv<aYYNH1Db9eUaVmqH
zEl;B5tGVY3dclF&cc?<8NKkTWF_b5rp`Y`&VQhjFcTF1NMqU$Z?BS0aeoQBZr;oC)
zZ+s<t<t^dn;SJDvPTIC$h79-|cYvPIFgl}C9SW>^={1|lwqHNb1owr-;@uNVAnok{
zm6Q#qKDVn$(WZHDa^xQn+-9L-y%Ss+b`pd}VVnU<fUC;6D4p1=^vgea$m>v|Y_lFj
z`|jdo``)lf;S0_1`$k4{LdmZ!O11^n&&ei{7L47JMvJfKV45h6krQuYBc{G&Of>z#
zL7)L!xdu0n6U=8t8KLN{7c@p!LP*Fxm>#HY8@;_AlFD9_mMf3RvXoA^!b>M!axKLD
z=tVkHHw|QdAE2gE=fO>r6V5j&i+x-yV7-4Jw24zdw%vjKJ>3Yy2F+;i)l?=t^$ebF
zj3Wlmv^lWcVP;*;1!5ikmJAvEq$~c2Ny3%6<SrM56|=z&i;N3suC4=Y>~^5?gG~+2
z$~@2$`rsCcQjD7rMx8vbk=(oAXeD>vk5^^E$7_zbz>*P2+;*cQFHFSo3mRxgaUR6K
zzR8@(cfgzHbE)r&*EIjsAblCm(!5vWpeE9rNdMbKrw^9F0!s>KO-I4(2@`<am12fj
zZa}y9o;dfMJ)Rlykd%D7MoP8#q{uZNLPp9$MWG`p%+Q0MG1@e<?<5YY$3l4E88U3<
zFyMb&O`qR#Mcm;@axRB~%c^Yjbb3r9w^)Ok`wjBleG%?{T0jjYqw!Lq4qRQ%Ku%*l
zvoJ>u)Y(6b(6xcAC(9v)r%O-N6rjFKJx2R;nDTNN^l*Mfx6E?JV!N@Zp{LI1w=X2o
zezz%qO(h&o55(HPdN}lp&zbHrQS*m6`|~^xBG1jCtFC@#oWG33{C9q!lIl-Jma9VF
z>o3gXk=N*nS5?II&Jxs;asutdG`jX&9-jPLNiXdavNha%e70XPX6C3sQK})@X?j!b
z=5kVan|lU*Fa|5{$?S%4F>uB#401*$(zjha6lWYEu9!pLZ<EARp{Ce7kIw-GE8&85
z7a?ll#LQzZD0S@y1*bc7>6H-jA<mP!7b)UjPGG*~Wimv2tJC6zIyk<TGviNs%RKYX
z#N<uRD7o=E6ges57YjfIMI^rq1_c&FK+U%r<ImQg<eX?UEYJMP92&(z{Tj<)n(TUf
zSfGK!`&;Qh!E}gO?n;h1yVLr)E5M@q5P(6I!2X5-BywgG2l;H+;Qf}$tltQ)JYG{6
zribfqm}7$NCK@8D1XjTwtKa2e=Ux#zrX+{>HXkLn%wY^$r~*4n&B)#N`)N;w0{Pj0
zl=`%!(z^jHKL1lp=NeCfIrKa>RJp<I5GCk6G9I>VT!`DUR&s0O1+aKSC0k04lHcQ5
zI?!E5_cU9;@eMcW+B1C8p12pnq`YX@vRN3t#~M@`i)f?!Hfme>omi55@U7Rz{<Eo6
z@7PuHoHLMK9=RVjS3H0~8iO`Euc)n22le@OO*}Ov1!lzN(xo=%P<7&TRO|A=s%vLq
zwK4~vKJS8C#<nvjUb?WeRu|FN4tpW?h#!=E;xftFvfxKl1!GAa;KH9U5~w;1YoGbh
z<0T^*J&S&_)cG1WKP#rEEj|j2lN+eHQzzpXc%PIt3SilpOQfc!kd8|^&n_J?NZ0q?
zCP}*`S(#o2s^<S?++r(;ELV<sxthD($_!9$Fq8P&sDiRx5fp{IV~mov<H>^E)Qi1O
z9?}NVa?u(t#YE$+=Y@2G^8@zO{R^}!<N$nIvIjh0$%4YFRWM+igbC|pA?MUDHhx?h
zSkID!FZTOUMaz>EC8iO<7p?}mcC)}@;4h_5C!$X7cM{(zi(T)YH5@$lj5YXug*0tX
zAPf1LC{EskSNxs0h@I!O-E2Qo9Gpp3?At^-bNyg@P#WPEZ$(u<WDm4W#l$hOWbVcc
zVp;xLaF&zHei-wFoJu(_c<}K8{oLBe^7XXAWK%gPnM<$|H#R~sT}LH2_;|b5b9!OS
z1gNN+z{Py8=W2oj^q6l77+0QvU19f#z&e|lx!)y+xn7x<W(Hg@3IjuuLR^k=pwdPq
z$P>u3U&bDU#61C6*ME%e^>u(U9eYp{o@RH1ed3}r8C1OQi7B^?A!&IdW43h*=zAQX
z%YK-V(>(`ag~=NHwqYSXSFDVEYogeoVol%`cQVrJ!^!5`X86NV8^c;BGXtjYiQz9U
zN@vSEvgXcQI5BT82b@l!`yU!&@Ah6&eB=nc%HNKrJEnrlfl~0d<l~!{=gGP^*+k`c
z8{K`m7L*P}L-2$@lrO>6rox)ZE;+6TL>l4j#ZV@tu7U$PU4{0Q!?-#oS8MrPMLr#o
z0^ubS(tPL_+1-^c=KEU#+V#^*eh<mJd=u2~yFh0jjmG9oTjt=}-CWON82ZY!vFAd=
ziC={gp8erRFRAvxIA$14>HA3g)uy6@MIWg>+}%(+|2hpgZ3;hyg+zPFGKjo0g;-U#
zQ$t%0Za3frN|OHY{l$6WHjoeIqxRqzSy>$A8%E|-pMzuhpXqq7Ex7c80o`2V4`y6l
zxMYzUKF%K^MwSNDW9JXDc+z5wb8jGVhCfO6Oka>U_(`U>%!go}ABb}7>C2Z3;Qf)6
ztleV^@-?oMlpbwjE%z2t11}d?(7ubF8erh%Knc{@Wf7OFNie5I2_96rg5i-sl&Dz^
zvnsX7kD_oeDBMho8b1g&@sy!tdL<iKvJmzNxSsVRONeoEf#*swP?hPAasEND;1d_d
z{ACYuex1PBubzcvk}l9AXM<W1OHg8HG#b76%e0%`XO))hCxK_W=+w8<G2eF@giO%|
z$Fn+cEHoU-_EgeZ#~MaktAr$bAHa<YmMG900~x2SVeJ?Vw0G%Yg#nvjvS=(chR8rn
zj53%dsAEj`7RtUTqCs45V~Xi<_Fda*kpHL%8+VU}S5Btn<jDf+9N-LzR2nnf;^1!R
zS8{T5FI!u?ma8Fgv8bN$WW`i<&~ncuyIBp8jvof!Rkp#*Nf*eKOi9|>Z4XKLub5lW
z4&cgx7VlNQVGeH$hnzWcn6-=V(hklTEE6FQ`9zsXnsJZ#E}cU(ROY~iH?72{cOA&~
zSJPd4W-<GMmx5nbDrvbtm6|FV!BL~R;IlXZ)D*|!j3P4_@mdr1tj?v5iWyui<uJTz
zaG2a%I3IrhTFnl&IWRk3tz*5eZzhN3B=Eo*5!bWtrrAr?Xo?|sA31QQkp(ZPf2%sQ
z`5p$7?E55Y?s(jt#X*w0tnngOJ97wXXEepWc-GL9?Yren1}l!!QN5a=87M8jv9pmH
z{CiIoZ3?NkY&AI?+fR-ajlztQU)G6{=gGZe=6EA=3CuoO%6RXnBgdXif{xAvOg^{<
zTKumN1GUQ}$UzEbW(?AkHZss<lg^ZLbpi6(0DhJeBJt)vd9~6E6xve+Ywjwe=33S!
z{L6q%()cze^_UV$+6;ri9v^&Iya4((j3=YA6iL0x2pH}n4Y_mDuug>&ei`qF{Zl64
zXN&Lj*X*~{TH_g0)4mzJ`0p7T#S|DgIv3st+QXY=oWbgcI_~q9VPbr*i!Zc2Ak&q&
zT4Bh2a$&6;Oh{3J21j2=Yng#bpS{pj(wjWrF_xqqv|$s!hQp4b)ifu>16O?YVnQa5
zMT4I%WZ{NK0?YDL$Q_YP4YFs$gVr7RDCP)#e!vON$WFqrd9K8^Tn-7oAexS|u>5le
znbP@+y)Si*{S@3xXKI|Iq0`@!>iJb<^iA&hVX~8qa-M;0I~1Ui6VrV;v54;M>ZJ?i
zq~T@ySXy#+JsceInQhFR0K)dmf}v|8pncXNT-;HF%DG-*H?1w;lcUEm0e=YIa^w6u
zO*stx7l$+7{t}Cva%tz+RI*Wf5soa>!06jUG{|Bv<okv~(6<<J-(WZx-8)W6b)2|T
zGKf6vQo_>XHaL}w*^Khe!4#`4_{K>O9~?eGhK*i>qw=N@H~W9YYy=necI7GEntYOp
zSQ`glPqvafvkw#5vnsgYlbA8J`$+zrNx`v8AJXoGc&NS0MPJc3L^{J4qzAaZ)wFw5
zTKzS-ro}~>E~`h;GjkZR=M1x`XFmC}HJf}ZH-c+_`ss~vU8L^aeB6Fm53WQEvOcSx
zQq?nW>Az)f=>~^sq+`lwBAQD;t$PwUk9)!n-dQic^m;F3s~O^1qJ$AXBk7pEeDHd=
z9%?-$;q{BL*i`nJ(K2-t-*cD&BYmHfnhee~c&3aVoSF!#R&p?zs^ZdH^GI?)8yO0V
zBs-NalL-@Ih`(tXTzz2z#@e5Vc<3~-iyac{yqHaN7HpsapMTL+9Du{>qb-=+nu0!m
z80afmM`r~(3CyqTfOE1LWUhk`JPen_t0zA)nQyOf(T|5fa<&GZESU(4q~^j`<2dLN
zO~n;GBbf`gMuXfEDJ~u}j>h>;f^j9`ko#>MIR0?Q&nn-To=H~pkh}v)F#AUpYz!c4
zh6420`a@^dXky;Jj@p=J!`_LXnR5MApscnO0&dR6BtH{;{`Vg@H>qP~zAixB5#Dre
z)i(ku)ug*(3RVtRr82`;z@+3rEa{nz%PKC@HRq?p37_d0I!YQgR*eT!^?0J#o5e-h
zifN+7NlY1g0{I0SK%>=<(=()?b%_KA(+i|J#qN-^DUM8xuVLfII^ii3clyV36yzMz
zBL`%s({HDT==}rZ;fk&Y*Yi%npDR*0=&T|H{c2;%HJ{UO!DF$cD4I5|w*((2Nix5e
z3!E_7Or29W`1+kuuyANBQCzP>o;*>9oo?pnJ52>{iMYCTP9$zKS_rR>a&^qU6EsOG
z84otpvp@Tiaoi0gZA%x^*uu}uxOdyc0|%zTQ;&n#@^&UIs!Yee3&rRZd|oVUx^2VM
zeWqD6OTgsQ0#r<+;*a;ZSlB^XIR0FViGLBsddDiDf#EfDpUFeZG|tp|?<{*M`w+g7
zH3i<pNDQBpNv@8lhp?y_M03kF6e>4S%}EO}C)<F&x-<&6oy`DE<36(7O_%jaR=^Fe
zB3igC5+5$ygQIn8h|vRvINBe8rLIv}Ymh(`jt@sEqhSc{7ih%LD9WoiNA|4LV%N>c
zC$>?M#C`R4>NDmJDPEz7UL*Ek^P~&nli9yX-qoArt*DmH{b)#Ku3e#xM>$|Hw`N+?
zxrf*o#5ZK79!G<lrKDv{8}ssGF#2)f2ca@mwAlMHNjH{(@yCbg?`M&4)<T3ECd|Na
zy8^J-F#$Vv3F)FUGx1w1x1PCEz$U&<fJ=-T`O+Oocki)hTOS0V;ruP^=6iX>ek#z@
zpW-mP`8#Q!t%T*yTcN9kgF(`1?5x+rVeVc|3o@;Ys?6t1kB_IqB7qMa()&s*qb7j&
z_`U3*ffTyk*$GuYE&!FaY4ny(0j8K7g%>YIfR#HEowat@`?Hs5?z~D*zx_+qdn}-I
z<35nO+Cr5ZIRLrpDNrudAaZxNVQ9ArwAjQ844%$|xEKy{dBGA3E{y<(ycQbc%RP%F
z&(ZyZJkSb0gEu0Q;L!nfh}p#%ZN^zb-02jmX0eU#s5T;FH>nc!kmFR*1JK(-ji@@W
zW2QemL?#$0!KF_UWO!B){T4YuhPO!2!QmZrT;)r$#wv=s+)8J@1q@OJz5!mkItu#l
zW|7JN#2~!23(6Ws((NiP;?U6(QEj9HTY6_IhdwQ*RprmA@2O&#dSwmn+Mxx*R%F96
z{wwzFhHEtO?_7rWfIxfTG<4Ll$GL4M7)z^0>f67Nd|S(s^l3{l;Yc-EYpaPi9dGEW
zmHx0QTo%@T+6)R~=7HJf>FlKuse<ra5A4wS#NHo9@N9G_oY;`V-uqle-v2g%88;H}
zR3o39+&2!cs27kurcH2=dman@+{l(MAuz#r1RAQxV6p5R)>0;!4d&)!J1@;3!kR*e
zKBoa$rZq%&x*hAiTaP+rJt8BkjBx3mg``L|UMzVhl|3mOpf?i7;)hN9aD06TyleeL
zetca@TKW?~Lq(4+n3G3(>zv`ee-^d#*pGBf1vyq-M}D3uhdZMf{5d0zLbNMX4XHx?
zin-9vK`PwVx)YT(#vtT0ts!|n%!{;E8pG-Tez?kmiRmCSLQ8<9?^mKn^-?spn+#hg
zpTn}cbR1qg4wwI)1b>c8lH{61h^=0Sp6!$Hm|F;Ez%}ADIW;u4I|7ntn!>GOYqWLN
zgyok_QT5tcV*Rg;Exc)prH*=7b$1@T6f}V#^(w8C+X<7ZcaaO{CgI68FD!1=rHd@z
zu$o#P<kpl5cxt~FY;I)YjR|s4e`_Z-`O-=bfh+M-Ji+w@LU22;i6$o6L0ohWJrg;b
zwq_vl+j^5^1Wbe5o1+12FTtj&e8|y>XMcpq;qf(3=%)v#&`eGTrM<G?O#23GjxJ!X
z$x9Qx3ohW^qy+J=?vvpYdzcmTv}yi9Nzmd9FV}pzF~BC2oIF&8vt5xKK7W*M5-H;L
z7(SRvm*U5j^H4j}5lmu6!e4{Ctka`uc<J>qh>BT-*B>m#zgZWdW$|sil-|QWN%>Ci
z?hmHkZ!ZCFTLI~Ht0UX{i$E)H5`3SS2^)_k!J7dA8sE)8S17@GDd%yTc|Dd@A7Z79
zm%(q9r*!nK^WdMHL3J%npgPYV7w9>mhsAJQ|7kIlq{S1--%r_<3mqu9og!8;hqyTF
zY>@BZVKW!|d|;0SjjY}ZEl~%klHq(vpHhqR$zqxkt%5OCZ|RpegT!yXCcK?u0rmIP
z!0yr}y8S>Z*{Obnv?|-;(q?z~c<vBg^l%is`+161Mk>*!!>5=i2N{g4UqC}*SCUzs
zf5}711EhUrIJs)xOm4(FVjicxG`lYa(TZ&(<H{=XM@gQ$myXeDGh6y#^JhAv-vaud
zk0J7*o*b}<(}K-Jdg4hbd|vp6na61zdOT84Teuh|hj!D?jt}Ur>+#gflwj?QaC~;?
zJE^m1p%RUJIQCwHje4^XLp^7q{;4v$>@)|VFp@|8>Rnj*VGjDKHek$8101(>8DiQ*
z>T!Q9_KulBW}W{*(#I#DlXWUC=ho4Q#{1Fi-%2Puev}wljlmM33#!lifP%dNSkA#S
zcFD=mg_^_BzpD_XPx#=zw}*(lq8ZM+qsv~a8%J(Ltwi5!Ioz`Q6QezT3wg6tj=F9%
z!6m83aOrUim{@57YfR*EPiQ=*;WE&C8OL7Ic7kI?t>U>8Mv=>A-L&HT0?2xzfsT{5
zqi<(6t?x7dE!!6~$8rxC?JpqrQXh~P-`C^Vok=u%&i(o~53bO!Ne6Jzcw|opjl)Mc
z9M$c-@S(pg%i2uE`PnfTRxL#1?@vhQUvupEZY9>w4CG)e8JJs@2Aez&q2v8#qO$xs
z`*vFv3xV2b;;e#yyN9vYs4<{hI6h9eM_rVoQMbpC=;m(1g)_KNgC0}bazTR};>>#Y
zgWT!!uqoWN`;(T0kHO#%$7!Oe9EiI77~AzpbZCbaU8(SyO7XlQBi4~R1#F{PCF%5o
z&PHma^OAZuexSM;1!zfasaySJc5+4p`udj8alzJf!?}@==oyP!em`o6zB-#2ntft2
zzb(Lwl$%VqNh&%%EE2EUe+o)B|D)lX1oX?uNob?34{XE|su)D6OX^f`;|y@megkRv
zbet`Hz7?Xm+JiE8?*uj(L(s~P9MDP?ChyFEPeVJ&!uy6~Q(iSubY20yo~9re|A4ja
z3&5nY2@siK#$kTAx#X@%Fs(fj1FmjC@jV?p>Rb+$jS9BrE0@stZZnA7s7Nf=?S$W#
z3c!?$oj0*d#otbef*gK2xw3f!475(i@7!8?W#uwdnOXzG+p@_gE?BIhm!(}zLRei>
z0oF1_ByU$a9@RC*sDNbpAwHj04NQS6mR_hDF&iQVb#eEol}y4V6HFOWhJ|zy<G*4L
zovKknB5p6k2&;)~n9eg2;#UZ-BTMM8&6DY8)ofz>;sld2Ul$r@xM2FNtHi!u1;)AC
zfs0U@B+UWxKHnVsFU}$_K4ie*-3I8SRYfNUm|@osZRqTbVCqW#vZm|q5n<~rcIeA0
zY~07K{S{x+29rVv@OeZ9o26-uJ{MK2z7D(g9srZ)@mMW03a`4Sg7S`;SR~M(1Z6?v
zQ?TIM;(2IG1jH}k7?o4*BL_H;-q|NVNRXW=Q5iFpst1RlQoA%0SG1k<Zummp1ZPt3
z*qs>gD3sK#vBnYe1$5q<w`6MVQ4%v;9h6$f;AZ3ke1a^%-DU~IGv#E)CrxTnHHPje
zlEC;PTTDzRczFGBSnsTkX%RhSpZ*^L->#DIiG_Gz@?Scrdjcx<+JRI-7B0QF5F-L3
z=$EO|SdM2Q;1h$5E){gy(F3%)L63Pl${&8TSI{Q2gEXIKj|aVV$t%lo>`FP7R@_iy
z{p3tw$tWvE^FbgMJxPU@b_bZDn2nz@&8Wp_WiABh9Zj;IFNmMc(wS2qQ&w{cSG(zl
z6^7N~?6@SlWi1yl!$qubm{14%^we=)nh6=%xLBYuWdz>b@Q+@0Izlo}yK@07ez+&o
z82H|cp)yPbuA44H!6X@6w^9+G#~&la&Xt<PyeB_{?dcz(ELaRWP_=+2;<c@tEIWLK
z?xP;G=wl7DZG#Lc^PdhD<M)D}P>CLBu|wlW8%gM_QTR=5Ivp!90lTHNK*87=Z=AnE
zMriz`GDYrq{nRuXkz7Y_KjHe;gHu2|q>%j6j;00joKyD35}F>emAQR$IJ``n1q%0c
zId-`;3@&UX{uw!le<+RW-AlgKO@Zr@rg%o}A2$ZqLE`+0SiZ~>Uq$5F_~m?KZsc&<
zi{*i++LVEFTs%NmDwICDo=;CN4kWt5-Dni;!<l)xdedficoLRPHQ8FGInV*Tt=^Kb
zPvNxws|rXPKP3mKHNKlN8g?9sfR$eo>4<_(y4J`|Jk#nG*(D=~o_AKk>8jzJp<E9y
zrsc8HL96K0>{d3{fa@>2XuzIHyU3p+YrHclossP5r!&$W;7-aVGIDMficgKBwTUv^
zvptJ$=GJ-VIy`aS<Y(lOS}2JdzmMM5-zNU{$C=)&|3Lq$dEm5kBOF+vfKq7!X0f^p
z7c#kxmi7!|idQ9(SK)r3ZY_cPMwbz*mNn$~c>(l|G{9SPKU2lNQaX*R8JXnAk}He$
za=^h|5Lnp3<UCkKg<?Z=d%hN4h@)UzR|rvRD8oLxMnSn;8C#vWm34jnfh^l=46Oxn
zke?ckh2!Q^Qxbr~HtwUIf&b{*vrXi@nFQR3RiJxr&%mLK161x*6Qeog2@S1}nelhC
zVR2=yApBM&E`By1&F$QXEtkBToIF7GH`J1S!`kTWLVJ8YKm{8Qw=h;wQ>n>6J$!RS
zL_Xe6g}ME1u#W5P4USm~M-MN+Ifc97u+LA*=ZgC67Zq&pv>%7lOf``ZDbjfzcbGVh
zX~2)Mr{X**{8PZA8<y14c`uEaEwSh5hLp(w%Y)g39|owsqKsbt76q#;ZE;<kKAPN7
z!5MSjQa+atQDZm>PR36qiCfK~V#yWq?X)9#q@quH*?Pf}N2#dfxB(hUp3}aT3S!?E
zgWnUai1cv{(C2`~LvP;Dj$xW;?2rT>^^Cz9E%3wX0$3`ojx$sE_-a%Ut+`K-Z>2}~
z?{UJsoL2FsdE0SfvON0!VTfQ>F7r5E1>KU`***1%%$OI`;Oy^cypXO7b63Su*VDu4
z=~w~M#mhh<=QO%?L<?G+FHkr*5lZBalEK5a<nq)f6ds(fzhNg0c|SHW{NF~PBgd_u
zZ+Z~&Rv)80%E9_ULxXT%4SVTk6SF8`GH7qigkXmva<z0H-D2X5ZvCwSpPrTIm#~tI
za_fT?o0L(%Srt{^`oia|eWZhXk4J|td=6+K+Y^LP9{ZPZG<6}3BWF{e`8TNjUTwU;
z>Ly7tG@_ztO|kUI*JR<DCVFHxQs!eU)TX~8itm?0>Gm=b8#NtO{VR!Yk|KWatYc2q
zRxuBc?h;sO43PstS`Z#G559>k$kyN%DtsLe+a}n90Rzx27-75N*>u~_fj^<(MGa&x
z-v;3U3Pc9ZbHEEOh2&8J{)o(>j_xZ-{KUh=<6RZ3ZZ{)~o$4?pbSmtK(}RH57fFZj
zDaPQ@W$JRQg4oQAB5}H|Xt3)68D%~U&93NB(`O|p_c0!S%o_<s6(-1XfoJ;;5pX}@
zhGt6MWd8V#Y^wpE9+k*s^)n3M@sUk*$$ksia7zl9%RDR{b&ytyhLeHUT!`hWG$wJ&
zS(0!3y?&}R7d|&s#omakr7P5DW3nJ0GMA=c&j$fq)OLirT~pZ?2jy^L_9QxY*J%>B
z<%o^c>T7hIr!-{!9z~KhrE%kBN38DiCYg-}VCkPr*Zv&=bw#h}zfb0*eX$(c1Zm<)
z<!3b8$(z$!q~Ny0&*`Z5U#ZO2EkyN@DVlKM6jm;(M2GIiywdAzYD^N0{xS*9|9Xf~
zMSm&(g%XapT~4+?Qv!`Msi?lDjvV>5ne@LmrE@%&G9<$TZ+x8u8_J4ck@q{Y`q&M+
z?(egP*+zkQWXwO3BMgQaVaYh8I){{h{Y?VdSbVMv)aQo^L>O}5+xyBiw0(fSd~g;k
zBXe+Kk|xe|>}3*Y5OuENf^YvxqLvDvQg=1&-xZJVzNcXK_#Aq&d?xG<bR}~SoS`}C
z$3X2#7TJGR9@5$<wSK&gt36y25A{?K6@D)Jc=Srhdi$C^d1M9sy?8bR`zE5z#3H&z
z){Dw+b0>yFt;|;YwSwvIMiY-I3&~3b4!CI2K&F*sLB&r>9Ny)y(~65oow^SVOisXM
zYpqdY>j$d6=`qoHo662UVvPr4mt%3rcOoh64>mcmH1h2fY!SaC7dQarw%s{2zIQJb
zWvW5$B7m<P)U50ID6lWyMwb3tOD~?>i84P{;FlVAdfT><E_-&9E*pMV5cQ~su^d)J
zi>uy{$fY+)+hRHLg{wuhJXr+&YTsx>w-yvP_@ns471G&ZgSpiKD2`Adw=FA}o(6xY
zUCs5nxO4c8yO!7MSwNWG5~y!n0_D*bxHnJ%PES^$yYsJ!6K^g@Z*E{!lrM#2{U<~X
z_S;chW<^`}cQXmGli7QRY@uzxJ0{D1p~+c;M4@aYY`km;kAE6rX^1m~g^nX9Bnn8*
zU>3+qaNurl4iX&RBz~5$n_T=bob36SfwKE{;>Sk`&{DGtOXPqV>8e2`tLMX>{Zp`1
zKLhH2Dbk~x6Y+3O0(@LrMAqNWpo<<KBk5lsQ=zveOfT3En`FLHlET3ktK=}fY9}Z)
zd0_Z+H@e6mLr{C;Iy16sCyu<8!syT2NLu8SFs>_ss%h;aAD+J=8tF0krGw=>5I^Ym
za4&om{)hZLC=b8A4zu<Fd(ru45Pe#5jzqmQAb)Mj$s4aiI_t(#aLai>zJ_Ihhw@1Q
zJ^YFu+I$$yxBy5Q4*D)tXb!{2n27r<)5SrpsbGK0m8Rv?!4fw=)Jfq2Y`EU%1$vuI
zYEZ%EHRU9v{yrD@wG9WC-KJ{W&y(t>ip-hY(@B1D6PZ}51&Rk8@o}p$nYzA*X>aaj
zmFMk4i_ic@ym<*@-#QXxLc1H<CLt{;Qh<v=zHqL;oeYWxsI+e|G@hM+lgn}hJx5N9
zC(fS*ZYN~8wT3npS%lzk`P;;$;~RNV@|Q7hen5w|w1GdPi5oayvt9FV`m*o>Q|msH
zIiB^%CVrtIG!J`C=MOVt^_ZWe_k9^V?(iCP>^;P^<*tCcW}0Mhi#(ni?69%T-cPrl
z>|}->O2NO6V%T24jG-r&AnsWYQk&1vudRh(zDN&r`obxX7f22cexy@2&ZFUbmch|G
z=b`jb1Z4jyC(>T~=-v@=@VK`O(s-=`ds!j5d8dXZZ@)lpEp+E}%`fQRXe%OWh-Y*p
zBj9!Ha@w8Dxm~|30HVB*G)?)(MjOirf}?Uk!BLJ4*Kwf^FLyJQIk%WO#vjPL(G$43
z`b@%CN#w>z9rpZgDe#dQi=!qqkTp?viNQK8{KAD9NWHMZxt5fsXC%@K!#Qvr*H3h>
z)dAO#QkXbf1=meYf#YLRNoV|T;+%m@kQNuFHD@(wrD}oRk~+%wb7r5W?geK-CmFtV
z8FT9F6zJk~Mz(V0MEP4v!{M^1`mM9IxVq4JqVRn=-TZJARXvhOl&&k_$AUa|9dLmO
zCc$uQtt@%@J%a<THIWYkrWn&y%szcpz<z%+joC89c{`?DC9M@2Sg|gFoLzPS=1d$7
zxN9c#Z974Ay}9pjJsFITtRlM(D1p)_Phu?bg1Y=tATQ?i(8j8M#<zbB`z$bz)pQAl
z)K#kRRy74rhs?!gY1<jz$36H+Ivx)<C5lT=s6cq}DEhN6myKTAL@q6tplgN+>D{8;
z)E-SR-h3_|;`U+b6;E3Q^O+L~96(xj5zdlsBF`P-&}a5yST2{&IL|$U5gO<4%$$Qz
z^<Ehr0v+hRvuaev+5%>IZ=g#b&jxi)AN!cfVCc|YS};owZ^@bA^?MC)J202IW55bT
z#V6p3hrYNlbQ~<MD<eV2KGGrWZ&c>oV*&d{pDwy9CXOK)aARA!z-4VAdW@Y8{ZlqG
z4=(&;?VFdQ=C1)7yJjE#Z?u5_HCtiE^#2!Ip|^Pgx&Ln$7a@>G=F#)4Cg(mY{47sM
z>{DXCtxDjnc8*+9Sw!}Yn8j}XRYAmawTP%$RbZzwfvj7&p4qf-DKkoC2XpB1Yo^Uc
zhqT8|C5C^^sk&1$)3P9)%q;pMSY~=l>??Vb>sWNN(lJF$`amoBRlbXS;YX18kUX-$
z+?h-f9%NkaKW6;ncaWwubCP%NAYBvsjoE1?BF#rP5(CFfvha=;F}%_*4mvfHRNY%j
z&V7$3b%PB=_op7Ij7TIi+|HB2`M%`Q;9Bx$?;}Fo2btu&nPl6mOmg$R3vqeiOdk0d
z(n9(5%&Z+EvhUYZoBvO?!p#4PtspD$-+m4URneYv`fRjCA?*!K2J2-d^ym8`Ear3!
z`}FRi^Uyb@%lZaLuG$ZmHfzGH+!CgE<75bV-AyC@?gpK^2{hyD0V=F21NWV^&~*6?
z)mgX%?}=jJ&_pl%zIqR_Op_qbbc{g5>=6BTb%=dnHLp<_c9XI9GC_PNik=#OnG5q(
zguj)`v2?T$r_NpjUNI|S&D~7+sI>yGC?&z`6$2o4I!oLXlF+N#3bsFt<iMlrN%X-?
zU~g-)fwsNae()?>PLPG}ow6`x*jn6j=_m~K9>=Q%_vuTF$1SsW0khZ@{5t&LMD9o$
zwEZ+mo-vP_S~_8!X)1ZHxCho1sgbwuG;ytZ3jA0+qS57R2v=K(fu05t7TBia^ik1p
zao!%Xz&jhV0&j`$g__xpJZ+9@{9`z4Ng!yQUk`Kr_rv>?Jjf|6Aa5R>ql@*D(ZM={
zzBW8dG>%?j-R+G*zpaG6FIK>9o=-SHO#u~7j)cDDe(**xL<O%V<DtuOP#~Cx8dF;E
zY3DPVvb~20+qU76Yx~ITsyS%E)$hlfc;Ktt3hL=3gZzCmP;i!Vq20%*i_9~;{p1C`
z=3Y#8KN(3c1^r@H?<j!fJ%^w}n9pvOFMz7Dop4i%t3z76<1|s7bc4Ag_}w)my<FIE
ziOf=ZZ%Y-nylX~PqZ>r^Eu}x)!XdkPJgirl4XWt{I54A_HdM6G&q<U7_vho)Pw&x6
z7=!lOTku}+S<W~*2WFjPsp&R(cDZ6Nv!m?<=Nl-%o9<(%S=JC^rgsZ<c3p()u3q4K
zvKp?8%7xEbSIJ3(YC1wR4BLiH0>kR_WKjec2BvY9DRaw*%1IT(U3wP$jyXVQy~;-O
zl?S2q+dKR+c^6)vE+8w{q!FXvFNjP<7NmYUj%jJx;(EA7<AW?=)jw^>GYsbR-hTAP
za6=f#>7>t62XIq!3o~N&0-9{J9+zG{OdbfF@XO*wD0`!dZtoq31%I>P7za4l+Mx){
zfIkO_lz_jSPSRIq7`<_@9+MxH!JR3S;6SbbU-e#K&Vm`Nv^@#`cHAVLODOu3^^pbp
zgF$dG97?rwao^7MaMET1Hb3|YcXlknr!&XUowegZiA$mC+15ji$AmCPIZyCWqej}b
z%LMzP#?lL=acCkt7K>AqiO0hnEO=wd)v?5IDI}Tmg0H}BCw*Y{&7Cl<Z6bA4jl)Sk
zZ&4M7$WrMbSeKfOR*W&DnXnmawKjnMgPUAm_8A?vh~l57I+){@&YmvNg4Y+rAl&|(
zc*~Yz{>gc6P~S5Ws*0PDO}|PcZ*L%B^Gs=6TLvz|au~m4kmmPWL&)k-i0$BdQ7a~p
zgot9X{*6fVQy2$p54)iL2q`eTsYp!q;z1+!5^npr9B#<pf*1=qd?Au%R@=qEy$uxW
z2E*vhcLTJcFPvPry~x%pab6dJJqipXIjyBHx@an+cx@hdJKHi+{Q?x4*ns)*93+7!
z@ubsg>}a?|JOdP=`RaML@tQhack~9_(Uby9nKJY&jfWxr@W#QV!y9|E<!!fLwxO!E
zj&zHrf$cta6D-JOXukJt(1>`A@45Y)*;@k#_R8b460Vm&$p>7EAETsOBWb{AWNGm-
z_<QOoef;qn*zRAA<5mZt<it{RPI$*QXY=rgD;Gu@F_jCtsGyZ2gE${u9emPRg$eH#
zuu?}j<ziJ5w%^|eU!;^_)Rs$3%>;Ek*=+!~rldm2@16K<`*2`xdcvDb9_-U!MI(x2
z;P<o7<loeTz#Gtjj5E$q-FB2LJ3j~BoVZK9t|vhgrw?4ebTa)kU!R18yTJ7fZh<X(
zn*^^PEk0Ve5p>J*$?Tmgam~XBGJ*>eIqP``Zf%l<v;LcLN17d49FJrFb8MAODGVC3
zsc>;&*8hWBH1mIqtvdfXSV-)Ce_K4{vOw_u+d;vO4nvarNS}6_PZIpcR%Da^`!>}6
z2m8q4e;RZClawS;Np$S$)#`}Nn@YR!3mLl_(FVI$2dCN5`+IF)eo<+xa2cZQ8Bc7V
zRI1oMEInoS@yr~%)?=^HDoDkyyOXgK&GBgJW-r-xPItyfA{)EnLM^+vqnjIpY@%%=
zW|rBVuDMThe<e5d?#Z>)a^7ioEkfC@Uu2Fh&9%0PrVH(=Zl~DIU{~5*s5k@T+NrHm
z>J7Vn^Pe?__T@LNDwVVA8o1L~!xq8#%-gn(%3*fX@~CZQRG!^8@fbS~->$~i>D`Sr
zTQckvzku!I6%tL$W^8O6=jCOWyffY|a7enT+w7cO<>17|y^d*iBBK>{5uu%pZ;!az
zrl+RZjac}g@js8b<p1iJTl|k>KJ34adFF>%cK`7X$^U+Qqq0T>hJ>&24OzV@IMjdR
zCjW>+mvKujNn~lP4v*me6&&Lqxq7p2WU%j=5dV#ZK`O?J3nyod_>Ysa#($Hq`Ra)9
zE&h>(S(;%{p%Ky3)`my=PYaC-*%a&>8R;8cm?gV$t#62LWMKifSBL-hDGF2m<I?!Q
zw=m<swh$EgA20Zr^?BxhzF^#c?BYLu*M^5}UL713%xyB{zb=63_I|#hg+c0*cfTF_
z&o4G)(k?O@9K|cIm9HQ05?Rg_-sa!>kS~;7o+k8`)2sI_7*juz2^Fnf?JdkY62ygC
z@I}Ml%&Q+|k|;VZagcx4#D-sM%@>_&pT>bYqJ-8PlIr!R-4UHr*;U^xUoJAv7mK=Y
z&*vvB59S4`5AZV@1U&hGWPbEG1yP2Xl6CnVq0s5HG5>Slu=+m#QzDODb^I~9vqfJv
zX9}Nh3Fl9WO%sZT>_j_jnnV(RMZyA!$NXap^sUSPP3B4Xuc$K~67lnQ$O+p@)$6y}
zdWa^4rSoST(-cOI59O!(Sc`O*Df7*vy?JXwj6`Co-6HV2CpzhHT<Geg!}ocT!vD8h
zy8ip)ul#(Q_q@gGjzZ<n*F;Ut>qNS><@}P1DWbC;FY7)fZWUELIwLx%G=lH7H&H0w
zwO@2^NRRhtu$eb4Vp(0Alnw93_zS{2tJm{<3%;L!eo#&{ftP1Bc3X~cOP7(TQMO3f
z-`pZJ=^5f#c1Kz*6pyhk8R;bad#FN`w?F2*Zmqp&_I7FB<$d!+3VtH10EsyMlAcqw
zf2?K+(@u9<jWx6sEj(si*K9jdbkq8tsQ#g}aQ%l={`$k>x_Q0Tmg>*Pi{j5@@V&lG
z;APD<<kc1?pKt9P##5}3=3h~Y7wHW56urEWZ?#nFg|K;DHNQDwyp__V&Ahh<{)u{f
z7xNm|`tVF0_VFdxMTyeRKNKZpTo)c_xyeh|6wG)2+RoEmpDM~Vg8B<3n|Ol;uY@}5
zuL_y|k)qJ;o%{^fI$mDDoO(Hu!JBLnSTBG5eC@+ZQ_<5uFD>(5oE3U4dBa=t)>HJW
z&xikd;SJ%PzNdWid-D8Mm(TM2&i4!3&e;pYUNh$p2XJ4!@xAc0)Udie6+#g#lH@=A
zb5+!*a7lD)zmNy^xjX|KL#y@!6+-b?Ml{pvCC{KGT2wnzn}4g2FO+y?&a-Oa^V+`T
zT3xfcWhL_?u1-qhQr#pQ<2utF<As4g&3RY0kL7>A`K&J1y__dweXGuFU=Kg9XN#53
z<U>MNrnb)D^1ZtLbHnN!KOQ^ZW81=0OO6&+2j&S)rjDxXel|jwQIjv+w^~&c^XshD
z$eM6r#`c4}96f#B?Y7~f>|a}X&oU0zDql?CUEMXpYPec9zt&&KJKe+=Svwh8<qHFZ
z>TlzCXYMueTI@Z9hqs^R5ldeo|JFNRM5dJWmb+z^>TYlNWnTIGDmf*7?z$qO@?{Nv
zO<11jU3(Jmx@^4Y)|kVh<3XkTfIK_V8kZE_VUth7pMJr7`^LwjO@C+BB~9(*@13}h
zZ*c4zUrJTXJNcE=y~>lYPL^~LDIZ=S3f#V>{@cO3=cOP0<{y36$@lGh!q59-!k0bu
zlD9tX5ARQgiuI`@4Wi6wGhw5CwQ#`)8NNbn3{T~*9N+!t2)=xauE^88jh7P=CfXk{
zfp=EAM-;s>x_(EJAK&iyBjLH8@z(2<+O4;08a3R{Y2poJhg$#ES!nHZB-XlW$(;HE
zj|0{Z^DIO^uZ**i3vsko+;y=&%&gDaK}lC+*REiFW%6)q@2=0*?`zVnKc3iY{l&FQ
zWa*P{U7>foe)LiwYu%)+_1TY3@t3W6C%k`oP}tSo#a~_|EedTuT%T<FO?0tpjJ0e=
z2LHE~l+BLk|3s^dlts0R&8-^}ehKg2j;puG@8mo3_FFS02l>_>(l*1s=89Z$xA5jY
zJ6M0tPTBg9@z#2IM$G@ONB;l+=YQh}C{D8#zSoPhwqG??G@x6`Pc3oc)%hTQqpYO0
zT~0UeMOZjL#quJ5_Wj|)e6{C1^+yK$f@$4Ch2a<ZH6OzH);{UNC5HN<%d66aG(3Rc
zet{?2*FU1Z|C&O5+s6>$SF=RX|HjAoKYj_CKNM|FR%ysKFXN~2r}2yKWLsPQo>XsK
zmRx5A9oAl>0_vA<=-}N}8e89S^|ZBh&U2CVlo)HHAG-B&>(7d&fAkTKtk<vKekFmQ
z|3SZgAYQ|IEU!u=7<Q-razM3p;wVS!|Kkt&zp;VTwzB#WL310_m+Z2ZP#<F*ymyz)
zsa&7>NvCD352SsoAOCbu{o5ohYhn25`mzNh8}80KT>nJIz@~aHE4r#RP_K2lSJcf<
zXjn_v+4$)b{xv^U>%n`K{31PhG4pzijnDON>kf%*A$jiqf3f%GQ8h;I-+u!Q8Ym5#
z2P%qE>FjHtq(X*JLK#BI5JH(lG^<GSD3S^xO2gULjtmjXP#O$n%$RuyrSP1!exE;n
ze?Fh*59_<0_56Rg&gs7IbKmdlwXf^7(dKd_u^+sYJuy!dyzltS^j6I$gU86y2O5m+
zn?Hdl`>EmJymv70_E~08dR!11r_HQyYY~-a{_Lm6VZ1+;iin{#3$gk_Y(svqzB3nD
z>WXFLe#{S&+%$||9iRyBtmGi=cqGb(-yvhBw~oo3+6gaPj=_8X=Yr!mr1;^sEPQ><
zk2tyNizJ>%LgLCQ5s#2#o%KCzzqx=54-7+%hvUe)SM|_zB!hwIxJWT)CCjy63N<?G
zSV(9K+*Q_N&xRf<yOuXl)YuqCwz=;UXO^cZU*Jr9*33k+1()EotQlMlw&azr6S!xm
zI#m`<p;>`aSUVa8&b?DWm^y@xF>68RKznQqNXJLf-Xiq{9slIsjkxqqwB<<v1bi)k
z%DO6$_n$}W9U^#$ry;2xV8s(As)@pfr@*<ka2&5O0WwVOU}E=oSmSUT)U(4NSo<^x
zM$e|@b88{cON|zW93fHLzhK)MbsqQODV}mXi%q=?c}1iPHQBfV@4hwWD<xE@USBzQ
z+3cfwF7Hs!|2h0lG{C9G=V7?L8Mau>!YdggV8-VbGWEq``sGNsUAQ=3EYtDeYP^MY
z|L6zh1EcVLjXVAd*@kbkKI3A&7Q1YDJxDqKj0c)YlH|ZQV4v$xb2dstsEIP{Rt({;
zp~<}Z&UU(Mwj_Tz=MEZt+eZ6#sPVf+ad7mVA|0ZzkDhUxMyp*7=pXg{=vo;Ae)E(0
zENdAWt22xjusism(-*JH1Fz`54j(p}VsQV<kQXaU`PTl}6W&5Lu9bo}_ru7kpgDpQ
zU8U%F+5+9QzuH*_SyB%NCo*z!Fiuj8VS`O%(9D5}+;X+~@}k@L)YFBZ@JK`7wM963
z*9?&BHlrOsJfQG$AEY!M!_7JUsa@GfLGHebaN`F;ewG%@OUy?R|AI}2<OMPs31HP|
zPi9uj!MDRXY+HOk9`^c}pvLPWEcc8j?{_G}Czn64c=%M&?ugZuu2;gLxUPhJUJ{Qr
zWpSh@po&<2J1N+yEk~Z;5(t)?CZOf(FidY~2O|N8Pd*CtVE1eGTquAGad~V;!!N-~
zX$@?C8z*Q^i6C~G2#spGm~d(V7Vm#w8U5oY3l5Q|HwW#)x8<Ap2epwfc>WyNX3!^?
z@bx@Cdh5un8_wc$nI;$>o{Fn|!{B3!FZw6%Vdu6fu`^MZShSl9QC)h5ZO}3kxhbZi
z@1JArjj1;n_H6^J%wgCb9l%bf-DV5d?Bf-l(InaY7I@g`(%NgqQ1JX9p(o04@WZXB
z-;zaQcA9bYF~PHU9N^OKVcbOKlEC(eHCCw9f~##WD6buj{;%S3L3tv~fAELheU(i1
zHK_6otJkDM-dA8>XAJgEcG$IH5KKv44|9V(1Zk=5;NoYF#*?@4*`H!zsROV%-)(S2
z-2yPYHyQ7IwquSLGs$DC!F0j-eV{v_KOA3Wi8B^95vS}4+&|X|D~hwwv^<~Gzd2V)
z%v11f<`nR}H4oi3Ccxiod4iYfBSp$<nlbr{DGL#eht6wP$f=ZFAp2kfII3M>d$imI
z8tu_!$>Q^(_sKt4totU=Hj5-i`%BrM85$@PDg%e*(kj2Nlg4sk2+Vbs<F`A!m}lK@
z^7)!C)=b}u6SGCQJAMUQu=zW=HS{IaAH7ayZY;#5#kD}MmP6g6eNcE<oqly50Wya*
zK*^vDsy`GG?ak8cgV!ylA0kC>)CIxfW7j|>M@^)a-wN;NXW&z7VB3`^;KIS@@J>%O
z#11NDZ~PpfWqb~IU1iP#&ClRc)19c&p#gzEMJRPJmb92?fSqKPD9W}UYI!UJ84VS>
zJzSgG9~}#d5=UwOQ6aQvYa9$$(cq3#;;4PTBeoh3$Jdi9=*VklsI&3|N;{^~5!nG%
zk%qPC5q$+Z+A_hmCWsG|a;1feI>MS9AK~}s9ki<EBG0jrrE$J3{OYIK{5LnP(k(Q^
zJJu7$m696UJSvzAvpi@WKaBx>=`dno4)wZlk>AU^N(XH^4~bvmss9uU`deZ$OnjV5
zBgeF2qV0YB-2W52z8gy~XFP^k(qBlBn1}Z28PmS~rMz?4WO`IFmHcnKj{n~w|2YFz
z^=-sg<^rf|hU2MsM^WWhp6Dd~j47Ut?CVo)`s%bH{8S%~KR>NxLltCbdFwt_)U7~+
zhj_rFwS)zCl?ys|-C{ngHMzsa`>fWe7y|z;q&9&SP~vYw=XPHFC;95hg2TXf=i!9u
z7F^}kB<xOFNzHn#Y3!c?qRo?4@Z{iI{OM+0tn)XjS`(kh2lUsXm%5Yrfh|Mf@GeR4
zyf~g&Rb6A9D;=q+{U(Tf>C8I+WZ}E%a<p>rOFU3n!T-fg)3V_hei+z;zlZhbFLOAT
z43?`(TKa+)?CIbED-OUc2R}aL>2`kXd3{xB_7y5spvxWN-{4_^1ATKOfL2w0<cSy6
z$=16Q`1Sg~^onOMEhzhlt3vYWiodF;)~+FZ)N>WI<5bDx++~$#Chix-@IkP5(MZTX
zBLn9HT}5Y(Nb<#}XApT8cQSC%TzoJ~+*rBx80O5ju-o~@4UVa5z&Oj7m32e!lQmQJ
zkQG<Un60#cHRWoMPM1>A8V`A1nRppro;pYD@=EO@rp(4irO!ye20fY&I#k8-K4^Id
zgIC~SZWel5Bu@9R{(F&V)?FRAk+c|h`CS5Q|DB>MztiEb;~SzNdd+qYeUAg#T)et$
zBJUZs5pVR*!2`07$cr1gbgXa(Z;%?m)rSU%L>rQ*=95MAUWz<jEpY|Uj@*Uzn+)k~
z6HA`vsD{t&4e9aMl2{cH4pLz*xLDJcepY=%rq*48U-!P?i8)R<S|fq8_;d(tDflNj
zvE{Ec(Ro)xg8p0p)uHBKlx0G1`0wL0i}m2Jt_!~vFbb5eFToJA1gvlyNdJtmfx+Je
zRLjr~Ou7?jp?!aPQ=@>6@w>+C?y1s2uNH#!MtM54T9yCp+fO!saU`0088|xp1k(|3
zRNmfE3f`CZ(fr3#sMWmvkh&&=))krY@_S22rJ*Tb^I5ja=Fmr!of1Ty)NaF<w(pRv
z7YBbAzJ{4&N@&KBNJx2FjE^V0WKX^y;FVkQ=xlctdOdv%&7Gz}H?DpOMBWT%T{sS;
zcN;x)b%;o%WE1rcv!O574`8astN0Dsp)~XTIduQK2Nq1K7n~VY#In>kP<rtscr9~5
zm!H$wuz{NR_p2^ST7JOo>y7wiYXcbNAO}ARvT?N06mGD8EO|eDCa1|GFx%G(umB+W
zyc_oQePN-N7C7b0dOl|*!OTz@+OPK~E1JCpghAd|>=8lFG)<xrNtrMq{SiBU@E{!W
zc!t-^=YlBMnI@wN>XwcN?}^&v^^8bJZJIzeWuLJYgGOA{(qs3zb}z=XC9x)@X583s
zHaF7QM>33`v)h**fL>TTDDQHF)V)(7Ha?gJ7%hWGy5mItAv!Qa*g}q!^ukyihkM<=
ziyp5SglG4ru;i1&vETM%kmB`7^l_64@TN4F(icN)RxKpFcPCnPJRv{o*qF)?HAtx5
zBe=0C4srE0GN^@P!O3y>)P4?H#7VR8lsdtmb`8v2pvw~Dv_%T)LD*Vt0qrgeK&oU8
z^i7yUUCj2<=Zf_}X4}x`ULm;s+IToyBY>2o4=CysA=RmY?deC!?96;}e5)(0Zoi3}
zEnQIM<6`D+GY&uI58+mW#XF4;(xfXxe%gr|t=S$~6MCu8f$LwWCu+UMtkpum!ww}1
zToz4Yn@^~~wB+YZwqGcpK2ZocPgb)GqhnyQpcrHCPXvq6`F79OR%79(o#^Qw#JoR$
z#+fh2<F^_g95Bt8J5Mkoxiv}TxU)HZo`lS*{4!hUJsiIK%>x;yVdQ3y9-T763X8LS
zu<Vo$ZHitE2K6SqY=i^b5i^Lk-dPGix_?2x>4cov7%f=iG6RQs=m48i41Y%lqo0EY
zk?=0F>u7Nh9FVt1$EAnhW~~|~ng@~fC1FssTZd^t9vfMFovioqCyg57O`GeLpy6;l
zY<rvlT?KCJ)8==A!?HgG@rTtweNa1TbJ|0eRY{S(j_YCiu)U;vd@)w9Z4rzpFsDZL
z1Ne%nt#qdQX;IQWO$cZ|Kw~z45RIPr5yI;Y>Aeq|z{<mnAJ-@+7D_uvkasH5U|rOS
zm*KDB9(?MNrgC0;nY^S5yuPCh$9~H3EnSJs$1(>_g`OgfLo?uRcQ~}9X!4T>W`Ux&
zB>rC53)OQYu-IY}zdPBG2KZOwivSBzdodx?cWBUxhC^um)|PBoU5q>WA7+_{<Zx2>
zdg%L^z*ek0h%3Itkng!77#Ze{iZT>4qV?#7Fh|%hpH%s}8PPG7a!l2sgnM-w3IFce
z!j+yW&=605>YccSTCd+n=T54oo_|xRiS1DyP!z!JR;JS%&`8ylkJH5;L~xmZrtaOn
zEHp^OpSwv^85+l6%@RFceOHwq&pyQEbk(5uX*y)ltML18CJeK5!%D+<@Jd#Sw~5)~
z@iY23EbKHczYx!2N<1)0cMDF*Dg=R{K)4@Mu+N}`hjcBbpU<Czyu(3Et)PW(?;8G3
za*0c@kf-Sn7MQt)<0Pg8%S&VF{P>Arx73GLO+1LlHhMw%cq`npV+W?6dWoaw1Anpa
zJw_gvq#+&yY4-68+;W2@>TD^&G7E1K{=pscix*+;B+8?2*+TfhJM3TNk{dRiU?$&2
zn$-%?TQwK9>t}=FkO<QE#RZRTnaqbm1ibfq#$?wVBqhVEiQ%1Q(T;9Y^gVHkeNx<y
z(JJnkKQ9KEs|tkiRJ`8$8fEfw;MwbZ*tW`%w{}VYlXvZld?~u`rv)@th<I(nUu-oo
z;;ofgbnVxtOnYS!(cg6lI*QBD{pADfI60b@cj^jX+*0OGj(XrN!*ST(e>XMF4(3{t
z8w5rZOYyws9H{R2ENcCoPj~laV(-GS|K$Jp;Hn^8@Jf+xdpDy>^^kg%#KPD76ibD_
z5vMWk>_eCpqE6*sjHIsyN6>kf-q9x^Quh1BEGK!?(5iw-XJAphtuW{ODk@iPz{-A|
z;d`D8q?yZ}QjKzL;S}rB+&rNN-jAvv$A`USwSyW-%izzj(`o`4vU4%$Ke%Mqof*d(
zUtc19E_v9lu@L8tbrx*!Pl4&PQbnN;HlbUMwqR7w7h++HSbeX8H8!uZ8yjj&mi%yH
z-?udj=5IO7j&J_N2Gs4x(``XuxlxWQM0SW~X}Z&`$Ni~AmJyGRok`a;S7XBCMQn@E
z54$E8<M8tjxQ~;Ney`AmwQrN?o!OcAFuMnq&FK=fpWO}TS|!2o@G02zH5MaFym9?h
z$_qx?LF*V--mU!@H@xe|Z)Bc@QRb@bnvp9V)=>{OXKz8q48)XSHCQq!0H58NPd5y5
zg1NsEfR5Is*<Q=3?DNI&=k<7W-zz~=P7K1<Jy$W#`~ddRA@uvg_J2}yetV}6>wPXV
z&tW1Gq;Y`sT}!5ZOE&XcuQDOJ?GR5GlnfoZ7F>7PbfRq<O}pFnlk~yE==asW_~48-
zb@F=;b;>HjN)=XVT(F1^=-WjGMuyOD6%stlEdtt4o1n*<zjXVRQaW(MSA1o*0;O8M
z;+c~N+0c-;U}$s$Tg@ZL_G9}QQ`f3mY%mFWCLYBrCwGay4=%=E6*WX3GU?FZET+3g
zk&Za}nHF#l(8?V}LY-S6vn`f-{XRzG)Hb2Bw*j}e9R)$j!DQXvSVqG?VUXV`sxUT!
zuIdfMKC>vie0D2q!gR8L%Ta?b6G4C4Ay`ovjz3=QgV4Jzf*jlVIAX6c=9;+i8#-Hf
zDi7gz+QazE>@Qfias<6yV@y+|bTLFcn^v^14{xt&LuqMwxKd-yTRP<6Dk;FbEmL@!
z(GHxpsgOJy94)e~?h;%`Um_Z%pHAateZ}%iZLG-lhP5?wneX{@%>J_g0^Cg@F=!0P
z?wATmyHA1oE))JZ>=bIR7>zen&lBDFRCc_4HyGbhCo_f&g0|v`+~@fnqHdgs6F<bl
za%B~6TJOdt8Xtyf#y2ZeYX!7l?GBpMHJa)>d&B<W17ViUc)Bh76>Fao2^&thie^QY
zLWyKGTU=8{R*hK6epf082G6oVB5W5~2CKo&>d&I(e!GZ*$u%aPL&W-*7VI25Sm|It
z6rbs>Af3L`&?ZYjlrm~rtidvPr6ft7U3w<k7&{k_u3172z3)P5*db9zuRNfb4Vy>g
z!N<_6&{XaTITZ!0wrLa_;qJ~Sd&k4UwXa1LNfvDJ#$Z^ZHXP;%m2vK@EVg9-SQt-+
z(&*F0SkTkR_@&Q+X<fG9vvf7AoKOsdHVq)A*Q(*xz7w#n?E{<Y8xHbHQZz_Ymi~~b
zM9I+i%zD!Z8g;@8l=Ke3@rNxSZ$1oq0!!hYew&~tPlVm7WAULz8UFE)5y@0|@bSj6
z{OMnYx91yR|9kUb{l!E~kiEb*sYI}QGI2OhD-*9~_ooFrHR<C!Ytd$BFe~CQyd<{{
zGVCc?Ilo(^vrYoy7foUHb^%~K^95;KtiT@qU55Q%sq%J_4*xYT8wZZk<`XRY*o2|s
zII?vS%gOl3^0r@KCNDOVpC6-`^4{z0kAp76WNsjrUpTPEvp*8=BSL06SDYiS_OP8A
zli2+ex$Kcgx!`5#HzKIe7R1cm2WK@hFlz61*s(E!c=>CBE1bih^>VyAt`Jv@cNetz
zy=SuFk>D1O_<Heu=$aRSI)~EX{Zkco`nexukM72%NfMw{p3GciGs(wwU+~VoJrz6J
zbzs2Iot0kS6d<4Ekr$<Y(0Jz%NT;}<!D$=#7nwmV)PvbK)bUr&%2kRoRn$@C5}b<@
z*Q=-RQU}+YyfsNqXubU&tgH{jFT=mUiGAy^qalZCEgr~&ie<U+t+iD6BuF^db0-8n
zs^&T)U1&tF5_R|2uCkOE!JkWr>-z6@Ts-hl`8X_$-!**2lH_#g<+HnR)sN|zvL+HM
znzPBwC1d#W9kXaacB)`{v?KSWVRS>^bV2|3f!M^(@$WM2Fz@^k_Uv9Gn<f2|wr%(c
z(?dt`L8f;2Wk<$8$ydHM@;I=pnNC$}<bk^>KfgJjHfr~$_;m?hG8a<cQKxXpVnx35
zKm~2py+dGaJ3mq=q?(qoFrafY-8JeUF_3qoe%ixmWp*6>v?dkDinC6=R0Iy(Q%71i
zME?_h^)K$LfAJZvr>Ekm%u1H#JcM^|Z(+|8RC&@QGxnHGW|zNjg~oY5$dI`wNO^IK
zZK;<OMn<-ZlFwzJo|sQtr~hU%O~p0qQ7=r4@x#cY=io}S6#urvf|$I1QCV_nH!=22
z{wKe|e{rtprroSj|EtJYY8p;Iro;?ZO~Au)J|ahxXi=Km5d!wZ$RGXZ0;|hPww86H
zSo&OL(W>6-ta_dSm@1E8Wi}TDSG5wEqIloTOX7Z)(m+deJ+e$NJ7Y+t#;I~aSnDNn
zF5Q=|?Xscc)f!-@Duw&G>zK{ARlNSkFnTa27al4(gO$`Z!HJ)WRoXL}$)nmX*r6E#
z7F?bly&44{|3uMA#n)l=Q&*5IJj^6h3&`ucN4V8}GaUb70OkcXu%a6&Fk(R*&e@p;
zA-WR0q1PLIwkz{V_yzhZ8`#Bbj<_M}6K?YlVH+f*VI`RHg{PX?9BW@bH`#$3{`ynd
z^^q{MaS^coPZxA~T_^W66j0So2YS4Zpq5R9=wDoqC;B-=bie?$r?-e&<a#To#d^^#
ze-*iY#8a?%o4}19Cc@5n;+(W02hK)D&?iTpv0aY3bkO~Mn6lM@wtl@03bF%*F`5Su
z4zHrOnL2ZPok9JY!f<KuWa=WNP7c&3;LZawyv!|`4vrf`Ls!ev_w9ekki~6Gt1FM$
zWLyW^S1GvCy;tDf6b8+MrjtdjY1p~vn<!OE6F(J5(u`0AZZ~T<zmWBubt(>rv37T0
zhS6ShbdyAV$%jP#*;JbRO%uL5%7R2fpJ>viRy1?(6s5kZg$X`?(f6_&iL0?d)pNZd
zjbotU<lf4UTU7Y!&z#&d-j9C+fgG>Zr3al~;IZZ!@$Yggelpnufzc9F@I{JVb(gmD
z8f3{9JTAwk2SSQzYPd!ezzXKCBrg|Kh^8oSVvApSkTcGJo2L$-2PfH71`XCF?HA?{
zLEQ;a-1pN=a*>#W-0c<28<~Z@+bnQ{l90`5I>oH)qfueBB^KT*VcOC5%)20g{JCLB
z2S{tv)}NO^P&yR7Z6eu}R)F_gmcU%exkP?lD%v*6LznMWHlXz|c6v@@-37^PN5)m~
zNs{IBmG|S1bPr-YYZDOj1ZbGok96KV&0OE@gMN$j+3HKPSp0}^;;=uSd`n+TY?qag
zFE;utLqQw!YOa#Ok6$zM9Z`1j&tln)h(~0|h%M~$@>-Jrv>zKe?*hAKGeVFU6f3gQ
z*&>p3vLrm|UF8XhJEXDX1&fOCAWO1z@V1u=ntP|<S08|^@mdh)sRC(((gd3%rm~$a
z=LLa*4y;1jAJvr|VOP~d+qM4kr2g(wHc{T3=qKJ1jJ5D*<{DP4aK;hAq=FQ7&3hR<
z_ftokye)9ubpZNAtpdIJ1Xg~1G6~k6Dem2SjDMzO;R?-xqJz_xp?B&?sO;87x9~mW
z<knCa_vwjfXuTA+h~JMIHS(Bj>Ih>~V-TE%G6`!d9Bes=<T!31!yei&C#?<OB9aEv
z<Zoae>MQQ$%Y^o}XBfCEnH^R>3J-e^!R}*|$<*%0qNVv1+ddGs<6H{+bl-^TpDe^%
z9>u`7If7+DhrmqEncD95MxD8zVXH$fzAG&k+=zK1xFT)H@-?(^Yl;V0Eo>G9D$fS%
zF$dvdU=Ap76WsZ5I-9yI*zVD}OKk6RODMg4kJx{mN$$_=MmM`_>^&3;Y>Lf^^Y9eB
z)<2bvj8wJFdo#<HHcuC<Z;^%G<@d;fTjwfgRLx<0)psV9^ib67dKW)6dyzX|y%7hu
z5v4z}Fw~$3^Y|9KqDA4Al_pgn7wp9pC;VZ}{mzI+&q^b<bLvsz`3DkrXfmM3L_GNR
z7}@EcCs4n-oGeRghMjc<umf`bNnPt3706}tHMnBgTb|P#jk;SFP%m#!wj*H=_Nh$8
zC&@2id~75iJ^3_G3!cGUPceQz$%_8|LfF0EbGW43ULNv%fpBC|Esm+U$vY#Rg&CQZ
zw7@_PCCZ!mF4Zspg!TQ4XS=$`VS4fWCOYKo6q;%CSFAfO#}&)R(5IR==;XD*bjakb
z^uhYE{J;*Ss;E2XSecYG`L((S^!M)ppNrbGeCu**KduB~5?|Y0ZcqcMEw{09$qcSF
z_&Be9*(fMql==U1;(u`=XMESDcQb3qWihY+wf3@T#iw^@^U?tp-+l~_=6AB1aS;N!
zaeK)D?~5dB#BK8Gq9V#%9l=&V-6^Otx8e_cL%_B67g)uIk(}!hFz9g<QFhkEAMYN(
z3FBrm-(|@EH92wl0XImfk|eTq9|dcsN{J@A>S2oYPLdX<29C)(>|4<(JH;X{P;rnT
zD(mIRqm%Y*{vtn7lGAa!=?SYa#i@wcTAD%D@{@uazNd-S@G}Cn;;F1>vI^8*QY415
z)5sXZU})5wh|#hEblg-AIJ~)>=v~dnHOC&2K--!82yTWn_3_*~QHRz~S|qX@D9s~%
zHlyFUqlgW!$yF(3*l=YzPJYs#W9>D#@yr@kHqD30h2l+5Y>dGt%aPnDyd+q2MhVWe
zAiPVs4fA4Z$$p7LFe&l?nF%`3n9)PFr!2)Gw?fJ7kV3)6j5~t(G#7MDxCk=0_6TbG
zN0N=@#^B)d0k!T9gMm4!aQRIP912Vng*?@ShU$OQ!=kWkEM&_b#V<{3Q0Gc2-t))+
zLE(C$`!Et*PYmK^W$7^7dJ`Y!p949&=Tol@4XkF-FxXBFcxHVCm>TquEzwA&$6QCB
z>=YRED^p~VS4HhFi2L|+t!Yx@OsK4!OVim}jPv(l2irr)Y|XhaQ__jgqyY0J>Ot*#
zJ{;fX1h-X_Ny}Ibqzm6-yksshzT*qIi3g!uF9G@+ghO^ustAU*2)-8$qElBa!okb7
z(G~s;`24OvnBq_#B_-~olDA?;PV-=*-4=MLKY;IL`p^&rxcI>$>h5uy?)%}&g9Gnl
z%dQ5tLh2es=4~e~BM-r@NI5?2yf-`*f8N&XkIB*FfuinxcKogPE+{>}h&RsbhqEP#
zX#AfB?5Z&0J7mA3aA5&teB~%3^_ffvIL3^17Qo7_BGIosmOO85CF;ld5~oW>%))LD
zd+Y8^S6e<69QnB#%$!u1+RsYCuJ00bg}e&tR6hZs))}_#YXfW9WCQ2V+<>2FoS=Dk
zI*iU!WG_1ma9wXgrSVl0aoxC=NEcobDPGt^mT0t)>cCIT;LLuKw8D@~@USF*GFIZ_
z9e+jUxshb;jj_zkc!{9RR7o5ax5NHf*7){ie+ZNch3dcA=u+!~)S?27WLA)uMsJvJ
z*E4c{Xa(7GzS?dZJuXo1IsrYuGNC*(1~VQ;ko=jGKr&`F21gykX;)&AjI_jGYTpFv
zhoXt4`C1%s>Ne3ayg>|=It2~>4<Y~3LE<H?PEQtS(Uf2Pan!ueZ0c}pc3JN_INJUe
z^m#6TPoi?D@pHiq-}>Xa(^aCA{TGw>8nqbxeFnAiDTHs{edOh-LP6-kB$0`1FF9{L
z5mG|8W7+kQ;D2Bwk2_*VYm@^;3U|g~zUmR;-@J)z)kuMlt>wgJaT63dyoaI-+WeBq
zGf}|(A~ry4Al+=93WIk}g_qGIxng#iU4%*!Z@m}8a`Kx=VEI+NP_!L3I#-I0>3<Pb
zolh5q2z!{3+y_C*<V#~nKLa>^=#=Qqqy8B0q=~M=QEbp+Z)i4iB>o>tDu0hQ6qHOq
zC@KuvjN_hF5j}@tOzy%*!CbdRFg{0tJwHE)y&t9z-NW~hGUq>HwlGpW&m~O`{@ji~
zt_L%>FUiCqD^C>qXETnL^=7K;;zT8Sau8kgitN;QO*Shh<EGDwWXU!YbiTMjaL6bD
z?5q~p8L$~-a10lW-rOrHHGM$JYZfqt^WEe?!(LnyuEy%pP6|4^<Jj~sfp)#FM`7^Y
zT7kshV$40ZkQBBxvoG(fz~$a7YB!_|o9x@zt6|~nDU1{te<-gQy(R)@m)gPK=lvk(
zWfl!RvKfl3obj^peYA0M#PM}65R6Mj^Tqs1Ma-`zi}}@hF~2%1=2thx{HjFEudays
zm7h4D`-%BgsF+`Miusj|m|vY1^D7B4zmgF1tC;`rt9fF6bym!;ev0{((|`EYd@;Xj
z7V|4N+DK(Db?_q5J#cK)pg!qWK;=a@t(kL(o+v2hg-8F;zn5P_SoJBYsA5WwOkPhO
zZrMRM-I4{Fa8r~jl%S1Owe<6~T-aM-j%@raGVxC)(4Z#%YH2j2mBhjBY-w<~cZps4
zk-_zk)UZ606x>{+%twElF4|O*&WFoOvUwK|LR<P`?CIBpj!}|SJTgV^-I>i>if!=F
zfZ24`<E4Vwl`CoFp%Gy8IRft9I0OBD+yuL`n`yg#*gshx{4XNR|L8-X9`u6eB1;;|
z^7sO!Cl~<Z#OspN_+{0@RQgR37AVW|!tQ3a-FP@$yM2ir-*l8loy~zux=W!!CX3p;
zDpTt42{P=q;t_3Gn*70*40XtbAm_5mEd6tEq%-B8<lQyXqxeCqR=P^Ao0qHP^P=Y~
zgs{{=xVfv4w||ll{#5aS%L+?5>B|yYjPc~Zjq|Hc?;0ySKUA8Qt6ZWpKd<9!B_|3;
zub(PBF{FUrl6B^rpS%O_s0^mql1`hQ>i$Vie7`@eYPHW4de(kH)$TOMs?Hr-?B#k+
z2+wzCp>(OSaM|eGs>gA5RlXa0goQRAp!c=Q*^f%ogdd;v(YIxNeA{V5?l0pfRN8n6
zKKk_1q@T%EQVXtg>AJ(Xe@+)I{j9+s4X|NLUdqv*-zs3pMkjL4r-CI7Jd8dPoj7Gz
z3GDEA0AX;L$q1Eb@a+Jce&Z>s$J(<MBPU^uax;b>J<B>qU4YdcskCB|4UjRaqJ2x}
zz{j8>a$~|$xMihBGP{<5{jWq;eSA0W@Z3jc&jWEU^Gl{RVgUHbGg5NnJGnBq1*V+4
zfuUjZS#ReSsClG{z0u)p{^3r+swd-N)W-<u@!bovI2C#9`NjHH7qcBrpV;n0^Kiqt
zo$RcG7mHWi2j|ucxB;&t%d78_u7$Js#TAXV59gc}jfh`LH>p1+YxJe*v?e|7^wI(i
zqXXfmiUpstYzlv+vWjfYYC{bf6<%;K7bLa~2c4@0#8lOZ==B^B-Fub(Pp-MS+dw#~
z>Pt@ayC&$W?j-R~tVHUoUSVup0<_GY%1<4ww6k9%2a_1#`*ho2)%FMweeZ!0T|e3U
z$iB*1L9bv<$~3wn*b}}?-bXhcYZ0w%eS+PF3jFif+srI)IlE#OBG{y$&aK{0!dHhP
ziOn5DIPqo|sGVMjYH|HRqt1!GT)UKNdwqnCH~`<Cmr!-@2x!;r#TyULz^1{57?T`D
z2azhsx+p0!kB!51Vb-YkC6_Io9nXeM8qEhB@TTw6f8g#{0`4U9SFA8+qFeLBQL0i!
zxTQXx4;D=ayVXPJJ1rOf<g_k*nAHh)SsyqIzQNjtsKNLln;^5BLHpR<5IV~Swsc<*
zL>A|Zp6@KfM^!`lg;#4}(VcvD{o;Pq?~TIl#ieYX!Ey}tk;2UE>!?^P&zH<rqJw`s
zP%fv#9fNO@hkx(l=YnR|C6$9_xsNKxB~FCv_WNLw$~HK&{SY}b|1bI4TuqLyPZ!UV
zPNlPs+QS>KNC<M(7WkZ>O&&`JkbKQY<d2x!?CH5uIroJc$cZB0ieDp)`5Xo|1s{m<
zT}O!9E@18Hsdi6iL_qkZ6g$I!w_y5MiLTu)k0BFwvu4oYSK7N!_vbQxR7n?i)cB&q
zL@6wLnuz++GHg=8A0m|$Mk^e*!pzcG@V0Ihc<s2!R6gYh+|`qq<!&YFY!Lu!Ss?6W
zBe1LS3`@IbjRqC1Y~RS$;5pNpY>53$?zn5Aw8tBALi!E5N~|Smil(^uLk8Gfkbqv*
zArLOEiTh34zy|(`fQefRnZ=c1=>2dyeXgTStfl)wN!M05-Q<bSpT>as@L25F^9^(k
z7vYw@fX9DUV|>LT7~mBJ?JxJU0iLn=;P+_q+Aa|be?@WIo$VmMu|JjUngPYj^ypKi
zCeegR`&sbvHq!G$jyEOOu!|wDA^&(kS}W&5r}g|M<pOus=s6rVS?uRMt42fPipl7a
z?!Yegp1>Ia{`~C#H{_@F=)M8zENxF3?tb)0w5F<>sI=;0^|^yYZLSo0xh`U}tScbj
z?=BkNl4mWCud>_YtS}(rlxT=zHn!bG(y3@6hzg%0e%7C6`EjQ~j)${KR~v4BG!Uhu
zUD$cKYpk)_5=PxXGJDA~<kiEm<)Ir}FB1!&w3|@t`yE_&HD3^z{!H-h*KpbyexLLl
zZ(#9-=aB@5!{X;sSh`RGcQ5`41CHNhH$xf(E6SANjp}T4kTioY*72ynGZQ9<WMigK
zJd>4K02hM3;A*8XbZjf4utc6GxHwSQ{E6I>F@&Q-j*#8Dsg+3&cf#8F-_Rm9mdIEg
zgZP46*rTEcuUs6#`=BO94w=UGx-F$k%TL1TQO;E9Km1Bl%&(Hg{OYWjUo92$s}3>0
zauf3_-T&~b{bGJqDCSpZ#QaJspa+JE`PF<ezd9x6S5w6Nsw(NVU6Ghy{oZ^VPmB4L
zzL;MXzit$q7W1pdtYKtW<^}A2?_MSRZApJDYlXW~yJ_;mZS>r;Q{4DWE7&fG0J{Q(
zswqcr@&iW)(>q^FA+GK?E-5`nk}D6=1zQt^f5*S3S|K-i`W{F6G*=TM)GOe{rzmcE
zLRsjc7E9+;P2~OVm(eYDzy~eQhSWDXw6<XYUe+2*{Kp)Iz^mUd`;9&=@8yEKVN;l2
zcn)gc?19q3Qn2RDUG`?1AssYkIi5C|&Bm4KK#S&c^1959ZcKCF<rz8u<k|i|I{f_?
zU#{hS9y>93G~d2sJ)Nf%O#|vx(YWG1Jx~(O2NpyVDHChD&3ppx*gTx44UcA(pQ>P4
zzYlm}h%z_dq`|umyuqvTT6T7EVc0!Qf*dnRB0-}*;dH+O9C=7mSYsmiC%IkS)n1tE
zxm$Sfku0}(yqU%ehww8VPq~M8fv{QITv%|dm`FUGj&H15tG?9gaxZH=+?Jk6XI*<h
zmo(j{b6S&ye-#P6o#{Y-pWiDOvg;(jzFfL$QeR-z)Q={-N&Ch>$-BNotb}h}H`}XK
zFXK7t62kp8d#g^G6Jf*Q!}fx2#Qyl(({!T$K%BbL=ImTk^|Q3DgzKF$vY&P15;YzY
z59Xzd?E^BZh0WZHUXS({7T+0DmA65sYRS6jsxvJTLiO$@F4NQqKfg~Rn!y?(`GRpU
zWx{Ca#EGa-;tT%McaSeno{OM$5!s~)IA?e&ydG&m_P-4zelL2UF5)V@^~)#ylReQp
zV<LnOUySeOE(Ft?i8z&c^StaKWWjkR@G0F%-}wK=^0+jr{e2$9?Rx`xkiydjw&I*m
z325?JgZyB}>6ts9;KN)E+$xf!gQk7Myp!_$!OI#<>Mei>?M{>tkJM{z+6JqInoQ?r
z8yfkEP`1bsrq?W{7t6n)M&L=@Xr0TgT=VfteVlmidInT37taP)9O26APgCd5%eZmU
z>M?2o&*@z8CQD0i9Rnrp$1qpk3vaG5;+Cs@=rFT!basUapKw#1#?Nd-52Lemn4uv*
z8};*_T=RP4>w=<2UA9YY3>`i{4IWR^2cyROSUAv(W{+Aao>MvoZBep#Bx63mDD@j=
zCjY8@)b|*o-pS(D33)VDiqbU4OZ1iUSNyH-Lig*RV4s^_ix%0+^WPgh!E9<29Fd(2
zC(|tGy8SPh=lBAMR}s>fs5)5FHiDm8mII*;jr8wa8!GdB3AJ1G6RbB~=9SBm!D{0+
zFt=~P<wKu=!w7M`UNsts#Xjx;<H#KUK{S8t72dfbm`8bDA$p%8pnIw|b}Nc_7QP3p
z_r&s6BkYWwM<=wZ(q*NCsiyO8Ts>zTud^7!CqMA#f(uIAa()qN&$<HHoe_{dt%n8-
zF{ioLy}|u=Q{{}<GLSd=#Ck6rM2C()<m9=Jb{3l*p<kv823e%S`l4zmUB8a3T7H+w
z>_T)_xCg1xm7=H_lvP!_!ae_M*kw4JKCbPLnem@-%E<&+b2U(;B0U+yv!(fh@d*%_
z;{aO%$MKyG;w(_R8=n?EM%ndEM3Ol37W4Z|<@*VeylOvWE!fP)zqvwEe=kGjIp#D#
zm`^@9tcF{oM66pq9N+#<0NJn_NIcn2pn4y)PFe<Oqt>D*rx;Ds#eS0mAzU?6lIsRu
zXRf-rq|r=;U}Y0bn<>rZB3y99Qf-i%RE$H@x}on`FwKawh8WQn@%p=X9i&Z`4}Ek1
z4SM6qpP^b98sp2hhS|dfeJ+@4`b{+X@;0*jnkkMnY=aNy%E1wH@xW#UntQ4gnioER
zfF-7M!LE-mKlKxPFvk=Z%qk)tAKt^wl<%xILOlO*Zk%X%T0b6WF%CZec_vsVw8hQA
zkI9(Xd*J@jZW2~{3?rNfS#{wX?D)P41I05v8|F@=58@1{{#Y%#AWoI{UnmW~e*H$X
zIxXtBS(6TP@)VR$uZ9_#I&gON9{#x}6@I@7V^_V*dB40Qq~ckun53VqXX{5W4iwO~
zAxA|mH`bw9z&tp*ssu-#xX((yD`DSA16)4u6U)u&4~8XG<b%==NZ<R1wOu?7Rja!o
zVbW1DI<pl=`|f2O3PRX3%7puLHP8da^GUz($vn|?GW?M(fXCi|_o}ZGqpDJ<`uGhW
z^_PLQ#uZG*ri=x<sleG&QDCW5$y^pafq<r4_(r~nSUVZutw}jB)-Ml2rkld`(Fvp_
z>xe+7LqTMCU&x0mj)u3Y1F`XUCCh%j2XCsZgkJ-k*t#N57SnP7yqfQer(g~ftwYXG
zHl`BA77A|Sxr~?fx!~vuPf%V*_*m_DGRAKS_FQnpWM+&?ZpAS7{T?o{(-~E^P2mH+
z@4<uHG-xZ|13Ql}nrT$SL{?*AaB&QCT6+)@$B5_izq_E^vte+&e?3?z9Azu96>{ef
z#mgOkSf#=s2-_1wb$Knjbt)ax!o}I|mzUkk#8cqmJcYV?j^dX4L*Yb365p4T1`6eR
z@L><*+LH5N<BDAHdKLzq5+yKgTsYPlOhvitx^%?Z2+@#}*SK_`Bu*S~hF(_KO%rY}
zp|3@a+;hYm7G=JJ2WQ-(PGjcquy7}S?a)+OG2|re3%G}$A1ti8?lgujezl2~EZj<`
zjJ-gUm#m>Q>n1E4cL}>+ea9Eq6d-7H4tzV4%S*PdrHv(TU=yjJGBaZ6zPX{;bay|o
z?~a3$dwt>A%5&)b)0u8wvYff(iTfNUE~2M3^90s|s%ft7e%fjpizYv`cqdKf8(wR2
zyIuXc=jHzNzlcfy$A@N#{XPL=zt1(X-{-#A@3TVe_X!aDePYFapK7t+M|IEt=l9tw
z_WNuW`+Z8qexK7~zfZH+?=wp5_n9E}`#2mr00Ob!r}s+|j{A?_M<n+96o~yk%45%h
zWtI&_n>>Kn>P(*Ecn05j5VklX3sv0%sUU10)JDbdJq~rKvE~Er>|F#M)yg!n*bN&M
zl<@7$XTT*a>64#DsFqd+NiEIrC@%q*sl;O0gJ_&)c7}U>dx+zwhX`DHq_HQV11oyh
zVRnNRrz)W|J!TJ8UKmaro{oX-$HF2Nt^XOI>j8(Od^8@Fu?hli<ZeEx~w<d=Bf
zt)q~`5kEt!I`#|;`%wZdZ@uYs?SpuHyU9Pv?R&=|$uDoj<_V6%c(0>8WzI7?qNId}
zDj4wJ7oDs0lgHAl1(Dp?S-gc%-Ai_4)G+??a2M~)t>u$cJJ`^zSy*q8%dg9=uCgBg
z3nK0U-FKk_Cl}1%Df-_@=dzlAl6M=ASkaB+lKD4Nd*SelQGEMXC$3j;wkmqdEl7-2
zJe&DtxUeF01@{|hCREDkz#m^HRb2>ZsT%4srD|$ZOVxtxd!Th%zv@uV*0bks+@j`N
z_KEgqAQ#yfi;{lk(HAdAS1I?}6V)~9m^N{m-4FA89I^Z__!!&}%(L!?@+;?J=<RM&
zK1UTkjt$45rVrVgIr~NNn@-?2za-I*4I3b2`6sqYb`18v6$GC$tWdn?ER*iu$F@rL
z2zDIOgZp!%nF+U`=cR&Z*TS(pVzfJTI~)vIwY#bNs5K%P&jR>h7X!z#r0L?p_B6D;
z7?hXRvCVGhX~ErctgS8KW6uhy^2mPF-EBSn+#uj}i)X{N2SdPYz(pSIeGF{wjG!(e
zYwWsx8k*XFkOe!$>x6ob#r<|0!Lso=hNc(LaZg0lOi7ndqviO4wa_DC=TOax0=#h6
z9GcH}!9lf$$k(O9iCuMg^7achW4adEQY}e`cf1ElbxZWhZ22emuG+khZ1)z1ZeG^p
zz0rDf9;8A)U-iaC&U5I-J@I@ec@MWEt$Bc8B$ipP7SHip;ecy}@KNs%?w(>u8=DJA
zpSPOuO;tGNC$6IluUep*ObjTe=iyJyZqOTV%{IC1s5G^|g`F?5A!A=urKx5tKAND-
zcg(&{z0aJ6I0ZB4K0BSe`os`H!(nv)9>4{TL6EE-0%r$j;*-nnw5-QWSi4EUL(}sy
zet0HbAkIW>!reS9|2G;p?C0lK=)t(xJLt&SSDEHwbGUVQ9W%I94f9@d^3nMzPWW+{
zmK|~AcH;#YAE^Q(D=tD?z$dWkodvz@8V~wAfV-7>(z^3fe1k_U|5A974)GjK?dm3j
zHBUiPqy98t?Mh58NW>M6M@XQ~WK_KRi9OVK3S}KT=;vo+E5kHC!p=EcdG!h{Y^^wo
zlLE%^Bh!5FYuI6SGIBR*pR^xiPrRZIEAFDo&p4)|*8zq*?}=S2vr(ejfCrVB^74*>
zkkkBxJQ$n<LA{jN&W**ObE$lh^F@+-DPN=<BO#u{cnYgsoUyRxF6KHWVeD5k98sGC
z_Fs=dc8M1)NL@&WcdHQ94Rw_l8eO4ti39Z8IiA1S^T%$4?H;@{RG!U^n-0q(-;kTW
zM_|af1hR0+31XXjik-X3?GA@e#J8psamo18FiY&0YCAWW<-YkMX!_|S=Hri<p=%Pp
z{&a?9uKL5=o33DU*k>56VG2bP_F%T4hV&ghi>q#ZAa@(vQQdJWnz#z^{XiEYf2LP(
zE>M*Uf*33{TaV{63Yh-sND`Hw4lXVOq0e1|S|;BUol8{XqJ_%z@?QxIb)SVNE)EpC
z5K_n<=QprM(+O7hwg|R<JwztePvnaeGC|q62%cor;?l-_xN5l+ExJ@C_^en1nfdcU
zaCtG_SaA)5GS`ubAHpEhw;OWo8rUhD`?zSuQFgGd5d&8!bJd0M81Y?^Iz92hRSILV
zb<90%R!3$rz@IF6mjLgC<M8~vW1!Mc07fCBurF~cx!}{Ezc}DZeMZWo<rN3i&<=#Q
z!be15gQ9qybsv#gq{OekiM8wTPN4d?EntqNE7yo>XX|Qh=`wfD5=%Z}(Oz|`*EJYd
zoT4N#+Y)je49UWQ<tTBgOuTL`4+e!#SmTqcqE{m%p`hil;FnIc=*IQMaC2WICNH*w
z+V?X2X;cin)wsvrf1v_tPfHkkegJI#b(Ou1FC{*Tc64^tuQ4X`!l|}F6>NER2TSY6
zL8RPuFrMqrOlQ7lax)z9t$hjzyZZC}!fG(gmP4VE8@{=7QQS|nonS){<1+SKU2Z)r
zzb%J7>#WdSeJx#dYd^fuz5+#+?d;yX%Yy&oK=WVx;rkV_)YwQoBl__ycbeFbr|py!
zCV5R2K3{1|1)4Hc<FzLVc5&ufDuaY0`fbCA$}oOMry642j#1MGJ@i?TlIYZMp>R}W
z1HJyinJQcy%bnIq^P%??vFURfH}+nH_3A2g%Ro)KP(GQoS9QXt%>wK>=*(uH)1|_S
zIn;LUF|NAf5v=%SOe>9+vBh>p=vb@{Nnr`J-It@*{6yY=b1<u~dItyA1=9}dz~!uE
z!EeM#cwkV6YG3UB$@{!b?Dsj0YHXF*?;{ZVeQt{VKEYzYkEz)2GxtA!A2qSxXRp}r
zvr_E$`6c%IOcwinLd1TbB(dM;yLf)$t=R8VC-(bH5&M0%i~T;5`o{E+*zfaP?DvTm
z`+XGt<IF4<J2U0Q&dft%XXXa6Gc#4}%v2LQGq?T!oS9|8quDjFGjp2QnMuUXOck*+
zbDr3lIa2J*bP+o<v&GKLJh3ygSnSLU`HwS`ik+GN#qIGw`p}dO7ogt$E7_ow4YGy`
z<ejLO?Y{4TY4h)5dz=t$3zt%xjS)QM^<aDy(2V^Z-?GUUfk=Jb&GycSW>20N;<NWM
z<lsPQFpK#G?N0~5hJH<eKCiIi{T0!?M%{l>fBcJ+96jMLvE6AzY*lZPz%VM9*kHz5
z4R^9>uavQo1+(ilnegW0DLg*Lg@t@o#9t$H*-_6eg1H~cMQvx#h<1c}G09i90>2BE
z<a|FfcxFM#ODRPdnJ#`ePm~s2Rrdfz8}T!FU^3lVt%6BsocX|8i!f;YC9d|Q5VWUN
z;q(RdFmBseUNxoxBAySUU*6T@u*Q0Hig^iE>+|vY$66RNQd0P};UNm&Msi7o2>PHW
z21dMz;SC?f?u-f3sY7-;e4Me6d22YZaWCZHR={X(vCfZnev71g6JNr?AR{pSG7J63
z%FtEy4`Gfdn7GN-z=Ov3u%vMVmuZPa?!Sh9tBb+|#m``#ta!%b`z^Fx_nJsVC5vX8
z&-y1kF)LabwT2Bs=VnFP7kGuFeV7e%dYs`|xhK6ld@b)=*9hXRYj{j(I~1h3Q}_9~
zI2Z1TG!JdS@tdRQ^yw#9^g%75l%W)#G<`Akow^%me_Bo#Od7(jEpeqAE!Ck>?>4hl
z@PYyI`8@s18thmUOsg&f&ztW+#`#AH9(*ceZY%!c)QK|W*6`0vYk@h2>z%__Lk|ef
z7mnj$o@@DwASW<26|vCX5;Q1^r&CA20t3aPn0Zu%dpzqR?f1iY-WU-!bUk7-<VIjz
z#|W5ERE92tg9OKC|H0^PIaax)0F&`B+^`u;Pq^P@0?kH<bnb_xtL&-Tlk1qjP=+>0
zRzU5O=kQlFn|_?~0d|E6V8E$2*lVoHul5>nGDaOePYmJT4h9nGDGx*vOH{FJSQMLk
z@+h-8IE4EDC1-rStzhecWq3|-xzgZ4A!JC(vgxlMqrn|_9<gycnP8O)dcrsiE6||^
zu?JwgZv<J03-FZtPhzj64;q!J;H)?eM}5D_Hm#MSI%f~zsQE&2#Aqna(8)yY`dsX&
z{7&M(T*Ghk;~{jfkXsEr4kfD`@k&oV+St8?>dH(qQs_sro#(?bPiy>LZ^*+hY{k>(
zM#ANqkE~Rwi!_(Y@KJgRxN@J0An%ViW;U%OGLQUVesmvubRi4NzaGOp$9Cd<xr=n<
zPk`&mde~FU?bK{X<5|aiJXbP>b*}X%mHi&TCYk+$oRLdGLCXk>qsF7Lb`Nyb&SN<>
zz9fR2CS7m3;NFWl%p0HE?KwDz+~6@7mG+cCSOO_teht!FmH1*m9rQ{3!<Gz}#*zUS
zL9*JUGA>D6U%Yq3;aO^UpezesRiq21wb_!A-Gzd~h7_a3Tj#9Wf1U+hP((@V0dT?n
z5wY$JWNh5+$~2W@#5GJ7h27nv8s`U?S}hM!hpn(wx>XQ4WDz^H#DsWkdjK0VoWSZw
z7pe4{jNkenV|E?#tiS0e{Nq+F8u7olyVHNFyRdEGWK4uYB}GILMX}epwn#~aCWT6K
zX`bhikSR%~gfb+hD5YYrb5VwrW|am+Q7EOkG!M_ZKhO2z{tJ3>{{xrLWoz%>K904%
zhl?4H?B5_+%ogL-3&KQMJOC#;TtNK|a$N6aD=s!EkWCZkCsy6O1o7dcDZi-4H72!y
zg15j?r!e?>M~;1U`)arMpf-#uzDBeXmQde_{?z7Xe~7EIu-z7ygcIjDku9~Mg#|ln
z$<UeMY;koh34a#}aatLqpSVA?@Q)O?srQ2LCw8E8d6&4GHyLWL6iTut{E;+Qb|nX`
zH$k7B8<@f3RML7+4cgb7Bo`-B)QZxVtPa+N$c*Qb)j10=Dt`>7#MO`<SsTT<phDt%
zN`}WW|G+JpA~~{|kwfCX^^zAe@!S+`X4hQ@(%YKIg6#`sYhACwiP8P}kmtRjAz~go
zU|LLi-FYL4+LudyYgv#OX=gm?9`jG0ecpKgqzdO=(|XOz;vB_$YJ4k&T^Xz>jh^Qz
zJ$1B_cJ$nZ#XGw&F=)gKwG?@Buh$TFy*nNA^(*b18%=w*d7`SpQ`-9R0kx0ur5o)U
z`EJD*{M7L4{Asu!AGl%4KY8E%7oWAI!uL5Ne4pdO_t6!;&)ffepKrqV=_-7m)57<8
zA$*@&;rm<{zK@CUecFWYvtIZ<=Y;R$EqoskzE7I)eSQevCqno>A4<-HoA7<~h3~Uj
z_&$EZ%xpiPBkO+n0elo@<|AQdt`uhGY++{J6lP|mFf-o@GxMG>Gbai&bG<M#rwcPv
zO_-VMg_)@>%*^M)%p{$XWt)YWsrjFoxm}o<PQuLWBFxNx@w@sLKjWF{K-%Vb64KST
zl2qG39P@S;nK@1m6@QJQ`6_ovS*Z$@R#{_5PB1u5t%EkLS*-F}s4Tf~ujFb-KhV9o
z80~-Wr-@&}B#t&dWR?gN|0cG;fW>|3w(fiIw~FUK`8W74eplf`=fbd@R<cYrTk_2~
zmwk+vN$xLnW7B>I!`;(r<iuZFICfz<JM!!#x!<A%;d2vY?doR=LtIPjmQ9~Z6n7n9
znUk)8cKTVn8l%0EBza9xT+kD&Y`x&vr6Q)3znfh@c9>@grp)m^#V%fF$yj|07<<he
z6WzM<(@*-qG1F>HdA0>Te|DyUou|M-Q$60x_6;b%;cUajzhuwlxqRKhA8<)Y0;ZJ>
zIO172eImM>tihLLE5~1hD;2h^d*Eq2IVM_WxlNmEPhN)IJj`fh#uldjL!TDkUWvM+
zf1qh|0?T+;EZOl{ffoClQ|I}^*a3(8r1e(-oO_uqSzOs6+wPMNLsH}^n;M7nPQ4@A
zo194Xw}E_rXy<?Ons;{y!R9UZ@awUIk`;0yPrcopI(1nKDZkX|kKhb^GWR9)J?O<x
z<*3ukV}5|bmPa^hmIbyNm0{FKV`{Sf0<yOksnO=wSSDwUi)^&<_{w24+IKnR+|#A2
zgh$-#@H^~0>@2KYy_aiT>V?x5S#yhPU(wF-y<}ToD?YsWH@dzyV3j6OxG-oT@Bk@U
zR{VgJ^;?8m!^aY@4_(3Z<Ww*uJ;7trNtSN#6Sq{hp<H(lKJT?Vy}6>7tjOji20l1L
zT1pl9U*~oB;KvL+Ty`4<r|9vZ$cq@Wu?H~u_4Ksdchb*MhI4P|f@S-5d~whPm3N(i
zb9EDF>gI3YM|V?@O32T*Ua;&-GCnrE0E-%?Gc}Wga4p6Z=lG*!==sgqFL4Lc_&bnG
zpT*&^$|kn%xf9K6kHM_k5vaQB1dJ`ShUR(OaA$VCL~Uq;EGPSoY~&kN(rC1V1Xvxk
zySut48at~&t!yHm*x4EXcAN)|y?XE@JY2SI;qt;!M;tJ?OA2!=2x8{(CYU{514?4|
zV8BxkNlsydq^ivy)dtPMiF;dYUrg{McaKd5)08^syY`o?y~SO&MCl&8F!d<W7?C6U
z9qNR^Wo>xWB?GL7#<CgSW3YPCV^CRE%1(M|vt_gQv5u5xiShQE_^fXoyIQ|aSYwLV
zEl&>z$YO!VgcN#Jc411Mha?*&he^IPcV|7_dr7`_b%oVeJjoz9jKjXh$PxmR(0J<w
z?C+B;S=IM4iyd)~g{Ac1QyU8<QMGkMZ+0Ym-T4+w9T(1fwVO!J9}j|K<-VAtJxw+%
zHxbJ8x03SgT3mUOV}A!5xSnSO0b{0;$Ci;yYh40%b9Z3bD=Wx&y$-l|vW|7Ta70E8
zhhprcUfi`?FT6Ibj;U<XpxNScZF$jmh$`%AJ2IdvZMb?4jq>Z^-Qh}dSYC8m=e6zP
zeB+_#C^dS1b}@KX67ks;%AO7lfnSIGS!888F8-W>8*(;cMM{vQ^C8Nf27QwF46cyb
z;~-2^DVLqym`nD}{>4HomSOJD-IBLYr-I4)#j*ycl`MFEicH>DQDicMQT<3Zd;CTX
zP1}rUzPKaw0<N={6Eeu-Vg9Uk;tbi810AvjMymXNzp3!#=`e^}HIQWms)`OSl@uRT
z;V1K!px7vdu?GzBqlY>^$g{)P@?yqfgUGZ12}x1>Oh!HZfJ2+B$o($i@a@@VR+6%u
zxG4I=w@r2M=dJK~Tw39x%Tc17d5JWu$zkuEg|f$kl9~DOad^i`2Qrr_@(XciB-?#e
z=pl!j@c2y+?s#G>Y%;tm`#bp=J7CaCs%B26fAji4;Op~XBc8wecU6>iO4Z~)J2GL`
zhgA5eCkG88zna2z*m99y*^B(@uE?)uiu`K8fB99d@TrwWezjZVSMx=F^;hIqPXFas
z@gl$a`(J)_N#s|<MSkTk@~a=hr~aS(s#WAyz9PT+Wh<e#ItDR=F1Gw=;SWBgcA#{1
z|6bB{P1c-WilI7=--hv~!CX7-3mw<}1Jg`S<Y|^V^vw-|)!i;qgSI$!<jO0$^?f#c
zNp**G4HM!RJVX*})J$RrKg3BZ5A#`u8oZ_@6h^x#7d^J<DYIU%i;!qTZg_b<J<>3N
ze%g@1+pi|jkK+FO1-cWrK5(aDaZ}Kx-*2pMD}~i-7tl6yBYY=Z+*%hsdIl8uyS<(G
zgzo)FOP?Ej{^oyU<Nk}!^MCO(F3V<t^&rBGHplV~e}9^%tIT^1kn%5U3+SY-Uol8e
zk6zue8NI%$!HG@rG{e~sE1$)nl5aIegapFg!E$_f)_JVDkSshqS2p@9!mlCiWJ<>l
znNM0eCblX6lkWyQhZalb7Iee6vHH>nkNm0Y(Y><nxs^0o+`(|KeaCCXdDvI0CrVrP
zeWmMkRirN`EX0hNNmA|U8~Ey2r=sOgzS8&4drHS1>?w`erojEJlKF>kC+O)X4n>dp
z>q(tPr2dm<>Vxi`i<XaB$d;e@LRT4h@sD5D!Y9AuQtd;x>D&9CipHMoCjC{_AQcpf
zefqSM%<$$lZoMtYJ|S&)k=weWqI1R3QXT(%>5-;*X>iJG8a}q52%-<zceLLudb4<j
zeb(<qvZKqtflIjxRrxIy=heQ$*0i;3Yg-7rH!4fglE>J{ofk;|&7Yw0=1EpG<{bF=
zD1q|6iD090huKOL*c4?A(AaWIa^Pbjqz{kA+?fxU-SsT&EAu7s@#3?nXdThb_(-nv
zb&ykd3e?OGNUpkHWnWjT;l$NueB6qA?8+E<bl5Wx=$}iVHgg*tv!#(`J1)UXp9Yef
z+s}{@yG9}1G8@u2xMO|VRE)LoCH7jHWbNc|2vD}*noV8dr|E5s9U*ok>=C;)A7j%7
z1`3ZS9A@2~LmxGNW)~hVB6}URxJjlt-#4QQ`)ZUE=b={A)Xp6*Y(I_O&C}?@zVAqW
zaCfSIB^jE$oY<M-`P|uH0A%(4M$=8Z{*%Am#nXp_e}FX%wVck(O2*)1aXvC|k`CuD
zCG5C?Kfg6Ykz79gl}uh>3jO6C!A*s~@O#!E=$*Bc9bem3>};Kd6<f1tC(BnP<yj0Z
zdz@JqlIV|@j`iU>r$)=dXFMkdhUU`Naa!DA+a6dsN{MD(Z4`EiDW-QF2s*3hKx$h*
z-sg7=t=$-kKUI5TQ^5hr^e|Pp>JY})9X|o@z4yrivS;GT&>;BJGln*;J_FM}_2mOp
zN<san6Ph=Tq+2&9(VjtLz(qBaZ)i(`Zy}>$-AqG1YSlPCIByQW={8@IuqB;NuDHVj
z{#xRJ#4z46+nm?WtU%52r|E5fQ&RnB4eUB&B>J=w7!O(a+~1QfvA%%G9*3ZH=3uDW
zJ(RtDJPP0CS@3!85#+`Q@!BtX1lPA00o`K>(<iFXaN#P=`E?StB6azZNxm#>gCoT`
zQce$DfIop+FknL^R%U*~z&I6}BJTQEC|+Zpeiu>YZZ)nldkXsIUFfl2Sy=VanzkQw
zknI>!XsguZ#BYY(!?x%+Jb5+N_FP*i3@TK`n3BG<clTiUmi-2QpS_GRk~wG>p$T8T
z3Ykj8S{Uv8g;e@ICG$2<fWb4bvJ%%lpz-PgJ95<r(pB5YuKUwLT_K)asOv*|Tg*Tg
zwegsvvkFgT$n(`o(WLUXAKWeJicEBAi~cUhD+&8q>&->jg}UO589$iu6?5u;C>3tI
z{k2=PT8-aVjwE9bg`i1I7q;hPU+O#80zK?>NOs@rkkRsv1^Qitk^ApKi=sAVO#^sa
zaA$h9)I??%2rPAs6b8thB=Jsq&`Iqs%s%swrK?|prBluDyP=ubWUYYLio@|*h8tZm
z)eC0CG()#3+o=6{Ro-dLGh9DnBCXX1NIZ22<_<LFkq!)L)G%D*V@th%zam>duY`-^
zt@+$AXLueE3<0aU(kcBL$VU6u1eyzA<(n<|@>Us|{f>gbdAD##S_w0H;smA>s+pmm
zJT#TbVTODz(NtCd(|Nu5oSrdI)7_a*Z@3}Z`^yxDZED6Px|#U!Mi^?Eq`|Obn|O}G
zTZy813iMnNjxmpy!;^bvAibx~FH2^VbIPF@{&OO{{=FTQoW`-g(Y9pyePREvRYN<A
zTZInY)k&=Sdi*xCj`%naASXX|fMv5B-nnhVPUguZqhGbKQ6)ieH{laVn<H?|;WQlm
zWiV8{{7&9)+XS08J|YvRCt~Nk1ad~?lf%X{v1i$p{vGpz&41F45k5^Ud_WQ2vUkJH
zi#0*}$}h61<q50&S^$aOHtf%$H;n3s^R@@+WO{QNwEI`FEv449^L=@IaeNut|0#xH
z$rnNYnhs^!%5dHE2e~Bjt5T6)9TE9eKapR35cyS_@Tq@_{7PBmSLZ~2_3}TTI!WYL
zgG7GiDDtbFBEPEmUp{rhfB98_$ge(%{0c>W)l1}8!6Lu1xspWR+xXK!Tg{^KMqPTu
zutm~n97-pzkVsoR%IJ@qhG?ZRmd5xkDC$d3@{W5-Qk%D$e7J!dBy@R2%^yFc4@#Y-
z?-g5V@0s(!iyq;|v*+;5Q<m|#Gf;Ho!5!R{IvXvQuH>Kke?qs@3OxPkF8cN80J>z(
z37jQ82hHm$Bn{=Kz=77V6FysT%Fv!P$D|J!iu<S@tLs74$C`EPRRxc7i`d%~FaG@1
zFmQD>;B5!;5v>-&%rcdK^4|V0p5gw*&)9g_4cqrD<F~!f^5BER=n{K<{9V_LO)V%y
zu$=&%<Q_tE4`<L+w<W{OOIdQ)_gGX#Fr>tSCQlE+_W1)bJoO|FdE`fD2RFf*PQ~P!
z^LT1Jx`3!HKF!H=pMUavm1bo~7dwQ|0_ki%YCsqt<#dc#>}%qco5E<zvpzgS+XDxR
zyN{L5lZ))t&hymuj1SwflD8gmpia*|$xgT2ryBj0>>Z4{^XAgyMdcx?bYO`-|Ff%z
zM=Cb(=traf$^S=PhOM->Nq0K%kTGu_whe`FD{Ul$sh+*HG-aMv(eTb1{A-O1P0(Ff
ze01=lq7}wNi>#u4^HZchl{lM8FYi>5`XBGG*G{`p6qjODT;bmdhVE75Y4?wFRfXaF
z(W0x^cd;&Hb($@k?==rfBMxJw{S|!t*`42>xQO`8sbS|g?<d{?P3+5;a?<i95sibx
zFd=I#`i@4n_r=q5_N$a&&ioIuNvF0*PB<#@a=qPbk)jW}c8DD6S~FgsevXXnGl5=M
zV9ra9Z$w?M+id!Nc^=wTmG4+&MCWP_;)A}|64h!0IuJDItve(66<#W<t(VYZ&y!fR
z+Kal57!Q6E4PeR79P~DefxmM~uvMiBkD5<|+%bd()y;#`U2oA6tt2irs6f>fZgkQ3
zHF)ZAH~Q}BQE<<5rESZUdAE&m&~48Qe!6NBe6m&Mx~ppWJkM&<Z_{wG7jOu_Odf)+
z3y(2dixbqd-(8m8*A1l;`tY{a9(+&T{(thCKZ?(meO8%Wc-gQHdM=8?J^2r4QO!>*
zzq%jrR64H{-9b*#S3G`Y2kV$IlTPrd1H~&DcGgZs#QsG%^}X&%LpN$kr=(2c5jU)<
zf6aZiJj@OnD(|A5))+{d%s}bA2cJ7cRr)f2Km06-0r!IyRQr533oKX#Ea4<BSd-5#
zJ;*{+i)ZA`GIw!j{wB=3H=6ICsY?Io8__o%9(Xb#mZn)fg=p0&D5s&s`zTz&eMyPn
zxpg(UJBd(JlOcTj`tG#1|3~l)x{Aypns?uJm&&X+)ASyS+;`|zdT4?d8}{}+bm}q`
z_YXDXnS)gMy)Z3UZmYvjc}$?EAN8h>QaaHJzt7}jrVY5<*ufQCck=J^wm?|ySzPeV
zk-nQbjw((!A;nX^;K-ma5Up?$b{*_Woo~rl(YqcjeDH4Bb?XzTRFH*j$-^YqA|kL~
zMj&gyV@Njy<+7w^PkL+bLmVgemlP)?;-PCm)+Of?_mly+Ed4MrouRxeWGjuVv;wt;
z1*FVbgHGAr!m`H=;nqwWC$G6AD~Z^FDrFVmJ5LY2Ogd2|v3ok=@<XsoTq_LEWQp@k
zCwNpf5?X%g!KK`Lpr~lg3oJXc6)X=YuN^^q55I-BjlyP{*aOe!4uMAB?snZTp2t!5
zKfoW4{cy5RJu5n4Dcj@G1@%6+<DFT_q)&^KPtv?eF87Fr_tW1O=*b^~K$|sCsdyP~
zf2FLwwGgfDjUbP^6_CO0Ic&;`XE>!x9%$;Uf@#&Nw7~HfWbhyoGN*;5PrQjb<;Tdw
zakflz<XEUy(t=5CuBbe%0+M47l8IOQh_f8Zs5Vjb&F7!M`kVdvgM&SJOIvTck-9*)
z)gK{${w|bka_2`^l*7Aj2YBqSZs2}I9=tO)(dQ|Scz$*XK0T3w3O!YzeB@x}7n;L1
z9hSq2_eJoz&j8xn<^s#IJq_&nO*s7QGPCbI3+6=~hIptKxx_E@4%Eew=4Z%QaX!R-
za51xspHJ79-$T#S!}%)rq4d<mL!fiafIUx(r0&z1%wzWpaJkzmSujR?ZYA!Mefg%v
zqYuA;$xM~TRu#czhhhAawLSmoluhh+E3#?B7vW#wcs7W$y7ylGg3W8*Lxy28J@hOX
zQ$O#;hmr(|fzDh|u`~d;G0OCSl@vN{ZnulSykFKu@gp8@7h8Ttd&#oWU%1?E3v2Ju
zm!~@}qDqgoKwZ-c512ba@A-#hlh&TaG0zr*d7~=Y`2dcozm75wVT`s<$GcAlvWdBU
zY2Ktz9KSXKp6ox3pWe?PL6gK5;-W<u)9o#8d%ac`^|u*6wiwA2tzSbz?j4Zd^$WiA
zT*T_$15cET!81EG>DGelSS@_&-omGTEPUz;;ZsKlpZdD+sl$a&y-)bmm;dvr8--6@
zo}3^V8#aQ^5k7V0&v6oE;ZxrcK6SnDsb>hEx<UBV<Nx!iJ%mp^RQS|#{`=Yconz^{
zd0naLfkqzPWJt|Tufyddn%pvK53Lss`kTAmiathoN!xNl@k#w?*uHM8v@ZEL&k89+
z5~#pBo4lgM4#y>vz82DJ7l%{(w8MPth1*;`brRL7@h(zzzRin=odVk=RsQl_CcL=g
z&7U}GqiUyDIQ_8|Z>g-8ZS-&<yCZ8z^|Ck~65fwroUA~1<+$-0pU#*vV~cFg1x2(!
zdY1NIHx3p!_~Yk>)u118jhbX{q;3i0`FFQ&|Kz<rOZYxrgzv+J@8kNve4qZp_gN==
zpF#imKHr7!qoP!e@SpE<NBBN(h3}Ioe4j+&`|J_EPq^@X`U>CYzVLm1ble16;rp}-
z-^WR8zFg`1|2JPGV)JE**nF8SHeZH{&6jIp^QAy+zSN1$mw@g6fAdB9-{#A1vH7w}
zY`&z6%@=dA`C`54G**kvmmsnEa#C!*WQfg|!dt^2Tx`C$wD{6N<D5WsqZJ=GYdJIh
zcpgVw+YM$T^JswS2fQ(06Spe3#(pQgARC11rl33%#x1R3#!F9=+#a4x)iE3GUJd7S
zFUoVD%ROjW>>_&qjskw%a0xp+#`B1uivQ&GXnnP)=+Hr9y7W*Frknf&+w>5Ywp#GJ
zgBBMpRXf1X4DhF6K>>Wqx5?7YbI+kk`kbOY29fkc^Aoc6uRqqE4dUar=}M0bG~#zU
zo#*jt%cQm8j9UA==SNV94w^HM|BL7Lurmoo4|`e_?dZ0ZncLjvU3)}Jo21=}o2no3
zHu{8@tcsT=P0}tJ_I;~V@j?wBENv|M)cK^eKs|(Rss2M7r^XbW*H|Fs{hPT}LQ#?C
z<UP{X6Bb2xieh+G6qV+lRD$<Y9HE<kB=j`Y0ROY)vXIfj(oj4BuSz5MnaP$|cV{Y|
z`qqUwb#8+Ah>0*I$$-C%OUJrPJ!D^1%&}VD1zdBDA#m3xCUKsNk!3p2d&XaK#Ofic
zzq$$aS>b4Ab(M(b5aRdx({JKD`<^?Mcy`z*>f7TUMCNP5lpVrFxfnuz%Jsvr097d7
zQwVygJHX@CQ?Ne1niNd^09m=~P(7xO9cw#-UY_ega6A~LqeUkTa>PxQgXqoFp_nf2
zdaTGlKv(|IWjl|rr|Ld?U|P?;bm5S6xTnyI4xBxhCu}mJ&+{kp^ULF4B)x~LCP%Qz
zJF9Rx@5Scq??+eZ&!C?gx7&60I*5zNHC*r`f~lOa#kU*I|C7I6GW9Uq;2eaG`%7Vz
zr5!(K{+F(`u*J)Bi=k)PQLgXNnU=Lg^TsP9=(yD>^oG)QF#o2Fa#2xOIC2y{ajpt3
z90BR;wVM1<=?2=hKo6S-5Zdp36gKwx1jk2)voqZdA+o7AY$#3OEwA<Q;OlJc;^8fQ
zCBD-e{7k0l7AHV+-ZJjg=NLDM=>=XNHc<<sS2X!aJl%5FhEH}#;PRcX(y5{4pxqdX
z)!h%FiB~zMb<2|VSzo||Vj`JpyuS2Jm##eS{aG?>v^(u(=0V}=aIWJSN1uE$0qx<!
zLfrH~a@L>0-9&GwJsN_G_KoFw@=4^X?=xr;&yDRfV<7QMSH9#|XX+Q~K!3(5@r~Gz
z-{}#=JCP#nxoJCUaA%Oa>H@hV`@zQvQrHqOfR>a+(`UvJ&?1dxp~sJ7eX*Hjei4U~
zpv&aMbD*m?=0bvFEL<2ekiRAfA65sUX{i!-kto?29f^XZQ7@Uiof5wtT8^jJA1v(i
zU<<U*%j5?leqvdL6{(mPLXA=bLH1jno<Cm=jYl~<?4gfm4=C`BCmutKTq7L1+z7Ij
zy4<tg9<$?=sh8C;@@jZ31b5HH=?U)8;q*(UXcGi!iDB>~R0VzoKWBkrduGWmRWJ-l
zVOz2n(KQy6LEEbVQjWUexec0JBJL6AeU+#E?(Btz97~v&lnvf%B5}P+F&Um2j}8?;
zjNTZsxl?5<DsT)RG$j>ZbZqBjU>>P|7X|NDSfNFr96XD2C3Y)6;bEuYbl{97@OR)i
zmYF^h>Y77IG4H{b_<EsQS0m^+B}dOX`Vd{;Kg@L5Ez~I7M74Lx(boZAn5E?-GD{_a
zEual>V1gl9f7D>J&kv{Hw>se1<K4)?25ktvJsQ?0tCGvxIAq7pBg>9tuy*NmykGc<
z_0fM!YUDR!K-h2Wm2iO6t~dm1tIpuj2u*BgyCSI{s6}-y_N7H<y6|(FJGtA?cg&}!
zqHOG8XF4k9CoY}p!1BkU%=3v3te>%*@4M>*O1h5ttZ65zm@bjjz6uA0h6?ugvMSwg
zW=1DY&BT$@s>q&026)<FB29lJ!3~bbFmUl_Y~SX9d)|gaY1K0D=pTYt#Ix$mU3#Ft
z)(9fYyMxZ;&2X{*ZCTpD&d}FkDkkqv1--|{5cl;3aY|Bv^J3T5e%&cnt~Z6%S{4(P
z?i}E>J?c)<=b0ytgEZqb{C*aI(-d={n|TR~-eL;9HYw2Wb8gF`hpuIx(v@gb@>J;2
zWhlIKYk;*N98ZJ6Jgj>JnX6Pq8ZYc3MhBl`zWFSE%&r(5?glf(>0i-5crrS=j)h5n
z4Kht(Fl~AJ5|(;QLDkfqeB!CFe{xP{syOSflU>TkEr{iLFO+$*=p1*A`3ciqJ>b!t
zB;KX&HvJMci5e`{#8=}dVbS4iI$E4NE`N7{+q~71<)`ikyI>;S-mMF*-nD^SEA0lK
zRX6Fz$IiU-rm+w|@deou5e|05n?AIC4U$ha;9r!E$6B<hqO}?KJiCPNj1GaZQ-d+X
zU%X~kU1*V;8NE{6o45b4p;wJoz~kHcv~PzYud-Y~MyqC$kE^t}eQ_SRy|~9#3=rQp
zTSonp&+`ew_wg0JkDTZ;HwxdUhwy!ph3|7!_&(o+@3TVqKFfsflO%kfg+I7NE!d4M
z6TXj;@O?6c@3UX{KFz}S>1`|ePvQFv6~0f6@O_>M-zQM`J{7|EnJLUnbzx?@=-lBO
zgqc|`%*=<v%q$dU<^^G9F8t5T+#$@&I$>s73p4YWFf&z!nfX7PFYG@vbEYseF(s0A
z2s874Heag5=1ZI~Gpqj(ZN8k&RA8L~%HYFx0!`6zWM}w%++<KnxXDduey|b>{Jz1*
z_B${uc03+YNH2WQ5DaUM*wD|%W)lO2oA}9JkM|3mM><rLQ3?Ebx>5nt<+0q@<flX>
zCIL<SL;ug*`Ed;%BJ=T*g9k@85R+~fC94#Z*xoyTB-eYWLebuI(wbxnzg)Y(7qv(h
zmR(&i&n%BX{R(j}WS``Ryed{@yJ5@J{Sw8(p{!tA5>8TiFRS`ehM6k{NE%Bnv8_k)
zWqn)qY27?ezQ6hryHa_d7}R&BIpiFjxyyo9T#BX6Zz|B#xD8u3&jPtM6Znqw7Gn4G
zIdsh^B)S!I_}r{_#OvE?x(Pz*`q81dTly4B?Lwf|ayhGc!*E@eGko4KkQTg>@GsFO
z&?lja9cxyh&Sf1qs#K274b<U2Q*Oianfv%SVT3rizk=>siMVOcM%uA+65jl?7!Mn3
z^DmuZsBWVU?~${PW`7Xf=9j*BK0I7FalQDTpndQ!z8g%vCzIJ4$fIFeAYIiX8KK;t
zb{%~VKE#}XtJyobq@)=ZZa&E)J=RcF?YVT=)AeZZOPr^gcZxkAyQx#$PAKt~NK@a>
z<t^=N=(3%IkoQTV0X447|G{*+S?WX9^f&?omz7}GPP*LtQdZ%*X^Xkt>5DM>#vtzD
zx{GW!Od!WSSHPsX^0eS;KXTe2Q1<$-A(V^<csEyUi}ibgZJsvt$C6;uc*+G1D{D%o
z?(T%j{;$b4c_MLh+=1Ii`tX6`9K*S`hotEBU|!Krm-dx&p{w>~z;@|tnBC(z|8l-r
z(rIK3-10BLhVpF~=PS-6mR*7cV?!nF$HWfH>u0cPS{i#-6$<9w2idBu9I;s@BLjUB
zA)xdFEK2A_kB_FPH+M7!wZ}u-#XPjS8vxDvg!R35PEylr0b6q|o^8!%%&X4DHnlNT
zbkk#Tt!X0*+c|>dXbmUEUCY_46Y`MuttXxGI#G5y-H8RPX_B>$TFQEAYT~h#lXzO=
zXk5PV6Fxlog)HlB!wrw8fmy(AaP9wvWD!cP*OtPn5LL<NsR!VDVU;+an*)lyq;M#$
z0L`3rSj)sV2)8oDlti(SA+utYS65<!{1kG@-wICt4ui}k?}2#ik`1UoM@n-?W1mH%
zWw+LDhve}O@yGOO(AnmV?DG9Bvi;8s+2l9cNOJRKSr+PWyKj-i^zm>k8xc*;XvxFu
zA?{eZFcZCe%gOdVuOw^7ES9`Td5886dYBp=O){>=OZHR@E$p8;4{K_=!n6Z5_~F@S
zeE&2a-_|#ivBgVSzYmr)-|`C^-tZ9<7rtOCRr1K&jY@FEPJ!F#dqct5{ZKR19BU$P
z$_ASMA|<C%$;0AenE7S_E`Rh$lAfnScBKC%S6;N?l0oIHe*uBA>ryiA`e~Nc$p_;k
z+rc6E30~T0Bq@-e&ujwgiMDSjq3fQq%xNJKTj_N2>|ri$bj~FwVva*t-T-z;s!7fn
zl&~LnZb;-kdvd?sSx{!vgA59agUaVKVEa=!>gRt9@?UnR3%icx3ipzk-ppJYCF_b^
z-dD5GKab(haS6CxRmTzWqxj%K>g2$>m2A$wN${wBG5YlC1W$8LFu(ddyQgQo;YP<C
zmboH=)fqMrSKBO!?yPOXlS-0hmQKXKN7ZrnG$c9;Ggv4+iu<Z&qJfhx3-MNy)#!%H
z-h?WHr`vPb5g$bya=Q@sjNVXLe_WhnJkAo9?j>H9ZuE=iN77<4Rou1r0CO?`6DF(j
zSGBWY+?*QnXSg<;J^P5%Jh+92UBcM0A_K&2?+Pchj%Vu!?1A7_{orbt3f$>rfNSj&
z;3AKKrws6Xojxq-$Ry9T_mGRjs!-nN+dtW3S=@c3taZg+Y#aF=;ZOv|elW(Ra&k0Z
zIg~$)j^k-3oN2_x0X*;V4<4<z1)XMyZM{E5++>3cuBzOWJ%2rx7H(WDy`J({HaY1M
zf8?)5A82)x8XJzI&Oe70wVZkXPu_R`#e4gkD_vlo@O|D2-{*|*eQpTfXX}5y&u-!S
zTo=BNlkk0x3g732@O>5v-zRbILgprXA9vyVJQu!?^?$xkhwy#+3*V<j_&%$J?=wjF
zKC^}I)AfIunW@6eye!PjRl>}CD$GnzVP^LGUz;zJg_&6<%*?LB%rrW%8x{*QGgp|I
zp2E!hEzHb3VP<|4X6DKNHecZR9Z8`uGylc!>R<ef?N@s8-hRH|{b?Lo*ym`$mQ~Y8
zN$p!%a=-bo$xR6=49|etn1q7nw<Qom=D~tlR%Bm4J6fRhhH>$oGOv#{AGg;PPlZ^E
zO{Z(*QIR@d>9rRY)QrL<21nr4#eef%?O*(^4i}8Y^RHt`QRXhmCZmRPkKAJ=7WRfR
zjq?3OS!P3~SiB|+W?VYgnDB?_nC>S72P~HK-MJRtD)eQyBU8!H>SY+!IgEKsU(Q}^
z6!&AZ?D5d03*>c6Fbli$h)8_TvSlMi!;)?)<daFYZ0d&V_{TkmoE=gQKI>ihE|+tX
z&tV5p*=;GCS$P7Mzg+>-erzBoyqYCa^+sYf-;$5$wgY$W*eUtEX8`C{CsG=##<tzD
zCljvh1nUiDY^qr>>s%~cJuiKnJHZJTCJhlg*aM*7_Gso9egk`*y+FQRn1m`T3ehRs
zPk6-1WI^>P@*rDVW-MJ%xMgS-ygE98xL8EtAEhL`Fs>TqO*cTXMlbezv6M+=t#HWp
z@juz^GpY#3VLNN_Lj5DS;IF{1X{pkN=qmW)Pz~L$q@$0bJT0G-#xJbWquZXi(3e5M
zsC^<GmGfre#G`%b(^+$f#+_Sq5wql;x|TTPs5Ux|-$VoEoCZ_p?~pq|f)coaqYJfQ
zmbJUsBDSF~zW77KhYK)JBOaCSqoi;}4w5&`Wc`q?RM9sUdKGHW*R2z%3Gu}I56j4!
zR&P7&zH)T0)fBwf@t0@^37^?@IjDIt^5k_QzSIBBzO9Xgm1A~tzU>~d9km;;+{j0R
zv7@nOej}bAexJ?N*@r~9{K;L+X}MJa43aN{rtV5Sys?0PA9s+QGL8c0r6XA9j0>{E
zO_O1TTu*v`WG`wt@h7oMP=LpmufqBYUr?Qt%)IUZHZ|o#e8E=cKL5Pr<FEDjd$%|b
zF*a5b(rXFy?a-$BiGjr9-8s@W?hD+pImr|=V(_r((SrIp2chohN|vT*3q95L7n*ra
zAbqNOfLccgW(YfmMowTxSI?5}p7j#Db-i(?d8oweS2$Vk)GAxlTv9l)bw6Hluf&tf
zWaRJkaPoW8R#;T&PDsB$cH7cY*!T&`<oPBQX!G`z*;qW1bb0xa7??Pa$2F0%hLMx(
zR$jCuo}Zi9-SIV&^Y_Jh&mN{Q{*#pKHU1*GbW8(H?*EmQeSXLkKBtn>6f^91Ll1oa
zgvriVuV%%+N1$<iDT&>`h;(l^#Fu@)%RE13k>BQ}GV7-gB&1*iQyF6l?^{Ah+Spc<
z8Qvu~`F<=n6X%4D{b83{FtjcBOpg4HWqP|N5&Yr`-)+~Ds?bMvTg2agtkX$o=bObQ
zVHThykEvMAhg`eIOvkMsu09=vduP^@3WIpboo(aLa@$@UeZm4}{XwR3?I}F34aI#E
zb@<3*M27E&a*Zo{aG^mhTkzAA_NfJMar*)jjWkHW+f?!(|1~~-oQ;{g9Wg1g2IhA<
z1S;zC<g>preO<0kbM8!(Rn7Fpn%mkqwZsP!HcQa=i#^M?OdzeMx@_%<;TYzTC8=E+
z1sBWe*@SL`;I8{F@P77ArWZC7o0cE8i_KQYTlr3`x$YY|wRAO_7k`&c`+G-nq%cgD
z{xh9@^4wuJFPsxG>H&2fD+=pgI`F8+QS6|pDXZ%e4W0XsgC7;%ko@8<IqH;)Z!&x^
zu~R7g7-fcPzc#|6_nWcQ<ty$InJ$0%gus}Q7%<R-Rmjeg(*v)uT*VM#t6s+@O?U{G
zSGlmLEXHaRJ{C@HTY!ZDmh5QTk-{<VZurPilVnfz#4RQvu%N+{>8RTYOErijWoAOz
zXNHcpIq0k!42o};K++drIX`!zYotHv=W7qYegE1Ww^<KOt1WS2@IG)8`PE{PU;U3y
z?Jx2x4Uu0ZiTvt@$giSBezjEOS6fAXl`8V9Ya+keEqv<D|K(RzBEO3GFTeU9pZcE2
zubf4G)hP0-W|3cQ68RO1{L0TvoI2FKPbX-NF6yVYl&=|ljrQ{krSlhkhkcf6{6X$s
zYLs>ro#xp{KhEk#n<}R8)jdAb#nJIxJ>G{Zj74yt)mvJcJD)1}^rFAA67lMfZL}mI
zh<gXkrD>bb@=>Bo-7C6OOOIgm6kY0K(WTB2U8;@fQcFaadRla;g`!K{D7w`2yjC14
zy43pU*BC6i)a9Z}O%h#dAJL_ni!SxE=u&5kE;U1RscE81-Sc0U`Y%3f|F8UvXR~7|
zajxgDJ9g0?rNg-Wi?ejl`!#g+p&M+7@mVNqUr8h6r?b3=f63j2!su8(iN5wX=f1x+
zxJ|t&d3moC#a0)!J@gD$bsf!8<u%~l=LlAbdtvXyDB3yn=sz)?bX4TIX2NYamFy;6
z&}SBpS07(w_Mr;9o#bRxU=9yddByw>EF_)_W2r%V1)R6~fMdUHqWKry=(zG!vgh<g
z_I11_FFfEz*SVae)Av+i>DUN9V@sekIeTkSTX4uf=^zy+t59!Av~+8VqWy;H8GO-$
z#-a$Nm?Dc05BRe)Ez+kGzthC_-CV17m-NhcCF%Sv8N4bpyl92<X=&8j4EnA*o(A52
zP3ONJB7LKLm9LUl1gq@}rMHxw?ej<06`5R5mO9GT;ehvd@WLu()ZZ9@Z$_5lQ4KR#
z(9azvSy=IvvKCxspbc``omkA_p0F&<8En6)&<qxViY47}N?Vm||A^x_{7Sy$`R_;)
zyUmZOUtWw`1089`vH;v?zaIlj7l36*I;a|t!s>8iOniP5gU!Wv^m*&hJs}+3YDW<}
z*Y99=;5d_;I~$*_8zp)A+ZaxqH^is@Ymp8;E4x}a9Cuz&z??1FOt<?^Sf+kfmONp%
zZ2iU!7~SxdY*?&E2b`ASyt+_0z4#`!g$}3FcNwAgI&XNh*phx<@f&ya-U}8rL2%wA
zh>q@kj&7T+3!~bCB+hv`L`U->^gnD1_a<ty3Ht_t+ecf>9%Dm4t#oF8%HF}>egEdn
z#7h323~(uj&!tA>*<)8!9Dk6mi*qLDs!y<0&n$VnIOiX>gF@I`Q@X%Ijuu^d0lV{8
zKxWlZoOh@@9*GGeXWEa^JyY$(hMhkx_ngQ=vqOo>Wlc1>TnwcL#YSJ}MfBFXbfz4l
z&Oc;tM*WH>5MZ;P+Fj@d=VXDB5ixV=`^m<<b%q9bP3p{RPpC>e%(GE%cMEPejAXsm
z9i?V*V_>}JHP$)blV2SEv+&tsMLK)NGn{-Z5!4T5Bh?R~A6#AdMtT@Evj4)3ua<OQ
z+XPfukPIEuy@~#!X?)i_ZMy$@4NIMO7MI`bN;85#g5R5-wCB<qFmpS{wy_1QG^Z7Y
zmgGT?EmbVnFN*hR+Xd&UwfF<kzZ#XA(eJB*pzFpyT;X9IO!dx}8G2>HX4MX|QC^MS
zUtfw|PQvhvv48@_g}A<0iAn}OCSP+C*&xSsI5$k3pmNWY*ew|ci-dph=ZFO^f7lD!
zw#4A8+dKa!r<Fy46unBcak<tY>N|cIukDloT-pTN4}FBofA#sxioeXL>ugeeHygF9
z)ws+h56nMQ;NZBuP`vX6NxQua77QsOJ?s<MS%(ak)?$Vq<Ss##>N2JnOzG^Lmsoi=
zoU!s-X!_g{!^#g8*f!`hi!>uRpS1@%%|FUilfvn7*Av*j`5CjYjR&WLnegELNqn|*
zCMlm4kK_9=$=?A=aJ^E2@?TSV@_7p|EQrMG?cy%+mr?jBdL*-$^&1KX&4MS_?xXx(
zbuw&}Hw3&4huaC1zdmXPt^=~+S))9E_}~tt?^wv@hHRwQCM7_^AzQB9%~Ra_FrYbe
zMzGCU4*cEiy<qmKFI-CshW34CFh*;(<f76D*lzg|I?SzcjM_wa==wrpabHWgGh0wc
zZY7Kt=b64$jpCOyitwh@c`_$Pmk<42!#WQMh1Nfc)aHpgtsCqNzxO_sge`WF^;@gO
z<}{{}NXa=|ANdmNdRg)PpEPAki84}k{4wb?X)?5a*P#P)=CEEv<H#tZ&G@LU30iga
zaq-`Nw9(@*$}L)kjopI4_19?FmhS=a(*9ISHC;l6r=e@HI{iF<8VtR@9pAm_z=N+&
zL!RGabY>0Ua~Z`swrLo(>Mk2`V=g&z+=H6`e2B^M1^9t>V2p<rA8%ubMhC9J$oOt_
zNxtwiTnEdX2lfK{BOO?f5yz6A+o7%ULpU)a915<#WuXoZEN`$9E^fL*R6ZOfXIyWS
z8I`}W$i<K+Fmp6CsAO#+S73vJIX(93603)PG-O#Dn2q0q)9yB~lvVGT15qQJHFqQJ
zpNRUG7UQYFTk!OrJg<ts&Tc9=!1yp<bdTvoKWMIjXP@@keGvJTpUAI1i2N#7<X5*v
zezis9S8BqijurV;qsXruMShha@~bA1UriDD)e4bcjr-51E)n_F5RqS{37`6_$gd1V
ze$_1Ut5yH`)RRPh1=mzCVuUKjeG&VsIa{G$+)~;wY$vT<Z6MtfV*_yupHTzfQXa8n
zO3`qsIk!<7jODv@xYCb(Fu2=c`ZntmChpoL^{%Z#^;1XqjikY}L5V@XoJMYc*r3Sx
z&Pl#ibg5lLmpVr0|97dSqD#Fey3}8yOD*}YOP%}Q{$Ql&QY}T7n*ZPapo-{H_lqvo
ztn&mq<G=mE&!S8HpZ&qRqD%D_UF!Y+LtW~>corS&l1tvJ$xD}3D)P_A?xR<erpV<&
zP{Zvll;+Lj(?|z=9sP-YKN$h@r!vO9y31}wC}G94xpbTQMY8j(BaBEmg;Pc)Lf_?!
zA<@VZAMWa0)V3)Jaz`y=N%tQ9lQR>igI`g7>j6cdv2T&b9}}tJ;~n%+qmlIHF=uJ}
zHwCKuzMV%j8q;^tskn1-9ULpmp(ZY?d8;@ZVj^*(3qRFi#;hUIt^I~bbIXs=Sl@{}
z)wnBnJt)U(mXGAK7j>5Yi`Qd8N@7v(iH1dAa8B{?->$sK)RC$i^|JS}ESFwusHFjZ
zulcKW<<hD1j&qZ{rL>cgHeGU-bNfn1>FzOqC_8Uhv_Ed8z4?M`(o0Foy#I_{MLrvS
z_?=hIbkLTw(yFmFeC@46u-xhs7VKF@X8c-3(-fjf&1r4i+Zn*!QkPtbtPyARjb$DQ
zsiZo&k*x3e4ScNq`0G6*WaP|SvQ%RZqq9RXGWjF1i#8+1U+u8|(R>zP9gG2~r|{1<
zZEz7=J$(}QVCr@cUirs~-Em$8UF)pKTMd9!=QKh7Q4bQh@fHkk9Lv%wI>A^?b!s#C
zFIiXBPTZ!*$lgVvpowAl((fwKPvmGfZLQ?RyQk<iejm0y@Gl&*>^j+1+l5}&m;f2(
zfADPZRh%}dCw;lD4)Mtje#pfEigllpt7B$iRE-6<4LgVr_r=f`qu#-zh!PTVaxh&}
z>Pfu1yaVG1KkUEFjJ>G!W(iZAdD8ojn7Q~aZnT~APxAJkYaLkA-3ah2kAxm?r=!f}
z3|08%1h0;Zvk^Yic#T6gM7_Dniu!+r)e}N!V5e-DImgg$+BbJRlKlZLJF3w&Q*@;Z
zEz@vW;7(c}kc^*g65x)9j;ztKqL9?q%fg*6!a*Bh;pDWUU2YI>+8)G1ixMPMSD&A7
znuKRgb&`fQ1fZe$ZDO?P7jd3m0kO%+)Z?Qv_uG7!85bt7+vk+T9ief&;I=y5w@sf0
zyB#In!#n7i6&6L-pH*q+rYL5vn@QdL)cBBe1$cNlot{mNruha1(Dlpo!nzxWao`Ir
z7$3Kgel7_Vo1Sav(a#YiuyZwh_%sYg)%8X<K90Uw-<hmjX}}kbU&l6gn?UbcUV@5{
z)2KLHoB!3zAaSLkuyJH>v@w1~v}Xv{F2xcqP8*0H>}}}L=bfmr^rB?+Ee$$IoPE>o
z;Y(+9c*3O<vvJvq!EpG15o|i4PZPFV@STS7FmcIa*fF;OzfbE&Hz<$f3K<J%_&#sA
zdFK-5SR7@o*PgH`TZ&QJZW|6eWK0$6%TVEiJxyC;f>(GW4&2m<Cl((k=a2NFd-sQ-
zgM^XM9X1#!{RK|J_i<fZ05R?82FZ$fkl|b|$sTrFWWA~6xylUk?U@6HRE>dmPfxR}
z-uC?XCnFqvN{MgnwT#c`at%_Ni}3i=Ka$IRwCSl5L*A#qCf&2U7txO<Y<JdLHbFgy
zozvb&^urp-k?J9E-)B2n@W6o{s~y2VpIRcjnP4Ui?{bXqUyXVdEBNlY(cm2t1STKU
z`QeUh;9pz?pTfj*-{TQ1G4&oKy-FcHy@%kF?0nXCdOtpQ+Gdyd{3qU<>jNQYO392<
z-SLTfEAhXlM9<QZe1=I4%5D!JAs5B%>$fs-ws#1chhHYExFOY(dqb8DEg`D0p`=ON
zJ8iza8y*h&0!fbsK;Dcw;PNdF(=r1|zz=<h`uUu^EXl?2u!#`3Y7Sd5Hj|Ys+|AnU
zd(vHH!Y?>)g-tW_VcJFmoW15JQ_-^n%Zz>S*5WHf?~a3YQ-6~W-ZL;h%Mt>^7qA^$
zFT>utSnzY5Qn;Z0BPiF5WCL6yA+~P|^klclA+Muk^P4A-kl6seO`>qlLyn(9KjGyC
z?If#316=1F#m`B%KqcoS^Ba2_M~*2(h0ry)<)J%j)I1^weNy1&`%}b6_6_gPkcW4_
zI`O)Xd8E1d2+Vrf72h8##wm6lIQV@qd~VW#`!0WQh{_c_3Y}?GN-&Hxw8HoI9O=eu
zTGS;f8dN@4ka0_DWuXdDsN2{Jy7`=gG1{N7Uw$e?&z(d@2X&)|-Spw`iBp(nTuC<c
z*+vR%)cM+Dt>mDsD=bWs!vHfYl&;KVmxNEfRru6)!l!;EeCqqcr`Ag!$)*UOdYJI3
z2MeECU-;C1!lyngd}=@8Q;!!ub+quQpNKp4y@gM0Eqv;-qINhXeCo%-r|v0y>LtRb
zelC1!E8$Zcc<$!g(iEhxZ<-cm6b|P%r|L<Ab>-m0sA%f3Fr9xNFoLIVIL93<zS4NL
z8{|g(D^5=a(=(T6(0Mo2>HVd9xSgQ`?eF>n3sWko<aTH2qVD&o=grf6cf~;JQlMF+
zDY{fvvVry#`-2BXms&2m)Fq-zZ4q7SMA4-hi7xfQ+TlD+><_LLUFtm1rFIcrYJbtC
z&Jtbf@&EP*<>Fr0m5MI)yXaDniY`@O><?n=9ZCK_>QcXdEW_B~fq3`mQtr3oB*dt<
z5KPX-H|cI{yMZcg4H|)xo|QC7hfB_q4_G&Q1vE$fD%^HZhSeEiOt-=pCSSIJS2I3I
z)~>PvY36FY+;bZ$|4M<y1rxF6Z7Ycvn=ebn=F4BP`O?rN?jVWHmlb03B~om@l!?uk
z-;ulFr(P%N)~A0lRBXPS6`L<-#O6!2*nAoK-{y-;Mk?5e&6h48W6?!yzIZgIaxOMs
za>V9KC$agWzF!^MclM{gEy`SF)M}c)Bp9|hd?7gcAPw;BCXMjzg$F$Dv!pxy;mpXf
zXz->Oy{DAom*;Nur<EC-?6rgkx?iDxn$zfkX)ExxjX8hh=L2udj+2PBy?K~*`#*V3
zY+bEPgVra~eMU>^l>jYCs`p6hcBT{>95eZ?<$K@~b0{)8eT<tumQau1Be|o}QWoFj
zh1YUU!GcBe`C6-+d|a14T>hgKEvul?WxjKGSwbMuiYy~d>ujj$kJx{bC$7`{%H!+*
zNN;aXqMk#7s8`KPoW*jP+|)w4!}h$?rAK|y?R+iikuD%Lt19QO`rGp^DH5sa?aS2G
z;V&<LpI>zD(0P`$<N|g0s%B5G?txFky76m~=F$)1%-y}E1F7V72pcxaov8k~0(ZNd
z!IQ^?*%g;5day_wQ2ZKSMgC%E?O$WROk*Hr-`S<8UTpun8dfnNKvo!jT5?bM0&W_W
zCiyI%PIieibmWB^4pu87Yi~(#pN0nv3yUGQQ#poOsiBpFF)y?i4qRykntV72Dr)_)
zUVkGjo$wtL<wk&({6WaC{KWEp^`nNqPW03XYgApYLk}0q@n$V0+Fl;WMu`7Lh1EfL
z^DqI9XE{@h%;2XAUATpwHo5U^4Uhb$!Mqfg<ADiwVC0<z!|puBJ&vwi|KlUpB_fg)
zo9R)F<(}02TO#XtyAy|p0W_OC@R_64>B*-VH0N0;@jkMOEoocAc*0O|r?L(l2kX+@
zXaC08zVWI%#x3ka2Gt*i?Mn=~%eQhGbl(61H1ELmCPhvHTA<FfH-A){LZ2_sr9Gdo
zg@+nZIQ8r`JgTz`l)A5FE5?nG-uJm7ahjh;7f(&Y)mKcZ`|)ZrB;f=!UTMYL>g}lb
z)q|Uz91Z>MC&Snq_aOPI1dgPb@shy@*}DUW$t#nG&>k@b<CkX8yo3I<b?-Y|<*q{C
zoaHR7AQk7tS_|_plct`H;C_qx!k}@5^nUO2pwKdszqt^~UxaSp#*TRsenOcyZptPD
zI;HaI9t}{^yDvTRwKJW&ONAY%kmr5&=u*Ej6J9sy4Xl0GgI<ywN;m60$Ck+#*wneg
z%^B0vE`E^=FP`_shS-7hN|yt$$Zt4LEbNLwY8Gg;*#-BThDfY)ZZXv=L(uvN;QM(u
zx!fs4vZd?^k|l4U_dX@~u&z=TaB&z^PBvi5QF#(0my5EqS@JyYRVU1ixeclHcMEf)
zikQnhe>7Sz&Ou6}@MU2Z7N=O_$FPSO6C6a+vm04*@?Ub%x{dUBP%ksJ%)?W|{ouzq
zV|@2*HujI`PPg9dMf)#N!}1k<P*Lm)Mw@tw3+BV2p{@|u?FeTR<|IJ!dn0;*--IY?
z%gem(%Dk^eqH<UuYIba8`e;F)<je=s^#8bf^S2(p`2WAOskCU5v{06cw&%>5bEX}o
zsK^$PvV|<!MOoUTMM)(k(LzdSn{%c_i6}&=B(g6NiHMYZ=KcA+zSs5r3;N-Gx%@m=
z&*yXIoVnj0=XQ%DvX9(}^GsDT%DM_ATBnyTQfbDa|4!l<XK$EaxDwL0T<}$=W6a??
z19W6=KWF|!iS?Ur#@B$g=+&z@WE(S?Ov-XWb`rD6H*+3UEs2HgVInjyL7x~cFT?2v
zC!v74exlTL8=bAcfam{w2PZV7auL@ZSkF#9<ov#v9R2~=*EMU=?z(c=7BYw2YM#W>
z)6<D;P84x|FrCfum;uMnJImM>{3U<32s5mG@yL8k4DxJ>LfdpP`Ss{0m)n(0Hu+d%
zkB&I9=U^zVn-7t6ffnCsgHQ_P!k4lK&_4ZWazXJsCvNV{ni&5h)|(AThkFZ7xEet2
zF8D|8N1h~K^-b9>g?*$+r=2X3Sj1Pv_XER$S^V2{DY)sqFKcu06g>DR3*FpWOG4&q
zprs#_`6Y&1puuVtw$C7z)AVnI0RhT<nOq){So{gL%I1?&?~yov_dNus9~a(T&B@`=
zseJ0UVnWSGA@gwrv8(3sQ>hC?`kEX3W>mm=DojVV<x_EQ&OS2p$vA<X8IRs>NFXa>
z&my}wn%pxbeNuTz63(7AlH7QojU?2H(YDt^@QvWgu7CWOnV4z?*U&!1C`Xc?Wz&V6
z=Ov?=1r4~^U_V-GU5$EXjf5xbveA4jiQ~%bQ9{!}^#0i)w7XNB&9C`~8ozu;cXa+@
zYEloHU>Si91Vur5w<l5bcO@Vp97);tp>4BHlS6-EhGjl6KyQ?#TWJ@&eTE{Nq`!l=
zX%XBai?l7X4`r~DCNJ3LIosKM&sY3G8j>riQqc5d2cLY>kNr9^j5RpX#(LO$voDn2
zut(7ov_CDGy(%fkhiqrb*Ig_4_@_Do!{#S{S?~|`%ALcjNeim-G$2^;51tqNgCT-{
zu$kP#ZU3KtaKHn67W{+G(}iA8@DEP^&p&uv@DDyI)<NF{|KL=?Ke%JvY2+&S2j7Za
zMz(@~@T%VIVcC~Vy1A9hG?@TDwtgjAM}v9KhKX!;*=iKtEyk;#-i6`{>v3hC7jb$N
zj*1yqqU~YInpW}*K5>w*i8z7cMp+Z)ngYAKdj`2aBrrJU%A*GLDdeTn1a|H;BRZkA
zjl8vRVDAdfmz@GL(?(!s-W8aci~fJi%qIdfbCkf$JSs3V*9gqaFaJ4TLImeaB-KX#
z2+Yh6t%q4oU}nw~n3*A!O5~rw%={}bGhYeJ%#{K&b2u`x;kd?z<9o;hBU!$3^CnU*
zoJWkfEy1K1Jj1p<M+yGkNq*~|1<^An@plcAh)0noYWi8qmPp=VT%X=RiuU4!NX|e}
z&j>oA|Cjl@(~F<JvIN-{7IT|Vaj5I{WHSHm{$crD4aaBYwmJu|dd(1n-c$JAfl=t?
zB{T4@S_Mm`T*F1K<H+|%t+dWEFH#<J3r{=Y0;`?Ap_fagn6n?m(XqwT@cN}8aD7l8
zoK$1T^!-XC5>vmzySCkB^MZPzSGYIMips-w{=H;IPd5>hJw<HPE~0-Kc|<itk^EM7
zCRY>?IV3FxXPKGcNt-K4->xd8?=Tgm@Q2B>OIGN3TqJUsy_~EzGG=EjS%Kar7^BmB
z55ac}KB5ZqMZ!B}C0t}SmzQ=@;6vJ%kz}hvr25rgn8&Loh05#MJ#)8^`}y%mS;i2b
zbN-E$^ae=KqEOg&ZzR5*2-rKqEL_9lQRuR!F>2p47L^{MS?h#B5?7Fkwc9h<_8&BA
z_kV<<8<!4Co`@B~1@7`2_;R%;>pd!u#QmAZj@@5}BuhGx=9+WFggb={Zm;Gy2dA?S
z5@XnY>JL8kRm49!yAW4PPGTqb7L{zhhAd<EBoL*u_gH_wQu0A5id{e29666XiRMnx
z<_*^QlKUxvtdvhPye8I$`sZk~Z;tEod;a+o-S1n8L(wa)exf^BiuKw0?~>>>?MedQ
zE+aEm_M?w8o1ptpI8leY2n#ike84BPQ1q9m->76N@d#)?rV=rGHlu3niZ%cVsNC}%
z>J;5ZT2(Ci>C}PUPiXS@{)M6CcpM2Ye~p48r;w_#m(b5A`S4+$GW)KQ<vv9`z~{a@
zlYd8)h|5z&lyH9^r#exDX1zR#2CP?-wD@phz0!)W@Hz}5TkTQcvJm9A>>uOzHlKOa
z=?|#udYEZ6hSGf=oT<kG654FdSsHzWw#%kswM+t6kFUfTp-Na|=^3<l)m<DqZcN!h
z1s$$8=?(a5rAZt{PU1eMCBa+%f&-%_i|CrpKq>WKNT=!nR5MEGS!E?i{GE5?wcdCr
zW?F=7-tQq9QUB1r_TSLOs1YwbJG*TBpJe2DYa9}@okINg&LHZ|U0mXgcihI2JxsY*
z3VIuTiqX3{iKGg=%emVoa0*h((etFIaMIzCDBzMLyW&$3mMDn8aRoi(Vdg0^I{zdJ
zT`3Q<E0mEhq_MU`7O}=DsQgg`tcee2Iu1zVUDt2G3%lK!z;zkeLFy_lICvgE6!sdQ
zuB{_UlQm(CVJ%whxER@;p295Y=J6isMiN<&jb`WRBEMK4?EHNLmX7)epVjXtx|LSk
z^VTq?Oh=sa@yy06#M;R?g94cAbpZ~*UnpvQCkfQ-<Fb~v!YcvMC~fLEyv?^6(hf@K
zt(6LDvZJs>ZUlUM;W_vo6oP9;hBD?yElFpe5nOu78(tc71^h}%WULP;q592`xXch{
z9Nicg*fs|{|M-Wq-9EqxR#)Nm9T47Zp|Ih|1H?kYk;G^2C(eatg8N&KOmAF+rdZx7
zTRWd4R#JC4?={sh%*Bx?^g5x`Iv(eAgrj^HITZZpCk%FRL5D8gfcr)@mc|@a;)9m2
zM#=?=5O2SX-%3qD=gvtGg<0~%a&|QNlX(kaGfgtENQJzqo{644T#l-Wbdb)f$$XRF
zW~iN^$E7>jm)WgdK^9z!DE+;44nAmefk?jG&ODqthHQ|Sjaz*PmizgOTVI_i<Tyv+
z)vyZg%<sb(NDH$rSr<vq7IS>?hZI@oAB$>!<}h6|DskiGV%X+58g;$JaICgJnk(gj
zq)a(DRr(Zsn4iPlE!1b()t30}LM=2pdj^T|Rbmalr9h)c@8D459T<J;H*|e^j`>V3
zaiPW0!?K?5a(Txl)~B!w9%y4mCyUiky3CaTWy=S@He0R|I61%S?bzL%H}4WS#Gjyl
zV_&+CwQUO`?S4ntc|(PQqcx4yNf~5Ep|!+Unuqm&E%;-_kNKimb^O~DJARY>@?m-I
z4#!&Cm+pe!I7sud^$qxSwk&VrTZ?}N9wr0lGRYe5IJvk?7EQ6q#uI`w;3y+SDA~E3
zt?-fM?~Y3#D+Okhi~MYo@iLQb-R^^qsrCqS^N7*=9n2n?&7mJx_wzF_%hwCcOe`=n
z3j}7S<bTeW%K|g=ufWXI5SW<-<Gav1ftmS4aK2;;%*;swGxMOp%*+><nKuPyrh94*
zVg={R|1dNA1m{bzz|4FnFf+gZ=X^ONFf)havl@<Tq#l(dV`j&}Egu9PTKXmU66_!+
zS}pmY_daCK(NHulT@!bIZR1|uF+zRW{$%aKN>0lCA|5l?g~nUVV#mIXh3}|i{OIr~
z_T{~7q!&|-T5ldhht4lT{-z6ImH)T@HTy)xYv$N?)l~+(xQ)FZbTSV6&SG8NePp3S
zGFDW*1oQX0a5<UEoZ+g~@XMG?th9Bo?CbS+9A2P<u8$8#S?v`}a9TCdxl)RP*0sY;
zGUDjY@*8l}tvGn5E|v-VF`k%AZ-?qBQtU0oSLlAHA)+k~;pN{O;rT=tvY7iyVk?rM
z*xfC7w7xpKc;PX4++r<hP6)+;lMldecl)?`6;<f3cO`KSv>;NKYT#*biNtI<jVu2e
zk{nYt;-`L^wAEyzZAD3N`SJi{^hl1iE*H)nmUO^*d#w0xB%a&(W;rx;ID%$2t%Fxi
zZ^92+dvU;lXE37nI@;)Yk@0`gkAE0f3*6&dj0UfdA2fsrow(qV8S21Jg-_7yuETgZ
ze!B$*kz_||A<?Nhiqu}WlTR1ZP{zy0aEjyr+9N$4x%po}M<QnP=JOTVWpRsG`|K5D
zWQiD0#mD3OZyVA4vHjSzu$682JdR{!S0L*xVI*0k&Q2Y>57ozu<0tio{NB}r*kGjz
zYLop;?myf{TyL#_a}ve)<W5bpsZfN(Rv$nS?VH%Kdw1~vzKKYk{uXlQ)q29FjKd{E
zA7QfYet5I-B#aFjM0@p&@yt&qsJrPnsp*}B94DotpME1r{lE`q>kkP8+`Y&NYj^a+
zWGSO|PZ>Wu+KxNkoxs0&AIQ(Yjr3RTL=P2CqK+w_$;PKPsQKm_+#_%W9^LULie{4h
z?h*-f%;^eATRojzyn7ItRAiu?ld{ORb#<6FQbeV*&T$tN8ko*lW8!xu5vE3eB`>F*
zf(M$u!WSFD;F^=ctX8iKFSGGG*H}fvk1kzh_HW(D8Q-61{0IZsHIxmL652U`kk57H
z{{f9<@g!L;7n<s(L6yTQ+=r8uWizcTh^gHM&~(@i2g)RYj=Xz#p2}#hXafz~H$Pye
zuw~eJ?*Z=P`ML1(BQY4BRK$4Qvfw^WUxKfF(#5G(@_4R(3s@`f#@#H`BqHlEaMY6$
z_#lXe8KK!&g=6rTBT`5;^&JWP^ApOo3I4i+dT4!-9w&J@6PDgc#2M!C@XE<kq@ZpS
zDLOip6e)})<%{D5zeg=Dk+FqO2I}CSH8)VVc_NII*n=WJ29e?et4OjiyE;bJi_xiC
zN(`QEK^>Y#xX*huvHEccXIBvH`F$a|xhx#dp0Ek|^5HmJ<pv({Spm-MS0wz>3M@V0
z34R>rMLzvq&sfA9#U^bd(TCC5*y`XhIOU}qO8&PDskqO;Nh7x4&;H4<BkCl)sqhS!
zkvMccNu8Vcp&FF>Nuu0m!nt1amBRa^D%`fZm<yRV7V=@;SpWG2hOD`aC+{-H$-8^-
zvypbBpx%a@(tL^|e>~-Cla!FPdMj+LQQ#Hzf8yoBIhi*b4Qb=TDE#M46L&5C1e8>8
z<XjUCIP(>2FeRqWOfHIs$6_|X#DpxSRmB)z`g;lt7FCt)rz*?p{>~TNC6}>*8w-89
zDJH9SE1r>aiV19*SElsd3-7XDfR{`UMQ={?5I*RJ2PMxlj+uflx=EENopC3ZF3jbw
zAGIy<4mg7hHaMfd^OTT`^D=Tb=L~w8EXUcN{Q<>}+mVw(rI}F>f$n|OBBtn2Y39T0
zur+rAdKIgJ-s(Pr{^J%ASDmrgx_tsU7aD_o)?a`-tGi(Ib&e}5n1EVesdD1kZ@ELR
zZ-JZ3L+BAo@C(7ym^l`4{mBOS?6e%F$L%{D*!l;IzEX>s_l<bv^}}UFYR*u8`xq2F
zs-Nqv*~+~T@~c-uel<?Wua*n>RgI8eMGE<qw2)s#3i(x)kYBk9`4txOD@`H4`cU!z
z@~dhgzd9o1S3t<Gb_w}ajgVi(3Hj9vA-`%D@~h6#M)YAJzcM%Z#12Z$<xfgIMM`7u
zLaiKQT(G{HwJ$ru9(rERt95s?#dBKl4c}+1m8Jz7`brJagPv@WS0emwbb`Eg5pr(T
zCv45{CFuILDA=>ej&TaHWygd~<vpdw!cU%6C`RZ~J%ldxwa}$L7P{08LYKNx=u*Rl
zF11qVQvV2DYQNB>#{Ji&UKYC48$y@bB6O+RLYL|Q)7Y6pm&yuVYTvg6;_+XXdilRD
zRY~YlhvV7)zvUWJMmM4TITz6bl|s;=DZ^(kC}s`(Q^>~gH;Bf!5oGnEUwGlarLaJ9
zBfI+YK4MO%p&I32qPbp<uio?nT8)xMQo)zdpFkOQFn%smJiC!}*G)#};|s{|=IO|7
z@Zzw1AI{tK_?TiP{)l)3KO#`XTl?(c$8=S(eoGGVlUKU1CDr?h$HhyiT=f%@{xE{=
zDZ0l7pSjNN+ZMyWZhz07bxh!IU0=Yi^BB+HDjnj@Q?H=DCNuu`<wmx1?hU@|i%R)$
zeCL*ftIHQ-_j0E^#qvMp1^f!-7Ct5DkEOJNHfz_fYI(}SfL-9O$4(d{!)hP?&IX*6
zu+nUMVEOoRa=G#IYnCg@`+2)}`j)W=#4Bo*q$@bE9bNXeEbn&7vh+z;t@!ezw%qBn
zDqdV%1LZR&l9j6x%lvX>;SSMXX6(WxL`i}N+MWAwj8iK!b=v}zGU*r&IG6+%Y^lJN
z=uO$NpJ(7aiEU);cXzVlr~>S3If`UYzc1UjFNJCUTUusyFdE%SIu1WcU4%zW#*yFh
z)9_fdpR<+<MUNV9VyzeT+<;dR)ZQb7_1fp4HUX;g?C?=2Giowaf3<+5%-DsDWu~E(
z-E-jjFVpe-++*azpes~A(S{eT)`1N@mzh+xIcS^t2X0ovVG^H}i)J;;^WFVk<Xp53
z`F-phDcJ9gJ)aryj}^30jfy?1aytl0%bF176?r6fAOmfGG=z0zf0C~)DgwtsgOAGG
z%Nker<GDBkTO@j-u`A?J+k_mhWc|Ehc{h0Vg+>y77hwL!9;VbKjvT7AU>~pcB@y+B
zcyekNar-bvz!Lr8#%#+%Ll#=>9Nqh<G$jdLy{5#cu2=vayY-PAi&ztGHlsSK8@VrP
z<s5J7q7uJU?)KnQ<glz4d2YK65BrPr#l87>$)yBTH+UR%+1$n-Q@T-Ky%ujQ8&4|#
zIH46a!R+U^$B4$C8|3CI1O9TP3ktCKf>VP|AbFoT<jFEqfv5iro;R*V%;h!)rCufr
z0#%S-qdnwro#fYsD6j*CQ^*+UF?>el71TYw4Hp?T;59O`Y;b5QoOommnKp2qC??tQ
zPnRIJzFvi0aHIhKJNXK;XJ2u<-HxGv1N!{ILviGSy)K`*(2oDS;tVphn#|V66%spR
z4>E0@Keib<LA*^C;Rq8+R4q94?%?;(ZDT4i(o*Bc9d6*HZX7_xzj>^$Jfch(OCSf0
zoyvBey8vsxu0%_ObNaT|W6**zQ<41dG3bYwEU`M1K=xQpB2OG=l6h-0P}4qlxT0(;
zGxrq-Rn*VmtAgkGMDlUmn4FEdFLH3%$0sEX2F1v`{5TPn&LLNB8j^6myQRNnSCM=Z
zFM<TmpY1Fe*4jf2>O46ooYBulQWg*4oq1toVWKe^Q5^}jkA5IVR}Z5v5_<50gewfc
zpNjWv>_bmNjY(JUQPkEj01vGfq4A9haOhtxPU*78H3Qn@)(w^%zc7vMHC~T`C-`$w
zMo#$Pqeqw%_GSMBR+ambi-_FSMQq$*T$E*vH@%L*2bqPav&9~rzcd3nzFdpHPTGr$
z8`g4S*>~XG3)|6(ySw1atp!;1aRRPR<528MfwhZnlF$hY$=Bm;I3mg(>z@y26gQp)
zCfs*ik?V`j*&jm7OgF$$6OW_aBe$X_AJt&KX9GGewE!(yy%jgrC!>!?9f{=AE2!}0
zW^{Se5~84R9d+h^LR}gPNHTRl;s)E`p52=e-~19U{cc9qY`cra#8;7oGX`+P^DwAu
zKL-67=R%%GmE-$;U$C6XLNZNGghqTm53PF|7~AS^sI%_?lKY!Vs^fKez4bw)Nz4pO
z$KF7zI!p1_wqn8mUW96JAjsPmK#os8im!Oy#`KvI*wHrv7WZhNV_jMJh0}Ciufzxb
zF#g13U1}s2{kw>+O%K^|R)ts!XGP4^9MO#5<M<ucNSHMnK|IDfBK5ivME?FN5<&|;
z=s5~}g6SZ9U*JeipU=dyqtnPC-6_0Ve?IETC_rZ&3`yruI<bpRLtBDRle&m$Twrq_
z$Spqw4=bj_l!6$p%hG~$yEUUtIlGa~V?(m-^K5i$-zef25Q>&QcPDR>lAt1=jQ@2I
zoRw(?NB`=>SGJFZ;(R53c(xHbp505dPPyQ>LVoq8t^vOh@~f>ve)UVpuYL&m)kYz|
z;)MKal8|2=6Y{HxLVhLjUw&0B<X7H8el=IfuTBd2)dL~F$`JCarZU7B2>I1HA;0Pt
z@+&(bzuF?FNPaZmB!lu(%0plk+rL_utrU}E=lxm9hMJflU-P-FdEPr#$><FK+*p`H
zlRV102ks|oTMc=mnUQQ+$1PSvH_7t&C|gUlbwBw@-Fax`$05Ep>OMYJF}wVaaV`5l
z{=q(>OWpFHfAGH0rEU_sR3pJZxcGV%-}+yd8vCDrP)ghh%@F*9VM3Q0Ep(|vLYMkp
z=u&HiE_I5~r3MLI>PTIEc2MY2JB2RQ>wi&~Ivo3w{QdwI72hPm3DWGJUM0fmk7MT+
zJ|*0=>t%|TVr=J~6cjLREYH@;^J=9|Nb>c0CNo70oqW{CHBD7S&0^l<?<{9>sJEpo
z{B=Ijyk$V<m$b3xhZ>>hsxm%zo6E4QwN<b3`Db(Y@(LM?(8KXfL^scbPb8c9L!%+9
z6W59)mi}QYCK<r`=gGX2TMyBQ60@92Fp0OF&(~k5W%vK>g))tAS+|%`MDwW(`!ClR
zIfP7RqqodQ&O-_O^FPamB@6!?@{#|0CYqOgtH#QjR<f%%p5`MSuLRH2FO}=;v9;Vd
zwujAmbcvOh8Cibh%r47ET`rc|3oH3qMd<>USk<y9y^v@8FIc|*chYh`-OaLtv-z{;
z``C@^0@>uMqVjt8bn<YAHgR5+hV6}&;q5+a(t6ShzdZ4d9Mh6u>qq;*y@y_-6wy=M
zT9`p1b}k?bQdS|s9f5$dkm(GCpm;AuB)+Z<YQ0M!x&=Y7U~2;0px+Nu?d*xl$*sVC
zO$?OU;D;5NU>s~U8!;1lq3P0M&U?xvJomg4s-LhPH80X*9@ss>aIFHycQ=8iCRcLu
zmoIwwxeXpOj3LsNx_DgH1Gr^w2`&_!C%e7Yqk{=AaL4>TF#3)Hsxh97eXg9~mqp!1
z-k^b`q#h>KDHGYaq-bQ_1c=pVo^Tqm%$nd@BBx$SR(rh09#@7?Fj+@(`=+70e*$sc
z#L2MhnhATPML4(fZ35Hod=*WZevSkT{K6yWAQpaf7?$7eGv^Ob_Ff6(Hy=m)JS@l}
zA7NhmYX`YrwHPgdHvE{gquCeFj$-SEaWJephF#R+hrABv;4gU@WVyyXv_3_S%^G;j
zei6DjiI<PjHa|0xZ=;2u{fb1d2lGhY`a*OgXg@pFG>QBSyGHEZw2)H?-YELRMYJyX
z6`7WA%177kBX3kq_)yck=wodrvtiC;Bv$ztmu0$h6JK0LRo6PmhUZ(@97#hIIPo?a
z*B%6Ik4V87U4%9qnMDqcvEVD@Bgzb^FC;{96xK>u!tbkBM-JYrkj}w+a;@kt+FUi6
z1r`V35gWm6^2U+zT|Ne>x!fZ9vUz0I_H<;g;>1+Sx|2f|xv1^&cs5Z-@GC}2lQ_xo
zY{a5fe2Ziq%(-Ze?8ZBzb7^YG#!Hxua5sk)=7DHJ=RZ=r_&RBeo{rMu=M%7LBsw^0
z6k_woBPD%ta{rycjsJI_vv<-U?-$ge>gX(}xo|fev-2t%>(vA=TQ#FM+xwvFsS&(^
zyzso+_lGpD)ZoWhe<q=Kj-i%i9=P*VF+8-ABNMmr<j4{;JWf_zaO^cendxdMPH?F$
zeiw_b%gUlXtFGV{ua#`+vV2%=Ium2DGu$@^d9*IP7@hd%MI?Sue8R#l?4O3S@WfCT
zjHB<O73Xru#vtL{FSMRf{aXuFG-aWF)G?CuZy(w%9!bX0vzX4>U1Z<Z9$`=5h|Hr;
z<ASmX<gAxdRyR_H+{#!@?Cmzd<~DVHi;=kSo-W0*%D<7^n^fExh_Lc<1)`6Zq9tB8
z$v5jI_|vC3c(Q*Bc6p{sTn5z80W%6dlo&^je!YdBC(kFPbEd+k-~miVXbCQyR&eU=
z5%MZK5}PGN5}6<|;QQH^yxhQ*Yzj?8(*<sOjy{JEh31f;HGuTY{>(VeX@*`hVbJ3C
zV{~a^2}yj`13L{{VQsO{u}}U0jSsqzGnZVUh3<PeO+^KJ2aHFvc4lDBOPAp5_+!XO
zVj&WL+mB_`SL1;RN8oI+Bc!=d1;!VyAe$ntAcu7$Q14wuLR)8Yna`bxc}6(=VEh@s
zUAdTCe3pvc)TWb_#E9HqQ_W-_Xu+kALg2g1?dbHU11RUC3z5g3*gIB<{3@{|7OUUk
zNXJm3^1y+-z8a1~g5*iT0!cD2%C;<_!48dfh({R)$I<)o8PMr`0vVxRhh7)iK=nE6
zi9)kFN&r)lP4-hz5GzL_{O`dzcVyUAl8tzY(JV51WC~i8vzY8X&<lV0Het`jV^HK6
zq3ctbMy7T>gHIX{!=aI*c$<SMke5?L%a=!^`>oT^rR#AdFW(fB<O00K=N0a|5Dedq
zF)Qo7S%Xu%Ovr<G$+(%%WJ(3^i<{$N^i$$08SGa_>u>gR&6cX{&DKcPY4!@VVyra@
zFFFf%jjShowB$(Gh)Q90y9fRjO+bg&{zJ~K`(gF@69iqI%x2n*W{>rlvX*HB<i>79
zHi>B>&)s3<l<-dVPk3LgZ%`v|vefwBH`Up=fg?y-)w(>dr;i=IC=PylSkFJ^V_6CP
zRj6@Y18O?F$#RXWlw}JqjDzGG!pYk5mPsB{`2Nd={D$j*KMQ`cXen#C$zvr8{a3OV
z!d4Tp*WSEm{T@E$O)9Dty40IOmwHj?Qk8@*wMXy|CJ0@sj94mC61vn<p-cTBbg8d|
zF7=|&r5Xrbs(w{04iUQ4lK=dJNa#{Cgf8{he_iSip-WW}x>QZUKN$4Cs7p-{oG(Cd
zzT6U=FS7;bOZucl5^zJ2-}e3gIbUQ2=gYFH4Adz&U!<j){;%_;_6lU11n0|u;Cv|%
zoG)_)=gU#S`4S^IUvvcLOUvCNG@?(K;T4=O9fI@4wM~{VDOl(SSY*E^4W_;Cg@&aZ
zE<A3=Yw2pB_SGB7J?&Jeuu)*!-a_cN)idPVqRg*|UdO(kYlI4(zC%5aHjtwxw~<Tg
zMd)JX#A)}+u}`Js`R3{U<gePIVOeXJ{rZT$uYJg>OHH!un9^K!x$!1%_qBz6*m#CG
zJ)h5O%Y_lU6RhxTIgSF??qxeGP7$qsYazG7$o*mh=eE0+o!4l}FU_bV$9G8bKFbY|
zDA0`z+DY(x+(QJ$m)o%H6MsM5!794_v)m-BVD+PW3M>jMVsl4-XD5h%Bw6>$m_iRp
zJoRh=T4At-zghgzGIv%I5pxYif$ME8J!+Pt>*IPXojgvJU$PA3cO)HVr&u#=?#NnF
zc;^ptm~oAq%X2MXzET<$F<Z&^sCjU&>1G@<Q;q-DF2(Me*}~nt`4u0({1cCxX$uDf
zFQa)Uweio#!T93d+3>i+XWTXIgaw5td^>X*(v%#Dlpnc}z1!Y^O_<?~qJ2oPo(B0W
zWe&e5$U+anv-hdP4+k(g<i&{tz+pYbBzf1tOZX<d^Rk5ea7sc459(ukA4}9WJ_?x$
zY?6ZDFnpz04e4&xLo@amqIt((Gm&{4QM*_aaS*G;-!GKFU3L-ZNN_zIzd@Webgd=!
z6Bm%v-)o3@Z$26KVKjcyZNRT=s6qp07qYW5`^bWV^T<~wh=>!R>((|Sv63>#TJaVX
z4=5r#RvD76?j`Wlmwps`+ZiY;=fd9-&&Y$tJNc$QI4pA+b5_}s7d>)hkw}rfQaFY$
zsC8gB1yACa1aC#jC-nKsjQMPQ{6B)nYOrQ(K6~%+-?Ffit4JF6fmnVmMd3m>`Qtue
z@$q=%cvzm*nG*(|9Y{xS4Njn*xH_~^=>+GzRfGR((@e6`LlNUDh2>oC5V^8QP;Qut
zVs~H0;-<6t4_q2LT<yR<mw8LR#Y7-KaT8+m&ySx~KN&s0Gy=USV$g(QTkgVE9%(60
z<~!vbQJ%Np1}SnUCQg?8%N>`=7T+k85E+1@^By48#8JrXnKP{Yn2e>?@8y@@4J8TT
zQp8yQ18Ez2OHys`qSh^6@R3y?&|k`ttqHK=KbTjcy1vcGW9e2B&HOG?>&?Pr%de4C
zDhzI(KAqpbb~|2_c#McYKaLKYOd{{TYm!TPr%{yuN4(b39@@9=CG%DsM=wW4!p^w2
z*oW1C>JBq-VqhxF_fm#s4&vz5$pZSl_6bIAgEcm-6OpC8mE1;y2CO2?BHsKFNPZ<}
z;dk545cX9j+VfEp={7Cm*BsD7z0Xgf<yOVy^{o=@rUhW`t{LpEFNXY-z%)|&&=Rdm
z93-<#1ZH!(BS~C+udGV>DH@&Ci2~ji;~3dStXHs^7@MkNjgc`VcH0^_YsGb_H8p~m
zFZRWw$5F^?uMY1No(JQ84Z`M_Jfhgv%lz<7#}D^-B8R+P$ac_@n5z%qd*3Wc%RPv`
z?mC9Vs|U#BCB2+($O*FdL?x~p9|Wr=dXuws3!ZHw&Ie%(UkT@_H;)|+MUKr#ZkYiZ
zav;o{HCN%ldogmH-G#kWGMTHNJ7L?WL^Lz{H;%h?jU>Ejgu2IeGR{jI@KYx_SU42P
zIZ2e`)e22$ajOEDy=V?G9%#o^xiskwyp6B?9tm%oX`xH>X!uOOn3+p5FnafcY>;^i
z$0yC=svJh~jUO6d@R$*3!4YQ|`NINEJ{g5Jd<Z0W$6Z7ZFQ^jN3zzWALm9YA1bCd7
z21m~f29KN7@tDtLIA7)>-f!LqrMCa%=6okeN!o{)x#=OWMuRV2RKP8%8;g&A`wfk@
z2=mkj<B;9pKe+3`Q`om)NboJIk)XmFMzk#thh4BCr#n`_H>edoostiK?Z1gEw4RdZ
zJ{eHTNtsnVvWReh1%Xe01PSj<7tSkRhmscGaA=PzfqqNKNWCR!ld!h!>{(Ja`rc~t
zZPH}C@acon<p+6k_S`Al&{B>(Zr{Zlw^VQ!R<1(^MG@q>{S%Vnok}GB=n!`NH~i;i
zOIhDkW72p@4QQ;|iGw_X$kT<(kcr(~VpS!HjOw-#$!9C^Y4N#bw=`mLU+*Q9v%`p4
z5^$cGr*suAS#}LBJtB_=8plC3t-ahy7)`d@Hb76k&tN#RC9#>K&|D=IUhdJ<^8UIh
z`1UGI*6GeVw(>+U>-tS7v9JALJ$p(>P{M6gIqod-XdTJ!?vG=e8XEZCiVE`Xt`>Xo
z?@`NZ71OXt=`-Kawt{_!Jy?D7dHm15RXp92fm#Ls;N8rtWlshF-~+)wcwF!gW(oem
z9eu7OR`3s=6#Rp8SJC`C!9Q3a_y^|;{=p@Je{kQRD<d!X2U`XI;9J2z=ppzAdBHz8
zSK=4jvEvu|AOE2EWI!hGPZ^e(qT%>H+yrLk;{TYLTmEBaCJD^U27#FwEif|=3e3!%
zwY?-wU}lC2%*<GUndvPsGY#(bqlW@B)AK)Ork=pe{3b9nX@Qwp_@DD7AjyX31ZL)N
z{2jw_jhnAZm`B?3WCi?(%4Pn+?ehrPtUH?Z{q<R3dMe`SGj77?QL&_k3oHAbqe_6y
zWgM%w8`pdIp^Q}nXwK6pQYZHgnor!uCEpQ)BfKR@kbWF)nGu4N*De(tWbyxNKC8Bt
zub?`J#r#BLR3c>z=U@McKOUHlJpbsR&WAQ!bL$9r*QWyOoxWW9=&1u1o&U%U_+@iO
z@#!!sWeP$Q%Wz?lCR{%DOxf|`sm#|YW_YyeRZhn9B0N1K0d4BI2F<R0h2qBY$Vqb@
zGFhgAHeL_L+s1i8P*#F_b}b_Q7O7=Nu1Y{_tMA<CE)mf!O2IOd2XM`v0%jBIg$}B2
zBBxKqBh@hugg6c0af5p3Gknf0So@8LFZIV0_B!E(`!AB5i<{Bn?G&~foFMqlf=THF
zEo`{58##O}!|MVC*6W>nU?3?DiL#AJp+ASoUTMB1Q<a}Paa7si!bG@t(rwrxafsM#
zs3cpXTS!voF0?DnojH=F!As1D9G36gbcYJLv*HpQTY8*z?T#durCLy?!3ARF6^V*M
zP5BeQAp12+jJMRA#)|Je%m(upNJtJV%rJ*yJ)<#f^X+1?O0A5QEBp&nzB;k|w-Wen
zhaD^19g4g@JwYzF7m^X}!N{lmKC+)wiGJU`g~BY)V;u)MqC9Z~TKwfAWcqygUX!Wl
zL18S~c5xY+)K^ZF9hZ>^uLH<kcPy*jDT^&XULgLT04_8?#|3VmLndYX;pAJ6VCkO{
z#1|eXTeHJpxP?4=qa2FD_-$x}(BpTM-o{zou4K92Xrg0&hD>>J3@sPje~y<W$cGWy
zP~m4ga6d1{K5oz?H^z)&yVYyR?dB2`{-S_+_3Z|E8ubL7o#(>r-qMfy`p#l`TNilG
z^8m9fZUHRYYy}ryN@F4(g>s!&cH)D|q1ekM46DlKaLY2fxW-&p#%OXa_c$&Hw^<#<
z6K4zpWAz?x(;A*Ld{&N=tz__<3U$WL=L-I?{&<=G>WN&5@(XTL<#d$y<2DMMV~WVb
z7W4#Yq8Gw!g#BhW^j9qlR}_Tv594M+yPC!1vGXoY?9~Huiob~3<~yUft)=Ln)E>@i
zQYK#5kPc_Qh#)Ht9}}*lh%Jkq=u1bg5cWzAw=XY@OPBS;Wexdg*4b9P@ylOq-~0zV
z$r~cMx+Czt$_P|Dr3~HGxQzy1_Av8|PQvS&-NbC%1l;M+h>cRt;!Ro<w_(G3rf;n?
z;$oa}jfO7t)TzR6r8lFz(^Yuq%g;>4OJ{i1-j*y``GygR#K=%o6n0<Fkk>f|D8j5B
zc7-h`y7hz5U(OD>XX$Zgv~F<=w@yTcCYO-T1`IP^ts-3`HsQjXXQ0cyBHW_32=1@^
z#mKlOm90LnPULc;%N`c2=T>QFL-Dljc>l#GWe=`=V!{tS=gQmLm?Vyc9}2s<kff8)
zA+R0X%?aRIBMP~Ve@jb88CEe04}xGuMIKg|Dhc<R{{ytiL-6MFFDCFrd0A#cCVa55
znp@j^n=4MUWZoZ;Kp&^B$A`S!@XIL?P}@kItk9i~*q(e`^f;OMCzA&AirzCFrJ7~C
z?@z=_{1VW-!)oNaS{FK0HiJ2Hs{_3rl@25I%CPsgy=eT+3eMqq915BE7hj{JAzAkq
zTn}EunUF&$<syaGjrfbR>jCmIDkO)_>6SLt?L{B7yWxDGNFGQ8z_8Gd@PcbB7E^tM
zRm9|A31g0xeXo(~wR17solmA{EhLkhe=}RRB?^1EbhrRLWPHa+qXuR-%=c}8zGthM
z*vM#XUNxV{ho*6H%VUX}%~B}4^esH3SH~S(bE-^B!x{$^=AjQkY4B_FhqC8C50i)8
zVd!w-EbjR@W!S$0qmn>VrY>MFG}+N#HXLj12-zz<oD;{N4bv$9wNjj)JI#&tx0N7@
z4qjxNjvZe(VJf?2LoQ!=GZ9}}--z$++C*0O)uY0g??n2n3^6Ua$Uh_rmNz;yiH_M_
z{=hHFGF!5n)vUWgCO@Lf6J{$8OSUu|pVe?&L*kA+TKOL{vs7Sa)(FhZAc2`FBQP_M
z3e3#M0yEQ2U}o|HGxOnwtHfVmW||Ak%$Wi+bD`jTNfVfv76LOf?LTJb`2U<Q8wF;j
znc#d`EHE>N<L?-bYvgGpV6*Ab_*{ZEO4(Edm7R;>)LCXEkCQ^mMvz#&{ebo^+=uSG
z3r3z^=dr5qCmeXXgt+gxOWq8GgO)GeTy;+i_j^t;>KAx^eJ>`FE79w*jOz_<LDwVb
zrjzx*=KTZcR&hNZgnL}&4kO|}Gj3&HnBHPPT<0u;T5uOOOw~ddo`doo1+ab63(jw*
zHtsv{6^9G=_yX4=dfVxwviLKK+<g8EJip8orWmilQeJ7C=3#Lz+DQRYQ`)!*R1jme
z^aaOF+Kkd1ec=liNwR9-CoF>LSfzr&gAtnKY)1&Pkk&(I#4bTM9}CXdS{uJ!U%~nA
zHWp@-T;XozYq%`%5*C+RfS-NqE;E!cgCkupFvpHLVzVx9@Jcxn8TXxl(nGUh8Tp5v
zddnci@z=5FSt9q$I-8_Je-iV~7Iiv5LRCQ@v6{9MXLM1UiHzziZ7h7kP2PAN+PC=P
z364d0L6<eu=(<B5x~UN6eE?~Qn*c*$@vxj9Oghh#?YTZ=C|{brtdvC-&Q66J=CzUf
z9jB0(DMdbHPi6Veuf(<H2=ceLWb@S<No~;xK78B@7?+)fE-H7EH>-u3an=!}qLKsM
zUl14qHPOL*!9%!P6+7J>BuUA!OT_>0jkrO}&_zo4r{ywsSJDRET}ca)V)eIe+iLbd
z|JQ8w-{a%w@3+fu)n>CzzB~MVH=9jgyK0M%MbZXU!<5dy|L<PDaH<qU#LNN%UHQOm
zRS31dRSi7tl&60yMgZS!qd`)MH#n;y!TeE`1%445=m#s}L9}}w7+Gr#3N(Vjq_6LR
zdek(KTm1pZE(xYbHcVs=;#>6jd9I+QCWaoNmkU1c6=#zFB+#KP1;9p~r>A>~fdw7Y
z==jS<0B&I!^({7FP{kYEkdLA(4)xIO^L0Sgt08Ka@dSEGN+jrdpTj)r`$$`b&jiu^
zlW3L8k|0=q9DTX>I(=c&O%V1ei@q+s0HpkGr(cxM0cndTPy=pR^rP(@b#9#@7;(OX
zQob?@lv`W}t2DFdALa4X4u1$@bW-S>whL+BxnA^_M+?E@*s);JYZVYY|2O4()*sB#
zOrhmp@1W}xZ0Lp@TVS)LLDXuvRrEHyN;IzFt%cFc&7zjJWfuEG&R95`xrq9suZi|^
zAE;)N3eiuc7%H<+jgB(%pwAclwV2D^vatFYLe0|6w@7Mjq@}Y4L_f9MsLmhJ7FW)-
zP;c(>7V_txQ0IJXML!pKQAL*jEWY}QELQA^r~K??L|bm{q{jS8qdv{?pbE%7s<U7|
zHTfB5F(bu^I%(iZJ$d6meXKb^ty>c%dUT-HVx-O_N@jbC=y6*jHLqQdYJC?jx*niQ
zDJ@$;?bFJjOMjZuDy2E}{+|;?f4@JZrd_NR6@T`jzdPpAdrMU5ho<i+smvxy)+v}0
zk9DBmoM@zHT&<u_^lE|WS4IPwk9O3jmGN};MN8`RqRjxS){64S$IxpvkBihE+R|Hg
zuLC=8yV1d!(qKF^p?42C(q9fdrIe<;qR-zm1eXhg=#<)NV6{sJ<r(vus&qd|54mYj
z^D2)~$f1Jn9(4_zxKm7T`TdkW_BWcEc=s?ZK4B_lHgyI)Q6vVIy^W!u!fkr&&m7VA
z=aVTjw=$9L_h+Kof8#~Z?UF?ENA%Gu^K?YB4n&APZ`T$njO!K!K0HcAJFFIIxS5JP
zn&yj~wvPfqG8Zi3=af<%trDVhk!d0;FD=o=hsL65Qzy_iR(C`<<+f8_PbZ10)-R;k
zV@E`t_jgdqr9Bo}p3S1v#CB?SRz9`ZuTun99~KopL?TmpU&`1p#A5u#o1z1zT~zPJ
z_f#$XYN3|mB>FCyC2Cz~D*9?|FA5p7q+U8IQMMKWo3d~wZ4Ks7>n^;ba~D3K%I>Js
zBV!6hBNo|Elb=|FnAT|exAt`EUere_(>RpQ>^ep@NGntEAEr^S3h&W}c3uW@&B@d`
zlUgcOdr-8|O`N_Ge3H7XzLT2&^9M~2jiXV)A9@CNm6@|Yh~CgJKnFJ^gR3W%ne<>y
z@NJYV)9(5isP@P)29Yx0VQmUSmL$;0lKSA&@-h&!Ya{3oEdnP;MlzeL(!tnk;k4Xm
z;eM9#XCCYHGJ{POAa-#oC~$eqd<}ZSEHkTsGaf`RF0Wmf%KlHx@%jDCFY%Rd;r+{u
z!Gi?GEFch`qoSa5sTzFkq6;6L*~O54O*n&6V-}!K%<WM}p<l!jrZ2>a$-SllpM+~e
z(`oZy*6dVfXUA1AbaWb1p|ypXtyTca?X$soTUX|X+#;Z_TL+AdRx;MHxs20+3P!R2
zJD9!Fg_+AKf~jGe%mc}EM%6JBbZyOK*3Ir?#5cSIpBkEg*v{WTVP-mWJ}ZK$JCQv+
zN$zlLrH0`e*ZN)QB__R~%rlE>HrhkanW97oNIEjMBk#~xq9SOkJ>!_*Gf|AvlfTr1
ziXnjS%g{ASiBy365oVRcB!+!-0z9s`Os{M#r4Fc!rqgPR=(L*|VBRV>=IV4WI^kK}
zuzVkt8lxG<jcVxfC{NIN@+>F_SB74nW0=3cR2dEZ0NB1`B$MQs2p(|9nCJ=rgqANE
zELvR3%v!0+z+;J^<KhjlD`XTJcS;d*wI!g__ZtYc{Q&<ynum7o0q{BRIV_f%@lG-D
zS%VRq;eM2vCusmH{>rjfZO1|!)&-9*FN4#jy2HiMUm3PX*eQN%M>6j5O!P}7w96#|
zChakT%PJe0p+XT`UeJxgzRAFmY(8UAr^-&a@5zQ<F=p=+uZ7=gS5u)2CxS3OR&?~G
zhA29_U&KH_>v&9|n)R>Jznaph^v~fUhbxJ+!VOojSV@<<Sz$%<qvndTd{&D(^~X`*
zY89<Hqf8V$MUFB}ETyjOyiMPK-$2)lJ|;RgK&W|-qQOSB68e|-YLK`1AC((9kDA=^
zm70Epp`RPipvNa}qMJ|>xV?E6?ecjMrCpjp54fjO=aV5lP$dt(CA)x-paOc*=WP0x
zu?;meSWWlMNv4jaY^30c^K|K`b~^3jZ6KQBL+855fQ{L&s4jC!;8!s~g=qT%ncX_f
z_6~8db<i7B-Q7vsFOmh8Hfi9`CNBUJ*3jlw_F&Gnl}xvS25nEj2laCU=+|8rfc|P5
zdequM`sfKs;PLPSJq)*?nA5#zD$Fv9n)x`FmU&iBznq{1devir1ks@llGDIa*hFPp
zi0JvgAwbXmE$y=#(i3+F)1fnbLHwhybaAsE?JM3-8(dSRuexsmoBlqc{@%6(bjMCw
zV%vSXZXlk{eXhtno%R$Y8GZ)Go)m)M`&M9cn--(fT>`Q;oM*fqq=Vyq>P*bPP~d%N
zJTpSA4K#h4&Kx-O04Q7u0nfs+nA>(!z`RG=OsbhGIQ;q$Si3722ulsna6ZZ`_-Fz?
zJDw5tXvQG?Wd-vvtA-8;hz7k2Jizkk8zAz!KEroOFq}#cXqU8Np0}hjk!M4h<m$&@
z@7i^sb7DA%o!1G}RX2edfu2n2hG}4Q?h6nm^@D!5{4{uT12Rh@B$>m39LVz%2Rd^l
z!HBF{`seLeRAc7~aD0gZxLje%^l{O&S^gFf(1>X7ly6{Mln&S@kxH*WD+W%xnSoI*
z*MN@de0tZJGH@VXhmH$Xr)?JRr$Y{u(aIBxz|uJ>wCB;o^j@VxdUV5hAS_1ohh|Ju
zo62cL-O=Faj)kC%4W-Y|*9Gy58|m_Yg;e<M<Mi2=4q#Q(D!RjYA@xe(BX~PikM8}~
zOB)`UPF*dnqH|W2P+Wl)?eKjOs4<A3&u-ACJf}KSvi?4zQ!1yZk++m7*^Bj5-GK&?
zQ^`guzDt&t{<W7{T%kez9&wY}lJki&wp>m3ew3x;H*BD~mZ{S28OKC^+rNnPW+#e7
zi({#I6=hUSEf!X05gnB2Byw-}pmVp60|s8&;6$Ptm`;79{{B)1o8RA~j4o8udm|)4
z_VPM<QjjCv=95Q%)KUOSl6D{mBH*`Nj!xbo10rQK=)hfL=udCs=z+{Mx_I*t-K(BJ
z9oZ8Ci04@<U`HJN+Ikf2+I<7OkkX(#bbryT#5dYBHJ4WNm=1i-NrBrtbb-&T)8H#o
zVQ#wQf`BENboY`+^qSid^fbQ?dgWjvy?$K|h}IuN@AELDr%6J3(PKjEp01=zN?*~5
zCQWokz-6k^?h`ogdY;1bb?N#@ijsOVk`C<G0@i2J=?&{vfgcl|(^Xr~)7wmZK$zGn
z&=!4)wj6j&o7Z>(sANkYZ`cf8FB(lZF3tq^_8+Cvuj_!{LQ(a}Y#TWL{R$=ZI7_ti
zwusiV8AsXG1yJt_WayT-Wa`7+qx1<KUHb8+c2S-ypj4w;sR>gWDA}MxRDO5AsM=qV
zda#J2T&NYIJN`?klWHB5QtoNd&x=b%citCJdwS1MU&eb-BSjJP3A+Nib=(;+CD(_V
zxMm$K)80$nFHi;%mBQ!bGz+K?^i#XC66x<tLqYk`+2H%GN%ZP~JnF<+JKFi8B5gZ#
zhkE$&3#}EqguX?ZDB~St=np~DY0p2i!GVfcI@eyBDLJGK-cCqlH0B?qBc9tb(f4kG
z=sN-d;7tH<N_PRpFE)adpBBs+zjI8~cpj`-Urbv?2Ggs3X=Z9c9IaM19~5ps2TopH
z1eO-Yg9S6Mfr<TlY3otppgBN_xxQ~YG#on-Ud^_FkteP(gK0&Ks`nA-{ah33SL#79
zb3ClDd&V68`bjX#3X_gLj?4nr%`kmq7Sl***y67T^%L^o=}a10Vrdvx)5J`f@{##C
zJ`7G=T?VHs&w@7wbD1e`e}aQ0rA$y$KA7Dc0^D0mnOl=i347i9jLr`gCVXH$bLvet
z==a{oRK5QUQoegKPq!=u9^^Rl{lzm77`m8=oYlt6xmOP8<{>b<;xRMu?GfXz(Zp={
zGiO-V^Wj)OhT|HFozlS0&H>yQ@f$2DG6GvNhCuGccrd=9nXb8}#q2zo4yvYW&?oy}
z(iz{(nKSa$V9~PzIz}{s4t5+3CJN7(5ejbf=c}%axTX^L{<<1e&3_0+QnTobJ6wV1
zf$Cw|C%XRH2o3jWFl|8<aGCOK&{OLRpOmP;VDnqdXQM|TPj@9V&xdE6ZBn6E?>DGE
zYY6Ps&jtIgB!R__q41gCInej{8siab&)C^6Wdg(ofem|>(bO>mf9we`+c{xalB)T7
zsqo7ueYkv=JG%9B15>T|0rr|sW{>VNLPJ-MGZ(Y0;g6l>a2m_7BdP$K?EQ~fcb#G6
z$NXhZj2Bv;cFd%Gm<sI|{bVjUX2Q*R&d?+(25k@C%cLgHhjsqDVC-0RFwIVts+P#1
zz11bC66Z>~*inkoD3_*ewM?j#B|3Ecz&SeC+k&oNHkJ;5+eV!>KS0HvDG+7#RM6>$
zZj|=@czWu7K!*g^QkMf}Quoe17mbMt75&M)Akta>jjoB8VqVX>PUj{%GGUB5DD-Wm
zJyMjwU6Z|FVB;>vGTQ)5CqrOtq%U}uQviI1RG1wW@{HCrO`w|X2V#b<FmGh_=>^v#
znWnrFI#OAZv35O7rymIbYIj*C#v>bS>HkH~&A3WOJTqi&hCQbL80!M-dryIN-~y1F
zUJIli)&e~)5=>bB8a&tonX~I3g5Q}KJh7Jmn}!<cG_7np!)y=83s(g{<0b*WoJcxQ
z@g*%gO&iqC{RK|<NDa$x*Yd(0+T;Bt>e*TvRNhkn?&Ddop&}9#eP2vVJ#PYmefQ{7
z!ZW{WO%|AOIs?e>*hT-^@|aeC5)5vyk_AFXOXEvRLCTe5^iGZQ;7N@oy*zX$FdD3(
znUW~rd$oYRaZ4X4yi;Kwe2QfbAD#}X4Hhu-W=sK>7HBYcC>KWIwXm-~BL&poZenCd
z=s|Tyb!K0Q2=weOqencpVH{gH+V_eSQ=NVbyl?Ubc5jc;(hBc^&wMeaUGoTYW4tso
zzGNDBJ=T?3SH6xpwBtT#-|oOnRzJ#Ua}$|!6MdN68dq>7#|)eiyT!aQYXcE;^%&Dr
z%FN)yGN5)xiD|NG2YF3)%unH&@i#FMER9tGX>L;(_upwCX08ik0*?cClt)i<k^)(@
zJR`P6i<bFt1l*pfOY6#q1G5M>kUrx8I5)3@3RfHge)p#^7bZr6zW>ACn@8pNz5U-&
z8Yq<pD$RqasB<63KCe;|QHnw#O-Mylk}^l52Bmq@q?rbiDUzv>3}sG5grX2BQ{;Ev
z&-Zh$^;!3N{`vjWde*(3|F3nO>pZW0?eo~L{XSlMU@7#z_)66F^YDCg7rEcF5v_hp
zVer`7@J*qR?2?p6pI4gL<xzo>yCYCHSqOd$J%@}zM<mz0;9UB0-0a#<>b#w?`^9s3
zUug)P+ti?5Y!Z@^dDM5YD6ZGzLA>ufvQ=*jZs>_4UXP+7e@6|-C)j}TCXP9-O@R3t
z(J;w$E=q)?!P(dzW~Q(^Jeqo$C>^S2{9lVe+_4j6!`IJDvi}6&OMgSElp0B(a1<Hm
z{hKsjwI<tN+7U|+C#DbI6EkpN-oXu`G<z&$(1XPHhAR_UF9g|XK9ITfIvG?8A$AR!
z<Vu(XT0edWG|C%J=uN}KZEL`~FaSC}F2^%=%kZt;D<bylFf{9&!oy7^@LX98CLD?*
z4sKiV#)MoFxaKkpczFU8Jtb2$?vm$g#qjmsFz{y5FmiQ2lsbGRaqm{6mqHlMytf0Y
zzr<shX9;T0w8t}EoABn9lOS8D1zSB@G5hOm{5@|xu3vZs{NDzGO~@ti66QlCr>(e#
zCx=gC3Sjy;7qAab#!G7-gNSMg`lc5{W-JAXwGr4*cN|l?Ly?9?;<6n{IJbETT9${R
zZ=ncG%C7{=N*&tkD36D?UB^?Zi?C{@4_fSF&_^!@zGPS6kDg}u^}Z4KpN)qtUj^~D
zNEJx&N#NL#C|GxC50f@`KK#ymPqa*9nZKv+5tZ&p*vDoPwb?}^@<2TkSuRMlG>gf^
zU3J8FNh?`m+C#$CFEYwyI?%i=pY+EbA*KSZM0RZ}^Ek(snH|?j(ie>*OG?B+f9W2Q
zvU4q5lKBWCB2%%iz!^?n34kNtcBA?wMU+h+fXX39*s5DVK1^(fm{MiPp3p<g#{7kj
zJR#_7T84ivJchpB$x!K|33HUQ;P?rBc<Or+9+@?g^_4of=|}~{$6kVcN%l~3S)TrW
zd4vmVar4O33-~o>Bbc{6fQ2h9Q0S=&dVSpkMh7zS$AL}s#;zYw(}HyW&HG^Zb2p|O
z_QS^Uh*`ULK!bNJcu(!X_VO6KmR65DwLO4uUNw79*Nk138N$}Zwb3l=PP(Aw2b&$b
zfUUgj#qROSV&BTxvHK=Ru(57?*a)p5n$xp^U8wPp-rjD(Hf5@@3I~Bb(;39-jB#fB
z&M#wyv=!KvTV?DX@3n05Y;$&%QwHrbH=_FLF*G_Qkp}!FRQ-ztJIuJ^(Zw_AzB5MD
zbJ`=k_UsrHD}F`Qju&Br2_M_p{|0Bf$<pNJ^Jx3WhE)~NrfUSA)333n)H5Xmjb<LE
z5^L_!J8iL}QYW68RYUnC#Hc8a(A6f3a8CLi3~P7*5%%0JnfMDjAlr*wXMfU3lP&P<
z-CSI1+=6!;9jHO9EZwi}MDK4gVGoC;q3gXnwCkJ^3Y6TYypskP?kq?PUPaK$A9P3M
zK30$uFx6lj=<?r$<c|`#Um6e(3vgr67N+aQ;nH1^7``+DHX9d$YQ`c+3YdjK?ro5m
zDGD=NhM?Gr547MBSnj$D--HxErQ$O<hM&d|sUW=9RgJZqCyxq~nKQE&t&3A}*1m8y
zH@u6k5xz=;cG|Kg(_X;NkCHf7FdP5Qb)%85_T!7IitLm-&)BpQVd^!mj_u^H#8)nl
zXzXw`UHwR%jukaVT&#o&d&aYRDK7NfGd?!Xkr|bmAxJ`ljXY{fwV4CFSlLtb&L$V0
z+*NblU&oJFl%zrDIpou(JLc@ws{u4cej`uNaf0E@B?)v;QowNjn-<zM@f6$L>&4!Z
znaZwyxQhz*dD4~MnY;ka@2t{>oxGK~7PMko36pgtigb2YKv+Z$<jQ_v%9Z(Xr~W}^
z;;|G)&*nHY{ri0)l**D1ya;G68IN;=-ja$&9~c??nhB}&gs;;V!p0G0l6Lk3^Z4*~
z#_i=w#_6pO^Tf@L)NcC1M9uvScOG3qVY7VtTf7K^HmlO(E5xay#yh-u@h}?YM4?k@
zJ_LpK&_=&=^o5@V#V=QJ&L2PYXc9-04TZ3FU_TA}<%)870XS`rJJ>14VO-1^WZh(_
zs(KPm{PqRjmNw!?!3!uD^#&HOqSSDp5*JC{#U(9r^tG1;Mtq>S?GYapKY9cmn?i9d
zJ^+RMw{UfgJY9G?1TS*ypOb=#Xc{yK!V+5{i?18S*XZKB12d>((Xmk(yY)xiP$=aN
z*&~~Z%dbqvk4~Ye*4&THKhMLU@HE=}g4=US{tgRhCT5opqAVu~Su9ryK_|j-Mx!pC
z(9Fl-?n@Z&I1WoU-9YR1C}{MJ!j#IdAZFW)c6%D(jL}^BOQ#v93yIQp4-M3*7othR
zvefXt9Q_e)MY|)UaWLEmFW9`MMt^po=E-jS_SqFBg7;#0ofr09=F#$3^%(fR3$GPj
zMCZl%xbE>{?v#<;_#-|ER~?l^rE^8-nmUbYq-LWFn}?Rh_pw-ZFRrZ;pn8HI(HVck
zY>On+_kN4%m6Wo9w)n$$0Y1Ddi2T`AsA8Iq$ss2Z8t38y<w@8TdjQX_<D-{57@G0X
znM$k5QhrB496L!FoBGA@lkj}FzVtfo=5s{_eJ2bIFGi=}C^(hh1@j&W(GNbMu)lpZ
zE<XH?XnI`*@!+q-W3Md^wkqM@C>gxB;}Cq<V*-koPGOkiahOn@0oH#s;RPPX)-iWM
zt9li>nu&rxzaWI2dI#H@)38%15OWF{m{n>FtK|C7b6g-SJgX1Z!Mz|TbqXRr=R#p>
z4)*9jgORN{u=Is8s;UlxsGB7|Sat`LzC4HcGxBisS{q4;Fd?yiMsWX6JkiT^gT5`(
zxI92Rv-srzao>Q<vB;lfcj|ev*sO$kKJ5ij@wOy~7sWH4JL|~26PL(SB2BI>5(9_*
zie$-9AM^1eKbTb#MqyDt^TN4-G<|&mGe5^c)%-^=Fku=f=w2l|eJA4O?_FehVHk`Q
z{UDEi>O#OrS2W|l3PHRc_+4)e7Mn!D=vxtdf0GL5Bb;G~9fm6}vtR;K0}|_x!0ZH5
zvdY#G-tjug=0hLh?Atf+)lCSmxCmqAydL05P6C73()h{c3g8?*X#Y47H+j!Sv$q6)
zn~w(#(`67xHo=va{h&J530gK)!T8EJNKrUKE>)f+`<G8d<64gW<wxO-c5R3+?}XYJ
z7m(iwV5({b*nRj5n;ZG@(=$hWm6wlR%B$ggsx?@?l0xH~`KXXG20W&nfs{lYun!VN
zZ&5ky`|uom!<N9@eh;t|osVfFZfGtOhe1l!c*_4gIzM_#xL#>e&A*S#c0EKGvnkAJ
zZ$IY7?gmD}R)a_dyD-1kjw7{OH;}Tc@nqh-PT~RW%$5`lq9jmDd?z0xLl=vg`Lm`F
z`7ip!BV{3Tq-Z>-X^Rtu+&5%aYYCYb(@P?{C&2Yd+d(jTF7&0ggYZ@dcsmdYhDB-Q
z?BdhJWoaInX#1WFW*vq4{&Em&`$257t;t@v0m=c-$gWR!!T<Xma!6(>2uwOe9%zn`
z#Z4h_mX8lLU$l(Md=)%bkX^ZYJiD8-tDiggoQm&1OpRCgve)+CrgnkO>?gHJ?DA{p
z=>BPUX!@*Z_IGt8EjnSvj<uXe@9FnbS<O(YyWEVm3!2O}W$3aQb|rM?>=9~fn$2dd
z-^$ud`#_IBdqxu{mE&m}F{*Pt7x`Ql(c?szEf|QUKRxcENbM_nUr`vp8oZ$vr;gH$
zup9JEemSM~ap0*vooan6qmvg2P`k;()Fr)!KD=$k%4$xbOC{uSl}Zi`J*GV>_0{ro
z0yIN*KaG&pW3xVKP}wW`^v9E6x~D#t{vCKl-EB;$|KBjId#TT&!3uW6l`-sEfeSRe
z255OkC4Ia>3(Mk{()HV#>5_0sI(Ci(R(Q{&d+#2^^nkH!Zmhtl+{b7z%h9-ou#7N?
z^;RWvjU%w7`6}~JsRqcaaiqA}58U4t5GfmdIO_QW=7feajkE5;o6mDd`sTkx6m>xE
zn*i{CF(RF1LgdzozvS<0ag@5o<!$Drkr<KP;LeV~jsX5q`45i9v%+VA2E(=q295mg
zq;`Rx!O}8KQjzIr;CkyRQ?4Y%geBxKM{o@pyx6C|>^n<{aD_olusf4<A(L@7_hoX*
zs|-Hm2N;+NdoUS)Lx{+3KVtJqkg2bJ$*jD%fwUgl%<S&Hh2Nt(xV4ierY;?aKY1dw
zUMn42@)`Q!ZY+Jp&1wDFHLy*wfVN8gM)UM$$V=XjBJ%!NwwOih8OrpydjgiV9ff1p
zGeLh&8s-EnrQ54-<JH^K>GSMH9NT&iQx*%+fsPpT)^x;fAxHGgy9(b!Ct~hKg3sRT
zgVDuIjKB0A$%Xq+@+1#CR&0dV2EpJW%EJmjFYdHC7qowT5FJgp{DkKSw7=(S)b~;7
z{z4elmphNj+926-8S3|@k?1R%aBJLU_<7$Df0(wSiEbIx@1H<(MNWcU%@=rO?SV$E
zfAGG4C49b>4SMd;cwABew<ul4NO~LtO=MA6{U~}KSp)V5oN?pI+c3PT5od9G_re#$
zvGblMx_Y0$_-7?(Gk+e2%%%9}@+^q4>4Ce(N?<ziIEq>p<Kaiv;CtB?tePdz(_9Uz
ztj9t@2S2J<h2v^Re;gANLM}Qbfb!!d@Y*v3-+9)6e{&Cbq{PBfGgq7_Wr5D#W+-em
z59@bp;9ipi=)V<%aZT#L_A8=K-v<1quYzwLWuu+bd@MUL6;oL^c=PK$X#9xAkdwLi
zbE_ckms!AtA#&m8Qc+Mnrh}$CQo%EDDoQ5$fU92(^h6wlFxz_a?zI84>UI%1N=)Eb
zWfIdq%>vsWPsUoMyNpPbCb@1aOvd!MF(bLbjGb8%GhX>N@d>j9?WZA((t%;-ui7rg
zX~#TNPG3xTm4dK!)>?Qup%H3NO5x(G;@DPj9X2(|;iqLHBpHQa!`3_!>>df{lwZNv
zLo3K+kBwx>_@@x3-b&Kq4-!0f2&}K@V%~3lR4Qs=*4LRrhvOj<>H=i1OgnMd>J9yg
zvW%&L7}M!8k(8QjC6))SGhGWb$qv~w26x6)8-!N#7|l9WvajTcfyRYI=G)3~WaOY9
zbE|MFlipuzQ2ucVG21qcv|4*IAM<YL$7DY>K#eNqKy?+fZCNi<{<%lrz*vW9Hryr7
zcYDZ5^QEvMvYR~ir9{2Mh57NSmJ~`>F%C`_Nm%775<b6>sPpW|lo`X!dCOSlvwS@h
z;4(z!#l?|_J#)#0=3Puidq1;CE`j*mt07wJzK}<2CNX=G$AFP^C^Uvdk=LKyn4O_Y
z5NWZPq$LNDU(t$SyGoag(K<q^JvAXuV;ON#bOn2-E~e}HOXl0aT_)7+6M61D2g(Ne
z$*N`HaAW;bQr>RGY#Mf8?!`9~sdX2a5~l`IysH)(FG=Il7t(knu?k)eHUSg+iQL{|
zi}FIJ$o3PB;5=p`N}05RfSMW(S1F>(w?Q~*stKjlHgIT@Do%EF#-lq_K~dx}#H0rR
zBj`XD^Va}>rW{mD9$=z$RSXL4ikTB()5u`N1Lpk5b+YY_JCna)9V4<_h%w9B$^=<2
zV^&5mq|@pV@rt?0D2z=oxO(K7!Q&D$l0D%Yv$?E_85{4#Nd72d=D+PU$SEmcLN|Gn
z?yNh^nNO)qYo`*Cdbyt%52usHmO`SK>_L>Csu8kzEE)H3Dr4F;gS<X&N$iyvlIayr
zT3`HREHbr;X5UF>;a@BA<Mtt9792^^!k#b-=AI+Yg5}Jill&yrU^|K1=1NB6{jukI
zBAd8-3k@6=X59_4Xr9gz)^}n7`^P<r)@&c7cO(>96>S$<mnFcKL?p4!=XBXa56#(0
zMPJb6=>e+LaGmat^Jgc!W>VuBNo-rd1y*fCBRzEPF}*M6z-qs2`Cn7}OlXRrHQXUM
zLka8Y-6w){DLF(73!ABj*G;-(RTrfb)2Y)mb8c<S&&J;jqWg!A;;-j>s7+`G-ko=r
zP76+;H~FX1E9T>9<F2vvNw+k8vHc?T5R0J`YvtLVnQ8xPp4DiK-8@lGqODJuBg@~D
z$EFj&G4dJdiQCTQff1(4mKl6~smMGT2sN<oyu<XU{vy>j6G(*LVS~M8o#f6>0kGe<
z7Rs|$6TkQZviesA`NcPxWV~HRp4?nYCe@n|WPO>@xR24eMvO;`!9eE`eXWVhm>(N<
zF|&@$XG~-}41D(&FlY0o>Z^>z>d&3kJL`EcHE?iW&*Zl3G}se3Uf(NY5i{WD%?Lkg
zV)jQ~GZ-13#4Hz9V3x9x2GtjD8Js+I!JxI}n1TMrR{aG-`S{1Q3ttR9zzGNHv0&F$
z{G2C%5vStd`o{zu+Asko?HxdmFai3CbizK#gXr?gA8(4B0xNeGoS$BShkBQ=B{&BY
z${#`g=PD2vPlUgl-SGKiA#~K7f+OR<Gx{H{5Dj~KC=gA9gAH+Tt!O1g{qiBNj(;bH
z?Qh5f^XKHkXL}-|)<hzUmP1Tf3~U*9g=kKgNb0%{!VlL|AiVD!{OoomDrNFGLob<_
zIrtKfnLEMS_tdD&yLIZ$Xs_Wwq779sStySb=e>rqGZ`4OCkJqQIQqHo1+BZW;J-Qr
zc1;Y!zv60e_?$Zw7OX+Xh2_93t46Nl77uM0gKFYA81ZX5?43CkCzjbjLF_Sfo;?HB
znU#X+p?laQaTC99pNSbknwWS*6+^RkL40r$=rzBBUAj+DC+h@`%$|bx6zX95UMEoG
z*54`26#6)77hX<#ht|I+NJc6_;Fo3iDM}86FEpU@d>7<#`+G}1tj8NG_9CBZ1kQSp
z0WL!ip{jl%j@e~|!%NO#@v|eC)}%py1~%Y>?V?!BcL0uo6-N72;_AUTd~NTDE$cIo
zQO>3+amtutaTgZ|jX{~9(<uHq3Evz@hxb`rj?B&mGm4VYLQw&pPv%F4a{0l7uB7%n
z9~@tL7?vsk4$4nN)AbV2b?XsH;mLqvOdN=nreR@66%34Bh}-KHl3CIfWQ4s2*7i3@
z#A=E+4ocy()SECabR#+T>Hq}wl`^hVvdQg~I3}rl6`6ec8B>ryhUq!^lMz{Vj4>(J
zBJT0Y#9H7HW5FIa7&kIZc6tWEl81bxBtjFu#Thc}J5gqEeKMmlvcf>a;RX3&XG`q+
zE;2t)c#}8g+F<0xl1v?KNC@4;2#geysH#G;wmF;(9x7!*`nH2slMXO42goizQ!;FS
znJEa#CB+`yq&&0`-s{=GUMF2*(c;Nu9~=WKemXLrW%R(dO_qdo7BHE~mBjIvBpmTu
z2u}vq0Dplw$$aKXl+XE*VB-uDw_qK)B_)NP?zQmt<5~#0{EECldE)hDJIONs1L}*S
zL44#6oR+a9D_uoN?WI7FbSq=#N8N{}xS6nd?;7CEZ-ztRBVf`z0MFH=L0#Gr`ZXe<
z!j}&#CDvh(J`aBdRp6TBevt3t`f2{&B`v|jP&=rMtV%B|;qrw$rGLSDyNUF?f)gb0
zd0>`?H|Uv&pvdh>=(kG=))p2qO{pUAq+6A_b5f9*8)rd;W_=`Qp87E(ol79vJ%y2a
ze~ZjMIF6Wa`^c=`!eiVL0-33FBizy&N51T}VlIvKWt3veVeRhAq++rF6P7=NIM;TO
z+VMbmftrk+ObO{|LAWY?nXJyuXWSlbAP>D)1HXAM*}nT55v=Yfrr!umci%{)rzjHd
zy!k}+oeT&pe@T>HM3B@v88Eb+1T^RziCz7ekp^E#O*}^=dnSQbi5e6SSdhyrz2L#{
zTe2#+9(n?gk#T1YNuci;BFN>zCvFQR_r9ip!#Zme9i&)i+DJ<8SdcEw6J+1#U}*0y
zhpa?9!p=Sm-?%JESBx!WD`dmEp>R^Pq!cp12R0Ua5$?okn#A#|1027~{)b<E`-fkR
zaQw=R<5$}_es!7SSHSTr4~}0s|BGKe<@i-8$FJP}2fy0$55GFX@vD(a0XFZ<Vzz1e
zbavg?TU12&9BpPNuodzrs1TR;+Pe54JMyBIzA@6Lef~x4IDIe5={4EEH<D=8%C}Ta
zu9#ZClVY8v=CdUF7(ME>own5ru@dZhcEb83Hmm<G_014wSKgdXLw7b(5BFOLTSaN>
z*=4x9;3$oZE~7#+h%r6wFy73XzCRsFt5mG%{TF)l!&WsqYe6U#DvhP4^`D{9aVlG~
zwgRU-E63<)GkSYX5S?5sNhbt89u)>K8t?PbxQ6u66*%2WmPVMrLiwi~P-p6K@L83P
zZyGG<9!p#H`j}y~y0{G%=5YOlHOuMqKy5m><0TFXUB&AMA7Vr1E;PJx6@vHYVoszc
z?##>s)uz4l&EQ0A{VT}InkbA)y*u0|$F5ot&BmOG=V`x6VteIJu~83CvomvVu!HJj
zd0oTWtVx9|JL7UCFOIp$D_i2gTR#08Ti=<<7Ig~pGIx)mb>p9~xqjS9wF}(Y?(;S@
zbN5H86Q4tO1dio>zLPO3_aPkC#Ov2c<elJ5rXp((8<uI78aAf2^L!*yc;U};4bN6j
z;hh(*V)+%Vj4BN`8ht%+*U<c4C~vuwq2biya}EDSsj$l1HyE~PF68+NuQPP3d%(`?
zGhpl1&f=Z%sOQ}u2xY_kZxAV|Ti~*59YVf5xti%lM5IdK$h_6qk)w{A#`7R|sU`-j
z5J1WJ3`i3lgToPN=n)}`PPQ><>AaV8woQTF*f?xm^^BOxoFD}OVIcX+l5C!0fa~hV
zb8CMuXjRcifyz{DuA2+T_c(+8voiSg{4Ggs%!TBGU69ulgI<dbutqnVguZ@CqF#oR
zyTd17mx%{>=07E|lV@Yg8bLfDB?1%N?!g>kfOV4}z|o%z!LV@^QHeeexBCup<JJs$
z8NJ~A<|vqboPqf>p2MxYr||dkbI{@HrCO_Lc&QgKr+F<<<sz8<WjTJ}dK{dbMX}7g
z8kMzP<AJj$VaR+DUVWkfF|X=y@=e6dH}RvwkS~WtkO?2>Lh|{^h^nqQut*FmEOy~V
zOBZ<mwge9d-y!ow_JP5#-Prdr0Bw6eg79>XaS!_9rvwRHBp3k|H<w^T4FZpeKt1g*
zWaWz}e8cAr12_nUQ_ev2hd7k&DnqG}EUK-13)s`T_}N66UViF|sfuNE39lIUY?(o`
zc2B0+j)u6oho4^dQ^y;-Q_yUY7`<1bN!6or@VtBkebC^KGc@Hf^-~kJI;nE=bOe=2
zJb`Hr%CzF+Q~cDif{uN?55HGM;^v>TsKz=ysv@aEj}AP;{Wcd6u^DH{*wT`3_tE;)
zLi%#J2h~@GQ2!shaf-VDU7V3gB^e`Zxg3punx0|U#MM-BX*B)q_Z5rRX94w^h?DBa
z<Iv7^;GAa(;z}_%$-WKdr7F`G$J?N1&IB~mT!FRDAE74G1Z5T`!S(n#xLqItXXLxX
zXNwL9xI3B7aqNXnzkd*knn~!y8XzCL2kTG1C%U2jVAni{T=z}Ex|*FNev&y1>xh75
z0yh9%1|jovIJo9+=CXdO==Cy<EV%y)Ms@}fdPN7?A6J98=n8yo@_;<vy$=r<Pl0(~
zV#x0kXF>YgTs+Sa$u>0&5dX#ptA^5{q`CkeJifrp*7-s73_GAoV?WuqP8x5?Rl&~i
zF(}n%1DAJYkyo+)AjIa9EgnfktSApw_78#48ei}dnhHDDar>;zCXg~GhrH&AgF#yX
z7<ILQfTS^;-hKgeTArduyD4#RY9sa?QfMJy4}Y>4EDARU9hJXOXL|$EB_EUgs~Y(C
zhAt=;*AuJZRj@SO1=p0Of<<%z9C>zwED$t>cVF5-cRzzU$IoG6d<U#{kH8nwUI=;7
z=<oIv%D0K*i9mV$qdpVqS3VrOa}a*HAB1l|zrfeXJ6Oqg5Msd=Yo>jG<aI+hMRo<$
zuB=2K`+@&wU3Bw}JDl0F36H(LgcUP>p<<~jDs37Dg-3^BQJp-OM_UR7T5aUu`vUZL
z>4F)v7eY^_ByQz$BjKsnfM*v9*OFy$eViP=8<4^$>!0F1l?mXWbq$<<oIur<esb!-
zH~17^1m)G^u+1$HPnV`ZWw{Q-mIgr6_$YAp>?gwZH6-}=URbBR2#l*`FmLU6NGajR
z&u2D)(UH^eVA>Xl<FbM3&Wpi!WhPv@z7{^6KTV!3k;W5(3gi%k5o6j*e&1LR?k0yI
zu16C@w1%LHPXjs!wqTKyDXivtyF;F(gWuyQ9Oq((H}w0U!@vZLk9$Ca7R&Y79fLc8
z(iofe43-yOBe$MsgXQ^aBrKdCzjn@m#Cf4`xAOql2rJ_pheR+8=!1Xrt2-ROvj2x)
zd2;+}_don<HOH^WKm2MR$FF|=!>{iA!><}Ses%R9e)W*!SI;?q<^B)9ddKlAp5Hb0
zbiO_NVy-LeIrj&RXrIX57X8FFZJf*=tX5}Fe2k#1MFcGpDPoVeN3*%#_1M<7i|p>Q
zDjK*ynmrtNi<*iZWw&c0YrKv-Ic(1$eHze4ts~^w4vF2Y`T~3Q>8(gQetrP8(i31?
z*TmCE&2XB-uBPjfThLD~pO(cCHgBUQbu%--biqqhaQp{+dOMq@{=I?m<xA;0HA~u4
zqfH-AP^Ufxo2gep4w}zYriL|DbXH&&b>v(1|JRBC7X{{#``_@=*-(0O#Zjs_&X!&*
zpGl?cl&FX;ADuj7A<fn=pec}v#m?UNFg_ba`_<^BO*5&P;Ut8cIW*8k2aTJQVS)pp
z*-w$?H3Xy0eRKL-@GdlUU8CDpl#a^pVcl?!%`8>mEenxm<EoF+lZQLly=KF#{%$kg
z7+B78y>g2#pP@)?euwgAhTURghHPl@<+ZHCYXLTM#GTzyK7r?7md_S`k)}c3t<-#z
zE30m!$eWTX$y<KNocB!p`Ka`<2r7sg4xGy}6hA0zXgs%;wK}S9SQ~oDP;_>;p^E!+
z!wpNGveqAFvN6je4J{_`Fr4_kjwQzg40U`zv+p&h^DYm+HazXH#!&w-kByE!%m&R}
zXsEh%x*<+4He5V#&QMyg-Ee*d#kBT?$RAfmzK9Bd^eQuW_(lgKPY#e3DtpLcu|aau
z{xT6xSc&CIiD3NBjtrR$k!_J9<YJ!z9$A(|OgN`XTunLYpRxo_<Xs>p@!w$Kg9NaC
z-vkMp-w-3;O;~s-7i&~M<D5saAlgZA)|NY1to#>TW+mg%3V+Pta*7oTWALwQ71#^M
z<NPIaa7RDax7Qg(6E2s)p6x{_a5RHn+F1!L9>REw+qd>RBt|DFZNlnrwXoei8$&}+
z!RCXy^w6t^pm-nxPQQ!BCsm2qHqnX3E~XeUouB3^9>G52Jp6dw4~JKs!mf?0aJ9iP
zwAgwdd(y^WfEyoG-`WcS1Ay6o_F(mnu{2RIjP}}ejLO)({^&Jqatww~6YgLw*@5Th
zt-{~0GqIL^3JuE!@%5McV7;IKR2OF9qNDHeT8jiq9+`=oEfcX&NfvL;{s0gDR-j60
z9jLtjgMu?U2s`l$Zs2+WuDD-B&uRUDD_Q!$a5EK_$)M+EiPD4ik@!P5oH|HJ(#Drr
z*!<d(s+R3T@^u#VT^5c`OEhV1mpSz{{SMO~I8%dU9#Xq$w7*f2s%Krm_ELYme&`P#
z^tYsGyfoU=nnE*u<7lGD7u?gYN3CwHrsqF>M466gTJHP{1*~FdibXeW{WY0RUS&hq
zU1_4Xc^P;}`2dwM&qpYlLi27O#FDj%G+S1iLd7`R5?Fv!AG^|<#q;RKrBzff#)yK`
z6m(l3jGr{raKK>>PBYE|$0Kz})*VE6LSgw?9c<hFg*0xohW*!9z;T04n7#NuMn!5s
zjqM$bh>Zfx3q#N^F@y7FDB~;RUf3R`jAB#TvFph!dapkhZ+$z6-d2t%*ZTww{>FjU
zuOH-T#W~zCX#mu=y@OvHB`~T`8I#Qc=T7+nqFd{sPqzlJXf|w)>w%g!OV}!~88xam
z<JMy-kS8YtUCm=Krt3baf0Dz`F;CF5G!(8c62j&$@@SvRRTbf1$&6RQF!Sg>SbHK4
zD#SUSYm-av+i61T=|99YzZbNGw}8*I&199CFy_B`%G6A}Lj0>0;Iq|q5|%QPSh>Z(
zYrl4~tmha=Os#;qT8gMAVv2fe1F%G84({xH30*<Wu>D3X7(Dw$$ocJ9eZ8B!xSN98
zca8@=Z)cSM-p2I^FC&+qy@Ni7A7o6FExI(Oz=I17F#8l&i%!Xf!*wF4c+ea#Yi&Zi
z+l_d2<5L*vJb*u!m!p!2A{ILgp<?lB<e9jki?so^R~JFguSOL2`i&t4yWlIgK0N(%
z3Z65Vg(GgYV6ZO|Q_sACv9szS?ZhdFToi>(=k#&Lo`<;Z<SA6XUX86T7eVa!FdW>p
z5bN(u1I<6H@Z!obSc^hbHH6!55b(py@5XSA^T7Pxm<caG??C-`2Cz$W8fNr+LfDWE
z=8Jit(%hG@-uO36cQ}v1-aE0{s}2p#M?h-7I1G+Wg_<Axp}fu(1da2dYg;eel`AKc
z$}(V5r#Gxgih{Xfx>%N=jk>G{IJAt#iCtYJAwdjZIZ6<pmp~?a{{e~21j1%KB#Ftn
z;9+|Ru8WGG<;@Ck`M}l9atb)K@BwVFehr>`Cgak#V^QH#A~=WKhedOxAVK{zoO4+P
zE+%{N*Vt0nzGxHfGY<mQsX4INiU;+rL6{;v1gU<$nD?Fs)14WZ!S!K%a}31;=U;&7
z8-DtudLF%<=1d1ojOqL*duZ804f^96(vB6TRBn6}c9liY&}Ro}eU%{P1<2E!GplIB
zqEk3Q<t7?gO`?X<c69UDILZW;qSA?}wBgY(W<4&&l#7ydNA+3k_6}orpJLf#?qWQj
znRaZNt~%Q@Tb(r&3un)7+r^p*-DeN@GHl`T!|W<t&YtvHz>c3*%&u<|WRJVMv5E2*
z*f#Qp-cml#3fl^@8;hT^PUZ}o;a|lL4h^z}CY-@?13zm%DUpT*Wz&HoYuX*~3nL(e
z6*P^YJ6AGPq`DBh3u^IA<Yr0~W9ar}zp1zTa`yO+FX;SxBJ1oXf_iBSs6)~PdVke8
zc7gE<`YJS*`fOc7%?fr?$JZgF!hQVy<@d3Y{*T|MnDhI5<@`P>oZn|U=l3z<{63bP
z-$#t|`&e>*pCz2%X9nl@xySi^r2pmj5#anjA2`3ySkCVw`Y*o^^IQxwwN&w?v>?s)
zbEb6%Epg71c(kp4ftRG*@Mp9DJ-B%R9c=K%FK2q;>eCst%!**$lSiOZqy}HhU8qys
z2E4Ah7a9%A!ERkNCO#HG4~yIQ`lbi{eQzu+6Z#8>Bfmnm%S%`=gOA#Vi_+_D?`b^j
zq2r4Z@#R-7jMwGjp%{R_9K~q3Z9JOqzX)k+dvUsp08RJGqSxBz;>=V{n)$#4?ao`G
zhE5S4*gqa&@iXN6pf)P++D4sdR{zu$w&s36>;I~nJrf<tp4%kI?x7KMqFXWhA^9o&
zn*Efuv=`tdZ@)vU2M@D9dpg+H+*-$O{|p+w$%!UfoL~o&C-9^WU7#~O=duCTvTXBa
z0rtq+@$9%UZKE=-G<;QgR`bRi3Qb5cd^@><H$C$-yI|i0!=(-TS?NxmVY~1!`}C41
zug4<NaE%A|-xDa~9W^L4+{4S@g;$I5%x;&lT{X{n%YJP%l-&P-ZA&a+PkB!@^j+@5
zPTqHm7vAp0^U8S&Cu`=z`D4Q9a)(8q`uW)IEDawLO^C*xY`DkOhN10T-*03+X?Xva
z1VzeXYw0yez1#pAg(774XIHW?_6r2hD!?tPj*!GZeV|>*Phi#<@-$#Q@$L>JS}pIf
zqINZ!?&PO~=L7H_rh%G{2=!RZ`G@Q@sDt(@jI7;&y7&k;Xza(o5j>RJg7gfxzmiE6
zDF5JFu-%u08%OTpk%tU6L@8qU(S;P+rqbcM0&0o#=~ca%G{;7X#%D>>;$0RrRB9IT
zbD3#Ld3m&RZ^6_&1^PuXA8XfaLeD`Xs(W)SzTTllmll<S?W4;WlzAVzx$j$Y=PJfr
z-iiKMJMrPO9hmYq6(h7B;rSOTqw<?~S2^OCX%$eIp+&{AccIypO!Tj*!|^62sO+sv
zqprRHGZh^SOfJV;r)25P{mb$9HapyIm5JR}=6Lp63YJ{qqfc*yW2XC0H0+oM4U^xa
z?ju?JTs4)>asLB0^}_hMG6Ub=KZSlPwCHEK4Jf$#8~x^IOn-$u$9*%eV%_shylgK?
z9RmlctK|%8=uk+b4~L>`o;Rl76r?jIRnR}KLfBo#?JG<_N~c`rGM&Z^TtCYM`lO$q
z&cAkvUb=f1v#0A)OBXc^ysJ*@>=#j|aj7`oUX0FcN~RGPmY|@q4sG0|NPCwhV_;w?
zP0y3T9@kY=ZlfwyoE}ScHyO~JC`#|HR;0!8BRGV)xMM4iPTIqLev$-u+zNmhCnI6v
z-LIHGxCqvS&7sjzSFq=n8g2@hf;&AvgOaTaR;_ZR{E6v!qe+XlKD-5?-&R5MCN=y>
zf1~rkKg88}Aud>yiMd|Yu%_=a`MKgFY!=uLNk^{}pH@ehSSE<hJEs6|`C&*Pr}0fo
zJlqjJ&D8;8ApPzY;PX<&jHn5ezori6?3TqVZSNpy+(D2`_r;#`k8#DxaUd*{i8n08
z>BfmI;1;tSi&7?I@P-sfs=Cg2c>#1x{XlN~eMVxJiGiZ$0r=Xe3`&dVaP^HRtm)Pw
zi*9!l-Pc!vx6=Y{#2$q*EkUyMtR!g8YA2s?9oNrsk;!-s@UuPyg6H&-aRrX#zcP6%
zb7n!-x3RF#{~nmgG{XWe-><Y{1N=;Qj=?soG3&xvs8M(VcTZj*{<IN#buYl`E!V;C
z<!b!NS45JyJ?7cR^g&kk5A@uh0$YTo@uuNK%+osrFJ%&e|Kx1k(i(?y^EGL!h%L@*
z^~7ea4G`Orhj+(n;cl*9M!4e!w9K!?U2D1_$Up%jB`h&=PznyEwxFDT7F=t#NAKCu
z7}-?<TjTa&&JQ8<{N4lE{APHpG6Tl1=W2O!7Ubuh#rV7+&`AwM|BUxUS?Cw^Gxt%U
z%mTXoEK$_K24^b^z{Z9Qe7Si(*31z`Tg*n;?Rn(La5c`=6~JrjooQynYIM?!!bN=3
zuxVF1thh1+A$!cR{b(y3&I^HYFC9QvIS$gr?vOVXJK(;f50H7E$RRU-D3l9@_JS+q
z__a9rsnSUzR1M+Im1D5;vJiyFc|vf?KJsJ8lzh5S1I3=@<l6TNqV0MU!kyB|wW(E*
zHg7USPB;dAT>sA{`R_0$Kpu{a<-?#<dk}W+BUZ-3s3H-Dvl`<FIa>+Z_2ux#?G>po
ze-2LRVq61I6U-WZ4HYLkiIvb1ki6dmU&jA}tdc{p*ia6Jr_IKf^=+e)lM(MRV#g1S
zXG>`ryVaPFb(C$SM=!LomD=mrRaFVBu|_m2nSFuohz@1dw}h}y9=NiC(^}Yfu9bA#
z&?eSDzL5T$%CLJJ99hvTNvwEU6|J15&1Oj`uxs*jSnJ|ODzS4B)!HXQlhnGfJ|dGI
z)|ICl#apodloQR745L52AJ8@94C$BW#@sxlj(ML-;AjxX4+C3qI9rX{c-PXYwP(=c
zrz|ZN{y=q&ui<O-#cSMNiS>MS>gW?aDs@TyKYpJe&hN92^ZQsm`j6k|#y@@^rT^jg
zxy1Q>QU?Cx_p#^vK7*X!XD8?PiQxP`i#Wdz!})y{{^R%A@{ix=znq!#&t9a~|8i!Y
z=bV{2oHO$?=gfT0IWwjIhch$f|HYZ<!#OkMIcKKuhXe@w$C)|8IWyxpXXgI}di?+N
zM@I%cAjm5LV)7osd%+uIi$O99aCw=n;TClE*pu9R9fwk?vAF4-II32+VUvP3r0BU3
z!??NVm@<t{T386_2_58k)mpUoJAoN~*09h_30>kIf_X<5&i+w4DtnNl@sK~)-(}k4
z`N(I<IpoQRK4a<652cC`%)p70ux_I=c_;Flq%QqM*2@Nyk=XgrtCPlLv^E*^#Z(Xr
z|5W&_EJY@%Su!8rG?I5YQ$g#@Ifh2Ykw<+dq{GJ=E>_8q`(_EK?bd)!jyY&^{SjUY
zH^D;pLOSgS50z&oARp&en>ieSK7Ryh&6YIk-*f^)-v!gi+dn~H>^j0iW$eBz!jffY
z&{J$K{aCyOPxi=R)y=J_ek2Lk)!AXZXb?Si=^&inu?_3`?4U(44Sj@DF<opIPH`&5
zVwWv+a!wxDwy!|H1CF#dY8%>Z(56i)e3<tz4;2$W<7vM6IOgp%RHDc5{o!!B?c_3)
z`@@e_uUDdZ@S;)qf5h!_LQ5+TxUp1~s;BWG6SxHxoO3a^)CR@QE70XbU7(XCgBzbj
z<IJMJ82D-?I&j%SiRWpURWcR7AFjj@!Jimr9*hfo-(zc@2|WCE7D>fVFcMRvDlxBN
zq;?NIQ=12gw`I}r(jL@VUxyzCWKdCMCRBLs#+@STsZT8*`bei>LiJ4?j;_U5<9^}$
z&+U-bUW$P;KH#2`I7kjTje=r+Xw<fpK0UM#@6US7<$vEneta#={LXSdB2{=<brf7b
z?16e;7wqaZqu0Ynuy<uU=!U4^s+B8YOwJ7Y_I?h!Jp7A`ryjs<0bhu<#5nwPm&;bV
z%){T)l(EA25`L9drElFG5K3>7g5$5T<ID$;E0LkwuK1I-G&eH0@*3kH84X>ozKlY&
zED17M16@6WG;jA*a`@9E;?&}T>n(pV-PRgdCTUI16hDDA=Pb~+ZVn?|$@P<P{WX0)
zc4)Png@eCtGP5j%NW?BZ#`+9DifyDYeCHkv^()|sxT(n4H$depdlFO|35{o80bQyM
z51bs({74e$E-}Y7ujZoZ+%XWUeiz(Myn#;ZxuDItzePoIL5ZYc$K@=zxgr{rBYR0%
zJ%N*-+=)KVg7NuFiHF}=a?`(?=<#BR@pVPUAtaSp`=t|;b6g+R%>riFScp`m0n88Z
zCYE7uh|QFVq|5sbBbJlH{0eL(Z$6zhpe<3Py>UDFpeIjMjYgR83rXah-7MJpnxCB6
zAW0hbErsRDd2rHKpDY&DCfSxd;O3Om@O@wp(bIcDZnkl5mEN6Dw9gzggc?YyurBnO
zQ^>FtB|d^7uzn7g{}avyo1|383{oR8>l|QZvOl2#VI<;E8B^ox#FVe&zE@HP`uYYT
zxSYV|h6v)?w}iBmNs+3|BKTGE2>wVNgVcmOkaglYquBb8^u2!!JJnUd>{>2a%hjcS
zHvfhXt~ErnSB7!!JWWh;Ov#Rh0jNFllGt`x!SOc&(78k$XZST@sa_#|>{W(oI<2s6
z?IFV7`v|3PbU}W0B0h1x4w2u(N%O_S5W~CyOU*$b{sk~~%~pK2;VceK5Wz_O2(rGS
z5OXUd;8*7xrmb|?Kxr_ES=RWD(YjVfD$*x{w$ExZ<ysVpI9o{?v|`AQ_u))WyAN3{
zQ%f}B8Vn|VlVXy?J~PWabjX~&8;IYwIb;&oAGrO992xAGPlD%7U}iZ?C*~_9$mU=9
zME)LS3})t#pmQIXm!L(=a|56`#uOYj-6N~Yy@+GbM`pI$AgMi9Ml5fYkpi{pWLvN>
zGv!G#+0gliY?`r@><@iF<O4)O{PJDqLdgI#)RJVd#)<32>EAFabz<k*DE9QEDeO%N
zE!H#$*sMr#mTu`{b>9}UN#nP${=Qah?6CuMt&$4coOOq$G%K<Gx4qcfhu`V;TRCh4
z1-5<Q1pTYCo$WZhk1bs?k^MSfn|<0U!)jhGWWVW3kILTNXuNAza(<s4&hNwg%kQJc
z`F(zKexEYV@3WWl`)uU=K9@Pa&v?%7qt5w#>N&s9tAG4Hshr<uDd+c5=KMZuIls>k
z=l9vb`F%_{zt1Gj?{k;)`;`3S%<STvnNv7_+JS$ZnFkyH<IG&dIWtdk&dl_GoS9La
zGt+%TH5hWv%r*ZwGo?6Z=81nfGwnHNraI@$G!^}iGjlYa)o5J9x&JKW8XAC1(O$A4
zBL$ABPKW*dH6-`6Fl;UBA|i7QppMI_2<w@Vg}Hoav*;L;`dX69E+1v&_vDgu+brQ>
zL>Z)2&mj%+TF}~-0X>7UuwHWm^xh5u$D=R**VKthM1#0z=P;+)ZiA#A0ix@g&Ir8J
zWQz3ynaH13Oh9TbBVWFgX_Koo5S8{MlA@Chj$oL9T-s!V((;MS4b8KRrqvYkR-)1%
zd1nf9Cs3HEwGA^RdG{Ej?)OZ<R7>JHK7r0-HNk?sAkuE_aJVG_H(&jYOI~y5g3RW}
z*9)wvPf#KJVytPOhZ?Q3nnC$&U*hH?tHJ2%CTu^UO-}?!<6ouqD7$eS9b=kFSE}y9
z@U?YB=6O1fIldeW7)v;@a2#pskAk<{I%rkt01OC7VR>T~gxjo!73U-{*I+d?JXt{|
zWCTNyc@5mZFb=$L#*tmW(;;!N9PT#{k;cRhB7Hp^?4>S1ahNmb{S=0%;fK(>%V$)6
za|3xBY&L#C{+Lwa8DV>9JtK}z)E%$b>*3YYx!Bqj4<U(d;BfjF*ldl#F>;x(Y^Vry
z;=E8i_%=LpEydL3=eZ2#Zgig3h|B9FL1Cc|CJI)9dHfKLU9lMKZ+!>l9Z~2RJ&yBH
zMne6mAawsOgS>!?=okMR6VEL`(Kl8om&SRozPsUz+43kIoCq_ec4J1#LljS%h@55w
zUx-wpZ0b1tEIb7p!%oAa#dB!Dz)9?$oekUbqiNMc9<JFaN|%qk28FkU&~jM{cdv}Y
z-qS5O^dkkYpGn00(<vxe6NX*ePh-BF3^sGFEy**F&`4`6z3RLXKOUJw&xt*PMTVPj
zLT>@~*x$e>P>Hug4q*LwMUG9yz}4g8NM*uFmu&?cIQa&)KUT#QgUuMePzo1x7((^$
zJQ&k77N6dWfw$HjWW}j$h^&7NHvQ3%8ypIoe@;f*Gm(((!Lhfj5SVp<^Zs2kLYgQJ
zE6gMqeF@GPI!=P98V8ak<DL+MnA^;XZe`GH*h5TC=NP<izQ#1I&0`FITxV|R=rNB)
z#h7uEqM+sEa^iK#nt3T2Ps|!}n7Uy#;{Q;MiMlY2@he=(Im2HPasTm5b;2sx^QaiK
z3n)gZj4;2i+rcC^74qESJ=tG45z2zXi5|9sz3)j<`!SS^TVKmKw$%|U0aaq8T@RD}
zroh#u`tWAG4ha3wA~kXcVL0*^Gko|lNcC{8Wc4<(*sYfMUGxJ%(hs{m&XDu>R)Y7@
zE+!;^^CaH;3&%GcCql{}$vvsnkZ^rIsB<rknTf9CwSPKzsE$Qf?Zc$DMGG?izJ`Rx
zSaL)D0WsSDjya$$gTuF!AwfBVIo<RKwZp8St#dp2&PaiILNCadodt07?Kv>BuYi$r
za@b25^z~Sdrd{`-p=vI+FJ1vxzq&xgqaP5ZZNp_K#^ZO_Sd{-d6Y{+m!Ii0#F<fgl
zPVd`|BTI%zph`D9<7&=s&gHr;{i=cQz+dKw&KP1g^(o1kY5?6kB#GV@TT)V3NFpk3
zkoRSd<iI8k=GffBq?C3NX?b&)@F#;Aw>KZWPTLdJcmcw9KAU{4%_1+aWir`0icC*z
zDa=hPhWi&A;NdnG!hh!gbG%Z4T=5YG@=_g)X71qDLLp$$xRPv5e8J>q43e`e-x1ji
zaT2|730#SegVakVAW`Q8D#Vi{>WQLs#3j<UXcd$Xej!UL^P!>05A)PYNR4m@od1+W
zy!U)0i#QLruJd!)>Y#&oPZg$XBoa+CK^Pxc4|mFnq5P)-yl8X;ldNgzuviWk%bp@1
zq8Fk~?|ovi@d<O-GZ`d<SAbTqDrjW!<A3ohjsM}NUdH*U|C?V;<@i+u=cmr&{M5so
zpZeeYYSq8^)qne`>A(Ec|IM$;B@WY5p4x24fogWU$z^I}F+fi)7H20E&0wDzafbl7
za^A$VmuO;IENdE`&rW}~jJ=SR%*HPlU{^Qkva3>Zs6dP(d$5YI_ge4K&%Z>e!Gk_p
zYG=iMa=*uh)?cKO%WvbykU!k{DU0bEP^W%Z6R34*BDI(^iHdG<qeadK=;9G|*2(V~
z^{y?(-v`FBb-(oKWcwQAiC1E_{y|E2s?oLY58y2Se7yKcmL8qdh<%gCP>alDddT6#
zsO%MuM(r~i*XY-lq|;qvXwNKH+A2B+yUR;)spd86cD#q`Chf(Aak*$#nM?IVYM|OO
zmYR5|(3+J-_;*+dn<HLuIp{T*F}sQu3LmAX5+CEid=)&}DTczma_kl3EOb#CoqXbr
zl}&8*azWO7<6-t}hZt*}DN8;23)tuF19a(C1D@XVDQvum9a|MJhUJ|ZpcPB(sLPKs
zwy7(dwnxbEex7z<>%up)?&b=-jUh2~;H5e%)MLimYTdvVMq2P>hRR1}jWb`c%5d%c
zbF84W1n)#Eu(uSCu%$mN3}*!k@N{yH8(QFR-n#7L?6{EYyh%THu=<;0*@62<46`2e
zv5~6&hS$;sct*Py82YRlYxr^BeAd}s*f6E|y`gt+J<q3TjM1KzPKJexszD^-E#%LS
zM5BNqIE=#N)jBztD|LqGwHjdL+ddK)XpbpIEyVtQA*2?^6HT$nWDXw-*ZVHOibHd7
zrUxa5axY@<9e!vDR0qrL6_D1m0A7e~1>^i+BGfbitwLsiVb~+s)FTAjovxB{Nqx}O
z9D(i5FTvwlHGI0Zlh{x2fM2#Xkg|Iw+DGfb!;B*Ge47>oKU)t6#p_{f<xN;YcR*Uw
zZzf%D0)!b`;>WNKcsbHVy8Ps+{rz<)z4bh@7hIu&PZxv5Iv{^t2<kt}#a+rbq2!7>
z$`28gvbqmO3Nauneh73fY{B_+>oNIH0?LL8VCDnPcCxY=G`C#<4<9SslJp4s6Ha4E
zlEA3&M8&3U#I@EPvRB&R-$|Z$gyY_~jFNEW{#)=+_6$yfVe)_2d-JfG->Bce87Wa|
zBqX6k?R%~Jvv;OQnowj)A|;h%h?KcWrFley=2=J?6BQ*<NkxW;l2oRUna=(Fou2D_
z&vTuBp7V$6T+j9Um)g5M-s|&TYrS4&x>Hd`hq+VCk0XERd~__Hgv*ML;P4q*7~z-(
zzb~!D?%q96_%IOF7j+PGeSg#tb_1y*72ITa1Ki(L(ev_o82uoa8or&3BYecs_wpfn
zBT0+C?cG4%OgKp`Wp3fr4>I&cz<%m|D+7N;38?t$VYK*!3w@rUNtg8MP_dh$+_FCt
z=}pf!m?ZB+Z>%(+`c;-xIXexft{p(>iCVNy@gSYBP@c*x8cmO;JjTp~QY>?vM!iDQ
zF<6SR6lWBo{Ptlqbz=s;(Y}G*+xKJdXpTPLH=Nq+Dni5ej3IIM6xx5?jt)P2F|lwR
zO`2;$N9B5A`!hZrl|2Nd-#24a`*TwJYXfxqwlnWoH0s$`<D-`&sd&3EUi01qcghHk
zh;4wM=PYoX-wlvFmVgfqI-w2Q>J_^<l0I|14S~m6VVgxF-riV@BTNpVL-%@EY}St<
zoiSk8e+c>WRpI5ki)0(y2O2FThCzHad?A*KKKca1F9x7!=TppiJORQVOX7^D2QXf5
z2QKv6gDXnjLUy+T)%#QlM?}rgBfbu%4d{T@7)umnPKD!*&M=VDKvt#5;Iswbh@{PC
za!^YZ9bcV;U5CHHP~ICjt+bR398<-#n2C5U{2M&^VTjx$W7xV^6?&$|lCt12FmHP)
zI<(25$B-h3JvSQFw?BndNsA$(RtVpljX}3of4JwVgEw6_L7?h8vP5+o+rN1MJrzaJ
zzMJ`JoCvH6(SXuxlKAxFOQM|d5L7xZz?xCEc;2K2YG==c>J7RW{nnE81Qvs=nkinf
zmBr!v_G6==GQ59(9Dg^(W9;D{xF%PC!eyQ~>{c8eh*H1~>l0{K`~x%#2}Ult2$^wK
z*!raeM&)#Y3gZm;_@&~FGcvf?P!&@vN8t-UWgKvDhbIdew>)GP+f53=v`3$zthOAi
zJ-cCa{2^TAC6DKu+_3Va0=x{B!K;$Gn6Tw3>8`#8ohp~1ul*_H49ka@AThknYk~sR
zcwjgmB!Bc^0-K%98!{YzxEH~vm=ZW?dK<^cGFMEL704|*OqAurK;0k(o)p)Ub6Z${
zJNOd0u`8cEJv|i?FT_Bo&jxV65eV~=WpV5JVCXp`4sXO-2{|wWAC(;epV8AHds{km
znp`6_)5o#znNrwfpbca8-GQX}lVMFl45-i5Lgl>^aQZw=^t4k0FTWMgvZRxkx}OE@
zr(WPT<0*9Ldqb;+1?Ueu3cufE!NDIX#Oz`*jCq>@ifU=lxatpBWL3g2yJw_L@e1U)
zzJfzLcsT3cVzQd$SFZ>2t6;{b_F#PKfAXt;_|*U8S3WGia$@;aIm@qx|0h26EXJpf
zX8F}pmS3r{{AwM`ufDMS%K2Y>>f0>8vUw27>3ELdoPBn3B7IZ2qM<VwkGO%ms}aW)
z?=s=moz&-E9M|UlEWge9=wIMwD%f!w=x*+~)ERo$=osg5Y6#bLLW}!sF`cVzUB#6v
zj^wU4g>Y>Rj$CEScTQHwl3N_)h?;Ki>1atCD&bIqowNMt8p&vSWn3bit#3xP#_G_S
z$Kz-(e+-QjYN9h$-Kg!D1iGtd7adk9Pb>CZMitq5YCJrS2A(#j)6CD}jdcbzS7Q>#
zzn(*H-#h&O*NOjy0yENDgQ{!Zz+pKzP@|sEKW$&|!k%mRJnj=LCu|1f?jO{z5W_D9
zhPZH70KHw53ZE;c(A<p|u)G^^NnQdxzkCYbzp=v4fs5&c?UA(p&v(3YxsZmgJVC{~
zeE-T>o1j<2l}?(*1)sjdt(up`#U-An4c`y*H^|?ol7a4A&O0m4lrH8%+DG%Jni%m%
zZy&=Y9h|`*GBAwW5$?@(>QXMd>LuUR(T=n8Qs6JEaOD<6+0n4Q+qqub6mG||Uw@@e
z6tVRYgs9))Tn{}HB#s%!&-s3wQ*Nvmv{zSgu?nmBuOAV9QK>oqZ1#J>+{$9XsG*ho
zJgGDM`o%%~?O*l_Bo-PA=D3vUKhn6xr_WRb=3b-t?mGfGGw0{rm{|w-XKon??s+TX
zm&u)Q!J~*=DqadIan;22@p*iOYeBw18(rp9LZH`IHnVjBPFlB<_d}M$fKn>%*3cyh
z6a0vH&QOrj%ZK^?g)nZuI}T)S#o4!qL$jC^<}6Uak2YE;Ma<|fqcK>p$(-68X0tb0
zo6#-r3hLYzLyy<XX=h3*j=a<e(w|naIkF`DzC8z4PF=!$J`s3DQkHIZmZv*Z<+1UA
zEj_Iqj^|bfpy^@_oIc|hcK=C2_w*5XAZ-Az|4c<A&#Pz>9fY@*_+S(gtQNn;=FB(Z
zvhnNb-um;n|F;M32q?j{t^z#G`1iy|6YJ{^<HIFuFqHK@?{1uncdJg~p{NY_6<CS$
z<d8<NUBTZgr2oo$H=NBKy|<5qnluS|?s^Q0*c4*spVR36LXx?shR|HY7Eq-YD6^px
zCw0qG(-aeQnD2(k_j1^NjwY(x4#0=?=P}?%4{Tc~%w~%}ka;-|@ssga7_(oP?q;`t
zMk9}gtzAkB27aM^p&E_VmZEuQh3Hs%2^F(EnY&^;ZnhnTTH`L^W0je7;yZ1+$mb-z
z;v-5gCD>8p4;MlCdLAxRlcJqUQ>cw<9*WcnsIb02o%_s>t};=epY~YNC({#frSlHj
z)~rTfj1Z=hnvHbaMiIKh;R+5|i*j+pd{C{@lpgDzPM1}Tq^8fsx%%I7xb9sjJzw08
z^FxK`@$W-v@gf&$TiTB8_Rmo5#7O$lEt}#I6&iEr5vp!+M^SSL{P{i>Uv3CT343YW
z-&qekh1S4VCvDg^Z9W9u;PBTbHjl~X5k&fh=#UBDVdwp~n9XKV+~4-Wqo|?Sogsq?
z7xZvc-3WYTbpo@d-o=2|K`0|Bj~nN@qJHue9JQ_q&bHozs%^K3oZ2-={ip>KzHWqD
zo?nUIQ&r$LYGKL}HI%+E9ep;aVA#@^FvDC5T%NnZfd>I7+dLDr%uS))>NT`3iNgoo
zDR?*i5bSOF4*S20V#T8bIHaBeUp_QIm(>LjSvwEL=C1|m2PtsDGlYz^e+2n{5}25r
z37yy7aPs<}@OIZ7kbS66L>B6Tzp6YO$Pxp$%ZEtUv=`7Ke;g*qeIv%3I$*zN9a`KU
zjhi}l!>^rLASyo{)FcgX_C<i#La7kI_=9H--X(fz-Naz`6R7%d5-aY_MH!<)s7t#A
zUi$*c_c(c2$a=~38;bCdZ4$i9*^P^(*Wn_u5oqrrjt!$fVSh?GaHZ?;)#C{`HS8rg
zM$6NKv<B1{-h;7aa<pN2J<M~tkL&O6!|_UCXlI~96V7aguj?Z*?m-j;>5Je8UvVth
zxf5sT2cTc>RuH}(j6x)ju}t^kSLWhgbuR~of6YVQ<wZE`>r`~QnT+S2l!1uQP;7st
zPgT;Hi__s0q^P=KALCm7UiS{$UVR~-ECo2y#T)mxNTKxNVaQ8Zjh+Mcpvb<fu2m#L
z!sxrW?PDc;2^<egykj9kWGMbIj)h~}Q^9IW3G8mtK@!<b_M9-m6C)px`tR4EFw`Dg
z_WvP?CnfM)Pb!JGC<ia6nV@}fHiXxw!`FEQAS5yow$#=Eo%@sw2r>Vo<4V}~;36nU
zu&le)4gFpzVesfm_%kgAWope}zCjM$J^c<|z4$@sOF3*%iG%g&*I}cH114vrki{o<
zvG>w&kh$Ok_ID-m6>mE(xm8NkN>0Pnb3&-+ZH3Pl+=erZPaVnl)E5|^T8r_iMF;uR
z8H`W;gYl_{GCp+!<5Pe97oWN#?d1Q)ryl(;K6S$&pL!|dW36U<YCp!OUdt8JyKIm7
zM{GE!x@Q~LZ!g5RQa{e^*Hq<xiq7LgwS2i#UxfG~lP_@!?ED&bA%lA*y^E9HZO%Cv
zF6LgE8*^Px0=Rasq5P??i@81KN}TtaN!&br15TyGi7RcI!No?Y(11AscxY!C^*z{z
zMVsc(7b_&FpPdEGEdm;<AWyw-$Kj!-=QMP>5hwh19Zer|6%R8n)OR-5toHadE`NTI
z=H@S^-y;2R)So(ZUE@vn&lp05#}w1jHDmuuo%j#mXEo#d_zd!W$ROWm|G)S?M+W&m
zJ%fCoCdT*a-Jwo(|Hb#I8RYvYKKkGJJ~E8&bM`+uU*--vU;LT#C5Sm+`q-KGf;nI0
zne*iwbG}$G=gXZz=Zi0MzBDlBOA>Rw*fHnJ^Fim!tU>2X26MiYGv~`&hhp@qkigly
z4XCs`V15AG6^Q<dj?YHZ-ckBA)ABCXmbsu(s5&N==3-TT3us?BM4gwnk=ap*R<%hu
zA+!aXGDqSyYbEODx`)~)+=9ACr9f6Mp_}#%{FT=u+|q=<_^J{oQ|CoPOMB^?ks;jN
zfJ6N0DK|M@pf{x}&H0gS?Rdt2BY&V#oiDP{ly4b4gr5?Yg0~uOaXL98_|G&aaC!Iq
zx#ufX`BQh~^7YkHx%aNYbitl7Zp@*uzw$WdZr;e(+a@ZIIeDF%m^_6qH%>+1vb0H%
zcR@~Ip&7zuIII?A`3i9sr{wj!r`q#X9@Po<r5_ONSvHcZ*j38!b2!EyY5Ig8;KuU9
zV><<>#*f$k-akQLzU=`QesvB1?$a<%YMB-uyk-eT5n@o!{1HwrmZ0Aij<>`<gXYO{
zqLy|TiMA8@(<q5O#<rN1J_emewZY}C-#o=F`>_9@7`~oXPF^ds&RC=c%DKB^($DXZ
z?=%DKdR0*Is4wjE2*WGg3-B^?Z{@@d;QKc(@na$Ljm(&ZQ%221hiwt`NBd|DY+j0E
zs=F|@Mh)-Xbf6Xa%kfEhCH}DvM*loTtk3xbXFQMKlh%u9bxa?vUdz)l4_DEhMZvi3
zl{@7Mdht@#F>ExsfpdZ@u=BY;wVLFDn=ORV{?{AK5|+iDwo2S|;ttl<{YDYZCv@q>
zG6-2OLhtwZ!gPTcnrOEpzdHd_XP<>-r%LhUHZdByiA^jna{ViH;`P3*c)Y0t%=GLj
zNFHbX<Im{t@EQBXmg6;th4kpnci^Cuftu32Sk10>E?0Qqq@)w5ed{uAaB@X;)ofgC
zK8lXgh{gw<L+KW34MIjQ@!>cD%AU5MeY3uT$EB5Y$1FFNcg>?yax?JB*bJ;~GvH{$
zH+*%q4i}yKiLjv^54A_qjR7O6^6!59X7+|&{%D0p-+F10<T6@)JeaBn|G}M(GpM>o
zD&Ax6Y~43(htI{28s5^S8S6snT{#n4WbqrD3MbOvZ7Dc@>lzx-J%z6FlcY(WOqcw!
z4>bmMB3mn_{PbWtccMR4`)Z0K<Tl{2c0KyQZUn8e51@B~w$KXo!!$q;jKEyerEB_;
zGw(r@JQ*rN^WeE+J%~8`gnk1d?0zIoWxXaN&bk54H#TBs(r?f*IEhP>r=p#48!Xt5
z^sR(7NG1Qk;0dR(==WtPPMv}d6W3$=UO#Y1$cI(DP`Im9h}yUHQDPOrv#Y1W%CxuG
zb}t+fenbKH^E6eDdk<R|M8LIOw#<XK8a}-%#&|IYj93uO+%PTp#$gs5eLInMr47gG
z_!zu#)e8bA%)w73QkbnL407|PqM2(nrVLHRX|=i}EiIEQkGVxuvx>-v4R44}q8-N7
z?}Cny3OGD<2idkP2|m5J1kz(NU`m1{+!m<;yX<DNXq7#r3jCoV`2>g@Od!rLEnw`E
zaBvVxfW#0{$Qm^Yds?F5sKE+odz=Pq<FcUXK_QIK&jiBu{cRLY!8Y~=6x0ksMH@9d
zS~?GV3oRkX?kt(;ydPsOnSt#+VSLds8BIc)G5@*(9s)i5mR&$%K86w}Z!wU2Erfsa
zY{Bf#F8uuS24t~XyJRSbpGSK@Kb2$NJQKV)QxoSD9>vIgmmo>{F${0i#Ax?&@QiH1
z>+3w=*o<|cQ5=iDKOcbFt<@NK%m9U!cd;Jzc_6=K&|vR#xR|Gp0=5VG?LZ5LPo9K2
zOJtFMx)OBQd4I~x3iW55hyHCh;HvskjNmdL(jy;l37gX=Lp^Z$!nNRPUJ3ziry)Z=
z3Og!f>6x$Z(cw`L4tywrmc`+4q5UJQo;4PiN?rpUkpS4SaXqB!zlFvzvq4_14LtT<
z0mndHu;1_yL_^C!`hqa*d2<$8qO)L2X*PW9sweHboy1!<3ADy~fgby}LmoDuRoe_|
zcGrmLp?bJ`A{!n)bp(UWis;y44-TV~p{Pa(OP5)|(fUM^P;Us5af{GDY9^k!>H}rL
zHRPe;Wcb$P3E@AQAt+uPVm;f)#>`?6(shQ%nG>N>JrB0ljlzaXKd>CC0v8{N;QiwU
ze`QW4q2v>t;xvkDzwgaG+fzfAcbuV?XBTs3Zb6(x$$IYI+9ukyTaF9gtihfAq|D#0
zCd(cFtjrz%c$Zepl;j$w-KJ=*$#pNw=3K-Z>7PZH=;}p$u6)N@?pJ6vci(ITr`+Oz
z@=+l;vvWP|Iw(Zxl*>4A7ScL)EpdF_Xlnk6ad5=KFr@V~UKW`|+jhHCo5lzX3HPS6
z0`qZPHqbf^0UgyROpAM)U^;W5if8PlCUb=;4qZcILgoI-dH!&a?_)m5_X%KppA^RT
zu^;676fnL|8sqyo5AuD)8Q<qV<NL%gzE9X7-{<up-)HL}-)AA?`*4GNpVf@-V>Re}
z`G=XAz?hkegU*+OgU%O+|Kxo6hncyaF*E&`^X1f_^W`6A=0DDtCCvFE%$zT$nDb@N
ze?#Ysqq{Jz_j(48ubx8j)rL@0`y1!24Zwz?v5d903zw2y<g@Q8t9%zY;k<~hu+N0j
z<G1lwg*iT9`;}H-&f(iBU&;8@2C%9}oGNcEf)d9os8<jKr(^)eR)qYO_tk$P3jV+T
zQSr7-;JIWWdG|sU#%iA7NuP9pE&3nGnR{WRShST4Ng564H=jrxY~&ezz5t){WQk!V
zz+1VC#G59wU7-ok<FlUEaP}1MYxXvXQyoX_yELIAwx2i7EQ6PN`aAgD45N!@XW*fF
z#>;^5Sg7a+;&ZgQqvEeI%qtdOC7!~axe+)kHxZvnh||R~-n2~m7rnpzCGbOb(t70u
zSUKkkjvj77!-|*DvYOF&@u3jo=&-ZAHv(7xd5n>_Lh$~ocnkp#9PyzYM`jx0lJx^v
zKF$^+EOl8<a20?1&BL&_C((GwR%|kiz=}CfP=5J0I2say&*#mgXIi{*ma;7#*W8Mx
zlYZdN-n-DT!5FQdRx>8Vy1!B<Dy-g)Qfbj(+q9j|7uUu-#;NwHFGaN##;Dvkl_u<Z
z4NptvV!*9DtX0vW`<+ZNeX%QAnVm)<1w_*?CFoi>fhsQ#Lbb<#@CM6EWQ<F3y^Aug
zy0erX(Yg%N^_la0rY>d+60pyHE)J|0rKMf-&}4-w?cXyLt!HV&?a8C@&*@|w{<0U&
zI_<<$i*JDagrWFUWi&P`zea9C3y{jY5FhX!^>+rqn$GP|eYgV1%w{}%+zb~_y@{#E
z{c$PtyeL^@VEsgUtk`XXUd!chh<hLe9#*8+DkO22g&y5=D;-Eq4ODDxg`~;LLE?Tg
zDqd{E?s&i#_Ka(}Y9uvJu|o54)8P6Z5vaamfcFhO!SLz^Xr3ShYH_RJl;#1LU$q3z
zYz`!@B#Mj|xRRY&t4M<IUg$Tz&D&dGPd**qL=Nw=BRT1Jc*zBSco*2;w90o1+5O@<
zZ?$43K-^_svz!ty=Vl(S_o)!s)T~c5@>W6g)&z(huZA8q^I+KSWSqN06NcZ}gC7lz
zprm67@%=U$5>>_UdAlL(70QHQ(Iw#c^*p$KNd!xo9@zU^7Gm{gf_JDXj(sN!3#+tY
z^2|dJ)T@XkIak1N(Ofu@A4;56N5IT$eZ1A~BSBh`2YL0=;etRwP7G}4wG7oJ=W-{J
zhwuD|`FuNgf8aJRK**1%+g>L#Uu-7IGKoaH_6$*snG6Q2&+&RxX7i4<j3<%hOTpuV
zH<`d(e>N&lh@I9X*gK66zKjo3Yit9&A}c(5+Y}zAk0Ra0)#Q@oO{li)C)cyJ;O(>P
zgjDfKlV~8(J~bZX*bHFg)EUI>>jPNSt3$>~ZQ*6j&?5X*)lfQm8b~-t!z+^>-j+4O
zpk|g26Az9j@*NtWU+GF5JfnGQ=}%C8vH~v3)PS{rJa5{TBHsC|<D}m;iDxzY0^Hmp
zj*%TVNV!29x#R2rZnn2bleaGKUA`l4pvM>*zUUIG&0F#DG#eD&lm?e~%VXH{$>@`I
z84mgeLznS9K(|c%X|fiqH(Wyr<tgBFaV^-!BOKo%3B3>WvG5kF1CN)&%$dHBQgQ>Y
zTQ>rC;}{X^3n6x1mJoPmF0q+vNgm8pAwCn1kifhuBJ#k4r!z?s&Igv0dV?08TloTL
zupde;EY}3vw^vEgfDM`SY9u-OJC)}#Z6z_fD+Wq?YRSQGV~E)eHCT7>14&NO<H_Wu
z=uJtwMW*B*<~2u$kt>n&NN9LIX^Xi>a;6_6oqI~jWZ`i{F8T%UOZ!XGAS_NQR?0!x
zjY{6RF$>7&g|A6Vl|QfXQWbCi>Ui?5y_syDY)p=R+exfjx=BaXbTVC4lc!j9k!%d;
z`73)Y2Bw-^^i@B)@qi>}=PAj3j332y*jsV^*F!0<VI0?(_lWM!v8To*Jgy?_6gOtd
zD9&NhFiy;9HWw*#oF1I`olf#9<i2=S(el=>H2+;TjZT@#?aQ9YMJzeWIdzHumEPU|
z^#S@{_{D$tK1Pi1<IVU!p9c9p0gUf6gz<em7~g03zxY1*FTT$i#`m#fe4jOp?=y|@
zeO5BQPXpuo7&5-kbH?}CI>`62W_%wh=6tz4=zO^|=zPg!&X<jhnd!BvodR>d{0B2L
zX3+WajWIJVnDeEcF*7xp^W`REW|lE#X3e1UrESppBF&sH|1dNE3lI7K=^w4~%>>`@
zYrJR6QovP05k|5Zn@E2?Dhb=cw5nMsF*+8kudO0SV_!jp!4)E6`xM@+`U%#Dq(Qe<
zp2%O4z%A)f@MFOu;L6!-vf3wTklFyDW;H}i&;tF+&;Q%_Sgt;lBbA7HBLZcJ+3o<|
zb2|+pBAZ1<-l*pF7vz%6;bO!%xkAq|U@>{$G=-;@Xhg(0JF<HFeqMR-VZHv+9VF$n
z2)TRX0?%=lkKX#I)uhDW7O(U(;khiBM=sWy=#}$tVtu9)j#Tl%OY$YCJ-!f6Tx6ES
zFNz3{gaL9M<GBV2bSnRdcVm~p+T8*0xlRoXYcG(>Q^EM5$AnH*0pK~$1w1&P{t8Nl
zy7xt7)3R-_b%Pa#Wto6+-7pw0V9vO-)8JpY4jvhu1gGpda8QTf3!5q6zGFYUJ1h*z
zm;GT<ZxYlQ9AUFguSkbiK3tzR3tn0o!0%5*AjkM{vJ-}&NzFlc^(BlLU%N@lQu;t_
zef(d!KHtK3#1)=0@R2;n0y{@=XO0Z#x&t`!@I=(hDn_1n0(@F1gFlilf>B)w8m%mW
z`2|84<FN^)h04HW@_GCb+l;LZUKkqGh$dt-<k?wb{=josDyu?^N}V8OT@jv@_6I(L
z55=X$GLD2ZY<d(Aewm_JGGK*laubJFG3M@#VYJkA2IH={;#s2>oR=&FL)0ZPF(DgX
zOXT8nr#-k>jLo}U3q^aGAH=b=1CANZ!Q-=~(c+mdazf208DN7WM)~4l#RSanQNfay
zI&AyX&*qt<aQ7!K%o)~zm;LYK%9?9vHpv`$x>@+xt$@wxC*s!?74Tr`Gd!Z?3Fo3d
zK<aF3{3b}o+fm(6B%Fxa4eN07@S|i?D`5rlQsj-fP7b~w1EX}Kp!!KXRP`^0#?%&w
zxU>+r?~GzQ3yF-&my7A{jBU`+${Z+B<Vw{W@cFEarz%2W(F<i9chHMiJ{7>F*`oOS
zjU$+LNRaACC*BR&1idb8N~(ib=p{TNyu#6vBz8fF-o81Hc}*H~Vb`)MIOZb5TQ1f_
z9{W}D;u9kEEHd+m(Rf*2>smhZJRQ>0kXxsxq@2V1+P79OepZ6s#7}Cx`KIDLf!ZwS
z4a_3F;xb_0!GoJI3rR|^7O8$94M~}+;ozww5U*_rN?*gE^_LH6GzbEo@nq5`r$VaI
z4}(UH1mq9t0c-7Wh~g=NUU?ZjupLUo4t*gbQzhZU`4_xbDjZDS6-q9tSQGX1T2j!v
zk_>;AK>VMy@Cd7!ZuLbGTX%ph4T+#P$)9IeI{}n0PKVE8ZoGQeWYT{BDv3k`a9Zq6
zp4^!Xx@V0ES9gS0w(KX#5_TXDjCO$Q?{3(a<%^*58-i#529@>F=+N&5EneHPReA@i
zZBGLC=4&v_WCOl(JAi85vZ(Rr0p#!g0UwlCp?k?HwAj;2x-#=nK(oofD^E~i3_<4=
z`LLyM4Xe3F;`LqbWXre-L^^9C`C=AM>ZS(smYpBq{Ye{5cDxdU4z7q4MQtQ?R)OT>
z3@wPxX(C(Bcag7haYWZ~EbqkQz2xquM|xt#Q+N)sAw0`*HhQ&n<H*(hVPxX==Xz59
z1*CHR4zj$7Wv(s?5a`PCI+iCVZmlEpS<YJjJ{^>@e4wC06O2EP19__hV07yXk-uU|
zlv^S|JyRRjWe<aE_mx4W>joiFw~6B3WZq;^gp$_tu(@Y34A@4(gdMX<_0u?_Qqf7|
z9J9%uox4HwnLHTzCWB`|CN!RB*Ir^_5OSpe>hEboy6i7Fs$5AN#Nt4HULcgjZYFc`
z_$1xDhwZ$V@RZ4Xkj-3AYBwE*ueDjctFt8G0n4w38U9Z`^&`fo4r2LLGRv>l59U`V
z2l><+S$=hVkWc-P<yX=F;#2>VUp*M)Q@b)gwGqp&^jUtT!}!$JEWetwx`C=1exPLU
zZ7${11)8efPJ>m)aWB%sxLm*8+`P;Wv~RH%ry-rn)vv4JK5jAOoDT+YH8-Bn_Lv!*
z=xzrp)hNPUCnvd|2TxJ8z-2UUr#iQD!)NNR63^vbPo@9#Qbk5e(-HWGTF*Ac*{ql9
zU)c!%^il&^FZCnqrFIVXQqQwqs^x#`r8W-sQrELyY8&gNie%rR{J~yoI_srgW4+Y>
zrwf|@$NspQ&#doXl23P6y`@u%PtxJft?1{6&#-&+PpD0Ep>wR(;T;D#TKn`g<e0|c
zkXtqA9`P9WuTG-eoeT79=?Uyy?TF*+(`ebiDcs#`0Y0N9(H(JJSe%tg>licgZ#-nN
zWOq*PT^?Qc=qvYQ+9j^K>NHnAVIOBw@t!X7e8NS3jpo17y+yP1_334ek^DF8x<_$W
zH}^o)lJ@qm;Xb&Y;7Z0jbJrGKr(r8&Ia^U#PB9QU-PB*))ni|{ZF@@p%KIwGQ<Pt_
z+>5_PTU39=4jsYU%tC(N7A3*&J|DjMQz1d<<g0?D^FO&+b(I_*OA#nIp5+!a`U^(3
z4&(1Wa+IG@o-cTEVTj<yPUQDz)baPZDF_}l-{Fd!*9oGJtr2X~s^J8(y%0A4I*Bnk
zLtd)Mpl0DOP*@`hHlx&G%qI){lplray$6WOnhavxtAR03C!=4s7~VQOj|6?n1C1T(
zr14HU7_L)+#$!*QxK@MwBJwB^rhrDjdLZCUDyW;uL*@$((kvW6;><_L%{l@nXQdFY
zX-#lday^LIdy{*w=E8;KbMT_$2N5x-A@Mi@qb+8^%yZRX+;oqGn~#H(cX{y0$Dceh
zSAmgP6h0X&C66U1fNZrm=x)`*1hH*+^pFVFDz3mfJt3U=eke-nFGJT(f0VrMj7CA;
z7$mMhUpTE~`@F80Z)lC%YTv>29$739HOJj!dSR)=1gxkQ#j;b6;L-R9*b^v?!ycZ7
zBiH{<p18f!iF_Wp1avBvVWELGT5sEg`+lrL>xB>DSw#hYFF8+YuBzj>M<r}ud<z!q
z8luS`739r|$Nh>D_^87VT3JT4>#aA8^7F*F5e;Ofk1aZ>FNKN0S6D5)1>WwML7R=5
z@aSZHDm0=Omp^!llZ>;m(*FS#NDrgNMa-KdKZbJmtub}xc*<KELfI%fjj=Ds!?LBQ
z&i2&XPI$trf@k#g<t}tFZiLTP&+vd#2v#nfh8FC$(q{Q$@Z?KqAkd~+`PYy;b{T!v
zeME27A@n?B7CNoU#W_D`(K7+N=sN`!YM4Bcz7k$QPX@j~f4$2%?5;0tnO%nU_eanP
z(XObJ9*CpQ=HklLE>u6niVi91!F>Ka?1=0FD~C2*zA+sAj1pk<Y8_bqpcQQPMq$g1
zL`<8j3wK=VK=JtuEZw{p_lD<Uid6%+wd$k)YZcTH^CvrBu>HKV4tVTtCz;YS3f>qE
z!*R{Sn7d{y&PeKk9S@yh_n9UL4%dYY1zkw;8iE~p7s%mxT_DJ9hPs=V(6Znq*<rH+
zd)m`c@zX1KC8vYBZXLwta}G%893ZYwOfX=}IWi_*6wWN}g-b<2%prFcY&uRbKM5c0
zQc~fY^j9*la5Vh#y9oFz1P=Dsfb!N)<epF`B${r9o9-*f5Q*y$yXgf4%Cb3e-xxAd
z<`2uR1f+@W_-c>$$IPO3*r$Dt3^b2ob@nsZR1-(8i$s#b_d+Pt(+^Lv9-i%<24#t}
z(4}4tZk|`dX0`ROuWmOe8;D`vXafjt$OC01Q=I2{2hMLWgXe8I<Se^RIuJLESZ~vy
zrK1*r>iYmf1Cro(Xdo%}*h<c-$$<jnhP`_?1z*pJWV2yY@qv~EN^1zSy?JlAr0a@O
zzf5pqzY|7POX2*aH1u*-#|yH_D1So(!?$Z;E`3e3s~4gE*IQu2+%&G0?;uk6E>sMB
z!s@J>SW*s{`$8Jdsuqx~lQQAM>qMCK^*HFxXoJ|Yov`U`D`Yu@lfEOnk-E2_P)Ht}
zu9}Ufx69)BEz!`DV*!`1ixCJo0p)w{fC^*4$~#!Xj8n0&c*OxYHRLjY$W$EGiBKh$
z3m@xm!rL77dD0pR@j5M_(pLt14ddZvpcRDr=|G9XeaIg22`-eC!=f>HaP;6r&|<8|
znZ8p=)jA*0a#cjdNhisW<w>M{#trzrECS~3&xE|`Yl!C9Kjd3YJhTKbPj$8!$aGu7
z+C@QZch47J*oKnjeFxxz`grt^d<ljRroqh-Rq)W4%}AsOV-=fW+;5^yLK-g;wUp<;
zE51Q~oLL8!y0Wk?))a4`0;+QU<Ujb-|KwMFEWhexeCj}!Ux~B)YS+K`)LntDl*ugr
zKR&e!<5T~WU#(;LmF@o_zxoe8^<2iM{wKd$v}QC{nD>p=1O;(^Q6;odsgFiWd2o>{
zM{#|-CUYi2L%3ZIl3cEYFSlc(KG*j;kO~i>+`aAwDiYw!8UGo@`7gQ7tv+qYO?}r(
z`%da`!NDq=U&cgkS>!|Rw(Ndx*`R;$Fzclz4*CbbGymZJ!CvZyLI2>@fB6U34faw`
zvtH`|$v=3S^-}i^_EJ-rfADYACI4OSV?wJw^*M5q`7F}ud(IPG8~V^S(wlb1pF{ay
zKdIrQx#-B|Hq>Q3uwj`iNIMzgY__w%LD_{~nQ22y4-`PhezvEzqM7~(e?(U-P)AjR
zvs7Y{9-X)F7tYh?{>tkilIOvnzM-9)vnicRz8l49eF>!T39Wp)7tP$#Xn%T`?M@{Z
z&E_iVnz(sORQb{6-h9342mJS8*XZZD>-i^|(&*Ul&0LWCQ|`nraejC6H7*>Q>Gs0?
z-0h~boCE_>|Bct&@mq?Z_*owRo#3#*KSG2r>XXbhRJ-U;$;uV@p1#GkR77$KCsqm)
zQ?dk!n|uUcDvb0qF8mZsbFt=IdEFIc#whZ)$n*(*y{Zya+ms8OGExK*o=*IVy$bx(
zhClf8eYOa?vixv#`7sn9bCeuAY=s6Ltz=rf7Ctb)3!Ar`B&t_m!wLIYWWor>1-iQv
zrQKHH3dWxf^jJa0X?Mb(DXU?pxGZ=Chm+SP^Pp{KDukS9AdR#AVKdt!ELzqAPuj)N
z;P?^@DzZlTC)HR#BniaU9mR%WYr)w499|XKfP0){P)}qCT~*Kr=@C-Yxa=H`cfAc{
zO)N-WJr2ed4`7a!F22sQ#&G6JYthrjsT*b@5mv!@zhm*9Wf8dD(Za6^saXFo4Xl22
zu>HqUFcoGyw#@`jm<HeqlSI6E+>5cZ+i;>+5$^4fqW$xW(d=RpR*Uc<+e8Q(t54zq
zHtX|b?E%<w?Esdwb>fbAwi_c={#UMXMEKDVyL2a9<B3pTk)`;6?OHEXJcZr|rE!D$
zV>aVi1m9}SaZ$o^tZA*r?Jf(^?T9_*&M3yt5tA`d|1&IUc!iVt+W<Om<9d=vY)6$~
zOhN}p4AkQhjD}sr5p{OVqY)0Z^!+dm>hX9R)h*r6<{$a=?gejJ^JfnASFgj(BR*ky
zSsKloV?ZZuh@n>mg}Agy8e4t`A-9~}ztjl2BuoQ;Joy9HN=MM@>ZNpB#0@HDK7iSa
zQZPz59KThbLQj!(IJ;&eMjlv6AFexu@t-SkWkv+P6TJ-{Kd+!5Oq~9j6oQR^qVRj&
z2&#Ui5Os%}<G8vwR0^}Av$AYx+~qO&YI_J8HT=Xoxr=DIlQuoRBOQ7(`ytG(9|gYC
zFy8ST&cCJscVAA#4IfpoZuAS-tZ*7W&EJQ9&z9o};d<oVzm3an4uOl?8xpmpfH<5r
zMa!deF{kqpbWT1DJHO~)Y)&99uu5SyZaAvN14s&#aLwvMeDSUcMtaP}9g2%!tb#AT
zG08*AxqP%#ONIUJd2oKmIjpjI0?QU`#(@wy6nrs-e5rA`-7yfFV<#f?UnY{T#UZRX
z7}6HBLeG_aJlI+To8Fi~^iNCV&j`U0Tpo~})6hKXGCX<V3k{qF3SP{?>xaTXG4MWl
z8a5lBExid>V#09xVjHqeGN1f#cu!m{|B$e+>!Ds=n<zxvkmRMG;D+>Jd~3}<tDXU5
zeYhHP3-hpQh%08MnW0u@BkUEsO62BEz===f&}`NuoG2%OhrgVJ{SFJ!bn+6gh-e3k
z&`hu&5>5>KUE#^cSXif72D{CTNW%e3LUc-D>*Raj#O6R>Ph!1Qa~XX8^AbF_+=27b
z>R^R)EqvT?0nO^~!G%Ms@l;&`7)hKWHCrVx?RYva7!v^tBLiV$*-qTnYl)#<1+bC1
z%ae|8#_<ao@6F^qJot4DtXwQ`$btu`Z<m6N8R}4G&_ujeECAz;m&uf>b#RSJ<DR5)
zsQ+dIxtH=GeESmEv#SwKXqkahrw}e(GZc@Fya><Io#EkieP}OIgNFmhP(s`W4fAry
z$$jQ<|A{7s#(acb{bR7#&jHrIZGbK7lZkcsLeNbYhaHpDP%-Z=+vfBD7l$a=uDg>6
z9S{&2*h=2_{2|)Db3ifJ9)4sfz*~)1BqA@GJmJPc&H^4@_3I-m&KW>ojUgO!&j9yP
z4?+8$6Vw*<kkIBMM7K>1Tz7Vm*mJ?)_A`Wt#}9>s0&(~~ZYtRp9R*fj&7gPp3!>cU
zM&8A$;J)-?a#N>^TvucJMj2T!IcptsE^p_ZS~dyhU)@7G!r1lnu;uU%pL#OmQ%4Q*
zsgDoxsoyd_bpqp4|6qLT4TF4YMaHL2W_;?LfAOg;7@u01@u`g%pE`%}sdX5ix|8v#
zEB?i&cHO(4Gr6b2P04TIM8Eb?x$lR#SF#(phL2~sl%aRIJ*TX>Ae{itwN;isKk+wr
zsY;Chcv&&GYmosL=yI5wvagUED0StG23oj{`(99^k@vVz|Iys&_pS6>+!t>Bu+7|m
z_77SO`Ujl`d#T8JscV>jP>=Zs<5(}%yZ?Xs2N$zmY8~sPYOr4FKmNggda1YmQ!jM{
z>!o)7rC#dzAxlvAxespHHW}rgs!$)@v2;h1GL<To$B}+WSF{&n>ZX%)%G?$jQaKW(
zRhG~O{DEd{zjDK}DC8Y(hp2HS_{dTUca#`oes>INA3u(f?>RiDs?Yq3vUKOaoG&K-
za=!FSE=To2=gS=Cd@*It7b!ODYs{Q47A(Kh9(2Bp8g#xmnSP<7%=yyIoG+rx`6B%<
z=S!-427aW)_+{65*fsJ94Twrb-lQcsZsu94yEB?bE{~^E`Jr^%1z%`e<qz{K901Bs
z&<*XTG|VdsjLbh{cEvcFdNYWgsLsN#$6E2H1>oZ7WOmIpn%>?x{I8s~kdVg(2bpmZ
z^H*^0aD_|o4CKPh)woqJXLF~DG&o;Y6x^#d=A3SpazdpJwCqSK8r$Ym1M@MoFz^`n
znNH;E_o>qn;>+leZ)W_Tg3C1QxgRw@UO+?Nh11{rpZ=A4_wBrJ0SYz=_PI?I^eGka
zz1yV(5-zP=>o;Tm6V>DVl*h?}tKqWz#_w(d(`}QuK*>9_{K65Azl(4Ifd+z}tyTj5
z)>>*|8PAOi)700_DWfjkLfkc;0{@kRGdIgGja$C|KH0bS3<>S9g|*Ci;8f-Xz2j@Z
zIdBuyxDSK#=NsW`Wj~l^j)(B0#$eiJK*S5rLtxrT;<Qo{4!@cWABP5#&HL9AJNr`v
z#$N<~mivtz_k*}fq>;nDZV>J$gm0xoKyz$1UU$0;+p|-!%4jZDxuk;AqW4gEj*r)M
zb)mQ25@)9;p}(;&9?h1gerNZh_Wa?fs#yg6(vp}gFHALrLeV021N6_yg4IorF-v0!
zE&1{bmD&G0ukXO0XV>8P&|>WJu7y7z58{{=kvJlBJavfPhniY3ko5TkPQnMcRmu;u
zlI`(BpgoRwrh>cYK7*^FTXAD|9bPHq<7gWK!{ohDinGLnwk{Z19Qs$@yVsU2!j}7$
zu;ceedhUoVN_u}oW2v907{3Z<?p2_3lv+T;J`r=_7iRH?(!E1{&@ny=EA|!ORt*k+
zsK;RbH$8eNJraG-+{ISi59HyD8hmUq8FjYuX|d%c=<$`Ho2yFU+1jc2O;Lg_RTJPc
zoeA{2*<SP&9Yw#~)Sxk`>+m$H(t4fowEtNt+FaMgM!8sYImGI?qyWa?-bX#2-k_J}
zIn#FeL#Q-uGL>`G#NYhYRPs|lE*+7CYuyQ^b~NEBkHdKTsvqWePNU1UU1+hr6t&1#
zLNoay#`Kn;2a;nk+~pd|HtEuP$Bv<UK{9^55`u4E-b9tGG1yrkK^@gCFgny1eSe+6
z2Yth-iLeyCA|;9<7o$nE?G(7MayI@Png`y(HKejGj#yXE!DlOXVmOS3Q@q*8zhHt#
zb{7$gZ;3=xuK*_8Qp7U_6;QZh6@Co5g=tx#VBfm|l6_ym=Q+nn+UB|J_9mgK*D|bp
zH5{rEG$Asy9QusD5%&*95IVjQA}!is#w~HYR;>)PSA=1#^K<ZEUZDr?1TeCO2PbP0
zN8c_1rGXk;Us8Y%cKf2^v14$B%?(7By@tA9?O1x?K0G)tjK{tCWbu^6U^wLjbPPxU
zsmp{wt#aZy<R(aE&%jS0Nqk@xI2SjAov#&SPg8@6W&x4f)C8^<6=1Y<EWj$}KUZCb
zWj=G^cV;QXT~s2&mK1~E{wmV2un<-!A0={rn#4Tx6-YZflX+1WF`zRUtoNy+^%xhJ
zpz#_0G|q)LC*OgPT_OlA+XESoyTO5+gdtC^kg^vN7$H)Ll6TE9xyTQSm)c;G#2uby
zq!%n!y926iwotbK;eMnNF7X(N=CxDdV4pNrJ0_z{zJPI?rop0DfRh#ffJD3-tWEAC
ziPgqf#CC4iZQKSyV=5sb?m6h}FahQKZ8+1+6oz>O!N#plaBQDAy5z3L0^4k;jdq3Y
z6(@jvC?s#>%3$d{CDhv(fDu7&K<~XWjBp$vl?py6^Q#S31`frbPuke@!2)i~s)mhr
zS+L=A1&Bo7B$iw*?45fBWW-NHbgl@B)~vvfLzOYBWB|^~J|q>D>M*Av7=BI)0Oz@W
z=&sQN^9z%}<6bPeG|djW$FsWpM<VQ<bO(rQGmJDmPohk{!Ru`tdG0rg?POa*$LV%3
zuY5_aMNELbx$3ZT{4G-78vsrELNL@{3AO~}!}wvLWc_3T1bGI*ii3VIvbh`%HvA#u
ztM@~@{d1yxESBURwS&$3rSZ6N2q?T+3ymuyKxuU(oL%5RlD92{)VgF+ymUOo6q&*1
z+BkTVFy^n!$v}lT*9m;?c%TdSYJWSOLnd)!m3DGHKL|I*=mr<PazEG1pHD9@Y~f^H
zL~;?Hnw;BfLvH^>A1W#B#x3EG<qnw)<32S`<%Xw^<lONj?P-|FDNobniiJh_zw7SO
zfBb_#issW>NB@_9@FepOeqsK>C(J*n!u*3T2K|GT%s(jDr9zXLfAFShF-kH2;4HKM
z<sZ~z{=sp~KX`M{KN!dSgC5L3_&0K&$TvaIGxiFWz2MWS=}WMyX$B3}7NHSWPgBby
zPPj4gEZ+9Zr3ZsP;jiDnaki8*)em>3nM8z!mNU-m1%0&OVlk&;4{dZ+rn=gZcxl{0
zIQ?6LZg&ZyW-AH(4`!w}bH40j&KJ``=SvlHzO*i?qhkl1FA0p9`E8Jy8ONM2Pu*7I
zKg`Te%=xl+(D`CgTY@mi%>2>UM7It)U;f5(^*8S0!s|M+BYY+*+*CuQkO2HrcNOz`
zB5(uC9E9}inG@a=a9KaN?~1}4^%z*47)R9>AA>!5<Ei<hCs^|$5t0MX<L$`dcq=uG
z+KDuQdQK};%U{QbX8zRH=*VCB4E~Mh>f8bWPxXB^@#qR8Ml(K;Q^8Z==2jt??-)<y
zCJ8|G(0Sg>$~8nfM;Q9@M2VD^IG!{dLd01X;4pOoNM;Qu-Qnuc=eCzD`}zxJ56mQG
z$_vTkE*)^pegmn#Uw9e+oC}+_K)%Q_a7|ah<}<tDvD60`k&%m6P6p$fM>~OgOyN*r
zGoD%X4Exh{u&k*O-c4K!GMQ3nQ*4167ShyZpb3Yv?{m!XhF?q0Lc{KQ5IZhMBa%dE
zP4)=<UN(=ps|cz@#NjJ;W=a^{$Dr6KEDo<ifrlK%Hp=1H-P&l{Iv<PHs$oHuFKWFV
zg|o-|Vd)=DknAadm%px{`h&|TyRjF1?hRn~V}BU9o(5|?m;ROSk~K3Iqpbd3@E_(%
zwcbeJhn5h$oL!C2Hkx3n&wCtZUkjy&#$#JVCf0TL;S6m)z7O1hvttwRj;8{?s!c@I
zsu^@~kPj~HyN$Dl%7f6UB2=C_9D5ecq%ZXwK(Im{4UTZAcmuHLpad>y5k?=z5}&bA
z9fx*W;A?gr`l-Vej9RTR=cN#u7+xg`haN(;_5t|VG8t#wIfxBi51{{!08YJmkD89D
zIE;CrY&IVPlQj#_dQ}j-9kvKYwwA&0^J@5M-D|KtvJ<R*KEe;#3aCv_2Q{f8xU+i{
zs3q@(PG4QTazqbgF&QRL&4fCi9OyqlpheXmw@nd6sk#MVUWgz$Ef6-%zDG(JlRdz|
zA6K~L@QS)el5;O^^PI!f$@N8QWbK?Yq-e-qG6MGi>9iwyMnEK=|0GYh9U#{2io7ds
zvgBuVI4?wnBcZwZylL`hcnKB?yuA1l(qfrP6qjg$kVlE$5rc8OnQC*1&w>zWUipS7
zO?VHnzADh{>I)G+7Xe1@1&_@v$IBm&_f$iP2b2JNfFLxso44%rY?!n-oczhS3?&bh
z&|+UGj8s2G<SbNCGkPLs<my7B7J%yRY}o893U@cvli4=@q}ysK`8G}wvh89Ri<yvy
z-`c>dR)AeHeE6oSKz>NGJ!!SAr1nQ6?+J5pC|h`t6#?5wdvq7E9{Y%wzh?&NbMqlV
z^0^RkJeYh^@ZrrDuOa(B{^m6VJ|^P=@4!*5%cRdPi-2N2Iq$fZ{8(Q{TzXc3T30y;
z8r;ZU!!)8T>_u!pxHHE~A5k;eM>OAdlc?y45Opq?be#?4&6;P-K0B%`H``CviMH{^
zN1KB2b01#byM4U0(>*-uIh|+n@hEv_>&fdKGlJK>Xd<txZ9GxSD&^6G+PsHDfAI8d
zQ;2HARbHgCJh6E8gDj3YNSZcw=*_Z?CHIqM;b!Md-U7`4Uf7>F^7cxx-Y1z~ysPs!
zKqPG@3$qWyD*FydKII5up2MLg_YAA!9+1{^>TvO_Ht6Sz5jiDU&}M7^Ve`|BBNIp7
zUrQjW4*_oX#gTCt1T8Lzz-EzBHj{CcNEY^xbDkSW(yB3_R3d~gA1jjNvk!T->qUrP
zLMRE5c*EOfd6;Y&GXu7J)bZjU$B?I*i%G%6%e<j{Y4Rm(1nCnwOhgx&lA%w(@;uW=
z@N|ZcA{RHilRyng5_jB-%+DmmdHZj@8AlVDyJ{`Dufuk!%<6c7504SoD+@^8A91oq
zT!kn;TSP`F#t~1|C1n5OX=LEqQQl^EOLAt2F;90&191rlQe7-Wo~kC0;jt1hVD*md
z&Xgd@ZQu3M4<_)AZ+^*harGg`V$S@PJ(i2p)wtsL`*cM7WX^Eu8M;nj$(6@l;0`Kp
z;|%2~x9wUz6_qQex83$|*YXuPU9Uylyq)1($I~#XS$Kqd*z%M9iV@;MZms6VtyAT!
zx6S3|=*{Q6oY!%O91FQUhSUB^@9uA$wX|_5+Rs0P-{utHLc6p0<=7d#`e72*xeKUI
zU=Q9Cx{7O$Wx!U42)Z_7JvGgFgTC1}(Z@!K8u?$qX~VBz*QiajcJD;C?-K?GormFD
z*#-3BDjE7UA(p1A@#sIy%x=ca)EjiZ>>gxh{^NYf8)Rl~9(2A)4LV;cne(NMIbQ^f
znHj^FnHGc27yCiy%O=LmoXMOoqnPuhhA}h$#&h*I?ql(=7-C`B%ye%pWZ_R$RLK>F
zD_%M%-28|roysQp%E$2Blw6`7wVa5xJO;z@b4i*~3F)rk;9cfrI25x0dS450`k%Sz
zqI4Bj2gzf5=Oy@k?<eUpTTAMmdHuJk69a>X^Md4Nle~8ZJd;TwM8(XB*L5a=I889*
zJwB_<+jc_&Y_2QtmM^K$yMEn}v`C%Qn_)SH_fk8PcO*iSY(8Ab^Rv$;*={F^vz`_i
zz15$HKGh(WWp+HLaX0lA@Y;Agl0@KJ>_f<XH35SUF_!PyUu4mv8_@qs4I<|kfkFOC
z$ovvVs_+WvKVOSQzs{0yk4W+>W-ctS4u<<fA3{=*A*vbGlHsggZQHpKL|#3Fk~@1y
zqt0{)FR;Xx?S`=D(oWK0IzV1{<$y>F2k&JpA*tm$^c&fM(4}bb%x@(+KT62H87<_@
zjwJH%mk+$@Y9*JJO#_*J6EK=614lY1K=XWM*rn48i(3v8N5-X4wmku_ivs`3Yd+g_
z7Rpu0g6z3xSmd<~EEehF#D)XdDKiegt9s+A_YvT|sRuMSM#I(>`*6>g7zk4P1-q^F
zFuUtMG`K|JSM3J;UAG-)<ekHG8wJoYvBaw1m2jIIN@ww7L1lq2tesK~@*_$i*JK3F
zzOIhQ_H)};-G?K(+L$Yu39r{oz=dZDaZ;iUtUf&j`)8em?P?c6B0~vhmG6R_dRutm
zV1NlOM@fZY7fFIGXqylYw?z*_FcCs`Gc|l`>_HxspM#tw{bcIwZxFL@0J>HxL2`ow
zypK2x7%&&tF8W2r)vx^@+`V};R^R{sos1PBQ^rQAB(AgfIcIMqQ>9UwRMIFZB{V0Q
z=b;Q4LZPCRCd0M2q(l@&Qb~g}3yms7xzGFl^;!4#d#`)lf4=|tEX%*H<67&w&OXoQ
z>-BiR8>61X(z##YM^yy8bkyPGmIiQS^Jm!pm|>&eEP-_$Uhtj7V0gQG4D|R^0W%yl
zpg}i=z8bdhzJ>@KWecHjLxv9~I=~<6D!^d*gYcKh3xM`Wvj=KNu_KjZ7{zlH(6=KH
zHdzbYf-%paSlHtYl=TN%HxGmKt)G}7$`uT-QeXpOr+^bWVIX2pG@L6RV)D)6IP>aZ
zi?QseZ=#bwH;cZ?Z(;_*Bt==KJD8^3^(Hf7Z!j0^r-MbaPMEy9Su476OG%VH{jrJr
z+Bjy&l?S4pn|qnkvvDSso+nIb$pYru-D8Z&s$ZfoA9-em^l?$m3oC#Pq+q>q9K3vM
zDC`G|;hE0kAYxZ6Y~FMhPS+m{jeIHNm)i$ZzD$QL&u7C^R~LfT6)T0E7I`q#QU%`F
z7zefsGr<9;B|xvRtLrTr2XBuxhQ^L#;DtLZ+;k*>u{-~sDGH8cSl2|RYHmCjwDJ$r
z)T;^-_pJu|rmX`+X%L97uLOFr4WftNFkl=b7!%ox%!H8{z-(E!s3lkz^pwm6x56%q
z#x=@<@5!41aE}JN^UgD6=8}Mh?S{F|iOi3Q^3c#B1f1A>9`+ong3_PPGhbd`2I=2(
z!1r{42ebA#SU!b-^zHkhQO8lJ7nH+<q&WfijxMHgVLJS<MH~K*+YFZ52ZHv2Ot8%1
zB*<-dgJT~*2V7GrWAAv<<U*6K==;?BCWZZ8Op)1u$>ZNj%$k=67-5c7G*8``IURUL
z^l(`SGwg{E6DzOH`2NZ^*;uwrG?x7$8krnx;-~$NG4Y<rIM-H+#J69Yv>xs>mV4VT
z^4KWFBrM$xr%YA_mts}mz|3aGC_Nce-aiA@Cqeq|u=r6PRNu@*!bo@H8Qi-9AQ
zfU+9*fZM3`OubJYm~#yStNZ7{ib=1aq__+m(3F7R@}>w@@NHnh_D--|J(Vds;|7|i
z7>j(q8-mqAtC{@5dPcTz29x{{GVj-CGv57_aaoYeRGI5CGIy&)JuM5F#ZB`Wrywm+
z{N`p+`e`Xf!t*#|7}zMfv{MS~y6}N%x;j-PlT$1Dm=wU+k|NPR`PGNN`IVuNU%eCZ
zt0*DAiV^aw1wwwc;ctHBBji_qg#2o{kY6Dozd9l0SBHfBYKV|uIsDD98io97s*qn@
z5%R12zxkD&kY8DU9?Wl2Jj~6m*XE7OE^>WkC%Mm3vi#Le>0H(po>wv3!%KE~adT_*
zxb#6I`8?aj99h%B>CT?Wb#>Kq9X|uP9d90SCmi&7xiM$Cr?P?E*9CHXkBc48yZi9Y
zlae_vp-cT8JB1@!+ql`nGwc=16YMFAd0fx`=u%CEF4a!xQWgGosq_DKsSQ$T+=#zj
zs_Wk_wO!~^2fY7xm-=s<Oa5D4BYoO8I8`%+?UIV;)(!ds%fA9{aX^5;n~&g@2+UFb
zu`bv3;l3~vTgfVZi05YdIddUZ0xNg-Le|c~nVpgO7*?tk!KQSfXV@tXYomJEIqenT
z+vUlu<JGHd*#4e><zBmYl`^O5UBKlSXL0+MhjVUXT|P;M<wG~g@UIuhbICidv%lvU
za!1)w{LbNPIH?y={O`++yxDHRAFbTPDISjEY|cOBhDZ)Z`VuF3>BoB9(tU+&^~^*3
z1+Q9uw#=!2<r$~BE}l10^ya@kFyNoRUBNd^TfiSfw%DP575_jdh0i9BIn^5ue8L7*
zoYMP;k24Oyg;^%}CVG!r`?jLZ_fPRo1=h&Gc_&^rMG3#pN#$vVF>gD6GLF!giNm$d
zqq&Q8Vb8T~@Nu*yoRF*lZ(1b4bBS8u)Ru=(`3VFz{u)4T{2XAP(g-95AlRi<0@9|M
z!)pPlOop*MXuquvXGNR=kL<eO{dJAd;jt_j;@HoGlqQ2VX#&rY#qg5pI544l6xeU^
zm9aL80$X0lfQJ%0KuO05CNXRrNPIXATpYI(F53MX97tZwOcnf9WtJo1kdafsGQJSx
z%-zbA_HG5MT{%Yb)o^C>n?`1YrU`6bVat@94F@Z)-v-@3UErd;m!QjkJiNYp804Do
zz^+3Da9ze((6v1c-mpFlPv&2O{Sp;$N=5?cwB7_Ar(Oc3=4-(+H(B`9P8wtjeDC)z
zm*5tKbs*GH18PTZfgYo4!Rr(~D6TF0SDw#re_si%YR`tU>C4$0S=#J^6;^C@z6I-3
zcMT3VT*m&C4+V$sj%5ucXR<@S0=9nLWcGTD6x%V~oPBgD7IyaafTa;j*x$#Fz;`cv
z*=Un&5IRJb4X6h2`#Li?>iKQ(*`x+y=eO|ukojzFm>#=1yjEZy-(qi#{lO+*Enx@U
zGGHg@9brpU_p&J;9oUOHV_0|XP&Oen4mOhn);;DLt8is89D3;_i-!DywqFEx@(n}i
zW+%(u>MCUu@}{$eYy8*&Q+Kv7^fBwdq!aFXG?#sT-kCKKd9$0wXR+#&Z^BgzFR-hQ
z>$BI^?`7jpJ!R88WLf(^58&cP8@4axJzRBQKD+662AfPZ*%5i;+0@ivD4(Lu)_+eB
z_$I=9-Th3U?GO#NXbAq^8;ZdE)eZQ&R2lA^E6flEEP<*U#c-E$9*ikl11kb|fTN>S
zV1=C;d*DtsXo~QHB~B{v&k_ybBDV>SupS2YM~lG7P6+ncZ-kS-O0g%(20*djCa7t%
z9^70YX8g@-f!^>$_^jz8$Z{IOuCh#pcYowCRZ6R2xYRAEViEy0x*viNQ9i8m%>Wh`
z-og_vKR`+MSIq3k>)1xKvEZZ2C*V^wA3U=k0$=Xg0iS$-0Ut#O=ON}W-0XG^rms2&
zgI-IroxwV+c77%d_H$rWw;qHFlHpK)d^`x!xegaC4+K4PD#3VR=eqREIr#C`IJUz1
zKIoNQ2QIFd3BMU}u;}m(ApQ0ju-9ya@f%ZwU6x$N<oE)nX>ko>@;(tvis%4`l%7L9
z>!I-avEjgdb^&wSCl+p^I<QA42N?DSLU!75uyaQdxEd-CpVd_WhI_{}ve%fqVFc(r
z<AKu4(M)Q0E*O+B5pH!#2Uja5!AS>Rz>S|ccCYs=D3f&=R*k*{)wcIRUf4DFNnQ;U
z+O1jVL6hK}P0wL`-UN1x{tQ@MR}Tta4Pyly3|z8lD7>Pr!A6EpgPTbe+&9Yz*f&@~
zJ&6Xm+HW-+)w&icTKa<UvYA3IUI1pc-v<j+>VRA5X5c0>5?;Nr2;AwEff{o5(5X!Y
zcKIelZ>u5jlhk*xaBnL3mT3*1s?`C-lLcV4%}*e0EDQTqRRT4GAn@_iJ7HH^1_X|L
z2Z!|ThYhRmLm$a%VAjCFkKxwv{Lnbq@lKcRI64`wi`Irx--g4OQ|a)SZVA*%`vT-v
zKLmP)RiJsI5!~+C2(LZLfD<0Agg56D0N0LJAT}WkUZgGX+2^ewaApoL@XZ5aMPaYo
zaRG2Wkqs&~D8dOIwahuCh2T|7JXjPl8je_X65I%R%5-m-40GxV!129{LCqg|@ajp9
z=vdqqaDVm=(68`<+3{u~sGs#-#N2(wq?S$r+I?Z*pZsd=-~4KpkYD`}@~g7H`BjjR
zU)>k-E2qEtm6MQP%@Xpf;J^9ROd-EA6!NQfA-}2<@~bm{^Q%rFziRp)`PJOLqxg*W
zC*0Ayro3CI2A>=Lj(a)#EC1})Rqo;J^W65*%Uqj(Gq<tOmd|hzy5*nq_%@Z>yjlGb
z?mI2y{oHqOW3F}Z0~ts7%jqZhB~CB7>&Fgprd`(j7n@W3+e*OS5d4Gsf`9OzE_J`q
zrP}@NQmch7^{LRM>iz9f)Bbj;*WPuqNkW&}EOe<$g)a5m|Eo*=$3HmhN)z{=E_KVV
zIsY{o*}riXt$cNtJvv;KebDuY9XOi9N_oq3_txsOSrda;#o1%n#RBj6%k$50q$9)m
z@4O71ERVoEw+?npODq@eD92tEI2twsscf9dE_O_uDKxN4Vc*_<#JNe1;U+{*=ALao
z`mc0%8T(grQd2H*zY91n?@bD;KK3#9)_VrmJ@^T4W17zYOjYC6L5$!#yTad39E3VK
zXTB^VpYt*u$0@Z{aIx|uc^9)Ae3?Wl|3UE#yKA6^P4U{r87%JN+=e#uH)5mzm2={%
z>300wo_JKJHJv|j@)uWaw}da+)5S5<H{*+AzwzUWQh2rBX<V4&WPG*%J_?Xuh;~<~
z^3{t*^Y0tV`EM&G;d9ds@b27U_$Y-ud&iKU^XV9WY~UuJa?Jprd{V+E-P3}~v9qD4
z+Hj~J^@>TES^*VyO@mtfzHD-DD7#RY2mkoX2NaHc1`RJJvNPp1*tzYQaOSt)AZx56
zTpC&rCKklQ(g`ErNBbIP-m$~Xw7y7i=1KtEGGh!oSyPLB{rn_&GqD7?iUPr669$}G
zeHLCAz8o&n6vH9;mB3s_3Z%=Q0kh**z~1GaU|y9fY=Fz*#$h8tUcezRd0Y_q{VN1a
z&YTOAu4RF(UzMQW`vK<IS!1xX^fl;xz8M<y?So}gkHf-+%51^rQb<OPV{5l5uzM1t
z*_BhRSQA+-XzS(4?s1q7wR??V%Jea;TFzmpXgwe9YL{dqHz~6dB!l6M+bdy0d>(X2
zx(WNzlR*FOr7*bKkkyNj`B$ECK)nofb6N)11<7&BYYo_)`)yfV5yOW3cn+sV`?BNb
zN<kN^ne48sJJ{yyQS6r%z<z!b!a{2|c2#vbT*VxMITM=M^+_)vYHeqY&&C4PVVhaJ
zuNWRToW|n$DzINTmz_*q!v3<zWK~@w*;T>}`?76i>{YMj0;4dKy?T2)`)GA5`)=fQ
zc1lMV3{@BQm}_RS1~VqHOC?&NZEGTA?_XpWyftD!xtFsol_9LsI(3#zsAVVH^W4Jr
z^Q@^!HMAT$h0R|zjUDB<i=D9e4@@~|!X8!@Sj=JdQ1`J8`})Wzn615oeR63v`+0k!
zuy32d%DR=n@{L<r)%Zd<T9_Z3dvGcH!Bvg*_t%5w7k{!kZv{WJ7Rwra)Pf;I6EeFc
zVbjwDI6@P^RU0Qm^{5%}cIGxHb20)t>7{~@ds5JflVB%I7zOj+v_g#=#&BwrDy*E8
z1>CNyLyw=)u&+8EjB%R*yH}t2zqx>7x#w_Y)_it_?E@(1F%6nnR>AV&a%^Y%Q79d7
z96VBA3#XCapj%EC8kVKN1955ac%`DiYLSBv>b`@YvH2k0buz5#ISE{=MuV!4gV_bI
zC&2PA<#0^d8t^Dr73|CE7ThTZ*{ZwCp>>zQst;NM4@||-BBm1B-xvw~_lQ`}5d_w$
z=(BtGoPleNYoNDv7YtLY1>BQfkapb%KAC6^t!+PpxHWO0eQG7F5FB;V&L(WaCNaFn
zl)yI)AK*j7=TKsu2zq_p54PM%0DkOOV7yfeYJXh>o!i1;-(hDUwWtI5+UY{QFu_Ta
zaS&Qw4g~$J$Kil&7_73s0J^O{gOaOv82c}iVT|KR;1Mf=YZhU+#^654&NqT1Zf%9J
z6)n(BatL%OPJpvp4#3XzP4K3THyow<4#-;zuDT3;xc!npv^nJsw378$qdZ4Asp>vB
zrSTYg_UXVYwj1G|=?ZYN%}7{k91Umtr-F&@A;PX)C~JN1Hk9p6hCZ_*z_HpE&=jf+
z-7^Gc4OtEy!;65ha0zaoGlS6J5^%Xa1#W5>4Q|}319QC0;dPtWU}2XpELsu{>LjFK
zc>5suRLzHJO_&Ga*g??I?+bH6ED!yMhrz^&GB8m8Dr2M350YAR;R)fhHor+=tNqCZ
zWepVii2A?^eMxAd77u1f5$3m7CAbQ=!<D5ofc>kj@U@}PrA%pqu?h19*K|DSigbkY
zTe6@_h7lMZRSVCoNQL44<sh?lI2bYLr>NID68I(+g5~S-K!b7`nC~iPHX%(oWy)Ys
zA%7G!&n#wEH=G4?oFrgx%zWm#uM$M2(csVbL@=TzfI;$8fPzyD*k-q$`CZ*F+Pvl*
zX#Rc#%v<#gRCv#0B2+uznUEdqG^N2@ZjZ2U)maXY8k@4J8cyu69naV`OLww)0`DSt
z*mm~bS;FpJQv~sXNo<SYFIj50iOp?IV2i#?=41uViPqONc+D#wjtlK%Ra{rGGIRRb
zn+c;idF^;UFLo?1nv}%P#EbbzkBR)<uo7NkkPdG;B9Pyi6UZmc_T!t>0SY?5n75GA
z;R~f!@h=lfxYvib^G5=Q@;^0p@)G&!yv#Zc-f7}|{)4F@?;<~gUtyZVTNX8Qj{l=e
z73PSzokEv7=C6P7tk9+I6a0h!bg5Sb|KL!eOC5jyBzyC3mumXAOI;{*siOt|;0B>f
z?fUB<{7;u!_g~bd{^NXc6r3;qg7d}2<u=O*&X?w52X?>Ud<l)n<K)y>_8;fV5UaV|
zKhBp6qvvw>1?S60ud$r5;CzwEjpn9Te1ZQsUvk%;W7P%cOIU;zcYBR5r+!MAlNX#X
z66!H<xYP<LQGSK>IW~k1X0E_i(OJ-V@hICf=s8<$ahcr|XUP_PXk;DNJq5boEa3*3
z&v4%0wOsVCc(yxnBRj9}3#+$fBb%Ez8z!8#fEoM8bCIGv*3V=rXKkbYubdMtzh7c6
z9hB#zM)Le`SAFimrvPpcBg5aEXT>gCc!&F4^OTJ(dL+ySo#f&VS@Nrt<vBU&$!yiD
z4P5J;Ib3edD{key*}U8SQ11Q>Wv-@P%s#v>;+tcWxO-XtT<7TYf90G=>-6}*&}yVN
z$N)Q}z2xF?D0jx{Ie*vb5ohBe$=}v9;r2W`&$&z5^8HUVaZ|u|?pM`Dt~Fyk%KK)E
zGCOXd!1eFZ7R~v5rlH`nj=0QSeYb+wN>Am)ZI!&<HwnDox{n##;s{-al><3BIoM<%
zWNvp~!qrdIfSq9>T(_wM$V*x=)0eD-i|TB_x`E%|cI6}H@>OS$UGs-&vuj}<4;={?
zAKL|+-zmdW7XtxTJ(}%EDTk`k&w+eeB0Tx}2=jH?WazOx5_Idu!F8j?0q*@dFe9!W
zn(v8%qsFa)IW4QfbxslLZ1e$~K@hC*S`5jSN@l8c3J~q;VpOkPgeU7{;K$fuP#!gd
zo{Pmm&C3xMYvsVtXa;DwChS%oOM)TQ6mG3BXKl}4g2C4%Slt=I4$QR6Fv#{D%>0nf
z_P@|$p9$`XVMZ&VPGS!{aN!43TKx+KH7P?6Z+W)wK?)q4a|-_aXb0!M5V74Fvh3ky
z<?yz^Jo`7E)w(vnhwStsctgfXaFmQ^Ur*i2p2+NCcTH4d8<rHYy_vhAMnn?ZmXyR+
zjQ_y?PMpb((8^#9w70SrA*$>KC1JioZV<O-sW+>m`I*&pQ-)!~qgZjsA9ypMmo=R;
z74Ewn#opzD*ssb%S+V~SwnjmcHOp~lb7RVd*$l*Hs~ED61pda6pEj&P<3x5ZyB5w9
z_DXDo`^)iTZ{SpAT{dFJa_9hpSi>SSHp5q*oo;jo{@Sm^x?Oc<-A1{yt1=2;j9RuZ
zQ*X{LxgpKkP+7L#?;gCpW;7ex5WtQ<ZO!gYjDipEUWcG97fKx;%&z}_9~RXPVNWg{
z#zywouqQXgvJZB3!nE*L(0b<tcpM#O7tS5YZuGs#{`eq<i+V=@W%Fp5_00>mC+>wY
zwP7I96~P%ti$U@37eHEyfo^?fu=AHGJdi63SE{)&?P)I1xHcbL`DqCbOf-gGzs`df
zp4O0T+z8Cv3z*Jpa-i?SOgMID25eC6h6QPL&?Qsw5MNvcnMn(T^XO_}Pq+kBpACn{
z9*%{ER*hi$;!x)7%>*bvSqGYgE(YYD9V`j8hsF_>V8rW8u%6Qa<;Nu04|B(`u2a@Q
zP}2o=yQ;#WJ<q|2$wT4YeXrmv-7g^1=q7XVw<=p>(+DEFrPwc@G+4`-vCu{E>;=Uw
zhXH@ifNhs$VE4sFAZvdK7P}_G$EI`PNex3VEbJlEv?vCud>#)5JrKc|i@%xEzhvOg
zS2N&JePh^TJP4j?9tAUcgPA#@rXXAI0#J6?08%!-fztak!Q@RgK+bJG*kg-eMnfIQ
zNxj3gHyJ|jY~lPYCkfAu-47$TJAhWwz>KQB%g~TaFuYn4h8=y+1bZmJ{+@jB#v=}#
zGH(TPGL{f}-v;Fx-eBQO42+z@;Y9r}@P@YF9effED))?pODF|(8p^ONR}4(X`ayQN
zB%InE2A3!Qg3~o~!ECLKK)diVv%u{YEb{Sz+4wHhF@n%<lnlIDI1$KpEr%};+d}#2
z>ah2@3B3NXPvA7X0&R8Mfn%H>)4bRM{QBbu9$lFS9cuExybE$bZ~bC;A?zpk@Y@!i
zjhqUvAD#zG-hW}{+oeH`Dk=Eq#4>0rnGfC{O9Kh>x`1d24@Yg5ful_JLQ7*Mcz>u5
zxHi`p1_g|QTW;yXRo9<@ho^o4wy_hWXB)!jZWlpwufXW~c@XZrDX`N$hr;gdzkyfX
z9Z(-_0Bd^MK$75(xjQBoNRK-JbTfy8IkU1rWy~Sa`STGoN;QUwNjL#s8w`P6@fqN{
z!3$=6pFMEXEoIi~ons<*PXU28*}$w<A1J&y4CJid0>4r}F!7TqNNJqHTvmJuQetd@
z{LxYH`RTBKh4HJkN0s+EQ^<KAgS^qz1}=8%cJ8T7BL79xmH+%`1b<>*A2+6Q1V1%E
znpbg4=l5M`=Dhdq=0_)-=Z?6(;F_azxp0qE{>=k(K0zUvFIaV&>wiCl*O<ohrrHs_
zh4n2i|F3_rQ1A~L3jRTfzy85&!9Un5_y?s0|6qf#3-$G{fADBf75Dak_y@=T%;VGr
z|KL&$InFtmaP@+JaHRf7?jQf)GQmF>aAx9vO`g~!Ff)e<%*_8dUqS?CW`@AbTq`g$
zw`*o`|1dK%1?S89zs?sq!TItJGc!(LX2uK7m-S=qxc@I^CK8yLQ~okD|Bd_LzwsKj
zY%RMuyAD)d-p0<T?q&~Eyny2I&Ma)5$n{6I0^bZpxX~~Y?myQGQiYw}t?eV(=&?a?
zQOXABmE*-8PMpK`Hnzde>(@cgo`tNks|4#J+=p$w)8J#TA>2$8|9|Do@Nc|V*<}zK
zL&V%({}9xya)xPJW`UK%Z-VM2NznP~9470mKeN34C$o3_Y^JnuHxqh19B4XyV5n*>
zvt>*ZSf5q|5<3<Gqmxq@$Gj5oe6=f>S=9&{pAQ8y_}wtz{TpUSh74RWJ0B`te+*t<
zzXW{IW1#7x2)#6)f|%D)aD<5q)czR2=%s!JmlBRb^<~50gra6}_iPe)ClL)w^E$xF
z34`EhHxqEVsh;uQr4JMAdq6>?H~hT)Ie0s_7+#LP0r$xjLWdO&P~S@oGul?dZQM<G
zXl)G~xcwD2WeE4GT^zJGjDhLp47B+96mX~$-v6M&R&75HMJ_X-&!GsIy(16Gzm$f%
zUmL*E2lDL86c@N;voX8Z{@>}HZKgefX}31RGZs$VmxNo8&embKK0VJax;=zl-FuO(
znid1U$V;;;nq=6pzy@~lB{lZoD@S(0fHmuW^CKK^(_(u9|FDY`SF-KX%30&6v2dGi
zJiBGa1^6Lfid%Tg7=Cm<1YdSL!(%UovyM8paQaYRm=`$=YMMqur6UUL?qnraYg#&V
zK5PI-EdK*q?>q%vi3ect{XS3;DS`(guYjC`nykUKO+dNG3if*Mf+KAXzzy+=aLcJ2
zC_7><bj$p}toE0N;8F^>a{3@#y6!!&pH>ZYuBbt!YJV89awcq8`IQ+r=nA;H`WiU4
z(}y`XxEW|&$cIHTlVD!&cW_h}!>EOVo2+00{4y^JtRA%;nmH~9eAo;~whI1=9tXyv
zMUqKaf0-!@9L0R!d&1<c{UuSvn^@6}2rDqmKY#&c&7$8pOC)hB++^#|a#6HHrD$36
zR7T-|7E_A0GZm>P8HELgOi722$yDi8CK?41j7Z)|w14bI=GS&fX4%(gV3N}@aCrG4
zXp$ohPp{PjhlZ(uePw;jr>U_p#jh3y51tEagT?~yZVhmB^&AkVZ46&dV8PK#%YkM2
zYo`2#3j8+Z29vBIWJP9cpxF^+xH?1`)VByMM!}~t@_Y$!kF*0S2hM`$KMR;E`Imq}
z^BC~6x)hi*C%~zL;b8C4L|~*93l#r^i(3E0Gf_Fyz|i`4jIQby5WMXjQ}!?$yq42r
z*!5!OMwbH9qB|DYtkwbP?Hrg=TO-P=n+(+X9YA8%2cR71%5;O{pxki?$WO@u2Lnq$
zs812|*lji=W%P>qE_oW1mS16}DPbmYQX4aDh&&LHO^n^ZN;vks1hdEWo=EA*57D(D
z^TCY_7PO3307oY81P+fnP2#hR8CTT?(XN6O%+e+-+N7h)_{21dD#;)w#KKEtynCvN
zo`M;p+cbme9d*}4_L{9|`#?EEm@^`|bw<pH*@KwySZ9WVnWB@2GfYtAR}+r`b5TY>
zC?oC1Fb9UOWd<ZNz~G%Dz#z3za8kXHuRZT#f=t4}pNW4!vzI(<d%TBfdaVh*>$Nh|
zLgX2H?^IBxZwt-h%7N9<qhLT_X=@4D=Z2N%!A`l+0G+xH))_8f9=*Q{rppWlPj?Jq
z-d~dzW(k`_AI*)yfgNqk?p%5B`TzveUS9&ai+?hw7C=VRDVZq^R01n~f0zs;9AHec
zj6sW}6u5ZVfyu}7K<gV@@T-FbyM6XDD#FZsaCi=rG@_eHJE+2p`KBrQ!%8s)@^;L&
zF&CK^ep1YZjY&-0{Bun6k0xfgXB1N@)o0=gjxzT<ycyB^(W2+8rZU}yEYPUB%=Fqh
zFscc1V7~D)rtbF$W@5GsbGdat=s9T3NS!KRd{v_Um2+a0Xa=8-UAPk)YdGs$h1|F+
z(!51#B7d_%iuWJ=lzU<f`C)%FxIL{3{P6jkc=Zdid}W_C?{0UUTQT7W*Y&50^Bi-H
zzq$DYr#Y~duWpOwW2UR{&y@zaZ^)CE8UX)FCi8FHYyXYc_=lNkBRF3I1ZL(pftmSU
zU}pZ~e322DnL7k#=1{@;@;>_t`$k}9aso4Rr$ayIAuuz~@A*H>%zvCOMt_|z4S$_4
z|6yiY{bgqU8~?|@@fvqeCo!k*89?p3Ljj}O3#JE2vMaI$&q7ck2&taVPMdogngx$y
z&#rNX?XmLkZf-7Pvx&kXGsmzIoj+mSv`5TmxyOt}lqPI`vk>g<sDKjRy5O?@tzgEe
z)8OHwtN%6U#5r$eL`u3z%qQ$CdZD_S5r=O9kqcTyUcW{&y1i$C@)tEQCsAM{&bi4r
z<m_i2O;Z;QnCb&{qr+gjy)!6zIa^fl)k>708Oby+DG|lROa(*kIe`bO8b#iA>Wts#
ziQql_CMrGZ49-mV1Tk@XKysQXP(6JB%-31QY@HbjNG$_Qo?_;ysUBFpH;oy;_ZYJl
znlKL!6fj4=EMQy}JsIxYHSpqtHhBI<m*KZPXWn<1f{sdmQTmg+OkG(OBYJQ}@J~Er
zo?m$bG>19D0goP_HZp>7?brwNhOdBcKP7=}8Ev5ZkSb`2h+!nR<uSwRH-np{Dp2Rd
z4WQpI4Q8_(EKOSm{v-^ANtt^=BDH`ebuU4)6NEBSr~j4j-Bm|#!Zgn=uy;o_`{az7
zFk5v3c2})uQx-|G+7))}sGAz_nQIyx{|>^tzAkLX?qC?}{SlU0*1@ozgK+&bIad2X
z0Grrj$=XYVvJU3u;8v6p%anM)MXlG_S$7R!qMO#lxzfUq)?Ce<iEes36Bi{$>v?)^
zH~!~$!*-9|Yc_jq-nn_*R^#=X*X=O2a#^=6(M@GylFQ#uuj*12(>Iw{@VdyyC8t&x
z;_n>`O>6dH(>48RrrVcYB{ipl@r(2{eCVJv&3qe8Wu)qGO<^?7T(5xKE)C#RJ_DQO
zDwu_O?KREY9D*FJyzuy^{>bEvY)RGVoD%v+u0*)VxtbyWxw_H+|5vw0^uMkzt5^j&
zpB#c(ALgMeFO*Sp!D#eOGZ{^_V^NXnO0-El0i`HD<Lz2}QJG;m$~vKdT&NEkyZkzD
zbFqgv+}*%??%9CO)+8bJ$9UvDq@P!Gkw9xVI-n~54`})C(dd6)pVt4nJ`-d2jouy`
zxBfrx+#ARL^VThMWO4G$t$5^~HuA)B9a-0`V0zBRi?rA8M;ms%LQG;YPoxa_EQLFS
zedi!PYp6!$BnFvY5ig;`B`y<(y;j8U!4p)!H={UdKoP%Ix{4mX|IE+1D@}5LkETDS
z0`l2I0gbV7quP&L(O~~<^mSY)J@rnH+B+Lk_MohI98Exu2C`H!+K3Fi@Wg7XGVrc6
zbM!l+7lj=jL?v&DQ2|^_Y#t4#%}RT4$s>%LETm}E!C>T7o<QHn^bz;zN3hS*c=Dj!
zSKOX%N4`EhfSeV(#aFuL(yg1Fu&<mC+An=dZ1TPljkPgC9V>>Smh~Pa=no=GPs`Iq
zTQ#Ya<ad%A^As7?_~0CcXXxSAHuBw=!+(@C@RNW(Y~5LduGbz#TD~%ru`xqvN;ye-
znuD%~oa94f@1c;oiS$ahF;-rbL>6_PAjZ`dtZP?k(bM$PeC(-b*k67wzBJ?%nSCrB
z|ERo0hJAIU-ik?BStb$totutM9hasXb>p$Y;oU^aD2>JixzIx+4N$64523dT2rnRJ
zbEn#4+bA>fS|QA~wGQJu;u&0jwg*+KwIGw9n~)tBL$!|iQGfeHoD=MX50_fvaC{oK
zk_i6e-f#Sm2OL=u|DHa0YlOzVJc4VqlX0_QIa%Y@N=ij8{QcHkx+?D%S<0G|vqtxk
zU$X=L*ceAfY~4-sEk!tBNf()^<3URbyGZTQdV2oOL-aEBD&AM1#5Zr7L(@~saH)3?
z9;Tp9Ycp%bV%;6&jP6V_sHzg(bsNdKJ#InlvY*I3E&;P+h<N+?qh#o;A@qZ_Ie)>h
z3hf8;aP*{m<i4E^iaTzE9mQ9RAO8A|y*_H;{8lk?&w7Wh?DD2j=}T!rT^A{c9VuR$
z0a4lH33SVlS9r+w8OS`ypNM)4aQnw=q-{+rX>E$aH<t~?>lI%iTaOvU&&CtEUKmEL
zBn>d#ep|fPD-1n27eJP!4kbq}T_NYb*kWn7bEM(LL6Y)h9^TV<gd3F^PrqEAMB9e{
z=GBX)l7{3Q+_F<0-+H-9d_f$HeqL@t@xOCX_wpR1a5+>wW5{qE?%#)Jzl=f#7cFVp
zz-OM(h^HH^2ICsXP(1FVBEDXH0Ue5RqV+v1$YgI78kJAUsdXH-TdzSA%mPVxkP@=Y
z9f}`DeJjq)UO>*ie?wyBo{?c%E6C{J092u_BA$|eulVEZS>kKm>(CLCTB<(dEgA7$
zgzqT%;T412(G*`Rgo9#;?9Tu!JN_MF1Io~syXrV^moA>oYt#0`2K3pWP8?^JiK^ul
z=?l7)F3miOe2FVcez%%SD>X;CizQIF^;<G<M*(-@)o97LC)m|L53SORp!XdHp}XG0
zpizJ%<|k>R*sm7&!{A(UI(`|w(~*V-*H)4OsZU7ZP8EuAoPfP_TgXUrDLOYZ5#@&r
z7f)WblHB?_pT_#T(LD89<OQ-wz?dZTtadq7z8^v_1f}y1nZ4o~<)z{UYgKTeNdleh
z`H*NS?8mM_EHysbPEt2J6HSZV#A;+3y$Y(y!<1~4JNqs2F^WYl@FhuJ8(XX;ZGs*8
z!-%e=3yMJ9#7Cou6r>KNk<-n^=eJEmFBbRn@m-0e%YOuoj?t(5k5XbRafY5ayN-q^
z-zAy}W3eJxi)-I{(v#OxsNfmH_ZxKZ*EA=jSay@fsV+s$aZhlX&3At5x{EZ>L!HE!
ziN$W!+r-*O58>w$_pq9UCS5x)1ZzC0Ls8RykZ-b4RCQY%j)(c=VC6+pxjzn}Of0_a
zI~NbXKbd~D9E7656+Tn9oJ$@K>G~F)A4g<J!P<M|cZoCia_tEG>$^VkIuMG=D(29^
z2=ILG2z<nHGp#BPqn}2a)3!NYc<)PVGUA9infmS&9vBPhdmm@iGb>HJrx0MXN9AJW
zr>$a}G*9%vZwEa%A&(T@`GUGGKEl_|`cU8O^Rz2=5B^!LPRq{>rE^LKlN$+%;-Z)H
zai{pJIBKtg>4^;+$o6{&P~F}E@sq$Q6gH(2=$<BCo8XLPmHUvb!bBP!62)H*e~IiI
z?Zh5eL$F0=I6W}9gLuFqTDIr4*wQ4GaLcl>i)1?1-;L0*J-fwsAB-pIu7UW}%dcq1
z>+Irb9!<z}!X(miJVKoN!JU47Z$sObIiuvl<v7J=6rbQLL-HoZ;#bS|&=Fbp$*_W0
zVjUMutAbBqw|hZ&(moN{pesSIuFA!k?$b*`A}u(%KW)WN+QYE_Wm768Un=%=uRzCV
z@1TAg*5Y6L$I-lL^=$QBSGwciClcJg7X31~&6jQ}z(+Q$LpJvkY2wclXi&U4GEZNG
z18!Vk>wL1&+3R7*)s{#5Eo8B|nIU@it%xkSd4sxZUE-7c?jrG=QlzF9Mcx+N#h)Za
zbkf>(GS4%MM7xh-$4ZQ&dK0YhRT#@(FJ47W)>z?GI~{ub&JD6L-;&z@TtbH}_rbrr
zGqLT$&Be~)7kHi%-lM6bQ6X<!EG=4!)n{6h?@eWNN{<fm{0y;_xrZ2gm5R0YrlO{|
zvyhU88<9BqfsEK;jtUkHK_=mY@iq%7agRl-cr;##y_;oFO4}ML*4LnOUxwoR<^<}g
z>x(Rg7N9lrCQ#dgXsXcvR=g*8II{GYNAtJ0p*Q&xaMW2<I{4{sdR%%Bq2UrVe&JGd
zBQRKe`^9Gb#?696tGq#C|53$wmOkFqIUg;*?~fbvOv&gr6S~KK2UT&4<n6LDh`EC`
z5lQBwwoU73)c!Ccs`Eo-+C=<9lu2XXgd?|{Vq!LI4lN<>DBaYB3$He$8}fDV_xLd=
zt7$Pl-ns(2_YWr;Aw{U)q6>vfJ;B>|#1g$LOL3`(IsJL<B{?}WADK%vpn2ZXxXCY<
zTjhF(RP0M8rw>bF#kWbt-w#RSh9x_2&L=T?c5xvNSDb{sb!O2mw+7)iibut>gY#*Z
z=XDw<cMuzkhoa%taU?|f6|Xqx2PySs=v%Law9ouLdQo<Uq`V2jk^4OHltnUFXU1=K
zp3V=l-@i^AAYq0q6LQJ<bY*hp(RAL>#gJ<3e2=VJs>z?rkS@qxN;Q+zc@m7!w$*p&
z`jOwo(Kl1^_1uLh`$G#^u`dXlOf$rf&IIG$A#3?AJ2yILsvPd?*TN_Cq_I=51%9gY
z2x}V|;fset$YpgaevMxlO6&J5o}nCyr<~G8Ay5~mAJL~ar`8eafiI*~R?Hi}T#3f4
z+)1`|r;(TUN{FvtGA>xZpBjp8^Hq~W`10F(NSVcZ61cYzUHS0=UA2wEryLGZ<slBp
z&@&p{uYJTz+u4zIL%m5__Y^wNrbutkN<>}5-1&RS3y`bzHkvMNNN3upVX?w6{O#C6
ztkkPcmz*C(UszV)2>+R4F4G>LxgAN{q?V!mjS*z$vlL`>Nssn^)x#rS-{7Ts&y(Em
zlabBk265-<V6v!EmpUHF$NPMZF}^+>s}zLuI_=NJhbE}wHB}1qS?Yjrp6x=BRzc*f
z{Q#2Gf>dnMK-P68pp=gBxFc{2a&@&rzEy!V|I!6)_<X#$D9ssnFFrzZ-(5l$pSR!z
ztsBr$CuhF<H=yI9KXXGhl<D$NeX82;fhq73m;Nc__XQ0S-?snEFWl!ydu(&jTv<Kp
zK4t>`1V54AlZiMtq6p2KqDC9B8<uV95s&-)lU$mWN^_KAP}st+T-Uq9w6X0Bo>$#K
zT87L+D`ey7#Mp5-AoL8~?zRHw!Qr&HZUr@a7>KtXIz$dWIDzQSi)3}Uw5jHW^CV$t
z078Am;<m74G?$-Ajp~|dwKk%z7c|kW;fwGveMfOd=19Dy;V7rD%bFV1d_|VWb8*<S
zeZ--A9p1L;F=uLDfu6>l#7n2F##p5j?TIg;{oOX=j!{GL{`+afwb~CY*{O-`7EYo1
z*B0QgO`mwE{{sCfK2GMeKc-TPu8K>J972N~e<A<6cJi&D1FyMqAAiyNEWW3<i+*u4
z!@Khu#6hYlBqz?5rnnl@-Bl`7yFC*%o+`x>HW**icA=qXR`O2K&&Az*3Lh%90r?dD
zWJi3;B5n#X=%(u+dS=xz5^}->?{U3UeCFdLqJH!h|MoxtwJCi~%<U97Ig4S$cWxS*
zHQSPc2ZxC1tJ&n(Efq4*P=J>1mL$HTW4XZaND@;o!t#!qxc>MNGTBCgzpnCwoRANo
z8JE*g_ys52k_vIzyLMh;$XgVC!-wt(9*n*h7m-U<ktpcZP5NU%y@YwPj9=$Gf@bOY
zp^@3U(Wr=V{Em(U)?+SDZdWI97pG1oM{R~-d(MXFRNf<rbCj@U>oqhR7oeA-1Nc!<
z1A6#&6A5$Ljg)29;B2R0Tvk68zl*X(juF$*<-?M6Le(TZ60>C2DK+$cSp?otVT*%J
zW03X(1$6sQATJ}o9!+uUL&?{&(3Q{2sND%EK3BP)Xl+@7ts3%Kv7RlSns}O=-zZVs
z*l|`|6!(P4cx<6V?l|L?gM*N3*=>B}vM@Y*ZXD7&=)_IFRgZjybK_vAO7d;|OL6-7
zB+8|Sp~8Yv@ywVZctLJ4c^_CnY(@>D``7DWG${lb6*eQ-_)=WjpoBBbyUAM@o<3>_
zpdrQ)=%vm=lo<RIrG)9@p;8;kR@*=1#Fs+x$;89-%=HiAL3?+LXMOlnq?_~x_L@B<
zJr9Fv@Qde#hkpDPdtM@_yze8*-mi}HA0(29yn9GT@&JCf%7;8#S4h--73j_@8_7u3
zg=o`28ecmz0n0kIBGcmQWWi9T<jBDhII-*#u?x_|k;cB{K#wOM6XAk)Y;&O*o5Sg%
z@kLbaM-1Mb6;FROSzyjI8x{ZbqY8Fu;@6*J=r@xUbXw?W{@eRra_C1I$|{Y;3P*0y
zH7oki_lPK*p%8`wR<0q7ZwKM&FZ9SSSvg!YaUQZe8i%e=(4jVI@_6>?{g{mQN2h<a
zk}q%fajkNPP>IABBwcKV*PsRXkogjtx2c$xK5`{aL4XGAol2j+KZNH5Ov3KxcT#$4
zquBA)b@D-VES~yVnp#-!baf*n6V|Mxg&O0qYj+xL+R{KYT_W)>Q<nb`GmJ(}5)r*z
z3(l<6fFzwbf}~nqX=g0uueZKNZ5}yfVn-$JbhAdv(ov{a?Kvqv*2O#iGNg^^zr`zt
zq|oFe$;fTvZ{Du#I&%LqlbbfWgM2!Bg`BSVDBd#500oA7(GO4oN1WEd>Id%?_RhM5
zcTHHvPg@<!msdPS%Qi+5g%g7)a2XJ*1()#qzn(;Q?v24+_3FsxqcWYn)r80j3O*^D
z8Cc`NaQgC|JrxB`r2TPs#cJ;9$n}XXJ)}Mk4Jy1z?3GHyuuO*1y36#+-4kRLolVr1
zY~t77-h)5(*WtF8DtP>trO4eQn%t3_OgbmzA-Sk56n9Zz0U2bYlezP8i;EFW9P<i|
zSDA>dxnw+Yr4ju$S{i?BAC8&GLp)cz05vH1kX^eYh<Ee_6!*yySqsm~>%Y&YlMb2U
zsF$8tKOvQ0E()h_3(nI<%Mg6&*+g<<=K@suErMFk^hd+*=ka`*0e(_%hZhgF=5g%_
z{KIK1uJKYqZ3Z{_>LV?reA6PdOCu4Fl&(Psfxmc4#|S#2+lij9IYnpm+n}A*0d!Zq
zEce1r6~CQ+8vT0kmdyCvMzWYZyyE3qvg@`y3e>YFK2!;9PP@#f2e%`|iNi=+h$pfO
zA50RZ9*TpHdeY3bOYkf6J>pxnEoj%x5hT`J5@+hF)9v4PV9B?pbjSR0WZn2`l$<4j
zzN$|or={c2n;E}I#@4%JW&KOCwtX+coE|Rx=!Y*jhG5UR=}21O7f(_*Btf$zXinvP
z5<t#S)BG^fx#9;}XpGQ}`7%gePK?*axS~q;M`%%tHm%Xr!q#<vP-C+R-nj7&8X9kp
zi>*a;%~oBE2bZJpvsbW=iUKLGiz9_m?c&P(Eb1uvvAE~FAsSreNV8sa5{v3#c#Vr3
zz8~yD4GK5o!|gA4)89vE_st_Xhx4S{;Cw8-+5{CuDBzk`h}LdhiMf);sCjBMno~Fh
z8!?#Jew4-eX2s;%(pcL2_z?e4#~nMgXp=KC2e3*&35u?*p|<nii>q!#lU);Qh<J<{
zeS7#g_6RM&0TvT!Q~WqG=v4wv(>0*S#x?WHB&$fpq#U#nEh5rZTf~jgjwpIWEKYot
zQE>jeK5=nlky(WSRxvw>Nr@(X*xG=4)2Z0|?J0iX-EnHuwi(-c@;GefH&SJhNOZ<a
zi8GQ1$lDLYsOg1X@s>}LI5lS+KF~Hm;-1%`npJ1e^DSv8V)k3onPh+!BzK4xp5H_J
z-aI2y1TWi};U<*Cig4-rU3kVxO`NPONj=sK;}6~Op%#k!#YSRz+$3|ONa=1na<I=u
zu2%Y3YmGZOQ~H_TSQmwCBzE$K?vt@-mjiY@q>M8rjKo%JA^K#dKs?|7L6R+R#2K^B
zh#x+7z~`6gVwXRjsBxh@PP?i~)a~<#T6CQFzNtI5d1Xbp1M1}b`d~bJ^fk`hIT^kA
zkV@)mGSQ+>tI4U^&B)5qoo+rAPSlh(;;Umep{4B}WbTT~<jA;Je3zmg*{7IH6v-4S
zx2>G)xpj+7J}l-tdat29qHKKN{$L`rVKZH`b1+|HKv2W6>14ZXq`0%{rg;6s1T?9P
zArtPd#?!Bqk`Z$*aZ(Cv@YTZ8)OGDAQkr2ZZYa4!`hP}~15X*`<DrP}EnGx&Yl3ma
z&sV7D`ZjuX!w#yEeF=^88j1Wq&L>5iuc4@jC~`|fhku_MNiw^_h>pTFR3SVkJe`EF
zu~9ENxw%KI*4;_^iUQE0bC39s)pM}s25EdR|2+zN8%QsHn8N??3__c|4T+(}B&ufM
zOO}q_N(Qk*u}Az@exT6<^A#`9O_=~Z>PJ4(yYqvb?F}c?d@cQ&Iho6n4W_rgY%3lx
zFeaVxwfrqTA%~Ro<Q*Sd(cg&^v8rSVx_Wvjw#e5a1CP$}@R>Tg6znCos5yh|CBt#n
zfEQjmF_Er+g2)b02<_bIfO5zRH0PqAp*0M|1LrVyd)I~<A3%~Bejkm9)TLh{-lIR$
zigC?cA1ZrcgK#!hpcg+;)Sa}B)i}7H9RI8+zBDX^@{4ZqtNbPKZcDMaCiMgkUoade
zGbzH~IwID#u`JG8l0@Q;s*<c=j^=HZqU*j%(S+Pr#lF*IaYLRx_w&h9barn6+A_6}
z_=YRu`jY3UAyh=gCH7=w$OAIG%!5X+m!bJR3(zt5Xl%ggpwAEY5Q(J(t<da1?I(Nq
z3&CdS=j}+88oU|lKFUUOd~(sP@g3;yv=|)p&=GHV)_@W|CnEoKu{2a)34Ph}mMoj8
zOshu!K{Jj$6Q9>h;a_J+5UU_#yzf3lPwreI2Cq*b?Wm2UD&2%`dk}&mbp6pmtJwlN
zJB}=WqfNGzc#wrnr3ln^h{x^LKpL^;xadeMHm`U^+O@9nRceRHg{|#q@TAeScFqT~
zN=c8_??^z8);~pG`$u8ZWy-XaPND0ScA<tNuJkCqh4z_fVrkDzl96de?)EFtgWCG^
zc*S*MyEvR)?M@}fu6LsRdGp16i3i0C=BlGHS^D%0a~7#q8)4g5^Keyf0f`MAEH+3|
z#Qu#=r00?;{wN-Yx3B(2qTh@okrzuyUuh0%Sm%!o2cIOCZZ}b!<^+85=?*d`^g9`?
zFqrOe=qAg~Ks4>!2D-T+kouQT!(J~t(3W^%4OaIHzt|j6sq|HN&G%C5w{SRZY>q^u
zJ1>&RZg=8*I-97MmLtoGTJmah7U$L`$#qJt5=%5>k**eh^0r5fx(v_2NXRWxCM#gf
z8Bh@7g~FKt>K)&V%y*Q~iuKP(RZ|_YdFzf7_C@ftZw9}!Jduolph@=n&A}=W<7mH;
z32{8N91U7j$Za_>mP#0f(2FI)XWY_&T4~4NC#r|}olugV)U8Gb4i=G-y<=!c#uh3!
z{tD{1vLllgrjY!iJrqk=V@u_ExSnIk`mIY)<>6lR%}Jhgd8E)M4Gl<!2B78HOZel{
z=HqkE(@AE@YvSbCh1U4)p%W7N$<UzJ#n~56i&>die(BA5l>eSYPHY;Bt2|<fky$Y5
zD&9_1CMEFl{Sx%;oD(D|T!~5_ekYbI4-!8frbPFy)~DnA=VRH*o&1V+74F@TJE-To
z3^#3kE59YFSDd@5AGI3`-#yiDxUJ)B`8#H6I6A8q-F)OtrY{M`-P?YNL#+@pOUxiE
z4UO<lBTB}oC7^AJlKhXU3cQBqU1SAY$RxQGUQ*VI?a6*euGp&-tG?xN(P2njwM>e&
z7px>_=T#L|xo@WJju_v|k-_XtU2JghD7k9iCJvO?Nv=#CM+cO~i$^?Ofd?{fh%XKf
z#m#auID{)lTDA4!i1SWjwKWIOP-$nPlVgMvy$<0A%i7Rpw?p`GqBsBAU^q=3*2SmQ
zh=svCOYEFxjN`<A_-`#a;;%oRiR-1-i0zk}lcAq6UNX*uw6|`<f!Cs_l4T#+uXdCK
z$oXSZAWwI!KZu^$?L+0V&k4wzLnaAlMt}Qj;_{r2bnd8|=ub^O2^|di%6F=Csn=C;
zj8y>&uNY7DjAd|N-ElOp_9N1FdBb-mno|w8x%8IB2J9XqkG}=9q1B$5c)Yh4`Vv)5
zI@fwr-z^bnP=z-ACHYi5)=ieShL_Ma1spE<UM_y6*+<Iy0?1u8`{Li1PEqesE6KdI
ziPUXvDpib(M_%tj30rv|=`NPVA%~*TpUbJ}(0va1?=`|X1B&!%vIeR+RY1HCipa8_
zrFhYBA$M=whsGJc6#E+w61O@YLoXg(5}&E{#68Ffi7H3oAzx(a@A|>~+>Z?;&oG-n
zZU_IXZzpp9W{WG&+2SMLKce-)n^1CtA+CNZgEC@|(qqgYzC6JjhY#)(|NJOJ_*N~v
z=T0Wk*KZ~jezSS=f@IFwVL5M}=}Nc#aHfGGIl4_~Cnsz?gid~!L4A2@IP`HNu^W35
zAO2~K_0PUWBipUWyO~eX%P1ARgnx<RpBAE<rM4K7^kV0Y3$UreOJwo9hCiZom@{|T
zT{Ky8Ex`{psPUpd$b?NqJAds(&qEgCYY#ke|KQ7n|NVt%4ho^7`uSKf{yx&L??LsN
zMtH_hWBg-&L2=GRjz6dpK>RG$iyviRdS}v5O7&H6UY9HWvZP+DJthU4ExJdSx{gMc
zE9%HxkM;OtfDvA}tqD!LphBKTr_;Uv7hUK5m*XG)ebLmCw$ijxB%J4Y9F>vG5+yP!
z8qzQ`G7_a|i%L{fLc0iE=kZQ5()!pV*-}JS$e#E0y&w1ec-+6-kLy3U#(5pb>-BoR
zj$P{TzMO{>E(4imeJq}&T>7LX)t`pAQ0^6VGFM$s(HotZeEbz!UhPU68IPftu{OI|
zAk_+FH1Ye%bg(Rx;cryQQ-{YV+LWun43GYy%W>VX*uxIrYJCFrZ<=)cz+`xOsF<oR
zwy^u%zD(}W60!JpJ+oH}L*GwTf}=?d9Q+WD%dB#s*eHx;--#l1#c|lOClv2*DJS_B
z2Fo;;!`TseX!LF(df4cK<k($u4>`t+ES1oM>%*pd*RnjD&lD592>g!>V*$=hFx*WW
zzbq|=sqq`2;*=e&+t?s_7|vl;ztX8=aw{wjcm-$7j<Wc2j<vqq3pRGTtk>9D%8c!%
zFH>|#Q+&Z^p1ubC+K$jFKXrEAE|t~Wrof7J7kqes6&o{45q>;=%EF(`5`O8-Cax)p
z4bupPKi_~A=%$G#x*JgcUJrfQrj1)Zex%|djhOKln3(@mJfnGql?GOW;W<HAF9}A!
z?-D#yUjps%eb}0jlUcy)b$B*Sg;9|%eg3M+q8lYJVUsFcyK+|SJJo;<I~oUF-G_OV
zvvn{%-bCuhDl*r?Ud*m<D}5dF7shIaur%YTIADk$J7H7|S5Hb6w$*M}uxYaB?>~(B
z7Pa8iof?S#Q$aB^o_t*<;MPnV_^n@0!=u$Ptz8Fg(yN%dfhu(me#nPkR%HKeJx=Q{
z#Zz?H65Pkfg0<67X8b%!aC6#$+thS1q%{v;_I1ViYtt|%*O;`Fdb4V${wQ<b00kKv
zSa72a9=qz%$&@BC$vi39urz>tt2CH<x&+Ov_1LeG2jK1R+w88JF)Ml}{q8HPuwkA`
zY{tXy@Z<OaCO_mBKeuTg?mcgcYY)7HP3gn2Z+R9x@zfATzR|_&MmlVQnL1oOXom;>
zErEyqWO4eWU8tWk6yvWOg1o9I+*h;_Ju(Kd`rKgJHRCW#O*;#3dd9<OtA3)T$53HO
zU=}=b&BegBNLr&jhV?3$gmza}vD1G>u)Y=#U{)U`)V^no+dMcN(Uw{DL9SI&_Ma9L
z_B^B+MeDHiXMb`S*$WF_T%@;e`a!djE{<uAWP#~f@NUK=w!-NRy|I-+#hYm`O{NCD
zUR;3h@1n<Q9xy@Q_rK}=k)ce%TdK8>minb1dgxt(yl5F818)pg;NhZr5X@E!VHv&I
z{7GhT!fyc9-Pd3PTaV$kvMM~7?~5nAbD;BHF5P$O1qBLW689CSX??&M!bClsu0NFd
z);@=?Pqi4gou`g{H^k<zx4_n;h|-s4&=9v0G2)2=hC6zRALoA+YkuvZZG%SQ+sb6P
z`?XZE+^s*Z`_)AMj+#-}gdj?PeqA&iHINCfa!~H_5d2$O&(@863y<2f#r%0bAp3j`
zY}(VnGC~;fzunN2yFdZ6Tv$ed4()Ga@UZM8ggl7F3nx>dtfH7r3Cdxea^)bYBs}<Q
zAwJ8$NXH`Qp+wdm?|+VlirgV$`E_NqZ!lofM<YvV?-GX=K9el}+X3few$hCF0va($
z8+S|fRQs?Dma<q5AJk>jzX*34Sgg<5bDden!aFQZsA1Rcwv(Mt9Bf=R1b6iWQr;(Z
z{2pr}cAd0l3;cSqzvErlF@Fh7J2s0<tj^N;%PwI2xfVSC{HAlZT5u|HA=}wvEN*-j
zL1n%A!{_MtFmpoyn;o#4Rwm6S@3!-zXLc4GE_e?Q=4X?}C3Pxjy-G5Y0zCR75B0s`
zSpNLw?1zI5PI*3$1w{efj&K$it<=T3j!!ftBacdRESd5BEbMdW7`>F$C)I_mutvs)
zxy&AnMiXvg@yu(Wy7r=&^S45L{B{&Od`k^O`t@Mo5@X4{)Xhw7=LgYl<5AL?rh!MJ
z%$ZEwXjUhaC4TcgBK%>EtXS3>V|FbD=_Poaml(r#=Scm@V>7`2_DON`fN}~~YY{Fi
zoC)p&%f;CJnb=rAg>=^rqdlJ*h52`n;UKX9t?d4?Qst)-ovXJnUF`@<PTaso^t=*I
zyj5U+v(sti=kxf`#0~X_NhsCJ3693}X5&7d1G;8Gmb0T!cAzPXj_6B6pS$7mRSUsD
zaWnG?{zKDpyy4kBS+;w35*2#(A(MZ)usk4+&cC@JhzUdKv;I_?I=UCk)$f8IRVK{9
zFCQm0T8Xg*`<T}fU6xR!Lp>e4(97V8SXy)*ybO1-%eRd&a+beTyR^Vn_oW^`cUe&1
zScL;i?C@vRIri<73Lfm71<Th)NPV<Yv9H}-Hth9FQhVpl)P~L=b&WzQ{Irkp8-L@+
zBeLv{0mlkU^s46=ZGof8&8+uSrRwvsi-q&~18JqZ$gCr$VrqFI7>jGM<<n)cV(WTF
z7k^WY?{Ao&6~G4HRHC)k?(9~M7L04Jr7MB?;<9=BNc(*>F5IVp>*jXx`#poi(0hq^
zzxO;m)O!&`rzBzJMOESNs?`|hG#<C_FNEnT60($=&-#8V5Q-Pd<Fq?FS=P}oys&#0
zWou?)jiww68vaqdt$7@FukR)9XJ=T>?Oc{{WiJ_g9fXTjCxJ}Dez;x{1KnCjS=c@=
zX00+5XD^Mz1)1KMy+4G=Z>%J!{x-zzl8{c&FJW`(dw3;hFD6aMrzg*skm;aONc<Wv
z4!gI5B(4+CDM%D&%GhF`@R3yWp%0T?_?<7X^&|E#T72?Lh5b|Lk0xVu@S{cnTv--H
zbs>qO)1fhJkCh@mEVO35&02)(H5rsRioyG3-fY3IZ0NlFK-?PP&YJ)Fz#siOXp)bs
z`kd?tJ%fM3vRD6!cYH@;TSqw5iQY8Hc{2O#-G(o(0)6UT1zzS?#LO#35{X6$8@BQs
z3%xoMZ~n`t`b`JHwQm)E{dpFYUDF`mB>|gE&WiKqG|_>g7`)WCo<h{pDdV~WwrxCt
z6UX@qfw%XI(GF$MSSl~vvJAwpP7T7`k}ABuJ{I@+OkjB#ldyH7Ci8s123D-O$v(C1
z!ej37XrW&U9`83|ElWn-*ZZj2av_`<{8qAJxGts*iWDadUd%$Q6xmFp5m0$n0sFRo
z6fZ@N!{UAtL2;}l)}2eBTTb7=S=JUV_{g(KzcOgvrUw%H{p0bBE)&)F{>L7NDWO}~
zDq-n+3wBkZQ1JTJL50JYlj4M>LeZTiFyrYU>KAK6FT#4Ukj;IB^2T^Ef6_v3XHhn@
zQh!OObiRo0THomR<lm5dFj4$8=p1YasE6tE=8$!p2R9_>B+86C%6Lm>x?EWW=f26a
zg&UP%%(JyrqBEUF?b^u>+}40RX_jMl<ytn#K#g54?}Q2?1Ge+~Wj5qe1oP@BC*h(h
z-t@A<MJ+~nsO~?ye%YOwPb-7=&S%65ir9>RsVqIw4-Kal)5p7S;X?oAsQ=xA7ANO2
zE2)26pks)00z2t|$`N{?JrIlU$|ETC1&7@k^s=Y`eRsSkxh?V3WLPIT)8@y_19qZ@
zODJ7xM<!0Fpw1)K7-F&$H$5CmMpk{;_@!g%pM5ig{p%FqV?Vm=yBOnroZ-*KQFx^5
zA_|3xtah<3Ml8xC%~6E!4+PL@xo`~CdnbOmRVbQCy}gQY+gb6)dNwyD8!D%afll`V
z8a}y{qIT?o#)1#zT4RD)cniE^JZV$QA+~YNE^(Ki5<Xwq#(&8%XZ}M>(DL3cTwAr7
z9u0|O>&E%wVgJXBclL+(Mb}}I$zvGg`<`y8MhWwp)}V8U86^!G%JvR9NAr6R0^b+X
zId94y7%Znn;<f)M@{*6B@~i|tUQA%yv)%X;iYV^RUWp47_Tl$ezN|BAh{*9P#4_Dr
z;nK5XRK-#R$C6rFIj#&-->e4CWCP5;xe!U1z)H2fnBvf%;w95yyi~|T^d)x)>Jtg1
zdqeDRwS;9n3c;>@c~I;kLI21hY;!nADQyP0JwF_8doE%cCT}6IHW|y~r^3+2V`w@`
zo5}7lW}7Un*hKYqda!bn^uJk`t<N+-mjpAs{>qPiwR6XJEt9EXl$E4bE*Y|K*@Ei}
z6n56}Xleahu&7(lUVPAE&4rigTCFv%ANra!%qFnLTSlzPX$RBXYRyWMqL`O{B`rDE
z32UxPA3t}_g2lQ#e$Jdza4KgHbo$8P*<gQq{<WF(ZoOb}jT*STwLcD%n<ac3f^6dM
zJ5cy+9Tv&t)Ao?1%td7gyL?5N1)b_oT|a{9yvb<xFX0B9>94>-2EG%oN_U9cVSC}=
zn^)wS`&l$xdqdn1z61^|41t$6x3Ok)9<=FsFpKy^(8)T*4!zn*ndklR!PnEwUqu-s
zqF+F%*JAvtdXr3k?t__uy|DLqX+>F`7BhRjfmLr>P5)Jt(0}hN(0GM0vv^b^gkSRq
zt(i^Y=o$%Gh3$fQTknZRPKU^^%aqYQAFRr)0FTFup<QwqEOfOo*g1m9Z83ut*K{zo
zrwa_kM{w-oARJJjgR|d63TH2M;iuR#SaQdTPCapD{}v{r*mO|Hjb91xLZVoG)mC;+
zFNleoitzfZd(yhJVmcdnKu~QA0EI3unDzJ{tzMKYs7~>vfZO@7XWc*Xi`fFa_VOgj
zd?;iZ_WM|pTejqOTR5)oS3+M(wJ_$~Z_GI2&#nfpWM?jFfo;kf7I0uHOi3+8rNAtv
z*BOH&7sXKR^7Cx@r8sPAy8+>8(RA8=EQ?>3j6eGChdpa6A#>MsC|Y}*eF~Qa%M1G;
zFt`L+kq<i&_J#}}^g;G@D-&N&MIU81n)0F%QsmaLlzE}7fBrZM^WKUt_sX)a?t?7P
z{x+BtXTaa_sx0~89QLe24tJ&vq3-bxn8((LHDy1*_uj{`%PSwEuFF}5-;&w9#&Im}
z$O%e|{sG&ECF8Qj2-FJcCbyrOEbvhkUYXdJ%IxBq^66nz9ruaqB~O_32p3$o;4*qG
zxX4B~*FZzbYf&cd5v5)!gw}7npv3;QB%$A8`aaPEtc#p5DXmsiuzp3W8=~pb?I3hm
zY%2M^(FOv{mx?z7H=vKI8E!s?pt^T6Gn{gV!mDEO{j;5*X5Ux5t2GByTG!)goe8+W
z;HPAl-Y)WZv{QOT#f*jYn!x9spNbz$>S%H5RXV7xiZ`5QfL!nXO#bB-z!R3>Gryhr
zywSl3twU_<!BwopBmkD3ekRzI?;(qU9jw#-p>V3<fpBQ?GB&;cF}D5w5wz*)jX7!l
z?8>7&Qup4?%A>=@q7TZfWaSOcUFQhnw2JWV?RA)YE|Zo_ct$sUr(?&j&G1^T7~*^O
z;q=#&$voSD`OWV`32mzdAD3<5W2%c2x<}!i4Sli8XcxYYdWxgpl!-xFN7&269{i^L
z1D=LD!=$&7WYKE{23>2WLWi#`LhcPkDj#J_?m4l}sxy#LxllNAtQ2M`TqTDd2PVHT
z63g|kVBXGcRNQ3EE=sf4!yGc8*P2S46`O&U?}yXuw?RUV?<w$#I6<`!bTOc-pC~iQ
z7-a)`u_t{D@j<IO1=PF)y#@Q(lpbY#wP-k&&U*)7a}{R(JVKKjt7&XkfHaS73Oy1x
ziZ_(Shbx2GNohu5QF|5+jn0H1sV98AB#wnX6`*d50R>9bn1PWEdea6m+R+lrN}X`@
zI6drqnk@dl*&yiO8o~ZvN@lGGYeAu14x=@1OLR`ELFxz@*jsfF{kAIMYwc!HWk3Sd
zIPQR@ID*}(`A(bX4WsVVDJ*o^B@jKvfMM)5@t)!bdbO~X#`xS5r+x9G;!7>!oTY7W
zMr$~{946Js%q`%}w0Dx2&bcgR(oTFAJDJW5C=m=zu8B{7UWOUc-~U;Ikt{o*AJ`ur
zfH!|ngv?d9P<z2}^1UF7v+DQKdRaNCmuCuvrd#M$Y#lA!{fI3qnh1#n!>RC91YyNw
zOz3pOVR60CM8N}xH+GA5)0BnWzl$iTpC=4|Fqp~d&KGtl9YSR@f7YR43SZm}DMMiw
zzPhv@AI@3`dxwo>nUR^|qffQs>UCk{HToxHzP4q>1~+NTjTm9&7CW38auUVJIDD~Z
zF%Fy<&E7Q`<6hOv@ZXA!uwzI%Tep80GxkbA_0lYH^wlM>h5Dn<SZ%yCX$%eKo7m1r
z0dyi-+4kszVC>^N8o&K4fnEb&(5fvC(o9^5&~Iuh{pfp9kWZc?Y`Uk5J#`H@{^oOt
z`jf;)*F6*}x8=i?@?|)|@0(zwVF>E5p9;hl7P(fReVo6Jo{U?Hb=7X-%0VjhV%Tdq
z6cj5S%zI69tlzLfN1fmfS)i7i4x45b&W33t{y8{|`F;1IUXAzY@!&}2bH^Ggt=w3p
z`42dM=9zFTYA;J#SdShBR&ZO!Px#}g#Nt~s=$T&<gx|gercob+0+(={wR<S;H*kY4
z&kOLe&wEL(V?2HRcnSj^PiDuj8RJRARd`b=5w|<UO1k$2!$_myxHMaXSw665Uaj6(
zXmy%q#Q8&?^_9ZfhjDPoWFsV;_)GV0av+=2L@sO>AJV;#y3(ez{o;5$pmqShy`3Tk
z>K}&qn^)mz^HJD;PoDyVtth+e89X?4P5f;4R9H1V0ep?6d6z18_PezRnhr%#K`&)Y
ztcw+r{u<NHI!CPcai;Rf9QrcSpN;T|rN{@0jGIvod$rE8t$10w2LQH!I@{XR4}YFK
zi_0P&3A2Wmk!|%drk=io=C1~}$FUcjmY0U>&nhsdk;^51pH!L4&s;G>Uls~B^oT}p
zUjP?2gTAfMrJVy0!38drwQn%P!R~Xg!L<^6f}T*qJS9}{RmM@di|Li5n_|B#qA^pV
zLH@Td^?TkeHlJC?{7UO7=S(B1W-Y^M(s?5FQVgnX$Oq--Raj+bg~e5USk%<>r2j3B
zEN_fto#QS+)|rPOWcaYbE5FgdTXAqhs=XbLs)JsKmDqmED6u+z3%PDf7W53pW0cxO
zN^`Vg3!d1p+PC>^WYIF}-}|_f4JZ;iymzt;4|A%S+)U2RyO_4#MN!?##CB8CD>B~E
z%39{2P+u;vQ%(Wo>g_^d69ZvWwF4gQ$%CPMy*PG#wNQJjn%sBx!^jP?Y{0^2@ZX<K
z9H)2^uihVs+Ryc=eD4%CdG{m)t`|$o`$p4J)2Wn`Ll-IY5}aEtjw#~dbovqEdc~sA
z;aj4&I34bX?SbcCCPBW1t2i+7n`rz(5hqH0-8!$65Oebi==qw`*0dyH#dAHBDUqe9
zmCEeTjIUL;KlLP55z15>7bCoLbi=*I7UJ(C9%w%@0zQQtW^3;2!l>0ppxjw{S@U(B
zh>EMAW8Db0qGkh;b|cNr)}-G-Z<*8hXn6A`9XnlK)1MM$@yX<ToW1`RZBx1~(c1ls
z@{aan)70vi<`C)c_oxijZyU#Kht@&ydNby7U50eSIN{DkWnBF}i^5j!r;$f0iGR^V
zhqt_-BQX=n$|VZF|EQ<>tu1Wkj$g1O^|ELgsK`z%P9aTQiLg~QjXurK5Dv?Zf|>Vs
z3;!KhEXcQ<ku?3Z1SOAo;zB=1W??ZH{Vn&RSJG}c{rw2_8}vw6WvR$CbX>%9L7!l?
zd4K%#P9px=D919*95F8OEF`b8WzRh{A+V%B)9OQP$n3|=xa!*J;I$6o*{Q?W`shDo
zplHGlg~ow#_BQAja*i2EpBD$_Wbi#pufnKF<MCvoI)1)BjJbx$v)L-c#35l_uv1#6
z`*4pA7B4rXw^JXUj1v-JU3m`5ukHl{&L0z2oic*@ydDbDnFxxSHvryOO6T-?+TWmU
zdn(KkEqYgi^sS{?6Xj`st{b~ym&OdjY@lN2E!d>+M4Zz&URd=vhFzWeP)s$7g|$7C
zs3KK|H5cq)DV2TEQY8b9yi~#}lVqs6HW6LaoS462G}*p-2G!QJ>`d}{*7ad8{!jKL
zW`+Wr8M+TPA5zB93)zTiCTO(z2~+I1TTJ<HFU~(#58?Lv*}B>lXdY|Ej{8)w+Cztg
zO$kkKJIMi`JnbTJtTKzA`2<F`{-Op$RV>v14gMb!X;t|Z_<QsUd|aIZTPl?Ccv=Te
zy=%n0|5^xlCuD-E-a;{L!$gUqQn#>nvWD>aXBhwd!)|eT-@bHBWghE_RucO>EQJFP
zUE%kR-!NqQEc&(kIMzOt?o2MJWUE`lj>bs$$OGfqY-?aOjemLXfE#q-hy^R3rG#G(
zq*3%Td6cPmE%wVW5cskW5bRqA$NXoLlH(S!XX;)UqcEM)Ut~$E>}-XRSK?{1$iSta
z0?I7t6e>Nk#CShxrtnJ$c0ctMwEAn|*@_$V+H9u~F~7A+ePch-Ji-9&Qm5jk@Ab^u
z^d4Nl+6j-RA0&@6K$ib1#QUpaX?KSWZB4o-;b;5;L8*!sTX`{y$N?yqJ6k;Z$(F7x
zRUxIeHDKE3D9xF<l;!mM2Y1H(s&Y-KhK}BsDb{o>Ol`Jgd-wOjX#N(RKF|$)#LJ}h
z^RAF2ey5sM!^uXf4|o<u!?siVFfOb&Ua{FNehzv`mlW&8DdvlDke(ivubn~3yqnNn
z;vwa*GH6LR!M&Td?C<3{6t6d$*MAv;;m<RqUQ|1H=C$x~MGHw$syC%t%*W$;?n2#b
zCrQn$eBtN1SaHetUC{1(4~orp(t}fr`0=tP%vA9)EnZWLa%wNo&1WgKI=O@1-MQ@c
z?{{=NEst{lwulZ>-RSTvX~EZKIXZf81o{NENbW{NK;DRJknsJgm|v^UK)5E((5w|r
zS_YEtpCVRX-Ua+(TjCe4WYdHh?CQItWI@v)rRX>qUNyl_Ck^oKtPAku`Y)kZT^7jR
zk@8=v`mAr#0lHjzR4`a~idGFMrT7Wz;w2d`x^`%)u;y4Q*hR==ufdj>xWE>B&5EFp
zw;A+nEr<Aj6VcP|GbpQsV*28HAj$jy538lm#=IE>H%h4H<uYbI8Q^C29@Ndw6YOIS
z(;xqGsF<TG*quob0!p=EpYLfl>G}xr3_U26HB`Wtv4e2S{W;<~sRr}-b`Bj1|0wzf
zU!^}!XJE8{5PsHe5X*H-Bva1i2{vCHadmW<Fmjg_Rwa0{rgH&o)ah4Z@E%c&_!Ep}
zd>$yRP+}#Dh2->T4~>f$Lw6b%!6BD0QW#PORw-7PZ!tvdS|#O2Om{)ywsuMDXKBXn
zmJMVz+cM4gLl_lh4iz!iSfZOV>&Tjp2ODC=l-+Ky@r=FTWgLUc&lQkW+b~R!-NyPI
zbY;oeiNcn`(d<KU8GAr2l>ebG9hdH7JuRo{+{aPaIA}k++N{SuUHSlafm)dNI!1hO
zJym#KkuN@et%}?Gtz(zYd=eIH)u#!bMR2ZKq-i%Vz^AV6(>tdbk>?(J`Wn&z2W*QW
zYEw2#$_pTyv5PTxeI1?2cng)gq->T$Fr4AF(KyBrDr2*;PIDH9Hfb=JTFQ={crUtC
z<TKxaHWcR(%wox#C8-{x5vIQ+8EQskzTXTr|GJ{_ok8MXmrIfzW8|^yMHQPr@G158
z4q@A7l!=X<2diWh_1U+Z4$SC;k2v_coaohX7xGkJNzTsn!2PP<xDnci1+7vwcA@JC
zY0vD#<n>-iJu?N&4Cthl<=?1HcLxg#+s5w99mw{b4@IZ7HteG^vQGIh`ny)7?N|eo
z#oaK(qA&c@R)kDxe$Qd187d_HgAWgSvww#|_+yKVao3SMAk*;yW~VM<_;v_V@K9E?
zeluCOhrmWvO9-;O50B?f#lgq((9TwoT~5v<^9p3WzZjwCp<3a->2d}|Hn6mc!T$4!
z*t<OvB_Dmzu{sXU##U2L%SI;C{gm#`(Sv`|InDB^v<5U_J{3y(V50jSXi<t`QL?)1
zu9g?&d`FUwLE@M-K9E`IDrhu#ix1;e>Bz5}IMaS48C%|=q$hdIe)%ve=-flreo{KR
za3kDzSEYUbc4E59K_N`x0?V3}0F52Sc<hD=n<<H-%X22P0S(pSE7wR;Eximc9W9}4
ze+k`KsV=#5Dxc*K_r%sKzBD59B8@um55$ifB~b?ufBYDUb~%?>uA4Mlm{(6bPZ(kS
zuTj`*Lk=_R$$(Dh)6A+{Q&|2?Mfw>Ov#x!5LUoZXTmR=6GdH~kb9(j)uMb~lhl6vd
zW7BTZ+hfVDeP=Yme-rxsd?9QaJ%x2u4FcDic{rh@9sWE~gSWZOaBhzoJhJ@*<FcNJ
zCn;M{QQb(b?apL(d?7ozLi${r`<I$y6|0Aw<gj#D0X+MDoBo6etWLUvJV<q71EP*l
z(jlHbvoK|M?WSQ%X|oW%Lz^8oixh^xd`u&5o`G}+ANcRgcbb`hf<9;jz}UN)pjI)B
zJqZ0p_6N1t;D43iJp4ZOoY+ZgV(&xb5;aNS(KyyI%L_BBLxCHv2qvL#g+;n+*r=xK
zRAM)qy~?P9bvG~4$Gzj&@>iYWoV}KKub>aS{Wgd$n0@3ugYSXG_yMeB=?MOKfI9nO
zqlU|0DYDETEyDB_d&%h40Bp+n1hFrAXzchq@cxDoxd#Taf!fs+cFUfHt{x!{a`U04
zqVa6aHd(s5Sn9!l&_?r)dB8pMhd5Owyk;WJ7OWb9Q@?wleD45B$LiB!b>Skc2~<Tf
z*&BXtoXQ?5jKwu=k%C#sAY67T9?tE5By1mWj|IQcu;nIBKx+>x_U+9#*mrsuE6gbn
z)T0J4#iJLYYD+70EA65~XCF#t+o<E;pknymS&F@A5A*eZ2jS|KOnR%5t?Ir=CRNKZ
zcyb%qwWwo4jS4HQo<i+f2e7eQiNy@{292m*tZ9NL?pwSQU+#Ixb*C(*7cWcLTLV8F
z+;9<A`=0_!t1+Tyn=MW(I86&nxAQT3B;t0zR9rVbNqp4U3D*)Af{e!iIxhM!2S-;n
zGHM>JZ%|@WPCbUG$```G8z<TFrX|ACryF2?j|^TYe+Qcm_Q#&i3TO}aXL=cZp`y<L
zG`Ok9_Fq{k1e@uILyG0mR5OU2XGnF%@N-~)C=|YS&w(rqp|F|7u&z}Pr>Q>YKkQ);
z_qPT=scE80N?+El&<9^^noox3%J5VAW*pV;CR@~HPdeVe#pio>u#Tsjr8?evwy}B|
zHPsae=f^71wK5WC8Q!G#A2(q^;Aq5*!$OG19{l(4G#!l1q77B+_^=D3*?q-*;^!6C
ztR?Fvj@-k;(S_5=urVCHr(J<-Ss&?J^k<mWS}*=xJ)U*091lyq<4Jid!=KKU620>S
zg{^}I(1WB#2r~9zt`B=)m18)>UY5f~=SWHNqCI$DS|Rvo&n%YxLyPSH3udpPoWX4M
zUMibEjSWcqDp<1r;NY3-V9S&tcgaNlTgx`~<ytSgkfqCN9T~ZkwWuu{N`sf%k-KgM
zesGj0`He4yWbIhyzR!iVNn_9{SsY6~y@NH+4kVL;TsH1+8C@+I&K?}&*pPR2u;SQ2
zR&?zHeb3jWTZ)0uQ|K%VygpmJv1CBC_n^Jh+eCv#*B@k#1AA3l@3R)}-^yag-}hpE
zj}%Dv$_we75{jMXwb&BsPJz$1QMKO!^t-RjR-BV#VJBy?<Qox!RfRfJn~+79d#q60
zCRbfIWe;W^F=UUI#K5P&N-)*lk%jxD!1VUPY~IKu3co!JZ|*qAcBKLxo3aG+>@!iJ
zGJ_O9D#Nu~&*)-s2DypKn5XtyFv718AkD>SzUvl4?0HD4m<qQ;CgQSfcgWrQ46p2M
zgNjEb5MSgAL-jr}o4Wl{zx<crFjp39Hd>&2!h62qLKD{&^Z-(&rCrCB6j_7q9#|#g
z1L4wGzMF8Lu1#HsuXD46iwS>Wqo*3E9^s*M?=6tIv7Ollr?TAkCZ-kdjOD!}*sRjW
z!k*VBSYz&Bu*g2iRI7$!L(NrjNt6%fkKRqPyPd=Xm2ZS;vDQ>;J&CF=n=rljbr3cC
z5b3QM!c^wi;{5O3{F4MjoF$!;Z8n(FI~_~#t$jnorahMCKlO0!$iWnsoi151vw}~&
zUj^TpBkb5D{XQ$)Cgsf0K$n!+V?!@kD`n)B+9Pmus5=|$<c$A3+DNwjBE7t4SY<K4
zoB|e(V5!qoSeb?y7VG^LJ}*cI*Sw4DzQsXSU%Z`Maz6{E29~%YY(B<lw9};xj(B13
zOsw^gXHSO@#pB^gaCT6au&YNGC)+Q;Hxu&U-2JaYm;Wd%z7|P~ZO!S>sou<4;gh)j
zWdJVXtx@OtEy|Dg!@XT=P>XjU+0POR8#5SR6d2N-)&cB5&~+i@zeGvNAYBZ*unC`S
z(5g0&TZ#qWS72fIO57fl0{^)eiUn~6Z0Hgf(%NTERbJ;Qe&c_1w!Q<(O%{p`MxpHU
z_&2b5>|(f+pn+?YwzHY>PhefBFMFvFh?7H#!Q@ChstxpHyQDcz1Giqd{Fp0dZ+R)6
z>sd+>$8W-xW^?fGqe-HBFeFdeC;4Wx3-V1@qhyqnclSF6MrBeCcGY5x$UBZ@V{~ci
zrQyggJc`ZR7KxWv=RxLnIab%zMvvC-hDCFe(PZ9iwncsfb2O9gp)?dH7&btRY9^aD
zNXk>zR0?;l=uqZcW9nAbCFf7k^vu?T4IUUnxuZ&<)i8nbH@&KIQ#b?C?H#ns;~}y;
z8Epn=!;&;d`1}1d{Io4b$E0KI(b8`abqT;}$89dM!;{HB2^O@vu2Nv`G<N>$V5n5#
zz;lNW#+`G95<lsT`gsKe-P$2m9}Pr{*7fZ7=X$)Z)(<tS$3y7mGFUmXRZ?xd3^a`t
zn8Z}Y)@FPJJ6W=dHdy?^&l=je{BRTRn=_9zPg=3Lt6ed4X>STk6WRO$qTm&y!M@z)
zfc0J}N%cWd*X|4)@-G5~%tkou{XuxGRf(f`Px34KL<3(HVEoAO?Ai9G*rg?pZ};cp
zP5pWnoL?t&Wy-Se6Y_AFf*u`f_(DJJhm)cGA6m9`jj&zHjv8i9X9q?sV{ulgRgUr3
z@m{VjJAE@w>^)^E9xFUdvbRqQN!MCLRhJcv8<r@%=#b{0_BD$$pH$#e?>@{)b-$3c
zxgE}z+@(iS?IwNm5oWM>A#3I~FyE7ZA<H}wufOkLat#UOV6>76XHD6XKLM=Pp%0qH
zZNR4Yfq1e1I&f;8fgS#raEQ%0s^uOE3e)s(=a(#YZ(S!o`*)g^PMXGgpDGe8l=UIy
z^?7makj40>y+;zD8O1u5>}7gurF|9K?%;ol2VuAKFuZ<eGA_=F!(gW|%*UonP;*tl
zJq_{{oa6@wn;5t)3Ba0fBo66&9fTk4@OR>1h%2iRcXJzY!v`z;kRiuZR~3QMa7J$7
zA-GzYjXlp6(7>EpvRl4~8J{={3;#QSl`I;MPjMF`Qifwc`7U8w%QJA8e+9?=0Mzu<
zWQ*);;iRW6$tTt_>k|_&Q{g(jo0Ly=uGXToN1Ieru5$NoPi9%BYpLIDLl}PPEY(!6
zXYEtgg8tSWl1rjB>@QRSyBSYm;Pxyw{(CMo*4?4hCyD6$V7bK7e<F-6TM0q=9;~tE
zGG+ID4cA6Lg=r@~k>?RLrd~H(SUElt&b@R;o$><Yr891(c^Q5v^=C_WDdB&IPSJ~5
z*PvsTA6jLKuzJZKc;dVntfV|t^_37d^2Zh`?s-dVf4+sC@-yLS$q*b8Uq#;x!)Sbn
z5t^R5Pui`I#fITy;qBj_uxUiDSm0{G-px8GURtt2I{!P7msN~#_eno&p7d0_Hsc9s
zZt2CI=WVK5_-rs}zEWd>DzR+b)@+GmQeTuEA!W70B3W`+Kc=!ei7}b$q-rsOZoYX4
z4cQyWtkRe{yc|%y;$0z&=)6I9?mQ63oT?H(gD0-*Ud`GL-xlt*MTzMy%dp!jT5#x`
zN{eRP6=%*k1xGI}16iY6&^}2Q-HfDJQ<XF!anX33nehdh$EYJ0UrlC5A|we>pTT8*
z2Ibl33CC4~gr#$n`4kg#wx>##GJ7=G6;LL%&j&$_wq+zcnx3B>2Mf^xua9bGE`NHl
zlnwckiwP|xm>hydfGi<z8kwEQfMJ__>55A$jNaQMw9QGO(T%!n^JNp<Qs^O$O*Mo4
ztDETVnbm@Xb_gJINJ#kQ$&3a{yI5TA)6d6>OsKbDch278FO>viN8NFFKTrmYx@O|>
zUiabK>Pakh&=_`AGYfx(>o9pMBi!I4k9PUFz-wKmkxN9u2|m+uojsuZxEeJY{OM?7
zqtK?ki-kQ=WV3X3uuC=1$!TU8BwFl8=f80*+jTnrSUiQMr`bvz6czCNvt;O<d6wEI
z-69$?Q{tR%D(NiTEgTGRVplJiqWYIiHd3RJ)IP?7v#J%7xUE2^?-D4V?!&Zp7GaXU
z4=#9LCNvgoXYDqTV&3RPQ1G_~`9lvu#k!rzZ(fOg*Vlqy*DPGWP7bqN74i0r3&e+g
zgYD~H(&o5Gdhp(e4ZAeHdfl^`xb37jKK)_F64Z{er|$k_cQ}eZ++iT|cCt7s;=Azi
zv^&ZUdk=>9wOC%GEKOZx4)&@(7~(XURjPDB_VfaZxYEKdIJi*H6a`3f?<?Ba{jPGf
zwM4UXb};Tm44Z%4g1ya=&S-o!a)l4z!jQ4#)x8x@nVGV&Z-2wm^aK2Ksdjd9mNPR;
zx(_x37r~G1OC%nrE-{x8b6NP1?ckh0m}yHHd9zt=%%W)@`>aw9hbpEBvyTs?+|Jc3
zJn5pu<n<1*CT${F8}?yqGa|6xpDGk%MzLkFCop_w0VJFBrRnOv)Hwbg^hiCdQy#}?
zl;bftT$zIZGF+fd$(shv9V6T}+l2jpU7!>x!x;9g4^7&!i#^xf4Y8q$xc!tGlM(Op
z--X$t{i;3ed#WXA8D>Mo&t)_+?I#Tgjb|$D()W1vI+~`wovpn08T?bC>FkNkEa3AV
zh})S!6AxY&1J-m>?bQXeX#F+%)|o@8Hu3Cq*)K8Y(@-`pAQ1)(+sj<%<iN(w;Y_ii
zm1dMxkxi5?E8+gZuxaLE+JIqfdhtq}aoQPzKfk1cKa*JLiWi`9sJE02o<uoM_ltp2
zwlz(=H`Gr#3QOBZp!;_T$*f*ZKeo!@tYx`$%&;8KjGjb&E=?3QRvVzFtqh~LhU8%P
z77PO2NNI$UD08BL;g@;XojjjSiTNm=^w~*wSDu5@XVYPZ-Z^Tzu@D}cKNZ_mN?_Kd
z4J^ks5)WKlLAs_5pm=R7d*yfxjV#LH^}|TIs3THHU^cCk?ie(~2XE-#1#goHRR1R$
zd2K^XD>Y)vx(fu43CrPm;#cyJW<w=aMPkoe>FK<JZ!jk{8NA(cNHXy!jI56YnV24k
znR$Sv4nHgOI-`yKp1Q-4w;*^<xeCUMWhj642(<M&0<X3dlV?*Qy`JGqb8lxr-W4;5
zVD@Z?iauq@KcXhx18mO=UO1{f86TEy!xMR$Y;Rr+lX05L!l%xqmeu=lWmp(n9B5DB
zd6QUz#S)flA!YEwzhifaIeWI=5`V^MVB>`W!qz)Y=x)?R7w08X$Cfa0!x>MyHM)w{
z{;Cz0#_z$(Y<+09d&4r<^~Xjt0rf8(6t`^j!wcIc3Y``EA?;@fE%J9m`RTWW?OICA
zHgp=3Yg1;ao5#UM$`ay~eew6^T+U9v0-m4_^6MF_bZVq+6;B}Rn<7>2oJY4Er!bM4
zsc7hEc)w8r8qRKnbLNUP`^p12m*R}`m5<Phb_s}II6D4UhS`5?CVp}e{ySPuBsY~_
zY>I=I4*s|)LCSC&x(L3ixsqj{ukkWlJs9gKpzPRc@uJ@~dNgh?$ZFh%#~b(3z2j1T
z;8!KJ*OM^!d<7Jw7Q?7hEpV>U2RN@V@NTjL)7Xo`=)i#(ci4-Cw(h`J$G_0?w!1Lv
zT`FAKRxB*D8VJ8ue}}`9>uB`$!;-eViL^GR7Sj7ylkQRzY&G-3ceDnVjqA-$zbv5-
z?tPf=M;Gii8c(|mPKqbn1ZKGYKS;MSqIuURkZYhm>a3d&>1tMN>rMhNc_XOYeJ!{=
zo+9jeX-aw-O6>C7Wth5a8b+RI<m;j@(231sm~Tu!ax)2K7u3V3`Di$H^{b=hzaNYH
z?xmCIXj^c$tHL0aEFu1iCQZI43jL=h(XR9AICIocx@dHc?#`YIo|jul{#tJqu(=P(
zZXv-Y*BTRw72(En1x)ryCr3&YFI-nd)mjVu{im2JzI+j7d*;K7UaR@dapv$|_YlPG
z=9$J}cc!cQ1KzK{&H~T=<ChLRK@|;JEYUk3KFN**xi14@)@TJ7kyJ^nCQ#Dsx{qBP
z8_I0;^T5v829Icmv&&yg7_&*nyQ`z1@w@;n<ykc7sxv<Qv;mL5_(NX)eFeMNBxrut
zO{>TC!GeY9aO~euw0JlOj0RqUeII>s+h{XV_&7$=Da|OFkMxC;4jY)o0xgI-w+V(A
z6w)LqFRQ3M47(N=L&L7#SfHH>>RVL>oZL+g7lwn8{z<U6F=Z@dCN13i8D@C*<(eNZ
zU}ZTL{OqOv?1f=Dbjqbr+;BM>HuJi0-%FW2FgZbw9;N}aekOXIPleO3uJ9xFra*+J
z9&LS<Ed~wg3-zZ>@E$!8KK#BXB$kzukKZbMdE^z9?i!2LtNTz}#1)*^9V>=?uZFg(
zl@RUy3YxTYS#ZyJ;nn1$u;G(*jv8;sx--90;ricV7(W+$ALhW%hr7k9WGNfG_<`8}
z;Yj#>b~ej(Xco6eO7mRCDKNIvkUV6ML8@7*pfueLFY14wm;4C!@99tI^W_UvO{fO{
zI4#`M*e2#jFBT8~S_5_CZ@{1YX_V77NVNC0r?K~<;BHZx=#wo=KMR|o|Kv_^4jPHw
z^2@Pw{Y{cz5rbE&9l)d>@T#;At@>0U)pleFy75cc?uMH*wlfmHloinDr&13?HypLy
zr5XR4Rj^S;SJGiW2?uQKf}5vx@o3o))EPe%*UgC}rAdFmaM2MeTr-|sE_h5`-{Z)j
zy$<RPk3vT23!$^LQqq0>ENnj$37M|BVtSE)OLw@6+fB#8%s!83-?bRFdZh$pPo~g4
zVJSNNr-%Lb?1lm03m24<VY%x|zV62o$=;pMscND&3mo4hc+ZZP=p?A(2fKcBdvAZ+
zyahhAy<`G4IxQ0lhu#t+BrX!&fUi*8l}pAiZKb~a1$gW<0PMg1g86GhNn3sh+dp9-
z{HOT<a<0cnI-Szk$@z_vjto84-$;$ws`SM%i8<u{cMyyEI0uW~Pr}eoL+QP24nFo?
z0pqOX+1O=|$y3?`_$m6E5Ok`AhG;gxe-AZTx?426Gh;Sm4#?*E)x(pbGr~fJLAbBc
zf~}a=AMuSDoquQ#<NmbKa-S6Pw+o_+XLyO-2Vm!)ouh^G?BUAzM<7+1uxa5YOw!{~
z-7|o4JvXo|jsly|d5UE3%Cjf;`@s52e=PZViyNpW&&JJg!GBv<QMjT3Wb4FW=~`=Y
zx;}?hZg>uJ8}|wWPS=ojo;p3tQpd+@O_}k<J20XD1<F!*3gd@A1o!s4l7lNHbZA@-
z-BvQff=?%@L#iogd^#v9?Ykjl9h;BeQsz;8P>P_^@*lgttW$7Y)`SaccSC|ke|Fh;
zjHK^pC1!6{1EFnF{(8(8(CRj$y>jE&i}&lnuHzH9ud}8<l4ER(j4_r3c#6whq`Khr
z2c+*joDPQd76+&-W=$*SvJc(%RG^%Xx@F_3^HVaKFII+}jFC+5z5(^Wk}Q~qr@`@i
zo&0}4|B+5Hu+8TqX@TD+(sPU!Oi!q=!`uGC`Y%ct5W0fiy>6hky1CS3x04!vry}&q
zz+HFxk$-TLaDI+6TUhIZ>D8XF`~7<u7NRIV3suJ-(wgyRZVpwLWKv((;iyx)iMNQA
z6ZBU}`HV};M3+sC{6EJL`0JMr${J6{B}=3%%V2plS#v`irQ%%W+AU?9GJ<i{p0%vi
zHWj)*%dy$%(vHlyDGWO%;mr;kbdv6^&q77`K3}SJT>L^&wKcH7Vkaf{>J&EZNW>BU
z>{we$2;6hhWN!~nrq$9;OOF<5hWyHLcw*QL4SGk(dg^s5{4toF{J0cCf+w+Er%piD
zut-cxYoG)Bnt>b9S5Q5@4_mvM$X%+tCr@f4AD533_u_lh8r%=-BV5G%Ik#w>{8Z>%
zmq3p`r?4?Wi}0Fqf7JOn9v`JofO+Sh(v#R1RN=G|vZZ=U>B_k@;{7bD=%>IeK6k;0
z5A!H7Ngh0R@237$*0{+~l?h$xG-{p}`*(Cd<5!rH&AyQWh6f?5PbY<z)1YYVDrz=x
z;#6;Y2IXT|g0Gajche9K1%HL1(#(GJQ)jky_YhnVbOx92+rf^kNyQUVW<~zi7P2aJ
zU=H?b6uQF_n}$oX5VMJ{*=yj)r0-(Tk}c9K>>8XNq>i*%8A~qDW+TcXnTE1N3?920
zTw10>RpMw?@njFWXD{HKo`>L|j@LA*HXT)}U2vzPL|hc$1HV4G2<7cg^jP^E?fA|y
zIky&A`r<pqtQf&g_Z4AK-W-+}SVNNzMAP7##c*+cU$(d7u<#{y2Tiz81^06PBkRA#
z@S-pSV*jS1#_|A&mF9(1Cp*#dxk)hP>Ql)F)Bd1lt`4he-oxeU=k(86j(X+E;rVt|
zRw`v}=M6WcALHjynQk&PJWH!Or0a>cj|bwc%gyBBWyI3=Wh%-1@4Dar=~?b0ed+%{
zJ<Epw|EA#o(_;L8yMi|!iQ?_N)A_Nx!uT6oj&ZfHiT_?ZmkWrQ!40@B&-X<1=Crpo
zazkGOmpHMD8=zIo*-tpbhjK~W>r5;D;FWAH=gJFCFHw;{?ec~ziK^xF=X~cbEoPi^
z^GeP&{VX^3c{F#EZ{;NIwVbI-7I&(02RB~v4Ck?XE_cE>gj30k=9X;_=l*@IAG@&c
z4lYomh>HpC<gAU>b2=%W+_gdbxSexix$Q^%xSUAh?lhj|jyv4pGRKB;%UtGgV@q?n
zXTB!<jAKLiZ-M)G)sb3!@Lw4|!Caj;51GQ%Lj$)*tDK9i-^KYjhjC*~FK{n6jNzxa
z7w|i0{^lkP_u(%Gq)X4yNl!#h8qR-vYQUR*>EKG<X!G)4<oLd4lenjICh&DJ1NrNZ
zK658$=kr~&e{z;HRQTVEO1LktQaP)q@43m-XL7bKyEq5#1gF}p%}3joa}&mV<YKJj
zI5nEb-}!Ts8#S+%o3&~b-!k_YSJ?27du3I_dGtTTm0jG&Z9nv!i!2|(S6&;&pOe+*
z=U@K8HJwo7O>Sm$$COucsYhmW$I|w5X$H%<5%O_d=K63hRSe`j)JF1di;i<MB9Xr{
z{UFzScPjTQu`j=9T0g#DN+mb_;}h;;>`w0Z@K#Rgi9BEU;sY0@{ex>YdCBFj{KrN1
zs^!MXPUZ3k9_EZs4B$JiR&&*Q2f23VWUgUR5jW=cD(=iuUH<RCt=!MfQGC~^XWYzK
zdEU{*jd#eF;lEBb;?v_ydB-bBTzr%kzjd`K@8o!&v)1^`bvyTPcheT~>$h*@nl5H@
z=4H3I?+tSN(Cg<olZ!o^U79<WnwQSm?|;q33^n9slSS@%NFB$Gs^@;B?&sJS1ODLB
zN1UzcFK+(h=UnurHqLw8IW9QWiI2@Z&N&3?^9H2GZxT;&`uLDrK1GK+KVU6qtX;w-
z*2i$WBKC2;xAo&bE1u?-KhNPfFyuQGVz?cVdpSGI<vQlS;r`8g#mzm{&UNLe@?J};
zxy+Mc+*~y;uFWo%3y#j^Bp;u0D+ep{ih2|HwR(5BCC)kAsLBj3$ia~BH!O;qH|H$p
z=C*;`y8kVA<s|a2TOV;BB8aQ>QsxcyQn>Ix7dY1rjz9fglh<>4&lOpZ<hMCxbF(X=
zxy((f{BFw&+<Xe=@7)>1%a)7Wfm0v2+^l5IH6)fx`tKn3LmN4ZpaY!C_%YmoRcYK9
z>j-W;J>zQm9b8RE26y#U4QG~@$<_C6<|fy7abB8jTu^cWH!``F>wLJEGaG-LduMJg
zJ%5(KPuuOxyF}064;1R~7xSt)$IaTjX1OZ=;-?nBXTS91*|!94-J>U5`I1uU$t!`M
zoBx|T-(t?UZfN3WTWRrbcJuh-2CJnU+;47cvkpJ>b_*B1Wh#Hm@gwK@uZ~+~-kY~=
z<@o-+&G}hAcKqP#5YGQ`6Q^RN&M(%Mex74vIg>w@T=0EWzT>wy*B+zJi?-`HS9Ljl
z$;#K<!-`UFeo8O?Y<nB0e@~9@oHCdn7J8J6f2PhE>zVOgPF<YKg-4vh=8-&a{)tP;
zf6N(;^yW@0n{cHX!}zPdzTD+Vf9~?Cft>f%(_Gp2HZI_PEBEG;5;yNsE7$t3h8uVL
z5obP>xaYl-IRnQlT%GhZrum~}&S;GEw53uQ=RM7V*Q*%GtN0Z2D`tD}qot>DHP8Pi
zCG=<W%Z>eby{8NK$*xu0v7kC`s#`kmhGY0~X&SuF`w_fn&_LdQ@*l23<1N>@$D6Ng
zSLO9z9pMvxSMV}_3%J4mnm9v0b^cnQ$i+Cy@v`-y{7k<KTyUKS=Mk#Ju{2k%rD+|f
z__&UfI7f4Rw0m<$d;Q=l<gRngPWhbdCNs`9qnyh~-oaIg@_d<tK5v;?$36b~mTS$B
zw#NQ8=IS~-IIkZq+^o*od`#OUKA}~U@Bgbef5Xs#-~Dw77fesM5vy-=p$?&3^2Sr#
z4k4eLUGRkK@_fKOaJb9Om3-w&G#2xHbjR^$4}0)lcY}D@UiI9m#L;}1i84R7DT51D
zyU8i{DD%Uo{V$5nI}q!y4dcj8MpnuyqmYVRcafw;8k7o6l~GYC?TticWHzjfjLhs2
zBBRJGQ6ZEN=~q%Jz32VQKRll2`JQu+>-ya1gp>8@?ex4poLolxv1`L6+IH*#)OZsh
z-E|+~mBCmnaUa9$vtbn!g5}+U<nh3kgzj{reEc}YpH!s7N4QAg$pU&`cbulqdymBS
z$7q<4p}((v@OAHWI<asaJsI&NlNM7tu~dLMC1=tr_WO%WA|N6(osLM=<E`ZYf(!QG
z#ESd4At+3}8Z(!adkrGPZ!^u9?}N-Ddy;aNptTK-^u1MtHa@kXQ=e+kEZqQ$J`HML
z%}rNlRpZ5nO(b!#2}jB}$ZG69jwEPc?v?~he#t`d%|x^h$>5+_ELs+R!pvLOXm0!=
ze0eHJN0(j2t}A)$3gue(j%T1l`X{=$>#+Q}DZUpfg73~KB)BZ;VBrI@-6}>m%9>C#
zCPogcf^c)2H;k0nXM1CYgmc+Y{O=;p-dCgl&S#)p$AEGpI&h|U1HBfPr@&z;(iM{?
zm0%vy6;-CE%l4C70S`&8Er7P&9Fj1$#?^v2e6%e?z_N#!Vd{hNmRQ7<U&qp*gJ>U6
z#?NwX%ncAi{N+&8nk1v+Wd!io3_p(fAlBFoeOtHSJ^3N*R0(1u|3js!8y>c-gZSxn
zNH~}d!{>e2HuE-xPtl{B-Wll2;-&RUcTpqj1C`n^MBTd!|H4eP^10#!c9BVYIre&g
zKt}jgoNwWvt!H|$%<4TpY8Ap@Z5mEYs#1(}8uBH*5pI--{C%EyYf%C_B`Lae&Vo!*
zv!L7(j`%`X)P|mc+mTYd)bYh7nces|GzI_F|3H6G00y^2;oMd$cs5+e!o|UurI&!!
z9tz|yl8ZUJvd~|Vi;A}?5E8zFvNv@|*qeYz#ZZzFw4{^0^Qk(8pSC1&QuKaa5^IWp
zP_7~wD#bvhEgDh&^J!ahB`Tf{L3p7Q-B#eHK+_UvUJ<5|clvbSdJ1(2O{3O-wq!Fj
zgQ6E_pe^Gdo!31C)!aciE!877S4SORiZNWh8=o~fXi)SH4x5>yk8dreE|~(38O5lm
zaE84-H|f`9A^BDz_RNh$1a}%NjanhVxe+yc&Ld%(HYM!LfQ^JX{zQDnj4(dR;G9J!
zUdm+RkcfeQ7Ni*BjZ57JAtzanQi}i_Ru-aew<su>HKFXuTO<bm#GmeT+?@Rn%F0Rj
zVDbP@7(@{_^93%wOhlyZUA&Gti*bvu@K_^2Nss)YFZBay$KE4iSpl@}7of@fAm09r
zMv0&&Oiv5JX}K%RUbpH%#SfF0uOste6n?*o!ELchwE1nqt!1UKv-u2l6eDcKT>5po
z1mj1tV8HtfH1Zsw_Ia?eoJ$LpjHtnBDcQ^RVtB?Q2&}({$0sr|%vXXb@7K|<4Hv-K
zQ-DX`KOr*dFr_Tupi90h33F~B(QyW8OS7xJ-z_C^d0FDD8pTU?kJk4QIVw_;Cz1Ot
zaLn(=ZucW}gvG^cFAvd^**BmNK7(fOm`+K5Yp^5g7w)gmMPOPLUVQ$6t#4*iYf2Ro
zxHahLqG2pN)`i!p(<!Vi9oCXND6424$xq*b66rv^y10O@YWRY0jEi2_&Y?8*FSx!$
zklw`xVLp#9EMiNrs^JU{`!<2<t<WOg%GPQcy9Rjyot(_arTx<>s3!?#HZPDHIf^dp
za7=wCORwJQ(zB`pY-Mq-QJ?^)EauUyqsz#0crLBqC{EeQKD5dAB940Y;<l0;t2LJ)
zFW#8`B$-fm_bX<TJjLLcB&A;I!-n@E)XUKb&O>pSd4Qi1Z%v`>z2=mX`ULO(%SUXl
z88six$6A$A&>lW|J<3DQ?0sw>?nE-$@%)`XWX^HXOEn*iE$D`}-dc26S7817*AP2z
zNw352VGTR;%BeU5jdBC13R+DsIliOd=?Hj+W3lP;O<0<L1=oW{)OWYxMx`*t&$A|Z
z-h0@T-3J}H4Uqoog*BNa@L7Hsc-DuCn{pU9lMlYFVQ4MuL!G-ch5L3Rttl9NJXP5C
zq8~fDYZ1a_KuTsKST1%2(&6{9VlozMEzM|_Vh1+3-lOEUzo?y|Kpi1D(3$E2h2B-9
zqxA%8$DgC(P8`&HH7RKMN3^Pchwr9HYz$mUQGGTvR?bU_&64!1Zv$Ok$3>4XFQw`0
zr_r?_A*wVrqyRgXOPqDd<)au?KmLftQR3u35r>C;Zn(QA7g6rq<ahTG7T;6{mo|$>
zVy5W)!B59`q(dbm37)rapb$Y2Stvl3{B`J04Tj;u?<h0agTF&DSi(7uqgQz8PUm;r
ze62zwPrsn*j0^3G`-$X#k*o)aM9<@LEZb3xxtmMydrmr}42y6@t)Bkq#X~rcpZE@E
z;J+Wzq@M8>`+i8!@~%{z+}H;*+gtcsSq)o<0R(7v!i43gI~Bv=j9f^jGX+UGvJK~U
zKEeUzTFf3E#H_=C&@R4@lAH$YaQ%$IfN^NJ^HPDEJl+1?i1qhXX=|-EeNE~^hukRA
zPjJx^l`r5KTt(;msvt7RPo5{HlF9Hg;!EHqZx4CO^1q0$>ZjoUW;euI4uNmYb3Bc`
zfTVyLjGIj-4*pK;sEvXAOLa;Qj09KjLoAaohvcJ2SZ>DF=qhn44m$wX->mLEG>3NV
z<RY*3hqQfP6|IU4C9XhkT4SL@ZKH}byfl_1GgEMM;~PxsfgbGSr)$T&h^zMw{XEo+
zvS26bT%}5`iASjOu_CpzJn-zmLJBImL0y5>P<!PIu^RU5D<2?x<R%H2@Ke}`Dosvy
z!uQ>4xaHhJ`tf$0cMnHP-4k@%E90#Bb+i<GfvN2pQrZ}SCpSf?$;ui7{-QMZMFz5N
zK7yA1JB0InM(YMEa8}NxV%~AIU2DRUMT1z*CqSt|KCoY<K<e_ANU?Q=)xn!+HqL^d
zoj<nAWgzu$H>Q;iqwBLGU7yWIJ@+;cclmL6TG?RTlU(SzC*#R4K3W=?jYjn(WKC!L
zZTJbAuf4&NvY!Z>+KIpU9cVl@gSHu;MoVoox>#@0u*4XWrpxHVv0E_MSA?$?g=keO
zMe@sO)O>Cx$@_()bn+u+y!!#$E8p<UBb4O{ZmR1mhLOqwGLrU3<gYU14ZXwNsdDst
zV>T{5T0ye?;RqJxCGEp6kvM)C{&KI`S>WynRAY6P?gVse#i?(f4E>qZq{@K`gzam?
z-$n&8jdh@<Z1y%p`Wv{c%h2?}j=m)Q!*N@23Xrs?{HAP7<JTuUt;g6kSOk&e9Qd5#
zqw7-zNXb74tGhz5TE7C)&r=}tLWayE(&46HPV=(UaG7fW5(Aa+UKxeYgCcZ9PmZci
z+Cr$L6xDvqNkUnjBsJ8j|8YOdy#nMQxR^{XKLe+cAZ=M^NjHkJaqY@-DxG+Y6-H|0
z|KcmEB(9V5v}}l5ET>@KEX=yOfWAe@67K{*wI}n@Va~glmcU8#jTh3%rX3XWH5Xmm
zhS2a^j23o^lh4LR?9OsRqR=%M+)c-9#ay($x{3Q5D%d?W4_Tb|@#%X7yi|hmE_*t4
zIUGS`?gQkHaZ;$u0K84za6wL*ZY<x4{+b9BTTZ8=Pqe6VT#r;)Zy|ltibS_ak@cw<
z^rRKw@XiECJ<nnFA_pm6+=t$C@A2&XL-<s+Bj7hbalNQUqoX2OXQg4@)i(&){TwST
zIOxo_Hbez{!x`}mbmuZ1p}!R>S$cHg3A1CJUgOP+{}68#4wt+VY>P;NSM*H?ChO41
zl>LaZ5ui7f2^i(-z!&al*qthcLE=k14B;hlml<Rq{Tj1)yP#0`48J3yafz*y{nHxI
z_Dz)Ji{0p6Su^$Cx2DC)!t`gI6e&ZCj>T(|w-YBB?wm@u>Ox@p#E{J6<fz%@K1FIw
zqtjjMY2eZj+k5h)V_-#3<oDA~cU@A838Fy<4H{Es`|*=Ly^^|#7k-PW!Pkv8N=&D7
z>g5>gcSoqwD0&q7@g|S$E#apq>)D3b<}f%dzKas~y<{gngH)IHp!>QLD&KQc<<kb(
ze!mN@3z~GaUzAR#dm+l+6dz9WQ|yK?q<^T!<%DoJ%9g?K&|hQ}n&Ib;Em$~ZJ%mJ$
zz;5PVv^Xut*_wCQ`ELnKjKATz(KR^iJwktf-GH?CL1c)r@2=1VZ&Y>2qv{{bIaZ)m
z%@^A)lp&YZ=UMT8k(~Ji=TDfU#y1D`yVOwB-lKCmJpkIJ8xY>S4pB9t@Ock#s;<Ji
z(L9{++=@Z_gUE_F2s^QpP_u}_n%_RylU{(2TfNxpU&9s$BmCga!YNf=+BEkaa_)}c
z>h^8~ZvTo*)?e_CEy6Toc6Pq6A8Z<(amdR7+54`ctJV%(LYv6db2|=52-1{see8Y7
zN!D<|mg%2ZPDqDx-zR8W#Nk)bQ4AeQLK=TMmWF<XVL3ltT6=;<<c9E=BLG`B4<MnZ
z7H4iWAm>{lvRqHXxTY9Q!@k(it4Uv-`A9CI0$ZXs!uU@-Obv?>H8~2W{{-k}2`_z>
zd5CJ?P<%^LA>L$Pgcwh!4g2q6TdFZE$KPS~_t}sZv_fRaO<cJwjBL>ggv{vGv64E4
z&9mIFJ+Tz!r4eWqFoz}AX7=+KPTAi_W@jqgccr0TTm!mn|7FYh;?-?;#85I`ulB*e
zM`1Xhc?dnHMQGo_5}ira1WfI`jMvtxi2US_A+7)j{PDr``Z&Cde-7<hXQsb3;7-pS
zxWB9dpO*(_&prS@!wh)jtcGYy2wtzd3*QVc)a)xqtX?sujWwfp(i|(Nzs9#QIV8w-
zfJ^oY+`?9aOYkz{vku|9s|^xMtHC!b9C}K3uvViQyLa8euIJT|)qan_5Fe~`?Z?+U
zRTxNh$C5RV@ab(Ra)0*XbVofpr?g^YI2TPkxrw$78=-7|0Tvd<<iC3bbXQqIFXbR=
z(<<;Gc{4s&cH-jP>lmrYM0DU8h{<_k-mV8I%D9IEy1x+WVuaaC0+1Z8ib-3h8$A4k
z$Hv^0u!ffm7d%Fn^f)$4=dt*njjH@!@IO$&(T)fBVx9rbt&xz*_QAW{3S_Q!K~uqB
zXgU6X(g&su$o&Uc7GcZm9vnB|C;5gjOt9R%bTS>=1Ri3vZ5UdU4cOUy3jFzj(0aZO
z<G!2m(a0Sa=>S{?y>N4<I$QgW(0#fe-kI?*zOoA+H5!nWqmJRU02oKb;KBz3wAo~$
zA$~39%s&Y3tz|emQU_PwcQChW#Yv}1wEERxZ1WG?iyhXvaL*67?U}Y*=mrOe2Y4o}
z36pwXnDx!1vr9jq(xe?y8sgNflL~2$a%eUlqRY{hSh4LV;ttfJY(6Kcl}nJ3&={Wg
z{>9^|UvPh<5SM#oF)A^G4!B6*uht1f2u~n5ARIg;e6)I6Dx?DMLVU|=IC{#___}ml
zo19LA8*S0AxgLjlV-YFj3DMPxZ0|h6-NG<jT$hRSXFV|-lY)l_mr?ED0E)k5v2`y>
zsb=hTf)l{8&Kr*sVsUb=J_?F9qbalsx7Yi?L0^bYeFJ`~zQbn2UKnf}z-sewtnduQ
zqUHziTPjNT3+iCW849(3i4e7|g;CHC7<^wvS2`ylB@{uYUOHjc{Qzp+@CO@@bCU6r
zQS8}Wh_kC=5$g04m(!goM>!1gnL^}KT7Vw&!z5cco!0nCQEm4y)-zp+=T<qyA66qy
zC<!L_cEO;o2VwQ)tOmBitp%@exJ3)z)%v*IlLmqJc39Qog2P6Cb+o4jpyWqB>hJtS
z>?3WK1H(|Bxd`8`KEzAuAQaz7VtM=yavvn2_p>3~3|ArP`B_#|2jg<dSseKli`cv2
z(7kjE7Y{wvY2H|&6Mich%W8Ub@aX`yWGG;z-35I6!S?Ba2iRIJ3xl2u*yy$mS@s&(
z-xdkZLSuaV?-|l8Zo&D6I+k$?!sBf)&i?0$xi6D(?)Ycy9ln4jr7A3Me6Lfqb~*en
zFTmMWKMa=MW&7m>tJ{{rcG(Op9eaz%H{`$pWr~SggSWQ&81_%XEG{3g>(I~@76EnN
zt#Ie_$9lyKs7$$s?fYEuJIn_*i>vW3h?{!#XW&VY8RmVxfu@<Wpl{}Y1o1UEY;zpr
z?U@h|zYnR`wYVD0ViadE9&YtOQc?_L*IviImR1y8^@daaVoVeSqruJ%$8%nzI%@!@
z?rD)z%pXiWu0ww|>f&*NI=(UOd(>$w>$yGPTA+)muz;-dZdl8=U~R-wOgn5rk(rl~
zV3?1Y*X+se=@dFC84FLI92B}ZB31n$X73-yYQ5c<_w6nm2O|-_<SG_A3sLV?KUgOM
zU%lNSY;+9K#~y+P9>90MD?()cvOaPXUD<8|mFurDQ?wQ*=1Nd6XA%y`TtoH58a!@}
zh2uIKgd9%9K^=cswqJz2(QT|~N``^jcl^0shR(^utPej4zef*Ic48xPt{p?e#bXc}
zJ%iF|*WeKl0p_Vd^y?jbdTNIKU2S+0`U-aS_t4Z;iu4U3IIkCpOM9<CG-M`4|9cDh
z!hQ&P-G#@p5XkC{!)$0f8aUp8&oKjT5&AIH3`E6QMQDi!B4)ZD9IsEIXYQfk7Hfq4
zKn7-NmcsvWBSi1HqgJi~sfT=7pH~Uxv1<@oupAM^!<cW<jKNhrRJ}eJ5xKFDIZ^^8
znG|FREFyEc8XU-Q!{amI`05&i#5bM@)GNn&hbIUKc!-Jf&v5s}I-LCc96lxYP%_<|
z9#0!Z-G_Rl+%X2vQGU7;{tKLelPKC2NHV!5^w>g)7Fu>7s<s%OyLqU#>MpDVc`4`2
zD6)ogajPjEk67Okz3MLt|2s?J>klGnydK^hTofrgmDHH_^0sag5A$EbZ{!JNCOYxq
z(G86MZb#zN^;CN)40m_`2iwC=_}M8%jl$P3&SqxXpXH#$TnKi1G~pGah;lV<@_u?1
z8k|1h3=6=gszU7YEr!T5D;zmLj5W9YG3E9hl-h5>-?kVyhJV7B+nluE-#XgdJd^sz
zB&ba`5K}T^QFc*@wwy~sAx|?pSlzriFabS$UvSk}mOSO`Ft4BoP7a+Ax#A7YZf?4l
zSBkx#YH@I&1KB!W2wNWwp;itWa7jS+&weByUQXAW7LnjTf83a~#lS3ML<qbBx9B<?
z;rGM5&_6myf@7dG#~TH4<xr?UfTv%dV*kuyxOr^FLzb&4C>kq^pTKVSKU@~_faZN`
z%uw&ds#h)O=O!Y%DE2=T=<8DfvXK&_lt=f_FR+y?)N8Pg^{rtnXFKn+Bu77Y+WK=Z
zc~0Y@Ytp(@{YaGZKgy7euqYKg+eGX0wCGl~3>}V1!ydg0bm+gi6w9=Yqm~BrAnPFA
zUMk6k(b}QAAO;KF9-?#20Ak<u;8b)wCU)B6PRLx!%XG)S8OF3eIUih(lJqn&73OCW
zpyeq`3k6HjSJHwX`CpkfEKR27HfX$>3zg_u6wZ9x2OpNwe%DQ8zr&uKUH_p)UmeD2
z0(8Y!2N8uaSo}K`t2$+9zJ><V?l#f}BPq(>E=qD@b>JT@#z(#;q%}&>%Wqd{hpq~B
zKm3K1i*D$>E=gJ~&6IzriPGlzQ}SnPHhX13B4Xp{@*2Uzl-2ZnOE*;hL?G5<2aLUA
zNcOS}X)(=tO*7NYEfeTYxi?*Ddq(lHCup&vJ+*Kpu--A098D^qP?U>%mW${y({mK2
zHKKE#1ns`XPghy3054&Bs}_jwhk9{5vKo6N{a|(}4xYIhRM%9G{7NzMSl9sd@rN*~
z>qd3JGpsdPPCcgoVOa<dE&Z^VCWVS2urCf5D<tUVlRmuW<|NNgniQWRNv|)9(BUn9
z7+<v;3W7dZVd4h9TtD1ORAd@kF_KSwfQN}FS;;t~<HRI<Cj9a4k{?X37Q)xN8oF%1
zW^Vb0vYn+6dlUpEEp2M}Q;LqZi^MtQ3aK}MdL`zQiur#Cl}`ls*-_RPYh!$GAx0X!
zz#+AdoXx&szr!+mGxQ7t+;ga@X&83%Q!y#4N`GE$qvv<?5&m@rRp0!9qrv4+zC4ZU
zuVuqNZ5Le>+D2;2i?M7t1rs@8Ffp2ks$Irt*cpfTvCZgTERAyR92mP5A>WXL`tBHD
z>6|8T_gCZk+!XNTbJ8DHe;qKWhw#5!SYsLs<x{sXA^8fgI;K-Oe+Ct4HPOl$4m6Ob
zK$Vat^O)ZdFWN#QOI*Qs{U2nSJkjkhLe@EAbWcs1>>qxHuS7NNTOdjS3fCx*-<4)H
z^N>SJ0X{2#N8`|0JSty9+w4EXihZ`J9CPU?(?wTbGNXSd`N`#T0eF^jP_X|moM`gG
z<Eu?j%j2Pg7yiK8b{4%)mL-e71(2jPTKYwWROfP2QKBx%{?w)CNivkG=S)koB*;DY
zK9nzUP-rzTacvf)%QvQw?~7uX2DCs}bp^aU8lZPL7D3N$!%*oxWb#(w)Sw_a4Wy%f
zs2__zxMGb#5rp42L&Gf}>l_~=@LV&_)fFK1bS!?(PexW}Ao^}~Lu+3ky{RvuhxC<{
zxA_v!Q!h&0FqJW>3UqlOfVSnu<5wW-HFloCoL_;IH5K&A{VGlHC1P0dCfRPeLv3F3
zX=b7dNqMTX-qMs@H7)3YwF$jZV7_hL66#T^fWK!SQck(S>iG~{-2}-0QxX=rI6!Oa
zN$_5o1Adt*e9v@+*Ir`?R)peR$P}9Cwg6A3yu|75-?+_sAa)ZJo>}<d&8!NPUysI~
zd%JN-Z#}l!aMOz;d&z_2In5LCrBCk0)Ndb5FHDDELLqn?S%Z+17huXGM~V%VpouB;
zwppLJ|K6n=3u+PCzl|iyzTi{X9=c>Oi-b-q(MXOFm9()FxYW||<WCHaB!9u9;?1z*
zAB4l3325a_GEYvEUiFROSARNwt^9-oOk?)3c?f}5-YD)bM}CqpRs6nAZOxm|vyAmR
zTi-%8q@Q^)Dzwg0gp`$gAi{L=OIAK8;J1P3LmN6Mr$UA{L9{WYh-{gsIJfE`RcQ;8
z!<Y?)@ODD%TmVekcagvzS1ga8L+`JcQf9<9>UC_wg4}B);~P!I%OXj3w<SfN@TT~n
zr>J$=NLf!7Q;qp*N-9$!pYjN(=DkL%(+T38%le6>8F)W68#kxAVaoAt7z&ty<X>Xn
zPkTI>6%4zzaVU=Cp>Kxaa8;?q+?qNxe9DJ_;}>MB`-4mKK0-NC@G&L`4Nj)`G%FPw
z)B@;=of9?EHF{{anw%%B2|fd;v2Me%&ocBaHy%e?7E*GJAf1qtqH*`5<oTly!uCJ0
z_u2t!|HwyMxQ4Lk`gD5U`3bMOw~{d9OH!H65P2d8b5AQ%V?+Q>drc$RkU>~SF)jZ<
zIF$9w(51SY#s4A*{$~wt8+k04WsAlKkyzsO4BTxUNcXM;&xI1C_=G_H%LQ;02~!iR
zCvVudgL{4^>PnB`o6>fS1;)d=p$w<>2k~X^LCUC+rn;;#IEfv=P6J<Lj4M)c(JibD
z&qexACsyN%klQ{xvW!W@`ZbO4k<SL-J0TL?n1uIsD`@4YDtV@fQ9_y^Ss7HrVcu4>
zJoyN@W=>K)B}flgY?<vRNIFwaqQQO&T?yF*%gJUKw`w6K%@5^k4<i4-RPq^mh$W9J
z(YvJ%zxdz4Vz3^HOH;5(P>cc?Taa!4jcG4T%dUvR;kdu>;gqLPAt@T=F{8Xoiqs#m
znZi|N=~4R@k`154z+yR4ndONmYlUf(#&>uOs*;2BJlelVh+K*+snJM<bfw&gCr5%@
zncs4B@lv{eMT>eA*3z-?8VoM^g0^SdC`=;|XAf^7XSo$*)1XOr?Vh1?`3rPCF@gMG
zHLjKl;rn+ptZs0Ezj_}c7bYQa7bhM2vK79y^XP*27#!3baA2H|?vFpi4DJUw`}rkG
z0%fVm)CfwfmT)>2i%)6lw7f-{Dw+TMqF@#MC}w(hm>hY!F<yu#4EiS55qOmOqZP#n
z`dEUe^|NSURWlN1Xwi%9EXQ4{$M^7!w8fwRvwKUBe}geA&JWPg`VXh2Kf=GV4q=k=
zRCD$q-8(l7-ZhKJq}mT%*MbqWDHl7x?u66xavUFR1)r}l?Kqx?gomxzwQnch$T1%5
zd>u}6Rbb<d6^xB~kC-}E_blwd*}X~F+pkR03$maYK8IrNDH0XAk?tNxI{n_2QqBpJ
z()e#+?=1Rtp$paO<M7}eh1#;UWY#&3ZDZVIxTzK;ET+CG=A`KyywtDxfF7j?lD5PG
zat@eF3_K;SwyRXRdkGniMqwgAhW2drCXb~XNNf8SNQ|U|SDi6|;o(?+?jKH`m!eee
zB-GquwfV9b^xEzr(S}AODqp7Vt`<Z)^3s>wk5J2JdhL4Vk24M2!}bfLH8@H5^cI>K
zE<^r4U8o#5M*G|U1J@}7GEsEFm|XzUkM-cd#ZvH93D8GzLl~^6z^l3QNdLGt&2{p{
zT#J+R{tffqZp@)o54A{~s}E;>O{Em~x41EK4<gn7P%<P(--lLE-JmwTarr^xYgdt&
zO9iRkOTf&8LM$s=NZOmOpkq-b9)4a+qnjcS(z=*#AGRTp$Q6`lGDu4%MQG3cF>Gc!
z$(XMQ%U4TC_%SENtr4bEX`L8XG9cj%^+;1+N}QTX<a>V(@eJQYyOuxn6rLjBgEFoB
z*?`ZXJhVL07Si49`{DSBHY_49lXsYUOOtAuC*SzB6-Ks8NxMRl%6s3#>RA@fM(~k$
z=_^R27Gr<femI^Gq{AF7*ma+u3Jg8Lzv32nen(-ORwc$<PDA?DCR{ri2(_AT_`s=4
zvE4&x)AWFn`4lSMnT53moaD#T2n*$xIB+l%I#E@al>Cl4eY0un?o^yf6sEZ;oOHWo
zJ{3A&r+K#Z_`%(SU3=S6@mYj&FJ{8%RXD1PuD~p20?+l;$Sp&THmWvaa)ku_nf3-!
z4_4DmF%!DQcos7!VKP_YB%?+yI%E44Kk|-K%d?wE=&nNYJViPxnv1Jt!OYjXhm#jA
z(W07z4;Oc1hocXq`;OxJe~GYVe9wR4=}e1u!rdMLYO`ngv5;{I7FGDVJR0X*@8aL_
ze%xnT%LL>0-1b*vT7*0$KI(u<xj8Mq?MNgbMUR$7;_a6w;8z>Ko`Xr~*N#WMcN}(D
z#o$D+3faY-pv<Swu>L?ZIQNZXx8N@vSDr!r3R`KnV=lJuYQ*m;@9{Dw8r!(_DRrF)
z`7V4w0%2E3E-ML!d+jKpnrTJ91z11CL0ekHX+`iLK1iu!muw%LPZVII-ah(P-UGkq
zdq}3xA4?S&8+cWO9&FPgUhf(7Ziwx{rup>9rxD(d`6+)xHl(v(VzO%<4R9LMbcMfg
zZ4bu#`N8;gg5|BWAk^%<fsi+D$ja-*n#N-M=$%DgPLpuvEx^fEZkjoeil?>8v|ptg
zda1*Bv6+|3cp1BK;2{!*MM&hiD7n~0Q2e3kOQi0dCD{*F)Fh}zu7`AKf%`hz`8W&N
zm(8fXPm#j+5V`XkQ?&JVijXv+Q!^gZPM&ErB&b0GS=XuWyb#@wG9|MGn`pPQD0Nv(
zp-bXZNi_R1mgqf#rn5V}v{58a%g?A_wP{b+5zL<+goKwbVA6LPobSAGT9o<Of}dgi
zVI15&p?LfJEv)r&A+sR^{y~kHDV&GQYQ}<yaM1j<MR>1YiyFfbw3LmYLtTtSN4C=T
zzElkAZKm1!QP^lMMqCoHc+H!Sx7P--RQEh2^cd@~cNj+$rATa@5UnlHqQgx4bk=_X
z2Xh&^-17;kZ-?NxTap4~ILK60g5KFm63^C9e4WdDjKogxZ;ipGEz@YZH5ZxxkfgX#
zF`D4#p~{odcpGv9LDM+tjc_pnZ-2pdZ63-uzeoYq?HKs{3web{s6j-UPRx0M(w7TZ
zol}LR9)J38)i|0}R2loGP0zbFP`krf65{fpstLAFO;d6F$S59d&qB@<Rnm22%;4Vz
zwD0{V+<Op1?(xc0+v-4E_HR%luSUD&lu7TKFiCwZ#NfrB*s9Tp;7xN#!1^|o1wDrL
znhum6I7>O3SJLsXd~_hX1^LU8p>p9nS{&K>wr{~Y&PJU3mW=$+O7NAgp&*4^WR`H!
z$~(2-rwW{uHlTaDigfZ@612-XDCzSZ3~W`TcVZ%R@JSN%|JEYytpb&v7=gp<%_MU1
zE9@BSQ18Ele!Z6<-pn{u#!shWxn@+eI(5yc1a<mq&|)8Pa**p_JX#0Be(4j(`6<-B
z@iVr+m`+^9T*S?|(rsCX8F!Efi(w7=Feyy@oxiYJCk{&k8Q;NpwO_SUC}X=WR&4Nv
z|FYdc_j81p#-qiCaVL#I@E8&%_qPQ&p<0H?8v-<Tn~(O4hN8Tp9~IjE2=Tj%aVc&(
zdvX!=e%MXN5`C%t@G7b*(xLU;&8Yj5goL0#IvSD(tqvAPOrPV-A_a0hCqx^?#mQ?t
z1Z!s3Bd1q{E>4x9uR}6aGhB#1S9Q`qxrs!6<zVxQRGfP$ORYsR)K#~Xo>_R2m$ff#
zcqL7L?)`%9$2_clx{OXZzDKG6ADygr0}q!Ly-Me&_iLt*(r$Umm-qmmJ5O=avJ)S{
zn9(n+H-Ec=#9F7&8J0s%Ce~s0xH_G7Wcl&vHrlTdN5|d$X|<~;*&3cFw;v1W)^%q}
zlD|ea+on=-R}R|eET>+sxs>>y4%t?zQD)jYvIrI@mjE$(y7eXLg&(E=R)&+a9XAzD
zHzI>ojBS3uogPQsf%7OgU0dlz=bT;W>y{Zb^sE~n74uORB1rpxpTyBiH8^Kvia)Ux
z*d^2nYsQ=@_MU@P0SEos_Z>~FUJy3^i4x09RQ<osp3hIy61nNv<0}ZSc>(^^NHn}w
zBLP8aYPb4=6^r^IVrYjMLH_t*SBB$ZeL86tXYpf^CITfAp_Rp`Ccn2(Y}^dnX})OM
ze;wOGGvR-pmuC8uVn`<tUs49}jrr?c^Yc-9k@d#RQ&ZZ&K}XJTQLUXHj28>h`}(8U
z<#7*(MjfD6>H@wKk(fDWE6hZF@Y*&GCj!4=ftVi_WqiVg>@yH4@PazqE0(Qa(G~F+
zBLBrgUQ(2fL?ptKGY<nr-Iz?5q2+=mWPIo&2HfO`=i*Gd(;bg%vuf~1To+gBim}4l
z4e^{sSf1sMhV7EXu^<%v1M0LhAsVvcj<~Q=kT&?%BW&dm%IkROKbZkMF>b`d9dl@*
zV;pqS<!O9ek~Rtz;(b~nwzs*#ZgC0<?+#;Gm=SyeZsRrUq5tfAjmP&|p!rdguDyB!
z!Dmu*wI&;?3c-*PWvqaG0t&>vQ8qmVhs+C5>DB=!4mJniVG5%bc2kYcE8Jw>&=##}
zWVU7x<{0auYQ;lTIx4|<wlzHFp2m?3rt!xxzv0RcY_5q#ecK=w)>+`6tPT9dV;~)y
zf&9KYba3h;LBj>Al6k0qbsu})Mq%!paX4icLQ`u9hvtjZ^)WN>DtTeNkgfNeJxB<!
zqsFVNuwi~4)VANn@J&OC`tT5Mha_mR^Ho^n{6eez3&>U$<BalR8b8hA8)IU=b=`&)
z@?j(sgpa0-i~Bx~Qz<_nesdbBnHS=bcK}Z1JONjV7P7ASB5K<mZ2p}I`~HI%ugQdg
zizP^wkL;B9qeRjhGxf_+e>x0XCO^W#wGyY6-$T{KB*ZQ60{1#s9CN!5k-JT}|D}%Q
z=SSGQCkuP(c46wy)i}(z3|2pO;ryb_aN^zrH%}*=T@(zSmHUwQI~?`%o#1rzGxSbH
zBB5p(<onNK@Dn?+c)Km0OFAQ(E~4i3Bb=Bv0&7tPN;e(A7vdqajuw29jmN7O<=By)
zfE7}lbnDYv2$?4!km<7V$@28;ssV;~)#6gub?iSYOnZ;2&>=Pt6L%~FkGMK<C^rSG
zy{b^Qn3KGsec;X$i!cLWs=S&3cf*h1{-ukIq2rKC<fGc^c{slPDpa>Df!={sJYN@s
z?}m|(J}QGawpNc6F(&r%HPlXN!{`nXvae*%S>hsodYQtQ9aDGgE*}Yuyuduh_dNE0
zhwEFWQ15RMx_;CTZ*{IgPCb(9$2f5LWD2Zj9DqkCFJ0CNf^g|uoQb-OP{zsBw`8OE
zPb1_F^I-WR07<iZv2P2Dw-yPguB(LLK5-i4<e{;_)nv7z8IN{GBl;&7efV$>S%0o$
zo&5*2&e#vhqE}d(wg7*h-a}evGFBIMV4^z-i(mDF6uhB#@g07*b5MS1G~Q>h+<%nK
z==}9U^pqD69Iu7ig?v009f3wLV*|RA!DZTjZ8L%~B6$vRgOwQYT8r2~Z~WV92(GPX
zq4kCF5e_4e2x2jT?-`!AIpf{CN6_I|N1`ddC>@lfy<vsuWit*OJ9Z#wxB>iGhNKpJ
zko4lxX^(>xy?WzG?yDnlq+kRJDk4;5kP0>DM9hA@2_7wth`#HC3CmS9)v^ow7B8WJ
zO=&n6T7_4u8!^<$NBY{uOv@CadG{6R)}6~RlAc19`c@cvs!E~3Qz#++7~WiUL7S2<
zCN{2y(8fAMCCoto@v|6E=RmksIL2k2A^MipjypC$?QaM;dJe(;?`<e$pU37^e6%ty
z9{1me;%))c<+f#Gw`wTt&$%M=St5)xdXaL3X=88l;Jx7(ViXQxdrJqx<JZIDq7}*;
zRZ%AH3yG^Y;k-W#ji+s~JlKQ9!egw~YDK`08?cx^AD)}`L&(tvH8K0)Jh1^6%Tpog
z8iv)?&!M}e1d@8v)a<4~5dl6>yty8>iJ2IgtAbb+2aK-BfcKMaIJ?}D^*S}swl{^m
z#31$`a6rK414x^<hIXe$Lr{qE+Xq9?D3}VZuL3Cl5rYEtc-(O9fh@RaEHxcZE5E|v
zsS4iqSfX2b8D)2BqM|4Uslp0a82tgX$`ic?qIBwlI<o7=(f=R@Bd)<PZK#8A$z2@!
zlm(Hpn|SDa9V-jHvC?`RG$=){XO3b@)pRnp`G$Y;xiDE<j!jNkxG^maZ;XtvU{f((
zakE(`HjDCNR~vLIdT=3n1NP_bpzPDvpkI=MZ|j+EylOUWt2{wJ=4jK0+(CR@9gmFr
zPcZ)>;f8t>9ImQR2%F>ch)P7mcXO17>f-v?bo$1awYTMAa2=4rPse@e_;U?6S9U`r
z$C>2?DRRoYj{7k~@S7NeT89GNz8Q-#%|sMT+kq83eQ<o(3*N8Y(6BTO;sW9%Q}q^q
z7qvsq<pj>PSiw;LIa&`GLrKCJKa<u#<WmJCdKrH)+>iE!E7-ivE0j5UV4^r0n@6US
zmjKg5pRYmlybRnjtH4n7X&By|N#A_ANkCefI3orz`&|NzKTM~Eh6hnd4p{qv%@6q>
zz^`oP<<3lkN5xGze*cA~e8c#6{uUI^PNRy1ZqOJXC7ZFjdW7+<LyS=oFT+HZFH(Y@
zVKj08%f$5P)|d);v~$t;cLSQ7KZNww0c3s-fc}?wGP?5*@nV_KJ|BRK20Q5wPdxml
zO3+gC-_VS6rNDcl^hbF)txtJ}m^1H?sZmF^!d5hoF+~ZC1Izc`MVU2^P@N`3gOpF3
zTv@L*&6$kDJn_{1G0vTxPxE~{!SVD54!>zf!{JIu91)=d(o$43o#_w)y41t^3`N&o
z?46=S;ma4&>O)hB+j<6l<7mcqp+5L>tfs~zp|BnMgo0n4X!s#aM;R=}QJ#(c`fH#p
zRSuKA%fNHY9V$;HP`%y(M<d)|x+527mRGX1md2PIS2Rzqf{aQ7rnBDji`-BA*0jKx
zCC#{`Du<8<xzPF%3HymYj5FpV#p*U@_q>DjnGF#1xC{Lq1z3`E7`===%RhY{wr^OB
z_jZArQZ*J-GaoZin~L@_ecbUO0=$zj!|XBIlZGMZ$m)CX(<q2p318Q8I353pi}(0x
zHpej<59om0ygcka%x12bCV1{~3Z}g(#H5%Ya;)F!7;4NV%kMF;*Ak)cJNIMtd`;rX
zYsKfl9>j^t5{_BpOYmzb-~9~k87JU#Pak2r*Pzj7OUs?PD9b~1$pY<7RQA}M#u*FH
zr124AmxX9T&j%idc<IIt305PwGhSAg0v@cSbuaj-XGIT1vYvj=R56M*K26s4nq*Wk
zfYGN*Na$Y|E^OB%(<%-cHfRF}V_VYah>_YgHClfx5T7-oaMbMt^Ge+@{BsZ9Jd8xn
z#rL><xf>SEYP3K@o>mkl<HrRan!eu|LX7?Nbr+_V3HCfA=aIuYVG4=lBbng)$hwq+
z5vGw@NkmesqY%v$lBD<vHJYd0jjvv8-G|>Gr}_ZQ+Pw)chO!`fZV}lo=cacv4%5Kd
zWW@I@B)fuXB$&LCN-dRX%vp<6*vt<JFb#$$8F`m2sK=@s)0c^m;-=TMX3i~|b;gOp
zeIzMHQJ((H7b4UB?EMry!7+6nGTN_=Sz)tC<R>@Pa~!2iK|j1PSV5KEBhX&UOOI~_
zk(AC)6n|evUw)sZ>#NwjL@LYmONwx2v;ikB>QdBBrrG}HBcp-&^sf92PBU&nT{aVQ
zzA@%(Z!KP{&W6H`Xt*6`Gci)RIK1Wp&O9}viFFB>zOn&EFURnyz88&=yC|+sgpB-{
zH##pIfz64yU8O)9O*YcKP$Sy7tP5d9FY!Ch7nxGQ*uv)7R+#c(8PoM^D%4TNYKA=9
z{iIUlL`s7G7#H}9?J6cDBfgTtoxiYIg;tcW=BK8UmvM3GI+i2XLp`_?yPaM@Y~FRo
zLrK#CB_6uEF^k0*Y4Y^+g2p0cDqFAyQa;A`yh|Uy1AoGAtuNAps$kV+h5e`Gs5&wM
zZ=!o3^KTHTbHiZd(1>}DKj6>5AUNtI!|pWm8P{aMUgIa0KJCYs%tT!Mnv7$MrLf?I
z1snvoAoWa(j%e<6cn2?qgPAXM4%gs;6Vq|ds6c#iIQlvs;849Z!Xk?ia6Ag;^J7tS
z>Nfr*`=D)wE;Q035cM;Nat)(M<c#Q&`iQ&O`of2De>%gkcm#{S6(D@z1eSQRnb7dP
zxH6NE9u>@>n~!YBCOL-n24xh=IG6@2QEJZahnR#iEm~C%7xfW5PxgY-g$r2vPn(>)
zYOw34Cp$fF8=Dd8fxwb^6uEaM88*9M;VoX8ns^^Cmdn${n`=@3whtftr0L>zUy}Uy
z43q!WLN_7{pRRMz;wyr*ta23f3#7>8t0eh3A42EQ9QyR30$cOE!O!ZIkr`}FRp!9&
zNg?hx-axI7H_`<XFeJJJ3!K;E;}>7_+4Q18@ii_gJcILIeIPj<T><sbc*ef3UDBkt
zFB3;|O7QXS7jXRUfx!ZKSaw{7LAo?;4(}uBdCFAt&5GU_$We`TI_AIbLVfxuHs&0F
z`B~P#nlHlc&y!GDq(S4)7f_yX8@sedg#2cWqgPgnW-MMs9iH{bwv?hSV_W7EzsB;S
z5gh9+hK3{KFx!hz=9G&~OMUTjQz`;}?O?2`I-KQB;%fV_4yS-V3bQqlrB6t-iGzSx
z8RPo*!g4SPV@HdiKYce`)#Yho^F17^&wxwW6_k++4j$&gS=9^J9KMMB+4~Rwy@D3I
zP9eR9E{L#M(S5wHK&KXx|4|!Qtn0#Wn~QMjE5fU$JaCThq|Yfw@Qkwu)fr<bbgzce
zwi@tujN{X%7VJ~iqHV|D;l%Mw?4Bt`vR}N>>Uo2*Ui#3hmuwzxpBj1T6=C9Q2J_Af
zp)59?^d3o*&-eK_#Af^}as)}?1`o}tOU37cRLXsK59hZl(3{-3RJ48z{fYw}kaD4|
zR?m^w{~1?0T+vha77L!&Q|^E#{l2A4FE?(bpDX6jFYZa4YA_<6Ic$#g{VZA&SBgV9
z8%Z}}KE<8g#Q2#ju+{XYS#9&_ZQ^Ds$PK62Z00#jW+ADqo<=1*USNWw6LtND<mKWD
z3Ehuy8RMrxu4ZgZXS~*EETRh>@vi?K1f#y|96D=?weREb{`?d4Z&-(*btyR3vJ-Kv
zU++j{&yph)H%vPr_2LP%tOJ-{>V;KTR$$FXNxTg>kD(QpFg~0Br7zYrH8dZgEPh0J
z3Xs#6P(<zErF8LZY-!-3n~p1?_lVURroNDwd!Me{o`IGTUu^vz2I<}<v}dFpN3K7`
zyH!OHTd|%-Ue+-#>^S@#*xb~}A-rtQgZc9?oK394`qC7Zhp(VZVGZ(h{qZgP29yhw
zFzm~V;|}3?DRKgQ31OHre+GWAey!-6BSeHR;=X<=V?e@CS9XUnxCxNeS403~6oo^A
zF;TjaoW3g)zuFIUY?dJNV>j`PvFYU=lGIyhM9TKQ`13#m@s6{=@3WL9TzE*oIv!QV
z#qikv6Z7AwQ$*Ykd_Ss4B~CT)C?K+W$^K6HJkAyHQtPSLs4L*2^$PzX&tH?o*-T}n
zz#s4)E5rFi{@4|qgCWOgsEsP)mG&;Iv-*yCES|2a4}twW#-t8L!K~{YX0Uwve*P!8
zJDta<z<fHt{wZdeyFz&%V}g1=A}vCfK3P1Z2Y>xY&*>~ZIv`Im>JlWYS^&JTpnaFq
z*$kBe=`tSm&rIgAUy-A~ryVFn=qlrtCLmFr$<AC@LP=7`7-RDpTdH@{#fnHee`o?d
zt)f&Fv6CFMBQWF5F?tdI0we1ENYAyzeWumSF*u0hmxmzWvm6-R2Ah~kolR2jq4fS1
zi;;RPZyv$hmyMXM$>v9^PT*GPGnn3IHT>Kf{Hwf$4`)~6Qd}PPFONd)(VM7i5~D>;
zEtr>50#4a(9C=@d6%RWwa7&(Xf7S@Bd4QK}JsVkP<9d}pZhy$eJW)Sr9Uny9NG2r0
zA7Bleu}jDv!N+~xP)(Xbr|$65%`0gzS1yEG%3Lz;VLVOA68e_MM^8_1(o&|0X|y|I
zd&5C0+P)mRJI*20lI5(PW)yZN!TG2R9cfCzqLDV}9T6qR(gtMAb0fLy|H04Y8+5B`
z8RN~KmET${NzK8Q@<p^+QlCW1yeXvQBIQK8)6)(e`ex9IQ_rj@sHPi#qGyt*ZxW<E
zPSLXEU(sxLfhwZoP?IY_LRPa$BTAG8uSii|S3Q;*Zl-Hpc4T!%gkJhg;PrNYn(x<u
zQ<)FQqVxmUrOOm9ph*d(&RD(oDjtq$)9?phXx|>hbmlKiZ+;KS^HXUec?+$6VTbA8
zSJQXvXV8sQBz2o1n6LYdxjxKS?VH4&oel8xDuZ8&2yM{&g5;D#<ntq$>gI~kwG?Zn
zDJR3Ca18TTtJ5qt>#=xyA9B~6foEej7DsbZr9~yB=w{$>sxE2WbRu1|z2rMNhcufi
zV0g?ECyfOuOsNM8vxTYsnl>$9dVq^9JMW>^otBLWkm?)<%ChOk(eH1Ox_kx&q`$<w
zQ<sqBYmdewpCNFbkA5_RIP3pm!3s_KtIA24(xsS`W}FP~O8WO(kfg01z<iz<4J{gh
zU`Q*>7HCpnpe`vG-J*^Q4;YiZn8d!;f=eeCmX8ZjvTqx%PkKS>v;o8zkF@gN6{xf;
z($=d12stu`o-~GGcAGnTA1t8bZ%Q$K_!-3P>%gJjj%Ig%95UjkHk(5Hp7s+Q9B+`b
zpp5A+%!{2GkJ{%pIAHYwznI2(ShNwbY^Fnqc{%>gyU;tTNiE4dR3Ioxwyrt2-f4%2
zUqdlwJj$5u$Jp!O4TB|KC_A2w{M(Ep)fS;wRqKeO(~ZJg9z$r!0<vISW7RD;U{5V{
z!ouLgCr<J?dQj>Xr_gc%`jE^^e6!<VCCX0*U&Lsi(@N5dlB0aXUyyGSBt=6BN@VAO
zd~R)ov*b?Fn7tc$H?pDdT9RaJ3h-$n6>Yhjp}o!n<wxH^OT-B=9zlrK*T$69WiaX8
zi=I8_pm^a7OoT2okJE;|mqXaIrU~N}ZCGNH3zu>WL|r-nqf__M!&t*qlb>j|;v&9Q
zNphdcO)X72l)LB|%N_qg=EoQw)jB~!tqb2*pGE7A1~ymE$ruJLifI~0!Js|Onb(b2
zmcx06sxfgl6m?J4=%l(ft(J*IH<v#;syL~67VCjSu94-$TAEtifRyM~b}mB@KAdWU
zmtq`p^q9|U?14ANTXEn6JD-J}H&UZ8g=$8N@JW6iO+5Mu&V)B8m|;e<ZU2HdjgPuq
zvZ1c?4ff8`6tTV#u8h$O_U^*Nh4bi6*l#4i$wo%D7m|Bf4`4b01GziMEL@LW+kJ5U
z<U;x%U+>}9;}`!At2EHiR%mEw4|T5VoXTj>5YkdILXrrTjD+^yLn-a0LHm83lTsQA
zDTyc*$;!%3AM3up_v8LO?#JVP+`r#n@ILQry~cA1>$C#T+<TzbA_pu9+e`^`{s;WE
zioi3TOd#3w9au^`QePK}Q)`rN0D;pf;BLzjN-|84dX$t;#UI*1#f7qdKU_vs`uS*}
zw@I9u(3PV&WsQJGWfFABTLIn|tEm}QcR6P)MqSN*3zl!41Wsp`Q&%p310MY>c4H<;
z&CQ5WORUwY<bPw}@VEqZ;H)fV9qj@7>p!p>GHyy+HWU16z6z9i{J>P}F5sQ;7sPx#
z0!okPg29V!z&Se<)ZReg<jY|oc-s%8s@w&)_r-$u4V!@WQ~~I%t^@{hW#GYFInWP@
z1Paa(;Pawxu<z?laQysdAiI&(0S1nP=!(5mT3icwCM!Y-tmdO8M9e|m%2&Xb#X+>$
zeaRm`mN&_>oYKxI1GKv#)ztO|JZ}C8e!lq!9+kfU87E!>6@E!d%&!XkEGPxX*x4ke
z_$1Z;EQ4BjB#Bb_Dowe%U!hc4Exqi=8DRPL7wEY19+aMAH4I+mU`U3KTFl}K9Y=Dg
zCAHx|;ZPwJb*&W$e;1;*%|=se_Zv}`$tS>q?Snw*{A)1zh1I~k+d{3aG@>3H3#YbN
zB~fxF{8X6&>np-uvjuY>0k<Bj5xg_N?iCkNGUaT~c5M=jHkeV$!mFu`O9sG^j}p}B
zSQ%=$$3M_FB}J7y{0i2*TS|S3_ym4tw}Ec!KJeU!o7$V}07}CYDBYQTl%=>fwQ*hp
zxE}I><#uO*u{crc%hF09W0C^?iK&90R58n?i~@<_Z-FGwE$}{;rk-Xdf}l=WR<r&I
zyd1v<exWMxnENZ(>|YE-Y_0=6sV^W>><Sp+mY~+{?f_fbQ^B0tBjEh)1<0L7RIFVM
z*ePfU)b-;)i!}`l0<(aW7^`WBF{8L`Qo;SEQ^2G)7`&LP1DDfS{2$6uvFevVPV{@$
zk0c+w+L8%^Qv1PewK@>JXA)R?O@dFSTfw}X7eKx$5%9I{1T*WJz=b=j!RLz2toOrW
za3(n!_-<mkfz_;!NL2}VaPI|J)w>zo7n}n6rJ__zOcf|zI|%l8wFB9n1HhH%0+`-A
zpYj6NfY?(bYGQpTHNa7&OqSYF0Wcrz>--2DV;%#uJFH%Y<wl$D?*k^unALfSP@}@H
zz}E?e<>{(YYO!LJmhKHOeQye!6MG3HSihNVe#<ET73{r}>H&s#Se}WPICb82Gu0l>
z)<02o>YkDWHB@*NXqxL$JerTdAXfw^tl@w%WC0pk4A?~G7f`$Z2fUmoN^N~T4B((1
zg}b^y>rg0_%X(-*t%X#~M@gz!wi#sEb^|3=SL$^90QExGlI8k-qg>5=DUTJ&RR7Nq
zs$_=_Ras|4d3F6^IZ$07?94@A)L}&(DHEcebsePk7F`6hi#4e1z6eTHBb|D+Qjl_$
zHKC;cvYfX))|Bw^XW;!bdymw8snf?<4!~&xsz&4oc(mCU^tpEc!^f7uQ8ylZt^WZ8
zZvqhZWF0u!%FAk#=26!<lOR-Z9PEyc1?L;Tf*(;205F^Yv4<6?zIb-m^>mE&Q;Gq%
z?N<S#8!R^|NtCJ?cL#sC&Vj=GE^zW_4G8>|1oo{c0ZW_n!A`T&;Aq8sa4Fal*gg&i
zwYJef+_4Ne9PR@9%v6B-bUxS-eGEt%27qT3ec%P370BMt`T*T!YqRGqu=|%SkgC26
z3IjU<pZqPb_`ms-fBHf2WJ;QP%I?p|xe72=)&w&5?gyuqvYs&~BS5urBzTt*3vlZg
z*ni?M7%*ZzzwT56OTPfXdJuxaP)DG@X&NZB>;>gy6pVk^3oKpA*k?B%45eNGX1t-4
z=C*bqRLW{2<K|ID6I~!pc^Z^_H36HwKY`VwE#PXx8L;#X4|TMems;i715RZ+P{~>i
z;BLun@IRQiVs{v8BX$pTRAd6jWOm<}QODwQy`cZHDfM^35o$zCojSQM13YPml*PIb
zuvNx`#W#L}yes_Rc33c2GAjbg+%E!yy$dKgR+Ici!xbc*k)*!=c@HA5vp8UVHrT*l
z3%X?Mz`tH2@c6b25G~9B19y@@?4}8J4_Xft-w+VPQw!uiJ_dmuyMWlmH7qZ?5XeLy
z0H6QuXR+;PhTD!9fg0=GfUq6}AhDL+r<Vb%;4ttev<jSz*#^F>Dg#%Amjn9-J8(LA
z2`JmY1z6Qb0;|9KK=7Y5u+LhET0w<@RV<b&a9sw-><$9yi8G-8yBv7O;%L{kZNSm{
z7r}_kWgukK3OY{sf{gAdFw4zyJ?=2z;`?#XR>^uLWfTM9zd=BEeIq!aaTRda$x|k4
z*d1^KtBs)kfD0$%fP{cEkVdAU$yR~Y8&9)Xi~^v}I06k3W8i3>0(|M+;022f%s;mt
zlyp`DK4CqO+vp7L-0uQs9yEbkB_Htb%~@da$(`N#<O0c179(f*uZ_1Wz($8a5NVeH
z0ybR+Z_a-;RJpVqXuGn#*i@ZiUw|HvubDJlF%Sq=vpX}cPZ&(C(*YAlOTbL<E*3it
z1l$+T155thpvk5U7{-SJZuXvTn6&{W$`?USU<_D$j@=)awSlWj?chpnKPXAK4!DfM
zfyc)(Ft{)soZHY0<ZtK!;iuMs&*MGV@%R?F!g{FKaSem`l*>S4xDsS&N>is6wgIl~
zFM#(I4^Y^22kbW32iB1lR9kTc_&Z$%?qqIZXRt%8UhNn-Atw)PrPhE{w`kzHX~Z!1
zo*bC(D*-wVMuMJ}^`OPm8T{!>2J-)@fb|28Ads^OoO1C2;VH(TMKlU%mdXR`Z91Sv
z)(~vYT@TWC7lGNLeDK?i)rEG2vorg5a94K$tEtokz_y>&U<&|!od9q`Y%Pd%Kgx0+
zI4qaoE7+sB3#6>?1ZmOhz_SxOLBiBO@J<8+PDu&)de|1kFlRv7R28^pl>$b6E&v?|
zZpzxljxyWV3))Y~Q}@P44I^5EflPKL&_A~gTwJyRJPfb|AjcF$yqf`r@5Vs$GBYrq
z{|uawiUU1mTfu$p_u$JpKk!te0-QW`3j8C<z^Jwu;L^*Wik<HYyRLwF?MtcDM`Bb5
zi(O}GvYvBq53}45)(^<*hT(q}S>QibCx1Aq1;jY>QKy6!Q8NqHf*-4{gO88DvNeUR
z2S1y^qC>IZd-f1WyA%V;zL|iNDeHjL@*I%;_9?rs_yw$39%gxA9!RkH0<Mj*nlb<X
z`F03X8p=ySiRCr0IXDN@yb}X&=e-BsEUuO@;}7;UJ^-sPSAy>N4Inb94@?_B0@rV{
zHJ~;C9DGm<^h8-4gXar)flq_Mrt@IO>k{zsrX8@loej1u88N((@!rs3<7FUy=DXob
zD;MzW>U^;7-7-+weHN@^_an}N@?clmC7{V_j;<@`gX*OfV1?2-fIhAVH*z$9=dDn{
z6>k9)tQ&yk1bgq<Ij*QH6YQ59VSP3)1DU;!fTNTE9zS0LZbf;3LkVVpcZVu)C{O}#
zToOR}&JytACi~r<Pr!F0_W9F^02RabpziBlaLv*csHp~n&unkChNeNV?@3S(C8!^K
z+SIo(Me0P_GvHl54&J8wf$1NStQXub@IG9g<%-UMsZoCRyJkU!iNyd;!w**Lyc%#8
zMp9u8qb&A$9VD?h2-i&(Kbm8C|K<AB_(&Fzp|60a@k5}5uLOkt>j0L2;=!SiLeR9$
zmEsy-3r^%lgT}}=;6<l0n9~XduNSTXPIjk(LNbS~gV$KE$N;uSx(nPyZ9!IZ5P1HV
z)kHqb1O0E~ST0)}(C9A)E2CV&55)+u;41^n9Hv3WkRWwNrW%O$q=BNVnZW61IQZ?r
zdQEozGE_bg&f+&BAR;*w{10E@vg#{veDej+<lF^}9;bs(ZiT?%<$r+ZIO~sO8V+tQ
zJp_8#XT*mq3q-p82D^o{zy<v<;8=AL)EU$PSGy-bFJ~$Edtebr3O>W?=0^<=4aNhL
zqIuw#)hUpBHyjLmv$NaU4~9Nnq2RaOcJTFH0uY&vVl}4MLA0Ya=>2mH<e$0#w(L0$
zOxLjb&4@_Q(Nqlfa&b`~oeDtmNFERlqQR{}eITPW2j+kIYPfWv9N63gz`L*zVDP*Q
zQ1w^9LY{R%aN8}=$Z7;wfE;|$9t7_^ia{=42=FT2$?6Zv!NPj>_ZAL<`i=}RY?%c*
zUYk=T-B&<y91o?!`dX|F$^|FdpMrx^n?Qo~C!pD+06^z<;3_^0ejBDzlOCtR1NS>X
zsbnMg?Ucr1T1DV-N)z~x>nx}`<^`;dhJfNE3Yhkk0KOd<%!I^JZ^tVs<FKPtKU)ir
zFM0&HFLi-3C6?1_uo0}AZ3KmSyTB2yt3b8(AIP)}q3$0{0arQCLEq9I@Hds^f;<=p
z$Jle-aMY&sMl~qqO$|UKNtu!{%K^9Cw5XliEUDi2OQ|l=9x(dLm&Kn2C^5_1VCio;
zYQvf=Af!16(9&R#bn*kpD;)=U8y^Dh;#u(QLK}5QehFo{Re;L(HKPPF#Ho~+Wt8#>
zVJdin^<BEVoN{enL>&@;4t6?qfNN7%!1ft0kW?`OM$}e<y0i1a?#vz_=ivybpO(O+
zVHwzyqyiRSH33IJ8K^T(0tT#Z<m6sfb3AqiJbAwdw6E0y=U%cpEEzkHG^GJLCRp56
zs}Q9B|5*et`Ttl1?>)H3U)R)Qk59UnxY`Ae|Jz!4-aN}mK%VsQ7?)q1X>MN8f!57M
z8XerZ#K=b46#3nEX)aM5V%~o*K>;OVMC&!x{Ak`0mcQsmuePeee05ySR<EuTnfnjX
zQbB&BsS@wz*KaAtw&yZQO7kW5ZW7JHYvzpD<873QNcWOEo^GEWFZ;hc*Z*T&{J%~%
zFZU+>UuPz4mY;^P?&;9bc?-Odat3;L^1=B_G-1$&9(uU;JN^1Z6kT~u5nfv94pS0r
zVbA`{bcyyL{P5>7op^T_G<~lF<@n-ZX>~F!^27}0Brb%F{VH(N>VEitI+?!iq5XbG
z_8<O#ui5{yR6aaX1lt~*rpqpvleH4!NF#F?pF{<8Yxip=DzKfJ6ci$BYd+u=_IYIa
z6+W`co)>$Tw8GtCCvb(VC`tKGi`3}ZLHzFk(Qyzbel@F*%fDr)v3?e}2EN1N*<Nt#
z;U9G0I%N`jw+V(X7e~9;xHcK>;>OkYzF?F3-`KPLB-wP%3vH!>@C_MJden3a-Dw&D
zr-Fqkov0E<eEJJ;vkxa9Th@@pw#Ts59fHqS*rTh5j-t-iaFkI}jbA?sCw|VWI61R<
zxX9x%9nMCA*e;Pr=ViIz@8Xl>Qxi?N5)$w;3nj8{z#i^h+)Oj6{}Vgl!W4{iu!?yg
zd|4Ta<Dd7C>Z=Jb_k|wR5AG(HcD9hKM%nmLWf?&}`<Q8o<5=n6ahO}S2&UKa!zYFr
z@W%dc&^}=U`S358UbNO5{{4QMROC#+tR7w1^05X!f5=ZXoM*96cP}GwIhNK*i-WdC
z7je$k1?b474EPl0P`9JonT}T)$h*se*v*T>CZR<zz2z^oaaJMS2cj7>>954Q#t{zi
zD_}mK`LOJ-3JJHCLBFH^!usj+Xh-`9-L)-&R)wEf32`X4jM|64N9&+4yDiAwXcu|?
zE*GxX@Ip-;VMs5)9#$y0;}0Gfj%&w~fEiob@l68CYm3I~W{T(stG6MG^SLOyv<B+?
zIYGrTUZj@$8W$b%g9?7B(DdM6+Oa$nHyElA3mGY-;c^vQCODAORx5F+R4$(7EWpRd
zl<`fGVi<2z$I-w|%-(C6P>YSe+uV=|Bfe>3!#pMAS=s`3IX}f)w!Ug8yYq}A_;<R=
zsE=aislJ14EhF@0adD<Tl@IQamM2pyV(`z?G$~&xhPmkDILcfRCC;rPS|@{1V!%C2
zKn<PfuYhgs+_B=tJNN^y4fRq$ij>$cAjh6)qi)_3{Q8(K_USM~Q#qsXEQCmZNCUq%
zP{e7{#-zJllBh}#!qb5vB*OS6d>pQfy6+5;Kh3fzY}}Hmy*0#4h=<}-M?F$~EEaDH
zET%(+x?w_S7Bkl;jI7vLNVV_^+U)3Z=o^tq|LXn$6MdFr#4JSoRyx=~vKa1bmW2tc
z@6hW;FdTeXN6YTWhK{1qu&maU91NO(R9zyvmUIdqUhshCv5`VvJn49Yqblte-vxt1
z7LZp_Mc8Wp3HmD=k0{`$$|TNHLZx@wko~4KM$#|@8VBi-XaDrjJ&!GDk5dTA7!)Ax
zmAm1EJtgGY6+tM(I|+OAHOLX|RG6~%Gu@*xfRiNq;1OLrjQZUunK?t`u~3}E{As11
z=>dobJMnOEFsv<arzc@A({b%JbB2Evx^aIThc`#TB~3fYQuqK0I>?ZN36U^I&Xx4;
zQG*KSf8f9kJ5Z&?ZFF?-Dn6N`4w3A8Tvl+KSstrEmd;?}T$PO9H7{bi{}v<v-PiER
z?hW*?+8|7{P{y9j54=Q2lXQKRME^pclAc^o{97;-&Q`ipd^!<0e^?gpPVR<E#Yf<S
zG<njJtISDxZ%$m4W-+O%g>SbHV{b7Na=AnSCT=TZBj!WNOyqh(nLfdFr_wq4(z7_}
zYdLh@cY-)X=Aj02e)23dh7S6%9IXy9MI#~WiJbR-bUc1P8VVGLU8B3O%i?dixKE3i
zJ$;2W&74v3peCapca#y|nU4N*Q$qd2TJQkZ1*rOo552e+hKrxBgg!Y8$+C_|V|h>U
zs*q>oV~QEIYB~Z7?ls58ZwztVtt>oWwHZ=O4ODD=LL2vPCGM{&EIg0^R~kQtRBScA
z_9%dn=T{=jM-M>Pkn^;VZ#4E~&nxK>!Q|dwLH|whgSJPa$@idC^m|tfEIu3nzyC^t
zcj`~kQ}Y!`ve{lDvRs0gXFq`amLqH=pklL0R}UNwsKZ|OEQpX{7#r>Oo}4PIfrh8`
z5TBea^4KMfk~jI2)>1iiN-F`meMmxjap$lW?>n?zRt_&G(&V$P0pi~$44s_qaKPgX
zeCSUz-jK=0kIQ5sqg9gVQ7cVud~|~fRlCV(OfD2IgyfoU1<vVBfNL*5fl}>znVG{&
z;Q@mJT5?8!ybYuYdt>qCg=cV-MHT*QEr~973ZeM%N;Ewu038pm!mfVOD0250IV*XG
zUf8-0%^yjj@$@z9=2y^Q++T-L{ZY<ezgAe>=?}L^pTjyY{$Uxn!zd_Ll5DivMfjT6
z(}TD9jZD9*!A;x~MBwdbI945vd`Fid?=!({6iFC7n=XQTH!LTCnVxjcQfZ`Wyn%Xp
zx($AODNQte^l?SCJN^020jR@9@5pD{LUp|m_#i$Wn)t^N!5n+MKI<c0Fkp)`6V|}%
ziodY!fBbM+a1^4S^P)dW-|^3f=it=ijf_>j45G|Vp$p!tv8ISVz3<*aB*HNz%BjA{
z@|-wH?V5v~w(Y1a$^^Z7R0>;tv*DX&9{eDr4vE~&#cvumpdPIiXy5j5{Nm6k6Qgw-
zyV{hJjcy`n{a;PEul7CgGibu&zP+U7f)^EQV1j!xGLfrPAsQ7a#K*TBg!cy>N$F7;
z;(1^ecee?Vw;LC84EH#pQjRi?Q+)u|_v}QCHw&5b&WI8JEstEvCa}A3G`;xt5qxK+
z0InyiP}uAU5p8fH8yGf%beASxq05KNUk##yyIRy4WgW78%|YaCmkSGOB*=n?=_DYM
z7o{~6V1906itXu9_`xo8QA8SV7jnU28@<SfB42DjWRGKKD(T>`Pk7A|ZZ@8C3nh`p
z1N&RK;k{Qcpv=Dp8sy5OhL!J`Lw!crYF#b8Bz%DRyI7Ric}i10>pxSg2TVv#a|YIw
zu|&OPd`OFp2~~M0h%BOw@lO{|+`9f9Jj%%<axR5L+$a=gO8=u<S8-vtH8w<9s0v#A
zo`aHK6QRqUOH7)R3e*-^gv3q9v0Q~ZmT!B*2&-CQqmhmDsSy$4QXz$7mR&>Prmcu?
zauJlB2_PDV1sEJ#hSu*G!f7#|aY}(M*;=ZM9*J?2w`U(SUn1F9xw>2|XrPWSv-kaT
zNFP2};R(-5xG~ep&(WCeJ6K%M0R5-TphoO<bmX2g@d&6u%Y?Thp7t)hKj<nxxN9{d
zvfdMc=b7Z5-3C%^`5kXsYKIoq#G<V4S4dXwG_xe|3bA#>cxYQ9*||@REdCtJIDTTI
zt_>8?9fKvr^F}mj5<G;Y-e@2nS8?d^X$QIcHxe$MeuTF#P$Ip-JF&J^9p*NUC)|QE
zi1*npa#MDe`ID6h-DjdnMf+l`_iHV2-}IX)Z`VZidv6nN{!K)~p_E9dIHK6@9Yn}w
zH%VEEklL;v@a@)5P(hTR<n2%-nO56Si>M;9*8pT+<w^8R)B(xq=A$vOG}xvbNxW*+
zVaZ7~6dPC0@z~q|o7DEhsiYjNl6M4oP9LXd^gGbGJ^J+0CVsZxd`3%0+(6FV@8J`L
zcsTG>kL@?#5ldHvX8ZYz$%K>)PCI{_(8X1FD5?w{jg&xdLS68A--meF-!R6p&6CW$
z?1cYKyutS^t;p-SV5krq3WJu;Lh{6fgj=0uf>vFCe<F?%yQ@j?WQi>vx`&Y*|7u)B
zRyNyf4bo}nijmpSAG{(lj%>j@m~r)YxT$#p-rAgn1}|vfo`Yv#uV@T3iFbz2_o|cW
zfAYlqu@>@IltK3P-(cVeN2cLry<v;5CfYiah$?kfqhDe3$w0ykRK71w&WRl*%VhQ8
z+Rtw2mJ1{+pK2i0#3`(JN{!?^J&J>T7LiAlrSz*2DcIzq#Duo}M;8YSqn;2Q+}`vH
zS7fV^XXid+b&Xx<x9ww?osdq;-t5JV!k5q&K}T};ni!57&BjjJ@-WbipF}SnVodW}
z(1E&4_|IZBsnuUatZ#dxs=c%1iJA-&o3lWT11jiyErZk#zr%{^70i;Pa=Z`d!!-e{
z?YL(#iVA(syb_cr1q$BGC8ZL^`@&KpYUqQ;qY_{-;wElQr<l?Gtn|{5jjT%O2No(1
z82*M!#GQ>peOWI`M&(|@bjf@0y&w-!%#<QRM<2l!o=E(kH$=T|#^jR-MOvH`VBo1u
zjDKt!R2ljRx7YgNMd@E)#$6S%Afy)m$ufk|5BuSZ5k0Iu+y?eKd|~9g-@_82DSU5L
zHPkh|fR~IUG4xOh<Qp8pZyw5$lk+37xveE`d|S$lbd=(N9S6wtUv84@od;F^_F%PX
zS@cV+o72!;%CQaMChpbzq|QVJS<}PtsFyw3zxO`e!OkBtjrsVEpc?ea_D92)rBT#}
zP7FiJ;lk0y=<{wb#?U1N8Mb|alM6~XYl5PY<m^$7QQs=`M#PDTs`iuPY16cR>lhYE
za3a$RtFg<lD%!WA37%N3jCp*xIS1~~;-dFiIQC>KHV7HS%4sVh{1=3FdWj<L^DAIS
zWE{@AvI5Pd20;tBg|<A(i|elQ(&?E_v{K?ma<^OsA2GT>PNeLCJN=GPO9TqZ;+8`E
zTylzBS8*Y$if3?VZ3D5pU&4gcQ)Jt}x7f+H34+Q=d_Uj@bM!_6RC)6soTc8<ht!17
zGC4`3(m^>im(<8S&vqqp-~CWC*E{%mXDm7()r0A6Gx(s`E`oNbA{z~7y1P?@EFaQC
zL0rY0?g<b0d0`KJ_tXa?GemxU7eY^kdT|^b2=zz%@!Mfx6s?m4yX*z&%<yTPY!`xE
z<f;gh$4dm9)kyXg9&+i?68tgJk~}v|CnEd3X}7J(a30CQw`LcibtX+@vQ>r=JiY);
zE`Ed7@2(_^?)a0+zEecPW(am{+>1t>xnSH>AgR(BMf@8ipxQDs%=n$84Vxa(UH&()
zLXI+4G*csiyXBGlU2#M^ZO5uYJvb_?f!1i^LUT7}=^9%Fa(MhKQ&n=Ak?xTos_cx&
zO)J13*}s_gRrRo6Ef4gCjbhZN57Tl_kf?zv9K6Sby!g9~=pNZjK5u6u{;c&7Z<irs
z`8<KXU9t?>u57_`opxwCISYo2xgqug1m||&V6OS*(o2`lL$h|W%@_O@BTdWc#-{F*
z$Va(`>39)D47g*Ebeav>!LdQlm&lL@@ruZG@i=Xf=8d<Ox-g+xOJPr92g$Q($GaaW
zlOgYR{NtrQ`IwtSF1#wkKYz?4_k>h%dC(*1IvbCEiykKP?JV&7B4LBbgXPfW5`%3v
zhoH>VJ-A@*1X9121)b-Af#S#hK(6i{v|(0?V@J<&T+$uL;r0exQ0z|L7DSS-{lTO)
zr-U@yJcs?A0%)Jn9s2ktS>!ZVhq+GWk}du(>1;N#wG&>URb8XtcziH=R`nX*x{#0e
zER2NfS|ic7d~;Ggcn>{w@FLyZUdU#~0RFNNK+EzO>Zec<*^!||s<j2#X!l|=_(qGY
z*GWXL1tp>JtxEjM?GwH3wio^I;0-KLC&kn{+-82?=SDs=eRRr<9%r$)D}EAgM3xJx
zll#tyT#ND{t6ijt+sb`-p;jdR8DK%USIChRJ`*JR<1X%v|ARl@T2GEk7{P<J<?#EU
z1v#dwM88t@WR&G5k>$;wSZ>yU7|-a!dw<H2F5gG?nMfxxOOnWwl`&-TaYFVcYB5>M
zG*Jqeg=4)B=$J}xV!L|_{aIEV#m8YNePbVSQ9K7jwSU4D+BIb1b4k*b(+$VJu#sIQ
zU3jhADx5f$i-)Rv;2vfLx%W{7|9tzCkzSMvQB4h5-|Rz<DBpphljqR7*T>MM_{)?#
z(+|~OAB1{s!SLj`JW{+=kA#xtaPK!BBFskIZhMu_WE<SZ%ei`BigF0dpO(TGjm44l
z=1I<X{wTCn&y@TmS7GSv6Zk{v9*OeLVC$MG%sS_cbKFJ9Xt4~wINu7LwA_xfZulV^
z$7URBYKkA28IZYe@=!?WF6UC<0oZorIee%63U2?}z*z3A!F8TCh;LsTjM6xVcBEX!
zU0MM|<=zyom@Q#$oeITtZ8JX9y$Lt2ZD0ic)WXDocCh8<Pk6Av0-Em4MlT)9Xi4=g
zv^CFC7%Tn_YD}y|A6TcUvsGH;{T&V!wP`6*Kj;Y0-rGw8BvY~BgB$e5M032NZyos|
zrN{ArS$Mi85=Y3<=sWwa2en9$xfS=Jb&?VB;j2N%{Om|mBZkYq7hqjzgMJqFW3if3
z<P@C^Z5Um&E9(PXld=dM=FeveSLZ^hS3Y?6>Ss{JhX;qOJ%!%cl;RY%WsL7C32geC
zk2d$UrIU6&fq|Y0u=}wpdTY~0MD#gi%yb&JJn81FTz`gmrdBX3M|$YmcUdUWb_`Zn
zHsgss>L}{TXZ-W-7{lXN10RXk(9Rwg;3uA9sQa-POJ7N*h0y_I%(;j^vc>?uPK(i}
z#`Eyib`w-X1(Iu?cX7_=)g<Gj4EZ*H8OiHZC3DkC$nuCDIct889G*^K+|gqu^~OAO
zSgaaq_^pManx}CaTMO@W3!~btKX8CpG^g*SZ=*}%2Il>E1ZfCXLp~8+<m~ej67=LE
z@vRCbx`}Cc+t-h9MTH;o%2Pq!J91D_i#CaA&Zq0?PqhA`RY;R#j<jQ~QJ}F5DJjq=
zKHJ2Mf;af%p|&9+{?Z*M`W=C(Jw9YbuQLu=QbV^1)H1JRq=<cD2Uc`QZFsc)20W0U
z1g&;$f<Q118{T?A6fEN*-|!H0S}uW{9#1f3{B2GDRsEn7g!V(xD|PrsL<TyZX@&$Q
zKap=eTDWB{f-H7kgtkY^A<l+bPLyOJ)>OI%_iSE7@%A=jk*ChYrqCAWIN9R092KOn
zE*2Js6*3DVG?{{;iyUs}FLa+;8FtwG6(&i=;v6GB(&ZY0Lv007K=KBpB)txvnA3v8
z?+x&hne+I%J%LgVT&VZ%5hUy=*1T28pV2i`B)iwI!p^?t__O6{GRfgaTc`NZ5B0BD
zIME;4w`S8aZ8SMlZci>u2*N$fztWX1$Ivdvi@<ivL;R^Y78Z>8(0xMnSoB6U5z<nG
zN2ZiW$}tflC$Jle^@QVnvzhqcVj=WOcoC{L)F=7D!N|aRC0W&!4b|kHQg2#JVbH7r
z9QvgU&Cj00<9n1)?z~p$vrL&Za<>q2#Tcn<oQHPy-JqrmbMXStc>HJYSu*qNCZkDT
zfQs#!)H@R+yz31=SypfWTFjR~ADkG@YLO$*d@>WNZeGv0eo#X{8W!OOlk>1wx(jl0
z*CJt;Dtv681hnfHL9zWwP|~0Xi%J`!3#0$x2fy2~=zsOVvN#jY4~inMdU^0U(fOEc
z;m5v<e&Csx!MHPLBb-dW1x-32Zgywz#r$Pt?UVz#sHa3;l$FEBb8qM|&NX;^P?voC
zm;>h~e=}8M7!M1dfpMnxr1Do4{;iY(S4MKdd6(R2>!~2LoqHB$2Hk}F;x@vJ^O2-I
z_!~WYdKtYdn1|36x9BgTo1jKUA$?HF7q2Y!gNoTKWl$g=O3Fk-pS)2@yc&^Mr%J5q
z;*AH`sNzF(AiY1r1;1I*$$U8W8@o?tAfD$@u<Feivwu8|v&j1jeW`vLy1m2LYI^}(
z)m;kDPoAVZ<}bh-zDbh-hvoG9rZn7Y-%eMAd}8XCSWucHl92keAAkK^i*wr)vB%>T
z=$yn-vX&me5uY81*GL%QKQ2mE?;FRpBJXgp8;1!va2^``J%sN&YoW<Ud(aK*^&F>&
z`%qVOl)h7!$q9eG3BEotO2>^YB{ABF6z(m?0Y7umkK3>4yB@q`O>G?$lRttx{eNM2
zu!)wS*qz;$94O?{NplKi(L|yg{d{&2tQw7nU)qg0J}kYXo$odLaNit8+~12%y}8Fp
zb!~52l+;Iu9Ml4rrNUut_CD&J`~;<CF3@ByT23Z@%3_b%|L}OPKTPRoYmE1QIBZ`v
z)?cw5XAD)Nl(Q4~hN2MBIBd{#Vyp!oiV{Ja`t#|ydQ-aB`79EUi$c=#HljuuHYj<(
z21OXmryFl`ILT^p(9y@9*lH|A(J&fL56!~~o?66G^bh{jPz+BUS7AO4>_y_LzG#K*
z5Ek}x;>3K|g=@Dlw2t&E9PH(QQ#vikCLRW_3tmhbZ<gSn_HoQ}QFSu%-k9)OO;CQr
zi_o0eB3vKhKpw{A!A(1^aoWbr@#QL0R41eY?RligOi>N&<P~R34Udoz`xqj%Y@Fkn
z(u5zlzl5lB71?E(%``Y#knM8INcCb>a%B<5tNJkAI<c1ARZ1a+_A&7Kr~7!P{WV<9
zrHC%H>f?2lIXHeYkErM95UXvmc&q4byk+t#eKhhR&i$N%xi{OOn{mbXNnQgiYUhIm
z-`PGdxdh+N`H36!grSV9BXV6)i2KzSl07YH^s8ktC`g6C^0gVH{6Pof@iLekaAhNZ
z&&9%)9AQ$Pe~Gj0;$3oWr4F7?@~{NEyWi+80X<KL5-zzC5M8te?fNjoexst0-=T4o
z#hZ?@(+rt^I!S1a&>?be&k1bZcNW>FZ9?|P)^lz=X@YXE^@&Mt8kz}MhD&rs&{eB>
zXkO?xbolie@^nipSuUK2eoZch_ZkFYxPCaSc3*%5_$APdXg}QhCm3lK4%6bcyOH5>
z4I*~2n0T$72N$b<fyz$v@dcMI+&;6MaqzrAK6uKa`A4;wn+E-y0XqS-{BbMYc~FTg
zyS<cr+xHWHktt)stk%QzqwnBi4FT+#c$=v=PNy^Tw9wZN;Y?RR1X}Mcj?Q-`V{6I}
z>t=?dJN;A4lua~AVP}=@)>WubA{lFaOTqQZ5jbk87~1F`Ni+kWF$#CssM^a_@Y|37
zaFOSI_~BG3lkO{s-xp5f7hpHuaq$UMsXd5y^Yh_t^#bHyhY7Rr!vXU2a{?V7BS{`l
z1QDluqNLjHD&D$64YoRYAUh=&r14-Kv^gV<-1Y>)1J`D-b8sE*`1FbKtvZ8Gc{IXr
zwU6*}*&$9%i#`fF*9uX(9EaJTjh@?=BKyE*GUX=(NoqJ=`o{n(*l&jl&%LlgTq<6W
z4w0$QeYn#1K1N<fxHO-ayt!KiPj||rf|nAAPjD2bs($2%MtUReuU9aB){Gs5)M2YB
zW)vsNXzf-bWIdCH3|{}GvsAm`yy5?FR81L9R$WAvcV#oKKSr2>XVN65fPHqacu~&V
zuR)3Ds!U1qF#aIp2e~f4M?*N5F#sCm&3=D)^Vt_%+p(9)J#&GR6vKlQ(u|msw|0?x
ze;|2KkcueHokV3xG5iCbH}NPxhTlyN;ly!u^!(a<;;p)!G;0iFiCgMK<8&+(Y}Q5j
zm0D<9n-E(4T^7!6uY!j+iNR*8JJ@x;H+EIc!m37L<S<P`z4!BwU8V|&<Zoj_2ZNDC
zrvtX$K@*QC1+23*hOVCsqch#MplwGU(DU-6$@&ey;ph=v@;>ky7B1SyJn&mimWPN!
z*@qAD;ix>yA+d>Zdsv5q?W<t2MLC?S%7M+jfwT;JetUjvGVh@xxi+^F{Ta_dLgp7p
z0v(K}?1aftFvB!|Sco(obMf^(>uGGugJP3*6Tz%4OsR)4dE&pHVeF=`k69fC_$$+u
zeVj}>+(OmM!%@w(SFk|E9z*eK^gX>vxHVfEZBZJcvfX0Q?TV}Dm#YxboGf9uZh4WH
zVG`)zd=XU9)`ON0kK#A!QE=;qtw>98FOzpK5|5gFVfz1JIAb0F3*#@5PJeaM!TFA5
zw8ByTo+QMVnv3n9gwcKri{PovThQz7O2i|(5K0;uGe5SbLiP4bWbWZpJg{4b9IP{i
zf7w`OYE>)XYgvU3>$wn>X=7%em;)nX9LxL_yw3E$aKM*8siDqGKXCMcUih%Olze@2
zh@9l(L(1(d$iLxG6gayT>S&xIdrid|`B&M*_UlUgum2~W+EGq2#V^3<(-+~Z2q$#t
zTMv}9@x+jh$0Z?G;pvC>p%IiP>v|1|qpk@uSP@3@ytku@kgLq}9y7GNofjr}KB5)8
zF`jqN55*Vd!dqL{F;dLJMwj)y%(r4*B0A?nl#LETqh;>+_V6Q|>>`6+Ycu#x$Qs6W
zXgv)l)X;CkHn?e!!AduJP;NSf6tWi(iM218rQkZu{n`riBCbM#KMP3Xlo~M)AAk~X
za`2Q-7T((z$J8pR;_`DssL~{Z5oR%jjVmYV2Qtgympj|AWq2VhO@9Z!{}_fv0>YH~
zx7S#1Yzc~bHA_FtY-MbGo2Y12PiU!>jrn9-@s_q48k}gM8`3Wz-*O{Zoh*tT;#%m;
zMqxXMr4kjxX)65c9+)2_4xb0IxIr5t)^0_(led%EbI*_7w|o~FijpR<X&vfK)FG69
zG&Tr_<a$vT3@LP=-w&Cgpuh!4M))FAde#C78mN&wYvOR{zA$(<SpseH48qSXM6mEC
zGiqeQ5vw-)!?<J~@~%J$3AG+Wb{lxnsRA)H{Ui}~n#rO1U12mY`wsRlI*$+6jp5)E
z5c$pok?%*U;oq$ta1Z-C7oOk5LyF<#orEU(IpT;8OG_eG5CC<~u~GN)?U?;*s&Uh$
zU{W5_jSEbraKGh27>3)Ku%U~%rK}o%l-@`WWauLg&+AlEa5JZ={0ICNrVir+)3NMt
zVWi-_16>g3M^mZEO`|WPnY1)-xLeT^&)3o<j}*6|?|Q~?`iM7#R;?uHlQdD^!HaBo
zUpD%;E0X{Ertx<L6*PY=ixA=0c*}-)<VWTbC{!lF?7ue*A3KD?uXzvfq8FVw`i%rL
zxvl`eJ-Qm<o_zY^=?n0&avm(}dP-dq6d=8Jhlufq5gfzghHJ<4iE!3IxFSrKFbi}^
z>uEc94th6PUEc`}6jzZY!otXzcLBO=oI)!2R-k|VchSc*Y0}bNh5zG@#%s!!Kv@Gp
zrr|tA984b~@^dNaTGb8@8}`5siD9gEW0L-~tsT3+ZN&PIFQHVBg(j}25$)7q7$N6R
zp67pu#uB!udPx_iURPq{9lJ^Uu0G^5?;|{F=1yQmBFRvF0h#p!)Gg6@&C?2cq^%T@
z-%;jd@b?CY_NNf%%|GeC5*2t*_$3sI)gwFyoYBMue{?6E##ND#kf(Dya$swsj<Y+l
zURsOB>U~h-`Eu0fq>F0(eetL+FTNH&M=P6|L#zHscx=&gYC|f*pNbyC$vuwf;K*(K
zW5^Jf8YIE!rFzJ`<|@9wES@+{B$DH(f)>5Ye)HfNWF2k>;}yHHuY(|Jw|2wA{pm3C
zfG9i}aG312RBdiPJII)ZI}@kFGo)&h8oD;<hpz6*AUs`h$k8VhCizs7?tN15Y#EEu
zdfOmXN{CEE?jn22wW0owlPK`_5DHrRkFx$`OAE9tKyBb6zE`~-$-D+=pj;2W+IR#p
zG0*7UtTeo-JAp)ZAETT2*U=NU*O|mrC9*9h3wspUK&`YVP^^A2E-~-GnN?4i&4H3g
znO~GJVSMZio=Uh+_A$1d>Fl0=BRSkTAIUm~z<56wG`LD0ZNF-Za;3RxA{hl8m2Z<P
zk};THFA!JC(VQ>xdSu<~DH0ynNmuK~q0^tn;o5y+_~M$w=+}ouXiHZOmRQq-#SN>8
zlH?3BPIZF8jgR0qZEmC*be}eTm4sCOTL9m$aUdz(N6~X_Z9JxQh%Wmbi4_0JLaCIU
z*w|$rdA7BlG&zJox%EZN&Gbn|VnGWm%KlDgY}IGB<t;=#eG(|^^E~8m@(j}1RzaWb
zu_huxy5!6^6~yZ?h<)Z}@p3&!qOoBNZL>8;Nly*%5%FP^*DFX4WEbMeX?G;F<pbv1
z+JLjhHHdysECiZyaM?R&@@Lm-x*#bHsT}!5do|vLKNZbU-XBd8^Y1Qt?YWxV|MeQR
z%y1z-c@b!1ZjSbUuA%2&(t#r8Y~RI{k$p}ASgOhl`K2x;Tr1}hZ%J=-reF}(>X<^4
z90AM;i$Hg`gcHk(t*9<dn}HrQYSqOw{~NyMndV@^W5&&NyqtrUn~syl(onMS^l5U)
z{3e#kj6**AGoiTb08}01BDDD+Zk0Dg3wTzdBhH&~tr4dA3Ux5w`53Ie#R&O(u<y&z
ze)KqBjZ{3`k1Qf5u!`e#D44qo1#~~7clHRQPs`WA+Q>-6U%3NYOszv4AAnw3okpb{
zCMajyO?Y$M2^MgFfmgiCVeX2X@Z~agr>`=|`E~gjJXo$q*C^$X?JM(O&BhdZRPhQP
z`Zj^Rp6(*?o_q1*l~J6T-5j{{V;o7tKWMjpUOZZRkXE$o#FAT+5x!Il<4e?0g1!^G
z*NVU?6?!CW@gewj1i<aSXVDn@PWzUa!Ci*G$uGM+q!g}+bX+$flZ1nK^QO0qM@}+%
z(DW3hG<l$17WMdb!T>B>xdy48;34`U@#Nz3#f($SVl)!9naSu&Mm!H+VjimodP6`X
zrNKQ!^LF>)Rcic5c~K}dNqLJ;eU65~-_nuQ)x%I-qkuRY*Aq_TH~jg`PAt3QD&k2i
zfgAZEpr?ohY7IU`tbV27XSJ2s(a4_UjT#~6yaLX=dpWqrdlB&(oWYHxlX?>;M6}Pi
zlX%`Zn6u{){xH%{q+_D#+u6@ikd-|1yt<Lp##Q2<q2ln7&;b0$SB^hCUyRZfH1O;K
zapIW!0%r&fL4`jw$rIf}<kMz2?y<c1iNz~)$v*<ReXAjQ%MXx`o?3K_e<cZtJ^=OO
zxk<v7&B$BwF~jTelx$D>%?yn`;@tUr8CeAxqbarqaRmgBE7O9slaULFYR_Xfe>wwe
z&V<oNLR-l<8B^@9J_Grus$prnF^=z5fe97SP(<9HXgIl$xV8i|)uaq>nD>+G_FCxl
z9UIgeoPtI#HXyh4KIE@xJe0P(OBbYsp!$=uP@Tm(Ps+N%%|*ksg!MmsQsF0@>RyRt
zBLs-*jnyP=*94&+Y=ys9XfqQH%SmSIM`&`s2%3aHrSH%=c!`n{UF<y!znP1p+$&zF
z?&k}3zoE%#LpyL)jw1PI>cxmSIO0+z3w*Js3fU~VjUV!7<2$vfaL0=QXmoBd%zsu5
zM`PsBnFR+?=GVhePF0Sa`fNdFXL;$Xyyn=qJ{5f%62py3+DO2PCS|&!gvT`&Ez}P}
z&!%~iefe*^D0m%wF`x;HKMO&gAL(%GAAU6Ri;EmsauoviOfvBB2U8Lx0;`$`-L@^C
zNoorrLeZY&ZZ!usWi()TR@O+m$`$Lp?IC@NY;4FmOKQ)PNm#zb7>!shMOV8VabYn-
zU)s%$u+~S~_^=RmhFkGK$0a=WViw-_*o#juxJXYou>DcP7T7<00c$B`kUC>sbi&Al
zs6BCq`$i=xyT_8a5Uxia+JE8LcV)6Ge>>9n$#Mo`f=EKb8~UTkA~>XqVb!T9lGo)=
zMtrK^N~t5rN_P&g+v)*Nj?F;F%2Q~Y-WW{1ZH^Z{jK{^RXX#EUE`mJ{!Mz>H2!+_;
z?we!yUXB`RJ21$RezFQb8k9#1ce0$YP%o%j8G>hm`fv!}3i9VuF;1%Ai+nCCU@x5k
z7<cj{`rx3#1Z9=updMSIPDjIs#}Z)XUJ=5*pNlBRdqCC8cIf5SS~||a2LEMweQVtm
z;J>RWL<Drgj~@=g#+x+6Z)f4&i?6Wp*kSl%zBS={R)_`iTbbTNsp$C^MI3-r@VHDF
z=~7xt92^!x0nQTS7tM`+3)OSZ6jyV``@#sl!kpk!3(>9jKVhS6DNNBhO>X+@VgC&R
zWVfg=QGa1Vn<s>$`vE(kd*5UHed9Mc|0jz%<4k<~#B!8%+YmmtE2nL5>O=biWkyl;
zEgcq|iEVBTaE_n(LGK?cfU*O>nCsyX=d3m(Q4d)>ZuTtRaafF?>pQW7@*(8o#7nNo
z=flto8F(d*Am7+o(jrX)z2If*`m;pz?W`aPw2p_)4F}NvUya!G*9<wHGzI1TUJ=va
zcWl3`1AlB0BO^Of3D?#Y=+yu}bL@H-)Y;BOYL=cQR;f#f`GI2eZt?&+R27HcOY@Px
zI<Z9AQ6K3_s=*BX&E&y>DTd3kA7-w)3a^Q(pxL*X(8qrXu|2O$pPZA!YYzw0S6lgv
z!ha1lbzN~rk6hviTj<cp_y=<ES2)?a>N2#(30Pk81ui>UM&I^Ac=e)sQngSS6$w_s
zFAsX4&qfD^H%k@%onl~=?i^EfS_t#%uEqMTrK}X96OB6mhvee|(Ru4!e94bOv1JnE
z+RO?ZJNFH?dm59q*DsJAqzJB2i9oS@UL+?_7QS(N%{f<8N7934@V_K~_)O&((({;r
zcfV>Qj}%?<A}0;D(tLECiWj<ma~TpG?O|;8WFYalV7Px%6q+*Xh4&n|(T(yeBsV&X
z%pJ=kd{zfZ?4ED<Tzo&d!0!63rL?fTy*g)0)H>qeT7aFemqI0;J>=W*I#{>05c4I>
zC#Te=p!UsQ&@46zwW&vvD!#W2&$>cbcixp4_B6rU#@A676M>4o+~CB4T2ijk&e-c;
z!QLrtgvYR)O#6;N`<uGZu~r`CT)T}aFFmv}Mi<2%HD-=KYr>zVbFeP=EjUkN0SVxW
zAXCBlNN(dxXzXl9zNc2Ah}rY_)`|i8cEEP>LbnGkrKFL+E)V&Un}Opy-jh0u7?Qcf
z2tP3DLGw(W(Y5zS;onQ_e><&%mhde?qo>nRr8PHaecUocwtt3mWw~&*^?o#1SV+!X
zaVOv3>_vWC%NWfam+*ewt)SA+jIJ-#Cnvn)@DH~`DCNd;ntND|%$Dcl4O{roqD|VE
zW_M5<msd0Su7JbF9mBFwDI)RsEqo^M2!4rb#C2toNH_E%`FbUUY`YXmg7U{n(uPF*
zH*N$Kvwe@~YzXtR<qU0q`wY21V@o*y3W=EQZeq&gLu&r}OLKXCqMxpsgu_l8j7-lE
zw+K<9u*H{2OY+9HZ$6;oE~l}Gb}OuUWQJ?#FnA(7gL8|Wr%SJYfM?IWf|di@P=Fji
zlC!9$<)1u5yX3Rr*jI@2k_*w0U=W-$3xPM;9a^_gG+xO(q8nD()0+pY@sYIU=>4fg
zhWnH>vEmOy+~yic8?QzpN<;9icoKX(R6stD-ohCJ7tmruUn1t2gz}{|A>#?rHor(x
zzDo@)vrEChE>@D6(Ol@#I0KEk7NY;$FOsmP8;rs|V=OwyOV@68C0!Q-(3c&J=;Y}^
zloMKxHy+Qyag{DGk)we!r*rAgqN|Yi`}_3RSp`yUZiB7z^I*(pmUF1XjcjE{vD#;f
zYzX-XTl!p@%!Xs&ef1J7*r^8}ZXAX?q*`F!qyZFJvyHTc3lYAIAo%lhDI6W*C8071
zZDapK;K*X=<^G7~16kN6Edd8=ttGw3B%q#M1d3$e#Z~tn&~8_^;yaU)q`1h3ls(O0
z7F`N~KF+e_wZkX;YON7^yCxa?{<#6mMU$X>=rX4Htt=5*WQ3E#_jBy@HWH<}>87F;
zk&Hl%3Um0hIK~I+@N07yG89k*<yRMA_u+Pwx~T+-nKTk7=>q87?oQ4h)*xnnp76?5
z3pBsPh56~=im%s1qtO4P(ZjdfNPC_ULP8zz%3C)a6%@!^ddG`0fHdLBpTl}zzJb}E
z34Cpy6e&9)L@s^biB^OJ<JFx`Sa11vW}w5Kc)8o7<iQ&-@ZK0S>?>q$et1v2yHr3<
zXgBAzxCX)wN!WWYMU3~@;yYj8asG@|lJk8foUX?U(Nc3)^5l#d`u^!6d~xgmaTiY~
zT6G6upo0nweNqJfTe6Swi>e^eU%n%~l@TP$bQc-B<3_Z9Eyv*PA8eW8O9BUq;lk&6
zkc=52+lO*w^C}wEch_L|)cK@aPlqgK`F`)8EJS}32~%o+7ryw+OL&i8#idf08Ow#1
z#0Dl|CM5&C==lrfq~oD)??<RT^%|#LEn?h{R+F~_Md)$=VgiH>(aJ_$v@=+c80lM*
z%8ooZ99@Zd0y|)$`ut{AsE04G^NWcPA5z;A0@F9}lh%VD@VhUf<b;0=6p#`^=yg6`
zm$8#T@p?RIdIVizd$zt|9(<Q85V^{oLCb$dFb<nT(bMuZXuXy-iN2kTq((a!)iv(;
z|6=RR-?99nwvUuqL^5Rvl`(OzeeEqN(SQt<Br4zL38hi9S!5n76iG-(xUap9MWaS3
zl~ht`&?L>{d;Rb}KRn0r{0G-@ANSsSt#h60{3r{0VWO!d{muEaHK7&wmf2k@JJ%ft
zL|?`;m&Vh%3tH)j<DR(3=QH1M?*pFW)rPn2HsLnSyG7Rxe@%Dzj-Vj}`$_)XpExyJ
z#Ev}v$@{CA<F7=Q^fztADKg5~`oGT@U+l!w?%WekkX*ob_WEGC1L-*EkSzW2%7vTy
zNe@4sqrgm#R?<EpGV5%uFXPV)p|T_9v5Ssf^senBW@I*o->q~WPhA$kR=yg}UaoFo
zJJN*TFQ}Lr#MfiblZ3?QeBj(;9^jykK`gb)m>uXFiWg?>VY)-|_^$`%lN*z&u&Puh
zdDOa)mJCrM#RfT~eNi9n&^BYuiP3ER)pc~siEMf_5)mqshgI!|utJ@B4yFk{6^)7P
zi=jD5J8_$<YzxCuYkm1k3!+%u0LQBS=uyW5TX6b1H8%E=;2WQ@l}@d7A%mI;MD)Z6
zZ<={asObWz`s``kUZXbrJx7~d)KsmV)pHWB$W&ppFOh}~zQT_^W)UUbboxfYn|!u;
zLpMp+;J+nOd{5XY=ImZ2UKc9Qa?QeU_?X3%d^v)THw21b98_eh78*0{U=6Cg-~qm#
zH;aAIy^19crcsB*HY6>73?Hgc%x=6{jV1dR^Qrwxq{?s&_4u^|Yt;<kIaO=9i)lf0
z$mJy@yE1@uC*0?1`~q>0VH~$7ERW7!sw3QM-E=?=*f9%Tyep4lTe(Cw+~5ko`E@7m
z3sffk-(8vGyd(S-m3aEiQj!`Z?Z?YkF30p(B4@g4K6z$(8&@~EQ%kK9`qW60Z4A6c
zo8GPCWVJ`r5BL||&&l(vWj(R2w?2Lle2eSPr2H%=%;Iz=@vGOUag%N~QJv&-`0`x?
zezpBv>|<O?{lg|xWwAa{nsybhOD$zTcVT9kpG${p1+tGFL~QzZKRG)31^4Bu72P{0
zmfT1?O9v;l;VVy6S;qBdT>R0F)!J^yn~eO31=1xe3r>>~XKyY`Ql4#{AWvpqy^PI%
zlnC?87CP&{)hx*=i_TwNOD{Vq<I(9dOb|Sov#p>7mu}{jvODpZe=dYAtjAOD-6Nj)
zKe3I*EJEJ9^97<j26Il*jTOB(CdZ5S=^agj4dtoQ^ob;?a5bkr`7}NESA~TnaCqIP
zTCD6iiR`j-$Cn!tu}6eDd6;3&4osCL5|x*5>49uMRIQe~omGL-?s}8i%Xjb@`xq8i
z152HpL^c~RJX&WT3tTZBpL+Tq+o0shjS2fjmpW`A`I1^(;3i==$w8frIFZ2;rx-C&
z+61cP62R~vM?E9jxfjtpNn&&d{djwTAH47me;L`vjg{VspPneJ4e`E7mwfhR;X8bA
zi$Ms@8TpFV`l%4P^#k~t#7LT9xro~TjUuz{u3_0Z@qA;*BC0tCXoV9cg+U%zW&S*3
z`e!s*wf{Kv(=@_0gJJBDk1(f}8N&t_oTo*lp47Ez67!OeV!qWEuvFn$`sR8cmu<P6
zr*mdf$B8PWPp*?Q>v)R0zrGO{HFUG}RY!Qk>tlpEES=Gj@94|N2o0DAW1s$HvF1uW
z`nu*Dp7!t(@#%iTzwsD`FOMk_TcxCsQH!6D@sr+>7sn3r(dCL{SyT&E?#QMQOQcEv
zUtN+i%836Du5k-Awz9?b$8fGeEdB)Q>}cIx{K@kcJ!<j+n;R4ewf<d-K1)-#F$36I
zA)D0J9EJ~+;~K{MP{S@|ayv4eM6C)WZTaeCgWfA_+ab-$M=Ow*W-{cBTRYYJ@&bFm
zmchQGcansXX!g6;jOdnh)6a*FQ}qQUVs(Xy?CRu95+892N6OyC>T!#3-1#_McX%~B
z@!=eE`dQ34*Bmy%)5<RGM-e*hD`?J>GHCYFqr}c{1{>29$?wd&Ozrv(a}h72=#btH
zI_k$V@%@x*{Gsmaw7~2O_M2kM-n<w_UVHarH<k0;B{v6R7gJQLc{z~#@b@|YPUOV)
z=8xjz?%5M9+0l4yUMrgsnn6pvBgp9SisbFM9$s~013vX_6T8zh$c4*jGJ%{<mW|V5
zrK@FG*9Q@Ov9tnD`*vR3tgOxKHjJeUohH(-$9wSutyyH*(Qx+cW-QH^e4kEub%gX4
zT%}J6O6cSt*ZKK+ZFuJtcZwCH>ErxO_|U3t)G4lx*FCMuZE4s+;y(UFMe{<qqI(%c
zVKL7yty#t^4!oc`J+bV=PZu(Vy7J8vvazAJ4(_Z968zCJcymlS{^_N{wu*LRjqkPi
zPgxw9b5enHTAidHyYs|85=U{qbrxPm9@3Tz#-!d$l^UC8(D~2$`G2E*N$b#*+N>Sl
z@e0XWYH{}px3=*lowN4<o9ER<ZAV!%qYdBbd+XK0|6~>`o$rq0r(ME3JHBw|<L8nw
z@8{x~azCklyB1#DD9x5c*wQX<Y3eX8oKI9-OPPx=fBn~TwC~kd{B-<HnpQN4hKp=j
zx2O!qep9A_SDaXm|4FJ{<B!Kr>B7NgWo$!XGnM@Dh|eG4$<B4zbIODN;r%OD)5iUp
zY^lo(mb-Q{DVCT-A`Ty=^LO7AyiSL3)24K;On(Xcbf^?B-<3+Qw6)?8%WaHH&!+cX
z?1-EBRJuk_h1>UFJlizHj)d-6$DSP=%Vxg0B=-ONkWTY#XBMF*L}d--CO*z(Rx2~u
zj+I*Mg6bIN9piz&(J12EIg7a_2a;-jC{?VxM5BVovN@m6;gm3Y;$EGJ^ZzX5H+xs%
zY2OvtqCZZ|NG*~kr_W?9(GKK(bUiyEn?siE@+RX$m2tUqD&1@8%gU$~JG^EOQ@CVI
zp0CnJwZq1fv*LSX_HcI+cvqgdSo730Rf>4bb;4ibq^ZqcQ?fB{DnEJuDg4RdER#>v
zA-T$f_)PL#EU7nyy)G{1gRYMyLBmt%x;$If)IFAsv<b(2s2RR{@)Wh(J(he^jly16
zRfuuX0_-qci{JidANlV3hf@}`k}L%)@=AReQ&}j%Z{9r<C*KMZdI)vmd;SrXeoXP)
zP%rAca3xE-XTbup;;8fOZ02*ulx{vY3BMir221T!p+_p^iTbhq)c4d>Vw`#qOY;%<
zN2?W{nS5G^4(-Mo#}r7>9UIoSSq`UFW^<<ubp_Ad85}XJh*g(ckk@A?k%2>D?26e<
zbfDaTUtjM|uk1g@9`y$D>SzCQ_Wug_$+uS#{~?yd;e7zTl&{EbEzl)%Jp`S|ERI`b
z--gfIEhc7bU*My&W{@J!S=i`XCt=4f<9iCP@wzGIRH|Q&n3{@+`>tqw!X%Eqo?psu
zdhdtzy7ZWd`dBj0;ytyweN?=|<v;v%$zLpU?gJhpT`oRT{18nzZbv*@gJ|NNqsX--
zpARw^M;?8=PaQjWrgcuxH2*rV>{CbS`L8Z~9Ir&$q!!}BB0@i8?885noT9Ur$MEZ4
zPQ(62_T>GYo%D6`Lhck-M;}dcrjJgJBztPD@T-<~@#~^b^y!V4SVnFK^Lm_5pDrH3
z(+p)YG;XK(`n|V!)R_7FzAL9`fX@vY*IY^s)=FV}sSotQ%^~D@0U{R;enh7ohcM@<
zsrc)K12m*-64{E)@&3bNx|t-=q5TCYz{U|f1stRE3L3a8uLIcJI$gSOSTY@w7{k}C
ze8>B<PlKH?s3EE{w25Zxwf<f6v<vS+`P@ipI3q+nJI_OtE58H<0@-=2olnXkp`
zyZfp6_F*(tJf7PeRe?vKN!*d4ous#7F14JhL{!%*FbTs?+`&O(c571umYk)D?|qre
z_d6>ShuS1uGVu%d-yA!<>Fh`Pc9{<zKi`p-pN!@jD}Gat?h>jc&FG&Km-*&sBfj%~
zD&Kw);UnX%n4Zl7rqV6Np1*lWR}J!XtL$$)a&{tj^ok8D`DH{d<T$f^=lk&XlsdL>
z_#Em3Wi%0s@ujjTs)pCm1uN2d!8VFb*LdPbGCQcdOcMI|>?+^3awD!!UC$P{$Fu1t
zb%mV*5k2L&h<^BJD|SxOA&ZUG$R`;Y^3Z6ocDT`G(mdaTWi36y#cSN=+m^-Av18XV
zDI-Jj=;tId>N}5vL-&zCfnhAZdo5|PGA3%zKVjG6Zfw+~K%Dj5ut7`$BY~y(_LJRM
z+;j>T&r@MKZcoLkIcwSL*T4DnmMD60%@OM3-N{S0w_vxYk;LYR0UI@}jMb|e;<~B?
z{NHwa_OYgs@68HjW;ae?pJ6jdM3E(VFls)xE3W{XzJ9_z9de$$|CK<aQk(I*-e#J3
z--J!yYt1~bT*Pu!j%3Q<LbmlhA~M4iIr}<2R(L9g#wZW1^9*oeLsJb&%v42UrZbIs
zpLQkNS0*y^$V7HPGJ?fa=g@+^<4OJsKm5zRg+3i7gvAzz@Ynq2;@$C)?4E5I=YMP%
z(K6Cwj%V-kTV_R(wD^tWg3KfArn7*(T0V!}pE8B@ygbaShv(3$_c!n@lEh|bYtw<F
zvgGpJ*@UVb<_6+E(GTNFF>X~MUgoOop=LOZ=BHA9pAh=?n-i8zW>{rmBi=mbARBse
zq+PqmZu}%q#5PGeu}j?^EMR0j4p!DB`+RH3yeZX0=909X#cdnvDW^^%lho;&pbTPg
zPN-uJMH08tY?7)@sp<7UWLd&(JYXHp{C8`T(F*Trl*v?fy~T(1ey*e_Y9*b0W)ZV`
z%+o<Mge(o;!i9Vn-nWBN>}g$)sC~LN9bXoU_c+><q@pkQu-!@iuF50qw0bp_w>g3f
z+O+86>xEc4W)X1~5=C>aWD*T?LdUu3lFhjlw9wgx<v$r<S0e9l74Jur?bv}S$)3kX
zt=rkinu$zq<$Nloa1OVhUrTaY63Hm94AP+9PVJhM+2~(iu$RPXjz5rwK2>&b$5hpc
zjFb`!d8|pCe%rFBlsa;yX9NHIY7(}1;lS8$RZ{uiFxD3*%U>b>Bs_Z+@69*So{%=Q
zxjUWOR+n<6`Qu22?{M5Z^(MBsnZ*2KJg9ccdLsS$k2qDLojCW*q+Pet8FNXa<`sXr
z(>_0NLuV+<tPdyWr>Ai{46KP_MH_XLn8P+I4rgn29}=H`m%+Y^WLS`gaBp5MB(+6d
zIM1$=FRacXv;JzaRQ-np$qkUgVN-;jVjq4G5Qn!Mv!daKl~~o(kaR`tXA&uqc-6*g
zZeZaO^4VuO<Ll#T%byq265F%L;T8PS)1zt7fiHY=$2ZpTF`BmAJcRvnpK}gfCA7=>
zzF6hcFS=b}A>FWBjhlQ?p1t)f=Q1}8XRbega3z92_(kPBGAptV|4nrzt@mEi={J>e
zhO_}TSt7$CbMysY-e>Mn(@lJLFqobadon}W)#RvGJdu^OAir-!QQpmiok*<3j%LPq
zRZA;QvEPihN{nHrydBt<h85&iiYDp*Rfk7^b`W5}W_a!A&$uWem21}aWWS3HiTJ`Z
z8XVS$w>PPfEr*oI)(r}*uh@|+JS<O6l`Nx<p0Tun2)f2`TYRng5kB}$MEzYkYOPR8
z&+XHrS$8`4l_p<s%REQ+=s-K3V)2b1d;9`@HDF0vtbcOK6~cb=j0n8p-w}H6dk9y2
zkZ~_lMzJYj$y`uw3_WmS40kCI$e`exY1(ywuFVZ6@1_S4>h!nvY_yPvprppQ1=g(1
z^#a}}Q9-hgm$NU?skri~K0cZChNge*;)6Hovy2l7_(@D3U9osBDc^Wm{5!gW+)itu
zLByElED2&N#}M&r?jYOshu~SgnfT+Wsm$=RCpmiP5;lx6V_Pc>nCbE5EPrJU8<F^r
z(}@D|afcc2I9;CUthXe)4$6_DnOUUg{Xx=dS4l#4y`x<TdGx$x7-yaKp3Xejz%yfI
zdM<ke8T+Mxg;%FDajzSNogqYh#1tZ5R!(bYS220H%Yp~jn4GIAqF4XUB{X0&+w2p^
z3Z;I~M5!Q_d!r8b<4&ReK8m+GU1v5*C&{_?vE-L1hu&6yMAwvVBxjt~5b?ZYI4Qz}
zzB{@fzj!~0b5>R1(Rp*oh~vju(7NLoUA(~$T;E43<<3*v7NMWz&j@>1{k*M>H<PJ9
zN{j5y^Ygdeq4C9M>HP)6Snu5k{Aa{uqCV7$m?hOx)h8#|mkcKsmy1}=f^eLw7D~pe
zISSey!#`}-kplr6>D<K<M6S$;I=*;{gU&so>8D!xZw|?P>oQsLR^W$39-GD_aGZEj
zZ3g~k_JiXEU)76EfAOrQ1KfgVzXb2Qi+E4G;1M`7mOIlsgHvmpOWc}c@hZ*X?2+13
zvNPU-C0R}+Cf5$rbKVcR%t)cH9%o0S1NPG7aiz@Q%T?T-9)@4}>>xMi4Q1`m%-NJJ
z92Q&3V!2PcY<#0USrj0{xStYi!PaKFp~jqu?w8})RF#HBxiY`{_w-ZtTO7Fj4FBb_
zB}v?~mTWVXVkM=a6lFHBUYSgmSN#lcdhi@)33Hb-nal8^?RKnqsxAMZ=rrx^>EXAl
znR7p0sggBQ;_wWqx!A0i(xj|(;`?7dU(nNwC(Vf>Ho6Mr9C4(}wA1;q3H!O@6%Y70
zsp+_4`cTqa+fPqTGbTA>d$_3!rqlfcDYQZLGCDUfnxDGm0o5C;N!8X&#yih{6HECA
z(*BD=)<fMePJ8?d`i?hZ@<;Np<tk6S`ppJ>@%>fIwmET^k9%?*r<F-yLn{+^sNxr=
zPxFq2(d@y#OmaH5kvrLN4xe$I$&OXpGt1Qbc-sCY_<Mj9Yi}CE&cs)<8J|5^n^F=N
zk>f|kINOm8J)N}kng%xPnv5rGe@+cns1W^dPuwWzt<|zh<fTGC{Zrh4T?2iH_xyAm
zpDtovhmT{Wy$;Mdt4Z7<Q^BP*_i!6REa{ESp>%nh6wR&MfP=qDvfu6dMZ+K(U-_tr
zpFE#P_uLwSTrOTkd)msmLpMTbrC%Q%Vxi7f^gqR4Ppi?(?#J<jQGfC3Et=R`!j~<6
zaFC_DFUDg{C)3cv49aHcU|o-NPIc2zy!A~ZO|bkzvnM69+*zyX+<ZSO9~QwvzD=aN
z_fBI;-$#)2whuT{m`HE<>`Wpjc;nun;pD)p68cbn8rInBE`D}*4>nnOoMzqBW|M!8
zVhJ0JsZo_AQ=R;cK6iB!?)BSL>OU#cGC_iMl@;Jmp(bnv-;LLB>xg6KXu7%5lOAgk
z@`<L)Vbk$Kj}tkQ=xP&IJHYVgfMz~HbQ|+$tLU>=J<xW3FWH1m$;sC5xED`mSx-Vq
z<C!ACW6(zJ`@Oj4X=BNr7189QODVnET|qOQcac|p<(#RM3LBkwmEXKTmNU&(r0Q$E
z$cP6)EY|HSeitc0zBfc;ZNW?1JY+p9e(t~)7Fy!`v-63BiZ;F0lY^(8T1Ms^5@9X#
z4*adcn;zPCfI6#`)0F5!{LZL?{ERN6gCk|?=6X#O{7WyGS=)WuGdh5O+a*n2d~qVF
z(<Si!Ek(rokB}z17-QWdcI?Z0W74orcvD8laytzM>BJa9)_Tmq;+N6H@U0H%+^k2^
zHq4~|EqO%8PaJ{O{z@~|ha5}U>_z=AB-8op_3%^=J3PGMAm?bRMRGRXN1}rTRKNEo
zwUpAshwn_~K7RJ5W_F2$zcP$UH3@j<v)1JI?-SG_<RwM(rjcP5>&VPlLQgOMi?s{~
z@uuK9YAXK5&KC|N%U)Nou=XxyeS0-CbeTq8sN0fF5dqk@McQtqqZu3ZXO!LA+d5cc
zRtdfK+=cAEbddBsFec)Mb+p4M1+VBP<l_3v<eyIl6&d^E&riLWsC_%`@{uMJR!(8n
zKc?fnr?sTZwU}+)(?-h<I8%v-h|Xwg!;h-V@M*bBTzh*FIrYenG>wcP{zG^2@46hZ
z)%cO(&LW<luV2kgI<L?6rVOVcU!5>|smPQ!Un4`#hmn+xazy#-BxZHWfQ22FCN(i_
z?0Kvg(;Rt*dG}PY+ZWff>9&%jB=tC6qrZ`?7xZi8)efX3aV<96Dos>S4ZS4)uy%I)
zJwfkygf>@M5`6zG?nu8!WqyCe7i?}5afmnTYD~oGT6vVebf0fOIf@NSm_V{s#|!Ud
zDjhJLLF!9uaB)d7J(l0VK4sgma$3R<>AA{xI(xJE?J?Xt{{_T7bR1W{XbUM(D50_Q
z^VowOOW3oC-mK;FI<hn^nQWV`PQ48c$+mPAwrAHgvd;A-)W@!8!5?Gk)aj4-Zehpv
zTx%FwRX&dyEWah}ye83A?<dogqEYy}uL6mf+sH2NSV`W8Ucs^_Q|Qtm%h=oJJE+IQ
z3v^bGU_aF_C1C~LOeOdPwVZN7Tv`#u8UHX9uY7bFKOeJ-<QdrTdvZ<L(!o(A&9sR<
ze&j;#X>Y>AvR2_o@qze+5O=zuO6bWFQ}I%i!$qe?5!-|6<mM%5yym(C3mKe5j*Z_&
zD?*ov7qqowGPRvK1^41aMKzXro6wyZh5Y<WhuQcG-?@u_=99nH?#%ejO)Pm|iSa|m
z(CqN5=+5(#)MJ}H(I_qEm&7^K)D~B!H@24D?OsWm_*&+%Er1?xHXvI9O~~SuFZBBL
zaQfi-3w$e>;jbg-(f6AT$o#$Yh`ii5=Hs4^rB)4PVKVc{^Co2?lJrEE68fpD%M0##
z)*oDUM)=0i)i~y&8#(`IJiRbXihY*7Mb*<Mk>PhAVGTjc)n5?FWszk3UuhxzR-($z
z{IFm@He?Z(Hyr=Q-<xr>LiwV13$eMk7ajI0g+@6!(9dnAWXT96Qs8xuZ_+~8@at*r
zwCQi$`E!u|_*Fs%TPms4(tp@IyFwi6vXAoA5pU~nqIcqN;30JjiF@#5;SO>o{T0=;
zblV=XQdI|cl#io#^g^j|&RyJX^a5-BP9&9j(xg&ZgPSo%pR9QiOm<z|O~wyiq<n57
zE3p1f>&pw+cBxR}H)IVhn?HrztgPl$`-7Ng&}<<=I*!Df`7+bO;WVz)mMVOEPw_4V
zYLNDa2dlF<>9sPgDBg*kR-O>DHV$!0eP8hNJt=(fvn`mrBo@zUpUX7jFXH{L1Gz05
zWZAK0tLa9o5Hep%fkbY3!sJ{us8{tpJpI-ODxXt`SLh{?)orCz)v5+fF_B>9)4tG$
z$;+5bw>n+i7*1LhJ~7+K2=+Gdxj6bECc*C`>B<e&^z!UX>J`A(B=$$(8*i^;!>tBn
z$=k8)5d-?waytDIq0dt8-li{ConcqZ^XQr7a-?e0S88lI0n^(tR1v8Y{_G0o-JZdo
zJS!vz6y2%kQ7@do!j2qskY#If7vPEke}1=~COH6}IG`(7&~bnBTQY{S`VlMfdF{_M
z)-{Nkd~juv+g;cJS3f%5<{F-~GZoWoNmwaRlODgNO8Vb_riN7`SXikG&6oT@>n~Hh
zI#WZuyV{Q(61)U!`V8=un~S;2qPI9oVt_u$>A*I+YGkw0BAl_hpVs&s<FyNIS%BXK
znsY{*NXVPgql@04WPgrSsz1S-XOxPoSE%vPA5!qA<XXJW{Q>_rTM74MoTG`3ifqli
z0<K6?*e&FX=<`vxP|cyS^xFhML$rB8r~Ip+f8JQrb$tcYX5v?z$MthZqIF1kI52sw
z>CD;Ojrn~IW2U<<@!w}jaBc_l$$92Q#8=(<_R-;--83cgMk5-xG|gcDg=6kdmmB>O
zGmG98@`Mx<r8trNA)5Xyg6=vcqFrC*Nc8Sl+*$DvH$WXPQ5Hm3s$yo=*b0t2b@30Q
z92&bJh33D}WC!;BrQ1@Xg}H0Ckl`dvB)3WtGZ}9>d&W0h{<NCc`MHq!h-c$X73s8L
z#14GoYdSu%XBCanZNp=FH<82L(RAlib-GJ)3zn_Vs%=(xCJw>Z$xPmzJZMkFHLu3e
zDft6<?W*Pc&<QD2re^?CYr!k|az3ZmvP3*HFNMvw7{bh~HSpe>C$a6R+nml5H#YXl
zas1+5vKWt!qEj5B$hDql_)n+_{~HuY$L7s!(Zw9TZdD^)G~JG_Zt<nL_s=8Ec^7H#
z)uDKONEg1nO_{B+k7s;R6utWJ40rrY747cZjmwRHV6(`nIALf6Hm=`CqoiNr!fm=-
zgM%1<9T4M!MoR0&)%>}+ZuE%4E84Q>3^g7$k4AZPqfg4esL!M<nlVR)PI-Ea`?>1^
z{XJO&2NX!M5yrK+VN)^Pr$3sF?;J_J_FNG^%PXVLU)JE=a|6(Pc?*1N!6-WBr4;RZ
zcLF~tnt>-i%)t@UyZPSEIQqFiUz|PrG~U}0gS$dTVy*F-cy31!+rOxpeh8p=;d%?k
zH73w02TJL$+XYzZ*&Jf4Sx0N#wy-NFC1^|aUJ`N1TbL7skfrixgv=HVK6U#t&hg`7
zaa?o)DcGw+X8!EL;WAF-#$5xNXQacP_r9i&#|pV?`ttNeoe9f*wh9kFdjhwxYV2|^
zohq5hvC+x5sU6N?p)!M9)Cn2(wjvb&u+reWZ}ec7hCIACFA{rHmGBRJQt<s}tMNg$
zgv#1&qtDcY8m_C8c1-^)-uHSBH#;hlSZ-ZKvdk@TaE&63UoE1abT+YXW$VS8=ZNvA
zed^3g>NmZ2Cyd@KDdr0+j?uTf^vJn#AN)?}f5zS&&+`4E@vSjtEM~M0`H`ZH`~C}H
z71<iN{;vbw=jcfvytzc*?L18Pd1y0!b1zmFW+W~#k!+js0;ZvQmO0a6c0BfrFe|lY
zUw_S{->f8=yQMSnQj#Wmo$vV8I88jLT}9vYYtvd`ulx1MS9Jd!9rBib!)>oVaSLV}
zF_lzF!9T7~Y;2>L`*#sPL6|=~l^tSUv;9d^`8c*iW<4Eq2-po-JHn^RlJ!}mu-=CZ
zVmsNFy{N3fRxV30=Q*CPo@B=z=o-qz538`rWHZ(pk;9)<ROaO?vpBB@TEtuK4ZfBX
z&z6{2v-gqKBsS?E8X5i^pHWE`A9)<XPUxj$lP4x5EVz-Dh53`@rR7B4QiE&}RpP1x
z+03zhE-g|N^JC6Ze7dg~@5+3Hw*{HvYno%2#oRcewd0SV;mDyQm9t6fnpQloeiF(X
z`<8Egv4#Ffa3J|d)bQD$LT)j&;}-p!fv+AgA+EFivGJh<E>Am&j(VlZ`J$Ov$-0H^
zs*J(*|EkExylbpREeE;kj;00&qlsl&3>oGX##B6R<6x7&^z`lXsQOqknXqUbKkl(T
zK9_V7=Lp)BqPP@04>^T@`J5If?#`k`9Z5L+Y7zZ(zl57Ky^BV8lyNKMV`*sge&!|K
z&-M%Z@mrvTIvu-1hE_N5t8>bjPft8mYg|K;bp2?;3nhMD<Z}G<N<F<9bd_&Zn~As2
zQ(#Qk(Y=>5mfcAkz}ctsx$aO4wpedDeX`D;`!?5w(5Pv&_{SVJ*epV#r2+K7yMBCV
zxdCxf%*0PEOxSb(m$a?#uK3-`fBc4x(@9Tb6s|qeMz32m@%yqP$&!b?)Tr+PeeEcs
z-@azh7`r`m)xiO#t8|k)qfv!<xk_AD?M?r6C-FbK%V@2oJbq;vh2!4z&@Z0<klfoe
zb}o1%4eu$y9ly0m<%ekOFeQ!pUn2aZZE|$gW_xz^_kJwb)PNIb&*Qhv^X0Zr3}bZ(
z21IRP6#lfR0bg>SO%)y+u$A8e*ww8jY=X2Z8CX+@r}*w7e`cgpnM0=7SvQOosCRKa
z;r_U^O`hm4x=F`J%Mt&qL;O<nPAVN-&kX+b(i`(z@a8QO=(MjLoS(fsiRS8Y{E}i`
z&bfj1PJG83-_|42+0*bgk6Zk!#&m8V<{tUC>nJy2>2bQM+Ma74IL^<|cVd5%!nj>N
z92e5Lo;JANqHnIL5VtZrmO90m%~|xA9$h>K2OYS`UoQAZ_oQo+EzgA9h9NaHX2Cl8
zel?K4-y3PYf)yKTY)eO8x1@(+k~lo{8NC)Zl!lzpChwF+5OTklYOSiE)@H>xdeRcA
z=|}0>)6Ohwl?}hXt%~lNzKmV>O-EC{E~MTUa>OS`xw3?B6o2h#6;Jbvpizn=$^1rb
zvM)}D96#quS1U@eSeY5P&fx);;VyDojY|Cg8L6!MzoLu(S3vOp4=?_oh~R$x64(_o
z1$t^8!^P-%a6xh<%=*?2wQD)(a`b}2lIO5Npk$;xF+yMM1gcT|YWOtG2!@0j3X~*8
zR5Yg^`~-qSr>;9VB??5oPw@~~(gQDLAHop#MKEP(4V<w`g=-$+kbO!F<*(x5%<^1F
zPudUHH{FM;8A+g=dIgSzJ%+x`XJPaLZ-F4M0mdb>!0P!Gk+GdX;?cSVU4A(*<#w<@
zfD-8S6N*4NSrM&_9RR&Ahaq;wSY+MW2!C?QVCk<hsJu)GZD>_Q@+K1K;2nZG9~Hvm
z^?yM|FAQac?MI{3??H`C9C(<FfYS<|5O*#OCglx6<p*mNI&L(Q>6wV8Z~F)F+y{{1
z^-y8jX86~!3_V^q1MD<ppuB4$xVZw<RLzD>@^J!*XeFc>n}LpRJfs?gf$QRY_`50`
zOiDFjeyBPe|8+xjv3N2>DXfQv4f|n)PYlGJ4+oj8dm%^TE;PjjgPOS&6keDKHo@N@
zSRm{e+KoWtPBeg9?qpPOPY*ry4n|&l0W7NDjs7LSf}K}8(DbA<q$>Fi))WZ@KsPPO
z39?3GOot(CdmbLNJ%;0}=c2Y8Z8X7Z9dfCcM~b3nV14H$+M7)v=wL29e=`QM#eGoN
z{|mJIuS0z9P1tJc2gzn4ShcYbf=^ur#{nUoeViPK;H$`?wohazSqyuc1&aOJA5iPG
z6Q221!+#qxVdH|gkf}cdXwDCjmvTMW*a>7}6?JfJw}N~bcPP#{03jtU5F<SiD&Kq%
zE%`7V0{sL^i`Ei2DG;<rzP=->^&JPQzFR~pQ_8^oM+Ri@cA)1m2?U5P*bC|7d1di1
z=2ifBrAb4m!covaav$d3*ael2b?|cQ1sJK&1Hr*VP?XemP???$sp@w?Ex8x|ixi>E
zuZQ8OXa*c~%7wU-Peik{^Wa#_DTumR3KP#b!p%#Uz(x8w9FJ5**Y@ZkOO0@_i_HVx
zu~&3>-D#26`{9soD+{%ec97NN4|yLYU_(u>=*y#@qRd|*kXpY4CcPL6dBGi`Qz3^$
zCo(fdfzd|bHGL{r7n#CxlQ*KxKNdmL2p7m$v;@MA*}}=YEuy*&wh(QV12eAV!>BqF
zB$<33k^@wc(l0Y);Cun%s!E}4*In?d5hC|SPD0IEFQ7YBA4H%2iY#O-!CChgJV<*5
zM|^p>kn|ZG-rJxD<tk|P+7W2+0Y!8%ZVz}(az+lpBhmA_0(IfcPncO>3M}Oy<bf$F
zd@s-mUb({koet>gjh~P|v=H9*?uYs<kHEpa3Z_2!CW@TZ2^p@V(Hz%PFycB7C$mdH
zQ~5ibx70#@P1<P1b}RTp&%o)YN5E%32l1Pl;p=#T)V!e_2LDY%g=+=chst`e_A7zs
zmzx1eNO49Mo^X=xgT1C<P<GP|(osL$+GT}m<C<ZcKsZ11<O*zjs)7oin4+AjK5&Un
zfd$tu0pETNqze*Z!|O0~JW~>xm2HK-d6MX6`5>4iNunHSS9ERcEVSrIHC$X4D^PU8
z;6ctF7%M3+kh?ns@^&4352=FZ|K-Azi)QG;#eHb>^y{#9?=-aS&Q&-Xoe9b(pFysr
z2`ZPBLXxTjaQdGv(t0LPc~_|*jpxa*8waDOeZ?RhGgKfEhM|Gr0?7Gs0M5$$2xN`t
zprbF1qSjo4T>GKOWT*lfoQILItqXegTN<5Ra2%pu5p>&jKiDSN!McVEU|W+3+CR3y
z)I|fZ>{>kR?w<$VvvR=srWzW3@Fwh&-V29<Bf(eHEee;XqH%!>U{Asw*!m>|?tG7f
zQ+L;ZhoKbG@wy8M%Z8yJ<L#k5YX}g5oPF_5FQ{c7M3*ZakYAWU3|e#)rKI>Ey)W;e
zimO0dt$Wa;yOP}HHFu$^`W8I3*Fjm=M?sN-6IwgX4h619XuMMtx*4H~au?*Huelm%
zUzk2O;8F?iv+R+@^<z+KNWjDVDkSf01Uq>#+-uZDzsow|h*LGFKfWx`oHBtoABuEW
zWWnSGV?f?vI_#-`C2ATx2TqeN!QE|jkWr}%w)KZ#+_n^$`$iI3eK;qO*!KhPTmgFo
z0^2Ya4oV?1$ZW9++P@?gyi`V`Yq=kw!L=EDqpM+l(^a@|<tYSCY=K4JK8S3a9e_Pj
zhkw_jq1NLE6#5Us6B{X%d3iia89od>shp3dx{g3PKMauL*LWCz{0973rwX&B0>R<R
zcNme}0F&%HVd-~w@Y*o}vQ@)DI;$LpoK1lZZYD6^ZyJcd%z~32GDST<tKeN^8}x*f
zK*|h&N{4q~GB*)ar=Ny{KPe21FN9m~rO=Ib3p6}!3$h&<kKB%Vpy<NM=xBQ+`gY75
zP1P@jBl*ja?AZ(yer_VlzqABh)R92hDS43nw+IH3BhadOR_KhnELt?I7O2)()U`+k
z&8(V&c6J7!+UR7^(71sNSK5QN8V7b;T_C|Q3Dz!`MDp#85cfR=a!wk<;ACHD_H~2u
zcrl#u4Tm?s0B&vgDiUG}Mb~VO!e)(Y&}Ux=uXnqH>fZ}s5Ks>*OKafZzEaq>Py<Y}
z*21E|A;|pPH@GDbKFT(Iho>9E(dNeoVgKK+&@wC*WW6uIy#Lft=gFT?+AW7P_AY_A
zMGAltrb6|M>yVpP21Dg-(A<vSkfdygRLvz(ucbP=RH=s6-X9Omo0g-Ke@=s2+eJtY
z&I9iQ`{4|WgM23^)LZufbOhRC*wUY{UZ6xDj0}R^^SZ%*)H2{Ud%(xh7e!^}*+^x=
zNVMtZ5>O7^0kf}YBEgx7Y+tIO5zZ7W<BvdKYZ@#UsQ0rERKk?dr=S@y6;%oUGoQQb
z(Z%3}NY&Q>aiMdOqSy-^kupUxeNW)b7&R2HnFgDN3ncT2P4MEq5qgo53enE8XhUNM
z!ml2p$FW}M{#kw0<#!6aqqjlClmSS#_z6*av{0nD1mfQnfZgLpP^)T#cdeZ;J-Q7l
zwmb*#P;Inj(?Jj&y9GyoT!kBh>R>+Z3M>kJ3uW<^Xxifhh@7p67B77V)joqzAN2q}
z{g@5sJq*x0fpDM038c*Kn~>osk7PCtL2A?D&~0a3w0>z9+&Mc5tsNJOLQO2tPd5#;
za6JzifrsE?trVPBABkqFU?iF=(EA_&CMM`3<DCc{RQ(1DjWdyv!9(G$&xNmV5!y`z
z>i0T#)ZjS+HSE6!;$~Bn=R69vFXy4u(;pRg{f4-GkI~??KG^v17KrUq;gY#EBn_TM
zb=MWSEg8d5lKBNF#;?It`3Q=fVvZ#KZbg@nCfe&i05|<+p{}LfuxPC{dah!G1m_iM
z3(7~oBW%$)HyLComO?EO)+peuE4t#Miz2$OLG&g`R2gOtz2DQ&+`5TKVWA4jfAI`R
zLNDBM@IX2JS5VTfSTwe87o64@i%KRvhLby<L$a$qa!Ve738Ax4=aXF2-}wwVx*mW!
zmWFaa2xQrCALL{82UL64!7j6DVD&5sKztWQ&;1LjD^DQnh%6*8u0{#zt5My5{)lY<
zj?OAwhC9RL&|MuPRCyyGxQB<JeR3D9ZIVMl;$*b0q61`K#K4YIH$XXG0$nD`=#`WX
z`WKjpdOc-8#b^y|UAhUCZ;(JU-xt8E?N?xQu0SsOV27^T`k{kX-EiRD7I^iiP^d+$
z&|T{mnD)92jNvJGEYwA3A6lTPP6p_A(F~~g{t{f(>`>2;c(lgs60B5{L3=h!!Mu<<
z$Q*kbJYWg>G-(A|{p%W(t+zvS?n|LzGfhyx@P;@k20_V*Yw&etB`E1hBIDS4SPM}g
zo8Asje7Ygyp#%C-s0Sb8T40mRTQC<$g6T)i(G1=k-p|5tyWI=c&pQHVn)T7$Bmbaw
zeJYd;eF+n*euDpYE#&U)i2QWt3nU^j?0?q`dgiL|clklcDpf>>W;3wq@<w()R=|Tk
zb2z?UAY@KZK!MJ}$Wyx#)~8>9><O)~{o)M~Ju(OZON`K#w*BzL>IOvYh=ci)B4MU|
z5?FlKKyvRVg3qdIxcoF3Y91TFsDda+U&uj4n>E~a*$ckg{Q+P7C2}7V19V0L%n@kk
z<%`$B_EiS3tm%cwBJQw=4>JPYUH`$6D0>(zKLDBk1RA*ZEpVSR4dxA|LgvrwqKe)a
z@XHZM(B_4Z7;FP`8sET$8OkVHDi<Yv{s)>Lgzw4t2u3e{z(+j}H8xlwgG2SuzgIZF
zeg3c@av1u$5}`crTDYwE8r-&p!mdxoC@XU)GSayPX=`#}_(c^IxIq&YIDQnRb=$!9
zo#!FDq78IMXru0J3h;VsAS{z#1lvWkk;#lm6r$G-3Vkc!aDyjYzrIm4<VOoEt4jnU
z&pU8prXJcM)KFV%C`eT#f?3F6(73b`#yu*433204g;f$b&pw454OEcH@c|h1ehYeL
z6OR&YX2BPU5uB5oD~fZQjm)c0!#YTVhaP^=zGOHg$$kf2)w$?=iX-ClCL=vr6Etf2
zA@sZb0z5zT6uB>N2bb;x(7f{#c!t{xsO4_>AkgpXZeoPam7uZTk3hFL9r6x~Vb`t4
z@NvJDKs`MV7hMBjUG!BrIoBWl*0w`l|5M1l+7Bmuo53cn5hfm$MiDL3(aT0>l({Ah
zoKD1mT+ukx5gLQyAIwCTCbz)-aYm?qWH^#rSBy5wb-=}lROC6JgQNq7bMIWoazCe!
zMuQm^kYMy5#7t;_-H#rl6YF%)lY{5r(J&wMtEB==&mKhuHbc13`s*OdZbiD5sh~e)
zI4FLZ1BEaKLdv$o50wkBLWP6>Zj1tLxfFQ4ITa4wKLvYo*9mi9Ifyk8iw^sb03Q{B
z2&gz7jF%NcX3#qD+1LaRTpHoq=rkx&O$JZHT`*y%3h?ROuwZ>D?D&C%H{J?uSZ0K(
z4=+LnvzlSV$uOwY^g*qGyHMvy9WMH-G<vn`9i*PaNOzSH92I(o87(K!hs`QTD@Yyv
zEyZZ-z9JNNDg`Zai$+NWli@|cEc7O27`oV+0+(Mrf#OLckq2pm^S^JS!0Bd4W#l^0
z{=O3(4P1aUPQ3*6$X{@dmq5uV8C)~9VcwnPsCv{?lxaO1#ZBl1TX{LueBJ|H)6+pe
z_a~z4kbD?;JODcme}#=ipiIpOK>hwBk#QhG#BeY2*&B&WVzp3O@ozXbzZ%TO8=we>
z1IX!h7Mz<h20lF44iiQipr}Y)?vcSCcx6`s^=F5p@4?1szw$a{;XE64EUXuZg6$xZ
z*aKgS|B0ODWW&epA7TB>EVQIiIJ3H{Xb9^7i-ZKYI(7{Vj(G<&D~sWf(3|alkO4>K
z!r|<@7jQk;04;uBED(#^U|P^yxZfv<j3)V@qqhZ8^osfL;ngjW_?8HJr{9FWp=QW%
z-$$^z5rp_fI_Qtg2;^Jd3BxzdMZtE%QA~3;YSn)SPcqGsska<@uV@eP8|={`^Hj7p
z@efSiDA1+<3`gNjmgu1VOtf9z2L&IBhDT@?8f|bFy#Ct2sC+FHrT7^p^{JzR3)OHZ
zrd+st;=wk(7j{H`gtWT}fJ`z$$)*U#?;i?FZr&3;JW~hsW-GV~Z<*BnY*5^2kJ<x<
zpiU_jl<52jOqLyl%2=W2{dx(Gt}R0!HoBqpf4V_trZ!UFz6ed8oR5@BN1(UfyU+-Q
zIVe8j1H5(3N8*Iru)ugXJnZ#?@B3b(Um6sx(gKv4lK_o#rlX5%+>uLoKI-Xi0#VEx
z_^Galmb@0j;!7o9)KLKnnY-bdRSFcOt%X_Hx8UYNfy9_`7gjrTK;3Q?bbpTrx??#4
zG}ITuf3}LyJgAF)*#*K2_ZzT4I|<gw9)U|g<WaJXCHgo23G95yL)DpuaOZ73*bP?%
z7JC{LVy{5qxomjseHW&&&k(C03y-&DgKAd|{FGNgG3`rX&|kP)^Iwa`?QnxrOLxJ~
zkB{MJp(9LQ83D^)8lyS;Ws%|g6fjEP1COtp!ICR=@GRpZbU)Yx2Q<!s%mjfpyh0nj
z*qV+`cS)c|CtYMyeiELY(n3mWWYF9GB6xb_Ib>%f!pa8@kRLq^)hzFT@^SBAuCpXs
z>}iiae0&QV4V6&zVp;T0pcbDEkU~`hRWP?o1$0g?1i#UlkoosDM1nO6j*WxAheZ&Y
zngd;VRbVw>i<$-M@Sp;NXPXF^4@`z}`UfDmPoQ_23Us~1V{qK}9YjvagG&~YDDme2
zgrAjy7tZToT>3&(dA$O@6uy8dQyzn6%z1FTF&mw3I}Rh$4ne{*g8tPSBZu)>=*p!&
zn6`W|iglGhc@}fPO7k$hw{$}xS548sErqc7*d8R#PDN#*7AQq86+N4^7u*dNgL<a}
z`hGYH=GRFeeWAu#bXf~EUr9ubNqR_mWiAZM^g!7YS0fweGpJ?nMAUC2iKJpy!02_+
z(2@KS#%iaaWAhAA0M<knM~slfQXlk2eJ*+{r-VXXdLa6*H2T^%9r*~v>8VaG=)etv
zYRNPdJv$BU3SW#Swkx6b!6ulIF%w-dw?Y4{{RK_~Z{XjgO88L<;IREOGEtp}WDk0y
z^2;3%xq(9+4`Wf%qD(Z}+Z0`jGe&;@>`}yk1#I1TO7y8l3f>&khpT(vi7dnz;*~X^
zCTIxA1w9efxGKWNLt$XBjf1qs*F}SgXGFgyGSQYe;Y}!#g{qlWU^B8;)I4-3Y|l1=
z){ADac#$*|paGFfG!v<BxeK-}t6^woFAVWzaMcG3RQA!R{`*_VmWzUkkBVXZutU)L
z?;cFfy90B?$?&<U6_hm#V1z)ZRBO;ctCzilmP0-e5*~z3?o~&stAbFqdl6iY_#<lO
z55S^U7o=kC0mkn2kauG)YzY1fhi(O<m$xO5ZbB}s`Zx$$x9Xv~>^fW@w;w%v_zLz{
zKM?J|b4+CV=M<zzRX}oTE<8QA0dgf;K=q#-x_oat+BLogHf<Aw_0$zmxupd56`T}#
z%$x)Pc?+OCT@xg4?S|~geo>uGpJ?wwTaY?u2(A4BW&DdT)b-DSz~3K5mnEo3Le&|L
z`}snvyD=OwHGxkd6JX=N1gLBvFwbf>sM!A&J=VzshY*33iv~qm4Xfeu8%GE*ZiLUf
z9)Q(QfcSa8MIG+eP^&2k_HX9FQ1kV0^Oh`p@@W#;?i~e<i-fv#Y6_sepGEqoJYd04
z8#thL5b8qw;dO#Lq!d($ymM!R|A;B@c14%yuIn4o?r>j_8#n;f3W*S?c3m_>sY>+p
zSvDZE2k<&XpdP-_hRDhG5Z)9isyMSr)RwCOJz>MayIBW}k19d^MhDpcs1!8PoZ;l?
zeGqk*7wK3VKwNmYXm@A0Xy#aT@aq@{3-fwK4Lk=;w}*q<VkXj*UjX+C4dB>B3Ai}&
zk?70fa#7~E0FjczW7`?6-$mnQN`rHp670o$Mft;ih~`Y}5E-W_LGvY5@Q*DO4X;;)
zD|$94M9`!aVib{6S0BtV-+|sORppFtzXsKW7-)zS>iBJAk(1_0RO9~?R>z-%rTa8s
zr`AL`J<K0Ys#L;(HG7cfi9ay>-T*92xC=jj8X=1vedxh=Ke#Ya8>P0MfX0RHpuZ&?
zo+_t+tDX+LpPUP7qlDV4e=g9d=P+-5I*1=BA(;(gFc_&1NxUq$%z7iT7p(zBH!oOt
zcN+{pIu^QOc<8>~0dCq2@NHN>6dzQB4QJdSbk`Y_KT-jG#R_Qd1s$Yy{u#7?I)JRk
zorOaom%-veAX4d^f}R{J1+jiJ5+CVC8Y3d0Nx4I0Sh^C)S`I;X%4ASt>PL94-3Yy-
zPod4bW6^@o@1dc&2R7gw7%Z9qFM589(v|$+ldBs9)+)i5b2$+9c@1=KTMFZS(;=bI
zOCS<2hZB<dFj}|+js=_-eRxnO5++=*euV|(arSWk?QmEg6%Mt<cAyqM9Q9`{fn#q5
zMB#<qBG<Utu<dZ0Xjy-pKpjQUMNY$&EN|4_R0u6{zeHZ=^WgaJMp&a=g0>{Dh9`f5
zVU^}3(TG2tq5vC75KX)Rp^9mc5*7ezPB9SZH5ToW{0F&fw9)j*A4M->Ou$6Y<qT#0
z;9Pe#2<&Qj=_%a(63R&4tpc`YZx_ziQ_z~63G=_qLtk4ifLH5B__bb`V}+f8U6Eqg
z_`U{=zNn$aC+*Sk1dN{Mmci&Xqfm_^LeD)@!QA2oG!}-SxCn1}wC@cpm%IqEMk}Gv
z2hg+31mUc{gy68tu&!)4Qn_l2ZvJtB=HNKAckU5*yjv3@Kc9!Xste#|*e2+BCt#nd
zD%zxLiKaU$peZ)Va5GIA`I!5G?kow^ko`)urMwCn^~+#o_)YN3z5x{<a-b$`1-z43
zh~7V6k0eZlTD~L;BKDkx?-q*SpCt=htfOFBL=fBw`T--&Dq*-N7kYk8M}x((=*Q&Y
zD5OLm{1#4!V#8+;c626uJ310YCx3zPyaK3cU5Hw@><6v$hoLjk4h3(VjB;g;Ap@6W
zcsQyIMun~b5p5Pp-u8!xds*<coS+-i-hy^nE4*{gL)A4x?H4fuC7MOSrc-Xhd}kA^
zT|5zvtx1LaQ5_I<D<1YfsewQ9*TbIT9Juqc2iBC{fCB$?P;5zp_{PQXbMT``XVxBJ
zCNKe9Tn$l?gRel3o&v{?uZKHB+JQrF!Qw^&D5V&}tySLO8oLd-7U#nwomTj~`2jTW
zM`6T*eaPj~ci8o?1C*=;dVPQ`8l_{6KBWDD#(%!(acz_U&DDZ6y57hwRv$Hl>Z8sX
zb3yJnhh7I{puW@$v~CB3vG$6{s!SCg^dEz&;$u*M%lrS)_1;lE{_+2RrJ*SrS{jnd
zXu0l>*Q=7zFpDG^C96<bg}i09C|a6IgGh>&)~k}pEEz4MB$AP`M=JDv{>JD0&hLB9
zm%p6Dak*UA^?W?;_uKvPz`I9V&}I;dC8IJhy7(-VRz^TDst1=<uOaeL8@_3HQOxpe
zg!i~&Pj)MJ@^Cj6<OxvD)`4wWA%~q=2XOAeGn5xvko6vQvXHUDgb9XFoje`!g=O4=
z#G^33H31Iq6Y$@y%NTro6vM%@F!8QEw)OwQ<s2dMdSHX@YYTDnqy#EohH*X~+h81W
z1l7eZaFqSVspke`tm|8}cqYNvm7RC*M6fqc9=(;FIB4XFgL#K><7pF$okx=T1_hGs
zYs7D()98F%h{tDd;i`xVJt!}P>vr~S7<C@o>Mme{jR94!evLHE7KkZ-$AX6j^u?<P
z*FTIR*%RKdm)Z+m!y4S-UqzroFbucO$Ewo9;4?2UZjC3tez*hQfK3P*?S<C6{t)y0
zj?2QexY`;7*UqW9;w6bMU+duVT9QiBnla`>I8+Q9@w9jZc`kc{;;#MJ9KQ_Kf1>bx
zf-I$5ra(<N7{VI)P+Ag&SsrYgy>u!Xlh{`29bFKcABPztad@G~G>sF<@Q5;k;FBWL
z%BN9o9n*=He@5-ijnEGMhsZ%EnsKHN8&>Ycw&5_Crk7)N{%4d*)}UHnnlhyZknh&d
zwoQl9fmNf(XQu+0%BWK9{ZS;G$@ILL5;V;@3c_kK7&%i7>vkWf(JdcnQAr=xF^_~7
z@f7PD7m-w@6{QE=250;O%Pwlsm}{>w*G3M%%7SsVf$5w_gTYTQrh?oI>|JFHecF%X
zX){Us%O{isdBHiph?`?Hz$vaR<)XZcabRT$rr2JF2jbC~n}p7**Pv7o2)^=c96CQ4
zYRqT0m6YO*JS$H_=3}V+LnJuma}d?(!$R9v&|7f@>ux&JnK{4ldA<>?h}=bch38PE
z`3lG-A4G2YJf5$(GufL?Cnb|_5HDrkT1Y(QG;Jiuc01ZAKAA3N4<Yy0ZYccu#jPzo
zhl}NtNK|JLY1o~B+pPq=i%LbvEHz~2q@yS(4&0Y~EN>Uaz(I9f+R?zRxZDI=(@qqw
zJdW0<Ke;WxPeA|N4w$P{z@mQ!`iB-^J3A-L*2E%5SCZ)uMu_T)!s)lR@SW>~n3{CJ
z(U|pDM{&Y`J3ek_+pw#$aCLzhlBM2qx^6LCnv^ppRE40g+XIGcb&&BS6z|6$!t;>@
zP~E*Blm2MptED46Sudoi>H_bVM=>qzC_+8FQC$-bld;+Oup<Pu#!F%SL<XyZ^YQ&!
z4f95%V0L1Fd;aVh=P^45PZuYn?%z8+dh3tIJstRK5ecE7KA26t4msxccwZg|H~tz#
z%*jKb$R>;)Wr|1EyRfTm7EE7;q1JjT=3C0*dGsx$4hBJ4D-`>tsAI<b*IZYO21F9C
zV0ElH3_l&ke&Gr1dQvdlABWZdMc_y-2d?=o*S=X3(|-nI`i^8APri@7yb6dPJce1@
zilI|sLdA!YVV8G|c_7&+wz-eSqp`U5brH<#HF59D2pDRqQQV4GnCH|Dr_NL~Z)O|P
zk3V5s(z8%M>L^}2zCuIFI(To+!{3Zd>~V|5;<Fzy#a)X++4p3`d40+x3sfyAMTJp1
z25dIc_o`tu?=r=-12ZAE$_#H_S7F84(G>q!h8p_LA=_{w<(e>$S3;YXZkMO5duI5%
zUy1%!F@0ah7}LI7#g$Lx7`Mm{uQ~@Hn;eBcekwkWxd`vsrEu)MhL4L5gQU;lU+NP4
z^wGqD_EJ14I0K2!qnI6Qf!icQXCH4T%Sn|u*qZ^7YfR5@JBIadhwy8`2j~to;ZeCN
zo%<Khz5((y<@+S&AIGER_({C|)Q-%b53yh}^NL5lg>+3cOlMgkc)C4ZK@Sc&ar9*2
zS7aWnh1!K8G+t4ons2GN;8B3>n~0J&3}8=nCWMqNXn%qg#hlWm5drmdI(a|s^X5~%
z@D_?STuWLi8MML3i7HRKkb|`@nZ@#PE-8bq?w?PmMU!Z$xI3LZy^v<$5V0wpv}kxY
zsSb%zs<;8Q#-D<*Rw-<96VM)Z54+?~fG=Z6Z}}6*XR#e}-mF45yVs&a$MMC;kpjmA
z;*rP%+*z{>?QwmGc$SLyLYWv?%;C|3Cs=onJuhpF$yYiKH|=?-4q?xPdpk{O*iK;w
zM)8c2r;=WyA59FC;GI}InWk*iAS>4>DiMh$*JT6LojgdVQ-a9)-(Tzx6``pk$I_q0
zb9j;s(`lQOBCVXbmj*<<$x>w<wFVZ_Natr5H}f-nt1N&<Q##tUE@QWT06ObeAmC{r
zj<IL>KwCbtI>Rtf6NR1!mmqw754wdrxI-m9oOF`|Xdn+64WU@KAs;{7nFbnK%6xd{
zXCHlr6Lv4LR<i`}zm3P;Mz%?0A6w*}YC@;iT|v{M&p7tqJ<?8-qCMjVuzb>E%#C^g
zu@N$48FCh_vu)wDmhnHql5lz=#9N{90{UVvu+g9m$;lk8mdvArFXYL{SA?eYsFC^P
zXAs`<7v0Of@$<}6EPg3T2hLQYorG!h<4d^0c_Q~xF~0IN=*4ygIvjEk6YuZG_&-*t
zHF&@odcMKJbmnR7dyjp#=P;)`51NXKH1|mY;#l1gwICixgbeAT|6epahGX*J1vvSy
zCFcPx`YV`D6*;3R;-wgkUGNb0mAyz33ZmcdThW=SL#P-*M#k&Wf6AJC{2FP8L^!SY
z_oC&WCs1<74wCCPq6;_M@bB_pAnYQwugJiY<n@rSb;Mxpeq;!WVQN_kiTi_a(mjRz
zz=J4H*ahbgGL-FLOVKs<*eWZG*W#0~!Bl~6ryT{q;U*ledV!lZj5)`@V|L;}a@bM=
zXT#O_wlo-*>{>AClMb2fd;&AK(X_Efn0#E0knP)Jl$DwX8>WjMFB$+B`55LEE%;!&
z9*5FGaJ#0S^GXuXdwngk50R&z&3h4d&X|UdxX{^ek>uJ#Bx{n1zm`(;?(GwNv5Cg|
zhpW)1B1&i~fJjLj&V&r(=n>}a_?WPjEPW8w|BSyE{vf*b1w<_iv1O+@W3c8>nV%ND
ztWu?>s_U3HlJy>E&tl}l6{H(<2dPdcQLPY$0*zq$FoQ?7+ltVytxFD%)=-zL8%2#R
zMUjXPtl!S1PgSGnGB=O%icHDr`ZPLWuR^09_8>R7n?B7u&OC1s3aoUZ(>ZE%e_#nM
zdXr3BJNfkAon}Pbjw0Koj}h~qHB`A@(5!2~E3dQIQ7%kZ4_?BwQ+YTfeG^Z1U&pt9
z^|1PI85eY7A>R{;hBZ>Sv-Li5++Rbof@y=Bd+>twKQZ}lk!5L0Ngm}W4R-}MYZ@(C
zH;x{}FTu?=6`Gg+7Wr}An6I*$H1C=-FQyF6JtlO>sTuD1XRv<8I7;1Jj$p6D2u|LB
zlr>Uhx%&sMeG;Wj8<Z%$d^OF9a3QfbGsyDmILf|$36FznSgm`El=pYxQsY%LT`j^6
zR@2QoG>QU>-ecIT4CjkFP*_up`zQX=Zew+FFO;D>w~XoWw`kg>GMjX!rqG0yTWDbF
zCYl!zPZsa==z`uBTDPbKrlykg;^`#1tEEAc^xxu0(@1)>Z2_rB_>!H=WGa3fP4mCm
zF&$o>9_mgY4Vfg;<uT27qB2>|>w?uaOH>rd(71~-G`eg%Jz}1m!6qAOyB9zkwCzbn
zcoP|PK0)uvd9*I14@!@WDOCLjoc^_=>3j=52Th<GJ{pv{HXE~;X0UH2dv?Z|((+7m
z3R>ci8R5eCUh{~XTarvdjkTl_WJTKHCCu{^C-*vKN*vUnljrTJ#;FY<RSNXPq!(9r
zd7!h<9=f6`Br*R13<pg~?+5ev&2)*guEk!i8lhes8RW!J@5x2vlQM*h@8vM#ln`Bv
zW#1(kKQbuk!k7bkv`WQ*3Qr|cUce_*Gp@o~Pk{oo!?09pCuF7f;kCp_@?ajjtDY&X
z(P_hj@T*8Mz6bdK#Qx3~IN4BuWlKZx>eoWV55>U!Q9d-U%FvA(4O$c#1DA>^g#Phl
zziBb~d|!dVI&;KE>d}ig#uT3L3^mg~BIUCawAA+@oeM?3JdfIr#p27B%@8`#jL%+Q
zk!tBl&57IbfE(tr+&4nF{|+hle8uj1J~<uUf(=RcAX>&)gwygQ@#Z;JCkxRwhxcg6
z8ivV?PMi~rCztJ&5UOcHi;W>w%8e$+0+7LhE09i7Aw6CJOqnO$;b;l{wh(+cVuCtT
zX<C_Iin8~U*qNDvqJ!%6(YhC*XQt5gtP41z+Jb<f?M%1WfOu7aFQp0Np2fHmz_i=>
z;c$4AgZQ;-aG%R`y8V|iL$?#96OLekV=Gr=CXPiFCRl6RhpW_#GW`YY9*n>Z#g|yG
zwHKqmmLYnwB5bM^@oem5`tTqJA%kWJtPKH{{DS)%X<BghHKbh2;2I)9Ra-V-OW9Qv
zuQj42j9*QpRhY=U+c*7&xKVT!M?}Wbf03Qo^spY9otNNy_B=kd2(xG7IeyHSM@8L7
zys%Owoo{<EIA4I$RiW5+?hgLGXYX34BpqC^8>J$`BqUUWf}sJ7sx_vL=mazk)N}vV
z#d8lD@1kN-4mMsp0|kpLXnLn3H)0&sHEYw6fcdbw{Ft$??r6>Eg}6Y7);7JzE|F~H
zIhG)+!-GoeR*{UDJiW;sO>H^Vc&R2!ZLzVGGkYA)Z_mQ4#IyL^^b3FG#7Us0OIJ3?
z(l7SiJ@0EzEAAeo9Hz}ri621^cL`7(W=rcOgYl@-j+Qtm(6YYYh{{<&xxNFqDm9r7
z#jixl>UxCAD3e&SFsYb0(~~U~@Lzihi<7@W*{1}&^X&KSPDRzEYe@amja5<Bv_rfQ
zQ&hBxUzZBi_djWh)+*BL-9mlqm8h3#&YlC&)U!E)u1Wtu!3uF=1II}IK@%PLSwR!V
zYmxAvJPnk((4=%@Uen-FI%6Tv`>1?^Zf-eF30Czq;^9rosj4K;?npB4Ql#-s#Zb4I
zj;7cc<Vmi_#h#-O{nQKR#kU}vvytgc;-ov}3Oc@CM)zPfl9fv#{@xHfb<c6e1(M{P
zb`00UUO{rUDTSCtAmUUT{`)IJlNX%C5tohdN#BQ|a5uUPR$n}7$Jb^*>f3vQrZE=L
z-cg*4KPgkDh%`Mkx<eT=9+UjFhqNH?7Tw=Fgxx<zP|O;R+DwIceslWps3VGWW^JHd
z*_yojGeDQ(WvTMwd+0<gptcG(>e3y?!9;)BBFgmr6C>${U<*}9SkngER$Q6>0;)=F
z7|HyR?|M;GdM_9MK80YKQX{^e8$n59Ihq{a!#K^`7@N7AJpGhtL_#2$<}uHA-3{Pl
z1Rjj3rI-vKk{$IDceGdyKJEcpCk^23^%$BJk&9|!13KMO3%!qtRP$Yow{+|{S}T+c
zgDFQz(P<f_`Ol&&;rD1q&PciyF^im^CDY}_r)W@oC#`S#2gF6-X?+&caZ;i9<Q@8C
z_R@@T??_)FfJRI|PX!C*$Y;L=eI7B3WWH>tzCkszxD*1iI*)6i&JgObB5q0>)ASb8
z%1f-y5E@M`ugod@dJ=v0kE120523Nel&-W+rGWb9cxS@A%?2e39w|vKb<c5s%pjI+
zx(BC=jO%Q=g3sGiDP+tDQm*U9_lHyH-9c7+raXXyR0P$mnM$8o598rAlU#XTBz#tx
zVglJWobfuRW_`xg>|5C3avb%k8K{e`hkNB4{M^cVlow;De|HEHT}|oPDgz2v=jd;j
z3Y}|x2|G>3s!#Poz>E!ar@R?fhPG7qM4X07f1^ErHbts0qdgkmaL^}+vEQ{!>rSL%
zmvMA;)DNt`XwCF#Z4#?pL>pZVDO-uL=z9KiP-hNZeOg3wB|~Y+)Cn{-<_li`l%fY+
z`6N5<5>*O4q!g$BNO^P!xzrw`(^<zUQY?&QXGzmWyRVRcQis*c?s5OdA4Y~qFJ>#1
z;qLH#WK#mdhV~$AVliGNU&WIhqExNA4ae+u;?B<r&}iZCSur2YVKK;_dk}9@tuTE?
zF?xRAhU$WP9CzOVg_CQcaq}2dlZT)?TMJtUR^aaE^RU|`LeXi@(cfT*`*C4#xNs7a
zoSR_zSCqQ`?!v)QU$JAJ9`264!FWD<3SjS${j?)E>HQSldvowZsRv>v)|C3a5MKGg
z&@d9A#Mg{B_j-)g&(~Az{q6KYildx-C2G$!APJiid|0SS<D&kfG|^}pYV1JeGcT;a
z7mpdow&Gi!B=roo;#QI{og1T18mEU~)_)Ieh3^oYA5Z=ADVVxCiN@T|f>ixs4E*cH
zj<!ZDV%lf>R~dRCQ-|%0`?b4&7`l4W6l`%HT34iK1Ir@3zTU$9)lh^+!C8ncufp)L
z+mN!j4u=_;@O>SE#$atyyOobkS&T_#?DqK56QsT^9dSuRn7E<{vF)wsn8>4FJw*^Y
z!!ijAwdhH%6TNKFB1`i+EV%F!noyt}6Fag0=MFMiaSt!0nMYdw8lTO^)1Mq3rQ6=7
z&MZqhcI-S3&!~cl{A&99MugV+N5gGF3gc34L##)Ng5#<%Vf9lSnf?>rM@Lb7#WKeF
z6_SnmIC9v;*x*m~a0x0UzaYjaw1kqZOcI>$v|#pMOZs&qns%OP#ls2#zDHcbHmMCX
z@VE&M)yeqsMwEnB4MV1PCF%XOV$WhfS{o<R)QUT}FCj}(^5<wCe<m$llMhATa4fA%
zqAb?`Z^`kcLi&Rm6CJ9pm`7f)q={eUsK2lmTa+0m*gqX7KmFyb^h}Xdy8_#GN+QMV
z9(MG}!eN#v-miGUE!^OX4Tcl3bg?$8GZYZjTge^ec5{KZL($;65gyEU(DQ8I&UN|V
zr|4Mx)H;co4u%-oI39;WmZ8Gl7>P>>I~;G|qQ?is4lSmn_9(Qw)<R;`9;}S(#4e^C
zWjvWg2l9$xd8+_})1y&tFd0*$-@tEmB^;hy!&v78{MBxQ$Kx~T*}&0lxe@f#)(6q%
zA|&lGlG;^w;84y91Z;hWyQiXYvrL$#M48dpZ}Ulfz6fPmHA5|BJjvJxQ-t<M^b7yM
z2t6g3?2Drg`yP0<-J)YnM!-cqs%d%z^^pP^wl9HreG~Ghxg*Qv0!FQ!0>_~cSQf;>
z{8KfwR*6vg^lkX{eG<CQokYnx1DYDALC3y7#Cc~61T-pR<~?!nKMT?3kFRifR{)mG
zTZd!v|6m*(3b}hC^!s57Bu7{<4!8g>*M;Ixp%RuHvL}V;N)#L!PeNmIVDRQ1j#=xW
zvNVj$l5;S2hB7@}?u5czSC~I}kLfvG+!Omu9GBSx?ODA@{`v~l@B84f=^B1ntJ9pR
zN2%_;1>T#y!QYGeSpBb)3;G}g9|Ip~)Y&oayAjT_52NF0F_-?_hOuxSnD^co(z`w3
zdHgqb)p8!<FFSLy{r90KH31Fx%(42&J8s#?Rj`qZ#B`k_@P08FIvd|}&u1J5-$x1R
zDGi+e@lj})zZ!1LXQ~$Q#Pn|$InGrY;qv|5LQ5lzD_9EG0WnMttK<Bx2XR}U*mBx-
zju;cAi}N`SP(7-Sm5y@w_ge*J11u+StCO?V>*UNEmmyti3B8_PgHM=6|2;ShU)?I`
z%}=2G`dGZye}v|(vnkho4|)3>A#Dj)QoXU2ocE4_V$%_%e3z$r&o#;VN-k`#Ji(Fk
zwlrdC9whbN5>H_+9shP0%{8JFCMil6*X_cKsMB!dqM#bJ5R2w;G~z}XS`WD5ZI2C7
z6#}tPfql0#Ucib<V0Ds1^DLH?(v+e17fW!rXDvpgq$5Z?8ViNuVItHCGxu-|ew+kJ
zt1r;i_=<_PB^W0g1^35>R8h+qq^AeyLd8kaGPftMX;UcS=vkEJ8I%2&aC&~$nSOo_
zCBJ8qxJenb|H)BWcI7(dSKmecqS3T-=@3RG>+&?8kEiEO<(OC<O#h`F!K65G#N;NT
zdTIgcudGCRpf?f{^O3M@3vTLkaq2Vql<>d>i)EDv*SxVT?J{@#L=|^+ZxV8^Z(&|g
zG8{GPv3%wh_<xxP-HM;Me&8s$z%lIX+7BuSfpfGu<{dhOS)oN(p6h_cCdRaQSu*}j
z&c=%S83<PBfr^v>mc0$QGX6Bwx^8i$b8c}`&Oz8@-;3OzQRosC!%nx0SjxPpSH69S
zS|osi+gbe7JA#0Y!{onQn1rj2QlQm4Y#eGwS@lB-@DZgwUl=>RVHtJ#w$i7PF{Get
zNtr5=G<r!0)LkBOl8k$5ziCe^)AVR$wlWPLl%(nQmejGrkNnfi=u_BU^oifZsk{~B
z`gbKduBXyle8(lzB&aaf_KIT%7K9p5no}~(kI<$&`<K#SsvX(at)yb3_jsB;$UR;R
z3i0WN`OaEWuvtKpSZ)7+)yhV3AINa?Y?2I^g;bKJd3lvc&-DdL55T297v*2g=<vGZ
zq`7|>(<;0n@m-TNZXKbD!EYpQ-2=bFCsBCH1oBD~Q1v|ym3E^^qTP&Uev+pvH>Z-8
z&Nhm#V;V`g1ch%1fW3MzyqzY|nC&jOzUMadeL^8?dH~Va2XSBgIV_i5fPK#-YI)QE
z1z|(Vu=|X7%QP4*N`&~)7u@%A)hL;n3CE&K*uP;h4$dBg!Tvwkwa1GdEEJ((<`efy
zDUhc0H%y-9fNF!Km>Kj5&cO+=5^2TBjw6W3SODp#rchY%4ZF%D$l@ap3SZAc$VnOd
zj-+Go{Ta-?&X`D%htRt3hn&N$?94a>HTM$;R$l^@Vl})<ybH6})5s#j2phClLe~2M
zE;wF*)ZJ<rO*=%>5(eSLw5z2~9r&C>H1z!j<=ArMwLJ*ValQ!Em`&rO$5N)TGhLj@
z>TK>QX7IO?N#ivd8k8f7TZ6@Y(HIIij?lddkP8sU)`C1Nl}W_j{1W(N+QH<dALN%X
zM#%6Hq;90ZYknAhuT+PX)HDcdMR6Lh-JpK97IN%7{B5`to0x{zkQ<99anXno65zZ*
z7^Sn>*~i|I3-vwRCMP$Ls~hfhJj0H<M2JK#hu|b1Vq&{7J$yI%mqp<0{)KpCqQf$d
zQ*p9$Hp@X~!z$qtqL<Er+ZjC+JU@+5Z&hhLmjbPwYw)>aGyGn8A$H7ethnNb9T7#?
z^JYFNH0qM0@DohjUxMPm66h!;KrBd^aF9oPO?Sf5s|)raxkyPGhdC@~Xr#V?ycVaS
z>v1T){t+Vm5(#=^U`~!Lxsb9^g9*zPt(n$?jO(MZ@%}S(F80HNGtZ#$UY@%4SEEWK
z2(~Y!>Fv4iIJ;Akpc@5E#%OpX96|z)peFM(auzYao3TsN-_;@`#)Ay}*OMK~lR4EZ
z(b*ps2sStY@yrqopIn2$5FdCu&&SY`%V_?X3#BhCKRId<tbCdF`+h&(-C0G-d!I6W
zW*&y3n_*Z~hFkpaptCP9q3<L-s%$Wlv3eah#Oa2qCOK4Yhj)cF9;N9})KLq%n{<k0
zC2k?MVGv(bZ=wCyNBq_)Ktk_RWb|D_Vo5fdjPyvorxEX@CZRXI7Ogv_$u@cvbu3sy
zHV+n3G|Np&=G7tbwGM@jpGIywnQknRkC2@~@XC3F+JD+8ez2A0+p4kmK{2AX|3RPi
zCh(riz%gtzWzANgEbRu!)+tc4C-aCM{&IS^%ehCW#4ytjO1+vP%<`G1rvd6b#)izR
zLf^XeSn$RjRU^%?bF3JN`_$lB(+<`%oxr+jchRuB5ROS>=;`ilu)Fpd^J*$k8@+|5
zT)B;nb2DM5t3v6rJPf_4LU6S-sU9{XiQo0`XBy_4v#060hc0bjV?&|OgYluM2__L6
z(6?uR>t@g9<Y^aB>v0~5N14A>KZZsPO2P8v60A6+kDe|6q2kd_JU`5X=DK&>!!L43
z)Ck3k&k@X{x5j!MVHgV5AoQ;yEr0S7x%n>GexsdRJm(QQ79U}Z=qQ}Av_au>Iozn2
zi!s+%<NV7#5OFX=<GY1;-#!_Ub(i3iqJ<~!f4F6;y<A!NB`&w?EUbTdVAQ@n;ImBZ
zw@3@DUwRM2)oB=H8n=k!7;vvg;n`>%EPXfyMP+kvIn)|mhEf>%=ZnK7kFdCBB6{3S
zurA&VpGGf3ar1cuXLfQTW504~1C87z9b-DV*9`Y<;&BE)xbgQGUud=&r#;eecB&z+
ztWv?;NpeWJ=MIN)(cHD4?VOW#J$x?C2kI?RR5Tl1H}x?tF#xK)L)_d|j#w){77wEo
z@pAb`uHPh{)7iKjshtNgE^rxC^R9ChH3<mM=HP#*5L<m1FOejQqD8Y{<`My!pb0pB
zAs)YcvY^zKj^CHl(6;d&yyKVRRA2%swz{EEX*?}048)E&KitikjY<2)Vw2zv5~eOg
zaM~@*oUMaSmDBjoH=X4a&oQme8pf8Ah<a9p3+)N$-f;uZI!@!I*=0;K(50b_v2=iQ
zVLC4JSE`~hZE*n8^lu|vQi8Tzc)&bQZ5T=)#w59~j7gBC9j}<?uj-G|!%b*7AxrTW
zmXK>}6@Cof$BpZ2G48lO6^+-zo&LiR_KRoz*D;7TG-33Pi@4e!gyA*vlwk1(YpRD3
z(A)-P)oO^uJwc<L0StoeDE!NHWUf4ey3manI`j}B?RQY1xtjLv6TtP19o*`Yace^l
zqQbPvc8epK72SvW&a=q4uz_Z$+taF*#n9{I(XoO_bWTo`q`Q?V+|LbN5}_!bEQ8wy
zPV~+*0^IjQwD29vXABhLqw-u@KKuZC5602LkO!!h(W5KZG-*%V51ecbB7<=(_f+mq
zW4cQy@ZD<iVE>k?mr)dFf0TM+!%1Y965YQ13=7BpV7aFl80wBAFRgg`aB2zJHYd?y
z#m(gXsD)<c>9Ne<2m15lA?~wUUUKCVWNNHNbx#jESUox>=mqTgVsxh?ACc2{Kxl9T
zRk1v(*;H3@um6O>g@@tyb1&v?c*9M9t3e^A6`1z)BnDdA@J4$RT2G(A-x?Wuqqm6)
zw|nB_=+{trs>kD39ic{HVe(ZlCBB?0-Q0SJMjc&7?N-b)Hrho8pD&~4P+6XeQ8QUS
zu%To133Q|K3mq7?<ryjH@V=N-k>8pka=!bH(t4SXy!1C!g>0v{1^zT+-x%J(qU|^o
zssrPKNC++jV*+Ec%o^3ndtCvho>>eTy9B7_&c)-<saW5ViInD%_%Kc%9d8;rCHaf!
zc^V8kn-fU-?Fm89NXlyXjeBbvP^(}A-*$qDY8D!PYSNNSTjY#!r13E)FikEIm&I)8
zgyef1ZpwqLnh(;ui_z^_kC_p-@NE5E^h$c**V19G?`jC%E@7T{;|-)%reM*+G{%V5
z!&pp>1kW0<OZyDOviHMjbrCK{hQMLObEqsj$$p)J|NO_%&n<m;sT2ttNk<%A9F6<3
zamZF}N6fo3us=5f;fg#|+!#$Cb?m8FMTE}GU60<KQ>poP1Iq)xLYLK2wEfe-(Sjnl
zjO)hY(g!HrPz^_q$4Hp3PWwi#po#OpLb#(9Jk>wwP-WVS<UVTdeu>@-DkO8E3!+D*
zX||dXZSLPnyz?_@=+tin8tkRV@(y&cgYgyzMp2jZ42)W}luYZFBWsfh1UcT&NZx_f
zT`V6qT*erLBe*+j8^~=nJ}$|@vcqF(;1<hmx7^^2wi!XC?Hy-mFHFhf(r~|ko$W0)
zSf!i_y(=qV`lAuOUj+F1d=_#U!#8Eb3k-i;jja-2p(9fZ74s%6zR-f4O^i8Y{Zn#u
z6xm(v!JeA|RCS>WGL7t>2OVL&jR@Qt{$RUfE;=<I!mnx;$*$9(nct0RV3Z#1F^QnM
zeHnPUONPW&W<Xx%3_?b)$7fcfH2M9*M#(J9pZpruD)lhzF2NSLOiY@Z4#%KIDC{_m
z%t%WlTz<fzG!i<0g7J7uE0kD|_Fz{b5{t4i&7~V7Z3?0Cb1cd-FW`e{0)Ch95%4Ao
z)A@(7$Nwnic0I+Qbqzf9BCs$@3MQr@a3xK0Vfp3>y&Dlm0`w=8a9NhkFiD<3t!Ixy
zsOdCJT4ON(<x{-2D#Zxn2$*=Xnv?0oC-E8`Qpd3@|32RO_G0s$vk=ky!E~1lG`O6C
z^MW)y)9D3`R;5LZd;a=-34V(({UXr;|BY8>n&xMm?5@C0r|qbT>_BYQ8A$EV!LYFv
zIK``2nW9LpKUtr^{*GUL>DZ=n8^<>0;f|#{C9xiMao~9bPU?c0YcC8e7=t%lkFGnx
z2xN7u`Zybyo<2bf<VMmCo;D44T*G<q94wbFL&RlqnkV0bt$!q_?tUvo-8{)8@-h^j
z-NR>=fv8$Hjh@A+lCAwq#J`PzDbq0T4C~PvX&>6y@sIg>r*X`q4jUpKVV7+Vne+a@
zlHH5j-$&BCnQmnBFpkcj5ht(r5@cYOhKq;9DNdmSOYV3uHu(WewLjqN7iIdC!C1s@
zUz!zOjDXGx2+vGrwIbtfUHSBeWqTj8IVjQ!z`tMXX->Zt<+feJ#I2Ty+5HanaV(1y
z@|8|A&17>`D-368(+MMg(jHR}H=~<)CzOw0u3J!H#eBvey)fOJ%jTk7!VjZtIP3V6
z<_8<v5FUlq)5M{-ScK$n#Zv#jNi^UTM5#3%kYarB)&Hc)`^!hXHc_Um3;8Iu%E$QY
zKk<b05R@lHe_mxGEhh)-7?)cSl8w11mZGyqhD=Jraee$Br0X_A+&Bxj#6q#*t{f8f
z=+VdNC(svGjweG*%e{R8^P)$SzY?F)Tt-mhgnCqa#KOMb3VkM8kX3F%N$VpV%{>D8
zeiwP?(jlASh1JUW(4OcEjW>;$=EK8}Cmb5hUvRfCKj-uxU%>b;LPR1-xa=2*Gb#^|
zdbJwd#tO7t`T-e{*(k_93hB}_?7Lt9-?!%&zc?3HoFmz65q4HhF2S|sf4Dh$t<c{U
zj2<>G#Orq%3Qy}p{_{7^!lH<~*Ln^s^qAJt)r9`BEAV{nB8<7xf>^`jn5(Hz>*mR0
zVU`~ro*PZ+#*1)deHzx>P^CV-SCIMTkFddqtT#B1ldJDS=>Bzt>1Lqg=6A-+<e_q@
zBz6lKA!*bO(#>eZm0)%9SiA=(?ub$P-;3xT+DJKwft;=aC9-^K(XlmHt33kAGBJ43
z;sP!6xeyjzh&^}nF-Gwo7y3g0v93Az*6EJm-LtW}ZI~PT@-Am)`ia}_>jxX9U055o
z8DFj&vpPZ*H`)B1OnpVh#^1-nWzC$|7akOu-uh@|JJ&mLAB0;^!k+QX!|QK>cYhy_
z_RPhM02fTnKMaFy&!JHq15H-jBi#~OkE9tNc^U5qs^IqU6!Pw?!=33#TkkO5L2DAl
zGA(iJ8%y{<3We7CT4-6#!~LAyxDfLc=h8Of++C&(&SqNXabb%7kJWMq+3b#MAJA4+
z3YT9Icz8sT3P!eY6)I&g+{-fC&3cq;poqQM4<XbpPOg=sDE;v^1f;uTgYO^CpR=XR
zcgFN`hA5p?8G_-hYna=lKp)Z%Qt<739A=zanRPx+ye@`S&v_cwIf^gPL9dGzwm%c*
zx!)YXt|2!vs=NY$tpxSt#?i~S7StjkLDnowE*s9y)4D2ziDaTv?GMgPn?%1>4`9qP
zRZ^{6NRID5;8Wil+?lIGPp+m=<{2m2`Ad~tSTA_HY7AaV3?cERHKj%ULqJUd(=`n#
zZ0BtHG{&2TnI0}yzXrdUFMa982l}vl2(89`WUpjTV+VbyZg~K$S!qX#>ki|(Xb6Q*
zKSJ&Wr|9{%bV`X-C#?xr@Mw7?{wzK~lCc~~#_Xn2Ss7ZcIgZNi#na}^OX<hx3i|YJ
z6x|&*B+uPZIDSeW9`g%vZ9^IE=!Y<Ny9y_>t1+aN3{RuW_}4IwnjF(8>q#^AOC`ej
zz$LWr6~$J!WZd5;NBZ|Gas1e1^sxS0*-e?8t3=7p<{RGW97j*`JM6ry%A0s*6FFYC
zr+wdbsWqpDBHhDi{azc&J9ie|-8aZ|w*iTT`q7UyUnuq8ShBEtMh+xSpFha)V%E9R
z<&B=C@kEJgLL=ycXCux0V8Cl0T11~#XwbCntC<g5i(^Acj6wZ~SuJm&IxieN!7Gft
zuSAq0L!xr+$jUg274o8VYv(k2L2M4oS5bW8yCb6ZA{B3MNAgigig_YI>wP6Dd*XS-
zMP^g#H)E1nz<j3g7Q~GIg*l&ep|&{^aVqyANVyH2xOJ3L`42ABJJ7n{4waEN@mxEA
zX6+kKbJ7Cgnd`?gKFe`sNh&fNi}2_L+t7CVCp;$&!`VfI)(nrOJd=k|wm1&Gv?~ai
zehNJ&V-R<!iSZ3@V0!K|{na+1t@pghGP)1Ozc(PMlhtG{7jedc<q7OKsE(C@XUAw%
zjTNE|FLh~hw;I`)AH{~n_N26EEO|#c(@w?*MBhrozN*Qz{AUpz-*6PmSp6cswFqxR
zM^V4jX!2rNjS+jd<Hso@YIkcxxaU|Z`=?5uT-mvCN}tlRMv`wsFk_y+AZm>mUECgr
zfZQ9HFl<WhCDwH6$y_qKS&U^R2Bd!?3khY>81#rhvd9g*%uj;YsXiobJcs>?!N?n6
z?83~cw9cfEWq9JTnX|+`X*OHQNd&fm0_-bgd9}M|k=~~dwXF|OqCSG=1tucWKOGN?
zJz#n=7}Ktslaf;<R94BbJ+C|H*JgdH-l9ZzBw6-x!$)+Cm!t`S@l-c<A8nayP9^L)
z{vB9|)_Hf}aY}}q?oXgW_FjKY>V)Q$Z+Md8NYb{}w5;|wo*H`L;nu$hV?Fh{xKykc
zx5m);SUgTBgy@M6Q1y(%q_u|7`Fj*5*+~$%UShM**nFvvnuz`(hM{y(_8u{I&vP5R
zZO$WR@&pv^sKbPQrr)>!#5Q+FYzdo-P3Zw}sab{2WjfgW$`92e)8H6c1W7hC#pGEQ
zqUz^iU|TKf`gY@>g(6M-5QL>-Q=z&!1Obvdu&PhT^g}FvwC4!R8_dQo&nld5P@vbF
zq9Cnag6IrwSUvxW*G2j?w5kPNo!{ZMM4DcEC{kOyJ0&W9#*Wx#$ZhY2s(|&~Ri{XA
zj}BQ>_hT;0M~Q9pz?T`0)ZQFMp5OE-?^PG}d4=Ii;!0ATIfM+&V7RJ$!L*!k`Vv@$
z{o^b!bKz>7`pfb%J*Ai<b`@TyyHL3&4@oTRS^L=)8h?r*Thaq{R=dvhT8T9_f4HL(
z;n-By3bjux-Y>3!Ur!5g;h{ci7Jo<cQ(!Ps0da0INMv(M${N;U*mN$wvRr`z^P0a*
zRi=b98f3`Z2u;H{#HcW>->MNC4LRDGmjO?=8iZ7C#Jl$jZ1GD!uAdPm1<^Zr;V40S
z{R(hSyaL-sKgN>ELM-koL(JuaP&=Rn)gLjq@m!2bm(;^^Is1mR4xr5M0ft>3piMpw
zDNTcT@AnzX%lDvanKVtW)qv^qVcZS;1+&Bzq<B3AE~#aRSF6Q;6aGQb?;YyKviVk(
z=TUoGm~4$M;`E+g?0nb)Ddqv&37w{HXwnQ*8M=AQlJ;IzAl;~?RPKC;T*f=l)!Cx#
zeN@8cxfdw;!!#=VFNrj@f)FZZN#Dii(tSHAUa5^gwd}o#pId~<fai*FW>-1=lc8Ab
zTZWvAM{s>dA!1GiA<ZR}dF}<A$VPYM-I#~@j7JsR8H0$hHZIlnEf=)ik}&{*D7&#8
zyq2kGw{yThhlzN!Y7?xTf}ryJG1rrBi<G@CNG)7|SG;WqWV42rhRvrDCS{oWYZ&6*
zsqpQv#|+kwX_=Uk+wCY!*qjKH$xMcg)J50lY?woal2~4;_1+OUxIIEX^N#)F6zPcj
zeU`=U<=PH^!-sQ=uww8I*HEwnrW3Q_@Z~@JVzVJ0R^_vvzz535Cu7U=U3hTpFrLa@
z!hd{kxK;e)l1$HY$rCO?@fza`3ev&rSHW!UYdGv4i>EB>w&~;^ydN<KWwvu+yr}^m
z$BfYEGZoM4gy?K|2Cgk&o|?NXITSi$&&6a`=cVAa!epHFVE=b_k3jssC9Xut!t~e;
zj0$`XcUQ(moYP~ooYIl}`!$k_SWjCi2haYQ$d=ZE{jC7TGoM4<zEkj#)rFzkSc)wA
zjhUyjpj(oJsly?d^|}NtOD0g}#U`Y#EI}Rb5vMhm&0C7J#%KKz^sT-QsqcE>dhQq&
z=s2PM!3b)d`5XJ4e{!=v>C>`z%1{jo=QLG2xGXlSO*ljdCF>P&VW~Y-?n>b4*9n+z
z;sBY#sW4k2k1U6Bjy2)9bKIGmc;YZ7`#GY$VKY__Ug0Je{^L}nUGN}Z52g;|G5o5H
zGcRr7zL`vdtbYP$VXcJGLT*^vq=ZE^0r<UEh4n!HxJau4j_a0&bd?b#?};M+Ss?l%
zleps{@mxxeKHjizp>|sU4At_ugzV8!*|Zpw-pql>9y=UOPT(9{W~0dDCw~0R!l~;I
zP_}Y2J#Ohlp7lRmJ^PuyepM#<-$8gL8w)X+G4y!acWQc)K!tmjBcYn*O(we0cB?t`
zYOsN%jlQGSY98rDKE%WFGMeQ7hyDMub1*0aHH$LPtJTXH|4M?m`)T}gw?$Fb8f=VP
zk5=h<7&&(W8n5aj`_vKq-g^wYMSpU<pK=&1Q)W8A4#*8e;7}fi{#peLS-W7*%qbY*
zI~|HYMDb?3I@Zi{#n|Y0{P&E3H3#OQttSyHHrBIwFOsyn%K^OsN$^RWioBhYw3qdP
zK_kv%P$3el7G1%E@&GP1NSHAK_n<w^3(Lc@FyOEdHrzpkRvbXujLSGRVHQ5UID_w+
zEC*c7eC-j%2po3}w=DOu-f93JA~TSl(#ILRWqf<=c?`Yc<IkAmSQ6!k9iK07FH&xD
zdG8yL#QM$Ib*Gqzr;T97&>Uh+{VvgUaQab=K=<dI{GEkZ{?n0e&RGnDl<g2{XuzEH
z*YMXwnno69;FR_>96I|OXFC`hKl&gY{Hsf9+q3cN-7&24F@{^=5H>i-uw3y7YP@g^
z-Wdn*bo_U;u*LQEzg@`awHfLK2e5MMC&*pUCe?;BOuz9bgL}Vlq)(j|-pZrZb6(?Y
z^>cWNgg}RJiD}*Wbd7nxb9djsDluP7bu4Eya@NwTnhoS5w;tws#gsS{gGJ4|DdSWR
zzE-b*zT#q9D76pC2KlJ(kAOx|8cu61K&J((eKg+SKpLTVlRuW7^k=gj69AUSs2916
zYnxqQ5if_0EGHCQ(}|a}USsoy2>jZ13hA>~;^n0(1SIT-m0cA3#;TCP+^bk!p9B~E
zI*iR9Nm|d{aPz;J^!}h49nNNRZHfn2j^36rxiz??oq{`0S^m~?HdLE_aS|<jTDEQ_
z(fL#O$D4`$k<!#Wuz)hwO`*-ZCgb&?DyTo-1nG}PwBR|LA*dOP5&oN**6x6AZ&owd
zY=XC3G`e<QhsJJ})wjNZjkAm3F7XBi<Nm`YIbFDw=F*~XeK_LKiF@9$Xq8Mv6rbhM
zcG%KN<8v4l#cE@}YtXaTg{yZ7cUtg|3%cO|jf(Xc*M1GE9&R|4^@QX8sUdc<6yD~?
zLnvtx(^)oSdUgWm*L$7&d43-hZ}9N$)*N`<^+m0mB^=k7;n9zD?7gjvs?USmhSKGz
z$m+o95&DRpF%!M-DiA1Od7^F?8gu#@TE-qg#j*AHIcoqbg{P8@VJ6nNox=Lt8JPRu
zV(5A3(Ddt1k+=9Jj^A^IRY5XLRVUES$Ch;FX(4{4*umg-G|csRaA-DRbN3=(7IPdo
zx28d*@H`Zr{)F};Rpb|_u(=`2Q8PXhTR&GIQdkpP*FWUkTutyVZ#KCO)`3(*aQ9d(
z_wA!7t(<oOx&H~Vb+B4cd+3Bq{<ksbz<zuTE8(17jbK>66JG5ZY{p(U{+7ldeB>Cc
zE;|Ur*_zlqKNhuTjzHAEi3{=f!(Ww=@LT>7N4i4s;rKDk)YXIi#eMj7<P7HR9ZzvH
z((wL|9QOIx<7M(@NNwGPQlmrIqdo+?Zc8FD2il@mgS*XP_%JXH3G+gcX(vp-=Y4|W
zylMoQyQ4VP4O{hF*^H!esJ3OX?0GpHCcnnBcaHS8<TjS6AI6j>3+xTgM|?u|RAHh2
z>&Ip_gM}m5Pt}F0yW%%%?uvJck5KvFW(BYJTDQ}9w#T}y@tYM45<UL!KPM!l<ZEB4
zAoGIX|J0!JzXf>}+rs-Ry5c-48$&+vR;71V^p?n4{ECaOkRErl!h2Gld3L*~#a~Z7
zi-6@@_&?`ZRb*PNHV+&*)*@-*CG-Ae4=jxR4x7te>ZtTgyHatzcBIA9ost%8l<`8<
z(EqtEwg1y~{l628|L4j=TE_7M_$|DXlVtf4w&!`_6^4AL*q!{rO#<G+{si8u-UIxa
z@x&i_`!s*=(0KmLT1kF(Ofk=P-#T8f+8(}hW)Ck?a~uCr%~{^?`Tcykt8V-Vy)FE@
z_GG?S>KmSQ>t_D{|H@SV=gK@hx9#*a_VM)F=DFqnyJbR&|Ia<^o-fLOWxugv#LTli
zzwG(uhuRkKXUt!3uHCxVe7=n&PyV(xFQ&6ZAV|Ab?i=aD?;933|Ihb(spl?rUbgJi
ziXS&)%?mXr2<*GX1m?RV_@Vme%yX<7E8I>jsTjJ`UY-}=$)Eo-kbk2fm%m>kir*jA
zCQz%IC>VVGN-*^5Kl9SJ?&i&6nSwD|UwCHwOwH?3WzButua&Rkv;~(ANAMG}9m@-c
zCshp3yjh+<X=VAi1yd`gq<9IA2hTL$`>jtfL;N#eM_OH=`s+z~&EMPRo5$7*&c9m4
zcbnqJdt%+jyMMot|1WPzg>BTNinPQwf!9?d{x&BKe#R?rzWJY%<%XB0u!)Gm75^1y
zmybEIzTCTTn_yYkLB87vJ@XlR*=BE#4E{izO8FU=<kE3tA5-YN3ci-XXn}-OApg+c
zNWq0o!Gh2m`DJrwEajz?-QqncSjSs=B~x%Uq>zp76E1)0)XJ}}xX*L#bP&v5WzQ4T
zUEnLGN(i2x5UyAnFIm1yW(x1(t3saHn`A-fYH|L(?>_vujbePg3E{kiY#sh0{nPwj
zMH9iQtCi)am3m4q3nudZT+}Fc8=uAZ>^fY2pm7#2<d0&-Xth}a-%U1xtY7-P<6A9w
z!)G?|O~xygm4{s88$Asv|7f(QOm_WFo>qvIAi+<9zU>Mt&sw5nZu~5g=X0rrue-y$
z>}r*hpt(+z|4;Y@udHHr#l0;vd2uTf%hyyM<4<<A5loYxB?y|^D3Iyb;g6}DD_CY$
z&R-t+R3M!w!Mi71S*{ixAh2jPt(a0b$X8ZZHNPY+Avm^ht{~;AN%=`dYu-@}X+h<<
z8|D8-_VM<rjWOR^aawTjrHdfY>m~n_)MWmG=5Yc+W^DQL`eSTNW(dEIgaqpLRb1hP
zCcdkFGJnnvVK%n#SXn`a9RIrbVZqQhC!Shdh+s!h6n{pV2``9EJD;tr!naWJ5?F6u
zTi$G!N~xFB1k==B^7_@;=CZGu{Nz=t{6k~h1S{XY<yE$g=UZ(n;XC*`nID|YEC06S
zmOx$o4;BBiswlKID*djRUH(+1qWpZ#VSY0IeEFE-IDy#CiTq{5@5?tw%k$p!jy4}W
znIaGy=%9$Eb%KVq-aM&UmjotNWrEEs%J`$><OKKB?09Qrqsl+5{KUHvCROn)BV5q2
zT9&`N{B3E*@Ge1hXuF_fS3iXuJjU<U$>D#rQWp5;-{Zxsb>n|1P2si1#t3f2r3uWw
z&*o<&Xwbm8gXQ_^Yx(n5KP`JN{hn`Iakl)Ioi4xY+N<(|4WWWwg<-*C+wbMwnzPGi
z?b%xH=xf6F>WdQe{g&l5J+0%V|MlR%-TQ@KTpUq8_NYBCsb0V0YiK#&WrRF`X_20w
z*2s?z?X)(xS;XT#^AzI$YjfuRGt{Jt<d^($^Md()hM(C8KpuZh?Uah)zqx`JU)#%1
ztybZ6{IC(cw$Q8C+-Y4g!AZ{ikm6;5cJ^1n?}j8Ee{O={&gRr|<Bbpbuk?=a3vz49
zJGu_>w{Mf;8>o}{UC0RP)3SN<oSzEhYeWS3QY`}gYZd&B^FHxZujlfUS19mDKU~Ad
z=2*c>(f@eQ5+({Z@eh|<Pk$;H{v<BAaz51D&|w1qeW5+u-kQcsjQ+=;`_NtR{@?9#
zd4FMncjLSA>g3TCS0bIv^^}4IYTM_NlXE^_rAC*(Mqa9H`PNbVtMbYM@7U7vXcs}L
z^jAB9%ERlt!2C$wnfh#=LxzE1eS(~zXU2c!#q)ob-wqWiuW0k2_nXi2?#26*-!)a_
zi~IQs4AqVc{@l#s&yWxZMhV_h(xACu^tp`k8OJU74I@)|1Ii->xi&laCnmfu*SMI@
zv)&QUyQ%c9{8H=N@^wl>W&I<6lh&0$S}xbkEgUtOuN}66_a)OoaKLvXTPY+EtZo=l
z{*i6X-q-P!r?AXl@V%TpyFwO%T%Q?&l;N<lu!RzW{E3SEZ$dTvkQqw^5wX8X=dg*O
zWoHCWEOtz}+SszP(KZge(RQW0uF*&N-@MQB+_%~Z8asCJb6;QP<tX<H?wvo((;G8f
z-dL+v@rIKT?6Kcn-hKIe`OHWDg7o*@0@=f|6}J!W;01dLc;k{z@Xb}&rk-hGybJ2S
z<%iFIET8!@S5VBhMcsWngTH6~h4PG9BK(36-h%hJCk0gxh?n}mD7x;joZdIyUfNTX
zghHvLdY}75MMFqgkr6_LB70=CNrR?#(LyEBIM00&Nomm_DI*yb5|xq2SHJi7=lj>W
zuJ=06bMDW&KP9@Twf!Mgk8)s+*Qer{o@{!KX(TRr?`UgEEL~R_fxSiX$h=g9K5c&-
ze{+s_JwL-c|5OA1>LtN@WgEyk#6VZ5F)j*_q+QAUX6l+B#8*cSny(b&d(USWY`Py0
z{~ll`KMaOL{**i^*#K^@o9SvDVP??%0$3Ld;n|1?EUN!N1HxswVnqoiH9&_Ne9ORV
zGSS$#@dRicH>DZ|58&k?E!elf0p-?YlV<xm6bkU*IQ|O5#nMx`s~$g~^P3?)UU7iz
zmMsN=7uj6@5)RvAPQbUF$#~$OGWTHn7#5B7;O!L}jCgY{jhMu5%_F9eW**GA@ZaLm
zA_Jy*%`x<U6c3+EKjHzQXpTM=;#!XcfQs%;>MK)*Ymyz9>vzKW|C>B#v9TC(Ryg3o
z5GAg*=@;GU_8zBSyTVx6i9mHo7nL101KCgy=HQnk%#-y)*&c@Tk-HAk^Hm|*Vg$a4
zh|vFn1i*jAYf#NUjFOBRYD`<lH8}Us=HMyJU*WxI{-Brd<d$HLpBbTw7mtziXJzq3
z5#pOmStKO36AuR`lIfk-Q0G`E@280Y#vVIQ{bhp5qMOyY-gzTak)s3;=^BiPzfUDb
z5~%S>70?;cz<~|>QQ=)U$baNVbtGePKw60oJAET|i>H?*UT&lR9Tz5_tcrQv#j2>B
zHw}In7vp`0TAslVc_t}*m>w#ciwEUi5-0hkR8QpvSt`b{!-vB0YMcO8xyfRW<WVB-
zdW)Qre~3wX8sHdFOCH;Mg5tti%(>%R(JJ%?#QM6C2V#U{8Uv`K{VS5VClANdR&g!j
zw@5?4A4pV`;54r1;i=hv%vs0HczE4yI=^8XMi@zQxhbLOBExSdo&DY{Y+DWP8vH|b
zS6FcolN-S<bO9-MJdUEFri_<&8r`aG!an}B4r}jbg0s8?O>b<)jQB-38Y#=Wq3A+=
zHuQpyC4*j1*OSj*_uw9t=X7>t09o{&Un%|PJf^$thVh3s*s!{i#9zyS(3OUqQvMZm
zy?PVcecH+Nkm<~lODpLjrUJ`vN#U&a6I9SO684qdM3Km!=;bB?dgkhk@}b8N?wdsJ
z`ln%b^eX01;5{DQHG!*DzW`a~a^NLj&;JI?F?;G#cvpSII8_@V`0?>IxUJLTrhk|P
zp5HW}z2ynJ{@Eqmzw`-KwrX$&wtvYMeGZqXT?UUeAJL(O|L(31!tD#BxS{xE+;HMB
ztnm^?+Y5r+NBv?Fzj+rvd3ToR-OB+g@DHBv`odmgHJP9jmF({R6cR`LaQpkubVc+E
zEP5u!T~ywQN#oLtw~`?)>%V}z{=R_Ulcizr_hH<!v7YTS;5VM-N^m=bR>Gn5*QkCl
zlKCj1M5<$ixJ%s$yx_qZ@Z@qRW~YRc6A`0S>5u@kQFkUbpEqUZ@Ec;xl~gf&qcDc8
zA0c^j_)X8PdvT`uD(H_dMe9YqaMw?P*)iXOc(ux5DZ`PcO68cFE(I2!gK5X4<M?Nz
z5t-O80vj(+;(T9AGVyLcxL%@+JV*_PR}-5sI^2}&y*5fOymH2$^6QxJZbK~0oriH1
z#!SVDK`MK*0t-7>a67b&>GAyvWyg17t)MKY-Qq<T{Ig~6O_M<D9Rc_>-J5FlUxHoX
zRamlOJKd_U!|aurL{c=GdHwTJ@nPZ&u6v0kS;U+oTEA`*+Y?4Kt8oIi#dSH;;~7Ky
zTZGU%zktR>T*jQ+FGz{sUApSd0h;Mh42^lZ_`C2t6uq>d+Z|2d!>j;0#t6_s<vTR2
z!JM3qP=HfHgLKhX4R-d^9K5f#5QS6|sp_mHoMz!2xOh~Ib21*K&#c1m;;kJ}aK@7K
z<r?C<l^-y7`2spET?M|1rQ#9G3UFvKBj@?;Zr@g2B2B#An57s;&**6}uV-fxv9w5f
zQB?$-{=LE@4@cQ>enaqS6h~LLm2lLy1AlkLqu8Iz#Jwh-=FPYUYaZ=D1<wPJCR|Mq
zy-=b%v~IGyj~rmf=cUoF+Zu^wRW&-fI${hz>JSlPj6UaQaly?SF<nNSlk+r$-2JNj
zx!;J0Y)qqjW~kx@t>2`oH<Ho(+Duo;EXLulo%DUfGkknFflItJ1FEkH;=(1TKv!9Y
zv5&Y1dqq~jntfLA)p!EV6;FZ}Z{<K&;ULog((quHBlr<_Zd;`x<G*+l9QONbw$6Po
zJvX9BFaO*EH|x~kh0+v$6XJZ3u$l&i(qHl5-(wi6X$qS%)!;$)1guu7hlw`~pn9<>
zIr|sEY;zUtS9*-b6%BCRCkW~dYH-CHM{safrRm2cSj!+4W~hD=$hRJ*_O|EQWauS-
zmy42J{-&tn6iM$tHRi(HdvTA3JWkbf0p-jroGNO`Bt$9m%G9c9W}zki9!!LRt!c2<
z;ufB%ox=#vkYv6IuH_!7m9rlY>ET?1O>FYOQCyxLM}0H05km6Ft(u=Wr1c+-j9pD1
zD<zPM>Do|KZ%vLnWN>a(X|P%0771(k#cx?FhSCISEDQNS(-<k{r$->|_S}yT${)aL
z7d@swa|xaIxd!G+Uqh8}A6(PC(=69ZnO<8K2Ip>G#2KH`X-?CA*uS=cB;Ii04fSxi
zqB{`xzYXQ~4<9DoR$uV?F+qm6Y#oRVg<<Ev7_fY^hUpg;;}y+QM5kNfxOeY<eD&oP
z?k-ONq4j1k{O33CaMvKX_)KT!ZmNMHU3;$M(rIu%;9GV?`xabD4+mK%UuHq+W-K-}
z1)Fo5VBSENnO@68NQ|*zoP2VzpHpQ5<|uL!t}m#<&Kj)yd=;jys)xkhOCY0l27@zK
z!Fj(*$P$|f4_D@a{fxETwfoW-XfaAsG7gec?&92wA_4sUc9?GOuK+nw9!AG%urU*4
zn6rKdV1fG-{7apn*ZvDM-F{Dl4w!-3A9WIQ`7qT}tfD$?x{PhQ5^Y@859+4^xr1Wi
zxMI>5djH95C|Btu$3#S!bj1QxTp$Uf$*MT@V>WDgJ)djv;x`meD<{MD6-d?2GS0JX
zh<8afT>CK@SNg_ces~i4ui-a48wN8mH*;V#ERR^~JwUU2#*BF5QM}u$z}PBf;-L*~
z@PC|r7VVk1@BQcyMDf@8A5dp`h<WYB<LYF7;N00h3?HF!y}K_Fjp;hfL*^rej^jWY
zK7q)QGPM4Z4KqTdxXMmR?ySorkmyT+f0ojW?R5S;nWf7a-un$Nope#ZK%P@MBG0)V
zGKFcDDbPKuoP_E81OErVSt0M)jQSr1<_T{Hj2s+-37ui&PU9rT($53Wd^acelV&no
zCfCy?%S4&NY%NaEaVL6JxiKH-s?)EB)wy`1K%lqt;a*TU9NufqtqnX2leLww^KdAd
z_`D)oos-ChPak;Om&TAH(<Dp?`HA<V8c;RLg7-7!4Oo8drMu^6;-$uwpyRU;eZH>4
z)`xsEYU~Eseq5cYnz04{vx>kL>x{A$<%Bj=9V3IzTjA}6cxG%<7^KdrCanjp%c5cz
zz_0xQq-OO1ZZ7U+c@{~yyw4y1#mCV<vjgbKk8h}W=tk(3x(}@13>a<}$LOnJ_)Q~*
z)(>hk|CO$$)~V}wgJGq-xrtWH&h8efn$2%@ET4l{G>xFW=O$$CN`u2Zj@(y%28q85
zNnO<pBC4mxb8AV1{w#aQ7PZA_4|(vBI0*EC1Sjhuh<z_xh*3`!*>*MvV}&kK+dV$m
zzfJ%Q9vsHe+g_w<elhN_aE7gC&eEZ#0_5yfx#cAp;B+JyS_*m)lq*Q?GZQ*(y#m?4
zQXU=jL)k?`alFt26X>>rspymzfMq(bVDtNLXf3b`>uMTlL-SGAPu&F!J!&!O7M~zK
zavRPIh4atlLbQG~sZ6OMn*8}8%+0zv0Gir0IC+^AClq0d6N_v(*Ey1)tk^(JKDd*O
zj0VORX5*P9Pss0;iC7aB4PV8V!Lp5eDLdmj`F=Ycbl=^jCAB3m#8{HJ#1T{*Ort9$
zCvrcEFH%FZ`()E^Z#+{o9|Iri!So+RD7iwIYuDJ1-aq}|!-JWOaaJodIPOOa^J%o=
zYyb@COF;Z_S*Fly0a_H!2Gt~46#4H0E%`PB1~=N6j;8W|f8GUovTq<?p#}BMR^hgt
zAIvWPoI@kv9M$LkqjF1>K<M*Xytf+Y>cuHE@992vV$>&M=b#8p+-0J9YXxZ46yiwM
zUgB`9g5AC?8?u(};<?oA!~omb*yM2%-+i2ft};rbRnQ*l$Lzq4ivsagv9M3Bg)Fz0
zhgDLm(A~8jBOX43v3Nn|%E={Is5FQx56plopA<Nq6T;l(HBU*W)igS`J&KlDE`ybK
zolz-jCOLKdH4nn1V88Dlp36u-tZud7sg5O}^0F+jx6?$!+gFKC=M(bCa5D+`s)w2-
zL9iiKnXym($-XrRV<m$3(oM+)Xsh)OZ?CP!hZ|Dq1$G|UcV!DG-toa$15dE1Od&sm
zE65Y;2prpe7L(GNVY$-;U@IbVa+55KC$_R_ci%zay8;kMj%5wMMG@7CGZ+z4OA;gJ
za$mkoz_(rE5T1SwzUg_uiLd!EURIBClAlS(OabunDx|ibM7W^1YLqrPM7|eu@LYbJ
zjJva>V16i*)NzccIIJMP*O#!b;*Y?>!5w6tVk++oOv9j}7i@Oi88k1P$Za}eM|&0k
zKAbiPb`gJR()E?xZLJb|J}?k@Qwn+hVP^q{WT-R0xx8tPF=X$}g#@Y9JVmKAGOcPF
zqr3MHoPAse+b_<;D{VYZ`Fsg|dP9uo_U09y-rR;FCm6=PF#?{ynhD+?Zev5AIhZus
zq16T-ZfxlR?v!mP8u770TWK^Iew4%Re%S=wyH+qeFDBCUW?9%-Aj65BRf9H<*RXAN
zB<y}!ik6q<Nu{3_fc#Sk8F`1TD%ZJD@m1KcC6Z}g=8B&;oFS$O7h$nm0{xaD3Yp@P
z+>dQL(W%M?ZW{@3ldC1+S5_024yMAA&RguiU)|&p&jHGdfm?e0I^!9rfxEsJV&6CZ
z4BN7niXKTTOS7z`{x9FLmxkVe!X6bQ_w&s*S{;TTH|H`PgGg#cgJ}1b3-}>#2@0h;
zqu1n6&{KYfkDjeUw}SJ`m%LDvJ5ABo`6`6=Ji<$NhsjJXlYQWL2FFIDfu1hlZTbbw
ztzb)dc`t@K#Oc6BYDlf}Ho~G`>zRosLTIs$Go#jTfp*6$x%;_gaNI{7b3Uyg;xpqw
z*prfkX>;)wGYb1mb7`mAWgOlzN=?tWQq$1O)Y1ATJDl1{Rp&I&U#0FaG;JYiZ+k}E
z#y3Ok_*!<@F&8otZxXYHzZi8c7S%HcY51vCSTZ>s`qV7&n58oL@ni_J(oHeYO&piR
zX+VbSAn3NL!jIaQX6^wK>EN;ejJJ#+t2a+Xm#Ho^=B*&=@v-4te;Kkpa2*MYSxP6G
zZb2i>A=YC`DrWBtf{^&HM9z9KY^zJBQx*U69{l}HZ%+>bFT)RHj=C&-IGjhbF2v%L
zziOBhRfi=rHk0_>jxgUKkQDT1!(>BiXq`C^K3^YT-P5h{$IeM;R$pZrb-IOjC*uXV
zF}9mGmHs5@Minq}<t|aP`U29TN9bsp8@}iiqxv?F$;AbCc;_y((fT}Ny6Y{)#X@|p
z-dcl*z7a=}PhBK!W)`*j6hU9h3E`AeJJ5fJ4rX8F<AqP9q<ys&>0|qO%OC2%?qN}w
zD*J)n9-9k-7e1Cn9=i?I4f!PWLj?7BTSiPwZRl)+D4MOk4WIEQG1*_@aAIOS@8IIk
z<dnfoDjK*M^iO`JGo3fnsnbM=VZs%7l2S?f^}gYY-Z*qSI1^{LN6;l@Pe4V;1Rky7
zVV9H_7ACRSWVWHqXjqO}aM=sSt^;GFf0YiUd?ppWdx`Jgm8k!MA0fP>%IJT5$xdm1
zL9S<0%;0yEL_~GrrXR*Id)|9!bsR#S6^I%2JZy_xjw;+?{=yjnP+5Bee=iL~VNE;u
z@>QG>ox;b`sV8Wht0H4Oq|MyaGAHxE8r?GuxU(m=V0C^Ba(M#aU}1>Hf6730UO&`H
z<?<2}256>#Fqt!2j}x3H-~*ow3|t;Vj(&bX+LNuJbe=JE76>vWE>jq1+i;Sib{(&_
z=yFQ$_kgFzIMnxUCr)2xa~5r%QI~&{35I46edio_^7j?q&ur#xI44CmsLf*!M=CIx
zzt=Mp{zih(1RDtU;bALZlL*SQ1l`d~0C#8M;Mu$EkFTPf^LasL-FP|%jsyc;8^t@~
zy%0DX5w7#Z4!n1IoSf>ALC1gd!6tM$?3J;DPoEXQ$S#`RPIux&mySZe)_=T^b(_jG
z4}Rg@U+fBN^#}x?Hio!ULY#p5Byes!fPb`)p!dUabXQ#hI~%vq_zr!Xt~VPMhl26a
z{+~3EzfzAkb0HD9xfp)8JHW3b6F77un3~cZDDu&UstP>AMn!iJzZ`>U6IPoU?7IRH
zIlDo?wjIs${ZZ@41Pp#P6+GKiP%rK#_Md)B8z&rOR_6`UF}3?(6EBZz)FwdXT`fHO
z{yY_yeo6&cdz@DEimuB1%l@F(*q#hM{M-|TmAMh{MH6sS`zJa@yq-?{9RVJ9YRN!j
z4p|j56TW>>X3Po$OD@-q&_|mbVDI)TaQx3(Y*D;{?@q46?x>q|fe4E?+azJx&}-uH
z`yKQM+QMo6`XI`Rb6E;AK=jKE`eLUzSV_OYdC*C%R(${opEKxv?=M@Cb^*=b>M|Rz
zeK-4*W5ZrhZy=@HJ>c8=-T3|EM!5Q9H*RX~BVXE=V$TsHCP$!#_ai=ycW7-HS(0go
zE>Vv7V|pCBB_JMF-CGGe<fg$VO>HW8=#QDf;c<E=_zO9D<0NX(NEAQcM{xUNw(Xld
za7IyBDkeo9%^W7*9Y5ffr>9Z&MiaIBvIhskjftyfI^6$&Snke&Td^<YSjxKlUI!m5
zAr#wt9cTNoV3BhSuE@_vtMm;x8uyo0-czLZ+Go&JZ3>Kk+lLy4&2)M>OQ!7=LpQT*
z-s5l~IC1L@F7}sWwl=<DWp6A(jj&<zcfuH)=2c=`2Z8ha*=|PRRC?n0Q@XW12Spxj
z1D(8H()?{B%-7n6hx%D`j#&$vg*$0r&V4vD<c$$O+qu&R)tIA}*{B^PRc7iDi`ADd
z;)gynt}*Wv>WsL8b^9gOul^+5doqDZU6})pB2zi*XWnpmV1#iiIShMd@tfN#V)(j?
z737#31NprQvs})g&B704Pmd_kKhkQ(9ZKXZMz(Sd$thT|q!gQbOo*(X26}%sL1HYy
z6nxzU()YzV*R9vtYzH3wlH^V&$FC$cfp=k#zb(^VU&v&7e<#=eo5d}C`~as#zoj)k
z!dz&@cO3fEMYWk<aN?&vbGGIS1h*OD15qj1d?XzFa_8cv8BI8xTmu6A{@@ya6m;l&
zR=8;%K6Xt8aeF0h@q1^iG=E9XW`)x7`)%aGZxv8d)1xg5X2M?m4>bG34H7aS#&kNW
zz|m0!cs&H@XB9+*&I(cO#BusuOA21tgwQ(IyEO6cI%+sQ26{i}V)etx_&hy?#@f2W
z{W(E&cY71wZy6#DcdxT5O+NJ4#jE&F^9HF}GD6l150Y+43!IuE4`Ms!LLm(!Z&zx<
zmCF%qE$=D>IER5AA0IV!N6`Z+Z_7Ua^q|2D7X!b$9X~xSDhqbHOYhAuhRur3$X?B%
zebOS_{IibGml{Q$z48E2e=)qCK7rZNxE>{Qjp*LoY&7D%!?QWjgnk}?9=mRMIKGkU
ztq_ER8^fV0V>%UlV!*^a(%|N{Cc*24#t_eoBRrajME@0=vaA-wugEZBdmOnG;oGz@
zT#HHhJ%W-I9^`!20IV`K<KCAGL&B0C7&TABRo!D$DsT?xYpP5~9mJXL7rr2q&;bh`
zo`Szhj+{c|URXVL9F8SRz#M5)Z0>$eZb(Oi;k9=1WZhQw*(^<NO6WmyFHr%Mj#Bb<
zAet`uy9MG3C&MqHX{g5K(wt2dkZH7lU05y0T(;`N1Q9oyxep<Io*8h5i*b*jAfr+)
zgmO(w@z1vP+_J`Kh#I+tHHD_!#u^!Rlc5AzxaS|9dGeRE91Xy@ed|bsvK?nP^BXh{
zNHKwTmEq>l8B`m~ro!H(Flx8W?0r)-9*TU+F4))!yZ=U_*8FDZDoq8;*H>_AtO%1h
zSqIVthQYgA8N%o-bZ$F~GRD*3nAj{>`Ti{QKdyp1P497uMkILOo{qNHjG&=RoO@@d
z0AUA{!EKuf=+rIZ4u-wsWriexe9KbMw9X)7o278%{1nJfY6hXP5t<Zeg*_hOY<KS!
z$kpzEU)FE1d8-WXLiHPv@UJ5)=_u<dT}n@P@wwq0c|>l8J2}2hgZ4}e!*`RH5^uj4
zHg)6_Rq#7V_83e@m)o=HRvkUu;t@*3x?_2+PO{|k&B>&6P7fhwis;~NNKgA!5YJdM
zxYux(w&$tg#qW#3!9|5I_cX9X=&tE#-zltn&r*wp3t*x5Ni5txh2PM<&FuWiEmZX4
zP9lF>44sCxScx~GL}+3KbqJn@eR}e=Bw-VA%1tF<!JEj;C)Y@}Q7`#z^_Zxy55!5Q
zUFcLdWhiegM#opTXsZ*y;dtL+v%aCD;JqjvKIQ%+Y0E~;Hk?y{zeyLtKU@#}-uKd0
z`jcj6-GZWqOR(7kO~(%G!+n?2!MI~Hr@?$iISo~CbzXtV4=NyJ)e`8qEJL41O=rp{
zCgYb`@=VuFfU#BIL7dv7FuMXBwbhuL)pp=o>PyOAt3mJR2V$F8PE0E2LRM`Q828KI
zUCVm@S=mG{*&v$C9>GvwUGCMEGsMtDgmi!Q1gfS1mMtvUTHPVf{biY@edR332yzqk
zlc0Un9HbvR<9Ug*;4*t7to2>ToH;1KL<?Tzo~}s5Ssk14q;mk4@HP0iqH5&5cm};y
z=M5=S^iX@~6B+A~<h+*#@;*{^=Jap~3WulDV?Kg7#rqm6EU%%8ZURiui!1oeQi(hI
zAc^VNTZ$<mQ=ptVNSd{>*i(-W5uX_?AfYC}93DRhiAve{Uh5iF4YG#o$G_p{$vF@c
z7s%K>youf65%^N>1@G*_|A=dF2C(mk=v)h3d_Vl06zsA<4U>MH<FkTOX=o&KXFS2^
z$)f0<R}R%v&-3q)-&CjR8FV#mAnjwFw0|eXGto!c1?S7zpgYY}%OnpZ>k62Y2@HG}
zp9;4+KH^wiAm%m1fDEr41;3xA9-nmRz-lpuZ2Sgg20~~aXvD{X=1}&$m!x~R<Mel@
z@!`55QyGC*a6hV&4w)43uDXeHlGiSf1Fz$tP;eRV$G;OWHDQC<pB<TGM?n%X>ex$9
zObq}%nFCl)<5-i0`Ix7eiR<r+V8^y<@~w0tIEVhiky{7IjNhRoKP?zqL#N<&(L3b$
zKn?VoG~hkSP!eIP2r&UsbihiItUNt{i{_?5gzizQ<RwW=$Dfp;p*<WgOv4}A(b(2}
z6=0GE2+Lfe7wwOMN3|(9q|YJQG7)&U`2pU{^Pz8MJwXM2j44#70X6Sfz#%q_b*cP8
z16JtaP&q|Gr&JUPPlTANi`2kwjHj-27CimqVIVn>*!~bE(~ljYzNcki!~CDT!2G9h
z&HWKL_WI+lL$}FcPYrxvah`nPsX=VE2xAc9MFs;$$nal9?r(M$PRz+?pY+a!Z@-P;
zO1n5WYt3YCy6CI2(BJcM`hz^wjt=2zg>J_M^D}rQJH23T@f&jWVG26?C)4v|E%3s`
z9x4W&kkuMRc*ijpf4tK`sij}oK$95qXT}(H2-ynd$5a^s4;!*gTMC&%Cwl7F3#z{-
zA9W0pY1TCcYwXvd{I)?-k>UaO4*A2<sz)$rpTs&@l;FpGBHZn7|InC=MiSCWz9bjW
ztgk|}v}pw+yzd!nHsDG7M>{ZR<|8!FE{5V~P2?kA?@l*;P4?Z8#*@BIuz7<3J<i1=
zt0xEpGkfTdeS8k$R|egGRD{>PzQuHxlni)x`Juv|)A%b$hxzoO1Sg5`Ih~;QC^aX7
z2JF}hz9Q$SyGk()R?`B(E{@8te}>Ys86;SHGwgJ4B%2QJN7biG%+Bq*LylU;@Lazb
z!LNW2vqR;tSZ3BaXr{->dk<xtq;?cXC0%evl{WErcn-_k&d`zUL7Zl^5>L&XL^G~V
zrgcmwZny3uGwhd<-xFKN<s*$`$)*@A-FKcmb@l=cjVkoNx1NlCFC|kG25G^`Kh(2L
z9%Q~qGks$*Sg0${NRQdVzwI|@>cu5IKTmy-SY<)Ft?$CLq(BmNIU6!d4Y~EpIQ*az
zizj!5LDR=)BzY~5-FH0=%hTd`o>?k%9`lEa$W4X~hdz^^%i~Dv@&a1ms)cd?>EQT*
zWO`V(o%i^77R<OTK&e^?9@a6!))ZkFnP`MHOFCe^Y5{J1)d>S16(Ht^6c|pJh$-f=
zXf|yRoD8}RORPWPtH;y98qCq9eH*!$xR4P{E+n7q%+b-=ll)W<1YTYa`fBgM>>n4n
zMW0of=hLs?X^2H}{axh#=usjd_YhqA<QWwQGng?|i#xeY2a;xra4AdIay3j2E!IxK
zl)Tr_-j;<EX8c0y6-r!}*F&OGdy8kReFrM%1K!^ik8Ai=`}VOe8mFkhd8sdD+Ot`3
z{@usZjC#f!8NESXU9*7d!!<ORKkH8Qya2l=EM=ZGH{e6vm)N>p9Rg%+$U#*zsu^B^
zmhu-stwsjaJtlE%_Y>Om^ASnp&+Up@2dQUk60Q_4f#~->+{+S5YNuR8$Aw=ZWq~kk
zQ=h;E-usCray971Gogt}jqKjrwV0>f0G?M$z}HWbxm>UtjN1fI&c_OGojXU|KP1D*
z<L5Nhn}<B-GjQwp0G{8_j<5X=G97nsfb-`Sz-h8r*q6;D|69yli#g035E4M1kQrHi
zoZyH5GMH7eLfj;yo4iti5}16r9KIIvw^J|thx7JEf#^sZS?LymiZ#37)#L$k-6#@g
z1<9k%f-tVp))abE{(<s^vlxEy1ssU!gBrO6n6I5ck|)JsFmDl4dvFm%Ce-7^ZPs8L
zdjcMM3v+wFRMG?HA<P?C%q=wyXTIcYAkz1y<Fc?~Cc#!6gC1^%z?-}9${!sN`(*}#
zqVu@4-h2$F6S@8a8r+3HW(>7(fo>%^&hw8f9OuvESMvFhpSUm*rFM&W1;^p5eV(AT
z;W67W(+~$!*OC3RCs8@=NmyUw!V_6AQfA{YfrNy~&>Wjpa3N8O_`X|*9gfM=an?dG
z|7Q)sLU-A!`artJ_7{6BFp~UQd;#7rylocb_Kds>t)&k-RAKI55H3`>ghP)e;~Bv!
z+Q?nz`PoQe$;yRfSNv3v9t<S&w?<HP(Wj<Pzn8Mohs5B;m8;}Rjtsn>`V?k<8zTV=
zco^c6!Hz^Zvh#n3vtM3L#NOz?=-)D%Di;KpJ#V{B;}<I7nT3Hg+WRpzikk!B)5cBh
z`W<+&Ydf*``H3<NC0zXY38L`g3O^SghDDl!Fl@b#@mo>NUyi;WLODSyb$J@KXl3AY
zW-Ih*hEa>zF60XNjDb<}Fg`gFu8N<A{C65~cH2}Y%r_K|91F&*qq~S8U#}Uqd`a_r
zsxa5wiC3zY3bn>=xF(*514Sd$s39Bv^{mHvGY05kew2D~YX#Zioj~Q6{h?#0g2ACZ
zie!CXNn`h)!e3O8Y&s?mFDJjEIcW=-kH(Lvwy7HYH>kp`-*lOJNL~j&{n@zoqbgoB
z0q}GSf}?#oaAET&+TrF&Z7mIn^ymyU8114`=9EUgx(-!O1(_4PXzsXG5qWAH1?D9K
zm{X0oP_mn850d4XXCH=`#9!d;XhpA%41mYPQl{uk1CHhIW;Q65QWd^V@F2Jz!BT|D
z8s&dum#k*=zw2`w<Sg;r9tmciy$jD~&M}x6H;<dG-OktHPxGI-pN)BSklT9X9*#^X
z#%<LM^!nKFy8l*^OLvMP#a4}p{wl$}O*e(WXBCimypmi!C{AY*eY!YXoqhGLhs0*_
zm>1VS<I$5&(6PFi8xWO-0xwVUMyV9`zfxsJoo+(C<pKKImydA`HW3jv3bZ-~@bUVm
z_{pn`euyyVdoS&oz-uiy;X?&LTN>GzDS~Nsw_q+$n)#A>2=(^(18-Ldu6w=&`a(O2
zuEA?oID9vVZCwXj5`5s-MpcZF)(4~F`6yIf$3`>?GXiU@NXw5I+=(lrRA%90a$svP
zJg5pmAOCWQ-TDAbkEEgAPCI<I<P0=vJpfiWj?5GILa4<J-qv{us2^jE@$toU;zdg|
zpHfblm)ofDSUs{Qvq8N2F!;6Ru<Da}Xny|yxgBzncOg;;AD{MRW_SGpt=^}k@?|T#
z{f{Z**R~l?btIA8+Zpia%18Q6umOsCzGI8+F0d(^MQk5emWi#}4fc_Zbg0M}PnOt_
z=9<;Kmt70d$|D8+%dP<P{5E|Y=}Mm+)xe`Rxp??r5?(2KfU=`jm^~+uXe2~{NlZT$
zJW&Sa#rJ7fpff~$I{`jK8)T0K5%Ny~Tr0F_eyj)xN~+RLovN6wxq^(yhr`B7N#?bB
z7Pbb=M#Da9JUBnP%*rF0r};=57V`M`Y3(Ra$!r30f>U5f{{-*W)Oz$$yMkXn?S(Dy
z$}F%xlFV7Z5HD5Mn}z*c4)-raklM3tY|x~)G~F@@2hRi(I28}SWD?+aQU{ECjKb0`
z1e=~iyso*)sC(fqyEw!bB;N;;Jo82jGc-W`vr4%Ara1NS%pjXfYN71&Q6e}qpES){
zf&NEg3Drs`fd)gw;!YCQ`JIA~hSk)wQHmcX`Ao^Fxis^v2H8F#9lC5B@o#t++eisi
z$E4CrUVJ{@`vtMR9fhKKwNQN1kz9z{PA`@QkYiO|tl6o%^v{yr)b7(wp2gO^P<e5H
zd<=0wBY$J~crKRI%t;{~g`Y5&J40@i&V#Ri!%ACbZ-d2iUeleD5oG?ab7-|o73$(T
z>9QBM;L*WGu&;m4+wU4f`-E=tEPI5&x3?ZF{v5@vv$mA_dByUm&lXakX@EKDPH^Lw
z0o+g?Atk$iVrP~r<6yFYS#@+H7tuc*bjwU}&zo1U;#M%Go8*vZZxV@jN(=4C&!hTd
z9n_<^6El?`;Ow(!pseIK4VxCst$OJ}4Mrwlo`ww^6m+7JMRI^OE%3Na2Amc>C1sBK
ztfcA+P`_ct^gZz4{oehCl;z){h2PKO$&36|k1Makk}W$i_+1gk{EG(nFRS2}tO6wL
znT8JHt!8^GW7*I(C7fR(V17&>PMh7!_gY^9zoZu!zilR?8xzb3*~*|%oeK9waT53M
zh(wv-QE5i)_#7JasJ`rZx-3i*;2<+xfN5S_0x@IfnGBnA;QMm~%l*<|^51yAWVeS~
zwR;sU3yh@q9N&YYPA>G$JdQ^Nv*2smO6YUY0G+4;K4xi$ykTL?aqA+BEdrTGburu{
z_X6lOP-ViU{9r7qjO1SXM*kdggg5D-a9DOCH<Edk{8t@=OP{7A__BB*b~C&eSO9AL
zd84Sz2Bu%FVPh*pAxSzNte-998Ang&>IUa9^W^0r-$$LZb^T1}j2uXA(_su`1eh`X
zXJ~r76Gdjm;ux8Xk<l}8abW}961IpnmlCDN41Zujf-QOmxzq7cLaMgtGkrA$=$6w#
zK6!UT$DB^`Rm`3WBx-QcJ67O^SaW8@+G5!L-y!%^`<@J%<)Yue3fgYthACG%iNthC
z?%M}<cIJyo)Kk(7jy$iyj+A?lhF(x(_?#>|GK779-_wO16|hCw24`NHWy0;Qg<I3q
zxSP&BU^Jk~%#$j>MRv28M+H+*!8DUdY`sb5?AXTpce(@iJS%~b>ndDvMG4B6#zMyZ
ztGr|F4^h17C|S6E2!g_v;W0&L&URFVlL@=T8?sqWdQF~?wU<&bIyWDlYyW_z1WDA;
z<#0k#KX%aq@SQKm1ga0?yM9g1QD_n~+S&;3Uw${^9GAdGxA|C+B!!hUo7j98;81yp
zq?GAEzVRM-utXJ#>`$U{);TyaIv4zm7sJCaOSbyh479ayf|i7Q@Kh2(CE;xFz8;72
zBn4DYyK@4C8k~=cFWGoCoo-uL#ZFPZ4)J{c`IWgCbL_D{Vfy?yXJsL{c<d|aO^C<&
zKaP+alI!8q#8?_({h8f@5}e^~DZ<hpWIX#W>@_SvA>mANOW7ZW<fdVhZ8JXNdr_9f
zbn`^B5iX=|1;vU_Y?j(vLX1b?qjVCMcP*z!hm1*V*a=?Z*jtjM@~+HDK_7GXbHt~%
z3glD!G3)&<m}+-}OFc0QI=AyZ%6xsaY_AOS@7)aM#8Xv@QHO|`o)xdxAd?#QcfyB6
z14izxFsGY81cOQfT;Vq(x-X!cW}cYHwEdG~{%%ia+(H*~$YtYQ^$m>8$QpL)(GK!`
z<t1EkC>2-UEXCj@>14jBA(!qnn=!Y00Cjqk>5pTP;IP~a{>hAj*0IvE!%<pX-l}8N
ze2psOa>^P>;TBwT$%|I5ONZ*A-FV^w!yNpm!<p5@z?B#OuzScE@26@p-GjnJbF8H-
z2{V9?fiTr}HMG;0Wc@)SrZ~b4|L*EUbJ0zFu1+0n<AcDvVjDYuss)|l5Cr!|<?waz
zMRN1w4Z31!GODV4=WR$(po)1n;ndS@IQwxL20Q978x{m$g6maC))WHmN8fk{gtubG
z#)<H_&=!VOCAdBNlzERv_1W1m{$TA83y<W_lSz<{Dh96XpBPDe@N_+EeJ>tr9XqLd
zS{m?N)S=nRoPI=GY{XisFXjo?reu)C0Uzk}RUL4}pc?ZZzJ;yB|7dM^AO-DHv_a~o
z8T<YXdj45NpM0Bz-sx}Y`>`3|aAF2$TsHyC-{<fi2WFIQus4L9;rsA`uS4G&)W)Xj
z#i-vVON@<*G5T-}D5UNN2RBFN#GecBVvP{@`+E&MTlx(3*JpCPNw0aHU!~yvf76&h
zexl41?e%1!A_qjg1Ten4h&6k;0JgnzW*=!LVywS7`I!C*Qt!5LJ&Btc%L+LrpihQm
z95y3MHb;^Lf9}KO857MO6|X|sZAamwxE#nQNpt>Y4b=SMG|+$9OctG7gInTaVfS&q
zS9QP*wmI|QnACrup4EkEJ8T%=F9O&!{}lcSH3n<>G`tf%jg)*m2Euh-xb9p47I_MD
zrt@dRhGt3PpZAMC9p8Yqjj^~(d>%B(AErtHOW>JN1TXWxB2#d;4~4T^pr>B}3>I-D
z_TFVO?KdI1sv2DX(sL*|X2!G%s-Rur5_0#9IurTkPFeRx4YZn?4{D+zn3&T5KTH<T
zPnpu(vuM6{^O4X05K$&^i7&tj2U5#7h4j<Ad_N!`_iV{2o4ck9`=l?@eyy3*<@r6R
zsdR!`^*UVR)WAAimjSW9T|C?OJ?z-BX4qGHhpZh?!{dCf^8Oe7P~IlY87*B4X9u^S
zPcP74nwsqSJ;uz2nil$JTMeA<X~yZ3f>5lG2ZkL}IqCh9X!7hX1jU^PU+s$^vd*4;
z<-U(K(rch1dSmD`qn504n!~La4rLtp8gg4{GMD_-m1jF!ni>9=iIYw1QBPEa*{CFr
zNsrdRqDEEhkIlrRgD-e}kGfDrCl5ueGEor{VXgNtYGW99ep(KpQzW>v)i;<u*SzS^
zf8+Qs&jJ%H-QllcBdNLCiwUL4=o_$**}UL89o^A@*B)Kv{k{-N_Js0%(r@+10__HR
zx@IT3@0fyPX|1UEdKG>NkwZuGWsF;}B=@3P5gpGLK;X+kHoo{PP4){WMXnoB@<txb
zUa!vAaxP+-oIL0iE+giJIb`C}ZsIjQ3D%4+XI3k7^vrf^i0)8?@2$GrRlXK~DLt7y
z{Jx5sT(}Dh%yxo&<ps<r5@2shjNo$J>71>^Rrb%LMAYZ&>O+nWC?;8nOI?&WorN*v
z=7(Hr_mHnwdMR)UCbQuBeP?u<OYzf$zc?|$idibr2>DDTo)LOO=RKCAxz7*d*LrtM
zJEM=S<H@{z6Lc7@$<o|ap?jD;^#IKF^MN;eov7%?=O`y7jVb4DLg>~kYzmlyYUgE{
zqBbQ6^Gahi_IH!$jAHV6aX&^2#*itUleo80ny_+MidWz~f>rMJWP9&peDa3S3sbHV
z_kYFYWnv$!n@M0n$qTq0D9oJZ>osQ$1duB(qot**SeGa1MDm&xxiQ#`U3E%u$DtaI
z4GR$O0Tp67+XC*|`=Wqd8d<-28M^I%Lx;+u(Fmr~(s{aA#pf@-w>jY7+yn4B?j^{L
zPX)PbX)elb3Jx#KAj=L&lYdWh31i3qc6_Qtfz^*7cf5gY_~?sAUs7;hwH@5c%27Y5
zf$iJ*iB=b6!=CYXq$K1HMn4Q=_rH&VFJp>0QTGfANeAHO76w?4KWts8F~-N_;liNp
z@VfE<n6f*}zFMonhVlwHUa^ERf3q5kd$Q5NRF<zdrhsH^2;5q<4VMY$qQS}-m{_X?
zD_aGb^TS)=rtSk!8xUtRO-3-}zX~(oIhVOKMv*AmN5W~Pk5sQ|1Ph9?AkSKjvs@O9
zgNsbK%cTQwaAyXjG1HjC3%;S;B4h_2h%@tCbD21&0Pb9%2DjeEp82J6pAJ@=Flz=T
zF_o{iIrrB+&_r*Ollc{FpXN-e^ePcOM69@?_b*6opdj=6dMjD2xdudRHgN)}T>$ON
zaCMqK(mEOLPrEn^cLyL~&Msh^B5`%ubNGTkpgi>=I6a$N{P%ki9G~}<Oce8GB+BFP
z!mnxY;eJ2VI!u5C!m}8caDDF8&p%~`Yh9V-Pgb~AU@LBI>@RCC@I<#vB`9>zgVYnN
zc^egX!1}eDF=?GM4c?{#?#mj<#e#JFP&kVWz21T@r?N;!dm~=9pNx&Q=h5M56r}je
zgZG#!ebpa|+Am&_ea?SK;Uzb0AD;vDJy(gmSOJDeiDPVKJ-I$&Nw)p7rSI+bWALdv
zY*3^!@lSQ8i;`Z_m3xax)C^se(>8{y0XMM0*$skL-h&%6qrlzX1Sff{K+<}KsE?mO
z&PAMVdy|0qf(>|UmMmH&)=|BKuHdcvoPJF$<(0bsM=T!*;q8DT@Lwdr9dWw}GGj~O
z-%t%6%fCgNQzPNwk502I%2wRy)&O)9&xWZk`*?MyHz2&dhfcML^qsyL<ax~HwmU8)
z>l+Nf+xsaUX_x29x|Wn(tTJQ<F9(}y&k4jmx=FA&W(p_K7t83MFvNC^c3iHhj>}J;
z#sAu-k-e@L$&<26DBGLP)W_*VSg0mfQ<n#!C(WTdrHYR5_3ink&8U-mh1BH@kc7i!
z_;HIYUiOkEkJW;i_U%So=l1F7DEA#Ew4TSk(sQ`E#~;Hk-2|d-@uO@%6U=iRyF!28
zUBtYwxC_&r+Id}RV;HTIh8M*H827K!m_tJe)ctBCh8uiA(Qhwdjm8SJG>gJ>hW|iv
zQwlYkTT9x8ukiKAEIc}Y7Ne9MNiqcqnf&cLCe95oy(Z6~q8(qu+G&OrSNDKufI1#I
zzZ!mUW8_D@3t8uLndA*7VEa`=Mj*SCOcxX7=?GOo))Xg9nAAifbsq5gmmVW~Cmsj2
z?1_v(Jck*7ydjmWAulpRP+fiz{)!)FL*{JbeGvV`vlisvtAQzS@4hmsus3+g`tu>@
zmJ58@BFL-WufnWpk>u_r)REO)_2ByE3cFsdoJx5CPW3OxKeNkejDsOu0X5R^BM)O9
zg>cu42hU6+(Zw#0+&DJ}53S=c^FMXaj=#k^h<~Twx)jL!sxCsF$f4)O1ROhmk(Qg(
z(9nMy;m4668sKLJ<&lS2fn6iSwzh^|&fkjPt5Q(XMGR*D9HB3}>~LUO3^m#Cot&<m
z0hgQP(TkX16IV};OrM3VPFjd3yXZHo6gsUW03*iu_8IdbI`w`var!8MJtg_3=^cec
zlZQldp*1w^Rfd9<A>eImO(y)e9IVXx=|H{`Ro)-LKZpFB{E|567EUDD+r+ucQ#4R7
z%>Y+kilc>>&f(^D3nA&`MOq{oiFqbn^o_g^Rr%|Oys+E6Wh(-B-^Cfy){;Sv6lT+?
zH$9NNc!bQ~;>v2JN-?GtJK$IOA};G&29<q3PHk0+!TNq9oo#FZb}a$8^nn{lZj8mj
zMsav35QcZ1*F&`6b<!~Z0<Q4e%&{iVXk5uT(EO;(1(t7ujEDp@o9Tvmm-xKCYc4Ha
z&`dvm&Vs$7*U7On43_0)!QnefXw_Yd;p-NYjIUxe<jp2Xeilcs1xJ&#jd$SAuf-5R
zHc)}7nlz^HB>lk1a0VskL64l^V*?9zJiXGa@<2Mu-1diov1E8+(@S2opT-MCi_kPo
zlH1+(2LD^qLnizTg2W@DSfeP$)J{SO4t~YYU^c^^+bYCeUX)wfbrpIOpJVsEd))Ve
z2_Qe&1_OJ6nIIv@3>?Y>lcsja%FI9wzE|jGoCxFY|BhsdZ|A&wvw=6ai1#>0k9*Uj
z2sPE75a3?`Gq(wG%0;W`zrBB<HYbGld+`+<9e4<=!e3s6OB~Ujo{w){Tmm1yuTI?0
zllqs)!Z*>E5c_Tx&X!Y#@VX>OS6+mzEfnT8%*FB(+N?v49a~DR&`9Go9dD@w9l5U%
zEaLzH&3vDtXcqi(N&>44G4ku)6i!8|ma1Nnp=*C{h95(o*z+r&7H`gkq~10XcR~pZ
zZp=kJzHYR>E*F+RK9BKyzIFGGozSS|#QF9I^Cb8)k&9C;Y(4*nyq&Y0uFlRUhi>)q
z>h2TL8J|zKw9R9SYKQTO_d$$5>juxqleovLZiD`cWw2ak5<Q=-2%Z<qX==)D*k3k>
z*|EnO|Jj8Qh0kx{Rf-mK;En=XyIq2E5qosqQiaE6#KV!s3=$ux06mXu=)<FDVBtfG
zgHn8M+UX1x&bG%Bo35~RZF=0^$%(iwM1`lGa*C0X*-NLdu17UHhT?O7X12uy6tVk?
zrn;$U^LrZCICrSb;l~qT4gk9Us)skPnn^{B0Q35{5$Y65qqu7~_0`b9Z+?PkGTk0u
z)LSFddx&iD(1i2~dE71=iS^gcP=)3yR6KQwhP>Q>Mq3Vo>rzur$L1JJ5$K^B9|X9j
z8ye{FVSr4&cY$qtxe8|nAkWSvn~aq_quVOHux_OqW6nRrZh4AuO7;Mr6TZdnJ{dz*
zdDm!<t{lo4pTU36w0W=F#YkSUJ%6U$MD5${m<p>E7~i=QWwcAl$n{mUnpNP6H)*4)
zMgnd*`G)jg)g=|iV%*m8b71#Q5ELF(vQfUDc)m~C=*tsw3^QW^D1NvPwv7e+JZ~X(
zOqPVf+^?)jM*wV}a2NKaO5qmC3pn^;1DLmKBe^z+_A1v&t=n0)KPn0)N}h&O<CkDw
zuQI58$Rdi%=is@QLOe-{WV&HQpNOc`(&0RF3^*vpjhW4Y;F=a9<+%=LDOusp4|=?D
zD<crskid}>G7wnO%<B$F!9&|`;pJRCjM<q=F85y|X@{P&=`(bpur8S}kM*EOlM=I^
z?+EL-6yi1Xz#=f1_op(K?G+W~g<22{&#5Cj_iZB6_upcj`SaSQH92(3gH*7aXpV>9
zNzywfHR#4k3aG5BLbd+I-~r|V`8d6S*E#nz*&9oNNkCLtcOFr43FrUbo#(YxgL%3>
z1B>nAP+d-(5eqxZ_ncRg{Dfv&HC2$iml90DXB?f1x5Hhb%jj`|(0Ld5yzF&;?#TB(
z9rM_ZJN*M0*O_iGZeflRj+1fxiU4HQ8-Sfn0miaAc%hk}(<!mXPjC5pS4%i<sSd!r
zL{*$KIS@VO4wGs=mz4j-1n22FlYPpWxZ?3oy1wrK>9ozq$2pPozfK!yoWBM3UmSzq
z_kxJmhXDMx`Vk5{x6r-sc9Qeo_Tu#Ynw-vs0H|`Vh7Ki35V4B}Ya+wxvii)j8V=44
z0#`U?oD$EaWZs$sxVCZtZ#^8KLNV41vsajFiOJ*s+3GW<)@xBka~W=2qe6NwF!afW
zmn1Jkl&i9O3i-bhG0WJ4SX+z0p^|YjEE$e+`7z{adlmEjjyA1asfPNaVzi^{5oq}y
z25G+6EJK3QrC;vSO9wVk`5;Y9h#NtB)#W7cPA}bhV+G3ZE@X|&;^=Ij-Pk{Y<H=e&
zV}r0JJj+NXwX$YdH8>F!9TZXKk0m@!-pCkxc0u2PN0f#>!8dLa+^s5GMznP%M@^=2
zPanpSbLS^BQ(R)eeED+degoX3vh|>~qnD~?UdE>fH{+S&Vb-lvg#)gK>APM;>hI;^
zs$XrmJnAzi={$_n4OKwcY%7Xhc#V>)tBHFM;m(cS;i~>b(7S@Ku<Lg(qZyV-#-rx2
zIt^iD=$Smns&^AR+dM{AcoA9bqlUKc>Ttbk6TaE-#cUp*E4VsSoz*xzk>OO{GdI7P
zakcqt{zuVy_;dBXVcZrK4SOU+DT(lT?jwbULXw6Or6EeGq(oZwNLIE&8nUy_b00*7
zP)Sq>)wfBhq&@XJzrO%4=bY!c@9VnW7uI)QHw}mk2J7>?urS1*6CGDUV_Guc!iRKn
z>EjSat!9B)G6TNojG@15nxO8lJe(=Ii*-(=FuXkncPHC0Q%B9&@Z(uHKBoc;R{TN(
z-*5D9+gftrQZ8QB(_p5~{0x-OCGA-e1QXnrnTkuc;KBWm@X`Bla5^=fjto5`>N=e`
zA?^Uy{EH*oe%j!XkxL{^m5}A^50ZJ#7L0O4X|?<^2)=Wa_iDJp>ocu_BD2%r&-Buu
zwGnXDcoB>gULeDv;+&EHF|-S{gXgh3SsU3(VtM{Ftkm{E6?t=FUOI)g=k?(2V^Ks}
zOcuP`Hlg)eN4PqMcQccnP}tu?*Gx;pH-)kAw=x`dg!5a~xQ`^QUlaG893}s~y-IHD
z`{T=CNxVB`1G#S+aN#E&fh=ndM{m`Tx$*~z-vc+)8c~Mwx#LOypand(cv2a+rV0P*
zPr&IW1K?HPPKRd)3p{UCquj|6l-soim#vBir>qJz3!F{GHzDk}c80#r_(XqiRL7;G
zRk&j6KGQQoid<(z5dS-;Vdm&uv_5wkw{3_vo!~cAwednO?z{3EUR=2>@cX?U#;QK1
zZaac$xI+%vATtT8^))e5XMmPYi6tuak3lX~3qKr4!>@*?;hw1>?@iwZF9!<og<(t;
zv+4mEI1q%#Y!gt*aW$NFPeSs{7T&#iL~bh@gT$L4=u_5*EpO}T_}$r<`^T8b>4{=j
zvJQMy66G$sP9~8*UkSE11!H@dCA2*dfr~Er<Y%>|Ai4JlPI6U*w2xP*(E2{MUFH$3
zD-UPwX1&9samD1l$8-2<{0w#HeIt9r{NdPgBUqO9fT#pX(i?0RDEqWwrV<CAHG-<H
zH;vLiEgE>>o(D0B+C<;HOUC3?H_?ID+FeSrNw&}u6ji)P)5_PP^1w`FzR%)4vtM{k
z`#0)Mp2w`WRc2P$IN|vl<3QCqo?g7y1JXiA=*(Zv82)V@<yP@K+K4u`nBSn<9?wT&
zPszvN<@oAc7moeei1INv1TXtKVdX?GFtKkJlsVo4H@`4ur@SUofhG1#o)2?tBG|Aw
z8?jAQ53elvNPk>e15wM8!NW5fD$6D^Zt`-tFhBtQ=^nT&_z#h-n+9`k24Lc?4l;ND
zTs*I-2AgBHK_s86`BeEAvOY;+@9)#F``263g^C`O<|otVhi&0&V+yPno5S^9uM#Y~
zI~z;q2gBKo%0z5`7TMON2wP+n;q_kzUkZw7|KpALakmJ(8UK~seKrL`eCL2@{vQ(Z
zBT`TcGr)b|G^Q#G6Q#X3iLb&b>dDUn%Z#p&Q)NSB6H&mI<0o;R&*IVKwLH9dY>wQ9
z$Hd;GgI1jBgq?BaB(XY{GzER2DS7FvfjvKm%<$y3@pkmnxknZV8Gvz=Ev^4{hW+EV
z7ZzV0%Z<_9hth8|Aw6n3XRnz{sy%C%qZR#Rb6-3}zO2IPl!p+d+s%7)ZJ12wdiwU;
zR`$u!rzn~@0>2BN2|Nbkh<dgpF7lhn+)gdR1+$ZIw^B9~-PD2uA0E*gs$)q@&o+Ge
za58gp-s!4)J$~FE?^SM$n!xN?qk=aMt>%TP8Sv_$J|qVZvA2W|VQ@zjj1(7vn!P&e
z6;+^&(Q@ciaDv<izPP@29%CP3z!BbKDEI0uRr0-Ka^P?txZP?N<ll{gR-0Y;>Ckb(
zPFJ2EbD6KtJ(mfiO*iP5b}{a$_hk5@y%?ZNiR(AHO~Pa(nSuKm0z2QG@OtYu@KC;k
zV?;eTS??26Vn7U+;6<njOCyi(N1Mid9tQ>!3+U|Tl&WtsbztY|ilPr3(5HPYUDp}F
z8l4p7(#II0rdd46U-q61ZKx;4rl(LSAr%t)3IurIARW2;o*Wx{4@w%GVUu$&8$C4+
zx;kEgcJE<c*V;=B7ZTbqKTPoXWJJ}WRk3i^G?O$xhyc?nLoAUKgOSo7bZ>D3Qk6RD
zF++&zy}ST_)`Zbp{Lj&R&>^_cUkp{+NRFA@qV7{pu*3e2Wck*6aEHIwnWc)6?(1{e
zwf0M4RZs;ycGE}c+wVxKi$6>cK8+a$9;EI2Tv#{wnjYtMf6E`)WWJj-NEltB#}mph
zXfOna+C<=2!vs7%OCKV#_&oVVPiRo*cc<kBB<#p|juqs9*OVK4(`O+)ed!>bIKv*@
z4i@v-v)!<H4+E~UGH|b7j@cQjNG6tr;U(TL-aTQMr1+cTva-kYVvQH}@38`({Bu}X
zoP}TG>uBc`ZBUzMjaueMNJ(zTT#5bBXwiL<Ca*2URWElk4}_l)35QzTkT(HZYtzZN
zJZZYgVjRqfZi3pCx4`+ZJsx?G3*yhtf_V^&53IGRf$&DqIb4sHZ^kiebdE!X^+oix
zo(xA%^LpN*AWkVyfw*>O(>XEPQ0jXLQ?ymN`bBHFtl0@9uYM{wedAX6>-GRKna{B~
zU&0sW-%J(8M_}guLMZ<?2)A!WVuyqZt!$Y9)L<gM;Lk=~$ElDtw-ah^En^B-igQ-m
zrgQzL#<PoT@6rd_jWp#~8*7#Ngno*yg4YVO@J-)q+5y54sQgcmtMi)fkIP~=8m))C
zoNSEyN})9{29rnI!S${xyM0YAKJj+L=*hpSql*+9POiYe*WMVsy@C9T6h)&wB1}up
zSmxEMF}TDi5#JXTf}-v-dgP%#$1Yh9*UR@XJt@VQBf){6Ts9Na%+P;ovvGoEG!~xE
zg^3Q=VRc(C1ip_2sucl8hgWlEUgaRG9w>OM#Cx0#K0#7T01S&tfXz{HjwD=wE31nk
zJzECh(f~=n^&OO}(=mCW84_t;$Nu*j-WE(@zFcv{H)8_Xr8bpx%7q5-;d7urYxiUI
zrE$2(=^lH$Y&lnPHdSzLS1Uv>UPf;n|3zm$F=foxhI2Yf?%*P32iX^nkabHoGeTwY
zxW6w5UmcJE%e9vwL_Y)i)}BGXv6{Ho&j?mBt!O=G0$#|9N0r0dxedQF*<(AlqIB#S
zEbKW;yk`ud^tKOdQeP5tWSk58;&~(#pUuPpqu+Gx!*TSalM-j~`3Ic_uaH}>4YmiK
z(*-|j*t`Q0pwa4y-bdFm_g$OO?#p;AGj#`xI$eA;?mUrrYiqir_8_jxE+HBk7eS*o
z7#^2(laM>E=sSzgPPRQDoCES6A4%$c{UJI3Y#JI>2+`fP`IsDNgt9WcUOK%LELU9w
zo3QbabZ#fi*rADkm-NyzI<18K`NoP&*5h+*C3MzYJ3JM4n;n0>h|bzoNQAD(K>6P~
zc#<<j5~rOf-^(M3<#lPi;eH7J1-9U=>`kmu(TIsOR|V-GjW|v2CK29!f{|2M%jRJ!
zjF-K}j@h1tA*o`NjUB|X4!_Cm531bq(_N&e#t*AS)4`B`zQUs~qTZ8O5`RXAJM-`|
z*1c}RpHvs-3OcK#(#+6!ivz5<G>2K>ri{a#@AzCy9JtI%2fg8moX4kGn7v(sT`=x6
zW>>vK>n(kPi3ZEDU0n$8<ga4}3ng&&{!dt#oyV%P%W*}&5)EkENL4H*ao&!uj74lG
zU2T(%pCSM)6B4Pk##yQtf0;aMjs|C~U!W^LipLs}eh$AbF!>>Y4%%yZZ<q)i-^XX{
ze!G!rhkglmPoIH{Z>+}1BSCb&hAvDq=^$IKtza9QU*RbeWoT-dSJmcjfNPsCQ{~!f
zIvUHfdw9o%a<c}sc+CJWTO0H^Q%4r!Cf*;s5ZBL^$1iWkLYZeb&-ge^7U!OYMZqm(
z$!$OSWzsa>$8?PT8qDNpvx}@}PnF>PE|$8r2BMeJVz!yjkgnIBLj7v_-N(sKWMkJd
zcFmAFX!Z(0Q`;jd*r~w9Kk&h5pA|&v+jrP{%nR(coh0k;ZATd!GkUh>DqFm6BB=2C
z_m=P&_>ZqEODjqNOED{YuYM<0d)h*N=-kHTd(H5*LOvK61VPPSBec%QqY+w(<nV|N
zieEVnUQ(vy<C!sRm6i#Zm5oKC={?vUQb_KsY{n<+)EVoF75Llx1sfGRgW~OEG`~5P
zJ1sqz8OJj`yL$q#<VYC4={LdOkDn1qrFlf!d5C!KeoiKh(P7+PT7ZOQGu`Lwjf--R
z!lAI|mF+paF{dyH?G*&%yZdGw`5uRE?wU|^NS&6mO|<dyY%)BojNW5{P|}NHf#r8p
zJ(0nL-?$<8RBVW`?xVDNdn<e4uRjgY{EvhTj%PO<UxV7lo6+G3&tRN#Ss=Dl8GWBD
z1G)A{>Uq<fTqEVswm%4bTqwP5hd9x22~K-qO9Oi6)0#9D5@9h6o@2CmCdmc*@X}VI
znGiu6##1W$A_`=tgrIzDDbEK}gp;@0=#GplxZwK5sspYX+_McQAhYK$+K5WCk7hIh
z)fV8uaaE?q@*QlxkwKnjgu>rFTWGD;IBsKE5NPdmqDQQkf|!FYI3!2m=T$T4m+`%z
zq!C1{uGQ1deF|KHj6Z+o*if6_t|*u^kJHE%gHsEZLAJ9B9x<3oKI*0LIT>AMa(^>*
z3bm!rqMu>XP!_mflVHdEizYqULfqP0NKOS9@|@EmSRQ&F()X-Im#JoWSty2Wnx2ba
z%!To{%K^svv@5L6aEHVh(>R~<Vz76ygU2&M;o5)%w_vdhGoI-rSBD?KU&%R~`|gtv
z@oO<Tapg5AM%|?yKQpnVjA8a3T|<&2c=ib2A<m4A$AgcBn73~SV4mU#Eg925LN!aE
zwo!sk{PB$JFjt~N{9gO0St^vy)ZmUjv4mo#3*L6Q(fi?daI0`LY-#0ZFS-$nCTY>d
zx^7%oZ50Up{Z3q;^^tgCfZVc^VDUPYh<wcyd^<md8pnZP&dfGg_$!t1dwvj2JdL<;
zaXFfjqlJF4;@rb&1+XH=7vJQ4rLCJ~@ow8$dP?p98qT>)XLs0wn6(`Ue%~ZzQqACa
z;v$Gl6=G_{gYaIpI~0n^<8~Em;7VFq{i`FiVai@87^{n=<}zq|FBo1Nm<XO7PVBs^
z)#P8;D<XMl4_bo~2HRd@1^;#9%RV&}s}8~U!fS}n#6RTPXd2X{)x$Zx8E|iwEQTz-
z0U=Gsu;%<dHmM6BDbEB>^LGU2h3!Q6(<|Qmb`sC~j6lSS>74L$3;cAoPjGAW1h`Kp
zXjrYlW=k2+{Y@dzoLW!+Q{BkE5Jci{k0{e2|8t=0Kn2yKFKO?W5~zoIKA)~Z6^a~j
zQw0N2r=Nn#svJD~xrV5z{2(e-ZS;v;szBD`sz5(99R8E0IHzSJG)-QH3WWi@*APfs
z{W`ida6kN)`<Qs0wdC%~TGGFl7_zC3W3%|@B64V$q|Y`)&)St_IF6Y1?1;d3GlN0-
zhZL;Pv7%G=KQz@p90~jOkHGC-MdEBJh9TQ~dC;)|XufKqf4`(qC(Be)<Sl|r>*cW^
zNgl7Y1z@YrYofO*5BKd9BGS*&$f~kQ5XtAp6LyqV^~Lwm#_WACqv#a=?e-vBZ?$9O
zRtM_!Jp;D>Hh~tCJAzI98q8ghDEwWTELi<ilCum81zIwRJO4YEZdbJw_+I7j<6G>(
zOE#3q>zC5S(~aQhB?HdK(1fX|y#~grvGB#e1?zOwVO`98X2aq~(74qIQqt;)kGl@D
zx8xvwuUJE_t&A2FncT(kk$cgprvxHBLgA~u3c3Ux#!O!|P~S9xULVXPK~dEZ_hl?x
zelwZ7HPlWl59dI1dN_L9EUXOMyBz{zPC{S$U9u%;HI%Gq!Iu}6U}5n|x^c@{x<_rf
zvC3*4_FT(f`uW8bQq22b)*86Ne_}^yMw<;xmOO+vC4Z2&y|VD%cR8$ZO+?oj{bXH7
zH$DyoT9^8Uq&!nV@386Iz|vIeG*OS5+Z*CI?G!p>atW*V*<kOJO!QLF!uC`}@YT3N
z7dj>J%qUOZV{48De-2{1;v=|Tv>LuAPKK5(O62+l0$y%2V7J?U7;bqUqaC|wP01ZT
zzg|f`b)3d;85L;p*bw%N=eeNe47`*#z-smzX);M;Pyg1!_;>s1<LC^~`}~*Mb$(-K
zeTs)!a-y(7g}}`NLd=7KB-DI2gb&;*sG`+UG#?BXG~~;3SFDuD4=G1*Q2i-5eCjeN
z$Fp#BR06`^=Rop?V>q-zmc}~mq}9{>=!T(<oZq2O1Z8$HVF7p0?WY;;%{a;U_lx46
z`YB+p(M(=Q1>+KzBmo(JMBvp_2b&E~;fL-K{PDAr^-ntuD%oy$pMUmFSvoQb{4OCV
z>k4swQbq>C?i0c8MWAuB1y7XQA<MI6Z(Ivze~WIxojwsbe`O97#Jr~ECj7jWCkoqy
z^<f6L36q{50HO8U@bZGikgnyx4)*>tJ$q~gD%zgM=B2wx*d=u);6@2{_fAAZg-@vF
zavm+}S1~Jxk6@CuBA3xT5$aFMF^kp1V5#$5p1WYmeUVaQ{)mg=^d-}{4#hI6^J5;E
z*2Lg&MhT`otR}s>N${mniYs}S3Aq-h7`(a!Gcr;j{YMmieYt}i-#L-ba8CsTc`vqe
zi4WE$g=0od4|W7h!HsgiV1(z=32!hLI3`PQ8&}1!FQ==4yyiGY$`#o)+7g`Vn@?oq
z*Aen#TS(PQ*HN_i6%Jc=^YK3ZEJ*q^DDXK)Or4(mfqB+Z>@k{1i=-Bl!O48~Ywaam
z=yHVDRpg<!B$4EZ)w5<pSK#GFiji;ZK!2qbdQP8=)_a8D&aH4Xh}4C%J=@{+#WU1%
z`ZlP4o(Qar7zX^@3kMIk;>`!mRk8miqRoj7jFs(0oU%d}vMT+lpzsz{sLtm$@Vf@1
zHC4<%VQoQrMLkS+8N|oQkKmc71uG}|iO3B6V;lX1VEl)jWWLC5W=on0itbzn4a<8;
z%!FKK{TLB0d`=-sce#-rVhMak&4YR+g+k)NTh!`oBg_cBD@c;w&ZW#qCJRGUneP0_
z-2O?21lG%E!-UV@S?!1w*in88w|(d5Fq3HV+<GB3JRL~xXEL;6sV>c3A4=<soS8HK
zh2yFEF^suD399*-$|3v>EIK2?Ev@f^(!^lg{=JX1nkzA1-lxE3>31~V=f0_L-D!NT
zD+cGLR^xz{DBf^Ug^?BC=;LM|xZEy@SGHH;nUX}oSX*Oe^HDcQa~UzU@9HEDwf3a3
zND6lfU4=@)cy79>Kl!P1jMs2_A+<^rzc|&vhB_hGv;PC6{MF}L>;9pk@ndrNC7<^*
z)?iy}`Tg*c4M-(yP|t83n~<~%Z#7l0_x>wmC2W4MS8qNc#nB%G++quw;JFZm?FR91
zZ4l(SXhV9q8C5Wu!@hNziSczvqOY`@eslf;@>`?OwM-U=hR4Dft4P=!s7VKQslfMI
zU)aKHn=-rqzf;p<1WTpBwK#_U{<I0-Ri%^5Vln)g>QCCG<j`YzA1L5n?!y#OnEdxB
zNS9VqhYK0_T33d7px(g#7MeriKAeIj22b&<!ACMAu?gOF97nSkCt<0EEd6jk0n=~K
zhREs?SYVq0CFSN!TB9o0Hs&(k4he*P2Y7FV<`(cD{fREa5qN1%CaU?Bk|jAsWY3<n
zm~c~C(4DMAzRD+5K6Ji8?!MFo3!DVw*3ZVVF`tRKUnX2^-p=aKa1tq;j`xzk2(~Xs
zCgI{s>3Azu@EtaUzndetO=b_t(BNFK&e%oOvf^;$8Vj~1SHLacDp;3Jf~f_$#0Hx1
z`nzUwEYA{Ss`sK1`w=W8g2|4?0l4F$fIpAP;-5Q}^hTEzw<wZtWy)!WWUsMkf9oRp
z<r;&t#XFSHAH?%NT=Dq^8R!)G0>3PIA7p<yHvh1pqseki>hU#<%8O`DLPL{t)1AUN
zG-YC}{9H&;p2NJ!zKH)xjbX;$5rZq6i*e=Q`(#_B0MG1ui6V#k;n~Vxpv><Illos$
zhx9NmFyI(jeL{*0kvc)2?@vX^!^V)jnZP@N1S<3ih*9%2`0vp)7*dF2m=Pz4jx;4M
zH*-vF(gz^Y`3go}3ZrjTkHXVMX0T$iHY2rgB9}$YU{CZVh(6W8D*Daj#$^se*+(II
zoEzfnXoFEBa|WDlF9E~SYdBMP8hOf!;;B0-%mJkus&Q;BOnWy9?#_GhZ>SPXG!@1<
ztM<~W>z(LvJp$P5V0y?a0C$=X(%N+q=%=^}!c2Z(&Xq^xl+k!5Fya#K?l}nQ3jJh;
zA&*Qs*$R)s9nkvvG*GLY1}VdUEw99(azi~V=J^}F4z`rNBEf{|#zCEi2fUH*#fsX4
zFuEt3t}l3oe+wh%m&NgT_IwGB@)<Gff7f|`)fv#_ePPx2R+`Q@_W*BqKBwiDXQ=Ji
zcIZ1_je>>7IIF<EYEMHMoUqWwLZcYE_E{;}(Q^&fE?Q0}$}9ra#;g3<HJ2lj$t1*u
zpR=O3Lh8aTBqFz(_=roAUy3u}w_zHLqf4+e(+NcURH$Y4Z^)0#hNC_cxP$xG<B!L@
zFYEPH!4rpx%uDybcw|97{+8{8uvfp(GV%asOi8QUH@gf1v&0!!KC`_uR*TqGzCoX^
zT>7jb2phiL#J#6uv3NlpBs`S{XcJ<V<xLeN-)$gypDts;iI2F<Gad_m+@R4CcA$Ns
zk9<;nVrtcYpW50FLs9S;O16oSoHM25iH8YTPj-eK2Zyoy;$31Pe3A$^ZK8J#Okqpo
zCO+$YfZ6r#C;jS}gzdcdk<5|dW?%K;e4|rIuzUmEH!UC9wX^BNtXP~pzXu;#l)-IY
zw~<s+WR5E>5tyfU5xcOjI4iQ9ZdUw(^WP4^K+hq@)-Z`p`O%9WQeH&#>2&r&!&kbv
zERq<CoFKy)n#{Z2e9Zc6!qp5LLB*&9YT5jtE4u%ok43VHHvhbAw~jDX3EPaav(CfX
zrU^{IF)`d3k%7OKL{>d)D+F<QRmRq}oSH1qVJy`halW(&*;Go|>_dNP+S!Rz5e2xz
zA|DS8wb1$+GpcVe7p-Ce7~##BB;yPgo)5?}e`DO)cY?`zT23yMRiKHQ0S&xw$2`?4
zM?I}euzB8TY>la;uQzm(<bRjx{l5FS;<hXqvvVV+XT672(<H#H-v}F@d(vqipCPj)
z6-%5Kf$gPT@cqU(!Lck|PB=#d=kx4ddUb#(ubYiA6`vr?YbkTY{sz38@E22Vr@$-4
zLH>SrkIwJA&-=qd0P|f@<kSQ>85w~;H9Lv)+*BHQQw>ghs~}H&D#=FYuXtq_0|j2k
z>5e5UnVgzGq;{Gyb|jqm|GtT2N}i{0R(+-;fy?=Pd@PC_J_<_a9Ie~>oo?6r0B@En
z!Z%Gmrf?>Jm=%nGCZ7-fIGBkEhHt@B-Hx%g$l&hunt+VhFZN+;JbUM@De7LCf=({m
zFzm|<a@wBPpS-)MtJK}9qekOln&%lZM@JWj6|%VcxMvWzejG@7-N(t53+N)72^jn^
zTc9=mCCC-+V#7uNgM7^3_C215wK1FK&x(bD%4WK2s~$U1cqz{IOQbJLV!?>t{TyMP
zdH-MzM5bB8S+lKz*UR)UqvJ5zKi|SNt<WQB+<#d3V-!v|xq(hbG@8>5P<MAZE%_M5
zf_E2d5*muqi*E|NJRZ_oA01FuTo1)L$I0S)HIQ4OL&gpS(@%m?!Slz7bh5T0C>O26
zWuZYJ-kuI~)no;^^UKNTst~-l=Ph}wQZ1M}A_>j~{-m?xF?lUD$VzB`BlDZQK_+|>
zEu|4)F^lJ?#>wKI?;04C=n9j}w-Il{AY73jS$Qf#K+Z*s!S}qDuvTa`*}lk~+~EJ$
zgL`#Qzxy$XJ^G#`&v2%Pv=-nAn=bm}@({V{8Ap4Zn|N01SbE7y0XX++qUH0EeIlF=
zvyT*GQNdCeZ7HManLkYzzB~nc#P{GX|0!fjt1Exih+vby|AcpEWN_W4I$~tDhTf3h
ziW{+!xTmV&I>(v#<Vrcclp{?3Rr8wL(hK;tdo^fhpMr#ut(?NZL((y=k?ab$A^E2}
z@Ry4uT9&0^(4K$vzdIr*HIT~=G)?4|cjyDRtC39be~xX!n>e$>WiVxJ8LOU`gm>O<
zM&tZ8(l4fsGkrFo`yG3zofJ)sZ-0Z&+qFP3#~O;ecz^Uc5xR2)izjMM;)dz|rhoOL
zs<g&c<KX~%5|DC_->=@Hfmg5LRBK82y=o>-kG;yZ$}B*MZ@oBfHGlTZb_ZAa@3>zs
z5-ii>z~=Ti!MYby(Ulb7#+qAje^m`xu3!eA>?Db2n;g!aGfJ5EZ=h_wCimkz1)sQh
zcJtg!P$*0Xk*+b^;)hBg|15<z*#Ae{M0N`-r7qLE*4MG>^#IbrB-mgTL+0HZfUXNy
zAyHfc-t&8~4dtr1HCqmjrt;j62YKW|Y9igNr_9az;KFX0Fdcj&-jF#>?*aZ9kjkz*
zFb`MZy<@xZ#D^NX-#Fa#mF6#+u&5ocsHafZQ$cXIa|gchdxE)d55Xxx6umKd4_dXw
z2&T3kKwX`yf@?8aaO7A6CTQ<w&J3_r-pLrc4+@jzQ^N$Ah4XP?s8H4Shm(noY(%C0
zDF=vJKcA-E6or`W_1Jo+pSJrwWlwr1<6U!o5G}Mb?F|f}`<8`OwXNF6u*L5MBCFM)
zc+)ueGx0h78Bl_Q>lPrJK9!Ts>?fID)w!#(vT$$YC7ONb^Ws@^V9@I*_uYR08(&U9
zXz_zoi#inAnL`x!%_q|DL+R>Ea(MBG6p0RYrT%v}Fiv8FWUr|i2)$VhNo6H8HElI^
zjd_C=ypL*9loKRAK7t>fn6i?*f8+jyF1l~=bN1`HNZ4CSDPEn#X=aK+YE~yc@>oSn
zq-MgePii<Z=QGKPbO43wKZ4W8tuU-V4;|~~0UFi|ifwO#<$<q6BKH(MsgAJJQV8-S
zOmTdD3Hf^CF7)!=w}kcl{oQ#HGl^Zs&rpkDQ6SHvPSk_53jg@d5m{Pw&zI!ZTf?B5
z53RR)fJ?c40w0=*$(mNv|9taF=4}b?OSCHD`z3hJMI8q{BxzBcR>gwDKEzcz8a|8h
znZhN5)Gefn2F-a)T!v0Sx9WC0qk5}y<%@%YEvy>Oa2cY%U(AKP>~8u(UW5Fb7K(aV
zMo@S?nx-C@LTBxW!uh*>;n3+CngQ#n+J%|$^pG?iJfjR_9%d4S=h7hIVu*Qm>ZEsc
zF-hY0{vCWHMX08&V5BpX94V5g&doOHeY2MLQ_O&@(+W7i&Y{=tMpE%5(?Q|a5c$2W
z1dI2cWW|z0>4H8zn7Pdcm$tUE<JXT=UY&H89`z7{KF0?3LfAY!cA=Ll-m4dU^_f7-
zGemH^Sp?DQS%ZrewFK&UJa^RkE3uc5!pb+AbXvj<n)A+p*sT8{IHVsh@VOa9gh?=I
zINVH|y`~F<BzSH9{vBeiT_o6K$`C$608u5rwED6<J8@_#?XlOOPQ=!9z^DKZ`RfRx
zt4&Fn=w2*IXd)`F*3mz*vRI;coy45<CO_}Tu&LF<H0?nPO;Nl>Vv?tkch(mK&VMHe
zx<BX=vp1^b+8Q^Sam9ccXT*}tyE1Y8L0J-U%82vXWyWoZ`XQLP`xKt)+62Z+jL6Jc
zPtdhjic3`06;uY?p%?ngcy9O<GXMNIl73wZ={<zefM(=-ZkWhf%}^+uLm!=5M6{aJ
zQK`osZ|)e2wYTKC&AUTET=^DsG(_Xgs$E?35+Be`=%ORTrluu(7DDdYVZr%=tHd@u
z64U!4u-P<<noUdtwxOI>4jv@#|0Lk%ZgHSTW-*q$9x^s_3_TDrj}|izXyDpn+^+Zt
zQ}!j3^XW5To768soo+bMKi^2*pNzq+dW&IeTp#(Xb`&qyFCYglY~h^08ZxSl^<-#M
z4ZM816MxKWBtI{|f@`&wWZcZdC^zs*P-ZI5H!c6hWAox@?(!25pnneE_Xsgj^VX0F
z=H-Ip{S8<VHbR<PCNfg#lR4jjDct=zF|=;NQIPP+;`7m^yboH2`C{k^8YjNe&2ulo
zvPxm*_46eBrLchOUB8$7=kW>uSqfo$z7`(7J`3-=hN0+-SLpcrpCDQ?9>b>!asT){
zy!Q?b8o@J3+boKS##ABRBlMIRk$(j9GQvPRUyloX6~wI0%)l3)c0t%aHRk;65XdN3
zfpg=;F=$OMEB<*g*=cZ)?s%R?4C1nJ!h#USLg_5EX{sTvoEIn^{KR|H=X2_U3v}v~
z7?kGkU$2tyV%}_uzdxMf^j~}frKT51hW%jo8$&v;e}wF{n~OQTSC=1>xm=q7W=Zuj
z=Hp@kg+y(-<!u6X9xug5{Cyx;@eju52s7v1JDEc#hw;0b1($xl5I!t%BeQn8pwH|o
z%;NK;+FJSW`C>o%EN;L-`53x)YBN}Ua^-uSD3*UZM_wfUp^{36fIApLJ-gq*jv_r`
zDe;xmsId5E$e245y_V#bo&eJ)<v2g*sUWYfndUvq1C!aW$vl;>sLF}pwtv?_Ka0{2
z((g!I!YMe~|B3Aso`*ZX7%^3IX2NKz4m{Ws&pH(xrxUk0!SdT>^k-rKHMTiOSE(;W
zj~!2|s>ZE^;zSADvmz2~L)YUApC6`8b=J5dCk4O7x#7l~i#W^kA>E`n7as1Hg?yem
z7sG!~OUh2sr4`}u%DRsjJ?mkm>MsNHO%7CSP7tkxFp_yJnihN&f>}Cm=(ON?bkJmJ
z!b5dDKkCC)b?m{04f$1XMkisz`CnBdyPkoq{6o@hGoM6E?J$|NR)t+#S;d}+c~0}D
z`x7PKN!0(UJEl6lCR$665tBe0XxO6y279$|Np3oOG~_JNl9prpes4m_U@JQLlmceQ
z8{${-E}}N%LqA?y4>RU|66}7|Mwf->v9-<wW7QL&_Ma#SPPbCmkOo?zszKN4u~f_d
zALzHO#>W#^SGfn<krhh)SkG&xwyj6W_?;BjuqRO^`@X>belUA#z9!c2`Km*=I%x7Q
zO;8ri6O5&s1+7!$Avxa<14U!V-r`^6=FU7KVWdT8FQ0{6e~#dBfD*)e8ddIJbqb`M
zD@oAgSLonpELhqbPQR_X1hd{Bft!N$G}C$~D^(&-Bja=-$yN!x7EHpkjsJ+F)jbln
zIe`7xc9>e&OTz~bJ#dR|rTeDdB#c;sAVRJTR_eOYr2F^igtfIK<D)$uu)7I|js?S}
z0$s+o;1%7~^^~l&bEH}mvLMRN2Ce@okoOky;5(Fy2YS;mr!0;5g_e-W6HDRI{0wqC
zr5U<E&4rUcL+J_+d3wV0Kinl63RWUF$N|~IM9;p5JeEI9(%I9nOf87qsLUolDtEw;
z8zjW!J-;96WX-erym*^2JznyTv{^2PuZx0Ml|||Bb>dc>n;K4~E0)oa4lj~9YZ1L9
zt0ZXuxf;_GTFK@Ycd+866t>1Hqo0E%mW;aq*K{-l(k>Bf%KT=MWqXV2|Gguql`f@E
zwdBD<YZ3et2OK=4BzQ615XVNEL6XG)`7Fuvq%TR*(^DRj6MJ&2tmbU0R4wnLAxmGA
zV0u<CHME<Ye%Hazu^HrSggR`uSEj)w1GFxFyuiu89L6q`7Tj(RL=WMu>;;WN;wSTw
zZJ~Aa`&SQGG=Db?H>DAg#&Z6?;6-+gIYZZN3a8a8#v}SR(oN6jkX&6U5D$!_1;#~!
zhhwtf#h5s@I<ArW&dH(Xl3U2nN79g!HXojPPR7xu6=dX9BZ<7)!RjroL%M!14g?Cr
zS(g*=`*8u0>^};BMBmZ=@p7;}&KR^K7SSj88ig~<z;WMxOr34S?N@37mo@zN*sPmu
z2}`BI%R&Sd8OL!(m>klLTIjvL9vTCcaBNQl>G`=1ofpO9cy>H@Tjv&xx4tgudb<~^
z=|aqvROfw2>0mx~fOe?w5j1~WhW+>F!p|#PG4=R;^zC>6TP+Q6PJt}WUa|~ji5Jw}
z8;`3tWRQ&g6Y#$e>)}uSN5R>PYH*Z)#}>(5BMaQbxYDoAQ24DG?|w0bkN^`1F_z{c
z|0yvKt0h6}<`eq!vo2Nb+JQPr8!_T;7^vw*3r2371KH|F#Elffs?f(MH)bsL+dY<9
zkbXmuV<iS}LoeWox)a=tIgYeH*r&4P!ATm}ycY6T21D$*g%D^H28F54(6#Ov`LcXI
zbK~|hbmI;X^~Kqwb7}zU)@#w!w$%{$OOn_;7-G%Nwor}r>+q@HF|@c-fHV2u=h)?J
z)}-PsuZQb!P714yO&b&hr`?5_!rdpSZ7$(t=8PepnvpQ+&J0MLu#c=d+zj`=3;12G
z0GD=E!*jz_`fQv5QxbShpfPC*%urLu>ZEu~4vK*z->;D$rxVfZwG#Bbze;v(YoiiQ
zTJU#DI4ztsN_(G=u<Cv5F!ESGU8kgh7i;v<ZP!ZTI!~KtmnsUH+$OW)y0xHTtPWQU
zQ%QZ}Sjg)O#-qOH$QcD*KQ2?mmBB}7%*wZPPx5y3x90m8<Ak8>+AwwVRtD=CWvt8R
zNZgQEL93$%$%8B;v%i+|*|%LFvSEPgXI=sEu5A8YNub?-wZKDXfXKwe(}%~4>4C$u
zXb<nDNQ<6`AJdMYa7ilp6TJt*y1tNa@~!m!rQPgD8NTcB+$E~l(2DcU<l?jpA3ApP
zJ}|6e(CXbK8gAePoA-@})d5rSm(@?Ypdp!<o6MsP#f>1#`$sL^GU$J$>wv5-pry{8
zWM<uB@Un>l+rYa-_sDIWds+t^f^PzILx|JdyA=LdEN08rI1!bD(PZ_sLV@U~2^f>_
zkFsWfw)X{K)MiL+^S9wYF$2iZ>ZI2WM5EJAeK>pm0*hzIn#Kj(M;q=J{ag|Qe*Lnj
z=Vl9ydVD6qJ(Z~JOopJ$RjAX@A!x8#2`^sNRM}h%7HmJSLnbWyLFSdn(RIA8>^G^D
zOcasA-+#Z;tm6`}MtU-N=X4FFT33>WN3(dXzKA%N@%xwf%h<8}0jMphh6wE(s?hfj
zjqRS|49k~fheRAyI_6d0v-e}`Ze#)E_y9bAQT6-J1DMyJ4XVDeq%7hyTK{b`%{<YM
z@4h6F77sHR?q5YxYl*-~a|aA)KQ|5hqRB|6q+{>=a2V)%&ibTAfp^(3^?oJBybxW^
z*m}j|!e26+$HI6Nnlg^*e989*?Z{)>?O%}G8L9N0v^_}9`ppiDzQ&BS$9z`sF1x<p
z1xGdAfNgI9<$^encr***WW>3oSPRg(nn>QmJv`|zhnLdQV3vm{iF_7SrEb*-^IlY9
z!@*%tPA{UbMq{hKx{Jeti#@DH_H#JbxDi>a2IBK77S>rN;fRzBtr**cliKsC;r=%u
z|FDc6H0x)z`*!07p920pPbIHj?PfN23b>2$essTi6|+a>0$O`&!UAO<nBJv8ZqIe*
zzX3{+nXJlGwrFwAw~V>piHIHdyg}bb97>Yg;MS`|STshL&t<1U$w)2f8CuN9xK1JM
z#ZU0SX;r#A_zX6#SEg^C^8JXGO?dOmIEbI)fGG>_p!k0+#Aq-MM&nmtg}*do<MdoG
zy5%vyyXUjA(I3bzMvwNHbYlMATHX^=%eYJIpgX1uF^)oR+^|C|-djDN8ME^nez`h>
zQ}S^KO;Zsj#<!3)8GKJ8JCnJ<X=50sOM`2<@sd_<RO6HnPG*{xO%*)g`SL2yJBgyq
zMke4}0qrrAAoWk5!x6DUSekuZP+1+08>lJ~=Jff$=^R*k{zrKlAy?op&dm?j<oUOI
zVI0q|nC$wHge;U~)Zau9=FB)wpNWCp-7nZ$r5bWBFAKDqzR<A-HEhPAV4QGEm6Ld<
z2=T`i@LS$dRtz4Qq#TyTU9N6e$&SGnk9MGS-E`d770P;8ub|WQhDpzz1W<gwh}91M
zO`Z5C_?&wtIA?EzVCFkB*za!!1HGb*;}k7o6EDP74~_`#Pj7<)$9W(*tq1M~UW4g4
zfF(v*C_K#ttL_>>#&{2|An-e#C|&{i+QG!9*?}|!Nnn7rD!vW7N>2P1Me!fo*}>;L
z@3Y_+hUx6bmCwH5AMZ&19lJ?&6d8fx^KfjKwHB1h;#eEicY@y7P}DE71(%v2@cS<U
zeQ)GJNA3yym|B2dQ^M%O%^9e;`a9o6ycMi!EwPL5oBXIa4xaDv#rN0wPLEnobltlc
zTLsdjbb2O6zrJ5J)X3l8P9(yYi7VmF*^@M9f*5R!JB`~y8cppbPho|T67{(fNvDWE
z<@;cDxhTF{I_SI-+1XZJbvscST(m<-)#7Q`WV-{z?ren5L1O53rxvxAPQqDQUqGy?
zh^+fjK>sSf#lg?v@V-$MCa4Ufwv8~jk9e~3Ug0?L>}|R&R}$N|#G-9;1-@4lp%V_Q
zfjoZxaCK8<6NS#2N;uXLdU7*sur?T8PnU$r8+Y?Oj#sp7?>4XrJ`EEV9Yg<b$)q*m
zB(ZuT!MwTNK*JSWFj}gBIAbPR|7^dYW&2yx3nBg>S~L$2M(iW9eJ|LEfaNe(eId>^
z`%5Ym?~uB08I><zt8+`ZJAx$&;dHj(3AHQt0?mCvcrw5o?&PbYqt!-IvSb3*wmhW|
zbR?mw^bO44P)s$}E+L<NjmU|#O0at+LOma{{H*ejid%<5=$+jdqF4k)fA=$0|J}os
z7sce|k)bMsZBMH5)!qxNMRUP3HxCQTCy;105$=fe7t-Yy$d>%^AzLdilB|k+T;+a|
zdl)TD?)ABYxp5NpDA0lZH?CnzT{up*RA(?&6>8cyqOi?2CQB!eS?72TZ1n;mrS>d6
ze@BJeRrQH{G|52Y+o{xaVG2Hzzd(QN3V>f9XJP-w)%41=gV^y~6HC=nxP;dIoSBdZ
zS}+2prn8slC>erz_G9>OIm_Ir3L#70$#Z=*3_70wgHB5(ahL7nn0*(#&~@qQst6Nn
zW^~^&Cb=O8KJ@RvEsPD3xqA=Cvid|Pq7s@<UPsf_DW>O+O=6w(&%)-(ZrIc&PbVpF
zg~Lx7EWWWH0(FwB&Ln-MPG5L$!Ndw!tX_bQ;h{J%$AJVl?<Wa|%c!)CHg)8mt)!}0
zD%h2TyT)vwUxdu)8ovZ=)9<1N&Y|#UK?*!KFvqvM6>0ucH4^yi8xag>QFYlA+Me43
z_l9TkoxV}<=-yxYB4Q!m!L@@vgh%-Eo-lpkFhZZbmg4F>3W;oaIG+4lPQ4r_6FCVx
zV!0{>@7wbCPv#ueacd(dG{2FZ#pl>%%9>>HeFJb?QA35YGw3muucW0xh)hqqNJXyr
z5ZT;%TI@nV%V~)IdoN1Y&C!7?mpy3)KYMlTxJu7ml>r;`5b`Anu)8#xo-h`Iy}7oy
zV}lFo%t)jrC<HoVOu?bjA9Xn!d}^|YEIwXIbG)-jgKsnaHrPaOs?4icuqYar=v^hk
ze@t<-RR-d_5=fVr8b+WltL3s1EDr`z{oQ%|UdW4Bw^>%H+o#fpHsgqlg|(na&7EiK
zw373|GjYXe1~$95P|3`2`sCOW2(UkmndPahr;G{oY*r%O841+&LkoTNu!<gY=b(6w
z0$K%JqpNQ7eaWJt+_i{QuAws=I>L*Y&p9i()TiQ1nf^F4=>>Da)L||(RxRYJAFN<5
zrz~RzKka2^A2Y--Ne8$scbw}E3t~EW{>PATIJ4uYFBrVN&Nzyk;yP#Pa|WCW^R`Ev
z5$QOKM>U)9i5jn6yi%h|!$oxap>QsVab~QRS2F*3OL8T)G0^p*0RMG9hm5Xlbd251
z8BSjevBIAqL6XnhZ<OWo>|-IScstnFOL4;b>zQ-2nz?q>ea!o^YvlRhJ6biNm-(rk
zh^<Eq82yGI_TfjuE_>zz*K;qx)@A*$i{EwIA1~n+O0^SWb(hYVcadK8xrKJdd}oV?
z5gq&T0GU;;4?oJEVE(%ztWA+1`W3(EYo}E7zp`5J@(D|CZnx$JR%F6J$w%z&)?~~=
zOX*L^42VxFqAD?$vDR@r>T30pxMq&5+;I*^-fyStKUv}DUkQ8{<uEyz)k?bCP3VY?
zEAt^+8CKPa;xySgWKoDB+&Lay<=rd8dFgzlqL=o7<hwIyeaD15_`DPjHk;$O`gp3l
zu9i&R`$1q5u!dduz>ps7=lv>oteNe+*E^U=A(CcasjZbfC@OBGPkCR6O7=P4k9m$3
z&D@Ds_{@39O=Bo><glglCV1p8gyFNA+?Es9nLQ3)=++StE?A-fik)&e`QcXXYui2;
z7ZQkPUTUG>?Rj$Qk2j~cr-w_)<w)fAHMq9Fl&P8|jk*y|f+zB0xxRBFre%LL;1Ib5
zUV~RbuID2={`oG><F+PzR=NVZ!3=lk{c^IbSCf%r{$u(lnsa&$xm=DyA=~n~4xYv9
z(a@*X+`Gx*Y~+lI`18tbWPiGIQb8xc)nghSpA$&i^6Ox%QW@C9iEu|v%$UZ*awz5+
z&YS~Z)6uz|_)gvsHmw^*`NMoB$((2Lt$Trwr>ZccJ+q<8b2<LK!rw_F_+DZ`8Zx&k
z@kh8VZFzbDxO4w!R19LNPa&0>2DGvGJ(@o30p)FdwD-YlwDZ0T`EtRS5?qD1Y(g+Z
z<_+H7qRMDi#DHZ)Bwh9WJ_*bz!{O>FTn-Z@Fx@T9EjNip3&SjYSjclF=j$>tWr47B
zrWmvO@c_}Y(_lVt3xt&^O|V41o3fwN;IO+A4#geBxZrnmym%(oDtEB5a(pMHLMBXZ
zD8Og;hlzh$67<tz`s#)O=Q~cGn|#y?@3fXP?-IU3_N{+(?8EbLDDeoBAN>m3E}G%<
zx-EF9gW%nHt6}qkBjEdH3t1d1&J;!tLbdHRT)wmh=auB~yn@|i|KmJ-xbB3hlAJ2@
zbTi_jX|Y6)=dF9ipF@*dnS>7n;)Y-$ycoT<>PxjDER$`fomG5C?&kx*E}w%6KVqnC
ze*}78Izi{U_hE9#L9E;6g*CTt(`6Ym;hc#BOx@oA!;>26gCmAas&E#uH=YKcjDF(I
zP(CN#uSQ&?lQ8Oy15VLb$IuJPc>QxK&b0qVW-rY_P5l<Z*Ei<y%utsyLGid^!gZ9|
zTul#sI?Rafo(@ZIir~tp^XT_XWtgau!*`_dolpn9kj7=+G~tg3K62?L^HeiQz#;y-
z>CjE;^hz-@^fo>JZ#+XhqoDmqKKdRtWcJ)KX9i~aayy(uiFLmbjb1DV`R<8TEk?$)
zqTmnx6P->>Yel&uh2I5>HZMf3LmGtf4E27LL$`iC!<kOk!kd9l$=|_TeAYY$;@xxc
z*12wQdNc%qYc|oRvwg968!%Ik?O`@*I>2r|N4ILKCbZ0x;MBiM;Buqwn0#gmh|iyf
z*ODc1H69@u^Mt`;-4Gr0T_f1AX9*@ar*gi>66x#o`?#^sgXzXwK;9<fyel)Ais6gQ
zDT()V!0Q7p8Tw;V7*&KRCtp;BI!=Q7OS566vnzNz%wkSC>N9ccuVMN*H@y6@hyK-Y
zW>(%<!1Z_VeYqxAnYHHo;g@eET}ULj^FE@?ZY~1e^)`})a#dAhbLzmMoM(<N_GR8&
zbi`GmzL2hbnvj%OrY^o0l8%Jnv?pi4qxly}w);tEjy(Y(neyb*Rej7}6hp>58Ov?9
zx=rfTih<n}O_xtcI-83_C#_I&_}@R;^O(i=Rtw<DtP8Yho;?Qr&Zn!g8BpEukGx(p
z3IDvcWHN~f_lA267bksy*aei<S@3-`y<c(D&|eI#?O?Y&7z5rF2EY|{lR?gk@z+nn
zt-Fl48F$oZR_HhE9!cO@TVyb84grx1A&{kS2Pt})7&P$$rc9jAT>trqWT%P1{EJd3
zz2p|Pvu~jEbv9lUtYmktKaE0>uZew4ES@?4loeOxa|*q8@l$COt}D}qgB!AO?|NZk
zHyjS#n{@H>v1r`Ad?&0CZ6L*t2~|7tOc<l~Y1A@cF=+5Sxlu0#(9to$b9sH(uwXNL
z`KMHshy5OOoOy<r>E9-;U!v*#&1J|aiNlLEPpH+=$!NT7F(;B&jx9Ww-E+lJ5_OID
zB-=A+FwjW7*=_XQ@t-{ZN*XfDY#HG#m8=B+jGJm~L3M9cE?*=Z7ldqveWQ|8_5N*m
z)}smTiISlGS{E+<ti>PmbwRp486$<QEAQ#EkmLQ6R!kWn>){Mc77KuZqzmBRzXsix
z9iz4V-2#6}X*emY2MabxagKFU;No^+W?@ShO$hIV`^S6emRIXh-Od-zIt%a{&rMR<
z>n<qFw8AKpVzhI01=-R#eEq>4Cyk3jZn-jRD3=X>@~=qf{CL>=W-m63NHUizmQru~
zOLSAC9oau%1_$P~z+tt00{s}it2NOP1FN>;xJSPP2X0Dmcipmi--8f6Xgn7#D!PEo
z_I}gFpWowuGTxO5%6=HVsTJiO|AKil-&8GGKb=PDWL2Fyun;p^mqXEgaS$yx;EbHJ
zNxQEIepx3$7)=*!ZQf0j!}#3%$$A>Eae<A?@CR8tQS_6QA!|MtQKy<d{70stgxUdO
zRC)$K$n$Ipt2nIQu0X7V&Vkmp`^5j#0rEh06HO_0CBtRcXr1^XObQl;m{&XL3Gc(0
z=VyU)58NWsE|IjSDh@NWP3ZL>HE_-Kqv`o-Lwcse5ax<Hl9A8jxaj-k<nNgRl$_E=
z0v0Mksa!ld#p;63R|8!4%8#~(umX8SA3D(ifNRS$zA~f(=QnD>NdpGoesu#qg>nqd
zb_Yc#4`4Kl$b&ft(09}VwmPq1x_u~i7&QuJ#2%wxJ^zw#N|J1j=q2(pZzD|ZSc{GR
z_gUs3pRu2=O+$g_;mz6)UZWockCZ!bbEp{}VvO1FZB=B)U<Ce1`6|H31Q1-_UbS(H
z8f?fM3-|4OP%Uu_q{K<!k)3m4^spXWkaH63owNz?Q7Zg78iT;ojgB{2!pBqFXk^1I
zHh5eeTbjfCkE81jr~3Whgea@1%rX*^tml56^G2eShEXZ)Nm7*NCk-NdL=+`LN)$z9
zoHuPL(vpTkOQoWuX(av5_vgC&%jI~+>mJX2m#IF`5nZ`j!w-&>;O%bOiqdTl3g4fY
z#vRya!FRW1a~lU=<D4D$ihZEsUPISW{6OCnK1fBJNmO4W^0Yr7a=5daa}nfn=W3LM
zvrjJ*WND8ocdZW~)F*K!^Ox}j)8|>o+HSWVTx!P&Cy(Od*LU*Y=Gbs?#!;NsmsP^s
z_V@Wpb+Mi_YZ$+5TeUD^oe6(;h&^{)>7lUs_E_s-9iz*o!-_;P(tm{ucWn|5jC)mD
z`!`wW8=oNXz0@w88tBMrIMj<&lEvXqC81n|gDKy(<{9_vMk#l2+#{|ibcM+BPBve8
z)mg}Q<?>PD%)a2HPX54rdEqHj4dF5Io!qK9iF2;F&Ue@^6-Eye-ycU`3T8^b;3QNO
zxtwK(xy5;xh25Wi^PTUyIJ<Xp{De(e!mmEbB9;59eEg@y-0i9YE_dQTu5P*=AMO()
z40_dBHYf9jFr)7yO|j7teQCHU`jH<hT5K@6bWYnpk%jIueqm96*_RP<qEBD`3V&?S
z5zY{|P}&aD<TrRv=45mtMY{`S_`9*gxS#u~g~5vq_*Hv`^QVth3ro&)mjz`O3#M#b
zFU<9-Df{n?xI13pEGq83Q<m(d&D*sP;-6)&<ZplaENJ>1#H&PJ5y@x>xc41LMUSgi
z@cAc|gx{twE^YW^SQfVNe%Z&0&1Lsi-4f2W_2OpAO7pv$-U|M%i4zP79?oyuH&--k
z0eG)(0!;L=;*{4ZaIFIv_fVWA@w%o{cv^Zi_wM-~{_nh}f=F9azO{A=?-+TNGyOMG
z6k$D)Z{I7!Kd#_;-DBarU!bILQ~Q0<>hY&U+ugvMwgmDXryuh->~y)PN*UgFo}6`%
zORm^oWY0BEQW9RQhp6GQGB@LQhNxfs7Ef)00dF>^RrF`2pU~H7mdJkL5y7}zdwzRy
zq9C3UIaifkeD3%Tk<70l{D9r&T;y*zuKwK({`w17QR&%_BKt<Mhr8nkzja`;aAJNS
z|08ybNMu|uSTMC%AT(3tFZZkvUM@7^hYotgc|LdG<}8cne=F_hk9M6D<;|}ah0l4)
zg~;6zHafi(9)O5n)OVG$xOIYiCBI+PEAAKk^g^B=`rDc_bR58Kytr62|Mn%`YnB7|
zKBuBg@<lqIc`dcH_@a!+R<1|1vukVFe>W9*ztSK+%YHwfHP?juHCLQ7u-iej`okAd
z>cH#5x`u(=Z5IoETdy4d;@d8v`{vEu?~oDv8=awCc|o%<DrYu7yXt~4`*W|bZ;}q@
zoOqBM)VW#ouyip0zN=iwRuAHQf6I}KoQ=q!T$x{GASYCrtjPzRRpbNI?}&m@&j`og
zP!>g{R|%JCjwnB)l_Hc+{!^y0CrhBXWdYZ8PL`Jmcj00?OSmaAI|K_R=axy`+A7L-
z94Bbcw-gQ<^PlLv#tmW8-9cR9V{LBk7)h>b)VOlNm9N~#`{}~cT4ho3u5N+<E;}xI
zEziaOZRVygxx`W3vhwwk4%UhJn=i;8ImD~`jN*Se9_3$by~(Y~|IVrWtP%ys$Os=?
za}-@zv4Z=NS;-si9aFyQrJX3vv6Y+gtcy=6d?B<trDVNXoX3A)P>*Po&r<7qp_8re
z{nNIVXt>Y!E4K+f4Ym2_y0QG^jr;k}&GG!`vR&NPk}6&xUL(oY-h8DOxaXoVTtM<A
zK3;Y{e`TR<`RUHx+?yp&tedv%;tUGEagBTyKR<nw_5C(oe$BX%!tx4B?jLuM&-fI@
z9l8}>7GNsQ_n9irlAGSf`35@i;@WM|rJwt_Q2$%Pc<B<pYr<J!z&lIMrE!fo$6kdW
z-EF})eR1ZrHT^|Z+;rYZEnHNpHk>m)rNfO$iRVrkdGVV^$cp52CW`(VTL~K*P_7>-
z&RES=5M6sVjejtFkWjzkpvZkdtRP6d=C|G+EgHJ;dRgk!??T&iHKKD9mh#V_E-cbI
zDhlo>5n;DY=@R*y!WR`DoYS52{IKM~A|J*4vQtZEadWy#MNJN>qSt{M+!zU~^7=u;
zg$k_}{Awv3ZmjznVeNy<;+d?7UpRObKWX$((W8C^u3YAY2vh$Ey{+0s@1ELlSE41k
zC#nL`G=sT(uIt4z`|*{cjT@(M7$e2G%HH9ll(hK+i}OU<{#W>K?HSz56X%6LMuQi<
zj1tndgFGkp?R%9Lh-?H=e65ZncX+};o)%5umL7U4vJ%De5&p^imOOE+z4btTfs7I_
zc~zVXGPb<TS7(;!g<P_ryhS8T5_bh|eJ5cZwOib2D1K)+`G+De_j<VKPn)x-{l|9x
zy!6{Lnfk#Z@1l>Qu4CE!>uW=~><==0$;Bk$5XE|-MgBz2-Dp2QeMX1yl~x(w9h1t%
z3B$O*-2=-<%=nL+yx=bPBE*mX^j(Hi3XT;zemcV)YLyfvI6meQcOK(rO6zeF;=0<Y
zz4Q2vMa3dhuYYB)>&tO3S&Mfu{w>agoWeiRHL~`y%o1r;-6#_$fC*oFoaFy=JjBmD
zd5qT&J|uE5Il<{=$nhDPU1e*RM~Sr3thmCyY~k;u=bX;7Zc%q{hL8>z3G;4*a*6&I
zM0poW%3hls5Y^nO7w2aE5U!O;<-E_E@aa0o#a)Kt^PzVYKjGigvM-hSe0ISIey!1S
z{=(KRe24K|&ftDJ|L}A)H)_5Mhs3I4bN6S$+4`#7{iip0=U-n%64xFG`MPJq&xgK?
z<eFv5mtNh+-+QXc8y0DEvZM7l>vQUy*T6%<p=&2_ZjwU<&1qYON1QzP1j(iRy-4Zu
z+rjzPyM<AldF(`foWuZbYws%Skd4~qQKy!2-gjQ{lb+w?dVYEePcFQ`Pg9)5`+C0-
zUU?TJ?i;$y{j%E6sVau^-xO}~W>4LOQ9Xv`4uylbbw;VYvzQBA+TF!RM&9IY7W8sC
zwK~={kF~h=_<YV{j)%4Dy6fC7nSFe=<{42?>1Xc!mT$c5;WmNFNp(SSkOqG;Y9{Z|
zry-KhJt!Et`HN8Z{ycucG%ew_W3xpXkuK%t|I3~|8^?Qey|a!~F6PMMr|8>|2<}$L
zc<X3yo<B80lQ%6*;KFwn^M{O7%Z4p(6Gm^oQubE2PndqMN4Wi`NLcqKg<B%lV63-j
zb1r^0!sDM8me*dh<UfY&wKhmv$MyINxp3)Lt}=U=b@Kcx+-HS&Yc20^kz#$DFvYBo
z|0MQGTdMEp+kYJ8Y6V-x8MFuZTb1Sf$boJA#evOS|MO&_c!~1{K4!xB(arp^%(48)
z<SW9I2V&kaU`5I95r4Vv@83j<iv(OnR=IHX)qL)#;UNC}vSR*EvpW~_FW%bwq$+<u
zvPSs4dZtL29>%>KZ_V#m|AN~U7*{^VAd3r#I>L8)PvVa~n$4w%b5R_PL%B;wq<Cow
zOU`dVqTosn1RG*Zxo7`r^7|KzDnFiin%nfrTYPV3^7UnU{J_|2+yF6`YE~61id;9B
zGy4)%vQKvj?>1mM=k@BK=uh7+UfR)EB!91<G@x!DPam^{Wor_6?*R?mnXYIqVeMA#
zs%s3lD9)ffGUyba^dUoJeE*nepssXz+~Pi=tD#JJ>0>kD73WM&uBlSw<G6uWY#hNU
zyes6EeH+0gC$Hl2K8eI$v6*~~ase;<W)&ZD{*-W2$`g_Ar9=GMCRuBh`!XV#K`Me_
zu35r2K}UHj$G3dj+iZ>-e&T<A5N>$ucfsC_5YFs?wJ1F}S(NguS8%yRhgW`5Cc4`+
zm7l#VhVM8lEws3<$(1_|<(vasxRv+b@Eslx1eR=E`MY)hiGrtG=EB}{!fxq10tN4T
z-1(K0_)}Yy#l4iS!rBxS(TEAj+|AmIGV>3i+|$p-{7usle8i*&WfN6*30+MxMcbr3
zt-i>Za69(S7A4zii<Tw^h@9sw7pko@=5}hT^B3`3aOaNL2U;P=%h-qUn`JXZOLLi!
zCEO`H^0`&ubD=``{e_gUD|L;?zGNyFv|t~fV-+dzb^U?Q)dK`Wf2E5qpL{G@pMF<p
zUZ=->V^>6Hy41w)$gl80IYapK89eti^`WpM?-+mM@o28Y*NW3rcMxn?X3Cjd9alc9
z<PF!-SS#Ekp;aEcV<5M9@>ni;t)}xKXNSn-wWGEryC?6{+O%n-`TxFGd;4$oHuv-1
z;_toQeA4Pw8&+5*uT?cZBymXM|G$KDYOq*8k2D@{gtMDG=^ROiTKHwU<Je2}jWHOq
z$Buq397Kc6%h2p;gLOj|QBd1$Xq;U}1rrnKTkivM{B4ZU!SbYa`6Q(&uM|A2KR^~@
zKgf^)_1G5Ph?1mMEZTGt1HTQT@uSQr-(xto7Ke~i=t;WNEKA!f4wI~N6pmS526w*{
z!ULrUc-Mq)nZ{&i)C`*^r%9$vfzmSavGC17`g_<Oes4AkUcN}69q-Rmow62L8@oa`
zT0(HiXg3-852MYmYM4g95jEJFkn^Yv!Stkb^1S<mtW^|f@rQoA@3;;JlXx=RlgUa~
zpT;2lKCAHP5cXS|XQj#&2np1r#<*$BEjOQ?kgsN^-k)JVPt0UHZiK-1Y7UFt^@II3
zJcfU6Q5bX7j3sJ5W}<dAybiOZ6{-8dZyyQ|`7+jdB#UiqUd@zsM&Om&Cl<ay1DY{S
zRuW2Tl-*{8J3q7$a`GHA=-A5cn2%(^i(A-@oC0QlNChqfvYGqOE|xZM4b-ykvI(AR
z*p&HeScGzu)y{xd@RxRntA`QZ)<)xm$wAgUcpcUUGCW%`4%%;9t>#}UWVKC80jFSW
zSvmm<+t#2s?;JbdoyVNi%9zJPD?~KLG2z->cv@L+^`_T@)ivH=R|5BA$s<YfO0i%I
zf{(EmK838O@(pXM+Qtt4X0RHyn~hy^h25wBFzxRgs2i+g3ZC!SmPt;i+0ucOoG=XO
zZez`@SDDP%IMzKrmbo<<<LT%tEIoAruGth=`DmVn_U1Bn+ju-yyHvBmYaH3g7Z!}4
zT*WRHuVsg_q;WHLE4z_76eAPo<LaIYcKKoin_4uPCG1SKGH<(%nsatYo$;FK{qaJA
zlLa$P(S-3A4pQ^RBe?L6)%Eg9cFoHYJ5FAJy-x>w(>5I0r6DZv@Lgu;RK=E_a)*Cr
z9Sm<Ua`!pUcK%bt?)gWsb;S#6dznB(?zX~g$!1tQ+ylLtJZ=tMgf1;5%KFM6n4XOf
zw<c0zz8VetVu7{)E`r<o58ls`sBL;T{yuKTyyvG-jufoOO@N#IC`y{22ifsov3pPp
zYYyB%8vici?~`Q|Q<4DNwqnTpO@hm_*SOYy9Nj;vF{gYqwqG$OIe`pK`R)q8rW}m)
zQo!DA!{B^!8Tlzhk>QsO=+WOm0ag29k@F9Yq1wb6_ma|xG`3{hNle}R24+61>FmZ1
ze6s68;_hI`_w0r5lvYGI`cbvV3G5|C$x9BhQIj>Ga4r%T*b7pcn@fV~ENs5K2ND@E
zNMi-qTU3MtUUD@1&<xB_m8RG++GH`W6LR&sh@#6#`tt|ElUHbj_grFGlJt66HA)sv
zMsS}q)cX@KNTUVYQ{Lie_)9kDY&bcL{|$+QgJ{99d~CHd!NHm#n0M(2F8R*Hz0fQ0
zpQnqbB@(33phvmi7NX+HVYq32VBL{f?62HW`l#50+c|mQ%r;QEZVF2F&7}9f`qUj1
zPkq;aFuTY&3`~6jC*6fqbg3M37IfhAk^?ATGZjfDvZObFBdgzaOzK)5Jj}bTM(lsa
zUd^z^%4a6zY7j-1YCbIE`a2d8IRtTkpRmiBv21SXHmqNNi+$Dl!vZf~g4euqtoMy$
zoxk>BS=%I}B&N`Dvs4_d=wiVq6tFv61*Ih?m`#!%_RRXt#-vPxnxhtb2~GM_cLKfw
zCmeKdW?wvf*q4H6wkhN<I~8|_)z+C}+q2J1aLyTm{v^b@sNq4*Tc#{J&NgIOv0Zb8
zwA^MVDtQA0J<mdbo*7bBM1jR@LP2N<GBP;!SlbwZlPfTxDg_O}4%nQq8y7CsvFiA0
z7I?}Effd)V=pn=GMN%}*C7ivFT#n$?Vc2+a4t;bxL!GK!Y*w#3vYbLN$RG%f4nrV$
z_&Y2H*g#$wg+1;=$?lXk>Gn&Z*<}-ERh1)b-yPB^(jzfQ!mG*<bSkca^=uWKusw>6
z7t%2H=Pit_4`7X^YiO~`O)Tguz^tkJadg;KMoE3l>0~sjHW}dUvwaY~cS4M9112{9
zg|dApWY;ah+m#~_R}{wntjwi>-YtmRB7$f2V2T|i4}D*0(wX!PGhdY<*d>Y0`?VP!
zmY?90Z$SRN1!$DM3Db(z7(HPL&U{RT&$+8eyK6+-etA<%Tqg6A--Lzh4Cv6|ae@w=
ze##f#f<kg4?zudM%9<Le?l_E(ZX9j6;e^;ZgK7FhG1<0$5LFt*z|>#_S(i?xb9<hX
z+)f|L(VRq=A3aBt;Zdw=zJy=rmFQcUDk)D0p-G<37)OHy6Y`hP+SW`m-TDF3R;kmJ
zxjg#D%TScxd(=3!V$f|n1Y7x$mH9Suu1LdcX+}Samf^=>hUK50(SNFI=t|oe%&~b+
zIW7A1e#2%`>-VCmZ%awD(;f|q-FUysfGkR+1z1~wp8-FyO7|7|?M1lfKZm-rM3j49
zA0K~e(7?q?%&u+*?2|PS>R5xgta&uKJC<3!Ze~R@6wxWChurx`*`NP%u<Nw~3>HmB
zn&2`DPu$1b%;Rj1U^PsAj*HcR1bX}J4038i;C&F-+&c)x11-?GB^f%?)N!yh54x>p
zEUTW;W`j6b?cIZ8&MVQkHk>uw%wVFu3ix8Kg137k@kZ_|6V%vZ@5OE;O`MLj4UNok
zSu7+@J2T}I2DI%2j~erGOx5d$%=e>g`IB@!Er`XTOYyidSB<HQXBwYvk?48sM%8I0
zc)2+Zfj!UJd+W`(Q9J`4-`p^B)NI&?WYWxwtJoHu1f<rb!gKLrI{fGswXA*5T7@&9
z-xdI&?lBzC(Sr3ZNeXQa!N~jD5x&8OPJW(7gFni`VDCo=ihkm0;79s8+lqpsHECYK
z0j!+oib(Ut$n@!jo8d|nZIh%*-vi9i>jYUz79x7v16<v-4~ep0n3I<lWW*6=$|vT*
zHOZ7dC?;Xf@SE7O*p%YVivRAuF<8<37z(yAO#49`)vhq36CrX`o$XG3#r-VFv>yj@
z<f+$EmPW^Vvsk4Z+%}S@mJhq=zPA(=PWuR5@&BagbT%RtQy}AT1@4!<ajnCF4kztq
z+gvsyK`9BU9{Xr%zq}wz*#o~9Oht(J9=J4N8R~v%;mSm95=1YDf68T;)auY9`$bf*
zAOV}RX;`LkAK{mW2>8_XBvthYS9*@1?AtV)ogzLrq%&agFB^7ij^UAAK0D-}ODm<`
z;2)O?@6qRw@S}#s`PZ>uT1znZ$Q%UA9mV*H1l%($huSz$3!j4j#*W2FpMhxG7|-IY
ze^bebdhoB>q3dEui!;W-@|O|Gi|-x%>)JHK@fzDPF9kd5_2^}j8l~PHNH!PrNcboS
zX5R>nda1BetH*{zO)y{SNegU_vHbf>(EIWSYC}TkX7d9oox2iGH1A<d=XdOx+J+ta
zA!t{(raHNoxR~05(DZ1sY>1^NK{1%T-WNN@GdeIii=uj_(^JtSbo=+>T%iX2$}fY2
z-cs^DG?qrK5uw_#mgT)2BUlntgxrXQr1qs4A8TTuv~&T!9nz;w(=MX!t0c|%xEA9t
zRnv>$LG<R^dH8jGMaGnVHcQeLhcr`2KXDJuNl!vc;0)4Sx)!<t_o1%fO_oRN@V)RG
z>(0J`<2wvUDk74C+rvpo&X=NsAK{xy2%K69@g}U1rkgNKnbw5Wsp2zrCNQaOKkikG
zpgqG9DB|Y;j1`~tQj-&~|Is`se{5u?yA>%~*%l`wQ=$JunPyHmqW07xRuDHHl{b&$
z)`C(RKK~EYZVp4}bqj2DJCEezI#%0x4U3PR!^XZTcyl9(9dp`G>c7Uq-1`pvZ8k$y
zp_<)UbBQ%KZh>`%3)-8LA(<YFk98wRZ$&CB&BSN!2}@Y7dBh$%{APA?2gqIgdE`3G
zK$ziq96ZXi-;RZF|9S>G%L8%6eIF}H5n$du(9e+XaGx=h_zBnGp}7KTTV*g#Z3#A;
zI#Q{30;X)+LU+%nu}Xz_TxCaKbMFPknOX`KRlj0SrjLc&v73<BPJz8`5^g1|rO4@S
zFnstI(f8e`qjE3VUQI?wmoc?T%9Ga@JHg;0IYHI&d{`DplB`lJcI|6qg8$;t>h=Jk
z<(d?ea+dW(s|(HyOCptxFEDwV9pd%MndB57Tu6S0jU$qf5IUT;-z&o0@~bcysz&F|
z4y5C&PhiH?a6E77W;@zXlf{*Oj8&dPtF#ko*3-*yT^2@Xj*q6|v#!)_UeEUD^g!l(
z62&{0!|eAVcm|o#*Wv=0PC1TL#Syelc^Ql!*kP)h7VX%R&Tb}};bEo+wtiM8wW?yu
z+S|l*v^b>8dSdudGsNeQ0jWvTpICDoIeZvL)z!rN$&_xt{>n~e2cW%aKPF1mQi<6v
z`gbt`!<7?p?v@QIRUWXch5w=N^lB)N+JTpGVyx1cNv^fWQPpw*TV_Usi@eTgRTWdn
zbU;hDF>*b#@Zj}yRINFU>6gFY$Z=0BZ`6fe&rf#AZLt_19H1=!)A-NjCDg4oX>3gs
z>#leO<Lo9JU3nCC<5n>LSu0^Sya8u?&mr089u_rxM8B^emR=HIiEs{v965}WoFeMe
zUgBZOIp%lV4#Fjp6gy3qp1ji$oJzmK8g2CvHbR8^gC}6$qYn16`6#S{lyTPCA5WSZ
zk@ifA5?-XUUoT1#G<hnzLgfUTUS^WU?$glsal>Y9M?Cn%v$f%8P&g$3`4ikQKQ4y}
zd1IQiFaQ@_L@?~XkDDRI?Dt>^1lyRyL`M8OES+(Czj#0N)nK%ZF-=L2K&i1j-1Z+}
zio$UAbe^<8)As~c%T7Vll`%Bh{{oAjPz00w7f`a?jaMpHSd!0m$jyBKoe6=^N-06z
zeK9_G1dP#D7AGR)z*nORV*dnv-t?UIwOL@pUBYK~NjlOoMPPI6Hr0>ughlmajCqoT
zD<gMcSp5$CeAP&0XO3a+@jlcPd(sYFX?i;O9BzB4Q{ql_^0djOM8j-4Gj%L^X&RA>
z)H)a!&A_QVKa%Y_0jaf@pgwdA0^f`goDO%V!iX4h+~$roi!LBWGZ+4HZnVKuk}`K@
zk?ri^$lS?j^p;jE8=rviRV(pf@*n1~=L@sb+C{g0FVefS*YNkSte|Ij3ru&kVMfhZ
zI+A&e)N2RBZssR=&JogyaZPC96{z5jG)cV5M(@L&u(;lZijOL!k*q=nH*(0k{Tf?7
z;TWF1*hKF>$_iRah6+ZgCLy#k3wGrVNPAQZW%ZSq+!sxQt5RVhD@#jzV<_|5G@7+K
z0$pa!c=Fnj&gIDo{QTF`_8c85jM1STclu!1b^`AF1bS_xLW9ofP^r`;WR3bwfnDk(
z^CF6Nza2u{4JqnB*M^}=I`s3xX{4pN&=A8=Or3g^itOi+y0ZulH}oke%o`b--Jn=K
zSa4#*3F=!@j@T#l6tv|PoZ@Y$__PDv@^hm~@6T+o%U}vP6hodB>69L(MzhLH=#obP
zmY+_+tA`V*|8WgD&f3S!f)n6>FW>5iUIo)q?qa%C-q@9ENxj=gGZpm|wrr{-st%rK
z<~L?Ci#3)|FFnroovdcv2C?E<G#Pn2?AVj9JX1`O#YdS%WaOI%7puQa(zJ#dZ^>uJ
z7ENcft>thp^A1aIk;8#?1KF?PN>s46pGoXe#(B?=tn~%howt^3#ly4g?1LmGX(ElG
zi!QUd`Px`?dKO$ed)SxuC2XhD8pbX+S)KeNL319NpuTr36fcB9vDAXSz3z-Z0~i8F
zcd~U}l~#$xWz27|KVChU4)fXL(d-on(f8wQX2D_Rk#~d5b<qZSWk7GqN@`Tz%JzMp
zi*KeQNFm0Ko@IR@PZwD+o?nA4TBZ29dM~0324JHT$gm|A(d-<GOd_b%dIE_hRw$%}
z<B7Q4V!(#4H0}Obl3SgEAuTr$|F9b0o4iqe@)yo$=AtC47{<9LS+1YCVE(KJkol@X
zqeEB1bH-mLeKrEzXes(V$`e5z-H`H*M{w>i8tU8yXRk&Kw%LZm7rI%Z&T%&XM-h3;
z8j^hWd0aA|MvwIvX8M+6?{GcZrI3%#Py3mxavFZx#*x|bn>f@XO~!VLbgMZBhjVQ)
z^7>^I54wcqKa8OMW+0vW;LDtu3I4s0g6h+8WRZ55j;sG<_H8R+JbW{TJXs7?l`kyt
z-CGpPSYxMZ5GIuUMRJ1-dF|?C8*4Yi?ri~n+UHZCnmm1;n}?7OG00OP#EE*?hcUr$
ze7qNnh3S|#a|c^}e-i~Nm7?scI3#D}I>dR&!TQ@TmN9-a-e{;JVQV}duGWFWi>nAd
zB|`}h!f-px7!?&JXj-s`MSjhoMEPp?7Mu{{BssF!(ZTqxW_%of1)F3=kP;nWf2>#G
z=kX?-T5d?D6}b@nxsK4|QxT!=j7-^W5EuSZNo52=$AuH;YRyuj=R;3?Ph1EQ&uTu(
z0`I-`>`UJ<tZw>%^DCcX-D)dj{jehacTsq`vjsg@4^ZP<U$P5dfq6<VVE9UrOl;i*
zChi$@`08L%)<2IAGUpH-y&Ad4M$wyfZ(-LWpjnv>Vq8UnFKQC>D(@@q{%S(d0ekH9
zGDgDt!>E#7f^$9N>HNhIDE?8QiH+v;pj?iQS6sp~od9?gYvPIg5t`|!Lg)J@(43AS
zTKK{S+q}lm57|ky*`yik+#a*{FAFi)WeH7|t)L5DOGv4IHT5=TK}t(f%zd0eo%SAT
z|9cR3Ym@{tOGDY!xpL&TJzadBn$p|Nb>!1J6#ot%!%z(=8oFr^6$g&Smt9gc`nn`7
zG<$>7v0;?%agvOE#dvn%cg${bqGP?s1<hALg~{uw-97?w#s?4=u0#nJXH%=8SQOuQ
zg05YtVoqlq1&b_QXyRZwT77;wh3DL1KKEX-U!S%i@M;*++Tv-gSu|>@Yw>&LMT+=4
zgzWc6z<84bIagX>;hkVQ+&Yhp?%t&EY7qr}ddz+=Sx&Fd#}U<PP<>Vnn}2I4xvwk7
zj4pkFf6;Nuwwp^U0_$MBFn~Vb4k|uK(%Ro*UgvWv%&(tf%X^<N<AqDHP@)Huugs!e
z(^X7Cs+BE?ABWT4Pnn_cFdJVZo<%owv#<lxG1R^mwu$HQ>DfLuX7mYseQtsA)diG3
z$%JgG<<al&1jWMN?A*D1OluhM<m>=UubhQXk<*z`pCUPLTaLuuC@B3h#@{=hEaJ>Z
z_VCShcH-6`T(VY!qNs}LjrN7gLm3)>Pz{s!{9(3-V%fDNer)oYFiNo-hel;bOt|w9
zlABJm$cTkFd?o_Rx42`|AqAHDWfR7V@m1m400<5&#{LEAxMnYlKK%!5=}!-sk6(a(
z)5%CYRE)ItFYM;kE@t~;28Qcf(kA{mZ45NW<YoIont_-+!yEoCM(`;<fIAkgtiL)G
zy&(#8$MYK=jIUvZf$8FNB?C&k4^i9j`82G&2y51b;Kz!I*c76HUu9yPSsspio|mEf
zv5e(8sgbH=2(-RDfY<UQw7Q$Y(N_wka#Qem+-O`8^Hq~;4AGEQjTv|PG5E1I#y#@F
zzp>rScy${4a{n|5b1&f#_QImYlI)E>urk%>n3?_#74hHjE;){Q3q!E(S}caY(xKL_
zO~_8jgmd~r{Ik%(Sqekt1d#f4J)F##MH)qJ?8$x?6wRNGZHHCpb8<Od-SUNT)09zv
zFa#PeLV!SdtZ?pt;l7<HQaFgtf-$7OZ7<DS)y8JNwiWXq**Gw9h~WAUH#&Sb3$J%M
zqGy#m-l=HeOk+E)w<lwgemO?j>}21z_tUmx(eNIXiOd!~M2DoXHG)jG{n&Vft2rQe
zPz$V*l8{jo0O`fwF(hd(j;vJ1#;?t6icB)QZ@Ynd(g%>)CR4g^sZYb_USiv>s*=*@
zW>lw_;bf<H?T%iL&1Z&?a?~lXyB?%e;Y$ujqOs~~EUKzqa60|~5(eF6;g91mcv?Pt
zlHti}Q`?#0X;m_opGCj7XEC<~eQYt(N00Vc^tf+i8%?L9sp1No7x$Bm+$SJEwKrIN
zCX#($HWC|;Zh)7fB&}~6NRe}t@kd@7r`9#H4ud$x_V^&G(-x1i3SsK@+{%9EP%0WT
z9$lKgcz5qSyLmo`ne2{d4#v7DaxG$?9*l+SldsHlumfC+;&JYf8miaiGvg=QnCjdy
ztSDhT>DJ9e;5}bt<?Y9O;XbzVZY_SQXk%%J7J4>aw%Y!tooRF}!k%%8RC#X;rb&2U
zjHrygTMiT$6tUc$;jr<X0=J|ySbJ(Lt2bE6{xdW{>B5ne@pUpulz(IKlTC4{Q69bS
zGtg<##ni>P(W5JheI76#j)f}`^6dydRhO{+dV_Fg%t2hYk)Y11rFgP47t89ZSYMa~
zWUhT?X={?OlCwvGS|W1lr?I#9ZzJ#iLiEOEp-grtlJkaPWq2X8eXfY*8GY=gq!Ug%
z2QkY>amZclhnEL#G9^!2SiRWD99j(7W`l>=_~aP2KYzybH9lg+FEy-m&A^B9i<sJY
z5ns25uool8pncpXY<aVY<^>PJuQy9@vE>GPUm}m`h0Ab$%vh?uo{r@w50LDr>F9r{
zhlRiY!>KE(f|#s2Dr)|U&mjkKOKf@X9&sNTPU(=Am_aKC&IQ#JKtrR7ZjPEjn=J@h
zbC2M*P@7bII%!qtF0v?GL-*X~V}rF5JS)fJZ}M|^WqwDt_ilQ-UlGkCQ>kN&B-yRC
zq`uAwq{JUZcI+({Q8kL3*KEYQx#OwWGXm4oj$*O6bm9K01lY|t$EH!zu(~eRfj&f0
z``U2&@k5tlRRgH`Z8i4(+D`*~$C9gl1x@FwSdd*h5*?K(%`c72^$IY`U4qc(gTb+t
zcs0Ra+$VVxU;g`teV$4Joss{+VcC7u_;z4~qod%4w1mKAX@q#*;NkT6DQ+khqsbv3
z#c|@Ft?(U}<Z6-iA)VAqMYL0_O_*6nQq9?5YCE%6pgwXht-BURzehet&*f-zVJ(!6
ztSIGE8&rd}>2SIX`riJgkGtz=hnxke4V0jWm0=ibVu#)biuBjE1$C`O<eqyCY5#r0
zs<+<c8lp-mUUOium?IUdpKOKVJL*h&MHUnP6LVEhXqT;+QLfJ?J;Qh!6?T_?y=h@x
zQ=dX!#g7D|N~rYSAQBAqqp%@qs6E<%#!e|Zzp5FxYxCLeqBpRweQY%@foCnBY%pnG
z83zBfq^PV=7HIR9jg=V;hf~AxIWe1!Ia-Ma1EgS?GYhjtwb+w;2LDYgV>@5VLzTQx
zxMm4i488(8LnU0j5P&^(uNf^!VpC=(;Y93M=p;|U+wY9&^th0mv;x}KdSbK3KNe+n
zgW0`{WYg2s@gTp2jXdE3)hoAH_{(&RlI9WdaTs>(_{db!6PS{4I9t=FLk_|KtgDy?
zfu97WX6$9Vj?IU!t39NijzFlAfW2*f%bveZLEy?11e{MrqDdNDsGq4;l(C5CBk)@-
z6$=a0DOaM7oCM>b*A{`pyfp2c`je(kk{1Nr+l}fWNm#Tb8!<mlqk-Fm;|;~+J0$@t
z-*&+KqYml1TGO7%hoN?OFlCLlr}|fK==3vhDyS4t-qL}z^;<R?_h%xhIfInM`*X0?
zaLOGs0^OsO1@j-OQIU2iJ?YQJ^*^atrWFdiWF^Y=8$=`e)|1@RHF!`oikcfuDRtmE
zT=7j1>r_jTJMs`q>Mo<ozEZL^dWqAUU(*YV1F)SXO{EHp>ACDS`myR4t64vUWEL-=
zMaKS=siIHI-b&K;=xhW=C*f0A3%nQ2C$*q7xcM9)<=yV=QG*kv`n!VL>rLH~JLvfy
zQ|#P704mkC_-~&O8YhR~zT^aI-<gQjJF{RfvZnokqbdISU}$NkW7h2sOw&C<&i)%{
z@0C2<jrfP!BVNduqmHLqjacY74;rVpqEdDfi#6FnPj>7_*_%=nNzcNei6gOZLOs*1
z6yf*m(I{GLLt!mbu<FuNtet)tnWq-uRQ7NzHoDII7iY7=jZU;gdn{#IzlHi$H@dc+
zLvdyUp5=T(x|RvyV-b6HIt;7qFC$pF7~Af(VXd){POUe<)15{*qwI_F;mVYFvw`Kc
z6+q8@0MkF7#g=r*Aa3J*sN{?%g}M8fC@Y(d@A%8g>%OqFx~Et{<3Tw3RI+ojGT8Ac
z6~SeN5SO2@Piu!Gb=5r78oH83X9E@w9E8CA<FQ~?4HMWVupbEu2qFoT{PIBfsX=U?
z1*q0%4nBl9;q222#(8aKhySFq?MF4ld+ikaeTCrP+rY{f*u(5z6js_wLwLTHxfCU`
zGkV%=c(WoIrmew*n1#5wD-#ay;@NXy4C0Kuaja%7O!Nn^cgm`mXnhzv6;I%<vmTP#
z{LtI?mAzMQVnGFi@p$oET%9GxLiwR2ZQRCo3?Qt~T8rH+YpL+vRvN9b0)t{S@#SO~
z4oykN6uV%oyY~n>k7i-YKQnmr&mh+$<LOVN9Ym@JQF*T)We*1nK25YENnJJisho+~
zqZn3n>qBPXAiDQ74^Pkf(U$*`*eNGdve+Aqc$*(^&5XkU#lhHbT*73<y2{2O&an7u
zMB&lVP+gG&rZJZutlo{zXZ9Fp)yb~+_%Qu}))aWzkAl9ck!i>Z3Ob?+mNk_U6KrT=
z%~I-{@j#48-@$+63EccOmUbKZ!8G>`f(B2+ke8?MJZ(1mTYo`AGXWX1&8gA4m}NFU
zgXHn&=u3P+8m3x;1f7`}mY9Go-r2b29*w4;h4>xeOK%s3p<8wU)s#=9BxNT`{W%II
z-`g-ISd9#fv;|r(Tu4Q%89o|cjN3cou{{IuZz#d~(vOgBxR2Xi@7VGF^VIRlh5E*~
z!<D_j>1+0=D_jjj?_!wCBq6W35jnFGpzy9&jD?KoNOBRLZwbev*i#6({f4bq9VA$i
zX-*$18jzehot(EO!@YAl2@Z^+k6obe9rD=quL4<9t;Fx2k5g8X21yJsC#jN0IQuXh
zeZ~zaxvEPZE55MDTk3H8l>|HQ(as8Qj6}lS9_%aRX#K3c?8E5+xO!Op0{6KAZl2lC
zvKNWxm<^xUg%#FFEHB2-?PYL35Y2`xS%^lb06ZJGgC3tf3H^oBV6lEI8Xpc6`!t@g
z-@Eo>RFDDc2e}~9LC7{~=uul?05lD3V0M2J4mIVokewUY%u5nD;_{RIo3#e#0@|5+
zOfp{M0PJrW!)kyYhW_YbuksC9%>^C0bLTKrj&H#p*&8^TaDqL2lz~{KMG*dQ7Judw
zS;(P&HXwK{rs@tNq3Ldz)rqlI>TBk{Od5?g%Gfn&2&KMkXAP07aOCR<R$B6qoxZsM
z`-WbF?jmIpy%F<7<tLfitS@YOxdO^=d|(6rH6k$o0Nb<72N0cu^Aum4ny`oU%jqJk
zdIChYQ)o+61;Qe~GJic|n2k}z)Z~NgbF~IMlq_Iowg6Axdb7Nbb11+}fUCndVt>30
z9-Rzk{~Ru}6Rm$(O`{BcZ_$N>SO>M;8if<_u}IQ;&93R`V5R&{me|CYiP}60`#1%i
zD>~VZ;B1UoU(0;UR^$5m8A$DLf%zmO#{GE3QfJp-(Y3?aEbR)XV}`KaYX#4aJU01g
z1ipPRf}`3P_!<~c-;vQQ&~*q>TQ;Kma39`l2Gi|DB4&JP1_a`}eqxK5KeIf~wvNg|
z(j6rvs%^*QT}m`_+Ffjys%Ab3n;<3{aqvMB<+Q!SnV){h&z}G{4|C-C3!yUl7!nL-
zVC|(6Y_ZtLl0Syiw~)PP+kO_`azc@*`<*?Ke!_B0mtwb`A?DwWgHzE^@L!MN)I}9C
z3p2p%EGsCBXQ1O9Cz<&g0d<bN5933}pda}SS#wXb`KluH|H*+#Kon*zw_zjBiFI4o
zo0xqqp3F~_<IPnbvWJY2xOxS)c-f+{M3HWEMM1%AK4tNr*wWF<X=>^cEPotAI#cBZ
zuOwEY^=2Mk$0s3jRSDjRvF6vcH)+HgPYhf-jlLJIqh6`AbgO{SQaJ~6Bj?iU(Y}J`
z>l~=ynIgre2SMk_byQX@fcDIj81NS~z{!Xltd()A_z=lOk0AHhhq(34mUR6uv&vJF
zn9D_@eO?1v&EDbpGEc(OJNTTPKwj5A;521nz_;I6ku?B|#+eHO1Ue)?)|tXysM3x_
zim<InBZqq&`R~=FbklKg6MG2mx?Z9dy;?FnyN4e7@1`G~hGcWM8uuQ{)B3k(5w4d9
z4f*d#8kNV!^@bv_Y#A*G%cauQ2L#LB{bSPk!6@FXOcCza#e3ZZ9*5dcc2xlq!%Of>
zCy;v0#!+p(J8oUqr<~{>sK|~JOpMB-GOHSVU9lg63q7z{w-eg=s<h}*4jPK3s8i+(
zYihVcHq+&(;xf^Jhwm`w!(?0;`V1Z$O7JN2I?AV>K=Y&>h&I=ytE**coZf3Rq*USA
zYb%`ku7{;_ECeqrXArk12DaBm(){QQ=u5suUm6jw(}fcck2B#`OUnLSL{{tlXv{+?
z5*nR>>thEj$UcCrv7>3u7V+$+(T@M7m6GuS1t^xyrZ-*Xw5`fYp!LvHIQ?rIo>vW|
zIXCl3ag_@dw=JQ4J#Q(<GY`_uDKyj-0*6FR!S6qv$o3mf7vI*<9+{hh_mPo;q3;do
z!UkuWQ{zpaPalNS^5-}^IE`$Q{-dL>#Ir~7MM1phd2*d!M7)|5xjP?$l#D#xytjwE
z6^!Zed0To?7ei)cakTAYGd-@GL`!%*I<~bFO4*08`&*u%bYDDK6s6Kl`6?=tmZy8#
zih|+mdFpVfAoEB5kWY%GWjVfr35s=8<Do%qo8$yD8xsgGlxWV&7|LC>gd_}qGi|HE
zBt6@UJ-?d7vh+v7Ev*B?^%l{GMG^>-9|othLg;0$#V^aBta#KR1Z>vFs_0?RI+}&E
zw<M{f_Zgd3sE8v`IZ(Q>j=sG(f(e7~qIInTtO^HX&%85isL6W_pBIkNPru{CbrV+A
zt4^c*MxalTN7#Ek{9+aCT&n|n=4Fc4J;QKr-&wKe>;>z!Jp-SW$!Liiio+&P*u1I9
zY`thOduaI=78b*4#g;j!=>LMi^z&@r+agTumm)HFi&pQQEWK5L*Ue{8^Us!)rtQZj
z7K)b7ZLHqR0_SG_W`CZ1f<pH_w!JO{|BAm^Raa|cTJ~y8`;jE}p!(6LpB}7xaW3nA
zt&Fig+Nj*Kp3N@bg7V)bEH|Wq9XaNWnRCN%!ea_s_|p*OUthAZ^Y_x$rYmS|vxG^6
zG0dwP*^?M|R`zEIMm2Xbt35B-Gmq7*pzaxzE6p(ayEz<sJ<xMw77N-Z-h)>4?0#G?
zi!vI3rp@JSuG0)G9Jm0wO+&FvvYcI)^JHqrhp_d3!boz|0z`(MWyTu&F#SR@8z4Os
zYhD`QW$I8ocbLKY_q}6^(FI7D?1#c&Q+(XK5XZtRnR9|a>nvM<@HM*Fut@CF$oHiR
zEorm_=Of`kDES*H3cfq;5?uPIhV!>V5wlI1+8ZuF^P1R$lzfR)Uy8k7F9r1c^KzQ(
z>_hI!0Vuq*ilW|MfZ>!lL9vmJz@YIXX5HCK8<!QJyO+WH?f`OpFM`*)JGgMt4?_mO
zrlySvbold7vRP$HWBi;jJ2wYoYs27QKahS{45NNIZTJmzA^-Vdw5(_t$%R$G?|1{#
z_5I13KkOFN{P_Tp;1dpo_7l|8kQ6YJ4%{C{g+~(T<GO=bU8GFw%I&B_Q<BD-yhi;L
zP~U%7q5g6r=4}Zji(kWO{9G@jHp$YU_?yiBjRH~@m!l$NCnYwVr)oc0%o&>kyTvOZ
zb!{2$Donyl3r*T`V*n06D~1QJOgARTQDmPi1iuUL_iicHwj7~C15?^xAVG7pLr_|o
zjoE|k&{f|H#LPybWISG7{Up{ePLueB8_qu*NJGjhu+2kU^E$PY{lG5xY&e46C3#}q
zTMoHeQgrpvJFH#l2CKdK;s};s?03uwc4KKMC8Tx1Y-I>qcPysr<#X^py8t)t8_}mV
zQgorVgk5qCK<AEcaN8qA0o&f;Q<)^qS}+mPV&3>d=51WiwW5(*7vQ<JGc8No#EOGd
zG2-z8q$`+HY;-84m_BCBW}{HMeI9%wGN78Ej8Q9Pspr-lOm9y>-+C$1e56kw3rm@a
zTme$WSi|IL1&MY`)7nF_lyYl7a?H$-Tw;T<Dy>LqFvs3a>oKY0AX~U?Ct=26ypv1B
z;Nds0uBerjxIADVBJ6P}(gx8>-{Eu7Y~*adh6Nkzu~oPiQtPK8-*GOUyKP|4S31+6
z9v%8R<2XhvWOUJ}igDX7;%&`492%1gOTh%@uy`i4$K)Z>fFs=;Lt4_+g0o&DQJ=XC
zV*>3F*&{_U-SJG@FcC`>uUXZt31HiXUS;VSC!ibN1h<$V_U%MC`}*cHTmI`Fi;tMW
z<{X&{ZP!5<B>jLruJgwMu`laF@j7<0_A+xaRf9uBD+Vg;#lW;POe6m`Td^sf{ii;c
z8NS&MPgf(%+^PZ3<+)aqjkPJw<Q8lCu7C;o(JX$S8q;VW#ll*o@pj8&wy10@I_DO#
zf!!R=$t;5Rm)ER-_hXwS?b)-8ZY#%QwK!wC26lh-z#dNrKiq=N&=>RU6A8v%qoJH}
z%}OD?k@Y+_#L13%(2O{Sf<p^n__vnr4>-lRt{k>|&lt$g+k|P``w--3$$a9hpz8Y%
zC-ZDb>sUYCUHhD^y^sl=&HErNvlN9rI<SA*j)MBfEP2K;$VP1^?|1Fkq@V)b2j@|<
z=LIBPUedmAv2<#79LjQwaQOC9Scv^ryF(vBJ9Z|l#>K(SvxX_XyG$Mz#dG#_2`YQD
z9ZCU5a3eSzj}yf250oMyC(6X~Wir?@O<SycRY0j}GqhT6fu?_FVLB>kl*^{H0tL$X
zFb>~-j;D!BcH&jSS@i7rgAF(C;En1t7GY6~tCK;!8g8`0C>>+}ioM2*<T1Qm1+8bV
zVu@5LRjgkD6ZM58C$9>v&?5+Nw7|uzM4I?GhF1F-;Pv?xh_cSat&8XIB`Y4OH8PYP
z5sHz4i{ab8m;QSBkjdrc(0%tB6NVX*qqn`FX-x#Zexpfav}4eeA@(d8D5L*N6JB4F
zpv%I2lsCSJz55+aUSb-xYU%@A)=xl|WIGeb(y{lIxp<+q80lVLaJqdzOl?XKUw@Qt
z{d)jm#sc)H7o*%Vn$^Dup@!ITv~au&=}51ns@st;YKbF*B1QVemeFqW1QzA;0IOe?
zVw#gJb#~@rp0y&i-Pwft7oGUM$RD}pIq17)N>)qKsJG}MYw9RP`{xjvKHvuxTIvdF
z2B>1lrh~Y=w;i6(uHm+g3erzsAXm%t$gZ>@=Lcfl?eRcbE_DG79qUM;brPj_P7nlM
z)fSu#R-xM(-sD;I2=lvMV`zgXCB3hN`#>M6cJjq@x#fcGK?&qGBZj)Gl*#zb7bF$7
z!q`fe-Yb8_%Wff6rbfX%D~ObDP9`S@RgxMqo<hBoF<N^)76zFJ9*>Ns7iX#w<WomS
zY#PuvdL;SZ9z(;$GjMCp5L^;-XFf6EB-bS=h}*M@5*6NHN5CVD-*68zJ66#w?KgDy
zoIV})8AT%lCNn!dLdTgN#O|6&@-mgw?IsJIEvh&+ZwuB(UBIIM&ckhEG1b?FA<1$y
zwd^pZ*pX87Y{XaiH19{y!ACe+D)yy~JWl-n1IV*WB)Ky|_<G6^H8J{h@X}o@O?inp
z_coTNU?hIfRf2gE6UaAS2gir!VR?T#KJ00NqWvY@s+v!it2$X_X$a|dD$?D<>+nph
zUGCC8z%Jj~&FUI966fed=Vt#!dtwDWxfzV9=bBMD^C3=6x`<3A1$?$Y4E~fH$<9!t
z&Hbu0eL*Q+f2>BA@e&v}%24<^J(_sM13T-MlCh08+d5SdlQeC_>-;vXo~@(NNoCC4
zM+Fz<r-K5PKvAQPJ-MJtI!=kuIvt1Q(;nmgEeV<xpn`P|-S9Ch2bL~p>FCw(c(5r3
zGdH%PzS0JI$4BFD)ETsHH$=(u>oD>j&nyL#sUdYCe8xoM$d!2zHauZVOe9cnKL*<S
zdRcHmEk2DAb2O&e7%%pd?`{de=P@c!5%1mk*$db`CtvCvUJS*X<<Rq2BmF1y*~ZUj
zVJG@Oiq1Qp%eRZ;D3Os-kr6UVG;n>+7a3_NNhul{NTrlfN*aDisLV1lA{8l7BBByX
zMv^2+g(zuJX-L}7_5AG*FLmG7eXeufpZBRiyxMNu*ki|x+}t7c?H0~wJmIqgDbkoV
zfDl_v?6p{l@hSwtq)Yhr`7$g!4v}hR2U{3xik~@s(ET1qHR><u%7kKsKAcO(EE{l2
zd@Nl&lZ_X?Z=v>g5faZlh1Z^B62HBb+BfcjomLy7y@P0^yNcla3`u%gXGL4Po}rT0
z90gu{R&&ae()GVX=}$g=e>)y0MqQz0$$n^9dyvlS%F$)@_wX^5M%mp;)aIQ>d!iA&
zlh}v*w*qKpR}yU+h=S4T`Sfr3I=EbS#4y(ivZ;)xdl4V-S0jnGF7$^|pEW7TU#6rn
z$H->9E%HN$QRlvqlzu{3u)Q}QGfgdMqemFh|FuEn%~#ZG7zh$`DzM6jXDR*-u$Inf
zRDU@Q-C<EA|MD+oOCIKPCu!R8wTb6!%qZYR6&8Bx(&NoxuobzBZc`g_9T86dF8sn^
zVL9%vx=5Zj`huqC)|9PufL3ijf(d8NqxR!r#6>03r1KW^N<&Ftf3u4n{1-u9i_b%E
zbRxNCKj!b{3GD10hh4o=<ltTnU(P5v?5u^*3UhM5@|Dc%Yq5T`2nC)pftf-Ji|>n}
z_}pxgE}KegOAnJ-M;iJUB$C_i3>rH!nS}J`@w00jH3vwN_jOf)Sd<tUj%F0F@+Ml2
zPb9nZLui#vHu=AahF69No$E?rj~o3En(Rqw55h?0)l9*OYeE?F*cSH2t!P`6jxNC@
z+)Lvu$cIO4ZRtg5IFF_dt6}uB%n|>h@=<Brgbr^NL5f%?85}9Zrowz^t7hWCy-`RQ
zKbb0cfB)!&uUP8d!#<>Lr$^F7DEOjD|BY+K8M{@uI1rD@&VzX9+KE@e`N${<#OyiJ
zq^qn!e@#+h&=ZZS=s$c0!h4lv(gLOTv&eM+V)Qyh(Dsef@j|KvB7WvHw!9iMrFvPm
zoD3~$NTz>>zu;8WS&S;bh2SlVFtknwnhPh;L(5?lvM~*jce<#}Koh+M58-|%0rt9=
z$^X=Kx|4hc>OKb`%XymjONLWd>{(<djHP$uny~o&b`)F*rC;7L6p|kSk3d7B$-Wft
ze@MU=-qV!OrR3(H44D^&*!a2z|8gT~M2!s92)>i-j4Mp&eg(Dq=D<ADha{qs!OE22
z?<o(9o$v6<*#!aH!%0~64AyE&P-wvqs=8N?eW7|d^B@mdiO-md^GTXMqJbj2f@yqj
zIyoM;$E3-JX?^}Fnh;)0wV~%&p+G?6mP^ozo#nK9{dZ&z453kTj$!T_L+ZAxg|_|!
z7=&6Qu(S^?#mTIoS_;M;z^8dibo}j6s!M&zmWAwxf80V`xx{N#TYX&4e1@kGfnMh#
z=nlPsead5KT#qhZR&K<&sfxtj#gR3i=RFrmfO)SQ>U^glh~J^F);&N#dlc?jTTxt^
zIr}@*gl=$VR@m<uUX9(1!{R^LTdf2(DtZA<{gj1@Z4_2dSB37o98BWuR#A@?)*aD8
zV!~JU&expT&f7;6EJjn*6Y<62H!{77nZ)SNSi5Q%soiRV@~9&$U;8i~sPlKp@B~eq
zI}7{7(m3A>O#2gt8)-Yyytf~9Wf92L9!)n7IkEfcvWVS$1+&A?kg9t*B?+amoAI(3
zT;PJ#EtZ&k_YCuUAWv@9>PWZE#PKH@w6`q`r$`vx*M^Y&uuMeWJ4<U__K-`!WlTGC
z1^Hubv1yYPCa*n?$Ycije~B1-t(L6`J3$uxQ)p-GbGUBK0OS1WkQ!r5oPG$?FU-LD
zfs2T<{>rkpmLpFy2VMQ?m{t&po?8kK66s^7SSD%7i&4ZT0*jENf4)bMEs_kkfb&q~
z9QLTd{Y-r+?~CsEic+i9)VHb;YtF`_L1{EBFbyYvbaJ+i&+iVok>>(y!3^24h|fKR
z-DzLoHFCe8T(O+0cZpNN<$64leS%>@=Je7?n`V@c6KFLqCA&apD!-;Jn0GspWQ}{V
zZjl3hSy9FBWR`+zH&ua5Oep>Jy-VlPj$&*38PpCYQpkw~q}9M#e4)>*`jfEW&XfH#
zbagouX#c{3eN$*$ViGbv@@WLWBj-m*3#4rFAg$3%bM$A^vhuNH@}U8lm4^_~sS3@%
z$H-^M1i>O&M^R#P1jo9KNoQ*!ZJHfI4*pVtpd&|l29rN?TbI-21wHi9WglsHJ*5n#
zr`WR2h&u9=NX7gOZP_^rF@6mwdF{ot@_AjKu>&>EZ5ZvHL@)1^v(oJbxZTB2qqh)w
zS9u>iqXAAARS>JX5&udbVy|%*LJdB!03#*L3FiG@*C5JNe~r~wBVan&0Lx^7%%1OT
za#Av89|^#O8T>vQ?Z<viv8B&`GvR)z1rarz!O%O$#&$L`XCq@YJr%?2z%bn2J{f&S
z5@CDnE~XC}qC8Csefm9Y(nou?r(->>P7@&&^&rf<@CfZ;nJkc>dpjreLp2~56OV6Z
z@$DP1W!C_b1D&X~<S3Sg@I0ceIX1u4LmX!*vL2NoN9+Nfdddpcg(WkWI7M2-ze6{7
zN6^~SKIEcjju#E<VRX70ejmG`r6vZ~1rur2b7}fGF%GpKT&OrTik4i+MExB{5(zb>
zn9+v>zEi?UtavmjEULnfxnc0U+Rfj+-E_*l2D;L9l-gLtW{AuaEc39V#0*W^weBj`
z&G^V>YmbH6mOP|XN8|E|1GM0I44kf<L2T$-+WJR?%C-k0piYRi>XmV<do8KaTyolc
zn-&@t(v3=0o?S8_<;H^)Hf9C&9?WKXS8Ykc<2v^0S5r#DEIK>;8H~EVV_}9X3B8jf
ziNXl@tq4L+ehM9^mu0*6b+G)nK8VIx(X|<O>Dl2ncF@EWvp1f^(_dNej~782|F@jI
z?ikNv{=tTlYC7=Kfl6L)K!)N$ycSj@8?Rc@J5)%Ovp1lo>lZfgyy>B;iF~d+nZ}hq
z!<^Mw7#+~U>Nji_%&3on*+LysT)h|$8>d2Irx45%-r>@iqfpJ&BkAgR+*4Re+SPTK
z*Tc`jIS27;MK>!O-NmXRH_+1pJqnZ>PP68xQ^u*Eto>OD%tSdWzQzZmtkYOskr`61
zJJQ|rzc8)afehcSrS@TkIQDZR@@MZrfb3klJ|c@fT6rCp>h2ghY!78smM_?y4^e1y
z5vEy@RxHCJn0+`}!3vu4S!v&5CS*SzTb^8G4u3AQ#OJY?VB(KfId3+_`W18it%`Iz
zP=ydbWMv0fhHww-iOFRv`r_H&e;SBtzsFAW8o_7Nb)$7RwJ9Y?5h*_En7S{IEwIvN
z3f{I%`1)g(GIl%bcs3I=bwU{ZkVnwH)tEfLo?Sh)gPoh4$YMUeG5Rw7Jf;>c$A4<G
z;d(d{FNX@)Y9|F~6>fmYI&FOSD>7=${>@7I=i<-%tr%!E#L-|2yecndzn4E`d3F_S
z^ix%6B$YE|&Z(a8dT-R|e~V=W=pc~y6RT}TQ;G0qw*7knOIR@!-YO!v9-hc%`|amh
zr~AzF{8$`WbsK|{k=SgP%ao>QV4~Ik=RHoeVMGH)?UF#^eI3X*++s)OUS!l0iiVL&
z82U8{Y0f(A-qs1^)cl6si1Ned`EQu$`4lFxb|3SLZDUH+y$lnT@cGth_WMaNYV}@Y
zDn7IEZyTAv*$Jj;CBs|}|HX|U{!Mydh>*fZoJ*R|Zk#v9=cjI1dv`V(@?@Bnt2}gE
zR-)cB1aW5WsNdp>nvD%?LDvhm$6^!`&3EGb?Ql3GsZhpXCM#__fdA^RW4c-)eXHLq
zcqGHw0G}yP_xKI9mPF*2dhpr*Fv>JEg%zI(7w;WQyLirFm}&|y^7#yQ_YfK;ZzNd!
zJ%zmeUjk-_FiH0&*bQ|&-#dWFgZZd%UO+=nUSd=7^J$g164iyu(pAm>Fh``G4Ju~g
z$HWZWUG)^BW;Vg6j%SDd%R|(zXfpit4+s0QaIS4EOyY&GQuG(O9}}k7hd1&1AZTSy
zKBC0MNq)!-;=5pI20O!)qUMr&_J6dxMvP3tnqY310j=KS5YdXqg|kFupAum|;y6@q
z@zU+tZ`Pu*72!t4v}-)S*Jo`O+}LIen~)ULNDGl-Rw1tbJ`S%B^7QMvFxn=Zh7CoK
z)wIp@xGxdye}5yzO_^riR24A)c)HL#gr+JVK<_bmT2(#^@ue!1+;s*g3+(B={5K}@
zqJ`SFenN@A9wpyD4x2OX$a|213k}V<I;{imI(e_;<w;0w5~k86LFAQFi1?>P`0tK7
z@|vfhQ1%hsNSjB|8ab#^wkD0!vG{XKkwRpWXyA$~-Q5404P8uB`ZR}<Crzc;H>YvZ
zPmd0@`=Ex;OGR7SF`K{twu;@%Yj+JIiYKrn*B|Ug&;*EYx`<`Gcd<qI2z$F!4lDXq
z@M^LE(q)&K;)zh$erRJQTCxzUJPyO@&G1>#z#c@JK`gcuhp&avZK}q?ibb%Gf6pdP
zmB7qzpP1s_v*^+@fXd>x{LU=Q3a-o23G>NN><Yny(}bmK=QCTE{meF66)%DZ*~}b!
z$kG>PR<Q}*N`nYCngXGk29~$$2J_U^X9w=sl5%S<&aFR)kGpx_Mq@J*(cv?s1MRR3
z+JLqmGj=d(2ll(iU|NL+?R+Odg2*m>J1YdWL|gbOjfYEM6z3qNAnwlRYQK2z(IbsL
ztu%s}juy|*yO8|DWOiJ;j!mCv4b!<}(Wa|_jeD|?vi=9lsoH}4&QhMe%s`r37wgin
z#2yPbv_CVa8Aspn)~*VQO;_OX{NL<!a}F~(Yy;~}-ta!Q0S<whZ270TG+H+RGe@q)
z#7lkbYj6nL>ygOTx!6D}v7Mzl2Erri7OR#_#ofFx+)7u#N;VumGj}qH<Vozeuqq9?
z9fXqjxtP8E8AKkuX9wS9!{|~l%sAtIzsr}6e=G(U-UF$(J%YB_Ab6;5M#;)CXkJ>!
zl+XC#alkGpOk0c0Oh!_gA<T4`3Eb_^z;u}vneQ#2#E^F8{8<aRBR0X)XC4;KRKd=7
zQj{NTg|!HUek!loV*bJ7tp<F?MdHN3eO$hIm3C_`qlOhJFcnS3-u@LReLV{MZ=J!s
zJ0aL1dI!%UW7r$73R+%y6ux&l@yAUI!BrpFjc^@Aiyy}inT0sI=sDgTS`XXFx1q#)
z>3=S`qV$mt)Zb|!C;SYneiTj1trW;{J<q?_t5N^8Au!+>D64y)kpDrL{(Sak78$9~
zKd4D@%M_^m^CcWF?Sj&~EiijN6Nl$+g=cFH{0)xcpxG%3fBlpF{4<C59InCLdaS^w
z#Z(}Bc?OoAd5m=>ZBU$@i@nd?c_!1Jng<%8@MZ!HoI6JkRdcAPayZ&rj?&iKiIleZ
zf*}2163tqoMloaa@nqc*RFwrFP$(3^<@)sGg)Xhse9uzX9i~&)TBz<)BVuZ&(#OUt
z%ui$+)Q_d(6Zf&mUzVamkzdfMNx-Q0$7y++J0<_!jW+K%lD;wo^LIH3GT*q9nA0Iz
z%{PW0u$Li0OdF;9+mm&q7pe1ah49%gcz^dGS^cLj$j-5#iJSw_8FL38hKQ4wv<aP&
z4+6F})0!q_!MR`aaPQU&9N#S=_-D9HFvUMyaMz$2cTW6<Or$Qo9JYosq(5QFs3n3)
zZ~sE%QUZ;9tt+@V-JPEPa-!$rTS>^Xg670+5iF8%6L|X^qki`y8gb5r{@qd|XFFxV
zNP}Q{)7?zzy*=>X)+6wgOd?Aw4*~pDsEA#mJ^2YFBXXP0ol7Pi@tgF@w-t4()CCS*
z7f7ggG#Sp%qRUmG@G#-om8H&tjaNMc{{45+c_2j)AQ?t$G{Wc_J*4g#i8Pl#?>m-Q
zQ}B~9g7y_M0^LK$>Bd4=ihH$@!cB+M1zRBjTPq`2JWUSkeRw}8dNGT=+QSq@6!6`1
z4E3wer$<u$tXuXKOK%v9rEA*QyHh8bQN?>iX=);usbPXf0W{~HgKoklw(*}W+`g>9
zN-rM@ly1Q5m#XM7UyVxD$4q%cG#jI}7#DoiA!?+H*()Y9C&M{3#y1h+<}REc|I3!n
zPh+0r6Pf680ZO)hVwWpCFul2-WgpuGnM-@|L{ks3B}J^F_XayK%!J)GSEomVyr%D)
zit>T?m@4;+nHz3`ATStX)`w!AyaRigB!b5AJP)j#fVmm}VO(ww?q-X^QRg<BYR{jc
zJ)tO_vJkV!kE9R_J(g$il?|F%pnmKx+?*9hBdo78<9Bj!&5DA^`7{{oszKGF1Q$2@
zAT>V?v)X@P_}MX(_wpP2mUjf-*;%}fils?Qz9PEzF^2msN6)CO+(qMqSnpOGPl|(3
zYXla#N3#;?Fp~Zrhxxo8CE4&2<L3x*Hs~f3GF%A@70wh1pTVTHI{0`{g&xW!VgI}x
zXvx?B@x31SWtYR2l?%w!Q;c3qMdI*k3G(qj#U}2{fb+u^yfWk2+YdXr%g7GP51q&5
zIeaGnXD;1xDTP)u=V%7qV5Vk@#L57A5g7sRR$ba5Y{_O$RL0*&>+oL8jl?_?Ny9+|
zk)NEgCTs=FI~_4}=3kb?8Ozk80)&6wgITA0vBFc7`gJ5R&%y#r^IqfTrxWydmo(iU
zdYr$zX&7T}iHu@xC~IAZQ}%K=Og@hFUt-xKgXJWroCd`gw=hoG3mcdPrsub@@Jl?O
zt89WZImy`DK&bAmhgs-LRL7g(_F^lnYVBa2kw@A6H-V&A&<ejDL8zBhp#23?v3}G;
z&L34HHD?(8cBo@A2QNZzY&Ys2O{epA*YU|mgmxrN#EStf1RdQB(^=+JBw_^z15vtH
zyNp?-x3l~KYuq0_l7xh{$xNY<JzOb?$y*e0W&Rj+o&CuiRXZX7Y&dLh+hgvD;S{ak
zfZPLTm`L<0$SQ6?vt1aOr;Vk*2X??{CeJ&+@rS9`4|X`@67Q+&L;ccPc)bi^KOEQ7
zyYS6;G3FYk7W10>+DDc>F_}r3ah}=728|VGF}1y#HS_xKSO&iz-sIokxGA`3mcsfz
z?_rZ3$x$X}skW|JfJxzMbThe|CHMs4$fnB>kxa)+^YyI%<s5W#_s}li1gve$!@t>u
zC@Sw~BN`i-e*aoH`3>jZoR>J#U_{Th%w_Y-q;XD6f<8%y&>Ne3lr^rK$;W7;*R%-L
zBTEsJvjd@rw$srp4;X9^rlI4OQf`+ql{`F+Bc8WVbF>G)Y~tzZ_OsM5X9LbW9ZQan
z(oy9<3B%M^(_`P?c<^it>8m%gX|D_f>7_5B)2l~5`}DBq@&t%V7(i-W1<wOLKr+w2
z&$|$Uh_IFPF|z}s9{dN9M=fYiJ<lTFer8V_&(W)|6Uoy;l-`W<AhU?g2zHbr$>tw;
z<=qEEqoFu<H51k~)9JqJ7((xOdb<7-7ROa$slNq=6!08-#Tq&@ponADFOm9Iiv1dY
zk&T=<8>SAYF~!Y{-udojCQyX$0tJZv8iR(A2IkUw6c;)<C%;Y%>8J8QymuuOJp|)q
zEYR~K3adseq1v*1I5}wG;Co9vR-b@5S+AJ<(+ik#rH5rJ_hOj#R3_CfNd>K=;56hU
zp5*>ybGRSOdB;`e;y4n+%A~l@WD4GtG%}B@O?WRPLTQS<{GO%=+f(_><GKq|?d`@K
z^GuAsZHNsE`CMrCHO}-pVd6j`Dy6nT^TK%cC1N3ZHXVeQ&@aw2+Cew#A_DZv*wT-q
z!DD48-(HFOdnwqd6+&Bz-!PB+%Wy6F7_6*!(WGrv<d-rLg-cGtFe)BXCZr&s5*RVc
zi?;6IxfPR4^kgd2^rIVTaQzf`H66lJe+^PTK1i}}4ap-Ar1l^lAJ(sdw2u~A>xa;U
zwg0~}>5#_#VkUp-8rd6&Q)k5>{~gDX5#|X8-A;DwbuqjRBQbFE0|tL>#+zN&q3h~M
zKQ(<JR4@aEF{*f*8OPQ+<kIFO4cZvj3U9|b^wcK;v!3dcfZwSa<27mV$aidQW;mwa
z(WNNmJ!Jj&H7pOP(5o-oV10QHI-C>X6RAnEnbNf4<^bK)GlhKkIJDS*!YcKA^2+Wa
z#n4$0y%mMPO;@m0XCJ0YFU4}7M|52{6*FcxL*lRoUHyH4emOZpEa(~X)~QpL;2-s<
zIn$LX`>8Cr60trWFe$V^(P?v<e|A0U^3UU2%s<xQEGam@QlAF96v%je4kkpMf(Snc
zY<`N6+^8WGmK9Aq{5ZRs+m6n6PGpjE6hh{U@w?UvVp%zCgnl8}*YBZsg#)P1%cWI?
zoLxRIOB*M;lkW*oWnV3uo)C+fmaC|^eKf`LUcdG+9lA6w9;<J-<3F(+>>}P9@AO5a
zMhO{~x-y}Jrx@7t12J}D<Pv^Pka<%cfqOaQ^=ATkzkkg0MIPv$<wOS0i*dK~8&-U3
zqUB2Rg0*{M@%N%8maSPvE?3h9tChPbrC*x*f@5(1?{MlrXMhTQL&~?;rlM~Sl=?k`
zGgYC2?w}edR*a>6%260J<b4gEA5Z(*fL9UMV3DLv9v2+whTmOy@meY@eIl9kdc(7N
zG3<BzVuPPrX@RpjN&bi<uO4SAv$@A4tfMJp?K;Y}s>FDo$LzI%EXgk)C0MXWK&KbS
z(ag!uXkYwLvRflUSB9rzyXIST<rJWBoj0jk1Tm*BZ`5`D;B&D^I%o5fay;Y^c6=qm
z%x)w3!#T`<B!}-EvuJt+&%_SR#&o7h?%#~5D_|8m#hP%~rW*!Dih^NJmB}di2I^a{
z;`G7mFe{gU*Q5#bc}P80P3lBbZa-UbDUtSBr6RdWmP&3MLwVLr%v)g(vCJn(64?so
zXhQy%kD?>Xixv$`rt7)Y2r)^7IO*cq)lO!5?;`ccS<w{kx>&CtLdVl=5%}#hc1{zf
zqOdM_uDZ{>ye{Hip&KdQnM(3|8?a^QMZ7=Hv+##JvB&T-B!yBaYmYv33@X7lT(GOt
z3s}63CLSy8r0v&iDMx5Hd}YU=k#i89FD5`o`~sU+bsUacD%e6(OJoX9pdhI`a4+{|
zrs{K``7{Kg#Vg46%{Q#xunWcZ3s4^Vhp}}zEbiGtG>Yh8XG;!BYHe5npC|2*;xnny
zL6A7B2Is0Oc79$Fd)P7&Dc3r=cfb+G&&6?Ja4v*{N?^nDM0b-Kn5|d_3sV`-%KI$n
zuyhJECb^<IK!WbBX=M)j&#-UI9<=@4kDB(SOq=t=MQ-u<vTYcp_Iu;MPHzNw)H46A
zdXO1nhmW~Dvu<yTKYNEz!)5_<KRd{LhjKPPw;A^e7Lm!np{S}{3GwcwaM?Z=hYt5K
zY2QF7S^i;{+t$Nw_%_NsC`Gr|#j_ul>yb6R0v~2a(iWaGlT0*+>R&(Z^Qy&x2WzqT
zKoeY-&w|9_8+erw!`Q=Ex;A$@f;C>iRNWr>2K~%fDVAj?k3*f|I2?DYgz23z2-{x+
z*?G^nH-q~ldG6@jh$7ZM_6lo}(5LpOO6YBxhkue1q~n;+?#&3rzKz19S{9D3<!NlU
zpD~1c@8G~!5sLd&0@LInq_$HLxty>3`P&UmcXrdNH+-%f{2AviDPY|NWvF^zgXzUW
zIyy{9VC5gf=UP5s$)Pa06N)Fpm*e*>VcNa+GmDBSN6Yhgk`fgsiE$EW7{{4Uxd-T~
zh@>UXoA|s=k{&)gh~r*-X0=xu%5`yYHouN1HL_$`qr>mF&b0Ml6Fj!7lLB{0%rH5C
zOVUQT5togBLEEr;xeuzpTA<-l9VS~&p*E4(uuKfVJIR;KYDG5tdAXdTjfLozbrNJA
z?xqc^c_zza0&V>I4jC32WT|tGeHt2t;vd7Qb(<qif3yj`CFgKAawZg(`{VJ+2z=0Q
z;(n>w<R0ltx&@rKOgxA~{9fI(-9T`3uAbnP-UN(~UWw1!-s3{!UDyv?#?Z0?vdFVT
zPuB#}tJqJ?*JUV4<_~r(-atbl#!*74kYHT9yuiWvCDLCz(n*<4w07*rQ4?#LzoHKj
zDQ;Alyc(vD=Lx2N&ZPBLr^)n75^6U+#_2W%(mXes7LT7qIRc`t30_e6>OtZMIn&*f
zhroN9d|oshAKeyVL$ZqCi`q7N6Z8r{?g$HxCiC9cX&WkZb)vrmX|zkn2K{>aq|_Bl
z54=Kn{^2k8IzE9x^gUd>bO5!wr^!KvXzbY}QtHqaNPX&Jv-Y-NpNfKD`h-pzK1;}W
zVr&Us-c=@v6Irxus4rOy4W;#slLc!v97+0bI=znKbFyjLf|6T1NvTbOI$B=R#c4%?
z$BPODk9^gsAoeC5@4i6&247%%Q%~S%o=+cS_R*M|7h$|}x^cn588las^WuY5P?IpD
zMSklkaY`>WDrVAvSOcX8@1lUOMuLKhR9f>Xkgm^IPuq*;!f*OHTphMtaH8H+5YFc$
zhka%UoNh_ctsEy3bIzn8!^Y8r&TV+86iv;+2L;C0-Dvc84Z(8g3Iqqz_|Lc(>1{N%
zNQ(=y_kUvF!W)r2V98SY-muz#lBk+p4HM2MuW9yVKU1n$YTjt*r%PkOib}S~BNS1s
zLot~9k;U03;rpCqj9!?@hF>>;qwz`v++0q>c&6Jrbrznym4foPdiMHj7kl?H6z&ds
zct1QF4}ylX&D;Z%{B;z*=mtY}*$7yT4`y+(zHG^wNyt9g%DUrR5wrgW`?GKt+WGA2
z_*+Tr|5(WU!$sk#ros-UeS>LnF&uAk=cM9A6!e5JL8LvlR7E3<=VeOe28|wiszD}n
z51cQZM`Pw*lv(Y8`K-@OW{3#7Ux_2d!3p&47u!|O`HNG+>|%2QQ#m>g&T9*BKfs23
z9ar)UVIFhbA%vn8FPOusRJQP2B-ZFYV~R2w=)GEw^Y4S8qrlI<hCWtv{s49|WBS-x
zf|@&n?2efSR=;g#dN!fVe~l%yokZa`!x~YhJx1QYmFSnA7z~cjK(BEXi*gQP>0;Yi
zyN?h$DzCAiInxpSw}^GQ@5h<cFwEG{%HCf&$FhDcW>piVnPizTxmd2`t^hUMDtmz*
z`PEENCBU)gn_x3sfX=r|*!KA&Fg1hsM<=JD^pFKa(+|KfyN>m3DrFY=KbX{obeM_c
zp{_NPdlT)L8!ds=N+~jI@*zFFd^-N6fvGncK_YY&icKtGJn(^4y-FkPq9QoU_Tx{$
zG<sMhL<?Sdz^^F``=)nd$+2!acqxt)_}{)r%8dJXcx|XK4Bfq-(fsNtoQf`Ct8Y20
zb$mnL|E9rc)EBIIx*PiE7ocJNNj%W|fu33cd_yG3{n|>z9_4vx&N)tUOoMA`Bt{qZ
zuzM{DEKIF}hS*3@j&%a6ah4jB$KYl{0r<)^_>~s%9A6`QY#xi&&DP|Zc@JW_a#X%)
zC@l@-J&#}d*mU(RHWoj^Q9kqMdw)rav!@+jxqGd58^l-3ka6l6YF~Vv4Raiaa_v<p
z*=2=l5g8OWa3Ax#;h1a_ff-hcBsE2X-ZwX~M%_qk{FIA--sKdoHi5<$AA;S~a5(kZ
zASZ$Inhx9pQZox5nrpCUzdI|Nu$}hS2E*3*0!F-;hQ~Ia*e8`P_TRV#Nc=M%dsDCB
zsoXrwUvdJM&i=xG>sKJcM;opGtTF5UI#whcPU)*3;oycWyceBBl~t$N^;3_yBefcR
z+86kJc`GZUAlPs=>9oTzLUt;e|7BsdJ>U?w3S!sdQTMu^f7^oLl@&~_{(Bgg)6hI0
z6D(Y_kdFCF3It6Gh~l~Ql3QC)ofm_EV@f!2V>XG^I>UI<WP0JXmtqr=sUvq74oFAA
zRL+-_O^gL9Nk5=$#HjAb4hY5uU~$4;ya_#rcZ-J6^@mb4byPje_~Avz1PjPvc_mc0
zuA%<-#hlmdVWacR;AI{S{omF2^H-Wi+a}{*@ETe)y&KYE+i|LFI*pcW;hqu&LB#m^
zq%?gVU0Wwd+ZIc}dv7Y8H5^IZ^I9P5T+K38$Kqv{IXQUh3JPaN(U!*j)cEQG&O|=O
z+@@%_ciuyQ!&7)jZKJV*bhfm17sf0qfz-As<QJwW$nBko%SZR4nw8<ldKrpc8jFzU
zi|Fpg6#O(SfW(%O+y!==viob%sgr=gaYhudZ;D{3(`qWJ5K#ZEE;RQwVnJFck|J$L
zZOm85{7Iz!)2^`MAKb4w7=Z5y($t^Zh^wzlnP`SC4D9%B_PmSe@X;d;(|Yc0G9=la
z96ICr684G32y?r}n(b|v#q;xIA3lw2JoTyi%zkov_l0TIn^2AB1@4%Uq@y-N;h&;O
z-txXQusw=~_*cO~<0te*PM~x0J!pA$Kyl`Bl8_ilIUNP0Lm{lIFB9X9^6)}0hFZC(
zebIz)9N2Xjms2l6EK!Q|ty)og@FJyro<>QT!)Vv9#dJU>mA(YMMw|Z)w3ypb%={UG
z-%Dd?_KHz7P_UowkBor__jrs=QlyKIN70jF8<Ob0#7-PsB6!m?oB9<MN$f!qqN@I|
zfK)#SeUPKuY(JE1qbOQ<8C-mJk@4Guba{ggKLg6}L?s$a5`0)`<7s;GdO106kD=eK
zRiu4{GdiPJ(F2A5=!DL6di_}#bJQQeZ_{WBJf24Xh1t@6bt4*WBSWrFzN0b!IjlE>
zF3;3P!8sH9x8OB9d2JF-o`^+2NC62JG*S14g?KNq5ogCo;MVaVJh{G-^D|$eB5sZq
zCj&9S83f5s3e>rzo23uS!#af){{QoZ9xw5y*MrIEw!etm-E(nl1?O0;iZCaKyVSS?
z;c{6jGx#!-YWd70(EkTqQsZEHo1w981ZEfUtfTZ$?gp2qj)Q9<tX_gycO@wyBNp2a
z|7C+C6tO(Yg<W}lm2P|<$CshqgSzBWa{eKTq6gCSb>RSV?X2iZ&>Hss<Z+%El%PXt
z(&TZN`v<JPVQ-)@+J?E{!<=A*HKxIfpIeslA$0Gk9gDtw81tpFQN3~~HE5iq&q^=Z
zT&sl$Jr#kKv*Wl|#~2<tL{XyKaYXGI9!%d)_e$;QwEJ(i<=Q!3Q;AScv^cMQ;;7r_
z9mGCzzXkW=sBm8M=K~3vdMSdl^LcnZV}QLf@+F<$4^eXbHIg<<(Ie|2I4(W{e@`F8
zLf)?%!`azA&S&60Q;71_4M<cp8L4-Su;z3rq-@fe!l5fv%^lYJ3P(_jb_lh7zs$x4
zYtbQPA*$;yK;E_nwzc6D!pd6FXp~MWwbSU7_Zk|mG##r&Hz4=%6__8Ap=2><`th)X
zw*SyX&XL2YRvF+P$0Y*YH)SNRd<MxEPC-9~JCAmzBbDdyQ~X5*gQKpXeeMJjR6eBe
znMx#aHyuv8vb1x^LRz@=1qGjIA<gT4v?JJsMjc8)YAE;c%6XGO>^US8wP>k{4&0aZ
zk!0T@n$np^3Fal7sU1TFS95UYZ4lk(PN)~E>9i+z7lz*7L`kbw)7m|k&?1(H+zs4E
znk|C35z!Q(_@16<wBqhNRl!e7ISSmZL)KzrNi4mQh7>NstCmdU#m=T@+q`Jmc0GzO
za;DX}MQ}ID$F5|44!3zw+B`Lkddl<4Mk?&$-%o6Phb^X?g8o}Gh0o^yV|)oD`_!wC
zj~cy9x;lh)sHMT@TRTfDm4kfZb%b+&`lBr+?8~SB@HWyL54LTfp>F-499=ArpNK<w
zVvw9x!G477LzaRB?zl$dw&yH%XX!X<Uyy;{Wud6ZQGw<_9ostXIkU+6#b%5ciqU4=
zfqLlytJ}E@i_4PXRizKv`TzKP*2^xGDl>l(&fhPKK(v%OZu>OjiQ;#*ygCw{P214a
zlL+0_6PZev92O!71J@eSxn?cmO8LEN=pcI^`Hc5Dw?p!*J4&V~;fq-b?1Z;6w*x|`
ztNsnklP__{Cx-sbJH`H+F2LiT_9)L)MdXyPY%TYxUG=`mzIYtL#$GG({=vUr(XZH*
z<{0jiJPc`(>olx(6W!+X@{pfA#}pHdvo~L{z@}uJbTomH`gWM*9b+RVo#MMAmLX&3
z6Es{sg2Pc#aIuxa>e<?uQ#AqAoUc^T8(^bd-=h2FPjt#Ifa8dj(CNF#WcuQm{@ViD
z=M;z4cVtnSp+N6<++ybzuEFPJT{u;}5i?S>neb12ToI8X$2o@)FgF;+@x7S!eiB|y
z6~^#=yf-;e2ftxgF(hI$1wQUz27j%wGyV{oEK_KQ(MJmSz5y#$C9ylg1~p;B(Hw1v
zcc0f$z1L@U|7#*HS@K%$PASfQ|HeeEy-@tA0)3@Czd7qU&$|%4%9;x8Hg$;ENa7-A
zm(7n(N2vG~C@z)9W;#PV|1KfvB}#Ou%pM*4GjQKn4M_*`aWNzx-^TITx>X_jRhEsH
z2Zz(MsJXav(gC7A+u+~5m&v8CqED7uR65)NE=xU0=|UwGf)Y`|UA|jS$&#k;Wwsao
z&^;|n(~3(m_-z4lsw<I{F%>KQ`Lj8A1aW7QU@}jcdbfno(bhJWQMChaxR>K*@LjSU
zIZg0kkvNP7HejsS7u28F3$ut}IQcx3u4Ju5{N7rqNrupa$G@@8IvcA$eurtLHr=)z
zCeV@mN#|2;z<#kR9eGfO>k>N<{?nGTb+O3L`;19R)9})BrQmq8Iu$y`Q1N$9WLn)v
zO^gI>RWP8hZ39>|z`w8DZ@)rr68)NFO6yPa?@K}%9<^#=_0I{gU)4{66JqEP_nql~
zEu%bx3`mBQ@%)YiVL>0-j8j?G8eRIJpGmKjCXmP#IeKI$M_vVOoaOVwbvGG$b-|M6
zJ)et`5EXi`rG;HxFcL#_mSIuREXtdDjgDIXVHz)YVodBd5cj=pZ2rgo`ToKxUxHr1
zZfO3RM-8nyq|n*KoHPSayQ~`BbI(!#XB)cnF9J_OIB$DO3x1NV%r$vE0!4S@=*JSI
z+U{V9Mw{v8$0OL*bO!r{U2!Z_9Ky!UY<;Rb9;nKpY(xq^RXN~H%|*D573UAra>Snz
zgYu9z_T{G|BijT@=6Q!ZdoJMf&rvk1wTWeK7NWO%&cpG3BqEMEvFU+JK}Bt-c|V2f
z6$)_RDreuP&%~Pg#kj;~!TQ-V?EO>A6h`~wSzM=)(7Xs%qsix8dgtK&U5Hd?MzU9j
z)0mNm5O?ZdWzUz|vioux++TK#nT!8nv)32FHXss?&t|jIRoB^DM*%|h#c1u=5S%o<
z#mb6=5o0O>5yu#26Cj1r&u_AYSL>ONTep#_hd4zblSNm}!N=M%7BD%ST`u0r7G%C+
zC5kKA!a+H_dsE9c2TaCu8B=6l3}#tP#calK1*SocY-<Y7jUC`O-TqJPsQ3a{YWXsm
zyHdE;y##hAW?*aJX`|JlPgsLpDDZnczGWF8a%?HSG}N=1r6DX~aw+!%AI6HTr>xW}
z4ei_?dSJ~pb}M)Z`ddd*-&0Sz+j^HxI>YxPe2{^Jv@8Pebh3~myU`(@!d|Z*flSU@
z`4yi)lWHbATsayGOhVA|VH#cG8x)6i%s~78DRBMzfE`yq#p2ww;cm(IBs|@Y8MYqG
zZ`UaDmp8%ljP-Es`p1<2^s}ZpiEQ6hJzS4|!-9(qF!Nvy%WZPRBkt^Rd-j11-`B@8
zjkj4?z((fcF^l4N^1TBK4RLROE)>k_*sY2qe9QV8{FihX`tJ?dwn>xVu_+8PABRxz
zk&&1;As>IZCwb^?X-NO~oBc|u#dB8?+Sr^-TZf9G=Yj>EHq|5CzJ*RlnhRWxq@gYM
z3dVTcLwxxNvU0zG4=1we%d=X%?tO*WDhHmmw<E{1MOd*o6jvp<^JvK<&Oa%V+slKr
zNB<7?Jc~r$U>H8D%_8xm!^t7rfRarOVE<lA5dKM<?q3X`Hl8g_p4kEg+r7B8(UKPS
z-N9eY$)rE&CmVG2qkE4xkr3;K-nv)N3RcE{(g|!4&yD4bj-g-6s&KFFAr0pBVE0UU
zy1dJtH0oVwaEAlN#GJsDxcOw=Rzh35Wogl<sT4J`39s54xW}sz2GvP4czP4`TrH_!
zv<<UU6oUO5FI49W=uGK0dYHNv$^-X!<~Ise594_M#{~-(@f|4hHzMp<GN#lKi5?h9
zYh86<`z#INog%cz`6$`*><V8dhC`fJV^43PlshK=t9pu;Em7#<dFv+&t5}`taT?z4
zjTH^wkaBMa4A<;OXwzM`<ncLP1M`09Hc^te+R8qBw57N8zu}!nNTr{c_3Rm2>MxEP
zYC3dz(sO((cSYIGS)|}315x+uFq|z)%)JALRQlM<OQ*0`5RZt^2<k`_qI&_NB)QH6
z2OQk7dSnQueEmy>O#-ysxq{}JC{~kpnbjq6&l5kZ_jT=|=0wg3eXv7|fY3R~8IOlO
zVN;fzhsBF)tYN$pM(IqW{=fBbJ>JG{b{N8A<W*$E%%J$F;S|`Fhn(1r_#z~Q7QZI8
zRBAgscoy#Gi+2cY+s*_|!)T|E3#?AYftO-9SOB~^c7|CRt%d8S4z?zE6DEH9!{*8#
z#aYfr4;P5zyQ3cR#+_x`->Wd?erw9N%fg`e4$P9~8Jsv-9E^UAJL(B=j=2rXt4U1L
zllxR7_}+k&g_Q8w4oCmFA^SrS`|Q2|V>Sw4Wbqh9k8|0Qg9p%l<&e?b)0XVi(hw$>
zVh7jQ2;2;nVGDG}vht0~n8j8bR+4GK7G0c+7D*qrdRhb1YO&;bbaBLsim+`Zq3r!A
zA&jt%N8_G_IA&DN^uJ8Rxfh$+xvGh*e*bsYXKKsT6}hKL`m|B=irrYeB$IW{dc(Bb
zT$p!_HY<89&T5`Uvxg^LSW~emZaC|+lWL-9-1UytN=#u{Qw*53=r^O<Er*TLZn@xs
zyeQf#)0y{w2Owo4!#<g1uy^}zv&PXMn335Fqmk`F?B~1h%((j(lMDLFhVY(RudRR?
zuAj<OpKNENqsBos=opR-_odz4y#8ACjx8D7j)o^kXrkFPTDh#AX|<^#)mjZ&L&l=3
zQW+UT_EFs*zN6-BDsC(iCciHaFs@e!kM{6<QhYJOkwa2DUCH9zXUIJ~1;I)ogpLrx
zyQU%ZP}dS(_fsIJ5YF@#$B_IT5vtnWkKsvEVO_r(-;)Q~g^8E3*OdF(hQG#VmEnkd
zmjPGH7f@O}5$SKgvmnkxZxIHIwib}eEF-eM5{xZo@${eeS_qvyiyID2sCqMk=5)rf
z0ydHFtN0FC-&A~U-U+$L3pjs%B%BHrFzZAb;yXg&HaD6Uj2tJJZS@_Uul;c{$AI*F
z$_2KUlmyRS-$&SjLi9L4!MGz?NSuEe<%VMfBM%3nZ=MA~?kXM7Nurr)5!kAdN&2Pc
zG;W)?pguo|BvQIa`dJxwgNajpWj~^{ZqaM!7))5&j_&6nSkOLCpeorzb*ny*N8d1-
zn6E^?uja$3#)J1__LHRMZ5k)*h6c{^F1sB{yIU{8_-zJe<?n-@f)l#WT_!DCf#CkM
zR_On*5X36)ppB|Tc@OT=i~AD=M#4O^6!s344oP(Edq2`HTXUy^IlVBsi#@%yP&Bup
zb=;e}TRRioR&n%c*BiFE{wW(fl5>}_AS<U|NdK>n{Z9PN6xXTaQNSd8?dW4d3qokp
z1YfK^at57)jToZ!18%k>aX)MhTp|ikw8)><Xm&tTn)}hOIU(_*Di+;$#*RrMG&xs|
z`?F0kC$ffpf6zrWquxVM^9+ur4j5u(4a2b0Ot}0Kh918R&FB3nvgP$de=0UK+(5L5
z9$Zbtfi5j{2S_tBt7>c%F{71Z<8dW3gdDfpLf$n7aXLRC)186-kAZAtg*1k^KIZIu
z782q{(vg`b0eMaC+f_qKofWLY3-Dq7den})h`G;Bu@2i8Y|=l@0Tv&j;*4wLwDk!S
zN*@B>(w+DqsfoWWH(7oDWvE%*U=Kw$;ss}_H*r?P`b-b|x2_4pcg4WR<OnIZn9`kh
zuDEl+6@C@67_#LP+Y#Z5_5eL-25!fNU%OcGCRh5p?Izqlp2n-LNk}TY%UYp^j+QxC
zx!D*WE^I}G`Exec)Cum~d$C1)9^$u8!Pxjl_G7;%>o^rg29msYoveeQ4sx_WiNTG}
z(R~(GpunpX-VPe9Xm1QYo~Y&i!(Pb71Y*JAl`tt&LU!p-R^D?C>%7Atu|gP|)`sIr
z_!px+6JpqZ7E>`+VIEEG981S)0$IW8HkKa#j5!92py^&Hi|zJ7clmW@__Uf8C33Ij
z*TeAeImTKN9Pz1SC)!46Q|r5RSQ-17Z8TTM2TO6-=@zhJwJ0cW8(=Z9YoNEL)ktO8
z1!PZl;X9;uL4T<zBu;K*O`MDJ$^OhDmX$G|*>;fd;(HRN%|hkw)o|D;3|YIYY&jfR
zQNw^yUzs%dER9Fkf}gDBP8N5M7qXX?b_hS{hy7MDI2|O!GJ{6rmF`;XY$${F@`bqM
z#`pI8t!6fLADOQD3?!vahcRU^xfS8K?kUIgk`A-ac1i5i;|v_*?#98$Sxj}+4rYq`
zOt$+5^FEQn<oRBu%*7FGdvZRzDZUw*rAg>Zy3E`=@3Y`V<FUC$lZ0Z9W4Nm`cK;}1
zQ`e-kOQw67*;4-dI=-=!7qYN9sLJS7Tpc{i-mr}U%G{0jjp;hhX5NA^thlw0C6qm8
z#WSVgu<I|Is3i>Iv*V@nO4x~)^-ND%gR#foj4tdbfeYL)*>?`+8m~vs=9%o;7EgTW
z4u;+DIdFIEGWtB%9@lit(Y#v|MK)?UP-=i_7yH?=x6fF>joVDDZxp1@zhHCrWubC;
zozb+R+t^g^A-K|X6$55tDZwOyU1`Z@7gGA!uiee8dq_5`{~HDCgj;O6f-K5@reaS|
z0-iogV-ov#2K<{IM7Vp-RI35?-2dzJaXOX_sbS{xlG&cSS_m!Y-ngl}k6<Rs44bTZ
zenAhF;#!<-e85UQ=dnX@Ww#3cvIpn#*ob?JuyEd0_HO24Xe?WVmZ=|@m0SVS4_nJh
z%7(D9xo<I5We#llUK*#iOWXky#11{ML#ERPh$=a87EFj8$?jt<>A@(mwZ}g%hWzjy
z(BbpAu8+CQv-vMOetaqJR!o6tiUgTVvS&NAM}nW?*f?w&iHRH{SIwb(Z;UY<YMh`v
zS{DsP(kLAGhlT5HVNn}}@3T1P?kG=<FQxGBZ78HZCqt|*mtxOqP*?2{h#e_KU7-XX
zU4O&QzMqL%{#Ly93B=6<>siUHy`=5;2xUCCIz`eEm#W*C%aIOd{&ER^DM`a%%qc|F
zj>L5F|L0{yspB%=8zLtU3KYlgxl38+g($kNRR;5vR`dm_(1B)QJgDe_&5@fJvG)qX
z*#TBRX$!7jNx;8$A=-5~0SZ-jad^vY-hWj`rQ9-DX7LPcd^cLYIa9hrFpFtk2Nkzp
z_$L=i2Uj(bao-ZG5B9<REKjWQ3qvqxRlk_lQ_?3Hgk3bGoy!l9m!J==+EGX~yMw9!
zx{zjYlUx+rNviNYzGS&m)yv}$GueXNNn$jbcpgsk5hUL_AXY;lD45fR_;f4UJbD{)
zxYzLOqIg)yc0qCSd$@GSlEFhMd^|ay-j^?<Yu}b&-i{IE-2aHZ%)ZVXRUXhpzcI96
z|2ce{_nLOU2}YjHaJqR?oMQPbctBnqXRJavt1^=2$jeb|RT1u6c5vrS7M84?h}1|G
zdZf0M!mirlD8G}eyy3xO{Ue$9H52?)H>BiOi)q{MWOfA#a1|m%$LV6kk{YJ=@dxM6
zobh*y6ZRWPl5|=F21?tQ?Kv&@&O3vVNu%lfjw<NLuY!_+3$*r&A*Q>Y30n9r$tT`8
zddChE7Y}8NUPM#rTYjz`xqw}H6|767nR6Yh*{!TYC``4$dDBe%IeLp(oT))+FRyV{
z3E}L6XY8z9I2(}_%a&b!kN39*G4RL;syu&MUf;_~1Ng2*{Y2D!jK<ZpajX!7Y_{-8
z?EapCv-x7QdCnkOsu}L^UYSv-6lNZ-MNggt&WRpF=boQNGtYfxPv`R8nJvHI`fMd#
z^xndXRUWc+YT~GR`in^)3}#N-_;;xyi|wiy0pt23cr2NQsT0n!6<@7z^2mO8|M4f&
zuN9c`Qvru~epx81j~TyBXU{c*ab(VL?gRG0?yGui=6F$>pWut+8ty86Dgvh^Wh~s~
zBJ-+jVSVF7(CMrX&){OVVkqAS)Rn}2wUT(M%za1-H<<E2QFfBwGm3ewpYN`Rz`P7B
zwWwpZgTXl5<cil4&R7ySh54S>fsVg7PN;sxWFtGgT;_&{E-#o##S3OBX^Tf+vvCI@
zn4hRbk59NV!Rd!A=)X2b7Tc)WLyvYWu!iW+QJCDWg;|T|@Ndctj7`)ghiyY)Cvh5%
zTlFZWyBaV47~`f{G;FSO&#(Fnia6{;U)SZMZ{i?!j;Lg7)|N1>E}o^R$bhfWHM~CR
z$Cg{zQN2Y3j$KQE(0oIjIv@wJ)<f(*zh|B={L0!_ji9zoU)h|QmoU!pGFpD>VtW%q
zQh7G}rlrk{u1M2R?s1sA{x#ezmXO%~X-Kl*`$sqCLU^Mg6-v~x-6A6}Le3tV34amD
zwxOr}6uxcx%`WXU!BM7ykSA$aw7nj5>^|*#u@F<wR#VhA5fZihMniXH2@YJj1RWuH
zs(CVz^tbDfpXm$ykFGb5=IV{!zhxF85g95fnlw23KKFfONHoxZRGKJC(jbXSMWoD;
zc`ieSN>q5C`=mk{(m<N&BTbq#4^kSQ^L?K6to8f;@niY#%{uF>W1oHReO<5X!6Q9(
z4HcP>p9_%Fo=3=H2+NL0O9G{>O2~20d8EHf8E@%*gbZ8KQ1an6(IYS#y&5aB6SwLi
zsjN~IEy+VK5~5&V`9CzVx)lA1S3-Sx<tRAvE|YZ74z?C%!_KGXh|@7gn@2aI?JG6W
zlwZp1xnWySeW*OzSL=^tE^LG~6DnBCaYab7=@v>CvQcipcDQ$YI(v8EA_{sQhaz;-
zU`yIg6jw8j{ns#_-6k#a#(us)ja)c7bB0Ax;<uKuy0}A5fPJ?+&=fBdI5b5DE%|&N
z1`_Wg!NHu(6npNk>y=THqXLrmABEbdl!MW52b6r~J=FixL{R+$*&dK%_wDh6^B?a(
zu9nEz>C9t~Z>>XHCL`qetQB;tRgtyg4*2S}A0>pWL}EfXidr&6^qZ|>BXy+E^RaW#
z_}6b>aM%v;x}pRBtS-UpfA`?@I}cPmRuhr2GtgME=YD_fA3Twkz=6|&Fvi~;4vHb#
ze^X1)*h_h0rsN6qN7RAQTMg*y>_}9%XA_cy+dwI&iw22$nCStBQF>V`+U;=zeLMRB
z#;i0%)^{(%<<dOV)M5-#2g~49He#&Yq(s)u2)KM}5c=vo3%ynQ#&~Y~%uEb3gu){p
z@b%PaNF7-TJ#!;q_a;wJ7+VhiDsIDvs0mQ_LKBwRZG(eNv(WRQ`Ecs2J}e)IgiAvd
zA<Vj$+3isZrKe+|RLcW=(-oQ9D#0i-ayBF+r$DTW64>EAjLME8<|VTm+`2}?_^+|h
z8d1(X9Il9N7iYjj%?V)oM;mtSiDg>f&SHEl)X|TGi*VrAXz<w}`ri(nW@<-zLFe>R
zh<?2pF8MEG#{Z`SvLnU$S8_Xy74uUP{~m(BSDfI5oGhH2H3NoPv_jFgDdKF(2t|JV
zVAgZu0^_)6I&?;sL&x<&XzAx_rbMQbiEfvM6W0_VQkrLm|4kG13+j-!&<UnHYoS#q
z3c%4Pf%)F225K=8&}F<t+(Qk3<{<}IxJnS2IHs_2+bFo6lLy_hpP9(<PN1&FF^7f@
zz)-&tAh&J5nBh#|+ARyxeo3cr{!f%loO+fzUbS{QOw0#BcbTB0qTOw1@lz-1Xs
zO{f*4()}3zt+@;yBj&@rpy6nFT!Q!?J{uPQj)J{Z?5SU$W=3t6hD(QLL8<<4(bsVb
zR-Hcw+i4urQQgP<lh%WXNQ5eX=|I8i9pbEJFmv!iJhN}bS>VjB!ClQbG+vU*lrENm
z`27x$JyjNDj2|*f@2mog*$0_ljl;ku^8t7Sh%>F(l$rih3BqkXVU@Z8db*(s?D8$(
zzj0IG@f$O6-qg<AKR69ctZy>sCrpG7u}Mse$yBt$4?q$g3)jTCy3MY)%$8;PplG8D
zrs6E(=%$HaCX>bF)<l4ylL*(WpEIuIV*N9WVf3DyWRh8P<Yg5K8B`7?gdBqh$#0lq
z2TMrnj)U!@#>MY2$BY?s2zaSFP;EO0M~$^1dRH=R|3{hcE^Um#Nlh4L^&VnO6d>O2
zC44<NgIOM3%?uhj1@Omlh}aR7?wZV)dEI7Ch5chT=t{xSH<66jsA_1b>tJRKod{#(
zO5lY~8F=o=Vz#%g1UnaZAp5<MV`T%VH2cB!1<FvnsGr$;sFHavW=|MTR{;gZ^{{KP
zJ~Q;K28uR33~z5QgP2xr*sN2))X&IcqRq6ycG+ujmlq6f^>3JeECW@ZN8tYOsnC1j
zC38rwf>{@-z}(85h$bf|!J@a$aO!3qT-n>p{F6z75$%3ps}m0v7d4q1KFYAnG!t0y
znspR>VOh>*c-!30M027a)scw0pCur5_ZRa|vkFeO>|s{uL^0aY2H>Lm4;pMdk!G{N
z3=u}a6UWJ5lcWI~AEz=?#eH7p>@$p2qa0*DRX~pOTH(^YQf894o9HW!fz`*xp}>m2
z;J;)O1m2kqn|@qndc$`!zrqmY@gpJHG6xQ)j$piZ=p!!&J;*3^gR;0gj5c?dnU;Kt
zS^segjGTXgL8|kB(dcDTK8aarmx^Hc24&b+ah~~c&y9)i7{SDi8;07vd|=qF#c*qR
zHKa!6Fm|u=;ZeB{xGS%NLTAL-i#me5_esDnkwFq}G?c1^Lg>SK=AKyNE=}uY%xvQz
zVESn!J?J!>!>@!byAx1kkQ7_+E|hgh+>fU@=%CwP8<63Lq3n^i9<;nbWNzrkvi95n
zI<r`bJzJT@);cw^nWY8jsB0$Tf9kP`Xc<<Osb#|#2xz0PHp@r^w1lxj)-5C0Q>|+3
zb(5cHqNqpHQ4PdrI^<Z%PBqp?KNz`h5m_aPW~jGz0R2lZKriArcHY9*XsEq4yGCU<
zD|Jnly?S~Zie0RU^3yt@XY&wz{wra(7e}(w`y<!^4{c<pw3$7z-h*|k??AnEb<j;d
zAsx>w)-CZn(%OB19a9>|E?cL}>P9!C`TF%}+uPSjr)DPnis**TkJXt+QJIX^3o#qz
z#trCW5|I0ePUg^yPfVY=4)_&|J3}*3dzfDd5IYz)xf;O+^LUUMI0aKSwlZm~3VeST
z0QPs>(1^-l$h%h$dl(&9xmOub*H6a5Z8_*w8N!&nNC+Rgh6!74iDahDhKEw8;Gg*r
zcu8epLd|bx)ZLXZB3u!C-?@Rx-y&w@W0Cp7Hi6uvsW8;V0wy$8F!%C5m}&XEgZ1Bz
zf^N_ZXw7~NogxRYIMokcx-@_gB))%qteB$skzl-_4su`1qFK{TL9$fLsB|9$@Biyz
z+{a9UqRan*)L#cAvp|Pk=I#xNS`&e~Jp`wl8te|s8dQ=c`qy8D0Y5twq(m+a-kAbD
zhC1xW#YaGTgch2dunT4OYof&>BWT6-CO9fGE~dC;BJFoa(41W-(O+?Iy0BscR5Tm}
z3-xfsTn~e?^QCa#a2@Qb$Ku~fMyaFo(QlP>ND>)zOq@M9zh8?!drw4jKF6bdi;4K|
zG(ejVJE7e0y|8J_0a$Ho2c{dYGM8(ePzW?2pTY-V;vvgwSdB;CO>e=>aSGZLb{MUA
zrUIG9(O^Ac9Xjr0j5d|aqIwM{^wZ)T)RgRpjj44IGjSp+EV=<cWll(M?OtYxv6%Bx
z{};ab9z_8~9f;J6`ga2sR(nQ)_2dK4lPS)Na|puSA``)-1ze`uqD^(f(3YMV@W7`R
zg0BumQz9OtvE|-qOq#flHkXKOX0h+wxC-8#{sw*%j>GCHQ<1jQSLQ}~D>4k!L26^2
z(B9sBFy0vho1*qYuT&v8pAj=DP$v`$+eE)~A1LoIM{gvt;N&L2X3sS+dX?zYYrlb<
z%+1igm|__1w-3z`Gd^r9$D)WOD(J=52}oh_C8jL%EZnRcg@QWL(8lqv;bQ-N*t8=T
z#<*^S*YlOo_vo=m+h8X6N@k%92xmfinwjtKO(8711ie9`W+W^eynGG;J>es=ZMH*l
z{|LCR)e1S^OrU9Q3V41GL%|>Zz{7%XjFbF9*jfA<Ml5hca@W_Onm8%6LdhTOtEa(D
zmzPXGe*p@VOChD<C<uEJ8NJq26rPa;Wo_5s=SpV?NL(a()-#wvxn1yXt1kTfA&rRH
zP`G!a2c-MY!PymIusD*4oQr%$n+;;f)QM>2CtZ;VT>~Av8I-hZ6QtOEh6|^jLQ2yT
zG{nAvvG{2TG~^VF(U^t|8w;VoO%Anwn+Ux%?(lN&4A4<uh;&YxfxFXK)UF=JT%Y0#
z?H|N0;wVCnW+MNvvX5yNc0sz`9^mJ?!}Y+?a8(TSv%q4eU(9jv*!K<G9vUJ;Wi==t
zdJ-Jn+QEF+OY~@;K3aJ(0EXPlfz#L)Topu)<dq}PpuPtNK71FS-w0+&eSp~aJ_oz<
zJowq}1<~olA$O>_Kd~@_#wv4>+Z+dL5~jkMjthY2SfU#5U63){Q_O7`4@&*1%)22c
zk$+1W82E_UycOCg?As_X9W(?bPX7u%>75XJ;wbZ6I|UXwcEX$=c4%r!4g`+s0}t_?
zH~h>rh}bA<qz6R*it!Y5c*iRCm|YDsUQ=ZDwY@@POiEB%*=nrYSq`_oh9Oy*J4mA{
zQRGy}BjuylkhFaSii<md8g~S<R}ZJNrweAG<GUN+Pt|keFlr25ni|Z?wgw@CX^!ZG
zbeY(9eg$+_ft_{k7Gir&pio6|4n7&-CSwONjpilN{4S4*R_KUXhHs(LCIv;LIH7S-
zcm9W`jM~~Bq6>vetV#Vv#CUuMpB-hui2OyjIT>uCgDU%1V+eb$`!^aLEoPc0_aGf(
z6*i`L3c}u#;r6L=G?_1Di`S*2MMf*x*I@zd^rj*d{9q$`+F*txFY3^^;On9eKn?|T
zL^Ac$rNH}#sLv?2M|bb&uw<eQY@2Wlyqxpl7JCh1Cy727rVZ4p27yyt3S{S+AxjG*
zbT>tub3|r?XUbf($+HqQrxDcQFdD5+*(~z7Dnaj@$kSOm1Fd;>6Q0fx^{b}J@LKyY
zvYDR&79%#HgpxGa`|t$3zy~1Fz7#^%=RxmRksoBg1bmlUp$mbgD0|~NXkK*#XvY_(
zKDCTl^&}D<D_x3aowbL1`y)lR+-?}v_ZeDtEkKJND5K;n3Xq^s1BbR5p~BS5Vm&?%
zDm4=5|G608bTFJ4xC{CoCFp+JOXls=vmjS7f$=<2!uWC~@NLL_xTrG{wP{^qtdkxv
z1&?ImV(WLNt4Z9U$ybAJ(+6g3?+mahzW^7CkHE_ovDTyy6ZMM@@T!|bDUSzW?uJ0f
z^0$ZAs^aWtP!Thz!WX<*eHgLU4MtlyGd5S2p`68bAoax)(oOYX>h&6??oT}9^mP`P
z^=pHcrsyBSS&aP8gRuEgGpt!J2evITkOBvo726e=!6#KvZ$%bV5q(JARR@DD1V-(B
zH0-cRhVgs=*bSS_s8fJ{tTQb5bPRTH4Th!VSD-On)a@TS$2>nG&NY%ZLwsfjBd2-=
zQZ8yUE-C|zV517nVomdp_D1}cNX8_mjajr#8|E$QVf2p|F;ZbgaCZ1x=FK`2aehz^
zbWA54dY{IiwHT)TwTDHTheYph3v}0Og5Pyh&@I2sXc)yZQ(Q-aVyp~&PZ<Z35>=ST
zPfbwzKWFHOm=1FFDzH+^nh~zWGsc!;-jK)_iU^tmi7hJ7_;(Rh!$~lAkOj_JU}BdS
zFaxvJGGWQi$i6WK7O3xp(7GHrHsKJHZ=V9Sr+i^b*JcR%tj`o~)P>@qDPXT1266sw
z@LtqBYYbI^{S)pnJ$V|ivTGq|r95G>PKQFe<~_6EUv<o}X(OSltq->C74y5cjAQn-
z>|#O|YJlm7i_AC1lnF1I1y#ODjI*hjEuy*|)>Lf;-Y=P%r0EYYQbt0(+$Qu)^ilp;
z-NoFy)XSJGZDMjxtY&sCG6$)x&CE(KJ<-!SiV1hsK<k&R7nvxtVJ=_Iq^o5z_d1@5
zZ0|Paqxuu(k%2zUR1y2?Au^&5*cBT0E5kk0N=DAvhdKLKn-Q_Qs9M|+Jrlk6Z}*0Q
zRl-|liNp#d)EZ*9aB#uyt=Z{Yoamj8gZS<(@NoNFusJswTE<i|gL{gYb&uwQ&)9LW
zv(8e?zcWCAA?eJ0+|6A4?GMr-Gvu*aEV?24f~ga=KOdiXgX~=;pbj^gJEcd(e7zYE
zAv*%PW@W;NCvD(6rk-gvKLB0U5s)V4HaG3AgG!4d@P6G|kS`hr336Q`?`JERY?gx;
z(aBKm?7}okWArxs0DMXf7yYavyX0~^b5}+i+!e%KTiZ`2@?Zo^QWbfOr(K~gTuv0-
zE(T50PA1V-4s;u>n4Rmw(87t;uywMSMXmJ|V%*O$wW3BeCtb|RDs2b<u|t_#;w&vC
zp&drKJ%K?Fd||ex$e(i?2UXM%9#7Q)_Hj1k<W@81t;1l+Uj=6E;rYy`g$f{N-wf&}
z2ctbt+?cGQ9A@vfN6czR(NDHMotbdN3Lf1~VsbjG85Pm@D))RY{0!f~c#j?ek`PT$
zemDj#Ixe!9PP}6@LO(K@^9q<zk2f*Kss9-7izQ6kAYE9Zqrrp>{t9&)r9m@8AG#l%
zW=!X-W|A%(VyfidF^;tdnQJ{pFv2T`k$lpDfZy{$|J!AzD?W?awZ($zQ2StJr}kUS
zDOwE1^A+Ij{_Rkavxk|p4Z~LBdC>BBFDP_Wm>ui>#vEVl3VMZ`q1R0wGSx*76Uk)~
zwGtTTE$^6683b7~y}<9b63SYAozXbd&B*&tgH)RnNV_%~o%Wdsj-p=dOPd#L(cBL?
z&qu<Fqi<lbjSfhy-U4%#kD|RcVur`(BIcjkS&&RBf#DaT(7-5fRNzzuzk{-2<v2MQ
zCISTee(Z!PgEL`jv#2*(m&z<`!e}LU!E4u>z?r+lM=wRlD{o|G&K33T-^Ykr(Tie#
zZvpfF%ZL3{zrc06IdnB^LK_H7@l`t}KhYkkIp2khm(?)3XdKGrM?l?jRb+3J00AEI
zh@0cW1TJTxQPg(zJ{7$fo@Zf5+I0xOrUfMuPmncXVUU<DI_Zg+V<UQ#e=T@v_VvV9
zChzY7nAXt?)31+3!Sf=R9a6Q-^!DdW-`^+9*rr>|o)g;Wyy#Ies|bQXwHJU6?}bb8
zHyL5+JLY*+E=;_+6O9C2<k@KfO_uk-RsEHy^*Sf+M>AkXxEd71hQaDIJ!ayx(P-SK
znUFX)4Xm^W!^aX6D5R=je$x{sPg4c?QzwB5{lqAU*>G7sAz)wnhDkZ41V%;&n83*l
zvo=r-eI{?<slqf^ea;NMeDFlvH7*0Y+uo4W=`YsP1I#ZY16a{}5}IF~0M;xM=&5^P
zz{o(>^@mKD>pGAX8540W@+g-aLGC4#IX~P6YW0>Q=Q)qjy&6Y0Ao(lI+js$DCmSL5
z$q-}`DT79zT!eCFFNOB0qtGOq&&cE1ezey=26j58Kv=U6YPvj}{VjVEx!v<at?w^^
z;aOv}x2pr(3(V0z$!rwfXpAyG`@`Vje(aFlV%GMkI^>ty3Df6&f`)n?^7TcQ&Y=fT
z@o6Lq(W-@8%W~1iUN<x<wGcjgPez|h7>G7l4${U|Xx6wq^w;DX6dT<_f|$3HBhF@5
zPd$dxE}udZ0xiL_NFI5ftV4HhWr)mbB~+6Ti-ygTMaAkxP^oE(<X+uG_wFgf_BvU#
z@8fr~?v~e#)kPhU82y9oOJ*X@H%QDGYG$ysI@F38=u@vBXWmzx1l5QxrmblnSVxHc
z_KPYQwl1AH{>l>mtk?tgJ-d<H(bsUS-55qZG>6KoqW9ZY9<1WSV4zwb!ks+eeW)t4
zE5s4~$(t+gUcBMW<RP%E%bU^qoxsd$9R`L$bxi7sP4Lh88S~(o3*7iq2D{`{AnSDr
z;~jm5`O7V3`d3Xx>(wG*PNj#aFYku7mR!bU=N_2(x(G&p*bP3u28@Qkw8#fZgg(*t
zt#&6C+Rhcg&V8?#?(-_JV9z*FQ|KaIyOMa;&_Lt&rZWvk4B<1<20>VXwyivjXp<4#
zbC>}hf)fnd<pV2+Pl7?s=OMp97v?n120wlAOxY>=U*%sgU$3r)jt6(Zx3wPiefbRC
z^fZ(uE`W_AwP4WqUgpn{vyhOI1fK&&qqT2X#`l#fI(|F@uFUU(wgMM;v9yaBWsuLT
zSMq~y?la?Yy#o5goW4PZ*|2u33K}K$G9~KInRm~J!1FnT*~A_~dFkWO$gE2EqtYwR
zO8+uuilfk%xqZ+asgGW~3}DoR&G0iS9Y9g!5^r&b7+JAa*s}0#=?d5p%YnY0$UaG|
z1XG<U=y8)1b7tKZ@Mtzf+5s`htnerL@l6E|M0-N->v9MjavpAr^<?$}JLD#2U|w4P
z7)YzgFgl=$`ad0ni3<g2>wXGP4W6SfZ>A$Ps{*jM(?Z&dPs2Hntq>XX4Kl5t!Q3DH
zaPD^<W7T~f{U`3N&paG}Y^nc5Rw@Cv<p<&7vWMWegoE$wM6~MSY%$v{6g^(5h>l5%
z=fw9NaMnc&2A`^CcD#NddaVpmZ0<hT-z}asW@$i^QUipvij0$`1u$2xhw&2YQBdzP
z^gDk$>Sf2FJ?lzf_nAyF8|x4_zI+RZGqaJ&J}snI7>kA<Gk`?(DEN&uP`1(&q>;Fd
zeI)MYE$2tT-K~#6XLk%p62+X??vqF^YCK%rD}~VRQRu7jWOQGg(Vm;5jt=B%A?t&`
zP}}%R=vDtb#PE+qCdFkKv;>2U^BN>q?hljv<KbtJ8*IHkk^SMd9cgUXf>39U__;2K
zF}n(`*QAk^n0M_SJ{&#FTmyQcJ>U~&iqyn9wszPd7^Y+cySM#gPFc#Z>ytu}^`s&Q
zNvJ{bxt}2Lh!j#7q=z2woQZ4``kB)wRM4WM2awU`(I_xQ9t{o9L@_1V(AvEg!q=;#
zbt^_AwNryp+eCS^GUNnPnl}kTcG;l-xQTRh{-RcOQ<(4(iP<B)pnB*d>`Uu_1y2%D
zzyvYt@v{}Ob)A4RR85fisXO4Zu^kdB|G}P*Nyx3L6lqV&2DQIdNYZo`P|qeXly*e*
zkHs86!Xn)PWpFVc#~RnjpyZO3NaAxCI;TWHh9nuzE%^wq&Gpd5>sn}Mf*%Bjr=Vx+
zq)?^u4mhz|5gk?d$Q%ea5wkTOp*uNCQBkD=YCm%wja_g8;;v6XCpXVQ26u;{T1Rt8
zeDM)Xb7rEUA|LYK^czssR|0x-Tj6p2N+`>8LwQj)X!ZjwxDe9?9-#@$-Ay_WE^>Az
zDGfu8qCa{4#M_LM$P4(v>A>}-v0%TunYoF3K<SSuYzpy$1*>Gx%y7};akHJN*QteJ
za@!%HAP2Q3XrX=k^x)z09JqC22qb!|foI)rxFW9!`NOI~a%Kl}S#cvWCCeeQuuSCC
zBe2Z)k4f|EVUz|~SX89|ucjV?D_;u4T)`ah+CLM0uLF@mE=I=4yk=G{U(JL~4Mp+A
zd%>y73oKp?K_7aLGM^u;f;8O>*l}9S-FTwK+?Cw{qgJHC`q?7;MbvZt9_$3I`({C(
z<#=c`iG%Up`$Z3<9IRV$8FKo6n*DR@Vr-(cU|QI7@Vzq=`KRSFueBPPe};X`g+_U}
z;giRVGpz$ZF%PCSL-e-3KOy#e`LNBSh%s##1YIH*fBCz~;v6^!2Fsg6&^&!uzr2en
z_j|=WP;rBMaaNG<Wge(M(qU#uSoGa^7Yw%92i?_2#2s#^=q*ZPvK~$Vy8JV<t70)2
ztrh1?i~J!R9Ra5mx==dd2NPds3}0qjFe7X<(d3aOFn3TK5TCQ~aC$IvOi}>pTmo#k
z?F^IOOkjeiiQLLTX`pXripsnjLC>=c!dFj#wVnz<C92@{Z5yPdUXU9k^*>)5b?2we
zpBa&|ab!Ts;*>aTKfi5L|L14Dm+ua*sh&O_JEq#K_t=`UaoG4YssHa+q_!kpkz8GT
zRN&t&;v(gqaie1+gsC;MoFqHeJW{iSq?x=YSt;9vKh7VyWjm&FL*n(!6*FUmKVz*q
zediOxkA(*2@6YB5SB@X!D(j`p$DW^W{^pkLc{i;HLBB@Le4>RTml-@lxaL&O>a0j2
zjt@@K>7{vO_{1ckd#!?bS70vp<U^7WG&6-`_nan2BaU&pDQaZgra|Y+9CJD2^cmcx
zyM2OR>u6za7$Mi*9>xdOXb2(%!u*_(!uiXodgcbjL%DAM+qB1Ym$~hD9id<QJr|*~
znvGw5mChPf&TWYMOFwNc<s`4w&WBx4JRhESiyQiF5>+YdCn@4hn|bK}zhxu-FSl&X
z^#66!oO1rM@-M#N@2eHr=ffz=6imhQat3g~v_0f~QzB~;_>^4}I}R^DuY`4)FR+$z
z7xCy?SuEW;5tF<QJOysyU+c@*bSoX)V6qPj7vjn3g+H-z^$a{R(-J3Klf%n>i`hSK
zoN;v4cpN{~6+7ilWXG-zBwCgW@IkE|xTALvHX9z!uIWl+zxNO<_otivt+4|u8!2I?
zJq%xJJ%isa9ECl8-e4;-a#?<qF`N3`lH4sw#g|rv;jG5n_~4B!Hlr>H=Y(b9NwNp=
z?g3pkW!o6M^;Zkt=2?iz_b}YJIRnqhQp7=XzOmU)MqsOiQ2f7dn%@7qX=|oW-R$M>
z>$T<oc-ChAzmKQXld`cSXa`GY4J{?Pa}7v#O$!cgtm4nED3Z9Z{=}SanM2e@XHXs4
zI67@rBA&n7jx5dBp~rsIVU6z_agwSz9ab`%-aNJj?fmZ&o^HwV&F7@ZIiU>KXxWmg
zHFN3t53~5PrY5{;nI0YC630%uRff0he1R^_tEHK1EOAZl397f~kECJyOwu&uD(3&C
zk%&7{wCTeVGJdJN@YUchJAVC9@^CZ5bxxU07Di|AcHV|0VUrBzG=>WX?~AEjMH^Y@
zGn0Sv?I^LJbGbED74%HeOX4!rQ*yAdA8TYB#$y|Y2p1C#h>>>&&g`B`N{ZLey}o+%
z{=p>rRCyn*`+gVu+c=T2DosM5<3W7OI)bdaEJH^IXplqAnS9O6Cj54D0U!BpfF%05
zlh?y;RO^iNAvRlXkR~pP!#6L}U$eIg(bCrZnyw>UWkfYiFVqnl_!{Ba(`u|T5JW$I
zAIBwrlon3?eMcUh^btH9b?H9&HgdLIj?gQzICDmvU=_Md@;mh@SCBiN7AKz-#@VzJ
z=&KN-_Wa^+E<PoEbts{eN{>^u!EgC1Q-*VTdyDuD#YJ>#L<{fG=*!pKRiU{?>D<KY
zTj*Qw>B5g|I&}QiC^}F7f>4*JC~T9}rSjD!cvc`sweQ`il(G^q&5FY|DQY}xu0mxi
zRJn$rEhN%m4VC)5o#^|Hpz4b@<BNk@t8J(1-~m@1`eV8+Z8cY=58id*@ICg}V@)>=
z<|48A0U!KGu?y!_mD6tz4q*P~6Umrsuf!UmNO+}oyvF1tKfB6=2-4B`<O?76L1La{
zO~PB&GV%p}d^oQ<I_(JAeRCpNT5^SVQP#r+36nUx$SS`4KL_HrubSAXoFQ=+-{Zbb
z9oY6{5~^?<&ud&NCH|MM;ErTRe&{DV>ipvuF0AilZ$6wuU#3(Ni=qSUy0T2%G<_*i
znJ|%fs}~c!z{`@8f4`Df%`rq?4-4Z?RaC`y=n1!%_s}5on}VvcHXV`f!Zr8Ip-K0m
zIP;ltv`2di$(R0$XIO=E$s6}@AM+LwvrUOO*QK9ydvlzUoG-4vxtEvUnn~h(&XI8g
zM+j3hNqF0NPq2PIm}~BJ;CI`NBfb`j^vBgMw%Mg#*s38z3cjS!p)D7P-5h0nz&Dky
zSJD!`4&BHVG};UCW1IxrqdH{!&u)7B!%ymD#0ld@N@2Niqq$1~JIJwPp%e!=3Dqmj
z_!C~Ev8!J!S@g1njEIopdT(Cg-A`!=qaT#hHcwCNJ~&0_KDkc#{>Fwc9b3k3D=Ful
zU-=W$PlaUR^vn2Ll|El!*hSjR!l>R%CxQEtOaFu{z~{bn(#JRK=zj{WSZ9e0-EDo0
z#LkK)7Vo9Fyy=VRw%XNn_#$a;+rd<v8LQ0iUg1R@OkMHJCCPO4tUOY3+?czu{3E^^
zH9@F;dxNg{DdE4lpQhvEUXvN4s%idw6}~!qmatN82=$)FV(BkKxSbZEWO-(|B-vR<
zP@E>uZ+Nc6ZwWEyzNxIi`RA*s(dO0E!nqZvpE)3;XUht6R&JsHUOSOvdGg$X^hBDv
zw4l0kdnRk$Zc2}6rQqOdJHB_SAvdnDi<rL3raP9c<8$lI<Ig|qh)vTEaw@?Iw>9Wg
zxyV12aF^Q2!KuT^u0l87;=o+qaHoPW{P$Ek<U%5L_s_)BwWf39c`34fkUo2DMkjCE
zd4;?ivyKSnp?ugA1G+fhl{8{gqOp1i|J3LgZQY(A;){K$aho2uZ}$qapx=l*sxahS
z>+aIwxi|T!B{33jLo0eVW)^)nVj7t?{4F_HI+*w-P38lGq=mj4wPc~8BMsP=&JX#a
zF9hu&w5vu=Fv;)anQg!L`GcR3gWm+YN#7D3vX91aXt0o^M7ZzEJL%ycv&e;zbc%1x
z=Ets86qNs@;roNS@LSJw)FF1UFy>e@shHr7Z}>0gZ)z3MqrRg^wpu)WKUSA;KN85B
zu3A2=tB`aCC1F*$diLnQ(|F<wWe$^elBlf78-4%6M-Fo#;Rn|e&&B}U`))C{Hp;_f
zkQB#jn&Kf17w8x*{pykxZzS(=CAndvgR|ERBXxZ-M9W%HkPNEjm7;!Nt@?-9=cOF~
z@=hu-t{6&#bYBw%_rJ6{a++Wk5G7Fzs+H(jZW2;CN^d6&CrjTRV_C;T)oWW>-l#p8
z-aB=cs(if1yjIU9jc)@aI-Bi@r@ubUoVJZBdKySpU4Mm(FAotk&01-+{58zHkflSE
z{i#f*4DYvJijPh;pr_ZjVK?p`@7s|{vg?nM=MKsE(c{(F-ogQUXlW3uu6fksYXWXs
zdzN1Qq(#GO#!_vkldOf=Ey?l&fmHFw0o=Q7Jz45sz&30;PnnTwWWlr*^u{4Kobxl5
zYM&V(>@`eMN?+gy?v_MlOCNuFrvsbGGg#I)maM$!O&yEp5^48&BqP<A4t;Q}Dk(Rd
z6hAx0o45bNhE|)YWmp5w9a4bbou7|6CV^K-DIi7elSsw+d~*58VSZo3e0uR>IVpS<
zPVN&EjH-u|{zLZsuQ`!8;P+0t=Fu0v=jURwK}(jN<3p++3o*Fk_Aasm9p$Z4ZG~wW
zSBTHW{rKv>8~D(PLwG)~gLj>KKx#LiV%sx4S&zXb{NSBUG|ke96wUk0cbB{1xv}L~
zfiI!<8w&B8-*FO|C0B7>uraw|_N;nSaS{%gbqb#-{lt6PZkNP19;1`kE9AegTgfG<
z{gRbRYl%~vNQEA2i<|t@P;sU=QGd0XCM(Y;Uh}1e!8^y3#m5Wi$xliAH>1^5wS5#@
z7IlZMNOPn2AIb35?;>fh<td6M45xnY_VIgS3`o<wxBRuN<8;9ve|}w|9Ce8==Wo_8
zCd0l<F!MOA+V$)<>Y6A^-D5YA6KB&ngKr7^i{oQC5Bn_cep3x4I=Vu=PK4lPtIp*r
zC6IuoRN=SvM&Za=A7bESN!NWj%Qa^C@*C80iLt#meROvn_n^;}6yA3r#t#a~VKI?d
zKS)K`r>DXNNVf>jJ4*=v<N)Uqq>qiWCJ4jMClRvfEIpC6P6+sQNN{{F!Q(=dXv0P~
z$#Bwyt;>A4bE-D{lm&g<sgP)4Na|2>=HF{>)xzi8yu?yYsVj-z9mwXcM5mMF=m8=2
zN&^k)n8L-)-$xz<YjfBBT9fKaYp7n3yfFVs4pG>pAjnF;C$9S+Q|GCB$&UCc>T$h}
z{<xDN7|pK6d8aqiF_X>cHL4?UQj@suW%)u)>;|g$IEH9S@pRPF|H#MD(xkgTkzC*H
zNJ>AtlG^04+{~B5@ZY{ZBB~Q`*3x9I`B^0Oa@i!ztk5L0UtHie>!wo6rQ^6w!}klT
z$}`EZ`?f;Cfwjcv!8l>r`4Vb-_zsbNm`&QXd~v@~3f)wgLCde}3TwK<1(uxVER!_&
z&`fpV(uB8o|LZxz9CC?9DR&AorHZ7g|2@557Rmpbbc~*@+JwKXJxBr=B{sL`IC*v9
z8;!bDDJf@r_@oU)c=T(eV6SUQPd>8}?s=zE*ikG@6BhCgCyIqzPmgd97c$g!YXouV
ztsskxSMwhuTWGFd0R6KSb5iX`NTaP671GOToyL7Up<@A^<61!*vv1I%o-XS4;w5jg
zUV&@Wy+~E`yvSvXSYkR{om`O`O}m#2=6)GF3fU9x@I&itxH&%}$n|@-Xsk{zzSMo4
zO1UNSwnv5uS+R2Dn&ndzL`JZ+(WA)H$`m^8(O=13;S#4<@Pp{wBGhkIDd*{Qi2HfH
zoGT+m<i{9)u5X(X3H#eE)cnb#OaD8<wKUq3oz5RfKaAq4J6K`&w1u=b-+&zPYoIbq
zzmiZ#7dpbdpP#Jwk0xnerS7wq3I>Z0k=jyq{{1_5a-82pn|~dnQj`2h%bWXnAhCkq
zlBdt!s|zCc5?Aw2AMx1bQZ2r$0i=-kmW0GevnKIHG<WS8+MdVo4;s>OtH*s5a7K?x
z<=n^pF~|8<$8VBFWF{SzphA8%yk)Jr-iZp^csh7YCAF?RNUG2G;<n>&BsYe6&}Tv+
z9@w45a}{m)>FPm(t5F`2t#9RP@_kU$9y22QypvdUp5>>!-A`9u5#^qtWALOC@<PKQ
zPd;^411S?!NuP5unLqptVLTTIH%eQG(z5`1!LYS@?Sp8(o&Q5tY<WSX_KB(ihiT-a
zhY8>FC5pxuR8V)v>?)(gJd!I$bv;-vE8IypA<ZK*Nm9~W+#lk?H`(T~UEhSNZ;{$$
zL%%LRa-%g_*n=hFaGVs$Eh3}TmvUwcXVAh?dcs-py1M%)VZ4UWiE*p>+}BHly3A3;
zQD%_v*0YXGSf(aS8i~o{>XAZ}y)5>wRH8qAuElC6!x3ArFKl;SOD}I<#eJM^NiY1}
zEd;%|M~d%H5^hZI<UiES5Gq~Yv2#wT(*4b5620j)r2dpDQQ6f}y=?tM{!x?y-SFZY
zUO#>iXJdPS9e&r3rr%Yi&2ET(3JewAczmTczkcBS8OzAA#bfyas}?d?-iZEjdqi^8
z{;>DfeCH_{B<wwHCb+Koh!=PM#v#wY@~=Ny(8C$~$#}&re8KJ2<c`!o2|0Y6jDEfo
zueD0Y?Q>mO6T?Kdb%+7~#=eE7sFaWvB#l$X-)98{eJX!Qom-??MkCAr(v*((R6}Zo
za6Yk&Olpat-{29|i&hsB$6v;i7C6y$4>r>>PK&+{_vIiql)e8mg8%E?PX1c1;B2B*
zxnukl-okSQ_iar)xto1ZVC$FA;w^_s=wfp+dh;OejO;mf&g8>_OXnDJcU(9B+iRqt
z-ueWk&RW2Y^?E>-uAVLs$8b8oM2#l+hw`~Hj{JguWATg1!QAtu6(o9B8a?!F8C{=v
ziNeSlk}uank5Db*x3w8J)cWwpjc-<E_v_KSZ{_G4sVMANctCPgJ&nwX*iU0-byKN_
zQ^<d=Ss0ZH)iR#XNPbQ_>9NJ6K24SGD3T|xiWa;pH=b6S%X50c_jtWi-*7{g9i4A;
z7{4DL%6AQmz^!`eB#y2iS2gESm-PooP^}?-99U0IC_R8#2ADX0d5pea@8?Z_?ZTH2
z4C9(CXOp$7ZdXaIVQE!J5iVbLg~V*NVc#$B!Vg0W=<R<)>4VMvym8W99Js3*ZyO~|
z)pc(1itg6j88;bvGqH@CZMh_vf6Czsa}3Ytp1(<Fcb?|tr^VArbM3f)N1xE+C##4<
zx+_^%<H^nFE8}h$bg`LF9XWRFBJTHcfxGg0DBf{<ir_W*3;82-bI*1D@MJ+Db<TPz
zd@}AM%ZE`Ke<GfG)GOoTt7Gv}S65-ntB2&huu*a&!koYC(m``AvgvlqM!|B)cym|h
z>B7#xBZTAsv2>;4U_tkIpb)&RUkHlTA}dqY(IwgCxTkH8@c!Ks+Jk1&K=ot1`fVj{
z;!H&$Df<ce`0FqAi}DsMuAdZ|<ByQ@%C$oED-)sj;8~$S&XDfkc2v;!P$mmDe&#=2
zsHwK7-%C;hL{!?0UaDBUk1GFeBqwTC)5?T%*m0T`cC_EmE}qxI;{XHtQfnuPDovFr
zj~+qHu1J%PMR6qX<V?0~|4m}9IE^hUeMn<97LcU<R)TtPK1t|mAd0)!5_dU)AAQDy
zo?H@1%^maT#EYepZ7SvLppUUcXHTu<#Llz0<-rJgp3%mOb3*wCYhtlO&{68H9Yt1u
z%d8seC(p&5Qy|xrn#dz7vG0qnBq?VC@MPgFR+>=4&!Ik&m2+!ob#Vn*D9NL98>aBZ
z%Qw;RFAK4D#St2tJX{!CxQcw!%IB@zRQSjZ7prB4zNZa-Ay{wx3Q{9^LKlpcua3HO
zgM>}b!-vDyp)H42lC;^=>B1Ln66tY4Vrzej=pK5{emeh^-<73J<COoh3rCd`nKkB=
z&UnFR@9-eioCa@^u7%eG=hLXIK7#8lo$C0#o#fMwN)jdOO5a;-AwI#%!Y<JyxU;a7
zkE(czkK88YR{Tr8on1k0j$rAp>)Y^|dzjt`ZXkiTT<GZAkMS6FH6b&84j=yLJGmZo
zoi9E%j7y&rMy!_qz^A&pc!AuLteR3z;;%@^%BX0Z+SSfK8}}do`H>CP7{Sw?LOpEz
z{SPUR96=H`T%+-y8_E2qN?enJ2Pv`N%_Vt;k+$fGT=T7sG<3h6aH_kQl=)=~;|=!E
z4LjcOo-Xt0PzNho)>ug2#rz`w-14aTyb7GT_b=ToZATPT6UZDHB`ULLzOXIGm%QPk
zsBX$#E-}egqI)-x%=ZqZs%f1hP&`NU?itB#$=xno&@|+33+B{zj4j?ZP)cm(G;`Mu
zxe11&4|0p|SP0Sq3REL4j|=`@!>Qd%=lt&}3QA)h^FR8A^6qDj3!i3OMl0SFk}~xo
zQZ(}_DJ(7^%ND!PrPsRHslG!v-%w{RC_0HG?tMif{`qtDp)$DR_cD5~)0l?0MTk9K
zE1B<XDohLfDr{Fd%{^4xCA@eiOFyokMIOUo;aR^PH^*cSb>8DjHcW}5_=GCA;Hx3m
zXUdUVXPU{1AJ55thbS_`q+C+>@gFrc3BxqzHuqZQ7PasV=d`Xm30e0;@yhb2bVv@*
z7Y)Bbp5$nAtk+1|x-gk2uP-JyFSygYzyHv6XG`gNd0Anp>^H6=;-H{V;UL_<SxdvE
zeFe9s<HFwdqe7{@J?(zJTu|Npho8M?iV#xiFUYhv(~!|lWO>#ouI_iJaMZGmU0d^l
z_K!0Y)bVVJ4-FxP<`={{qYioU?4hvt@f~U}olgfVw6MaWMZ)m~nuM;8BxB1Rg&$iC
zgt5}IdGx21@C%yBRKKUh>-GWOdC)k1_qutktnEnhEcpofVizN^Y@I^%%)_zsvs@bd
zS)1=^8A?7MF(X=QKGAi)0{Q*SNcelEhw8c<B|A?PlU=(v^BQx)>FulnTDI9!@N)iJ
zjaRIvR)vSj<@AMwTXYy3MQ76OyJP6I(?;adq$%9yX}SDXHjb3-l;vz6_7H%zMEc!1
zZq8s+vY;uSM!V>c5p(|#qriFe)rLL14QtFTHQyq+yv>m|3``+TuL5cR^CqnFC6)ep
zewD6YDa)DLIgpTAPhz|~lzd;XN|JTWm|a+sL7k#@<KTqJ)sH`1<v)pZ$F<A+ajC)=
za{7iMi8l$9xauCjk0$-VP9ZMDZC*CMTc1IVY~rz(S{7|Pc@!s1@WMY9MevU+f3mIa
zhImMT33b%YCiS)2l$VtdHfIIh-#b78(t`0p&I}uNT%dmyG0EN4kG*H6<7cyX65Z{Z
zWN>8%!&D!ldoOPz-ow*r(aeYZ%Li)Yq=pp9I{X+vW6DXxMH3tl=PLBw(h<rv2>)uC
zBTn|MC3=?^l5V{Kdd1=r&hU_@lPqJ1*{R#Osvw!ze;7f2E;N-apYf{NRR0#~{x+1Z
zoi{*ZN3JEsbz|s^RtF)tIhAg&&KJ)*WkmX*99`e7&UGp@^8O3^cnDleI;2cE?$|97
z-&}zcjdJLhixS?wCjqYvc!KvwXyIm)7_6P0M6JblPF-vppZ_UHID7L4&t31q%BO}<
z99BgYL$=Z(FCB%>P*twKA)9vUhLEjKCXkII2h%rtt`g-<)<VywJG8av2d|L^WbZ)*
z8r2p}8&3YAaT{jSoxU@;T_Y_eQOy)TMcFiG?JfLcvkcYzNbt@}b!6rSTN)W1L1z3(
zqr;wZG~aHmq@*?*g@^RxllSXLeZ~}?H}jxB*LUz)B_(8siauHE9ZcFEgpqdZZA9zO
zB3$nFhHp2$#riG(kJitX!`>TnB%U3bw0?&*om(4E4{6^K<8fQqG969Q;}=IqEq^R|
zwk@LiP5N^b>!2?DzCMYa^5#A@N*>M8Xl1<AVI=)_=Oz8Eq(coqXYsO)rPYZR;@M`I
z8MRYC%kSkLNMx--*cOFlRJO&GzM6R#8;*?U`@(C`ntNW@;AR_cNtfl`N=uP2wRxCE
z>5%Y?Yw&?}XR-6>IkY5Lf-NVl<wrSPpa*ah{x`RgE*JYUr**mbYt00#QDBXC1}4$v
zMwM9qc|J`D$tJD-i>ZUWwBUQE6^{wDCw_DLd7a*~?8SjX{BiqwUMe=2k1yyTFPv84
zF`>PZLC>a==M|5sm60rov?%5$o&U(6dlEro)}A6aYC~y2`ZH?T>;-)%e-rxo5AVxP
z<xZc$!u7qXbgqd&ySBuUX%>a_Na+J!e_~qo-OY-edXhffu+@&g^vH_HA4uY_+;pRd
zKe_RfM;g(#3nNHL(Lt<|@PJM<8iPNp$_diJWkgTgk?--o%HNRvPGkqg^26R9BNGn4
zs7^@7{N|r`NtDb_-o!eDs7WuR&ewMF{?4lO*M-aM2k+1PjQmi^ujdoU`b*QO%C8uD
zzsFH%);)o>>*|T&15M#(NQUsAY(L5BzDi^|BdJ{aQtsuwX=Lc(Ws)oNF^QD1=6d(l
zl20|6c<8D1q>qNu-*0U=gYA20ukK?SctHjaeE*D(jpZ>B_Y_Y8HwiyI<;k0=9MMBn
zB;iV|a3X84FzS(?U}jf~*J;luA&q&Y@t-<7TJr}DH%O$4@gqpT&1I_4^@|$*2`0yi
zSm6k_k~cZIMUb^n;CpSK5y$B^?6V|S?zXb6<lL`iyiYQrPvn*OJaJwjdo`JKmFbZe
zhJUF2trPU(6iYgE%yuF2_$0yf*ncF%E1Jxly^<E~enhsfeTj>Lk5(@@v5RDsTqDX3
zO+r|(v0%9pbAwWj)4ck*blezqn!L4+^qh+0-KPB}cZ<J~tLFqfMUdl;_7g5ka|y?;
zk*D)DRjKU3Ojh-kC3lngf{vzLqdS9-)8J{-IhR%W)Y?^%zk!d?$PNkJ(r|)a>+q-J
zwA=ZI+cNka8yhLM>!V+7HVRJ@k8;gZeK@A8fo^O$h4*zjkv_3`tW6#&G)&XM{Y8KA
zg@K90MdK&EYFNqQhLN;>|96S)z-c10M_riuJdz~69wTUf51}fVse(t$VVu4yQi!{>
zSoo^GPpEsFN{vaJaBzMiUppy)e%}0>FDi8B|LV6>`I*o75ei8n(&`W1>3f{o-MC0z
zC^kv{n`F;x`VYr5yCwYV)lvAzx<~wxiN|<XA(8qV@upgd!?E%oJ+5P+27Q_Thdw;F
zha?X^N3XUmCZ*=J{FvTx+`yr+WTy;=M??{NsUU>94u8UrO?%8=j5|vjnQ3Iw(Q+K*
z{uIwkFcEO{Zo2X7J|bBjP9tX+kf~d}C_6NfxUQ*X`^S}%(SI8y(OuO<^UN|@G4ePW
z>Z(95ox6{3eXk(bw`fuQtb8JLgz_E-7tvO)Hi`N<HCE@P8I>%mB-<2c5QQgOd5KqM
zb<X5H5}l{ztj%mydP_EhuJrK4J371hzUu4zWiJU^Z@iey$j~C;&856)pd8UJcOikN
zj^P8hG9>l0(x~#i%Y3M+0qtM+8n1KNOCp2P`2g#4{NN=9<kSF1l2UF;QYP{ItC3gn
zy$hN&X|o5pc1@RVopKFd@M-6ZGvy?+d+KrSt`&IQE?Z(@{uVoJxJb{Qo5S~~+L0BG
z6@1s#LFA;XEPmTEn$*Yy@(b=J6Fx?i`|8L@*3JFRFHle-AylLK*7t?1Wtt<=Us1}Z
zyWbIarGjLh%w_yAMymSuF*Bl<x`DVo|BRphy2OvsS0j7#1IhL2i-mZ<W^y}uJJ&;Q
z)AfDMLez-SLW}%)UOSPH!9A+N^!`h<)8QQ+?RAV_H8_|=s0{G4hPPm1CX3xQ<j}l>
zOURB#$LYo*mOfqn07vY}qx*tqkhuj{>6W52KBW1ZxaXQgH~sx2S$Jxi@W@?RFqO-o
z0U8bbc49z{Zr9Q`OOBEQWmD))`?<vb?Q2Rx;yI@?v&otQC-Q05SAvg3aW)2nxsJKN
z@UK_vx!tS6NXPgbntnxvvtD<Y+@)8lYX(mxUv)j`m#>NBVx|j?o@|Ei4E=={yw{`F
z`wQ8njb~_(Ln4iHM)ZKIwBR%03h~PfBxhHgq=P;U7Y^8LBG&>apUt-6wjE(a_4!_E
zdiXuQ_52RsJNys-Ym7VAJ|{)hf8UcNNDZSU#DT<oOd<-4TWN!lHjxc-Av2*BOD9ow
z$#pq0e05&+6~`*c@eef;kA>ICo%*+U^wCembzdKmnpH^`J{U$U3{MlEK3(EJbvCd1
zFPP{|mm@=7-63`##|Z-)6G_HRIjS^{$2;>oX~F9RV&5r?LQn&?Uv-%8Qv|wA>R?st
zg-#lAQxn&%C;UboPrSe~gABT{kN>>j7kfuG9=8=*k>wFhl1W;T{M`L!NZ#l&ANjA3
z$n-K~be=Vr{$eh14O_`oE_+Nmo<(q4zmw^zGoyr;CKribOS@21&_%C#UBRi&!-a46
zl1Opy1Yvw~G|9j3C-EBHLr(gS5~S`Y;K^I1xw6TZQCL#3u=;BR@4ja%^}qgtyWn>P
zyBu>Pb5b>gTaT@|kIG-jo4ZwHp4wW0{c{x$TydfkY;Ea}Ss7%+r6O)I`-G~79O1^M
zl?s0%m(dIB3b_?|Ux~b5D`&S~T?h{kl;|8&A;t35H1TILt7SHWw@9!i19tADH{DLq
zz8*t3btR!fX#-chisjmSL&>f^RwNJ}6Pu7*SjKUbF!<$jvSrIKGA(>3ue|diPAPm#
zGag>SCAU=sgMA(R+Q|DPE$=Y-G?(KReYK>Q;@4wO)pz*(wa=8(yh?P8ALAo)X3_5w
zdA51McG~Q$M~*gS6Ca<myu|~)x@t@dUe_|5UEJ*;Ol@prpNsSQ3D)Lxe3AoMyy1t0
z8+nuR69*Ii(IT2;dkDw5FDCZy<N1~Id#EJao$k}E;wwFD$vA@})HFtwc!#lM*@0{L
zh3+CU=$s<gwb_!`PCp>@+;tL^BF>Su%cJlPI+ROHuOLeXE>nv;{n#YmlZJkX=L7P+
zNmNuMxsRvNteFXPLS-$xK(C3-G;bvX9~FuG1aVGuES>26KYX2OSWRE}ur<)6P=rz{
z6=`li&)RD%Q6XhWsAS4KM1;(tfkGP5pfsUmE_L?WQbJ`;#thL<GAAiQ#Cxvydf)5%
zfBJtuU(Pw#+Iv0YeOq>;TVKc0!4a$2{182+rx8alP8HbrxzCxbq#GZI@|<o^0Ua6W
zNG<N3qI%m-ipp;T>Agu?SZi4bo#bgo-@ra`N6%)a>2jCX|MiQH>^E3Y*tU*(>wKhv
zukN$#j;pl$3)_;}oyx*XA)I}fq#&HRGEhvtbX|-%lu2*T)E0KMOlLJ2eT4CAjildw
z1?Jv=s!(egBvdXer4}tanY<XvxBOFOKmV<k79Z{@?-K>nz{%a|HLWMKV|^=cyyFy;
zBg5G2+3K`$iW%jaS5q6U6uMX*gv?p=O5E}-ms#BV$$U#&`8fCfvg3Cb@fA(0*x9eD
zY(dpMnd#y(KIKs~-zoab%tLHM>47spvb|h9*zdXM9GyTLrmW*rXAI@1#1CMT^dE@h
z6+VkwFJI+9+0>S-3*+fEK~*xorOOZOe^`9;a2&lm+n@ReTxM3X*{pA51naTahxxVd
zqah_`Y)4h9O!e?We&S|pF?GulezM6mvD0Kb3!%2ue`XO0`sK$~q^+PM+=uf0DyLDC
zx50QJ{WP8OyOCdRF_8JqJj4q6>5Gd0ylI5p7`|@Yc=3+?H8#rY3s%RSkUg8EAq+U!
z#Sapl>D}jPyjRFsv8S;I?>=!Qy>LKAcW=t0(x`Nr@gYg*mwS|l(+BuW-`y+-O34N5
z9A1{QUm6lLlkNFiEj#XS%q%~jq`$rESjD@kVz*`8i`%C@p(}>$<R6Cr;-O^-_4smI
zmO4I<RYaeqKOXx^bpdk)y{b5Ru;niQGhK=Kb>{Qe=FJi_bZ5{>OL)o4>k7Tx{D@z+
zuP-w+?@x;cj1<NVMsZ{DJt6t`KQ_v70A2YzPnasr7N+d>5LUadW{vj^sPeHWVz56#
zTKdDC)~U+7CwW8Z)2dv)M@KBZJyoAx-j&3fW!LC>_nYF8<^<~0QO!D%4$~UD!EE-R
z4AwO=LP}~YqY87*G8fAX`r2-?5Zql~Dnmo*W5Yc5uhmxg+Lc?<E%LBzR?<TjJg=O0
zUDX{QrTt|EHosUm^$w=wyN5i>A5SeT{^N6>6!OPjJ>k`y@??$^HnNJyLfX3a4BP1T
zm&I;15uRQ0WQWb;*<0s+(yIbaN{O+fqu<`9F7dtD>Qy~Ozr(xetLkH{VMYa$cBx5j
zX}Q85lPKYv(49<O6CiNamxO%PJRx*O3N@H`T3A{x@xhZVq(j?h(Ca%U3ek`9`Hj9h
z^hT|M&^^Ogs_@F-{km+3>W9rzrcpd4x8vziheX+rMT6Lx(|W8aR6{apr|e&Hl2|c%
z5<SsiDR^6r;nhN&rSj9$S=VlP@402UnBrR@o0T_1yk<n{*6rRj*r9|DNPkLOtR4A_
zr@j&|{l3)kVx4fy^AVFm<0P|t_KbUxC`=D%WPC&!b=wvyeLb{^CO?zM^Q+tG45tvO
zLEn_rSvRpCLD~F$OGWn4IFD_<Ia{1R&r^C-7slw(jY62320v**hIDWGU;0tnBfZ<M
z4-<p-O9SuvF=e+_Hu_U2e>QZSaI_(w-*hdfq<Hf!Ss#B-+T~#&IXCtaZYspHoeE{_
zV$b7jj>Q+&GxUL!@lHkhVp$>%)BIFYR_P&qsE;rC(c=Oi?i?#yQ)(mW`XtjGroALP
zd7U;lQ-dx%b$}&jw$YeUWj5Sx60I71oNt-+i8jpD7RX==>gHBJXY5Ip8R%*78+`8b
z-mdHT&S7RWvtE-r__>Plk6+P3_Y!t%l`C}}n?qhmUs&*gdGvijG;jRpF~9KCX<o^0
zE8n*{jfA!ZGM$ZM**3r1%w4IkAT%yy%}Hg<y!<Cq7%!i*Jn^0xDs-`&8zwBJSYmTs
z{>diVH#3(uBZ>OR_(X+DvTN=$dR(Ksr1|(MD;6iPf8NDR$FW^}x-g376fI<nJetIP
zXIj{H*?BQp=D-GYtEA1_C)32oWz@xeir|&Gi#o;krT4o%V70mF)a!2;{dT^HIdAV`
zTi53B&%eKuc|4dRt^U;{imOh^_B=l!78%{3Uj=h9Ei0pBP5eUoENq#W)OM2fdvuEb
zQQA$~YAVpxH;>an=l1X{znk=9a5(SuGn$P{IxoJup~yBpi()=!8%2e+duYdr*W$Pc
zEp~QJEPK^^l~642VLR-4jh&v>LaEvmp>%4v&=T(=zI+@)8`bym@o(lx&c>HmW|qBh
zdw2q0*ykX%F<D7(>gKRXIiXBfO{5Pt-C@1E8}PnWZKBK8;j*N!>q|D9ZD*%y&$C;{
z?8W%Kk-TK|R6KeofV!2;mJNMiOY;UVqDRB?SaQxqvMW`SMSbJxi_NiO@SGgRdI}=T
zaAi&*{YjndEqVTLrC4tsN7mTp$n+k&(p}4*iBe}C^O5Zm3uZXUj*iX{jEhS6k-c*H
zgIDIs>vyrd$M0S&G1o-!{aDGibzY+vQ$qM@+G(O}<6gRC)Cn5n^HH=62&Z2RGwGb_
z(X1l*ji|TgmMEP^!Od$Xd*%C_9zCbcc8=*KE=#>lTP(Ixv{t0P7I#I5FrLkM7DU_1
zTEt(OxA|DV<+RP0u!UZRLO0J=T6$4IJ|9w0tWljqCvH+^%87@VbxjOi(7lM>X&EBv
zjk?T+U)fD(8v4_~)cfL-F<(S)c`ws$SiNkM#Y8@Kak<c6XDD6&v5GG3o+dU{oA93H
z7pcP99y05jO5);nYifJ#HfxPpN?%`0k%Hs<vJIC4#s5yerqy|q`C!*GEZFHC@0?X4
zJ@Lt4p|UX1c1{&#E1r{GtyyfR-w?raNi0j4Qq1ypKbG3#_tU~QYj$N<7s(w{$qpX)
z!jh&97Aj2y+F3i1R)qP9t~>v+JdHrv%in$I-n<ajb|RS0uWaROZkV&w&$+Z$b6-*a
z@=EEb;~#eBK__jmJIN2Q8^_Fn2;&kQ`KdSh2*;WPAucGLCEQb#9z2hb;-$&-l72b=
zt<OG*DgS228ja|)0~bpii<~8wmj~F-n{u7x5nu7Vl`(G-?alc6L+HIrho~@S2#>-=
zwsu?w-Eqx_Wo6Exu}|Fu+Xj>@E}Bcx8t2%#yH@gd%w&On+$)x(*07j7OKHve7@8<$
zivNx7&en_?DLo$1eZ=r~GvW2?PVvT|MuDEF7PQ+XA+hgg@j?6qVP!`q-O+koI<lpY
z_;yBsV7lxWy*6J<NNXR+FHu_|MY=k%_k-6_Tdj8SVo4)?yKIACHuxb;`88RZTb0dP
zqY|WtZF0R=o{F$Ey^>ja2$GR}#@1lay^_mHN^EI)FP1fEG1L4zkKb;a$GX4WA?nwN
z%wc~jCB0<)vrpzUyiALBTOiMgTWVNfh#4&?FQWPL>%?$d2fDIiI6KlqPuTY$o*Jsp
z5C^?nPp|mAVC3Z_VXeF`aV3JvEF!xz6aNpCt~|^ON(-5D#ueVaHJ&!UFctK(Q8MhO
zAq{;J$a>${B2JH+EzOL)$k$Fj!4{nU#!`&B#J(=h#U9goOE0bo(##J%=;nvYv~~1j
zc0S3F3i}FKPQ_5RQga@S$TXtM@_ULS#w64GG5IWfQVr|*SYNpRM1d_CX33&*+^B=b
z40&8v$sU_UNsc2Qi);EEqoi#BJv6i*J=sehANwBWz2;T$(r|yae^okNwbNDBcZnU{
zZcrpDTrn4%ru1gVelHM{4i93V`fBhc>zl-XJEpT?dIP1i|IMOLX8sm;A2Fm!K0~Gc
zceJGEKi0FIk}b{WB3RtILS`p(q0Up2*|*QVq}}Vsh-!Ju`AwG#>E8%jYN@STLPAso
z!v&S>gY7ixz5NVr{kfL*lX+6ZU6J(p^X_z|o)*))Rm&@=QaZ*bSTx=AlV9m_UX((6
zNJGBg7j>@nW2t@jv4qL{_`a)C`3bMjh|lupFtxlCDZkfUy5Fz2^usSyx*RxMI^Z^y
zTFq3X;mQU=P0nOtV^ApVY|2Bsh!pBRXqWh0J)64u{p6p{%M}N{KEw?F1Tg1@CH($f
z6WHOfNi<;m4`%bUJ5`Gj#YOcw(#P+&=&foM>EUV*>XJP}IvZ&#rK@$Z;J8QhpVmVL
z`7!L%<C`?qDuJy&tty?}IzW0_Gg;)yPt$<SBLDeeg6LO&h$f>Yo3h@C-)8zk$o|CW
zK;J!L?}D3R-qbMRm_db%ywszTx4qEK^fTMMYd0Nv<A(hHED--Kx8i3_^b?MK8_j;_
z&Y{!CB+|`mv;^0@FxI%Ei=P?LD!MCu5a&9`?}^9*lGz<qb|*$lm{eQL>`gQ2$#iS+
z=$i@jzz`Yl{%)b*x%dawDaj`4X0`P4s!eq6j#A-4Krnlxzf{t;GGXxt^My@96*GFN
zLf@ZVC&`}q%XE|{Qhwt#TJpqPa+v2w4c&sISx-#av{SZpXuoeVmFbNv;k3F`xxk7Q
zg<cm*=1i5mvYtsPwR!Y{;a-U=-pqOzCrM_NGnv(R89Uvg&Kuu*M}1psSSVM@u8$}Z
zE0*1$4qy7R+PV6|qy7FROWOKLJr%;GpD%P-OG>-6d&PUU@BI_zX{Nzyf;A=cGuiaR
zKT3Kk-(bakKsvi}m2m7sjFi#qRLL%V4Z(Dz5+m!C#YYR?<J96Xy5r7F-e%4eHrM_W
z|4u$JHs!stptzxraCWY(Fmv=#R<Ar;>Q-(<?=*G@3j<=<B>pY!w|AB_(71qp`@~5-
z)@TYgTdP^0Fr-F5{KdbuJp`9u17$nvt=Ll!A9hxXXVy1DCDpWd!Ys!T!gte4)XhkP
z4lo`l)phq~4`;_qGWjN3*f14om}4{>Z?S~YoNmJPfW35!VI;qOaSp#NsE?GLHd=mX
zA7cXt-e<Ayo21o5n@#c<Ee*W<SoCkaEnRZjCQf`GE8Sf5oZ77WMLWGp*%r?Z@m}a(
zdOCHvbb9f3_T)-;wtUAq-d$xFyIfnyrf1Km!zX!&E8Uo&HMNBP`&&Wn<-{q2)X(&3
zyNv#htr6W$9N;ZS8qt#heP~@owivxbL$c_KWGX@D>8-ezd}iT75@Gj|j-2<2PT3v9
z6hBUq40qcLhff{gd$`52y-uo<$Ax0*x+F;$Tl+=Skk3MB>L=3PKa1Ehzeq+Fuawqi
zfMk7qEGtt}Wp-idjP?7>vc|@+mnyMD@kj`*NVAb9KlB&WR)(|0-5SyaqeNNXqVux9
z`8%aFwGnv1H%e?hZ^<Tw>$4Y&D`;jiM?XHh&aWDEL|B=;g8k_gPEjpcT%{Dsb{Ng2
z_G|Y_W5OTG_)+q_S{y2lZP8&37ddI;bq%)s*;KZALIhoUAy}r=&?aUv1Nu8Mh<|bR
z7mM4lir&7sMLKz{jb`mjp*Pp<p*@C;p=rC%^8@A?(X>?+)GU02wAgt%)f`<bD*g1O
z!}qNtTSpHQ{$5v-qSQ_a-~6p5<w3{k@xMMoRohgdI<S}EyTzDo8C_0YhZu<^>qREl
zW(hkJ6X{>uNPc3j7MoZzjP_YRm74xKD7HQq=`#KIqFIAG@y%6d7CB8MTGfi)o4!;$
zEk6$iVJ5;hi$>A>Z(qs%%vtGv@<pMoB1`(XS&_Zz{6W8tTg`ILcN3O4sIemz*Qm13
z2HKGtN-Y)*7WB<x7#CtM%>EE1tlYO%h^xQC4%MHLRIfd!0Z&XuINDXPS6-KBq0J{L
zdbS_EGtWp;9ywgd6~>8YXUr9@+%uLIR}U3#sEwn;MjJ6{T^7rGbb<Ys7ba=vJB?U!
z>IqYRuPm*;9#6H>@}-#VPEuRLFKM0gL~+3rAL+4039Gu8#QwdED_*zwApLgbyzG*E
z2DB_sN7AgZ6vEftWc!;(N+&N*l9~?bilcvINs@C5^KHK_bw8`fn(JmspPdJb!+iG_
zzvFt!V`X=C@az&6U4E417pCBgu(j0RdN1qoM@85}TV+$Ul*JM6hZFB8`HWz{ZM0b-
zl5YrW<hxdAi(dyTNmH_o=+`Ing*Qh_#iQ1PC38J>S{b4&os51*H@o-dH+fhKmhxE^
z`ri?HX5&6~V)!9!{%*$-mmA@-ubXL5^*!0r&}iyxY$H|&-)2Q-R_xAZKOw|WN4ny2
zi5{GFfR%dp6}Amhp(_^1Yo=dQ`Kxm#vbAn%lC}13UUT1W(I9v{{rzGje}2_Uw$E=R
z-+1pBUwL(?nDpi;->X5sClc(>+!F`M@HXW?jxDDHCs&a5tA{cdMP1oM?<@3=f0y{)
zZIbv(b1!|AkS9+3P|CNzna-bkm`R+j*U94kWs~n7-6ZFcR&1JD5W70Sl}&SfAx4kQ
z<@4EiR<iGyIDhUierrZQHh86@*!xN#o3tg0DVHXR%ICXyy&#cx-*ibd$~wTm+^Ed%
zlq>LD4@NdjI9SqsU@D#8qRrOu)_ky;FFSJN3?-Wj_>9+1yvKm8vL3$b!i54C_NTLr
zPq>=I`)u1UW{-%VzLnqki+A_X2Ui`%ZBzeZ*LxWaTzi$zFf`<cuXGmeeY|MdfoAdL
zm~_#2&Ma2x<wtAQ=kUwFuMwx~eB!emZt_+BdE#0{4KY90n1vOLWryP|`SvD%dQ#<_
zc&}_VO<QWf*Xn5VR#OD}B3+f%{{GAxZv7#0`K98qO(?G&oDe%7ZQxa3Zlytmluvwn
zjO0Wwpz)tyvz;?lWOio~MfW*z)ZX)-IMmCN4sIAo^{+o?&hq+0TgqqtQ(Ph~@He3c
z{!OO;Mk>%Mg%M)*NfTDxZ6E*8#6zST|M0Vp=>e=aq?WxsiKo2w9WidTIIwBIc;#%B
zc<rYS3p@v6=Ht0o8e1YW7}uK>F5XR3A3PE3eY(W(FW+#`woB~ZvC(Wzrvv?^zLjLV
zg^Ar~Hi;2Nykt5imVcdIA<MrrPRfk;ld@l*qG!TB(oO2Ivi$@43oN5Qjak<ruIWnP
zPb9W8J-2wOo+OogAD_hkP=6qPd6mO23=>N>r?=Dkt(!&v^j2o}A(RdX43Pc~?kfd%
z_ooh)C+NT#2{h>K9p<8w!%SBuOC#eV>4&)oS%&E;x<BRswMiMwPMQvvt654}ME_+a
zH@E8wdK%TtH{}e~Fw3Ww%QC2&`a()N775REGpJu(Hhq)rD!Y*=(%5cx?BE1zqNrR-
z12-O|o4)-nDV!TiS_*bi{}Y$^Ki!JhG3D`6_M#$MGwUery~j+jIkG~m%r4_ov+mQT
zJKo~V)IjE)?J0(iuH>8Rf3qHi{`}+vhSI6P*RlbRRAfh1T@%wyJca2od&QJhdP3_~
zYc{LfVCh$t#A;Hz(bse4(B&_S#JkUDO6xoqiA}s3TQPsDFx+2DJ~N=pu8+)N%16$z
zFN6BBsW)s{k9l3pXhVazaQ_8%$wrN3@r=LJJVuzV94YCoTZg^e&7^O)H#56S$u#ER
zctJA$L1m3U+11BQ^ybP2TDqBvbE=lHQT997W2;?sSI~DF6Y`kFKJa9r4tq)daaA$8
zJwnLreVm{5s*#Rpccl9)e!<i;TCCt@37u9E$7jxXEDmt!!#jU_%r<34@a=oHP-<nx
zkNf+dc;SvY4L)K|ZA>HBCY?Io;KMp{^2SfLd9*ckce*6DIw`Qj-wI^k8?9JrUMN36
zr&@G2%px608Pv5?iCj%RMhCduX3NJmkeE~TqQkf*UVW-BofUAH3YBF<ru2jwKh$BH
z-5r=`^%8m`?>WDDMS&R6s!g^Uq=_r$){?7YnW&79`Ac=<X^gz)b|=<>YS&kaZAo8C
zZjHUf{-$hXa6lgKB^B_i8V}MH6OK{qwzbSAA)MNMxFeq1c3uoiR;B$7W>B?rMWVr~
z7^WQQFLpdy&X%}I{E-|P`#WSh)n4<HS#2|=O3}KK!|@fgE?!Z1p7cki5^;>}JFmpH
z%vi|BZTU_b%U98%Z9!t%mq1y-!v@-aTL-<Fo<)_v$BXIB%cb=1JL#J0i9)OAEipf;
zlvRA)DD|UJC10PYvYl7^@LSgDO1}(?sOUXjYU_WAuP|(5C;e5~g9ZauU`yG^`VOYI
z-b*M>t6*!gY8ja~if${Q(t(7lG-1yX_Ulj&>a2N>-S?PFUGHCKccYFo9REQ~s;?~W
z8+sI<Mmn-zK}q72?C#Q(0z<Z&*+{LqCd48nNTMv4tt?$4jdUNzUT(g^9_Z)sYnvR{
zQ}3^0R6w2RmzK@;`mL3gW$W?B@AVOW{YfFJdB17ts%+71Z#Q1?^JX@(OI2)ou#-*{
zC_mQXHt%@#1XT>!%jUnfrOu6(!j7B$n8zPuN%dbL9rJak^!!sgQ}4Hku05q9ysZyp
zf2}_9j*IQt;0wE@iBg={Z|*<d@W&V0IQKZ;Joo|meTlFgEmOp7^<C_!*;DG{lE};z
zBdPffMLuMJvf$jZm-+>{2^o4$)WYqxxWy!k&3&K26qjEY4-C<uhJ*a6%WEs@)4ZE~
z{(eB*@i2=m)!#}VSO1f3E7X@p$GT8CrChc^)<rq<9>N9pInqSVjgE3Z%YWDwEcTFn
zmc9*~C~0<25*9}|^P%$DjvkBuW5-t~(}I$-w0-4JLBn>nu%J>BC&<6oNQK|@j=VOJ
zXWWC$dD&AkKIchs)gr;CW(tkZiYLFj-<DDzP8Ys}|6%h=JZV|T1Zn%2=~Dce0?DA$
zK$xXc%Zg3LvCy0t*|KffETJ$)Qr~_|*rd}ybv(+&*yL(@!1=7Gd(2$0H=iJVtZ<=Y
z=RT(XtB~d`Ntbk{lri5TeIevwjd<nFUb?_vL)frnJ)QMn6jPt4FBy_t=}VtL(Me;M
z6m?~fl%~^5TIXX$&)II4j{1w_Ph4|JSogEc%3z4p?}dXfz@(3KBtuI)*XqHpXts*2
zA?K;r=?S!CYd<<PY8M^w?+Gaz^oTxtZp`+C459)5o#&SyG8b0azhW-;=1Sex&!(@|
zhY7EC7ck-YDB<DhY0}NSbTP<KS-QHwLD>8`o6W1vrM55Q_z{!h>4|OfnNOVtad5>{
zR^RVCE3=!-Vw(>OY8@Snck_^LbRDEe_9sh&MisDU+jFEunTt3#{TEwrDnHr$Ir(m2
z1z+2X@!9#V{G2PsH0Jtf=9Ku4p4vW8>{X{CC9E4ky+vO(A#R|wc->q!D*HR@_Nj}+
zZC%c0&lpO747$MDpq=k;c$`oFSH#jz_mEOgJQw|pAMhFm3e3Pjhu-(x#xU+R@4L~6
z-t?$u4c~gxu|*Sz`1F*xd%+H|?&(%KU_&o)lwBx4(<qUjcUes~qUR|-I3$N1{^2I|
zR6I>*&2J{9x))_j&CA)<SMvPjbClROuP@!u)RURs(quXx#>n?fj9D*v&!OmL2~#N2
z5_Yc$BmWK6<Y$I2qCFK%gvYMFY*KZU=yP%#9kw-37Gm{Mq+SEqhMxb?RHs4wtR=D3
zOV~ww`yFKD1}E<`Ca|QJ6PZid7;)OTx3c8fg|dB%hA?gEJfH98AO$=q6zgtO(B>N_
z=&9x+vFp$@`s1VrnVAy7Y~Fa#^|yohw6qEGStoy%U)IU{MvUW4w7(Oroi)_|Ym)eS
z?^$uo{q8h&_;MO)&_dKbcJN0+Id*mD6&f)3l{oz=M=Pyv$ofAv;-7lhQZxB`;+A{|
zWLR`5Um--WnPcU<LghtlRPb`P{jwRgSm{ph1x#mi)Gg_!Kr1$-Ac>hQiWU3gZoc^Y
zYH`MC2liI@!fPv}GxO}%yoUCC+25=o(%pu^e8z%9qUNwd`gmHDcsu5+xW!Y0wW{3W
zJKQ^&W%^UT_%&nm*=?En!{cJz{n5;D$^`a*u>t=BP+{=DfCK(NapC`=1I8+FsAf9=
zXRcQU-zgGC&ho=g{XJ0jMJ^C;7>0%>vCyugN%kHng8$CSfz_*$KtWcAYxFkYofYNa
zvTFcjhH$vj;R84kAFk@&YbaZFms^?>h}Lzkpub`#W(W5s-R>J=OmZxq>7j$ebcdsD
z<TjYN?L2<udSdGHba+?O51UH@px?&bVA#?V>k=np!=NzC-#Y`J3>*WxhX$co^Bxno
zT9a8CW;pJ{UaUIV;c(fa0~bwF!Z&|I@!O6!99J)a-H&99dcG9>g57XsM^98pF~r>6
zW}yEzQ7)g_Ef@VY!R1@G;mVmB<TlNKhZZulyp#-IC#aE&eN&OxSdwS6o<aHcD75&b
zgS6Xl)K!`QPr`;nBfNxRf8-!<^?Erd#vQ6Q^u@e}sZi0JiT`Hpg12{7Ns;0Q&ScmN
zR8~I;-wYhcucKaM^rYkX%Oa6b_mg<w({-G%A{mFo*pj^MOW~>gIk0K<N8c@vVZ*F*
zc+Rs8vkRIqXwNas(Qn6Vb&C*Ei_qU`KXL3ih}4^Y09S?Cxb*#du5;8me6M@~f6Jv;
zlWi_Q-^=OHv}QH_+dq~}`xpabR&>JT27=8uUqR!Q6kM_L0I8j>i=%`XVDrN9NAg5?
zE|+~8znH~s7klB>`2Qe$tS9Px9SXA#JO!5tYI5+BIi7p<gWGP%anIT-IQ6afVTn`@
zt_Qwxv^9r&UFHi<OW#6EkB1I7$9h7bo)=g5zyb{ocXOT3eu2@7%}Cd+fmhOF*zzU>
zqHHT+iP{W!;4wnJW@ZUj>aKHY)}x?)%LQ)62^EM6Du;8bJ#dZ9eQ+N550>iO;zrmd
zK-GVU@4hTUr8r#>ZCjzMy4fM+rY=q|+6{HZ1<=EN8W$Sg$gO$Q0QbJuz>ECtV0keP
zKFn){g&J$P=@SN`;txGkKH3lFoYMlmr~UEYyPddW`yu=pU<%L2<Y3qsW3nT}knA}@
zaK`ToxZ#i-7&N&R9m;P&%*O8US@$9YIz7h~Q|j@+YkwI3B^$1XPeHHb0VLq{Vy?G^
zGN$`pmqUxD5^w7$Fu!#lSy?DK+cgjOmyg3Kz3nigcoz~!4!^p@kg1#;Qq)5UW8|V*
z-yKI_+w|LzvU5HzJ!VA=FQ?!L-#xf|brwvC*TuYz(Im(-ob+@qfw!k0k)qzVuzkK4
zkzI(y$^BNN8Zm&Y0oL&L;bGLBehl5GA)2&n;@0;%n7H2v#=C@(UZeUE(d;;wKaoMM
z^-<L7x`+WA|KLv(UvQtlhzuX<Muz%4Cyov^SlWL-dHlLJez1H-rg?E_JV*|*oBjy$
z;+K*KcX+bsN(9ziuYzw2%h5&{Pt2-DbK9I_z|8I+EV0oi>r?xn7w1NL2U_r+gAbsG
z%PPVJ?Z+3tEXb}CyYTnoIwD=YjYh&sEY#~s!ndemqwRjwvYS9g40?poe;d)c&kUk{
zED~DoY{6F-*OBor9*}@D@9=2FLQ*)mfIHM}0ogLE2BX&g!$%*VK<FhioO5<ES$=6E
z+4&|PRoVuSVK#e-eVZ2;9<L&_Esu=K3`fKLj&kW~4)L|Q&Dkwk1aHb}In#a7a@k@Q
zm`Cl0x2e^fT8kb;9PSGxTKk|q+X()w-NZS_We(1bFX7Z99^P;soNHnrXO;Sjv&q+i
z4BY2%_kMR6wN#r^xU7P6eL}cz4@=~L^VdL%L^%B<8Q$v8gL@q_An|$voI3U#bUY7p
zReQ_1!}|-lpGSUk4YM=2R}bVuQJeGd`B5v7_76~d$rY~8I}CwwKk@qXsrcIDBxihA
zE@FDQjw_da1h*7T@agJ;aT)avl~YD>`QDMB`1lq$jqrf`mOWwd%n|VWm@7Brya=)d
zX<)SJ4A+^_#yKz84%^KlKpTcaAJ6Sv^*wboJJ}y&$L@tvrG4DFq`6>V@|kmPRY&z+
z@3`s_I-Ig`e^7S(2G-imaBx5lXFbY?vl<czPD{$6uaxeve!mTbp1Q{Ub&SU)vD>-z
zFLW{Xc@K0|2nT~C23n(K(5)dAmW0VATSIO@s<{dzpPR}lzOCX&V=?zPtc9z|Hv@-T
zJ#oz9Cm?T&qimTSoEUcozRq8ReG>QJK~Bc43HS-Ioiz>(UfwvMIUK5O)KDw8)M38%
zBd%fieQ5lf3N8)(aC@Jz!23>zoAWZbYxx7Px0_tDXVb<NN5#XuVODrOfdlV5r(k%Q
zCG6DE!#}01avAV2TxUK;J~?{~ZeLyp&%XVZpZOW!G{l+H$n}L%#n0fnst~rl>EQOi
z83Va?eWB$_GiXk8<6>vMhT&(Ha(3m8xbo2>xtiArgEW@PfxbFedU`Z=jaR}wrB9%`
zmny1mHp0llo^aITGiQI;N-jjw1A}(C;Chk~9$Bx8Z@y{cK9!wlK3^YCzLJZs)gR&8
zt4DC+c|~}frG$-6C%E8^e%QaWJMv8&dNu9g*4o!{eXP%b-k2@e6txl{WG6U;)Whi*
z4Ny6yf*GKJzjhgcs<EwH{-KT=JdL3$U>#f;y^eF~dJ88`q`_w0M%cdD5Sm|Qz->Ea
znEdP~{Pe8nHf(<Fuy?ltocsD2ED!F1{C*8wy2&AKV%}i*sWTb!rk!&jzV2{9H0R7K
zKSS}#y<D$W1?*PgfnK}TL9l-XxOg9e#XkFBLZ%U<x(&qR^4~vZe-US#=>&JSo#u8d
zY~j}4kASX*G;p7K1B(0B0RNK*7#IbSoz56?*a=Hbv>>4K2weQ~z@ao)1?yXUpm*<Q
zV5+ahoz=+Z_NPd2#6kkCVaH+W2_qOIi-7*UBe~NPuECdIEii7a95%Ua5CqPB4kpq2
ziCf}6V)tqu7HV{oxWk3yeYOTa>*h8Rnfr~1Wj)B2%mhA|=o0^dMtI%h41UrG=ha6x
zlAG^_k&eG_KubJ8oHQqr?$%lmrQ(456f^NP%OGLbFXO_4%KW$9h5YH$d&pb!m*lUZ
z3z=(^OY&NE$b!vN`2xi_Vz#&siRpiWSUm4Tf-lS>PPK(($Y)hzKemhXz3j^;-;;~O
zFQgJ9y(0c+uXO(7zXRk^jC>Vi+Gx-n?89H3Ye#awb>pk9`k^@a0WMMPAiH!=5uJ)i
zk`lOqB*#4@N^;Tpm&0~^(t=U^-LKkYs>ww1W{nGPSo;o6alu#?v>kR7rjjqprljrn
zQZjl#Z`>Jp7MISk$Hrwx@y?FZ5VxQQbl>|A#Et~qQB{VCM}NZA|KjlUR2S@c<U%gD
zmcXrk3FP%k`SYfpOLk4a3<>87@kfyvnQfSdzcQ~98<`q;o!OH(|6PZZ2Djj$MTL-m
zC<EGtI^pjvpCD9bg<GHqm3;4#lzE-F+^{#fzTzhSzW*1;hjT=$G@49w+zvT?HsFOX
z;hef0#=2&yDe1ke0X^sWz~s-`*k|Nfd@*7<3E$y_SqpC=-Y3K?X^&jU{2$h9KF9E-
zPjIxtQz)u8Bb_DtF}&4T?t-rbr#WUM=)oscN{z&ztPaQ<mxVe8o~RnWkreAuJejEq
zv!1G=>+5~wUUeHD>DLd>Z_)=70~t1i1mRs*p8LBt5^78o(R1Tx%u|)XCJD)<x>;o1
z_^YUxP=z^LkK$sVF#M^%0t2?(L(|^VaL$Sncoe$<^KSWJU6))|J7*cLn!FbuEcYU9
zJ<>_cj@uyg(j(R{l=(@wB{JcG9FRBNnzZk(;9jRylBW{`aKU3YLhF^`*rl(qx>q-{
zE%FZXDbFx->L}FgaR$9ABcZ&yFZn)QiIi3Hcq``wMx}7%nnr~j$Qg*g42t0b>q#7Y
z{l*ol`|wp{48FK@64$?&hneBg#3*Ps+B~y|wG-Ze)y4hfu%AB3`s9WqzS==+=Q%7}
zd<%D2MRIL!`5-CQ!8Dy}yceGeYaEMlT-QOe=S~{#DY}Alb143eI)*3o?C_tF2FY?x
z!%ch5(R6n#YSs?K9mP7h-D3gTc2!}$wkkQcXCxVJycDvu(=q$VadOe8mMF^QAlH2I
z$k^y|Zo1DxGTYA%yS048!agogap8v?!rp?T2G!&7>%qAA%{;sxU5W>mO@m=!fyA(*
z9hZ((#T7ey<0YddWCqcPzg|xvYQ<VErAZr4-v0nw_oiX4X<wN9v;q3szvY@n{(zlZ
zBrc(#JE!k>kt^DBUJm}<43@V=PUF*cZrfLLctW4bfrFI}H+)w?$AU?mU%58EDNW~+
zme}Eegjn=-50MYl4#M2rRPfNNfq-lRBVV3JlRw|MV$#4pDKdi==lh(l{Ww_o?=&n>
zR>tlnGAKTzisjWmx#?=5V8hRn$IApQSDC}si4728r_8zT`VFmDN<j16L5L5{;k*<J
zIgeQq{O6>OzjAiK)ocMSyB>nV6H(mJsxfGj>xEUP6yd^y?oh#6!LUt}jP`nmv+sO{
zQ%??&*k>VR>4xpZ$e;j!D0mSFN<$LYlMhQS#82UJVdh|CoRamBd^r<K)Yg_^!O>b!
zO|~U9ZDHh4`B6@qsE^G)qp;@yH<F#%9aBSx5}oQk{EN6Myf9UPtjev#@2zg+XTOuk
zr%xfrv(<>1>m<Cfybedq=Wz0$p=iHCk8DZ_AYY>9kmDUi<lVJmxmc?k+3B~3G@TpH
zPg_%l?UglT*B3buJp3?;Nxp;0@?2`|gEL@g9EnjjtI1O9aU^5Fcl;dbNVK%&!oDGI
zp<&N`@^r)qk}S{bmaQt1$J|ZisO@boc+(wdc$2}MP1uP=R=*+T(0!a3Jq7B=_QskE
z>!IjY6Kt?`g$Mhhx%=&VpwH<-(3m2Dho&MdN>~HqgLGk+M-jS>RDsYFKImUPi5pfN
zfw}?jxEmYy;?KfM=+bouN{`&e<rn?%cCTr8*)<z{HFx8%qjj9e!8e@at0mxiF`iQi
zo(z?@M`FnCENrQN3*+r|@oMZ4bY7)}z2+^#p8Jz={ntBO`qyWmBuQKd#^c{U>S%vL
zA0w~-;f|l03ZJHSKxK10x;2%8?ti_|wYmhS%K~7}nKZo9n1avh(qPiTEa>ya4R<)N
zLQ|uo7?61ZylmzFPi+$EP%R+y{!Yciztl;Sjt6d3zK$yT{qg#(iD2FPH1zH&$NQTt
zNvV@DuGhSPdygfd?VKY-r9A<D?T92dO>05nP75)usssFZ4=cxpldaKjaMhnI@@1GZ
z*;s8&a6=s~STKSFe{6;R^B%+U8H@3cVIv&Roq*CLZSwKC89za9I2o1eNRsT%Va_rw
z;#wU});pgi3YB-^U3XLR%d(3joeAXPe?DY#UkRV?T>{QmhGOjWR<!+|MtV-}kD|U2
zxppjpEPfw@!ljL*$f+G~-u#Rgzdr)!wf=<nTM7wpzQBeLrQ9L~Z*2Q-66&w-#Ge|=
zVd$ID7;s|(n3t=fS7bfsztn=$=$r=OUR&Tx&|OG(D&ve&y}-Ew<udY_$o5U+E?f4)
zLeIHeU2-+vc=M8TeiDNT!!Kfw%`M<)9D#2xG(zTf0hb*~h1MrZ<e0WxoKet6{yc_2
zVyQ7G&9wnXr2(i}kbo0oE<i_}JsM7V0wDt~$)z6@kJU$^rinEy@xBIOFJ3uZ&z_8F
z9dfv^RZqODe1Q9OP#3Cqo8juyju`o|8wRGIhJSmk@JF^f)Fd61=d5uU6f+AJZnA^E
zgy5lft08c9G`x{Z0%pW@!Gd>R!Nsf_K3=pDd@NI7`s*Wb{9q4MELjQpzYlU<*>c(T
z`YE_!$p`o|{Uv8_bB{ADo-NOle!|z<DhKyh%b>jcB)9&tJ#IRYz;&MPg3UA(Cv5ov
zi!IgB_`lVl@0$%XH-<slM}h^x*3ji-0ZC=kp>ZvN70}3ye=8TWISj+qIi~0&YlK8E
z9dL}2z`RDAXwNv0iNA(`*5-FuI(I2|@?`=Jve*Thyj=91#c|mxh1@#vE{wdQje67V
z;LC_vpch>ZFU}q0YKa%Nz3h)gc5;}p92WhlstK~~8!#)Y2z6iNg4TznWY+fEczD}8
z+`RfK9;sC(?(HMdRO=owGqgY{WIC9JH^VH$DiRtth&VkC!$Zr{;lK1x`1)cbUP*k!
zC0j1V_H}1qVBv3c=o*3}|5*`s;sGhvc#g|1`;mZKWq9xI9o)2z!;4B@WR%i<^iX!i
z=GGSUm;v}Q{4^SRD$4KZYBc&Wm7MOMOfm*O0!u-Wyv$MK$G?3|Mw$P>S!-XTkN0Zs
z*1Q8GMGmQ7xx$}p3`>Dk2R(6dtvnudQ6%kHgAE%*yjH0|f`5I2lW&I*gPlQS0Xd9T
z7R5O8Wh^<Rx(WTCe!!UnOX0508}j$`2BNa9AITk61C`5PW4n4HMz_Al>n_Hq&}IRV
zmh;g&^et{pRU>!HR$$Gu_vk8J#Ya!HNv0e|?NIuGeB-mBxVjhrt-}O6+V7y|A~WLb
z^&3~&JS6=-Z6ukSrVyc^3Zr5_VBAk-wEQlD>HD>~B#cL&HSw4jAmY}m-TA0f%SgdH
zHL^O$hVb7g8D(^WOf-2wGTWzsOZr*-D(b+(=5P|AK8jpv9Yn(JkAMqEFs65Z@>X{f
z*;Jj5Q-+jd#D5=gxloO_p3J~k_lwcgaxxjc(j9kSmy6^y=E4qhxwubb8Mk0ke@qzs
z3Q}X0@kDhG80dK#epu_l-}UM^<(m%-UDuu4w^9c}f;7=;el4UOe$NHYWZatr%b?Px
z6e2?k9AvweLw#H^cUR31Ba|~aqUnJCF8eT8bsKyp-{k^{Dp0Wth3{62p<8P(mK;)p
zq`=!;O#jL7dUPUpNuFP;>`DRF)&n=S9)_{Xa$!haBv=~nfzeAtaLwgaXnpZ1mvcQF
zr*To-z|aDWN~i#j;#L^%@5nv0`pQ{Lzo5ND0sDHFK>kN(C>>o3E^%?(r_oce&sYsC
zS}d1()VM$o<8FAVd>;5#U4ilIPH-c}Zoua|yW_#dV{tYMh4(gVF#3cUjNcTEcEU|g
zw{-&dXE6b>WhK7*bQhjScXAsS6mZv#<KX0yV=ysuw*#?|IGelAxaV8DATf3pT-fv%
z-&HG;>*klC*PJFCUDgES6mwy7*#T(w{tw@tFo(N-+o0r~8}3xw3(5Ye_{Py4lcrw;
ztv{C7@NPZMyqm#|8r%q-XU34xR++fCS`Y8E$!kl;>K(TA&cl4)Ghj7vBRZecgu=@n
zz=Sc#qQ_wFDg!*Vdo39D`VTBye{n&~3E!`m*Mq(vgx2!~FzcB$E_Tb8cex_b$!QX#
z9esqOEl!|nb20WBcNkZFjKv#2df}j6)5sxp0oRrH0u^`&5I;x$dg{QzU;1PD+1ZfO
zBN5LmFTyR0<+V{SQ*1O!fRvNZu|(JfA9^y>TDFUfQrV8BgHB<cw+D9F?7{0>pTP%T
z3sNZr;bKEQtQj^9T@*8*vPOhlG7kOvm7rdBAwD#(#E&yi!KS1jTsy&og!ev4#K1Wi
z_^*I0o7cp7U$rAykpk*=p2f!+qrhupJ`8ZUj46B>W<QyUoqg;vapXbl9xw^?qNbAi
z^k<mn^8p@BYyz#zKrZd?2};g(V5vBttNVEf-bQ?akH>z)c;8%ZzSRL(GJ$b3p0q-N
z{}Jwp;eQU_Tcfz9PY2-Mr)YT&zng2i>diGKIDm2aeRy!y!y%$i3wN&BUJmJ=gHwJj
zl-I`UVcZN|<P=R|&G!v*iAFGNzI+rOuO1J-VstR!Lp!%$Cx~;H{h4$8vX$$cvWwfY
zVJ9s8+YL?TAA)|G_uxj)L~iOqD;RG#3{CEPV}jCcZhq`na4tRVz-3lI&cJ_Mw`J+j
z>3PLrs+T(adD9I&rp3aDqXqD;E)gD%NrQqb2Apx3JU0D)9$tPu#jQT=3>(@yp_k$a
zc>gCHCI;^2>IEI#oAexdzwU(J+N!W?O*J&-TfhpNp7?gfE3SUoQ0|415ttAA4Ug~S
z1GiG*6s{F>DZWGHQt<|;%1Pv2jW35#|4p2mACEg{pWr;rO)=0u9GzxFf!&7n*uF9o
z-aRabiAiCwRYuX{gC=aa^pxv0-v`#n<zH{hC&Gn?Cm>f|he~;M4?Zn7K{{{=4EmS@
zEfM)x=)DWO9#}yv(Zlbnb-1_M_IS;AhCE;R2-;2ITt~oXE@#Jim|foqgU6J?=U`t@
zn^6F{r}Mabjswv9jeuhY&H<YpPQVlpm*yG4`VD6wx3!qtdNCDePm&Aa12g2g(Nbt}
z)WUFYXBfQeCX7kV;r1Cga^B)(I4>9HwVi5^!`&}(In|5c!LS}Me*0WZz8%j!5X{k4
zEZ`1qo`aWSE^=YNrsLJ1aD23&04iqwk$<L$+cH(LO8)?qnNP$2ZuNkkHZQppi@h+}
zyFYwdVFL?0A3~K<2!0H#2bG!D7&9dt_U1G}zJC}VH9m-Z%}6+qGXxJ`e(TW3E(hZ_
zj>dC*H;lCxIS+LK^hVUeMeztW)n~x@DJLMzXC$Unc5pcz8*q`rI-EbC44xVpLQaqo
z#;6QMAJtQ6aQG%14n0Tak`<)(^98b?(F(hjYLX(q6)4vxVBL*sXjsu7mQFkd{*EbV
zSKEVl4H}5DQE8Z$=!kLYiNt?LKDZ8>Px^k}3jyxOh+>ag7&NUfsoA}q9R2bcPtA%W
zFOPn~mnvh)jerWdq%x2wjkyE8oNJ&xa2)=c-w2mFM`1$5K(e$=UW=MGlBhpiLo(jy
zVcJ4Vl0CwosD)<{T$B%&M;yed)~C2Z|6IxSr^CtbzD1~#a}I(B55j$pHJBA2O&)$T
z#Ao+=lZ{ck$k6Hg(L2kJEX}qhs^_B7t-2PJV(rP;JHw#z$XHl)KaF#-U4qtzr=f1h
zcj&{bL;t5I!Dm-0OnRz`4o@q&yIR`ZE+~dQ8Vo*(x$xKO1((*Y0*j{yz_5>d;8wjo
zrxs{|3r|kvcH>wqzS+nvTJ4RdA-%C&HydiFB;em;>UieGacDCSg}PsVVa%cq?yr0*
z+BnA-9A)+}cX}u1IqWce+5AH;RceGKF-oY|M;kla?!bZQ1-L);2(m3bAgnwQtSs+4
zSoX@n85)h?Hgqy7S@h!GQeP+-@f#EzjBwGFfp|2*7tRgZ35QQU;Uq2%oZC;sd)YAv
zN!|iKH`-$2%v{dgyPmuAY!K%;em`_yPT=ElKPZz+g4CP(L9XgSF7d%#aDVT>rC027
z=(jb9iwQXhx8+)uj3e{7pZ8U{$F&+@;Gzu`F(Vyry~*di*9_%ajxPka5eD3+vm(Sc
ziEw4`C$9a$Ita~}05N@(VBUbkT*~Zl==@pEjb7@>6==%1tk~sT$Gwl-@kh$wZDSAl
z?+?Q@hcK|;JCxITVgT1V9k5SR2Q1kd$!RU$4}Tm7JB*)l0rs0zajIhv!pHN>VeEe;
z+@+DTA-k^t*G?@1SoVigiP43N#of8X=jOqIh&`}nT?w}q5$0<Lz~W6~@RM0SCd~DO
z2~>r2-`k6fyctb8lnC;T%A_G^7#_*$Lz-JC4*NA94o~_6108yh2_J9Z`DuMI?ne<+
zni*oD_Fyta+6p!o6LIBGE%^F=6B+V514imulD{U!WZ8{W>}%7H<PA#4$yI$w*$xiv
zW*3p~O2e>1E`s-z#^bQcc<|ZX32Nj$I_eK1v-ebB#~T^WUSo!W!$I7>eIbdSzmJ3s
zvBZxNJn6g75Hx=)6KC6*xM*1x-sqnUBVH+>s$wag^|+1UCw=g_`e96;FoNh@6@h#T
zBX`{+$(tqxTzY|Ec(x^RUvVA`Dn{a*->Oi}9ma+K(lL2vC{|8%0?kBy91JO-9JCl0
zxcb7~eMh(_>nFgcu(cR-)flIG4+izXaM++Ag2uO=*ld`_DP_pxy|!cA8q^?$F_Xah
z2an6T%lnIt1M!oRKbrMZ$BW>Msyh03-25=^c^e5cuc$y+|0pokwuO5uR>H?VJ+M-t
z61P6wh<7|(@xqf6@Iplu#~pDcRhN2`Hpi*(?8|$6a78Y_v-QU0-*@0rtv+6Ua*m6e
zX(t!a*y7!g1(<oW3m$r1hO<5@Xh!At*2}%9R5uW>ye)-k_b$V2ooTpE!<<xavLIq#
zMQqkwM@GpJkZeded2#gtW}R^-13vG?g|9XE!3M`MX!3CgdA$^um)DRO`yldbWd`mz
zyZ~H+Es0`^DVe07#U+k?3N`Y0dGDIN<V>6aeh*ng+%62_?W?<!doOEIWGxusU{506
zoy6I%%1Ha;Ze-Tr7~FT^E-o3m7HyNwaQ|&nGB2eYc{5=(8JS!xk6CJ<CE=f3sQpl$
z!CCN4Hx$V7NvXtq;~y?6Es5AR6=QyCIQcr_EktCj#JORKWbMtNB-DEnQ6BvRU9TCC
z&FeEjoOGK+omxzms4c>I!Oi&Jrn|&Aeh5t6bO_Em`Er#J@*MtBCA>CMz>dVLTrJxH
zB_GamTJk(jySae7H@nus{`g03T}=Zt{5cBVAKr3yQIT9($5L4NFb+m{JLKRX?`J)I
zn9Hqy+6#9)TF8x2AB({p;`E8bVbF|+u-sgNh69sf>rD=BHz}j{?^f<nb0Oz4Q!X<<
zHkX6v`cTw!Ck!YNp>VDU19eN`+4XSlyLluiA0Lbho5$d9P=`f+gHZc;o`d4ESos;<
z4UManaoPOQ+_aSOa6GdIZmg6bO}8H^FB}7r9?`Hg*nv~#u0l=XH)swh1nYN^5S{b_
zdKL}9`Z@h^T9q;+NyS(?{~x?}I*81u4y19uIQGVV7^tmGv`lm0oM$TM^d}gu%kxM@
zxx_3d?hHiN*K;>S4xI<Tg|#ZST#H@;a2ow!=OcGiVS%ven+)GnYZ1?LR+vfm;!aId
zj2oqjCMBmKdfg?|cJ73|2|K|*(FDym$H9ZpvrzWU8iV6a@cNK*xc#6U%g|&3=i9yT
zhi?$srZSYAE9{L|(G}y{UpbU1s*y`R1M%PZcQ`idF?Z=vGAI?Dz=ajFadXHdEFJ$J
zJh>czu>*8r!QVK{d2s<}kADpxzM12WdByl-em)#AcY)A#V>y4dVkqK<W368?=*nf^
z1KgK@ve6}O;1?yFM8`nY5i>4xLpXOL%pUEn|H6a9GhA5T$=s^wwXoymKS+C;;t-Vm
zj!VeT=Ym6{<-L9%&VG74yfJe}^=ePhTo(>K0?k2RF2>O?3xYY(Z=vRtG3ZVEKYYD;
zR8DXB_MPUDl9W&?jij`%X-9*KOy342Q;1|t#$;@s2Z~fw(x}j=?tSe@5sD;KG7n86
zgveaaetzqD*Lv2w*8BF){dfDm_jR4;aUSRKInLzSO#}%iM9k9#&CIcGYtS%H4_0U=
zfshUjcwR1q33GP_Gxn^69kMxaTdfS(92o+G{(iNH%wVA5syU$KV-g^KPb@~q<S;FK
z891U>2#oKrK;y41*zA@G{v4XZ?EL8iug*&VqffLluk+q9J6C(dD+R5vvONRVuc-qw
z8f=ly`?v71iUwLg|2EX?mqVW}ctG|-BT|YngEi6zK%UbTAiPQFE!IGxEeGMYJUwu~
zx(SZ?lmUJ3E@w=ZRSC6yHZVCMjb_?Qz-!@3=(nINIxlzyZA+C<p#M=JR{JOPRJDd%
zEasxz<~AsiWeQij-h~h5tb)!{L*T+OU!Y`K2z+~P4|+^^)a<+jeDBpnG078fa>z?$
z*Zu%5-}ep%Rk<;K6*0)*lOyz~azsrZcY|fCG+=IlCgScsfk$r?!<Cs$Fv(jR^}dS*
z*Zi!J^&?lL;JzIWZY_e3z0y!vj{sd85r^kb+kwyLZo@w<;cy8FgXIBIkPL5w9`CfE
z)zOJaySWq8#HTYUtB!zCkp}3*s`K!~=@HO1)XpqgWea2On81uDQq1%m3lN?b0X8e?
zz;uMc#{HqN*mWY>Z(#sy)<?s^i-VxN$rmcj>IYp*Q(>i^J{+4G2>cJuf=kxE1}~1@
z0b&1?;p}%B@XxH<@Sxgf*nNB=c$vBx&X1gj9IIW?TKf?Y^4b6$%$v)s6Q7MbFB`)r
ziAUj4RL0z_ss+Qpav=G56n;K99u5~ygOAG7;oImZOo{+j!%>ycFsv2)OXvZY8wX(V
zVWEzC%frROJC@L4hl+F>VgK$+=tYV=4DS8|Z#@nKC;zs<0atsVY|_Xy?l}aOdb|U9
zQNhr2X9CE2tq$iZ?Euit5QW?Wz<%{PXp0%Z8cajwVXMHtzgEb}#1{>j%z@j@Hlp$S
zQlUoM5X`J`fEOJTP}#~{0c3av#LH<wJLwr<Pp>3gv*$isIoAnknk<IR1HDk#@dPjb
z9fTc{{_tIt6nd8<0ncZ;!Kfk+ruO$u_%-+eG}D|8$8DqF{ozh<=W-s@J30?BFU{ab
zg<G&L)C5gGR{^HftU$#c%hBE@ZFpWK10<z9fMX4%!HUaH;Njo^6MugP^ixD|o!mk=
zwCWoZXqg9eKAixs|4G4jH4Th-t`ZY=UmX+{RDd?s4iKvTlDRFm5B&YH07!;Rg1cAl
zW;70kLTnz(O!SS0OXEH>IH3*b?X!lT0vmu`ZV@#9*b3GP5bnawjo?2466PkS30_+E
zGA2`^LD;>SAiPfKr7b(aDUH+cMgMnjZJsC0d2S95wR{3X@Gdl8lnr|nTA5p=CBSHF
zhlROpK0NzS2UdJDgcX*P7!$GwY;^1e8!!1oWfx!I_N*4<RWyT%7q^2BsWJhw*$H%(
zCxAmx8SKq)fRp78!PE_>V96;jaM45^nRH}Br3sT!jGR6E1t~n#^aI?LxQOf?T?G#V
z>zL<mFF;R>J@U7B1QWU}p-#32keYuBE)7R8dz>HBU2+e+PCWs}j#R^$7ev5ogFN~%
z5{CBKFN0N9Ay7j11YG#D5_T;YgTbBh=+>JPuzX8DkTh_Ci+rzu>7~U$ohHHQN^XK#
zQWh<JI30b<+X%9$;M0<rgzEEh(6-KH5KmJ=uhXrWy}r`uR@y?Sd`28?4RZt7Y&yL6
z*9hI#EQU)oKEqowh0rA^3z|G#2TnayL&jb`aMMo(nEmS%v`R8UlFo*pVZ|{3`8~`Z
zKVA4J`!-m=*A)J@X%!e$9t1Z=M}mk(V|Z$J5%Xb&I^&$X8}wBh!?NpVfRF3|^C`BS
zIa+EDzKLgnj*wN%XF*3DvowN<een{==1K#nHCtf+!DF!2HyW%tatLnN*a}Aag!3_D
z3D9y5hqYps;L`<ppzC@a6z;JCkM7<8=7<NIN8JUw$3K7u2O}t>aT&}{ZUi?b)<fyc
zSoo_W2$=QC!f37E7W<;&;Qlq`AVT;qq}PTs35lPXrU5mWh^62S%~%liWj^RXvJIFl
z>16`G%0buOWSDS%2~etC0eZn8INzp$bi=2k@R)00!Bm1O&Mrkea`VvSaT;iR<|gDJ
z?E&|YJ`@?B1RoX^f%0+(h*ngfiG?1h-S0SD-xdaHuKb2s_J84r>Di3buNq+<@(j4W
zUxZ${X~0nZ4ajrh3~c2$06oOzk=Be1_|IP(S#8@61LyriLBfn~J{G|(Hh-aEr4>AP
z|3BE7rHkOf$8a>Af>vs8LC0NBfm6Hl;QNVBkkixA*yP=4#9V1Z^EQnG9Zhj)RM|yX
zmu-k5173oKUYbx^*%?iHq=;<Z_rkvl*Wly}GDy31I~afU1#&ob7X97507?xlhFkBH
zp<+KtRGLr+f28*TFN5u<Dd-TYj}AaqdY56H!#re4E<w}&R&)$pfYV=QgCXH}n|l8a
zk~z8uow#)i1`iwn$B&C6TS;AX!7iBTnfnMB&u;|bdlS%*bq}a}po-Sg5#-hK9=_fF
z7&?o^!-WHL&|mjNc;|5@YI)oT&0d(pykG5bpE(On9~!_@LQR9KwNdAkU^JXkg3{Y5
z@D5T%m0j1-Y*!8JFLfI>tHq;xjdDQF?JUZln+T0m^^lK_H?S$%A%r_EM2A^vG(Dh7
zfP>1Te|MtM`HB01bZ;pt3v)!`^8#Rtc{+^N?m<C1o?xo%N3f+kodMYqP%U*7%$E|N
zr=yz~`}mvSsh1rP2&Q16(O-sUNHGhPtih(4tzdai1+W!h?@1fI!Guav;BqJfJnL>`
z)~0TNxf>QTV|&YC<GN<%da5LJydZcw#3>*zeW0|29!#@Qg-TIDV4vzwu&H)5s47Td
z#yXn<wN1~M4UIOyTK^=-U$|bF)l7h;8M(mf`57Rxy#ihoWWrPZ5pY<knc44l2?TT~
zFqxWTVY8|TgvGRh{W*5bm_KdIOzUePyG#Q{y2XGyS-#-Pl5(&!<2@s;TmiDb*Mo<r
z+W_}@0eB~PlRR#_fLFo{BKuqu!}~mjD(S({)ovz|R4oUS|Jwu`D=mSRrw#NJ`aT1f
ze5R5}fHS`00+D4Fyx0zb0(TE&>lXlRqYv<abOBR6Z84nhE7Wl>-owiZ-OS$(Ke+GE
zYv{gH3`*9#fhQ)3LyH;V@Q#)^OkOUA)Ml_?SlR$QIbRG`J3M7F(t?2d?_E$!#|ACV
zH-e`#LZSJQNDy^Q6&fc8!<5$-1dqr}@OEGf9Ii&p>~-6q&7#?`!~Yq`TRFfOZI1)3
z;~>23cMGEOOF;cb9CVhMirCo+Ai@71<WDBRX;&|S^fGrq^P*r8N`qr+cf(riC1B~X
z!%)dd6=g162jhkx1LLhO(Br8(TyOIc=CAjMHdW)nr0Q#c^)^HYu1X`L@DQ+Rj1?GV
z90RvMGC~?J?U{n|Ti|=Wq~J|kiJU?)&@l{!pJG>|v?c(p6m?*^eJ`+Uj)7Z)g<0HR
zMI>=q@FH8?26Zpmz}KB=Kz(uv=#u^dZt6@$^K)XMUwH}a8Xp0;>}k;Iz8M<lR)l)T
zUH~2c>ER^rt-#)T3hFan0KMH3VgJW(Ohy?COy3kjk118~)u$OSa^E0GO7eh<J^wOS
zdrqU$fa5UrND$cd^gK|!^#xALcn*$ej7IuWeqi_88sz#$1KrP6#6=eC;KTp?QRmlM
zkZEp%{uFElPgY=%zHk&gxp+DJ^{Nf__o_g#dAndrO%Leby9lYzv;lV-#L${ir6BLv
z0n}#n2#9;7ptE7OQQ6~o_-f5(wC>_}=*XF(5Fv(PO~M;AtMUh!+Mx<%+%Ll@oeof(
zn+nw*LKJO!1pOD13j<c!Av;DNR^P~mxBl89^l&}$%gcc$)Xu^?Z5hlvXJ2%Kw?xm{
zZ^6#@;&8d7tpN1ZMN!u-A^*(}VCWtt)Wb(2pTZkp&$Kh>*V|m=eCQ*L5{-p6o6XQG
zoggTtd=1)~d;!{Bok-<V5^5V1Rx_pN!@g&&@YSQ!&`P);uIAgr1+SNY?wLaEw0R;r
z(5`|^Y~!JC+5?zw{tap!or6?vs=-~`8_^=AJD_(>9lBa6%wJx9hmmjAp(!gR5wF6b
z0asHbGF^=-KJ>w$1yhiQy*cEo1)%(e1Xy-tIy~(Z0nKWE!WTBGIPP2&vKqGnseI5y
zyBwCG7dwj4*=x@c(7Fv8&q$!oSpbNyJcgdxTOhPa9gQ7|1TL}`&~M))^us#?O_5p-
zKP!o$6+?DNOLqj$EFOf%Z^)uCiPGrlQe7yewi@kxF&bvwl0=OjI`Dz1EV|<Q4hD{1
ziSA!^f@!g%kc?3vRMWWvP3bu>DSA0LdVUI2xY7<&q<6vQkS&1AoCUQXRs*lEuVI$U
zCeVNOHykHuo14a}q0!2Uu>C_g@?DXSj)qS~2iAuPp8O}U`eiRnej<$?9Zf{_UtWVE
z!9TYE=RpnP4YL>DhMq;j+_!o>D*vK^%&zT6(|%;b)NUCxPel#Cx^x_E{pt(@^%aqh
z?^JNF+XUS=`U3y3W08&27jV}-6U>>L4=aA|MG+Y)uqH_ZMsL{;Yog@gCo^@lZsRq?
zypV+7@|r<vRywpb@B)VAxxm|5m^-}ChZd`D16<b*?DBVmO)pBplO5#%R}z?UMHeI`
z7BQj#Ww55@6g+Er86@nU4HB>9GyQ_@_kH$c@ZYrx3;q4t;Lzp6Op?+KFgElfQ|;>n
zdD#f)XFD5O+|Yn=_F3T1pC_O>x*8b%lz_MN=7Jl)%>h5=0MM>I2WF2B7Ch|Aa7l6)
zU}Q4k#pAWm)7l+;9kPasB}dR~*=qReaE##J{|Lr?)MxgY7QyFb+d$KleehA{RwlBy
zjFCPs)H0h!!<>3y7OUzCn!6tZN%ynNFLNRJd%<#e>BA~v_rM?M$ZNv0rMa-gMGG}Z
zn8WOK;`nu1A(H#*i=<9%gdV%a5UX4b-%rA*?EWq|PAUPIEpvmnoTj6r({#|{pBb?K
zl{AXGE{+Z+FGso;=EAse3A||D8?fq-D(3oEz*{boNO5f<GP>!ECNB7b*pCrN{z5!j
z?Zu+^U!2jkh3@drhe>eRy)=04&_?Jzs{xj*n}Is=X5f{9d8jNr3Z3>`gobZLp+Zvy
z+|jOp6JuY2o;MTGC69P;{AUVsIvaq_mf4||ikE@XY+>$s+!|f+x`UML<6*j*EIKuY
zLG`;vqxtfq(4SUAWbu0zI^Xgi%n#g%2IWVC=Ckv_Wd18-KNmn3lU(r3KuqvNykhQZ
zRDz{?IpFcO0pRvom;?GqF#lb@2(rxU!3M>nAf>318Q*OLc**JD=$b-s(8rV6Qs4#s
zq!%)GU(Sb`u#?f~n<{vpA+%R)1osmwpp@4HXt=H%gqBr+wSm9DHxp@4mDa$FHeLiO
zFF|mp<PKvR-3HQkUI3b#ngl<xE6fo7ry~|$!L+qe(0S5jI9|D)DRjIJy!KzR*uA(2
zt~U7v(4ItieXlq(Oq_w?cERKOZv|Yf1>uRtSs?S|Dsb263lkDw4Yuq~1znp@0wD$o
zEE02rtzX_U>s=Co-pui!S4kN{$D2UDKpn178x3YEoCMG2MS<8w^*~Lzh`G?QoQYF7
z2l7|M0k|~?ytTi_JX>Y~(&A*n&7yuVIrcFVx{!g<zg-x+XVak8#V%&zoCUDI-vUN%
z9|mC~g>c5^sqkX%QxMtM3wjT{2h+R1F$VK^#vM-rPU+TwU91BBjYz|WjlaR}N=@kW
zZwKsnAr3LG3Hh3CsJ|cwqDB$p<5>*mcFQmeFX%(D_zyrl(;en#g)$!lp9*5!dGJ+n
zH9YE(4~CQ7!Qn+2!upUZ=$myH<XMe@^2cpK;LN2$k0=I5``>3?sXGIQ)0koB6@dO<
zp<tZgrS|&ykx|Nx1X))<F<1YSf#pXxFw0(?vw(L?nLDnf;KcX!K<e>v#vxag*{F^|
zO}RaYIiqRuZPqEq(#3+Y$&Loc{)95)m(7I=OI0B#nE}F$b_$-WaG+qj1FZQy7Q76d
z2;0MdGM5yq8G~{;!0ygtp7?%e4%;3EBRC0MVLHJm?K&{ecMFs3?Fe@DtcRj|(_o$U
zO{R8kIyiPO*5Z&-9f)nv0i$BGfM<>jQ=?wbl)9Y&vxRxmr1L3Y=GEmu(k}q4iS=M^
z#^eL71POTjbSzVJ#03nWNC$HcXo5IFGn~Jwo(Uh6fGZUqfjbB0!A$+};Fa}BaOGkM
z*nGGfcwYX>to`7?SYDq74zu+jIs7O%IIe*?eSIo$bT$FzmqlPj-U{Yy83UIHe0N2^
zz3}M$Pt1#zf}eHP3K(fx3vL9*!in9dK<DxvU^=Q1XiEh{nNyQNAKxgzt=EF}(FjB-
zF9$x^_W)|pfH(1Luz%lt=sryhPMRa=1{(!mf?fnv?fAtw?QH|$>lQQIFo4{s_h6}u
zJRHbc!4#E<f#)_4f$4xT{9&{mEYx=g#5ohpo2m)Yj%Wz{sF@H}tAkU<6Tq76w_xf8
zOEh=V05neR1P7cg(Vl1LP{CdYbl^)H3|?V@Hm7;RYusI=?ktDm=19Vtrz$Wm_7xfk
zh(i$%!r>;r5D+s;29@64gaYq`Fe^OYfrE?Rg1XAD=wsy|Sg=zby{(;qJ#LLc=VtfA
zTbEPdf2Xa`F_jkh!@dStl6NrX%^|1}Q3oeFOoaaJli-cj%IMAE-RNtyAzB)nh>W{Z
zK%@K>xP-n%ekBA4sSH5JTP<ksDjyKNa4$M*v;~IX38?x|BG~Rb2g=WOL`x<}qg7M-
z;rq3cs9}#UvKe&<1a*qzfm3=&?6DhE%8G=ZR~7L!w@&8AiG|?hYsgG}AOX|7f`FvV
z3t(AT!PGisfVVXdn9qA(f+=yA7|)OQEXrgj1I!ly>D!0E$DAC-DBw7g^uP#sHEslF
zoH8sT>YTx~--z)(EeFjT_b|bmhQP9G<6%;RE>H=OgfBK-1jkaMz_TJVFh9Q=OlU7=
z)VGB)?dhX|?%F~|anvZV{G1!uFsBM^yK@xyGv|TqEG;l5o&^RXb7&~&0kNlEGYWeO
zLDw<fqF?qCn9Kx%AHto}s3OU%AL(JT%!OSP#WIi{p$gxb=z>!!5nzJ5EdxdbUAvru
z-Gh@s%*GWU>Df(i+f~pCoC9Fi{>i{DbQ1a*lmq*la$w7^Z16xo3Z^$M1^o#%(4zhp
z*x)q^EHuajpC;`P`f4BOJL@&rbEW|l3@?JoO6IWQ*IlOjdmao-9Sc?uu0;t4Fxc`b
z8h+jW8}3-30vT6v^iAFaIu31vtLKPAqvvu+G$t0znG+18J=(y*bv6J@&I7+6%EG?q
z(&)C+0azgH%uML{2&S$X4<`k+!SB90XyKf@pvG+oD(PHgByGRJ7jXt~;a+oCRriJo
zVG@8Sr~uZMXTdp1iST``A?&mbfbWZ)!5_`5uti2!*u!jq1;?sEdZRchZrTDiejWf?
zD4J1|G=Q&jW#ElCKX}rji#c-m26&dK3Y?4kfM1X@c(1O@1pjPi;`1JW3*SVb`AHS?
zuG$z#bR7X^<pUu8i7GSH&w-K=UnWp8655$mFiVFuq4ghM7`j<FFa9e7boDWC_~r|+
zGNKfG4M>9j<%@$7WuEbB5`0kC&oEKiqrm<m5%_Q?5^j0(34~ex0^Cd)P$Cut64FAU
zA-fXJ`gWB`9is@D@it6c|2r_RI02Y+)d0|E&ZOUMX3lGW2U(vU0(0kXpu6D!z=0#6
zbnac|s2>Y?-}m63lM*PrI}0Q<-Uf%SCqW<CQh0gHZm@dVaum3GJ(_&H6KzWMgag$q
z+R#51rjMDA;tp%Vr1S+~ptKpJ{C*364UI>RTNU8Rol<c8vnbdTxfwli6$iC0^Wo2`
zFW~j<y~rZ35HRH?Xu#kenkUpEp5f!rzsbj8kEtd~ot6r_CUqhg2Yt99RSxRy4~7jb
zPk{a}WjK!Q5R557Xw}ynQ1*`vx;OJ4kUF0Q6DQ6_`tD~@*d7abdhcFjvuluvmI+2V
z@#4sG`YEV*-5>NxHG`U7c@$)1g-V2X{HXVJI5cT4x~iD~LL6?Rp@&CM>bx0n{M=Vy
zecc)~TkjcY8J!HLW^v$2hc+7N$$?M5^+Dy}K~QSp35U0M!T1mP@Qu$sP*EcX)c1P8
zg>MRA)ZC>|b7K>*ojV^MTGay1+_?d3Q^n!!4Z+A|k|rn(pO27ME6})F4@--5(ft{X
z@cE1?bbY5b8n^cc3`!Mh+ZR<(t>Xhw9Eku5$`j$GoeEHY5W#sxmtn;ORdi&y88(F*
zpuXfmc*AfMvdcYzriVtL+JkXm;oN9A&u=2AF*8JqyTy_EfFdd=i2_A}uFM65!r$4>
zC{iE76fYCFc-J-<{be3J@Ovw)jg>`v4g|vRw_~C2%>g)a_8(YT`we)@&IE&gX)x=x
z8Eg=Cz~9f(hbBdHU~!BrT>tV8OwP*zU0{Go_p1fBt8c<f*InR;-s>Qt{W_yQ_Z_e{
zegXP@t(l8uTfmfra^}YZC3wYJ1JtiB5I+CGF|oEV^ZshMEk+97+o%a|y2Jsc(#J4I
z@G3rP=Yh1j7z|512MQjWf#$<{@JgKsuuZ4oTe=ip{S*!SB@AKkE^V}X^)#e&UlF?R
zegLnHS-?ykr-J@l{2WMohrw@V&CKUNMWCW@J7g`LVX^E4xaN2rIR1PrblFkJ%-Np;
z<M<0u<z+3Hzx5hOD$0RT>Z5_(g%{x8jZ;jw*(PCk=q$(`34(Uk3gE&~;Vz4Q$C%ic
zK(X~7nb8skOi05#aP#L=@Xeos4^ujrXp1bSCqE6m6XpaXQ&TKf6q|!?(|G24iZEYP
zy2DgSWWd9v5IVRyfyXyA;pq}P80kC-`YBN$naRRgSABqL>vhH)X@YHHN15KYmzl};
zyFn;y1O<*-@cgq0u;%m~#wt4-oLFuM$GL~YZ)sye+txzxrMT8Y+O-8dxGL=25DYIx
z6*BkUC4rOcQ{k>)4cIuY80NyMP-Ha(T1XrNmkoeWz6PxG{tra$91q*#rVD#9nu~1z
zujqgY|L^F4^;_2MF`Kht-S*_oszxbdqs0E_&*}otcCRa;7r$L4z7K<`Jlsg`PuNi1
zEPa|c9X-PCFV>|8wOZ(F<9NQH-Gtq{`67Kdu9h8;@nto`+y%q(W;V5|pFN(O$6}LS
zffaYAYHWTeNn2yU{@K?>UgarN7anNgV~qc?)>)hI*_v_uxb^R;6IV!6gOsbkSy}O)
zf9BGF9V_S*rx^M-ZZiK_<rIIbVv^;%%IB;wX5vF#--=pIeAwde{_JK~HF7e16y1=O
z$aYvwW@~e&RDXBh&$o}B!RDDwBf24}Y|91>zU|;GB08AQOI=dnKTlPz-m*lFg_aW4
zN#Z&DgUG+Uc7Q8S%{ux2d2A8Mo3-{PFG-G{`2YFP{-3mk|MS=!$5fz#*bx*3|3JJc
z4O`0|!JAYb;j3rEP{2QDoHjiaseX*dhyGiRZZ$;1-u>o?`EM3}H9QMnjC4S2Pc1<z
z(`pa{{BXubCm0wfg?9w^!uS_e0&GMUJsuZ^yPkC6>|}9lAh`{%+a!j|N38Mi`EK+k
zu>-4@FGoR*4B~4mQNeeFF83Woo2_PG^`R>4(6|_{OE``1D8`^If!dgHXvDSFgIG6k
zC;Bb36YE-=!J*Ar_|4%-ICP^f=0zODGh9%A+k70`xeKQaK15Yjiny$26E<Bf0G?$k
z@%t|)@S01fk?+YP!c-vy|NlNV3$txLyZwB8|F2)${|Tvh?f0HEuP?%f!(Wl*Fp=K-
zv7FR>e<5<V?jp%Dm$>JPI9kU4N6%CqA@wrjsOZWm6lWmGdoQ5G)rBFUlg0Qymdizl
ze*=8k<q`GfC0OI$G-}^DpT6*Z!L1Z1+JApc;9dQ4I2o@9YW4*ZC!boHU0}|g+TOx>
z`B&lQe*tuOMIN0w>nr*0AddMoFaFs8LUTTOP)8>_wzo-}w=g}A*%N1|t6MksXtO%p
zd2WD~y|JbBUrx}n234xH$c&Q2V_boQ0WlX@@aD5x(QS_y9Jyo~QImUv^!k_3G_(7n
zo|pRcdWS5n;1A(yXL0geYCKQeI_cj0ELs@$4f{Jf(moR{JpHIIeK2E)dJN}^zHjtl
z3u3O*)vGGWm?}NC!EQgTb}>ZzWPc#32Ua{IZOxm_Izy-FRgz1;j?<04W}@UVg>=VC
zdlLO2QuOLsk7)S>VQw6?gw#G27;f8K*~}UBWP0~nu5WKV)#(w5%=ew(I`?PMW}jTt
zA2*Gka=?%%`^m9;M7!u$gLEP$%Hnzslu`YPX8h~IAyL|pAGfJzDd}=tNE#$#d7FAl
z3pg2CCgh8S8HD1laqr3IJE5e=Nu3{m`~}UL^Nc2U3S14j)41t)m&p2X6n8uPD%s<*
z6E_T&@GI=^6T1z7=Kg7<T$U7(zU4$b<0^3Y?<m^+K7-vS<k;Ei^>eE1Tr#jW6{+Qn
zB343rX~*go?&^RuFL{4H{ZB7}ICjM#|Lk~fJFTWT^(?(~!j^Pvtfsk}_kdgfs_2P_
zH5}QY!TQ;0un(3#=7xJCFuh<xUtc*(k0vv?+(ean7XX}ca;-?~$4~C1Su6>zoXOU=
zMk2kI-Skq}B+m5M7i4YGOIr_QaPgcibyK}cUDl4`SN1?sGq@SApHm>Z&?ZI<*Spev
zrI>#09i-tO-_fC9wxU(zB-L5mE>dY<P3|uLO#FV#AyS6+)v|K-Z07^TsyX%HbXa>a
zTbLooAJ$B!J}wD-&9xbp@_I?!l#6rNfw_jPbh<RLIqAXvnj1$F&gIdblQPwN6&(0`
zav^MHh&eua=N;BPn82I-E~F=SCeTm%Un&2vnJTJ7Uh}COd3Su6ZC+c*p4-z#Z3i#l
z!jdX_PsW+o+#OFo_$bm(A2H2dv7Nlx`Gm&$#jwT4@3N<+6w_-rC-K$QZe)kKHL2-Z
z!LL4YntS+eh@0@5<JI2{^DUpUi2t11^!)w1_{9mt<|lV^-bdZ}0HsbotUN(jX|p3b
z7cSGZS(|9A?JqL(w<~+c$duC&XZZJ-t=Q3X5ecA=xtVkKkR;DHoL<>@K494--e$)(
zR_~uW&6VvS%UZSB7gHnYBSQmXr(#ST)TZ-hOE>dQR^9YNbOibKy^FKiv=KWCx5jiA
zJG$<xBLB_qF*jxC2j0BMi!M8-gQc%*V&R+(#L3T_=54j0bPS{#-hb$)#WL7)&LiB{
zEX||7Y`nv%hfFr?B=vO#_?o33CpIUF4xCaVdTvkgtm`*ut(PLvbdEwu%$NV0vV)g;
zm5ysx<>P7DEu!i7#MszZ0-InfM{}2Od|6l+mG&mM<XVj=r+XYVu2SO|LrHe|IBlv_
zqDr>x=DCoOGdTamBYb&e1Uq*RaxGuR^FwMksOJSqcJSpiYJY7!yAdj|Yd1vDh<p`v
z&bS0$_$$q4tLXA!8XfeRUNEuWJXtig{Q^!HeUvsnnZcDiit{#)=F{Js3rXdMrTDwR
zi%}2vVaYNZ&eVT|_BlpU*F<lmU<J8~d<o*_mWU14P2{i5!i1hMWUsQJWI?JDm$@he
z?@o`Q`Wu^I<XR_z57NXvyctHeoyDYTY%u@cc9=>V_;BZuJZjupkFPW<v%_B}QE!zF
zdTE+DFZR}ghBoPwZP~*jZ~IbiL^p;TP`i#dsMgb?`di2|FK?=~Foje1I8Ar8s8Ef>
zc_hzyEHAxtJXO;(X79B8p(WeH$oi`viTaz#wD{N!8uM}%(u3;!g(Eur=`k5psY#Ng
zP5R9xJzN6Uj<cuk?ao}7=W+Udza7o>mS^|<UWu(vE0b%3uB_FssZj5U1wTcxoL0s^
zL0pj+4$zmT^@@_jvvwT+d!-dwIJlHGULI04)cy;#<Zcj{>yN2(usvrg=7BDoN70bP
zeDWkM3Ag7*@j2Vh(+w9dqTuA4r0$U}QAzA1_~?CVI76I&-2IilIF?V<ZvqmL63v~R
zH<_FH>NfXX%Z^;Xzmxqb5@RLnqS$G?0{?qwGL}Dij7rzdWl!3)6IftP`(Gq-rtDN!
zrz@WC)Xk%+#oI`D<7zg;wv4>cx{beX5JzL<Hj=*L0D8^UmThYJNM{-Tr3d4rIFC|a
zyhnggK6WkQewA1ACLK!r<>#39OH|>!t;Vv^339A=-4s6Tnm_;3CWh4Y9p{Fo1kot3
zDfH29UA|n*f=vr`B-dtNB1&_HaiG{MdU1Li$;)2PFR1@a2liy}?d)21nyL(c^?W!<
zjGc*90_O8`1^(1-o%8&vQ8QQxj$vb(V`;%)K0V{#$1PmsPLIA{Ox$kjat>izIMawy
zqP6j@XvvoISj8uiUcB%I1(fcp+!4OKYK!eHa(t;6T~X1_we5H1>MzVEpCu#d>_egS
z_2C2Dz__RQ;F?k7zkWh$H<@#rb1bP^zb3k!aU73+`;^|kkc0<RR>H;M75MPl>D+DG
zT&}Ho2&+rf!s*M^NpQR}Eozge)5mHPjigE({&5Yrx_1^y+HeOoWZxssv57P@HiBy%
z9gCaPZsGf)y?Ekw1w7~3g-bccGf9(`4L&m9i}#<eBby&va%@ryJlA=dj!V*`i*pj_
z@7)Knwyq<$voeF&?9!v_pCoaUEVA*-?;-Txhg?y*$_kc{k7OkbWmCF%s&+e(PWBK-
zy%lrWLd*G-dApvvAFL+%*Sg8qQLQvvsR}O{E~a}6tqHYhBN96$_|AZp^wX4oNV(IK
z)6r98RpY<Y+}f9v8EeAJ8r|ibHyiNOU6b6n>&-VUZs9H;$;LP150QM8AnvwOEx9{#
zhq~&x@Rv%avijkJT--rR!B4Nm{b#377s*JlAAMGm#mD;z?-0Qm8)ecb#|+4#ZOeJ>
zN=fuz@kBmTT7uNA`cAVOmLb!1vSjzHc>dNV75>D<2>!&hx1>*@3eo)!McKwj=*FiP
zXoQ0&Z77nc%FV08Q$}atz8paoc-mBD_2V)<F{hY&ViQG=IG!c7_vaCdCr^=Pye3~c
z=M-0{rN(#TJgQZZz#Y;)DKIg<bMCIAu*3X9vO28`4<A?IiS;g`9~(f&P0zr)-Uid2
z?{mrc+dZOIjVQ9!_@!w6**q*Z-GHC%nM?yVOOuC^W;h(Y#_pFtV9k(yWc{fEn*BV4
z)}|k#v0FVk{}?l>8Q4s{AM7JzkeA5#NFe>vo=f|G%_Lp*jTA)7;i<`w$%*kDq+Ki=
z?Hu!m6l$KJYr}5~9NlSTcy}o|s-w#KZaIZdUz$(-ZGuHlN_Fwh*0GoN1dXE?U9D))
za6b7lX04DH{+vYYI*qg!=5g_5BWU^6Xw(v2Le2%>pcPsVQShKS=Pho4^}c@=l};qw
z`}tm6%~Y1osjnxJUlquncZ+DCx(j{4O%vTRo==Lls1w&G*YTT8EHNDS55u$m#Q(q)
z>UJQCuGDp)>a*wKHD14~41Y`0(0Faa80FLBqg`<$zn7-=`QSJHbIAAYkyPpMR*G-f
z<1PI&L_hM?@cdhIarn&&yd^RdCl$JihCdD9%Nf~3xwVuV*p!R|cTS-PKTf7mYl29r
z#Bn746Oy#sqlsj}m#Y4QONjp6)0k~lpjNggxCbkGxZc<w<f<M+XE-48R_r_1qY=UV
z8T$pDKbnd6%aoCu7dmL!A&Mpk{y>xdcQ$(SCr(X$9_?CO%(X5Q;ayxQDR{Pvv|4m=
zF`}ECX<h|RpX)*PcRJHQ!-Lpslp(ijwH!5bypJ2r04|l>MNd(28uq-UYQ0YY=cyja
z{dXr&V2|y`rvqM9ezl``t9UB*S+f(5r0CHdy8pOko<C5qjWYZm=7LSe6mw0g6fYQ&
zAiMIU$m@5>$kFc)S>jwGx)`tumwY-*KcxrJ!FvuQTKt5_C@q7mZ(dK@hf=vye{P86
zkE`N)r)=oswa!Am{#tfh`D<dSo6H83XOMedJ84bCSn^@<UJ}^2i1ifvM(+RZp=9S;
zcKV4*YGEx+JB92+^z;j!(z=~Yx0=Yl*=5A;%e+tYus(hL8Im=+JE*F<kmndXiN6$d
zf?Qp<hE9r@M=Tfopo=!C^K#O;lq-nCsrTQIFY&#k_23%5{=_)CJ+7FHo1;wkfBt|c
zSvTR@y?g22&`ZMixtE9%Kaq7y>&T+>S=9Qt92<Mbly}^t$VLS^@T!`j?C)vf?Af$Y
z^yRWOe8ayv{L!%s_+7c_{KsWJEGsZacjuY1Ru5X~?pG;v*S1<5w0<hpJrhLtIwiAK
zNRHjrpT^cs59Bq)esfU?gqj?k$F8k5B{QSi=&y))_)q$spdgg;W>SY~M&D>uzfaiO
zeAh|H&oEjV{g+#9T#gq{)#0Z^9AuwmYw%%4mVDhbHGYwr8YeFFYHvpj$VTrB-bJWC
zMNdcbh2aLc!*D*^JL3~EG0mgjw8D7);S_e%`TMlavyIl$cKWZqkIJ@7u-d0bahAG&
ziQP^;67*7>AEP*#EReM1Po7`EOHcRZ*X#*n6)u_ZbG+1u{<`1zQCJkOeaMo(?w-O=
zmEXlSM?WDaCvK%3s>-}qpF35x451Q|YUH2sD*CZZn=KE>qRM{`uwk)<R8hzbbv*M3
z%{Tc@y!$7z@uwH!DRSu~dr06Tp48<v1uksWlOXIhL4%x&btKJUMdXj79=SP1jBrh-
zE9<^oB_lC~)Hx{)?;RBK?O_SoA@0m=U|dP=Dmng6wmffnypkJsYb91PBSdEYMI!y9
zi>h^r^ZF|K<j%@wdga&zwr1lfnzN*nUVI?IwGSw;fwCp!t7jT{*Zzm1KN)1qydO01
z*LIF;vE{2~>||?c2v_*<43@BMr9KmM`82x)e7dbUf90wn>rKVj(~!kW;x1D9U_VWq
zu3uHwG>aHHpA&5f+(>1<_VB)oCzG!-lDwyKu#kzbNzQHlgTMUvOcz?L;X`NtWB(ec
z@sU%N*~pT|#Qh5(Z3_&E#<q$0rBHW{jlCz-GrGLkvo7M_o{KMcCt=-(bNPO~gY;Ye
zXYP5;R~oRZt?HQMEBa;LIr8G=MRxJexvX`V2md(4jK<~M!=W}8>G=ERL;+sHZPK#*
zV37&_*B_2EgnKw9U<I4^HjG<p<iIlpBY5kfx7_uIviy=OkhZ^^L0+E{@dpi3cy;44
za%OfoDNhO`GH0z>mzH>L)=7aM;h(}MsYdawE+J%vd^tTj*FeZszeD8mt=St|32b;_
z620k{$NFrK;{67{a38%75`A?y*4@8`JerPp+uS~K*XAneYMI6lmb6hzzYz{Ue@rIo
z-5>_57voja-cZq$&myl|SEy<5eztkuE?SrLgvuA$@h6`J;{sKN_qe;CJNaQ9FS-9d
zKJnR}Cdk|+1D<QJe&z_-TlP_;RR{UU_7SX%`7ka1qJ@X{1keqW-1+EB$Jp3?C8Td#
z1u3;Nq0bXF`6X&Aut~-$K4^;?uQtnofA17V_Iw=AuNOZ;8ck1f#rO62xytY8vZ;54
zdq$T1W-D+vFQ-wJ=cUvEOyIK?ZK7^lkCC)7TZz_i0jINRgcm<2oZpYMXh_=ywAwG3
z{8jqReUP)|4)`wM_a1t~Hb?HG&*@3hX<|rvVjal!ZGGfl(`$TBIJcg5m-6N>-&3#a
zx5@SsH?UgzQ#_+VlW*0`!KO*SIR7i5bn^FPdLylrY6f9aS{lKw@LkUCyLOMdMERge
zajUpd&Od2;?4v5V*O5X$`y7vLktAIknrYe;$lm?4hfU!_=+TOwR7K!^uaH*5*W2z;
z@8yNmX1tR~eCQw_vAcj@FeR6qpZ1XZI{qB7T0e?4Zk|F0cAE2@ANKPe@CYw+d^>qE
zWJRyb$k5Ga<=896GOX#Mcq)GOA~A4Er-uVas@$HMa-rQ?)Da16jCVcAiCIB5xw}#M
zA%Vl(Wy|RgM{;S07vfRHvb5oq42{YdMWnMAkbew^EAcU;urZ8Vrrn0?ZtNl!<A34#
zah}*d>pp&q%y9RcX(Y{ZG~OqK5my!{QMAjRGg_)fl?STuts}vtcegIRZ9IybaQFz3
zJT#1Ky>&_K8c5^JWaz26$yF0quRxz?@S-L8MO;~n5fNK{82!APh#MtRxHAS-L}JL5
zTXd@lKTgyq(G7dKsd0|v{;UB`D|ZVy7*z-UTCB#-Rz|qVW-h*XBZcd`eub-*m*JJw
zs`xjH7%H8zf~X&U!8Lg4lh46V=-kyWMOkXL)M<M%J3%6wzDh-`>XpMJY)U0wF>5;c
zp?!#3((6Gt3{K<a*OpL&k$=QH@Hlm#mAEEn8C`ey1X1j&#?z-R<>ES5R?f}YN3QDl
zVW-3etet@-dv0b0HHbgQjlI;z$q4uR-8*$u=7WbQ(5H)guy`TyKPiLz3+`gQ&lB1A
z-|uibb!{{|WEqiN9W2`IU&KB4jv+VhJCnI*R`46MuhSW=z1Vd*i#K*q@;%}%c~ts?
zIQ`P(qlAoWCp?;;uU$r(lh29VZ4Gh4bB27GTR|FLXOKQgd49QH5$W14OFEW$lQJfg
z#C$r5pFEVLZUG5YJZCMLDN#Tiw2MgHoEhXm(?el{>>e4ma3z-I&78zGUAi*$4`-m!
zj5R)W;}*9)$bO0sohTrvgWn#Zj+QrQ#Pbid?B_bF`p1|k_>~j8n(gG4@pyXD>p9+*
zKEkC1w9`joMI=%*mEg2<bdOdh29LH;Z!3NF&UQKOnyWvJjFqRM0n5m2vsCQnpF&F{
zyYat@+alZNXQ`w{J5`OV$IBY_(@ecg>T*!HOKRoGFXIDT=leYB|1*cvTDu>wJNTVj
ze^1Dgm-9ibvtr1LMdPW|yFL8FH)@=Js1m!|@h17%;ZBam9HS>XF5_()$~64@SYjM4
zNklq#iGmJCo=Eyp$wX69FkFFd?LI~$!-7Ns1v#7-6r+i6cXLH<3Y<gXYP|06Ub6f4
zWZWB(gKbw{qy0Cgl4T|@@G1*;a{Aw4GJZl0cX{o5{GrE;i@!A*SK35y3i(1^wPuLY
z!f3qT`ZQho<^j5I8%jg2^wW(;RjE&o1W~&e%!NuEBPZ&gV-&W9Of*xc^L7b3uzV~{
zHwvTb2{v^0lBGm$*-tXoWsnOpmZFb7PZh0+V9DhDW5}@XXE;iu0G}L;B+74Bv)9^v
zSp9=xtg?3#-L!NmJ!CzBZp=dL&dKWJI~YfF?S=dygE8!{uYgy2b%Wb%>`hsjqr^C}
zj`*}HV9AMViHFxMqSCjH9zUhXo{qmq6E!1fb*UP^U?C(KtJ`Ucmpf_B@Zw+9U8Q?Z
zH`7rE>wwnqH=^6nK=yhl@#*f$JY@cohIwVk;!F`OaI~b;W@*q&nN%`nc|p~k*Gg<g
zY8YABT1-pFUnS)M7W}M`2ri}AozGa}z<Mm!Bn<}>Mb{@>;3T#u@NcIHx?D#pf4F-t
z8(#T<$iCT0`V}8gEstm_)g(&;eFnLl%%fb|u(QAx!>r!AK5EmHK?>~^$%85T(2wF-
zG=4({mA^iROskvCE8FIwj*EoQ4dbfLY?Y_4KFp``R^1}y!M)tSM?C`51dxeSZ*fDt
zdbEG{B$B*UkGR`iz)e=hbg*F_`8R#8(8paNMSH`@nj>qer=S<j>l{ZyzR1uvZ#yzJ
zd@GspHx3WYW610C(da*y7s#gm0RAd|mKxdqp^d8vJtNc{!IQ?5HwwzsuUMX)Yb8sr
zYuzK;rWRBkx+K)@Um5y0v={%nIh*D;oafHAXtFVZC8EH*>A1J<8QHPR2cH~TNaoNp
zBJCU7sll%;Tvl&4_IWjmecHH)ytsEq)VgIEVGX8GGksa|%JMP3WM#~ali_LO@>L{m
zeG}D^%ICHm)1bfAZjm2rJjv~rbgCW{LY`Tsk*j~zXjWE2rIFSVdiakuMJbdxo=hNx
z!a|$nT3!4lM1hk(+Db)+PiUvM22zq$VXNg_$g%^qbe-KDdfqadZV!wgCQhUHDVyDp
z_1}$j?}8n)^>+{G5i;B7+&w@HLWKn!eMhpzC9GnTn;LESd6R0cP-VBR8G|P$Yv3-o
z2gsPqr)BL@)S*O^97rf2uk__viDNlP%)Xv8Fg#A0XS#7;tfKL;D`N$os)@i>(dKVY
z9^j62NU)Qelu6&>LZThtM(?h^&RvWy!C{tR<kz4&d8sAFUi5lOo^RBoo4bpM*s95*
zy+y&I)=YEi{aNVyYU0UqolDfcMTv|RSPH|h^hsi|7rm4ej&1MlrdeV?@S!6+aP&Jd
zmT~CD58d?f!Bukf`{sGHv(pB*uj}L9yT#(4afRHf5|&*3;7`!+aYSXjHz)Qchni+4
za+4L$lj&g=g#W%s)SDxmqbYMpN5UxH;E*OwThLo+>*PYMeLK;X-}lIZy07@d!F0Og
zz-L-(U4(Oor;uge!|)2dbLi&4Dq5-*Od5SVsmB;u?EmZ%-EOZzxoy*^seLF7Iv_<h
zsmIWOpK~d?FV7n7J4>IhHbZ~RLdd+fCbEcnbD3kLsOPFNbbO06?;oB;r(5O`n@Kg?
ztC*ws^p*rlUd<p02PLSiiWj<Qx{vtOhKNR-#PH@VFNvgBIVzlWoQ{7OM^^>Ok<`je
zyaFUseLW2_DW-;tc)A;9*FPs3YGY{VDS0yOkQZEJmqxGDE#jsqIC6udhehjtO-8E*
z2%hoYoL3C7q3It~$m1CtVT!s*W9khu-Q^^To7g~KWrfprfdk^|piCYnDdQ?{MFd*Z
zv39UJX`B~F3+{B_z=`>^BxW|+S}~gJyBx`VieJj8%+?dm!#Q|v$sVpbsG2LOG$5a(
z+(bKLG{{>kIb3Sq#*Id+@uGnHB2(Fds)fHaNn+dvykzHWB0u;Szgo7Ecw{7STNJN|
zlzxZM0GrX|*Bg7%l--RTiqg18S}Js4=?412<PT0db{9WS@#T(|l;EK;eYm*Q8}Bu8
z!wwTOaDTfvy_p+Fdup>eh1Ho=(`62Fo_80Ki>^5$_N_W;gAKH6z6_Zx9e}?slOo1j
zl}MUlBkpaJWNU`AXh_FvTwghh_E?Q4?543O;IIzYa(h_x`m`9Ck@AIGs34*q?T+}7
zqBMVj3S5I3ok*k27uCMe;XnUlc|Wme*73<WK8wvDQS3jmt16zFPpaqM^T)BCjTwKj
zDV8r2c$Kd&$q?rvMe=CcBhgcLeSX%DGCGv<m-gvJ(d>%xtjvTY>N9y2aVguvn$|0l
z*O#u)+VC^<^Xq0TnR1DiA%;w_ujOoIRQaLxKSj<L9oR!r*N9!mc<hlLPO_Yz(zmBn
zx#Fr1B4S-jzuw5?z&}+Y=M~0RFLL1>n;LPl@dFZx^YD5PN%mXMWjcJhf=>RB!aMeT
zq}FQ{@WMrimM;0uiH}yqUSa0M`9`QnGryYmeBVK?Y_;RZJ0;M<hz-PRvm4&fIGH~s
zF!7uc<SkEW=Fr=lgUF9&Bi8T$OGb3$+3iYeiNO^MUaoD3U-@_yZ{|LYO%p&g!w;jl
z+y^Iko5V=EFr254xAao4umgnm)a1Q&708aUjcl3KY_`JgEA4*#o?C9}BvQEs_y@<`
zNCD2JPPYXfP2@i)?UGEkU0=q&)!4}zj!wl%xAgen6?#;5c^saoxrFx<Mbhw4ae6+y
zikEqHo1ePAf=JwTAf0DSiMN7KHw`_aznqMCop+i1AJ<fVz13HIb#)|F934QFic?9^
z{OiR1iZAJ7HnZJlIV|N~jaSvL<jP#NNp|rU+_399-7;B@doDSFKVo-^`=NN8$cGq{
zw%zLdtc%$+V4)F-v^C(R$5fKvwG2jityFbFBd#A%r{lh^tBP~!pg(f`scmme)!}Ob
zvisX?GUz;y6s(h^mO9ty8G)0WbhJlQ_23TP@zV|Ee`zFtyM|EAa?CsQ=fm9|HN-_}
z77g4mk$)Msl+<o~i7wln!^ZqJF4k!xX<fgFq%<BTLV<wyl&O-aHNN!fAA&m#bos42
zu5gAujYxfIDDgM+q2tEMvN{T?^xxUv;J+9ZZp1BzKGcq+-P*f|=buB=V&Vv~Dws$7
z;@{9!33ti+4K28K!XkcT;01jUd6KO?p+pzPmy(4ieAzvlg#P?tG!+|6BvxkL{8aBr
z{QKrn{OHPW_>D>fcV4%a)*sO3hdPz{kNV?zXW3PBjQ(WyU%(*E{ZPfZ6c)3~>JrF?
zi7)97$qQV>q%Aa~OW;@yuO$hC)2ZwQ1>Uk`6lt{cV7cYNc=Yfe+}|5QZsf(#Zy|R0
zrF%5te6;xdh%dw;=e+1^n-Q;#lKIpvN;GW7HEzA2yUC9Qq-)7O-X>!c7ngjD@87c&
zd(V&M2D%!!CdKdEu;&<BALdLh#g=j9N?$p}2`qO<<0Xl@uZDk`MG{Uy9iQqN%TM=R
zO1D+&vnx&8xs~#U>>*}6duHBSZo`vx<cx(Fd2`y5FRpz`{b$<Kl_Af_r>M8w-AkA&
z-xf#prBacS+D*D(K_KlvxRg6R(+A6&M3Q^Tmh2uRO{8ST@nvG|T+9Ij>h$R}UBA6c
zcz-L&ON&*!%$jiWU-Bh#=Eq$UdAdaO>4P=hfp5{u>rJG<$p-ZZ{O_>-Kw>%~PwJB*
z`20>I-mol<yPB^+eDeRJTg)F4|EX8Nc%zS$ddTuRNerFSK**E34(PK?A-9rW!kGt@
zlW(!oL_)5Ql(?@T3kv^|2alg(&q=oQc~(0<7rmKm*tLy}%DqQ#H|G#7gL!m*pd)wU
z$5U**r;JSNQYMow4B**Lek612RdNk2#kWRp=kB+MaSP+Wh-B9J6Ych;<QtAgE_Vfg
z!yFmj=RhOPjFIA_zy9P7MWmB($139f><itX)xuq`Jw?6iDf!esnMM{K!%vFRsC=pf
zEqLrkmY($>2S109bu(S@mGkK&{`w<MVM-VN8u5bWRLil?5-1+pHOTePUP50?I!Db@
z+UbQo3;EDDvBc+padoG0HGN^<z?)}jl29R$2L01s*IMfoLM4?@Dl|wW8jx9LjY?@!
zQi+rzL#WPPy9`mrh{!x;h-Ao+LgLvkp8Lgpzw6yOpL2F+t#w`3`hDM=qt2VwQvH|e
zeB^r<>O9&~+VA6D-oL?;ZBdj;H}5v&)pg5weWw>xIO;^g>)h#<)}`#tnhRvEl>+UQ
z-_6wHcaW_0`^6fbA|2+S%~mL7l9U@ar0O^N$i09W)b$J^cm8VAp~#BGxn_{)t>t9$
z?EU=81U<H9U^(f0ftlmhiKM%F81D3VC9NJ=$`3B}CeEEE()UfO>|0Ve@!k<G?QE?j
zrft`V`=+bpM0PHT%2H&l`<nRwqW1Gnw>hHqpAXGw-iv9$dva*>YP!N`Bz2$TM^ctQ
zAd~Ve`H71W`!I1TAM@vt^!xGKgiNf*x6KpTJ#>+t&=i9gR}Cbs8={EjFF&@f{sjLh
z(v8_a{L0T<7sk;3{nCHBIYj!sAN%%743s=}j?cZ5%V!l{#mBwQ6Z3$!>Q0#(y#1}U
zG-&r9e12dsA9v{o`E=Qy<+c^@)sMAl##(!+`!XwPym_6}Zv1L`sab*lK0XT<ZuV!o
z4u?tG79Y}iC%t-CXsh&Gtu9Ns8Of*Ld_ktSbdn_6#%IQirMHHg^124HIQVKOpKz*}
z94EPWV~HwzJT!o>KH$Q<xug93&&lN0FFn2wY#@B|RyM7^4u43IqaK+HsloDUdgzG<
zeK8@5rZ&%K&Xd*oBS(DLA)mkGLxMI5nw`QQ#TDdXd;ncG#+6xxcCjPg#l&lVI&Bgy
z`LG@C6j>}|@gH=A^|mc+;f5T^sg`#B#Z?W-qiTCeq(ZNB(TfOraP|l~^t_p{cXgyN
zr~3s<zHB0qUscY2CK^iD4l)!bEDa|9t3I)tIy?A(3fXi}pOmlBXyH4wawPXv?@*sS
zOLl3R1v}ZjnrWy1VY7VaF!}2`G`IO2y9Y`%w?IkK@8odO?%zXO)#aoY-Tc|6;a|v}
z;uJb5;1sj?b&O`bc_8&uixfuZuMrgG<N5HzankEEwMm)dPwKw=2%DIHgEfRD3JqHl
zS?m25WJCLU;ze%oiqDFq{v}0pW#~q_yEVLKP5dH>_0|8FXP6@WIemj9Z1glbBXl#%
zO{);54jXSLgJr0#b%-S2Q%$nyR4mnc5HFED7Hjiq=7OO~|C(5v`GWuYagy|nadhhr
zHL5W`OYmHAhg2$rv##|f!i(@RLSKWrP;GvgIyt?T)XzOHaed({1eR^0N&h%LZ)%hf
zopzI6a5UpX?~D>wZ`eyutZ)^aV?Rg^ua-(0pU4RJ`nC%d=Fj+Z4l8Nujx=HC0wcC(
z_<oif@<F&cN597Sx~$~)bP2O6Euc6pLDJ882YvH%vmn3xo^WH^d10Z^GrB0Rl0_>N
zu=%g-Bt6YllJkQKn2MG`jas$nb<ZEn?59`=KT~f~VV%AZIb#o7bm%Z^9;9zKpU4PP
zcgaYWSA<K{LL-RsiWgKP`W5hSgDpvVDkxtVBn-LkBXLUJ%q(8dCR$-(!W}IQ;p44H
zeELHUn;;!P|8PBQ-1&3-0sqgEi^EbRaamXRWkDO+h~-ZhTiC{?He^eeyX|J17eugw
z>o1dSt7IfYPamhb%DED$+5omZu9Z1kPola*<>}mw4ua}7LptWGy|njFh4A)pjBsd2
zAbp0O^Gjx5C9)M-lJ)yySofoeLfV*1Ldgan;b_kavaPO;bv4i7SGnFHcF~5CSIRs1
zC!!-8ajT8|%XOz;|E_1hs&|mV&-U`hZtr<L@jH}LI!@Q*Hjvdh+ep-t57e**^M{As
zA)_-+5E#+~+x;qeG<*Wvv$l>dov@Do8B@V8ymylK=_=q$p82riL$9hg+YKdS_U~pH
z7Gv31BrkbZd7Vsi+((j<Q~2llds$@ZJa$axK5un1k=z-g#ok+b!#>&P^tJs6w&qp<
zDIIZ;C~BtR6<R^)vT+hgtqNpYYsA?Y0}Y10r?b;ibru!a#d{5T%~K;cwz7Q`GqB&!
z4n2J*%`vKHl9e0Dw3gq}yAnU1IBvu_4=_u}&y%LKm6EOhzS9LybjZ@wNb(}EmA~Ft
zSUsR>4jXX&7CorHjRr4J64u_fC9)Q4+4QG@>{snlSnih2WCl&==gVHFeqN7gV$T{n
zJ=Kkk47A|?HAj*^hh1s-!%FtFeFE)?Nu(P7kE$~!yeH!l2QUvOf$j9z%P&%wktp^n
zCrv-Cu(QE%+VAKQqOIn|hW}QVgzr4h_nRtV$)j|H?14pic5wjNT~aB!j@_bBYr;R)
zK28fOL7Fr{97!F7CHiCU(6FD%eDT~Qw#HKQ8mAm!B{frs)uA(VlUoyYTr4M<RI`;$
z9jb$`q?VF58EP!VdKXi%+Rb~caTP2UWYPISUc#AaoUlv$jtpD3R2MB7#0rYnv%r>Y
zN##cwp|or_n>xCUu3dA5KN#_ruZVumPf{JgCWhq;>l)88l@YP@xuGtt%2Z$xANvbE
z%e;hbkw!E^m$Cij8j|rhWQAG9)%=ww;gac(8tJm?oBRy@t!(1wiOg%<3>LKAg-$))
z%uXJQXPdnyG37gBB|iJdk(RIhC2e`l^u5(4wq9#1>#v~B_M0mU%L8umD%uJxX`Q*C
zc3PRO2RphhRG@fQ4!>UBk=C8I6t;dhX9~^o!oiq-tg3Yio3HeRy*vxN^&xwHldCfw
zXTE_RdFCQGj5$tQo90V;WtL)>J0m39K5k=4&&DthaXv%+@OY}bPEWGkSt2RU94Bdc
z+|9;+cA%g4I8*-@$*g<%NfPZdLz?dGCZ4Y=*vOQVL?Kz0JbELrW8WQFnrjj-XRwp)
zyBJK&AM0XC7bkf(OqPDnyvY~Kog%Bcf3ZQ=)(~s2ELt67O_L||69)c`p_^u>)6(yG
zR7O{UcmBPat=o_X<EE5SxKTxW4rDRMGHZ#)vl{9B^0Ta=><+&+)Qz>C>@O@I`-5p2
zXA{R2!1lMT<tGtqA?v{}8s}e%v!^ZL2k7PD*9$6`Px>9o9GfK$vqnf(hbh@9m4-2w
z3$Iv`tGhJxM1*8aMt@0U?Q=55RYKpn?vm(6=LjVwmZW`2IFs?95_Ll}iP@K5*yR2P
zqA1kUq%9ly<atJtv5O`PdSeX5n*17J`m%|%`?RVsWY0a?b@Z}ed^=b0IUC1T9ymec
zm(7#xI5I<WIIUV3RB>9kDzws$=>hDWvYg=i?*}hU<=O3)N7QQbbcu@QLWy(lR~ox@
zs4#PMu%te?LC})UpoUkU3g3QJOBzB>v8w?xlrtS7%%nGk?1Y_yR>-g#W%-km-q&~O
z;@~s1GTl*_sw`zu+L&z%*uyV@P~^CGrZDZ$CGum}3?W1zRpQxjf|NfR&+LmQGM9cD
z_-(PG#Hg#5muYfhw<kQL{k@k{xd}iu&#z$SQ-j&R>zVwexku<*^Cn_{LG+ZfkP!QE
zESWdt0<|n468p(kl+F-)GtYI|q{IX|<3ct6=BNp8Wo^QAZsf6wCV6IDbVr)M_7hc2
z4Hmq@AJE}x52aJ8_R+f==h6+g-2}bkM(nQULFT!sSgQR`L!vxgNBVJ|sw8pQefoad
z6FP12QucYCA1$-m$gFpb<!c5N)2$VgDQ=ui2f2M<vn}K3>RGb%#FaYgQr}2Tat!G*
zn_2wqj3IoF#W<>P_zE2qBE!tS_lb2~EgIMV0%@o#qD3_ynVab|Ug&KQXRUkqt8>Ru
z&urW3iFPUM*9=$oDAZ6Irg)SE%F5D>4?KC<`_*h%@kY9Og$zHiw-pNJyRxzU#<Fya
zhb(<GuADwlM_hk(C9Mgb?DCu)Y=(!b@Phu~f6AYxSCJuA)*dJ^-#U!Gu^c08vUcSg
zmVYDzY!xL2M@I^65ni}3p^km5T2JO$uVKLrUi^s-*Z5wwy>#Bv^DIiOi#P>Iv5e|V
zs`^fetq)A1{Z7=;6>ppQoaS3_w{e0rGMy)%BS)|ihR1mK%@=tCMF;AB(2Rvd8PO@N
zC)r?qc_A};1^IOBCcoP^gVMeLmOdp2r`5$Xp{0?fzpW-Gxl?$<s@MEr(FooSw|L9w
znGD({^U0mM{OFKOw%wglJ*5}qNcl=K<kIJAw}T7GxJRqm{Asmh^N4SxbMZ&2baVlm
z{K%W78ogl#$9&mZ@9VUF)nU4JLL!~`c_rD>(Z|MERq+q4R#MMR0rc{qcP#&&EeRFt
zT%Sz#^LdlvS?9levQ*uHE&Lu!vHCSuR+mY)1->CQwbqhnbM$CBiK8PPBvNaO5H@RJ
zJK6k#V?%QdY18;;{Pfk^r7}Yd`5R;%eMI|9WL-5SWzPHQ{Y&b?hNzKrUH^CD{N7v|
z|2&;}SEvcHr_}|Ms~zNGR6MD3jAZ(kl$eu#9SL|hi2m->qkH`?@M8mW_!p}RSn@F)
ziP_(|B%mvpdG;SD=+1l0YK~lBa3+!^RJXDj+((*zDvPZtJj={0zw)*D1vtia0Y!BY
zEL+xJvLNFap7F$#t<mn`=cmkJ=O>Mnl&-2{jW<>ieO{B9J+xqFLhQvF)q9>Bzl3GS
zd?$R;Mm}@R0A?l5!iS8vr+baxvxhetq-#hd>GJWSEeFCF|7{RWC`L?4r;rs7abjgz
zyZG8C@qBD=1Rv@BoIX-hVng3P273AsnWm{rk%tYNoBNh|UN2y_k8VoS6=GO)Rw0e*
zxJ5le9homgk>QmMBx_|6OL<bqd?l;cyc>7Pew#va#lM6^EUsYY$#3|~%5GM}V+Qr^
z)YSr5t3f8+6Qd{zh;wGqA9Ptp_cFGn(w-!j9_6npM$;$H14(peKf!uN2Jt<sE{V0e
zfFGgl%==dsp0#)tU(u^V^XEJAtnM`_+&h<iz*)5a6?MV8H-}k@mw0KT2{8y<#5cXm
zrM|I!_>q}4(^_gOe6D&$DttB&v)wt|i?n(Cm?x82|G-E5raBWg_v|-P@pde$U;KxT
zQ5s4Ae!Pdj)#<UWghm=Y{4pIlR$CadJAiIGYDwqaoWSQ<HDS{zAU`6%@@o1`;zBT1
z^0sX!z2tqNI;3Y6n>E*)BwBu;sU4H(ebp}V%JMH6SRKal!ZPUUVsZa_Z#U~RtL0Ok
znA6XWXQUost}JGe3(b4Uu}Jqkvd88-^*VHc=x$gj?koE<h1O|og|#zVko%ONoTJ1i
zH->N%E$DLNM0!Zooy477NFAEBBxfr|uqD<{*vRJwZ0V7m3_rb1H{>OfYsy#XZ-+`c
ze6%Xwa8F8R<@_Ss8=RQMp<X^9p@U7Fmnj{xYd1UU(@ZMr1DHq0FaFhk=cPl2y0GG*
zdDJ6FQ*x&yfu+Ba<Nph(p|&Qch(Z5IKKEe)Pi_uj3-2vs(P>ZkHLVr2_i-tkU#7~N
zo>1lc|4F7Qp#r&ld?3A7KZ+V>dQcQ7VeQX)X~ZdO>KLg)7mXDAjnYQ`<w0js7WsrN
z9aB#@m1<&{bAap)X`>@5df1%!C@N`O!LL{vg+H#b=e156lRs@LbU?C{y!3OIKA9|K
z83x<=b$%|qmTe-P?PN~n`})(5?wzFfUI8nby_kPdx|-#RcW8pM`qQ%50&JooD@{G3
zO&@D}@?INDsZw(Wzs%T_`USNSZR1zcq!;7KE6$Fl*e;_TPHk*qM*|s?aF&1OY01<V
zZ6dmRf0Evuo%|G~rSz=6jO3-_LfUL!BDH^CSS1Vy;(c-#({W{vbRfA&b{AUH?}zvB
zAKh2cyYC<KtH<AERn8T>#pPg{8NQfo_Hg45pWI6po*2T0R7pVb_bEECyF<G5eLpt-
z!9X^6-*^^&aVP)H4=9cj_-osPNZu?5`rC3SnHUnoFETHs#`c$JdVDh<zP6ihk5*>g
z8X4?$RuK&!U_*odl+vlq^J!2`g7kyB3_Uj5fwY*m@&6KZS&9F2nh`6`-z{26gKla_
zevT67(ag0ZZ^n&gyA&&FoaP=@eMo_P+P0T+XOt!BRVhTP(t;1n){xw{>LY;%R?|iq
zWm<iB8s784i7JHW5?^yeB3COWH{>FE_4*iAV41`J@$4t;xT8Q^(|oAyvil@Q?DHAF
zHNmAu^J(NLWj1`7H4AKBL=&79>7Rea^zr%cMET?hVRLsC)y!xhhHZD5=i+?6??)fm
zyzC%Zu^-WOHH~D@dSyDcwF|xUf5NI8&+%iA*$VfAqF88!JofE0kbDeisebt@gC*wr
zQae0=PM$1Fp8Oq!m5=SmA1|xXJin*xW!1o%0WEp7=9GodtJ22GTD+;=>Tue(tdmXn
zl_;#;JjG7;zJ*YC;1XR@YA4Z1ccoALVyWpaWvXN8AcUPWtVt`06LP%WC3@cDsosBO
ze7J4AP<yqT|5^J-IwDS6n6}7(E=bW5Ds2Mj+sB%Ag6nZfd+8!!T-`MGGAfzqnto-M
z^6GJ&ku57&IYD^Y-bZwwKPR!{G$l!mS0pCw=cU~;TLk(tn?3O8<JXmgu*}4SeeisP
zhdk>M_UgYDwk6%BCtC99hQgyX+i8WwG(1PTur^&_*3m-G>gU4Rx8*b_u7UmcVm|A1
zwvluP1xkz^=JM}^>vY7jU2ID2VVrz!0*(B3r<xrumllLOv3}y6h`z61*zF<<zMoAl
zZF4!m`{q~hfwu_TQJ>B`pMS|WMU_)MbW$q!T9=Kxs!v;uf09R+;^@7LZe-f_t*p3y
zgtRtu3VZyHr)nEo=#Wq&cBnC)jWmd74^OHyn0uD;r@iUmW^ZxURDpz!jK}v^v{$*n
zaB0KsVRYP5B_U<mHkKmMWvTu5kd$NotX(OM))k(H--8(S8Wqi41}&jcVQKu!rK9PO
zqrbuP?@KbTaFpbubtnJYsu>jKd?K29U&ygZ_oPi0di;0#kiRTr00*V}ST;H#c;-x_
z=JV48x3DN-|6P5yF=!Vxb&L_GuC@sGt`D}WuSgSocjr^)=OOu(VZ&yiR#xSZ1`|VL
zg@fnSYvkr^78Ht->5@bZS`?+m+FN!A;os*7J0nGd9rqV5AKoO{ao1W%n&?RtzkiaL
z>6=RQmIMjY?tfvr<z`eZk7qAVB+(VUtJw6CFhMcx41b?XkZhG?QQ@MOWVWun<n?B6
zp?dZxwk>TS^BX@^P)So^HKA7s=sgq?%!`B}sWYjyv$y1AYd-NzHj~`6m@ItInIv3X
z(I|+We_`LTP%>9RMld|-%U=CRqt9L_N!-3ENm4qRsPx2N+8!`mDDpkaX6IK+YYhwN
zY!e;k^X?40@b?#QBCY15>>HTVH%sPyV;1Xtc$H;Gq>z%uUwQlaXR4!p-toc7ZOmg-
z0L|D~!ET-LVjaKDgwMm*^Q{KieAp~CiNksc>mO&t@B49w*SV$3zR$`eLn2m@0=p%Z
z^(9p-a-M=@kog0q?mK{nZVzQ=bVkyeHFKoFljGQO<!^M3<v?N@mdSsYiqEI#I=0K@
zHt&`7mOrqdkLpIcu$Hf$L`O4|X2dB9ZWdpdZ3Yoi4YeiDZ<O$Y)Q6tll*P9Fac1u8
z3;3Z!Go(@5=d*#;auTCS0raVgk)$;zOwfOv#VW&`SS`CtQ@5*0n>t$Au4Hri?T<aJ
zeB&+L7iVSPE&3o`6ug8EooY{?I|b7Aus<wBBL^p%`?2U;SxNWK5$xcacGmCBU$(K)
zi<~&KocWH3ql%4Q!YO-ScF`}Bg@2eNLPE#sJgffH`J5$__W#06@(<F8)WfXTs)TrT
zdb6nt|M(W~2D-~Cg<3wBlZ5KNCtVvggfK4$^7zv~cB0=$*1JJjD7P=8Z(nP(&_(m8
z_QdIQ*W+4-q_^13^QJ=W;%l^g@N42(u#=7cc7UDdvly9T!uTFFI>p(O|M@C}#vA@8
z;erk5P)##x_2~^cQy5I%w8&DM=#%W^R-Qx$HL%I)7Bu#uk#xU{87;QF#@=^o61m~l
zbgYH0FyFb6_CD!Pzt-fji<zTwqJ=N(O&!i>`6=@j@6Flbvq$+f0(84tKAGiE&aV+H
zdByn6Y`K>P-JcYU$85A`p=Vu~k^O!$px-!Za=}RGTX2;$*M!rqaSzb`02$#IKbCGE
z_mBkIo}+TRWa+J_Aby9TB8@$%!p2UTMo){sMHkYHS-(t-?#<K@z8S>w8auxenH|e$
z+^ZdA$(UHG|M4*Iy2+4UxE#V?Nz!5ylveWXtM0Is9)6P3^PaM&K67Y}^&7nOwF^7s
z?ap3%ZDzZJt7+$vdgiiaEdOQeY5rip17ffKAX#`Pf$u*hP+HG*kb?u%=!QFU=&WlW
z$g;1oEMUqZ(sF+<xx6lc`W0*?Whchdm;vte!{HTl!t_aO<elk!cx))U=MzjP%^A(^
z`<pR`&mOeR|2fMpmD2O3Q`v_PF=V*>S^8^&mT>7w2Os@Uk+!6-r311x$VfXCa^U0%
z+L5J26KAN9>=7a4-h~F5ZC}EVOPfyY5)TlS+@I1MOIhkxxP{+4CYgTjKa7U!`QcLw
z;_1_%2t27}IMW*z!R&u`lQCtrtT8H^Bn%BEF6)h{)o*=jToAyDf(wc6{pIXQvwGza
zOL;nNq%H0BQ)B-2Ds;jFBkH-WzS^O0IyKt*OnPk`&o?R_Bzobmi0-e4MC(KeQNE`_
z)+aRZ-%=Oi%a%vj{A;%KL&`9ELU9=1oZgMsJUuL(I;MsET(3c8+eDDo@fiv62*h1?
zchS*?n?Paq8#ep)F5+PMfKbbItpCaiX=cR;^7ivo{!5N6jgbWM50@&?QjJF>smh(f
zAUX2z{6dx&s7aqTO3C@~UDQT%U-d90Ice3uK&iptBC>vz0e`&ak2E4Sia5#T;bG2p
zbi8UOea^ek;wuSa|8gh)srWd&6X(L$)t#l?)6UZ7&N0HNcNw(h_d<5+iUm^;5z4w5
zL)d@M75G_W4J4*|V<lSJd#T0aHhzgS&u{J6Af0K)=+ER0?2Xq;Vp*6)&El+a&1zlh
z6qn3)tZ88L+f~{3ZweBZ+h0hKVFXJV@q^U8bF0i!7|kZyXiJ8S8pO-Q-{VJje_-#{
z#PRDdbl{l#52(rP0W9OtHBw;XPVZDxy3E{(Kced`@!hH}iT-B7>Tmb3Y4<DHh3AUG
ztP|VGuQ9#!O51lj+{H-pnyq2KeqF`+^Y)YICd`gJ%VIZ<6idSG5~=Ex>yp@za)w?F
z5%m6c6C1I;v48m)+HvU;pW<Cft0pT-Cd~U!7}7_`>4r2WS7ySt${wb__V1wmj~7s3
z@JSe4{*IUXQbJwJW(fn<E3$J+g+l(g%<40ThO)DlzER_|8wIx%@m|6!3#wwVm+Bj|
z(9VbL^v^;&AtG^yu-V>FQg3*M1+J>2THd?ZvtLeZV%te}E#)))JYt|E>+dyc$Quhj
zpWe~qQ+<WhyI1MyGj2k_+M|+-QEymgq!wF~ag&j-O5xt|3_<Vb4MBKYAo2cqj^aQo
z$-5>CA;SIvYtjC}n=DqUcJ5ch|KE@bqyL?^@V|ls{wHza|3?R$Ja!wbHs^us9CbY4
zBaaV?nUvP`chIc<Q4ln8GJMFigC*X1@OIc382il|SN!vV4>%rTT|a^2`-@!LT06Ku
z*$9Ro^F<3fcY|Y_B0l%(EZWfd5XDZ8z^9&`LyMP9fsKoeVOp6yjA*(AgBvfwcc%nc
ztK^EEn+Bq7>bCgOUM=9~4#%deyU<_HOR#(S9;iLtimsOL#Pw;}5XsbGK-eA_eDMy7
zmk4n8%2ZsmYKQn*O%QH#1YdJ|izcbHA+_s&AhOvX&SZLn_TeztRIH5oW8-1W@hcEs
zk_JXRkM8PU=H7q*5AA<_7P2;^z`s+m(64zNgx!e3*`^EN?C@mRwIT>6+V+E*#iGrv
zu7Wcm3%<mc!>i5HLAG5D=(|I3vpf}99UK5(Jubpfaf(O5OC79S#SEc2gV4cg=g~8r
zJuq551uQMI(2Cq7(dN32E}s31n#Z05g+HpGloSm5FB;MA_oLzB*f%KUxGOx2yn;?;
z1%Te)WOV684P2a>2*S{KxYI?UPhA0K%-;@+Vicfke=wMy$O41oO7Ol-%!b|`503-(
zp&`+C;g_#2Y`&%dU-tRJpwMa1zuX9iT<rjhta$Vw*aQ6(EqN17hWxk&bmhBf@4ZF%
z=A*gr&u}t!)t(8te_CL0mJz)D?1qA0iP@HsHu!D7DX^xY4z66E3dc{!poF&-;M#l;
z$^Th^ru^IwX=j~L*h52DDwmA{E^g)?UKo#dow0_;!~urnnV|yx<xuu=6^d8)LA#|4
zZcYk;bnka)P}&V7r&o-$e2T=punyFzT7vwSmZ5DQG*IsR1F$_^0v99<y1E@ep9R3B
z{2@?Ii;(G`4z%fmH^`${^gH}0Z20jI{=`P0oU;{h+PoZP_lmyokez6GksfqeAA&-i
zd!Pmb(K97Kbmx6IcgR8%ipphSM9F?U<>+J*h(mDJ-elsen2R$G3?zGohT|pr`@rR+
zEryC^Wb4O#yn%m#4fX_MjIP3o)bsd|+HIIso(EYKU3k^Z42bZf*#B5DESq15-rmUu
z`3NoS8Bl~bH7|k_-KDrUA__Lf_(Q$t9eh+j6}NBv0ts3Fz*Thb+S`7@BhwpTyQ&RN
zZfOAD3R^JMc#nO}&G4vMDrBm-q^!QS1E>0zVBNm6@Zp**4nom5W#UPQdR~jo<>bjx
z10I&{?ZR$5df>su0eJ4)T(Ak@;)$cHVK<gfcnQP#<T7K0e{^V~2z)=mXKc32N)
zK6hZq<{<dtJ`Vrv+X>yhm*Gud1xz!a36EZsL-OtvJVSIY&xN+YnDJ*pIb9Rtm0v+n
z$UbD9y#ub^1{|;B0FKAcz?nHED5j^9JG1l)TE1lyT+~p+JJX8Lwrjf~L(KRuS2&Bj
z)-A)2`u4)WfI`T>^c8)`eTPcl`-0=nVem^s0h-Z6kWnx~HK2&!Y^a2GwNKFGU=Qxb
z1E5Frb>_Fq!1XytLAP}+X!u-0I?NT<=^0~>1<TL|jcO>IG!$xX?SZFeLm|s=3goU$
z!zbRpf*A^fP|c)&oX?TRoLq7jx}uW>vgcaR^ZYYtcSJ9`y5t<R+<S{Anuel;Xod>T
z+Cjwfm*~x!H2Bwh8jZK`18+|~XdLznwdjw5WOWZNC&&gOoY%n4wMFRYqBf-3)QH}S
z?upX1ooM!_Q1FzMz>TR6$WKoJG8*DU%jG?KZIlR)<Bp@E4HWL&8;kbKene)?_mFb-
z04Nu8^W>^_0d*UT3IcOrkSzy;&y0f*vJ$;$$U-qXYT%idfTkT6U8wNU;57ULdU~e@
zvacw^&)^Zr+`t58UTs0D`Esz(=RXLV`#~HRc?;3va)0uYJ(^Zs$9?-goO@Ne8g1Sm
zhZ0VCAgvXv(YoWo$aOe^o8jK*b<aaiL3R>iH{YS5!4*jTmkhjIHUV|(3#d@-0CLPb
ziu8;{ODIv6>sr``uFumJvplDv_ych${zMeocFCEW+CCALY#RzE%Pyh)Vy|r9rd`PW
zo<3Uppb~v7)q;dBOEh!yEwrGxn7d(=iPHFuXpQC%^dI+#GqYL-OMLS<?Q2F5xa1!i
zed;pWI{i0i_1O{~dA1I%yl#jxV-KQ+6=leO*<Pf-Vj|d!?`h|fMlNSZCA#Rp6SXed
zfiCbWV0m|j_@06x*zN+F_`HCtk{1oJZI5ktKaoL+rOHTszyYFt&VsnOw~!?LSbA?`
z8*XHZcwc`t(%63xDGwY(s!YG&7j0cQJVK6|1b5)#kO?H&yOPMYq=JfxID(s{Oe(w(
zj{4e<oZotvoa+h3<L)-&>BB}+<MVNN!JL7F9C(6ljQ?P{)r^c^J(!&7evDtN`ABA)
z{er<CJ4w6JHyqv~N6!{D;<qvJWc1jPq~7*4-jy+h>NlH``om{1>zzU}S3VKPq9e)q
z>n}<5aPcao%V8oLKAm=tEyLfIpTf$2@`#hH9=TK~OSZOD5~ZFK<g3qTY-E3kI9gj1
zi;_&NO7mdXS0j9+M3&_K_XUj}bpV|@atcXwJ)k^r2ny?GhOYi8K*yZ*kyZR7WV7ZC
z+LB_1vP#p?tlss=!`d6IS|pl-d)l~)r|QVC_z+s5DY~X1Ca7wPKayUj+}B@CTwm)|
zRJCw1`qF)pvs$$ltsA6+5_&DsmG%PkC3gh!OFV+cv&USbr5n1Xs)4%BA4QT$1>DPv
z38>`XI<DV?BE(M-L~l73eGW-SZ5Jk@H#cXZZwlSq6_xGC=+JKD5iksG8a4v1rN*I;
z@mIN9dOuLOK{2v68;*7+trx%h!^rX-Lss5TP}-Yg+^$>WIY%24Zk%-#cb{}|@0LEt
zE55A8{xAEHbIz4y)`pQdW#(`=Z{&p+$!@}X#l)!q7h|02s*V5lj3C2(hT|~XLhO|P
z7CXGViZ1<2#;XU+!V#iJKAt(^wniPYaako4S&zZ$A(6!6@@tqrX&nA;Y=Jd4%*Pu%
zl}OUN8F-k-0PKEpAc?p;9`;6>l0|Wrc(k`8DI2bZ_f7qR?^U|t1##bD!oFBC`GF_y
zTcd;Lu8hN13@z}|Y5Oq!EJp@c`I7NkjX3XWA}Nef!j2YlIBx709Di{G&b>VaZ|@yQ
zw6=W0>He<RUDAq&bcf;Nq6{^1${&<7Ap#mRjqsD4I%IOC9eu2*K}<Ihj1*&$Y3gZo
ze`F2n)el9JUhGF#E{ui?Q#Oj0f;|)z)gs~dKJ;C;58d_7=f2M`LNf<GMc)j+aCS|5
z(O0z_NY}iIv)ikOhTXP@{U=2Y$xaK+mrq0IwKb4qM<V(i)s3tYPb0L*08|gPaQF7^
zMd@26pb34d(64O>s?pVkd@Wbxa_>G0_Oyn7V&3nI&e>>2atR97JC3deuSTmM{zDfv
zC@2liK*tyCgypA>A(;*{bi`Bwzsm~HuhkpTJ5MR9*LsEaU$%vlt|oA=zzN})T+XB8
zukC-UmC!Ap(PB3JQe^X4%(NSof;wHS;cuWiYOFOtBc%4|ewrq_(Q1Wed`m{R_6VGZ
z;~jMQ&LH&5Q3q)pT#Bm9T)0NV51iR%2Q;Zz!hsV-4-K~=3+ly9v^vVozuAjIeHuB>
zxvAW&)#hk>?-}mkrlIKZ)~RUN{b49hIS+ZZPvtIOrbt#s%CR5i=z?k~7xcaWtrvrS
z<h!+yx6pu&szjr}e<RVPvm?>T59(;`jX&JYUD0Uvo%5)EL4UMRB6`dw#;7ARh1<33
zD)Q-XgJxV)K<(~dxq{A(=$6zK#k@&G<NnH{mCb(K3*#lWKc_6=9M5e<hqo+1(yqry
z>yYT|)tf@+`2SGW%W3Fen>Q-ybVK#Kl97e_KsYpC6>WZAgE|}|(3#D%(6|v7#lM$w
z8D|jMG2{udmHo!;NZ5ijQ!xrp9m;*E?d9&?yN=Y$w<66gom`;(CS)=7A9v{L0+c_q
z2N@ojhc5T86kW>QoE%z=#!79Gxm6K5rgE24mx>PCknLR0(Q5Q@?>{sp?i^b2Yy$e~
z?~9&hk3n5p%7{82M;*ookT82DdKPO9TRrR13;jxN^W!#j!rccwDp`v19_FG=xBbv%
zi@&JA>jYX^xQ~+>PUFzQLfgMaRooAEGZZxLJB(Rg3}n>+Tt4XlURmf5liC{4{2jq?
zDwqRheGJ>aXG2S^Hh5=^#B-lY;MUAIh@AL9^n@|$745|t$r12lP9o~*ngB1}bbyO)
zEgEFiDLzw?xJ*_9+UIp5>sMRgn#%@=Z7hW2WwB5w`e+|F-iMn%V^D~U6mBfHg^3|r
zSaq+8IG3;uj{HOL?Zpc42^@vTADkze0@FZi*+fW>%K~!63Ht2L!kK@1`1t)Ic<Jbg
zle#Ryzg81|+^mA1<r^Wi&l{FHSl~Q^Hs~UGu<7x02p=5|hjbsJ{L9C=qGgq+S=|4I
zGzAe~CwVgS;%+iKb0T&6dJyN#d<LUz+Oba0ADpvYo-}QIh2MVNix<s2M?ze9{N$)6
z`Oa@7Z%(^_x8pzTf95CtdVVCn5!!>7ugxV|<rcV<m1E<@Vr)d%20S;a9gplQ#D5$`
zCtf3&s5<Eoy}kw<+EGga2A_Z%L!{(IS2>Ogze+-u?8XkstH_q&-FSP?DtuYafR4*i
zA$s$YFupN}tX-6Z7ysl){?wDC_tsAmo4t^<wVKd{mV0nKUWdJ(EGGq(?{Mzod$?ih
zHnPZQJGtcX1~2msB@;KvkofT)IQG;kSnu;3G<x6T#AU}}y6*xAihBzQ%eG^jU;u}<
zo1t7C4N%J0g%Cw6kPCT*G@Gi?bJcs$+W#r?8509ZnpNP8t2vdK^~iI#C2VoS=(6^6
zR1)(5X6EK0vm8}0j7C^~cp`c>UJg8Nn?Ur(?O-$TB3w)Sj^^f^LCR?-z<7!kx~$a!
zou6)^rZ@$hI9ovb*N%h91LQ$-)>-80<c3eA_aQDp8I0t-;J14nGU^V1kfK76ooR^i
zukA4658&hDZ=luPa<E|M8JMZISj@Zzu%2Ux_YAIqqJBI&;{E`Z*x10TtW<O$yOOJl
zi9-SJYmw}WK2YDt;};qapj2!<8wP6PB#mwO)TwLmWwt-ev_1;P6Mx`Y2H)YF&O)4@
z(gSlc{Xkjbf}_`sfDz{%z(=x9l-#T#rQivi{;3H;jRR0;^&ogAe-h>w>EmxbuhHX|
zgRzx?GPqZ3iRYp<xO(XzJVneAHBE|u+v_<vz91WPEqr0-<`p1SNrP88C8GPe6{qg2
zg-JpNcK=ojw<ap!y1jb1_`^s@nRy#5C9i-fg@DD&DLA$x1}ElN!Ob_Gc%Sbg7@SrC
zOBc=uPqXb{yR;C8J;=mn+A-*~rkK$^uZb&JAf9XT@?pG<As%<n5PLK^;;(;y;OZ?)
zL4%sYt>o3PBYqNuz1;?ex0G;Y6TtkbJMgK+X>j&R9{8Lh*fMoAS{gn86#1>-dLt5*
zy{i$;CR<$Ibq6g7SA$IZBe-hm8?@Is5Y|Vo1<v^(Y%-U{QQN-2MAMUynI3}Q-`|Uj
zOha(Th;dLJFYXTp*nnI9Zg^WIK3{p>5dP~JzEFD!UIzxk>Cz^M*`WX*)!U(<BNAJi
zmEql^<?x{sH}I{YtH5TYGK{d-#NLg`kXu>+KC9}XJ`~}eRwt<3qAgk?7r^1xcT}(0
z$@M!>k7ib0hMhm}gUS7axOY%3Jko2y_anc7e0c!AIG4e(b@K2}Z!KuQyn(xuF2mG6
zbFlie<1qN9In;KIz$;UKp`%*{g1317nxW8*`mZ?$3L8z~oT@TPNtJ_Trib9)rFXF0
z;4+Hb_6@GSd4z_W_MrUhPB`tq-!OXeG>~hzfT_jS@JK%hHimk^x|@!0`}IuF@0W|N
z&M?7W$JT;W8jQ<R3c<_hIh<?w2zCd=*SJ|C+COjM^VG30e8Dh0w|y?Y*EJB*e-J!;
zg#!fc*acp05_tN2wz#G(#i|;cu$gFz4>0aSS&Iv~vCYwt6_f;vvv;Fm<$DpNRH9b!
zh3Ll5oZ-m7+>W&zGEgIEM2-{EA^Xt!-*PDO$tNVt8IDG-HAJSbMSHZL2A3WdjLv5-
zL!T@Qxb>H(B8UEkC@_-a99*t)H+$3|sZR?vy;MaN-HOQ1|2ZeUR*0sS?M1mV1dZmu
zqH~URob*5q+IpZLD&O@3t*EHyjH=F~FLjEj7LP<{{<DQITT)SB@dM5^(;YdsE25V@
z(^2S-7s&OJ4Ez>ykg(4KnwA72mzSce_c|M09;|}y)eT0sU31YZ>ufZqZx@R4PeUh{
zsUan!t=xoD`)xfRxpEt4h;B%jBDR~Qi$i+*;ZxJQ@L2!-u)+I1F#ZsnGMNJU5$Vts
zH34_8D}uV$-dOJ7PEZb81*Eq>)*e@f+C^JZ%6|ccD?EC3V+o9l>lWRtY;>j681j-F
z@TU10(A8uQ20IVJJqd!!*L9Hk>Kf$bC&P%d$~f?CCrS^L#VW7ILCQC6F<0La23<M;
zWQ;qkRH%bG&uMr<>K`!w83SVn3sBKg3kUzCfvJZYK2qd{wLQ*)!=D&D$@>cU$(w-h
zj*+<Er@62`cZyg8se?=7U%)3_16*e_12?2N!suF6IKB2KXZL(CcwVxHsHbkQ`$`}f
zbOphTmA&wJ=|1#DCJ|k()`TqmSLp9gWym}-7*>v%ErxEa1Lcj6(4&=Oq2<SPXgw?C
zk~~eJ{$vwMPndwRhuuOCe~OcxVwQSC;zm^Q<rH-K?n4u6R-$3MnvwCEqv-pA|6sI^
zC6pDuK}R=*i+TFLxQ<hcpy$aIG-`4N44yX=r7hV6diQsu<v*^VC9&thQLMMfEalOU
z=uc?*-gcA}*@k9(3WvdGbKv8F(Xc-BAv7<4hMsj5q0Jp>aM@ZD`1m@+%A>*f?*zzN
zG7Me}nFik1_n~l&fyg~7nX}vQg=4`V(YVGs(Z0?AZp$6CF^+-V`y+@;#bE5(hw|jd
zp@Z+XBBP0GK)w1gx*{7RdN3c*`{EqbF52$*J+E>XrtLxcV-BOyGb*_Wiv3~nPI0dl
zbeU@#s)X)p2ZF8dD%5m)2&!7gP{kB=bbnbYDoF8x#_FMv|J@WEJ+5;R#f?a*!3@=D
z42FBtywN8)Inj;^L)Lbg=*bsPFw+Hi=az?Nif7gR%16+~W7Q~i+b{?)-vCcVllHE-
zAERo2QEE{r(%*gn{=OIOi=us~|J*}J+2jSPHF%89jns$KxM;E7`Gm7^ePp}tUNbj3
zzZ&v~>5vEgc4K}4CP~YmlAOmL*h$A7a<#*;L(XRWVb2!4b<iMedIjOniQZ)14;wLG
zzZ_>(sF5cVMO$q~JhoWnf(M??g6*-cc*lBMvSapBxc6l^&iBe9rR5LdXv7@6SbhdR
z7`s}G{?H{0l3nnnE&Z@Wha<}rgW$Uaksgc5xH*3TnaP{tlBTP8q;D)fR;qy~30cI?
z#2qI-8jlU<@5NDjLU54PX)L<8<nFF@B)_p2Z?)S`W)#WdEFWWRcwd&3C}rSG_c>zy
zVK6zmcL3=cH33g?QzSxo4p!YJkAu7v;c9Y*m`OhxEBaBGo<0tq?VSf_xo((clZAS@
zt!Vkj0(Aau9vXkV0&UT;hSY=&$nZ%#<oRTy_#bs>uvi;hZlB6&EeJ*ZCq6{S@}F?`
zj4z=Hb{v$RxXnFV<buje3So_xxIahfC?i&Mhj;BlN>lW}Yi1vMwS%DqnLsFVyTN(c
zxWOjtWK@UO03GjzT>Y;jx3xLQ?`s*#cVSSgs0Dp~tC07q_h^<z82X`dA31G{g!^Ub
z;ILsB$eB05<J1QzsmUINFGS$>aWDEgvrw#cSwZtuO9*^ijI109=!PCf2?qxt_I(65
zHsU9@O+0_4$lK#78PB0vw3D}|jwE(2sh~Tp9l3|x1+{ZpxW3^R3`*MrSi1~HP27iH
zWMzR$k18IwaW-DuV~0-u^uW`q-$0AIBlI^dhnwDU7^;oHC_4?ZKi|QsLL$tPG=Xz;
z0vvgr09OVj;GCVJt9oQ7{LnjyXQ^kQej9e<c~f)Xz~u(~aQtd$^sB<XqTBi_APo8r
z^x}KzH{e>*d>D6J8OwB>gcUjZI5;~2?~c5HKMtIZtAdnBZ)iIFa`J;ow|wy)n?_it
zp9S02J7ASC7d%?*?d&<@h-<f8fTefl!iV7P2y~m#MEeXV`H>CtWb<J6LRa{k<Bq?J
zrrsXAOgNi88`MoQQ2Pl-IK{-X&Y);`J7_IvnIH%~AfSYhM0jv{FH-tcirU?_!|+CH
zG@P7AUdcnmD*H7qs(&2vA3hE&YJ$OR><(mbO9J5m+fYtGC(`i`5PkRG=tRzc;5XqA
z@|~#-A!GKUrQ7Xc#^NLtqpA%5MmZxr>&;-^86`evYe3pD3Y6Xqgw2+bkk36rPfd)W
zKsOpT^u9)%+$}(tXMpU1LnyTED%1!4K+B}3k%p-T7>*t<KIbi<EYkt@)n@3M%TQFi
zwUCRJPeo}d4B1SrL2C@oqv)OW$WOBrN)M!<NzD_GN<#*cG5?58Tsez|!$e4ls6@SX
zR$!u}2WM|yMw6D?fKAX`?&DiKxL$1mo_s6{S|ffg4m`5bOX5B{B2=F50cl73P>(%9
z)1SRW;|4gO^l;HY_&gdmb?_)(YaonSh>_gTHk2e+h}b9uF@^qU#Ee9^`^5w)H^#!_
z+XhhRKOOEK@JGteV^ERNFj!V5`pLhwU~AC;xS6C1$-3#V=Rq|(+GL5+lKaD{=xX$;
zrUVHRFEEq}C?@9=?3*<dW=tQ5zJDm^!n?lP&T*(ht|$DFlY$b=3avpsMuQ;D=M#J}
z%s^}Bjz=+Otw<$cC;Bq744IEJ12fS`c_s;mf9)BFcestVX&J!zN4DJjq+;ayTOCxg
z`nY#j#G0(PIb1pOoD19>i5_Zhfxv)Rw7N4LE%Z-7hpc1K<NuVQ|Ic@5{NpU-SK$F-
z^cpwxwHc_EEJqgx&lk@OA?Vp71*pFlg)BM?(2VNcVCx_Un!)ps+Vf8Iu<#Npyeqnm
zqkUoDuN83jlOkkot%9x_N$88D3|aq(fulhUsG>6g`O0ZP{9qNhr7sVon(g6C$u?Bz
zA&(CJGvKza?%+O&^^Y6hH^I-)8rW678fHIOg>6h3oPY8FCUl$v*P-cP^`sPPcOJu$
zcgvtnRtBFCZ9Ato7{YjY>?Tb|`#<i3e`yhTgH9CGIbVW;m5<TgKi!;Z>?ruNpahPe
zABH2Un$Y#03m|!^A-3H6L!Ch!wpT3$yM|Ke=b;bzi{v3^OcE3yZ~&=|ExdawgIC&8
zbocQDT;g8{iR}Zi?`mf#46y~jb9FE|xd%a^6tp&P0(UDt2v{{A&rP(z(y(fzqxu{|
zjGSRuNEJLAk`A9=xj}PQCH9{=6sMjUh!(t>F8)Sd=6DMSNHCfOe>>#yzC-zVyHg8T
z<-ErhwI+CxLM)!Er6Zo7A3@A(d2G0OFzMAX$L*SdIM3G_zx$L1F2S2|T%Rq}q@9E0
z&?>BFdkB_m+hEIxf5<f57`bJxhf2|rIcT>L2Tw4AP>DSriYEZM9|Xgc&f;3nv3PMr
z2e<{L!rF9qNM70nbGM~~t@0kQ3p2+BJ&6dMD)7qx{=lh(W7yvR8o27W!n`jNF*BG1
zDw+fF?u%C;oXY{{?fdcingdwzBn9mg;dq4DkL>!ViU%D&3!3TqV4$W<7DX20mQ@@&
zKISGGdBqS_K0OZuwBEpv2vek!-hq6BDSDe`44KDtkeZnd@{^x|cGw?9e^;iW^p&OP
z&)TP)%M8(zTl|GPYY~qoD6d8j^YppY$U^j}YddlZzQ8H0vP4EncF3#Qjw}1}jSEw*
z677kRsMUEnS6fGr@8n~gsgDEF5o=bd|5>7D!^OzKZvuDe%}iwHaf#azoP#2Mui(yI
z84L$6gmV0yG3espQuO6Q43hito1@4Sv5S|vN%nH+@4-nZStSP@k`c`j$6BO*cRSJ^
zQO@O6%fZ@19thjK;C}xej#^E%(a3SR2%E-<K2Z!etlXBHbo_<w7PN$$)6>UkS9-wv
zxBKCgVh&u6bj3SOSAoi_@i4N!24Yroxa8;%yh3LSE)xGowQA(>Rku%Yd|5eMP#lJ{
zEp^etp9*-g!Zw`lTMqxC6>-R?6j*6$hEzJv!3b4zQ8pNb)#oe%?`eBsHA?`6$$B^|
zHXa8Yr~<D`c6gri6}0kP8$4@k0Dnz$e9FiL{-j^PhV`3~d#ne(9Jdy)sz$hPaw~E?
zo(!HtHo?@ffzY{qFm`<Z6nu;t!Tice+*vQd=}omL>EwI(u(}xPV?|eZ4Z;`1HSX)!
z65Puk!bVFe$|wh<>!gEj!Z6tT@Bj>Ypb1|;SimCPf$&cD3*26;018@X(ZFB|ON8C9
z+-ofiZEJ#?e)2$LVnOxmG`O2P4dTT0_P$mv7cuWJuvJp9s8ohdA1AnCJOm`GUUM_=
z_o3Rm#qi*85~QC!jB2kBfgzUF$mjMJIB=A~ANPUa@i_yg4D?1rLtJ3%>5*X2T?T&o
z3{AdmitE$$(W)KQ(7dP`TKf*b)9tyaXx~p%Tr>rOm)=8lmo~v$(UrP+)D<#A)v#P^
zENnWt33<t9!dgpfICsJc{u~Vf2NyMX7iEZ3yVk+5EPGV{?>F~7{{UB~!qBsRFVXb>
z1bn%Q;3tzm<3m+qoN1mc{_mn9Sgsn3kIDc}IyV}Jy0^o>{)Twh8a1-7=nFg)X5uO0
znD0*eYV>$y07lJvxW^;_ekChoBk4Q5VQw^Z%07XDg?6M(V;>m!8sqyfd%<g)Iu7KU
za7u|f{(9{qOdrsNx2zZp$1OT>XsHsO$`2%oi@!mq(|$Zp*95Ely$#84+{o8oqp(@d
zOZY11f#vp$#Bt40c&*z*eCfv!GQ++Wo8Fp5X!jYAZ*KtG3q`ng$!PpWSp`3R+Jr|g
zt-*fNbn$~5C-BdBiibO%hDMDF1czq8l2<Qb?U{NMvwk1)nY|Mw|C$P)4NTD3{0L;)
zE$(HVd$^$<rHEy!LX(miipwoRWigvj>kNBDr|v*W2TyS+WBzjTt%uQ&Z9BO^2i4K_
zi(cqi;!y5W70(@}yU-QC7&PzNORiprAlsRbxj9lm7wi&HkrzcPmK;ImziK(NE3;6>
zRR!eMz7Li4WOF_ZOHjv@U0e&Fj?Bh1qTY|kP|>MrXvvXv=vY!YniTt!%U`tzZJb^#
z*8eA<XdMesvIs$Q`VB-=&dPwye*vf{P7_U>X^uG2a-1FX7ag_M0_#o5+@y5i-pqP#
zdmcA)E-)BL<HFEMeS$h~?m$_u#(>S;F7Z}I0heTHgTTffMJ!l|ly~ex_ZPk5bnFiy
z`#A&9*T%7^@?AbEeDv6MN^cCZQOHKgE3R<?qm7aLi5OJh&yp*v`^z01-if+;{zE3`
zy17#$)X}6jz1+9WqPhNjHHzLWkN#hK?-|xa(*+FEJAxDoqGCf(0cFq3?qWef1f+=A
z6^tPSNJt1KK}E5F(hOpWy@CxJiXtS~3-;c7@4ff(&L*JTKF{-B-}n2yy>QFSnc0~$
zr_agmZu>GA_QIQ**L@b?xt}Ss`d2}lzKPK0DY(|ZobuS|15HOeqhkLg(1h%yZphof
zNQEuTn$`qn1g(YERl6V*co0_YhIpo7(1^E)n$~$P1aIsFcc(Uo$o4BidN&SE58ny5
zWwT-YhC@_SQ7CmIu*i3mxr(~j^d(gg7l!toAwS8Kc%!`MfjHvhEYvKy2vt|kLDr#r
z(50BYsN}&U+_7Xa+TKWu);OO>?=GgI6@gz-$c(9Q^4=NL&T%mI4w-`TbC;s}Ew_U8
z<1<v#I2IKjo`;&xZHS}a--0eTcB3}=rf6N+0QC7>f1KMhA4#gVqU{1lWaVju`sQV!
z*!7c9$yrCFdE$#N5BW)TpE3v=znY3(7&&9)?}@BFxgzVw`%sgv<ojqBSE80w8`Ks8
z(L4W0ymD-PeDcnE5Jp}>vi3=6cTgD`=&=!bB{fIk8C7_~C_~)YcqP@y*A2|+`_vQh
zP}JnOHCnnn0e?1#rZ;8su=@Q#dbLc6pC+5qriC3a`Z57++iZ`IOd3T0dM&}`A8%p#
z!ze6}<{+=_dof<R3UyD+KqI<V;Yp5@5#3CLz3>8*cP|G{@Wg1wYCk+{(Fh##)(6SE
zsBkxf{^<GHD74@FBmQMmfcYD5pqJZrpf%4npz6qLs6loG>JfJl^*MMF2@=~P8}l`I
z=ermj5`P1)E$@bB9*o691QcG;b1z!LZ;8!Il(^gSMJO?LHBLNZMF%`jLPHhX@KQ}7
zdU&xT9_>|*ia$L?dq*F`T?M!BYbQ74zb_q)S<Zb^{1$3FE|Toa-ozyr>eDCsOvA=!
z=h2SzJbdShH9f(0AwHnkh$JO*@vIdI^ogaraJ2nFJp4)_9vu1r*-YGxCrV84#+dVH
znwv3wu-z8ap;t>>H2XEOS#u1=Y~P5oE8Vc!=xXvRw^=BuXd@nUZ8}PmE<myamvMZv
zQ8@YngD>@P!Ho4IwDH+Hl;5*4zKYMG%6M};p<y&ShYe_}r$M+N_5<c$$;K@l6j(ZF
zC+>av5jr|G0E=XKxOem+WU<SHZr$!N?kqfl4v#XU`v$edBaT_)lF(gfI3!@3&Y|=x
z#~yUgV@Bj#ZIjXI2lwDWeJ9-f@GUf<@dC8u-~hCuZ4Q$Ba>m_>pW^d=DKri4g=S4^
zhFq&BA&;Kjkl@ODC|o-jMG8Dns$VF&^F)S*w2Ogy_VbXN;czq}d?#%1+X+jD_CedX
zg+Nxn(=gWc8cJH!7}@=t4zdz2bezaCgLfgw2YaFuKQ}_-)iLPy_sKBvv={0*b}uY5
zt46|6o4~-q5X(g|Fw1!m+8(n6m5=g5vB#UEf^83AkcAwDyx0RKWtON%n|O2$JE7B`
zAEDrYX2`^154>?zp>s1B<fM2CgQ|a``EO(>>GK-Id?-NGdCTF=3_oyAD5HuG&w#Fb
z7lHeNL2%<m0{Hfx47awoLT+Bh5DQPJz6PCvmzoYY>ra95I~TxYm;tQXzYXBZ0C+k!
z5WW!G@UgEoHO(s%G~dZS^T|)thpU4jzdR9!^=U+94R?X;O_$(K8$J}fn8KS}S8(5N
z2~NT#aBOfj6xvV_`r`mJv6oWWj<dkFvMrR+)o`cyHq~+oi9Md1MJ2~Bf&RB_(U#pS
zVXd^9I_DArGk^HOhtdWRR$2sJu9a|b<!BJ??2H_$rol|lbyV#4L(q0ZH<-3J35Fgz
z01u`~p@r-*B;G0}<1VFEs#uEGa*eNfP7!4hU=N3z|3p4>jv^BhU*Fj>8`I`O)K*%C
zDhAC)DH*5XrQdF}PsZS%Ka0_n;3a6q;f1K}Pg6A6VgaIjjZkoqDSEr)9a{NR3x~+Q
zeL6+HtJd!qRo-h4_}>;F_eNzX<n>w@*?2CRO&cJo>JDgI?nL)LhM_m3#v@aW2Rd+!
zd~40o0GY1NKxtLQ5Y@v6ZLtu5(X=l}IA9VQ)#5OEEuW6+DdSPc^QB1Oat9ugy`uPI
z1!&jBtI)W6Yy5P>O0<sTadpZ1f{fn;qi5nDP$aQI5$21~rzsTqwsbpO-8LF-zl(%H
zG0w30<qN2=*21^|1<6rNNAAmf(XVZ<QC7kx_<GtE<xjIlcRLP-4<W&5@sZBx+yxsn
zH-h9rOl(5l4X*-qW<zB4J|EnxW8p~2JLvV!8BFBsz*O`JuC3cl#hvU3UYi5Z?lzWa
zh^i$-PjW=dGef{<^<rp2wL?L>X2aq=4rnnO178}PfK4&|;i|VgI=I0dZYN!bz#h#(
zdBFj>N-%n`;{=>C^+p3pEV;3V8QQS67%G-sgP+U2$X?rY7|~<_YT@RFdYx?sH$L`5
zE^YQg^L5+c&Z(R5g4mUJj`EQIt%0b5d_VfM(RS*L$#lvi@irA7N4|BMe438h5=}?7
zGGv<Y7c)<UHuN}q6Wm<ho*pzTfOaYlprd+U!Z7*^ez>|J)4shCJu_cSZ*FviUb^Bm
z`fe3Zm!I^c+pS5#ZJ#uvhlKy4+fSZ>?}&});$G3r@=Ggm#TIvZa?^VBrz~$;bK@F)
zZ=5r2_x>zKV8RT#?};}*{YCHcccOcqX~(qu`W?@X*+!QYlbGm>3pj1?K*q(|lXeO+
zp)JB==+|31&|_~+pdByWq)Y6)m|xSj)999vS#<pnep!4DFWyl~Pkz&#mK<+LPw@Cg
zcg;FY8^*cNL%lWh;5AWnIFZ|>w|1j0-vaUc6`!zV;b(Z!`XD5_cq0EJJ5cBSa_AoH
z0L87g!^T@HAg0xNuo-m-nzcI$<gGraiE0^KYxD>%y~b$62cSBnT>!UUJK#moSE_7B
z4(twXjt-3??|Ay23_FL7LB(&o!O2KDv`j95PtW>5Y~NNWd;dBZ*gX{{>}-T+W(oDK
z142bVRzP!AFzOuI9}aE`K<7L?p?=TRkea#-{pxfRqSy6;;0{~iBR&L2>XpOpnPkt|
zI1yoi7wR^1I!d@V0h+HSaoelmX!7}e@U6`>@JbzmPB8D`@f$6idD|5&8PXT_nxCag
zPuf#feT?C|Rt$ZOOwhAY^-#%lL-gt6di4F|Z18x!0Pd7;guww>(7^9FpwYf)XLk`;
zT_xXaoLU0;-_}6Z)yAl*!!jy)-6G&GvPJ8nU0~Pi#b8i%8=45psOttH@H0FCohe%j
zpJeSIyIB!f*B=S0P5a^dO$+1}x*Xc!dgy5ICrbS8Eevlq2qyUZqP!O#FyxjQ3RvO|
z3um8(39I9fz~U<$o;ePNnU})E^lYH*^Pp`@Gt|-C7tzm(p?Jtl^izHvS}-lZGs+bW
zE?EK>+~>g{l7Hy<@B;+#+o84ZJkZjE;V|hd`8N7TJ1U^R8T9Q|0FHh~VbS*^(0SA)
z*s?Yg*={5<yrcto_gw&8*RFwS=@k(D@IGw2;12<BjnO6EVmPQQgRHFYaA?5*s@<K_
z@Vc7hmZdg=y!2VHc-aLAD_=p$n)L;lMIR(>T>!iV9&jXcER-83LtN=SX!GbCSnd_W
z=uKv5=!MPHpp&=Z!kSokzo#R5^s5<I&G1JyDKDsA<4WOktHH>c$nk5q3v6TZ;r+)<
z*i$Km?p_B$u+AQx3SR|B8%;pxiLL9M<_O!D*dkZ=S@7bK8a7%nsB!8UIDWA{%JVWr
zU-P^mp}}0L=};4Df9Nf0T}(EZXT*|y+fqm!D}lJT6e@q&5Gr<3(EZ^em?AWSh7rSH
zD)EJScO&+Cr!A0us6YJjSpgxV3gO&TUn=jaBRG-#&4~@CQ$G@&U{&Qv;Kfa%?oR(s
zwR^iBvNm|Z^7*$Zqwn3}Wor{qDx;wMel|EiXan2(=fL31fs}oY0<Mpzpc~m+$d+!W
zUZQNc(sv5gdr3T4kX+km>S@r(pgtTPp9RYV1<+e+3c2rB14g^yQXdiQ5t^a>#|OeS
z|54DGeCyb)i3-w5uHo<(b3lNjp!sJFxbC|P9ed8CCcC$$p7hZA(teAm$lj(f)7K95
zCAuQfyaL`{*o&UzUx1J%cj5j0PUy?+9dJhb0va}6g*KX92f=~nXs53svP*jdSJ6vY
zG5H;}lXnI-i~6CCmpee0-s#}fV>;}<kwST1?FkuCN1<wFD<u2k0^K%ThOVLRaQK@N
zeibxEeV>;@qvw}l$KIZB@`D-{eVGgO?i_{-9qPf5`A%qZ@_uTMM1an*x8URV5afDD
z4RdB40b3F)%T5S@VEhRTIvJy+QN^$!ZYt4>S~wWI6wW!2*zt--(Bnojl<<nd5M6>!
zZx5o8w~A2fxYg8#hO?-kX91Mw*%Yu&3Ij1*C;Ocy$nmo&+IE?I$M<M5$ZjUX^>qs&
z_~#~=Jbwq2Jq|)$Pl>^2aTiod7X!0x1<djBM%HeVshf+}K-kj;XrtN#I($0;jc475
z;0=qY#%KJ&-7N++%3cTScTR%Q>|99PLiSL*7$Bc!?a}+0YoXQJ5abYbl%hL2pvPnL
zz}2=Z>g(zQ0Vmy&qIG9@-~B3_N}GeGyV@Z0>wRJTon7G4KN^NSSq9%s15umy{wU<a
zd+6PCD{8y=D;y)=n!gg>7=2nh1BUb`zR<9*kp1#L@Nc-I@N1UnRdf){<Q=B&q?=KD
zi=R^mLyBOnSO$oWgTU0zaH4N7WVgka_-d}u@W23=*|0M-9GM9L4Zp&`ZCQ|aC<l~m
zCPWPNhWRANC*N@pb#ajvtRGwir}lOL?;YXTfaVZ%tReL)<{`DVdNwFJ&4A*<jnu<$
zd9cyoE;VObAdImuhez(-;9ofj9Pk5b>y7r{xA_va?1CBsXC9}feEAIiX2YmK#rI&4
zb0!poZG{-W_OLX-5L#pwf=yoo$PSB#`Y#j^+|m=)cv_+4h4FA{+YG9}wH&%N3k07B
zMqt``K70%Dh5nYo$jXZR9_3e0%H7e2>MgzRyYqrAG@nvVb-daihLw(mtg;Na*rXL&
zIqW(${reT_bVnzc{@fE@_3?o%W%-a+Nc`{J3t&fkL)h1(C%iA5440E1`Z5DsLpa?V
zuI$UC9%eOyil?Jt*~5-h2--`<JM4npXI@Z8W@{<O2}U4W{e^lPKMJr=2K%!c!AK&b
zZh5;?p7+%d5OIY%xuz8QEml!6x79G;yM#J4)e|lsG)4uP0$BfKGqtRrJA6-iM0E&o
zg3g_D;oyxua9i4qlv$zsVclWKn&VXG0j1FFyf?%&^a97KC2&R150*df53R`WX^zd=
zMs15n)H-dxZ_DWU6rx{I8`j;2rkf4X+p?=9_xLJ;-qYcRy#_oFS)kt5NpN`TLTLD`
zKKht81a4AcXwNehRCM13S?gawvj=GueV@q5lTV>idYpQ-tUD}{o1rN|J1FUh7SQUs
zAL`L94Zb}d4%_DD!ug{Jn8ugk!8#S(zaImx?=2CNv4rZWehaJFP@vl)lz*Z*ShV;K
z%Tn9H(ZoXNWi<qq`8^@uJ{Q0g^1FxFvoD~4Hw9)iG(lTNGiXl68ECj&iH?7o2G7&W
zpm&?eXszu^_&AS+vMwJXJ77PI=xvHdJPSoVJUyVJwjSIpGN+=tH36@&1Hg5xBW~)x
z4=;RgfL*N3>9`aJoO5vsKH2^<YMDC&l`Pqh6ip53;m7LXu_xr1H{Sq9?Z`vDUWVdL
zLVwh*lL#4Z%Em`W1fjzZUZecCeb7(K{_u5sH}r7V9rA7GW_aLdI~3v2313$=LXRFt
zpqp=HICf)8yb`ZP*Ssg77du3#JfIv!ot=$-CX7cL3r?doZ>~eDpjmkQ=?BQV-*W6N
zI)i+A+Th&H9q{Ow&S<mMPjs=`6hF0Fgo573Vu^PSzUdZ-dPb*UN3ZE<sLfimJ+2%*
zn!gwoHY>qLMy$s3KW~K=y?oKIBi8V`c^S$SZ9-r2NY2*6Xw=^GEZUryh1U=I0zzIE
zyj_D)NSr6~bL~Suh!BFxrkkUUkIe7{^9D#dv?n@Xp+O@Wy`$C)NJ7#1gOIX#CX{}6
zM=zN($bB~tuJ*qNQq&$Fq@IDjg&SH?&j|Hj`U|qtD$$MBk!V=TJLtdXGx8sgVXNpk
zy86%rz07Zn7v+6|!U0>+_aBW=R7rg_+Y{q?<`JmhuNRPME<*dKbwr_7gVCKWm(i19
zR(RYMvM>FrKOWn#DZ1PL6~qlJMeW{pKx@9)p!(VhloqoI-6Qe$Vf>}2jk^cZc3uhd
zJ87x2fy3am!z(!Twl)3|Kz=)5Zi~AeZc5*Yi@?>=*?8dodN{`32+wNlfrndnqRYMg
z@%(nv@fgRx*zV32wEV_2yje{?$TIdm`cZis+b%qg+{b$2lG&k1;hX^@weyk1pk`QE
zGy_M<$v2`KrsLQJIp~yc7INiZ#k_=`cmy`VCp~r`%UhX9GWtC_b$u<`@@X?tz74?N
z+B|}*kMH2aE1#nDo_q27Bn#Ysg9$F0I212yJqFFc-V6t~w!zWk7Nfl7yYPlzTk-x8
zp=d{J7M^!-FIr6U`Xb)FL9II<LpMX+==l%#<G!YAp!`=8)MigVphfS{;K|ESW%v>3
z*xe0%jIl$~2m8^95|S6!P7B={T!J<Y$p-~Rai|ZMd-%%*n&3&O_=7uoV|55}!qZVJ
z$pmV3{!i%hqZ++9av8Qx^*~bwzJlP&QcAtt7EQi64LNq`i&pMA1x9A4;q1a@$iFxm
z9gQhRPGsL<MOq$-36fmxu7L8Q-@<cQCW^mS3d=8(+`peja3?MmbuQnDj*pv+4BKXa
zc~eW|(taaMy`lw+PGWSWBnoA9`~=-^nd0v~a?pdG{?K+sFv>pv5bAZ|A$y0Z$m`lS
z_^|Fgx)j}&`0WT@-@i!NpBzrfJ+h$UZaHkps6aP+%|W90J?Ir4i@OXkK?T8^;bx=e
z=vuS4P%Z3&a+;SQ(?PdkV5BkrF}wwuv$h$!vxD%>=NFXAk2`RM9*Ay~g}~!c{m?y<
zSLs_F3Xdi&gYG2%xM<~T5O%x|w>#EH5oRM$c(bX<q-_Cm;k%>kNo4=+v=r7pJ%$V<
zcL6v1hDufYp>xYN==yPc)KAd>wOQI4zfliBPd?b9-hy6eVXF$*mKTM_`kY2_CU$u9
zXdxPqH4qn{bw}TJW<itH`6$lV9ql>kiSkxTk<(2QQ!MF+=I6{q5uGU1BxfY7l&z*>
zBw4V%#1AdXnTw)7W}^HtT6EzN`6TjV!skuTf&Yg%)NsKDl5;r^P43hOeV%WJ?i83~
zzd;V@>GZKEx|J6BFS|nZGTViQy_=05O*{jAmNSTYeH(30&w>MQe2{dK4|eeGh<F(c
z$|1jDxL;t0PPYAsHs6m&{YSP$5fhy8yZx?kKhp#kJ2;@uh8=PDmuKNxS~ym<a6?Cz
z+M{h91^828E_&1BB21L!qCV;N$i;6eQrf*iSED_#>y}k0XzM7v<7PM1{`oeLBwk0k
zvBBtO%cf}k?0gi}ZWY?(xE?KOuR%Y@HbviuZHH-{4B&3-2>9^O4Xt&yL&JyUpa#vI
zkub{|hgMaA(T@|b()~FIUE83gu1@H|jG^eNs|gw`NkG-iCUB@UL-e8%$h@kQ8XtTJ
z9c(-hU2L-rzM+L6&h3PX*K4Rya}wt$U5qXmp8!Pk`M~IVAgmVz1~Ex!k-!kyL{-5i
zX9f+Q=>k(50}Adq8B{?V(HY`<l23D?2gn1gitNzo3xz01?SZaeng=gQelvZMMX{L|
z!P@dVm_6x@5?i%I7pE>k+FotZMA0x<C7X==l~*8(<SxL(d8p#_F>vafj#TSNu2E%E
zc<I4|MK;;g{W+uISLY!x$<YwK;ya*_5$0%t;srWfJPC&KmO_Kjix8li1V8$h!qb#r
zaB;?H@H^BO{UmhLtne_zm460%lgX5&Q2~tUaU9;PJWC}zkB1w1Ezr!($EiE&Xc+%N
zjCwc=;jL5(RALSs-_-?XyL<!5!eSWoAQOVFwMU1(Or;D<8=}MW`@k);DD=G5P*6So
z3?~gmaPZYsfXqzP?%q>k3n#;(GiK<zzZPDQ-{17xL8Be(hNC${U%)q#FPv1k1fDr9
zhcEAkqNB09;CX{yAXt0=N^iV^_Eu+L?s5txCP`uFS$nuhx1)A%{X!j;P62=SeXzW-
zEo#{7Gq@jKj!uV<hO;Z=@L}g#xR^<Dz($lq@PSZta(E0J*v+7CmRq4!+z~jo!V}G1
zFQ!~1t06#RjLscw0uJXcL16P+P?ocoQh74)%6|l!Q8f=v$CBS*v@C{KgZqQp;R!hI
z^+ioa>;*}YFN%rKP!oqVK#yeEuz8UuitgSCc%xdQ{<DJMP|j88e`-29+}QzzT^SC2
zx9)+r38P`0=~8f;(i62IzR=OT_n`6U)yU)ia}ZT0!@~tMiXr<{Cb!nYhVd@wYt}7z
zezO6QIrUJ~{&3i8Qbm0kZcBL#BYRS}ir{!}4IUziz@0yR#OrsdX+KAITuNTW^r$U`
z`JJcYc==>p|6?WU(cuUB)%_DTa%zYjeih+yCm-S_c^Zf~N#gP@6L3Y3P3YwYD?DR`
z5#8u1dF_AOf%v>{1ikhC9dvhO2i)qIFFx8X7cZV|MQ0Qn<8+7f$fW!hHd@seRlT*M
zy}xz9>hg~CPi%)1L`L+9im&L>)b@C8jEr7!t3Q4}*Mj7d#p8C*hZ6f|GX4;@35%L|
z(0c`k@D^1+de<^zJUgA_xfb8SY178z?}MZ9_vdkRf%P7I(Ap2DO?rc6Gkx&K#%odY
z%=_>>U_RPcx)rJCj0`r={Y!EQ>mJrUTb<NQsg72w8vp$cfjALg;nO3YFI6WwddxL2
z)VI3yc_}NiIjK$6erB^Ii!}Qt-_so9MYA7l&DfSj4>gL1E3~0@uh>m?TbX`_X>@SL
zV7ArmLE6Dxz1ZF5-Ppp}d0O9hW!hEqe`;0>w$iIFI<gK=S*_Kawc4xCS84O#lxpy-
zKH8C|A20_UmueqBT|_<+wuU`9GKHNSz^i!Q)Qr7SmZdF>-l-KF9>MP0I$hIbbCuRL
zg{ts&v()UI6u{oU6~%TUuYb$mF;~-IUxKF5lKafuRSUIypF1<rDt~S36*Dz!zWvgM
znio`t^*p1Q@bH>8lfP2qRk}%g#hZMlbL<}N`Ix>M&*xV(+j(C!(wCbxyxd##kKvaX
z-K!U-)*`LR|3q5tYZq$JmbBlRexPxsn?7}>=cSj^pHzjo{mDhRP0M|F_l;xt?N72?
zn+MRP?<eDvm0js`SGM5q;hXUBi7n|JW$Tcq*;U+H)0AGI=z@ilPT}|0pJCIoWyp%3
zh>sV>(;r?O#bK4<cw!&&6M@H>c%u1tyf3yreR}66Tz!wG-!FZO#!R-PXEG{$ttgDX
zJEI4_yCjsR_qV}aZ`{N9a5hc%vZKcqj3HZo+pu|)8F+1@1^A#nd0SgZXS#Qbqd4p5
zTDpCpHC_F|7XO^)Mqj_S276u6;K*`o8c+Oz54Gt<I~KaoCTtX*%_r*eh9@!}X^sa@
zT#f&)Xrp|Rh4K`kqz<*2>8b5?rwiKN39)|Bchj^sqlUAw>W)<jJ#$#!Wb%U1ejnMV
zpUB%<&!to{jVd(<PMB5Ajcru5$|&2<=&DfLBm(@z(txTCJv(ZT?OLbpx6{ur^3+#t
z+rHWCybWPh^9$Vkj4KW>JI=ge7ajhr-5EUB@6fT8RmYe8pfgvj(l#33#&6iZ$MmPR
zSqv|(m1b=BJ=*AA&zO>#J=lAKOQ@NZk@mtHDXS5l)%p%NuCYDPOM7YJ^vc(L7igYe
ze$94MZ_reA+fj915bbx&z>KwDW2G&c|AA>W!@_UY^Y&G?RvY|Q#RoG_j%RDP?KG$=
zi)WZmk#cPpA1ym}$2WZG-Fj_#Gizq1uNOO#Z>-g{?c&#_Ux+rewaV{E<*uqR%FN0R
z4Te^2x?JW5jc+j9Z%o#1UC_L$+C$3jSl6D3j^3v&9W<Q@?6yY}{<MK#<L^n@`9($S
zvvnKU;dWEC^DfYStp-l2a`V5d$@y_Y8&ed`_RB2MSdAa3Jr^^L^&B3{R?cTD)rNBI
z<;F|sM~m-ihL=5H_cFIMCi%0gK3{0z7y8gnGjUX5<*VE;n!!&x`4!ogRk`$O?bkGE
zGgI<)fws^rkUjM^&@U(dnHIK;U`vXv*oLzrwE5B)w(y9S>37hcA*!Bv<-bxJu(*g#
zu5`qqZEdxCd#tP+9X3#V$#DoX-Kmwfb;d#VMZW=>cjp4C6yv6_3Ga)U(4Qfiu?Gs+
z)H%UyeA+r~PH_pl^}sakw(8k_Yxuk>{H(Nsay721A};{d>~Ur+zWQi;pF6`2vTDsf
zr}wgE<!34vL@2bf3-{Q;fwooWT;5cSl&Y(O3ZMA3%Ku(DanmVn)r^bU5~D!!8ToSU
zz45L5c3aNXtcX6sUUasvGGr-DaCI3w;d~x5n|iBZXJ29S=6F|CuXWS($edi2@!-1O
zjjJ;D?M0RL<tahc<<GHfL5#X;S+C807GqvAvP(BL)e^g^;L!DKuZMe?v60u=t)047
z?Uk<fOProrWj{4XbHB1*mHqIYe%sR4RkZo=s&ZBTpsHSKS9bZMSz4D>lUT3$Us34t
z`)tXRNUhhQLE4m?$Fy%>*VhgWb!QLCE^9^<ePur!FR$=feupjiv6x+P%(qH$PU+X9
zoutZ#zgE+&?OJWqrdI5=F>jcCytA5HvCo(n&!#fp+Q~IdW))~Vu6&}=JSo#m2y*v3
z?f*!#*ZDo0bG}-8B5b<0wA9njWmblEg@ahzRa#ON5q`oi<{6JYSX!pdclfFe=~Kea
zr=PP4-s7}^v8$LJPqs7ToyxS+9(!v#3no-;oxZ})Zed?`{_U@t?)}ZGE_7?jrniz-
zxpzwU<Ih=Lx%&Gr?U<iqSnqksZ0i{Zv|8sbeuWdgs;=zZ;<sjEZq=3dp%wL4e$n2&
za+>v8>dH7*f7P6PLa`lI^{8zA>JW>2PSLE`x<S)#j*H*eYXMajGdE}?yBcWoJMU$0
zJXUL4d}{6Y@`z>CpkzNyMeK6+S$uo8S*MqpY3qIbJfGxN^;of(4!LEiZB{N~d$r49
ziflKtIksoCm(T9gn1AMLmlmyN2O3Y&hGkg#MWh>6O^*G-JTa)QnsL6wui1bgcIm(^
z+UBde(}{ztSf9dh)<eBi>v`@{#b&$M%Elv#*&*}%wXp*fY}8h=W^$HcPg`o0j*fm)
z<5z3vt=_~8v(akzjB~1VHosdbvwp%J3vtk%Fgu}Lx}||%+=cJ>klPlz6|K?u9yiju
zs6v>yF&(unI=s@Dp4`GHrCXTii?W${;UBe=8ujqIKDAuy<+`4oJY%)?(SRAu;k(&d
zr{FV8&Q_XP{>4MvZptclSYmrk^391=V*+RUopiOX@|`=}FE$~rYGLka_QT%m+DA(*
z{cd<q(sci9>gNR3RUfA8u1vVrl)3Mk$c~j&Xq+ue*#qRIj=g98s4O-N*Q~jlLm#Rx
zp&Kj<)MRKxZ0>{h+OVkE%&Sig*lBOK)2oKA*EBn9uC)reP-!)P3A2920!@U?OvY`{
zGEK$o2TW$vHI4p7M`pi`l)qkd^bf^Rt8r^o$GF%mCTyvc*)l4GSvq(cle}salY1hF
z9&!CBetXiCz7akFpZ~fZ7a4Y;yX@{sce~J;?Y14U#eL2&;$GIQy7>o&cWOC}cbl^P
zCgd{jXY!e6T_!OGQG1zPjyLJ>Z7u!m%09E7TYO@h-Lvr99P*yMld%*%erib9UpkfU
zxhoTkT=w8K-+XB4nn1c+*(7|ag%bN8Z-kdmi^a!srs9WT{&-730X88o7Z`eL3HCm4
z8h5^X3<rI>i3A37nTn#$j9~K?Oso?6)`o26=Cvf|<&W?5#Q~EU(e-QC?@>NI(=vn^
zb}5f})pjxt;`PUS)&(QO+aAO)Ex>D?cj3@`)_%pl&sejJt8A+oFWH56FSF}H(^1~Y
zmdu2ClbBemGW@hjIQ^;3D8}Hz0_Mb-rgRT+A>HmnV|w*sKK(s(03AMf5^m_(iS2u^
z6}#n2W43f$Q+C5H3l=gPqYlTP;oZ-U;x2olap3Wz*m>P2T&!tM4~tF1Q|=tZB`0jK
z+BXpQ895y{AKwU1s&3BgvA;~Gv=pGNZH{95Y2<r$!|OBonRA#c)$YuQ_f<H$UMk&p
z(?n*;lrhZD@fP&;7JIR1Cyl-1C*YKfeK@3QCt7#s9^ET_EuD7h1$}+`M|#Y(9dy_0
zgOMm~5}vK9#3uryaoOlSxb3uJ{BZIUWYzo(owO>DZaZ&0PAfW$<7N5u&z@=6{&`n+
zQ4(Tp^DWtAV@LK)D<if`j|wDObd1iwkVv1I-hfU^=uP`ITSG4>pF>|7AZ835dNP-c
zr!sMqCoy9?jA1t8^Jx0dCFGsoubAlia(Y_-4$QGZChW3d#OOWOk&eB!j?M_+V^vOX
zY<ZxNZnUv4ogu~S5Z{-~haq<CBBdGI_OcBde&RjaUlGH=%m<9$MW73v%jl6#)r{Y$
zkIc=8X-vCLH<`!F!tj@~{pe=j<}xL>wlk4EF0cc;EM&`itzZKVR<hgU=dv#zyhW!w
zyrE}ZUBV2xMSdu&zC=eY$z+xr>|@%lF=2enuhR*+^_c^%=FIrj=1gFq7I`K)&_$9N
zc>nyCcu`O+F79tcUmTf;Z*-W?PEZbI*KEpVZFWy*hqB|?y<-FLsr`4c>Em3y>~tZb
z2u=H#&BY5JWTKEWi?D2`GhOC4kbJjbG4AJVKs&A5h)WE^82JEqhVJFT7|NrVvcOoz
zJERZ2Jfe)*zVQM12HZw^W1%hc=&BLB{8&BK;Mxdga|<OiF}^=DKesp2`*~X?X6p(x
zd^={cLzXaG__NRvlVjMhz?ZpP(3#2m(1rPF9>iqL?Lud7H)FcD9L0R|*~l38>dCrK
zXu+Bf>&YHjJ(Mla@na*hjL?HA-RbW=HsH*9uK396P%OCClfLfu8vmL)fW7^r4?Fgm
zJKOhtF#D`t8@9v>q0Pra*pOy7m`T>=tYOo7?9p~E>;V@)RCGO#9or&~Jux<vJ>s9l
zmQM|2PYiwlvchV{-64-TzPN<J>sB!M)fPq?WsbW99Af&sD`wonM=^8WZD7Jr7cgr(
zHK5<%)y!L%#gy1gppz3W(LX#Znb|QF%*evNbVS<_y32s;m>q0PGl|}G)VA|zMW=oA
z>`!~>W|B^H`^!UU>)rF|w{H*O9z}QY2X7bpL-#VQiZ!DbI2h2{kfFF&ee!*SWzjTG
z9EnQ{X5e-YgXmeSFW^-R-RJ{rqwtem2e9j4DK>a=4{O+GXh7r6%$s&in5>{`+CuEb
zT)Jq<NS==)@A;O}8!kK3ieWeL+s%%&7rzU=r{Wzx6*+*ld(eWlopPJW`qYYT*Qzbs
z*PmwQr&iOw1-8st?GbuT-><ZE{VuxUk9nvtuQNULQ50SMi=oE^XVbfjhSCG7gh-ro
z4JY?GiSJz7j~7<n#38dT;$E&7(Pfw3cunLYtey526-yf7Ru@L%8H@L$3Fl7WtQ&oB
zbBP#_U9bl8WgD?m_c7>i)$x~qOZoiMzip&<I1GNOdsi9RPV)<0x=m~85$?B9*0$>1
z$n`o+WcI)OzB--NXrbEfWYl41z^v`ekB$_xXS;$~`d}(EZsJaw8{Plx4*t_0s&OMP
zoE*6A>G8x0`6-IZ?{NVoGuLqSm8aY4uaB)bdu%5xJ1#CiR$WywP`<M~rr<c<2{S7S
zk2kLH60F8kUv8;5rcOumKX)n*yGA11!>1y<>zEq8DHLrAh$k82vvKu-;EIa*kvJi%
zVdc|fabSO@0$*Qns6wF4FK3(F#GkkCuDIuT8D*LsLq<(2FyH+%@*MFBxjMZ?TZV3^
z=$-F`FS(g)EAX|bJpQ^IAF)@LFA{gg!i`h$5}bqs(K_5?YzbODd`QKbPpvDQ4<4`R
z^Y!7juPtr1m9H;HZ7()OMFWE?{GDDQJNH{?EXh)dNb6N$+G$VuL{-%`{h}KFFMHr0
zh^w^+YEXCf$`!BVm$$rDJ<9ZMX=UYoV%ZpP+uUrgZ*6nEHdt-$&>D~Mri5kQ)%j;S
zkaSA#A6J%npP+2L*?z~oCXO%mI`gQRS489`uk~#@dattC+TmTtAzq`e4fb-s+|{f8
zJ`?X}mp^#<Tzb)A@vJ}}$HMyF56owKU2J6GW4mIfxAU*=-Yr&r_I_|~k=J2ufY-4T
zX5RdQoDQp>WO+@O4fLvtw(uF}@9MR3;81U`rzgF&Z4A5{bSd;o^j+mO#yHn&{E9_h
z%0oT9wj4U`_1U4?o4n1z>-N=DZ-)`C-u2Hf^vX_M(80W(h4;%T<jt9R^SmYP26|07
z(6)o0bw{rSYfp73C^hi<8+Cv21N_AgCN=zU=I{}UYijtvXunRw8jROAZN0pY_dlAZ
zsT_F1F{u)noUatgQg{j_Unx|NSF7v=X>z$RMafH+@B}hpLV`Nj?Z^V|-@-J7$|hAN
z5ed?Hid3OMLBNxYRSo6*bZ@ytB8v0YSJs@=Dyt|Ra6ba8o~`zrOyE6!_aKn)6$-V=
z{ZDJYBvB?8DU+lMZxYJk{_~UxJbj;Pl}%JspZ>i%>ANJ|sp`w(6hir^n&GGN1wyr|
zUShh`Tfto`yyIkJ)GFfua;$xJQiG(1Dx(BY=jG;*Fq=o1aZ;1W$jIKm$Uog7%s{D*
zQ`yGJ(o*7u@w_;pL^fI%akDVeFM;8||Ng}Q!GM}0q_|Fh2uWp_q5e-&3NsBe33JJ*
zjZpq3EPkA9l<*&fRm0)`p8g-lfLe@+j{J*m%y^0<eyUJiTdXp!l}m0G|BvJA-|7F0
zfqyaZF9!a_z`q#y7X$wj1DxykPg~R^NtBqRGeQ5@5R)bv82tPHUmW~P1OL*%zclbK
z4g5<3|I)x88u(8e!h(b$cyeK?oWxl>dUsT-EIFg1wnUVYKtjO0lvJLapAs*Vl87xA
zolR4UBnoeZP#7;tNmQ%aMC&Re)1+}id9W}=NaE2lxi>Gd_L3(c_guW!G%7F{gL*27
z+;VYb+c=T(57h7KBnwiRWRYJ#s*U}#JLmhI$*AbK#D3<quLde(QHnsFZ~sjjGt&KJ
zfl8G(dhx)_*(PZ!t0caHmnKS4Qoiai1C<4LDNGc~$xT{ZTAa#;FVM%x^+9IGIzi@m
zkwPFBlE}HrqG#=8ZMa!wn4%6dQ<;*`x?D;6689jX9Z!7_TEzT`9sai<w74&UjU>-D
z6Ub5$M2TEv-c%}!7fN)KcNC;4lrkw#S3<({H6R{j><S@IqEJfJd8+!610r}Kp*?#B
zMe@RX3<-+vrw%h#IrIx18uWK@ca=2<+$%J^zYeUi%8t7a>=QY#PE*Ukpzv@W*IpfA
zV5l<b=^Guc&P!9-)Ey03^N4g3tB=qPHcgQhr%>7IgZOm^>Lwhcl=I139+XLB_d%F4
zN+g%1D0m4nIWJx=8by##X8eBv8ZDEPL`4qBA|*}AOH55usKX3Zj@*ra-0)I`a$XHk
zZ8-<@k8%S-p9%bAp}u*Zs!7d6<U)lkO-?ou>Ozx8IiyO+)`FDV9Z`_c6Cz1ht4yML
z1cs?qwn9E386GJXaD<(wa?lq@`C|x2aK%D0LMN3S+0#%66$)<Cg)dL!Hc>oOu7u(U
zHe{*1RKA=qkq9Lsg_NgEk_-9qq?LsZfb{;q0j$a7NHs^NT$hAqB^-Aq%4MUKN$OmM
z%1I(i<i!g|i3CF4?`e=>+SHT@$fk#qYiS%9HGrU!w9t2<kR>SVw8%|UIZI>$zJ%MH
z`FkQ_6&Jm<5Etl9X)0r~uxeEU677}?WU^$kry@<0C`CMiD)KOu6{)UE6jsuBJqDzD
zv{*~{>9RB>FNH599A+bs^QEbz54nt!CJu7pXkB2Gpj(Hwnnq4h;XML_B7!3O^Fkwe
zWYdTj9W+Qs`8`AXh47+61H%SHab-QCd+HgY54r!V*i_X(2Tcx9JaXvYBQh{Z-;sgJ
zp6m4Q!utKVZvHAwBf^E7Be$wG!!u7Jn_OdMDe8zBhAL~RgfFdI>!1@BJOV-xq}$q3
zaBDQ0PlTOrIfU`#-d49*gdP%Qe2GFG%i)GwK2j1dm!)zA^=b=JMS|osT~U1nK_d|v
zI$5S~nM(%fP-jW%4#k<^moJb1jp76%1&JgM_J6H+GgS@#0;ru>@WfgOb50tiCW*+9
zt>Xkdg&;{7pC%EK@p+KHFoQ4yAvynsC%3{hl{Hy$G7){qJ5psQ97SlY4q=Fw5aLP`
zdvF3$%;SzKYYxXsz9N~IkfM%MS&{wAc+v$&d<1y{iHPVR-DDJ`wtk9CF6B!^V}(4u
zTvw=bYk+F+^Hfemu=C?2Laq-U8MYu<&(zkOe9?;)!k6}Xc|&0SAt{Kwu_w4rCCvpg
z!pu6nkyrEm-Y4azl%`u<@81AKCyI1U2<t2BwsY*KD-S2dfxmkr`p~B45qw2D(Q8E9
z3sZC>CfXoH_ehR%8p=r%j)fBB{6wALk_b~0iA3P2&nk|u6eQ`C2vN>Td0L8q$TXfX
zRiq%on{3V!79->$teG%}3$D*bHKiPC^gZavlh8L&o~_8#MDbiVHCW>D6S(Chqaq?R
zO{Yi+z8i=V$uiUe5g{y+q!FpB8?lgRRIUTAkZ^k~w)0fY2?VE0Yio#Zsns0Z)VS83
zI{8nYB@_@tW7K~<fTxs6h>4LxHjBgJ#p`s`W22LBwbYxZs+S_;#miC%c_>Nu9KY31
z<*XbTk>CoVpU4fdY%Jo1!c?h<)T<oo?nEbuBxJtoBvX|mw=hXWt#PELFXl+iSs;^=
zE))u0oo2e3*DRnip%dAf7YcYf1tLi&18AaOYw~QNoJd_F85BIBluxiqh7zeVP2@|Z
ze8QTqN}a~u2`*I`NjOH0ii<e=X6<*CJ<%<ql$y0tkkJtJ?JJg&$#c5Lg~xGeyfA@J
zL^Bb!I<)Bw3vN)2d9|GI4;=7QIi_~`U0GuxB}n*6r7%U$E4B%O6rqxrB#Mt05~V~q
zomdUzPpGoj*Km#W`}ahPD)}i%gs2IfjQWi%!u`7Ye*nsbk~D(zR2gBDzn`FTu2Wp6
zmG~8ztjZ=qE+p0w(Sr&lx2$$#p%X+YM7&5uDMG%SD;h=6o5UxSNmP$ePB0)INtzP*
z#p!W{a3qiL52wcKaYhzTAe2aWH5fG{DwZqL$vnAmRE=~Yo&Hs>7e%Dpnx_<T5}7Y1
za+OGRuB8rtoP;I?&41tUII<?In4%yCl9aGhT8fC!I}tRte_-N8Qgv8&mE)fhu2quW
zgfLV&{3++k$kGvTH<inuVkzNvc{-06?m{9#ITrC%HU5uk9Wik$Ols?lkRV>9(=4M!
zWExUQYV8AD|MVatqb4Rv8ZVLKZ!YOe9o)SpXsN0L#~~tx<Fi^M>O{+b2dEY7MD|z_
zAt&WTgdmylnXzty{Z*EUMDQwz!6GCmwHzyy%Xm^DKSiA@9>cM}Sf=9xl|zl~S3}8!
z>Xn4EY7OnW#7~Nw{I`{^(J^XWDoL8klC#F7gxhphs<}SdMo0$cP&e;}(07e_Z=y^m
zd%VOxSIPv$AyJA1{Wu-MVVdOKh*DCC6#oO=L_baiSuOI%)%{j$9BJ#=qef)u>p5n2
zR3xP(BoMU}M^2oM<zxXtoEgy&qzf`JTm4m?2Q~a%QX{p9#!8dw`B`O5M3`D-o<b}!
z?n<a~s)6RXg@`<%{t>;&8k?vb|EN7OBqAh<svuJMALa3B`g|mHn3bwg9r)UE9pMw<
z`$tDLsRLD`5jn0R<KS`Bl`u(1U#jMHz-n!5q6|2uB2=j}*z4B;`Ge@DvV__sBp#tH
zkyM{>Boz|Jh1llAU~W*S-9O9&1C<+JGMW${$JKnIY3l$I1wh0tw<xw`1^-l#0#8nF
z)XG2}aT98Fc&$dQJHJRh`>2{H6Lq9>S87nht98nV?;#?&T6wD0WX#-Ut%4_utTRqH
zPvpOK*|HX%n^Y~rbsm~RDw8RbbTS}b)rcrTy&%&YK_arUIvf#lYN4ysX&>$h3Z*dh
zZy-W`Ejg$<Yxh6E>JiD!xmY^$Kqlv?wYEghM3E{>PPj{iqllZVG9d<jB=O61@}8_g
znu2&8g#B!E9$$?@C1^3>ViD>vYgL;+>1Mnn5|xk;|BnbnA@RfbDP$cu)uG@t!Xr*2
zh#%|pLX+PrkW<;j<{-!<?iKOvh=$B1cA(y<`y&f-RpwG+5^$!pPEe>C{#ksTn6Jfj
zO|?tyD*Q(J@3pAy;D4c3-@hG)U2aio7OA#X&FT|jsHch=5~!)Rt}7z+9XX`RMO=a&
z5gNn?6Ovh`*8tl|i1y^Th%-&(gsSxo>g$2(8Xo!z;yG}wIenVODH=|xaz<&Ss?mQw
zgZMqEN&MPT|CjnWp|ZBR!GEbHBv;e;Z%QSjkja%C8S97jKdJR^!2eCKe?!{qW%HjU
zu6dNTgh=07cZhf_oW>JyxD^nh(YZ~De2!Qx{$yeL*JNR;92AMfk0w$}Z+z>`kM7M?
zHXNUFb=+ceL4*3W;=UHoocU=>EF6xGb+#i(?(}!Tcb~ZidXWGIi9yIx(`zgAf`yn(
z4gOK_XV}D`BY$BA!L=jPIdeT6`shS%uzz>G`6(2ttaL7z&Iu+DT&vF})s3WXMfcV<
zvW(N%@jR}IKx=EDEeR$FaU}|sq;Lvn<vNWMe;_=Vt1=@_bX>g7{*CeYu&*#iWvW9v
z(M+{$uCmg3x%#*RXZ2d>LQ*2)a&Sqk;ufSMeS)gEHMtbG<qn*SDek}>I5$%);10>$
zK|&5(T+6X0t|jHC@&tSm9TX|kIgMujk8-ligtF`ZDNE%zg?Or}CY(zsApTiA=cP-8
zb+3u!B5ImA6P$GAECb^5S%vE++Mm1S;-DVlkt8uyWf4f+wLgVn#>BHWA;)2XZW#v4
z4dM-p4CVFW4f`4B?u^KtYrIiEgLs2}2I@$X!D|#JQ0I`|^~^h(q~Ma?35s;s;)#fd
zV_qYa)G9N*5+WDLBsxICS0s$n)h9_N75a#Y-KCJFr1MjJlG3D{2qX|fo?P<xtu9&E
zrzT~XM9^#%Le5F9P4$f!XcTB+U}$A%LjJ2gJ-MI6b4zB;BjE`_G8q+*r&c-r^BMXK
zWMUccbkQOX!sWj~)Pd94Cp@({li)CoByZ||dhhmsSrX!R5~GkLPA3pgStscgLM7*n
zaf>6KPP!S-4OE;@uEbf~%GmIDBh&a2E`G(0$eV<{^cDw^#tCu~z|{wayu0WkYKm?o
zE?yTCDmxyJpOPZ0i3E`@h`CXZ$4eev<E|-4Q6%@2f81N~IG>&bT1c&U8aEDe0wELV
zfC&}}#VLrlt-Ec=-PO_`k9gGjmb#!40Zo+1;z)>zfR0zEWvjEJJJt{~;hf**le&<E
z+>K|b>eX>*^^q9s8bQywlWLVU@s@?DTx^C5{^=uBL}PF^K^!3iOD>?GyHykGnHcPn
zIF6!frcl3sjina&yMl0pQ%$UZYf3b4P3V}oTs77YCwPA^ms+eO$nX%4=8j=TgiEZ$
ztil`vt8&fdHcG-LS%!uNxrVt$iALj%l!juX<;EF=TMP{{jAD$&8;>_3{9}?~Tx2-j
zSa)SYu1w+$Obo`GjyH=p$}mbcljkZ;b4_xMvy8@@XPAr4@(gke5)9*w<4vYmjJL=z
ziZ^X+5O0)hBp;JuG2WEio7LRsl6y0yc`n!5EW<d%B*QerEZ02Vyuf0-rO05smC|sz
zMTTX(MPq{^!wjo<%Ps~PR)l{&B311JNkD;E9w~`#HI|~AEWxcNjo(dY;0oj2^x{V?
zPA8ME!GIybfFY3+j)7LWL{4Ooo;isyRjqPNAQ6}}5~m~vJV)PL{Ef(jB#Wtfc2QY$
z(aYR!>WH(3fd+){;`l^`k!2uYI*wI!A)=ZvKlxpN&b<A-3V(>b*6bDjDf-<lH#C(w
zi5ths6r8Xlaa1mdM+QY)50x{Y)Nq1;<oe6Wb8B2`C6N-Q#022T%P1w42&vkwC-WLr
zNc!Q1Rf7zbHFu*MInlP_`9!P}*FqnhBlK#b^AovF&GlF$8(j{6RQd17lsJjwv3h!O
z+3|PhH3gl17ueN8aM;x;fSQs{yx%3}wWCqlk%;M^W#H6>d9BkxOvoCmURkpqmcMbS
zj@WA$=wU#Fr-iYFp$$1&8Cn@x7+MfvYDVsDNDcXK(#Y7v(7}+*&7pQwH42J^>Buzo
zg|%ZRR3KN2OE}%PheMLJj<58>OD*2bmG2-pAfu@yr>R6KM61O)<T5hSLoDO4V^6{y
zWTQnWO;r*LgM>|p$n+#Yw@BsxkF!$W^-R)A{Dd6p2;2)?nVK9_?f$=0o?1Lt*Nb>Q
zIYo*W=#C4?Q71RWi*)yk$+2b{R{CkwOd!da)579cq`uB2rvkXhA_=jI*ON1Mzmc5t
z#HG69CUT5bi_7$<t@_h8aw5_A-~DlomXb2@GVZXP9JrEVat<>Vuh5@Xawk*qD(++;
zUadc^(VhO;6A`VcwKg^v__gFLMG`aQcBBZGkywY$m?z;8va3iGL%fLduB>ay^%49(
zLn8vBX41Lb6<PEp1Oa45x`nnRI)wN<#IWWp7S1M3B|i_+?*Mb)eDN{@%}r(>Ig9HV
z6WlrLdOS$(|A*4nZ5pe^90bv{hq#8EUejI8R;SfyCQDuXhci$J`zuL|_Rnr$oqfeH
N!?ZN-kwyyd{{u9`YP|pe

literal 0
HcmV?d00001

diff --git a/human_aware_rl/ppo/trained_example/PPO_cramped_room_False_nw=0_vf=0.000100_es=0.200000_en=0.000500_kl=0.200000/config.pkl b/human_aware_rl/ppo/trained_example/config.pkl
similarity index 54%
rename from human_aware_rl/ppo/trained_example/PPO_cramped_room_False_nw=0_vf=0.000100_es=0.200000_en=0.000500_kl=0.200000/config.pkl
rename to human_aware_rl/ppo/trained_example/config.pkl
index b2a1fc0f5da7e00f65b2cbf789760ee444439b7c..183882db00937b3e47a1be5268e8299788bf2def 100644
GIT binary patch
delta 710
zcmZ8f%We}f6wL%kCZQDtLZu>#SRj!uNK}c3Sg--9n>3-Sco+g0XU3Uv<9S@$6EeUM
zo^7+Mm|tMa0`UQ`;4k0@*zpN?To8zrt#i*k=lJ^mJpQh+_sv@P`2DkWbiPz7QQ}d7
zl^_lay-)AU{6OQHhTDD=(DoK_MluRh66O~wYgf^sKMinmmL#a?mp&2H#c{-$lftcC
zbWDR`5)_gw)S8$nDkR*_KU7Xm?__|Qs8-0tnA6ZxKHTl@Sq3V)JQ7d|_c|7;YLd%j
z%t;FUnIT9O35s1N;C>Gs{#(bf&)~r<K}QPVNlXO`XsB?g#o!@Y*XYK^4eW?~t=cHB
zUdsE`)hR}GjY~47n2xDP;ZbMFhzcqbuB7oH=#(b?Ka92@DgI4^fzc_Dm<SR`a58Y4
zCW@2j!~qT>mvYQppoZE_b(n6Y0AZLf*o&5zpR><j^pUN-6b)iy&Cpz7t%BIt=@Yh0
zJ{peoQz+5HfKfiPPcQSP<1)@$x{vevevx8FJJcb_zuIT5F#lmMSEIKO?*QcUwKFZ@
zM@DTZbV4Tzhs*=H1Ik1+7W-J@N82#Y`?aO@$0iZ}*MHKveEndlJZC|=vZCP0COma8
z9wdw_7Ghk-IM7?5FaD(0Q}ApPvJ9SQFwMWzPF#7B!ArRfuLcQP)Nd^&8tB|Ubw+W-
Y^asX7@5k$&*#-SagGf?%vzxSj0bda6mH+?%

delta 646
zcmZ8e-D(p-7|ljaHd;{IqG(VQg0vuw1$*s<qJ@xHg4OY_<LqX#JMHf5elxR4SKUSh
z`*R@_=U(u>AmRgf>pSQx2tI<d<AvI*Va}ZMopZi<Kl^KD|GQK9{Nt<BI991tC<&<e
za!`$qwEE`SUX@hHwnxPVjcDG!mH1gqxsvTWOf6@gzFNCo4KM4h+L`9PT{LYZJd3ds
zgmK0L%#@MCs{T|vKe=DPgE`bFO;Q|D9;gsdci*v4sR~F;B|PjnsBTCDCew)I@MzBx
zq)G(E118{c51sh?j?<9Alf4X`EC+9<RIoB2=4Kc?MID2#tlY)E(988%=bB!tFHWza
z;|7*wL@^yvkptEr>ob#QsA=HCl?BPMpfZb;1Y(6Hk`h5;30?vIoVE5dlQ70{GN2J=
z0~lCpH#g?p5(A1(nd$%0{7Ayk53`tXOvd`U+gb{cYgmfLsqGq~sWOs^({|}<PGGh@
z<s;LH60;Rp*YDhm&W1j4n@*&^xh*HwKivy;{uYuQNcHK)r3DDN)eVIS=_h>10uVbO
zZAv@r52ixC4VlJ`b5};z8~^Jx?kwFrTB%MsusJ`kAn(I-{h@L8#<<s0@S+bd3n&WM
kvJ1hOaGoflSi&X7Fe%`b+=ka)X1~|$-Ag2eH@n%wZwKYn9{>OV

diff --git a/human_aware_rl/ppo/trained_example/result.json b/human_aware_rl/ppo/trained_example/result.json
new file mode 100644
index 00000000..eab61ab6
--- /dev/null
+++ b/human_aware_rl/ppo/trained_example/result.json
@@ -0,0 +1,500 @@
+{"custom_metrics": {"sparse_reward_mean": 0.0, "sparse_reward_min": 0, "sparse_reward_max": 0, "shaped_reward_mean": 8.03125, "shaped_reward_min": 0, "shaped_reward_max": 28, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 5.125, "onion_pickup_agent_0_min": 1, "onion_pickup_agent_0_max": 11, "onion_pickup_agent_1_mean": 5.75, "onion_pickup_agent_1_min": 1, "onion_pickup_agent_1_max": 10, "useful_onion_pickup_agent_0_mean": 3.8125, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 10, "useful_onion_pickup_agent_1_mean": 4.625, "useful_onion_pickup_agent_1_min": 0, "useful_onion_pickup_agent_1_max": 10, "onion_drop_agent_0_mean": 4.09375, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 11, "onion_drop_agent_1_mean": 4.125, "onion_drop_agent_1_min": 1, "onion_drop_agent_1_max": 7, "useful_onion_drop_agent_0_mean": 1.0625, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 7, 
"useful_onion_drop_agent_1_mean": 1.09375, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 0.59375, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 4, "potting_onion_agent_1_mean": 1.09375, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 3, "dish_pickup_agent_0_mean": 2.5, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 2.5625, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 7, "useful_dish_pickup_agent_0_mean": 0.125, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 1, "useful_dish_pickup_agent_1_mean": 0.1875, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 1, "dish_drop_agent_0_mean": 1.90625, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 2.25, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 7, "useful_dish_drop_agent_0_mean": 1.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 7, "useful_dish_drop_agent_1_mean": 1.125, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 6, "soup_pickup_agent_0_mean": 0.3125, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 4, "soup_pickup_agent_1_mean": 0.1875, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 1, "soup_delivery_agent_0_mean": 0.0625, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 1, "soup_delivery_agent_1_mean": 0.03125, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 1, "soup_drop_agent_0_mean": 0.15625, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 3, "soup_drop_agent_1_mean": 0.0625, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 0.59375, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 4, "optimal_onion_potting_agent_1_mean": 1.09375, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 3, 
"optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 0.59375, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 4, "viable_onion_potting_agent_1_mean": 1.09375, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 3, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.20000000298023224, "cur_lr": 0.0010000000474974513, "total_loss": 
-0.359554648399353, "policy_loss": -0.0014348567929118872, "vf_loss": 0.5835446119308472, "vf_explained_var": 0.0011739898473024368, "kl": 0.0003632040461525321, "entropy": 1.7912538051605225, "entropy_coeff": 0.20000000298023224, "model": {}}}}, "num_env_steps_sampled": 12800, "num_env_steps_trained": 12800, "num_agent_steps_sampled": 25600, "num_agent_steps_trained": 25600}, "sampler_results": {"episode_reward_max": 28.0, "episode_reward_min": 0.0, "episode_reward_mean": 8.03125, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 20.0}, "policy_reward_mean": {"ppo": 4.015625}, "custom_metrics": {"sparse_reward_mean": 0.0, "sparse_reward_min": 0, "sparse_reward_max": 0, "shaped_reward_mean": 8.03125, "shaped_reward_min": 0, "shaped_reward_max": 28, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 5.125, "onion_pickup_agent_0_min": 1, "onion_pickup_agent_0_max": 
11, "onion_pickup_agent_1_mean": 5.75, "onion_pickup_agent_1_min": 1, "onion_pickup_agent_1_max": 10, "useful_onion_pickup_agent_0_mean": 3.8125, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 10, "useful_onion_pickup_agent_1_mean": 4.625, "useful_onion_pickup_agent_1_min": 0, "useful_onion_pickup_agent_1_max": 10, "onion_drop_agent_0_mean": 4.09375, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 11, "onion_drop_agent_1_mean": 4.125, "onion_drop_agent_1_min": 1, "onion_drop_agent_1_max": 7, "useful_onion_drop_agent_0_mean": 1.0625, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 7, "useful_onion_drop_agent_1_mean": 1.09375, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 0.59375, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 4, "potting_onion_agent_1_mean": 1.09375, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 3, "dish_pickup_agent_0_mean": 2.5, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 2.5625, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 7, "useful_dish_pickup_agent_0_mean": 0.125, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 1, "useful_dish_pickup_agent_1_mean": 0.1875, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 1, "dish_drop_agent_0_mean": 1.90625, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 2.25, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 7, "useful_dish_drop_agent_0_mean": 1.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 7, "useful_dish_drop_agent_1_mean": 1.125, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 6, "soup_pickup_agent_0_mean": 0.3125, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 4, "soup_pickup_agent_1_mean": 0.1875, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 1, 
"soup_delivery_agent_0_mean": 0.0625, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 1, "soup_delivery_agent_1_mean": 0.03125, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 1, "soup_drop_agent_0_mean": 0.15625, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 3, "soup_drop_agent_1_mean": 0.0625, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 0.59375, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 4, "optimal_onion_potting_agent_1_mean": 1.09375, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 3, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 0.59375, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 4, "viable_onion_potting_agent_1_mean": 1.09375, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 3, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, 
"useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [28.0, 6.0, 6.0, 6.0, 3.0, 6.0, 9.0, 6.0, 8.0, 3.0, 11.0, 3.0, 8.0, 11.0, 3.0, 20.0, 8.0, 11.0, 9.0, 9.0, 6.0, 11.0, 8.0, 12.0, 6.0, 11.0, 6.0, 14.0, 0.0, 0.0, 9.0, 0.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [20.0, 8.0, 3.0, 3.0, 0.0, 6.0, 0.0, 6.0, 3.0, 0.0, 0.0, 6.0, 3.0, 6.0, 3.0, 3.0, 3.0, 5.0, 0.0, 3.0, 5.0, 6.0, 0.0, 3.0, 5.0, 3.0, 8.0, 3.0, 3.0, 0.0, 14.0, 6.0, 5.0, 3.0, 3.0, 8.0, 3.0, 6.0, 0.0, 9.0, 3.0, 3.0, 0.0, 11.0, 8.0, 0.0, 0.0, 12.0, 3.0, 3.0, 0.0, 11.0, 3.0, 3.0, 3.0, 11.0, 0.0, 0.0, 0.0, 0.0, 3.0, 6.0, 0.0, 0.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6989158420229374, "mean_inference_ms": 1.2856450085336348, "mean_action_processing_ms": 0.1339475425441613, "mean_env_wait_ms": 0.8226258031437905, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 28.0, "episode_reward_min": 0.0, "episode_reward_mean": 8.03125, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 20.0}, "policy_reward_mean": {"ppo": 4.015625}, "hist_stats": {"episode_reward": [28.0, 6.0, 6.0, 6.0, 3.0, 6.0, 9.0, 6.0, 8.0, 3.0, 11.0, 3.0, 8.0, 11.0, 3.0, 20.0, 8.0, 11.0, 9.0, 9.0, 6.0, 11.0, 8.0, 12.0, 6.0, 11.0, 6.0, 14.0, 0.0, 0.0, 9.0, 0.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [20.0, 8.0, 3.0, 3.0, 0.0, 6.0, 0.0, 6.0, 3.0, 0.0, 0.0, 6.0, 3.0, 6.0, 3.0, 3.0, 3.0, 5.0, 0.0, 3.0, 5.0, 6.0, 0.0, 3.0, 5.0, 3.0, 8.0, 3.0, 3.0, 0.0, 14.0, 6.0, 5.0, 3.0, 3.0, 8.0, 3.0, 6.0, 0.0, 9.0, 3.0, 3.0, 0.0, 11.0, 8.0, 0.0, 0.0, 12.0, 3.0, 3.0, 0.0, 11.0, 3.0, 3.0, 3.0, 11.0, 0.0, 0.0, 0.0, 0.0, 3.0, 6.0, 0.0, 0.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6989158420229374, "mean_inference_ms": 1.2856450085336348, "mean_action_processing_ms": 0.1339475425441613, "mean_env_wait_ms": 0.8226258031437905, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 25600, "num_agent_steps_trained": 25600, "num_env_steps_sampled": 12800, "num_env_steps_trained": 12800, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 12800, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 25600, "timers": {"training_iteration_time_ms": 4486.776, "learn_time_ms": 1795.153, "learn_throughput": 7130.31, "synch_weights_time_ms": 46.233}, "counters": {"num_env_steps_sampled": 12800, "num_env_steps_trained": 12800, "num_agent_steps_sampled": 25600, "num_agent_steps_trained": 25600}, "done": false, "episodes_total": 32, "training_iteration": 1, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_19-59-58", "timestamp": 1666580398, "time_this_iter_s": 4.5041868686676025, "time_total_s": 4.5041868686676025, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": 
{"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", 
"observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, 
"custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, 
"num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, 
"evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, 
"callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 
0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 4.5041868686676025, "timesteps_since_restore": 0, "iterations_since_restore": 1, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.328571428571426, "ram_util_percent": 10.442857142857141}}
+{"custom_metrics": {"sparse_reward_mean": 0.0, "sparse_reward_min": 0, "sparse_reward_max": 0, "shaped_reward_mean": 8.0625, "shaped_reward_min": 0, "shaped_reward_max": 28, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 5.0625, "onion_pickup_agent_0_min": 1, "onion_pickup_agent_0_max": 11, "onion_pickup_agent_1_mean": 5.28125, "onion_pickup_agent_1_min": 0, "onion_pickup_agent_1_max": 10, "useful_onion_pickup_agent_0_mean": 3.65625, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 10, "useful_onion_pickup_agent_1_mean": 4.015625, "useful_onion_pickup_agent_1_min": 0, "useful_onion_pickup_agent_1_max": 10, "onion_drop_agent_0_mean": 3.796875, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 11, "onion_drop_agent_1_mean": 3.796875, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 7, "useful_onion_drop_agent_0_mean": 1.171875, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 8, 
"useful_onion_drop_agent_1_mean": 1.046875, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 6, "potting_onion_agent_0_mean": 0.796875, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 4, "potting_onion_agent_1_mean": 0.9375, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 3, "dish_pickup_agent_0_mean": 2.59375, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 2.765625, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 0.125, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 1, "useful_dish_pickup_agent_1_mean": 0.203125, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 2, "dish_drop_agent_0_mean": 2.0625, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 10, "dish_drop_agent_1_mean": 2.359375, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.96875, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 7, "useful_dish_drop_agent_1_mean": 1.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 6, "soup_pickup_agent_0_mean": 0.265625, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 4, "soup_pickup_agent_1_mean": 0.203125, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 2, "soup_delivery_agent_0_mean": 0.046875, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 1, "soup_delivery_agent_1_mean": 0.0625, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 1, "soup_drop_agent_0_mean": 0.125, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 3, "soup_drop_agent_1_mean": 0.078125, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 0.796875, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 4, "optimal_onion_potting_agent_1_mean": 0.9375, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 3, 
"optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 0.796875, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 4, "viable_onion_potting_agent_1_mean": 0.9375, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 3, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.10000000149011612, "cur_lr": 0.0010000000474974513, "total_loss": 
-0.33062273263931274, "policy_loss": -0.0030750420410186052, "vf_loss": 0.559157133102417, "vf_explained_var": 0.001139489933848381, "kl": 0.00039957917761057615, "entropy": 1.7906370162963867, "entropy_coeff": 0.18297599256038666, "model": {}}}}, "num_env_steps_sampled": 25600, "num_env_steps_trained": 25600, "num_agent_steps_sampled": 51200, "num_agent_steps_trained": 51200}, "sampler_results": {"episode_reward_max": 28.0, "episode_reward_min": 0.0, "episode_reward_mean": 8.0625, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 20.0}, "policy_reward_mean": {"ppo": 4.03125}, "custom_metrics": {"sparse_reward_mean": 0.0, "sparse_reward_min": 0, "sparse_reward_max": 0, "shaped_reward_mean": 8.0625, "shaped_reward_min": 0, "shaped_reward_max": 28, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 5.0625, "onion_pickup_agent_0_min": 1, "onion_pickup_agent_0_max": 
11, "onion_pickup_agent_1_mean": 5.28125, "onion_pickup_agent_1_min": 0, "onion_pickup_agent_1_max": 10, "useful_onion_pickup_agent_0_mean": 3.65625, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 10, "useful_onion_pickup_agent_1_mean": 4.015625, "useful_onion_pickup_agent_1_min": 0, "useful_onion_pickup_agent_1_max": 10, "onion_drop_agent_0_mean": 3.796875, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 11, "onion_drop_agent_1_mean": 3.796875, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 7, "useful_onion_drop_agent_0_mean": 1.171875, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 8, "useful_onion_drop_agent_1_mean": 1.046875, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 6, "potting_onion_agent_0_mean": 0.796875, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 4, "potting_onion_agent_1_mean": 0.9375, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 3, "dish_pickup_agent_0_mean": 2.59375, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 2.765625, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 0.125, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 1, "useful_dish_pickup_agent_1_mean": 0.203125, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 2, "dish_drop_agent_0_mean": 2.0625, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 10, "dish_drop_agent_1_mean": 2.359375, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.96875, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 7, "useful_dish_drop_agent_1_mean": 1.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 6, "soup_pickup_agent_0_mean": 0.265625, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 4, "soup_pickup_agent_1_mean": 0.203125, "soup_pickup_agent_1_min": 0, 
"soup_pickup_agent_1_max": 2, "soup_delivery_agent_0_mean": 0.046875, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 1, "soup_delivery_agent_1_mean": 0.0625, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 1, "soup_drop_agent_0_mean": 0.125, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 3, "soup_drop_agent_1_mean": 0.078125, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 0.796875, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 4, "optimal_onion_potting_agent_1_mean": 0.9375, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 3, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 0.796875, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 4, "viable_onion_potting_agent_1_mean": 0.9375, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 3, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, 
"useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [28.0, 6.0, 6.0, 6.0, 3.0, 6.0, 9.0, 6.0, 8.0, 3.0, 11.0, 3.0, 8.0, 11.0, 3.0, 20.0, 8.0, 11.0, 9.0, 9.0, 6.0, 11.0, 8.0, 12.0, 6.0, 11.0, 6.0, 14.0, 0.0, 0.0, 9.0, 0.0, 9.0, 6.0, 17.0, 6.0, 9.0, 6.0, 3.0, 9.0, 16.0, 6.0, 3.0, 11.0, 11.0, 3.0, 9.0, 0.0, 3.0, 3.0, 3.0, 6.0, 8.0, 3.0, 6.0, 14.0, 6.0, 14.0, 12.0, 9.0, 3.0, 11.0, 6.0, 28.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [20.0, 8.0, 3.0, 3.0, 0.0, 6.0, 0.0, 6.0, 3.0, 0.0, 0.0, 6.0, 3.0, 6.0, 3.0, 3.0, 3.0, 5.0, 0.0, 3.0, 5.0, 6.0, 0.0, 3.0, 5.0, 3.0, 8.0, 3.0, 3.0, 0.0, 14.0, 6.0, 5.0, 3.0, 3.0, 8.0, 3.0, 6.0, 0.0, 9.0, 3.0, 3.0, 0.0, 11.0, 8.0, 0.0, 0.0, 12.0, 3.0, 3.0, 0.0, 11.0, 3.0, 3.0, 3.0, 11.0, 0.0, 0.0, 0.0, 0.0, 3.0, 6.0, 0.0, 0.0, 9.0, 0.0, 6.0, 0.0, 6.0, 11.0, 3.0, 3.0, 6.0, 3.0, 3.0, 3.0, 3.0, 0.0, 3.0, 6.0, 5.0, 11.0, 3.0, 3.0, 0.0, 3.0, 8.0, 3.0, 3.0, 8.0, 3.0, 0.0, 3.0, 6.0, 0.0, 0.0, 3.0, 0.0, 0.0, 3.0, 3.0, 0.0, 3.0, 3.0, 3.0, 5.0, 0.0, 3.0, 6.0, 0.0, 3.0, 11.0, 0.0, 6.0, 8.0, 6.0, 9.0, 3.0, 3.0, 6.0, 3.0, 0.0, 8.0, 3.0, 6.0, 0.0, 11.0, 17.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7028658943640999, "mean_inference_ms": 1.2718671499974072, 
"mean_action_processing_ms": 0.13463472279710942, "mean_env_wait_ms": 0.8300894303223083, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 28.0, "episode_reward_min": 0.0, "episode_reward_mean": 8.0625, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 20.0}, "policy_reward_mean": {"ppo": 4.03125}, "hist_stats": {"episode_reward": [28.0, 6.0, 6.0, 6.0, 3.0, 6.0, 9.0, 6.0, 8.0, 3.0, 11.0, 3.0, 8.0, 11.0, 3.0, 20.0, 8.0, 11.0, 9.0, 9.0, 6.0, 11.0, 8.0, 12.0, 6.0, 11.0, 6.0, 14.0, 0.0, 0.0, 9.0, 0.0, 9.0, 6.0, 17.0, 6.0, 9.0, 6.0, 3.0, 9.0, 16.0, 6.0, 3.0, 11.0, 11.0, 3.0, 9.0, 0.0, 3.0, 3.0, 3.0, 6.0, 8.0, 3.0, 6.0, 14.0, 6.0, 14.0, 12.0, 9.0, 3.0, 11.0, 6.0, 28.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [20.0, 8.0, 3.0, 3.0, 0.0, 6.0, 0.0, 6.0, 3.0, 0.0, 0.0, 6.0, 3.0, 6.0, 3.0, 3.0, 3.0, 5.0, 0.0, 3.0, 5.0, 6.0, 0.0, 3.0, 5.0, 3.0, 8.0, 3.0, 3.0, 0.0, 14.0, 6.0, 5.0, 3.0, 3.0, 8.0, 3.0, 6.0, 0.0, 9.0, 3.0, 3.0, 0.0, 11.0, 8.0, 0.0, 0.0, 12.0, 3.0, 3.0, 0.0, 11.0, 3.0, 3.0, 3.0, 11.0, 0.0, 0.0, 0.0, 0.0, 3.0, 6.0, 0.0, 0.0, 9.0, 0.0, 6.0, 0.0, 6.0, 11.0, 3.0, 3.0, 6.0, 3.0, 3.0, 3.0, 3.0, 0.0, 3.0, 6.0, 5.0, 11.0, 3.0, 3.0, 0.0, 3.0, 8.0, 3.0, 3.0, 8.0, 3.0, 0.0, 3.0, 6.0, 0.0, 0.0, 3.0, 0.0, 0.0, 3.0, 3.0, 0.0, 3.0, 3.0, 3.0, 5.0, 0.0, 3.0, 6.0, 0.0, 3.0, 11.0, 0.0, 6.0, 8.0, 6.0, 9.0, 3.0, 3.0, 6.0, 3.0, 0.0, 8.0, 3.0, 6.0, 0.0, 11.0, 17.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7028658943640999, "mean_inference_ms": 1.2718671499974072, "mean_action_processing_ms": 0.13463472279710942, "mean_env_wait_ms": 0.8300894303223083, 
"mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 51200, "num_agent_steps_trained": 51200, "num_env_steps_sampled": 25600, "num_env_steps_trained": 25600, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 25600, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 51200, "timers": {"training_iteration_time_ms": 4118.887, "learn_time_ms": 1423.073, "learn_throughput": 8994.618, "synch_weights_time_ms": 29.77}, "counters": {"num_env_steps_sampled": 25600, "num_env_steps_trained": 25600, "num_agent_steps_sampled": 51200, "num_agent_steps_trained": 51200}, "done": false, "episodes_total": 64, "training_iteration": 2, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-00-02", "timestamp": 1666580402, "time_this_iter_s": 3.7993717193603516, "time_total_s": 8.303558588027954, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, 
"wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, 
"lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, 
"POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, 
"no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, 
"simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, 
"logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 8.303558588027954, "timesteps_since_restore": 0, "iterations_since_restore": 2, "warmup_time": 13.401320934295654, "perf": 
{"cpu_util_percent": 21.78333333333333, "ram_util_percent": 10.566666666666668}}
+{"custom_metrics": {"sparse_reward_mean": 0.0, "sparse_reward_min": 0, "sparse_reward_max": 0, "shaped_reward_mean": 8.375, "shaped_reward_min": 0, "shaped_reward_max": 28, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 5.010416666666667, "onion_pickup_agent_0_min": 1, "onion_pickup_agent_0_max": 11, "onion_pickup_agent_1_mean": 5.1875, "onion_pickup_agent_1_min": 0, "onion_pickup_agent_1_max": 10, "useful_onion_pickup_agent_0_mean": 3.5520833333333335, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 10, "useful_onion_pickup_agent_1_mean": 3.7604166666666665, "useful_onion_pickup_agent_1_min": 0, "useful_onion_pickup_agent_1_max": 10, "onion_drop_agent_0_mean": 3.5833333333333335, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 11, "onion_drop_agent_1_mean": 3.7604166666666665, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 8, "useful_onion_drop_agent_0_mean": 1.1354166666666667, 
"useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 8, "useful_onion_drop_agent_1_mean": 1.2083333333333333, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 8, "potting_onion_agent_0_mean": 0.9166666666666666, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 4, "potting_onion_agent_1_mean": 0.875, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 3, "dish_pickup_agent_0_mean": 2.78125, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 2.7395833333333335, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 0.125, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 1, "useful_dish_pickup_agent_1_mean": 0.19791666666666666, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 2, "dish_drop_agent_0_mean": 2.28125, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 10, "dish_drop_agent_1_mean": 2.2708333333333335, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 1.03125, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 7, "useful_dish_drop_agent_1_mean": 0.8958333333333334, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 6, "soup_pickup_agent_0_mean": 0.3020833333333333, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 4, "soup_pickup_agent_1_mean": 0.28125, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 4, "soup_delivery_agent_0_mean": 0.0625, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 1, "soup_delivery_agent_1_mean": 0.08333333333333333, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 1, "soup_drop_agent_0_mean": 0.16666666666666666, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 3, "soup_drop_agent_1_mean": 0.15625, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 3, "optimal_onion_potting_agent_0_mean": 0.9166666666666666, 
"optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 4, "optimal_onion_potting_agent_1_mean": 0.875, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 3, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 0.9166666666666666, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 4, "viable_onion_potting_agent_1_mean": 0.875, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 3, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.05000000074505806, "cur_lr": 0.0010000000474974513, "total_loss": -0.3004382848739624, "policy_loss": -0.003537239972501993, "vf_loss": 0.6018714904785156, "vf_explained_var": 0.00017357245087623596, "kl": 0.0004423881764523685, "entropy": 1.7895737886428833, "entropy_coeff": 0.16595199704170227, "model": {}}}}, "num_env_steps_sampled": 38400, "num_env_steps_trained": 38400, "num_agent_steps_sampled": 76800, "num_agent_steps_trained": 76800}, "sampler_results": {"episode_reward_max": 28.0, "episode_reward_min": 0.0, "episode_reward_mean": 8.375, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 20.0}, "policy_reward_mean": {"ppo": 4.1875}, "custom_metrics": {"sparse_reward_mean": 0.0, "sparse_reward_min": 0, "sparse_reward_max": 0, "shaped_reward_mean": 8.375, "shaped_reward_min": 0, "shaped_reward_max": 28, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, 
"potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 5.010416666666667, "onion_pickup_agent_0_min": 1, "onion_pickup_agent_0_max": 11, "onion_pickup_agent_1_mean": 5.1875, "onion_pickup_agent_1_min": 0, "onion_pickup_agent_1_max": 10, "useful_onion_pickup_agent_0_mean": 3.5520833333333335, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 10, "useful_onion_pickup_agent_1_mean": 3.7604166666666665, "useful_onion_pickup_agent_1_min": 0, "useful_onion_pickup_agent_1_max": 10, "onion_drop_agent_0_mean": 3.5833333333333335, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 11, "onion_drop_agent_1_mean": 3.7604166666666665, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 8, "useful_onion_drop_agent_0_mean": 1.1354166666666667, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 8, "useful_onion_drop_agent_1_mean": 1.2083333333333333, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 8, "potting_onion_agent_0_mean": 0.9166666666666666, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 4, "potting_onion_agent_1_mean": 0.875, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 3, "dish_pickup_agent_0_mean": 2.78125, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 2.7395833333333335, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 0.125, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 1, "useful_dish_pickup_agent_1_mean": 0.19791666666666666, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 2, "dish_drop_agent_0_mean": 2.28125, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 10, "dish_drop_agent_1_mean": 2.2708333333333335, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 1.03125, 
"useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 7, "useful_dish_drop_agent_1_mean": 0.8958333333333334, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 6, "soup_pickup_agent_0_mean": 0.3020833333333333, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 4, "soup_pickup_agent_1_mean": 0.28125, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 4, "soup_delivery_agent_0_mean": 0.0625, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 1, "soup_delivery_agent_1_mean": 0.08333333333333333, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 1, "soup_drop_agent_0_mean": 0.16666666666666666, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 3, "soup_drop_agent_1_mean": 0.15625, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 3, "optimal_onion_potting_agent_0_mean": 0.9166666666666666, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 4, "optimal_onion_potting_agent_1_mean": 0.875, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 3, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 0.9166666666666666, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 4, "viable_onion_potting_agent_1_mean": 0.875, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 3, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, 
"catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [28.0, 6.0, 6.0, 6.0, 3.0, 6.0, 9.0, 6.0, 8.0, 3.0, 11.0, 3.0, 8.0, 11.0, 3.0, 20.0, 8.0, 11.0, 9.0, 9.0, 6.0, 11.0, 8.0, 12.0, 6.0, 11.0, 6.0, 14.0, 0.0, 0.0, 9.0, 0.0, 9.0, 6.0, 17.0, 6.0, 9.0, 6.0, 3.0, 9.0, 16.0, 6.0, 3.0, 11.0, 11.0, 3.0, 9.0, 0.0, 3.0, 3.0, 3.0, 6.0, 8.0, 3.0, 6.0, 14.0, 6.0, 14.0, 12.0, 9.0, 3.0, 11.0, 6.0, 28.0, 8.0, 3.0, 6.0, 9.0, 17.0, 6.0, 11.0, 11.0, 14.0, 8.0, 17.0, 3.0, 11.0, 9.0, 14.0, 3.0, 11.0, 16.0, 9.0, 0.0, 6.0, 12.0, 3.0, 23.0, 6.0, 6.0, 14.0, 14.0, 6.0, 3.0, 3.0, 6.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [20.0, 8.0, 
3.0, 3.0, 0.0, 6.0, 0.0, 6.0, 3.0, 0.0, 0.0, 6.0, 3.0, 6.0, 3.0, 3.0, 3.0, 5.0, 0.0, 3.0, 5.0, 6.0, 0.0, 3.0, 5.0, 3.0, 8.0, 3.0, 3.0, 0.0, 14.0, 6.0, 5.0, 3.0, 3.0, 8.0, 3.0, 6.0, 0.0, 9.0, 3.0, 3.0, 0.0, 11.0, 8.0, 0.0, 0.0, 12.0, 3.0, 3.0, 0.0, 11.0, 3.0, 3.0, 3.0, 11.0, 0.0, 0.0, 0.0, 0.0, 3.0, 6.0, 0.0, 0.0, 9.0, 0.0, 6.0, 0.0, 6.0, 11.0, 3.0, 3.0, 6.0, 3.0, 3.0, 3.0, 3.0, 0.0, 3.0, 6.0, 5.0, 11.0, 3.0, 3.0, 0.0, 3.0, 8.0, 3.0, 3.0, 8.0, 3.0, 0.0, 3.0, 6.0, 0.0, 0.0, 3.0, 0.0, 0.0, 3.0, 3.0, 0.0, 3.0, 3.0, 3.0, 5.0, 0.0, 3.0, 6.0, 0.0, 3.0, 11.0, 0.0, 6.0, 8.0, 6.0, 9.0, 3.0, 3.0, 6.0, 3.0, 0.0, 8.0, 3.0, 6.0, 0.0, 11.0, 17.0, 8.0, 0.0, 3.0, 0.0, 3.0, 3.0, 3.0, 6.0, 11.0, 6.0, 6.0, 0.0, 0.0, 11.0, 0.0, 11.0, 14.0, 0.0, 0.0, 8.0, 6.0, 11.0, 3.0, 0.0, 6.0, 5.0, 3.0, 6.0, 11.0, 3.0, 3.0, 0.0, 5.0, 6.0, 5.0, 11.0, 3.0, 6.0, 0.0, 0.0, 0.0, 6.0, 9.0, 3.0, 3.0, 0.0, 15.0, 8.0, 3.0, 3.0, 3.0, 3.0, 6.0, 8.0, 6.0, 8.0, 6.0, 0.0, 3.0, 0.0, 3.0, 0.0, 3.0, 3.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7028905627927106, "mean_inference_ms": 1.2622640403172178, "mean_action_processing_ms": 0.13486106382724458, "mean_env_wait_ms": 0.8341695324982732, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 28.0, "episode_reward_min": 0.0, "episode_reward_mean": 8.375, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 20.0}, "policy_reward_mean": {"ppo": 4.1875}, "hist_stats": {"episode_reward": [28.0, 6.0, 6.0, 6.0, 3.0, 6.0, 9.0, 6.0, 8.0, 3.0, 11.0, 3.0, 8.0, 11.0, 3.0, 20.0, 8.0, 11.0, 9.0, 9.0, 6.0, 11.0, 8.0, 12.0, 6.0, 11.0, 6.0, 14.0, 0.0, 0.0, 9.0, 0.0, 9.0, 6.0, 17.0, 6.0, 9.0, 6.0, 3.0, 9.0, 16.0, 6.0, 3.0, 11.0, 11.0, 3.0, 9.0, 0.0, 3.0, 3.0, 3.0, 6.0, 8.0, 3.0, 6.0, 14.0, 6.0, 14.0, 12.0, 9.0, 3.0, 11.0, 6.0, 28.0, 8.0, 3.0, 6.0, 9.0, 17.0, 6.0, 11.0, 11.0, 14.0, 8.0, 17.0, 3.0, 11.0, 9.0, 14.0, 3.0, 11.0, 16.0, 9.0, 0.0, 6.0, 12.0, 3.0, 23.0, 6.0, 6.0, 14.0, 14.0, 
6.0, 3.0, 3.0, 6.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [20.0, 8.0, 3.0, 3.0, 0.0, 6.0, 0.0, 6.0, 3.0, 0.0, 0.0, 6.0, 3.0, 6.0, 3.0, 3.0, 3.0, 5.0, 0.0, 3.0, 5.0, 6.0, 0.0, 3.0, 5.0, 3.0, 8.0, 3.0, 3.0, 0.0, 14.0, 6.0, 5.0, 3.0, 3.0, 8.0, 3.0, 6.0, 0.0, 9.0, 3.0, 3.0, 0.0, 11.0, 8.0, 0.0, 0.0, 12.0, 3.0, 3.0, 0.0, 11.0, 3.0, 3.0, 3.0, 11.0, 0.0, 0.0, 0.0, 0.0, 3.0, 6.0, 0.0, 0.0, 9.0, 0.0, 6.0, 0.0, 6.0, 11.0, 3.0, 3.0, 6.0, 3.0, 3.0, 3.0, 3.0, 0.0, 3.0, 6.0, 5.0, 11.0, 3.0, 3.0, 0.0, 3.0, 8.0, 3.0, 3.0, 8.0, 3.0, 0.0, 3.0, 6.0, 0.0, 0.0, 3.0, 0.0, 0.0, 3.0, 3.0, 0.0, 3.0, 3.0, 3.0, 5.0, 0.0, 3.0, 6.0, 0.0, 3.0, 11.0, 0.0, 6.0, 8.0, 6.0, 9.0, 3.0, 3.0, 6.0, 3.0, 0.0, 8.0, 3.0, 6.0, 0.0, 11.0, 17.0, 8.0, 0.0, 3.0, 0.0, 3.0, 3.0, 3.0, 6.0, 11.0, 6.0, 6.0, 0.0, 0.0, 11.0, 0.0, 11.0, 14.0, 0.0, 0.0, 8.0, 6.0, 11.0, 3.0, 0.0, 6.0, 5.0, 3.0, 6.0, 11.0, 3.0, 3.0, 0.0, 5.0, 6.0, 5.0, 11.0, 3.0, 6.0, 0.0, 0.0, 0.0, 6.0, 9.0, 3.0, 3.0, 0.0, 15.0, 8.0, 3.0, 3.0, 3.0, 3.0, 6.0, 8.0, 6.0, 8.0, 6.0, 0.0, 3.0, 0.0, 3.0, 0.0, 3.0, 3.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7028905627927106, "mean_inference_ms": 1.2622640403172178, "mean_action_processing_ms": 0.13486106382724458, "mean_env_wait_ms": 0.8341695324982732, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 76800, "num_agent_steps_trained": 76800, "num_env_steps_sampled": 38400, "num_env_steps_trained": 38400, "num_env_steps_sampled_this_iter": 12800, 
"num_env_steps_trained_this_iter": 12800, "timesteps_total": 38400, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 76800, "timers": {"training_iteration_time_ms": 3969.635, "learn_time_ms": 1296.093, "learn_throughput": 9875.832, "synch_weights_time_ms": 24.94}, "counters": {"num_env_steps_sampled": 38400, "num_env_steps_trained": 38400, "num_agent_steps_sampled": 76800, "num_agent_steps_trained": 76800}, "done": false, "episodes_total": 96, "training_iteration": 3, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-00-06", "timestamp": 1666580406, "time_this_iter_s": 3.7223825454711914, "time_total_s": 12.025941133499146, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, 
"outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, 
"attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": 
{"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, 
"attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, 
"replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, 
"_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 12.025941133499146, "timesteps_since_restore": 0, "iterations_since_restore": 3, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 24.360000000000003, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 0.0, "sparse_reward_min": 0, "sparse_reward_max": 0, "shaped_reward_mean": 8.83, "shaped_reward_min": 0, "shaped_reward_max": 28, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 4.87, "onion_pickup_agent_0_min": 0, "onion_pickup_agent_0_max": 11, "onion_pickup_agent_1_mean": 4.89, "onion_pickup_agent_1_min": 0, "onion_pickup_agent_1_max": 16, "useful_onion_pickup_agent_0_mean": 3.48, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 8, "useful_onion_pickup_agent_1_mean": 3.39, "useful_onion_pickup_agent_1_min": 0, "useful_onion_pickup_agent_1_max": 12, "onion_drop_agent_0_mean": 3.35, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 9, "onion_drop_agent_1_mean": 3.59, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 14, "useful_onion_drop_agent_0_mean": 1.12, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 8, "useful_onion_drop_agent_1_mean": 1.22, 
"useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 10, "potting_onion_agent_0_mean": 1.04, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 4, "potting_onion_agent_1_mean": 0.79, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 3, "dish_pickup_agent_0_mean": 3.06, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 2.99, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 0.14, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 1, "useful_dish_pickup_agent_1_mean": 0.19, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 2, "dish_drop_agent_0_mean": 2.5, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 10, "dish_drop_agent_1_mean": 2.42, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 1.11, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 6, "useful_dish_drop_agent_1_mean": 0.9, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 4, "soup_pickup_agent_0_mean": 0.32, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 2, "soup_pickup_agent_1_mean": 0.39, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 4, "soup_delivery_agent_0_mean": 0.08, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 1, "soup_delivery_agent_1_mean": 0.12, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 1, "soup_drop_agent_0_mean": 0.17, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.24, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 4, "optimal_onion_potting_agent_0_mean": 1.04, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 4, "optimal_onion_potting_agent_1_mean": 0.79, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 3, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, 
"optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 1.04, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 4, "viable_onion_potting_agent_1_mean": 0.79, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 3, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.02500000037252903, "cur_lr": 0.0010000000474974513, "total_loss": -0.2698257565498352, "policy_loss": -0.003558500437065959, "vf_loss": 0.6820269823074341, 
"vf_explained_var": -0.001748034730553627, "kl": 0.0005089464830234647, "entropy": 1.788435697555542, "entropy_coeff": 0.14892800152301788, "model": {}}}}, "num_env_steps_sampled": 51200, "num_env_steps_trained": 51200, "num_agent_steps_sampled": 102400, "num_agent_steps_trained": 102400}, "sampler_results": {"episode_reward_max": 28.0, "episode_reward_min": 0.0, "episode_reward_mean": 8.83, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 17.0}, "policy_reward_mean": {"ppo": 4.415}, "custom_metrics": {"sparse_reward_mean": 0.0, "sparse_reward_min": 0, "sparse_reward_max": 0, "shaped_reward_mean": 8.83, "shaped_reward_min": 0, "shaped_reward_max": 28, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 4.87, "onion_pickup_agent_0_min": 0, "onion_pickup_agent_0_max": 11, "onion_pickup_agent_1_mean": 4.89, "onion_pickup_agent_1_min": 0, "onion_pickup_agent_1_max": 
16, "useful_onion_pickup_agent_0_mean": 3.48, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 8, "useful_onion_pickup_agent_1_mean": 3.39, "useful_onion_pickup_agent_1_min": 0, "useful_onion_pickup_agent_1_max": 12, "onion_drop_agent_0_mean": 3.35, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 9, "onion_drop_agent_1_mean": 3.59, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 14, "useful_onion_drop_agent_0_mean": 1.12, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 8, "useful_onion_drop_agent_1_mean": 1.22, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 10, "potting_onion_agent_0_mean": 1.04, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 4, "potting_onion_agent_1_mean": 0.79, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 3, "dish_pickup_agent_0_mean": 3.06, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 2.99, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 0.14, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 1, "useful_dish_pickup_agent_1_mean": 0.19, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 2, "dish_drop_agent_0_mean": 2.5, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 10, "dish_drop_agent_1_mean": 2.42, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 1.11, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 6, "useful_dish_drop_agent_1_mean": 0.9, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 4, "soup_pickup_agent_0_mean": 0.32, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 2, "soup_pickup_agent_1_mean": 0.39, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 4, "soup_delivery_agent_0_mean": 0.08, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 1, "soup_delivery_agent_1_mean": 0.12, 
"soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 1, "soup_drop_agent_0_mean": 0.17, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.24, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 4, "optimal_onion_potting_agent_0_mean": 1.04, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 4, "optimal_onion_potting_agent_1_mean": 0.79, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 3, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 1.04, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 4, "viable_onion_potting_agent_1_mean": 0.79, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 3, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, 
"useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [0.0, 0.0, 9.0, 0.0, 9.0, 6.0, 17.0, 6.0, 9.0, 6.0, 3.0, 9.0, 16.0, 6.0, 3.0, 11.0, 11.0, 3.0, 9.0, 0.0, 3.0, 3.0, 3.0, 6.0, 8.0, 3.0, 6.0, 14.0, 6.0, 14.0, 12.0, 9.0, 3.0, 11.0, 6.0, 28.0, 8.0, 3.0, 6.0, 9.0, 17.0, 6.0, 11.0, 11.0, 14.0, 8.0, 17.0, 3.0, 11.0, 9.0, 14.0, 3.0, 11.0, 16.0, 9.0, 0.0, 6.0, 12.0, 3.0, 23.0, 6.0, 6.0, 14.0, 14.0, 6.0, 3.0, 3.0, 6.0, 8.0, 17.0, 6.0, 6.0, 14.0, 14.0, 14.0, 11.0, 11.0, 17.0, 9.0, 14.0, 3.0, 17.0, 6.0, 19.0, 11.0, 11.0, 3.0, 0.0, 3.0, 25.0, 20.0, 11.0, 9.0, 0.0, 14.0, 6.0, 0.0, 14.0, 3.0, 11.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [0.0, 0.0, 0.0, 0.0, 3.0, 6.0, 0.0, 0.0, 9.0, 0.0, 6.0, 0.0, 6.0, 11.0, 3.0, 3.0, 6.0, 3.0, 3.0, 3.0, 3.0, 0.0, 3.0, 6.0, 5.0, 11.0, 3.0, 3.0, 0.0, 3.0, 8.0, 3.0, 3.0, 8.0, 3.0, 0.0, 3.0, 6.0, 0.0, 0.0, 3.0, 0.0, 0.0, 3.0, 3.0, 0.0, 3.0, 3.0, 3.0, 5.0, 0.0, 3.0, 6.0, 0.0, 3.0, 11.0, 0.0, 6.0, 8.0, 6.0, 9.0, 3.0, 3.0, 6.0, 3.0, 0.0, 8.0, 3.0, 6.0, 0.0, 11.0, 17.0, 8.0, 0.0, 3.0, 0.0, 3.0, 3.0, 3.0, 6.0, 11.0, 6.0, 6.0, 0.0, 0.0, 11.0, 0.0, 11.0, 14.0, 0.0, 0.0, 8.0, 6.0, 11.0, 3.0, 0.0, 6.0, 5.0, 3.0, 6.0, 11.0, 3.0, 3.0, 0.0, 5.0, 6.0, 5.0, 11.0, 3.0, 6.0, 0.0, 0.0, 0.0, 6.0, 9.0, 
3.0, 3.0, 0.0, 15.0, 8.0, 3.0, 3.0, 3.0, 3.0, 6.0, 8.0, 6.0, 8.0, 6.0, 0.0, 3.0, 0.0, 3.0, 0.0, 3.0, 3.0, 0.0, 8.0, 11.0, 6.0, 6.0, 0.0, 3.0, 3.0, 8.0, 6.0, 6.0, 8.0, 8.0, 6.0, 3.0, 8.0, 5.0, 6.0, 11.0, 6.0, 3.0, 6.0, 11.0, 3.0, 3.0, 0.0, 12.0, 5.0, 3.0, 3.0, 10.0, 9.0, 3.0, 8.0, 8.0, 3.0, 3.0, 0.0, 0.0, 0.0, 3.0, 0.0, 8.0, 17.0, 6.0, 14.0, 6.0, 5.0, 9.0, 0.0, 0.0, 0.0, 3.0, 11.0, 6.0, 0.0, 0.0, 0.0, 11.0, 3.0, 3.0, 0.0, 3.0, 8.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7017628475225973, "mean_inference_ms": 1.2446256924059935, "mean_action_processing_ms": 0.13484534254964872, "mean_env_wait_ms": 0.8348638696126399, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 28.0, "episode_reward_min": 0.0, "episode_reward_mean": 8.83, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 17.0}, "policy_reward_mean": {"ppo": 4.415}, "hist_stats": {"episode_reward": [0.0, 0.0, 9.0, 0.0, 9.0, 6.0, 17.0, 6.0, 9.0, 6.0, 3.0, 9.0, 16.0, 6.0, 3.0, 11.0, 11.0, 3.0, 9.0, 0.0, 3.0, 3.0, 3.0, 6.0, 8.0, 3.0, 6.0, 14.0, 6.0, 14.0, 12.0, 9.0, 3.0, 11.0, 6.0, 28.0, 8.0, 3.0, 6.0, 9.0, 17.0, 6.0, 11.0, 11.0, 14.0, 8.0, 17.0, 3.0, 11.0, 9.0, 14.0, 3.0, 11.0, 16.0, 9.0, 0.0, 6.0, 12.0, 3.0, 23.0, 6.0, 6.0, 14.0, 14.0, 6.0, 3.0, 3.0, 6.0, 8.0, 17.0, 6.0, 6.0, 14.0, 14.0, 14.0, 11.0, 11.0, 17.0, 9.0, 14.0, 3.0, 17.0, 6.0, 19.0, 11.0, 11.0, 3.0, 0.0, 3.0, 25.0, 20.0, 11.0, 9.0, 0.0, 14.0, 6.0, 0.0, 14.0, 3.0, 11.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [0.0, 0.0, 0.0, 0.0, 3.0, 6.0, 0.0, 0.0, 9.0, 0.0, 6.0, 0.0, 6.0, 11.0, 3.0, 3.0, 6.0, 3.0, 3.0, 3.0, 3.0, 0.0, 3.0, 6.0, 5.0, 11.0, 3.0, 3.0, 0.0, 3.0, 8.0, 3.0, 3.0, 8.0, 3.0, 0.0, 3.0, 6.0, 0.0, 0.0, 3.0, 0.0, 0.0, 3.0, 3.0, 0.0, 3.0, 3.0, 3.0, 5.0, 0.0, 3.0, 6.0, 0.0, 3.0, 11.0, 0.0, 6.0, 8.0, 6.0, 9.0, 3.0, 3.0, 6.0, 3.0, 0.0, 8.0, 3.0, 6.0, 0.0, 11.0, 17.0, 8.0, 0.0, 3.0, 0.0, 3.0, 3.0, 3.0, 6.0, 11.0, 6.0, 6.0, 0.0, 0.0, 11.0, 0.0, 11.0, 14.0, 0.0, 0.0, 8.0, 6.0, 11.0, 3.0, 0.0, 6.0, 5.0, 3.0, 6.0, 11.0, 3.0, 3.0, 0.0, 5.0, 6.0, 5.0, 11.0, 3.0, 6.0, 0.0, 0.0, 0.0, 6.0, 9.0, 3.0, 3.0, 0.0, 15.0, 8.0, 3.0, 3.0, 3.0, 3.0, 6.0, 8.0, 6.0, 8.0, 6.0, 0.0, 3.0, 0.0, 3.0, 0.0, 3.0, 3.0, 0.0, 8.0, 11.0, 6.0, 6.0, 0.0, 3.0, 3.0, 8.0, 6.0, 6.0, 8.0, 8.0, 6.0, 3.0, 8.0, 5.0, 6.0, 11.0, 6.0, 3.0, 6.0, 11.0, 3.0, 3.0, 0.0, 12.0, 5.0, 3.0, 3.0, 10.0, 9.0, 3.0, 8.0, 8.0, 3.0, 3.0, 0.0, 0.0, 0.0, 3.0, 0.0, 8.0, 17.0, 6.0, 14.0, 6.0, 5.0, 9.0, 0.0, 0.0, 0.0, 3.0, 11.0, 6.0, 0.0, 0.0, 0.0, 11.0, 3.0, 3.0, 0.0, 3.0, 8.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7017628475225973, "mean_inference_ms": 1.2446256924059935, "mean_action_processing_ms": 0.13484534254964872, "mean_env_wait_ms": 0.8348638696126399, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 102400, "num_agent_steps_trained": 102400, "num_env_steps_sampled": 51200, "num_env_steps_trained": 51200, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 51200, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 102400, "timers": {"training_iteration_time_ms": 3883.504, "learn_time_ms": 1240.403, "learn_throughput": 10319.223, "synch_weights_time_ms": 22.878}, "counters": {"num_env_steps_sampled": 51200, "num_env_steps_trained": 51200, "num_agent_steps_sampled": 102400, "num_agent_steps_trained": 102400}, "done": false, "episodes_total": 
128, "training_iteration": 4, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-00-10", "timestamp": 1666580410, "time_this_iter_s": 3.6808454990386963, "time_total_s": 15.706786632537842, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 
16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, 
"input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": 
null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": 
-1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, 
"entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, 
"min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 15.706786632537842, "timesteps_since_restore": 0, "iterations_since_restore": 4, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 21.53333333333333, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 0.0, "sparse_reward_min": 0, "sparse_reward_max": 0, "shaped_reward_mean": 9.37, "shaped_reward_min": 0, "shaped_reward_max": 28, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 5.15, "onion_pickup_agent_0_min": 0, "onion_pickup_agent_0_max": 12, "onion_pickup_agent_1_mean": 5.25, "onion_pickup_agent_1_min": 1, "onion_pickup_agent_1_max": 16, "useful_onion_pickup_agent_0_mean": 3.41, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 9, "useful_onion_pickup_agent_1_mean": 3.33, "useful_onion_pickup_agent_1_min": 0, "useful_onion_pickup_agent_1_max": 12, "onion_drop_agent_0_mean": 3.56, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 10, "onion_drop_agent_1_mean": 3.94, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 14, "useful_onion_drop_agent_0_mean": 1.41, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 7, "useful_onion_drop_agent_1_mean": 1.7, 
"useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 10, "potting_onion_agent_0_mean": 1.05, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 4, "potting_onion_agent_1_mean": 0.81, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 3, "dish_pickup_agent_0_mean": 2.75, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 2.64, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 0.16, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 1, "useful_dish_pickup_agent_1_mean": 0.22, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 2, "dish_drop_agent_0_mean": 2.25, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 10, "dish_drop_agent_1_mean": 2.07, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 1.04, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 6, "useful_dish_drop_agent_1_mean": 0.76, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 4, "soup_pickup_agent_0_mean": 0.42, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 2, "soup_pickup_agent_1_mean": 0.44, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 4, "soup_delivery_agent_0_mean": 0.11, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 1, "soup_delivery_agent_1_mean": 0.13, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 1, "soup_drop_agent_0_mean": 0.23, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.28, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 4, "optimal_onion_potting_agent_0_mean": 1.05, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 4, "optimal_onion_potting_agent_1_mean": 0.81, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 3, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, 
"optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 1.05, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 4, "viable_onion_potting_agent_1_mean": 0.81, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 3, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.012500000186264515, "cur_lr": 0.0010000000474974513, "total_loss": -0.23854368925094604, "policy_loss": -0.0029007629491388798, "vf_loss": 0.5773570537567139, 
"vf_explained_var": -0.0037215352058410645, "kl": 0.0004978743381798267, "entropy": 1.7869577407836914, "entropy_coeff": 0.1319040060043335, "model": {}}}}, "num_env_steps_sampled": 64000, "num_env_steps_trained": 64000, "num_agent_steps_sampled": 128000, "num_agent_steps_trained": 128000}, "sampler_results": {"episode_reward_max": 28.0, "episode_reward_min": 0.0, "episode_reward_mean": 9.37, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 17.0}, "policy_reward_mean": {"ppo": 4.685}, "custom_metrics": {"sparse_reward_mean": 0.0, "sparse_reward_min": 0, "sparse_reward_max": 0, "shaped_reward_mean": 9.37, "shaped_reward_min": 0, "shaped_reward_max": 28, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 5.15, "onion_pickup_agent_0_min": 0, "onion_pickup_agent_0_max": 12, "onion_pickup_agent_1_mean": 5.25, "onion_pickup_agent_1_min": 1, "onion_pickup_agent_1_max": 
16, "useful_onion_pickup_agent_0_mean": 3.41, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 9, "useful_onion_pickup_agent_1_mean": 3.33, "useful_onion_pickup_agent_1_min": 0, "useful_onion_pickup_agent_1_max": 12, "onion_drop_agent_0_mean": 3.56, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 10, "onion_drop_agent_1_mean": 3.94, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 14, "useful_onion_drop_agent_0_mean": 1.41, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 7, "useful_onion_drop_agent_1_mean": 1.7, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 10, "potting_onion_agent_0_mean": 1.05, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 4, "potting_onion_agent_1_mean": 0.81, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 3, "dish_pickup_agent_0_mean": 2.75, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 2.64, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 0.16, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 1, "useful_dish_pickup_agent_1_mean": 0.22, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 2, "dish_drop_agent_0_mean": 2.25, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 10, "dish_drop_agent_1_mean": 2.07, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 1.04, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 6, "useful_dish_drop_agent_1_mean": 0.76, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 4, "soup_pickup_agent_0_mean": 0.42, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 2, "soup_pickup_agent_1_mean": 0.44, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 4, "soup_delivery_agent_0_mean": 0.11, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 1, "soup_delivery_agent_1_mean": 0.13, 
"soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 1, "soup_drop_agent_0_mean": 0.23, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.28, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 4, "optimal_onion_potting_agent_0_mean": 1.05, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 4, "optimal_onion_potting_agent_1_mean": 0.81, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 3, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 1.05, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 4, "viable_onion_potting_agent_1_mean": 0.81, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 3, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, 
"useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [3.0, 11.0, 6.0, 28.0, 8.0, 3.0, 6.0, 9.0, 17.0, 6.0, 11.0, 11.0, 14.0, 8.0, 17.0, 3.0, 11.0, 9.0, 14.0, 3.0, 11.0, 16.0, 9.0, 0.0, 6.0, 12.0, 3.0, 23.0, 6.0, 6.0, 14.0, 14.0, 6.0, 3.0, 3.0, 6.0, 8.0, 17.0, 6.0, 6.0, 14.0, 14.0, 14.0, 11.0, 11.0, 17.0, 9.0, 14.0, 3.0, 17.0, 6.0, 19.0, 11.0, 11.0, 3.0, 0.0, 3.0, 25.0, 20.0, 11.0, 9.0, 0.0, 14.0, 6.0, 0.0, 14.0, 3.0, 11.0, 3.0, 6.0, 3.0, 9.0, 8.0, 11.0, 14.0, 3.0, 17.0, 17.0, 3.0, 11.0, 6.0, 20.0, 11.0, 6.0, 9.0, 3.0, 3.0, 11.0, 11.0, 6.0, 6.0, 8.0, 11.0, 3.0, 12.0, 14.0, 3.0, 6.0, 11.0, 9.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [3.0, 0.0, 8.0, 3.0, 6.0, 0.0, 11.0, 17.0, 8.0, 0.0, 3.0, 0.0, 3.0, 3.0, 3.0, 6.0, 11.0, 6.0, 6.0, 0.0, 0.0, 11.0, 0.0, 11.0, 14.0, 0.0, 0.0, 8.0, 6.0, 11.0, 3.0, 0.0, 6.0, 5.0, 3.0, 6.0, 11.0, 3.0, 3.0, 0.0, 5.0, 6.0, 5.0, 11.0, 3.0, 6.0, 0.0, 0.0, 0.0, 6.0, 9.0, 3.0, 3.0, 0.0, 15.0, 8.0, 3.0, 3.0, 3.0, 3.0, 6.0, 8.0, 6.0, 8.0, 6.0, 0.0, 3.0, 0.0, 3.0, 0.0, 3.0, 3.0, 0.0, 8.0, 11.0, 6.0, 6.0, 0.0, 3.0, 3.0, 8.0, 6.0, 6.0, 8.0, 8.0, 6.0, 3.0, 8.0, 5.0, 6.0, 11.0, 6.0, 3.0, 6.0, 11.0, 3.0, 3.0, 0.0, 12.0, 5.0, 3.0, 3.0, 10.0, 9.0, 3.0, 8.0, 8.0, 3.0, 3.0, 0.0, 0.0, 0.0, 3.0, 
0.0, 8.0, 17.0, 6.0, 14.0, 6.0, 5.0, 9.0, 0.0, 0.0, 0.0, 3.0, 11.0, 6.0, 0.0, 0.0, 0.0, 11.0, 3.0, 3.0, 0.0, 3.0, 8.0, 3.0, 0.0, 6.0, 0.0, 0.0, 3.0, 6.0, 3.0, 0.0, 8.0, 5.0, 6.0, 9.0, 5.0, 0.0, 3.0, 6.0, 11.0, 11.0, 6.0, 0.0, 3.0, 3.0, 8.0, 6.0, 0.0, 14.0, 6.0, 8.0, 3.0, 0.0, 6.0, 3.0, 6.0, 3.0, 0.0, 0.0, 3.0, 3.0, 8.0, 3.0, 8.0, 0.0, 6.0, 3.0, 3.0, 8.0, 0.0, 0.0, 11.0, 0.0, 3.0, 6.0, 6.0, 5.0, 9.0, 3.0, 0.0, 6.0, 0.0, 11.0, 0.0, 6.0, 3.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6993358203765415, "mean_inference_ms": 1.234363948034564, "mean_action_processing_ms": 0.13489033625826413, "mean_env_wait_ms": 0.8388656832452103, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 28.0, "episode_reward_min": 0.0, "episode_reward_mean": 9.37, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 17.0}, "policy_reward_mean": {"ppo": 4.685}, "hist_stats": {"episode_reward": [3.0, 11.0, 6.0, 28.0, 8.0, 3.0, 6.0, 9.0, 17.0, 6.0, 11.0, 11.0, 14.0, 8.0, 17.0, 3.0, 11.0, 9.0, 14.0, 3.0, 11.0, 16.0, 9.0, 0.0, 6.0, 12.0, 3.0, 23.0, 6.0, 6.0, 14.0, 14.0, 6.0, 3.0, 3.0, 6.0, 8.0, 17.0, 6.0, 6.0, 14.0, 14.0, 14.0, 11.0, 11.0, 17.0, 9.0, 14.0, 3.0, 17.0, 6.0, 19.0, 11.0, 11.0, 3.0, 0.0, 3.0, 25.0, 20.0, 11.0, 9.0, 0.0, 14.0, 6.0, 0.0, 14.0, 3.0, 11.0, 3.0, 6.0, 3.0, 9.0, 8.0, 11.0, 14.0, 3.0, 17.0, 17.0, 3.0, 11.0, 6.0, 20.0, 11.0, 6.0, 9.0, 3.0, 3.0, 11.0, 11.0, 6.0, 6.0, 8.0, 11.0, 3.0, 12.0, 14.0, 3.0, 6.0, 11.0, 9.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [3.0, 0.0, 8.0, 3.0, 6.0, 0.0, 11.0, 17.0, 8.0, 0.0, 3.0, 0.0, 3.0, 3.0, 3.0, 6.0, 11.0, 6.0, 6.0, 0.0, 0.0, 11.0, 0.0, 11.0, 14.0, 0.0, 0.0, 8.0, 6.0, 11.0, 3.0, 0.0, 6.0, 5.0, 3.0, 6.0, 11.0, 3.0, 3.0, 0.0, 5.0, 6.0, 5.0, 11.0, 3.0, 6.0, 0.0, 0.0, 0.0, 6.0, 9.0, 3.0, 3.0, 0.0, 15.0, 8.0, 3.0, 3.0, 3.0, 3.0, 6.0, 8.0, 6.0, 8.0, 6.0, 0.0, 3.0, 0.0, 3.0, 0.0, 3.0, 3.0, 0.0, 8.0, 11.0, 6.0, 6.0, 0.0, 3.0, 3.0, 8.0, 6.0, 6.0, 8.0, 8.0, 6.0, 3.0, 8.0, 5.0, 6.0, 11.0, 6.0, 3.0, 6.0, 11.0, 3.0, 3.0, 0.0, 12.0, 5.0, 3.0, 3.0, 10.0, 9.0, 3.0, 8.0, 8.0, 3.0, 3.0, 0.0, 0.0, 0.0, 3.0, 0.0, 8.0, 17.0, 6.0, 14.0, 6.0, 5.0, 9.0, 0.0, 0.0, 0.0, 3.0, 11.0, 6.0, 0.0, 0.0, 0.0, 11.0, 3.0, 3.0, 0.0, 3.0, 8.0, 3.0, 0.0, 6.0, 0.0, 0.0, 3.0, 6.0, 3.0, 0.0, 8.0, 5.0, 6.0, 9.0, 5.0, 0.0, 3.0, 6.0, 11.0, 11.0, 6.0, 0.0, 3.0, 3.0, 8.0, 6.0, 0.0, 14.0, 6.0, 8.0, 3.0, 0.0, 6.0, 3.0, 6.0, 3.0, 0.0, 0.0, 3.0, 3.0, 8.0, 3.0, 8.0, 0.0, 6.0, 3.0, 3.0, 8.0, 0.0, 0.0, 11.0, 0.0, 3.0, 6.0, 6.0, 5.0, 9.0, 3.0, 0.0, 6.0, 0.0, 11.0, 0.0, 6.0, 3.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6993358203765415, "mean_inference_ms": 1.234363948034564, "mean_action_processing_ms": 0.13489033625826413, "mean_env_wait_ms": 0.8388656832452103, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 128000, "num_agent_steps_trained": 128000, "num_env_steps_sampled": 64000, "num_env_steps_trained": 64000, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 64000, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 128000, "timers": {"training_iteration_time_ms": 3867.837, "learn_time_ms": 1225.733, "learn_throughput": 10442.73, "synch_weights_time_ms": 22.151}, "counters": {"num_env_steps_sampled": 64000, "num_env_steps_trained": 64000, "num_agent_steps_sampled": 128000, "num_agent_steps_trained": 128000}, "done": false, 
"episodes_total": 160, "training_iteration": 5, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-00-14", "timestamp": 1666580414, "time_this_iter_s": 3.8698205947875977, "time_total_s": 19.57660722732544, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": 
false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": 
"StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": 
false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, 
"lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, 
"entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, 
"min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 19.57660722732544, "timesteps_since_restore": 0, "iterations_since_restore": 5, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 24.500000000000004, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 0.2, "sparse_reward_min": 0, "sparse_reward_max": 20, "shaped_reward_mean": 10.13, "shaped_reward_min": 0, "shaped_reward_max": 28, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 5.19, "onion_pickup_agent_0_min": 0, "onion_pickup_agent_0_max": 12, "onion_pickup_agent_1_mean": 5.45, "onion_pickup_agent_1_min": 1, "onion_pickup_agent_1_max": 16, "useful_onion_pickup_agent_0_mean": 3.45, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 9, "useful_onion_pickup_agent_1_mean": 3.73, "useful_onion_pickup_agent_1_min": 0, "useful_onion_pickup_agent_1_max": 12, "onion_drop_agent_0_mean": 3.64, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 10, "onion_drop_agent_1_mean": 4.01, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 14, "useful_onion_drop_agent_0_mean": 1.47, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 7, "useful_onion_drop_agent_1_mean": 1.5, 
"useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 10, "potting_onion_agent_0_mean": 1.07, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 4, "potting_onion_agent_1_mean": 0.92, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 3, "dish_pickup_agent_0_mean": 2.87, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 2.82, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 0.16, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 1, "useful_dish_pickup_agent_1_mean": 0.21, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 2, "dish_drop_agent_0_mean": 2.32, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 11, "dish_drop_agent_1_mean": 2.22, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 10, "useful_dish_drop_agent_0_mean": 1.05, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 10, "useful_dish_drop_agent_1_mean": 1.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 8, "soup_pickup_agent_0_mean": 0.43, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 4, "soup_pickup_agent_1_mean": 0.5, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 4, "soup_delivery_agent_0_mean": 0.11, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 1, "soup_delivery_agent_1_mean": 0.14, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 1, "soup_drop_agent_0_mean": 0.23, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 4, "soup_drop_agent_1_mean": 0.3, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 4, "optimal_onion_potting_agent_0_mean": 1.07, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 4, "optimal_onion_potting_agent_1_mean": 0.92, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 3, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, 
"optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 1.07, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 4, "viable_onion_potting_agent_1_mean": 0.92, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 3, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0062500000931322575, "cur_lr": 0.0010000000474974513, "total_loss": -0.2105298936367035, "policy_loss": -0.005385499447584152, "vf_loss": 0.8951879143714905, 
"vf_explained_var": 0.004527205601334572, "kl": 0.0004916870966553688, "entropy": 1.7865335941314697, "entropy_coeff": 0.11488000303506851, "model": {}}}}, "num_env_steps_sampled": 76800, "num_env_steps_trained": 76800, "num_agent_steps_sampled": 153600, "num_agent_steps_trained": 153600}, "sampler_results": {"episode_reward_max": 54.0, "episode_reward_min": 0.0, "episode_reward_mean": 10.53, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 31.0}, "policy_reward_mean": {"ppo": 5.265}, "custom_metrics": {"sparse_reward_mean": 0.2, "sparse_reward_min": 0, "sparse_reward_max": 20, "shaped_reward_mean": 10.13, "shaped_reward_min": 0, "shaped_reward_max": 28, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 5.19, "onion_pickup_agent_0_min": 0, "onion_pickup_agent_0_max": 12, "onion_pickup_agent_1_mean": 5.45, "onion_pickup_agent_1_min": 1, "onion_pickup_agent_1_max": 
16, "useful_onion_pickup_agent_0_mean": 3.45, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 9, "useful_onion_pickup_agent_1_mean": 3.73, "useful_onion_pickup_agent_1_min": 0, "useful_onion_pickup_agent_1_max": 12, "onion_drop_agent_0_mean": 3.64, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 10, "onion_drop_agent_1_mean": 4.01, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 14, "useful_onion_drop_agent_0_mean": 1.47, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 7, "useful_onion_drop_agent_1_mean": 1.5, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 10, "potting_onion_agent_0_mean": 1.07, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 4, "potting_onion_agent_1_mean": 0.92, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 3, "dish_pickup_agent_0_mean": 2.87, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 2.82, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 0.16, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 1, "useful_dish_pickup_agent_1_mean": 0.21, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 2, "dish_drop_agent_0_mean": 2.32, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 11, "dish_drop_agent_1_mean": 2.22, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 10, "useful_dish_drop_agent_0_mean": 1.05, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 10, "useful_dish_drop_agent_1_mean": 1.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 8, "soup_pickup_agent_0_mean": 0.43, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 4, "soup_pickup_agent_1_mean": 0.5, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 4, "soup_delivery_agent_0_mean": 0.11, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 1, "soup_delivery_agent_1_mean": 0.14, 
"soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 1, "soup_drop_agent_0_mean": 0.23, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 4, "soup_drop_agent_1_mean": 0.3, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 4, "optimal_onion_potting_agent_0_mean": 1.07, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 4, "optimal_onion_potting_agent_1_mean": 0.92, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 3, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 1.07, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 4, "viable_onion_potting_agent_1_mean": 0.92, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 3, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, 
"useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [6.0, 3.0, 3.0, 6.0, 8.0, 17.0, 6.0, 6.0, 14.0, 14.0, 14.0, 11.0, 11.0, 17.0, 9.0, 14.0, 3.0, 17.0, 6.0, 19.0, 11.0, 11.0, 3.0, 0.0, 3.0, 25.0, 20.0, 11.0, 9.0, 0.0, 14.0, 6.0, 0.0, 14.0, 3.0, 11.0, 3.0, 6.0, 3.0, 9.0, 8.0, 11.0, 14.0, 3.0, 17.0, 17.0, 3.0, 11.0, 6.0, 20.0, 11.0, 6.0, 9.0, 3.0, 3.0, 11.0, 11.0, 6.0, 6.0, 8.0, 11.0, 3.0, 12.0, 14.0, 3.0, 6.0, 11.0, 9.0, 14.0, 3.0, 17.0, 9.0, 20.0, 22.0, 20.0, 8.0, 8.0, 17.0, 11.0, 3.0, 11.0, 3.0, 12.0, 3.0, 22.0, 8.0, 17.0, 3.0, 22.0, 12.0, 8.0, 14.0, 17.0, 19.0, 3.0, 54.0, 28.0, 14.0, 12.0, 0.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [6.0, 0.0, 3.0, 0.0, 3.0, 0.0, 3.0, 3.0, 0.0, 8.0, 11.0, 6.0, 6.0, 0.0, 3.0, 3.0, 8.0, 6.0, 6.0, 8.0, 8.0, 6.0, 3.0, 8.0, 5.0, 6.0, 11.0, 6.0, 3.0, 6.0, 11.0, 3.0, 3.0, 0.0, 12.0, 5.0, 3.0, 3.0, 10.0, 9.0, 3.0, 8.0, 8.0, 3.0, 3.0, 0.0, 0.0, 0.0, 3.0, 0.0, 8.0, 17.0, 6.0, 14.0, 6.0, 5.0, 9.0, 0.0, 0.0, 0.0, 3.0, 11.0, 6.0, 0.0, 0.0, 0.0, 11.0, 3.0, 3.0, 0.0, 3.0, 8.0, 3.0, 0.0, 6.0, 0.0, 0.0, 3.0, 6.0, 3.0, 0.0, 8.0, 5.0, 6.0, 9.0, 5.0, 0.0, 3.0, 6.0, 11.0, 11.0, 6.0, 0.0, 3.0, 3.0, 8.0, 6.0, 0.0, 14.0, 6.0, 8.0, 3.0, 0.0, 6.0, 3.0, 6.0, 3.0, 0.0, 0.0, 3.0, 3.0, 8.0, 3.0, 
8.0, 0.0, 6.0, 3.0, 3.0, 8.0, 0.0, 0.0, 11.0, 0.0, 3.0, 6.0, 6.0, 5.0, 9.0, 3.0, 0.0, 6.0, 0.0, 11.0, 0.0, 6.0, 3.0, 8.0, 6.0, 0.0, 3.0, 6.0, 11.0, 3.0, 6.0, 14.0, 6.0, 8.0, 14.0, 12.0, 8.0, 0.0, 8.0, 5.0, 3.0, 8.0, 9.0, 11.0, 0.0, 3.0, 0.0, 3.0, 8.0, 0.0, 3.0, 6.0, 6.0, 0.0, 3.0, 8.0, 14.0, 8.0, 0.0, 12.0, 5.0, 0.0, 3.0, 6.0, 16.0, 3.0, 9.0, 0.0, 8.0, 11.0, 3.0, 6.0, 11.0, 3.0, 16.0, 3.0, 0.0, 31.0, 23.0, 14.0, 14.0, 3.0, 11.0, 12.0, 0.0, 0.0, 0.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6987260941959224, "mean_inference_ms": 1.2293985973594628, "mean_action_processing_ms": 0.13473542591379423, "mean_env_wait_ms": 0.840520264494256, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 54.0, "episode_reward_min": 0.0, "episode_reward_mean": 10.53, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 31.0}, "policy_reward_mean": {"ppo": 5.265}, "hist_stats": {"episode_reward": [6.0, 3.0, 3.0, 6.0, 8.0, 17.0, 6.0, 6.0, 14.0, 14.0, 14.0, 11.0, 11.0, 17.0, 9.0, 14.0, 3.0, 17.0, 6.0, 19.0, 11.0, 11.0, 3.0, 0.0, 3.0, 25.0, 20.0, 11.0, 9.0, 0.0, 14.0, 6.0, 0.0, 14.0, 3.0, 11.0, 3.0, 6.0, 3.0, 9.0, 8.0, 11.0, 14.0, 3.0, 17.0, 17.0, 3.0, 11.0, 6.0, 20.0, 11.0, 6.0, 9.0, 3.0, 3.0, 11.0, 11.0, 6.0, 6.0, 8.0, 11.0, 3.0, 12.0, 14.0, 3.0, 6.0, 11.0, 9.0, 14.0, 3.0, 17.0, 9.0, 20.0, 22.0, 20.0, 8.0, 8.0, 17.0, 11.0, 3.0, 11.0, 3.0, 12.0, 3.0, 22.0, 8.0, 17.0, 3.0, 22.0, 12.0, 8.0, 14.0, 17.0, 19.0, 3.0, 54.0, 28.0, 14.0, 12.0, 0.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [6.0, 0.0, 3.0, 0.0, 3.0, 0.0, 3.0, 3.0, 0.0, 8.0, 11.0, 6.0, 6.0, 0.0, 3.0, 3.0, 8.0, 6.0, 6.0, 8.0, 8.0, 6.0, 3.0, 8.0, 5.0, 6.0, 11.0, 6.0, 3.0, 6.0, 11.0, 3.0, 3.0, 0.0, 12.0, 5.0, 3.0, 3.0, 10.0, 9.0, 3.0, 8.0, 8.0, 3.0, 3.0, 0.0, 0.0, 0.0, 3.0, 0.0, 8.0, 17.0, 6.0, 14.0, 6.0, 5.0, 9.0, 0.0, 0.0, 0.0, 3.0, 11.0, 6.0, 0.0, 0.0, 0.0, 11.0, 3.0, 3.0, 0.0, 3.0, 8.0, 3.0, 0.0, 6.0, 0.0, 0.0, 3.0, 6.0, 3.0, 0.0, 8.0, 5.0, 6.0, 9.0, 5.0, 0.0, 3.0, 6.0, 11.0, 11.0, 6.0, 0.0, 3.0, 3.0, 8.0, 6.0, 0.0, 14.0, 6.0, 8.0, 3.0, 0.0, 6.0, 3.0, 6.0, 3.0, 0.0, 0.0, 3.0, 3.0, 8.0, 3.0, 8.0, 0.0, 6.0, 3.0, 3.0, 8.0, 0.0, 0.0, 11.0, 0.0, 3.0, 6.0, 6.0, 5.0, 9.0, 3.0, 0.0, 6.0, 0.0, 11.0, 0.0, 6.0, 3.0, 8.0, 6.0, 0.0, 3.0, 6.0, 11.0, 3.0, 6.0, 14.0, 6.0, 8.0, 14.0, 12.0, 8.0, 0.0, 8.0, 5.0, 3.0, 8.0, 9.0, 11.0, 0.0, 3.0, 0.0, 3.0, 8.0, 0.0, 3.0, 6.0, 6.0, 0.0, 3.0, 8.0, 14.0, 8.0, 0.0, 12.0, 5.0, 0.0, 3.0, 6.0, 16.0, 3.0, 9.0, 0.0, 8.0, 11.0, 3.0, 6.0, 11.0, 3.0, 16.0, 3.0, 0.0, 31.0, 23.0, 14.0, 14.0, 3.0, 11.0, 12.0, 0.0, 0.0, 0.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6987260941959224, "mean_inference_ms": 1.2293985973594628, "mean_action_processing_ms": 0.13473542591379423, "mean_env_wait_ms": 0.840520264494256, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 153600, "num_agent_steps_trained": 153600, "num_env_steps_sampled": 76800, "num_env_steps_trained": 76800, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 76800, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 153600, "timers": {"training_iteration_time_ms": 3871.15, "learn_time_ms": 1229.232, "learn_throughput": 10413.005, "synch_weights_time_ms": 21.063}, "counters": {"num_env_steps_sampled": 76800, "num_env_steps_trained": 76800, "num_agent_steps_sampled": 153600, "num_agent_steps_trained": 
153600}, "done": false, "episodes_total": 192, "training_iteration": 6, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-00-18", "timestamp": 1666580418, "time_this_iter_s": 3.949986696243286, "time_total_s": 23.526593923568726, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, 
"disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, 
"exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, 
"env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, 
"custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, 
"vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, 
"min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 23.526593923568726, "timesteps_since_restore": 0, "iterations_since_restore": 6, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 21.883333333333336, "ram_util_percent": 10.616666666666667}}
+{"custom_metrics": {"sparse_reward_mean": 0.2, "sparse_reward_min": 0, "sparse_reward_max": 20, "shaped_reward_mean": 11.08, "shaped_reward_min": 0, "shaped_reward_max": 28, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 5.11, "onion_pickup_agent_0_min": 0, "onion_pickup_agent_0_max": 12, "onion_pickup_agent_1_mean": 5.53, "onion_pickup_agent_1_min": 1, "onion_pickup_agent_1_max": 12, "useful_onion_pickup_agent_0_mean": 3.36, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 9, "useful_onion_pickup_agent_1_mean": 3.8, "useful_onion_pickup_agent_1_min": 0, "useful_onion_pickup_agent_1_max": 11, "onion_drop_agent_0_mean": 3.55, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 10, "onion_drop_agent_1_mean": 3.97, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 10, "useful_onion_drop_agent_0_mean": 1.46, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 7, "useful_onion_drop_agent_1_mean": 1.5, 
"useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 6, "potting_onion_agent_0_mean": 1.07, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 4, "potting_onion_agent_1_mean": 1.02, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 4, "dish_pickup_agent_0_mean": 2.79, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 2.63, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 0.23, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 2, "useful_dish_pickup_agent_1_mean": 0.19, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 2, "dish_drop_agent_0_mean": 2.22, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 11, "dish_drop_agent_1_mean": 1.98, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 10, "useful_dish_drop_agent_0_mean": 0.96, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 10, "useful_dish_drop_agent_1_mean": 1.11, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 8, "soup_pickup_agent_0_mean": 0.54, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 4, "soup_pickup_agent_1_mean": 0.52, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 3, "soup_delivery_agent_0_mean": 0.13, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 1, "soup_delivery_agent_1_mean": 0.13, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 1, "soup_drop_agent_0_mean": 0.3, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 4, "soup_drop_agent_1_mean": 0.32, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 3, "optimal_onion_potting_agent_0_mean": 1.07, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 4, "optimal_onion_potting_agent_1_mean": 1.02, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 4, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, 
"optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 1.07, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 4, "viable_onion_potting_agent_1_mean": 1.02, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 4, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0031250000465661287, "cur_lr": 0.0010000000474974513, "total_loss": -0.1758706122636795, "policy_loss": -0.001237577642314136, "vf_loss": 0.8410797119140625, 
"vf_explained_var": 0.007221847772598267, "kl": 0.0006372901843860745, "entropy": 1.7854719161987305, "entropy_coeff": 0.09785600006580353, "model": {}}}}, "num_env_steps_sampled": 89600, "num_env_steps_trained": 89600, "num_agent_steps_sampled": 179200, "num_agent_steps_trained": 179200}, "sampler_results": {"episode_reward_max": 54.0, "episode_reward_min": 0.0, "episode_reward_mean": 11.48, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 31.0}, "policy_reward_mean": {"ppo": 5.74}, "custom_metrics": {"sparse_reward_mean": 0.2, "sparse_reward_min": 0, "sparse_reward_max": 20, "shaped_reward_mean": 11.08, "shaped_reward_min": 0, "shaped_reward_max": 28, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 5.11, "onion_pickup_agent_0_min": 0, "onion_pickup_agent_0_max": 12, "onion_pickup_agent_1_mean": 5.53, "onion_pickup_agent_1_min": 1, "onion_pickup_agent_1_max": 
12, "useful_onion_pickup_agent_0_mean": 3.36, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 9, "useful_onion_pickup_agent_1_mean": 3.8, "useful_onion_pickup_agent_1_min": 0, "useful_onion_pickup_agent_1_max": 11, "onion_drop_agent_0_mean": 3.55, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 10, "onion_drop_agent_1_mean": 3.97, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 10, "useful_onion_drop_agent_0_mean": 1.46, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 7, "useful_onion_drop_agent_1_mean": 1.5, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 6, "potting_onion_agent_0_mean": 1.07, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 4, "potting_onion_agent_1_mean": 1.02, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 4, "dish_pickup_agent_0_mean": 2.79, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 2.63, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 0.23, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 2, "useful_dish_pickup_agent_1_mean": 0.19, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 2, "dish_drop_agent_0_mean": 2.22, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 11, "dish_drop_agent_1_mean": 1.98, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 10, "useful_dish_drop_agent_0_mean": 0.96, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 10, "useful_dish_drop_agent_1_mean": 1.11, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 8, "soup_pickup_agent_0_mean": 0.54, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 4, "soup_pickup_agent_1_mean": 0.52, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 3, "soup_delivery_agent_0_mean": 0.13, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 1, "soup_delivery_agent_1_mean": 0.13, 
"soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 1, "soup_drop_agent_0_mean": 0.3, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 4, "soup_drop_agent_1_mean": 0.32, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 3, "optimal_onion_potting_agent_0_mean": 1.07, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 4, "optimal_onion_potting_agent_1_mean": 1.02, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 4, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 1.07, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 4, "viable_onion_potting_agent_1_mean": 1.02, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 4, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, 
"useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [0.0, 14.0, 3.0, 11.0, 3.0, 6.0, 3.0, 9.0, 8.0, 11.0, 14.0, 3.0, 17.0, 17.0, 3.0, 11.0, 6.0, 20.0, 11.0, 6.0, 9.0, 3.0, 3.0, 11.0, 11.0, 6.0, 6.0, 8.0, 11.0, 3.0, 12.0, 14.0, 3.0, 6.0, 11.0, 9.0, 14.0, 3.0, 17.0, 9.0, 20.0, 22.0, 20.0, 8.0, 8.0, 17.0, 11.0, 3.0, 11.0, 3.0, 12.0, 3.0, 22.0, 8.0, 17.0, 3.0, 22.0, 12.0, 8.0, 14.0, 17.0, 19.0, 3.0, 54.0, 28.0, 14.0, 12.0, 0.0, 11.0, 17.0, 26.0, 11.0, 3.0, 11.0, 8.0, 3.0, 14.0, 14.0, 19.0, 11.0, 11.0, 6.0, 3.0, 11.0, 8.0, 14.0, 11.0, 23.0, 17.0, 16.0, 25.0, 14.0, 8.0, 14.0, 20.0, 14.0, 3.0, 3.0, 17.0, 26.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [0.0, 0.0, 11.0, 3.0, 3.0, 0.0, 3.0, 8.0, 3.0, 0.0, 6.0, 0.0, 0.0, 3.0, 6.0, 3.0, 0.0, 8.0, 5.0, 6.0, 9.0, 5.0, 0.0, 3.0, 6.0, 11.0, 11.0, 6.0, 0.0, 3.0, 3.0, 8.0, 6.0, 0.0, 14.0, 6.0, 8.0, 3.0, 0.0, 6.0, 3.0, 6.0, 3.0, 0.0, 0.0, 3.0, 3.0, 8.0, 3.0, 8.0, 0.0, 6.0, 3.0, 3.0, 8.0, 0.0, 0.0, 11.0, 0.0, 3.0, 6.0, 6.0, 5.0, 9.0, 3.0, 0.0, 6.0, 0.0, 11.0, 0.0, 6.0, 3.0, 8.0, 6.0, 0.0, 3.0, 6.0, 11.0, 3.0, 6.0, 14.0, 6.0, 8.0, 14.0, 12.0, 8.0, 0.0, 8.0, 5.0, 3.0, 8.0, 9.0, 11.0, 0.0, 3.0, 0.0, 3.0, 8.0, 0.0, 3.0, 6.0, 6.0, 0.0, 3.0, 8.0, 14.0, 8.0, 0.0, 12.0, 5.0, 0.0, 
3.0, 6.0, 16.0, 3.0, 9.0, 0.0, 8.0, 11.0, 3.0, 6.0, 11.0, 3.0, 16.0, 3.0, 0.0, 31.0, 23.0, 14.0, 14.0, 3.0, 11.0, 12.0, 0.0, 0.0, 0.0, 6.0, 5.0, 6.0, 11.0, 12.0, 14.0, 3.0, 8.0, 0.0, 3.0, 3.0, 8.0, 8.0, 0.0, 0.0, 3.0, 9.0, 5.0, 3.0, 11.0, 16.0, 3.0, 8.0, 3.0, 6.0, 5.0, 6.0, 0.0, 3.0, 0.0, 8.0, 3.0, 3.0, 5.0, 11.0, 3.0, 5.0, 6.0, 15.0, 8.0, 8.0, 9.0, 8.0, 8.0, 11.0, 14.0, 5.0, 9.0, 0.0, 8.0, 11.0, 3.0, 11.0, 9.0, 9.0, 5.0, 3.0, 0.0, 0.0, 3.0, 6.0, 11.0, 11.0, 15.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.701101003459848, "mean_inference_ms": 1.2283407266417963, "mean_action_processing_ms": 0.13500840795895466, "mean_env_wait_ms": 0.8444517091174393, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 54.0, "episode_reward_min": 0.0, "episode_reward_mean": 11.48, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 31.0}, "policy_reward_mean": {"ppo": 5.74}, "hist_stats": {"episode_reward": [0.0, 14.0, 3.0, 11.0, 3.0, 6.0, 3.0, 9.0, 8.0, 11.0, 14.0, 3.0, 17.0, 17.0, 3.0, 11.0, 6.0, 20.0, 11.0, 6.0, 9.0, 3.0, 3.0, 11.0, 11.0, 6.0, 6.0, 8.0, 11.0, 3.0, 12.0, 14.0, 3.0, 6.0, 11.0, 9.0, 14.0, 3.0, 17.0, 9.0, 20.0, 22.0, 20.0, 8.0, 8.0, 17.0, 11.0, 3.0, 11.0, 3.0, 12.0, 3.0, 22.0, 8.0, 17.0, 3.0, 22.0, 12.0, 8.0, 14.0, 17.0, 19.0, 3.0, 54.0, 28.0, 14.0, 12.0, 0.0, 11.0, 17.0, 26.0, 11.0, 3.0, 11.0, 8.0, 3.0, 14.0, 14.0, 19.0, 11.0, 11.0, 6.0, 3.0, 11.0, 8.0, 14.0, 11.0, 23.0, 17.0, 16.0, 25.0, 14.0, 8.0, 14.0, 20.0, 14.0, 3.0, 3.0, 17.0, 26.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [0.0, 0.0, 11.0, 3.0, 3.0, 0.0, 3.0, 8.0, 3.0, 0.0, 6.0, 0.0, 0.0, 3.0, 6.0, 3.0, 0.0, 8.0, 5.0, 6.0, 9.0, 5.0, 0.0, 3.0, 6.0, 11.0, 11.0, 6.0, 0.0, 3.0, 3.0, 8.0, 6.0, 0.0, 14.0, 6.0, 8.0, 3.0, 0.0, 6.0, 3.0, 6.0, 3.0, 0.0, 0.0, 3.0, 3.0, 8.0, 3.0, 8.0, 0.0, 6.0, 3.0, 3.0, 8.0, 0.0, 0.0, 11.0, 0.0, 3.0, 6.0, 6.0, 5.0, 9.0, 3.0, 0.0, 6.0, 0.0, 11.0, 0.0, 6.0, 3.0, 8.0, 6.0, 0.0, 3.0, 6.0, 11.0, 3.0, 6.0, 14.0, 6.0, 8.0, 14.0, 12.0, 8.0, 0.0, 8.0, 5.0, 3.0, 8.0, 9.0, 11.0, 0.0, 3.0, 0.0, 3.0, 8.0, 0.0, 3.0, 6.0, 6.0, 0.0, 3.0, 8.0, 14.0, 8.0, 0.0, 12.0, 5.0, 0.0, 3.0, 6.0, 16.0, 3.0, 9.0, 0.0, 8.0, 11.0, 3.0, 6.0, 11.0, 3.0, 16.0, 3.0, 0.0, 31.0, 23.0, 14.0, 14.0, 3.0, 11.0, 12.0, 0.0, 0.0, 0.0, 6.0, 5.0, 6.0, 11.0, 12.0, 14.0, 3.0, 8.0, 0.0, 3.0, 3.0, 8.0, 8.0, 0.0, 0.0, 3.0, 9.0, 5.0, 3.0, 11.0, 16.0, 3.0, 8.0, 3.0, 6.0, 5.0, 6.0, 0.0, 3.0, 0.0, 8.0, 3.0, 3.0, 5.0, 11.0, 3.0, 5.0, 6.0, 15.0, 8.0, 8.0, 9.0, 8.0, 8.0, 11.0, 14.0, 5.0, 9.0, 0.0, 8.0, 11.0, 3.0, 11.0, 9.0, 9.0, 5.0, 3.0, 0.0, 0.0, 3.0, 6.0, 11.0, 11.0, 15.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.701101003459848, "mean_inference_ms": 1.2283407266417963, "mean_action_processing_ms": 0.13500840795895466, "mean_env_wait_ms": 0.8444517091174393, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 179200, "num_agent_steps_trained": 179200, "num_env_steps_sampled": 89600, "num_env_steps_trained": 89600, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 89600, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 179200, "timers": {"training_iteration_time_ms": 3869.742, "learn_time_ms": 1221.337, "learn_throughput": 10480.315, "synch_weights_time_ms": 19.85}, "counters": {"num_env_steps_sampled": 89600, "num_env_steps_trained": 89600, "num_agent_steps_sampled": 179200, 
"num_agent_steps_trained": 179200}, "done": false, "episodes_total": 224, "training_iteration": 7, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-00-22", "timestamp": 1666580422, "time_this_iter_s": 3.916262149810791, "time_total_s": 27.442856073379517, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, 
"clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": 
true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, 
"env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, 
"custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, 
"vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, 
"min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 27.442856073379517, "timesteps_since_restore": 0, "iterations_since_restore": 7, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 22.03333333333333, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 0.2, "sparse_reward_min": 0, "sparse_reward_max": 20, "shaped_reward_mean": 12.78, "shaped_reward_min": 0, "shaped_reward_max": 33, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 4.96, "onion_pickup_agent_0_min": 0, "onion_pickup_agent_0_max": 12, "onion_pickup_agent_1_mean": 5.41, "onion_pickup_agent_1_min": 1, "onion_pickup_agent_1_max": 12, "useful_onion_pickup_agent_0_mean": 3.43, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 9, "useful_onion_pickup_agent_1_mean": 3.92, "useful_onion_pickup_agent_1_min": 0, "useful_onion_pickup_agent_1_max": 11, "onion_drop_agent_0_mean": 3.24, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 10, "onion_drop_agent_1_mean": 3.75, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 10, "useful_onion_drop_agent_0_mean": 1.24, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 1.25, 
"useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 6, "potting_onion_agent_0_mean": 1.24, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 4, "potting_onion_agent_1_mean": 1.12, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 4, "dish_pickup_agent_0_mean": 3.17, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 2.8, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 0.26, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 2, "useful_dish_pickup_agent_1_mean": 0.19, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 1, "dish_drop_agent_0_mean": 2.49, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 11, "dish_drop_agent_1_mean": 2.08, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 10, "useful_dish_drop_agent_0_mean": 0.96, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 10, "useful_dish_drop_agent_1_mean": 1.13, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 8, "soup_pickup_agent_0_mean": 0.56, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 4, "soup_pickup_agent_1_mean": 0.62, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 3, "soup_delivery_agent_0_mean": 0.19, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 1, "soup_delivery_agent_1_mean": 0.14, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 1, "soup_drop_agent_0_mean": 0.25, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 4, "soup_drop_agent_1_mean": 0.38, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 3, "optimal_onion_potting_agent_0_mean": 1.24, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 4, "optimal_onion_potting_agent_1_mean": 1.12, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 4, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, 
"optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 1.24, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 4, "viable_onion_potting_agent_1_mean": 1.12, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 4, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0015625000232830644, "cur_lr": 0.0010000000474974513, "total_loss": -0.14512300491333008, "policy_loss": -0.0010335970437154174, "vf_loss": 0.9326763153076172, 
"vf_explained_var": 0.009893229231238365, "kl": 0.0006305932183749974, "entropy": 1.7837448120117188, "entropy_coeff": 0.08083199709653854, "model": {}}}}, "num_env_steps_sampled": 102400, "num_env_steps_trained": 102400, "num_agent_steps_sampled": 204800, "num_agent_steps_trained": 204800}, "sampler_results": {"episode_reward_max": 54.0, "episode_reward_min": 0.0, "episode_reward_mean": 13.18, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 31.0}, "policy_reward_mean": {"ppo": 6.59}, "custom_metrics": {"sparse_reward_mean": 0.2, "sparse_reward_min": 0, "sparse_reward_max": 20, "shaped_reward_mean": 12.78, "shaped_reward_min": 0, "shaped_reward_max": 33, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 4.96, "onion_pickup_agent_0_min": 0, "onion_pickup_agent_0_max": 12, "onion_pickup_agent_1_mean": 5.41, "onion_pickup_agent_1_min": 1, 
"onion_pickup_agent_1_max": 12, "useful_onion_pickup_agent_0_mean": 3.43, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 9, "useful_onion_pickup_agent_1_mean": 3.92, "useful_onion_pickup_agent_1_min": 0, "useful_onion_pickup_agent_1_max": 11, "onion_drop_agent_0_mean": 3.24, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 10, "onion_drop_agent_1_mean": 3.75, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 10, "useful_onion_drop_agent_0_mean": 1.24, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 1.25, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 6, "potting_onion_agent_0_mean": 1.24, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 4, "potting_onion_agent_1_mean": 1.12, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 4, "dish_pickup_agent_0_mean": 3.17, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 2.8, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 0.26, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 2, "useful_dish_pickup_agent_1_mean": 0.19, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 1, "dish_drop_agent_0_mean": 2.49, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 11, "dish_drop_agent_1_mean": 2.08, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 10, "useful_dish_drop_agent_0_mean": 0.96, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 10, "useful_dish_drop_agent_1_mean": 1.13, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 8, "soup_pickup_agent_0_mean": 0.56, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 4, "soup_pickup_agent_1_mean": 0.62, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 3, "soup_delivery_agent_0_mean": 0.19, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 1, 
"soup_delivery_agent_1_mean": 0.14, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 1, "soup_drop_agent_0_mean": 0.25, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 4, "soup_drop_agent_1_mean": 0.38, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 3, "optimal_onion_potting_agent_0_mean": 1.24, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 4, "optimal_onion_potting_agent_1_mean": 1.12, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 4, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 1.24, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 4, "viable_onion_potting_agent_1_mean": 1.12, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 4, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, 
"useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [3.0, 6.0, 11.0, 9.0, 14.0, 3.0, 17.0, 9.0, 20.0, 22.0, 20.0, 8.0, 8.0, 17.0, 11.0, 3.0, 11.0, 3.0, 12.0, 3.0, 22.0, 8.0, 17.0, 3.0, 22.0, 12.0, 8.0, 14.0, 17.0, 19.0, 3.0, 54.0, 28.0, 14.0, 12.0, 0.0, 11.0, 17.0, 26.0, 11.0, 3.0, 11.0, 8.0, 3.0, 14.0, 14.0, 19.0, 11.0, 11.0, 6.0, 3.0, 11.0, 8.0, 14.0, 11.0, 23.0, 17.0, 16.0, 25.0, 14.0, 8.0, 14.0, 20.0, 14.0, 3.0, 3.0, 17.0, 26.0, 14.0, 17.0, 17.0, 14.0, 17.0, 33.0, 24.0, 17.0, 6.0, 0.0, 11.0, 14.0, 20.0, 11.0, 25.0, 11.0, 9.0, 14.0, 14.0, 14.0, 9.0, 17.0, 14.0, 11.0, 14.0, 6.0, 14.0, 16.0, 6.0, 17.0, 14.0, 3.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [3.0, 0.0, 6.0, 0.0, 11.0, 0.0, 6.0, 3.0, 8.0, 6.0, 0.0, 3.0, 6.0, 11.0, 3.0, 6.0, 14.0, 6.0, 8.0, 14.0, 12.0, 8.0, 0.0, 8.0, 5.0, 3.0, 8.0, 9.0, 11.0, 0.0, 3.0, 0.0, 3.0, 8.0, 0.0, 3.0, 6.0, 6.0, 0.0, 3.0, 8.0, 14.0, 8.0, 0.0, 12.0, 5.0, 0.0, 3.0, 6.0, 16.0, 3.0, 9.0, 0.0, 8.0, 11.0, 3.0, 6.0, 11.0, 3.0, 16.0, 3.0, 0.0, 31.0, 23.0, 14.0, 14.0, 3.0, 11.0, 12.0, 0.0, 0.0, 0.0, 6.0, 5.0, 6.0, 11.0, 12.0, 14.0, 3.0, 8.0, 0.0, 3.0, 3.0, 8.0, 8.0, 0.0, 0.0, 3.0, 9.0, 5.0, 3.0, 11.0, 16.0, 3.0, 8.0, 3.0, 6.0, 5.0, 6.0, 
0.0, 3.0, 0.0, 8.0, 3.0, 3.0, 5.0, 11.0, 3.0, 5.0, 6.0, 15.0, 8.0, 8.0, 9.0, 8.0, 8.0, 11.0, 14.0, 5.0, 9.0, 0.0, 8.0, 11.0, 3.0, 11.0, 9.0, 9.0, 5.0, 3.0, 0.0, 0.0, 3.0, 6.0, 11.0, 11.0, 15.0, 14.0, 0.0, 11.0, 6.0, 6.0, 11.0, 8.0, 6.0, 6.0, 11.0, 24.0, 9.0, 11.0, 13.0, 14.0, 3.0, 3.0, 3.0, 0.0, 0.0, 0.0, 11.0, 6.0, 8.0, 12.0, 8.0, 3.0, 8.0, 11.0, 14.0, 8.0, 3.0, 6.0, 3.0, 9.0, 5.0, 3.0, 11.0, 8.0, 6.0, 6.0, 3.0, 3.0, 14.0, 11.0, 3.0, 6.0, 5.0, 11.0, 3.0, 0.0, 6.0, 6.0, 8.0, 6.0, 10.0, 0.0, 6.0, 8.0, 9.0, 8.0, 6.0, 0.0, 3.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7037344718234254, "mean_inference_ms": 1.2277625253300022, "mean_action_processing_ms": 0.13515547152110657, "mean_env_wait_ms": 0.8473578368718657, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 54.0, "episode_reward_min": 0.0, "episode_reward_mean": 13.18, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 31.0}, "policy_reward_mean": {"ppo": 6.59}, "hist_stats": {"episode_reward": [3.0, 6.0, 11.0, 9.0, 14.0, 3.0, 17.0, 9.0, 20.0, 22.0, 20.0, 8.0, 8.0, 17.0, 11.0, 3.0, 11.0, 3.0, 12.0, 3.0, 22.0, 8.0, 17.0, 3.0, 22.0, 12.0, 8.0, 14.0, 17.0, 19.0, 3.0, 54.0, 28.0, 14.0, 12.0, 0.0, 11.0, 17.0, 26.0, 11.0, 3.0, 11.0, 8.0, 3.0, 14.0, 14.0, 19.0, 11.0, 11.0, 6.0, 3.0, 11.0, 8.0, 14.0, 11.0, 23.0, 17.0, 16.0, 25.0, 14.0, 8.0, 14.0, 20.0, 14.0, 3.0, 3.0, 17.0, 26.0, 14.0, 17.0, 17.0, 14.0, 17.0, 33.0, 24.0, 17.0, 6.0, 0.0, 11.0, 14.0, 20.0, 11.0, 25.0, 11.0, 9.0, 14.0, 14.0, 14.0, 9.0, 17.0, 14.0, 11.0, 14.0, 6.0, 14.0, 16.0, 6.0, 17.0, 14.0, 3.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [3.0, 0.0, 6.0, 0.0, 11.0, 0.0, 6.0, 3.0, 8.0, 6.0, 0.0, 3.0, 6.0, 11.0, 3.0, 6.0, 14.0, 6.0, 8.0, 14.0, 12.0, 8.0, 0.0, 8.0, 5.0, 3.0, 8.0, 9.0, 11.0, 0.0, 3.0, 0.0, 3.0, 8.0, 0.0, 3.0, 6.0, 6.0, 0.0, 3.0, 8.0, 14.0, 8.0, 0.0, 12.0, 5.0, 0.0, 3.0, 6.0, 16.0, 3.0, 9.0, 0.0, 8.0, 11.0, 3.0, 6.0, 11.0, 3.0, 16.0, 3.0, 0.0, 31.0, 23.0, 14.0, 14.0, 3.0, 11.0, 12.0, 0.0, 0.0, 0.0, 6.0, 5.0, 6.0, 11.0, 12.0, 14.0, 3.0, 8.0, 0.0, 3.0, 3.0, 8.0, 8.0, 0.0, 0.0, 3.0, 9.0, 5.0, 3.0, 11.0, 16.0, 3.0, 8.0, 3.0, 6.0, 5.0, 6.0, 0.0, 3.0, 0.0, 8.0, 3.0, 3.0, 5.0, 11.0, 3.0, 5.0, 6.0, 15.0, 8.0, 8.0, 9.0, 8.0, 8.0, 11.0, 14.0, 5.0, 9.0, 0.0, 8.0, 11.0, 3.0, 11.0, 9.0, 9.0, 5.0, 3.0, 0.0, 0.0, 3.0, 6.0, 11.0, 11.0, 15.0, 14.0, 0.0, 11.0, 6.0, 6.0, 11.0, 8.0, 6.0, 6.0, 11.0, 24.0, 9.0, 11.0, 13.0, 14.0, 3.0, 3.0, 3.0, 0.0, 0.0, 0.0, 11.0, 6.0, 8.0, 12.0, 8.0, 3.0, 8.0, 11.0, 14.0, 8.0, 3.0, 6.0, 3.0, 9.0, 5.0, 3.0, 11.0, 8.0, 6.0, 6.0, 3.0, 3.0, 14.0, 11.0, 3.0, 6.0, 5.0, 11.0, 3.0, 0.0, 6.0, 6.0, 8.0, 6.0, 10.0, 0.0, 6.0, 8.0, 9.0, 8.0, 6.0, 0.0, 3.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7037344718234254, "mean_inference_ms": 1.2277625253300022, "mean_action_processing_ms": 0.13515547152110657, "mean_env_wait_ms": 0.8473578368718657, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 204800, "num_agent_steps_trained": 204800, "num_env_steps_sampled": 102400, "num_env_steps_trained": 102400, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 102400, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 204800, "timers": {"training_iteration_time_ms": 3844.573, "learn_time_ms": 1202.47, "learn_throughput": 10644.757, "synch_weights_time_ms": 18.757}, "counters": 
{"num_env_steps_sampled": 102400, "num_env_steps_trained": 102400, "num_agent_steps_sampled": 204800, "num_agent_steps_trained": 204800}, "done": false, "episodes_total": 256, "training_iteration": 8, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-00-26", "timestamp": 1666580426, "time_this_iter_s": 3.7250826358795166, "time_total_s": 31.167938709259033, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, 
"action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, 
"custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, 
"DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, 
"dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, 
"use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, 
"replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 31.167938709259033, "timesteps_since_restore": 0, "iterations_since_restore": 8, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 21.683333333333334, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 0.2, "sparse_reward_min": 0, "sparse_reward_max": 20, "shaped_reward_mean": 13.53, "shaped_reward_min": 0, "shaped_reward_max": 33, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 4.95, "onion_pickup_agent_0_min": 0, "onion_pickup_agent_0_max": 11, "onion_pickup_agent_1_mean": 5.16, "onion_pickup_agent_1_min": 0, "onion_pickup_agent_1_max": 11, "useful_onion_pickup_agent_0_mean": 3.47, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 11, "useful_onion_pickup_agent_1_mean": 3.64, "useful_onion_pickup_agent_1_min": 0, "useful_onion_pickup_agent_1_max": 11, "onion_drop_agent_0_mean": 3.15, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 9, "onion_drop_agent_1_mean": 3.51, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 8, "useful_onion_drop_agent_0_mean": 1.13, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 1.31, 
"useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 7, "potting_onion_agent_0_mean": 1.3, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 4, "potting_onion_agent_1_mean": 1.18, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 5, "dish_pickup_agent_0_mean": 3.15, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 2.69, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 0.25, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 2, "useful_dish_pickup_agent_1_mean": 0.18, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 1, "dish_drop_agent_0_mean": 2.49, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 9, "dish_drop_agent_1_mean": 1.88, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 1.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 5, "useful_dish_drop_agent_1_mean": 0.87, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 6, "soup_pickup_agent_0_mean": 0.6, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 4, "soup_pickup_agent_1_mean": 0.74, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 4, "soup_delivery_agent_0_mean": 0.18, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 1, "soup_delivery_agent_1_mean": 0.12, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 1, "soup_drop_agent_0_mean": 0.28, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 3, "soup_drop_agent_1_mean": 0.5, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 4, "optimal_onion_potting_agent_0_mean": 1.3, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 4, "optimal_onion_potting_agent_1_mean": 1.18, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 5, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, 
"optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 1.3, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 4, "viable_onion_potting_agent_1_mean": 1.18, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 5, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0007812500116415322, "cur_lr": 0.0010000000474974513, "total_loss": -0.11943156272172928, "policy_loss": -0.00582084758207202, "vf_loss": 0.9689017534255981, 
"vf_explained_var": 0.007223993539810181, "kl": 0.0006363544380292296, "entropy": 1.7820351123809814, "entropy_coeff": 0.06380800157785416, "model": {}}}}, "num_env_steps_sampled": 115200, "num_env_steps_trained": 115200, "num_agent_steps_sampled": 230400, "num_agent_steps_trained": 230400}, "sampler_results": {"episode_reward_max": 57.0, "episode_reward_min": 0.0, "episode_reward_mean": 13.93, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 31.0}, "policy_reward_mean": {"ppo": 6.965}, "custom_metrics": {"sparse_reward_mean": 0.2, "sparse_reward_min": 0, "sparse_reward_max": 20, "shaped_reward_mean": 13.53, "shaped_reward_min": 0, "shaped_reward_max": 33, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 4.95, "onion_pickup_agent_0_min": 0, "onion_pickup_agent_0_max": 11, "onion_pickup_agent_1_mean": 5.16, "onion_pickup_agent_1_min": 0, 
"onion_pickup_agent_1_max": 11, "useful_onion_pickup_agent_0_mean": 3.47, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 11, "useful_onion_pickup_agent_1_mean": 3.64, "useful_onion_pickup_agent_1_min": 0, "useful_onion_pickup_agent_1_max": 11, "onion_drop_agent_0_mean": 3.15, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 9, "onion_drop_agent_1_mean": 3.51, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 8, "useful_onion_drop_agent_0_mean": 1.13, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 1.31, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 7, "potting_onion_agent_0_mean": 1.3, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 4, "potting_onion_agent_1_mean": 1.18, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 5, "dish_pickup_agent_0_mean": 3.15, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 2.69, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 0.25, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 2, "useful_dish_pickup_agent_1_mean": 0.18, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 1, "dish_drop_agent_0_mean": 2.49, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 9, "dish_drop_agent_1_mean": 1.88, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 1.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 5, "useful_dish_drop_agent_1_mean": 0.87, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 6, "soup_pickup_agent_0_mean": 0.6, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 4, "soup_pickup_agent_1_mean": 0.74, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 4, "soup_delivery_agent_0_mean": 0.18, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 1, 
"soup_delivery_agent_1_mean": 0.12, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 1, "soup_drop_agent_0_mean": 0.28, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 3, "soup_drop_agent_1_mean": 0.5, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 4, "optimal_onion_potting_agent_0_mean": 1.3, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 4, "optimal_onion_potting_agent_1_mean": 1.18, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 5, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 1.3, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 4, "viable_onion_potting_agent_1_mean": 1.18, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 5, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, 
"useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [28.0, 14.0, 12.0, 0.0, 11.0, 17.0, 26.0, 11.0, 3.0, 11.0, 8.0, 3.0, 14.0, 14.0, 19.0, 11.0, 11.0, 6.0, 3.0, 11.0, 8.0, 14.0, 11.0, 23.0, 17.0, 16.0, 25.0, 14.0, 8.0, 14.0, 20.0, 14.0, 3.0, 3.0, 17.0, 26.0, 14.0, 17.0, 17.0, 14.0, 17.0, 33.0, 24.0, 17.0, 6.0, 0.0, 11.0, 14.0, 20.0, 11.0, 25.0, 11.0, 9.0, 14.0, 14.0, 14.0, 9.0, 17.0, 14.0, 11.0, 14.0, 6.0, 14.0, 16.0, 6.0, 17.0, 14.0, 3.0, 3.0, 14.0, 16.0, 11.0, 14.0, 16.0, 22.0, 3.0, 26.0, 3.0, 20.0, 57.0, 14.0, 27.0, 3.0, 6.0, 11.0, 3.0, 20.0, 9.0, 16.0, 19.0, 17.0, 22.0, 14.0, 25.0, 11.0, 14.0, 20.0, 11.0, 3.0, 14.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [14.0, 14.0, 3.0, 11.0, 12.0, 0.0, 0.0, 0.0, 6.0, 5.0, 6.0, 11.0, 12.0, 14.0, 3.0, 8.0, 0.0, 3.0, 3.0, 8.0, 8.0, 0.0, 0.0, 3.0, 9.0, 5.0, 3.0, 11.0, 16.0, 3.0, 8.0, 3.0, 6.0, 5.0, 6.0, 0.0, 3.0, 0.0, 8.0, 3.0, 3.0, 5.0, 11.0, 3.0, 5.0, 6.0, 15.0, 8.0, 8.0, 9.0, 8.0, 8.0, 11.0, 14.0, 5.0, 9.0, 0.0, 8.0, 11.0, 3.0, 11.0, 9.0, 9.0, 5.0, 3.0, 0.0, 0.0, 3.0, 6.0, 11.0, 11.0, 15.0, 14.0, 0.0, 11.0, 6.0, 6.0, 11.0, 8.0, 6.0, 6.0, 11.0, 24.0, 9.0, 11.0, 13.0, 14.0, 3.0, 3.0, 3.0, 0.0, 0.0, 0.0, 11.0, 6.0, 8.0, 12.0, 
8.0, 3.0, 8.0, 11.0, 14.0, 8.0, 3.0, 6.0, 3.0, 9.0, 5.0, 3.0, 11.0, 8.0, 6.0, 6.0, 3.0, 3.0, 14.0, 11.0, 3.0, 6.0, 5.0, 11.0, 3.0, 0.0, 6.0, 6.0, 8.0, 6.0, 10.0, 0.0, 6.0, 8.0, 9.0, 8.0, 6.0, 0.0, 3.0, 0.0, 3.0, 8.0, 6.0, 13.0, 3.0, 6.0, 5.0, 8.0, 6.0, 3.0, 13.0, 8.0, 14.0, 0.0, 3.0, 11.0, 15.0, 0.0, 3.0, 9.0, 11.0, 26.0, 31.0, 3.0, 11.0, 13.0, 14.0, 3.0, 0.0, 0.0, 6.0, 11.0, 0.0, 3.0, 0.0, 12.0, 8.0, 9.0, 0.0, 3.0, 13.0, 11.0, 8.0, 14.0, 3.0, 6.0, 16.0, 3.0, 11.0, 12.0, 13.0, 3.0, 8.0, 3.0, 11.0, 11.0, 9.0, 3.0, 8.0, 3.0, 0.0, 11.0, 3.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.704873639212576, "mean_inference_ms": 1.227418086105394, "mean_action_processing_ms": 0.13525291976515277, "mean_env_wait_ms": 0.8505155902591237, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 57.0, "episode_reward_min": 0.0, "episode_reward_mean": 13.93, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 31.0}, "policy_reward_mean": {"ppo": 6.965}, "hist_stats": {"episode_reward": [28.0, 14.0, 12.0, 0.0, 11.0, 17.0, 26.0, 11.0, 3.0, 11.0, 8.0, 3.0, 14.0, 14.0, 19.0, 11.0, 11.0, 6.0, 3.0, 11.0, 8.0, 14.0, 11.0, 23.0, 17.0, 16.0, 25.0, 14.0, 8.0, 14.0, 20.0, 14.0, 3.0, 3.0, 17.0, 26.0, 14.0, 17.0, 17.0, 14.0, 17.0, 33.0, 24.0, 17.0, 6.0, 0.0, 11.0, 14.0, 20.0, 11.0, 25.0, 11.0, 9.0, 14.0, 14.0, 14.0, 9.0, 17.0, 14.0, 11.0, 14.0, 6.0, 14.0, 16.0, 6.0, 17.0, 14.0, 3.0, 3.0, 14.0, 16.0, 11.0, 14.0, 16.0, 22.0, 3.0, 26.0, 3.0, 20.0, 57.0, 14.0, 27.0, 3.0, 6.0, 11.0, 3.0, 20.0, 9.0, 16.0, 19.0, 17.0, 22.0, 14.0, 25.0, 11.0, 14.0, 20.0, 11.0, 3.0, 14.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [14.0, 14.0, 3.0, 11.0, 12.0, 0.0, 0.0, 0.0, 6.0, 5.0, 6.0, 11.0, 12.0, 14.0, 3.0, 8.0, 0.0, 3.0, 3.0, 8.0, 8.0, 0.0, 0.0, 3.0, 9.0, 5.0, 3.0, 11.0, 16.0, 3.0, 8.0, 3.0, 6.0, 5.0, 6.0, 0.0, 3.0, 0.0, 8.0, 3.0, 3.0, 5.0, 11.0, 3.0, 5.0, 6.0, 15.0, 8.0, 8.0, 9.0, 8.0, 8.0, 11.0, 14.0, 5.0, 9.0, 0.0, 8.0, 11.0, 3.0, 11.0, 9.0, 9.0, 5.0, 3.0, 0.0, 0.0, 3.0, 6.0, 11.0, 11.0, 15.0, 14.0, 0.0, 11.0, 6.0, 6.0, 11.0, 8.0, 6.0, 6.0, 11.0, 24.0, 9.0, 11.0, 13.0, 14.0, 3.0, 3.0, 3.0, 0.0, 0.0, 0.0, 11.0, 6.0, 8.0, 12.0, 8.0, 3.0, 8.0, 11.0, 14.0, 8.0, 3.0, 6.0, 3.0, 9.0, 5.0, 3.0, 11.0, 8.0, 6.0, 6.0, 3.0, 3.0, 14.0, 11.0, 3.0, 6.0, 5.0, 11.0, 3.0, 0.0, 6.0, 6.0, 8.0, 6.0, 10.0, 0.0, 6.0, 8.0, 9.0, 8.0, 6.0, 0.0, 3.0, 0.0, 3.0, 8.0, 6.0, 13.0, 3.0, 6.0, 5.0, 8.0, 6.0, 3.0, 13.0, 8.0, 14.0, 0.0, 3.0, 11.0, 15.0, 0.0, 3.0, 9.0, 11.0, 26.0, 31.0, 3.0, 11.0, 13.0, 14.0, 3.0, 0.0, 0.0, 6.0, 11.0, 0.0, 3.0, 0.0, 12.0, 8.0, 9.0, 0.0, 3.0, 13.0, 11.0, 8.0, 14.0, 3.0, 6.0, 16.0, 3.0, 11.0, 12.0, 13.0, 3.0, 8.0, 3.0, 11.0, 11.0, 9.0, 3.0, 8.0, 3.0, 0.0, 11.0, 3.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.704873639212576, "mean_inference_ms": 1.227418086105394, "mean_action_processing_ms": 0.13525291976515277, "mean_env_wait_ms": 0.8505155902591237, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 230400, "num_agent_steps_trained": 230400, "num_env_steps_sampled": 115200, "num_env_steps_trained": 115200, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 115200, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 230400, "timers": {"training_iteration_time_ms": 3837.567, "learn_time_ms": 1197.115, "learn_throughput": 10692.377, "synch_weights_time_ms": 18.365}, 
"counters": {"num_env_steps_sampled": 115200, "num_env_steps_trained": 115200, "num_agent_steps_sampled": 230400, "num_agent_steps_trained": 230400}, "done": false, "episodes_total": 288, "training_iteration": 9, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-00-30", "timestamp": 1666580430, "time_this_iter_s": 3.8528952598571777, "time_total_s": 35.02083396911621, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, 
"action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, 
"custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, 
"DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, 
"dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, 
"use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, 
"replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 35.02083396911621, "timesteps_since_restore": 0, "iterations_since_restore": 9, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 21.45, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 0.4, "sparse_reward_min": 0, "sparse_reward_max": 20, "shaped_reward_mean": 13.91, "shaped_reward_min": 0, "shaped_reward_max": 36, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 5.03, "onion_pickup_agent_0_min": 0, "onion_pickup_agent_0_max": 12, "onion_pickup_agent_1_mean": 5.25, "onion_pickup_agent_1_min": 0, "onion_pickup_agent_1_max": 11, "useful_onion_pickup_agent_0_mean": 3.37, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 11, "useful_onion_pickup_agent_1_mean": 3.53, "useful_onion_pickup_agent_1_min": 0, "useful_onion_pickup_agent_1_max": 11, "onion_drop_agent_0_mean": 3.19, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 10, "onion_drop_agent_1_mean": 3.48, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 8, "useful_onion_drop_agent_0_mean": 1.26, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 1.47, 
"useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 8, "potting_onion_agent_0_mean": 1.33, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 4, "potting_onion_agent_1_mean": 1.28, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 5, "dish_pickup_agent_0_mean": 3.12, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 2.64, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 0.23, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 2, "useful_dish_pickup_agent_1_mean": 0.23, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 2, "dish_drop_agent_0_mean": 2.51, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 12, "dish_drop_agent_1_mean": 1.84, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 1.05, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 5, "useful_dish_drop_agent_1_mean": 0.73, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 5, "soup_pickup_agent_0_mean": 0.6, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 5, "soup_pickup_agent_1_mean": 0.81, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 5, "soup_delivery_agent_0_mean": 0.16, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 1, "soup_delivery_agent_1_mean": 0.18, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 1, "soup_drop_agent_0_mean": 0.33, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 4, "soup_drop_agent_1_mean": 0.51, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 4, "optimal_onion_potting_agent_0_mean": 1.33, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 4, "optimal_onion_potting_agent_1_mean": 1.28, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 5, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, 
"optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 1.33, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 4, "viable_onion_potting_agent_1_mean": 1.28, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 5, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0003906250058207661, "cur_lr": 0.0010000000474974513, "total_loss": -0.08926716446876526, "policy_loss": -0.006043273024260998, "vf_loss": 1.001132607460022, 
"vf_explained_var": 0.03307230770587921, "kl": 0.0006396523676812649, "entropy": 1.7810418605804443, "entropy_coeff": 0.04678399860858917, "model": {}}}}, "num_env_steps_sampled": 128000, "num_env_steps_trained": 128000, "num_agent_steps_sampled": 256000, "num_agent_steps_trained": 256000}, "sampler_results": {"episode_reward_max": 60.0, "episode_reward_min": 0.0, "episode_reward_mean": 14.71, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 35.0}, "policy_reward_mean": {"ppo": 7.355}, "custom_metrics": {"sparse_reward_mean": 0.4, "sparse_reward_min": 0, "sparse_reward_max": 20, "shaped_reward_mean": 13.91, "shaped_reward_min": 0, "shaped_reward_max": 36, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 5.03, "onion_pickup_agent_0_min": 0, "onion_pickup_agent_0_max": 12, "onion_pickup_agent_1_mean": 5.25, "onion_pickup_agent_1_min": 0, 
"onion_pickup_agent_1_max": 11, "useful_onion_pickup_agent_0_mean": 3.37, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 11, "useful_onion_pickup_agent_1_mean": 3.53, "useful_onion_pickup_agent_1_min": 0, "useful_onion_pickup_agent_1_max": 11, "onion_drop_agent_0_mean": 3.19, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 10, "onion_drop_agent_1_mean": 3.48, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 8, "useful_onion_drop_agent_0_mean": 1.26, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 1.47, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 8, "potting_onion_agent_0_mean": 1.33, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 4, "potting_onion_agent_1_mean": 1.28, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 5, "dish_pickup_agent_0_mean": 3.12, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 2.64, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 0.23, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 2, "useful_dish_pickup_agent_1_mean": 0.23, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 2, "dish_drop_agent_0_mean": 2.51, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 12, "dish_drop_agent_1_mean": 1.84, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 1.05, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 5, "useful_dish_drop_agent_1_mean": 0.73, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 5, "soup_pickup_agent_0_mean": 0.6, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 5, "soup_pickup_agent_1_mean": 0.81, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 5, "soup_delivery_agent_0_mean": 0.16, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 1, 
"soup_delivery_agent_1_mean": 0.18, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 1, "soup_drop_agent_0_mean": 0.33, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 4, "soup_drop_agent_1_mean": 0.51, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 4, "optimal_onion_potting_agent_0_mean": 1.33, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 4, "optimal_onion_potting_agent_1_mean": 1.28, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 5, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 1.33, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 4, "viable_onion_potting_agent_1_mean": 1.28, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 5, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, 
"useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [3.0, 3.0, 17.0, 26.0, 14.0, 17.0, 17.0, 14.0, 17.0, 33.0, 24.0, 17.0, 6.0, 0.0, 11.0, 14.0, 20.0, 11.0, 25.0, 11.0, 9.0, 14.0, 14.0, 14.0, 9.0, 17.0, 14.0, 11.0, 14.0, 6.0, 14.0, 16.0, 6.0, 17.0, 14.0, 3.0, 3.0, 14.0, 16.0, 11.0, 14.0, 16.0, 22.0, 3.0, 26.0, 3.0, 20.0, 57.0, 14.0, 27.0, 3.0, 6.0, 11.0, 3.0, 20.0, 9.0, 16.0, 19.0, 17.0, 22.0, 14.0, 25.0, 11.0, 14.0, 20.0, 11.0, 3.0, 14.0, 6.0, 20.0, 25.0, 9.0, 11.0, 25.0, 20.0, 3.0, 31.0, 60.0, 17.0, 17.0, 3.0, 9.0, 6.0, 11.0, 8.0, 14.0, 25.0, 11.0, 6.0, 9.0, 14.0, 17.0, 22.0, 22.0, 11.0, 6.0, 9.0, 3.0, 36.0, 9.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [3.0, 0.0, 0.0, 3.0, 6.0, 11.0, 11.0, 15.0, 14.0, 0.0, 11.0, 6.0, 6.0, 11.0, 8.0, 6.0, 6.0, 11.0, 24.0, 9.0, 11.0, 13.0, 14.0, 3.0, 3.0, 3.0, 0.0, 0.0, 0.0, 11.0, 6.0, 8.0, 12.0, 8.0, 3.0, 8.0, 11.0, 14.0, 8.0, 3.0, 6.0, 3.0, 9.0, 5.0, 3.0, 11.0, 8.0, 6.0, 6.0, 3.0, 3.0, 14.0, 11.0, 3.0, 6.0, 5.0, 11.0, 3.0, 0.0, 6.0, 6.0, 8.0, 6.0, 10.0, 0.0, 6.0, 8.0, 9.0, 8.0, 6.0, 0.0, 3.0, 0.0, 3.0, 8.0, 6.0, 13.0, 3.0, 6.0, 5.0, 8.0, 6.0, 3.0, 13.0, 8.0, 14.0, 0.0, 3.0, 11.0, 15.0, 0.0, 3.0, 9.0, 11.0, 26.0, 31.0, 3.0, 11.0, 
13.0, 14.0, 3.0, 0.0, 0.0, 6.0, 11.0, 0.0, 3.0, 0.0, 12.0, 8.0, 9.0, 0.0, 3.0, 13.0, 11.0, 8.0, 14.0, 3.0, 6.0, 16.0, 3.0, 11.0, 12.0, 13.0, 3.0, 8.0, 3.0, 11.0, 11.0, 9.0, 3.0, 8.0, 3.0, 0.0, 11.0, 3.0, 3.0, 3.0, 6.0, 14.0, 17.0, 8.0, 6.0, 3.0, 5.0, 6.0, 17.0, 8.0, 12.0, 8.0, 3.0, 0.0, 17.0, 14.0, 25.0, 35.0, 0.0, 17.0, 6.0, 11.0, 0.0, 3.0, 9.0, 0.0, 3.0, 3.0, 3.0, 8.0, 5.0, 3.0, 8.0, 6.0, 9.0, 16.0, 8.0, 3.0, 0.0, 6.0, 3.0, 6.0, 9.0, 5.0, 9.0, 8.0, 13.0, 9.0, 6.0, 16.0, 3.0, 8.0, 0.0, 6.0, 6.0, 3.0, 3.0, 0.0, 19.0, 17.0, 3.0, 6.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.704291722769332, "mean_inference_ms": 1.226455267948142, "mean_action_processing_ms": 0.13521600805286188, "mean_env_wait_ms": 0.8528073075048549, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 60.0, "episode_reward_min": 0.0, "episode_reward_mean": 14.71, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 35.0}, "policy_reward_mean": {"ppo": 7.355}, "hist_stats": {"episode_reward": [3.0, 3.0, 17.0, 26.0, 14.0, 17.0, 17.0, 14.0, 17.0, 33.0, 24.0, 17.0, 6.0, 0.0, 11.0, 14.0, 20.0, 11.0, 25.0, 11.0, 9.0, 14.0, 14.0, 14.0, 9.0, 17.0, 14.0, 11.0, 14.0, 6.0, 14.0, 16.0, 6.0, 17.0, 14.0, 3.0, 3.0, 14.0, 16.0, 11.0, 14.0, 16.0, 22.0, 3.0, 26.0, 3.0, 20.0, 57.0, 14.0, 27.0, 3.0, 6.0, 11.0, 3.0, 20.0, 9.0, 16.0, 19.0, 17.0, 22.0, 14.0, 25.0, 11.0, 14.0, 20.0, 11.0, 3.0, 14.0, 6.0, 20.0, 25.0, 9.0, 11.0, 25.0, 20.0, 3.0, 31.0, 60.0, 17.0, 17.0, 3.0, 9.0, 6.0, 11.0, 8.0, 14.0, 25.0, 11.0, 6.0, 9.0, 14.0, 17.0, 22.0, 22.0, 11.0, 6.0, 9.0, 3.0, 36.0, 9.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [3.0, 0.0, 0.0, 3.0, 6.0, 11.0, 11.0, 15.0, 14.0, 0.0, 11.0, 6.0, 6.0, 11.0, 8.0, 6.0, 6.0, 11.0, 24.0, 9.0, 11.0, 13.0, 14.0, 3.0, 3.0, 3.0, 0.0, 0.0, 0.0, 11.0, 6.0, 8.0, 12.0, 8.0, 3.0, 8.0, 11.0, 14.0, 8.0, 3.0, 6.0, 3.0, 9.0, 5.0, 3.0, 11.0, 8.0, 6.0, 6.0, 3.0, 3.0, 14.0, 11.0, 3.0, 6.0, 5.0, 11.0, 3.0, 0.0, 6.0, 6.0, 8.0, 6.0, 10.0, 0.0, 6.0, 8.0, 9.0, 8.0, 6.0, 0.0, 3.0, 0.0, 3.0, 8.0, 6.0, 13.0, 3.0, 6.0, 5.0, 8.0, 6.0, 3.0, 13.0, 8.0, 14.0, 0.0, 3.0, 11.0, 15.0, 0.0, 3.0, 9.0, 11.0, 26.0, 31.0, 3.0, 11.0, 13.0, 14.0, 3.0, 0.0, 0.0, 6.0, 11.0, 0.0, 3.0, 0.0, 12.0, 8.0, 9.0, 0.0, 3.0, 13.0, 11.0, 8.0, 14.0, 3.0, 6.0, 16.0, 3.0, 11.0, 12.0, 13.0, 3.0, 8.0, 3.0, 11.0, 11.0, 9.0, 3.0, 8.0, 3.0, 0.0, 11.0, 3.0, 3.0, 3.0, 6.0, 14.0, 17.0, 8.0, 6.0, 3.0, 5.0, 6.0, 17.0, 8.0, 12.0, 8.0, 3.0, 0.0, 17.0, 14.0, 25.0, 35.0, 0.0, 17.0, 6.0, 11.0, 0.0, 3.0, 9.0, 0.0, 3.0, 3.0, 3.0, 8.0, 5.0, 3.0, 8.0, 6.0, 9.0, 16.0, 8.0, 3.0, 0.0, 6.0, 3.0, 6.0, 9.0, 5.0, 9.0, 8.0, 13.0, 9.0, 6.0, 16.0, 3.0, 8.0, 0.0, 6.0, 6.0, 3.0, 3.0, 0.0, 19.0, 17.0, 3.0, 6.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.704291722769332, "mean_inference_ms": 1.226455267948142, "mean_action_processing_ms": 0.13521600805286188, "mean_env_wait_ms": 0.8528073075048549, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 256000, "num_agent_steps_trained": 256000, "num_env_steps_sampled": 128000, "num_env_steps_trained": 128000, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 128000, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 256000, "timers": {"training_iteration_time_ms": 3826.008, "learn_time_ms": 1193.503, "learn_throughput": 10724.728, "synch_weights_time_ms": 18.438}, "counters": 
{"num_env_steps_sampled": 128000, "num_env_steps_trained": 128000, "num_agent_steps_sampled": 256000, "num_agent_steps_trained": 256000}, "done": false, "episodes_total": 320, "training_iteration": 10, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-00-34", "timestamp": 1666580434, "time_this_iter_s": 3.78620982170105, "time_total_s": 38.80704379081726, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, 
"action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, 
"custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, 
"DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, 
"dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, 
"use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, 
"replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 38.80704379081726, "timesteps_since_restore": 0, "iterations_since_restore": 10, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 24.04, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 0.6, "sparse_reward_min": 0, "sparse_reward_max": 20, "shaped_reward_mean": 14.8, "shaped_reward_min": 3, "shaped_reward_max": 36, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 5.04, "onion_pickup_agent_0_min": 0, "onion_pickup_agent_0_max": 14, "onion_pickup_agent_1_mean": 5.17, "onion_pickup_agent_1_min": 0, "onion_pickup_agent_1_max": 11, "useful_onion_pickup_agent_0_mean": 3.39, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 11, "useful_onion_pickup_agent_1_mean": 3.48, "useful_onion_pickup_agent_1_min": 0, "useful_onion_pickup_agent_1_max": 9, "onion_drop_agent_0_mean": 3.24, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 11, "onion_drop_agent_1_mean": 3.29, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 8, "useful_onion_drop_agent_0_mean": 1.24, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 1.45, 
"useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 8, "potting_onion_agent_0_mean": 1.32, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 4, "potting_onion_agent_1_mean": 1.42, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 5, "dish_pickup_agent_0_mean": 3.02, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 2.95, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 0.26, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 2, "useful_dish_pickup_agent_1_mean": 0.25, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 2, "dish_drop_agent_0_mean": 2.38, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 12, "dish_drop_agent_1_mean": 2.12, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 1.05, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 5, "useful_dish_drop_agent_1_mean": 0.83, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 5, "soup_pickup_agent_0_mean": 0.77, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 6, "soup_pickup_agent_1_mean": 0.81, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 5, "soup_delivery_agent_0_mean": 0.16, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 2, "soup_delivery_agent_1_mean": 0.23, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 2, "soup_drop_agent_0_mean": 0.49, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 4, "soup_drop_agent_1_mean": 0.47, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 4, "optimal_onion_potting_agent_0_mean": 1.32, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 4, "optimal_onion_potting_agent_1_mean": 1.42, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 5, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, 
"optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 1.32, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 4, "viable_onion_potting_agent_1_mean": 1.42, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 5, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.00019531250291038305, "cur_lr": 0.0010000000474974513, "total_loss": -0.056794971227645874, "policy_loss": -0.003975650295615196, "vf_loss": 1.1292062997817993, 
"vf_explained_var": 0.03024188242852688, "kl": 0.0006997665041126311, "entropy": 1.7786418199539185, "entropy_coeff": 0.029759999364614487, "model": {}}}}, "num_env_steps_sampled": 140800, "num_env_steps_trained": 140800, "num_agent_steps_sampled": 281600, "num_agent_steps_trained": 281600}, "sampler_results": {"episode_reward_max": 65.0, "episode_reward_min": 3.0, "episode_reward_mean": 16.0, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 39.0}, "policy_reward_mean": {"ppo": 8.0}, "custom_metrics": {"sparse_reward_mean": 0.6, "sparse_reward_min": 0, "sparse_reward_max": 20, "shaped_reward_mean": 14.8, "shaped_reward_min": 3, "shaped_reward_max": 36, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 5.04, "onion_pickup_agent_0_min": 0, "onion_pickup_agent_0_max": 14, "onion_pickup_agent_1_mean": 5.17, "onion_pickup_agent_1_min": 0, "onion_pickup_agent_1_max": 
11, "useful_onion_pickup_agent_0_mean": 3.39, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 11, "useful_onion_pickup_agent_1_mean": 3.48, "useful_onion_pickup_agent_1_min": 0, "useful_onion_pickup_agent_1_max": 9, "onion_drop_agent_0_mean": 3.24, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 11, "onion_drop_agent_1_mean": 3.29, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 8, "useful_onion_drop_agent_0_mean": 1.24, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 1.45, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 8, "potting_onion_agent_0_mean": 1.32, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 4, "potting_onion_agent_1_mean": 1.42, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 5, "dish_pickup_agent_0_mean": 3.02, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 2.95, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 0.26, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 2, "useful_dish_pickup_agent_1_mean": 0.25, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 2, "dish_drop_agent_0_mean": 2.38, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 12, "dish_drop_agent_1_mean": 2.12, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 1.05, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 5, "useful_dish_drop_agent_1_mean": 0.83, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 5, "soup_pickup_agent_0_mean": 0.77, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 6, "soup_pickup_agent_1_mean": 0.81, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 5, "soup_delivery_agent_0_mean": 0.16, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 2, "soup_delivery_agent_1_mean": 0.23, 
"soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 2, "soup_drop_agent_0_mean": 0.49, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 4, "soup_drop_agent_1_mean": 0.47, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 4, "optimal_onion_potting_agent_0_mean": 1.32, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 4, "optimal_onion_potting_agent_1_mean": 1.42, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 5, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 1.32, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 4, "viable_onion_potting_agent_1_mean": 1.42, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 5, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, 
"useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [6.0, 17.0, 14.0, 3.0, 3.0, 14.0, 16.0, 11.0, 14.0, 16.0, 22.0, 3.0, 26.0, 3.0, 20.0, 57.0, 14.0, 27.0, 3.0, 6.0, 11.0, 3.0, 20.0, 9.0, 16.0, 19.0, 17.0, 22.0, 14.0, 25.0, 11.0, 14.0, 20.0, 11.0, 3.0, 14.0, 6.0, 20.0, 25.0, 9.0, 11.0, 25.0, 20.0, 3.0, 31.0, 60.0, 17.0, 17.0, 3.0, 9.0, 6.0, 11.0, 8.0, 14.0, 25.0, 11.0, 6.0, 9.0, 14.0, 17.0, 22.0, 22.0, 11.0, 6.0, 9.0, 3.0, 36.0, 9.0, 20.0, 14.0, 6.0, 9.0, 14.0, 14.0, 25.0, 14.0, 20.0, 3.0, 25.0, 6.0, 28.0, 11.0, 16.0, 22.0, 11.0, 9.0, 17.0, 9.0, 11.0, 25.0, 17.0, 25.0, 14.0, 65.0, 25.0, 31.0, 11.0, 14.0, 17.0, 33.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [0.0, 6.0, 8.0, 9.0, 8.0, 6.0, 0.0, 3.0, 0.0, 3.0, 8.0, 6.0, 13.0, 3.0, 6.0, 5.0, 8.0, 6.0, 3.0, 13.0, 8.0, 14.0, 0.0, 3.0, 11.0, 15.0, 0.0, 3.0, 9.0, 11.0, 26.0, 31.0, 3.0, 11.0, 13.0, 14.0, 3.0, 0.0, 0.0, 6.0, 11.0, 0.0, 3.0, 0.0, 12.0, 8.0, 9.0, 0.0, 3.0, 13.0, 11.0, 8.0, 14.0, 3.0, 6.0, 16.0, 3.0, 11.0, 12.0, 13.0, 3.0, 8.0, 3.0, 11.0, 11.0, 9.0, 3.0, 8.0, 3.0, 0.0, 11.0, 3.0, 3.0, 3.0, 6.0, 14.0, 17.0, 8.0, 6.0, 3.0, 5.0, 6.0, 17.0, 8.0, 12.0, 8.0, 3.0, 0.0, 17.0, 14.0, 25.0, 35.0, 0.0, 17.0, 6.0, 11.0, 0.0, 3.0, 9.0, 0.0, 3.0, 3.0, 3.0, 8.0, 5.0, 
3.0, 8.0, 6.0, 9.0, 16.0, 8.0, 3.0, 0.0, 6.0, 3.0, 6.0, 9.0, 5.0, 9.0, 8.0, 13.0, 9.0, 6.0, 16.0, 3.0, 8.0, 0.0, 6.0, 6.0, 3.0, 3.0, 0.0, 19.0, 17.0, 3.0, 6.0, 11.0, 9.0, 9.0, 5.0, 6.0, 0.0, 0.0, 9.0, 11.0, 3.0, 8.0, 6.0, 16.0, 9.0, 6.0, 8.0, 8.0, 12.0, 3.0, 0.0, 14.0, 11.0, 6.0, 0.0, 11.0, 17.0, 8.0, 3.0, 8.0, 8.0, 13.0, 9.0, 6.0, 5.0, 6.0, 3.0, 8.0, 9.0, 3.0, 6.0, 3.0, 8.0, 11.0, 14.0, 3.0, 14.0, 11.0, 14.0, 8.0, 6.0, 39.0, 26.0, 6.0, 19.0, 12.0, 19.0, 3.0, 8.0, 8.0, 6.0, 9.0, 8.0, 19.0, 14.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7029510683441907, "mean_inference_ms": 1.2249036275862428, "mean_action_processing_ms": 0.13510709218572114, "mean_env_wait_ms": 0.854133319135467, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 65.0, "episode_reward_min": 3.0, "episode_reward_mean": 16.0, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 39.0}, "policy_reward_mean": {"ppo": 8.0}, "hist_stats": {"episode_reward": [6.0, 17.0, 14.0, 3.0, 3.0, 14.0, 16.0, 11.0, 14.0, 16.0, 22.0, 3.0, 26.0, 3.0, 20.0, 57.0, 14.0, 27.0, 3.0, 6.0, 11.0, 3.0, 20.0, 9.0, 16.0, 19.0, 17.0, 22.0, 14.0, 25.0, 11.0, 14.0, 20.0, 11.0, 3.0, 14.0, 6.0, 20.0, 25.0, 9.0, 11.0, 25.0, 20.0, 3.0, 31.0, 60.0, 17.0, 17.0, 3.0, 9.0, 6.0, 11.0, 8.0, 14.0, 25.0, 11.0, 6.0, 9.0, 14.0, 17.0, 22.0, 22.0, 11.0, 6.0, 9.0, 3.0, 36.0, 9.0, 20.0, 14.0, 6.0, 9.0, 14.0, 14.0, 25.0, 14.0, 20.0, 3.0, 25.0, 6.0, 28.0, 11.0, 16.0, 22.0, 11.0, 9.0, 17.0, 9.0, 11.0, 25.0, 17.0, 25.0, 14.0, 65.0, 25.0, 31.0, 11.0, 14.0, 17.0, 33.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [0.0, 6.0, 8.0, 9.0, 8.0, 6.0, 0.0, 3.0, 0.0, 3.0, 8.0, 6.0, 13.0, 3.0, 6.0, 5.0, 8.0, 6.0, 3.0, 13.0, 8.0, 14.0, 0.0, 3.0, 11.0, 15.0, 0.0, 3.0, 9.0, 11.0, 26.0, 31.0, 3.0, 11.0, 13.0, 14.0, 3.0, 0.0, 0.0, 6.0, 11.0, 0.0, 3.0, 0.0, 12.0, 8.0, 9.0, 0.0, 3.0, 13.0, 11.0, 8.0, 14.0, 3.0, 6.0, 16.0, 3.0, 11.0, 12.0, 13.0, 3.0, 8.0, 3.0, 11.0, 11.0, 9.0, 3.0, 8.0, 3.0, 0.0, 11.0, 3.0, 3.0, 3.0, 6.0, 14.0, 17.0, 8.0, 6.0, 3.0, 5.0, 6.0, 17.0, 8.0, 12.0, 8.0, 3.0, 0.0, 17.0, 14.0, 25.0, 35.0, 0.0, 17.0, 6.0, 11.0, 0.0, 3.0, 9.0, 0.0, 3.0, 3.0, 3.0, 8.0, 5.0, 3.0, 8.0, 6.0, 9.0, 16.0, 8.0, 3.0, 0.0, 6.0, 3.0, 6.0, 9.0, 5.0, 9.0, 8.0, 13.0, 9.0, 6.0, 16.0, 3.0, 8.0, 0.0, 6.0, 6.0, 3.0, 3.0, 0.0, 19.0, 17.0, 3.0, 6.0, 11.0, 9.0, 9.0, 5.0, 6.0, 0.0, 0.0, 9.0, 11.0, 3.0, 8.0, 6.0, 16.0, 9.0, 6.0, 8.0, 8.0, 12.0, 3.0, 0.0, 14.0, 11.0, 6.0, 0.0, 11.0, 17.0, 8.0, 3.0, 8.0, 8.0, 13.0, 9.0, 6.0, 5.0, 6.0, 3.0, 8.0, 9.0, 3.0, 6.0, 3.0, 8.0, 11.0, 14.0, 3.0, 14.0, 11.0, 14.0, 8.0, 6.0, 39.0, 26.0, 6.0, 19.0, 12.0, 19.0, 3.0, 8.0, 8.0, 6.0, 9.0, 8.0, 19.0, 14.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7029510683441907, "mean_inference_ms": 1.2249036275862428, "mean_action_processing_ms": 0.13510709218572114, "mean_env_wait_ms": 0.854133319135467, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 281600, "num_agent_steps_trained": 281600, "num_env_steps_sampled": 140800, "num_env_steps_trained": 140800, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 140800, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 281600, "timers": {"training_iteration_time_ms": 3748.712, "learn_time_ms": 1130.334, "learn_throughput": 11324.089, "synch_weights_time_ms": 14.789}, "counters": {"num_env_steps_sampled": 140800, 
"num_env_steps_trained": 140800, "num_agent_steps_sampled": 281600, "num_agent_steps_trained": 281600}, "done": false, "episodes_total": 352, "training_iteration": 11, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-00-38", "timestamp": 1666580438, "time_this_iter_s": 3.776998996734619, "time_total_s": 42.58404278755188, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, 
"render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": 
null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, 
"SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, 
"custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, 
"sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": 
-1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 42.58404278755188, "timesteps_since_restore": 0, "iterations_since_restore": 11, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 21.116666666666667, "ram_util_percent": 10.616666666666667}}
+{"custom_metrics": {"sparse_reward_mean": 1.0, "sparse_reward_min": 0, "sparse_reward_max": 20, "shaped_reward_mean": 15.87, "shaped_reward_min": 3, "shaped_reward_max": 36, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 5.09, "onion_pickup_agent_0_min": 0, "onion_pickup_agent_0_max": 14, "onion_pickup_agent_1_mean": 5.43, "onion_pickup_agent_1_min": 0, "onion_pickup_agent_1_max": 11, "useful_onion_pickup_agent_0_mean": 3.32, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 10, "useful_onion_pickup_agent_1_mean": 3.47, "useful_onion_pickup_agent_1_min": 0, "useful_onion_pickup_agent_1_max": 9, "onion_drop_agent_0_mean": 3.18, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 11, "onion_drop_agent_1_mean": 3.54, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 9, "useful_onion_drop_agent_0_mean": 1.35, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 1.64, 
"useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 8, "potting_onion_agent_0_mean": 1.41, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 5, "potting_onion_agent_1_mean": 1.46, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 6, "dish_pickup_agent_0_mean": 2.94, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 3.07, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 0.34, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 2, "useful_dish_pickup_agent_1_mean": 0.33, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 2, "dish_drop_agent_0_mean": 2.2, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 12, "dish_drop_agent_1_mean": 2.21, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 9, "useful_dish_drop_agent_0_mean": 0.94, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 5, "useful_dish_drop_agent_1_mean": 0.89, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 9, "soup_pickup_agent_0_mean": 0.83, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 6, "soup_pickup_agent_1_mean": 0.82, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 0.25, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 2, "soup_delivery_agent_1_mean": 0.23, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 2, "soup_drop_agent_0_mean": 0.5, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 4, "soup_drop_agent_1_mean": 0.47, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 5, "optimal_onion_potting_agent_0_mean": 1.41, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 5, "optimal_onion_potting_agent_1_mean": 1.46, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 6, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, 
"optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 1.41, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 5, "viable_onion_potting_agent_1_mean": 1.46, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 6, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 9.765625145519152e-05, "cur_lr": 0.0010000000474974513, "total_loss": -0.02605297975242138, "policy_loss": -0.003543408587574959, "vf_loss": 1.2588311433792114, 
"vf_explained_var": 0.027413932606577873, "kl": 0.0007508020498789847, "entropy": 1.7772871255874634, "entropy_coeff": 0.012736000120639801, "model": {}}}}, "num_env_steps_sampled": 153600, "num_env_steps_trained": 153600, "num_agent_steps_sampled": 307200, "num_agent_steps_trained": 307200}, "sampler_results": {"episode_reward_max": 74.0, "episode_reward_min": 3.0, "episode_reward_mean": 17.87, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 40.0}, "policy_reward_mean": {"ppo": 8.935}, "custom_metrics": {"sparse_reward_mean": 1.0, "sparse_reward_min": 0, "sparse_reward_max": 20, "shaped_reward_mean": 15.87, "shaped_reward_min": 3, "shaped_reward_max": 36, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 5.09, "onion_pickup_agent_0_min": 0, "onion_pickup_agent_0_max": 14, "onion_pickup_agent_1_mean": 5.43, "onion_pickup_agent_1_min": 0, 
"onion_pickup_agent_1_max": 11, "useful_onion_pickup_agent_0_mean": 3.32, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 10, "useful_onion_pickup_agent_1_mean": 3.47, "useful_onion_pickup_agent_1_min": 0, "useful_onion_pickup_agent_1_max": 9, "onion_drop_agent_0_mean": 3.18, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 11, "onion_drop_agent_1_mean": 3.54, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 9, "useful_onion_drop_agent_0_mean": 1.35, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 1.64, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 8, "potting_onion_agent_0_mean": 1.41, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 5, "potting_onion_agent_1_mean": 1.46, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 6, "dish_pickup_agent_0_mean": 2.94, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 3.07, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 0.34, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 2, "useful_dish_pickup_agent_1_mean": 0.33, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 2, "dish_drop_agent_0_mean": 2.2, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 12, "dish_drop_agent_1_mean": 2.21, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 9, "useful_dish_drop_agent_0_mean": 0.94, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 5, "useful_dish_drop_agent_1_mean": 0.89, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 9, "soup_pickup_agent_0_mean": 0.83, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 6, "soup_pickup_agent_1_mean": 0.82, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 0.25, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 2, 
"soup_delivery_agent_1_mean": 0.23, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 2, "soup_drop_agent_0_mean": 0.5, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 4, "soup_drop_agent_1_mean": 0.47, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 5, "optimal_onion_potting_agent_0_mean": 1.41, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 5, "optimal_onion_potting_agent_1_mean": 1.46, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 6, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 1.41, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 5, "viable_onion_potting_agent_1_mean": 1.46, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 6, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, 
"useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [20.0, 11.0, 3.0, 14.0, 6.0, 20.0, 25.0, 9.0, 11.0, 25.0, 20.0, 3.0, 31.0, 60.0, 17.0, 17.0, 3.0, 9.0, 6.0, 11.0, 8.0, 14.0, 25.0, 11.0, 6.0, 9.0, 14.0, 17.0, 22.0, 22.0, 11.0, 6.0, 9.0, 3.0, 36.0, 9.0, 20.0, 14.0, 6.0, 9.0, 14.0, 14.0, 25.0, 14.0, 20.0, 3.0, 25.0, 6.0, 28.0, 11.0, 16.0, 22.0, 11.0, 9.0, 17.0, 9.0, 11.0, 25.0, 17.0, 25.0, 14.0, 65.0, 25.0, 31.0, 11.0, 14.0, 17.0, 33.0, 22.0, 14.0, 14.0, 23.0, 20.0, 19.0, 8.0, 25.0, 9.0, 8.0, 27.0, 14.0, 74.0, 71.0, 63.0, 19.0, 17.0, 14.0, 14.0, 9.0, 34.0, 3.0, 28.0, 25.0, 14.0, 3.0, 6.0, 23.0, 9.0, 6.0, 19.0, 9.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [11.0, 9.0, 3.0, 8.0, 3.0, 0.0, 11.0, 3.0, 3.0, 3.0, 6.0, 14.0, 17.0, 8.0, 6.0, 3.0, 5.0, 6.0, 17.0, 8.0, 12.0, 8.0, 3.0, 0.0, 17.0, 14.0, 25.0, 35.0, 0.0, 17.0, 6.0, 11.0, 0.0, 3.0, 9.0, 0.0, 3.0, 3.0, 3.0, 8.0, 5.0, 3.0, 8.0, 6.0, 9.0, 16.0, 8.0, 3.0, 0.0, 6.0, 3.0, 6.0, 9.0, 5.0, 9.0, 8.0, 13.0, 9.0, 6.0, 16.0, 3.0, 8.0, 0.0, 6.0, 6.0, 3.0, 3.0, 0.0, 19.0, 17.0, 3.0, 6.0, 11.0, 9.0, 9.0, 5.0, 6.0, 0.0, 0.0, 9.0, 11.0, 3.0, 8.0, 6.0, 16.0, 9.0, 6.0, 8.0, 8.0, 12.0, 3.0, 0.0, 14.0, 11.0, 6.0, 0.0, 11.0, 17.0, 
8.0, 3.0, 8.0, 8.0, 13.0, 9.0, 6.0, 5.0, 6.0, 3.0, 8.0, 9.0, 3.0, 6.0, 3.0, 8.0, 11.0, 14.0, 3.0, 14.0, 11.0, 14.0, 8.0, 6.0, 39.0, 26.0, 6.0, 19.0, 12.0, 19.0, 3.0, 8.0, 8.0, 6.0, 9.0, 8.0, 19.0, 14.0, 11.0, 11.0, 3.0, 11.0, 14.0, 0.0, 12.0, 11.0, 14.0, 6.0, 14.0, 5.0, 0.0, 8.0, 16.0, 9.0, 6.0, 3.0, 8.0, 0.0, 21.0, 6.0, 0.0, 14.0, 40.0, 34.0, 34.0, 37.0, 25.0, 38.0, 6.0, 13.0, 3.0, 14.0, 6.0, 8.0, 3.0, 11.0, 9.0, 0.0, 15.0, 19.0, 0.0, 3.0, 19.0, 9.0, 10.0, 15.0, 8.0, 6.0, 3.0, 0.0, 6.0, 0.0, 12.0, 11.0, 6.0, 3.0, 0.0, 6.0, 11.0, 8.0, 3.0, 6.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7022611330961541, "mean_inference_ms": 1.2233884271505058, "mean_action_processing_ms": 0.13504361218921432, "mean_env_wait_ms": 0.8554002743051271, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 74.0, "episode_reward_min": 3.0, "episode_reward_mean": 17.87, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 40.0}, "policy_reward_mean": {"ppo": 8.935}, "hist_stats": {"episode_reward": [20.0, 11.0, 3.0, 14.0, 6.0, 20.0, 25.0, 9.0, 11.0, 25.0, 20.0, 3.0, 31.0, 60.0, 17.0, 17.0, 3.0, 9.0, 6.0, 11.0, 8.0, 14.0, 25.0, 11.0, 6.0, 9.0, 14.0, 17.0, 22.0, 22.0, 11.0, 6.0, 9.0, 3.0, 36.0, 9.0, 20.0, 14.0, 6.0, 9.0, 14.0, 14.0, 25.0, 14.0, 20.0, 3.0, 25.0, 6.0, 28.0, 11.0, 16.0, 22.0, 11.0, 9.0, 17.0, 9.0, 11.0, 25.0, 17.0, 25.0, 14.0, 65.0, 25.0, 31.0, 11.0, 14.0, 17.0, 33.0, 22.0, 14.0, 14.0, 23.0, 20.0, 19.0, 8.0, 25.0, 9.0, 8.0, 27.0, 14.0, 74.0, 71.0, 63.0, 19.0, 17.0, 14.0, 14.0, 9.0, 34.0, 3.0, 28.0, 25.0, 14.0, 3.0, 6.0, 23.0, 9.0, 6.0, 19.0, 9.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [11.0, 9.0, 3.0, 8.0, 3.0, 0.0, 11.0, 3.0, 3.0, 3.0, 6.0, 14.0, 17.0, 8.0, 6.0, 3.0, 5.0, 6.0, 17.0, 8.0, 12.0, 8.0, 3.0, 0.0, 17.0, 14.0, 25.0, 35.0, 0.0, 17.0, 6.0, 11.0, 0.0, 3.0, 9.0, 0.0, 3.0, 3.0, 3.0, 8.0, 5.0, 3.0, 8.0, 6.0, 9.0, 16.0, 8.0, 3.0, 0.0, 6.0, 3.0, 6.0, 9.0, 5.0, 9.0, 8.0, 13.0, 9.0, 6.0, 16.0, 3.0, 8.0, 0.0, 6.0, 6.0, 3.0, 3.0, 0.0, 19.0, 17.0, 3.0, 6.0, 11.0, 9.0, 9.0, 5.0, 6.0, 0.0, 0.0, 9.0, 11.0, 3.0, 8.0, 6.0, 16.0, 9.0, 6.0, 8.0, 8.0, 12.0, 3.0, 0.0, 14.0, 11.0, 6.0, 0.0, 11.0, 17.0, 8.0, 3.0, 8.0, 8.0, 13.0, 9.0, 6.0, 5.0, 6.0, 3.0, 8.0, 9.0, 3.0, 6.0, 3.0, 8.0, 11.0, 14.0, 3.0, 14.0, 11.0, 14.0, 8.0, 6.0, 39.0, 26.0, 6.0, 19.0, 12.0, 19.0, 3.0, 8.0, 8.0, 6.0, 9.0, 8.0, 19.0, 14.0, 11.0, 11.0, 3.0, 11.0, 14.0, 0.0, 12.0, 11.0, 14.0, 6.0, 14.0, 5.0, 0.0, 8.0, 16.0, 9.0, 6.0, 3.0, 8.0, 0.0, 21.0, 6.0, 0.0, 14.0, 40.0, 34.0, 34.0, 37.0, 25.0, 38.0, 6.0, 13.0, 3.0, 14.0, 6.0, 8.0, 3.0, 11.0, 9.0, 0.0, 15.0, 19.0, 0.0, 3.0, 19.0, 9.0, 10.0, 15.0, 8.0, 6.0, 3.0, 0.0, 6.0, 0.0, 12.0, 11.0, 6.0, 3.0, 0.0, 6.0, 11.0, 8.0, 3.0, 6.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7022611330961541, "mean_inference_ms": 1.2233884271505058, "mean_action_processing_ms": 0.13504361218921432, "mean_env_wait_ms": 0.8554002743051271, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 307200, "num_agent_steps_trained": 307200, "num_env_steps_sampled": 153600, "num_env_steps_trained": 153600, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 153600, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 307200, "timers": {"training_iteration_time_ms": 3741.977, "learn_time_ms": 1130.394, "learn_throughput": 11323.486, 
"synch_weights_time_ms": 14.516}, "counters": {"num_env_steps_sampled": 153600, "num_env_steps_trained": 153600, "num_agent_steps_sampled": 307200, "num_agent_steps_trained": 307200}, "done": false, "episodes_total": 384, "training_iteration": 12, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-00-42", "timestamp": 1666580442, "time_this_iter_s": 3.747410535812378, "time_total_s": 46.33145332336426, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, 
"SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, 
"custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, 
"DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, 
"attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": 
-1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, 
"learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 46.33145332336426, "timesteps_since_restore": 0, "iterations_since_restore": 12, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 24.68, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 1.4, "sparse_reward_min": 0, "sparse_reward_max": 20, "shaped_reward_mean": 16.88, "shaped_reward_min": 3, "shaped_reward_max": 39, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 4.85, "onion_pickup_agent_0_min": 1, "onion_pickup_agent_0_max": 14, "onion_pickup_agent_1_mean": 5.36, "onion_pickup_agent_1_min": 0, "onion_pickup_agent_1_max": 10, "useful_onion_pickup_agent_0_mean": 3.13, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 10, "useful_onion_pickup_agent_1_mean": 3.48, "useful_onion_pickup_agent_1_min": 0, "useful_onion_pickup_agent_1_max": 9, "onion_drop_agent_0_mean": 2.95, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 11, "onion_drop_agent_1_mean": 3.4, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 9, "useful_onion_drop_agent_0_mean": 1.33, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 8, "useful_onion_drop_agent_1_mean": 1.6, 
"useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 8, "potting_onion_agent_0_mean": 1.45, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 7, "potting_onion_agent_1_mean": 1.53, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 6, "dish_pickup_agent_0_mean": 3.15, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 3.02, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 0.32, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 2, "useful_dish_pickup_agent_1_mean": 0.31, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 2, "dish_drop_agent_0_mean": 2.29, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 12, "dish_drop_agent_1_mean": 2.19, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 9, "useful_dish_drop_agent_0_mean": 1.04, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 8, "useful_dish_drop_agent_1_mean": 0.9, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 9, "soup_pickup_agent_0_mean": 0.92, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 6, "soup_pickup_agent_1_mean": 0.87, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 0.3, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 2, "soup_delivery_agent_1_mean": 0.26, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 2, "soup_drop_agent_0_mean": 0.5, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 4, "soup_drop_agent_1_mean": 0.47, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 5, "optimal_onion_potting_agent_0_mean": 1.45, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 7, "optimal_onion_potting_agent_1_mean": 1.53, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 6, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, 
"optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 1.45, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 7, "viable_onion_potting_agent_1_mean": 1.53, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 6, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 4.882812572759576e-05, "cur_lr": 0.0010000000474974513, "total_loss": -0.005024836398661137, "policy_loss": -0.004262564238160849, "vf_loss": 1.2552039623260498, 
"vf_explained_var": 0.04432354122400284, "kl": 0.0007258389960043132, "entropy": 1.7756567001342773, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 166400, "num_env_steps_trained": 166400, "num_agent_steps_sampled": 332800, "num_agent_steps_trained": 332800}, "sampler_results": {"episode_reward_max": 79.0, "episode_reward_min": 3.0, "episode_reward_mean": 19.68, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 46.0}, "policy_reward_mean": {"ppo": 9.84}, "custom_metrics": {"sparse_reward_mean": 1.4, "sparse_reward_min": 0, "sparse_reward_max": 20, "shaped_reward_mean": 16.88, "shaped_reward_min": 3, "shaped_reward_max": 39, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 4.85, "onion_pickup_agent_0_min": 1, "onion_pickup_agent_0_max": 14, "onion_pickup_agent_1_mean": 5.36, "onion_pickup_agent_1_min": 0, 
"onion_pickup_agent_1_max": 10, "useful_onion_pickup_agent_0_mean": 3.13, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 10, "useful_onion_pickup_agent_1_mean": 3.48, "useful_onion_pickup_agent_1_min": 0, "useful_onion_pickup_agent_1_max": 9, "onion_drop_agent_0_mean": 2.95, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 11, "onion_drop_agent_1_mean": 3.4, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 9, "useful_onion_drop_agent_0_mean": 1.33, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 8, "useful_onion_drop_agent_1_mean": 1.6, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 8, "potting_onion_agent_0_mean": 1.45, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 7, "potting_onion_agent_1_mean": 1.53, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 6, "dish_pickup_agent_0_mean": 3.15, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 3.02, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 0.32, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 2, "useful_dish_pickup_agent_1_mean": 0.31, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 2, "dish_drop_agent_0_mean": 2.29, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 12, "dish_drop_agent_1_mean": 2.19, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 9, "useful_dish_drop_agent_0_mean": 1.04, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 8, "useful_dish_drop_agent_1_mean": 0.9, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 9, "soup_pickup_agent_0_mean": 0.92, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 6, "soup_pickup_agent_1_mean": 0.87, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 0.3, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 2, 
"soup_delivery_agent_1_mean": 0.26, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 2, "soup_drop_agent_0_mean": 0.5, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 4, "soup_drop_agent_1_mean": 0.47, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 5, "optimal_onion_potting_agent_0_mean": 1.45, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 7, "optimal_onion_potting_agent_1_mean": 1.53, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 6, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 1.45, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 7, "viable_onion_potting_agent_1_mean": 1.53, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 6, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, 
"useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [9.0, 3.0, 36.0, 9.0, 20.0, 14.0, 6.0, 9.0, 14.0, 14.0, 25.0, 14.0, 20.0, 3.0, 25.0, 6.0, 28.0, 11.0, 16.0, 22.0, 11.0, 9.0, 17.0, 9.0, 11.0, 25.0, 17.0, 25.0, 14.0, 65.0, 25.0, 31.0, 11.0, 14.0, 17.0, 33.0, 22.0, 14.0, 14.0, 23.0, 20.0, 19.0, 8.0, 25.0, 9.0, 8.0, 27.0, 14.0, 74.0, 71.0, 63.0, 19.0, 17.0, 14.0, 14.0, 9.0, 34.0, 3.0, 28.0, 25.0, 14.0, 3.0, 6.0, 23.0, 9.0, 6.0, 19.0, 9.0, 39.0, 22.0, 22.0, 11.0, 14.0, 3.0, 20.0, 19.0, 14.0, 14.0, 19.0, 9.0, 60.0, 14.0, 79.0, 22.0, 22.0, 16.0, 16.0, 22.0, 19.0, 23.0, 11.0, 16.0, 11.0, 3.0, 6.0, 3.0, 14.0, 16.0, 20.0, 68.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [6.0, 3.0, 3.0, 0.0, 19.0, 17.0, 3.0, 6.0, 11.0, 9.0, 9.0, 5.0, 6.0, 0.0, 0.0, 9.0, 11.0, 3.0, 8.0, 6.0, 16.0, 9.0, 6.0, 8.0, 8.0, 12.0, 3.0, 0.0, 14.0, 11.0, 6.0, 0.0, 11.0, 17.0, 8.0, 3.0, 8.0, 8.0, 13.0, 9.0, 6.0, 5.0, 6.0, 3.0, 8.0, 9.0, 3.0, 6.0, 3.0, 8.0, 11.0, 14.0, 3.0, 14.0, 11.0, 14.0, 8.0, 6.0, 39.0, 26.0, 6.0, 19.0, 12.0, 19.0, 3.0, 8.0, 8.0, 6.0, 9.0, 8.0, 19.0, 14.0, 11.0, 11.0, 3.0, 11.0, 14.0, 0.0, 12.0, 11.0, 14.0, 6.0, 14.0, 5.0, 0.0, 8.0, 16.0, 9.0, 6.0, 3.0, 8.0, 0.0, 21.0, 6.0, 0.0, 
14.0, 40.0, 34.0, 34.0, 37.0, 25.0, 38.0, 6.0, 13.0, 3.0, 14.0, 6.0, 8.0, 3.0, 11.0, 9.0, 0.0, 15.0, 19.0, 0.0, 3.0, 19.0, 9.0, 10.0, 15.0, 8.0, 6.0, 3.0, 0.0, 6.0, 0.0, 12.0, 11.0, 6.0, 3.0, 0.0, 6.0, 11.0, 8.0, 3.0, 6.0, 17.0, 22.0, 22.0, 0.0, 5.0, 17.0, 3.0, 8.0, 8.0, 6.0, 0.0, 3.0, 14.0, 6.0, 8.0, 11.0, 6.0, 8.0, 6.0, 8.0, 8.0, 11.0, 6.0, 3.0, 31.0, 29.0, 3.0, 11.0, 46.0, 33.0, 14.0, 8.0, 14.0, 8.0, 10.0, 6.0, 10.0, 6.0, 16.0, 6.0, 16.0, 3.0, 11.0, 12.0, 5.0, 6.0, 3.0, 13.0, 3.0, 8.0, 0.0, 3.0, 0.0, 6.0, 3.0, 0.0, 6.0, 8.0, 8.0, 8.0, 8.0, 12.0, 34.0, 34.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7019351354387914, "mean_inference_ms": 1.2218941189577206, "mean_action_processing_ms": 0.13495204944654685, "mean_env_wait_ms": 0.8561416115977619, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 79.0, "episode_reward_min": 3.0, "episode_reward_mean": 19.68, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 46.0}, "policy_reward_mean": {"ppo": 9.84}, "hist_stats": {"episode_reward": [9.0, 3.0, 36.0, 9.0, 20.0, 14.0, 6.0, 9.0, 14.0, 14.0, 25.0, 14.0, 20.0, 3.0, 25.0, 6.0, 28.0, 11.0, 16.0, 22.0, 11.0, 9.0, 17.0, 9.0, 11.0, 25.0, 17.0, 25.0, 14.0, 65.0, 25.0, 31.0, 11.0, 14.0, 17.0, 33.0, 22.0, 14.0, 14.0, 23.0, 20.0, 19.0, 8.0, 25.0, 9.0, 8.0, 27.0, 14.0, 74.0, 71.0, 63.0, 19.0, 17.0, 14.0, 14.0, 9.0, 34.0, 3.0, 28.0, 25.0, 14.0, 3.0, 6.0, 23.0, 9.0, 6.0, 19.0, 9.0, 39.0, 22.0, 22.0, 11.0, 14.0, 3.0, 20.0, 19.0, 14.0, 14.0, 19.0, 9.0, 60.0, 14.0, 79.0, 22.0, 22.0, 16.0, 16.0, 22.0, 19.0, 23.0, 11.0, 16.0, 11.0, 3.0, 6.0, 3.0, 14.0, 16.0, 20.0, 68.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [6.0, 3.0, 3.0, 0.0, 19.0, 17.0, 3.0, 6.0, 11.0, 9.0, 9.0, 5.0, 6.0, 0.0, 0.0, 9.0, 11.0, 3.0, 8.0, 6.0, 16.0, 9.0, 6.0, 8.0, 8.0, 12.0, 3.0, 0.0, 14.0, 11.0, 6.0, 0.0, 11.0, 17.0, 8.0, 3.0, 8.0, 8.0, 13.0, 9.0, 6.0, 5.0, 6.0, 3.0, 8.0, 9.0, 3.0, 6.0, 3.0, 8.0, 11.0, 14.0, 3.0, 14.0, 11.0, 14.0, 8.0, 6.0, 39.0, 26.0, 6.0, 19.0, 12.0, 19.0, 3.0, 8.0, 8.0, 6.0, 9.0, 8.0, 19.0, 14.0, 11.0, 11.0, 3.0, 11.0, 14.0, 0.0, 12.0, 11.0, 14.0, 6.0, 14.0, 5.0, 0.0, 8.0, 16.0, 9.0, 6.0, 3.0, 8.0, 0.0, 21.0, 6.0, 0.0, 14.0, 40.0, 34.0, 34.0, 37.0, 25.0, 38.0, 6.0, 13.0, 3.0, 14.0, 6.0, 8.0, 3.0, 11.0, 9.0, 0.0, 15.0, 19.0, 0.0, 3.0, 19.0, 9.0, 10.0, 15.0, 8.0, 6.0, 3.0, 0.0, 6.0, 0.0, 12.0, 11.0, 6.0, 3.0, 0.0, 6.0, 11.0, 8.0, 3.0, 6.0, 17.0, 22.0, 22.0, 0.0, 5.0, 17.0, 3.0, 8.0, 8.0, 6.0, 0.0, 3.0, 14.0, 6.0, 8.0, 11.0, 6.0, 8.0, 6.0, 8.0, 8.0, 11.0, 6.0, 3.0, 31.0, 29.0, 3.0, 11.0, 46.0, 33.0, 14.0, 8.0, 14.0, 8.0, 10.0, 6.0, 10.0, 6.0, 16.0, 6.0, 16.0, 3.0, 11.0, 12.0, 5.0, 6.0, 3.0, 13.0, 3.0, 8.0, 0.0, 3.0, 0.0, 6.0, 3.0, 0.0, 6.0, 8.0, 8.0, 8.0, 8.0, 12.0, 34.0, 34.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7019351354387914, "mean_inference_ms": 1.2218941189577206, "mean_action_processing_ms": 0.13495204944654685, "mean_env_wait_ms": 0.8561416115977619, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 332800, "num_agent_steps_trained": 332800, "num_env_steps_sampled": 166400, "num_env_steps_trained": 166400, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 166400, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 332800, "timers": {"training_iteration_time_ms": 3744.604, "learn_time_ms": 1137.722, "learn_throughput": 
11250.556, "synch_weights_time_ms": 14.033}, "counters": {"num_env_steps_sampled": 166400, "num_env_steps_trained": 166400, "num_agent_steps_sampled": 332800, "num_agent_steps_trained": 332800}, "done": false, "episodes_total": 416, "training_iteration": 13, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-00-46", "timestamp": 1666580446, "time_this_iter_s": 3.7613608837127686, "time_total_s": 50.092814207077026, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, 
"SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, 
"custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, 
"DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, 
"attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": 
-1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, 
"learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 50.092814207077026, "timesteps_since_restore": 0, "iterations_since_restore": 13, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.98333333333333, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 1.6, "sparse_reward_min": 0, "sparse_reward_max": 20, "shaped_reward_mean": 17.55, "shaped_reward_min": 3, "shaped_reward_max": 41, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 5.06, "onion_pickup_agent_0_min": 1, "onion_pickup_agent_0_max": 12, "onion_pickup_agent_1_mean": 5.41, "onion_pickup_agent_1_min": 0, "onion_pickup_agent_1_max": 10, "useful_onion_pickup_agent_0_mean": 3.06, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 10, "useful_onion_pickup_agent_1_mean": 3.32, "useful_onion_pickup_agent_1_min": 0, "useful_onion_pickup_agent_1_max": 8, "onion_drop_agent_0_mean": 3.01, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 10, "onion_drop_agent_1_mean": 3.31, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 9, "useful_onion_drop_agent_0_mean": 1.5, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 9, "useful_onion_drop_agent_1_mean": 1.76, 
"useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 8, "potting_onion_agent_0_mean": 1.52, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 7, "potting_onion_agent_1_mean": 1.57, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 6, "dish_pickup_agent_0_mean": 2.89, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 2.6, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 0.35, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 2, "useful_dish_pickup_agent_1_mean": 0.31, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 2, "dish_drop_agent_0_mean": 2.11, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 8, "dish_drop_agent_1_mean": 1.78, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 9, "useful_dish_drop_agent_0_mean": 0.76, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 8, "useful_dish_drop_agent_1_mean": 0.65, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 9, "soup_pickup_agent_0_mean": 0.98, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 6, "soup_pickup_agent_1_mean": 1.05, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 0.29, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 2, "soup_delivery_agent_1_mean": 0.25, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 2, "soup_drop_agent_0_mean": 0.55, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 4, "soup_drop_agent_1_mean": 0.66, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 5, "optimal_onion_potting_agent_0_mean": 1.52, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 7, "optimal_onion_potting_agent_1_mean": 1.57, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 6, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, 
"optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 1.52, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 7, "viable_onion_potting_agent_1_mean": 1.57, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 6, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 2.441406286379788e-05, "cur_lr": 0.0010000000474974513, "total_loss": -0.005547315813601017, "policy_loss": -0.004789750557392836, "vf_loss": 1.2801625728607178, 
"vf_explained_var": 0.0378531739115715, "kl": 0.0008417462231591344, "entropy": 1.7712035179138184, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 179200, "num_env_steps_trained": 179200, "num_agent_steps_sampled": 358400, "num_agent_steps_trained": 358400}, "sampler_results": {"episode_reward_max": 79.0, "episode_reward_min": 3.0, "episode_reward_mean": 20.75, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 46.0}, "policy_reward_mean": {"ppo": 10.375}, "custom_metrics": {"sparse_reward_mean": 1.6, "sparse_reward_min": 0, "sparse_reward_max": 20, "shaped_reward_mean": 17.55, "shaped_reward_min": 3, "shaped_reward_max": 41, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 5.06, "onion_pickup_agent_0_min": 1, "onion_pickup_agent_0_max": 12, "onion_pickup_agent_1_mean": 5.41, "onion_pickup_agent_1_min": 0, 
"onion_pickup_agent_1_max": 10, "useful_onion_pickup_agent_0_mean": 3.06, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 10, "useful_onion_pickup_agent_1_mean": 3.32, "useful_onion_pickup_agent_1_min": 0, "useful_onion_pickup_agent_1_max": 8, "onion_drop_agent_0_mean": 3.01, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 10, "onion_drop_agent_1_mean": 3.31, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 9, "useful_onion_drop_agent_0_mean": 1.5, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 9, "useful_onion_drop_agent_1_mean": 1.76, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 8, "potting_onion_agent_0_mean": 1.52, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 7, "potting_onion_agent_1_mean": 1.57, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 6, "dish_pickup_agent_0_mean": 2.89, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 2.6, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 0.35, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 2, "useful_dish_pickup_agent_1_mean": 0.31, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 2, "dish_drop_agent_0_mean": 2.11, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 8, "dish_drop_agent_1_mean": 1.78, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 9, "useful_dish_drop_agent_0_mean": 0.76, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 8, "useful_dish_drop_agent_1_mean": 0.65, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 9, "soup_pickup_agent_0_mean": 0.98, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 6, "soup_pickup_agent_1_mean": 1.05, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 0.29, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 2, 
"soup_delivery_agent_1_mean": 0.25, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 2, "soup_drop_agent_0_mean": 0.55, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 4, "soup_drop_agent_1_mean": 0.66, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 5, "optimal_onion_potting_agent_0_mean": 1.52, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 7, "optimal_onion_potting_agent_1_mean": 1.57, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 6, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 1.52, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 7, "viable_onion_potting_agent_1_mean": 1.57, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 6, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, 
"useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [11.0, 14.0, 17.0, 33.0, 22.0, 14.0, 14.0, 23.0, 20.0, 19.0, 8.0, 25.0, 9.0, 8.0, 27.0, 14.0, 74.0, 71.0, 63.0, 19.0, 17.0, 14.0, 14.0, 9.0, 34.0, 3.0, 28.0, 25.0, 14.0, 3.0, 6.0, 23.0, 9.0, 6.0, 19.0, 9.0, 39.0, 22.0, 22.0, 11.0, 14.0, 3.0, 20.0, 19.0, 14.0, 14.0, 19.0, 9.0, 60.0, 14.0, 79.0, 22.0, 22.0, 16.0, 16.0, 22.0, 19.0, 23.0, 11.0, 16.0, 11.0, 3.0, 6.0, 3.0, 14.0, 16.0, 20.0, 68.0, 9.0, 22.0, 22.0, 28.0, 9.0, 22.0, 17.0, 22.0, 14.0, 12.0, 3.0, 11.0, 25.0, 20.0, 14.0, 62.0, 60.0, 23.0, 31.0, 17.0, 28.0, 41.0, 20.0, 8.0, 36.0, 17.0, 3.0, 12.0, 22.0, 12.0, 9.0, 19.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [3.0, 8.0, 8.0, 6.0, 9.0, 8.0, 19.0, 14.0, 11.0, 11.0, 3.0, 11.0, 14.0, 0.0, 12.0, 11.0, 14.0, 6.0, 14.0, 5.0, 0.0, 8.0, 16.0, 9.0, 6.0, 3.0, 8.0, 0.0, 21.0, 6.0, 0.0, 14.0, 40.0, 34.0, 34.0, 37.0, 25.0, 38.0, 6.0, 13.0, 3.0, 14.0, 6.0, 8.0, 3.0, 11.0, 9.0, 0.0, 15.0, 19.0, 0.0, 3.0, 19.0, 9.0, 10.0, 15.0, 8.0, 6.0, 3.0, 0.0, 6.0, 0.0, 12.0, 11.0, 6.0, 3.0, 0.0, 6.0, 11.0, 8.0, 3.0, 6.0, 17.0, 22.0, 22.0, 0.0, 5.0, 17.0, 3.0, 8.0, 8.0, 6.0, 0.0, 3.0, 14.0, 6.0, 8.0, 11.0, 6.0, 8.0, 6.0, 8.0, 8.0, 11.0, 
6.0, 3.0, 31.0, 29.0, 3.0, 11.0, 46.0, 33.0, 14.0, 8.0, 14.0, 8.0, 10.0, 6.0, 10.0, 6.0, 16.0, 6.0, 16.0, 3.0, 11.0, 12.0, 5.0, 6.0, 3.0, 13.0, 3.0, 8.0, 0.0, 3.0, 0.0, 6.0, 3.0, 0.0, 6.0, 8.0, 8.0, 8.0, 8.0, 12.0, 34.0, 34.0, 3.0, 6.0, 9.0, 13.0, 9.0, 13.0, 17.0, 11.0, 6.0, 3.0, 11.0, 11.0, 9.0, 8.0, 11.0, 11.0, 3.0, 11.0, 9.0, 3.0, 0.0, 3.0, 0.0, 11.0, 6.0, 19.0, 14.0, 6.0, 3.0, 11.0, 33.0, 29.0, 29.0, 31.0, 12.0, 11.0, 17.0, 14.0, 14.0, 3.0, 14.0, 14.0, 19.0, 22.0, 8.0, 12.0, 8.0, 0.0, 19.0, 17.0, 3.0, 14.0, 0.0, 3.0, 6.0, 6.0, 16.0, 6.0, 6.0, 6.0, 3.0, 6.0, 3.0, 16.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7012446935769026, "mean_inference_ms": 1.2207221630940694, "mean_action_processing_ms": 0.13481071649814982, "mean_env_wait_ms": 0.8625244321628243, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 79.0, "episode_reward_min": 3.0, "episode_reward_mean": 20.75, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 46.0}, "policy_reward_mean": {"ppo": 10.375}, "hist_stats": {"episode_reward": [11.0, 14.0, 17.0, 33.0, 22.0, 14.0, 14.0, 23.0, 20.0, 19.0, 8.0, 25.0, 9.0, 8.0, 27.0, 14.0, 74.0, 71.0, 63.0, 19.0, 17.0, 14.0, 14.0, 9.0, 34.0, 3.0, 28.0, 25.0, 14.0, 3.0, 6.0, 23.0, 9.0, 6.0, 19.0, 9.0, 39.0, 22.0, 22.0, 11.0, 14.0, 3.0, 20.0, 19.0, 14.0, 14.0, 19.0, 9.0, 60.0, 14.0, 79.0, 22.0, 22.0, 16.0, 16.0, 22.0, 19.0, 23.0, 11.0, 16.0, 11.0, 3.0, 6.0, 3.0, 14.0, 16.0, 20.0, 68.0, 9.0, 22.0, 22.0, 28.0, 9.0, 22.0, 17.0, 22.0, 14.0, 12.0, 3.0, 11.0, 25.0, 20.0, 14.0, 62.0, 60.0, 23.0, 31.0, 17.0, 28.0, 41.0, 20.0, 8.0, 36.0, 17.0, 3.0, 12.0, 22.0, 12.0, 9.0, 19.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [3.0, 8.0, 8.0, 6.0, 9.0, 8.0, 19.0, 14.0, 11.0, 11.0, 3.0, 11.0, 14.0, 0.0, 12.0, 11.0, 14.0, 6.0, 14.0, 5.0, 0.0, 8.0, 16.0, 9.0, 6.0, 3.0, 8.0, 0.0, 21.0, 6.0, 0.0, 14.0, 40.0, 34.0, 34.0, 37.0, 25.0, 38.0, 6.0, 13.0, 3.0, 14.0, 6.0, 8.0, 3.0, 11.0, 9.0, 0.0, 15.0, 19.0, 0.0, 3.0, 19.0, 9.0, 10.0, 15.0, 8.0, 6.0, 3.0, 0.0, 6.0, 0.0, 12.0, 11.0, 6.0, 3.0, 0.0, 6.0, 11.0, 8.0, 3.0, 6.0, 17.0, 22.0, 22.0, 0.0, 5.0, 17.0, 3.0, 8.0, 8.0, 6.0, 0.0, 3.0, 14.0, 6.0, 8.0, 11.0, 6.0, 8.0, 6.0, 8.0, 8.0, 11.0, 6.0, 3.0, 31.0, 29.0, 3.0, 11.0, 46.0, 33.0, 14.0, 8.0, 14.0, 8.0, 10.0, 6.0, 10.0, 6.0, 16.0, 6.0, 16.0, 3.0, 11.0, 12.0, 5.0, 6.0, 3.0, 13.0, 3.0, 8.0, 0.0, 3.0, 0.0, 6.0, 3.0, 0.0, 6.0, 8.0, 8.0, 8.0, 8.0, 12.0, 34.0, 34.0, 3.0, 6.0, 9.0, 13.0, 9.0, 13.0, 17.0, 11.0, 6.0, 3.0, 11.0, 11.0, 9.0, 8.0, 11.0, 11.0, 3.0, 11.0, 9.0, 3.0, 0.0, 3.0, 0.0, 11.0, 6.0, 19.0, 14.0, 6.0, 3.0, 11.0, 33.0, 29.0, 29.0, 31.0, 12.0, 11.0, 17.0, 14.0, 14.0, 3.0, 14.0, 14.0, 19.0, 22.0, 8.0, 12.0, 8.0, 0.0, 19.0, 17.0, 3.0, 14.0, 0.0, 3.0, 6.0, 6.0, 16.0, 6.0, 6.0, 6.0, 3.0, 6.0, 3.0, 16.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7012446935769026, "mean_inference_ms": 1.2207221630940694, "mean_action_processing_ms": 0.13481071649814982, "mean_env_wait_ms": 0.8625244321628243, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 358400, "num_agent_steps_trained": 358400, "num_env_steps_sampled": 179200, "num_env_steps_trained": 179200, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 179200, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 358400, "timers": {"training_iteration_time_ms": 3767.559, "learn_time_ms": 
1141.018, "learn_throughput": 11218.054, "synch_weights_time_ms": 13.84}, "counters": {"num_env_steps_sampled": 179200, "num_env_steps_trained": 179200, "num_agent_steps_sampled": 358400, "num_agent_steps_trained": 358400}, "done": false, "episodes_total": 448, "training_iteration": 14, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-00-50", "timestamp": 1666580450, "time_this_iter_s": 3.917694330215454, "time_total_s": 54.01050853729248, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, 
"POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": 
false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": 
{"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, 
"attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, 
"min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, 
"buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 54.01050853729248, "timesteps_since_restore": 0, "iterations_since_restore": 14, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 22.016666666666666, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 1.6, "sparse_reward_min": 0, "sparse_reward_max": 20, "shaped_reward_mean": 18.04, "shaped_reward_min": 3, "shaped_reward_max": 41, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 5.3, "onion_pickup_agent_0_min": 1, "onion_pickup_agent_0_max": 12, "onion_pickup_agent_1_mean": 5.26, "onion_pickup_agent_1_min": 0, "onion_pickup_agent_1_max": 15, "useful_onion_pickup_agent_0_mean": 3.11, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 9, "useful_onion_pickup_agent_1_mean": 3.34, "useful_onion_pickup_agent_1_min": 0, "useful_onion_pickup_agent_1_max": 8, "onion_drop_agent_0_mean": 3.17, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 10, "onion_drop_agent_1_mean": 3.03, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 12, "useful_onion_drop_agent_0_mean": 1.72, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 9, "useful_onion_drop_agent_1_mean": 1.67, 
"useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 8, "potting_onion_agent_0_mean": 1.63, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 7, "potting_onion_agent_1_mean": 1.65, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 5, "dish_pickup_agent_0_mean": 2.85, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 2.8, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 0.31, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 2, "useful_dish_pickup_agent_1_mean": 0.29, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 1, "dish_drop_agent_0_mean": 2.13, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 8, "dish_drop_agent_1_mean": 1.99, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 9, "useful_dish_drop_agent_0_mean": 0.73, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 8, "useful_dish_drop_agent_1_mean": 0.77, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 9, "soup_pickup_agent_0_mean": 0.93, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 5, "soup_pickup_agent_1_mean": 1.05, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 6, "soup_delivery_agent_0_mean": 0.26, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 2, "soup_delivery_agent_1_mean": 0.27, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 2, "soup_drop_agent_0_mean": 0.49, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 4, "soup_drop_agent_1_mean": 0.68, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 5, "optimal_onion_potting_agent_0_mean": 1.63, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 7, "optimal_onion_potting_agent_1_mean": 1.65, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 5, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, 
"optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 1.63, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 7, "viable_onion_potting_agent_1_mean": 1.65, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 5, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 1.220703143189894e-05, "cur_lr": 0.0010000000474974513, "total_loss": -0.0029025180265307426, "policy_loss": -0.0021586534567177296, "vf_loss": 1.3945486545562744, 
"vf_explained_var": 0.05108204483985901, "kl": 0.0008824581163935363, "entropy": 1.766658902168274, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 192000, "num_env_steps_trained": 192000, "num_agent_steps_sampled": 384000, "num_agent_steps_trained": 384000}, "sampler_results": {"episode_reward_max": 79.0, "episode_reward_min": 3.0, "episode_reward_mean": 21.24, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 46.0}, "policy_reward_mean": {"ppo": 10.62}, "custom_metrics": {"sparse_reward_mean": 1.6, "sparse_reward_min": 0, "sparse_reward_max": 20, "shaped_reward_mean": 18.04, "shaped_reward_min": 3, "shaped_reward_max": 41, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 5.3, "onion_pickup_agent_0_min": 1, "onion_pickup_agent_0_max": 12, "onion_pickup_agent_1_mean": 5.26, "onion_pickup_agent_1_min": 0, 
"onion_pickup_agent_1_max": 15, "useful_onion_pickup_agent_0_mean": 3.11, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 9, "useful_onion_pickup_agent_1_mean": 3.34, "useful_onion_pickup_agent_1_min": 0, "useful_onion_pickup_agent_1_max": 8, "onion_drop_agent_0_mean": 3.17, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 10, "onion_drop_agent_1_mean": 3.03, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 12, "useful_onion_drop_agent_0_mean": 1.72, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 9, "useful_onion_drop_agent_1_mean": 1.67, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 8, "potting_onion_agent_0_mean": 1.63, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 7, "potting_onion_agent_1_mean": 1.65, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 5, "dish_pickup_agent_0_mean": 2.85, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 2.8, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 0.31, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 2, "useful_dish_pickup_agent_1_mean": 0.29, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 1, "dish_drop_agent_0_mean": 2.13, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 8, "dish_drop_agent_1_mean": 1.99, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 9, "useful_dish_drop_agent_0_mean": 0.73, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 8, "useful_dish_drop_agent_1_mean": 0.77, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 9, "soup_pickup_agent_0_mean": 0.93, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 5, "soup_pickup_agent_1_mean": 1.05, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 6, "soup_delivery_agent_0_mean": 0.26, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 2, 
"soup_delivery_agent_1_mean": 0.27, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 2, "soup_drop_agent_0_mean": 0.49, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 4, "soup_drop_agent_1_mean": 0.68, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 5, "optimal_onion_potting_agent_0_mean": 1.63, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 7, "optimal_onion_potting_agent_1_mean": 1.65, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 5, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 1.63, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 7, "viable_onion_potting_agent_1_mean": 1.65, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 5, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, 
"useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [9.0, 6.0, 19.0, 9.0, 39.0, 22.0, 22.0, 11.0, 14.0, 3.0, 20.0, 19.0, 14.0, 14.0, 19.0, 9.0, 60.0, 14.0, 79.0, 22.0, 22.0, 16.0, 16.0, 22.0, 19.0, 23.0, 11.0, 16.0, 11.0, 3.0, 6.0, 3.0, 14.0, 16.0, 20.0, 68.0, 9.0, 22.0, 22.0, 28.0, 9.0, 22.0, 17.0, 22.0, 14.0, 12.0, 3.0, 11.0, 25.0, 20.0, 14.0, 62.0, 60.0, 23.0, 31.0, 17.0, 28.0, 41.0, 20.0, 8.0, 36.0, 17.0, 3.0, 12.0, 22.0, 12.0, 9.0, 19.0, 20.0, 9.0, 19.0, 6.0, 9.0, 6.0, 22.0, 25.0, 31.0, 36.0, 17.0, 25.0, 25.0, 23.0, 14.0, 9.0, 37.0, 65.0, 25.0, 24.0, 14.0, 11.0, 20.0, 63.0, 22.0, 9.0, 17.0, 25.0, 25.0, 14.0, 68.0, 9.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [6.0, 3.0, 0.0, 6.0, 11.0, 8.0, 3.0, 6.0, 17.0, 22.0, 22.0, 0.0, 5.0, 17.0, 3.0, 8.0, 8.0, 6.0, 0.0, 3.0, 14.0, 6.0, 8.0, 11.0, 6.0, 8.0, 6.0, 8.0, 8.0, 11.0, 6.0, 3.0, 31.0, 29.0, 3.0, 11.0, 46.0, 33.0, 14.0, 8.0, 14.0, 8.0, 10.0, 6.0, 10.0, 6.0, 16.0, 6.0, 16.0, 3.0, 11.0, 12.0, 5.0, 6.0, 3.0, 13.0, 3.0, 8.0, 0.0, 3.0, 0.0, 6.0, 3.0, 0.0, 6.0, 8.0, 8.0, 8.0, 8.0, 12.0, 34.0, 34.0, 3.0, 6.0, 9.0, 13.0, 9.0, 13.0, 17.0, 11.0, 6.0, 3.0, 11.0, 11.0, 9.0, 8.0, 11.0, 11.0, 3.0, 11.0, 9.0, 3.0, 0.0, 3.0, 0.0, 
11.0, 6.0, 19.0, 14.0, 6.0, 3.0, 11.0, 33.0, 29.0, 29.0, 31.0, 12.0, 11.0, 17.0, 14.0, 14.0, 3.0, 14.0, 14.0, 19.0, 22.0, 8.0, 12.0, 8.0, 0.0, 19.0, 17.0, 3.0, 14.0, 0.0, 3.0, 6.0, 6.0, 16.0, 6.0, 6.0, 6.0, 3.0, 6.0, 3.0, 16.0, 11.0, 9.0, 6.0, 3.0, 6.0, 13.0, 6.0, 0.0, 0.0, 9.0, 3.0, 3.0, 12.0, 10.0, 16.0, 9.0, 9.0, 22.0, 6.0, 30.0, 11.0, 6.0, 12.0, 13.0, 9.0, 16.0, 8.0, 15.0, 6.0, 8.0, 6.0, 3.0, 23.0, 14.0, 34.0, 31.0, 19.0, 6.0, 11.0, 13.0, 0.0, 14.0, 11.0, 0.0, 3.0, 17.0, 32.0, 31.0, 11.0, 11.0, 6.0, 3.0, 17.0, 0.0, 14.0, 11.0, 13.0, 12.0, 6.0, 8.0, 28.0, 40.0, 3.0, 6.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7002415829283244, "mean_inference_ms": 1.219227588262156, "mean_action_processing_ms": 0.13465034540587548, "mean_env_wait_ms": 0.8677968402407349, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 79.0, "episode_reward_min": 3.0, "episode_reward_mean": 21.24, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 46.0}, "policy_reward_mean": {"ppo": 10.62}, "hist_stats": {"episode_reward": [9.0, 6.0, 19.0, 9.0, 39.0, 22.0, 22.0, 11.0, 14.0, 3.0, 20.0, 19.0, 14.0, 14.0, 19.0, 9.0, 60.0, 14.0, 79.0, 22.0, 22.0, 16.0, 16.0, 22.0, 19.0, 23.0, 11.0, 16.0, 11.0, 3.0, 6.0, 3.0, 14.0, 16.0, 20.0, 68.0, 9.0, 22.0, 22.0, 28.0, 9.0, 22.0, 17.0, 22.0, 14.0, 12.0, 3.0, 11.0, 25.0, 20.0, 14.0, 62.0, 60.0, 23.0, 31.0, 17.0, 28.0, 41.0, 20.0, 8.0, 36.0, 17.0, 3.0, 12.0, 22.0, 12.0, 9.0, 19.0, 20.0, 9.0, 19.0, 6.0, 9.0, 6.0, 22.0, 25.0, 31.0, 36.0, 17.0, 25.0, 25.0, 23.0, 14.0, 9.0, 37.0, 65.0, 25.0, 24.0, 14.0, 11.0, 20.0, 63.0, 22.0, 9.0, 17.0, 25.0, 25.0, 14.0, 68.0, 9.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [6.0, 3.0, 0.0, 6.0, 11.0, 8.0, 3.0, 6.0, 17.0, 22.0, 22.0, 0.0, 5.0, 17.0, 3.0, 8.0, 8.0, 6.0, 0.0, 3.0, 14.0, 6.0, 8.0, 11.0, 6.0, 8.0, 6.0, 8.0, 8.0, 11.0, 6.0, 3.0, 31.0, 29.0, 3.0, 11.0, 46.0, 33.0, 14.0, 8.0, 14.0, 8.0, 10.0, 6.0, 10.0, 6.0, 16.0, 6.0, 16.0, 3.0, 11.0, 12.0, 5.0, 6.0, 3.0, 13.0, 3.0, 8.0, 0.0, 3.0, 0.0, 6.0, 3.0, 0.0, 6.0, 8.0, 8.0, 8.0, 8.0, 12.0, 34.0, 34.0, 3.0, 6.0, 9.0, 13.0, 9.0, 13.0, 17.0, 11.0, 6.0, 3.0, 11.0, 11.0, 9.0, 8.0, 11.0, 11.0, 3.0, 11.0, 9.0, 3.0, 0.0, 3.0, 0.0, 11.0, 6.0, 19.0, 14.0, 6.0, 3.0, 11.0, 33.0, 29.0, 29.0, 31.0, 12.0, 11.0, 17.0, 14.0, 14.0, 3.0, 14.0, 14.0, 19.0, 22.0, 8.0, 12.0, 8.0, 0.0, 19.0, 17.0, 3.0, 14.0, 0.0, 3.0, 6.0, 6.0, 16.0, 6.0, 6.0, 6.0, 3.0, 6.0, 3.0, 16.0, 11.0, 9.0, 6.0, 3.0, 6.0, 13.0, 6.0, 0.0, 0.0, 9.0, 3.0, 3.0, 12.0, 10.0, 16.0, 9.0, 9.0, 22.0, 6.0, 30.0, 11.0, 6.0, 12.0, 13.0, 9.0, 16.0, 8.0, 15.0, 6.0, 8.0, 6.0, 3.0, 23.0, 14.0, 34.0, 31.0, 19.0, 6.0, 11.0, 13.0, 0.0, 14.0, 11.0, 0.0, 3.0, 17.0, 32.0, 31.0, 11.0, 11.0, 6.0, 3.0, 17.0, 0.0, 14.0, 11.0, 13.0, 12.0, 6.0, 8.0, 28.0, 40.0, 3.0, 6.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7002415829283244, "mean_inference_ms": 1.219227588262156, "mean_action_processing_ms": 0.13465034540587548, "mean_env_wait_ms": 0.8677968402407349, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 384000, "num_agent_steps_trained": 384000, "num_env_steps_sampled": 192000, "num_env_steps_trained": 192000, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 192000, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 384000, "timers": {"training_iteration_time_ms": 3756.788, "learn_time_ms": 
1136.217, "learn_throughput": 11265.456, "synch_weights_time_ms": 12.889}, "counters": {"num_env_steps_sampled": 192000, "num_env_steps_trained": 192000, "num_agent_steps_sampled": 384000, "num_agent_steps_trained": 384000}, "done": false, "episodes_total": 480, "training_iteration": 15, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-00-54", "timestamp": 1666580454, "time_this_iter_s": 3.7644498348236084, "time_total_s": 57.77495837211609, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, 
"POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": 
false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": 
{"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, 
"attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, 
"min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, 
"buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 57.77495837211609, "timesteps_since_restore": 0, "iterations_since_restore": 15, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 24.28, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 2.0, "sparse_reward_min": 0, "sparse_reward_max": 20, "shaped_reward_mean": 20.16, "shaped_reward_min": 3, "shaped_reward_max": 50, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 5.64, "onion_pickup_agent_0_min": 1, "onion_pickup_agent_0_max": 12, "onion_pickup_agent_1_mean": 5.44, "onion_pickup_agent_1_min": 0, "onion_pickup_agent_1_max": 15, "useful_onion_pickup_agent_0_mean": 3.3, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 9, "useful_onion_pickup_agent_1_mean": 3.38, "useful_onion_pickup_agent_1_min": 0, "useful_onion_pickup_agent_1_max": 8, "onion_drop_agent_0_mean": 3.25, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 10, "onion_drop_agent_1_mean": 3.07, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 12, "useful_onion_drop_agent_0_mean": 1.86, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 9, "useful_onion_drop_agent_1_mean": 1.75, 
"useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 7, "potting_onion_agent_0_mean": 1.86, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 6, "potting_onion_agent_1_mean": 1.79, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 5, "dish_pickup_agent_0_mean": 2.62, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 2.68, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 7, "useful_dish_pickup_agent_0_mean": 0.39, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 2, "useful_dish_pickup_agent_1_mean": 0.38, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 3, "dish_drop_agent_0_mean": 1.9, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 8, "dish_drop_agent_1_mean": 1.73, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 7, "useful_dish_drop_agent_0_mean": 0.63, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.62, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 7, "soup_pickup_agent_0_mean": 1.04, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 5, "soup_pickup_agent_1_mean": 1.17, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 6, "soup_delivery_agent_0_mean": 0.35, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 3, "soup_delivery_agent_1_mean": 0.29, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 2, "soup_drop_agent_0_mean": 0.53, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 4, "soup_drop_agent_1_mean": 0.79, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 5, "optimal_onion_potting_agent_0_mean": 1.86, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 6, "optimal_onion_potting_agent_1_mean": 1.79, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 5, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, 
"optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 1.86, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 6, "viable_onion_potting_agent_1_mean": 1.79, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 5, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 6.10351571594947e-06, "cur_lr": 0.0010000000474974513, "total_loss": -0.001889559207484126, "policy_loss": -0.0011786052491515875, "vf_loss": 1.7099534273147583, 
"vf_explained_var": 0.04326528683304787, "kl": 0.0008639540756121278, "entropy": 1.7639117240905762, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 204800, "num_env_steps_trained": 204800, "num_agent_steps_sampled": 409600, "num_agent_steps_trained": 409600}, "sampler_results": {"episode_reward_max": 90.0, "episode_reward_min": 3.0, "episode_reward_mean": 24.16, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 56.0}, "policy_reward_mean": {"ppo": 12.08}, "custom_metrics": {"sparse_reward_mean": 2.0, "sparse_reward_min": 0, "sparse_reward_max": 20, "shaped_reward_mean": 20.16, "shaped_reward_min": 3, "shaped_reward_max": 50, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 5.64, "onion_pickup_agent_0_min": 1, "onion_pickup_agent_0_max": 12, "onion_pickup_agent_1_mean": 5.44, "onion_pickup_agent_1_min": 0, 
"onion_pickup_agent_1_max": 15, "useful_onion_pickup_agent_0_mean": 3.3, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 9, "useful_onion_pickup_agent_1_mean": 3.38, "useful_onion_pickup_agent_1_min": 0, "useful_onion_pickup_agent_1_max": 8, "onion_drop_agent_0_mean": 3.25, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 10, "onion_drop_agent_1_mean": 3.07, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 12, "useful_onion_drop_agent_0_mean": 1.86, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 9, "useful_onion_drop_agent_1_mean": 1.75, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 7, "potting_onion_agent_0_mean": 1.86, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 6, "potting_onion_agent_1_mean": 1.79, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 5, "dish_pickup_agent_0_mean": 2.62, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 2.68, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 7, "useful_dish_pickup_agent_0_mean": 0.39, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 2, "useful_dish_pickup_agent_1_mean": 0.38, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 3, "dish_drop_agent_0_mean": 1.9, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 8, "dish_drop_agent_1_mean": 1.73, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 7, "useful_dish_drop_agent_0_mean": 0.63, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.62, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 7, "soup_pickup_agent_0_mean": 1.04, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 5, "soup_pickup_agent_1_mean": 1.17, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 6, "soup_delivery_agent_0_mean": 0.35, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 3, 
"soup_delivery_agent_1_mean": 0.29, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 2, "soup_drop_agent_0_mean": 0.53, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 4, "soup_drop_agent_1_mean": 0.79, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 5, "optimal_onion_potting_agent_0_mean": 1.86, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 6, "optimal_onion_potting_agent_1_mean": 1.79, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 5, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 1.86, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 6, "viable_onion_potting_agent_1_mean": 1.79, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 5, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, 
"useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [14.0, 16.0, 20.0, 68.0, 9.0, 22.0, 22.0, 28.0, 9.0, 22.0, 17.0, 22.0, 14.0, 12.0, 3.0, 11.0, 25.0, 20.0, 14.0, 62.0, 60.0, 23.0, 31.0, 17.0, 28.0, 41.0, 20.0, 8.0, 36.0, 17.0, 3.0, 12.0, 22.0, 12.0, 9.0, 19.0, 20.0, 9.0, 19.0, 6.0, 9.0, 6.0, 22.0, 25.0, 31.0, 36.0, 17.0, 25.0, 25.0, 23.0, 14.0, 9.0, 37.0, 65.0, 25.0, 24.0, 14.0, 11.0, 20.0, 63.0, 22.0, 9.0, 17.0, 25.0, 25.0, 14.0, 68.0, 9.0, 36.0, 14.0, 42.0, 90.0, 44.0, 36.0, 11.0, 25.0, 9.0, 6.0, 12.0, 23.0, 73.0, 17.0, 77.0, 9.0, 17.0, 66.0, 14.0, 29.0, 9.0, 36.0, 33.0, 12.0, 20.0, 14.0, 25.0, 38.0, 6.0, 19.0, 3.0, 19.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [6.0, 8.0, 8.0, 8.0, 8.0, 12.0, 34.0, 34.0, 3.0, 6.0, 9.0, 13.0, 9.0, 13.0, 17.0, 11.0, 6.0, 3.0, 11.0, 11.0, 9.0, 8.0, 11.0, 11.0, 3.0, 11.0, 9.0, 3.0, 0.0, 3.0, 0.0, 11.0, 6.0, 19.0, 14.0, 6.0, 3.0, 11.0, 33.0, 29.0, 29.0, 31.0, 12.0, 11.0, 17.0, 14.0, 14.0, 3.0, 14.0, 14.0, 19.0, 22.0, 8.0, 12.0, 8.0, 0.0, 19.0, 17.0, 3.0, 14.0, 0.0, 3.0, 6.0, 6.0, 16.0, 6.0, 6.0, 6.0, 3.0, 6.0, 3.0, 16.0, 11.0, 9.0, 6.0, 3.0, 6.0, 13.0, 6.0, 0.0, 0.0, 9.0, 3.0, 3.0, 12.0, 10.0, 16.0, 9.0, 9.0, 22.0, 6.0, 30.0, 11.0, 
6.0, 12.0, 13.0, 9.0, 16.0, 8.0, 15.0, 6.0, 8.0, 6.0, 3.0, 23.0, 14.0, 34.0, 31.0, 19.0, 6.0, 11.0, 13.0, 0.0, 14.0, 11.0, 0.0, 3.0, 17.0, 32.0, 31.0, 11.0, 11.0, 6.0, 3.0, 17.0, 0.0, 14.0, 11.0, 13.0, 12.0, 6.0, 8.0, 28.0, 40.0, 3.0, 6.0, 25.0, 11.0, 14.0, 0.0, 23.0, 19.0, 34.0, 56.0, 23.0, 21.0, 17.0, 19.0, 6.0, 5.0, 8.0, 17.0, 6.0, 3.0, 3.0, 3.0, 6.0, 6.0, 14.0, 9.0, 36.0, 37.0, 12.0, 5.0, 45.0, 32.0, 6.0, 3.0, 3.0, 14.0, 38.0, 28.0, 8.0, 6.0, 14.0, 15.0, 3.0, 6.0, 22.0, 14.0, 5.0, 28.0, 6.0, 6.0, 11.0, 9.0, 6.0, 8.0, 11.0, 14.0, 6.0, 32.0, 3.0, 3.0, 11.0, 8.0, 0.0, 3.0, 11.0, 8.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6990310521400235, "mean_inference_ms": 1.2183144516569073, "mean_action_processing_ms": 0.13449427945373685, "mean_env_wait_ms": 0.8729521794459285, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 90.0, "episode_reward_min": 3.0, "episode_reward_mean": 24.16, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 56.0}, "policy_reward_mean": {"ppo": 12.08}, "hist_stats": {"episode_reward": [14.0, 16.0, 20.0, 68.0, 9.0, 22.0, 22.0, 28.0, 9.0, 22.0, 17.0, 22.0, 14.0, 12.0, 3.0, 11.0, 25.0, 20.0, 14.0, 62.0, 60.0, 23.0, 31.0, 17.0, 28.0, 41.0, 20.0, 8.0, 36.0, 17.0, 3.0, 12.0, 22.0, 12.0, 9.0, 19.0, 20.0, 9.0, 19.0, 6.0, 9.0, 6.0, 22.0, 25.0, 31.0, 36.0, 17.0, 25.0, 25.0, 23.0, 14.0, 9.0, 37.0, 65.0, 25.0, 24.0, 14.0, 11.0, 20.0, 63.0, 22.0, 9.0, 17.0, 25.0, 25.0, 14.0, 68.0, 9.0, 36.0, 14.0, 42.0, 90.0, 44.0, 36.0, 11.0, 25.0, 9.0, 6.0, 12.0, 23.0, 73.0, 17.0, 77.0, 9.0, 17.0, 66.0, 14.0, 29.0, 9.0, 36.0, 33.0, 12.0, 20.0, 14.0, 25.0, 38.0, 6.0, 19.0, 3.0, 19.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [6.0, 8.0, 8.0, 8.0, 8.0, 12.0, 34.0, 34.0, 3.0, 6.0, 9.0, 13.0, 9.0, 13.0, 17.0, 11.0, 6.0, 3.0, 11.0, 11.0, 9.0, 8.0, 11.0, 11.0, 3.0, 11.0, 9.0, 3.0, 0.0, 3.0, 0.0, 11.0, 6.0, 19.0, 14.0, 6.0, 3.0, 11.0, 33.0, 29.0, 29.0, 31.0, 12.0, 11.0, 17.0, 14.0, 14.0, 3.0, 14.0, 14.0, 19.0, 22.0, 8.0, 12.0, 8.0, 0.0, 19.0, 17.0, 3.0, 14.0, 0.0, 3.0, 6.0, 6.0, 16.0, 6.0, 6.0, 6.0, 3.0, 6.0, 3.0, 16.0, 11.0, 9.0, 6.0, 3.0, 6.0, 13.0, 6.0, 0.0, 0.0, 9.0, 3.0, 3.0, 12.0, 10.0, 16.0, 9.0, 9.0, 22.0, 6.0, 30.0, 11.0, 6.0, 12.0, 13.0, 9.0, 16.0, 8.0, 15.0, 6.0, 8.0, 6.0, 3.0, 23.0, 14.0, 34.0, 31.0, 19.0, 6.0, 11.0, 13.0, 0.0, 14.0, 11.0, 0.0, 3.0, 17.0, 32.0, 31.0, 11.0, 11.0, 6.0, 3.0, 17.0, 0.0, 14.0, 11.0, 13.0, 12.0, 6.0, 8.0, 28.0, 40.0, 3.0, 6.0, 25.0, 11.0, 14.0, 0.0, 23.0, 19.0, 34.0, 56.0, 23.0, 21.0, 17.0, 19.0, 6.0, 5.0, 8.0, 17.0, 6.0, 3.0, 3.0, 3.0, 6.0, 6.0, 14.0, 9.0, 36.0, 37.0, 12.0, 5.0, 45.0, 32.0, 6.0, 3.0, 3.0, 14.0, 38.0, 28.0, 8.0, 6.0, 14.0, 15.0, 3.0, 6.0, 22.0, 14.0, 5.0, 28.0, 6.0, 6.0, 11.0, 9.0, 6.0, 8.0, 11.0, 14.0, 6.0, 32.0, 3.0, 3.0, 11.0, 8.0, 0.0, 3.0, 11.0, 8.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6990310521400235, "mean_inference_ms": 1.2183144516569073, "mean_action_processing_ms": 0.13449427945373685, "mean_env_wait_ms": 0.8729521794459285, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 409600, "num_agent_steps_trained": 409600, "num_env_steps_sampled": 204800, "num_env_steps_trained": 204800, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 204800, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 409600, "timers": {"training_iteration_time_ms": 
3728.323, "learn_time_ms": 1113.304, "learn_throughput": 11497.313, "synch_weights_time_ms": 12.737}, "counters": {"num_env_steps_sampled": 204800, "num_env_steps_trained": 204800, "num_agent_steps_sampled": 409600, "num_agent_steps_trained": 409600}, "done": false, "episodes_total": 512, "training_iteration": 16, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-00-58", "timestamp": 1666580458, "time_this_iter_s": 3.6585028171539307, "time_total_s": 61.43346118927002, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, 
"DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, 
"dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", 
"rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, 
"attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, 
"min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, 
"collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 61.43346118927002, "timesteps_since_restore": 0, "iterations_since_restore": 16, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 21.316666666666666, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 2.6, "sparse_reward_min": 0, "sparse_reward_max": 20, "shaped_reward_mean": 21.0, "shaped_reward_min": 3, "shaped_reward_max": 50, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 5.51, "onion_pickup_agent_0_min": 0, "onion_pickup_agent_0_max": 11, "onion_pickup_agent_1_mean": 5.71, "onion_pickup_agent_1_min": 0, "onion_pickup_agent_1_max": 15, "useful_onion_pickup_agent_0_mean": 3.38, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 9, "useful_onion_pickup_agent_1_mean": 3.65, "useful_onion_pickup_agent_1_min": 0, "useful_onion_pickup_agent_1_max": 9, "onion_drop_agent_0_mean": 3.17, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 8, "onion_drop_agent_1_mean": 3.25, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 12, "useful_onion_drop_agent_0_mean": 1.79, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 1.63, 
"useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 7, "potting_onion_agent_0_mean": 1.84, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 6, "potting_onion_agent_1_mean": 1.96, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 5, "dish_pickup_agent_0_mean": 2.88, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 2.76, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 7, "useful_dish_pickup_agent_0_mean": 0.45, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 2, "useful_dish_pickup_agent_1_mean": 0.35, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 3, "dish_drop_agent_0_mean": 1.98, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 8, "dish_drop_agent_1_mean": 1.82, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 7, "useful_dish_drop_agent_0_mean": 0.76, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.68, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 7, "soup_pickup_agent_0_mean": 1.3, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 6, "soup_pickup_agent_1_mean": 1.01, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 5, "soup_delivery_agent_0_mean": 0.43, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 3, "soup_delivery_agent_1_mean": 0.3, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 2, "soup_drop_agent_0_mean": 0.71, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 6, "soup_drop_agent_1_mean": 0.64, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 4, "optimal_onion_potting_agent_0_mean": 1.84, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 6, "optimal_onion_potting_agent_1_mean": 1.96, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 5, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, 
"optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 1.84, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 6, "viable_onion_potting_agent_1_mean": 1.96, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 5, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 3.051757857974735e-06, "cur_lr": 0.0010000000474974513, "total_loss": -0.004403861239552498, "policy_loss": -0.0036931377835571766, "vf_loss": 1.6866822242736816, 
"vf_explained_var": 0.03509732335805893, "kl": 0.0009732272010296583, "entropy": 1.758791446685791, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 217600, "num_env_steps_trained": 217600, "num_agent_steps_sampled": 435200, "num_agent_steps_trained": 435200}, "sampler_results": {"episode_reward_max": 90.0, "episode_reward_min": 3.0, "episode_reward_mean": 26.2, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 56.0}, "policy_reward_mean": {"ppo": 13.1}, "custom_metrics": {"sparse_reward_mean": 2.6, "sparse_reward_min": 0, "sparse_reward_max": 20, "shaped_reward_mean": 21.0, "shaped_reward_min": 3, "shaped_reward_max": 50, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 5.51, "onion_pickup_agent_0_min": 0, "onion_pickup_agent_0_max": 11, "onion_pickup_agent_1_mean": 5.71, "onion_pickup_agent_1_min": 0, "onion_pickup_agent_1_max": 
15, "useful_onion_pickup_agent_0_mean": 3.38, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 9, "useful_onion_pickup_agent_1_mean": 3.65, "useful_onion_pickup_agent_1_min": 0, "useful_onion_pickup_agent_1_max": 9, "onion_drop_agent_0_mean": 3.17, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 8, "onion_drop_agent_1_mean": 3.25, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 12, "useful_onion_drop_agent_0_mean": 1.79, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 1.63, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 7, "potting_onion_agent_0_mean": 1.84, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 6, "potting_onion_agent_1_mean": 1.96, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 5, "dish_pickup_agent_0_mean": 2.88, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 2.76, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 7, "useful_dish_pickup_agent_0_mean": 0.45, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 2, "useful_dish_pickup_agent_1_mean": 0.35, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 3, "dish_drop_agent_0_mean": 1.98, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 8, "dish_drop_agent_1_mean": 1.82, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 7, "useful_dish_drop_agent_0_mean": 0.76, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.68, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 7, "soup_pickup_agent_0_mean": 1.3, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 6, "soup_pickup_agent_1_mean": 1.01, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 5, "soup_delivery_agent_0_mean": 0.43, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 3, "soup_delivery_agent_1_mean": 0.3, 
"soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 2, "soup_drop_agent_0_mean": 0.71, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 6, "soup_drop_agent_1_mean": 0.64, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 4, "optimal_onion_potting_agent_0_mean": 1.84, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 6, "optimal_onion_potting_agent_1_mean": 1.96, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 5, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 1.84, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 6, "viable_onion_potting_agent_1_mean": 1.96, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 5, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, 
"useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [22.0, 12.0, 9.0, 19.0, 20.0, 9.0, 19.0, 6.0, 9.0, 6.0, 22.0, 25.0, 31.0, 36.0, 17.0, 25.0, 25.0, 23.0, 14.0, 9.0, 37.0, 65.0, 25.0, 24.0, 14.0, 11.0, 20.0, 63.0, 22.0, 9.0, 17.0, 25.0, 25.0, 14.0, 68.0, 9.0, 36.0, 14.0, 42.0, 90.0, 44.0, 36.0, 11.0, 25.0, 9.0, 6.0, 12.0, 23.0, 73.0, 17.0, 77.0, 9.0, 17.0, 66.0, 14.0, 29.0, 9.0, 36.0, 33.0, 12.0, 20.0, 14.0, 25.0, 38.0, 6.0, 19.0, 3.0, 19.0, 11.0, 22.0, 36.0, 6.0, 42.0, 65.0, 25.0, 12.0, 17.0, 73.0, 28.0, 66.0, 85.0, 28.0, 6.0, 14.0, 57.0, 6.0, 28.0, 31.0, 57.0, 17.0, 27.0, 9.0, 17.0, 22.0, 27.0, 12.0, 22.0, 19.0, 20.0, 23.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [16.0, 6.0, 6.0, 6.0, 3.0, 6.0, 3.0, 16.0, 11.0, 9.0, 6.0, 3.0, 6.0, 13.0, 6.0, 0.0, 0.0, 9.0, 3.0, 3.0, 12.0, 10.0, 16.0, 9.0, 9.0, 22.0, 6.0, 30.0, 11.0, 6.0, 12.0, 13.0, 9.0, 16.0, 8.0, 15.0, 6.0, 8.0, 6.0, 3.0, 23.0, 14.0, 34.0, 31.0, 19.0, 6.0, 11.0, 13.0, 0.0, 14.0, 11.0, 0.0, 3.0, 17.0, 32.0, 31.0, 11.0, 11.0, 6.0, 3.0, 17.0, 0.0, 14.0, 11.0, 13.0, 12.0, 6.0, 8.0, 28.0, 40.0, 3.0, 6.0, 25.0, 11.0, 14.0, 0.0, 23.0, 19.0, 34.0, 56.0, 23.0, 21.0, 17.0, 19.0, 6.0, 5.0, 8.0, 17.0, 6.0, 3.0, 3.0, 3.0, 6.0, 6.0, 14.0, 9.0, 36.0, 37.0, 12.0, 
5.0, 45.0, 32.0, 6.0, 3.0, 3.0, 14.0, 38.0, 28.0, 8.0, 6.0, 14.0, 15.0, 3.0, 6.0, 22.0, 14.0, 5.0, 28.0, 6.0, 6.0, 11.0, 9.0, 6.0, 8.0, 11.0, 14.0, 6.0, 32.0, 3.0, 3.0, 11.0, 8.0, 0.0, 3.0, 11.0, 8.0, 8.0, 3.0, 11.0, 11.0, 19.0, 17.0, 3.0, 3.0, 28.0, 14.0, 30.0, 35.0, 13.0, 12.0, 9.0, 3.0, 5.0, 12.0, 50.0, 23.0, 14.0, 14.0, 32.0, 34.0, 49.0, 36.0, 16.0, 12.0, 0.0, 6.0, 3.0, 11.0, 34.0, 23.0, 3.0, 3.0, 19.0, 9.0, 19.0, 12.0, 28.0, 29.0, 9.0, 8.0, 13.0, 14.0, 3.0, 6.0, 8.0, 9.0, 11.0, 11.0, 12.0, 15.0, 0.0, 12.0, 16.0, 6.0, 13.0, 6.0, 6.0, 14.0, 6.0, 17.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.698113649734605, "mean_inference_ms": 1.2171757494183797, "mean_action_processing_ms": 0.13440137759174353, "mean_env_wait_ms": 0.8729869098636517, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 90.0, "episode_reward_min": 3.0, "episode_reward_mean": 26.2, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 56.0}, "policy_reward_mean": {"ppo": 13.1}, "hist_stats": {"episode_reward": [22.0, 12.0, 9.0, 19.0, 20.0, 9.0, 19.0, 6.0, 9.0, 6.0, 22.0, 25.0, 31.0, 36.0, 17.0, 25.0, 25.0, 23.0, 14.0, 9.0, 37.0, 65.0, 25.0, 24.0, 14.0, 11.0, 20.0, 63.0, 22.0, 9.0, 17.0, 25.0, 25.0, 14.0, 68.0, 9.0, 36.0, 14.0, 42.0, 90.0, 44.0, 36.0, 11.0, 25.0, 9.0, 6.0, 12.0, 23.0, 73.0, 17.0, 77.0, 9.0, 17.0, 66.0, 14.0, 29.0, 9.0, 36.0, 33.0, 12.0, 20.0, 14.0, 25.0, 38.0, 6.0, 19.0, 3.0, 19.0, 11.0, 22.0, 36.0, 6.0, 42.0, 65.0, 25.0, 12.0, 17.0, 73.0, 28.0, 66.0, 85.0, 28.0, 6.0, 14.0, 57.0, 6.0, 28.0, 31.0, 57.0, 17.0, 27.0, 9.0, 17.0, 22.0, 27.0, 12.0, 22.0, 19.0, 20.0, 23.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [16.0, 6.0, 6.0, 6.0, 3.0, 6.0, 3.0, 16.0, 11.0, 9.0, 6.0, 3.0, 6.0, 13.0, 6.0, 0.0, 0.0, 9.0, 3.0, 3.0, 12.0, 10.0, 16.0, 9.0, 9.0, 22.0, 6.0, 30.0, 11.0, 6.0, 12.0, 13.0, 9.0, 16.0, 8.0, 15.0, 6.0, 8.0, 6.0, 3.0, 23.0, 14.0, 34.0, 31.0, 19.0, 6.0, 11.0, 13.0, 0.0, 14.0, 11.0, 0.0, 3.0, 17.0, 32.0, 31.0, 11.0, 11.0, 6.0, 3.0, 17.0, 0.0, 14.0, 11.0, 13.0, 12.0, 6.0, 8.0, 28.0, 40.0, 3.0, 6.0, 25.0, 11.0, 14.0, 0.0, 23.0, 19.0, 34.0, 56.0, 23.0, 21.0, 17.0, 19.0, 6.0, 5.0, 8.0, 17.0, 6.0, 3.0, 3.0, 3.0, 6.0, 6.0, 14.0, 9.0, 36.0, 37.0, 12.0, 5.0, 45.0, 32.0, 6.0, 3.0, 3.0, 14.0, 38.0, 28.0, 8.0, 6.0, 14.0, 15.0, 3.0, 6.0, 22.0, 14.0, 5.0, 28.0, 6.0, 6.0, 11.0, 9.0, 6.0, 8.0, 11.0, 14.0, 6.0, 32.0, 3.0, 3.0, 11.0, 8.0, 0.0, 3.0, 11.0, 8.0, 8.0, 3.0, 11.0, 11.0, 19.0, 17.0, 3.0, 3.0, 28.0, 14.0, 30.0, 35.0, 13.0, 12.0, 9.0, 3.0, 5.0, 12.0, 50.0, 23.0, 14.0, 14.0, 32.0, 34.0, 49.0, 36.0, 16.0, 12.0, 0.0, 6.0, 3.0, 11.0, 34.0, 23.0, 3.0, 3.0, 19.0, 9.0, 19.0, 12.0, 28.0, 29.0, 9.0, 8.0, 13.0, 14.0, 3.0, 6.0, 8.0, 9.0, 11.0, 11.0, 12.0, 15.0, 0.0, 12.0, 16.0, 6.0, 13.0, 6.0, 6.0, 14.0, 6.0, 17.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.698113649734605, "mean_inference_ms": 1.2171757494183797, "mean_action_processing_ms": 0.13440137759174353, "mean_env_wait_ms": 0.8729869098636517, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 435200, "num_agent_steps_trained": 435200, "num_env_steps_sampled": 217600, "num_env_steps_trained": 217600, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 217600, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 435200, "timers": {"training_iteration_time_ms": 3712.225, "learn_time_ms": 
1107.87, "learn_throughput": 11553.697, "synch_weights_time_ms": 12.655}, "counters": {"num_env_steps_sampled": 217600, "num_env_steps_trained": 217600, "num_agent_steps_sampled": 435200, "num_agent_steps_trained": 435200}, "done": false, "episodes_total": 544, "training_iteration": 17, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-01-02", "timestamp": 1666580462, "time_this_iter_s": 3.7620363235473633, "time_total_s": 65.19549751281738, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, 
"POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": 
false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": 
{"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, 
"attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, 
"min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, 
"buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 65.19549751281738, "timesteps_since_restore": 0, "iterations_since_restore": 17, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 24.080000000000002, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 2.8, "sparse_reward_min": 0, "sparse_reward_max": 20, "shaped_reward_mean": 21.62, "shaped_reward_min": 3, "shaped_reward_max": 50, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 5.22, "onion_pickup_agent_0_min": 0, "onion_pickup_agent_0_max": 11, "onion_pickup_agent_1_mean": 5.71, "onion_pickup_agent_1_min": 1, "onion_pickup_agent_1_max": 15, "useful_onion_pickup_agent_0_mean": 3.17, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 9, "useful_onion_pickup_agent_1_mean": 3.54, "useful_onion_pickup_agent_1_min": 0, "useful_onion_pickup_agent_1_max": 9, "onion_drop_agent_0_mean": 2.84, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 8, "onion_drop_agent_1_mean": 3.15, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 10, "useful_onion_drop_agent_0_mean": 1.66, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 7, "useful_onion_drop_agent_1_mean": 1.62, 
"useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 8, "potting_onion_agent_0_mean": 1.84, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 6, "potting_onion_agent_1_mean": 2.03, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 5, "dish_pickup_agent_0_mean": 3.23, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 2.61, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 7, "useful_dish_pickup_agent_0_mean": 0.45, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 2, "useful_dish_pickup_agent_1_mean": 0.37, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 3, "dish_drop_agent_0_mean": 2.22, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 8, "dish_drop_agent_1_mean": 1.67, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 7, "useful_dish_drop_agent_0_mean": 0.88, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 6, "useful_dish_drop_agent_1_mean": 0.64, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 7, "soup_pickup_agent_0_mean": 1.4, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 6, "soup_pickup_agent_1_mean": 1.11, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 0.46, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 3, "soup_delivery_agent_1_mean": 0.33, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 2, "soup_drop_agent_0_mean": 0.79, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 6, "soup_drop_agent_1_mean": 0.7, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 5, "optimal_onion_potting_agent_0_mean": 1.84, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 6, "optimal_onion_potting_agent_1_mean": 2.03, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 5, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, 
"optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 1.84, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 6, "viable_onion_potting_agent_1_mean": 2.03, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 5, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 1.5258789289873675e-06, "cur_lr": 0.0010000000474974513, "total_loss": -0.0026260181330144405, "policy_loss": -0.0018984454218298197, "vf_loss": 1.5118250846862793, 
"vf_explained_var": 0.10999676585197449, "kl": 0.0011376581387594342, "entropy": 1.757513403892517, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 230400, "num_env_steps_trained": 230400, "num_agent_steps_sampled": 460800, "num_agent_steps_trained": 460800}, "sampler_results": {"episode_reward_max": 90.0, "episode_reward_min": 3.0, "episode_reward_mean": 27.22, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 56.0}, "policy_reward_mean": {"ppo": 13.61}, "custom_metrics": {"sparse_reward_mean": 2.8, "sparse_reward_min": 0, "sparse_reward_max": 20, "shaped_reward_mean": 21.62, "shaped_reward_min": 3, "shaped_reward_max": 50, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 5.22, "onion_pickup_agent_0_min": 0, "onion_pickup_agent_0_max": 11, "onion_pickup_agent_1_mean": 5.71, "onion_pickup_agent_1_min": 1, 
"onion_pickup_agent_1_max": 15, "useful_onion_pickup_agent_0_mean": 3.17, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 9, "useful_onion_pickup_agent_1_mean": 3.54, "useful_onion_pickup_agent_1_min": 0, "useful_onion_pickup_agent_1_max": 9, "onion_drop_agent_0_mean": 2.84, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 8, "onion_drop_agent_1_mean": 3.15, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 10, "useful_onion_drop_agent_0_mean": 1.66, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 7, "useful_onion_drop_agent_1_mean": 1.62, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 8, "potting_onion_agent_0_mean": 1.84, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 6, "potting_onion_agent_1_mean": 2.03, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 5, "dish_pickup_agent_0_mean": 3.23, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 2.61, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 7, "useful_dish_pickup_agent_0_mean": 0.45, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 2, "useful_dish_pickup_agent_1_mean": 0.37, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 3, "dish_drop_agent_0_mean": 2.22, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 8, "dish_drop_agent_1_mean": 1.67, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 7, "useful_dish_drop_agent_0_mean": 0.88, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 6, "useful_dish_drop_agent_1_mean": 0.64, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 7, "soup_pickup_agent_0_mean": 1.4, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 6, "soup_pickup_agent_1_mean": 1.11, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 0.46, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 3, 
"soup_delivery_agent_1_mean": 0.33, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 2, "soup_drop_agent_0_mean": 0.79, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 6, "soup_drop_agent_1_mean": 0.7, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 5, "optimal_onion_potting_agent_0_mean": 1.84, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 6, "optimal_onion_potting_agent_1_mean": 2.03, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 5, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 1.84, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 6, "viable_onion_potting_agent_1_mean": 2.03, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 5, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, 
"useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [25.0, 14.0, 68.0, 9.0, 36.0, 14.0, 42.0, 90.0, 44.0, 36.0, 11.0, 25.0, 9.0, 6.0, 12.0, 23.0, 73.0, 17.0, 77.0, 9.0, 17.0, 66.0, 14.0, 29.0, 9.0, 36.0, 33.0, 12.0, 20.0, 14.0, 25.0, 38.0, 6.0, 19.0, 3.0, 19.0, 11.0, 22.0, 36.0, 6.0, 42.0, 65.0, 25.0, 12.0, 17.0, 73.0, 28.0, 66.0, 85.0, 28.0, 6.0, 14.0, 57.0, 6.0, 28.0, 31.0, 57.0, 17.0, 27.0, 9.0, 17.0, 22.0, 27.0, 12.0, 22.0, 19.0, 20.0, 23.0, 9.0, 9.0, 22.0, 17.0, 19.0, 24.0, 3.0, 36.0, 28.0, 71.0, 22.0, 20.0, 23.0, 22.0, 30.0, 30.0, 3.0, 9.0, 9.0, 23.0, 28.0, 17.0, 36.0, 17.0, 9.0, 9.0, 20.0, 22.0, 33.0, 62.0, 42.0, 68.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [13.0, 12.0, 6.0, 8.0, 28.0, 40.0, 3.0, 6.0, 25.0, 11.0, 14.0, 0.0, 23.0, 19.0, 34.0, 56.0, 23.0, 21.0, 17.0, 19.0, 6.0, 5.0, 8.0, 17.0, 6.0, 3.0, 3.0, 3.0, 6.0, 6.0, 14.0, 9.0, 36.0, 37.0, 12.0, 5.0, 45.0, 32.0, 6.0, 3.0, 3.0, 14.0, 38.0, 28.0, 8.0, 6.0, 14.0, 15.0, 3.0, 6.0, 22.0, 14.0, 5.0, 28.0, 6.0, 6.0, 11.0, 9.0, 6.0, 8.0, 11.0, 14.0, 6.0, 32.0, 3.0, 3.0, 11.0, 8.0, 0.0, 3.0, 11.0, 8.0, 8.0, 3.0, 11.0, 11.0, 19.0, 17.0, 3.0, 3.0, 28.0, 14.0, 30.0, 35.0, 13.0, 12.0, 9.0, 3.0, 5.0, 12.0, 50.0, 
23.0, 14.0, 14.0, 32.0, 34.0, 49.0, 36.0, 16.0, 12.0, 0.0, 6.0, 3.0, 11.0, 34.0, 23.0, 3.0, 3.0, 19.0, 9.0, 19.0, 12.0, 28.0, 29.0, 9.0, 8.0, 13.0, 14.0, 3.0, 6.0, 8.0, 9.0, 11.0, 11.0, 12.0, 15.0, 0.0, 12.0, 16.0, 6.0, 13.0, 6.0, 6.0, 14.0, 6.0, 17.0, 0.0, 9.0, 9.0, 0.0, 11.0, 11.0, 8.0, 9.0, 9.0, 10.0, 3.0, 21.0, 3.0, 0.0, 20.0, 16.0, 25.0, 3.0, 42.0, 29.0, 11.0, 11.0, 6.0, 14.0, 6.0, 17.0, 11.0, 11.0, 16.0, 14.0, 13.0, 17.0, 0.0, 3.0, 9.0, 0.0, 6.0, 3.0, 14.0, 9.0, 15.0, 13.0, 6.0, 11.0, 13.0, 23.0, 8.0, 9.0, 3.0, 6.0, 3.0, 6.0, 6.0, 14.0, 16.0, 6.0, 24.0, 9.0, 26.0, 36.0, 19.0, 23.0, 40.0, 28.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6973220319660581, "mean_inference_ms": 1.2163561047550697, "mean_action_processing_ms": 0.1342786781324832, "mean_env_wait_ms": 0.872687151210557, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 90.0, "episode_reward_min": 3.0, "episode_reward_mean": 27.22, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 56.0}, "policy_reward_mean": {"ppo": 13.61}, "hist_stats": {"episode_reward": [25.0, 14.0, 68.0, 9.0, 36.0, 14.0, 42.0, 90.0, 44.0, 36.0, 11.0, 25.0, 9.0, 6.0, 12.0, 23.0, 73.0, 17.0, 77.0, 9.0, 17.0, 66.0, 14.0, 29.0, 9.0, 36.0, 33.0, 12.0, 20.0, 14.0, 25.0, 38.0, 6.0, 19.0, 3.0, 19.0, 11.0, 22.0, 36.0, 6.0, 42.0, 65.0, 25.0, 12.0, 17.0, 73.0, 28.0, 66.0, 85.0, 28.0, 6.0, 14.0, 57.0, 6.0, 28.0, 31.0, 57.0, 17.0, 27.0, 9.0, 17.0, 22.0, 27.0, 12.0, 22.0, 19.0, 20.0, 23.0, 9.0, 9.0, 22.0, 17.0, 19.0, 24.0, 3.0, 36.0, 28.0, 71.0, 22.0, 20.0, 23.0, 22.0, 30.0, 30.0, 3.0, 9.0, 9.0, 23.0, 28.0, 17.0, 36.0, 17.0, 9.0, 9.0, 20.0, 22.0, 33.0, 62.0, 42.0, 68.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [13.0, 12.0, 6.0, 8.0, 28.0, 40.0, 3.0, 6.0, 25.0, 11.0, 14.0, 0.0, 23.0, 19.0, 34.0, 56.0, 23.0, 21.0, 17.0, 19.0, 6.0, 5.0, 8.0, 17.0, 6.0, 3.0, 3.0, 3.0, 6.0, 6.0, 14.0, 9.0, 36.0, 37.0, 12.0, 5.0, 45.0, 32.0, 6.0, 3.0, 3.0, 14.0, 38.0, 28.0, 8.0, 6.0, 14.0, 15.0, 3.0, 6.0, 22.0, 14.0, 5.0, 28.0, 6.0, 6.0, 11.0, 9.0, 6.0, 8.0, 11.0, 14.0, 6.0, 32.0, 3.0, 3.0, 11.0, 8.0, 0.0, 3.0, 11.0, 8.0, 8.0, 3.0, 11.0, 11.0, 19.0, 17.0, 3.0, 3.0, 28.0, 14.0, 30.0, 35.0, 13.0, 12.0, 9.0, 3.0, 5.0, 12.0, 50.0, 23.0, 14.0, 14.0, 32.0, 34.0, 49.0, 36.0, 16.0, 12.0, 0.0, 6.0, 3.0, 11.0, 34.0, 23.0, 3.0, 3.0, 19.0, 9.0, 19.0, 12.0, 28.0, 29.0, 9.0, 8.0, 13.0, 14.0, 3.0, 6.0, 8.0, 9.0, 11.0, 11.0, 12.0, 15.0, 0.0, 12.0, 16.0, 6.0, 13.0, 6.0, 6.0, 14.0, 6.0, 17.0, 0.0, 9.0, 9.0, 0.0, 11.0, 11.0, 8.0, 9.0, 9.0, 10.0, 3.0, 21.0, 3.0, 0.0, 20.0, 16.0, 25.0, 3.0, 42.0, 29.0, 11.0, 11.0, 6.0, 14.0, 6.0, 17.0, 11.0, 11.0, 16.0, 14.0, 13.0, 17.0, 0.0, 3.0, 9.0, 0.0, 6.0, 3.0, 14.0, 9.0, 15.0, 13.0, 6.0, 11.0, 13.0, 23.0, 8.0, 9.0, 3.0, 6.0, 3.0, 6.0, 6.0, 14.0, 16.0, 6.0, 24.0, 9.0, 26.0, 36.0, 19.0, 23.0, 40.0, 28.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6973220319660581, "mean_inference_ms": 1.2163561047550697, "mean_action_processing_ms": 0.1342786781324832, "mean_env_wait_ms": 0.872687151210557, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 460800, "num_agent_steps_trained": 460800, "num_env_steps_sampled": 230400, "num_env_steps_trained": 230400, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 230400, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 460800, "timers": 
{"training_iteration_time_ms": 3716.285, "learn_time_ms": 1113.511, "learn_throughput": 11495.167, "synch_weights_time_ms": 13.323}, "counters": {"num_env_steps_sampled": 230400, "num_env_steps_trained": 230400, "num_agent_steps_sampled": 460800, "num_agent_steps_trained": 460800}, "done": false, "episodes_total": 576, "training_iteration": 18, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-01-06", "timestamp": 1666580466, "time_this_iter_s": 3.773197650909424, "time_total_s": 68.9686951637268, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 
3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, 
"attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, 
"eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, 
"attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, 
"min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, 
"timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 68.9686951637268, "timesteps_since_restore": 0, "iterations_since_restore": 18, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 21.066666666666666, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 4.2, "sparse_reward_min": 0, "sparse_reward_max": 40, "shaped_reward_mean": 22.6, "shaped_reward_min": 3, "shaped_reward_max": 51, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 5.15, "onion_pickup_agent_0_min": 0, "onion_pickup_agent_0_max": 13, "onion_pickup_agent_1_mean": 5.87, "onion_pickup_agent_1_min": 1, "onion_pickup_agent_1_max": 15, "useful_onion_pickup_agent_0_mean": 3.06, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 9, "useful_onion_pickup_agent_1_mean": 3.71, "useful_onion_pickup_agent_1_min": 0, "useful_onion_pickup_agent_1_max": 10, "onion_drop_agent_0_mean": 2.74, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 9, "onion_drop_agent_1_mean": 3.3, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 10, "useful_onion_drop_agent_0_mean": 1.68, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 8, "useful_onion_drop_agent_1_mean": 1.75, 
"useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 8, "potting_onion_agent_0_mean": 1.92, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 6, "potting_onion_agent_1_mean": 2.11, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 6, "dish_pickup_agent_0_mean": 3.57, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 2.62, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 7, "useful_dish_pickup_agent_0_mean": 0.48, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 2, "useful_dish_pickup_agent_1_mean": 0.39, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 3, "dish_drop_agent_0_mean": 2.46, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 9, "dish_drop_agent_1_mean": 1.62, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 7, "useful_dish_drop_agent_0_mean": 0.87, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 8, "useful_dish_drop_agent_1_mean": 0.57, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 4, "soup_pickup_agent_0_mean": 1.42, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 6, "soup_pickup_agent_1_mean": 1.23, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 0.42, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 2, "soup_delivery_agent_1_mean": 0.4, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 3, "soup_drop_agent_0_mean": 0.85, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 6, "soup_drop_agent_1_mean": 0.72, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 6, "optimal_onion_potting_agent_0_mean": 1.92, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 6, "optimal_onion_potting_agent_1_mean": 2.11, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 6, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, 
"optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 1.92, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 6, "viable_onion_potting_agent_1_mean": 2.11, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 6, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 7.629394644936838e-07, "cur_lr": 0.0010000000474974513, "total_loss": -0.005456134676933289, "policy_loss": -0.004799796734005213, "vf_loss": 2.1967272758483887, 
"vf_explained_var": 0.07287011295557022, "kl": 0.0010914739686995745, "entropy": 1.7520215511322021, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 243200, "num_env_steps_trained": 243200, "num_agent_steps_sampled": 486400, "num_agent_steps_trained": 486400}, "sampler_results": {"episode_reward_max": 119.0, "episode_reward_min": 3.0, "episode_reward_mean": 31.0, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 61.0}, "policy_reward_mean": {"ppo": 15.5}, "custom_metrics": {"sparse_reward_mean": 4.2, "sparse_reward_min": 0, "sparse_reward_max": 40, "shaped_reward_mean": 22.6, "shaped_reward_min": 3, "shaped_reward_max": 51, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 5.15, "onion_pickup_agent_0_min": 0, "onion_pickup_agent_0_max": 13, "onion_pickup_agent_1_mean": 5.87, "onion_pickup_agent_1_min": 1, 
"onion_pickup_agent_1_max": 15, "useful_onion_pickup_agent_0_mean": 3.06, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 9, "useful_onion_pickup_agent_1_mean": 3.71, "useful_onion_pickup_agent_1_min": 0, "useful_onion_pickup_agent_1_max": 10, "onion_drop_agent_0_mean": 2.74, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 9, "onion_drop_agent_1_mean": 3.3, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 10, "useful_onion_drop_agent_0_mean": 1.68, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 8, "useful_onion_drop_agent_1_mean": 1.75, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 8, "potting_onion_agent_0_mean": 1.92, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 6, "potting_onion_agent_1_mean": 2.11, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 6, "dish_pickup_agent_0_mean": 3.57, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 2.62, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 7, "useful_dish_pickup_agent_0_mean": 0.48, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 2, "useful_dish_pickup_agent_1_mean": 0.39, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 3, "dish_drop_agent_0_mean": 2.46, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 9, "dish_drop_agent_1_mean": 1.62, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 7, "useful_dish_drop_agent_0_mean": 0.87, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 8, "useful_dish_drop_agent_1_mean": 0.57, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 4, "soup_pickup_agent_0_mean": 1.42, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 6, "soup_pickup_agent_1_mean": 1.23, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 0.42, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 2, 
"soup_delivery_agent_1_mean": 0.4, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 3, "soup_drop_agent_0_mean": 0.85, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 6, "soup_drop_agent_1_mean": 0.72, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 6, "optimal_onion_potting_agent_0_mean": 1.92, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 6, "optimal_onion_potting_agent_1_mean": 2.11, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 6, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 1.92, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 6, "viable_onion_potting_agent_1_mean": 2.11, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 6, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, 
"useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [6.0, 19.0, 3.0, 19.0, 11.0, 22.0, 36.0, 6.0, 42.0, 65.0, 25.0, 12.0, 17.0, 73.0, 28.0, 66.0, 85.0, 28.0, 6.0, 14.0, 57.0, 6.0, 28.0, 31.0, 57.0, 17.0, 27.0, 9.0, 17.0, 22.0, 27.0, 12.0, 22.0, 19.0, 20.0, 23.0, 9.0, 9.0, 22.0, 17.0, 19.0, 24.0, 3.0, 36.0, 28.0, 71.0, 22.0, 20.0, 23.0, 22.0, 30.0, 30.0, 3.0, 9.0, 9.0, 23.0, 28.0, 17.0, 36.0, 17.0, 9.0, 9.0, 20.0, 22.0, 33.0, 62.0, 42.0, 68.0, 60.0, 28.0, 30.0, 36.0, 11.0, 28.0, 9.0, 14.0, 31.0, 91.0, 34.0, 62.0, 71.0, 68.0, 119.0, 66.0, 17.0, 9.0, 9.0, 33.0, 71.0, 20.0, 37.0, 28.0, 22.0, 60.0, 11.0, 42.0, 76.0, 31.0, 50.0, 57.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [3.0, 3.0, 11.0, 8.0, 0.0, 3.0, 11.0, 8.0, 8.0, 3.0, 11.0, 11.0, 19.0, 17.0, 3.0, 3.0, 28.0, 14.0, 30.0, 35.0, 13.0, 12.0, 9.0, 3.0, 5.0, 12.0, 50.0, 23.0, 14.0, 14.0, 32.0, 34.0, 49.0, 36.0, 16.0, 12.0, 0.0, 6.0, 3.0, 11.0, 34.0, 23.0, 3.0, 3.0, 19.0, 9.0, 19.0, 12.0, 28.0, 29.0, 9.0, 8.0, 13.0, 14.0, 3.0, 6.0, 8.0, 9.0, 11.0, 11.0, 12.0, 15.0, 0.0, 12.0, 16.0, 6.0, 13.0, 6.0, 6.0, 14.0, 6.0, 17.0, 0.0, 9.0, 9.0, 0.0, 11.0, 11.0, 8.0, 9.0, 9.0, 10.0, 3.0, 21.0, 3.0, 0.0, 20.0, 16.0, 25.0, 3.0, 42.0, 
29.0, 11.0, 11.0, 6.0, 14.0, 6.0, 17.0, 11.0, 11.0, 16.0, 14.0, 13.0, 17.0, 0.0, 3.0, 9.0, 0.0, 6.0, 3.0, 14.0, 9.0, 15.0, 13.0, 6.0, 11.0, 13.0, 23.0, 8.0, 9.0, 3.0, 6.0, 3.0, 6.0, 6.0, 14.0, 16.0, 6.0, 24.0, 9.0, 26.0, 36.0, 19.0, 23.0, 40.0, 28.0, 29.0, 31.0, 9.0, 19.0, 14.0, 16.0, 14.0, 22.0, 8.0, 3.0, 14.0, 14.0, 3.0, 6.0, 6.0, 8.0, 20.0, 11.0, 40.0, 51.0, 22.0, 12.0, 23.0, 39.0, 31.0, 40.0, 37.0, 31.0, 61.0, 58.0, 32.0, 34.0, 11.0, 6.0, 3.0, 6.0, 3.0, 6.0, 18.0, 15.0, 34.0, 37.0, 14.0, 6.0, 22.0, 15.0, 14.0, 14.0, 8.0, 14.0, 31.0, 29.0, 8.0, 3.0, 20.0, 22.0, 37.0, 39.0, 25.0, 6.0, 25.0, 25.0, 23.0, 34.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6967297222460701, "mean_inference_ms": 1.2153942149654409, "mean_action_processing_ms": 0.13419810737632373, "mean_env_wait_ms": 0.8722581256551439, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 119.0, "episode_reward_min": 3.0, "episode_reward_mean": 31.0, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 61.0}, "policy_reward_mean": {"ppo": 15.5}, "hist_stats": {"episode_reward": [6.0, 19.0, 3.0, 19.0, 11.0, 22.0, 36.0, 6.0, 42.0, 65.0, 25.0, 12.0, 17.0, 73.0, 28.0, 66.0, 85.0, 28.0, 6.0, 14.0, 57.0, 6.0, 28.0, 31.0, 57.0, 17.0, 27.0, 9.0, 17.0, 22.0, 27.0, 12.0, 22.0, 19.0, 20.0, 23.0, 9.0, 9.0, 22.0, 17.0, 19.0, 24.0, 3.0, 36.0, 28.0, 71.0, 22.0, 20.0, 23.0, 22.0, 30.0, 30.0, 3.0, 9.0, 9.0, 23.0, 28.0, 17.0, 36.0, 17.0, 9.0, 9.0, 20.0, 22.0, 33.0, 62.0, 42.0, 68.0, 60.0, 28.0, 30.0, 36.0, 11.0, 28.0, 9.0, 14.0, 31.0, 91.0, 34.0, 62.0, 71.0, 68.0, 119.0, 66.0, 17.0, 9.0, 9.0, 33.0, 71.0, 20.0, 37.0, 28.0, 22.0, 60.0, 11.0, 42.0, 76.0, 31.0, 50.0, 57.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [3.0, 3.0, 11.0, 8.0, 0.0, 3.0, 11.0, 8.0, 8.0, 3.0, 11.0, 11.0, 19.0, 17.0, 3.0, 3.0, 28.0, 14.0, 30.0, 35.0, 13.0, 12.0, 9.0, 3.0, 5.0, 12.0, 50.0, 23.0, 14.0, 14.0, 32.0, 34.0, 49.0, 36.0, 16.0, 12.0, 0.0, 6.0, 3.0, 11.0, 34.0, 23.0, 3.0, 3.0, 19.0, 9.0, 19.0, 12.0, 28.0, 29.0, 9.0, 8.0, 13.0, 14.0, 3.0, 6.0, 8.0, 9.0, 11.0, 11.0, 12.0, 15.0, 0.0, 12.0, 16.0, 6.0, 13.0, 6.0, 6.0, 14.0, 6.0, 17.0, 0.0, 9.0, 9.0, 0.0, 11.0, 11.0, 8.0, 9.0, 9.0, 10.0, 3.0, 21.0, 3.0, 0.0, 20.0, 16.0, 25.0, 3.0, 42.0, 29.0, 11.0, 11.0, 6.0, 14.0, 6.0, 17.0, 11.0, 11.0, 16.0, 14.0, 13.0, 17.0, 0.0, 3.0, 9.0, 0.0, 6.0, 3.0, 14.0, 9.0, 15.0, 13.0, 6.0, 11.0, 13.0, 23.0, 8.0, 9.0, 3.0, 6.0, 3.0, 6.0, 6.0, 14.0, 16.0, 6.0, 24.0, 9.0, 26.0, 36.0, 19.0, 23.0, 40.0, 28.0, 29.0, 31.0, 9.0, 19.0, 14.0, 16.0, 14.0, 22.0, 8.0, 3.0, 14.0, 14.0, 3.0, 6.0, 6.0, 8.0, 20.0, 11.0, 40.0, 51.0, 22.0, 12.0, 23.0, 39.0, 31.0, 40.0, 37.0, 31.0, 61.0, 58.0, 32.0, 34.0, 11.0, 6.0, 3.0, 6.0, 3.0, 6.0, 18.0, 15.0, 34.0, 37.0, 14.0, 6.0, 22.0, 15.0, 14.0, 14.0, 8.0, 14.0, 31.0, 29.0, 8.0, 3.0, 20.0, 22.0, 37.0, 39.0, 25.0, 6.0, 25.0, 25.0, 23.0, 34.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6967297222460701, "mean_inference_ms": 1.2153942149654409, "mean_action_processing_ms": 0.13419810737632373, "mean_env_wait_ms": 0.8722581256551439, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 486400, "num_agent_steps_trained": 486400, "num_env_steps_sampled": 243200, "num_env_steps_trained": 243200, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 243200, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 
486400, "timers": {"training_iteration_time_ms": 3696.48, "learn_time_ms": 1100.823, "learn_throughput": 11627.666, "synch_weights_time_ms": 12.828}, "counters": {"num_env_steps_sampled": 243200, "num_env_steps_trained": 243200, "num_agent_steps_sampled": 486400, "num_agent_steps_trained": 486400}, "done": false, "episodes_total": 608, "training_iteration": 19, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-01-09", "timestamp": 1666580469, "time_this_iter_s": 3.6425294876098633, "time_total_s": 72.61122465133667, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, 
"DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, 
"attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, 
"eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, 
"attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, 
"min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, 
"timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 72.61122465133667, "timesteps_since_restore": 0, "iterations_since_restore": 19, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 24.14, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 3.6, "sparse_reward_min": 0, "sparse_reward_max": 40, "shaped_reward_mean": 22.58, "shaped_reward_min": 3, "shaped_reward_max": 51, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 5.13, "onion_pickup_agent_0_min": 1, "onion_pickup_agent_0_max": 13, "onion_pickup_agent_1_mean": 5.72, "onion_pickup_agent_1_min": 1, "onion_pickup_agent_1_max": 13, "useful_onion_pickup_agent_0_mean": 3.07, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 13, "useful_onion_pickup_agent_1_mean": 3.65, "useful_onion_pickup_agent_1_min": 0, "useful_onion_pickup_agent_1_max": 10, "onion_drop_agent_0_mean": 2.66, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 9, "onion_drop_agent_1_mean": 3.15, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 10, "useful_onion_drop_agent_0_mean": 1.62, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 8, "useful_onion_drop_agent_1_mean": 1.79, 
"useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 8, "potting_onion_agent_0_mean": 2.02, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 5, "potting_onion_agent_1_mean": 2.12, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 6, "dish_pickup_agent_0_mean": 3.59, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 2.62, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 7, "useful_dish_pickup_agent_0_mean": 0.45, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 2, "useful_dish_pickup_agent_1_mean": 0.42, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 3, "dish_drop_agent_0_mean": 2.55, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 9, "dish_drop_agent_1_mean": 1.59, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 6, "useful_dish_drop_agent_0_mean": 0.95, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 8, "useful_dish_drop_agent_1_mean": 0.6, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 5, "soup_pickup_agent_0_mean": 1.19, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 6, "soup_pickup_agent_1_mean": 1.22, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 0.37, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 2, "soup_delivery_agent_1_mean": 0.41, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 3, "soup_drop_agent_0_mean": 0.68, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 5, "soup_drop_agent_1_mean": 0.69, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 6, "optimal_onion_potting_agent_0_mean": 2.02, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 5, "optimal_onion_potting_agent_1_mean": 2.12, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 6, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, 
"optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 2.02, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 5, "viable_onion_potting_agent_1_mean": 2.12, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 6, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 3.814697322468419e-07, "cur_lr": 0.0010000000474974513, "total_loss": -0.0051202354952692986, "policy_loss": -0.0043845875188708305, "vf_loss": 1.4147591590881348, 
"vf_explained_var": 0.15972867608070374, "kl": 0.000972163223195821, "entropy": 1.7542475461959839, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 256000, "num_env_steps_trained": 256000, "num_agent_steps_sampled": 512000, "num_agent_steps_trained": 512000}, "sampler_results": {"episode_reward_max": 119.0, "episode_reward_min": 3.0, "episode_reward_mean": 29.78, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 61.0}, "policy_reward_mean": {"ppo": 14.89}, "custom_metrics": {"sparse_reward_mean": 3.6, "sparse_reward_min": 0, "sparse_reward_max": 40, "shaped_reward_mean": 22.58, "shaped_reward_min": 3, "shaped_reward_max": 51, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 5.13, "onion_pickup_agent_0_min": 1, "onion_pickup_agent_0_max": 13, "onion_pickup_agent_1_mean": 5.72, "onion_pickup_agent_1_min": 1, 
"onion_pickup_agent_1_max": 13, "useful_onion_pickup_agent_0_mean": 3.07, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 13, "useful_onion_pickup_agent_1_mean": 3.65, "useful_onion_pickup_agent_1_min": 0, "useful_onion_pickup_agent_1_max": 10, "onion_drop_agent_0_mean": 2.66, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 9, "onion_drop_agent_1_mean": 3.15, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 10, "useful_onion_drop_agent_0_mean": 1.62, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 8, "useful_onion_drop_agent_1_mean": 1.79, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 8, "potting_onion_agent_0_mean": 2.02, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 5, "potting_onion_agent_1_mean": 2.12, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 6, "dish_pickup_agent_0_mean": 3.59, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 2.62, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 7, "useful_dish_pickup_agent_0_mean": 0.45, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 2, "useful_dish_pickup_agent_1_mean": 0.42, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 3, "dish_drop_agent_0_mean": 2.55, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 9, "dish_drop_agent_1_mean": 1.59, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 6, "useful_dish_drop_agent_0_mean": 0.95, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 8, "useful_dish_drop_agent_1_mean": 0.6, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 5, "soup_pickup_agent_0_mean": 1.19, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 6, "soup_pickup_agent_1_mean": 1.22, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 0.37, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 2, 
"soup_delivery_agent_1_mean": 0.41, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 3, "soup_drop_agent_0_mean": 0.68, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 5, "soup_drop_agent_1_mean": 0.69, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 6, "optimal_onion_potting_agent_0_mean": 2.02, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 5, "optimal_onion_potting_agent_1_mean": 2.12, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 6, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 2.02, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 5, "viable_onion_potting_agent_1_mean": 2.12, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 6, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, 
"useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [22.0, 19.0, 20.0, 23.0, 9.0, 9.0, 22.0, 17.0, 19.0, 24.0, 3.0, 36.0, 28.0, 71.0, 22.0, 20.0, 23.0, 22.0, 30.0, 30.0, 3.0, 9.0, 9.0, 23.0, 28.0, 17.0, 36.0, 17.0, 9.0, 9.0, 20.0, 22.0, 33.0, 62.0, 42.0, 68.0, 60.0, 28.0, 30.0, 36.0, 11.0, 28.0, 9.0, 14.0, 31.0, 91.0, 34.0, 62.0, 71.0, 68.0, 119.0, 66.0, 17.0, 9.0, 9.0, 33.0, 71.0, 20.0, 37.0, 28.0, 22.0, 60.0, 11.0, 42.0, 76.0, 31.0, 50.0, 57.0, 9.0, 20.0, 12.0, 9.0, 74.0, 30.0, 6.0, 9.0, 28.0, 14.0, 12.0, 34.0, 8.0, 9.0, 25.0, 26.0, 25.0, 25.0, 25.0, 28.0, 17.0, 20.0, 9.0, 28.0, 28.0, 20.0, 20.0, 82.0, 76.0, 14.0, 20.0, 9.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [16.0, 6.0, 13.0, 6.0, 6.0, 14.0, 6.0, 17.0, 0.0, 9.0, 9.0, 0.0, 11.0, 11.0, 8.0, 9.0, 9.0, 10.0, 3.0, 21.0, 3.0, 0.0, 20.0, 16.0, 25.0, 3.0, 42.0, 29.0, 11.0, 11.0, 6.0, 14.0, 6.0, 17.0, 11.0, 11.0, 16.0, 14.0, 13.0, 17.0, 0.0, 3.0, 9.0, 0.0, 6.0, 3.0, 14.0, 9.0, 15.0, 13.0, 6.0, 11.0, 13.0, 23.0, 8.0, 9.0, 3.0, 6.0, 3.0, 6.0, 6.0, 14.0, 16.0, 6.0, 24.0, 9.0, 26.0, 36.0, 19.0, 23.0, 40.0, 28.0, 29.0, 31.0, 9.0, 19.0, 14.0, 16.0, 14.0, 22.0, 8.0, 3.0, 14.0, 14.0, 3.0, 6.0, 6.0, 8.0, 20.0, 11.0, 40.0, 
51.0, 22.0, 12.0, 23.0, 39.0, 31.0, 40.0, 37.0, 31.0, 61.0, 58.0, 32.0, 34.0, 11.0, 6.0, 3.0, 6.0, 3.0, 6.0, 18.0, 15.0, 34.0, 37.0, 14.0, 6.0, 22.0, 15.0, 14.0, 14.0, 8.0, 14.0, 31.0, 29.0, 8.0, 3.0, 20.0, 22.0, 37.0, 39.0, 25.0, 6.0, 25.0, 25.0, 23.0, 34.0, 0.0, 9.0, 11.0, 9.0, 6.0, 6.0, 3.0, 6.0, 40.0, 34.0, 9.0, 21.0, 3.0, 3.0, 3.0, 6.0, 14.0, 14.0, 11.0, 3.0, 3.0, 9.0, 18.0, 16.0, 8.0, 0.0, 0.0, 9.0, 13.0, 12.0, 12.0, 14.0, 11.0, 14.0, 14.0, 11.0, 16.0, 9.0, 17.0, 11.0, 12.0, 5.0, 11.0, 9.0, 9.0, 0.0, 9.0, 19.0, 9.0, 19.0, 6.0, 14.0, 17.0, 3.0, 34.0, 48.0, 36.0, 40.0, 11.0, 3.0, 8.0, 12.0, 6.0, 3.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6964587722357374, "mean_inference_ms": 1.21476526717065, "mean_action_processing_ms": 0.13413883320263154, "mean_env_wait_ms": 0.8720606761944739, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 119.0, "episode_reward_min": 3.0, "episode_reward_mean": 29.78, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 61.0}, "policy_reward_mean": {"ppo": 14.89}, "hist_stats": {"episode_reward": [22.0, 19.0, 20.0, 23.0, 9.0, 9.0, 22.0, 17.0, 19.0, 24.0, 3.0, 36.0, 28.0, 71.0, 22.0, 20.0, 23.0, 22.0, 30.0, 30.0, 3.0, 9.0, 9.0, 23.0, 28.0, 17.0, 36.0, 17.0, 9.0, 9.0, 20.0, 22.0, 33.0, 62.0, 42.0, 68.0, 60.0, 28.0, 30.0, 36.0, 11.0, 28.0, 9.0, 14.0, 31.0, 91.0, 34.0, 62.0, 71.0, 68.0, 119.0, 66.0, 17.0, 9.0, 9.0, 33.0, 71.0, 20.0, 37.0, 28.0, 22.0, 60.0, 11.0, 42.0, 76.0, 31.0, 50.0, 57.0, 9.0, 20.0, 12.0, 9.0, 74.0, 30.0, 6.0, 9.0, 28.0, 14.0, 12.0, 34.0, 8.0, 9.0, 25.0, 26.0, 25.0, 25.0, 25.0, 28.0, 17.0, 20.0, 9.0, 28.0, 28.0, 20.0, 20.0, 82.0, 76.0, 14.0, 20.0, 9.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [16.0, 6.0, 13.0, 6.0, 6.0, 14.0, 6.0, 17.0, 0.0, 9.0, 9.0, 0.0, 11.0, 11.0, 8.0, 9.0, 9.0, 10.0, 3.0, 21.0, 3.0, 0.0, 20.0, 16.0, 25.0, 3.0, 42.0, 29.0, 11.0, 11.0, 6.0, 14.0, 6.0, 17.0, 11.0, 11.0, 16.0, 14.0, 13.0, 17.0, 0.0, 3.0, 9.0, 0.0, 6.0, 3.0, 14.0, 9.0, 15.0, 13.0, 6.0, 11.0, 13.0, 23.0, 8.0, 9.0, 3.0, 6.0, 3.0, 6.0, 6.0, 14.0, 16.0, 6.0, 24.0, 9.0, 26.0, 36.0, 19.0, 23.0, 40.0, 28.0, 29.0, 31.0, 9.0, 19.0, 14.0, 16.0, 14.0, 22.0, 8.0, 3.0, 14.0, 14.0, 3.0, 6.0, 6.0, 8.0, 20.0, 11.0, 40.0, 51.0, 22.0, 12.0, 23.0, 39.0, 31.0, 40.0, 37.0, 31.0, 61.0, 58.0, 32.0, 34.0, 11.0, 6.0, 3.0, 6.0, 3.0, 6.0, 18.0, 15.0, 34.0, 37.0, 14.0, 6.0, 22.0, 15.0, 14.0, 14.0, 8.0, 14.0, 31.0, 29.0, 8.0, 3.0, 20.0, 22.0, 37.0, 39.0, 25.0, 6.0, 25.0, 25.0, 23.0, 34.0, 0.0, 9.0, 11.0, 9.0, 6.0, 6.0, 3.0, 6.0, 40.0, 34.0, 9.0, 21.0, 3.0, 3.0, 3.0, 6.0, 14.0, 14.0, 11.0, 3.0, 3.0, 9.0, 18.0, 16.0, 8.0, 0.0, 0.0, 9.0, 13.0, 12.0, 12.0, 14.0, 11.0, 14.0, 14.0, 11.0, 16.0, 9.0, 17.0, 11.0, 12.0, 5.0, 11.0, 9.0, 9.0, 0.0, 9.0, 19.0, 9.0, 19.0, 6.0, 14.0, 17.0, 3.0, 34.0, 48.0, 36.0, 40.0, 11.0, 3.0, 8.0, 12.0, 6.0, 3.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6964587722357374, "mean_inference_ms": 1.21476526717065, "mean_action_processing_ms": 0.13413883320263154, "mean_env_wait_ms": 0.8720606761944739, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 512000, "num_agent_steps_trained": 512000, "num_env_steps_sampled": 256000, "num_env_steps_trained": 256000, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 256000, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 512000, "timers": 
{"training_iteration_time_ms": 3678.582, "learn_time_ms": 1080.587, "learn_throughput": 11845.408, "synch_weights_time_ms": 11.897}, "counters": {"num_env_steps_sampled": 256000, "num_env_steps_trained": 256000, "num_agent_steps_sampled": 512000, "num_agent_steps_trained": 512000}, "done": false, "episodes_total": 640, "training_iteration": 20, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-01-13", "timestamp": 1666580473, "time_this_iter_s": 3.591038703918457, "time_total_s": 76.20226335525513, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 
3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, 
"attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, 
"eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, 
"attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, 
"min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, 
"timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 76.20226335525513, "timesteps_since_restore": 0, "iterations_since_restore": 20, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 21.21666666666667, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 5.2, "sparse_reward_min": 0, "sparse_reward_max": 40, "shaped_reward_mean": 24.63, "shaped_reward_min": 6, "shaped_reward_max": 53, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 5.35, "onion_pickup_agent_0_min": 1, "onion_pickup_agent_0_max": 13, "onion_pickup_agent_1_mean": 5.68, "onion_pickup_agent_1_min": 1, "onion_pickup_agent_1_max": 13, "useful_onion_pickup_agent_0_mean": 3.31, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 13, "useful_onion_pickup_agent_1_mean": 3.72, "useful_onion_pickup_agent_1_min": 0, "useful_onion_pickup_agent_1_max": 10, "onion_drop_agent_0_mean": 2.68, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 9, "onion_drop_agent_1_mean": 2.98, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 10, "useful_onion_drop_agent_0_mean": 1.68, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 8, "useful_onion_drop_agent_1_mean": 1.7, 
"useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 8, "potting_onion_agent_0_mean": 2.25, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 5, "potting_onion_agent_1_mean": 2.22, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 6, "dish_pickup_agent_0_mean": 3.24, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 2.89, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 0.48, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 2, "useful_dish_pickup_agent_1_mean": 0.51, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 3, "dish_drop_agent_0_mean": 2.14, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 9, "dish_drop_agent_1_mean": 1.82, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 7, "useful_dish_drop_agent_0_mean": 0.7, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 8, "useful_dish_drop_agent_1_mean": 0.64, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 6, "soup_pickup_agent_0_mean": 1.3, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 6, "soup_pickup_agent_1_mean": 1.26, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 0.42, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 3, "soup_delivery_agent_1_mean": 0.51, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 3, "soup_drop_agent_0_mean": 0.77, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 6, "soup_drop_agent_1_mean": 0.62, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 6, "optimal_onion_potting_agent_0_mean": 2.25, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 5, "optimal_onion_potting_agent_1_mean": 2.22, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 6, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, 
"optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 2.25, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 5, "viable_onion_potting_agent_1_mean": 2.22, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 6, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 1.9073486612342094e-07, "cur_lr": 0.0010000000474974513, "total_loss": -0.003267324063926935, "policy_loss": -0.002591660711914301, "vf_loss": 1.9827957153320312, 
"vf_explained_var": 0.10969623923301697, "kl": 0.0012384429574012756, "entropy": 1.7478868961334229, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 268800, "num_env_steps_trained": 268800, "num_agent_steps_sampled": 537600, "num_agent_steps_trained": 537600}, "sampler_results": {"episode_reward_max": 119.0, "episode_reward_min": 6.0, "episode_reward_mean": 35.03, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 61.0}, "policy_reward_mean": {"ppo": 17.515}, "custom_metrics": {"sparse_reward_mean": 5.2, "sparse_reward_min": 0, "sparse_reward_max": 40, "shaped_reward_mean": 24.63, "shaped_reward_min": 6, "shaped_reward_max": 53, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 5.35, "onion_pickup_agent_0_min": 1, "onion_pickup_agent_0_max": 13, "onion_pickup_agent_1_mean": 5.68, "onion_pickup_agent_1_min": 1, 
"onion_pickup_agent_1_max": 13, "useful_onion_pickup_agent_0_mean": 3.31, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 13, "useful_onion_pickup_agent_1_mean": 3.72, "useful_onion_pickup_agent_1_min": 0, "useful_onion_pickup_agent_1_max": 10, "onion_drop_agent_0_mean": 2.68, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 9, "onion_drop_agent_1_mean": 2.98, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 10, "useful_onion_drop_agent_0_mean": 1.68, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 8, "useful_onion_drop_agent_1_mean": 1.7, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 8, "potting_onion_agent_0_mean": 2.25, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 5, "potting_onion_agent_1_mean": 2.22, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 6, "dish_pickup_agent_0_mean": 3.24, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 2.89, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 0.48, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 2, "useful_dish_pickup_agent_1_mean": 0.51, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 3, "dish_drop_agent_0_mean": 2.14, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 9, "dish_drop_agent_1_mean": 1.82, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 7, "useful_dish_drop_agent_0_mean": 0.7, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 8, "useful_dish_drop_agent_1_mean": 0.64, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 6, "soup_pickup_agent_0_mean": 1.3, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 6, "soup_pickup_agent_1_mean": 1.26, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 0.42, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 3, 
"soup_delivery_agent_1_mean": 0.51, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 3, "soup_drop_agent_0_mean": 0.77, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 6, "soup_drop_agent_1_mean": 0.62, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 6, "optimal_onion_potting_agent_0_mean": 2.25, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 5, "optimal_onion_potting_agent_1_mean": 2.22, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 6, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 2.25, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 5, "viable_onion_potting_agent_1_mean": 2.22, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 6, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, 
"useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [33.0, 62.0, 42.0, 68.0, 60.0, 28.0, 30.0, 36.0, 11.0, 28.0, 9.0, 14.0, 31.0, 91.0, 34.0, 62.0, 71.0, 68.0, 119.0, 66.0, 17.0, 9.0, 9.0, 33.0, 71.0, 20.0, 37.0, 28.0, 22.0, 60.0, 11.0, 42.0, 76.0, 31.0, 50.0, 57.0, 9.0, 20.0, 12.0, 9.0, 74.0, 30.0, 6.0, 9.0, 28.0, 14.0, 12.0, 34.0, 8.0, 9.0, 25.0, 26.0, 25.0, 25.0, 25.0, 28.0, 17.0, 20.0, 9.0, 28.0, 28.0, 20.0, 20.0, 82.0, 76.0, 14.0, 20.0, 9.0, 12.0, 28.0, 90.0, 31.0, 20.0, 33.0, 38.0, 70.0, 93.0, 14.0, 23.0, 23.0, 33.0, 9.0, 14.0, 9.0, 20.0, 71.0, 41.0, 12.0, 22.0, 60.0, 26.0, 31.0, 9.0, 34.0, 20.0, 68.0, 57.0, 82.0, 65.0, 38.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [24.0, 9.0, 26.0, 36.0, 19.0, 23.0, 40.0, 28.0, 29.0, 31.0, 9.0, 19.0, 14.0, 16.0, 14.0, 22.0, 8.0, 3.0, 14.0, 14.0, 3.0, 6.0, 6.0, 8.0, 20.0, 11.0, 40.0, 51.0, 22.0, 12.0, 23.0, 39.0, 31.0, 40.0, 37.0, 31.0, 61.0, 58.0, 32.0, 34.0, 11.0, 6.0, 3.0, 6.0, 3.0, 6.0, 18.0, 15.0, 34.0, 37.0, 14.0, 6.0, 22.0, 15.0, 14.0, 14.0, 8.0, 14.0, 31.0, 29.0, 8.0, 3.0, 20.0, 22.0, 37.0, 39.0, 25.0, 6.0, 25.0, 25.0, 23.0, 34.0, 0.0, 9.0, 11.0, 9.0, 6.0, 6.0, 3.0, 6.0, 40.0, 34.0, 9.0, 21.0, 3.0, 3.0, 3.0, 6.0, 
14.0, 14.0, 11.0, 3.0, 3.0, 9.0, 18.0, 16.0, 8.0, 0.0, 0.0, 9.0, 13.0, 12.0, 12.0, 14.0, 11.0, 14.0, 14.0, 11.0, 16.0, 9.0, 17.0, 11.0, 12.0, 5.0, 11.0, 9.0, 9.0, 0.0, 9.0, 19.0, 9.0, 19.0, 6.0, 14.0, 17.0, 3.0, 34.0, 48.0, 36.0, 40.0, 11.0, 3.0, 8.0, 12.0, 6.0, 3.0, 0.0, 12.0, 11.0, 17.0, 39.0, 51.0, 22.0, 9.0, 6.0, 14.0, 22.0, 11.0, 25.0, 13.0, 34.0, 36.0, 40.0, 53.0, 3.0, 11.0, 11.0, 12.0, 12.0, 11.0, 14.0, 19.0, 6.0, 3.0, 11.0, 3.0, 9.0, 0.0, 14.0, 6.0, 37.0, 34.0, 25.0, 16.0, 6.0, 6.0, 11.0, 11.0, 37.0, 23.0, 15.0, 11.0, 12.0, 19.0, 3.0, 6.0, 12.0, 22.0, 8.0, 12.0, 37.0, 31.0, 31.0, 26.0, 39.0, 43.0, 29.0, 36.0, 15.0, 23.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6960931178672759, "mean_inference_ms": 1.214054826686882, "mean_action_processing_ms": 0.13411483082756873, "mean_env_wait_ms": 0.8720779914357558, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 119.0, "episode_reward_min": 6.0, "episode_reward_mean": 35.03, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 61.0}, "policy_reward_mean": {"ppo": 17.515}, "hist_stats": {"episode_reward": [33.0, 62.0, 42.0, 68.0, 60.0, 28.0, 30.0, 36.0, 11.0, 28.0, 9.0, 14.0, 31.0, 91.0, 34.0, 62.0, 71.0, 68.0, 119.0, 66.0, 17.0, 9.0, 9.0, 33.0, 71.0, 20.0, 37.0, 28.0, 22.0, 60.0, 11.0, 42.0, 76.0, 31.0, 50.0, 57.0, 9.0, 20.0, 12.0, 9.0, 74.0, 30.0, 6.0, 9.0, 28.0, 14.0, 12.0, 34.0, 8.0, 9.0, 25.0, 26.0, 25.0, 25.0, 25.0, 28.0, 17.0, 20.0, 9.0, 28.0, 28.0, 20.0, 20.0, 82.0, 76.0, 14.0, 20.0, 9.0, 12.0, 28.0, 90.0, 31.0, 20.0, 33.0, 38.0, 70.0, 93.0, 14.0, 23.0, 23.0, 33.0, 9.0, 14.0, 9.0, 20.0, 71.0, 41.0, 12.0, 22.0, 60.0, 26.0, 31.0, 9.0, 34.0, 20.0, 68.0, 57.0, 82.0, 65.0, 38.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [24.0, 9.0, 26.0, 36.0, 19.0, 23.0, 40.0, 28.0, 29.0, 31.0, 9.0, 19.0, 14.0, 16.0, 14.0, 22.0, 8.0, 3.0, 14.0, 14.0, 3.0, 6.0, 6.0, 8.0, 20.0, 11.0, 40.0, 51.0, 22.0, 12.0, 23.0, 39.0, 31.0, 40.0, 37.0, 31.0, 61.0, 58.0, 32.0, 34.0, 11.0, 6.0, 3.0, 6.0, 3.0, 6.0, 18.0, 15.0, 34.0, 37.0, 14.0, 6.0, 22.0, 15.0, 14.0, 14.0, 8.0, 14.0, 31.0, 29.0, 8.0, 3.0, 20.0, 22.0, 37.0, 39.0, 25.0, 6.0, 25.0, 25.0, 23.0, 34.0, 0.0, 9.0, 11.0, 9.0, 6.0, 6.0, 3.0, 6.0, 40.0, 34.0, 9.0, 21.0, 3.0, 3.0, 3.0, 6.0, 14.0, 14.0, 11.0, 3.0, 3.0, 9.0, 18.0, 16.0, 8.0, 0.0, 0.0, 9.0, 13.0, 12.0, 12.0, 14.0, 11.0, 14.0, 14.0, 11.0, 16.0, 9.0, 17.0, 11.0, 12.0, 5.0, 11.0, 9.0, 9.0, 0.0, 9.0, 19.0, 9.0, 19.0, 6.0, 14.0, 17.0, 3.0, 34.0, 48.0, 36.0, 40.0, 11.0, 3.0, 8.0, 12.0, 6.0, 3.0, 0.0, 12.0, 11.0, 17.0, 39.0, 51.0, 22.0, 9.0, 6.0, 14.0, 22.0, 11.0, 25.0, 13.0, 34.0, 36.0, 40.0, 53.0, 3.0, 11.0, 11.0, 12.0, 12.0, 11.0, 14.0, 19.0, 6.0, 3.0, 11.0, 3.0, 9.0, 0.0, 14.0, 6.0, 37.0, 34.0, 25.0, 16.0, 6.0, 6.0, 11.0, 11.0, 37.0, 23.0, 15.0, 11.0, 12.0, 19.0, 3.0, 6.0, 12.0, 22.0, 8.0, 12.0, 37.0, 31.0, 31.0, 26.0, 39.0, 43.0, 29.0, 36.0, 15.0, 23.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6960931178672759, "mean_inference_ms": 1.214054826686882, "mean_action_processing_ms": 0.13411483082756873, "mean_env_wait_ms": 0.8720779914357558, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 537600, "num_agent_steps_trained": 537600, "num_env_steps_sampled": 268800, "num_env_steps_trained": 268800, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 268800, 
"num_steps_trained_this_iter": 12800, "agent_timesteps_total": 537600, "timers": {"training_iteration_time_ms": 3666.172, "learn_time_ms": 1066.079, "learn_throughput": 12006.614, "synch_weights_time_ms": 11.8}, "counters": {"num_env_steps_sampled": 268800, "num_env_steps_trained": 268800, "num_agent_steps_sampled": 537600, "num_agent_steps_trained": 537600}, "done": false, "episodes_total": 672, "training_iteration": 21, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-01-17", "timestamp": 1666580477, "time_this_iter_s": 3.6518807411193848, "time_total_s": 79.85414409637451, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": 
"cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, 
"attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": 
false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, 
"attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, 
"prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": 
-1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 79.85414409637451, "timesteps_since_restore": 0, "iterations_since_restore": 21, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 24.16, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 5.0, "sparse_reward_min": 0, "sparse_reward_max": 40, "shaped_reward_mean": 24.74, "shaped_reward_min": 6, "shaped_reward_max": 53, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 5.18, "onion_pickup_agent_0_min": 1, "onion_pickup_agent_0_max": 13, "onion_pickup_agent_1_mean": 5.3, "onion_pickup_agent_1_min": 0, "onion_pickup_agent_1_max": 12, "useful_onion_pickup_agent_0_mean": 3.38, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 13, "useful_onion_pickup_agent_1_mean": 3.66, "useful_onion_pickup_agent_1_min": 0, "useful_onion_pickup_agent_1_max": 9, "onion_drop_agent_0_mean": 2.48, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 8, "onion_drop_agent_1_mean": 2.5, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 8, "useful_onion_drop_agent_0_mean": 1.45, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 7, "useful_onion_drop_agent_1_mean": 1.4, 
"useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 7, "potting_onion_agent_0_mean": 2.27, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 5, "potting_onion_agent_1_mean": 2.28, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 6, "dish_pickup_agent_0_mean": 3.16, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 3.07, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 0.42, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 2, "useful_dish_pickup_agent_1_mean": 0.51, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 3, "dish_drop_agent_0_mean": 2.08, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 8, "dish_drop_agent_1_mean": 1.95, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 7, "useful_dish_drop_agent_0_mean": 0.74, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 5, "useful_dish_drop_agent_1_mean": 0.7, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 6, "soup_pickup_agent_0_mean": 1.29, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 1.26, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 0.37, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 3, "soup_delivery_agent_1_mean": 0.53, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 3, "soup_drop_agent_0_mean": 0.77, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 6, "soup_drop_agent_1_mean": 0.65, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 5, "optimal_onion_potting_agent_0_mean": 2.27, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 5, "optimal_onion_potting_agent_1_mean": 2.28, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 6, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, 
"optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 2.27, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 5, "viable_onion_potting_agent_1_mean": 2.28, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 6, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 9.536743306171047e-08, "cur_lr": 0.0010000000474974513, "total_loss": -0.0045935483649373055, "policy_loss": -0.003943379037082195, "vf_loss": 2.199497699737549, 
"vf_explained_var": 0.14554493129253387, "kl": 0.0012895276304334402, "entropy": 1.740237832069397, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 281600, "num_env_steps_trained": 281600, "num_agent_steps_sampled": 563200, "num_agent_steps_trained": 563200}, "sampler_results": {"episode_reward_max": 108.0, "episode_reward_min": 6.0, "episode_reward_mean": 34.74, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 60.0}, "policy_reward_mean": {"ppo": 17.37}, "custom_metrics": {"sparse_reward_mean": 5.0, "sparse_reward_min": 0, "sparse_reward_max": 40, "shaped_reward_mean": 24.74, "shaped_reward_min": 6, "shaped_reward_max": 53, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 5.18, "onion_pickup_agent_0_min": 1, "onion_pickup_agent_0_max": 13, "onion_pickup_agent_1_mean": 5.3, "onion_pickup_agent_1_min": 0, 
"onion_pickup_agent_1_max": 12, "useful_onion_pickup_agent_0_mean": 3.38, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 13, "useful_onion_pickup_agent_1_mean": 3.66, "useful_onion_pickup_agent_1_min": 0, "useful_onion_pickup_agent_1_max": 9, "onion_drop_agent_0_mean": 2.48, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 8, "onion_drop_agent_1_mean": 2.5, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 8, "useful_onion_drop_agent_0_mean": 1.45, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 7, "useful_onion_drop_agent_1_mean": 1.4, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 7, "potting_onion_agent_0_mean": 2.27, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 5, "potting_onion_agent_1_mean": 2.28, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 6, "dish_pickup_agent_0_mean": 3.16, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 3.07, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 0.42, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 2, "useful_dish_pickup_agent_1_mean": 0.51, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 3, "dish_drop_agent_0_mean": 2.08, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 8, "dish_drop_agent_1_mean": 1.95, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 7, "useful_dish_drop_agent_0_mean": 0.74, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 5, "useful_dish_drop_agent_1_mean": 0.7, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 6, "soup_pickup_agent_0_mean": 1.29, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 1.26, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 0.37, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 3, 
"soup_delivery_agent_1_mean": 0.53, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 3, "soup_drop_agent_0_mean": 0.77, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 6, "soup_drop_agent_1_mean": 0.65, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 5, "optimal_onion_potting_agent_0_mean": 2.27, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 5, "optimal_onion_potting_agent_1_mean": 2.28, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 6, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 2.27, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 5, "viable_onion_potting_agent_1_mean": 2.28, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 6, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, 
"useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [76.0, 31.0, 50.0, 57.0, 9.0, 20.0, 12.0, 9.0, 74.0, 30.0, 6.0, 9.0, 28.0, 14.0, 12.0, 34.0, 8.0, 9.0, 25.0, 26.0, 25.0, 25.0, 25.0, 28.0, 17.0, 20.0, 9.0, 28.0, 28.0, 20.0, 20.0, 82.0, 76.0, 14.0, 20.0, 9.0, 12.0, 28.0, 90.0, 31.0, 20.0, 33.0, 38.0, 70.0, 93.0, 14.0, 23.0, 23.0, 33.0, 9.0, 14.0, 9.0, 20.0, 71.0, 41.0, 12.0, 22.0, 60.0, 26.0, 31.0, 9.0, 34.0, 20.0, 68.0, 57.0, 82.0, 65.0, 38.0, 9.0, 25.0, 9.0, 82.0, 25.0, 20.0, 38.0, 66.0, 66.0, 82.0, 57.0, 20.0, 9.0, 19.0, 23.0, 57.0, 12.0, 63.0, 50.0, 39.0, 8.0, 20.0, 25.0, 19.0, 108.0, 82.0, 30.0, 20.0, 47.0, 50.0, 93.0, 20.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [37.0, 39.0, 25.0, 6.0, 25.0, 25.0, 23.0, 34.0, 0.0, 9.0, 11.0, 9.0, 6.0, 6.0, 3.0, 6.0, 40.0, 34.0, 9.0, 21.0, 3.0, 3.0, 3.0, 6.0, 14.0, 14.0, 11.0, 3.0, 3.0, 9.0, 18.0, 16.0, 8.0, 0.0, 0.0, 9.0, 13.0, 12.0, 12.0, 14.0, 11.0, 14.0, 14.0, 11.0, 16.0, 9.0, 17.0, 11.0, 12.0, 5.0, 11.0, 9.0, 9.0, 0.0, 9.0, 19.0, 9.0, 19.0, 6.0, 14.0, 17.0, 3.0, 34.0, 48.0, 36.0, 40.0, 11.0, 3.0, 8.0, 12.0, 6.0, 3.0, 0.0, 12.0, 11.0, 17.0, 39.0, 51.0, 22.0, 9.0, 6.0, 14.0, 22.0, 11.0, 25.0, 13.0, 34.0, 36.0, 40.0, 
53.0, 3.0, 11.0, 11.0, 12.0, 12.0, 11.0, 14.0, 19.0, 6.0, 3.0, 11.0, 3.0, 9.0, 0.0, 14.0, 6.0, 37.0, 34.0, 25.0, 16.0, 6.0, 6.0, 11.0, 11.0, 37.0, 23.0, 15.0, 11.0, 12.0, 19.0, 3.0, 6.0, 12.0, 22.0, 8.0, 12.0, 37.0, 31.0, 31.0, 26.0, 39.0, 43.0, 29.0, 36.0, 15.0, 23.0, 6.0, 3.0, 17.0, 8.0, 0.0, 9.0, 42.0, 40.0, 11.0, 14.0, 9.0, 11.0, 18.0, 20.0, 40.0, 26.0, 34.0, 32.0, 38.0, 44.0, 34.0, 23.0, 11.0, 9.0, 0.0, 9.0, 14.0, 5.0, 9.0, 14.0, 23.0, 34.0, 6.0, 6.0, 31.0, 32.0, 28.0, 22.0, 12.0, 27.0, 5.0, 3.0, 3.0, 17.0, 13.0, 12.0, 3.0, 16.0, 48.0, 60.0, 45.0, 37.0, 21.0, 9.0, 12.0, 8.0, 20.0, 27.0, 27.0, 23.0, 42.0, 51.0, 14.0, 6.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6957187968137986, "mean_inference_ms": 1.2132320138272397, "mean_action_processing_ms": 0.13407466594363157, "mean_env_wait_ms": 0.8722917900578605, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 108.0, "episode_reward_min": 6.0, "episode_reward_mean": 34.74, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 60.0}, "policy_reward_mean": {"ppo": 17.37}, "hist_stats": {"episode_reward": [76.0, 31.0, 50.0, 57.0, 9.0, 20.0, 12.0, 9.0, 74.0, 30.0, 6.0, 9.0, 28.0, 14.0, 12.0, 34.0, 8.0, 9.0, 25.0, 26.0, 25.0, 25.0, 25.0, 28.0, 17.0, 20.0, 9.0, 28.0, 28.0, 20.0, 20.0, 82.0, 76.0, 14.0, 20.0, 9.0, 12.0, 28.0, 90.0, 31.0, 20.0, 33.0, 38.0, 70.0, 93.0, 14.0, 23.0, 23.0, 33.0, 9.0, 14.0, 9.0, 20.0, 71.0, 41.0, 12.0, 22.0, 60.0, 26.0, 31.0, 9.0, 34.0, 20.0, 68.0, 57.0, 82.0, 65.0, 38.0, 9.0, 25.0, 9.0, 82.0, 25.0, 20.0, 38.0, 66.0, 66.0, 82.0, 57.0, 20.0, 9.0, 19.0, 23.0, 57.0, 12.0, 63.0, 50.0, 39.0, 8.0, 20.0, 25.0, 19.0, 108.0, 82.0, 30.0, 20.0, 47.0, 50.0, 93.0, 20.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [37.0, 39.0, 25.0, 6.0, 25.0, 25.0, 23.0, 34.0, 0.0, 9.0, 11.0, 9.0, 6.0, 6.0, 3.0, 6.0, 40.0, 34.0, 9.0, 21.0, 3.0, 3.0, 3.0, 6.0, 14.0, 14.0, 11.0, 3.0, 3.0, 9.0, 18.0, 16.0, 8.0, 0.0, 0.0, 9.0, 13.0, 12.0, 12.0, 14.0, 11.0, 14.0, 14.0, 11.0, 16.0, 9.0, 17.0, 11.0, 12.0, 5.0, 11.0, 9.0, 9.0, 0.0, 9.0, 19.0, 9.0, 19.0, 6.0, 14.0, 17.0, 3.0, 34.0, 48.0, 36.0, 40.0, 11.0, 3.0, 8.0, 12.0, 6.0, 3.0, 0.0, 12.0, 11.0, 17.0, 39.0, 51.0, 22.0, 9.0, 6.0, 14.0, 22.0, 11.0, 25.0, 13.0, 34.0, 36.0, 40.0, 53.0, 3.0, 11.0, 11.0, 12.0, 12.0, 11.0, 14.0, 19.0, 6.0, 3.0, 11.0, 3.0, 9.0, 0.0, 14.0, 6.0, 37.0, 34.0, 25.0, 16.0, 6.0, 6.0, 11.0, 11.0, 37.0, 23.0, 15.0, 11.0, 12.0, 19.0, 3.0, 6.0, 12.0, 22.0, 8.0, 12.0, 37.0, 31.0, 31.0, 26.0, 39.0, 43.0, 29.0, 36.0, 15.0, 23.0, 6.0, 3.0, 17.0, 8.0, 0.0, 9.0, 42.0, 40.0, 11.0, 14.0, 9.0, 11.0, 18.0, 20.0, 40.0, 26.0, 34.0, 32.0, 38.0, 44.0, 34.0, 23.0, 11.0, 9.0, 0.0, 9.0, 14.0, 5.0, 9.0, 14.0, 23.0, 34.0, 6.0, 6.0, 31.0, 32.0, 28.0, 22.0, 12.0, 27.0, 5.0, 3.0, 3.0, 17.0, 13.0, 12.0, 3.0, 16.0, 48.0, 60.0, 45.0, 37.0, 21.0, 9.0, 12.0, 8.0, 20.0, 27.0, 27.0, 23.0, 42.0, 51.0, 14.0, 6.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6957187968137986, "mean_inference_ms": 1.2132320138272397, "mean_action_processing_ms": 0.13407466594363157, "mean_env_wait_ms": 0.8722917900578605, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 563200, "num_agent_steps_trained": 563200, "num_env_steps_sampled": 281600, "num_env_steps_trained": 281600, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 281600, "num_steps_trained_this_iter": 
12800, "agent_timesteps_total": 563200, "timers": {"training_iteration_time_ms": 3668.242, "learn_time_ms": 1072.27, "learn_throughput": 11937.289, "synch_weights_time_ms": 11.797}, "counters": {"num_env_steps_sampled": 281600, "num_env_steps_trained": 281600, "num_agent_steps_sampled": 563200, "num_agent_steps_trained": 563200}, "done": false, "episodes_total": 704, "training_iteration": 22, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-01-21", "timestamp": 1666580481, "time_this_iter_s": 3.760704517364502, "time_total_s": 83.61484861373901, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": 
{"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, 
"attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], 
[Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, 
"attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": 
-1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, 
"metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 83.61484861373901, "timesteps_since_restore": 0, "iterations_since_restore": 22, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 21.1, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 6.0, "sparse_reward_min": 0, "sparse_reward_max": 40, "shaped_reward_mean": 27.32, "shaped_reward_min": 8, "shaped_reward_max": 54, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 5.53, "onion_pickup_agent_0_min": 1, "onion_pickup_agent_0_max": 12, "onion_pickup_agent_1_mean": 5.42, "onion_pickup_agent_1_min": 0, "onion_pickup_agent_1_max": 13, "useful_onion_pickup_agent_0_mean": 3.77, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 9, "useful_onion_pickup_agent_1_mean": 3.84, "useful_onion_pickup_agent_1_min": 0, "useful_onion_pickup_agent_1_max": 9, "onion_drop_agent_0_mean": 2.58, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 8, "onion_drop_agent_1_mean": 2.33, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 8, "useful_onion_drop_agent_0_mean": 1.44, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 7, "useful_onion_drop_agent_1_mean": 1.32, 
"useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 7, "potting_onion_agent_0_mean": 2.46, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 7, "potting_onion_agent_1_mean": 2.59, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 6, "dish_pickup_agent_0_mean": 2.89, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 3.15, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 0.43, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 2, "useful_dish_pickup_agent_1_mean": 0.61, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 3, "dish_drop_agent_0_mean": 1.82, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 9, "dish_drop_agent_1_mean": 1.94, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 7, "useful_dish_drop_agent_0_mean": 0.73, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 4, "useful_dish_drop_agent_1_mean": 0.64, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 6, "soup_pickup_agent_0_mean": 1.32, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 1.55, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 0.39, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 3, "soup_delivery_agent_1_mean": 0.53, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 3, "soup_drop_agent_0_mean": 0.76, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 6, "soup_drop_agent_1_mean": 0.9, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 5, "optimal_onion_potting_agent_0_mean": 2.46, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 7, "optimal_onion_potting_agent_1_mean": 2.59, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 6, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, 
"optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 2.46, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 7, "viable_onion_potting_agent_1_mean": 2.59, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 6, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 4.7683716530855236e-08, "cur_lr": 0.0010000000474974513, "total_loss": -0.0024071987718343735, "policy_loss": -0.0017509105382487178, "vf_loss": 2.1249547004699707, 
"vf_explained_var": 0.18562087416648865, "kl": 0.001308967126533389, "entropy": 1.737565279006958, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 294400, "num_env_steps_trained": 294400, "num_agent_steps_sampled": 588800, "num_agent_steps_trained": 588800}, "sampler_results": {"episode_reward_max": 108.0, "episode_reward_min": 8.0, "episode_reward_mean": 39.32, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 60.0}, "policy_reward_mean": {"ppo": 19.66}, "custom_metrics": {"sparse_reward_mean": 6.0, "sparse_reward_min": 0, "sparse_reward_max": 40, "shaped_reward_mean": 27.32, "shaped_reward_min": 8, "shaped_reward_max": 54, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 5.53, "onion_pickup_agent_0_min": 1, "onion_pickup_agent_0_max": 12, "onion_pickup_agent_1_mean": 5.42, "onion_pickup_agent_1_min": 0, 
"onion_pickup_agent_1_max": 13, "useful_onion_pickup_agent_0_mean": 3.77, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 9, "useful_onion_pickup_agent_1_mean": 3.84, "useful_onion_pickup_agent_1_min": 0, "useful_onion_pickup_agent_1_max": 9, "onion_drop_agent_0_mean": 2.58, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 8, "onion_drop_agent_1_mean": 2.33, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 8, "useful_onion_drop_agent_0_mean": 1.44, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 7, "useful_onion_drop_agent_1_mean": 1.32, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 7, "potting_onion_agent_0_mean": 2.46, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 7, "potting_onion_agent_1_mean": 2.59, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 6, "dish_pickup_agent_0_mean": 2.89, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 3.15, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 0.43, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 2, "useful_dish_pickup_agent_1_mean": 0.61, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 3, "dish_drop_agent_0_mean": 1.82, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 9, "dish_drop_agent_1_mean": 1.94, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 7, "useful_dish_drop_agent_0_mean": 0.73, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 4, "useful_dish_drop_agent_1_mean": 0.64, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 6, "soup_pickup_agent_0_mean": 1.32, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 1.55, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 0.39, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 3, 
"soup_delivery_agent_1_mean": 0.53, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 3, "soup_drop_agent_0_mean": 0.76, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 6, "soup_drop_agent_1_mean": 0.9, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 5, "optimal_onion_potting_agent_0_mean": 2.46, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 7, "optimal_onion_potting_agent_1_mean": 2.59, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 6, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 2.46, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 7, "viable_onion_potting_agent_1_mean": 2.59, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 6, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, 
"useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [76.0, 14.0, 20.0, 9.0, 12.0, 28.0, 90.0, 31.0, 20.0, 33.0, 38.0, 70.0, 93.0, 14.0, 23.0, 23.0, 33.0, 9.0, 14.0, 9.0, 20.0, 71.0, 41.0, 12.0, 22.0, 60.0, 26.0, 31.0, 9.0, 34.0, 20.0, 68.0, 57.0, 82.0, 65.0, 38.0, 9.0, 25.0, 9.0, 82.0, 25.0, 20.0, 38.0, 66.0, 66.0, 82.0, 57.0, 20.0, 9.0, 19.0, 23.0, 57.0, 12.0, 63.0, 50.0, 39.0, 8.0, 20.0, 25.0, 19.0, 108.0, 82.0, 30.0, 20.0, 47.0, 50.0, 93.0, 20.0, 76.0, 31.0, 66.0, 34.0, 20.0, 20.0, 45.0, 94.0, 28.0, 87.0, 12.0, 74.0, 40.0, 9.0, 34.0, 39.0, 23.0, 88.0, 45.0, 40.0, 68.0, 9.0, 31.0, 36.0, 77.0, 36.0, 74.0, 17.0, 23.0, 9.0, 30.0, 9.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [36.0, 40.0, 11.0, 3.0, 8.0, 12.0, 6.0, 3.0, 0.0, 12.0, 11.0, 17.0, 39.0, 51.0, 22.0, 9.0, 6.0, 14.0, 22.0, 11.0, 25.0, 13.0, 34.0, 36.0, 40.0, 53.0, 3.0, 11.0, 11.0, 12.0, 12.0, 11.0, 14.0, 19.0, 6.0, 3.0, 11.0, 3.0, 9.0, 0.0, 14.0, 6.0, 37.0, 34.0, 25.0, 16.0, 6.0, 6.0, 11.0, 11.0, 37.0, 23.0, 15.0, 11.0, 12.0, 19.0, 3.0, 6.0, 12.0, 22.0, 8.0, 12.0, 37.0, 31.0, 31.0, 26.0, 39.0, 43.0, 29.0, 36.0, 15.0, 23.0, 6.0, 3.0, 17.0, 8.0, 0.0, 9.0, 42.0, 40.0, 11.0, 14.0, 9.0, 11.0, 18.0, 20.0, 40.0, 
26.0, 34.0, 32.0, 38.0, 44.0, 34.0, 23.0, 11.0, 9.0, 0.0, 9.0, 14.0, 5.0, 9.0, 14.0, 23.0, 34.0, 6.0, 6.0, 31.0, 32.0, 28.0, 22.0, 12.0, 27.0, 5.0, 3.0, 3.0, 17.0, 13.0, 12.0, 3.0, 16.0, 48.0, 60.0, 45.0, 37.0, 21.0, 9.0, 12.0, 8.0, 20.0, 27.0, 27.0, 23.0, 42.0, 51.0, 14.0, 6.0, 25.0, 51.0, 20.0, 11.0, 29.0, 37.0, 17.0, 17.0, 15.0, 5.0, 20.0, 0.0, 25.0, 20.0, 49.0, 45.0, 14.0, 14.0, 42.0, 45.0, 6.0, 6.0, 37.0, 37.0, 6.0, 34.0, 3.0, 6.0, 11.0, 23.0, 17.0, 22.0, 9.0, 14.0, 40.0, 48.0, 17.0, 28.0, 14.0, 26.0, 28.0, 40.0, 0.0, 9.0, 14.0, 17.0, 17.0, 19.0, 40.0, 37.0, 14.0, 22.0, 42.0, 32.0, 6.0, 11.0, 9.0, 14.0, 6.0, 3.0, 16.0, 14.0, 6.0, 3.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6953188897289305, "mean_inference_ms": 1.2124798978585105, "mean_action_processing_ms": 0.13404729178491362, "mean_env_wait_ms": 0.8727043294602006, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 108.0, "episode_reward_min": 8.0, "episode_reward_mean": 39.32, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 60.0}, "policy_reward_mean": {"ppo": 19.66}, "hist_stats": {"episode_reward": [76.0, 14.0, 20.0, 9.0, 12.0, 28.0, 90.0, 31.0, 20.0, 33.0, 38.0, 70.0, 93.0, 14.0, 23.0, 23.0, 33.0, 9.0, 14.0, 9.0, 20.0, 71.0, 41.0, 12.0, 22.0, 60.0, 26.0, 31.0, 9.0, 34.0, 20.0, 68.0, 57.0, 82.0, 65.0, 38.0, 9.0, 25.0, 9.0, 82.0, 25.0, 20.0, 38.0, 66.0, 66.0, 82.0, 57.0, 20.0, 9.0, 19.0, 23.0, 57.0, 12.0, 63.0, 50.0, 39.0, 8.0, 20.0, 25.0, 19.0, 108.0, 82.0, 30.0, 20.0, 47.0, 50.0, 93.0, 20.0, 76.0, 31.0, 66.0, 34.0, 20.0, 20.0, 45.0, 94.0, 28.0, 87.0, 12.0, 74.0, 40.0, 9.0, 34.0, 39.0, 23.0, 88.0, 45.0, 40.0, 68.0, 9.0, 31.0, 36.0, 77.0, 36.0, 74.0, 17.0, 23.0, 9.0, 30.0, 9.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [36.0, 40.0, 11.0, 3.0, 8.0, 12.0, 6.0, 3.0, 0.0, 12.0, 11.0, 17.0, 39.0, 51.0, 22.0, 9.0, 6.0, 14.0, 22.0, 11.0, 25.0, 13.0, 34.0, 36.0, 40.0, 53.0, 3.0, 11.0, 11.0, 12.0, 12.0, 11.0, 14.0, 19.0, 6.0, 3.0, 11.0, 3.0, 9.0, 0.0, 14.0, 6.0, 37.0, 34.0, 25.0, 16.0, 6.0, 6.0, 11.0, 11.0, 37.0, 23.0, 15.0, 11.0, 12.0, 19.0, 3.0, 6.0, 12.0, 22.0, 8.0, 12.0, 37.0, 31.0, 31.0, 26.0, 39.0, 43.0, 29.0, 36.0, 15.0, 23.0, 6.0, 3.0, 17.0, 8.0, 0.0, 9.0, 42.0, 40.0, 11.0, 14.0, 9.0, 11.0, 18.0, 20.0, 40.0, 26.0, 34.0, 32.0, 38.0, 44.0, 34.0, 23.0, 11.0, 9.0, 0.0, 9.0, 14.0, 5.0, 9.0, 14.0, 23.0, 34.0, 6.0, 6.0, 31.0, 32.0, 28.0, 22.0, 12.0, 27.0, 5.0, 3.0, 3.0, 17.0, 13.0, 12.0, 3.0, 16.0, 48.0, 60.0, 45.0, 37.0, 21.0, 9.0, 12.0, 8.0, 20.0, 27.0, 27.0, 23.0, 42.0, 51.0, 14.0, 6.0, 25.0, 51.0, 20.0, 11.0, 29.0, 37.0, 17.0, 17.0, 15.0, 5.0, 20.0, 0.0, 25.0, 20.0, 49.0, 45.0, 14.0, 14.0, 42.0, 45.0, 6.0, 6.0, 37.0, 37.0, 6.0, 34.0, 3.0, 6.0, 11.0, 23.0, 17.0, 22.0, 9.0, 14.0, 40.0, 48.0, 17.0, 28.0, 14.0, 26.0, 28.0, 40.0, 0.0, 9.0, 14.0, 17.0, 17.0, 19.0, 40.0, 37.0, 14.0, 22.0, 42.0, 32.0, 6.0, 11.0, 9.0, 14.0, 6.0, 3.0, 16.0, 14.0, 6.0, 3.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6953188897289305, "mean_inference_ms": 1.2124798978585105, "mean_action_processing_ms": 0.13404729178491362, "mean_env_wait_ms": 0.8727043294602006, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 588800, "num_agent_steps_trained": 588800, "num_env_steps_sampled": 294400, "num_env_steps_trained": 294400, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 294400, 
"num_steps_trained_this_iter": 12800, "agent_timesteps_total": 588800, "timers": {"training_iteration_time_ms": 3682.352, "learn_time_ms": 1083.207, "learn_throughput": 11816.758, "synch_weights_time_ms": 11.952}, "counters": {"num_env_steps_sampled": 294400, "num_env_steps_trained": 294400, "num_agent_steps_sampled": 588800, "num_agent_steps_trained": 588800}, "done": false, "episodes_total": 736, "training_iteration": 23, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-01-25", "timestamp": 1666580485, "time_this_iter_s": 3.901500701904297, "time_total_s": 87.51634931564331, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": 
"cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, 
"attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": 
false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, 
"attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, 
"prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": 
-1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 87.51634931564331, "timesteps_since_restore": 0, "iterations_since_restore": 23, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 21.016666666666666, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 7.2, "sparse_reward_min": 0, "sparse_reward_max": 40, "shaped_reward_mean": 29.4, "shaped_reward_min": 8, "shaped_reward_max": 59, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 5.68, "onion_pickup_agent_0_min": 0, "onion_pickup_agent_0_max": 12, "onion_pickup_agent_1_mean": 5.85, "onion_pickup_agent_1_min": 0, "onion_pickup_agent_1_max": 13, "useful_onion_pickup_agent_0_mean": 3.66, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 9, "useful_onion_pickup_agent_1_mean": 4.07, "useful_onion_pickup_agent_1_min": 0, "useful_onion_pickup_agent_1_max": 9, "onion_drop_agent_0_mean": 2.69, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 8, "onion_drop_agent_1_mean": 2.48, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 8, "useful_onion_drop_agent_0_mean": 1.6, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 7, "useful_onion_drop_agent_1_mean": 1.48, 
"useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 8, "potting_onion_agent_0_mean": 2.46, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 7, "potting_onion_agent_1_mean": 2.86, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 8, "dish_pickup_agent_0_mean": 3.03, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 2.87, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 0.56, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 3, "useful_dish_pickup_agent_1_mean": 0.67, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 3, "dish_drop_agent_0_mean": 1.82, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 9, "dish_drop_agent_1_mean": 1.65, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 7, "useful_dish_drop_agent_0_mean": 0.65, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 4, "useful_dish_drop_agent_1_mean": 0.49, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 1.42, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 1.58, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 0.5, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 2, "soup_delivery_agent_1_mean": 0.53, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 3, "soup_drop_agent_0_mean": 0.77, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 6, "soup_drop_agent_1_mean": 0.94, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 7, "optimal_onion_potting_agent_0_mean": 2.46, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 7, "optimal_onion_potting_agent_1_mean": 2.86, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 8, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, 
"optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 2.46, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 7, "viable_onion_potting_agent_1_mean": 2.86, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 8, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 2.3841858265427618e-08, "cur_lr": 0.0010000000474974513, "total_loss": -0.0055618928745388985, "policy_loss": -0.004936683923006058, "vf_loss": 2.4280309677124023, 
"vf_explained_var": 0.1790190190076828, "kl": 0.0011535612866282463, "entropy": 1.7360193729400635, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 307200, "num_env_steps_trained": 307200, "num_agent_steps_sampled": 614400, "num_agent_steps_trained": 614400}, "sampler_results": {"episode_reward_max": 128.0, "episode_reward_min": 8.0, "episode_reward_mean": 43.8, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 68.0}, "policy_reward_mean": {"ppo": 21.9}, "custom_metrics": {"sparse_reward_mean": 7.2, "sparse_reward_min": 0, "sparse_reward_max": 40, "shaped_reward_mean": 29.4, "shaped_reward_min": 8, "shaped_reward_max": 59, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 5.68, "onion_pickup_agent_0_min": 0, "onion_pickup_agent_0_max": 12, "onion_pickup_agent_1_mean": 5.85, "onion_pickup_agent_1_min": 0, "onion_pickup_agent_1_max": 
13, "useful_onion_pickup_agent_0_mean": 3.66, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 9, "useful_onion_pickup_agent_1_mean": 4.07, "useful_onion_pickup_agent_1_min": 0, "useful_onion_pickup_agent_1_max": 9, "onion_drop_agent_0_mean": 2.69, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 8, "onion_drop_agent_1_mean": 2.48, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 8, "useful_onion_drop_agent_0_mean": 1.6, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 7, "useful_onion_drop_agent_1_mean": 1.48, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 8, "potting_onion_agent_0_mean": 2.46, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 7, "potting_onion_agent_1_mean": 2.86, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 8, "dish_pickup_agent_0_mean": 3.03, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 2.87, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 0.56, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 3, "useful_dish_pickup_agent_1_mean": 0.67, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 3, "dish_drop_agent_0_mean": 1.82, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 9, "dish_drop_agent_1_mean": 1.65, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 7, "useful_dish_drop_agent_0_mean": 0.65, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 4, "useful_dish_drop_agent_1_mean": 0.49, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 1.42, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 1.58, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 0.5, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 2, "soup_delivery_agent_1_mean": 0.53, 
"soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 3, "soup_drop_agent_0_mean": 0.77, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 6, "soup_drop_agent_1_mean": 0.94, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 7, "optimal_onion_potting_agent_0_mean": 2.46, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 7, "optimal_onion_potting_agent_1_mean": 2.86, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 8, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 2.46, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 7, "viable_onion_potting_agent_1_mean": 2.86, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 8, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, 
"useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [57.0, 82.0, 65.0, 38.0, 9.0, 25.0, 9.0, 82.0, 25.0, 20.0, 38.0, 66.0, 66.0, 82.0, 57.0, 20.0, 9.0, 19.0, 23.0, 57.0, 12.0, 63.0, 50.0, 39.0, 8.0, 20.0, 25.0, 19.0, 108.0, 82.0, 30.0, 20.0, 47.0, 50.0, 93.0, 20.0, 76.0, 31.0, 66.0, 34.0, 20.0, 20.0, 45.0, 94.0, 28.0, 87.0, 12.0, 74.0, 40.0, 9.0, 34.0, 39.0, 23.0, 88.0, 45.0, 40.0, 68.0, 9.0, 31.0, 36.0, 77.0, 36.0, 74.0, 17.0, 23.0, 9.0, 30.0, 9.0, 20.0, 56.0, 88.0, 20.0, 17.0, 128.0, 31.0, 11.0, 23.0, 82.0, 42.0, 14.0, 66.0, 25.0, 22.0, 99.0, 23.0, 45.0, 82.0, 76.0, 31.0, 31.0, 9.0, 22.0, 82.0, 71.0, 71.0, 85.0, 28.0, 63.0, 33.0, 25.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [31.0, 26.0, 39.0, 43.0, 29.0, 36.0, 15.0, 23.0, 6.0, 3.0, 17.0, 8.0, 0.0, 9.0, 42.0, 40.0, 11.0, 14.0, 9.0, 11.0, 18.0, 20.0, 40.0, 26.0, 34.0, 32.0, 38.0, 44.0, 34.0, 23.0, 11.0, 9.0, 0.0, 9.0, 14.0, 5.0, 9.0, 14.0, 23.0, 34.0, 6.0, 6.0, 31.0, 32.0, 28.0, 22.0, 12.0, 27.0, 5.0, 3.0, 3.0, 17.0, 13.0, 12.0, 3.0, 16.0, 48.0, 60.0, 45.0, 37.0, 21.0, 9.0, 12.0, 8.0, 20.0, 27.0, 27.0, 23.0, 42.0, 51.0, 14.0, 6.0, 25.0, 51.0, 20.0, 11.0, 29.0, 37.0, 17.0, 17.0, 15.0, 5.0, 20.0, 0.0, 25.0, 20.0, 49.0, 45.0, 14.0, 14.0, 42.0, 45.0, 6.0, 
6.0, 37.0, 37.0, 6.0, 34.0, 3.0, 6.0, 11.0, 23.0, 17.0, 22.0, 9.0, 14.0, 40.0, 48.0, 17.0, 28.0, 14.0, 26.0, 28.0, 40.0, 0.0, 9.0, 14.0, 17.0, 17.0, 19.0, 40.0, 37.0, 14.0, 22.0, 42.0, 32.0, 6.0, 11.0, 9.0, 14.0, 6.0, 3.0, 16.0, 14.0, 6.0, 3.0, 14.0, 6.0, 28.0, 28.0, 44.0, 44.0, 14.0, 6.0, 11.0, 6.0, 60.0, 68.0, 12.0, 19.0, 8.0, 3.0, 9.0, 14.0, 37.0, 45.0, 25.0, 17.0, 9.0, 5.0, 32.0, 34.0, 16.0, 9.0, 11.0, 11.0, 50.0, 49.0, 0.0, 23.0, 20.0, 25.0, 40.0, 42.0, 47.0, 29.0, 19.0, 12.0, 14.0, 17.0, 6.0, 3.0, 16.0, 6.0, 42.0, 40.0, 36.0, 35.0, 37.0, 34.0, 48.0, 37.0, 16.0, 12.0, 20.0, 43.0, 25.0, 8.0, 11.0, 14.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6950401586137636, "mean_inference_ms": 1.2118232379219347, "mean_action_processing_ms": 0.1340076109085824, "mean_env_wait_ms": 0.8730247289869201, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 128.0, "episode_reward_min": 8.0, "episode_reward_mean": 43.8, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 68.0}, "policy_reward_mean": {"ppo": 21.9}, "hist_stats": {"episode_reward": [57.0, 82.0, 65.0, 38.0, 9.0, 25.0, 9.0, 82.0, 25.0, 20.0, 38.0, 66.0, 66.0, 82.0, 57.0, 20.0, 9.0, 19.0, 23.0, 57.0, 12.0, 63.0, 50.0, 39.0, 8.0, 20.0, 25.0, 19.0, 108.0, 82.0, 30.0, 20.0, 47.0, 50.0, 93.0, 20.0, 76.0, 31.0, 66.0, 34.0, 20.0, 20.0, 45.0, 94.0, 28.0, 87.0, 12.0, 74.0, 40.0, 9.0, 34.0, 39.0, 23.0, 88.0, 45.0, 40.0, 68.0, 9.0, 31.0, 36.0, 77.0, 36.0, 74.0, 17.0, 23.0, 9.0, 30.0, 9.0, 20.0, 56.0, 88.0, 20.0, 17.0, 128.0, 31.0, 11.0, 23.0, 82.0, 42.0, 14.0, 66.0, 25.0, 22.0, 99.0, 23.0, 45.0, 82.0, 76.0, 31.0, 31.0, 9.0, 22.0, 82.0, 71.0, 71.0, 85.0, 28.0, 63.0, 33.0, 25.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [31.0, 26.0, 39.0, 43.0, 29.0, 36.0, 15.0, 23.0, 6.0, 3.0, 17.0, 8.0, 0.0, 9.0, 42.0, 40.0, 11.0, 14.0, 9.0, 11.0, 18.0, 20.0, 40.0, 26.0, 34.0, 32.0, 38.0, 44.0, 34.0, 23.0, 11.0, 9.0, 0.0, 9.0, 14.0, 5.0, 9.0, 14.0, 23.0, 34.0, 6.0, 6.0, 31.0, 32.0, 28.0, 22.0, 12.0, 27.0, 5.0, 3.0, 3.0, 17.0, 13.0, 12.0, 3.0, 16.0, 48.0, 60.0, 45.0, 37.0, 21.0, 9.0, 12.0, 8.0, 20.0, 27.0, 27.0, 23.0, 42.0, 51.0, 14.0, 6.0, 25.0, 51.0, 20.0, 11.0, 29.0, 37.0, 17.0, 17.0, 15.0, 5.0, 20.0, 0.0, 25.0, 20.0, 49.0, 45.0, 14.0, 14.0, 42.0, 45.0, 6.0, 6.0, 37.0, 37.0, 6.0, 34.0, 3.0, 6.0, 11.0, 23.0, 17.0, 22.0, 9.0, 14.0, 40.0, 48.0, 17.0, 28.0, 14.0, 26.0, 28.0, 40.0, 0.0, 9.0, 14.0, 17.0, 17.0, 19.0, 40.0, 37.0, 14.0, 22.0, 42.0, 32.0, 6.0, 11.0, 9.0, 14.0, 6.0, 3.0, 16.0, 14.0, 6.0, 3.0, 14.0, 6.0, 28.0, 28.0, 44.0, 44.0, 14.0, 6.0, 11.0, 6.0, 60.0, 68.0, 12.0, 19.0, 8.0, 3.0, 9.0, 14.0, 37.0, 45.0, 25.0, 17.0, 9.0, 5.0, 32.0, 34.0, 16.0, 9.0, 11.0, 11.0, 50.0, 49.0, 0.0, 23.0, 20.0, 25.0, 40.0, 42.0, 47.0, 29.0, 19.0, 12.0, 14.0, 17.0, 6.0, 3.0, 16.0, 6.0, 42.0, 40.0, 36.0, 35.0, 37.0, 34.0, 48.0, 37.0, 16.0, 12.0, 20.0, 43.0, 25.0, 8.0, 11.0, 14.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6950401586137636, "mean_inference_ms": 1.2118232379219347, "mean_action_processing_ms": 0.1340076109085824, "mean_env_wait_ms": 0.8730247289869201, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 614400, "num_agent_steps_trained": 614400, "num_env_steps_sampled": 307200, "num_env_steps_trained": 307200, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 307200, "num_steps_trained_this_iter": 
12800, "agent_timesteps_total": 614400, "timers": {"training_iteration_time_ms": 3677.506, "learn_time_ms": 1091.276, "learn_throughput": 11729.393, "synch_weights_time_ms": 11.902}, "counters": {"num_env_steps_sampled": 307200, "num_env_steps_trained": 307200, "num_agent_steps_sampled": 614400, "num_agent_steps_trained": 614400}, "done": false, "episodes_total": 768, "training_iteration": 24, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-01-29", "timestamp": 1666580489, "time_this_iter_s": 3.8679988384246826, "time_total_s": 91.384348154068, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": 
{"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, 
"attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], 
[Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, 
"attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": 
-1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, 
"metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 91.384348154068, "timesteps_since_restore": 0, "iterations_since_restore": 24, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 24.520000000000003, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 7.4, "sparse_reward_min": 0, "sparse_reward_max": 40, "shaped_reward_mean": 31.3, "shaped_reward_min": 9, "shaped_reward_max": 59, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 5.83, "onion_pickup_agent_0_min": 0, "onion_pickup_agent_0_max": 12, "onion_pickup_agent_1_mean": 6.01, "onion_pickup_agent_1_min": 1, "onion_pickup_agent_1_max": 13, "useful_onion_pickup_agent_0_mean": 3.82, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 9, "useful_onion_pickup_agent_1_mean": 4.2, "useful_onion_pickup_agent_1_min": 0, "useful_onion_pickup_agent_1_max": 10, "onion_drop_agent_0_mean": 2.63, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 8, "onion_drop_agent_1_mean": 2.47, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 9, "useful_onion_drop_agent_0_mean": 1.47, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 8, "useful_onion_drop_agent_1_mean": 1.42, 
"useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 8, "potting_onion_agent_0_mean": 2.65, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 7, "potting_onion_agent_1_mean": 3.01, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 8, "dish_pickup_agent_0_mean": 2.93, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 2.85, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 0.64, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 3, "useful_dish_pickup_agent_1_mean": 0.75, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 3, "dish_drop_agent_0_mean": 1.7, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 9, "dish_drop_agent_1_mean": 1.63, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 7, "useful_dish_drop_agent_0_mean": 0.61, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 5, "useful_dish_drop_agent_1_mean": 0.56, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 6, "soup_pickup_agent_0_mean": 1.59, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 1.69, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 0.62, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 2, "soup_delivery_agent_1_mean": 0.48, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 2, "soup_drop_agent_0_mean": 0.88, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 6, "soup_drop_agent_1_mean": 1.09, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 7, "optimal_onion_potting_agent_0_mean": 2.65, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 7, "optimal_onion_potting_agent_1_mean": 3.01, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 8, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, 
"optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 2.65, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 7, "viable_onion_potting_agent_1_mean": 3.01, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 8, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 1.1920929132713809e-08, "cur_lr": 0.0010000000474974513, "total_loss": -0.0027541820891201496, "policy_loss": -0.0021353354677557945, "vf_loss": 2.474787473678589, 
"vf_explained_var": 0.18035045266151428, "kl": 0.0013497013133019209, "entropy": 1.7326486110687256, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 320000, "num_env_steps_trained": 320000, "num_agent_steps_sampled": 640000, "num_agent_steps_trained": 640000}, "sampler_results": {"episode_reward_max": 133.0, "episode_reward_min": 9.0, "episode_reward_mean": 46.1, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 68.0}, "policy_reward_mean": {"ppo": 23.05}, "custom_metrics": {"sparse_reward_mean": 7.4, "sparse_reward_min": 0, "sparse_reward_max": 40, "shaped_reward_mean": 31.3, "shaped_reward_min": 9, "shaped_reward_max": 59, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 5.83, "onion_pickup_agent_0_min": 0, "onion_pickup_agent_0_max": 12, "onion_pickup_agent_1_mean": 6.01, "onion_pickup_agent_1_min": 1, 
"onion_pickup_agent_1_max": 13, "useful_onion_pickup_agent_0_mean": 3.82, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 9, "useful_onion_pickup_agent_1_mean": 4.2, "useful_onion_pickup_agent_1_min": 0, "useful_onion_pickup_agent_1_max": 10, "onion_drop_agent_0_mean": 2.63, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 8, "onion_drop_agent_1_mean": 2.47, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 9, "useful_onion_drop_agent_0_mean": 1.47, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 8, "useful_onion_drop_agent_1_mean": 1.42, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 8, "potting_onion_agent_0_mean": 2.65, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 7, "potting_onion_agent_1_mean": 3.01, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 8, "dish_pickup_agent_0_mean": 2.93, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 2.85, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 0.64, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 3, "useful_dish_pickup_agent_1_mean": 0.75, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 3, "dish_drop_agent_0_mean": 1.7, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 9, "dish_drop_agent_1_mean": 1.63, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 7, "useful_dish_drop_agent_0_mean": 0.61, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 5, "useful_dish_drop_agent_1_mean": 0.56, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 6, "soup_pickup_agent_0_mean": 1.59, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 1.69, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 0.62, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 2, 
"soup_delivery_agent_1_mean": 0.48, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 2, "soup_drop_agent_0_mean": 0.88, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 6, "soup_drop_agent_1_mean": 1.09, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 7, "optimal_onion_potting_agent_0_mean": 2.65, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 7, "optimal_onion_potting_agent_1_mean": 3.01, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 8, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 2.65, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 7, "viable_onion_potting_agent_1_mean": 3.01, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 8, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, 
"useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [47.0, 50.0, 93.0, 20.0, 76.0, 31.0, 66.0, 34.0, 20.0, 20.0, 45.0, 94.0, 28.0, 87.0, 12.0, 74.0, 40.0, 9.0, 34.0, 39.0, 23.0, 88.0, 45.0, 40.0, 68.0, 9.0, 31.0, 36.0, 77.0, 36.0, 74.0, 17.0, 23.0, 9.0, 30.0, 9.0, 20.0, 56.0, 88.0, 20.0, 17.0, 128.0, 31.0, 11.0, 23.0, 82.0, 42.0, 14.0, 66.0, 25.0, 22.0, 99.0, 23.0, 45.0, 82.0, 76.0, 31.0, 31.0, 9.0, 22.0, 82.0, 71.0, 71.0, 85.0, 28.0, 63.0, 33.0, 25.0, 82.0, 93.0, 56.0, 25.0, 79.0, 20.0, 77.0, 79.0, 69.0, 23.0, 12.0, 28.0, 60.0, 29.0, 133.0, 85.0, 19.0, 66.0, 31.0, 17.0, 20.0, 12.0, 34.0, 20.0, 68.0, 28.0, 82.0, 48.0, 65.0, 26.0, 36.0, 33.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [20.0, 27.0, 27.0, 23.0, 42.0, 51.0, 14.0, 6.0, 25.0, 51.0, 20.0, 11.0, 29.0, 37.0, 17.0, 17.0, 15.0, 5.0, 20.0, 0.0, 25.0, 20.0, 49.0, 45.0, 14.0, 14.0, 42.0, 45.0, 6.0, 6.0, 37.0, 37.0, 6.0, 34.0, 3.0, 6.0, 11.0, 23.0, 17.0, 22.0, 9.0, 14.0, 40.0, 48.0, 17.0, 28.0, 14.0, 26.0, 28.0, 40.0, 0.0, 9.0, 14.0, 17.0, 17.0, 19.0, 40.0, 37.0, 14.0, 22.0, 42.0, 32.0, 6.0, 11.0, 9.0, 14.0, 6.0, 3.0, 16.0, 14.0, 6.0, 3.0, 14.0, 6.0, 28.0, 28.0, 44.0, 44.0, 14.0, 6.0, 11.0, 6.0, 60.0, 68.0, 12.0, 
19.0, 8.0, 3.0, 9.0, 14.0, 37.0, 45.0, 25.0, 17.0, 9.0, 5.0, 32.0, 34.0, 16.0, 9.0, 11.0, 11.0, 50.0, 49.0, 0.0, 23.0, 20.0, 25.0, 40.0, 42.0, 47.0, 29.0, 19.0, 12.0, 14.0, 17.0, 6.0, 3.0, 16.0, 6.0, 42.0, 40.0, 36.0, 35.0, 37.0, 34.0, 48.0, 37.0, 16.0, 12.0, 20.0, 43.0, 25.0, 8.0, 11.0, 14.0, 37.0, 45.0, 46.0, 47.0, 28.0, 28.0, 17.0, 8.0, 33.0, 46.0, 11.0, 9.0, 34.0, 43.0, 34.0, 45.0, 34.0, 35.0, 17.0, 6.0, 3.0, 9.0, 17.0, 11.0, 23.0, 37.0, 15.0, 14.0, 65.0, 68.0, 39.0, 46.0, 14.0, 5.0, 35.0, 31.0, 14.0, 17.0, 11.0, 6.0, 11.0, 9.0, 3.0, 9.0, 17.0, 17.0, 9.0, 11.0, 37.0, 31.0, 18.0, 10.0, 34.0, 48.0, 25.0, 23.0, 36.0, 29.0, 12.0, 14.0, 16.0, 20.0, 8.0, 25.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6947941057045602, "mean_inference_ms": 1.2115752069605927, "mean_action_processing_ms": 0.13399848174796214, "mean_env_wait_ms": 0.8736724055725108, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 133.0, "episode_reward_min": 9.0, "episode_reward_mean": 46.1, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 68.0}, "policy_reward_mean": {"ppo": 23.05}, "hist_stats": {"episode_reward": [47.0, 50.0, 93.0, 20.0, 76.0, 31.0, 66.0, 34.0, 20.0, 20.0, 45.0, 94.0, 28.0, 87.0, 12.0, 74.0, 40.0, 9.0, 34.0, 39.0, 23.0, 88.0, 45.0, 40.0, 68.0, 9.0, 31.0, 36.0, 77.0, 36.0, 74.0, 17.0, 23.0, 9.0, 30.0, 9.0, 20.0, 56.0, 88.0, 20.0, 17.0, 128.0, 31.0, 11.0, 23.0, 82.0, 42.0, 14.0, 66.0, 25.0, 22.0, 99.0, 23.0, 45.0, 82.0, 76.0, 31.0, 31.0, 9.0, 22.0, 82.0, 71.0, 71.0, 85.0, 28.0, 63.0, 33.0, 25.0, 82.0, 93.0, 56.0, 25.0, 79.0, 20.0, 77.0, 79.0, 69.0, 23.0, 12.0, 28.0, 60.0, 29.0, 133.0, 85.0, 19.0, 66.0, 31.0, 17.0, 20.0, 12.0, 34.0, 20.0, 68.0, 28.0, 82.0, 48.0, 65.0, 26.0, 36.0, 33.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [20.0, 27.0, 27.0, 23.0, 42.0, 51.0, 14.0, 6.0, 25.0, 51.0, 20.0, 11.0, 29.0, 37.0, 17.0, 17.0, 15.0, 5.0, 20.0, 0.0, 25.0, 20.0, 49.0, 45.0, 14.0, 14.0, 42.0, 45.0, 6.0, 6.0, 37.0, 37.0, 6.0, 34.0, 3.0, 6.0, 11.0, 23.0, 17.0, 22.0, 9.0, 14.0, 40.0, 48.0, 17.0, 28.0, 14.0, 26.0, 28.0, 40.0, 0.0, 9.0, 14.0, 17.0, 17.0, 19.0, 40.0, 37.0, 14.0, 22.0, 42.0, 32.0, 6.0, 11.0, 9.0, 14.0, 6.0, 3.0, 16.0, 14.0, 6.0, 3.0, 14.0, 6.0, 28.0, 28.0, 44.0, 44.0, 14.0, 6.0, 11.0, 6.0, 60.0, 68.0, 12.0, 19.0, 8.0, 3.0, 9.0, 14.0, 37.0, 45.0, 25.0, 17.0, 9.0, 5.0, 32.0, 34.0, 16.0, 9.0, 11.0, 11.0, 50.0, 49.0, 0.0, 23.0, 20.0, 25.0, 40.0, 42.0, 47.0, 29.0, 19.0, 12.0, 14.0, 17.0, 6.0, 3.0, 16.0, 6.0, 42.0, 40.0, 36.0, 35.0, 37.0, 34.0, 48.0, 37.0, 16.0, 12.0, 20.0, 43.0, 25.0, 8.0, 11.0, 14.0, 37.0, 45.0, 46.0, 47.0, 28.0, 28.0, 17.0, 8.0, 33.0, 46.0, 11.0, 9.0, 34.0, 43.0, 34.0, 45.0, 34.0, 35.0, 17.0, 6.0, 3.0, 9.0, 17.0, 11.0, 23.0, 37.0, 15.0, 14.0, 65.0, 68.0, 39.0, 46.0, 14.0, 5.0, 35.0, 31.0, 14.0, 17.0, 11.0, 6.0, 11.0, 9.0, 3.0, 9.0, 17.0, 17.0, 9.0, 11.0, 37.0, 31.0, 18.0, 10.0, 34.0, 48.0, 25.0, 23.0, 36.0, 29.0, 12.0, 14.0, 16.0, 20.0, 8.0, 25.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6947941057045602, "mean_inference_ms": 1.2115752069605927, "mean_action_processing_ms": 0.13399848174796214, "mean_env_wait_ms": 0.8736724055725108, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 640000, "num_agent_steps_trained": 640000, "num_env_steps_sampled": 320000, "num_env_steps_trained": 320000, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 
12800, "timesteps_total": 320000, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 640000, "timers": {"training_iteration_time_ms": 3680.121, "learn_time_ms": 1087.853, "learn_throughput": 11766.301, "synch_weights_time_ms": 11.846}, "counters": {"num_env_steps_sampled": 320000, "num_env_steps_trained": 320000, "num_agent_steps_sampled": 640000, "num_agent_steps_trained": 640000}, "done": false, "episodes_total": 800, "training_iteration": 25, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-01-33", "timestamp": 1666580493, "time_this_iter_s": 3.779752731323242, "time_total_s": 95.16410088539124, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, 
"eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, 
"attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, 
"reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, 
"attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, 
"replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, 
"simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 95.16410088539124, "timesteps_since_restore": 0, "iterations_since_restore": 25, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 21.183333333333334, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 7.6, "sparse_reward_min": 0, "sparse_reward_max": 40, "shaped_reward_mean": 32.38, "shaped_reward_min": 9, "shaped_reward_max": 59, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 5.68, "onion_pickup_agent_0_min": 0, "onion_pickup_agent_0_max": 12, "onion_pickup_agent_1_mean": 6.13, "onion_pickup_agent_1_min": 1, "onion_pickup_agent_1_max": 13, "useful_onion_pickup_agent_0_mean": 3.66, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 10, "useful_onion_pickup_agent_1_mean": 4.35, "useful_onion_pickup_agent_1_min": 0, "useful_onion_pickup_agent_1_max": 10, "onion_drop_agent_0_mean": 2.52, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 8, "onion_drop_agent_1_mean": 2.54, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 9, "useful_onion_drop_agent_0_mean": 1.51, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 8, "useful_onion_drop_agent_1_mean": 1.36, 
"useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 8, "potting_onion_agent_0_mean": 2.69, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 8, "potting_onion_agent_1_mean": 3.05, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 8, "dish_pickup_agent_0_mean": 3.07, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 3.18, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 0.67, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 3, "useful_dish_pickup_agent_1_mean": 0.65, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 3, "dish_drop_agent_0_mean": 1.62, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 1.93, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.55, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 5, "useful_dish_drop_agent_1_mean": 0.74, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 7, "soup_pickup_agent_0_mean": 1.65, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 5, "soup_pickup_agent_1_mean": 1.62, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 0.74, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 2, "soup_delivery_agent_1_mean": 0.59, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 4, "soup_drop_agent_0_mean": 0.84, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 5, "soup_drop_agent_1_mean": 0.92, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 7, "optimal_onion_potting_agent_0_mean": 2.69, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 8, "optimal_onion_potting_agent_1_mean": 3.05, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 8, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, 
"optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 2.69, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 8, "viable_onion_potting_agent_1_mean": 3.05, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 8, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 5.9604645663569045e-09, "cur_lr": 0.0010000000474974513, "total_loss": 8.647807408124208e-05, "policy_loss": 0.0006866119801998138, "vf_loss": 2.619206666946411, 
"vf_explained_var": 0.18624652922153473, "kl": 0.0013201497495174408, "entropy": 1.724108338356018, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 332800, "num_env_steps_trained": 332800, "num_agent_steps_sampled": 665600, "num_agent_steps_trained": 665600}, "sampler_results": {"episode_reward_max": 133.0, "episode_reward_min": 9.0, "episode_reward_mean": 47.58, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 68.0}, "policy_reward_mean": {"ppo": 23.79}, "custom_metrics": {"sparse_reward_mean": 7.6, "sparse_reward_min": 0, "sparse_reward_max": 40, "shaped_reward_mean": 32.38, "shaped_reward_min": 9, "shaped_reward_max": 59, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 5.68, "onion_pickup_agent_0_min": 0, "onion_pickup_agent_0_max": 12, "onion_pickup_agent_1_mean": 6.13, "onion_pickup_agent_1_min": 1, 
"onion_pickup_agent_1_max": 13, "useful_onion_pickup_agent_0_mean": 3.66, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 10, "useful_onion_pickup_agent_1_mean": 4.35, "useful_onion_pickup_agent_1_min": 0, "useful_onion_pickup_agent_1_max": 10, "onion_drop_agent_0_mean": 2.52, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 8, "onion_drop_agent_1_mean": 2.54, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 9, "useful_onion_drop_agent_0_mean": 1.51, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 8, "useful_onion_drop_agent_1_mean": 1.36, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 8, "potting_onion_agent_0_mean": 2.69, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 8, "potting_onion_agent_1_mean": 3.05, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 8, "dish_pickup_agent_0_mean": 3.07, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 3.18, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 0.67, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 3, "useful_dish_pickup_agent_1_mean": 0.65, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 3, "dish_drop_agent_0_mean": 1.62, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 1.93, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.55, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 5, "useful_dish_drop_agent_1_mean": 0.74, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 7, "soup_pickup_agent_0_mean": 1.65, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 5, "soup_pickup_agent_1_mean": 1.62, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 0.74, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 2, 
"soup_delivery_agent_1_mean": 0.59, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 4, "soup_drop_agent_0_mean": 0.84, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 5, "soup_drop_agent_1_mean": 0.92, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 7, "optimal_onion_potting_agent_0_mean": 2.69, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 8, "optimal_onion_potting_agent_1_mean": 3.05, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 8, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 2.69, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 8, "viable_onion_potting_agent_1_mean": 3.05, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 8, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, 
"useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [23.0, 9.0, 30.0, 9.0, 20.0, 56.0, 88.0, 20.0, 17.0, 128.0, 31.0, 11.0, 23.0, 82.0, 42.0, 14.0, 66.0, 25.0, 22.0, 99.0, 23.0, 45.0, 82.0, 76.0, 31.0, 31.0, 9.0, 22.0, 82.0, 71.0, 71.0, 85.0, 28.0, 63.0, 33.0, 25.0, 82.0, 93.0, 56.0, 25.0, 79.0, 20.0, 77.0, 79.0, 69.0, 23.0, 12.0, 28.0, 60.0, 29.0, 133.0, 85.0, 19.0, 66.0, 31.0, 17.0, 20.0, 12.0, 34.0, 20.0, 68.0, 28.0, 82.0, 48.0, 65.0, 26.0, 36.0, 33.0, 31.0, 31.0, 42.0, 28.0, 66.0, 42.0, 20.0, 31.0, 96.0, 87.0, 35.0, 73.0, 44.0, 82.0, 33.0, 41.0, 23.0, 25.0, 47.0, 85.0, 93.0, 9.0, 43.0, 28.0, 90.0, 53.0, 20.0, 38.0, 73.0, 92.0, 71.0, 39.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [9.0, 14.0, 6.0, 3.0, 16.0, 14.0, 6.0, 3.0, 14.0, 6.0, 28.0, 28.0, 44.0, 44.0, 14.0, 6.0, 11.0, 6.0, 60.0, 68.0, 12.0, 19.0, 8.0, 3.0, 9.0, 14.0, 37.0, 45.0, 25.0, 17.0, 9.0, 5.0, 32.0, 34.0, 16.0, 9.0, 11.0, 11.0, 50.0, 49.0, 0.0, 23.0, 20.0, 25.0, 40.0, 42.0, 47.0, 29.0, 19.0, 12.0, 14.0, 17.0, 6.0, 3.0, 16.0, 6.0, 42.0, 40.0, 36.0, 35.0, 37.0, 34.0, 48.0, 37.0, 16.0, 12.0, 20.0, 43.0, 25.0, 8.0, 11.0, 14.0, 37.0, 45.0, 46.0, 47.0, 28.0, 28.0, 17.0, 8.0, 33.0, 46.0, 11.0, 9.0, 34.0, 
43.0, 34.0, 45.0, 34.0, 35.0, 17.0, 6.0, 3.0, 9.0, 17.0, 11.0, 23.0, 37.0, 15.0, 14.0, 65.0, 68.0, 39.0, 46.0, 14.0, 5.0, 35.0, 31.0, 14.0, 17.0, 11.0, 6.0, 11.0, 9.0, 3.0, 9.0, 17.0, 17.0, 9.0, 11.0, 37.0, 31.0, 18.0, 10.0, 34.0, 48.0, 25.0, 23.0, 36.0, 29.0, 12.0, 14.0, 16.0, 20.0, 8.0, 25.0, 12.0, 19.0, 8.0, 23.0, 15.0, 27.0, 19.0, 9.0, 26.0, 40.0, 32.0, 10.0, 9.0, 11.0, 13.0, 18.0, 43.0, 53.0, 42.0, 45.0, 19.0, 16.0, 36.0, 37.0, 21.0, 23.0, 40.0, 42.0, 16.0, 17.0, 21.0, 20.0, 14.0, 9.0, 6.0, 19.0, 24.0, 23.0, 42.0, 43.0, 56.0, 37.0, 3.0, 6.0, 28.0, 15.0, 17.0, 11.0, 42.0, 48.0, 28.0, 25.0, 9.0, 11.0, 22.0, 16.0, 36.0, 37.0, 44.0, 48.0, 34.0, 37.0, 24.0, 15.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6945319759848374, "mean_inference_ms": 1.211253690174925, "mean_action_processing_ms": 0.13399742937214265, "mean_env_wait_ms": 0.874078587435082, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 133.0, "episode_reward_min": 9.0, "episode_reward_mean": 47.58, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 68.0}, "policy_reward_mean": {"ppo": 23.79}, "hist_stats": {"episode_reward": [23.0, 9.0, 30.0, 9.0, 20.0, 56.0, 88.0, 20.0, 17.0, 128.0, 31.0, 11.0, 23.0, 82.0, 42.0, 14.0, 66.0, 25.0, 22.0, 99.0, 23.0, 45.0, 82.0, 76.0, 31.0, 31.0, 9.0, 22.0, 82.0, 71.0, 71.0, 85.0, 28.0, 63.0, 33.0, 25.0, 82.0, 93.0, 56.0, 25.0, 79.0, 20.0, 77.0, 79.0, 69.0, 23.0, 12.0, 28.0, 60.0, 29.0, 133.0, 85.0, 19.0, 66.0, 31.0, 17.0, 20.0, 12.0, 34.0, 20.0, 68.0, 28.0, 82.0, 48.0, 65.0, 26.0, 36.0, 33.0, 31.0, 31.0, 42.0, 28.0, 66.0, 42.0, 20.0, 31.0, 96.0, 87.0, 35.0, 73.0, 44.0, 82.0, 33.0, 41.0, 23.0, 25.0, 47.0, 85.0, 93.0, 9.0, 43.0, 28.0, 90.0, 53.0, 20.0, 38.0, 73.0, 92.0, 71.0, 39.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [9.0, 14.0, 6.0, 3.0, 16.0, 14.0, 6.0, 3.0, 14.0, 6.0, 28.0, 28.0, 44.0, 44.0, 14.0, 6.0, 11.0, 6.0, 60.0, 68.0, 12.0, 19.0, 8.0, 3.0, 9.0, 14.0, 37.0, 45.0, 25.0, 17.0, 9.0, 5.0, 32.0, 34.0, 16.0, 9.0, 11.0, 11.0, 50.0, 49.0, 0.0, 23.0, 20.0, 25.0, 40.0, 42.0, 47.0, 29.0, 19.0, 12.0, 14.0, 17.0, 6.0, 3.0, 16.0, 6.0, 42.0, 40.0, 36.0, 35.0, 37.0, 34.0, 48.0, 37.0, 16.0, 12.0, 20.0, 43.0, 25.0, 8.0, 11.0, 14.0, 37.0, 45.0, 46.0, 47.0, 28.0, 28.0, 17.0, 8.0, 33.0, 46.0, 11.0, 9.0, 34.0, 43.0, 34.0, 45.0, 34.0, 35.0, 17.0, 6.0, 3.0, 9.0, 17.0, 11.0, 23.0, 37.0, 15.0, 14.0, 65.0, 68.0, 39.0, 46.0, 14.0, 5.0, 35.0, 31.0, 14.0, 17.0, 11.0, 6.0, 11.0, 9.0, 3.0, 9.0, 17.0, 17.0, 9.0, 11.0, 37.0, 31.0, 18.0, 10.0, 34.0, 48.0, 25.0, 23.0, 36.0, 29.0, 12.0, 14.0, 16.0, 20.0, 8.0, 25.0, 12.0, 19.0, 8.0, 23.0, 15.0, 27.0, 19.0, 9.0, 26.0, 40.0, 32.0, 10.0, 9.0, 11.0, 13.0, 18.0, 43.0, 53.0, 42.0, 45.0, 19.0, 16.0, 36.0, 37.0, 21.0, 23.0, 40.0, 42.0, 16.0, 17.0, 21.0, 20.0, 14.0, 9.0, 6.0, 19.0, 24.0, 23.0, 42.0, 43.0, 56.0, 37.0, 3.0, 6.0, 28.0, 15.0, 17.0, 11.0, 42.0, 48.0, 28.0, 25.0, 9.0, 11.0, 22.0, 16.0, 36.0, 37.0, 44.0, 48.0, 34.0, 37.0, 24.0, 15.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6945319759848374, "mean_inference_ms": 1.211253690174925, "mean_action_processing_ms": 0.13399742937214265, "mean_env_wait_ms": 0.874078587435082, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 665600, "num_agent_steps_trained": 665600, "num_env_steps_sampled": 332800, "num_env_steps_trained": 332800, "num_env_steps_sampled_this_iter": 12800, 
"num_env_steps_trained_this_iter": 12800, "timesteps_total": 332800, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 665600, "timers": {"training_iteration_time_ms": 3688.029, "learn_time_ms": 1098.54, "learn_throughput": 11651.827, "synch_weights_time_ms": 11.813}, "counters": {"num_env_steps_sampled": 332800, "num_env_steps_trained": 332800, "num_agent_steps_sampled": 665600, "num_agent_steps_trained": 665600}, "done": false, "episodes_total": 832, "training_iteration": 26, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-01-37", "timestamp": 1666580497, "time_this_iter_s": 3.7483062744140625, "time_total_s": 98.9124071598053, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 
0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, 
"attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": 
{"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, 
"attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, 
"replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, 
"_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 98.9124071598053, "timesteps_since_restore": 0, "iterations_since_restore": 26, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 21.216666666666665, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 8.4, "sparse_reward_min": 0, "sparse_reward_max": 40, "shaped_reward_mean": 34.33, "shaped_reward_min": 9, "shaped_reward_max": 58, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 5.33, "onion_pickup_agent_0_min": 0, "onion_pickup_agent_0_max": 12, "onion_pickup_agent_1_mean": 6.17, "onion_pickup_agent_1_min": 1, "onion_pickup_agent_1_max": 13, "useful_onion_pickup_agent_0_mean": 3.7, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 10, "useful_onion_pickup_agent_1_mean": 4.62, "useful_onion_pickup_agent_1_min": 0, "useful_onion_pickup_agent_1_max": 10, "onion_drop_agent_0_mean": 2.21, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 8, "onion_drop_agent_1_mean": 2.29, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 9, "useful_onion_drop_agent_0_mean": 1.21, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 8, "useful_onion_drop_agent_1_mean": 1.17, 
"useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 8, "potting_onion_agent_0_mean": 2.72, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 8, "potting_onion_agent_1_mean": 3.37, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 7, "dish_pickup_agent_0_mean": 3.39, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 3.38, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 0.62, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 3, "useful_dish_pickup_agent_1_mean": 0.6, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 4, "dish_drop_agent_0_mean": 1.86, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 1.98, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.64, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 5, "useful_dish_drop_agent_1_mean": 0.72, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 7, "soup_pickup_agent_0_mean": 1.76, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 5, "soup_pickup_agent_1_mean": 1.82, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 6, "soup_delivery_agent_0_mean": 0.8, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 2, "soup_delivery_agent_1_mean": 0.72, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 4, "soup_drop_agent_0_mean": 0.86, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 5, "soup_drop_agent_1_mean": 0.98, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 5, "optimal_onion_potting_agent_0_mean": 2.72, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 8, "optimal_onion_potting_agent_1_mean": 3.37, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 7, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, 
"optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 2.72, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 8, "viable_onion_potting_agent_1_mean": 3.37, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 7, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 2.9802322831784522e-09, "cur_lr": 0.0010000000474974513, "total_loss": -0.007030028849840164, "policy_loss": -0.006458994001150131, "vf_loss": 2.8966987133026123, 
"vf_explained_var": 0.18293237686157227, "kl": 0.0012218665797263384, "entropy": 1.7214064598083496, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 345600, "num_env_steps_trained": 345600, "num_agent_steps_sampled": 691200, "num_agent_steps_trained": 691200}, "sampler_results": {"episode_reward_max": 133.0, "episode_reward_min": 9.0, "episode_reward_mean": 51.13, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 3.0}, "policy_reward_max": {"ppo": 70.0}, "policy_reward_mean": {"ppo": 25.565}, "custom_metrics": {"sparse_reward_mean": 8.4, "sparse_reward_min": 0, "sparse_reward_max": 40, "shaped_reward_mean": 34.33, "shaped_reward_min": 9, "shaped_reward_max": 58, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 5.33, "onion_pickup_agent_0_min": 0, "onion_pickup_agent_0_max": 12, "onion_pickup_agent_1_mean": 6.17, "onion_pickup_agent_1_min": 1, 
"onion_pickup_agent_1_max": 13, "useful_onion_pickup_agent_0_mean": 3.7, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 10, "useful_onion_pickup_agent_1_mean": 4.62, "useful_onion_pickup_agent_1_min": 0, "useful_onion_pickup_agent_1_max": 10, "onion_drop_agent_0_mean": 2.21, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 8, "onion_drop_agent_1_mean": 2.29, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 9, "useful_onion_drop_agent_0_mean": 1.21, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 8, "useful_onion_drop_agent_1_mean": 1.17, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 8, "potting_onion_agent_0_mean": 2.72, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 8, "potting_onion_agent_1_mean": 3.37, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 7, "dish_pickup_agent_0_mean": 3.39, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 3.38, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 0.62, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 3, "useful_dish_pickup_agent_1_mean": 0.6, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 4, "dish_drop_agent_0_mean": 1.86, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 1.98, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.64, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 5, "useful_dish_drop_agent_1_mean": 0.72, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 7, "soup_pickup_agent_0_mean": 1.76, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 5, "soup_pickup_agent_1_mean": 1.82, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 6, "soup_delivery_agent_0_mean": 0.8, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 2, 
"soup_delivery_agent_1_mean": 0.72, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 4, "soup_drop_agent_0_mean": 0.86, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 5, "soup_drop_agent_1_mean": 0.98, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 5, "optimal_onion_potting_agent_0_mean": 2.72, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 8, "optimal_onion_potting_agent_1_mean": 3.37, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 7, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 2.72, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 8, "viable_onion_potting_agent_1_mean": 3.37, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 7, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, 
"useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [28.0, 63.0, 33.0, 25.0, 82.0, 93.0, 56.0, 25.0, 79.0, 20.0, 77.0, 79.0, 69.0, 23.0, 12.0, 28.0, 60.0, 29.0, 133.0, 85.0, 19.0, 66.0, 31.0, 17.0, 20.0, 12.0, 34.0, 20.0, 68.0, 28.0, 82.0, 48.0, 65.0, 26.0, 36.0, 33.0, 31.0, 31.0, 42.0, 28.0, 66.0, 42.0, 20.0, 31.0, 96.0, 87.0, 35.0, 73.0, 44.0, 82.0, 33.0, 41.0, 23.0, 25.0, 47.0, 85.0, 93.0, 9.0, 43.0, 28.0, 90.0, 53.0, 20.0, 38.0, 73.0, 92.0, 71.0, 39.0, 39.0, 87.0, 85.0, 125.0, 47.0, 28.0, 12.0, 53.0, 50.0, 78.0, 20.0, 34.0, 73.0, 70.0, 23.0, 20.0, 44.0, 31.0, 130.0, 58.0, 73.0, 85.0, 68.0, 12.0, 90.0, 60.0, 49.0, 45.0, 34.0, 25.0, 111.0, 39.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [16.0, 12.0, 20.0, 43.0, 25.0, 8.0, 11.0, 14.0, 37.0, 45.0, 46.0, 47.0, 28.0, 28.0, 17.0, 8.0, 33.0, 46.0, 11.0, 9.0, 34.0, 43.0, 34.0, 45.0, 34.0, 35.0, 17.0, 6.0, 3.0, 9.0, 17.0, 11.0, 23.0, 37.0, 15.0, 14.0, 65.0, 68.0, 39.0, 46.0, 14.0, 5.0, 35.0, 31.0, 14.0, 17.0, 11.0, 6.0, 11.0, 9.0, 3.0, 9.0, 17.0, 17.0, 9.0, 11.0, 37.0, 31.0, 18.0, 10.0, 34.0, 48.0, 25.0, 23.0, 36.0, 29.0, 12.0, 14.0, 16.0, 20.0, 8.0, 25.0, 12.0, 19.0, 8.0, 23.0, 15.0, 27.0, 19.0, 9.0, 26.0, 40.0, 32.0, 
10.0, 9.0, 11.0, 13.0, 18.0, 43.0, 53.0, 42.0, 45.0, 19.0, 16.0, 36.0, 37.0, 21.0, 23.0, 40.0, 42.0, 16.0, 17.0, 21.0, 20.0, 14.0, 9.0, 6.0, 19.0, 24.0, 23.0, 42.0, 43.0, 56.0, 37.0, 3.0, 6.0, 28.0, 15.0, 17.0, 11.0, 42.0, 48.0, 28.0, 25.0, 9.0, 11.0, 22.0, 16.0, 36.0, 37.0, 44.0, 48.0, 34.0, 37.0, 24.0, 15.0, 24.0, 15.0, 50.0, 37.0, 39.0, 46.0, 62.0, 63.0, 30.0, 17.0, 14.0, 14.0, 6.0, 6.0, 12.0, 41.0, 37.0, 13.0, 41.0, 37.0, 6.0, 14.0, 15.0, 19.0, 31.0, 42.0, 30.0, 40.0, 3.0, 20.0, 12.0, 8.0, 17.0, 27.0, 13.0, 18.0, 60.0, 70.0, 22.0, 36.0, 31.0, 42.0, 44.0, 41.0, 26.0, 42.0, 6.0, 6.0, 47.0, 43.0, 26.0, 34.0, 27.0, 22.0, 20.0, 25.0, 17.0, 17.0, 8.0, 17.0, 45.0, 66.0, 25.0, 14.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6941808593457511, "mean_inference_ms": 1.2108438805531228, "mean_action_processing_ms": 0.13398529337925486, "mean_env_wait_ms": 0.8743727041239987, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 133.0, "episode_reward_min": 9.0, "episode_reward_mean": 51.13, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 3.0}, "policy_reward_max": {"ppo": 70.0}, "policy_reward_mean": {"ppo": 25.565}, "hist_stats": {"episode_reward": [28.0, 63.0, 33.0, 25.0, 82.0, 93.0, 56.0, 25.0, 79.0, 20.0, 77.0, 79.0, 69.0, 23.0, 12.0, 28.0, 60.0, 29.0, 133.0, 85.0, 19.0, 66.0, 31.0, 17.0, 20.0, 12.0, 34.0, 20.0, 68.0, 28.0, 82.0, 48.0, 65.0, 26.0, 36.0, 33.0, 31.0, 31.0, 42.0, 28.0, 66.0, 42.0, 20.0, 31.0, 96.0, 87.0, 35.0, 73.0, 44.0, 82.0, 33.0, 41.0, 23.0, 25.0, 47.0, 85.0, 93.0, 9.0, 43.0, 28.0, 90.0, 53.0, 20.0, 38.0, 73.0, 92.0, 71.0, 39.0, 39.0, 87.0, 85.0, 125.0, 47.0, 28.0, 12.0, 53.0, 50.0, 78.0, 20.0, 34.0, 73.0, 70.0, 23.0, 20.0, 44.0, 31.0, 130.0, 58.0, 73.0, 85.0, 68.0, 12.0, 90.0, 60.0, 49.0, 45.0, 34.0, 25.0, 111.0, 39.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [16.0, 12.0, 20.0, 43.0, 25.0, 8.0, 11.0, 14.0, 37.0, 45.0, 46.0, 47.0, 28.0, 28.0, 17.0, 8.0, 33.0, 46.0, 11.0, 9.0, 34.0, 43.0, 34.0, 45.0, 34.0, 35.0, 17.0, 6.0, 3.0, 9.0, 17.0, 11.0, 23.0, 37.0, 15.0, 14.0, 65.0, 68.0, 39.0, 46.0, 14.0, 5.0, 35.0, 31.0, 14.0, 17.0, 11.0, 6.0, 11.0, 9.0, 3.0, 9.0, 17.0, 17.0, 9.0, 11.0, 37.0, 31.0, 18.0, 10.0, 34.0, 48.0, 25.0, 23.0, 36.0, 29.0, 12.0, 14.0, 16.0, 20.0, 8.0, 25.0, 12.0, 19.0, 8.0, 23.0, 15.0, 27.0, 19.0, 9.0, 26.0, 40.0, 32.0, 10.0, 9.0, 11.0, 13.0, 18.0, 43.0, 53.0, 42.0, 45.0, 19.0, 16.0, 36.0, 37.0, 21.0, 23.0, 40.0, 42.0, 16.0, 17.0, 21.0, 20.0, 14.0, 9.0, 6.0, 19.0, 24.0, 23.0, 42.0, 43.0, 56.0, 37.0, 3.0, 6.0, 28.0, 15.0, 17.0, 11.0, 42.0, 48.0, 28.0, 25.0, 9.0, 11.0, 22.0, 16.0, 36.0, 37.0, 44.0, 48.0, 34.0, 37.0, 24.0, 15.0, 24.0, 15.0, 50.0, 37.0, 39.0, 46.0, 62.0, 63.0, 30.0, 17.0, 14.0, 14.0, 6.0, 6.0, 12.0, 41.0, 37.0, 13.0, 41.0, 37.0, 6.0, 14.0, 15.0, 19.0, 31.0, 42.0, 30.0, 40.0, 3.0, 20.0, 12.0, 8.0, 17.0, 27.0, 13.0, 18.0, 60.0, 70.0, 22.0, 36.0, 31.0, 42.0, 44.0, 41.0, 26.0, 42.0, 6.0, 6.0, 47.0, 43.0, 26.0, 34.0, 27.0, 22.0, 20.0, 25.0, 17.0, 17.0, 8.0, 17.0, 45.0, 66.0, 25.0, 14.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6941808593457511, "mean_inference_ms": 1.2108438805531228, "mean_action_processing_ms": 0.13398529337925486, "mean_env_wait_ms": 0.8743727041239987, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 691200, "num_agent_steps_trained": 691200, "num_env_steps_sampled": 345600, "num_env_steps_trained": 345600, "num_env_steps_sampled_this_iter": 
12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 345600, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 691200, "timers": {"training_iteration_time_ms": 3679.065, "learn_time_ms": 1091.01, "learn_throughput": 11732.251, "synch_weights_time_ms": 11.709}, "counters": {"num_env_steps_sampled": 345600, "num_env_steps_trained": 345600, "num_agent_steps_sampled": 691200, "num_agent_steps_trained": 691200}, "done": false, "episodes_total": 864, "training_iteration": 27, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-01-41", "timestamp": 1666580501, "time_this_iter_s": 3.674090623855591, "time_total_s": 102.58649778366089, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], 
[Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, 
"attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": 
{"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, 
"attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, 
"replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, 
"_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 102.58649778366089, "timesteps_since_restore": 0, "iterations_since_restore": 27, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 24.1, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 9.2, "sparse_reward_min": 0, "sparse_reward_max": 40, "shaped_reward_mean": 34.85, "shaped_reward_min": 9, "shaped_reward_max": 59, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 5.18, "onion_pickup_agent_0_min": 1, "onion_pickup_agent_0_max": 12, "onion_pickup_agent_1_mean": 6.32, "onion_pickup_agent_1_min": 2, "onion_pickup_agent_1_max": 13, "useful_onion_pickup_agent_0_mean": 3.5, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 10, "useful_onion_pickup_agent_1_mean": 4.6, "useful_onion_pickup_agent_1_min": 1, "useful_onion_pickup_agent_1_max": 10, "onion_drop_agent_0_mean": 2.25, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 7, "onion_drop_agent_1_mean": 2.37, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 9, "useful_onion_drop_agent_0_mean": 1.37, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 1.33, 
"useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 8, "potting_onion_agent_0_mean": 2.59, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 8, "potting_onion_agent_1_mean": 3.5, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 7, "dish_pickup_agent_0_mean": 3.57, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 3.39, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 0.67, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 3, "useful_dish_pickup_agent_1_mean": 0.54, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 4, "dish_drop_agent_0_mean": 1.94, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 9, "dish_drop_agent_1_mean": 1.94, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.55, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.67, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 7, "soup_pickup_agent_0_mean": 1.74, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 6, "soup_pickup_agent_1_mean": 1.83, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 5, "soup_delivery_agent_0_mean": 0.91, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 3, "soup_delivery_agent_1_mean": 0.77, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 4, "soup_drop_agent_0_mean": 0.71, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 6, "soup_drop_agent_1_mean": 0.9, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 4, "optimal_onion_potting_agent_0_mean": 2.59, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 8, "optimal_onion_potting_agent_1_mean": 3.5, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 7, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, 
"optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 2.59, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 8, "viable_onion_potting_agent_1_mean": 3.5, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 7, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 1.4901161415892261e-09, "cur_lr": 0.0010000000474974513, "total_loss": -0.003428479889407754, "policy_loss": -0.0028413136024028063, "vf_loss": 2.718903064727783, 
"vf_explained_var": 0.2129439413547516, "kl": 0.0011951376218348742, "entropy": 1.7181081771850586, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 358400, "num_env_steps_trained": 358400, "num_agent_steps_sampled": 716800, "num_agent_steps_trained": 716800}, "sampler_results": {"episode_reward_max": 131.0, "episode_reward_min": 9.0, "episode_reward_mean": 53.25, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 3.0}, "policy_reward_max": {"ppo": 73.0}, "policy_reward_mean": {"ppo": 26.625}, "custom_metrics": {"sparse_reward_mean": 9.2, "sparse_reward_min": 0, "sparse_reward_max": 40, "shaped_reward_mean": 34.85, "shaped_reward_min": 9, "shaped_reward_max": 59, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 5.18, "onion_pickup_agent_0_min": 1, "onion_pickup_agent_0_max": 12, "onion_pickup_agent_1_mean": 6.32, "onion_pickup_agent_1_min": 2, 
"onion_pickup_agent_1_max": 13, "useful_onion_pickup_agent_0_mean": 3.5, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 10, "useful_onion_pickup_agent_1_mean": 4.6, "useful_onion_pickup_agent_1_min": 1, "useful_onion_pickup_agent_1_max": 10, "onion_drop_agent_0_mean": 2.25, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 7, "onion_drop_agent_1_mean": 2.37, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 9, "useful_onion_drop_agent_0_mean": 1.37, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 1.33, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 8, "potting_onion_agent_0_mean": 2.59, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 8, "potting_onion_agent_1_mean": 3.5, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 7, "dish_pickup_agent_0_mean": 3.57, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 3.39, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 0.67, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 3, "useful_dish_pickup_agent_1_mean": 0.54, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 4, "dish_drop_agent_0_mean": 1.94, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 9, "dish_drop_agent_1_mean": 1.94, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.55, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.67, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 7, "soup_pickup_agent_0_mean": 1.74, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 6, "soup_pickup_agent_1_mean": 1.83, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 5, "soup_delivery_agent_0_mean": 0.91, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 3, 
"soup_delivery_agent_1_mean": 0.77, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 4, "soup_drop_agent_0_mean": 0.71, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 6, "soup_drop_agent_1_mean": 0.9, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 4, "optimal_onion_potting_agent_0_mean": 2.59, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 8, "optimal_onion_potting_agent_1_mean": 3.5, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 7, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 2.59, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 8, "viable_onion_potting_agent_1_mean": 3.5, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 7, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, 
"useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [65.0, 26.0, 36.0, 33.0, 31.0, 31.0, 42.0, 28.0, 66.0, 42.0, 20.0, 31.0, 96.0, 87.0, 35.0, 73.0, 44.0, 82.0, 33.0, 41.0, 23.0, 25.0, 47.0, 85.0, 93.0, 9.0, 43.0, 28.0, 90.0, 53.0, 20.0, 38.0, 73.0, 92.0, 71.0, 39.0, 39.0, 87.0, 85.0, 125.0, 47.0, 28.0, 12.0, 53.0, 50.0, 78.0, 20.0, 34.0, 73.0, 70.0, 23.0, 20.0, 44.0, 31.0, 130.0, 58.0, 73.0, 85.0, 68.0, 12.0, 90.0, 60.0, 49.0, 45.0, 34.0, 25.0, 111.0, 39.0, 71.0, 9.0, 76.0, 81.0, 111.0, 34.0, 53.0, 36.0, 79.0, 70.0, 39.0, 131.0, 77.0, 9.0, 25.0, 22.0, 23.0, 20.0, 66.0, 99.0, 76.0, 79.0, 95.0, 87.0, 31.0, 81.0, 17.0, 12.0, 31.0, 76.0, 17.0, 23.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [36.0, 29.0, 12.0, 14.0, 16.0, 20.0, 8.0, 25.0, 12.0, 19.0, 8.0, 23.0, 15.0, 27.0, 19.0, 9.0, 26.0, 40.0, 32.0, 10.0, 9.0, 11.0, 13.0, 18.0, 43.0, 53.0, 42.0, 45.0, 19.0, 16.0, 36.0, 37.0, 21.0, 23.0, 40.0, 42.0, 16.0, 17.0, 21.0, 20.0, 14.0, 9.0, 6.0, 19.0, 24.0, 23.0, 42.0, 43.0, 56.0, 37.0, 3.0, 6.0, 28.0, 15.0, 17.0, 11.0, 42.0, 48.0, 28.0, 25.0, 9.0, 11.0, 22.0, 16.0, 36.0, 37.0, 44.0, 48.0, 34.0, 37.0, 24.0, 15.0, 24.0, 15.0, 50.0, 37.0, 39.0, 46.0, 62.0, 63.0, 30.0, 17.0, 
14.0, 14.0, 6.0, 6.0, 12.0, 41.0, 37.0, 13.0, 41.0, 37.0, 6.0, 14.0, 15.0, 19.0, 31.0, 42.0, 30.0, 40.0, 3.0, 20.0, 12.0, 8.0, 17.0, 27.0, 13.0, 18.0, 60.0, 70.0, 22.0, 36.0, 31.0, 42.0, 44.0, 41.0, 26.0, 42.0, 6.0, 6.0, 47.0, 43.0, 26.0, 34.0, 27.0, 22.0, 20.0, 25.0, 17.0, 17.0, 8.0, 17.0, 45.0, 66.0, 25.0, 14.0, 37.0, 34.0, 3.0, 6.0, 47.0, 29.0, 39.0, 42.0, 56.0, 55.0, 16.0, 18.0, 30.0, 23.0, 8.0, 28.0, 36.0, 43.0, 34.0, 36.0, 19.0, 20.0, 58.0, 73.0, 37.0, 40.0, 3.0, 6.0, 3.0, 22.0, 8.0, 14.0, 14.0, 9.0, 11.0, 9.0, 37.0, 29.0, 51.0, 48.0, 44.0, 32.0, 39.0, 40.0, 53.0, 42.0, 42.0, 45.0, 14.0, 17.0, 28.0, 53.0, 12.0, 5.0, 9.0, 3.0, 17.0, 14.0, 31.0, 45.0, 9.0, 8.0, 9.0, 14.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6937747578849255, "mean_inference_ms": 1.2103427215659897, "mean_action_processing_ms": 0.13392971773962517, "mean_env_wait_ms": 0.8742234499034806, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 131.0, "episode_reward_min": 9.0, "episode_reward_mean": 53.25, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 3.0}, "policy_reward_max": {"ppo": 73.0}, "policy_reward_mean": {"ppo": 26.625}, "hist_stats": {"episode_reward": [65.0, 26.0, 36.0, 33.0, 31.0, 31.0, 42.0, 28.0, 66.0, 42.0, 20.0, 31.0, 96.0, 87.0, 35.0, 73.0, 44.0, 82.0, 33.0, 41.0, 23.0, 25.0, 47.0, 85.0, 93.0, 9.0, 43.0, 28.0, 90.0, 53.0, 20.0, 38.0, 73.0, 92.0, 71.0, 39.0, 39.0, 87.0, 85.0, 125.0, 47.0, 28.0, 12.0, 53.0, 50.0, 78.0, 20.0, 34.0, 73.0, 70.0, 23.0, 20.0, 44.0, 31.0, 130.0, 58.0, 73.0, 85.0, 68.0, 12.0, 90.0, 60.0, 49.0, 45.0, 34.0, 25.0, 111.0, 39.0, 71.0, 9.0, 76.0, 81.0, 111.0, 34.0, 53.0, 36.0, 79.0, 70.0, 39.0, 131.0, 77.0, 9.0, 25.0, 22.0, 23.0, 20.0, 66.0, 99.0, 76.0, 79.0, 95.0, 87.0, 31.0, 81.0, 17.0, 12.0, 31.0, 76.0, 17.0, 23.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [36.0, 29.0, 12.0, 14.0, 16.0, 20.0, 8.0, 25.0, 12.0, 19.0, 8.0, 23.0, 15.0, 27.0, 19.0, 9.0, 26.0, 40.0, 32.0, 10.0, 9.0, 11.0, 13.0, 18.0, 43.0, 53.0, 42.0, 45.0, 19.0, 16.0, 36.0, 37.0, 21.0, 23.0, 40.0, 42.0, 16.0, 17.0, 21.0, 20.0, 14.0, 9.0, 6.0, 19.0, 24.0, 23.0, 42.0, 43.0, 56.0, 37.0, 3.0, 6.0, 28.0, 15.0, 17.0, 11.0, 42.0, 48.0, 28.0, 25.0, 9.0, 11.0, 22.0, 16.0, 36.0, 37.0, 44.0, 48.0, 34.0, 37.0, 24.0, 15.0, 24.0, 15.0, 50.0, 37.0, 39.0, 46.0, 62.0, 63.0, 30.0, 17.0, 14.0, 14.0, 6.0, 6.0, 12.0, 41.0, 37.0, 13.0, 41.0, 37.0, 6.0, 14.0, 15.0, 19.0, 31.0, 42.0, 30.0, 40.0, 3.0, 20.0, 12.0, 8.0, 17.0, 27.0, 13.0, 18.0, 60.0, 70.0, 22.0, 36.0, 31.0, 42.0, 44.0, 41.0, 26.0, 42.0, 6.0, 6.0, 47.0, 43.0, 26.0, 34.0, 27.0, 22.0, 20.0, 25.0, 17.0, 17.0, 8.0, 17.0, 45.0, 66.0, 25.0, 14.0, 37.0, 34.0, 3.0, 6.0, 47.0, 29.0, 39.0, 42.0, 56.0, 55.0, 16.0, 18.0, 30.0, 23.0, 8.0, 28.0, 36.0, 43.0, 34.0, 36.0, 19.0, 20.0, 58.0, 73.0, 37.0, 40.0, 3.0, 6.0, 3.0, 22.0, 8.0, 14.0, 14.0, 9.0, 11.0, 9.0, 37.0, 29.0, 51.0, 48.0, 44.0, 32.0, 39.0, 40.0, 53.0, 42.0, 42.0, 45.0, 14.0, 17.0, 28.0, 53.0, 12.0, 5.0, 9.0, 3.0, 17.0, 14.0, 31.0, 45.0, 9.0, 8.0, 9.0, 14.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6937747578849255, "mean_inference_ms": 1.2103427215659897, "mean_action_processing_ms": 0.13392971773962517, "mean_env_wait_ms": 0.8742234499034806, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 716800, "num_agent_steps_trained": 716800, "num_env_steps_sampled": 358400, "num_env_steps_trained": 358400, "num_env_steps_sampled_this_iter": 12800, 
"num_env_steps_trained_this_iter": 12800, "timesteps_total": 358400, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 716800, "timers": {"training_iteration_time_ms": 3676.763, "learn_time_ms": 1091.56, "learn_throughput": 11726.336, "synch_weights_time_ms": 11.323}, "counters": {"num_env_steps_sampled": 358400, "num_env_steps_trained": 358400, "num_agent_steps_sampled": 716800, "num_agent_steps_trained": 716800}, "done": false, "episodes_total": 896, "training_iteration": 28, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-01-45", "timestamp": 1666580505, "time_this_iter_s": 3.7481324672698975, "time_total_s": 106.33463025093079, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 
0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, 
"attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": 
{"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, 
"attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, 
"replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, 
"_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 106.33463025093079, "timesteps_since_restore": 0, "iterations_since_restore": 28, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 21.183333333333334, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 12.0, "sparse_reward_min": 0, "sparse_reward_max": 60, "shaped_reward_mean": 36.41, "shaped_reward_min": 6, "shaped_reward_max": 78, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 5.45, "onion_pickup_agent_0_min": 1, "onion_pickup_agent_0_max": 12, "onion_pickup_agent_1_mean": 6.62, "onion_pickup_agent_1_min": 2, "onion_pickup_agent_1_max": 14, "useful_onion_pickup_agent_0_mean": 3.77, "useful_onion_pickup_agent_0_min": 1, "useful_onion_pickup_agent_0_max": 10, "useful_onion_pickup_agent_1_mean": 4.66, "useful_onion_pickup_agent_1_min": 1, "useful_onion_pickup_agent_1_max": 10, "onion_drop_agent_0_mean": 2.38, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 7, "onion_drop_agent_1_mean": 2.47, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 9, "useful_onion_drop_agent_0_mean": 1.44, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 1.54, 
"useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 8, "potting_onion_agent_0_mean": 2.68, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 8, "potting_onion_agent_1_mean": 3.74, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 9, "dish_pickup_agent_0_mean": 3.64, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 3.28, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 0.69, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 3, "useful_dish_pickup_agent_1_mean": 0.61, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 4, "dish_drop_agent_0_mean": 2.05, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 9, "dish_drop_agent_1_mean": 1.71, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 6, "useful_dish_drop_agent_0_mean": 0.5, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.52, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 5, "soup_pickup_agent_0_mean": 1.74, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 6, "soup_pickup_agent_1_mean": 1.77, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 5, "soup_delivery_agent_0_mean": 1.03, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 3, "soup_delivery_agent_1_mean": 0.83, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 4, "soup_drop_agent_0_mean": 0.57, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 6, "soup_drop_agent_1_mean": 0.79, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 4, "optimal_onion_potting_agent_0_mean": 2.68, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 8, "optimal_onion_potting_agent_1_mean": 3.74, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 9, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, 
"optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 2.68, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 8, "viable_onion_potting_agent_1_mean": 3.74, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 9, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 7.450580707946131e-10, "cur_lr": 0.0010000000474974513, "total_loss": -0.00494603905826807, "policy_loss": -0.004427894949913025, "vf_loss": 3.332706928253174, 
"vf_explained_var": 0.22580432891845703, "kl": 0.0014256751164793968, "entropy": 1.7028231620788574, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 371200, "num_env_steps_trained": 371200, "num_agent_steps_sampled": 742400, "num_agent_steps_trained": 742400}, "sampler_results": {"episode_reward_max": 179.0, "episode_reward_min": 6.0, "episode_reward_mean": 60.41, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 93.0}, "policy_reward_mean": {"ppo": 30.205}, "custom_metrics": {"sparse_reward_mean": 12.0, "sparse_reward_min": 0, "sparse_reward_max": 60, "shaped_reward_mean": 36.41, "shaped_reward_min": 6, "shaped_reward_max": 78, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 5.45, "onion_pickup_agent_0_min": 1, "onion_pickup_agent_0_max": 12, "onion_pickup_agent_1_mean": 6.62, "onion_pickup_agent_1_min": 2, 
"onion_pickup_agent_1_max": 14, "useful_onion_pickup_agent_0_mean": 3.77, "useful_onion_pickup_agent_0_min": 1, "useful_onion_pickup_agent_0_max": 10, "useful_onion_pickup_agent_1_mean": 4.66, "useful_onion_pickup_agent_1_min": 1, "useful_onion_pickup_agent_1_max": 10, "onion_drop_agent_0_mean": 2.38, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 7, "onion_drop_agent_1_mean": 2.47, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 9, "useful_onion_drop_agent_0_mean": 1.44, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 1.54, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 8, "potting_onion_agent_0_mean": 2.68, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 8, "potting_onion_agent_1_mean": 3.74, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 9, "dish_pickup_agent_0_mean": 3.64, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 3.28, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 0.69, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 3, "useful_dish_pickup_agent_1_mean": 0.61, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 4, "dish_drop_agent_0_mean": 2.05, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 9, "dish_drop_agent_1_mean": 1.71, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 6, "useful_dish_drop_agent_0_mean": 0.5, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.52, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 5, "soup_pickup_agent_0_mean": 1.74, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 6, "soup_pickup_agent_1_mean": 1.77, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 5, "soup_delivery_agent_0_mean": 1.03, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 3, 
"soup_delivery_agent_1_mean": 0.83, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 4, "soup_drop_agent_0_mean": 0.57, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 6, "soup_drop_agent_1_mean": 0.79, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 4, "optimal_onion_potting_agent_0_mean": 2.68, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 8, "optimal_onion_potting_agent_1_mean": 3.74, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 9, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 2.68, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 8, "viable_onion_potting_agent_1_mean": 3.74, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 9, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, 
"useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [73.0, 92.0, 71.0, 39.0, 39.0, 87.0, 85.0, 125.0, 47.0, 28.0, 12.0, 53.0, 50.0, 78.0, 20.0, 34.0, 73.0, 70.0, 23.0, 20.0, 44.0, 31.0, 130.0, 58.0, 73.0, 85.0, 68.0, 12.0, 90.0, 60.0, 49.0, 45.0, 34.0, 25.0, 111.0, 39.0, 71.0, 9.0, 76.0, 81.0, 111.0, 34.0, 53.0, 36.0, 79.0, 70.0, 39.0, 131.0, 77.0, 9.0, 25.0, 22.0, 23.0, 20.0, 66.0, 99.0, 76.0, 79.0, 95.0, 87.0, 31.0, 81.0, 17.0, 12.0, 31.0, 76.0, 17.0, 23.0, 125.0, 145.0, 9.0, 71.0, 85.0, 48.0, 55.0, 36.0, 92.0, 44.0, 128.0, 74.0, 96.0, 53.0, 31.0, 14.0, 90.0, 40.0, 179.0, 53.0, 74.0, 90.0, 20.0, 87.0, 80.0, 25.0, 118.0, 9.0, 6.0, 71.0, 131.0, 33.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [36.0, 37.0, 44.0, 48.0, 34.0, 37.0, 24.0, 15.0, 24.0, 15.0, 50.0, 37.0, 39.0, 46.0, 62.0, 63.0, 30.0, 17.0, 14.0, 14.0, 6.0, 6.0, 12.0, 41.0, 37.0, 13.0, 41.0, 37.0, 6.0, 14.0, 15.0, 19.0, 31.0, 42.0, 30.0, 40.0, 3.0, 20.0, 12.0, 8.0, 17.0, 27.0, 13.0, 18.0, 60.0, 70.0, 22.0, 36.0, 31.0, 42.0, 44.0, 41.0, 26.0, 42.0, 6.0, 6.0, 47.0, 43.0, 26.0, 34.0, 27.0, 22.0, 20.0, 25.0, 17.0, 17.0, 8.0, 17.0, 45.0, 66.0, 25.0, 14.0, 37.0, 34.0, 3.0, 6.0, 47.0, 29.0, 39.0, 42.0, 56.0, 55.0, 
16.0, 18.0, 30.0, 23.0, 8.0, 28.0, 36.0, 43.0, 34.0, 36.0, 19.0, 20.0, 58.0, 73.0, 37.0, 40.0, 3.0, 6.0, 3.0, 22.0, 8.0, 14.0, 14.0, 9.0, 11.0, 9.0, 37.0, 29.0, 51.0, 48.0, 44.0, 32.0, 39.0, 40.0, 53.0, 42.0, 42.0, 45.0, 14.0, 17.0, 28.0, 53.0, 12.0, 5.0, 9.0, 3.0, 17.0, 14.0, 31.0, 45.0, 9.0, 8.0, 9.0, 14.0, 58.0, 67.0, 70.0, 75.0, 3.0, 6.0, 33.0, 38.0, 43.0, 42.0, 26.0, 22.0, 21.0, 34.0, 9.0, 27.0, 52.0, 40.0, 22.0, 22.0, 60.0, 68.0, 49.0, 25.0, 45.0, 51.0, 25.0, 28.0, 11.0, 20.0, 5.0, 9.0, 56.0, 34.0, 14.0, 26.0, 86.0, 93.0, 15.0, 38.0, 34.0, 40.0, 50.0, 40.0, 0.0, 20.0, 34.0, 53.0, 45.0, 35.0, 6.0, 19.0, 53.0, 65.0, 3.0, 6.0, 6.0, 0.0, 37.0, 34.0, 62.0, 69.0, 10.0, 23.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6931727354048686, "mean_inference_ms": 1.2108588597383083, "mean_action_processing_ms": 0.13381849352361283, "mean_env_wait_ms": 0.8752686389591946, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 179.0, "episode_reward_min": 6.0, "episode_reward_mean": 60.41, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 93.0}, "policy_reward_mean": {"ppo": 30.205}, "hist_stats": {"episode_reward": [73.0, 92.0, 71.0, 39.0, 39.0, 87.0, 85.0, 125.0, 47.0, 28.0, 12.0, 53.0, 50.0, 78.0, 20.0, 34.0, 73.0, 70.0, 23.0, 20.0, 44.0, 31.0, 130.0, 58.0, 73.0, 85.0, 68.0, 12.0, 90.0, 60.0, 49.0, 45.0, 34.0, 25.0, 111.0, 39.0, 71.0, 9.0, 76.0, 81.0, 111.0, 34.0, 53.0, 36.0, 79.0, 70.0, 39.0, 131.0, 77.0, 9.0, 25.0, 22.0, 23.0, 20.0, 66.0, 99.0, 76.0, 79.0, 95.0, 87.0, 31.0, 81.0, 17.0, 12.0, 31.0, 76.0, 17.0, 23.0, 125.0, 145.0, 9.0, 71.0, 85.0, 48.0, 55.0, 36.0, 92.0, 44.0, 128.0, 74.0, 96.0, 53.0, 31.0, 14.0, 90.0, 40.0, 179.0, 53.0, 74.0, 90.0, 20.0, 87.0, 80.0, 25.0, 118.0, 9.0, 6.0, 71.0, 131.0, 33.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [36.0, 37.0, 44.0, 48.0, 34.0, 37.0, 24.0, 15.0, 24.0, 15.0, 50.0, 37.0, 39.0, 46.0, 62.0, 63.0, 30.0, 17.0, 14.0, 14.0, 6.0, 6.0, 12.0, 41.0, 37.0, 13.0, 41.0, 37.0, 6.0, 14.0, 15.0, 19.0, 31.0, 42.0, 30.0, 40.0, 3.0, 20.0, 12.0, 8.0, 17.0, 27.0, 13.0, 18.0, 60.0, 70.0, 22.0, 36.0, 31.0, 42.0, 44.0, 41.0, 26.0, 42.0, 6.0, 6.0, 47.0, 43.0, 26.0, 34.0, 27.0, 22.0, 20.0, 25.0, 17.0, 17.0, 8.0, 17.0, 45.0, 66.0, 25.0, 14.0, 37.0, 34.0, 3.0, 6.0, 47.0, 29.0, 39.0, 42.0, 56.0, 55.0, 16.0, 18.0, 30.0, 23.0, 8.0, 28.0, 36.0, 43.0, 34.0, 36.0, 19.0, 20.0, 58.0, 73.0, 37.0, 40.0, 3.0, 6.0, 3.0, 22.0, 8.0, 14.0, 14.0, 9.0, 11.0, 9.0, 37.0, 29.0, 51.0, 48.0, 44.0, 32.0, 39.0, 40.0, 53.0, 42.0, 42.0, 45.0, 14.0, 17.0, 28.0, 53.0, 12.0, 5.0, 9.0, 3.0, 17.0, 14.0, 31.0, 45.0, 9.0, 8.0, 9.0, 14.0, 58.0, 67.0, 70.0, 75.0, 3.0, 6.0, 33.0, 38.0, 43.0, 42.0, 26.0, 22.0, 21.0, 34.0, 9.0, 27.0, 52.0, 40.0, 22.0, 22.0, 60.0, 68.0, 49.0, 25.0, 45.0, 51.0, 25.0, 28.0, 11.0, 20.0, 5.0, 9.0, 56.0, 34.0, 14.0, 26.0, 86.0, 93.0, 15.0, 38.0, 34.0, 40.0, 50.0, 40.0, 0.0, 20.0, 34.0, 53.0, 45.0, 35.0, 6.0, 19.0, 53.0, 65.0, 3.0, 6.0, 6.0, 0.0, 37.0, 34.0, 62.0, 69.0, 10.0, 23.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6931727354048686, "mean_inference_ms": 1.2108588597383083, "mean_action_processing_ms": 0.13381849352361283, "mean_env_wait_ms": 0.8752686389591946, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 742400, "num_agent_steps_trained": 742400, "num_env_steps_sampled": 371200, "num_env_steps_trained": 371200, "num_env_steps_sampled_this_iter": 12800, 
"num_env_steps_trained_this_iter": 12800, "timesteps_total": 371200, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 742400, "timers": {"training_iteration_time_ms": 3711.502, "learn_time_ms": 1094.516, "learn_throughput": 11694.668, "synch_weights_time_ms": 11.285}, "counters": {"num_env_steps_sampled": 371200, "num_env_steps_trained": 371200, "num_agent_steps_sampled": 742400, "num_agent_steps_trained": 742400}, "done": false, "episodes_total": 928, "training_iteration": 29, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-01-49", "timestamp": 1666580509, "time_this_iter_s": 3.9900293350219727, "time_total_s": 110.32465958595276, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], 
[Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, 
"attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": 
{"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, 
"attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, 
"replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, 
"_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 110.32465958595276, "timesteps_since_restore": 0, "iterations_since_restore": 29, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 21.96666666666667, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 12.2, "sparse_reward_min": 0, "sparse_reward_max": 60, "shaped_reward_mean": 37.76, "shaped_reward_min": 6, "shaped_reward_max": 78, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 5.76, "onion_pickup_agent_0_min": 1, "onion_pickup_agent_0_max": 12, "onion_pickup_agent_1_mean": 6.67, "onion_pickup_agent_1_min": 2, "onion_pickup_agent_1_max": 14, "useful_onion_pickup_agent_0_mean": 3.98, "useful_onion_pickup_agent_0_min": 1, "useful_onion_pickup_agent_0_max": 10, "useful_onion_pickup_agent_1_mean": 4.73, "useful_onion_pickup_agent_1_min": 1, "useful_onion_pickup_agent_1_max": 10, "onion_drop_agent_0_mean": 2.34, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 2.44, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 8, "useful_onion_drop_agent_0_mean": 1.5, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 1.55, 
"useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 6, "potting_onion_agent_0_mean": 2.93, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 10, "potting_onion_agent_1_mean": 3.81, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 9, "dish_pickup_agent_0_mean": 3.63, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 3.31, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 0.72, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 3, "useful_dish_pickup_agent_1_mean": 0.66, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 3, "dish_drop_agent_0_mean": 2.09, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 9, "dish_drop_agent_1_mean": 1.69, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 6, "useful_dish_drop_agent_0_mean": 0.54, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 4, "useful_dish_drop_agent_1_mean": 0.61, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 5, "soup_pickup_agent_0_mean": 1.91, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 1.76, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 5, "soup_delivery_agent_0_mean": 1.0, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 3, "soup_delivery_agent_1_mean": 0.87, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 3, "soup_drop_agent_0_mean": 0.77, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 7, "soup_drop_agent_1_mean": 0.75, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 4, "optimal_onion_potting_agent_0_mean": 2.93, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 10, "optimal_onion_potting_agent_1_mean": 3.81, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 9, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, 
"optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 2.93, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 10, "viable_onion_potting_agent_1_mean": 3.81, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 9, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 3.7252903539730653e-10, "cur_lr": 0.0010000000474974513, "total_loss": -0.0029865906108170748, "policy_loss": -0.0024586068466305733, "vf_loss": 3.2020277976989746, 
"vf_explained_var": 0.23653024435043335, "kl": 0.0015087344218045473, "entropy": 1.6963691711425781, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 384000, "num_env_steps_trained": 384000, "num_agent_steps_sampled": 768000, "num_agent_steps_trained": 768000}, "sampler_results": {"episode_reward_max": 179.0, "episode_reward_min": 6.0, "episode_reward_mean": 62.16, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 93.0}, "policy_reward_mean": {"ppo": 31.08}, "custom_metrics": {"sparse_reward_mean": 12.2, "sparse_reward_min": 0, "sparse_reward_max": 60, "shaped_reward_mean": 37.76, "shaped_reward_min": 6, "shaped_reward_max": 78, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 5.76, "onion_pickup_agent_0_min": 1, "onion_pickup_agent_0_max": 12, "onion_pickup_agent_1_mean": 6.67, "onion_pickup_agent_1_min": 2, 
"onion_pickup_agent_1_max": 14, "useful_onion_pickup_agent_0_mean": 3.98, "useful_onion_pickup_agent_0_min": 1, "useful_onion_pickup_agent_0_max": 10, "useful_onion_pickup_agent_1_mean": 4.73, "useful_onion_pickup_agent_1_min": 1, "useful_onion_pickup_agent_1_max": 10, "onion_drop_agent_0_mean": 2.34, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 2.44, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 8, "useful_onion_drop_agent_0_mean": 1.5, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 1.55, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 6, "potting_onion_agent_0_mean": 2.93, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 10, "potting_onion_agent_1_mean": 3.81, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 9, "dish_pickup_agent_0_mean": 3.63, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 3.31, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 0.72, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 3, "useful_dish_pickup_agent_1_mean": 0.66, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 3, "dish_drop_agent_0_mean": 2.09, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 9, "dish_drop_agent_1_mean": 1.69, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 6, "useful_dish_drop_agent_0_mean": 0.54, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 4, "useful_dish_drop_agent_1_mean": 0.61, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 5, "soup_pickup_agent_0_mean": 1.91, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 1.76, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 5, "soup_delivery_agent_0_mean": 1.0, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 3, 
"soup_delivery_agent_1_mean": 0.87, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 3, "soup_drop_agent_0_mean": 0.77, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 7, "soup_drop_agent_1_mean": 0.75, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 4, "optimal_onion_potting_agent_0_mean": 2.93, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 10, "optimal_onion_potting_agent_1_mean": 3.81, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 9, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 2.93, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 10, "viable_onion_potting_agent_1_mean": 3.81, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 9, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, 
"useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [34.0, 25.0, 111.0, 39.0, 71.0, 9.0, 76.0, 81.0, 111.0, 34.0, 53.0, 36.0, 79.0, 70.0, 39.0, 131.0, 77.0, 9.0, 25.0, 22.0, 23.0, 20.0, 66.0, 99.0, 76.0, 79.0, 95.0, 87.0, 31.0, 81.0, 17.0, 12.0, 31.0, 76.0, 17.0, 23.0, 125.0, 145.0, 9.0, 71.0, 85.0, 48.0, 55.0, 36.0, 92.0, 44.0, 128.0, 74.0, 96.0, 53.0, 31.0, 14.0, 90.0, 40.0, 179.0, 53.0, 74.0, 90.0, 20.0, 87.0, 80.0, 25.0, 118.0, 9.0, 6.0, 71.0, 131.0, 33.0, 12.0, 87.0, 37.0, 95.0, 102.0, 88.0, 20.0, 147.0, 45.0, 61.0, 79.0, 113.0, 31.0, 39.0, 39.0, 91.0, 68.0, 44.0, 52.0, 85.0, 44.0, 87.0, 20.0, 79.0, 122.0, 50.0, 82.0, 23.0, 26.0, 23.0, 23.0, 125.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [17.0, 17.0, 8.0, 17.0, 45.0, 66.0, 25.0, 14.0, 37.0, 34.0, 3.0, 6.0, 47.0, 29.0, 39.0, 42.0, 56.0, 55.0, 16.0, 18.0, 30.0, 23.0, 8.0, 28.0, 36.0, 43.0, 34.0, 36.0, 19.0, 20.0, 58.0, 73.0, 37.0, 40.0, 3.0, 6.0, 3.0, 22.0, 8.0, 14.0, 14.0, 9.0, 11.0, 9.0, 37.0, 29.0, 51.0, 48.0, 44.0, 32.0, 39.0, 40.0, 53.0, 42.0, 42.0, 45.0, 14.0, 17.0, 28.0, 53.0, 12.0, 5.0, 9.0, 3.0, 17.0, 14.0, 31.0, 45.0, 9.0, 8.0, 9.0, 14.0, 58.0, 67.0, 70.0, 75.0, 3.0, 6.0, 33.0, 38.0, 43.0, 42.0, 26.0, 
22.0, 21.0, 34.0, 9.0, 27.0, 52.0, 40.0, 22.0, 22.0, 60.0, 68.0, 49.0, 25.0, 45.0, 51.0, 25.0, 28.0, 11.0, 20.0, 5.0, 9.0, 56.0, 34.0, 14.0, 26.0, 86.0, 93.0, 15.0, 38.0, 34.0, 40.0, 50.0, 40.0, 0.0, 20.0, 34.0, 53.0, 45.0, 35.0, 6.0, 19.0, 53.0, 65.0, 3.0, 6.0, 6.0, 0.0, 37.0, 34.0, 62.0, 69.0, 10.0, 23.0, 6.0, 6.0, 26.0, 61.0, 28.0, 9.0, 41.0, 54.0, 50.0, 52.0, 34.0, 54.0, 3.0, 17.0, 83.0, 64.0, 22.0, 23.0, 36.0, 25.0, 37.0, 42.0, 43.0, 70.0, 11.0, 20.0, 25.0, 14.0, 20.0, 19.0, 49.0, 42.0, 45.0, 23.0, 30.0, 14.0, 31.0, 21.0, 49.0, 36.0, 14.0, 30.0, 39.0, 48.0, 11.0, 9.0, 43.0, 36.0, 52.0, 70.0, 27.0, 23.0, 39.0, 43.0, 0.0, 23.0, 11.0, 15.0, 12.0, 11.0, 14.0, 9.0, 62.0, 63.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6924762482736284, "mean_inference_ms": 1.2114109394813388, "mean_action_processing_ms": 0.13370852013253243, "mean_env_wait_ms": 0.8763468400843789, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 179.0, "episode_reward_min": 6.0, "episode_reward_mean": 62.16, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 93.0}, "policy_reward_mean": {"ppo": 31.08}, "hist_stats": {"episode_reward": [34.0, 25.0, 111.0, 39.0, 71.0, 9.0, 76.0, 81.0, 111.0, 34.0, 53.0, 36.0, 79.0, 70.0, 39.0, 131.0, 77.0, 9.0, 25.0, 22.0, 23.0, 20.0, 66.0, 99.0, 76.0, 79.0, 95.0, 87.0, 31.0, 81.0, 17.0, 12.0, 31.0, 76.0, 17.0, 23.0, 125.0, 145.0, 9.0, 71.0, 85.0, 48.0, 55.0, 36.0, 92.0, 44.0, 128.0, 74.0, 96.0, 53.0, 31.0, 14.0, 90.0, 40.0, 179.0, 53.0, 74.0, 90.0, 20.0, 87.0, 80.0, 25.0, 118.0, 9.0, 6.0, 71.0, 131.0, 33.0, 12.0, 87.0, 37.0, 95.0, 102.0, 88.0, 20.0, 147.0, 45.0, 61.0, 79.0, 113.0, 31.0, 39.0, 39.0, 91.0, 68.0, 44.0, 52.0, 85.0, 44.0, 87.0, 20.0, 79.0, 122.0, 50.0, 82.0, 23.0, 26.0, 23.0, 23.0, 125.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [17.0, 17.0, 8.0, 17.0, 45.0, 66.0, 25.0, 14.0, 37.0, 34.0, 3.0, 6.0, 47.0, 29.0, 39.0, 42.0, 56.0, 55.0, 16.0, 18.0, 30.0, 23.0, 8.0, 28.0, 36.0, 43.0, 34.0, 36.0, 19.0, 20.0, 58.0, 73.0, 37.0, 40.0, 3.0, 6.0, 3.0, 22.0, 8.0, 14.0, 14.0, 9.0, 11.0, 9.0, 37.0, 29.0, 51.0, 48.0, 44.0, 32.0, 39.0, 40.0, 53.0, 42.0, 42.0, 45.0, 14.0, 17.0, 28.0, 53.0, 12.0, 5.0, 9.0, 3.0, 17.0, 14.0, 31.0, 45.0, 9.0, 8.0, 9.0, 14.0, 58.0, 67.0, 70.0, 75.0, 3.0, 6.0, 33.0, 38.0, 43.0, 42.0, 26.0, 22.0, 21.0, 34.0, 9.0, 27.0, 52.0, 40.0, 22.0, 22.0, 60.0, 68.0, 49.0, 25.0, 45.0, 51.0, 25.0, 28.0, 11.0, 20.0, 5.0, 9.0, 56.0, 34.0, 14.0, 26.0, 86.0, 93.0, 15.0, 38.0, 34.0, 40.0, 50.0, 40.0, 0.0, 20.0, 34.0, 53.0, 45.0, 35.0, 6.0, 19.0, 53.0, 65.0, 3.0, 6.0, 6.0, 0.0, 37.0, 34.0, 62.0, 69.0, 10.0, 23.0, 6.0, 6.0, 26.0, 61.0, 28.0, 9.0, 41.0, 54.0, 50.0, 52.0, 34.0, 54.0, 3.0, 17.0, 83.0, 64.0, 22.0, 23.0, 36.0, 25.0, 37.0, 42.0, 43.0, 70.0, 11.0, 20.0, 25.0, 14.0, 20.0, 19.0, 49.0, 42.0, 45.0, 23.0, 30.0, 14.0, 31.0, 21.0, 49.0, 36.0, 14.0, 30.0, 39.0, 48.0, 11.0, 9.0, 43.0, 36.0, 52.0, 70.0, 27.0, 23.0, 39.0, 43.0, 0.0, 23.0, 11.0, 15.0, 12.0, 11.0, 14.0, 9.0, 62.0, 63.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6924762482736284, "mean_inference_ms": 1.2114109394813388, "mean_action_processing_ms": 0.13370852013253243, "mean_env_wait_ms": 0.8763468400843789, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 768000, "num_agent_steps_trained": 768000, "num_env_steps_sampled": 384000, "num_env_steps_trained": 384000, "num_env_steps_sampled_this_iter": 
12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 384000, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 768000, "timers": {"training_iteration_time_ms": 3716.909, "learn_time_ms": 1106.67, "learn_throughput": 11566.226, "synch_weights_time_ms": 11.25}, "counters": {"num_env_steps_sampled": 384000, "num_env_steps_trained": 384000, "num_agent_steps_sampled": 768000, "num_agent_steps_trained": 768000}, "done": false, "episodes_total": 960, "training_iteration": 30, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-01-53", "timestamp": 1666580513, "time_this_iter_s": 3.6705687046051025, "time_total_s": 113.99522829055786, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], 
[Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, 
"attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": 
{"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, 
"attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, 
"replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, 
"_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 113.99522829055786, "timesteps_since_restore": 0, "iterations_since_restore": 30, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 24.2, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 12.6, "sparse_reward_min": 0, "sparse_reward_max": 60, "shaped_reward_mean": 41.39, "shaped_reward_min": 6, "shaped_reward_max": 78, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 6.19, "onion_pickup_agent_0_min": 1, "onion_pickup_agent_0_max": 13, "onion_pickup_agent_1_mean": 6.89, "onion_pickup_agent_1_min": 1, "onion_pickup_agent_1_max": 14, "useful_onion_pickup_agent_0_mean": 4.23, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 10, "useful_onion_pickup_agent_1_mean": 5.01, "useful_onion_pickup_agent_1_min": 1, "useful_onion_pickup_agent_1_max": 12, "onion_drop_agent_0_mean": 2.47, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 10, "onion_drop_agent_1_mean": 2.34, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 8, "useful_onion_drop_agent_0_mean": 1.65, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 9, "useful_onion_drop_agent_1_mean": 1.5, 
"useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 7, "potting_onion_agent_0_mean": 3.24, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 10, "potting_onion_agent_1_mean": 4.1, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 9, "dish_pickup_agent_0_mean": 3.82, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 3.47, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 0.8, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 3, "useful_dish_pickup_agent_1_mean": 0.74, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 3, "dish_drop_agent_0_mean": 2.14, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 8, "dish_drop_agent_1_mean": 1.74, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 6, "useful_dish_drop_agent_0_mean": 0.62, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 4, "useful_dish_drop_agent_1_mean": 0.62, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 5, "soup_pickup_agent_0_mean": 2.24, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 1.92, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 6, "soup_delivery_agent_0_mean": 1.05, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 3, "soup_delivery_agent_1_mean": 0.94, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 3, "soup_drop_agent_0_mean": 1.04, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 7, "soup_drop_agent_1_mean": 0.83, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 4, "optimal_onion_potting_agent_0_mean": 3.24, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 10, "optimal_onion_potting_agent_1_mean": 4.1, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 9, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, 
"optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 3.24, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 10, "viable_onion_potting_agent_1_mean": 4.1, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 9, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 1.8626451769865326e-10, "cur_lr": 0.0010000000474974513, "total_loss": -0.0069397566840052605, "policy_loss": -0.006435990799218416, "vf_loss": 3.3930177688598633, 
"vf_explained_var": 0.253547728061676, "kl": 0.0013529944699257612, "entropy": 1.6861307621002197, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 396800, "num_env_steps_trained": 396800, "num_agent_steps_sampled": 793600, "num_agent_steps_trained": 793600}, "sampler_results": {"episode_reward_max": 179.0, "episode_reward_min": 6.0, "episode_reward_mean": 66.59, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 93.0}, "policy_reward_mean": {"ppo": 33.295}, "custom_metrics": {"sparse_reward_mean": 12.6, "sparse_reward_min": 0, "sparse_reward_max": 60, "shaped_reward_mean": 41.39, "shaped_reward_min": 6, "shaped_reward_max": 78, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 6.19, "onion_pickup_agent_0_min": 1, "onion_pickup_agent_0_max": 13, "onion_pickup_agent_1_mean": 6.89, "onion_pickup_agent_1_min": 1, 
"onion_pickup_agent_1_max": 14, "useful_onion_pickup_agent_0_mean": 4.23, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 10, "useful_onion_pickup_agent_1_mean": 5.01, "useful_onion_pickup_agent_1_min": 1, "useful_onion_pickup_agent_1_max": 12, "onion_drop_agent_0_mean": 2.47, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 10, "onion_drop_agent_1_mean": 2.34, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 8, "useful_onion_drop_agent_0_mean": 1.65, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 9, "useful_onion_drop_agent_1_mean": 1.5, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 7, "potting_onion_agent_0_mean": 3.24, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 10, "potting_onion_agent_1_mean": 4.1, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 9, "dish_pickup_agent_0_mean": 3.82, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 3.47, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 0.8, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 3, "useful_dish_pickup_agent_1_mean": 0.74, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 3, "dish_drop_agent_0_mean": 2.14, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 8, "dish_drop_agent_1_mean": 1.74, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 6, "useful_dish_drop_agent_0_mean": 0.62, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 4, "useful_dish_drop_agent_1_mean": 0.62, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 5, "soup_pickup_agent_0_mean": 2.24, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 1.92, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 6, "soup_delivery_agent_0_mean": 1.05, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 3, 
"soup_delivery_agent_1_mean": 0.94, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 3, "soup_drop_agent_0_mean": 1.04, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 7, "soup_drop_agent_1_mean": 0.83, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 4, "optimal_onion_potting_agent_0_mean": 3.24, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 10, "optimal_onion_potting_agent_1_mean": 4.1, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 9, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 3.24, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 10, "viable_onion_potting_agent_1_mean": 4.1, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 9, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, 
"useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [31.0, 76.0, 17.0, 23.0, 125.0, 145.0, 9.0, 71.0, 85.0, 48.0, 55.0, 36.0, 92.0, 44.0, 128.0, 74.0, 96.0, 53.0, 31.0, 14.0, 90.0, 40.0, 179.0, 53.0, 74.0, 90.0, 20.0, 87.0, 80.0, 25.0, 118.0, 9.0, 6.0, 71.0, 131.0, 33.0, 12.0, 87.0, 37.0, 95.0, 102.0, 88.0, 20.0, 147.0, 45.0, 61.0, 79.0, 113.0, 31.0, 39.0, 39.0, 91.0, 68.0, 44.0, 52.0, 85.0, 44.0, 87.0, 20.0, 79.0, 122.0, 50.0, 82.0, 23.0, 26.0, 23.0, 23.0, 125.0, 65.0, 90.0, 73.0, 106.0, 80.0, 28.0, 9.0, 95.0, 96.0, 51.0, 110.0, 147.0, 28.0, 39.0, 9.0, 150.0, 88.0, 71.0, 77.0, 67.0, 48.0, 107.0, 79.0, 84.0, 39.0, 34.0, 98.0, 64.0, 83.0, 98.0, 9.0, 39.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [17.0, 14.0, 31.0, 45.0, 9.0, 8.0, 9.0, 14.0, 58.0, 67.0, 70.0, 75.0, 3.0, 6.0, 33.0, 38.0, 43.0, 42.0, 26.0, 22.0, 21.0, 34.0, 9.0, 27.0, 52.0, 40.0, 22.0, 22.0, 60.0, 68.0, 49.0, 25.0, 45.0, 51.0, 25.0, 28.0, 11.0, 20.0, 5.0, 9.0, 56.0, 34.0, 14.0, 26.0, 86.0, 93.0, 15.0, 38.0, 34.0, 40.0, 50.0, 40.0, 0.0, 20.0, 34.0, 53.0, 45.0, 35.0, 6.0, 19.0, 53.0, 65.0, 3.0, 6.0, 6.0, 0.0, 37.0, 34.0, 62.0, 69.0, 10.0, 23.0, 6.0, 6.0, 26.0, 61.0, 28.0, 9.0, 41.0, 54.0, 50.0, 52.0, 
34.0, 54.0, 3.0, 17.0, 83.0, 64.0, 22.0, 23.0, 36.0, 25.0, 37.0, 42.0, 43.0, 70.0, 11.0, 20.0, 25.0, 14.0, 20.0, 19.0, 49.0, 42.0, 45.0, 23.0, 30.0, 14.0, 31.0, 21.0, 49.0, 36.0, 14.0, 30.0, 39.0, 48.0, 11.0, 9.0, 43.0, 36.0, 52.0, 70.0, 27.0, 23.0, 39.0, 43.0, 0.0, 23.0, 11.0, 15.0, 12.0, 11.0, 14.0, 9.0, 62.0, 63.0, 31.0, 34.0, 45.0, 45.0, 42.0, 31.0, 56.0, 50.0, 41.0, 39.0, 19.0, 9.0, 0.0, 9.0, 53.0, 42.0, 53.0, 43.0, 22.0, 29.0, 53.0, 57.0, 71.0, 76.0, 9.0, 19.0, 14.0, 25.0, 6.0, 3.0, 67.0, 83.0, 50.0, 38.0, 37.0, 34.0, 36.0, 41.0, 33.0, 34.0, 33.0, 15.0, 53.0, 54.0, 34.0, 45.0, 41.0, 43.0, 22.0, 17.0, 6.0, 28.0, 44.0, 54.0, 28.0, 36.0, 45.0, 38.0, 36.0, 62.0, 3.0, 6.0, 22.0, 17.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6918396883946886, "mean_inference_ms": 1.2118290938548686, "mean_action_processing_ms": 0.13359040886880258, "mean_env_wait_ms": 0.8774454909009484, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 179.0, "episode_reward_min": 6.0, "episode_reward_mean": 66.59, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 93.0}, "policy_reward_mean": {"ppo": 33.295}, "hist_stats": {"episode_reward": [31.0, 76.0, 17.0, 23.0, 125.0, 145.0, 9.0, 71.0, 85.0, 48.0, 55.0, 36.0, 92.0, 44.0, 128.0, 74.0, 96.0, 53.0, 31.0, 14.0, 90.0, 40.0, 179.0, 53.0, 74.0, 90.0, 20.0, 87.0, 80.0, 25.0, 118.0, 9.0, 6.0, 71.0, 131.0, 33.0, 12.0, 87.0, 37.0, 95.0, 102.0, 88.0, 20.0, 147.0, 45.0, 61.0, 79.0, 113.0, 31.0, 39.0, 39.0, 91.0, 68.0, 44.0, 52.0, 85.0, 44.0, 87.0, 20.0, 79.0, 122.0, 50.0, 82.0, 23.0, 26.0, 23.0, 23.0, 125.0, 65.0, 90.0, 73.0, 106.0, 80.0, 28.0, 9.0, 95.0, 96.0, 51.0, 110.0, 147.0, 28.0, 39.0, 9.0, 150.0, 88.0, 71.0, 77.0, 67.0, 48.0, 107.0, 79.0, 84.0, 39.0, 34.0, 98.0, 64.0, 83.0, 98.0, 9.0, 39.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [17.0, 14.0, 31.0, 45.0, 9.0, 8.0, 9.0, 14.0, 58.0, 67.0, 70.0, 75.0, 3.0, 6.0, 33.0, 38.0, 43.0, 42.0, 26.0, 22.0, 21.0, 34.0, 9.0, 27.0, 52.0, 40.0, 22.0, 22.0, 60.0, 68.0, 49.0, 25.0, 45.0, 51.0, 25.0, 28.0, 11.0, 20.0, 5.0, 9.0, 56.0, 34.0, 14.0, 26.0, 86.0, 93.0, 15.0, 38.0, 34.0, 40.0, 50.0, 40.0, 0.0, 20.0, 34.0, 53.0, 45.0, 35.0, 6.0, 19.0, 53.0, 65.0, 3.0, 6.0, 6.0, 0.0, 37.0, 34.0, 62.0, 69.0, 10.0, 23.0, 6.0, 6.0, 26.0, 61.0, 28.0, 9.0, 41.0, 54.0, 50.0, 52.0, 34.0, 54.0, 3.0, 17.0, 83.0, 64.0, 22.0, 23.0, 36.0, 25.0, 37.0, 42.0, 43.0, 70.0, 11.0, 20.0, 25.0, 14.0, 20.0, 19.0, 49.0, 42.0, 45.0, 23.0, 30.0, 14.0, 31.0, 21.0, 49.0, 36.0, 14.0, 30.0, 39.0, 48.0, 11.0, 9.0, 43.0, 36.0, 52.0, 70.0, 27.0, 23.0, 39.0, 43.0, 0.0, 23.0, 11.0, 15.0, 12.0, 11.0, 14.0, 9.0, 62.0, 63.0, 31.0, 34.0, 45.0, 45.0, 42.0, 31.0, 56.0, 50.0, 41.0, 39.0, 19.0, 9.0, 0.0, 9.0, 53.0, 42.0, 53.0, 43.0, 22.0, 29.0, 53.0, 57.0, 71.0, 76.0, 9.0, 19.0, 14.0, 25.0, 6.0, 3.0, 67.0, 83.0, 50.0, 38.0, 37.0, 34.0, 36.0, 41.0, 33.0, 34.0, 33.0, 15.0, 53.0, 54.0, 34.0, 45.0, 41.0, 43.0, 22.0, 17.0, 6.0, 28.0, 44.0, 54.0, 28.0, 36.0, 45.0, 38.0, 36.0, 62.0, 3.0, 6.0, 22.0, 17.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6918396883946886, "mean_inference_ms": 1.2118290938548686, "mean_action_processing_ms": 0.13359040886880258, "mean_env_wait_ms": 0.8774454909009484, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 793600, "num_agent_steps_trained": 793600, "num_env_steps_sampled": 396800, "num_env_steps_trained": 396800, 
"num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 396800, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 793600, "timers": {"training_iteration_time_ms": 3721.315, "learn_time_ms": 1112.039, "learn_throughput": 11510.387, "synch_weights_time_ms": 11.618}, "counters": {"num_env_steps_sampled": 396800, "num_env_steps_trained": 396800, "num_agent_steps_sampled": 793600, "num_agent_steps_trained": 793600}, "done": false, "episodes_total": 992, "training_iteration": 31, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-01-57", "timestamp": 1666580517, "time_this_iter_s": 3.689429759979248, "time_total_s": 117.68465805053711, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": 
false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, 
"attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": 
true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, 
"attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": 
-1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, 
"_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 117.68465805053711, "timesteps_since_restore": 0, "iterations_since_restore": 31, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.766666666666666, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 13.2, "sparse_reward_min": 0, "sparse_reward_max": 60, "shaped_reward_mean": 44.58, "shaped_reward_min": 6, "shaped_reward_max": 76, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 6.52, "onion_pickup_agent_0_min": 1, "onion_pickup_agent_0_max": 13, "onion_pickup_agent_1_mean": 6.69, "onion_pickup_agent_1_min": 1, "onion_pickup_agent_1_max": 14, "useful_onion_pickup_agent_0_mean": 4.5, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 11, "useful_onion_pickup_agent_1_mean": 5.04, "useful_onion_pickup_agent_1_min": 1, "useful_onion_pickup_agent_1_max": 12, "onion_drop_agent_0_mean": 2.44, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 10, "onion_drop_agent_1_mean": 2.13, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 7, "useful_onion_drop_agent_0_mean": 1.67, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 9, "useful_onion_drop_agent_1_mean": 1.38, 
"useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 7, "potting_onion_agent_0_mean": 3.67, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 10, "potting_onion_agent_1_mean": 4.08, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 9, "dish_pickup_agent_0_mean": 3.85, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 3.72, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 0.8, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 3, "useful_dish_pickup_agent_1_mean": 0.81, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 3, "dish_drop_agent_0_mean": 2.0, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 8, "dish_drop_agent_1_mean": 1.8, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 7, "useful_dish_drop_agent_0_mean": 0.63, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 7, "useful_dish_drop_agent_1_mean": 0.54, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 4, "soup_pickup_agent_0_mean": 2.51, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 2.35, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 1.06, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 3, "soup_delivery_agent_1_mean": 1.2, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 4, "soup_drop_agent_0_mean": 1.31, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 7, "soup_drop_agent_1_mean": 1.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 5, "optimal_onion_potting_agent_0_mean": 3.67, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 10, "optimal_onion_potting_agent_1_mean": 4.08, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 9, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, 
"optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 3.67, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 10, "viable_onion_potting_agent_1_mean": 4.08, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 9, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 9.313225884932663e-11, "cur_lr": 0.0010000000474974513, "total_loss": -0.002310357056558132, "policy_loss": -0.0018515328411012888, "vf_loss": 3.8169312477111816, 
"vf_explained_var": 0.21377882361412048, "kl": 0.0015562805347144604, "entropy": 1.6810286045074463, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 409600, "num_env_steps_trained": 409600, "num_agent_steps_sampled": 819200, "num_agent_steps_trained": 819200}, "sampler_results": {"episode_reward_max": 173.0, "episode_reward_min": 6.0, "episode_reward_mean": 70.98, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 89.0}, "policy_reward_mean": {"ppo": 35.49}, "custom_metrics": {"sparse_reward_mean": 13.2, "sparse_reward_min": 0, "sparse_reward_max": 60, "shaped_reward_mean": 44.58, "shaped_reward_min": 6, "shaped_reward_max": 76, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 6.52, "onion_pickup_agent_0_min": 1, "onion_pickup_agent_0_max": 13, "onion_pickup_agent_1_mean": 6.69, "onion_pickup_agent_1_min": 1, 
"onion_pickup_agent_1_max": 14, "useful_onion_pickup_agent_0_mean": 4.5, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 11, "useful_onion_pickup_agent_1_mean": 5.04, "useful_onion_pickup_agent_1_min": 1, "useful_onion_pickup_agent_1_max": 12, "onion_drop_agent_0_mean": 2.44, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 10, "onion_drop_agent_1_mean": 2.13, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 7, "useful_onion_drop_agent_0_mean": 1.67, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 9, "useful_onion_drop_agent_1_mean": 1.38, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 7, "potting_onion_agent_0_mean": 3.67, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 10, "potting_onion_agent_1_mean": 4.08, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 9, "dish_pickup_agent_0_mean": 3.85, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 3.72, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 0.8, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 3, "useful_dish_pickup_agent_1_mean": 0.81, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 3, "dish_drop_agent_0_mean": 2.0, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 8, "dish_drop_agent_1_mean": 1.8, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 7, "useful_dish_drop_agent_0_mean": 0.63, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 7, "useful_dish_drop_agent_1_mean": 0.54, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 4, "soup_pickup_agent_0_mean": 2.51, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 2.35, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 1.06, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 3, 
"soup_delivery_agent_1_mean": 1.2, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 4, "soup_drop_agent_0_mean": 1.31, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 7, "soup_drop_agent_1_mean": 1.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 5, "optimal_onion_potting_agent_0_mean": 3.67, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 10, "optimal_onion_potting_agent_1_mean": 4.08, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 9, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 3.67, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 10, "viable_onion_potting_agent_1_mean": 4.08, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 9, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, 
"useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [6.0, 71.0, 131.0, 33.0, 12.0, 87.0, 37.0, 95.0, 102.0, 88.0, 20.0, 147.0, 45.0, 61.0, 79.0, 113.0, 31.0, 39.0, 39.0, 91.0, 68.0, 44.0, 52.0, 85.0, 44.0, 87.0, 20.0, 79.0, 122.0, 50.0, 82.0, 23.0, 26.0, 23.0, 23.0, 125.0, 65.0, 90.0, 73.0, 106.0, 80.0, 28.0, 9.0, 95.0, 96.0, 51.0, 110.0, 147.0, 28.0, 39.0, 9.0, 150.0, 88.0, 71.0, 77.0, 67.0, 48.0, 107.0, 79.0, 84.0, 39.0, 34.0, 98.0, 64.0, 83.0, 98.0, 9.0, 39.0, 92.0, 87.0, 56.0, 9.0, 144.0, 153.0, 51.0, 50.0, 52.0, 156.0, 136.0, 45.0, 48.0, 59.0, 42.0, 95.0, 46.0, 117.0, 96.0, 93.0, 31.0, 44.0, 94.0, 53.0, 50.0, 45.0, 44.0, 84.0, 133.0, 173.0, 87.0, 92.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [6.0, 0.0, 37.0, 34.0, 62.0, 69.0, 10.0, 23.0, 6.0, 6.0, 26.0, 61.0, 28.0, 9.0, 41.0, 54.0, 50.0, 52.0, 34.0, 54.0, 3.0, 17.0, 83.0, 64.0, 22.0, 23.0, 36.0, 25.0, 37.0, 42.0, 43.0, 70.0, 11.0, 20.0, 25.0, 14.0, 20.0, 19.0, 49.0, 42.0, 45.0, 23.0, 30.0, 14.0, 31.0, 21.0, 49.0, 36.0, 14.0, 30.0, 39.0, 48.0, 11.0, 9.0, 43.0, 36.0, 52.0, 70.0, 27.0, 23.0, 39.0, 43.0, 0.0, 23.0, 11.0, 15.0, 12.0, 11.0, 14.0, 9.0, 62.0, 63.0, 31.0, 34.0, 45.0, 45.0, 42.0, 31.0, 56.0, 50.0, 
41.0, 39.0, 19.0, 9.0, 0.0, 9.0, 53.0, 42.0, 53.0, 43.0, 22.0, 29.0, 53.0, 57.0, 71.0, 76.0, 9.0, 19.0, 14.0, 25.0, 6.0, 3.0, 67.0, 83.0, 50.0, 38.0, 37.0, 34.0, 36.0, 41.0, 33.0, 34.0, 33.0, 15.0, 53.0, 54.0, 34.0, 45.0, 41.0, 43.0, 22.0, 17.0, 6.0, 28.0, 44.0, 54.0, 28.0, 36.0, 45.0, 38.0, 36.0, 62.0, 3.0, 6.0, 22.0, 17.0, 56.0, 36.0, 35.0, 52.0, 24.0, 32.0, 6.0, 3.0, 62.0, 82.0, 77.0, 76.0, 24.0, 27.0, 23.0, 27.0, 27.0, 25.0, 77.0, 79.0, 57.0, 79.0, 22.0, 23.0, 29.0, 19.0, 35.0, 24.0, 31.0, 11.0, 46.0, 49.0, 20.0, 26.0, 51.0, 66.0, 57.0, 39.0, 50.0, 43.0, 8.0, 23.0, 13.0, 31.0, 37.0, 57.0, 31.0, 22.0, 34.0, 16.0, 15.0, 30.0, 22.0, 22.0, 38.0, 46.0, 71.0, 62.0, 89.0, 84.0, 46.0, 41.0, 45.0, 47.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.691321764992291, "mean_inference_ms": 1.2113081635303937, "mean_action_processing_ms": 0.13352380109990097, "mean_env_wait_ms": 0.8776338249946448, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 173.0, "episode_reward_min": 6.0, "episode_reward_mean": 70.98, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 89.0}, "policy_reward_mean": {"ppo": 35.49}, "hist_stats": {"episode_reward": [6.0, 71.0, 131.0, 33.0, 12.0, 87.0, 37.0, 95.0, 102.0, 88.0, 20.0, 147.0, 45.0, 61.0, 79.0, 113.0, 31.0, 39.0, 39.0, 91.0, 68.0, 44.0, 52.0, 85.0, 44.0, 87.0, 20.0, 79.0, 122.0, 50.0, 82.0, 23.0, 26.0, 23.0, 23.0, 125.0, 65.0, 90.0, 73.0, 106.0, 80.0, 28.0, 9.0, 95.0, 96.0, 51.0, 110.0, 147.0, 28.0, 39.0, 9.0, 150.0, 88.0, 71.0, 77.0, 67.0, 48.0, 107.0, 79.0, 84.0, 39.0, 34.0, 98.0, 64.0, 83.0, 98.0, 9.0, 39.0, 92.0, 87.0, 56.0, 9.0, 144.0, 153.0, 51.0, 50.0, 52.0, 156.0, 136.0, 45.0, 48.0, 59.0, 42.0, 95.0, 46.0, 117.0, 96.0, 93.0, 31.0, 44.0, 94.0, 53.0, 50.0, 45.0, 44.0, 84.0, 133.0, 173.0, 87.0, 92.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [6.0, 0.0, 37.0, 34.0, 62.0, 69.0, 10.0, 23.0, 6.0, 6.0, 26.0, 61.0, 28.0, 9.0, 41.0, 54.0, 50.0, 52.0, 34.0, 54.0, 3.0, 17.0, 83.0, 64.0, 22.0, 23.0, 36.0, 25.0, 37.0, 42.0, 43.0, 70.0, 11.0, 20.0, 25.0, 14.0, 20.0, 19.0, 49.0, 42.0, 45.0, 23.0, 30.0, 14.0, 31.0, 21.0, 49.0, 36.0, 14.0, 30.0, 39.0, 48.0, 11.0, 9.0, 43.0, 36.0, 52.0, 70.0, 27.0, 23.0, 39.0, 43.0, 0.0, 23.0, 11.0, 15.0, 12.0, 11.0, 14.0, 9.0, 62.0, 63.0, 31.0, 34.0, 45.0, 45.0, 42.0, 31.0, 56.0, 50.0, 41.0, 39.0, 19.0, 9.0, 0.0, 9.0, 53.0, 42.0, 53.0, 43.0, 22.0, 29.0, 53.0, 57.0, 71.0, 76.0, 9.0, 19.0, 14.0, 25.0, 6.0, 3.0, 67.0, 83.0, 50.0, 38.0, 37.0, 34.0, 36.0, 41.0, 33.0, 34.0, 33.0, 15.0, 53.0, 54.0, 34.0, 45.0, 41.0, 43.0, 22.0, 17.0, 6.0, 28.0, 44.0, 54.0, 28.0, 36.0, 45.0, 38.0, 36.0, 62.0, 3.0, 6.0, 22.0, 17.0, 56.0, 36.0, 35.0, 52.0, 24.0, 32.0, 6.0, 3.0, 62.0, 82.0, 77.0, 76.0, 24.0, 27.0, 23.0, 27.0, 27.0, 25.0, 77.0, 79.0, 57.0, 79.0, 22.0, 23.0, 29.0, 19.0, 35.0, 24.0, 31.0, 11.0, 46.0, 49.0, 20.0, 26.0, 51.0, 66.0, 57.0, 39.0, 50.0, 43.0, 8.0, 23.0, 13.0, 31.0, 37.0, 57.0, 31.0, 22.0, 34.0, 16.0, 15.0, 30.0, 22.0, 22.0, 38.0, 46.0, 71.0, 62.0, 89.0, 84.0, 46.0, 41.0, 45.0, 47.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.691321764992291, "mean_inference_ms": 1.2113081635303937, "mean_action_processing_ms": 0.13352380109990097, "mean_env_wait_ms": 0.8776338249946448, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 819200, "num_agent_steps_trained": 819200, "num_env_steps_sampled": 409600, "num_env_steps_trained": 409600, 
"num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 409600, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 819200, "timers": {"training_iteration_time_ms": 3726.338, "learn_time_ms": 1112.85, "learn_throughput": 11502.002, "synch_weights_time_ms": 11.967}, "counters": {"num_env_steps_sampled": 409600, "num_env_steps_trained": 409600, "num_agent_steps_sampled": 819200, "num_agent_steps_trained": 819200}, "done": false, "episodes_total": 1024, "training_iteration": 32, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-02-01", "timestamp": 1666580521, "time_this_iter_s": 3.81624174118042, "time_total_s": 121.50089979171753, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": 
false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, 
"attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": 
true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, 
"attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": 
-1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, 
"_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 121.50089979171753, "timesteps_since_restore": 0, "iterations_since_restore": 32, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 24.22, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 15.0, "sparse_reward_min": 0, "sparse_reward_max": 60, "shaped_reward_mean": 46.47, "shaped_reward_min": 9, "shaped_reward_max": 76, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 6.46, "onion_pickup_agent_0_min": 0, "onion_pickup_agent_0_max": 13, "onion_pickup_agent_1_mean": 6.97, "onion_pickup_agent_1_min": 1, "onion_pickup_agent_1_max": 14, "useful_onion_pickup_agent_0_mean": 4.62, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 11, "useful_onion_pickup_agent_1_mean": 5.34, "useful_onion_pickup_agent_1_min": 0, "useful_onion_pickup_agent_1_max": 12, "onion_drop_agent_0_mean": 2.33, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 10, "onion_drop_agent_1_mean": 2.21, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 10, "useful_onion_drop_agent_0_mean": 1.5, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 9, "useful_onion_drop_agent_1_mean": 
1.34, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 9, "potting_onion_agent_0_mean": 3.75, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 10, "potting_onion_agent_1_mean": 4.34, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 11, "dish_pickup_agent_0_mean": 4.25, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 3.72, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 0.86, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 5, "useful_dish_pickup_agent_1_mean": 0.84, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 3, "dish_drop_agent_0_mean": 2.18, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 8, "dish_drop_agent_1_mean": 1.77, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 7, "useful_dish_drop_agent_0_mean": 0.8, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 7, "useful_dish_drop_agent_1_mean": 0.45, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 2.44, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 2.38, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 1.15, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 4, "soup_delivery_agent_1_mean": 1.25, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 4, "soup_drop_agent_0_mean": 1.17, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 7, "soup_drop_agent_1_mean": 1.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 5, "optimal_onion_potting_agent_0_mean": 3.75, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 10, "optimal_onion_potting_agent_1_mean": 4.34, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 11, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, 
"optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 3.75, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 10, "viable_onion_potting_agent_1_mean": 4.34, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 11, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 4.6566129424663316e-11, "cur_lr": 0.0010000000474974513, "total_loss": -0.005534800700843334, "policy_loss": -0.0050851586274802685, "vf_loss": 3.892998695373535, 
"vf_explained_var": 0.28922930359840393, "kl": 0.001608746824786067, "entropy": 1.677882194519043, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 422400, "num_env_steps_trained": 422400, "num_agent_steps_sampled": 844800, "num_agent_steps_trained": 844800}, "sampler_results": {"episode_reward_max": 173.0, "episode_reward_min": 9.0, "episode_reward_mean": 76.47, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 89.0}, "policy_reward_mean": {"ppo": 38.235}, "custom_metrics": {"sparse_reward_mean": 15.0, "sparse_reward_min": 0, "sparse_reward_max": 60, "shaped_reward_mean": 46.47, "shaped_reward_min": 9, "shaped_reward_max": 76, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 6.46, "onion_pickup_agent_0_min": 0, "onion_pickup_agent_0_max": 13, "onion_pickup_agent_1_mean": 6.97, "onion_pickup_agent_1_min": 1, 
"onion_pickup_agent_1_max": 14, "useful_onion_pickup_agent_0_mean": 4.62, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 11, "useful_onion_pickup_agent_1_mean": 5.34, "useful_onion_pickup_agent_1_min": 0, "useful_onion_pickup_agent_1_max": 12, "onion_drop_agent_0_mean": 2.33, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 10, "onion_drop_agent_1_mean": 2.21, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 10, "useful_onion_drop_agent_0_mean": 1.5, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 9, "useful_onion_drop_agent_1_mean": 1.34, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 9, "potting_onion_agent_0_mean": 3.75, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 10, "potting_onion_agent_1_mean": 4.34, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 11, "dish_pickup_agent_0_mean": 4.25, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 3.72, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 0.86, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 5, "useful_dish_pickup_agent_1_mean": 0.84, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 3, "dish_drop_agent_0_mean": 2.18, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 8, "dish_drop_agent_1_mean": 1.77, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 7, "useful_dish_drop_agent_0_mean": 0.8, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 7, "useful_dish_drop_agent_1_mean": 0.45, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 2.44, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 2.38, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 1.15, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 4, 
"soup_delivery_agent_1_mean": 1.25, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 4, "soup_drop_agent_0_mean": 1.17, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 7, "soup_drop_agent_1_mean": 1.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 5, "optimal_onion_potting_agent_0_mean": 3.75, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 10, "optimal_onion_potting_agent_1_mean": 4.34, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 11, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 3.75, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 10, "viable_onion_potting_agent_1_mean": 4.34, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 11, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, 
"useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [26.0, 23.0, 23.0, 125.0, 65.0, 90.0, 73.0, 106.0, 80.0, 28.0, 9.0, 95.0, 96.0, 51.0, 110.0, 147.0, 28.0, 39.0, 9.0, 150.0, 88.0, 71.0, 77.0, 67.0, 48.0, 107.0, 79.0, 84.0, 39.0, 34.0, 98.0, 64.0, 83.0, 98.0, 9.0, 39.0, 92.0, 87.0, 56.0, 9.0, 144.0, 153.0, 51.0, 50.0, 52.0, 156.0, 136.0, 45.0, 48.0, 59.0, 42.0, 95.0, 46.0, 117.0, 96.0, 93.0, 31.0, 44.0, 94.0, 53.0, 50.0, 45.0, 44.0, 84.0, 133.0, 173.0, 87.0, 92.0, 98.0, 133.0, 73.0, 122.0, 23.0, 62.0, 91.0, 80.0, 115.0, 25.0, 66.0, 110.0, 30.0, 145.0, 26.0, 109.0, 73.0, 101.0, 23.0, 102.0, 110.0, 60.0, 56.0, 101.0, 77.0, 96.0, 113.0, 66.0, 28.0, 149.0, 87.0, 82.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [11.0, 15.0, 12.0, 11.0, 14.0, 9.0, 62.0, 63.0, 31.0, 34.0, 45.0, 45.0, 42.0, 31.0, 56.0, 50.0, 41.0, 39.0, 19.0, 9.0, 0.0, 9.0, 53.0, 42.0, 53.0, 43.0, 22.0, 29.0, 53.0, 57.0, 71.0, 76.0, 9.0, 19.0, 14.0, 25.0, 6.0, 3.0, 67.0, 83.0, 50.0, 38.0, 37.0, 34.0, 36.0, 41.0, 33.0, 34.0, 33.0, 15.0, 53.0, 54.0, 34.0, 45.0, 41.0, 43.0, 22.0, 17.0, 6.0, 28.0, 44.0, 54.0, 28.0, 36.0, 45.0, 38.0, 36.0, 62.0, 3.0, 6.0, 22.0, 17.0, 56.0, 36.0, 35.0, 52.0, 24.0, 32.0, 6.0, 3.0, 
62.0, 82.0, 77.0, 76.0, 24.0, 27.0, 23.0, 27.0, 27.0, 25.0, 77.0, 79.0, 57.0, 79.0, 22.0, 23.0, 29.0, 19.0, 35.0, 24.0, 31.0, 11.0, 46.0, 49.0, 20.0, 26.0, 51.0, 66.0, 57.0, 39.0, 50.0, 43.0, 8.0, 23.0, 13.0, 31.0, 37.0, 57.0, 31.0, 22.0, 34.0, 16.0, 15.0, 30.0, 22.0, 22.0, 38.0, 46.0, 71.0, 62.0, 89.0, 84.0, 46.0, 41.0, 45.0, 47.0, 34.0, 64.0, 68.0, 65.0, 39.0, 34.0, 60.0, 62.0, 8.0, 15.0, 40.0, 22.0, 41.0, 50.0, 42.0, 38.0, 63.0, 52.0, 13.0, 12.0, 23.0, 43.0, 47.0, 63.0, 11.0, 19.0, 74.0, 71.0, 17.0, 9.0, 54.0, 55.0, 37.0, 36.0, 47.0, 54.0, 11.0, 12.0, 53.0, 49.0, 62.0, 48.0, 20.0, 40.0, 23.0, 33.0, 55.0, 46.0, 44.0, 33.0, 42.0, 54.0, 54.0, 59.0, 35.0, 31.0, 20.0, 8.0, 72.0, 77.0, 48.0, 39.0, 34.0, 48.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6909394660361795, "mean_inference_ms": 1.2106474727147383, "mean_action_processing_ms": 0.1334392124001721, "mean_env_wait_ms": 0.8773286615961038, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 173.0, "episode_reward_min": 9.0, "episode_reward_mean": 76.47, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 89.0}, "policy_reward_mean": {"ppo": 38.235}, "hist_stats": {"episode_reward": [26.0, 23.0, 23.0, 125.0, 65.0, 90.0, 73.0, 106.0, 80.0, 28.0, 9.0, 95.0, 96.0, 51.0, 110.0, 147.0, 28.0, 39.0, 9.0, 150.0, 88.0, 71.0, 77.0, 67.0, 48.0, 107.0, 79.0, 84.0, 39.0, 34.0, 98.0, 64.0, 83.0, 98.0, 9.0, 39.0, 92.0, 87.0, 56.0, 9.0, 144.0, 153.0, 51.0, 50.0, 52.0, 156.0, 136.0, 45.0, 48.0, 59.0, 42.0, 95.0, 46.0, 117.0, 96.0, 93.0, 31.0, 44.0, 94.0, 53.0, 50.0, 45.0, 44.0, 84.0, 133.0, 173.0, 87.0, 92.0, 98.0, 133.0, 73.0, 122.0, 23.0, 62.0, 91.0, 80.0, 115.0, 25.0, 66.0, 110.0, 30.0, 145.0, 26.0, 109.0, 73.0, 101.0, 23.0, 102.0, 110.0, 60.0, 56.0, 101.0, 77.0, 96.0, 113.0, 66.0, 28.0, 149.0, 87.0, 82.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [11.0, 15.0, 12.0, 11.0, 14.0, 9.0, 62.0, 63.0, 31.0, 34.0, 45.0, 45.0, 42.0, 31.0, 56.0, 50.0, 41.0, 39.0, 19.0, 9.0, 0.0, 9.0, 53.0, 42.0, 53.0, 43.0, 22.0, 29.0, 53.0, 57.0, 71.0, 76.0, 9.0, 19.0, 14.0, 25.0, 6.0, 3.0, 67.0, 83.0, 50.0, 38.0, 37.0, 34.0, 36.0, 41.0, 33.0, 34.0, 33.0, 15.0, 53.0, 54.0, 34.0, 45.0, 41.0, 43.0, 22.0, 17.0, 6.0, 28.0, 44.0, 54.0, 28.0, 36.0, 45.0, 38.0, 36.0, 62.0, 3.0, 6.0, 22.0, 17.0, 56.0, 36.0, 35.0, 52.0, 24.0, 32.0, 6.0, 3.0, 62.0, 82.0, 77.0, 76.0, 24.0, 27.0, 23.0, 27.0, 27.0, 25.0, 77.0, 79.0, 57.0, 79.0, 22.0, 23.0, 29.0, 19.0, 35.0, 24.0, 31.0, 11.0, 46.0, 49.0, 20.0, 26.0, 51.0, 66.0, 57.0, 39.0, 50.0, 43.0, 8.0, 23.0, 13.0, 31.0, 37.0, 57.0, 31.0, 22.0, 34.0, 16.0, 15.0, 30.0, 22.0, 22.0, 38.0, 46.0, 71.0, 62.0, 89.0, 84.0, 46.0, 41.0, 45.0, 47.0, 34.0, 64.0, 68.0, 65.0, 39.0, 34.0, 60.0, 62.0, 8.0, 15.0, 40.0, 22.0, 41.0, 50.0, 42.0, 38.0, 63.0, 52.0, 13.0, 12.0, 23.0, 43.0, 47.0, 63.0, 11.0, 19.0, 74.0, 71.0, 17.0, 9.0, 54.0, 55.0, 37.0, 36.0, 47.0, 54.0, 11.0, 12.0, 53.0, 49.0, 62.0, 48.0, 20.0, 40.0, 23.0, 33.0, 55.0, 46.0, 44.0, 33.0, 42.0, 54.0, 54.0, 59.0, 35.0, 31.0, 20.0, 8.0, 72.0, 77.0, 48.0, 39.0, 34.0, 48.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6909394660361795, "mean_inference_ms": 1.2106474727147383, "mean_action_processing_ms": 0.1334392124001721, "mean_env_wait_ms": 0.8773286615961038, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 844800, "num_agent_steps_trained": 844800, "num_env_steps_sampled": 422400, 
"num_env_steps_trained": 422400, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 422400, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 844800, "timers": {"training_iteration_time_ms": 3705.997, "learn_time_ms": 1102.749, "learn_throughput": 11607.351, "synch_weights_time_ms": 12.176}, "counters": {"num_env_steps_sampled": 422400, "num_env_steps_trained": 422400, "num_agent_steps_sampled": 844800, "num_agent_steps_trained": 844800}, "done": false, "episodes_total": 1056, "training_iteration": 33, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-02-04", "timestamp": 1666580524, "time_this_iter_s": 3.704005479812622, "time_total_s": 125.20490527153015, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, 
"reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, 
"attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": 
[], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": 
false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, 
"buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, 
"_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 125.20490527153015, "timesteps_since_restore": 0, "iterations_since_restore": 33, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 21.06666666666667, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 17.2, "sparse_reward_min": 0, "sparse_reward_max": 60, "shaped_reward_mean": 50.11, "shaped_reward_min": 9, "shaped_reward_max": 83, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 6.67, "onion_pickup_agent_0_min": 0, "onion_pickup_agent_0_max": 13, "onion_pickup_agent_1_mean": 7.06, "onion_pickup_agent_1_min": 1, "onion_pickup_agent_1_max": 14, "useful_onion_pickup_agent_0_mean": 5.04, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 12, "useful_onion_pickup_agent_1_mean": 5.52, "useful_onion_pickup_agent_1_min": 0, "useful_onion_pickup_agent_1_max": 12, "onion_drop_agent_0_mean": 2.21, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 10, "onion_drop_agent_1_mean": 2.09, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 10, "useful_onion_drop_agent_0_mean": 1.23, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 
1.25, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 9, "potting_onion_agent_0_mean": 4.07, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 10, "potting_onion_agent_1_mean": 4.53, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 11, "dish_pickup_agent_0_mean": 4.07, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 13, "dish_pickup_agent_1_mean": 3.95, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 0.92, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 5, "useful_dish_pickup_agent_1_mean": 0.95, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 3, "dish_drop_agent_0_mean": 1.88, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 9, "dish_drop_agent_1_mean": 1.85, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 7, "useful_dish_drop_agent_0_mean": 0.7, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 7, "useful_dish_drop_agent_1_mean": 0.5, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 2.56, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 2.55, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 1.27, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 5, "soup_delivery_agent_1_mean": 1.36, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 4, "soup_drop_agent_0_mean": 1.15, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 5, "soup_drop_agent_1_mean": 1.08, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 5, "optimal_onion_potting_agent_0_mean": 4.07, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 10, "optimal_onion_potting_agent_1_mean": 4.53, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 11, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, 
"optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 4.07, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 10, "viable_onion_potting_agent_1_mean": 4.53, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 11, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 2.3283064712331658e-11, "cur_lr": 0.0010000000474974513, "total_loss": -0.0033294381573796272, "policy_loss": -0.0029248581267893314, "vf_loss": 4.272636890411377, 
"vf_explained_var": 0.2562987506389618, "kl": 0.001298167509958148, "entropy": 1.6636810302734375, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 435200, "num_env_steps_trained": 435200, "num_agent_steps_sampled": 870400, "num_agent_steps_trained": 870400}, "sampler_results": {"episode_reward_max": 193.0, "episode_reward_min": 9.0, "episode_reward_mean": 84.51, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 3.0}, "policy_reward_max": {"ppo": 108.0}, "policy_reward_mean": {"ppo": 42.255}, "custom_metrics": {"sparse_reward_mean": 17.2, "sparse_reward_min": 0, "sparse_reward_max": 60, "shaped_reward_mean": 50.11, "shaped_reward_min": 9, "shaped_reward_max": 83, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 6.67, "onion_pickup_agent_0_min": 0, "onion_pickup_agent_0_max": 13, "onion_pickup_agent_1_mean": 7.06, "onion_pickup_agent_1_min": 1, 
"onion_pickup_agent_1_max": 14, "useful_onion_pickup_agent_0_mean": 5.04, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 12, "useful_onion_pickup_agent_1_mean": 5.52, "useful_onion_pickup_agent_1_min": 0, "useful_onion_pickup_agent_1_max": 12, "onion_drop_agent_0_mean": 2.21, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 10, "onion_drop_agent_1_mean": 2.09, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 10, "useful_onion_drop_agent_0_mean": 1.23, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 1.25, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 9, "potting_onion_agent_0_mean": 4.07, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 10, "potting_onion_agent_1_mean": 4.53, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 11, "dish_pickup_agent_0_mean": 4.07, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 13, "dish_pickup_agent_1_mean": 3.95, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 0.92, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 5, "useful_dish_pickup_agent_1_mean": 0.95, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 3, "dish_drop_agent_0_mean": 1.88, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 9, "dish_drop_agent_1_mean": 1.85, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 7, "useful_dish_drop_agent_0_mean": 0.7, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 7, "useful_dish_drop_agent_1_mean": 0.5, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 2.56, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 2.55, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 1.27, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 5, 
"soup_delivery_agent_1_mean": 1.36, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 4, "soup_drop_agent_0_mean": 1.15, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 5, "soup_drop_agent_1_mean": 1.08, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 5, "optimal_onion_potting_agent_0_mean": 4.07, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 10, "optimal_onion_potting_agent_1_mean": 4.53, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 11, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 4.07, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 10, "viable_onion_potting_agent_1_mean": 4.53, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 11, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, 
"useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [83.0, 98.0, 9.0, 39.0, 92.0, 87.0, 56.0, 9.0, 144.0, 153.0, 51.0, 50.0, 52.0, 156.0, 136.0, 45.0, 48.0, 59.0, 42.0, 95.0, 46.0, 117.0, 96.0, 93.0, 31.0, 44.0, 94.0, 53.0, 50.0, 45.0, 44.0, 84.0, 133.0, 173.0, 87.0, 92.0, 98.0, 133.0, 73.0, 122.0, 23.0, 62.0, 91.0, 80.0, 115.0, 25.0, 66.0, 110.0, 30.0, 145.0, 26.0, 109.0, 73.0, 101.0, 23.0, 102.0, 110.0, 60.0, 56.0, 101.0, 77.0, 96.0, 113.0, 66.0, 28.0, 149.0, 87.0, 82.0, 102.0, 39.0, 101.0, 193.0, 93.0, 50.0, 110.0, 161.0, 139.0, 70.0, 14.0, 93.0, 66.0, 101.0, 179.0, 144.0, 84.0, 33.0, 188.0, 53.0, 123.0, 23.0, 66.0, 130.0, 136.0, 48.0, 42.0, 110.0, 96.0, 103.0, 28.0, 115.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [45.0, 38.0, 36.0, 62.0, 3.0, 6.0, 22.0, 17.0, 56.0, 36.0, 35.0, 52.0, 24.0, 32.0, 6.0, 3.0, 62.0, 82.0, 77.0, 76.0, 24.0, 27.0, 23.0, 27.0, 27.0, 25.0, 77.0, 79.0, 57.0, 79.0, 22.0, 23.0, 29.0, 19.0, 35.0, 24.0, 31.0, 11.0, 46.0, 49.0, 20.0, 26.0, 51.0, 66.0, 57.0, 39.0, 50.0, 43.0, 8.0, 23.0, 13.0, 31.0, 37.0, 57.0, 31.0, 22.0, 34.0, 16.0, 15.0, 30.0, 22.0, 22.0, 38.0, 46.0, 71.0, 62.0, 89.0, 84.0, 46.0, 41.0, 45.0, 47.0, 34.0, 64.0, 68.0, 65.0, 
39.0, 34.0, 60.0, 62.0, 8.0, 15.0, 40.0, 22.0, 41.0, 50.0, 42.0, 38.0, 63.0, 52.0, 13.0, 12.0, 23.0, 43.0, 47.0, 63.0, 11.0, 19.0, 74.0, 71.0, 17.0, 9.0, 54.0, 55.0, 37.0, 36.0, 47.0, 54.0, 11.0, 12.0, 53.0, 49.0, 62.0, 48.0, 20.0, 40.0, 23.0, 33.0, 55.0, 46.0, 44.0, 33.0, 42.0, 54.0, 54.0, 59.0, 35.0, 31.0, 20.0, 8.0, 72.0, 77.0, 48.0, 39.0, 34.0, 48.0, 48.0, 54.0, 19.0, 20.0, 56.0, 45.0, 89.0, 104.0, 44.0, 49.0, 17.0, 33.0, 66.0, 44.0, 83.0, 78.0, 71.0, 68.0, 48.0, 22.0, 11.0, 3.0, 47.0, 46.0, 26.0, 40.0, 41.0, 60.0, 71.0, 108.0, 63.0, 81.0, 36.0, 48.0, 25.0, 8.0, 89.0, 99.0, 28.0, 25.0, 63.0, 60.0, 20.0, 3.0, 40.0, 26.0, 64.0, 66.0, 66.0, 70.0, 30.0, 18.0, 22.0, 20.0, 64.0, 46.0, 61.0, 35.0, 43.0, 60.0, 8.0, 20.0, 53.0, 62.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6906616942712467, "mean_inference_ms": 1.2101625683690138, "mean_action_processing_ms": 0.13338849796915966, "mean_env_wait_ms": 0.8773535896870084, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 193.0, "episode_reward_min": 9.0, "episode_reward_mean": 84.51, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 3.0}, "policy_reward_max": {"ppo": 108.0}, "policy_reward_mean": {"ppo": 42.255}, "hist_stats": {"episode_reward": [83.0, 98.0, 9.0, 39.0, 92.0, 87.0, 56.0, 9.0, 144.0, 153.0, 51.0, 50.0, 52.0, 156.0, 136.0, 45.0, 48.0, 59.0, 42.0, 95.0, 46.0, 117.0, 96.0, 93.0, 31.0, 44.0, 94.0, 53.0, 50.0, 45.0, 44.0, 84.0, 133.0, 173.0, 87.0, 92.0, 98.0, 133.0, 73.0, 122.0, 23.0, 62.0, 91.0, 80.0, 115.0, 25.0, 66.0, 110.0, 30.0, 145.0, 26.0, 109.0, 73.0, 101.0, 23.0, 102.0, 110.0, 60.0, 56.0, 101.0, 77.0, 96.0, 113.0, 66.0, 28.0, 149.0, 87.0, 82.0, 102.0, 39.0, 101.0, 193.0, 93.0, 50.0, 110.0, 161.0, 139.0, 70.0, 14.0, 93.0, 66.0, 101.0, 179.0, 144.0, 84.0, 33.0, 188.0, 53.0, 123.0, 23.0, 66.0, 130.0, 136.0, 48.0, 42.0, 110.0, 96.0, 103.0, 28.0, 115.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [45.0, 38.0, 36.0, 62.0, 3.0, 6.0, 22.0, 17.0, 56.0, 36.0, 35.0, 52.0, 24.0, 32.0, 6.0, 3.0, 62.0, 82.0, 77.0, 76.0, 24.0, 27.0, 23.0, 27.0, 27.0, 25.0, 77.0, 79.0, 57.0, 79.0, 22.0, 23.0, 29.0, 19.0, 35.0, 24.0, 31.0, 11.0, 46.0, 49.0, 20.0, 26.0, 51.0, 66.0, 57.0, 39.0, 50.0, 43.0, 8.0, 23.0, 13.0, 31.0, 37.0, 57.0, 31.0, 22.0, 34.0, 16.0, 15.0, 30.0, 22.0, 22.0, 38.0, 46.0, 71.0, 62.0, 89.0, 84.0, 46.0, 41.0, 45.0, 47.0, 34.0, 64.0, 68.0, 65.0, 39.0, 34.0, 60.0, 62.0, 8.0, 15.0, 40.0, 22.0, 41.0, 50.0, 42.0, 38.0, 63.0, 52.0, 13.0, 12.0, 23.0, 43.0, 47.0, 63.0, 11.0, 19.0, 74.0, 71.0, 17.0, 9.0, 54.0, 55.0, 37.0, 36.0, 47.0, 54.0, 11.0, 12.0, 53.0, 49.0, 62.0, 48.0, 20.0, 40.0, 23.0, 33.0, 55.0, 46.0, 44.0, 33.0, 42.0, 54.0, 54.0, 59.0, 35.0, 31.0, 20.0, 8.0, 72.0, 77.0, 48.0, 39.0, 34.0, 48.0, 48.0, 54.0, 19.0, 20.0, 56.0, 45.0, 89.0, 104.0, 44.0, 49.0, 17.0, 33.0, 66.0, 44.0, 83.0, 78.0, 71.0, 68.0, 48.0, 22.0, 11.0, 3.0, 47.0, 46.0, 26.0, 40.0, 41.0, 60.0, 71.0, 108.0, 63.0, 81.0, 36.0, 48.0, 25.0, 8.0, 89.0, 99.0, 28.0, 25.0, 63.0, 60.0, 20.0, 3.0, 40.0, 26.0, 64.0, 66.0, 66.0, 70.0, 30.0, 18.0, 22.0, 20.0, 64.0, 46.0, 61.0, 35.0, 43.0, 60.0, 8.0, 20.0, 53.0, 62.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6906616942712467, "mean_inference_ms": 1.2101625683690138, "mean_action_processing_ms": 0.13338849796915966, "mean_env_wait_ms": 0.8773535896870084, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 870400, "num_agent_steps_trained": 870400, 
"num_env_steps_sampled": 435200, "num_env_steps_trained": 435200, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 435200, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 870400, "timers": {"training_iteration_time_ms": 3703.091, "learn_time_ms": 1099.905, "learn_throughput": 11637.363, "synch_weights_time_ms": 11.648}, "counters": {"num_env_steps_sampled": 435200, "num_env_steps_trained": 435200, "num_agent_steps_sampled": 870400, "num_agent_steps_trained": 870400}, "done": false, "episodes_total": 1088, "training_iteration": 34, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-02-08", "timestamp": 1666580528, "time_this_iter_s": 3.8429672718048096, "time_total_s": 129.04787254333496, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": 
{"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, 
"attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, 
"counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, 
"lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, 
"min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, 
"_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 129.04787254333496, "timesteps_since_restore": 0, "iterations_since_restore": 34, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 21.316666666666666, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 19.6, "sparse_reward_min": 0, "sparse_reward_max": 60, "shaped_reward_mean": 51.76, "shaped_reward_min": 14, "shaped_reward_max": 86, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 6.54, "onion_pickup_agent_0_min": 0, "onion_pickup_agent_0_max": 13, "onion_pickup_agent_1_mean": 7.34, "onion_pickup_agent_1_min": 1, "onion_pickup_agent_1_max": 14, "useful_onion_pickup_agent_0_mean": 5.17, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 12, "useful_onion_pickup_agent_1_mean": 5.74, "useful_onion_pickup_agent_1_min": 0, "useful_onion_pickup_agent_1_max": 12, "onion_drop_agent_0_mean": 2.02, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 7, "onion_drop_agent_1_mean": 2.14, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 10, "useful_onion_drop_agent_0_mean": 1.08, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 
1.17, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 9, "potting_onion_agent_0_mean": 4.11, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 10, "potting_onion_agent_1_mean": 4.8, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 11, "dish_pickup_agent_0_mean": 4.42, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 13, "dish_pickup_agent_1_mean": 4.05, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 0.99, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 6, "useful_dish_pickup_agent_1_mean": 0.92, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 3, "dish_drop_agent_0_mean": 2.11, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 9, "dish_drop_agent_1_mean": 1.96, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.77, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 7, "useful_dish_drop_agent_1_mean": 0.66, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 5, "soup_pickup_agent_0_mean": 2.76, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 2.56, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 10, "soup_delivery_agent_0_mean": 1.48, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 5, "soup_delivery_agent_1_mean": 1.29, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 4, "soup_drop_agent_0_mean": 1.15, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 7, "soup_drop_agent_1_mean": 1.14, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 9, "optimal_onion_potting_agent_0_mean": 4.11, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 10, "optimal_onion_potting_agent_1_mean": 4.8, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 11, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, 
"optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 4.11, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 10, "viable_onion_potting_agent_1_mean": 4.8, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 11, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 1.1641532356165829e-11, "cur_lr": 0.0010000000474974513, "total_loss": -0.005998552311211824, "policy_loss": -0.005601278506219387, "vf_loss": 4.330240726470947, 
"vf_explained_var": 0.26162025332450867, "kl": 0.0014254730194807053, "entropy": 1.6605894565582275, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 448000, "num_env_steps_trained": 448000, "num_agent_steps_sampled": 896000, "num_agent_steps_trained": 896000}, "sampler_results": {"episode_reward_max": 203.0, "episode_reward_min": 14.0, "episode_reward_mean": 90.96, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 3.0}, "policy_reward_max": {"ppo": 108.0}, "policy_reward_mean": {"ppo": 45.48}, "custom_metrics": {"sparse_reward_mean": 19.6, "sparse_reward_min": 0, "sparse_reward_max": 60, "shaped_reward_mean": 51.76, "shaped_reward_min": 14, "shaped_reward_max": 86, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 6.54, "onion_pickup_agent_0_min": 0, "onion_pickup_agent_0_max": 13, "onion_pickup_agent_1_mean": 7.34, "onion_pickup_agent_1_min": 1, 
"onion_pickup_agent_1_max": 14, "useful_onion_pickup_agent_0_mean": 5.17, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 12, "useful_onion_pickup_agent_1_mean": 5.74, "useful_onion_pickup_agent_1_min": 0, "useful_onion_pickup_agent_1_max": 12, "onion_drop_agent_0_mean": 2.02, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 7, "onion_drop_agent_1_mean": 2.14, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 10, "useful_onion_drop_agent_0_mean": 1.08, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 1.17, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 9, "potting_onion_agent_0_mean": 4.11, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 10, "potting_onion_agent_1_mean": 4.8, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 11, "dish_pickup_agent_0_mean": 4.42, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 13, "dish_pickup_agent_1_mean": 4.05, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 0.99, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 6, "useful_dish_pickup_agent_1_mean": 0.92, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 3, "dish_drop_agent_0_mean": 2.11, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 9, "dish_drop_agent_1_mean": 1.96, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.77, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 7, "useful_dish_drop_agent_1_mean": 0.66, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 5, "soup_pickup_agent_0_mean": 2.76, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 2.56, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 10, "soup_delivery_agent_0_mean": 1.48, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 5, 
"soup_delivery_agent_1_mean": 1.29, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 4, "soup_drop_agent_0_mean": 1.15, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 7, "soup_drop_agent_1_mean": 1.14, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 9, "optimal_onion_potting_agent_0_mean": 4.11, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 10, "optimal_onion_potting_agent_1_mean": 4.8, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 11, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 4.11, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 10, "viable_onion_potting_agent_1_mean": 4.8, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 11, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, 
"useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [133.0, 173.0, 87.0, 92.0, 98.0, 133.0, 73.0, 122.0, 23.0, 62.0, 91.0, 80.0, 115.0, 25.0, 66.0, 110.0, 30.0, 145.0, 26.0, 109.0, 73.0, 101.0, 23.0, 102.0, 110.0, 60.0, 56.0, 101.0, 77.0, 96.0, 113.0, 66.0, 28.0, 149.0, 87.0, 82.0, 102.0, 39.0, 101.0, 193.0, 93.0, 50.0, 110.0, 161.0, 139.0, 70.0, 14.0, 93.0, 66.0, 101.0, 179.0, 144.0, 84.0, 33.0, 188.0, 53.0, 123.0, 23.0, 66.0, 130.0, 136.0, 48.0, 42.0, 110.0, 96.0, 103.0, 28.0, 115.0, 45.0, 86.0, 36.0, 88.0, 52.0, 155.0, 76.0, 47.0, 45.0, 23.0, 135.0, 150.0, 39.0, 23.0, 103.0, 67.0, 93.0, 122.0, 133.0, 82.0, 85.0, 119.0, 104.0, 90.0, 147.0, 87.0, 55.0, 96.0, 182.0, 31.0, 147.0, 203.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [71.0, 62.0, 89.0, 84.0, 46.0, 41.0, 45.0, 47.0, 34.0, 64.0, 68.0, 65.0, 39.0, 34.0, 60.0, 62.0, 8.0, 15.0, 40.0, 22.0, 41.0, 50.0, 42.0, 38.0, 63.0, 52.0, 13.0, 12.0, 23.0, 43.0, 47.0, 63.0, 11.0, 19.0, 74.0, 71.0, 17.0, 9.0, 54.0, 55.0, 37.0, 36.0, 47.0, 54.0, 11.0, 12.0, 53.0, 49.0, 62.0, 48.0, 20.0, 40.0, 23.0, 33.0, 55.0, 46.0, 44.0, 33.0, 42.0, 54.0, 54.0, 59.0, 35.0, 31.0, 20.0, 8.0, 72.0, 77.0, 48.0, 39.0, 34.0, 48.0, 48.0, 54.0, 19.0, 
20.0, 56.0, 45.0, 89.0, 104.0, 44.0, 49.0, 17.0, 33.0, 66.0, 44.0, 83.0, 78.0, 71.0, 68.0, 48.0, 22.0, 11.0, 3.0, 47.0, 46.0, 26.0, 40.0, 41.0, 60.0, 71.0, 108.0, 63.0, 81.0, 36.0, 48.0, 25.0, 8.0, 89.0, 99.0, 28.0, 25.0, 63.0, 60.0, 20.0, 3.0, 40.0, 26.0, 64.0, 66.0, 66.0, 70.0, 30.0, 18.0, 22.0, 20.0, 64.0, 46.0, 61.0, 35.0, 43.0, 60.0, 8.0, 20.0, 53.0, 62.0, 9.0, 36.0, 57.0, 29.0, 14.0, 22.0, 45.0, 43.0, 28.0, 24.0, 78.0, 77.0, 36.0, 40.0, 17.0, 30.0, 29.0, 16.0, 17.0, 6.0, 75.0, 60.0, 71.0, 79.0, 14.0, 25.0, 12.0, 11.0, 57.0, 46.0, 36.0, 31.0, 44.0, 49.0, 61.0, 61.0, 66.0, 67.0, 37.0, 45.0, 46.0, 39.0, 48.0, 71.0, 40.0, 64.0, 37.0, 53.0, 90.0, 57.0, 50.0, 37.0, 28.0, 27.0, 44.0, 52.0, 83.0, 99.0, 14.0, 17.0, 73.0, 74.0, 95.0, 108.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6906168886604556, "mean_inference_ms": 1.2098430190540526, "mean_action_processing_ms": 0.13335690940844777, "mean_env_wait_ms": 0.8774501217181921, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 203.0, "episode_reward_min": 14.0, "episode_reward_mean": 90.96, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 3.0}, "policy_reward_max": {"ppo": 108.0}, "policy_reward_mean": {"ppo": 45.48}, "hist_stats": {"episode_reward": [133.0, 173.0, 87.0, 92.0, 98.0, 133.0, 73.0, 122.0, 23.0, 62.0, 91.0, 80.0, 115.0, 25.0, 66.0, 110.0, 30.0, 145.0, 26.0, 109.0, 73.0, 101.0, 23.0, 102.0, 110.0, 60.0, 56.0, 101.0, 77.0, 96.0, 113.0, 66.0, 28.0, 149.0, 87.0, 82.0, 102.0, 39.0, 101.0, 193.0, 93.0, 50.0, 110.0, 161.0, 139.0, 70.0, 14.0, 93.0, 66.0, 101.0, 179.0, 144.0, 84.0, 33.0, 188.0, 53.0, 123.0, 23.0, 66.0, 130.0, 136.0, 48.0, 42.0, 110.0, 96.0, 103.0, 28.0, 115.0, 45.0, 86.0, 36.0, 88.0, 52.0, 155.0, 76.0, 47.0, 45.0, 23.0, 135.0, 150.0, 39.0, 23.0, 103.0, 67.0, 93.0, 122.0, 133.0, 82.0, 85.0, 119.0, 104.0, 90.0, 147.0, 87.0, 55.0, 96.0, 182.0, 31.0, 147.0, 203.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [71.0, 62.0, 89.0, 84.0, 46.0, 41.0, 45.0, 47.0, 34.0, 64.0, 68.0, 65.0, 39.0, 34.0, 60.0, 62.0, 8.0, 15.0, 40.0, 22.0, 41.0, 50.0, 42.0, 38.0, 63.0, 52.0, 13.0, 12.0, 23.0, 43.0, 47.0, 63.0, 11.0, 19.0, 74.0, 71.0, 17.0, 9.0, 54.0, 55.0, 37.0, 36.0, 47.0, 54.0, 11.0, 12.0, 53.0, 49.0, 62.0, 48.0, 20.0, 40.0, 23.0, 33.0, 55.0, 46.0, 44.0, 33.0, 42.0, 54.0, 54.0, 59.0, 35.0, 31.0, 20.0, 8.0, 72.0, 77.0, 48.0, 39.0, 34.0, 48.0, 48.0, 54.0, 19.0, 20.0, 56.0, 45.0, 89.0, 104.0, 44.0, 49.0, 17.0, 33.0, 66.0, 44.0, 83.0, 78.0, 71.0, 68.0, 48.0, 22.0, 11.0, 3.0, 47.0, 46.0, 26.0, 40.0, 41.0, 60.0, 71.0, 108.0, 63.0, 81.0, 36.0, 48.0, 25.0, 8.0, 89.0, 99.0, 28.0, 25.0, 63.0, 60.0, 20.0, 3.0, 40.0, 26.0, 64.0, 66.0, 66.0, 70.0, 30.0, 18.0, 22.0, 20.0, 64.0, 46.0, 61.0, 35.0, 43.0, 60.0, 8.0, 20.0, 53.0, 62.0, 9.0, 36.0, 57.0, 29.0, 14.0, 22.0, 45.0, 43.0, 28.0, 24.0, 78.0, 77.0, 36.0, 40.0, 17.0, 30.0, 29.0, 16.0, 17.0, 6.0, 75.0, 60.0, 71.0, 79.0, 14.0, 25.0, 12.0, 11.0, 57.0, 46.0, 36.0, 31.0, 44.0, 49.0, 61.0, 61.0, 66.0, 67.0, 37.0, 45.0, 46.0, 39.0, 48.0, 71.0, 40.0, 64.0, 37.0, 53.0, 90.0, 57.0, 50.0, 37.0, 28.0, 27.0, 44.0, 52.0, 83.0, 99.0, 14.0, 17.0, 73.0, 74.0, 95.0, 108.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6906168886604556, "mean_inference_ms": 1.2098430190540526, "mean_action_processing_ms": 0.13335690940844777, "mean_env_wait_ms": 0.8774501217181921, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 896000, 
"num_agent_steps_trained": 896000, "num_env_steps_sampled": 448000, "num_env_steps_trained": 448000, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 448000, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 896000, "timers": {"training_iteration_time_ms": 3694.511, "learn_time_ms": 1092.395, "learn_throughput": 11717.368, "synch_weights_time_ms": 11.57}, "counters": {"num_env_steps_sampled": 448000, "num_env_steps_trained": 448000, "num_agent_steps_sampled": 896000, "num_agent_steps_trained": 896000}, "done": false, "episodes_total": 1120, "training_iteration": 35, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-02-12", "timestamp": 1666580532, "time_this_iter_s": 3.6886072158813477, "time_total_s": 132.7364797592163, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": 
true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, 
"attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, 
"wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, 
"lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, 
"timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": 
false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 132.7364797592163, "timesteps_since_restore": 0, "iterations_since_restore": 35, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 24.259999999999998, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 20.2, "sparse_reward_min": 0, "sparse_reward_max": 60, "shaped_reward_mean": 53.37, "shaped_reward_min": 9, "shaped_reward_max": 86, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 7.23, "onion_pickup_agent_0_min": 1, "onion_pickup_agent_0_max": 16, "onion_pickup_agent_1_mean": 7.37, "onion_pickup_agent_1_min": 1, "onion_pickup_agent_1_max": 14, "useful_onion_pickup_agent_0_mean": 5.67, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 13, "useful_onion_pickup_agent_1_mean": 5.72, "useful_onion_pickup_agent_1_min": 1, "useful_onion_pickup_agent_1_max": 11, "onion_drop_agent_0_mean": 2.3, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 8, "onion_drop_agent_1_mean": 2.27, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 9, "useful_onion_drop_agent_0_mean": 1.26, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 1.3, 
"useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 9, "potting_onion_agent_0_mean": 4.44, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 11, "potting_onion_agent_1_mean": 4.69, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 11, "dish_pickup_agent_0_mean": 4.01, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 13, "dish_pickup_agent_1_mean": 4.11, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 1.04, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 6, "useful_dish_pickup_agent_1_mean": 0.97, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 5, "dish_drop_agent_0_mean": 1.83, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 9, "dish_drop_agent_1_mean": 1.95, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.56, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 4, "useful_dish_drop_agent_1_mean": 0.71, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 5, "soup_pickup_agent_0_mean": 2.82, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 2.75, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 10, "soup_delivery_agent_0_mean": 1.5, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 5, "soup_delivery_agent_1_mean": 1.37, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 4, "soup_drop_agent_0_mean": 1.18, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 7, "soup_drop_agent_1_mean": 1.22, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 9, "optimal_onion_potting_agent_0_mean": 4.44, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 11, "optimal_onion_potting_agent_1_mean": 4.69, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 11, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, 
"optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 4.44, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 11, "viable_onion_potting_agent_1_mean": 4.69, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 11, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 5.8207661780829145e-12, "cur_lr": 0.0010000000474974513, "total_loss": -0.006255907937884331, "policy_loss": -0.005860620643943548, "vf_loss": 4.331794738769531, 
"vf_explained_var": 0.3150397837162018, "kl": 0.0014443796826526523, "entropy": 1.656929612159729, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 460800, "num_env_steps_trained": 460800, "num_agent_steps_sampled": 921600, "num_agent_steps_trained": 921600}, "sampler_results": {"episode_reward_max": 203.0, "episode_reward_min": 9.0, "episode_reward_mean": 93.77, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 3.0}, "policy_reward_max": {"ppo": 108.0}, "policy_reward_mean": {"ppo": 46.885}, "custom_metrics": {"sparse_reward_mean": 20.2, "sparse_reward_min": 0, "sparse_reward_max": 60, "shaped_reward_mean": 53.37, "shaped_reward_min": 9, "shaped_reward_max": 86, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 7.23, "onion_pickup_agent_0_min": 1, "onion_pickup_agent_0_max": 16, "onion_pickup_agent_1_mean": 7.37, "onion_pickup_agent_1_min": 1, 
"onion_pickup_agent_1_max": 14, "useful_onion_pickup_agent_0_mean": 5.67, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 13, "useful_onion_pickup_agent_1_mean": 5.72, "useful_onion_pickup_agent_1_min": 1, "useful_onion_pickup_agent_1_max": 11, "onion_drop_agent_0_mean": 2.3, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 8, "onion_drop_agent_1_mean": 2.27, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 9, "useful_onion_drop_agent_0_mean": 1.26, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 1.3, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 9, "potting_onion_agent_0_mean": 4.44, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 11, "potting_onion_agent_1_mean": 4.69, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 11, "dish_pickup_agent_0_mean": 4.01, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 13, "dish_pickup_agent_1_mean": 4.11, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 1.04, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 6, "useful_dish_pickup_agent_1_mean": 0.97, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 5, "dish_drop_agent_0_mean": 1.83, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 9, "dish_drop_agent_1_mean": 1.95, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.56, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 4, "useful_dish_drop_agent_1_mean": 0.71, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 5, "soup_pickup_agent_0_mean": 2.82, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 2.75, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 10, "soup_delivery_agent_0_mean": 1.5, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 5, 
"soup_delivery_agent_1_mean": 1.37, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 4, "soup_drop_agent_0_mean": 1.18, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 7, "soup_drop_agent_1_mean": 1.22, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 9, "optimal_onion_potting_agent_0_mean": 4.44, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 11, "optimal_onion_potting_agent_1_mean": 4.69, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 11, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 4.44, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 11, "viable_onion_potting_agent_1_mean": 4.69, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 11, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, 
"useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [28.0, 149.0, 87.0, 82.0, 102.0, 39.0, 101.0, 193.0, 93.0, 50.0, 110.0, 161.0, 139.0, 70.0, 14.0, 93.0, 66.0, 101.0, 179.0, 144.0, 84.0, 33.0, 188.0, 53.0, 123.0, 23.0, 66.0, 130.0, 136.0, 48.0, 42.0, 110.0, 96.0, 103.0, 28.0, 115.0, 45.0, 86.0, 36.0, 88.0, 52.0, 155.0, 76.0, 47.0, 45.0, 23.0, 135.0, 150.0, 39.0, 23.0, 103.0, 67.0, 93.0, 122.0, 133.0, 82.0, 85.0, 119.0, 104.0, 90.0, 147.0, 87.0, 55.0, 96.0, 182.0, 31.0, 147.0, 203.0, 152.0, 125.0, 121.0, 101.0, 88.0, 90.0, 153.0, 142.0, 79.0, 17.0, 147.0, 27.0, 127.0, 125.0, 94.0, 162.0, 121.0, 80.0, 23.0, 101.0, 50.0, 113.0, 9.0, 122.0, 46.0, 110.0, 85.0, 47.0, 150.0, 113.0, 55.0, 77.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [20.0, 8.0, 72.0, 77.0, 48.0, 39.0, 34.0, 48.0, 48.0, 54.0, 19.0, 20.0, 56.0, 45.0, 89.0, 104.0, 44.0, 49.0, 17.0, 33.0, 66.0, 44.0, 83.0, 78.0, 71.0, 68.0, 48.0, 22.0, 11.0, 3.0, 47.0, 46.0, 26.0, 40.0, 41.0, 60.0, 71.0, 108.0, 63.0, 81.0, 36.0, 48.0, 25.0, 8.0, 89.0, 99.0, 28.0, 25.0, 63.0, 60.0, 20.0, 3.0, 40.0, 26.0, 64.0, 66.0, 66.0, 70.0, 30.0, 18.0, 22.0, 20.0, 64.0, 46.0, 61.0, 35.0, 43.0, 60.0, 8.0, 20.0, 53.0, 62.0, 9.0, 36.0, 
57.0, 29.0, 14.0, 22.0, 45.0, 43.0, 28.0, 24.0, 78.0, 77.0, 36.0, 40.0, 17.0, 30.0, 29.0, 16.0, 17.0, 6.0, 75.0, 60.0, 71.0, 79.0, 14.0, 25.0, 12.0, 11.0, 57.0, 46.0, 36.0, 31.0, 44.0, 49.0, 61.0, 61.0, 66.0, 67.0, 37.0, 45.0, 46.0, 39.0, 48.0, 71.0, 40.0, 64.0, 37.0, 53.0, 90.0, 57.0, 50.0, 37.0, 28.0, 27.0, 44.0, 52.0, 83.0, 99.0, 14.0, 17.0, 73.0, 74.0, 95.0, 108.0, 77.0, 75.0, 68.0, 57.0, 68.0, 53.0, 55.0, 46.0, 54.0, 34.0, 42.0, 48.0, 79.0, 74.0, 61.0, 81.0, 45.0, 34.0, 8.0, 9.0, 77.0, 70.0, 13.0, 14.0, 68.0, 59.0, 56.0, 69.0, 50.0, 44.0, 77.0, 85.0, 55.0, 66.0, 40.0, 40.0, 17.0, 6.0, 44.0, 57.0, 16.0, 34.0, 48.0, 65.0, 3.0, 6.0, 60.0, 62.0, 17.0, 29.0, 54.0, 56.0, 40.0, 45.0, 20.0, 27.0, 94.0, 56.0, 68.0, 45.0, 26.0, 29.0, 37.0, 40.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6906281044431333, "mean_inference_ms": 1.2096052422422812, "mean_action_processing_ms": 0.13334734246729163, "mean_env_wait_ms": 0.8776488430576501, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 203.0, "episode_reward_min": 9.0, "episode_reward_mean": 93.77, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 3.0}, "policy_reward_max": {"ppo": 108.0}, "policy_reward_mean": {"ppo": 46.885}, "hist_stats": {"episode_reward": [28.0, 149.0, 87.0, 82.0, 102.0, 39.0, 101.0, 193.0, 93.0, 50.0, 110.0, 161.0, 139.0, 70.0, 14.0, 93.0, 66.0, 101.0, 179.0, 144.0, 84.0, 33.0, 188.0, 53.0, 123.0, 23.0, 66.0, 130.0, 136.0, 48.0, 42.0, 110.0, 96.0, 103.0, 28.0, 115.0, 45.0, 86.0, 36.0, 88.0, 52.0, 155.0, 76.0, 47.0, 45.0, 23.0, 135.0, 150.0, 39.0, 23.0, 103.0, 67.0, 93.0, 122.0, 133.0, 82.0, 85.0, 119.0, 104.0, 90.0, 147.0, 87.0, 55.0, 96.0, 182.0, 31.0, 147.0, 203.0, 152.0, 125.0, 121.0, 101.0, 88.0, 90.0, 153.0, 142.0, 79.0, 17.0, 147.0, 27.0, 127.0, 125.0, 94.0, 162.0, 121.0, 80.0, 23.0, 101.0, 50.0, 113.0, 9.0, 122.0, 46.0, 110.0, 85.0, 47.0, 150.0, 113.0, 55.0, 77.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [20.0, 8.0, 72.0, 77.0, 48.0, 39.0, 34.0, 48.0, 48.0, 54.0, 19.0, 20.0, 56.0, 45.0, 89.0, 104.0, 44.0, 49.0, 17.0, 33.0, 66.0, 44.0, 83.0, 78.0, 71.0, 68.0, 48.0, 22.0, 11.0, 3.0, 47.0, 46.0, 26.0, 40.0, 41.0, 60.0, 71.0, 108.0, 63.0, 81.0, 36.0, 48.0, 25.0, 8.0, 89.0, 99.0, 28.0, 25.0, 63.0, 60.0, 20.0, 3.0, 40.0, 26.0, 64.0, 66.0, 66.0, 70.0, 30.0, 18.0, 22.0, 20.0, 64.0, 46.0, 61.0, 35.0, 43.0, 60.0, 8.0, 20.0, 53.0, 62.0, 9.0, 36.0, 57.0, 29.0, 14.0, 22.0, 45.0, 43.0, 28.0, 24.0, 78.0, 77.0, 36.0, 40.0, 17.0, 30.0, 29.0, 16.0, 17.0, 6.0, 75.0, 60.0, 71.0, 79.0, 14.0, 25.0, 12.0, 11.0, 57.0, 46.0, 36.0, 31.0, 44.0, 49.0, 61.0, 61.0, 66.0, 67.0, 37.0, 45.0, 46.0, 39.0, 48.0, 71.0, 40.0, 64.0, 37.0, 53.0, 90.0, 57.0, 50.0, 37.0, 28.0, 27.0, 44.0, 52.0, 83.0, 99.0, 14.0, 17.0, 73.0, 74.0, 95.0, 108.0, 77.0, 75.0, 68.0, 57.0, 68.0, 53.0, 55.0, 46.0, 54.0, 34.0, 42.0, 48.0, 79.0, 74.0, 61.0, 81.0, 45.0, 34.0, 8.0, 9.0, 77.0, 70.0, 13.0, 14.0, 68.0, 59.0, 56.0, 69.0, 50.0, 44.0, 77.0, 85.0, 55.0, 66.0, 40.0, 40.0, 17.0, 6.0, 44.0, 57.0, 16.0, 34.0, 48.0, 65.0, 3.0, 6.0, 60.0, 62.0, 17.0, 29.0, 54.0, 56.0, 40.0, 45.0, 20.0, 27.0, 94.0, 56.0, 68.0, 45.0, 26.0, 29.0, 37.0, 40.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6906281044431333, "mean_inference_ms": 1.2096052422422812, "mean_action_processing_ms": 0.13334734246729163, "mean_env_wait_ms": 0.8776488430576501, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 921600, 
"num_agent_steps_trained": 921600, "num_env_steps_sampled": 460800, "num_env_steps_trained": 460800, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 460800, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 921600, "timers": {"training_iteration_time_ms": 3690.454, "learn_time_ms": 1086.199, "learn_throughput": 11784.209, "synch_weights_time_ms": 11.013}, "counters": {"num_env_steps_sampled": 460800, "num_env_steps_trained": 460800, "num_agent_steps_sampled": 921600, "num_agent_steps_trained": 921600}, "done": false, "episodes_total": 1152, "training_iteration": 36, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-02-16", "timestamp": 1666580536, "time_this_iter_s": 3.6997194290161133, "time_total_s": 136.43619918823242, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": 
true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, 
"attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, 
"wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, 
"lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, 
"timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": 
false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 136.43619918823242, "timesteps_since_restore": 0, "iterations_since_restore": 36, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 24.16, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 19.4, "sparse_reward_min": 0, "sparse_reward_max": 60, "shaped_reward_mean": 52.6, "shaped_reward_min": 9, "shaped_reward_max": 90, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 7.46, "onion_pickup_agent_0_min": 1, "onion_pickup_agent_0_max": 17, "onion_pickup_agent_1_mean": 7.62, "onion_pickup_agent_1_min": 2, "onion_pickup_agent_1_max": 15, "useful_onion_pickup_agent_0_mean": 5.79, "useful_onion_pickup_agent_0_min": 1, "useful_onion_pickup_agent_0_max": 13, "useful_onion_pickup_agent_1_mean": 5.92, "useful_onion_pickup_agent_1_min": 1, "useful_onion_pickup_agent_1_max": 11, "onion_drop_agent_0_mean": 2.67, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 13, "onion_drop_agent_1_mean": 2.53, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 9, "useful_onion_drop_agent_0_mean": 1.4, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 13, "useful_onion_drop_agent_1_mean": 1.4, 
"useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 9, "potting_onion_agent_0_mean": 4.33, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 11, "potting_onion_agent_1_mean": 4.64, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 10, "dish_pickup_agent_0_mean": 3.92, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 4.21, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 1.09, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 6, "useful_dish_pickup_agent_1_mean": 0.99, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 5, "dish_drop_agent_0_mean": 1.73, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 9, "dish_drop_agent_1_mean": 2.13, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 10, "useful_dish_drop_agent_0_mean": 0.53, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.79, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 5, "soup_pickup_agent_0_mean": 2.74, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 2.7, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 10, "soup_delivery_agent_0_mean": 1.56, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 5, "soup_delivery_agent_1_mean": 1.34, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 4, "soup_drop_agent_0_mean": 1.04, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 7, "soup_drop_agent_1_mean": 1.2, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 9, "optimal_onion_potting_agent_0_mean": 4.33, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 11, "optimal_onion_potting_agent_1_mean": 4.64, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 10, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, 
"optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 4.33, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 11, "viable_onion_potting_agent_1_mean": 4.64, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 10, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 2.9103830890414573e-12, "cur_lr": 0.0010000000474974513, "total_loss": -0.002857581479474902, "policy_loss": -0.0024382565170526505, "vf_loss": 4.072160720825195, 
"vf_explained_var": 0.3045051097869873, "kl": 0.0014278730377554893, "entropy": 1.6530787944793701, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 473600, "num_env_steps_trained": 473600, "num_agent_steps_sampled": 947200, "num_agent_steps_trained": 947200}, "sampler_results": {"episode_reward_max": 203.0, "episode_reward_min": 9.0, "episode_reward_mean": 91.4, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 3.0}, "policy_reward_max": {"ppo": 108.0}, "policy_reward_mean": {"ppo": 45.7}, "custom_metrics": {"sparse_reward_mean": 19.4, "sparse_reward_min": 0, "sparse_reward_max": 60, "shaped_reward_mean": 52.6, "shaped_reward_min": 9, "shaped_reward_max": 90, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 7.46, "onion_pickup_agent_0_min": 1, "onion_pickup_agent_0_max": 17, "onion_pickup_agent_1_mean": 7.62, "onion_pickup_agent_1_min": 2, 
"onion_pickup_agent_1_max": 15, "useful_onion_pickup_agent_0_mean": 5.79, "useful_onion_pickup_agent_0_min": 1, "useful_onion_pickup_agent_0_max": 13, "useful_onion_pickup_agent_1_mean": 5.92, "useful_onion_pickup_agent_1_min": 1, "useful_onion_pickup_agent_1_max": 11, "onion_drop_agent_0_mean": 2.67, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 13, "onion_drop_agent_1_mean": 2.53, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 9, "useful_onion_drop_agent_0_mean": 1.4, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 13, "useful_onion_drop_agent_1_mean": 1.4, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 9, "potting_onion_agent_0_mean": 4.33, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 11, "potting_onion_agent_1_mean": 4.64, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 10, "dish_pickup_agent_0_mean": 3.92, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 4.21, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 1.09, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 6, "useful_dish_pickup_agent_1_mean": 0.99, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 5, "dish_drop_agent_0_mean": 1.73, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 9, "dish_drop_agent_1_mean": 2.13, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 10, "useful_dish_drop_agent_0_mean": 0.53, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.79, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 5, "soup_pickup_agent_0_mean": 2.74, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 2.7, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 10, "soup_delivery_agent_0_mean": 1.56, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 5, 
"soup_delivery_agent_1_mean": 1.34, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 4, "soup_drop_agent_0_mean": 1.04, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 7, "soup_drop_agent_1_mean": 1.2, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 9, "optimal_onion_potting_agent_0_mean": 4.33, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 11, "optimal_onion_potting_agent_1_mean": 4.64, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 10, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 4.33, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 11, "viable_onion_potting_agent_1_mean": 4.64, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 10, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, 
"useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [96.0, 103.0, 28.0, 115.0, 45.0, 86.0, 36.0, 88.0, 52.0, 155.0, 76.0, 47.0, 45.0, 23.0, 135.0, 150.0, 39.0, 23.0, 103.0, 67.0, 93.0, 122.0, 133.0, 82.0, 85.0, 119.0, 104.0, 90.0, 147.0, 87.0, 55.0, 96.0, 182.0, 31.0, 147.0, 203.0, 152.0, 125.0, 121.0, 101.0, 88.0, 90.0, 153.0, 142.0, 79.0, 17.0, 147.0, 27.0, 127.0, 125.0, 94.0, 162.0, 121.0, 80.0, 23.0, 101.0, 50.0, 113.0, 9.0, 122.0, 46.0, 110.0, 85.0, 47.0, 150.0, 113.0, 55.0, 77.0, 9.0, 47.0, 150.0, 34.0, 77.0, 47.0, 90.0, 90.0, 161.0, 128.0, 79.0, 70.0, 79.0, 50.0, 118.0, 109.0, 119.0, 84.0, 136.0, 70.0, 42.0, 110.0, 122.0, 77.0, 134.0, 12.0, 61.0, 74.0, 34.0, 155.0, 158.0, 74.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [61.0, 35.0, 43.0, 60.0, 8.0, 20.0, 53.0, 62.0, 9.0, 36.0, 57.0, 29.0, 14.0, 22.0, 45.0, 43.0, 28.0, 24.0, 78.0, 77.0, 36.0, 40.0, 17.0, 30.0, 29.0, 16.0, 17.0, 6.0, 75.0, 60.0, 71.0, 79.0, 14.0, 25.0, 12.0, 11.0, 57.0, 46.0, 36.0, 31.0, 44.0, 49.0, 61.0, 61.0, 66.0, 67.0, 37.0, 45.0, 46.0, 39.0, 48.0, 71.0, 40.0, 64.0, 37.0, 53.0, 90.0, 57.0, 50.0, 37.0, 28.0, 27.0, 44.0, 52.0, 83.0, 99.0, 14.0, 17.0, 73.0, 74.0, 95.0, 108.0, 77.0, 75.0, 68.0, 
57.0, 68.0, 53.0, 55.0, 46.0, 54.0, 34.0, 42.0, 48.0, 79.0, 74.0, 61.0, 81.0, 45.0, 34.0, 8.0, 9.0, 77.0, 70.0, 13.0, 14.0, 68.0, 59.0, 56.0, 69.0, 50.0, 44.0, 77.0, 85.0, 55.0, 66.0, 40.0, 40.0, 17.0, 6.0, 44.0, 57.0, 16.0, 34.0, 48.0, 65.0, 3.0, 6.0, 60.0, 62.0, 17.0, 29.0, 54.0, 56.0, 40.0, 45.0, 20.0, 27.0, 94.0, 56.0, 68.0, 45.0, 26.0, 29.0, 37.0, 40.0, 3.0, 6.0, 30.0, 17.0, 73.0, 77.0, 16.0, 18.0, 43.0, 34.0, 17.0, 30.0, 53.0, 37.0, 70.0, 20.0, 79.0, 82.0, 62.0, 66.0, 41.0, 38.0, 31.0, 39.0, 42.0, 37.0, 19.0, 31.0, 59.0, 59.0, 50.0, 59.0, 65.0, 54.0, 39.0, 45.0, 71.0, 65.0, 26.0, 44.0, 19.0, 23.0, 56.0, 54.0, 56.0, 66.0, 42.0, 35.0, 65.0, 69.0, 9.0, 3.0, 34.0, 27.0, 32.0, 42.0, 17.0, 17.0, 76.0, 79.0, 86.0, 72.0, 37.0, 37.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6906373736263695, "mean_inference_ms": 1.209409405773758, "mean_action_processing_ms": 0.13332850711565247, "mean_env_wait_ms": 0.877647120825507, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 203.0, "episode_reward_min": 9.0, "episode_reward_mean": 91.4, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 3.0}, "policy_reward_max": {"ppo": 108.0}, "policy_reward_mean": {"ppo": 45.7}, "hist_stats": {"episode_reward": [96.0, 103.0, 28.0, 115.0, 45.0, 86.0, 36.0, 88.0, 52.0, 155.0, 76.0, 47.0, 45.0, 23.0, 135.0, 150.0, 39.0, 23.0, 103.0, 67.0, 93.0, 122.0, 133.0, 82.0, 85.0, 119.0, 104.0, 90.0, 147.0, 87.0, 55.0, 96.0, 182.0, 31.0, 147.0, 203.0, 152.0, 125.0, 121.0, 101.0, 88.0, 90.0, 153.0, 142.0, 79.0, 17.0, 147.0, 27.0, 127.0, 125.0, 94.0, 162.0, 121.0, 80.0, 23.0, 101.0, 50.0, 113.0, 9.0, 122.0, 46.0, 110.0, 85.0, 47.0, 150.0, 113.0, 55.0, 77.0, 9.0, 47.0, 150.0, 34.0, 77.0, 47.0, 90.0, 90.0, 161.0, 128.0, 79.0, 70.0, 79.0, 50.0, 118.0, 109.0, 119.0, 84.0, 136.0, 70.0, 42.0, 110.0, 122.0, 77.0, 134.0, 12.0, 61.0, 74.0, 34.0, 155.0, 158.0, 74.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [61.0, 35.0, 43.0, 60.0, 8.0, 20.0, 53.0, 62.0, 9.0, 36.0, 57.0, 29.0, 14.0, 22.0, 45.0, 43.0, 28.0, 24.0, 78.0, 77.0, 36.0, 40.0, 17.0, 30.0, 29.0, 16.0, 17.0, 6.0, 75.0, 60.0, 71.0, 79.0, 14.0, 25.0, 12.0, 11.0, 57.0, 46.0, 36.0, 31.0, 44.0, 49.0, 61.0, 61.0, 66.0, 67.0, 37.0, 45.0, 46.0, 39.0, 48.0, 71.0, 40.0, 64.0, 37.0, 53.0, 90.0, 57.0, 50.0, 37.0, 28.0, 27.0, 44.0, 52.0, 83.0, 99.0, 14.0, 17.0, 73.0, 74.0, 95.0, 108.0, 77.0, 75.0, 68.0, 57.0, 68.0, 53.0, 55.0, 46.0, 54.0, 34.0, 42.0, 48.0, 79.0, 74.0, 61.0, 81.0, 45.0, 34.0, 8.0, 9.0, 77.0, 70.0, 13.0, 14.0, 68.0, 59.0, 56.0, 69.0, 50.0, 44.0, 77.0, 85.0, 55.0, 66.0, 40.0, 40.0, 17.0, 6.0, 44.0, 57.0, 16.0, 34.0, 48.0, 65.0, 3.0, 6.0, 60.0, 62.0, 17.0, 29.0, 54.0, 56.0, 40.0, 45.0, 20.0, 27.0, 94.0, 56.0, 68.0, 45.0, 26.0, 29.0, 37.0, 40.0, 3.0, 6.0, 30.0, 17.0, 73.0, 77.0, 16.0, 18.0, 43.0, 34.0, 17.0, 30.0, 53.0, 37.0, 70.0, 20.0, 79.0, 82.0, 62.0, 66.0, 41.0, 38.0, 31.0, 39.0, 42.0, 37.0, 19.0, 31.0, 59.0, 59.0, 50.0, 59.0, 65.0, 54.0, 39.0, 45.0, 71.0, 65.0, 26.0, 44.0, 19.0, 23.0, 56.0, 54.0, 56.0, 66.0, 42.0, 35.0, 65.0, 69.0, 9.0, 3.0, 34.0, 27.0, 32.0, 42.0, 17.0, 17.0, 76.0, 79.0, 86.0, 72.0, 37.0, 37.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6906373736263695, "mean_inference_ms": 1.209409405773758, "mean_action_processing_ms": 0.13332850711565247, "mean_env_wait_ms": 0.877647120825507, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 947200, "num_agent_steps_trained": 
947200, "num_env_steps_sampled": 473600, "num_env_steps_trained": 473600, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 473600, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 947200, "timers": {"training_iteration_time_ms": 3698.67, "learn_time_ms": 1096.287, "learn_throughput": 11675.779, "synch_weights_time_ms": 11.456}, "counters": {"num_env_steps_sampled": 473600, "num_env_steps_trained": 473600, "num_agent_steps_sampled": 947200, "num_agent_steps_trained": 947200}, "done": false, "episodes_total": 1184, "training_iteration": 37, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-02-20", "timestamp": 1666580540, "time_this_iter_s": 3.7627384662628174, "time_total_s": 140.19893765449524, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": 
{"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, 
"attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, 
"counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, 
"lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, 
"min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, 
"_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 140.19893765449524, "timesteps_since_restore": 0, "iterations_since_restore": 37, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 21.2, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 21.2, "sparse_reward_min": 0, "sparse_reward_max": 80, "shaped_reward_mean": 52.95, "shaped_reward_min": 9, "shaped_reward_max": 90, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 7.65, "onion_pickup_agent_0_min": 1, "onion_pickup_agent_0_max": 17, "onion_pickup_agent_1_mean": 7.75, "onion_pickup_agent_1_min": 1, "onion_pickup_agent_1_max": 15, "useful_onion_pickup_agent_0_mean": 5.77, "useful_onion_pickup_agent_0_min": 1, "useful_onion_pickup_agent_0_max": 13, "useful_onion_pickup_agent_1_mean": 6.03, "useful_onion_pickup_agent_1_min": 0, "useful_onion_pickup_agent_1_max": 14, "onion_drop_agent_0_mean": 2.83, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 13, "onion_drop_agent_1_mean": 2.54, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 9, "useful_onion_drop_agent_0_mean": 1.5, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 13, "useful_onion_drop_agent_1_mean": 1.4, 
"useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 9, "potting_onion_agent_0_mean": 4.37, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 11, "potting_onion_agent_1_mean": 4.75, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 13, "dish_pickup_agent_0_mean": 3.85, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 4.08, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 1.08, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 6, "useful_dish_pickup_agent_1_mean": 1.0, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 5, "dish_drop_agent_0_mean": 1.67, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 9, "dish_drop_agent_1_mean": 2.04, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 10, "useful_dish_drop_agent_0_mean": 0.57, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.76, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 5, "soup_pickup_agent_0_mean": 2.59, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 2.6, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 1.47, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 6, "soup_delivery_agent_1_mean": 1.46, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 5, "soup_drop_agent_0_mean": 0.94, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 6, "soup_drop_agent_1_mean": 0.98, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 6, "optimal_onion_potting_agent_0_mean": 4.37, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 11, "optimal_onion_potting_agent_1_mean": 4.75, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 13, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, 
"optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 4.37, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 11, "viable_onion_potting_agent_1_mean": 4.75, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 13, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 1.4551915445207286e-12, "cur_lr": 0.0010000000474974513, "total_loss": -0.005668387282639742, "policy_loss": -0.005276101641356945, "vf_loss": 4.287709712982178, 
"vf_explained_var": 0.34124916791915894, "kl": 0.0015137059381231666, "entropy": 1.6421082019805908, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 486400, "num_env_steps_trained": 486400, "num_agent_steps_sampled": 972800, "num_agent_steps_trained": 972800}, "sampler_results": {"episode_reward_max": 242.0, "episode_reward_min": 9.0, "episode_reward_mean": 95.35, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 132.0}, "policy_reward_mean": {"ppo": 47.675}, "custom_metrics": {"sparse_reward_mean": 21.2, "sparse_reward_min": 0, "sparse_reward_max": 80, "shaped_reward_mean": 52.95, "shaped_reward_min": 9, "shaped_reward_max": 90, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 7.65, "onion_pickup_agent_0_min": 1, "onion_pickup_agent_0_max": 17, "onion_pickup_agent_1_mean": 7.75, "onion_pickup_agent_1_min": 1, 
"onion_pickup_agent_1_max": 15, "useful_onion_pickup_agent_0_mean": 5.77, "useful_onion_pickup_agent_0_min": 1, "useful_onion_pickup_agent_0_max": 13, "useful_onion_pickup_agent_1_mean": 6.03, "useful_onion_pickup_agent_1_min": 0, "useful_onion_pickup_agent_1_max": 14, "onion_drop_agent_0_mean": 2.83, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 13, "onion_drop_agent_1_mean": 2.54, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 9, "useful_onion_drop_agent_0_mean": 1.5, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 13, "useful_onion_drop_agent_1_mean": 1.4, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 9, "potting_onion_agent_0_mean": 4.37, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 11, "potting_onion_agent_1_mean": 4.75, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 13, "dish_pickup_agent_0_mean": 3.85, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 4.08, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 1.08, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 6, "useful_dish_pickup_agent_1_mean": 1.0, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 5, "dish_drop_agent_0_mean": 1.67, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 9, "dish_drop_agent_1_mean": 2.04, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 10, "useful_dish_drop_agent_0_mean": 0.57, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.76, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 5, "soup_pickup_agent_0_mean": 2.59, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 2.6, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 1.47, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 6, 
"soup_delivery_agent_1_mean": 1.46, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 5, "soup_drop_agent_0_mean": 0.94, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 6, "soup_drop_agent_1_mean": 0.98, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 6, "optimal_onion_potting_agent_0_mean": 4.37, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 11, "optimal_onion_potting_agent_1_mean": 4.75, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 13, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 4.37, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 11, "viable_onion_potting_agent_1_mean": 4.75, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 13, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, 
"useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [182.0, 31.0, 147.0, 203.0, 152.0, 125.0, 121.0, 101.0, 88.0, 90.0, 153.0, 142.0, 79.0, 17.0, 147.0, 27.0, 127.0, 125.0, 94.0, 162.0, 121.0, 80.0, 23.0, 101.0, 50.0, 113.0, 9.0, 122.0, 46.0, 110.0, 85.0, 47.0, 150.0, 113.0, 55.0, 77.0, 9.0, 47.0, 150.0, 34.0, 77.0, 47.0, 90.0, 90.0, 161.0, 128.0, 79.0, 70.0, 79.0, 50.0, 118.0, 109.0, 119.0, 84.0, 136.0, 70.0, 42.0, 110.0, 122.0, 77.0, 134.0, 12.0, 61.0, 74.0, 34.0, 155.0, 158.0, 74.0, 75.0, 115.0, 114.0, 90.0, 87.0, 163.0, 39.0, 242.0, 101.0, 144.0, 69.0, 88.0, 50.0, 96.0, 31.0, 88.0, 179.0, 167.0, 90.0, 103.0, 107.0, 90.0, 9.0, 85.0, 82.0, 53.0, 9.0, 147.0, 139.0, 99.0, 25.0, 144.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [83.0, 99.0, 14.0, 17.0, 73.0, 74.0, 95.0, 108.0, 77.0, 75.0, 68.0, 57.0, 68.0, 53.0, 55.0, 46.0, 54.0, 34.0, 42.0, 48.0, 79.0, 74.0, 61.0, 81.0, 45.0, 34.0, 8.0, 9.0, 77.0, 70.0, 13.0, 14.0, 68.0, 59.0, 56.0, 69.0, 50.0, 44.0, 77.0, 85.0, 55.0, 66.0, 40.0, 40.0, 17.0, 6.0, 44.0, 57.0, 16.0, 34.0, 48.0, 65.0, 3.0, 6.0, 60.0, 62.0, 17.0, 29.0, 54.0, 56.0, 40.0, 45.0, 20.0, 27.0, 94.0, 56.0, 68.0, 45.0, 26.0, 29.0, 37.0, 40.0, 3.0, 6.0, 30.0, 
17.0, 73.0, 77.0, 16.0, 18.0, 43.0, 34.0, 17.0, 30.0, 53.0, 37.0, 70.0, 20.0, 79.0, 82.0, 62.0, 66.0, 41.0, 38.0, 31.0, 39.0, 42.0, 37.0, 19.0, 31.0, 59.0, 59.0, 50.0, 59.0, 65.0, 54.0, 39.0, 45.0, 71.0, 65.0, 26.0, 44.0, 19.0, 23.0, 56.0, 54.0, 56.0, 66.0, 42.0, 35.0, 65.0, 69.0, 9.0, 3.0, 34.0, 27.0, 32.0, 42.0, 17.0, 17.0, 76.0, 79.0, 86.0, 72.0, 37.0, 37.0, 31.0, 44.0, 61.0, 54.0, 65.0, 49.0, 51.0, 39.0, 45.0, 42.0, 74.0, 89.0, 22.0, 17.0, 132.0, 110.0, 49.0, 52.0, 68.0, 76.0, 39.0, 30.0, 36.0, 52.0, 24.0, 26.0, 42.0, 54.0, 23.0, 8.0, 54.0, 34.0, 82.0, 97.0, 88.0, 79.0, 43.0, 47.0, 42.0, 61.0, 50.0, 57.0, 34.0, 56.0, 9.0, 0.0, 33.0, 52.0, 48.0, 34.0, 20.0, 33.0, 6.0, 3.0, 61.0, 86.0, 63.0, 76.0, 52.0, 47.0, 20.0, 5.0, 73.0, 71.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6905464855961168, "mean_inference_ms": 1.209148072408414, "mean_action_processing_ms": 0.1333054417881031, "mean_env_wait_ms": 0.8775259558875729, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 242.0, "episode_reward_min": 9.0, "episode_reward_mean": 95.35, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 132.0}, "policy_reward_mean": {"ppo": 47.675}, "hist_stats": {"episode_reward": [182.0, 31.0, 147.0, 203.0, 152.0, 125.0, 121.0, 101.0, 88.0, 90.0, 153.0, 142.0, 79.0, 17.0, 147.0, 27.0, 127.0, 125.0, 94.0, 162.0, 121.0, 80.0, 23.0, 101.0, 50.0, 113.0, 9.0, 122.0, 46.0, 110.0, 85.0, 47.0, 150.0, 113.0, 55.0, 77.0, 9.0, 47.0, 150.0, 34.0, 77.0, 47.0, 90.0, 90.0, 161.0, 128.0, 79.0, 70.0, 79.0, 50.0, 118.0, 109.0, 119.0, 84.0, 136.0, 70.0, 42.0, 110.0, 122.0, 77.0, 134.0, 12.0, 61.0, 74.0, 34.0, 155.0, 158.0, 74.0, 75.0, 115.0, 114.0, 90.0, 87.0, 163.0, 39.0, 242.0, 101.0, 144.0, 69.0, 88.0, 50.0, 96.0, 31.0, 88.0, 179.0, 167.0, 90.0, 103.0, 107.0, 90.0, 9.0, 85.0, 82.0, 53.0, 9.0, 147.0, 139.0, 99.0, 25.0, 144.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [83.0, 99.0, 14.0, 17.0, 73.0, 74.0, 95.0, 108.0, 77.0, 75.0, 68.0, 57.0, 68.0, 53.0, 55.0, 46.0, 54.0, 34.0, 42.0, 48.0, 79.0, 74.0, 61.0, 81.0, 45.0, 34.0, 8.0, 9.0, 77.0, 70.0, 13.0, 14.0, 68.0, 59.0, 56.0, 69.0, 50.0, 44.0, 77.0, 85.0, 55.0, 66.0, 40.0, 40.0, 17.0, 6.0, 44.0, 57.0, 16.0, 34.0, 48.0, 65.0, 3.0, 6.0, 60.0, 62.0, 17.0, 29.0, 54.0, 56.0, 40.0, 45.0, 20.0, 27.0, 94.0, 56.0, 68.0, 45.0, 26.0, 29.0, 37.0, 40.0, 3.0, 6.0, 30.0, 17.0, 73.0, 77.0, 16.0, 18.0, 43.0, 34.0, 17.0, 30.0, 53.0, 37.0, 70.0, 20.0, 79.0, 82.0, 62.0, 66.0, 41.0, 38.0, 31.0, 39.0, 42.0, 37.0, 19.0, 31.0, 59.0, 59.0, 50.0, 59.0, 65.0, 54.0, 39.0, 45.0, 71.0, 65.0, 26.0, 44.0, 19.0, 23.0, 56.0, 54.0, 56.0, 66.0, 42.0, 35.0, 65.0, 69.0, 9.0, 3.0, 34.0, 27.0, 32.0, 42.0, 17.0, 17.0, 76.0, 79.0, 86.0, 72.0, 37.0, 37.0, 31.0, 44.0, 61.0, 54.0, 65.0, 49.0, 51.0, 39.0, 45.0, 42.0, 74.0, 89.0, 22.0, 17.0, 132.0, 110.0, 49.0, 52.0, 68.0, 76.0, 39.0, 30.0, 36.0, 52.0, 24.0, 26.0, 42.0, 54.0, 23.0, 8.0, 54.0, 34.0, 82.0, 97.0, 88.0, 79.0, 43.0, 47.0, 42.0, 61.0, 50.0, 57.0, 34.0, 56.0, 9.0, 0.0, 33.0, 52.0, 48.0, 34.0, 20.0, 33.0, 6.0, 3.0, 61.0, 86.0, 63.0, 76.0, 52.0, 47.0, 20.0, 5.0, 73.0, 71.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6905464855961168, "mean_inference_ms": 1.209148072408414, "mean_action_processing_ms": 0.1333054417881031, "mean_env_wait_ms": 0.8775259558875729, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 972800, "num_agent_steps_trained": 
972800, "num_env_steps_sampled": 486400, "num_env_steps_trained": 486400, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 486400, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 972800, "timers": {"training_iteration_time_ms": 3700.754, "learn_time_ms": 1092.91, "learn_throughput": 11711.85, "synch_weights_time_ms": 11.016}, "counters": {"num_env_steps_sampled": 486400, "num_env_steps_trained": 486400, "num_agent_steps_sampled": 972800, "num_agent_steps_trained": 972800}, "done": false, "episodes_total": 1216, "training_iteration": 38, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-02-24", "timestamp": 1666580544, "time_this_iter_s": 3.771451473236084, "time_total_s": 143.97038912773132, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": 
{"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, 
"attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, 
"counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, 
"lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, 
"min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, 
"_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 143.97038912773132, "timesteps_since_restore": 0, "iterations_since_restore": 38, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 21.166666666666668, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 21.0, "sparse_reward_min": 0, "sparse_reward_max": 80, "shaped_reward_mean": 53.0, "shaped_reward_min": 9, "shaped_reward_max": 90, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 7.52, "onion_pickup_agent_0_min": 0, "onion_pickup_agent_0_max": 17, "onion_pickup_agent_1_mean": 7.81, "onion_pickup_agent_1_min": 1, "onion_pickup_agent_1_max": 15, "useful_onion_pickup_agent_0_mean": 5.72, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 13, "useful_onion_pickup_agent_1_mean": 6.13, "useful_onion_pickup_agent_1_min": 0, "useful_onion_pickup_agent_1_max": 14, "onion_drop_agent_0_mean": 2.83, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 13, "onion_drop_agent_1_mean": 2.47, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 10, "useful_onion_drop_agent_0_mean": 1.54, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 13, "useful_onion_drop_agent_1_mean": 
1.4, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 10, "potting_onion_agent_0_mean": 4.32, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 11, "potting_onion_agent_1_mean": 4.91, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 13, "dish_pickup_agent_0_mean": 4.25, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 4.09, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 1.04, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 6, "useful_dish_pickup_agent_1_mean": 0.93, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 4, "dish_drop_agent_0_mean": 1.95, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 8, "dish_drop_agent_1_mean": 2.07, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 10, "useful_dish_drop_agent_0_mean": 0.67, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 4, "useful_dish_drop_agent_1_mean": 0.81, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 5, "soup_pickup_agent_0_mean": 2.51, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 2.38, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 1.51, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 6, "soup_delivery_agent_1_mean": 1.45, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 5, "soup_drop_agent_0_mean": 0.85, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 6, "soup_drop_agent_1_mean": 0.8, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 6, "optimal_onion_potting_agent_0_mean": 4.32, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 11, "optimal_onion_potting_agent_1_mean": 4.91, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 13, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, 
"optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 4.32, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 11, "viable_onion_potting_agent_1_mean": 4.91, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 13, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 7.275957722603643e-13, "cur_lr": 0.0010000000474974513, "total_loss": -0.0030713295564055443, "policy_loss": -0.0026913548354059458, "vf_loss": 4.407833099365234, 
"vf_explained_var": 0.35603398084640503, "kl": 0.0016552733723074198, "entropy": 1.641512393951416, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 499200, "num_env_steps_trained": 499200, "num_agent_steps_sampled": 998400, "num_agent_steps_trained": 998400}, "sampler_results": {"episode_reward_max": 242.0, "episode_reward_min": 9.0, "episode_reward_mean": 95.0, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 132.0}, "policy_reward_mean": {"ppo": 47.5}, "custom_metrics": {"sparse_reward_mean": 21.0, "sparse_reward_min": 0, "sparse_reward_max": 80, "shaped_reward_mean": 53.0, "shaped_reward_min": 9, "shaped_reward_max": 90, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 7.52, "onion_pickup_agent_0_min": 0, "onion_pickup_agent_0_max": 17, "onion_pickup_agent_1_mean": 7.81, "onion_pickup_agent_1_min": 1, 
"onion_pickup_agent_1_max": 15, "useful_onion_pickup_agent_0_mean": 5.72, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 13, "useful_onion_pickup_agent_1_mean": 6.13, "useful_onion_pickup_agent_1_min": 0, "useful_onion_pickup_agent_1_max": 14, "onion_drop_agent_0_mean": 2.83, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 13, "onion_drop_agent_1_mean": 2.47, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 10, "useful_onion_drop_agent_0_mean": 1.54, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 13, "useful_onion_drop_agent_1_mean": 1.4, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 10, "potting_onion_agent_0_mean": 4.32, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 11, "potting_onion_agent_1_mean": 4.91, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 13, "dish_pickup_agent_0_mean": 4.25, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 4.09, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 1.04, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 6, "useful_dish_pickup_agent_1_mean": 0.93, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 4, "dish_drop_agent_0_mean": 1.95, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 8, "dish_drop_agent_1_mean": 2.07, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 10, "useful_dish_drop_agent_0_mean": 0.67, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 4, "useful_dish_drop_agent_1_mean": 0.81, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 5, "soup_pickup_agent_0_mean": 2.51, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 2.38, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 1.51, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 6, 
"soup_delivery_agent_1_mean": 1.45, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 5, "soup_drop_agent_0_mean": 0.85, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 6, "soup_drop_agent_1_mean": 0.8, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 6, "optimal_onion_potting_agent_0_mean": 4.32, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 11, "optimal_onion_potting_agent_1_mean": 4.91, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 13, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 4.32, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 11, "viable_onion_potting_agent_1_mean": 4.91, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 13, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, 
"useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [150.0, 113.0, 55.0, 77.0, 9.0, 47.0, 150.0, 34.0, 77.0, 47.0, 90.0, 90.0, 161.0, 128.0, 79.0, 70.0, 79.0, 50.0, 118.0, 109.0, 119.0, 84.0, 136.0, 70.0, 42.0, 110.0, 122.0, 77.0, 134.0, 12.0, 61.0, 74.0, 34.0, 155.0, 158.0, 74.0, 75.0, 115.0, 114.0, 90.0, 87.0, 163.0, 39.0, 242.0, 101.0, 144.0, 69.0, 88.0, 50.0, 96.0, 31.0, 88.0, 179.0, 167.0, 90.0, 103.0, 107.0, 90.0, 9.0, 85.0, 82.0, 53.0, 9.0, 147.0, 139.0, 99.0, 25.0, 144.0, 17.0, 9.0, 71.0, 152.0, 98.0, 91.0, 120.0, 107.0, 109.0, 191.0, 109.0, 83.0, 104.0, 71.0, 127.0, 93.0, 98.0, 161.0, 42.0, 115.0, 141.0, 90.0, 98.0, 56.0, 128.0, 85.0, 138.0, 144.0, 9.0, 202.0, 36.0, 90.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [94.0, 56.0, 68.0, 45.0, 26.0, 29.0, 37.0, 40.0, 3.0, 6.0, 30.0, 17.0, 73.0, 77.0, 16.0, 18.0, 43.0, 34.0, 17.0, 30.0, 53.0, 37.0, 70.0, 20.0, 79.0, 82.0, 62.0, 66.0, 41.0, 38.0, 31.0, 39.0, 42.0, 37.0, 19.0, 31.0, 59.0, 59.0, 50.0, 59.0, 65.0, 54.0, 39.0, 45.0, 71.0, 65.0, 26.0, 44.0, 19.0, 23.0, 56.0, 54.0, 56.0, 66.0, 42.0, 35.0, 65.0, 69.0, 9.0, 3.0, 34.0, 27.0, 32.0, 42.0, 17.0, 17.0, 76.0, 79.0, 86.0, 72.0, 37.0, 37.0, 31.0, 44.0, 61.0, 54.0, 
65.0, 49.0, 51.0, 39.0, 45.0, 42.0, 74.0, 89.0, 22.0, 17.0, 132.0, 110.0, 49.0, 52.0, 68.0, 76.0, 39.0, 30.0, 36.0, 52.0, 24.0, 26.0, 42.0, 54.0, 23.0, 8.0, 54.0, 34.0, 82.0, 97.0, 88.0, 79.0, 43.0, 47.0, 42.0, 61.0, 50.0, 57.0, 34.0, 56.0, 9.0, 0.0, 33.0, 52.0, 48.0, 34.0, 20.0, 33.0, 6.0, 3.0, 61.0, 86.0, 63.0, 76.0, 52.0, 47.0, 20.0, 5.0, 73.0, 71.0, 8.0, 9.0, 6.0, 3.0, 20.0, 51.0, 75.0, 77.0, 54.0, 44.0, 40.0, 51.0, 54.0, 66.0, 55.0, 52.0, 70.0, 39.0, 82.0, 109.0, 69.0, 40.0, 43.0, 40.0, 47.0, 57.0, 40.0, 31.0, 53.0, 74.0, 34.0, 59.0, 50.0, 48.0, 85.0, 76.0, 17.0, 25.0, 56.0, 59.0, 79.0, 62.0, 42.0, 48.0, 47.0, 51.0, 36.0, 20.0, 48.0, 80.0, 51.0, 34.0, 63.0, 75.0, 69.0, 75.0, 3.0, 6.0, 111.0, 91.0, 13.0, 23.0, 50.0, 40.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6904068077951437, "mean_inference_ms": 1.2088387215834406, "mean_action_processing_ms": 0.13328399687148587, "mean_env_wait_ms": 0.877354026661738, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 242.0, "episode_reward_min": 9.0, "episode_reward_mean": 95.0, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 132.0}, "policy_reward_mean": {"ppo": 47.5}, "hist_stats": {"episode_reward": [150.0, 113.0, 55.0, 77.0, 9.0, 47.0, 150.0, 34.0, 77.0, 47.0, 90.0, 90.0, 161.0, 128.0, 79.0, 70.0, 79.0, 50.0, 118.0, 109.0, 119.0, 84.0, 136.0, 70.0, 42.0, 110.0, 122.0, 77.0, 134.0, 12.0, 61.0, 74.0, 34.0, 155.0, 158.0, 74.0, 75.0, 115.0, 114.0, 90.0, 87.0, 163.0, 39.0, 242.0, 101.0, 144.0, 69.0, 88.0, 50.0, 96.0, 31.0, 88.0, 179.0, 167.0, 90.0, 103.0, 107.0, 90.0, 9.0, 85.0, 82.0, 53.0, 9.0, 147.0, 139.0, 99.0, 25.0, 144.0, 17.0, 9.0, 71.0, 152.0, 98.0, 91.0, 120.0, 107.0, 109.0, 191.0, 109.0, 83.0, 104.0, 71.0, 127.0, 93.0, 98.0, 161.0, 42.0, 115.0, 141.0, 90.0, 98.0, 56.0, 128.0, 85.0, 138.0, 144.0, 9.0, 202.0, 36.0, 90.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [94.0, 56.0, 68.0, 45.0, 26.0, 29.0, 37.0, 40.0, 3.0, 6.0, 30.0, 17.0, 73.0, 77.0, 16.0, 18.0, 43.0, 34.0, 17.0, 30.0, 53.0, 37.0, 70.0, 20.0, 79.0, 82.0, 62.0, 66.0, 41.0, 38.0, 31.0, 39.0, 42.0, 37.0, 19.0, 31.0, 59.0, 59.0, 50.0, 59.0, 65.0, 54.0, 39.0, 45.0, 71.0, 65.0, 26.0, 44.0, 19.0, 23.0, 56.0, 54.0, 56.0, 66.0, 42.0, 35.0, 65.0, 69.0, 9.0, 3.0, 34.0, 27.0, 32.0, 42.0, 17.0, 17.0, 76.0, 79.0, 86.0, 72.0, 37.0, 37.0, 31.0, 44.0, 61.0, 54.0, 65.0, 49.0, 51.0, 39.0, 45.0, 42.0, 74.0, 89.0, 22.0, 17.0, 132.0, 110.0, 49.0, 52.0, 68.0, 76.0, 39.0, 30.0, 36.0, 52.0, 24.0, 26.0, 42.0, 54.0, 23.0, 8.0, 54.0, 34.0, 82.0, 97.0, 88.0, 79.0, 43.0, 47.0, 42.0, 61.0, 50.0, 57.0, 34.0, 56.0, 9.0, 0.0, 33.0, 52.0, 48.0, 34.0, 20.0, 33.0, 6.0, 3.0, 61.0, 86.0, 63.0, 76.0, 52.0, 47.0, 20.0, 5.0, 73.0, 71.0, 8.0, 9.0, 6.0, 3.0, 20.0, 51.0, 75.0, 77.0, 54.0, 44.0, 40.0, 51.0, 54.0, 66.0, 55.0, 52.0, 70.0, 39.0, 82.0, 109.0, 69.0, 40.0, 43.0, 40.0, 47.0, 57.0, 40.0, 31.0, 53.0, 74.0, 34.0, 59.0, 50.0, 48.0, 85.0, 76.0, 17.0, 25.0, 56.0, 59.0, 79.0, 62.0, 42.0, 48.0, 47.0, 51.0, 36.0, 20.0, 48.0, 80.0, 51.0, 34.0, 63.0, 75.0, 69.0, 75.0, 3.0, 6.0, 111.0, 91.0, 13.0, 23.0, 50.0, 40.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6904068077951437, "mean_inference_ms": 1.2088387215834406, "mean_action_processing_ms": 0.13328399687148587, "mean_env_wait_ms": 0.877354026661738, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 998400, "num_agent_steps_trained": 998400, 
"num_env_steps_sampled": 499200, "num_env_steps_trained": 499200, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 499200, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 998400, "timers": {"training_iteration_time_ms": 3678.152, "learn_time_ms": 1103.956, "learn_throughput": 11594.67, "synch_weights_time_ms": 10.871}, "counters": {"num_env_steps_sampled": 499200, "num_env_steps_trained": 499200, "num_agent_steps_sampled": 998400, "num_agent_steps_trained": 998400}, "done": false, "episodes_total": 1248, "training_iteration": 39, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-02-28", "timestamp": 1666580548, "time_this_iter_s": 3.767300605773926, "time_total_s": 147.73768973350525, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": 
{"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, 
"attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, 
"counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, 
"lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, 
"min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, 
"_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 147.73768973350525, "timesteps_since_restore": 0, "iterations_since_restore": 39, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 24.080000000000002, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 23.4, "sparse_reward_min": 0, "sparse_reward_max": 80, "shaped_reward_mean": 52.7, "shaped_reward_min": 9, "shaped_reward_max": 87, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 7.13, "onion_pickup_agent_0_min": 0, "onion_pickup_agent_0_max": 15, "onion_pickup_agent_1_mean": 7.97, "onion_pickup_agent_1_min": 1, "onion_pickup_agent_1_max": 14, "useful_onion_pickup_agent_0_mean": 5.42, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 11, "useful_onion_pickup_agent_1_mean": 6.43, "useful_onion_pickup_agent_1_min": 0, "useful_onion_pickup_agent_1_max": 14, "onion_drop_agent_0_mean": 2.52, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 8, "onion_drop_agent_1_mean": 2.46, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 10, "useful_onion_drop_agent_0_mean": 1.47, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 1.25, 
"useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 10, "potting_onion_agent_0_mean": 4.24, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 11, "potting_onion_agent_1_mean": 5.08, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 13, "dish_pickup_agent_0_mean": 4.56, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 4.29, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 0.87, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 6, "useful_dish_pickup_agent_1_mean": 0.86, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 3, "dish_drop_agent_0_mean": 2.28, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 9, "dish_drop_agent_1_mean": 2.22, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 7, "useful_dish_drop_agent_0_mean": 0.78, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 6, "useful_dish_drop_agent_1_mean": 0.91, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 5, "soup_pickup_agent_0_mean": 2.52, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 2.36, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 6, "soup_delivery_agent_0_mean": 1.51, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 6, "soup_delivery_agent_1_mean": 1.54, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 5, "soup_drop_agent_0_mean": 0.86, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 6, "soup_drop_agent_1_mean": 0.7, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 6, "optimal_onion_potting_agent_0_mean": 4.24, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 11, "optimal_onion_potting_agent_1_mean": 5.08, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 13, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, 
"optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 4.24, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 11, "viable_onion_potting_agent_1_mean": 5.08, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 13, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 3.6379788613018216e-13, "cur_lr": 0.0010000000474974513, "total_loss": -0.003938781097531319, "policy_loss": -0.003565125400200486, "vf_loss": 4.442060470581055, 
"vf_explained_var": 0.35988348722457886, "kl": 0.0015638747718185186, "entropy": 1.6357197761535645, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 512000, "num_env_steps_trained": 512000, "num_agent_steps_sampled": 1024000, "num_agent_steps_trained": 1024000}, "sampler_results": {"episode_reward_max": 242.0, "episode_reward_min": 9.0, "episode_reward_mean": 99.5, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 132.0}, "policy_reward_mean": {"ppo": 49.75}, "custom_metrics": {"sparse_reward_mean": 23.4, "sparse_reward_min": 0, "sparse_reward_max": 80, "shaped_reward_mean": 52.7, "shaped_reward_min": 9, "shaped_reward_max": 87, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 7.13, "onion_pickup_agent_0_min": 0, "onion_pickup_agent_0_max": 15, "onion_pickup_agent_1_mean": 7.97, "onion_pickup_agent_1_min": 1, 
"onion_pickup_agent_1_max": 14, "useful_onion_pickup_agent_0_mean": 5.42, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 11, "useful_onion_pickup_agent_1_mean": 6.43, "useful_onion_pickup_agent_1_min": 0, "useful_onion_pickup_agent_1_max": 14, "onion_drop_agent_0_mean": 2.52, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 8, "onion_drop_agent_1_mean": 2.46, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 10, "useful_onion_drop_agent_0_mean": 1.47, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 1.25, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 10, "potting_onion_agent_0_mean": 4.24, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 11, "potting_onion_agent_1_mean": 5.08, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 13, "dish_pickup_agent_0_mean": 4.56, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 4.29, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 0.87, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 6, "useful_dish_pickup_agent_1_mean": 0.86, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 3, "dish_drop_agent_0_mean": 2.28, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 9, "dish_drop_agent_1_mean": 2.22, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 7, "useful_dish_drop_agent_0_mean": 0.78, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 6, "useful_dish_drop_agent_1_mean": 0.91, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 5, "soup_pickup_agent_0_mean": 2.52, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 2.36, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 6, "soup_delivery_agent_0_mean": 1.51, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 6, 
"soup_delivery_agent_1_mean": 1.54, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 5, "soup_drop_agent_0_mean": 0.86, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 6, "soup_drop_agent_1_mean": 0.7, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 6, "optimal_onion_potting_agent_0_mean": 4.24, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 11, "optimal_onion_potting_agent_1_mean": 5.08, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 13, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 4.24, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 11, "viable_onion_potting_agent_1_mean": 5.08, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 13, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, 
"useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [34.0, 155.0, 158.0, 74.0, 75.0, 115.0, 114.0, 90.0, 87.0, 163.0, 39.0, 242.0, 101.0, 144.0, 69.0, 88.0, 50.0, 96.0, 31.0, 88.0, 179.0, 167.0, 90.0, 103.0, 107.0, 90.0, 9.0, 85.0, 82.0, 53.0, 9.0, 147.0, 139.0, 99.0, 25.0, 144.0, 17.0, 9.0, 71.0, 152.0, 98.0, 91.0, 120.0, 107.0, 109.0, 191.0, 109.0, 83.0, 104.0, 71.0, 127.0, 93.0, 98.0, 161.0, 42.0, 115.0, 141.0, 90.0, 98.0, 56.0, 128.0, 85.0, 138.0, 144.0, 9.0, 202.0, 36.0, 90.0, 28.0, 164.0, 96.0, 195.0, 124.0, 144.0, 69.0, 122.0, 68.0, 130.0, 150.0, 122.0, 141.0, 98.0, 79.0, 55.0, 9.0, 102.0, 12.0, 93.0, 49.0, 149.0, 90.0, 144.0, 195.0, 63.0, 88.0, 90.0, 198.0, 28.0, 31.0, 98.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [17.0, 17.0, 76.0, 79.0, 86.0, 72.0, 37.0, 37.0, 31.0, 44.0, 61.0, 54.0, 65.0, 49.0, 51.0, 39.0, 45.0, 42.0, 74.0, 89.0, 22.0, 17.0, 132.0, 110.0, 49.0, 52.0, 68.0, 76.0, 39.0, 30.0, 36.0, 52.0, 24.0, 26.0, 42.0, 54.0, 23.0, 8.0, 54.0, 34.0, 82.0, 97.0, 88.0, 79.0, 43.0, 47.0, 42.0, 61.0, 50.0, 57.0, 34.0, 56.0, 9.0, 0.0, 33.0, 52.0, 48.0, 34.0, 20.0, 33.0, 6.0, 3.0, 61.0, 86.0, 63.0, 76.0, 52.0, 47.0, 20.0, 5.0, 73.0, 71.0, 8.0, 9.0, 6.0, 3.0, 
20.0, 51.0, 75.0, 77.0, 54.0, 44.0, 40.0, 51.0, 54.0, 66.0, 55.0, 52.0, 70.0, 39.0, 82.0, 109.0, 69.0, 40.0, 43.0, 40.0, 47.0, 57.0, 40.0, 31.0, 53.0, 74.0, 34.0, 59.0, 50.0, 48.0, 85.0, 76.0, 17.0, 25.0, 56.0, 59.0, 79.0, 62.0, 42.0, 48.0, 47.0, 51.0, 36.0, 20.0, 48.0, 80.0, 51.0, 34.0, 63.0, 75.0, 69.0, 75.0, 3.0, 6.0, 111.0, 91.0, 13.0, 23.0, 50.0, 40.0, 22.0, 6.0, 76.0, 88.0, 56.0, 40.0, 98.0, 97.0, 69.0, 55.0, 75.0, 69.0, 37.0, 32.0, 51.0, 71.0, 39.0, 29.0, 52.0, 78.0, 68.0, 82.0, 56.0, 66.0, 69.0, 72.0, 56.0, 42.0, 43.0, 36.0, 16.0, 39.0, 3.0, 6.0, 45.0, 57.0, 3.0, 9.0, 56.0, 37.0, 30.0, 19.0, 78.0, 71.0, 44.0, 46.0, 71.0, 73.0, 89.0, 106.0, 28.0, 35.0, 34.0, 54.0, 42.0, 48.0, 98.0, 100.0, 19.0, 9.0, 22.0, 9.0, 37.0, 61.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6901884261177258, "mean_inference_ms": 1.2084870526268037, "mean_action_processing_ms": 0.13327061894254838, "mean_env_wait_ms": 0.8770979785487504, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 242.0, "episode_reward_min": 9.0, "episode_reward_mean": 99.5, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 132.0}, "policy_reward_mean": {"ppo": 49.75}, "hist_stats": {"episode_reward": [34.0, 155.0, 158.0, 74.0, 75.0, 115.0, 114.0, 90.0, 87.0, 163.0, 39.0, 242.0, 101.0, 144.0, 69.0, 88.0, 50.0, 96.0, 31.0, 88.0, 179.0, 167.0, 90.0, 103.0, 107.0, 90.0, 9.0, 85.0, 82.0, 53.0, 9.0, 147.0, 139.0, 99.0, 25.0, 144.0, 17.0, 9.0, 71.0, 152.0, 98.0, 91.0, 120.0, 107.0, 109.0, 191.0, 109.0, 83.0, 104.0, 71.0, 127.0, 93.0, 98.0, 161.0, 42.0, 115.0, 141.0, 90.0, 98.0, 56.0, 128.0, 85.0, 138.0, 144.0, 9.0, 202.0, 36.0, 90.0, 28.0, 164.0, 96.0, 195.0, 124.0, 144.0, 69.0, 122.0, 68.0, 130.0, 150.0, 122.0, 141.0, 98.0, 79.0, 55.0, 9.0, 102.0, 12.0, 93.0, 49.0, 149.0, 90.0, 144.0, 195.0, 63.0, 88.0, 90.0, 198.0, 28.0, 31.0, 98.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [17.0, 17.0, 76.0, 79.0, 86.0, 72.0, 37.0, 37.0, 31.0, 44.0, 61.0, 54.0, 65.0, 49.0, 51.0, 39.0, 45.0, 42.0, 74.0, 89.0, 22.0, 17.0, 132.0, 110.0, 49.0, 52.0, 68.0, 76.0, 39.0, 30.0, 36.0, 52.0, 24.0, 26.0, 42.0, 54.0, 23.0, 8.0, 54.0, 34.0, 82.0, 97.0, 88.0, 79.0, 43.0, 47.0, 42.0, 61.0, 50.0, 57.0, 34.0, 56.0, 9.0, 0.0, 33.0, 52.0, 48.0, 34.0, 20.0, 33.0, 6.0, 3.0, 61.0, 86.0, 63.0, 76.0, 52.0, 47.0, 20.0, 5.0, 73.0, 71.0, 8.0, 9.0, 6.0, 3.0, 20.0, 51.0, 75.0, 77.0, 54.0, 44.0, 40.0, 51.0, 54.0, 66.0, 55.0, 52.0, 70.0, 39.0, 82.0, 109.0, 69.0, 40.0, 43.0, 40.0, 47.0, 57.0, 40.0, 31.0, 53.0, 74.0, 34.0, 59.0, 50.0, 48.0, 85.0, 76.0, 17.0, 25.0, 56.0, 59.0, 79.0, 62.0, 42.0, 48.0, 47.0, 51.0, 36.0, 20.0, 48.0, 80.0, 51.0, 34.0, 63.0, 75.0, 69.0, 75.0, 3.0, 6.0, 111.0, 91.0, 13.0, 23.0, 50.0, 40.0, 22.0, 6.0, 76.0, 88.0, 56.0, 40.0, 98.0, 97.0, 69.0, 55.0, 75.0, 69.0, 37.0, 32.0, 51.0, 71.0, 39.0, 29.0, 52.0, 78.0, 68.0, 82.0, 56.0, 66.0, 69.0, 72.0, 56.0, 42.0, 43.0, 36.0, 16.0, 39.0, 3.0, 6.0, 45.0, 57.0, 3.0, 9.0, 56.0, 37.0, 30.0, 19.0, 78.0, 71.0, 44.0, 46.0, 71.0, 73.0, 89.0, 106.0, 28.0, 35.0, 34.0, 54.0, 42.0, 48.0, 98.0, 100.0, 19.0, 9.0, 22.0, 9.0, 37.0, 61.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6901884261177258, "mean_inference_ms": 1.2084870526268037, "mean_action_processing_ms": 0.13327061894254838, "mean_env_wait_ms": 0.8770979785487504, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 1024000, "num_agent_steps_trained": 
1024000, "num_env_steps_sampled": 512000, "num_env_steps_trained": 512000, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 512000, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 1024000, "timers": {"training_iteration_time_ms": 3680.412, "learn_time_ms": 1105.379, "learn_throughput": 11579.735, "synch_weights_time_ms": 10.803}, "counters": {"num_env_steps_sampled": 512000, "num_env_steps_trained": 512000, "num_agent_steps_sampled": 1024000, "num_agent_steps_trained": 1024000}, "done": false, "episodes_total": 1280, "training_iteration": 40, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-02-32", "timestamp": 1666580552, "time_this_iter_s": 3.6896848678588867, "time_total_s": 151.42737460136414, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, 
"multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, 
"attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, 
"wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, 
"lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, 
"timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": 
false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 151.42737460136414, "timesteps_since_restore": 0, "iterations_since_restore": 40, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 21.099999999999998, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 22.6, "sparse_reward_min": 0, "sparse_reward_max": 60, "shaped_reward_mean": 52.84, "shaped_reward_min": 9, "shaped_reward_max": 87, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 6.69, "onion_pickup_agent_0_min": 0, "onion_pickup_agent_0_max": 15, "onion_pickup_agent_1_mean": 8.12, "onion_pickup_agent_1_min": 1, "onion_pickup_agent_1_max": 19, "useful_onion_pickup_agent_0_mean": 5.1, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 11, "useful_onion_pickup_agent_1_mean": 6.49, "useful_onion_pickup_agent_1_min": 0, "useful_onion_pickup_agent_1_max": 13, "onion_drop_agent_0_mean": 2.21, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 8, "onion_drop_agent_1_mean": 2.64, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 11, "useful_onion_drop_agent_0_mean": 1.37, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 1.37, 
"useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 10, "potting_onion_agent_0_mean": 4.16, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 11, "potting_onion_agent_1_mean": 5.1, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 11, "dish_pickup_agent_0_mean": 4.75, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 4.24, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 0.86, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 5, "useful_dish_pickup_agent_1_mean": 0.86, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 3, "dish_drop_agent_0_mean": 2.36, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 9, "dish_drop_agent_1_mean": 2.15, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 7, "useful_dish_drop_agent_0_mean": 0.87, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 6, "useful_dish_drop_agent_1_mean": 0.95, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 5, "soup_pickup_agent_0_mean": 2.66, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 2.35, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 6, "soup_delivery_agent_0_mean": 1.56, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 5, "soup_delivery_agent_1_mean": 1.47, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 5, "soup_drop_agent_0_mean": 0.95, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 6, "soup_drop_agent_1_mean": 0.73, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 6, "optimal_onion_potting_agent_0_mean": 4.16, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 11, "optimal_onion_potting_agent_1_mean": 5.1, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 11, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, 
"optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 4.16, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 11, "viable_onion_potting_agent_1_mean": 5.1, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 11, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 1.8189894306509108e-13, "cur_lr": 0.0010000000474974513, "total_loss": -0.005516150966286659, "policy_loss": -0.005121724680066109, "vf_loss": 4.235774517059326, 
"vf_explained_var": 0.39267563819885254, "kl": 0.0015804313588887453, "entropy": 1.6360013484954834, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 524800, "num_env_steps_trained": 524800, "num_agent_steps_sampled": 1049600, "num_agent_steps_trained": 1049600}, "sampler_results": {"episode_reward_max": 202.0, "episode_reward_min": 9.0, "episode_reward_mean": 98.04, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 3.0}, "policy_reward_max": {"ppo": 111.0}, "policy_reward_mean": {"ppo": 49.02}, "custom_metrics": {"sparse_reward_mean": 22.6, "sparse_reward_min": 0, "sparse_reward_max": 60, "shaped_reward_mean": 52.84, "shaped_reward_min": 9, "shaped_reward_max": 87, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 6.69, "onion_pickup_agent_0_min": 0, "onion_pickup_agent_0_max": 15, "onion_pickup_agent_1_mean": 8.12, "onion_pickup_agent_1_min": 1, 
"onion_pickup_agent_1_max": 19, "useful_onion_pickup_agent_0_mean": 5.1, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 11, "useful_onion_pickup_agent_1_mean": 6.49, "useful_onion_pickup_agent_1_min": 0, "useful_onion_pickup_agent_1_max": 13, "onion_drop_agent_0_mean": 2.21, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 8, "onion_drop_agent_1_mean": 2.64, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 11, "useful_onion_drop_agent_0_mean": 1.37, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 1.37, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 10, "potting_onion_agent_0_mean": 4.16, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 11, "potting_onion_agent_1_mean": 5.1, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 11, "dish_pickup_agent_0_mean": 4.75, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 4.24, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 0.86, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 5, "useful_dish_pickup_agent_1_mean": 0.86, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 3, "dish_drop_agent_0_mean": 2.36, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 9, "dish_drop_agent_1_mean": 2.15, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 7, "useful_dish_drop_agent_0_mean": 0.87, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 6, "useful_dish_drop_agent_1_mean": 0.95, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 5, "soup_pickup_agent_0_mean": 2.66, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 2.35, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 6, "soup_delivery_agent_0_mean": 1.56, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 5, 
"soup_delivery_agent_1_mean": 1.47, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 5, "soup_drop_agent_0_mean": 0.95, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 6, "soup_drop_agent_1_mean": 0.73, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 6, "optimal_onion_potting_agent_0_mean": 4.16, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 11, "optimal_onion_potting_agent_1_mean": 5.1, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 11, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 4.16, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 11, "viable_onion_potting_agent_1_mean": 5.1, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 11, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, 
"useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [139.0, 99.0, 25.0, 144.0, 17.0, 9.0, 71.0, 152.0, 98.0, 91.0, 120.0, 107.0, 109.0, 191.0, 109.0, 83.0, 104.0, 71.0, 127.0, 93.0, 98.0, 161.0, 42.0, 115.0, 141.0, 90.0, 98.0, 56.0, 128.0, 85.0, 138.0, 144.0, 9.0, 202.0, 36.0, 90.0, 28.0, 164.0, 96.0, 195.0, 124.0, 144.0, 69.0, 122.0, 68.0, 130.0, 150.0, 122.0, 141.0, 98.0, 79.0, 55.0, 9.0, 102.0, 12.0, 93.0, 49.0, 149.0, 90.0, 144.0, 195.0, 63.0, 88.0, 90.0, 198.0, 28.0, 31.0, 98.0, 153.0, 141.0, 112.0, 42.0, 55.0, 155.0, 147.0, 90.0, 25.0, 109.0, 77.0, 126.0, 77.0, 39.0, 20.0, 130.0, 96.0, 9.0, 99.0, 95.0, 90.0, 132.0, 65.0, 118.0, 81.0, 156.0, 153.0, 99.0, 20.0, 121.0, 63.0, 93.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [63.0, 76.0, 52.0, 47.0, 20.0, 5.0, 73.0, 71.0, 8.0, 9.0, 6.0, 3.0, 20.0, 51.0, 75.0, 77.0, 54.0, 44.0, 40.0, 51.0, 54.0, 66.0, 55.0, 52.0, 70.0, 39.0, 82.0, 109.0, 69.0, 40.0, 43.0, 40.0, 47.0, 57.0, 40.0, 31.0, 53.0, 74.0, 34.0, 59.0, 50.0, 48.0, 85.0, 76.0, 17.0, 25.0, 56.0, 59.0, 79.0, 62.0, 42.0, 48.0, 47.0, 51.0, 36.0, 20.0, 48.0, 80.0, 51.0, 34.0, 63.0, 75.0, 69.0, 75.0, 3.0, 6.0, 111.0, 91.0, 13.0, 23.0, 50.0, 40.0, 22.0, 6.0, 76.0, 
88.0, 56.0, 40.0, 98.0, 97.0, 69.0, 55.0, 75.0, 69.0, 37.0, 32.0, 51.0, 71.0, 39.0, 29.0, 52.0, 78.0, 68.0, 82.0, 56.0, 66.0, 69.0, 72.0, 56.0, 42.0, 43.0, 36.0, 16.0, 39.0, 3.0, 6.0, 45.0, 57.0, 3.0, 9.0, 56.0, 37.0, 30.0, 19.0, 78.0, 71.0, 44.0, 46.0, 71.0, 73.0, 89.0, 106.0, 28.0, 35.0, 34.0, 54.0, 42.0, 48.0, 98.0, 100.0, 19.0, 9.0, 22.0, 9.0, 37.0, 61.0, 75.0, 78.0, 68.0, 73.0, 56.0, 56.0, 17.0, 25.0, 27.0, 28.0, 70.0, 85.0, 82.0, 65.0, 37.0, 53.0, 12.0, 13.0, 67.0, 42.0, 39.0, 38.0, 73.0, 53.0, 31.0, 46.0, 28.0, 11.0, 14.0, 6.0, 62.0, 68.0, 50.0, 46.0, 3.0, 6.0, 40.0, 59.0, 48.0, 47.0, 39.0, 51.0, 70.0, 62.0, 30.0, 35.0, 56.0, 62.0, 44.0, 37.0, 85.0, 71.0, 74.0, 79.0, 34.0, 65.0, 6.0, 14.0, 45.0, 76.0, 31.0, 32.0, 53.0, 40.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6898855332895328, "mean_inference_ms": 1.208174559889397, "mean_action_processing_ms": 0.13325926651821823, "mean_env_wait_ms": 0.8768887278744478, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 202.0, "episode_reward_min": 9.0, "episode_reward_mean": 98.04, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 3.0}, "policy_reward_max": {"ppo": 111.0}, "policy_reward_mean": {"ppo": 49.02}, "hist_stats": {"episode_reward": [139.0, 99.0, 25.0, 144.0, 17.0, 9.0, 71.0, 152.0, 98.0, 91.0, 120.0, 107.0, 109.0, 191.0, 109.0, 83.0, 104.0, 71.0, 127.0, 93.0, 98.0, 161.0, 42.0, 115.0, 141.0, 90.0, 98.0, 56.0, 128.0, 85.0, 138.0, 144.0, 9.0, 202.0, 36.0, 90.0, 28.0, 164.0, 96.0, 195.0, 124.0, 144.0, 69.0, 122.0, 68.0, 130.0, 150.0, 122.0, 141.0, 98.0, 79.0, 55.0, 9.0, 102.0, 12.0, 93.0, 49.0, 149.0, 90.0, 144.0, 195.0, 63.0, 88.0, 90.0, 198.0, 28.0, 31.0, 98.0, 153.0, 141.0, 112.0, 42.0, 55.0, 155.0, 147.0, 90.0, 25.0, 109.0, 77.0, 126.0, 77.0, 39.0, 20.0, 130.0, 96.0, 9.0, 99.0, 95.0, 90.0, 132.0, 65.0, 118.0, 81.0, 156.0, 153.0, 99.0, 20.0, 121.0, 63.0, 93.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [63.0, 76.0, 52.0, 47.0, 20.0, 5.0, 73.0, 71.0, 8.0, 9.0, 6.0, 3.0, 20.0, 51.0, 75.0, 77.0, 54.0, 44.0, 40.0, 51.0, 54.0, 66.0, 55.0, 52.0, 70.0, 39.0, 82.0, 109.0, 69.0, 40.0, 43.0, 40.0, 47.0, 57.0, 40.0, 31.0, 53.0, 74.0, 34.0, 59.0, 50.0, 48.0, 85.0, 76.0, 17.0, 25.0, 56.0, 59.0, 79.0, 62.0, 42.0, 48.0, 47.0, 51.0, 36.0, 20.0, 48.0, 80.0, 51.0, 34.0, 63.0, 75.0, 69.0, 75.0, 3.0, 6.0, 111.0, 91.0, 13.0, 23.0, 50.0, 40.0, 22.0, 6.0, 76.0, 88.0, 56.0, 40.0, 98.0, 97.0, 69.0, 55.0, 75.0, 69.0, 37.0, 32.0, 51.0, 71.0, 39.0, 29.0, 52.0, 78.0, 68.0, 82.0, 56.0, 66.0, 69.0, 72.0, 56.0, 42.0, 43.0, 36.0, 16.0, 39.0, 3.0, 6.0, 45.0, 57.0, 3.0, 9.0, 56.0, 37.0, 30.0, 19.0, 78.0, 71.0, 44.0, 46.0, 71.0, 73.0, 89.0, 106.0, 28.0, 35.0, 34.0, 54.0, 42.0, 48.0, 98.0, 100.0, 19.0, 9.0, 22.0, 9.0, 37.0, 61.0, 75.0, 78.0, 68.0, 73.0, 56.0, 56.0, 17.0, 25.0, 27.0, 28.0, 70.0, 85.0, 82.0, 65.0, 37.0, 53.0, 12.0, 13.0, 67.0, 42.0, 39.0, 38.0, 73.0, 53.0, 31.0, 46.0, 28.0, 11.0, 14.0, 6.0, 62.0, 68.0, 50.0, 46.0, 3.0, 6.0, 40.0, 59.0, 48.0, 47.0, 39.0, 51.0, 70.0, 62.0, 30.0, 35.0, 56.0, 62.0, 44.0, 37.0, 85.0, 71.0, 74.0, 79.0, 34.0, 65.0, 6.0, 14.0, 45.0, 76.0, 31.0, 32.0, 53.0, 40.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6898855332895328, "mean_inference_ms": 1.208174559889397, "mean_action_processing_ms": 0.13325926651821823, "mean_env_wait_ms": 0.8768887278744478, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 1049600, "num_agent_steps_trained": 
1049600, "num_env_steps_sampled": 524800, "num_env_steps_trained": 524800, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 524800, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 1049600, "timers": {"training_iteration_time_ms": 3686.982, "learn_time_ms": 1112.505, "learn_throughput": 11505.568, "synch_weights_time_ms": 10.473}, "counters": {"num_env_steps_sampled": 524800, "num_env_steps_trained": 524800, "num_agent_steps_sampled": 1049600, "num_agent_steps_trained": 1049600}, "done": false, "episodes_total": 1312, "training_iteration": 41, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-02-36", "timestamp": 1666580556, "time_this_iter_s": 3.7608163356781006, "time_total_s": 155.18819093704224, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, 
"multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, 
"attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, 
"wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, 
"lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, 
"timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": 
false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 155.18819093704224, "timesteps_since_restore": 0, "iterations_since_restore": 41, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 24.14, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 23.6, "sparse_reward_min": 0, "sparse_reward_max": 60, "shaped_reward_mean": 52.7, "shaped_reward_min": 9, "shaped_reward_max": 90, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 6.65, "onion_pickup_agent_0_min": 0, "onion_pickup_agent_0_max": 13, "onion_pickup_agent_1_mean": 7.73, "onion_pickup_agent_1_min": 1, "onion_pickup_agent_1_max": 19, "useful_onion_pickup_agent_0_mean": 5.29, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 12, "useful_onion_pickup_agent_1_mean": 6.34, "useful_onion_pickup_agent_1_min": 1, "useful_onion_pickup_agent_1_max": 13, "onion_drop_agent_0_mean": 2.06, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 8, "onion_drop_agent_1_mean": 2.29, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 11, "useful_onion_drop_agent_0_mean": 1.1, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 1.12, 
"useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 7, "potting_onion_agent_0_mean": 4.23, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 11, "potting_onion_agent_1_mean": 5.08, "potting_onion_agent_1_min": 1, "potting_onion_agent_1_max": 10, "dish_pickup_agent_0_mean": 4.67, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 4.4, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 0.83, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 5, "useful_dish_pickup_agent_1_mean": 0.76, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 3, "dish_drop_agent_0_mean": 2.26, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 9, "dish_drop_agent_1_mean": 2.33, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 7, "useful_dish_drop_agent_0_mean": 0.85, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 6, "useful_dish_drop_agent_1_mean": 0.95, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 5, "soup_pickup_agent_0_mean": 2.65, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 2.32, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 6, "soup_delivery_agent_0_mean": 1.58, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 4, "soup_delivery_agent_1_mean": 1.46, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 5, "soup_drop_agent_0_mean": 0.89, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 4, "soup_drop_agent_1_mean": 0.68, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 5, "optimal_onion_potting_agent_0_mean": 4.23, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 11, "optimal_onion_potting_agent_1_mean": 5.08, "optimal_onion_potting_agent_1_min": 1, "optimal_onion_potting_agent_1_max": 10, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, 
"optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 4.23, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 11, "viable_onion_potting_agent_1_mean": 5.08, "viable_onion_potting_agent_1_min": 1, "viable_onion_potting_agent_1_max": 10, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 9.094947153254554e-14, "cur_lr": 0.0010000000474974513, "total_loss": 0.00025381380692124367, "policy_loss": 0.0006008308264426887, "vf_loss": 4.596561431884766, 
"vf_explained_var": 0.40004289150238037, "kl": 0.001455314108170569, "entropy": 1.6133447885513306, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 537600, "num_env_steps_trained": 537600, "num_agent_steps_sampled": 1075200, "num_agent_steps_trained": 1075200}, "sampler_results": {"episode_reward_max": 210.0, "episode_reward_min": 9.0, "episode_reward_mean": 99.9, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 3.0}, "policy_reward_max": {"ppo": 114.0}, "policy_reward_mean": {"ppo": 49.95}, "custom_metrics": {"sparse_reward_mean": 23.6, "sparse_reward_min": 0, "sparse_reward_max": 60, "shaped_reward_mean": 52.7, "shaped_reward_min": 9, "shaped_reward_max": 90, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 6.65, "onion_pickup_agent_0_min": 0, "onion_pickup_agent_0_max": 13, "onion_pickup_agent_1_mean": 7.73, "onion_pickup_agent_1_min": 1, 
"onion_pickup_agent_1_max": 19, "useful_onion_pickup_agent_0_mean": 5.29, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 12, "useful_onion_pickup_agent_1_mean": 6.34, "useful_onion_pickup_agent_1_min": 1, "useful_onion_pickup_agent_1_max": 13, "onion_drop_agent_0_mean": 2.06, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 8, "onion_drop_agent_1_mean": 2.29, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 11, "useful_onion_drop_agent_0_mean": 1.1, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 1.12, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 7, "potting_onion_agent_0_mean": 4.23, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 11, "potting_onion_agent_1_mean": 5.08, "potting_onion_agent_1_min": 1, "potting_onion_agent_1_max": 10, "dish_pickup_agent_0_mean": 4.67, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 4.4, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 0.83, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 5, "useful_dish_pickup_agent_1_mean": 0.76, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 3, "dish_drop_agent_0_mean": 2.26, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 9, "dish_drop_agent_1_mean": 2.33, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 7, "useful_dish_drop_agent_0_mean": 0.85, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 6, "useful_dish_drop_agent_1_mean": 0.95, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 5, "soup_pickup_agent_0_mean": 2.65, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 2.32, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 6, "soup_delivery_agent_0_mean": 1.58, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 4, 
"soup_delivery_agent_1_mean": 1.46, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 5, "soup_drop_agent_0_mean": 0.89, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 4, "soup_drop_agent_1_mean": 0.68, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 5, "optimal_onion_potting_agent_0_mean": 4.23, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 11, "optimal_onion_potting_agent_1_mean": 5.08, "optimal_onion_potting_agent_1_min": 1, "optimal_onion_potting_agent_1_max": 10, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 4.23, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 11, "viable_onion_potting_agent_1_mean": 5.08, "viable_onion_potting_agent_1_min": 1, "viable_onion_potting_agent_1_max": 10, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, 
"useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [9.0, 202.0, 36.0, 90.0, 28.0, 164.0, 96.0, 195.0, 124.0, 144.0, 69.0, 122.0, 68.0, 130.0, 150.0, 122.0, 141.0, 98.0, 79.0, 55.0, 9.0, 102.0, 12.0, 93.0, 49.0, 149.0, 90.0, 144.0, 195.0, 63.0, 88.0, 90.0, 198.0, 28.0, 31.0, 98.0, 153.0, 141.0, 112.0, 42.0, 55.0, 155.0, 147.0, 90.0, 25.0, 109.0, 77.0, 126.0, 77.0, 39.0, 20.0, 130.0, 96.0, 9.0, 99.0, 95.0, 90.0, 132.0, 65.0, 118.0, 81.0, 156.0, 153.0, 99.0, 20.0, 121.0, 63.0, 93.0, 124.0, 64.0, 187.0, 101.0, 141.0, 155.0, 66.0, 198.0, 12.0, 75.0, 31.0, 82.0, 187.0, 201.0, 47.0, 179.0, 66.0, 190.0, 196.0, 52.0, 128.0, 53.0, 76.0, 87.0, 12.0, 84.0, 82.0, 52.0, 74.0, 133.0, 210.0, 96.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [3.0, 6.0, 111.0, 91.0, 13.0, 23.0, 50.0, 40.0, 22.0, 6.0, 76.0, 88.0, 56.0, 40.0, 98.0, 97.0, 69.0, 55.0, 75.0, 69.0, 37.0, 32.0, 51.0, 71.0, 39.0, 29.0, 52.0, 78.0, 68.0, 82.0, 56.0, 66.0, 69.0, 72.0, 56.0, 42.0, 43.0, 36.0, 16.0, 39.0, 3.0, 6.0, 45.0, 57.0, 3.0, 9.0, 56.0, 37.0, 30.0, 19.0, 78.0, 71.0, 44.0, 46.0, 71.0, 73.0, 89.0, 106.0, 28.0, 35.0, 34.0, 54.0, 42.0, 48.0, 98.0, 100.0, 19.0, 9.0, 22.0, 9.0, 37.0, 61.0, 75.0, 78.0, 68.0, 73.0, 
56.0, 56.0, 17.0, 25.0, 27.0, 28.0, 70.0, 85.0, 82.0, 65.0, 37.0, 53.0, 12.0, 13.0, 67.0, 42.0, 39.0, 38.0, 73.0, 53.0, 31.0, 46.0, 28.0, 11.0, 14.0, 6.0, 62.0, 68.0, 50.0, 46.0, 3.0, 6.0, 40.0, 59.0, 48.0, 47.0, 39.0, 51.0, 70.0, 62.0, 30.0, 35.0, 56.0, 62.0, 44.0, 37.0, 85.0, 71.0, 74.0, 79.0, 34.0, 65.0, 6.0, 14.0, 45.0, 76.0, 31.0, 32.0, 53.0, 40.0, 59.0, 65.0, 23.0, 41.0, 98.0, 89.0, 48.0, 53.0, 62.0, 79.0, 73.0, 82.0, 37.0, 29.0, 107.0, 91.0, 6.0, 6.0, 39.0, 36.0, 17.0, 14.0, 40.0, 42.0, 82.0, 105.0, 92.0, 109.0, 22.0, 25.0, 91.0, 88.0, 37.0, 29.0, 98.0, 92.0, 89.0, 107.0, 44.0, 8.0, 57.0, 71.0, 39.0, 14.0, 37.0, 39.0, 51.0, 36.0, 3.0, 9.0, 37.0, 47.0, 39.0, 43.0, 27.0, 25.0, 41.0, 33.0, 59.0, 74.0, 114.0, 96.0, 50.0, 46.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6895577005322571, "mean_inference_ms": 1.2079098308060834, "mean_action_processing_ms": 0.13326194274361466, "mean_env_wait_ms": 0.876727358874316, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 210.0, "episode_reward_min": 9.0, "episode_reward_mean": 99.9, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 3.0}, "policy_reward_max": {"ppo": 114.0}, "policy_reward_mean": {"ppo": 49.95}, "hist_stats": {"episode_reward": [9.0, 202.0, 36.0, 90.0, 28.0, 164.0, 96.0, 195.0, 124.0, 144.0, 69.0, 122.0, 68.0, 130.0, 150.0, 122.0, 141.0, 98.0, 79.0, 55.0, 9.0, 102.0, 12.0, 93.0, 49.0, 149.0, 90.0, 144.0, 195.0, 63.0, 88.0, 90.0, 198.0, 28.0, 31.0, 98.0, 153.0, 141.0, 112.0, 42.0, 55.0, 155.0, 147.0, 90.0, 25.0, 109.0, 77.0, 126.0, 77.0, 39.0, 20.0, 130.0, 96.0, 9.0, 99.0, 95.0, 90.0, 132.0, 65.0, 118.0, 81.0, 156.0, 153.0, 99.0, 20.0, 121.0, 63.0, 93.0, 124.0, 64.0, 187.0, 101.0, 141.0, 155.0, 66.0, 198.0, 12.0, 75.0, 31.0, 82.0, 187.0, 201.0, 47.0, 179.0, 66.0, 190.0, 196.0, 52.0, 128.0, 53.0, 76.0, 87.0, 12.0, 84.0, 82.0, 52.0, 74.0, 133.0, 210.0, 96.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [3.0, 6.0, 111.0, 91.0, 13.0, 23.0, 50.0, 40.0, 22.0, 6.0, 76.0, 88.0, 56.0, 40.0, 98.0, 97.0, 69.0, 55.0, 75.0, 69.0, 37.0, 32.0, 51.0, 71.0, 39.0, 29.0, 52.0, 78.0, 68.0, 82.0, 56.0, 66.0, 69.0, 72.0, 56.0, 42.0, 43.0, 36.0, 16.0, 39.0, 3.0, 6.0, 45.0, 57.0, 3.0, 9.0, 56.0, 37.0, 30.0, 19.0, 78.0, 71.0, 44.0, 46.0, 71.0, 73.0, 89.0, 106.0, 28.0, 35.0, 34.0, 54.0, 42.0, 48.0, 98.0, 100.0, 19.0, 9.0, 22.0, 9.0, 37.0, 61.0, 75.0, 78.0, 68.0, 73.0, 56.0, 56.0, 17.0, 25.0, 27.0, 28.0, 70.0, 85.0, 82.0, 65.0, 37.0, 53.0, 12.0, 13.0, 67.0, 42.0, 39.0, 38.0, 73.0, 53.0, 31.0, 46.0, 28.0, 11.0, 14.0, 6.0, 62.0, 68.0, 50.0, 46.0, 3.0, 6.0, 40.0, 59.0, 48.0, 47.0, 39.0, 51.0, 70.0, 62.0, 30.0, 35.0, 56.0, 62.0, 44.0, 37.0, 85.0, 71.0, 74.0, 79.0, 34.0, 65.0, 6.0, 14.0, 45.0, 76.0, 31.0, 32.0, 53.0, 40.0, 59.0, 65.0, 23.0, 41.0, 98.0, 89.0, 48.0, 53.0, 62.0, 79.0, 73.0, 82.0, 37.0, 29.0, 107.0, 91.0, 6.0, 6.0, 39.0, 36.0, 17.0, 14.0, 40.0, 42.0, 82.0, 105.0, 92.0, 109.0, 22.0, 25.0, 91.0, 88.0, 37.0, 29.0, 98.0, 92.0, 89.0, 107.0, 44.0, 8.0, 57.0, 71.0, 39.0, 14.0, 37.0, 39.0, 51.0, 36.0, 3.0, 9.0, 37.0, 47.0, 39.0, 43.0, 27.0, 25.0, 41.0, 33.0, 59.0, 74.0, 114.0, 96.0, 50.0, 46.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6895577005322571, "mean_inference_ms": 1.2079098308060834, "mean_action_processing_ms": 0.13326194274361466, "mean_env_wait_ms": 0.876727358874316, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 1075200, "num_agent_steps_trained": 
1075200, "num_env_steps_sampled": 537600, "num_env_steps_trained": 537600, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 537600, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 1075200, "timers": {"training_iteration_time_ms": 3684.119, "learn_time_ms": 1119.015, "learn_throughput": 11438.633, "synch_weights_time_ms": 10.755}, "counters": {"num_env_steps_sampled": 537600, "num_env_steps_trained": 537600, "num_agent_steps_sampled": 1075200, "num_agent_steps_trained": 1075200}, "done": false, "episodes_total": 1344, "training_iteration": 42, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-02-40", "timestamp": 1666580560, "time_this_iter_s": 3.800992965698242, "time_total_s": 158.98918390274048, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, 
"multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, 
"attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, 
"wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, 
"lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, 
"timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": 
false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 158.98918390274048, "timesteps_since_restore": 0, "iterations_since_restore": 42, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.966666666666665, "ram_util_percent": 10.616666666666667}}
+{"custom_metrics": {"sparse_reward_mean": 24.2, "sparse_reward_min": 0, "sparse_reward_max": 80, "shaped_reward_mean": 56.1, "shaped_reward_min": 9, "shaped_reward_max": 96, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 6.89, "onion_pickup_agent_0_min": 1, "onion_pickup_agent_0_max": 15, "onion_pickup_agent_1_mean": 7.87, "onion_pickup_agent_1_min": 1, "onion_pickup_agent_1_max": 19, "useful_onion_pickup_agent_0_mean": 5.54, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 12, "useful_onion_pickup_agent_1_mean": 6.39, "useful_onion_pickup_agent_1_min": 1, "useful_onion_pickup_agent_1_max": 13, "onion_drop_agent_0_mean": 2.03, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 9, "onion_drop_agent_1_mean": 2.15, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 11, "useful_onion_drop_agent_0_mean": 1.1, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 7, "useful_onion_drop_agent_1_mean": 1.15, 
"useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 6, "potting_onion_agent_0_mean": 4.52, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 11, "potting_onion_agent_1_mean": 5.36, "potting_onion_agent_1_min": 1, "potting_onion_agent_1_max": 11, "dish_pickup_agent_0_mean": 4.77, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 4.42, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 0.86, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 3, "useful_dish_pickup_agent_1_mean": 0.86, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 4, "dish_drop_agent_0_mean": 2.3, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 8, "dish_drop_agent_1_mean": 2.18, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 7, "useful_dish_drop_agent_0_mean": 0.94, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 6, "useful_dish_drop_agent_1_mean": 0.79, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 4, "soup_pickup_agent_0_mean": 2.61, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 2.57, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 1.63, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 4, "soup_delivery_agent_1_mean": 1.61, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 5, "soup_drop_agent_0_mean": 0.78, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 4, "soup_drop_agent_1_mean": 0.77, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 6, "optimal_onion_potting_agent_0_mean": 4.52, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 11, "optimal_onion_potting_agent_1_mean": 5.36, "optimal_onion_potting_agent_1_min": 1, "optimal_onion_potting_agent_1_max": 11, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, 
"optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 4.52, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 11, "viable_onion_potting_agent_1_mean": 5.36, "viable_onion_potting_agent_1_min": 1, "viable_onion_potting_agent_1_max": 11, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 4.547473576627277e-14, "cur_lr": 0.0010000000474974513, "total_loss": -0.004181774333119392, "policy_loss": -0.0038678678683936596, "vf_loss": 4.915392875671387, 
"vf_explained_var": 0.3882026970386505, "kl": 0.001393836340866983, "entropy": 1.610889196395874, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 550400, "num_env_steps_trained": 550400, "num_agent_steps_sampled": 1100800, "num_agent_steps_trained": 1100800}, "sampler_results": {"episode_reward_max": 242.0, "episode_reward_min": 9.0, "episode_reward_mean": 104.5, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 125.0}, "policy_reward_mean": {"ppo": 52.25}, "custom_metrics": {"sparse_reward_mean": 24.2, "sparse_reward_min": 0, "sparse_reward_max": 80, "shaped_reward_mean": 56.1, "shaped_reward_min": 9, "shaped_reward_max": 96, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 6.89, "onion_pickup_agent_0_min": 1, "onion_pickup_agent_0_max": 15, "onion_pickup_agent_1_mean": 7.87, "onion_pickup_agent_1_min": 1, 
"onion_pickup_agent_1_max": 19, "useful_onion_pickup_agent_0_mean": 5.54, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 12, "useful_onion_pickup_agent_1_mean": 6.39, "useful_onion_pickup_agent_1_min": 1, "useful_onion_pickup_agent_1_max": 13, "onion_drop_agent_0_mean": 2.03, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 9, "onion_drop_agent_1_mean": 2.15, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 11, "useful_onion_drop_agent_0_mean": 1.1, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 7, "useful_onion_drop_agent_1_mean": 1.15, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 6, "potting_onion_agent_0_mean": 4.52, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 11, "potting_onion_agent_1_mean": 5.36, "potting_onion_agent_1_min": 1, "potting_onion_agent_1_max": 11, "dish_pickup_agent_0_mean": 4.77, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 4.42, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 0.86, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 3, "useful_dish_pickup_agent_1_mean": 0.86, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 4, "dish_drop_agent_0_mean": 2.3, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 8, "dish_drop_agent_1_mean": 2.18, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 7, "useful_dish_drop_agent_0_mean": 0.94, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 6, "useful_dish_drop_agent_1_mean": 0.79, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 4, "soup_pickup_agent_0_mean": 2.61, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 2.57, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 1.63, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 4, 
"soup_delivery_agent_1_mean": 1.61, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 5, "soup_drop_agent_0_mean": 0.78, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 4, "soup_drop_agent_1_mean": 0.77, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 6, "optimal_onion_potting_agent_0_mean": 4.52, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 11, "optimal_onion_potting_agent_1_mean": 5.36, "optimal_onion_potting_agent_1_min": 1, "optimal_onion_potting_agent_1_max": 11, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 4.52, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 11, "viable_onion_potting_agent_1_mean": 5.36, "viable_onion_potting_agent_1_min": 1, "viable_onion_potting_agent_1_max": 11, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, 
"useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [198.0, 28.0, 31.0, 98.0, 153.0, 141.0, 112.0, 42.0, 55.0, 155.0, 147.0, 90.0, 25.0, 109.0, 77.0, 126.0, 77.0, 39.0, 20.0, 130.0, 96.0, 9.0, 99.0, 95.0, 90.0, 132.0, 65.0, 118.0, 81.0, 156.0, 153.0, 99.0, 20.0, 121.0, 63.0, 93.0, 124.0, 64.0, 187.0, 101.0, 141.0, 155.0, 66.0, 198.0, 12.0, 75.0, 31.0, 82.0, 187.0, 201.0, 47.0, 179.0, 66.0, 190.0, 196.0, 52.0, 128.0, 53.0, 76.0, 87.0, 12.0, 84.0, 82.0, 52.0, 74.0, 133.0, 210.0, 96.0, 161.0, 104.0, 12.0, 107.0, 146.0, 90.0, 55.0, 99.0, 25.0, 124.0, 242.0, 116.0, 193.0, 93.0, 45.0, 20.0, 164.0, 103.0, 154.0, 192.0, 74.0, 71.0, 149.0, 98.0, 23.0, 133.0, 159.0, 34.0, 213.0, 110.0, 216.0, 141.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [98.0, 100.0, 19.0, 9.0, 22.0, 9.0, 37.0, 61.0, 75.0, 78.0, 68.0, 73.0, 56.0, 56.0, 17.0, 25.0, 27.0, 28.0, 70.0, 85.0, 82.0, 65.0, 37.0, 53.0, 12.0, 13.0, 67.0, 42.0, 39.0, 38.0, 73.0, 53.0, 31.0, 46.0, 28.0, 11.0, 14.0, 6.0, 62.0, 68.0, 50.0, 46.0, 3.0, 6.0, 40.0, 59.0, 48.0, 47.0, 39.0, 51.0, 70.0, 62.0, 30.0, 35.0, 56.0, 62.0, 44.0, 37.0, 85.0, 71.0, 74.0, 79.0, 34.0, 65.0, 6.0, 14.0, 45.0, 76.0, 31.0, 32.0, 53.0, 40.0, 59.0, 65.0, 
23.0, 41.0, 98.0, 89.0, 48.0, 53.0, 62.0, 79.0, 73.0, 82.0, 37.0, 29.0, 107.0, 91.0, 6.0, 6.0, 39.0, 36.0, 17.0, 14.0, 40.0, 42.0, 82.0, 105.0, 92.0, 109.0, 22.0, 25.0, 91.0, 88.0, 37.0, 29.0, 98.0, 92.0, 89.0, 107.0, 44.0, 8.0, 57.0, 71.0, 39.0, 14.0, 37.0, 39.0, 51.0, 36.0, 3.0, 9.0, 37.0, 47.0, 39.0, 43.0, 27.0, 25.0, 41.0, 33.0, 59.0, 74.0, 114.0, 96.0, 50.0, 46.0, 81.0, 80.0, 40.0, 64.0, 3.0, 9.0, 59.0, 48.0, 71.0, 75.0, 50.0, 40.0, 26.0, 29.0, 56.0, 43.0, 14.0, 11.0, 65.0, 59.0, 117.0, 125.0, 51.0, 65.0, 89.0, 104.0, 56.0, 37.0, 20.0, 25.0, 0.0, 20.0, 74.0, 90.0, 59.0, 44.0, 81.0, 73.0, 94.0, 98.0, 37.0, 37.0, 33.0, 38.0, 67.0, 82.0, 37.0, 61.0, 6.0, 17.0, 58.0, 75.0, 74.0, 85.0, 8.0, 26.0, 104.0, 109.0, 50.0, 60.0, 113.0, 103.0, 66.0, 75.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6892870612625809, "mean_inference_ms": 1.2077795859778049, "mean_action_processing_ms": 0.133269187249842, "mean_env_wait_ms": 0.8768115098341133, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 242.0, "episode_reward_min": 9.0, "episode_reward_mean": 104.5, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 125.0}, "policy_reward_mean": {"ppo": 52.25}, "hist_stats": {"episode_reward": [198.0, 28.0, 31.0, 98.0, 153.0, 141.0, 112.0, 42.0, 55.0, 155.0, 147.0, 90.0, 25.0, 109.0, 77.0, 126.0, 77.0, 39.0, 20.0, 130.0, 96.0, 9.0, 99.0, 95.0, 90.0, 132.0, 65.0, 118.0, 81.0, 156.0, 153.0, 99.0, 20.0, 121.0, 63.0, 93.0, 124.0, 64.0, 187.0, 101.0, 141.0, 155.0, 66.0, 198.0, 12.0, 75.0, 31.0, 82.0, 187.0, 201.0, 47.0, 179.0, 66.0, 190.0, 196.0, 52.0, 128.0, 53.0, 76.0, 87.0, 12.0, 84.0, 82.0, 52.0, 74.0, 133.0, 210.0, 96.0, 161.0, 104.0, 12.0, 107.0, 146.0, 90.0, 55.0, 99.0, 25.0, 124.0, 242.0, 116.0, 193.0, 93.0, 45.0, 20.0, 164.0, 103.0, 154.0, 192.0, 74.0, 71.0, 149.0, 98.0, 23.0, 133.0, 159.0, 34.0, 213.0, 110.0, 216.0, 141.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [98.0, 100.0, 19.0, 9.0, 22.0, 9.0, 37.0, 61.0, 75.0, 78.0, 68.0, 73.0, 56.0, 56.0, 17.0, 25.0, 27.0, 28.0, 70.0, 85.0, 82.0, 65.0, 37.0, 53.0, 12.0, 13.0, 67.0, 42.0, 39.0, 38.0, 73.0, 53.0, 31.0, 46.0, 28.0, 11.0, 14.0, 6.0, 62.0, 68.0, 50.0, 46.0, 3.0, 6.0, 40.0, 59.0, 48.0, 47.0, 39.0, 51.0, 70.0, 62.0, 30.0, 35.0, 56.0, 62.0, 44.0, 37.0, 85.0, 71.0, 74.0, 79.0, 34.0, 65.0, 6.0, 14.0, 45.0, 76.0, 31.0, 32.0, 53.0, 40.0, 59.0, 65.0, 23.0, 41.0, 98.0, 89.0, 48.0, 53.0, 62.0, 79.0, 73.0, 82.0, 37.0, 29.0, 107.0, 91.0, 6.0, 6.0, 39.0, 36.0, 17.0, 14.0, 40.0, 42.0, 82.0, 105.0, 92.0, 109.0, 22.0, 25.0, 91.0, 88.0, 37.0, 29.0, 98.0, 92.0, 89.0, 107.0, 44.0, 8.0, 57.0, 71.0, 39.0, 14.0, 37.0, 39.0, 51.0, 36.0, 3.0, 9.0, 37.0, 47.0, 39.0, 43.0, 27.0, 25.0, 41.0, 33.0, 59.0, 74.0, 114.0, 96.0, 50.0, 46.0, 81.0, 80.0, 40.0, 64.0, 3.0, 9.0, 59.0, 48.0, 71.0, 75.0, 50.0, 40.0, 26.0, 29.0, 56.0, 43.0, 14.0, 11.0, 65.0, 59.0, 117.0, 125.0, 51.0, 65.0, 89.0, 104.0, 56.0, 37.0, 20.0, 25.0, 0.0, 20.0, 74.0, 90.0, 59.0, 44.0, 81.0, 73.0, 94.0, 98.0, 37.0, 37.0, 33.0, 38.0, 67.0, 82.0, 37.0, 61.0, 6.0, 17.0, 58.0, 75.0, 74.0, 85.0, 8.0, 26.0, 104.0, 109.0, 50.0, 60.0, 113.0, 103.0, 66.0, 75.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6892870612625809, "mean_inference_ms": 1.2077795859778049, "mean_action_processing_ms": 0.133269187249842, "mean_env_wait_ms": 0.8768115098341133, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 1100800, 
"num_agent_steps_trained": 1100800, "num_env_steps_sampled": 550400, "num_env_steps_trained": 550400, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 550400, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 1100800, "timers": {"training_iteration_time_ms": 3700.067, "learn_time_ms": 1126.399, "learn_throughput": 11363.647, "synch_weights_time_ms": 10.556}, "counters": {"num_env_steps_sampled": 550400, "num_env_steps_trained": 550400, "num_agent_steps_sampled": 1100800, "num_agent_steps_trained": 1100800}, "done": false, "episodes_total": 1376, "training_iteration": 43, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-02-44", "timestamp": 1666580564, "time_this_iter_s": 3.8595781326293945, "time_total_s": 162.84876203536987, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": 
true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, 
"attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, 
"wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, 
"lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, 
"timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": 
false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 162.84876203536987, "timesteps_since_restore": 0, "iterations_since_restore": 43, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 21.433333333333334, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 28.8, "sparse_reward_min": 0, "sparse_reward_max": 80, "shaped_reward_mean": 58.94, "shaped_reward_min": 12, "shaped_reward_max": 96, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 7.93, "onion_pickup_agent_0_min": 1, "onion_pickup_agent_0_max": 16, "onion_pickup_agent_1_mean": 7.95, "onion_pickup_agent_1_min": 1, "onion_pickup_agent_1_max": 16, "useful_onion_pickup_agent_0_mean": 6.25, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 14, "useful_onion_pickup_agent_1_mean": 6.49, "useful_onion_pickup_agent_1_min": 1, "useful_onion_pickup_agent_1_max": 13, "onion_drop_agent_0_mean": 2.59, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 9, "onion_drop_agent_1_mean": 2.03, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 7, "useful_onion_drop_agent_0_mean": 1.42, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 7, "useful_onion_drop_agent_1_mean": 
1.17, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 6, "potting_onion_agent_0_mean": 4.96, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 11, "potting_onion_agent_1_mean": 5.51, "potting_onion_agent_1_min": 1, "potting_onion_agent_1_max": 11, "dish_pickup_agent_0_mean": 4.46, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 4.46, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 12, "useful_dish_pickup_agent_0_mean": 0.9, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 5, "useful_dish_pickup_agent_1_mean": 1.01, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 4, "dish_drop_agent_0_mean": 2.0, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 8, "dish_drop_agent_1_mean": 2.15, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.79, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 4, "useful_dish_drop_agent_1_mean": 0.74, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 4, "soup_pickup_agent_0_mean": 2.67, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 6, "soup_pickup_agent_1_mean": 2.63, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 1.7, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 5, "soup_delivery_agent_1_mean": 1.75, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 5, "soup_drop_agent_0_mean": 0.8, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 4, "soup_drop_agent_1_mean": 0.71, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 6, "optimal_onion_potting_agent_0_mean": 4.96, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 11, "optimal_onion_potting_agent_1_mean": 5.51, "optimal_onion_potting_agent_1_min": 1, "optimal_onion_potting_agent_1_max": 11, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, 
"optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 4.96, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 11, "viable_onion_potting_agent_1_mean": 5.51, "viable_onion_potting_agent_1_min": 1, "viable_onion_potting_agent_1_max": 11, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 2.2737367883136385e-14, "cur_lr": 0.0010000000474974513, "total_loss": -0.004320982843637466, "policy_loss": -0.004086933098733425, "vf_loss": 5.650534629821777, 
"vf_explained_var": 0.3545888662338257, "kl": 0.0015218043699860573, "entropy": 1.5982072353363037, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 563200, "num_env_steps_trained": 563200, "num_agent_steps_sampled": 1126400, "num_agent_steps_trained": 1126400}, "sampler_results": {"episode_reward_max": 242.0, "episode_reward_min": 12.0, "episode_reward_mean": 116.54, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 125.0}, "policy_reward_mean": {"ppo": 58.27}, "custom_metrics": {"sparse_reward_mean": 28.8, "sparse_reward_min": 0, "sparse_reward_max": 80, "shaped_reward_mean": 58.94, "shaped_reward_min": 12, "shaped_reward_max": 96, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 7.93, "onion_pickup_agent_0_min": 1, "onion_pickup_agent_0_max": 16, "onion_pickup_agent_1_mean": 7.95, "onion_pickup_agent_1_min": 1, 
"onion_pickup_agent_1_max": 16, "useful_onion_pickup_agent_0_mean": 6.25, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 14, "useful_onion_pickup_agent_1_mean": 6.49, "useful_onion_pickup_agent_1_min": 1, "useful_onion_pickup_agent_1_max": 13, "onion_drop_agent_0_mean": 2.59, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 9, "onion_drop_agent_1_mean": 2.03, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 7, "useful_onion_drop_agent_0_mean": 1.42, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 7, "useful_onion_drop_agent_1_mean": 1.17, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 6, "potting_onion_agent_0_mean": 4.96, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 11, "potting_onion_agent_1_mean": 5.51, "potting_onion_agent_1_min": 1, "potting_onion_agent_1_max": 11, "dish_pickup_agent_0_mean": 4.46, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 4.46, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 12, "useful_dish_pickup_agent_0_mean": 0.9, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 5, "useful_dish_pickup_agent_1_mean": 1.01, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 4, "dish_drop_agent_0_mean": 2.0, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 8, "dish_drop_agent_1_mean": 2.15, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.79, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 4, "useful_dish_drop_agent_1_mean": 0.74, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 4, "soup_pickup_agent_0_mean": 2.67, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 6, "soup_pickup_agent_1_mean": 2.63, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 1.7, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 5, 
"soup_delivery_agent_1_mean": 1.75, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 5, "soup_drop_agent_0_mean": 0.8, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 4, "soup_drop_agent_1_mean": 0.71, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 6, "optimal_onion_potting_agent_0_mean": 4.96, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 11, "optimal_onion_potting_agent_1_mean": 5.51, "optimal_onion_potting_agent_1_min": 1, "optimal_onion_potting_agent_1_max": 11, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 4.96, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 11, "viable_onion_potting_agent_1_mean": 5.51, "viable_onion_potting_agent_1_min": 1, "viable_onion_potting_agent_1_max": 11, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, 
"useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [20.0, 121.0, 63.0, 93.0, 124.0, 64.0, 187.0, 101.0, 141.0, 155.0, 66.0, 198.0, 12.0, 75.0, 31.0, 82.0, 187.0, 201.0, 47.0, 179.0, 66.0, 190.0, 196.0, 52.0, 128.0, 53.0, 76.0, 87.0, 12.0, 84.0, 82.0, 52.0, 74.0, 133.0, 210.0, 96.0, 161.0, 104.0, 12.0, 107.0, 146.0, 90.0, 55.0, 99.0, 25.0, 124.0, 242.0, 116.0, 193.0, 93.0, 45.0, 20.0, 164.0, 103.0, 154.0, 192.0, 74.0, 71.0, 149.0, 98.0, 23.0, 133.0, 159.0, 34.0, 213.0, 110.0, 216.0, 141.0, 150.0, 61.0, 195.0, 190.0, 87.0, 176.0, 71.0, 132.0, 147.0, 199.0, 193.0, 76.0, 102.0, 96.0, 98.0, 242.0, 70.0, 57.0, 127.0, 152.0, 77.0, 91.0, 125.0, 174.0, 152.0, 198.0, 164.0, 91.0, 179.0, 101.0, 150.0, 127.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [6.0, 14.0, 45.0, 76.0, 31.0, 32.0, 53.0, 40.0, 59.0, 65.0, 23.0, 41.0, 98.0, 89.0, 48.0, 53.0, 62.0, 79.0, 73.0, 82.0, 37.0, 29.0, 107.0, 91.0, 6.0, 6.0, 39.0, 36.0, 17.0, 14.0, 40.0, 42.0, 82.0, 105.0, 92.0, 109.0, 22.0, 25.0, 91.0, 88.0, 37.0, 29.0, 98.0, 92.0, 89.0, 107.0, 44.0, 8.0, 57.0, 71.0, 39.0, 14.0, 37.0, 39.0, 51.0, 36.0, 3.0, 9.0, 37.0, 47.0, 39.0, 43.0, 27.0, 25.0, 41.0, 33.0, 59.0, 74.0, 114.0, 96.0, 50.0, 46.0, 
81.0, 80.0, 40.0, 64.0, 3.0, 9.0, 59.0, 48.0, 71.0, 75.0, 50.0, 40.0, 26.0, 29.0, 56.0, 43.0, 14.0, 11.0, 65.0, 59.0, 117.0, 125.0, 51.0, 65.0, 89.0, 104.0, 56.0, 37.0, 20.0, 25.0, 0.0, 20.0, 74.0, 90.0, 59.0, 44.0, 81.0, 73.0, 94.0, 98.0, 37.0, 37.0, 33.0, 38.0, 67.0, 82.0, 37.0, 61.0, 6.0, 17.0, 58.0, 75.0, 74.0, 85.0, 8.0, 26.0, 104.0, 109.0, 50.0, 60.0, 113.0, 103.0, 66.0, 75.0, 77.0, 73.0, 22.0, 39.0, 105.0, 90.0, 96.0, 94.0, 50.0, 37.0, 89.0, 87.0, 37.0, 34.0, 69.0, 63.0, 67.0, 80.0, 97.0, 102.0, 103.0, 90.0, 40.0, 36.0, 39.0, 63.0, 57.0, 39.0, 46.0, 52.0, 117.0, 125.0, 29.0, 41.0, 23.0, 34.0, 73.0, 54.0, 73.0, 79.0, 36.0, 41.0, 53.0, 38.0, 65.0, 60.0, 83.0, 91.0, 77.0, 75.0, 116.0, 82.0, 79.0, 85.0, 48.0, 43.0, 80.0, 99.0, 36.0, 65.0, 77.0, 73.0, 57.0, 70.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6891156559960221, "mean_inference_ms": 1.2092069581028801, "mean_action_processing_ms": 0.13325865541082607, "mean_env_wait_ms": 0.8773356894318693, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 242.0, "episode_reward_min": 12.0, "episode_reward_mean": 116.54, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 125.0}, "policy_reward_mean": {"ppo": 58.27}, "hist_stats": {"episode_reward": [20.0, 121.0, 63.0, 93.0, 124.0, 64.0, 187.0, 101.0, 141.0, 155.0, 66.0, 198.0, 12.0, 75.0, 31.0, 82.0, 187.0, 201.0, 47.0, 179.0, 66.0, 190.0, 196.0, 52.0, 128.0, 53.0, 76.0, 87.0, 12.0, 84.0, 82.0, 52.0, 74.0, 133.0, 210.0, 96.0, 161.0, 104.0, 12.0, 107.0, 146.0, 90.0, 55.0, 99.0, 25.0, 124.0, 242.0, 116.0, 193.0, 93.0, 45.0, 20.0, 164.0, 103.0, 154.0, 192.0, 74.0, 71.0, 149.0, 98.0, 23.0, 133.0, 159.0, 34.0, 213.0, 110.0, 216.0, 141.0, 150.0, 61.0, 195.0, 190.0, 87.0, 176.0, 71.0, 132.0, 147.0, 199.0, 193.0, 76.0, 102.0, 96.0, 98.0, 242.0, 70.0, 57.0, 127.0, 152.0, 77.0, 91.0, 125.0, 174.0, 152.0, 198.0, 164.0, 91.0, 179.0, 101.0, 150.0, 127.0], "episode_lengths": [400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [6.0, 14.0, 45.0, 76.0, 31.0, 32.0, 53.0, 40.0, 59.0, 65.0, 23.0, 41.0, 98.0, 89.0, 48.0, 53.0, 62.0, 79.0, 73.0, 82.0, 37.0, 29.0, 107.0, 91.0, 6.0, 6.0, 39.0, 36.0, 17.0, 14.0, 40.0, 42.0, 82.0, 105.0, 92.0, 109.0, 22.0, 25.0, 91.0, 88.0, 37.0, 29.0, 98.0, 92.0, 89.0, 107.0, 44.0, 8.0, 57.0, 71.0, 39.0, 14.0, 37.0, 39.0, 51.0, 36.0, 3.0, 9.0, 37.0, 47.0, 39.0, 43.0, 27.0, 25.0, 41.0, 33.0, 59.0, 74.0, 114.0, 96.0, 50.0, 46.0, 81.0, 80.0, 40.0, 64.0, 3.0, 9.0, 59.0, 48.0, 71.0, 75.0, 50.0, 40.0, 26.0, 29.0, 56.0, 43.0, 14.0, 11.0, 65.0, 59.0, 117.0, 125.0, 51.0, 65.0, 89.0, 104.0, 56.0, 37.0, 20.0, 25.0, 0.0, 20.0, 74.0, 90.0, 59.0, 44.0, 81.0, 73.0, 94.0, 98.0, 37.0, 37.0, 33.0, 38.0, 67.0, 82.0, 37.0, 61.0, 6.0, 17.0, 58.0, 75.0, 74.0, 85.0, 8.0, 26.0, 104.0, 109.0, 50.0, 60.0, 113.0, 103.0, 66.0, 75.0, 77.0, 73.0, 22.0, 39.0, 105.0, 90.0, 96.0, 94.0, 50.0, 37.0, 89.0, 87.0, 37.0, 34.0, 69.0, 63.0, 67.0, 80.0, 97.0, 102.0, 103.0, 90.0, 40.0, 36.0, 39.0, 63.0, 57.0, 39.0, 46.0, 52.0, 117.0, 125.0, 29.0, 41.0, 23.0, 34.0, 73.0, 54.0, 73.0, 79.0, 36.0, 41.0, 53.0, 38.0, 65.0, 60.0, 83.0, 91.0, 77.0, 75.0, 116.0, 82.0, 79.0, 85.0, 48.0, 43.0, 80.0, 99.0, 36.0, 65.0, 77.0, 73.0, 57.0, 70.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6891156559960221, "mean_inference_ms": 1.2092069581028801, "mean_action_processing_ms": 0.13325865541082607, "mean_env_wait_ms": 0.8773356894318693, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, 
"num_healthy_workers": 16, "num_agent_steps_sampled": 1126400, "num_agent_steps_trained": 1126400, "num_env_steps_sampled": 563200, "num_env_steps_trained": 563200, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 563200, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 1126400, "timers": {"training_iteration_time_ms": 3702.636, "learn_time_ms": 1114.714, "learn_throughput": 11482.762, "synch_weights_time_ms": 10.534}, "counters": {"num_env_steps_sampled": 563200, "num_env_steps_trained": 563200, "num_agent_steps_sampled": 1126400, "num_agent_steps_trained": 1126400}, "done": false, "episodes_total": 1408, "training_iteration": 44, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-02-48", "timestamp": 1666580568, "time_this_iter_s": 3.862396001815796, "time_total_s": 166.71115803718567, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], 
"counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, 
"_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, 
"mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 
20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, 
"metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", 
"log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 166.71115803718567, "timesteps_since_restore": 0, "iterations_since_restore": 44, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 22.616666666666664, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 33.4, "sparse_reward_min": 0, "sparse_reward_max": 80, "shaped_reward_mean": 62.7, "shaped_reward_min": 9, "shaped_reward_max": 96, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 8.31, "onion_pickup_agent_0_min": 1, "onion_pickup_agent_0_max": 16, "onion_pickup_agent_1_mean": 8.37, "onion_pickup_agent_1_min": 2, "onion_pickup_agent_1_max": 16, "useful_onion_pickup_agent_0_mean": 6.48, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 14, "useful_onion_pickup_agent_1_mean": 6.92, "useful_onion_pickup_agent_1_min": 2, "useful_onion_pickup_agent_1_max": 14, "onion_drop_agent_0_mean": 2.74, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 9, "onion_drop_agent_1_mean": 1.94, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 7, "useful_onion_drop_agent_0_mean": 1.58, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 7, "useful_onion_drop_agent_1_mean": 1.09, 
"useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 5.17, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 11, "potting_onion_agent_1_mean": 6.02, "potting_onion_agent_1_min": 1, "potting_onion_agent_1_max": 12, "dish_pickup_agent_0_mean": 4.59, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 4.54, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 12, "useful_dish_pickup_agent_0_mean": 1.01, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 5, "useful_dish_pickup_agent_1_mean": 1.1, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 4, "dish_drop_agent_0_mean": 2.13, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 8, "dish_drop_agent_1_mean": 2.01, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.75, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 5, "useful_dish_drop_agent_1_mean": 0.63, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 4, "soup_pickup_agent_0_mean": 2.58, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 2.85, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 1.78, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 5, "soup_delivery_agent_1_mean": 1.99, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 5, "soup_drop_agent_0_mean": 0.64, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 5, "soup_drop_agent_1_mean": 0.74, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 6, "optimal_onion_potting_agent_0_mean": 5.17, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 11, "optimal_onion_potting_agent_1_mean": 6.02, "optimal_onion_potting_agent_1_min": 1, "optimal_onion_potting_agent_1_max": 12, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, 
"optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 5.17, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 11, "viable_onion_potting_agent_1_mean": 6.02, "viable_onion_potting_agent_1_min": 1, "viable_onion_potting_agent_1_max": 12, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 1.1368683941568192e-14, "cur_lr": 0.0010000000474974513, "total_loss": -0.003592498367652297, "policy_loss": -0.00334426062181592, "vf_loss": 5.455427169799805, 
"vf_explained_var": 0.41585662961006165, "kl": 0.0015013518277555704, "entropy": 1.587562084197998, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 576000, "num_env_steps_trained": 576000, "num_agent_steps_sampled": 1152000, "num_agent_steps_trained": 1152000}, "sampler_results": {"episode_reward_max": 244.0, "episode_reward_min": 9.0, "episode_reward_mean": 129.5, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 135.0}, "policy_reward_mean": {"ppo": 64.75}, "custom_metrics": {"sparse_reward_mean": 33.4, "sparse_reward_min": 0, "sparse_reward_max": 80, "shaped_reward_mean": 62.7, "shaped_reward_min": 9, "shaped_reward_max": 96, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 8.31, "onion_pickup_agent_0_min": 1, "onion_pickup_agent_0_max": 16, "onion_pickup_agent_1_mean": 8.37, "onion_pickup_agent_1_min": 2, 
"onion_pickup_agent_1_max": 16, "useful_onion_pickup_agent_0_mean": 6.48, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 14, "useful_onion_pickup_agent_1_mean": 6.92, "useful_onion_pickup_agent_1_min": 2, "useful_onion_pickup_agent_1_max": 14, "onion_drop_agent_0_mean": 2.74, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 9, "onion_drop_agent_1_mean": 1.94, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 7, "useful_onion_drop_agent_0_mean": 1.58, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 7, "useful_onion_drop_agent_1_mean": 1.09, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 5.17, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 11, "potting_onion_agent_1_mean": 6.02, "potting_onion_agent_1_min": 1, "potting_onion_agent_1_max": 12, "dish_pickup_agent_0_mean": 4.59, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 4.54, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 12, "useful_dish_pickup_agent_0_mean": 1.01, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 5, "useful_dish_pickup_agent_1_mean": 1.1, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 4, "dish_drop_agent_0_mean": 2.13, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 8, "dish_drop_agent_1_mean": 2.01, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.75, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 5, "useful_dish_drop_agent_1_mean": 0.63, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 4, "soup_pickup_agent_0_mean": 2.58, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 2.85, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 1.78, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 5, 
"soup_delivery_agent_1_mean": 1.99, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 5, "soup_drop_agent_0_mean": 0.64, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 5, "soup_drop_agent_1_mean": 0.74, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 6, "optimal_onion_potting_agent_0_mean": 5.17, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 11, "optimal_onion_potting_agent_1_mean": 6.02, "optimal_onion_potting_agent_1_min": 1, "optimal_onion_potting_agent_1_max": 12, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 5.17, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 11, "viable_onion_potting_agent_1_mean": 6.02, "viable_onion_potting_agent_1_min": 1, "viable_onion_potting_agent_1_max": 12, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, 
"useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [74.0, 133.0, 210.0, 96.0, 161.0, 104.0, 12.0, 107.0, 146.0, 90.0, 55.0, 99.0, 25.0, 124.0, 242.0, 116.0, 193.0, 93.0, 45.0, 20.0, 164.0, 103.0, 154.0, 192.0, 74.0, 71.0, 149.0, 98.0, 23.0, 133.0, 159.0, 34.0, 213.0, 110.0, 216.0, 141.0, 150.0, 61.0, 195.0, 190.0, 87.0, 176.0, 71.0, 132.0, 147.0, 199.0, 193.0, 76.0, 102.0, 96.0, 98.0, 242.0, 70.0, 57.0, 127.0, 152.0, 77.0, 91.0, 125.0, 174.0, 152.0, 198.0, 164.0, 91.0, 179.0, 101.0, 150.0, 127.0, 149.0, 104.0, 136.0, 190.0, 152.0, 193.0, 118.0, 159.0, 190.0, 83.0, 123.0, 241.0, 156.0, 23.0, 139.0, 164.0, 149.0, 153.0, 124.0, 133.0, 85.0, 17.0, 110.0, 210.0, 9.0, 152.0, 157.0, 244.0, 142.0, 152.0, 222.0, 142.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [41.0, 33.0, 59.0, 74.0, 114.0, 96.0, 50.0, 46.0, 81.0, 80.0, 40.0, 64.0, 3.0, 9.0, 59.0, 48.0, 71.0, 75.0, 50.0, 40.0, 26.0, 29.0, 56.0, 43.0, 14.0, 11.0, 65.0, 59.0, 117.0, 125.0, 51.0, 65.0, 89.0, 104.0, 56.0, 37.0, 20.0, 25.0, 0.0, 20.0, 74.0, 90.0, 59.0, 44.0, 81.0, 73.0, 94.0, 98.0, 37.0, 37.0, 33.0, 38.0, 67.0, 82.0, 37.0, 61.0, 6.0, 17.0, 58.0, 75.0, 74.0, 85.0, 8.0, 26.0, 104.0, 109.0, 50.0, 60.0, 113.0, 
103.0, 66.0, 75.0, 77.0, 73.0, 22.0, 39.0, 105.0, 90.0, 96.0, 94.0, 50.0, 37.0, 89.0, 87.0, 37.0, 34.0, 69.0, 63.0, 67.0, 80.0, 97.0, 102.0, 103.0, 90.0, 40.0, 36.0, 39.0, 63.0, 57.0, 39.0, 46.0, 52.0, 117.0, 125.0, 29.0, 41.0, 23.0, 34.0, 73.0, 54.0, 73.0, 79.0, 36.0, 41.0, 53.0, 38.0, 65.0, 60.0, 83.0, 91.0, 77.0, 75.0, 116.0, 82.0, 79.0, 85.0, 48.0, 43.0, 80.0, 99.0, 36.0, 65.0, 77.0, 73.0, 57.0, 70.0, 74.0, 75.0, 56.0, 48.0, 67.0, 69.0, 83.0, 107.0, 68.0, 84.0, 89.0, 104.0, 60.0, 58.0, 79.0, 80.0, 100.0, 90.0, 42.0, 41.0, 57.0, 66.0, 106.0, 135.0, 73.0, 83.0, 11.0, 12.0, 67.0, 72.0, 67.0, 97.0, 71.0, 78.0, 66.0, 87.0, 57.0, 67.0, 66.0, 67.0, 40.0, 45.0, 9.0, 8.0, 50.0, 60.0, 99.0, 111.0, 0.0, 9.0, 73.0, 79.0, 78.0, 79.0, 112.0, 132.0, 68.0, 74.0, 79.0, 73.0, 104.0, 118.0, 74.0, 68.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6890820883579823, "mean_inference_ms": 1.2106855797012448, "mean_action_processing_ms": 0.1332609226683101, "mean_env_wait_ms": 0.8777722925237638, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 244.0, "episode_reward_min": 9.0, "episode_reward_mean": 129.5, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 135.0}, "policy_reward_mean": {"ppo": 64.75}, "hist_stats": {"episode_reward": [74.0, 133.0, 210.0, 96.0, 161.0, 104.0, 12.0, 107.0, 146.0, 90.0, 55.0, 99.0, 25.0, 124.0, 242.0, 116.0, 193.0, 93.0, 45.0, 20.0, 164.0, 103.0, 154.0, 192.0, 74.0, 71.0, 149.0, 98.0, 23.0, 133.0, 159.0, 34.0, 213.0, 110.0, 216.0, 141.0, 150.0, 61.0, 195.0, 190.0, 87.0, 176.0, 71.0, 132.0, 147.0, 199.0, 193.0, 76.0, 102.0, 96.0, 98.0, 242.0, 70.0, 57.0, 127.0, 152.0, 77.0, 91.0, 125.0, 174.0, 152.0, 198.0, 164.0, 91.0, 179.0, 101.0, 150.0, 127.0, 149.0, 104.0, 136.0, 190.0, 152.0, 193.0, 118.0, 159.0, 190.0, 83.0, 123.0, 241.0, 156.0, 23.0, 139.0, 164.0, 149.0, 153.0, 124.0, 133.0, 85.0, 17.0, 110.0, 210.0, 9.0, 152.0, 157.0, 244.0, 142.0, 152.0, 222.0, 
142.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [41.0, 33.0, 59.0, 74.0, 114.0, 96.0, 50.0, 46.0, 81.0, 80.0, 40.0, 64.0, 3.0, 9.0, 59.0, 48.0, 71.0, 75.0, 50.0, 40.0, 26.0, 29.0, 56.0, 43.0, 14.0, 11.0, 65.0, 59.0, 117.0, 125.0, 51.0, 65.0, 89.0, 104.0, 56.0, 37.0, 20.0, 25.0, 0.0, 20.0, 74.0, 90.0, 59.0, 44.0, 81.0, 73.0, 94.0, 98.0, 37.0, 37.0, 33.0, 38.0, 67.0, 82.0, 37.0, 61.0, 6.0, 17.0, 58.0, 75.0, 74.0, 85.0, 8.0, 26.0, 104.0, 109.0, 50.0, 60.0, 113.0, 103.0, 66.0, 75.0, 77.0, 73.0, 22.0, 39.0, 105.0, 90.0, 96.0, 94.0, 50.0, 37.0, 89.0, 87.0, 37.0, 34.0, 69.0, 63.0, 67.0, 80.0, 97.0, 102.0, 103.0, 90.0, 40.0, 36.0, 39.0, 63.0, 57.0, 39.0, 46.0, 52.0, 117.0, 125.0, 29.0, 41.0, 23.0, 34.0, 73.0, 54.0, 73.0, 79.0, 36.0, 41.0, 53.0, 38.0, 65.0, 60.0, 83.0, 91.0, 77.0, 75.0, 116.0, 82.0, 79.0, 85.0, 48.0, 43.0, 80.0, 99.0, 36.0, 65.0, 77.0, 73.0, 57.0, 70.0, 74.0, 75.0, 56.0, 48.0, 67.0, 69.0, 83.0, 107.0, 68.0, 84.0, 89.0, 104.0, 60.0, 58.0, 79.0, 80.0, 100.0, 90.0, 42.0, 41.0, 57.0, 66.0, 106.0, 135.0, 73.0, 83.0, 11.0, 12.0, 67.0, 72.0, 67.0, 97.0, 71.0, 78.0, 66.0, 87.0, 57.0, 67.0, 66.0, 67.0, 40.0, 45.0, 9.0, 8.0, 50.0, 60.0, 99.0, 111.0, 0.0, 9.0, 73.0, 79.0, 78.0, 79.0, 112.0, 132.0, 68.0, 74.0, 79.0, 73.0, 104.0, 118.0, 74.0, 68.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6890820883579823, "mean_inference_ms": 1.2106855797012448, "mean_action_processing_ms": 0.1332609226683101, "mean_env_wait_ms": 0.8777722925237638, "mean_env_render_ms": 0.0}, 
"num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 1152000, "num_agent_steps_trained": 1152000, "num_env_steps_sampled": 576000, "num_env_steps_trained": 576000, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 576000, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 1152000, "timers": {"training_iteration_time_ms": 3695.92, "learn_time_ms": 1115.03, "learn_throughput": 11479.513, "synch_weights_time_ms": 10.717}, "counters": {"num_env_steps_sampled": 576000, "num_env_steps_trained": 576000, "num_agent_steps_sampled": 1152000, "num_agent_steps_trained": 1152000}, "done": false, "episodes_total": 1440, "training_iteration": 45, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-02-52", "timestamp": 1666580572, "time_this_iter_s": 3.6278324127197266, "time_total_s": 170.3389904499054, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": 
false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, 
"lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, 
"env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, 
"use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, 
"evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, 
"log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 170.3389904499054, "timesteps_since_restore": 0, "iterations_since_restore": 45, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 24.060000000000002, 
"ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 39.2, "sparse_reward_min": 0, "sparse_reward_max": 100, "shaped_reward_mean": 66.01, "shaped_reward_min": 9, "shaped_reward_max": 98, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 8.41, "onion_pickup_agent_0_min": 2, "onion_pickup_agent_0_max": 16, "onion_pickup_agent_1_mean": 8.88, "onion_pickup_agent_1_min": 4, "onion_pickup_agent_1_max": 17, "useful_onion_pickup_agent_0_mean": 6.7, "useful_onion_pickup_agent_0_min": 1, "useful_onion_pickup_agent_0_max": 14, "useful_onion_pickup_agent_1_mean": 7.37, "useful_onion_pickup_agent_1_min": 2, "useful_onion_pickup_agent_1_max": 17, "onion_drop_agent_0_mean": 2.55, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 8, "onion_drop_agent_1_mean": 2.08, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 7, "useful_onion_drop_agent_0_mean": 1.4, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 1.2, 
"useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 6, "potting_onion_agent_0_mean": 5.46, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 12, "potting_onion_agent_1_mean": 6.42, "potting_onion_agent_1_min": 1, "potting_onion_agent_1_max": 12, "dish_pickup_agent_0_mean": 4.58, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 4.42, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 12, "useful_dish_pickup_agent_0_mean": 1.14, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 5, "useful_dish_pickup_agent_1_mean": 1.1, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 4, "dish_drop_agent_0_mean": 1.93, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 1.87, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.57, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 5, "useful_dish_drop_agent_1_mean": 0.54, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 4, "soup_pickup_agent_0_mean": 2.76, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 2.77, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 2.02, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 5, "soup_delivery_agent_1_mean": 1.99, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 5, "soup_drop_agent_0_mean": 0.63, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 5, "soup_drop_agent_1_mean": 0.67, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 6, "optimal_onion_potting_agent_0_mean": 5.46, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 12, "optimal_onion_potting_agent_1_mean": 6.42, "optimal_onion_potting_agent_1_min": 1, "optimal_onion_potting_agent_1_max": 12, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, 
"optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 5.46, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 12, "viable_onion_potting_agent_1_mean": 6.42, "viable_onion_potting_agent_1_min": 1, "viable_onion_potting_agent_1_max": 12, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 5.684341970784096e-15, "cur_lr": 0.0010000000474974513, "total_loss": -0.005215235985815525, "policy_loss": -0.00502493791282177, "vf_loss": 5.936392784118652, 
"vf_explained_var": 0.40612637996673584, "kl": 0.0015834091464057565, "entropy": 1.5678761005401611, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 588800, "num_env_steps_trained": 588800, "num_agent_steps_sampled": 1177600, "num_agent_steps_trained": 1177600}, "sampler_results": {"episode_reward_max": 285.0, "episode_reward_min": 9.0, "episode_reward_mean": 144.41, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 146.0}, "policy_reward_mean": {"ppo": 72.205}, "custom_metrics": {"sparse_reward_mean": 39.2, "sparse_reward_min": 0, "sparse_reward_max": 100, "shaped_reward_mean": 66.01, "shaped_reward_min": 9, "shaped_reward_max": 98, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 8.41, "onion_pickup_agent_0_min": 2, "onion_pickup_agent_0_max": 16, "onion_pickup_agent_1_mean": 8.88, "onion_pickup_agent_1_min": 4, 
"onion_pickup_agent_1_max": 17, "useful_onion_pickup_agent_0_mean": 6.7, "useful_onion_pickup_agent_0_min": 1, "useful_onion_pickup_agent_0_max": 14, "useful_onion_pickup_agent_1_mean": 7.37, "useful_onion_pickup_agent_1_min": 2, "useful_onion_pickup_agent_1_max": 17, "onion_drop_agent_0_mean": 2.55, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 8, "onion_drop_agent_1_mean": 2.08, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 7, "useful_onion_drop_agent_0_mean": 1.4, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 1.2, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 6, "potting_onion_agent_0_mean": 5.46, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 12, "potting_onion_agent_1_mean": 6.42, "potting_onion_agent_1_min": 1, "potting_onion_agent_1_max": 12, "dish_pickup_agent_0_mean": 4.58, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 4.42, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 12, "useful_dish_pickup_agent_0_mean": 1.14, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 5, "useful_dish_pickup_agent_1_mean": 1.1, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 4, "dish_drop_agent_0_mean": 1.93, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 1.87, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.57, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 5, "useful_dish_drop_agent_1_mean": 0.54, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 4, "soup_pickup_agent_0_mean": 2.76, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 2.77, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 2.02, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 5, 
"soup_delivery_agent_1_mean": 1.99, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 5, "soup_drop_agent_0_mean": 0.63, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 5, "soup_drop_agent_1_mean": 0.67, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 6, "optimal_onion_potting_agent_0_mean": 5.46, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 12, "optimal_onion_potting_agent_1_mean": 6.42, "optimal_onion_potting_agent_1_min": 1, "optimal_onion_potting_agent_1_max": 12, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 5.46, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 12, "viable_onion_potting_agent_1_mean": 6.42, "viable_onion_potting_agent_1_min": 1, "viable_onion_potting_agent_1_max": 12, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, 
"useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [213.0, 110.0, 216.0, 141.0, 150.0, 61.0, 195.0, 190.0, 87.0, 176.0, 71.0, 132.0, 147.0, 199.0, 193.0, 76.0, 102.0, 96.0, 98.0, 242.0, 70.0, 57.0, 127.0, 152.0, 77.0, 91.0, 125.0, 174.0, 152.0, 198.0, 164.0, 91.0, 179.0, 101.0, 150.0, 127.0, 149.0, 104.0, 136.0, 190.0, 152.0, 193.0, 118.0, 159.0, 190.0, 83.0, 123.0, 241.0, 156.0, 23.0, 139.0, 164.0, 149.0, 153.0, 124.0, 133.0, 85.0, 17.0, 110.0, 210.0, 9.0, 152.0, 157.0, 244.0, 142.0, 152.0, 222.0, 142.0, 104.0, 118.0, 101.0, 166.0, 285.0, 50.0, 134.0, 173.0, 102.0, 114.0, 156.0, 152.0, 149.0, 153.0, 196.0, 130.0, 129.0, 238.0, 219.0, 209.0, 178.0, 79.0, 188.0, 149.0, 196.0, 147.0, 85.0, 245.0, 203.0, 139.0, 158.0, 145.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [104.0, 109.0, 50.0, 60.0, 113.0, 103.0, 66.0, 75.0, 77.0, 73.0, 22.0, 39.0, 105.0, 90.0, 96.0, 94.0, 50.0, 37.0, 89.0, 87.0, 37.0, 34.0, 69.0, 63.0, 67.0, 80.0, 97.0, 102.0, 103.0, 90.0, 40.0, 36.0, 39.0, 63.0, 57.0, 39.0, 46.0, 52.0, 117.0, 125.0, 29.0, 41.0, 23.0, 34.0, 73.0, 54.0, 73.0, 79.0, 36.0, 41.0, 53.0, 38.0, 65.0, 60.0, 83.0, 91.0, 77.0, 75.0, 116.0, 82.0, 79.0, 85.0, 48.0, 43.0, 80.0, 99.0, 
36.0, 65.0, 77.0, 73.0, 57.0, 70.0, 74.0, 75.0, 56.0, 48.0, 67.0, 69.0, 83.0, 107.0, 68.0, 84.0, 89.0, 104.0, 60.0, 58.0, 79.0, 80.0, 100.0, 90.0, 42.0, 41.0, 57.0, 66.0, 106.0, 135.0, 73.0, 83.0, 11.0, 12.0, 67.0, 72.0, 67.0, 97.0, 71.0, 78.0, 66.0, 87.0, 57.0, 67.0, 66.0, 67.0, 40.0, 45.0, 9.0, 8.0, 50.0, 60.0, 99.0, 111.0, 0.0, 9.0, 73.0, 79.0, 78.0, 79.0, 112.0, 132.0, 68.0, 74.0, 79.0, 73.0, 104.0, 118.0, 74.0, 68.0, 55.0, 49.0, 50.0, 68.0, 43.0, 58.0, 89.0, 77.0, 139.0, 146.0, 19.0, 31.0, 70.0, 64.0, 90.0, 83.0, 59.0, 43.0, 57.0, 57.0, 79.0, 77.0, 72.0, 80.0, 68.0, 81.0, 59.0, 94.0, 110.0, 86.0, 68.0, 62.0, 69.0, 60.0, 114.0, 124.0, 120.0, 99.0, 116.0, 93.0, 84.0, 94.0, 37.0, 42.0, 95.0, 93.0, 60.0, 89.0, 93.0, 103.0, 76.0, 71.0, 44.0, 41.0, 121.0, 124.0, 113.0, 90.0, 68.0, 71.0, 80.0, 78.0, 79.0, 66.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.689059692491561, "mean_inference_ms": 1.2121603621163812, "mean_action_processing_ms": 0.13326362140850226, "mean_env_wait_ms": 0.8780508722828152, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 285.0, "episode_reward_min": 9.0, "episode_reward_mean": 144.41, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 146.0}, "policy_reward_mean": {"ppo": 72.205}, "hist_stats": {"episode_reward": [213.0, 110.0, 216.0, 141.0, 150.0, 61.0, 195.0, 190.0, 87.0, 176.0, 71.0, 132.0, 147.0, 199.0, 193.0, 76.0, 102.0, 96.0, 98.0, 242.0, 70.0, 57.0, 127.0, 152.0, 77.0, 91.0, 125.0, 174.0, 152.0, 198.0, 164.0, 91.0, 179.0, 101.0, 150.0, 127.0, 149.0, 104.0, 136.0, 190.0, 152.0, 193.0, 118.0, 159.0, 190.0, 83.0, 123.0, 241.0, 156.0, 23.0, 139.0, 164.0, 149.0, 153.0, 124.0, 133.0, 85.0, 17.0, 110.0, 210.0, 9.0, 152.0, 157.0, 244.0, 142.0, 152.0, 222.0, 142.0, 104.0, 118.0, 101.0, 166.0, 285.0, 50.0, 134.0, 173.0, 102.0, 114.0, 156.0, 152.0, 149.0, 153.0, 196.0, 130.0, 129.0, 238.0, 219.0, 209.0, 178.0, 79.0, 188.0, 149.0, 196.0, 147.0, 
85.0, 245.0, 203.0, 139.0, 158.0, 145.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [104.0, 109.0, 50.0, 60.0, 113.0, 103.0, 66.0, 75.0, 77.0, 73.0, 22.0, 39.0, 105.0, 90.0, 96.0, 94.0, 50.0, 37.0, 89.0, 87.0, 37.0, 34.0, 69.0, 63.0, 67.0, 80.0, 97.0, 102.0, 103.0, 90.0, 40.0, 36.0, 39.0, 63.0, 57.0, 39.0, 46.0, 52.0, 117.0, 125.0, 29.0, 41.0, 23.0, 34.0, 73.0, 54.0, 73.0, 79.0, 36.0, 41.0, 53.0, 38.0, 65.0, 60.0, 83.0, 91.0, 77.0, 75.0, 116.0, 82.0, 79.0, 85.0, 48.0, 43.0, 80.0, 99.0, 36.0, 65.0, 77.0, 73.0, 57.0, 70.0, 74.0, 75.0, 56.0, 48.0, 67.0, 69.0, 83.0, 107.0, 68.0, 84.0, 89.0, 104.0, 60.0, 58.0, 79.0, 80.0, 100.0, 90.0, 42.0, 41.0, 57.0, 66.0, 106.0, 135.0, 73.0, 83.0, 11.0, 12.0, 67.0, 72.0, 67.0, 97.0, 71.0, 78.0, 66.0, 87.0, 57.0, 67.0, 66.0, 67.0, 40.0, 45.0, 9.0, 8.0, 50.0, 60.0, 99.0, 111.0, 0.0, 9.0, 73.0, 79.0, 78.0, 79.0, 112.0, 132.0, 68.0, 74.0, 79.0, 73.0, 104.0, 118.0, 74.0, 68.0, 55.0, 49.0, 50.0, 68.0, 43.0, 58.0, 89.0, 77.0, 139.0, 146.0, 19.0, 31.0, 70.0, 64.0, 90.0, 83.0, 59.0, 43.0, 57.0, 57.0, 79.0, 77.0, 72.0, 80.0, 68.0, 81.0, 59.0, 94.0, 110.0, 86.0, 68.0, 62.0, 69.0, 60.0, 114.0, 124.0, 120.0, 99.0, 116.0, 93.0, 84.0, 94.0, 37.0, 42.0, 95.0, 93.0, 60.0, 89.0, 93.0, 103.0, 76.0, 71.0, 44.0, 41.0, 121.0, 124.0, 113.0, 90.0, 68.0, 71.0, 80.0, 78.0, 79.0, 66.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.689059692491561, "mean_inference_ms": 1.2121603621163812, "mean_action_processing_ms": 0.13326362140850226, "mean_env_wait_ms": 
0.8780508722828152, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 1177600, "num_agent_steps_trained": 1177600, "num_env_steps_sampled": 588800, "num_env_steps_trained": 588800, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 588800, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 1177600, "timers": {"training_iteration_time_ms": 3694.313, "learn_time_ms": 1112.334, "learn_throughput": 11507.338, "synch_weights_time_ms": 10.858}, "counters": {"num_env_steps_sampled": 588800, "num_env_steps_trained": 588800, "num_agent_steps_sampled": 1177600, "num_agent_steps_trained": 1177600}, "done": false, "episodes_total": 1472, "training_iteration": 46, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-02-55", "timestamp": 1666580575, "time_this_iter_s": 3.6903653144836426, "time_total_s": 174.02935576438904, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": 
{"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, 
"lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, 
"DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", 
"free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, 
"_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, 
"min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 174.02935576438904, "timesteps_since_restore": 0, "iterations_since_restore": 46, 
"warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 24.340000000000003, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 43.0, "sparse_reward_min": 0, "sparse_reward_max": 100, "shaped_reward_mean": 67.15, "shaped_reward_min": 9, "shaped_reward_max": 98, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 8.24, "onion_pickup_agent_0_min": 2, "onion_pickup_agent_0_max": 16, "onion_pickup_agent_1_mean": 8.83, "onion_pickup_agent_1_min": 4, "onion_pickup_agent_1_max": 17, "useful_onion_pickup_agent_0_mean": 6.86, "useful_onion_pickup_agent_0_min": 1, "useful_onion_pickup_agent_0_max": 14, "useful_onion_pickup_agent_1_mean": 7.58, "useful_onion_pickup_agent_1_min": 2, "useful_onion_pickup_agent_1_max": 17, "onion_drop_agent_0_mean": 2.33, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 8, "onion_drop_agent_1_mean": 1.83, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 7, "useful_onion_drop_agent_0_mean": 1.16, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 
1.02, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 6, "potting_onion_agent_0_mean": 5.51, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 13, "potting_onion_agent_1_mean": 6.71, "potting_onion_agent_1_min": 1, "potting_onion_agent_1_max": 12, "dish_pickup_agent_0_mean": 4.78, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 13, "dish_pickup_agent_1_mean": 4.63, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 1.04, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 4, "useful_dish_pickup_agent_1_mean": 1.04, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 4, "dish_drop_agent_0_mean": 2.22, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 9, "dish_drop_agent_1_mean": 1.86, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 7, "useful_dish_drop_agent_0_mean": 0.72, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 5, "useful_dish_drop_agent_1_mean": 0.57, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 2.65, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 2.94, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 2.04, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 5, "soup_delivery_agent_1_mean": 2.15, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 5, "soup_drop_agent_0_mean": 0.53, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 5, "soup_drop_agent_1_mean": 0.66, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 6, "optimal_onion_potting_agent_0_mean": 5.51, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 13, "optimal_onion_potting_agent_1_mean": 6.71, "optimal_onion_potting_agent_1_min": 1, "optimal_onion_potting_agent_1_max": 12, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, 
"optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 5.51, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 13, "viable_onion_potting_agent_1_mean": 6.71, "viable_onion_potting_agent_1_min": 1, "viable_onion_potting_agent_1_max": 12, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 2.842170985392048e-15, "cur_lr": 0.0010000000474974513, "total_loss": -0.003965587355196476, "policy_loss": -0.003803286934271455, "vf_loss": 6.179717540740967, 
"vf_explained_var": 0.41344791650772095, "kl": 0.0016391698736697435, "entropy": 1.5605473518371582, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 601600, "num_env_steps_trained": 601600, "num_agent_steps_sampled": 1203200, "num_agent_steps_trained": 1203200}, "sampler_results": {"episode_reward_max": 285.0, "episode_reward_min": 9.0, "episode_reward_mean": 153.15, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 153.0}, "policy_reward_mean": {"ppo": 76.575}, "custom_metrics": {"sparse_reward_mean": 43.0, "sparse_reward_min": 0, "sparse_reward_max": 100, "shaped_reward_mean": 67.15, "shaped_reward_min": 9, "shaped_reward_max": 98, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 8.24, "onion_pickup_agent_0_min": 2, "onion_pickup_agent_0_max": 16, "onion_pickup_agent_1_mean": 8.83, "onion_pickup_agent_1_min": 4, 
"onion_pickup_agent_1_max": 17, "useful_onion_pickup_agent_0_mean": 6.86, "useful_onion_pickup_agent_0_min": 1, "useful_onion_pickup_agent_0_max": 14, "useful_onion_pickup_agent_1_mean": 7.58, "useful_onion_pickup_agent_1_min": 2, "useful_onion_pickup_agent_1_max": 17, "onion_drop_agent_0_mean": 2.33, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 8, "onion_drop_agent_1_mean": 1.83, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 7, "useful_onion_drop_agent_0_mean": 1.16, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 1.02, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 6, "potting_onion_agent_0_mean": 5.51, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 13, "potting_onion_agent_1_mean": 6.71, "potting_onion_agent_1_min": 1, "potting_onion_agent_1_max": 12, "dish_pickup_agent_0_mean": 4.78, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 13, "dish_pickup_agent_1_mean": 4.63, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 1.04, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 4, "useful_dish_pickup_agent_1_mean": 1.04, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 4, "dish_drop_agent_0_mean": 2.22, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 9, "dish_drop_agent_1_mean": 1.86, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 7, "useful_dish_drop_agent_0_mean": 0.72, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 5, "useful_dish_drop_agent_1_mean": 0.57, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 2.65, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 2.94, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 2.04, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 5, 
"soup_delivery_agent_1_mean": 2.15, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 5, "soup_drop_agent_0_mean": 0.53, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 5, "soup_drop_agent_1_mean": 0.66, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 6, "optimal_onion_potting_agent_0_mean": 5.51, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 13, "optimal_onion_potting_agent_1_mean": 6.71, "optimal_onion_potting_agent_1_min": 1, "optimal_onion_potting_agent_1_max": 12, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 5.51, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 13, "viable_onion_potting_agent_1_mean": 6.71, "viable_onion_potting_agent_1_min": 1, "viable_onion_potting_agent_1_max": 12, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, 
"useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [179.0, 101.0, 150.0, 127.0, 149.0, 104.0, 136.0, 190.0, 152.0, 193.0, 118.0, 159.0, 190.0, 83.0, 123.0, 241.0, 156.0, 23.0, 139.0, 164.0, 149.0, 153.0, 124.0, 133.0, 85.0, 17.0, 110.0, 210.0, 9.0, 152.0, 157.0, 244.0, 142.0, 152.0, 222.0, 142.0, 104.0, 118.0, 101.0, 166.0, 285.0, 50.0, 134.0, 173.0, 102.0, 114.0, 156.0, 152.0, 149.0, 153.0, 196.0, 130.0, 129.0, 238.0, 219.0, 209.0, 178.0, 79.0, 188.0, 149.0, 196.0, 147.0, 85.0, 245.0, 203.0, 139.0, 158.0, 145.0, 133.0, 147.0, 136.0, 241.0, 192.0, 190.0, 91.0, 88.0, 90.0, 117.0, 136.0, 193.0, 96.0, 115.0, 64.0, 250.0, 133.0, 238.0, 187.0, 87.0, 285.0, 161.0, 161.0, 204.0, 104.0, 204.0, 241.0, 193.0, 243.0, 136.0, 236.0, 155.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [80.0, 99.0, 36.0, 65.0, 77.0, 73.0, 57.0, 70.0, 74.0, 75.0, 56.0, 48.0, 67.0, 69.0, 83.0, 107.0, 68.0, 84.0, 89.0, 104.0, 60.0, 58.0, 79.0, 80.0, 100.0, 90.0, 42.0, 41.0, 57.0, 66.0, 106.0, 135.0, 73.0, 83.0, 11.0, 12.0, 67.0, 72.0, 67.0, 97.0, 71.0, 78.0, 66.0, 87.0, 57.0, 67.0, 66.0, 67.0, 40.0, 45.0, 9.0, 8.0, 50.0, 60.0, 99.0, 111.0, 0.0, 9.0, 73.0, 79.0, 78.0, 79.0, 112.0, 132.0, 68.0, 74.0, 
79.0, 73.0, 104.0, 118.0, 74.0, 68.0, 55.0, 49.0, 50.0, 68.0, 43.0, 58.0, 89.0, 77.0, 139.0, 146.0, 19.0, 31.0, 70.0, 64.0, 90.0, 83.0, 59.0, 43.0, 57.0, 57.0, 79.0, 77.0, 72.0, 80.0, 68.0, 81.0, 59.0, 94.0, 110.0, 86.0, 68.0, 62.0, 69.0, 60.0, 114.0, 124.0, 120.0, 99.0, 116.0, 93.0, 84.0, 94.0, 37.0, 42.0, 95.0, 93.0, 60.0, 89.0, 93.0, 103.0, 76.0, 71.0, 44.0, 41.0, 121.0, 124.0, 113.0, 90.0, 68.0, 71.0, 80.0, 78.0, 79.0, 66.0, 61.0, 72.0, 70.0, 77.0, 79.0, 57.0, 114.0, 127.0, 84.0, 108.0, 88.0, 102.0, 42.0, 49.0, 40.0, 48.0, 45.0, 45.0, 49.0, 68.0, 77.0, 59.0, 89.0, 104.0, 44.0, 52.0, 56.0, 59.0, 25.0, 39.0, 124.0, 126.0, 63.0, 70.0, 110.0, 128.0, 94.0, 93.0, 50.0, 37.0, 132.0, 153.0, 82.0, 79.0, 79.0, 82.0, 108.0, 96.0, 45.0, 59.0, 110.0, 94.0, 122.0, 119.0, 88.0, 105.0, 118.0, 125.0, 71.0, 65.0, 116.0, 120.0, 64.0, 91.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.688973223754642, "mean_inference_ms": 1.2122168487178766, "mean_action_processing_ms": 0.13326004234755068, "mean_env_wait_ms": 0.8776148200734798, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 285.0, "episode_reward_min": 9.0, "episode_reward_mean": 153.15, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 153.0}, "policy_reward_mean": {"ppo": 76.575}, "hist_stats": {"episode_reward": [179.0, 101.0, 150.0, 127.0, 149.0, 104.0, 136.0, 190.0, 152.0, 193.0, 118.0, 159.0, 190.0, 83.0, 123.0, 241.0, 156.0, 23.0, 139.0, 164.0, 149.0, 153.0, 124.0, 133.0, 85.0, 17.0, 110.0, 210.0, 9.0, 152.0, 157.0, 244.0, 142.0, 152.0, 222.0, 142.0, 104.0, 118.0, 101.0, 166.0, 285.0, 50.0, 134.0, 173.0, 102.0, 114.0, 156.0, 152.0, 149.0, 153.0, 196.0, 130.0, 129.0, 238.0, 219.0, 209.0, 178.0, 79.0, 188.0, 149.0, 196.0, 147.0, 85.0, 245.0, 203.0, 139.0, 158.0, 145.0, 133.0, 147.0, 136.0, 241.0, 192.0, 190.0, 91.0, 88.0, 90.0, 117.0, 136.0, 193.0, 96.0, 115.0, 64.0, 250.0, 133.0, 238.0, 187.0, 87.0, 285.0, 161.0, 161.0, 
204.0, 104.0, 204.0, 241.0, 193.0, 243.0, 136.0, 236.0, 155.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [80.0, 99.0, 36.0, 65.0, 77.0, 73.0, 57.0, 70.0, 74.0, 75.0, 56.0, 48.0, 67.0, 69.0, 83.0, 107.0, 68.0, 84.0, 89.0, 104.0, 60.0, 58.0, 79.0, 80.0, 100.0, 90.0, 42.0, 41.0, 57.0, 66.0, 106.0, 135.0, 73.0, 83.0, 11.0, 12.0, 67.0, 72.0, 67.0, 97.0, 71.0, 78.0, 66.0, 87.0, 57.0, 67.0, 66.0, 67.0, 40.0, 45.0, 9.0, 8.0, 50.0, 60.0, 99.0, 111.0, 0.0, 9.0, 73.0, 79.0, 78.0, 79.0, 112.0, 132.0, 68.0, 74.0, 79.0, 73.0, 104.0, 118.0, 74.0, 68.0, 55.0, 49.0, 50.0, 68.0, 43.0, 58.0, 89.0, 77.0, 139.0, 146.0, 19.0, 31.0, 70.0, 64.0, 90.0, 83.0, 59.0, 43.0, 57.0, 57.0, 79.0, 77.0, 72.0, 80.0, 68.0, 81.0, 59.0, 94.0, 110.0, 86.0, 68.0, 62.0, 69.0, 60.0, 114.0, 124.0, 120.0, 99.0, 116.0, 93.0, 84.0, 94.0, 37.0, 42.0, 95.0, 93.0, 60.0, 89.0, 93.0, 103.0, 76.0, 71.0, 44.0, 41.0, 121.0, 124.0, 113.0, 90.0, 68.0, 71.0, 80.0, 78.0, 79.0, 66.0, 61.0, 72.0, 70.0, 77.0, 79.0, 57.0, 114.0, 127.0, 84.0, 108.0, 88.0, 102.0, 42.0, 49.0, 40.0, 48.0, 45.0, 45.0, 49.0, 68.0, 77.0, 59.0, 89.0, 104.0, 44.0, 52.0, 56.0, 59.0, 25.0, 39.0, 124.0, 126.0, 63.0, 70.0, 110.0, 128.0, 94.0, 93.0, 50.0, 37.0, 132.0, 153.0, 82.0, 79.0, 79.0, 82.0, 108.0, 96.0, 45.0, 59.0, 110.0, 94.0, 122.0, 119.0, 88.0, 105.0, 118.0, 125.0, 71.0, 65.0, 116.0, 120.0, 64.0, 91.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.688973223754642, "mean_inference_ms": 1.2122168487178766, "mean_action_processing_ms": 
0.13326004234755068, "mean_env_wait_ms": 0.8776148200734798, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 1203200, "num_agent_steps_trained": 1203200, "num_env_steps_sampled": 601600, "num_env_steps_trained": 601600, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 601600, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 1203200, "timers": {"training_iteration_time_ms": 3688.223, "learn_time_ms": 1109.916, "learn_throughput": 11532.4, "synch_weights_time_ms": 10.615}, "counters": {"num_env_steps_sampled": 601600, "num_env_steps_trained": 601600, "num_agent_steps_sampled": 1203200, "num_agent_steps_trained": 1203200}, "done": false, "episodes_total": 1504, "training_iteration": 47, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-02-59", "timestamp": 1666580579, "time_this_iter_s": 3.698033094406128, "time_total_s": 177.72738885879517, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, 
"env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, 
"use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, 
"SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], 
"post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, 
"_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, 
"min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 177.72738885879517, "timesteps_since_restore": 0, 
"iterations_since_restore": 47, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 21.03333333333333, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 46.4, "sparse_reward_min": 0, "sparse_reward_max": 100, "shaped_reward_mean": 68.81, "shaped_reward_min": 12, "shaped_reward_max": 101, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 8.2, "onion_pickup_agent_0_min": 1, "onion_pickup_agent_0_max": 16, "onion_pickup_agent_1_mean": 8.96, "onion_pickup_agent_1_min": 3, "onion_pickup_agent_1_max": 18, "useful_onion_pickup_agent_0_mean": 7.01, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 15, "useful_onion_pickup_agent_1_mean": 7.79, "useful_onion_pickup_agent_1_min": 2, "useful_onion_pickup_agent_1_max": 17, "onion_drop_agent_0_mean": 2.09, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 7, "onion_drop_agent_1_mean": 1.8, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 7, "useful_onion_drop_agent_0_mean": 0.96, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, "useful_onion_drop_agent_1_mean": 
1.05, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 6, "potting_onion_agent_0_mean": 5.71, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 13, "potting_onion_agent_1_mean": 6.87, "potting_onion_agent_1_min": 1, "potting_onion_agent_1_max": 14, "dish_pickup_agent_0_mean": 5.09, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 13, "dish_pickup_agent_1_mean": 4.61, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 1.05, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 4, "useful_dish_pickup_agent_1_mean": 1.04, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 6, "dish_drop_agent_0_mean": 2.44, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 9, "dish_drop_agent_1_mean": 1.83, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 7, "useful_dish_drop_agent_0_mean": 0.93, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 5, "useful_dish_drop_agent_1_mean": 0.58, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 4, "soup_pickup_agent_0_mean": 2.69, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 2.91, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 2.21, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 5, "soup_delivery_agent_1_mean": 2.12, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 5, "soup_drop_agent_0_mean": 0.44, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 4, "soup_drop_agent_1_mean": 0.64, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 6, "optimal_onion_potting_agent_0_mean": 5.71, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 13, "optimal_onion_potting_agent_1_mean": 6.87, "optimal_onion_potting_agent_1_min": 1, "optimal_onion_potting_agent_1_max": 14, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, 
"optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 5.71, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 13, "viable_onion_potting_agent_1_mean": 6.87, "viable_onion_potting_agent_1_min": 1, "viable_onion_potting_agent_1_max": 14, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 1.421085492696024e-15, "cur_lr": 0.0010000000474974513, "total_loss": -0.004614857956767082, "policy_loss": -0.004442564211785793, "vf_loss": 6.058649063110352, 
"vf_explained_var": 0.4786253869533539, "kl": 0.0018073207465931773, "entropy": 1.556322693824768, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 614400, "num_env_steps_trained": 614400, "num_agent_steps_sampled": 1228800, "num_agent_steps_trained": 1228800}, "sampler_results": {"episode_reward_max": 299.0, "episode_reward_min": 12.0, "episode_reward_mean": 161.61, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 3.0}, "policy_reward_max": {"ppo": 153.0}, "policy_reward_mean": {"ppo": 80.805}, "custom_metrics": {"sparse_reward_mean": 46.4, "sparse_reward_min": 0, "sparse_reward_max": 100, "shaped_reward_mean": 68.81, "shaped_reward_min": 12, "shaped_reward_max": 101, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 8.2, "onion_pickup_agent_0_min": 1, "onion_pickup_agent_0_max": 16, "onion_pickup_agent_1_mean": 8.96, "onion_pickup_agent_1_min": 3, 
"onion_pickup_agent_1_max": 18, "useful_onion_pickup_agent_0_mean": 7.01, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 15, "useful_onion_pickup_agent_1_mean": 7.79, "useful_onion_pickup_agent_1_min": 2, "useful_onion_pickup_agent_1_max": 17, "onion_drop_agent_0_mean": 2.09, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 7, "onion_drop_agent_1_mean": 1.8, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 7, "useful_onion_drop_agent_0_mean": 0.96, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, "useful_onion_drop_agent_1_mean": 1.05, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 6, "potting_onion_agent_0_mean": 5.71, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 13, "potting_onion_agent_1_mean": 6.87, "potting_onion_agent_1_min": 1, "potting_onion_agent_1_max": 14, "dish_pickup_agent_0_mean": 5.09, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 13, "dish_pickup_agent_1_mean": 4.61, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 1.05, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 4, "useful_dish_pickup_agent_1_mean": 1.04, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 6, "dish_drop_agent_0_mean": 2.44, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 9, "dish_drop_agent_1_mean": 1.83, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 7, "useful_dish_drop_agent_0_mean": 0.93, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 5, "useful_dish_drop_agent_1_mean": 0.58, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 4, "soup_pickup_agent_0_mean": 2.69, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 2.91, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 2.21, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 5, 
"soup_delivery_agent_1_mean": 2.12, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 5, "soup_drop_agent_0_mean": 0.44, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 4, "soup_drop_agent_1_mean": 0.64, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 6, "optimal_onion_potting_agent_0_mean": 5.71, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 13, "optimal_onion_potting_agent_1_mean": 6.87, "optimal_onion_potting_agent_1_min": 1, "optimal_onion_potting_agent_1_max": 14, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 5.71, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 13, "viable_onion_potting_agent_1_mean": 6.87, "viable_onion_potting_agent_1_min": 1, "viable_onion_potting_agent_1_max": 14, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, 
"useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [142.0, 152.0, 222.0, 142.0, 104.0, 118.0, 101.0, 166.0, 285.0, 50.0, 134.0, 173.0, 102.0, 114.0, 156.0, 152.0, 149.0, 153.0, 196.0, 130.0, 129.0, 238.0, 219.0, 209.0, 178.0, 79.0, 188.0, 149.0, 196.0, 147.0, 85.0, 245.0, 203.0, 139.0, 158.0, 145.0, 133.0, 147.0, 136.0, 241.0, 192.0, 190.0, 91.0, 88.0, 90.0, 117.0, 136.0, 193.0, 96.0, 115.0, 64.0, 250.0, 133.0, 238.0, 187.0, 87.0, 285.0, 161.0, 161.0, 204.0, 104.0, 204.0, 241.0, 193.0, 243.0, 136.0, 236.0, 155.0, 147.0, 95.0, 158.0, 141.0, 66.0, 132.0, 182.0, 253.0, 106.0, 185.0, 130.0, 158.0, 133.0, 299.0, 123.0, 246.0, 187.0, 74.0, 230.0, 12.0, 290.0, 155.0, 88.0, 158.0, 258.0, 160.0, 193.0, 252.0, 244.0, 204.0, 120.0, 87.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [68.0, 74.0, 79.0, 73.0, 104.0, 118.0, 74.0, 68.0, 55.0, 49.0, 50.0, 68.0, 43.0, 58.0, 89.0, 77.0, 139.0, 146.0, 19.0, 31.0, 70.0, 64.0, 90.0, 83.0, 59.0, 43.0, 57.0, 57.0, 79.0, 77.0, 72.0, 80.0, 68.0, 81.0, 59.0, 94.0, 110.0, 86.0, 68.0, 62.0, 69.0, 60.0, 114.0, 124.0, 120.0, 99.0, 116.0, 93.0, 84.0, 94.0, 37.0, 42.0, 95.0, 93.0, 60.0, 89.0, 93.0, 103.0, 76.0, 71.0, 44.0, 41.0, 121.0, 124.0, 113.0, 
90.0, 68.0, 71.0, 80.0, 78.0, 79.0, 66.0, 61.0, 72.0, 70.0, 77.0, 79.0, 57.0, 114.0, 127.0, 84.0, 108.0, 88.0, 102.0, 42.0, 49.0, 40.0, 48.0, 45.0, 45.0, 49.0, 68.0, 77.0, 59.0, 89.0, 104.0, 44.0, 52.0, 56.0, 59.0, 25.0, 39.0, 124.0, 126.0, 63.0, 70.0, 110.0, 128.0, 94.0, 93.0, 50.0, 37.0, 132.0, 153.0, 82.0, 79.0, 79.0, 82.0, 108.0, 96.0, 45.0, 59.0, 110.0, 94.0, 122.0, 119.0, 88.0, 105.0, 118.0, 125.0, 71.0, 65.0, 116.0, 120.0, 64.0, 91.0, 69.0, 78.0, 45.0, 50.0, 78.0, 80.0, 62.0, 79.0, 40.0, 26.0, 55.0, 77.0, 94.0, 88.0, 124.0, 129.0, 51.0, 55.0, 91.0, 94.0, 62.0, 68.0, 67.0, 91.0, 68.0, 65.0, 148.0, 151.0, 57.0, 66.0, 129.0, 117.0, 87.0, 100.0, 39.0, 35.0, 123.0, 107.0, 3.0, 9.0, 146.0, 144.0, 66.0, 89.0, 37.0, 51.0, 61.0, 97.0, 142.0, 116.0, 68.0, 92.0, 87.0, 106.0, 105.0, 147.0, 120.0, 124.0, 91.0, 113.0, 52.0, 68.0, 51.0, 36.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6888171226626579, "mean_inference_ms": 1.2119425342109795, "mean_action_processing_ms": 0.13324135084796007, "mean_env_wait_ms": 0.8770887664610575, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 299.0, "episode_reward_min": 12.0, "episode_reward_mean": 161.61, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 3.0}, "policy_reward_max": {"ppo": 153.0}, "policy_reward_mean": {"ppo": 80.805}, "hist_stats": {"episode_reward": [142.0, 152.0, 222.0, 142.0, 104.0, 118.0, 101.0, 166.0, 285.0, 50.0, 134.0, 173.0, 102.0, 114.0, 156.0, 152.0, 149.0, 153.0, 196.0, 130.0, 129.0, 238.0, 219.0, 209.0, 178.0, 79.0, 188.0, 149.0, 196.0, 147.0, 85.0, 245.0, 203.0, 139.0, 158.0, 145.0, 133.0, 147.0, 136.0, 241.0, 192.0, 190.0, 91.0, 88.0, 90.0, 117.0, 136.0, 193.0, 96.0, 115.0, 64.0, 250.0, 133.0, 238.0, 187.0, 87.0, 285.0, 161.0, 161.0, 204.0, 104.0, 204.0, 241.0, 193.0, 243.0, 136.0, 236.0, 155.0, 147.0, 95.0, 158.0, 141.0, 66.0, 132.0, 182.0, 253.0, 106.0, 185.0, 130.0, 158.0, 133.0, 299.0, 123.0, 246.0, 187.0, 74.0, 230.0, 12.0, 290.0, 
155.0, 88.0, 158.0, 258.0, 160.0, 193.0, 252.0, 244.0, 204.0, 120.0, 87.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [68.0, 74.0, 79.0, 73.0, 104.0, 118.0, 74.0, 68.0, 55.0, 49.0, 50.0, 68.0, 43.0, 58.0, 89.0, 77.0, 139.0, 146.0, 19.0, 31.0, 70.0, 64.0, 90.0, 83.0, 59.0, 43.0, 57.0, 57.0, 79.0, 77.0, 72.0, 80.0, 68.0, 81.0, 59.0, 94.0, 110.0, 86.0, 68.0, 62.0, 69.0, 60.0, 114.0, 124.0, 120.0, 99.0, 116.0, 93.0, 84.0, 94.0, 37.0, 42.0, 95.0, 93.0, 60.0, 89.0, 93.0, 103.0, 76.0, 71.0, 44.0, 41.0, 121.0, 124.0, 113.0, 90.0, 68.0, 71.0, 80.0, 78.0, 79.0, 66.0, 61.0, 72.0, 70.0, 77.0, 79.0, 57.0, 114.0, 127.0, 84.0, 108.0, 88.0, 102.0, 42.0, 49.0, 40.0, 48.0, 45.0, 45.0, 49.0, 68.0, 77.0, 59.0, 89.0, 104.0, 44.0, 52.0, 56.0, 59.0, 25.0, 39.0, 124.0, 126.0, 63.0, 70.0, 110.0, 128.0, 94.0, 93.0, 50.0, 37.0, 132.0, 153.0, 82.0, 79.0, 79.0, 82.0, 108.0, 96.0, 45.0, 59.0, 110.0, 94.0, 122.0, 119.0, 88.0, 105.0, 118.0, 125.0, 71.0, 65.0, 116.0, 120.0, 64.0, 91.0, 69.0, 78.0, 45.0, 50.0, 78.0, 80.0, 62.0, 79.0, 40.0, 26.0, 55.0, 77.0, 94.0, 88.0, 124.0, 129.0, 51.0, 55.0, 91.0, 94.0, 62.0, 68.0, 67.0, 91.0, 68.0, 65.0, 148.0, 151.0, 57.0, 66.0, 129.0, 117.0, 87.0, 100.0, 39.0, 35.0, 123.0, 107.0, 3.0, 9.0, 146.0, 144.0, 66.0, 89.0, 37.0, 51.0, 61.0, 97.0, 142.0, 116.0, 68.0, 92.0, 87.0, 106.0, 105.0, 147.0, 120.0, 124.0, 91.0, 113.0, 52.0, 68.0, 51.0, 36.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6888171226626579, "mean_inference_ms": 1.2119425342109795, 
"mean_action_processing_ms": 0.13324135084796007, "mean_env_wait_ms": 0.8770887664610575, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 1228800, "num_agent_steps_trained": 1228800, "num_env_steps_sampled": 614400, "num_env_steps_trained": 614400, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 614400, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 1228800, "timers": {"training_iteration_time_ms": 3673.047, "learn_time_ms": 1105.463, "learn_throughput": 11578.86, "synch_weights_time_ms": 10.566}, "counters": {"num_env_steps_sampled": 614400, "num_env_steps_trained": 614400, "num_agent_steps_sampled": 1228800, "num_agent_steps_trained": 1228800}, "done": false, "episodes_total": 1536, "training_iteration": 48, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-03-03", "timestamp": 1666580583, "time_this_iter_s": 3.613640546798706, "time_total_s": 181.34102940559387, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, 
"SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, 
"vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, 
"DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", 
"post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, 
"_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, 
"min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 181.34102940559387, "timesteps_since_restore": 0, 
"iterations_since_restore": 48, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.92, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 50.8, "sparse_reward_min": 0, "sparse_reward_max": 100, "shaped_reward_mean": 71.75, "shaped_reward_min": 12, "shaped_reward_max": 118, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 8.53, "onion_pickup_agent_0_min": 1, "onion_pickup_agent_0_max": 16, "onion_pickup_agent_1_mean": 9.02, "onion_pickup_agent_1_min": 3, "onion_pickup_agent_1_max": 18, "useful_onion_pickup_agent_0_mean": 7.31, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 15, "useful_onion_pickup_agent_1_mean": 8.12, "useful_onion_pickup_agent_1_min": 2, "useful_onion_pickup_agent_1_max": 14, "onion_drop_agent_0_mean": 2.11, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 7, "onion_drop_agent_1_mean": 1.59, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 1.05, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 
0.87, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 6.01, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 13, "potting_onion_agent_1_mean": 7.16, "potting_onion_agent_1_min": 1, "potting_onion_agent_1_max": 14, "dish_pickup_agent_0_mean": 5.13, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 13, "dish_pickup_agent_1_mean": 4.85, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 13, "useful_dish_pickup_agent_0_mean": 1.12, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 5, "useful_dish_pickup_agent_1_mean": 1.06, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 6, "dish_drop_agent_0_mean": 2.48, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 9, "dish_drop_agent_1_mean": 1.93, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 1.09, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 5, "useful_dish_drop_agent_1_mean": 0.72, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 4, "soup_pickup_agent_0_mean": 2.76, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 3.05, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 2.24, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 5, "soup_delivery_agent_1_mean": 2.29, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 5, "soup_drop_agent_0_mean": 0.42, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 4, "soup_drop_agent_1_mean": 0.6, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 6, "optimal_onion_potting_agent_0_mean": 6.01, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 13, "optimal_onion_potting_agent_1_mean": 7.16, "optimal_onion_potting_agent_1_min": 1, "optimal_onion_potting_agent_1_max": 14, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, 
"optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 6.01, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 13, "viable_onion_potting_agent_1_mean": 7.16, "viable_onion_potting_agent_1_min": 1, "viable_onion_potting_agent_1_max": 14, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 7.10542746348012e-16, "cur_lr": 0.0010000000474974513, "total_loss": -0.00159242725931108, "policy_loss": -0.001501859282143414, "vf_loss": 6.712205410003662, 
"vf_explained_var": 0.4798794388771057, "kl": 0.0016322416486218572, "entropy": 1.5235841274261475, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 627200, "num_env_steps_trained": 627200, "num_agent_steps_sampled": 1254400, "num_agent_steps_trained": 1254400}, "sampler_results": {"episode_reward_max": 318.0, "episode_reward_min": 12.0, "episode_reward_mean": 173.35, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 3.0}, "policy_reward_max": {"ppo": 173.0}, "policy_reward_mean": {"ppo": 86.675}, "custom_metrics": {"sparse_reward_mean": 50.8, "sparse_reward_min": 0, "sparse_reward_max": 100, "shaped_reward_mean": 71.75, "shaped_reward_min": 12, "shaped_reward_max": 118, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 8.53, "onion_pickup_agent_0_min": 1, "onion_pickup_agent_0_max": 16, "onion_pickup_agent_1_mean": 9.02, "onion_pickup_agent_1_min": 3, 
"onion_pickup_agent_1_max": 18, "useful_onion_pickup_agent_0_mean": 7.31, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 15, "useful_onion_pickup_agent_1_mean": 8.12, "useful_onion_pickup_agent_1_min": 2, "useful_onion_pickup_agent_1_max": 14, "onion_drop_agent_0_mean": 2.11, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 7, "onion_drop_agent_1_mean": 1.59, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 1.05, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.87, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 6.01, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 13, "potting_onion_agent_1_mean": 7.16, "potting_onion_agent_1_min": 1, "potting_onion_agent_1_max": 14, "dish_pickup_agent_0_mean": 5.13, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 13, "dish_pickup_agent_1_mean": 4.85, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 13, "useful_dish_pickup_agent_0_mean": 1.12, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 5, "useful_dish_pickup_agent_1_mean": 1.06, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 6, "dish_drop_agent_0_mean": 2.48, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 9, "dish_drop_agent_1_mean": 1.93, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 1.09, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 5, "useful_dish_drop_agent_1_mean": 0.72, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 4, "soup_pickup_agent_0_mean": 2.76, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 3.05, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 2.24, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 5, 
"soup_delivery_agent_1_mean": 2.29, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 5, "soup_drop_agent_0_mean": 0.42, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 4, "soup_drop_agent_1_mean": 0.6, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 6, "optimal_onion_potting_agent_0_mean": 6.01, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 13, "optimal_onion_potting_agent_1_mean": 7.16, "optimal_onion_potting_agent_1_min": 1, "optimal_onion_potting_agent_1_max": 14, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 6.01, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 13, "viable_onion_potting_agent_1_mean": 7.16, "viable_onion_potting_agent_1_min": 1, "viable_onion_potting_agent_1_max": 14, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, 
"useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [203.0, 139.0, 158.0, 145.0, 133.0, 147.0, 136.0, 241.0, 192.0, 190.0, 91.0, 88.0, 90.0, 117.0, 136.0, 193.0, 96.0, 115.0, 64.0, 250.0, 133.0, 238.0, 187.0, 87.0, 285.0, 161.0, 161.0, 204.0, 104.0, 204.0, 241.0, 193.0, 243.0, 136.0, 236.0, 155.0, 147.0, 95.0, 158.0, 141.0, 66.0, 132.0, 182.0, 253.0, 106.0, 185.0, 130.0, 158.0, 133.0, 299.0, 123.0, 246.0, 187.0, 74.0, 230.0, 12.0, 290.0, 155.0, 88.0, 158.0, 258.0, 160.0, 193.0, 252.0, 244.0, 204.0, 120.0, 87.0, 134.0, 209.0, 85.0, 90.0, 55.0, 142.0, 189.0, 278.0, 239.0, 236.0, 110.0, 192.0, 253.0, 244.0, 195.0, 147.0, 299.0, 276.0, 213.0, 170.0, 206.0, 184.0, 233.0, 318.0, 207.0, 302.0, 199.0, 141.0, 80.0, 207.0, 189.0, 155.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [113.0, 90.0, 68.0, 71.0, 80.0, 78.0, 79.0, 66.0, 61.0, 72.0, 70.0, 77.0, 79.0, 57.0, 114.0, 127.0, 84.0, 108.0, 88.0, 102.0, 42.0, 49.0, 40.0, 48.0, 45.0, 45.0, 49.0, 68.0, 77.0, 59.0, 89.0, 104.0, 44.0, 52.0, 56.0, 59.0, 25.0, 39.0, 124.0, 126.0, 63.0, 70.0, 110.0, 128.0, 94.0, 93.0, 50.0, 37.0, 132.0, 153.0, 82.0, 79.0, 79.0, 82.0, 108.0, 96.0, 45.0, 59.0, 110.0, 94.0, 122.0, 119.0, 88.0, 105.0, 
118.0, 125.0, 71.0, 65.0, 116.0, 120.0, 64.0, 91.0, 69.0, 78.0, 45.0, 50.0, 78.0, 80.0, 62.0, 79.0, 40.0, 26.0, 55.0, 77.0, 94.0, 88.0, 124.0, 129.0, 51.0, 55.0, 91.0, 94.0, 62.0, 68.0, 67.0, 91.0, 68.0, 65.0, 148.0, 151.0, 57.0, 66.0, 129.0, 117.0, 87.0, 100.0, 39.0, 35.0, 123.0, 107.0, 3.0, 9.0, 146.0, 144.0, 66.0, 89.0, 37.0, 51.0, 61.0, 97.0, 142.0, 116.0, 68.0, 92.0, 87.0, 106.0, 105.0, 147.0, 120.0, 124.0, 91.0, 113.0, 52.0, 68.0, 51.0, 36.0, 79.0, 55.0, 100.0, 109.0, 29.0, 56.0, 46.0, 44.0, 30.0, 25.0, 60.0, 82.0, 103.0, 86.0, 131.0, 147.0, 122.0, 117.0, 118.0, 118.0, 67.0, 43.0, 105.0, 87.0, 133.0, 120.0, 110.0, 134.0, 97.0, 98.0, 82.0, 65.0, 143.0, 156.0, 136.0, 140.0, 91.0, 122.0, 98.0, 72.0, 104.0, 102.0, 70.0, 114.0, 103.0, 130.0, 145.0, 173.0, 112.0, 95.0, 156.0, 146.0, 80.0, 119.0, 64.0, 77.0, 37.0, 43.0, 112.0, 95.0, 97.0, 92.0, 87.0, 68.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6887080065883876, "mean_inference_ms": 1.2116564477850689, "mean_action_processing_ms": 0.13322398997678847, "mean_env_wait_ms": 0.8764812890451134, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 318.0, "episode_reward_min": 12.0, "episode_reward_mean": 173.35, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 3.0}, "policy_reward_max": {"ppo": 173.0}, "policy_reward_mean": {"ppo": 86.675}, "hist_stats": {"episode_reward": [203.0, 139.0, 158.0, 145.0, 133.0, 147.0, 136.0, 241.0, 192.0, 190.0, 91.0, 88.0, 90.0, 117.0, 136.0, 193.0, 96.0, 115.0, 64.0, 250.0, 133.0, 238.0, 187.0, 87.0, 285.0, 161.0, 161.0, 204.0, 104.0, 204.0, 241.0, 193.0, 243.0, 136.0, 236.0, 155.0, 147.0, 95.0, 158.0, 141.0, 66.0, 132.0, 182.0, 253.0, 106.0, 185.0, 130.0, 158.0, 133.0, 299.0, 123.0, 246.0, 187.0, 74.0, 230.0, 12.0, 290.0, 155.0, 88.0, 158.0, 258.0, 160.0, 193.0, 252.0, 244.0, 204.0, 120.0, 87.0, 134.0, 209.0, 85.0, 90.0, 55.0, 142.0, 189.0, 278.0, 239.0, 236.0, 110.0, 192.0, 253.0, 244.0, 195.0, 147.0, 299.0, 276.0, 
213.0, 170.0, 206.0, 184.0, 233.0, 318.0, 207.0, 302.0, 199.0, 141.0, 80.0, 207.0, 189.0, 155.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [113.0, 90.0, 68.0, 71.0, 80.0, 78.0, 79.0, 66.0, 61.0, 72.0, 70.0, 77.0, 79.0, 57.0, 114.0, 127.0, 84.0, 108.0, 88.0, 102.0, 42.0, 49.0, 40.0, 48.0, 45.0, 45.0, 49.0, 68.0, 77.0, 59.0, 89.0, 104.0, 44.0, 52.0, 56.0, 59.0, 25.0, 39.0, 124.0, 126.0, 63.0, 70.0, 110.0, 128.0, 94.0, 93.0, 50.0, 37.0, 132.0, 153.0, 82.0, 79.0, 79.0, 82.0, 108.0, 96.0, 45.0, 59.0, 110.0, 94.0, 122.0, 119.0, 88.0, 105.0, 118.0, 125.0, 71.0, 65.0, 116.0, 120.0, 64.0, 91.0, 69.0, 78.0, 45.0, 50.0, 78.0, 80.0, 62.0, 79.0, 40.0, 26.0, 55.0, 77.0, 94.0, 88.0, 124.0, 129.0, 51.0, 55.0, 91.0, 94.0, 62.0, 68.0, 67.0, 91.0, 68.0, 65.0, 148.0, 151.0, 57.0, 66.0, 129.0, 117.0, 87.0, 100.0, 39.0, 35.0, 123.0, 107.0, 3.0, 9.0, 146.0, 144.0, 66.0, 89.0, 37.0, 51.0, 61.0, 97.0, 142.0, 116.0, 68.0, 92.0, 87.0, 106.0, 105.0, 147.0, 120.0, 124.0, 91.0, 113.0, 52.0, 68.0, 51.0, 36.0, 79.0, 55.0, 100.0, 109.0, 29.0, 56.0, 46.0, 44.0, 30.0, 25.0, 60.0, 82.0, 103.0, 86.0, 131.0, 147.0, 122.0, 117.0, 118.0, 118.0, 67.0, 43.0, 105.0, 87.0, 133.0, 120.0, 110.0, 134.0, 97.0, 98.0, 82.0, 65.0, 143.0, 156.0, 136.0, 140.0, 91.0, 122.0, 98.0, 72.0, 104.0, 102.0, 70.0, 114.0, 103.0, 130.0, 145.0, 173.0, 112.0, 95.0, 156.0, 146.0, 80.0, 119.0, 64.0, 77.0, 37.0, 43.0, 112.0, 95.0, 97.0, 92.0, 87.0, 68.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6887080065883876, "mean_inference_ms": 
1.2116564477850689, "mean_action_processing_ms": 0.13322398997678847, "mean_env_wait_ms": 0.8764812890451134, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 1254400, "num_agent_steps_trained": 1254400, "num_env_steps_sampled": 627200, "num_env_steps_trained": 627200, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 627200, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 1254400, "timers": {"training_iteration_time_ms": 3667.292, "learn_time_ms": 1097.604, "learn_throughput": 11661.76, "synch_weights_time_ms": 10.726}, "counters": {"num_env_steps_sampled": 627200, "num_env_steps_trained": 627200, "num_agent_steps_sampled": 1254400, "num_agent_steps_trained": 1254400}, "done": false, "episodes_total": 1568, "training_iteration": 49, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-03-07", "timestamp": 1666580587, "time_this_iter_s": 3.711414098739624, "time_total_s": 185.0524435043335, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, 
"POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, 
"no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": 
{"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, 
"conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, 
"_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, 
"min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 
185.0524435043335, "timesteps_since_restore": 0, "iterations_since_restore": 49, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 21.000000000000004, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 57.0, "sparse_reward_min": 0, "sparse_reward_max": 120, "shaped_reward_mean": 73.04, "shaped_reward_min": 9, "shaped_reward_max": 118, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 8.56, "onion_pickup_agent_0_min": 1, "onion_pickup_agent_0_max": 16, "onion_pickup_agent_1_mean": 9.37, "onion_pickup_agent_1_min": 3, "onion_pickup_agent_1_max": 18, "useful_onion_pickup_agent_0_mean": 7.32, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 15, "useful_onion_pickup_agent_1_mean": 8.37, "useful_onion_pickup_agent_1_min": 2, "useful_onion_pickup_agent_1_max": 15, "onion_drop_agent_0_mean": 1.9, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 1.7, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 7, "useful_onion_drop_agent_0_mean": 0.95, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.88, 
"useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 6, "potting_onion_agent_0_mean": 6.22, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 12, "potting_onion_agent_1_mean": 7.38, "potting_onion_agent_1_min": 1, "potting_onion_agent_1_max": 15, "dish_pickup_agent_0_mean": 5.1, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.06, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 13, "useful_dish_pickup_agent_0_mean": 1.04, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 5, "useful_dish_pickup_agent_1_mean": 1.14, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 6, "dish_drop_agent_0_mean": 2.53, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 8, "dish_drop_agent_1_mean": 2.07, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 1.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 5, "useful_dish_drop_agent_1_mean": 0.75, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 4, "soup_pickup_agent_0_mean": 2.69, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 6, "soup_pickup_agent_1_mean": 3.02, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 2.16, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 5, "soup_delivery_agent_1_mean": 2.41, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.4, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 4, "soup_drop_agent_1_mean": 0.48, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 4, "optimal_onion_potting_agent_0_mean": 6.22, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 12, "optimal_onion_potting_agent_1_mean": 7.38, "optimal_onion_potting_agent_1_min": 1, "optimal_onion_potting_agent_1_max": 15, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, 
"optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 6.22, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 12, "viable_onion_potting_agent_1_mean": 7.38, "viable_onion_potting_agent_1_min": 1, "viable_onion_potting_agent_1_max": 15, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 3.55271373174006e-16, "cur_lr": 0.0010000000474974513, "total_loss": -0.004688685759902, "policy_loss": -0.004600519314408302, "vf_loss": 6.720330238342285, 
"vf_explained_var": 0.5159987807273865, "kl": 0.0016663586720824242, "entropy": 1.5204063653945923, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 640000, "num_env_steps_trained": 640000, "num_agent_steps_sampled": 1280000, "num_agent_steps_trained": 1280000}, "sampler_results": {"episode_reward_max": 338.0, "episode_reward_min": 9.0, "episode_reward_mean": 187.04, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 3.0}, "policy_reward_max": {"ppo": 176.0}, "policy_reward_mean": {"ppo": 93.52}, "custom_metrics": {"sparse_reward_mean": 57.0, "sparse_reward_min": 0, "sparse_reward_max": 120, "shaped_reward_mean": 73.04, "shaped_reward_min": 9, "shaped_reward_max": 118, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 8.56, "onion_pickup_agent_0_min": 1, "onion_pickup_agent_0_max": 16, "onion_pickup_agent_1_mean": 9.37, "onion_pickup_agent_1_min": 3, 
"onion_pickup_agent_1_max": 18, "useful_onion_pickup_agent_0_mean": 7.32, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 15, "useful_onion_pickup_agent_1_mean": 8.37, "useful_onion_pickup_agent_1_min": 2, "useful_onion_pickup_agent_1_max": 15, "onion_drop_agent_0_mean": 1.9, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 1.7, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 7, "useful_onion_drop_agent_0_mean": 0.95, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.88, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 6, "potting_onion_agent_0_mean": 6.22, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 12, "potting_onion_agent_1_mean": 7.38, "potting_onion_agent_1_min": 1, "potting_onion_agent_1_max": 15, "dish_pickup_agent_0_mean": 5.1, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.06, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 13, "useful_dish_pickup_agent_0_mean": 1.04, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 5, "useful_dish_pickup_agent_1_mean": 1.14, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 6, "dish_drop_agent_0_mean": 2.53, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 8, "dish_drop_agent_1_mean": 2.07, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 1.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 5, "useful_dish_drop_agent_1_mean": 0.75, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 4, "soup_pickup_agent_0_mean": 2.69, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 6, "soup_pickup_agent_1_mean": 3.02, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 2.16, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 5, 
"soup_delivery_agent_1_mean": 2.41, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.4, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 4, "soup_drop_agent_1_mean": 0.48, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 4, "optimal_onion_potting_agent_0_mean": 6.22, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 12, "optimal_onion_potting_agent_1_mean": 7.38, "optimal_onion_potting_agent_1_min": 1, "optimal_onion_potting_agent_1_max": 15, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 6.22, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 12, "viable_onion_potting_agent_1_mean": 7.38, "viable_onion_potting_agent_1_min": 1, "viable_onion_potting_agent_1_max": 15, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, 
"useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [243.0, 136.0, 236.0, 155.0, 147.0, 95.0, 158.0, 141.0, 66.0, 132.0, 182.0, 253.0, 106.0, 185.0, 130.0, 158.0, 133.0, 299.0, 123.0, 246.0, 187.0, 74.0, 230.0, 12.0, 290.0, 155.0, 88.0, 158.0, 258.0, 160.0, 193.0, 252.0, 244.0, 204.0, 120.0, 87.0, 134.0, 209.0, 85.0, 90.0, 55.0, 142.0, 189.0, 278.0, 239.0, 236.0, 110.0, 192.0, 253.0, 244.0, 195.0, 147.0, 299.0, 276.0, 213.0, 170.0, 206.0, 184.0, 233.0, 318.0, 207.0, 302.0, 199.0, 141.0, 80.0, 207.0, 189.0, 155.0, 213.0, 287.0, 250.0, 196.0, 193.0, 93.0, 131.0, 292.0, 281.0, 282.0, 247.0, 247.0, 132.0, 227.0, 9.0, 284.0, 292.0, 110.0, 195.0, 145.0, 165.0, 338.0, 279.0, 193.0, 244.0, 136.0, 187.0, 285.0, 12.0, 188.0, 238.0, 120.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [118.0, 125.0, 71.0, 65.0, 116.0, 120.0, 64.0, 91.0, 69.0, 78.0, 45.0, 50.0, 78.0, 80.0, 62.0, 79.0, 40.0, 26.0, 55.0, 77.0, 94.0, 88.0, 124.0, 129.0, 51.0, 55.0, 91.0, 94.0, 62.0, 68.0, 67.0, 91.0, 68.0, 65.0, 148.0, 151.0, 57.0, 66.0, 129.0, 117.0, 87.0, 100.0, 39.0, 35.0, 123.0, 107.0, 3.0, 9.0, 146.0, 144.0, 66.0, 89.0, 37.0, 51.0, 61.0, 97.0, 142.0, 116.0, 68.0, 92.0, 87.0, 106.0, 105.0, 147.0, 
120.0, 124.0, 91.0, 113.0, 52.0, 68.0, 51.0, 36.0, 79.0, 55.0, 100.0, 109.0, 29.0, 56.0, 46.0, 44.0, 30.0, 25.0, 60.0, 82.0, 103.0, 86.0, 131.0, 147.0, 122.0, 117.0, 118.0, 118.0, 67.0, 43.0, 105.0, 87.0, 133.0, 120.0, 110.0, 134.0, 97.0, 98.0, 82.0, 65.0, 143.0, 156.0, 136.0, 140.0, 91.0, 122.0, 98.0, 72.0, 104.0, 102.0, 70.0, 114.0, 103.0, 130.0, 145.0, 173.0, 112.0, 95.0, 156.0, 146.0, 80.0, 119.0, 64.0, 77.0, 37.0, 43.0, 112.0, 95.0, 97.0, 92.0, 87.0, 68.0, 106.0, 107.0, 142.0, 145.0, 122.0, 128.0, 85.0, 111.0, 89.0, 104.0, 48.0, 45.0, 62.0, 69.0, 159.0, 133.0, 139.0, 142.0, 133.0, 149.0, 123.0, 124.0, 119.0, 128.0, 64.0, 68.0, 105.0, 122.0, 3.0, 6.0, 132.0, 152.0, 131.0, 161.0, 56.0, 54.0, 104.0, 91.0, 61.0, 84.0, 83.0, 82.0, 162.0, 176.0, 139.0, 140.0, 82.0, 111.0, 109.0, 135.0, 71.0, 65.0, 105.0, 82.0, 142.0, 143.0, 3.0, 9.0, 86.0, 102.0, 111.0, 127.0, 58.0, 62.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6886453974613268, "mean_inference_ms": 1.2113947739430153, "mean_action_processing_ms": 0.13321217952264544, "mean_env_wait_ms": 0.8759285348880911, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 338.0, "episode_reward_min": 9.0, "episode_reward_mean": 187.04, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 3.0}, "policy_reward_max": {"ppo": 176.0}, "policy_reward_mean": {"ppo": 93.52}, "hist_stats": {"episode_reward": [243.0, 136.0, 236.0, 155.0, 147.0, 95.0, 158.0, 141.0, 66.0, 132.0, 182.0, 253.0, 106.0, 185.0, 130.0, 158.0, 133.0, 299.0, 123.0, 246.0, 187.0, 74.0, 230.0, 12.0, 290.0, 155.0, 88.0, 158.0, 258.0, 160.0, 193.0, 252.0, 244.0, 204.0, 120.0, 87.0, 134.0, 209.0, 85.0, 90.0, 55.0, 142.0, 189.0, 278.0, 239.0, 236.0, 110.0, 192.0, 253.0, 244.0, 195.0, 147.0, 299.0, 276.0, 213.0, 170.0, 206.0, 184.0, 233.0, 318.0, 207.0, 302.0, 199.0, 141.0, 80.0, 207.0, 189.0, 155.0, 213.0, 287.0, 250.0, 196.0, 193.0, 93.0, 131.0, 292.0, 281.0, 282.0, 247.0, 247.0, 132.0, 227.0, 9.0, 284.0, 
292.0, 110.0, 195.0, 145.0, 165.0, 338.0, 279.0, 193.0, 244.0, 136.0, 187.0, 285.0, 12.0, 188.0, 238.0, 120.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [118.0, 125.0, 71.0, 65.0, 116.0, 120.0, 64.0, 91.0, 69.0, 78.0, 45.0, 50.0, 78.0, 80.0, 62.0, 79.0, 40.0, 26.0, 55.0, 77.0, 94.0, 88.0, 124.0, 129.0, 51.0, 55.0, 91.0, 94.0, 62.0, 68.0, 67.0, 91.0, 68.0, 65.0, 148.0, 151.0, 57.0, 66.0, 129.0, 117.0, 87.0, 100.0, 39.0, 35.0, 123.0, 107.0, 3.0, 9.0, 146.0, 144.0, 66.0, 89.0, 37.0, 51.0, 61.0, 97.0, 142.0, 116.0, 68.0, 92.0, 87.0, 106.0, 105.0, 147.0, 120.0, 124.0, 91.0, 113.0, 52.0, 68.0, 51.0, 36.0, 79.0, 55.0, 100.0, 109.0, 29.0, 56.0, 46.0, 44.0, 30.0, 25.0, 60.0, 82.0, 103.0, 86.0, 131.0, 147.0, 122.0, 117.0, 118.0, 118.0, 67.0, 43.0, 105.0, 87.0, 133.0, 120.0, 110.0, 134.0, 97.0, 98.0, 82.0, 65.0, 143.0, 156.0, 136.0, 140.0, 91.0, 122.0, 98.0, 72.0, 104.0, 102.0, 70.0, 114.0, 103.0, 130.0, 145.0, 173.0, 112.0, 95.0, 156.0, 146.0, 80.0, 119.0, 64.0, 77.0, 37.0, 43.0, 112.0, 95.0, 97.0, 92.0, 87.0, 68.0, 106.0, 107.0, 142.0, 145.0, 122.0, 128.0, 85.0, 111.0, 89.0, 104.0, 48.0, 45.0, 62.0, 69.0, 159.0, 133.0, 139.0, 142.0, 133.0, 149.0, 123.0, 124.0, 119.0, 128.0, 64.0, 68.0, 105.0, 122.0, 3.0, 6.0, 132.0, 152.0, 131.0, 161.0, 56.0, 54.0, 104.0, 91.0, 61.0, 84.0, 83.0, 82.0, 162.0, 176.0, 139.0, 140.0, 82.0, 111.0, 109.0, 135.0, 71.0, 65.0, 105.0, 82.0, 142.0, 143.0, 3.0, 9.0, 86.0, 102.0, 111.0, 127.0, 58.0, 62.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 
0.6886453974613268, "mean_inference_ms": 1.2113947739430153, "mean_action_processing_ms": 0.13321217952264544, "mean_env_wait_ms": 0.8759285348880911, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 1280000, "num_agent_steps_trained": 1280000, "num_env_steps_sampled": 640000, "num_env_steps_trained": 640000, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 640000, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 1280000, "timers": {"training_iteration_time_ms": 3674.795, "learn_time_ms": 1102.244, "learn_throughput": 11612.672, "synch_weights_time_ms": 11.619}, "counters": {"num_env_steps_sampled": 640000, "num_env_steps_trained": 640000, "num_agent_steps_sampled": 1280000, "num_agent_steps_trained": 1280000}, "done": false, "episodes_total": 1600, "training_iteration": 50, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-03-11", "timestamp": 1666580591, "time_this_iter_s": 3.7579612731933594, "time_total_s": 188.81040477752686, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, 
"SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], 
"post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": 
{"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], 
"fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, 
"_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "time_since_restore": 188.81040477752686, "timesteps_since_restore": 0, "iterations_since_restore": 50, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.94, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 58.8, "sparse_reward_min": 0, "sparse_reward_max": 120, "shaped_reward_mean": 74.18, "shaped_reward_min": 9, "shaped_reward_max": 118, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 8.97, "onion_pickup_agent_0_min": 0, "onion_pickup_agent_0_max": 18, "onion_pickup_agent_1_mean": 9.16, "onion_pickup_agent_1_min": 2, "onion_pickup_agent_1_max": 17, "useful_onion_pickup_agent_0_mean": 7.71, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 15, "useful_onion_pickup_agent_1_mean": 8.13, "useful_onion_pickup_agent_1_min": 2, "useful_onion_pickup_agent_1_max": 15, "onion_drop_agent_0_mean": 1.81, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 7, "onion_drop_agent_1_mean": 1.74, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 7, "useful_onion_drop_agent_0_mean": 1.02, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 
0.83, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 6, "potting_onion_agent_0_mean": 6.74, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 14, "potting_onion_agent_1_mean": 7.12, "potting_onion_agent_1_min": 2, "potting_onion_agent_1_max": 15, "dish_pickup_agent_0_mean": 4.79, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.26, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 15, "useful_dish_pickup_agent_0_mean": 1.04, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 5, "useful_dish_pickup_agent_1_mean": 1.26, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 4, "dish_drop_agent_0_mean": 2.15, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 2.28, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.74, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 4, "useful_dish_drop_agent_1_mean": 0.75, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 4, "soup_pickup_agent_0_mean": 2.69, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 6, "soup_pickup_agent_1_mean": 2.96, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 2.13, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 5, "soup_delivery_agent_1_mean": 2.41, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.4, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 4, "soup_drop_agent_1_mean": 0.43, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 4, "optimal_onion_potting_agent_0_mean": 6.74, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 14, "optimal_onion_potting_agent_1_mean": 7.12, "optimal_onion_potting_agent_1_min": 2, "optimal_onion_potting_agent_1_max": 15, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, 
"optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 6.74, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 14, "viable_onion_potting_agent_1_mean": 7.12, "viable_onion_potting_agent_1_min": 2, "viable_onion_potting_agent_1_max": 15, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 1.77635686587003e-16, "cur_lr": 0.0010000000474974513, "total_loss": -0.00452738581225276, "policy_loss": -0.004407214000821114, "vf_loss": 6.4294867515563965, 
"vf_explained_var": 0.5273363590240479, "kl": 0.0014962749555706978, "entropy": 1.526247262954712, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 652800, "num_env_steps_trained": 652800, "num_agent_steps_sampled": 1305600, "num_agent_steps_trained": 1305600}, "sampler_results": {"episode_reward_max": 338.0, "episode_reward_min": 9.0, "episode_reward_mean": 191.78, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 3.0}, "policy_reward_max": {"ppo": 176.0}, "policy_reward_mean": {"ppo": 95.89}, "custom_metrics": {"sparse_reward_mean": 58.8, "sparse_reward_min": 0, "sparse_reward_max": 120, "shaped_reward_mean": 74.18, "shaped_reward_min": 9, "shaped_reward_max": 118, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 8.97, "onion_pickup_agent_0_min": 0, "onion_pickup_agent_0_max": 18, "onion_pickup_agent_1_mean": 9.16, "onion_pickup_agent_1_min": 2, 
"onion_pickup_agent_1_max": 17, "useful_onion_pickup_agent_0_mean": 7.71, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 15, "useful_onion_pickup_agent_1_mean": 8.13, "useful_onion_pickup_agent_1_min": 2, "useful_onion_pickup_agent_1_max": 15, "onion_drop_agent_0_mean": 1.81, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 7, "onion_drop_agent_1_mean": 1.74, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 7, "useful_onion_drop_agent_0_mean": 1.02, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.83, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 6, "potting_onion_agent_0_mean": 6.74, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 14, "potting_onion_agent_1_mean": 7.12, "potting_onion_agent_1_min": 2, "potting_onion_agent_1_max": 15, "dish_pickup_agent_0_mean": 4.79, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.26, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 15, "useful_dish_pickup_agent_0_mean": 1.04, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 5, "useful_dish_pickup_agent_1_mean": 1.26, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 4, "dish_drop_agent_0_mean": 2.15, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 2.28, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.74, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 4, "useful_dish_drop_agent_1_mean": 0.75, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 4, "soup_pickup_agent_0_mean": 2.69, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 6, "soup_pickup_agent_1_mean": 2.96, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 2.13, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 5, 
"soup_delivery_agent_1_mean": 2.41, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.4, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 4, "soup_drop_agent_1_mean": 0.43, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 4, "optimal_onion_potting_agent_0_mean": 6.74, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 14, "optimal_onion_potting_agent_1_mean": 7.12, "optimal_onion_potting_agent_1_min": 2, "optimal_onion_potting_agent_1_max": 15, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 6.74, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 14, "viable_onion_potting_agent_1_mean": 7.12, "viable_onion_potting_agent_1_min": 2, "viable_onion_potting_agent_1_max": 15, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, 
"useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [244.0, 204.0, 120.0, 87.0, 134.0, 209.0, 85.0, 90.0, 55.0, 142.0, 189.0, 278.0, 239.0, 236.0, 110.0, 192.0, 253.0, 244.0, 195.0, 147.0, 299.0, 276.0, 213.0, 170.0, 206.0, 184.0, 233.0, 318.0, 207.0, 302.0, 199.0, 141.0, 80.0, 207.0, 189.0, 155.0, 213.0, 287.0, 250.0, 196.0, 193.0, 93.0, 131.0, 292.0, 281.0, 282.0, 247.0, 247.0, 132.0, 227.0, 9.0, 284.0, 292.0, 110.0, 195.0, 145.0, 165.0, 338.0, 279.0, 193.0, 244.0, 136.0, 187.0, 285.0, 12.0, 188.0, 238.0, 120.0, 178.0, 235.0, 204.0, 260.0, 190.0, 156.0, 80.0, 250.0, 182.0, 209.0, 136.0, 244.0, 168.0, 23.0, 238.0, 247.0, 207.0, 193.0, 187.0, 94.0, 231.0, 135.0, 120.0, 253.0, 148.0, 292.0, 77.0, 96.0, 296.0, 239.0, 210.0, 77.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [120.0, 124.0, 91.0, 113.0, 52.0, 68.0, 51.0, 36.0, 79.0, 55.0, 100.0, 109.0, 29.0, 56.0, 46.0, 44.0, 30.0, 25.0, 60.0, 82.0, 103.0, 86.0, 131.0, 147.0, 122.0, 117.0, 118.0, 118.0, 67.0, 43.0, 105.0, 87.0, 133.0, 120.0, 110.0, 134.0, 97.0, 98.0, 82.0, 65.0, 143.0, 156.0, 136.0, 140.0, 91.0, 122.0, 98.0, 72.0, 104.0, 102.0, 70.0, 114.0, 103.0, 130.0, 145.0, 173.0, 112.0, 95.0, 156.0, 146.0, 80.0, 
119.0, 64.0, 77.0, 37.0, 43.0, 112.0, 95.0, 97.0, 92.0, 87.0, 68.0, 106.0, 107.0, 142.0, 145.0, 122.0, 128.0, 85.0, 111.0, 89.0, 104.0, 48.0, 45.0, 62.0, 69.0, 159.0, 133.0, 139.0, 142.0, 133.0, 149.0, 123.0, 124.0, 119.0, 128.0, 64.0, 68.0, 105.0, 122.0, 3.0, 6.0, 132.0, 152.0, 131.0, 161.0, 56.0, 54.0, 104.0, 91.0, 61.0, 84.0, 83.0, 82.0, 162.0, 176.0, 139.0, 140.0, 82.0, 111.0, 109.0, 135.0, 71.0, 65.0, 105.0, 82.0, 142.0, 143.0, 3.0, 9.0, 86.0, 102.0, 111.0, 127.0, 58.0, 62.0, 79.0, 99.0, 112.0, 123.0, 105.0, 99.0, 141.0, 119.0, 82.0, 108.0, 88.0, 68.0, 43.0, 37.0, 123.0, 127.0, 85.0, 97.0, 106.0, 103.0, 68.0, 68.0, 123.0, 121.0, 85.0, 83.0, 3.0, 20.0, 108.0, 130.0, 137.0, 110.0, 104.0, 103.0, 107.0, 86.0, 98.0, 89.0, 52.0, 42.0, 102.0, 129.0, 69.0, 66.0, 46.0, 74.0, 112.0, 141.0, 83.0, 65.0, 152.0, 140.0, 40.0, 37.0, 50.0, 46.0, 157.0, 139.0, 134.0, 105.0, 96.0, 114.0, 39.0, 38.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6886981609168396, "mean_inference_ms": 1.2112370650019237, "mean_action_processing_ms": 0.1332088502478148, "mean_env_wait_ms": 0.8754223222096275, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 338.0, "episode_reward_min": 9.0, "episode_reward_mean": 191.78, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 3.0}, "policy_reward_max": {"ppo": 176.0}, "policy_reward_mean": {"ppo": 95.89}, "hist_stats": {"episode_reward": [244.0, 204.0, 120.0, 87.0, 134.0, 209.0, 85.0, 90.0, 55.0, 142.0, 189.0, 278.0, 239.0, 236.0, 110.0, 192.0, 253.0, 244.0, 195.0, 147.0, 299.0, 276.0, 213.0, 170.0, 206.0, 184.0, 233.0, 318.0, 207.0, 302.0, 199.0, 141.0, 80.0, 207.0, 189.0, 155.0, 213.0, 287.0, 250.0, 196.0, 193.0, 93.0, 131.0, 292.0, 281.0, 282.0, 247.0, 247.0, 132.0, 227.0, 9.0, 284.0, 292.0, 110.0, 195.0, 145.0, 165.0, 338.0, 279.0, 193.0, 244.0, 136.0, 187.0, 285.0, 12.0, 188.0, 238.0, 120.0, 178.0, 235.0, 204.0, 260.0, 190.0, 156.0, 80.0, 250.0, 182.0, 209.0, 136.0, 244.0, 168.0, 23.0, 
238.0, 247.0, 207.0, 193.0, 187.0, 94.0, 231.0, 135.0, 120.0, 253.0, 148.0, 292.0, 77.0, 96.0, 296.0, 239.0, 210.0, 77.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [120.0, 124.0, 91.0, 113.0, 52.0, 68.0, 51.0, 36.0, 79.0, 55.0, 100.0, 109.0, 29.0, 56.0, 46.0, 44.0, 30.0, 25.0, 60.0, 82.0, 103.0, 86.0, 131.0, 147.0, 122.0, 117.0, 118.0, 118.0, 67.0, 43.0, 105.0, 87.0, 133.0, 120.0, 110.0, 134.0, 97.0, 98.0, 82.0, 65.0, 143.0, 156.0, 136.0, 140.0, 91.0, 122.0, 98.0, 72.0, 104.0, 102.0, 70.0, 114.0, 103.0, 130.0, 145.0, 173.0, 112.0, 95.0, 156.0, 146.0, 80.0, 119.0, 64.0, 77.0, 37.0, 43.0, 112.0, 95.0, 97.0, 92.0, 87.0, 68.0, 106.0, 107.0, 142.0, 145.0, 122.0, 128.0, 85.0, 111.0, 89.0, 104.0, 48.0, 45.0, 62.0, 69.0, 159.0, 133.0, 139.0, 142.0, 133.0, 149.0, 123.0, 124.0, 119.0, 128.0, 64.0, 68.0, 105.0, 122.0, 3.0, 6.0, 132.0, 152.0, 131.0, 161.0, 56.0, 54.0, 104.0, 91.0, 61.0, 84.0, 83.0, 82.0, 162.0, 176.0, 139.0, 140.0, 82.0, 111.0, 109.0, 135.0, 71.0, 65.0, 105.0, 82.0, 142.0, 143.0, 3.0, 9.0, 86.0, 102.0, 111.0, 127.0, 58.0, 62.0, 79.0, 99.0, 112.0, 123.0, 105.0, 99.0, 141.0, 119.0, 82.0, 108.0, 88.0, 68.0, 43.0, 37.0, 123.0, 127.0, 85.0, 97.0, 106.0, 103.0, 68.0, 68.0, 123.0, 121.0, 85.0, 83.0, 3.0, 20.0, 108.0, 130.0, 137.0, 110.0, 104.0, 103.0, 107.0, 86.0, 98.0, 89.0, 52.0, 42.0, 102.0, 129.0, 69.0, 66.0, 46.0, 74.0, 112.0, 141.0, 83.0, 65.0, 152.0, 140.0, 40.0, 37.0, 50.0, 46.0, 157.0, 139.0, 134.0, 105.0, 96.0, 114.0, 39.0, 38.0]}, "sampler_perf": 
{"mean_raw_obs_processing_ms": 0.6886981609168396, "mean_inference_ms": 1.2112370650019237, "mean_action_processing_ms": 0.1332088502478148, "mean_env_wait_ms": 0.8754223222096275, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 1305600, "num_agent_steps_trained": 1305600, "num_env_steps_sampled": 652800, "num_env_steps_trained": 652800, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 652800, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 1305600, "timers": {"training_iteration_time_ms": 3681.953, "learn_time_ms": 1104.988, "learn_throughput": 11583.841, "synch_weights_time_ms": 11.741}, "counters": {"num_env_steps_sampled": 652800, "num_env_steps_trained": 652800, "num_agent_steps_sampled": 1305600, "num_agent_steps_trained": 1305600}, "done": false, "episodes_total": 1632, "training_iteration": 51, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-03-15", "timestamp": 1666580595, "time_this_iter_s": 3.8342444896698, "time_total_s": 192.64464926719666, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, 
"DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", 
"post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", 
"env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, 
"fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": 
false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, 
"metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 
0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 192.64464926719666, "timesteps_since_restore": 0, "iterations_since_restore": 51, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 21.21666666666667, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 61.8, "sparse_reward_min": 0, "sparse_reward_max": 120, "shaped_reward_mean": 73.3, "shaped_reward_min": 9, "shaped_reward_max": 115, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 8.73, "onion_pickup_agent_0_min": 0, "onion_pickup_agent_0_max": 18, "onion_pickup_agent_1_mean": 9.28, "onion_pickup_agent_1_min": 2, "onion_pickup_agent_1_max": 19, "useful_onion_pickup_agent_0_mean": 7.59, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 15, "useful_onion_pickup_agent_1_mean": 8.13, "useful_onion_pickup_agent_1_min": 2, "useful_onion_pickup_agent_1_max": 19, "onion_drop_agent_0_mean": 1.65, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 7, "onion_drop_agent_1_mean": 1.67, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 7, "useful_onion_drop_agent_0_mean": 0.95, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.8, 
"useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 6, "potting_onion_agent_0_mean": 6.65, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 14, "potting_onion_agent_1_mean": 7.23, "potting_onion_agent_1_min": 1, "potting_onion_agent_1_max": 18, "dish_pickup_agent_0_mean": 4.86, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 4.95, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 15, "useful_dish_pickup_agent_0_mean": 1.06, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 6, "useful_dish_pickup_agent_1_mean": 1.11, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 4, "dish_drop_agent_0_mean": 2.04, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 2.23, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.66, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 4, "useful_dish_drop_agent_1_mean": 0.79, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 4, "soup_pickup_agent_0_mean": 2.77, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 6, "soup_pickup_agent_1_mean": 2.67, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 2.29, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 6, "soup_delivery_agent_1_mean": 2.27, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.36, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 4, "soup_drop_agent_1_mean": 0.33, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 4, "optimal_onion_potting_agent_0_mean": 6.65, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 14, "optimal_onion_potting_agent_1_mean": 7.23, "optimal_onion_potting_agent_1_min": 1, "optimal_onion_potting_agent_1_max": 18, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, 
"optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 6.65, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 14, "viable_onion_potting_agent_1_mean": 7.23, "viable_onion_potting_agent_1_min": 1, "viable_onion_potting_agent_1_max": 18, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 8.88178432935015e-17, "cur_lr": 0.0010000000474974513, "total_loss": -0.004982168320566416, "policy_loss": -0.0049325828440487385, "vf_loss": 6.978565216064453, 
"vf_explained_var": 0.522409200668335, "kl": 0.0016363689210265875, "entropy": 1.4948899745941162, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 665600, "num_env_steps_trained": 665600, "num_agent_steps_sampled": 1331200, "num_agent_steps_trained": 1331200}, "sampler_results": {"episode_reward_max": 338.0, "episode_reward_min": 9.0, "episode_reward_mean": 196.9, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 3.0}, "policy_reward_max": {"ppo": 184.0}, "policy_reward_mean": {"ppo": 98.45}, "custom_metrics": {"sparse_reward_mean": 61.8, "sparse_reward_min": 0, "sparse_reward_max": 120, "shaped_reward_mean": 73.3, "shaped_reward_min": 9, "shaped_reward_max": 115, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 8.73, "onion_pickup_agent_0_min": 0, "onion_pickup_agent_0_max": 18, "onion_pickup_agent_1_mean": 9.28, "onion_pickup_agent_1_min": 2, 
"onion_pickup_agent_1_max": 19, "useful_onion_pickup_agent_0_mean": 7.59, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 15, "useful_onion_pickup_agent_1_mean": 8.13, "useful_onion_pickup_agent_1_min": 2, "useful_onion_pickup_agent_1_max": 19, "onion_drop_agent_0_mean": 1.65, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 7, "onion_drop_agent_1_mean": 1.67, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 7, "useful_onion_drop_agent_0_mean": 0.95, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.8, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 6, "potting_onion_agent_0_mean": 6.65, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 14, "potting_onion_agent_1_mean": 7.23, "potting_onion_agent_1_min": 1, "potting_onion_agent_1_max": 18, "dish_pickup_agent_0_mean": 4.86, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 4.95, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 15, "useful_dish_pickup_agent_0_mean": 1.06, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 6, "useful_dish_pickup_agent_1_mean": 1.11, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 4, "dish_drop_agent_0_mean": 2.04, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 2.23, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.66, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 4, "useful_dish_drop_agent_1_mean": 0.79, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 4, "soup_pickup_agent_0_mean": 2.77, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 6, "soup_pickup_agent_1_mean": 2.67, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 2.29, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 6, 
"soup_delivery_agent_1_mean": 2.27, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.36, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 4, "soup_drop_agent_1_mean": 0.33, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 4, "optimal_onion_potting_agent_0_mean": 6.65, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 14, "optimal_onion_potting_agent_1_mean": 7.23, "optimal_onion_potting_agent_1_min": 1, "optimal_onion_potting_agent_1_max": 18, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 6.65, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 14, "viable_onion_potting_agent_1_mean": 7.23, "viable_onion_potting_agent_1_min": 1, "viable_onion_potting_agent_1_max": 18, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, 
"useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [80.0, 207.0, 189.0, 155.0, 213.0, 287.0, 250.0, 196.0, 193.0, 93.0, 131.0, 292.0, 281.0, 282.0, 247.0, 247.0, 132.0, 227.0, 9.0, 284.0, 292.0, 110.0, 195.0, 145.0, 165.0, 338.0, 279.0, 193.0, 244.0, 136.0, 187.0, 285.0, 12.0, 188.0, 238.0, 120.0, 178.0, 235.0, 204.0, 260.0, 190.0, 156.0, 80.0, 250.0, 182.0, 209.0, 136.0, 244.0, 168.0, 23.0, 238.0, 247.0, 207.0, 193.0, 187.0, 94.0, 231.0, 135.0, 120.0, 253.0, 148.0, 292.0, 77.0, 96.0, 296.0, 239.0, 210.0, 77.0, 255.0, 133.0, 193.0, 298.0, 239.0, 176.0, 71.0, 69.0, 231.0, 273.0, 212.0, 127.0, 180.0, 236.0, 158.0, 276.0, 282.0, 213.0, 264.0, 284.0, 196.0, 256.0, 163.0, 145.0, 204.0, 168.0, 179.0, 335.0, 299.0, 126.0, 176.0, 296.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [37.0, 43.0, 112.0, 95.0, 97.0, 92.0, 87.0, 68.0, 106.0, 107.0, 142.0, 145.0, 122.0, 128.0, 85.0, 111.0, 89.0, 104.0, 48.0, 45.0, 62.0, 69.0, 159.0, 133.0, 139.0, 142.0, 133.0, 149.0, 123.0, 124.0, 119.0, 128.0, 64.0, 68.0, 105.0, 122.0, 3.0, 6.0, 132.0, 152.0, 131.0, 161.0, 56.0, 54.0, 104.0, 91.0, 61.0, 84.0, 83.0, 82.0, 162.0, 176.0, 139.0, 140.0, 82.0, 111.0, 109.0, 135.0, 71.0, 65.0, 105.0, 
82.0, 142.0, 143.0, 3.0, 9.0, 86.0, 102.0, 111.0, 127.0, 58.0, 62.0, 79.0, 99.0, 112.0, 123.0, 105.0, 99.0, 141.0, 119.0, 82.0, 108.0, 88.0, 68.0, 43.0, 37.0, 123.0, 127.0, 85.0, 97.0, 106.0, 103.0, 68.0, 68.0, 123.0, 121.0, 85.0, 83.0, 3.0, 20.0, 108.0, 130.0, 137.0, 110.0, 104.0, 103.0, 107.0, 86.0, 98.0, 89.0, 52.0, 42.0, 102.0, 129.0, 69.0, 66.0, 46.0, 74.0, 112.0, 141.0, 83.0, 65.0, 152.0, 140.0, 40.0, 37.0, 50.0, 46.0, 157.0, 139.0, 134.0, 105.0, 96.0, 114.0, 39.0, 38.0, 127.0, 128.0, 55.0, 78.0, 94.0, 99.0, 157.0, 141.0, 126.0, 113.0, 88.0, 88.0, 28.0, 43.0, 37.0, 32.0, 127.0, 104.0, 140.0, 133.0, 113.0, 99.0, 59.0, 68.0, 100.0, 80.0, 120.0, 116.0, 78.0, 80.0, 135.0, 141.0, 140.0, 142.0, 108.0, 105.0, 132.0, 132.0, 151.0, 133.0, 107.0, 89.0, 131.0, 125.0, 75.0, 88.0, 75.0, 70.0, 94.0, 110.0, 85.0, 83.0, 85.0, 94.0, 151.0, 184.0, 148.0, 151.0, 68.0, 58.0, 100.0, 76.0, 153.0, 143.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.688734889605353, "mean_inference_ms": 1.2110129239657774, "mean_action_processing_ms": 0.13319930408174357, "mean_env_wait_ms": 0.8749155207185934, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 338.0, "episode_reward_min": 9.0, "episode_reward_mean": 196.9, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 3.0}, "policy_reward_max": {"ppo": 184.0}, "policy_reward_mean": {"ppo": 98.45}, "hist_stats": {"episode_reward": [80.0, 207.0, 189.0, 155.0, 213.0, 287.0, 250.0, 196.0, 193.0, 93.0, 131.0, 292.0, 281.0, 282.0, 247.0, 247.0, 132.0, 227.0, 9.0, 284.0, 292.0, 110.0, 195.0, 145.0, 165.0, 338.0, 279.0, 193.0, 244.0, 136.0, 187.0, 285.0, 12.0, 188.0, 238.0, 120.0, 178.0, 235.0, 204.0, 260.0, 190.0, 156.0, 80.0, 250.0, 182.0, 209.0, 136.0, 244.0, 168.0, 23.0, 238.0, 247.0, 207.0, 193.0, 187.0, 94.0, 231.0, 135.0, 120.0, 253.0, 148.0, 292.0, 77.0, 96.0, 296.0, 239.0, 210.0, 77.0, 255.0, 133.0, 193.0, 298.0, 239.0, 176.0, 71.0, 69.0, 231.0, 273.0, 212.0, 127.0, 180.0, 236.0, 
158.0, 276.0, 282.0, 213.0, 264.0, 284.0, 196.0, 256.0, 163.0, 145.0, 204.0, 168.0, 179.0, 335.0, 299.0, 126.0, 176.0, 296.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [37.0, 43.0, 112.0, 95.0, 97.0, 92.0, 87.0, 68.0, 106.0, 107.0, 142.0, 145.0, 122.0, 128.0, 85.0, 111.0, 89.0, 104.0, 48.0, 45.0, 62.0, 69.0, 159.0, 133.0, 139.0, 142.0, 133.0, 149.0, 123.0, 124.0, 119.0, 128.0, 64.0, 68.0, 105.0, 122.0, 3.0, 6.0, 132.0, 152.0, 131.0, 161.0, 56.0, 54.0, 104.0, 91.0, 61.0, 84.0, 83.0, 82.0, 162.0, 176.0, 139.0, 140.0, 82.0, 111.0, 109.0, 135.0, 71.0, 65.0, 105.0, 82.0, 142.0, 143.0, 3.0, 9.0, 86.0, 102.0, 111.0, 127.0, 58.0, 62.0, 79.0, 99.0, 112.0, 123.0, 105.0, 99.0, 141.0, 119.0, 82.0, 108.0, 88.0, 68.0, 43.0, 37.0, 123.0, 127.0, 85.0, 97.0, 106.0, 103.0, 68.0, 68.0, 123.0, 121.0, 85.0, 83.0, 3.0, 20.0, 108.0, 130.0, 137.0, 110.0, 104.0, 103.0, 107.0, 86.0, 98.0, 89.0, 52.0, 42.0, 102.0, 129.0, 69.0, 66.0, 46.0, 74.0, 112.0, 141.0, 83.0, 65.0, 152.0, 140.0, 40.0, 37.0, 50.0, 46.0, 157.0, 139.0, 134.0, 105.0, 96.0, 114.0, 39.0, 38.0, 127.0, 128.0, 55.0, 78.0, 94.0, 99.0, 157.0, 141.0, 126.0, 113.0, 88.0, 88.0, 28.0, 43.0, 37.0, 32.0, 127.0, 104.0, 140.0, 133.0, 113.0, 99.0, 59.0, 68.0, 100.0, 80.0, 120.0, 116.0, 78.0, 80.0, 135.0, 141.0, 140.0, 142.0, 108.0, 105.0, 132.0, 132.0, 151.0, 133.0, 107.0, 89.0, 131.0, 125.0, 75.0, 88.0, 75.0, 70.0, 94.0, 110.0, 85.0, 83.0, 85.0, 94.0, 151.0, 184.0, 148.0, 151.0, 68.0, 58.0, 100.0, 76.0, 153.0, 143.0]}, "sampler_perf": 
{"mean_raw_obs_processing_ms": 0.688734889605353, "mean_inference_ms": 1.2110129239657774, "mean_action_processing_ms": 0.13319930408174357, "mean_env_wait_ms": 0.8749155207185934, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 1331200, "num_agent_steps_trained": 1331200, "num_env_steps_sampled": 665600, "num_env_steps_trained": 665600, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 665600, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 1331200, "timers": {"training_iteration_time_ms": 3673.186, "learn_time_ms": 1096.032, "learn_throughput": 11678.494, "synch_weights_time_ms": 11.104}, "counters": {"num_env_steps_sampled": 665600, "num_env_steps_trained": 665600, "num_agent_steps_sampled": 1331200, "num_agent_steps_trained": 1331200}, "done": false, "episodes_total": 1664, "training_iteration": 52, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-03-19", "timestamp": 1666580599, "time_this_iter_s": 3.693157911300659, "time_total_s": 196.33780717849731, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, 
"DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", 
"post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", 
"env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, 
"fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": 
false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, 
"metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 
0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 196.33780717849731, "timesteps_since_restore": 0, "iterations_since_restore": 52, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.73333333333333, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 63.8, "sparse_reward_min": 0, "sparse_reward_max": 120, "shaped_reward_mean": 75.57, "shaped_reward_min": 12, "shaped_reward_max": 116, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 8.84, "onion_pickup_agent_0_min": 0, "onion_pickup_agent_0_max": 18, "onion_pickup_agent_1_mean": 9.63, "onion_pickup_agent_1_min": 2, "onion_pickup_agent_1_max": 19, "useful_onion_pickup_agent_0_mean": 7.64, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 15, "useful_onion_pickup_agent_1_mean": 8.54, "useful_onion_pickup_agent_1_min": 2, "useful_onion_pickup_agent_1_max": 19, "onion_drop_agent_0_mean": 1.69, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 10, "onion_drop_agent_1_mean": 1.71, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 1.02, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 
0.8, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 6.76, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 14, "potting_onion_agent_1_mean": 7.5, "potting_onion_agent_1_min": 1, "potting_onion_agent_1_max": 18, "dish_pickup_agent_0_mean": 4.88, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 4.82, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 15, "useful_dish_pickup_agent_0_mean": 1.3, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 6, "useful_dish_pickup_agent_1_mean": 1.13, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 4, "dish_drop_agent_0_mean": 1.95, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 8, "dish_drop_agent_1_mean": 2.13, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.58, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 4, "useful_dish_drop_agent_1_mean": 0.72, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 5, "soup_pickup_agent_0_mean": 2.81, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 6, "soup_pickup_agent_1_mean": 2.62, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 2.42, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 6, "soup_delivery_agent_1_mean": 2.28, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.3, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 4, "soup_drop_agent_1_mean": 0.28, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 4, "optimal_onion_potting_agent_0_mean": 6.76, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 14, "optimal_onion_potting_agent_1_mean": 7.5, "optimal_onion_potting_agent_1_min": 1, "optimal_onion_potting_agent_1_max": 18, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, 
"optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 6.76, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 14, "viable_onion_potting_agent_1_mean": 7.5, "viable_onion_potting_agent_1_min": 1, "viable_onion_potting_agent_1_max": 18, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 4.440892164675075e-17, "cur_lr": 0.0010000000474974513, "total_loss": -0.003979704808443785, "policy_loss": -0.0039646499790251255, "vf_loss": 7.258543968200684, 
"vf_explained_var": 0.5249006748199463, "kl": 0.0017533027566969395, "entropy": 1.481823205947876, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 678400, "num_env_steps_trained": 678400, "num_agent_steps_sampled": 1356800, "num_agent_steps_trained": 1356800}, "sampler_results": {"episode_reward_max": 356.0, "episode_reward_min": 12.0, "episode_reward_mean": 203.17, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 3.0}, "policy_reward_max": {"ppo": 185.0}, "policy_reward_mean": {"ppo": 101.585}, "custom_metrics": {"sparse_reward_mean": 63.8, "sparse_reward_min": 0, "sparse_reward_max": 120, "shaped_reward_mean": 75.57, "shaped_reward_min": 12, "shaped_reward_max": 116, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 8.84, "onion_pickup_agent_0_min": 0, "onion_pickup_agent_0_max": 18, "onion_pickup_agent_1_mean": 9.63, "onion_pickup_agent_1_min": 2, 
"onion_pickup_agent_1_max": 19, "useful_onion_pickup_agent_0_mean": 7.64, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 15, "useful_onion_pickup_agent_1_mean": 8.54, "useful_onion_pickup_agent_1_min": 2, "useful_onion_pickup_agent_1_max": 19, "onion_drop_agent_0_mean": 1.69, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 10, "onion_drop_agent_1_mean": 1.71, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 1.02, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.8, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 6.76, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 14, "potting_onion_agent_1_mean": 7.5, "potting_onion_agent_1_min": 1, "potting_onion_agent_1_max": 18, "dish_pickup_agent_0_mean": 4.88, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 4.82, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 15, "useful_dish_pickup_agent_0_mean": 1.3, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 6, "useful_dish_pickup_agent_1_mean": 1.13, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 4, "dish_drop_agent_0_mean": 1.95, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 8, "dish_drop_agent_1_mean": 2.13, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.58, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 4, "useful_dish_drop_agent_1_mean": 0.72, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 5, "soup_pickup_agent_0_mean": 2.81, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 6, "soup_pickup_agent_1_mean": 2.62, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 2.42, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 6, 
"soup_delivery_agent_1_mean": 2.28, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.3, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 4, "soup_drop_agent_1_mean": 0.28, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 4, "optimal_onion_potting_agent_0_mean": 6.76, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 14, "optimal_onion_potting_agent_1_mean": 7.5, "optimal_onion_potting_agent_1_min": 1, "optimal_onion_potting_agent_1_max": 18, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 6.76, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 14, "viable_onion_potting_agent_1_mean": 7.5, "viable_onion_potting_agent_1_min": 1, "viable_onion_potting_agent_1_max": 18, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, 
"useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [12.0, 188.0, 238.0, 120.0, 178.0, 235.0, 204.0, 260.0, 190.0, 156.0, 80.0, 250.0, 182.0, 209.0, 136.0, 244.0, 168.0, 23.0, 238.0, 247.0, 207.0, 193.0, 187.0, 94.0, 231.0, 135.0, 120.0, 253.0, 148.0, 292.0, 77.0, 96.0, 296.0, 239.0, 210.0, 77.0, 255.0, 133.0, 193.0, 298.0, 239.0, 176.0, 71.0, 69.0, 231.0, 273.0, 212.0, 127.0, 180.0, 236.0, 158.0, 276.0, 282.0, 213.0, 264.0, 284.0, 196.0, 256.0, 163.0, 145.0, 204.0, 168.0, 179.0, 335.0, 299.0, 126.0, 176.0, 296.0, 256.0, 302.0, 356.0, 255.0, 233.0, 230.0, 299.0, 141.0, 202.0, 190.0, 301.0, 74.0, 179.0, 201.0, 236.0, 152.0, 121.0, 239.0, 144.0, 288.0, 330.0, 193.0, 228.0, 113.0, 316.0, 288.0, 305.0, 215.0, 256.0, 230.0, 176.0, 142.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [3.0, 9.0, 86.0, 102.0, 111.0, 127.0, 58.0, 62.0, 79.0, 99.0, 112.0, 123.0, 105.0, 99.0, 141.0, 119.0, 82.0, 108.0, 88.0, 68.0, 43.0, 37.0, 123.0, 127.0, 85.0, 97.0, 106.0, 103.0, 68.0, 68.0, 123.0, 121.0, 85.0, 83.0, 3.0, 20.0, 108.0, 130.0, 137.0, 110.0, 104.0, 103.0, 107.0, 86.0, 98.0, 89.0, 52.0, 42.0, 102.0, 129.0, 69.0, 66.0, 46.0, 74.0, 112.0, 141.0, 83.0, 65.0, 152.0, 140.0, 40.0, 37.0, 
50.0, 46.0, 157.0, 139.0, 134.0, 105.0, 96.0, 114.0, 39.0, 38.0, 127.0, 128.0, 55.0, 78.0, 94.0, 99.0, 157.0, 141.0, 126.0, 113.0, 88.0, 88.0, 28.0, 43.0, 37.0, 32.0, 127.0, 104.0, 140.0, 133.0, 113.0, 99.0, 59.0, 68.0, 100.0, 80.0, 120.0, 116.0, 78.0, 80.0, 135.0, 141.0, 140.0, 142.0, 108.0, 105.0, 132.0, 132.0, 151.0, 133.0, 107.0, 89.0, 131.0, 125.0, 75.0, 88.0, 75.0, 70.0, 94.0, 110.0, 85.0, 83.0, 85.0, 94.0, 151.0, 184.0, 148.0, 151.0, 68.0, 58.0, 100.0, 76.0, 153.0, 143.0, 126.0, 130.0, 162.0, 140.0, 171.0, 185.0, 130.0, 125.0, 125.0, 108.0, 113.0, 117.0, 129.0, 170.0, 70.0, 71.0, 99.0, 103.0, 97.0, 93.0, 161.0, 140.0, 42.0, 32.0, 92.0, 87.0, 101.0, 100.0, 121.0, 115.0, 73.0, 79.0, 51.0, 70.0, 120.0, 119.0, 81.0, 63.0, 148.0, 140.0, 162.0, 168.0, 93.0, 100.0, 113.0, 115.0, 53.0, 60.0, 169.0, 147.0, 137.0, 151.0, 157.0, 148.0, 96.0, 119.0, 124.0, 132.0, 99.0, 131.0, 69.0, 107.0, 80.0, 62.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6887014698007187, "mean_inference_ms": 1.2107337327663068, "mean_action_processing_ms": 0.13317170881738327, "mean_env_wait_ms": 0.8742339680417999, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 356.0, "episode_reward_min": 12.0, "episode_reward_mean": 203.17, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 3.0}, "policy_reward_max": {"ppo": 185.0}, "policy_reward_mean": {"ppo": 101.585}, "hist_stats": {"episode_reward": [12.0, 188.0, 238.0, 120.0, 178.0, 235.0, 204.0, 260.0, 190.0, 156.0, 80.0, 250.0, 182.0, 209.0, 136.0, 244.0, 168.0, 23.0, 238.0, 247.0, 207.0, 193.0, 187.0, 94.0, 231.0, 135.0, 120.0, 253.0, 148.0, 292.0, 77.0, 96.0, 296.0, 239.0, 210.0, 77.0, 255.0, 133.0, 193.0, 298.0, 239.0, 176.0, 71.0, 69.0, 231.0, 273.0, 212.0, 127.0, 180.0, 236.0, 158.0, 276.0, 282.0, 213.0, 264.0, 284.0, 196.0, 256.0, 163.0, 145.0, 204.0, 168.0, 179.0, 335.0, 299.0, 126.0, 176.0, 296.0, 256.0, 302.0, 356.0, 255.0, 233.0, 230.0, 299.0, 141.0, 202.0, 190.0, 301.0, 
74.0, 179.0, 201.0, 236.0, 152.0, 121.0, 239.0, 144.0, 288.0, 330.0, 193.0, 228.0, 113.0, 316.0, 288.0, 305.0, 215.0, 256.0, 230.0, 176.0, 142.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [3.0, 9.0, 86.0, 102.0, 111.0, 127.0, 58.0, 62.0, 79.0, 99.0, 112.0, 123.0, 105.0, 99.0, 141.0, 119.0, 82.0, 108.0, 88.0, 68.0, 43.0, 37.0, 123.0, 127.0, 85.0, 97.0, 106.0, 103.0, 68.0, 68.0, 123.0, 121.0, 85.0, 83.0, 3.0, 20.0, 108.0, 130.0, 137.0, 110.0, 104.0, 103.0, 107.0, 86.0, 98.0, 89.0, 52.0, 42.0, 102.0, 129.0, 69.0, 66.0, 46.0, 74.0, 112.0, 141.0, 83.0, 65.0, 152.0, 140.0, 40.0, 37.0, 50.0, 46.0, 157.0, 139.0, 134.0, 105.0, 96.0, 114.0, 39.0, 38.0, 127.0, 128.0, 55.0, 78.0, 94.0, 99.0, 157.0, 141.0, 126.0, 113.0, 88.0, 88.0, 28.0, 43.0, 37.0, 32.0, 127.0, 104.0, 140.0, 133.0, 113.0, 99.0, 59.0, 68.0, 100.0, 80.0, 120.0, 116.0, 78.0, 80.0, 135.0, 141.0, 140.0, 142.0, 108.0, 105.0, 132.0, 132.0, 151.0, 133.0, 107.0, 89.0, 131.0, 125.0, 75.0, 88.0, 75.0, 70.0, 94.0, 110.0, 85.0, 83.0, 85.0, 94.0, 151.0, 184.0, 148.0, 151.0, 68.0, 58.0, 100.0, 76.0, 153.0, 143.0, 126.0, 130.0, 162.0, 140.0, 171.0, 185.0, 130.0, 125.0, 125.0, 108.0, 113.0, 117.0, 129.0, 170.0, 70.0, 71.0, 99.0, 103.0, 97.0, 93.0, 161.0, 140.0, 42.0, 32.0, 92.0, 87.0, 101.0, 100.0, 121.0, 115.0, 73.0, 79.0, 51.0, 70.0, 120.0, 119.0, 81.0, 63.0, 148.0, 140.0, 162.0, 168.0, 93.0, 100.0, 113.0, 115.0, 53.0, 60.0, 169.0, 147.0, 137.0, 151.0, 157.0, 148.0, 96.0, 119.0, 124.0, 132.0, 99.0, 131.0, 69.0, 107.0, 80.0, 62.0]}, 
"sampler_perf": {"mean_raw_obs_processing_ms": 0.6887014698007187, "mean_inference_ms": 1.2107337327663068, "mean_action_processing_ms": 0.13317170881738327, "mean_env_wait_ms": 0.8742339680417999, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 1356800, "num_agent_steps_trained": 1356800, "num_env_steps_sampled": 678400, "num_env_steps_trained": 678400, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 678400, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 1356800, "timers": {"training_iteration_time_ms": 3668.9, "learn_time_ms": 1097.264, "learn_throughput": 11665.377, "synch_weights_time_ms": 10.808}, "counters": {"num_env_steps_sampled": 678400, "num_env_steps_trained": 678400, "num_agent_steps_sampled": 1356800, "num_agent_steps_trained": 1356800}, "done": false, "episodes_total": 1696, "training_iteration": 53, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-03-23", "timestamp": 1666580603, "time_this_iter_s": 3.820449113845825, "time_total_s": 200.15825629234314, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, 
"DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", 
"post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", 
"env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, 
"fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": 
false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, 
"metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 
0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 200.15825629234314, "timesteps_since_restore": 0, "iterations_since_restore": 53, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.82, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 72.4, "sparse_reward_min": 20, "sparse_reward_max": 120, "shaped_reward_mean": 81.74, "shaped_reward_min": 29, "shaped_reward_max": 127, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 9.06, "onion_pickup_agent_0_min": 0, "onion_pickup_agent_0_max": 18, "onion_pickup_agent_1_mean": 10.42, "onion_pickup_agent_1_min": 4, "onion_pickup_agent_1_max": 19, "useful_onion_pickup_agent_0_mean": 7.89, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 15, "useful_onion_pickup_agent_1_mean": 9.39, "useful_onion_pickup_agent_1_min": 4, "useful_onion_pickup_agent_1_max": 19, "onion_drop_agent_0_mean": 1.77, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 10, "onion_drop_agent_1_mean": 1.65, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 1.01, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 
0.83, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 6, "potting_onion_agent_0_mean": 6.98, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 14, "potting_onion_agent_1_mean": 8.35, "potting_onion_agent_1_min": 1, "potting_onion_agent_1_max": 18, "dish_pickup_agent_0_mean": 5.11, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 4.71, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 15, "useful_dish_pickup_agent_0_mean": 1.54, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 6, "useful_dish_pickup_agent_1_mean": 1.16, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 4, "dish_drop_agent_0_mean": 1.78, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 8, "dish_drop_agent_1_mean": 1.97, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.66, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 4, "useful_dish_drop_agent_1_mean": 0.78, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 5, "soup_pickup_agent_0_mean": 3.17, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 6, "soup_pickup_agent_1_mean": 2.61, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 6, "soup_delivery_agent_0_mean": 2.79, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 6, "soup_delivery_agent_1_mean": 2.37, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.26, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 3, "soup_drop_agent_1_mean": 0.21, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 6.98, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 14, "optimal_onion_potting_agent_1_mean": 8.35, "optimal_onion_potting_agent_1_min": 1, "optimal_onion_potting_agent_1_max": 18, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, 
"optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 6.98, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 14, "viable_onion_potting_agent_1_mean": 8.35, "viable_onion_potting_agent_1_min": 1, "viable_onion_potting_agent_1_max": 18, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 2.2204460823375376e-17, "cur_lr": 0.0010000000474974513, "total_loss": -0.005493534728884697, "policy_loss": -0.005526891443878412, "vf_loss": 7.647051811218262, 
"vf_explained_var": 0.5422403216362, "kl": 0.001551097142510116, "entropy": 1.4626996517181396, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 691200, "num_env_steps_trained": 691200, "num_agent_steps_sampled": 1382400, "num_agent_steps_trained": 1382400}, "sampler_results": {"episode_reward_max": 356.0, "episode_reward_min": 69.0, "episode_reward_mean": 226.54, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 28.0}, "policy_reward_max": {"ppo": 185.0}, "policy_reward_mean": {"ppo": 113.27}, "custom_metrics": {"sparse_reward_mean": 72.4, "sparse_reward_min": 20, "sparse_reward_max": 120, "shaped_reward_mean": 81.74, "shaped_reward_min": 29, "shaped_reward_max": 127, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 9.06, "onion_pickup_agent_0_min": 0, "onion_pickup_agent_0_max": 18, "onion_pickup_agent_1_mean": 10.42, "onion_pickup_agent_1_min": 4, 
"onion_pickup_agent_1_max": 19, "useful_onion_pickup_agent_0_mean": 7.89, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 15, "useful_onion_pickup_agent_1_mean": 9.39, "useful_onion_pickup_agent_1_min": 4, "useful_onion_pickup_agent_1_max": 19, "onion_drop_agent_0_mean": 1.77, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 10, "onion_drop_agent_1_mean": 1.65, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 1.01, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.83, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 6, "potting_onion_agent_0_mean": 6.98, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 14, "potting_onion_agent_1_mean": 8.35, "potting_onion_agent_1_min": 1, "potting_onion_agent_1_max": 18, "dish_pickup_agent_0_mean": 5.11, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 4.71, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 15, "useful_dish_pickup_agent_0_mean": 1.54, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 6, "useful_dish_pickup_agent_1_mean": 1.16, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 4, "dish_drop_agent_0_mean": 1.78, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 8, "dish_drop_agent_1_mean": 1.97, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.66, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 4, "useful_dish_drop_agent_1_mean": 0.78, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 5, "soup_pickup_agent_0_mean": 3.17, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 6, "soup_pickup_agent_1_mean": 2.61, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 6, "soup_delivery_agent_0_mean": 2.79, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 6, 
"soup_delivery_agent_1_mean": 2.37, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.26, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 3, "soup_drop_agent_1_mean": 0.21, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 6.98, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 14, "optimal_onion_potting_agent_1_mean": 8.35, "optimal_onion_potting_agent_1_min": 1, "optimal_onion_potting_agent_1_max": 18, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 6.98, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 14, "viable_onion_potting_agent_1_mean": 8.35, "viable_onion_potting_agent_1_min": 1, "viable_onion_potting_agent_1_max": 18, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, 
"useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [296.0, 239.0, 210.0, 77.0, 255.0, 133.0, 193.0, 298.0, 239.0, 176.0, 71.0, 69.0, 231.0, 273.0, 212.0, 127.0, 180.0, 236.0, 158.0, 276.0, 282.0, 213.0, 264.0, 284.0, 196.0, 256.0, 163.0, 145.0, 204.0, 168.0, 179.0, 335.0, 299.0, 126.0, 176.0, 296.0, 256.0, 302.0, 356.0, 255.0, 233.0, 230.0, 299.0, 141.0, 202.0, 190.0, 301.0, 74.0, 179.0, 201.0, 236.0, 152.0, 121.0, 239.0, 144.0, 288.0, 330.0, 193.0, 228.0, 113.0, 316.0, 288.0, 305.0, 215.0, 256.0, 230.0, 176.0, 142.0, 233.0, 285.0, 252.0, 198.0, 155.0, 301.0, 190.0, 258.0, 289.0, 175.0, 188.0, 233.0, 293.0, 241.0, 297.0, 292.0, 348.0, 327.0, 188.0, 305.0, 175.0, 290.0, 158.0, 259.0, 264.0, 290.0, 248.0, 209.0, 301.0, 246.0, 199.0, 241.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [157.0, 139.0, 134.0, 105.0, 96.0, 114.0, 39.0, 38.0, 127.0, 128.0, 55.0, 78.0, 94.0, 99.0, 157.0, 141.0, 126.0, 113.0, 88.0, 88.0, 28.0, 43.0, 37.0, 32.0, 127.0, 104.0, 140.0, 133.0, 113.0, 99.0, 59.0, 68.0, 100.0, 80.0, 120.0, 116.0, 78.0, 80.0, 135.0, 141.0, 140.0, 142.0, 108.0, 105.0, 132.0, 132.0, 151.0, 133.0, 107.0, 89.0, 131.0, 125.0, 75.0, 88.0, 75.0, 70.0, 94.0, 110.0, 85.0, 83.0, 
85.0, 94.0, 151.0, 184.0, 148.0, 151.0, 68.0, 58.0, 100.0, 76.0, 153.0, 143.0, 126.0, 130.0, 162.0, 140.0, 171.0, 185.0, 130.0, 125.0, 125.0, 108.0, 113.0, 117.0, 129.0, 170.0, 70.0, 71.0, 99.0, 103.0, 97.0, 93.0, 161.0, 140.0, 42.0, 32.0, 92.0, 87.0, 101.0, 100.0, 121.0, 115.0, 73.0, 79.0, 51.0, 70.0, 120.0, 119.0, 81.0, 63.0, 148.0, 140.0, 162.0, 168.0, 93.0, 100.0, 113.0, 115.0, 53.0, 60.0, 169.0, 147.0, 137.0, 151.0, 157.0, 148.0, 96.0, 119.0, 124.0, 132.0, 99.0, 131.0, 69.0, 107.0, 80.0, 62.0, 125.0, 108.0, 146.0, 139.0, 117.0, 135.0, 102.0, 96.0, 84.0, 71.0, 156.0, 145.0, 96.0, 94.0, 126.0, 132.0, 133.0, 156.0, 83.0, 92.0, 100.0, 88.0, 122.0, 111.0, 143.0, 150.0, 119.0, 122.0, 150.0, 147.0, 137.0, 155.0, 176.0, 172.0, 166.0, 161.0, 85.0, 103.0, 151.0, 154.0, 92.0, 83.0, 131.0, 159.0, 81.0, 77.0, 134.0, 125.0, 132.0, 132.0, 157.0, 133.0, 121.0, 127.0, 96.0, 113.0, 140.0, 161.0, 135.0, 111.0, 101.0, 98.0, 117.0, 124.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6885774823538797, "mean_inference_ms": 1.2104421536435193, "mean_action_processing_ms": 0.1331451018686985, "mean_env_wait_ms": 0.8735049641694252, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 356.0, "episode_reward_min": 69.0, "episode_reward_mean": 226.54, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 28.0}, "policy_reward_max": {"ppo": 185.0}, "policy_reward_mean": {"ppo": 113.27}, "hist_stats": {"episode_reward": [296.0, 239.0, 210.0, 77.0, 255.0, 133.0, 193.0, 298.0, 239.0, 176.0, 71.0, 69.0, 231.0, 273.0, 212.0, 127.0, 180.0, 236.0, 158.0, 276.0, 282.0, 213.0, 264.0, 284.0, 196.0, 256.0, 163.0, 145.0, 204.0, 168.0, 179.0, 335.0, 299.0, 126.0, 176.0, 296.0, 256.0, 302.0, 356.0, 255.0, 233.0, 230.0, 299.0, 141.0, 202.0, 190.0, 301.0, 74.0, 179.0, 201.0, 236.0, 152.0, 121.0, 239.0, 144.0, 288.0, 330.0, 193.0, 228.0, 113.0, 316.0, 288.0, 305.0, 215.0, 256.0, 230.0, 176.0, 142.0, 233.0, 285.0, 252.0, 198.0, 155.0, 301.0, 190.0, 
258.0, 289.0, 175.0, 188.0, 233.0, 293.0, 241.0, 297.0, 292.0, 348.0, 327.0, 188.0, 305.0, 175.0, 290.0, 158.0, 259.0, 264.0, 290.0, 248.0, 209.0, 301.0, 246.0, 199.0, 241.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [157.0, 139.0, 134.0, 105.0, 96.0, 114.0, 39.0, 38.0, 127.0, 128.0, 55.0, 78.0, 94.0, 99.0, 157.0, 141.0, 126.0, 113.0, 88.0, 88.0, 28.0, 43.0, 37.0, 32.0, 127.0, 104.0, 140.0, 133.0, 113.0, 99.0, 59.0, 68.0, 100.0, 80.0, 120.0, 116.0, 78.0, 80.0, 135.0, 141.0, 140.0, 142.0, 108.0, 105.0, 132.0, 132.0, 151.0, 133.0, 107.0, 89.0, 131.0, 125.0, 75.0, 88.0, 75.0, 70.0, 94.0, 110.0, 85.0, 83.0, 85.0, 94.0, 151.0, 184.0, 148.0, 151.0, 68.0, 58.0, 100.0, 76.0, 153.0, 143.0, 126.0, 130.0, 162.0, 140.0, 171.0, 185.0, 130.0, 125.0, 125.0, 108.0, 113.0, 117.0, 129.0, 170.0, 70.0, 71.0, 99.0, 103.0, 97.0, 93.0, 161.0, 140.0, 42.0, 32.0, 92.0, 87.0, 101.0, 100.0, 121.0, 115.0, 73.0, 79.0, 51.0, 70.0, 120.0, 119.0, 81.0, 63.0, 148.0, 140.0, 162.0, 168.0, 93.0, 100.0, 113.0, 115.0, 53.0, 60.0, 169.0, 147.0, 137.0, 151.0, 157.0, 148.0, 96.0, 119.0, 124.0, 132.0, 99.0, 131.0, 69.0, 107.0, 80.0, 62.0, 125.0, 108.0, 146.0, 139.0, 117.0, 135.0, 102.0, 96.0, 84.0, 71.0, 156.0, 145.0, 96.0, 94.0, 126.0, 132.0, 133.0, 156.0, 83.0, 92.0, 100.0, 88.0, 122.0, 111.0, 143.0, 150.0, 119.0, 122.0, 150.0, 147.0, 137.0, 155.0, 176.0, 172.0, 166.0, 161.0, 85.0, 103.0, 151.0, 154.0, 92.0, 83.0, 131.0, 159.0, 81.0, 77.0, 134.0, 125.0, 132.0, 132.0, 157.0, 133.0, 121.0, 127.0, 96.0, 113.0, 
140.0, 161.0, 135.0, 111.0, 101.0, 98.0, 117.0, 124.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6885774823538797, "mean_inference_ms": 1.2104421536435193, "mean_action_processing_ms": 0.1331451018686985, "mean_env_wait_ms": 0.8735049641694252, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 1382400, "num_agent_steps_trained": 1382400, "num_env_steps_sampled": 691200, "num_env_steps_trained": 691200, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 691200, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 1382400, "timers": {"training_iteration_time_ms": 3649.094, "learn_time_ms": 1101.345, "learn_throughput": 11622.149, "synch_weights_time_ms": 11.009}, "counters": {"num_env_steps_sampled": 691200, "num_env_steps_trained": 691200, "num_agent_steps_sampled": 1382400, "num_agent_steps_trained": 1382400}, "done": false, "episodes_total": 1728, "training_iteration": 54, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-03-27", "timestamp": 1666580607, "time_this_iter_s": 3.6673367023468018, "time_total_s": 203.82559299468994, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": 
"cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": 
"tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, 
"inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, 
"_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, 
"logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, 
"sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, 
"custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 203.82559299468994, "timesteps_since_restore": 0, "iterations_since_restore": 54, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 21.0, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 76.2, "sparse_reward_min": 20, "sparse_reward_max": 140, "shaped_reward_mean": 83.9, "shaped_reward_min": 29, "shaped_reward_max": 127, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 9.21, "onion_pickup_agent_0_min": 2, "onion_pickup_agent_0_max": 18, "onion_pickup_agent_1_mean": 10.41, "onion_pickup_agent_1_min": 4, "onion_pickup_agent_1_max": 17, "useful_onion_pickup_agent_0_mean": 8.1, "useful_onion_pickup_agent_0_min": 2, "useful_onion_pickup_agent_0_max": 15, "useful_onion_pickup_agent_1_mean": 9.47, "useful_onion_pickup_agent_1_min": 2, "useful_onion_pickup_agent_1_max": 16, "onion_drop_agent_0_mean": 1.69, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 10, "onion_drop_agent_1_mean": 1.58, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.88, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 
0.8, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 6, "potting_onion_agent_0_mean": 7.25, "potting_onion_agent_0_min": 2, "potting_onion_agent_0_max": 13, "potting_onion_agent_1_mean": 8.47, "potting_onion_agent_1_min": 1, "potting_onion_agent_1_max": 16, "dish_pickup_agent_0_mean": 5.16, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 4.95, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 12, "useful_dish_pickup_agent_0_mean": 1.48, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 6, "useful_dish_pickup_agent_1_mean": 1.35, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 5, "dish_drop_agent_0_mean": 1.87, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 8, "dish_drop_agent_1_mean": 2.0, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.68, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 4, "useful_dish_drop_agent_1_mean": 0.73, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 5, "soup_pickup_agent_0_mean": 3.05, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 6, "soup_pickup_agent_1_mean": 2.76, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 6, "soup_delivery_agent_0_mean": 2.76, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 6, "soup_delivery_agent_1_mean": 2.54, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.17, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.17, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 3, "optimal_onion_potting_agent_0_mean": 7.25, "optimal_onion_potting_agent_0_min": 2, "optimal_onion_potting_agent_0_max": 13, "optimal_onion_potting_agent_1_mean": 8.47, "optimal_onion_potting_agent_1_min": 1, "optimal_onion_potting_agent_1_max": 16, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, 
"optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 7.25, "viable_onion_potting_agent_0_min": 2, "viable_onion_potting_agent_0_max": 13, "viable_onion_potting_agent_1_mean": 8.47, "viable_onion_potting_agent_1_min": 1, "viable_onion_potting_agent_1_max": 16, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 1.1102230411687688e-17, "cur_lr": 0.0010000000474974513, "total_loss": -0.003027592319995165, "policy_loss": -0.0030293785966932774, "vf_loss": 7.28782320022583, 
"vf_explained_var": 0.5821901559829712, "kl": 0.0017681021708995104, "entropy": 1.4539985656738281, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 704000, "num_env_steps_trained": 704000, "num_agent_steps_sampled": 1408000, "num_agent_steps_trained": 1408000}, "sampler_results": {"episode_reward_max": 390.0, "episode_reward_min": 69.0, "episode_reward_mean": 236.3, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 32.0}, "policy_reward_max": {"ppo": 219.0}, "policy_reward_mean": {"ppo": 118.15}, "custom_metrics": {"sparse_reward_mean": 76.2, "sparse_reward_min": 20, "sparse_reward_max": 140, "shaped_reward_mean": 83.9, "shaped_reward_min": 29, "shaped_reward_max": 127, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 9.21, "onion_pickup_agent_0_min": 2, "onion_pickup_agent_0_max": 18, "onion_pickup_agent_1_mean": 10.41, "onion_pickup_agent_1_min": 4, 
"onion_pickup_agent_1_max": 17, "useful_onion_pickup_agent_0_mean": 8.1, "useful_onion_pickup_agent_0_min": 2, "useful_onion_pickup_agent_0_max": 15, "useful_onion_pickup_agent_1_mean": 9.47, "useful_onion_pickup_agent_1_min": 2, "useful_onion_pickup_agent_1_max": 16, "onion_drop_agent_0_mean": 1.69, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 10, "onion_drop_agent_1_mean": 1.58, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.88, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.8, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 6, "potting_onion_agent_0_mean": 7.25, "potting_onion_agent_0_min": 2, "potting_onion_agent_0_max": 13, "potting_onion_agent_1_mean": 8.47, "potting_onion_agent_1_min": 1, "potting_onion_agent_1_max": 16, "dish_pickup_agent_0_mean": 5.16, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 4.95, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 12, "useful_dish_pickup_agent_0_mean": 1.48, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 6, "useful_dish_pickup_agent_1_mean": 1.35, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 5, "dish_drop_agent_0_mean": 1.87, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 8, "dish_drop_agent_1_mean": 2.0, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.68, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 4, "useful_dish_drop_agent_1_mean": 0.73, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 5, "soup_pickup_agent_0_mean": 3.05, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 6, "soup_pickup_agent_1_mean": 2.76, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 6, "soup_delivery_agent_0_mean": 2.76, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 6, 
"soup_delivery_agent_1_mean": 2.54, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.17, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.17, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 3, "optimal_onion_potting_agent_0_mean": 7.25, "optimal_onion_potting_agent_0_min": 2, "optimal_onion_potting_agent_0_max": 13, "optimal_onion_potting_agent_1_mean": 8.47, "optimal_onion_potting_agent_1_min": 1, "optimal_onion_potting_agent_1_max": 16, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 7.25, "viable_onion_potting_agent_0_min": 2, "viable_onion_potting_agent_0_max": 13, "viable_onion_potting_agent_1_mean": 8.47, "viable_onion_potting_agent_1_min": 1, "viable_onion_potting_agent_1_max": 16, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, 
"useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [299.0, 126.0, 176.0, 296.0, 256.0, 302.0, 356.0, 255.0, 233.0, 230.0, 299.0, 141.0, 202.0, 190.0, 301.0, 74.0, 179.0, 201.0, 236.0, 152.0, 121.0, 239.0, 144.0, 288.0, 330.0, 193.0, 228.0, 113.0, 316.0, 288.0, 305.0, 215.0, 256.0, 230.0, 176.0, 142.0, 233.0, 285.0, 252.0, 198.0, 155.0, 301.0, 190.0, 258.0, 289.0, 175.0, 188.0, 233.0, 293.0, 241.0, 297.0, 292.0, 348.0, 327.0, 188.0, 305.0, 175.0, 290.0, 158.0, 259.0, 264.0, 290.0, 248.0, 209.0, 301.0, 246.0, 199.0, 241.0, 273.0, 390.0, 188.0, 296.0, 239.0, 253.0, 196.0, 356.0, 69.0, 125.0, 247.0, 182.0, 239.0, 161.0, 123.0, 230.0, 307.0, 348.0, 188.0, 190.0, 252.0, 218.0, 342.0, 290.0, 341.0, 293.0, 180.0, 273.0, 128.0, 206.0, 249.0, 242.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [148.0, 151.0, 68.0, 58.0, 100.0, 76.0, 153.0, 143.0, 126.0, 130.0, 162.0, 140.0, 171.0, 185.0, 130.0, 125.0, 125.0, 108.0, 113.0, 117.0, 129.0, 170.0, 70.0, 71.0, 99.0, 103.0, 97.0, 93.0, 161.0, 140.0, 42.0, 32.0, 92.0, 87.0, 101.0, 100.0, 121.0, 115.0, 73.0, 79.0, 51.0, 70.0, 120.0, 119.0, 81.0, 63.0, 148.0, 140.0, 162.0, 168.0, 93.0, 100.0, 113.0, 115.0, 53.0, 60.0, 169.0, 147.0, 
137.0, 151.0, 157.0, 148.0, 96.0, 119.0, 124.0, 132.0, 99.0, 131.0, 69.0, 107.0, 80.0, 62.0, 125.0, 108.0, 146.0, 139.0, 117.0, 135.0, 102.0, 96.0, 84.0, 71.0, 156.0, 145.0, 96.0, 94.0, 126.0, 132.0, 133.0, 156.0, 83.0, 92.0, 100.0, 88.0, 122.0, 111.0, 143.0, 150.0, 119.0, 122.0, 150.0, 147.0, 137.0, 155.0, 176.0, 172.0, 166.0, 161.0, 85.0, 103.0, 151.0, 154.0, 92.0, 83.0, 131.0, 159.0, 81.0, 77.0, 134.0, 125.0, 132.0, 132.0, 157.0, 133.0, 121.0, 127.0, 96.0, 113.0, 140.0, 161.0, 135.0, 111.0, 101.0, 98.0, 117.0, 124.0, 149.0, 124.0, 171.0, 219.0, 95.0, 93.0, 139.0, 157.0, 114.0, 125.0, 127.0, 126.0, 101.0, 95.0, 163.0, 193.0, 32.0, 37.0, 59.0, 66.0, 117.0, 130.0, 88.0, 94.0, 119.0, 120.0, 79.0, 82.0, 57.0, 66.0, 122.0, 108.0, 148.0, 159.0, 174.0, 174.0, 89.0, 99.0, 105.0, 85.0, 134.0, 118.0, 111.0, 107.0, 170.0, 172.0, 159.0, 131.0, 169.0, 172.0, 148.0, 145.0, 80.0, 100.0, 138.0, 135.0, 69.0, 59.0, 90.0, 116.0, 116.0, 133.0, 120.0, 122.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6884233098602586, "mean_inference_ms": 1.2102454391431337, "mean_action_processing_ms": 0.133125106706314, "mean_env_wait_ms": 0.8727177708913959, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 390.0, "episode_reward_min": 69.0, "episode_reward_mean": 236.3, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 32.0}, "policy_reward_max": {"ppo": 219.0}, "policy_reward_mean": {"ppo": 118.15}, "hist_stats": {"episode_reward": [299.0, 126.0, 176.0, 296.0, 256.0, 302.0, 356.0, 255.0, 233.0, 230.0, 299.0, 141.0, 202.0, 190.0, 301.0, 74.0, 179.0, 201.0, 236.0, 152.0, 121.0, 239.0, 144.0, 288.0, 330.0, 193.0, 228.0, 113.0, 316.0, 288.0, 305.0, 215.0, 256.0, 230.0, 176.0, 142.0, 233.0, 285.0, 252.0, 198.0, 155.0, 301.0, 190.0, 258.0, 289.0, 175.0, 188.0, 233.0, 293.0, 241.0, 297.0, 292.0, 348.0, 327.0, 188.0, 305.0, 175.0, 290.0, 158.0, 259.0, 264.0, 290.0, 248.0, 209.0, 301.0, 246.0, 199.0, 241.0, 273.0, 390.0, 188.0, 296.0, 
239.0, 253.0, 196.0, 356.0, 69.0, 125.0, 247.0, 182.0, 239.0, 161.0, 123.0, 230.0, 307.0, 348.0, 188.0, 190.0, 252.0, 218.0, 342.0, 290.0, 341.0, 293.0, 180.0, 273.0, 128.0, 206.0, 249.0, 242.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [148.0, 151.0, 68.0, 58.0, 100.0, 76.0, 153.0, 143.0, 126.0, 130.0, 162.0, 140.0, 171.0, 185.0, 130.0, 125.0, 125.0, 108.0, 113.0, 117.0, 129.0, 170.0, 70.0, 71.0, 99.0, 103.0, 97.0, 93.0, 161.0, 140.0, 42.0, 32.0, 92.0, 87.0, 101.0, 100.0, 121.0, 115.0, 73.0, 79.0, 51.0, 70.0, 120.0, 119.0, 81.0, 63.0, 148.0, 140.0, 162.0, 168.0, 93.0, 100.0, 113.0, 115.0, 53.0, 60.0, 169.0, 147.0, 137.0, 151.0, 157.0, 148.0, 96.0, 119.0, 124.0, 132.0, 99.0, 131.0, 69.0, 107.0, 80.0, 62.0, 125.0, 108.0, 146.0, 139.0, 117.0, 135.0, 102.0, 96.0, 84.0, 71.0, 156.0, 145.0, 96.0, 94.0, 126.0, 132.0, 133.0, 156.0, 83.0, 92.0, 100.0, 88.0, 122.0, 111.0, 143.0, 150.0, 119.0, 122.0, 150.0, 147.0, 137.0, 155.0, 176.0, 172.0, 166.0, 161.0, 85.0, 103.0, 151.0, 154.0, 92.0, 83.0, 131.0, 159.0, 81.0, 77.0, 134.0, 125.0, 132.0, 132.0, 157.0, 133.0, 121.0, 127.0, 96.0, 113.0, 140.0, 161.0, 135.0, 111.0, 101.0, 98.0, 117.0, 124.0, 149.0, 124.0, 171.0, 219.0, 95.0, 93.0, 139.0, 157.0, 114.0, 125.0, 127.0, 126.0, 101.0, 95.0, 163.0, 193.0, 32.0, 37.0, 59.0, 66.0, 117.0, 130.0, 88.0, 94.0, 119.0, 120.0, 79.0, 82.0, 57.0, 66.0, 122.0, 108.0, 148.0, 159.0, 174.0, 174.0, 89.0, 99.0, 105.0, 85.0, 134.0, 118.0, 111.0, 107.0, 170.0, 172.0, 159.0, 131.0, 169.0, 172.0, 148.0, 145.0, 
80.0, 100.0, 138.0, 135.0, 69.0, 59.0, 90.0, 116.0, 116.0, 133.0, 120.0, 122.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6884233098602586, "mean_inference_ms": 1.2102454391431337, "mean_action_processing_ms": 0.133125106706314, "mean_env_wait_ms": 0.8727177708913959, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 1408000, "num_agent_steps_trained": 1408000, "num_env_steps_sampled": 704000, "num_env_steps_trained": 704000, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 704000, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 1408000, "timers": {"training_iteration_time_ms": 3656.641, "learn_time_ms": 1113.051, "learn_throughput": 11499.919, "synch_weights_time_ms": 10.828}, "counters": {"num_env_steps_sampled": 704000, "num_env_steps_trained": 704000, "num_agent_steps_sampled": 1408000, "num_agent_steps_trained": 1408000}, "done": false, "episodes_total": 1760, "training_iteration": 55, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-03-30", "timestamp": 1666580610, "time_this_iter_s": 3.7109124660491943, "time_total_s": 207.53650546073914, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": 
{"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": 
[256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": 
{"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": 
{"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, 
"min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, 
"in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", 
"create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 207.53650546073914, "timesteps_since_restore": 0, "iterations_since_restore": 55, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 24.1, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 80.8, "sparse_reward_min": 20, "sparse_reward_max": 140, "shaped_reward_mean": 85.72, "shaped_reward_min": 29, "shaped_reward_max": 127, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 8.96, "onion_pickup_agent_0_min": 2, "onion_pickup_agent_0_max": 16, "onion_pickup_agent_1_mean": 10.54, "onion_pickup_agent_1_min": 4, "onion_pickup_agent_1_max": 17, "useful_onion_pickup_agent_0_mean": 8.13, "useful_onion_pickup_agent_0_min": 2, "useful_onion_pickup_agent_0_max": 15, "useful_onion_pickup_agent_1_mean": 9.72, "useful_onion_pickup_agent_1_min": 2, "useful_onion_pickup_agent_1_max": 16, "onion_drop_agent_0_mean": 1.43, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 1.32, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.72, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 
0.7, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 6, "potting_onion_agent_0_mean": 7.26, "potting_onion_agent_0_min": 2, "potting_onion_agent_0_max": 15, "potting_onion_agent_1_mean": 8.9, "potting_onion_agent_1_min": 1, "potting_onion_agent_1_max": 16, "dish_pickup_agent_0_mean": 5.43, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 5.24, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 12, "useful_dish_pickup_agent_0_mean": 1.36, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 5, "useful_dish_pickup_agent_1_mean": 1.27, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 5, "dish_drop_agent_0_mean": 2.15, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 8, "dish_drop_agent_1_mean": 2.1, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.82, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 4, "useful_dish_drop_agent_1_mean": 0.8, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 5, "soup_pickup_agent_0_mean": 3.06, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 6, "soup_pickup_agent_1_mean": 3.06, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 6, "soup_delivery_agent_0_mean": 2.72, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 6, "soup_delivery_agent_1_mean": 2.69, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.21, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.28, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 4, "optimal_onion_potting_agent_0_mean": 7.26, "optimal_onion_potting_agent_0_min": 2, "optimal_onion_potting_agent_0_max": 15, "optimal_onion_potting_agent_1_mean": 8.9, "optimal_onion_potting_agent_1_min": 1, "optimal_onion_potting_agent_1_max": 16, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, 
"optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 7.26, "viable_onion_potting_agent_0_min": 2, "viable_onion_potting_agent_0_max": 15, "viable_onion_potting_agent_1_mean": 8.9, "viable_onion_potting_agent_1_min": 1, "viable_onion_potting_agent_1_max": 16, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 5.551115205843844e-18, "cur_lr": 0.0010000000474974513, "total_loss": -7.447449024766684e-05, "policy_loss": -0.00013297703117132187, "vf_loss": 7.748741149902344, 
"vf_explained_var": 0.5911275744438171, "kl": 0.002213613362982869, "entropy": 1.4327480792999268, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 716800, "num_env_steps_trained": 716800, "num_agent_steps_sampled": 1433600, "num_agent_steps_trained": 1433600}, "sampler_results": {"episode_reward_max": 390.0, "episode_reward_min": 69.0, "episode_reward_mean": 247.32, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 32.0}, "policy_reward_max": {"ppo": 219.0}, "policy_reward_mean": {"ppo": 123.66}, "custom_metrics": {"sparse_reward_mean": 80.8, "sparse_reward_min": 20, "sparse_reward_max": 140, "shaped_reward_mean": 85.72, "shaped_reward_min": 29, "shaped_reward_max": 127, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 8.96, "onion_pickup_agent_0_min": 2, "onion_pickup_agent_0_max": 16, "onion_pickup_agent_1_mean": 10.54, "onion_pickup_agent_1_min": 4, 
"onion_pickup_agent_1_max": 17, "useful_onion_pickup_agent_0_mean": 8.13, "useful_onion_pickup_agent_0_min": 2, "useful_onion_pickup_agent_0_max": 15, "useful_onion_pickup_agent_1_mean": 9.72, "useful_onion_pickup_agent_1_min": 2, "useful_onion_pickup_agent_1_max": 16, "onion_drop_agent_0_mean": 1.43, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 1.32, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.72, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.7, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 6, "potting_onion_agent_0_mean": 7.26, "potting_onion_agent_0_min": 2, "potting_onion_agent_0_max": 15, "potting_onion_agent_1_mean": 8.9, "potting_onion_agent_1_min": 1, "potting_onion_agent_1_max": 16, "dish_pickup_agent_0_mean": 5.43, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 5.24, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 12, "useful_dish_pickup_agent_0_mean": 1.36, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 5, "useful_dish_pickup_agent_1_mean": 1.27, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 5, "dish_drop_agent_0_mean": 2.15, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 8, "dish_drop_agent_1_mean": 2.1, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.82, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 4, "useful_dish_drop_agent_1_mean": 0.8, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 5, "soup_pickup_agent_0_mean": 3.06, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 6, "soup_pickup_agent_1_mean": 3.06, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 6, "soup_delivery_agent_0_mean": 2.72, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 6, 
"soup_delivery_agent_1_mean": 2.69, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.21, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.28, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 4, "optimal_onion_potting_agent_0_mean": 7.26, "optimal_onion_potting_agent_0_min": 2, "optimal_onion_potting_agent_0_max": 15, "optimal_onion_potting_agent_1_mean": 8.9, "optimal_onion_potting_agent_1_min": 1, "optimal_onion_potting_agent_1_max": 16, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 7.26, "viable_onion_potting_agent_0_min": 2, "viable_onion_potting_agent_0_max": 15, "viable_onion_potting_agent_1_mean": 8.9, "viable_onion_potting_agent_1_min": 1, "viable_onion_potting_agent_1_max": 16, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, 
"useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [256.0, 230.0, 176.0, 142.0, 233.0, 285.0, 252.0, 198.0, 155.0, 301.0, 190.0, 258.0, 289.0, 175.0, 188.0, 233.0, 293.0, 241.0, 297.0, 292.0, 348.0, 327.0, 188.0, 305.0, 175.0, 290.0, 158.0, 259.0, 264.0, 290.0, 248.0, 209.0, 301.0, 246.0, 199.0, 241.0, 273.0, 390.0, 188.0, 296.0, 239.0, 253.0, 196.0, 356.0, 69.0, 125.0, 247.0, 182.0, 239.0, 161.0, 123.0, 230.0, 307.0, 348.0, 188.0, 190.0, 252.0, 218.0, 342.0, 290.0, 341.0, 293.0, 180.0, 273.0, 128.0, 206.0, 249.0, 242.0, 195.0, 196.0, 170.0, 293.0, 295.0, 310.0, 244.0, 345.0, 252.0, 85.0, 287.0, 161.0, 342.0, 87.0, 333.0, 255.0, 190.0, 298.0, 163.0, 296.0, 285.0, 286.0, 306.0, 298.0, 284.0, 381.0, 292.0, 117.0, 336.0, 276.0, 338.0, 390.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [124.0, 132.0, 99.0, 131.0, 69.0, 107.0, 80.0, 62.0, 125.0, 108.0, 146.0, 139.0, 117.0, 135.0, 102.0, 96.0, 84.0, 71.0, 156.0, 145.0, 96.0, 94.0, 126.0, 132.0, 133.0, 156.0, 83.0, 92.0, 100.0, 88.0, 122.0, 111.0, 143.0, 150.0, 119.0, 122.0, 150.0, 147.0, 137.0, 155.0, 176.0, 172.0, 166.0, 161.0, 85.0, 103.0, 151.0, 154.0, 92.0, 83.0, 131.0, 159.0, 81.0, 77.0, 134.0, 125.0, 132.0, 132.0, 
157.0, 133.0, 121.0, 127.0, 96.0, 113.0, 140.0, 161.0, 135.0, 111.0, 101.0, 98.0, 117.0, 124.0, 149.0, 124.0, 171.0, 219.0, 95.0, 93.0, 139.0, 157.0, 114.0, 125.0, 127.0, 126.0, 101.0, 95.0, 163.0, 193.0, 32.0, 37.0, 59.0, 66.0, 117.0, 130.0, 88.0, 94.0, 119.0, 120.0, 79.0, 82.0, 57.0, 66.0, 122.0, 108.0, 148.0, 159.0, 174.0, 174.0, 89.0, 99.0, 105.0, 85.0, 134.0, 118.0, 111.0, 107.0, 170.0, 172.0, 159.0, 131.0, 169.0, 172.0, 148.0, 145.0, 80.0, 100.0, 138.0, 135.0, 69.0, 59.0, 90.0, 116.0, 116.0, 133.0, 120.0, 122.0, 96.0, 99.0, 100.0, 96.0, 73.0, 97.0, 128.0, 165.0, 144.0, 151.0, 156.0, 154.0, 127.0, 117.0, 179.0, 166.0, 108.0, 144.0, 45.0, 40.0, 131.0, 156.0, 82.0, 79.0, 162.0, 180.0, 34.0, 53.0, 169.0, 164.0, 135.0, 120.0, 99.0, 91.0, 150.0, 148.0, 65.0, 98.0, 162.0, 134.0, 140.0, 145.0, 132.0, 154.0, 144.0, 162.0, 148.0, 150.0, 148.0, 136.0, 200.0, 181.0, 117.0, 175.0, 52.0, 65.0, 156.0, 180.0, 137.0, 139.0, 164.0, 174.0, 200.0, 190.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6882648118682849, "mean_inference_ms": 1.2100162140522224, "mean_action_processing_ms": 0.13309189461136894, "mean_env_wait_ms": 0.8718994654894457, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 390.0, "episode_reward_min": 69.0, "episode_reward_mean": 247.32, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 32.0}, "policy_reward_max": {"ppo": 219.0}, "policy_reward_mean": {"ppo": 123.66}, "hist_stats": {"episode_reward": [256.0, 230.0, 176.0, 142.0, 233.0, 285.0, 252.0, 198.0, 155.0, 301.0, 190.0, 258.0, 289.0, 175.0, 188.0, 233.0, 293.0, 241.0, 297.0, 292.0, 348.0, 327.0, 188.0, 305.0, 175.0, 290.0, 158.0, 259.0, 264.0, 290.0, 248.0, 209.0, 301.0, 246.0, 199.0, 241.0, 273.0, 390.0, 188.0, 296.0, 239.0, 253.0, 196.0, 356.0, 69.0, 125.0, 247.0, 182.0, 239.0, 161.0, 123.0, 230.0, 307.0, 348.0, 188.0, 190.0, 252.0, 218.0, 342.0, 290.0, 341.0, 293.0, 180.0, 273.0, 128.0, 206.0, 249.0, 242.0, 195.0, 196.0, 170.0, 293.0, 
295.0, 310.0, 244.0, 345.0, 252.0, 85.0, 287.0, 161.0, 342.0, 87.0, 333.0, 255.0, 190.0, 298.0, 163.0, 296.0, 285.0, 286.0, 306.0, 298.0, 284.0, 381.0, 292.0, 117.0, 336.0, 276.0, 338.0, 390.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [124.0, 132.0, 99.0, 131.0, 69.0, 107.0, 80.0, 62.0, 125.0, 108.0, 146.0, 139.0, 117.0, 135.0, 102.0, 96.0, 84.0, 71.0, 156.0, 145.0, 96.0, 94.0, 126.0, 132.0, 133.0, 156.0, 83.0, 92.0, 100.0, 88.0, 122.0, 111.0, 143.0, 150.0, 119.0, 122.0, 150.0, 147.0, 137.0, 155.0, 176.0, 172.0, 166.0, 161.0, 85.0, 103.0, 151.0, 154.0, 92.0, 83.0, 131.0, 159.0, 81.0, 77.0, 134.0, 125.0, 132.0, 132.0, 157.0, 133.0, 121.0, 127.0, 96.0, 113.0, 140.0, 161.0, 135.0, 111.0, 101.0, 98.0, 117.0, 124.0, 149.0, 124.0, 171.0, 219.0, 95.0, 93.0, 139.0, 157.0, 114.0, 125.0, 127.0, 126.0, 101.0, 95.0, 163.0, 193.0, 32.0, 37.0, 59.0, 66.0, 117.0, 130.0, 88.0, 94.0, 119.0, 120.0, 79.0, 82.0, 57.0, 66.0, 122.0, 108.0, 148.0, 159.0, 174.0, 174.0, 89.0, 99.0, 105.0, 85.0, 134.0, 118.0, 111.0, 107.0, 170.0, 172.0, 159.0, 131.0, 169.0, 172.0, 148.0, 145.0, 80.0, 100.0, 138.0, 135.0, 69.0, 59.0, 90.0, 116.0, 116.0, 133.0, 120.0, 122.0, 96.0, 99.0, 100.0, 96.0, 73.0, 97.0, 128.0, 165.0, 144.0, 151.0, 156.0, 154.0, 127.0, 117.0, 179.0, 166.0, 108.0, 144.0, 45.0, 40.0, 131.0, 156.0, 82.0, 79.0, 162.0, 180.0, 34.0, 53.0, 169.0, 164.0, 135.0, 120.0, 99.0, 91.0, 150.0, 148.0, 65.0, 98.0, 162.0, 134.0, 140.0, 145.0, 132.0, 154.0, 144.0, 162.0, 148.0, 150.0, 148.0, 136.0, 200.0, 181.0, 
117.0, 175.0, 52.0, 65.0, 156.0, 180.0, 137.0, 139.0, 164.0, 174.0, 200.0, 190.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6882648118682849, "mean_inference_ms": 1.2100162140522224, "mean_action_processing_ms": 0.13309189461136894, "mean_env_wait_ms": 0.8718994654894457, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 1433600, "num_agent_steps_trained": 1433600, "num_env_steps_sampled": 716800, "num_env_steps_trained": 716800, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 716800, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 1433600, "timers": {"training_iteration_time_ms": 3651.298, "learn_time_ms": 1117.544, "learn_throughput": 11453.684, "synch_weights_time_ms": 11.295}, "counters": {"num_env_steps_sampled": 716800, "num_env_steps_trained": 716800, "num_agent_steps_sampled": 1433600, "num_agent_steps_trained": 1433600}, "done": false, "episodes_total": 1792, "training_iteration": 56, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-03-34", "timestamp": 1666580614, "time_this_iter_s": 3.633439302444458, "time_total_s": 211.1699447631836, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": 
{"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": 
[256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": 
{"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": 
{"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, 
"min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, 
"in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", 
"create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 211.1699447631836, "timesteps_since_restore": 0, "iterations_since_restore": 56, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.733333333333334, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 87.4, "sparse_reward_min": 0, "sparse_reward_max": 140, "shaped_reward_mean": 88.81, "shaped_reward_min": 29, "shaped_reward_max": 124, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 9.41, "onion_pickup_agent_0_min": 1, "onion_pickup_agent_0_max": 16, "onion_pickup_agent_1_mean": 10.68, "onion_pickup_agent_1_min": 4, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 8.77, "useful_onion_pickup_agent_0_min": 1, "useful_onion_pickup_agent_0_max": 15, "useful_onion_pickup_agent_1_mean": 10.08, "useful_onion_pickup_agent_1_min": 2, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 1.2, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 1.2, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.53, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 
0.55, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 7.87, "potting_onion_agent_0_min": 1, "potting_onion_agent_0_max": 15, "potting_onion_agent_1_mean": 9.21, "potting_onion_agent_1_min": 1, "potting_onion_agent_1_max": 17, "dish_pickup_agent_0_mean": 5.56, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.54, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 12, "useful_dish_pickup_agent_0_mean": 1.16, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 5, "useful_dish_pickup_agent_1_mean": 1.23, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 5, "dish_drop_agent_0_mean": 2.27, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 2.19, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 1.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 4, "useful_dish_drop_agent_1_mean": 0.83, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 4, "soup_pickup_agent_0_mean": 3.04, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 6, "soup_pickup_agent_1_mean": 3.26, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 2.84, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 6, "soup_delivery_agent_1_mean": 2.86, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.14, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.3, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 5, "optimal_onion_potting_agent_0_mean": 7.87, "optimal_onion_potting_agent_0_min": 1, "optimal_onion_potting_agent_0_max": 15, "optimal_onion_potting_agent_1_mean": 9.21, "optimal_onion_potting_agent_1_min": 1, "optimal_onion_potting_agent_1_max": 17, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, 
"optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 7.87, "viable_onion_potting_agent_0_min": 1, "viable_onion_potting_agent_0_max": 15, "viable_onion_potting_agent_1_mean": 9.21, "viable_onion_potting_agent_1_min": 1, "viable_onion_potting_agent_1_max": 17, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 2.775557602921922e-18, "cur_lr": 0.0010000000474974513, "total_loss": -0.00404885271564126, "policy_loss": -0.004144429694861174, "vf_loss": 8.007272720336914, 
"vf_explained_var": 0.5925405025482178, "kl": 0.002025268506258726, "entropy": 1.4103046655654907, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 729600, "num_env_steps_trained": 729600, "num_agent_steps_sampled": 1459200, "num_agent_steps_trained": 1459200}, "sampler_results": {"episode_reward_max": 401.0, "episode_reward_min": 69.0, "episode_reward_mean": 263.61, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 32.0}, "policy_reward_max": {"ppo": 219.0}, "policy_reward_mean": {"ppo": 131.805}, "custom_metrics": {"sparse_reward_mean": 87.4, "sparse_reward_min": 0, "sparse_reward_max": 140, "shaped_reward_mean": 88.81, "shaped_reward_min": 29, "shaped_reward_max": 124, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 9.41, "onion_pickup_agent_0_min": 1, "onion_pickup_agent_0_max": 16, "onion_pickup_agent_1_mean": 10.68, "onion_pickup_agent_1_min": 4, 
"onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 8.77, "useful_onion_pickup_agent_0_min": 1, "useful_onion_pickup_agent_0_max": 15, "useful_onion_pickup_agent_1_mean": 10.08, "useful_onion_pickup_agent_1_min": 2, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 1.2, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 1.2, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.53, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.55, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 7.87, "potting_onion_agent_0_min": 1, "potting_onion_agent_0_max": 15, "potting_onion_agent_1_mean": 9.21, "potting_onion_agent_1_min": 1, "potting_onion_agent_1_max": 17, "dish_pickup_agent_0_mean": 5.56, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.54, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 12, "useful_dish_pickup_agent_0_mean": 1.16, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 5, "useful_dish_pickup_agent_1_mean": 1.23, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 5, "dish_drop_agent_0_mean": 2.27, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 2.19, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 1.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 4, "useful_dish_drop_agent_1_mean": 0.83, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 4, "soup_pickup_agent_0_mean": 3.04, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 6, "soup_pickup_agent_1_mean": 3.26, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 2.84, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 6, 
"soup_delivery_agent_1_mean": 2.86, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.14, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.3, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 5, "optimal_onion_potting_agent_0_mean": 7.87, "optimal_onion_potting_agent_0_min": 1, "optimal_onion_potting_agent_0_max": 15, "optimal_onion_potting_agent_1_mean": 9.21, "optimal_onion_potting_agent_1_min": 1, "optimal_onion_potting_agent_1_max": 17, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 7.87, "viable_onion_potting_agent_0_min": 1, "viable_onion_potting_agent_0_max": 15, "viable_onion_potting_agent_1_mean": 9.21, "viable_onion_potting_agent_1_min": 1, "viable_onion_potting_agent_1_max": 17, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, 
"useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [301.0, 246.0, 199.0, 241.0, 273.0, 390.0, 188.0, 296.0, 239.0, 253.0, 196.0, 356.0, 69.0, 125.0, 247.0, 182.0, 239.0, 161.0, 123.0, 230.0, 307.0, 348.0, 188.0, 190.0, 252.0, 218.0, 342.0, 290.0, 341.0, 293.0, 180.0, 273.0, 128.0, 206.0, 249.0, 242.0, 195.0, 196.0, 170.0, 293.0, 295.0, 310.0, 244.0, 345.0, 252.0, 85.0, 287.0, 161.0, 342.0, 87.0, 333.0, 255.0, 190.0, 298.0, 163.0, 296.0, 285.0, 286.0, 306.0, 298.0, 284.0, 381.0, 292.0, 117.0, 336.0, 276.0, 338.0, 390.0, 287.0, 399.0, 384.0, 347.0, 247.0, 241.0, 174.0, 341.0, 332.0, 293.0, 292.0, 341.0, 330.0, 98.0, 338.0, 401.0, 312.0, 381.0, 236.0, 230.0, 284.0, 344.0, 351.0, 355.0, 298.0, 324.0, 220.0, 147.0, 175.0, 316.0, 287.0, 269.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [140.0, 161.0, 135.0, 111.0, 101.0, 98.0, 117.0, 124.0, 149.0, 124.0, 171.0, 219.0, 95.0, 93.0, 139.0, 157.0, 114.0, 125.0, 127.0, 126.0, 101.0, 95.0, 163.0, 193.0, 32.0, 37.0, 59.0, 66.0, 117.0, 130.0, 88.0, 94.0, 119.0, 120.0, 79.0, 82.0, 57.0, 66.0, 122.0, 108.0, 148.0, 159.0, 174.0, 174.0, 89.0, 99.0, 105.0, 85.0, 134.0, 118.0, 111.0, 107.0, 170.0, 172.0, 159.0, 131.0, 169.0, 172.0, 
148.0, 145.0, 80.0, 100.0, 138.0, 135.0, 69.0, 59.0, 90.0, 116.0, 116.0, 133.0, 120.0, 122.0, 96.0, 99.0, 100.0, 96.0, 73.0, 97.0, 128.0, 165.0, 144.0, 151.0, 156.0, 154.0, 127.0, 117.0, 179.0, 166.0, 108.0, 144.0, 45.0, 40.0, 131.0, 156.0, 82.0, 79.0, 162.0, 180.0, 34.0, 53.0, 169.0, 164.0, 135.0, 120.0, 99.0, 91.0, 150.0, 148.0, 65.0, 98.0, 162.0, 134.0, 140.0, 145.0, 132.0, 154.0, 144.0, 162.0, 148.0, 150.0, 148.0, 136.0, 200.0, 181.0, 117.0, 175.0, 52.0, 65.0, 156.0, 180.0, 137.0, 139.0, 164.0, 174.0, 200.0, 190.0, 135.0, 152.0, 202.0, 197.0, 185.0, 199.0, 177.0, 170.0, 122.0, 125.0, 106.0, 135.0, 97.0, 77.0, 163.0, 178.0, 169.0, 163.0, 144.0, 149.0, 149.0, 143.0, 165.0, 176.0, 170.0, 160.0, 45.0, 53.0, 173.0, 165.0, 212.0, 189.0, 154.0, 158.0, 172.0, 209.0, 119.0, 117.0, 124.0, 106.0, 140.0, 144.0, 174.0, 170.0, 174.0, 177.0, 176.0, 179.0, 128.0, 170.0, 157.0, 167.0, 115.0, 105.0, 72.0, 75.0, 80.0, 95.0, 143.0, 173.0, 145.0, 142.0, 134.0, 135.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6881554542706896, "mean_inference_ms": 1.2097325725572832, "mean_action_processing_ms": 0.1330667926674265, "mean_env_wait_ms": 0.8711230320174765, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 401.0, "episode_reward_min": 69.0, "episode_reward_mean": 263.61, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 32.0}, "policy_reward_max": {"ppo": 219.0}, "policy_reward_mean": {"ppo": 131.805}, "hist_stats": {"episode_reward": [301.0, 246.0, 199.0, 241.0, 273.0, 390.0, 188.0, 296.0, 239.0, 253.0, 196.0, 356.0, 69.0, 125.0, 247.0, 182.0, 239.0, 161.0, 123.0, 230.0, 307.0, 348.0, 188.0, 190.0, 252.0, 218.0, 342.0, 290.0, 341.0, 293.0, 180.0, 273.0, 128.0, 206.0, 249.0, 242.0, 195.0, 196.0, 170.0, 293.0, 295.0, 310.0, 244.0, 345.0, 252.0, 85.0, 287.0, 161.0, 342.0, 87.0, 333.0, 255.0, 190.0, 298.0, 163.0, 296.0, 285.0, 286.0, 306.0, 298.0, 284.0, 381.0, 292.0, 117.0, 336.0, 276.0, 338.0, 390.0, 287.0, 399.0, 
384.0, 347.0, 247.0, 241.0, 174.0, 341.0, 332.0, 293.0, 292.0, 341.0, 330.0, 98.0, 338.0, 401.0, 312.0, 381.0, 236.0, 230.0, 284.0, 344.0, 351.0, 355.0, 298.0, 324.0, 220.0, 147.0, 175.0, 316.0, 287.0, 269.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [140.0, 161.0, 135.0, 111.0, 101.0, 98.0, 117.0, 124.0, 149.0, 124.0, 171.0, 219.0, 95.0, 93.0, 139.0, 157.0, 114.0, 125.0, 127.0, 126.0, 101.0, 95.0, 163.0, 193.0, 32.0, 37.0, 59.0, 66.0, 117.0, 130.0, 88.0, 94.0, 119.0, 120.0, 79.0, 82.0, 57.0, 66.0, 122.0, 108.0, 148.0, 159.0, 174.0, 174.0, 89.0, 99.0, 105.0, 85.0, 134.0, 118.0, 111.0, 107.0, 170.0, 172.0, 159.0, 131.0, 169.0, 172.0, 148.0, 145.0, 80.0, 100.0, 138.0, 135.0, 69.0, 59.0, 90.0, 116.0, 116.0, 133.0, 120.0, 122.0, 96.0, 99.0, 100.0, 96.0, 73.0, 97.0, 128.0, 165.0, 144.0, 151.0, 156.0, 154.0, 127.0, 117.0, 179.0, 166.0, 108.0, 144.0, 45.0, 40.0, 131.0, 156.0, 82.0, 79.0, 162.0, 180.0, 34.0, 53.0, 169.0, 164.0, 135.0, 120.0, 99.0, 91.0, 150.0, 148.0, 65.0, 98.0, 162.0, 134.0, 140.0, 145.0, 132.0, 154.0, 144.0, 162.0, 148.0, 150.0, 148.0, 136.0, 200.0, 181.0, 117.0, 175.0, 52.0, 65.0, 156.0, 180.0, 137.0, 139.0, 164.0, 174.0, 200.0, 190.0, 135.0, 152.0, 202.0, 197.0, 185.0, 199.0, 177.0, 170.0, 122.0, 125.0, 106.0, 135.0, 97.0, 77.0, 163.0, 178.0, 169.0, 163.0, 144.0, 149.0, 149.0, 143.0, 165.0, 176.0, 170.0, 160.0, 45.0, 53.0, 173.0, 165.0, 212.0, 189.0, 154.0, 158.0, 172.0, 209.0, 119.0, 117.0, 124.0, 106.0, 140.0, 144.0, 174.0, 170.0, 174.0, 177.0, 176.0, 179.0, 
128.0, 170.0, 157.0, 167.0, 115.0, 105.0, 72.0, 75.0, 80.0, 95.0, 143.0, 173.0, 145.0, 142.0, 134.0, 135.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6881554542706896, "mean_inference_ms": 1.2097325725572832, "mean_action_processing_ms": 0.1330667926674265, "mean_env_wait_ms": 0.8711230320174765, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 1459200, "num_agent_steps_trained": 1459200, "num_env_steps_sampled": 729600, "num_env_steps_trained": 729600, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 729600, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 1459200, "timers": {"training_iteration_time_ms": 3658.574, "learn_time_ms": 1121.88, "learn_throughput": 11409.424, "synch_weights_time_ms": 12.08}, "counters": {"num_env_steps_sampled": 729600, "num_env_steps_trained": 729600, "num_agent_steps_sampled": 1459200, "num_agent_steps_trained": 1459200}, "done": false, "episodes_total": 1824, "training_iteration": 57, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-03-38", "timestamp": 1666580618, "time_this_iter_s": 3.7686376571655273, "time_total_s": 214.93858242034912, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", 
"env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, 
"fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": 
{"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": 
{"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, 
"min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, 
"in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", 
"create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 214.93858242034912, "timesteps_since_restore": 0, "iterations_since_restore": 57, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.7, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 95.0, "sparse_reward_min": 0, "sparse_reward_max": 140, "shaped_reward_mean": 93.52, "shaped_reward_min": 45, "shaped_reward_max": 124, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 9.86, "onion_pickup_agent_0_min": 1, "onion_pickup_agent_0_max": 18, "onion_pickup_agent_1_mean": 11.11, "onion_pickup_agent_1_min": 5, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 9.27, "useful_onion_pickup_agent_0_min": 1, "useful_onion_pickup_agent_0_max": 17, "useful_onion_pickup_agent_1_mean": 10.57, "useful_onion_pickup_agent_1_min": 2, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 1.15, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 1.18, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.52, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, "useful_onion_drop_agent_1_mean": 
0.51, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 8.35, "potting_onion_agent_0_min": 1, "potting_onion_agent_0_max": 16, "potting_onion_agent_1_mean": 9.72, "potting_onion_agent_1_min": 1, "potting_onion_agent_1_max": 17, "dish_pickup_agent_0_mean": 5.57, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.65, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 1.25, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 5, "useful_dish_pickup_agent_1_mean": 1.27, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 5, "dish_drop_agent_0_mean": 2.25, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 2.1, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 6, "useful_dish_drop_agent_0_mean": 0.89, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 4, "useful_dish_drop_agent_1_mean": 0.85, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 4, "soup_pickup_agent_0_mean": 3.11, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 6, "soup_pickup_agent_1_mean": 3.43, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 2.94, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 6, "soup_delivery_agent_1_mean": 3.06, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.12, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.26, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 5, "optimal_onion_potting_agent_0_mean": 8.35, "optimal_onion_potting_agent_0_min": 1, "optimal_onion_potting_agent_0_max": 16, "optimal_onion_potting_agent_1_mean": 9.72, "optimal_onion_potting_agent_1_min": 1, "optimal_onion_potting_agent_1_max": 17, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, 
"optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 8.35, "viable_onion_potting_agent_0_min": 1, "viable_onion_potting_agent_0_max": 16, "viable_onion_potting_agent_1_mean": 9.72, "viable_onion_potting_agent_1_min": 1, "viable_onion_potting_agent_1_max": 17, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 1.387778801460961e-18, "cur_lr": 0.0010000000474974513, "total_loss": -0.0027208023238927126, "policy_loss": -0.0028445827774703503, "vf_loss": 8.135013580322266, 
"vf_explained_var": 0.5896250009536743, "kl": 0.001949745579622686, "entropy": 1.3794457912445068, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 742400, "num_env_steps_trained": 742400, "num_agent_steps_sampled": 1484800, "num_agent_steps_trained": 1484800}, "sampler_results": {"episode_reward_max": 402.0, "episode_reward_min": 85.0, "episode_reward_mean": 283.52, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 31.0}, "policy_reward_max": {"ppo": 212.0}, "policy_reward_mean": {"ppo": 141.76}, "custom_metrics": {"sparse_reward_mean": 95.0, "sparse_reward_min": 0, "sparse_reward_max": 140, "shaped_reward_mean": 93.52, "shaped_reward_min": 45, "shaped_reward_max": 124, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 9.86, "onion_pickup_agent_0_min": 1, "onion_pickup_agent_0_max": 18, "onion_pickup_agent_1_mean": 11.11, "onion_pickup_agent_1_min": 5, 
"onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 9.27, "useful_onion_pickup_agent_0_min": 1, "useful_onion_pickup_agent_0_max": 17, "useful_onion_pickup_agent_1_mean": 10.57, "useful_onion_pickup_agent_1_min": 2, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 1.15, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 1.18, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.52, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, "useful_onion_drop_agent_1_mean": 0.51, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 8.35, "potting_onion_agent_0_min": 1, "potting_onion_agent_0_max": 16, "potting_onion_agent_1_mean": 9.72, "potting_onion_agent_1_min": 1, "potting_onion_agent_1_max": 17, "dish_pickup_agent_0_mean": 5.57, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.65, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 1.25, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 5, "useful_dish_pickup_agent_1_mean": 1.27, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 5, "dish_drop_agent_0_mean": 2.25, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 2.1, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 6, "useful_dish_drop_agent_0_mean": 0.89, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 4, "useful_dish_drop_agent_1_mean": 0.85, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 4, "soup_pickup_agent_0_mean": 3.11, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 6, "soup_pickup_agent_1_mean": 3.43, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 2.94, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 6, 
"soup_delivery_agent_1_mean": 3.06, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.12, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.26, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 5, "optimal_onion_potting_agent_0_mean": 8.35, "optimal_onion_potting_agent_0_min": 1, "optimal_onion_potting_agent_0_max": 16, "optimal_onion_potting_agent_1_mean": 9.72, "optimal_onion_potting_agent_1_min": 1, "optimal_onion_potting_agent_1_max": 17, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 8.35, "viable_onion_potting_agent_0_min": 1, "viable_onion_potting_agent_0_max": 16, "viable_onion_potting_agent_1_mean": 9.72, "viable_onion_potting_agent_1_min": 1, "viable_onion_potting_agent_1_max": 17, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, 
"useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [128.0, 206.0, 249.0, 242.0, 195.0, 196.0, 170.0, 293.0, 295.0, 310.0, 244.0, 345.0, 252.0, 85.0, 287.0, 161.0, 342.0, 87.0, 333.0, 255.0, 190.0, 298.0, 163.0, 296.0, 285.0, 286.0, 306.0, 298.0, 284.0, 381.0, 292.0, 117.0, 336.0, 276.0, 338.0, 390.0, 287.0, 399.0, 384.0, 347.0, 247.0, 241.0, 174.0, 341.0, 332.0, 293.0, 292.0, 341.0, 330.0, 98.0, 338.0, 401.0, 312.0, 381.0, 236.0, 230.0, 284.0, 344.0, 351.0, 355.0, 298.0, 324.0, 220.0, 147.0, 175.0, 316.0, 287.0, 269.0, 281.0, 352.0, 336.0, 287.0, 395.0, 241.0, 231.0, 354.0, 229.0, 402.0, 291.0, 273.0, 347.0, 298.0, 247.0, 336.0, 399.0, 338.0, 296.0, 313.0, 336.0, 256.0, 87.0, 344.0, 273.0, 250.0, 333.0, 276.0, 296.0, 290.0, 390.0, 390.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [69.0, 59.0, 90.0, 116.0, 116.0, 133.0, 120.0, 122.0, 96.0, 99.0, 100.0, 96.0, 73.0, 97.0, 128.0, 165.0, 144.0, 151.0, 156.0, 154.0, 127.0, 117.0, 179.0, 166.0, 108.0, 144.0, 45.0, 40.0, 131.0, 156.0, 82.0, 79.0, 162.0, 180.0, 34.0, 53.0, 169.0, 164.0, 135.0, 120.0, 99.0, 91.0, 150.0, 148.0, 65.0, 98.0, 162.0, 134.0, 140.0, 145.0, 132.0, 154.0, 144.0, 162.0, 148.0, 150.0, 148.0, 136.0, 
200.0, 181.0, 117.0, 175.0, 52.0, 65.0, 156.0, 180.0, 137.0, 139.0, 164.0, 174.0, 200.0, 190.0, 135.0, 152.0, 202.0, 197.0, 185.0, 199.0, 177.0, 170.0, 122.0, 125.0, 106.0, 135.0, 97.0, 77.0, 163.0, 178.0, 169.0, 163.0, 144.0, 149.0, 149.0, 143.0, 165.0, 176.0, 170.0, 160.0, 45.0, 53.0, 173.0, 165.0, 212.0, 189.0, 154.0, 158.0, 172.0, 209.0, 119.0, 117.0, 124.0, 106.0, 140.0, 144.0, 174.0, 170.0, 174.0, 177.0, 176.0, 179.0, 128.0, 170.0, 157.0, 167.0, 115.0, 105.0, 72.0, 75.0, 80.0, 95.0, 143.0, 173.0, 145.0, 142.0, 134.0, 135.0, 139.0, 142.0, 167.0, 185.0, 161.0, 175.0, 136.0, 151.0, 203.0, 192.0, 125.0, 116.0, 127.0, 104.0, 172.0, 182.0, 99.0, 130.0, 200.0, 202.0, 145.0, 146.0, 148.0, 125.0, 159.0, 188.0, 135.0, 163.0, 130.0, 117.0, 167.0, 169.0, 201.0, 198.0, 157.0, 181.0, 142.0, 154.0, 162.0, 151.0, 165.0, 171.0, 136.0, 120.0, 31.0, 56.0, 158.0, 186.0, 133.0, 140.0, 131.0, 119.0, 176.0, 157.0, 130.0, 146.0, 139.0, 157.0, 144.0, 146.0, 195.0, 195.0, 198.0, 192.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6880473354254739, "mean_inference_ms": 1.210219939048055, "mean_action_processing_ms": 0.1330240938839774, "mean_env_wait_ms": 0.8706293930800345, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 402.0, "episode_reward_min": 85.0, "episode_reward_mean": 283.52, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 31.0}, "policy_reward_max": {"ppo": 212.0}, "policy_reward_mean": {"ppo": 141.76}, "hist_stats": {"episode_reward": [128.0, 206.0, 249.0, 242.0, 195.0, 196.0, 170.0, 293.0, 295.0, 310.0, 244.0, 345.0, 252.0, 85.0, 287.0, 161.0, 342.0, 87.0, 333.0, 255.0, 190.0, 298.0, 163.0, 296.0, 285.0, 286.0, 306.0, 298.0, 284.0, 381.0, 292.0, 117.0, 336.0, 276.0, 338.0, 390.0, 287.0, 399.0, 384.0, 347.0, 247.0, 241.0, 174.0, 341.0, 332.0, 293.0, 292.0, 341.0, 330.0, 98.0, 338.0, 401.0, 312.0, 381.0, 236.0, 230.0, 284.0, 344.0, 351.0, 355.0, 298.0, 324.0, 220.0, 147.0, 175.0, 316.0, 287.0, 269.0, 
281.0, 352.0, 336.0, 287.0, 395.0, 241.0, 231.0, 354.0, 229.0, 402.0, 291.0, 273.0, 347.0, 298.0, 247.0, 336.0, 399.0, 338.0, 296.0, 313.0, 336.0, 256.0, 87.0, 344.0, 273.0, 250.0, 333.0, 276.0, 296.0, 290.0, 390.0, 390.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [69.0, 59.0, 90.0, 116.0, 116.0, 133.0, 120.0, 122.0, 96.0, 99.0, 100.0, 96.0, 73.0, 97.0, 128.0, 165.0, 144.0, 151.0, 156.0, 154.0, 127.0, 117.0, 179.0, 166.0, 108.0, 144.0, 45.0, 40.0, 131.0, 156.0, 82.0, 79.0, 162.0, 180.0, 34.0, 53.0, 169.0, 164.0, 135.0, 120.0, 99.0, 91.0, 150.0, 148.0, 65.0, 98.0, 162.0, 134.0, 140.0, 145.0, 132.0, 154.0, 144.0, 162.0, 148.0, 150.0, 148.0, 136.0, 200.0, 181.0, 117.0, 175.0, 52.0, 65.0, 156.0, 180.0, 137.0, 139.0, 164.0, 174.0, 200.0, 190.0, 135.0, 152.0, 202.0, 197.0, 185.0, 199.0, 177.0, 170.0, 122.0, 125.0, 106.0, 135.0, 97.0, 77.0, 163.0, 178.0, 169.0, 163.0, 144.0, 149.0, 149.0, 143.0, 165.0, 176.0, 170.0, 160.0, 45.0, 53.0, 173.0, 165.0, 212.0, 189.0, 154.0, 158.0, 172.0, 209.0, 119.0, 117.0, 124.0, 106.0, 140.0, 144.0, 174.0, 170.0, 174.0, 177.0, 176.0, 179.0, 128.0, 170.0, 157.0, 167.0, 115.0, 105.0, 72.0, 75.0, 80.0, 95.0, 143.0, 173.0, 145.0, 142.0, 134.0, 135.0, 139.0, 142.0, 167.0, 185.0, 161.0, 175.0, 136.0, 151.0, 203.0, 192.0, 125.0, 116.0, 127.0, 104.0, 172.0, 182.0, 99.0, 130.0, 200.0, 202.0, 145.0, 146.0, 148.0, 125.0, 159.0, 188.0, 135.0, 163.0, 130.0, 117.0, 167.0, 169.0, 201.0, 198.0, 157.0, 181.0, 142.0, 154.0, 162.0, 151.0, 165.0, 171.0, 136.0, 120.0, 
31.0, 56.0, 158.0, 186.0, 133.0, 140.0, 131.0, 119.0, 176.0, 157.0, 130.0, 146.0, 139.0, 157.0, 144.0, 146.0, 195.0, 195.0, 198.0, 192.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6880473354254739, "mean_inference_ms": 1.210219939048055, "mean_action_processing_ms": 0.1330240938839774, "mean_env_wait_ms": 0.8706293930800345, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 1484800, "num_agent_steps_trained": 1484800, "num_env_steps_sampled": 742400, "num_env_steps_trained": 742400, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 742400, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 1484800, "timers": {"training_iteration_time_ms": 3684.015, "learn_time_ms": 1122.652, "learn_throughput": 11401.576, "synch_weights_time_ms": 12.325}, "counters": {"num_env_steps_sampled": 742400, "num_env_steps_trained": 742400, "num_agent_steps_sampled": 1484800, "num_agent_steps_trained": 1484800}, "done": false, "episodes_total": 1856, "training_iteration": 58, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-03-42", "timestamp": 1666580622, "time_this_iter_s": 3.8663227558135986, "time_total_s": 218.80490517616272, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, 
"env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, 
"_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, 
"local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 
12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, 
"min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, 
"in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", 
"create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 218.80490517616272, "timesteps_since_restore": 0, "iterations_since_restore": 58, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 21.733333333333334, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 100.6, "sparse_reward_min": 0, "sparse_reward_max": 140, "shaped_reward_mean": 95.3, "shaped_reward_min": 42, "shaped_reward_max": 124, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 10.13, "onion_pickup_agent_0_min": 1, "onion_pickup_agent_0_max": 18, "onion_pickup_agent_1_mean": 11.47, "onion_pickup_agent_1_min": 5, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 9.6, "useful_onion_pickup_agent_0_min": 1, "useful_onion_pickup_agent_0_max": 17, "useful_onion_pickup_agent_1_mean": 10.85, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 1.18, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 7, "onion_drop_agent_1_mean": 1.24, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.47, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, "useful_onion_drop_agent_1_mean": 
0.5, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 8.57, "potting_onion_agent_0_min": 1, "potting_onion_agent_0_max": 16, "potting_onion_agent_1_mean": 9.98, "potting_onion_agent_1_min": 3, "potting_onion_agent_1_max": 17, "dish_pickup_agent_0_mean": 5.76, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.44, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 1.34, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 6, "useful_dish_pickup_agent_1_mean": 1.31, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 5, "dish_drop_agent_0_mean": 2.21, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 2.14, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 7, "useful_dish_drop_agent_0_mean": 0.98, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 4, "useful_dish_drop_agent_1_mean": 0.84, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 4, "soup_pickup_agent_0_mean": 3.39, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 3.11, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 3.21, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 6, "soup_delivery_agent_1_mean": 2.89, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.14, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 4, "soup_drop_agent_1_mean": 0.16, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 5, "optimal_onion_potting_agent_0_mean": 8.57, "optimal_onion_potting_agent_0_min": 1, "optimal_onion_potting_agent_0_max": 16, "optimal_onion_potting_agent_1_mean": 9.98, "optimal_onion_potting_agent_1_min": 3, "optimal_onion_potting_agent_1_max": 17, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, 
"optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 8.57, "viable_onion_potting_agent_0_min": 1, "viable_onion_potting_agent_0_max": 16, "viable_onion_potting_agent_1_mean": 9.98, "viable_onion_potting_agent_1_min": 3, "viable_onion_potting_agent_1_max": 17, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 6.938894007304805e-19, "cur_lr": 0.0010000000474974513, "total_loss": -0.00216166814789176, "policy_loss": -0.002247137250378728, "vf_loss": 7.820727348327637, 
"vf_explained_var": 0.620682954788208, "kl": 0.0020027090795338154, "entropy": 1.3932123184204102, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 755200, "num_env_steps_trained": 755200, "num_agent_steps_sampled": 1510400, "num_agent_steps_trained": 1510400}, "sampler_results": {"episode_reward_max": 404.0, "episode_reward_min": 77.0, "episode_reward_mean": 296.5, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 31.0}, "policy_reward_max": {"ppo": 218.0}, "policy_reward_mean": {"ppo": 148.25}, "custom_metrics": {"sparse_reward_mean": 100.6, "sparse_reward_min": 0, "sparse_reward_max": 140, "shaped_reward_mean": 95.3, "shaped_reward_min": 42, "shaped_reward_max": 124, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 10.13, "onion_pickup_agent_0_min": 1, "onion_pickup_agent_0_max": 18, "onion_pickup_agent_1_mean": 11.47, "onion_pickup_agent_1_min": 5, 
"onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 9.6, "useful_onion_pickup_agent_0_min": 1, "useful_onion_pickup_agent_0_max": 17, "useful_onion_pickup_agent_1_mean": 10.85, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 1.18, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 7, "onion_drop_agent_1_mean": 1.24, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.47, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, "useful_onion_drop_agent_1_mean": 0.5, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 8.57, "potting_onion_agent_0_min": 1, "potting_onion_agent_0_max": 16, "potting_onion_agent_1_mean": 9.98, "potting_onion_agent_1_min": 3, "potting_onion_agent_1_max": 17, "dish_pickup_agent_0_mean": 5.76, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.44, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 1.34, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 6, "useful_dish_pickup_agent_1_mean": 1.31, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 5, "dish_drop_agent_0_mean": 2.21, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 2.14, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 7, "useful_dish_drop_agent_0_mean": 0.98, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 4, "useful_dish_drop_agent_1_mean": 0.84, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 4, "soup_pickup_agent_0_mean": 3.39, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 3.11, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 3.21, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 6, 
"soup_delivery_agent_1_mean": 2.89, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.14, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 4, "soup_drop_agent_1_mean": 0.16, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 5, "optimal_onion_potting_agent_0_mean": 8.57, "optimal_onion_potting_agent_0_min": 1, "optimal_onion_potting_agent_0_max": 16, "optimal_onion_potting_agent_1_mean": 9.98, "optimal_onion_potting_agent_1_min": 3, "optimal_onion_potting_agent_1_max": 17, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 8.57, "viable_onion_potting_agent_0_min": 1, "viable_onion_potting_agent_0_max": 16, "viable_onion_potting_agent_1_mean": 9.98, "viable_onion_potting_agent_1_min": 3, "viable_onion_potting_agent_1_max": 17, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, 
"useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [336.0, 276.0, 338.0, 390.0, 287.0, 399.0, 384.0, 347.0, 247.0, 241.0, 174.0, 341.0, 332.0, 293.0, 292.0, 341.0, 330.0, 98.0, 338.0, 401.0, 312.0, 381.0, 236.0, 230.0, 284.0, 344.0, 351.0, 355.0, 298.0, 324.0, 220.0, 147.0, 175.0, 316.0, 287.0, 269.0, 281.0, 352.0, 336.0, 287.0, 395.0, 241.0, 231.0, 354.0, 229.0, 402.0, 291.0, 273.0, 347.0, 298.0, 247.0, 336.0, 399.0, 338.0, 296.0, 313.0, 336.0, 256.0, 87.0, 344.0, 273.0, 250.0, 333.0, 276.0, 296.0, 290.0, 390.0, 390.0, 336.0, 327.0, 244.0, 141.0, 287.0, 231.0, 316.0, 253.0, 288.0, 298.0, 298.0, 381.0, 171.0, 162.0, 234.0, 342.0, 384.0, 304.0, 381.0, 399.0, 298.0, 295.0, 347.0, 182.0, 301.0, 339.0, 244.0, 296.0, 350.0, 77.0, 404.0, 259.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [156.0, 180.0, 137.0, 139.0, 164.0, 174.0, 200.0, 190.0, 135.0, 152.0, 202.0, 197.0, 185.0, 199.0, 177.0, 170.0, 122.0, 125.0, 106.0, 135.0, 97.0, 77.0, 163.0, 178.0, 169.0, 163.0, 144.0, 149.0, 149.0, 143.0, 165.0, 176.0, 170.0, 160.0, 45.0, 53.0, 173.0, 165.0, 212.0, 189.0, 154.0, 158.0, 172.0, 209.0, 119.0, 117.0, 124.0, 106.0, 140.0, 144.0, 174.0, 170.0, 174.0, 177.0, 176.0, 179.0, 
128.0, 170.0, 157.0, 167.0, 115.0, 105.0, 72.0, 75.0, 80.0, 95.0, 143.0, 173.0, 145.0, 142.0, 134.0, 135.0, 139.0, 142.0, 167.0, 185.0, 161.0, 175.0, 136.0, 151.0, 203.0, 192.0, 125.0, 116.0, 127.0, 104.0, 172.0, 182.0, 99.0, 130.0, 200.0, 202.0, 145.0, 146.0, 148.0, 125.0, 159.0, 188.0, 135.0, 163.0, 130.0, 117.0, 167.0, 169.0, 201.0, 198.0, 157.0, 181.0, 142.0, 154.0, 162.0, 151.0, 165.0, 171.0, 136.0, 120.0, 31.0, 56.0, 158.0, 186.0, 133.0, 140.0, 131.0, 119.0, 176.0, 157.0, 130.0, 146.0, 139.0, 157.0, 144.0, 146.0, 195.0, 195.0, 198.0, 192.0, 172.0, 164.0, 170.0, 157.0, 127.0, 117.0, 70.0, 71.0, 143.0, 144.0, 115.0, 116.0, 164.0, 152.0, 128.0, 125.0, 148.0, 140.0, 162.0, 136.0, 136.0, 162.0, 163.0, 218.0, 82.0, 89.0, 79.0, 83.0, 116.0, 118.0, 180.0, 162.0, 203.0, 181.0, 142.0, 162.0, 192.0, 189.0, 206.0, 193.0, 152.0, 146.0, 145.0, 150.0, 178.0, 169.0, 94.0, 88.0, 136.0, 165.0, 181.0, 158.0, 134.0, 110.0, 135.0, 161.0, 165.0, 185.0, 34.0, 43.0, 204.0, 200.0, 133.0, 126.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6879893960396974, "mean_inference_ms": 1.210747046238731, "mean_action_processing_ms": 0.13300267223965467, "mean_env_wait_ms": 0.8702240867475001, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 404.0, "episode_reward_min": 77.0, "episode_reward_mean": 296.5, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 31.0}, "policy_reward_max": {"ppo": 218.0}, "policy_reward_mean": {"ppo": 148.25}, "hist_stats": {"episode_reward": [336.0, 276.0, 338.0, 390.0, 287.0, 399.0, 384.0, 347.0, 247.0, 241.0, 174.0, 341.0, 332.0, 293.0, 292.0, 341.0, 330.0, 98.0, 338.0, 401.0, 312.0, 381.0, 236.0, 230.0, 284.0, 344.0, 351.0, 355.0, 298.0, 324.0, 220.0, 147.0, 175.0, 316.0, 287.0, 269.0, 281.0, 352.0, 336.0, 287.0, 395.0, 241.0, 231.0, 354.0, 229.0, 402.0, 291.0, 273.0, 347.0, 298.0, 247.0, 336.0, 399.0, 338.0, 296.0, 313.0, 336.0, 256.0, 87.0, 344.0, 273.0, 250.0, 333.0, 276.0, 296.0, 290.0, 390.0, 
390.0, 336.0, 327.0, 244.0, 141.0, 287.0, 231.0, 316.0, 253.0, 288.0, 298.0, 298.0, 381.0, 171.0, 162.0, 234.0, 342.0, 384.0, 304.0, 381.0, 399.0, 298.0, 295.0, 347.0, 182.0, 301.0, 339.0, 244.0, 296.0, 350.0, 77.0, 404.0, 259.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [156.0, 180.0, 137.0, 139.0, 164.0, 174.0, 200.0, 190.0, 135.0, 152.0, 202.0, 197.0, 185.0, 199.0, 177.0, 170.0, 122.0, 125.0, 106.0, 135.0, 97.0, 77.0, 163.0, 178.0, 169.0, 163.0, 144.0, 149.0, 149.0, 143.0, 165.0, 176.0, 170.0, 160.0, 45.0, 53.0, 173.0, 165.0, 212.0, 189.0, 154.0, 158.0, 172.0, 209.0, 119.0, 117.0, 124.0, 106.0, 140.0, 144.0, 174.0, 170.0, 174.0, 177.0, 176.0, 179.0, 128.0, 170.0, 157.0, 167.0, 115.0, 105.0, 72.0, 75.0, 80.0, 95.0, 143.0, 173.0, 145.0, 142.0, 134.0, 135.0, 139.0, 142.0, 167.0, 185.0, 161.0, 175.0, 136.0, 151.0, 203.0, 192.0, 125.0, 116.0, 127.0, 104.0, 172.0, 182.0, 99.0, 130.0, 200.0, 202.0, 145.0, 146.0, 148.0, 125.0, 159.0, 188.0, 135.0, 163.0, 130.0, 117.0, 167.0, 169.0, 201.0, 198.0, 157.0, 181.0, 142.0, 154.0, 162.0, 151.0, 165.0, 171.0, 136.0, 120.0, 31.0, 56.0, 158.0, 186.0, 133.0, 140.0, 131.0, 119.0, 176.0, 157.0, 130.0, 146.0, 139.0, 157.0, 144.0, 146.0, 195.0, 195.0, 198.0, 192.0, 172.0, 164.0, 170.0, 157.0, 127.0, 117.0, 70.0, 71.0, 143.0, 144.0, 115.0, 116.0, 164.0, 152.0, 128.0, 125.0, 148.0, 140.0, 162.0, 136.0, 136.0, 162.0, 163.0, 218.0, 82.0, 89.0, 79.0, 83.0, 116.0, 118.0, 180.0, 162.0, 203.0, 181.0, 142.0, 162.0, 192.0, 189.0, 206.0, 193.0, 152.0, 
146.0, 145.0, 150.0, 178.0, 169.0, 94.0, 88.0, 136.0, 165.0, 181.0, 158.0, 134.0, 110.0, 135.0, 161.0, 165.0, 185.0, 34.0, 43.0, 204.0, 200.0, 133.0, 126.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6879893960396974, "mean_inference_ms": 1.210747046238731, "mean_action_processing_ms": 0.13300267223965467, "mean_env_wait_ms": 0.8702240867475001, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 1510400, "num_agent_steps_trained": 1510400, "num_env_steps_sampled": 755200, "num_env_steps_trained": 755200, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 755200, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 1510400, "timers": {"training_iteration_time_ms": 3682.605, "learn_time_ms": 1126.084, "learn_throughput": 11366.825, "synch_weights_time_ms": 12.668}, "counters": {"num_env_steps_sampled": 755200, "num_env_steps_trained": 755200, "num_agent_steps_sampled": 1510400, "num_agent_steps_trained": 1510400}, "done": false, "episodes_total": 1888, "training_iteration": 59, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-03-46", "timestamp": 1666580626, "time_this_iter_s": 3.6895713806152344, "time_total_s": 222.49447655677795, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, 
"inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, 
"_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": 
{"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, 
"gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, 
"min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, 
"always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 
'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 222.49447655677795, "timesteps_since_restore": 0, "iterations_since_restore": 59, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.94, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 101.2, "sparse_reward_min": 0, "sparse_reward_max": 140, "shaped_reward_mean": 95.08, "shaped_reward_min": 34, "shaped_reward_max": 128, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 10.35, "onion_pickup_agent_0_min": 3, "onion_pickup_agent_0_max": 19, "onion_pickup_agent_1_mean": 11.07, "onion_pickup_agent_1_min": 5, "onion_pickup_agent_1_max": 18, "useful_onion_pickup_agent_0_mean": 9.85, "useful_onion_pickup_agent_0_min": 3, "useful_onion_pickup_agent_0_max": 19, "useful_onion_pickup_agent_1_mean": 10.36, "useful_onion_pickup_agent_1_min": 2, "useful_onion_pickup_agent_1_max": 18, "onion_drop_agent_0_mean": 1.21, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 7, "onion_drop_agent_1_mean": 1.13, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.51, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, 
"useful_onion_drop_agent_1_mean": 0.52, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 8.77, "potting_onion_agent_0_min": 2, "potting_onion_agent_0_max": 16, "potting_onion_agent_1_mean": 9.67, "potting_onion_agent_1_min": 2, "potting_onion_agent_1_max": 16, "dish_pickup_agent_0_mean": 5.59, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.44, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 1.41, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 6, "useful_dish_pickup_agent_1_mean": 1.46, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 5, "dish_drop_agent_0_mean": 2.14, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 2.16, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 7, "useful_dish_drop_agent_0_mean": 0.95, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 5, "useful_dish_drop_agent_1_mean": 0.93, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 4, "soup_pickup_agent_0_mean": 3.3, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 3.1, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 3.11, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 6, "soup_delivery_agent_1_mean": 2.88, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.14, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 4, "soup_drop_agent_1_mean": 0.16, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 5, "optimal_onion_potting_agent_0_mean": 8.77, "optimal_onion_potting_agent_0_min": 2, "optimal_onion_potting_agent_0_max": 16, "optimal_onion_potting_agent_1_mean": 9.67, "optimal_onion_potting_agent_1_min": 2, "optimal_onion_potting_agent_1_max": 16, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 8.77, "viable_onion_potting_agent_0_min": 2, "viable_onion_potting_agent_0_max": 16, "viable_onion_potting_agent_1_mean": 9.67, "viable_onion_potting_agent_1_min": 2, "viable_onion_potting_agent_1_max": 16, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 3.4694470036524025e-19, "cur_lr": 0.0010000000474974513, "total_loss": -0.003538618329912424, "policy_loss": -0.003653008723631501, 
"vf_loss": 8.01357650756836, "vf_explained_var": 0.6434005498886108, "kl": 0.0017745888326317072, "entropy": 1.3739397525787354, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 768000, "num_env_steps_trained": 768000, "num_agent_steps_sampled": 1536000, "num_agent_steps_trained": 1536000}, "sampler_results": {"episode_reward_max": 408.0, "episode_reward_min": 74.0, "episode_reward_mean": 297.48, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 31.0}, "policy_reward_max": {"ppo": 218.0}, "policy_reward_mean": {"ppo": 148.74}, "custom_metrics": {"sparse_reward_mean": 101.2, "sparse_reward_min": 0, "sparse_reward_max": 140, "shaped_reward_mean": 95.08, "shaped_reward_min": 34, "shaped_reward_max": 128, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 10.35, "onion_pickup_agent_0_min": 3, "onion_pickup_agent_0_max": 19, "onion_pickup_agent_1_mean": 11.07, 
"onion_pickup_agent_1_min": 5, "onion_pickup_agent_1_max": 18, "useful_onion_pickup_agent_0_mean": 9.85, "useful_onion_pickup_agent_0_min": 3, "useful_onion_pickup_agent_0_max": 19, "useful_onion_pickup_agent_1_mean": 10.36, "useful_onion_pickup_agent_1_min": 2, "useful_onion_pickup_agent_1_max": 18, "onion_drop_agent_0_mean": 1.21, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 7, "onion_drop_agent_1_mean": 1.13, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.51, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.52, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 8.77, "potting_onion_agent_0_min": 2, "potting_onion_agent_0_max": 16, "potting_onion_agent_1_mean": 9.67, "potting_onion_agent_1_min": 2, "potting_onion_agent_1_max": 16, "dish_pickup_agent_0_mean": 5.59, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.44, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 1.41, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 6, "useful_dish_pickup_agent_1_mean": 1.46, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 5, "dish_drop_agent_0_mean": 2.14, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 2.16, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 7, "useful_dish_drop_agent_0_mean": 0.95, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 5, "useful_dish_drop_agent_1_mean": 0.93, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 4, "soup_pickup_agent_0_mean": 3.3, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 3.1, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 3.11, "soup_delivery_agent_0_min": 0, 
"soup_delivery_agent_0_max": 6, "soup_delivery_agent_1_mean": 2.88, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.14, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 4, "soup_drop_agent_1_mean": 0.16, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 5, "optimal_onion_potting_agent_0_mean": 8.77, "optimal_onion_potting_agent_0_min": 2, "optimal_onion_potting_agent_0_max": 16, "optimal_onion_potting_agent_1_mean": 9.67, "optimal_onion_potting_agent_1_min": 2, "optimal_onion_potting_agent_1_max": 16, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 8.77, "viable_onion_potting_agent_0_min": 2, "viable_onion_potting_agent_0_max": 16, "viable_onion_potting_agent_1_mean": 9.67, "viable_onion_potting_agent_1_min": 2, "viable_onion_potting_agent_1_max": 16, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [175.0, 316.0, 287.0, 269.0, 281.0, 352.0, 336.0, 287.0, 395.0, 241.0, 231.0, 354.0, 229.0, 402.0, 291.0, 273.0, 347.0, 298.0, 247.0, 336.0, 399.0, 338.0, 296.0, 313.0, 336.0, 256.0, 87.0, 344.0, 273.0, 250.0, 333.0, 276.0, 296.0, 290.0, 390.0, 390.0, 336.0, 327.0, 244.0, 141.0, 287.0, 231.0, 316.0, 253.0, 288.0, 298.0, 298.0, 381.0, 171.0, 162.0, 234.0, 342.0, 384.0, 304.0, 381.0, 399.0, 298.0, 295.0, 347.0, 182.0, 301.0, 339.0, 244.0, 296.0, 350.0, 77.0, 404.0, 259.0, 290.0, 283.0, 293.0, 393.0, 408.0, 384.0, 381.0, 342.0, 290.0, 350.0, 299.0, 356.0, 230.0, 248.0, 273.0, 74.0, 330.0, 289.0, 299.0, 298.0, 188.0, 236.0, 393.0, 350.0, 336.0, 338.0, 349.0, 312.0, 120.0, 336.0, 361.0, 336.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [80.0, 95.0, 143.0, 173.0, 145.0, 142.0, 134.0, 135.0, 139.0, 142.0, 167.0, 185.0, 161.0, 175.0, 136.0, 151.0, 203.0, 192.0, 125.0, 116.0, 127.0, 104.0, 172.0, 182.0, 99.0, 130.0, 200.0, 202.0, 145.0, 146.0, 148.0, 125.0, 159.0, 188.0, 135.0, 163.0, 130.0, 117.0, 167.0, 169.0, 201.0, 198.0, 157.0, 181.0, 142.0, 154.0, 162.0, 151.0, 165.0, 171.0, 
136.0, 120.0, 31.0, 56.0, 158.0, 186.0, 133.0, 140.0, 131.0, 119.0, 176.0, 157.0, 130.0, 146.0, 139.0, 157.0, 144.0, 146.0, 195.0, 195.0, 198.0, 192.0, 172.0, 164.0, 170.0, 157.0, 127.0, 117.0, 70.0, 71.0, 143.0, 144.0, 115.0, 116.0, 164.0, 152.0, 128.0, 125.0, 148.0, 140.0, 162.0, 136.0, 136.0, 162.0, 163.0, 218.0, 82.0, 89.0, 79.0, 83.0, 116.0, 118.0, 180.0, 162.0, 203.0, 181.0, 142.0, 162.0, 192.0, 189.0, 206.0, 193.0, 152.0, 146.0, 145.0, 150.0, 178.0, 169.0, 94.0, 88.0, 136.0, 165.0, 181.0, 158.0, 134.0, 110.0, 135.0, 161.0, 165.0, 185.0, 34.0, 43.0, 204.0, 200.0, 133.0, 126.0, 159.0, 131.0, 135.0, 148.0, 159.0, 134.0, 204.0, 189.0, 207.0, 201.0, 182.0, 202.0, 174.0, 207.0, 159.0, 183.0, 153.0, 137.0, 175.0, 175.0, 143.0, 156.0, 181.0, 175.0, 110.0, 120.0, 126.0, 122.0, 138.0, 135.0, 43.0, 31.0, 148.0, 182.0, 146.0, 143.0, 154.0, 145.0, 149.0, 149.0, 83.0, 105.0, 134.0, 102.0, 186.0, 207.0, 169.0, 181.0, 174.0, 162.0, 174.0, 164.0, 173.0, 176.0, 164.0, 148.0, 57.0, 63.0, 170.0, 166.0, 171.0, 190.0, 167.0, 169.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6878877620979216, "mean_inference_ms": 1.2112749680126509, "mean_action_processing_ms": 0.13297599011569022, "mean_env_wait_ms": 0.8697325368255246, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 408.0, "episode_reward_min": 74.0, "episode_reward_mean": 297.48, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 31.0}, "policy_reward_max": {"ppo": 218.0}, "policy_reward_mean": {"ppo": 148.74}, "hist_stats": {"episode_reward": [175.0, 316.0, 287.0, 269.0, 281.0, 352.0, 336.0, 287.0, 395.0, 241.0, 231.0, 354.0, 229.0, 402.0, 291.0, 273.0, 347.0, 298.0, 247.0, 336.0, 399.0, 338.0, 296.0, 313.0, 336.0, 256.0, 87.0, 344.0, 273.0, 250.0, 333.0, 276.0, 296.0, 290.0, 390.0, 390.0, 336.0, 327.0, 244.0, 141.0, 287.0, 231.0, 316.0, 253.0, 288.0, 298.0, 298.0, 381.0, 171.0, 162.0, 234.0, 342.0, 384.0, 304.0, 381.0, 399.0, 298.0, 295.0, 347.0, 182.0, 
301.0, 339.0, 244.0, 296.0, 350.0, 77.0, 404.0, 259.0, 290.0, 283.0, 293.0, 393.0, 408.0, 384.0, 381.0, 342.0, 290.0, 350.0, 299.0, 356.0, 230.0, 248.0, 273.0, 74.0, 330.0, 289.0, 299.0, 298.0, 188.0, 236.0, 393.0, 350.0, 336.0, 338.0, 349.0, 312.0, 120.0, 336.0, 361.0, 336.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [80.0, 95.0, 143.0, 173.0, 145.0, 142.0, 134.0, 135.0, 139.0, 142.0, 167.0, 185.0, 161.0, 175.0, 136.0, 151.0, 203.0, 192.0, 125.0, 116.0, 127.0, 104.0, 172.0, 182.0, 99.0, 130.0, 200.0, 202.0, 145.0, 146.0, 148.0, 125.0, 159.0, 188.0, 135.0, 163.0, 130.0, 117.0, 167.0, 169.0, 201.0, 198.0, 157.0, 181.0, 142.0, 154.0, 162.0, 151.0, 165.0, 171.0, 136.0, 120.0, 31.0, 56.0, 158.0, 186.0, 133.0, 140.0, 131.0, 119.0, 176.0, 157.0, 130.0, 146.0, 139.0, 157.0, 144.0, 146.0, 195.0, 195.0, 198.0, 192.0, 172.0, 164.0, 170.0, 157.0, 127.0, 117.0, 70.0, 71.0, 143.0, 144.0, 115.0, 116.0, 164.0, 152.0, 128.0, 125.0, 148.0, 140.0, 162.0, 136.0, 136.0, 162.0, 163.0, 218.0, 82.0, 89.0, 79.0, 83.0, 116.0, 118.0, 180.0, 162.0, 203.0, 181.0, 142.0, 162.0, 192.0, 189.0, 206.0, 193.0, 152.0, 146.0, 145.0, 150.0, 178.0, 169.0, 94.0, 88.0, 136.0, 165.0, 181.0, 158.0, 134.0, 110.0, 135.0, 161.0, 165.0, 185.0, 34.0, 43.0, 204.0, 200.0, 133.0, 126.0, 159.0, 131.0, 135.0, 148.0, 159.0, 134.0, 204.0, 189.0, 207.0, 201.0, 182.0, 202.0, 174.0, 207.0, 159.0, 183.0, 153.0, 137.0, 175.0, 175.0, 143.0, 156.0, 181.0, 175.0, 110.0, 120.0, 126.0, 122.0, 138.0, 135.0, 43.0, 31.0, 148.0, 182.0, 
146.0, 143.0, 154.0, 145.0, 149.0, 149.0, 83.0, 105.0, 134.0, 102.0, 186.0, 207.0, 169.0, 181.0, 174.0, 162.0, 174.0, 164.0, 173.0, 176.0, 164.0, 148.0, 57.0, 63.0, 170.0, 166.0, 171.0, 190.0, 167.0, 169.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6878877620979216, "mean_inference_ms": 1.2112749680126509, "mean_action_processing_ms": 0.13297599011569022, "mean_env_wait_ms": 0.8697325368255246, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 1536000, "num_agent_steps_trained": 1536000, "num_env_steps_sampled": 768000, "num_env_steps_trained": 768000, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 768000, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 1536000, "timers": {"training_iteration_time_ms": 3674.237, "learn_time_ms": 1115.272, "learn_throughput": 11477.021, "synch_weights_time_ms": 12.3}, "counters": {"num_env_steps_sampled": 768000, "num_env_steps_trained": 768000, "num_agent_steps_sampled": 1536000, "num_agent_steps_trained": 1536000}, "done": false, "episodes_total": 1920, "training_iteration": 60, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-03-50", "timestamp": 1666580630, "time_this_iter_s": 3.67628812789917, "time_total_s": 226.17076468467712, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": 
{"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": 
{"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, 
"log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": 
false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, 
"min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, 
"evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, 
"callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 226.17076468467712, "timesteps_since_restore": 0, "iterations_since_restore": 60, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.86666666666667, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 108.4, "sparse_reward_min": 0, "sparse_reward_max": 160, "shaped_reward_mean": 97.08, "shaped_reward_min": 34, "shaped_reward_max": 128, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 10.26, "onion_pickup_agent_0_min": 3, "onion_pickup_agent_0_max": 19, "onion_pickup_agent_1_mean": 11.58, "onion_pickup_agent_1_min": 5, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 9.87, "useful_onion_pickup_agent_0_min": 3, "useful_onion_pickup_agent_0_max": 19, "useful_onion_pickup_agent_1_mean": 10.88, "useful_onion_pickup_agent_1_min": 2, "useful_onion_pickup_agent_1_max": 20, "onion_drop_agent_0_mean": 1.03, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 7, "onion_drop_agent_1_mean": 1.05, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.46, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, 
"useful_onion_drop_agent_1_mean": 0.53, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 8.85, "potting_onion_agent_0_min": 3, "potting_onion_agent_0_max": 17, "potting_onion_agent_1_mean": 10.17, "potting_onion_agent_1_min": 2, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.11, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 16, "dish_pickup_agent_1_mean": 5.13, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 1.4, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 6, "useful_dish_pickup_agent_1_mean": 1.39, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 5, "dish_drop_agent_0_mean": 2.43, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 12, "dish_drop_agent_1_mean": 2.06, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 7, "useful_dish_drop_agent_0_mean": 1.09, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 10, "useful_dish_drop_agent_1_mean": 0.91, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 4, "soup_pickup_agent_0_mean": 3.54, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 2.96, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 6, "soup_delivery_agent_0_mean": 3.32, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 6, "soup_delivery_agent_1_mean": 2.77, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.15, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 4, "soup_drop_agent_1_mean": 0.14, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 8.85, "optimal_onion_potting_agent_0_min": 3, "optimal_onion_potting_agent_0_max": 17, "optimal_onion_potting_agent_1_mean": 10.17, "optimal_onion_potting_agent_1_min": 2, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 8.85, "viable_onion_potting_agent_0_min": 3, "viable_onion_potting_agent_0_max": 17, "viable_onion_potting_agent_1_mean": 10.17, "viable_onion_potting_agent_1_min": 2, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 1.7347235018262012e-19, "cur_lr": 0.0010000000474974513, "total_loss": -0.0008456122595816851, "policy_loss": 
-0.001012232038192451, "vf_loss": 8.314811706542969, "vf_explained_var": 0.6157503128051758, "kl": 0.0020367184188216925, "entropy": 1.3297278881072998, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 780800, "num_env_steps_trained": 780800, "num_agent_steps_sampled": 1561600, "num_agent_steps_trained": 1561600}, "sampler_results": {"episode_reward_max": 441.0, "episode_reward_min": 74.0, "episode_reward_mean": 313.88, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 31.0}, "policy_reward_max": {"ppo": 241.0}, "policy_reward_mean": {"ppo": 156.94}, "custom_metrics": {"sparse_reward_mean": 108.4, "sparse_reward_min": 0, "sparse_reward_max": 160, "shaped_reward_mean": 97.08, "shaped_reward_min": 34, "shaped_reward_max": 128, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 10.26, "onion_pickup_agent_0_min": 3, "onion_pickup_agent_0_max": 19, 
"onion_pickup_agent_1_mean": 11.58, "onion_pickup_agent_1_min": 5, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 9.87, "useful_onion_pickup_agent_0_min": 3, "useful_onion_pickup_agent_0_max": 19, "useful_onion_pickup_agent_1_mean": 10.88, "useful_onion_pickup_agent_1_min": 2, "useful_onion_pickup_agent_1_max": 20, "onion_drop_agent_0_mean": 1.03, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 7, "onion_drop_agent_1_mean": 1.05, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.46, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.53, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 8.85, "potting_onion_agent_0_min": 3, "potting_onion_agent_0_max": 17, "potting_onion_agent_1_mean": 10.17, "potting_onion_agent_1_min": 2, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.11, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 16, "dish_pickup_agent_1_mean": 5.13, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 1.4, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 6, "useful_dish_pickup_agent_1_mean": 1.39, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 5, "dish_drop_agent_0_mean": 2.43, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 12, "dish_drop_agent_1_mean": 2.06, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 7, "useful_dish_drop_agent_0_mean": 1.09, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 10, "useful_dish_drop_agent_1_mean": 0.91, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 4, "soup_pickup_agent_0_mean": 3.54, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 2.96, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 6, "soup_delivery_agent_0_mean": 3.32, 
"soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 6, "soup_delivery_agent_1_mean": 2.77, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.15, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 4, "soup_drop_agent_1_mean": 0.14, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 8.85, "optimal_onion_potting_agent_0_min": 3, "optimal_onion_potting_agent_0_max": 17, "optimal_onion_potting_agent_1_mean": 10.17, "optimal_onion_potting_agent_1_min": 2, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 8.85, "viable_onion_potting_agent_0_min": 3, "viable_onion_potting_agent_0_max": 17, "viable_onion_potting_agent_1_mean": 10.17, "viable_onion_potting_agent_1_min": 2, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, 
"useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [296.0, 290.0, 390.0, 390.0, 336.0, 327.0, 244.0, 141.0, 287.0, 231.0, 316.0, 253.0, 288.0, 298.0, 298.0, 381.0, 171.0, 162.0, 234.0, 342.0, 384.0, 304.0, 381.0, 399.0, 298.0, 295.0, 347.0, 182.0, 301.0, 339.0, 244.0, 296.0, 350.0, 77.0, 404.0, 259.0, 290.0, 283.0, 293.0, 393.0, 408.0, 384.0, 381.0, 342.0, 290.0, 350.0, 299.0, 356.0, 230.0, 248.0, 273.0, 74.0, 330.0, 289.0, 299.0, 298.0, 188.0, 236.0, 393.0, 350.0, 336.0, 338.0, 349.0, 312.0, 120.0, 336.0, 361.0, 336.0, 402.0, 390.0, 174.0, 395.0, 336.0, 396.0, 345.0, 255.0, 396.0, 384.0, 190.0, 441.0, 301.0, 343.0, 344.0, 393.0, 356.0, 347.0, 351.0, 346.0, 438.0, 438.0, 387.0, 344.0, 345.0, 344.0, 396.0, 342.0, 287.0, 305.0, 338.0, 239.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [139.0, 157.0, 144.0, 146.0, 195.0, 195.0, 198.0, 192.0, 172.0, 164.0, 170.0, 157.0, 127.0, 117.0, 70.0, 71.0, 143.0, 144.0, 115.0, 116.0, 164.0, 152.0, 128.0, 125.0, 148.0, 140.0, 162.0, 136.0, 136.0, 162.0, 163.0, 218.0, 82.0, 89.0, 79.0, 83.0, 116.0, 118.0, 180.0, 162.0, 203.0, 181.0, 142.0, 162.0, 
192.0, 189.0, 206.0, 193.0, 152.0, 146.0, 145.0, 150.0, 178.0, 169.0, 94.0, 88.0, 136.0, 165.0, 181.0, 158.0, 134.0, 110.0, 135.0, 161.0, 165.0, 185.0, 34.0, 43.0, 204.0, 200.0, 133.0, 126.0, 159.0, 131.0, 135.0, 148.0, 159.0, 134.0, 204.0, 189.0, 207.0, 201.0, 182.0, 202.0, 174.0, 207.0, 159.0, 183.0, 153.0, 137.0, 175.0, 175.0, 143.0, 156.0, 181.0, 175.0, 110.0, 120.0, 126.0, 122.0, 138.0, 135.0, 43.0, 31.0, 148.0, 182.0, 146.0, 143.0, 154.0, 145.0, 149.0, 149.0, 83.0, 105.0, 134.0, 102.0, 186.0, 207.0, 169.0, 181.0, 174.0, 162.0, 174.0, 164.0, 173.0, 176.0, 164.0, 148.0, 57.0, 63.0, 170.0, 166.0, 171.0, 190.0, 167.0, 169.0, 202.0, 200.0, 185.0, 205.0, 80.0, 94.0, 195.0, 200.0, 164.0, 172.0, 201.0, 195.0, 163.0, 182.0, 121.0, 134.0, 203.0, 193.0, 171.0, 213.0, 104.0, 86.0, 200.0, 241.0, 136.0, 165.0, 171.0, 172.0, 185.0, 159.0, 198.0, 195.0, 166.0, 190.0, 167.0, 180.0, 163.0, 188.0, 186.0, 160.0, 226.0, 212.0, 224.0, 214.0, 187.0, 200.0, 174.0, 170.0, 166.0, 179.0, 181.0, 163.0, 186.0, 210.0, 186.0, 156.0, 159.0, 128.0, 155.0, 150.0, 165.0, 173.0, 128.0, 111.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6878282523618396, "mean_inference_ms": 1.211237416116329, "mean_action_processing_ms": 0.1329784454803539, "mean_env_wait_ms": 0.8689713930511266, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 441.0, "episode_reward_min": 74.0, "episode_reward_mean": 313.88, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 31.0}, "policy_reward_max": {"ppo": 241.0}, "policy_reward_mean": {"ppo": 156.94}, "hist_stats": {"episode_reward": [296.0, 290.0, 390.0, 390.0, 336.0, 327.0, 244.0, 141.0, 287.0, 231.0, 316.0, 253.0, 288.0, 298.0, 298.0, 381.0, 171.0, 162.0, 234.0, 342.0, 384.0, 304.0, 381.0, 399.0, 298.0, 295.0, 347.0, 182.0, 301.0, 339.0, 244.0, 296.0, 350.0, 77.0, 404.0, 259.0, 290.0, 283.0, 293.0, 393.0, 408.0, 384.0, 381.0, 342.0, 290.0, 350.0, 299.0, 356.0, 230.0, 248.0, 273.0, 74.0, 330.0, 289.0, 
299.0, 298.0, 188.0, 236.0, 393.0, 350.0, 336.0, 338.0, 349.0, 312.0, 120.0, 336.0, 361.0, 336.0, 402.0, 390.0, 174.0, 395.0, 336.0, 396.0, 345.0, 255.0, 396.0, 384.0, 190.0, 441.0, 301.0, 343.0, 344.0, 393.0, 356.0, 347.0, 351.0, 346.0, 438.0, 438.0, 387.0, 344.0, 345.0, 344.0, 396.0, 342.0, 287.0, 305.0, 338.0, 239.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [139.0, 157.0, 144.0, 146.0, 195.0, 195.0, 198.0, 192.0, 172.0, 164.0, 170.0, 157.0, 127.0, 117.0, 70.0, 71.0, 143.0, 144.0, 115.0, 116.0, 164.0, 152.0, 128.0, 125.0, 148.0, 140.0, 162.0, 136.0, 136.0, 162.0, 163.0, 218.0, 82.0, 89.0, 79.0, 83.0, 116.0, 118.0, 180.0, 162.0, 203.0, 181.0, 142.0, 162.0, 192.0, 189.0, 206.0, 193.0, 152.0, 146.0, 145.0, 150.0, 178.0, 169.0, 94.0, 88.0, 136.0, 165.0, 181.0, 158.0, 134.0, 110.0, 135.0, 161.0, 165.0, 185.0, 34.0, 43.0, 204.0, 200.0, 133.0, 126.0, 159.0, 131.0, 135.0, 148.0, 159.0, 134.0, 204.0, 189.0, 207.0, 201.0, 182.0, 202.0, 174.0, 207.0, 159.0, 183.0, 153.0, 137.0, 175.0, 175.0, 143.0, 156.0, 181.0, 175.0, 110.0, 120.0, 126.0, 122.0, 138.0, 135.0, 43.0, 31.0, 148.0, 182.0, 146.0, 143.0, 154.0, 145.0, 149.0, 149.0, 83.0, 105.0, 134.0, 102.0, 186.0, 207.0, 169.0, 181.0, 174.0, 162.0, 174.0, 164.0, 173.0, 176.0, 164.0, 148.0, 57.0, 63.0, 170.0, 166.0, 171.0, 190.0, 167.0, 169.0, 202.0, 200.0, 185.0, 205.0, 80.0, 94.0, 195.0, 200.0, 164.0, 172.0, 201.0, 195.0, 163.0, 182.0, 121.0, 134.0, 203.0, 193.0, 171.0, 213.0, 104.0, 86.0, 200.0, 241.0, 136.0, 165.0, 171.0, 172.0, 
185.0, 159.0, 198.0, 195.0, 166.0, 190.0, 167.0, 180.0, 163.0, 188.0, 186.0, 160.0, 226.0, 212.0, 224.0, 214.0, 187.0, 200.0, 174.0, 170.0, 166.0, 179.0, 181.0, 163.0, 186.0, 210.0, 186.0, 156.0, 159.0, 128.0, 155.0, 150.0, 165.0, 173.0, 128.0, 111.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6878282523618396, "mean_inference_ms": 1.211237416116329, "mean_action_processing_ms": 0.1329784454803539, "mean_env_wait_ms": 0.8689713930511266, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 1561600, "num_agent_steps_trained": 1561600, "num_env_steps_sampled": 780800, "num_env_steps_trained": 780800, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 780800, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 1561600, "timers": {"training_iteration_time_ms": 3664.703, "learn_time_ms": 1108.565, "learn_throughput": 11546.455, "synch_weights_time_ms": 12.646}, "counters": {"num_env_steps_sampled": 780800, "num_env_steps_trained": 780800, "num_agent_steps_sampled": 1561600, "num_agent_steps_trained": 1561600}, "done": false, "episodes_total": 1952, "training_iteration": 61, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-03-54", "timestamp": 1666580634, "time_this_iter_s": 3.7346341609954834, "time_total_s": 229.9053988456726, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 229.9053988456726, "timesteps_since_restore": 0, "iterations_since_restore": 61, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.92, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 113.4, "sparse_reward_min": 0, "sparse_reward_max": 160, "shaped_reward_mean": 102.17, "shaped_reward_min": 34, "shaped_reward_max": 130, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 10.83, "onion_pickup_agent_0_min": 3, "onion_pickup_agent_0_max": 19, "onion_pickup_agent_1_mean": 11.76, "onion_pickup_agent_1_min": 5, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 10.39, "useful_onion_pickup_agent_0_min": 3, "useful_onion_pickup_agent_0_max": 19, "useful_onion_pickup_agent_1_mean": 11.05, "useful_onion_pickup_agent_1_min": 2, "useful_onion_pickup_agent_1_max": 20, "onion_drop_agent_0_mean": 0.99, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.98, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.51, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, 
"useful_onion_drop_agent_1_mean": 0.62, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 9.47, "potting_onion_agent_0_min": 3, "potting_onion_agent_0_max": 17, "potting_onion_agent_1_mean": 10.42, "potting_onion_agent_1_min": 2, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.77, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 16, "dish_pickup_agent_1_mean": 5.13, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 1.57, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 6, "useful_dish_pickup_agent_1_mean": 1.53, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 5, "dish_drop_agent_0_mean": 2.04, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 12, "dish_drop_agent_1_mean": 1.84, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 6, "useful_dish_drop_agent_0_mean": 0.93, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 10, "useful_dish_drop_agent_1_mean": 0.81, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 4, "soup_pickup_agent_0_mean": 3.61, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 3.21, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 6, "soup_delivery_agent_0_mean": 3.42, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 6, "soup_delivery_agent_1_mean": 3.0, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.12, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 4, "soup_drop_agent_1_mean": 0.15, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 9.47, "optimal_onion_potting_agent_0_min": 3, "optimal_onion_potting_agent_0_max": 17, "optimal_onion_potting_agent_1_mean": 10.42, "optimal_onion_potting_agent_1_min": 2, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 9.47, "viable_onion_potting_agent_0_min": 3, "viable_onion_potting_agent_0_max": 17, "viable_onion_potting_agent_1_mean": 10.42, "viable_onion_potting_agent_1_min": 2, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 8.673617509131006e-20, "cur_lr": 0.0010000000474974513, "total_loss": -0.002962626051157713, "policy_loss": -0.003128279000520706, 
"vf_loss": 8.295660018920898, "vf_explained_var": 0.6091577410697937, "kl": 0.0019532288424670696, "entropy": 1.3278286457061768, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 793600, "num_env_steps_trained": 793600, "num_agent_steps_sampled": 1587200, "num_agent_steps_trained": 1587200}, "sampler_results": {"episode_reward_max": 441.0, "episode_reward_min": 74.0, "episode_reward_mean": 328.97, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 31.0}, "policy_reward_max": {"ppo": 241.0}, "policy_reward_mean": {"ppo": 164.485}, "custom_metrics": {"sparse_reward_mean": 113.4, "sparse_reward_min": 0, "sparse_reward_max": 160, "shaped_reward_mean": 102.17, "shaped_reward_min": 34, "shaped_reward_max": 130, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 10.83, "onion_pickup_agent_0_min": 3, "onion_pickup_agent_0_max": 19, "onion_pickup_agent_1_mean": 11.76, 
"onion_pickup_agent_1_min": 5, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 10.39, "useful_onion_pickup_agent_0_min": 3, "useful_onion_pickup_agent_0_max": 19, "useful_onion_pickup_agent_1_mean": 11.05, "useful_onion_pickup_agent_1_min": 2, "useful_onion_pickup_agent_1_max": 20, "onion_drop_agent_0_mean": 0.99, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.98, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.51, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.62, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 9.47, "potting_onion_agent_0_min": 3, "potting_onion_agent_0_max": 17, "potting_onion_agent_1_mean": 10.42, "potting_onion_agent_1_min": 2, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.77, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 16, "dish_pickup_agent_1_mean": 5.13, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 1.57, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 6, "useful_dish_pickup_agent_1_mean": 1.53, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 5, "dish_drop_agent_0_mean": 2.04, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 12, "dish_drop_agent_1_mean": 1.84, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 6, "useful_dish_drop_agent_0_mean": 0.93, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 10, "useful_dish_drop_agent_1_mean": 0.81, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 4, "soup_pickup_agent_0_mean": 3.61, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 3.21, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 6, "soup_delivery_agent_0_mean": 3.42, "soup_delivery_agent_0_min": 0, 
"soup_delivery_agent_0_max": 6, "soup_delivery_agent_1_mean": 3.0, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.12, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 4, "soup_drop_agent_1_mean": 0.15, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 9.47, "optimal_onion_potting_agent_0_min": 3, "optimal_onion_potting_agent_0_max": 17, "optimal_onion_potting_agent_1_mean": 10.42, "optimal_onion_potting_agent_1_min": 2, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 9.47, "viable_onion_potting_agent_0_min": 3, "viable_onion_potting_agent_0_max": 17, "viable_onion_potting_agent_1_mean": 10.42, "viable_onion_potting_agent_1_min": 2, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [350.0, 77.0, 404.0, 259.0, 290.0, 283.0, 293.0, 393.0, 408.0, 384.0, 381.0, 342.0, 290.0, 350.0, 299.0, 356.0, 230.0, 248.0, 273.0, 74.0, 330.0, 289.0, 299.0, 298.0, 188.0, 236.0, 393.0, 350.0, 336.0, 338.0, 349.0, 312.0, 120.0, 336.0, 361.0, 336.0, 402.0, 390.0, 174.0, 395.0, 336.0, 396.0, 345.0, 255.0, 396.0, 384.0, 190.0, 441.0, 301.0, 343.0, 344.0, 393.0, 356.0, 347.0, 351.0, 346.0, 438.0, 438.0, 387.0, 344.0, 345.0, 344.0, 396.0, 342.0, 287.0, 305.0, 338.0, 239.0, 298.0, 347.0, 207.0, 344.0, 370.0, 384.0, 407.0, 407.0, 313.0, 402.0, 341.0, 339.0, 396.0, 393.0, 250.0, 396.0, 298.0, 390.0, 327.0, 347.0, 361.0, 390.0, 390.0, 261.0, 297.0, 401.0, 307.0, 290.0, 407.0, 293.0, 344.0, 257.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [165.0, 185.0, 34.0, 43.0, 204.0, 200.0, 133.0, 126.0, 159.0, 131.0, 135.0, 148.0, 159.0, 134.0, 204.0, 189.0, 207.0, 201.0, 182.0, 202.0, 174.0, 207.0, 159.0, 183.0, 153.0, 137.0, 175.0, 175.0, 143.0, 156.0, 181.0, 175.0, 110.0, 120.0, 126.0, 122.0, 138.0, 135.0, 43.0, 31.0, 148.0, 182.0, 146.0, 143.0, 154.0, 145.0, 149.0, 149.0, 83.0, 105.0, 
134.0, 102.0, 186.0, 207.0, 169.0, 181.0, 174.0, 162.0, 174.0, 164.0, 173.0, 176.0, 164.0, 148.0, 57.0, 63.0, 170.0, 166.0, 171.0, 190.0, 167.0, 169.0, 202.0, 200.0, 185.0, 205.0, 80.0, 94.0, 195.0, 200.0, 164.0, 172.0, 201.0, 195.0, 163.0, 182.0, 121.0, 134.0, 203.0, 193.0, 171.0, 213.0, 104.0, 86.0, 200.0, 241.0, 136.0, 165.0, 171.0, 172.0, 185.0, 159.0, 198.0, 195.0, 166.0, 190.0, 167.0, 180.0, 163.0, 188.0, 186.0, 160.0, 226.0, 212.0, 224.0, 214.0, 187.0, 200.0, 174.0, 170.0, 166.0, 179.0, 181.0, 163.0, 186.0, 210.0, 186.0, 156.0, 159.0, 128.0, 155.0, 150.0, 165.0, 173.0, 128.0, 111.0, 148.0, 150.0, 160.0, 187.0, 101.0, 106.0, 176.0, 168.0, 179.0, 191.0, 194.0, 190.0, 201.0, 206.0, 202.0, 205.0, 158.0, 155.0, 206.0, 196.0, 182.0, 159.0, 154.0, 185.0, 197.0, 199.0, 191.0, 202.0, 123.0, 127.0, 203.0, 193.0, 156.0, 142.0, 202.0, 188.0, 158.0, 169.0, 180.0, 167.0, 185.0, 176.0, 216.0, 174.0, 196.0, 194.0, 130.0, 131.0, 146.0, 151.0, 215.0, 186.0, 153.0, 154.0, 153.0, 137.0, 203.0, 204.0, 137.0, 156.0, 174.0, 170.0, 122.0, 135.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6877589020473881, "mean_inference_ms": 1.210984106554491, "mean_action_processing_ms": 0.13297596780002988, "mean_env_wait_ms": 0.8682082840509878, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 441.0, "episode_reward_min": 74.0, "episode_reward_mean": 328.97, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 31.0}, "policy_reward_max": {"ppo": 241.0}, "policy_reward_mean": {"ppo": 164.485}, "hist_stats": {"episode_reward": [350.0, 77.0, 404.0, 259.0, 290.0, 283.0, 293.0, 393.0, 408.0, 384.0, 381.0, 342.0, 290.0, 350.0, 299.0, 356.0, 230.0, 248.0, 273.0, 74.0, 330.0, 289.0, 299.0, 298.0, 188.0, 236.0, 393.0, 350.0, 336.0, 338.0, 349.0, 312.0, 120.0, 336.0, 361.0, 336.0, 402.0, 390.0, 174.0, 395.0, 336.0, 396.0, 345.0, 255.0, 396.0, 384.0, 190.0, 441.0, 301.0, 343.0, 344.0, 393.0, 356.0, 347.0, 351.0, 346.0, 438.0, 438.0, 387.0, 
344.0, 345.0, 344.0, 396.0, 342.0, 287.0, 305.0, 338.0, 239.0, 298.0, 347.0, 207.0, 344.0, 370.0, 384.0, 407.0, 407.0, 313.0, 402.0, 341.0, 339.0, 396.0, 393.0, 250.0, 396.0, 298.0, 390.0, 327.0, 347.0, 361.0, 390.0, 390.0, 261.0, 297.0, 401.0, 307.0, 290.0, 407.0, 293.0, 344.0, 257.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [165.0, 185.0, 34.0, 43.0, 204.0, 200.0, 133.0, 126.0, 159.0, 131.0, 135.0, 148.0, 159.0, 134.0, 204.0, 189.0, 207.0, 201.0, 182.0, 202.0, 174.0, 207.0, 159.0, 183.0, 153.0, 137.0, 175.0, 175.0, 143.0, 156.0, 181.0, 175.0, 110.0, 120.0, 126.0, 122.0, 138.0, 135.0, 43.0, 31.0, 148.0, 182.0, 146.0, 143.0, 154.0, 145.0, 149.0, 149.0, 83.0, 105.0, 134.0, 102.0, 186.0, 207.0, 169.0, 181.0, 174.0, 162.0, 174.0, 164.0, 173.0, 176.0, 164.0, 148.0, 57.0, 63.0, 170.0, 166.0, 171.0, 190.0, 167.0, 169.0, 202.0, 200.0, 185.0, 205.0, 80.0, 94.0, 195.0, 200.0, 164.0, 172.0, 201.0, 195.0, 163.0, 182.0, 121.0, 134.0, 203.0, 193.0, 171.0, 213.0, 104.0, 86.0, 200.0, 241.0, 136.0, 165.0, 171.0, 172.0, 185.0, 159.0, 198.0, 195.0, 166.0, 190.0, 167.0, 180.0, 163.0, 188.0, 186.0, 160.0, 226.0, 212.0, 224.0, 214.0, 187.0, 200.0, 174.0, 170.0, 166.0, 179.0, 181.0, 163.0, 186.0, 210.0, 186.0, 156.0, 159.0, 128.0, 155.0, 150.0, 165.0, 173.0, 128.0, 111.0, 148.0, 150.0, 160.0, 187.0, 101.0, 106.0, 176.0, 168.0, 179.0, 191.0, 194.0, 190.0, 201.0, 206.0, 202.0, 205.0, 158.0, 155.0, 206.0, 196.0, 182.0, 159.0, 154.0, 185.0, 197.0, 199.0, 191.0, 202.0, 123.0, 127.0, 203.0, 193.0, 
156.0, 142.0, 202.0, 188.0, 158.0, 169.0, 180.0, 167.0, 185.0, 176.0, 216.0, 174.0, 196.0, 194.0, 130.0, 131.0, 146.0, 151.0, 215.0, 186.0, 153.0, 154.0, 153.0, 137.0, 203.0, 204.0, 137.0, 156.0, 174.0, 170.0, 122.0, 135.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6877589020473881, "mean_inference_ms": 1.210984106554491, "mean_action_processing_ms": 0.13297596780002988, "mean_env_wait_ms": 0.8682082840509878, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 1587200, "num_agent_steps_trained": 1587200, "num_env_steps_sampled": 793600, "num_env_steps_trained": 793600, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 793600, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 1587200, "timers": {"training_iteration_time_ms": 3665.294, "learn_time_ms": 1113.003, "learn_throughput": 11500.417, "synch_weights_time_ms": 12.408}, "counters": {"num_env_steps_sampled": 793600, "num_env_steps_trained": 793600, "num_agent_steps_sampled": 1587200, "num_agent_steps_trained": 1587200}, "done": false, "episodes_total": 1984, "training_iteration": 62, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-03-58", "timestamp": 1666580638, "time_this_iter_s": 3.70135235786438, "time_total_s": 233.606751203537, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, 
"local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 
12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, 
"log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": 
false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, 
"min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, 
"evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, 
"callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 233.606751203537, "timesteps_since_restore": 0, "iterations_since_restore": 62, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.683333333333334, "ram_util_percent": 10.616666666666665}}
+{"custom_metrics": {"sparse_reward_mean": 119.8, "sparse_reward_min": 40, "sparse_reward_max": 160, "shaped_reward_mean": 106.01, "shaped_reward_min": 40, "shaped_reward_max": 142, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 11.04, "onion_pickup_agent_0_min": 4, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 12.38, "onion_pickup_agent_1_min": 4, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 10.59, "useful_onion_pickup_agent_0_min": 1, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 11.88, "useful_onion_pickup_agent_1_min": 4, "useful_onion_pickup_agent_1_max": 20, "onion_drop_agent_0_mean": 1.07, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.89, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.5, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, 
"useful_onion_drop_agent_1_mean": 0.52, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 9.59, "potting_onion_agent_0_min": 3, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 11.07, "potting_onion_agent_1_min": 4, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.79, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 16, "dish_pickup_agent_1_mean": 5.13, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 1.66, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 5, "useful_dish_pickup_agent_1_mean": 1.65, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 6, "dish_drop_agent_0_mean": 1.9, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 12, "dish_drop_agent_1_mean": 1.85, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.87, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 10, "useful_dish_drop_agent_1_mean": 0.79, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 4, "soup_pickup_agent_0_mean": 3.71, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 3.24, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 6, "soup_delivery_agent_0_mean": 3.59, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 6, "soup_delivery_agent_1_mean": 3.03, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 5, "soup_drop_agent_0_mean": 0.03, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.16, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 4, "optimal_onion_potting_agent_0_mean": 9.59, "optimal_onion_potting_agent_0_min": 3, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 11.07, "optimal_onion_potting_agent_1_min": 4, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 9.59, "viable_onion_potting_agent_0_min": 3, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 11.07, "viable_onion_potting_agent_1_min": 4, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 4.336808754565503e-20, "cur_lr": 0.0010000000474974513, "total_loss": -0.003959214314818382, "policy_loss": -0.004130211658775806, 
"vf_loss": 8.356800079345703, "vf_explained_var": 0.6421783566474915, "kl": 0.0018275229958817363, "entropy": 1.3293664455413818, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 806400, "num_env_steps_trained": 806400, "num_agent_steps_sampled": 1612800, "num_agent_steps_trained": 1612800}, "sampler_results": {"episode_reward_max": 462.0, "episode_reward_min": 120.0, "episode_reward_mean": 345.61, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 54.0}, "policy_reward_max": {"ppo": 241.0}, "policy_reward_mean": {"ppo": 172.805}, "custom_metrics": {"sparse_reward_mean": 119.8, "sparse_reward_min": 40, "sparse_reward_max": 160, "shaped_reward_mean": 106.01, "shaped_reward_min": 40, "shaped_reward_max": 142, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 11.04, "onion_pickup_agent_0_min": 4, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 12.38, 
"onion_pickup_agent_1_min": 4, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 10.59, "useful_onion_pickup_agent_0_min": 1, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 11.88, "useful_onion_pickup_agent_1_min": 4, "useful_onion_pickup_agent_1_max": 20, "onion_drop_agent_0_mean": 1.07, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.89, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.5, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.52, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 9.59, "potting_onion_agent_0_min": 3, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 11.07, "potting_onion_agent_1_min": 4, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.79, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 16, "dish_pickup_agent_1_mean": 5.13, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 1.66, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 5, "useful_dish_pickup_agent_1_mean": 1.65, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 6, "dish_drop_agent_0_mean": 1.9, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 12, "dish_drop_agent_1_mean": 1.85, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.87, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 10, "useful_dish_drop_agent_1_mean": 0.79, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 4, "soup_pickup_agent_0_mean": 3.71, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 3.24, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 6, "soup_delivery_agent_0_mean": 3.59, "soup_delivery_agent_0_min": 1, 
"soup_delivery_agent_0_max": 6, "soup_delivery_agent_1_mean": 3.03, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 5, "soup_drop_agent_0_mean": 0.03, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.16, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 4, "optimal_onion_potting_agent_0_mean": 9.59, "optimal_onion_potting_agent_0_min": 3, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 11.07, "optimal_onion_potting_agent_1_min": 4, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 9.59, "viable_onion_potting_agent_0_min": 3, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 11.07, "viable_onion_potting_agent_1_min": 4, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [120.0, 336.0, 361.0, 336.0, 402.0, 390.0, 174.0, 395.0, 336.0, 396.0, 345.0, 255.0, 396.0, 384.0, 190.0, 441.0, 301.0, 343.0, 344.0, 393.0, 356.0, 347.0, 351.0, 346.0, 438.0, 438.0, 387.0, 344.0, 345.0, 344.0, 396.0, 342.0, 287.0, 305.0, 338.0, 239.0, 298.0, 347.0, 207.0, 344.0, 370.0, 384.0, 407.0, 407.0, 313.0, 402.0, 341.0, 339.0, 396.0, 393.0, 250.0, 396.0, 298.0, 390.0, 327.0, 347.0, 361.0, 390.0, 390.0, 261.0, 297.0, 401.0, 307.0, 290.0, 407.0, 293.0, 344.0, 257.0, 344.0, 413.0, 301.0, 350.0, 307.0, 399.0, 353.0, 396.0, 399.0, 405.0, 384.0, 292.0, 462.0, 401.0, 295.0, 345.0, 353.0, 398.0, 416.0, 390.0, 387.0, 342.0, 341.0, 362.0, 290.0, 396.0, 399.0, 387.0, 133.0, 344.0, 335.0, 247.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [57.0, 63.0, 170.0, 166.0, 171.0, 190.0, 167.0, 169.0, 202.0, 200.0, 185.0, 205.0, 80.0, 94.0, 195.0, 200.0, 164.0, 172.0, 201.0, 195.0, 163.0, 182.0, 121.0, 134.0, 203.0, 193.0, 171.0, 213.0, 104.0, 86.0, 200.0, 241.0, 136.0, 165.0, 171.0, 172.0, 185.0, 159.0, 198.0, 195.0, 166.0, 190.0, 167.0, 180.0, 163.0, 188.0, 186.0, 160.0, 226.0, 212.0, 
224.0, 214.0, 187.0, 200.0, 174.0, 170.0, 166.0, 179.0, 181.0, 163.0, 186.0, 210.0, 186.0, 156.0, 159.0, 128.0, 155.0, 150.0, 165.0, 173.0, 128.0, 111.0, 148.0, 150.0, 160.0, 187.0, 101.0, 106.0, 176.0, 168.0, 179.0, 191.0, 194.0, 190.0, 201.0, 206.0, 202.0, 205.0, 158.0, 155.0, 206.0, 196.0, 182.0, 159.0, 154.0, 185.0, 197.0, 199.0, 191.0, 202.0, 123.0, 127.0, 203.0, 193.0, 156.0, 142.0, 202.0, 188.0, 158.0, 169.0, 180.0, 167.0, 185.0, 176.0, 216.0, 174.0, 196.0, 194.0, 130.0, 131.0, 146.0, 151.0, 215.0, 186.0, 153.0, 154.0, 153.0, 137.0, 203.0, 204.0, 137.0, 156.0, 174.0, 170.0, 122.0, 135.0, 174.0, 170.0, 196.0, 217.0, 141.0, 160.0, 169.0, 181.0, 155.0, 152.0, 202.0, 197.0, 182.0, 171.0, 194.0, 202.0, 204.0, 195.0, 194.0, 211.0, 183.0, 201.0, 160.0, 132.0, 229.0, 233.0, 211.0, 190.0, 152.0, 143.0, 176.0, 169.0, 170.0, 183.0, 196.0, 202.0, 221.0, 195.0, 179.0, 211.0, 189.0, 198.0, 156.0, 186.0, 176.0, 165.0, 180.0, 182.0, 143.0, 147.0, 212.0, 184.0, 191.0, 208.0, 190.0, 197.0, 54.0, 79.0, 167.0, 177.0, 170.0, 165.0, 115.0, 132.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6877425590338123, "mean_inference_ms": 1.2107950864096755, "mean_action_processing_ms": 0.13298834860070177, "mean_env_wait_ms": 0.8675140813460829, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 462.0, "episode_reward_min": 120.0, "episode_reward_mean": 345.61, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 54.0}, "policy_reward_max": {"ppo": 241.0}, "policy_reward_mean": {"ppo": 172.805}, "hist_stats": {"episode_reward": [120.0, 336.0, 361.0, 336.0, 402.0, 390.0, 174.0, 395.0, 336.0, 396.0, 345.0, 255.0, 396.0, 384.0, 190.0, 441.0, 301.0, 343.0, 344.0, 393.0, 356.0, 347.0, 351.0, 346.0, 438.0, 438.0, 387.0, 344.0, 345.0, 344.0, 396.0, 342.0, 287.0, 305.0, 338.0, 239.0, 298.0, 347.0, 207.0, 344.0, 370.0, 384.0, 407.0, 407.0, 313.0, 402.0, 341.0, 339.0, 396.0, 393.0, 250.0, 396.0, 298.0, 390.0, 327.0, 347.0, 361.0, 390.0, 
390.0, 261.0, 297.0, 401.0, 307.0, 290.0, 407.0, 293.0, 344.0, 257.0, 344.0, 413.0, 301.0, 350.0, 307.0, 399.0, 353.0, 396.0, 399.0, 405.0, 384.0, 292.0, 462.0, 401.0, 295.0, 345.0, 353.0, 398.0, 416.0, 390.0, 387.0, 342.0, 341.0, 362.0, 290.0, 396.0, 399.0, 387.0, 133.0, 344.0, 335.0, 247.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [57.0, 63.0, 170.0, 166.0, 171.0, 190.0, 167.0, 169.0, 202.0, 200.0, 185.0, 205.0, 80.0, 94.0, 195.0, 200.0, 164.0, 172.0, 201.0, 195.0, 163.0, 182.0, 121.0, 134.0, 203.0, 193.0, 171.0, 213.0, 104.0, 86.0, 200.0, 241.0, 136.0, 165.0, 171.0, 172.0, 185.0, 159.0, 198.0, 195.0, 166.0, 190.0, 167.0, 180.0, 163.0, 188.0, 186.0, 160.0, 226.0, 212.0, 224.0, 214.0, 187.0, 200.0, 174.0, 170.0, 166.0, 179.0, 181.0, 163.0, 186.0, 210.0, 186.0, 156.0, 159.0, 128.0, 155.0, 150.0, 165.0, 173.0, 128.0, 111.0, 148.0, 150.0, 160.0, 187.0, 101.0, 106.0, 176.0, 168.0, 179.0, 191.0, 194.0, 190.0, 201.0, 206.0, 202.0, 205.0, 158.0, 155.0, 206.0, 196.0, 182.0, 159.0, 154.0, 185.0, 197.0, 199.0, 191.0, 202.0, 123.0, 127.0, 203.0, 193.0, 156.0, 142.0, 202.0, 188.0, 158.0, 169.0, 180.0, 167.0, 185.0, 176.0, 216.0, 174.0, 196.0, 194.0, 130.0, 131.0, 146.0, 151.0, 215.0, 186.0, 153.0, 154.0, 153.0, 137.0, 203.0, 204.0, 137.0, 156.0, 174.0, 170.0, 122.0, 135.0, 174.0, 170.0, 196.0, 217.0, 141.0, 160.0, 169.0, 181.0, 155.0, 152.0, 202.0, 197.0, 182.0, 171.0, 194.0, 202.0, 204.0, 195.0, 194.0, 211.0, 183.0, 201.0, 160.0, 132.0, 229.0, 233.0, 211.0, 190.0, 152.0, 143.0, 
176.0, 169.0, 170.0, 183.0, 196.0, 202.0, 221.0, 195.0, 179.0, 211.0, 189.0, 198.0, 156.0, 186.0, 176.0, 165.0, 180.0, 182.0, 143.0, 147.0, 212.0, 184.0, 191.0, 208.0, 190.0, 197.0, 54.0, 79.0, 167.0, 177.0, 170.0, 165.0, 115.0, 132.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6877425590338123, "mean_inference_ms": 1.2107950864096755, "mean_action_processing_ms": 0.13298834860070177, "mean_env_wait_ms": 0.8675140813460829, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 1612800, "num_agent_steps_trained": 1612800, "num_env_steps_sampled": 806400, "num_env_steps_trained": 806400, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 806400, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 1612800, "timers": {"training_iteration_time_ms": 3656.391, "learn_time_ms": 1105.489, "learn_throughput": 11578.584, "synch_weights_time_ms": 12.386}, "counters": {"num_env_steps_sampled": 806400, "num_env_steps_trained": 806400, "num_agent_steps_sampled": 1612800, "num_agent_steps_trained": 1612800}, "done": false, "episodes_total": 2016, "training_iteration": 63, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-04-01", "timestamp": 1666580641, "time_this_iter_s": 3.72748064994812, "time_total_s": 237.3342318534851, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, 
"local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 
12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, 
"log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": 
false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, 
"min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, 
"evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, 
"callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 237.3342318534851, "timesteps_since_restore": 0, "iterations_since_restore": 63, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 24.0, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 122.4, "sparse_reward_min": 20, "sparse_reward_max": 160, "shaped_reward_mean": 108.28, "shaped_reward_min": 40, "shaped_reward_max": 142, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 11.6, "onion_pickup_agent_0_min": 4, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 12.04, "onion_pickup_agent_1_min": 4, "onion_pickup_agent_1_max": 18, "useful_onion_pickup_agent_0_mean": 11.13, "useful_onion_pickup_agent_0_min": 1, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 11.64, "useful_onion_pickup_agent_1_min": 4, "useful_onion_pickup_agent_1_max": 17, "onion_drop_agent_0_mean": 1.2, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.73, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.57, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, 
"useful_onion_drop_agent_1_mean": 0.41, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 10.06, "potting_onion_agent_0_min": 2, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 10.95, "potting_onion_agent_1_min": 4, "potting_onion_agent_1_max": 18, "dish_pickup_agent_0_mean": 5.42, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.26, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 1.76, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 5, "useful_dish_pickup_agent_1_mean": 1.69, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 6, "dish_drop_agent_0_mean": 1.52, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 1.82, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.71, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.75, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 4, "soup_pickup_agent_0_mean": 3.72, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 6, "soup_pickup_agent_1_mean": 3.38, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 6, "soup_delivery_agent_0_mean": 3.6, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 6, "soup_delivery_agent_1_mean": 3.15, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.03, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.14, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 4, "optimal_onion_potting_agent_0_mean": 10.06, "optimal_onion_potting_agent_0_min": 2, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 10.95, "optimal_onion_potting_agent_1_min": 4, "optimal_onion_potting_agent_1_max": 18, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 10.06, "viable_onion_potting_agent_0_min": 2, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 10.95, "viable_onion_potting_agent_1_min": 4, "viable_onion_potting_agent_1_max": 18, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 2.1684043772827515e-20, "cur_lr": 0.0010000000474974513, "total_loss": -0.004289156757295132, "policy_loss": 
-0.004483077675104141, "vf_loss": 8.452171325683594, "vf_explained_var": 0.6298333406448364, "kl": 0.001715940423309803, "entropy": 1.3025939464569092, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 819200, "num_env_steps_trained": 819200, "num_agent_steps_sampled": 1638400, "num_agent_steps_trained": 1638400}, "sampler_results": {"episode_reward_max": 462.0, "episode_reward_min": 80.0, "episode_reward_mean": 353.08, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 34.0}, "policy_reward_max": {"ppo": 238.0}, "policy_reward_mean": {"ppo": 176.54}, "custom_metrics": {"sparse_reward_mean": 122.4, "sparse_reward_min": 20, "sparse_reward_max": 160, "shaped_reward_mean": 108.28, "shaped_reward_min": 40, "shaped_reward_max": 142, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 11.6, "onion_pickup_agent_0_min": 4, "onion_pickup_agent_0_max": 21, 
"onion_pickup_agent_1_mean": 12.04, "onion_pickup_agent_1_min": 4, "onion_pickup_agent_1_max": 18, "useful_onion_pickup_agent_0_mean": 11.13, "useful_onion_pickup_agent_0_min": 1, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 11.64, "useful_onion_pickup_agent_1_min": 4, "useful_onion_pickup_agent_1_max": 17, "onion_drop_agent_0_mean": 1.2, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.73, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.57, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.41, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 10.06, "potting_onion_agent_0_min": 2, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 10.95, "potting_onion_agent_1_min": 4, "potting_onion_agent_1_max": 18, "dish_pickup_agent_0_mean": 5.42, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.26, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 1.76, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 5, "useful_dish_pickup_agent_1_mean": 1.69, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 6, "dish_drop_agent_0_mean": 1.52, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 1.82, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.71, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.75, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 4, "soup_pickup_agent_0_mean": 3.72, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 6, "soup_pickup_agent_1_mean": 3.38, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 6, "soup_delivery_agent_0_mean": 3.6, 
"soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 6, "soup_delivery_agent_1_mean": 3.15, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.03, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.14, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 4, "optimal_onion_potting_agent_0_mean": 10.06, "optimal_onion_potting_agent_0_min": 2, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 10.95, "optimal_onion_potting_agent_1_min": 4, "optimal_onion_potting_agent_1_max": 18, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 10.06, "viable_onion_potting_agent_0_min": 2, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 10.95, "viable_onion_potting_agent_1_min": 4, "viable_onion_potting_agent_1_max": 18, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, 
"useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [287.0, 305.0, 338.0, 239.0, 298.0, 347.0, 207.0, 344.0, 370.0, 384.0, 407.0, 407.0, 313.0, 402.0, 341.0, 339.0, 396.0, 393.0, 250.0, 396.0, 298.0, 390.0, 327.0, 347.0, 361.0, 390.0, 390.0, 261.0, 297.0, 401.0, 307.0, 290.0, 407.0, 293.0, 344.0, 257.0, 344.0, 413.0, 301.0, 350.0, 307.0, 399.0, 353.0, 396.0, 399.0, 405.0, 384.0, 292.0, 462.0, 401.0, 295.0, 345.0, 353.0, 398.0, 416.0, 390.0, 387.0, 342.0, 341.0, 362.0, 290.0, 396.0, 399.0, 387.0, 133.0, 344.0, 335.0, 247.0, 395.0, 450.0, 384.0, 387.0, 387.0, 404.0, 410.0, 279.0, 441.0, 447.0, 459.0, 336.0, 80.0, 390.0, 290.0, 247.0, 358.0, 336.0, 410.0, 318.0, 392.0, 387.0, 370.0, 298.0, 438.0, 353.0, 396.0, 438.0, 381.0, 355.0, 398.0, 405.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [159.0, 128.0, 155.0, 150.0, 165.0, 173.0, 128.0, 111.0, 148.0, 150.0, 160.0, 187.0, 101.0, 106.0, 176.0, 168.0, 179.0, 191.0, 194.0, 190.0, 201.0, 206.0, 202.0, 205.0, 158.0, 155.0, 206.0, 196.0, 182.0, 159.0, 154.0, 185.0, 197.0, 199.0, 191.0, 202.0, 123.0, 127.0, 203.0, 193.0, 156.0, 142.0, 202.0, 
188.0, 158.0, 169.0, 180.0, 167.0, 185.0, 176.0, 216.0, 174.0, 196.0, 194.0, 130.0, 131.0, 146.0, 151.0, 215.0, 186.0, 153.0, 154.0, 153.0, 137.0, 203.0, 204.0, 137.0, 156.0, 174.0, 170.0, 122.0, 135.0, 174.0, 170.0, 196.0, 217.0, 141.0, 160.0, 169.0, 181.0, 155.0, 152.0, 202.0, 197.0, 182.0, 171.0, 194.0, 202.0, 204.0, 195.0, 194.0, 211.0, 183.0, 201.0, 160.0, 132.0, 229.0, 233.0, 211.0, 190.0, 152.0, 143.0, 176.0, 169.0, 170.0, 183.0, 196.0, 202.0, 221.0, 195.0, 179.0, 211.0, 189.0, 198.0, 156.0, 186.0, 176.0, 165.0, 180.0, 182.0, 143.0, 147.0, 212.0, 184.0, 191.0, 208.0, 190.0, 197.0, 54.0, 79.0, 167.0, 177.0, 170.0, 165.0, 115.0, 132.0, 195.0, 200.0, 238.0, 212.0, 179.0, 205.0, 200.0, 187.0, 192.0, 195.0, 206.0, 198.0, 200.0, 210.0, 137.0, 142.0, 233.0, 208.0, 219.0, 228.0, 229.0, 230.0, 165.0, 171.0, 34.0, 46.0, 201.0, 189.0, 139.0, 151.0, 133.0, 114.0, 179.0, 179.0, 160.0, 176.0, 210.0, 200.0, 150.0, 168.0, 207.0, 185.0, 194.0, 193.0, 193.0, 177.0, 135.0, 163.0, 214.0, 224.0, 180.0, 173.0, 205.0, 191.0, 207.0, 231.0, 169.0, 212.0, 184.0, 171.0, 203.0, 195.0, 197.0, 208.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6876820593781335, "mean_inference_ms": 1.2105886481957269, "mean_action_processing_ms": 0.13299015160909294, "mean_env_wait_ms": 0.8668253107554783, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 462.0, "episode_reward_min": 80.0, "episode_reward_mean": 353.08, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 34.0}, "policy_reward_max": {"ppo": 238.0}, "policy_reward_mean": {"ppo": 176.54}, "hist_stats": {"episode_reward": [287.0, 305.0, 338.0, 239.0, 298.0, 347.0, 207.0, 344.0, 370.0, 384.0, 407.0, 407.0, 313.0, 402.0, 341.0, 339.0, 396.0, 393.0, 250.0, 396.0, 298.0, 390.0, 327.0, 347.0, 361.0, 390.0, 390.0, 261.0, 297.0, 401.0, 307.0, 290.0, 407.0, 293.0, 344.0, 257.0, 344.0, 413.0, 301.0, 350.0, 307.0, 399.0, 353.0, 396.0, 399.0, 405.0, 384.0, 292.0, 462.0, 401.0, 295.0, 
345.0, 353.0, 398.0, 416.0, 390.0, 387.0, 342.0, 341.0, 362.0, 290.0, 396.0, 399.0, 387.0, 133.0, 344.0, 335.0, 247.0, 395.0, 450.0, 384.0, 387.0, 387.0, 404.0, 410.0, 279.0, 441.0, 447.0, 459.0, 336.0, 80.0, 390.0, 290.0, 247.0, 358.0, 336.0, 410.0, 318.0, 392.0, 387.0, 370.0, 298.0, 438.0, 353.0, 396.0, 438.0, 381.0, 355.0, 398.0, 405.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [159.0, 128.0, 155.0, 150.0, 165.0, 173.0, 128.0, 111.0, 148.0, 150.0, 160.0, 187.0, 101.0, 106.0, 176.0, 168.0, 179.0, 191.0, 194.0, 190.0, 201.0, 206.0, 202.0, 205.0, 158.0, 155.0, 206.0, 196.0, 182.0, 159.0, 154.0, 185.0, 197.0, 199.0, 191.0, 202.0, 123.0, 127.0, 203.0, 193.0, 156.0, 142.0, 202.0, 188.0, 158.0, 169.0, 180.0, 167.0, 185.0, 176.0, 216.0, 174.0, 196.0, 194.0, 130.0, 131.0, 146.0, 151.0, 215.0, 186.0, 153.0, 154.0, 153.0, 137.0, 203.0, 204.0, 137.0, 156.0, 174.0, 170.0, 122.0, 135.0, 174.0, 170.0, 196.0, 217.0, 141.0, 160.0, 169.0, 181.0, 155.0, 152.0, 202.0, 197.0, 182.0, 171.0, 194.0, 202.0, 204.0, 195.0, 194.0, 211.0, 183.0, 201.0, 160.0, 132.0, 229.0, 233.0, 211.0, 190.0, 152.0, 143.0, 176.0, 169.0, 170.0, 183.0, 196.0, 202.0, 221.0, 195.0, 179.0, 211.0, 189.0, 198.0, 156.0, 186.0, 176.0, 165.0, 180.0, 182.0, 143.0, 147.0, 212.0, 184.0, 191.0, 208.0, 190.0, 197.0, 54.0, 79.0, 167.0, 177.0, 170.0, 165.0, 115.0, 132.0, 195.0, 200.0, 238.0, 212.0, 179.0, 205.0, 200.0, 187.0, 192.0, 195.0, 206.0, 198.0, 200.0, 210.0, 137.0, 142.0, 233.0, 208.0, 219.0, 228.0, 229.0, 230.0, 165.0, 
171.0, 34.0, 46.0, 201.0, 189.0, 139.0, 151.0, 133.0, 114.0, 179.0, 179.0, 160.0, 176.0, 210.0, 200.0, 150.0, 168.0, 207.0, 185.0, 194.0, 193.0, 193.0, 177.0, 135.0, 163.0, 214.0, 224.0, 180.0, 173.0, 205.0, 191.0, 207.0, 231.0, 169.0, 212.0, 184.0, 171.0, 203.0, 195.0, 197.0, 208.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6876820593781335, "mean_inference_ms": 1.2105886481957269, "mean_action_processing_ms": 0.13299015160909294, "mean_env_wait_ms": 0.8668253107554783, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 1638400, "num_agent_steps_trained": 1638400, "num_env_steps_sampled": 819200, "num_env_steps_trained": 819200, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 819200, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 1638400, "timers": {"training_iteration_time_ms": 3652.492, "learn_time_ms": 1103.358, "learn_throughput": 11600.947, "synch_weights_time_ms": 13.037}, "counters": {"num_env_steps_sampled": 819200, "num_env_steps_trained": 819200, "num_agent_steps_sampled": 1638400, "num_agent_steps_trained": 1638400}, "done": false, "episodes_total": 2048, "training_iteration": 64, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-04-05", "timestamp": 1666580645, "time_this_iter_s": 3.637770414352417, "time_total_s": 240.97200226783752, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, 
"device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, 
"sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": 
{"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", 
"observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, 
"keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function 
get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function 
gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 240.97200226783752, "timesteps_since_restore": 0, "iterations_since_restore": 64, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.916666666666668, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 124.2, "sparse_reward_min": 20, "sparse_reward_max": 160, "shaped_reward_mean": 109.47, "shaped_reward_min": 40, "shaped_reward_max": 142, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 11.15, "onion_pickup_agent_0_min": 4, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 12.57, "onion_pickup_agent_1_min": 4, "onion_pickup_agent_1_max": 18, "useful_onion_pickup_agent_0_mean": 10.67, "useful_onion_pickup_agent_0_min": 1, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 12.1, "useful_onion_pickup_agent_1_min": 4, "useful_onion_pickup_agent_1_max": 18, "onion_drop_agent_0_mean": 1.06, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.83, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.49, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, 
"useful_onion_drop_agent_1_mean": 0.42, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 9.8, "potting_onion_agent_0_min": 2, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 11.39, "potting_onion_agent_1_min": 4, "potting_onion_agent_1_max": 18, "dish_pickup_agent_0_mean": 5.59, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.42, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 12, "useful_dish_pickup_agent_0_mean": 1.74, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 5, "useful_dish_pickup_agent_1_mean": 1.81, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 6, "dish_drop_agent_0_mean": 1.68, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 10, "dish_drop_agent_1_mean": 1.84, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 6, "useful_dish_drop_agent_0_mean": 0.72, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 4, "useful_dish_drop_agent_1_mean": 0.8, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 4, "soup_pickup_agent_0_mean": 3.72, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 3.45, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 3.56, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 3.2, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.06, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.16, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 4, "optimal_onion_potting_agent_0_mean": 9.8, "optimal_onion_potting_agent_0_min": 2, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 11.39, "optimal_onion_potting_agent_1_min": 4, "optimal_onion_potting_agent_1_max": 18, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 9.8, "viable_onion_potting_agent_0_min": 2, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 11.39, "viable_onion_potting_agent_1_min": 4, "viable_onion_potting_agent_1_max": 18, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 1.0842021886413758e-20, "cur_lr": 0.0010000000474974513, "total_loss": 0.00183533423114568, "policy_loss": 0.001654433086514473, 
"vf_loss": 8.310755729675293, "vf_explained_var": 0.6230897903442383, "kl": 0.002343923319131136, "entropy": 1.3003530502319336, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 832000, "num_env_steps_trained": 832000, "num_agent_steps_sampled": 1664000, "num_agent_steps_trained": 1664000}, "sampler_results": {"episode_reward_max": 462.0, "episode_reward_min": 80.0, "episode_reward_mean": 357.87, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 34.0}, "policy_reward_max": {"ppo": 241.0}, "policy_reward_mean": {"ppo": 178.935}, "custom_metrics": {"sparse_reward_mean": 124.2, "sparse_reward_min": 20, "sparse_reward_max": 160, "shaped_reward_mean": 109.47, "shaped_reward_min": 40, "shaped_reward_max": 142, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 11.15, "onion_pickup_agent_0_min": 4, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 12.57, 
"onion_pickup_agent_1_min": 4, "onion_pickup_agent_1_max": 18, "useful_onion_pickup_agent_0_mean": 10.67, "useful_onion_pickup_agent_0_min": 1, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 12.1, "useful_onion_pickup_agent_1_min": 4, "useful_onion_pickup_agent_1_max": 18, "onion_drop_agent_0_mean": 1.06, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.83, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.49, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.42, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 9.8, "potting_onion_agent_0_min": 2, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 11.39, "potting_onion_agent_1_min": 4, "potting_onion_agent_1_max": 18, "dish_pickup_agent_0_mean": 5.59, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.42, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 12, "useful_dish_pickup_agent_0_mean": 1.74, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 5, "useful_dish_pickup_agent_1_mean": 1.81, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 6, "dish_drop_agent_0_mean": 1.68, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 10, "dish_drop_agent_1_mean": 1.84, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 6, "useful_dish_drop_agent_0_mean": 0.72, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 4, "useful_dish_drop_agent_1_mean": 0.8, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 4, "soup_pickup_agent_0_mean": 3.72, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 3.45, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 3.56, "soup_delivery_agent_0_min": 1, 
"soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 3.2, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.06, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.16, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 4, "optimal_onion_potting_agent_0_mean": 9.8, "optimal_onion_potting_agent_0_min": 2, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 11.39, "optimal_onion_potting_agent_1_min": 4, "optimal_onion_potting_agent_1_max": 18, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 9.8, "viable_onion_potting_agent_0_min": 2, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 11.39, "viable_onion_potting_agent_1_min": 4, "viable_onion_potting_agent_1_max": 18, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [407.0, 293.0, 344.0, 257.0, 344.0, 413.0, 301.0, 350.0, 307.0, 399.0, 353.0, 396.0, 399.0, 405.0, 384.0, 292.0, 462.0, 401.0, 295.0, 345.0, 353.0, 398.0, 416.0, 390.0, 387.0, 342.0, 341.0, 362.0, 290.0, 396.0, 399.0, 387.0, 133.0, 344.0, 335.0, 247.0, 395.0, 450.0, 384.0, 387.0, 387.0, 404.0, 410.0, 279.0, 441.0, 447.0, 459.0, 336.0, 80.0, 390.0, 290.0, 247.0, 358.0, 336.0, 410.0, 318.0, 392.0, 387.0, 370.0, 298.0, 438.0, 353.0, 396.0, 438.0, 381.0, 355.0, 398.0, 405.0, 255.0, 344.0, 347.0, 344.0, 179.0, 330.0, 390.0, 299.0, 343.0, 356.0, 350.0, 362.0, 450.0, 304.0, 281.0, 396.0, 401.0, 356.0, 393.0, 392.0, 353.0, 361.0, 353.0, 342.0, 416.0, 387.0, 361.0, 447.0, 348.0, 402.0, 349.0, 310.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [203.0, 204.0, 137.0, 156.0, 174.0, 170.0, 122.0, 135.0, 174.0, 170.0, 196.0, 217.0, 141.0, 160.0, 169.0, 181.0, 155.0, 152.0, 202.0, 197.0, 182.0, 171.0, 194.0, 202.0, 204.0, 195.0, 194.0, 211.0, 183.0, 201.0, 160.0, 132.0, 229.0, 233.0, 211.0, 190.0, 152.0, 143.0, 176.0, 169.0, 170.0, 183.0, 196.0, 202.0, 221.0, 195.0, 179.0, 211.0, 189.0, 
198.0, 156.0, 186.0, 176.0, 165.0, 180.0, 182.0, 143.0, 147.0, 212.0, 184.0, 191.0, 208.0, 190.0, 197.0, 54.0, 79.0, 167.0, 177.0, 170.0, 165.0, 115.0, 132.0, 195.0, 200.0, 238.0, 212.0, 179.0, 205.0, 200.0, 187.0, 192.0, 195.0, 206.0, 198.0, 200.0, 210.0, 137.0, 142.0, 233.0, 208.0, 219.0, 228.0, 229.0, 230.0, 165.0, 171.0, 34.0, 46.0, 201.0, 189.0, 139.0, 151.0, 133.0, 114.0, 179.0, 179.0, 160.0, 176.0, 210.0, 200.0, 150.0, 168.0, 207.0, 185.0, 194.0, 193.0, 193.0, 177.0, 135.0, 163.0, 214.0, 224.0, 180.0, 173.0, 205.0, 191.0, 207.0, 231.0, 169.0, 212.0, 184.0, 171.0, 203.0, 195.0, 197.0, 208.0, 119.0, 136.0, 173.0, 171.0, 178.0, 169.0, 167.0, 177.0, 74.0, 105.0, 168.0, 162.0, 187.0, 203.0, 158.0, 141.0, 177.0, 166.0, 168.0, 188.0, 163.0, 187.0, 178.0, 184.0, 241.0, 209.0, 152.0, 152.0, 134.0, 147.0, 191.0, 205.0, 195.0, 206.0, 174.0, 182.0, 188.0, 205.0, 188.0, 204.0, 173.0, 180.0, 178.0, 183.0, 181.0, 172.0, 174.0, 168.0, 186.0, 230.0, 187.0, 200.0, 194.0, 167.0, 217.0, 230.0, 168.0, 180.0, 194.0, 208.0, 185.0, 164.0, 158.0, 152.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6875963042536799, "mean_inference_ms": 1.2103686825657565, "mean_action_processing_ms": 0.13298642621311788, "mean_env_wait_ms": 0.8660832998378942, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 462.0, "episode_reward_min": 80.0, "episode_reward_mean": 357.87, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 34.0}, "policy_reward_max": {"ppo": 241.0}, "policy_reward_mean": {"ppo": 178.935}, "hist_stats": {"episode_reward": [407.0, 293.0, 344.0, 257.0, 344.0, 413.0, 301.0, 350.0, 307.0, 399.0, 353.0, 396.0, 399.0, 405.0, 384.0, 292.0, 462.0, 401.0, 295.0, 345.0, 353.0, 398.0, 416.0, 390.0, 387.0, 342.0, 341.0, 362.0, 290.0, 396.0, 399.0, 387.0, 133.0, 344.0, 335.0, 247.0, 395.0, 450.0, 384.0, 387.0, 387.0, 404.0, 410.0, 279.0, 441.0, 447.0, 459.0, 336.0, 80.0, 390.0, 290.0, 247.0, 358.0, 336.0, 410.0, 318.0, 392.0, 387.0, 
370.0, 298.0, 438.0, 353.0, 396.0, 438.0, 381.0, 355.0, 398.0, 405.0, 255.0, 344.0, 347.0, 344.0, 179.0, 330.0, 390.0, 299.0, 343.0, 356.0, 350.0, 362.0, 450.0, 304.0, 281.0, 396.0, 401.0, 356.0, 393.0, 392.0, 353.0, 361.0, 353.0, 342.0, 416.0, 387.0, 361.0, 447.0, 348.0, 402.0, 349.0, 310.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [203.0, 204.0, 137.0, 156.0, 174.0, 170.0, 122.0, 135.0, 174.0, 170.0, 196.0, 217.0, 141.0, 160.0, 169.0, 181.0, 155.0, 152.0, 202.0, 197.0, 182.0, 171.0, 194.0, 202.0, 204.0, 195.0, 194.0, 211.0, 183.0, 201.0, 160.0, 132.0, 229.0, 233.0, 211.0, 190.0, 152.0, 143.0, 176.0, 169.0, 170.0, 183.0, 196.0, 202.0, 221.0, 195.0, 179.0, 211.0, 189.0, 198.0, 156.0, 186.0, 176.0, 165.0, 180.0, 182.0, 143.0, 147.0, 212.0, 184.0, 191.0, 208.0, 190.0, 197.0, 54.0, 79.0, 167.0, 177.0, 170.0, 165.0, 115.0, 132.0, 195.0, 200.0, 238.0, 212.0, 179.0, 205.0, 200.0, 187.0, 192.0, 195.0, 206.0, 198.0, 200.0, 210.0, 137.0, 142.0, 233.0, 208.0, 219.0, 228.0, 229.0, 230.0, 165.0, 171.0, 34.0, 46.0, 201.0, 189.0, 139.0, 151.0, 133.0, 114.0, 179.0, 179.0, 160.0, 176.0, 210.0, 200.0, 150.0, 168.0, 207.0, 185.0, 194.0, 193.0, 193.0, 177.0, 135.0, 163.0, 214.0, 224.0, 180.0, 173.0, 205.0, 191.0, 207.0, 231.0, 169.0, 212.0, 184.0, 171.0, 203.0, 195.0, 197.0, 208.0, 119.0, 136.0, 173.0, 171.0, 178.0, 169.0, 167.0, 177.0, 74.0, 105.0, 168.0, 162.0, 187.0, 203.0, 158.0, 141.0, 177.0, 166.0, 168.0, 188.0, 163.0, 187.0, 178.0, 184.0, 241.0, 209.0, 152.0, 152.0, 134.0, 147.0, 
191.0, 205.0, 195.0, 206.0, 174.0, 182.0, 188.0, 205.0, 188.0, 204.0, 173.0, 180.0, 178.0, 183.0, 181.0, 172.0, 174.0, 168.0, 186.0, 230.0, 187.0, 200.0, 194.0, 167.0, 217.0, 230.0, 168.0, 180.0, 194.0, 208.0, 185.0, 164.0, 158.0, 152.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6875963042536799, "mean_inference_ms": 1.2103686825657565, "mean_action_processing_ms": 0.13298642621311788, "mean_env_wait_ms": 0.8660832998378942, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 1664000, "num_agent_steps_trained": 1664000, "num_env_steps_sampled": 832000, "num_env_steps_trained": 832000, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 832000, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 1664000, "timers": {"training_iteration_time_ms": 3636.033, "learn_time_ms": 1096.102, "learn_throughput": 11677.75, "synch_weights_time_ms": 13.065}, "counters": {"num_env_steps_sampled": 832000, "num_env_steps_trained": 832000, "num_agent_steps_sampled": 1664000, "num_agent_steps_trained": 1664000}, "done": false, "episodes_total": 2080, "training_iteration": 65, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-04-09", "timestamp": 1666580649, "time_this_iter_s": 3.55340576171875, "time_total_s": 244.52540802955627, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, 
"local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 
12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, 
"log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": 
false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, 
"min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, 
"evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, 
"callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 244.52540802955627, "timesteps_since_restore": 0, "iterations_since_restore": 65, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.74, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 126.4, "sparse_reward_min": 20, "sparse_reward_max": 160, "shaped_reward_mean": 108.84, "shaped_reward_min": 40, "shaped_reward_max": 139, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 10.96, "onion_pickup_agent_0_min": 4, "onion_pickup_agent_0_max": 18, "onion_pickup_agent_1_mean": 12.67, "onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 20, "useful_onion_pickup_agent_0_mean": 10.48, "useful_onion_pickup_agent_0_min": 1, "useful_onion_pickup_agent_0_max": 17, "useful_onion_pickup_agent_1_mean": 12.12, "useful_onion_pickup_agent_1_min": 6, "useful_onion_pickup_agent_1_max": 18, "onion_drop_agent_0_mean": 0.98, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 0.89, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.54, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, 
"useful_onion_drop_agent_1_mean": 0.41, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 9.73, "potting_onion_agent_0_min": 2, "potting_onion_agent_0_max": 16, "potting_onion_agent_1_mean": 11.46, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 18, "dish_pickup_agent_0_mean": 5.8, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.31, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 12, "useful_dish_pickup_agent_0_mean": 1.68, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 5, "useful_dish_pickup_agent_1_mean": 1.71, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 6, "dish_drop_agent_0_mean": 1.83, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 10, "dish_drop_agent_1_mean": 1.74, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 6, "useful_dish_drop_agent_0_mean": 0.69, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 4, "useful_dish_drop_agent_1_mean": 0.73, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 4, "soup_pickup_agent_0_mean": 3.69, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 3.45, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 3.56, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 3.21, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.05, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.15, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 4, "optimal_onion_potting_agent_0_mean": 9.73, "optimal_onion_potting_agent_0_min": 2, "optimal_onion_potting_agent_0_max": 16, "optimal_onion_potting_agent_1_mean": 11.46, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 18, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 9.73, "viable_onion_potting_agent_0_min": 2, "viable_onion_potting_agent_0_max": 16, "viable_onion_potting_agent_1_mean": 11.46, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 18, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 5.421010943206879e-21, "cur_lr": 0.0010000000474974513, "total_loss": -0.0016171643510460854, "policy_loss": 
-0.0018112201942130923, "vf_loss": 8.35134506225586, "vf_explained_var": 0.6184874773025513, "kl": 0.0019764369353652, "entropy": 1.2821624279022217, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 844800, "num_env_steps_trained": 844800, "num_agent_steps_sampled": 1689600, "num_agent_steps_trained": 1689600}, "sampler_results": {"episode_reward_max": 459.0, "episode_reward_min": 80.0, "episode_reward_mean": 361.64, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 34.0}, "policy_reward_max": {"ppo": 241.0}, "policy_reward_mean": {"ppo": 180.82}, "custom_metrics": {"sparse_reward_mean": 126.4, "sparse_reward_min": 20, "sparse_reward_max": 160, "shaped_reward_mean": 108.84, "shaped_reward_min": 40, "shaped_reward_max": 139, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 10.96, "onion_pickup_agent_0_min": 4, "onion_pickup_agent_0_max": 18, 
"onion_pickup_agent_1_mean": 12.67, "onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 20, "useful_onion_pickup_agent_0_mean": 10.48, "useful_onion_pickup_agent_0_min": 1, "useful_onion_pickup_agent_0_max": 17, "useful_onion_pickup_agent_1_mean": 12.12, "useful_onion_pickup_agent_1_min": 6, "useful_onion_pickup_agent_1_max": 18, "onion_drop_agent_0_mean": 0.98, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 0.89, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.54, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.41, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 9.73, "potting_onion_agent_0_min": 2, "potting_onion_agent_0_max": 16, "potting_onion_agent_1_mean": 11.46, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 18, "dish_pickup_agent_0_mean": 5.8, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.31, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 12, "useful_dish_pickup_agent_0_mean": 1.68, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 5, "useful_dish_pickup_agent_1_mean": 1.71, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 6, "dish_drop_agent_0_mean": 1.83, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 10, "dish_drop_agent_1_mean": 1.74, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 6, "useful_dish_drop_agent_0_mean": 0.69, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 4, "useful_dish_drop_agent_1_mean": 0.73, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 4, "soup_pickup_agent_0_mean": 3.69, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 3.45, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 3.56, 
"soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 3.21, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.05, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.15, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 4, "optimal_onion_potting_agent_0_mean": 9.73, "optimal_onion_potting_agent_0_min": 2, "optimal_onion_potting_agent_0_max": 16, "optimal_onion_potting_agent_1_mean": 11.46, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 18, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 9.73, "viable_onion_potting_agent_0_min": 2, "viable_onion_potting_agent_0_max": 16, "viable_onion_potting_agent_1_mean": 11.46, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 18, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, 
"useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [133.0, 344.0, 335.0, 247.0, 395.0, 450.0, 384.0, 387.0, 387.0, 404.0, 410.0, 279.0, 441.0, 447.0, 459.0, 336.0, 80.0, 390.0, 290.0, 247.0, 358.0, 336.0, 410.0, 318.0, 392.0, 387.0, 370.0, 298.0, 438.0, 353.0, 396.0, 438.0, 381.0, 355.0, 398.0, 405.0, 255.0, 344.0, 347.0, 344.0, 179.0, 330.0, 390.0, 299.0, 343.0, 356.0, 350.0, 362.0, 450.0, 304.0, 281.0, 396.0, 401.0, 356.0, 393.0, 392.0, 353.0, 361.0, 353.0, 342.0, 416.0, 387.0, 361.0, 447.0, 348.0, 402.0, 349.0, 310.0, 292.0, 456.0, 390.0, 381.0, 341.0, 387.0, 384.0, 341.0, 345.0, 444.0, 444.0, 411.0, 401.0, 438.0, 282.0, 441.0, 344.0, 352.0, 408.0, 384.0, 384.0, 456.0, 348.0, 313.0, 399.0, 395.0, 281.0, 225.0, 390.0, 390.0, 390.0, 348.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [54.0, 79.0, 167.0, 177.0, 170.0, 165.0, 115.0, 132.0, 195.0, 200.0, 238.0, 212.0, 179.0, 205.0, 200.0, 187.0, 192.0, 195.0, 206.0, 198.0, 200.0, 210.0, 137.0, 142.0, 233.0, 208.0, 219.0, 228.0, 229.0, 230.0, 165.0, 171.0, 34.0, 46.0, 201.0, 189.0, 139.0, 151.0, 133.0, 114.0, 179.0, 179.0, 160.0, 176.0, 
210.0, 200.0, 150.0, 168.0, 207.0, 185.0, 194.0, 193.0, 193.0, 177.0, 135.0, 163.0, 214.0, 224.0, 180.0, 173.0, 205.0, 191.0, 207.0, 231.0, 169.0, 212.0, 184.0, 171.0, 203.0, 195.0, 197.0, 208.0, 119.0, 136.0, 173.0, 171.0, 178.0, 169.0, 167.0, 177.0, 74.0, 105.0, 168.0, 162.0, 187.0, 203.0, 158.0, 141.0, 177.0, 166.0, 168.0, 188.0, 163.0, 187.0, 178.0, 184.0, 241.0, 209.0, 152.0, 152.0, 134.0, 147.0, 191.0, 205.0, 195.0, 206.0, 174.0, 182.0, 188.0, 205.0, 188.0, 204.0, 173.0, 180.0, 178.0, 183.0, 181.0, 172.0, 174.0, 168.0, 186.0, 230.0, 187.0, 200.0, 194.0, 167.0, 217.0, 230.0, 168.0, 180.0, 194.0, 208.0, 185.0, 164.0, 158.0, 152.0, 135.0, 157.0, 228.0, 228.0, 205.0, 185.0, 184.0, 197.0, 167.0, 174.0, 179.0, 208.0, 205.0, 179.0, 174.0, 167.0, 174.0, 171.0, 222.0, 222.0, 216.0, 228.0, 204.0, 207.0, 203.0, 198.0, 226.0, 212.0, 139.0, 143.0, 223.0, 218.0, 183.0, 161.0, 184.0, 168.0, 206.0, 202.0, 196.0, 188.0, 177.0, 207.0, 217.0, 239.0, 155.0, 193.0, 150.0, 163.0, 200.0, 199.0, 209.0, 186.0, 137.0, 144.0, 120.0, 105.0, 185.0, 205.0, 178.0, 212.0, 190.0, 200.0, 161.0, 187.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6874455833951449, "mean_inference_ms": 1.2101589324370519, "mean_action_processing_ms": 0.13297401832247158, "mean_env_wait_ms": 0.8652994841084243, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 459.0, "episode_reward_min": 80.0, "episode_reward_mean": 361.64, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 34.0}, "policy_reward_max": {"ppo": 241.0}, "policy_reward_mean": {"ppo": 180.82}, "hist_stats": {"episode_reward": [133.0, 344.0, 335.0, 247.0, 395.0, 450.0, 384.0, 387.0, 387.0, 404.0, 410.0, 279.0, 441.0, 447.0, 459.0, 336.0, 80.0, 390.0, 290.0, 247.0, 358.0, 336.0, 410.0, 318.0, 392.0, 387.0, 370.0, 298.0, 438.0, 353.0, 396.0, 438.0, 381.0, 355.0, 398.0, 405.0, 255.0, 344.0, 347.0, 344.0, 179.0, 330.0, 390.0, 299.0, 343.0, 356.0, 350.0, 362.0, 450.0, 304.0, 281.0, 396.0, 
401.0, 356.0, 393.0, 392.0, 353.0, 361.0, 353.0, 342.0, 416.0, 387.0, 361.0, 447.0, 348.0, 402.0, 349.0, 310.0, 292.0, 456.0, 390.0, 381.0, 341.0, 387.0, 384.0, 341.0, 345.0, 444.0, 444.0, 411.0, 401.0, 438.0, 282.0, 441.0, 344.0, 352.0, 408.0, 384.0, 384.0, 456.0, 348.0, 313.0, 399.0, 395.0, 281.0, 225.0, 390.0, 390.0, 390.0, 348.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [54.0, 79.0, 167.0, 177.0, 170.0, 165.0, 115.0, 132.0, 195.0, 200.0, 238.0, 212.0, 179.0, 205.0, 200.0, 187.0, 192.0, 195.0, 206.0, 198.0, 200.0, 210.0, 137.0, 142.0, 233.0, 208.0, 219.0, 228.0, 229.0, 230.0, 165.0, 171.0, 34.0, 46.0, 201.0, 189.0, 139.0, 151.0, 133.0, 114.0, 179.0, 179.0, 160.0, 176.0, 210.0, 200.0, 150.0, 168.0, 207.0, 185.0, 194.0, 193.0, 193.0, 177.0, 135.0, 163.0, 214.0, 224.0, 180.0, 173.0, 205.0, 191.0, 207.0, 231.0, 169.0, 212.0, 184.0, 171.0, 203.0, 195.0, 197.0, 208.0, 119.0, 136.0, 173.0, 171.0, 178.0, 169.0, 167.0, 177.0, 74.0, 105.0, 168.0, 162.0, 187.0, 203.0, 158.0, 141.0, 177.0, 166.0, 168.0, 188.0, 163.0, 187.0, 178.0, 184.0, 241.0, 209.0, 152.0, 152.0, 134.0, 147.0, 191.0, 205.0, 195.0, 206.0, 174.0, 182.0, 188.0, 205.0, 188.0, 204.0, 173.0, 180.0, 178.0, 183.0, 181.0, 172.0, 174.0, 168.0, 186.0, 230.0, 187.0, 200.0, 194.0, 167.0, 217.0, 230.0, 168.0, 180.0, 194.0, 208.0, 185.0, 164.0, 158.0, 152.0, 135.0, 157.0, 228.0, 228.0, 205.0, 185.0, 184.0, 197.0, 167.0, 174.0, 179.0, 208.0, 205.0, 179.0, 174.0, 167.0, 174.0, 171.0, 222.0, 222.0, 216.0, 228.0, 204.0, 207.0, 
203.0, 198.0, 226.0, 212.0, 139.0, 143.0, 223.0, 218.0, 183.0, 161.0, 184.0, 168.0, 206.0, 202.0, 196.0, 188.0, 177.0, 207.0, 217.0, 239.0, 155.0, 193.0, 150.0, 163.0, 200.0, 199.0, 209.0, 186.0, 137.0, 144.0, 120.0, 105.0, 185.0, 205.0, 178.0, 212.0, 190.0, 200.0, 161.0, 187.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6874455833951449, "mean_inference_ms": 1.2101589324370519, "mean_action_processing_ms": 0.13297401832247158, "mean_env_wait_ms": 0.8652994841084243, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 1689600, "num_agent_steps_trained": 1689600, "num_env_steps_sampled": 844800, "num_env_steps_trained": 844800, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 844800, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 1689600, "timers": {"training_iteration_time_ms": 3639.566, "learn_time_ms": 1098.766, "learn_throughput": 11649.427, "synch_weights_time_ms": 13.026}, "counters": {"num_env_steps_sampled": 844800, "num_env_steps_trained": 844800, "num_agent_steps_sampled": 1689600, "num_agent_steps_trained": 1689600}, "done": false, "episodes_total": 2112, "training_iteration": 66, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-04-13", "timestamp": 1666580653, "time_this_iter_s": 3.6690165996551514, "time_total_s": 248.19442462921143, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, 
"device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, 
"sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": 
{"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", 
"observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, 
"keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function 
get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function 
gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 248.19442462921143, "timesteps_since_restore": 0, "iterations_since_restore": 66, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.73333333333333, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 129.0, "sparse_reward_min": 60, "sparse_reward_max": 160, "shaped_reward_mean": 111.57, "shaped_reward_min": 59, "shaped_reward_max": 139, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 11.24, "onion_pickup_agent_0_min": 5, "onion_pickup_agent_0_max": 19, "onion_pickup_agent_1_mean": 13.26, "onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 20, "useful_onion_pickup_agent_0_mean": 10.74, "useful_onion_pickup_agent_0_min": 3, "useful_onion_pickup_agent_0_max": 18, "useful_onion_pickup_agent_1_mean": 12.58, "useful_onion_pickup_agent_1_min": 6, "useful_onion_pickup_agent_1_max": 19, "onion_drop_agent_0_mean": 1.11, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 8, "onion_drop_agent_1_mean": 1.17, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.57, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, 
"useful_onion_drop_agent_1_mean": 0.62, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 9.8, "potting_onion_agent_0_min": 3, "potting_onion_agent_0_max": 18, "potting_onion_agent_1_mean": 11.77, "potting_onion_agent_1_min": 4, "potting_onion_agent_1_max": 18, "dish_pickup_agent_0_mean": 5.74, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 5.17, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 12, "useful_dish_pickup_agent_0_mean": 1.95, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 1.82, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 6, "dish_drop_agent_0_mean": 1.73, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 10, "dish_drop_agent_1_mean": 1.58, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 6, "useful_dish_drop_agent_0_mean": 0.67, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 4, "useful_dish_drop_agent_1_mean": 0.64, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 3.79, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 3.44, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 3.66, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 3.23, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.07, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 3, "soup_drop_agent_1_mean": 0.14, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 3, "optimal_onion_potting_agent_0_mean": 9.8, "optimal_onion_potting_agent_0_min": 3, "optimal_onion_potting_agent_0_max": 18, "optimal_onion_potting_agent_1_mean": 11.77, "optimal_onion_potting_agent_1_min": 4, "optimal_onion_potting_agent_1_max": 18, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 9.8, "viable_onion_potting_agent_0_min": 3, "viable_onion_potting_agent_0_max": 18, "viable_onion_potting_agent_1_mean": 11.77, "viable_onion_potting_agent_1_min": 4, "viable_onion_potting_agent_1_max": 18, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 2.7105054716034394e-21, "cur_lr": 0.0010000000474974513, "total_loss": -0.00021978653967380524, "policy_loss": 
-0.0004263484152033925, "vf_loss": 8.452777862548828, "vf_explained_var": 0.6492389440536499, "kl": 0.002371986396610737, "entropy": 1.277435064315796, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 857600, "num_env_steps_trained": 857600, "num_agent_steps_sampled": 1715200, "num_agent_steps_trained": 1715200}, "sampler_results": {"episode_reward_max": 459.0, "episode_reward_min": 179.0, "episode_reward_mean": 369.57, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 74.0}, "policy_reward_max": {"ppo": 245.0}, "policy_reward_mean": {"ppo": 184.785}, "custom_metrics": {"sparse_reward_mean": 129.0, "sparse_reward_min": 60, "sparse_reward_max": 160, "shaped_reward_mean": 111.57, "shaped_reward_min": 59, "shaped_reward_max": 139, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 11.24, "onion_pickup_agent_0_min": 5, "onion_pickup_agent_0_max": 19, 
"onion_pickup_agent_1_mean": 13.26, "onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 20, "useful_onion_pickup_agent_0_mean": 10.74, "useful_onion_pickup_agent_0_min": 3, "useful_onion_pickup_agent_0_max": 18, "useful_onion_pickup_agent_1_mean": 12.58, "useful_onion_pickup_agent_1_min": 6, "useful_onion_pickup_agent_1_max": 19, "onion_drop_agent_0_mean": 1.11, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 8, "onion_drop_agent_1_mean": 1.17, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.57, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 0.62, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 9.8, "potting_onion_agent_0_min": 3, "potting_onion_agent_0_max": 18, "potting_onion_agent_1_mean": 11.77, "potting_onion_agent_1_min": 4, "potting_onion_agent_1_max": 18, "dish_pickup_agent_0_mean": 5.74, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 5.17, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 12, "useful_dish_pickup_agent_0_mean": 1.95, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 1.82, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 6, "dish_drop_agent_0_mean": 1.73, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 10, "dish_drop_agent_1_mean": 1.58, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 6, "useful_dish_drop_agent_0_mean": 0.67, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 4, "useful_dish_drop_agent_1_mean": 0.64, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 3.79, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 3.44, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 3.66, 
"soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 3.23, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.07, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 3, "soup_drop_agent_1_mean": 0.14, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 3, "optimal_onion_potting_agent_0_mean": 9.8, "optimal_onion_potting_agent_0_min": 3, "optimal_onion_potting_agent_0_max": 18, "optimal_onion_potting_agent_1_mean": 11.77, "optimal_onion_potting_agent_1_min": 4, "optimal_onion_potting_agent_1_max": 18, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 9.8, "viable_onion_potting_agent_0_min": 3, "viable_onion_potting_agent_0_max": 18, "viable_onion_potting_agent_1_mean": 11.77, "viable_onion_potting_agent_1_min": 4, "viable_onion_potting_agent_1_max": 18, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, 
"useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [381.0, 355.0, 398.0, 405.0, 255.0, 344.0, 347.0, 344.0, 179.0, 330.0, 390.0, 299.0, 343.0, 356.0, 350.0, 362.0, 450.0, 304.0, 281.0, 396.0, 401.0, 356.0, 393.0, 392.0, 353.0, 361.0, 353.0, 342.0, 416.0, 387.0, 361.0, 447.0, 348.0, 402.0, 349.0, 310.0, 292.0, 456.0, 390.0, 381.0, 341.0, 387.0, 384.0, 341.0, 345.0, 444.0, 444.0, 411.0, 401.0, 438.0, 282.0, 441.0, 344.0, 352.0, 408.0, 384.0, 384.0, 456.0, 348.0, 313.0, 399.0, 395.0, 281.0, 225.0, 390.0, 390.0, 390.0, 348.0, 447.0, 381.0, 315.0, 444.0, 390.0, 402.0, 393.0, 459.0, 450.0, 435.0, 249.0, 298.0, 410.0, 405.0, 435.0, 404.0, 399.0, 447.0, 396.0, 378.0, 447.0, 354.0, 270.0, 341.0, 399.0, 396.0, 285.0, 302.0, 350.0, 237.0, 419.0, 395.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [169.0, 212.0, 184.0, 171.0, 203.0, 195.0, 197.0, 208.0, 119.0, 136.0, 173.0, 171.0, 178.0, 169.0, 167.0, 177.0, 74.0, 105.0, 168.0, 162.0, 187.0, 203.0, 158.0, 141.0, 177.0, 166.0, 168.0, 188.0, 163.0, 187.0, 178.0, 184.0, 241.0, 209.0, 152.0, 152.0, 134.0, 147.0, 191.0, 205.0, 195.0, 206.0, 174.0, 
182.0, 188.0, 205.0, 188.0, 204.0, 173.0, 180.0, 178.0, 183.0, 181.0, 172.0, 174.0, 168.0, 186.0, 230.0, 187.0, 200.0, 194.0, 167.0, 217.0, 230.0, 168.0, 180.0, 194.0, 208.0, 185.0, 164.0, 158.0, 152.0, 135.0, 157.0, 228.0, 228.0, 205.0, 185.0, 184.0, 197.0, 167.0, 174.0, 179.0, 208.0, 205.0, 179.0, 174.0, 167.0, 174.0, 171.0, 222.0, 222.0, 216.0, 228.0, 204.0, 207.0, 203.0, 198.0, 226.0, 212.0, 139.0, 143.0, 223.0, 218.0, 183.0, 161.0, 184.0, 168.0, 206.0, 202.0, 196.0, 188.0, 177.0, 207.0, 217.0, 239.0, 155.0, 193.0, 150.0, 163.0, 200.0, 199.0, 209.0, 186.0, 137.0, 144.0, 120.0, 105.0, 185.0, 205.0, 178.0, 212.0, 190.0, 200.0, 161.0, 187.0, 236.0, 211.0, 179.0, 202.0, 154.0, 161.0, 211.0, 233.0, 198.0, 192.0, 210.0, 192.0, 197.0, 196.0, 245.0, 214.0, 235.0, 215.0, 220.0, 215.0, 116.0, 133.0, 145.0, 153.0, 203.0, 207.0, 210.0, 195.0, 202.0, 233.0, 187.0, 217.0, 191.0, 208.0, 223.0, 224.0, 186.0, 210.0, 212.0, 166.0, 227.0, 220.0, 169.0, 185.0, 124.0, 146.0, 189.0, 152.0, 190.0, 209.0, 187.0, 209.0, 135.0, 150.0, 146.0, 156.0, 190.0, 160.0, 119.0, 118.0, 208.0, 211.0, 203.0, 192.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6872752632050905, "mean_inference_ms": 1.2099295792073617, "mean_action_processing_ms": 0.1329654963865704, "mean_env_wait_ms": 0.8645353892883192, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 459.0, "episode_reward_min": 179.0, "episode_reward_mean": 369.57, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 74.0}, "policy_reward_max": {"ppo": 245.0}, "policy_reward_mean": {"ppo": 184.785}, "hist_stats": {"episode_reward": [381.0, 355.0, 398.0, 405.0, 255.0, 344.0, 347.0, 344.0, 179.0, 330.0, 390.0, 299.0, 343.0, 356.0, 350.0, 362.0, 450.0, 304.0, 281.0, 396.0, 401.0, 356.0, 393.0, 392.0, 353.0, 361.0, 353.0, 342.0, 416.0, 387.0, 361.0, 447.0, 348.0, 402.0, 349.0, 310.0, 292.0, 456.0, 390.0, 381.0, 341.0, 387.0, 384.0, 341.0, 345.0, 444.0, 444.0, 411.0, 401.0, 438.0, 282.0, 
441.0, 344.0, 352.0, 408.0, 384.0, 384.0, 456.0, 348.0, 313.0, 399.0, 395.0, 281.0, 225.0, 390.0, 390.0, 390.0, 348.0, 447.0, 381.0, 315.0, 444.0, 390.0, 402.0, 393.0, 459.0, 450.0, 435.0, 249.0, 298.0, 410.0, 405.0, 435.0, 404.0, 399.0, 447.0, 396.0, 378.0, 447.0, 354.0, 270.0, 341.0, 399.0, 396.0, 285.0, 302.0, 350.0, 237.0, 419.0, 395.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [169.0, 212.0, 184.0, 171.0, 203.0, 195.0, 197.0, 208.0, 119.0, 136.0, 173.0, 171.0, 178.0, 169.0, 167.0, 177.0, 74.0, 105.0, 168.0, 162.0, 187.0, 203.0, 158.0, 141.0, 177.0, 166.0, 168.0, 188.0, 163.0, 187.0, 178.0, 184.0, 241.0, 209.0, 152.0, 152.0, 134.0, 147.0, 191.0, 205.0, 195.0, 206.0, 174.0, 182.0, 188.0, 205.0, 188.0, 204.0, 173.0, 180.0, 178.0, 183.0, 181.0, 172.0, 174.0, 168.0, 186.0, 230.0, 187.0, 200.0, 194.0, 167.0, 217.0, 230.0, 168.0, 180.0, 194.0, 208.0, 185.0, 164.0, 158.0, 152.0, 135.0, 157.0, 228.0, 228.0, 205.0, 185.0, 184.0, 197.0, 167.0, 174.0, 179.0, 208.0, 205.0, 179.0, 174.0, 167.0, 174.0, 171.0, 222.0, 222.0, 216.0, 228.0, 204.0, 207.0, 203.0, 198.0, 226.0, 212.0, 139.0, 143.0, 223.0, 218.0, 183.0, 161.0, 184.0, 168.0, 206.0, 202.0, 196.0, 188.0, 177.0, 207.0, 217.0, 239.0, 155.0, 193.0, 150.0, 163.0, 200.0, 199.0, 209.0, 186.0, 137.0, 144.0, 120.0, 105.0, 185.0, 205.0, 178.0, 212.0, 190.0, 200.0, 161.0, 187.0, 236.0, 211.0, 179.0, 202.0, 154.0, 161.0, 211.0, 233.0, 198.0, 192.0, 210.0, 192.0, 197.0, 196.0, 245.0, 214.0, 235.0, 215.0, 220.0, 215.0, 116.0, 133.0, 145.0, 
153.0, 203.0, 207.0, 210.0, 195.0, 202.0, 233.0, 187.0, 217.0, 191.0, 208.0, 223.0, 224.0, 186.0, 210.0, 212.0, 166.0, 227.0, 220.0, 169.0, 185.0, 124.0, 146.0, 189.0, 152.0, 190.0, 209.0, 187.0, 209.0, 135.0, 150.0, 146.0, 156.0, 190.0, 160.0, 119.0, 118.0, 208.0, 211.0, 203.0, 192.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6872752632050905, "mean_inference_ms": 1.2099295792073617, "mean_action_processing_ms": 0.1329654963865704, "mean_env_wait_ms": 0.8645353892883192, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 1715200, "num_agent_steps_trained": 1715200, "num_env_steps_sampled": 857600, "num_env_steps_trained": 857600, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 857600, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 1715200, "timers": {"training_iteration_time_ms": 3630.775, "learn_time_ms": 1088.931, "learn_throughput": 11754.645, "synch_weights_time_ms": 12.476}, "counters": {"num_env_steps_sampled": 857600, "num_env_steps_trained": 857600, "num_agent_steps_sampled": 1715200, "num_agent_steps_trained": 1715200}, "done": false, "episodes_total": 2144, "training_iteration": 67, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-04-17", "timestamp": 1666580657, "time_this_iter_s": 3.681520700454712, "time_total_s": 251.87594532966614, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, 
"device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, 
"sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": 
{"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", 
"observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, 
"keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function 
get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function 
gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 251.87594532966614, "timesteps_since_restore": 0, "iterations_since_restore": 67, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.82, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 129.6, "sparse_reward_min": 60, "sparse_reward_max": 160, "shaped_reward_mean": 112.1, "shaped_reward_min": 65, "shaped_reward_max": 142, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 11.87, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 13.09, "onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 11.23, "useful_onion_pickup_agent_0_min": 4, "useful_onion_pickup_agent_0_max": 18, "useful_onion_pickup_agent_1_mean": 12.34, "useful_onion_pickup_agent_1_min": 6, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 1.2, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 8, "onion_drop_agent_1_mean": 1.3, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 7, "useful_onion_drop_agent_0_mean": 0.64, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, 
"useful_onion_drop_agent_1_mean": 0.75, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 6, "potting_onion_agent_0_mean": 10.21, "potting_onion_agent_0_min": 3, "potting_onion_agent_0_max": 18, "potting_onion_agent_1_mean": 11.42, "potting_onion_agent_1_min": 4, "potting_onion_agent_1_max": 18, "dish_pickup_agent_0_mean": 5.37, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 5.18, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 1.91, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 2.01, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 1.67, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 1.44, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.71, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 5, "useful_dish_drop_agent_1_mean": 0.55, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 3.54, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 3.68, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 3.44, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 3.48, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.07, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 3, "soup_drop_agent_1_mean": 0.12, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 3, "optimal_onion_potting_agent_0_mean": 10.21, "optimal_onion_potting_agent_0_min": 3, "optimal_onion_potting_agent_0_max": 18, "optimal_onion_potting_agent_1_mean": 11.42, "optimal_onion_potting_agent_1_min": 4, "optimal_onion_potting_agent_1_max": 18, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 10.21, "viable_onion_potting_agent_0_min": 3, "viable_onion_potting_agent_0_max": 18, "viable_onion_potting_agent_1_mean": 11.42, "viable_onion_potting_agent_1_min": 4, "viable_onion_potting_agent_1_max": 18, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 1.3552527358017197e-21, "cur_lr": 0.0010000000474974513, "total_loss": -0.002791440347209573, "policy_loss": -0.00296858511865139, 
"vf_loss": 8.239323616027832, "vf_explained_var": 0.6559525728225708, "kl": 0.0022639285307377577, "entropy": 1.2935813665390015, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 870400, "num_env_steps_trained": 870400, "num_agent_steps_sampled": 1740800, "num_agent_steps_trained": 1740800}, "sampler_results": {"episode_reward_max": 462.0, "episode_reward_min": 190.0, "episode_reward_mean": 371.3, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 94.0}, "policy_reward_max": {"ppo": 245.0}, "policy_reward_mean": {"ppo": 185.65}, "custom_metrics": {"sparse_reward_mean": 129.6, "sparse_reward_min": 60, "sparse_reward_max": 160, "shaped_reward_mean": 112.1, "shaped_reward_min": 65, "shaped_reward_max": 142, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 11.87, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 13.09, 
"onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 11.23, "useful_onion_pickup_agent_0_min": 4, "useful_onion_pickup_agent_0_max": 18, "useful_onion_pickup_agent_1_mean": 12.34, "useful_onion_pickup_agent_1_min": 6, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 1.2, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 8, "onion_drop_agent_1_mean": 1.3, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 7, "useful_onion_drop_agent_0_mean": 0.64, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 0.75, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 6, "potting_onion_agent_0_mean": 10.21, "potting_onion_agent_0_min": 3, "potting_onion_agent_0_max": 18, "potting_onion_agent_1_mean": 11.42, "potting_onion_agent_1_min": 4, "potting_onion_agent_1_max": 18, "dish_pickup_agent_0_mean": 5.37, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 5.18, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 1.91, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 2.01, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 1.67, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 1.44, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.71, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 5, "useful_dish_drop_agent_1_mean": 0.55, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 3.54, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 3.68, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 3.44, "soup_delivery_agent_0_min": 0, 
"soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 3.48, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.07, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 3, "soup_drop_agent_1_mean": 0.12, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 3, "optimal_onion_potting_agent_0_mean": 10.21, "optimal_onion_potting_agent_0_min": 3, "optimal_onion_potting_agent_0_max": 18, "optimal_onion_potting_agent_1_mean": 11.42, "optimal_onion_potting_agent_1_min": 4, "optimal_onion_potting_agent_1_max": 18, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 10.21, "viable_onion_potting_agent_0_min": 3, "viable_onion_potting_agent_0_max": 18, "viable_onion_potting_agent_1_mean": 11.42, "viable_onion_potting_agent_1_min": 4, "viable_onion_potting_agent_1_max": 18, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [348.0, 402.0, 349.0, 310.0, 292.0, 456.0, 390.0, 381.0, 341.0, 387.0, 384.0, 341.0, 345.0, 444.0, 444.0, 411.0, 401.0, 438.0, 282.0, 441.0, 344.0, 352.0, 408.0, 384.0, 384.0, 456.0, 348.0, 313.0, 399.0, 395.0, 281.0, 225.0, 390.0, 390.0, 390.0, 348.0, 447.0, 381.0, 315.0, 444.0, 390.0, 402.0, 393.0, 459.0, 450.0, 435.0, 249.0, 298.0, 410.0, 405.0, 435.0, 404.0, 399.0, 447.0, 396.0, 378.0, 447.0, 354.0, 270.0, 341.0, 399.0, 396.0, 285.0, 302.0, 350.0, 237.0, 419.0, 395.0, 288.0, 441.0, 290.0, 287.0, 453.0, 399.0, 381.0, 347.0, 405.0, 355.0, 381.0, 402.0, 290.0, 404.0, 247.0, 305.0, 347.0, 263.0, 462.0, 393.0, 450.0, 402.0, 190.0, 290.0, 444.0, 399.0, 396.0, 358.0, 387.0, 393.0, 407.0, 348.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [168.0, 180.0, 194.0, 208.0, 185.0, 164.0, 158.0, 152.0, 135.0, 157.0, 228.0, 228.0, 205.0, 185.0, 184.0, 197.0, 167.0, 174.0, 179.0, 208.0, 205.0, 179.0, 174.0, 167.0, 174.0, 171.0, 222.0, 222.0, 216.0, 228.0, 204.0, 207.0, 203.0, 198.0, 226.0, 212.0, 139.0, 143.0, 223.0, 218.0, 183.0, 161.0, 184.0, 168.0, 206.0, 202.0, 196.0, 188.0, 177.0, 
207.0, 217.0, 239.0, 155.0, 193.0, 150.0, 163.0, 200.0, 199.0, 209.0, 186.0, 137.0, 144.0, 120.0, 105.0, 185.0, 205.0, 178.0, 212.0, 190.0, 200.0, 161.0, 187.0, 236.0, 211.0, 179.0, 202.0, 154.0, 161.0, 211.0, 233.0, 198.0, 192.0, 210.0, 192.0, 197.0, 196.0, 245.0, 214.0, 235.0, 215.0, 220.0, 215.0, 116.0, 133.0, 145.0, 153.0, 203.0, 207.0, 210.0, 195.0, 202.0, 233.0, 187.0, 217.0, 191.0, 208.0, 223.0, 224.0, 186.0, 210.0, 212.0, 166.0, 227.0, 220.0, 169.0, 185.0, 124.0, 146.0, 189.0, 152.0, 190.0, 209.0, 187.0, 209.0, 135.0, 150.0, 146.0, 156.0, 190.0, 160.0, 119.0, 118.0, 208.0, 211.0, 203.0, 192.0, 140.0, 148.0, 214.0, 227.0, 138.0, 152.0, 133.0, 154.0, 234.0, 219.0, 202.0, 197.0, 178.0, 203.0, 182.0, 165.0, 206.0, 199.0, 178.0, 177.0, 182.0, 199.0, 197.0, 205.0, 136.0, 154.0, 201.0, 203.0, 107.0, 140.0, 149.0, 156.0, 161.0, 186.0, 135.0, 128.0, 221.0, 241.0, 200.0, 193.0, 205.0, 245.0, 198.0, 204.0, 94.0, 96.0, 144.0, 146.0, 222.0, 222.0, 189.0, 210.0, 188.0, 208.0, 180.0, 178.0, 196.0, 191.0, 193.0, 200.0, 200.0, 207.0, 164.0, 184.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6871706321490018, "mean_inference_ms": 1.2098465022841542, "mean_action_processing_ms": 0.13297585985904375, "mean_env_wait_ms": 0.8639868307798764, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 462.0, "episode_reward_min": 190.0, "episode_reward_mean": 371.3, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 94.0}, "policy_reward_max": {"ppo": 245.0}, "policy_reward_mean": {"ppo": 185.65}, "hist_stats": {"episode_reward": [348.0, 402.0, 349.0, 310.0, 292.0, 456.0, 390.0, 381.0, 341.0, 387.0, 384.0, 341.0, 345.0, 444.0, 444.0, 411.0, 401.0, 438.0, 282.0, 441.0, 344.0, 352.0, 408.0, 384.0, 384.0, 456.0, 348.0, 313.0, 399.0, 395.0, 281.0, 225.0, 390.0, 390.0, 390.0, 348.0, 447.0, 381.0, 315.0, 444.0, 390.0, 402.0, 393.0, 459.0, 450.0, 435.0, 249.0, 298.0, 410.0, 405.0, 435.0, 404.0, 399.0, 447.0, 396.0, 378.0, 447.0, 
354.0, 270.0, 341.0, 399.0, 396.0, 285.0, 302.0, 350.0, 237.0, 419.0, 395.0, 288.0, 441.0, 290.0, 287.0, 453.0, 399.0, 381.0, 347.0, 405.0, 355.0, 381.0, 402.0, 290.0, 404.0, 247.0, 305.0, 347.0, 263.0, 462.0, 393.0, 450.0, 402.0, 190.0, 290.0, 444.0, 399.0, 396.0, 358.0, 387.0, 393.0, 407.0, 348.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [168.0, 180.0, 194.0, 208.0, 185.0, 164.0, 158.0, 152.0, 135.0, 157.0, 228.0, 228.0, 205.0, 185.0, 184.0, 197.0, 167.0, 174.0, 179.0, 208.0, 205.0, 179.0, 174.0, 167.0, 174.0, 171.0, 222.0, 222.0, 216.0, 228.0, 204.0, 207.0, 203.0, 198.0, 226.0, 212.0, 139.0, 143.0, 223.0, 218.0, 183.0, 161.0, 184.0, 168.0, 206.0, 202.0, 196.0, 188.0, 177.0, 207.0, 217.0, 239.0, 155.0, 193.0, 150.0, 163.0, 200.0, 199.0, 209.0, 186.0, 137.0, 144.0, 120.0, 105.0, 185.0, 205.0, 178.0, 212.0, 190.0, 200.0, 161.0, 187.0, 236.0, 211.0, 179.0, 202.0, 154.0, 161.0, 211.0, 233.0, 198.0, 192.0, 210.0, 192.0, 197.0, 196.0, 245.0, 214.0, 235.0, 215.0, 220.0, 215.0, 116.0, 133.0, 145.0, 153.0, 203.0, 207.0, 210.0, 195.0, 202.0, 233.0, 187.0, 217.0, 191.0, 208.0, 223.0, 224.0, 186.0, 210.0, 212.0, 166.0, 227.0, 220.0, 169.0, 185.0, 124.0, 146.0, 189.0, 152.0, 190.0, 209.0, 187.0, 209.0, 135.0, 150.0, 146.0, 156.0, 190.0, 160.0, 119.0, 118.0, 208.0, 211.0, 203.0, 192.0, 140.0, 148.0, 214.0, 227.0, 138.0, 152.0, 133.0, 154.0, 234.0, 219.0, 202.0, 197.0, 178.0, 203.0, 182.0, 165.0, 206.0, 199.0, 178.0, 177.0, 182.0, 199.0, 197.0, 205.0, 136.0, 154.0, 201.0, 203.0, 107.0, 
140.0, 149.0, 156.0, 161.0, 186.0, 135.0, 128.0, 221.0, 241.0, 200.0, 193.0, 205.0, 245.0, 198.0, 204.0, 94.0, 96.0, 144.0, 146.0, 222.0, 222.0, 189.0, 210.0, 188.0, 208.0, 180.0, 178.0, 196.0, 191.0, 193.0, 200.0, 200.0, 207.0, 164.0, 184.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6871706321490018, "mean_inference_ms": 1.2098465022841542, "mean_action_processing_ms": 0.13297585985904375, "mean_env_wait_ms": 0.8639868307798764, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 1740800, "num_agent_steps_trained": 1740800, "num_env_steps_sampled": 870400, "num_env_steps_trained": 870400, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 870400, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 1740800, "timers": {"training_iteration_time_ms": 3611.638, "learn_time_ms": 1084.248, "learn_throughput": 11805.418, "synch_weights_time_ms": 12.855}, "counters": {"num_env_steps_sampled": 870400, "num_env_steps_trained": 870400, "num_agent_steps_sampled": 1740800, "num_agent_steps_trained": 1740800}, "done": false, "episodes_total": 2176, "training_iteration": 68, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-04-21", "timestamp": 1666580661, "time_this_iter_s": 3.684041738510132, "time_total_s": 255.55998706817627, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": 
true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, 
"train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": 
{"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, 
"enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 255.55998706817627, "timesteps_since_restore": 0, "iterations_since_restore": 68, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 21.349999999999998, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 132.2, "sparse_reward_min": 60, "sparse_reward_max": 160, "shaped_reward_mean": 114.32, "shaped_reward_min": 70, "shaped_reward_max": 142, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 12.59, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 12.76, "onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 11.92, "useful_onion_pickup_agent_0_min": 4, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 12.14, "useful_onion_pickup_agent_1_min": 6, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 1.25, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 8, "onion_drop_agent_1_mean": 1.25, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 7, "useful_onion_drop_agent_0_mean": 0.61, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, 
"useful_onion_drop_agent_1_mean": 0.79, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 6, "potting_onion_agent_0_mean": 10.85, "potting_onion_agent_0_min": 3, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 11.14, "potting_onion_agent_1_min": 4, "potting_onion_agent_1_max": 19, "dish_pickup_agent_0_mean": 5.11, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 5.59, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 12, "useful_dish_pickup_agent_0_mean": 1.92, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 2.23, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 1.62, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 1.57, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.77, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 5, "useful_dish_drop_agent_1_mean": 0.6, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 4, "soup_pickup_agent_0_mean": 3.39, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 3.88, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 3.3, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 3.7, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.06, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 3, "soup_drop_agent_1_mean": 0.1, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 10.85, "optimal_onion_potting_agent_0_min": 3, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 11.14, "optimal_onion_potting_agent_1_min": 4, "optimal_onion_potting_agent_1_max": 19, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 10.85, "viable_onion_potting_agent_0_min": 3, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 11.14, "viable_onion_potting_agent_1_min": 4, "viable_onion_potting_agent_1_max": 19, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 6.776263679008599e-22, "cur_lr": 0.0010000000474974513, "total_loss": -0.0012864982709288597, "policy_loss": 
-0.0014888072619214654, "vf_loss": 8.330397605895996, "vf_explained_var": 0.6499449014663696, "kl": 0.0019524524686858058, "entropy": 1.261465311050415, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 883200, "num_env_steps_trained": 883200, "num_agent_steps_sampled": 1766400, "num_agent_steps_trained": 1766400}, "sampler_results": {"episode_reward_max": 462.0, "episode_reward_min": 190.0, "episode_reward_mean": 378.72, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 94.0}, "policy_reward_max": {"ppo": 249.0}, "policy_reward_mean": {"ppo": 189.36}, "custom_metrics": {"sparse_reward_mean": 132.2, "sparse_reward_min": 60, "sparse_reward_max": 160, "shaped_reward_mean": 114.32, "shaped_reward_min": 70, "shaped_reward_max": 142, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 12.59, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 24, 
"onion_pickup_agent_1_mean": 12.76, "onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 11.92, "useful_onion_pickup_agent_0_min": 4, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 12.14, "useful_onion_pickup_agent_1_min": 6, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 1.25, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 8, "onion_drop_agent_1_mean": 1.25, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 7, "useful_onion_drop_agent_0_mean": 0.61, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 0.79, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 6, "potting_onion_agent_0_mean": 10.85, "potting_onion_agent_0_min": 3, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 11.14, "potting_onion_agent_1_min": 4, "potting_onion_agent_1_max": 19, "dish_pickup_agent_0_mean": 5.11, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 5.59, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 12, "useful_dish_pickup_agent_0_mean": 1.92, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 2.23, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 1.62, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 1.57, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.77, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 5, "useful_dish_drop_agent_1_mean": 0.6, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 4, "soup_pickup_agent_0_mean": 3.39, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 3.88, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 3.3, 
"soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 3.7, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.06, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 3, "soup_drop_agent_1_mean": 0.1, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 10.85, "optimal_onion_potting_agent_0_min": 3, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 11.14, "optimal_onion_potting_agent_1_min": 4, "optimal_onion_potting_agent_1_max": 19, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 10.85, "viable_onion_potting_agent_0_min": 3, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 11.14, "viable_onion_potting_agent_1_min": 4, "viable_onion_potting_agent_1_max": 19, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, 
"useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [390.0, 390.0, 390.0, 348.0, 447.0, 381.0, 315.0, 444.0, 390.0, 402.0, 393.0, 459.0, 450.0, 435.0, 249.0, 298.0, 410.0, 405.0, 435.0, 404.0, 399.0, 447.0, 396.0, 378.0, 447.0, 354.0, 270.0, 341.0, 399.0, 396.0, 285.0, 302.0, 350.0, 237.0, 419.0, 395.0, 288.0, 441.0, 290.0, 287.0, 453.0, 399.0, 381.0, 347.0, 405.0, 355.0, 381.0, 402.0, 290.0, 404.0, 247.0, 305.0, 347.0, 263.0, 462.0, 393.0, 450.0, 402.0, 190.0, 290.0, 444.0, 399.0, 396.0, 358.0, 387.0, 393.0, 407.0, 348.0, 396.0, 441.0, 384.0, 299.0, 444.0, 381.0, 395.0, 456.0, 247.0, 416.0, 447.0, 341.0, 390.0, 449.0, 450.0, 450.0, 358.0, 390.0, 456.0, 342.0, 293.0, 387.0, 330.0, 462.0, 410.0, 362.0, 408.0, 387.0, 459.0, 393.0, 399.0, 396.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [185.0, 205.0, 178.0, 212.0, 190.0, 200.0, 161.0, 187.0, 236.0, 211.0, 179.0, 202.0, 154.0, 161.0, 211.0, 233.0, 198.0, 192.0, 210.0, 192.0, 197.0, 196.0, 245.0, 214.0, 235.0, 215.0, 220.0, 215.0, 116.0, 133.0, 145.0, 153.0, 203.0, 207.0, 210.0, 195.0, 202.0, 233.0, 187.0, 217.0, 191.0, 208.0, 223.0, 
224.0, 186.0, 210.0, 212.0, 166.0, 227.0, 220.0, 169.0, 185.0, 124.0, 146.0, 189.0, 152.0, 190.0, 209.0, 187.0, 209.0, 135.0, 150.0, 146.0, 156.0, 190.0, 160.0, 119.0, 118.0, 208.0, 211.0, 203.0, 192.0, 140.0, 148.0, 214.0, 227.0, 138.0, 152.0, 133.0, 154.0, 234.0, 219.0, 202.0, 197.0, 178.0, 203.0, 182.0, 165.0, 206.0, 199.0, 178.0, 177.0, 182.0, 199.0, 197.0, 205.0, 136.0, 154.0, 201.0, 203.0, 107.0, 140.0, 149.0, 156.0, 161.0, 186.0, 135.0, 128.0, 221.0, 241.0, 200.0, 193.0, 205.0, 245.0, 198.0, 204.0, 94.0, 96.0, 144.0, 146.0, 222.0, 222.0, 189.0, 210.0, 188.0, 208.0, 180.0, 178.0, 196.0, 191.0, 193.0, 200.0, 200.0, 207.0, 164.0, 184.0, 205.0, 191.0, 228.0, 213.0, 203.0, 181.0, 156.0, 143.0, 214.0, 230.0, 204.0, 177.0, 187.0, 208.0, 229.0, 227.0, 122.0, 125.0, 214.0, 202.0, 220.0, 227.0, 166.0, 175.0, 188.0, 202.0, 230.0, 219.0, 225.0, 225.0, 222.0, 228.0, 188.0, 170.0, 175.0, 215.0, 231.0, 225.0, 173.0, 169.0, 150.0, 143.0, 195.0, 192.0, 156.0, 174.0, 228.0, 234.0, 190.0, 220.0, 172.0, 190.0, 206.0, 202.0, 191.0, 196.0, 249.0, 210.0, 196.0, 197.0, 196.0, 203.0, 194.0, 202.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.687156924098904, "mean_inference_ms": 1.2097514898289816, "mean_action_processing_ms": 0.13298900302876707, "mean_env_wait_ms": 0.8634657831003358, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 462.0, "episode_reward_min": 190.0, "episode_reward_mean": 378.72, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 94.0}, "policy_reward_max": {"ppo": 249.0}, "policy_reward_mean": {"ppo": 189.36}, "hist_stats": {"episode_reward": [390.0, 390.0, 390.0, 348.0, 447.0, 381.0, 315.0, 444.0, 390.0, 402.0, 393.0, 459.0, 450.0, 435.0, 249.0, 298.0, 410.0, 405.0, 435.0, 404.0, 399.0, 447.0, 396.0, 378.0, 447.0, 354.0, 270.0, 341.0, 399.0, 396.0, 285.0, 302.0, 350.0, 237.0, 419.0, 395.0, 288.0, 441.0, 290.0, 287.0, 453.0, 399.0, 381.0, 347.0, 405.0, 355.0, 381.0, 402.0, 290.0, 404.0, 247.0, 
305.0, 347.0, 263.0, 462.0, 393.0, 450.0, 402.0, 190.0, 290.0, 444.0, 399.0, 396.0, 358.0, 387.0, 393.0, 407.0, 348.0, 396.0, 441.0, 384.0, 299.0, 444.0, 381.0, 395.0, 456.0, 247.0, 416.0, 447.0, 341.0, 390.0, 449.0, 450.0, 450.0, 358.0, 390.0, 456.0, 342.0, 293.0, 387.0, 330.0, 462.0, 410.0, 362.0, 408.0, 387.0, 459.0, 393.0, 399.0, 396.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [185.0, 205.0, 178.0, 212.0, 190.0, 200.0, 161.0, 187.0, 236.0, 211.0, 179.0, 202.0, 154.0, 161.0, 211.0, 233.0, 198.0, 192.0, 210.0, 192.0, 197.0, 196.0, 245.0, 214.0, 235.0, 215.0, 220.0, 215.0, 116.0, 133.0, 145.0, 153.0, 203.0, 207.0, 210.0, 195.0, 202.0, 233.0, 187.0, 217.0, 191.0, 208.0, 223.0, 224.0, 186.0, 210.0, 212.0, 166.0, 227.0, 220.0, 169.0, 185.0, 124.0, 146.0, 189.0, 152.0, 190.0, 209.0, 187.0, 209.0, 135.0, 150.0, 146.0, 156.0, 190.0, 160.0, 119.0, 118.0, 208.0, 211.0, 203.0, 192.0, 140.0, 148.0, 214.0, 227.0, 138.0, 152.0, 133.0, 154.0, 234.0, 219.0, 202.0, 197.0, 178.0, 203.0, 182.0, 165.0, 206.0, 199.0, 178.0, 177.0, 182.0, 199.0, 197.0, 205.0, 136.0, 154.0, 201.0, 203.0, 107.0, 140.0, 149.0, 156.0, 161.0, 186.0, 135.0, 128.0, 221.0, 241.0, 200.0, 193.0, 205.0, 245.0, 198.0, 204.0, 94.0, 96.0, 144.0, 146.0, 222.0, 222.0, 189.0, 210.0, 188.0, 208.0, 180.0, 178.0, 196.0, 191.0, 193.0, 200.0, 200.0, 207.0, 164.0, 184.0, 205.0, 191.0, 228.0, 213.0, 203.0, 181.0, 156.0, 143.0, 214.0, 230.0, 204.0, 177.0, 187.0, 208.0, 229.0, 227.0, 122.0, 125.0, 214.0, 202.0, 220.0, 227.0, 166.0, 
175.0, 188.0, 202.0, 230.0, 219.0, 225.0, 225.0, 222.0, 228.0, 188.0, 170.0, 175.0, 215.0, 231.0, 225.0, 173.0, 169.0, 150.0, 143.0, 195.0, 192.0, 156.0, 174.0, 228.0, 234.0, 190.0, 220.0, 172.0, 190.0, 206.0, 202.0, 191.0, 196.0, 249.0, 210.0, 196.0, 197.0, 196.0, 203.0, 194.0, 202.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.687156924098904, "mean_inference_ms": 1.2097514898289816, "mean_action_processing_ms": 0.13298900302876707, "mean_env_wait_ms": 0.8634657831003358, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 1766400, "num_agent_steps_trained": 1766400, "num_env_steps_sampled": 883200, "num_env_steps_trained": 883200, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 883200, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 1766400, "timers": {"training_iteration_time_ms": 3615.423, "learn_time_ms": 1085.045, "learn_throughput": 11796.743, "synch_weights_time_ms": 12.673}, "counters": {"num_env_steps_sampled": 883200, "num_env_steps_trained": 883200, "num_agent_steps_sampled": 1766400, "num_agent_steps_trained": 1766400}, "done": false, "episodes_total": 2208, "training_iteration": 69, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-04-24", "timestamp": 1666580664, "time_this_iter_s": 3.738740921020508, "time_total_s": 259.2987279891968, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, 
"device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, 
"sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": 
{"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", 
"observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, 
"keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function 
get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function 
gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 259.2987279891968, "timesteps_since_restore": 0, "iterations_since_restore": 69, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.48, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 133.4, "sparse_reward_min": 60, "sparse_reward_max": 160, "shaped_reward_mean": 114.6, "shaped_reward_min": 70, "shaped_reward_max": 142, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 12.96, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 12.34, "onion_pickup_agent_1_min": 3, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 12.32, "useful_onion_pickup_agent_0_min": 4, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 11.79, "useful_onion_pickup_agent_1_min": 3, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 1.12, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 1.24, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 7, "useful_onion_drop_agent_0_mean": 0.62, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, 
"useful_onion_drop_agent_1_mean": 0.72, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 6, "potting_onion_agent_0_mean": 11.39, "potting_onion_agent_0_min": 3, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 10.75, "potting_onion_agent_1_min": 3, "potting_onion_agent_1_max": 19, "dish_pickup_agent_0_mean": 5.07, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.86, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 12, "useful_dish_pickup_agent_0_mean": 1.81, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 2.25, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 1.56, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 1.77, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.75, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 5, "useful_dish_drop_agent_1_mean": 0.68, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 4, "soup_pickup_agent_0_mean": 3.4, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 3.89, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 3.3, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 3.7, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.06, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 3, "soup_drop_agent_1_mean": 0.09, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 11.39, "optimal_onion_potting_agent_0_min": 3, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 10.75, "optimal_onion_potting_agent_1_min": 3, "optimal_onion_potting_agent_1_max": 19, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 11.39, "viable_onion_potting_agent_0_min": 3, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 10.75, "viable_onion_potting_agent_1_min": 3, "viable_onion_potting_agent_1_max": 19, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 3.3881318395042993e-22, "cur_lr": 0.0010000000474974513, "total_loss": -0.0034247650764882565, "policy_loss": 
-0.0036217491142451763, "vf_loss": 8.257144927978516, "vf_explained_var": 0.6091008186340332, "kl": 0.002094808267429471, "entropy": 1.2574673891067505, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 896000, "num_env_steps_trained": 896000, "num_agent_steps_sampled": 1792000, "num_agent_steps_trained": 1792000}, "sampler_results": {"episode_reward_max": 462.0, "episode_reward_min": 190.0, "episode_reward_mean": 381.4, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 94.0}, "policy_reward_max": {"ppo": 253.0}, "policy_reward_mean": {"ppo": 190.7}, "custom_metrics": {"sparse_reward_mean": 133.4, "sparse_reward_min": 60, "sparse_reward_max": 160, "shaped_reward_mean": 114.6, "shaped_reward_min": 70, "shaped_reward_max": 142, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 12.96, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 24, 
"onion_pickup_agent_1_mean": 12.34, "onion_pickup_agent_1_min": 3, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 12.32, "useful_onion_pickup_agent_0_min": 4, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 11.79, "useful_onion_pickup_agent_1_min": 3, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 1.12, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 1.24, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 7, "useful_onion_drop_agent_0_mean": 0.62, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, "useful_onion_drop_agent_1_mean": 0.72, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 6, "potting_onion_agent_0_mean": 11.39, "potting_onion_agent_0_min": 3, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 10.75, "potting_onion_agent_1_min": 3, "potting_onion_agent_1_max": 19, "dish_pickup_agent_0_mean": 5.07, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.86, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 12, "useful_dish_pickup_agent_0_mean": 1.81, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 2.25, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 1.56, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 1.77, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.75, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 5, "useful_dish_drop_agent_1_mean": 0.68, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 4, "soup_pickup_agent_0_mean": 3.4, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 3.89, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 3.3, 
"soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 3.7, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.06, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 3, "soup_drop_agent_1_mean": 0.09, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 11.39, "optimal_onion_potting_agent_0_min": 3, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 10.75, "optimal_onion_potting_agent_1_min": 3, "optimal_onion_potting_agent_1_max": 19, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 11.39, "viable_onion_potting_agent_0_min": 3, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 10.75, "viable_onion_potting_agent_1_min": 3, "viable_onion_potting_agent_1_max": 19, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, 
"useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [350.0, 237.0, 419.0, 395.0, 288.0, 441.0, 290.0, 287.0, 453.0, 399.0, 381.0, 347.0, 405.0, 355.0, 381.0, 402.0, 290.0, 404.0, 247.0, 305.0, 347.0, 263.0, 462.0, 393.0, 450.0, 402.0, 190.0, 290.0, 444.0, 399.0, 396.0, 358.0, 387.0, 393.0, 407.0, 348.0, 396.0, 441.0, 384.0, 299.0, 444.0, 381.0, 395.0, 456.0, 247.0, 416.0, 447.0, 341.0, 390.0, 449.0, 450.0, 450.0, 358.0, 390.0, 456.0, 342.0, 293.0, 387.0, 330.0, 462.0, 410.0, 362.0, 408.0, 387.0, 459.0, 393.0, 399.0, 396.0, 250.0, 393.0, 407.0, 408.0, 390.0, 450.0, 387.0, 401.0, 390.0, 453.0, 333.0, 384.0, 401.0, 444.0, 341.0, 395.0, 318.0, 462.0, 339.0, 344.0, 450.0, 413.0, 339.0, 456.0, 387.0, 336.0, 381.0, 407.0, 413.0, 456.0, 390.0, 399.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [190.0, 160.0, 119.0, 118.0, 208.0, 211.0, 203.0, 192.0, 140.0, 148.0, 214.0, 227.0, 138.0, 152.0, 133.0, 154.0, 234.0, 219.0, 202.0, 197.0, 178.0, 203.0, 182.0, 165.0, 206.0, 199.0, 178.0, 177.0, 182.0, 199.0, 197.0, 205.0, 136.0, 154.0, 201.0, 203.0, 107.0, 140.0, 149.0, 156.0, 161.0, 186.0, 135.0, 
128.0, 221.0, 241.0, 200.0, 193.0, 205.0, 245.0, 198.0, 204.0, 94.0, 96.0, 144.0, 146.0, 222.0, 222.0, 189.0, 210.0, 188.0, 208.0, 180.0, 178.0, 196.0, 191.0, 193.0, 200.0, 200.0, 207.0, 164.0, 184.0, 205.0, 191.0, 228.0, 213.0, 203.0, 181.0, 156.0, 143.0, 214.0, 230.0, 204.0, 177.0, 187.0, 208.0, 229.0, 227.0, 122.0, 125.0, 214.0, 202.0, 220.0, 227.0, 166.0, 175.0, 188.0, 202.0, 230.0, 219.0, 225.0, 225.0, 222.0, 228.0, 188.0, 170.0, 175.0, 215.0, 231.0, 225.0, 173.0, 169.0, 150.0, 143.0, 195.0, 192.0, 156.0, 174.0, 228.0, 234.0, 190.0, 220.0, 172.0, 190.0, 206.0, 202.0, 191.0, 196.0, 249.0, 210.0, 196.0, 197.0, 196.0, 203.0, 194.0, 202.0, 134.0, 116.0, 191.0, 202.0, 194.0, 213.0, 203.0, 205.0, 185.0, 205.0, 241.0, 209.0, 212.0, 175.0, 196.0, 205.0, 188.0, 202.0, 229.0, 224.0, 175.0, 158.0, 204.0, 180.0, 190.0, 211.0, 238.0, 206.0, 179.0, 162.0, 199.0, 196.0, 173.0, 145.0, 226.0, 236.0, 176.0, 163.0, 182.0, 162.0, 197.0, 253.0, 189.0, 224.0, 173.0, 166.0, 222.0, 234.0, 212.0, 175.0, 168.0, 168.0, 187.0, 194.0, 200.0, 207.0, 214.0, 199.0, 228.0, 228.0, 195.0, 195.0, 190.0, 209.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6871680708954622, "mean_inference_ms": 1.2096280272943736, "mean_action_processing_ms": 0.13298745342255253, "mean_env_wait_ms": 0.8628729261058392, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 462.0, "episode_reward_min": 190.0, "episode_reward_mean": 381.4, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 94.0}, "policy_reward_max": {"ppo": 253.0}, "policy_reward_mean": {"ppo": 190.7}, "hist_stats": {"episode_reward": [350.0, 237.0, 419.0, 395.0, 288.0, 441.0, 290.0, 287.0, 453.0, 399.0, 381.0, 347.0, 405.0, 355.0, 381.0, 402.0, 290.0, 404.0, 247.0, 305.0, 347.0, 263.0, 462.0, 393.0, 450.0, 402.0, 190.0, 290.0, 444.0, 399.0, 396.0, 358.0, 387.0, 393.0, 407.0, 348.0, 396.0, 441.0, 384.0, 299.0, 444.0, 381.0, 395.0, 456.0, 247.0, 416.0, 447.0, 341.0, 390.0, 449.0, 450.0, 
450.0, 358.0, 390.0, 456.0, 342.0, 293.0, 387.0, 330.0, 462.0, 410.0, 362.0, 408.0, 387.0, 459.0, 393.0, 399.0, 396.0, 250.0, 393.0, 407.0, 408.0, 390.0, 450.0, 387.0, 401.0, 390.0, 453.0, 333.0, 384.0, 401.0, 444.0, 341.0, 395.0, 318.0, 462.0, 339.0, 344.0, 450.0, 413.0, 339.0, 456.0, 387.0, 336.0, 381.0, 407.0, 413.0, 456.0, 390.0, 399.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [190.0, 160.0, 119.0, 118.0, 208.0, 211.0, 203.0, 192.0, 140.0, 148.0, 214.0, 227.0, 138.0, 152.0, 133.0, 154.0, 234.0, 219.0, 202.0, 197.0, 178.0, 203.0, 182.0, 165.0, 206.0, 199.0, 178.0, 177.0, 182.0, 199.0, 197.0, 205.0, 136.0, 154.0, 201.0, 203.0, 107.0, 140.0, 149.0, 156.0, 161.0, 186.0, 135.0, 128.0, 221.0, 241.0, 200.0, 193.0, 205.0, 245.0, 198.0, 204.0, 94.0, 96.0, 144.0, 146.0, 222.0, 222.0, 189.0, 210.0, 188.0, 208.0, 180.0, 178.0, 196.0, 191.0, 193.0, 200.0, 200.0, 207.0, 164.0, 184.0, 205.0, 191.0, 228.0, 213.0, 203.0, 181.0, 156.0, 143.0, 214.0, 230.0, 204.0, 177.0, 187.0, 208.0, 229.0, 227.0, 122.0, 125.0, 214.0, 202.0, 220.0, 227.0, 166.0, 175.0, 188.0, 202.0, 230.0, 219.0, 225.0, 225.0, 222.0, 228.0, 188.0, 170.0, 175.0, 215.0, 231.0, 225.0, 173.0, 169.0, 150.0, 143.0, 195.0, 192.0, 156.0, 174.0, 228.0, 234.0, 190.0, 220.0, 172.0, 190.0, 206.0, 202.0, 191.0, 196.0, 249.0, 210.0, 196.0, 197.0, 196.0, 203.0, 194.0, 202.0, 134.0, 116.0, 191.0, 202.0, 194.0, 213.0, 203.0, 205.0, 185.0, 205.0, 241.0, 209.0, 212.0, 175.0, 196.0, 205.0, 188.0, 202.0, 229.0, 224.0, 175.0, 158.0, 204.0, 
180.0, 190.0, 211.0, 238.0, 206.0, 179.0, 162.0, 199.0, 196.0, 173.0, 145.0, 226.0, 236.0, 176.0, 163.0, 182.0, 162.0, 197.0, 253.0, 189.0, 224.0, 173.0, 166.0, 222.0, 234.0, 212.0, 175.0, 168.0, 168.0, 187.0, 194.0, 200.0, 207.0, 214.0, 199.0, 228.0, 228.0, 195.0, 195.0, 190.0, 209.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6871680708954622, "mean_inference_ms": 1.2096280272943736, "mean_action_processing_ms": 0.13298745342255253, "mean_env_wait_ms": 0.8628729261058392, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 1792000, "num_agent_steps_trained": 1792000, "num_env_steps_sampled": 896000, "num_env_steps_trained": 896000, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 896000, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 1792000, "timers": {"training_iteration_time_ms": 3606.34, "learn_time_ms": 1082.566, "learn_throughput": 11823.76, "synch_weights_time_ms": 12.718}, "counters": {"num_env_steps_sampled": 896000, "num_env_steps_trained": 896000, "num_agent_steps_sampled": 1792000, "num_agent_steps_trained": 1792000}, "done": false, "episodes_total": 2240, "training_iteration": 70, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-04-28", "timestamp": 1666580668, "time_this_iter_s": 3.580299139022827, "time_total_s": 262.8790271282196, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, 
"device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, 
"sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": 
{"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", 
"observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, 
"keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function 
get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function 
gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 262.8790271282196, "timesteps_since_restore": 0, "iterations_since_restore": 70, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.740000000000002, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 135.6, "sparse_reward_min": 0, "sparse_reward_max": 160, "shaped_reward_mean": 115.26, "shaped_reward_min": 12, "shaped_reward_max": 142, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 12.69, "onion_pickup_agent_0_min": 0, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 12.6, "onion_pickup_agent_1_min": 3, "onion_pickup_agent_1_max": 20, "useful_onion_pickup_agent_0_mean": 12.17, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 12.04, "useful_onion_pickup_agent_1_min": 3, "useful_onion_pickup_agent_1_max": 19, "onion_drop_agent_0_mean": 1.08, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 1.18, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 7, "useful_onion_drop_agent_0_mean": 0.57, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, 
"useful_onion_drop_agent_1_mean": 0.68, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 11.23, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 11.13, "potting_onion_agent_1_min": 3, "potting_onion_agent_1_max": 19, "dish_pickup_agent_0_mean": 5.41, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.82, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 12, "useful_dish_pickup_agent_0_mean": 1.95, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 1.96, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 1.57, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 1.94, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.71, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 4, "useful_dish_drop_agent_1_mean": 0.77, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 4, "soup_pickup_agent_0_mean": 3.68, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 3.63, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 3.61, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 3.48, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.05, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 11.23, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 11.13, "optimal_onion_potting_agent_1_min": 3, "optimal_onion_potting_agent_1_max": 19, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 11.23, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 11.13, "viable_onion_potting_agent_1_min": 3, "viable_onion_potting_agent_1_max": 19, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 1.6940659197521496e-22, "cur_lr": 0.0010000000474974513, "total_loss": -0.004838631488382816, "policy_loss": 
-0.0050162081606686115, "vf_loss": 8.161375999450684, "vf_explained_var": 0.6948555707931519, "kl": 0.0016221408732235432, "entropy": 1.277127981185913, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 908800, "num_env_steps_trained": 908800, "num_agent_steps_sampled": 1817600, "num_agent_steps_trained": 1817600}, "sampler_results": {"episode_reward_max": 462.0, "episode_reward_min": 12.0, "episode_reward_mean": 386.46, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 3.0}, "policy_reward_max": {"ppo": 253.0}, "policy_reward_mean": {"ppo": 193.23}, "custom_metrics": {"sparse_reward_mean": 135.6, "sparse_reward_min": 0, "sparse_reward_max": 160, "shaped_reward_mean": 115.26, "shaped_reward_min": 12, "shaped_reward_max": 142, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 12.69, "onion_pickup_agent_0_min": 0, "onion_pickup_agent_0_max": 24, 
"onion_pickup_agent_1_mean": 12.6, "onion_pickup_agent_1_min": 3, "onion_pickup_agent_1_max": 20, "useful_onion_pickup_agent_0_mean": 12.17, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 12.04, "useful_onion_pickup_agent_1_min": 3, "useful_onion_pickup_agent_1_max": 19, "onion_drop_agent_0_mean": 1.08, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 1.18, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 7, "useful_onion_drop_agent_0_mean": 0.57, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, "useful_onion_drop_agent_1_mean": 0.68, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 11.23, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 11.13, "potting_onion_agent_1_min": 3, "potting_onion_agent_1_max": 19, "dish_pickup_agent_0_mean": 5.41, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.82, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 12, "useful_dish_pickup_agent_0_mean": 1.95, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 1.96, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 1.57, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 1.94, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.71, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 4, "useful_dish_drop_agent_1_mean": 0.77, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 4, "soup_pickup_agent_0_mean": 3.68, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 3.63, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 3.61, 
"soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 3.48, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.05, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 11.23, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 11.13, "optimal_onion_potting_agent_1_min": 3, "optimal_onion_potting_agent_1_max": 19, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 11.23, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 11.13, "viable_onion_potting_agent_1_min": 3, "viable_onion_potting_agent_1_max": 19, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, 
"useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [387.0, 393.0, 407.0, 348.0, 396.0, 441.0, 384.0, 299.0, 444.0, 381.0, 395.0, 456.0, 247.0, 416.0, 447.0, 341.0, 390.0, 449.0, 450.0, 450.0, 358.0, 390.0, 456.0, 342.0, 293.0, 387.0, 330.0, 462.0, 410.0, 362.0, 408.0, 387.0, 459.0, 393.0, 399.0, 396.0, 250.0, 393.0, 407.0, 408.0, 390.0, 450.0, 387.0, 401.0, 390.0, 453.0, 333.0, 384.0, 401.0, 444.0, 341.0, 395.0, 318.0, 462.0, 339.0, 344.0, 450.0, 413.0, 339.0, 456.0, 387.0, 336.0, 381.0, 407.0, 413.0, 456.0, 390.0, 399.0, 295.0, 352.0, 396.0, 444.0, 444.0, 405.0, 444.0, 444.0, 402.0, 292.0, 393.0, 398.0, 401.0, 387.0, 401.0, 441.0, 393.0, 413.0, 12.0, 358.0, 453.0, 321.0, 358.0, 367.0, 402.0, 441.0, 399.0, 352.0, 450.0, 141.0, 384.0, 393.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [196.0, 191.0, 193.0, 200.0, 200.0, 207.0, 164.0, 184.0, 205.0, 191.0, 228.0, 213.0, 203.0, 181.0, 156.0, 143.0, 214.0, 230.0, 204.0, 177.0, 187.0, 208.0, 229.0, 227.0, 122.0, 125.0, 214.0, 202.0, 220.0, 227.0, 166.0, 175.0, 188.0, 202.0, 230.0, 219.0, 225.0, 225.0, 222.0, 228.0, 188.0, 170.0, 175.0, 
215.0, 231.0, 225.0, 173.0, 169.0, 150.0, 143.0, 195.0, 192.0, 156.0, 174.0, 228.0, 234.0, 190.0, 220.0, 172.0, 190.0, 206.0, 202.0, 191.0, 196.0, 249.0, 210.0, 196.0, 197.0, 196.0, 203.0, 194.0, 202.0, 134.0, 116.0, 191.0, 202.0, 194.0, 213.0, 203.0, 205.0, 185.0, 205.0, 241.0, 209.0, 212.0, 175.0, 196.0, 205.0, 188.0, 202.0, 229.0, 224.0, 175.0, 158.0, 204.0, 180.0, 190.0, 211.0, 238.0, 206.0, 179.0, 162.0, 199.0, 196.0, 173.0, 145.0, 226.0, 236.0, 176.0, 163.0, 182.0, 162.0, 197.0, 253.0, 189.0, 224.0, 173.0, 166.0, 222.0, 234.0, 212.0, 175.0, 168.0, 168.0, 187.0, 194.0, 200.0, 207.0, 214.0, 199.0, 228.0, 228.0, 195.0, 195.0, 190.0, 209.0, 155.0, 140.0, 176.0, 176.0, 204.0, 192.0, 231.0, 213.0, 219.0, 225.0, 199.0, 206.0, 216.0, 228.0, 216.0, 228.0, 204.0, 198.0, 149.0, 143.0, 190.0, 203.0, 191.0, 207.0, 190.0, 211.0, 182.0, 205.0, 210.0, 191.0, 222.0, 219.0, 187.0, 206.0, 202.0, 211.0, 3.0, 9.0, 182.0, 176.0, 219.0, 234.0, 168.0, 153.0, 188.0, 170.0, 192.0, 175.0, 206.0, 196.0, 221.0, 220.0, 216.0, 183.0, 189.0, 163.0, 223.0, 227.0, 82.0, 59.0, 178.0, 206.0, 193.0, 200.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6871857253872651, "mean_inference_ms": 1.2093943881934999, "mean_action_processing_ms": 0.1329846227979475, "mean_env_wait_ms": 0.8622240571478598, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 462.0, "episode_reward_min": 12.0, "episode_reward_mean": 386.46, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 3.0}, "policy_reward_max": {"ppo": 253.0}, "policy_reward_mean": {"ppo": 193.23}, "hist_stats": {"episode_reward": [387.0, 393.0, 407.0, 348.0, 396.0, 441.0, 384.0, 299.0, 444.0, 381.0, 395.0, 456.0, 247.0, 416.0, 447.0, 341.0, 390.0, 449.0, 450.0, 450.0, 358.0, 390.0, 456.0, 342.0, 293.0, 387.0, 330.0, 462.0, 410.0, 362.0, 408.0, 387.0, 459.0, 393.0, 399.0, 396.0, 250.0, 393.0, 407.0, 408.0, 390.0, 450.0, 387.0, 401.0, 390.0, 453.0, 333.0, 384.0, 401.0, 444.0, 341.0, 395.0, 
318.0, 462.0, 339.0, 344.0, 450.0, 413.0, 339.0, 456.0, 387.0, 336.0, 381.0, 407.0, 413.0, 456.0, 390.0, 399.0, 295.0, 352.0, 396.0, 444.0, 444.0, 405.0, 444.0, 444.0, 402.0, 292.0, 393.0, 398.0, 401.0, 387.0, 401.0, 441.0, 393.0, 413.0, 12.0, 358.0, 453.0, 321.0, 358.0, 367.0, 402.0, 441.0, 399.0, 352.0, 450.0, 141.0, 384.0, 393.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [196.0, 191.0, 193.0, 200.0, 200.0, 207.0, 164.0, 184.0, 205.0, 191.0, 228.0, 213.0, 203.0, 181.0, 156.0, 143.0, 214.0, 230.0, 204.0, 177.0, 187.0, 208.0, 229.0, 227.0, 122.0, 125.0, 214.0, 202.0, 220.0, 227.0, 166.0, 175.0, 188.0, 202.0, 230.0, 219.0, 225.0, 225.0, 222.0, 228.0, 188.0, 170.0, 175.0, 215.0, 231.0, 225.0, 173.0, 169.0, 150.0, 143.0, 195.0, 192.0, 156.0, 174.0, 228.0, 234.0, 190.0, 220.0, 172.0, 190.0, 206.0, 202.0, 191.0, 196.0, 249.0, 210.0, 196.0, 197.0, 196.0, 203.0, 194.0, 202.0, 134.0, 116.0, 191.0, 202.0, 194.0, 213.0, 203.0, 205.0, 185.0, 205.0, 241.0, 209.0, 212.0, 175.0, 196.0, 205.0, 188.0, 202.0, 229.0, 224.0, 175.0, 158.0, 204.0, 180.0, 190.0, 211.0, 238.0, 206.0, 179.0, 162.0, 199.0, 196.0, 173.0, 145.0, 226.0, 236.0, 176.0, 163.0, 182.0, 162.0, 197.0, 253.0, 189.0, 224.0, 173.0, 166.0, 222.0, 234.0, 212.0, 175.0, 168.0, 168.0, 187.0, 194.0, 200.0, 207.0, 214.0, 199.0, 228.0, 228.0, 195.0, 195.0, 190.0, 209.0, 155.0, 140.0, 176.0, 176.0, 204.0, 192.0, 231.0, 213.0, 219.0, 225.0, 199.0, 206.0, 216.0, 228.0, 216.0, 228.0, 204.0, 198.0, 149.0, 143.0, 190.0, 203.0, 191.0, 207.0, 
190.0, 211.0, 182.0, 205.0, 210.0, 191.0, 222.0, 219.0, 187.0, 206.0, 202.0, 211.0, 3.0, 9.0, 182.0, 176.0, 219.0, 234.0, 168.0, 153.0, 188.0, 170.0, 192.0, 175.0, 206.0, 196.0, 221.0, 220.0, 216.0, 183.0, 189.0, 163.0, 223.0, 227.0, 82.0, 59.0, 178.0, 206.0, 193.0, 200.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6871857253872651, "mean_inference_ms": 1.2093943881934999, "mean_action_processing_ms": 0.1329846227979475, "mean_env_wait_ms": 0.8622240571478598, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 1817600, "num_agent_steps_trained": 1817600, "num_env_steps_sampled": 908800, "num_env_steps_trained": 908800, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 908800, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 1817600, "timers": {"training_iteration_time_ms": 3594.621, "learn_time_ms": 1077.333, "learn_throughput": 11881.19, "synch_weights_time_ms": 12.178}, "counters": {"num_env_steps_sampled": 908800, "num_env_steps_trained": 908800, "num_agent_steps_sampled": 1817600, "num_agent_steps_trained": 1817600}, "done": false, "episodes_total": 2272, "training_iteration": 71, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-04-32", "timestamp": 1666580672, "time_this_iter_s": 3.6173627376556396, "time_total_s": 266.49638986587524, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": 
{"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, 
"gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, 
"inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", 
"synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, 
"metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 
0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": 
"{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 266.49638986587524, "timesteps_since_restore": 0, "iterations_since_restore": 71, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.983333333333334, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 134.4, "sparse_reward_min": 0, "sparse_reward_max": 160, "shaped_reward_mean": 115.68, "shaped_reward_min": 12, "shaped_reward_max": 142, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 12.1, "onion_pickup_agent_0_min": 0, "onion_pickup_agent_0_max": 19, "onion_pickup_agent_1_mean": 13.1, "onion_pickup_agent_1_min": 3, "onion_pickup_agent_1_max": 20, "useful_onion_pickup_agent_0_mean": 11.48, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 18, "useful_onion_pickup_agent_1_mean": 12.51, "useful_onion_pickup_agent_1_min": 3, "useful_onion_pickup_agent_1_max": 19, "onion_drop_agent_0_mean": 1.12, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 1.08, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 7, "useful_onion_drop_agent_0_mean": 0.68, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, 
"useful_onion_drop_agent_1_mean": 0.62, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 10.6, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 11.78, "potting_onion_agent_1_min": 3, "potting_onion_agent_1_max": 19, "dish_pickup_agent_0_mean": 5.74, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.46, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 2.18, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 1.8, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 6, "dish_drop_agent_0_mean": 1.57, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 1.88, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.72, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 4, "useful_dish_drop_agent_1_mean": 0.8, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 4, "soup_pickup_agent_0_mean": 4.0, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 3.36, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 3.95, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 3.19, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.07, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 10.6, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 11.78, "optimal_onion_potting_agent_1_min": 3, "optimal_onion_potting_agent_1_max": 19, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 10.6, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 11.78, "viable_onion_potting_agent_1_min": 3, "viable_onion_potting_agent_1_max": 19, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 8.470329598760748e-23, "cur_lr": 0.0010000000474974513, "total_loss": -0.001648187288083136, "policy_loss": -0.0018436491955071688, 
"vf_loss": 8.266682624816895, "vf_explained_var": 0.6168429851531982, "kl": 0.001819998025894165, "entropy": 1.2624164819717407, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 921600, "num_env_steps_trained": 921600, "num_agent_steps_sampled": 1843200, "num_agent_steps_trained": 1843200}, "sampler_results": {"episode_reward_max": 462.0, "episode_reward_min": 12.0, "episode_reward_mean": 384.48, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 3.0}, "policy_reward_max": {"ppo": 253.0}, "policy_reward_mean": {"ppo": 192.24}, "custom_metrics": {"sparse_reward_mean": 134.4, "sparse_reward_min": 0, "sparse_reward_max": 160, "shaped_reward_mean": 115.68, "shaped_reward_min": 12, "shaped_reward_max": 142, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 12.1, "onion_pickup_agent_0_min": 0, "onion_pickup_agent_0_max": 19, "onion_pickup_agent_1_mean": 13.1, 
"onion_pickup_agent_1_min": 3, "onion_pickup_agent_1_max": 20, "useful_onion_pickup_agent_0_mean": 11.48, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 18, "useful_onion_pickup_agent_1_mean": 12.51, "useful_onion_pickup_agent_1_min": 3, "useful_onion_pickup_agent_1_max": 19, "onion_drop_agent_0_mean": 1.12, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 1.08, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 7, "useful_onion_drop_agent_0_mean": 0.68, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, "useful_onion_drop_agent_1_mean": 0.62, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 10.6, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 11.78, "potting_onion_agent_1_min": 3, "potting_onion_agent_1_max": 19, "dish_pickup_agent_0_mean": 5.74, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.46, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 2.18, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 1.8, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 6, "dish_drop_agent_0_mean": 1.57, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 1.88, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.72, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 4, "useful_dish_drop_agent_1_mean": 0.8, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 4, "soup_pickup_agent_0_mean": 4.0, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 3.36, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 3.95, "soup_delivery_agent_0_min": 0, 
"soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 3.19, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.07, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 10.6, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 11.78, "optimal_onion_potting_agent_1_min": 3, "optimal_onion_potting_agent_1_max": 19, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 10.6, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 11.78, "viable_onion_potting_agent_1_min": 3, "viable_onion_potting_agent_1_max": 19, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [459.0, 393.0, 399.0, 396.0, 250.0, 393.0, 407.0, 408.0, 390.0, 450.0, 387.0, 401.0, 390.0, 453.0, 333.0, 384.0, 401.0, 444.0, 341.0, 395.0, 318.0, 462.0, 339.0, 344.0, 450.0, 413.0, 339.0, 456.0, 387.0, 336.0, 381.0, 407.0, 413.0, 456.0, 390.0, 399.0, 295.0, 352.0, 396.0, 444.0, 444.0, 405.0, 444.0, 444.0, 402.0, 292.0, 393.0, 398.0, 401.0, 387.0, 401.0, 441.0, 393.0, 413.0, 12.0, 358.0, 453.0, 321.0, 358.0, 367.0, 402.0, 441.0, 399.0, 352.0, 450.0, 141.0, 384.0, 393.0, 339.0, 373.0, 370.0, 441.0, 408.0, 387.0, 341.0, 402.0, 400.0, 401.0, 396.0, 333.0, 441.0, 387.0, 407.0, 350.0, 401.0, 411.0, 390.0, 398.0, 407.0, 367.0, 291.0, 362.0, 392.0, 453.0, 462.0, 355.0, 347.0, 353.0, 347.0, 396.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [249.0, 210.0, 196.0, 197.0, 196.0, 203.0, 194.0, 202.0, 134.0, 116.0, 191.0, 202.0, 194.0, 213.0, 203.0, 205.0, 185.0, 205.0, 241.0, 209.0, 212.0, 175.0, 196.0, 205.0, 188.0, 202.0, 229.0, 224.0, 175.0, 158.0, 204.0, 180.0, 190.0, 211.0, 238.0, 206.0, 179.0, 162.0, 199.0, 196.0, 173.0, 145.0, 226.0, 236.0, 176.0, 163.0, 182.0, 162.0, 197.0, 
253.0, 189.0, 224.0, 173.0, 166.0, 222.0, 234.0, 212.0, 175.0, 168.0, 168.0, 187.0, 194.0, 200.0, 207.0, 214.0, 199.0, 228.0, 228.0, 195.0, 195.0, 190.0, 209.0, 155.0, 140.0, 176.0, 176.0, 204.0, 192.0, 231.0, 213.0, 219.0, 225.0, 199.0, 206.0, 216.0, 228.0, 216.0, 228.0, 204.0, 198.0, 149.0, 143.0, 190.0, 203.0, 191.0, 207.0, 190.0, 211.0, 182.0, 205.0, 210.0, 191.0, 222.0, 219.0, 187.0, 206.0, 202.0, 211.0, 3.0, 9.0, 182.0, 176.0, 219.0, 234.0, 168.0, 153.0, 188.0, 170.0, 192.0, 175.0, 206.0, 196.0, 221.0, 220.0, 216.0, 183.0, 189.0, 163.0, 223.0, 227.0, 82.0, 59.0, 178.0, 206.0, 193.0, 200.0, 170.0, 169.0, 189.0, 184.0, 185.0, 185.0, 227.0, 214.0, 201.0, 207.0, 182.0, 205.0, 160.0, 181.0, 203.0, 199.0, 189.0, 211.0, 197.0, 204.0, 195.0, 201.0, 154.0, 179.0, 216.0, 225.0, 190.0, 197.0, 222.0, 185.0, 177.0, 173.0, 205.0, 196.0, 208.0, 203.0, 198.0, 192.0, 189.0, 209.0, 199.0, 208.0, 190.0, 177.0, 165.0, 126.0, 174.0, 188.0, 196.0, 196.0, 235.0, 218.0, 237.0, 225.0, 164.0, 191.0, 188.0, 159.0, 176.0, 177.0, 161.0, 186.0, 199.0, 197.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6871988687200685, "mean_inference_ms": 1.2092524040203814, "mean_action_processing_ms": 0.13299117900125684, "mean_env_wait_ms": 0.8617021906416065, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 462.0, "episode_reward_min": 12.0, "episode_reward_mean": 384.48, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 3.0}, "policy_reward_max": {"ppo": 253.0}, "policy_reward_mean": {"ppo": 192.24}, "hist_stats": {"episode_reward": [459.0, 393.0, 399.0, 396.0, 250.0, 393.0, 407.0, 408.0, 390.0, 450.0, 387.0, 401.0, 390.0, 453.0, 333.0, 384.0, 401.0, 444.0, 341.0, 395.0, 318.0, 462.0, 339.0, 344.0, 450.0, 413.0, 339.0, 456.0, 387.0, 336.0, 381.0, 407.0, 413.0, 456.0, 390.0, 399.0, 295.0, 352.0, 396.0, 444.0, 444.0, 405.0, 444.0, 444.0, 402.0, 292.0, 393.0, 398.0, 401.0, 387.0, 401.0, 441.0, 393.0, 413.0, 12.0, 358.0, 453.0, 321.0, 
358.0, 367.0, 402.0, 441.0, 399.0, 352.0, 450.0, 141.0, 384.0, 393.0, 339.0, 373.0, 370.0, 441.0, 408.0, 387.0, 341.0, 402.0, 400.0, 401.0, 396.0, 333.0, 441.0, 387.0, 407.0, 350.0, 401.0, 411.0, 390.0, 398.0, 407.0, 367.0, 291.0, 362.0, 392.0, 453.0, 462.0, 355.0, 347.0, 353.0, 347.0, 396.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [249.0, 210.0, 196.0, 197.0, 196.0, 203.0, 194.0, 202.0, 134.0, 116.0, 191.0, 202.0, 194.0, 213.0, 203.0, 205.0, 185.0, 205.0, 241.0, 209.0, 212.0, 175.0, 196.0, 205.0, 188.0, 202.0, 229.0, 224.0, 175.0, 158.0, 204.0, 180.0, 190.0, 211.0, 238.0, 206.0, 179.0, 162.0, 199.0, 196.0, 173.0, 145.0, 226.0, 236.0, 176.0, 163.0, 182.0, 162.0, 197.0, 253.0, 189.0, 224.0, 173.0, 166.0, 222.0, 234.0, 212.0, 175.0, 168.0, 168.0, 187.0, 194.0, 200.0, 207.0, 214.0, 199.0, 228.0, 228.0, 195.0, 195.0, 190.0, 209.0, 155.0, 140.0, 176.0, 176.0, 204.0, 192.0, 231.0, 213.0, 219.0, 225.0, 199.0, 206.0, 216.0, 228.0, 216.0, 228.0, 204.0, 198.0, 149.0, 143.0, 190.0, 203.0, 191.0, 207.0, 190.0, 211.0, 182.0, 205.0, 210.0, 191.0, 222.0, 219.0, 187.0, 206.0, 202.0, 211.0, 3.0, 9.0, 182.0, 176.0, 219.0, 234.0, 168.0, 153.0, 188.0, 170.0, 192.0, 175.0, 206.0, 196.0, 221.0, 220.0, 216.0, 183.0, 189.0, 163.0, 223.0, 227.0, 82.0, 59.0, 178.0, 206.0, 193.0, 200.0, 170.0, 169.0, 189.0, 184.0, 185.0, 185.0, 227.0, 214.0, 201.0, 207.0, 182.0, 205.0, 160.0, 181.0, 203.0, 199.0, 189.0, 211.0, 197.0, 204.0, 195.0, 201.0, 154.0, 179.0, 216.0, 225.0, 190.0, 197.0, 222.0, 185.0, 
177.0, 173.0, 205.0, 196.0, 208.0, 203.0, 198.0, 192.0, 189.0, 209.0, 199.0, 208.0, 190.0, 177.0, 165.0, 126.0, 174.0, 188.0, 196.0, 196.0, 235.0, 218.0, 237.0, 225.0, 164.0, 191.0, 188.0, 159.0, 176.0, 177.0, 161.0, 186.0, 199.0, 197.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6871988687200685, "mean_inference_ms": 1.2092524040203814, "mean_action_processing_ms": 0.13299117900125684, "mean_env_wait_ms": 0.8617021906416065, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 1843200, "num_agent_steps_trained": 1843200, "num_env_steps_sampled": 921600, "num_env_steps_trained": 921600, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 921600, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 1843200, "timers": {"training_iteration_time_ms": 3591.057, "learn_time_ms": 1068.352, "learn_throughput": 11981.073, "synch_weights_time_ms": 12.227}, "counters": {"num_env_steps_sampled": 921600, "num_env_steps_trained": 921600, "num_agent_steps_sampled": 1843200, "num_agent_steps_trained": 1843200}, "done": false, "episodes_total": 2304, "training_iteration": 72, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-04-36", "timestamp": 1666580676, "time_this_iter_s": 3.6749939918518066, "time_total_s": 270.17138385772705, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": 
true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, 
"train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": 
{"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, 
"enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 270.17138385772705, "timesteps_since_restore": 0, "iterations_since_restore": 72, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.94, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 132.4, "sparse_reward_min": 0, "sparse_reward_max": 160, "shaped_reward_mean": 116.34, "shaped_reward_min": 12, "shaped_reward_max": 142, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 12.0, "onion_pickup_agent_0_min": 0, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 13.55, "onion_pickup_agent_1_min": 5, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 11.3, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 18, "useful_onion_pickup_agent_1_mean": 12.81, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 1.2, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 10, "onion_drop_agent_1_mean": 1.18, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 7, "useful_onion_drop_agent_0_mean": 0.71, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 7, 
"useful_onion_drop_agent_1_mean": 0.69, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 10.37, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 16, "potting_onion_agent_1_mean": 12.05, "potting_onion_agent_1_min": 3, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.79, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.41, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 13, "useful_dish_pickup_agent_0_mean": 2.21, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 1.85, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 5, "dish_drop_agent_0_mean": 1.61, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 1.85, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 6, "useful_dish_drop_agent_0_mean": 0.7, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 4, "useful_dish_drop_agent_1_mean": 0.83, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 6, "soup_pickup_agent_0_mean": 4.02, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 3.42, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 3.93, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 3.27, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.03, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.08, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 10.37, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 16, "optimal_onion_potting_agent_1_mean": 12.05, "optimal_onion_potting_agent_1_min": 3, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 10.37, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 16, "viable_onion_potting_agent_1_mean": 12.05, "viable_onion_potting_agent_1_min": 3, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 4.235164799380374e-23, "cur_lr": 0.0010000000474974513, "total_loss": -0.0015343400882557034, "policy_loss": 
-0.0017145425081253052, "vf_loss": 8.12104606628418, "vf_explained_var": 0.6849566698074341, "kl": 0.0017699010204523802, "entropy": 1.2638133764266968, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 934400, "num_env_steps_trained": 934400, "num_agent_steps_sampled": 1868800, "num_agent_steps_trained": 1868800}, "sampler_results": {"episode_reward_max": 462.0, "episode_reward_min": 12.0, "episode_reward_mean": 381.14, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 3.0}, "policy_reward_max": {"ppo": 237.0}, "policy_reward_mean": {"ppo": 190.57}, "custom_metrics": {"sparse_reward_mean": 132.4, "sparse_reward_min": 0, "sparse_reward_max": 160, "shaped_reward_mean": 116.34, "shaped_reward_min": 12, "shaped_reward_max": 142, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 12.0, "onion_pickup_agent_0_min": 0, "onion_pickup_agent_0_max": 22, 
"onion_pickup_agent_1_mean": 13.55, "onion_pickup_agent_1_min": 5, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 11.3, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 18, "useful_onion_pickup_agent_1_mean": 12.81, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 1.2, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 10, "onion_drop_agent_1_mean": 1.18, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 7, "useful_onion_drop_agent_0_mean": 0.71, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 7, "useful_onion_drop_agent_1_mean": 0.69, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 10.37, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 16, "potting_onion_agent_1_mean": 12.05, "potting_onion_agent_1_min": 3, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.79, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.41, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 13, "useful_dish_pickup_agent_0_mean": 2.21, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 1.85, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 5, "dish_drop_agent_0_mean": 1.61, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 1.85, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 6, "useful_dish_drop_agent_0_mean": 0.7, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 4, "useful_dish_drop_agent_1_mean": 0.83, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 6, "soup_pickup_agent_0_mean": 4.02, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 3.42, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 3.93, 
"soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 3.27, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.03, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.08, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 10.37, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 16, "optimal_onion_potting_agent_1_mean": 12.05, "optimal_onion_potting_agent_1_min": 3, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 10.37, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 16, "viable_onion_potting_agent_1_mean": 12.05, "viable_onion_potting_agent_1_min": 3, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, 
"useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [413.0, 456.0, 390.0, 399.0, 295.0, 352.0, 396.0, 444.0, 444.0, 405.0, 444.0, 444.0, 402.0, 292.0, 393.0, 398.0, 401.0, 387.0, 401.0, 441.0, 393.0, 413.0, 12.0, 358.0, 453.0, 321.0, 358.0, 367.0, 402.0, 441.0, 399.0, 352.0, 450.0, 141.0, 384.0, 393.0, 339.0, 373.0, 370.0, 441.0, 408.0, 387.0, 341.0, 402.0, 400.0, 401.0, 396.0, 333.0, 441.0, 387.0, 407.0, 350.0, 401.0, 411.0, 390.0, 398.0, 407.0, 367.0, 291.0, 362.0, 392.0, 453.0, 462.0, 355.0, 347.0, 353.0, 347.0, 396.0, 367.0, 410.0, 355.0, 395.0, 349.0, 405.0, 450.0, 459.0, 447.0, 410.0, 293.0, 399.0, 164.0, 450.0, 353.0, 444.0, 356.0, 387.0, 405.0, 356.0, 404.0, 356.0, 398.0, 407.0, 361.0, 387.0, 413.0, 392.0, 353.0, 364.0, 285.0, 398.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [214.0, 199.0, 228.0, 228.0, 195.0, 195.0, 190.0, 209.0, 155.0, 140.0, 176.0, 176.0, 204.0, 192.0, 231.0, 213.0, 219.0, 225.0, 199.0, 206.0, 216.0, 228.0, 216.0, 228.0, 204.0, 198.0, 149.0, 143.0, 190.0, 203.0, 191.0, 207.0, 190.0, 211.0, 182.0, 205.0, 210.0, 191.0, 222.0, 219.0, 187.0, 206.0, 202.0, 
211.0, 3.0, 9.0, 182.0, 176.0, 219.0, 234.0, 168.0, 153.0, 188.0, 170.0, 192.0, 175.0, 206.0, 196.0, 221.0, 220.0, 216.0, 183.0, 189.0, 163.0, 223.0, 227.0, 82.0, 59.0, 178.0, 206.0, 193.0, 200.0, 170.0, 169.0, 189.0, 184.0, 185.0, 185.0, 227.0, 214.0, 201.0, 207.0, 182.0, 205.0, 160.0, 181.0, 203.0, 199.0, 189.0, 211.0, 197.0, 204.0, 195.0, 201.0, 154.0, 179.0, 216.0, 225.0, 190.0, 197.0, 222.0, 185.0, 177.0, 173.0, 205.0, 196.0, 208.0, 203.0, 198.0, 192.0, 189.0, 209.0, 199.0, 208.0, 190.0, 177.0, 165.0, 126.0, 174.0, 188.0, 196.0, 196.0, 235.0, 218.0, 237.0, 225.0, 164.0, 191.0, 188.0, 159.0, 176.0, 177.0, 161.0, 186.0, 199.0, 197.0, 175.0, 192.0, 194.0, 216.0, 174.0, 181.0, 190.0, 205.0, 182.0, 167.0, 211.0, 194.0, 217.0, 233.0, 236.0, 223.0, 234.0, 213.0, 211.0, 199.0, 153.0, 140.0, 199.0, 200.0, 87.0, 77.0, 222.0, 228.0, 170.0, 183.0, 223.0, 221.0, 194.0, 162.0, 190.0, 197.0, 210.0, 195.0, 185.0, 171.0, 205.0, 199.0, 185.0, 171.0, 174.0, 224.0, 184.0, 223.0, 182.0, 179.0, 183.0, 204.0, 198.0, 215.0, 202.0, 190.0, 163.0, 190.0, 164.0, 200.0, 145.0, 140.0, 201.0, 197.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6871159082430706, "mean_inference_ms": 1.2095804059053032, "mean_action_processing_ms": 0.1329925772348698, "mean_env_wait_ms": 0.8617921215158202, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 462.0, "episode_reward_min": 12.0, "episode_reward_mean": 381.14, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 3.0}, "policy_reward_max": {"ppo": 237.0}, "policy_reward_mean": {"ppo": 190.57}, "hist_stats": {"episode_reward": [413.0, 456.0, 390.0, 399.0, 295.0, 352.0, 396.0, 444.0, 444.0, 405.0, 444.0, 444.0, 402.0, 292.0, 393.0, 398.0, 401.0, 387.0, 401.0, 441.0, 393.0, 413.0, 12.0, 358.0, 453.0, 321.0, 358.0, 367.0, 402.0, 441.0, 399.0, 352.0, 450.0, 141.0, 384.0, 393.0, 339.0, 373.0, 370.0, 441.0, 408.0, 387.0, 341.0, 402.0, 400.0, 401.0, 396.0, 333.0, 441.0, 387.0, 407.0, 350.0, 
401.0, 411.0, 390.0, 398.0, 407.0, 367.0, 291.0, 362.0, 392.0, 453.0, 462.0, 355.0, 347.0, 353.0, 347.0, 396.0, 367.0, 410.0, 355.0, 395.0, 349.0, 405.0, 450.0, 459.0, 447.0, 410.0, 293.0, 399.0, 164.0, 450.0, 353.0, 444.0, 356.0, 387.0, 405.0, 356.0, 404.0, 356.0, 398.0, 407.0, 361.0, 387.0, 413.0, 392.0, 353.0, 364.0, 285.0, 398.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [214.0, 199.0, 228.0, 228.0, 195.0, 195.0, 190.0, 209.0, 155.0, 140.0, 176.0, 176.0, 204.0, 192.0, 231.0, 213.0, 219.0, 225.0, 199.0, 206.0, 216.0, 228.0, 216.0, 228.0, 204.0, 198.0, 149.0, 143.0, 190.0, 203.0, 191.0, 207.0, 190.0, 211.0, 182.0, 205.0, 210.0, 191.0, 222.0, 219.0, 187.0, 206.0, 202.0, 211.0, 3.0, 9.0, 182.0, 176.0, 219.0, 234.0, 168.0, 153.0, 188.0, 170.0, 192.0, 175.0, 206.0, 196.0, 221.0, 220.0, 216.0, 183.0, 189.0, 163.0, 223.0, 227.0, 82.0, 59.0, 178.0, 206.0, 193.0, 200.0, 170.0, 169.0, 189.0, 184.0, 185.0, 185.0, 227.0, 214.0, 201.0, 207.0, 182.0, 205.0, 160.0, 181.0, 203.0, 199.0, 189.0, 211.0, 197.0, 204.0, 195.0, 201.0, 154.0, 179.0, 216.0, 225.0, 190.0, 197.0, 222.0, 185.0, 177.0, 173.0, 205.0, 196.0, 208.0, 203.0, 198.0, 192.0, 189.0, 209.0, 199.0, 208.0, 190.0, 177.0, 165.0, 126.0, 174.0, 188.0, 196.0, 196.0, 235.0, 218.0, 237.0, 225.0, 164.0, 191.0, 188.0, 159.0, 176.0, 177.0, 161.0, 186.0, 199.0, 197.0, 175.0, 192.0, 194.0, 216.0, 174.0, 181.0, 190.0, 205.0, 182.0, 167.0, 211.0, 194.0, 217.0, 233.0, 236.0, 223.0, 234.0, 213.0, 211.0, 199.0, 153.0, 140.0, 199.0, 200.0, 87.0, 
77.0, 222.0, 228.0, 170.0, 183.0, 223.0, 221.0, 194.0, 162.0, 190.0, 197.0, 210.0, 195.0, 185.0, 171.0, 205.0, 199.0, 185.0, 171.0, 174.0, 224.0, 184.0, 223.0, 182.0, 179.0, 183.0, 204.0, 198.0, 215.0, 202.0, 190.0, 163.0, 190.0, 164.0, 200.0, 145.0, 140.0, 201.0, 197.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6871159082430706, "mean_inference_ms": 1.2095804059053032, "mean_action_processing_ms": 0.1329925772348698, "mean_env_wait_ms": 0.8617921215158202, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 1868800, "num_agent_steps_trained": 1868800, "num_env_steps_sampled": 934400, "num_env_steps_trained": 934400, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 934400, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 1868800, "timers": {"training_iteration_time_ms": 3617.965, "learn_time_ms": 1066.048, "learn_throughput": 12006.96, "synch_weights_time_ms": 12.585}, "counters": {"num_env_steps_sampled": 934400, "num_env_steps_trained": 934400, "num_agent_steps_sampled": 1868800, "num_agent_steps_trained": 1868800}, "done": false, "episodes_total": 2336, "training_iteration": 73, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-04-40", "timestamp": 1666580680, "time_this_iter_s": 3.999202013015747, "time_total_s": 274.1705858707428, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 
1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 
0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 274.1705858707428, "timesteps_since_restore": 0, "iterations_since_restore": 73, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 21.833333333333332, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 133.0, "sparse_reward_min": 40, "sparse_reward_max": 160, "shaped_reward_mean": 116.89, "shaped_reward_min": 61, "shaped_reward_max": 142, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 12.08, "onion_pickup_agent_0_min": 2, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 13.47, "onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 11.36, "useful_onion_pickup_agent_0_min": 2, "useful_onion_pickup_agent_0_max": 18, "useful_onion_pickup_agent_1_mean": 12.77, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 1.19, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 10, "onion_drop_agent_1_mean": 1.09, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 7, "useful_onion_drop_agent_0_mean": 0.69, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 7, 
"useful_onion_drop_agent_1_mean": 0.7, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 10.47, "potting_onion_agent_0_min": 1, "potting_onion_agent_0_max": 17, "potting_onion_agent_1_mean": 12.01, "potting_onion_agent_1_min": 3, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.7, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.35, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 13, "useful_dish_pickup_agent_0_mean": 2.16, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 2.09, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 1.67, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 8, "dish_drop_agent_1_mean": 1.68, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 6, "useful_dish_drop_agent_0_mean": 0.76, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 4, "useful_dish_drop_agent_1_mean": 0.8, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 6, "soup_pickup_agent_0_mean": 3.88, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 3.52, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 3.78, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 3.38, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.04, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.07, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 10.47, "optimal_onion_potting_agent_0_min": 1, "optimal_onion_potting_agent_0_max": 17, "optimal_onion_potting_agent_1_mean": 12.01, "optimal_onion_potting_agent_1_min": 3, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 10.47, "viable_onion_potting_agent_0_min": 1, "viable_onion_potting_agent_0_max": 17, "viable_onion_potting_agent_1_mean": 12.01, "viable_onion_potting_agent_1_min": 3, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 2.117582399690187e-23, "cur_lr": 0.0010000000474974513, "total_loss": 0.0002928805770352483, "policy_loss": 0.0001178687671199441, 
"vf_loss": 8.05293083190918, "vf_explained_var": 0.6890039443969727, "kl": 0.001798928715288639, "entropy": 1.2605663537979126, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 947200, "num_env_steps_trained": 947200, "num_agent_steps_sampled": 1894400, "num_agent_steps_trained": 1894400}, "sampler_results": {"episode_reward_max": 462.0, "episode_reward_min": 141.0, "episode_reward_mean": 382.89, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 59.0}, "policy_reward_max": {"ppo": 252.0}, "policy_reward_mean": {"ppo": 191.445}, "custom_metrics": {"sparse_reward_mean": 133.0, "sparse_reward_min": 40, "sparse_reward_max": 160, "shaped_reward_mean": 116.89, "shaped_reward_min": 61, "shaped_reward_max": 142, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 12.08, "onion_pickup_agent_0_min": 2, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 13.47, 
"onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 11.36, "useful_onion_pickup_agent_0_min": 2, "useful_onion_pickup_agent_0_max": 18, "useful_onion_pickup_agent_1_mean": 12.77, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 1.19, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 10, "onion_drop_agent_1_mean": 1.09, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 7, "useful_onion_drop_agent_0_mean": 0.69, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 7, "useful_onion_drop_agent_1_mean": 0.7, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 10.47, "potting_onion_agent_0_min": 1, "potting_onion_agent_0_max": 17, "potting_onion_agent_1_mean": 12.01, "potting_onion_agent_1_min": 3, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.7, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.35, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 13, "useful_dish_pickup_agent_0_mean": 2.16, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 2.09, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 1.67, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 8, "dish_drop_agent_1_mean": 1.68, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 6, "useful_dish_drop_agent_0_mean": 0.76, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 4, "useful_dish_drop_agent_1_mean": 0.8, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 6, "soup_pickup_agent_0_mean": 3.88, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 3.52, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 3.78, "soup_delivery_agent_0_min": 0, 
"soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 3.38, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.04, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.07, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 10.47, "optimal_onion_potting_agent_0_min": 1, "optimal_onion_potting_agent_0_max": 17, "optimal_onion_potting_agent_1_mean": 12.01, "optimal_onion_potting_agent_1_min": 3, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 10.47, "viable_onion_potting_agent_0_min": 1, "viable_onion_potting_agent_0_max": 17, "viable_onion_potting_agent_1_mean": 12.01, "viable_onion_potting_agent_1_min": 3, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [450.0, 141.0, 384.0, 393.0, 339.0, 373.0, 370.0, 441.0, 408.0, 387.0, 341.0, 402.0, 400.0, 401.0, 396.0, 333.0, 441.0, 387.0, 407.0, 350.0, 401.0, 411.0, 390.0, 398.0, 407.0, 367.0, 291.0, 362.0, 392.0, 453.0, 462.0, 355.0, 347.0, 353.0, 347.0, 396.0, 367.0, 410.0, 355.0, 395.0, 349.0, 405.0, 450.0, 459.0, 447.0, 410.0, 293.0, 399.0, 164.0, 450.0, 353.0, 444.0, 356.0, 387.0, 405.0, 356.0, 404.0, 356.0, 398.0, 407.0, 361.0, 387.0, 413.0, 392.0, 353.0, 364.0, 285.0, 398.0, 405.0, 407.0, 399.0, 393.0, 402.0, 456.0, 396.0, 453.0, 390.0, 287.0, 444.0, 341.0, 359.0, 387.0, 396.0, 459.0, 282.0, 450.0, 402.0, 399.0, 390.0, 408.0, 413.0, 396.0, 402.0, 447.0, 304.0, 456.0, 349.0, 304.0, 252.0, 413.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [223.0, 227.0, 82.0, 59.0, 178.0, 206.0, 193.0, 200.0, 170.0, 169.0, 189.0, 184.0, 185.0, 185.0, 227.0, 214.0, 201.0, 207.0, 182.0, 205.0, 160.0, 181.0, 203.0, 199.0, 189.0, 211.0, 197.0, 204.0, 195.0, 201.0, 154.0, 179.0, 216.0, 225.0, 190.0, 197.0, 222.0, 185.0, 177.0, 173.0, 205.0, 196.0, 208.0, 203.0, 198.0, 192.0, 189.0, 209.0, 199.0, 
208.0, 190.0, 177.0, 165.0, 126.0, 174.0, 188.0, 196.0, 196.0, 235.0, 218.0, 237.0, 225.0, 164.0, 191.0, 188.0, 159.0, 176.0, 177.0, 161.0, 186.0, 199.0, 197.0, 175.0, 192.0, 194.0, 216.0, 174.0, 181.0, 190.0, 205.0, 182.0, 167.0, 211.0, 194.0, 217.0, 233.0, 236.0, 223.0, 234.0, 213.0, 211.0, 199.0, 153.0, 140.0, 199.0, 200.0, 87.0, 77.0, 222.0, 228.0, 170.0, 183.0, 223.0, 221.0, 194.0, 162.0, 190.0, 197.0, 210.0, 195.0, 185.0, 171.0, 205.0, 199.0, 185.0, 171.0, 174.0, 224.0, 184.0, 223.0, 182.0, 179.0, 183.0, 204.0, 198.0, 215.0, 202.0, 190.0, 163.0, 190.0, 164.0, 200.0, 145.0, 140.0, 201.0, 197.0, 199.0, 206.0, 191.0, 216.0, 189.0, 210.0, 201.0, 192.0, 197.0, 205.0, 240.0, 216.0, 201.0, 195.0, 201.0, 252.0, 199.0, 191.0, 148.0, 139.0, 222.0, 222.0, 164.0, 177.0, 156.0, 203.0, 179.0, 208.0, 209.0, 187.0, 233.0, 226.0, 135.0, 147.0, 238.0, 212.0, 211.0, 191.0, 200.0, 199.0, 180.0, 210.0, 210.0, 198.0, 201.0, 212.0, 192.0, 204.0, 199.0, 203.0, 229.0, 218.0, 140.0, 164.0, 230.0, 226.0, 170.0, 179.0, 142.0, 162.0, 140.0, 112.0, 213.0, 200.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.686933826173813, "mean_inference_ms": 1.2098385795333195, "mean_action_processing_ms": 0.13296885818111198, "mean_env_wait_ms": 0.861716192870976, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 462.0, "episode_reward_min": 141.0, "episode_reward_mean": 382.89, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 59.0}, "policy_reward_max": {"ppo": 252.0}, "policy_reward_mean": {"ppo": 191.445}, "hist_stats": {"episode_reward": [450.0, 141.0, 384.0, 393.0, 339.0, 373.0, 370.0, 441.0, 408.0, 387.0, 341.0, 402.0, 400.0, 401.0, 396.0, 333.0, 441.0, 387.0, 407.0, 350.0, 401.0, 411.0, 390.0, 398.0, 407.0, 367.0, 291.0, 362.0, 392.0, 453.0, 462.0, 355.0, 347.0, 353.0, 347.0, 396.0, 367.0, 410.0, 355.0, 395.0, 349.0, 405.0, 450.0, 459.0, 447.0, 410.0, 293.0, 399.0, 164.0, 450.0, 353.0, 444.0, 356.0, 387.0, 405.0, 356.0, 404.0, 
356.0, 398.0, 407.0, 361.0, 387.0, 413.0, 392.0, 353.0, 364.0, 285.0, 398.0, 405.0, 407.0, 399.0, 393.0, 402.0, 456.0, 396.0, 453.0, 390.0, 287.0, 444.0, 341.0, 359.0, 387.0, 396.0, 459.0, 282.0, 450.0, 402.0, 399.0, 390.0, 408.0, 413.0, 396.0, 402.0, 447.0, 304.0, 456.0, 349.0, 304.0, 252.0, 413.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [223.0, 227.0, 82.0, 59.0, 178.0, 206.0, 193.0, 200.0, 170.0, 169.0, 189.0, 184.0, 185.0, 185.0, 227.0, 214.0, 201.0, 207.0, 182.0, 205.0, 160.0, 181.0, 203.0, 199.0, 189.0, 211.0, 197.0, 204.0, 195.0, 201.0, 154.0, 179.0, 216.0, 225.0, 190.0, 197.0, 222.0, 185.0, 177.0, 173.0, 205.0, 196.0, 208.0, 203.0, 198.0, 192.0, 189.0, 209.0, 199.0, 208.0, 190.0, 177.0, 165.0, 126.0, 174.0, 188.0, 196.0, 196.0, 235.0, 218.0, 237.0, 225.0, 164.0, 191.0, 188.0, 159.0, 176.0, 177.0, 161.0, 186.0, 199.0, 197.0, 175.0, 192.0, 194.0, 216.0, 174.0, 181.0, 190.0, 205.0, 182.0, 167.0, 211.0, 194.0, 217.0, 233.0, 236.0, 223.0, 234.0, 213.0, 211.0, 199.0, 153.0, 140.0, 199.0, 200.0, 87.0, 77.0, 222.0, 228.0, 170.0, 183.0, 223.0, 221.0, 194.0, 162.0, 190.0, 197.0, 210.0, 195.0, 185.0, 171.0, 205.0, 199.0, 185.0, 171.0, 174.0, 224.0, 184.0, 223.0, 182.0, 179.0, 183.0, 204.0, 198.0, 215.0, 202.0, 190.0, 163.0, 190.0, 164.0, 200.0, 145.0, 140.0, 201.0, 197.0, 199.0, 206.0, 191.0, 216.0, 189.0, 210.0, 201.0, 192.0, 197.0, 205.0, 240.0, 216.0, 201.0, 195.0, 201.0, 252.0, 199.0, 191.0, 148.0, 139.0, 222.0, 222.0, 164.0, 177.0, 156.0, 203.0, 179.0, 208.0, 209.0, 
187.0, 233.0, 226.0, 135.0, 147.0, 238.0, 212.0, 211.0, 191.0, 200.0, 199.0, 180.0, 210.0, 210.0, 198.0, 201.0, 212.0, 192.0, 204.0, 199.0, 203.0, 229.0, 218.0, 140.0, 164.0, 230.0, 226.0, 170.0, 179.0, 142.0, 162.0, 140.0, 112.0, 213.0, 200.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.686933826173813, "mean_inference_ms": 1.2098385795333195, "mean_action_processing_ms": 0.13296885818111198, "mean_env_wait_ms": 0.861716192870976, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 1894400, "num_agent_steps_trained": 1894400, "num_env_steps_sampled": 947200, "num_env_steps_trained": 947200, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 947200, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 1894400, "timers": {"training_iteration_time_ms": 3619.249, "learn_time_ms": 1070.336, "learn_throughput": 11958.865, "synch_weights_time_ms": 13.61}, "counters": {"num_env_steps_sampled": 947200, "num_env_steps_trained": 947200, "num_agent_steps_sampled": 1894400, "num_agent_steps_trained": 1894400}, "done": false, "episodes_total": 2368, "training_iteration": 74, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-04-44", "timestamp": 1666580684, "time_this_iter_s": 3.642000436782837, "time_total_s": 277.81258630752563, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": 
true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, 
"train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": 
{"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, 
"enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 277.81258630752563, "timesteps_since_restore": 0, "iterations_since_restore": 74, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.666666666666668, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 136.2, "sparse_reward_min": 40, "sparse_reward_max": 160, "shaped_reward_mean": 119.42, "shaped_reward_min": 82, "shaped_reward_max": 153, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 12.55, "onion_pickup_agent_0_min": 2, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 13.41, "onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 11.86, "useful_onion_pickup_agent_0_min": 2, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 12.77, "useful_onion_pickup_agent_1_min": 6, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 1.21, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 10, "onion_drop_agent_1_mean": 1.07, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.65, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 7, 
"useful_onion_drop_agent_1_mean": 0.69, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 10.9, "potting_onion_agent_0_min": 1, "potting_onion_agent_0_max": 17, "potting_onion_agent_1_mean": 11.91, "potting_onion_agent_1_min": 6, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.39, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.39, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 13, "useful_dish_pickup_agent_0_mean": 2.12, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 6, "useful_dish_pickup_agent_1_mean": 2.46, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 1.56, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 8, "dish_drop_agent_1_mean": 1.49, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 6, "useful_dish_drop_agent_0_mean": 0.78, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 4, "useful_dish_drop_agent_1_mean": 0.66, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 6, "soup_pickup_agent_0_mean": 3.72, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 3.77, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 3.6, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 3.66, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.04, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.04, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 10.9, "optimal_onion_potting_agent_0_min": 1, "optimal_onion_potting_agent_0_max": 17, "optimal_onion_potting_agent_1_mean": 11.91, "optimal_onion_potting_agent_1_min": 6, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 10.9, "viable_onion_potting_agent_0_min": 1, "viable_onion_potting_agent_0_max": 17, "viable_onion_potting_agent_1_mean": 11.91, "viable_onion_potting_agent_1_min": 6, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 1.0587911998450935e-23, "cur_lr": 0.0010000000474974513, "total_loss": -0.002930144779384136, "policy_loss": 
-0.0031254859641194344, "vf_loss": 8.116436958312988, "vf_explained_var": 0.6190094947814941, "kl": 0.0018185349181294441, "entropy": 1.2326085567474365, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 960000, "num_env_steps_trained": 960000, "num_agent_steps_sampled": 1920000, "num_agent_steps_trained": 1920000}, "sampler_results": {"episode_reward_max": 473.0, "episode_reward_min": 164.0, "episode_reward_mean": 391.82, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 77.0}, "policy_reward_max": {"ppo": 252.0}, "policy_reward_mean": {"ppo": 195.91}, "custom_metrics": {"sparse_reward_mean": 136.2, "sparse_reward_min": 40, "sparse_reward_max": 160, "shaped_reward_mean": 119.42, "shaped_reward_min": 82, "shaped_reward_max": 153, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 12.55, "onion_pickup_agent_0_min": 2, "onion_pickup_agent_0_max": 22, 
"onion_pickup_agent_1_mean": 13.41, "onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 11.86, "useful_onion_pickup_agent_0_min": 2, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 12.77, "useful_onion_pickup_agent_1_min": 6, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 1.21, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 10, "onion_drop_agent_1_mean": 1.07, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.65, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 7, "useful_onion_drop_agent_1_mean": 0.69, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 10.9, "potting_onion_agent_0_min": 1, "potting_onion_agent_0_max": 17, "potting_onion_agent_1_mean": 11.91, "potting_onion_agent_1_min": 6, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.39, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.39, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 13, "useful_dish_pickup_agent_0_mean": 2.12, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 6, "useful_dish_pickup_agent_1_mean": 2.46, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 1.56, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 8, "dish_drop_agent_1_mean": 1.49, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 6, "useful_dish_drop_agent_0_mean": 0.78, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 4, "useful_dish_drop_agent_1_mean": 0.66, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 6, "soup_pickup_agent_0_mean": 3.72, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 3.77, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 3.6, 
"soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 3.66, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.04, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.04, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 10.9, "optimal_onion_potting_agent_0_min": 1, "optimal_onion_potting_agent_0_max": 17, "optimal_onion_potting_agent_1_mean": 11.91, "optimal_onion_potting_agent_1_min": 6, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 10.9, "viable_onion_potting_agent_0_min": 1, "viable_onion_potting_agent_0_max": 17, "viable_onion_potting_agent_1_mean": 11.91, "viable_onion_potting_agent_1_min": 6, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, 
"useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [347.0, 353.0, 347.0, 396.0, 367.0, 410.0, 355.0, 395.0, 349.0, 405.0, 450.0, 459.0, 447.0, 410.0, 293.0, 399.0, 164.0, 450.0, 353.0, 444.0, 356.0, 387.0, 405.0, 356.0, 404.0, 356.0, 398.0, 407.0, 361.0, 387.0, 413.0, 392.0, 353.0, 364.0, 285.0, 398.0, 405.0, 407.0, 399.0, 393.0, 402.0, 456.0, 396.0, 453.0, 390.0, 287.0, 444.0, 341.0, 359.0, 387.0, 396.0, 459.0, 282.0, 450.0, 402.0, 399.0, 390.0, 408.0, 413.0, 396.0, 402.0, 447.0, 304.0, 456.0, 349.0, 304.0, 252.0, 413.0, 332.0, 416.0, 347.0, 441.0, 399.0, 342.0, 413.0, 344.0, 336.0, 456.0, 473.0, 408.0, 459.0, 450.0, 456.0, 456.0, 410.0, 459.0, 384.0, 410.0, 349.0, 381.0, 462.0, 404.0, 404.0, 398.0, 390.0, 387.0, 444.0, 407.0, 450.0, 459.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [188.0, 159.0, 176.0, 177.0, 161.0, 186.0, 199.0, 197.0, 175.0, 192.0, 194.0, 216.0, 174.0, 181.0, 190.0, 205.0, 182.0, 167.0, 211.0, 194.0, 217.0, 233.0, 236.0, 223.0, 234.0, 213.0, 211.0, 199.0, 153.0, 140.0, 199.0, 200.0, 87.0, 77.0, 222.0, 228.0, 170.0, 183.0, 223.0, 221.0, 194.0, 162.0, 190.0, 197.0, 
210.0, 195.0, 185.0, 171.0, 205.0, 199.0, 185.0, 171.0, 174.0, 224.0, 184.0, 223.0, 182.0, 179.0, 183.0, 204.0, 198.0, 215.0, 202.0, 190.0, 163.0, 190.0, 164.0, 200.0, 145.0, 140.0, 201.0, 197.0, 199.0, 206.0, 191.0, 216.0, 189.0, 210.0, 201.0, 192.0, 197.0, 205.0, 240.0, 216.0, 201.0, 195.0, 201.0, 252.0, 199.0, 191.0, 148.0, 139.0, 222.0, 222.0, 164.0, 177.0, 156.0, 203.0, 179.0, 208.0, 209.0, 187.0, 233.0, 226.0, 135.0, 147.0, 238.0, 212.0, 211.0, 191.0, 200.0, 199.0, 180.0, 210.0, 210.0, 198.0, 201.0, 212.0, 192.0, 204.0, 199.0, 203.0, 229.0, 218.0, 140.0, 164.0, 230.0, 226.0, 170.0, 179.0, 142.0, 162.0, 140.0, 112.0, 213.0, 200.0, 169.0, 163.0, 213.0, 203.0, 179.0, 168.0, 205.0, 236.0, 190.0, 209.0, 156.0, 186.0, 200.0, 213.0, 166.0, 178.0, 179.0, 157.0, 221.0, 235.0, 235.0, 238.0, 190.0, 218.0, 238.0, 221.0, 217.0, 233.0, 225.0, 231.0, 228.0, 228.0, 209.0, 201.0, 225.0, 234.0, 179.0, 205.0, 201.0, 209.0, 183.0, 166.0, 169.0, 212.0, 211.0, 251.0, 199.0, 205.0, 203.0, 201.0, 208.0, 190.0, 185.0, 205.0, 190.0, 197.0, 209.0, 235.0, 199.0, 208.0, 236.0, 214.0, 233.0, 226.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6867719801502224, "mean_inference_ms": 1.2099976017691645, "mean_action_processing_ms": 0.1329235273872266, "mean_env_wait_ms": 0.861473241182441, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 473.0, "episode_reward_min": 164.0, "episode_reward_mean": 391.82, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 77.0}, "policy_reward_max": {"ppo": 252.0}, "policy_reward_mean": {"ppo": 195.91}, "hist_stats": {"episode_reward": [347.0, 353.0, 347.0, 396.0, 367.0, 410.0, 355.0, 395.0, 349.0, 405.0, 450.0, 459.0, 447.0, 410.0, 293.0, 399.0, 164.0, 450.0, 353.0, 444.0, 356.0, 387.0, 405.0, 356.0, 404.0, 356.0, 398.0, 407.0, 361.0, 387.0, 413.0, 392.0, 353.0, 364.0, 285.0, 398.0, 405.0, 407.0, 399.0, 393.0, 402.0, 456.0, 396.0, 453.0, 390.0, 287.0, 444.0, 341.0, 359.0, 387.0, 396.0, 459.0, 
282.0, 450.0, 402.0, 399.0, 390.0, 408.0, 413.0, 396.0, 402.0, 447.0, 304.0, 456.0, 349.0, 304.0, 252.0, 413.0, 332.0, 416.0, 347.0, 441.0, 399.0, 342.0, 413.0, 344.0, 336.0, 456.0, 473.0, 408.0, 459.0, 450.0, 456.0, 456.0, 410.0, 459.0, 384.0, 410.0, 349.0, 381.0, 462.0, 404.0, 404.0, 398.0, 390.0, 387.0, 444.0, 407.0, 450.0, 459.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [188.0, 159.0, 176.0, 177.0, 161.0, 186.0, 199.0, 197.0, 175.0, 192.0, 194.0, 216.0, 174.0, 181.0, 190.0, 205.0, 182.0, 167.0, 211.0, 194.0, 217.0, 233.0, 236.0, 223.0, 234.0, 213.0, 211.0, 199.0, 153.0, 140.0, 199.0, 200.0, 87.0, 77.0, 222.0, 228.0, 170.0, 183.0, 223.0, 221.0, 194.0, 162.0, 190.0, 197.0, 210.0, 195.0, 185.0, 171.0, 205.0, 199.0, 185.0, 171.0, 174.0, 224.0, 184.0, 223.0, 182.0, 179.0, 183.0, 204.0, 198.0, 215.0, 202.0, 190.0, 163.0, 190.0, 164.0, 200.0, 145.0, 140.0, 201.0, 197.0, 199.0, 206.0, 191.0, 216.0, 189.0, 210.0, 201.0, 192.0, 197.0, 205.0, 240.0, 216.0, 201.0, 195.0, 201.0, 252.0, 199.0, 191.0, 148.0, 139.0, 222.0, 222.0, 164.0, 177.0, 156.0, 203.0, 179.0, 208.0, 209.0, 187.0, 233.0, 226.0, 135.0, 147.0, 238.0, 212.0, 211.0, 191.0, 200.0, 199.0, 180.0, 210.0, 210.0, 198.0, 201.0, 212.0, 192.0, 204.0, 199.0, 203.0, 229.0, 218.0, 140.0, 164.0, 230.0, 226.0, 170.0, 179.0, 142.0, 162.0, 140.0, 112.0, 213.0, 200.0, 169.0, 163.0, 213.0, 203.0, 179.0, 168.0, 205.0, 236.0, 190.0, 209.0, 156.0, 186.0, 200.0, 213.0, 166.0, 178.0, 179.0, 157.0, 221.0, 235.0, 235.0, 238.0, 190.0, 218.0, 
238.0, 221.0, 217.0, 233.0, 225.0, 231.0, 228.0, 228.0, 209.0, 201.0, 225.0, 234.0, 179.0, 205.0, 201.0, 209.0, 183.0, 166.0, 169.0, 212.0, 211.0, 251.0, 199.0, 205.0, 203.0, 201.0, 208.0, 190.0, 185.0, 205.0, 190.0, 197.0, 209.0, 235.0, 199.0, 208.0, 236.0, 214.0, 233.0, 226.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6867719801502224, "mean_inference_ms": 1.2099976017691645, "mean_action_processing_ms": 0.1329235273872266, "mean_env_wait_ms": 0.861473241182441, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 1920000, "num_agent_steps_trained": 1920000, "num_env_steps_sampled": 960000, "num_env_steps_trained": 960000, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 960000, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 1920000, "timers": {"training_iteration_time_ms": 3615.628, "learn_time_ms": 1066.355, "learn_throughput": 12003.507, "synch_weights_time_ms": 13.679}, "counters": {"num_env_steps_sampled": 960000, "num_env_steps_trained": 960000, "num_agent_steps_sampled": 1920000, "num_agent_steps_trained": 1920000}, "done": false, "episodes_total": 2400, "training_iteration": 75, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-04-47", "timestamp": 1666580687, "time_this_iter_s": 3.4958388805389404, "time_total_s": 281.3084251880646, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": 
{"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, 
"gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, 
"inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", 
"synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, 
"metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 
0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": 
"{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 281.3084251880646, "timesteps_since_restore": 0, "iterations_since_restore": 75, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.6, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 139.0, "sparse_reward_min": 80, "sparse_reward_max": 160, "shaped_reward_mean": 120.43, "shaped_reward_min": 82, "shaped_reward_max": 153, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 12.53, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 13.27, "onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 20, "useful_onion_pickup_agent_0_mean": 11.9, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 12.67, "useful_onion_pickup_agent_1_min": 6, "useful_onion_pickup_agent_1_max": 19, "onion_drop_agent_0_mean": 1.06, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 1.02, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.56, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, 
"useful_onion_drop_agent_1_mean": 0.71, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 11.06, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 17, "potting_onion_agent_1_mean": 11.85, "potting_onion_agent_1_min": 6, "potting_onion_agent_1_max": 19, "dish_pickup_agent_0_mean": 5.38, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.19, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 2.2, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 6, "useful_dish_pickup_agent_1_mean": 2.6, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 1.55, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 8, "dish_drop_agent_1_mean": 1.25, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.76, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 4, "useful_dish_drop_agent_1_mean": 0.51, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 4, "soup_pickup_agent_0_mean": 3.7, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 3.79, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 3.57, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 3.69, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.06, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 11.06, "optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 17, "optimal_onion_potting_agent_1_mean": 11.85, "optimal_onion_potting_agent_1_min": 6, "optimal_onion_potting_agent_1_max": 19, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 11.06, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 17, "viable_onion_potting_agent_1_mean": 11.85, "viable_onion_potting_agent_1_min": 6, "viable_onion_potting_agent_1_max": 19, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 5.293955999225468e-24, "cur_lr": 0.0010000000474974513, "total_loss": -0.0020715848077088594, "policy_loss": 
-0.002257078420370817, "vf_loss": 8.082441329956055, "vf_explained_var": 0.6958982944488525, "kl": 0.0021980281453579664, "entropy": 1.2455064058303833, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 972800, "num_env_steps_trained": 972800, "num_agent_steps_sampled": 1945600, "num_agent_steps_trained": 1945600}, "sampler_results": {"episode_reward_max": 473.0, "episode_reward_min": 247.0, "episode_reward_mean": 398.43, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 109.0}, "policy_reward_max": {"ppo": 252.0}, "policy_reward_mean": {"ppo": 199.215}, "custom_metrics": {"sparse_reward_mean": 139.0, "sparse_reward_min": 80, "sparse_reward_max": 160, "shaped_reward_mean": 120.43, "shaped_reward_min": 82, "shaped_reward_max": 153, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 12.53, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 20, 
"onion_pickup_agent_1_mean": 13.27, "onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 20, "useful_onion_pickup_agent_0_mean": 11.9, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 12.67, "useful_onion_pickup_agent_1_min": 6, "useful_onion_pickup_agent_1_max": 19, "onion_drop_agent_0_mean": 1.06, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 1.02, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.56, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, "useful_onion_drop_agent_1_mean": 0.71, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 11.06, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 17, "potting_onion_agent_1_mean": 11.85, "potting_onion_agent_1_min": 6, "potting_onion_agent_1_max": 19, "dish_pickup_agent_0_mean": 5.38, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.19, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 2.2, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 6, "useful_dish_pickup_agent_1_mean": 2.6, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 1.55, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 8, "dish_drop_agent_1_mean": 1.25, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.76, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 4, "useful_dish_drop_agent_1_mean": 0.51, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 4, "soup_pickup_agent_0_mean": 3.7, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 3.79, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 3.57, 
"soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 3.69, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.06, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 11.06, "optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 17, "optimal_onion_potting_agent_1_mean": 11.85, "optimal_onion_potting_agent_1_min": 6, "optimal_onion_potting_agent_1_max": 19, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 11.06, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 17, "viable_onion_potting_agent_1_mean": 11.85, "viable_onion_potting_agent_1_min": 6, "viable_onion_potting_agent_1_max": 19, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, 
"useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [353.0, 364.0, 285.0, 398.0, 405.0, 407.0, 399.0, 393.0, 402.0, 456.0, 396.0, 453.0, 390.0, 287.0, 444.0, 341.0, 359.0, 387.0, 396.0, 459.0, 282.0, 450.0, 402.0, 399.0, 390.0, 408.0, 413.0, 396.0, 402.0, 447.0, 304.0, 456.0, 349.0, 304.0, 252.0, 413.0, 332.0, 416.0, 347.0, 441.0, 399.0, 342.0, 413.0, 344.0, 336.0, 456.0, 473.0, 408.0, 459.0, 450.0, 456.0, 456.0, 410.0, 459.0, 384.0, 410.0, 349.0, 381.0, 462.0, 404.0, 404.0, 398.0, 390.0, 387.0, 444.0, 407.0, 450.0, 459.0, 384.0, 399.0, 296.0, 450.0, 453.0, 441.0, 405.0, 305.0, 370.0, 405.0, 456.0, 427.0, 398.0, 247.0, 405.0, 456.0, 370.0, 293.0, 387.0, 396.0, 456.0, 450.0, 404.0, 393.0, 444.0, 387.0, 404.0, 453.0, 398.0, 441.0, 456.0, 447.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [163.0, 190.0, 164.0, 200.0, 145.0, 140.0, 201.0, 197.0, 199.0, 206.0, 191.0, 216.0, 189.0, 210.0, 201.0, 192.0, 197.0, 205.0, 240.0, 216.0, 201.0, 195.0, 201.0, 252.0, 199.0, 191.0, 148.0, 139.0, 222.0, 222.0, 164.0, 177.0, 156.0, 203.0, 179.0, 208.0, 209.0, 187.0, 233.0, 226.0, 135.0, 147.0, 238.0, 
212.0, 211.0, 191.0, 200.0, 199.0, 180.0, 210.0, 210.0, 198.0, 201.0, 212.0, 192.0, 204.0, 199.0, 203.0, 229.0, 218.0, 140.0, 164.0, 230.0, 226.0, 170.0, 179.0, 142.0, 162.0, 140.0, 112.0, 213.0, 200.0, 169.0, 163.0, 213.0, 203.0, 179.0, 168.0, 205.0, 236.0, 190.0, 209.0, 156.0, 186.0, 200.0, 213.0, 166.0, 178.0, 179.0, 157.0, 221.0, 235.0, 235.0, 238.0, 190.0, 218.0, 238.0, 221.0, 217.0, 233.0, 225.0, 231.0, 228.0, 228.0, 209.0, 201.0, 225.0, 234.0, 179.0, 205.0, 201.0, 209.0, 183.0, 166.0, 169.0, 212.0, 211.0, 251.0, 199.0, 205.0, 203.0, 201.0, 208.0, 190.0, 185.0, 205.0, 190.0, 197.0, 209.0, 235.0, 199.0, 208.0, 236.0, 214.0, 233.0, 226.0, 202.0, 182.0, 206.0, 193.0, 153.0, 143.0, 214.0, 236.0, 217.0, 236.0, 232.0, 209.0, 192.0, 213.0, 142.0, 163.0, 186.0, 184.0, 205.0, 200.0, 228.0, 228.0, 210.0, 217.0, 190.0, 208.0, 138.0, 109.0, 208.0, 197.0, 236.0, 220.0, 190.0, 180.0, 155.0, 138.0, 187.0, 200.0, 198.0, 198.0, 220.0, 236.0, 228.0, 222.0, 180.0, 224.0, 188.0, 205.0, 225.0, 219.0, 202.0, 185.0, 204.0, 200.0, 226.0, 227.0, 210.0, 188.0, 216.0, 225.0, 226.0, 230.0, 227.0, 220.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6866173620774393, "mean_inference_ms": 1.2097806306130832, "mean_action_processing_ms": 0.13288691750121373, "mean_env_wait_ms": 0.8607359373881026, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 473.0, "episode_reward_min": 247.0, "episode_reward_mean": 398.43, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 109.0}, "policy_reward_max": {"ppo": 252.0}, "policy_reward_mean": {"ppo": 199.215}, "hist_stats": {"episode_reward": [353.0, 364.0, 285.0, 398.0, 405.0, 407.0, 399.0, 393.0, 402.0, 456.0, 396.0, 453.0, 390.0, 287.0, 444.0, 341.0, 359.0, 387.0, 396.0, 459.0, 282.0, 450.0, 402.0, 399.0, 390.0, 408.0, 413.0, 396.0, 402.0, 447.0, 304.0, 456.0, 349.0, 304.0, 252.0, 413.0, 332.0, 416.0, 347.0, 441.0, 399.0, 342.0, 413.0, 344.0, 336.0, 456.0, 473.0, 408.0, 459.0, 450.0, 
456.0, 456.0, 410.0, 459.0, 384.0, 410.0, 349.0, 381.0, 462.0, 404.0, 404.0, 398.0, 390.0, 387.0, 444.0, 407.0, 450.0, 459.0, 384.0, 399.0, 296.0, 450.0, 453.0, 441.0, 405.0, 305.0, 370.0, 405.0, 456.0, 427.0, 398.0, 247.0, 405.0, 456.0, 370.0, 293.0, 387.0, 396.0, 456.0, 450.0, 404.0, 393.0, 444.0, 387.0, 404.0, 453.0, 398.0, 441.0, 456.0, 447.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [163.0, 190.0, 164.0, 200.0, 145.0, 140.0, 201.0, 197.0, 199.0, 206.0, 191.0, 216.0, 189.0, 210.0, 201.0, 192.0, 197.0, 205.0, 240.0, 216.0, 201.0, 195.0, 201.0, 252.0, 199.0, 191.0, 148.0, 139.0, 222.0, 222.0, 164.0, 177.0, 156.0, 203.0, 179.0, 208.0, 209.0, 187.0, 233.0, 226.0, 135.0, 147.0, 238.0, 212.0, 211.0, 191.0, 200.0, 199.0, 180.0, 210.0, 210.0, 198.0, 201.0, 212.0, 192.0, 204.0, 199.0, 203.0, 229.0, 218.0, 140.0, 164.0, 230.0, 226.0, 170.0, 179.0, 142.0, 162.0, 140.0, 112.0, 213.0, 200.0, 169.0, 163.0, 213.0, 203.0, 179.0, 168.0, 205.0, 236.0, 190.0, 209.0, 156.0, 186.0, 200.0, 213.0, 166.0, 178.0, 179.0, 157.0, 221.0, 235.0, 235.0, 238.0, 190.0, 218.0, 238.0, 221.0, 217.0, 233.0, 225.0, 231.0, 228.0, 228.0, 209.0, 201.0, 225.0, 234.0, 179.0, 205.0, 201.0, 209.0, 183.0, 166.0, 169.0, 212.0, 211.0, 251.0, 199.0, 205.0, 203.0, 201.0, 208.0, 190.0, 185.0, 205.0, 190.0, 197.0, 209.0, 235.0, 199.0, 208.0, 236.0, 214.0, 233.0, 226.0, 202.0, 182.0, 206.0, 193.0, 153.0, 143.0, 214.0, 236.0, 217.0, 236.0, 232.0, 209.0, 192.0, 213.0, 142.0, 163.0, 186.0, 184.0, 205.0, 200.0, 228.0, 228.0, 
210.0, 217.0, 190.0, 208.0, 138.0, 109.0, 208.0, 197.0, 236.0, 220.0, 190.0, 180.0, 155.0, 138.0, 187.0, 200.0, 198.0, 198.0, 220.0, 236.0, 228.0, 222.0, 180.0, 224.0, 188.0, 205.0, 225.0, 219.0, 202.0, 185.0, 204.0, 200.0, 226.0, 227.0, 210.0, 188.0, 216.0, 225.0, 226.0, 230.0, 227.0, 220.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6866173620774393, "mean_inference_ms": 1.2097806306130832, "mean_action_processing_ms": 0.13288691750121373, "mean_env_wait_ms": 0.8607359373881026, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 1945600, "num_agent_steps_trained": 1945600, "num_env_steps_sampled": 972800, "num_env_steps_trained": 972800, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 972800, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 1945600, "timers": {"training_iteration_time_ms": 3608.38, "learn_time_ms": 1057.828, "learn_throughput": 12100.26, "synch_weights_time_ms": 13.809}, "counters": {"num_env_steps_sampled": 972800, "num_env_steps_trained": 972800, "num_agent_steps_sampled": 1945600, "num_agent_steps_trained": 1945600}, "done": false, "episodes_total": 2432, "training_iteration": 76, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-04-51", "timestamp": 1666580691, "time_this_iter_s": 3.582211494445801, "time_total_s": 284.8906366825104, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, 
"device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, 
"sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": 
{"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", 
"observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, 
"keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function 
get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function 
gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 284.8906366825104, "timesteps_since_restore": 0, "iterations_since_restore": 76, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.580000000000002, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 143.2, "sparse_reward_min": 80, "sparse_reward_max": 180, "shaped_reward_mean": 123.3, "shaped_reward_min": 87, "shaped_reward_max": 153, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 12.95, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 13.37, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 20, "useful_onion_pickup_agent_0_mean": 12.34, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 12.66, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 20, "onion_drop_agent_0_mean": 0.93, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 1.07, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.54, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, 
"useful_onion_drop_agent_1_mean": 0.67, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 11.57, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 17, "potting_onion_agent_1_mean": 11.89, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 19, "dish_pickup_agent_0_mean": 5.23, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 5.23, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 2.35, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 6, "useful_dish_pickup_agent_1_mean": 2.59, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 1.29, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 1.29, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.56, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.5, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 4, "soup_pickup_agent_0_mean": 3.85, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 3.8, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 3.72, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 3.73, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.07, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 11.57, "optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 17, "optimal_onion_potting_agent_1_mean": 11.89, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 19, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 11.57, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 17, "viable_onion_potting_agent_1_mean": 11.89, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 19, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 2.646977999612734e-24, "cur_lr": 0.0010000000474974513, "total_loss": 0.0009492296376265585, "policy_loss": 0.0007508254493586719, 
"vf_loss": 8.144251823425293, "vf_explained_var": 0.6694010496139526, "kl": 0.002012323122471571, "entropy": 1.2320451736450195, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 985600, "num_env_steps_trained": 985600, "num_agent_steps_sampled": 1971200, "num_agent_steps_trained": 1971200}, "sampler_results": {"episode_reward_max": 513.0, "episode_reward_min": 247.0, "episode_reward_mean": 409.7, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 109.0}, "policy_reward_max": {"ppo": 266.0}, "policy_reward_mean": {"ppo": 204.85}, "custom_metrics": {"sparse_reward_mean": 143.2, "sparse_reward_min": 80, "sparse_reward_max": 180, "shaped_reward_mean": 123.3, "shaped_reward_min": 87, "shaped_reward_max": 153, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 12.95, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 13.37, 
"onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 20, "useful_onion_pickup_agent_0_mean": 12.34, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 12.66, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 20, "onion_drop_agent_0_mean": 0.93, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 1.07, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.54, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.67, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 11.57, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 17, "potting_onion_agent_1_mean": 11.89, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 19, "dish_pickup_agent_0_mean": 5.23, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 5.23, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 2.35, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 6, "useful_dish_pickup_agent_1_mean": 2.59, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 1.29, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 1.29, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.56, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.5, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 4, "soup_pickup_agent_0_mean": 3.85, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 3.8, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 3.72, "soup_delivery_agent_0_min": 0, 
"soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 3.73, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.07, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 11.57, "optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 17, "optimal_onion_potting_agent_1_mean": 11.89, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 19, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 11.57, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 17, "viable_onion_potting_agent_1_mean": 11.89, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 19, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [349.0, 304.0, 252.0, 413.0, 332.0, 416.0, 347.0, 441.0, 399.0, 342.0, 413.0, 344.0, 336.0, 456.0, 473.0, 408.0, 459.0, 450.0, 456.0, 456.0, 410.0, 459.0, 384.0, 410.0, 349.0, 381.0, 462.0, 404.0, 404.0, 398.0, 390.0, 387.0, 444.0, 407.0, 450.0, 459.0, 384.0, 399.0, 296.0, 450.0, 453.0, 441.0, 405.0, 305.0, 370.0, 405.0, 456.0, 427.0, 398.0, 247.0, 405.0, 456.0, 370.0, 293.0, 387.0, 396.0, 456.0, 450.0, 404.0, 393.0, 444.0, 387.0, 404.0, 453.0, 398.0, 441.0, 456.0, 447.0, 444.0, 393.0, 365.0, 356.0, 450.0, 453.0, 453.0, 390.0, 513.0, 462.0, 330.0, 447.0, 416.0, 407.0, 307.0, 450.0, 456.0, 453.0, 399.0, 447.0, 396.0, 444.0, 450.0, 390.0, 447.0, 462.0, 465.0, 450.0, 405.0, 444.0, 453.0, 453.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [170.0, 179.0, 142.0, 162.0, 140.0, 112.0, 213.0, 200.0, 169.0, 163.0, 213.0, 203.0, 179.0, 168.0, 205.0, 236.0, 190.0, 209.0, 156.0, 186.0, 200.0, 213.0, 166.0, 178.0, 179.0, 157.0, 221.0, 235.0, 235.0, 238.0, 190.0, 218.0, 238.0, 221.0, 217.0, 233.0, 225.0, 231.0, 228.0, 228.0, 209.0, 201.0, 225.0, 234.0, 179.0, 205.0, 201.0, 209.0, 183.0, 
166.0, 169.0, 212.0, 211.0, 251.0, 199.0, 205.0, 203.0, 201.0, 208.0, 190.0, 185.0, 205.0, 190.0, 197.0, 209.0, 235.0, 199.0, 208.0, 236.0, 214.0, 233.0, 226.0, 202.0, 182.0, 206.0, 193.0, 153.0, 143.0, 214.0, 236.0, 217.0, 236.0, 232.0, 209.0, 192.0, 213.0, 142.0, 163.0, 186.0, 184.0, 205.0, 200.0, 228.0, 228.0, 210.0, 217.0, 190.0, 208.0, 138.0, 109.0, 208.0, 197.0, 236.0, 220.0, 190.0, 180.0, 155.0, 138.0, 187.0, 200.0, 198.0, 198.0, 220.0, 236.0, 228.0, 222.0, 180.0, 224.0, 188.0, 205.0, 225.0, 219.0, 202.0, 185.0, 204.0, 200.0, 226.0, 227.0, 210.0, 188.0, 216.0, 225.0, 226.0, 230.0, 227.0, 220.0, 219.0, 225.0, 213.0, 180.0, 175.0, 190.0, 175.0, 181.0, 213.0, 237.0, 225.0, 228.0, 227.0, 226.0, 184.0, 206.0, 247.0, 266.0, 237.0, 225.0, 172.0, 158.0, 221.0, 226.0, 209.0, 207.0, 200.0, 207.0, 162.0, 145.0, 233.0, 217.0, 218.0, 238.0, 233.0, 220.0, 194.0, 205.0, 230.0, 217.0, 215.0, 181.0, 214.0, 230.0, 219.0, 231.0, 197.0, 193.0, 225.0, 222.0, 238.0, 224.0, 235.0, 230.0, 234.0, 216.0, 199.0, 206.0, 223.0, 221.0, 239.0, 214.0, 230.0, 223.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6865708178972108, "mean_inference_ms": 1.2096254806364681, "mean_action_processing_ms": 0.13287110233558327, "mean_env_wait_ms": 0.8600840821401189, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 513.0, "episode_reward_min": 247.0, "episode_reward_mean": 409.7, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 109.0}, "policy_reward_max": {"ppo": 266.0}, "policy_reward_mean": {"ppo": 204.85}, "hist_stats": {"episode_reward": [349.0, 304.0, 252.0, 413.0, 332.0, 416.0, 347.0, 441.0, 399.0, 342.0, 413.0, 344.0, 336.0, 456.0, 473.0, 408.0, 459.0, 450.0, 456.0, 456.0, 410.0, 459.0, 384.0, 410.0, 349.0, 381.0, 462.0, 404.0, 404.0, 398.0, 390.0, 387.0, 444.0, 407.0, 450.0, 459.0, 384.0, 399.0, 296.0, 450.0, 453.0, 441.0, 405.0, 305.0, 370.0, 405.0, 456.0, 427.0, 398.0, 247.0, 405.0, 456.0, 370.0, 293.0, 387.0, 396.0, 456.0, 
450.0, 404.0, 393.0, 444.0, 387.0, 404.0, 453.0, 398.0, 441.0, 456.0, 447.0, 444.0, 393.0, 365.0, 356.0, 450.0, 453.0, 453.0, 390.0, 513.0, 462.0, 330.0, 447.0, 416.0, 407.0, 307.0, 450.0, 456.0, 453.0, 399.0, 447.0, 396.0, 444.0, 450.0, 390.0, 447.0, 462.0, 465.0, 450.0, 405.0, 444.0, 453.0, 453.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [170.0, 179.0, 142.0, 162.0, 140.0, 112.0, 213.0, 200.0, 169.0, 163.0, 213.0, 203.0, 179.0, 168.0, 205.0, 236.0, 190.0, 209.0, 156.0, 186.0, 200.0, 213.0, 166.0, 178.0, 179.0, 157.0, 221.0, 235.0, 235.0, 238.0, 190.0, 218.0, 238.0, 221.0, 217.0, 233.0, 225.0, 231.0, 228.0, 228.0, 209.0, 201.0, 225.0, 234.0, 179.0, 205.0, 201.0, 209.0, 183.0, 166.0, 169.0, 212.0, 211.0, 251.0, 199.0, 205.0, 203.0, 201.0, 208.0, 190.0, 185.0, 205.0, 190.0, 197.0, 209.0, 235.0, 199.0, 208.0, 236.0, 214.0, 233.0, 226.0, 202.0, 182.0, 206.0, 193.0, 153.0, 143.0, 214.0, 236.0, 217.0, 236.0, 232.0, 209.0, 192.0, 213.0, 142.0, 163.0, 186.0, 184.0, 205.0, 200.0, 228.0, 228.0, 210.0, 217.0, 190.0, 208.0, 138.0, 109.0, 208.0, 197.0, 236.0, 220.0, 190.0, 180.0, 155.0, 138.0, 187.0, 200.0, 198.0, 198.0, 220.0, 236.0, 228.0, 222.0, 180.0, 224.0, 188.0, 205.0, 225.0, 219.0, 202.0, 185.0, 204.0, 200.0, 226.0, 227.0, 210.0, 188.0, 216.0, 225.0, 226.0, 230.0, 227.0, 220.0, 219.0, 225.0, 213.0, 180.0, 175.0, 190.0, 175.0, 181.0, 213.0, 237.0, 225.0, 228.0, 227.0, 226.0, 184.0, 206.0, 247.0, 266.0, 237.0, 225.0, 172.0, 158.0, 221.0, 226.0, 209.0, 207.0, 200.0, 207.0, 162.0, 
145.0, 233.0, 217.0, 218.0, 238.0, 233.0, 220.0, 194.0, 205.0, 230.0, 217.0, 215.0, 181.0, 214.0, 230.0, 219.0, 231.0, 197.0, 193.0, 225.0, 222.0, 238.0, 224.0, 235.0, 230.0, 234.0, 216.0, 199.0, 206.0, 223.0, 221.0, 239.0, 214.0, 230.0, 223.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6865708178972108, "mean_inference_ms": 1.2096254806364681, "mean_action_processing_ms": 0.13287110233558327, "mean_env_wait_ms": 0.8600840821401189, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 1971200, "num_agent_steps_trained": 1971200, "num_env_steps_sampled": 985600, "num_env_steps_trained": 985600, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 985600, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 1971200, "timers": {"training_iteration_time_ms": 3608.625, "learn_time_ms": 1058.889, "learn_throughput": 12088.147, "synch_weights_time_ms": 13.773}, "counters": {"num_env_steps_sampled": 985600, "num_env_steps_trained": 985600, "num_agent_steps_sampled": 1971200, "num_agent_steps_trained": 1971200}, "done": false, "episodes_total": 2464, "training_iteration": 77, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-04-55", "timestamp": 1666580695, "time_this_iter_s": 3.6801352500915527, "time_total_s": 288.57077193260193, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 288.57077193260193, "timesteps_since_restore": 0, "iterations_since_restore": 77, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.916666666666668, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 144.6, "sparse_reward_min": 60, "sparse_reward_max": 180, "shaped_reward_mean": 123.12, "shaped_reward_min": 60, "shaped_reward_max": 153, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 12.73, "onion_pickup_agent_0_min": 5, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 13.44, "onion_pickup_agent_1_min": 5, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 12.16, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 19, "useful_onion_pickup_agent_1_mean": 12.79, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 20, "onion_drop_agent_0_mean": 0.83, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 1.02, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.5, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, 
"useful_onion_drop_agent_1_mean": 0.6, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 11.46, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 18, "potting_onion_agent_1_mean": 12.02, "potting_onion_agent_1_min": 3, "potting_onion_agent_1_max": 19, "dish_pickup_agent_0_mean": 5.45, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 5.12, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 2.54, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 2.37, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 1.41, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 1.26, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.52, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.46, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 3.93, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 3.69, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 3.82, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 3.61, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.07, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.05, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 11.46, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 18, "optimal_onion_potting_agent_1_mean": 12.02, "optimal_onion_potting_agent_1_min": 3, "optimal_onion_potting_agent_1_max": 19, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 11.46, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 18, "viable_onion_potting_agent_1_mean": 12.02, "viable_onion_potting_agent_1_min": 3, "viable_onion_potting_agent_1_max": 19, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 1.323488999806367e-24, "cur_lr": 0.0010000000474974513, "total_loss": 0.0015237071784213185, "policy_loss": 0.0013373284600675106, 
"vf_loss": 8.073976516723633, "vf_explained_var": 0.6980078220367432, "kl": 0.0019213203340768814, "entropy": 1.2420413494110107, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 998400, "num_env_steps_trained": 998400, "num_agent_steps_sampled": 1996800, "num_agent_steps_trained": 1996800}, "sampler_results": {"episode_reward_max": 513.0, "episode_reward_min": 180.0, "episode_reward_mean": 412.32, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 80.0}, "policy_reward_max": {"ppo": 266.0}, "policy_reward_mean": {"ppo": 206.16}, "custom_metrics": {"sparse_reward_mean": 144.6, "sparse_reward_min": 60, "sparse_reward_max": 180, "shaped_reward_mean": 123.12, "shaped_reward_min": 60, "shaped_reward_max": 153, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 12.73, "onion_pickup_agent_0_min": 5, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 13.44, 
"onion_pickup_agent_1_min": 5, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 12.16, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 19, "useful_onion_pickup_agent_1_mean": 12.79, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 20, "onion_drop_agent_0_mean": 0.83, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 1.02, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.5, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.6, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 11.46, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 18, "potting_onion_agent_1_mean": 12.02, "potting_onion_agent_1_min": 3, "potting_onion_agent_1_max": 19, "dish_pickup_agent_0_mean": 5.45, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 5.12, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 2.54, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 2.37, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 1.41, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 1.26, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.52, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.46, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 3.93, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 3.69, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 3.82, "soup_delivery_agent_0_min": 1, 
"soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 3.61, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.07, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.05, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 11.46, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 18, "optimal_onion_potting_agent_1_mean": 12.02, "optimal_onion_potting_agent_1_min": 3, "optimal_onion_potting_agent_1_max": 19, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 11.46, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 18, "viable_onion_potting_agent_1_mean": 12.02, "viable_onion_potting_agent_1_min": 3, "viable_onion_potting_agent_1_max": 19, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [444.0, 407.0, 450.0, 459.0, 384.0, 399.0, 296.0, 450.0, 453.0, 441.0, 405.0, 305.0, 370.0, 405.0, 456.0, 427.0, 398.0, 247.0, 405.0, 456.0, 370.0, 293.0, 387.0, 396.0, 456.0, 450.0, 404.0, 393.0, 444.0, 387.0, 404.0, 453.0, 398.0, 441.0, 456.0, 447.0, 444.0, 393.0, 365.0, 356.0, 450.0, 453.0, 453.0, 390.0, 513.0, 462.0, 330.0, 447.0, 416.0, 407.0, 307.0, 450.0, 456.0, 453.0, 399.0, 447.0, 396.0, 444.0, 450.0, 390.0, 447.0, 462.0, 465.0, 450.0, 405.0, 444.0, 453.0, 453.0, 464.0, 390.0, 424.0, 455.0, 453.0, 441.0, 444.0, 387.0, 450.0, 462.0, 447.0, 447.0, 290.0, 408.0, 453.0, 470.0, 470.0, 183.0, 339.0, 438.0, 450.0, 352.0, 344.0, 392.0, 450.0, 444.0, 180.0, 407.0, 401.0, 410.0, 351.0, 450.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [209.0, 235.0, 199.0, 208.0, 236.0, 214.0, 233.0, 226.0, 202.0, 182.0, 206.0, 193.0, 153.0, 143.0, 214.0, 236.0, 217.0, 236.0, 232.0, 209.0, 192.0, 213.0, 142.0, 163.0, 186.0, 184.0, 205.0, 200.0, 228.0, 228.0, 210.0, 217.0, 190.0, 208.0, 138.0, 109.0, 208.0, 197.0, 236.0, 220.0, 190.0, 180.0, 155.0, 138.0, 187.0, 200.0, 198.0, 198.0, 220.0, 
236.0, 228.0, 222.0, 180.0, 224.0, 188.0, 205.0, 225.0, 219.0, 202.0, 185.0, 204.0, 200.0, 226.0, 227.0, 210.0, 188.0, 216.0, 225.0, 226.0, 230.0, 227.0, 220.0, 219.0, 225.0, 213.0, 180.0, 175.0, 190.0, 175.0, 181.0, 213.0, 237.0, 225.0, 228.0, 227.0, 226.0, 184.0, 206.0, 247.0, 266.0, 237.0, 225.0, 172.0, 158.0, 221.0, 226.0, 209.0, 207.0, 200.0, 207.0, 162.0, 145.0, 233.0, 217.0, 218.0, 238.0, 233.0, 220.0, 194.0, 205.0, 230.0, 217.0, 215.0, 181.0, 214.0, 230.0, 219.0, 231.0, 197.0, 193.0, 225.0, 222.0, 238.0, 224.0, 235.0, 230.0, 234.0, 216.0, 199.0, 206.0, 223.0, 221.0, 239.0, 214.0, 230.0, 223.0, 238.0, 226.0, 185.0, 205.0, 226.0, 198.0, 228.0, 227.0, 233.0, 220.0, 217.0, 224.0, 214.0, 230.0, 191.0, 196.0, 214.0, 236.0, 233.0, 229.0, 208.0, 239.0, 221.0, 226.0, 145.0, 145.0, 200.0, 208.0, 221.0, 232.0, 227.0, 243.0, 244.0, 226.0, 103.0, 80.0, 184.0, 155.0, 212.0, 226.0, 228.0, 222.0, 159.0, 193.0, 167.0, 177.0, 205.0, 187.0, 224.0, 226.0, 212.0, 232.0, 89.0, 91.0, 210.0, 197.0, 190.0, 211.0, 200.0, 210.0, 180.0, 171.0, 234.0, 216.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6866386191695207, "mean_inference_ms": 1.2096373080243905, "mean_action_processing_ms": 0.13288295376807122, "mean_env_wait_ms": 0.8596358143701005, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 513.0, "episode_reward_min": 180.0, "episode_reward_mean": 412.32, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 80.0}, "policy_reward_max": {"ppo": 266.0}, "policy_reward_mean": {"ppo": 206.16}, "hist_stats": {"episode_reward": [444.0, 407.0, 450.0, 459.0, 384.0, 399.0, 296.0, 450.0, 453.0, 441.0, 405.0, 305.0, 370.0, 405.0, 456.0, 427.0, 398.0, 247.0, 405.0, 456.0, 370.0, 293.0, 387.0, 396.0, 456.0, 450.0, 404.0, 393.0, 444.0, 387.0, 404.0, 453.0, 398.0, 441.0, 456.0, 447.0, 444.0, 393.0, 365.0, 356.0, 450.0, 453.0, 453.0, 390.0, 513.0, 462.0, 330.0, 447.0, 416.0, 407.0, 307.0, 450.0, 456.0, 453.0, 399.0, 447.0, 396.0, 
444.0, 450.0, 390.0, 447.0, 462.0, 465.0, 450.0, 405.0, 444.0, 453.0, 453.0, 464.0, 390.0, 424.0, 455.0, 453.0, 441.0, 444.0, 387.0, 450.0, 462.0, 447.0, 447.0, 290.0, 408.0, 453.0, 470.0, 470.0, 183.0, 339.0, 438.0, 450.0, 352.0, 344.0, 392.0, 450.0, 444.0, 180.0, 407.0, 401.0, 410.0, 351.0, 450.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [209.0, 235.0, 199.0, 208.0, 236.0, 214.0, 233.0, 226.0, 202.0, 182.0, 206.0, 193.0, 153.0, 143.0, 214.0, 236.0, 217.0, 236.0, 232.0, 209.0, 192.0, 213.0, 142.0, 163.0, 186.0, 184.0, 205.0, 200.0, 228.0, 228.0, 210.0, 217.0, 190.0, 208.0, 138.0, 109.0, 208.0, 197.0, 236.0, 220.0, 190.0, 180.0, 155.0, 138.0, 187.0, 200.0, 198.0, 198.0, 220.0, 236.0, 228.0, 222.0, 180.0, 224.0, 188.0, 205.0, 225.0, 219.0, 202.0, 185.0, 204.0, 200.0, 226.0, 227.0, 210.0, 188.0, 216.0, 225.0, 226.0, 230.0, 227.0, 220.0, 219.0, 225.0, 213.0, 180.0, 175.0, 190.0, 175.0, 181.0, 213.0, 237.0, 225.0, 228.0, 227.0, 226.0, 184.0, 206.0, 247.0, 266.0, 237.0, 225.0, 172.0, 158.0, 221.0, 226.0, 209.0, 207.0, 200.0, 207.0, 162.0, 145.0, 233.0, 217.0, 218.0, 238.0, 233.0, 220.0, 194.0, 205.0, 230.0, 217.0, 215.0, 181.0, 214.0, 230.0, 219.0, 231.0, 197.0, 193.0, 225.0, 222.0, 238.0, 224.0, 235.0, 230.0, 234.0, 216.0, 199.0, 206.0, 223.0, 221.0, 239.0, 214.0, 230.0, 223.0, 238.0, 226.0, 185.0, 205.0, 226.0, 198.0, 228.0, 227.0, 233.0, 220.0, 217.0, 224.0, 214.0, 230.0, 191.0, 196.0, 214.0, 236.0, 233.0, 229.0, 208.0, 239.0, 221.0, 226.0, 145.0, 145.0, 200.0, 208.0, 221.0, 
232.0, 227.0, 243.0, 244.0, 226.0, 103.0, 80.0, 184.0, 155.0, 212.0, 226.0, 228.0, 222.0, 159.0, 193.0, 167.0, 177.0, 205.0, 187.0, 224.0, 226.0, 212.0, 232.0, 89.0, 91.0, 210.0, 197.0, 190.0, 211.0, 200.0, 210.0, 180.0, 171.0, 234.0, 216.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6866386191695207, "mean_inference_ms": 1.2096373080243905, "mean_action_processing_ms": 0.13288295376807122, "mean_env_wait_ms": 0.8596358143701005, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 1996800, "num_agent_steps_trained": 1996800, "num_env_steps_sampled": 998400, "num_env_steps_trained": 998400, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 998400, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 1996800, "timers": {"training_iteration_time_ms": 3616.948, "learn_time_ms": 1068.299, "learn_throughput": 11981.665, "synch_weights_time_ms": 13.675}, "counters": {"num_env_steps_sampled": 998400, "num_env_steps_trained": 998400, "num_agent_steps_sampled": 1996800, "num_agent_steps_trained": 1996800}, "done": false, "episodes_total": 2496, "training_iteration": 78, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-04-59", "timestamp": 1666580699, "time_this_iter_s": 3.7753701210021973, "time_total_s": 292.3461420536041, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": 
true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, 
"train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": 
{"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, 
"enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 292.3461420536041, "timesteps_since_restore": 0, "iterations_since_restore": 78, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 24.220000000000002, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 146.0, "sparse_reward_min": 60, "sparse_reward_max": 180, "shaped_reward_mean": 123.91, "shaped_reward_min": 60, "shaped_reward_max": 153, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 12.87, "onion_pickup_agent_0_min": 5, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 13.49, "onion_pickup_agent_1_min": 5, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 12.31, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 19, "useful_onion_pickup_agent_1_mean": 12.84, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 20, "onion_drop_agent_0_mean": 0.91, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 0.96, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.55, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, 
"useful_onion_drop_agent_1_mean": 0.54, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 11.48, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 18, "potting_onion_agent_1_mean": 12.16, "potting_onion_agent_1_min": 3, "potting_onion_agent_1_max": 19, "dish_pickup_agent_0_mean": 5.54, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 5.21, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 2.49, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 2.44, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 1.46, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 1.33, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.46, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.52, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 3.97, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 3.72, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 3.86, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 3.63, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.07, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.06, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 11.48, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 18, "optimal_onion_potting_agent_1_mean": 12.16, "optimal_onion_potting_agent_1_min": 3, "optimal_onion_potting_agent_1_max": 19, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 11.48, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 18, "viable_onion_potting_agent_1_mean": 12.16, "viable_onion_potting_agent_1_min": 3, "viable_onion_potting_agent_1_max": 19, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 6.617444999031835e-25, "cur_lr": 0.0010000000474974513, "total_loss": -0.001587323728017509, "policy_loss": 
-0.0017705481732264161, "vf_loss": 8.071381568908691, "vf_explained_var": 0.6893019676208496, "kl": 0.0017396470066159964, "entropy": 1.2478301525115967, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 1011200, "num_env_steps_trained": 1011200, "num_agent_steps_sampled": 2022400, "num_agent_steps_trained": 2022400}, "sampler_results": {"episode_reward_max": 513.0, "episode_reward_min": 180.0, "episode_reward_mean": 415.91, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 80.0}, "policy_reward_max": {"ppo": 266.0}, "policy_reward_mean": {"ppo": 207.955}, "custom_metrics": {"sparse_reward_mean": 146.0, "sparse_reward_min": 60, "sparse_reward_max": 180, "shaped_reward_mean": 123.91, "shaped_reward_min": 60, "shaped_reward_max": 153, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 12.87, "onion_pickup_agent_0_min": 5, "onion_pickup_agent_0_max": 20, 
"onion_pickup_agent_1_mean": 13.49, "onion_pickup_agent_1_min": 5, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 12.31, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 19, "useful_onion_pickup_agent_1_mean": 12.84, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 20, "onion_drop_agent_0_mean": 0.91, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 0.96, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.55, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 0.54, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 11.48, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 18, "potting_onion_agent_1_mean": 12.16, "potting_onion_agent_1_min": 3, "potting_onion_agent_1_max": 19, "dish_pickup_agent_0_mean": 5.54, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 5.21, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 2.49, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 2.44, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 1.46, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 1.33, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.46, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.52, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 3.97, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 3.72, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 3.86, 
"soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 3.63, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.07, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.06, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 11.48, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 18, "optimal_onion_potting_agent_1_mean": 12.16, "optimal_onion_potting_agent_1_min": 3, "optimal_onion_potting_agent_1_max": 19, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 11.48, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 18, "viable_onion_potting_agent_1_mean": 12.16, "viable_onion_potting_agent_1_min": 3, "viable_onion_potting_agent_1_max": 19, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, 
"useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [398.0, 441.0, 456.0, 447.0, 444.0, 393.0, 365.0, 356.0, 450.0, 453.0, 453.0, 390.0, 513.0, 462.0, 330.0, 447.0, 416.0, 407.0, 307.0, 450.0, 456.0, 453.0, 399.0, 447.0, 396.0, 444.0, 450.0, 390.0, 447.0, 462.0, 465.0, 450.0, 405.0, 444.0, 453.0, 453.0, 464.0, 390.0, 424.0, 455.0, 453.0, 441.0, 444.0, 387.0, 450.0, 462.0, 447.0, 447.0, 290.0, 408.0, 453.0, 470.0, 470.0, 183.0, 339.0, 438.0, 450.0, 352.0, 344.0, 392.0, 450.0, 444.0, 180.0, 407.0, 401.0, 410.0, 351.0, 450.0, 456.0, 462.0, 447.0, 309.0, 462.0, 441.0, 453.0, 450.0, 293.0, 390.0, 294.0, 453.0, 361.0, 459.0, 456.0, 465.0, 390.0, 421.0, 396.0, 390.0, 504.0, 310.0, 404.0, 395.0, 444.0, 330.0, 453.0, 407.0, 444.0, 459.0, 459.0, 396.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [210.0, 188.0, 216.0, 225.0, 226.0, 230.0, 227.0, 220.0, 219.0, 225.0, 213.0, 180.0, 175.0, 190.0, 175.0, 181.0, 213.0, 237.0, 225.0, 228.0, 227.0, 226.0, 184.0, 206.0, 247.0, 266.0, 237.0, 225.0, 172.0, 158.0, 221.0, 226.0, 209.0, 207.0, 200.0, 207.0, 162.0, 145.0, 233.0, 217.0, 218.0, 238.0, 233.0, 
220.0, 194.0, 205.0, 230.0, 217.0, 215.0, 181.0, 214.0, 230.0, 219.0, 231.0, 197.0, 193.0, 225.0, 222.0, 238.0, 224.0, 235.0, 230.0, 234.0, 216.0, 199.0, 206.0, 223.0, 221.0, 239.0, 214.0, 230.0, 223.0, 238.0, 226.0, 185.0, 205.0, 226.0, 198.0, 228.0, 227.0, 233.0, 220.0, 217.0, 224.0, 214.0, 230.0, 191.0, 196.0, 214.0, 236.0, 233.0, 229.0, 208.0, 239.0, 221.0, 226.0, 145.0, 145.0, 200.0, 208.0, 221.0, 232.0, 227.0, 243.0, 244.0, 226.0, 103.0, 80.0, 184.0, 155.0, 212.0, 226.0, 228.0, 222.0, 159.0, 193.0, 167.0, 177.0, 205.0, 187.0, 224.0, 226.0, 212.0, 232.0, 89.0, 91.0, 210.0, 197.0, 190.0, 211.0, 200.0, 210.0, 180.0, 171.0, 234.0, 216.0, 238.0, 218.0, 224.0, 238.0, 222.0, 225.0, 158.0, 151.0, 232.0, 230.0, 223.0, 218.0, 241.0, 212.0, 212.0, 238.0, 134.0, 159.0, 205.0, 185.0, 154.0, 140.0, 228.0, 225.0, 185.0, 176.0, 225.0, 234.0, 218.0, 238.0, 236.0, 229.0, 189.0, 201.0, 199.0, 222.0, 176.0, 220.0, 190.0, 200.0, 253.0, 251.0, 156.0, 154.0, 201.0, 203.0, 190.0, 205.0, 221.0, 223.0, 161.0, 169.0, 225.0, 228.0, 204.0, 203.0, 224.0, 220.0, 226.0, 233.0, 235.0, 224.0, 200.0, 196.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6867761483431252, "mean_inference_ms": 1.2096410126566395, "mean_action_processing_ms": 0.13289617100446416, "mean_env_wait_ms": 0.8591877879141236, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 513.0, "episode_reward_min": 180.0, "episode_reward_mean": 415.91, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 80.0}, "policy_reward_max": {"ppo": 266.0}, "policy_reward_mean": {"ppo": 207.955}, "hist_stats": {"episode_reward": [398.0, 441.0, 456.0, 447.0, 444.0, 393.0, 365.0, 356.0, 450.0, 453.0, 453.0, 390.0, 513.0, 462.0, 330.0, 447.0, 416.0, 407.0, 307.0, 450.0, 456.0, 453.0, 399.0, 447.0, 396.0, 444.0, 450.0, 390.0, 447.0, 462.0, 465.0, 450.0, 405.0, 444.0, 453.0, 453.0, 464.0, 390.0, 424.0, 455.0, 453.0, 441.0, 444.0, 387.0, 450.0, 462.0, 447.0, 447.0, 290.0, 408.0, 453.0, 
470.0, 470.0, 183.0, 339.0, 438.0, 450.0, 352.0, 344.0, 392.0, 450.0, 444.0, 180.0, 407.0, 401.0, 410.0, 351.0, 450.0, 456.0, 462.0, 447.0, 309.0, 462.0, 441.0, 453.0, 450.0, 293.0, 390.0, 294.0, 453.0, 361.0, 459.0, 456.0, 465.0, 390.0, 421.0, 396.0, 390.0, 504.0, 310.0, 404.0, 395.0, 444.0, 330.0, 453.0, 407.0, 444.0, 459.0, 459.0, 396.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [210.0, 188.0, 216.0, 225.0, 226.0, 230.0, 227.0, 220.0, 219.0, 225.0, 213.0, 180.0, 175.0, 190.0, 175.0, 181.0, 213.0, 237.0, 225.0, 228.0, 227.0, 226.0, 184.0, 206.0, 247.0, 266.0, 237.0, 225.0, 172.0, 158.0, 221.0, 226.0, 209.0, 207.0, 200.0, 207.0, 162.0, 145.0, 233.0, 217.0, 218.0, 238.0, 233.0, 220.0, 194.0, 205.0, 230.0, 217.0, 215.0, 181.0, 214.0, 230.0, 219.0, 231.0, 197.0, 193.0, 225.0, 222.0, 238.0, 224.0, 235.0, 230.0, 234.0, 216.0, 199.0, 206.0, 223.0, 221.0, 239.0, 214.0, 230.0, 223.0, 238.0, 226.0, 185.0, 205.0, 226.0, 198.0, 228.0, 227.0, 233.0, 220.0, 217.0, 224.0, 214.0, 230.0, 191.0, 196.0, 214.0, 236.0, 233.0, 229.0, 208.0, 239.0, 221.0, 226.0, 145.0, 145.0, 200.0, 208.0, 221.0, 232.0, 227.0, 243.0, 244.0, 226.0, 103.0, 80.0, 184.0, 155.0, 212.0, 226.0, 228.0, 222.0, 159.0, 193.0, 167.0, 177.0, 205.0, 187.0, 224.0, 226.0, 212.0, 232.0, 89.0, 91.0, 210.0, 197.0, 190.0, 211.0, 200.0, 210.0, 180.0, 171.0, 234.0, 216.0, 238.0, 218.0, 224.0, 238.0, 222.0, 225.0, 158.0, 151.0, 232.0, 230.0, 223.0, 218.0, 241.0, 212.0, 212.0, 238.0, 134.0, 159.0, 205.0, 185.0, 154.0, 140.0, 228.0, 
225.0, 185.0, 176.0, 225.0, 234.0, 218.0, 238.0, 236.0, 229.0, 189.0, 201.0, 199.0, 222.0, 176.0, 220.0, 190.0, 200.0, 253.0, 251.0, 156.0, 154.0, 201.0, 203.0, 190.0, 205.0, 221.0, 223.0, 161.0, 169.0, 225.0, 228.0, 204.0, 203.0, 224.0, 220.0, 226.0, 233.0, 235.0, 224.0, 200.0, 196.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6867761483431252, "mean_inference_ms": 1.2096410126566395, "mean_action_processing_ms": 0.13289617100446416, "mean_env_wait_ms": 0.8591877879141236, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 2022400, "num_agent_steps_trained": 2022400, "num_env_steps_sampled": 1011200, "num_env_steps_trained": 1011200, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 1011200, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 2022400, "timers": {"training_iteration_time_ms": 3596.697, "learn_time_ms": 1053.01, "learn_throughput": 12155.626, "synch_weights_time_ms": 13.294}, "counters": {"num_env_steps_sampled": 1011200, "num_env_steps_trained": 1011200, "num_agent_steps_sampled": 2022400, "num_agent_steps_trained": 2022400}, "done": false, "episodes_total": 2528, "training_iteration": 79, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-05-03", "timestamp": 1666580703, "time_this_iter_s": 3.525020122528076, "time_total_s": 295.8711621761322, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, 
"device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, 
"sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": 
{"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", 
"observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, 
"keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function 
get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function 
gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 295.8711621761322, "timesteps_since_restore": 0, "iterations_since_restore": 79, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 24.34, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 145.2, "sparse_reward_min": 60, "sparse_reward_max": 180, "shaped_reward_mean": 123.27, "shaped_reward_min": 60, "shaped_reward_max": 159, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 13.0, "onion_pickup_agent_0_min": 5, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 13.29, "onion_pickup_agent_1_min": 5, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 12.49, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 12.73, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 20, "onion_drop_agent_0_mean": 0.97, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 0.95, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.58, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, 
"useful_onion_drop_agent_1_mean": 0.55, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 11.55, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 12.0, "potting_onion_agent_1_min": 3, "potting_onion_agent_1_max": 19, "dish_pickup_agent_0_mean": 5.63, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.34, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 2.39, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 2.4, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 1.58, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 1.4, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 6, "useful_dish_drop_agent_0_mean": 0.54, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.59, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 3.9, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 3.8, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 3.78, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 3.69, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.07, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.09, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 11.55, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 12.0, "optimal_onion_potting_agent_1_min": 3, "optimal_onion_potting_agent_1_max": 19, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 11.55, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 12.0, "viable_onion_potting_agent_1_min": 3, "viable_onion_potting_agent_1_max": 19, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 3.3087224995159173e-25, "cur_lr": 0.0010000000474974513, "total_loss": 3.7081073969602585e-05, "policy_loss": 
-0.000135608424898237, "vf_loss": 7.9923295974731445, "vf_explained_var": 0.6859964728355408, "kl": 0.0021027429029345512, "entropy": 1.2530903816223145, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 1024000, "num_env_steps_trained": 1024000, "num_agent_steps_sampled": 2048000, "num_agent_steps_trained": 2048000}, "sampler_results": {"episode_reward_max": 513.0, "episode_reward_min": 180.0, "episode_reward_mean": 413.67, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 80.0}, "policy_reward_max": {"ppo": 257.0}, "policy_reward_mean": {"ppo": 206.835}, "custom_metrics": {"sparse_reward_mean": 145.2, "sparse_reward_min": 60, "sparse_reward_max": 180, "shaped_reward_mean": 123.27, "shaped_reward_min": 60, "shaped_reward_max": 159, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 13.0, "onion_pickup_agent_0_min": 5, "onion_pickup_agent_0_max": 23, 
"onion_pickup_agent_1_mean": 13.29, "onion_pickup_agent_1_min": 5, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 12.49, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 12.73, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 20, "onion_drop_agent_0_mean": 0.97, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 0.95, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.58, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 0.55, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 11.55, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 12.0, "potting_onion_agent_1_min": 3, "potting_onion_agent_1_max": 19, "dish_pickup_agent_0_mean": 5.63, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.34, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 2.39, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 2.4, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 1.58, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 1.4, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 6, "useful_dish_drop_agent_0_mean": 0.54, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.59, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 3.9, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 3.8, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 3.78, 
"soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 3.69, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.07, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.09, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 11.55, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 12.0, "optimal_onion_potting_agent_1_min": 3, "optimal_onion_potting_agent_1_max": 19, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 11.55, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 12.0, "viable_onion_potting_agent_1_min": 3, "viable_onion_potting_agent_1_max": 19, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, 
"useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [405.0, 444.0, 453.0, 453.0, 464.0, 390.0, 424.0, 455.0, 453.0, 441.0, 444.0, 387.0, 450.0, 462.0, 447.0, 447.0, 290.0, 408.0, 453.0, 470.0, 470.0, 183.0, 339.0, 438.0, 450.0, 352.0, 344.0, 392.0, 450.0, 444.0, 180.0, 407.0, 401.0, 410.0, 351.0, 450.0, 456.0, 462.0, 447.0, 309.0, 462.0, 441.0, 453.0, 450.0, 293.0, 390.0, 294.0, 453.0, 361.0, 459.0, 456.0, 465.0, 390.0, 421.0, 396.0, 390.0, 504.0, 310.0, 404.0, 395.0, 444.0, 330.0, 453.0, 407.0, 444.0, 459.0, 459.0, 396.0, 438.0, 441.0, 455.0, 450.0, 433.0, 441.0, 359.0, 453.0, 410.0, 393.0, 459.0, 456.0, 301.0, 456.0, 447.0, 387.0, 513.0, 387.0, 453.0, 479.0, 407.0, 447.0, 456.0, 401.0, 465.0, 330.0, 398.0, 227.0, 444.0, 387.0, 396.0, 444.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [199.0, 206.0, 223.0, 221.0, 239.0, 214.0, 230.0, 223.0, 238.0, 226.0, 185.0, 205.0, 226.0, 198.0, 228.0, 227.0, 233.0, 220.0, 217.0, 224.0, 214.0, 230.0, 191.0, 196.0, 214.0, 236.0, 233.0, 229.0, 208.0, 239.0, 221.0, 226.0, 145.0, 145.0, 200.0, 208.0, 221.0, 232.0, 227.0, 243.0, 244.0, 226.0, 103.0, 
80.0, 184.0, 155.0, 212.0, 226.0, 228.0, 222.0, 159.0, 193.0, 167.0, 177.0, 205.0, 187.0, 224.0, 226.0, 212.0, 232.0, 89.0, 91.0, 210.0, 197.0, 190.0, 211.0, 200.0, 210.0, 180.0, 171.0, 234.0, 216.0, 238.0, 218.0, 224.0, 238.0, 222.0, 225.0, 158.0, 151.0, 232.0, 230.0, 223.0, 218.0, 241.0, 212.0, 212.0, 238.0, 134.0, 159.0, 205.0, 185.0, 154.0, 140.0, 228.0, 225.0, 185.0, 176.0, 225.0, 234.0, 218.0, 238.0, 236.0, 229.0, 189.0, 201.0, 199.0, 222.0, 176.0, 220.0, 190.0, 200.0, 253.0, 251.0, 156.0, 154.0, 201.0, 203.0, 190.0, 205.0, 221.0, 223.0, 161.0, 169.0, 225.0, 228.0, 204.0, 203.0, 224.0, 220.0, 226.0, 233.0, 235.0, 224.0, 200.0, 196.0, 202.0, 236.0, 238.0, 203.0, 231.0, 224.0, 218.0, 232.0, 206.0, 227.0, 225.0, 216.0, 180.0, 179.0, 239.0, 214.0, 193.0, 217.0, 200.0, 193.0, 218.0, 241.0, 244.0, 212.0, 150.0, 151.0, 234.0, 222.0, 221.0, 226.0, 183.0, 204.0, 256.0, 257.0, 204.0, 183.0, 225.0, 228.0, 239.0, 240.0, 206.0, 201.0, 230.0, 217.0, 216.0, 240.0, 204.0, 197.0, 229.0, 236.0, 141.0, 189.0, 201.0, 197.0, 114.0, 113.0, 232.0, 212.0, 203.0, 184.0, 213.0, 183.0, 229.0, 215.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.686908689458971, "mean_inference_ms": 1.2096171268596805, "mean_action_processing_ms": 0.13290034895586486, "mean_env_wait_ms": 0.8587133886503696, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 513.0, "episode_reward_min": 180.0, "episode_reward_mean": 413.67, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 80.0}, "policy_reward_max": {"ppo": 257.0}, "policy_reward_mean": {"ppo": 206.835}, "hist_stats": {"episode_reward": [405.0, 444.0, 453.0, 453.0, 464.0, 390.0, 424.0, 455.0, 453.0, 441.0, 444.0, 387.0, 450.0, 462.0, 447.0, 447.0, 290.0, 408.0, 453.0, 470.0, 470.0, 183.0, 339.0, 438.0, 450.0, 352.0, 344.0, 392.0, 450.0, 444.0, 180.0, 407.0, 401.0, 410.0, 351.0, 450.0, 456.0, 462.0, 447.0, 309.0, 462.0, 441.0, 453.0, 450.0, 293.0, 390.0, 294.0, 453.0, 361.0, 459.0, 456.0, 
465.0, 390.0, 421.0, 396.0, 390.0, 504.0, 310.0, 404.0, 395.0, 444.0, 330.0, 453.0, 407.0, 444.0, 459.0, 459.0, 396.0, 438.0, 441.0, 455.0, 450.0, 433.0, 441.0, 359.0, 453.0, 410.0, 393.0, 459.0, 456.0, 301.0, 456.0, 447.0, 387.0, 513.0, 387.0, 453.0, 479.0, 407.0, 447.0, 456.0, 401.0, 465.0, 330.0, 398.0, 227.0, 444.0, 387.0, 396.0, 444.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [199.0, 206.0, 223.0, 221.0, 239.0, 214.0, 230.0, 223.0, 238.0, 226.0, 185.0, 205.0, 226.0, 198.0, 228.0, 227.0, 233.0, 220.0, 217.0, 224.0, 214.0, 230.0, 191.0, 196.0, 214.0, 236.0, 233.0, 229.0, 208.0, 239.0, 221.0, 226.0, 145.0, 145.0, 200.0, 208.0, 221.0, 232.0, 227.0, 243.0, 244.0, 226.0, 103.0, 80.0, 184.0, 155.0, 212.0, 226.0, 228.0, 222.0, 159.0, 193.0, 167.0, 177.0, 205.0, 187.0, 224.0, 226.0, 212.0, 232.0, 89.0, 91.0, 210.0, 197.0, 190.0, 211.0, 200.0, 210.0, 180.0, 171.0, 234.0, 216.0, 238.0, 218.0, 224.0, 238.0, 222.0, 225.0, 158.0, 151.0, 232.0, 230.0, 223.0, 218.0, 241.0, 212.0, 212.0, 238.0, 134.0, 159.0, 205.0, 185.0, 154.0, 140.0, 228.0, 225.0, 185.0, 176.0, 225.0, 234.0, 218.0, 238.0, 236.0, 229.0, 189.0, 201.0, 199.0, 222.0, 176.0, 220.0, 190.0, 200.0, 253.0, 251.0, 156.0, 154.0, 201.0, 203.0, 190.0, 205.0, 221.0, 223.0, 161.0, 169.0, 225.0, 228.0, 204.0, 203.0, 224.0, 220.0, 226.0, 233.0, 235.0, 224.0, 200.0, 196.0, 202.0, 236.0, 238.0, 203.0, 231.0, 224.0, 218.0, 232.0, 206.0, 227.0, 225.0, 216.0, 180.0, 179.0, 239.0, 214.0, 193.0, 217.0, 200.0, 193.0, 218.0, 241.0, 244.0, 
212.0, 150.0, 151.0, 234.0, 222.0, 221.0, 226.0, 183.0, 204.0, 256.0, 257.0, 204.0, 183.0, 225.0, 228.0, 239.0, 240.0, 206.0, 201.0, 230.0, 217.0, 216.0, 240.0, 204.0, 197.0, 229.0, 236.0, 141.0, 189.0, 201.0, 197.0, 114.0, 113.0, 232.0, 212.0, 203.0, 184.0, 213.0, 183.0, 229.0, 215.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.686908689458971, "mean_inference_ms": 1.2096171268596805, "mean_action_processing_ms": 0.13290034895586486, "mean_env_wait_ms": 0.8587133886503696, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 2048000, "num_agent_steps_trained": 2048000, "num_env_steps_sampled": 1024000, "num_env_steps_trained": 1024000, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 1024000, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 2048000, "timers": {"training_iteration_time_ms": 3611.57, "learn_time_ms": 1067.578, "learn_throughput": 11989.755, "synch_weights_time_ms": 12.678}, "counters": {"num_env_steps_sampled": 1024000, "num_env_steps_trained": 1024000, "num_agent_steps_sampled": 2048000, "num_agent_steps_trained": 2048000}, "done": false, "episodes_total": 2560, "training_iteration": 80, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-05-06", "timestamp": 1666580706, "time_this_iter_s": 3.7415013313293457, "time_total_s": 299.61266350746155, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, 
"device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, 
"sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": 
{"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", 
"observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, 
"keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function 
get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function 
gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 299.61266350746155, "timesteps_since_restore": 0, "iterations_since_restore": 80, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 21.083333333333332, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 145.2, "sparse_reward_min": 80, "sparse_reward_max": 180, "shaped_reward_mean": 124.22, "shaped_reward_min": 67, "shaped_reward_max": 159, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 13.27, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 13.24, "onion_pickup_agent_1_min": 5, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 12.74, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 12.69, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 1.07, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 0.96, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.65, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, 
"useful_onion_drop_agent_1_mean": 0.54, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 11.79, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 11.92, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 20, "dish_pickup_agent_0_mean": 5.65, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.47, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 2.45, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 2.48, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 1.56, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 1.53, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.62, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 5, "useful_dish_drop_agent_1_mean": 0.56, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 3.91, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 3.78, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 3.82, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 3.71, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.04, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.06, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 11.79, "optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 11.92, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 20, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 11.79, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 11.92, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 20, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 1.6543612497579586e-25, "cur_lr": 0.0010000000474974513, "total_loss": -0.0057379272766411304, "policy_loss": 
-0.005914425943046808, "vf_loss": 7.993932723999023, "vf_explained_var": 0.675482451915741, "kl": 0.001754501834511757, "entropy": 1.2457929849624634, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 1036800, "num_env_steps_trained": 1036800, "num_agent_steps_sampled": 2073600, "num_agent_steps_trained": 2073600}, "sampler_results": {"episode_reward_max": 513.0, "episode_reward_min": 227.0, "episode_reward_mean": 414.62, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 113.0}, "policy_reward_max": {"ppo": 257.0}, "policy_reward_mean": {"ppo": 207.31}, "custom_metrics": {"sparse_reward_mean": 145.2, "sparse_reward_min": 80, "sparse_reward_max": 180, "shaped_reward_mean": 124.22, "shaped_reward_min": 67, "shaped_reward_max": 159, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 13.27, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 23, 
"onion_pickup_agent_1_mean": 13.24, "onion_pickup_agent_1_min": 5, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 12.74, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 12.69, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 1.07, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 0.96, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.65, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 0.54, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 11.79, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 11.92, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 20, "dish_pickup_agent_0_mean": 5.65, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.47, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 2.45, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 2.48, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 1.56, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 1.53, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.62, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 5, "useful_dish_drop_agent_1_mean": 0.56, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 3.91, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 3.78, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 3.82, 
"soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 3.71, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.04, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.06, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 11.79, "optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 11.92, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 20, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 11.79, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 11.92, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 20, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, 
"useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [401.0, 410.0, 351.0, 450.0, 456.0, 462.0, 447.0, 309.0, 462.0, 441.0, 453.0, 450.0, 293.0, 390.0, 294.0, 453.0, 361.0, 459.0, 456.0, 465.0, 390.0, 421.0, 396.0, 390.0, 504.0, 310.0, 404.0, 395.0, 444.0, 330.0, 453.0, 407.0, 444.0, 459.0, 459.0, 396.0, 438.0, 441.0, 455.0, 450.0, 433.0, 441.0, 359.0, 453.0, 410.0, 393.0, 459.0, 456.0, 301.0, 456.0, 447.0, 387.0, 513.0, 387.0, 453.0, 479.0, 407.0, 447.0, 456.0, 401.0, 465.0, 330.0, 398.0, 227.0, 444.0, 387.0, 396.0, 444.0, 416.0, 402.0, 462.0, 393.0, 456.0, 456.0, 401.0, 450.0, 441.0, 459.0, 319.0, 344.0, 447.0, 410.0, 453.0, 447.0, 447.0, 455.0, 393.0, 453.0, 453.0, 396.0, 470.0, 345.0, 307.0, 396.0, 307.0, 288.0, 444.0, 416.0, 459.0, 399.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [190.0, 211.0, 200.0, 210.0, 180.0, 171.0, 234.0, 216.0, 238.0, 218.0, 224.0, 238.0, 222.0, 225.0, 158.0, 151.0, 232.0, 230.0, 223.0, 218.0, 241.0, 212.0, 212.0, 238.0, 134.0, 159.0, 205.0, 185.0, 154.0, 140.0, 228.0, 225.0, 185.0, 176.0, 225.0, 234.0, 218.0, 238.0, 236.0, 229.0, 189.0, 201.0, 199.0, 
222.0, 176.0, 220.0, 190.0, 200.0, 253.0, 251.0, 156.0, 154.0, 201.0, 203.0, 190.0, 205.0, 221.0, 223.0, 161.0, 169.0, 225.0, 228.0, 204.0, 203.0, 224.0, 220.0, 226.0, 233.0, 235.0, 224.0, 200.0, 196.0, 202.0, 236.0, 238.0, 203.0, 231.0, 224.0, 218.0, 232.0, 206.0, 227.0, 225.0, 216.0, 180.0, 179.0, 239.0, 214.0, 193.0, 217.0, 200.0, 193.0, 218.0, 241.0, 244.0, 212.0, 150.0, 151.0, 234.0, 222.0, 221.0, 226.0, 183.0, 204.0, 256.0, 257.0, 204.0, 183.0, 225.0, 228.0, 239.0, 240.0, 206.0, 201.0, 230.0, 217.0, 216.0, 240.0, 204.0, 197.0, 229.0, 236.0, 141.0, 189.0, 201.0, 197.0, 114.0, 113.0, 232.0, 212.0, 203.0, 184.0, 213.0, 183.0, 229.0, 215.0, 213.0, 203.0, 207.0, 195.0, 234.0, 228.0, 202.0, 191.0, 219.0, 237.0, 231.0, 225.0, 198.0, 203.0, 226.0, 224.0, 219.0, 222.0, 232.0, 227.0, 154.0, 165.0, 167.0, 177.0, 205.0, 242.0, 206.0, 204.0, 224.0, 229.0, 238.0, 209.0, 225.0, 222.0, 242.0, 213.0, 208.0, 185.0, 228.0, 225.0, 234.0, 219.0, 197.0, 199.0, 240.0, 230.0, 184.0, 161.0, 152.0, 155.0, 203.0, 193.0, 157.0, 150.0, 147.0, 141.0, 216.0, 228.0, 217.0, 199.0, 214.0, 245.0, 190.0, 209.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6868749087582313, "mean_inference_ms": 1.2094324542263242, "mean_action_processing_ms": 0.1328880160108227, "mean_env_wait_ms": 0.8581099319466553, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 513.0, "episode_reward_min": 227.0, "episode_reward_mean": 414.62, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 113.0}, "policy_reward_max": {"ppo": 257.0}, "policy_reward_mean": {"ppo": 207.31}, "hist_stats": {"episode_reward": [401.0, 410.0, 351.0, 450.0, 456.0, 462.0, 447.0, 309.0, 462.0, 441.0, 453.0, 450.0, 293.0, 390.0, 294.0, 453.0, 361.0, 459.0, 456.0, 465.0, 390.0, 421.0, 396.0, 390.0, 504.0, 310.0, 404.0, 395.0, 444.0, 330.0, 453.0, 407.0, 444.0, 459.0, 459.0, 396.0, 438.0, 441.0, 455.0, 450.0, 433.0, 441.0, 359.0, 453.0, 410.0, 393.0, 459.0, 456.0, 301.0, 456.0, 447.0, 
387.0, 513.0, 387.0, 453.0, 479.0, 407.0, 447.0, 456.0, 401.0, 465.0, 330.0, 398.0, 227.0, 444.0, 387.0, 396.0, 444.0, 416.0, 402.0, 462.0, 393.0, 456.0, 456.0, 401.0, 450.0, 441.0, 459.0, 319.0, 344.0, 447.0, 410.0, 453.0, 447.0, 447.0, 455.0, 393.0, 453.0, 453.0, 396.0, 470.0, 345.0, 307.0, 396.0, 307.0, 288.0, 444.0, 416.0, 459.0, 399.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [190.0, 211.0, 200.0, 210.0, 180.0, 171.0, 234.0, 216.0, 238.0, 218.0, 224.0, 238.0, 222.0, 225.0, 158.0, 151.0, 232.0, 230.0, 223.0, 218.0, 241.0, 212.0, 212.0, 238.0, 134.0, 159.0, 205.0, 185.0, 154.0, 140.0, 228.0, 225.0, 185.0, 176.0, 225.0, 234.0, 218.0, 238.0, 236.0, 229.0, 189.0, 201.0, 199.0, 222.0, 176.0, 220.0, 190.0, 200.0, 253.0, 251.0, 156.0, 154.0, 201.0, 203.0, 190.0, 205.0, 221.0, 223.0, 161.0, 169.0, 225.0, 228.0, 204.0, 203.0, 224.0, 220.0, 226.0, 233.0, 235.0, 224.0, 200.0, 196.0, 202.0, 236.0, 238.0, 203.0, 231.0, 224.0, 218.0, 232.0, 206.0, 227.0, 225.0, 216.0, 180.0, 179.0, 239.0, 214.0, 193.0, 217.0, 200.0, 193.0, 218.0, 241.0, 244.0, 212.0, 150.0, 151.0, 234.0, 222.0, 221.0, 226.0, 183.0, 204.0, 256.0, 257.0, 204.0, 183.0, 225.0, 228.0, 239.0, 240.0, 206.0, 201.0, 230.0, 217.0, 216.0, 240.0, 204.0, 197.0, 229.0, 236.0, 141.0, 189.0, 201.0, 197.0, 114.0, 113.0, 232.0, 212.0, 203.0, 184.0, 213.0, 183.0, 229.0, 215.0, 213.0, 203.0, 207.0, 195.0, 234.0, 228.0, 202.0, 191.0, 219.0, 237.0, 231.0, 225.0, 198.0, 203.0, 226.0, 224.0, 219.0, 222.0, 232.0, 227.0, 154.0, 165.0, 167.0, 
177.0, 205.0, 242.0, 206.0, 204.0, 224.0, 229.0, 238.0, 209.0, 225.0, 222.0, 242.0, 213.0, 208.0, 185.0, 228.0, 225.0, 234.0, 219.0, 197.0, 199.0, 240.0, 230.0, 184.0, 161.0, 152.0, 155.0, 203.0, 193.0, 157.0, 150.0, 147.0, 141.0, 216.0, 228.0, 217.0, 199.0, 214.0, 245.0, 190.0, 209.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6868749087582313, "mean_inference_ms": 1.2094324542263242, "mean_action_processing_ms": 0.1328880160108227, "mean_env_wait_ms": 0.8581099319466553, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 2073600, "num_agent_steps_trained": 2073600, "num_env_steps_sampled": 1036800, "num_env_steps_trained": 1036800, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 1036800, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 2073600, "timers": {"training_iteration_time_ms": 3615.382, "learn_time_ms": 1075.141, "learn_throughput": 11905.41, "synch_weights_time_ms": 12.896}, "counters": {"num_env_steps_sampled": 1036800, "num_env_steps_trained": 1036800, "num_agent_steps_sampled": 2073600, "num_agent_steps_trained": 2073600}, "done": false, "episodes_total": 2592, "training_iteration": 81, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-05-10", "timestamp": 1666580710, "time_this_iter_s": 3.6494596004486084, "time_total_s": 303.26212310791016, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, 
"device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, 
"sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": 
{"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", 
"observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, 
"keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function 
get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function 
gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 303.26212310791016, "timesteps_since_restore": 0, "iterations_since_restore": 81, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 24.1, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 143.4, "sparse_reward_min": 40, "sparse_reward_max": 180, "shaped_reward_mean": 124.12, "shaped_reward_min": 67, "shaped_reward_max": 159, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 13.48, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 13.08, "onion_pickup_agent_1_min": 3, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 12.84, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 12.46, "useful_onion_pickup_agent_1_min": 3, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 1.14, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 1.09, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 11, "useful_onion_drop_agent_0_mean": 0.69, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, 
"useful_onion_drop_agent_1_mean": 0.64, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 10, "potting_onion_agent_0_mean": 11.99, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 11.59, "potting_onion_agent_1_min": 3, "potting_onion_agent_1_max": 20, "dish_pickup_agent_0_mean": 5.74, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.68, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 2.54, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 2.47, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 1.64, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 1.75, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.73, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 5, "useful_dish_drop_agent_1_mean": 0.62, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 4, "soup_pickup_agent_0_mean": 3.97, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 3.83, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 10, "soup_delivery_agent_0_mean": 3.79, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 3.66, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.1, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 3, "soup_drop_agent_1_mean": 0.13, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 4, "optimal_onion_potting_agent_0_mean": 11.99, "optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 11.59, "optimal_onion_potting_agent_1_min": 3, "optimal_onion_potting_agent_1_max": 20, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 11.99, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 11.59, "viable_onion_potting_agent_1_min": 3, "viable_onion_potting_agent_1_max": 20, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 8.271806248789793e-26, "cur_lr": 0.0010000000474974513, "total_loss": -0.004569070879369974, "policy_loss": -0.00474266242235899, 
"vf_loss": 8.008241653442383, "vf_explained_var": 0.7025030851364136, "kl": 0.0017891178140416741, "entropy": 1.2544690370559692, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 1049600, "num_env_steps_trained": 1049600, "num_agent_steps_sampled": 2099200, "num_agent_steps_trained": 2099200}, "sampler_results": {"episode_reward_max": 516.0, "episode_reward_min": 164.0, "episode_reward_mean": 410.92, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 81.0}, "policy_reward_max": {"ppo": 266.0}, "policy_reward_mean": {"ppo": 205.46}, "custom_metrics": {"sparse_reward_mean": 143.4, "sparse_reward_min": 40, "sparse_reward_max": 180, "shaped_reward_mean": 124.12, "shaped_reward_min": 67, "shaped_reward_max": 159, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 13.48, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 13.08, 
"onion_pickup_agent_1_min": 3, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 12.84, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 12.46, "useful_onion_pickup_agent_1_min": 3, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 1.14, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 1.09, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 11, "useful_onion_drop_agent_0_mean": 0.69, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 0.64, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 10, "potting_onion_agent_0_mean": 11.99, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 11.59, "potting_onion_agent_1_min": 3, "potting_onion_agent_1_max": 20, "dish_pickup_agent_0_mean": 5.74, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.68, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 2.54, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 2.47, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 1.64, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 1.75, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.73, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 5, "useful_dish_drop_agent_1_mean": 0.62, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 4, "soup_pickup_agent_0_mean": 3.97, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 3.83, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 10, "soup_delivery_agent_0_mean": 3.79, "soup_delivery_agent_0_min": 0, 
"soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 3.66, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.1, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 3, "soup_drop_agent_1_mean": 0.13, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 4, "optimal_onion_potting_agent_0_mean": 11.99, "optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 11.59, "optimal_onion_potting_agent_1_min": 3, "optimal_onion_potting_agent_1_max": 20, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 11.99, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 11.59, "viable_onion_potting_agent_1_min": 3, "viable_onion_potting_agent_1_max": 20, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [444.0, 459.0, 459.0, 396.0, 438.0, 441.0, 455.0, 450.0, 433.0, 441.0, 359.0, 453.0, 410.0, 393.0, 459.0, 456.0, 301.0, 456.0, 447.0, 387.0, 513.0, 387.0, 453.0, 479.0, 407.0, 447.0, 456.0, 401.0, 465.0, 330.0, 398.0, 227.0, 444.0, 387.0, 396.0, 444.0, 416.0, 402.0, 462.0, 393.0, 456.0, 456.0, 401.0, 450.0, 441.0, 459.0, 319.0, 344.0, 447.0, 410.0, 453.0, 447.0, 447.0, 455.0, 393.0, 453.0, 453.0, 396.0, 470.0, 345.0, 307.0, 396.0, 307.0, 288.0, 444.0, 416.0, 459.0, 399.0, 473.0, 467.0, 458.0, 306.0, 164.0, 444.0, 459.0, 444.0, 358.0, 413.0, 402.0, 405.0, 393.0, 516.0, 399.0, 410.0, 453.0, 401.0, 410.0, 361.0, 244.0, 441.0, 468.0, 462.0, 344.0, 345.0, 285.0, 398.0, 462.0, 347.0, 424.0, 381.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [224.0, 220.0, 226.0, 233.0, 235.0, 224.0, 200.0, 196.0, 202.0, 236.0, 238.0, 203.0, 231.0, 224.0, 218.0, 232.0, 206.0, 227.0, 225.0, 216.0, 180.0, 179.0, 239.0, 214.0, 193.0, 217.0, 200.0, 193.0, 218.0, 241.0, 244.0, 212.0, 150.0, 151.0, 234.0, 222.0, 221.0, 226.0, 183.0, 204.0, 256.0, 257.0, 204.0, 183.0, 225.0, 228.0, 239.0, 240.0, 206.0, 
201.0, 230.0, 217.0, 216.0, 240.0, 204.0, 197.0, 229.0, 236.0, 141.0, 189.0, 201.0, 197.0, 114.0, 113.0, 232.0, 212.0, 203.0, 184.0, 213.0, 183.0, 229.0, 215.0, 213.0, 203.0, 207.0, 195.0, 234.0, 228.0, 202.0, 191.0, 219.0, 237.0, 231.0, 225.0, 198.0, 203.0, 226.0, 224.0, 219.0, 222.0, 232.0, 227.0, 154.0, 165.0, 167.0, 177.0, 205.0, 242.0, 206.0, 204.0, 224.0, 229.0, 238.0, 209.0, 225.0, 222.0, 242.0, 213.0, 208.0, 185.0, 228.0, 225.0, 234.0, 219.0, 197.0, 199.0, 240.0, 230.0, 184.0, 161.0, 152.0, 155.0, 203.0, 193.0, 157.0, 150.0, 147.0, 141.0, 216.0, 228.0, 217.0, 199.0, 214.0, 245.0, 190.0, 209.0, 236.0, 237.0, 235.0, 232.0, 229.0, 229.0, 152.0, 154.0, 83.0, 81.0, 235.0, 209.0, 215.0, 244.0, 224.0, 220.0, 170.0, 188.0, 199.0, 214.0, 219.0, 183.0, 211.0, 194.0, 189.0, 204.0, 266.0, 250.0, 219.0, 180.0, 197.0, 213.0, 219.0, 234.0, 200.0, 201.0, 203.0, 207.0, 179.0, 182.0, 125.0, 119.0, 219.0, 222.0, 228.0, 240.0, 230.0, 232.0, 184.0, 160.0, 164.0, 181.0, 156.0, 129.0, 214.0, 184.0, 246.0, 216.0, 168.0, 179.0, 213.0, 211.0, 194.0, 187.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6868536372512659, "mean_inference_ms": 1.2092900191294707, "mean_action_processing_ms": 0.13288051052549346, "mean_env_wait_ms": 0.8576067512026819, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 516.0, "episode_reward_min": 164.0, "episode_reward_mean": 410.92, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 81.0}, "policy_reward_max": {"ppo": 266.0}, "policy_reward_mean": {"ppo": 205.46}, "hist_stats": {"episode_reward": [444.0, 459.0, 459.0, 396.0, 438.0, 441.0, 455.0, 450.0, 433.0, 441.0, 359.0, 453.0, 410.0, 393.0, 459.0, 456.0, 301.0, 456.0, 447.0, 387.0, 513.0, 387.0, 453.0, 479.0, 407.0, 447.0, 456.0, 401.0, 465.0, 330.0, 398.0, 227.0, 444.0, 387.0, 396.0, 444.0, 416.0, 402.0, 462.0, 393.0, 456.0, 456.0, 401.0, 450.0, 441.0, 459.0, 319.0, 344.0, 447.0, 410.0, 453.0, 447.0, 447.0, 455.0, 393.0, 453.0, 453.0, 
396.0, 470.0, 345.0, 307.0, 396.0, 307.0, 288.0, 444.0, 416.0, 459.0, 399.0, 473.0, 467.0, 458.0, 306.0, 164.0, 444.0, 459.0, 444.0, 358.0, 413.0, 402.0, 405.0, 393.0, 516.0, 399.0, 410.0, 453.0, 401.0, 410.0, 361.0, 244.0, 441.0, 468.0, 462.0, 344.0, 345.0, 285.0, 398.0, 462.0, 347.0, 424.0, 381.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [224.0, 220.0, 226.0, 233.0, 235.0, 224.0, 200.0, 196.0, 202.0, 236.0, 238.0, 203.0, 231.0, 224.0, 218.0, 232.0, 206.0, 227.0, 225.0, 216.0, 180.0, 179.0, 239.0, 214.0, 193.0, 217.0, 200.0, 193.0, 218.0, 241.0, 244.0, 212.0, 150.0, 151.0, 234.0, 222.0, 221.0, 226.0, 183.0, 204.0, 256.0, 257.0, 204.0, 183.0, 225.0, 228.0, 239.0, 240.0, 206.0, 201.0, 230.0, 217.0, 216.0, 240.0, 204.0, 197.0, 229.0, 236.0, 141.0, 189.0, 201.0, 197.0, 114.0, 113.0, 232.0, 212.0, 203.0, 184.0, 213.0, 183.0, 229.0, 215.0, 213.0, 203.0, 207.0, 195.0, 234.0, 228.0, 202.0, 191.0, 219.0, 237.0, 231.0, 225.0, 198.0, 203.0, 226.0, 224.0, 219.0, 222.0, 232.0, 227.0, 154.0, 165.0, 167.0, 177.0, 205.0, 242.0, 206.0, 204.0, 224.0, 229.0, 238.0, 209.0, 225.0, 222.0, 242.0, 213.0, 208.0, 185.0, 228.0, 225.0, 234.0, 219.0, 197.0, 199.0, 240.0, 230.0, 184.0, 161.0, 152.0, 155.0, 203.0, 193.0, 157.0, 150.0, 147.0, 141.0, 216.0, 228.0, 217.0, 199.0, 214.0, 245.0, 190.0, 209.0, 236.0, 237.0, 235.0, 232.0, 229.0, 229.0, 152.0, 154.0, 83.0, 81.0, 235.0, 209.0, 215.0, 244.0, 224.0, 220.0, 170.0, 188.0, 199.0, 214.0, 219.0, 183.0, 211.0, 194.0, 189.0, 204.0, 266.0, 250.0, 219.0, 
180.0, 197.0, 213.0, 219.0, 234.0, 200.0, 201.0, 203.0, 207.0, 179.0, 182.0, 125.0, 119.0, 219.0, 222.0, 228.0, 240.0, 230.0, 232.0, 184.0, 160.0, 164.0, 181.0, 156.0, 129.0, 214.0, 184.0, 246.0, 216.0, 168.0, 179.0, 213.0, 211.0, 194.0, 187.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6868536372512659, "mean_inference_ms": 1.2092900191294707, "mean_action_processing_ms": 0.13288051052549346, "mean_env_wait_ms": 0.8576067512026819, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 2099200, "num_agent_steps_trained": 2099200, "num_env_steps_sampled": 1049600, "num_env_steps_trained": 1049600, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 1049600, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 2099200, "timers": {"training_iteration_time_ms": 3617.381, "learn_time_ms": 1077.647, "learn_throughput": 11877.732, "synch_weights_time_ms": 12.896}, "counters": {"num_env_steps_sampled": 1049600, "num_env_steps_trained": 1049600, "num_agent_steps_sampled": 2099200, "num_agent_steps_trained": 2099200}, "done": false, "episodes_total": 2624, "training_iteration": 82, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-05-14", "timestamp": 1666580714, "time_this_iter_s": 3.685708999633789, "time_total_s": 306.94783210754395, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 306.94783210754395, "timesteps_since_restore": 0, "iterations_since_restore": 82, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 21.249999999999996, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 144.4, "sparse_reward_min": 40, "sparse_reward_max": 180, "shaped_reward_mean": 125.24, "shaped_reward_min": 84, "shaped_reward_max": 156, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 13.64, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 13.09, "onion_pickup_agent_1_min": 3, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 12.93, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 19, "useful_onion_pickup_agent_1_mean": 12.47, "useful_onion_pickup_agent_1_min": 3, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 1.19, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 1.14, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 11, "useful_onion_drop_agent_0_mean": 0.7, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, 
"useful_onion_drop_agent_1_mean": 0.62, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 10, "potting_onion_agent_0_mean": 12.13, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 11.6, "potting_onion_agent_1_min": 3, "potting_onion_agent_1_max": 20, "dish_pickup_agent_0_mean": 5.82, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 5.8, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 12, "useful_dish_pickup_agent_0_mean": 2.58, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 2.52, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 1.8, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 1.71, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.81, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 5, "useful_dish_drop_agent_1_mean": 0.56, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 4, "soup_pickup_agent_0_mean": 3.94, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 3.95, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 10, "soup_delivery_agent_0_mean": 3.75, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 3.76, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.09, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 3, "soup_drop_agent_1_mean": 0.14, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 4, "optimal_onion_potting_agent_0_mean": 12.13, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 11.6, "optimal_onion_potting_agent_1_min": 3, "optimal_onion_potting_agent_1_max": 20, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 12.13, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 11.6, "viable_onion_potting_agent_1_min": 3, "viable_onion_potting_agent_1_max": 20, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 4.1359031243948966e-26, "cur_lr": 0.0010000000474974513, "total_loss": -0.0003301555407233536, "policy_loss": 
-0.000513147097080946, "vf_loss": 8.041152954101562, "vf_explained_var": 0.6765406131744385, "kl": 0.002183079021051526, "entropy": 1.2422490119934082, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 1062400, "num_env_steps_trained": 1062400, "num_agent_steps_sampled": 2124800, "num_agent_steps_trained": 2124800}, "sampler_results": {"episode_reward_max": 516.0, "episode_reward_min": 164.0, "episode_reward_mean": 414.04, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 81.0}, "policy_reward_max": {"ppo": 266.0}, "policy_reward_mean": {"ppo": 207.02}, "custom_metrics": {"sparse_reward_mean": 144.4, "sparse_reward_min": 40, "sparse_reward_max": 180, "shaped_reward_mean": 125.24, "shaped_reward_min": 84, "shaped_reward_max": 156, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 13.64, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 22, 
"onion_pickup_agent_1_mean": 13.09, "onion_pickup_agent_1_min": 3, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 12.93, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 19, "useful_onion_pickup_agent_1_mean": 12.47, "useful_onion_pickup_agent_1_min": 3, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 1.19, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 1.14, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 11, "useful_onion_drop_agent_0_mean": 0.7, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 0.62, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 10, "potting_onion_agent_0_mean": 12.13, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 11.6, "potting_onion_agent_1_min": 3, "potting_onion_agent_1_max": 20, "dish_pickup_agent_0_mean": 5.82, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 5.8, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 12, "useful_dish_pickup_agent_0_mean": 2.58, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 2.52, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 1.8, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 1.71, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.81, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 5, "useful_dish_drop_agent_1_mean": 0.56, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 4, "soup_pickup_agent_0_mean": 3.94, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 3.95, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 10, "soup_delivery_agent_0_mean": 3.75, 
"soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 3.76, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.09, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 3, "soup_drop_agent_1_mean": 0.14, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 4, "optimal_onion_potting_agent_0_mean": 12.13, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 11.6, "optimal_onion_potting_agent_1_min": 3, "optimal_onion_potting_agent_1_max": 20, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 12.13, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 11.6, "viable_onion_potting_agent_1_min": 3, "viable_onion_potting_agent_1_max": 20, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, 
"useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [444.0, 387.0, 396.0, 444.0, 416.0, 402.0, 462.0, 393.0, 456.0, 456.0, 401.0, 450.0, 441.0, 459.0, 319.0, 344.0, 447.0, 410.0, 453.0, 447.0, 447.0, 455.0, 393.0, 453.0, 453.0, 396.0, 470.0, 345.0, 307.0, 396.0, 307.0, 288.0, 444.0, 416.0, 459.0, 399.0, 473.0, 467.0, 458.0, 306.0, 164.0, 444.0, 459.0, 444.0, 358.0, 413.0, 402.0, 405.0, 393.0, 516.0, 399.0, 410.0, 453.0, 401.0, 410.0, 361.0, 244.0, 441.0, 468.0, 462.0, 344.0, 345.0, 285.0, 398.0, 462.0, 347.0, 424.0, 381.0, 453.0, 359.0, 413.0, 450.0, 462.0, 396.0, 447.0, 444.0, 465.0, 455.0, 452.0, 462.0, 390.0, 453.0, 344.0, 456.0, 450.0, 498.0, 421.0, 456.0, 390.0, 378.0, 444.0, 453.0, 464.0, 452.0, 402.0, 424.0, 404.0, 462.0, 412.0, 401.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [232.0, 212.0, 203.0, 184.0, 213.0, 183.0, 229.0, 215.0, 213.0, 203.0, 207.0, 195.0, 234.0, 228.0, 202.0, 191.0, 219.0, 237.0, 231.0, 225.0, 198.0, 203.0, 226.0, 224.0, 219.0, 222.0, 232.0, 227.0, 154.0, 165.0, 167.0, 177.0, 205.0, 242.0, 206.0, 204.0, 224.0, 229.0, 238.0, 209.0, 225.0, 222.0, 242.0, 
213.0, 208.0, 185.0, 228.0, 225.0, 234.0, 219.0, 197.0, 199.0, 240.0, 230.0, 184.0, 161.0, 152.0, 155.0, 203.0, 193.0, 157.0, 150.0, 147.0, 141.0, 216.0, 228.0, 217.0, 199.0, 214.0, 245.0, 190.0, 209.0, 236.0, 237.0, 235.0, 232.0, 229.0, 229.0, 152.0, 154.0, 83.0, 81.0, 235.0, 209.0, 215.0, 244.0, 224.0, 220.0, 170.0, 188.0, 199.0, 214.0, 219.0, 183.0, 211.0, 194.0, 189.0, 204.0, 266.0, 250.0, 219.0, 180.0, 197.0, 213.0, 219.0, 234.0, 200.0, 201.0, 203.0, 207.0, 179.0, 182.0, 125.0, 119.0, 219.0, 222.0, 228.0, 240.0, 230.0, 232.0, 184.0, 160.0, 164.0, 181.0, 156.0, 129.0, 214.0, 184.0, 246.0, 216.0, 168.0, 179.0, 213.0, 211.0, 194.0, 187.0, 237.0, 216.0, 187.0, 172.0, 191.0, 222.0, 226.0, 224.0, 236.0, 226.0, 189.0, 207.0, 225.0, 222.0, 221.0, 223.0, 224.0, 241.0, 236.0, 219.0, 239.0, 213.0, 231.0, 231.0, 192.0, 198.0, 224.0, 229.0, 160.0, 184.0, 218.0, 238.0, 218.0, 232.0, 240.0, 258.0, 204.0, 217.0, 215.0, 241.0, 198.0, 192.0, 182.0, 196.0, 233.0, 211.0, 222.0, 231.0, 226.0, 238.0, 240.0, 212.0, 195.0, 207.0, 218.0, 206.0, 203.0, 201.0, 234.0, 228.0, 203.0, 209.0, 204.0, 197.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6868376859123841, "mean_inference_ms": 1.2091793351063924, "mean_action_processing_ms": 0.1328811999340644, "mean_env_wait_ms": 0.8571544936742166, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 516.0, "episode_reward_min": 164.0, "episode_reward_mean": 414.04, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 81.0}, "policy_reward_max": {"ppo": 266.0}, "policy_reward_mean": {"ppo": 207.02}, "hist_stats": {"episode_reward": [444.0, 387.0, 396.0, 444.0, 416.0, 402.0, 462.0, 393.0, 456.0, 456.0, 401.0, 450.0, 441.0, 459.0, 319.0, 344.0, 447.0, 410.0, 453.0, 447.0, 447.0, 455.0, 393.0, 453.0, 453.0, 396.0, 470.0, 345.0, 307.0, 396.0, 307.0, 288.0, 444.0, 416.0, 459.0, 399.0, 473.0, 467.0, 458.0, 306.0, 164.0, 444.0, 459.0, 444.0, 358.0, 413.0, 402.0, 405.0, 393.0, 516.0, 399.0, 
410.0, 453.0, 401.0, 410.0, 361.0, 244.0, 441.0, 468.0, 462.0, 344.0, 345.0, 285.0, 398.0, 462.0, 347.0, 424.0, 381.0, 453.0, 359.0, 413.0, 450.0, 462.0, 396.0, 447.0, 444.0, 465.0, 455.0, 452.0, 462.0, 390.0, 453.0, 344.0, 456.0, 450.0, 498.0, 421.0, 456.0, 390.0, 378.0, 444.0, 453.0, 464.0, 452.0, 402.0, 424.0, 404.0, 462.0, 412.0, 401.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [232.0, 212.0, 203.0, 184.0, 213.0, 183.0, 229.0, 215.0, 213.0, 203.0, 207.0, 195.0, 234.0, 228.0, 202.0, 191.0, 219.0, 237.0, 231.0, 225.0, 198.0, 203.0, 226.0, 224.0, 219.0, 222.0, 232.0, 227.0, 154.0, 165.0, 167.0, 177.0, 205.0, 242.0, 206.0, 204.0, 224.0, 229.0, 238.0, 209.0, 225.0, 222.0, 242.0, 213.0, 208.0, 185.0, 228.0, 225.0, 234.0, 219.0, 197.0, 199.0, 240.0, 230.0, 184.0, 161.0, 152.0, 155.0, 203.0, 193.0, 157.0, 150.0, 147.0, 141.0, 216.0, 228.0, 217.0, 199.0, 214.0, 245.0, 190.0, 209.0, 236.0, 237.0, 235.0, 232.0, 229.0, 229.0, 152.0, 154.0, 83.0, 81.0, 235.0, 209.0, 215.0, 244.0, 224.0, 220.0, 170.0, 188.0, 199.0, 214.0, 219.0, 183.0, 211.0, 194.0, 189.0, 204.0, 266.0, 250.0, 219.0, 180.0, 197.0, 213.0, 219.0, 234.0, 200.0, 201.0, 203.0, 207.0, 179.0, 182.0, 125.0, 119.0, 219.0, 222.0, 228.0, 240.0, 230.0, 232.0, 184.0, 160.0, 164.0, 181.0, 156.0, 129.0, 214.0, 184.0, 246.0, 216.0, 168.0, 179.0, 213.0, 211.0, 194.0, 187.0, 237.0, 216.0, 187.0, 172.0, 191.0, 222.0, 226.0, 224.0, 236.0, 226.0, 189.0, 207.0, 225.0, 222.0, 221.0, 223.0, 224.0, 241.0, 236.0, 219.0, 239.0, 213.0, 231.0, 
231.0, 192.0, 198.0, 224.0, 229.0, 160.0, 184.0, 218.0, 238.0, 218.0, 232.0, 240.0, 258.0, 204.0, 217.0, 215.0, 241.0, 198.0, 192.0, 182.0, 196.0, 233.0, 211.0, 222.0, 231.0, 226.0, 238.0, 240.0, 212.0, 195.0, 207.0, 218.0, 206.0, 203.0, 201.0, 234.0, 228.0, 203.0, 209.0, 204.0, 197.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6868376859123841, "mean_inference_ms": 1.2091793351063924, "mean_action_processing_ms": 0.1328811999340644, "mean_env_wait_ms": 0.8571544936742166, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 2124800, "num_agent_steps_trained": 2124800, "num_env_steps_sampled": 1062400, "num_env_steps_trained": 1062400, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 1062400, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 2124800, "timers": {"training_iteration_time_ms": 3579.484, "learn_time_ms": 1070.136, "learn_throughput": 11961.094, "synch_weights_time_ms": 12.536}, "counters": {"num_env_steps_sampled": 1062400, "num_env_steps_trained": 1062400, "num_agent_steps_sampled": 2124800, "num_agent_steps_trained": 2124800}, "done": false, "episodes_total": 2656, "training_iteration": 83, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-05-18", "timestamp": 1666580718, "time_this_iter_s": 3.6267499923706055, "time_total_s": 310.57458209991455, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, 
"device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, 
"sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": 
{"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", 
"observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, 
"keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function 
get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function 
gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 310.57458209991455, "timesteps_since_restore": 0, "iterations_since_restore": 83, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 24.639999999999997, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 145.0, "sparse_reward_min": 40, "sparse_reward_max": 180, "shaped_reward_mean": 126.3, "shaped_reward_min": 74, "shaped_reward_max": 156, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 13.6, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 13.15, "onion_pickup_agent_1_min": 3, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 12.9, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 12.4, "useful_onion_pickup_agent_1_min": 3, "useful_onion_pickup_agent_1_max": 19, "onion_drop_agent_0_mean": 1.3, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 0.98, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 11, "useful_onion_drop_agent_0_mean": 0.71, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 
0.56, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 10, "potting_onion_agent_0_mean": 11.97, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 11.83, "potting_onion_agent_1_min": 3, "potting_onion_agent_1_max": 19, "dish_pickup_agent_0_mean": 5.74, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 14, "dish_pickup_agent_1_mean": 5.99, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 12, "useful_dish_pickup_agent_0_mean": 2.51, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 2.79, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 1.82, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 9, "dish_drop_agent_1_mean": 1.72, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 7, "useful_dish_drop_agent_0_mean": 0.82, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 5, "useful_dish_drop_agent_1_mean": 0.67, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 4, "soup_pickup_agent_0_mean": 3.82, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 4.13, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 10, "soup_delivery_agent_0_mean": 3.64, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 3.92, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.09, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 3, "soup_drop_agent_1_mean": 0.15, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 4, "optimal_onion_potting_agent_0_mean": 11.97, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 11.83, "optimal_onion_potting_agent_1_min": 3, "optimal_onion_potting_agent_1_max": 19, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, 
"optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 11.97, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 11.83, "viable_onion_potting_agent_1_min": 3, "viable_onion_potting_agent_1_max": 19, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 2.0679515621974483e-26, "cur_lr": 0.0010000000474974513, "total_loss": -0.00299975648522377, "policy_loss": -0.00317044323310256, "vf_loss": 7.9165191650390625, 
"vf_explained_var": 0.6919734477996826, "kl": 0.0018455530516803265, "entropy": 1.2419322729110718, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 1075200, "num_env_steps_trained": 1075200, "num_agent_steps_sampled": 2150400, "num_agent_steps_trained": 2150400}, "sampler_results": {"episode_reward_max": 516.0, "episode_reward_min": 164.0, "episode_reward_mean": 416.3, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 81.0}, "policy_reward_max": {"ppo": 266.0}, "policy_reward_mean": {"ppo": 208.15}, "custom_metrics": {"sparse_reward_mean": 145.0, "sparse_reward_min": 40, "sparse_reward_max": 180, "shaped_reward_mean": 126.3, "shaped_reward_min": 74, "shaped_reward_max": 156, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 13.6, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 13.15, "onion_pickup_agent_1_min": 3, 
"onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 12.9, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 12.4, "useful_onion_pickup_agent_1_min": 3, "useful_onion_pickup_agent_1_max": 19, "onion_drop_agent_0_mean": 1.3, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 0.98, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 11, "useful_onion_drop_agent_0_mean": 0.71, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.56, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 10, "potting_onion_agent_0_mean": 11.97, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 11.83, "potting_onion_agent_1_min": 3, "potting_onion_agent_1_max": 19, "dish_pickup_agent_0_mean": 5.74, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 14, "dish_pickup_agent_1_mean": 5.99, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 12, "useful_dish_pickup_agent_0_mean": 2.51, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 2.79, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 1.82, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 9, "dish_drop_agent_1_mean": 1.72, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 7, "useful_dish_drop_agent_0_mean": 0.82, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 5, "useful_dish_drop_agent_1_mean": 0.67, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 4, "soup_pickup_agent_0_mean": 3.82, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 4.13, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 10, "soup_delivery_agent_0_mean": 3.64, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 7, 
"soup_delivery_agent_1_mean": 3.92, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.09, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 3, "soup_drop_agent_1_mean": 0.15, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 4, "optimal_onion_potting_agent_0_mean": 11.97, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 11.83, "optimal_onion_potting_agent_1_min": 3, "optimal_onion_potting_agent_1_max": 19, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 11.97, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 11.83, "viable_onion_potting_agent_1_min": 3, "viable_onion_potting_agent_1_max": 19, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, 
"useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [444.0, 416.0, 459.0, 399.0, 473.0, 467.0, 458.0, 306.0, 164.0, 444.0, 459.0, 444.0, 358.0, 413.0, 402.0, 405.0, 393.0, 516.0, 399.0, 410.0, 453.0, 401.0, 410.0, 361.0, 244.0, 441.0, 468.0, 462.0, 344.0, 345.0, 285.0, 398.0, 462.0, 347.0, 424.0, 381.0, 453.0, 359.0, 413.0, 450.0, 462.0, 396.0, 447.0, 444.0, 465.0, 455.0, 452.0, 462.0, 390.0, 453.0, 344.0, 456.0, 450.0, 498.0, 421.0, 456.0, 390.0, 378.0, 444.0, 453.0, 464.0, 452.0, 402.0, 424.0, 404.0, 462.0, 412.0, 401.0, 444.0, 465.0, 387.0, 465.0, 453.0, 390.0, 450.0, 341.0, 411.0, 453.0, 447.0, 462.0, 367.0, 335.0, 398.0, 441.0, 413.0, 456.0, 399.0, 462.0, 444.0, 194.0, 462.0, 410.0, 450.0, 462.0, 373.0, 416.0, 447.0, 441.0, 459.0, 366.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [216.0, 228.0, 217.0, 199.0, 214.0, 245.0, 190.0, 209.0, 236.0, 237.0, 235.0, 232.0, 229.0, 229.0, 152.0, 154.0, 83.0, 81.0, 235.0, 209.0, 215.0, 244.0, 224.0, 220.0, 170.0, 188.0, 199.0, 214.0, 219.0, 183.0, 211.0, 194.0, 189.0, 204.0, 266.0, 250.0, 219.0, 180.0, 197.0, 213.0, 219.0, 234.0, 200.0, 201.0, 203.0, 207.0, 179.0, 182.0, 125.0, 119.0, 219.0, 222.0, 228.0, 240.0, 230.0, 
232.0, 184.0, 160.0, 164.0, 181.0, 156.0, 129.0, 214.0, 184.0, 246.0, 216.0, 168.0, 179.0, 213.0, 211.0, 194.0, 187.0, 237.0, 216.0, 187.0, 172.0, 191.0, 222.0, 226.0, 224.0, 236.0, 226.0, 189.0, 207.0, 225.0, 222.0, 221.0, 223.0, 224.0, 241.0, 236.0, 219.0, 239.0, 213.0, 231.0, 231.0, 192.0, 198.0, 224.0, 229.0, 160.0, 184.0, 218.0, 238.0, 218.0, 232.0, 240.0, 258.0, 204.0, 217.0, 215.0, 241.0, 198.0, 192.0, 182.0, 196.0, 233.0, 211.0, 222.0, 231.0, 226.0, 238.0, 240.0, 212.0, 195.0, 207.0, 218.0, 206.0, 203.0, 201.0, 234.0, 228.0, 203.0, 209.0, 204.0, 197.0, 216.0, 228.0, 236.0, 229.0, 194.0, 193.0, 226.0, 239.0, 233.0, 220.0, 181.0, 209.0, 209.0, 241.0, 172.0, 169.0, 215.0, 196.0, 214.0, 239.0, 219.0, 228.0, 217.0, 245.0, 181.0, 186.0, 179.0, 156.0, 201.0, 197.0, 223.0, 218.0, 203.0, 210.0, 220.0, 236.0, 195.0, 204.0, 226.0, 236.0, 229.0, 215.0, 90.0, 104.0, 223.0, 239.0, 205.0, 205.0, 222.0, 228.0, 236.0, 226.0, 190.0, 183.0, 203.0, 213.0, 220.0, 227.0, 219.0, 222.0, 223.0, 236.0, 180.0, 186.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.686855671363231, "mean_inference_ms": 1.2091449764918027, "mean_action_processing_ms": 0.13288788327384127, "mean_env_wait_ms": 0.8567981341073625, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 516.0, "episode_reward_min": 164.0, "episode_reward_mean": 416.3, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 81.0}, "policy_reward_max": {"ppo": 266.0}, "policy_reward_mean": {"ppo": 208.15}, "hist_stats": {"episode_reward": [444.0, 416.0, 459.0, 399.0, 473.0, 467.0, 458.0, 306.0, 164.0, 444.0, 459.0, 444.0, 358.0, 413.0, 402.0, 405.0, 393.0, 516.0, 399.0, 410.0, 453.0, 401.0, 410.0, 361.0, 244.0, 441.0, 468.0, 462.0, 344.0, 345.0, 285.0, 398.0, 462.0, 347.0, 424.0, 381.0, 453.0, 359.0, 413.0, 450.0, 462.0, 396.0, 447.0, 444.0, 465.0, 455.0, 452.0, 462.0, 390.0, 453.0, 344.0, 456.0, 450.0, 498.0, 421.0, 456.0, 390.0, 378.0, 444.0, 453.0, 464.0, 452.0, 402.0, 
424.0, 404.0, 462.0, 412.0, 401.0, 444.0, 465.0, 387.0, 465.0, 453.0, 390.0, 450.0, 341.0, 411.0, 453.0, 447.0, 462.0, 367.0, 335.0, 398.0, 441.0, 413.0, 456.0, 399.0, 462.0, 444.0, 194.0, 462.0, 410.0, 450.0, 462.0, 373.0, 416.0, 447.0, 441.0, 459.0, 366.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [216.0, 228.0, 217.0, 199.0, 214.0, 245.0, 190.0, 209.0, 236.0, 237.0, 235.0, 232.0, 229.0, 229.0, 152.0, 154.0, 83.0, 81.0, 235.0, 209.0, 215.0, 244.0, 224.0, 220.0, 170.0, 188.0, 199.0, 214.0, 219.0, 183.0, 211.0, 194.0, 189.0, 204.0, 266.0, 250.0, 219.0, 180.0, 197.0, 213.0, 219.0, 234.0, 200.0, 201.0, 203.0, 207.0, 179.0, 182.0, 125.0, 119.0, 219.0, 222.0, 228.0, 240.0, 230.0, 232.0, 184.0, 160.0, 164.0, 181.0, 156.0, 129.0, 214.0, 184.0, 246.0, 216.0, 168.0, 179.0, 213.0, 211.0, 194.0, 187.0, 237.0, 216.0, 187.0, 172.0, 191.0, 222.0, 226.0, 224.0, 236.0, 226.0, 189.0, 207.0, 225.0, 222.0, 221.0, 223.0, 224.0, 241.0, 236.0, 219.0, 239.0, 213.0, 231.0, 231.0, 192.0, 198.0, 224.0, 229.0, 160.0, 184.0, 218.0, 238.0, 218.0, 232.0, 240.0, 258.0, 204.0, 217.0, 215.0, 241.0, 198.0, 192.0, 182.0, 196.0, 233.0, 211.0, 222.0, 231.0, 226.0, 238.0, 240.0, 212.0, 195.0, 207.0, 218.0, 206.0, 203.0, 201.0, 234.0, 228.0, 203.0, 209.0, 204.0, 197.0, 216.0, 228.0, 236.0, 229.0, 194.0, 193.0, 226.0, 239.0, 233.0, 220.0, 181.0, 209.0, 209.0, 241.0, 172.0, 169.0, 215.0, 196.0, 214.0, 239.0, 219.0, 228.0, 217.0, 245.0, 181.0, 186.0, 179.0, 156.0, 201.0, 197.0, 223.0, 218.0, 203.0, 210.0, 220.0, 
236.0, 195.0, 204.0, 226.0, 236.0, 229.0, 215.0, 90.0, 104.0, 223.0, 239.0, 205.0, 205.0, 222.0, 228.0, 236.0, 226.0, 190.0, 183.0, 203.0, 213.0, 220.0, 227.0, 219.0, 222.0, 223.0, 236.0, 180.0, 186.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.686855671363231, "mean_inference_ms": 1.2091449764918027, "mean_action_processing_ms": 0.13288788327384127, "mean_env_wait_ms": 0.8567981341073625, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 2150400, "num_agent_steps_trained": 2150400, "num_env_steps_sampled": 1075200, "num_env_steps_trained": 1075200, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 1075200, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 2150400, "timers": {"training_iteration_time_ms": 3576.175, "learn_time_ms": 1064.297, "learn_throughput": 12026.715, "synch_weights_time_ms": 10.67}, "counters": {"num_env_steps_sampled": 1075200, "num_env_steps_trained": 1075200, "num_agent_steps_sampled": 2150400, "num_agent_steps_trained": 2150400}, "done": false, "episodes_total": 2688, "training_iteration": 84, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-05-22", "timestamp": 1666580722, "time_this_iter_s": 3.6167852878570557, "time_total_s": 314.1913673877716, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": 
{"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": 
{"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, 
"log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": 
false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, 
"min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, 
"evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, 
"callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 314.1913673877716, "timesteps_since_restore": 0, "iterations_since_restore": 84, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 21.366666666666664, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 148.2, "sparse_reward_min": 60, "sparse_reward_max": 180, "shaped_reward_mean": 127.01, "shaped_reward_min": 68, "shaped_reward_max": 162, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 13.34, "onion_pickup_agent_0_min": 4, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 13.71, "onion_pickup_agent_1_min": 5, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 12.65, "useful_onion_pickup_agent_0_min": 4, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 12.98, "useful_onion_pickup_agent_1_min": 3, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 1.3, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 1.0, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.68, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, 
"useful_onion_drop_agent_1_mean": 0.56, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 11.72, "potting_onion_agent_0_min": 3, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 12.37, "potting_onion_agent_1_min": 3, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.74, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 14, "dish_pickup_agent_1_mean": 5.87, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 12, "useful_dish_pickup_agent_0_mean": 2.6, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 2.63, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 1.67, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 9, "dish_drop_agent_1_mean": 1.69, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.72, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 5, "useful_dish_drop_agent_1_mean": 0.59, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 4, "soup_pickup_agent_0_mean": 3.89, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 3.96, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 3.82, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 3.86, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.06, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 11.72, "optimal_onion_potting_agent_0_min": 3, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 12.37, "optimal_onion_potting_agent_1_min": 3, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 11.72, "viable_onion_potting_agent_0_min": 3, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 12.37, "viable_onion_potting_agent_1_min": 3, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 1.0339757810987241e-26, "cur_lr": 0.0010000000474974513, "total_loss": -0.00338260130956769, "policy_loss": -0.003559306263923645, 
"vf_loss": 7.971312522888184, "vf_explained_var": 0.6739051342010498, "kl": 0.0015661935321986675, "entropy": 1.2408554553985596, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 1088000, "num_env_steps_trained": 1088000, "num_agent_steps_sampled": 2176000, "num_agent_steps_trained": 2176000}, "sampler_results": {"episode_reward_max": 522.0, "episode_reward_min": 194.0, "episode_reward_mean": 423.41, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 90.0}, "policy_reward_max": {"ppo": 271.0}, "policy_reward_mean": {"ppo": 211.705}, "custom_metrics": {"sparse_reward_mean": 148.2, "sparse_reward_min": 60, "sparse_reward_max": 180, "shaped_reward_mean": 127.01, "shaped_reward_min": 68, "shaped_reward_max": 162, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 13.34, "onion_pickup_agent_0_min": 4, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 13.71, 
"onion_pickup_agent_1_min": 5, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 12.65, "useful_onion_pickup_agent_0_min": 4, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 12.98, "useful_onion_pickup_agent_1_min": 3, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 1.3, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 1.0, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.68, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.56, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 11.72, "potting_onion_agent_0_min": 3, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 12.37, "potting_onion_agent_1_min": 3, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.74, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 14, "dish_pickup_agent_1_mean": 5.87, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 12, "useful_dish_pickup_agent_0_mean": 2.6, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 2.63, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 1.67, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 9, "dish_drop_agent_1_mean": 1.69, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.72, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 5, "useful_dish_drop_agent_1_mean": 0.59, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 4, "soup_pickup_agent_0_mean": 3.89, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 3.96, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 3.82, "soup_delivery_agent_0_min": 0, 
"soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 3.86, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.06, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 11.72, "optimal_onion_potting_agent_0_min": 3, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 12.37, "optimal_onion_potting_agent_1_min": 3, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 11.72, "viable_onion_potting_agent_0_min": 3, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 12.37, "viable_onion_potting_agent_1_min": 3, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [462.0, 347.0, 424.0, 381.0, 453.0, 359.0, 413.0, 450.0, 462.0, 396.0, 447.0, 444.0, 465.0, 455.0, 452.0, 462.0, 390.0, 453.0, 344.0, 456.0, 450.0, 498.0, 421.0, 456.0, 390.0, 378.0, 444.0, 453.0, 464.0, 452.0, 402.0, 424.0, 404.0, 462.0, 412.0, 401.0, 444.0, 465.0, 387.0, 465.0, 453.0, 390.0, 450.0, 341.0, 411.0, 453.0, 447.0, 462.0, 367.0, 335.0, 398.0, 441.0, 413.0, 456.0, 399.0, 462.0, 444.0, 194.0, 462.0, 410.0, 450.0, 462.0, 373.0, 416.0, 447.0, 441.0, 459.0, 366.0, 447.0, 513.0, 393.0, 402.0, 462.0, 393.0, 450.0, 404.0, 462.0, 441.0, 507.0, 453.0, 390.0, 522.0, 401.0, 456.0, 459.0, 339.0, 453.0, 384.0, 401.0, 465.0, 247.0, 314.0, 513.0, 387.0, 404.0, 458.0, 228.0, 453.0, 438.0, 513.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [246.0, 216.0, 168.0, 179.0, 213.0, 211.0, 194.0, 187.0, 237.0, 216.0, 187.0, 172.0, 191.0, 222.0, 226.0, 224.0, 236.0, 226.0, 189.0, 207.0, 225.0, 222.0, 221.0, 223.0, 224.0, 241.0, 236.0, 219.0, 239.0, 213.0, 231.0, 231.0, 192.0, 198.0, 224.0, 229.0, 160.0, 184.0, 218.0, 238.0, 218.0, 232.0, 240.0, 258.0, 204.0, 217.0, 215.0, 241.0, 198.0, 
192.0, 182.0, 196.0, 233.0, 211.0, 222.0, 231.0, 226.0, 238.0, 240.0, 212.0, 195.0, 207.0, 218.0, 206.0, 203.0, 201.0, 234.0, 228.0, 203.0, 209.0, 204.0, 197.0, 216.0, 228.0, 236.0, 229.0, 194.0, 193.0, 226.0, 239.0, 233.0, 220.0, 181.0, 209.0, 209.0, 241.0, 172.0, 169.0, 215.0, 196.0, 214.0, 239.0, 219.0, 228.0, 217.0, 245.0, 181.0, 186.0, 179.0, 156.0, 201.0, 197.0, 223.0, 218.0, 203.0, 210.0, 220.0, 236.0, 195.0, 204.0, 226.0, 236.0, 229.0, 215.0, 90.0, 104.0, 223.0, 239.0, 205.0, 205.0, 222.0, 228.0, 236.0, 226.0, 190.0, 183.0, 203.0, 213.0, 220.0, 227.0, 219.0, 222.0, 223.0, 236.0, 180.0, 186.0, 228.0, 219.0, 255.0, 258.0, 181.0, 212.0, 205.0, 197.0, 217.0, 245.0, 192.0, 201.0, 231.0, 219.0, 212.0, 192.0, 237.0, 225.0, 226.0, 215.0, 249.0, 258.0, 234.0, 219.0, 199.0, 191.0, 251.0, 271.0, 202.0, 199.0, 229.0, 227.0, 221.0, 238.0, 163.0, 176.0, 222.0, 231.0, 195.0, 189.0, 185.0, 216.0, 222.0, 243.0, 123.0, 124.0, 164.0, 150.0, 256.0, 257.0, 200.0, 187.0, 222.0, 182.0, 221.0, 237.0, 107.0, 121.0, 239.0, 214.0, 209.0, 229.0, 267.0, 246.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6868809329842898, "mean_inference_ms": 1.2091180245018194, "mean_action_processing_ms": 0.13290214276243056, "mean_env_wait_ms": 0.8564687501783257, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 522.0, "episode_reward_min": 194.0, "episode_reward_mean": 423.41, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 90.0}, "policy_reward_max": {"ppo": 271.0}, "policy_reward_mean": {"ppo": 211.705}, "hist_stats": {"episode_reward": [462.0, 347.0, 424.0, 381.0, 453.0, 359.0, 413.0, 450.0, 462.0, 396.0, 447.0, 444.0, 465.0, 455.0, 452.0, 462.0, 390.0, 453.0, 344.0, 456.0, 450.0, 498.0, 421.0, 456.0, 390.0, 378.0, 444.0, 453.0, 464.0, 452.0, 402.0, 424.0, 404.0, 462.0, 412.0, 401.0, 444.0, 465.0, 387.0, 465.0, 453.0, 390.0, 450.0, 341.0, 411.0, 453.0, 447.0, 462.0, 367.0, 335.0, 398.0, 441.0, 413.0, 456.0, 399.0, 462.0, 444.0, 
194.0, 462.0, 410.0, 450.0, 462.0, 373.0, 416.0, 447.0, 441.0, 459.0, 366.0, 447.0, 513.0, 393.0, 402.0, 462.0, 393.0, 450.0, 404.0, 462.0, 441.0, 507.0, 453.0, 390.0, 522.0, 401.0, 456.0, 459.0, 339.0, 453.0, 384.0, 401.0, 465.0, 247.0, 314.0, 513.0, 387.0, 404.0, 458.0, 228.0, 453.0, 438.0, 513.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [246.0, 216.0, 168.0, 179.0, 213.0, 211.0, 194.0, 187.0, 237.0, 216.0, 187.0, 172.0, 191.0, 222.0, 226.0, 224.0, 236.0, 226.0, 189.0, 207.0, 225.0, 222.0, 221.0, 223.0, 224.0, 241.0, 236.0, 219.0, 239.0, 213.0, 231.0, 231.0, 192.0, 198.0, 224.0, 229.0, 160.0, 184.0, 218.0, 238.0, 218.0, 232.0, 240.0, 258.0, 204.0, 217.0, 215.0, 241.0, 198.0, 192.0, 182.0, 196.0, 233.0, 211.0, 222.0, 231.0, 226.0, 238.0, 240.0, 212.0, 195.0, 207.0, 218.0, 206.0, 203.0, 201.0, 234.0, 228.0, 203.0, 209.0, 204.0, 197.0, 216.0, 228.0, 236.0, 229.0, 194.0, 193.0, 226.0, 239.0, 233.0, 220.0, 181.0, 209.0, 209.0, 241.0, 172.0, 169.0, 215.0, 196.0, 214.0, 239.0, 219.0, 228.0, 217.0, 245.0, 181.0, 186.0, 179.0, 156.0, 201.0, 197.0, 223.0, 218.0, 203.0, 210.0, 220.0, 236.0, 195.0, 204.0, 226.0, 236.0, 229.0, 215.0, 90.0, 104.0, 223.0, 239.0, 205.0, 205.0, 222.0, 228.0, 236.0, 226.0, 190.0, 183.0, 203.0, 213.0, 220.0, 227.0, 219.0, 222.0, 223.0, 236.0, 180.0, 186.0, 228.0, 219.0, 255.0, 258.0, 181.0, 212.0, 205.0, 197.0, 217.0, 245.0, 192.0, 201.0, 231.0, 219.0, 212.0, 192.0, 237.0, 225.0, 226.0, 215.0, 249.0, 258.0, 234.0, 219.0, 199.0, 191.0, 251.0, 271.0, 202.0, 
199.0, 229.0, 227.0, 221.0, 238.0, 163.0, 176.0, 222.0, 231.0, 195.0, 189.0, 185.0, 216.0, 222.0, 243.0, 123.0, 124.0, 164.0, 150.0, 256.0, 257.0, 200.0, 187.0, 222.0, 182.0, 221.0, 237.0, 107.0, 121.0, 239.0, 214.0, 209.0, 229.0, 267.0, 246.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6868809329842898, "mean_inference_ms": 1.2091180245018194, "mean_action_processing_ms": 0.13290214276243056, "mean_env_wait_ms": 0.8564687501783257, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 2176000, "num_agent_steps_trained": 2176000, "num_env_steps_sampled": 1088000, "num_env_steps_trained": 1088000, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 1088000, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 2176000, "timers": {"training_iteration_time_ms": 3597.35, "learn_time_ms": 1073.382, "learn_throughput": 11924.93, "synch_weights_time_ms": 10.686}, "counters": {"num_env_steps_sampled": 1088000, "num_env_steps_trained": 1088000, "num_agent_steps_sampled": 2176000, "num_agent_steps_trained": 2176000}, "done": false, "episodes_total": 2720, "training_iteration": 85, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-05-26", "timestamp": 1666580726, "time_this_iter_s": 3.7110044956207275, "time_total_s": 317.90237188339233, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 317.90237188339233, "timesteps_since_restore": 0, "iterations_since_restore": 85, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 24.419999999999998, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 148.2, "sparse_reward_min": 60, "sparse_reward_max": 180, "shaped_reward_mean": 127.55, "shaped_reward_min": 68, "shaped_reward_max": 162, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 13.49, "onion_pickup_agent_0_min": 4, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 13.51, "onion_pickup_agent_1_min": 5, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 12.75, "useful_onion_pickup_agent_0_min": 4, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 12.79, "useful_onion_pickup_agent_1_min": 3, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 1.2, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 0.95, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.65, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, 
"useful_onion_drop_agent_1_mean": 0.55, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 11.95, "potting_onion_agent_0_min": 3, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 12.19, "potting_onion_agent_1_min": 3, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.5, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 14, "dish_pickup_agent_1_mean": 5.69, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 2.66, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 2.75, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 1.39, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 9, "dish_drop_agent_1_mean": 1.53, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.59, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 5, "useful_dish_drop_agent_1_mean": 0.5, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 4, "soup_pickup_agent_0_mean": 3.9, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 3.92, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 3.82, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 3.83, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.05, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 11.95, "optimal_onion_potting_agent_0_min": 3, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 12.19, "optimal_onion_potting_agent_1_min": 3, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 11.95, "viable_onion_potting_agent_0_min": 3, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 12.19, "viable_onion_potting_agent_1_min": 3, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 5.169878905493621e-27, "cur_lr": 0.0010000000474974513, "total_loss": -0.0018517355201765895, "policy_loss": 
-0.002034394070506096, "vf_loss": 7.965593338012695, "vf_explained_var": 0.6685700416564941, "kl": 0.0015647329855710268, "entropy": 1.227802038192749, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 1100800, "num_env_steps_trained": 1100800, "num_agent_steps_sampled": 2201600, "num_agent_steps_trained": 2201600}, "sampler_results": {"episode_reward_max": 522.0, "episode_reward_min": 194.0, "episode_reward_mean": 423.95, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 90.0}, "policy_reward_max": {"ppo": 272.0}, "policy_reward_mean": {"ppo": 211.975}, "custom_metrics": {"sparse_reward_mean": 148.2, "sparse_reward_min": 60, "sparse_reward_max": 180, "shaped_reward_mean": 127.55, "shaped_reward_min": 68, "shaped_reward_max": 162, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 13.49, "onion_pickup_agent_0_min": 4, "onion_pickup_agent_0_max": 25, 
"onion_pickup_agent_1_mean": 13.51, "onion_pickup_agent_1_min": 5, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 12.75, "useful_onion_pickup_agent_0_min": 4, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 12.79, "useful_onion_pickup_agent_1_min": 3, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 1.2, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 0.95, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.65, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.55, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 11.95, "potting_onion_agent_0_min": 3, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 12.19, "potting_onion_agent_1_min": 3, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.5, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 14, "dish_pickup_agent_1_mean": 5.69, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 2.66, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 2.75, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 1.39, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 9, "dish_drop_agent_1_mean": 1.53, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.59, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 5, "useful_dish_drop_agent_1_mean": 0.5, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 4, "soup_pickup_agent_0_mean": 3.9, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 3.92, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 3.82, 
"soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 3.83, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.05, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 11.95, "optimal_onion_potting_agent_0_min": 3, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 12.19, "optimal_onion_potting_agent_1_min": 3, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 11.95, "viable_onion_potting_agent_0_min": 3, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 12.19, "viable_onion_potting_agent_1_min": 3, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, 
"useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [404.0, 462.0, 412.0, 401.0, 444.0, 465.0, 387.0, 465.0, 453.0, 390.0, 450.0, 341.0, 411.0, 453.0, 447.0, 462.0, 367.0, 335.0, 398.0, 441.0, 413.0, 456.0, 399.0, 462.0, 444.0, 194.0, 462.0, 410.0, 450.0, 462.0, 373.0, 416.0, 447.0, 441.0, 459.0, 366.0, 447.0, 513.0, 393.0, 402.0, 462.0, 393.0, 450.0, 404.0, 462.0, 441.0, 507.0, 453.0, 390.0, 522.0, 401.0, 456.0, 459.0, 339.0, 453.0, 384.0, 401.0, 465.0, 247.0, 314.0, 513.0, 387.0, 404.0, 458.0, 228.0, 453.0, 438.0, 513.0, 381.0, 513.0, 459.0, 507.0, 453.0, 404.0, 356.0, 441.0, 462.0, 402.0, 438.0, 453.0, 416.0, 398.0, 462.0, 447.0, 470.0, 456.0, 467.0, 459.0, 456.0, 456.0, 465.0, 291.0, 413.0, 435.0, 462.0, 396.0, 359.0, 453.0, 401.0, 370.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [203.0, 201.0, 234.0, 228.0, 203.0, 209.0, 204.0, 197.0, 216.0, 228.0, 236.0, 229.0, 194.0, 193.0, 226.0, 239.0, 233.0, 220.0, 181.0, 209.0, 209.0, 241.0, 172.0, 169.0, 215.0, 196.0, 214.0, 239.0, 219.0, 228.0, 217.0, 245.0, 181.0, 186.0, 179.0, 156.0, 201.0, 197.0, 223.0, 218.0, 203.0, 210.0, 220.0, 
236.0, 195.0, 204.0, 226.0, 236.0, 229.0, 215.0, 90.0, 104.0, 223.0, 239.0, 205.0, 205.0, 222.0, 228.0, 236.0, 226.0, 190.0, 183.0, 203.0, 213.0, 220.0, 227.0, 219.0, 222.0, 223.0, 236.0, 180.0, 186.0, 228.0, 219.0, 255.0, 258.0, 181.0, 212.0, 205.0, 197.0, 217.0, 245.0, 192.0, 201.0, 231.0, 219.0, 212.0, 192.0, 237.0, 225.0, 226.0, 215.0, 249.0, 258.0, 234.0, 219.0, 199.0, 191.0, 251.0, 271.0, 202.0, 199.0, 229.0, 227.0, 221.0, 238.0, 163.0, 176.0, 222.0, 231.0, 195.0, 189.0, 185.0, 216.0, 222.0, 243.0, 123.0, 124.0, 164.0, 150.0, 256.0, 257.0, 200.0, 187.0, 222.0, 182.0, 221.0, 237.0, 107.0, 121.0, 239.0, 214.0, 209.0, 229.0, 267.0, 246.0, 190.0, 191.0, 241.0, 272.0, 230.0, 229.0, 271.0, 236.0, 240.0, 213.0, 196.0, 208.0, 185.0, 171.0, 216.0, 225.0, 235.0, 227.0, 202.0, 200.0, 220.0, 218.0, 231.0, 222.0, 205.0, 211.0, 191.0, 207.0, 232.0, 230.0, 211.0, 236.0, 230.0, 240.0, 239.0, 217.0, 234.0, 233.0, 220.0, 239.0, 231.0, 225.0, 235.0, 221.0, 226.0, 239.0, 138.0, 153.0, 193.0, 220.0, 231.0, 204.0, 245.0, 217.0, 207.0, 189.0, 174.0, 185.0, 230.0, 223.0, 216.0, 185.0, 185.0, 185.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.686874370240036, "mean_inference_ms": 1.2090592390348984, "mean_action_processing_ms": 0.1329117043730399, "mean_env_wait_ms": 0.8561022738174671, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 522.0, "episode_reward_min": 194.0, "episode_reward_mean": 423.95, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 90.0}, "policy_reward_max": {"ppo": 272.0}, "policy_reward_mean": {"ppo": 211.975}, "hist_stats": {"episode_reward": [404.0, 462.0, 412.0, 401.0, 444.0, 465.0, 387.0, 465.0, 453.0, 390.0, 450.0, 341.0, 411.0, 453.0, 447.0, 462.0, 367.0, 335.0, 398.0, 441.0, 413.0, 456.0, 399.0, 462.0, 444.0, 194.0, 462.0, 410.0, 450.0, 462.0, 373.0, 416.0, 447.0, 441.0, 459.0, 366.0, 447.0, 513.0, 393.0, 402.0, 462.0, 393.0, 450.0, 404.0, 462.0, 441.0, 507.0, 453.0, 390.0, 522.0, 401.0, 
456.0, 459.0, 339.0, 453.0, 384.0, 401.0, 465.0, 247.0, 314.0, 513.0, 387.0, 404.0, 458.0, 228.0, 453.0, 438.0, 513.0, 381.0, 513.0, 459.0, 507.0, 453.0, 404.0, 356.0, 441.0, 462.0, 402.0, 438.0, 453.0, 416.0, 398.0, 462.0, 447.0, 470.0, 456.0, 467.0, 459.0, 456.0, 456.0, 465.0, 291.0, 413.0, 435.0, 462.0, 396.0, 359.0, 453.0, 401.0, 370.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [203.0, 201.0, 234.0, 228.0, 203.0, 209.0, 204.0, 197.0, 216.0, 228.0, 236.0, 229.0, 194.0, 193.0, 226.0, 239.0, 233.0, 220.0, 181.0, 209.0, 209.0, 241.0, 172.0, 169.0, 215.0, 196.0, 214.0, 239.0, 219.0, 228.0, 217.0, 245.0, 181.0, 186.0, 179.0, 156.0, 201.0, 197.0, 223.0, 218.0, 203.0, 210.0, 220.0, 236.0, 195.0, 204.0, 226.0, 236.0, 229.0, 215.0, 90.0, 104.0, 223.0, 239.0, 205.0, 205.0, 222.0, 228.0, 236.0, 226.0, 190.0, 183.0, 203.0, 213.0, 220.0, 227.0, 219.0, 222.0, 223.0, 236.0, 180.0, 186.0, 228.0, 219.0, 255.0, 258.0, 181.0, 212.0, 205.0, 197.0, 217.0, 245.0, 192.0, 201.0, 231.0, 219.0, 212.0, 192.0, 237.0, 225.0, 226.0, 215.0, 249.0, 258.0, 234.0, 219.0, 199.0, 191.0, 251.0, 271.0, 202.0, 199.0, 229.0, 227.0, 221.0, 238.0, 163.0, 176.0, 222.0, 231.0, 195.0, 189.0, 185.0, 216.0, 222.0, 243.0, 123.0, 124.0, 164.0, 150.0, 256.0, 257.0, 200.0, 187.0, 222.0, 182.0, 221.0, 237.0, 107.0, 121.0, 239.0, 214.0, 209.0, 229.0, 267.0, 246.0, 190.0, 191.0, 241.0, 272.0, 230.0, 229.0, 271.0, 236.0, 240.0, 213.0, 196.0, 208.0, 185.0, 171.0, 216.0, 225.0, 235.0, 227.0, 202.0, 200.0, 220.0, 218.0, 231.0, 
222.0, 205.0, 211.0, 191.0, 207.0, 232.0, 230.0, 211.0, 236.0, 230.0, 240.0, 239.0, 217.0, 234.0, 233.0, 220.0, 239.0, 231.0, 225.0, 235.0, 221.0, 226.0, 239.0, 138.0, 153.0, 193.0, 220.0, 231.0, 204.0, 245.0, 217.0, 207.0, 189.0, 174.0, 185.0, 230.0, 223.0, 216.0, 185.0, 185.0, 185.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.686874370240036, "mean_inference_ms": 1.2090592390348984, "mean_action_processing_ms": 0.1329117043730399, "mean_env_wait_ms": 0.8561022738174671, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 2201600, "num_agent_steps_trained": 2201600, "num_env_steps_sampled": 1100800, "num_env_steps_trained": 1100800, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 1100800, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 2201600, "timers": {"training_iteration_time_ms": 3611.444, "learn_time_ms": 1086.391, "learn_throughput": 11782.133, "synch_weights_time_ms": 10.058}, "counters": {"num_env_steps_sampled": 1100800, "num_env_steps_trained": 1100800, "num_agent_steps_sampled": 2201600, "num_agent_steps_trained": 2201600}, "done": false, "episodes_total": 2752, "training_iteration": 86, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-05-29", "timestamp": 1666580729, "time_this_iter_s": 3.7309343814849854, "time_total_s": 321.6333062648773, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, 
"device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, 
"sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": 
{"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", 
"observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, 
"keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function 
get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function 
gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 321.6333062648773, "timesteps_since_restore": 0, "iterations_since_restore": 86, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 21.200000000000003, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 150.0, "sparse_reward_min": 60, "sparse_reward_max": 180, "shaped_reward_mean": 128.63, "shaped_reward_min": 60, "shaped_reward_max": 162, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 13.47, "onion_pickup_agent_0_min": 4, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 13.56, "onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 12.69, "useful_onion_pickup_agent_0_min": 4, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 12.85, "useful_onion_pickup_agent_1_min": 4, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 1.04, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 1.06, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.55, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, 
"useful_onion_drop_agent_1_mean": 0.58, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 6, "potting_onion_agent_0_mean": 12.06, "potting_onion_agent_0_min": 3, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 12.16, "potting_onion_agent_1_min": 3, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.44, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.29, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 12, "useful_dish_pickup_agent_0_mean": 2.96, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 2.73, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 1.22, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 1.31, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.47, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.35, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 4, "soup_pickup_agent_0_mean": 4.0, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 3.81, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 3.92, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 3.74, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.05, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 12.06, "optimal_onion_potting_agent_0_min": 3, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 12.16, "optimal_onion_potting_agent_1_min": 3, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 12.06, "viable_onion_potting_agent_0_min": 3, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 12.16, "viable_onion_potting_agent_1_min": 3, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 2.5849394527468104e-27, "cur_lr": 0.0010000000474974513, "total_loss": -0.0015968354418873787, "policy_loss": 
-0.001768257119692862, "vf_loss": 7.829046726226807, "vf_explained_var": 0.6879750490188599, "kl": 0.001952783903107047, "entropy": 1.2229652404785156, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 1113600, "num_env_steps_trained": 1113600, "num_agent_steps_sampled": 2227200, "num_agent_steps_trained": 2227200}, "sampler_results": {"episode_reward_max": 522.0, "episode_reward_min": 180.0, "episode_reward_mean": 428.63, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 87.0}, "policy_reward_max": {"ppo": 272.0}, "policy_reward_mean": {"ppo": 214.315}, "custom_metrics": {"sparse_reward_mean": 150.0, "sparse_reward_min": 60, "sparse_reward_max": 180, "shaped_reward_mean": 128.63, "shaped_reward_min": 60, "shaped_reward_max": 162, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 13.47, "onion_pickup_agent_0_min": 4, "onion_pickup_agent_0_max": 22, 
"onion_pickup_agent_1_mean": 13.56, "onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 12.69, "useful_onion_pickup_agent_0_min": 4, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 12.85, "useful_onion_pickup_agent_1_min": 4, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 1.04, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 1.06, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.55, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.58, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 6, "potting_onion_agent_0_mean": 12.06, "potting_onion_agent_0_min": 3, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 12.16, "potting_onion_agent_1_min": 3, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.44, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.29, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 12, "useful_dish_pickup_agent_0_mean": 2.96, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 2.73, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 1.22, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 1.31, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.47, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.35, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 4, "soup_pickup_agent_0_mean": 4.0, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 3.81, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 3.92, 
"soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 3.74, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.05, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 12.06, "optimal_onion_potting_agent_0_min": 3, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 12.16, "optimal_onion_potting_agent_1_min": 3, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 12.06, "viable_onion_potting_agent_0_min": 3, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 12.16, "viable_onion_potting_agent_1_min": 3, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, 
"useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [447.0, 441.0, 459.0, 366.0, 447.0, 513.0, 393.0, 402.0, 462.0, 393.0, 450.0, 404.0, 462.0, 441.0, 507.0, 453.0, 390.0, 522.0, 401.0, 456.0, 459.0, 339.0, 453.0, 384.0, 401.0, 465.0, 247.0, 314.0, 513.0, 387.0, 404.0, 458.0, 228.0, 453.0, 438.0, 513.0, 381.0, 513.0, 459.0, 507.0, 453.0, 404.0, 356.0, 441.0, 462.0, 402.0, 438.0, 453.0, 416.0, 398.0, 462.0, 447.0, 470.0, 456.0, 467.0, 459.0, 456.0, 456.0, 465.0, 291.0, 413.0, 435.0, 462.0, 396.0, 359.0, 453.0, 401.0, 370.0, 461.0, 459.0, 402.0, 465.0, 465.0, 456.0, 453.0, 396.0, 444.0, 462.0, 456.0, 441.0, 402.0, 416.0, 410.0, 395.0, 180.0, 462.0, 411.0, 465.0, 336.0, 465.0, 464.0, 513.0, 456.0, 456.0, 441.0, 465.0, 495.0, 341.0, 459.0, 405.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [220.0, 227.0, 219.0, 222.0, 223.0, 236.0, 180.0, 186.0, 228.0, 219.0, 255.0, 258.0, 181.0, 212.0, 205.0, 197.0, 217.0, 245.0, 192.0, 201.0, 231.0, 219.0, 212.0, 192.0, 237.0, 225.0, 226.0, 215.0, 249.0, 258.0, 234.0, 219.0, 199.0, 191.0, 251.0, 271.0, 202.0, 199.0, 229.0, 227.0, 221.0, 238.0, 163.0, 
176.0, 222.0, 231.0, 195.0, 189.0, 185.0, 216.0, 222.0, 243.0, 123.0, 124.0, 164.0, 150.0, 256.0, 257.0, 200.0, 187.0, 222.0, 182.0, 221.0, 237.0, 107.0, 121.0, 239.0, 214.0, 209.0, 229.0, 267.0, 246.0, 190.0, 191.0, 241.0, 272.0, 230.0, 229.0, 271.0, 236.0, 240.0, 213.0, 196.0, 208.0, 185.0, 171.0, 216.0, 225.0, 235.0, 227.0, 202.0, 200.0, 220.0, 218.0, 231.0, 222.0, 205.0, 211.0, 191.0, 207.0, 232.0, 230.0, 211.0, 236.0, 230.0, 240.0, 239.0, 217.0, 234.0, 233.0, 220.0, 239.0, 231.0, 225.0, 235.0, 221.0, 226.0, 239.0, 138.0, 153.0, 193.0, 220.0, 231.0, 204.0, 245.0, 217.0, 207.0, 189.0, 174.0, 185.0, 230.0, 223.0, 216.0, 185.0, 185.0, 185.0, 228.0, 233.0, 222.0, 237.0, 206.0, 196.0, 228.0, 237.0, 220.0, 245.0, 220.0, 236.0, 230.0, 223.0, 214.0, 182.0, 232.0, 212.0, 243.0, 219.0, 228.0, 228.0, 226.0, 215.0, 203.0, 199.0, 202.0, 214.0, 214.0, 196.0, 199.0, 196.0, 87.0, 93.0, 234.0, 228.0, 203.0, 208.0, 231.0, 234.0, 152.0, 184.0, 234.0, 231.0, 237.0, 227.0, 267.0, 246.0, 221.0, 235.0, 230.0, 226.0, 220.0, 221.0, 250.0, 215.0, 254.0, 241.0, 171.0, 170.0, 239.0, 220.0, 210.0, 195.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6868516517488307, "mean_inference_ms": 1.209345762889332, "mean_action_processing_ms": 0.13291795937199066, "mean_env_wait_ms": 0.8561722429975059, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 522.0, "episode_reward_min": 180.0, "episode_reward_mean": 428.63, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 87.0}, "policy_reward_max": {"ppo": 272.0}, "policy_reward_mean": {"ppo": 214.315}, "hist_stats": {"episode_reward": [447.0, 441.0, 459.0, 366.0, 447.0, 513.0, 393.0, 402.0, 462.0, 393.0, 450.0, 404.0, 462.0, 441.0, 507.0, 453.0, 390.0, 522.0, 401.0, 456.0, 459.0, 339.0, 453.0, 384.0, 401.0, 465.0, 247.0, 314.0, 513.0, 387.0, 404.0, 458.0, 228.0, 453.0, 438.0, 513.0, 381.0, 513.0, 459.0, 507.0, 453.0, 404.0, 356.0, 441.0, 462.0, 402.0, 438.0, 453.0, 416.0, 398.0, 462.0, 
447.0, 470.0, 456.0, 467.0, 459.0, 456.0, 456.0, 465.0, 291.0, 413.0, 435.0, 462.0, 396.0, 359.0, 453.0, 401.0, 370.0, 461.0, 459.0, 402.0, 465.0, 465.0, 456.0, 453.0, 396.0, 444.0, 462.0, 456.0, 441.0, 402.0, 416.0, 410.0, 395.0, 180.0, 462.0, 411.0, 465.0, 336.0, 465.0, 464.0, 513.0, 456.0, 456.0, 441.0, 465.0, 495.0, 341.0, 459.0, 405.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [220.0, 227.0, 219.0, 222.0, 223.0, 236.0, 180.0, 186.0, 228.0, 219.0, 255.0, 258.0, 181.0, 212.0, 205.0, 197.0, 217.0, 245.0, 192.0, 201.0, 231.0, 219.0, 212.0, 192.0, 237.0, 225.0, 226.0, 215.0, 249.0, 258.0, 234.0, 219.0, 199.0, 191.0, 251.0, 271.0, 202.0, 199.0, 229.0, 227.0, 221.0, 238.0, 163.0, 176.0, 222.0, 231.0, 195.0, 189.0, 185.0, 216.0, 222.0, 243.0, 123.0, 124.0, 164.0, 150.0, 256.0, 257.0, 200.0, 187.0, 222.0, 182.0, 221.0, 237.0, 107.0, 121.0, 239.0, 214.0, 209.0, 229.0, 267.0, 246.0, 190.0, 191.0, 241.0, 272.0, 230.0, 229.0, 271.0, 236.0, 240.0, 213.0, 196.0, 208.0, 185.0, 171.0, 216.0, 225.0, 235.0, 227.0, 202.0, 200.0, 220.0, 218.0, 231.0, 222.0, 205.0, 211.0, 191.0, 207.0, 232.0, 230.0, 211.0, 236.0, 230.0, 240.0, 239.0, 217.0, 234.0, 233.0, 220.0, 239.0, 231.0, 225.0, 235.0, 221.0, 226.0, 239.0, 138.0, 153.0, 193.0, 220.0, 231.0, 204.0, 245.0, 217.0, 207.0, 189.0, 174.0, 185.0, 230.0, 223.0, 216.0, 185.0, 185.0, 185.0, 228.0, 233.0, 222.0, 237.0, 206.0, 196.0, 228.0, 237.0, 220.0, 245.0, 220.0, 236.0, 230.0, 223.0, 214.0, 182.0, 232.0, 212.0, 243.0, 219.0, 228.0, 228.0, 226.0, 
215.0, 203.0, 199.0, 202.0, 214.0, 214.0, 196.0, 199.0, 196.0, 87.0, 93.0, 234.0, 228.0, 203.0, 208.0, 231.0, 234.0, 152.0, 184.0, 234.0, 231.0, 237.0, 227.0, 267.0, 246.0, 221.0, 235.0, 230.0, 226.0, 220.0, 221.0, 250.0, 215.0, 254.0, 241.0, 171.0, 170.0, 239.0, 220.0, 210.0, 195.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6868516517488307, "mean_inference_ms": 1.209345762889332, "mean_action_processing_ms": 0.13291795937199066, "mean_env_wait_ms": 0.8561722429975059, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 2227200, "num_agent_steps_trained": 2227200, "num_env_steps_sampled": 1113600, "num_env_steps_trained": 1113600, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 1113600, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 2227200, "timers": {"training_iteration_time_ms": 3629.961, "learn_time_ms": 1091.432, "learn_throughput": 11727.714, "synch_weights_time_ms": 9.59}, "counters": {"num_env_steps_sampled": 1113600, "num_env_steps_trained": 1113600, "num_agent_steps_sampled": 2227200, "num_agent_steps_trained": 2227200}, "done": false, "episodes_total": 2784, "training_iteration": 87, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-05-34", "timestamp": 1666580734, "time_this_iter_s": 3.8736486434936523, "time_total_s": 325.506954908371, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, 
"device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, 
"sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": 
{"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", 
"observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, 
"keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function 
get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function 
gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 325.506954908371, "timesteps_since_restore": 0, "iterations_since_restore": 87, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 24.98, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 150.6, "sparse_reward_min": 60, "sparse_reward_max": 180, "shaped_reward_mean": 129.13, "shaped_reward_min": 60, "shaped_reward_max": 164, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 13.45, "onion_pickup_agent_0_min": 4, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 13.8, "onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 12.69, "useful_onion_pickup_agent_0_min": 4, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 12.94, "useful_onion_pickup_agent_1_min": 6, "useful_onion_pickup_agent_1_max": 20, "onion_drop_agent_0_mean": 0.9, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 1.28, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 8, "useful_onion_drop_agent_0_mean": 0.5, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, 
"useful_onion_drop_agent_1_mean": 0.72, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 6, "potting_onion_agent_0_mean": 12.16, "potting_onion_agent_0_min": 3, "potting_onion_agent_0_max": 18, "potting_onion_agent_1_mean": 12.16, "potting_onion_agent_1_min": 3, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.45, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.21, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 12, "useful_dish_pickup_agent_0_mean": 2.86, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 2.83, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 1.23, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 1.24, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.42, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.32, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 4.05, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 3.81, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 3.97, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 3.74, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.05, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 12.16, "optimal_onion_potting_agent_0_min": 3, "optimal_onion_potting_agent_0_max": 18, "optimal_onion_potting_agent_1_mean": 12.16, "optimal_onion_potting_agent_1_min": 3, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 12.16, "viable_onion_potting_agent_0_min": 3, "viable_onion_potting_agent_0_max": 18, "viable_onion_potting_agent_1_mean": 12.16, "viable_onion_potting_agent_1_min": 3, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 1.2924697263734052e-27, "cur_lr": 0.0010000000474974513, "total_loss": -0.003479755949229002, "policy_loss": 
-0.003657393390312791, "vf_loss": 7.948546886444092, "vf_explained_var": 0.6666540503501892, "kl": 0.0016054816078394651, "entropy": 1.2344354391098022, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 1126400, "num_env_steps_trained": 1126400, "num_agent_steps_sampled": 2252800, "num_agent_steps_trained": 2252800}, "sampler_results": {"episode_reward_max": 524.0, "episode_reward_min": 180.0, "episode_reward_mean": 430.33, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 87.0}, "policy_reward_max": {"ppo": 278.0}, "policy_reward_mean": {"ppo": 215.165}, "custom_metrics": {"sparse_reward_mean": 150.6, "sparse_reward_min": 60, "sparse_reward_max": 180, "shaped_reward_mean": 129.13, "shaped_reward_min": 60, "shaped_reward_max": 164, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 13.45, "onion_pickup_agent_0_min": 4, "onion_pickup_agent_0_max": 22, 
"onion_pickup_agent_1_mean": 13.8, "onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 12.69, "useful_onion_pickup_agent_0_min": 4, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 12.94, "useful_onion_pickup_agent_1_min": 6, "useful_onion_pickup_agent_1_max": 20, "onion_drop_agent_0_mean": 0.9, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 1.28, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 8, "useful_onion_drop_agent_0_mean": 0.5, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, "useful_onion_drop_agent_1_mean": 0.72, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 6, "potting_onion_agent_0_mean": 12.16, "potting_onion_agent_0_min": 3, "potting_onion_agent_0_max": 18, "potting_onion_agent_1_mean": 12.16, "potting_onion_agent_1_min": 3, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.45, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.21, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 12, "useful_dish_pickup_agent_0_mean": 2.86, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 2.83, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 1.23, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 1.24, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.42, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.32, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 4.05, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 3.81, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 3.97, 
"soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 3.74, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.05, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 12.16, "optimal_onion_potting_agent_0_min": 3, "optimal_onion_potting_agent_0_max": 18, "optimal_onion_potting_agent_1_mean": 12.16, "optimal_onion_potting_agent_1_min": 3, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 12.16, "viable_onion_potting_agent_0_min": 3, "viable_onion_potting_agent_0_max": 18, "viable_onion_potting_agent_1_mean": 12.16, "viable_onion_potting_agent_1_min": 3, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, 
"useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [228.0, 453.0, 438.0, 513.0, 381.0, 513.0, 459.0, 507.0, 453.0, 404.0, 356.0, 441.0, 462.0, 402.0, 438.0, 453.0, 416.0, 398.0, 462.0, 447.0, 470.0, 456.0, 467.0, 459.0, 456.0, 456.0, 465.0, 291.0, 413.0, 435.0, 462.0, 396.0, 359.0, 453.0, 401.0, 370.0, 461.0, 459.0, 402.0, 465.0, 465.0, 456.0, 453.0, 396.0, 444.0, 462.0, 456.0, 441.0, 402.0, 416.0, 410.0, 395.0, 180.0, 462.0, 411.0, 465.0, 336.0, 465.0, 464.0, 513.0, 456.0, 456.0, 441.0, 465.0, 495.0, 341.0, 459.0, 405.0, 507.0, 447.0, 441.0, 470.0, 338.0, 410.0, 453.0, 402.0, 456.0, 453.0, 453.0, 398.0, 444.0, 459.0, 444.0, 341.0, 467.0, 407.0, 444.0, 456.0, 459.0, 407.0, 404.0, 450.0, 378.0, 465.0, 524.0, 393.0, 399.0, 359.0, 416.0, 459.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [107.0, 121.0, 239.0, 214.0, 209.0, 229.0, 267.0, 246.0, 190.0, 191.0, 241.0, 272.0, 230.0, 229.0, 271.0, 236.0, 240.0, 213.0, 196.0, 208.0, 185.0, 171.0, 216.0, 225.0, 235.0, 227.0, 202.0, 200.0, 220.0, 218.0, 231.0, 222.0, 205.0, 211.0, 191.0, 207.0, 232.0, 230.0, 211.0, 236.0, 230.0, 240.0, 239.0, 
217.0, 234.0, 233.0, 220.0, 239.0, 231.0, 225.0, 235.0, 221.0, 226.0, 239.0, 138.0, 153.0, 193.0, 220.0, 231.0, 204.0, 245.0, 217.0, 207.0, 189.0, 174.0, 185.0, 230.0, 223.0, 216.0, 185.0, 185.0, 185.0, 228.0, 233.0, 222.0, 237.0, 206.0, 196.0, 228.0, 237.0, 220.0, 245.0, 220.0, 236.0, 230.0, 223.0, 214.0, 182.0, 232.0, 212.0, 243.0, 219.0, 228.0, 228.0, 226.0, 215.0, 203.0, 199.0, 202.0, 214.0, 214.0, 196.0, 199.0, 196.0, 87.0, 93.0, 234.0, 228.0, 203.0, 208.0, 231.0, 234.0, 152.0, 184.0, 234.0, 231.0, 237.0, 227.0, 267.0, 246.0, 221.0, 235.0, 230.0, 226.0, 220.0, 221.0, 250.0, 215.0, 254.0, 241.0, 171.0, 170.0, 239.0, 220.0, 210.0, 195.0, 246.0, 261.0, 226.0, 221.0, 221.0, 220.0, 221.0, 249.0, 169.0, 169.0, 222.0, 188.0, 229.0, 224.0, 197.0, 205.0, 220.0, 236.0, 230.0, 223.0, 222.0, 231.0, 192.0, 206.0, 221.0, 223.0, 230.0, 229.0, 218.0, 226.0, 171.0, 170.0, 229.0, 238.0, 209.0, 198.0, 215.0, 229.0, 223.0, 233.0, 241.0, 218.0, 219.0, 188.0, 210.0, 194.0, 232.0, 218.0, 187.0, 191.0, 231.0, 234.0, 246.0, 278.0, 193.0, 200.0, 191.0, 208.0, 171.0, 188.0, 211.0, 205.0, 221.0, 238.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6868056919516162, "mean_inference_ms": 1.2096204283652425, "mean_action_processing_ms": 0.1329055296257274, "mean_env_wait_ms": 0.8561982876915627, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 524.0, "episode_reward_min": 180.0, "episode_reward_mean": 430.33, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 87.0}, "policy_reward_max": {"ppo": 278.0}, "policy_reward_mean": {"ppo": 215.165}, "hist_stats": {"episode_reward": [228.0, 453.0, 438.0, 513.0, 381.0, 513.0, 459.0, 507.0, 453.0, 404.0, 356.0, 441.0, 462.0, 402.0, 438.0, 453.0, 416.0, 398.0, 462.0, 447.0, 470.0, 456.0, 467.0, 459.0, 456.0, 456.0, 465.0, 291.0, 413.0, 435.0, 462.0, 396.0, 359.0, 453.0, 401.0, 370.0, 461.0, 459.0, 402.0, 465.0, 465.0, 456.0, 453.0, 396.0, 444.0, 462.0, 456.0, 441.0, 402.0, 416.0, 410.0, 
395.0, 180.0, 462.0, 411.0, 465.0, 336.0, 465.0, 464.0, 513.0, 456.0, 456.0, 441.0, 465.0, 495.0, 341.0, 459.0, 405.0, 507.0, 447.0, 441.0, 470.0, 338.0, 410.0, 453.0, 402.0, 456.0, 453.0, 453.0, 398.0, 444.0, 459.0, 444.0, 341.0, 467.0, 407.0, 444.0, 456.0, 459.0, 407.0, 404.0, 450.0, 378.0, 465.0, 524.0, 393.0, 399.0, 359.0, 416.0, 459.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [107.0, 121.0, 239.0, 214.0, 209.0, 229.0, 267.0, 246.0, 190.0, 191.0, 241.0, 272.0, 230.0, 229.0, 271.0, 236.0, 240.0, 213.0, 196.0, 208.0, 185.0, 171.0, 216.0, 225.0, 235.0, 227.0, 202.0, 200.0, 220.0, 218.0, 231.0, 222.0, 205.0, 211.0, 191.0, 207.0, 232.0, 230.0, 211.0, 236.0, 230.0, 240.0, 239.0, 217.0, 234.0, 233.0, 220.0, 239.0, 231.0, 225.0, 235.0, 221.0, 226.0, 239.0, 138.0, 153.0, 193.0, 220.0, 231.0, 204.0, 245.0, 217.0, 207.0, 189.0, 174.0, 185.0, 230.0, 223.0, 216.0, 185.0, 185.0, 185.0, 228.0, 233.0, 222.0, 237.0, 206.0, 196.0, 228.0, 237.0, 220.0, 245.0, 220.0, 236.0, 230.0, 223.0, 214.0, 182.0, 232.0, 212.0, 243.0, 219.0, 228.0, 228.0, 226.0, 215.0, 203.0, 199.0, 202.0, 214.0, 214.0, 196.0, 199.0, 196.0, 87.0, 93.0, 234.0, 228.0, 203.0, 208.0, 231.0, 234.0, 152.0, 184.0, 234.0, 231.0, 237.0, 227.0, 267.0, 246.0, 221.0, 235.0, 230.0, 226.0, 220.0, 221.0, 250.0, 215.0, 254.0, 241.0, 171.0, 170.0, 239.0, 220.0, 210.0, 195.0, 246.0, 261.0, 226.0, 221.0, 221.0, 220.0, 221.0, 249.0, 169.0, 169.0, 222.0, 188.0, 229.0, 224.0, 197.0, 205.0, 220.0, 236.0, 230.0, 223.0, 222.0, 231.0, 192.0, 
206.0, 221.0, 223.0, 230.0, 229.0, 218.0, 226.0, 171.0, 170.0, 229.0, 238.0, 209.0, 198.0, 215.0, 229.0, 223.0, 233.0, 241.0, 218.0, 219.0, 188.0, 210.0, 194.0, 232.0, 218.0, 187.0, 191.0, 231.0, 234.0, 246.0, 278.0, 193.0, 200.0, 191.0, 208.0, 171.0, 188.0, 211.0, 205.0, 221.0, 238.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6868056919516162, "mean_inference_ms": 1.2096204283652425, "mean_action_processing_ms": 0.1329055296257274, "mean_env_wait_ms": 0.8561982876915627, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 2252800, "num_agent_steps_trained": 2252800, "num_env_steps_sampled": 1126400, "num_env_steps_trained": 1126400, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 1126400, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 2252800, "timers": {"training_iteration_time_ms": 3648.703, "learn_time_ms": 1098.582, "learn_throughput": 11651.38, "synch_weights_time_ms": 9.691}, "counters": {"num_env_steps_sampled": 1126400, "num_env_steps_trained": 1126400, "num_agent_steps_sampled": 2252800, "num_agent_steps_trained": 2252800}, "done": false, "episodes_total": 2816, "training_iteration": 88, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-05-38", "timestamp": 1666580738, "time_this_iter_s": 3.93886137008667, "time_total_s": 329.44581627845764, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, 
"device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, 
"sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": 
{"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", 
"observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, 
"keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function 
get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function 
gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 329.44581627845764, "timesteps_since_restore": 0, "iterations_since_restore": 88, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.916666666666664, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 149.8, "sparse_reward_min": 60, "sparse_reward_max": 180, "shaped_reward_mean": 130.82, "shaped_reward_min": 60, "shaped_reward_max": 164, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 13.42, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 14.16, "onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 12.57, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 19, "useful_onion_pickup_agent_1_mean": 13.1, "useful_onion_pickup_agent_1_min": 6, "useful_onion_pickup_agent_1_max": 20, "onion_drop_agent_0_mean": 1.02, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 1.45, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 8, "useful_onion_drop_agent_0_mean": 0.58, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, 
"useful_onion_drop_agent_1_mean": 0.88, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 6, "potting_onion_agent_0_mean": 12.06, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 18, "potting_onion_agent_1_mean": 12.35, "potting_onion_agent_1_min": 3, "potting_onion_agent_1_max": 19, "dish_pickup_agent_0_mean": 5.44, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.04, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 12, "useful_dish_pickup_agent_0_mean": 3.12, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 2.91, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 1.18, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 1.05, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.3, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.32, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 4.17, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 3.8, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 4.01, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 3.76, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.11, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 12.06, "optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 18, "optimal_onion_potting_agent_1_mean": 12.35, "optimal_onion_potting_agent_1_min": 3, "optimal_onion_potting_agent_1_max": 19, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 12.06, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 18, "viable_onion_potting_agent_1_mean": 12.35, "viable_onion_potting_agent_1_min": 3, "viable_onion_potting_agent_1_max": 19, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 6.462348631867026e-28, "cur_lr": 0.0010000000474974513, "total_loss": -0.0031336257234215736, "policy_loss": 
-0.003304777666926384, "vf_loss": 7.8458757400512695, "vf_explained_var": 0.7096177339553833, "kl": 0.0018895509419962764, "entropy": 1.2268693447113037, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 1139200, "num_env_steps_trained": 1139200, "num_agent_steps_sampled": 2278400, "num_agent_steps_trained": 2278400}, "sampler_results": {"episode_reward_max": 524.0, "episode_reward_min": 180.0, "episode_reward_mean": 430.42, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 87.0}, "policy_reward_max": {"ppo": 278.0}, "policy_reward_mean": {"ppo": 215.21}, "custom_metrics": {"sparse_reward_mean": 149.8, "sparse_reward_min": 60, "sparse_reward_max": 180, "shaped_reward_mean": 130.82, "shaped_reward_min": 60, "shaped_reward_max": 164, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 13.42, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 21, 
"onion_pickup_agent_1_mean": 14.16, "onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 12.57, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 19, "useful_onion_pickup_agent_1_mean": 13.1, "useful_onion_pickup_agent_1_min": 6, "useful_onion_pickup_agent_1_max": 20, "onion_drop_agent_0_mean": 1.02, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 1.45, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 8, "useful_onion_drop_agent_0_mean": 0.58, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, "useful_onion_drop_agent_1_mean": 0.88, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 6, "potting_onion_agent_0_mean": 12.06, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 18, "potting_onion_agent_1_mean": 12.35, "potting_onion_agent_1_min": 3, "potting_onion_agent_1_max": 19, "dish_pickup_agent_0_mean": 5.44, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.04, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 12, "useful_dish_pickup_agent_0_mean": 3.12, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 2.91, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 1.18, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 1.05, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.3, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.32, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 4.17, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 3.8, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 4.01, 
"soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 3.76, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.11, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 12.06, "optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 18, "optimal_onion_potting_agent_1_mean": 12.35, "optimal_onion_potting_agent_1_min": 3, "optimal_onion_potting_agent_1_max": 19, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 12.06, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 18, "viable_onion_potting_agent_1_mean": 12.35, "viable_onion_potting_agent_1_min": 3, "viable_onion_potting_agent_1_max": 19, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, 
"useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [359.0, 453.0, 401.0, 370.0, 461.0, 459.0, 402.0, 465.0, 465.0, 456.0, 453.0, 396.0, 444.0, 462.0, 456.0, 441.0, 402.0, 416.0, 410.0, 395.0, 180.0, 462.0, 411.0, 465.0, 336.0, 465.0, 464.0, 513.0, 456.0, 456.0, 441.0, 465.0, 495.0, 341.0, 459.0, 405.0, 507.0, 447.0, 441.0, 470.0, 338.0, 410.0, 453.0, 402.0, 456.0, 453.0, 453.0, 398.0, 444.0, 459.0, 444.0, 341.0, 467.0, 407.0, 444.0, 456.0, 459.0, 407.0, 404.0, 450.0, 378.0, 465.0, 524.0, 393.0, 399.0, 359.0, 416.0, 459.0, 416.0, 462.0, 510.0, 447.0, 384.0, 467.0, 465.0, 211.0, 462.0, 450.0, 461.0, 467.0, 416.0, 410.0, 410.0, 330.0, 367.0, 456.0, 452.0, 513.0, 413.0, 402.0, 387.0, 422.0, 513.0, 456.0, 476.0, 392.0, 450.0, 468.0, 459.0, 465.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [174.0, 185.0, 230.0, 223.0, 216.0, 185.0, 185.0, 185.0, 228.0, 233.0, 222.0, 237.0, 206.0, 196.0, 228.0, 237.0, 220.0, 245.0, 220.0, 236.0, 230.0, 223.0, 214.0, 182.0, 232.0, 212.0, 243.0, 219.0, 228.0, 228.0, 226.0, 215.0, 203.0, 199.0, 202.0, 214.0, 214.0, 196.0, 199.0, 196.0, 87.0, 93.0, 234.0, 228.0, 
203.0, 208.0, 231.0, 234.0, 152.0, 184.0, 234.0, 231.0, 237.0, 227.0, 267.0, 246.0, 221.0, 235.0, 230.0, 226.0, 220.0, 221.0, 250.0, 215.0, 254.0, 241.0, 171.0, 170.0, 239.0, 220.0, 210.0, 195.0, 246.0, 261.0, 226.0, 221.0, 221.0, 220.0, 221.0, 249.0, 169.0, 169.0, 222.0, 188.0, 229.0, 224.0, 197.0, 205.0, 220.0, 236.0, 230.0, 223.0, 222.0, 231.0, 192.0, 206.0, 221.0, 223.0, 230.0, 229.0, 218.0, 226.0, 171.0, 170.0, 229.0, 238.0, 209.0, 198.0, 215.0, 229.0, 223.0, 233.0, 241.0, 218.0, 219.0, 188.0, 210.0, 194.0, 232.0, 218.0, 187.0, 191.0, 231.0, 234.0, 246.0, 278.0, 193.0, 200.0, 191.0, 208.0, 171.0, 188.0, 211.0, 205.0, 221.0, 238.0, 226.0, 190.0, 250.0, 212.0, 260.0, 250.0, 213.0, 234.0, 194.0, 190.0, 236.0, 231.0, 232.0, 233.0, 100.0, 111.0, 233.0, 229.0, 238.0, 212.0, 221.0, 240.0, 235.0, 232.0, 215.0, 201.0, 191.0, 219.0, 214.0, 196.0, 163.0, 167.0, 182.0, 185.0, 218.0, 238.0, 227.0, 225.0, 257.0, 256.0, 200.0, 213.0, 194.0, 208.0, 182.0, 205.0, 216.0, 206.0, 264.0, 249.0, 226.0, 230.0, 232.0, 244.0, 185.0, 207.0, 226.0, 224.0, 249.0, 219.0, 242.0, 217.0, 239.0, 226.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6867888499461925, "mean_inference_ms": 1.2099295994346604, "mean_action_processing_ms": 0.13289270671609127, "mean_env_wait_ms": 0.8562557347149742, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 524.0, "episode_reward_min": 180.0, "episode_reward_mean": 430.42, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 87.0}, "policy_reward_max": {"ppo": 278.0}, "policy_reward_mean": {"ppo": 215.21}, "hist_stats": {"episode_reward": [359.0, 453.0, 401.0, 370.0, 461.0, 459.0, 402.0, 465.0, 465.0, 456.0, 453.0, 396.0, 444.0, 462.0, 456.0, 441.0, 402.0, 416.0, 410.0, 395.0, 180.0, 462.0, 411.0, 465.0, 336.0, 465.0, 464.0, 513.0, 456.0, 456.0, 441.0, 465.0, 495.0, 341.0, 459.0, 405.0, 507.0, 447.0, 441.0, 470.0, 338.0, 410.0, 453.0, 402.0, 456.0, 453.0, 453.0, 398.0, 444.0, 459.0, 444.0, 341.0, 
467.0, 407.0, 444.0, 456.0, 459.0, 407.0, 404.0, 450.0, 378.0, 465.0, 524.0, 393.0, 399.0, 359.0, 416.0, 459.0, 416.0, 462.0, 510.0, 447.0, 384.0, 467.0, 465.0, 211.0, 462.0, 450.0, 461.0, 467.0, 416.0, 410.0, 410.0, 330.0, 367.0, 456.0, 452.0, 513.0, 413.0, 402.0, 387.0, 422.0, 513.0, 456.0, 476.0, 392.0, 450.0, 468.0, 459.0, 465.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [174.0, 185.0, 230.0, 223.0, 216.0, 185.0, 185.0, 185.0, 228.0, 233.0, 222.0, 237.0, 206.0, 196.0, 228.0, 237.0, 220.0, 245.0, 220.0, 236.0, 230.0, 223.0, 214.0, 182.0, 232.0, 212.0, 243.0, 219.0, 228.0, 228.0, 226.0, 215.0, 203.0, 199.0, 202.0, 214.0, 214.0, 196.0, 199.0, 196.0, 87.0, 93.0, 234.0, 228.0, 203.0, 208.0, 231.0, 234.0, 152.0, 184.0, 234.0, 231.0, 237.0, 227.0, 267.0, 246.0, 221.0, 235.0, 230.0, 226.0, 220.0, 221.0, 250.0, 215.0, 254.0, 241.0, 171.0, 170.0, 239.0, 220.0, 210.0, 195.0, 246.0, 261.0, 226.0, 221.0, 221.0, 220.0, 221.0, 249.0, 169.0, 169.0, 222.0, 188.0, 229.0, 224.0, 197.0, 205.0, 220.0, 236.0, 230.0, 223.0, 222.0, 231.0, 192.0, 206.0, 221.0, 223.0, 230.0, 229.0, 218.0, 226.0, 171.0, 170.0, 229.0, 238.0, 209.0, 198.0, 215.0, 229.0, 223.0, 233.0, 241.0, 218.0, 219.0, 188.0, 210.0, 194.0, 232.0, 218.0, 187.0, 191.0, 231.0, 234.0, 246.0, 278.0, 193.0, 200.0, 191.0, 208.0, 171.0, 188.0, 211.0, 205.0, 221.0, 238.0, 226.0, 190.0, 250.0, 212.0, 260.0, 250.0, 213.0, 234.0, 194.0, 190.0, 236.0, 231.0, 232.0, 233.0, 100.0, 111.0, 233.0, 229.0, 238.0, 212.0, 221.0, 240.0, 235.0, 232.0, 
215.0, 201.0, 191.0, 219.0, 214.0, 196.0, 163.0, 167.0, 182.0, 185.0, 218.0, 238.0, 227.0, 225.0, 257.0, 256.0, 200.0, 213.0, 194.0, 208.0, 182.0, 205.0, 216.0, 206.0, 264.0, 249.0, 226.0, 230.0, 232.0, 244.0, 185.0, 207.0, 226.0, 224.0, 249.0, 219.0, 242.0, 217.0, 239.0, 226.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6867888499461925, "mean_inference_ms": 1.2099295994346604, "mean_action_processing_ms": 0.13289270671609127, "mean_env_wait_ms": 0.8562557347149742, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 2278400, "num_agent_steps_trained": 2278400, "num_env_steps_sampled": 1139200, "num_env_steps_trained": 1139200, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 1139200, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 2278400, "timers": {"training_iteration_time_ms": 3660.43, "learn_time_ms": 1108.366, "learn_throughput": 11548.535, "synch_weights_time_ms": 9.815}, "counters": {"num_env_steps_sampled": 1139200, "num_env_steps_trained": 1139200, "num_agent_steps_sampled": 2278400, "num_agent_steps_trained": 2278400}, "done": false, "episodes_total": 2848, "training_iteration": 89, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-05-41", "timestamp": 1666580741, "time_this_iter_s": 3.6420814990997314, "time_total_s": 333.0878977775574, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, 
"device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, 
"sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": 
{"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", 
"observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, 
"keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function 
get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function 
gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 333.0878977775574, "timesteps_since_restore": 0, "iterations_since_restore": 89, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 21.133333333333333, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 151.4, "sparse_reward_min": 60, "sparse_reward_max": 180, "shaped_reward_mean": 131.9, "shaped_reward_min": 84, "shaped_reward_max": 164, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 13.59, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 14.43, "onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 12.85, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 19, "useful_onion_pickup_agent_1_mean": 13.42, "useful_onion_pickup_agent_1_min": 6, "useful_onion_pickup_agent_1_max": 20, "onion_drop_agent_0_mean": 1.17, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 1.47, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 8, "useful_onion_drop_agent_0_mean": 0.67, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, 
"useful_onion_drop_agent_1_mean": 0.89, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 6, "potting_onion_agent_0_mean": 12.08, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 12.57, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 19, "dish_pickup_agent_0_mean": 5.44, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.25, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 3.02, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 2.98, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 1.18, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 1.16, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 6, "useful_dish_drop_agent_0_mean": 0.35, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.43, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 5, "soup_pickup_agent_0_mean": 4.16, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 3.89, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 4.03, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 3.83, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.09, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 12.08, "optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 12.57, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 19, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 12.08, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 12.57, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 19, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 3.231174315933513e-28, "cur_lr": 0.0010000000474974513, "total_loss": -0.0013424127828329802, "policy_loss": 
-0.0015107805375009775, "vf_loss": 7.762334823608398, "vf_explained_var": 0.7075027227401733, "kl": 0.0016440870240330696, "entropy": 1.2157301902770996, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 1152000, "num_env_steps_trained": 1152000, "num_agent_steps_sampled": 2304000, "num_agent_steps_trained": 2304000}, "sampler_results": {"episode_reward_max": 524.0, "episode_reward_min": 211.0, "episode_reward_mean": 434.7, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 100.0}, "policy_reward_max": {"ppo": 278.0}, "policy_reward_mean": {"ppo": 217.35}, "custom_metrics": {"sparse_reward_mean": 151.4, "sparse_reward_min": 60, "sparse_reward_max": 180, "shaped_reward_mean": 131.9, "shaped_reward_min": 84, "shaped_reward_max": 164, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 13.59, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 20, 
"onion_pickup_agent_1_mean": 14.43, "onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 12.85, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 19, "useful_onion_pickup_agent_1_mean": 13.42, "useful_onion_pickup_agent_1_min": 6, "useful_onion_pickup_agent_1_max": 20, "onion_drop_agent_0_mean": 1.17, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 1.47, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 8, "useful_onion_drop_agent_0_mean": 0.67, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, "useful_onion_drop_agent_1_mean": 0.89, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 6, "potting_onion_agent_0_mean": 12.08, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 12.57, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 19, "dish_pickup_agent_0_mean": 5.44, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.25, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 3.02, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 2.98, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 1.18, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 1.16, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 6, "useful_dish_drop_agent_0_mean": 0.35, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.43, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 5, "soup_pickup_agent_0_mean": 4.16, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 3.89, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 4.03, 
"soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 3.83, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.09, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 12.08, "optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 12.57, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 19, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 12.08, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 12.57, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 19, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, 
"useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [495.0, 341.0, 459.0, 405.0, 507.0, 447.0, 441.0, 470.0, 338.0, 410.0, 453.0, 402.0, 456.0, 453.0, 453.0, 398.0, 444.0, 459.0, 444.0, 341.0, 467.0, 407.0, 444.0, 456.0, 459.0, 407.0, 404.0, 450.0, 378.0, 465.0, 524.0, 393.0, 399.0, 359.0, 416.0, 459.0, 416.0, 462.0, 510.0, 447.0, 384.0, 467.0, 465.0, 211.0, 462.0, 450.0, 461.0, 467.0, 416.0, 410.0, 410.0, 330.0, 367.0, 456.0, 452.0, 513.0, 413.0, 402.0, 387.0, 422.0, 513.0, 456.0, 476.0, 392.0, 450.0, 468.0, 459.0, 465.0, 456.0, 435.0, 284.0, 450.0, 462.0, 462.0, 473.0, 405.0, 407.0, 476.0, 459.0, 453.0, 453.0, 416.0, 447.0, 407.0, 459.0, 447.0, 450.0, 459.0, 465.0, 347.0, 462.0, 456.0, 459.0, 458.0, 450.0, 453.0, 462.0, 465.0, 444.0, 427.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [254.0, 241.0, 171.0, 170.0, 239.0, 220.0, 210.0, 195.0, 246.0, 261.0, 226.0, 221.0, 221.0, 220.0, 221.0, 249.0, 169.0, 169.0, 222.0, 188.0, 229.0, 224.0, 197.0, 205.0, 220.0, 236.0, 230.0, 223.0, 222.0, 231.0, 192.0, 206.0, 221.0, 223.0, 230.0, 229.0, 218.0, 226.0, 171.0, 170.0, 229.0, 238.0, 209.0, 
198.0, 215.0, 229.0, 223.0, 233.0, 241.0, 218.0, 219.0, 188.0, 210.0, 194.0, 232.0, 218.0, 187.0, 191.0, 231.0, 234.0, 246.0, 278.0, 193.0, 200.0, 191.0, 208.0, 171.0, 188.0, 211.0, 205.0, 221.0, 238.0, 226.0, 190.0, 250.0, 212.0, 260.0, 250.0, 213.0, 234.0, 194.0, 190.0, 236.0, 231.0, 232.0, 233.0, 100.0, 111.0, 233.0, 229.0, 238.0, 212.0, 221.0, 240.0, 235.0, 232.0, 215.0, 201.0, 191.0, 219.0, 214.0, 196.0, 163.0, 167.0, 182.0, 185.0, 218.0, 238.0, 227.0, 225.0, 257.0, 256.0, 200.0, 213.0, 194.0, 208.0, 182.0, 205.0, 216.0, 206.0, 264.0, 249.0, 226.0, 230.0, 232.0, 244.0, 185.0, 207.0, 226.0, 224.0, 249.0, 219.0, 242.0, 217.0, 239.0, 226.0, 227.0, 229.0, 222.0, 213.0, 132.0, 152.0, 231.0, 219.0, 230.0, 232.0, 232.0, 230.0, 238.0, 235.0, 200.0, 205.0, 201.0, 206.0, 247.0, 229.0, 224.0, 235.0, 217.0, 236.0, 213.0, 240.0, 212.0, 204.0, 228.0, 219.0, 213.0, 194.0, 232.0, 227.0, 230.0, 217.0, 219.0, 231.0, 236.0, 223.0, 239.0, 226.0, 174.0, 173.0, 221.0, 241.0, 217.0, 239.0, 220.0, 239.0, 224.0, 234.0, 219.0, 231.0, 224.0, 229.0, 232.0, 230.0, 239.0, 226.0, 229.0, 215.0, 200.0, 227.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6868064875086877, "mean_inference_ms": 1.20987618428007, "mean_action_processing_ms": 0.13288695715863633, "mean_env_wait_ms": 0.8560040849674999, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 524.0, "episode_reward_min": 211.0, "episode_reward_mean": 434.7, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 100.0}, "policy_reward_max": {"ppo": 278.0}, "policy_reward_mean": {"ppo": 217.35}, "hist_stats": {"episode_reward": [495.0, 341.0, 459.0, 405.0, 507.0, 447.0, 441.0, 470.0, 338.0, 410.0, 453.0, 402.0, 456.0, 453.0, 453.0, 398.0, 444.0, 459.0, 444.0, 341.0, 467.0, 407.0, 444.0, 456.0, 459.0, 407.0, 404.0, 450.0, 378.0, 465.0, 524.0, 393.0, 399.0, 359.0, 416.0, 459.0, 416.0, 462.0, 510.0, 447.0, 384.0, 467.0, 465.0, 211.0, 462.0, 450.0, 461.0, 467.0, 416.0, 410.0, 410.0, 
330.0, 367.0, 456.0, 452.0, 513.0, 413.0, 402.0, 387.0, 422.0, 513.0, 456.0, 476.0, 392.0, 450.0, 468.0, 459.0, 465.0, 456.0, 435.0, 284.0, 450.0, 462.0, 462.0, 473.0, 405.0, 407.0, 476.0, 459.0, 453.0, 453.0, 416.0, 447.0, 407.0, 459.0, 447.0, 450.0, 459.0, 465.0, 347.0, 462.0, 456.0, 459.0, 458.0, 450.0, 453.0, 462.0, 465.0, 444.0, 427.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [254.0, 241.0, 171.0, 170.0, 239.0, 220.0, 210.0, 195.0, 246.0, 261.0, 226.0, 221.0, 221.0, 220.0, 221.0, 249.0, 169.0, 169.0, 222.0, 188.0, 229.0, 224.0, 197.0, 205.0, 220.0, 236.0, 230.0, 223.0, 222.0, 231.0, 192.0, 206.0, 221.0, 223.0, 230.0, 229.0, 218.0, 226.0, 171.0, 170.0, 229.0, 238.0, 209.0, 198.0, 215.0, 229.0, 223.0, 233.0, 241.0, 218.0, 219.0, 188.0, 210.0, 194.0, 232.0, 218.0, 187.0, 191.0, 231.0, 234.0, 246.0, 278.0, 193.0, 200.0, 191.0, 208.0, 171.0, 188.0, 211.0, 205.0, 221.0, 238.0, 226.0, 190.0, 250.0, 212.0, 260.0, 250.0, 213.0, 234.0, 194.0, 190.0, 236.0, 231.0, 232.0, 233.0, 100.0, 111.0, 233.0, 229.0, 238.0, 212.0, 221.0, 240.0, 235.0, 232.0, 215.0, 201.0, 191.0, 219.0, 214.0, 196.0, 163.0, 167.0, 182.0, 185.0, 218.0, 238.0, 227.0, 225.0, 257.0, 256.0, 200.0, 213.0, 194.0, 208.0, 182.0, 205.0, 216.0, 206.0, 264.0, 249.0, 226.0, 230.0, 232.0, 244.0, 185.0, 207.0, 226.0, 224.0, 249.0, 219.0, 242.0, 217.0, 239.0, 226.0, 227.0, 229.0, 222.0, 213.0, 132.0, 152.0, 231.0, 219.0, 230.0, 232.0, 232.0, 230.0, 238.0, 235.0, 200.0, 205.0, 201.0, 206.0, 247.0, 229.0, 224.0, 235.0, 217.0, 
236.0, 213.0, 240.0, 212.0, 204.0, 228.0, 219.0, 213.0, 194.0, 232.0, 227.0, 230.0, 217.0, 219.0, 231.0, 236.0, 223.0, 239.0, 226.0, 174.0, 173.0, 221.0, 241.0, 217.0, 239.0, 220.0, 239.0, 224.0, 234.0, 219.0, 231.0, 224.0, 229.0, 232.0, 230.0, 239.0, 226.0, 229.0, 215.0, 200.0, 227.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6868064875086877, "mean_inference_ms": 1.20987618428007, "mean_action_processing_ms": 0.13288695715863633, "mean_env_wait_ms": 0.8560040849674999, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 2304000, "num_agent_steps_trained": 2304000, "num_env_steps_sampled": 1152000, "num_env_steps_trained": 1152000, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 1152000, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 2304000, "timers": {"training_iteration_time_ms": 3653.028, "learn_time_ms": 1101.239, "learn_throughput": 11623.267, "synch_weights_time_ms": 10.331}, "counters": {"num_env_steps_sampled": 1152000, "num_env_steps_trained": 1152000, "num_agent_steps_sampled": 2304000, "num_agent_steps_trained": 2304000}, "done": false, "episodes_total": 2880, "training_iteration": 90, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-05-45", "timestamp": 1666580745, "time_this_iter_s": 3.6563451290130615, "time_total_s": 336.74424290657043, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, 
"device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, 
"sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": 
{"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", 
"observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, 
"keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function 
get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function 
gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 336.74424290657043, "timesteps_since_restore": 0, "iterations_since_restore": 90, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.96, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 152.6, "sparse_reward_min": 60, "sparse_reward_max": 180, "shaped_reward_mean": 134.4, "shaped_reward_min": 84, "shaped_reward_max": 156, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 13.73, "onion_pickup_agent_0_min": 5, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 14.54, "onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 12.92, "useful_onion_pickup_agent_0_min": 4, "useful_onion_pickup_agent_0_max": 19, "useful_onion_pickup_agent_1_mean": 13.64, "useful_onion_pickup_agent_1_min": 6, "useful_onion_pickup_agent_1_max": 20, "onion_drop_agent_0_mean": 1.35, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 1.25, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.79, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, 
"useful_onion_drop_agent_1_mean": 0.76, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 6, "potting_onion_agent_0_mean": 12.03, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 12.86, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.4, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.15, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 3.36, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 3.1, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 6, "dish_drop_agent_0_mean": 1.09, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 0.99, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 7, "useful_dish_drop_agent_0_mean": 0.34, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.37, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 5, "soup_pickup_agent_0_mean": 4.17, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 3.94, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 4.06, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 3.85, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.08, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.05, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 12.03, "optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 12.86, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 12.03, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 12.86, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 1.6155871579667565e-28, "cur_lr": 0.0010000000474974513, "total_loss": -0.0027565264608711004, "policy_loss": 
-0.0029142703860998154, "vf_loss": 7.637598514556885, "vf_explained_var": 0.6873199343681335, "kl": 0.0016575659392401576, "entropy": 1.2120314836502075, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 1164800, "num_env_steps_trained": 1164800, "num_agent_steps_sampled": 2329600, "num_agent_steps_trained": 2329600}, "sampler_results": {"episode_reward_max": 516.0, "episode_reward_min": 211.0, "episode_reward_mean": 439.6, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 100.0}, "policy_reward_max": {"ppo": 265.0}, "policy_reward_mean": {"ppo": 219.8}, "custom_metrics": {"sparse_reward_mean": 152.6, "sparse_reward_min": 60, "sparse_reward_max": 180, "shaped_reward_mean": 134.4, "shaped_reward_min": 84, "shaped_reward_max": 156, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 13.73, "onion_pickup_agent_0_min": 5, "onion_pickup_agent_0_max": 20, 
"onion_pickup_agent_1_mean": 14.54, "onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 12.92, "useful_onion_pickup_agent_0_min": 4, "useful_onion_pickup_agent_0_max": 19, "useful_onion_pickup_agent_1_mean": 13.64, "useful_onion_pickup_agent_1_min": 6, "useful_onion_pickup_agent_1_max": 20, "onion_drop_agent_0_mean": 1.35, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 1.25, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.79, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, "useful_onion_drop_agent_1_mean": 0.76, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 6, "potting_onion_agent_0_mean": 12.03, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 12.86, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.4, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.15, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 3.36, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 3.1, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 6, "dish_drop_agent_0_mean": 1.09, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 0.99, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 7, "useful_dish_drop_agent_0_mean": 0.34, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.37, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 5, "soup_pickup_agent_0_mean": 4.17, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 3.94, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 4.06, 
"soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 3.85, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.08, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.05, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 12.03, "optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 12.86, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 12.03, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 12.86, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, 
"useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [399.0, 359.0, 416.0, 459.0, 416.0, 462.0, 510.0, 447.0, 384.0, 467.0, 465.0, 211.0, 462.0, 450.0, 461.0, 467.0, 416.0, 410.0, 410.0, 330.0, 367.0, 456.0, 452.0, 513.0, 413.0, 402.0, 387.0, 422.0, 513.0, 456.0, 476.0, 392.0, 450.0, 468.0, 459.0, 465.0, 456.0, 435.0, 284.0, 450.0, 462.0, 462.0, 473.0, 405.0, 407.0, 476.0, 459.0, 453.0, 453.0, 416.0, 447.0, 407.0, 459.0, 447.0, 450.0, 459.0, 465.0, 347.0, 462.0, 456.0, 459.0, 458.0, 450.0, 453.0, 462.0, 465.0, 444.0, 427.0, 407.0, 453.0, 513.0, 473.0, 401.0, 516.0, 370.0, 470.0, 467.0, 465.0, 513.0, 447.0, 390.0, 410.0, 456.0, 456.0, 513.0, 456.0, 395.0, 450.0, 456.0, 492.0, 456.0, 416.0, 419.0, 459.0, 462.0, 453.0, 450.0, 416.0, 444.0, 416.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [191.0, 208.0, 171.0, 188.0, 211.0, 205.0, 221.0, 238.0, 226.0, 190.0, 250.0, 212.0, 260.0, 250.0, 213.0, 234.0, 194.0, 190.0, 236.0, 231.0, 232.0, 233.0, 100.0, 111.0, 233.0, 229.0, 238.0, 212.0, 221.0, 240.0, 235.0, 232.0, 215.0, 201.0, 191.0, 219.0, 214.0, 196.0, 163.0, 167.0, 182.0, 185.0, 218.0, 
238.0, 227.0, 225.0, 257.0, 256.0, 200.0, 213.0, 194.0, 208.0, 182.0, 205.0, 216.0, 206.0, 264.0, 249.0, 226.0, 230.0, 232.0, 244.0, 185.0, 207.0, 226.0, 224.0, 249.0, 219.0, 242.0, 217.0, 239.0, 226.0, 227.0, 229.0, 222.0, 213.0, 132.0, 152.0, 231.0, 219.0, 230.0, 232.0, 232.0, 230.0, 238.0, 235.0, 200.0, 205.0, 201.0, 206.0, 247.0, 229.0, 224.0, 235.0, 217.0, 236.0, 213.0, 240.0, 212.0, 204.0, 228.0, 219.0, 213.0, 194.0, 232.0, 227.0, 230.0, 217.0, 219.0, 231.0, 236.0, 223.0, 239.0, 226.0, 174.0, 173.0, 221.0, 241.0, 217.0, 239.0, 220.0, 239.0, 224.0, 234.0, 219.0, 231.0, 224.0, 229.0, 232.0, 230.0, 239.0, 226.0, 229.0, 215.0, 200.0, 227.0, 208.0, 199.0, 215.0, 238.0, 258.0, 255.0, 230.0, 243.0, 206.0, 195.0, 262.0, 254.0, 197.0, 173.0, 228.0, 242.0, 234.0, 233.0, 234.0, 231.0, 252.0, 261.0, 221.0, 226.0, 194.0, 196.0, 198.0, 212.0, 244.0, 212.0, 220.0, 236.0, 248.0, 265.0, 218.0, 238.0, 216.0, 179.0, 216.0, 234.0, 229.0, 227.0, 250.0, 242.0, 224.0, 232.0, 195.0, 221.0, 208.0, 211.0, 240.0, 219.0, 235.0, 227.0, 220.0, 233.0, 228.0, 222.0, 210.0, 206.0, 217.0, 227.0, 211.0, 205.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6867829633180885, "mean_inference_ms": 1.2097543508448176, "mean_action_processing_ms": 0.13288373966344313, "mean_env_wait_ms": 0.8555906230183881, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 516.0, "episode_reward_min": 211.0, "episode_reward_mean": 439.6, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 100.0}, "policy_reward_max": {"ppo": 265.0}, "policy_reward_mean": {"ppo": 219.8}, "hist_stats": {"episode_reward": [399.0, 359.0, 416.0, 459.0, 416.0, 462.0, 510.0, 447.0, 384.0, 467.0, 465.0, 211.0, 462.0, 450.0, 461.0, 467.0, 416.0, 410.0, 410.0, 330.0, 367.0, 456.0, 452.0, 513.0, 413.0, 402.0, 387.0, 422.0, 513.0, 456.0, 476.0, 392.0, 450.0, 468.0, 459.0, 465.0, 456.0, 435.0, 284.0, 450.0, 462.0, 462.0, 473.0, 405.0, 407.0, 476.0, 459.0, 453.0, 453.0, 416.0, 447.0, 
407.0, 459.0, 447.0, 450.0, 459.0, 465.0, 347.0, 462.0, 456.0, 459.0, 458.0, 450.0, 453.0, 462.0, 465.0, 444.0, 427.0, 407.0, 453.0, 513.0, 473.0, 401.0, 516.0, 370.0, 470.0, 467.0, 465.0, 513.0, 447.0, 390.0, 410.0, 456.0, 456.0, 513.0, 456.0, 395.0, 450.0, 456.0, 492.0, 456.0, 416.0, 419.0, 459.0, 462.0, 453.0, 450.0, 416.0, 444.0, 416.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [191.0, 208.0, 171.0, 188.0, 211.0, 205.0, 221.0, 238.0, 226.0, 190.0, 250.0, 212.0, 260.0, 250.0, 213.0, 234.0, 194.0, 190.0, 236.0, 231.0, 232.0, 233.0, 100.0, 111.0, 233.0, 229.0, 238.0, 212.0, 221.0, 240.0, 235.0, 232.0, 215.0, 201.0, 191.0, 219.0, 214.0, 196.0, 163.0, 167.0, 182.0, 185.0, 218.0, 238.0, 227.0, 225.0, 257.0, 256.0, 200.0, 213.0, 194.0, 208.0, 182.0, 205.0, 216.0, 206.0, 264.0, 249.0, 226.0, 230.0, 232.0, 244.0, 185.0, 207.0, 226.0, 224.0, 249.0, 219.0, 242.0, 217.0, 239.0, 226.0, 227.0, 229.0, 222.0, 213.0, 132.0, 152.0, 231.0, 219.0, 230.0, 232.0, 232.0, 230.0, 238.0, 235.0, 200.0, 205.0, 201.0, 206.0, 247.0, 229.0, 224.0, 235.0, 217.0, 236.0, 213.0, 240.0, 212.0, 204.0, 228.0, 219.0, 213.0, 194.0, 232.0, 227.0, 230.0, 217.0, 219.0, 231.0, 236.0, 223.0, 239.0, 226.0, 174.0, 173.0, 221.0, 241.0, 217.0, 239.0, 220.0, 239.0, 224.0, 234.0, 219.0, 231.0, 224.0, 229.0, 232.0, 230.0, 239.0, 226.0, 229.0, 215.0, 200.0, 227.0, 208.0, 199.0, 215.0, 238.0, 258.0, 255.0, 230.0, 243.0, 206.0, 195.0, 262.0, 254.0, 197.0, 173.0, 228.0, 242.0, 234.0, 233.0, 234.0, 231.0, 252.0, 261.0, 221.0, 
226.0, 194.0, 196.0, 198.0, 212.0, 244.0, 212.0, 220.0, 236.0, 248.0, 265.0, 218.0, 238.0, 216.0, 179.0, 216.0, 234.0, 229.0, 227.0, 250.0, 242.0, 224.0, 232.0, 195.0, 221.0, 208.0, 211.0, 240.0, 219.0, 235.0, 227.0, 220.0, 233.0, 228.0, 222.0, 210.0, 206.0, 217.0, 227.0, 211.0, 205.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6867829633180885, "mean_inference_ms": 1.2097543508448176, "mean_action_processing_ms": 0.13288373966344313, "mean_env_wait_ms": 0.8555906230183881, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 2329600, "num_agent_steps_trained": 2329600, "num_env_steps_sampled": 1164800, "num_env_steps_trained": 1164800, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 1164800, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 2329600, "timers": {"training_iteration_time_ms": 3657.266, "learn_time_ms": 1100.033, "learn_throughput": 11636.013, "synch_weights_time_ms": 10.542}, "counters": {"num_env_steps_sampled": 1164800, "num_env_steps_trained": 1164800, "num_agent_steps_sampled": 2329600, "num_agent_steps_trained": 2329600}, "done": false, "episodes_total": 2912, "training_iteration": 91, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-05-49", "timestamp": 1666580749, "time_this_iter_s": 3.698246955871582, "time_total_s": 340.442489862442, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, 
"device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, 
"sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": 
{"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", 
"observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, 
"keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function 
get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function 
gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 340.442489862442, "timesteps_since_restore": 0, "iterations_since_restore": 91, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.766666666666666, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 155.2, "sparse_reward_min": 80, "sparse_reward_max": 180, "shaped_reward_mean": 133.87, "shaped_reward_min": 68, "shaped_reward_max": 156, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 13.84, "onion_pickup_agent_0_min": 5, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 14.23, "onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 13.03, "useful_onion_pickup_agent_0_min": 4, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 13.57, "useful_onion_pickup_agent_1_min": 6, "useful_onion_pickup_agent_1_max": 20, "onion_drop_agent_0_mean": 1.3, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 0.97, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.78, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, 
"useful_onion_drop_agent_1_mean": 0.53, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 12.17, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 12.85, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.54, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.22, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 3.23, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 2.99, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 1.18, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 1.19, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 7, "useful_dish_drop_agent_0_mean": 0.44, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.46, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 5, "soup_pickup_agent_0_mean": 4.19, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 3.84, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.18, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 3.76, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.05, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 12.17, "optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 12.85, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 12.17, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 12.85, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 8.077935789833782e-29, "cur_lr": 0.0010000000474974513, "total_loss": -0.002236593747511506, "policy_loss": 
-0.0023881965316832066, "vf_loss": 7.572493553161621, "vf_explained_var": 0.7193116545677185, "kl": 0.0020622664596885443, "entropy": 1.211289405822754, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 1177600, "num_env_steps_trained": 1177600, "num_agent_steps_sampled": 2355200, "num_agent_steps_trained": 2355200}, "sampler_results": {"episode_reward_max": 516.0, "episode_reward_min": 228.0, "episode_reward_mean": 444.27, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 111.0}, "policy_reward_max": {"ppo": 270.0}, "policy_reward_mean": {"ppo": 222.135}, "custom_metrics": {"sparse_reward_mean": 155.2, "sparse_reward_min": 80, "sparse_reward_max": 180, "shaped_reward_mean": 133.87, "shaped_reward_min": 68, "shaped_reward_max": 156, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 13.84, "onion_pickup_agent_0_min": 5, "onion_pickup_agent_0_max": 21, 
"onion_pickup_agent_1_mean": 14.23, "onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 13.03, "useful_onion_pickup_agent_0_min": 4, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 13.57, "useful_onion_pickup_agent_1_min": 6, "useful_onion_pickup_agent_1_max": 20, "onion_drop_agent_0_mean": 1.3, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 0.97, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.78, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, "useful_onion_drop_agent_1_mean": 0.53, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 12.17, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 12.85, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.54, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.22, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 3.23, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 2.99, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 1.18, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 1.19, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 7, "useful_dish_drop_agent_0_mean": 0.44, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.46, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 5, "soup_pickup_agent_0_mean": 4.19, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 3.84, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.18, 
"soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 3.76, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.05, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 12.17, "optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 12.85, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 12.17, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 12.85, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, 
"useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [450.0, 468.0, 459.0, 465.0, 456.0, 435.0, 284.0, 450.0, 462.0, 462.0, 473.0, 405.0, 407.0, 476.0, 459.0, 453.0, 453.0, 416.0, 447.0, 407.0, 459.0, 447.0, 450.0, 459.0, 465.0, 347.0, 462.0, 456.0, 459.0, 458.0, 450.0, 453.0, 462.0, 465.0, 444.0, 427.0, 407.0, 453.0, 513.0, 473.0, 401.0, 516.0, 370.0, 470.0, 467.0, 465.0, 513.0, 447.0, 390.0, 410.0, 456.0, 456.0, 513.0, 456.0, 395.0, 450.0, 456.0, 492.0, 456.0, 416.0, 419.0, 459.0, 462.0, 453.0, 450.0, 416.0, 444.0, 416.0, 453.0, 450.0, 513.0, 408.0, 516.0, 456.0, 476.0, 444.0, 296.0, 419.0, 413.0, 441.0, 438.0, 516.0, 462.0, 453.0, 456.0, 399.0, 447.0, 465.0, 459.0, 450.0, 465.0, 432.0, 228.0, 450.0, 393.0, 501.0, 444.0, 456.0, 459.0, 459.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [226.0, 224.0, 249.0, 219.0, 242.0, 217.0, 239.0, 226.0, 227.0, 229.0, 222.0, 213.0, 132.0, 152.0, 231.0, 219.0, 230.0, 232.0, 232.0, 230.0, 238.0, 235.0, 200.0, 205.0, 201.0, 206.0, 247.0, 229.0, 224.0, 235.0, 217.0, 236.0, 213.0, 240.0, 212.0, 204.0, 228.0, 219.0, 213.0, 194.0, 232.0, 227.0, 230.0, 
217.0, 219.0, 231.0, 236.0, 223.0, 239.0, 226.0, 174.0, 173.0, 221.0, 241.0, 217.0, 239.0, 220.0, 239.0, 224.0, 234.0, 219.0, 231.0, 224.0, 229.0, 232.0, 230.0, 239.0, 226.0, 229.0, 215.0, 200.0, 227.0, 208.0, 199.0, 215.0, 238.0, 258.0, 255.0, 230.0, 243.0, 206.0, 195.0, 262.0, 254.0, 197.0, 173.0, 228.0, 242.0, 234.0, 233.0, 234.0, 231.0, 252.0, 261.0, 221.0, 226.0, 194.0, 196.0, 198.0, 212.0, 244.0, 212.0, 220.0, 236.0, 248.0, 265.0, 218.0, 238.0, 216.0, 179.0, 216.0, 234.0, 229.0, 227.0, 250.0, 242.0, 224.0, 232.0, 195.0, 221.0, 208.0, 211.0, 240.0, 219.0, 235.0, 227.0, 220.0, 233.0, 228.0, 222.0, 210.0, 206.0, 217.0, 227.0, 211.0, 205.0, 217.0, 236.0, 226.0, 224.0, 258.0, 255.0, 209.0, 199.0, 256.0, 260.0, 235.0, 221.0, 238.0, 238.0, 223.0, 221.0, 150.0, 146.0, 216.0, 203.0, 197.0, 216.0, 228.0, 213.0, 225.0, 213.0, 246.0, 270.0, 226.0, 236.0, 231.0, 222.0, 234.0, 222.0, 197.0, 202.0, 218.0, 229.0, 241.0, 224.0, 229.0, 230.0, 226.0, 224.0, 233.0, 232.0, 217.0, 215.0, 117.0, 111.0, 220.0, 230.0, 197.0, 196.0, 258.0, 243.0, 219.0, 225.0, 236.0, 220.0, 254.0, 205.0, 214.0, 245.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6867800256071077, "mean_inference_ms": 1.2096947134492912, "mean_action_processing_ms": 0.13288857442312746, "mean_env_wait_ms": 0.8552171842142949, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 516.0, "episode_reward_min": 228.0, "episode_reward_mean": 444.27, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 111.0}, "policy_reward_max": {"ppo": 270.0}, "policy_reward_mean": {"ppo": 222.135}, "hist_stats": {"episode_reward": [450.0, 468.0, 459.0, 465.0, 456.0, 435.0, 284.0, 450.0, 462.0, 462.0, 473.0, 405.0, 407.0, 476.0, 459.0, 453.0, 453.0, 416.0, 447.0, 407.0, 459.0, 447.0, 450.0, 459.0, 465.0, 347.0, 462.0, 456.0, 459.0, 458.0, 450.0, 453.0, 462.0, 465.0, 444.0, 427.0, 407.0, 453.0, 513.0, 473.0, 401.0, 516.0, 370.0, 470.0, 467.0, 465.0, 513.0, 447.0, 390.0, 410.0, 
456.0, 456.0, 513.0, 456.0, 395.0, 450.0, 456.0, 492.0, 456.0, 416.0, 419.0, 459.0, 462.0, 453.0, 450.0, 416.0, 444.0, 416.0, 453.0, 450.0, 513.0, 408.0, 516.0, 456.0, 476.0, 444.0, 296.0, 419.0, 413.0, 441.0, 438.0, 516.0, 462.0, 453.0, 456.0, 399.0, 447.0, 465.0, 459.0, 450.0, 465.0, 432.0, 228.0, 450.0, 393.0, 501.0, 444.0, 456.0, 459.0, 459.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [226.0, 224.0, 249.0, 219.0, 242.0, 217.0, 239.0, 226.0, 227.0, 229.0, 222.0, 213.0, 132.0, 152.0, 231.0, 219.0, 230.0, 232.0, 232.0, 230.0, 238.0, 235.0, 200.0, 205.0, 201.0, 206.0, 247.0, 229.0, 224.0, 235.0, 217.0, 236.0, 213.0, 240.0, 212.0, 204.0, 228.0, 219.0, 213.0, 194.0, 232.0, 227.0, 230.0, 217.0, 219.0, 231.0, 236.0, 223.0, 239.0, 226.0, 174.0, 173.0, 221.0, 241.0, 217.0, 239.0, 220.0, 239.0, 224.0, 234.0, 219.0, 231.0, 224.0, 229.0, 232.0, 230.0, 239.0, 226.0, 229.0, 215.0, 200.0, 227.0, 208.0, 199.0, 215.0, 238.0, 258.0, 255.0, 230.0, 243.0, 206.0, 195.0, 262.0, 254.0, 197.0, 173.0, 228.0, 242.0, 234.0, 233.0, 234.0, 231.0, 252.0, 261.0, 221.0, 226.0, 194.0, 196.0, 198.0, 212.0, 244.0, 212.0, 220.0, 236.0, 248.0, 265.0, 218.0, 238.0, 216.0, 179.0, 216.0, 234.0, 229.0, 227.0, 250.0, 242.0, 224.0, 232.0, 195.0, 221.0, 208.0, 211.0, 240.0, 219.0, 235.0, 227.0, 220.0, 233.0, 228.0, 222.0, 210.0, 206.0, 217.0, 227.0, 211.0, 205.0, 217.0, 236.0, 226.0, 224.0, 258.0, 255.0, 209.0, 199.0, 256.0, 260.0, 235.0, 221.0, 238.0, 238.0, 223.0, 221.0, 150.0, 146.0, 216.0, 203.0, 197.0, 216.0, 
228.0, 213.0, 225.0, 213.0, 246.0, 270.0, 226.0, 236.0, 231.0, 222.0, 234.0, 222.0, 197.0, 202.0, 218.0, 229.0, 241.0, 224.0, 229.0, 230.0, 226.0, 224.0, 233.0, 232.0, 217.0, 215.0, 117.0, 111.0, 220.0, 230.0, 197.0, 196.0, 258.0, 243.0, 219.0, 225.0, 236.0, 220.0, 254.0, 205.0, 214.0, 245.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6867800256071077, "mean_inference_ms": 1.2096947134492912, "mean_action_processing_ms": 0.13288857442312746, "mean_env_wait_ms": 0.8552171842142949, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 2355200, "num_agent_steps_trained": 2355200, "num_env_steps_sampled": 1177600, "num_env_steps_trained": 1177600, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 1177600, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 2355200, "timers": {"training_iteration_time_ms": 3662.618, "learn_time_ms": 1103.205, "learn_throughput": 11602.554, "synch_weights_time_ms": 10.56}, "counters": {"num_env_steps_sampled": 1177600, "num_env_steps_trained": 1177600, "num_agent_steps_sampled": 2355200, "num_agent_steps_trained": 2355200}, "done": false, "episodes_total": 2944, "training_iteration": 92, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-05-53", "timestamp": 1666580753, "time_this_iter_s": 3.739091634750366, "time_total_s": 344.1815814971924, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": 
false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, 
"sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": 
{"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", 
"observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, 
"keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function 
get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function 
gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 344.1815814971924, "timesteps_since_restore": 0, "iterations_since_restore": 92, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.840000000000003, "ram_util_percent": 10.62}}
+{"custom_metrics": {"sparse_reward_mean": 156.2, "sparse_reward_min": 80, "sparse_reward_max": 180, "shaped_reward_mean": 134.23, "shaped_reward_min": 68, "shaped_reward_max": 156, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 13.39, "onion_pickup_agent_0_min": 5, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 14.53, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 12.64, "useful_onion_pickup_agent_0_min": 4, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 13.9, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 20, "onion_drop_agent_0_mean": 1.21, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.93, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.73, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, 
"useful_onion_drop_agent_1_mean": 0.47, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 11.82, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 18, "potting_onion_agent_1_mean": 13.21, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.85, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.01, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 3.37, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 2.91, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 1.25, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 1.11, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 7, "useful_dish_drop_agent_0_mean": 0.47, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.4, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 4, "soup_pickup_agent_0_mean": 4.36, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 3.71, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.35, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 3.63, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.05, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 11.82, "optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 18, "optimal_onion_potting_agent_1_mean": 13.21, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 11.82, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 18, "viable_onion_potting_agent_1_mean": 13.21, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 4.038967894916891e-29, "cur_lr": 0.0010000000474974513, "total_loss": -3.875233232975006e-06, "policy_loss": 
-0.00017003831453621387, "vf_loss": 7.67985725402832, "vf_explained_var": 0.6588976383209229, "kl": 0.0017944644205272198, "entropy": 1.2036434412002563, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 1190400, "num_env_steps_trained": 1190400, "num_agent_steps_sampled": 2380800, "num_agent_steps_trained": 2380800}, "sampler_results": {"episode_reward_max": 516.0, "episode_reward_min": 228.0, "episode_reward_mean": 446.63, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 111.0}, "policy_reward_max": {"ppo": 270.0}, "policy_reward_mean": {"ppo": 223.315}, "custom_metrics": {"sparse_reward_mean": 156.2, "sparse_reward_min": 80, "sparse_reward_max": 180, "shaped_reward_mean": 134.23, "shaped_reward_min": 68, "shaped_reward_max": 156, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 13.39, "onion_pickup_agent_0_min": 5, "onion_pickup_agent_0_max": 21, 
"onion_pickup_agent_1_mean": 14.53, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 12.64, "useful_onion_pickup_agent_0_min": 4, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 13.9, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 20, "onion_drop_agent_0_mean": 1.21, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.93, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.73, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.47, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 11.82, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 18, "potting_onion_agent_1_mean": 13.21, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.85, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.01, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 3.37, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 2.91, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 1.25, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 1.11, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 7, "useful_dish_drop_agent_0_mean": 0.47, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.4, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 4, "soup_pickup_agent_0_mean": 4.36, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 3.71, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.35, 
"soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 3.63, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.05, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 11.82, "optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 18, "optimal_onion_potting_agent_1_mean": 13.21, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 11.82, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 18, "viable_onion_potting_agent_1_mean": 13.21, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, 
"useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [462.0, 465.0, 444.0, 427.0, 407.0, 453.0, 513.0, 473.0, 401.0, 516.0, 370.0, 470.0, 467.0, 465.0, 513.0, 447.0, 390.0, 410.0, 456.0, 456.0, 513.0, 456.0, 395.0, 450.0, 456.0, 492.0, 456.0, 416.0, 419.0, 459.0, 462.0, 453.0, 450.0, 416.0, 444.0, 416.0, 453.0, 450.0, 513.0, 408.0, 516.0, 456.0, 476.0, 444.0, 296.0, 419.0, 413.0, 441.0, 438.0, 516.0, 462.0, 453.0, 456.0, 399.0, 447.0, 465.0, 459.0, 450.0, 465.0, 432.0, 228.0, 450.0, 393.0, 501.0, 444.0, 456.0, 459.0, 459.0, 453.0, 456.0, 416.0, 456.0, 441.0, 459.0, 464.0, 404.0, 459.0, 408.0, 387.0, 456.0, 462.0, 444.0, 402.0, 456.0, 507.0, 456.0, 356.0, 516.0, 470.0, 462.0, 453.0, 413.0, 456.0, 510.0, 510.0, 456.0, 390.0, 462.0, 444.0, 504.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [232.0, 230.0, 239.0, 226.0, 229.0, 215.0, 200.0, 227.0, 208.0, 199.0, 215.0, 238.0, 258.0, 255.0, 230.0, 243.0, 206.0, 195.0, 262.0, 254.0, 197.0, 173.0, 228.0, 242.0, 234.0, 233.0, 234.0, 231.0, 252.0, 261.0, 221.0, 226.0, 194.0, 196.0, 198.0, 212.0, 244.0, 212.0, 220.0, 236.0, 248.0, 265.0, 218.0, 
238.0, 216.0, 179.0, 216.0, 234.0, 229.0, 227.0, 250.0, 242.0, 224.0, 232.0, 195.0, 221.0, 208.0, 211.0, 240.0, 219.0, 235.0, 227.0, 220.0, 233.0, 228.0, 222.0, 210.0, 206.0, 217.0, 227.0, 211.0, 205.0, 217.0, 236.0, 226.0, 224.0, 258.0, 255.0, 209.0, 199.0, 256.0, 260.0, 235.0, 221.0, 238.0, 238.0, 223.0, 221.0, 150.0, 146.0, 216.0, 203.0, 197.0, 216.0, 228.0, 213.0, 225.0, 213.0, 246.0, 270.0, 226.0, 236.0, 231.0, 222.0, 234.0, 222.0, 197.0, 202.0, 218.0, 229.0, 241.0, 224.0, 229.0, 230.0, 226.0, 224.0, 233.0, 232.0, 217.0, 215.0, 117.0, 111.0, 220.0, 230.0, 197.0, 196.0, 258.0, 243.0, 219.0, 225.0, 236.0, 220.0, 254.0, 205.0, 214.0, 245.0, 226.0, 227.0, 229.0, 227.0, 221.0, 195.0, 222.0, 234.0, 229.0, 212.0, 229.0, 230.0, 232.0, 232.0, 190.0, 214.0, 239.0, 220.0, 205.0, 203.0, 193.0, 194.0, 223.0, 233.0, 232.0, 230.0, 201.0, 243.0, 200.0, 202.0, 228.0, 228.0, 254.0, 253.0, 240.0, 216.0, 178.0, 178.0, 258.0, 258.0, 233.0, 237.0, 234.0, 228.0, 233.0, 220.0, 204.0, 209.0, 244.0, 212.0, 253.0, 257.0, 258.0, 252.0, 233.0, 223.0, 184.0, 206.0, 226.0, 236.0, 227.0, 217.0, 242.0, 262.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6867222139007848, "mean_inference_ms": 1.2095571930483882, "mean_action_processing_ms": 0.13287619607452736, "mean_env_wait_ms": 0.8547307612516322, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 516.0, "episode_reward_min": 228.0, "episode_reward_mean": 446.63, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 111.0}, "policy_reward_max": {"ppo": 270.0}, "policy_reward_mean": {"ppo": 223.315}, "hist_stats": {"episode_reward": [462.0, 465.0, 444.0, 427.0, 407.0, 453.0, 513.0, 473.0, 401.0, 516.0, 370.0, 470.0, 467.0, 465.0, 513.0, 447.0, 390.0, 410.0, 456.0, 456.0, 513.0, 456.0, 395.0, 450.0, 456.0, 492.0, 456.0, 416.0, 419.0, 459.0, 462.0, 453.0, 450.0, 416.0, 444.0, 416.0, 453.0, 450.0, 513.0, 408.0, 516.0, 456.0, 476.0, 444.0, 296.0, 419.0, 413.0, 441.0, 438.0, 516.0, 
462.0, 453.0, 456.0, 399.0, 447.0, 465.0, 459.0, 450.0, 465.0, 432.0, 228.0, 450.0, 393.0, 501.0, 444.0, 456.0, 459.0, 459.0, 453.0, 456.0, 416.0, 456.0, 441.0, 459.0, 464.0, 404.0, 459.0, 408.0, 387.0, 456.0, 462.0, 444.0, 402.0, 456.0, 507.0, 456.0, 356.0, 516.0, 470.0, 462.0, 453.0, 413.0, 456.0, 510.0, 510.0, 456.0, 390.0, 462.0, 444.0, 504.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [232.0, 230.0, 239.0, 226.0, 229.0, 215.0, 200.0, 227.0, 208.0, 199.0, 215.0, 238.0, 258.0, 255.0, 230.0, 243.0, 206.0, 195.0, 262.0, 254.0, 197.0, 173.0, 228.0, 242.0, 234.0, 233.0, 234.0, 231.0, 252.0, 261.0, 221.0, 226.0, 194.0, 196.0, 198.0, 212.0, 244.0, 212.0, 220.0, 236.0, 248.0, 265.0, 218.0, 238.0, 216.0, 179.0, 216.0, 234.0, 229.0, 227.0, 250.0, 242.0, 224.0, 232.0, 195.0, 221.0, 208.0, 211.0, 240.0, 219.0, 235.0, 227.0, 220.0, 233.0, 228.0, 222.0, 210.0, 206.0, 217.0, 227.0, 211.0, 205.0, 217.0, 236.0, 226.0, 224.0, 258.0, 255.0, 209.0, 199.0, 256.0, 260.0, 235.0, 221.0, 238.0, 238.0, 223.0, 221.0, 150.0, 146.0, 216.0, 203.0, 197.0, 216.0, 228.0, 213.0, 225.0, 213.0, 246.0, 270.0, 226.0, 236.0, 231.0, 222.0, 234.0, 222.0, 197.0, 202.0, 218.0, 229.0, 241.0, 224.0, 229.0, 230.0, 226.0, 224.0, 233.0, 232.0, 217.0, 215.0, 117.0, 111.0, 220.0, 230.0, 197.0, 196.0, 258.0, 243.0, 219.0, 225.0, 236.0, 220.0, 254.0, 205.0, 214.0, 245.0, 226.0, 227.0, 229.0, 227.0, 221.0, 195.0, 222.0, 234.0, 229.0, 212.0, 229.0, 230.0, 232.0, 232.0, 190.0, 214.0, 239.0, 220.0, 205.0, 203.0, 193.0, 194.0, 
223.0, 233.0, 232.0, 230.0, 201.0, 243.0, 200.0, 202.0, 228.0, 228.0, 254.0, 253.0, 240.0, 216.0, 178.0, 178.0, 258.0, 258.0, 233.0, 237.0, 234.0, 228.0, 233.0, 220.0, 204.0, 209.0, 244.0, 212.0, 253.0, 257.0, 258.0, 252.0, 233.0, 223.0, 184.0, 206.0, 226.0, 236.0, 227.0, 217.0, 242.0, 262.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6867222139007848, "mean_inference_ms": 1.2095571930483882, "mean_action_processing_ms": 0.13287619607452736, "mean_env_wait_ms": 0.8547307612516322, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 2380800, "num_agent_steps_trained": 2380800, "num_env_steps_sampled": 1190400, "num_env_steps_trained": 1190400, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 1190400, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 2380800, "timers": {"training_iteration_time_ms": 3671.226, "learn_time_ms": 1120.3, "learn_throughput": 11425.515, "synch_weights_time_ms": 10.939}, "counters": {"num_env_steps_sampled": 1190400, "num_env_steps_trained": 1190400, "num_agent_steps_sampled": 2380800, "num_agent_steps_trained": 2380800}, "done": false, "episodes_total": 2976, "training_iteration": 93, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-05-57", "timestamp": 1666580757, "time_this_iter_s": 3.6920645236968994, "time_total_s": 347.8736460208893, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": 
false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, 
"sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": 
{"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", 
"observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, 
"keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function 
get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function 
gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 347.8736460208893, "timesteps_since_restore": 0, "iterations_since_restore": 93, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.63333333333333, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 157.0, "sparse_reward_min": 80, "sparse_reward_max": 180, "shaped_reward_mean": 134.47, "shaped_reward_min": 68, "shaped_reward_max": 159, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 13.69, "onion_pickup_agent_0_min": 5, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 14.28, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 12.85, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 13.57, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 19, "onion_drop_agent_0_mean": 1.19, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 1.01, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.79, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, 
"useful_onion_drop_agent_1_mean": 0.61, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 12.11, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 18, "potting_onion_agent_1_mean": 12.95, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 18, "dish_pickup_agent_0_mean": 5.57, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 4.9, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 3.37, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 3.01, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 1.07, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.95, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.4, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.32, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.28, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 3.76, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.27, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 3.72, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 12.11, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 18, "optimal_onion_potting_agent_1_mean": 12.95, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 18, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 12.11, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 18, "viable_onion_potting_agent_1_mean": 12.95, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 18, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 2.0194839474584456e-29, "cur_lr": 0.0010000000474974513, "total_loss": 0.0005498485988937318, "policy_loss": 
0.00038948917062953115, "vf_loss": 7.58758544921875, "vf_explained_var": 0.6836056709289551, "kl": 0.0016990273725241423, "entropy": 1.1967947483062744, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 1203200, "num_env_steps_trained": 1203200, "num_agent_steps_sampled": 2406400, "num_agent_steps_trained": 2406400}, "sampler_results": {"episode_reward_max": 519.0, "episode_reward_min": 228.0, "episode_reward_mean": 448.47, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 111.0}, "policy_reward_max": {"ppo": 271.0}, "policy_reward_mean": {"ppo": 224.235}, "custom_metrics": {"sparse_reward_mean": 157.0, "sparse_reward_min": 80, "sparse_reward_max": 180, "shaped_reward_mean": 134.47, "shaped_reward_min": 68, "shaped_reward_max": 159, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 13.69, "onion_pickup_agent_0_min": 5, "onion_pickup_agent_0_max": 21, 
"onion_pickup_agent_1_mean": 14.28, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 12.85, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 13.57, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 19, "onion_drop_agent_0_mean": 1.19, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 1.01, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.79, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.61, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 12.11, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 18, "potting_onion_agent_1_mean": 12.95, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 18, "dish_pickup_agent_0_mean": 5.57, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 4.9, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 3.37, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 3.01, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 1.07, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.95, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.4, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.32, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.28, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 3.76, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.27, 
"soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 3.72, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 12.11, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 18, "optimal_onion_potting_agent_1_mean": 12.95, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 18, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 12.11, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 18, "viable_onion_potting_agent_1_mean": 12.95, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 18, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, 
"useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [450.0, 416.0, 444.0, 416.0, 453.0, 450.0, 513.0, 408.0, 516.0, 456.0, 476.0, 444.0, 296.0, 419.0, 413.0, 441.0, 438.0, 516.0, 462.0, 453.0, 456.0, 399.0, 447.0, 465.0, 459.0, 450.0, 465.0, 432.0, 228.0, 450.0, 393.0, 501.0, 444.0, 456.0, 459.0, 459.0, 453.0, 456.0, 416.0, 456.0, 441.0, 459.0, 464.0, 404.0, 459.0, 408.0, 387.0, 456.0, 462.0, 444.0, 402.0, 456.0, 507.0, 456.0, 356.0, 516.0, 470.0, 462.0, 453.0, 413.0, 456.0, 510.0, 510.0, 456.0, 390.0, 462.0, 444.0, 504.0, 473.0, 453.0, 459.0, 510.0, 516.0, 465.0, 513.0, 435.0, 470.0, 519.0, 453.0, 398.0, 456.0, 459.0, 422.0, 453.0, 396.0, 462.0, 456.0, 456.0, 453.0, 450.0, 516.0, 516.0, 302.0, 456.0, 456.0, 456.0, 453.0, 404.0, 465.0, 465.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [228.0, 222.0, 210.0, 206.0, 217.0, 227.0, 211.0, 205.0, 217.0, 236.0, 226.0, 224.0, 258.0, 255.0, 209.0, 199.0, 256.0, 260.0, 235.0, 221.0, 238.0, 238.0, 223.0, 221.0, 150.0, 146.0, 216.0, 203.0, 197.0, 216.0, 228.0, 213.0, 225.0, 213.0, 246.0, 270.0, 226.0, 236.0, 231.0, 222.0, 234.0, 222.0, 197.0, 
202.0, 218.0, 229.0, 241.0, 224.0, 229.0, 230.0, 226.0, 224.0, 233.0, 232.0, 217.0, 215.0, 117.0, 111.0, 220.0, 230.0, 197.0, 196.0, 258.0, 243.0, 219.0, 225.0, 236.0, 220.0, 254.0, 205.0, 214.0, 245.0, 226.0, 227.0, 229.0, 227.0, 221.0, 195.0, 222.0, 234.0, 229.0, 212.0, 229.0, 230.0, 232.0, 232.0, 190.0, 214.0, 239.0, 220.0, 205.0, 203.0, 193.0, 194.0, 223.0, 233.0, 232.0, 230.0, 201.0, 243.0, 200.0, 202.0, 228.0, 228.0, 254.0, 253.0, 240.0, 216.0, 178.0, 178.0, 258.0, 258.0, 233.0, 237.0, 234.0, 228.0, 233.0, 220.0, 204.0, 209.0, 244.0, 212.0, 253.0, 257.0, 258.0, 252.0, 233.0, 223.0, 184.0, 206.0, 226.0, 236.0, 227.0, 217.0, 242.0, 262.0, 237.0, 236.0, 233.0, 220.0, 224.0, 235.0, 239.0, 271.0, 251.0, 265.0, 234.0, 231.0, 254.0, 259.0, 208.0, 227.0, 230.0, 240.0, 270.0, 249.0, 211.0, 242.0, 199.0, 199.0, 245.0, 211.0, 226.0, 233.0, 207.0, 215.0, 228.0, 225.0, 213.0, 183.0, 244.0, 218.0, 240.0, 216.0, 232.0, 224.0, 226.0, 227.0, 230.0, 220.0, 259.0, 257.0, 256.0, 260.0, 146.0, 156.0, 218.0, 238.0, 229.0, 227.0, 239.0, 217.0, 227.0, 226.0, 200.0, 204.0, 236.0, 229.0, 236.0, 229.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.686638822903419, "mean_inference_ms": 1.2093328801666652, "mean_action_processing_ms": 0.13286058550342847, "mean_env_wait_ms": 0.854187887715765, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 519.0, "episode_reward_min": 228.0, "episode_reward_mean": 448.47, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 111.0}, "policy_reward_max": {"ppo": 271.0}, "policy_reward_mean": {"ppo": 224.235}, "hist_stats": {"episode_reward": [450.0, 416.0, 444.0, 416.0, 453.0, 450.0, 513.0, 408.0, 516.0, 456.0, 476.0, 444.0, 296.0, 419.0, 413.0, 441.0, 438.0, 516.0, 462.0, 453.0, 456.0, 399.0, 447.0, 465.0, 459.0, 450.0, 465.0, 432.0, 228.0, 450.0, 393.0, 501.0, 444.0, 456.0, 459.0, 459.0, 453.0, 456.0, 416.0, 456.0, 441.0, 459.0, 464.0, 404.0, 459.0, 408.0, 387.0, 456.0, 462.0, 444.0, 402.0, 
456.0, 507.0, 456.0, 356.0, 516.0, 470.0, 462.0, 453.0, 413.0, 456.0, 510.0, 510.0, 456.0, 390.0, 462.0, 444.0, 504.0, 473.0, 453.0, 459.0, 510.0, 516.0, 465.0, 513.0, 435.0, 470.0, 519.0, 453.0, 398.0, 456.0, 459.0, 422.0, 453.0, 396.0, 462.0, 456.0, 456.0, 453.0, 450.0, 516.0, 516.0, 302.0, 456.0, 456.0, 456.0, 453.0, 404.0, 465.0, 465.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [228.0, 222.0, 210.0, 206.0, 217.0, 227.0, 211.0, 205.0, 217.0, 236.0, 226.0, 224.0, 258.0, 255.0, 209.0, 199.0, 256.0, 260.0, 235.0, 221.0, 238.0, 238.0, 223.0, 221.0, 150.0, 146.0, 216.0, 203.0, 197.0, 216.0, 228.0, 213.0, 225.0, 213.0, 246.0, 270.0, 226.0, 236.0, 231.0, 222.0, 234.0, 222.0, 197.0, 202.0, 218.0, 229.0, 241.0, 224.0, 229.0, 230.0, 226.0, 224.0, 233.0, 232.0, 217.0, 215.0, 117.0, 111.0, 220.0, 230.0, 197.0, 196.0, 258.0, 243.0, 219.0, 225.0, 236.0, 220.0, 254.0, 205.0, 214.0, 245.0, 226.0, 227.0, 229.0, 227.0, 221.0, 195.0, 222.0, 234.0, 229.0, 212.0, 229.0, 230.0, 232.0, 232.0, 190.0, 214.0, 239.0, 220.0, 205.0, 203.0, 193.0, 194.0, 223.0, 233.0, 232.0, 230.0, 201.0, 243.0, 200.0, 202.0, 228.0, 228.0, 254.0, 253.0, 240.0, 216.0, 178.0, 178.0, 258.0, 258.0, 233.0, 237.0, 234.0, 228.0, 233.0, 220.0, 204.0, 209.0, 244.0, 212.0, 253.0, 257.0, 258.0, 252.0, 233.0, 223.0, 184.0, 206.0, 226.0, 236.0, 227.0, 217.0, 242.0, 262.0, 237.0, 236.0, 233.0, 220.0, 224.0, 235.0, 239.0, 271.0, 251.0, 265.0, 234.0, 231.0, 254.0, 259.0, 208.0, 227.0, 230.0, 240.0, 270.0, 249.0, 211.0, 242.0, 199.0, 
199.0, 245.0, 211.0, 226.0, 233.0, 207.0, 215.0, 228.0, 225.0, 213.0, 183.0, 244.0, 218.0, 240.0, 216.0, 232.0, 224.0, 226.0, 227.0, 230.0, 220.0, 259.0, 257.0, 256.0, 260.0, 146.0, 156.0, 218.0, 238.0, 229.0, 227.0, 239.0, 217.0, 227.0, 226.0, 200.0, 204.0, 236.0, 229.0, 236.0, 229.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.686638822903419, "mean_inference_ms": 1.2093328801666652, "mean_action_processing_ms": 0.13286058550342847, "mean_env_wait_ms": 0.854187887715765, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 2406400, "num_agent_steps_trained": 2406400, "num_env_steps_sampled": 1203200, "num_env_steps_trained": 1203200, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 1203200, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 2406400, "timers": {"training_iteration_time_ms": 3677.405, "learn_time_ms": 1128.573, "learn_throughput": 11341.759, "synch_weights_time_ms": 11.522}, "counters": {"num_env_steps_sampled": 1203200, "num_env_steps_trained": 1203200, "num_agent_steps_sampled": 2406400, "num_agent_steps_trained": 2406400}, "done": false, "episodes_total": 3008, "training_iteration": 94, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-06-01", "timestamp": 1666580761, "time_this_iter_s": 3.6629152297973633, "time_total_s": 351.53656125068665, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, 
"device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, 
"sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": 
{"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", 
"observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, 
"keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function 
get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function 
gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 351.53656125068665, "timesteps_since_restore": 0, "iterations_since_restore": 94, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.660000000000004, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 157.8, "sparse_reward_min": 60, "sparse_reward_max": 180, "shaped_reward_mean": 136.24, "shaped_reward_min": 79, "shaped_reward_max": 162, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.0, "onion_pickup_agent_0_min": 5, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 14.13, "onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 13.2, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 13.49, "useful_onion_pickup_agent_1_min": 6, "useful_onion_pickup_agent_1_max": 19, "onion_drop_agent_0_mean": 1.15, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.96, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.77, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, 
"useful_onion_drop_agent_1_mean": 0.55, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 12.43, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 18, "potting_onion_agent_1_mean": 12.84, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 18, "dish_pickup_agent_0_mean": 5.36, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 4.85, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 13, "useful_dish_pickup_agent_0_mean": 3.44, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 3.22, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.91, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.81, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 9, "useful_dish_drop_agent_0_mean": 0.31, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.29, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 4.25, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 3.87, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.19, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 3.83, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.03, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 12.43, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 18, "optimal_onion_potting_agent_1_mean": 12.84, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 18, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 12.43, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 18, "viable_onion_potting_agent_1_mean": 12.84, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 18, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 1.0097419737292228e-29, "cur_lr": 0.0010000000474974513, "total_loss": 5.3522700909525156e-05, "policy_loss": 
-0.00010001769987866282, "vf_loss": 7.527695655822754, "vf_explained_var": 0.6990654468536377, "kl": 0.0018153074197471142, "entropy": 1.1984570026397705, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 1216000, "num_env_steps_trained": 1216000, "num_agent_steps_sampled": 2432000, "num_agent_steps_trained": 2432000}, "sampler_results": {"episode_reward_max": 522.0, "episode_reward_min": 210.0, "episode_reward_mean": 451.84, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 96.0}, "policy_reward_max": {"ppo": 271.0}, "policy_reward_mean": {"ppo": 225.92}, "custom_metrics": {"sparse_reward_mean": 157.8, "sparse_reward_min": 60, "sparse_reward_max": 180, "shaped_reward_mean": 136.24, "shaped_reward_min": 79, "shaped_reward_max": 162, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.0, "onion_pickup_agent_0_min": 5, "onion_pickup_agent_0_max": 21, 
"onion_pickup_agent_1_mean": 14.13, "onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 13.2, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 13.49, "useful_onion_pickup_agent_1_min": 6, "useful_onion_pickup_agent_1_max": 19, "onion_drop_agent_0_mean": 1.15, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.96, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.77, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.55, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 12.43, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 18, "potting_onion_agent_1_mean": 12.84, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 18, "dish_pickup_agent_0_mean": 5.36, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 4.85, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 13, "useful_dish_pickup_agent_0_mean": 3.44, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 3.22, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.91, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.81, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 9, "useful_dish_drop_agent_0_mean": 0.31, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.29, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 4.25, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 3.87, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.19, 
"soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 3.83, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.03, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 12.43, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 18, "optimal_onion_potting_agent_1_mean": 12.84, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 18, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 12.43, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 18, "viable_onion_potting_agent_1_mean": 12.84, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 18, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, 
"useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [444.0, 456.0, 459.0, 459.0, 453.0, 456.0, 416.0, 456.0, 441.0, 459.0, 464.0, 404.0, 459.0, 408.0, 387.0, 456.0, 462.0, 444.0, 402.0, 456.0, 507.0, 456.0, 356.0, 516.0, 470.0, 462.0, 453.0, 413.0, 456.0, 510.0, 510.0, 456.0, 390.0, 462.0, 444.0, 504.0, 473.0, 453.0, 459.0, 510.0, 516.0, 465.0, 513.0, 435.0, 470.0, 519.0, 453.0, 398.0, 456.0, 459.0, 422.0, 453.0, 396.0, 462.0, 456.0, 456.0, 453.0, 450.0, 516.0, 516.0, 302.0, 456.0, 456.0, 456.0, 453.0, 404.0, 465.0, 465.0, 516.0, 210.0, 459.0, 473.0, 459.0, 513.0, 450.0, 447.0, 522.0, 513.0, 462.0, 450.0, 461.0, 456.0, 459.0, 402.0, 462.0, 459.0, 510.0, 416.0, 441.0, 510.0, 450.0, 408.0, 459.0, 453.0, 462.0, 470.0, 239.0, 450.0, 465.0, 456.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [219.0, 225.0, 236.0, 220.0, 254.0, 205.0, 214.0, 245.0, 226.0, 227.0, 229.0, 227.0, 221.0, 195.0, 222.0, 234.0, 229.0, 212.0, 229.0, 230.0, 232.0, 232.0, 190.0, 214.0, 239.0, 220.0, 205.0, 203.0, 193.0, 194.0, 223.0, 233.0, 232.0, 230.0, 201.0, 243.0, 200.0, 202.0, 228.0, 228.0, 254.0, 253.0, 240.0, 
216.0, 178.0, 178.0, 258.0, 258.0, 233.0, 237.0, 234.0, 228.0, 233.0, 220.0, 204.0, 209.0, 244.0, 212.0, 253.0, 257.0, 258.0, 252.0, 233.0, 223.0, 184.0, 206.0, 226.0, 236.0, 227.0, 217.0, 242.0, 262.0, 237.0, 236.0, 233.0, 220.0, 224.0, 235.0, 239.0, 271.0, 251.0, 265.0, 234.0, 231.0, 254.0, 259.0, 208.0, 227.0, 230.0, 240.0, 270.0, 249.0, 211.0, 242.0, 199.0, 199.0, 245.0, 211.0, 226.0, 233.0, 207.0, 215.0, 228.0, 225.0, 213.0, 183.0, 244.0, 218.0, 240.0, 216.0, 232.0, 224.0, 226.0, 227.0, 230.0, 220.0, 259.0, 257.0, 256.0, 260.0, 146.0, 156.0, 218.0, 238.0, 229.0, 227.0, 239.0, 217.0, 227.0, 226.0, 200.0, 204.0, 236.0, 229.0, 236.0, 229.0, 267.0, 249.0, 114.0, 96.0, 234.0, 225.0, 241.0, 232.0, 228.0, 231.0, 254.0, 259.0, 236.0, 214.0, 226.0, 221.0, 257.0, 265.0, 249.0, 264.0, 234.0, 228.0, 216.0, 234.0, 221.0, 240.0, 231.0, 225.0, 220.0, 239.0, 195.0, 207.0, 244.0, 218.0, 248.0, 211.0, 259.0, 251.0, 200.0, 216.0, 210.0, 231.0, 267.0, 243.0, 240.0, 210.0, 210.0, 198.0, 234.0, 225.0, 223.0, 230.0, 229.0, 233.0, 232.0, 238.0, 108.0, 131.0, 220.0, 230.0, 225.0, 240.0, 234.0, 222.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.686491869898288, "mean_inference_ms": 1.209051302333497, "mean_action_processing_ms": 0.1328341330608934, "mean_env_wait_ms": 0.8535721275289399, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 522.0, "episode_reward_min": 210.0, "episode_reward_mean": 451.84, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 96.0}, "policy_reward_max": {"ppo": 271.0}, "policy_reward_mean": {"ppo": 225.92}, "hist_stats": {"episode_reward": [444.0, 456.0, 459.0, 459.0, 453.0, 456.0, 416.0, 456.0, 441.0, 459.0, 464.0, 404.0, 459.0, 408.0, 387.0, 456.0, 462.0, 444.0, 402.0, 456.0, 507.0, 456.0, 356.0, 516.0, 470.0, 462.0, 453.0, 413.0, 456.0, 510.0, 510.0, 456.0, 390.0, 462.0, 444.0, 504.0, 473.0, 453.0, 459.0, 510.0, 516.0, 465.0, 513.0, 435.0, 470.0, 519.0, 453.0, 398.0, 456.0, 459.0, 422.0, 
453.0, 396.0, 462.0, 456.0, 456.0, 453.0, 450.0, 516.0, 516.0, 302.0, 456.0, 456.0, 456.0, 453.0, 404.0, 465.0, 465.0, 516.0, 210.0, 459.0, 473.0, 459.0, 513.0, 450.0, 447.0, 522.0, 513.0, 462.0, 450.0, 461.0, 456.0, 459.0, 402.0, 462.0, 459.0, 510.0, 416.0, 441.0, 510.0, 450.0, 408.0, 459.0, 453.0, 462.0, 470.0, 239.0, 450.0, 465.0, 456.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [219.0, 225.0, 236.0, 220.0, 254.0, 205.0, 214.0, 245.0, 226.0, 227.0, 229.0, 227.0, 221.0, 195.0, 222.0, 234.0, 229.0, 212.0, 229.0, 230.0, 232.0, 232.0, 190.0, 214.0, 239.0, 220.0, 205.0, 203.0, 193.0, 194.0, 223.0, 233.0, 232.0, 230.0, 201.0, 243.0, 200.0, 202.0, 228.0, 228.0, 254.0, 253.0, 240.0, 216.0, 178.0, 178.0, 258.0, 258.0, 233.0, 237.0, 234.0, 228.0, 233.0, 220.0, 204.0, 209.0, 244.0, 212.0, 253.0, 257.0, 258.0, 252.0, 233.0, 223.0, 184.0, 206.0, 226.0, 236.0, 227.0, 217.0, 242.0, 262.0, 237.0, 236.0, 233.0, 220.0, 224.0, 235.0, 239.0, 271.0, 251.0, 265.0, 234.0, 231.0, 254.0, 259.0, 208.0, 227.0, 230.0, 240.0, 270.0, 249.0, 211.0, 242.0, 199.0, 199.0, 245.0, 211.0, 226.0, 233.0, 207.0, 215.0, 228.0, 225.0, 213.0, 183.0, 244.0, 218.0, 240.0, 216.0, 232.0, 224.0, 226.0, 227.0, 230.0, 220.0, 259.0, 257.0, 256.0, 260.0, 146.0, 156.0, 218.0, 238.0, 229.0, 227.0, 239.0, 217.0, 227.0, 226.0, 200.0, 204.0, 236.0, 229.0, 236.0, 229.0, 267.0, 249.0, 114.0, 96.0, 234.0, 225.0, 241.0, 232.0, 228.0, 231.0, 254.0, 259.0, 236.0, 214.0, 226.0, 221.0, 257.0, 265.0, 249.0, 264.0, 234.0, 228.0, 216.0, 
234.0, 221.0, 240.0, 231.0, 225.0, 220.0, 239.0, 195.0, 207.0, 244.0, 218.0, 248.0, 211.0, 259.0, 251.0, 200.0, 216.0, 210.0, 231.0, 267.0, 243.0, 240.0, 210.0, 210.0, 198.0, 234.0, 225.0, 223.0, 230.0, 229.0, 233.0, 232.0, 238.0, 108.0, 131.0, 220.0, 230.0, 225.0, 240.0, 234.0, 222.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.686491869898288, "mean_inference_ms": 1.209051302333497, "mean_action_processing_ms": 0.1328341330608934, "mean_env_wait_ms": 0.8535721275289399, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 2432000, "num_agent_steps_trained": 2432000, "num_env_steps_sampled": 1216000, "num_env_steps_trained": 1216000, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 1216000, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 2432000, "timers": {"training_iteration_time_ms": 3673.561, "learn_time_ms": 1132.205, "learn_throughput": 11305.37, "synch_weights_time_ms": 12.381}, "counters": {"num_env_steps_sampled": 1216000, "num_env_steps_trained": 1216000, "num_agent_steps_sampled": 2432000, "num_agent_steps_trained": 2432000}, "done": false, "episodes_total": 3040, "training_iteration": 95, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-06-05", "timestamp": 1666580765, "time_this_iter_s": 3.685410499572754, "time_total_s": 355.2219717502594, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, 
"device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, 
"sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": 
{"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", 
"observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, 
"keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function 
get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function 
gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 355.2219717502594, "timesteps_since_restore": 0, "iterations_since_restore": 95, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.816666666666666, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 156.2, "sparse_reward_min": 40, "sparse_reward_max": 180, "shaped_reward_mean": 135.15, "shaped_reward_min": 43, "shaped_reward_max": 162, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.17, "onion_pickup_agent_0_min": 5, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 13.86, "onion_pickup_agent_1_min": 5, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 13.2, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 19, "useful_onion_pickup_agent_1_mean": 13.17, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 20, "onion_drop_agent_0_mean": 1.19, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 1.04, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.79, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, 
"useful_onion_drop_agent_1_mean": 0.65, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 12.56, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 18, "potting_onion_agent_1_mean": 12.52, "potting_onion_agent_1_min": 3, "potting_onion_agent_1_max": 20, "dish_pickup_agent_0_mean": 5.06, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 4.94, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 13, "useful_dish_pickup_agent_0_mean": 3.27, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 3.35, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.86, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.74, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 9, "useful_dish_drop_agent_0_mean": 0.27, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.23, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 4.03, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.02, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 3.92, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 3.98, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.05, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 12.56, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 18, "optimal_onion_potting_agent_1_mean": 12.52, "optimal_onion_potting_agent_1_min": 3, "optimal_onion_potting_agent_1_max": 20, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 12.56, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 18, "viable_onion_potting_agent_1_mean": 12.52, "viable_onion_potting_agent_1_min": 3, "viable_onion_potting_agent_1_max": 20, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 5.048709868646114e-30, "cur_lr": 0.0010000000474974513, "total_loss": -0.0009595048613846302, "policy_loss": 
-0.0011315889423713088, "vf_loss": 7.664634704589844, "vf_explained_var": 0.693601667881012, "kl": 0.0018983024638146162, "entropy": 1.1887578964233398, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 1228800, "num_env_steps_trained": 1228800, "num_agent_steps_sampled": 2457600, "num_agent_steps_trained": 2457600}, "sampler_results": {"episode_reward_max": 522.0, "episode_reward_min": 123.0, "episode_reward_mean": 447.55, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 57.0}, "policy_reward_max": {"ppo": 271.0}, "policy_reward_mean": {"ppo": 223.775}, "custom_metrics": {"sparse_reward_mean": 156.2, "sparse_reward_min": 40, "sparse_reward_max": 180, "shaped_reward_mean": 135.15, "shaped_reward_min": 43, "shaped_reward_max": 162, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.17, "onion_pickup_agent_0_min": 5, "onion_pickup_agent_0_max": 21, 
"onion_pickup_agent_1_mean": 13.86, "onion_pickup_agent_1_min": 5, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 13.2, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 19, "useful_onion_pickup_agent_1_mean": 13.17, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 20, "onion_drop_agent_0_mean": 1.19, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 1.04, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.79, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.65, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 12.56, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 18, "potting_onion_agent_1_mean": 12.52, "potting_onion_agent_1_min": 3, "potting_onion_agent_1_max": 20, "dish_pickup_agent_0_mean": 5.06, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 4.94, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 13, "useful_dish_pickup_agent_0_mean": 3.27, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 3.35, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.86, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.74, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 9, "useful_dish_drop_agent_0_mean": 0.27, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.23, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 4.03, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.02, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 3.92, 
"soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 3.98, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.05, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 12.56, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 18, "optimal_onion_potting_agent_1_mean": 12.52, "optimal_onion_potting_agent_1_min": 3, "optimal_onion_potting_agent_1_max": 20, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 12.56, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 18, "viable_onion_potting_agent_1_mean": 12.52, "viable_onion_potting_agent_1_min": 3, "viable_onion_potting_agent_1_max": 20, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, 
"useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [390.0, 462.0, 444.0, 504.0, 473.0, 453.0, 459.0, 510.0, 516.0, 465.0, 513.0, 435.0, 470.0, 519.0, 453.0, 398.0, 456.0, 459.0, 422.0, 453.0, 396.0, 462.0, 456.0, 456.0, 453.0, 450.0, 516.0, 516.0, 302.0, 456.0, 456.0, 456.0, 453.0, 404.0, 465.0, 465.0, 516.0, 210.0, 459.0, 473.0, 459.0, 513.0, 450.0, 447.0, 522.0, 513.0, 462.0, 450.0, 461.0, 456.0, 459.0, 402.0, 462.0, 459.0, 510.0, 416.0, 441.0, 510.0, 450.0, 408.0, 459.0, 453.0, 462.0, 470.0, 239.0, 450.0, 465.0, 456.0, 507.0, 453.0, 462.0, 459.0, 459.0, 464.0, 467.0, 461.0, 413.0, 456.0, 464.0, 237.0, 450.0, 507.0, 342.0, 456.0, 468.0, 444.0, 399.0, 402.0, 447.0, 510.0, 513.0, 516.0, 123.0, 462.0, 393.0, 459.0, 459.0, 399.0, 413.0, 513.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [184.0, 206.0, 226.0, 236.0, 227.0, 217.0, 242.0, 262.0, 237.0, 236.0, 233.0, 220.0, 224.0, 235.0, 239.0, 271.0, 251.0, 265.0, 234.0, 231.0, 254.0, 259.0, 208.0, 227.0, 230.0, 240.0, 270.0, 249.0, 211.0, 242.0, 199.0, 199.0, 245.0, 211.0, 226.0, 233.0, 207.0, 215.0, 228.0, 225.0, 213.0, 183.0, 244.0, 
218.0, 240.0, 216.0, 232.0, 224.0, 226.0, 227.0, 230.0, 220.0, 259.0, 257.0, 256.0, 260.0, 146.0, 156.0, 218.0, 238.0, 229.0, 227.0, 239.0, 217.0, 227.0, 226.0, 200.0, 204.0, 236.0, 229.0, 236.0, 229.0, 267.0, 249.0, 114.0, 96.0, 234.0, 225.0, 241.0, 232.0, 228.0, 231.0, 254.0, 259.0, 236.0, 214.0, 226.0, 221.0, 257.0, 265.0, 249.0, 264.0, 234.0, 228.0, 216.0, 234.0, 221.0, 240.0, 231.0, 225.0, 220.0, 239.0, 195.0, 207.0, 244.0, 218.0, 248.0, 211.0, 259.0, 251.0, 200.0, 216.0, 210.0, 231.0, 267.0, 243.0, 240.0, 210.0, 210.0, 198.0, 234.0, 225.0, 223.0, 230.0, 229.0, 233.0, 232.0, 238.0, 108.0, 131.0, 220.0, 230.0, 225.0, 240.0, 234.0, 222.0, 260.0, 247.0, 233.0, 220.0, 238.0, 224.0, 233.0, 226.0, 227.0, 232.0, 244.0, 220.0, 230.0, 237.0, 228.0, 233.0, 205.0, 208.0, 216.0, 240.0, 237.0, 227.0, 117.0, 120.0, 219.0, 231.0, 262.0, 245.0, 171.0, 171.0, 224.0, 232.0, 219.0, 249.0, 208.0, 236.0, 197.0, 202.0, 205.0, 197.0, 211.0, 236.0, 244.0, 266.0, 257.0, 256.0, 247.0, 269.0, 66.0, 57.0, 230.0, 232.0, 186.0, 207.0, 232.0, 227.0, 232.0, 227.0, 209.0, 190.0, 204.0, 209.0, 266.0, 247.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6863606600516278, "mean_inference_ms": 1.2088294285353811, "mean_action_processing_ms": 0.13281410100148, "mean_env_wait_ms": 0.8530127778652258, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 522.0, "episode_reward_min": 123.0, "episode_reward_mean": 447.55, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 57.0}, "policy_reward_max": {"ppo": 271.0}, "policy_reward_mean": {"ppo": 223.775}, "hist_stats": {"episode_reward": [390.0, 462.0, 444.0, 504.0, 473.0, 453.0, 459.0, 510.0, 516.0, 465.0, 513.0, 435.0, 470.0, 519.0, 453.0, 398.0, 456.0, 459.0, 422.0, 453.0, 396.0, 462.0, 456.0, 456.0, 453.0, 450.0, 516.0, 516.0, 302.0, 456.0, 456.0, 456.0, 453.0, 404.0, 465.0, 465.0, 516.0, 210.0, 459.0, 473.0, 459.0, 513.0, 450.0, 447.0, 522.0, 513.0, 462.0, 450.0, 461.0, 456.0, 459.0, 
402.0, 462.0, 459.0, 510.0, 416.0, 441.0, 510.0, 450.0, 408.0, 459.0, 453.0, 462.0, 470.0, 239.0, 450.0, 465.0, 456.0, 507.0, 453.0, 462.0, 459.0, 459.0, 464.0, 467.0, 461.0, 413.0, 456.0, 464.0, 237.0, 450.0, 507.0, 342.0, 456.0, 468.0, 444.0, 399.0, 402.0, 447.0, 510.0, 513.0, 516.0, 123.0, 462.0, 393.0, 459.0, 459.0, 399.0, 413.0, 513.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [184.0, 206.0, 226.0, 236.0, 227.0, 217.0, 242.0, 262.0, 237.0, 236.0, 233.0, 220.0, 224.0, 235.0, 239.0, 271.0, 251.0, 265.0, 234.0, 231.0, 254.0, 259.0, 208.0, 227.0, 230.0, 240.0, 270.0, 249.0, 211.0, 242.0, 199.0, 199.0, 245.0, 211.0, 226.0, 233.0, 207.0, 215.0, 228.0, 225.0, 213.0, 183.0, 244.0, 218.0, 240.0, 216.0, 232.0, 224.0, 226.0, 227.0, 230.0, 220.0, 259.0, 257.0, 256.0, 260.0, 146.0, 156.0, 218.0, 238.0, 229.0, 227.0, 239.0, 217.0, 227.0, 226.0, 200.0, 204.0, 236.0, 229.0, 236.0, 229.0, 267.0, 249.0, 114.0, 96.0, 234.0, 225.0, 241.0, 232.0, 228.0, 231.0, 254.0, 259.0, 236.0, 214.0, 226.0, 221.0, 257.0, 265.0, 249.0, 264.0, 234.0, 228.0, 216.0, 234.0, 221.0, 240.0, 231.0, 225.0, 220.0, 239.0, 195.0, 207.0, 244.0, 218.0, 248.0, 211.0, 259.0, 251.0, 200.0, 216.0, 210.0, 231.0, 267.0, 243.0, 240.0, 210.0, 210.0, 198.0, 234.0, 225.0, 223.0, 230.0, 229.0, 233.0, 232.0, 238.0, 108.0, 131.0, 220.0, 230.0, 225.0, 240.0, 234.0, 222.0, 260.0, 247.0, 233.0, 220.0, 238.0, 224.0, 233.0, 226.0, 227.0, 232.0, 244.0, 220.0, 230.0, 237.0, 228.0, 233.0, 205.0, 208.0, 216.0, 240.0, 237.0, 227.0, 117.0, 
120.0, 219.0, 231.0, 262.0, 245.0, 171.0, 171.0, 224.0, 232.0, 219.0, 249.0, 208.0, 236.0, 197.0, 202.0, 205.0, 197.0, 211.0, 236.0, 244.0, 266.0, 257.0, 256.0, 247.0, 269.0, 66.0, 57.0, 230.0, 232.0, 186.0, 207.0, 232.0, 227.0, 232.0, 227.0, 209.0, 190.0, 204.0, 209.0, 266.0, 247.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6863606600516278, "mean_inference_ms": 1.2088294285353811, "mean_action_processing_ms": 0.13281410100148, "mean_env_wait_ms": 0.8530127778652258, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 2457600, "num_agent_steps_trained": 2457600, "num_env_steps_sampled": 1228800, "num_env_steps_trained": 1228800, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 1228800, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 2457600, "timers": {"training_iteration_time_ms": 3664.002, "learn_time_ms": 1123.086, "learn_throughput": 11397.169, "synch_weights_time_ms": 13.512}, "counters": {"num_env_steps_sampled": 1228800, "num_env_steps_trained": 1228800, "num_agent_steps_sampled": 2457600, "num_agent_steps_trained": 2457600}, "done": false, "episodes_total": 3072, "training_iteration": 96, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-06-08", "timestamp": 1666580768, "time_this_iter_s": 3.63649320602417, "time_total_s": 358.85846495628357, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, 
"device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, 
"sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": 
{"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", 
"observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, 
"keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function 
get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function 
gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 358.85846495628357, "timesteps_since_restore": 0, "iterations_since_restore": 96, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.500000000000004, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 156.4, "sparse_reward_min": 40, "sparse_reward_max": 180, "shaped_reward_mean": 136.25, "shaped_reward_min": 43, "shaped_reward_max": 162, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 13.71, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 14.33, "onion_pickup_agent_1_min": 5, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 12.83, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 13.65, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 1.07, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 1.04, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.66, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, 
"useful_onion_drop_agent_1_mean": 0.6, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 12.3, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 18, "potting_onion_agent_1_mean": 12.98, "potting_onion_agent_1_min": 3, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.18, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 4.85, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 13, "useful_dish_pickup_agent_0_mean": 3.39, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 3.33, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.85, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.75, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 9, "useful_dish_drop_agent_0_mean": 0.26, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.24, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 4.19, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 3.93, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 4.06, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 3.85, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.06, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 3, "optimal_onion_potting_agent_0_mean": 12.3, "optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 18, "optimal_onion_potting_agent_1_mean": 12.98, "optimal_onion_potting_agent_1_min": 3, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 12.3, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 18, "viable_onion_potting_agent_1_mean": 12.98, "viable_onion_potting_agent_1_min": 3, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 2.524354934323057e-30, "cur_lr": 0.0010000000474974513, "total_loss": -0.0026760417968034744, "policy_loss": -0.002843119204044342, 
"vf_loss": 7.638774871826172, "vf_explained_var": 0.6889978647232056, "kl": 0.0018203468061983585, "entropy": 1.1935973167419434, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 1241600, "num_env_steps_trained": 1241600, "num_agent_steps_sampled": 2483200, "num_agent_steps_trained": 2483200}, "sampler_results": {"episode_reward_max": 522.0, "episode_reward_min": 123.0, "episode_reward_mean": 449.05, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 57.0}, "policy_reward_max": {"ppo": 270.0}, "policy_reward_mean": {"ppo": 224.525}, "custom_metrics": {"sparse_reward_mean": 156.4, "sparse_reward_min": 40, "sparse_reward_max": 180, "shaped_reward_mean": 136.25, "shaped_reward_min": 43, "shaped_reward_max": 162, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 13.71, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 14.33, 
"onion_pickup_agent_1_min": 5, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 12.83, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 13.65, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 1.07, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 1.04, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.66, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.6, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 12.3, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 18, "potting_onion_agent_1_mean": 12.98, "potting_onion_agent_1_min": 3, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.18, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 4.85, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 13, "useful_dish_pickup_agent_0_mean": 3.39, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 3.33, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.85, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.75, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 9, "useful_dish_drop_agent_0_mean": 0.26, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.24, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 4.19, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 3.93, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 4.06, "soup_delivery_agent_0_min": 1, 
"soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 3.85, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.06, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 3, "optimal_onion_potting_agent_0_mean": 12.3, "optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 18, "optimal_onion_potting_agent_1_mean": 12.98, "optimal_onion_potting_agent_1_min": 3, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 12.3, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 18, "viable_onion_potting_agent_1_mean": 12.98, "viable_onion_potting_agent_1_min": 3, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [453.0, 404.0, 465.0, 465.0, 516.0, 210.0, 459.0, 473.0, 459.0, 513.0, 450.0, 447.0, 522.0, 513.0, 462.0, 450.0, 461.0, 456.0, 459.0, 402.0, 462.0, 459.0, 510.0, 416.0, 441.0, 510.0, 450.0, 408.0, 459.0, 453.0, 462.0, 470.0, 239.0, 450.0, 465.0, 456.0, 507.0, 453.0, 462.0, 459.0, 459.0, 464.0, 467.0, 461.0, 413.0, 456.0, 464.0, 237.0, 450.0, 507.0, 342.0, 456.0, 468.0, 444.0, 399.0, 402.0, 447.0, 510.0, 513.0, 516.0, 123.0, 462.0, 393.0, 459.0, 459.0, 399.0, 413.0, 513.0, 459.0, 516.0, 468.0, 510.0, 462.0, 459.0, 399.0, 465.0, 410.0, 401.0, 510.0, 470.0, 473.0, 470.0, 348.0, 513.0, 459.0, 510.0, 473.0, 453.0, 513.0, 459.0, 516.0, 513.0, 419.0, 453.0, 402.0, 450.0, 504.0, 413.0, 459.0, 450.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [227.0, 226.0, 200.0, 204.0, 236.0, 229.0, 236.0, 229.0, 267.0, 249.0, 114.0, 96.0, 234.0, 225.0, 241.0, 232.0, 228.0, 231.0, 254.0, 259.0, 236.0, 214.0, 226.0, 221.0, 257.0, 265.0, 249.0, 264.0, 234.0, 228.0, 216.0, 234.0, 221.0, 240.0, 231.0, 225.0, 220.0, 239.0, 195.0, 207.0, 244.0, 218.0, 248.0, 211.0, 259.0, 251.0, 200.0, 216.0, 210.0, 
231.0, 267.0, 243.0, 240.0, 210.0, 210.0, 198.0, 234.0, 225.0, 223.0, 230.0, 229.0, 233.0, 232.0, 238.0, 108.0, 131.0, 220.0, 230.0, 225.0, 240.0, 234.0, 222.0, 260.0, 247.0, 233.0, 220.0, 238.0, 224.0, 233.0, 226.0, 227.0, 232.0, 244.0, 220.0, 230.0, 237.0, 228.0, 233.0, 205.0, 208.0, 216.0, 240.0, 237.0, 227.0, 117.0, 120.0, 219.0, 231.0, 262.0, 245.0, 171.0, 171.0, 224.0, 232.0, 219.0, 249.0, 208.0, 236.0, 197.0, 202.0, 205.0, 197.0, 211.0, 236.0, 244.0, 266.0, 257.0, 256.0, 247.0, 269.0, 66.0, 57.0, 230.0, 232.0, 186.0, 207.0, 232.0, 227.0, 232.0, 227.0, 209.0, 190.0, 204.0, 209.0, 266.0, 247.0, 221.0, 238.0, 255.0, 261.0, 230.0, 238.0, 249.0, 261.0, 241.0, 221.0, 225.0, 234.0, 187.0, 212.0, 237.0, 228.0, 197.0, 213.0, 207.0, 194.0, 251.0, 259.0, 245.0, 225.0, 231.0, 242.0, 231.0, 239.0, 169.0, 179.0, 270.0, 243.0, 235.0, 224.0, 259.0, 251.0, 245.0, 228.0, 227.0, 226.0, 246.0, 267.0, 219.0, 240.0, 252.0, 264.0, 265.0, 248.0, 205.0, 214.0, 215.0, 238.0, 202.0, 200.0, 231.0, 219.0, 255.0, 249.0, 206.0, 207.0, 244.0, 215.0, 208.0, 242.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6862933417965331, "mean_inference_ms": 1.2086692382951651, "mean_action_processing_ms": 0.13279365519382014, "mean_env_wait_ms": 0.8524899468324156, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 522.0, "episode_reward_min": 123.0, "episode_reward_mean": 449.05, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 57.0}, "policy_reward_max": {"ppo": 270.0}, "policy_reward_mean": {"ppo": 224.525}, "hist_stats": {"episode_reward": [453.0, 404.0, 465.0, 465.0, 516.0, 210.0, 459.0, 473.0, 459.0, 513.0, 450.0, 447.0, 522.0, 513.0, 462.0, 450.0, 461.0, 456.0, 459.0, 402.0, 462.0, 459.0, 510.0, 416.0, 441.0, 510.0, 450.0, 408.0, 459.0, 453.0, 462.0, 470.0, 239.0, 450.0, 465.0, 456.0, 507.0, 453.0, 462.0, 459.0, 459.0, 464.0, 467.0, 461.0, 413.0, 456.0, 464.0, 237.0, 450.0, 507.0, 342.0, 456.0, 468.0, 444.0, 399.0, 402.0, 447.0, 
510.0, 513.0, 516.0, 123.0, 462.0, 393.0, 459.0, 459.0, 399.0, 413.0, 513.0, 459.0, 516.0, 468.0, 510.0, 462.0, 459.0, 399.0, 465.0, 410.0, 401.0, 510.0, 470.0, 473.0, 470.0, 348.0, 513.0, 459.0, 510.0, 473.0, 453.0, 513.0, 459.0, 516.0, 513.0, 419.0, 453.0, 402.0, 450.0, 504.0, 413.0, 459.0, 450.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [227.0, 226.0, 200.0, 204.0, 236.0, 229.0, 236.0, 229.0, 267.0, 249.0, 114.0, 96.0, 234.0, 225.0, 241.0, 232.0, 228.0, 231.0, 254.0, 259.0, 236.0, 214.0, 226.0, 221.0, 257.0, 265.0, 249.0, 264.0, 234.0, 228.0, 216.0, 234.0, 221.0, 240.0, 231.0, 225.0, 220.0, 239.0, 195.0, 207.0, 244.0, 218.0, 248.0, 211.0, 259.0, 251.0, 200.0, 216.0, 210.0, 231.0, 267.0, 243.0, 240.0, 210.0, 210.0, 198.0, 234.0, 225.0, 223.0, 230.0, 229.0, 233.0, 232.0, 238.0, 108.0, 131.0, 220.0, 230.0, 225.0, 240.0, 234.0, 222.0, 260.0, 247.0, 233.0, 220.0, 238.0, 224.0, 233.0, 226.0, 227.0, 232.0, 244.0, 220.0, 230.0, 237.0, 228.0, 233.0, 205.0, 208.0, 216.0, 240.0, 237.0, 227.0, 117.0, 120.0, 219.0, 231.0, 262.0, 245.0, 171.0, 171.0, 224.0, 232.0, 219.0, 249.0, 208.0, 236.0, 197.0, 202.0, 205.0, 197.0, 211.0, 236.0, 244.0, 266.0, 257.0, 256.0, 247.0, 269.0, 66.0, 57.0, 230.0, 232.0, 186.0, 207.0, 232.0, 227.0, 232.0, 227.0, 209.0, 190.0, 204.0, 209.0, 266.0, 247.0, 221.0, 238.0, 255.0, 261.0, 230.0, 238.0, 249.0, 261.0, 241.0, 221.0, 225.0, 234.0, 187.0, 212.0, 237.0, 228.0, 197.0, 213.0, 207.0, 194.0, 251.0, 259.0, 245.0, 225.0, 231.0, 242.0, 231.0, 239.0, 169.0, 
179.0, 270.0, 243.0, 235.0, 224.0, 259.0, 251.0, 245.0, 228.0, 227.0, 226.0, 246.0, 267.0, 219.0, 240.0, 252.0, 264.0, 265.0, 248.0, 205.0, 214.0, 215.0, 238.0, 202.0, 200.0, 231.0, 219.0, 255.0, 249.0, 206.0, 207.0, 244.0, 215.0, 208.0, 242.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6862933417965331, "mean_inference_ms": 1.2086692382951651, "mean_action_processing_ms": 0.13279365519382014, "mean_env_wait_ms": 0.8524899468324156, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 2483200, "num_agent_steps_trained": 2483200, "num_env_steps_sampled": 1241600, "num_env_steps_trained": 1241600, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 1241600, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 2483200, "timers": {"training_iteration_time_ms": 3645.286, "learn_time_ms": 1124.006, "learn_throughput": 11387.84, "synch_weights_time_ms": 13.444}, "counters": {"num_env_steps_sampled": 1241600, "num_env_steps_trained": 1241600, "num_agent_steps_sampled": 2483200, "num_agent_steps_trained": 2483200}, "done": false, "episodes_total": 3104, "training_iteration": 97, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-06-12", "timestamp": 1666580772, "time_this_iter_s": 3.674302577972412, "time_total_s": 362.532767534256, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 362.532767534256, "timesteps_since_restore": 0, "iterations_since_restore": 97, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.6, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 157.0, "sparse_reward_min": 40, "sparse_reward_max": 180, "shaped_reward_mean": 137.17, "shaped_reward_min": 43, "shaped_reward_max": 161, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 13.46, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 14.56, "onion_pickup_agent_1_min": 5, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 12.67, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 13.79, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.89, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 1.08, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.51, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, 
"useful_onion_drop_agent_1_mean": 0.71, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 12.21, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 13.2, "potting_onion_agent_1_min": 3, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.13, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 4.92, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 3.37, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 3.46, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.86, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.7, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 6, "useful_dish_drop_agent_0_mean": 0.25, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.19, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 4.13, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.05, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.03, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 3.94, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.05, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.06, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 3, "optimal_onion_potting_agent_0_mean": 12.21, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 13.2, "optimal_onion_potting_agent_1_min": 3, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 12.21, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 13.2, "viable_onion_potting_agent_1_min": 3, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 1.2621774671615285e-30, "cur_lr": 0.0010000000474974513, "total_loss": -0.002659280551597476, "policy_loss": 
-0.0028225441928952932, "vf_loss": 7.56541633605957, "vf_explained_var": 0.677125871181488, "kl": 0.0019446380902081728, "entropy": 1.186551809310913, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 1254400, "num_env_steps_trained": 1254400, "num_agent_steps_sampled": 2508800, "num_agent_steps_trained": 2508800}, "sampler_results": {"episode_reward_max": 516.0, "episode_reward_min": 123.0, "episode_reward_mean": 451.17, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 57.0}, "policy_reward_max": {"ppo": 270.0}, "policy_reward_mean": {"ppo": 225.585}, "custom_metrics": {"sparse_reward_mean": 157.0, "sparse_reward_min": 40, "sparse_reward_max": 180, "shaped_reward_mean": 137.17, "shaped_reward_min": 43, "shaped_reward_max": 161, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 13.46, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 21, 
"onion_pickup_agent_1_mean": 14.56, "onion_pickup_agent_1_min": 5, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 12.67, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 13.79, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.89, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 1.08, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.51, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, "useful_onion_drop_agent_1_mean": 0.71, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 12.21, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 13.2, "potting_onion_agent_1_min": 3, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.13, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 4.92, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 3.37, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 3.46, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.86, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.7, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 6, "useful_dish_drop_agent_0_mean": 0.25, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.19, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 4.13, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.05, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.03, 
"soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 3.94, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.05, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.06, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 3, "optimal_onion_potting_agent_0_mean": 12.21, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 13.2, "optimal_onion_potting_agent_1_min": 3, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 12.21, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 13.2, "viable_onion_potting_agent_1_min": 3, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, 
"useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [239.0, 450.0, 465.0, 456.0, 507.0, 453.0, 462.0, 459.0, 459.0, 464.0, 467.0, 461.0, 413.0, 456.0, 464.0, 237.0, 450.0, 507.0, 342.0, 456.0, 468.0, 444.0, 399.0, 402.0, 447.0, 510.0, 513.0, 516.0, 123.0, 462.0, 393.0, 459.0, 459.0, 399.0, 413.0, 513.0, 459.0, 516.0, 468.0, 510.0, 462.0, 459.0, 399.0, 465.0, 410.0, 401.0, 510.0, 470.0, 473.0, 470.0, 348.0, 513.0, 459.0, 510.0, 473.0, 453.0, 513.0, 459.0, 516.0, 513.0, 419.0, 453.0, 402.0, 450.0, 504.0, 413.0, 459.0, 450.0, 462.0, 513.0, 459.0, 462.0, 462.0, 416.0, 453.0, 465.0, 456.0, 419.0, 513.0, 462.0, 465.0, 513.0, 438.0, 453.0, 459.0, 507.0, 476.0, 510.0, 459.0, 407.0, 481.0, 456.0, 470.0, 405.0, 465.0, 416.0, 444.0, 462.0, 450.0, 473.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [108.0, 131.0, 220.0, 230.0, 225.0, 240.0, 234.0, 222.0, 260.0, 247.0, 233.0, 220.0, 238.0, 224.0, 233.0, 226.0, 227.0, 232.0, 244.0, 220.0, 230.0, 237.0, 228.0, 233.0, 205.0, 208.0, 216.0, 240.0, 237.0, 227.0, 117.0, 120.0, 219.0, 231.0, 262.0, 245.0, 171.0, 171.0, 224.0, 232.0, 219.0, 249.0, 208.0, 
236.0, 197.0, 202.0, 205.0, 197.0, 211.0, 236.0, 244.0, 266.0, 257.0, 256.0, 247.0, 269.0, 66.0, 57.0, 230.0, 232.0, 186.0, 207.0, 232.0, 227.0, 232.0, 227.0, 209.0, 190.0, 204.0, 209.0, 266.0, 247.0, 221.0, 238.0, 255.0, 261.0, 230.0, 238.0, 249.0, 261.0, 241.0, 221.0, 225.0, 234.0, 187.0, 212.0, 237.0, 228.0, 197.0, 213.0, 207.0, 194.0, 251.0, 259.0, 245.0, 225.0, 231.0, 242.0, 231.0, 239.0, 169.0, 179.0, 270.0, 243.0, 235.0, 224.0, 259.0, 251.0, 245.0, 228.0, 227.0, 226.0, 246.0, 267.0, 219.0, 240.0, 252.0, 264.0, 265.0, 248.0, 205.0, 214.0, 215.0, 238.0, 202.0, 200.0, 231.0, 219.0, 255.0, 249.0, 206.0, 207.0, 244.0, 215.0, 208.0, 242.0, 230.0, 232.0, 254.0, 259.0, 238.0, 221.0, 229.0, 233.0, 218.0, 244.0, 202.0, 214.0, 217.0, 236.0, 226.0, 239.0, 222.0, 234.0, 208.0, 211.0, 258.0, 255.0, 226.0, 236.0, 223.0, 242.0, 251.0, 262.0, 216.0, 222.0, 224.0, 229.0, 231.0, 228.0, 239.0, 268.0, 233.0, 243.0, 253.0, 257.0, 218.0, 241.0, 202.0, 205.0, 236.0, 245.0, 214.0, 242.0, 235.0, 235.0, 218.0, 187.0, 244.0, 221.0, 221.0, 195.0, 233.0, 211.0, 221.0, 241.0, 234.0, 216.0, 241.0, 232.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6862475893075703, "mean_inference_ms": 1.2084870807976076, "mean_action_processing_ms": 0.1327752879991328, "mean_env_wait_ms": 0.8519673280789902, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 516.0, "episode_reward_min": 123.0, "episode_reward_mean": 451.17, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 57.0}, "policy_reward_max": {"ppo": 270.0}, "policy_reward_mean": {"ppo": 225.585}, "hist_stats": {"episode_reward": [239.0, 450.0, 465.0, 456.0, 507.0, 453.0, 462.0, 459.0, 459.0, 464.0, 467.0, 461.0, 413.0, 456.0, 464.0, 237.0, 450.0, 507.0, 342.0, 456.0, 468.0, 444.0, 399.0, 402.0, 447.0, 510.0, 513.0, 516.0, 123.0, 462.0, 393.0, 459.0, 459.0, 399.0, 413.0, 513.0, 459.0, 516.0, 468.0, 510.0, 462.0, 459.0, 399.0, 465.0, 410.0, 401.0, 510.0, 470.0, 473.0, 470.0, 348.0, 
513.0, 459.0, 510.0, 473.0, 453.0, 513.0, 459.0, 516.0, 513.0, 419.0, 453.0, 402.0, 450.0, 504.0, 413.0, 459.0, 450.0, 462.0, 513.0, 459.0, 462.0, 462.0, 416.0, 453.0, 465.0, 456.0, 419.0, 513.0, 462.0, 465.0, 513.0, 438.0, 453.0, 459.0, 507.0, 476.0, 510.0, 459.0, 407.0, 481.0, 456.0, 470.0, 405.0, 465.0, 416.0, 444.0, 462.0, 450.0, 473.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [108.0, 131.0, 220.0, 230.0, 225.0, 240.0, 234.0, 222.0, 260.0, 247.0, 233.0, 220.0, 238.0, 224.0, 233.0, 226.0, 227.0, 232.0, 244.0, 220.0, 230.0, 237.0, 228.0, 233.0, 205.0, 208.0, 216.0, 240.0, 237.0, 227.0, 117.0, 120.0, 219.0, 231.0, 262.0, 245.0, 171.0, 171.0, 224.0, 232.0, 219.0, 249.0, 208.0, 236.0, 197.0, 202.0, 205.0, 197.0, 211.0, 236.0, 244.0, 266.0, 257.0, 256.0, 247.0, 269.0, 66.0, 57.0, 230.0, 232.0, 186.0, 207.0, 232.0, 227.0, 232.0, 227.0, 209.0, 190.0, 204.0, 209.0, 266.0, 247.0, 221.0, 238.0, 255.0, 261.0, 230.0, 238.0, 249.0, 261.0, 241.0, 221.0, 225.0, 234.0, 187.0, 212.0, 237.0, 228.0, 197.0, 213.0, 207.0, 194.0, 251.0, 259.0, 245.0, 225.0, 231.0, 242.0, 231.0, 239.0, 169.0, 179.0, 270.0, 243.0, 235.0, 224.0, 259.0, 251.0, 245.0, 228.0, 227.0, 226.0, 246.0, 267.0, 219.0, 240.0, 252.0, 264.0, 265.0, 248.0, 205.0, 214.0, 215.0, 238.0, 202.0, 200.0, 231.0, 219.0, 255.0, 249.0, 206.0, 207.0, 244.0, 215.0, 208.0, 242.0, 230.0, 232.0, 254.0, 259.0, 238.0, 221.0, 229.0, 233.0, 218.0, 244.0, 202.0, 214.0, 217.0, 236.0, 226.0, 239.0, 222.0, 234.0, 208.0, 211.0, 258.0, 255.0, 226.0, 
236.0, 223.0, 242.0, 251.0, 262.0, 216.0, 222.0, 224.0, 229.0, 231.0, 228.0, 239.0, 268.0, 233.0, 243.0, 253.0, 257.0, 218.0, 241.0, 202.0, 205.0, 236.0, 245.0, 214.0, 242.0, 235.0, 235.0, 218.0, 187.0, 244.0, 221.0, 221.0, 195.0, 233.0, 211.0, 221.0, 241.0, 234.0, 216.0, 241.0, 232.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6862475893075703, "mean_inference_ms": 1.2084870807976076, "mean_action_processing_ms": 0.1327752879991328, "mean_env_wait_ms": 0.8519673280789902, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 2508800, "num_agent_steps_trained": 2508800, "num_env_steps_sampled": 1254400, "num_env_steps_trained": 1254400, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 1254400, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 2508800, "timers": {"training_iteration_time_ms": 3612.914, "learn_time_ms": 1114.988, "learn_throughput": 11479.942, "synch_weights_time_ms": 12.775}, "counters": {"num_env_steps_sampled": 1254400, "num_env_steps_trained": 1254400, "num_agent_steps_sampled": 2508800, "num_agent_steps_trained": 2508800}, "done": false, "episodes_total": 3136, "training_iteration": 98, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-06-16", "timestamp": 1666580776, "time_this_iter_s": 3.6175270080566406, "time_total_s": 366.1502945423126, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, 
"device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, 
"sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": 
{"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", 
"observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, 
"keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function 
get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function 
gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 366.1502945423126, "timesteps_since_restore": 0, "iterations_since_restore": 98, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.849999999999998, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 161.4, "sparse_reward_min": 120, "sparse_reward_max": 180, "shaped_reward_mean": 141.62, "shaped_reward_min": 108, "shaped_reward_max": 161, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 13.95, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 14.55, "onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 13.29, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 13.8, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.78, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 1.03, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.4, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, 
"useful_onion_drop_agent_1_mean": 0.67, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 12.83, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 13.24, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 4.98, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.18, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 3.5, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 3.72, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.67, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.74, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 6, "useful_dish_drop_agent_0_mean": 0.23, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.2, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 4.17, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.25, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.08, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.13, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.05, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.07, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 3, "optimal_onion_potting_agent_0_mean": 12.83, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 13.24, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 12.83, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 13.24, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 6.3108873358076425e-31, "cur_lr": 0.0010000000474974513, "total_loss": 0.0005003714468330145, "policy_loss": 
0.00033585677738301456, "vf_loss": 7.55753231048584, "vf_explained_var": 0.6947751045227051, "kl": 0.0021784892305731773, "entropy": 1.1824755668640137, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 1267200, "num_env_steps_trained": 1267200, "num_agent_steps_sampled": 2534400, "num_agent_steps_trained": 2534400}, "sampler_results": {"episode_reward_max": 519.0, "episode_reward_min": 348.0, "episode_reward_mean": 464.42, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 169.0}, "policy_reward_max": {"ppo": 270.0}, "policy_reward_mean": {"ppo": 232.21}, "custom_metrics": {"sparse_reward_mean": 161.4, "sparse_reward_min": 120, "sparse_reward_max": 180, "shaped_reward_mean": 141.62, "shaped_reward_min": 108, "shaped_reward_max": 161, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 13.95, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 23, 
"onion_pickup_agent_1_mean": 14.55, "onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 13.29, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 13.8, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.78, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 1.03, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.4, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, "useful_onion_drop_agent_1_mean": 0.67, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 12.83, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 13.24, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 4.98, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.18, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 3.5, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 3.72, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.67, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.74, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 6, "useful_dish_drop_agent_0_mean": 0.23, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.2, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 4.17, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.25, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.08, 
"soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.13, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.05, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.07, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 3, "optimal_onion_potting_agent_0_mean": 12.83, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 13.24, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 12.83, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 13.24, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, 
"useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [459.0, 399.0, 413.0, 513.0, 459.0, 516.0, 468.0, 510.0, 462.0, 459.0, 399.0, 465.0, 410.0, 401.0, 510.0, 470.0, 473.0, 470.0, 348.0, 513.0, 459.0, 510.0, 473.0, 453.0, 513.0, 459.0, 516.0, 513.0, 419.0, 453.0, 402.0, 450.0, 504.0, 413.0, 459.0, 450.0, 462.0, 513.0, 459.0, 462.0, 462.0, 416.0, 453.0, 465.0, 456.0, 419.0, 513.0, 462.0, 465.0, 513.0, 438.0, 453.0, 459.0, 507.0, 476.0, 510.0, 459.0, 407.0, 481.0, 456.0, 470.0, 405.0, 465.0, 416.0, 444.0, 462.0, 450.0, 473.0, 459.0, 519.0, 467.0, 465.0, 465.0, 473.0, 519.0, 421.0, 519.0, 453.0, 510.0, 462.0, 476.0, 516.0, 470.0, 450.0, 462.0, 456.0, 467.0, 492.0, 455.0, 402.0, 519.0, 459.0, 513.0, 510.0, 444.0, 459.0, 465.0, 513.0, 418.0, 450.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [232.0, 227.0, 209.0, 190.0, 204.0, 209.0, 266.0, 247.0, 221.0, 238.0, 255.0, 261.0, 230.0, 238.0, 249.0, 261.0, 241.0, 221.0, 225.0, 234.0, 187.0, 212.0, 237.0, 228.0, 197.0, 213.0, 207.0, 194.0, 251.0, 259.0, 245.0, 225.0, 231.0, 242.0, 231.0, 239.0, 169.0, 179.0, 270.0, 243.0, 235.0, 224.0, 259.0, 
251.0, 245.0, 228.0, 227.0, 226.0, 246.0, 267.0, 219.0, 240.0, 252.0, 264.0, 265.0, 248.0, 205.0, 214.0, 215.0, 238.0, 202.0, 200.0, 231.0, 219.0, 255.0, 249.0, 206.0, 207.0, 244.0, 215.0, 208.0, 242.0, 230.0, 232.0, 254.0, 259.0, 238.0, 221.0, 229.0, 233.0, 218.0, 244.0, 202.0, 214.0, 217.0, 236.0, 226.0, 239.0, 222.0, 234.0, 208.0, 211.0, 258.0, 255.0, 226.0, 236.0, 223.0, 242.0, 251.0, 262.0, 216.0, 222.0, 224.0, 229.0, 231.0, 228.0, 239.0, 268.0, 233.0, 243.0, 253.0, 257.0, 218.0, 241.0, 202.0, 205.0, 236.0, 245.0, 214.0, 242.0, 235.0, 235.0, 218.0, 187.0, 244.0, 221.0, 221.0, 195.0, 233.0, 211.0, 221.0, 241.0, 234.0, 216.0, 241.0, 232.0, 239.0, 220.0, 255.0, 264.0, 231.0, 236.0, 223.0, 242.0, 234.0, 231.0, 242.0, 231.0, 258.0, 261.0, 205.0, 216.0, 258.0, 261.0, 210.0, 243.0, 254.0, 256.0, 235.0, 227.0, 235.0, 241.0, 253.0, 263.0, 233.0, 237.0, 228.0, 222.0, 234.0, 228.0, 230.0, 226.0, 234.0, 233.0, 251.0, 241.0, 215.0, 240.0, 195.0, 207.0, 258.0, 261.0, 225.0, 234.0, 247.0, 266.0, 253.0, 257.0, 234.0, 210.0, 240.0, 219.0, 234.0, 231.0, 253.0, 260.0, 206.0, 212.0, 221.0, 229.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6861435262389413, "mean_inference_ms": 1.2082559596790565, "mean_action_processing_ms": 0.13275586923142962, "mean_env_wait_ms": 0.8514212289161043, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 519.0, "episode_reward_min": 348.0, "episode_reward_mean": 464.42, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 169.0}, "policy_reward_max": {"ppo": 270.0}, "policy_reward_mean": {"ppo": 232.21}, "hist_stats": {"episode_reward": [459.0, 399.0, 413.0, 513.0, 459.0, 516.0, 468.0, 510.0, 462.0, 459.0, 399.0, 465.0, 410.0, 401.0, 510.0, 470.0, 473.0, 470.0, 348.0, 513.0, 459.0, 510.0, 473.0, 453.0, 513.0, 459.0, 516.0, 513.0, 419.0, 453.0, 402.0, 450.0, 504.0, 413.0, 459.0, 450.0, 462.0, 513.0, 459.0, 462.0, 462.0, 416.0, 453.0, 465.0, 456.0, 419.0, 513.0, 462.0, 465.0, 513.0, 438.0, 
453.0, 459.0, 507.0, 476.0, 510.0, 459.0, 407.0, 481.0, 456.0, 470.0, 405.0, 465.0, 416.0, 444.0, 462.0, 450.0, 473.0, 459.0, 519.0, 467.0, 465.0, 465.0, 473.0, 519.0, 421.0, 519.0, 453.0, 510.0, 462.0, 476.0, 516.0, 470.0, 450.0, 462.0, 456.0, 467.0, 492.0, 455.0, 402.0, 519.0, 459.0, 513.0, 510.0, 444.0, 459.0, 465.0, 513.0, 418.0, 450.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [232.0, 227.0, 209.0, 190.0, 204.0, 209.0, 266.0, 247.0, 221.0, 238.0, 255.0, 261.0, 230.0, 238.0, 249.0, 261.0, 241.0, 221.0, 225.0, 234.0, 187.0, 212.0, 237.0, 228.0, 197.0, 213.0, 207.0, 194.0, 251.0, 259.0, 245.0, 225.0, 231.0, 242.0, 231.0, 239.0, 169.0, 179.0, 270.0, 243.0, 235.0, 224.0, 259.0, 251.0, 245.0, 228.0, 227.0, 226.0, 246.0, 267.0, 219.0, 240.0, 252.0, 264.0, 265.0, 248.0, 205.0, 214.0, 215.0, 238.0, 202.0, 200.0, 231.0, 219.0, 255.0, 249.0, 206.0, 207.0, 244.0, 215.0, 208.0, 242.0, 230.0, 232.0, 254.0, 259.0, 238.0, 221.0, 229.0, 233.0, 218.0, 244.0, 202.0, 214.0, 217.0, 236.0, 226.0, 239.0, 222.0, 234.0, 208.0, 211.0, 258.0, 255.0, 226.0, 236.0, 223.0, 242.0, 251.0, 262.0, 216.0, 222.0, 224.0, 229.0, 231.0, 228.0, 239.0, 268.0, 233.0, 243.0, 253.0, 257.0, 218.0, 241.0, 202.0, 205.0, 236.0, 245.0, 214.0, 242.0, 235.0, 235.0, 218.0, 187.0, 244.0, 221.0, 221.0, 195.0, 233.0, 211.0, 221.0, 241.0, 234.0, 216.0, 241.0, 232.0, 239.0, 220.0, 255.0, 264.0, 231.0, 236.0, 223.0, 242.0, 234.0, 231.0, 242.0, 231.0, 258.0, 261.0, 205.0, 216.0, 258.0, 261.0, 210.0, 243.0, 254.0, 256.0, 235.0, 
227.0, 235.0, 241.0, 253.0, 263.0, 233.0, 237.0, 228.0, 222.0, 234.0, 228.0, 230.0, 226.0, 234.0, 233.0, 251.0, 241.0, 215.0, 240.0, 195.0, 207.0, 258.0, 261.0, 225.0, 234.0, 247.0, 266.0, 253.0, 257.0, 234.0, 210.0, 240.0, 219.0, 234.0, 231.0, 253.0, 260.0, 206.0, 212.0, 221.0, 229.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6861435262389413, "mean_inference_ms": 1.2082559596790565, "mean_action_processing_ms": 0.13275586923142962, "mean_env_wait_ms": 0.8514212289161043, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 2534400, "num_agent_steps_trained": 2534400, "num_env_steps_sampled": 1267200, "num_env_steps_trained": 1267200, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 1267200, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 2534400, "timers": {"training_iteration_time_ms": 3602.781, "learn_time_ms": 1111.695, "learn_throughput": 11513.947, "synch_weights_time_ms": 13.184}, "counters": {"num_env_steps_sampled": 1267200, "num_env_steps_trained": 1267200, "num_agent_steps_sampled": 2534400, "num_agent_steps_trained": 2534400}, "done": false, "episodes_total": 3168, "training_iteration": 99, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-06-20", "timestamp": 1666580780, "time_this_iter_s": 3.540189743041992, "time_total_s": 369.6904842853546, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, 
"device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, 
"sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": 
{"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", 
"observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, 
"keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function 
get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function 
gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 369.6904842853546, "timesteps_since_restore": 0, "iterations_since_restore": 99, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.580000000000002, "ram_util_percent": 10.6}}
+{"evaluation": {"average_sparse_reward": 100.0, "num_healthy_workers": 0, "num_recreated_workers": 0}, "custom_metrics": {"sparse_reward_mean": 165.4, "sparse_reward_min": 140, "sparse_reward_max": 180, "shaped_reward_mean": 143.52, "shaped_reward_min": 118, "shaped_reward_max": 161, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.1, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 14.48, "onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 13.54, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 13.84, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.77, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.75, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 
0.33, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, "useful_onion_drop_agent_1_mean": 0.47, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 12.97, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 13.46, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.06, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.35, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 3.51, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 3.7, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.67, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.81, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 6, "useful_dish_drop_agent_0_mean": 0.22, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.25, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 4.24, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.35, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.18, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.21, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.04, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.08, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 3, "optimal_onion_potting_agent_0_mean": 12.97, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 13.46, "optimal_onion_potting_agent_1_min": 5, 
"optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 12.97, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 13.46, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 3.1554436679038213e-31, "cur_lr": 
0.0010000000474974513, "total_loss": -0.0026895212940871716, "policy_loss": -0.0028634630143642426, "vf_loss": 7.576631546020508, "vf_explained_var": 0.6813052296638489, "kl": 0.0019578980281949043, "entropy": 1.167441725730896, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 1280000, "num_env_steps_trained": 1280000, "num_agent_steps_sampled": 2560000, "num_agent_steps_trained": 2560000}, "sampler_results": {"episode_reward_max": 519.0, "episode_reward_min": 399.0, "episode_reward_mean": 474.32, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 187.0}, "policy_reward_max": {"ppo": 271.0}, "policy_reward_mean": {"ppo": 237.16}, "custom_metrics": {"sparse_reward_mean": 165.4, "sparse_reward_min": 140, "sparse_reward_max": 180, "shaped_reward_mean": 143.52, "shaped_reward_min": 118, "shaped_reward_max": 161, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.1, 
"onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 14.48, "onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 13.54, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 13.84, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.77, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.75, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.33, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, "useful_onion_drop_agent_1_mean": 0.47, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 12.97, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 13.46, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.06, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.35, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 3.51, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 3.7, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.67, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.81, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 6, "useful_dish_drop_agent_0_mean": 0.22, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.25, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 4.24, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.35, "soup_pickup_agent_1_min": 1, 
"soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.18, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.21, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.04, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.08, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 3, "optimal_onion_potting_agent_0_mean": 12.97, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 13.46, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 12.97, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 13.46, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 
0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [504.0, 413.0, 459.0, 450.0, 462.0, 513.0, 459.0, 462.0, 462.0, 416.0, 453.0, 465.0, 456.0, 419.0, 513.0, 462.0, 465.0, 513.0, 438.0, 453.0, 459.0, 507.0, 476.0, 510.0, 459.0, 407.0, 481.0, 456.0, 470.0, 405.0, 465.0, 416.0, 444.0, 462.0, 450.0, 473.0, 459.0, 519.0, 467.0, 465.0, 465.0, 473.0, 519.0, 421.0, 519.0, 453.0, 510.0, 462.0, 476.0, 516.0, 470.0, 450.0, 462.0, 456.0, 467.0, 492.0, 455.0, 402.0, 519.0, 459.0, 513.0, 510.0, 444.0, 459.0, 465.0, 513.0, 418.0, 450.0, 501.0, 444.0, 516.0, 473.0, 467.0, 513.0, 504.0, 446.0, 447.0, 501.0, 516.0, 510.0, 510.0, 504.0, 447.0, 519.0, 513.0, 513.0, 459.0, 447.0, 516.0, 519.0, 510.0, 519.0, 513.0, 470.0, 516.0, 519.0, 399.0, 516.0, 467.0, 513.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [255.0, 249.0, 206.0, 207.0, 244.0, 215.0, 208.0, 242.0, 230.0, 232.0, 254.0, 259.0, 238.0, 221.0, 229.0, 233.0, 218.0, 244.0, 202.0, 214.0, 217.0, 236.0, 226.0, 239.0, 222.0, 234.0, 208.0, 211.0, 258.0, 255.0, 226.0, 236.0, 223.0, 242.0, 251.0, 262.0, 216.0, 
222.0, 224.0, 229.0, 231.0, 228.0, 239.0, 268.0, 233.0, 243.0, 253.0, 257.0, 218.0, 241.0, 202.0, 205.0, 236.0, 245.0, 214.0, 242.0, 235.0, 235.0, 218.0, 187.0, 244.0, 221.0, 221.0, 195.0, 233.0, 211.0, 221.0, 241.0, 234.0, 216.0, 241.0, 232.0, 239.0, 220.0, 255.0, 264.0, 231.0, 236.0, 223.0, 242.0, 234.0, 231.0, 242.0, 231.0, 258.0, 261.0, 205.0, 216.0, 258.0, 261.0, 210.0, 243.0, 254.0, 256.0, 235.0, 227.0, 235.0, 241.0, 253.0, 263.0, 233.0, 237.0, 228.0, 222.0, 234.0, 228.0, 230.0, 226.0, 234.0, 233.0, 251.0, 241.0, 215.0, 240.0, 195.0, 207.0, 258.0, 261.0, 225.0, 234.0, 247.0, 266.0, 253.0, 257.0, 234.0, 210.0, 240.0, 219.0, 234.0, 231.0, 253.0, 260.0, 206.0, 212.0, 221.0, 229.0, 242.0, 259.0, 229.0, 215.0, 256.0, 260.0, 225.0, 248.0, 223.0, 244.0, 255.0, 258.0, 255.0, 249.0, 219.0, 227.0, 222.0, 225.0, 259.0, 242.0, 255.0, 261.0, 260.0, 250.0, 253.0, 257.0, 253.0, 251.0, 219.0, 228.0, 271.0, 248.0, 261.0, 252.0, 251.0, 262.0, 220.0, 239.0, 225.0, 222.0, 262.0, 254.0, 266.0, 253.0, 261.0, 249.0, 250.0, 269.0, 254.0, 259.0, 234.0, 236.0, 253.0, 263.0, 262.0, 257.0, 199.0, 200.0, 252.0, 264.0, 248.0, 219.0, 249.0, 264.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6860127053263756, "mean_inference_ms": 1.2080154662639697, "mean_action_processing_ms": 0.13273940443506851, "mean_env_wait_ms": 0.8508791865244192, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 519.0, "episode_reward_min": 399.0, "episode_reward_mean": 474.32, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 187.0}, "policy_reward_max": {"ppo": 271.0}, "policy_reward_mean": {"ppo": 237.16}, "hist_stats": {"episode_reward": [504.0, 413.0, 459.0, 450.0, 462.0, 513.0, 459.0, 462.0, 462.0, 416.0, 453.0, 465.0, 456.0, 419.0, 513.0, 462.0, 465.0, 513.0, 438.0, 453.0, 459.0, 507.0, 476.0, 510.0, 459.0, 407.0, 481.0, 456.0, 470.0, 405.0, 465.0, 416.0, 444.0, 462.0, 450.0, 473.0, 459.0, 519.0, 467.0, 465.0, 465.0, 473.0, 519.0, 421.0, 519.0, 
453.0, 510.0, 462.0, 476.0, 516.0, 470.0, 450.0, 462.0, 456.0, 467.0, 492.0, 455.0, 402.0, 519.0, 459.0, 513.0, 510.0, 444.0, 459.0, 465.0, 513.0, 418.0, 450.0, 501.0, 444.0, 516.0, 473.0, 467.0, 513.0, 504.0, 446.0, 447.0, 501.0, 516.0, 510.0, 510.0, 504.0, 447.0, 519.0, 513.0, 513.0, 459.0, 447.0, 516.0, 519.0, 510.0, 519.0, 513.0, 470.0, 516.0, 519.0, 399.0, 516.0, 467.0, 513.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [255.0, 249.0, 206.0, 207.0, 244.0, 215.0, 208.0, 242.0, 230.0, 232.0, 254.0, 259.0, 238.0, 221.0, 229.0, 233.0, 218.0, 244.0, 202.0, 214.0, 217.0, 236.0, 226.0, 239.0, 222.0, 234.0, 208.0, 211.0, 258.0, 255.0, 226.0, 236.0, 223.0, 242.0, 251.0, 262.0, 216.0, 222.0, 224.0, 229.0, 231.0, 228.0, 239.0, 268.0, 233.0, 243.0, 253.0, 257.0, 218.0, 241.0, 202.0, 205.0, 236.0, 245.0, 214.0, 242.0, 235.0, 235.0, 218.0, 187.0, 244.0, 221.0, 221.0, 195.0, 233.0, 211.0, 221.0, 241.0, 234.0, 216.0, 241.0, 232.0, 239.0, 220.0, 255.0, 264.0, 231.0, 236.0, 223.0, 242.0, 234.0, 231.0, 242.0, 231.0, 258.0, 261.0, 205.0, 216.0, 258.0, 261.0, 210.0, 243.0, 254.0, 256.0, 235.0, 227.0, 235.0, 241.0, 253.0, 263.0, 233.0, 237.0, 228.0, 222.0, 234.0, 228.0, 230.0, 226.0, 234.0, 233.0, 251.0, 241.0, 215.0, 240.0, 195.0, 207.0, 258.0, 261.0, 225.0, 234.0, 247.0, 266.0, 253.0, 257.0, 234.0, 210.0, 240.0, 219.0, 234.0, 231.0, 253.0, 260.0, 206.0, 212.0, 221.0, 229.0, 242.0, 259.0, 229.0, 215.0, 256.0, 260.0, 225.0, 248.0, 223.0, 244.0, 255.0, 258.0, 255.0, 249.0, 219.0, 227.0, 222.0, 
225.0, 259.0, 242.0, 255.0, 261.0, 260.0, 250.0, 253.0, 257.0, 253.0, 251.0, 219.0, 228.0, 271.0, 248.0, 261.0, 252.0, 251.0, 262.0, 220.0, 239.0, 225.0, 222.0, 262.0, 254.0, 266.0, 253.0, 261.0, 249.0, 250.0, 269.0, 254.0, 259.0, 234.0, 236.0, 253.0, 263.0, 262.0, 257.0, 199.0, 200.0, 252.0, 264.0, 248.0, 219.0, 249.0, 264.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6860127053263756, "mean_inference_ms": 1.2080154662639697, "mean_action_processing_ms": 0.13273940443506851, "mean_env_wait_ms": 0.8508791865244192, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 2560000, "num_agent_steps_trained": 2560000, "num_env_steps_sampled": 1280000, "num_env_steps_trained": 1280000, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 1280000, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 2560000, "timers": {"training_iteration_time_ms": 3603.202, "learn_time_ms": 1117.688, "learn_throughput": 11452.208, "synch_weights_time_ms": 12.968}, "counters": {"num_env_steps_sampled": 1280000, "num_env_steps_trained": 1280000, "num_agent_steps_sampled": 2560000, "num_agent_steps_trained": 2560000}, "done": false, "episodes_total": 3200, "training_iteration": 100, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-06-27", "timestamp": 1666580787, "time_this_iter_s": 7.235398530960083, "time_total_s": 376.9258828163147, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": 
true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, 
"enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, 
"tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": 
"deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, 
"keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function 
get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function 
gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 376.9258828163147, "timesteps_since_restore": 0, "iterations_since_restore": 100, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 14.363636363636367, "ram_util_percent": 10.599999999999998}}
+{"custom_metrics": {"sparse_reward_mean": 167.0, "sparse_reward_min": 120, "sparse_reward_max": 180, "shaped_reward_mean": 144.04, "shaped_reward_min": 93, "shaped_reward_max": 159, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.7, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 13.97, "onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 14.07, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 13.38, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.81, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.75, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.42, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, 
"useful_onion_drop_agent_1_mean": 0.39, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 13.58, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 12.91, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 20, "dish_pickup_agent_0_mean": 4.97, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.52, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 3.48, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 3.76, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.74, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 0.8, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.36, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 6, "useful_dish_drop_agent_1_mean": 0.31, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 5, "soup_pickup_agent_0_mean": 4.08, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.52, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 4.01, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.44, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.04, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 13.58, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 12.91, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 20, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 13.58, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 12.91, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 20, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 1.5777218339519106e-31, "cur_lr": 0.0010000000474974513, "total_loss": -0.001677508233115077, "policy_loss": 
-0.0018479716964066029, "vf_loss": 7.583566188812256, "vf_explained_var": 0.7014689445495605, "kl": 0.001794546958990395, "entropy": 1.1757853031158447, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 1292800, "num_env_steps_trained": 1292800, "num_agent_steps_sampled": 2585600, "num_agent_steps_trained": 2585600}, "sampler_results": {"episode_reward_max": 519.0, "episode_reward_min": 333.0, "episode_reward_mean": 478.04, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 158.0}, "policy_reward_max": {"ppo": 271.0}, "policy_reward_mean": {"ppo": 239.02}, "custom_metrics": {"sparse_reward_mean": 167.0, "sparse_reward_min": 120, "sparse_reward_max": 180, "shaped_reward_mean": 144.04, "shaped_reward_min": 93, "shaped_reward_max": 159, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.7, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 24, 
"onion_pickup_agent_1_mean": 13.97, "onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 14.07, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 13.38, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.81, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.75, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.42, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, "useful_onion_drop_agent_1_mean": 0.39, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 13.58, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 12.91, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 20, "dish_pickup_agent_0_mean": 4.97, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.52, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 3.48, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 3.76, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.74, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 0.8, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.36, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 6, "useful_dish_drop_agent_1_mean": 0.31, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 5, "soup_pickup_agent_0_mean": 4.08, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.52, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 4.01, 
"soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.44, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.04, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 13.58, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 12.91, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 20, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 13.58, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 12.91, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 20, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, 
"useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [444.0, 462.0, 450.0, 473.0, 459.0, 519.0, 467.0, 465.0, 465.0, 473.0, 519.0, 421.0, 519.0, 453.0, 510.0, 462.0, 476.0, 516.0, 470.0, 450.0, 462.0, 456.0, 467.0, 492.0, 455.0, 402.0, 519.0, 459.0, 513.0, 510.0, 444.0, 459.0, 465.0, 513.0, 418.0, 450.0, 501.0, 444.0, 516.0, 473.0, 467.0, 513.0, 504.0, 446.0, 447.0, 501.0, 516.0, 510.0, 510.0, 504.0, 447.0, 519.0, 513.0, 513.0, 459.0, 447.0, 516.0, 519.0, 510.0, 519.0, 513.0, 470.0, 516.0, 519.0, 399.0, 516.0, 467.0, 513.0, 513.0, 462.0, 513.0, 459.0, 516.0, 333.0, 513.0, 410.0, 459.0, 462.0, 456.0, 461.0, 516.0, 513.0, 447.0, 465.0, 476.0, 519.0, 447.0, 401.0, 513.0, 513.0, 470.0, 410.0, 516.0, 516.0, 459.0, 513.0, 447.0, 450.0, 516.0, 456.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [233.0, 211.0, 221.0, 241.0, 234.0, 216.0, 241.0, 232.0, 239.0, 220.0, 255.0, 264.0, 231.0, 236.0, 223.0, 242.0, 234.0, 231.0, 242.0, 231.0, 258.0, 261.0, 205.0, 216.0, 258.0, 261.0, 210.0, 243.0, 254.0, 256.0, 235.0, 227.0, 235.0, 241.0, 253.0, 263.0, 233.0, 237.0, 228.0, 222.0, 234.0, 228.0, 230.0, 
226.0, 234.0, 233.0, 251.0, 241.0, 215.0, 240.0, 195.0, 207.0, 258.0, 261.0, 225.0, 234.0, 247.0, 266.0, 253.0, 257.0, 234.0, 210.0, 240.0, 219.0, 234.0, 231.0, 253.0, 260.0, 206.0, 212.0, 221.0, 229.0, 242.0, 259.0, 229.0, 215.0, 256.0, 260.0, 225.0, 248.0, 223.0, 244.0, 255.0, 258.0, 255.0, 249.0, 219.0, 227.0, 222.0, 225.0, 259.0, 242.0, 255.0, 261.0, 260.0, 250.0, 253.0, 257.0, 253.0, 251.0, 219.0, 228.0, 271.0, 248.0, 261.0, 252.0, 251.0, 262.0, 220.0, 239.0, 225.0, 222.0, 262.0, 254.0, 266.0, 253.0, 261.0, 249.0, 250.0, 269.0, 254.0, 259.0, 234.0, 236.0, 253.0, 263.0, 262.0, 257.0, 199.0, 200.0, 252.0, 264.0, 248.0, 219.0, 249.0, 264.0, 263.0, 250.0, 234.0, 228.0, 261.0, 252.0, 218.0, 241.0, 254.0, 262.0, 175.0, 158.0, 253.0, 260.0, 208.0, 202.0, 228.0, 231.0, 240.0, 222.0, 215.0, 241.0, 214.0, 247.0, 260.0, 256.0, 254.0, 259.0, 231.0, 216.0, 231.0, 234.0, 250.0, 226.0, 260.0, 259.0, 230.0, 217.0, 199.0, 202.0, 263.0, 250.0, 250.0, 263.0, 241.0, 229.0, 204.0, 206.0, 254.0, 262.0, 249.0, 267.0, 221.0, 238.0, 261.0, 252.0, 231.0, 216.0, 223.0, 227.0, 250.0, 266.0, 228.0, 228.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6858588435315744, "mean_inference_ms": 1.207797081605332, "mean_action_processing_ms": 0.13272237266615255, "mean_env_wait_ms": 0.8503743147416343, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 519.0, "episode_reward_min": 333.0, "episode_reward_mean": 478.04, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 158.0}, "policy_reward_max": {"ppo": 271.0}, "policy_reward_mean": {"ppo": 239.02}, "hist_stats": {"episode_reward": [444.0, 462.0, 450.0, 473.0, 459.0, 519.0, 467.0, 465.0, 465.0, 473.0, 519.0, 421.0, 519.0, 453.0, 510.0, 462.0, 476.0, 516.0, 470.0, 450.0, 462.0, 456.0, 467.0, 492.0, 455.0, 402.0, 519.0, 459.0, 513.0, 510.0, 444.0, 459.0, 465.0, 513.0, 418.0, 450.0, 501.0, 444.0, 516.0, 473.0, 467.0, 513.0, 504.0, 446.0, 447.0, 501.0, 516.0, 510.0, 510.0, 504.0, 447.0, 
519.0, 513.0, 513.0, 459.0, 447.0, 516.0, 519.0, 510.0, 519.0, 513.0, 470.0, 516.0, 519.0, 399.0, 516.0, 467.0, 513.0, 513.0, 462.0, 513.0, 459.0, 516.0, 333.0, 513.0, 410.0, 459.0, 462.0, 456.0, 461.0, 516.0, 513.0, 447.0, 465.0, 476.0, 519.0, 447.0, 401.0, 513.0, 513.0, 470.0, 410.0, 516.0, 516.0, 459.0, 513.0, 447.0, 450.0, 516.0, 456.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [233.0, 211.0, 221.0, 241.0, 234.0, 216.0, 241.0, 232.0, 239.0, 220.0, 255.0, 264.0, 231.0, 236.0, 223.0, 242.0, 234.0, 231.0, 242.0, 231.0, 258.0, 261.0, 205.0, 216.0, 258.0, 261.0, 210.0, 243.0, 254.0, 256.0, 235.0, 227.0, 235.0, 241.0, 253.0, 263.0, 233.0, 237.0, 228.0, 222.0, 234.0, 228.0, 230.0, 226.0, 234.0, 233.0, 251.0, 241.0, 215.0, 240.0, 195.0, 207.0, 258.0, 261.0, 225.0, 234.0, 247.0, 266.0, 253.0, 257.0, 234.0, 210.0, 240.0, 219.0, 234.0, 231.0, 253.0, 260.0, 206.0, 212.0, 221.0, 229.0, 242.0, 259.0, 229.0, 215.0, 256.0, 260.0, 225.0, 248.0, 223.0, 244.0, 255.0, 258.0, 255.0, 249.0, 219.0, 227.0, 222.0, 225.0, 259.0, 242.0, 255.0, 261.0, 260.0, 250.0, 253.0, 257.0, 253.0, 251.0, 219.0, 228.0, 271.0, 248.0, 261.0, 252.0, 251.0, 262.0, 220.0, 239.0, 225.0, 222.0, 262.0, 254.0, 266.0, 253.0, 261.0, 249.0, 250.0, 269.0, 254.0, 259.0, 234.0, 236.0, 253.0, 263.0, 262.0, 257.0, 199.0, 200.0, 252.0, 264.0, 248.0, 219.0, 249.0, 264.0, 263.0, 250.0, 234.0, 228.0, 261.0, 252.0, 218.0, 241.0, 254.0, 262.0, 175.0, 158.0, 253.0, 260.0, 208.0, 202.0, 228.0, 231.0, 240.0, 222.0, 215.0, 241.0, 214.0, 
247.0, 260.0, 256.0, 254.0, 259.0, 231.0, 216.0, 231.0, 234.0, 250.0, 226.0, 260.0, 259.0, 230.0, 217.0, 199.0, 202.0, 263.0, 250.0, 250.0, 263.0, 241.0, 229.0, 204.0, 206.0, 254.0, 262.0, 249.0, 267.0, 221.0, 238.0, 261.0, 252.0, 231.0, 216.0, 223.0, 227.0, 250.0, 266.0, 228.0, 228.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6858588435315744, "mean_inference_ms": 1.207797081605332, "mean_action_processing_ms": 0.13272237266615255, "mean_env_wait_ms": 0.8503743147416343, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 2585600, "num_agent_steps_trained": 2585600, "num_env_steps_sampled": 1292800, "num_env_steps_trained": 1292800, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 1292800, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 2585600, "timers": {"training_iteration_time_ms": 3600.864, "learn_time_ms": 1114.9, "learn_throughput": 11480.855, "synch_weights_time_ms": 13.357}, "counters": {"num_env_steps_sampled": 1292800, "num_env_steps_trained": 1292800, "num_agent_steps_sampled": 2585600, "num_agent_steps_trained": 2585600}, "done": false, "episodes_total": 3232, "training_iteration": 101, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-06-31", "timestamp": 1666580791, "time_this_iter_s": 3.6746954917907715, "time_total_s": 380.60057830810547, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, 
"device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, 
"sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": 
{"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", 
"observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, 
"keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function 
get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function 
gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 380.60057830810547, "timesteps_since_restore": 0, "iterations_since_restore": 101, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.740000000000002, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 167.2, "sparse_reward_min": 120, "sparse_reward_max": 180, "shaped_reward_mean": 143.43, "shaped_reward_min": 93, "shaped_reward_max": 162, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.47, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 14.14, "onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 13.94, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 13.57, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.6, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.76, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.32, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, 
"useful_onion_drop_agent_1_mean": 0.38, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 13.52, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 13.08, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 20, "dish_pickup_agent_0_mean": 5.17, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.48, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 3.43, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 3.53, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.86, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 0.9, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.36, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 6, "useful_dish_drop_agent_1_mean": 0.35, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 5, "soup_pickup_agent_0_mean": 4.12, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.44, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 4.09, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.38, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 13.52, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 13.08, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 20, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 13.52, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 13.08, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 20, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 7.888609169759553e-32, "cur_lr": 0.0010000000474974513, "total_loss": -0.004032195080071688, "policy_loss": -0.004206694662570953, 
"vf_loss": 7.6045732498168945, "vf_explained_var": 0.6989070177078247, "kl": 0.0014940658584237099, "entropy": 1.1719143390655518, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 1305600, "num_env_steps_trained": 1305600, "num_agent_steps_sampled": 2611200, "num_agent_steps_trained": 2611200}, "sampler_results": {"episode_reward_max": 522.0, "episode_reward_min": 333.0, "episode_reward_mean": 477.83, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 158.0}, "policy_reward_max": {"ppo": 272.0}, "policy_reward_mean": {"ppo": 238.915}, "custom_metrics": {"sparse_reward_mean": 167.2, "sparse_reward_min": 120, "sparse_reward_max": 180, "shaped_reward_mean": 143.43, "shaped_reward_min": 93, "shaped_reward_max": 162, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.47, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 14.14, 
"onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 13.94, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 13.57, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.6, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.76, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.32, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.38, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 13.52, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 13.08, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 20, "dish_pickup_agent_0_mean": 5.17, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.48, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 3.43, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 3.53, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.86, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 0.9, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.36, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 6, "useful_dish_drop_agent_1_mean": 0.35, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 5, "soup_pickup_agent_0_mean": 4.12, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.44, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 4.09, "soup_delivery_agent_0_min": 0, 
"soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.38, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 13.52, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 13.08, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 20, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 13.52, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 13.08, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 20, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [465.0, 513.0, 418.0, 450.0, 501.0, 444.0, 516.0, 473.0, 467.0, 513.0, 504.0, 446.0, 447.0, 501.0, 516.0, 510.0, 510.0, 504.0, 447.0, 519.0, 513.0, 513.0, 459.0, 447.0, 516.0, 519.0, 510.0, 519.0, 513.0, 470.0, 516.0, 519.0, 399.0, 516.0, 467.0, 513.0, 513.0, 462.0, 513.0, 459.0, 516.0, 333.0, 513.0, 410.0, 459.0, 462.0, 456.0, 461.0, 516.0, 513.0, 447.0, 465.0, 476.0, 519.0, 447.0, 401.0, 513.0, 513.0, 470.0, 410.0, 516.0, 516.0, 459.0, 513.0, 447.0, 450.0, 516.0, 456.0, 507.0, 513.0, 419.0, 510.0, 519.0, 444.0, 513.0, 453.0, 441.0, 510.0, 435.0, 510.0, 450.0, 453.0, 473.0, 522.0, 462.0, 519.0, 419.0, 516.0, 462.0, 396.0, 459.0, 468.0, 387.0, 501.0, 455.0, 465.0, 516.0, 462.0, 519.0, 412.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [234.0, 231.0, 253.0, 260.0, 206.0, 212.0, 221.0, 229.0, 242.0, 259.0, 229.0, 215.0, 256.0, 260.0, 225.0, 248.0, 223.0, 244.0, 255.0, 258.0, 255.0, 249.0, 219.0, 227.0, 222.0, 225.0, 259.0, 242.0, 255.0, 261.0, 260.0, 250.0, 253.0, 257.0, 253.0, 251.0, 219.0, 228.0, 271.0, 248.0, 261.0, 252.0, 251.0, 262.0, 220.0, 239.0, 225.0, 222.0, 262.0, 
254.0, 266.0, 253.0, 261.0, 249.0, 250.0, 269.0, 254.0, 259.0, 234.0, 236.0, 253.0, 263.0, 262.0, 257.0, 199.0, 200.0, 252.0, 264.0, 248.0, 219.0, 249.0, 264.0, 263.0, 250.0, 234.0, 228.0, 261.0, 252.0, 218.0, 241.0, 254.0, 262.0, 175.0, 158.0, 253.0, 260.0, 208.0, 202.0, 228.0, 231.0, 240.0, 222.0, 215.0, 241.0, 214.0, 247.0, 260.0, 256.0, 254.0, 259.0, 231.0, 216.0, 231.0, 234.0, 250.0, 226.0, 260.0, 259.0, 230.0, 217.0, 199.0, 202.0, 263.0, 250.0, 250.0, 263.0, 241.0, 229.0, 204.0, 206.0, 254.0, 262.0, 249.0, 267.0, 221.0, 238.0, 261.0, 252.0, 231.0, 216.0, 223.0, 227.0, 250.0, 266.0, 228.0, 228.0, 258.0, 249.0, 252.0, 261.0, 216.0, 203.0, 249.0, 261.0, 252.0, 267.0, 223.0, 221.0, 261.0, 252.0, 222.0, 231.0, 225.0, 216.0, 263.0, 247.0, 207.0, 228.0, 260.0, 250.0, 233.0, 217.0, 229.0, 224.0, 229.0, 244.0, 267.0, 255.0, 217.0, 245.0, 270.0, 249.0, 213.0, 206.0, 250.0, 266.0, 242.0, 220.0, 181.0, 215.0, 231.0, 228.0, 230.0, 238.0, 200.0, 187.0, 244.0, 257.0, 219.0, 236.0, 247.0, 218.0, 247.0, 269.0, 235.0, 227.0, 272.0, 247.0, 211.0, 201.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6857117863988279, "mean_inference_ms": 1.2078722559315558, "mean_action_processing_ms": 0.1327028684250975, "mean_env_wait_ms": 0.850440345842403, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 522.0, "episode_reward_min": 333.0, "episode_reward_mean": 477.83, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 158.0}, "policy_reward_max": {"ppo": 272.0}, "policy_reward_mean": {"ppo": 238.915}, "hist_stats": {"episode_reward": [465.0, 513.0, 418.0, 450.0, 501.0, 444.0, 516.0, 473.0, 467.0, 513.0, 504.0, 446.0, 447.0, 501.0, 516.0, 510.0, 510.0, 504.0, 447.0, 519.0, 513.0, 513.0, 459.0, 447.0, 516.0, 519.0, 510.0, 519.0, 513.0, 470.0, 516.0, 519.0, 399.0, 516.0, 467.0, 513.0, 513.0, 462.0, 513.0, 459.0, 516.0, 333.0, 513.0, 410.0, 459.0, 462.0, 456.0, 461.0, 516.0, 513.0, 447.0, 465.0, 476.0, 519.0, 447.0, 401.0, 513.0, 
513.0, 470.0, 410.0, 516.0, 516.0, 459.0, 513.0, 447.0, 450.0, 516.0, 456.0, 507.0, 513.0, 419.0, 510.0, 519.0, 444.0, 513.0, 453.0, 441.0, 510.0, 435.0, 510.0, 450.0, 453.0, 473.0, 522.0, 462.0, 519.0, 419.0, 516.0, 462.0, 396.0, 459.0, 468.0, 387.0, 501.0, 455.0, 465.0, 516.0, 462.0, 519.0, 412.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [234.0, 231.0, 253.0, 260.0, 206.0, 212.0, 221.0, 229.0, 242.0, 259.0, 229.0, 215.0, 256.0, 260.0, 225.0, 248.0, 223.0, 244.0, 255.0, 258.0, 255.0, 249.0, 219.0, 227.0, 222.0, 225.0, 259.0, 242.0, 255.0, 261.0, 260.0, 250.0, 253.0, 257.0, 253.0, 251.0, 219.0, 228.0, 271.0, 248.0, 261.0, 252.0, 251.0, 262.0, 220.0, 239.0, 225.0, 222.0, 262.0, 254.0, 266.0, 253.0, 261.0, 249.0, 250.0, 269.0, 254.0, 259.0, 234.0, 236.0, 253.0, 263.0, 262.0, 257.0, 199.0, 200.0, 252.0, 264.0, 248.0, 219.0, 249.0, 264.0, 263.0, 250.0, 234.0, 228.0, 261.0, 252.0, 218.0, 241.0, 254.0, 262.0, 175.0, 158.0, 253.0, 260.0, 208.0, 202.0, 228.0, 231.0, 240.0, 222.0, 215.0, 241.0, 214.0, 247.0, 260.0, 256.0, 254.0, 259.0, 231.0, 216.0, 231.0, 234.0, 250.0, 226.0, 260.0, 259.0, 230.0, 217.0, 199.0, 202.0, 263.0, 250.0, 250.0, 263.0, 241.0, 229.0, 204.0, 206.0, 254.0, 262.0, 249.0, 267.0, 221.0, 238.0, 261.0, 252.0, 231.0, 216.0, 223.0, 227.0, 250.0, 266.0, 228.0, 228.0, 258.0, 249.0, 252.0, 261.0, 216.0, 203.0, 249.0, 261.0, 252.0, 267.0, 223.0, 221.0, 261.0, 252.0, 222.0, 231.0, 225.0, 216.0, 263.0, 247.0, 207.0, 228.0, 260.0, 250.0, 233.0, 217.0, 229.0, 224.0, 229.0, 
244.0, 267.0, 255.0, 217.0, 245.0, 270.0, 249.0, 213.0, 206.0, 250.0, 266.0, 242.0, 220.0, 181.0, 215.0, 231.0, 228.0, 230.0, 238.0, 200.0, 187.0, 244.0, 257.0, 219.0, 236.0, 247.0, 218.0, 247.0, 269.0, 235.0, 227.0, 272.0, 247.0, 211.0, 201.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6857117863988279, "mean_inference_ms": 1.2078722559315558, "mean_action_processing_ms": 0.1327028684250975, "mean_env_wait_ms": 0.850440345842403, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 2611200, "num_agent_steps_trained": 2611200, "num_env_steps_sampled": 1305600, "num_env_steps_trained": 1305600, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 1305600, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 2611200, "timers": {"training_iteration_time_ms": 3629.146, "learn_time_ms": 1112.093, "learn_throughput": 11509.827, "synch_weights_time_ms": 13.994}, "counters": {"num_env_steps_sampled": 1305600, "num_env_steps_trained": 1305600, "num_agent_steps_sampled": 2611200, "num_agent_steps_trained": 2611200}, "done": false, "episodes_total": 3264, "training_iteration": 102, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-06-35", "timestamp": 1666580795, "time_this_iter_s": 4.029216766357422, "time_total_s": 384.6297950744629, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 384.6297950744629, "timesteps_since_restore": 0, "iterations_since_restore": 102, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 21.53333333333333, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 164.2, "sparse_reward_min": 60, "sparse_reward_max": 180, "shaped_reward_mean": 142.26, "shaped_reward_min": 48, "shaped_reward_max": 165, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.53, "onion_pickup_agent_0_min": 5, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 14.05, "onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 13.94, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 13.46, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.67, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.8, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.44, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, 
"useful_onion_drop_agent_1_mean": 0.4, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 13.53, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 12.85, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.04, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.38, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 3.37, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 3.62, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.82, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 0.79, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.35, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 6, "useful_dish_drop_agent_1_mean": 0.25, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 5, "soup_pickup_agent_0_mean": 3.99, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.49, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 3.95, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.41, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.06, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 13.53, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 12.85, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 13.53, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 12.85, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 3.9443045848797766e-32, "cur_lr": 0.0010000000474974513, "total_loss": 0.000442040734924376, "policy_loss": 0.0002623022301122546, 
"vf_loss": 7.644353866577148, "vf_explained_var": 0.6994333267211914, "kl": 0.002050078473985195, "entropy": 1.1693940162658691, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 1318400, "num_env_steps_trained": 1318400, "num_agent_steps_sampled": 2636800, "num_agent_steps_trained": 2636800}, "sampler_results": {"episode_reward_max": 525.0, "episode_reward_min": 168.0, "episode_reward_mean": 470.66, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 80.0}, "policy_reward_max": {"ppo": 272.0}, "policy_reward_mean": {"ppo": 235.33}, "custom_metrics": {"sparse_reward_mean": 164.2, "sparse_reward_min": 60, "sparse_reward_max": 180, "shaped_reward_mean": 142.26, "shaped_reward_min": 48, "shaped_reward_max": 165, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.53, "onion_pickup_agent_0_min": 5, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 14.05, 
"onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 13.94, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 13.46, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.67, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.8, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.44, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.4, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 13.53, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 12.85, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.04, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.38, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 3.37, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 3.62, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.82, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 0.79, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.35, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 6, "useful_dish_drop_agent_1_mean": 0.25, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 5, "soup_pickup_agent_0_mean": 3.99, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.49, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 3.95, "soup_delivery_agent_0_min": 0, 
"soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.41, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.06, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 13.53, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 12.85, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 13.53, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 12.85, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [399.0, 516.0, 467.0, 513.0, 513.0, 462.0, 513.0, 459.0, 516.0, 333.0, 513.0, 410.0, 459.0, 462.0, 456.0, 461.0, 516.0, 513.0, 447.0, 465.0, 476.0, 519.0, 447.0, 401.0, 513.0, 513.0, 470.0, 410.0, 516.0, 516.0, 459.0, 513.0, 447.0, 450.0, 516.0, 456.0, 507.0, 513.0, 419.0, 510.0, 519.0, 444.0, 513.0, 453.0, 441.0, 510.0, 435.0, 510.0, 450.0, 453.0, 473.0, 522.0, 462.0, 519.0, 419.0, 516.0, 462.0, 396.0, 459.0, 468.0, 387.0, 501.0, 455.0, 465.0, 516.0, 462.0, 519.0, 412.0, 408.0, 513.0, 393.0, 507.0, 422.0, 344.0, 516.0, 462.0, 501.0, 470.0, 456.0, 408.0, 513.0, 519.0, 422.0, 525.0, 525.0, 504.0, 459.0, 168.0, 430.0, 484.0, 516.0, 504.0, 519.0, 516.0, 435.0, 507.0, 470.0, 525.0, 516.0, 504.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [199.0, 200.0, 252.0, 264.0, 248.0, 219.0, 249.0, 264.0, 263.0, 250.0, 234.0, 228.0, 261.0, 252.0, 218.0, 241.0, 254.0, 262.0, 175.0, 158.0, 253.0, 260.0, 208.0, 202.0, 228.0, 231.0, 240.0, 222.0, 215.0, 241.0, 214.0, 247.0, 260.0, 256.0, 254.0, 259.0, 231.0, 216.0, 231.0, 234.0, 250.0, 226.0, 260.0, 259.0, 230.0, 217.0, 199.0, 202.0, 263.0, 
250.0, 250.0, 263.0, 241.0, 229.0, 204.0, 206.0, 254.0, 262.0, 249.0, 267.0, 221.0, 238.0, 261.0, 252.0, 231.0, 216.0, 223.0, 227.0, 250.0, 266.0, 228.0, 228.0, 258.0, 249.0, 252.0, 261.0, 216.0, 203.0, 249.0, 261.0, 252.0, 267.0, 223.0, 221.0, 261.0, 252.0, 222.0, 231.0, 225.0, 216.0, 263.0, 247.0, 207.0, 228.0, 260.0, 250.0, 233.0, 217.0, 229.0, 224.0, 229.0, 244.0, 267.0, 255.0, 217.0, 245.0, 270.0, 249.0, 213.0, 206.0, 250.0, 266.0, 242.0, 220.0, 181.0, 215.0, 231.0, 228.0, 230.0, 238.0, 200.0, 187.0, 244.0, 257.0, 219.0, 236.0, 247.0, 218.0, 247.0, 269.0, 235.0, 227.0, 272.0, 247.0, 211.0, 201.0, 208.0, 200.0, 267.0, 246.0, 197.0, 196.0, 254.0, 253.0, 214.0, 208.0, 165.0, 179.0, 253.0, 263.0, 230.0, 232.0, 266.0, 235.0, 231.0, 239.0, 222.0, 234.0, 199.0, 209.0, 269.0, 244.0, 260.0, 259.0, 209.0, 213.0, 266.0, 259.0, 260.0, 265.0, 250.0, 254.0, 217.0, 242.0, 80.0, 88.0, 217.0, 213.0, 231.0, 253.0, 256.0, 260.0, 238.0, 266.0, 258.0, 261.0, 246.0, 270.0, 219.0, 216.0, 244.0, 263.0, 221.0, 249.0, 256.0, 269.0, 265.0, 251.0, 256.0, 248.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6855786813359219, "mean_inference_ms": 1.2080034013854677, "mean_action_processing_ms": 0.13268411791367785, "mean_env_wait_ms": 0.8505193336217425, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 525.0, "episode_reward_min": 168.0, "episode_reward_mean": 470.66, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 80.0}, "policy_reward_max": {"ppo": 272.0}, "policy_reward_mean": {"ppo": 235.33}, "hist_stats": {"episode_reward": [399.0, 516.0, 467.0, 513.0, 513.0, 462.0, 513.0, 459.0, 516.0, 333.0, 513.0, 410.0, 459.0, 462.0, 456.0, 461.0, 516.0, 513.0, 447.0, 465.0, 476.0, 519.0, 447.0, 401.0, 513.0, 513.0, 470.0, 410.0, 516.0, 516.0, 459.0, 513.0, 447.0, 450.0, 516.0, 456.0, 507.0, 513.0, 419.0, 510.0, 519.0, 444.0, 513.0, 453.0, 441.0, 510.0, 435.0, 510.0, 450.0, 453.0, 473.0, 522.0, 462.0, 519.0, 419.0, 516.0, 462.0, 
396.0, 459.0, 468.0, 387.0, 501.0, 455.0, 465.0, 516.0, 462.0, 519.0, 412.0, 408.0, 513.0, 393.0, 507.0, 422.0, 344.0, 516.0, 462.0, 501.0, 470.0, 456.0, 408.0, 513.0, 519.0, 422.0, 525.0, 525.0, 504.0, 459.0, 168.0, 430.0, 484.0, 516.0, 504.0, 519.0, 516.0, 435.0, 507.0, 470.0, 525.0, 516.0, 504.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [199.0, 200.0, 252.0, 264.0, 248.0, 219.0, 249.0, 264.0, 263.0, 250.0, 234.0, 228.0, 261.0, 252.0, 218.0, 241.0, 254.0, 262.0, 175.0, 158.0, 253.0, 260.0, 208.0, 202.0, 228.0, 231.0, 240.0, 222.0, 215.0, 241.0, 214.0, 247.0, 260.0, 256.0, 254.0, 259.0, 231.0, 216.0, 231.0, 234.0, 250.0, 226.0, 260.0, 259.0, 230.0, 217.0, 199.0, 202.0, 263.0, 250.0, 250.0, 263.0, 241.0, 229.0, 204.0, 206.0, 254.0, 262.0, 249.0, 267.0, 221.0, 238.0, 261.0, 252.0, 231.0, 216.0, 223.0, 227.0, 250.0, 266.0, 228.0, 228.0, 258.0, 249.0, 252.0, 261.0, 216.0, 203.0, 249.0, 261.0, 252.0, 267.0, 223.0, 221.0, 261.0, 252.0, 222.0, 231.0, 225.0, 216.0, 263.0, 247.0, 207.0, 228.0, 260.0, 250.0, 233.0, 217.0, 229.0, 224.0, 229.0, 244.0, 267.0, 255.0, 217.0, 245.0, 270.0, 249.0, 213.0, 206.0, 250.0, 266.0, 242.0, 220.0, 181.0, 215.0, 231.0, 228.0, 230.0, 238.0, 200.0, 187.0, 244.0, 257.0, 219.0, 236.0, 247.0, 218.0, 247.0, 269.0, 235.0, 227.0, 272.0, 247.0, 211.0, 201.0, 208.0, 200.0, 267.0, 246.0, 197.0, 196.0, 254.0, 253.0, 214.0, 208.0, 165.0, 179.0, 253.0, 263.0, 230.0, 232.0, 266.0, 235.0, 231.0, 239.0, 222.0, 234.0, 199.0, 209.0, 269.0, 244.0, 260.0, 259.0, 209.0, 
213.0, 266.0, 259.0, 260.0, 265.0, 250.0, 254.0, 217.0, 242.0, 80.0, 88.0, 217.0, 213.0, 231.0, 253.0, 256.0, 260.0, 238.0, 266.0, 258.0, 261.0, 246.0, 270.0, 219.0, 216.0, 244.0, 263.0, 221.0, 249.0, 256.0, 269.0, 265.0, 251.0, 256.0, 248.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6855786813359219, "mean_inference_ms": 1.2080034013854677, "mean_action_processing_ms": 0.13268411791367785, "mean_env_wait_ms": 0.8505193336217425, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 2636800, "num_agent_steps_trained": 2636800, "num_env_steps_sampled": 1318400, "num_env_steps_trained": 1318400, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 1318400, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 2636800, "timers": {"training_iteration_time_ms": 3640.331, "learn_time_ms": 1102.938, "learn_throughput": 11605.365, "synch_weights_time_ms": 13.813}, "counters": {"num_env_steps_sampled": 1318400, "num_env_steps_trained": 1318400, "num_agent_steps_sampled": 2636800, "num_agent_steps_trained": 2636800}, "done": false, "episodes_total": 3296, "training_iteration": 103, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-06-39", "timestamp": 1666580799, "time_this_iter_s": 3.794299602508545, "time_total_s": 388.42409467697144, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 388.42409467697144, "timesteps_since_restore": 0, "iterations_since_restore": 103, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.8, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 162.4, "sparse_reward_min": 20, "sparse_reward_max": 180, "shaped_reward_mean": 141.22, "shaped_reward_min": 26, "shaped_reward_max": 165, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.08, "onion_pickup_agent_0_min": 4, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 14.03, "onion_pickup_agent_1_min": 5, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 13.56, "useful_onion_pickup_agent_0_min": 3, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 13.49, "useful_onion_pickup_agent_1_min": 3, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.59, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.62, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.36, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, 
"useful_onion_drop_agent_1_mean": 0.32, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 13.17, "potting_onion_agent_0_min": 3, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 13.07, "potting_onion_agent_1_min": 3, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.1, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.26, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 3.44, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 3.46, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.72, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.87, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.27, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.24, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 4.2, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.25, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.11, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.16, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.06, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 5, "soup_drop_agent_1_mean": 0.07, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 13.17, "optimal_onion_potting_agent_0_min": 3, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 13.07, "optimal_onion_potting_agent_1_min": 3, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 13.17, "viable_onion_potting_agent_0_min": 3, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 13.07, "viable_onion_potting_agent_1_min": 3, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 1.9721522924398883e-32, "cur_lr": 0.0010000000474974513, "total_loss": -0.0031845830380916595, "policy_loss": 
-0.0033730631694197655, "vf_loss": 7.6778059005737305, "vf_explained_var": 0.6982693672180176, "kl": 0.0018961275927722454, "entropy": 1.1586014032363892, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 1331200, "num_env_steps_trained": 1331200, "num_agent_steps_sampled": 2662400, "num_agent_steps_trained": 2662400}, "sampler_results": {"episode_reward_max": 525.0, "episode_reward_min": 66.0, "episode_reward_mean": 466.02, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 29.0}, "policy_reward_max": {"ppo": 277.0}, "policy_reward_mean": {"ppo": 233.01}, "custom_metrics": {"sparse_reward_mean": 162.4, "sparse_reward_min": 20, "sparse_reward_max": 180, "shaped_reward_mean": 141.22, "shaped_reward_min": 26, "shaped_reward_max": 165, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.08, "onion_pickup_agent_0_min": 4, "onion_pickup_agent_0_max": 20, 
"onion_pickup_agent_1_mean": 14.03, "onion_pickup_agent_1_min": 5, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 13.56, "useful_onion_pickup_agent_0_min": 3, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 13.49, "useful_onion_pickup_agent_1_min": 3, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.59, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.62, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.36, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.32, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 13.17, "potting_onion_agent_0_min": 3, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 13.07, "potting_onion_agent_1_min": 3, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.1, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.26, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 3.44, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 3.46, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.72, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.87, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.27, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.24, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 4.2, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.25, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.11, 
"soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.16, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.06, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 5, "soup_drop_agent_1_mean": 0.07, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 13.17, "optimal_onion_potting_agent_0_min": 3, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 13.07, "optimal_onion_potting_agent_1_min": 3, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 13.17, "viable_onion_potting_agent_0_min": 3, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 13.07, "viable_onion_potting_agent_1_min": 3, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, 
"useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [447.0, 450.0, 516.0, 456.0, 507.0, 513.0, 419.0, 510.0, 519.0, 444.0, 513.0, 453.0, 441.0, 510.0, 435.0, 510.0, 450.0, 453.0, 473.0, 522.0, 462.0, 519.0, 419.0, 516.0, 462.0, 396.0, 459.0, 468.0, 387.0, 501.0, 455.0, 465.0, 516.0, 462.0, 519.0, 412.0, 408.0, 513.0, 393.0, 507.0, 422.0, 344.0, 516.0, 462.0, 501.0, 470.0, 456.0, 408.0, 513.0, 519.0, 422.0, 525.0, 525.0, 504.0, 459.0, 168.0, 430.0, 484.0, 516.0, 504.0, 519.0, 516.0, 435.0, 507.0, 470.0, 525.0, 516.0, 504.0, 473.0, 525.0, 513.0, 416.0, 449.0, 456.0, 513.0, 453.0, 462.0, 470.0, 470.0, 424.0, 516.0, 516.0, 464.0, 513.0, 450.0, 180.0, 465.0, 467.0, 405.0, 516.0, 513.0, 408.0, 507.0, 513.0, 519.0, 498.0, 513.0, 516.0, 66.0, 513.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [231.0, 216.0, 223.0, 227.0, 250.0, 266.0, 228.0, 228.0, 258.0, 249.0, 252.0, 261.0, 216.0, 203.0, 249.0, 261.0, 252.0, 267.0, 223.0, 221.0, 261.0, 252.0, 222.0, 231.0, 225.0, 216.0, 263.0, 247.0, 207.0, 228.0, 260.0, 250.0, 233.0, 217.0, 229.0, 224.0, 229.0, 244.0, 267.0, 255.0, 217.0, 245.0, 270.0, 
249.0, 213.0, 206.0, 250.0, 266.0, 242.0, 220.0, 181.0, 215.0, 231.0, 228.0, 230.0, 238.0, 200.0, 187.0, 244.0, 257.0, 219.0, 236.0, 247.0, 218.0, 247.0, 269.0, 235.0, 227.0, 272.0, 247.0, 211.0, 201.0, 208.0, 200.0, 267.0, 246.0, 197.0, 196.0, 254.0, 253.0, 214.0, 208.0, 165.0, 179.0, 253.0, 263.0, 230.0, 232.0, 266.0, 235.0, 231.0, 239.0, 222.0, 234.0, 199.0, 209.0, 269.0, 244.0, 260.0, 259.0, 209.0, 213.0, 266.0, 259.0, 260.0, 265.0, 250.0, 254.0, 217.0, 242.0, 80.0, 88.0, 217.0, 213.0, 231.0, 253.0, 256.0, 260.0, 238.0, 266.0, 258.0, 261.0, 246.0, 270.0, 219.0, 216.0, 244.0, 263.0, 221.0, 249.0, 256.0, 269.0, 265.0, 251.0, 256.0, 248.0, 241.0, 232.0, 253.0, 272.0, 257.0, 256.0, 213.0, 203.0, 230.0, 219.0, 232.0, 224.0, 260.0, 253.0, 230.0, 223.0, 239.0, 223.0, 233.0, 237.0, 238.0, 232.0, 202.0, 222.0, 256.0, 260.0, 260.0, 256.0, 228.0, 236.0, 267.0, 246.0, 217.0, 233.0, 94.0, 86.0, 242.0, 223.0, 227.0, 240.0, 205.0, 200.0, 258.0, 258.0, 258.0, 255.0, 204.0, 204.0, 260.0, 247.0, 258.0, 255.0, 248.0, 271.0, 248.0, 250.0, 277.0, 236.0, 267.0, 249.0, 29.0, 37.0, 261.0, 252.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.685451167099999, "mean_inference_ms": 1.208162856694142, "mean_action_processing_ms": 0.1326708078206156, "mean_env_wait_ms": 0.8506212298485927, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 525.0, "episode_reward_min": 66.0, "episode_reward_mean": 466.02, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 29.0}, "policy_reward_max": {"ppo": 277.0}, "policy_reward_mean": {"ppo": 233.01}, "hist_stats": {"episode_reward": [447.0, 450.0, 516.0, 456.0, 507.0, 513.0, 419.0, 510.0, 519.0, 444.0, 513.0, 453.0, 441.0, 510.0, 435.0, 510.0, 450.0, 453.0, 473.0, 522.0, 462.0, 519.0, 419.0, 516.0, 462.0, 396.0, 459.0, 468.0, 387.0, 501.0, 455.0, 465.0, 516.0, 462.0, 519.0, 412.0, 408.0, 513.0, 393.0, 507.0, 422.0, 344.0, 516.0, 462.0, 501.0, 470.0, 456.0, 408.0, 513.0, 519.0, 422.0, 525.0, 
525.0, 504.0, 459.0, 168.0, 430.0, 484.0, 516.0, 504.0, 519.0, 516.0, 435.0, 507.0, 470.0, 525.0, 516.0, 504.0, 473.0, 525.0, 513.0, 416.0, 449.0, 456.0, 513.0, 453.0, 462.0, 470.0, 470.0, 424.0, 516.0, 516.0, 464.0, 513.0, 450.0, 180.0, 465.0, 467.0, 405.0, 516.0, 513.0, 408.0, 507.0, 513.0, 519.0, 498.0, 513.0, 516.0, 66.0, 513.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [231.0, 216.0, 223.0, 227.0, 250.0, 266.0, 228.0, 228.0, 258.0, 249.0, 252.0, 261.0, 216.0, 203.0, 249.0, 261.0, 252.0, 267.0, 223.0, 221.0, 261.0, 252.0, 222.0, 231.0, 225.0, 216.0, 263.0, 247.0, 207.0, 228.0, 260.0, 250.0, 233.0, 217.0, 229.0, 224.0, 229.0, 244.0, 267.0, 255.0, 217.0, 245.0, 270.0, 249.0, 213.0, 206.0, 250.0, 266.0, 242.0, 220.0, 181.0, 215.0, 231.0, 228.0, 230.0, 238.0, 200.0, 187.0, 244.0, 257.0, 219.0, 236.0, 247.0, 218.0, 247.0, 269.0, 235.0, 227.0, 272.0, 247.0, 211.0, 201.0, 208.0, 200.0, 267.0, 246.0, 197.0, 196.0, 254.0, 253.0, 214.0, 208.0, 165.0, 179.0, 253.0, 263.0, 230.0, 232.0, 266.0, 235.0, 231.0, 239.0, 222.0, 234.0, 199.0, 209.0, 269.0, 244.0, 260.0, 259.0, 209.0, 213.0, 266.0, 259.0, 260.0, 265.0, 250.0, 254.0, 217.0, 242.0, 80.0, 88.0, 217.0, 213.0, 231.0, 253.0, 256.0, 260.0, 238.0, 266.0, 258.0, 261.0, 246.0, 270.0, 219.0, 216.0, 244.0, 263.0, 221.0, 249.0, 256.0, 269.0, 265.0, 251.0, 256.0, 248.0, 241.0, 232.0, 253.0, 272.0, 257.0, 256.0, 213.0, 203.0, 230.0, 219.0, 232.0, 224.0, 260.0, 253.0, 230.0, 223.0, 239.0, 223.0, 233.0, 237.0, 238.0, 232.0, 202.0, 222.0, 
256.0, 260.0, 260.0, 256.0, 228.0, 236.0, 267.0, 246.0, 217.0, 233.0, 94.0, 86.0, 242.0, 223.0, 227.0, 240.0, 205.0, 200.0, 258.0, 258.0, 258.0, 255.0, 204.0, 204.0, 260.0, 247.0, 258.0, 255.0, 248.0, 271.0, 248.0, 250.0, 277.0, 236.0, 267.0, 249.0, 29.0, 37.0, 261.0, 252.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.685451167099999, "mean_inference_ms": 1.208162856694142, "mean_action_processing_ms": 0.1326708078206156, "mean_env_wait_ms": 0.8506212298485927, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 2662400, "num_agent_steps_trained": 2662400, "num_env_steps_sampled": 1331200, "num_env_steps_trained": 1331200, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 1331200, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 2662400, "timers": {"training_iteration_time_ms": 3642.307, "learn_time_ms": 1101.425, "learn_throughput": 11621.309, "synch_weights_time_ms": 13.658}, "counters": {"num_env_steps_sampled": 1331200, "num_env_steps_trained": 1331200, "num_agent_steps_sampled": 2662400, "num_agent_steps_trained": 2662400}, "done": false, "episodes_total": 3328, "training_iteration": 104, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-06-43", "timestamp": 1666580803, "time_this_iter_s": 3.688504695892334, "time_total_s": 392.11259937286377, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": 
{"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, 
"gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, 
"inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", 
"synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, 
"metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 
0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": 
"{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 392.11259937286377, "timesteps_since_restore": 0, "iterations_since_restore": 104, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 24.020000000000003, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 164.4, "sparse_reward_min": 20, "sparse_reward_max": 180, "shaped_reward_mean": 144.37, "shaped_reward_min": 26, "shaped_reward_max": 165, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 13.79, "onion_pickup_agent_0_min": 4, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 14.65, "onion_pickup_agent_1_min": 5, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 13.3, "useful_onion_pickup_agent_0_min": 3, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 14.11, "useful_onion_pickup_agent_1_min": 3, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.64, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.63, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 7, "useful_onion_drop_agent_0_mean": 0.36, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, 
"useful_onion_drop_agent_1_mean": 0.35, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 6, "potting_onion_agent_0_mean": 12.85, "potting_onion_agent_0_min": 3, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 13.68, "potting_onion_agent_1_min": 3, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.2, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 4.91, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 3.86, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 3.55, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.6, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.59, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.21, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.14, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.4, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.21, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 4.32, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.07, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.06, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 5, "soup_drop_agent_1_mean": 0.08, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 12.85, "optimal_onion_potting_agent_0_min": 3, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 13.68, "optimal_onion_potting_agent_1_min": 3, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 12.85, "viable_onion_potting_agent_0_min": 3, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 13.68, "viable_onion_potting_agent_1_min": 3, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 9.860761462199441e-33, "cur_lr": 0.0010000000474974513, "total_loss": -0.0018112241523340344, "policy_loss": 
-0.0019815461710095406, "vf_loss": 7.434688568115234, "vf_explained_var": 0.6963086128234863, "kl": 0.0017134303925558925, "entropy": 1.1462903022766113, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 1344000, "num_env_steps_trained": 1344000, "num_agent_steps_sampled": 2688000, "num_agent_steps_trained": 2688000}, "sampler_results": {"episode_reward_max": 525.0, "episode_reward_min": 66.0, "episode_reward_mean": 473.17, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 29.0}, "policy_reward_max": {"ppo": 280.0}, "policy_reward_mean": {"ppo": 236.585}, "custom_metrics": {"sparse_reward_mean": 164.4, "sparse_reward_min": 20, "sparse_reward_max": 180, "shaped_reward_mean": 144.37, "shaped_reward_min": 26, "shaped_reward_max": 165, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 13.79, "onion_pickup_agent_0_min": 4, "onion_pickup_agent_0_max": 20, 
"onion_pickup_agent_1_mean": 14.65, "onion_pickup_agent_1_min": 5, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 13.3, "useful_onion_pickup_agent_0_min": 3, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 14.11, "useful_onion_pickup_agent_1_min": 3, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.64, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.63, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 7, "useful_onion_drop_agent_0_mean": 0.36, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.35, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 6, "potting_onion_agent_0_mean": 12.85, "potting_onion_agent_0_min": 3, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 13.68, "potting_onion_agent_1_min": 3, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.2, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 4.91, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 3.86, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 3.55, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.6, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.59, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.21, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.14, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.4, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.21, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 4.32, 
"soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.07, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.06, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 5, "soup_drop_agent_1_mean": 0.08, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 12.85, "optimal_onion_potting_agent_0_min": 3, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 13.68, "optimal_onion_potting_agent_1_min": 3, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 12.85, "viable_onion_potting_agent_0_min": 3, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 13.68, "viable_onion_potting_agent_1_min": 3, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, 
"useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [516.0, 462.0, 519.0, 412.0, 408.0, 513.0, 393.0, 507.0, 422.0, 344.0, 516.0, 462.0, 501.0, 470.0, 456.0, 408.0, 513.0, 519.0, 422.0, 525.0, 525.0, 504.0, 459.0, 168.0, 430.0, 484.0, 516.0, 504.0, 519.0, 516.0, 435.0, 507.0, 470.0, 525.0, 516.0, 504.0, 473.0, 525.0, 513.0, 416.0, 449.0, 456.0, 513.0, 453.0, 462.0, 470.0, 470.0, 424.0, 516.0, 516.0, 464.0, 513.0, 450.0, 180.0, 465.0, 467.0, 405.0, 516.0, 513.0, 408.0, 507.0, 513.0, 519.0, 498.0, 513.0, 516.0, 66.0, 513.0, 510.0, 513.0, 465.0, 519.0, 510.0, 516.0, 473.0, 465.0, 456.0, 510.0, 522.0, 461.0, 473.0, 501.0, 510.0, 513.0, 516.0, 458.0, 410.0, 516.0, 507.0, 519.0, 421.0, 473.0, 525.0, 504.0, 519.0, 510.0, 519.0, 464.0, 468.0, 519.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [247.0, 269.0, 235.0, 227.0, 272.0, 247.0, 211.0, 201.0, 208.0, 200.0, 267.0, 246.0, 197.0, 196.0, 254.0, 253.0, 214.0, 208.0, 165.0, 179.0, 253.0, 263.0, 230.0, 232.0, 266.0, 235.0, 231.0, 239.0, 222.0, 234.0, 199.0, 209.0, 269.0, 244.0, 260.0, 259.0, 209.0, 213.0, 266.0, 259.0, 260.0, 265.0, 250.0, 
254.0, 217.0, 242.0, 80.0, 88.0, 217.0, 213.0, 231.0, 253.0, 256.0, 260.0, 238.0, 266.0, 258.0, 261.0, 246.0, 270.0, 219.0, 216.0, 244.0, 263.0, 221.0, 249.0, 256.0, 269.0, 265.0, 251.0, 256.0, 248.0, 241.0, 232.0, 253.0, 272.0, 257.0, 256.0, 213.0, 203.0, 230.0, 219.0, 232.0, 224.0, 260.0, 253.0, 230.0, 223.0, 239.0, 223.0, 233.0, 237.0, 238.0, 232.0, 202.0, 222.0, 256.0, 260.0, 260.0, 256.0, 228.0, 236.0, 267.0, 246.0, 217.0, 233.0, 94.0, 86.0, 242.0, 223.0, 227.0, 240.0, 205.0, 200.0, 258.0, 258.0, 258.0, 255.0, 204.0, 204.0, 260.0, 247.0, 258.0, 255.0, 248.0, 271.0, 248.0, 250.0, 277.0, 236.0, 267.0, 249.0, 29.0, 37.0, 261.0, 252.0, 254.0, 256.0, 256.0, 257.0, 211.0, 254.0, 254.0, 265.0, 248.0, 262.0, 244.0, 272.0, 246.0, 227.0, 239.0, 226.0, 238.0, 218.0, 250.0, 260.0, 280.0, 242.0, 224.0, 237.0, 232.0, 241.0, 252.0, 249.0, 254.0, 256.0, 239.0, 274.0, 260.0, 256.0, 220.0, 238.0, 205.0, 205.0, 256.0, 260.0, 252.0, 255.0, 264.0, 255.0, 216.0, 205.0, 235.0, 238.0, 256.0, 269.0, 247.0, 257.0, 265.0, 254.0, 262.0, 248.0, 257.0, 262.0, 242.0, 222.0, 236.0, 232.0, 258.0, 261.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6853260190455254, "mean_inference_ms": 1.2081047276134962, "mean_action_processing_ms": 0.1326570049745876, "mean_env_wait_ms": 0.8501976782803979, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 525.0, "episode_reward_min": 66.0, "episode_reward_mean": 473.17, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 29.0}, "policy_reward_max": {"ppo": 280.0}, "policy_reward_mean": {"ppo": 236.585}, "hist_stats": {"episode_reward": [516.0, 462.0, 519.0, 412.0, 408.0, 513.0, 393.0, 507.0, 422.0, 344.0, 516.0, 462.0, 501.0, 470.0, 456.0, 408.0, 513.0, 519.0, 422.0, 525.0, 525.0, 504.0, 459.0, 168.0, 430.0, 484.0, 516.0, 504.0, 519.0, 516.0, 435.0, 507.0, 470.0, 525.0, 516.0, 504.0, 473.0, 525.0, 513.0, 416.0, 449.0, 456.0, 513.0, 453.0, 462.0, 470.0, 470.0, 424.0, 516.0, 516.0, 464.0, 513.0, 
450.0, 180.0, 465.0, 467.0, 405.0, 516.0, 513.0, 408.0, 507.0, 513.0, 519.0, 498.0, 513.0, 516.0, 66.0, 513.0, 510.0, 513.0, 465.0, 519.0, 510.0, 516.0, 473.0, 465.0, 456.0, 510.0, 522.0, 461.0, 473.0, 501.0, 510.0, 513.0, 516.0, 458.0, 410.0, 516.0, 507.0, 519.0, 421.0, 473.0, 525.0, 504.0, 519.0, 510.0, 519.0, 464.0, 468.0, 519.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [247.0, 269.0, 235.0, 227.0, 272.0, 247.0, 211.0, 201.0, 208.0, 200.0, 267.0, 246.0, 197.0, 196.0, 254.0, 253.0, 214.0, 208.0, 165.0, 179.0, 253.0, 263.0, 230.0, 232.0, 266.0, 235.0, 231.0, 239.0, 222.0, 234.0, 199.0, 209.0, 269.0, 244.0, 260.0, 259.0, 209.0, 213.0, 266.0, 259.0, 260.0, 265.0, 250.0, 254.0, 217.0, 242.0, 80.0, 88.0, 217.0, 213.0, 231.0, 253.0, 256.0, 260.0, 238.0, 266.0, 258.0, 261.0, 246.0, 270.0, 219.0, 216.0, 244.0, 263.0, 221.0, 249.0, 256.0, 269.0, 265.0, 251.0, 256.0, 248.0, 241.0, 232.0, 253.0, 272.0, 257.0, 256.0, 213.0, 203.0, 230.0, 219.0, 232.0, 224.0, 260.0, 253.0, 230.0, 223.0, 239.0, 223.0, 233.0, 237.0, 238.0, 232.0, 202.0, 222.0, 256.0, 260.0, 260.0, 256.0, 228.0, 236.0, 267.0, 246.0, 217.0, 233.0, 94.0, 86.0, 242.0, 223.0, 227.0, 240.0, 205.0, 200.0, 258.0, 258.0, 258.0, 255.0, 204.0, 204.0, 260.0, 247.0, 258.0, 255.0, 248.0, 271.0, 248.0, 250.0, 277.0, 236.0, 267.0, 249.0, 29.0, 37.0, 261.0, 252.0, 254.0, 256.0, 256.0, 257.0, 211.0, 254.0, 254.0, 265.0, 248.0, 262.0, 244.0, 272.0, 246.0, 227.0, 239.0, 226.0, 238.0, 218.0, 250.0, 260.0, 280.0, 242.0, 224.0, 237.0, 232.0, 
241.0, 252.0, 249.0, 254.0, 256.0, 239.0, 274.0, 260.0, 256.0, 220.0, 238.0, 205.0, 205.0, 256.0, 260.0, 252.0, 255.0, 264.0, 255.0, 216.0, 205.0, 235.0, 238.0, 256.0, 269.0, 247.0, 257.0, 265.0, 254.0, 262.0, 248.0, 257.0, 262.0, 242.0, 222.0, 236.0, 232.0, 258.0, 261.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6853260190455254, "mean_inference_ms": 1.2081047276134962, "mean_action_processing_ms": 0.1326570049745876, "mean_env_wait_ms": 0.8501976782803979, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 2688000, "num_agent_steps_trained": 2688000, "num_env_steps_sampled": 1344000, "num_env_steps_trained": 1344000, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 1344000, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 2688000, "timers": {"training_iteration_time_ms": 3643.382, "learn_time_ms": 1101.078, "learn_throughput": 11624.97, "synch_weights_time_ms": 12.672}, "counters": {"num_env_steps_sampled": 1344000, "num_env_steps_trained": 1344000, "num_agent_steps_sampled": 2688000, "num_agent_steps_trained": 2688000}, "done": false, "episodes_total": 3360, "training_iteration": 105, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-06-47", "timestamp": 1666580807, "time_this_iter_s": 3.699756622314453, "time_total_s": 395.8123559951782, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": 
{"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, 
"gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, 
"inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", 
"synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, 
"metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 
0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": 
"{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 395.8123559951782, "timesteps_since_restore": 0, "iterations_since_restore": 105, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.616666666666667, "ram_util_percent": 10.616666666666667}}
+{"custom_metrics": {"sparse_reward_mean": 166.4, "sparse_reward_min": 20, "sparse_reward_max": 180, "shaped_reward_mean": 145.88, "shaped_reward_min": 26, "shaped_reward_max": 165, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.1, "onion_pickup_agent_0_min": 4, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 14.49, "onion_pickup_agent_1_min": 5, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 13.67, "useful_onion_pickup_agent_0_min": 3, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 14.0, "useful_onion_pickup_agent_1_min": 3, "useful_onion_pickup_agent_1_max": 20, "onion_drop_agent_0_mean": 0.64, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.62, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 7, "useful_onion_drop_agent_0_mean": 0.29, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, 
"useful_onion_drop_agent_1_mean": 0.35, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 6, "potting_onion_agent_0_mean": 13.14, "potting_onion_agent_0_min": 3, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 13.62, "potting_onion_agent_1_min": 3, "potting_onion_agent_1_max": 20, "dish_pickup_agent_0_mean": 5.27, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.04, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 3.9, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 3.65, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.71, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 0.63, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.32, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 4, "useful_dish_drop_agent_1_mean": 0.19, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.42, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.23, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 4.34, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.14, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.05, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 5, "soup_drop_agent_1_mean": 0.04, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 13.14, "optimal_onion_potting_agent_0_min": 3, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 13.62, "optimal_onion_potting_agent_1_min": 3, "optimal_onion_potting_agent_1_max": 20, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 13.14, "viable_onion_potting_agent_0_min": 3, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 13.62, "viable_onion_potting_agent_1_min": 3, "viable_onion_potting_agent_1_max": 20, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 4.930380731099721e-33, "cur_lr": 0.0010000000474974513, "total_loss": -0.0009489480871707201, "policy_loss": -0.00112776062451303, 
"vf_loss": 7.568265914916992, "vf_explained_var": 0.6967692375183105, "kl": 0.0018486479530110955, "entropy": 1.1560269594192505, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 1356800, "num_env_steps_trained": 1356800, "num_agent_steps_sampled": 2713600, "num_agent_steps_trained": 2713600}, "sampler_results": {"episode_reward_max": 525.0, "episode_reward_min": 66.0, "episode_reward_mean": 478.68, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 29.0}, "policy_reward_max": {"ppo": 280.0}, "policy_reward_mean": {"ppo": 239.34}, "custom_metrics": {"sparse_reward_mean": 166.4, "sparse_reward_min": 20, "sparse_reward_max": 180, "shaped_reward_mean": 145.88, "shaped_reward_min": 26, "shaped_reward_max": 165, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.1, "onion_pickup_agent_0_min": 4, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 14.49, 
"onion_pickup_agent_1_min": 5, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 13.67, "useful_onion_pickup_agent_0_min": 3, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 14.0, "useful_onion_pickup_agent_1_min": 3, "useful_onion_pickup_agent_1_max": 20, "onion_drop_agent_0_mean": 0.64, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.62, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 7, "useful_onion_drop_agent_0_mean": 0.29, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.35, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 6, "potting_onion_agent_0_mean": 13.14, "potting_onion_agent_0_min": 3, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 13.62, "potting_onion_agent_1_min": 3, "potting_onion_agent_1_max": 20, "dish_pickup_agent_0_mean": 5.27, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.04, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 3.9, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 3.65, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.71, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 0.63, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.32, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 4, "useful_dish_drop_agent_1_mean": 0.19, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.42, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.23, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 4.34, "soup_delivery_agent_0_min": 0, 
"soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.14, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.05, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 5, "soup_drop_agent_1_mean": 0.04, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 13.14, "optimal_onion_potting_agent_0_min": 3, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 13.62, "optimal_onion_potting_agent_1_min": 3, "optimal_onion_potting_agent_1_max": 20, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 13.14, "viable_onion_potting_agent_0_min": 3, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 13.62, "viable_onion_potting_agent_1_min": 3, "viable_onion_potting_agent_1_max": 20, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [470.0, 525.0, 516.0, 504.0, 473.0, 525.0, 513.0, 416.0, 449.0, 456.0, 513.0, 453.0, 462.0, 470.0, 470.0, 424.0, 516.0, 516.0, 464.0, 513.0, 450.0, 180.0, 465.0, 467.0, 405.0, 516.0, 513.0, 408.0, 507.0, 513.0, 519.0, 498.0, 513.0, 516.0, 66.0, 513.0, 510.0, 513.0, 465.0, 519.0, 510.0, 516.0, 473.0, 465.0, 456.0, 510.0, 522.0, 461.0, 473.0, 501.0, 510.0, 513.0, 516.0, 458.0, 410.0, 516.0, 507.0, 519.0, 421.0, 473.0, 525.0, 504.0, 519.0, 510.0, 519.0, 464.0, 468.0, 519.0, 513.0, 522.0, 519.0, 413.0, 516.0, 425.0, 459.0, 465.0, 467.0, 449.0, 513.0, 513.0, 452.0, 513.0, 447.0, 519.0, 513.0, 510.0, 516.0, 473.0, 465.0, 519.0, 465.0, 458.0, 465.0, 456.0, 456.0, 459.0, 516.0, 441.0, 516.0, 473.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [221.0, 249.0, 256.0, 269.0, 265.0, 251.0, 256.0, 248.0, 241.0, 232.0, 253.0, 272.0, 257.0, 256.0, 213.0, 203.0, 230.0, 219.0, 232.0, 224.0, 260.0, 253.0, 230.0, 223.0, 239.0, 223.0, 233.0, 237.0, 238.0, 232.0, 202.0, 222.0, 256.0, 260.0, 260.0, 256.0, 228.0, 236.0, 267.0, 246.0, 217.0, 233.0, 94.0, 86.0, 242.0, 223.0, 227.0, 240.0, 205.0, 
200.0, 258.0, 258.0, 258.0, 255.0, 204.0, 204.0, 260.0, 247.0, 258.0, 255.0, 248.0, 271.0, 248.0, 250.0, 277.0, 236.0, 267.0, 249.0, 29.0, 37.0, 261.0, 252.0, 254.0, 256.0, 256.0, 257.0, 211.0, 254.0, 254.0, 265.0, 248.0, 262.0, 244.0, 272.0, 246.0, 227.0, 239.0, 226.0, 238.0, 218.0, 250.0, 260.0, 280.0, 242.0, 224.0, 237.0, 232.0, 241.0, 252.0, 249.0, 254.0, 256.0, 239.0, 274.0, 260.0, 256.0, 220.0, 238.0, 205.0, 205.0, 256.0, 260.0, 252.0, 255.0, 264.0, 255.0, 216.0, 205.0, 235.0, 238.0, 256.0, 269.0, 247.0, 257.0, 265.0, 254.0, 262.0, 248.0, 257.0, 262.0, 242.0, 222.0, 236.0, 232.0, 258.0, 261.0, 254.0, 259.0, 260.0, 262.0, 248.0, 271.0, 211.0, 202.0, 261.0, 255.0, 213.0, 212.0, 228.0, 231.0, 242.0, 223.0, 231.0, 236.0, 228.0, 221.0, 243.0, 270.0, 259.0, 254.0, 213.0, 239.0, 270.0, 243.0, 226.0, 221.0, 262.0, 257.0, 253.0, 260.0, 247.0, 263.0, 252.0, 264.0, 241.0, 232.0, 235.0, 230.0, 259.0, 260.0, 234.0, 231.0, 213.0, 245.0, 236.0, 229.0, 236.0, 220.0, 243.0, 213.0, 245.0, 214.0, 260.0, 256.0, 205.0, 236.0, 262.0, 254.0, 235.0, 238.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6852323252055342, "mean_inference_ms": 1.2080135818780215, "mean_action_processing_ms": 0.13264710505548277, "mean_env_wait_ms": 0.8497943535045519, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 525.0, "episode_reward_min": 66.0, "episode_reward_mean": 478.68, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 29.0}, "policy_reward_max": {"ppo": 280.0}, "policy_reward_mean": {"ppo": 239.34}, "hist_stats": {"episode_reward": [470.0, 525.0, 516.0, 504.0, 473.0, 525.0, 513.0, 416.0, 449.0, 456.0, 513.0, 453.0, 462.0, 470.0, 470.0, 424.0, 516.0, 516.0, 464.0, 513.0, 450.0, 180.0, 465.0, 467.0, 405.0, 516.0, 513.0, 408.0, 507.0, 513.0, 519.0, 498.0, 513.0, 516.0, 66.0, 513.0, 510.0, 513.0, 465.0, 519.0, 510.0, 516.0, 473.0, 465.0, 456.0, 510.0, 522.0, 461.0, 473.0, 501.0, 510.0, 513.0, 516.0, 458.0, 410.0, 516.0, 507.0, 
519.0, 421.0, 473.0, 525.0, 504.0, 519.0, 510.0, 519.0, 464.0, 468.0, 519.0, 513.0, 522.0, 519.0, 413.0, 516.0, 425.0, 459.0, 465.0, 467.0, 449.0, 513.0, 513.0, 452.0, 513.0, 447.0, 519.0, 513.0, 510.0, 516.0, 473.0, 465.0, 519.0, 465.0, 458.0, 465.0, 456.0, 456.0, 459.0, 516.0, 441.0, 516.0, 473.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [221.0, 249.0, 256.0, 269.0, 265.0, 251.0, 256.0, 248.0, 241.0, 232.0, 253.0, 272.0, 257.0, 256.0, 213.0, 203.0, 230.0, 219.0, 232.0, 224.0, 260.0, 253.0, 230.0, 223.0, 239.0, 223.0, 233.0, 237.0, 238.0, 232.0, 202.0, 222.0, 256.0, 260.0, 260.0, 256.0, 228.0, 236.0, 267.0, 246.0, 217.0, 233.0, 94.0, 86.0, 242.0, 223.0, 227.0, 240.0, 205.0, 200.0, 258.0, 258.0, 258.0, 255.0, 204.0, 204.0, 260.0, 247.0, 258.0, 255.0, 248.0, 271.0, 248.0, 250.0, 277.0, 236.0, 267.0, 249.0, 29.0, 37.0, 261.0, 252.0, 254.0, 256.0, 256.0, 257.0, 211.0, 254.0, 254.0, 265.0, 248.0, 262.0, 244.0, 272.0, 246.0, 227.0, 239.0, 226.0, 238.0, 218.0, 250.0, 260.0, 280.0, 242.0, 224.0, 237.0, 232.0, 241.0, 252.0, 249.0, 254.0, 256.0, 239.0, 274.0, 260.0, 256.0, 220.0, 238.0, 205.0, 205.0, 256.0, 260.0, 252.0, 255.0, 264.0, 255.0, 216.0, 205.0, 235.0, 238.0, 256.0, 269.0, 247.0, 257.0, 265.0, 254.0, 262.0, 248.0, 257.0, 262.0, 242.0, 222.0, 236.0, 232.0, 258.0, 261.0, 254.0, 259.0, 260.0, 262.0, 248.0, 271.0, 211.0, 202.0, 261.0, 255.0, 213.0, 212.0, 228.0, 231.0, 242.0, 223.0, 231.0, 236.0, 228.0, 221.0, 243.0, 270.0, 259.0, 254.0, 213.0, 239.0, 270.0, 243.0, 226.0, 
221.0, 262.0, 257.0, 253.0, 260.0, 247.0, 263.0, 252.0, 264.0, 241.0, 232.0, 235.0, 230.0, 259.0, 260.0, 234.0, 231.0, 213.0, 245.0, 236.0, 229.0, 236.0, 220.0, 243.0, 213.0, 245.0, 214.0, 260.0, 256.0, 205.0, 236.0, 262.0, 254.0, 235.0, 238.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6852323252055342, "mean_inference_ms": 1.2080135818780215, "mean_action_processing_ms": 0.13264710505548277, "mean_env_wait_ms": 0.8497943535045519, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 2713600, "num_agent_steps_trained": 2713600, "num_env_steps_sampled": 1356800, "num_env_steps_trained": 1356800, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 1356800, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 2713600, "timers": {"training_iteration_time_ms": 3649.561, "learn_time_ms": 1105.95, "learn_throughput": 11573.764, "synch_weights_time_ms": 11.506}, "counters": {"num_env_steps_sampled": 1356800, "num_env_steps_trained": 1356800, "num_agent_steps_sampled": 2713600, "num_agent_steps_trained": 2713600}, "done": false, "episodes_total": 3392, "training_iteration": 106, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-06-51", "timestamp": 1666580811, "time_this_iter_s": 3.704148292541504, "time_total_s": 399.5165042877197, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 399.5165042877197, "timesteps_since_restore": 0, "iterations_since_restore": 106, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.98, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 167.8, "sparse_reward_min": 20, "sparse_reward_max": 180, "shaped_reward_mean": 147.14, "shaped_reward_min": 26, "shaped_reward_max": 173, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 13.94, "onion_pickup_agent_0_min": 4, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 15.13, "onion_pickup_agent_1_min": 5, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 13.49, "useful_onion_pickup_agent_0_min": 3, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 14.62, "useful_onion_pickup_agent_1_min": 3, "useful_onion_pickup_agent_1_max": 20, "onion_drop_agent_0_mean": 0.63, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.76, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 7, "useful_onion_drop_agent_0_mean": 0.25, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, 
"useful_onion_drop_agent_1_mean": 0.43, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 6, "potting_onion_agent_0_mean": 12.92, "potting_onion_agent_0_min": 3, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 14.07, "potting_onion_agent_1_min": 3, "potting_onion_agent_1_max": 20, "dish_pickup_agent_0_mean": 5.4, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 4.78, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 4.15, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 3.54, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.71, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 0.53, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.27, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 4, "useful_dish_drop_agent_1_mean": 0.16, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.54, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.1, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 4.52, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.01, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.04, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 12.92, "optimal_onion_potting_agent_0_min": 3, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 14.07, "optimal_onion_potting_agent_1_min": 3, "optimal_onion_potting_agent_1_max": 20, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 12.92, "viable_onion_potting_agent_0_min": 3, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 14.07, "viable_onion_potting_agent_1_min": 3, "viable_onion_potting_agent_1_max": 20, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 2.4651903655498604e-33, "cur_lr": 0.0010000000474974513, "total_loss": -0.0011604693718254566, "policy_loss": 
-0.0013407319784164429, "vf_loss": 7.517999649047852, "vf_explained_var": 0.7039157748222351, "kl": 0.0018529343651607633, "entropy": 1.1430747509002686, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 1369600, "num_env_steps_trained": 1369600, "num_agent_steps_sampled": 2739200, "num_agent_steps_trained": 2739200}, "sampler_results": {"episode_reward_max": 533.0, "episode_reward_min": 66.0, "episode_reward_mean": 482.74, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 29.0}, "policy_reward_max": {"ppo": 280.0}, "policy_reward_mean": {"ppo": 241.37}, "custom_metrics": {"sparse_reward_mean": 167.8, "sparse_reward_min": 20, "sparse_reward_max": 180, "shaped_reward_mean": 147.14, "shaped_reward_min": 26, "shaped_reward_max": 173, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 13.94, "onion_pickup_agent_0_min": 4, "onion_pickup_agent_0_max": 21, 
"onion_pickup_agent_1_mean": 15.13, "onion_pickup_agent_1_min": 5, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 13.49, "useful_onion_pickup_agent_0_min": 3, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 14.62, "useful_onion_pickup_agent_1_min": 3, "useful_onion_pickup_agent_1_max": 20, "onion_drop_agent_0_mean": 0.63, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.76, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 7, "useful_onion_drop_agent_0_mean": 0.25, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.43, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 6, "potting_onion_agent_0_mean": 12.92, "potting_onion_agent_0_min": 3, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 14.07, "potting_onion_agent_1_min": 3, "potting_onion_agent_1_max": 20, "dish_pickup_agent_0_mean": 5.4, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 4.78, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 4.15, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 3.54, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.71, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 0.53, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.27, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 4, "useful_dish_drop_agent_1_mean": 0.16, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.54, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.1, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 4.52, 
"soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.01, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.04, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 12.92, "optimal_onion_potting_agent_0_min": 3, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 14.07, "optimal_onion_potting_agent_1_min": 3, "optimal_onion_potting_agent_1_max": 20, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 12.92, "viable_onion_potting_agent_0_min": 3, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 14.07, "viable_onion_potting_agent_1_min": 3, "viable_onion_potting_agent_1_max": 20, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, 
"useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [513.0, 516.0, 66.0, 513.0, 510.0, 513.0, 465.0, 519.0, 510.0, 516.0, 473.0, 465.0, 456.0, 510.0, 522.0, 461.0, 473.0, 501.0, 510.0, 513.0, 516.0, 458.0, 410.0, 516.0, 507.0, 519.0, 421.0, 473.0, 525.0, 504.0, 519.0, 510.0, 519.0, 464.0, 468.0, 519.0, 513.0, 522.0, 519.0, 413.0, 516.0, 425.0, 459.0, 465.0, 467.0, 449.0, 513.0, 513.0, 452.0, 513.0, 447.0, 519.0, 513.0, 510.0, 516.0, 473.0, 465.0, 519.0, 465.0, 458.0, 465.0, 456.0, 456.0, 459.0, 516.0, 441.0, 516.0, 473.0, 453.0, 519.0, 455.0, 519.0, 519.0, 510.0, 180.0, 516.0, 473.0, 516.0, 462.0, 522.0, 470.0, 507.0, 419.0, 525.0, 507.0, 470.0, 533.0, 462.0, 513.0, 519.0, 495.0, 462.0, 519.0, 513.0, 513.0, 456.0, 468.0, 465.0, 516.0, 519.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [277.0, 236.0, 267.0, 249.0, 29.0, 37.0, 261.0, 252.0, 254.0, 256.0, 256.0, 257.0, 211.0, 254.0, 254.0, 265.0, 248.0, 262.0, 244.0, 272.0, 246.0, 227.0, 239.0, 226.0, 238.0, 218.0, 250.0, 260.0, 280.0, 242.0, 224.0, 237.0, 232.0, 241.0, 252.0, 249.0, 254.0, 256.0, 239.0, 274.0, 260.0, 256.0, 220.0, 238.0, 
205.0, 205.0, 256.0, 260.0, 252.0, 255.0, 264.0, 255.0, 216.0, 205.0, 235.0, 238.0, 256.0, 269.0, 247.0, 257.0, 265.0, 254.0, 262.0, 248.0, 257.0, 262.0, 242.0, 222.0, 236.0, 232.0, 258.0, 261.0, 254.0, 259.0, 260.0, 262.0, 248.0, 271.0, 211.0, 202.0, 261.0, 255.0, 213.0, 212.0, 228.0, 231.0, 242.0, 223.0, 231.0, 236.0, 228.0, 221.0, 243.0, 270.0, 259.0, 254.0, 213.0, 239.0, 270.0, 243.0, 226.0, 221.0, 262.0, 257.0, 253.0, 260.0, 247.0, 263.0, 252.0, 264.0, 241.0, 232.0, 235.0, 230.0, 259.0, 260.0, 234.0, 231.0, 213.0, 245.0, 236.0, 229.0, 236.0, 220.0, 243.0, 213.0, 245.0, 214.0, 260.0, 256.0, 205.0, 236.0, 262.0, 254.0, 235.0, 238.0, 225.0, 228.0, 253.0, 266.0, 227.0, 228.0, 268.0, 251.0, 258.0, 261.0, 259.0, 251.0, 91.0, 89.0, 261.0, 255.0, 238.0, 235.0, 254.0, 262.0, 236.0, 226.0, 268.0, 254.0, 239.0, 231.0, 253.0, 254.0, 204.0, 215.0, 267.0, 258.0, 256.0, 251.0, 247.0, 223.0, 273.0, 260.0, 210.0, 252.0, 254.0, 259.0, 267.0, 252.0, 241.0, 254.0, 231.0, 231.0, 264.0, 255.0, 254.0, 259.0, 255.0, 258.0, 237.0, 219.0, 236.0, 232.0, 228.0, 237.0, 262.0, 254.0, 264.0, 255.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6851764219862165, "mean_inference_ms": 1.207909821947583, "mean_action_processing_ms": 0.1326402215421815, "mean_env_wait_ms": 0.8493898967596079, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 533.0, "episode_reward_min": 66.0, "episode_reward_mean": 482.74, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 29.0}, "policy_reward_max": {"ppo": 280.0}, "policy_reward_mean": {"ppo": 241.37}, "hist_stats": {"episode_reward": [513.0, 516.0, 66.0, 513.0, 510.0, 513.0, 465.0, 519.0, 510.0, 516.0, 473.0, 465.0, 456.0, 510.0, 522.0, 461.0, 473.0, 501.0, 510.0, 513.0, 516.0, 458.0, 410.0, 516.0, 507.0, 519.0, 421.0, 473.0, 525.0, 504.0, 519.0, 510.0, 519.0, 464.0, 468.0, 519.0, 513.0, 522.0, 519.0, 413.0, 516.0, 425.0, 459.0, 465.0, 467.0, 449.0, 513.0, 513.0, 452.0, 513.0, 447.0, 519.0, 513.0, 
510.0, 516.0, 473.0, 465.0, 519.0, 465.0, 458.0, 465.0, 456.0, 456.0, 459.0, 516.0, 441.0, 516.0, 473.0, 453.0, 519.0, 455.0, 519.0, 519.0, 510.0, 180.0, 516.0, 473.0, 516.0, 462.0, 522.0, 470.0, 507.0, 419.0, 525.0, 507.0, 470.0, 533.0, 462.0, 513.0, 519.0, 495.0, 462.0, 519.0, 513.0, 513.0, 456.0, 468.0, 465.0, 516.0, 519.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [277.0, 236.0, 267.0, 249.0, 29.0, 37.0, 261.0, 252.0, 254.0, 256.0, 256.0, 257.0, 211.0, 254.0, 254.0, 265.0, 248.0, 262.0, 244.0, 272.0, 246.0, 227.0, 239.0, 226.0, 238.0, 218.0, 250.0, 260.0, 280.0, 242.0, 224.0, 237.0, 232.0, 241.0, 252.0, 249.0, 254.0, 256.0, 239.0, 274.0, 260.0, 256.0, 220.0, 238.0, 205.0, 205.0, 256.0, 260.0, 252.0, 255.0, 264.0, 255.0, 216.0, 205.0, 235.0, 238.0, 256.0, 269.0, 247.0, 257.0, 265.0, 254.0, 262.0, 248.0, 257.0, 262.0, 242.0, 222.0, 236.0, 232.0, 258.0, 261.0, 254.0, 259.0, 260.0, 262.0, 248.0, 271.0, 211.0, 202.0, 261.0, 255.0, 213.0, 212.0, 228.0, 231.0, 242.0, 223.0, 231.0, 236.0, 228.0, 221.0, 243.0, 270.0, 259.0, 254.0, 213.0, 239.0, 270.0, 243.0, 226.0, 221.0, 262.0, 257.0, 253.0, 260.0, 247.0, 263.0, 252.0, 264.0, 241.0, 232.0, 235.0, 230.0, 259.0, 260.0, 234.0, 231.0, 213.0, 245.0, 236.0, 229.0, 236.0, 220.0, 243.0, 213.0, 245.0, 214.0, 260.0, 256.0, 205.0, 236.0, 262.0, 254.0, 235.0, 238.0, 225.0, 228.0, 253.0, 266.0, 227.0, 228.0, 268.0, 251.0, 258.0, 261.0, 259.0, 251.0, 91.0, 89.0, 261.0, 255.0, 238.0, 235.0, 254.0, 262.0, 236.0, 226.0, 268.0, 254.0, 239.0, 
231.0, 253.0, 254.0, 204.0, 215.0, 267.0, 258.0, 256.0, 251.0, 247.0, 223.0, 273.0, 260.0, 210.0, 252.0, 254.0, 259.0, 267.0, 252.0, 241.0, 254.0, 231.0, 231.0, 264.0, 255.0, 254.0, 259.0, 255.0, 258.0, 237.0, 219.0, 236.0, 232.0, 228.0, 237.0, 262.0, 254.0, 264.0, 255.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6851764219862165, "mean_inference_ms": 1.207909821947583, "mean_action_processing_ms": 0.1326402215421815, "mean_env_wait_ms": 0.8493898967596079, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 2739200, "num_agent_steps_trained": 2739200, "num_env_steps_sampled": 1369600, "num_env_steps_trained": 1369600, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 1369600, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 2739200, "timers": {"training_iteration_time_ms": 3638.166, "learn_time_ms": 1094.745, "learn_throughput": 11692.226, "synch_weights_time_ms": 11.752}, "counters": {"num_env_steps_sampled": 1369600, "num_env_steps_trained": 1369600, "num_agent_steps_sampled": 2739200, "num_agent_steps_trained": 2739200}, "done": false, "episodes_total": 3424, "training_iteration": 107, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-06-54", "timestamp": 1666580814, "time_this_iter_s": 3.574979543685913, "time_total_s": 403.09148383140564, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": 
{"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, 
"gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, 
"inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", 
"synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, 
"metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 
0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": 
"{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 403.09148383140564, "timesteps_since_restore": 0, "iterations_since_restore": 107, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.75, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 168.8, "sparse_reward_min": 60, "sparse_reward_max": 180, "shaped_reward_mean": 148.8, "shaped_reward_min": 60, "shaped_reward_max": 173, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.56, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 14.87, "onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 14.01, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 14.35, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.73, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.83, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 7, "useful_onion_drop_agent_0_mean": 0.27, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, 
"useful_onion_drop_agent_1_mean": 0.48, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 6, "potting_onion_agent_0_mean": 13.44, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 13.78, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.2, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 4.96, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 4.11, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 3.82, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.67, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 0.52, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.22, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 4, "useful_dish_drop_agent_1_mean": 0.16, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.46, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 4.22, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 4.42, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.16, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 13.44, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 13.78, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 13.44, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 13.78, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 1.2325951827749302e-33, "cur_lr": 0.0010000000474974513, "total_loss": -0.00016931036952883005, "policy_loss": 
-0.00034154567401856184, "vf_loss": 7.427540302276611, "vf_explained_var": 0.7095834016799927, "kl": 0.0018368299352005124, "entropy": 1.141036033630371, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 1382400, "num_env_steps_trained": 1382400, "num_agent_steps_sampled": 2764800, "num_agent_steps_trained": 2764800}, "sampler_results": {"episode_reward_max": 533.0, "episode_reward_min": 180.0, "episode_reward_mean": 486.4, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 89.0}, "policy_reward_max": {"ppo": 273.0}, "policy_reward_mean": {"ppo": 243.2}, "custom_metrics": {"sparse_reward_mean": 168.8, "sparse_reward_min": 60, "sparse_reward_max": 180, "shaped_reward_mean": 148.8, "shaped_reward_min": 60, "shaped_reward_max": 173, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.56, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 21, 
"onion_pickup_agent_1_mean": 14.87, "onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 14.01, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 14.35, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.73, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.83, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 7, "useful_onion_drop_agent_0_mean": 0.27, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.48, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 6, "potting_onion_agent_0_mean": 13.44, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 13.78, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.2, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 4.96, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 4.11, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 3.82, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.67, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 0.52, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.22, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 4, "useful_dish_drop_agent_1_mean": 0.16, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.46, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 4.22, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 4.42, 
"soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.16, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 13.44, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 13.78, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 13.44, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 13.78, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, 
"useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [519.0, 464.0, 468.0, 519.0, 513.0, 522.0, 519.0, 413.0, 516.0, 425.0, 459.0, 465.0, 467.0, 449.0, 513.0, 513.0, 452.0, 513.0, 447.0, 519.0, 513.0, 510.0, 516.0, 473.0, 465.0, 519.0, 465.0, 458.0, 465.0, 456.0, 456.0, 459.0, 516.0, 441.0, 516.0, 473.0, 453.0, 519.0, 455.0, 519.0, 519.0, 510.0, 180.0, 516.0, 473.0, 516.0, 462.0, 522.0, 470.0, 507.0, 419.0, 525.0, 507.0, 470.0, 533.0, 462.0, 513.0, 519.0, 495.0, 462.0, 519.0, 513.0, 513.0, 456.0, 468.0, 465.0, 516.0, 519.0, 519.0, 522.0, 467.0, 479.0, 516.0, 513.0, 516.0, 425.0, 467.0, 504.0, 470.0, 516.0, 516.0, 473.0, 513.0, 473.0, 462.0, 450.0, 516.0, 402.0, 459.0, 513.0, 516.0, 513.0, 516.0, 470.0, 519.0, 462.0, 522.0, 522.0, 516.0, 522.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [257.0, 262.0, 242.0, 222.0, 236.0, 232.0, 258.0, 261.0, 254.0, 259.0, 260.0, 262.0, 248.0, 271.0, 211.0, 202.0, 261.0, 255.0, 213.0, 212.0, 228.0, 231.0, 242.0, 223.0, 231.0, 236.0, 228.0, 221.0, 243.0, 270.0, 259.0, 254.0, 213.0, 239.0, 270.0, 243.0, 226.0, 221.0, 262.0, 257.0, 253.0, 260.0, 247.0, 
263.0, 252.0, 264.0, 241.0, 232.0, 235.0, 230.0, 259.0, 260.0, 234.0, 231.0, 213.0, 245.0, 236.0, 229.0, 236.0, 220.0, 243.0, 213.0, 245.0, 214.0, 260.0, 256.0, 205.0, 236.0, 262.0, 254.0, 235.0, 238.0, 225.0, 228.0, 253.0, 266.0, 227.0, 228.0, 268.0, 251.0, 258.0, 261.0, 259.0, 251.0, 91.0, 89.0, 261.0, 255.0, 238.0, 235.0, 254.0, 262.0, 236.0, 226.0, 268.0, 254.0, 239.0, 231.0, 253.0, 254.0, 204.0, 215.0, 267.0, 258.0, 256.0, 251.0, 247.0, 223.0, 273.0, 260.0, 210.0, 252.0, 254.0, 259.0, 267.0, 252.0, 241.0, 254.0, 231.0, 231.0, 264.0, 255.0, 254.0, 259.0, 255.0, 258.0, 237.0, 219.0, 236.0, 232.0, 228.0, 237.0, 262.0, 254.0, 264.0, 255.0, 257.0, 262.0, 254.0, 268.0, 222.0, 245.0, 247.0, 232.0, 266.0, 250.0, 247.0, 266.0, 255.0, 261.0, 196.0, 229.0, 240.0, 227.0, 246.0, 258.0, 244.0, 226.0, 246.0, 270.0, 256.0, 260.0, 233.0, 240.0, 259.0, 254.0, 246.0, 227.0, 231.0, 231.0, 229.0, 221.0, 265.0, 251.0, 216.0, 186.0, 225.0, 234.0, 267.0, 246.0, 257.0, 259.0, 254.0, 259.0, 260.0, 256.0, 244.0, 226.0, 261.0, 258.0, 230.0, 232.0, 257.0, 265.0, 263.0, 259.0, 261.0, 255.0, 263.0, 259.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6851621848436128, "mean_inference_ms": 1.2078610476021172, "mean_action_processing_ms": 0.1326424297598641, "mean_env_wait_ms": 0.8490743544207925, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 533.0, "episode_reward_min": 180.0, "episode_reward_mean": 486.4, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 89.0}, "policy_reward_max": {"ppo": 273.0}, "policy_reward_mean": {"ppo": 243.2}, "hist_stats": {"episode_reward": [519.0, 464.0, 468.0, 519.0, 513.0, 522.0, 519.0, 413.0, 516.0, 425.0, 459.0, 465.0, 467.0, 449.0, 513.0, 513.0, 452.0, 513.0, 447.0, 519.0, 513.0, 510.0, 516.0, 473.0, 465.0, 519.0, 465.0, 458.0, 465.0, 456.0, 456.0, 459.0, 516.0, 441.0, 516.0, 473.0, 453.0, 519.0, 455.0, 519.0, 519.0, 510.0, 180.0, 516.0, 473.0, 516.0, 462.0, 522.0, 470.0, 507.0, 419.0, 
525.0, 507.0, 470.0, 533.0, 462.0, 513.0, 519.0, 495.0, 462.0, 519.0, 513.0, 513.0, 456.0, 468.0, 465.0, 516.0, 519.0, 519.0, 522.0, 467.0, 479.0, 516.0, 513.0, 516.0, 425.0, 467.0, 504.0, 470.0, 516.0, 516.0, 473.0, 513.0, 473.0, 462.0, 450.0, 516.0, 402.0, 459.0, 513.0, 516.0, 513.0, 516.0, 470.0, 519.0, 462.0, 522.0, 522.0, 516.0, 522.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [257.0, 262.0, 242.0, 222.0, 236.0, 232.0, 258.0, 261.0, 254.0, 259.0, 260.0, 262.0, 248.0, 271.0, 211.0, 202.0, 261.0, 255.0, 213.0, 212.0, 228.0, 231.0, 242.0, 223.0, 231.0, 236.0, 228.0, 221.0, 243.0, 270.0, 259.0, 254.0, 213.0, 239.0, 270.0, 243.0, 226.0, 221.0, 262.0, 257.0, 253.0, 260.0, 247.0, 263.0, 252.0, 264.0, 241.0, 232.0, 235.0, 230.0, 259.0, 260.0, 234.0, 231.0, 213.0, 245.0, 236.0, 229.0, 236.0, 220.0, 243.0, 213.0, 245.0, 214.0, 260.0, 256.0, 205.0, 236.0, 262.0, 254.0, 235.0, 238.0, 225.0, 228.0, 253.0, 266.0, 227.0, 228.0, 268.0, 251.0, 258.0, 261.0, 259.0, 251.0, 91.0, 89.0, 261.0, 255.0, 238.0, 235.0, 254.0, 262.0, 236.0, 226.0, 268.0, 254.0, 239.0, 231.0, 253.0, 254.0, 204.0, 215.0, 267.0, 258.0, 256.0, 251.0, 247.0, 223.0, 273.0, 260.0, 210.0, 252.0, 254.0, 259.0, 267.0, 252.0, 241.0, 254.0, 231.0, 231.0, 264.0, 255.0, 254.0, 259.0, 255.0, 258.0, 237.0, 219.0, 236.0, 232.0, 228.0, 237.0, 262.0, 254.0, 264.0, 255.0, 257.0, 262.0, 254.0, 268.0, 222.0, 245.0, 247.0, 232.0, 266.0, 250.0, 247.0, 266.0, 255.0, 261.0, 196.0, 229.0, 240.0, 227.0, 246.0, 258.0, 244.0, 226.0, 246.0, 
270.0, 256.0, 260.0, 233.0, 240.0, 259.0, 254.0, 246.0, 227.0, 231.0, 231.0, 229.0, 221.0, 265.0, 251.0, 216.0, 186.0, 225.0, 234.0, 267.0, 246.0, 257.0, 259.0, 254.0, 259.0, 260.0, 256.0, 244.0, 226.0, 261.0, 258.0, 230.0, 232.0, 257.0, 265.0, 263.0, 259.0, 261.0, 255.0, 263.0, 259.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6851621848436128, "mean_inference_ms": 1.2078610476021172, "mean_action_processing_ms": 0.1326424297598641, "mean_env_wait_ms": 0.8490743544207925, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 2764800, "num_agent_steps_trained": 2764800, "num_env_steps_sampled": 1382400, "num_env_steps_trained": 1382400, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 1382400, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 2764800, "timers": {"training_iteration_time_ms": 3642.999, "learn_time_ms": 1092.393, "learn_throughput": 11717.398, "synch_weights_time_ms": 11.784}, "counters": {"num_env_steps_sampled": 1382400, "num_env_steps_trained": 1382400, "num_agent_steps_sampled": 2764800, "num_agent_steps_trained": 2764800}, "done": false, "episodes_total": 3456, "training_iteration": 108, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-06-58", "timestamp": 1666580818, "time_this_iter_s": 3.658635377883911, "time_total_s": 406.75011920928955, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, 
"device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, 
"sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": 
{"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", 
"observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, 
"keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function 
get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function 
gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 406.75011920928955, "timesteps_since_restore": 0, "iterations_since_restore": 108, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.779999999999998, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 169.4, "sparse_reward_min": 60, "sparse_reward_max": 180, "shaped_reward_mean": 150.21, "shaped_reward_min": 60, "shaped_reward_max": 173, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.21, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 15.21, "onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 13.67, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 14.64, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.68, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.76, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.24, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, 
"useful_onion_drop_agent_1_mean": 0.43, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 13.16, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 14.2, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.24, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 4.79, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 4.32, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 3.84, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.53, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.45, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.09, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.11, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 4.58, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.17, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.55, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.07, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.07, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 13.16, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 14.2, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 13.16, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 14.2, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 6.162975913874651e-34, "cur_lr": 0.0010000000474974513, "total_loss": -0.0027838372625410557, "policy_loss": -0.002956016920506954, 
"vf_loss": 7.4473161697387695, "vf_explained_var": 0.7100299596786499, "kl": 0.0014991976786404848, "entropy": 1.1451025009155273, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 1395200, "num_env_steps_trained": 1395200, "num_agent_steps_sampled": 2790400, "num_agent_steps_trained": 2790400}, "sampler_results": {"episode_reward_max": 533.0, "episode_reward_min": 180.0, "episode_reward_mean": 489.01, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 89.0}, "policy_reward_max": {"ppo": 274.0}, "policy_reward_mean": {"ppo": 244.505}, "custom_metrics": {"sparse_reward_mean": 169.4, "sparse_reward_min": 60, "sparse_reward_max": 180, "shaped_reward_mean": 150.21, "shaped_reward_min": 60, "shaped_reward_max": 173, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.21, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 15.21, 
"onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 13.67, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 14.64, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.68, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.76, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.24, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.43, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 13.16, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 14.2, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.24, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 4.79, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 4.32, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 3.84, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.53, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.45, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.09, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.11, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 4.58, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.17, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.55, "soup_delivery_agent_0_min": 1, 
"soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.07, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.07, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 13.16, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 14.2, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 13.16, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 14.2, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [516.0, 441.0, 516.0, 473.0, 453.0, 519.0, 455.0, 519.0, 519.0, 510.0, 180.0, 516.0, 473.0, 516.0, 462.0, 522.0, 470.0, 507.0, 419.0, 525.0, 507.0, 470.0, 533.0, 462.0, 513.0, 519.0, 495.0, 462.0, 519.0, 513.0, 513.0, 456.0, 468.0, 465.0, 516.0, 519.0, 519.0, 522.0, 467.0, 479.0, 516.0, 513.0, 516.0, 425.0, 467.0, 504.0, 470.0, 516.0, 516.0, 473.0, 513.0, 473.0, 462.0, 450.0, 516.0, 402.0, 459.0, 513.0, 516.0, 513.0, 516.0, 470.0, 519.0, 462.0, 522.0, 522.0, 516.0, 522.0, 458.0, 468.0, 522.0, 513.0, 525.0, 404.0, 516.0, 476.0, 527.0, 464.0, 516.0, 376.0, 513.0, 522.0, 507.0, 468.0, 467.0, 516.0, 522.0, 513.0, 519.0, 516.0, 522.0, 519.0, 516.0, 356.0, 465.0, 513.0, 470.0, 519.0, 513.0, 470.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [260.0, 256.0, 205.0, 236.0, 262.0, 254.0, 235.0, 238.0, 225.0, 228.0, 253.0, 266.0, 227.0, 228.0, 268.0, 251.0, 258.0, 261.0, 259.0, 251.0, 91.0, 89.0, 261.0, 255.0, 238.0, 235.0, 254.0, 262.0, 236.0, 226.0, 268.0, 254.0, 239.0, 231.0, 253.0, 254.0, 204.0, 215.0, 267.0, 258.0, 256.0, 251.0, 247.0, 223.0, 273.0, 260.0, 210.0, 252.0, 254.0, 
259.0, 267.0, 252.0, 241.0, 254.0, 231.0, 231.0, 264.0, 255.0, 254.0, 259.0, 255.0, 258.0, 237.0, 219.0, 236.0, 232.0, 228.0, 237.0, 262.0, 254.0, 264.0, 255.0, 257.0, 262.0, 254.0, 268.0, 222.0, 245.0, 247.0, 232.0, 266.0, 250.0, 247.0, 266.0, 255.0, 261.0, 196.0, 229.0, 240.0, 227.0, 246.0, 258.0, 244.0, 226.0, 246.0, 270.0, 256.0, 260.0, 233.0, 240.0, 259.0, 254.0, 246.0, 227.0, 231.0, 231.0, 229.0, 221.0, 265.0, 251.0, 216.0, 186.0, 225.0, 234.0, 267.0, 246.0, 257.0, 259.0, 254.0, 259.0, 260.0, 256.0, 244.0, 226.0, 261.0, 258.0, 230.0, 232.0, 257.0, 265.0, 263.0, 259.0, 261.0, 255.0, 263.0, 259.0, 224.0, 234.0, 237.0, 231.0, 255.0, 267.0, 254.0, 259.0, 251.0, 274.0, 216.0, 188.0, 260.0, 256.0, 239.0, 237.0, 263.0, 264.0, 232.0, 232.0, 251.0, 265.0, 186.0, 190.0, 272.0, 241.0, 261.0, 261.0, 245.0, 262.0, 226.0, 242.0, 242.0, 225.0, 258.0, 258.0, 270.0, 252.0, 259.0, 254.0, 255.0, 264.0, 259.0, 257.0, 267.0, 255.0, 267.0, 252.0, 259.0, 257.0, 184.0, 172.0, 237.0, 228.0, 248.0, 265.0, 225.0, 245.0, 256.0, 263.0, 246.0, 267.0, 231.0, 239.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6851414554562826, "mean_inference_ms": 1.2077767111984112, "mean_action_processing_ms": 0.13264407790813673, "mean_env_wait_ms": 0.8487302506137289, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 533.0, "episode_reward_min": 180.0, "episode_reward_mean": 489.01, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 89.0}, "policy_reward_max": {"ppo": 274.0}, "policy_reward_mean": {"ppo": 244.505}, "hist_stats": {"episode_reward": [516.0, 441.0, 516.0, 473.0, 453.0, 519.0, 455.0, 519.0, 519.0, 510.0, 180.0, 516.0, 473.0, 516.0, 462.0, 522.0, 470.0, 507.0, 419.0, 525.0, 507.0, 470.0, 533.0, 462.0, 513.0, 519.0, 495.0, 462.0, 519.0, 513.0, 513.0, 456.0, 468.0, 465.0, 516.0, 519.0, 519.0, 522.0, 467.0, 479.0, 516.0, 513.0, 516.0, 425.0, 467.0, 504.0, 470.0, 516.0, 516.0, 473.0, 513.0, 473.0, 462.0, 450.0, 516.0, 402.0, 459.0, 
513.0, 516.0, 513.0, 516.0, 470.0, 519.0, 462.0, 522.0, 522.0, 516.0, 522.0, 458.0, 468.0, 522.0, 513.0, 525.0, 404.0, 516.0, 476.0, 527.0, 464.0, 516.0, 376.0, 513.0, 522.0, 507.0, 468.0, 467.0, 516.0, 522.0, 513.0, 519.0, 516.0, 522.0, 519.0, 516.0, 356.0, 465.0, 513.0, 470.0, 519.0, 513.0, 470.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [260.0, 256.0, 205.0, 236.0, 262.0, 254.0, 235.0, 238.0, 225.0, 228.0, 253.0, 266.0, 227.0, 228.0, 268.0, 251.0, 258.0, 261.0, 259.0, 251.0, 91.0, 89.0, 261.0, 255.0, 238.0, 235.0, 254.0, 262.0, 236.0, 226.0, 268.0, 254.0, 239.0, 231.0, 253.0, 254.0, 204.0, 215.0, 267.0, 258.0, 256.0, 251.0, 247.0, 223.0, 273.0, 260.0, 210.0, 252.0, 254.0, 259.0, 267.0, 252.0, 241.0, 254.0, 231.0, 231.0, 264.0, 255.0, 254.0, 259.0, 255.0, 258.0, 237.0, 219.0, 236.0, 232.0, 228.0, 237.0, 262.0, 254.0, 264.0, 255.0, 257.0, 262.0, 254.0, 268.0, 222.0, 245.0, 247.0, 232.0, 266.0, 250.0, 247.0, 266.0, 255.0, 261.0, 196.0, 229.0, 240.0, 227.0, 246.0, 258.0, 244.0, 226.0, 246.0, 270.0, 256.0, 260.0, 233.0, 240.0, 259.0, 254.0, 246.0, 227.0, 231.0, 231.0, 229.0, 221.0, 265.0, 251.0, 216.0, 186.0, 225.0, 234.0, 267.0, 246.0, 257.0, 259.0, 254.0, 259.0, 260.0, 256.0, 244.0, 226.0, 261.0, 258.0, 230.0, 232.0, 257.0, 265.0, 263.0, 259.0, 261.0, 255.0, 263.0, 259.0, 224.0, 234.0, 237.0, 231.0, 255.0, 267.0, 254.0, 259.0, 251.0, 274.0, 216.0, 188.0, 260.0, 256.0, 239.0, 237.0, 263.0, 264.0, 232.0, 232.0, 251.0, 265.0, 186.0, 190.0, 272.0, 241.0, 261.0, 261.0, 245.0, 
262.0, 226.0, 242.0, 242.0, 225.0, 258.0, 258.0, 270.0, 252.0, 259.0, 254.0, 255.0, 264.0, 259.0, 257.0, 267.0, 255.0, 267.0, 252.0, 259.0, 257.0, 184.0, 172.0, 237.0, 228.0, 248.0, 265.0, 225.0, 245.0, 256.0, 263.0, 246.0, 267.0, 231.0, 239.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6851414554562826, "mean_inference_ms": 1.2077767111984112, "mean_action_processing_ms": 0.13264407790813673, "mean_env_wait_ms": 0.8487302506137289, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 2790400, "num_agent_steps_trained": 2790400, "num_env_steps_sampled": 1395200, "num_env_steps_trained": 1395200, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 1395200, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 2790400, "timers": {"training_iteration_time_ms": 3657.592, "learn_time_ms": 1102.772, "learn_throughput": 11607.116, "synch_weights_time_ms": 11.338}, "counters": {"num_env_steps_sampled": 1395200, "num_env_steps_trained": 1395200, "num_agent_steps_sampled": 2790400, "num_agent_steps_trained": 2790400}, "done": false, "episodes_total": 3488, "training_iteration": 109, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-07-02", "timestamp": 1666580822, "time_this_iter_s": 3.695673704147339, "time_total_s": 410.4457929134369, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 410.4457929134369, "timesteps_since_restore": 0, "iterations_since_restore": 109, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 21.15, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 170.6, "sparse_reward_min": 120, "sparse_reward_max": 180, "shaped_reward_mean": 151.82, "shaped_reward_min": 116, "shaped_reward_max": 167, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.59, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 14.99, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 14.05, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 14.3, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.66, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.81, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.3, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, 
"useful_onion_drop_agent_1_mean": 0.52, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 13.59, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 13.94, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.12, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 4.9, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.29, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.12, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.52, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 0.38, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.07, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.07, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 4.47, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.34, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.42, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.27, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.04, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 13.59, "optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 13.94, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 13.59, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 13.94, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 3.0814879569373254e-34, "cur_lr": 0.0010000000474974513, "total_loss": 0.0004494154709391296, "policy_loss": 
0.00027276657056063414, "vf_loss": 7.438180446624756, "vf_explained_var": 0.6973411440849304, "kl": 0.0017573704244568944, "entropy": 1.134337067604065, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 1408000, "num_env_steps_trained": 1408000, "num_agent_steps_sampled": 2816000, "num_agent_steps_trained": 2816000}, "sampler_results": {"episode_reward_max": 527.0, "episode_reward_min": 356.0, "episode_reward_mean": 493.02, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 172.0}, "policy_reward_max": {"ppo": 279.0}, "policy_reward_mean": {"ppo": 246.51}, "custom_metrics": {"sparse_reward_mean": 170.6, "sparse_reward_min": 120, "sparse_reward_max": 180, "shaped_reward_mean": 151.82, "shaped_reward_min": 116, "shaped_reward_max": 167, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.59, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 22, 
"onion_pickup_agent_1_mean": 14.99, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 14.05, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 14.3, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.66, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.81, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.3, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.52, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 13.59, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 13.94, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.12, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 4.9, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.29, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.12, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.52, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 0.38, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.07, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.07, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 4.47, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.34, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.42, 
"soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.27, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.04, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 13.59, "optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 13.94, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 13.59, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 13.94, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, 
"useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [468.0, 465.0, 516.0, 519.0, 519.0, 522.0, 467.0, 479.0, 516.0, 513.0, 516.0, 425.0, 467.0, 504.0, 470.0, 516.0, 516.0, 473.0, 513.0, 473.0, 462.0, 450.0, 516.0, 402.0, 459.0, 513.0, 516.0, 513.0, 516.0, 470.0, 519.0, 462.0, 522.0, 522.0, 516.0, 522.0, 458.0, 468.0, 522.0, 513.0, 525.0, 404.0, 516.0, 476.0, 527.0, 464.0, 516.0, 376.0, 513.0, 522.0, 507.0, 468.0, 467.0, 516.0, 522.0, 513.0, 519.0, 516.0, 522.0, 519.0, 516.0, 356.0, 465.0, 513.0, 470.0, 519.0, 513.0, 470.0, 473.0, 513.0, 510.0, 519.0, 465.0, 465.0, 516.0, 510.0, 513.0, 522.0, 513.0, 522.0, 516.0, 516.0, 462.0, 473.0, 459.0, 519.0, 470.0, 465.0, 519.0, 513.0, 513.0, 458.0, 479.0, 513.0, 522.0, 456.0, 524.0, 473.0, 465.0, 518.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [236.0, 232.0, 228.0, 237.0, 262.0, 254.0, 264.0, 255.0, 257.0, 262.0, 254.0, 268.0, 222.0, 245.0, 247.0, 232.0, 266.0, 250.0, 247.0, 266.0, 255.0, 261.0, 196.0, 229.0, 240.0, 227.0, 246.0, 258.0, 244.0, 226.0, 246.0, 270.0, 256.0, 260.0, 233.0, 240.0, 259.0, 254.0, 246.0, 227.0, 231.0, 231.0, 229.0, 
221.0, 265.0, 251.0, 216.0, 186.0, 225.0, 234.0, 267.0, 246.0, 257.0, 259.0, 254.0, 259.0, 260.0, 256.0, 244.0, 226.0, 261.0, 258.0, 230.0, 232.0, 257.0, 265.0, 263.0, 259.0, 261.0, 255.0, 263.0, 259.0, 224.0, 234.0, 237.0, 231.0, 255.0, 267.0, 254.0, 259.0, 251.0, 274.0, 216.0, 188.0, 260.0, 256.0, 239.0, 237.0, 263.0, 264.0, 232.0, 232.0, 251.0, 265.0, 186.0, 190.0, 272.0, 241.0, 261.0, 261.0, 245.0, 262.0, 226.0, 242.0, 242.0, 225.0, 258.0, 258.0, 270.0, 252.0, 259.0, 254.0, 255.0, 264.0, 259.0, 257.0, 267.0, 255.0, 267.0, 252.0, 259.0, 257.0, 184.0, 172.0, 237.0, 228.0, 248.0, 265.0, 225.0, 245.0, 256.0, 263.0, 246.0, 267.0, 231.0, 239.0, 252.0, 221.0, 249.0, 264.0, 269.0, 241.0, 254.0, 265.0, 233.0, 232.0, 230.0, 235.0, 259.0, 257.0, 253.0, 257.0, 251.0, 262.0, 243.0, 279.0, 264.0, 249.0, 259.0, 263.0, 244.0, 272.0, 263.0, 253.0, 244.0, 218.0, 230.0, 243.0, 241.0, 218.0, 253.0, 266.0, 244.0, 226.0, 223.0, 242.0, 258.0, 261.0, 254.0, 259.0, 250.0, 263.0, 232.0, 226.0, 226.0, 253.0, 261.0, 252.0, 271.0, 251.0, 226.0, 230.0, 275.0, 249.0, 232.0, 241.0, 233.0, 232.0, 261.0, 257.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6851207606609288, "mean_inference_ms": 1.2077257093049802, "mean_action_processing_ms": 0.13264280811149973, "mean_env_wait_ms": 0.8484066460590849, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 527.0, "episode_reward_min": 356.0, "episode_reward_mean": 493.02, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 172.0}, "policy_reward_max": {"ppo": 279.0}, "policy_reward_mean": {"ppo": 246.51}, "hist_stats": {"episode_reward": [468.0, 465.0, 516.0, 519.0, 519.0, 522.0, 467.0, 479.0, 516.0, 513.0, 516.0, 425.0, 467.0, 504.0, 470.0, 516.0, 516.0, 473.0, 513.0, 473.0, 462.0, 450.0, 516.0, 402.0, 459.0, 513.0, 516.0, 513.0, 516.0, 470.0, 519.0, 462.0, 522.0, 522.0, 516.0, 522.0, 458.0, 468.0, 522.0, 513.0, 525.0, 404.0, 516.0, 476.0, 527.0, 464.0, 516.0, 376.0, 513.0, 522.0, 507.0, 
468.0, 467.0, 516.0, 522.0, 513.0, 519.0, 516.0, 522.0, 519.0, 516.0, 356.0, 465.0, 513.0, 470.0, 519.0, 513.0, 470.0, 473.0, 513.0, 510.0, 519.0, 465.0, 465.0, 516.0, 510.0, 513.0, 522.0, 513.0, 522.0, 516.0, 516.0, 462.0, 473.0, 459.0, 519.0, 470.0, 465.0, 519.0, 513.0, 513.0, 458.0, 479.0, 513.0, 522.0, 456.0, 524.0, 473.0, 465.0, 518.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [236.0, 232.0, 228.0, 237.0, 262.0, 254.0, 264.0, 255.0, 257.0, 262.0, 254.0, 268.0, 222.0, 245.0, 247.0, 232.0, 266.0, 250.0, 247.0, 266.0, 255.0, 261.0, 196.0, 229.0, 240.0, 227.0, 246.0, 258.0, 244.0, 226.0, 246.0, 270.0, 256.0, 260.0, 233.0, 240.0, 259.0, 254.0, 246.0, 227.0, 231.0, 231.0, 229.0, 221.0, 265.0, 251.0, 216.0, 186.0, 225.0, 234.0, 267.0, 246.0, 257.0, 259.0, 254.0, 259.0, 260.0, 256.0, 244.0, 226.0, 261.0, 258.0, 230.0, 232.0, 257.0, 265.0, 263.0, 259.0, 261.0, 255.0, 263.0, 259.0, 224.0, 234.0, 237.0, 231.0, 255.0, 267.0, 254.0, 259.0, 251.0, 274.0, 216.0, 188.0, 260.0, 256.0, 239.0, 237.0, 263.0, 264.0, 232.0, 232.0, 251.0, 265.0, 186.0, 190.0, 272.0, 241.0, 261.0, 261.0, 245.0, 262.0, 226.0, 242.0, 242.0, 225.0, 258.0, 258.0, 270.0, 252.0, 259.0, 254.0, 255.0, 264.0, 259.0, 257.0, 267.0, 255.0, 267.0, 252.0, 259.0, 257.0, 184.0, 172.0, 237.0, 228.0, 248.0, 265.0, 225.0, 245.0, 256.0, 263.0, 246.0, 267.0, 231.0, 239.0, 252.0, 221.0, 249.0, 264.0, 269.0, 241.0, 254.0, 265.0, 233.0, 232.0, 230.0, 235.0, 259.0, 257.0, 253.0, 257.0, 251.0, 262.0, 243.0, 279.0, 264.0, 249.0, 259.0, 
263.0, 244.0, 272.0, 263.0, 253.0, 244.0, 218.0, 230.0, 243.0, 241.0, 218.0, 253.0, 266.0, 244.0, 226.0, 223.0, 242.0, 258.0, 261.0, 254.0, 259.0, 250.0, 263.0, 232.0, 226.0, 226.0, 253.0, 261.0, 252.0, 271.0, 251.0, 226.0, 230.0, 275.0, 249.0, 232.0, 241.0, 233.0, 232.0, 261.0, 257.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6851207606609288, "mean_inference_ms": 1.2077257093049802, "mean_action_processing_ms": 0.13264280811149973, "mean_env_wait_ms": 0.8484066460590849, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 2816000, "num_agent_steps_trained": 2816000, "num_env_steps_sampled": 1408000, "num_env_steps_trained": 1408000, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 1408000, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 2816000, "timers": {"training_iteration_time_ms": 3660.1, "learn_time_ms": 1095.962, "learn_throughput": 11679.235, "synch_weights_time_ms": 11.779}, "counters": {"num_env_steps_sampled": 1408000, "num_env_steps_trained": 1408000, "num_agent_steps_sampled": 2816000, "num_agent_steps_trained": 2816000}, "done": false, "episodes_total": 3520, "training_iteration": 110, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-07-06", "timestamp": 1666580826, "time_this_iter_s": 3.6962802410125732, "time_total_s": 414.14207315444946, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, 
"device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, 
"sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": 
{"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", 
"observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, 
"keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function 
get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function 
gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 414.14207315444946, "timesteps_since_restore": 0, "iterations_since_restore": 110, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.86, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 171.4, "sparse_reward_min": 120, "sparse_reward_max": 180, "shaped_reward_mean": 152.62, "shaped_reward_min": 116, "shaped_reward_max": 167, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.24, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 14.63, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 14.61, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 13.9, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 20, "onion_drop_agent_0_mean": 0.74, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.8, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.48, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, 
"useful_onion_drop_agent_1_mean": 0.47, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 14.21, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 13.49, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 20, "dish_pickup_agent_0_mean": 4.87, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 5.08, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.13, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.36, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.43, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 0.36, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.1, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.07, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 4.23, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.59, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.18, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.54, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.04, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 14.21, "optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 13.49, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 20, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.21, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 13.49, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 20, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 1.5407439784686627e-34, "cur_lr": 0.0010000000474974513, "total_loss": 0.0007949502905830741, "policy_loss": 
0.0006166819366626441, "vf_loss": 7.434291839599609, "vf_explained_var": 0.6929993629455566, "kl": 0.00210048770532012, "entropy": 1.1303188800811768, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 1420800, "num_env_steps_trained": 1420800, "num_agent_steps_sampled": 2841600, "num_agent_steps_trained": 2841600}, "sampler_results": {"episode_reward_max": 527.0, "episode_reward_min": 356.0, "episode_reward_mean": 495.42, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 172.0}, "policy_reward_max": {"ppo": 279.0}, "policy_reward_mean": {"ppo": 247.71}, "custom_metrics": {"sparse_reward_mean": 171.4, "sparse_reward_min": 120, "sparse_reward_max": 180, "shaped_reward_mean": 152.62, "shaped_reward_min": 116, "shaped_reward_max": 167, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.24, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 22, 
"onion_pickup_agent_1_mean": 14.63, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 14.61, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 13.9, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 20, "onion_drop_agent_0_mean": 0.74, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.8, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.48, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.47, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 14.21, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 13.49, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 20, "dish_pickup_agent_0_mean": 4.87, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 5.08, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.13, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.36, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.43, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 0.36, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.1, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.07, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 4.23, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.59, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.18, 
"soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.54, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.04, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 14.21, "optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 13.49, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 20, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.21, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 13.49, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 20, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, 
"useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [522.0, 522.0, 516.0, 522.0, 458.0, 468.0, 522.0, 513.0, 525.0, 404.0, 516.0, 476.0, 527.0, 464.0, 516.0, 376.0, 513.0, 522.0, 507.0, 468.0, 467.0, 516.0, 522.0, 513.0, 519.0, 516.0, 522.0, 519.0, 516.0, 356.0, 465.0, 513.0, 470.0, 519.0, 513.0, 470.0, 473.0, 513.0, 510.0, 519.0, 465.0, 465.0, 516.0, 510.0, 513.0, 522.0, 513.0, 522.0, 516.0, 516.0, 462.0, 473.0, 459.0, 519.0, 470.0, 465.0, 519.0, 513.0, 513.0, 458.0, 479.0, 513.0, 522.0, 456.0, 524.0, 473.0, 465.0, 518.0, 513.0, 516.0, 519.0, 465.0, 516.0, 464.0, 519.0, 468.0, 522.0, 522.0, 527.0, 510.0, 513.0, 522.0, 519.0, 516.0, 465.0, 513.0, 473.0, 525.0, 465.0, 462.0, 468.0, 453.0, 462.0, 465.0, 516.0, 519.0, 513.0, 419.0, 519.0, 527.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [257.0, 265.0, 263.0, 259.0, 261.0, 255.0, 263.0, 259.0, 224.0, 234.0, 237.0, 231.0, 255.0, 267.0, 254.0, 259.0, 251.0, 274.0, 216.0, 188.0, 260.0, 256.0, 239.0, 237.0, 263.0, 264.0, 232.0, 232.0, 251.0, 265.0, 186.0, 190.0, 272.0, 241.0, 261.0, 261.0, 245.0, 262.0, 226.0, 242.0, 242.0, 225.0, 258.0, 
258.0, 270.0, 252.0, 259.0, 254.0, 255.0, 264.0, 259.0, 257.0, 267.0, 255.0, 267.0, 252.0, 259.0, 257.0, 184.0, 172.0, 237.0, 228.0, 248.0, 265.0, 225.0, 245.0, 256.0, 263.0, 246.0, 267.0, 231.0, 239.0, 252.0, 221.0, 249.0, 264.0, 269.0, 241.0, 254.0, 265.0, 233.0, 232.0, 230.0, 235.0, 259.0, 257.0, 253.0, 257.0, 251.0, 262.0, 243.0, 279.0, 264.0, 249.0, 259.0, 263.0, 244.0, 272.0, 263.0, 253.0, 244.0, 218.0, 230.0, 243.0, 241.0, 218.0, 253.0, 266.0, 244.0, 226.0, 223.0, 242.0, 258.0, 261.0, 254.0, 259.0, 250.0, 263.0, 232.0, 226.0, 226.0, 253.0, 261.0, 252.0, 271.0, 251.0, 226.0, 230.0, 275.0, 249.0, 232.0, 241.0, 233.0, 232.0, 261.0, 257.0, 260.0, 253.0, 266.0, 250.0, 262.0, 257.0, 226.0, 239.0, 267.0, 249.0, 231.0, 233.0, 265.0, 254.0, 239.0, 229.0, 261.0, 261.0, 260.0, 262.0, 272.0, 255.0, 250.0, 260.0, 258.0, 255.0, 260.0, 262.0, 257.0, 262.0, 269.0, 247.0, 234.0, 231.0, 238.0, 275.0, 235.0, 238.0, 260.0, 265.0, 225.0, 240.0, 238.0, 224.0, 221.0, 247.0, 239.0, 214.0, 238.0, 224.0, 237.0, 228.0, 261.0, 255.0, 252.0, 267.0, 257.0, 256.0, 201.0, 218.0, 245.0, 274.0, 257.0, 270.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6851923964426513, "mean_inference_ms": 1.2076839793340488, "mean_action_processing_ms": 0.13263903306444721, "mean_env_wait_ms": 0.8480824894398985, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 527.0, "episode_reward_min": 356.0, "episode_reward_mean": 495.42, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 172.0}, "policy_reward_max": {"ppo": 279.0}, "policy_reward_mean": {"ppo": 247.71}, "hist_stats": {"episode_reward": [522.0, 522.0, 516.0, 522.0, 458.0, 468.0, 522.0, 513.0, 525.0, 404.0, 516.0, 476.0, 527.0, 464.0, 516.0, 376.0, 513.0, 522.0, 507.0, 468.0, 467.0, 516.0, 522.0, 513.0, 519.0, 516.0, 522.0, 519.0, 516.0, 356.0, 465.0, 513.0, 470.0, 519.0, 513.0, 470.0, 473.0, 513.0, 510.0, 519.0, 465.0, 465.0, 516.0, 510.0, 513.0, 522.0, 513.0, 522.0, 516.0, 516.0, 462.0, 
473.0, 459.0, 519.0, 470.0, 465.0, 519.0, 513.0, 513.0, 458.0, 479.0, 513.0, 522.0, 456.0, 524.0, 473.0, 465.0, 518.0, 513.0, 516.0, 519.0, 465.0, 516.0, 464.0, 519.0, 468.0, 522.0, 522.0, 527.0, 510.0, 513.0, 522.0, 519.0, 516.0, 465.0, 513.0, 473.0, 525.0, 465.0, 462.0, 468.0, 453.0, 462.0, 465.0, 516.0, 519.0, 513.0, 419.0, 519.0, 527.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [257.0, 265.0, 263.0, 259.0, 261.0, 255.0, 263.0, 259.0, 224.0, 234.0, 237.0, 231.0, 255.0, 267.0, 254.0, 259.0, 251.0, 274.0, 216.0, 188.0, 260.0, 256.0, 239.0, 237.0, 263.0, 264.0, 232.0, 232.0, 251.0, 265.0, 186.0, 190.0, 272.0, 241.0, 261.0, 261.0, 245.0, 262.0, 226.0, 242.0, 242.0, 225.0, 258.0, 258.0, 270.0, 252.0, 259.0, 254.0, 255.0, 264.0, 259.0, 257.0, 267.0, 255.0, 267.0, 252.0, 259.0, 257.0, 184.0, 172.0, 237.0, 228.0, 248.0, 265.0, 225.0, 245.0, 256.0, 263.0, 246.0, 267.0, 231.0, 239.0, 252.0, 221.0, 249.0, 264.0, 269.0, 241.0, 254.0, 265.0, 233.0, 232.0, 230.0, 235.0, 259.0, 257.0, 253.0, 257.0, 251.0, 262.0, 243.0, 279.0, 264.0, 249.0, 259.0, 263.0, 244.0, 272.0, 263.0, 253.0, 244.0, 218.0, 230.0, 243.0, 241.0, 218.0, 253.0, 266.0, 244.0, 226.0, 223.0, 242.0, 258.0, 261.0, 254.0, 259.0, 250.0, 263.0, 232.0, 226.0, 226.0, 253.0, 261.0, 252.0, 271.0, 251.0, 226.0, 230.0, 275.0, 249.0, 232.0, 241.0, 233.0, 232.0, 261.0, 257.0, 260.0, 253.0, 266.0, 250.0, 262.0, 257.0, 226.0, 239.0, 267.0, 249.0, 231.0, 233.0, 265.0, 254.0, 239.0, 229.0, 261.0, 261.0, 260.0, 262.0, 272.0, 255.0, 250.0, 
260.0, 258.0, 255.0, 260.0, 262.0, 257.0, 262.0, 269.0, 247.0, 234.0, 231.0, 238.0, 275.0, 235.0, 238.0, 260.0, 265.0, 225.0, 240.0, 238.0, 224.0, 221.0, 247.0, 239.0, 214.0, 238.0, 224.0, 237.0, 228.0, 261.0, 255.0, 252.0, 267.0, 257.0, 256.0, 201.0, 218.0, 245.0, 274.0, 257.0, 270.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6851923964426513, "mean_inference_ms": 1.2076839793340488, "mean_action_processing_ms": 0.13263903306444721, "mean_env_wait_ms": 0.8480824894398985, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 2841600, "num_agent_steps_trained": 2841600, "num_env_steps_sampled": 1420800, "num_env_steps_trained": 1420800, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 1420800, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 2841600, "timers": {"training_iteration_time_ms": 3665.378, "learn_time_ms": 1099.576, "learn_throughput": 11640.853, "synch_weights_time_ms": 11.683}, "counters": {"num_env_steps_sampled": 1420800, "num_env_steps_trained": 1420800, "num_agent_steps_sampled": 2841600, "num_agent_steps_trained": 2841600}, "done": false, "episodes_total": 3552, "training_iteration": 111, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-07-10", "timestamp": 1666580830, "time_this_iter_s": 3.7287049293518066, "time_total_s": 417.87077808380127, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, 
"device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, 
"sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": 
{"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", 
"observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, 
"keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function 
get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function 
gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 417.87077808380127, "timesteps_since_restore": 0, "iterations_since_restore": 111, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 21.116666666666667, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 172.6, "sparse_reward_min": 140, "sparse_reward_max": 180, "shaped_reward_mean": 153.23, "shaped_reward_min": 133, "shaped_reward_max": 173, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.16, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 14.68, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 14.51, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 14.01, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 20, "onion_drop_agent_0_mean": 0.65, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.72, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.47, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, 
"useful_onion_drop_agent_1_mean": 0.46, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 14.18, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 13.59, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 19, "dish_pickup_agent_0_mean": 4.8, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 5.16, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.1, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 4.44, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.44, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 0.36, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.15, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.05, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 4.18, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.69, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 4.12, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.66, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.18, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 13.59, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 19, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.18, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 13.59, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 19, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 7.703719892343314e-35, "cur_lr": 0.0010000000474974513, "total_loss": 0.0005474128993228078, "policy_loss": 0.0003818509867414832, 
"vf_loss": 7.298244476318359, "vf_explained_var": 0.6948171854019165, "kl": 0.0017750629922375083, "entropy": 1.1285228729248047, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 1433600, "num_env_steps_trained": 1433600, "num_agent_steps_sampled": 2867200, "num_agent_steps_trained": 2867200}, "sampler_results": {"episode_reward_max": 533.0, "episode_reward_min": 419.0, "episode_reward_mean": 498.43, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 201.0}, "policy_reward_max": {"ppo": 279.0}, "policy_reward_mean": {"ppo": 249.215}, "custom_metrics": {"sparse_reward_mean": 172.6, "sparse_reward_min": 140, "sparse_reward_max": 180, "shaped_reward_mean": 153.23, "shaped_reward_min": 133, "shaped_reward_max": 173, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.16, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 14.68, 
"onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 14.51, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 14.01, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 20, "onion_drop_agent_0_mean": 0.65, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.72, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.47, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.46, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 14.18, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 13.59, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 19, "dish_pickup_agent_0_mean": 4.8, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 5.16, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.1, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 4.44, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.44, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 0.36, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.15, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.05, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 4.18, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.69, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 4.12, "soup_delivery_agent_0_min": 0, 
"soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.66, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.18, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 13.59, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 19, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.18, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 13.59, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 19, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [470.0, 519.0, 513.0, 470.0, 473.0, 513.0, 510.0, 519.0, 465.0, 465.0, 516.0, 510.0, 513.0, 522.0, 513.0, 522.0, 516.0, 516.0, 462.0, 473.0, 459.0, 519.0, 470.0, 465.0, 519.0, 513.0, 513.0, 458.0, 479.0, 513.0, 522.0, 456.0, 524.0, 473.0, 465.0, 518.0, 513.0, 516.0, 519.0, 465.0, 516.0, 464.0, 519.0, 468.0, 522.0, 522.0, 527.0, 510.0, 513.0, 522.0, 519.0, 516.0, 465.0, 513.0, 473.0, 525.0, 465.0, 462.0, 468.0, 453.0, 462.0, 465.0, 516.0, 519.0, 513.0, 419.0, 519.0, 527.0, 424.0, 516.0, 513.0, 516.0, 513.0, 519.0, 516.0, 462.0, 516.0, 516.0, 516.0, 524.0, 533.0, 482.0, 516.0, 516.0, 510.0, 519.0, 470.0, 462.0, 513.0, 481.0, 525.0, 510.0, 513.0, 516.0, 516.0, 462.0, 510.0, 513.0, 462.0, 522.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [225.0, 245.0, 256.0, 263.0, 246.0, 267.0, 231.0, 239.0, 252.0, 221.0, 249.0, 264.0, 269.0, 241.0, 254.0, 265.0, 233.0, 232.0, 230.0, 235.0, 259.0, 257.0, 253.0, 257.0, 251.0, 262.0, 243.0, 279.0, 264.0, 249.0, 259.0, 263.0, 244.0, 272.0, 263.0, 253.0, 244.0, 218.0, 230.0, 243.0, 241.0, 218.0, 253.0, 266.0, 244.0, 226.0, 223.0, 242.0, 258.0, 
261.0, 254.0, 259.0, 250.0, 263.0, 232.0, 226.0, 226.0, 253.0, 261.0, 252.0, 271.0, 251.0, 226.0, 230.0, 275.0, 249.0, 232.0, 241.0, 233.0, 232.0, 261.0, 257.0, 260.0, 253.0, 266.0, 250.0, 262.0, 257.0, 226.0, 239.0, 267.0, 249.0, 231.0, 233.0, 265.0, 254.0, 239.0, 229.0, 261.0, 261.0, 260.0, 262.0, 272.0, 255.0, 250.0, 260.0, 258.0, 255.0, 260.0, 262.0, 257.0, 262.0, 269.0, 247.0, 234.0, 231.0, 238.0, 275.0, 235.0, 238.0, 260.0, 265.0, 225.0, 240.0, 238.0, 224.0, 221.0, 247.0, 239.0, 214.0, 238.0, 224.0, 237.0, 228.0, 261.0, 255.0, 252.0, 267.0, 257.0, 256.0, 201.0, 218.0, 245.0, 274.0, 257.0, 270.0, 206.0, 218.0, 264.0, 252.0, 243.0, 270.0, 253.0, 263.0, 262.0, 251.0, 257.0, 262.0, 259.0, 257.0, 229.0, 233.0, 250.0, 266.0, 265.0, 251.0, 255.0, 261.0, 256.0, 268.0, 262.0, 271.0, 249.0, 233.0, 256.0, 260.0, 256.0, 260.0, 252.0, 258.0, 247.0, 272.0, 251.0, 219.0, 225.0, 237.0, 242.0, 271.0, 240.0, 241.0, 254.0, 271.0, 258.0, 252.0, 264.0, 249.0, 250.0, 266.0, 258.0, 258.0, 229.0, 233.0, 256.0, 254.0, 260.0, 253.0, 214.0, 248.0, 276.0, 246.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6852829337082552, "mean_inference_ms": 1.207667498626502, "mean_action_processing_ms": 0.1326380195196134, "mean_env_wait_ms": 0.8477856021099819, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 533.0, "episode_reward_min": 419.0, "episode_reward_mean": 498.43, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 201.0}, "policy_reward_max": {"ppo": 279.0}, "policy_reward_mean": {"ppo": 249.215}, "hist_stats": {"episode_reward": [470.0, 519.0, 513.0, 470.0, 473.0, 513.0, 510.0, 519.0, 465.0, 465.0, 516.0, 510.0, 513.0, 522.0, 513.0, 522.0, 516.0, 516.0, 462.0, 473.0, 459.0, 519.0, 470.0, 465.0, 519.0, 513.0, 513.0, 458.0, 479.0, 513.0, 522.0, 456.0, 524.0, 473.0, 465.0, 518.0, 513.0, 516.0, 519.0, 465.0, 516.0, 464.0, 519.0, 468.0, 522.0, 522.0, 527.0, 510.0, 513.0, 522.0, 519.0, 516.0, 465.0, 513.0, 473.0, 525.0, 465.0, 
462.0, 468.0, 453.0, 462.0, 465.0, 516.0, 519.0, 513.0, 419.0, 519.0, 527.0, 424.0, 516.0, 513.0, 516.0, 513.0, 519.0, 516.0, 462.0, 516.0, 516.0, 516.0, 524.0, 533.0, 482.0, 516.0, 516.0, 510.0, 519.0, 470.0, 462.0, 513.0, 481.0, 525.0, 510.0, 513.0, 516.0, 516.0, 462.0, 510.0, 513.0, 462.0, 522.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [225.0, 245.0, 256.0, 263.0, 246.0, 267.0, 231.0, 239.0, 252.0, 221.0, 249.0, 264.0, 269.0, 241.0, 254.0, 265.0, 233.0, 232.0, 230.0, 235.0, 259.0, 257.0, 253.0, 257.0, 251.0, 262.0, 243.0, 279.0, 264.0, 249.0, 259.0, 263.0, 244.0, 272.0, 263.0, 253.0, 244.0, 218.0, 230.0, 243.0, 241.0, 218.0, 253.0, 266.0, 244.0, 226.0, 223.0, 242.0, 258.0, 261.0, 254.0, 259.0, 250.0, 263.0, 232.0, 226.0, 226.0, 253.0, 261.0, 252.0, 271.0, 251.0, 226.0, 230.0, 275.0, 249.0, 232.0, 241.0, 233.0, 232.0, 261.0, 257.0, 260.0, 253.0, 266.0, 250.0, 262.0, 257.0, 226.0, 239.0, 267.0, 249.0, 231.0, 233.0, 265.0, 254.0, 239.0, 229.0, 261.0, 261.0, 260.0, 262.0, 272.0, 255.0, 250.0, 260.0, 258.0, 255.0, 260.0, 262.0, 257.0, 262.0, 269.0, 247.0, 234.0, 231.0, 238.0, 275.0, 235.0, 238.0, 260.0, 265.0, 225.0, 240.0, 238.0, 224.0, 221.0, 247.0, 239.0, 214.0, 238.0, 224.0, 237.0, 228.0, 261.0, 255.0, 252.0, 267.0, 257.0, 256.0, 201.0, 218.0, 245.0, 274.0, 257.0, 270.0, 206.0, 218.0, 264.0, 252.0, 243.0, 270.0, 253.0, 263.0, 262.0, 251.0, 257.0, 262.0, 259.0, 257.0, 229.0, 233.0, 250.0, 266.0, 265.0, 251.0, 255.0, 261.0, 256.0, 268.0, 262.0, 271.0, 249.0, 233.0, 256.0, 
260.0, 256.0, 260.0, 252.0, 258.0, 247.0, 272.0, 251.0, 219.0, 225.0, 237.0, 242.0, 271.0, 240.0, 241.0, 254.0, 271.0, 258.0, 252.0, 264.0, 249.0, 250.0, 266.0, 258.0, 258.0, 229.0, 233.0, 256.0, 254.0, 260.0, 253.0, 214.0, 248.0, 276.0, 246.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6852829337082552, "mean_inference_ms": 1.207667498626502, "mean_action_processing_ms": 0.1326380195196134, "mean_env_wait_ms": 0.8477856021099819, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 2867200, "num_agent_steps_trained": 2867200, "num_env_steps_sampled": 1433600, "num_env_steps_trained": 1433600, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 1433600, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 2867200, "timers": {"training_iteration_time_ms": 3638.258, "learn_time_ms": 1106.563, "learn_throughput": 11567.347, "synch_weights_time_ms": 11.157}, "counters": {"num_env_steps_sampled": 1433600, "num_env_steps_trained": 1433600, "num_agent_steps_sampled": 2867200, "num_agent_steps_trained": 2867200}, "done": false, "episodes_total": 3584, "training_iteration": 112, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-07-14", "timestamp": 1666580834, "time_this_iter_s": 3.762916326522827, "time_total_s": 421.6336944103241, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 421.6336944103241, "timesteps_since_restore": 0, "iterations_since_restore": 112, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.799999999999997, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 173.4, "sparse_reward_min": 140, "sparse_reward_max": 180, "shaped_reward_mean": 153.91, "shaped_reward_min": 127, "shaped_reward_max": 173, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.12, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 14.61, "onion_pickup_agent_1_min": 5, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 14.54, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 14.05, "useful_onion_pickup_agent_1_min": 3, "useful_onion_pickup_agent_1_max": 20, "onion_drop_agent_0_mean": 0.58, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.64, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.44, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, 
"useful_onion_drop_agent_1_mean": 0.39, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 14.28, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 13.61, "potting_onion_agent_1_min": 4, "potting_onion_agent_1_max": 19, "dish_pickup_agent_0_mean": 4.9, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.13, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 4.15, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.43, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.45, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.34, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.18, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.07, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.26, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.66, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 10, "soup_delivery_agent_0_mean": 4.17, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.61, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.28, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 13.61, "optimal_onion_potting_agent_1_min": 4, "optimal_onion_potting_agent_1_max": 19, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.28, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 13.61, "viable_onion_potting_agent_1_min": 4, "viable_onion_potting_agent_1_max": 19, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 3.851859946171657e-35, "cur_lr": 0.0010000000474974513, "total_loss": 7.296015974134207e-05, "policy_loss": 
-0.00010429794201627374, "vf_loss": 7.440869331359863, "vf_explained_var": 0.6927081346511841, "kl": 0.0020735471043735743, "entropy": 1.1336543560028076, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 1446400, "num_env_steps_trained": 1446400, "num_agent_steps_sampled": 2892800, "num_agent_steps_trained": 2892800}, "sampler_results": {"episode_reward_max": 533.0, "episode_reward_min": 419.0, "episode_reward_mean": 500.71, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 201.0}, "policy_reward_max": {"ppo": 276.0}, "policy_reward_mean": {"ppo": 250.355}, "custom_metrics": {"sparse_reward_mean": 173.4, "sparse_reward_min": 140, "sparse_reward_max": 180, "shaped_reward_mean": 153.91, "shaped_reward_min": 127, "shaped_reward_max": 173, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.12, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 23, 
"onion_pickup_agent_1_mean": 14.61, "onion_pickup_agent_1_min": 5, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 14.54, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 14.05, "useful_onion_pickup_agent_1_min": 3, "useful_onion_pickup_agent_1_max": 20, "onion_drop_agent_0_mean": 0.58, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.64, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.44, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.39, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 14.28, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 13.61, "potting_onion_agent_1_min": 4, "potting_onion_agent_1_max": 19, "dish_pickup_agent_0_mean": 4.9, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.13, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 4.15, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.43, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.45, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.34, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.18, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.07, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.26, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.66, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 10, "soup_delivery_agent_0_mean": 4.17, 
"soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.61, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.28, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 13.61, "optimal_onion_potting_agent_1_min": 4, "optimal_onion_potting_agent_1_max": 19, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.28, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 13.61, "viable_onion_potting_agent_1_min": 4, "viable_onion_potting_agent_1_max": 19, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, 
"useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [524.0, 473.0, 465.0, 518.0, 513.0, 516.0, 519.0, 465.0, 516.0, 464.0, 519.0, 468.0, 522.0, 522.0, 527.0, 510.0, 513.0, 522.0, 519.0, 516.0, 465.0, 513.0, 473.0, 525.0, 465.0, 462.0, 468.0, 453.0, 462.0, 465.0, 516.0, 519.0, 513.0, 419.0, 519.0, 527.0, 424.0, 516.0, 513.0, 516.0, 513.0, 519.0, 516.0, 462.0, 516.0, 516.0, 516.0, 524.0, 533.0, 482.0, 516.0, 516.0, 510.0, 519.0, 470.0, 462.0, 513.0, 481.0, 525.0, 510.0, 513.0, 516.0, 516.0, 462.0, 510.0, 513.0, 462.0, 522.0, 519.0, 519.0, 462.0, 459.0, 524.0, 519.0, 447.0, 513.0, 473.0, 522.0, 516.0, 516.0, 530.0, 525.0, 473.0, 519.0, 513.0, 462.0, 510.0, 522.0, 530.0, 516.0, 516.0, 519.0, 522.0, 468.0, 470.0, 513.0, 455.0, 519.0, 510.0, 513.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [275.0, 249.0, 232.0, 241.0, 233.0, 232.0, 261.0, 257.0, 260.0, 253.0, 266.0, 250.0, 262.0, 257.0, 226.0, 239.0, 267.0, 249.0, 231.0, 233.0, 265.0, 254.0, 239.0, 229.0, 261.0, 261.0, 260.0, 262.0, 272.0, 255.0, 250.0, 260.0, 258.0, 255.0, 260.0, 262.0, 257.0, 262.0, 269.0, 247.0, 234.0, 231.0, 238.0, 
275.0, 235.0, 238.0, 260.0, 265.0, 225.0, 240.0, 238.0, 224.0, 221.0, 247.0, 239.0, 214.0, 238.0, 224.0, 237.0, 228.0, 261.0, 255.0, 252.0, 267.0, 257.0, 256.0, 201.0, 218.0, 245.0, 274.0, 257.0, 270.0, 206.0, 218.0, 264.0, 252.0, 243.0, 270.0, 253.0, 263.0, 262.0, 251.0, 257.0, 262.0, 259.0, 257.0, 229.0, 233.0, 250.0, 266.0, 265.0, 251.0, 255.0, 261.0, 256.0, 268.0, 262.0, 271.0, 249.0, 233.0, 256.0, 260.0, 256.0, 260.0, 252.0, 258.0, 247.0, 272.0, 251.0, 219.0, 225.0, 237.0, 242.0, 271.0, 240.0, 241.0, 254.0, 271.0, 258.0, 252.0, 264.0, 249.0, 250.0, 266.0, 258.0, 258.0, 229.0, 233.0, 256.0, 254.0, 260.0, 253.0, 214.0, 248.0, 276.0, 246.0, 248.0, 271.0, 253.0, 266.0, 238.0, 224.0, 228.0, 231.0, 255.0, 269.0, 245.0, 274.0, 236.0, 211.0, 252.0, 261.0, 229.0, 244.0, 258.0, 264.0, 252.0, 264.0, 262.0, 254.0, 268.0, 262.0, 262.0, 263.0, 242.0, 231.0, 262.0, 257.0, 260.0, 253.0, 227.0, 235.0, 253.0, 257.0, 254.0, 268.0, 268.0, 262.0, 271.0, 245.0, 256.0, 260.0, 267.0, 252.0, 269.0, 253.0, 245.0, 223.0, 254.0, 216.0, 253.0, 260.0, 230.0, 225.0, 262.0, 257.0, 257.0, 253.0, 243.0, 270.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6853944062468555, "mean_inference_ms": 1.2076545245921264, "mean_action_processing_ms": 0.1326425287956505, "mean_env_wait_ms": 0.8475362224447032, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 533.0, "episode_reward_min": 419.0, "episode_reward_mean": 500.71, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 201.0}, "policy_reward_max": {"ppo": 276.0}, "policy_reward_mean": {"ppo": 250.355}, "hist_stats": {"episode_reward": [524.0, 473.0, 465.0, 518.0, 513.0, 516.0, 519.0, 465.0, 516.0, 464.0, 519.0, 468.0, 522.0, 522.0, 527.0, 510.0, 513.0, 522.0, 519.0, 516.0, 465.0, 513.0, 473.0, 525.0, 465.0, 462.0, 468.0, 453.0, 462.0, 465.0, 516.0, 519.0, 513.0, 419.0, 519.0, 527.0, 424.0, 516.0, 513.0, 516.0, 513.0, 519.0, 516.0, 462.0, 516.0, 516.0, 516.0, 524.0, 533.0, 482.0, 516.0, 
516.0, 510.0, 519.0, 470.0, 462.0, 513.0, 481.0, 525.0, 510.0, 513.0, 516.0, 516.0, 462.0, 510.0, 513.0, 462.0, 522.0, 519.0, 519.0, 462.0, 459.0, 524.0, 519.0, 447.0, 513.0, 473.0, 522.0, 516.0, 516.0, 530.0, 525.0, 473.0, 519.0, 513.0, 462.0, 510.0, 522.0, 530.0, 516.0, 516.0, 519.0, 522.0, 468.0, 470.0, 513.0, 455.0, 519.0, 510.0, 513.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [275.0, 249.0, 232.0, 241.0, 233.0, 232.0, 261.0, 257.0, 260.0, 253.0, 266.0, 250.0, 262.0, 257.0, 226.0, 239.0, 267.0, 249.0, 231.0, 233.0, 265.0, 254.0, 239.0, 229.0, 261.0, 261.0, 260.0, 262.0, 272.0, 255.0, 250.0, 260.0, 258.0, 255.0, 260.0, 262.0, 257.0, 262.0, 269.0, 247.0, 234.0, 231.0, 238.0, 275.0, 235.0, 238.0, 260.0, 265.0, 225.0, 240.0, 238.0, 224.0, 221.0, 247.0, 239.0, 214.0, 238.0, 224.0, 237.0, 228.0, 261.0, 255.0, 252.0, 267.0, 257.0, 256.0, 201.0, 218.0, 245.0, 274.0, 257.0, 270.0, 206.0, 218.0, 264.0, 252.0, 243.0, 270.0, 253.0, 263.0, 262.0, 251.0, 257.0, 262.0, 259.0, 257.0, 229.0, 233.0, 250.0, 266.0, 265.0, 251.0, 255.0, 261.0, 256.0, 268.0, 262.0, 271.0, 249.0, 233.0, 256.0, 260.0, 256.0, 260.0, 252.0, 258.0, 247.0, 272.0, 251.0, 219.0, 225.0, 237.0, 242.0, 271.0, 240.0, 241.0, 254.0, 271.0, 258.0, 252.0, 264.0, 249.0, 250.0, 266.0, 258.0, 258.0, 229.0, 233.0, 256.0, 254.0, 260.0, 253.0, 214.0, 248.0, 276.0, 246.0, 248.0, 271.0, 253.0, 266.0, 238.0, 224.0, 228.0, 231.0, 255.0, 269.0, 245.0, 274.0, 236.0, 211.0, 252.0, 261.0, 229.0, 244.0, 258.0, 264.0, 252.0, 264.0, 262.0, 
254.0, 268.0, 262.0, 262.0, 263.0, 242.0, 231.0, 262.0, 257.0, 260.0, 253.0, 227.0, 235.0, 253.0, 257.0, 254.0, 268.0, 268.0, 262.0, 271.0, 245.0, 256.0, 260.0, 267.0, 252.0, 269.0, 253.0, 245.0, 223.0, 254.0, 216.0, 253.0, 260.0, 230.0, 225.0, 262.0, 257.0, 257.0, 253.0, 243.0, 270.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6853944062468555, "mean_inference_ms": 1.2076545245921264, "mean_action_processing_ms": 0.1326425287956505, "mean_env_wait_ms": 0.8475362224447032, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 2892800, "num_agent_steps_trained": 2892800, "num_env_steps_sampled": 1446400, "num_env_steps_trained": 1446400, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 1446400, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 2892800, "timers": {"training_iteration_time_ms": 3635.054, "learn_time_ms": 1110.849, "learn_throughput": 11522.717, "synch_weights_time_ms": 12.34}, "counters": {"num_env_steps_sampled": 1446400, "num_env_steps_trained": 1446400, "num_agent_steps_sampled": 2892800, "num_agent_steps_trained": 2892800}, "done": false, "episodes_total": 3616, "training_iteration": 113, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-07-18", "timestamp": 1666580838, "time_this_iter_s": 3.7796876430511475, "time_total_s": 425.41338205337524, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, 
"device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, 
"sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": 
{"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", 
"observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, 
"keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function 
get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function 
gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 425.41338205337524, "timesteps_since_restore": 0, "iterations_since_restore": 113, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.933333333333334, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 170.2, "sparse_reward_min": 60, "sparse_reward_max": 180, "shaped_reward_mean": 152.01, "shaped_reward_min": 45, "shaped_reward_max": 173, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.38, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 14.76, "onion_pickup_agent_1_min": 5, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 13.93, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 14.28, "useful_onion_pickup_agent_1_min": 3, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.48, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.6, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.32, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, 
"useful_onion_drop_agent_1_mean": 0.38, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 13.65, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 13.86, "potting_onion_agent_1_min": 4, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.06, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.04, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 4.08, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.33, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.53, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.35, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.21, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.06, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.34, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.53, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 10, "soup_delivery_agent_0_mean": 4.28, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.44, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.04, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 13.65, "optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 13.86, "optimal_onion_potting_agent_1_min": 4, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 13.65, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 13.86, "viable_onion_potting_agent_1_min": 4, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 1.9259299730858284e-35, "cur_lr": 0.0010000000474974513, "total_loss": 0.0005465570138767362, "policy_loss": 
0.0003737257793545723, "vf_loss": 7.462190628051758, "vf_explained_var": 0.7065198421478271, "kl": 0.0016828658990561962, "entropy": 1.1467739343643188, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 1459200, "num_env_steps_trained": 1459200, "num_agent_steps_sampled": 2918400, "num_agent_steps_trained": 2918400}, "sampler_results": {"episode_reward_max": 533.0, "episode_reward_min": 165.0, "episode_reward_mean": 492.41, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 78.0}, "policy_reward_max": {"ppo": 276.0}, "policy_reward_mean": {"ppo": 246.205}, "custom_metrics": {"sparse_reward_mean": 170.2, "sparse_reward_min": 60, "sparse_reward_max": 180, "shaped_reward_mean": 152.01, "shaped_reward_min": 45, "shaped_reward_max": 173, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.38, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 23, 
"onion_pickup_agent_1_mean": 14.76, "onion_pickup_agent_1_min": 5, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 13.93, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 14.28, "useful_onion_pickup_agent_1_min": 3, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.48, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.6, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.32, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.38, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 13.65, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 13.86, "potting_onion_agent_1_min": 4, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.06, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.04, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 4.08, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.33, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.53, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.35, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.21, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.06, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.34, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.53, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 10, "soup_delivery_agent_0_mean": 4.28, 
"soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.44, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.04, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 13.65, "optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 13.86, "optimal_onion_potting_agent_1_min": 4, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 13.65, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 13.86, "viable_onion_potting_agent_1_min": 4, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, 
"useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [513.0, 419.0, 519.0, 527.0, 424.0, 516.0, 513.0, 516.0, 513.0, 519.0, 516.0, 462.0, 516.0, 516.0, 516.0, 524.0, 533.0, 482.0, 516.0, 516.0, 510.0, 519.0, 470.0, 462.0, 513.0, 481.0, 525.0, 510.0, 513.0, 516.0, 516.0, 462.0, 510.0, 513.0, 462.0, 522.0, 519.0, 519.0, 462.0, 459.0, 524.0, 519.0, 447.0, 513.0, 473.0, 522.0, 516.0, 516.0, 530.0, 525.0, 473.0, 519.0, 513.0, 462.0, 510.0, 522.0, 530.0, 516.0, 516.0, 519.0, 522.0, 468.0, 470.0, 513.0, 455.0, 519.0, 510.0, 513.0, 465.0, 519.0, 473.0, 513.0, 473.0, 467.0, 370.0, 510.0, 516.0, 467.0, 513.0, 165.0, 516.0, 479.0, 516.0, 378.0, 522.0, 473.0, 522.0, 473.0, 513.0, 516.0, 425.0, 468.0, 479.0, 513.0, 455.0, 462.0, 453.0, 516.0, 467.0, 470.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [257.0, 256.0, 201.0, 218.0, 245.0, 274.0, 257.0, 270.0, 206.0, 218.0, 264.0, 252.0, 243.0, 270.0, 253.0, 263.0, 262.0, 251.0, 257.0, 262.0, 259.0, 257.0, 229.0, 233.0, 250.0, 266.0, 265.0, 251.0, 255.0, 261.0, 256.0, 268.0, 262.0, 271.0, 249.0, 233.0, 256.0, 260.0, 256.0, 260.0, 252.0, 258.0, 247.0, 
272.0, 251.0, 219.0, 225.0, 237.0, 242.0, 271.0, 240.0, 241.0, 254.0, 271.0, 258.0, 252.0, 264.0, 249.0, 250.0, 266.0, 258.0, 258.0, 229.0, 233.0, 256.0, 254.0, 260.0, 253.0, 214.0, 248.0, 276.0, 246.0, 248.0, 271.0, 253.0, 266.0, 238.0, 224.0, 228.0, 231.0, 255.0, 269.0, 245.0, 274.0, 236.0, 211.0, 252.0, 261.0, 229.0, 244.0, 258.0, 264.0, 252.0, 264.0, 262.0, 254.0, 268.0, 262.0, 262.0, 263.0, 242.0, 231.0, 262.0, 257.0, 260.0, 253.0, 227.0, 235.0, 253.0, 257.0, 254.0, 268.0, 268.0, 262.0, 271.0, 245.0, 256.0, 260.0, 267.0, 252.0, 269.0, 253.0, 245.0, 223.0, 254.0, 216.0, 253.0, 260.0, 230.0, 225.0, 262.0, 257.0, 257.0, 253.0, 243.0, 270.0, 223.0, 242.0, 252.0, 267.0, 230.0, 243.0, 266.0, 247.0, 241.0, 232.0, 228.0, 239.0, 170.0, 200.0, 246.0, 264.0, 257.0, 259.0, 233.0, 234.0, 254.0, 259.0, 87.0, 78.0, 253.0, 263.0, 239.0, 240.0, 253.0, 263.0, 194.0, 184.0, 255.0, 267.0, 234.0, 239.0, 257.0, 265.0, 218.0, 255.0, 262.0, 251.0, 275.0, 241.0, 219.0, 206.0, 235.0, 233.0, 242.0, 237.0, 267.0, 246.0, 228.0, 227.0, 226.0, 236.0, 227.0, 226.0, 262.0, 254.0, 228.0, 239.0, 231.0, 239.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6854311581097831, "mean_inference_ms": 1.2076565796296106, "mean_action_processing_ms": 0.13265011807136642, "mean_env_wait_ms": 0.8473229051130425, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 533.0, "episode_reward_min": 165.0, "episode_reward_mean": 492.41, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 78.0}, "policy_reward_max": {"ppo": 276.0}, "policy_reward_mean": {"ppo": 246.205}, "hist_stats": {"episode_reward": [513.0, 419.0, 519.0, 527.0, 424.0, 516.0, 513.0, 516.0, 513.0, 519.0, 516.0, 462.0, 516.0, 516.0, 516.0, 524.0, 533.0, 482.0, 516.0, 516.0, 510.0, 519.0, 470.0, 462.0, 513.0, 481.0, 525.0, 510.0, 513.0, 516.0, 516.0, 462.0, 510.0, 513.0, 462.0, 522.0, 519.0, 519.0, 462.0, 459.0, 524.0, 519.0, 447.0, 513.0, 473.0, 522.0, 516.0, 516.0, 530.0, 525.0, 473.0, 
519.0, 513.0, 462.0, 510.0, 522.0, 530.0, 516.0, 516.0, 519.0, 522.0, 468.0, 470.0, 513.0, 455.0, 519.0, 510.0, 513.0, 465.0, 519.0, 473.0, 513.0, 473.0, 467.0, 370.0, 510.0, 516.0, 467.0, 513.0, 165.0, 516.0, 479.0, 516.0, 378.0, 522.0, 473.0, 522.0, 473.0, 513.0, 516.0, 425.0, 468.0, 479.0, 513.0, 455.0, 462.0, 453.0, 516.0, 467.0, 470.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [257.0, 256.0, 201.0, 218.0, 245.0, 274.0, 257.0, 270.0, 206.0, 218.0, 264.0, 252.0, 243.0, 270.0, 253.0, 263.0, 262.0, 251.0, 257.0, 262.0, 259.0, 257.0, 229.0, 233.0, 250.0, 266.0, 265.0, 251.0, 255.0, 261.0, 256.0, 268.0, 262.0, 271.0, 249.0, 233.0, 256.0, 260.0, 256.0, 260.0, 252.0, 258.0, 247.0, 272.0, 251.0, 219.0, 225.0, 237.0, 242.0, 271.0, 240.0, 241.0, 254.0, 271.0, 258.0, 252.0, 264.0, 249.0, 250.0, 266.0, 258.0, 258.0, 229.0, 233.0, 256.0, 254.0, 260.0, 253.0, 214.0, 248.0, 276.0, 246.0, 248.0, 271.0, 253.0, 266.0, 238.0, 224.0, 228.0, 231.0, 255.0, 269.0, 245.0, 274.0, 236.0, 211.0, 252.0, 261.0, 229.0, 244.0, 258.0, 264.0, 252.0, 264.0, 262.0, 254.0, 268.0, 262.0, 262.0, 263.0, 242.0, 231.0, 262.0, 257.0, 260.0, 253.0, 227.0, 235.0, 253.0, 257.0, 254.0, 268.0, 268.0, 262.0, 271.0, 245.0, 256.0, 260.0, 267.0, 252.0, 269.0, 253.0, 245.0, 223.0, 254.0, 216.0, 253.0, 260.0, 230.0, 225.0, 262.0, 257.0, 257.0, 253.0, 243.0, 270.0, 223.0, 242.0, 252.0, 267.0, 230.0, 243.0, 266.0, 247.0, 241.0, 232.0, 228.0, 239.0, 170.0, 200.0, 246.0, 264.0, 257.0, 259.0, 233.0, 234.0, 254.0, 259.0, 87.0, 
78.0, 253.0, 263.0, 239.0, 240.0, 253.0, 263.0, 194.0, 184.0, 255.0, 267.0, 234.0, 239.0, 257.0, 265.0, 218.0, 255.0, 262.0, 251.0, 275.0, 241.0, 219.0, 206.0, 235.0, 233.0, 242.0, 237.0, 267.0, 246.0, 228.0, 227.0, 226.0, 236.0, 227.0, 226.0, 262.0, 254.0, 228.0, 239.0, 231.0, 239.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6854311581097831, "mean_inference_ms": 1.2076565796296106, "mean_action_processing_ms": 0.13265011807136642, "mean_env_wait_ms": 0.8473229051130425, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 2918400, "num_agent_steps_trained": 2918400, "num_env_steps_sampled": 1459200, "num_env_steps_trained": 1459200, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 1459200, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 2918400, "timers": {"training_iteration_time_ms": 3635.297, "learn_time_ms": 1114.383, "learn_throughput": 11486.175, "synch_weights_time_ms": 12.746}, "counters": {"num_env_steps_sampled": 1459200, "num_env_steps_trained": 1459200, "num_agent_steps_sampled": 2918400, "num_agent_steps_trained": 2918400}, "done": false, "episodes_total": 3648, "training_iteration": 114, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-07-22", "timestamp": 1666580842, "time_this_iter_s": 3.6836071014404297, "time_total_s": 429.0969891548157, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, 
"device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, 
"sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": 
{"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", 
"observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, 
"keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function 
get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function 
gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 429.0969891548157, "timesteps_since_restore": 0, "iterations_since_restore": 114, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 24.04, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 169.6, "sparse_reward_min": 60, "sparse_reward_max": 180, "shaped_reward_mean": 151.24, "shaped_reward_min": 45, "shaped_reward_max": 170, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.64, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 14.44, "onion_pickup_agent_1_min": 5, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 14.19, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 13.98, "useful_onion_pickup_agent_1_min": 3, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.49, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.59, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.34, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, 
"useful_onion_drop_agent_1_mean": 0.34, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 13.92, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 13.53, "potting_onion_agent_1_min": 4, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 4.94, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.08, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 3.98, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.35, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.5, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.34, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.18, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.06, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.22, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.57, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 10, "soup_delivery_agent_0_mean": 4.16, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.48, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 13.92, "optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 13.53, "optimal_onion_potting_agent_1_min": 4, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 13.92, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 13.53, "viable_onion_potting_agent_1_min": 4, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 9.629649865429142e-36, "cur_lr": 0.0010000000474974513, "total_loss": -0.0018681013025343418, "policy_loss": 
-0.002045394852757454, "vf_loss": 7.456493377685547, "vf_explained_var": 0.6910403966903687, "kl": 0.0016753775998950005, "entropy": 1.1367100477218628, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 1472000, "num_env_steps_trained": 1472000, "num_agent_steps_sampled": 2944000, "num_agent_steps_trained": 2944000}, "sampler_results": {"episode_reward_max": 530.0, "episode_reward_min": 165.0, "episode_reward_mean": 490.44, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 78.0}, "policy_reward_max": {"ppo": 276.0}, "policy_reward_mean": {"ppo": 245.22}, "custom_metrics": {"sparse_reward_mean": 169.6, "sparse_reward_min": 60, "sparse_reward_max": 180, "shaped_reward_mean": 151.24, "shaped_reward_min": 45, "shaped_reward_max": 170, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.64, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 23, 
"onion_pickup_agent_1_mean": 14.44, "onion_pickup_agent_1_min": 5, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 14.19, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 13.98, "useful_onion_pickup_agent_1_min": 3, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.49, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.59, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.34, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, "useful_onion_drop_agent_1_mean": 0.34, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 13.92, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 13.53, "potting_onion_agent_1_min": 4, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 4.94, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.08, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 3.98, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.35, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.5, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.34, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.18, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.06, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.22, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.57, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 10, "soup_delivery_agent_0_mean": 4.16, 
"soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.48, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 13.92, "optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 13.53, "optimal_onion_potting_agent_1_min": 4, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 13.92, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 13.53, "viable_onion_potting_agent_1_min": 4, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, 
"useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [510.0, 513.0, 462.0, 522.0, 519.0, 519.0, 462.0, 459.0, 524.0, 519.0, 447.0, 513.0, 473.0, 522.0, 516.0, 516.0, 530.0, 525.0, 473.0, 519.0, 513.0, 462.0, 510.0, 522.0, 530.0, 516.0, 516.0, 519.0, 522.0, 468.0, 470.0, 513.0, 455.0, 519.0, 510.0, 513.0, 465.0, 519.0, 473.0, 513.0, 473.0, 467.0, 370.0, 510.0, 516.0, 467.0, 513.0, 165.0, 516.0, 479.0, 516.0, 378.0, 522.0, 473.0, 522.0, 473.0, 513.0, 516.0, 425.0, 468.0, 479.0, 513.0, 455.0, 462.0, 453.0, 516.0, 467.0, 470.0, 516.0, 473.0, 516.0, 465.0, 522.0, 510.0, 522.0, 516.0, 522.0, 519.0, 465.0, 510.0, 519.0, 410.0, 519.0, 522.0, 513.0, 453.0, 465.0, 513.0, 513.0, 519.0, 525.0, 519.0, 482.0, 516.0, 519.0, 464.0, 467.0, 458.0, 462.0, 462.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [256.0, 254.0, 260.0, 253.0, 214.0, 248.0, 276.0, 246.0, 248.0, 271.0, 253.0, 266.0, 238.0, 224.0, 228.0, 231.0, 255.0, 269.0, 245.0, 274.0, 236.0, 211.0, 252.0, 261.0, 229.0, 244.0, 258.0, 264.0, 252.0, 264.0, 262.0, 254.0, 268.0, 262.0, 262.0, 263.0, 242.0, 231.0, 262.0, 257.0, 260.0, 253.0, 227.0, 
235.0, 253.0, 257.0, 254.0, 268.0, 268.0, 262.0, 271.0, 245.0, 256.0, 260.0, 267.0, 252.0, 269.0, 253.0, 245.0, 223.0, 254.0, 216.0, 253.0, 260.0, 230.0, 225.0, 262.0, 257.0, 257.0, 253.0, 243.0, 270.0, 223.0, 242.0, 252.0, 267.0, 230.0, 243.0, 266.0, 247.0, 241.0, 232.0, 228.0, 239.0, 170.0, 200.0, 246.0, 264.0, 257.0, 259.0, 233.0, 234.0, 254.0, 259.0, 87.0, 78.0, 253.0, 263.0, 239.0, 240.0, 253.0, 263.0, 194.0, 184.0, 255.0, 267.0, 234.0, 239.0, 257.0, 265.0, 218.0, 255.0, 262.0, 251.0, 275.0, 241.0, 219.0, 206.0, 235.0, 233.0, 242.0, 237.0, 267.0, 246.0, 228.0, 227.0, 226.0, 236.0, 227.0, 226.0, 262.0, 254.0, 228.0, 239.0, 231.0, 239.0, 259.0, 257.0, 242.0, 231.0, 265.0, 251.0, 231.0, 234.0, 260.0, 262.0, 250.0, 260.0, 260.0, 262.0, 267.0, 249.0, 274.0, 248.0, 260.0, 259.0, 236.0, 229.0, 250.0, 260.0, 254.0, 265.0, 203.0, 207.0, 259.0, 260.0, 265.0, 257.0, 261.0, 252.0, 220.0, 233.0, 221.0, 244.0, 246.0, 267.0, 245.0, 268.0, 267.0, 252.0, 267.0, 258.0, 252.0, 267.0, 241.0, 241.0, 246.0, 270.0, 253.0, 266.0, 228.0, 236.0, 225.0, 242.0, 218.0, 240.0, 227.0, 235.0, 231.0, 231.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6854759185382301, "mean_inference_ms": 1.2076344021717942, "mean_action_processing_ms": 0.13265548187961484, "mean_env_wait_ms": 0.8470913220135837, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 530.0, "episode_reward_min": 165.0, "episode_reward_mean": 490.44, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 78.0}, "policy_reward_max": {"ppo": 276.0}, "policy_reward_mean": {"ppo": 245.22}, "hist_stats": {"episode_reward": [510.0, 513.0, 462.0, 522.0, 519.0, 519.0, 462.0, 459.0, 524.0, 519.0, 447.0, 513.0, 473.0, 522.0, 516.0, 516.0, 530.0, 525.0, 473.0, 519.0, 513.0, 462.0, 510.0, 522.0, 530.0, 516.0, 516.0, 519.0, 522.0, 468.0, 470.0, 513.0, 455.0, 519.0, 510.0, 513.0, 465.0, 519.0, 473.0, 513.0, 473.0, 467.0, 370.0, 510.0, 516.0, 467.0, 513.0, 165.0, 516.0, 479.0, 516.0, 
378.0, 522.0, 473.0, 522.0, 473.0, 513.0, 516.0, 425.0, 468.0, 479.0, 513.0, 455.0, 462.0, 453.0, 516.0, 467.0, 470.0, 516.0, 473.0, 516.0, 465.0, 522.0, 510.0, 522.0, 516.0, 522.0, 519.0, 465.0, 510.0, 519.0, 410.0, 519.0, 522.0, 513.0, 453.0, 465.0, 513.0, 513.0, 519.0, 525.0, 519.0, 482.0, 516.0, 519.0, 464.0, 467.0, 458.0, 462.0, 462.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [256.0, 254.0, 260.0, 253.0, 214.0, 248.0, 276.0, 246.0, 248.0, 271.0, 253.0, 266.0, 238.0, 224.0, 228.0, 231.0, 255.0, 269.0, 245.0, 274.0, 236.0, 211.0, 252.0, 261.0, 229.0, 244.0, 258.0, 264.0, 252.0, 264.0, 262.0, 254.0, 268.0, 262.0, 262.0, 263.0, 242.0, 231.0, 262.0, 257.0, 260.0, 253.0, 227.0, 235.0, 253.0, 257.0, 254.0, 268.0, 268.0, 262.0, 271.0, 245.0, 256.0, 260.0, 267.0, 252.0, 269.0, 253.0, 245.0, 223.0, 254.0, 216.0, 253.0, 260.0, 230.0, 225.0, 262.0, 257.0, 257.0, 253.0, 243.0, 270.0, 223.0, 242.0, 252.0, 267.0, 230.0, 243.0, 266.0, 247.0, 241.0, 232.0, 228.0, 239.0, 170.0, 200.0, 246.0, 264.0, 257.0, 259.0, 233.0, 234.0, 254.0, 259.0, 87.0, 78.0, 253.0, 263.0, 239.0, 240.0, 253.0, 263.0, 194.0, 184.0, 255.0, 267.0, 234.0, 239.0, 257.0, 265.0, 218.0, 255.0, 262.0, 251.0, 275.0, 241.0, 219.0, 206.0, 235.0, 233.0, 242.0, 237.0, 267.0, 246.0, 228.0, 227.0, 226.0, 236.0, 227.0, 226.0, 262.0, 254.0, 228.0, 239.0, 231.0, 239.0, 259.0, 257.0, 242.0, 231.0, 265.0, 251.0, 231.0, 234.0, 260.0, 262.0, 250.0, 260.0, 260.0, 262.0, 267.0, 249.0, 274.0, 248.0, 260.0, 259.0, 236.0, 229.0, 250.0, 
260.0, 254.0, 265.0, 203.0, 207.0, 259.0, 260.0, 265.0, 257.0, 261.0, 252.0, 220.0, 233.0, 221.0, 244.0, 246.0, 267.0, 245.0, 268.0, 267.0, 252.0, 267.0, 258.0, 252.0, 267.0, 241.0, 241.0, 246.0, 270.0, 253.0, 266.0, 228.0, 236.0, 225.0, 242.0, 218.0, 240.0, 227.0, 235.0, 231.0, 231.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6854759185382301, "mean_inference_ms": 1.2076344021717942, "mean_action_processing_ms": 0.13265548187961484, "mean_env_wait_ms": 0.8470913220135837, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 2944000, "num_agent_steps_trained": 2944000, "num_env_steps_sampled": 1472000, "num_env_steps_trained": 1472000, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 1472000, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 2944000, "timers": {"training_iteration_time_ms": 3637.242, "learn_time_ms": 1116.26, "learn_throughput": 11466.867, "synch_weights_time_ms": 12.813}, "counters": {"num_env_steps_sampled": 1472000, "num_env_steps_trained": 1472000, "num_agent_steps_sampled": 2944000, "num_agent_steps_trained": 2944000}, "done": false, "episodes_total": 3680, "training_iteration": 115, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-07-25", "timestamp": 1666580845, "time_this_iter_s": 3.708575487136841, "time_total_s": 432.8055646419525, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, 
"device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, 
"sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": 
{"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", 
"observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, 
"keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function 
get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function 
gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 432.8055646419525, "timesteps_since_restore": 0, "iterations_since_restore": 115, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.849999999999998, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 167.6, "sparse_reward_min": 60, "sparse_reward_max": 180, "shaped_reward_mean": 149.23, "shaped_reward_min": 45, "shaped_reward_max": 165, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.56, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 14.35, "onion_pickup_agent_1_min": 5, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 14.08, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 13.87, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.52, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.61, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.38, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, 
"useful_onion_drop_agent_1_mean": 0.31, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 13.76, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 13.46, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 4.97, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.06, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 3.88, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.21, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.52, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.43, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.18, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.07, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.2, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.51, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 10, "soup_delivery_agent_0_mean": 4.14, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.4, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.04, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.05, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 13.76, "optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 13.46, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 13.76, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 13.46, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 4.814824932714571e-36, "cur_lr": 0.0010000000474974513, "total_loss": -0.003027984406799078, "policy_loss": 
-0.0032042870298027992, "vf_loss": 7.424014091491699, "vf_explained_var": 0.6924208402633667, "kl": 0.0019326722249388695, "entropy": 1.1321947574615479, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 1484800, "num_env_steps_trained": 1484800, "num_agent_steps_sampled": 2969600, "num_agent_steps_trained": 2969600}, "sampler_results": {"episode_reward_max": 525.0, "episode_reward_min": 165.0, "episode_reward_mean": 484.43, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 78.0}, "policy_reward_max": {"ppo": 276.0}, "policy_reward_mean": {"ppo": 242.215}, "custom_metrics": {"sparse_reward_mean": 167.6, "sparse_reward_min": 60, "sparse_reward_max": 180, "shaped_reward_mean": 149.23, "shaped_reward_min": 45, "shaped_reward_max": 165, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.56, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 23, 
"onion_pickup_agent_1_mean": 14.35, "onion_pickup_agent_1_min": 5, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 14.08, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 13.87, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.52, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.61, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.38, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, "useful_onion_drop_agent_1_mean": 0.31, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 13.76, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 13.46, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 4.97, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.06, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 3.88, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.21, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.52, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.43, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.18, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.07, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.2, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.51, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 10, "soup_delivery_agent_0_mean": 4.14, 
"soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.4, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.04, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.05, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 13.76, "optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 13.46, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 13.76, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 13.46, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, 
"useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [455.0, 519.0, 510.0, 513.0, 465.0, 519.0, 473.0, 513.0, 473.0, 467.0, 370.0, 510.0, 516.0, 467.0, 513.0, 165.0, 516.0, 479.0, 516.0, 378.0, 522.0, 473.0, 522.0, 473.0, 513.0, 516.0, 425.0, 468.0, 479.0, 513.0, 455.0, 462.0, 453.0, 516.0, 467.0, 470.0, 516.0, 473.0, 516.0, 465.0, 522.0, 510.0, 522.0, 516.0, 522.0, 519.0, 465.0, 510.0, 519.0, 410.0, 519.0, 522.0, 513.0, 453.0, 465.0, 513.0, 513.0, 519.0, 525.0, 519.0, 482.0, 516.0, 519.0, 464.0, 467.0, 458.0, 462.0, 462.0, 516.0, 416.0, 462.0, 465.0, 456.0, 516.0, 519.0, 399.0, 470.0, 519.0, 411.0, 459.0, 516.0, 470.0, 444.0, 465.0, 521.0, 456.0, 456.0, 519.0, 507.0, 522.0, 516.0, 519.0, 525.0, 513.0, 519.0, 525.0, 462.0, 519.0, 468.0, 453.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [230.0, 225.0, 262.0, 257.0, 257.0, 253.0, 243.0, 270.0, 223.0, 242.0, 252.0, 267.0, 230.0, 243.0, 266.0, 247.0, 241.0, 232.0, 228.0, 239.0, 170.0, 200.0, 246.0, 264.0, 257.0, 259.0, 233.0, 234.0, 254.0, 259.0, 87.0, 78.0, 253.0, 263.0, 239.0, 240.0, 253.0, 263.0, 194.0, 184.0, 255.0, 267.0, 234.0, 239.0, 
257.0, 265.0, 218.0, 255.0, 262.0, 251.0, 275.0, 241.0, 219.0, 206.0, 235.0, 233.0, 242.0, 237.0, 267.0, 246.0, 228.0, 227.0, 226.0, 236.0, 227.0, 226.0, 262.0, 254.0, 228.0, 239.0, 231.0, 239.0, 259.0, 257.0, 242.0, 231.0, 265.0, 251.0, 231.0, 234.0, 260.0, 262.0, 250.0, 260.0, 260.0, 262.0, 267.0, 249.0, 274.0, 248.0, 260.0, 259.0, 236.0, 229.0, 250.0, 260.0, 254.0, 265.0, 203.0, 207.0, 259.0, 260.0, 265.0, 257.0, 261.0, 252.0, 220.0, 233.0, 221.0, 244.0, 246.0, 267.0, 245.0, 268.0, 267.0, 252.0, 267.0, 258.0, 252.0, 267.0, 241.0, 241.0, 246.0, 270.0, 253.0, 266.0, 228.0, 236.0, 225.0, 242.0, 218.0, 240.0, 227.0, 235.0, 231.0, 231.0, 260.0, 256.0, 225.0, 191.0, 227.0, 235.0, 220.0, 245.0, 238.0, 218.0, 267.0, 249.0, 253.0, 266.0, 196.0, 203.0, 247.0, 223.0, 267.0, 252.0, 211.0, 200.0, 220.0, 239.0, 256.0, 260.0, 235.0, 235.0, 208.0, 236.0, 234.0, 231.0, 276.0, 245.0, 223.0, 233.0, 243.0, 213.0, 255.0, 264.0, 257.0, 250.0, 255.0, 267.0, 245.0, 271.0, 253.0, 266.0, 250.0, 275.0, 255.0, 258.0, 272.0, 247.0, 258.0, 267.0, 228.0, 234.0, 269.0, 250.0, 240.0, 228.0, 237.0, 216.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6854368390251488, "mean_inference_ms": 1.2076880422103875, "mean_action_processing_ms": 0.13265155972540793, "mean_env_wait_ms": 0.8467858667409799, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 525.0, "episode_reward_min": 165.0, "episode_reward_mean": 484.43, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 78.0}, "policy_reward_max": {"ppo": 276.0}, "policy_reward_mean": {"ppo": 242.215}, "hist_stats": {"episode_reward": [455.0, 519.0, 510.0, 513.0, 465.0, 519.0, 473.0, 513.0, 473.0, 467.0, 370.0, 510.0, 516.0, 467.0, 513.0, 165.0, 516.0, 479.0, 516.0, 378.0, 522.0, 473.0, 522.0, 473.0, 513.0, 516.0, 425.0, 468.0, 479.0, 513.0, 455.0, 462.0, 453.0, 516.0, 467.0, 470.0, 516.0, 473.0, 516.0, 465.0, 522.0, 510.0, 522.0, 516.0, 522.0, 519.0, 465.0, 510.0, 519.0, 410.0, 519.0, 522.0, 
513.0, 453.0, 465.0, 513.0, 513.0, 519.0, 525.0, 519.0, 482.0, 516.0, 519.0, 464.0, 467.0, 458.0, 462.0, 462.0, 516.0, 416.0, 462.0, 465.0, 456.0, 516.0, 519.0, 399.0, 470.0, 519.0, 411.0, 459.0, 516.0, 470.0, 444.0, 465.0, 521.0, 456.0, 456.0, 519.0, 507.0, 522.0, 516.0, 519.0, 525.0, 513.0, 519.0, 525.0, 462.0, 519.0, 468.0, 453.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [230.0, 225.0, 262.0, 257.0, 257.0, 253.0, 243.0, 270.0, 223.0, 242.0, 252.0, 267.0, 230.0, 243.0, 266.0, 247.0, 241.0, 232.0, 228.0, 239.0, 170.0, 200.0, 246.0, 264.0, 257.0, 259.0, 233.0, 234.0, 254.0, 259.0, 87.0, 78.0, 253.0, 263.0, 239.0, 240.0, 253.0, 263.0, 194.0, 184.0, 255.0, 267.0, 234.0, 239.0, 257.0, 265.0, 218.0, 255.0, 262.0, 251.0, 275.0, 241.0, 219.0, 206.0, 235.0, 233.0, 242.0, 237.0, 267.0, 246.0, 228.0, 227.0, 226.0, 236.0, 227.0, 226.0, 262.0, 254.0, 228.0, 239.0, 231.0, 239.0, 259.0, 257.0, 242.0, 231.0, 265.0, 251.0, 231.0, 234.0, 260.0, 262.0, 250.0, 260.0, 260.0, 262.0, 267.0, 249.0, 274.0, 248.0, 260.0, 259.0, 236.0, 229.0, 250.0, 260.0, 254.0, 265.0, 203.0, 207.0, 259.0, 260.0, 265.0, 257.0, 261.0, 252.0, 220.0, 233.0, 221.0, 244.0, 246.0, 267.0, 245.0, 268.0, 267.0, 252.0, 267.0, 258.0, 252.0, 267.0, 241.0, 241.0, 246.0, 270.0, 253.0, 266.0, 228.0, 236.0, 225.0, 242.0, 218.0, 240.0, 227.0, 235.0, 231.0, 231.0, 260.0, 256.0, 225.0, 191.0, 227.0, 235.0, 220.0, 245.0, 238.0, 218.0, 267.0, 249.0, 253.0, 266.0, 196.0, 203.0, 247.0, 223.0, 267.0, 252.0, 211.0, 200.0, 220.0, 239.0, 
256.0, 260.0, 235.0, 235.0, 208.0, 236.0, 234.0, 231.0, 276.0, 245.0, 223.0, 233.0, 243.0, 213.0, 255.0, 264.0, 257.0, 250.0, 255.0, 267.0, 245.0, 271.0, 253.0, 266.0, 250.0, 275.0, 255.0, 258.0, 272.0, 247.0, 258.0, 267.0, 228.0, 234.0, 269.0, 250.0, 240.0, 228.0, 237.0, 216.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6854368390251488, "mean_inference_ms": 1.2076880422103875, "mean_action_processing_ms": 0.13265155972540793, "mean_env_wait_ms": 0.8467858667409799, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 2969600, "num_agent_steps_trained": 2969600, "num_env_steps_sampled": 1484800, "num_env_steps_trained": 1484800, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 1484800, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 2969600, "timers": {"training_iteration_time_ms": 3656.379, "learn_time_ms": 1121.671, "learn_throughput": 11411.55, "synch_weights_time_ms": 12.795}, "counters": {"num_env_steps_sampled": 1484800, "num_env_steps_trained": 1484800, "num_agent_steps_sampled": 2969600, "num_agent_steps_trained": 2969600}, "done": false, "episodes_total": 3712, "training_iteration": 116, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-07-29", "timestamp": 1666580849, "time_this_iter_s": 3.8891963958740234, "time_total_s": 436.69476103782654, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, 
"device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, 
"sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": 
{"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", 
"observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, 
"keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function 
get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function 
gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 436.69476103782654, "timesteps_since_restore": 0, "iterations_since_restore": 116, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 24.14, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 168.6, "sparse_reward_min": 60, "sparse_reward_max": 180, "shaped_reward_mean": 149.2, "shaped_reward_min": 60, "shaped_reward_max": 165, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.32, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 14.46, "onion_pickup_agent_1_min": 2, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 13.88, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 14.02, "useful_onion_pickup_agent_1_min": 2, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.43, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.53, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.31, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, 
"useful_onion_drop_agent_1_mean": 0.25, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 13.61, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 13.66, "potting_onion_agent_1_min": 2, "potting_onion_agent_1_max": 20, "dish_pickup_agent_0_mean": 4.97, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 4.99, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.0, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.08, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.48, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.43, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.15, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.08, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.27, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.42, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 4.19, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.35, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.05, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 13.61, "optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 13.66, "optimal_onion_potting_agent_1_min": 2, "optimal_onion_potting_agent_1_max": 20, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 13.61, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 13.66, "viable_onion_potting_agent_1_min": 2, "viable_onion_potting_agent_1_max": 20, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 2.4074124663572855e-36, "cur_lr": 0.0010000000474974513, "total_loss": -0.0016507103573530912, "policy_loss": 
-0.0018331120954826474, "vf_loss": 7.434719085693359, "vf_explained_var": 0.6968331933021545, "kl": 0.0018940645968541503, "entropy": 1.122139811515808, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 1497600, "num_env_steps_trained": 1497600, "num_agent_steps_sampled": 2995200, "num_agent_steps_trained": 2995200}, "sampler_results": {"episode_reward_max": 525.0, "episode_reward_min": 180.0, "episode_reward_mean": 486.4, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 90.0}, "policy_reward_max": {"ppo": 282.0}, "policy_reward_mean": {"ppo": 243.2}, "custom_metrics": {"sparse_reward_mean": 168.6, "sparse_reward_min": 60, "sparse_reward_max": 180, "shaped_reward_mean": 149.2, "shaped_reward_min": 60, "shaped_reward_max": 165, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.32, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 21, 
"onion_pickup_agent_1_mean": 14.46, "onion_pickup_agent_1_min": 2, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 13.88, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 14.02, "useful_onion_pickup_agent_1_min": 2, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.43, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.53, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.31, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, "useful_onion_drop_agent_1_mean": 0.25, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 13.61, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 13.66, "potting_onion_agent_1_min": 2, "potting_onion_agent_1_max": 20, "dish_pickup_agent_0_mean": 4.97, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 4.99, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.0, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.08, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.48, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.43, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.15, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.08, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.27, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.42, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 4.19, 
"soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.35, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.05, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 13.61, "optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 13.66, "optimal_onion_potting_agent_1_min": 2, "optimal_onion_potting_agent_1_max": 20, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 13.61, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 13.66, "viable_onion_potting_agent_1_min": 2, "viable_onion_potting_agent_1_max": 20, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, 
"useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [453.0, 516.0, 467.0, 470.0, 516.0, 473.0, 516.0, 465.0, 522.0, 510.0, 522.0, 516.0, 522.0, 519.0, 465.0, 510.0, 519.0, 410.0, 519.0, 522.0, 513.0, 453.0, 465.0, 513.0, 513.0, 519.0, 525.0, 519.0, 482.0, 516.0, 519.0, 464.0, 467.0, 458.0, 462.0, 462.0, 516.0, 416.0, 462.0, 465.0, 456.0, 516.0, 519.0, 399.0, 470.0, 519.0, 411.0, 459.0, 516.0, 470.0, 444.0, 465.0, 521.0, 456.0, 456.0, 519.0, 507.0, 522.0, 516.0, 519.0, 525.0, 513.0, 519.0, 525.0, 462.0, 519.0, 468.0, 453.0, 516.0, 513.0, 519.0, 501.0, 419.0, 516.0, 462.0, 522.0, 180.0, 479.0, 504.0, 465.0, 507.0, 522.0, 468.0, 519.0, 510.0, 522.0, 465.0, 413.0, 513.0, 519.0, 427.0, 473.0, 465.0, 513.0, 513.0, 516.0, 464.0, 476.0, 513.0, 441.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [227.0, 226.0, 262.0, 254.0, 228.0, 239.0, 231.0, 239.0, 259.0, 257.0, 242.0, 231.0, 265.0, 251.0, 231.0, 234.0, 260.0, 262.0, 250.0, 260.0, 260.0, 262.0, 267.0, 249.0, 274.0, 248.0, 260.0, 259.0, 236.0, 229.0, 250.0, 260.0, 254.0, 265.0, 203.0, 207.0, 259.0, 260.0, 265.0, 257.0, 261.0, 252.0, 220.0, 
233.0, 221.0, 244.0, 246.0, 267.0, 245.0, 268.0, 267.0, 252.0, 267.0, 258.0, 252.0, 267.0, 241.0, 241.0, 246.0, 270.0, 253.0, 266.0, 228.0, 236.0, 225.0, 242.0, 218.0, 240.0, 227.0, 235.0, 231.0, 231.0, 260.0, 256.0, 225.0, 191.0, 227.0, 235.0, 220.0, 245.0, 238.0, 218.0, 267.0, 249.0, 253.0, 266.0, 196.0, 203.0, 247.0, 223.0, 267.0, 252.0, 211.0, 200.0, 220.0, 239.0, 256.0, 260.0, 235.0, 235.0, 208.0, 236.0, 234.0, 231.0, 276.0, 245.0, 223.0, 233.0, 243.0, 213.0, 255.0, 264.0, 257.0, 250.0, 255.0, 267.0, 245.0, 271.0, 253.0, 266.0, 250.0, 275.0, 255.0, 258.0, 272.0, 247.0, 258.0, 267.0, 228.0, 234.0, 269.0, 250.0, 240.0, 228.0, 237.0, 216.0, 257.0, 259.0, 257.0, 256.0, 253.0, 266.0, 250.0, 251.0, 208.0, 211.0, 271.0, 245.0, 224.0, 238.0, 255.0, 267.0, 90.0, 90.0, 241.0, 238.0, 247.0, 257.0, 223.0, 242.0, 248.0, 259.0, 254.0, 268.0, 231.0, 237.0, 253.0, 266.0, 262.0, 248.0, 257.0, 265.0, 245.0, 220.0, 212.0, 201.0, 240.0, 273.0, 282.0, 237.0, 217.0, 210.0, 228.0, 245.0, 244.0, 221.0, 255.0, 258.0, 248.0, 265.0, 267.0, 249.0, 218.0, 246.0, 233.0, 243.0, 263.0, 250.0, 219.0, 222.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6853727837400302, "mean_inference_ms": 1.2079962459958373, "mean_action_processing_ms": 0.13264166399823427, "mean_env_wait_ms": 0.8467385166751192, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 525.0, "episode_reward_min": 180.0, "episode_reward_mean": 486.4, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 90.0}, "policy_reward_max": {"ppo": 282.0}, "policy_reward_mean": {"ppo": 243.2}, "hist_stats": {"episode_reward": [453.0, 516.0, 467.0, 470.0, 516.0, 473.0, 516.0, 465.0, 522.0, 510.0, 522.0, 516.0, 522.0, 519.0, 465.0, 510.0, 519.0, 410.0, 519.0, 522.0, 513.0, 453.0, 465.0, 513.0, 513.0, 519.0, 525.0, 519.0, 482.0, 516.0, 519.0, 464.0, 467.0, 458.0, 462.0, 462.0, 516.0, 416.0, 462.0, 465.0, 456.0, 516.0, 519.0, 399.0, 470.0, 519.0, 411.0, 459.0, 516.0, 470.0, 444.0, 
465.0, 521.0, 456.0, 456.0, 519.0, 507.0, 522.0, 516.0, 519.0, 525.0, 513.0, 519.0, 525.0, 462.0, 519.0, 468.0, 453.0, 516.0, 513.0, 519.0, 501.0, 419.0, 516.0, 462.0, 522.0, 180.0, 479.0, 504.0, 465.0, 507.0, 522.0, 468.0, 519.0, 510.0, 522.0, 465.0, 413.0, 513.0, 519.0, 427.0, 473.0, 465.0, 513.0, 513.0, 516.0, 464.0, 476.0, 513.0, 441.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [227.0, 226.0, 262.0, 254.0, 228.0, 239.0, 231.0, 239.0, 259.0, 257.0, 242.0, 231.0, 265.0, 251.0, 231.0, 234.0, 260.0, 262.0, 250.0, 260.0, 260.0, 262.0, 267.0, 249.0, 274.0, 248.0, 260.0, 259.0, 236.0, 229.0, 250.0, 260.0, 254.0, 265.0, 203.0, 207.0, 259.0, 260.0, 265.0, 257.0, 261.0, 252.0, 220.0, 233.0, 221.0, 244.0, 246.0, 267.0, 245.0, 268.0, 267.0, 252.0, 267.0, 258.0, 252.0, 267.0, 241.0, 241.0, 246.0, 270.0, 253.0, 266.0, 228.0, 236.0, 225.0, 242.0, 218.0, 240.0, 227.0, 235.0, 231.0, 231.0, 260.0, 256.0, 225.0, 191.0, 227.0, 235.0, 220.0, 245.0, 238.0, 218.0, 267.0, 249.0, 253.0, 266.0, 196.0, 203.0, 247.0, 223.0, 267.0, 252.0, 211.0, 200.0, 220.0, 239.0, 256.0, 260.0, 235.0, 235.0, 208.0, 236.0, 234.0, 231.0, 276.0, 245.0, 223.0, 233.0, 243.0, 213.0, 255.0, 264.0, 257.0, 250.0, 255.0, 267.0, 245.0, 271.0, 253.0, 266.0, 250.0, 275.0, 255.0, 258.0, 272.0, 247.0, 258.0, 267.0, 228.0, 234.0, 269.0, 250.0, 240.0, 228.0, 237.0, 216.0, 257.0, 259.0, 257.0, 256.0, 253.0, 266.0, 250.0, 251.0, 208.0, 211.0, 271.0, 245.0, 224.0, 238.0, 255.0, 267.0, 90.0, 90.0, 241.0, 238.0, 247.0, 257.0, 223.0, 
242.0, 248.0, 259.0, 254.0, 268.0, 231.0, 237.0, 253.0, 266.0, 262.0, 248.0, 257.0, 265.0, 245.0, 220.0, 212.0, 201.0, 240.0, 273.0, 282.0, 237.0, 217.0, 210.0, 228.0, 245.0, 244.0, 221.0, 255.0, 258.0, 248.0, 265.0, 267.0, 249.0, 218.0, 246.0, 233.0, 243.0, 263.0, 250.0, 219.0, 222.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6853727837400302, "mean_inference_ms": 1.2079962459958373, "mean_action_processing_ms": 0.13264166399823427, "mean_env_wait_ms": 0.8467385166751192, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 2995200, "num_agent_steps_trained": 2995200, "num_env_steps_sampled": 1497600, "num_env_steps_trained": 1497600, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 1497600, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 2995200, "timers": {"training_iteration_time_ms": 3696.818, "learn_time_ms": 1132.17, "learn_throughput": 11305.723, "synch_weights_time_ms": 12.436}, "counters": {"num_env_steps_sampled": 1497600, "num_env_steps_trained": 1497600, "num_agent_steps_sampled": 2995200, "num_agent_steps_trained": 2995200}, "done": false, "episodes_total": 3744, "training_iteration": 117, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-07-34", "timestamp": 1666580854, "time_this_iter_s": 3.9661037921905518, "time_total_s": 440.6608648300171, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, 
"device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, 
"sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": 
{"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", 
"observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, 
"keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function 
get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function 
gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 440.6608648300171, "timesteps_since_restore": 0, "iterations_since_restore": 117, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 21.383333333333336, "ram_util_percent": 10.616666666666667}}
+{"custom_metrics": {"sparse_reward_mean": 168.2, "sparse_reward_min": 60, "sparse_reward_max": 180, "shaped_reward_mean": 148.6, "shaped_reward_min": 60, "shaped_reward_max": 165, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.12, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 14.76, "onion_pickup_agent_1_min": 2, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 13.63, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 14.28, "useful_onion_pickup_agent_1_min": 2, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.45, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.55, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.32, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, 
"useful_onion_drop_agent_1_mean": 0.31, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 13.32, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 13.95, "potting_onion_agent_1_min": 2, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.23, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 13, "dish_pickup_agent_1_mean": 4.77, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.09, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 3.84, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.62, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 0.39, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.16, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.1, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.43, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.23, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 4.34, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.18, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.06, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 13.32, "optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 13.95, "optimal_onion_potting_agent_1_min": 2, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 13.32, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 13.95, "viable_onion_potting_agent_1_min": 2, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 1.2037062331786428e-36, "cur_lr": 0.0010000000474974513, "total_loss": 0.000875909929163754, "policy_loss": 0.0006957841105759144, 
"vf_loss": 7.3936967849731445, "vf_explained_var": 0.7045917510986328, "kl": 0.0021515809930860996, "entropy": 1.1184874773025513, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 1510400, "num_env_steps_trained": 1510400, "num_agent_steps_sampled": 3020800, "num_agent_steps_trained": 3020800}, "sampler_results": {"episode_reward_max": 525.0, "episode_reward_min": 180.0, "episode_reward_mean": 485.0, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 90.0}, "policy_reward_max": {"ppo": 282.0}, "policy_reward_mean": {"ppo": 242.5}, "custom_metrics": {"sparse_reward_mean": 168.2, "sparse_reward_min": 60, "sparse_reward_max": 180, "shaped_reward_mean": 148.6, "shaped_reward_min": 60, "shaped_reward_max": 165, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.12, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 14.76, 
"onion_pickup_agent_1_min": 2, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 13.63, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 14.28, "useful_onion_pickup_agent_1_min": 2, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.45, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.55, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.32, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.31, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 13.32, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 13.95, "potting_onion_agent_1_min": 2, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.23, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 13, "dish_pickup_agent_1_mean": 4.77, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.09, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 3.84, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.62, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 0.39, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.16, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.1, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.43, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.23, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 4.34, "soup_delivery_agent_0_min": 0, 
"soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.18, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.06, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 13.32, "optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 13.95, "optimal_onion_potting_agent_1_min": 2, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 13.32, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 13.95, "viable_onion_potting_agent_1_min": 2, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [467.0, 458.0, 462.0, 462.0, 516.0, 416.0, 462.0, 465.0, 456.0, 516.0, 519.0, 399.0, 470.0, 519.0, 411.0, 459.0, 516.0, 470.0, 444.0, 465.0, 521.0, 456.0, 456.0, 519.0, 507.0, 522.0, 516.0, 519.0, 525.0, 513.0, 519.0, 525.0, 462.0, 519.0, 468.0, 453.0, 516.0, 513.0, 519.0, 501.0, 419.0, 516.0, 462.0, 522.0, 180.0, 479.0, 504.0, 465.0, 507.0, 522.0, 468.0, 519.0, 510.0, 522.0, 465.0, 413.0, 513.0, 519.0, 427.0, 473.0, 465.0, 513.0, 513.0, 516.0, 464.0, 476.0, 513.0, 441.0, 522.0, 516.0, 519.0, 519.0, 516.0, 468.0, 522.0, 465.0, 470.0, 507.0, 462.0, 513.0, 470.0, 516.0, 308.0, 519.0, 519.0, 459.0, 467.0, 516.0, 513.0, 510.0, 516.0, 513.0, 522.0, 522.0, 473.0, 516.0, 522.0, 435.0, 465.0, 513.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [225.0, 242.0, 218.0, 240.0, 227.0, 235.0, 231.0, 231.0, 260.0, 256.0, 225.0, 191.0, 227.0, 235.0, 220.0, 245.0, 238.0, 218.0, 267.0, 249.0, 253.0, 266.0, 196.0, 203.0, 247.0, 223.0, 267.0, 252.0, 211.0, 200.0, 220.0, 239.0, 256.0, 260.0, 235.0, 235.0, 208.0, 236.0, 234.0, 231.0, 276.0, 245.0, 223.0, 233.0, 243.0, 213.0, 255.0, 264.0, 257.0, 
250.0, 255.0, 267.0, 245.0, 271.0, 253.0, 266.0, 250.0, 275.0, 255.0, 258.0, 272.0, 247.0, 258.0, 267.0, 228.0, 234.0, 269.0, 250.0, 240.0, 228.0, 237.0, 216.0, 257.0, 259.0, 257.0, 256.0, 253.0, 266.0, 250.0, 251.0, 208.0, 211.0, 271.0, 245.0, 224.0, 238.0, 255.0, 267.0, 90.0, 90.0, 241.0, 238.0, 247.0, 257.0, 223.0, 242.0, 248.0, 259.0, 254.0, 268.0, 231.0, 237.0, 253.0, 266.0, 262.0, 248.0, 257.0, 265.0, 245.0, 220.0, 212.0, 201.0, 240.0, 273.0, 282.0, 237.0, 217.0, 210.0, 228.0, 245.0, 244.0, 221.0, 255.0, 258.0, 248.0, 265.0, 267.0, 249.0, 218.0, 246.0, 233.0, 243.0, 263.0, 250.0, 219.0, 222.0, 259.0, 263.0, 251.0, 265.0, 268.0, 251.0, 260.0, 259.0, 260.0, 256.0, 239.0, 229.0, 265.0, 257.0, 234.0, 231.0, 231.0, 239.0, 258.0, 249.0, 225.0, 237.0, 264.0, 249.0, 244.0, 226.0, 261.0, 255.0, 159.0, 149.0, 272.0, 247.0, 256.0, 263.0, 222.0, 237.0, 222.0, 245.0, 250.0, 266.0, 248.0, 265.0, 262.0, 248.0, 257.0, 259.0, 259.0, 254.0, 269.0, 253.0, 260.0, 262.0, 236.0, 237.0, 261.0, 255.0, 264.0, 258.0, 212.0, 223.0, 237.0, 228.0, 245.0, 268.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6852450585935208, "mean_inference_ms": 1.208216584112233, "mean_action_processing_ms": 0.13261600408321741, "mean_env_wait_ms": 0.8465971694239437, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 525.0, "episode_reward_min": 180.0, "episode_reward_mean": 485.0, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 90.0}, "policy_reward_max": {"ppo": 282.0}, "policy_reward_mean": {"ppo": 242.5}, "hist_stats": {"episode_reward": [467.0, 458.0, 462.0, 462.0, 516.0, 416.0, 462.0, 465.0, 456.0, 516.0, 519.0, 399.0, 470.0, 519.0, 411.0, 459.0, 516.0, 470.0, 444.0, 465.0, 521.0, 456.0, 456.0, 519.0, 507.0, 522.0, 516.0, 519.0, 525.0, 513.0, 519.0, 525.0, 462.0, 519.0, 468.0, 453.0, 516.0, 513.0, 519.0, 501.0, 419.0, 516.0, 462.0, 522.0, 180.0, 479.0, 504.0, 465.0, 507.0, 522.0, 468.0, 519.0, 510.0, 522.0, 465.0, 413.0, 513.0, 
519.0, 427.0, 473.0, 465.0, 513.0, 513.0, 516.0, 464.0, 476.0, 513.0, 441.0, 522.0, 516.0, 519.0, 519.0, 516.0, 468.0, 522.0, 465.0, 470.0, 507.0, 462.0, 513.0, 470.0, 516.0, 308.0, 519.0, 519.0, 459.0, 467.0, 516.0, 513.0, 510.0, 516.0, 513.0, 522.0, 522.0, 473.0, 516.0, 522.0, 435.0, 465.0, 513.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [225.0, 242.0, 218.0, 240.0, 227.0, 235.0, 231.0, 231.0, 260.0, 256.0, 225.0, 191.0, 227.0, 235.0, 220.0, 245.0, 238.0, 218.0, 267.0, 249.0, 253.0, 266.0, 196.0, 203.0, 247.0, 223.0, 267.0, 252.0, 211.0, 200.0, 220.0, 239.0, 256.0, 260.0, 235.0, 235.0, 208.0, 236.0, 234.0, 231.0, 276.0, 245.0, 223.0, 233.0, 243.0, 213.0, 255.0, 264.0, 257.0, 250.0, 255.0, 267.0, 245.0, 271.0, 253.0, 266.0, 250.0, 275.0, 255.0, 258.0, 272.0, 247.0, 258.0, 267.0, 228.0, 234.0, 269.0, 250.0, 240.0, 228.0, 237.0, 216.0, 257.0, 259.0, 257.0, 256.0, 253.0, 266.0, 250.0, 251.0, 208.0, 211.0, 271.0, 245.0, 224.0, 238.0, 255.0, 267.0, 90.0, 90.0, 241.0, 238.0, 247.0, 257.0, 223.0, 242.0, 248.0, 259.0, 254.0, 268.0, 231.0, 237.0, 253.0, 266.0, 262.0, 248.0, 257.0, 265.0, 245.0, 220.0, 212.0, 201.0, 240.0, 273.0, 282.0, 237.0, 217.0, 210.0, 228.0, 245.0, 244.0, 221.0, 255.0, 258.0, 248.0, 265.0, 267.0, 249.0, 218.0, 246.0, 233.0, 243.0, 263.0, 250.0, 219.0, 222.0, 259.0, 263.0, 251.0, 265.0, 268.0, 251.0, 260.0, 259.0, 260.0, 256.0, 239.0, 229.0, 265.0, 257.0, 234.0, 231.0, 231.0, 239.0, 258.0, 249.0, 225.0, 237.0, 264.0, 249.0, 244.0, 226.0, 261.0, 255.0, 159.0, 
149.0, 272.0, 247.0, 256.0, 263.0, 222.0, 237.0, 222.0, 245.0, 250.0, 266.0, 248.0, 265.0, 262.0, 248.0, 257.0, 259.0, 259.0, 254.0, 269.0, 253.0, 260.0, 262.0, 236.0, 237.0, 261.0, 255.0, 264.0, 258.0, 212.0, 223.0, 237.0, 228.0, 245.0, 268.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6852450585935208, "mean_inference_ms": 1.208216584112233, "mean_action_processing_ms": 0.13261600408321741, "mean_env_wait_ms": 0.8465971694239437, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 3020800, "num_agent_steps_trained": 3020800, "num_env_steps_sampled": 1510400, "num_env_steps_trained": 1510400, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 1510400, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 3020800, "timers": {"training_iteration_time_ms": 3691.368, "learn_time_ms": 1140.138, "learn_throughput": 11226.714, "synch_weights_time_ms": 12.254}, "counters": {"num_env_steps_sampled": 1510400, "num_env_steps_trained": 1510400, "num_agent_steps_sampled": 3020800, "num_agent_steps_trained": 3020800}, "done": false, "episodes_total": 3776, "training_iteration": 118, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-07-37", "timestamp": 1666580857, "time_this_iter_s": 3.6140036582946777, "time_total_s": 444.27486848831177, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 444.27486848831177, "timesteps_since_restore": 0, "iterations_since_restore": 118, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.383333333333333, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 169.6, "sparse_reward_min": 60, "sparse_reward_max": 180, "shaped_reward_mean": 150.04, "shaped_reward_min": 60, "shaped_reward_max": 170, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.03, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 15.02, "onion_pickup_agent_1_min": 2, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 13.53, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 14.47, "useful_onion_pickup_agent_1_min": 2, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.4, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.54, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.25, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, 
"useful_onion_drop_agent_1_mean": 0.34, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 13.3, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 14.17, "potting_onion_agent_1_min": 2, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.27, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 13, "dish_pickup_agent_1_mean": 4.72, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.25, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 3.86, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.61, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 0.31, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.14, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.12, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.5, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.19, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 4.42, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.15, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.04, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 13.3, "optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 14.17, "optimal_onion_potting_agent_1_min": 2, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 13.3, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 14.17, "viable_onion_potting_agent_1_min": 2, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 6.018531165893214e-37, "cur_lr": 0.0010000000474974513, "total_loss": -2.7321191737428308e-05, "policy_loss": 
-0.0002118002448696643, "vf_loss": 7.386599540710449, "vf_explained_var": 0.6774031519889832, "kl": 0.0019258097745478153, "entropy": 1.1083590984344482, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 1523200, "num_env_steps_trained": 1523200, "num_agent_steps_sampled": 3046400, "num_agent_steps_trained": 3046400}, "sampler_results": {"episode_reward_max": 530.0, "episode_reward_min": 180.0, "episode_reward_mean": 489.24, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 90.0}, "policy_reward_max": {"ppo": 282.0}, "policy_reward_mean": {"ppo": 244.62}, "custom_metrics": {"sparse_reward_mean": 169.6, "sparse_reward_min": 60, "sparse_reward_max": 180, "shaped_reward_mean": 150.04, "shaped_reward_min": 60, "shaped_reward_max": 170, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.03, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 21, 
"onion_pickup_agent_1_mean": 15.02, "onion_pickup_agent_1_min": 2, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 13.53, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 14.47, "useful_onion_pickup_agent_1_min": 2, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.4, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.54, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.25, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.34, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 13.3, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 14.17, "potting_onion_agent_1_min": 2, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.27, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 13, "dish_pickup_agent_1_mean": 4.72, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.25, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 3.86, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.61, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 0.31, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.14, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.12, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.5, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.19, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 4.42, 
"soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.15, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.04, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 13.3, "optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 14.17, "optimal_onion_potting_agent_1_min": 2, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 13.3, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 14.17, "viable_onion_potting_agent_1_min": 2, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, 
"useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [462.0, 519.0, 468.0, 453.0, 516.0, 513.0, 519.0, 501.0, 419.0, 516.0, 462.0, 522.0, 180.0, 479.0, 504.0, 465.0, 507.0, 522.0, 468.0, 519.0, 510.0, 522.0, 465.0, 413.0, 513.0, 519.0, 427.0, 473.0, 465.0, 513.0, 513.0, 516.0, 464.0, 476.0, 513.0, 441.0, 522.0, 516.0, 519.0, 519.0, 516.0, 468.0, 522.0, 465.0, 470.0, 507.0, 462.0, 513.0, 470.0, 516.0, 308.0, 519.0, 519.0, 459.0, 467.0, 516.0, 513.0, 510.0, 516.0, 513.0, 522.0, 522.0, 473.0, 516.0, 522.0, 435.0, 465.0, 513.0, 519.0, 473.0, 354.0, 513.0, 510.0, 468.0, 507.0, 519.0, 419.0, 516.0, 456.0, 519.0, 513.0, 519.0, 468.0, 525.0, 516.0, 519.0, 519.0, 473.0, 513.0, 504.0, 519.0, 516.0, 519.0, 519.0, 516.0, 459.0, 416.0, 516.0, 530.0, 522.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [228.0, 234.0, 269.0, 250.0, 240.0, 228.0, 237.0, 216.0, 257.0, 259.0, 257.0, 256.0, 253.0, 266.0, 250.0, 251.0, 208.0, 211.0, 271.0, 245.0, 224.0, 238.0, 255.0, 267.0, 90.0, 90.0, 241.0, 238.0, 247.0, 257.0, 223.0, 242.0, 248.0, 259.0, 254.0, 268.0, 231.0, 237.0, 253.0, 266.0, 262.0, 248.0, 257.0, 265.0, 
245.0, 220.0, 212.0, 201.0, 240.0, 273.0, 282.0, 237.0, 217.0, 210.0, 228.0, 245.0, 244.0, 221.0, 255.0, 258.0, 248.0, 265.0, 267.0, 249.0, 218.0, 246.0, 233.0, 243.0, 263.0, 250.0, 219.0, 222.0, 259.0, 263.0, 251.0, 265.0, 268.0, 251.0, 260.0, 259.0, 260.0, 256.0, 239.0, 229.0, 265.0, 257.0, 234.0, 231.0, 231.0, 239.0, 258.0, 249.0, 225.0, 237.0, 264.0, 249.0, 244.0, 226.0, 261.0, 255.0, 159.0, 149.0, 272.0, 247.0, 256.0, 263.0, 222.0, 237.0, 222.0, 245.0, 250.0, 266.0, 248.0, 265.0, 262.0, 248.0, 257.0, 259.0, 259.0, 254.0, 269.0, 253.0, 260.0, 262.0, 236.0, 237.0, 261.0, 255.0, 264.0, 258.0, 212.0, 223.0, 237.0, 228.0, 245.0, 268.0, 264.0, 255.0, 237.0, 236.0, 184.0, 170.0, 256.0, 257.0, 246.0, 264.0, 236.0, 232.0, 256.0, 251.0, 248.0, 271.0, 214.0, 205.0, 260.0, 256.0, 221.0, 235.0, 255.0, 264.0, 252.0, 261.0, 275.0, 244.0, 229.0, 239.0, 250.0, 275.0, 247.0, 269.0, 270.0, 249.0, 257.0, 262.0, 234.0, 239.0, 256.0, 257.0, 257.0, 247.0, 269.0, 250.0, 270.0, 246.0, 257.0, 262.0, 263.0, 256.0, 252.0, 264.0, 224.0, 235.0, 201.0, 215.0, 257.0, 259.0, 260.0, 270.0, 272.0, 250.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.68516331162428, "mean_inference_ms": 1.208340954911563, "mean_action_processing_ms": 0.13259036340208671, "mean_env_wait_ms": 0.8464620351800528, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 530.0, "episode_reward_min": 180.0, "episode_reward_mean": 489.24, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 90.0}, "policy_reward_max": {"ppo": 282.0}, "policy_reward_mean": {"ppo": 244.62}, "hist_stats": {"episode_reward": [462.0, 519.0, 468.0, 453.0, 516.0, 513.0, 519.0, 501.0, 419.0, 516.0, 462.0, 522.0, 180.0, 479.0, 504.0, 465.0, 507.0, 522.0, 468.0, 519.0, 510.0, 522.0, 465.0, 413.0, 513.0, 519.0, 427.0, 473.0, 465.0, 513.0, 513.0, 516.0, 464.0, 476.0, 513.0, 441.0, 522.0, 516.0, 519.0, 519.0, 516.0, 468.0, 522.0, 465.0, 470.0, 507.0, 462.0, 513.0, 470.0, 516.0, 308.0, 519.0, 
519.0, 459.0, 467.0, 516.0, 513.0, 510.0, 516.0, 513.0, 522.0, 522.0, 473.0, 516.0, 522.0, 435.0, 465.0, 513.0, 519.0, 473.0, 354.0, 513.0, 510.0, 468.0, 507.0, 519.0, 419.0, 516.0, 456.0, 519.0, 513.0, 519.0, 468.0, 525.0, 516.0, 519.0, 519.0, 473.0, 513.0, 504.0, 519.0, 516.0, 519.0, 519.0, 516.0, 459.0, 416.0, 516.0, 530.0, 522.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [228.0, 234.0, 269.0, 250.0, 240.0, 228.0, 237.0, 216.0, 257.0, 259.0, 257.0, 256.0, 253.0, 266.0, 250.0, 251.0, 208.0, 211.0, 271.0, 245.0, 224.0, 238.0, 255.0, 267.0, 90.0, 90.0, 241.0, 238.0, 247.0, 257.0, 223.0, 242.0, 248.0, 259.0, 254.0, 268.0, 231.0, 237.0, 253.0, 266.0, 262.0, 248.0, 257.0, 265.0, 245.0, 220.0, 212.0, 201.0, 240.0, 273.0, 282.0, 237.0, 217.0, 210.0, 228.0, 245.0, 244.0, 221.0, 255.0, 258.0, 248.0, 265.0, 267.0, 249.0, 218.0, 246.0, 233.0, 243.0, 263.0, 250.0, 219.0, 222.0, 259.0, 263.0, 251.0, 265.0, 268.0, 251.0, 260.0, 259.0, 260.0, 256.0, 239.0, 229.0, 265.0, 257.0, 234.0, 231.0, 231.0, 239.0, 258.0, 249.0, 225.0, 237.0, 264.0, 249.0, 244.0, 226.0, 261.0, 255.0, 159.0, 149.0, 272.0, 247.0, 256.0, 263.0, 222.0, 237.0, 222.0, 245.0, 250.0, 266.0, 248.0, 265.0, 262.0, 248.0, 257.0, 259.0, 259.0, 254.0, 269.0, 253.0, 260.0, 262.0, 236.0, 237.0, 261.0, 255.0, 264.0, 258.0, 212.0, 223.0, 237.0, 228.0, 245.0, 268.0, 264.0, 255.0, 237.0, 236.0, 184.0, 170.0, 256.0, 257.0, 246.0, 264.0, 236.0, 232.0, 256.0, 251.0, 248.0, 271.0, 214.0, 205.0, 260.0, 256.0, 221.0, 235.0, 255.0, 264.0, 
252.0, 261.0, 275.0, 244.0, 229.0, 239.0, 250.0, 275.0, 247.0, 269.0, 270.0, 249.0, 257.0, 262.0, 234.0, 239.0, 256.0, 257.0, 257.0, 247.0, 269.0, 250.0, 270.0, 246.0, 257.0, 262.0, 263.0, 256.0, 252.0, 264.0, 224.0, 235.0, 201.0, 215.0, 257.0, 259.0, 260.0, 270.0, 272.0, 250.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.68516331162428, "mean_inference_ms": 1.208340954911563, "mean_action_processing_ms": 0.13259036340208671, "mean_env_wait_ms": 0.8464620351800528, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 3046400, "num_agent_steps_trained": 3046400, "num_env_steps_sampled": 1523200, "num_env_steps_trained": 1523200, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 1523200, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 3046400, "timers": {"training_iteration_time_ms": 3695.547, "learn_time_ms": 1140.914, "learn_throughput": 11219.077, "synch_weights_time_ms": 12.357}, "counters": {"num_env_steps_sampled": 1523200, "num_env_steps_trained": 1523200, "num_agent_steps_sampled": 3046400, "num_agent_steps_trained": 3046400}, "done": false, "episodes_total": 3808, "training_iteration": 119, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-07-41", "timestamp": 1666580861, "time_this_iter_s": 3.732775926589966, "time_total_s": 448.00764441490173, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, 
"device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, 
"sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": 
{"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", 
"observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, 
"keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function 
get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function 
gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 448.00764441490173, "timesteps_since_restore": 0, "iterations_since_restore": 119, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.880000000000003, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 172.8, "sparse_reward_min": 100, "sparse_reward_max": 200, "shaped_reward_mean": 153.3, "shaped_reward_min": 108, "shaped_reward_max": 170, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.35, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 15.19, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 13.87, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 14.7, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.35, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.46, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.21, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, 
"useful_onion_drop_agent_1_mean": 0.28, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 13.63, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 14.4, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.24, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 13, "dish_pickup_agent_1_mean": 4.91, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 4.28, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.09, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.6, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 0.33, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.17, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.12, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.51, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.31, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.45, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.27, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 13.63, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 14.4, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 13.63, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 14.4, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 3.009265582946607e-37, "cur_lr": 0.0010000000474974513, "total_loss": -0.005632634274661541, "policy_loss": -0.0058066705241799355, 
"vf_loss": 7.275279998779297, "vf_explained_var": 0.7075681090354919, "kl": 0.0015871080104261637, "entropy": 1.1069822311401367, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 1536000, "num_env_steps_trained": 1536000, "num_agent_steps_sampled": 3072000, "num_agent_steps_trained": 3072000}, "sampler_results": {"episode_reward_max": 567.0, "episode_reward_min": 308.0, "episode_reward_mean": 498.9, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 149.0}, "policy_reward_max": {"ppo": 285.0}, "policy_reward_mean": {"ppo": 249.45}, "custom_metrics": {"sparse_reward_mean": 172.8, "sparse_reward_min": 100, "sparse_reward_max": 200, "shaped_reward_mean": 153.3, "shaped_reward_min": 108, "shaped_reward_max": 170, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.35, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 15.19, 
"onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 13.87, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 14.7, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.35, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.46, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.21, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.28, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 13.63, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 14.4, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.24, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 13, "dish_pickup_agent_1_mean": 4.91, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 4.28, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.09, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.6, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 0.33, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.17, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.12, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.51, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.31, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.45, "soup_delivery_agent_0_min": 1, 
"soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.27, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 13.63, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 14.4, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 13.63, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 14.4, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [464.0, 476.0, 513.0, 441.0, 522.0, 516.0, 519.0, 519.0, 516.0, 468.0, 522.0, 465.0, 470.0, 507.0, 462.0, 513.0, 470.0, 516.0, 308.0, 519.0, 519.0, 459.0, 467.0, 516.0, 513.0, 510.0, 516.0, 513.0, 522.0, 522.0, 473.0, 516.0, 522.0, 435.0, 465.0, 513.0, 519.0, 473.0, 354.0, 513.0, 510.0, 468.0, 507.0, 519.0, 419.0, 516.0, 456.0, 519.0, 513.0, 519.0, 468.0, 525.0, 516.0, 519.0, 519.0, 473.0, 513.0, 504.0, 519.0, 516.0, 519.0, 519.0, 516.0, 459.0, 416.0, 516.0, 530.0, 522.0, 522.0, 522.0, 525.0, 522.0, 516.0, 519.0, 504.0, 473.0, 473.0, 522.0, 516.0, 424.0, 525.0, 510.0, 522.0, 524.0, 525.0, 567.0, 522.0, 527.0, 510.0, 519.0, 507.0, 468.0, 522.0, 519.0, 522.0, 522.0, 465.0, 525.0, 522.0, 468.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [218.0, 246.0, 233.0, 243.0, 263.0, 250.0, 219.0, 222.0, 259.0, 263.0, 251.0, 265.0, 268.0, 251.0, 260.0, 259.0, 260.0, 256.0, 239.0, 229.0, 265.0, 257.0, 234.0, 231.0, 231.0, 239.0, 258.0, 249.0, 225.0, 237.0, 264.0, 249.0, 244.0, 226.0, 261.0, 255.0, 159.0, 149.0, 272.0, 247.0, 256.0, 263.0, 222.0, 237.0, 222.0, 245.0, 250.0, 266.0, 248.0, 
265.0, 262.0, 248.0, 257.0, 259.0, 259.0, 254.0, 269.0, 253.0, 260.0, 262.0, 236.0, 237.0, 261.0, 255.0, 264.0, 258.0, 212.0, 223.0, 237.0, 228.0, 245.0, 268.0, 264.0, 255.0, 237.0, 236.0, 184.0, 170.0, 256.0, 257.0, 246.0, 264.0, 236.0, 232.0, 256.0, 251.0, 248.0, 271.0, 214.0, 205.0, 260.0, 256.0, 221.0, 235.0, 255.0, 264.0, 252.0, 261.0, 275.0, 244.0, 229.0, 239.0, 250.0, 275.0, 247.0, 269.0, 270.0, 249.0, 257.0, 262.0, 234.0, 239.0, 256.0, 257.0, 257.0, 247.0, 269.0, 250.0, 270.0, 246.0, 257.0, 262.0, 263.0, 256.0, 252.0, 264.0, 224.0, 235.0, 201.0, 215.0, 257.0, 259.0, 260.0, 270.0, 272.0, 250.0, 254.0, 268.0, 254.0, 268.0, 262.0, 263.0, 266.0, 256.0, 268.0, 248.0, 246.0, 273.0, 239.0, 265.0, 229.0, 244.0, 229.0, 244.0, 267.0, 255.0, 250.0, 266.0, 211.0, 213.0, 254.0, 271.0, 257.0, 253.0, 256.0, 266.0, 275.0, 249.0, 275.0, 250.0, 282.0, 285.0, 261.0, 261.0, 271.0, 256.0, 263.0, 247.0, 241.0, 278.0, 255.0, 252.0, 236.0, 232.0, 258.0, 264.0, 253.0, 266.0, 265.0, 257.0, 262.0, 260.0, 237.0, 228.0, 258.0, 267.0, 263.0, 259.0, 239.0, 229.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.685130443905528, "mean_inference_ms": 1.2081711376748996, "mean_action_processing_ms": 0.1325676931020594, "mean_env_wait_ms": 0.8460857022725299, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 567.0, "episode_reward_min": 308.0, "episode_reward_mean": 498.9, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 149.0}, "policy_reward_max": {"ppo": 285.0}, "policy_reward_mean": {"ppo": 249.45}, "hist_stats": {"episode_reward": [464.0, 476.0, 513.0, 441.0, 522.0, 516.0, 519.0, 519.0, 516.0, 468.0, 522.0, 465.0, 470.0, 507.0, 462.0, 513.0, 470.0, 516.0, 308.0, 519.0, 519.0, 459.0, 467.0, 516.0, 513.0, 510.0, 516.0, 513.0, 522.0, 522.0, 473.0, 516.0, 522.0, 435.0, 465.0, 513.0, 519.0, 473.0, 354.0, 513.0, 510.0, 468.0, 507.0, 519.0, 419.0, 516.0, 456.0, 519.0, 513.0, 519.0, 468.0, 525.0, 516.0, 519.0, 519.0, 473.0, 513.0, 
504.0, 519.0, 516.0, 519.0, 519.0, 516.0, 459.0, 416.0, 516.0, 530.0, 522.0, 522.0, 522.0, 525.0, 522.0, 516.0, 519.0, 504.0, 473.0, 473.0, 522.0, 516.0, 424.0, 525.0, 510.0, 522.0, 524.0, 525.0, 567.0, 522.0, 527.0, 510.0, 519.0, 507.0, 468.0, 522.0, 519.0, 522.0, 522.0, 465.0, 525.0, 522.0, 468.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [218.0, 246.0, 233.0, 243.0, 263.0, 250.0, 219.0, 222.0, 259.0, 263.0, 251.0, 265.0, 268.0, 251.0, 260.0, 259.0, 260.0, 256.0, 239.0, 229.0, 265.0, 257.0, 234.0, 231.0, 231.0, 239.0, 258.0, 249.0, 225.0, 237.0, 264.0, 249.0, 244.0, 226.0, 261.0, 255.0, 159.0, 149.0, 272.0, 247.0, 256.0, 263.0, 222.0, 237.0, 222.0, 245.0, 250.0, 266.0, 248.0, 265.0, 262.0, 248.0, 257.0, 259.0, 259.0, 254.0, 269.0, 253.0, 260.0, 262.0, 236.0, 237.0, 261.0, 255.0, 264.0, 258.0, 212.0, 223.0, 237.0, 228.0, 245.0, 268.0, 264.0, 255.0, 237.0, 236.0, 184.0, 170.0, 256.0, 257.0, 246.0, 264.0, 236.0, 232.0, 256.0, 251.0, 248.0, 271.0, 214.0, 205.0, 260.0, 256.0, 221.0, 235.0, 255.0, 264.0, 252.0, 261.0, 275.0, 244.0, 229.0, 239.0, 250.0, 275.0, 247.0, 269.0, 270.0, 249.0, 257.0, 262.0, 234.0, 239.0, 256.0, 257.0, 257.0, 247.0, 269.0, 250.0, 270.0, 246.0, 257.0, 262.0, 263.0, 256.0, 252.0, 264.0, 224.0, 235.0, 201.0, 215.0, 257.0, 259.0, 260.0, 270.0, 272.0, 250.0, 254.0, 268.0, 254.0, 268.0, 262.0, 263.0, 266.0, 256.0, 268.0, 248.0, 246.0, 273.0, 239.0, 265.0, 229.0, 244.0, 229.0, 244.0, 267.0, 255.0, 250.0, 266.0, 211.0, 213.0, 254.0, 271.0, 257.0, 253.0, 256.0, 
266.0, 275.0, 249.0, 275.0, 250.0, 282.0, 285.0, 261.0, 261.0, 271.0, 256.0, 263.0, 247.0, 241.0, 278.0, 255.0, 252.0, 236.0, 232.0, 258.0, 264.0, 253.0, 266.0, 265.0, 257.0, 262.0, 260.0, 237.0, 228.0, 258.0, 267.0, 263.0, 259.0, 239.0, 229.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.685130443905528, "mean_inference_ms": 1.2081711376748996, "mean_action_processing_ms": 0.1325676931020594, "mean_env_wait_ms": 0.8460857022725299, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 3072000, "num_agent_steps_trained": 3072000, "num_env_steps_sampled": 1536000, "num_env_steps_trained": 1536000, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 1536000, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 3072000, "timers": {"training_iteration_time_ms": 3696.184, "learn_time_ms": 1147.37, "learn_throughput": 11155.952, "synch_weights_time_ms": 11.575}, "counters": {"num_env_steps_sampled": 1536000, "num_env_steps_trained": 1536000, "num_agent_steps_sampled": 3072000, "num_agent_steps_trained": 3072000}, "done": false, "episodes_total": 3840, "training_iteration": 120, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-07-45", "timestamp": 1666580865, "time_this_iter_s": 3.6966631412506104, "time_total_s": 451.70430755615234, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 451.70430755615234, "timesteps_since_restore": 0, "iterations_since_restore": 120, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.88333333333333, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 174.4, "sparse_reward_min": 120, "sparse_reward_max": 200, "shaped_reward_mean": 154.83, "shaped_reward_min": 114, "shaped_reward_max": 170, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.39, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 15.24, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 14.04, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 14.78, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.28, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.48, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.15, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, 
"useful_onion_drop_agent_1_mean": 0.31, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 13.8, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 14.47, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 20, "dish_pickup_agent_0_mean": 5.14, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 13, "dish_pickup_agent_1_mean": 5.07, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 4.26, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.28, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.53, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 0.34, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.15, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.09, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.45, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 4.44, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.39, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.39, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 13.8, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 14.47, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 20, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 13.8, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 14.47, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 20, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 1.5046327914733034e-37, "cur_lr": 0.0010000000474974513, "total_loss": 0.000531826401129365, "policy_loss": 0.00035990215837955475, 
"vf_loss": 7.2422380447387695, "vf_explained_var": 0.7075891494750977, "kl": 0.0017436475027352571, "entropy": 1.1045974493026733, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 1548800, "num_env_steps_trained": 1548800, "num_agent_steps_sampled": 3097600, "num_agent_steps_trained": 3097600}, "sampler_results": {"episode_reward_max": 567.0, "episode_reward_min": 354.0, "episode_reward_mean": 503.63, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 170.0}, "policy_reward_max": {"ppo": 286.0}, "policy_reward_mean": {"ppo": 251.815}, "custom_metrics": {"sparse_reward_mean": 174.4, "sparse_reward_min": 120, "sparse_reward_max": 200, "shaped_reward_mean": 154.83, "shaped_reward_min": 114, "shaped_reward_max": 170, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.39, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 15.24, 
"onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 14.04, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 14.78, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.28, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.48, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.15, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.31, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 13.8, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 14.47, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 20, "dish_pickup_agent_0_mean": 5.14, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 13, "dish_pickup_agent_1_mean": 5.07, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 4.26, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.28, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.53, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 0.34, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.15, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.09, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.45, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 4.44, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.39, "soup_delivery_agent_0_min": 1, 
"soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.39, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 13.8, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 14.47, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 20, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 13.8, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 14.47, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 20, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [522.0, 435.0, 465.0, 513.0, 519.0, 473.0, 354.0, 513.0, 510.0, 468.0, 507.0, 519.0, 419.0, 516.0, 456.0, 519.0, 513.0, 519.0, 468.0, 525.0, 516.0, 519.0, 519.0, 473.0, 513.0, 504.0, 519.0, 516.0, 519.0, 519.0, 516.0, 459.0, 416.0, 516.0, 530.0, 522.0, 522.0, 522.0, 525.0, 522.0, 516.0, 519.0, 504.0, 473.0, 473.0, 522.0, 516.0, 424.0, 525.0, 510.0, 522.0, 524.0, 525.0, 567.0, 522.0, 527.0, 510.0, 519.0, 507.0, 468.0, 522.0, 519.0, 522.0, 522.0, 465.0, 525.0, 522.0, 468.0, 519.0, 519.0, 513.0, 516.0, 516.0, 468.0, 567.0, 530.0, 482.0, 516.0, 467.0, 513.0, 507.0, 462.0, 519.0, 525.0, 516.0, 513.0, 468.0, 464.0, 504.0, 516.0, 479.0, 516.0, 522.0, 522.0, 525.0, 525.0, 519.0, 519.0, 462.0, 516.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [264.0, 258.0, 212.0, 223.0, 237.0, 228.0, 245.0, 268.0, 264.0, 255.0, 237.0, 236.0, 184.0, 170.0, 256.0, 257.0, 246.0, 264.0, 236.0, 232.0, 256.0, 251.0, 248.0, 271.0, 214.0, 205.0, 260.0, 256.0, 221.0, 235.0, 255.0, 264.0, 252.0, 261.0, 275.0, 244.0, 229.0, 239.0, 250.0, 275.0, 247.0, 269.0, 270.0, 249.0, 257.0, 262.0, 234.0, 239.0, 256.0, 
257.0, 257.0, 247.0, 269.0, 250.0, 270.0, 246.0, 257.0, 262.0, 263.0, 256.0, 252.0, 264.0, 224.0, 235.0, 201.0, 215.0, 257.0, 259.0, 260.0, 270.0, 272.0, 250.0, 254.0, 268.0, 254.0, 268.0, 262.0, 263.0, 266.0, 256.0, 268.0, 248.0, 246.0, 273.0, 239.0, 265.0, 229.0, 244.0, 229.0, 244.0, 267.0, 255.0, 250.0, 266.0, 211.0, 213.0, 254.0, 271.0, 257.0, 253.0, 256.0, 266.0, 275.0, 249.0, 275.0, 250.0, 282.0, 285.0, 261.0, 261.0, 271.0, 256.0, 263.0, 247.0, 241.0, 278.0, 255.0, 252.0, 236.0, 232.0, 258.0, 264.0, 253.0, 266.0, 265.0, 257.0, 262.0, 260.0, 237.0, 228.0, 258.0, 267.0, 263.0, 259.0, 239.0, 229.0, 250.0, 269.0, 256.0, 263.0, 252.0, 261.0, 251.0, 265.0, 271.0, 245.0, 232.0, 236.0, 281.0, 286.0, 275.0, 255.0, 248.0, 234.0, 267.0, 249.0, 238.0, 229.0, 261.0, 252.0, 250.0, 257.0, 219.0, 243.0, 258.0, 261.0, 262.0, 263.0, 266.0, 250.0, 246.0, 267.0, 240.0, 228.0, 212.0, 252.0, 256.0, 248.0, 257.0, 259.0, 246.0, 233.0, 251.0, 265.0, 244.0, 278.0, 260.0, 262.0, 264.0, 261.0, 254.0, 271.0, 254.0, 265.0, 264.0, 255.0, 227.0, 235.0, 248.0, 268.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6851763737865477, "mean_inference_ms": 1.2080972065799291, "mean_action_processing_ms": 0.13256303185673912, "mean_env_wait_ms": 0.8457883628449312, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 567.0, "episode_reward_min": 354.0, "episode_reward_mean": 503.63, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 170.0}, "policy_reward_max": {"ppo": 286.0}, "policy_reward_mean": {"ppo": 251.815}, "hist_stats": {"episode_reward": [522.0, 435.0, 465.0, 513.0, 519.0, 473.0, 354.0, 513.0, 510.0, 468.0, 507.0, 519.0, 419.0, 516.0, 456.0, 519.0, 513.0, 519.0, 468.0, 525.0, 516.0, 519.0, 519.0, 473.0, 513.0, 504.0, 519.0, 516.0, 519.0, 519.0, 516.0, 459.0, 416.0, 516.0, 530.0, 522.0, 522.0, 522.0, 525.0, 522.0, 516.0, 519.0, 504.0, 473.0, 473.0, 522.0, 516.0, 424.0, 525.0, 510.0, 522.0, 524.0, 525.0, 567.0, 522.0, 527.0, 
510.0, 519.0, 507.0, 468.0, 522.0, 519.0, 522.0, 522.0, 465.0, 525.0, 522.0, 468.0, 519.0, 519.0, 513.0, 516.0, 516.0, 468.0, 567.0, 530.0, 482.0, 516.0, 467.0, 513.0, 507.0, 462.0, 519.0, 525.0, 516.0, 513.0, 468.0, 464.0, 504.0, 516.0, 479.0, 516.0, 522.0, 522.0, 525.0, 525.0, 519.0, 519.0, 462.0, 516.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [264.0, 258.0, 212.0, 223.0, 237.0, 228.0, 245.0, 268.0, 264.0, 255.0, 237.0, 236.0, 184.0, 170.0, 256.0, 257.0, 246.0, 264.0, 236.0, 232.0, 256.0, 251.0, 248.0, 271.0, 214.0, 205.0, 260.0, 256.0, 221.0, 235.0, 255.0, 264.0, 252.0, 261.0, 275.0, 244.0, 229.0, 239.0, 250.0, 275.0, 247.0, 269.0, 270.0, 249.0, 257.0, 262.0, 234.0, 239.0, 256.0, 257.0, 257.0, 247.0, 269.0, 250.0, 270.0, 246.0, 257.0, 262.0, 263.0, 256.0, 252.0, 264.0, 224.0, 235.0, 201.0, 215.0, 257.0, 259.0, 260.0, 270.0, 272.0, 250.0, 254.0, 268.0, 254.0, 268.0, 262.0, 263.0, 266.0, 256.0, 268.0, 248.0, 246.0, 273.0, 239.0, 265.0, 229.0, 244.0, 229.0, 244.0, 267.0, 255.0, 250.0, 266.0, 211.0, 213.0, 254.0, 271.0, 257.0, 253.0, 256.0, 266.0, 275.0, 249.0, 275.0, 250.0, 282.0, 285.0, 261.0, 261.0, 271.0, 256.0, 263.0, 247.0, 241.0, 278.0, 255.0, 252.0, 236.0, 232.0, 258.0, 264.0, 253.0, 266.0, 265.0, 257.0, 262.0, 260.0, 237.0, 228.0, 258.0, 267.0, 263.0, 259.0, 239.0, 229.0, 250.0, 269.0, 256.0, 263.0, 252.0, 261.0, 251.0, 265.0, 271.0, 245.0, 232.0, 236.0, 281.0, 286.0, 275.0, 255.0, 248.0, 234.0, 267.0, 249.0, 238.0, 229.0, 261.0, 252.0, 250.0, 257.0, 219.0, 243.0, 
258.0, 261.0, 262.0, 263.0, 266.0, 250.0, 246.0, 267.0, 240.0, 228.0, 212.0, 252.0, 256.0, 248.0, 257.0, 259.0, 246.0, 233.0, 251.0, 265.0, 244.0, 278.0, 260.0, 262.0, 264.0, 261.0, 254.0, 271.0, 254.0, 265.0, 264.0, 255.0, 227.0, 235.0, 248.0, 268.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6851763737865477, "mean_inference_ms": 1.2080972065799291, "mean_action_processing_ms": 0.13256303185673912, "mean_env_wait_ms": 0.8457883628449312, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 3097600, "num_agent_steps_trained": 3097600, "num_env_steps_sampled": 1548800, "num_env_steps_trained": 1548800, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 1548800, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 3097600, "timers": {"training_iteration_time_ms": 3710.556, "learn_time_ms": 1164.117, "learn_throughput": 10995.457, "synch_weights_time_ms": 11.488}, "counters": {"num_env_steps_sampled": 1548800, "num_env_steps_trained": 1548800, "num_agent_steps_sampled": 3097600, "num_agent_steps_trained": 3097600}, "done": false, "episodes_total": 3872, "training_iteration": 121, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-07-49", "timestamp": 1666580869, "time_this_iter_s": 3.874966859817505, "time_total_s": 455.57927441596985, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 455.57927441596985, "timesteps_since_restore": 0, "iterations_since_restore": 121, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.6, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 175.4, "sparse_reward_min": 60, "sparse_reward_max": 200, "shaped_reward_mean": 156.16, "shaped_reward_min": 60, "shaped_reward_max": 173, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.52, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 15.29, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 20, "useful_onion_pickup_agent_0_mean": 14.15, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 14.89, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 20, "onion_drop_agent_0_mean": 0.3, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.49, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.19, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, 
"useful_onion_drop_agent_1_mean": 0.28, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 13.91, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 14.51, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 20, "dish_pickup_agent_0_mean": 4.95, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.08, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.29, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 4.46, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.38, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.33, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.07, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.07, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.42, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 4.52, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 4.38, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.45, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 13.91, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 14.51, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 20, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 13.91, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 14.51, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 20, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 7.523163957366517e-38, "cur_lr": 0.0010000000474974513, "total_loss": 5.595572292804718e-05, "policy_loss": 
-0.00012787984451279044, "vf_loss": 7.336641311645508, "vf_explained_var": 0.7027114629745483, "kl": 0.001904007513076067, "entropy": 1.0996546745300293, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 1561600, "num_env_steps_trained": 1561600, "num_agent_steps_sampled": 3123200, "num_agent_steps_trained": 3123200}, "sampler_results": {"episode_reward_max": 573.0, "episode_reward_min": 180.0, "episode_reward_mean": 506.96, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 89.0}, "policy_reward_max": {"ppo": 288.0}, "policy_reward_mean": {"ppo": 253.48}, "custom_metrics": {"sparse_reward_mean": 175.4, "sparse_reward_min": 60, "sparse_reward_max": 200, "shaped_reward_mean": 156.16, "shaped_reward_min": 60, "shaped_reward_max": 173, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.52, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 21, 
"onion_pickup_agent_1_mean": 15.29, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 20, "useful_onion_pickup_agent_0_mean": 14.15, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 14.89, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 20, "onion_drop_agent_0_mean": 0.3, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.49, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.19, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.28, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 13.91, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 14.51, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 20, "dish_pickup_agent_0_mean": 4.95, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.08, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.29, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 4.46, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.38, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.33, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.07, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.07, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.42, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 4.52, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 4.38, 
"soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.45, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 13.91, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 14.51, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 20, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 13.91, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 14.51, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 20, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, 
"useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [416.0, 516.0, 530.0, 522.0, 522.0, 522.0, 525.0, 522.0, 516.0, 519.0, 504.0, 473.0, 473.0, 522.0, 516.0, 424.0, 525.0, 510.0, 522.0, 524.0, 525.0, 567.0, 522.0, 527.0, 510.0, 519.0, 507.0, 468.0, 522.0, 519.0, 522.0, 522.0, 465.0, 525.0, 522.0, 468.0, 519.0, 519.0, 513.0, 516.0, 516.0, 468.0, 567.0, 530.0, 482.0, 516.0, 467.0, 513.0, 507.0, 462.0, 519.0, 525.0, 516.0, 513.0, 468.0, 464.0, 504.0, 516.0, 479.0, 516.0, 522.0, 522.0, 525.0, 525.0, 519.0, 519.0, 462.0, 516.0, 519.0, 519.0, 516.0, 519.0, 479.0, 459.0, 522.0, 516.0, 519.0, 519.0, 522.0, 516.0, 519.0, 513.0, 462.0, 522.0, 522.0, 525.0, 519.0, 519.0, 513.0, 510.0, 573.0, 513.0, 180.0, 519.0, 519.0, 516.0, 530.0, 519.0, 513.0, 527.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [201.0, 215.0, 257.0, 259.0, 260.0, 270.0, 272.0, 250.0, 254.0, 268.0, 254.0, 268.0, 262.0, 263.0, 266.0, 256.0, 268.0, 248.0, 246.0, 273.0, 239.0, 265.0, 229.0, 244.0, 229.0, 244.0, 267.0, 255.0, 250.0, 266.0, 211.0, 213.0, 254.0, 271.0, 257.0, 253.0, 256.0, 266.0, 275.0, 249.0, 275.0, 250.0, 282.0, 
285.0, 261.0, 261.0, 271.0, 256.0, 263.0, 247.0, 241.0, 278.0, 255.0, 252.0, 236.0, 232.0, 258.0, 264.0, 253.0, 266.0, 265.0, 257.0, 262.0, 260.0, 237.0, 228.0, 258.0, 267.0, 263.0, 259.0, 239.0, 229.0, 250.0, 269.0, 256.0, 263.0, 252.0, 261.0, 251.0, 265.0, 271.0, 245.0, 232.0, 236.0, 281.0, 286.0, 275.0, 255.0, 248.0, 234.0, 267.0, 249.0, 238.0, 229.0, 261.0, 252.0, 250.0, 257.0, 219.0, 243.0, 258.0, 261.0, 262.0, 263.0, 266.0, 250.0, 246.0, 267.0, 240.0, 228.0, 212.0, 252.0, 256.0, 248.0, 257.0, 259.0, 246.0, 233.0, 251.0, 265.0, 244.0, 278.0, 260.0, 262.0, 264.0, 261.0, 254.0, 271.0, 254.0, 265.0, 264.0, 255.0, 227.0, 235.0, 248.0, 268.0, 254.0, 265.0, 262.0, 257.0, 262.0, 254.0, 254.0, 265.0, 235.0, 244.0, 223.0, 236.0, 267.0, 255.0, 262.0, 254.0, 251.0, 268.0, 256.0, 263.0, 254.0, 268.0, 241.0, 275.0, 247.0, 272.0, 264.0, 249.0, 232.0, 230.0, 262.0, 260.0, 265.0, 257.0, 261.0, 264.0, 267.0, 252.0, 258.0, 261.0, 246.0, 267.0, 266.0, 244.0, 285.0, 288.0, 249.0, 264.0, 91.0, 89.0, 261.0, 258.0, 267.0, 252.0, 255.0, 261.0, 262.0, 268.0, 264.0, 255.0, 234.0, 279.0, 267.0, 260.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6851629443359439, "mean_inference_ms": 1.208022696415082, "mean_action_processing_ms": 0.1325572923429858, "mean_env_wait_ms": 0.8454999723140769, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 573.0, "episode_reward_min": 180.0, "episode_reward_mean": 506.96, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 89.0}, "policy_reward_max": {"ppo": 288.0}, "policy_reward_mean": {"ppo": 253.48}, "hist_stats": {"episode_reward": [416.0, 516.0, 530.0, 522.0, 522.0, 522.0, 525.0, 522.0, 516.0, 519.0, 504.0, 473.0, 473.0, 522.0, 516.0, 424.0, 525.0, 510.0, 522.0, 524.0, 525.0, 567.0, 522.0, 527.0, 510.0, 519.0, 507.0, 468.0, 522.0, 519.0, 522.0, 522.0, 465.0, 525.0, 522.0, 468.0, 519.0, 519.0, 513.0, 516.0, 516.0, 468.0, 567.0, 530.0, 482.0, 516.0, 467.0, 513.0, 507.0, 462.0, 519.0, 
525.0, 516.0, 513.0, 468.0, 464.0, 504.0, 516.0, 479.0, 516.0, 522.0, 522.0, 525.0, 525.0, 519.0, 519.0, 462.0, 516.0, 519.0, 519.0, 516.0, 519.0, 479.0, 459.0, 522.0, 516.0, 519.0, 519.0, 522.0, 516.0, 519.0, 513.0, 462.0, 522.0, 522.0, 525.0, 519.0, 519.0, 513.0, 510.0, 573.0, 513.0, 180.0, 519.0, 519.0, 516.0, 530.0, 519.0, 513.0, 527.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [201.0, 215.0, 257.0, 259.0, 260.0, 270.0, 272.0, 250.0, 254.0, 268.0, 254.0, 268.0, 262.0, 263.0, 266.0, 256.0, 268.0, 248.0, 246.0, 273.0, 239.0, 265.0, 229.0, 244.0, 229.0, 244.0, 267.0, 255.0, 250.0, 266.0, 211.0, 213.0, 254.0, 271.0, 257.0, 253.0, 256.0, 266.0, 275.0, 249.0, 275.0, 250.0, 282.0, 285.0, 261.0, 261.0, 271.0, 256.0, 263.0, 247.0, 241.0, 278.0, 255.0, 252.0, 236.0, 232.0, 258.0, 264.0, 253.0, 266.0, 265.0, 257.0, 262.0, 260.0, 237.0, 228.0, 258.0, 267.0, 263.0, 259.0, 239.0, 229.0, 250.0, 269.0, 256.0, 263.0, 252.0, 261.0, 251.0, 265.0, 271.0, 245.0, 232.0, 236.0, 281.0, 286.0, 275.0, 255.0, 248.0, 234.0, 267.0, 249.0, 238.0, 229.0, 261.0, 252.0, 250.0, 257.0, 219.0, 243.0, 258.0, 261.0, 262.0, 263.0, 266.0, 250.0, 246.0, 267.0, 240.0, 228.0, 212.0, 252.0, 256.0, 248.0, 257.0, 259.0, 246.0, 233.0, 251.0, 265.0, 244.0, 278.0, 260.0, 262.0, 264.0, 261.0, 254.0, 271.0, 254.0, 265.0, 264.0, 255.0, 227.0, 235.0, 248.0, 268.0, 254.0, 265.0, 262.0, 257.0, 262.0, 254.0, 254.0, 265.0, 235.0, 244.0, 223.0, 236.0, 267.0, 255.0, 262.0, 254.0, 251.0, 268.0, 256.0, 263.0, 254.0, 268.0, 241.0, 
275.0, 247.0, 272.0, 264.0, 249.0, 232.0, 230.0, 262.0, 260.0, 265.0, 257.0, 261.0, 264.0, 267.0, 252.0, 258.0, 261.0, 246.0, 267.0, 266.0, 244.0, 285.0, 288.0, 249.0, 264.0, 91.0, 89.0, 261.0, 258.0, 267.0, 252.0, 255.0, 261.0, 262.0, 268.0, 264.0, 255.0, 234.0, 279.0, 267.0, 260.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6851629443359439, "mean_inference_ms": 1.208022696415082, "mean_action_processing_ms": 0.1325572923429858, "mean_env_wait_ms": 0.8454999723140769, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 3123200, "num_agent_steps_trained": 3123200, "num_env_steps_sampled": 1561600, "num_env_steps_trained": 1561600, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 1561600, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 3123200, "timers": {"training_iteration_time_ms": 3704.116, "learn_time_ms": 1167.28, "learn_throughput": 10965.667, "synch_weights_time_ms": 11.523}, "counters": {"num_env_steps_sampled": 1561600, "num_env_steps_trained": 1561600, "num_agent_steps_sampled": 3123200, "num_agent_steps_trained": 3123200}, "done": false, "episodes_total": 3904, "training_iteration": 122, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-07-53", "timestamp": 1666580873, "time_this_iter_s": 3.687739849090576, "time_total_s": 459.2670142650604, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, 
"device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, 
"sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": 
{"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", 
"observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, 
"keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function 
get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function 
gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 459.2670142650604, "timesteps_since_restore": 0, "iterations_since_restore": 122, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.883333333333333, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 176.2, "sparse_reward_min": 60, "sparse_reward_max": 200, "shaped_reward_mean": 156.11, "shaped_reward_min": 60, "shaped_reward_max": 173, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.65, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 15.34, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 14.28, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 14.92, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 20, "onion_drop_agent_0_mean": 0.3, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.57, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 7, "useful_onion_drop_agent_0_mean": 0.18, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, 
"useful_onion_drop_agent_1_mean": 0.32, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 6, "potting_onion_agent_0_mean": 13.96, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 14.48, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 20, "dish_pickup_agent_0_mean": 4.92, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 5.16, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.31, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 4.42, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.31, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.4, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.04, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.09, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.43, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 4.5, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 4.4, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.45, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 13.96, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 14.48, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 20, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 13.96, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 14.48, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 20, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 3.7615819786832586e-38, "cur_lr": 0.0010000000474974513, "total_loss": -0.0018413320649415255, "policy_loss": 
-0.0020147243048995733, "vf_loss": 7.221100807189941, "vf_explained_var": 0.7078213691711426, "kl": 0.0020635072141885757, "entropy": 1.0974304676055908, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 1574400, "num_env_steps_trained": 1574400, "num_agent_steps_sampled": 3148800, "num_agent_steps_trained": 3148800}, "sampler_results": {"episode_reward_max": 573.0, "episode_reward_min": 180.0, "episode_reward_mean": 508.51, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 89.0}, "policy_reward_max": {"ppo": 290.0}, "policy_reward_mean": {"ppo": 254.255}, "custom_metrics": {"sparse_reward_mean": 176.2, "sparse_reward_min": 60, "sparse_reward_max": 200, "shaped_reward_mean": 156.11, "shaped_reward_min": 60, "shaped_reward_max": 173, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.65, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 23, 
"onion_pickup_agent_1_mean": 15.34, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 14.28, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 14.92, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 20, "onion_drop_agent_0_mean": 0.3, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.57, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 7, "useful_onion_drop_agent_0_mean": 0.18, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.32, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 6, "potting_onion_agent_0_mean": 13.96, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 14.48, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 20, "dish_pickup_agent_0_mean": 4.92, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 5.16, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.31, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 4.42, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.31, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.4, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.04, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.09, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.43, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 4.5, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 4.4, 
"soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.45, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 13.96, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 14.48, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 20, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 13.96, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 14.48, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 20, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, 
"useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [465.0, 525.0, 522.0, 468.0, 519.0, 519.0, 513.0, 516.0, 516.0, 468.0, 567.0, 530.0, 482.0, 516.0, 467.0, 513.0, 507.0, 462.0, 519.0, 525.0, 516.0, 513.0, 468.0, 464.0, 504.0, 516.0, 479.0, 516.0, 522.0, 522.0, 525.0, 525.0, 519.0, 519.0, 462.0, 516.0, 519.0, 519.0, 516.0, 519.0, 479.0, 459.0, 522.0, 516.0, 519.0, 519.0, 522.0, 516.0, 519.0, 513.0, 462.0, 522.0, 522.0, 525.0, 519.0, 519.0, 513.0, 510.0, 573.0, 513.0, 180.0, 519.0, 519.0, 516.0, 530.0, 519.0, 513.0, 527.0, 519.0, 527.0, 456.0, 446.0, 525.0, 507.0, 522.0, 522.0, 519.0, 522.0, 519.0, 519.0, 519.0, 513.0, 516.0, 510.0, 522.0, 522.0, 525.0, 525.0, 513.0, 513.0, 516.0, 530.0, 519.0, 516.0, 573.0, 525.0, 468.0, 519.0, 516.0, 525.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [237.0, 228.0, 258.0, 267.0, 263.0, 259.0, 239.0, 229.0, 250.0, 269.0, 256.0, 263.0, 252.0, 261.0, 251.0, 265.0, 271.0, 245.0, 232.0, 236.0, 281.0, 286.0, 275.0, 255.0, 248.0, 234.0, 267.0, 249.0, 238.0, 229.0, 261.0, 252.0, 250.0, 257.0, 219.0, 243.0, 258.0, 261.0, 262.0, 263.0, 266.0, 250.0, 246.0, 
267.0, 240.0, 228.0, 212.0, 252.0, 256.0, 248.0, 257.0, 259.0, 246.0, 233.0, 251.0, 265.0, 244.0, 278.0, 260.0, 262.0, 264.0, 261.0, 254.0, 271.0, 254.0, 265.0, 264.0, 255.0, 227.0, 235.0, 248.0, 268.0, 254.0, 265.0, 262.0, 257.0, 262.0, 254.0, 254.0, 265.0, 235.0, 244.0, 223.0, 236.0, 267.0, 255.0, 262.0, 254.0, 251.0, 268.0, 256.0, 263.0, 254.0, 268.0, 241.0, 275.0, 247.0, 272.0, 264.0, 249.0, 232.0, 230.0, 262.0, 260.0, 265.0, 257.0, 261.0, 264.0, 267.0, 252.0, 258.0, 261.0, 246.0, 267.0, 266.0, 244.0, 285.0, 288.0, 249.0, 264.0, 91.0, 89.0, 261.0, 258.0, 267.0, 252.0, 255.0, 261.0, 262.0, 268.0, 264.0, 255.0, 234.0, 279.0, 267.0, 260.0, 259.0, 260.0, 269.0, 258.0, 237.0, 219.0, 226.0, 220.0, 263.0, 262.0, 251.0, 256.0, 270.0, 252.0, 252.0, 270.0, 262.0, 257.0, 259.0, 263.0, 261.0, 258.0, 275.0, 244.0, 252.0, 267.0, 252.0, 261.0, 264.0, 252.0, 250.0, 260.0, 246.0, 276.0, 264.0, 258.0, 259.0, 266.0, 251.0, 274.0, 259.0, 254.0, 253.0, 260.0, 259.0, 257.0, 269.0, 261.0, 257.0, 262.0, 266.0, 250.0, 290.0, 283.0, 269.0, 256.0, 220.0, 248.0, 264.0, 255.0, 248.0, 268.0, 259.0, 266.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6851096061071673, "mean_inference_ms": 1.2079490216862163, "mean_action_processing_ms": 0.13255382184265097, "mean_env_wait_ms": 0.8452099267656442, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 573.0, "episode_reward_min": 180.0, "episode_reward_mean": 508.51, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 89.0}, "policy_reward_max": {"ppo": 290.0}, "policy_reward_mean": {"ppo": 254.255}, "hist_stats": {"episode_reward": [465.0, 525.0, 522.0, 468.0, 519.0, 519.0, 513.0, 516.0, 516.0, 468.0, 567.0, 530.0, 482.0, 516.0, 467.0, 513.0, 507.0, 462.0, 519.0, 525.0, 516.0, 513.0, 468.0, 464.0, 504.0, 516.0, 479.0, 516.0, 522.0, 522.0, 525.0, 525.0, 519.0, 519.0, 462.0, 516.0, 519.0, 519.0, 516.0, 519.0, 479.0, 459.0, 522.0, 516.0, 519.0, 519.0, 522.0, 516.0, 519.0, 513.0, 462.0, 
522.0, 522.0, 525.0, 519.0, 519.0, 513.0, 510.0, 573.0, 513.0, 180.0, 519.0, 519.0, 516.0, 530.0, 519.0, 513.0, 527.0, 519.0, 527.0, 456.0, 446.0, 525.0, 507.0, 522.0, 522.0, 519.0, 522.0, 519.0, 519.0, 519.0, 513.0, 516.0, 510.0, 522.0, 522.0, 525.0, 525.0, 513.0, 513.0, 516.0, 530.0, 519.0, 516.0, 573.0, 525.0, 468.0, 519.0, 516.0, 525.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [237.0, 228.0, 258.0, 267.0, 263.0, 259.0, 239.0, 229.0, 250.0, 269.0, 256.0, 263.0, 252.0, 261.0, 251.0, 265.0, 271.0, 245.0, 232.0, 236.0, 281.0, 286.0, 275.0, 255.0, 248.0, 234.0, 267.0, 249.0, 238.0, 229.0, 261.0, 252.0, 250.0, 257.0, 219.0, 243.0, 258.0, 261.0, 262.0, 263.0, 266.0, 250.0, 246.0, 267.0, 240.0, 228.0, 212.0, 252.0, 256.0, 248.0, 257.0, 259.0, 246.0, 233.0, 251.0, 265.0, 244.0, 278.0, 260.0, 262.0, 264.0, 261.0, 254.0, 271.0, 254.0, 265.0, 264.0, 255.0, 227.0, 235.0, 248.0, 268.0, 254.0, 265.0, 262.0, 257.0, 262.0, 254.0, 254.0, 265.0, 235.0, 244.0, 223.0, 236.0, 267.0, 255.0, 262.0, 254.0, 251.0, 268.0, 256.0, 263.0, 254.0, 268.0, 241.0, 275.0, 247.0, 272.0, 264.0, 249.0, 232.0, 230.0, 262.0, 260.0, 265.0, 257.0, 261.0, 264.0, 267.0, 252.0, 258.0, 261.0, 246.0, 267.0, 266.0, 244.0, 285.0, 288.0, 249.0, 264.0, 91.0, 89.0, 261.0, 258.0, 267.0, 252.0, 255.0, 261.0, 262.0, 268.0, 264.0, 255.0, 234.0, 279.0, 267.0, 260.0, 259.0, 260.0, 269.0, 258.0, 237.0, 219.0, 226.0, 220.0, 263.0, 262.0, 251.0, 256.0, 270.0, 252.0, 252.0, 270.0, 262.0, 257.0, 259.0, 263.0, 261.0, 258.0, 275.0, 
244.0, 252.0, 267.0, 252.0, 261.0, 264.0, 252.0, 250.0, 260.0, 246.0, 276.0, 264.0, 258.0, 259.0, 266.0, 251.0, 274.0, 259.0, 254.0, 253.0, 260.0, 259.0, 257.0, 269.0, 261.0, 257.0, 262.0, 266.0, 250.0, 290.0, 283.0, 269.0, 256.0, 220.0, 248.0, 264.0, 255.0, 248.0, 268.0, 259.0, 266.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6851096061071673, "mean_inference_ms": 1.2079490216862163, "mean_action_processing_ms": 0.13255382184265097, "mean_env_wait_ms": 0.8452099267656442, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 3148800, "num_agent_steps_trained": 3148800, "num_env_steps_sampled": 1574400, "num_env_steps_trained": 1574400, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 1574400, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 3148800, "timers": {"training_iteration_time_ms": 3706.167, "learn_time_ms": 1173.127, "learn_throughput": 10911.01, "synch_weights_time_ms": 10.047}, "counters": {"num_env_steps_sampled": 1574400, "num_env_steps_trained": 1574400, "num_agent_steps_sampled": 3148800, "num_agent_steps_trained": 3148800}, "done": false, "episodes_total": 3936, "training_iteration": 123, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-07-57", "timestamp": 1666580877, "time_this_iter_s": 3.806124210357666, "time_total_s": 463.0731384754181, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, 
"device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, 
"sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": 
{"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", 
"observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, 
"keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function 
get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function 
gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 463.0731384754181, "timesteps_since_restore": 0, "iterations_since_restore": 123, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.8, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 176.6, "sparse_reward_min": 60, "sparse_reward_max": 200, "shaped_reward_mean": 156.68, "shaped_reward_min": 60, "shaped_reward_max": 173, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.72, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 15.41, "onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 14.22, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 14.99, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.33, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.53, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 7, "useful_onion_drop_agent_0_mean": 0.18, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, 
"useful_onion_drop_agent_1_mean": 0.28, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 6, "potting_onion_agent_0_mean": 13.98, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 14.59, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 4.87, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.22, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.23, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.51, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.33, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.41, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.12, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.1, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 4.43, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.53, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 4.4, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.49, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 13.98, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 14.59, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 13.98, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 14.59, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 1.8807909893416293e-38, "cur_lr": 0.0010000000474974513, "total_loss": -0.003325252328068018, "policy_loss": 
-0.003511276328936219, "vf_loss": 7.359045028686523, "vf_explained_var": 0.703230619430542, "kl": 0.001878553070127964, "entropy": 1.0997591018676758, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 1587200, "num_env_steps_trained": 1587200, "num_agent_steps_sampled": 3174400, "num_agent_steps_trained": 3174400}, "sampler_results": {"episode_reward_max": 573.0, "episode_reward_min": 180.0, "episode_reward_mean": 509.88, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 89.0}, "policy_reward_max": {"ppo": 290.0}, "policy_reward_mean": {"ppo": 254.94}, "custom_metrics": {"sparse_reward_mean": 176.6, "sparse_reward_min": 60, "sparse_reward_max": 200, "shaped_reward_mean": 156.68, "shaped_reward_min": 60, "shaped_reward_max": 173, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.72, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 23, 
"onion_pickup_agent_1_mean": 15.41, "onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 14.22, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 14.99, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.33, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.53, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 7, "useful_onion_drop_agent_0_mean": 0.18, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.28, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 6, "potting_onion_agent_0_mean": 13.98, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 14.59, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 4.87, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.22, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.23, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.51, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.33, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.41, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.12, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.1, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 4.43, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.53, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 4.4, 
"soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.49, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 13.98, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 14.59, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 13.98, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 14.59, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, 
"useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [519.0, 519.0, 462.0, 516.0, 519.0, 519.0, 516.0, 519.0, 479.0, 459.0, 522.0, 516.0, 519.0, 519.0, 522.0, 516.0, 519.0, 513.0, 462.0, 522.0, 522.0, 525.0, 519.0, 519.0, 513.0, 510.0, 573.0, 513.0, 180.0, 519.0, 519.0, 516.0, 530.0, 519.0, 513.0, 527.0, 519.0, 527.0, 456.0, 446.0, 525.0, 507.0, 522.0, 522.0, 519.0, 522.0, 519.0, 519.0, 519.0, 513.0, 516.0, 510.0, 522.0, 522.0, 525.0, 525.0, 513.0, 513.0, 516.0, 530.0, 519.0, 516.0, 573.0, 525.0, 468.0, 519.0, 516.0, 525.0, 516.0, 525.0, 467.0, 519.0, 516.0, 516.0, 522.0, 519.0, 524.0, 522.0, 470.0, 522.0, 516.0, 513.0, 522.0, 522.0, 522.0, 573.0, 525.0, 519.0, 405.0, 525.0, 522.0, 476.0, 525.0, 519.0, 525.0, 516.0, 453.0, 467.0, 519.0, 524.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [254.0, 265.0, 264.0, 255.0, 227.0, 235.0, 248.0, 268.0, 254.0, 265.0, 262.0, 257.0, 262.0, 254.0, 254.0, 265.0, 235.0, 244.0, 223.0, 236.0, 267.0, 255.0, 262.0, 254.0, 251.0, 268.0, 256.0, 263.0, 254.0, 268.0, 241.0, 275.0, 247.0, 272.0, 264.0, 249.0, 232.0, 230.0, 262.0, 260.0, 265.0, 257.0, 261.0, 
264.0, 267.0, 252.0, 258.0, 261.0, 246.0, 267.0, 266.0, 244.0, 285.0, 288.0, 249.0, 264.0, 91.0, 89.0, 261.0, 258.0, 267.0, 252.0, 255.0, 261.0, 262.0, 268.0, 264.0, 255.0, 234.0, 279.0, 267.0, 260.0, 259.0, 260.0, 269.0, 258.0, 237.0, 219.0, 226.0, 220.0, 263.0, 262.0, 251.0, 256.0, 270.0, 252.0, 252.0, 270.0, 262.0, 257.0, 259.0, 263.0, 261.0, 258.0, 275.0, 244.0, 252.0, 267.0, 252.0, 261.0, 264.0, 252.0, 250.0, 260.0, 246.0, 276.0, 264.0, 258.0, 259.0, 266.0, 251.0, 274.0, 259.0, 254.0, 253.0, 260.0, 259.0, 257.0, 269.0, 261.0, 257.0, 262.0, 266.0, 250.0, 290.0, 283.0, 269.0, 256.0, 220.0, 248.0, 264.0, 255.0, 248.0, 268.0, 259.0, 266.0, 249.0, 267.0, 253.0, 272.0, 229.0, 238.0, 269.0, 250.0, 265.0, 251.0, 256.0, 260.0, 240.0, 282.0, 254.0, 265.0, 262.0, 262.0, 262.0, 260.0, 233.0, 237.0, 264.0, 258.0, 255.0, 261.0, 249.0, 264.0, 258.0, 264.0, 255.0, 267.0, 268.0, 254.0, 288.0, 285.0, 265.0, 260.0, 252.0, 267.0, 210.0, 195.0, 257.0, 268.0, 244.0, 278.0, 233.0, 243.0, 256.0, 269.0, 251.0, 268.0, 260.0, 265.0, 255.0, 261.0, 245.0, 208.0, 219.0, 248.0, 254.0, 265.0, 271.0, 253.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.685017408158804, "mean_inference_ms": 1.2078514184124396, "mean_action_processing_ms": 0.13254463761073323, "mean_env_wait_ms": 0.8448968158681478, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 573.0, "episode_reward_min": 180.0, "episode_reward_mean": 509.88, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 89.0}, "policy_reward_max": {"ppo": 290.0}, "policy_reward_mean": {"ppo": 254.94}, "hist_stats": {"episode_reward": [519.0, 519.0, 462.0, 516.0, 519.0, 519.0, 516.0, 519.0, 479.0, 459.0, 522.0, 516.0, 519.0, 519.0, 522.0, 516.0, 519.0, 513.0, 462.0, 522.0, 522.0, 525.0, 519.0, 519.0, 513.0, 510.0, 573.0, 513.0, 180.0, 519.0, 519.0, 516.0, 530.0, 519.0, 513.0, 527.0, 519.0, 527.0, 456.0, 446.0, 525.0, 507.0, 522.0, 522.0, 519.0, 522.0, 519.0, 519.0, 519.0, 513.0, 516.0, 
510.0, 522.0, 522.0, 525.0, 525.0, 513.0, 513.0, 516.0, 530.0, 519.0, 516.0, 573.0, 525.0, 468.0, 519.0, 516.0, 525.0, 516.0, 525.0, 467.0, 519.0, 516.0, 516.0, 522.0, 519.0, 524.0, 522.0, 470.0, 522.0, 516.0, 513.0, 522.0, 522.0, 522.0, 573.0, 525.0, 519.0, 405.0, 525.0, 522.0, 476.0, 525.0, 519.0, 525.0, 516.0, 453.0, 467.0, 519.0, 524.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [254.0, 265.0, 264.0, 255.0, 227.0, 235.0, 248.0, 268.0, 254.0, 265.0, 262.0, 257.0, 262.0, 254.0, 254.0, 265.0, 235.0, 244.0, 223.0, 236.0, 267.0, 255.0, 262.0, 254.0, 251.0, 268.0, 256.0, 263.0, 254.0, 268.0, 241.0, 275.0, 247.0, 272.0, 264.0, 249.0, 232.0, 230.0, 262.0, 260.0, 265.0, 257.0, 261.0, 264.0, 267.0, 252.0, 258.0, 261.0, 246.0, 267.0, 266.0, 244.0, 285.0, 288.0, 249.0, 264.0, 91.0, 89.0, 261.0, 258.0, 267.0, 252.0, 255.0, 261.0, 262.0, 268.0, 264.0, 255.0, 234.0, 279.0, 267.0, 260.0, 259.0, 260.0, 269.0, 258.0, 237.0, 219.0, 226.0, 220.0, 263.0, 262.0, 251.0, 256.0, 270.0, 252.0, 252.0, 270.0, 262.0, 257.0, 259.0, 263.0, 261.0, 258.0, 275.0, 244.0, 252.0, 267.0, 252.0, 261.0, 264.0, 252.0, 250.0, 260.0, 246.0, 276.0, 264.0, 258.0, 259.0, 266.0, 251.0, 274.0, 259.0, 254.0, 253.0, 260.0, 259.0, 257.0, 269.0, 261.0, 257.0, 262.0, 266.0, 250.0, 290.0, 283.0, 269.0, 256.0, 220.0, 248.0, 264.0, 255.0, 248.0, 268.0, 259.0, 266.0, 249.0, 267.0, 253.0, 272.0, 229.0, 238.0, 269.0, 250.0, 265.0, 251.0, 256.0, 260.0, 240.0, 282.0, 254.0, 265.0, 262.0, 262.0, 262.0, 260.0, 233.0, 237.0, 264.0, 
258.0, 255.0, 261.0, 249.0, 264.0, 258.0, 264.0, 255.0, 267.0, 268.0, 254.0, 288.0, 285.0, 265.0, 260.0, 252.0, 267.0, 210.0, 195.0, 257.0, 268.0, 244.0, 278.0, 233.0, 243.0, 256.0, 269.0, 251.0, 268.0, 260.0, 265.0, 255.0, 261.0, 245.0, 208.0, 219.0, 248.0, 254.0, 265.0, 271.0, 253.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.685017408158804, "mean_inference_ms": 1.2078514184124396, "mean_action_processing_ms": 0.13254463761073323, "mean_env_wait_ms": 0.8448968158681478, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 3174400, "num_agent_steps_trained": 3174400, "num_env_steps_sampled": 1587200, "num_env_steps_trained": 1587200, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 1587200, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 3174400, "timers": {"training_iteration_time_ms": 3710.841, "learn_time_ms": 1171.544, "learn_throughput": 10925.753, "synch_weights_time_ms": 9.167}, "counters": {"num_env_steps_sampled": 1587200, "num_env_steps_trained": 1587200, "num_agent_steps_sampled": 3174400, "num_agent_steps_trained": 3174400}, "done": false, "episodes_total": 3968, "training_iteration": 124, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-08-01", "timestamp": 1666580881, "time_this_iter_s": 3.731330394744873, "time_total_s": 466.80446887016296, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, 
"device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, 
"sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": 
{"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", 
"observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, 
"keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function 
get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function 
gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 466.80446887016296, "timesteps_since_restore": 0, "iterations_since_restore": 124, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.499999999999996, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 177.2, "sparse_reward_min": 140, "sparse_reward_max": 200, "shaped_reward_mean": 157.79, "shaped_reward_min": 125, "shaped_reward_max": 176, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.65, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 15.63, "onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 14.21, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 15.18, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.33, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.48, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 7, "useful_onion_drop_agent_0_mean": 0.12, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, 
"useful_onion_drop_agent_1_mean": 0.22, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 6, "potting_onion_agent_0_mean": 13.92, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 14.88, "potting_onion_agent_1_min": 6, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.02, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.17, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.39, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.39, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.25, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.45, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.13, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.09, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 4.62, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.4, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 4.59, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.35, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 13.92, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 14.88, "optimal_onion_potting_agent_1_min": 6, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 13.92, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 14.88, "viable_onion_potting_agent_1_min": 6, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.00022825569612905383, "policy_loss": 3.9655715227127075e-05, "vf_loss": 
7.364105224609375, "vf_explained_var": 0.7113704681396484, "kl": 0.002136504976078868, "entropy": 1.0956169366836548, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 1600000, "num_env_steps_trained": 1600000, "num_agent_steps_sampled": 3200000, "num_agent_steps_trained": 3200000}, "sampler_results": {"episode_reward_max": 576.0, "episode_reward_min": 405.0, "episode_reward_mean": 512.19, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 195.0}, "policy_reward_max": {"ppo": 298.0}, "policy_reward_mean": {"ppo": 256.095}, "custom_metrics": {"sparse_reward_mean": 177.2, "sparse_reward_min": 140, "sparse_reward_max": 200, "shaped_reward_mean": 157.79, "shaped_reward_min": 125, "shaped_reward_max": 176, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.65, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 15.63, 
"onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 14.21, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 15.18, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.33, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.48, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 7, "useful_onion_drop_agent_0_mean": 0.12, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.22, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 6, "potting_onion_agent_0_mean": 13.92, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 14.88, "potting_onion_agent_1_min": 6, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.02, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.17, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.39, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.39, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.25, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.45, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.13, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.09, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 4.62, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.4, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 4.59, "soup_delivery_agent_0_min": 0, 
"soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.35, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 13.92, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 14.88, "optimal_onion_potting_agent_1_min": 6, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 13.92, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 14.88, "viable_onion_potting_agent_1_min": 6, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [530.0, 519.0, 513.0, 527.0, 519.0, 527.0, 456.0, 446.0, 525.0, 507.0, 522.0, 522.0, 519.0, 522.0, 519.0, 519.0, 519.0, 513.0, 516.0, 510.0, 522.0, 522.0, 525.0, 525.0, 513.0, 513.0, 516.0, 530.0, 519.0, 516.0, 573.0, 525.0, 468.0, 519.0, 516.0, 525.0, 516.0, 525.0, 467.0, 519.0, 516.0, 516.0, 522.0, 519.0, 524.0, 522.0, 470.0, 522.0, 516.0, 513.0, 522.0, 522.0, 522.0, 573.0, 525.0, 519.0, 405.0, 525.0, 522.0, 476.0, 525.0, 519.0, 525.0, 516.0, 453.0, 467.0, 519.0, 524.0, 513.0, 468.0, 516.0, 519.0, 507.0, 519.0, 408.0, 530.0, 519.0, 519.0, 519.0, 522.0, 573.0, 573.0, 513.0, 522.0, 462.0, 519.0, 519.0, 468.0, 573.0, 525.0, 522.0, 530.0, 436.0, 519.0, 513.0, 576.0, 405.0, 468.0, 522.0, 519.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [262.0, 268.0, 264.0, 255.0, 234.0, 279.0, 267.0, 260.0, 259.0, 260.0, 269.0, 258.0, 237.0, 219.0, 226.0, 220.0, 263.0, 262.0, 251.0, 256.0, 270.0, 252.0, 252.0, 270.0, 262.0, 257.0, 259.0, 263.0, 261.0, 258.0, 275.0, 244.0, 252.0, 267.0, 252.0, 261.0, 264.0, 252.0, 250.0, 260.0, 246.0, 276.0, 264.0, 258.0, 259.0, 266.0, 251.0, 274.0, 259.0, 
254.0, 253.0, 260.0, 259.0, 257.0, 269.0, 261.0, 257.0, 262.0, 266.0, 250.0, 290.0, 283.0, 269.0, 256.0, 220.0, 248.0, 264.0, 255.0, 248.0, 268.0, 259.0, 266.0, 249.0, 267.0, 253.0, 272.0, 229.0, 238.0, 269.0, 250.0, 265.0, 251.0, 256.0, 260.0, 240.0, 282.0, 254.0, 265.0, 262.0, 262.0, 262.0, 260.0, 233.0, 237.0, 264.0, 258.0, 255.0, 261.0, 249.0, 264.0, 258.0, 264.0, 255.0, 267.0, 268.0, 254.0, 288.0, 285.0, 265.0, 260.0, 252.0, 267.0, 210.0, 195.0, 257.0, 268.0, 244.0, 278.0, 233.0, 243.0, 256.0, 269.0, 251.0, 268.0, 260.0, 265.0, 255.0, 261.0, 245.0, 208.0, 219.0, 248.0, 254.0, 265.0, 271.0, 253.0, 254.0, 259.0, 224.0, 244.0, 252.0, 264.0, 262.0, 257.0, 256.0, 251.0, 265.0, 254.0, 202.0, 206.0, 275.0, 255.0, 271.0, 248.0, 264.0, 255.0, 257.0, 262.0, 273.0, 249.0, 298.0, 275.0, 294.0, 279.0, 243.0, 270.0, 257.0, 265.0, 221.0, 241.0, 252.0, 267.0, 260.0, 259.0, 224.0, 244.0, 289.0, 284.0, 258.0, 267.0, 266.0, 256.0, 267.0, 263.0, 220.0, 216.0, 251.0, 268.0, 254.0, 259.0, 288.0, 288.0, 207.0, 198.0, 240.0, 228.0, 267.0, 255.0, 264.0, 255.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6849697162845529, "mean_inference_ms": 1.2077556939870815, "mean_action_processing_ms": 0.1325400427025592, "mean_env_wait_ms": 0.8445976040558727, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 576.0, "episode_reward_min": 405.0, "episode_reward_mean": 512.19, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 195.0}, "policy_reward_max": {"ppo": 298.0}, "policy_reward_mean": {"ppo": 256.095}, "hist_stats": {"episode_reward": [530.0, 519.0, 513.0, 527.0, 519.0, 527.0, 456.0, 446.0, 525.0, 507.0, 522.0, 522.0, 519.0, 522.0, 519.0, 519.0, 519.0, 513.0, 516.0, 510.0, 522.0, 522.0, 525.0, 525.0, 513.0, 513.0, 516.0, 530.0, 519.0, 516.0, 573.0, 525.0, 468.0, 519.0, 516.0, 525.0, 516.0, 525.0, 467.0, 519.0, 516.0, 516.0, 522.0, 519.0, 524.0, 522.0, 470.0, 522.0, 516.0, 513.0, 522.0, 522.0, 522.0, 573.0, 525.0, 519.0, 405.0, 
525.0, 522.0, 476.0, 525.0, 519.0, 525.0, 516.0, 453.0, 467.0, 519.0, 524.0, 513.0, 468.0, 516.0, 519.0, 507.0, 519.0, 408.0, 530.0, 519.0, 519.0, 519.0, 522.0, 573.0, 573.0, 513.0, 522.0, 462.0, 519.0, 519.0, 468.0, 573.0, 525.0, 522.0, 530.0, 436.0, 519.0, 513.0, 576.0, 405.0, 468.0, 522.0, 519.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [262.0, 268.0, 264.0, 255.0, 234.0, 279.0, 267.0, 260.0, 259.0, 260.0, 269.0, 258.0, 237.0, 219.0, 226.0, 220.0, 263.0, 262.0, 251.0, 256.0, 270.0, 252.0, 252.0, 270.0, 262.0, 257.0, 259.0, 263.0, 261.0, 258.0, 275.0, 244.0, 252.0, 267.0, 252.0, 261.0, 264.0, 252.0, 250.0, 260.0, 246.0, 276.0, 264.0, 258.0, 259.0, 266.0, 251.0, 274.0, 259.0, 254.0, 253.0, 260.0, 259.0, 257.0, 269.0, 261.0, 257.0, 262.0, 266.0, 250.0, 290.0, 283.0, 269.0, 256.0, 220.0, 248.0, 264.0, 255.0, 248.0, 268.0, 259.0, 266.0, 249.0, 267.0, 253.0, 272.0, 229.0, 238.0, 269.0, 250.0, 265.0, 251.0, 256.0, 260.0, 240.0, 282.0, 254.0, 265.0, 262.0, 262.0, 262.0, 260.0, 233.0, 237.0, 264.0, 258.0, 255.0, 261.0, 249.0, 264.0, 258.0, 264.0, 255.0, 267.0, 268.0, 254.0, 288.0, 285.0, 265.0, 260.0, 252.0, 267.0, 210.0, 195.0, 257.0, 268.0, 244.0, 278.0, 233.0, 243.0, 256.0, 269.0, 251.0, 268.0, 260.0, 265.0, 255.0, 261.0, 245.0, 208.0, 219.0, 248.0, 254.0, 265.0, 271.0, 253.0, 254.0, 259.0, 224.0, 244.0, 252.0, 264.0, 262.0, 257.0, 256.0, 251.0, 265.0, 254.0, 202.0, 206.0, 275.0, 255.0, 271.0, 248.0, 264.0, 255.0, 257.0, 262.0, 273.0, 249.0, 298.0, 275.0, 294.0, 279.0, 243.0, 
270.0, 257.0, 265.0, 221.0, 241.0, 252.0, 267.0, 260.0, 259.0, 224.0, 244.0, 289.0, 284.0, 258.0, 267.0, 266.0, 256.0, 267.0, 263.0, 220.0, 216.0, 251.0, 268.0, 254.0, 259.0, 288.0, 288.0, 207.0, 198.0, 240.0, 228.0, 267.0, 255.0, 264.0, 255.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6849697162845529, "mean_inference_ms": 1.2077556939870815, "mean_action_processing_ms": 0.1325400427025592, "mean_env_wait_ms": 0.8445976040558727, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 3200000, "num_agent_steps_trained": 3200000, "num_env_steps_sampled": 1600000, "num_env_steps_trained": 1600000, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 1600000, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 3200000, "timers": {"training_iteration_time_ms": 3695.721, "learn_time_ms": 1159.375, "learn_throughput": 11040.435, "synch_weights_time_ms": 9.116}, "counters": {"num_env_steps_sampled": 1600000, "num_env_steps_trained": 1600000, "num_agent_steps_sampled": 3200000, "num_agent_steps_trained": 3200000}, "done": false, "episodes_total": 4000, "training_iteration": 125, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-08-05", "timestamp": 1666580885, "time_this_iter_s": 3.5378715991973877, "time_total_s": 470.34234046936035, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 470.34234046936035, "timesteps_since_restore": 0, "iterations_since_restore": 125, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.800000000000004, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 175.0, "sparse_reward_min": 60, "sparse_reward_max": 200, "shaped_reward_mean": 156.43, "shaped_reward_min": 60, "shaped_reward_max": 176, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.41, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 15.62, "onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 13.95, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 15.17, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.33, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.43, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.13, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, 
"useful_onion_drop_agent_1_mean": 0.2, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 13.71, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 14.96, "potting_onion_agent_1_min": 6, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.04, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 4.93, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.34, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.3, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.31, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.33, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.14, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.08, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 4.55, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.37, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 4.52, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.3, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 13.71, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 14.96, "optimal_onion_potting_agent_1_min": 6, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 13.71, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 14.96, "viable_onion_potting_agent_1_min": 6, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0008826033445075154, "policy_loss": -0.00106078723911196, "vf_loss": 
7.301456928253174, "vf_explained_var": 0.7096636295318604, "kl": 0.0017461793031543493, "entropy": 1.1039214134216309, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 1612800, "num_env_steps_trained": 1612800, "num_agent_steps_sampled": 3225600, "num_agent_steps_trained": 3225600}, "sampler_results": {"episode_reward_max": 576.0, "episode_reward_min": 180.0, "episode_reward_mean": 506.43, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 83.0}, "policy_reward_max": {"ppo": 298.0}, "policy_reward_mean": {"ppo": 253.215}, "custom_metrics": {"sparse_reward_mean": 175.0, "sparse_reward_min": 60, "sparse_reward_max": 200, "shaped_reward_mean": 156.43, "shaped_reward_min": 60, "shaped_reward_max": 176, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.41, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 15.62, 
"onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 13.95, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 15.17, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.33, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.43, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.13, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.2, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 13.71, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 14.96, "potting_onion_agent_1_min": 6, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.04, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 4.93, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.34, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.3, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.31, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.33, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.14, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.08, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 4.55, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.37, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 4.52, "soup_delivery_agent_0_min": 0, 
"soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.3, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 13.71, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 14.96, "optimal_onion_potting_agent_1_min": 6, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 13.71, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 14.96, "viable_onion_potting_agent_1_min": 6, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [468.0, 519.0, 516.0, 525.0, 516.0, 525.0, 467.0, 519.0, 516.0, 516.0, 522.0, 519.0, 524.0, 522.0, 470.0, 522.0, 516.0, 513.0, 522.0, 522.0, 522.0, 573.0, 525.0, 519.0, 405.0, 525.0, 522.0, 476.0, 525.0, 519.0, 525.0, 516.0, 453.0, 467.0, 519.0, 524.0, 513.0, 468.0, 516.0, 519.0, 507.0, 519.0, 408.0, 530.0, 519.0, 519.0, 519.0, 522.0, 573.0, 573.0, 513.0, 522.0, 462.0, 519.0, 519.0, 468.0, 573.0, 525.0, 522.0, 530.0, 436.0, 519.0, 513.0, 576.0, 405.0, 468.0, 522.0, 519.0, 516.0, 519.0, 519.0, 522.0, 404.0, 522.0, 524.0, 519.0, 522.0, 525.0, 525.0, 522.0, 180.0, 519.0, 516.0, 522.0, 456.0, 464.0, 464.0, 525.0, 522.0, 522.0, 516.0, 525.0, 525.0, 525.0, 522.0, 504.0, 513.0, 482.0, 513.0, 519.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [220.0, 248.0, 264.0, 255.0, 248.0, 268.0, 259.0, 266.0, 249.0, 267.0, 253.0, 272.0, 229.0, 238.0, 269.0, 250.0, 265.0, 251.0, 256.0, 260.0, 240.0, 282.0, 254.0, 265.0, 262.0, 262.0, 262.0, 260.0, 233.0, 237.0, 264.0, 258.0, 255.0, 261.0, 249.0, 264.0, 258.0, 264.0, 255.0, 267.0, 268.0, 254.0, 288.0, 285.0, 265.0, 260.0, 252.0, 267.0, 210.0, 
195.0, 257.0, 268.0, 244.0, 278.0, 233.0, 243.0, 256.0, 269.0, 251.0, 268.0, 260.0, 265.0, 255.0, 261.0, 245.0, 208.0, 219.0, 248.0, 254.0, 265.0, 271.0, 253.0, 254.0, 259.0, 224.0, 244.0, 252.0, 264.0, 262.0, 257.0, 256.0, 251.0, 265.0, 254.0, 202.0, 206.0, 275.0, 255.0, 271.0, 248.0, 264.0, 255.0, 257.0, 262.0, 273.0, 249.0, 298.0, 275.0, 294.0, 279.0, 243.0, 270.0, 257.0, 265.0, 221.0, 241.0, 252.0, 267.0, 260.0, 259.0, 224.0, 244.0, 289.0, 284.0, 258.0, 267.0, 266.0, 256.0, 267.0, 263.0, 220.0, 216.0, 251.0, 268.0, 254.0, 259.0, 288.0, 288.0, 207.0, 198.0, 240.0, 228.0, 267.0, 255.0, 264.0, 255.0, 254.0, 262.0, 254.0, 265.0, 261.0, 258.0, 249.0, 273.0, 203.0, 201.0, 261.0, 261.0, 269.0, 255.0, 261.0, 258.0, 255.0, 267.0, 260.0, 265.0, 269.0, 256.0, 264.0, 258.0, 83.0, 97.0, 254.0, 265.0, 268.0, 248.0, 254.0, 268.0, 230.0, 226.0, 213.0, 251.0, 228.0, 236.0, 277.0, 248.0, 255.0, 267.0, 265.0, 257.0, 261.0, 255.0, 258.0, 267.0, 262.0, 263.0, 249.0, 276.0, 268.0, 254.0, 257.0, 247.0, 243.0, 270.0, 236.0, 246.0, 257.0, 256.0, 260.0, 259.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6849329471543086, "mean_inference_ms": 1.2076212947173204, "mean_action_processing_ms": 0.13253184470704837, "mean_env_wait_ms": 0.8442733889150424, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 576.0, "episode_reward_min": 180.0, "episode_reward_mean": 506.43, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 83.0}, "policy_reward_max": {"ppo": 298.0}, "policy_reward_mean": {"ppo": 253.215}, "hist_stats": {"episode_reward": [468.0, 519.0, 516.0, 525.0, 516.0, 525.0, 467.0, 519.0, 516.0, 516.0, 522.0, 519.0, 524.0, 522.0, 470.0, 522.0, 516.0, 513.0, 522.0, 522.0, 522.0, 573.0, 525.0, 519.0, 405.0, 525.0, 522.0, 476.0, 525.0, 519.0, 525.0, 516.0, 453.0, 467.0, 519.0, 524.0, 513.0, 468.0, 516.0, 519.0, 507.0, 519.0, 408.0, 530.0, 519.0, 519.0, 519.0, 522.0, 573.0, 573.0, 513.0, 522.0, 462.0, 519.0, 519.0, 468.0, 573.0, 
525.0, 522.0, 530.0, 436.0, 519.0, 513.0, 576.0, 405.0, 468.0, 522.0, 519.0, 516.0, 519.0, 519.0, 522.0, 404.0, 522.0, 524.0, 519.0, 522.0, 525.0, 525.0, 522.0, 180.0, 519.0, 516.0, 522.0, 456.0, 464.0, 464.0, 525.0, 522.0, 522.0, 516.0, 525.0, 525.0, 525.0, 522.0, 504.0, 513.0, 482.0, 513.0, 519.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [220.0, 248.0, 264.0, 255.0, 248.0, 268.0, 259.0, 266.0, 249.0, 267.0, 253.0, 272.0, 229.0, 238.0, 269.0, 250.0, 265.0, 251.0, 256.0, 260.0, 240.0, 282.0, 254.0, 265.0, 262.0, 262.0, 262.0, 260.0, 233.0, 237.0, 264.0, 258.0, 255.0, 261.0, 249.0, 264.0, 258.0, 264.0, 255.0, 267.0, 268.0, 254.0, 288.0, 285.0, 265.0, 260.0, 252.0, 267.0, 210.0, 195.0, 257.0, 268.0, 244.0, 278.0, 233.0, 243.0, 256.0, 269.0, 251.0, 268.0, 260.0, 265.0, 255.0, 261.0, 245.0, 208.0, 219.0, 248.0, 254.0, 265.0, 271.0, 253.0, 254.0, 259.0, 224.0, 244.0, 252.0, 264.0, 262.0, 257.0, 256.0, 251.0, 265.0, 254.0, 202.0, 206.0, 275.0, 255.0, 271.0, 248.0, 264.0, 255.0, 257.0, 262.0, 273.0, 249.0, 298.0, 275.0, 294.0, 279.0, 243.0, 270.0, 257.0, 265.0, 221.0, 241.0, 252.0, 267.0, 260.0, 259.0, 224.0, 244.0, 289.0, 284.0, 258.0, 267.0, 266.0, 256.0, 267.0, 263.0, 220.0, 216.0, 251.0, 268.0, 254.0, 259.0, 288.0, 288.0, 207.0, 198.0, 240.0, 228.0, 267.0, 255.0, 264.0, 255.0, 254.0, 262.0, 254.0, 265.0, 261.0, 258.0, 249.0, 273.0, 203.0, 201.0, 261.0, 261.0, 269.0, 255.0, 261.0, 258.0, 255.0, 267.0, 260.0, 265.0, 269.0, 256.0, 264.0, 258.0, 83.0, 97.0, 254.0, 265.0, 268.0, 
248.0, 254.0, 268.0, 230.0, 226.0, 213.0, 251.0, 228.0, 236.0, 277.0, 248.0, 255.0, 267.0, 265.0, 257.0, 261.0, 255.0, 258.0, 267.0, 262.0, 263.0, 249.0, 276.0, 268.0, 254.0, 257.0, 247.0, 243.0, 270.0, 236.0, 246.0, 257.0, 256.0, 260.0, 259.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6849329471543086, "mean_inference_ms": 1.2076212947173204, "mean_action_processing_ms": 0.13253184470704837, "mean_env_wait_ms": 0.8442733889150424, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 3225600, "num_agent_steps_trained": 3225600, "num_env_steps_sampled": 1612800, "num_env_steps_trained": 1612800, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 1612800, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 3225600, "timers": {"training_iteration_time_ms": 3670.112, "learn_time_ms": 1151.969, "learn_throughput": 11111.414, "synch_weights_time_ms": 9.104}, "counters": {"num_env_steps_sampled": 1612800, "num_env_steps_trained": 1612800, "num_agent_steps_sampled": 3225600, "num_agent_steps_trained": 3225600}, "done": false, "episodes_total": 4032, "training_iteration": 126, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-08-08", "timestamp": 1666580888, "time_this_iter_s": 3.6375370025634766, "time_total_s": 473.9798774719238, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 473.9798774719238, "timesteps_since_restore": 0, "iterations_since_restore": 126, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.683333333333334, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 174.6, "sparse_reward_min": 60, "sparse_reward_max": 200, "shaped_reward_mean": 156.37, "shaped_reward_min": 60, "shaped_reward_max": 176, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.64, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 26, "onion_pickup_agent_1_mean": 15.39, "onion_pickup_agent_1_min": 4, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 14.14, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 26, "useful_onion_pickup_agent_1_mean": 14.9, "useful_onion_pickup_agent_1_min": 4, "useful_onion_pickup_agent_1_max": 20, "onion_drop_agent_0_mean": 0.4, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.42, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.19, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, 
"useful_onion_drop_agent_1_mean": 0.23, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 13.9, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 26, "potting_onion_agent_1_mean": 14.71, "potting_onion_agent_1_min": 4, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.04, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.0, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.37, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.31, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.34, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.39, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.09, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.07, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.51, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.41, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.47, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.32, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 13.9, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 26, "optimal_onion_potting_agent_1_mean": 14.71, "optimal_onion_potting_agent_1_min": 4, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 13.9, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 26, "viable_onion_potting_agent_1_mean": 14.71, "viable_onion_potting_agent_1_min": 4, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0034485086798667908, "policy_loss": -0.003633841872215271, "vf_loss": 
7.3317694664001465, "vf_explained_var": 0.6929588317871094, "kl": 0.001574978232383728, "entropy": 1.0956857204437256, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 1625600, "num_env_steps_trained": 1625600, "num_agent_steps_sampled": 3251200, "num_agent_steps_trained": 3251200}, "sampler_results": {"episode_reward_max": 576.0, "episode_reward_min": 180.0, "episode_reward_mean": 505.57, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 83.0}, "policy_reward_max": {"ppo": 298.0}, "policy_reward_mean": {"ppo": 252.785}, "custom_metrics": {"sparse_reward_mean": 174.6, "sparse_reward_min": 60, "sparse_reward_max": 200, "shaped_reward_mean": 156.37, "shaped_reward_min": 60, "shaped_reward_max": 176, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.64, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 26, "onion_pickup_agent_1_mean": 15.39, 
"onion_pickup_agent_1_min": 4, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 14.14, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 26, "useful_onion_pickup_agent_1_mean": 14.9, "useful_onion_pickup_agent_1_min": 4, "useful_onion_pickup_agent_1_max": 20, "onion_drop_agent_0_mean": 0.4, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.42, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.19, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.23, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 13.9, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 26, "potting_onion_agent_1_mean": 14.71, "potting_onion_agent_1_min": 4, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.04, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.0, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.37, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.31, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.34, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.39, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.09, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.07, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.51, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.41, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.47, "soup_delivery_agent_0_min": 1, 
"soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.32, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 13.9, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 26, "optimal_onion_potting_agent_1_mean": 14.71, "optimal_onion_potting_agent_1_min": 4, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 13.9, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 26, "viable_onion_potting_agent_1_mean": 14.71, "viable_onion_potting_agent_1_min": 4, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [453.0, 467.0, 519.0, 524.0, 513.0, 468.0, 516.0, 519.0, 507.0, 519.0, 408.0, 530.0, 519.0, 519.0, 519.0, 522.0, 573.0, 573.0, 513.0, 522.0, 462.0, 519.0, 519.0, 468.0, 573.0, 525.0, 522.0, 530.0, 436.0, 519.0, 513.0, 576.0, 405.0, 468.0, 522.0, 519.0, 516.0, 519.0, 519.0, 522.0, 404.0, 522.0, 524.0, 519.0, 522.0, 525.0, 525.0, 522.0, 180.0, 519.0, 516.0, 522.0, 456.0, 464.0, 464.0, 525.0, 522.0, 522.0, 516.0, 525.0, 525.0, 525.0, 522.0, 504.0, 513.0, 482.0, 513.0, 519.0, 522.0, 525.0, 530.0, 513.0, 519.0, 570.0, 513.0, 519.0, 522.0, 470.0, 525.0, 533.0, 525.0, 516.0, 525.0, 405.0, 525.0, 516.0, 519.0, 522.0, 519.0, 525.0, 459.0, 519.0, 530.0, 405.0, 519.0, 482.0, 530.0, 522.0, 459.0, 522.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [245.0, 208.0, 219.0, 248.0, 254.0, 265.0, 271.0, 253.0, 254.0, 259.0, 224.0, 244.0, 252.0, 264.0, 262.0, 257.0, 256.0, 251.0, 265.0, 254.0, 202.0, 206.0, 275.0, 255.0, 271.0, 248.0, 264.0, 255.0, 257.0, 262.0, 273.0, 249.0, 298.0, 275.0, 294.0, 279.0, 243.0, 270.0, 257.0, 265.0, 221.0, 241.0, 252.0, 267.0, 260.0, 259.0, 224.0, 244.0, 289.0, 
284.0, 258.0, 267.0, 266.0, 256.0, 267.0, 263.0, 220.0, 216.0, 251.0, 268.0, 254.0, 259.0, 288.0, 288.0, 207.0, 198.0, 240.0, 228.0, 267.0, 255.0, 264.0, 255.0, 254.0, 262.0, 254.0, 265.0, 261.0, 258.0, 249.0, 273.0, 203.0, 201.0, 261.0, 261.0, 269.0, 255.0, 261.0, 258.0, 255.0, 267.0, 260.0, 265.0, 269.0, 256.0, 264.0, 258.0, 83.0, 97.0, 254.0, 265.0, 268.0, 248.0, 254.0, 268.0, 230.0, 226.0, 213.0, 251.0, 228.0, 236.0, 277.0, 248.0, 255.0, 267.0, 265.0, 257.0, 261.0, 255.0, 258.0, 267.0, 262.0, 263.0, 249.0, 276.0, 268.0, 254.0, 257.0, 247.0, 243.0, 270.0, 236.0, 246.0, 257.0, 256.0, 260.0, 259.0, 260.0, 262.0, 262.0, 263.0, 265.0, 265.0, 256.0, 257.0, 259.0, 260.0, 283.0, 287.0, 257.0, 256.0, 243.0, 276.0, 265.0, 257.0, 236.0, 234.0, 257.0, 268.0, 263.0, 270.0, 266.0, 259.0, 248.0, 268.0, 266.0, 259.0, 209.0, 196.0, 274.0, 251.0, 257.0, 259.0, 266.0, 253.0, 264.0, 258.0, 254.0, 265.0, 243.0, 282.0, 235.0, 224.0, 260.0, 259.0, 260.0, 270.0, 193.0, 212.0, 257.0, 262.0, 239.0, 243.0, 259.0, 271.0, 252.0, 270.0, 225.0, 234.0, 256.0, 266.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.684867033205738, "mean_inference_ms": 1.2074572036214568, "mean_action_processing_ms": 0.1325194643634045, "mean_env_wait_ms": 0.8439127867584169, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 576.0, "episode_reward_min": 180.0, "episode_reward_mean": 505.57, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 83.0}, "policy_reward_max": {"ppo": 298.0}, "policy_reward_mean": {"ppo": 252.785}, "hist_stats": {"episode_reward": [453.0, 467.0, 519.0, 524.0, 513.0, 468.0, 516.0, 519.0, 507.0, 519.0, 408.0, 530.0, 519.0, 519.0, 519.0, 522.0, 573.0, 573.0, 513.0, 522.0, 462.0, 519.0, 519.0, 468.0, 573.0, 525.0, 522.0, 530.0, 436.0, 519.0, 513.0, 576.0, 405.0, 468.0, 522.0, 519.0, 516.0, 519.0, 519.0, 522.0, 404.0, 522.0, 524.0, 519.0, 522.0, 525.0, 525.0, 522.0, 180.0, 519.0, 516.0, 522.0, 456.0, 464.0, 464.0, 525.0, 522.0, 
522.0, 516.0, 525.0, 525.0, 525.0, 522.0, 504.0, 513.0, 482.0, 513.0, 519.0, 522.0, 525.0, 530.0, 513.0, 519.0, 570.0, 513.0, 519.0, 522.0, 470.0, 525.0, 533.0, 525.0, 516.0, 525.0, 405.0, 525.0, 516.0, 519.0, 522.0, 519.0, 525.0, 459.0, 519.0, 530.0, 405.0, 519.0, 482.0, 530.0, 522.0, 459.0, 522.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [245.0, 208.0, 219.0, 248.0, 254.0, 265.0, 271.0, 253.0, 254.0, 259.0, 224.0, 244.0, 252.0, 264.0, 262.0, 257.0, 256.0, 251.0, 265.0, 254.0, 202.0, 206.0, 275.0, 255.0, 271.0, 248.0, 264.0, 255.0, 257.0, 262.0, 273.0, 249.0, 298.0, 275.0, 294.0, 279.0, 243.0, 270.0, 257.0, 265.0, 221.0, 241.0, 252.0, 267.0, 260.0, 259.0, 224.0, 244.0, 289.0, 284.0, 258.0, 267.0, 266.0, 256.0, 267.0, 263.0, 220.0, 216.0, 251.0, 268.0, 254.0, 259.0, 288.0, 288.0, 207.0, 198.0, 240.0, 228.0, 267.0, 255.0, 264.0, 255.0, 254.0, 262.0, 254.0, 265.0, 261.0, 258.0, 249.0, 273.0, 203.0, 201.0, 261.0, 261.0, 269.0, 255.0, 261.0, 258.0, 255.0, 267.0, 260.0, 265.0, 269.0, 256.0, 264.0, 258.0, 83.0, 97.0, 254.0, 265.0, 268.0, 248.0, 254.0, 268.0, 230.0, 226.0, 213.0, 251.0, 228.0, 236.0, 277.0, 248.0, 255.0, 267.0, 265.0, 257.0, 261.0, 255.0, 258.0, 267.0, 262.0, 263.0, 249.0, 276.0, 268.0, 254.0, 257.0, 247.0, 243.0, 270.0, 236.0, 246.0, 257.0, 256.0, 260.0, 259.0, 260.0, 262.0, 262.0, 263.0, 265.0, 265.0, 256.0, 257.0, 259.0, 260.0, 283.0, 287.0, 257.0, 256.0, 243.0, 276.0, 265.0, 257.0, 236.0, 234.0, 257.0, 268.0, 263.0, 270.0, 266.0, 259.0, 248.0, 268.0, 266.0, 
259.0, 209.0, 196.0, 274.0, 251.0, 257.0, 259.0, 266.0, 253.0, 264.0, 258.0, 254.0, 265.0, 243.0, 282.0, 235.0, 224.0, 260.0, 259.0, 260.0, 270.0, 193.0, 212.0, 257.0, 262.0, 239.0, 243.0, 259.0, 271.0, 252.0, 270.0, 225.0, 234.0, 256.0, 266.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.684867033205738, "mean_inference_ms": 1.2074572036214568, "mean_action_processing_ms": 0.1325194643634045, "mean_env_wait_ms": 0.8439127867584169, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 3251200, "num_agent_steps_trained": 3251200, "num_env_steps_sampled": 1625600, "num_env_steps_trained": 1625600, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 1625600, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 3251200, "timers": {"training_iteration_time_ms": 3636.835, "learn_time_ms": 1150.015, "learn_throughput": 11130.293, "synch_weights_time_ms": 9.702}, "counters": {"num_env_steps_sampled": 1625600, "num_env_steps_trained": 1625600, "num_agent_steps_sampled": 3251200, "num_agent_steps_trained": 3251200}, "done": false, "episodes_total": 4064, "training_iteration": 127, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-08-12", "timestamp": 1666580892, "time_this_iter_s": 3.6391212940216064, "time_total_s": 477.61899876594543, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 477.61899876594543, "timesteps_since_restore": 0, "iterations_since_restore": 127, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.5, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 174.8, "sparse_reward_min": 60, "sparse_reward_max": 200, "shaped_reward_mean": 156.31, "shaped_reward_min": 60, "shaped_reward_max": 176, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.7, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 26, "onion_pickup_agent_1_mean": 15.42, "onion_pickup_agent_1_min": 4, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 14.16, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 26, "useful_onion_pickup_agent_1_mean": 14.97, "useful_onion_pickup_agent_1_min": 4, "useful_onion_pickup_agent_1_max": 20, "onion_drop_agent_0_mean": 0.42, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.41, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.25, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, 
"useful_onion_drop_agent_1_mean": 0.23, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 13.91, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 26, "potting_onion_agent_1_mean": 14.77, "potting_onion_agent_1_min": 4, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.11, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.02, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.29, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.25, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.47, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 0.38, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.15, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.11, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.5, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.45, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.46, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.37, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 13.91, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 26, "optimal_onion_potting_agent_1_mean": 14.77, "optimal_onion_potting_agent_1_min": 4, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 13.91, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 26, "viable_onion_potting_agent_1_mean": 14.77, "viable_onion_potting_agent_1_min": 4, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.002333069918677211, "policy_loss": -0.002517772139981389, "vf_loss": 
7.354443550109863, "vf_explained_var": 0.7034727334976196, "kl": 0.001937449211254716, "entropy": 1.1014816761016846, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 1638400, "num_env_steps_trained": 1638400, "num_agent_steps_sampled": 3276800, "num_agent_steps_trained": 3276800}, "sampler_results": {"episode_reward_max": 576.0, "episode_reward_min": 180.0, "episode_reward_mean": 505.91, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 83.0}, "policy_reward_max": {"ppo": 296.0}, "policy_reward_mean": {"ppo": 252.955}, "custom_metrics": {"sparse_reward_mean": 174.8, "sparse_reward_min": 60, "sparse_reward_max": 200, "shaped_reward_mean": 156.31, "shaped_reward_min": 60, "shaped_reward_max": 176, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.7, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 26, "onion_pickup_agent_1_mean": 15.42, 
"onion_pickup_agent_1_min": 4, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 14.16, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 26, "useful_onion_pickup_agent_1_mean": 14.97, "useful_onion_pickup_agent_1_min": 4, "useful_onion_pickup_agent_1_max": 20, "onion_drop_agent_0_mean": 0.42, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.41, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.25, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.23, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 13.91, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 26, "potting_onion_agent_1_mean": 14.77, "potting_onion_agent_1_min": 4, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.11, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.02, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.29, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.25, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.47, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 0.38, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.15, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.11, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.5, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.45, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.46, "soup_delivery_agent_0_min": 1, 
"soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.37, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 13.91, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 26, "optimal_onion_potting_agent_1_mean": 14.77, "optimal_onion_potting_agent_1_min": 4, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 13.91, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 26, "viable_onion_potting_agent_1_mean": 14.77, "viable_onion_potting_agent_1_min": 4, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [405.0, 468.0, 522.0, 519.0, 516.0, 519.0, 519.0, 522.0, 404.0, 522.0, 524.0, 519.0, 522.0, 525.0, 525.0, 522.0, 180.0, 519.0, 516.0, 522.0, 456.0, 464.0, 464.0, 525.0, 522.0, 522.0, 516.0, 525.0, 525.0, 525.0, 522.0, 504.0, 513.0, 482.0, 513.0, 519.0, 522.0, 525.0, 530.0, 513.0, 519.0, 570.0, 513.0, 519.0, 522.0, 470.0, 525.0, 533.0, 525.0, 516.0, 525.0, 405.0, 525.0, 516.0, 519.0, 522.0, 519.0, 525.0, 459.0, 519.0, 530.0, 405.0, 519.0, 482.0, 530.0, 522.0, 459.0, 522.0, 530.0, 513.0, 519.0, 470.0, 522.0, 522.0, 467.0, 516.0, 525.0, 473.0, 522.0, 516.0, 510.0, 519.0, 516.0, 519.0, 525.0, 525.0, 476.0, 513.0, 455.0, 576.0, 513.0, 576.0, 458.0, 513.0, 516.0, 507.0, 522.0, 473.0, 570.0, 522.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [207.0, 198.0, 240.0, 228.0, 267.0, 255.0, 264.0, 255.0, 254.0, 262.0, 254.0, 265.0, 261.0, 258.0, 249.0, 273.0, 203.0, 201.0, 261.0, 261.0, 269.0, 255.0, 261.0, 258.0, 255.0, 267.0, 260.0, 265.0, 269.0, 256.0, 264.0, 258.0, 83.0, 97.0, 254.0, 265.0, 268.0, 248.0, 254.0, 268.0, 230.0, 226.0, 213.0, 251.0, 228.0, 236.0, 277.0, 248.0, 255.0, 
267.0, 265.0, 257.0, 261.0, 255.0, 258.0, 267.0, 262.0, 263.0, 249.0, 276.0, 268.0, 254.0, 257.0, 247.0, 243.0, 270.0, 236.0, 246.0, 257.0, 256.0, 260.0, 259.0, 260.0, 262.0, 262.0, 263.0, 265.0, 265.0, 256.0, 257.0, 259.0, 260.0, 283.0, 287.0, 257.0, 256.0, 243.0, 276.0, 265.0, 257.0, 236.0, 234.0, 257.0, 268.0, 263.0, 270.0, 266.0, 259.0, 248.0, 268.0, 266.0, 259.0, 209.0, 196.0, 274.0, 251.0, 257.0, 259.0, 266.0, 253.0, 264.0, 258.0, 254.0, 265.0, 243.0, 282.0, 235.0, 224.0, 260.0, 259.0, 260.0, 270.0, 193.0, 212.0, 257.0, 262.0, 239.0, 243.0, 259.0, 271.0, 252.0, 270.0, 225.0, 234.0, 256.0, 266.0, 252.0, 278.0, 269.0, 244.0, 259.0, 260.0, 243.0, 227.0, 262.0, 260.0, 272.0, 250.0, 240.0, 227.0, 250.0, 266.0, 259.0, 266.0, 236.0, 237.0, 262.0, 260.0, 257.0, 259.0, 245.0, 265.0, 260.0, 259.0, 261.0, 255.0, 250.0, 269.0, 265.0, 260.0, 258.0, 267.0, 239.0, 237.0, 260.0, 253.0, 232.0, 223.0, 296.0, 280.0, 252.0, 261.0, 290.0, 286.0, 222.0, 236.0, 255.0, 258.0, 267.0, 249.0, 244.0, 263.0, 245.0, 277.0, 241.0, 232.0, 285.0, 285.0, 252.0, 270.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6847936163608077, "mean_inference_ms": 1.207310297745448, "mean_action_processing_ms": 0.1325129850608212, "mean_env_wait_ms": 0.8436156857138133, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 576.0, "episode_reward_min": 180.0, "episode_reward_mean": 505.91, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 83.0}, "policy_reward_max": {"ppo": 296.0}, "policy_reward_mean": {"ppo": 252.955}, "hist_stats": {"episode_reward": [405.0, 468.0, 522.0, 519.0, 516.0, 519.0, 519.0, 522.0, 404.0, 522.0, 524.0, 519.0, 522.0, 525.0, 525.0, 522.0, 180.0, 519.0, 516.0, 522.0, 456.0, 464.0, 464.0, 525.0, 522.0, 522.0, 516.0, 525.0, 525.0, 525.0, 522.0, 504.0, 513.0, 482.0, 513.0, 519.0, 522.0, 525.0, 530.0, 513.0, 519.0, 570.0, 513.0, 519.0, 522.0, 470.0, 525.0, 533.0, 525.0, 516.0, 525.0, 405.0, 525.0, 516.0, 519.0, 522.0, 519.0, 
525.0, 459.0, 519.0, 530.0, 405.0, 519.0, 482.0, 530.0, 522.0, 459.0, 522.0, 530.0, 513.0, 519.0, 470.0, 522.0, 522.0, 467.0, 516.0, 525.0, 473.0, 522.0, 516.0, 510.0, 519.0, 516.0, 519.0, 525.0, 525.0, 476.0, 513.0, 455.0, 576.0, 513.0, 576.0, 458.0, 513.0, 516.0, 507.0, 522.0, 473.0, 570.0, 522.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [207.0, 198.0, 240.0, 228.0, 267.0, 255.0, 264.0, 255.0, 254.0, 262.0, 254.0, 265.0, 261.0, 258.0, 249.0, 273.0, 203.0, 201.0, 261.0, 261.0, 269.0, 255.0, 261.0, 258.0, 255.0, 267.0, 260.0, 265.0, 269.0, 256.0, 264.0, 258.0, 83.0, 97.0, 254.0, 265.0, 268.0, 248.0, 254.0, 268.0, 230.0, 226.0, 213.0, 251.0, 228.0, 236.0, 277.0, 248.0, 255.0, 267.0, 265.0, 257.0, 261.0, 255.0, 258.0, 267.0, 262.0, 263.0, 249.0, 276.0, 268.0, 254.0, 257.0, 247.0, 243.0, 270.0, 236.0, 246.0, 257.0, 256.0, 260.0, 259.0, 260.0, 262.0, 262.0, 263.0, 265.0, 265.0, 256.0, 257.0, 259.0, 260.0, 283.0, 287.0, 257.0, 256.0, 243.0, 276.0, 265.0, 257.0, 236.0, 234.0, 257.0, 268.0, 263.0, 270.0, 266.0, 259.0, 248.0, 268.0, 266.0, 259.0, 209.0, 196.0, 274.0, 251.0, 257.0, 259.0, 266.0, 253.0, 264.0, 258.0, 254.0, 265.0, 243.0, 282.0, 235.0, 224.0, 260.0, 259.0, 260.0, 270.0, 193.0, 212.0, 257.0, 262.0, 239.0, 243.0, 259.0, 271.0, 252.0, 270.0, 225.0, 234.0, 256.0, 266.0, 252.0, 278.0, 269.0, 244.0, 259.0, 260.0, 243.0, 227.0, 262.0, 260.0, 272.0, 250.0, 240.0, 227.0, 250.0, 266.0, 259.0, 266.0, 236.0, 237.0, 262.0, 260.0, 257.0, 259.0, 245.0, 265.0, 260.0, 259.0, 261.0, 
255.0, 250.0, 269.0, 265.0, 260.0, 258.0, 267.0, 239.0, 237.0, 260.0, 253.0, 232.0, 223.0, 296.0, 280.0, 252.0, 261.0, 290.0, 286.0, 222.0, 236.0, 255.0, 258.0, 267.0, 249.0, 244.0, 263.0, 245.0, 277.0, 241.0, 232.0, 285.0, 285.0, 252.0, 270.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6847936163608077, "mean_inference_ms": 1.207310297745448, "mean_action_processing_ms": 0.1325129850608212, "mean_env_wait_ms": 0.8436156857138133, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 3276800, "num_agent_steps_trained": 3276800, "num_env_steps_sampled": 1638400, "num_env_steps_trained": 1638400, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 1638400, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 3276800, "timers": {"training_iteration_time_ms": 3645.971, "learn_time_ms": 1154.61, "learn_throughput": 11085.992, "synch_weights_time_ms": 9.907}, "counters": {"num_env_steps_sampled": 1638400, "num_env_steps_trained": 1638400, "num_agent_steps_sampled": 3276800, "num_agent_steps_trained": 3276800}, "done": false, "episodes_total": 4096, "training_iteration": 128, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-08-16", "timestamp": 1666580896, "time_this_iter_s": 3.716869592666626, "time_total_s": 481.33586835861206, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 481.33586835861206, "timesteps_since_restore": 0, "iterations_since_restore": 128, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.833333333333332, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 176.2, "sparse_reward_min": 140, "sparse_reward_max": 200, "shaped_reward_mean": 156.82, "shaped_reward_min": 125, "shaped_reward_max": 176, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.02, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 26, "onion_pickup_agent_1_mean": 15.29, "onion_pickup_agent_1_min": 4, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 14.46, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 26, "useful_onion_pickup_agent_1_mean": 14.79, "useful_onion_pickup_agent_1_min": 4, "useful_onion_pickup_agent_1_max": 20, "onion_drop_agent_0_mean": 0.41, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.48, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.25, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, 
"useful_onion_drop_agent_1_mean": 0.26, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 14.26, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 26, "potting_onion_agent_1_mean": 14.53, "potting_onion_agent_1_min": 4, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 4.95, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.21, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.14, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.36, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.5, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 0.4, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.17, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.09, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.37, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.64, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.33, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.57, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.26, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 26, "optimal_onion_potting_agent_1_mean": 14.53, "optimal_onion_potting_agent_1_min": 4, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.26, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 26, "viable_onion_potting_agent_1_mean": 14.53, "viable_onion_potting_agent_1_min": 4, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0011624525068327785, "policy_loss": 0.0009821219136938453, "vf_loss": 
7.322172164916992, "vf_explained_var": 0.6801432371139526, "kl": 0.0017549579497426748, "entropy": 1.1037721633911133, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 1651200, "num_env_steps_trained": 1651200, "num_agent_steps_sampled": 3302400, "num_agent_steps_trained": 3302400}, "sampler_results": {"episode_reward_max": 576.0, "episode_reward_min": 405.0, "episode_reward_mean": 509.22, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 193.0}, "policy_reward_max": {"ppo": 296.0}, "policy_reward_mean": {"ppo": 254.61}, "custom_metrics": {"sparse_reward_mean": 176.2, "sparse_reward_min": 140, "sparse_reward_max": 200, "shaped_reward_mean": 156.82, "shaped_reward_min": 125, "shaped_reward_max": 176, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.02, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 26, "onion_pickup_agent_1_mean": 15.29, 
"onion_pickup_agent_1_min": 4, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 14.46, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 26, "useful_onion_pickup_agent_1_mean": 14.79, "useful_onion_pickup_agent_1_min": 4, "useful_onion_pickup_agent_1_max": 20, "onion_drop_agent_0_mean": 0.41, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.48, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.25, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.26, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 14.26, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 26, "potting_onion_agent_1_mean": 14.53, "potting_onion_agent_1_min": 4, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 4.95, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.21, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.14, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.36, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.5, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 0.4, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.17, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.09, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.37, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.64, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.33, "soup_delivery_agent_0_min": 1, 
"soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.57, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.26, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 26, "optimal_onion_potting_agent_1_mean": 14.53, "optimal_onion_potting_agent_1_min": 4, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.26, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 26, "viable_onion_potting_agent_1_mean": 14.53, "viable_onion_potting_agent_1_min": 4, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [513.0, 482.0, 513.0, 519.0, 522.0, 525.0, 530.0, 513.0, 519.0, 570.0, 513.0, 519.0, 522.0, 470.0, 525.0, 533.0, 525.0, 516.0, 525.0, 405.0, 525.0, 516.0, 519.0, 522.0, 519.0, 525.0, 459.0, 519.0, 530.0, 405.0, 519.0, 482.0, 530.0, 522.0, 459.0, 522.0, 530.0, 513.0, 519.0, 470.0, 522.0, 522.0, 467.0, 516.0, 525.0, 473.0, 522.0, 516.0, 510.0, 519.0, 516.0, 519.0, 525.0, 525.0, 476.0, 513.0, 455.0, 576.0, 513.0, 576.0, 458.0, 513.0, 516.0, 507.0, 522.0, 473.0, 570.0, 522.0, 522.0, 410.0, 522.0, 453.0, 516.0, 519.0, 516.0, 522.0, 507.0, 516.0, 573.0, 462.0, 465.0, 516.0, 525.0, 555.0, 519.0, 468.0, 510.0, 513.0, 522.0, 513.0, 522.0, 516.0, 516.0, 573.0, 479.0, 419.0, 516.0, 519.0, 465.0, 522.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [243.0, 270.0, 236.0, 246.0, 257.0, 256.0, 260.0, 259.0, 260.0, 262.0, 262.0, 263.0, 265.0, 265.0, 256.0, 257.0, 259.0, 260.0, 283.0, 287.0, 257.0, 256.0, 243.0, 276.0, 265.0, 257.0, 236.0, 234.0, 257.0, 268.0, 263.0, 270.0, 266.0, 259.0, 248.0, 268.0, 266.0, 259.0, 209.0, 196.0, 274.0, 251.0, 257.0, 259.0, 266.0, 253.0, 264.0, 258.0, 254.0, 
265.0, 243.0, 282.0, 235.0, 224.0, 260.0, 259.0, 260.0, 270.0, 193.0, 212.0, 257.0, 262.0, 239.0, 243.0, 259.0, 271.0, 252.0, 270.0, 225.0, 234.0, 256.0, 266.0, 252.0, 278.0, 269.0, 244.0, 259.0, 260.0, 243.0, 227.0, 262.0, 260.0, 272.0, 250.0, 240.0, 227.0, 250.0, 266.0, 259.0, 266.0, 236.0, 237.0, 262.0, 260.0, 257.0, 259.0, 245.0, 265.0, 260.0, 259.0, 261.0, 255.0, 250.0, 269.0, 265.0, 260.0, 258.0, 267.0, 239.0, 237.0, 260.0, 253.0, 232.0, 223.0, 296.0, 280.0, 252.0, 261.0, 290.0, 286.0, 222.0, 236.0, 255.0, 258.0, 267.0, 249.0, 244.0, 263.0, 245.0, 277.0, 241.0, 232.0, 285.0, 285.0, 252.0, 270.0, 259.0, 263.0, 204.0, 206.0, 246.0, 276.0, 226.0, 227.0, 264.0, 252.0, 265.0, 254.0, 260.0, 256.0, 248.0, 274.0, 256.0, 251.0, 272.0, 244.0, 284.0, 289.0, 231.0, 231.0, 231.0, 234.0, 261.0, 255.0, 259.0, 266.0, 269.0, 286.0, 246.0, 273.0, 221.0, 247.0, 259.0, 251.0, 256.0, 257.0, 251.0, 271.0, 243.0, 270.0, 255.0, 267.0, 252.0, 264.0, 270.0, 246.0, 277.0, 296.0, 251.0, 228.0, 223.0, 196.0, 267.0, 249.0, 255.0, 264.0, 233.0, 232.0, 261.0, 261.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6846920137628598, "mean_inference_ms": 1.2071382331619824, "mean_action_processing_ms": 0.13250689057418247, "mean_env_wait_ms": 0.8433088508691953, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 576.0, "episode_reward_min": 405.0, "episode_reward_mean": 509.22, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 193.0}, "policy_reward_max": {"ppo": 296.0}, "policy_reward_mean": {"ppo": 254.61}, "hist_stats": {"episode_reward": [513.0, 482.0, 513.0, 519.0, 522.0, 525.0, 530.0, 513.0, 519.0, 570.0, 513.0, 519.0, 522.0, 470.0, 525.0, 533.0, 525.0, 516.0, 525.0, 405.0, 525.0, 516.0, 519.0, 522.0, 519.0, 525.0, 459.0, 519.0, 530.0, 405.0, 519.0, 482.0, 530.0, 522.0, 459.0, 522.0, 530.0, 513.0, 519.0, 470.0, 522.0, 522.0, 467.0, 516.0, 525.0, 473.0, 522.0, 516.0, 510.0, 519.0, 516.0, 519.0, 525.0, 525.0, 476.0, 513.0, 455.0, 
576.0, 513.0, 576.0, 458.0, 513.0, 516.0, 507.0, 522.0, 473.0, 570.0, 522.0, 522.0, 410.0, 522.0, 453.0, 516.0, 519.0, 516.0, 522.0, 507.0, 516.0, 573.0, 462.0, 465.0, 516.0, 525.0, 555.0, 519.0, 468.0, 510.0, 513.0, 522.0, 513.0, 522.0, 516.0, 516.0, 573.0, 479.0, 419.0, 516.0, 519.0, 465.0, 522.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [243.0, 270.0, 236.0, 246.0, 257.0, 256.0, 260.0, 259.0, 260.0, 262.0, 262.0, 263.0, 265.0, 265.0, 256.0, 257.0, 259.0, 260.0, 283.0, 287.0, 257.0, 256.0, 243.0, 276.0, 265.0, 257.0, 236.0, 234.0, 257.0, 268.0, 263.0, 270.0, 266.0, 259.0, 248.0, 268.0, 266.0, 259.0, 209.0, 196.0, 274.0, 251.0, 257.0, 259.0, 266.0, 253.0, 264.0, 258.0, 254.0, 265.0, 243.0, 282.0, 235.0, 224.0, 260.0, 259.0, 260.0, 270.0, 193.0, 212.0, 257.0, 262.0, 239.0, 243.0, 259.0, 271.0, 252.0, 270.0, 225.0, 234.0, 256.0, 266.0, 252.0, 278.0, 269.0, 244.0, 259.0, 260.0, 243.0, 227.0, 262.0, 260.0, 272.0, 250.0, 240.0, 227.0, 250.0, 266.0, 259.0, 266.0, 236.0, 237.0, 262.0, 260.0, 257.0, 259.0, 245.0, 265.0, 260.0, 259.0, 261.0, 255.0, 250.0, 269.0, 265.0, 260.0, 258.0, 267.0, 239.0, 237.0, 260.0, 253.0, 232.0, 223.0, 296.0, 280.0, 252.0, 261.0, 290.0, 286.0, 222.0, 236.0, 255.0, 258.0, 267.0, 249.0, 244.0, 263.0, 245.0, 277.0, 241.0, 232.0, 285.0, 285.0, 252.0, 270.0, 259.0, 263.0, 204.0, 206.0, 246.0, 276.0, 226.0, 227.0, 264.0, 252.0, 265.0, 254.0, 260.0, 256.0, 248.0, 274.0, 256.0, 251.0, 272.0, 244.0, 284.0, 289.0, 231.0, 231.0, 231.0, 234.0, 261.0, 255.0, 259.0, 
266.0, 269.0, 286.0, 246.0, 273.0, 221.0, 247.0, 259.0, 251.0, 256.0, 257.0, 251.0, 271.0, 243.0, 270.0, 255.0, 267.0, 252.0, 264.0, 270.0, 246.0, 277.0, 296.0, 251.0, 228.0, 223.0, 196.0, 267.0, 249.0, 255.0, 264.0, 233.0, 232.0, 261.0, 261.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6846920137628598, "mean_inference_ms": 1.2071382331619824, "mean_action_processing_ms": 0.13250689057418247, "mean_env_wait_ms": 0.8433088508691953, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 3302400, "num_agent_steps_trained": 3302400, "num_env_steps_sampled": 1651200, "num_env_steps_trained": 1651200, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 1651200, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 3302400, "timers": {"training_iteration_time_ms": 3646.276, "learn_time_ms": 1156.884, "learn_throughput": 11064.203, "synch_weights_time_ms": 10.691}, "counters": {"num_env_steps_sampled": 1651200, "num_env_steps_trained": 1651200, "num_agent_steps_sampled": 3302400, "num_agent_steps_trained": 3302400}, "done": false, "episodes_total": 4128, "training_iteration": 129, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-08-20", "timestamp": 1666580900, "time_this_iter_s": 3.720264434814453, "time_total_s": 485.0561327934265, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 485.0561327934265, "timesteps_since_restore": 0, "iterations_since_restore": 129, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.78, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 176.2, "sparse_reward_min": 0, "sparse_reward_max": 200, "shaped_reward_mean": 155.87, "shaped_reward_min": 9, "shaped_reward_max": 179, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.87, "onion_pickup_agent_0_min": 1, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 15.37, "onion_pickup_agent_1_min": 3, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 14.41, "useful_onion_pickup_agent_0_min": 1, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 14.91, "useful_onion_pickup_agent_1_min": 3, "useful_onion_pickup_agent_1_max": 20, "onion_drop_agent_0_mean": 0.38, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.52, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.24, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, 
"useful_onion_drop_agent_1_mean": 0.22, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 14.18, "potting_onion_agent_0_min": 1, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 14.54, "potting_onion_agent_1_min": 2, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.05, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.02, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 4.14, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.13, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.49, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 0.33, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.19, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.06, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 4.46, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.53, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 4.41, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.47, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.18, "optimal_onion_potting_agent_0_min": 1, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 14.54, "optimal_onion_potting_agent_1_min": 2, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.18, "viable_onion_potting_agent_0_min": 1, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 14.54, "viable_onion_potting_agent_1_min": 2, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0018369148019701242, "policy_loss": 0.0016497662290930748, "vf_loss": 
7.313329219818115, "vf_explained_var": 0.7119976282119751, "kl": 0.001812935690395534, "entropy": 1.0883688926696777, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 1664000, "num_env_steps_trained": 1664000, "num_agent_steps_sampled": 3328000, "num_agent_steps_trained": 3328000}, "sampler_results": {"episode_reward_max": 579.0, "episode_reward_min": 9.0, "episode_reward_mean": 508.27, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 3.0}, "policy_reward_max": {"ppo": 303.0}, "policy_reward_mean": {"ppo": 254.135}, "custom_metrics": {"sparse_reward_mean": 176.2, "sparse_reward_min": 0, "sparse_reward_max": 200, "shaped_reward_mean": 155.87, "shaped_reward_min": 9, "shaped_reward_max": 179, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.87, "onion_pickup_agent_0_min": 1, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 15.37, 
"onion_pickup_agent_1_min": 3, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 14.41, "useful_onion_pickup_agent_0_min": 1, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 14.91, "useful_onion_pickup_agent_1_min": 3, "useful_onion_pickup_agent_1_max": 20, "onion_drop_agent_0_mean": 0.38, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.52, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.24, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.22, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 14.18, "potting_onion_agent_0_min": 1, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 14.54, "potting_onion_agent_1_min": 2, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.05, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.02, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 4.14, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.13, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.49, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 0.33, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.19, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.06, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 4.46, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.53, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 4.41, "soup_delivery_agent_0_min": 0, 
"soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.47, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.18, "optimal_onion_potting_agent_0_min": 1, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 14.54, "optimal_onion_potting_agent_1_min": 2, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.18, "viable_onion_potting_agent_0_min": 1, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 14.54, "viable_onion_potting_agent_1_min": 2, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [530.0, 522.0, 459.0, 522.0, 530.0, 513.0, 519.0, 470.0, 522.0, 522.0, 467.0, 516.0, 525.0, 473.0, 522.0, 516.0, 510.0, 519.0, 516.0, 519.0, 525.0, 525.0, 476.0, 513.0, 455.0, 576.0, 513.0, 576.0, 458.0, 513.0, 516.0, 507.0, 522.0, 473.0, 570.0, 522.0, 522.0, 410.0, 522.0, 453.0, 516.0, 519.0, 516.0, 522.0, 507.0, 516.0, 573.0, 462.0, 465.0, 516.0, 525.0, 555.0, 519.0, 468.0, 510.0, 513.0, 522.0, 513.0, 522.0, 516.0, 516.0, 573.0, 479.0, 419.0, 516.0, 519.0, 465.0, 522.0, 530.0, 530.0, 519.0, 570.0, 9.0, 522.0, 573.0, 495.0, 456.0, 513.0, 522.0, 498.0, 527.0, 519.0, 473.0, 519.0, 522.0, 525.0, 510.0, 522.0, 525.0, 465.0, 525.0, 513.0, 513.0, 570.0, 570.0, 525.0, 516.0, 579.0, 530.0, 519.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [259.0, 271.0, 252.0, 270.0, 225.0, 234.0, 256.0, 266.0, 252.0, 278.0, 269.0, 244.0, 259.0, 260.0, 243.0, 227.0, 262.0, 260.0, 272.0, 250.0, 240.0, 227.0, 250.0, 266.0, 259.0, 266.0, 236.0, 237.0, 262.0, 260.0, 257.0, 259.0, 245.0, 265.0, 260.0, 259.0, 261.0, 255.0, 250.0, 269.0, 265.0, 260.0, 258.0, 267.0, 239.0, 237.0, 260.0, 253.0, 232.0, 
223.0, 296.0, 280.0, 252.0, 261.0, 290.0, 286.0, 222.0, 236.0, 255.0, 258.0, 267.0, 249.0, 244.0, 263.0, 245.0, 277.0, 241.0, 232.0, 285.0, 285.0, 252.0, 270.0, 259.0, 263.0, 204.0, 206.0, 246.0, 276.0, 226.0, 227.0, 264.0, 252.0, 265.0, 254.0, 260.0, 256.0, 248.0, 274.0, 256.0, 251.0, 272.0, 244.0, 284.0, 289.0, 231.0, 231.0, 231.0, 234.0, 261.0, 255.0, 259.0, 266.0, 269.0, 286.0, 246.0, 273.0, 221.0, 247.0, 259.0, 251.0, 256.0, 257.0, 251.0, 271.0, 243.0, 270.0, 255.0, 267.0, 252.0, 264.0, 270.0, 246.0, 277.0, 296.0, 251.0, 228.0, 223.0, 196.0, 267.0, 249.0, 255.0, 264.0, 233.0, 232.0, 261.0, 261.0, 264.0, 266.0, 253.0, 277.0, 250.0, 269.0, 303.0, 267.0, 3.0, 6.0, 258.0, 264.0, 294.0, 279.0, 249.0, 246.0, 212.0, 244.0, 279.0, 234.0, 273.0, 249.0, 247.0, 251.0, 262.0, 265.0, 265.0, 254.0, 231.0, 242.0, 252.0, 267.0, 263.0, 259.0, 251.0, 274.0, 240.0, 270.0, 257.0, 265.0, 263.0, 262.0, 246.0, 219.0, 254.0, 271.0, 262.0, 251.0, 261.0, 252.0, 292.0, 278.0, 290.0, 280.0, 264.0, 261.0, 269.0, 247.0, 289.0, 290.0, 254.0, 276.0, 269.0, 250.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6846467193085607, "mean_inference_ms": 1.2069910164086037, "mean_action_processing_ms": 0.13250549815495508, "mean_env_wait_ms": 0.8430438612717461, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 579.0, "episode_reward_min": 9.0, "episode_reward_mean": 508.27, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 3.0}, "policy_reward_max": {"ppo": 303.0}, "policy_reward_mean": {"ppo": 254.135}, "hist_stats": {"episode_reward": [530.0, 522.0, 459.0, 522.0, 530.0, 513.0, 519.0, 470.0, 522.0, 522.0, 467.0, 516.0, 525.0, 473.0, 522.0, 516.0, 510.0, 519.0, 516.0, 519.0, 525.0, 525.0, 476.0, 513.0, 455.0, 576.0, 513.0, 576.0, 458.0, 513.0, 516.0, 507.0, 522.0, 473.0, 570.0, 522.0, 522.0, 410.0, 522.0, 453.0, 516.0, 519.0, 516.0, 522.0, 507.0, 516.0, 573.0, 462.0, 465.0, 516.0, 525.0, 555.0, 519.0, 468.0, 510.0, 513.0, 522.0, 513.0, 
522.0, 516.0, 516.0, 573.0, 479.0, 419.0, 516.0, 519.0, 465.0, 522.0, 530.0, 530.0, 519.0, 570.0, 9.0, 522.0, 573.0, 495.0, 456.0, 513.0, 522.0, 498.0, 527.0, 519.0, 473.0, 519.0, 522.0, 525.0, 510.0, 522.0, 525.0, 465.0, 525.0, 513.0, 513.0, 570.0, 570.0, 525.0, 516.0, 579.0, 530.0, 519.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [259.0, 271.0, 252.0, 270.0, 225.0, 234.0, 256.0, 266.0, 252.0, 278.0, 269.0, 244.0, 259.0, 260.0, 243.0, 227.0, 262.0, 260.0, 272.0, 250.0, 240.0, 227.0, 250.0, 266.0, 259.0, 266.0, 236.0, 237.0, 262.0, 260.0, 257.0, 259.0, 245.0, 265.0, 260.0, 259.0, 261.0, 255.0, 250.0, 269.0, 265.0, 260.0, 258.0, 267.0, 239.0, 237.0, 260.0, 253.0, 232.0, 223.0, 296.0, 280.0, 252.0, 261.0, 290.0, 286.0, 222.0, 236.0, 255.0, 258.0, 267.0, 249.0, 244.0, 263.0, 245.0, 277.0, 241.0, 232.0, 285.0, 285.0, 252.0, 270.0, 259.0, 263.0, 204.0, 206.0, 246.0, 276.0, 226.0, 227.0, 264.0, 252.0, 265.0, 254.0, 260.0, 256.0, 248.0, 274.0, 256.0, 251.0, 272.0, 244.0, 284.0, 289.0, 231.0, 231.0, 231.0, 234.0, 261.0, 255.0, 259.0, 266.0, 269.0, 286.0, 246.0, 273.0, 221.0, 247.0, 259.0, 251.0, 256.0, 257.0, 251.0, 271.0, 243.0, 270.0, 255.0, 267.0, 252.0, 264.0, 270.0, 246.0, 277.0, 296.0, 251.0, 228.0, 223.0, 196.0, 267.0, 249.0, 255.0, 264.0, 233.0, 232.0, 261.0, 261.0, 264.0, 266.0, 253.0, 277.0, 250.0, 269.0, 303.0, 267.0, 3.0, 6.0, 258.0, 264.0, 294.0, 279.0, 249.0, 246.0, 212.0, 244.0, 279.0, 234.0, 273.0, 249.0, 247.0, 251.0, 262.0, 265.0, 265.0, 254.0, 231.0, 242.0, 
252.0, 267.0, 263.0, 259.0, 251.0, 274.0, 240.0, 270.0, 257.0, 265.0, 263.0, 262.0, 246.0, 219.0, 254.0, 271.0, 262.0, 251.0, 261.0, 252.0, 292.0, 278.0, 290.0, 280.0, 264.0, 261.0, 269.0, 247.0, 289.0, 290.0, 254.0, 276.0, 269.0, 250.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6846467193085607, "mean_inference_ms": 1.2069910164086037, "mean_action_processing_ms": 0.13250549815495508, "mean_env_wait_ms": 0.8430438612717461, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 3328000, "num_agent_steps_trained": 3328000, "num_env_steps_sampled": 1664000, "num_env_steps_trained": 1664000, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 1664000, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 3328000, "timers": {"training_iteration_time_ms": 3653.288, "learn_time_ms": 1161.453, "learn_throughput": 11020.674, "synch_weights_time_ms": 11.693}, "counters": {"num_env_steps_sampled": 1664000, "num_env_steps_trained": 1664000, "num_agent_steps_sampled": 3328000, "num_agent_steps_trained": 3328000}, "done": false, "episodes_total": 4160, "training_iteration": 130, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-08-24", "timestamp": 1666580904, "time_this_iter_s": 3.7706971168518066, "time_total_s": 488.8268299102783, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": 
true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, 
"train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": 
{"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, 
"enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 488.8268299102783, "timesteps_since_restore": 0, "iterations_since_restore": 130, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.73333333333333, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 175.0, "sparse_reward_min": 0, "sparse_reward_max": 200, "shaped_reward_mean": 154.88, "shaped_reward_min": 9, "shaped_reward_max": 179, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.72, "onion_pickup_agent_0_min": 1, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 15.49, "onion_pickup_agent_1_min": 3, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 14.28, "useful_onion_pickup_agent_0_min": 1, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 14.91, "useful_onion_pickup_agent_1_min": 3, "useful_onion_pickup_agent_1_max": 20, "onion_drop_agent_0_mean": 0.4, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.56, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.21, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, 
"useful_onion_drop_agent_1_mean": 0.27, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 14.02, "potting_onion_agent_0_min": 1, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 14.55, "potting_onion_agent_1_min": 2, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.09, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 4.88, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 4.22, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 3.97, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.48, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.33, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.17, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.04, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.5, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.43, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 4.46, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.35, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.02, "optimal_onion_potting_agent_0_min": 1, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 14.55, "optimal_onion_potting_agent_1_min": 2, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.02, "viable_onion_potting_agent_0_min": 1, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 14.55, "viable_onion_potting_agent_1_min": 2, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0003063816111534834, "policy_loss": -0.0004948408459313214, "vf_loss": 
7.325481414794922, "vf_explained_var": 0.7079624533653259, "kl": 0.0018343559931963682, "entropy": 1.0881778001785278, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 1676800, "num_env_steps_trained": 1676800, "num_agent_steps_sampled": 3353600, "num_agent_steps_trained": 3353600}, "sampler_results": {"episode_reward_max": 579.0, "episode_reward_min": 9.0, "episode_reward_mean": 504.88, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 3.0}, "policy_reward_max": {"ppo": 303.0}, "policy_reward_mean": {"ppo": 252.44}, "custom_metrics": {"sparse_reward_mean": 175.0, "sparse_reward_min": 0, "sparse_reward_max": 200, "shaped_reward_mean": 154.88, "shaped_reward_min": 9, "shaped_reward_max": 179, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.72, "onion_pickup_agent_0_min": 1, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 15.49, 
"onion_pickup_agent_1_min": 3, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 14.28, "useful_onion_pickup_agent_0_min": 1, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 14.91, "useful_onion_pickup_agent_1_min": 3, "useful_onion_pickup_agent_1_max": 20, "onion_drop_agent_0_mean": 0.4, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.56, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.21, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.27, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 14.02, "potting_onion_agent_0_min": 1, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 14.55, "potting_onion_agent_1_min": 2, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.09, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 4.88, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 4.22, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 3.97, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.48, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.33, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.17, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.04, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.5, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.43, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 4.46, "soup_delivery_agent_0_min": 0, 
"soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.35, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.02, "optimal_onion_potting_agent_0_min": 1, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 14.55, "optimal_onion_potting_agent_1_min": 2, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.02, "viable_onion_potting_agent_0_min": 1, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 14.55, "viable_onion_potting_agent_1_min": 2, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [522.0, 473.0, 570.0, 522.0, 522.0, 410.0, 522.0, 453.0, 516.0, 519.0, 516.0, 522.0, 507.0, 516.0, 573.0, 462.0, 465.0, 516.0, 525.0, 555.0, 519.0, 468.0, 510.0, 513.0, 522.0, 513.0, 522.0, 516.0, 516.0, 573.0, 479.0, 419.0, 516.0, 519.0, 465.0, 522.0, 530.0, 530.0, 519.0, 570.0, 9.0, 522.0, 573.0, 495.0, 456.0, 513.0, 522.0, 498.0, 527.0, 519.0, 473.0, 519.0, 522.0, 525.0, 510.0, 522.0, 525.0, 465.0, 525.0, 513.0, 513.0, 570.0, 570.0, 525.0, 516.0, 579.0, 530.0, 519.0, 525.0, 522.0, 519.0, 413.0, 573.0, 519.0, 522.0, 519.0, 470.0, 522.0, 573.0, 513.0, 522.0, 513.0, 519.0, 450.0, 513.0, 522.0, 180.0, 476.0, 515.0, 530.0, 519.0, 504.0, 530.0, 516.0, 519.0, 530.0, 404.0, 519.0, 516.0, 519.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [245.0, 277.0, 241.0, 232.0, 285.0, 285.0, 252.0, 270.0, 259.0, 263.0, 204.0, 206.0, 246.0, 276.0, 226.0, 227.0, 264.0, 252.0, 265.0, 254.0, 260.0, 256.0, 248.0, 274.0, 256.0, 251.0, 272.0, 244.0, 284.0, 289.0, 231.0, 231.0, 231.0, 234.0, 261.0, 255.0, 259.0, 266.0, 269.0, 286.0, 246.0, 273.0, 221.0, 247.0, 259.0, 251.0, 256.0, 257.0, 251.0, 
271.0, 243.0, 270.0, 255.0, 267.0, 252.0, 264.0, 270.0, 246.0, 277.0, 296.0, 251.0, 228.0, 223.0, 196.0, 267.0, 249.0, 255.0, 264.0, 233.0, 232.0, 261.0, 261.0, 264.0, 266.0, 253.0, 277.0, 250.0, 269.0, 303.0, 267.0, 3.0, 6.0, 258.0, 264.0, 294.0, 279.0, 249.0, 246.0, 212.0, 244.0, 279.0, 234.0, 273.0, 249.0, 247.0, 251.0, 262.0, 265.0, 265.0, 254.0, 231.0, 242.0, 252.0, 267.0, 263.0, 259.0, 251.0, 274.0, 240.0, 270.0, 257.0, 265.0, 263.0, 262.0, 246.0, 219.0, 254.0, 271.0, 262.0, 251.0, 261.0, 252.0, 292.0, 278.0, 290.0, 280.0, 264.0, 261.0, 269.0, 247.0, 289.0, 290.0, 254.0, 276.0, 269.0, 250.0, 262.0, 263.0, 259.0, 263.0, 259.0, 260.0, 216.0, 197.0, 295.0, 278.0, 270.0, 249.0, 259.0, 263.0, 266.0, 253.0, 239.0, 231.0, 268.0, 254.0, 282.0, 291.0, 252.0, 261.0, 260.0, 262.0, 254.0, 259.0, 256.0, 263.0, 213.0, 237.0, 253.0, 260.0, 268.0, 254.0, 88.0, 92.0, 231.0, 245.0, 248.0, 267.0, 268.0, 262.0, 264.0, 255.0, 266.0, 238.0, 264.0, 266.0, 256.0, 260.0, 267.0, 252.0, 269.0, 261.0, 204.0, 200.0, 264.0, 255.0, 251.0, 265.0, 254.0, 265.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6845801041167872, "mean_inference_ms": 1.207078100830706, "mean_action_processing_ms": 0.13249811702206304, "mean_env_wait_ms": 0.8430198604368112, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 579.0, "episode_reward_min": 9.0, "episode_reward_mean": 504.88, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 3.0}, "policy_reward_max": {"ppo": 303.0}, "policy_reward_mean": {"ppo": 252.44}, "hist_stats": {"episode_reward": [522.0, 473.0, 570.0, 522.0, 522.0, 410.0, 522.0, 453.0, 516.0, 519.0, 516.0, 522.0, 507.0, 516.0, 573.0, 462.0, 465.0, 516.0, 525.0, 555.0, 519.0, 468.0, 510.0, 513.0, 522.0, 513.0, 522.0, 516.0, 516.0, 573.0, 479.0, 419.0, 516.0, 519.0, 465.0, 522.0, 530.0, 530.0, 519.0, 570.0, 9.0, 522.0, 573.0, 495.0, 456.0, 513.0, 522.0, 498.0, 527.0, 519.0, 473.0, 519.0, 522.0, 525.0, 510.0, 522.0, 525.0, 465.0, 
525.0, 513.0, 513.0, 570.0, 570.0, 525.0, 516.0, 579.0, 530.0, 519.0, 525.0, 522.0, 519.0, 413.0, 573.0, 519.0, 522.0, 519.0, 470.0, 522.0, 573.0, 513.0, 522.0, 513.0, 519.0, 450.0, 513.0, 522.0, 180.0, 476.0, 515.0, 530.0, 519.0, 504.0, 530.0, 516.0, 519.0, 530.0, 404.0, 519.0, 516.0, 519.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [245.0, 277.0, 241.0, 232.0, 285.0, 285.0, 252.0, 270.0, 259.0, 263.0, 204.0, 206.0, 246.0, 276.0, 226.0, 227.0, 264.0, 252.0, 265.0, 254.0, 260.0, 256.0, 248.0, 274.0, 256.0, 251.0, 272.0, 244.0, 284.0, 289.0, 231.0, 231.0, 231.0, 234.0, 261.0, 255.0, 259.0, 266.0, 269.0, 286.0, 246.0, 273.0, 221.0, 247.0, 259.0, 251.0, 256.0, 257.0, 251.0, 271.0, 243.0, 270.0, 255.0, 267.0, 252.0, 264.0, 270.0, 246.0, 277.0, 296.0, 251.0, 228.0, 223.0, 196.0, 267.0, 249.0, 255.0, 264.0, 233.0, 232.0, 261.0, 261.0, 264.0, 266.0, 253.0, 277.0, 250.0, 269.0, 303.0, 267.0, 3.0, 6.0, 258.0, 264.0, 294.0, 279.0, 249.0, 246.0, 212.0, 244.0, 279.0, 234.0, 273.0, 249.0, 247.0, 251.0, 262.0, 265.0, 265.0, 254.0, 231.0, 242.0, 252.0, 267.0, 263.0, 259.0, 251.0, 274.0, 240.0, 270.0, 257.0, 265.0, 263.0, 262.0, 246.0, 219.0, 254.0, 271.0, 262.0, 251.0, 261.0, 252.0, 292.0, 278.0, 290.0, 280.0, 264.0, 261.0, 269.0, 247.0, 289.0, 290.0, 254.0, 276.0, 269.0, 250.0, 262.0, 263.0, 259.0, 263.0, 259.0, 260.0, 216.0, 197.0, 295.0, 278.0, 270.0, 249.0, 259.0, 263.0, 266.0, 253.0, 239.0, 231.0, 268.0, 254.0, 282.0, 291.0, 252.0, 261.0, 260.0, 262.0, 254.0, 259.0, 256.0, 263.0, 
213.0, 237.0, 253.0, 260.0, 268.0, 254.0, 88.0, 92.0, 231.0, 245.0, 248.0, 267.0, 268.0, 262.0, 264.0, 255.0, 266.0, 238.0, 264.0, 266.0, 256.0, 260.0, 267.0, 252.0, 269.0, 261.0, 204.0, 200.0, 264.0, 255.0, 251.0, 265.0, 254.0, 265.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6845801041167872, "mean_inference_ms": 1.207078100830706, "mean_action_processing_ms": 0.13249811702206304, "mean_env_wait_ms": 0.8430198604368112, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 3353600, "num_agent_steps_trained": 3353600, "num_env_steps_sampled": 1676800, "num_env_steps_trained": 1676800, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 1676800, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 3353600, "timers": {"training_iteration_time_ms": 3668.455, "learn_time_ms": 1137.089, "learn_throughput": 11256.819, "synch_weights_time_ms": 11.163}, "counters": {"num_env_steps_sampled": 1676800, "num_env_steps_trained": 1676800, "num_agent_steps_sampled": 3353600, "num_agent_steps_trained": 3353600}, "done": false, "episodes_total": 4192, "training_iteration": 131, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-08-28", "timestamp": 1666580908, "time_this_iter_s": 4.0141332149505615, "time_total_s": 492.8409631252289, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": 
true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, 
"train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": 
{"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, 
"enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 492.8409631252289, "timesteps_since_restore": 0, "iterations_since_restore": 131, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 21.433333333333334, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 175.8, "sparse_reward_min": 0, "sparse_reward_max": 200, "shaped_reward_mean": 155.98, "shaped_reward_min": 9, "shaped_reward_max": 179, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.79, "onion_pickup_agent_0_min": 1, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 15.65, "onion_pickup_agent_1_min": 3, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 14.43, "useful_onion_pickup_agent_0_min": 1, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 15.12, "useful_onion_pickup_agent_1_min": 3, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.39, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.59, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.19, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, 
"useful_onion_drop_agent_1_mean": 0.26, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.11, "potting_onion_agent_0_min": 1, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 14.64, "potting_onion_agent_1_min": 2, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.28, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 4.86, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 4.31, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 3.95, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.48, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.34, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.15, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.07, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.62, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.37, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 4.57, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.3, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.11, "optimal_onion_potting_agent_0_min": 1, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 14.64, "optimal_onion_potting_agent_1_min": 2, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.11, "viable_onion_potting_agent_0_min": 1, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 14.64, "viable_onion_potting_agent_1_min": 2, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0006394966039806604, "policy_loss": -0.0008195515256375074, "vf_loss": 
7.270031929016113, "vf_explained_var": 0.7044593095779419, "kl": 0.001650436781346798, "entropy": 1.093894600868225, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 1689600, "num_env_steps_trained": 1689600, "num_agent_steps_sampled": 3379200, "num_agent_steps_trained": 3379200}, "sampler_results": {"episode_reward_max": 579.0, "episode_reward_min": 9.0, "episode_reward_mean": 507.58, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 3.0}, "policy_reward_max": {"ppo": 303.0}, "policy_reward_mean": {"ppo": 253.79}, "custom_metrics": {"sparse_reward_mean": 175.8, "sparse_reward_min": 0, "sparse_reward_max": 200, "shaped_reward_mean": 155.98, "shaped_reward_min": 9, "shaped_reward_max": 179, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.79, "onion_pickup_agent_0_min": 1, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 15.65, 
"onion_pickup_agent_1_min": 3, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 14.43, "useful_onion_pickup_agent_0_min": 1, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 15.12, "useful_onion_pickup_agent_1_min": 3, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.39, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.59, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.19, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.26, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.11, "potting_onion_agent_0_min": 1, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 14.64, "potting_onion_agent_1_min": 2, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.28, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 4.86, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 4.31, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 3.95, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.48, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.34, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.15, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.07, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.62, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.37, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 4.57, "soup_delivery_agent_0_min": 0, 
"soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.3, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.11, "optimal_onion_potting_agent_0_min": 1, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 14.64, "optimal_onion_potting_agent_1_min": 2, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.11, "viable_onion_potting_agent_0_min": 1, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 14.64, "viable_onion_potting_agent_1_min": 2, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [516.0, 519.0, 465.0, 522.0, 530.0, 530.0, 519.0, 570.0, 9.0, 522.0, 573.0, 495.0, 456.0, 513.0, 522.0, 498.0, 527.0, 519.0, 473.0, 519.0, 522.0, 525.0, 510.0, 522.0, 525.0, 465.0, 525.0, 513.0, 513.0, 570.0, 570.0, 525.0, 516.0, 579.0, 530.0, 519.0, 525.0, 522.0, 519.0, 413.0, 573.0, 519.0, 522.0, 519.0, 470.0, 522.0, 573.0, 513.0, 522.0, 513.0, 519.0, 450.0, 513.0, 522.0, 180.0, 476.0, 515.0, 530.0, 519.0, 504.0, 530.0, 516.0, 519.0, 530.0, 404.0, 519.0, 516.0, 519.0, 576.0, 522.0, 513.0, 473.0, 484.0, 516.0, 516.0, 527.0, 522.0, 522.0, 522.0, 522.0, 464.0, 516.0, 530.0, 516.0, 519.0, 516.0, 519.0, 522.0, 516.0, 476.0, 522.0, 513.0, 510.0, 522.0, 519.0, 555.0, 522.0, 507.0, 522.0, 525.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [267.0, 249.0, 255.0, 264.0, 233.0, 232.0, 261.0, 261.0, 264.0, 266.0, 253.0, 277.0, 250.0, 269.0, 303.0, 267.0, 3.0, 6.0, 258.0, 264.0, 294.0, 279.0, 249.0, 246.0, 212.0, 244.0, 279.0, 234.0, 273.0, 249.0, 247.0, 251.0, 262.0, 265.0, 265.0, 254.0, 231.0, 242.0, 252.0, 267.0, 263.0, 259.0, 251.0, 274.0, 240.0, 270.0, 257.0, 265.0, 263.0, 262.0, 
246.0, 219.0, 254.0, 271.0, 262.0, 251.0, 261.0, 252.0, 292.0, 278.0, 290.0, 280.0, 264.0, 261.0, 269.0, 247.0, 289.0, 290.0, 254.0, 276.0, 269.0, 250.0, 262.0, 263.0, 259.0, 263.0, 259.0, 260.0, 216.0, 197.0, 295.0, 278.0, 270.0, 249.0, 259.0, 263.0, 266.0, 253.0, 239.0, 231.0, 268.0, 254.0, 282.0, 291.0, 252.0, 261.0, 260.0, 262.0, 254.0, 259.0, 256.0, 263.0, 213.0, 237.0, 253.0, 260.0, 268.0, 254.0, 88.0, 92.0, 231.0, 245.0, 248.0, 267.0, 268.0, 262.0, 264.0, 255.0, 266.0, 238.0, 264.0, 266.0, 256.0, 260.0, 267.0, 252.0, 269.0, 261.0, 204.0, 200.0, 264.0, 255.0, 251.0, 265.0, 254.0, 265.0, 281.0, 295.0, 266.0, 256.0, 262.0, 251.0, 234.0, 239.0, 243.0, 241.0, 268.0, 248.0, 255.0, 261.0, 257.0, 270.0, 269.0, 253.0, 260.0, 262.0, 268.0, 254.0, 254.0, 268.0, 236.0, 228.0, 251.0, 265.0, 271.0, 259.0, 272.0, 244.0, 259.0, 260.0, 254.0, 262.0, 246.0, 273.0, 262.0, 260.0, 264.0, 252.0, 240.0, 236.0, 255.0, 267.0, 262.0, 251.0, 248.0, 262.0, 261.0, 261.0, 253.0, 266.0, 268.0, 287.0, 253.0, 269.0, 261.0, 246.0, 267.0, 255.0, 256.0, 269.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6844890239893716, "mean_inference_ms": 1.2072226173218075, "mean_action_processing_ms": 0.13248499328671362, "mean_env_wait_ms": 0.8430095449132798, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 579.0, "episode_reward_min": 9.0, "episode_reward_mean": 507.58, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 3.0}, "policy_reward_max": {"ppo": 303.0}, "policy_reward_mean": {"ppo": 253.79}, "hist_stats": {"episode_reward": [516.0, 519.0, 465.0, 522.0, 530.0, 530.0, 519.0, 570.0, 9.0, 522.0, 573.0, 495.0, 456.0, 513.0, 522.0, 498.0, 527.0, 519.0, 473.0, 519.0, 522.0, 525.0, 510.0, 522.0, 525.0, 465.0, 525.0, 513.0, 513.0, 570.0, 570.0, 525.0, 516.0, 579.0, 530.0, 519.0, 525.0, 522.0, 519.0, 413.0, 573.0, 519.0, 522.0, 519.0, 470.0, 522.0, 573.0, 513.0, 522.0, 513.0, 519.0, 450.0, 513.0, 522.0, 180.0, 476.0, 515.0, 530.0, 519.0, 
504.0, 530.0, 516.0, 519.0, 530.0, 404.0, 519.0, 516.0, 519.0, 576.0, 522.0, 513.0, 473.0, 484.0, 516.0, 516.0, 527.0, 522.0, 522.0, 522.0, 522.0, 464.0, 516.0, 530.0, 516.0, 519.0, 516.0, 519.0, 522.0, 516.0, 476.0, 522.0, 513.0, 510.0, 522.0, 519.0, 555.0, 522.0, 507.0, 522.0, 525.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [267.0, 249.0, 255.0, 264.0, 233.0, 232.0, 261.0, 261.0, 264.0, 266.0, 253.0, 277.0, 250.0, 269.0, 303.0, 267.0, 3.0, 6.0, 258.0, 264.0, 294.0, 279.0, 249.0, 246.0, 212.0, 244.0, 279.0, 234.0, 273.0, 249.0, 247.0, 251.0, 262.0, 265.0, 265.0, 254.0, 231.0, 242.0, 252.0, 267.0, 263.0, 259.0, 251.0, 274.0, 240.0, 270.0, 257.0, 265.0, 263.0, 262.0, 246.0, 219.0, 254.0, 271.0, 262.0, 251.0, 261.0, 252.0, 292.0, 278.0, 290.0, 280.0, 264.0, 261.0, 269.0, 247.0, 289.0, 290.0, 254.0, 276.0, 269.0, 250.0, 262.0, 263.0, 259.0, 263.0, 259.0, 260.0, 216.0, 197.0, 295.0, 278.0, 270.0, 249.0, 259.0, 263.0, 266.0, 253.0, 239.0, 231.0, 268.0, 254.0, 282.0, 291.0, 252.0, 261.0, 260.0, 262.0, 254.0, 259.0, 256.0, 263.0, 213.0, 237.0, 253.0, 260.0, 268.0, 254.0, 88.0, 92.0, 231.0, 245.0, 248.0, 267.0, 268.0, 262.0, 264.0, 255.0, 266.0, 238.0, 264.0, 266.0, 256.0, 260.0, 267.0, 252.0, 269.0, 261.0, 204.0, 200.0, 264.0, 255.0, 251.0, 265.0, 254.0, 265.0, 281.0, 295.0, 266.0, 256.0, 262.0, 251.0, 234.0, 239.0, 243.0, 241.0, 268.0, 248.0, 255.0, 261.0, 257.0, 270.0, 269.0, 253.0, 260.0, 262.0, 268.0, 254.0, 254.0, 268.0, 236.0, 228.0, 251.0, 265.0, 271.0, 259.0, 272.0, 
244.0, 259.0, 260.0, 254.0, 262.0, 246.0, 273.0, 262.0, 260.0, 264.0, 252.0, 240.0, 236.0, 255.0, 267.0, 262.0, 251.0, 248.0, 262.0, 261.0, 261.0, 253.0, 266.0, 268.0, 287.0, 253.0, 269.0, 261.0, 246.0, 267.0, 255.0, 256.0, 269.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6844890239893716, "mean_inference_ms": 1.2072226173218075, "mean_action_processing_ms": 0.13248499328671362, "mean_env_wait_ms": 0.8430095449132798, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 3379200, "num_agent_steps_trained": 3379200, "num_env_steps_sampled": 1689600, "num_env_steps_trained": 1689600, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 1689600, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 3379200, "timers": {"training_iteration_time_ms": 3684.035, "learn_time_ms": 1133.131, "learn_throughput": 11296.134, "synch_weights_time_ms": 11.078}, "counters": {"num_env_steps_sampled": 1689600, "num_env_steps_trained": 1689600, "num_agent_steps_sampled": 3379200, "num_agent_steps_trained": 3379200}, "done": false, "episodes_total": 4224, "training_iteration": 132, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-08-32", "timestamp": 1666580912, "time_this_iter_s": 3.850553035736084, "time_total_s": 496.69151616096497, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, 
"local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 
12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, 
"log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": 
false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, 
"min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, 
"evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, 
"callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 496.69151616096497, "timesteps_since_restore": 0, "iterations_since_restore": 132, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.616666666666667, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 177.2, "sparse_reward_min": 60, "sparse_reward_max": 200, "shaped_reward_mean": 157.9, "shaped_reward_min": 60, "shaped_reward_max": 179, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.94, "onion_pickup_agent_0_min": 4, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 15.88, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 14.58, "useful_onion_pickup_agent_0_min": 3, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 15.32, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.44, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.59, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.21, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, 
"useful_onion_drop_agent_1_mean": 0.29, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.19, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 14.82, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.43, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 4.91, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 4.53, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 3.91, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.43, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.48, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.13, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.12, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.8, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.32, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 4.72, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.26, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.19, "optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 14.82, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.19, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 14.82, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0012471978552639484, "policy_loss": -0.001432041055522859, "vf_loss": 
7.322253227233887, "vf_explained_var": 0.7088576555252075, "kl": 0.001987504307180643, "entropy": 1.0947635173797607, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 1702400, "num_env_steps_trained": 1702400, "num_agent_steps_sampled": 3404800, "num_agent_steps_trained": 3404800}, "sampler_results": {"episode_reward_max": 579.0, "episode_reward_min": 180.0, "episode_reward_mean": 512.3, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 88.0}, "policy_reward_max": {"ppo": 302.0}, "policy_reward_mean": {"ppo": 256.15}, "custom_metrics": {"sparse_reward_mean": 177.2, "sparse_reward_min": 60, "sparse_reward_max": 200, "shaped_reward_mean": 157.9, "shaped_reward_min": 60, "shaped_reward_max": 179, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.94, "onion_pickup_agent_0_min": 4, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 15.88, 
"onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 14.58, "useful_onion_pickup_agent_0_min": 3, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 15.32, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.44, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.59, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.21, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.29, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.19, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 14.82, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.43, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 4.91, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 4.53, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 3.91, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.43, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.48, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.13, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.12, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.8, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.32, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 4.72, "soup_delivery_agent_0_min": 1, 
"soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.26, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.19, "optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 14.82, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.19, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 14.82, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [516.0, 579.0, 530.0, 519.0, 525.0, 522.0, 519.0, 413.0, 573.0, 519.0, 522.0, 519.0, 470.0, 522.0, 573.0, 513.0, 522.0, 513.0, 519.0, 450.0, 513.0, 522.0, 180.0, 476.0, 515.0, 530.0, 519.0, 504.0, 530.0, 516.0, 519.0, 530.0, 404.0, 519.0, 516.0, 519.0, 576.0, 522.0, 513.0, 473.0, 484.0, 516.0, 516.0, 527.0, 522.0, 522.0, 522.0, 522.0, 464.0, 516.0, 530.0, 516.0, 519.0, 516.0, 519.0, 522.0, 516.0, 476.0, 522.0, 513.0, 510.0, 522.0, 519.0, 555.0, 522.0, 507.0, 522.0, 525.0, 573.0, 519.0, 573.0, 470.0, 522.0, 456.0, 516.0, 525.0, 525.0, 513.0, 379.0, 464.0, 567.0, 522.0, 510.0, 476.0, 524.0, 522.0, 513.0, 516.0, 522.0, 576.0, 573.0, 522.0, 530.0, 576.0, 522.0, 576.0, 513.0, 522.0, 421.0, 516.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [269.0, 247.0, 289.0, 290.0, 254.0, 276.0, 269.0, 250.0, 262.0, 263.0, 259.0, 263.0, 259.0, 260.0, 216.0, 197.0, 295.0, 278.0, 270.0, 249.0, 259.0, 263.0, 266.0, 253.0, 239.0, 231.0, 268.0, 254.0, 282.0, 291.0, 252.0, 261.0, 260.0, 262.0, 254.0, 259.0, 256.0, 263.0, 213.0, 237.0, 253.0, 260.0, 268.0, 254.0, 88.0, 92.0, 231.0, 245.0, 248.0, 
267.0, 268.0, 262.0, 264.0, 255.0, 266.0, 238.0, 264.0, 266.0, 256.0, 260.0, 267.0, 252.0, 269.0, 261.0, 204.0, 200.0, 264.0, 255.0, 251.0, 265.0, 254.0, 265.0, 281.0, 295.0, 266.0, 256.0, 262.0, 251.0, 234.0, 239.0, 243.0, 241.0, 268.0, 248.0, 255.0, 261.0, 257.0, 270.0, 269.0, 253.0, 260.0, 262.0, 268.0, 254.0, 254.0, 268.0, 236.0, 228.0, 251.0, 265.0, 271.0, 259.0, 272.0, 244.0, 259.0, 260.0, 254.0, 262.0, 246.0, 273.0, 262.0, 260.0, 264.0, 252.0, 240.0, 236.0, 255.0, 267.0, 262.0, 251.0, 248.0, 262.0, 261.0, 261.0, 253.0, 266.0, 268.0, 287.0, 253.0, 269.0, 261.0, 246.0, 267.0, 255.0, 256.0, 269.0, 292.0, 281.0, 260.0, 259.0, 271.0, 302.0, 237.0, 233.0, 273.0, 249.0, 218.0, 238.0, 249.0, 267.0, 257.0, 268.0, 272.0, 253.0, 248.0, 265.0, 184.0, 195.0, 233.0, 231.0, 283.0, 284.0, 276.0, 246.0, 267.0, 243.0, 236.0, 240.0, 268.0, 256.0, 266.0, 256.0, 264.0, 249.0, 269.0, 247.0, 277.0, 245.0, 289.0, 287.0, 284.0, 289.0, 265.0, 257.0, 275.0, 255.0, 283.0, 293.0, 267.0, 255.0, 296.0, 280.0, 257.0, 256.0, 267.0, 255.0, 216.0, 205.0, 270.0, 246.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.684376433534071, "mean_inference_ms": 1.2074014996959537, "mean_action_processing_ms": 0.1324722504419282, "mean_env_wait_ms": 0.8430133635583676, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 579.0, "episode_reward_min": 180.0, "episode_reward_mean": 512.3, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 88.0}, "policy_reward_max": {"ppo": 302.0}, "policy_reward_mean": {"ppo": 256.15}, "hist_stats": {"episode_reward": [516.0, 579.0, 530.0, 519.0, 525.0, 522.0, 519.0, 413.0, 573.0, 519.0, 522.0, 519.0, 470.0, 522.0, 573.0, 513.0, 522.0, 513.0, 519.0, 450.0, 513.0, 522.0, 180.0, 476.0, 515.0, 530.0, 519.0, 504.0, 530.0, 516.0, 519.0, 530.0, 404.0, 519.0, 516.0, 519.0, 576.0, 522.0, 513.0, 473.0, 484.0, 516.0, 516.0, 527.0, 522.0, 522.0, 522.0, 522.0, 464.0, 516.0, 530.0, 516.0, 519.0, 516.0, 519.0, 522.0, 516.0, 
476.0, 522.0, 513.0, 510.0, 522.0, 519.0, 555.0, 522.0, 507.0, 522.0, 525.0, 573.0, 519.0, 573.0, 470.0, 522.0, 456.0, 516.0, 525.0, 525.0, 513.0, 379.0, 464.0, 567.0, 522.0, 510.0, 476.0, 524.0, 522.0, 513.0, 516.0, 522.0, 576.0, 573.0, 522.0, 530.0, 576.0, 522.0, 576.0, 513.0, 522.0, 421.0, 516.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [269.0, 247.0, 289.0, 290.0, 254.0, 276.0, 269.0, 250.0, 262.0, 263.0, 259.0, 263.0, 259.0, 260.0, 216.0, 197.0, 295.0, 278.0, 270.0, 249.0, 259.0, 263.0, 266.0, 253.0, 239.0, 231.0, 268.0, 254.0, 282.0, 291.0, 252.0, 261.0, 260.0, 262.0, 254.0, 259.0, 256.0, 263.0, 213.0, 237.0, 253.0, 260.0, 268.0, 254.0, 88.0, 92.0, 231.0, 245.0, 248.0, 267.0, 268.0, 262.0, 264.0, 255.0, 266.0, 238.0, 264.0, 266.0, 256.0, 260.0, 267.0, 252.0, 269.0, 261.0, 204.0, 200.0, 264.0, 255.0, 251.0, 265.0, 254.0, 265.0, 281.0, 295.0, 266.0, 256.0, 262.0, 251.0, 234.0, 239.0, 243.0, 241.0, 268.0, 248.0, 255.0, 261.0, 257.0, 270.0, 269.0, 253.0, 260.0, 262.0, 268.0, 254.0, 254.0, 268.0, 236.0, 228.0, 251.0, 265.0, 271.0, 259.0, 272.0, 244.0, 259.0, 260.0, 254.0, 262.0, 246.0, 273.0, 262.0, 260.0, 264.0, 252.0, 240.0, 236.0, 255.0, 267.0, 262.0, 251.0, 248.0, 262.0, 261.0, 261.0, 253.0, 266.0, 268.0, 287.0, 253.0, 269.0, 261.0, 246.0, 267.0, 255.0, 256.0, 269.0, 292.0, 281.0, 260.0, 259.0, 271.0, 302.0, 237.0, 233.0, 273.0, 249.0, 218.0, 238.0, 249.0, 267.0, 257.0, 268.0, 272.0, 253.0, 248.0, 265.0, 184.0, 195.0, 233.0, 231.0, 283.0, 284.0, 276.0, 246.0, 267.0, 
243.0, 236.0, 240.0, 268.0, 256.0, 266.0, 256.0, 264.0, 249.0, 269.0, 247.0, 277.0, 245.0, 289.0, 287.0, 284.0, 289.0, 265.0, 257.0, 275.0, 255.0, 283.0, 293.0, 267.0, 255.0, 296.0, 280.0, 257.0, 256.0, 267.0, 255.0, 216.0, 205.0, 270.0, 246.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.684376433534071, "mean_inference_ms": 1.2074014996959537, "mean_action_processing_ms": 0.1324722504419282, "mean_env_wait_ms": 0.8430133635583676, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 3404800, "num_agent_steps_trained": 3404800, "num_env_steps_sampled": 1702400, "num_env_steps_trained": 1702400, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 1702400, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 3404800, "timers": {"training_iteration_time_ms": 3677.997, "learn_time_ms": 1133.053, "learn_throughput": 11296.916, "synch_weights_time_ms": 11.447}, "counters": {"num_env_steps_sampled": 1702400, "num_env_steps_trained": 1702400, "num_agent_steps_sampled": 3404800, "num_agent_steps_trained": 3404800}, "done": false, "episodes_total": 4256, "training_iteration": 133, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-08-36", "timestamp": 1666580916, "time_this_iter_s": 3.7330322265625, "time_total_s": 500.42454838752747, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 500.42454838752747, "timesteps_since_restore": 0, "iterations_since_restore": 133, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.92, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 180.4, "sparse_reward_min": 120, "sparse_reward_max": 200, "shaped_reward_mean": 159.85, "shaped_reward_min": 124, "shaped_reward_max": 179, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.09, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 16.03, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 14.72, "useful_onion_pickup_agent_0_min": 9, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 15.44, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.46, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.48, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.24, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, 
"useful_onion_drop_agent_1_mean": 0.22, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.25, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 15.1, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.46, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 4.99, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 4.52, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.03, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.45, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.46, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.16, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.11, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.83, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.41, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.75, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.39, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.25, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 15.1, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.25, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 15.1, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.002365380059927702, "policy_loss": -0.002549594035372138, "vf_loss": 
7.225664138793945, "vf_explained_var": 0.7080831527709961, "kl": 0.0018610151018947363, "entropy": 1.0767040252685547, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 1715200, "num_env_steps_trained": 1715200, "num_agent_steps_sampled": 3430400, "num_agent_steps_trained": 3430400}, "sampler_results": {"episode_reward_max": 579.0, "episode_reward_min": 379.0, "episode_reward_mean": 520.65, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 184.0}, "policy_reward_max": {"ppo": 302.0}, "policy_reward_mean": {"ppo": 260.325}, "custom_metrics": {"sparse_reward_mean": 180.4, "sparse_reward_min": 120, "sparse_reward_max": 200, "shaped_reward_mean": 159.85, "shaped_reward_min": 124, "shaped_reward_max": 179, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.09, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 16.03, 
"onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 14.72, "useful_onion_pickup_agent_0_min": 9, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 15.44, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.46, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.48, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.24, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.22, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.25, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 15.1, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.46, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 4.99, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 4.52, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.03, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.45, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.46, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.16, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.11, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.83, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.41, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.75, "soup_delivery_agent_0_min": 2, 
"soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.39, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.25, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 15.1, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.25, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 15.1, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [404.0, 519.0, 516.0, 519.0, 576.0, 522.0, 513.0, 473.0, 484.0, 516.0, 516.0, 527.0, 522.0, 522.0, 522.0, 522.0, 464.0, 516.0, 530.0, 516.0, 519.0, 516.0, 519.0, 522.0, 516.0, 476.0, 522.0, 513.0, 510.0, 522.0, 519.0, 555.0, 522.0, 507.0, 522.0, 525.0, 573.0, 519.0, 573.0, 470.0, 522.0, 456.0, 516.0, 525.0, 525.0, 513.0, 379.0, 464.0, 567.0, 522.0, 510.0, 476.0, 524.0, 522.0, 513.0, 516.0, 522.0, 576.0, 573.0, 522.0, 530.0, 576.0, 522.0, 576.0, 513.0, 522.0, 421.0, 516.0, 519.0, 516.0, 519.0, 504.0, 576.0, 530.0, 519.0, 525.0, 507.0, 522.0, 573.0, 573.0, 573.0, 527.0, 522.0, 573.0, 521.0, 522.0, 504.0, 519.0, 522.0, 579.0, 522.0, 570.0, 516.0, 570.0, 530.0, 522.0, 501.0, 519.0, 513.0, 519.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [204.0, 200.0, 264.0, 255.0, 251.0, 265.0, 254.0, 265.0, 281.0, 295.0, 266.0, 256.0, 262.0, 251.0, 234.0, 239.0, 243.0, 241.0, 268.0, 248.0, 255.0, 261.0, 257.0, 270.0, 269.0, 253.0, 260.0, 262.0, 268.0, 254.0, 254.0, 268.0, 236.0, 228.0, 251.0, 265.0, 271.0, 259.0, 272.0, 244.0, 259.0, 260.0, 254.0, 262.0, 246.0, 273.0, 262.0, 260.0, 264.0, 
252.0, 240.0, 236.0, 255.0, 267.0, 262.0, 251.0, 248.0, 262.0, 261.0, 261.0, 253.0, 266.0, 268.0, 287.0, 253.0, 269.0, 261.0, 246.0, 267.0, 255.0, 256.0, 269.0, 292.0, 281.0, 260.0, 259.0, 271.0, 302.0, 237.0, 233.0, 273.0, 249.0, 218.0, 238.0, 249.0, 267.0, 257.0, 268.0, 272.0, 253.0, 248.0, 265.0, 184.0, 195.0, 233.0, 231.0, 283.0, 284.0, 276.0, 246.0, 267.0, 243.0, 236.0, 240.0, 268.0, 256.0, 266.0, 256.0, 264.0, 249.0, 269.0, 247.0, 277.0, 245.0, 289.0, 287.0, 284.0, 289.0, 265.0, 257.0, 275.0, 255.0, 283.0, 293.0, 267.0, 255.0, 296.0, 280.0, 257.0, 256.0, 267.0, 255.0, 216.0, 205.0, 270.0, 246.0, 263.0, 256.0, 260.0, 256.0, 263.0, 256.0, 244.0, 260.0, 280.0, 296.0, 258.0, 272.0, 271.0, 248.0, 289.0, 236.0, 243.0, 264.0, 254.0, 268.0, 286.0, 287.0, 283.0, 290.0, 280.0, 293.0, 265.0, 262.0, 270.0, 252.0, 290.0, 283.0, 250.0, 271.0, 264.0, 258.0, 240.0, 264.0, 261.0, 258.0, 253.0, 269.0, 278.0, 301.0, 265.0, 257.0, 280.0, 290.0, 258.0, 258.0, 292.0, 278.0, 264.0, 266.0, 261.0, 261.0, 253.0, 248.0, 264.0, 255.0, 252.0, 261.0, 249.0, 270.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6842959995242882, "mean_inference_ms": 1.2073346375744007, "mean_action_processing_ms": 0.13245642360808568, "mean_env_wait_ms": 0.8428054226282944, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 579.0, "episode_reward_min": 379.0, "episode_reward_mean": 520.65, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 184.0}, "policy_reward_max": {"ppo": 302.0}, "policy_reward_mean": {"ppo": 260.325}, "hist_stats": {"episode_reward": [404.0, 519.0, 516.0, 519.0, 576.0, 522.0, 513.0, 473.0, 484.0, 516.0, 516.0, 527.0, 522.0, 522.0, 522.0, 522.0, 464.0, 516.0, 530.0, 516.0, 519.0, 516.0, 519.0, 522.0, 516.0, 476.0, 522.0, 513.0, 510.0, 522.0, 519.0, 555.0, 522.0, 507.0, 522.0, 525.0, 573.0, 519.0, 573.0, 470.0, 522.0, 456.0, 516.0, 525.0, 525.0, 513.0, 379.0, 464.0, 567.0, 522.0, 510.0, 476.0, 524.0, 522.0, 513.0, 516.0, 
522.0, 576.0, 573.0, 522.0, 530.0, 576.0, 522.0, 576.0, 513.0, 522.0, 421.0, 516.0, 519.0, 516.0, 519.0, 504.0, 576.0, 530.0, 519.0, 525.0, 507.0, 522.0, 573.0, 573.0, 573.0, 527.0, 522.0, 573.0, 521.0, 522.0, 504.0, 519.0, 522.0, 579.0, 522.0, 570.0, 516.0, 570.0, 530.0, 522.0, 501.0, 519.0, 513.0, 519.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [204.0, 200.0, 264.0, 255.0, 251.0, 265.0, 254.0, 265.0, 281.0, 295.0, 266.0, 256.0, 262.0, 251.0, 234.0, 239.0, 243.0, 241.0, 268.0, 248.0, 255.0, 261.0, 257.0, 270.0, 269.0, 253.0, 260.0, 262.0, 268.0, 254.0, 254.0, 268.0, 236.0, 228.0, 251.0, 265.0, 271.0, 259.0, 272.0, 244.0, 259.0, 260.0, 254.0, 262.0, 246.0, 273.0, 262.0, 260.0, 264.0, 252.0, 240.0, 236.0, 255.0, 267.0, 262.0, 251.0, 248.0, 262.0, 261.0, 261.0, 253.0, 266.0, 268.0, 287.0, 253.0, 269.0, 261.0, 246.0, 267.0, 255.0, 256.0, 269.0, 292.0, 281.0, 260.0, 259.0, 271.0, 302.0, 237.0, 233.0, 273.0, 249.0, 218.0, 238.0, 249.0, 267.0, 257.0, 268.0, 272.0, 253.0, 248.0, 265.0, 184.0, 195.0, 233.0, 231.0, 283.0, 284.0, 276.0, 246.0, 267.0, 243.0, 236.0, 240.0, 268.0, 256.0, 266.0, 256.0, 264.0, 249.0, 269.0, 247.0, 277.0, 245.0, 289.0, 287.0, 284.0, 289.0, 265.0, 257.0, 275.0, 255.0, 283.0, 293.0, 267.0, 255.0, 296.0, 280.0, 257.0, 256.0, 267.0, 255.0, 216.0, 205.0, 270.0, 246.0, 263.0, 256.0, 260.0, 256.0, 263.0, 256.0, 244.0, 260.0, 280.0, 296.0, 258.0, 272.0, 271.0, 248.0, 289.0, 236.0, 243.0, 264.0, 254.0, 268.0, 286.0, 287.0, 283.0, 290.0, 280.0, 293.0, 265.0, 262.0, 
270.0, 252.0, 290.0, 283.0, 250.0, 271.0, 264.0, 258.0, 240.0, 264.0, 261.0, 258.0, 253.0, 269.0, 278.0, 301.0, 265.0, 257.0, 280.0, 290.0, 258.0, 258.0, 292.0, 278.0, 264.0, 266.0, 261.0, 261.0, 253.0, 248.0, 264.0, 255.0, 252.0, 261.0, 249.0, 270.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6842959995242882, "mean_inference_ms": 1.2073346375744007, "mean_action_processing_ms": 0.13245642360808568, "mean_env_wait_ms": 0.8428054226282944, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 3430400, "num_agent_steps_trained": 3430400, "num_env_steps_sampled": 1715200, "num_env_steps_trained": 1715200, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 1715200, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 3430400, "timers": {"training_iteration_time_ms": 3671.115, "learn_time_ms": 1132.114, "learn_throughput": 11306.28, "synch_weights_time_ms": 12.164}, "counters": {"num_env_steps_sampled": 1715200, "num_env_steps_trained": 1715200, "num_agent_steps_sampled": 3430400, "num_agent_steps_trained": 3430400}, "done": false, "episodes_total": 4288, "training_iteration": 134, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-08-40", "timestamp": 1666580920, "time_this_iter_s": 3.6687631607055664, "time_total_s": 504.09331154823303, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 504.09331154823303, "timesteps_since_restore": 0, "iterations_since_restore": 134, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.883333333333333, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 181.0, "sparse_reward_min": 60, "sparse_reward_max": 200, "shaped_reward_mean": 159.96, "shaped_reward_min": 60, "shaped_reward_max": 179, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.07, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 15.95, "onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 14.67, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 15.44, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.44, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.42, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.26, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, 
"useful_onion_drop_agent_1_mean": 0.18, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.25, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 15.12, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.46, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 4.94, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 4.58, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 3.92, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.37, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.48, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.11, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.12, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.95, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.35, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.85, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.29, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.03, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.25, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 15.12, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.25, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 15.12, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0008085625013336539, "policy_loss": -0.0010100638028234243, "vf_loss": 
7.388925552368164, "vf_explained_var": 0.7019363641738892, "kl": 0.0018008106853812933, "entropy": 1.074782371520996, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 1728000, "num_env_steps_trained": 1728000, "num_agent_steps_sampled": 3456000, "num_agent_steps_trained": 3456000}, "sampler_results": {"episode_reward_max": 579.0, "episode_reward_min": 180.0, "episode_reward_mean": 521.96, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 89.0}, "policy_reward_max": {"ppo": 302.0}, "policy_reward_mean": {"ppo": 260.98}, "custom_metrics": {"sparse_reward_mean": 181.0, "sparse_reward_min": 60, "sparse_reward_max": 200, "shaped_reward_mean": 159.96, "shaped_reward_min": 60, "shaped_reward_max": 179, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.07, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 15.95, 
"onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 14.67, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 15.44, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.44, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.42, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.26, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.18, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.25, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 15.12, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.46, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 4.94, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 4.58, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 3.92, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.37, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.48, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.11, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.12, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.95, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.35, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.85, "soup_delivery_agent_0_min": 1, 
"soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.29, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.03, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.25, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 15.12, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.25, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 15.12, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [522.0, 507.0, 522.0, 525.0, 573.0, 519.0, 573.0, 470.0, 522.0, 456.0, 516.0, 525.0, 525.0, 513.0, 379.0, 464.0, 567.0, 522.0, 510.0, 476.0, 524.0, 522.0, 513.0, 516.0, 522.0, 576.0, 573.0, 522.0, 530.0, 576.0, 522.0, 576.0, 513.0, 522.0, 421.0, 516.0, 519.0, 516.0, 519.0, 504.0, 576.0, 530.0, 519.0, 525.0, 507.0, 522.0, 573.0, 573.0, 573.0, 527.0, 522.0, 573.0, 521.0, 522.0, 504.0, 519.0, 522.0, 579.0, 522.0, 570.0, 516.0, 570.0, 530.0, 522.0, 501.0, 519.0, 513.0, 519.0, 522.0, 573.0, 573.0, 567.0, 561.0, 567.0, 473.0, 484.0, 516.0, 180.0, 516.0, 519.0, 519.0, 521.0, 530.0, 525.0, 525.0, 570.0, 465.0, 573.0, 527.0, 522.0, 501.0, 527.0, 524.0, 518.0, 498.0, 516.0, 519.0, 522.0, 513.0, 573.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [253.0, 269.0, 261.0, 246.0, 267.0, 255.0, 256.0, 269.0, 292.0, 281.0, 260.0, 259.0, 271.0, 302.0, 237.0, 233.0, 273.0, 249.0, 218.0, 238.0, 249.0, 267.0, 257.0, 268.0, 272.0, 253.0, 248.0, 265.0, 184.0, 195.0, 233.0, 231.0, 283.0, 284.0, 276.0, 246.0, 267.0, 243.0, 236.0, 240.0, 268.0, 256.0, 266.0, 256.0, 264.0, 249.0, 269.0, 247.0, 277.0, 
245.0, 289.0, 287.0, 284.0, 289.0, 265.0, 257.0, 275.0, 255.0, 283.0, 293.0, 267.0, 255.0, 296.0, 280.0, 257.0, 256.0, 267.0, 255.0, 216.0, 205.0, 270.0, 246.0, 263.0, 256.0, 260.0, 256.0, 263.0, 256.0, 244.0, 260.0, 280.0, 296.0, 258.0, 272.0, 271.0, 248.0, 289.0, 236.0, 243.0, 264.0, 254.0, 268.0, 286.0, 287.0, 283.0, 290.0, 280.0, 293.0, 265.0, 262.0, 270.0, 252.0, 290.0, 283.0, 250.0, 271.0, 264.0, 258.0, 240.0, 264.0, 261.0, 258.0, 253.0, 269.0, 278.0, 301.0, 265.0, 257.0, 280.0, 290.0, 258.0, 258.0, 292.0, 278.0, 264.0, 266.0, 261.0, 261.0, 253.0, 248.0, 264.0, 255.0, 252.0, 261.0, 249.0, 270.0, 258.0, 264.0, 295.0, 278.0, 281.0, 292.0, 284.0, 283.0, 278.0, 283.0, 275.0, 292.0, 232.0, 241.0, 263.0, 221.0, 259.0, 257.0, 91.0, 89.0, 270.0, 246.0, 261.0, 258.0, 272.0, 247.0, 271.0, 250.0, 262.0, 268.0, 263.0, 262.0, 260.0, 265.0, 293.0, 277.0, 226.0, 239.0, 283.0, 290.0, 264.0, 263.0, 251.0, 271.0, 257.0, 244.0, 277.0, 250.0, 260.0, 264.0, 276.0, 242.0, 242.0, 256.0, 255.0, 261.0, 255.0, 264.0, 251.0, 271.0, 261.0, 252.0, 299.0, 274.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6843184248913212, "mean_inference_ms": 1.2072503599124262, "mean_action_processing_ms": 0.13244891511509294, "mean_env_wait_ms": 0.8425490004197971, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 579.0, "episode_reward_min": 180.0, "episode_reward_mean": 521.96, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 89.0}, "policy_reward_max": {"ppo": 302.0}, "policy_reward_mean": {"ppo": 260.98}, "hist_stats": {"episode_reward": [522.0, 507.0, 522.0, 525.0, 573.0, 519.0, 573.0, 470.0, 522.0, 456.0, 516.0, 525.0, 525.0, 513.0, 379.0, 464.0, 567.0, 522.0, 510.0, 476.0, 524.0, 522.0, 513.0, 516.0, 522.0, 576.0, 573.0, 522.0, 530.0, 576.0, 522.0, 576.0, 513.0, 522.0, 421.0, 516.0, 519.0, 516.0, 519.0, 504.0, 576.0, 530.0, 519.0, 525.0, 507.0, 522.0, 573.0, 573.0, 573.0, 527.0, 522.0, 573.0, 521.0, 522.0, 504.0, 519.0, 522.0, 
579.0, 522.0, 570.0, 516.0, 570.0, 530.0, 522.0, 501.0, 519.0, 513.0, 519.0, 522.0, 573.0, 573.0, 567.0, 561.0, 567.0, 473.0, 484.0, 516.0, 180.0, 516.0, 519.0, 519.0, 521.0, 530.0, 525.0, 525.0, 570.0, 465.0, 573.0, 527.0, 522.0, 501.0, 527.0, 524.0, 518.0, 498.0, 516.0, 519.0, 522.0, 513.0, 573.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [253.0, 269.0, 261.0, 246.0, 267.0, 255.0, 256.0, 269.0, 292.0, 281.0, 260.0, 259.0, 271.0, 302.0, 237.0, 233.0, 273.0, 249.0, 218.0, 238.0, 249.0, 267.0, 257.0, 268.0, 272.0, 253.0, 248.0, 265.0, 184.0, 195.0, 233.0, 231.0, 283.0, 284.0, 276.0, 246.0, 267.0, 243.0, 236.0, 240.0, 268.0, 256.0, 266.0, 256.0, 264.0, 249.0, 269.0, 247.0, 277.0, 245.0, 289.0, 287.0, 284.0, 289.0, 265.0, 257.0, 275.0, 255.0, 283.0, 293.0, 267.0, 255.0, 296.0, 280.0, 257.0, 256.0, 267.0, 255.0, 216.0, 205.0, 270.0, 246.0, 263.0, 256.0, 260.0, 256.0, 263.0, 256.0, 244.0, 260.0, 280.0, 296.0, 258.0, 272.0, 271.0, 248.0, 289.0, 236.0, 243.0, 264.0, 254.0, 268.0, 286.0, 287.0, 283.0, 290.0, 280.0, 293.0, 265.0, 262.0, 270.0, 252.0, 290.0, 283.0, 250.0, 271.0, 264.0, 258.0, 240.0, 264.0, 261.0, 258.0, 253.0, 269.0, 278.0, 301.0, 265.0, 257.0, 280.0, 290.0, 258.0, 258.0, 292.0, 278.0, 264.0, 266.0, 261.0, 261.0, 253.0, 248.0, 264.0, 255.0, 252.0, 261.0, 249.0, 270.0, 258.0, 264.0, 295.0, 278.0, 281.0, 292.0, 284.0, 283.0, 278.0, 283.0, 275.0, 292.0, 232.0, 241.0, 263.0, 221.0, 259.0, 257.0, 91.0, 89.0, 270.0, 246.0, 261.0, 258.0, 272.0, 247.0, 271.0, 250.0, 262.0, 
268.0, 263.0, 262.0, 260.0, 265.0, 293.0, 277.0, 226.0, 239.0, 283.0, 290.0, 264.0, 263.0, 251.0, 271.0, 257.0, 244.0, 277.0, 250.0, 260.0, 264.0, 276.0, 242.0, 242.0, 256.0, 255.0, 261.0, 255.0, 264.0, 251.0, 271.0, 261.0, 252.0, 299.0, 274.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6843184248913212, "mean_inference_ms": 1.2072503599124262, "mean_action_processing_ms": 0.13244891511509294, "mean_env_wait_ms": 0.8425490004197971, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 3456000, "num_agent_steps_trained": 3456000, "num_env_steps_sampled": 1728000, "num_env_steps_trained": 1728000, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 1728000, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 3456000, "timers": {"training_iteration_time_ms": 3693.588, "learn_time_ms": 1146.458, "learn_throughput": 11164.821, "synch_weights_time_ms": 12.713}, "counters": {"num_env_steps_sampled": 1728000, "num_env_steps_trained": 1728000, "num_agent_steps_sampled": 3456000, "num_agent_steps_trained": 3456000}, "done": false, "episodes_total": 4320, "training_iteration": 135, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-08-44", "timestamp": 1666580924, "time_this_iter_s": 3.7817649841308594, "time_total_s": 507.8750765323639, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 507.8750765323639, "timesteps_since_restore": 0, "iterations_since_restore": 135, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.72, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 183.6, "sparse_reward_min": 60, "sparse_reward_max": 200, "shaped_reward_mean": 161.99, "shaped_reward_min": 60, "shaped_reward_max": 182, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.39, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 15.75, "onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 14.92, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 15.27, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.41, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.36, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.23, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, 
"useful_onion_drop_agent_1_mean": 0.15, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 14.61, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 15.1, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.26, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.19, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 4.42, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.25, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.37, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.38, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.11, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.07, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 4.79, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.63, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.7, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.56, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.04, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.61, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 15.1, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.61, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 15.1, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0024380050599575043, "policy_loss": -0.002622002735733986, "vf_loss": 
7.156221389770508, "vf_explained_var": 0.6949341297149658, "kl": 0.001646326039917767, "entropy": 1.0632463693618774, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 1740800, "num_env_steps_trained": 1740800, "num_agent_steps_sampled": 3481600, "num_agent_steps_trained": 3481600}, "sampler_results": {"episode_reward_max": 582.0, "episode_reward_min": 180.0, "episode_reward_mean": 529.19, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 89.0}, "policy_reward_max": {"ppo": 301.0}, "policy_reward_mean": {"ppo": 264.595}, "custom_metrics": {"sparse_reward_mean": 183.6, "sparse_reward_min": 60, "sparse_reward_max": 200, "shaped_reward_mean": 161.99, "shaped_reward_min": 60, "shaped_reward_max": 182, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.39, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 15.75, 
"onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 14.92, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 15.27, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.41, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.36, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.23, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.15, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 14.61, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 15.1, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.26, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.19, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 4.42, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.25, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.37, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.38, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.11, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.07, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 4.79, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.63, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.7, "soup_delivery_agent_0_min": 1, 
"soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.56, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.04, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.61, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 15.1, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.61, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 15.1, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [513.0, 522.0, 421.0, 516.0, 519.0, 516.0, 519.0, 504.0, 576.0, 530.0, 519.0, 525.0, 507.0, 522.0, 573.0, 573.0, 573.0, 527.0, 522.0, 573.0, 521.0, 522.0, 504.0, 519.0, 522.0, 579.0, 522.0, 570.0, 516.0, 570.0, 530.0, 522.0, 501.0, 519.0, 513.0, 519.0, 522.0, 573.0, 573.0, 567.0, 561.0, 567.0, 473.0, 484.0, 516.0, 180.0, 516.0, 519.0, 519.0, 521.0, 530.0, 525.0, 525.0, 570.0, 465.0, 573.0, 527.0, 522.0, 501.0, 527.0, 524.0, 518.0, 498.0, 516.0, 519.0, 522.0, 513.0, 573.0, 579.0, 516.0, 525.0, 579.0, 567.0, 522.0, 570.0, 570.0, 530.0, 522.0, 573.0, 573.0, 530.0, 522.0, 519.0, 582.0, 525.0, 530.0, 525.0, 515.0, 567.0, 522.0, 522.0, 576.0, 519.0, 573.0, 573.0, 525.0, 576.0, 510.0, 522.0, 522.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [257.0, 256.0, 267.0, 255.0, 216.0, 205.0, 270.0, 246.0, 263.0, 256.0, 260.0, 256.0, 263.0, 256.0, 244.0, 260.0, 280.0, 296.0, 258.0, 272.0, 271.0, 248.0, 289.0, 236.0, 243.0, 264.0, 254.0, 268.0, 286.0, 287.0, 283.0, 290.0, 280.0, 293.0, 265.0, 262.0, 270.0, 252.0, 290.0, 283.0, 250.0, 271.0, 264.0, 258.0, 240.0, 264.0, 261.0, 258.0, 253.0, 
269.0, 278.0, 301.0, 265.0, 257.0, 280.0, 290.0, 258.0, 258.0, 292.0, 278.0, 264.0, 266.0, 261.0, 261.0, 253.0, 248.0, 264.0, 255.0, 252.0, 261.0, 249.0, 270.0, 258.0, 264.0, 295.0, 278.0, 281.0, 292.0, 284.0, 283.0, 278.0, 283.0, 275.0, 292.0, 232.0, 241.0, 263.0, 221.0, 259.0, 257.0, 91.0, 89.0, 270.0, 246.0, 261.0, 258.0, 272.0, 247.0, 271.0, 250.0, 262.0, 268.0, 263.0, 262.0, 260.0, 265.0, 293.0, 277.0, 226.0, 239.0, 283.0, 290.0, 264.0, 263.0, 251.0, 271.0, 257.0, 244.0, 277.0, 250.0, 260.0, 264.0, 276.0, 242.0, 242.0, 256.0, 255.0, 261.0, 255.0, 264.0, 251.0, 271.0, 261.0, 252.0, 299.0, 274.0, 288.0, 291.0, 267.0, 249.0, 262.0, 263.0, 282.0, 297.0, 275.0, 292.0, 255.0, 267.0, 287.0, 283.0, 290.0, 280.0, 263.0, 267.0, 253.0, 269.0, 286.0, 287.0, 279.0, 294.0, 263.0, 267.0, 263.0, 259.0, 259.0, 260.0, 288.0, 294.0, 261.0, 264.0, 260.0, 270.0, 260.0, 265.0, 262.0, 253.0, 279.0, 288.0, 250.0, 272.0, 267.0, 255.0, 290.0, 286.0, 271.0, 248.0, 277.0, 296.0, 293.0, 280.0, 252.0, 273.0, 276.0, 300.0, 261.0, 249.0, 252.0, 270.0, 255.0, 267.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6843760468855757, "mean_inference_ms": 1.2071599936525805, "mean_action_processing_ms": 0.1324412382116291, "mean_env_wait_ms": 0.8422914478679702, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 582.0, "episode_reward_min": 180.0, "episode_reward_mean": 529.19, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 89.0}, "policy_reward_max": {"ppo": 301.0}, "policy_reward_mean": {"ppo": 264.595}, "hist_stats": {"episode_reward": [513.0, 522.0, 421.0, 516.0, 519.0, 516.0, 519.0, 504.0, 576.0, 530.0, 519.0, 525.0, 507.0, 522.0, 573.0, 573.0, 573.0, 527.0, 522.0, 573.0, 521.0, 522.0, 504.0, 519.0, 522.0, 579.0, 522.0, 570.0, 516.0, 570.0, 530.0, 522.0, 501.0, 519.0, 513.0, 519.0, 522.0, 573.0, 573.0, 567.0, 561.0, 567.0, 473.0, 484.0, 516.0, 180.0, 516.0, 519.0, 519.0, 521.0, 530.0, 525.0, 525.0, 570.0, 465.0, 573.0, 527.0, 
522.0, 501.0, 527.0, 524.0, 518.0, 498.0, 516.0, 519.0, 522.0, 513.0, 573.0, 579.0, 516.0, 525.0, 579.0, 567.0, 522.0, 570.0, 570.0, 530.0, 522.0, 573.0, 573.0, 530.0, 522.0, 519.0, 582.0, 525.0, 530.0, 525.0, 515.0, 567.0, 522.0, 522.0, 576.0, 519.0, 573.0, 573.0, 525.0, 576.0, 510.0, 522.0, 522.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [257.0, 256.0, 267.0, 255.0, 216.0, 205.0, 270.0, 246.0, 263.0, 256.0, 260.0, 256.0, 263.0, 256.0, 244.0, 260.0, 280.0, 296.0, 258.0, 272.0, 271.0, 248.0, 289.0, 236.0, 243.0, 264.0, 254.0, 268.0, 286.0, 287.0, 283.0, 290.0, 280.0, 293.0, 265.0, 262.0, 270.0, 252.0, 290.0, 283.0, 250.0, 271.0, 264.0, 258.0, 240.0, 264.0, 261.0, 258.0, 253.0, 269.0, 278.0, 301.0, 265.0, 257.0, 280.0, 290.0, 258.0, 258.0, 292.0, 278.0, 264.0, 266.0, 261.0, 261.0, 253.0, 248.0, 264.0, 255.0, 252.0, 261.0, 249.0, 270.0, 258.0, 264.0, 295.0, 278.0, 281.0, 292.0, 284.0, 283.0, 278.0, 283.0, 275.0, 292.0, 232.0, 241.0, 263.0, 221.0, 259.0, 257.0, 91.0, 89.0, 270.0, 246.0, 261.0, 258.0, 272.0, 247.0, 271.0, 250.0, 262.0, 268.0, 263.0, 262.0, 260.0, 265.0, 293.0, 277.0, 226.0, 239.0, 283.0, 290.0, 264.0, 263.0, 251.0, 271.0, 257.0, 244.0, 277.0, 250.0, 260.0, 264.0, 276.0, 242.0, 242.0, 256.0, 255.0, 261.0, 255.0, 264.0, 251.0, 271.0, 261.0, 252.0, 299.0, 274.0, 288.0, 291.0, 267.0, 249.0, 262.0, 263.0, 282.0, 297.0, 275.0, 292.0, 255.0, 267.0, 287.0, 283.0, 290.0, 280.0, 263.0, 267.0, 253.0, 269.0, 286.0, 287.0, 279.0, 294.0, 263.0, 267.0, 263.0, 259.0, 259.0, 
260.0, 288.0, 294.0, 261.0, 264.0, 260.0, 270.0, 260.0, 265.0, 262.0, 253.0, 279.0, 288.0, 250.0, 272.0, 267.0, 255.0, 290.0, 286.0, 271.0, 248.0, 277.0, 296.0, 293.0, 280.0, 252.0, 273.0, 276.0, 300.0, 261.0, 249.0, 252.0, 270.0, 255.0, 267.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6843760468855757, "mean_inference_ms": 1.2071599936525805, "mean_action_processing_ms": 0.1324412382116291, "mean_env_wait_ms": 0.8422914478679702, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 3481600, "num_agent_steps_trained": 3481600, "num_env_steps_sampled": 1740800, "num_env_steps_trained": 1740800, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 1740800, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 3481600, "timers": {"training_iteration_time_ms": 3693.703, "learn_time_ms": 1147.257, "learn_throughput": 11157.045, "synch_weights_time_ms": 13.631}, "counters": {"num_env_steps_sampled": 1740800, "num_env_steps_trained": 1740800, "num_agent_steps_sampled": 3481600, "num_agent_steps_trained": 3481600}, "done": false, "episodes_total": 4352, "training_iteration": 136, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-08-48", "timestamp": 1666580928, "time_this_iter_s": 3.640089273452759, "time_total_s": 511.51516580581665, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 511.51516580581665, "timesteps_since_restore": 0, "iterations_since_restore": 136, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.866666666666667, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 184.2, "sparse_reward_min": 60, "sparse_reward_max": 200, "shaped_reward_mean": 163.23, "shaped_reward_min": 60, "shaped_reward_max": 182, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.36, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 15.84, "onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 14.88, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 15.53, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.36, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.38, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.19, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, 
"useful_onion_drop_agent_1_mean": 0.17, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 14.67, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 15.17, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.17, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.3, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.41, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.46, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 10, "dish_drop_agent_0_mean": 0.34, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.35, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.08, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.07, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.71, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.75, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 4.63, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.69, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.03, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.67, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 15.17, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.67, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 15.17, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0016262221615761518, "policy_loss": -0.0018370456527918577, "vf_loss": 
7.367179870605469, "vf_explained_var": 0.6974908113479614, "kl": 0.001954792533069849, "entropy": 1.0517871379852295, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 1753600, "num_env_steps_trained": 1753600, "num_agent_steps_sampled": 3507200, "num_agent_steps_trained": 3507200}, "sampler_results": {"episode_reward_max": 582.0, "episode_reward_min": 180.0, "episode_reward_mean": 531.63, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 89.0}, "policy_reward_max": {"ppo": 311.0}, "policy_reward_mean": {"ppo": 265.815}, "custom_metrics": {"sparse_reward_mean": 184.2, "sparse_reward_min": 60, "sparse_reward_max": 200, "shaped_reward_mean": 163.23, "shaped_reward_min": 60, "shaped_reward_max": 182, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.36, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 15.84, 
"onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 14.88, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 15.53, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.36, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.38, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.19, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.17, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 14.67, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 15.17, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.17, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.3, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.41, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.46, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 10, "dish_drop_agent_0_mean": 0.34, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.35, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.08, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.07, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.71, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.75, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 4.63, "soup_delivery_agent_0_min": 1, 
"soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.69, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.03, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.67, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 15.17, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.67, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 15.17, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [501.0, 519.0, 513.0, 519.0, 522.0, 573.0, 573.0, 567.0, 561.0, 567.0, 473.0, 484.0, 516.0, 180.0, 516.0, 519.0, 519.0, 521.0, 530.0, 525.0, 525.0, 570.0, 465.0, 573.0, 527.0, 522.0, 501.0, 527.0, 524.0, 518.0, 498.0, 516.0, 519.0, 522.0, 513.0, 573.0, 579.0, 516.0, 525.0, 579.0, 567.0, 522.0, 570.0, 570.0, 530.0, 522.0, 573.0, 573.0, 530.0, 522.0, 519.0, 582.0, 525.0, 530.0, 525.0, 515.0, 567.0, 522.0, 522.0, 576.0, 519.0, 573.0, 573.0, 525.0, 576.0, 510.0, 522.0, 522.0, 576.0, 525.0, 576.0, 522.0, 573.0, 573.0, 582.0, 573.0, 527.0, 519.0, 579.0, 525.0, 533.0, 510.0, 516.0, 519.0, 573.0, 570.0, 522.0, 516.0, 495.0, 525.0, 515.0, 582.0, 576.0, 465.0, 573.0, 576.0, 516.0, 576.0, 490.0, 393.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [253.0, 248.0, 264.0, 255.0, 252.0, 261.0, 249.0, 270.0, 258.0, 264.0, 295.0, 278.0, 281.0, 292.0, 284.0, 283.0, 278.0, 283.0, 275.0, 292.0, 232.0, 241.0, 263.0, 221.0, 259.0, 257.0, 91.0, 89.0, 270.0, 246.0, 261.0, 258.0, 272.0, 247.0, 271.0, 250.0, 262.0, 268.0, 263.0, 262.0, 260.0, 265.0, 293.0, 277.0, 226.0, 239.0, 283.0, 290.0, 264.0, 
263.0, 251.0, 271.0, 257.0, 244.0, 277.0, 250.0, 260.0, 264.0, 276.0, 242.0, 242.0, 256.0, 255.0, 261.0, 255.0, 264.0, 251.0, 271.0, 261.0, 252.0, 299.0, 274.0, 288.0, 291.0, 267.0, 249.0, 262.0, 263.0, 282.0, 297.0, 275.0, 292.0, 255.0, 267.0, 287.0, 283.0, 290.0, 280.0, 263.0, 267.0, 253.0, 269.0, 286.0, 287.0, 279.0, 294.0, 263.0, 267.0, 263.0, 259.0, 259.0, 260.0, 288.0, 294.0, 261.0, 264.0, 260.0, 270.0, 260.0, 265.0, 262.0, 253.0, 279.0, 288.0, 250.0, 272.0, 267.0, 255.0, 290.0, 286.0, 271.0, 248.0, 277.0, 296.0, 293.0, 280.0, 252.0, 273.0, 276.0, 300.0, 261.0, 249.0, 252.0, 270.0, 255.0, 267.0, 290.0, 286.0, 256.0, 269.0, 302.0, 274.0, 253.0, 269.0, 288.0, 285.0, 279.0, 294.0, 271.0, 311.0, 296.0, 277.0, 257.0, 270.0, 266.0, 253.0, 286.0, 293.0, 269.0, 256.0, 263.0, 270.0, 237.0, 273.0, 254.0, 262.0, 267.0, 252.0, 285.0, 288.0, 283.0, 287.0, 265.0, 257.0, 254.0, 262.0, 235.0, 260.0, 267.0, 258.0, 259.0, 256.0, 297.0, 285.0, 285.0, 291.0, 231.0, 234.0, 270.0, 303.0, 290.0, 286.0, 258.0, 258.0, 279.0, 297.0, 235.0, 255.0, 188.0, 205.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.684422121241015, "mean_inference_ms": 1.2070898198034101, "mean_action_processing_ms": 0.13243730383591618, "mean_env_wait_ms": 0.8420708101762324, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 582.0, "episode_reward_min": 180.0, "episode_reward_mean": 531.63, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 89.0}, "policy_reward_max": {"ppo": 311.0}, "policy_reward_mean": {"ppo": 265.815}, "hist_stats": {"episode_reward": [501.0, 519.0, 513.0, 519.0, 522.0, 573.0, 573.0, 567.0, 561.0, 567.0, 473.0, 484.0, 516.0, 180.0, 516.0, 519.0, 519.0, 521.0, 530.0, 525.0, 525.0, 570.0, 465.0, 573.0, 527.0, 522.0, 501.0, 527.0, 524.0, 518.0, 498.0, 516.0, 519.0, 522.0, 513.0, 573.0, 579.0, 516.0, 525.0, 579.0, 567.0, 522.0, 570.0, 570.0, 530.0, 522.0, 573.0, 573.0, 530.0, 522.0, 519.0, 582.0, 525.0, 530.0, 525.0, 515.0, 567.0, 
522.0, 522.0, 576.0, 519.0, 573.0, 573.0, 525.0, 576.0, 510.0, 522.0, 522.0, 576.0, 525.0, 576.0, 522.0, 573.0, 573.0, 582.0, 573.0, 527.0, 519.0, 579.0, 525.0, 533.0, 510.0, 516.0, 519.0, 573.0, 570.0, 522.0, 516.0, 495.0, 525.0, 515.0, 582.0, 576.0, 465.0, 573.0, 576.0, 516.0, 576.0, 490.0, 393.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [253.0, 248.0, 264.0, 255.0, 252.0, 261.0, 249.0, 270.0, 258.0, 264.0, 295.0, 278.0, 281.0, 292.0, 284.0, 283.0, 278.0, 283.0, 275.0, 292.0, 232.0, 241.0, 263.0, 221.0, 259.0, 257.0, 91.0, 89.0, 270.0, 246.0, 261.0, 258.0, 272.0, 247.0, 271.0, 250.0, 262.0, 268.0, 263.0, 262.0, 260.0, 265.0, 293.0, 277.0, 226.0, 239.0, 283.0, 290.0, 264.0, 263.0, 251.0, 271.0, 257.0, 244.0, 277.0, 250.0, 260.0, 264.0, 276.0, 242.0, 242.0, 256.0, 255.0, 261.0, 255.0, 264.0, 251.0, 271.0, 261.0, 252.0, 299.0, 274.0, 288.0, 291.0, 267.0, 249.0, 262.0, 263.0, 282.0, 297.0, 275.0, 292.0, 255.0, 267.0, 287.0, 283.0, 290.0, 280.0, 263.0, 267.0, 253.0, 269.0, 286.0, 287.0, 279.0, 294.0, 263.0, 267.0, 263.0, 259.0, 259.0, 260.0, 288.0, 294.0, 261.0, 264.0, 260.0, 270.0, 260.0, 265.0, 262.0, 253.0, 279.0, 288.0, 250.0, 272.0, 267.0, 255.0, 290.0, 286.0, 271.0, 248.0, 277.0, 296.0, 293.0, 280.0, 252.0, 273.0, 276.0, 300.0, 261.0, 249.0, 252.0, 270.0, 255.0, 267.0, 290.0, 286.0, 256.0, 269.0, 302.0, 274.0, 253.0, 269.0, 288.0, 285.0, 279.0, 294.0, 271.0, 311.0, 296.0, 277.0, 257.0, 270.0, 266.0, 253.0, 286.0, 293.0, 269.0, 256.0, 263.0, 270.0, 237.0, 273.0, 254.0, 
262.0, 267.0, 252.0, 285.0, 288.0, 283.0, 287.0, 265.0, 257.0, 254.0, 262.0, 235.0, 260.0, 267.0, 258.0, 259.0, 256.0, 297.0, 285.0, 285.0, 291.0, 231.0, 234.0, 270.0, 303.0, 290.0, 286.0, 258.0, 258.0, 279.0, 297.0, 235.0, 255.0, 188.0, 205.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.684422121241015, "mean_inference_ms": 1.2070898198034101, "mean_action_processing_ms": 0.13243730383591618, "mean_env_wait_ms": 0.8420708101762324, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 3507200, "num_agent_steps_trained": 3507200, "num_env_steps_sampled": 1753600, "num_env_steps_trained": 1753600, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 1753600, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 3507200, "timers": {"training_iteration_time_ms": 3692.907, "learn_time_ms": 1143.935, "learn_throughput": 11189.442, "synch_weights_time_ms": 13.824}, "counters": {"num_env_steps_sampled": 1753600, "num_env_steps_trained": 1753600, "num_agent_steps_sampled": 3507200, "num_agent_steps_trained": 3507200}, "done": false, "episodes_total": 4384, "training_iteration": 137, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-08-51", "timestamp": 1666580931, "time_this_iter_s": 3.6196677684783936, "time_total_s": 515.134833574295, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 515.134833574295, "timesteps_since_restore": 0, "iterations_since_restore": 137, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.880000000000003, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 188.0, "sparse_reward_min": 120, "sparse_reward_max": 200, "shaped_reward_mean": 166.32, "shaped_reward_min": 132, "shaped_reward_max": 182, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.72, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 15.96, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 15.3, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 15.64, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.34, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.31, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.17, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, 
"useful_onion_drop_agent_1_mean": 0.14, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 15.01, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 15.35, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.07, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.52, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.35, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.78, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 10, "dish_drop_agent_0_mean": 0.33, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.32, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.12, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.09, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.59, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 5.0, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 4.53, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.99, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.01, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 15.35, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.01, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 15.35, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0028702523559331894, "policy_loss": -0.0030703365337103605, "vf_loss": 
7.255067825317383, "vf_explained_var": 0.7090296745300293, "kl": 0.002137089380994439, "entropy": 1.05084228515625, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 1766400, "num_env_steps_trained": 1766400, "num_agent_steps_sampled": 3532800, "num_agent_steps_trained": 3532800}, "sampler_results": {"episode_reward_max": 582.0, "episode_reward_min": 393.0, "episode_reward_mean": 542.32, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 188.0}, "policy_reward_max": {"ppo": 311.0}, "policy_reward_mean": {"ppo": 271.16}, "custom_metrics": {"sparse_reward_mean": 188.0, "sparse_reward_min": 120, "sparse_reward_max": 200, "shaped_reward_mean": 166.32, "shaped_reward_min": 132, "shaped_reward_max": 182, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.72, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 15.96, 
"onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 15.3, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 15.64, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.34, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.31, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.17, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.14, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 15.01, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 15.35, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.07, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.52, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.35, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.78, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 10, "dish_drop_agent_0_mean": 0.33, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.32, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.12, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.09, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.59, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 5.0, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 4.53, "soup_delivery_agent_0_min": 1, 
"soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.99, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.01, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 15.35, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.01, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 15.35, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [519.0, 522.0, 513.0, 573.0, 579.0, 516.0, 525.0, 579.0, 567.0, 522.0, 570.0, 570.0, 530.0, 522.0, 573.0, 573.0, 530.0, 522.0, 519.0, 582.0, 525.0, 530.0, 525.0, 515.0, 567.0, 522.0, 522.0, 576.0, 519.0, 573.0, 573.0, 525.0, 576.0, 510.0, 522.0, 522.0, 576.0, 525.0, 576.0, 522.0, 573.0, 573.0, 582.0, 573.0, 527.0, 519.0, 579.0, 525.0, 533.0, 510.0, 516.0, 519.0, 573.0, 570.0, 522.0, 516.0, 495.0, 525.0, 515.0, 582.0, 576.0, 465.0, 573.0, 576.0, 516.0, 576.0, 490.0, 393.0, 513.0, 513.0, 576.0, 576.0, 561.0, 492.0, 567.0, 527.0, 573.0, 519.0, 576.0, 522.0, 567.0, 530.0, 567.0, 522.0, 579.0, 564.0, 525.0, 519.0, 573.0, 522.0, 573.0, 573.0, 579.0, 536.0, 573.0, 527.0, 522.0, 576.0, 521.0, 570.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [255.0, 264.0, 251.0, 271.0, 261.0, 252.0, 299.0, 274.0, 288.0, 291.0, 267.0, 249.0, 262.0, 263.0, 282.0, 297.0, 275.0, 292.0, 255.0, 267.0, 287.0, 283.0, 290.0, 280.0, 263.0, 267.0, 253.0, 269.0, 286.0, 287.0, 279.0, 294.0, 263.0, 267.0, 263.0, 259.0, 259.0, 260.0, 288.0, 294.0, 261.0, 264.0, 260.0, 270.0, 260.0, 265.0, 262.0, 253.0, 279.0, 
288.0, 250.0, 272.0, 267.0, 255.0, 290.0, 286.0, 271.0, 248.0, 277.0, 296.0, 293.0, 280.0, 252.0, 273.0, 276.0, 300.0, 261.0, 249.0, 252.0, 270.0, 255.0, 267.0, 290.0, 286.0, 256.0, 269.0, 302.0, 274.0, 253.0, 269.0, 288.0, 285.0, 279.0, 294.0, 271.0, 311.0, 296.0, 277.0, 257.0, 270.0, 266.0, 253.0, 286.0, 293.0, 269.0, 256.0, 263.0, 270.0, 237.0, 273.0, 254.0, 262.0, 267.0, 252.0, 285.0, 288.0, 283.0, 287.0, 265.0, 257.0, 254.0, 262.0, 235.0, 260.0, 267.0, 258.0, 259.0, 256.0, 297.0, 285.0, 285.0, 291.0, 231.0, 234.0, 270.0, 303.0, 290.0, 286.0, 258.0, 258.0, 279.0, 297.0, 235.0, 255.0, 188.0, 205.0, 271.0, 242.0, 239.0, 274.0, 267.0, 309.0, 298.0, 278.0, 274.0, 287.0, 244.0, 248.0, 278.0, 289.0, 257.0, 270.0, 288.0, 285.0, 263.0, 256.0, 290.0, 286.0, 255.0, 267.0, 276.0, 291.0, 271.0, 259.0, 286.0, 281.0, 264.0, 258.0, 287.0, 292.0, 284.0, 280.0, 254.0, 271.0, 240.0, 279.0, 291.0, 282.0, 269.0, 253.0, 287.0, 286.0, 289.0, 284.0, 292.0, 287.0, 263.0, 273.0, 275.0, 298.0, 265.0, 262.0, 264.0, 258.0, 287.0, 289.0, 249.0, 272.0, 269.0, 301.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6844500985302645, "mean_inference_ms": 1.2070711367201046, "mean_action_processing_ms": 0.13244079028015465, "mean_env_wait_ms": 0.8419031688694641, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 582.0, "episode_reward_min": 393.0, "episode_reward_mean": 542.32, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 188.0}, "policy_reward_max": {"ppo": 311.0}, "policy_reward_mean": {"ppo": 271.16}, "hist_stats": {"episode_reward": [519.0, 522.0, 513.0, 573.0, 579.0, 516.0, 525.0, 579.0, 567.0, 522.0, 570.0, 570.0, 530.0, 522.0, 573.0, 573.0, 530.0, 522.0, 519.0, 582.0, 525.0, 530.0, 525.0, 515.0, 567.0, 522.0, 522.0, 576.0, 519.0, 573.0, 573.0, 525.0, 576.0, 510.0, 522.0, 522.0, 576.0, 525.0, 576.0, 522.0, 573.0, 573.0, 582.0, 573.0, 527.0, 519.0, 579.0, 525.0, 533.0, 510.0, 516.0, 519.0, 573.0, 570.0, 522.0, 516.0, 495.0, 
525.0, 515.0, 582.0, 576.0, 465.0, 573.0, 576.0, 516.0, 576.0, 490.0, 393.0, 513.0, 513.0, 576.0, 576.0, 561.0, 492.0, 567.0, 527.0, 573.0, 519.0, 576.0, 522.0, 567.0, 530.0, 567.0, 522.0, 579.0, 564.0, 525.0, 519.0, 573.0, 522.0, 573.0, 573.0, 579.0, 536.0, 573.0, 527.0, 522.0, 576.0, 521.0, 570.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [255.0, 264.0, 251.0, 271.0, 261.0, 252.0, 299.0, 274.0, 288.0, 291.0, 267.0, 249.0, 262.0, 263.0, 282.0, 297.0, 275.0, 292.0, 255.0, 267.0, 287.0, 283.0, 290.0, 280.0, 263.0, 267.0, 253.0, 269.0, 286.0, 287.0, 279.0, 294.0, 263.0, 267.0, 263.0, 259.0, 259.0, 260.0, 288.0, 294.0, 261.0, 264.0, 260.0, 270.0, 260.0, 265.0, 262.0, 253.0, 279.0, 288.0, 250.0, 272.0, 267.0, 255.0, 290.0, 286.0, 271.0, 248.0, 277.0, 296.0, 293.0, 280.0, 252.0, 273.0, 276.0, 300.0, 261.0, 249.0, 252.0, 270.0, 255.0, 267.0, 290.0, 286.0, 256.0, 269.0, 302.0, 274.0, 253.0, 269.0, 288.0, 285.0, 279.0, 294.0, 271.0, 311.0, 296.0, 277.0, 257.0, 270.0, 266.0, 253.0, 286.0, 293.0, 269.0, 256.0, 263.0, 270.0, 237.0, 273.0, 254.0, 262.0, 267.0, 252.0, 285.0, 288.0, 283.0, 287.0, 265.0, 257.0, 254.0, 262.0, 235.0, 260.0, 267.0, 258.0, 259.0, 256.0, 297.0, 285.0, 285.0, 291.0, 231.0, 234.0, 270.0, 303.0, 290.0, 286.0, 258.0, 258.0, 279.0, 297.0, 235.0, 255.0, 188.0, 205.0, 271.0, 242.0, 239.0, 274.0, 267.0, 309.0, 298.0, 278.0, 274.0, 287.0, 244.0, 248.0, 278.0, 289.0, 257.0, 270.0, 288.0, 285.0, 263.0, 256.0, 290.0, 286.0, 255.0, 267.0, 276.0, 291.0, 271.0, 259.0, 286.0, 
281.0, 264.0, 258.0, 287.0, 292.0, 284.0, 280.0, 254.0, 271.0, 240.0, 279.0, 291.0, 282.0, 269.0, 253.0, 287.0, 286.0, 289.0, 284.0, 292.0, 287.0, 263.0, 273.0, 275.0, 298.0, 265.0, 262.0, 264.0, 258.0, 287.0, 289.0, 249.0, 272.0, 269.0, 301.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6844500985302645, "mean_inference_ms": 1.2070711367201046, "mean_action_processing_ms": 0.13244079028015465, "mean_env_wait_ms": 0.8419031688694641, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 3532800, "num_agent_steps_trained": 3532800, "num_env_steps_sampled": 1766400, "num_env_steps_trained": 1766400, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 1766400, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 3532800, "timers": {"training_iteration_time_ms": 3703.337, "learn_time_ms": 1139.988, "learn_throughput": 11228.189, "synch_weights_time_ms": 14.732}, "counters": {"num_env_steps_sampled": 1766400, "num_env_steps_trained": 1766400, "num_agent_steps_sampled": 3532800, "num_agent_steps_trained": 3532800}, "done": false, "episodes_total": 4416, "training_iteration": 138, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-08-55", "timestamp": 1666580935, "time_this_iter_s": 3.8024544715881348, "time_total_s": 518.9372880458832, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 518.9372880458832, "timesteps_since_restore": 0, "iterations_since_restore": 138, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.8, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 186.6, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 165.67, "shaped_reward_min": 132, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.58, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 16.0, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 15.18, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 15.65, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.33, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.27, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.21, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, 
"useful_onion_drop_agent_1_mean": 0.15, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.86, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 15.36, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.16, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.42, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.45, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.62, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 10, "dish_drop_agent_0_mean": 0.32, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.41, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.14, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.15, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 4.68, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.88, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 4.61, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.88, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.86, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 15.36, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.86, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 15.36, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.001902009709738195, "policy_loss": -0.002099734963849187, "vf_loss": 
7.285922050476074, "vf_explained_var": 0.6993359327316284, "kl": 0.0017973913345485926, "entropy": 1.0617306232452393, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 1779200, "num_env_steps_trained": 1779200, "num_agent_steps_sampled": 3558400, "num_agent_steps_trained": 3558400}, "sampler_results": {"episode_reward_max": 630.0, "episode_reward_min": 379.0, "episode_reward_mean": 538.87, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 180.0}, "policy_reward_max": {"ppo": 316.0}, "policy_reward_mean": {"ppo": 269.435}, "custom_metrics": {"sparse_reward_mean": 186.6, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 165.67, "shaped_reward_min": 132, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.58, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 16.0, 
"onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 15.18, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 15.65, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.33, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.27, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.21, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.15, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.86, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 15.36, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.16, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.42, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.45, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.62, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 10, "dish_drop_agent_0_mean": 0.32, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.41, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.14, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.15, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 4.68, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.88, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 4.61, "soup_delivery_agent_0_min": 1, 
"soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.88, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.86, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 15.36, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.86, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 15.36, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [576.0, 510.0, 522.0, 522.0, 576.0, 525.0, 576.0, 522.0, 573.0, 573.0, 582.0, 573.0, 527.0, 519.0, 579.0, 525.0, 533.0, 510.0, 516.0, 519.0, 573.0, 570.0, 522.0, 516.0, 495.0, 525.0, 515.0, 582.0, 576.0, 465.0, 573.0, 576.0, 516.0, 576.0, 490.0, 393.0, 513.0, 513.0, 576.0, 576.0, 561.0, 492.0, 567.0, 527.0, 573.0, 519.0, 576.0, 522.0, 567.0, 530.0, 567.0, 522.0, 579.0, 564.0, 525.0, 519.0, 573.0, 522.0, 573.0, 573.0, 579.0, 536.0, 573.0, 527.0, 522.0, 576.0, 521.0, 570.0, 573.0, 479.0, 579.0, 518.0, 573.0, 573.0, 567.0, 522.0, 501.0, 576.0, 630.0, 525.0, 516.0, 519.0, 573.0, 527.0, 573.0, 510.0, 522.0, 473.0, 525.0, 487.0, 516.0, 570.0, 579.0, 525.0, 530.0, 379.0, 525.0, 522.0, 473.0, 573.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [276.0, 300.0, 261.0, 249.0, 252.0, 270.0, 255.0, 267.0, 290.0, 286.0, 256.0, 269.0, 302.0, 274.0, 253.0, 269.0, 288.0, 285.0, 279.0, 294.0, 271.0, 311.0, 296.0, 277.0, 257.0, 270.0, 266.0, 253.0, 286.0, 293.0, 269.0, 256.0, 263.0, 270.0, 237.0, 273.0, 254.0, 262.0, 267.0, 252.0, 285.0, 288.0, 283.0, 287.0, 265.0, 257.0, 254.0, 262.0, 235.0, 
260.0, 267.0, 258.0, 259.0, 256.0, 297.0, 285.0, 285.0, 291.0, 231.0, 234.0, 270.0, 303.0, 290.0, 286.0, 258.0, 258.0, 279.0, 297.0, 235.0, 255.0, 188.0, 205.0, 271.0, 242.0, 239.0, 274.0, 267.0, 309.0, 298.0, 278.0, 274.0, 287.0, 244.0, 248.0, 278.0, 289.0, 257.0, 270.0, 288.0, 285.0, 263.0, 256.0, 290.0, 286.0, 255.0, 267.0, 276.0, 291.0, 271.0, 259.0, 286.0, 281.0, 264.0, 258.0, 287.0, 292.0, 284.0, 280.0, 254.0, 271.0, 240.0, 279.0, 291.0, 282.0, 269.0, 253.0, 287.0, 286.0, 289.0, 284.0, 292.0, 287.0, 263.0, 273.0, 275.0, 298.0, 265.0, 262.0, 264.0, 258.0, 287.0, 289.0, 249.0, 272.0, 269.0, 301.0, 294.0, 279.0, 240.0, 239.0, 295.0, 284.0, 262.0, 256.0, 281.0, 292.0, 269.0, 304.0, 281.0, 286.0, 273.0, 249.0, 249.0, 252.0, 287.0, 289.0, 316.0, 314.0, 268.0, 257.0, 260.0, 256.0, 257.0, 262.0, 272.0, 301.0, 273.0, 254.0, 285.0, 288.0, 254.0, 256.0, 258.0, 264.0, 231.0, 242.0, 271.0, 254.0, 238.0, 249.0, 268.0, 248.0, 295.0, 275.0, 293.0, 286.0, 261.0, 264.0, 265.0, 265.0, 180.0, 199.0, 267.0, 258.0, 257.0, 265.0, 247.0, 226.0, 301.0, 272.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6844477582222729, "mean_inference_ms": 1.2070333180594002, "mean_action_processing_ms": 0.1324468637600638, "mean_env_wait_ms": 0.8417296114074044, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 630.0, "episode_reward_min": 379.0, "episode_reward_mean": 538.87, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 180.0}, "policy_reward_max": {"ppo": 316.0}, "policy_reward_mean": {"ppo": 269.435}, "hist_stats": {"episode_reward": [576.0, 510.0, 522.0, 522.0, 576.0, 525.0, 576.0, 522.0, 573.0, 573.0, 582.0, 573.0, 527.0, 519.0, 579.0, 525.0, 533.0, 510.0, 516.0, 519.0, 573.0, 570.0, 522.0, 516.0, 495.0, 525.0, 515.0, 582.0, 576.0, 465.0, 573.0, 576.0, 516.0, 576.0, 490.0, 393.0, 513.0, 513.0, 576.0, 576.0, 561.0, 492.0, 567.0, 527.0, 573.0, 519.0, 576.0, 522.0, 567.0, 530.0, 567.0, 522.0, 579.0, 564.0, 525.0, 519.0, 573.0, 
522.0, 573.0, 573.0, 579.0, 536.0, 573.0, 527.0, 522.0, 576.0, 521.0, 570.0, 573.0, 479.0, 579.0, 518.0, 573.0, 573.0, 567.0, 522.0, 501.0, 576.0, 630.0, 525.0, 516.0, 519.0, 573.0, 527.0, 573.0, 510.0, 522.0, 473.0, 525.0, 487.0, 516.0, 570.0, 579.0, 525.0, 530.0, 379.0, 525.0, 522.0, 473.0, 573.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [276.0, 300.0, 261.0, 249.0, 252.0, 270.0, 255.0, 267.0, 290.0, 286.0, 256.0, 269.0, 302.0, 274.0, 253.0, 269.0, 288.0, 285.0, 279.0, 294.0, 271.0, 311.0, 296.0, 277.0, 257.0, 270.0, 266.0, 253.0, 286.0, 293.0, 269.0, 256.0, 263.0, 270.0, 237.0, 273.0, 254.0, 262.0, 267.0, 252.0, 285.0, 288.0, 283.0, 287.0, 265.0, 257.0, 254.0, 262.0, 235.0, 260.0, 267.0, 258.0, 259.0, 256.0, 297.0, 285.0, 285.0, 291.0, 231.0, 234.0, 270.0, 303.0, 290.0, 286.0, 258.0, 258.0, 279.0, 297.0, 235.0, 255.0, 188.0, 205.0, 271.0, 242.0, 239.0, 274.0, 267.0, 309.0, 298.0, 278.0, 274.0, 287.0, 244.0, 248.0, 278.0, 289.0, 257.0, 270.0, 288.0, 285.0, 263.0, 256.0, 290.0, 286.0, 255.0, 267.0, 276.0, 291.0, 271.0, 259.0, 286.0, 281.0, 264.0, 258.0, 287.0, 292.0, 284.0, 280.0, 254.0, 271.0, 240.0, 279.0, 291.0, 282.0, 269.0, 253.0, 287.0, 286.0, 289.0, 284.0, 292.0, 287.0, 263.0, 273.0, 275.0, 298.0, 265.0, 262.0, 264.0, 258.0, 287.0, 289.0, 249.0, 272.0, 269.0, 301.0, 294.0, 279.0, 240.0, 239.0, 295.0, 284.0, 262.0, 256.0, 281.0, 292.0, 269.0, 304.0, 281.0, 286.0, 273.0, 249.0, 249.0, 252.0, 287.0, 289.0, 316.0, 314.0, 268.0, 257.0, 260.0, 256.0, 257.0, 262.0, 272.0, 
301.0, 273.0, 254.0, 285.0, 288.0, 254.0, 256.0, 258.0, 264.0, 231.0, 242.0, 271.0, 254.0, 238.0, 249.0, 268.0, 248.0, 295.0, 275.0, 293.0, 286.0, 261.0, 264.0, 265.0, 265.0, 180.0, 199.0, 267.0, 258.0, 257.0, 265.0, 247.0, 226.0, 301.0, 272.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6844477582222729, "mean_inference_ms": 1.2070333180594002, "mean_action_processing_ms": 0.1324468637600638, "mean_env_wait_ms": 0.8417296114074044, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 3558400, "num_agent_steps_trained": 3558400, "num_env_steps_sampled": 1779200, "num_env_steps_trained": 1779200, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 1779200, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 3558400, "timers": {"training_iteration_time_ms": 3695.789, "learn_time_ms": 1128.481, "learn_throughput": 11342.685, "synch_weights_time_ms": 13.949}, "counters": {"num_env_steps_sampled": 1779200, "num_env_steps_trained": 1779200, "num_agent_steps_sampled": 3558400, "num_agent_steps_trained": 3558400}, "done": false, "episodes_total": 4448, "training_iteration": 139, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-08-59", "timestamp": 1666580939, "time_this_iter_s": 3.6463100910186768, "time_total_s": 522.5835981369019, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 522.5835981369019, "timesteps_since_restore": 0, "iterations_since_restore": 139, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.700000000000003, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 185.6, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 165.19, "shaped_reward_min": 111, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.11, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 16.24, "onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 14.84, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 15.88, "useful_onion_pickup_agent_1_min": 11, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.24, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.24, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.14, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, 
"useful_onion_drop_agent_1_mean": 0.12, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 14.43, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 15.66, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.43, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.28, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.65, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.39, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.34, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.44, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.15, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.12, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 4.93, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.63, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.85, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.62, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.43, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 15.66, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.43, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 15.66, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.001023371354676783, "policy_loss": -0.0012154708383604884, "vf_loss": 
7.2035813331604, "vf_explained_var": 0.7100783586502075, "kl": 0.0020363512448966503, "entropy": 1.0565154552459717, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 1792000, "num_env_steps_trained": 1792000, "num_agent_steps_sampled": 3584000, "num_agent_steps_trained": 3584000}, "sampler_results": {"episode_reward_max": 630.0, "episode_reward_min": 351.0, "episode_reward_mean": 536.39, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 172.0}, "policy_reward_max": {"ppo": 316.0}, "policy_reward_mean": {"ppo": 268.195}, "custom_metrics": {"sparse_reward_mean": 185.6, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 165.19, "shaped_reward_min": 111, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.11, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 16.24, 
"onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 14.84, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 15.88, "useful_onion_pickup_agent_1_min": 11, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.24, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.24, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.14, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.12, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 14.43, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 15.66, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.43, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.28, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.65, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.39, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.34, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.44, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.15, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.12, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 4.93, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.63, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.85, "soup_delivery_agent_0_min": 1, 
"soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.62, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.43, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 15.66, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.43, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 15.66, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [516.0, 576.0, 490.0, 393.0, 513.0, 513.0, 576.0, 576.0, 561.0, 492.0, 567.0, 527.0, 573.0, 519.0, 576.0, 522.0, 567.0, 530.0, 567.0, 522.0, 579.0, 564.0, 525.0, 519.0, 573.0, 522.0, 573.0, 573.0, 579.0, 536.0, 573.0, 527.0, 522.0, 576.0, 521.0, 570.0, 573.0, 479.0, 579.0, 518.0, 573.0, 573.0, 567.0, 522.0, 501.0, 576.0, 630.0, 525.0, 516.0, 519.0, 573.0, 527.0, 573.0, 510.0, 522.0, 473.0, 525.0, 487.0, 516.0, 570.0, 579.0, 525.0, 530.0, 379.0, 525.0, 522.0, 473.0, 573.0, 567.0, 533.0, 552.0, 570.0, 530.0, 351.0, 525.0, 516.0, 530.0, 519.0, 522.0, 573.0, 573.0, 516.0, 533.0, 525.0, 527.0, 576.0, 473.0, 579.0, 501.0, 573.0, 525.0, 522.0, 530.0, 573.0, 519.0, 522.0, 573.0, 576.0, 521.0, 573.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [258.0, 258.0, 279.0, 297.0, 235.0, 255.0, 188.0, 205.0, 271.0, 242.0, 239.0, 274.0, 267.0, 309.0, 298.0, 278.0, 274.0, 287.0, 244.0, 248.0, 278.0, 289.0, 257.0, 270.0, 288.0, 285.0, 263.0, 256.0, 290.0, 286.0, 255.0, 267.0, 276.0, 291.0, 271.0, 259.0, 286.0, 281.0, 264.0, 258.0, 287.0, 292.0, 284.0, 280.0, 254.0, 271.0, 240.0, 279.0, 291.0, 
282.0, 269.0, 253.0, 287.0, 286.0, 289.0, 284.0, 292.0, 287.0, 263.0, 273.0, 275.0, 298.0, 265.0, 262.0, 264.0, 258.0, 287.0, 289.0, 249.0, 272.0, 269.0, 301.0, 294.0, 279.0, 240.0, 239.0, 295.0, 284.0, 262.0, 256.0, 281.0, 292.0, 269.0, 304.0, 281.0, 286.0, 273.0, 249.0, 249.0, 252.0, 287.0, 289.0, 316.0, 314.0, 268.0, 257.0, 260.0, 256.0, 257.0, 262.0, 272.0, 301.0, 273.0, 254.0, 285.0, 288.0, 254.0, 256.0, 258.0, 264.0, 231.0, 242.0, 271.0, 254.0, 238.0, 249.0, 268.0, 248.0, 295.0, 275.0, 293.0, 286.0, 261.0, 264.0, 265.0, 265.0, 180.0, 199.0, 267.0, 258.0, 257.0, 265.0, 247.0, 226.0, 301.0, 272.0, 281.0, 286.0, 267.0, 266.0, 272.0, 280.0, 292.0, 278.0, 254.0, 276.0, 179.0, 172.0, 258.0, 267.0, 260.0, 256.0, 269.0, 261.0, 277.0, 242.0, 250.0, 272.0, 290.0, 283.0, 287.0, 286.0, 260.0, 256.0, 272.0, 261.0, 270.0, 255.0, 251.0, 276.0, 275.0, 301.0, 237.0, 236.0, 293.0, 286.0, 260.0, 241.0, 288.0, 285.0, 267.0, 258.0, 254.0, 268.0, 266.0, 264.0, 285.0, 288.0, 264.0, 255.0, 251.0, 271.0, 286.0, 287.0, 288.0, 288.0, 254.0, 267.0, 298.0, 275.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6844424072145202, "mean_inference_ms": 1.206964806118154, "mean_action_processing_ms": 0.13245111758107558, "mean_env_wait_ms": 0.8415324870317537, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 630.0, "episode_reward_min": 351.0, "episode_reward_mean": 536.39, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 172.0}, "policy_reward_max": {"ppo": 316.0}, "policy_reward_mean": {"ppo": 268.195}, "hist_stats": {"episode_reward": [516.0, 576.0, 490.0, 393.0, 513.0, 513.0, 576.0, 576.0, 561.0, 492.0, 567.0, 527.0, 573.0, 519.0, 576.0, 522.0, 567.0, 530.0, 567.0, 522.0, 579.0, 564.0, 525.0, 519.0, 573.0, 522.0, 573.0, 573.0, 579.0, 536.0, 573.0, 527.0, 522.0, 576.0, 521.0, 570.0, 573.0, 479.0, 579.0, 518.0, 573.0, 573.0, 567.0, 522.0, 501.0, 576.0, 630.0, 525.0, 516.0, 519.0, 573.0, 527.0, 573.0, 510.0, 522.0, 473.0, 525.0, 
487.0, 516.0, 570.0, 579.0, 525.0, 530.0, 379.0, 525.0, 522.0, 473.0, 573.0, 567.0, 533.0, 552.0, 570.0, 530.0, 351.0, 525.0, 516.0, 530.0, 519.0, 522.0, 573.0, 573.0, 516.0, 533.0, 525.0, 527.0, 576.0, 473.0, 579.0, 501.0, 573.0, 525.0, 522.0, 530.0, 573.0, 519.0, 522.0, 573.0, 576.0, 521.0, 573.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [258.0, 258.0, 279.0, 297.0, 235.0, 255.0, 188.0, 205.0, 271.0, 242.0, 239.0, 274.0, 267.0, 309.0, 298.0, 278.0, 274.0, 287.0, 244.0, 248.0, 278.0, 289.0, 257.0, 270.0, 288.0, 285.0, 263.0, 256.0, 290.0, 286.0, 255.0, 267.0, 276.0, 291.0, 271.0, 259.0, 286.0, 281.0, 264.0, 258.0, 287.0, 292.0, 284.0, 280.0, 254.0, 271.0, 240.0, 279.0, 291.0, 282.0, 269.0, 253.0, 287.0, 286.0, 289.0, 284.0, 292.0, 287.0, 263.0, 273.0, 275.0, 298.0, 265.0, 262.0, 264.0, 258.0, 287.0, 289.0, 249.0, 272.0, 269.0, 301.0, 294.0, 279.0, 240.0, 239.0, 295.0, 284.0, 262.0, 256.0, 281.0, 292.0, 269.0, 304.0, 281.0, 286.0, 273.0, 249.0, 249.0, 252.0, 287.0, 289.0, 316.0, 314.0, 268.0, 257.0, 260.0, 256.0, 257.0, 262.0, 272.0, 301.0, 273.0, 254.0, 285.0, 288.0, 254.0, 256.0, 258.0, 264.0, 231.0, 242.0, 271.0, 254.0, 238.0, 249.0, 268.0, 248.0, 295.0, 275.0, 293.0, 286.0, 261.0, 264.0, 265.0, 265.0, 180.0, 199.0, 267.0, 258.0, 257.0, 265.0, 247.0, 226.0, 301.0, 272.0, 281.0, 286.0, 267.0, 266.0, 272.0, 280.0, 292.0, 278.0, 254.0, 276.0, 179.0, 172.0, 258.0, 267.0, 260.0, 256.0, 269.0, 261.0, 277.0, 242.0, 250.0, 272.0, 290.0, 283.0, 287.0, 286.0, 260.0, 256.0, 272.0, 
261.0, 270.0, 255.0, 251.0, 276.0, 275.0, 301.0, 237.0, 236.0, 293.0, 286.0, 260.0, 241.0, 288.0, 285.0, 267.0, 258.0, 254.0, 268.0, 266.0, 264.0, 285.0, 288.0, 264.0, 255.0, 251.0, 271.0, 286.0, 287.0, 288.0, 288.0, 254.0, 267.0, 298.0, 275.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6844424072145202, "mean_inference_ms": 1.206964806118154, "mean_action_processing_ms": 0.13245111758107558, "mean_env_wait_ms": 0.8415324870317537, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 3584000, "num_agent_steps_trained": 3584000, "num_env_steps_sampled": 1792000, "num_env_steps_trained": 1792000, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 1792000, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 3584000, "timers": {"training_iteration_time_ms": 3686.752, "learn_time_ms": 1117.713, "learn_throughput": 11451.954, "synch_weights_time_ms": 13.08}, "counters": {"num_env_steps_sampled": 1792000, "num_env_steps_trained": 1792000, "num_agent_steps_sampled": 3584000, "num_agent_steps_trained": 3584000}, "done": false, "episodes_total": 4480, "training_iteration": 140, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-09-03", "timestamp": 1666580943, "time_this_iter_s": 3.671760320663452, "time_total_s": 526.2553584575653, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 526.2553584575653, "timesteps_since_restore": 0, "iterations_since_restore": 140, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.9, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 185.8, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 165.05, "shaped_reward_min": 111, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.03, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 16.32, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 14.72, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 15.99, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 26, "onion_drop_agent_0_mean": 0.23, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.3, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.18, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, 
"useful_onion_drop_agent_1_mean": 0.12, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 14.45, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 15.64, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 5.43, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.27, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.7, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.31, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.32, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.46, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.09, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.1, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 4.95, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.6, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 4.86, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.57, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.03, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.45, "optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 15.64, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.45, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 15.64, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.001100448309443891, "policy_loss": -0.0013004107167944312, "vf_loss": 
7.32533597946167, "vf_explained_var": 0.6947555541992188, "kl": 0.0018763296538963914, "entropy": 1.065138578414917, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 1804800, "num_env_steps_trained": 1804800, "num_agent_steps_sampled": 3609600, "num_agent_steps_trained": 3609600}, "sampler_results": {"episode_reward_max": 630.0, "episode_reward_min": 351.0, "episode_reward_mean": 536.65, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 172.0}, "policy_reward_max": {"ppo": 316.0}, "policy_reward_mean": {"ppo": 268.325}, "custom_metrics": {"sparse_reward_mean": 185.8, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 165.05, "shaped_reward_min": 111, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.03, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 16.32, 
"onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 14.72, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 15.99, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 26, "onion_drop_agent_0_mean": 0.23, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.3, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.18, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.12, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 14.45, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 15.64, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 5.43, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.27, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.7, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.31, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.32, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.46, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.09, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.1, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 4.95, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.6, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 4.86, "soup_delivery_agent_0_min": 2, 
"soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.57, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.03, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.45, "optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 15.64, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.45, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 15.64, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [522.0, 576.0, 521.0, 570.0, 573.0, 479.0, 579.0, 518.0, 573.0, 573.0, 567.0, 522.0, 501.0, 576.0, 630.0, 525.0, 516.0, 519.0, 573.0, 527.0, 573.0, 510.0, 522.0, 473.0, 525.0, 487.0, 516.0, 570.0, 579.0, 525.0, 530.0, 379.0, 525.0, 522.0, 473.0, 573.0, 567.0, 533.0, 552.0, 570.0, 530.0, 351.0, 525.0, 516.0, 530.0, 519.0, 522.0, 573.0, 573.0, 516.0, 533.0, 525.0, 527.0, 576.0, 473.0, 579.0, 501.0, 573.0, 525.0, 522.0, 530.0, 573.0, 519.0, 522.0, 573.0, 576.0, 521.0, 573.0, 570.0, 504.0, 522.0, 522.0, 564.0, 525.0, 539.0, 582.0, 525.0, 570.0, 579.0, 519.0, 530.0, 513.0, 573.0, 530.0, 576.0, 573.0, 530.0, 507.0, 533.0, 507.0, 516.0, 510.0, 516.0, 515.0, 530.0, 582.0, 576.0, 570.0, 567.0, 570.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [264.0, 258.0, 287.0, 289.0, 249.0, 272.0, 269.0, 301.0, 294.0, 279.0, 240.0, 239.0, 295.0, 284.0, 262.0, 256.0, 281.0, 292.0, 269.0, 304.0, 281.0, 286.0, 273.0, 249.0, 249.0, 252.0, 287.0, 289.0, 316.0, 314.0, 268.0, 257.0, 260.0, 256.0, 257.0, 262.0, 272.0, 301.0, 273.0, 254.0, 285.0, 288.0, 254.0, 256.0, 258.0, 264.0, 231.0, 242.0, 271.0, 
254.0, 238.0, 249.0, 268.0, 248.0, 295.0, 275.0, 293.0, 286.0, 261.0, 264.0, 265.0, 265.0, 180.0, 199.0, 267.0, 258.0, 257.0, 265.0, 247.0, 226.0, 301.0, 272.0, 281.0, 286.0, 267.0, 266.0, 272.0, 280.0, 292.0, 278.0, 254.0, 276.0, 179.0, 172.0, 258.0, 267.0, 260.0, 256.0, 269.0, 261.0, 277.0, 242.0, 250.0, 272.0, 290.0, 283.0, 287.0, 286.0, 260.0, 256.0, 272.0, 261.0, 270.0, 255.0, 251.0, 276.0, 275.0, 301.0, 237.0, 236.0, 293.0, 286.0, 260.0, 241.0, 288.0, 285.0, 267.0, 258.0, 254.0, 268.0, 266.0, 264.0, 285.0, 288.0, 264.0, 255.0, 251.0, 271.0, 286.0, 287.0, 288.0, 288.0, 254.0, 267.0, 298.0, 275.0, 285.0, 285.0, 242.0, 262.0, 259.0, 263.0, 260.0, 262.0, 279.0, 285.0, 262.0, 263.0, 278.0, 261.0, 290.0, 292.0, 259.0, 266.0, 295.0, 275.0, 278.0, 301.0, 255.0, 264.0, 265.0, 265.0, 250.0, 263.0, 299.0, 274.0, 277.0, 253.0, 274.0, 302.0, 284.0, 289.0, 257.0, 273.0, 247.0, 260.0, 270.0, 263.0, 258.0, 249.0, 263.0, 253.0, 235.0, 275.0, 256.0, 260.0, 251.0, 264.0, 270.0, 260.0, 287.0, 295.0, 293.0, 283.0, 287.0, 283.0, 279.0, 288.0, 284.0, 286.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.684355643486077, "mean_inference_ms": 1.2068429403105816, "mean_action_processing_ms": 0.13244201724234905, "mean_env_wait_ms": 0.8412518122991941, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 630.0, "episode_reward_min": 351.0, "episode_reward_mean": 536.65, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 172.0}, "policy_reward_max": {"ppo": 316.0}, "policy_reward_mean": {"ppo": 268.325}, "hist_stats": {"episode_reward": [522.0, 576.0, 521.0, 570.0, 573.0, 479.0, 579.0, 518.0, 573.0, 573.0, 567.0, 522.0, 501.0, 576.0, 630.0, 525.0, 516.0, 519.0, 573.0, 527.0, 573.0, 510.0, 522.0, 473.0, 525.0, 487.0, 516.0, 570.0, 579.0, 525.0, 530.0, 379.0, 525.0, 522.0, 473.0, 573.0, 567.0, 533.0, 552.0, 570.0, 530.0, 351.0, 525.0, 516.0, 530.0, 519.0, 522.0, 573.0, 573.0, 516.0, 533.0, 525.0, 527.0, 576.0, 473.0, 579.0, 501.0, 
573.0, 525.0, 522.0, 530.0, 573.0, 519.0, 522.0, 573.0, 576.0, 521.0, 573.0, 570.0, 504.0, 522.0, 522.0, 564.0, 525.0, 539.0, 582.0, 525.0, 570.0, 579.0, 519.0, 530.0, 513.0, 573.0, 530.0, 576.0, 573.0, 530.0, 507.0, 533.0, 507.0, 516.0, 510.0, 516.0, 515.0, 530.0, 582.0, 576.0, 570.0, 567.0, 570.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [264.0, 258.0, 287.0, 289.0, 249.0, 272.0, 269.0, 301.0, 294.0, 279.0, 240.0, 239.0, 295.0, 284.0, 262.0, 256.0, 281.0, 292.0, 269.0, 304.0, 281.0, 286.0, 273.0, 249.0, 249.0, 252.0, 287.0, 289.0, 316.0, 314.0, 268.0, 257.0, 260.0, 256.0, 257.0, 262.0, 272.0, 301.0, 273.0, 254.0, 285.0, 288.0, 254.0, 256.0, 258.0, 264.0, 231.0, 242.0, 271.0, 254.0, 238.0, 249.0, 268.0, 248.0, 295.0, 275.0, 293.0, 286.0, 261.0, 264.0, 265.0, 265.0, 180.0, 199.0, 267.0, 258.0, 257.0, 265.0, 247.0, 226.0, 301.0, 272.0, 281.0, 286.0, 267.0, 266.0, 272.0, 280.0, 292.0, 278.0, 254.0, 276.0, 179.0, 172.0, 258.0, 267.0, 260.0, 256.0, 269.0, 261.0, 277.0, 242.0, 250.0, 272.0, 290.0, 283.0, 287.0, 286.0, 260.0, 256.0, 272.0, 261.0, 270.0, 255.0, 251.0, 276.0, 275.0, 301.0, 237.0, 236.0, 293.0, 286.0, 260.0, 241.0, 288.0, 285.0, 267.0, 258.0, 254.0, 268.0, 266.0, 264.0, 285.0, 288.0, 264.0, 255.0, 251.0, 271.0, 286.0, 287.0, 288.0, 288.0, 254.0, 267.0, 298.0, 275.0, 285.0, 285.0, 242.0, 262.0, 259.0, 263.0, 260.0, 262.0, 279.0, 285.0, 262.0, 263.0, 278.0, 261.0, 290.0, 292.0, 259.0, 266.0, 295.0, 275.0, 278.0, 301.0, 255.0, 264.0, 265.0, 265.0, 250.0, 263.0, 299.0, 
274.0, 277.0, 253.0, 274.0, 302.0, 284.0, 289.0, 257.0, 273.0, 247.0, 260.0, 270.0, 263.0, 258.0, 249.0, 263.0, 253.0, 235.0, 275.0, 256.0, 260.0, 251.0, 264.0, 270.0, 260.0, 287.0, 295.0, 293.0, 283.0, 287.0, 283.0, 279.0, 288.0, 284.0, 286.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.684355643486077, "mean_inference_ms": 1.2068429403105816, "mean_action_processing_ms": 0.13244201724234905, "mean_env_wait_ms": 0.8412518122991941, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 3609600, "num_agent_steps_trained": 3609600, "num_env_steps_sampled": 1804800, "num_env_steps_trained": 1804800, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 1804800, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 3609600, "timers": {"training_iteration_time_ms": 3659.837, "learn_time_ms": 1131.102, "learn_throughput": 11316.395, "synch_weights_time_ms": 12.953}, "counters": {"num_env_steps_sampled": 1804800, "num_env_steps_trained": 1804800, "num_agent_steps_sampled": 3609600, "num_agent_steps_trained": 3609600}, "done": false, "episodes_total": 4512, "training_iteration": 141, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-09-07", "timestamp": 1666580947, "time_this_iter_s": 3.75894832611084, "time_total_s": 530.0143067836761, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 530.0143067836761, "timesteps_since_restore": 0, "iterations_since_restore": 141, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.1, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 184.8, "sparse_reward_min": 80, "sparse_reward_max": 200, "shaped_reward_mean": 164.18, "shaped_reward_min": 77, "shaped_reward_max": 182, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.23, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 16.07, "onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 14.9, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 15.81, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 26, "onion_drop_agent_0_mean": 0.24, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.3, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.18, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, 
"useful_onion_drop_agent_1_mean": 0.1, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 14.61, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 15.42, "potting_onion_agent_1_min": 6, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 5.39, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.19, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 4.51, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.37, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.39, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.33, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.09, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.04, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.87, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.62, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 4.78, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.56, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.04, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.61, "optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 15.42, "optimal_onion_potting_agent_1_min": 6, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.61, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 15.42, "viable_onion_potting_agent_1_min": 6, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 4.625937435775995e-05, "policy_loss": -0.00017143398872576654, "vf_loss": 
7.427959442138672, "vf_explained_var": 0.6894431114196777, "kl": 0.0025915263686329126, "entropy": 1.0502040386199951, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 1817600, "num_env_steps_trained": 1817600, "num_agent_steps_sampled": 3635200, "num_agent_steps_trained": 3635200}, "sampler_results": {"episode_reward_max": 582.0, "episode_reward_min": 237.0, "episode_reward_mean": 533.78, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 114.0}, "policy_reward_max": {"ppo": 302.0}, "policy_reward_mean": {"ppo": 266.89}, "custom_metrics": {"sparse_reward_mean": 184.8, "sparse_reward_min": 80, "sparse_reward_max": 200, "shaped_reward_mean": 164.18, "shaped_reward_min": 77, "shaped_reward_max": 182, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.23, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 16.07, 
"onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 14.9, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 15.81, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 26, "onion_drop_agent_0_mean": 0.24, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.3, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.18, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.1, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 14.61, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 15.42, "potting_onion_agent_1_min": 6, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 5.39, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.19, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 4.51, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.37, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.39, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.33, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.09, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.04, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.87, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.62, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 4.78, "soup_delivery_agent_0_min": 2, 
"soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.56, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.04, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.61, "optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 15.42, "optimal_onion_potting_agent_1_min": 6, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.61, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 15.42, "viable_onion_potting_agent_1_min": 6, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [525.0, 522.0, 473.0, 573.0, 567.0, 533.0, 552.0, 570.0, 530.0, 351.0, 525.0, 516.0, 530.0, 519.0, 522.0, 573.0, 573.0, 516.0, 533.0, 525.0, 527.0, 576.0, 473.0, 579.0, 501.0, 573.0, 525.0, 522.0, 530.0, 573.0, 519.0, 522.0, 573.0, 576.0, 521.0, 573.0, 570.0, 504.0, 522.0, 522.0, 564.0, 525.0, 539.0, 582.0, 525.0, 570.0, 579.0, 519.0, 530.0, 513.0, 573.0, 530.0, 576.0, 573.0, 530.0, 507.0, 533.0, 507.0, 516.0, 510.0, 516.0, 515.0, 530.0, 582.0, 576.0, 570.0, 567.0, 570.0, 513.0, 573.0, 527.0, 519.0, 579.0, 525.0, 533.0, 525.0, 530.0, 533.0, 524.0, 516.0, 519.0, 573.0, 519.0, 522.0, 570.0, 530.0, 525.0, 527.0, 237.0, 507.0, 576.0, 570.0, 576.0, 570.0, 522.0, 530.0, 522.0, 473.0, 573.0, 504.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [267.0, 258.0, 257.0, 265.0, 247.0, 226.0, 301.0, 272.0, 281.0, 286.0, 267.0, 266.0, 272.0, 280.0, 292.0, 278.0, 254.0, 276.0, 179.0, 172.0, 258.0, 267.0, 260.0, 256.0, 269.0, 261.0, 277.0, 242.0, 250.0, 272.0, 290.0, 283.0, 287.0, 286.0, 260.0, 256.0, 272.0, 261.0, 270.0, 255.0, 251.0, 276.0, 275.0, 301.0, 237.0, 236.0, 293.0, 286.0, 260.0, 
241.0, 288.0, 285.0, 267.0, 258.0, 254.0, 268.0, 266.0, 264.0, 285.0, 288.0, 264.0, 255.0, 251.0, 271.0, 286.0, 287.0, 288.0, 288.0, 254.0, 267.0, 298.0, 275.0, 285.0, 285.0, 242.0, 262.0, 259.0, 263.0, 260.0, 262.0, 279.0, 285.0, 262.0, 263.0, 278.0, 261.0, 290.0, 292.0, 259.0, 266.0, 295.0, 275.0, 278.0, 301.0, 255.0, 264.0, 265.0, 265.0, 250.0, 263.0, 299.0, 274.0, 277.0, 253.0, 274.0, 302.0, 284.0, 289.0, 257.0, 273.0, 247.0, 260.0, 270.0, 263.0, 258.0, 249.0, 263.0, 253.0, 235.0, 275.0, 256.0, 260.0, 251.0, 264.0, 270.0, 260.0, 287.0, 295.0, 293.0, 283.0, 287.0, 283.0, 279.0, 288.0, 284.0, 286.0, 250.0, 263.0, 286.0, 287.0, 267.0, 260.0, 257.0, 262.0, 292.0, 287.0, 260.0, 265.0, 257.0, 276.0, 265.0, 260.0, 260.0, 270.0, 268.0, 265.0, 271.0, 253.0, 266.0, 250.0, 252.0, 267.0, 282.0, 291.0, 260.0, 259.0, 261.0, 261.0, 290.0, 280.0, 268.0, 262.0, 256.0, 269.0, 257.0, 270.0, 123.0, 114.0, 258.0, 249.0, 301.0, 275.0, 293.0, 277.0, 287.0, 289.0, 275.0, 295.0, 254.0, 268.0, 249.0, 281.0, 260.0, 262.0, 239.0, 234.0, 292.0, 281.0, 240.0, 264.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6842755530238879, "mean_inference_ms": 1.2067346536692405, "mean_action_processing_ms": 0.13243207984798036, "mean_env_wait_ms": 0.8409767392353698, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 582.0, "episode_reward_min": 237.0, "episode_reward_mean": 533.78, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 114.0}, "policy_reward_max": {"ppo": 302.0}, "policy_reward_mean": {"ppo": 266.89}, "hist_stats": {"episode_reward": [525.0, 522.0, 473.0, 573.0, 567.0, 533.0, 552.0, 570.0, 530.0, 351.0, 525.0, 516.0, 530.0, 519.0, 522.0, 573.0, 573.0, 516.0, 533.0, 525.0, 527.0, 576.0, 473.0, 579.0, 501.0, 573.0, 525.0, 522.0, 530.0, 573.0, 519.0, 522.0, 573.0, 576.0, 521.0, 573.0, 570.0, 504.0, 522.0, 522.0, 564.0, 525.0, 539.0, 582.0, 525.0, 570.0, 579.0, 519.0, 530.0, 513.0, 573.0, 530.0, 576.0, 573.0, 530.0, 507.0, 533.0, 
507.0, 516.0, 510.0, 516.0, 515.0, 530.0, 582.0, 576.0, 570.0, 567.0, 570.0, 513.0, 573.0, 527.0, 519.0, 579.0, 525.0, 533.0, 525.0, 530.0, 533.0, 524.0, 516.0, 519.0, 573.0, 519.0, 522.0, 570.0, 530.0, 525.0, 527.0, 237.0, 507.0, 576.0, 570.0, 576.0, 570.0, 522.0, 530.0, 522.0, 473.0, 573.0, 504.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [267.0, 258.0, 257.0, 265.0, 247.0, 226.0, 301.0, 272.0, 281.0, 286.0, 267.0, 266.0, 272.0, 280.0, 292.0, 278.0, 254.0, 276.0, 179.0, 172.0, 258.0, 267.0, 260.0, 256.0, 269.0, 261.0, 277.0, 242.0, 250.0, 272.0, 290.0, 283.0, 287.0, 286.0, 260.0, 256.0, 272.0, 261.0, 270.0, 255.0, 251.0, 276.0, 275.0, 301.0, 237.0, 236.0, 293.0, 286.0, 260.0, 241.0, 288.0, 285.0, 267.0, 258.0, 254.0, 268.0, 266.0, 264.0, 285.0, 288.0, 264.0, 255.0, 251.0, 271.0, 286.0, 287.0, 288.0, 288.0, 254.0, 267.0, 298.0, 275.0, 285.0, 285.0, 242.0, 262.0, 259.0, 263.0, 260.0, 262.0, 279.0, 285.0, 262.0, 263.0, 278.0, 261.0, 290.0, 292.0, 259.0, 266.0, 295.0, 275.0, 278.0, 301.0, 255.0, 264.0, 265.0, 265.0, 250.0, 263.0, 299.0, 274.0, 277.0, 253.0, 274.0, 302.0, 284.0, 289.0, 257.0, 273.0, 247.0, 260.0, 270.0, 263.0, 258.0, 249.0, 263.0, 253.0, 235.0, 275.0, 256.0, 260.0, 251.0, 264.0, 270.0, 260.0, 287.0, 295.0, 293.0, 283.0, 287.0, 283.0, 279.0, 288.0, 284.0, 286.0, 250.0, 263.0, 286.0, 287.0, 267.0, 260.0, 257.0, 262.0, 292.0, 287.0, 260.0, 265.0, 257.0, 276.0, 265.0, 260.0, 260.0, 270.0, 268.0, 265.0, 271.0, 253.0, 266.0, 250.0, 252.0, 267.0, 282.0, 291.0, 260.0, 
259.0, 261.0, 261.0, 290.0, 280.0, 268.0, 262.0, 256.0, 269.0, 257.0, 270.0, 123.0, 114.0, 258.0, 249.0, 301.0, 275.0, 293.0, 277.0, 287.0, 289.0, 275.0, 295.0, 254.0, 268.0, 249.0, 281.0, 260.0, 262.0, 239.0, 234.0, 292.0, 281.0, 240.0, 264.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6842755530238879, "mean_inference_ms": 1.2067346536692405, "mean_action_processing_ms": 0.13243207984798036, "mean_env_wait_ms": 0.8409767392353698, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 3635200, "num_agent_steps_trained": 3635200, "num_env_steps_sampled": 1817600, "num_env_steps_trained": 1817600, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 1817600, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 3635200, "timers": {"training_iteration_time_ms": 3648.912, "learn_time_ms": 1135.149, "learn_throughput": 11276.048, "synch_weights_time_ms": 13.551}, "counters": {"num_env_steps_sampled": 1817600, "num_env_steps_trained": 1817600, "num_agent_steps_sampled": 3635200, "num_agent_steps_trained": 3635200}, "done": false, "episodes_total": 4544, "training_iteration": 142, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-09-11", "timestamp": 1666580951, "time_this_iter_s": 3.7470552921295166, "time_total_s": 533.7613620758057, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 533.7613620758057, "timesteps_since_restore": 0, "iterations_since_restore": 142, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.849999999999998, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 185.0, "sparse_reward_min": 80, "sparse_reward_max": 200, "shaped_reward_mean": 164.2, "shaped_reward_min": 77, "shaped_reward_max": 182, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.44, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 15.97, "onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 14.97, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 15.67, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 26, "onion_drop_agent_0_mean": 0.33, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.35, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.26, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, 
"useful_onion_drop_agent_1_mean": 0.12, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.73, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 15.28, "potting_onion_agent_1_min": 6, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 5.28, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.24, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.42, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.47, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.39, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.31, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.11, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.06, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.75, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.77, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.66, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.68, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.05, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 14.73, "optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 15.28, "optimal_onion_potting_agent_1_min": 6, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.73, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 15.28, "viable_onion_potting_agent_1_min": 6, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0044081672094762325, "policy_loss": -0.004621940199285746, "vf_loss": 
7.469595432281494, "vf_explained_var": 0.6961889266967773, "kl": 0.001743752509355545, "entropy": 1.0663713216781616, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 1830400, "num_env_steps_trained": 1830400, "num_agent_steps_sampled": 3660800, "num_agent_steps_trained": 3660800}, "sampler_results": {"episode_reward_max": 582.0, "episode_reward_min": 237.0, "episode_reward_mean": 534.2, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 114.0}, "policy_reward_max": {"ppo": 302.0}, "policy_reward_mean": {"ppo": 267.1}, "custom_metrics": {"sparse_reward_mean": 185.0, "sparse_reward_min": 80, "sparse_reward_max": 200, "shaped_reward_mean": 164.2, "shaped_reward_min": 77, "shaped_reward_max": 182, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.44, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 15.97, 
"onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 14.97, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 15.67, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 26, "onion_drop_agent_0_mean": 0.33, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.35, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.26, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.12, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.73, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 15.28, "potting_onion_agent_1_min": 6, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 5.28, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.24, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.42, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.47, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.39, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.31, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.11, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.06, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.75, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.77, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.66, "soup_delivery_agent_0_min": 1, 
"soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.68, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.05, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 14.73, "optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 15.28, "optimal_onion_potting_agent_1_min": 6, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.73, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 15.28, "viable_onion_potting_agent_1_min": 6, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [573.0, 576.0, 521.0, 573.0, 570.0, 504.0, 522.0, 522.0, 564.0, 525.0, 539.0, 582.0, 525.0, 570.0, 579.0, 519.0, 530.0, 513.0, 573.0, 530.0, 576.0, 573.0, 530.0, 507.0, 533.0, 507.0, 516.0, 510.0, 516.0, 515.0, 530.0, 582.0, 576.0, 570.0, 567.0, 570.0, 513.0, 573.0, 527.0, 519.0, 579.0, 525.0, 533.0, 525.0, 530.0, 533.0, 524.0, 516.0, 519.0, 573.0, 519.0, 522.0, 570.0, 530.0, 525.0, 527.0, 237.0, 507.0, 576.0, 570.0, 576.0, 570.0, 522.0, 530.0, 522.0, 473.0, 573.0, 504.0, 530.0, 576.0, 527.0, 513.0, 476.0, 573.0, 576.0, 522.0, 519.0, 530.0, 422.0, 525.0, 573.0, 527.0, 522.0, 522.0, 516.0, 576.0, 501.0, 530.0, 561.0, 522.0, 527.0, 573.0, 519.0, 525.0, 519.0, 570.0, 519.0, 516.0, 570.0, 513.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [286.0, 287.0, 288.0, 288.0, 254.0, 267.0, 298.0, 275.0, 285.0, 285.0, 242.0, 262.0, 259.0, 263.0, 260.0, 262.0, 279.0, 285.0, 262.0, 263.0, 278.0, 261.0, 290.0, 292.0, 259.0, 266.0, 295.0, 275.0, 278.0, 301.0, 255.0, 264.0, 265.0, 265.0, 250.0, 263.0, 299.0, 274.0, 277.0, 253.0, 274.0, 302.0, 284.0, 289.0, 257.0, 273.0, 247.0, 260.0, 270.0, 
263.0, 258.0, 249.0, 263.0, 253.0, 235.0, 275.0, 256.0, 260.0, 251.0, 264.0, 270.0, 260.0, 287.0, 295.0, 293.0, 283.0, 287.0, 283.0, 279.0, 288.0, 284.0, 286.0, 250.0, 263.0, 286.0, 287.0, 267.0, 260.0, 257.0, 262.0, 292.0, 287.0, 260.0, 265.0, 257.0, 276.0, 265.0, 260.0, 260.0, 270.0, 268.0, 265.0, 271.0, 253.0, 266.0, 250.0, 252.0, 267.0, 282.0, 291.0, 260.0, 259.0, 261.0, 261.0, 290.0, 280.0, 268.0, 262.0, 256.0, 269.0, 257.0, 270.0, 123.0, 114.0, 258.0, 249.0, 301.0, 275.0, 293.0, 277.0, 287.0, 289.0, 275.0, 295.0, 254.0, 268.0, 249.0, 281.0, 260.0, 262.0, 239.0, 234.0, 292.0, 281.0, 240.0, 264.0, 265.0, 265.0, 297.0, 279.0, 261.0, 266.0, 244.0, 269.0, 241.0, 235.0, 273.0, 300.0, 285.0, 291.0, 263.0, 259.0, 261.0, 258.0, 257.0, 273.0, 206.0, 216.0, 261.0, 264.0, 298.0, 275.0, 264.0, 263.0, 266.0, 256.0, 267.0, 255.0, 257.0, 259.0, 290.0, 286.0, 250.0, 251.0, 271.0, 259.0, 292.0, 269.0, 267.0, 255.0, 254.0, 273.0, 283.0, 290.0, 267.0, 252.0, 250.0, 275.0, 251.0, 268.0, 286.0, 284.0, 247.0, 272.0, 250.0, 266.0, 295.0, 275.0, 251.0, 262.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.684214198815861, "mean_inference_ms": 1.2066361573929743, "mean_action_processing_ms": 0.13242315818689165, "mean_env_wait_ms": 0.8407223869871694, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 582.0, "episode_reward_min": 237.0, "episode_reward_mean": 534.2, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 114.0}, "policy_reward_max": {"ppo": 302.0}, "policy_reward_mean": {"ppo": 267.1}, "hist_stats": {"episode_reward": [573.0, 576.0, 521.0, 573.0, 570.0, 504.0, 522.0, 522.0, 564.0, 525.0, 539.0, 582.0, 525.0, 570.0, 579.0, 519.0, 530.0, 513.0, 573.0, 530.0, 576.0, 573.0, 530.0, 507.0, 533.0, 507.0, 516.0, 510.0, 516.0, 515.0, 530.0, 582.0, 576.0, 570.0, 567.0, 570.0, 513.0, 573.0, 527.0, 519.0, 579.0, 525.0, 533.0, 525.0, 530.0, 533.0, 524.0, 516.0, 519.0, 573.0, 519.0, 522.0, 570.0, 530.0, 525.0, 527.0, 237.0, 
507.0, 576.0, 570.0, 576.0, 570.0, 522.0, 530.0, 522.0, 473.0, 573.0, 504.0, 530.0, 576.0, 527.0, 513.0, 476.0, 573.0, 576.0, 522.0, 519.0, 530.0, 422.0, 525.0, 573.0, 527.0, 522.0, 522.0, 516.0, 576.0, 501.0, 530.0, 561.0, 522.0, 527.0, 573.0, 519.0, 525.0, 519.0, 570.0, 519.0, 516.0, 570.0, 513.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [286.0, 287.0, 288.0, 288.0, 254.0, 267.0, 298.0, 275.0, 285.0, 285.0, 242.0, 262.0, 259.0, 263.0, 260.0, 262.0, 279.0, 285.0, 262.0, 263.0, 278.0, 261.0, 290.0, 292.0, 259.0, 266.0, 295.0, 275.0, 278.0, 301.0, 255.0, 264.0, 265.0, 265.0, 250.0, 263.0, 299.0, 274.0, 277.0, 253.0, 274.0, 302.0, 284.0, 289.0, 257.0, 273.0, 247.0, 260.0, 270.0, 263.0, 258.0, 249.0, 263.0, 253.0, 235.0, 275.0, 256.0, 260.0, 251.0, 264.0, 270.0, 260.0, 287.0, 295.0, 293.0, 283.0, 287.0, 283.0, 279.0, 288.0, 284.0, 286.0, 250.0, 263.0, 286.0, 287.0, 267.0, 260.0, 257.0, 262.0, 292.0, 287.0, 260.0, 265.0, 257.0, 276.0, 265.0, 260.0, 260.0, 270.0, 268.0, 265.0, 271.0, 253.0, 266.0, 250.0, 252.0, 267.0, 282.0, 291.0, 260.0, 259.0, 261.0, 261.0, 290.0, 280.0, 268.0, 262.0, 256.0, 269.0, 257.0, 270.0, 123.0, 114.0, 258.0, 249.0, 301.0, 275.0, 293.0, 277.0, 287.0, 289.0, 275.0, 295.0, 254.0, 268.0, 249.0, 281.0, 260.0, 262.0, 239.0, 234.0, 292.0, 281.0, 240.0, 264.0, 265.0, 265.0, 297.0, 279.0, 261.0, 266.0, 244.0, 269.0, 241.0, 235.0, 273.0, 300.0, 285.0, 291.0, 263.0, 259.0, 261.0, 258.0, 257.0, 273.0, 206.0, 216.0, 261.0, 264.0, 298.0, 275.0, 264.0, 263.0, 266.0, 
256.0, 267.0, 255.0, 257.0, 259.0, 290.0, 286.0, 250.0, 251.0, 271.0, 259.0, 292.0, 269.0, 267.0, 255.0, 254.0, 273.0, 283.0, 290.0, 267.0, 252.0, 250.0, 275.0, 251.0, 268.0, 286.0, 284.0, 247.0, 272.0, 250.0, 266.0, 295.0, 275.0, 251.0, 262.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.684214198815861, "mean_inference_ms": 1.2066361573929743, "mean_action_processing_ms": 0.13242315818689165, "mean_env_wait_ms": 0.8407223869871694, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 3660800, "num_agent_steps_trained": 3660800, "num_env_steps_sampled": 1830400, "num_env_steps_trained": 1830400, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 1830400, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 3660800, "timers": {"training_iteration_time_ms": 3652.047, "learn_time_ms": 1134.207, "learn_throughput": 11285.42, "synch_weights_time_ms": 13.158}, "counters": {"num_env_steps_sampled": 1830400, "num_env_steps_trained": 1830400, "num_agent_steps_sampled": 3660800, "num_agent_steps_trained": 3660800}, "done": false, "episodes_total": 4576, "training_iteration": 143, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-09-15", "timestamp": 1666580955, "time_this_iter_s": 3.773031711578369, "time_total_s": 537.534393787384, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 537.534393787384, "timesteps_since_restore": 0, "iterations_since_restore": 143, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.9, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 184.6, "sparse_reward_min": 80, "sparse_reward_max": 200, "shaped_reward_mean": 163.5, "shaped_reward_min": 77, "shaped_reward_max": 179, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.41, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 15.84, "onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 15.01, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 15.56, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.28, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.35, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.17, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, 
"useful_onion_drop_agent_1_mean": 0.12, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.75, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 15.13, "potting_onion_agent_1_min": 6, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.31, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.06, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.49, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.38, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.4, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.24, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.11, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 4.8, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.67, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.74, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.58, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.05, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 14.75, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 15.13, "optimal_onion_potting_agent_1_min": 6, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.75, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 15.13, "viable_onion_potting_agent_1_min": 6, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.001283028395846486, "policy_loss": -0.001493385061621666, "vf_loss": 
7.433184623718262, "vf_explained_var": 0.6879395246505737, "kl": 0.0020021807868033648, "entropy": 1.0659205913543701, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 1843200, "num_env_steps_trained": 1843200, "num_agent_steps_sampled": 3686400, "num_agent_steps_trained": 3686400}, "sampler_results": {"episode_reward_max": 579.0, "episode_reward_min": 237.0, "episode_reward_mean": 532.7, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 114.0}, "policy_reward_max": {"ppo": 301.0}, "policy_reward_mean": {"ppo": 266.35}, "custom_metrics": {"sparse_reward_mean": 184.6, "sparse_reward_min": 80, "sparse_reward_max": 200, "shaped_reward_mean": 163.5, "shaped_reward_min": 77, "shaped_reward_max": 179, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.41, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 15.84, 
"onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 15.01, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 15.56, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.28, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.35, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.17, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.12, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.75, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 15.13, "potting_onion_agent_1_min": 6, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.31, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.06, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.49, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.38, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.4, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.24, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.11, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 4.8, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.67, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.74, "soup_delivery_agent_0_min": 1, 
"soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.58, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.05, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 14.75, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 15.13, "optimal_onion_potting_agent_1_min": 6, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.75, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 15.13, "viable_onion_potting_agent_1_min": 6, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [576.0, 570.0, 567.0, 570.0, 513.0, 573.0, 527.0, 519.0, 579.0, 525.0, 533.0, 525.0, 530.0, 533.0, 524.0, 516.0, 519.0, 573.0, 519.0, 522.0, 570.0, 530.0, 525.0, 527.0, 237.0, 507.0, 576.0, 570.0, 576.0, 570.0, 522.0, 530.0, 522.0, 473.0, 573.0, 504.0, 530.0, 576.0, 527.0, 513.0, 476.0, 573.0, 576.0, 522.0, 519.0, 530.0, 422.0, 525.0, 573.0, 527.0, 522.0, 522.0, 516.0, 576.0, 501.0, 530.0, 561.0, 522.0, 527.0, 573.0, 519.0, 525.0, 519.0, 570.0, 519.0, 516.0, 570.0, 513.0, 503.0, 522.0, 530.0, 570.0, 527.0, 573.0, 567.0, 576.0, 573.0, 519.0, 522.0, 573.0, 579.0, 519.0, 573.0, 516.0, 573.0, 510.0, 476.0, 525.0, 530.0, 522.0, 525.0, 516.0, 456.0, 522.0, 507.0, 507.0, 570.0, 579.0, 579.0, 516.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [293.0, 283.0, 287.0, 283.0, 279.0, 288.0, 284.0, 286.0, 250.0, 263.0, 286.0, 287.0, 267.0, 260.0, 257.0, 262.0, 292.0, 287.0, 260.0, 265.0, 257.0, 276.0, 265.0, 260.0, 260.0, 270.0, 268.0, 265.0, 271.0, 253.0, 266.0, 250.0, 252.0, 267.0, 282.0, 291.0, 260.0, 259.0, 261.0, 261.0, 290.0, 280.0, 268.0, 262.0, 256.0, 269.0, 257.0, 270.0, 123.0, 
114.0, 258.0, 249.0, 301.0, 275.0, 293.0, 277.0, 287.0, 289.0, 275.0, 295.0, 254.0, 268.0, 249.0, 281.0, 260.0, 262.0, 239.0, 234.0, 292.0, 281.0, 240.0, 264.0, 265.0, 265.0, 297.0, 279.0, 261.0, 266.0, 244.0, 269.0, 241.0, 235.0, 273.0, 300.0, 285.0, 291.0, 263.0, 259.0, 261.0, 258.0, 257.0, 273.0, 206.0, 216.0, 261.0, 264.0, 298.0, 275.0, 264.0, 263.0, 266.0, 256.0, 267.0, 255.0, 257.0, 259.0, 290.0, 286.0, 250.0, 251.0, 271.0, 259.0, 292.0, 269.0, 267.0, 255.0, 254.0, 273.0, 283.0, 290.0, 267.0, 252.0, 250.0, 275.0, 251.0, 268.0, 286.0, 284.0, 247.0, 272.0, 250.0, 266.0, 295.0, 275.0, 251.0, 262.0, 248.0, 255.0, 255.0, 267.0, 270.0, 260.0, 290.0, 280.0, 273.0, 254.0, 287.0, 286.0, 294.0, 273.0, 287.0, 289.0, 294.0, 279.0, 249.0, 270.0, 265.0, 257.0, 301.0, 272.0, 295.0, 284.0, 270.0, 249.0, 286.0, 287.0, 242.0, 274.0, 292.0, 281.0, 256.0, 254.0, 253.0, 223.0, 266.0, 259.0, 264.0, 266.0, 270.0, 252.0, 259.0, 266.0, 256.0, 260.0, 239.0, 217.0, 271.0, 251.0, 254.0, 253.0, 254.0, 253.0, 269.0, 301.0, 291.0, 288.0, 283.0, 296.0, 240.0, 276.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6842438570605083, "mean_inference_ms": 1.2065519067365493, "mean_action_processing_ms": 0.13242026295874262, "mean_env_wait_ms": 0.8405006849522966, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 579.0, "episode_reward_min": 237.0, "episode_reward_mean": 532.7, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 114.0}, "policy_reward_max": {"ppo": 301.0}, "policy_reward_mean": {"ppo": 266.35}, "hist_stats": {"episode_reward": [576.0, 570.0, 567.0, 570.0, 513.0, 573.0, 527.0, 519.0, 579.0, 525.0, 533.0, 525.0, 530.0, 533.0, 524.0, 516.0, 519.0, 573.0, 519.0, 522.0, 570.0, 530.0, 525.0, 527.0, 237.0, 507.0, 576.0, 570.0, 576.0, 570.0, 522.0, 530.0, 522.0, 473.0, 573.0, 504.0, 530.0, 576.0, 527.0, 513.0, 476.0, 573.0, 576.0, 522.0, 519.0, 530.0, 422.0, 525.0, 573.0, 527.0, 522.0, 522.0, 516.0, 576.0, 501.0, 530.0, 561.0, 
522.0, 527.0, 573.0, 519.0, 525.0, 519.0, 570.0, 519.0, 516.0, 570.0, 513.0, 503.0, 522.0, 530.0, 570.0, 527.0, 573.0, 567.0, 576.0, 573.0, 519.0, 522.0, 573.0, 579.0, 519.0, 573.0, 516.0, 573.0, 510.0, 476.0, 525.0, 530.0, 522.0, 525.0, 516.0, 456.0, 522.0, 507.0, 507.0, 570.0, 579.0, 579.0, 516.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [293.0, 283.0, 287.0, 283.0, 279.0, 288.0, 284.0, 286.0, 250.0, 263.0, 286.0, 287.0, 267.0, 260.0, 257.0, 262.0, 292.0, 287.0, 260.0, 265.0, 257.0, 276.0, 265.0, 260.0, 260.0, 270.0, 268.0, 265.0, 271.0, 253.0, 266.0, 250.0, 252.0, 267.0, 282.0, 291.0, 260.0, 259.0, 261.0, 261.0, 290.0, 280.0, 268.0, 262.0, 256.0, 269.0, 257.0, 270.0, 123.0, 114.0, 258.0, 249.0, 301.0, 275.0, 293.0, 277.0, 287.0, 289.0, 275.0, 295.0, 254.0, 268.0, 249.0, 281.0, 260.0, 262.0, 239.0, 234.0, 292.0, 281.0, 240.0, 264.0, 265.0, 265.0, 297.0, 279.0, 261.0, 266.0, 244.0, 269.0, 241.0, 235.0, 273.0, 300.0, 285.0, 291.0, 263.0, 259.0, 261.0, 258.0, 257.0, 273.0, 206.0, 216.0, 261.0, 264.0, 298.0, 275.0, 264.0, 263.0, 266.0, 256.0, 267.0, 255.0, 257.0, 259.0, 290.0, 286.0, 250.0, 251.0, 271.0, 259.0, 292.0, 269.0, 267.0, 255.0, 254.0, 273.0, 283.0, 290.0, 267.0, 252.0, 250.0, 275.0, 251.0, 268.0, 286.0, 284.0, 247.0, 272.0, 250.0, 266.0, 295.0, 275.0, 251.0, 262.0, 248.0, 255.0, 255.0, 267.0, 270.0, 260.0, 290.0, 280.0, 273.0, 254.0, 287.0, 286.0, 294.0, 273.0, 287.0, 289.0, 294.0, 279.0, 249.0, 270.0, 265.0, 257.0, 301.0, 272.0, 295.0, 284.0, 270.0, 249.0, 286.0, 
287.0, 242.0, 274.0, 292.0, 281.0, 256.0, 254.0, 253.0, 223.0, 266.0, 259.0, 264.0, 266.0, 270.0, 252.0, 259.0, 266.0, 256.0, 260.0, 239.0, 217.0, 271.0, 251.0, 254.0, 253.0, 254.0, 253.0, 269.0, 301.0, 291.0, 288.0, 283.0, 296.0, 240.0, 276.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6842438570605083, "mean_inference_ms": 1.2065519067365493, "mean_action_processing_ms": 0.13242026295874262, "mean_env_wait_ms": 0.8405006849522966, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 3686400, "num_agent_steps_trained": 3686400, "num_env_steps_sampled": 1843200, "num_env_steps_trained": 1843200, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 1843200, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 3686400, "timers": {"training_iteration_time_ms": 3658.38, "learn_time_ms": 1137.758, "learn_throughput": 11250.194, "synch_weights_time_ms": 12.419}, "counters": {"num_env_steps_sampled": 1843200, "num_env_steps_trained": 1843200, "num_agent_steps_sampled": 3686400, "num_agent_steps_trained": 3686400}, "done": false, "episodes_total": 4608, "training_iteration": 144, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-09-19", "timestamp": 1666580959, "time_this_iter_s": 3.7352027893066406, "time_total_s": 541.2695965766907, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 541.2695965766907, "timesteps_since_restore": 0, "iterations_since_restore": 144, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 21.033333333333335, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 185.0, "sparse_reward_min": 140, "sparse_reward_max": 200, "shaped_reward_mean": 163.88, "shaped_reward_min": 136, "shaped_reward_max": 182, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.96, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 16.31, "onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 14.57, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 15.96, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.28, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.39, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.16, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, 
"useful_onion_drop_agent_1_mean": 0.18, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.36, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 15.54, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.46, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.09, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.55, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.41, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.44, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.3, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.13, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 4.84, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.64, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.76, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.57, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 14.36, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 15.54, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.36, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 15.54, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.00016856600996106863, "policy_loss": -0.000373037182725966, "vf_loss": 
7.367165565490723, "vf_explained_var": 0.690015435218811, "kl": 0.001808413420803845, "entropy": 1.0644876956939697, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 1856000, "num_env_steps_trained": 1856000, "num_agent_steps_sampled": 3712000, "num_agent_steps_trained": 3712000}, "sampler_results": {"episode_reward_max": 582.0, "episode_reward_min": 422.0, "episode_reward_mean": 533.88, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 206.0}, "policy_reward_max": {"ppo": 306.0}, "policy_reward_mean": {"ppo": 266.94}, "custom_metrics": {"sparse_reward_mean": 185.0, "sparse_reward_min": 140, "sparse_reward_max": 200, "shaped_reward_mean": 163.88, "shaped_reward_min": 136, "shaped_reward_max": 182, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.96, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 16.31, 
"onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 14.57, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 15.96, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.28, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.39, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.16, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.18, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.36, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 15.54, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.46, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.09, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.55, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.41, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.44, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.3, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.13, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 4.84, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.64, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.76, "soup_delivery_agent_0_min": 1, 
"soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.57, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 14.36, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 15.54, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.36, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 15.54, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [522.0, 473.0, 573.0, 504.0, 530.0, 576.0, 527.0, 513.0, 476.0, 573.0, 576.0, 522.0, 519.0, 530.0, 422.0, 525.0, 573.0, 527.0, 522.0, 522.0, 516.0, 576.0, 501.0, 530.0, 561.0, 522.0, 527.0, 573.0, 519.0, 525.0, 519.0, 570.0, 519.0, 516.0, 570.0, 513.0, 503.0, 522.0, 530.0, 570.0, 527.0, 573.0, 567.0, 576.0, 573.0, 519.0, 522.0, 573.0, 579.0, 519.0, 573.0, 516.0, 573.0, 510.0, 476.0, 525.0, 530.0, 522.0, 525.0, 516.0, 456.0, 522.0, 507.0, 507.0, 570.0, 579.0, 579.0, 516.0, 573.0, 513.0, 513.0, 516.0, 519.0, 519.0, 518.0, 530.0, 522.0, 524.0, 530.0, 573.0, 524.0, 522.0, 522.0, 522.0, 582.0, 510.0, 576.0, 522.0, 576.0, 519.0, 525.0, 573.0, 573.0, 525.0, 525.0, 581.0, 524.0, 519.0, 579.0, 522.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [260.0, 262.0, 239.0, 234.0, 292.0, 281.0, 240.0, 264.0, 265.0, 265.0, 297.0, 279.0, 261.0, 266.0, 244.0, 269.0, 241.0, 235.0, 273.0, 300.0, 285.0, 291.0, 263.0, 259.0, 261.0, 258.0, 257.0, 273.0, 206.0, 216.0, 261.0, 264.0, 298.0, 275.0, 264.0, 263.0, 266.0, 256.0, 267.0, 255.0, 257.0, 259.0, 290.0, 286.0, 250.0, 251.0, 271.0, 259.0, 292.0, 
269.0, 267.0, 255.0, 254.0, 273.0, 283.0, 290.0, 267.0, 252.0, 250.0, 275.0, 251.0, 268.0, 286.0, 284.0, 247.0, 272.0, 250.0, 266.0, 295.0, 275.0, 251.0, 262.0, 248.0, 255.0, 255.0, 267.0, 270.0, 260.0, 290.0, 280.0, 273.0, 254.0, 287.0, 286.0, 294.0, 273.0, 287.0, 289.0, 294.0, 279.0, 249.0, 270.0, 265.0, 257.0, 301.0, 272.0, 295.0, 284.0, 270.0, 249.0, 286.0, 287.0, 242.0, 274.0, 292.0, 281.0, 256.0, 254.0, 253.0, 223.0, 266.0, 259.0, 264.0, 266.0, 270.0, 252.0, 259.0, 266.0, 256.0, 260.0, 239.0, 217.0, 271.0, 251.0, 254.0, 253.0, 254.0, 253.0, 269.0, 301.0, 291.0, 288.0, 283.0, 296.0, 240.0, 276.0, 277.0, 296.0, 261.0, 252.0, 258.0, 255.0, 251.0, 265.0, 240.0, 279.0, 241.0, 278.0, 258.0, 260.0, 258.0, 272.0, 259.0, 263.0, 249.0, 275.0, 259.0, 271.0, 275.0, 298.0, 251.0, 273.0, 257.0, 265.0, 259.0, 263.0, 266.0, 256.0, 290.0, 292.0, 253.0, 257.0, 306.0, 270.0, 264.0, 258.0, 279.0, 297.0, 251.0, 268.0, 277.0, 248.0, 285.0, 288.0, 288.0, 285.0, 260.0, 265.0, 262.0, 263.0, 292.0, 289.0, 258.0, 266.0, 246.0, 273.0, 294.0, 285.0, 245.0, 277.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.684303727724854, "mean_inference_ms": 1.2064871898020082, "mean_action_processing_ms": 0.13241503366929952, "mean_env_wait_ms": 0.8403057315186702, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 582.0, "episode_reward_min": 422.0, "episode_reward_mean": 533.88, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 206.0}, "policy_reward_max": {"ppo": 306.0}, "policy_reward_mean": {"ppo": 266.94}, "hist_stats": {"episode_reward": [522.0, 473.0, 573.0, 504.0, 530.0, 576.0, 527.0, 513.0, 476.0, 573.0, 576.0, 522.0, 519.0, 530.0, 422.0, 525.0, 573.0, 527.0, 522.0, 522.0, 516.0, 576.0, 501.0, 530.0, 561.0, 522.0, 527.0, 573.0, 519.0, 525.0, 519.0, 570.0, 519.0, 516.0, 570.0, 513.0, 503.0, 522.0, 530.0, 570.0, 527.0, 573.0, 567.0, 576.0, 573.0, 519.0, 522.0, 573.0, 579.0, 519.0, 573.0, 516.0, 573.0, 510.0, 476.0, 525.0, 530.0, 
522.0, 525.0, 516.0, 456.0, 522.0, 507.0, 507.0, 570.0, 579.0, 579.0, 516.0, 573.0, 513.0, 513.0, 516.0, 519.0, 519.0, 518.0, 530.0, 522.0, 524.0, 530.0, 573.0, 524.0, 522.0, 522.0, 522.0, 582.0, 510.0, 576.0, 522.0, 576.0, 519.0, 525.0, 573.0, 573.0, 525.0, 525.0, 581.0, 524.0, 519.0, 579.0, 522.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [260.0, 262.0, 239.0, 234.0, 292.0, 281.0, 240.0, 264.0, 265.0, 265.0, 297.0, 279.0, 261.0, 266.0, 244.0, 269.0, 241.0, 235.0, 273.0, 300.0, 285.0, 291.0, 263.0, 259.0, 261.0, 258.0, 257.0, 273.0, 206.0, 216.0, 261.0, 264.0, 298.0, 275.0, 264.0, 263.0, 266.0, 256.0, 267.0, 255.0, 257.0, 259.0, 290.0, 286.0, 250.0, 251.0, 271.0, 259.0, 292.0, 269.0, 267.0, 255.0, 254.0, 273.0, 283.0, 290.0, 267.0, 252.0, 250.0, 275.0, 251.0, 268.0, 286.0, 284.0, 247.0, 272.0, 250.0, 266.0, 295.0, 275.0, 251.0, 262.0, 248.0, 255.0, 255.0, 267.0, 270.0, 260.0, 290.0, 280.0, 273.0, 254.0, 287.0, 286.0, 294.0, 273.0, 287.0, 289.0, 294.0, 279.0, 249.0, 270.0, 265.0, 257.0, 301.0, 272.0, 295.0, 284.0, 270.0, 249.0, 286.0, 287.0, 242.0, 274.0, 292.0, 281.0, 256.0, 254.0, 253.0, 223.0, 266.0, 259.0, 264.0, 266.0, 270.0, 252.0, 259.0, 266.0, 256.0, 260.0, 239.0, 217.0, 271.0, 251.0, 254.0, 253.0, 254.0, 253.0, 269.0, 301.0, 291.0, 288.0, 283.0, 296.0, 240.0, 276.0, 277.0, 296.0, 261.0, 252.0, 258.0, 255.0, 251.0, 265.0, 240.0, 279.0, 241.0, 278.0, 258.0, 260.0, 258.0, 272.0, 259.0, 263.0, 249.0, 275.0, 259.0, 271.0, 275.0, 298.0, 251.0, 273.0, 257.0, 265.0, 259.0, 
263.0, 266.0, 256.0, 290.0, 292.0, 253.0, 257.0, 306.0, 270.0, 264.0, 258.0, 279.0, 297.0, 251.0, 268.0, 277.0, 248.0, 285.0, 288.0, 288.0, 285.0, 260.0, 265.0, 262.0, 263.0, 292.0, 289.0, 258.0, 266.0, 246.0, 273.0, 294.0, 285.0, 245.0, 277.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.684303727724854, "mean_inference_ms": 1.2064871898020082, "mean_action_processing_ms": 0.13241503366929952, "mean_env_wait_ms": 0.8403057315186702, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 3712000, "num_agent_steps_trained": 3712000, "num_env_steps_sampled": 1856000, "num_env_steps_trained": 1856000, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 1856000, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 3712000, "timers": {"training_iteration_time_ms": 3680.475, "learn_time_ms": 1144.678, "learn_throughput": 11182.18, "synch_weights_time_ms": 12.299}, "counters": {"num_env_steps_sampled": 1856000, "num_env_steps_trained": 1856000, "num_agent_steps_sampled": 3712000, "num_agent_steps_trained": 3712000}, "done": false, "episodes_total": 4640, "training_iteration": 145, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-09-23", "timestamp": 1666580963, "time_this_iter_s": 4.009440183639526, "time_total_s": 545.2790367603302, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 545.2790367603302, "timesteps_since_restore": 0, "iterations_since_restore": 145, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.849999999999998, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 186.4, "sparse_reward_min": 120, "sparse_reward_max": 200, "shaped_reward_mean": 164.41, "shaped_reward_min": 111, "shaped_reward_max": 182, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.31, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 16.0, "onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 15.01, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 15.67, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.21, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.39, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.09, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, 
"useful_onion_drop_agent_1_mean": 0.2, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.78, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 15.22, "potting_onion_agent_1_min": 6, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.26, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.23, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 4.5, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.52, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.38, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.31, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.13, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 4.74, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.73, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.66, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.69, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.78, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 15.22, "optimal_onion_potting_agent_1_min": 6, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.78, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 15.22, "viable_onion_potting_agent_1_min": 6, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.00215164665132761, "policy_loss": 0.001941345864906907, "vf_loss": 
7.410245895385742, "vf_explained_var": 0.6896635890007019, "kl": 0.0020127412863075733, "entropy": 1.061444640159607, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 1868800, "num_env_steps_trained": 1868800, "num_agent_steps_sampled": 3737600, "num_agent_steps_trained": 3737600}, "sampler_results": {"episode_reward_max": 582.0, "episode_reward_min": 351.0, "episode_reward_mean": 537.21, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 173.0}, "policy_reward_max": {"ppo": 306.0}, "policy_reward_mean": {"ppo": 268.605}, "custom_metrics": {"sparse_reward_mean": 186.4, "sparse_reward_min": 120, "sparse_reward_max": 200, "shaped_reward_mean": 164.41, "shaped_reward_min": 111, "shaped_reward_max": 182, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.31, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 16.0, 
"onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 15.01, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 15.67, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.21, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.39, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.09, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.2, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.78, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 15.22, "potting_onion_agent_1_min": 6, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.26, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.23, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 4.5, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.52, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.38, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.31, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.13, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 4.74, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.73, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.66, "soup_delivery_agent_0_min": 1, 
"soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.69, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.78, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 15.22, "optimal_onion_potting_agent_1_min": 6, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.78, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 15.22, "viable_onion_potting_agent_1_min": 6, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [519.0, 516.0, 570.0, 513.0, 503.0, 522.0, 530.0, 570.0, 527.0, 573.0, 567.0, 576.0, 573.0, 519.0, 522.0, 573.0, 579.0, 519.0, 573.0, 516.0, 573.0, 510.0, 476.0, 525.0, 530.0, 522.0, 525.0, 516.0, 456.0, 522.0, 507.0, 507.0, 570.0, 579.0, 579.0, 516.0, 573.0, 513.0, 513.0, 516.0, 519.0, 519.0, 518.0, 530.0, 522.0, 524.0, 530.0, 573.0, 524.0, 522.0, 522.0, 522.0, 582.0, 510.0, 576.0, 522.0, 576.0, 519.0, 525.0, 573.0, 573.0, 525.0, 525.0, 581.0, 524.0, 519.0, 579.0, 522.0, 570.0, 522.0, 570.0, 525.0, 522.0, 516.0, 567.0, 525.0, 570.0, 579.0, 567.0, 570.0, 573.0, 570.0, 573.0, 525.0, 527.0, 522.0, 522.0, 579.0, 525.0, 527.0, 453.0, 504.0, 573.0, 573.0, 351.0, 527.0, 573.0, 579.0, 525.0, 573.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [247.0, 272.0, 250.0, 266.0, 295.0, 275.0, 251.0, 262.0, 248.0, 255.0, 255.0, 267.0, 270.0, 260.0, 290.0, 280.0, 273.0, 254.0, 287.0, 286.0, 294.0, 273.0, 287.0, 289.0, 294.0, 279.0, 249.0, 270.0, 265.0, 257.0, 301.0, 272.0, 295.0, 284.0, 270.0, 249.0, 286.0, 287.0, 242.0, 274.0, 292.0, 281.0, 256.0, 254.0, 253.0, 223.0, 266.0, 259.0, 264.0, 
266.0, 270.0, 252.0, 259.0, 266.0, 256.0, 260.0, 239.0, 217.0, 271.0, 251.0, 254.0, 253.0, 254.0, 253.0, 269.0, 301.0, 291.0, 288.0, 283.0, 296.0, 240.0, 276.0, 277.0, 296.0, 261.0, 252.0, 258.0, 255.0, 251.0, 265.0, 240.0, 279.0, 241.0, 278.0, 258.0, 260.0, 258.0, 272.0, 259.0, 263.0, 249.0, 275.0, 259.0, 271.0, 275.0, 298.0, 251.0, 273.0, 257.0, 265.0, 259.0, 263.0, 266.0, 256.0, 290.0, 292.0, 253.0, 257.0, 306.0, 270.0, 264.0, 258.0, 279.0, 297.0, 251.0, 268.0, 277.0, 248.0, 285.0, 288.0, 288.0, 285.0, 260.0, 265.0, 262.0, 263.0, 292.0, 289.0, 258.0, 266.0, 246.0, 273.0, 294.0, 285.0, 245.0, 277.0, 277.0, 293.0, 257.0, 265.0, 286.0, 284.0, 270.0, 255.0, 264.0, 258.0, 250.0, 266.0, 289.0, 278.0, 265.0, 260.0, 305.0, 265.0, 292.0, 287.0, 276.0, 291.0, 280.0, 290.0, 291.0, 282.0, 282.0, 288.0, 288.0, 285.0, 262.0, 263.0, 265.0, 262.0, 257.0, 265.0, 267.0, 255.0, 278.0, 301.0, 267.0, 258.0, 262.0, 265.0, 213.0, 240.0, 258.0, 246.0, 297.0, 276.0, 290.0, 283.0, 173.0, 178.0, 266.0, 261.0, 283.0, 290.0, 290.0, 289.0, 260.0, 265.0, 299.0, 274.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.684348570132233, "mean_inference_ms": 1.2065833340662058, "mean_action_processing_ms": 0.13241048060485988, "mean_env_wait_ms": 0.8404282247161632, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 582.0, "episode_reward_min": 351.0, "episode_reward_mean": 537.21, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 173.0}, "policy_reward_max": {"ppo": 306.0}, "policy_reward_mean": {"ppo": 268.605}, "hist_stats": {"episode_reward": [519.0, 516.0, 570.0, 513.0, 503.0, 522.0, 530.0, 570.0, 527.0, 573.0, 567.0, 576.0, 573.0, 519.0, 522.0, 573.0, 579.0, 519.0, 573.0, 516.0, 573.0, 510.0, 476.0, 525.0, 530.0, 522.0, 525.0, 516.0, 456.0, 522.0, 507.0, 507.0, 570.0, 579.0, 579.0, 516.0, 573.0, 513.0, 513.0, 516.0, 519.0, 519.0, 518.0, 530.0, 522.0, 524.0, 530.0, 573.0, 524.0, 522.0, 522.0, 522.0, 582.0, 510.0, 576.0, 522.0, 576.0, 
519.0, 525.0, 573.0, 573.0, 525.0, 525.0, 581.0, 524.0, 519.0, 579.0, 522.0, 570.0, 522.0, 570.0, 525.0, 522.0, 516.0, 567.0, 525.0, 570.0, 579.0, 567.0, 570.0, 573.0, 570.0, 573.0, 525.0, 527.0, 522.0, 522.0, 579.0, 525.0, 527.0, 453.0, 504.0, 573.0, 573.0, 351.0, 527.0, 573.0, 579.0, 525.0, 573.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [247.0, 272.0, 250.0, 266.0, 295.0, 275.0, 251.0, 262.0, 248.0, 255.0, 255.0, 267.0, 270.0, 260.0, 290.0, 280.0, 273.0, 254.0, 287.0, 286.0, 294.0, 273.0, 287.0, 289.0, 294.0, 279.0, 249.0, 270.0, 265.0, 257.0, 301.0, 272.0, 295.0, 284.0, 270.0, 249.0, 286.0, 287.0, 242.0, 274.0, 292.0, 281.0, 256.0, 254.0, 253.0, 223.0, 266.0, 259.0, 264.0, 266.0, 270.0, 252.0, 259.0, 266.0, 256.0, 260.0, 239.0, 217.0, 271.0, 251.0, 254.0, 253.0, 254.0, 253.0, 269.0, 301.0, 291.0, 288.0, 283.0, 296.0, 240.0, 276.0, 277.0, 296.0, 261.0, 252.0, 258.0, 255.0, 251.0, 265.0, 240.0, 279.0, 241.0, 278.0, 258.0, 260.0, 258.0, 272.0, 259.0, 263.0, 249.0, 275.0, 259.0, 271.0, 275.0, 298.0, 251.0, 273.0, 257.0, 265.0, 259.0, 263.0, 266.0, 256.0, 290.0, 292.0, 253.0, 257.0, 306.0, 270.0, 264.0, 258.0, 279.0, 297.0, 251.0, 268.0, 277.0, 248.0, 285.0, 288.0, 288.0, 285.0, 260.0, 265.0, 262.0, 263.0, 292.0, 289.0, 258.0, 266.0, 246.0, 273.0, 294.0, 285.0, 245.0, 277.0, 277.0, 293.0, 257.0, 265.0, 286.0, 284.0, 270.0, 255.0, 264.0, 258.0, 250.0, 266.0, 289.0, 278.0, 265.0, 260.0, 305.0, 265.0, 292.0, 287.0, 276.0, 291.0, 280.0, 290.0, 291.0, 282.0, 282.0, 288.0, 288.0, 
285.0, 262.0, 263.0, 265.0, 262.0, 257.0, 265.0, 267.0, 255.0, 278.0, 301.0, 267.0, 258.0, 262.0, 265.0, 213.0, 240.0, 258.0, 246.0, 297.0, 276.0, 290.0, 283.0, 173.0, 178.0, 266.0, 261.0, 283.0, 290.0, 290.0, 289.0, 260.0, 265.0, 299.0, 274.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.684348570132233, "mean_inference_ms": 1.2065833340662058, "mean_action_processing_ms": 0.13241048060485988, "mean_env_wait_ms": 0.8404282247161632, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 3737600, "num_agent_steps_trained": 3737600, "num_env_steps_sampled": 1868800, "num_env_steps_trained": 1868800, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 1868800, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 3737600, "timers": {"training_iteration_time_ms": 3723.772, "learn_time_ms": 1145.054, "learn_throughput": 11178.514, "synch_weights_time_ms": 12.116}, "counters": {"num_env_steps_sampled": 1868800, "num_env_steps_trained": 1868800, "num_agent_steps_sampled": 3737600, "num_agent_steps_trained": 3737600}, "done": false, "episodes_total": 4672, "training_iteration": 146, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-09-27", "timestamp": 1666580967, "time_this_iter_s": 4.079414129257202, "time_total_s": 549.3584508895874, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 549.3584508895874, "timesteps_since_restore": 0, "iterations_since_restore": 146, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 21.383333333333336, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 185.4, "sparse_reward_min": 100, "sparse_reward_max": 200, "shaped_reward_mean": 164.51, "shaped_reward_min": 102, "shaped_reward_max": 182, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.58, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 15.55, "onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 15.29, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 15.25, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.26, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.3, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.14, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, 
"useful_onion_drop_agent_1_mean": 0.18, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.02, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 14.94, "potting_onion_agent_1_min": 6, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.09, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.52, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.29, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.77, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.39, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.42, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.13, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.07, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.58, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.91, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.48, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.85, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.02, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 14.94, "optimal_onion_potting_agent_1_min": 6, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.02, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 14.94, "viable_onion_potting_agent_1_min": 6, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0007207918679341674, "policy_loss": -0.0009259622893296182, "vf_loss": 
7.4250664710998535, "vf_explained_var": 0.6794564723968506, "kl": 0.0020139352418482304, "entropy": 1.0746699571609497, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 1881600, "num_env_steps_trained": 1881600, "num_agent_steps_sampled": 3763200, "num_agent_steps_trained": 3763200}, "sampler_results": {"episode_reward_max": 582.0, "episode_reward_min": 302.0, "episode_reward_mean": 535.31, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 151.0}, "policy_reward_max": {"ppo": 306.0}, "policy_reward_mean": {"ppo": 267.655}, "custom_metrics": {"sparse_reward_mean": 185.4, "sparse_reward_min": 100, "sparse_reward_max": 200, "shaped_reward_mean": 164.51, "shaped_reward_min": 102, "shaped_reward_max": 182, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.58, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 15.55, 
"onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 15.29, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 15.25, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.26, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.3, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.14, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.18, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.02, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 14.94, "potting_onion_agent_1_min": 6, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.09, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.52, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.29, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.77, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.39, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.42, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.13, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.07, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.58, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.91, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.48, "soup_delivery_agent_0_min": 1, 
"soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.85, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.02, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 14.94, "optimal_onion_potting_agent_1_min": 6, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.02, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 14.94, "viable_onion_potting_agent_1_min": 6, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [570.0, 579.0, 579.0, 516.0, 573.0, 513.0, 513.0, 516.0, 519.0, 519.0, 518.0, 530.0, 522.0, 524.0, 530.0, 573.0, 524.0, 522.0, 522.0, 522.0, 582.0, 510.0, 576.0, 522.0, 576.0, 519.0, 525.0, 573.0, 573.0, 525.0, 525.0, 581.0, 524.0, 519.0, 579.0, 522.0, 570.0, 522.0, 570.0, 525.0, 522.0, 516.0, 567.0, 525.0, 570.0, 579.0, 567.0, 570.0, 573.0, 570.0, 573.0, 525.0, 527.0, 522.0, 522.0, 579.0, 525.0, 527.0, 453.0, 504.0, 573.0, 573.0, 351.0, 527.0, 573.0, 579.0, 525.0, 573.0, 530.0, 516.0, 408.0, 567.0, 522.0, 576.0, 524.0, 513.0, 530.0, 510.0, 576.0, 579.0, 524.0, 513.0, 573.0, 504.0, 576.0, 567.0, 581.0, 527.0, 527.0, 530.0, 522.0, 485.0, 516.0, 525.0, 573.0, 522.0, 302.0, 503.0, 539.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [269.0, 301.0, 291.0, 288.0, 283.0, 296.0, 240.0, 276.0, 277.0, 296.0, 261.0, 252.0, 258.0, 255.0, 251.0, 265.0, 240.0, 279.0, 241.0, 278.0, 258.0, 260.0, 258.0, 272.0, 259.0, 263.0, 249.0, 275.0, 259.0, 271.0, 275.0, 298.0, 251.0, 273.0, 257.0, 265.0, 259.0, 263.0, 266.0, 256.0, 290.0, 292.0, 253.0, 257.0, 306.0, 270.0, 264.0, 258.0, 279.0, 
297.0, 251.0, 268.0, 277.0, 248.0, 285.0, 288.0, 288.0, 285.0, 260.0, 265.0, 262.0, 263.0, 292.0, 289.0, 258.0, 266.0, 246.0, 273.0, 294.0, 285.0, 245.0, 277.0, 277.0, 293.0, 257.0, 265.0, 286.0, 284.0, 270.0, 255.0, 264.0, 258.0, 250.0, 266.0, 289.0, 278.0, 265.0, 260.0, 305.0, 265.0, 292.0, 287.0, 276.0, 291.0, 280.0, 290.0, 291.0, 282.0, 282.0, 288.0, 288.0, 285.0, 262.0, 263.0, 265.0, 262.0, 257.0, 265.0, 267.0, 255.0, 278.0, 301.0, 267.0, 258.0, 262.0, 265.0, 213.0, 240.0, 258.0, 246.0, 297.0, 276.0, 290.0, 283.0, 173.0, 178.0, 266.0, 261.0, 283.0, 290.0, 290.0, 289.0, 260.0, 265.0, 299.0, 274.0, 257.0, 273.0, 250.0, 266.0, 210.0, 198.0, 274.0, 293.0, 260.0, 262.0, 294.0, 282.0, 262.0, 262.0, 253.0, 260.0, 259.0, 271.0, 255.0, 255.0, 289.0, 287.0, 292.0, 287.0, 266.0, 258.0, 258.0, 255.0, 286.0, 287.0, 242.0, 262.0, 296.0, 280.0, 288.0, 279.0, 293.0, 288.0, 247.0, 280.0, 278.0, 249.0, 276.0, 254.0, 259.0, 263.0, 235.0, 250.0, 264.0, 252.0, 266.0, 259.0, 289.0, 284.0, 252.0, 270.0, 151.0, 151.0, 257.0, 246.0, 268.0, 271.0, 286.0, 293.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.684336111759047, "mean_inference_ms": 1.2067049974759831, "mean_action_processing_ms": 0.1324082233532618, "mean_env_wait_ms": 0.8405535827561619, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 582.0, "episode_reward_min": 302.0, "episode_reward_mean": 535.31, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 151.0}, "policy_reward_max": {"ppo": 306.0}, "policy_reward_mean": {"ppo": 267.655}, "hist_stats": {"episode_reward": [570.0, 579.0, 579.0, 516.0, 573.0, 513.0, 513.0, 516.0, 519.0, 519.0, 518.0, 530.0, 522.0, 524.0, 530.0, 573.0, 524.0, 522.0, 522.0, 522.0, 582.0, 510.0, 576.0, 522.0, 576.0, 519.0, 525.0, 573.0, 573.0, 525.0, 525.0, 581.0, 524.0, 519.0, 579.0, 522.0, 570.0, 522.0, 570.0, 525.0, 522.0, 516.0, 567.0, 525.0, 570.0, 579.0, 567.0, 570.0, 573.0, 570.0, 573.0, 525.0, 527.0, 522.0, 522.0, 579.0, 525.0, 
527.0, 453.0, 504.0, 573.0, 573.0, 351.0, 527.0, 573.0, 579.0, 525.0, 573.0, 530.0, 516.0, 408.0, 567.0, 522.0, 576.0, 524.0, 513.0, 530.0, 510.0, 576.0, 579.0, 524.0, 513.0, 573.0, 504.0, 576.0, 567.0, 581.0, 527.0, 527.0, 530.0, 522.0, 485.0, 516.0, 525.0, 573.0, 522.0, 302.0, 503.0, 539.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [269.0, 301.0, 291.0, 288.0, 283.0, 296.0, 240.0, 276.0, 277.0, 296.0, 261.0, 252.0, 258.0, 255.0, 251.0, 265.0, 240.0, 279.0, 241.0, 278.0, 258.0, 260.0, 258.0, 272.0, 259.0, 263.0, 249.0, 275.0, 259.0, 271.0, 275.0, 298.0, 251.0, 273.0, 257.0, 265.0, 259.0, 263.0, 266.0, 256.0, 290.0, 292.0, 253.0, 257.0, 306.0, 270.0, 264.0, 258.0, 279.0, 297.0, 251.0, 268.0, 277.0, 248.0, 285.0, 288.0, 288.0, 285.0, 260.0, 265.0, 262.0, 263.0, 292.0, 289.0, 258.0, 266.0, 246.0, 273.0, 294.0, 285.0, 245.0, 277.0, 277.0, 293.0, 257.0, 265.0, 286.0, 284.0, 270.0, 255.0, 264.0, 258.0, 250.0, 266.0, 289.0, 278.0, 265.0, 260.0, 305.0, 265.0, 292.0, 287.0, 276.0, 291.0, 280.0, 290.0, 291.0, 282.0, 282.0, 288.0, 288.0, 285.0, 262.0, 263.0, 265.0, 262.0, 257.0, 265.0, 267.0, 255.0, 278.0, 301.0, 267.0, 258.0, 262.0, 265.0, 213.0, 240.0, 258.0, 246.0, 297.0, 276.0, 290.0, 283.0, 173.0, 178.0, 266.0, 261.0, 283.0, 290.0, 290.0, 289.0, 260.0, 265.0, 299.0, 274.0, 257.0, 273.0, 250.0, 266.0, 210.0, 198.0, 274.0, 293.0, 260.0, 262.0, 294.0, 282.0, 262.0, 262.0, 253.0, 260.0, 259.0, 271.0, 255.0, 255.0, 289.0, 287.0, 292.0, 287.0, 266.0, 258.0, 258.0, 255.0, 286.0, 
287.0, 242.0, 262.0, 296.0, 280.0, 288.0, 279.0, 293.0, 288.0, 247.0, 280.0, 278.0, 249.0, 276.0, 254.0, 259.0, 263.0, 235.0, 250.0, 264.0, 252.0, 266.0, 259.0, 289.0, 284.0, 252.0, 270.0, 151.0, 151.0, 257.0, 246.0, 268.0, 271.0, 286.0, 293.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.684336111759047, "mean_inference_ms": 1.2067049974759831, "mean_action_processing_ms": 0.1324082233532618, "mean_env_wait_ms": 0.8405535827561619, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 3763200, "num_agent_steps_trained": 3763200, "num_env_steps_sampled": 1881600, "num_env_steps_trained": 1881600, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 1881600, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 3763200, "timers": {"training_iteration_time_ms": 3726.949, "learn_time_ms": 1146.605, "learn_throughput": 11163.396, "synch_weights_time_ms": 11.601}, "counters": {"num_env_steps_sampled": 1881600, "num_env_steps_trained": 1881600, "num_agent_steps_sampled": 3763200, "num_agent_steps_trained": 3763200}, "done": false, "episodes_total": 4704, "training_iteration": 147, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-09-31", "timestamp": 1666580971, "time_this_iter_s": 3.6851279735565186, "time_total_s": 553.0435788631439, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 553.0435788631439, "timesteps_since_restore": 0, "iterations_since_restore": 147, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 24.1, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 184.4, "sparse_reward_min": 100, "sparse_reward_max": 200, "shaped_reward_mean": 164.0, "shaped_reward_min": 102, "shaped_reward_max": 181, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.85, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 15.2, "onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 15.54, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 14.95, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.36, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.18, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.2, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, 
"useful_onion_drop_agent_1_mean": 0.09, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.19, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 14.72, "potting_onion_agent_1_min": 6, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.04, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.64, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.29, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.7, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.39, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.47, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.13, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.11, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.53, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.94, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.45, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.85, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.04, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 15.19, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 14.72, "optimal_onion_potting_agent_1_min": 6, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.19, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 14.72, "viable_onion_potting_agent_1_min": 6, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0018294923938810825, "policy_loss": -0.0020315488800406456, "vf_loss": 
7.403303146362305, "vf_explained_var": 0.683193564414978, "kl": 0.0019366566557437181, "entropy": 1.0765454769134521, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 1894400, "num_env_steps_trained": 1894400, "num_agent_steps_sampled": 3788800, "num_agent_steps_trained": 3788800}, "sampler_results": {"episode_reward_max": 581.0, "episode_reward_min": 302.0, "episode_reward_mean": 532.8, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 151.0}, "policy_reward_max": {"ppo": 305.0}, "policy_reward_mean": {"ppo": 266.4}, "custom_metrics": {"sparse_reward_mean": 184.4, "sparse_reward_min": 100, "sparse_reward_max": 200, "shaped_reward_mean": 164.0, "shaped_reward_min": 102, "shaped_reward_max": 181, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.85, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 15.2, 
"onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 15.54, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 14.95, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.36, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.18, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.2, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.09, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.19, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 14.72, "potting_onion_agent_1_min": 6, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.04, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.64, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.29, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.7, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.39, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.47, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.13, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.11, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.53, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.94, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.45, "soup_delivery_agent_0_min": 1, 
"soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.85, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.04, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 15.19, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 14.72, "optimal_onion_potting_agent_1_min": 6, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.19, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 14.72, "viable_onion_potting_agent_1_min": 6, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [524.0, 519.0, 579.0, 522.0, 570.0, 522.0, 570.0, 525.0, 522.0, 516.0, 567.0, 525.0, 570.0, 579.0, 567.0, 570.0, 573.0, 570.0, 573.0, 525.0, 527.0, 522.0, 522.0, 579.0, 525.0, 527.0, 453.0, 504.0, 573.0, 573.0, 351.0, 527.0, 573.0, 579.0, 525.0, 573.0, 530.0, 516.0, 408.0, 567.0, 522.0, 576.0, 524.0, 513.0, 530.0, 510.0, 576.0, 579.0, 524.0, 513.0, 573.0, 504.0, 576.0, 567.0, 581.0, 527.0, 527.0, 530.0, 522.0, 485.0, 516.0, 525.0, 573.0, 522.0, 302.0, 503.0, 539.0, 579.0, 525.0, 570.0, 567.0, 525.0, 527.0, 522.0, 519.0, 573.0, 493.0, 516.0, 479.0, 525.0, 519.0, 573.0, 513.0, 522.0, 513.0, 507.0, 519.0, 579.0, 573.0, 524.0, 567.0, 479.0, 576.0, 516.0, 573.0, 525.0, 530.0, 522.0, 522.0, 527.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [258.0, 266.0, 246.0, 273.0, 294.0, 285.0, 245.0, 277.0, 277.0, 293.0, 257.0, 265.0, 286.0, 284.0, 270.0, 255.0, 264.0, 258.0, 250.0, 266.0, 289.0, 278.0, 265.0, 260.0, 305.0, 265.0, 292.0, 287.0, 276.0, 291.0, 280.0, 290.0, 291.0, 282.0, 282.0, 288.0, 288.0, 285.0, 262.0, 263.0, 265.0, 262.0, 257.0, 265.0, 267.0, 255.0, 278.0, 301.0, 267.0, 
258.0, 262.0, 265.0, 213.0, 240.0, 258.0, 246.0, 297.0, 276.0, 290.0, 283.0, 173.0, 178.0, 266.0, 261.0, 283.0, 290.0, 290.0, 289.0, 260.0, 265.0, 299.0, 274.0, 257.0, 273.0, 250.0, 266.0, 210.0, 198.0, 274.0, 293.0, 260.0, 262.0, 294.0, 282.0, 262.0, 262.0, 253.0, 260.0, 259.0, 271.0, 255.0, 255.0, 289.0, 287.0, 292.0, 287.0, 266.0, 258.0, 258.0, 255.0, 286.0, 287.0, 242.0, 262.0, 296.0, 280.0, 288.0, 279.0, 293.0, 288.0, 247.0, 280.0, 278.0, 249.0, 276.0, 254.0, 259.0, 263.0, 235.0, 250.0, 264.0, 252.0, 266.0, 259.0, 289.0, 284.0, 252.0, 270.0, 151.0, 151.0, 257.0, 246.0, 268.0, 271.0, 286.0, 293.0, 260.0, 265.0, 283.0, 287.0, 279.0, 288.0, 258.0, 267.0, 265.0, 262.0, 271.0, 251.0, 266.0, 253.0, 292.0, 281.0, 235.0, 258.0, 245.0, 271.0, 232.0, 247.0, 262.0, 263.0, 243.0, 276.0, 287.0, 286.0, 252.0, 261.0, 249.0, 273.0, 242.0, 271.0, 243.0, 264.0, 271.0, 248.0, 285.0, 294.0, 294.0, 279.0, 262.0, 262.0, 278.0, 289.0, 241.0, 238.0, 285.0, 291.0, 256.0, 260.0, 285.0, 288.0, 269.0, 256.0, 276.0, 254.0, 255.0, 267.0, 258.0, 264.0, 256.0, 271.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6843041815019703, "mean_inference_ms": 1.206781597770555, "mean_action_processing_ms": 0.1324074581959862, "mean_env_wait_ms": 0.8406561526217006, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 581.0, "episode_reward_min": 302.0, "episode_reward_mean": 532.8, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 151.0}, "policy_reward_max": {"ppo": 305.0}, "policy_reward_mean": {"ppo": 266.4}, "hist_stats": {"episode_reward": [524.0, 519.0, 579.0, 522.0, 570.0, 522.0, 570.0, 525.0, 522.0, 516.0, 567.0, 525.0, 570.0, 579.0, 567.0, 570.0, 573.0, 570.0, 573.0, 525.0, 527.0, 522.0, 522.0, 579.0, 525.0, 527.0, 453.0, 504.0, 573.0, 573.0, 351.0, 527.0, 573.0, 579.0, 525.0, 573.0, 530.0, 516.0, 408.0, 567.0, 522.0, 576.0, 524.0, 513.0, 530.0, 510.0, 576.0, 579.0, 524.0, 513.0, 573.0, 504.0, 576.0, 567.0, 581.0, 527.0, 527.0, 
530.0, 522.0, 485.0, 516.0, 525.0, 573.0, 522.0, 302.0, 503.0, 539.0, 579.0, 525.0, 570.0, 567.0, 525.0, 527.0, 522.0, 519.0, 573.0, 493.0, 516.0, 479.0, 525.0, 519.0, 573.0, 513.0, 522.0, 513.0, 507.0, 519.0, 579.0, 573.0, 524.0, 567.0, 479.0, 576.0, 516.0, 573.0, 525.0, 530.0, 522.0, 522.0, 527.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [258.0, 266.0, 246.0, 273.0, 294.0, 285.0, 245.0, 277.0, 277.0, 293.0, 257.0, 265.0, 286.0, 284.0, 270.0, 255.0, 264.0, 258.0, 250.0, 266.0, 289.0, 278.0, 265.0, 260.0, 305.0, 265.0, 292.0, 287.0, 276.0, 291.0, 280.0, 290.0, 291.0, 282.0, 282.0, 288.0, 288.0, 285.0, 262.0, 263.0, 265.0, 262.0, 257.0, 265.0, 267.0, 255.0, 278.0, 301.0, 267.0, 258.0, 262.0, 265.0, 213.0, 240.0, 258.0, 246.0, 297.0, 276.0, 290.0, 283.0, 173.0, 178.0, 266.0, 261.0, 283.0, 290.0, 290.0, 289.0, 260.0, 265.0, 299.0, 274.0, 257.0, 273.0, 250.0, 266.0, 210.0, 198.0, 274.0, 293.0, 260.0, 262.0, 294.0, 282.0, 262.0, 262.0, 253.0, 260.0, 259.0, 271.0, 255.0, 255.0, 289.0, 287.0, 292.0, 287.0, 266.0, 258.0, 258.0, 255.0, 286.0, 287.0, 242.0, 262.0, 296.0, 280.0, 288.0, 279.0, 293.0, 288.0, 247.0, 280.0, 278.0, 249.0, 276.0, 254.0, 259.0, 263.0, 235.0, 250.0, 264.0, 252.0, 266.0, 259.0, 289.0, 284.0, 252.0, 270.0, 151.0, 151.0, 257.0, 246.0, 268.0, 271.0, 286.0, 293.0, 260.0, 265.0, 283.0, 287.0, 279.0, 288.0, 258.0, 267.0, 265.0, 262.0, 271.0, 251.0, 266.0, 253.0, 292.0, 281.0, 235.0, 258.0, 245.0, 271.0, 232.0, 247.0, 262.0, 263.0, 243.0, 276.0, 287.0, 286.0, 252.0, 
261.0, 249.0, 273.0, 242.0, 271.0, 243.0, 264.0, 271.0, 248.0, 285.0, 294.0, 294.0, 279.0, 262.0, 262.0, 278.0, 289.0, 241.0, 238.0, 285.0, 291.0, 256.0, 260.0, 285.0, 288.0, 269.0, 256.0, 276.0, 254.0, 255.0, 267.0, 258.0, 264.0, 256.0, 271.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6843041815019703, "mean_inference_ms": 1.206781597770555, "mean_action_processing_ms": 0.1324074581959862, "mean_env_wait_ms": 0.8406561526217006, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 3788800, "num_agent_steps_trained": 3788800, "num_env_steps_sampled": 1894400, "num_env_steps_trained": 1894400, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 1894400, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 3788800, "timers": {"training_iteration_time_ms": 3713.706, "learn_time_ms": 1146.201, "learn_throughput": 11167.331, "synch_weights_time_ms": 10.546}, "counters": {"num_env_steps_sampled": 1894400, "num_env_steps_trained": 1894400, "num_agent_steps_sampled": 3788800, "num_agent_steps_trained": 3788800}, "done": false, "episodes_total": 4736, "training_iteration": 148, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-09-35", "timestamp": 1666580975, "time_this_iter_s": 3.6709916591644287, "time_total_s": 556.7145705223083, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 556.7145705223083, "timesteps_since_restore": 0, "iterations_since_restore": 148, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.966666666666665, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 183.6, "sparse_reward_min": 80, "sparse_reward_max": 200, "shaped_reward_mean": 163.28, "shaped_reward_min": 77, "shaped_reward_max": 182, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.11, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 14.77, "onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 15.82, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 14.62, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.38, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.11, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.19, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, 
"useful_onion_drop_agent_1_mean": 0.06, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.45, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 14.38, "potting_onion_agent_1_min": 6, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 4.87, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.94, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 4.11, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.77, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.36, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.65, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.08, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.15, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.38, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.06, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.32, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.95, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.04, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 15.45, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 14.38, "optimal_onion_potting_agent_1_min": 6, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.45, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 14.38, "viable_onion_potting_agent_1_min": 6, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.003550482913851738, "policy_loss": -0.0037556730676442385, "vf_loss": 
7.407064437866211, "vf_explained_var": 0.6945219039916992, "kl": 0.0018070796504616737, "entropy": 1.0710303783416748, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 1907200, "num_env_steps_trained": 1907200, "num_agent_steps_sampled": 3814400, "num_agent_steps_trained": 3814400}, "sampler_results": {"episode_reward_max": 582.0, "episode_reward_min": 237.0, "episode_reward_mean": 530.48, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 114.0}, "policy_reward_max": {"ppo": 306.0}, "policy_reward_mean": {"ppo": 265.24}, "custom_metrics": {"sparse_reward_mean": 183.6, "sparse_reward_min": 80, "sparse_reward_max": 200, "shaped_reward_mean": 163.28, "shaped_reward_min": 77, "shaped_reward_max": 182, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.11, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 14.77, 
"onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 15.82, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 14.62, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.38, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.11, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.19, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.06, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.45, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 14.38, "potting_onion_agent_1_min": 6, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 4.87, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.94, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 4.11, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.77, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.36, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.65, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.08, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.15, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.38, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.06, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.32, "soup_delivery_agent_0_min": 1, 
"soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.95, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.04, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 15.45, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 14.38, "optimal_onion_potting_agent_1_min": 6, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.45, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 14.38, "viable_onion_potting_agent_1_min": 6, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [573.0, 579.0, 525.0, 573.0, 530.0, 516.0, 408.0, 567.0, 522.0, 576.0, 524.0, 513.0, 530.0, 510.0, 576.0, 579.0, 524.0, 513.0, 573.0, 504.0, 576.0, 567.0, 581.0, 527.0, 527.0, 530.0, 522.0, 485.0, 516.0, 525.0, 573.0, 522.0, 302.0, 503.0, 539.0, 579.0, 525.0, 570.0, 567.0, 525.0, 527.0, 522.0, 519.0, 573.0, 493.0, 516.0, 479.0, 525.0, 519.0, 573.0, 513.0, 522.0, 513.0, 507.0, 519.0, 579.0, 573.0, 524.0, 567.0, 479.0, 576.0, 516.0, 573.0, 525.0, 530.0, 522.0, 522.0, 527.0, 570.0, 525.0, 522.0, 564.0, 519.0, 519.0, 525.0, 579.0, 567.0, 570.0, 570.0, 498.0, 525.0, 573.0, 504.0, 237.0, 527.0, 530.0, 525.0, 522.0, 576.0, 525.0, 519.0, 441.0, 527.0, 579.0, 527.0, 582.0, 525.0, 524.0, 576.0, 567.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [283.0, 290.0, 290.0, 289.0, 260.0, 265.0, 299.0, 274.0, 257.0, 273.0, 250.0, 266.0, 210.0, 198.0, 274.0, 293.0, 260.0, 262.0, 294.0, 282.0, 262.0, 262.0, 253.0, 260.0, 259.0, 271.0, 255.0, 255.0, 289.0, 287.0, 292.0, 287.0, 266.0, 258.0, 258.0, 255.0, 286.0, 287.0, 242.0, 262.0, 296.0, 280.0, 288.0, 279.0, 293.0, 288.0, 247.0, 280.0, 278.0, 
249.0, 276.0, 254.0, 259.0, 263.0, 235.0, 250.0, 264.0, 252.0, 266.0, 259.0, 289.0, 284.0, 252.0, 270.0, 151.0, 151.0, 257.0, 246.0, 268.0, 271.0, 286.0, 293.0, 260.0, 265.0, 283.0, 287.0, 279.0, 288.0, 258.0, 267.0, 265.0, 262.0, 271.0, 251.0, 266.0, 253.0, 292.0, 281.0, 235.0, 258.0, 245.0, 271.0, 232.0, 247.0, 262.0, 263.0, 243.0, 276.0, 287.0, 286.0, 252.0, 261.0, 249.0, 273.0, 242.0, 271.0, 243.0, 264.0, 271.0, 248.0, 285.0, 294.0, 294.0, 279.0, 262.0, 262.0, 278.0, 289.0, 241.0, 238.0, 285.0, 291.0, 256.0, 260.0, 285.0, 288.0, 269.0, 256.0, 276.0, 254.0, 255.0, 267.0, 258.0, 264.0, 256.0, 271.0, 284.0, 286.0, 256.0, 269.0, 267.0, 255.0, 270.0, 294.0, 255.0, 264.0, 258.0, 261.0, 265.0, 260.0, 291.0, 288.0, 277.0, 290.0, 294.0, 276.0, 279.0, 291.0, 244.0, 254.0, 269.0, 256.0, 283.0, 290.0, 257.0, 247.0, 123.0, 114.0, 272.0, 255.0, 261.0, 269.0, 270.0, 255.0, 248.0, 274.0, 280.0, 296.0, 268.0, 257.0, 256.0, 263.0, 222.0, 219.0, 270.0, 257.0, 292.0, 287.0, 262.0, 265.0, 276.0, 306.0, 259.0, 266.0, 265.0, 259.0, 279.0, 297.0, 287.0, 280.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6842792572036572, "mean_inference_ms": 1.2066941044549526, "mean_action_processing_ms": 0.13240474735810767, "mean_env_wait_ms": 0.840493100645701, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 582.0, "episode_reward_min": 237.0, "episode_reward_mean": 530.48, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 114.0}, "policy_reward_max": {"ppo": 306.0}, "policy_reward_mean": {"ppo": 265.24}, "hist_stats": {"episode_reward": [573.0, 579.0, 525.0, 573.0, 530.0, 516.0, 408.0, 567.0, 522.0, 576.0, 524.0, 513.0, 530.0, 510.0, 576.0, 579.0, 524.0, 513.0, 573.0, 504.0, 576.0, 567.0, 581.0, 527.0, 527.0, 530.0, 522.0, 485.0, 516.0, 525.0, 573.0, 522.0, 302.0, 503.0, 539.0, 579.0, 525.0, 570.0, 567.0, 525.0, 527.0, 522.0, 519.0, 573.0, 493.0, 516.0, 479.0, 525.0, 519.0, 573.0, 513.0, 522.0, 513.0, 507.0, 519.0, 579.0, 573.0, 
524.0, 567.0, 479.0, 576.0, 516.0, 573.0, 525.0, 530.0, 522.0, 522.0, 527.0, 570.0, 525.0, 522.0, 564.0, 519.0, 519.0, 525.0, 579.0, 567.0, 570.0, 570.0, 498.0, 525.0, 573.0, 504.0, 237.0, 527.0, 530.0, 525.0, 522.0, 576.0, 525.0, 519.0, 441.0, 527.0, 579.0, 527.0, 582.0, 525.0, 524.0, 576.0, 567.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [283.0, 290.0, 290.0, 289.0, 260.0, 265.0, 299.0, 274.0, 257.0, 273.0, 250.0, 266.0, 210.0, 198.0, 274.0, 293.0, 260.0, 262.0, 294.0, 282.0, 262.0, 262.0, 253.0, 260.0, 259.0, 271.0, 255.0, 255.0, 289.0, 287.0, 292.0, 287.0, 266.0, 258.0, 258.0, 255.0, 286.0, 287.0, 242.0, 262.0, 296.0, 280.0, 288.0, 279.0, 293.0, 288.0, 247.0, 280.0, 278.0, 249.0, 276.0, 254.0, 259.0, 263.0, 235.0, 250.0, 264.0, 252.0, 266.0, 259.0, 289.0, 284.0, 252.0, 270.0, 151.0, 151.0, 257.0, 246.0, 268.0, 271.0, 286.0, 293.0, 260.0, 265.0, 283.0, 287.0, 279.0, 288.0, 258.0, 267.0, 265.0, 262.0, 271.0, 251.0, 266.0, 253.0, 292.0, 281.0, 235.0, 258.0, 245.0, 271.0, 232.0, 247.0, 262.0, 263.0, 243.0, 276.0, 287.0, 286.0, 252.0, 261.0, 249.0, 273.0, 242.0, 271.0, 243.0, 264.0, 271.0, 248.0, 285.0, 294.0, 294.0, 279.0, 262.0, 262.0, 278.0, 289.0, 241.0, 238.0, 285.0, 291.0, 256.0, 260.0, 285.0, 288.0, 269.0, 256.0, 276.0, 254.0, 255.0, 267.0, 258.0, 264.0, 256.0, 271.0, 284.0, 286.0, 256.0, 269.0, 267.0, 255.0, 270.0, 294.0, 255.0, 264.0, 258.0, 261.0, 265.0, 260.0, 291.0, 288.0, 277.0, 290.0, 294.0, 276.0, 279.0, 291.0, 244.0, 254.0, 269.0, 256.0, 283.0, 290.0, 257.0, 
247.0, 123.0, 114.0, 272.0, 255.0, 261.0, 269.0, 270.0, 255.0, 248.0, 274.0, 280.0, 296.0, 268.0, 257.0, 256.0, 263.0, 222.0, 219.0, 270.0, 257.0, 292.0, 287.0, 262.0, 265.0, 276.0, 306.0, 259.0, 266.0, 265.0, 259.0, 279.0, 297.0, 287.0, 280.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6842792572036572, "mean_inference_ms": 1.2066941044549526, "mean_action_processing_ms": 0.13240474735810767, "mean_env_wait_ms": 0.840493100645701, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 3814400, "num_agent_steps_trained": 3814400, "num_env_steps_sampled": 1907200, "num_env_steps_trained": 1907200, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 1907200, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 3814400, "timers": {"training_iteration_time_ms": 3731.597, "learn_time_ms": 1168.313, "learn_throughput": 10955.968, "synch_weights_time_ms": 10.569}, "counters": {"num_env_steps_sampled": 1907200, "num_env_steps_trained": 1907200, "num_agent_steps_sampled": 3814400, "num_agent_steps_trained": 3814400}, "done": false, "episodes_total": 4768, "training_iteration": 149, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-09-39", "timestamp": 1666580979, "time_this_iter_s": 3.847028970718384, "time_total_s": 560.5615994930267, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 560.5615994930267, "timesteps_since_restore": 0, "iterations_since_restore": 149, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.4, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 184.6, "sparse_reward_min": 80, "sparse_reward_max": 200, "shaped_reward_mean": 163.27, "shaped_reward_min": 77, "shaped_reward_max": 182, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.18, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 14.92, "onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 15.72, "useful_onion_pickup_agent_0_min": 9, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 14.7, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.43, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.14, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.24, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, 
"useful_onion_drop_agent_1_mean": 0.1, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 15.43, "potting_onion_agent_0_min": 9, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 14.47, "potting_onion_agent_1_min": 6, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 4.93, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.98, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 4.14, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.7, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.37, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.69, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.1, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.18, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.36, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 5.06, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.33, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.97, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.04, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 15.43, "optimal_onion_potting_agent_0_min": 9, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 14.47, "optimal_onion_potting_agent_1_min": 6, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.43, "viable_onion_potting_agent_0_min": 9, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 14.47, "viable_onion_potting_agent_1_min": 6, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.005244456231594086, "policy_loss": -0.005450435448437929, "vf_loss": 
7.37734317779541, "vf_explained_var": 0.6951683759689331, "kl": 0.0019620051607489586, "entropy": 1.0635082721710205, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 1920000, "num_env_steps_trained": 1920000, "num_agent_steps_sampled": 3840000, "num_agent_steps_trained": 3840000}, "sampler_results": {"episode_reward_max": 582.0, "episode_reward_min": 237.0, "episode_reward_mean": 532.47, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 114.0}, "policy_reward_max": {"ppo": 306.0}, "policy_reward_mean": {"ppo": 266.235}, "custom_metrics": {"sparse_reward_mean": 184.6, "sparse_reward_min": 80, "sparse_reward_max": 200, "shaped_reward_mean": 163.27, "shaped_reward_min": 77, "shaped_reward_max": 182, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.18, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 14.92, 
"onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 15.72, "useful_onion_pickup_agent_0_min": 9, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 14.7, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.43, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.14, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.24, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.1, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 15.43, "potting_onion_agent_0_min": 9, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 14.47, "potting_onion_agent_1_min": 6, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 4.93, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.98, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 4.14, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.7, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.37, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.69, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.1, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.18, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.36, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 5.06, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.33, "soup_delivery_agent_0_min": 1, 
"soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.97, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.04, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 15.43, "optimal_onion_potting_agent_0_min": 9, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 14.47, "optimal_onion_potting_agent_1_min": 6, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.43, "viable_onion_potting_agent_0_min": 9, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 14.47, "viable_onion_potting_agent_1_min": 6, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [302.0, 503.0, 539.0, 579.0, 525.0, 570.0, 567.0, 525.0, 527.0, 522.0, 519.0, 573.0, 493.0, 516.0, 479.0, 525.0, 519.0, 573.0, 513.0, 522.0, 513.0, 507.0, 519.0, 579.0, 573.0, 524.0, 567.0, 479.0, 576.0, 516.0, 573.0, 525.0, 530.0, 522.0, 522.0, 527.0, 570.0, 525.0, 522.0, 564.0, 519.0, 519.0, 525.0, 579.0, 567.0, 570.0, 570.0, 498.0, 525.0, 573.0, 504.0, 237.0, 527.0, 530.0, 525.0, 522.0, 576.0, 525.0, 519.0, 441.0, 527.0, 579.0, 527.0, 582.0, 525.0, 524.0, 576.0, 567.0, 582.0, 570.0, 573.0, 525.0, 522.0, 504.0, 570.0, 519.0, 576.0, 522.0, 567.0, 533.0, 333.0, 573.0, 522.0, 570.0, 570.0, 525.0, 513.0, 573.0, 522.0, 522.0, 576.0, 576.0, 570.0, 573.0, 573.0, 567.0, 573.0, 525.0, 521.0, 525.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [151.0, 151.0, 257.0, 246.0, 268.0, 271.0, 286.0, 293.0, 260.0, 265.0, 283.0, 287.0, 279.0, 288.0, 258.0, 267.0, 265.0, 262.0, 271.0, 251.0, 266.0, 253.0, 292.0, 281.0, 235.0, 258.0, 245.0, 271.0, 232.0, 247.0, 262.0, 263.0, 243.0, 276.0, 287.0, 286.0, 252.0, 261.0, 249.0, 273.0, 242.0, 271.0, 243.0, 264.0, 271.0, 248.0, 285.0, 294.0, 294.0, 
279.0, 262.0, 262.0, 278.0, 289.0, 241.0, 238.0, 285.0, 291.0, 256.0, 260.0, 285.0, 288.0, 269.0, 256.0, 276.0, 254.0, 255.0, 267.0, 258.0, 264.0, 256.0, 271.0, 284.0, 286.0, 256.0, 269.0, 267.0, 255.0, 270.0, 294.0, 255.0, 264.0, 258.0, 261.0, 265.0, 260.0, 291.0, 288.0, 277.0, 290.0, 294.0, 276.0, 279.0, 291.0, 244.0, 254.0, 269.0, 256.0, 283.0, 290.0, 257.0, 247.0, 123.0, 114.0, 272.0, 255.0, 261.0, 269.0, 270.0, 255.0, 248.0, 274.0, 280.0, 296.0, 268.0, 257.0, 256.0, 263.0, 222.0, 219.0, 270.0, 257.0, 292.0, 287.0, 262.0, 265.0, 276.0, 306.0, 259.0, 266.0, 265.0, 259.0, 279.0, 297.0, 287.0, 280.0, 299.0, 283.0, 296.0, 274.0, 287.0, 286.0, 253.0, 272.0, 256.0, 266.0, 248.0, 256.0, 284.0, 286.0, 261.0, 258.0, 283.0, 293.0, 258.0, 264.0, 286.0, 281.0, 265.0, 268.0, 169.0, 164.0, 286.0, 287.0, 259.0, 263.0, 282.0, 288.0, 274.0, 296.0, 263.0, 262.0, 272.0, 241.0, 277.0, 296.0, 263.0, 259.0, 271.0, 251.0, 299.0, 277.0, 296.0, 280.0, 293.0, 277.0, 283.0, 290.0, 283.0, 290.0, 272.0, 295.0, 287.0, 286.0, 260.0, 265.0, 259.0, 262.0, 251.0, 274.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6842531086706194, "mean_inference_ms": 1.2065739664225887, "mean_action_processing_ms": 0.13239801410378102, "mean_env_wait_ms": 0.8402566274765949, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 582.0, "episode_reward_min": 237.0, "episode_reward_mean": 532.47, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 114.0}, "policy_reward_max": {"ppo": 306.0}, "policy_reward_mean": {"ppo": 266.235}, "hist_stats": {"episode_reward": [302.0, 503.0, 539.0, 579.0, 525.0, 570.0, 567.0, 525.0, 527.0, 522.0, 519.0, 573.0, 493.0, 516.0, 479.0, 525.0, 519.0, 573.0, 513.0, 522.0, 513.0, 507.0, 519.0, 579.0, 573.0, 524.0, 567.0, 479.0, 576.0, 516.0, 573.0, 525.0, 530.0, 522.0, 522.0, 527.0, 570.0, 525.0, 522.0, 564.0, 519.0, 519.0, 525.0, 579.0, 567.0, 570.0, 570.0, 498.0, 525.0, 573.0, 504.0, 237.0, 527.0, 530.0, 525.0, 522.0, 
576.0, 525.0, 519.0, 441.0, 527.0, 579.0, 527.0, 582.0, 525.0, 524.0, 576.0, 567.0, 582.0, 570.0, 573.0, 525.0, 522.0, 504.0, 570.0, 519.0, 576.0, 522.0, 567.0, 533.0, 333.0, 573.0, 522.0, 570.0, 570.0, 525.0, 513.0, 573.0, 522.0, 522.0, 576.0, 576.0, 570.0, 573.0, 573.0, 567.0, 573.0, 525.0, 521.0, 525.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [151.0, 151.0, 257.0, 246.0, 268.0, 271.0, 286.0, 293.0, 260.0, 265.0, 283.0, 287.0, 279.0, 288.0, 258.0, 267.0, 265.0, 262.0, 271.0, 251.0, 266.0, 253.0, 292.0, 281.0, 235.0, 258.0, 245.0, 271.0, 232.0, 247.0, 262.0, 263.0, 243.0, 276.0, 287.0, 286.0, 252.0, 261.0, 249.0, 273.0, 242.0, 271.0, 243.0, 264.0, 271.0, 248.0, 285.0, 294.0, 294.0, 279.0, 262.0, 262.0, 278.0, 289.0, 241.0, 238.0, 285.0, 291.0, 256.0, 260.0, 285.0, 288.0, 269.0, 256.0, 276.0, 254.0, 255.0, 267.0, 258.0, 264.0, 256.0, 271.0, 284.0, 286.0, 256.0, 269.0, 267.0, 255.0, 270.0, 294.0, 255.0, 264.0, 258.0, 261.0, 265.0, 260.0, 291.0, 288.0, 277.0, 290.0, 294.0, 276.0, 279.0, 291.0, 244.0, 254.0, 269.0, 256.0, 283.0, 290.0, 257.0, 247.0, 123.0, 114.0, 272.0, 255.0, 261.0, 269.0, 270.0, 255.0, 248.0, 274.0, 280.0, 296.0, 268.0, 257.0, 256.0, 263.0, 222.0, 219.0, 270.0, 257.0, 292.0, 287.0, 262.0, 265.0, 276.0, 306.0, 259.0, 266.0, 265.0, 259.0, 279.0, 297.0, 287.0, 280.0, 299.0, 283.0, 296.0, 274.0, 287.0, 286.0, 253.0, 272.0, 256.0, 266.0, 248.0, 256.0, 284.0, 286.0, 261.0, 258.0, 283.0, 293.0, 258.0, 264.0, 286.0, 281.0, 265.0, 268.0, 169.0, 164.0, 286.0, 287.0, 
259.0, 263.0, 282.0, 288.0, 274.0, 296.0, 263.0, 262.0, 272.0, 241.0, 277.0, 296.0, 263.0, 259.0, 271.0, 251.0, 299.0, 277.0, 296.0, 280.0, 293.0, 277.0, 283.0, 290.0, 283.0, 290.0, 272.0, 295.0, 287.0, 286.0, 260.0, 265.0, 259.0, 262.0, 251.0, 274.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6842531086706194, "mean_inference_ms": 1.2065739664225887, "mean_action_processing_ms": 0.13239801410378102, "mean_env_wait_ms": 0.8402566274765949, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 3840000, "num_agent_steps_trained": 3840000, "num_env_steps_sampled": 1920000, "num_env_steps_trained": 1920000, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 1920000, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 3840000, "timers": {"training_iteration_time_ms": 3727.268, "learn_time_ms": 1171.553, "learn_throughput": 10925.667, "synch_weights_time_ms": 11.124}, "counters": {"num_env_steps_sampled": 1920000, "num_env_steps_trained": 1920000, "num_agent_steps_sampled": 3840000, "num_agent_steps_trained": 3840000}, "done": false, "episodes_total": 4800, "training_iteration": 150, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-09-43", "timestamp": 1666580983, "time_this_iter_s": 3.611518144607544, "time_total_s": 564.1731176376343, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 564.1731176376343, "timesteps_since_restore": 0, "iterations_since_restore": 150, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 22.58333333333333, "ram_util_percent": 10.616666666666667}}
+{"custom_metrics": {"sparse_reward_mean": 188.2, "sparse_reward_min": 80, "sparse_reward_max": 200, "shaped_reward_mean": 165.17, "shaped_reward_min": 77, "shaped_reward_max": 182, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.36, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 15.3, "onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 15.89, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 15.08, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 26, "onion_drop_agent_0_mean": 0.38, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.25, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.22, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, 
"useful_onion_drop_agent_1_mean": 0.17, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 15.62, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 14.68, "potting_onion_agent_1_min": 6, "potting_onion_agent_1_max": 26, "dish_pickup_agent_0_mean": 4.93, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.88, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 4.23, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.66, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.32, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.67, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 6, "useful_dish_drop_agent_0_mean": 0.1, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.16, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.47, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 5.05, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 4.43, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 5.0, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.62, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 14.68, "optimal_onion_potting_agent_1_min": 6, "optimal_onion_potting_agent_1_max": 26, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.62, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 14.68, "viable_onion_potting_agent_1_min": 6, "viable_onion_potting_agent_1_max": 26, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.00285069621168077, "policy_loss": -0.0030640815384685993, "vf_loss": 
7.426201820373535, "vf_explained_var": 0.6838560104370117, "kl": 0.002210653852671385, "entropy": 1.0584666728973389, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 1932800, "num_env_steps_trained": 1932800, "num_agent_steps_sampled": 3865600, "num_agent_steps_trained": 3865600}, "sampler_results": {"episode_reward_max": 582.0, "episode_reward_min": 237.0, "episode_reward_mean": 541.57, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 114.0}, "policy_reward_max": {"ppo": 308.0}, "policy_reward_mean": {"ppo": 270.785}, "custom_metrics": {"sparse_reward_mean": 188.2, "sparse_reward_min": 80, "sparse_reward_max": 200, "shaped_reward_mean": 165.17, "shaped_reward_min": 77, "shaped_reward_max": 182, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.36, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 15.3, 
"onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 15.89, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 15.08, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 26, "onion_drop_agent_0_mean": 0.38, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.25, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.22, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.17, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 15.62, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 14.68, "potting_onion_agent_1_min": 6, "potting_onion_agent_1_max": 26, "dish_pickup_agent_0_mean": 4.93, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.88, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 4.23, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.66, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.32, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.67, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 6, "useful_dish_drop_agent_0_mean": 0.1, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.16, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.47, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 5.05, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 4.43, "soup_delivery_agent_0_min": 1, 
"soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 5.0, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.62, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 14.68, "optimal_onion_potting_agent_1_min": 6, "optimal_onion_potting_agent_1_max": 26, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.62, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 14.68, "viable_onion_potting_agent_1_min": 6, "viable_onion_potting_agent_1_max": 26, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [530.0, 522.0, 522.0, 527.0, 570.0, 525.0, 522.0, 564.0, 519.0, 519.0, 525.0, 579.0, 567.0, 570.0, 570.0, 498.0, 525.0, 573.0, 504.0, 237.0, 527.0, 530.0, 525.0, 522.0, 576.0, 525.0, 519.0, 441.0, 527.0, 579.0, 527.0, 582.0, 525.0, 524.0, 576.0, 567.0, 582.0, 570.0, 573.0, 525.0, 522.0, 504.0, 570.0, 519.0, 576.0, 522.0, 567.0, 533.0, 333.0, 573.0, 522.0, 570.0, 570.0, 525.0, 513.0, 573.0, 522.0, 522.0, 576.0, 576.0, 570.0, 573.0, 573.0, 567.0, 573.0, 525.0, 521.0, 525.0, 576.0, 570.0, 573.0, 519.0, 573.0, 570.0, 549.0, 516.0, 522.0, 522.0, 573.0, 573.0, 576.0, 519.0, 530.0, 573.0, 516.0, 576.0, 570.0, 579.0, 576.0, 530.0, 579.0, 579.0, 570.0, 573.0, 456.0, 579.0, 510.0, 576.0, 576.0, 573.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [276.0, 254.0, 255.0, 267.0, 258.0, 264.0, 256.0, 271.0, 284.0, 286.0, 256.0, 269.0, 267.0, 255.0, 270.0, 294.0, 255.0, 264.0, 258.0, 261.0, 265.0, 260.0, 291.0, 288.0, 277.0, 290.0, 294.0, 276.0, 279.0, 291.0, 244.0, 254.0, 269.0, 256.0, 283.0, 290.0, 257.0, 247.0, 123.0, 114.0, 272.0, 255.0, 261.0, 269.0, 270.0, 255.0, 248.0, 274.0, 280.0, 
296.0, 268.0, 257.0, 256.0, 263.0, 222.0, 219.0, 270.0, 257.0, 292.0, 287.0, 262.0, 265.0, 276.0, 306.0, 259.0, 266.0, 265.0, 259.0, 279.0, 297.0, 287.0, 280.0, 299.0, 283.0, 296.0, 274.0, 287.0, 286.0, 253.0, 272.0, 256.0, 266.0, 248.0, 256.0, 284.0, 286.0, 261.0, 258.0, 283.0, 293.0, 258.0, 264.0, 286.0, 281.0, 265.0, 268.0, 169.0, 164.0, 286.0, 287.0, 259.0, 263.0, 282.0, 288.0, 274.0, 296.0, 263.0, 262.0, 272.0, 241.0, 277.0, 296.0, 263.0, 259.0, 271.0, 251.0, 299.0, 277.0, 296.0, 280.0, 293.0, 277.0, 283.0, 290.0, 283.0, 290.0, 272.0, 295.0, 287.0, 286.0, 260.0, 265.0, 259.0, 262.0, 251.0, 274.0, 293.0, 283.0, 284.0, 286.0, 297.0, 276.0, 252.0, 267.0, 274.0, 299.0, 282.0, 288.0, 274.0, 275.0, 263.0, 253.0, 254.0, 268.0, 263.0, 259.0, 288.0, 285.0, 282.0, 291.0, 280.0, 296.0, 262.0, 257.0, 260.0, 270.0, 272.0, 301.0, 262.0, 254.0, 292.0, 284.0, 281.0, 289.0, 291.0, 288.0, 288.0, 288.0, 269.0, 261.0, 308.0, 271.0, 282.0, 297.0, 281.0, 289.0, 282.0, 291.0, 223.0, 233.0, 293.0, 286.0, 261.0, 249.0, 288.0, 288.0, 292.0, 284.0, 278.0, 295.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6842379170569299, "mean_inference_ms": 1.2064662067783543, "mean_action_processing_ms": 0.13238804610453048, "mean_env_wait_ms": 0.8400068049251856, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 582.0, "episode_reward_min": 237.0, "episode_reward_mean": 541.57, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 114.0}, "policy_reward_max": {"ppo": 308.0}, "policy_reward_mean": {"ppo": 270.785}, "hist_stats": {"episode_reward": [530.0, 522.0, 522.0, 527.0, 570.0, 525.0, 522.0, 564.0, 519.0, 519.0, 525.0, 579.0, 567.0, 570.0, 570.0, 498.0, 525.0, 573.0, 504.0, 237.0, 527.0, 530.0, 525.0, 522.0, 576.0, 525.0, 519.0, 441.0, 527.0, 579.0, 527.0, 582.0, 525.0, 524.0, 576.0, 567.0, 582.0, 570.0, 573.0, 525.0, 522.0, 504.0, 570.0, 519.0, 576.0, 522.0, 567.0, 533.0, 333.0, 573.0, 522.0, 570.0, 570.0, 525.0, 513.0, 573.0, 
522.0, 522.0, 576.0, 576.0, 570.0, 573.0, 573.0, 567.0, 573.0, 525.0, 521.0, 525.0, 576.0, 570.0, 573.0, 519.0, 573.0, 570.0, 549.0, 516.0, 522.0, 522.0, 573.0, 573.0, 576.0, 519.0, 530.0, 573.0, 516.0, 576.0, 570.0, 579.0, 576.0, 530.0, 579.0, 579.0, 570.0, 573.0, 456.0, 579.0, 510.0, 576.0, 576.0, 573.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [276.0, 254.0, 255.0, 267.0, 258.0, 264.0, 256.0, 271.0, 284.0, 286.0, 256.0, 269.0, 267.0, 255.0, 270.0, 294.0, 255.0, 264.0, 258.0, 261.0, 265.0, 260.0, 291.0, 288.0, 277.0, 290.0, 294.0, 276.0, 279.0, 291.0, 244.0, 254.0, 269.0, 256.0, 283.0, 290.0, 257.0, 247.0, 123.0, 114.0, 272.0, 255.0, 261.0, 269.0, 270.0, 255.0, 248.0, 274.0, 280.0, 296.0, 268.0, 257.0, 256.0, 263.0, 222.0, 219.0, 270.0, 257.0, 292.0, 287.0, 262.0, 265.0, 276.0, 306.0, 259.0, 266.0, 265.0, 259.0, 279.0, 297.0, 287.0, 280.0, 299.0, 283.0, 296.0, 274.0, 287.0, 286.0, 253.0, 272.0, 256.0, 266.0, 248.0, 256.0, 284.0, 286.0, 261.0, 258.0, 283.0, 293.0, 258.0, 264.0, 286.0, 281.0, 265.0, 268.0, 169.0, 164.0, 286.0, 287.0, 259.0, 263.0, 282.0, 288.0, 274.0, 296.0, 263.0, 262.0, 272.0, 241.0, 277.0, 296.0, 263.0, 259.0, 271.0, 251.0, 299.0, 277.0, 296.0, 280.0, 293.0, 277.0, 283.0, 290.0, 283.0, 290.0, 272.0, 295.0, 287.0, 286.0, 260.0, 265.0, 259.0, 262.0, 251.0, 274.0, 293.0, 283.0, 284.0, 286.0, 297.0, 276.0, 252.0, 267.0, 274.0, 299.0, 282.0, 288.0, 274.0, 275.0, 263.0, 253.0, 254.0, 268.0, 263.0, 259.0, 288.0, 285.0, 282.0, 291.0, 280.0, 296.0, 262.0, 257.0, 
260.0, 270.0, 272.0, 301.0, 262.0, 254.0, 292.0, 284.0, 281.0, 289.0, 291.0, 288.0, 288.0, 288.0, 269.0, 261.0, 308.0, 271.0, 282.0, 297.0, 281.0, 289.0, 282.0, 291.0, 223.0, 233.0, 293.0, 286.0, 261.0, 249.0, 288.0, 288.0, 292.0, 284.0, 278.0, 295.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6842379170569299, "mean_inference_ms": 1.2064662067783543, "mean_action_processing_ms": 0.13238804610453048, "mean_env_wait_ms": 0.8400068049251856, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 3865600, "num_agent_steps_trained": 3865600, "num_env_steps_sampled": 1932800, "num_env_steps_trained": 1932800, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 1932800, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 3865600, "timers": {"training_iteration_time_ms": 3704.776, "learn_time_ms": 1153.444, "learn_throughput": 11097.201, "synch_weights_time_ms": 11.24}, "counters": {"num_env_steps_sampled": 1932800, "num_env_steps_trained": 1932800, "num_agent_steps_sampled": 3865600, "num_agent_steps_trained": 3865600}, "done": false, "episodes_total": 4832, "training_iteration": 151, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-09-46", "timestamp": 1666580986, "time_this_iter_s": 3.5203089714050293, "time_total_s": 567.6934266090393, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 567.6934266090393, "timesteps_since_restore": 0, "iterations_since_restore": 151, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.68, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 189.8, "sparse_reward_min": 120, "sparse_reward_max": 200, "shaped_reward_mean": 167.1, "shaped_reward_min": 93, "shaped_reward_max": 182, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.1, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 16.01, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 15.52, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 15.69, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 26, "onion_drop_agent_0_mean": 0.47, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.31, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.32, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, 
"useful_onion_drop_agent_1_mean": 0.22, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 15.25, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 15.33, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 26, "dish_pickup_agent_0_mean": 5.32, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.6, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 14, "useful_dish_pickup_agent_0_mean": 4.61, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.46, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.41, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.61, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 7, "useful_dish_drop_agent_0_mean": 0.14, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.16, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 4.78, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.85, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 4.72, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.79, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.25, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 15.33, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 26, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.25, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 15.33, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 26, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0008631563978269696, "policy_loss": 0.0006459215073846281, "vf_loss": 
7.484288692474365, "vf_explained_var": 0.6832795143127441, "kl": 0.0022935159504413605, "entropy": 1.0623841285705566, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 1945600, "num_env_steps_trained": 1945600, "num_agent_steps_sampled": 3891200, "num_agent_steps_trained": 3891200}, "sampler_results": {"episode_reward_max": 582.0, "episode_reward_min": 333.0, "episode_reward_mean": 546.7, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 164.0}, "policy_reward_max": {"ppo": 308.0}, "policy_reward_mean": {"ppo": 273.35}, "custom_metrics": {"sparse_reward_mean": 189.8, "sparse_reward_min": 120, "sparse_reward_max": 200, "shaped_reward_mean": 167.1, "shaped_reward_min": 93, "shaped_reward_max": 182, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.1, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 16.01, 
"onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 15.52, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 15.69, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 26, "onion_drop_agent_0_mean": 0.47, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.31, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.32, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.22, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 15.25, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 15.33, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 26, "dish_pickup_agent_0_mean": 5.32, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.6, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 14, "useful_dish_pickup_agent_0_mean": 4.61, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.46, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.41, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.61, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 7, "useful_dish_drop_agent_0_mean": 0.14, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.16, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 4.78, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.85, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 4.72, "soup_delivery_agent_0_min": 1, 
"soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.79, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.25, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 15.33, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 26, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.25, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 15.33, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 26, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [525.0, 524.0, 576.0, 567.0, 582.0, 570.0, 573.0, 525.0, 522.0, 504.0, 570.0, 519.0, 576.0, 522.0, 567.0, 533.0, 333.0, 573.0, 522.0, 570.0, 570.0, 525.0, 513.0, 573.0, 522.0, 522.0, 576.0, 576.0, 570.0, 573.0, 573.0, 567.0, 573.0, 525.0, 521.0, 525.0, 576.0, 570.0, 573.0, 519.0, 573.0, 570.0, 549.0, 516.0, 522.0, 522.0, 573.0, 573.0, 576.0, 519.0, 530.0, 573.0, 516.0, 576.0, 570.0, 579.0, 576.0, 530.0, 579.0, 579.0, 570.0, 573.0, 456.0, 579.0, 510.0, 576.0, 576.0, 573.0, 522.0, 527.0, 525.0, 573.0, 561.0, 530.0, 525.0, 573.0, 516.0, 530.0, 573.0, 576.0, 579.0, 579.0, 573.0, 501.0, 527.0, 533.0, 579.0, 530.0, 579.0, 519.0, 522.0, 570.0, 525.0, 510.0, 573.0, 582.0, 530.0, 576.0, 473.0, 470.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [259.0, 266.0, 265.0, 259.0, 279.0, 297.0, 287.0, 280.0, 299.0, 283.0, 296.0, 274.0, 287.0, 286.0, 253.0, 272.0, 256.0, 266.0, 248.0, 256.0, 284.0, 286.0, 261.0, 258.0, 283.0, 293.0, 258.0, 264.0, 286.0, 281.0, 265.0, 268.0, 169.0, 164.0, 286.0, 287.0, 259.0, 263.0, 282.0, 288.0, 274.0, 296.0, 263.0, 262.0, 272.0, 241.0, 277.0, 296.0, 263.0, 
259.0, 271.0, 251.0, 299.0, 277.0, 296.0, 280.0, 293.0, 277.0, 283.0, 290.0, 283.0, 290.0, 272.0, 295.0, 287.0, 286.0, 260.0, 265.0, 259.0, 262.0, 251.0, 274.0, 293.0, 283.0, 284.0, 286.0, 297.0, 276.0, 252.0, 267.0, 274.0, 299.0, 282.0, 288.0, 274.0, 275.0, 263.0, 253.0, 254.0, 268.0, 263.0, 259.0, 288.0, 285.0, 282.0, 291.0, 280.0, 296.0, 262.0, 257.0, 260.0, 270.0, 272.0, 301.0, 262.0, 254.0, 292.0, 284.0, 281.0, 289.0, 291.0, 288.0, 288.0, 288.0, 269.0, 261.0, 308.0, 271.0, 282.0, 297.0, 281.0, 289.0, 282.0, 291.0, 223.0, 233.0, 293.0, 286.0, 261.0, 249.0, 288.0, 288.0, 292.0, 284.0, 278.0, 295.0, 255.0, 267.0, 274.0, 253.0, 268.0, 257.0, 268.0, 305.0, 301.0, 260.0, 273.0, 257.0, 258.0, 267.0, 295.0, 278.0, 245.0, 271.0, 272.0, 258.0, 292.0, 281.0, 290.0, 286.0, 290.0, 289.0, 290.0, 289.0, 281.0, 292.0, 253.0, 248.0, 261.0, 266.0, 265.0, 268.0, 281.0, 298.0, 270.0, 260.0, 295.0, 284.0, 270.0, 249.0, 255.0, 267.0, 295.0, 275.0, 265.0, 260.0, 255.0, 255.0, 267.0, 306.0, 302.0, 280.0, 251.0, 279.0, 293.0, 283.0, 243.0, 230.0, 239.0, 231.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6841903962359359, "mean_inference_ms": 1.2063546072455014, "mean_action_processing_ms": 0.13237385340172605, "mean_env_wait_ms": 0.8397353790639168, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 582.0, "episode_reward_min": 333.0, "episode_reward_mean": 546.7, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 164.0}, "policy_reward_max": {"ppo": 308.0}, "policy_reward_mean": {"ppo": 273.35}, "hist_stats": {"episode_reward": [525.0, 524.0, 576.0, 567.0, 582.0, 570.0, 573.0, 525.0, 522.0, 504.0, 570.0, 519.0, 576.0, 522.0, 567.0, 533.0, 333.0, 573.0, 522.0, 570.0, 570.0, 525.0, 513.0, 573.0, 522.0, 522.0, 576.0, 576.0, 570.0, 573.0, 573.0, 567.0, 573.0, 525.0, 521.0, 525.0, 576.0, 570.0, 573.0, 519.0, 573.0, 570.0, 549.0, 516.0, 522.0, 522.0, 573.0, 573.0, 576.0, 519.0, 530.0, 573.0, 516.0, 576.0, 570.0, 579.0, 576.0, 
530.0, 579.0, 579.0, 570.0, 573.0, 456.0, 579.0, 510.0, 576.0, 576.0, 573.0, 522.0, 527.0, 525.0, 573.0, 561.0, 530.0, 525.0, 573.0, 516.0, 530.0, 573.0, 576.0, 579.0, 579.0, 573.0, 501.0, 527.0, 533.0, 579.0, 530.0, 579.0, 519.0, 522.0, 570.0, 525.0, 510.0, 573.0, 582.0, 530.0, 576.0, 473.0, 470.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [259.0, 266.0, 265.0, 259.0, 279.0, 297.0, 287.0, 280.0, 299.0, 283.0, 296.0, 274.0, 287.0, 286.0, 253.0, 272.0, 256.0, 266.0, 248.0, 256.0, 284.0, 286.0, 261.0, 258.0, 283.0, 293.0, 258.0, 264.0, 286.0, 281.0, 265.0, 268.0, 169.0, 164.0, 286.0, 287.0, 259.0, 263.0, 282.0, 288.0, 274.0, 296.0, 263.0, 262.0, 272.0, 241.0, 277.0, 296.0, 263.0, 259.0, 271.0, 251.0, 299.0, 277.0, 296.0, 280.0, 293.0, 277.0, 283.0, 290.0, 283.0, 290.0, 272.0, 295.0, 287.0, 286.0, 260.0, 265.0, 259.0, 262.0, 251.0, 274.0, 293.0, 283.0, 284.0, 286.0, 297.0, 276.0, 252.0, 267.0, 274.0, 299.0, 282.0, 288.0, 274.0, 275.0, 263.0, 253.0, 254.0, 268.0, 263.0, 259.0, 288.0, 285.0, 282.0, 291.0, 280.0, 296.0, 262.0, 257.0, 260.0, 270.0, 272.0, 301.0, 262.0, 254.0, 292.0, 284.0, 281.0, 289.0, 291.0, 288.0, 288.0, 288.0, 269.0, 261.0, 308.0, 271.0, 282.0, 297.0, 281.0, 289.0, 282.0, 291.0, 223.0, 233.0, 293.0, 286.0, 261.0, 249.0, 288.0, 288.0, 292.0, 284.0, 278.0, 295.0, 255.0, 267.0, 274.0, 253.0, 268.0, 257.0, 268.0, 305.0, 301.0, 260.0, 273.0, 257.0, 258.0, 267.0, 295.0, 278.0, 245.0, 271.0, 272.0, 258.0, 292.0, 281.0, 290.0, 286.0, 290.0, 289.0, 290.0, 289.0, 281.0, 
292.0, 253.0, 248.0, 261.0, 266.0, 265.0, 268.0, 281.0, 298.0, 270.0, 260.0, 295.0, 284.0, 270.0, 249.0, 255.0, 267.0, 295.0, 275.0, 265.0, 260.0, 255.0, 255.0, 267.0, 306.0, 302.0, 280.0, 251.0, 279.0, 293.0, 283.0, 243.0, 230.0, 239.0, 231.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6841903962359359, "mean_inference_ms": 1.2063546072455014, "mean_action_processing_ms": 0.13237385340172605, "mean_env_wait_ms": 0.8397353790639168, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 3891200, "num_agent_steps_trained": 3891200, "num_env_steps_sampled": 1945600, "num_env_steps_trained": 1945600, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 1945600, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 3891200, "timers": {"training_iteration_time_ms": 3703.336, "learn_time_ms": 1148.195, "learn_throughput": 11147.932, "synch_weights_time_ms": 10.616}, "counters": {"num_env_steps_sampled": 1945600, "num_env_steps_trained": 1945600, "num_agent_steps_sampled": 3891200, "num_agent_steps_trained": 3891200}, "done": false, "episodes_total": 4864, "training_iteration": 152, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-09-50", "timestamp": 1666580990, "time_this_iter_s": 3.713094472885132, "time_total_s": 571.4065210819244, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 571.4065210819244, "timesteps_since_restore": 0, "iterations_since_restore": 152, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.683333333333334, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 189.6, "sparse_reward_min": 160, "sparse_reward_max": 200, "shaped_reward_mean": 167.69, "shaped_reward_min": 136, "shaped_reward_max": 187, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.7, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 16.39, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 15.22, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 16.02, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 26, "onion_drop_agent_0_mean": 0.39, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.32, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.26, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, 
"useful_onion_drop_agent_1_mean": 0.19, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.93, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 15.7, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 26, "dish_pickup_agent_0_mean": 5.44, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.43, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 14, "useful_dish_pickup_agent_0_mean": 4.76, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.44, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.39, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.52, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 7, "useful_dish_drop_agent_0_mean": 0.12, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.12, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 4.9, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.75, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 4.81, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.68, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.93, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 15.7, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 26, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.93, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 15.7, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 26, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.002908505266532302, "policy_loss": -0.003109340090304613, "vf_loss": 
7.339377403259277, "vf_explained_var": 0.6975228190422058, "kl": 0.0020682807080447674, "entropy": 1.0662044286727905, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 1958400, "num_env_steps_trained": 1958400, "num_agent_steps_sampled": 3916800, "num_agent_steps_trained": 3916800}, "sampler_results": {"episode_reward_max": 587.0, "episode_reward_min": 456.0, "episode_reward_mean": 546.89, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 223.0}, "policy_reward_max": {"ppo": 309.0}, "policy_reward_mean": {"ppo": 273.445}, "custom_metrics": {"sparse_reward_mean": 189.6, "sparse_reward_min": 160, "sparse_reward_max": 200, "shaped_reward_mean": 167.69, "shaped_reward_min": 136, "shaped_reward_max": 187, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.7, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 16.39, 
"onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 15.22, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 16.02, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 26, "onion_drop_agent_0_mean": 0.39, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.32, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.26, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.19, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.93, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 15.7, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 26, "dish_pickup_agent_0_mean": 5.44, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.43, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 14, "useful_dish_pickup_agent_0_mean": 4.76, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.44, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.39, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.52, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 7, "useful_dish_drop_agent_0_mean": 0.12, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.12, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 4.9, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.75, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 4.81, "soup_delivery_agent_0_min": 1, 
"soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.68, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.93, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 15.7, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 26, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.93, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 15.7, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 26, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [573.0, 525.0, 521.0, 525.0, 576.0, 570.0, 573.0, 519.0, 573.0, 570.0, 549.0, 516.0, 522.0, 522.0, 573.0, 573.0, 576.0, 519.0, 530.0, 573.0, 516.0, 576.0, 570.0, 579.0, 576.0, 530.0, 579.0, 579.0, 570.0, 573.0, 456.0, 579.0, 510.0, 576.0, 576.0, 573.0, 522.0, 527.0, 525.0, 573.0, 561.0, 530.0, 525.0, 573.0, 516.0, 530.0, 573.0, 576.0, 579.0, 579.0, 573.0, 501.0, 527.0, 533.0, 579.0, 530.0, 579.0, 519.0, 522.0, 570.0, 525.0, 510.0, 573.0, 582.0, 530.0, 576.0, 473.0, 470.0, 582.0, 525.0, 570.0, 579.0, 519.0, 507.0, 462.0, 587.0, 519.0, 558.0, 570.0, 576.0, 525.0, 510.0, 573.0, 525.0, 525.0, 519.0, 525.0, 573.0, 519.0, 530.0, 525.0, 522.0, 573.0, 576.0, 582.0, 527.0, 573.0, 576.0, 530.0, 570.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [287.0, 286.0, 260.0, 265.0, 259.0, 262.0, 251.0, 274.0, 293.0, 283.0, 284.0, 286.0, 297.0, 276.0, 252.0, 267.0, 274.0, 299.0, 282.0, 288.0, 274.0, 275.0, 263.0, 253.0, 254.0, 268.0, 263.0, 259.0, 288.0, 285.0, 282.0, 291.0, 280.0, 296.0, 262.0, 257.0, 260.0, 270.0, 272.0, 301.0, 262.0, 254.0, 292.0, 284.0, 281.0, 289.0, 291.0, 288.0, 288.0, 
288.0, 269.0, 261.0, 308.0, 271.0, 282.0, 297.0, 281.0, 289.0, 282.0, 291.0, 223.0, 233.0, 293.0, 286.0, 261.0, 249.0, 288.0, 288.0, 292.0, 284.0, 278.0, 295.0, 255.0, 267.0, 274.0, 253.0, 268.0, 257.0, 268.0, 305.0, 301.0, 260.0, 273.0, 257.0, 258.0, 267.0, 295.0, 278.0, 245.0, 271.0, 272.0, 258.0, 292.0, 281.0, 290.0, 286.0, 290.0, 289.0, 290.0, 289.0, 281.0, 292.0, 253.0, 248.0, 261.0, 266.0, 265.0, 268.0, 281.0, 298.0, 270.0, 260.0, 295.0, 284.0, 270.0, 249.0, 255.0, 267.0, 295.0, 275.0, 265.0, 260.0, 255.0, 255.0, 267.0, 306.0, 302.0, 280.0, 251.0, 279.0, 293.0, 283.0, 243.0, 230.0, 239.0, 231.0, 286.0, 296.0, 267.0, 258.0, 280.0, 290.0, 293.0, 286.0, 256.0, 263.0, 250.0, 257.0, 239.0, 223.0, 278.0, 309.0, 261.0, 258.0, 284.0, 274.0, 287.0, 283.0, 279.0, 297.0, 264.0, 261.0, 249.0, 261.0, 287.0, 286.0, 269.0, 256.0, 257.0, 268.0, 248.0, 271.0, 267.0, 258.0, 285.0, 288.0, 272.0, 247.0, 269.0, 261.0, 271.0, 254.0, 254.0, 268.0, 281.0, 292.0, 287.0, 289.0, 281.0, 301.0, 269.0, 258.0, 281.0, 292.0, 298.0, 278.0, 263.0, 267.0, 280.0, 290.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6841687379475, "mean_inference_ms": 1.2062809665348004, "mean_action_processing_ms": 0.13236151110511024, "mean_env_wait_ms": 0.8394849551462821, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 587.0, "episode_reward_min": 456.0, "episode_reward_mean": 546.89, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 223.0}, "policy_reward_max": {"ppo": 309.0}, "policy_reward_mean": {"ppo": 273.445}, "hist_stats": {"episode_reward": [573.0, 525.0, 521.0, 525.0, 576.0, 570.0, 573.0, 519.0, 573.0, 570.0, 549.0, 516.0, 522.0, 522.0, 573.0, 573.0, 576.0, 519.0, 530.0, 573.0, 516.0, 576.0, 570.0, 579.0, 576.0, 530.0, 579.0, 579.0, 570.0, 573.0, 456.0, 579.0, 510.0, 576.0, 576.0, 573.0, 522.0, 527.0, 525.0, 573.0, 561.0, 530.0, 525.0, 573.0, 516.0, 530.0, 573.0, 576.0, 579.0, 579.0, 573.0, 501.0, 527.0, 533.0, 579.0, 530.0, 579.0, 
519.0, 522.0, 570.0, 525.0, 510.0, 573.0, 582.0, 530.0, 576.0, 473.0, 470.0, 582.0, 525.0, 570.0, 579.0, 519.0, 507.0, 462.0, 587.0, 519.0, 558.0, 570.0, 576.0, 525.0, 510.0, 573.0, 525.0, 525.0, 519.0, 525.0, 573.0, 519.0, 530.0, 525.0, 522.0, 573.0, 576.0, 582.0, 527.0, 573.0, 576.0, 530.0, 570.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [287.0, 286.0, 260.0, 265.0, 259.0, 262.0, 251.0, 274.0, 293.0, 283.0, 284.0, 286.0, 297.0, 276.0, 252.0, 267.0, 274.0, 299.0, 282.0, 288.0, 274.0, 275.0, 263.0, 253.0, 254.0, 268.0, 263.0, 259.0, 288.0, 285.0, 282.0, 291.0, 280.0, 296.0, 262.0, 257.0, 260.0, 270.0, 272.0, 301.0, 262.0, 254.0, 292.0, 284.0, 281.0, 289.0, 291.0, 288.0, 288.0, 288.0, 269.0, 261.0, 308.0, 271.0, 282.0, 297.0, 281.0, 289.0, 282.0, 291.0, 223.0, 233.0, 293.0, 286.0, 261.0, 249.0, 288.0, 288.0, 292.0, 284.0, 278.0, 295.0, 255.0, 267.0, 274.0, 253.0, 268.0, 257.0, 268.0, 305.0, 301.0, 260.0, 273.0, 257.0, 258.0, 267.0, 295.0, 278.0, 245.0, 271.0, 272.0, 258.0, 292.0, 281.0, 290.0, 286.0, 290.0, 289.0, 290.0, 289.0, 281.0, 292.0, 253.0, 248.0, 261.0, 266.0, 265.0, 268.0, 281.0, 298.0, 270.0, 260.0, 295.0, 284.0, 270.0, 249.0, 255.0, 267.0, 295.0, 275.0, 265.0, 260.0, 255.0, 255.0, 267.0, 306.0, 302.0, 280.0, 251.0, 279.0, 293.0, 283.0, 243.0, 230.0, 239.0, 231.0, 286.0, 296.0, 267.0, 258.0, 280.0, 290.0, 293.0, 286.0, 256.0, 263.0, 250.0, 257.0, 239.0, 223.0, 278.0, 309.0, 261.0, 258.0, 284.0, 274.0, 287.0, 283.0, 279.0, 297.0, 264.0, 261.0, 249.0, 261.0, 287.0, 
286.0, 269.0, 256.0, 257.0, 268.0, 248.0, 271.0, 267.0, 258.0, 285.0, 288.0, 272.0, 247.0, 269.0, 261.0, 271.0, 254.0, 254.0, 268.0, 281.0, 292.0, 287.0, 289.0, 281.0, 301.0, 269.0, 258.0, 281.0, 292.0, 298.0, 278.0, 263.0, 267.0, 280.0, 290.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6841687379475, "mean_inference_ms": 1.2062809665348004, "mean_action_processing_ms": 0.13236151110511024, "mean_env_wait_ms": 0.8394849551462821, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 3916800, "num_agent_steps_trained": 3916800, "num_env_steps_sampled": 1958400, "num_env_steps_trained": 1958400, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 1958400, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 3916800, "timers": {"training_iteration_time_ms": 3698.273, "learn_time_ms": 1140.347, "learn_throughput": 11224.649, "synch_weights_time_ms": 11.356}, "counters": {"num_env_steps_sampled": 1958400, "num_env_steps_trained": 1958400, "num_agent_steps_sampled": 3916800, "num_agent_steps_trained": 3916800}, "done": false, "episodes_total": 4896, "training_iteration": 153, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-09-54", "timestamp": 1666580994, "time_this_iter_s": 3.7132623195648193, "time_total_s": 575.1197834014893, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 575.1197834014893, "timesteps_since_restore": 0, "iterations_since_restore": 153, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.82, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 189.4, "sparse_reward_min": 160, "sparse_reward_max": 200, "shaped_reward_mean": 167.21, "shaped_reward_min": 138, "shaped_reward_max": 187, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.38, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 16.44, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 14.97, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 16.06, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 25, "onion_drop_agent_0_mean": 0.36, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.31, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.19, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, 
"useful_onion_drop_agent_1_mean": 0.17, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.71, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 15.84, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 5.64, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.3, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 14, "useful_dish_pickup_agent_0_mean": 4.8, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.32, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.48, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.45, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 7, "useful_dish_drop_agent_0_mean": 0.16, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.1, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 5.01, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.65, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 4.92, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.59, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.03, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.71, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 15.84, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.71, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 15.84, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0023376313038170338, "policy_loss": -0.0025501232594251633, "vf_loss": 
7.455283164978027, "vf_explained_var": 0.6846986413002014, "kl": 0.0021816184744238853, "entropy": 1.0660704374313354, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 1971200, "num_env_steps_trained": 1971200, "num_agent_steps_sampled": 3942400, "num_agent_steps_trained": 3942400}, "sampler_results": {"episode_reward_max": 587.0, "episode_reward_min": 462.0, "episode_reward_mean": 546.01, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 223.0}, "policy_reward_max": {"ppo": 309.0}, "policy_reward_mean": {"ppo": 273.005}, "custom_metrics": {"sparse_reward_mean": 189.4, "sparse_reward_min": 160, "sparse_reward_max": 200, "shaped_reward_mean": 167.21, "shaped_reward_min": 138, "shaped_reward_max": 187, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.38, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 16.44, 
"onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 14.97, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 16.06, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 25, "onion_drop_agent_0_mean": 0.36, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.31, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.19, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.17, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.71, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 15.84, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 5.64, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.3, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 14, "useful_dish_pickup_agent_0_mean": 4.8, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.32, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.48, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.45, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 7, "useful_dish_drop_agent_0_mean": 0.16, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.1, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 5.01, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.65, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 4.92, "soup_delivery_agent_0_min": 1, 
"soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.59, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.03, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.71, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 15.84, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.71, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 15.84, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [510.0, 576.0, 576.0, 573.0, 522.0, 527.0, 525.0, 573.0, 561.0, 530.0, 525.0, 573.0, 516.0, 530.0, 573.0, 576.0, 579.0, 579.0, 573.0, 501.0, 527.0, 533.0, 579.0, 530.0, 579.0, 519.0, 522.0, 570.0, 525.0, 510.0, 573.0, 582.0, 530.0, 576.0, 473.0, 470.0, 582.0, 525.0, 570.0, 579.0, 519.0, 507.0, 462.0, 587.0, 519.0, 558.0, 570.0, 576.0, 525.0, 510.0, 573.0, 525.0, 525.0, 519.0, 525.0, 573.0, 519.0, 530.0, 525.0, 522.0, 573.0, 576.0, 582.0, 527.0, 573.0, 576.0, 530.0, 570.0, 530.0, 555.0, 558.0, 498.0, 573.0, 567.0, 530.0, 507.0, 570.0, 519.0, 573.0, 570.0, 516.0, 570.0, 525.0, 579.0, 576.0, 533.0, 522.0, 519.0, 576.0, 519.0, 573.0, 513.0, 579.0, 570.0, 533.0, 576.0, 579.0, 525.0, 573.0, 567.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [261.0, 249.0, 288.0, 288.0, 292.0, 284.0, 278.0, 295.0, 255.0, 267.0, 274.0, 253.0, 268.0, 257.0, 268.0, 305.0, 301.0, 260.0, 273.0, 257.0, 258.0, 267.0, 295.0, 278.0, 245.0, 271.0, 272.0, 258.0, 292.0, 281.0, 290.0, 286.0, 290.0, 289.0, 290.0, 289.0, 281.0, 292.0, 253.0, 248.0, 261.0, 266.0, 265.0, 268.0, 281.0, 298.0, 270.0, 260.0, 295.0, 
284.0, 270.0, 249.0, 255.0, 267.0, 295.0, 275.0, 265.0, 260.0, 255.0, 255.0, 267.0, 306.0, 302.0, 280.0, 251.0, 279.0, 293.0, 283.0, 243.0, 230.0, 239.0, 231.0, 286.0, 296.0, 267.0, 258.0, 280.0, 290.0, 293.0, 286.0, 256.0, 263.0, 250.0, 257.0, 239.0, 223.0, 278.0, 309.0, 261.0, 258.0, 284.0, 274.0, 287.0, 283.0, 279.0, 297.0, 264.0, 261.0, 249.0, 261.0, 287.0, 286.0, 269.0, 256.0, 257.0, 268.0, 248.0, 271.0, 267.0, 258.0, 285.0, 288.0, 272.0, 247.0, 269.0, 261.0, 271.0, 254.0, 254.0, 268.0, 281.0, 292.0, 287.0, 289.0, 281.0, 301.0, 269.0, 258.0, 281.0, 292.0, 298.0, 278.0, 263.0, 267.0, 280.0, 290.0, 256.0, 274.0, 277.0, 278.0, 286.0, 272.0, 264.0, 234.0, 299.0, 274.0, 280.0, 287.0, 263.0, 267.0, 239.0, 268.0, 287.0, 283.0, 258.0, 261.0, 289.0, 284.0, 273.0, 297.0, 255.0, 261.0, 285.0, 285.0, 262.0, 263.0, 292.0, 287.0, 286.0, 290.0, 269.0, 264.0, 259.0, 263.0, 272.0, 247.0, 276.0, 300.0, 268.0, 251.0, 277.0, 296.0, 251.0, 262.0, 294.0, 285.0, 286.0, 284.0, 267.0, 266.0, 284.0, 292.0, 297.0, 282.0, 260.0, 265.0, 288.0, 285.0, 266.0, 301.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6840882825634185, "mean_inference_ms": 1.2061931707471094, "mean_action_processing_ms": 0.13235037392991747, "mean_env_wait_ms": 0.839236522341682, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 587.0, "episode_reward_min": 462.0, "episode_reward_mean": 546.01, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 223.0}, "policy_reward_max": {"ppo": 309.0}, "policy_reward_mean": {"ppo": 273.005}, "hist_stats": {"episode_reward": [510.0, 576.0, 576.0, 573.0, 522.0, 527.0, 525.0, 573.0, 561.0, 530.0, 525.0, 573.0, 516.0, 530.0, 573.0, 576.0, 579.0, 579.0, 573.0, 501.0, 527.0, 533.0, 579.0, 530.0, 579.0, 519.0, 522.0, 570.0, 525.0, 510.0, 573.0, 582.0, 530.0, 576.0, 473.0, 470.0, 582.0, 525.0, 570.0, 579.0, 519.0, 507.0, 462.0, 587.0, 519.0, 558.0, 570.0, 576.0, 525.0, 510.0, 573.0, 525.0, 525.0, 519.0, 525.0, 573.0, 519.0, 
530.0, 525.0, 522.0, 573.0, 576.0, 582.0, 527.0, 573.0, 576.0, 530.0, 570.0, 530.0, 555.0, 558.0, 498.0, 573.0, 567.0, 530.0, 507.0, 570.0, 519.0, 573.0, 570.0, 516.0, 570.0, 525.0, 579.0, 576.0, 533.0, 522.0, 519.0, 576.0, 519.0, 573.0, 513.0, 579.0, 570.0, 533.0, 576.0, 579.0, 525.0, 573.0, 567.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [261.0, 249.0, 288.0, 288.0, 292.0, 284.0, 278.0, 295.0, 255.0, 267.0, 274.0, 253.0, 268.0, 257.0, 268.0, 305.0, 301.0, 260.0, 273.0, 257.0, 258.0, 267.0, 295.0, 278.0, 245.0, 271.0, 272.0, 258.0, 292.0, 281.0, 290.0, 286.0, 290.0, 289.0, 290.0, 289.0, 281.0, 292.0, 253.0, 248.0, 261.0, 266.0, 265.0, 268.0, 281.0, 298.0, 270.0, 260.0, 295.0, 284.0, 270.0, 249.0, 255.0, 267.0, 295.0, 275.0, 265.0, 260.0, 255.0, 255.0, 267.0, 306.0, 302.0, 280.0, 251.0, 279.0, 293.0, 283.0, 243.0, 230.0, 239.0, 231.0, 286.0, 296.0, 267.0, 258.0, 280.0, 290.0, 293.0, 286.0, 256.0, 263.0, 250.0, 257.0, 239.0, 223.0, 278.0, 309.0, 261.0, 258.0, 284.0, 274.0, 287.0, 283.0, 279.0, 297.0, 264.0, 261.0, 249.0, 261.0, 287.0, 286.0, 269.0, 256.0, 257.0, 268.0, 248.0, 271.0, 267.0, 258.0, 285.0, 288.0, 272.0, 247.0, 269.0, 261.0, 271.0, 254.0, 254.0, 268.0, 281.0, 292.0, 287.0, 289.0, 281.0, 301.0, 269.0, 258.0, 281.0, 292.0, 298.0, 278.0, 263.0, 267.0, 280.0, 290.0, 256.0, 274.0, 277.0, 278.0, 286.0, 272.0, 264.0, 234.0, 299.0, 274.0, 280.0, 287.0, 263.0, 267.0, 239.0, 268.0, 287.0, 283.0, 258.0, 261.0, 289.0, 284.0, 273.0, 297.0, 255.0, 261.0, 285.0, 285.0, 262.0, 
263.0, 292.0, 287.0, 286.0, 290.0, 269.0, 264.0, 259.0, 263.0, 272.0, 247.0, 276.0, 300.0, 268.0, 251.0, 277.0, 296.0, 251.0, 262.0, 294.0, 285.0, 286.0, 284.0, 267.0, 266.0, 284.0, 292.0, 297.0, 282.0, 260.0, 265.0, 288.0, 285.0, 266.0, 301.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6840882825634185, "mean_inference_ms": 1.2061931707471094, "mean_action_processing_ms": 0.13235037392991747, "mean_env_wait_ms": 0.839236522341682, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 3942400, "num_agent_steps_trained": 3942400, "num_env_steps_sampled": 1971200, "num_env_steps_trained": 1971200, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 1971200, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 3942400, "timers": {"training_iteration_time_ms": 3693.082, "learn_time_ms": 1137.156, "learn_throughput": 11256.149, "synch_weights_time_ms": 11.882}, "counters": {"num_env_steps_sampled": 1971200, "num_env_steps_trained": 1971200, "num_agent_steps_sampled": 3942400, "num_agent_steps_trained": 3942400}, "done": false, "episodes_total": 4928, "training_iteration": 154, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-09-58", "timestamp": 1666580998, "time_this_iter_s": 3.6708226203918457, "time_total_s": 578.7906060218811, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 578.7906060218811, "timesteps_since_restore": 0, "iterations_since_restore": 154, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.583333333333332, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 189.6, "sparse_reward_min": 160, "sparse_reward_max": 200, "shaped_reward_mean": 167.31, "shaped_reward_min": 138, "shaped_reward_max": 187, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.3, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 16.44, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 14.91, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 16.05, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.29, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.28, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.11, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, 
"useful_onion_drop_agent_1_mean": 0.14, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.67, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 15.85, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.57, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.32, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.77, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.43, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.45, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.41, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.15, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.1, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 4.98, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.69, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 4.89, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.63, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.04, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.67, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 15.85, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.67, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 15.85, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0011531509226188064, "policy_loss": -0.0013669736217707396, "vf_loss": 
7.4545488357543945, "vf_explained_var": 0.6869913339614868, "kl": 0.0018564509227871895, "entropy": 1.063261866569519, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 1984000, "num_env_steps_trained": 1984000, "num_agent_steps_sampled": 3968000, "num_agent_steps_trained": 3968000}, "sampler_results": {"episode_reward_max": 587.0, "episode_reward_min": 462.0, "episode_reward_mean": 546.51, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 223.0}, "policy_reward_max": {"ppo": 309.0}, "policy_reward_mean": {"ppo": 273.255}, "custom_metrics": {"sparse_reward_mean": 189.6, "sparse_reward_min": 160, "sparse_reward_max": 200, "shaped_reward_mean": 167.31, "shaped_reward_min": 138, "shaped_reward_max": 187, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.3, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 16.44, 
"onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 14.91, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 16.05, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.29, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.28, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.11, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.14, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.67, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 15.85, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.57, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.32, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.77, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.43, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.45, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.41, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.15, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.1, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 4.98, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.69, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 4.89, "soup_delivery_agent_0_min": 1, 
"soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.63, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.04, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.67, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 15.85, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.67, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 15.85, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [530.0, 576.0, 473.0, 470.0, 582.0, 525.0, 570.0, 579.0, 519.0, 507.0, 462.0, 587.0, 519.0, 558.0, 570.0, 576.0, 525.0, 510.0, 573.0, 525.0, 525.0, 519.0, 525.0, 573.0, 519.0, 530.0, 525.0, 522.0, 573.0, 576.0, 582.0, 527.0, 573.0, 576.0, 530.0, 570.0, 530.0, 555.0, 558.0, 498.0, 573.0, 567.0, 530.0, 507.0, 570.0, 519.0, 573.0, 570.0, 516.0, 570.0, 525.0, 579.0, 576.0, 533.0, 522.0, 519.0, 576.0, 519.0, 573.0, 513.0, 579.0, 570.0, 533.0, 576.0, 579.0, 525.0, 573.0, 567.0, 573.0, 519.0, 519.0, 576.0, 552.0, 576.0, 576.0, 519.0, 464.0, 530.0, 570.0, 576.0, 576.0, 519.0, 570.0, 582.0, 522.0, 579.0, 525.0, 573.0, 576.0, 573.0, 525.0, 576.0, 576.0, 530.0, 525.0, 579.0, 570.0, 530.0, 525.0, 516.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [251.0, 279.0, 293.0, 283.0, 243.0, 230.0, 239.0, 231.0, 286.0, 296.0, 267.0, 258.0, 280.0, 290.0, 293.0, 286.0, 256.0, 263.0, 250.0, 257.0, 239.0, 223.0, 278.0, 309.0, 261.0, 258.0, 284.0, 274.0, 287.0, 283.0, 279.0, 297.0, 264.0, 261.0, 249.0, 261.0, 287.0, 286.0, 269.0, 256.0, 257.0, 268.0, 248.0, 271.0, 267.0, 258.0, 285.0, 288.0, 272.0, 
247.0, 269.0, 261.0, 271.0, 254.0, 254.0, 268.0, 281.0, 292.0, 287.0, 289.0, 281.0, 301.0, 269.0, 258.0, 281.0, 292.0, 298.0, 278.0, 263.0, 267.0, 280.0, 290.0, 256.0, 274.0, 277.0, 278.0, 286.0, 272.0, 264.0, 234.0, 299.0, 274.0, 280.0, 287.0, 263.0, 267.0, 239.0, 268.0, 287.0, 283.0, 258.0, 261.0, 289.0, 284.0, 273.0, 297.0, 255.0, 261.0, 285.0, 285.0, 262.0, 263.0, 292.0, 287.0, 286.0, 290.0, 269.0, 264.0, 259.0, 263.0, 272.0, 247.0, 276.0, 300.0, 268.0, 251.0, 277.0, 296.0, 251.0, 262.0, 294.0, 285.0, 286.0, 284.0, 267.0, 266.0, 284.0, 292.0, 297.0, 282.0, 260.0, 265.0, 288.0, 285.0, 266.0, 301.0, 289.0, 284.0, 265.0, 254.0, 256.0, 263.0, 275.0, 301.0, 272.0, 280.0, 292.0, 284.0, 293.0, 283.0, 252.0, 267.0, 231.0, 233.0, 262.0, 268.0, 288.0, 282.0, 286.0, 290.0, 274.0, 302.0, 263.0, 256.0, 280.0, 290.0, 290.0, 292.0, 263.0, 259.0, 289.0, 290.0, 260.0, 265.0, 289.0, 284.0, 302.0, 274.0, 277.0, 296.0, 254.0, 271.0, 289.0, 287.0, 281.0, 295.0, 274.0, 256.0, 268.0, 257.0, 285.0, 294.0, 283.0, 287.0, 264.0, 266.0, 266.0, 259.0, 267.0, 249.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.68402604365027, "mean_inference_ms": 1.2061263336972392, "mean_action_processing_ms": 0.1323433901173326, "mean_env_wait_ms": 0.839023040007508, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 587.0, "episode_reward_min": 462.0, "episode_reward_mean": 546.51, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 223.0}, "policy_reward_max": {"ppo": 309.0}, "policy_reward_mean": {"ppo": 273.255}, "hist_stats": {"episode_reward": [530.0, 576.0, 473.0, 470.0, 582.0, 525.0, 570.0, 579.0, 519.0, 507.0, 462.0, 587.0, 519.0, 558.0, 570.0, 576.0, 525.0, 510.0, 573.0, 525.0, 525.0, 519.0, 525.0, 573.0, 519.0, 530.0, 525.0, 522.0, 573.0, 576.0, 582.0, 527.0, 573.0, 576.0, 530.0, 570.0, 530.0, 555.0, 558.0, 498.0, 573.0, 567.0, 530.0, 507.0, 570.0, 519.0, 573.0, 570.0, 516.0, 570.0, 525.0, 579.0, 576.0, 533.0, 522.0, 519.0, 576.0, 
519.0, 573.0, 513.0, 579.0, 570.0, 533.0, 576.0, 579.0, 525.0, 573.0, 567.0, 573.0, 519.0, 519.0, 576.0, 552.0, 576.0, 576.0, 519.0, 464.0, 530.0, 570.0, 576.0, 576.0, 519.0, 570.0, 582.0, 522.0, 579.0, 525.0, 573.0, 576.0, 573.0, 525.0, 576.0, 576.0, 530.0, 525.0, 579.0, 570.0, 530.0, 525.0, 516.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [251.0, 279.0, 293.0, 283.0, 243.0, 230.0, 239.0, 231.0, 286.0, 296.0, 267.0, 258.0, 280.0, 290.0, 293.0, 286.0, 256.0, 263.0, 250.0, 257.0, 239.0, 223.0, 278.0, 309.0, 261.0, 258.0, 284.0, 274.0, 287.0, 283.0, 279.0, 297.0, 264.0, 261.0, 249.0, 261.0, 287.0, 286.0, 269.0, 256.0, 257.0, 268.0, 248.0, 271.0, 267.0, 258.0, 285.0, 288.0, 272.0, 247.0, 269.0, 261.0, 271.0, 254.0, 254.0, 268.0, 281.0, 292.0, 287.0, 289.0, 281.0, 301.0, 269.0, 258.0, 281.0, 292.0, 298.0, 278.0, 263.0, 267.0, 280.0, 290.0, 256.0, 274.0, 277.0, 278.0, 286.0, 272.0, 264.0, 234.0, 299.0, 274.0, 280.0, 287.0, 263.0, 267.0, 239.0, 268.0, 287.0, 283.0, 258.0, 261.0, 289.0, 284.0, 273.0, 297.0, 255.0, 261.0, 285.0, 285.0, 262.0, 263.0, 292.0, 287.0, 286.0, 290.0, 269.0, 264.0, 259.0, 263.0, 272.0, 247.0, 276.0, 300.0, 268.0, 251.0, 277.0, 296.0, 251.0, 262.0, 294.0, 285.0, 286.0, 284.0, 267.0, 266.0, 284.0, 292.0, 297.0, 282.0, 260.0, 265.0, 288.0, 285.0, 266.0, 301.0, 289.0, 284.0, 265.0, 254.0, 256.0, 263.0, 275.0, 301.0, 272.0, 280.0, 292.0, 284.0, 293.0, 283.0, 252.0, 267.0, 231.0, 233.0, 262.0, 268.0, 288.0, 282.0, 286.0, 290.0, 274.0, 302.0, 263.0, 256.0, 280.0, 
290.0, 290.0, 292.0, 263.0, 259.0, 289.0, 290.0, 260.0, 265.0, 289.0, 284.0, 302.0, 274.0, 277.0, 296.0, 254.0, 271.0, 289.0, 287.0, 281.0, 295.0, 274.0, 256.0, 268.0, 257.0, 285.0, 294.0, 283.0, 287.0, 264.0, 266.0, 266.0, 259.0, 267.0, 249.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.68402604365027, "mean_inference_ms": 1.2061263336972392, "mean_action_processing_ms": 0.1323433901173326, "mean_env_wait_ms": 0.839023040007508, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 3968000, "num_agent_steps_trained": 3968000, "num_env_steps_sampled": 1984000, "num_env_steps_trained": 1984000, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 1984000, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 3968000, "timers": {"training_iteration_time_ms": 3661.855, "learn_time_ms": 1121.113, "learn_throughput": 11417.226, "synch_weights_time_ms": 12.244}, "counters": {"num_env_steps_sampled": 1984000, "num_env_steps_trained": 1984000, "num_agent_steps_sampled": 3968000, "num_agent_steps_trained": 3968000}, "done": false, "episodes_total": 4960, "training_iteration": 155, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-10-02", "timestamp": 1666581002, "time_this_iter_s": 3.6832804679870605, "time_total_s": 582.4738864898682, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 582.4738864898682, "timesteps_since_restore": 0, "iterations_since_restore": 155, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 24.14, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 190.0, "sparse_reward_min": 140, "sparse_reward_max": 200, "shaped_reward_mean": 168.36, "shaped_reward_min": 138, "shaped_reward_max": 182, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.21, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 16.59, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 14.86, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 16.29, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.28, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.25, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.12, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, 
"useful_onion_drop_agent_1_mean": 0.13, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 14.64, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 16.08, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.68, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.24, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 4.85, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.35, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.46, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.43, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.14, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.11, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 5.12, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.65, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.04, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.58, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.03, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.64, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 16.08, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.64, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 16.08, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.00024176511215046048, "policy_loss": 3.316625952720642e-05, "vf_loss": 
7.437552452087402, "vf_explained_var": 0.6896594166755676, "kl": 0.002203156938776374, "entropy": 1.0703125, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 1996800, "num_env_steps_trained": 1996800, "num_agent_steps_sampled": 3993600, "num_agent_steps_trained": 3993600}, "sampler_results": {"episode_reward_max": 582.0, "episode_reward_min": 436.0, "episode_reward_mean": 548.36, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 215.0}, "policy_reward_max": {"ppo": 302.0}, "policy_reward_mean": {"ppo": 274.18}, "custom_metrics": {"sparse_reward_mean": 190.0, "sparse_reward_min": 140, "sparse_reward_max": 200, "shaped_reward_mean": 168.36, "shaped_reward_min": 138, "shaped_reward_max": 182, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.21, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 16.59, 
"onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 14.86, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 16.29, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.28, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.25, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.12, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.13, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 14.64, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 16.08, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.68, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.24, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 4.85, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.35, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.46, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.43, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.14, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.11, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 5.12, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.65, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.04, "soup_delivery_agent_0_min": 1, 
"soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.58, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.03, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.64, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 16.08, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.64, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 16.08, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [573.0, 576.0, 530.0, 570.0, 530.0, 555.0, 558.0, 498.0, 573.0, 567.0, 530.0, 507.0, 570.0, 519.0, 573.0, 570.0, 516.0, 570.0, 525.0, 579.0, 576.0, 533.0, 522.0, 519.0, 576.0, 519.0, 573.0, 513.0, 579.0, 570.0, 533.0, 576.0, 579.0, 525.0, 573.0, 567.0, 573.0, 519.0, 519.0, 576.0, 552.0, 576.0, 576.0, 519.0, 464.0, 530.0, 570.0, 576.0, 576.0, 519.0, 570.0, 582.0, 522.0, 579.0, 525.0, 573.0, 576.0, 573.0, 525.0, 576.0, 576.0, 530.0, 525.0, 579.0, 570.0, 530.0, 525.0, 516.0, 582.0, 525.0, 579.0, 576.0, 573.0, 530.0, 579.0, 513.0, 533.0, 525.0, 576.0, 533.0, 576.0, 576.0, 579.0, 484.0, 516.0, 573.0, 461.0, 576.0, 476.0, 579.0, 530.0, 518.0, 518.0, 570.0, 576.0, 576.0, 436.0, 576.0, 527.0, 570.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [281.0, 292.0, 298.0, 278.0, 263.0, 267.0, 280.0, 290.0, 256.0, 274.0, 277.0, 278.0, 286.0, 272.0, 264.0, 234.0, 299.0, 274.0, 280.0, 287.0, 263.0, 267.0, 239.0, 268.0, 287.0, 283.0, 258.0, 261.0, 289.0, 284.0, 273.0, 297.0, 255.0, 261.0, 285.0, 285.0, 262.0, 263.0, 292.0, 287.0, 286.0, 290.0, 269.0, 264.0, 259.0, 263.0, 272.0, 247.0, 276.0, 
300.0, 268.0, 251.0, 277.0, 296.0, 251.0, 262.0, 294.0, 285.0, 286.0, 284.0, 267.0, 266.0, 284.0, 292.0, 297.0, 282.0, 260.0, 265.0, 288.0, 285.0, 266.0, 301.0, 289.0, 284.0, 265.0, 254.0, 256.0, 263.0, 275.0, 301.0, 272.0, 280.0, 292.0, 284.0, 293.0, 283.0, 252.0, 267.0, 231.0, 233.0, 262.0, 268.0, 288.0, 282.0, 286.0, 290.0, 274.0, 302.0, 263.0, 256.0, 280.0, 290.0, 290.0, 292.0, 263.0, 259.0, 289.0, 290.0, 260.0, 265.0, 289.0, 284.0, 302.0, 274.0, 277.0, 296.0, 254.0, 271.0, 289.0, 287.0, 281.0, 295.0, 274.0, 256.0, 268.0, 257.0, 285.0, 294.0, 283.0, 287.0, 264.0, 266.0, 266.0, 259.0, 267.0, 249.0, 301.0, 281.0, 265.0, 260.0, 282.0, 297.0, 294.0, 282.0, 286.0, 287.0, 262.0, 268.0, 290.0, 289.0, 254.0, 259.0, 271.0, 262.0, 251.0, 274.0, 287.0, 289.0, 267.0, 266.0, 295.0, 281.0, 298.0, 278.0, 277.0, 302.0, 241.0, 243.0, 246.0, 270.0, 277.0, 296.0, 234.0, 227.0, 292.0, 284.0, 237.0, 239.0, 287.0, 292.0, 263.0, 267.0, 271.0, 247.0, 267.0, 251.0, 287.0, 283.0, 293.0, 283.0, 275.0, 301.0, 221.0, 215.0, 292.0, 284.0, 261.0, 266.0, 302.0, 268.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6839270699036237, "mean_inference_ms": 1.206053543227454, "mean_action_processing_ms": 0.13233675331157269, "mean_env_wait_ms": 0.8388131632821678, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 582.0, "episode_reward_min": 436.0, "episode_reward_mean": 548.36, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 215.0}, "policy_reward_max": {"ppo": 302.0}, "policy_reward_mean": {"ppo": 274.18}, "hist_stats": {"episode_reward": [573.0, 576.0, 530.0, 570.0, 530.0, 555.0, 558.0, 498.0, 573.0, 567.0, 530.0, 507.0, 570.0, 519.0, 573.0, 570.0, 516.0, 570.0, 525.0, 579.0, 576.0, 533.0, 522.0, 519.0, 576.0, 519.0, 573.0, 513.0, 579.0, 570.0, 533.0, 576.0, 579.0, 525.0, 573.0, 567.0, 573.0, 519.0, 519.0, 576.0, 552.0, 576.0, 576.0, 519.0, 464.0, 530.0, 570.0, 576.0, 576.0, 519.0, 570.0, 582.0, 522.0, 579.0, 525.0, 573.0, 576.0, 
573.0, 525.0, 576.0, 576.0, 530.0, 525.0, 579.0, 570.0, 530.0, 525.0, 516.0, 582.0, 525.0, 579.0, 576.0, 573.0, 530.0, 579.0, 513.0, 533.0, 525.0, 576.0, 533.0, 576.0, 576.0, 579.0, 484.0, 516.0, 573.0, 461.0, 576.0, 476.0, 579.0, 530.0, 518.0, 518.0, 570.0, 576.0, 576.0, 436.0, 576.0, 527.0, 570.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [281.0, 292.0, 298.0, 278.0, 263.0, 267.0, 280.0, 290.0, 256.0, 274.0, 277.0, 278.0, 286.0, 272.0, 264.0, 234.0, 299.0, 274.0, 280.0, 287.0, 263.0, 267.0, 239.0, 268.0, 287.0, 283.0, 258.0, 261.0, 289.0, 284.0, 273.0, 297.0, 255.0, 261.0, 285.0, 285.0, 262.0, 263.0, 292.0, 287.0, 286.0, 290.0, 269.0, 264.0, 259.0, 263.0, 272.0, 247.0, 276.0, 300.0, 268.0, 251.0, 277.0, 296.0, 251.0, 262.0, 294.0, 285.0, 286.0, 284.0, 267.0, 266.0, 284.0, 292.0, 297.0, 282.0, 260.0, 265.0, 288.0, 285.0, 266.0, 301.0, 289.0, 284.0, 265.0, 254.0, 256.0, 263.0, 275.0, 301.0, 272.0, 280.0, 292.0, 284.0, 293.0, 283.0, 252.0, 267.0, 231.0, 233.0, 262.0, 268.0, 288.0, 282.0, 286.0, 290.0, 274.0, 302.0, 263.0, 256.0, 280.0, 290.0, 290.0, 292.0, 263.0, 259.0, 289.0, 290.0, 260.0, 265.0, 289.0, 284.0, 302.0, 274.0, 277.0, 296.0, 254.0, 271.0, 289.0, 287.0, 281.0, 295.0, 274.0, 256.0, 268.0, 257.0, 285.0, 294.0, 283.0, 287.0, 264.0, 266.0, 266.0, 259.0, 267.0, 249.0, 301.0, 281.0, 265.0, 260.0, 282.0, 297.0, 294.0, 282.0, 286.0, 287.0, 262.0, 268.0, 290.0, 289.0, 254.0, 259.0, 271.0, 262.0, 251.0, 274.0, 287.0, 289.0, 267.0, 266.0, 295.0, 281.0, 298.0, 278.0, 277.0, 
302.0, 241.0, 243.0, 246.0, 270.0, 277.0, 296.0, 234.0, 227.0, 292.0, 284.0, 237.0, 239.0, 287.0, 292.0, 263.0, 267.0, 271.0, 247.0, 267.0, 251.0, 287.0, 283.0, 293.0, 283.0, 275.0, 301.0, 221.0, 215.0, 292.0, 284.0, 261.0, 266.0, 302.0, 268.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6839270699036237, "mean_inference_ms": 1.206053543227454, "mean_action_processing_ms": 0.13233675331157269, "mean_env_wait_ms": 0.8388131632821678, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 3993600, "num_agent_steps_trained": 3993600, "num_env_steps_sampled": 1996800, "num_env_steps_trained": 1996800, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 1996800, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 3993600, "timers": {"training_iteration_time_ms": 3616.468, "learn_time_ms": 1118.033, "learn_throughput": 11448.682, "synch_weights_time_ms": 11.585}, "counters": {"num_env_steps_sampled": 1996800, "num_env_steps_trained": 1996800, "num_agent_steps_sampled": 3993600, "num_agent_steps_trained": 3993600}, "done": false, "episodes_total": 4992, "training_iteration": 156, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-10-06", "timestamp": 1666581006, "time_this_iter_s": 3.615835189819336, "time_total_s": 586.0897216796875, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 586.0897216796875, "timesteps_since_restore": 0, "iterations_since_restore": 156, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.916666666666668, "ram_util_percent": 10.616666666666665}}
+{"custom_metrics": {"sparse_reward_mean": 190.0, "sparse_reward_min": 140, "sparse_reward_max": 200, "shaped_reward_mean": 169.32, "shaped_reward_min": 137, "shaped_reward_max": 182, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.56, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 16.43, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 15.23, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 16.11, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.25, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.25, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.15, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, 
"useful_onion_drop_agent_1_mean": 0.1, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 14.97, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 15.87, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.55, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.19, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 4.9, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.5, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.34, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.41, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.09, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.08, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 5.05, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.73, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 4.98, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.63, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.05, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.97, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 15.87, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.97, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 15.87, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0025629051961004734, "policy_loss": -0.0027682920917868614, "vf_loss": 
7.394125938415527, "vf_explained_var": 0.6789853572845459, "kl": 0.0019060579361394048, "entropy": 1.0680499076843262, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 2009600, "num_env_steps_trained": 2009600, "num_agent_steps_sampled": 4019200, "num_agent_steps_trained": 4019200}, "sampler_results": {"episode_reward_max": 582.0, "episode_reward_min": 417.0, "episode_reward_mean": 549.32, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 193.0}, "policy_reward_max": {"ppo": 303.0}, "policy_reward_mean": {"ppo": 274.66}, "custom_metrics": {"sparse_reward_mean": 190.0, "sparse_reward_min": 140, "sparse_reward_max": 200, "shaped_reward_mean": 169.32, "shaped_reward_min": 137, "shaped_reward_max": 182, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.56, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 16.43, 
"onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 15.23, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 16.11, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.25, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.25, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.15, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.1, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 14.97, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 15.87, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.55, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.19, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 4.9, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.5, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.34, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.41, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.09, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.08, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 5.05, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.73, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 4.98, "soup_delivery_agent_0_min": 1, 
"soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.63, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.05, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.97, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 15.87, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.97, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 15.87, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 525.0, 573.0, 567.0, 573.0, 519.0, 519.0, 576.0, 552.0, 576.0, 576.0, 519.0, 464.0, 530.0, 570.0, 576.0, 576.0, 519.0, 570.0, 582.0, 522.0, 579.0, 525.0, 573.0, 576.0, 573.0, 525.0, 576.0, 576.0, 530.0, 525.0, 579.0, 570.0, 530.0, 525.0, 516.0, 582.0, 525.0, 579.0, 576.0, 573.0, 530.0, 579.0, 513.0, 533.0, 525.0, 576.0, 533.0, 576.0, 576.0, 579.0, 484.0, 516.0, 573.0, 461.0, 576.0, 476.0, 579.0, 530.0, 518.0, 518.0, 570.0, 576.0, 576.0, 436.0, 576.0, 527.0, 570.0, 573.0, 573.0, 530.0, 582.0, 525.0, 573.0, 570.0, 516.0, 576.0, 570.0, 579.0, 579.0, 417.0, 576.0, 579.0, 525.0, 482.0, 522.0, 579.0, 573.0, 579.0, 576.0, 576.0, 567.0, 570.0, 519.0, 522.0, 519.0, 576.0, 522.0, 576.0, 573.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [297.0, 282.0, 260.0, 265.0, 288.0, 285.0, 266.0, 301.0, 289.0, 284.0, 265.0, 254.0, 256.0, 263.0, 275.0, 301.0, 272.0, 280.0, 292.0, 284.0, 293.0, 283.0, 252.0, 267.0, 231.0, 233.0, 262.0, 268.0, 288.0, 282.0, 286.0, 290.0, 274.0, 302.0, 263.0, 256.0, 280.0, 290.0, 290.0, 292.0, 263.0, 259.0, 289.0, 290.0, 260.0, 265.0, 289.0, 284.0, 302.0, 
274.0, 277.0, 296.0, 254.0, 271.0, 289.0, 287.0, 281.0, 295.0, 274.0, 256.0, 268.0, 257.0, 285.0, 294.0, 283.0, 287.0, 264.0, 266.0, 266.0, 259.0, 267.0, 249.0, 301.0, 281.0, 265.0, 260.0, 282.0, 297.0, 294.0, 282.0, 286.0, 287.0, 262.0, 268.0, 290.0, 289.0, 254.0, 259.0, 271.0, 262.0, 251.0, 274.0, 287.0, 289.0, 267.0, 266.0, 295.0, 281.0, 298.0, 278.0, 277.0, 302.0, 241.0, 243.0, 246.0, 270.0, 277.0, 296.0, 234.0, 227.0, 292.0, 284.0, 237.0, 239.0, 287.0, 292.0, 263.0, 267.0, 271.0, 247.0, 267.0, 251.0, 287.0, 283.0, 293.0, 283.0, 275.0, 301.0, 221.0, 215.0, 292.0, 284.0, 261.0, 266.0, 302.0, 268.0, 285.0, 288.0, 287.0, 286.0, 261.0, 269.0, 289.0, 293.0, 275.0, 250.0, 288.0, 285.0, 302.0, 268.0, 256.0, 260.0, 277.0, 299.0, 286.0, 284.0, 291.0, 288.0, 294.0, 285.0, 224.0, 193.0, 286.0, 290.0, 283.0, 296.0, 246.0, 279.0, 236.0, 246.0, 267.0, 255.0, 285.0, 294.0, 290.0, 283.0, 303.0, 276.0, 283.0, 293.0, 283.0, 293.0, 290.0, 277.0, 275.0, 295.0, 259.0, 260.0, 258.0, 264.0, 257.0, 262.0, 296.0, 280.0, 275.0, 247.0, 278.0, 298.0, 295.0, 278.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6838652052858929, "mean_inference_ms": 1.205993726372932, "mean_action_processing_ms": 0.13233281493670865, "mean_env_wait_ms": 0.8386165066666643, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 582.0, "episode_reward_min": 417.0, "episode_reward_mean": 549.32, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 193.0}, "policy_reward_max": {"ppo": 303.0}, "policy_reward_mean": {"ppo": 274.66}, "hist_stats": {"episode_reward": [579.0, 525.0, 573.0, 567.0, 573.0, 519.0, 519.0, 576.0, 552.0, 576.0, 576.0, 519.0, 464.0, 530.0, 570.0, 576.0, 576.0, 519.0, 570.0, 582.0, 522.0, 579.0, 525.0, 573.0, 576.0, 573.0, 525.0, 576.0, 576.0, 530.0, 525.0, 579.0, 570.0, 530.0, 525.0, 516.0, 582.0, 525.0, 579.0, 576.0, 573.0, 530.0, 579.0, 513.0, 533.0, 525.0, 576.0, 533.0, 576.0, 576.0, 579.0, 484.0, 516.0, 573.0, 461.0, 576.0, 476.0, 
579.0, 530.0, 518.0, 518.0, 570.0, 576.0, 576.0, 436.0, 576.0, 527.0, 570.0, 573.0, 573.0, 530.0, 582.0, 525.0, 573.0, 570.0, 516.0, 576.0, 570.0, 579.0, 579.0, 417.0, 576.0, 579.0, 525.0, 482.0, 522.0, 579.0, 573.0, 579.0, 576.0, 576.0, 567.0, 570.0, 519.0, 522.0, 519.0, 576.0, 522.0, 576.0, 573.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [297.0, 282.0, 260.0, 265.0, 288.0, 285.0, 266.0, 301.0, 289.0, 284.0, 265.0, 254.0, 256.0, 263.0, 275.0, 301.0, 272.0, 280.0, 292.0, 284.0, 293.0, 283.0, 252.0, 267.0, 231.0, 233.0, 262.0, 268.0, 288.0, 282.0, 286.0, 290.0, 274.0, 302.0, 263.0, 256.0, 280.0, 290.0, 290.0, 292.0, 263.0, 259.0, 289.0, 290.0, 260.0, 265.0, 289.0, 284.0, 302.0, 274.0, 277.0, 296.0, 254.0, 271.0, 289.0, 287.0, 281.0, 295.0, 274.0, 256.0, 268.0, 257.0, 285.0, 294.0, 283.0, 287.0, 264.0, 266.0, 266.0, 259.0, 267.0, 249.0, 301.0, 281.0, 265.0, 260.0, 282.0, 297.0, 294.0, 282.0, 286.0, 287.0, 262.0, 268.0, 290.0, 289.0, 254.0, 259.0, 271.0, 262.0, 251.0, 274.0, 287.0, 289.0, 267.0, 266.0, 295.0, 281.0, 298.0, 278.0, 277.0, 302.0, 241.0, 243.0, 246.0, 270.0, 277.0, 296.0, 234.0, 227.0, 292.0, 284.0, 237.0, 239.0, 287.0, 292.0, 263.0, 267.0, 271.0, 247.0, 267.0, 251.0, 287.0, 283.0, 293.0, 283.0, 275.0, 301.0, 221.0, 215.0, 292.0, 284.0, 261.0, 266.0, 302.0, 268.0, 285.0, 288.0, 287.0, 286.0, 261.0, 269.0, 289.0, 293.0, 275.0, 250.0, 288.0, 285.0, 302.0, 268.0, 256.0, 260.0, 277.0, 299.0, 286.0, 284.0, 291.0, 288.0, 294.0, 285.0, 224.0, 193.0, 286.0, 290.0, 283.0, 
296.0, 246.0, 279.0, 236.0, 246.0, 267.0, 255.0, 285.0, 294.0, 290.0, 283.0, 303.0, 276.0, 283.0, 293.0, 283.0, 293.0, 290.0, 277.0, 275.0, 295.0, 259.0, 260.0, 258.0, 264.0, 257.0, 262.0, 296.0, 280.0, 275.0, 247.0, 278.0, 298.0, 295.0, 278.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6838652052858929, "mean_inference_ms": 1.205993726372932, "mean_action_processing_ms": 0.13233281493670865, "mean_env_wait_ms": 0.8386165066666643, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 4019200, "num_agent_steps_trained": 4019200, "num_env_steps_sampled": 2009600, "num_env_steps_trained": 2009600, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 2009600, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 4019200, "timers": {"training_iteration_time_ms": 3617.124, "learn_time_ms": 1120.099, "learn_throughput": 11427.556, "synch_weights_time_ms": 11.442}, "counters": {"num_env_steps_sampled": 2009600, "num_env_steps_trained": 2009600, "num_agent_steps_sampled": 4019200, "num_agent_steps_trained": 4019200}, "done": false, "episodes_total": 5024, "training_iteration": 157, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-10-09", "timestamp": 1666581009, "time_this_iter_s": 3.6556949615478516, "time_total_s": 589.7454166412354, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 589.7454166412354, "timesteps_since_restore": 0, "iterations_since_restore": 157, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 24.160000000000004, "ram_util_percent": 10.620000000000001}}
+{"custom_metrics": {"sparse_reward_mean": 190.4, "sparse_reward_min": 140, "sparse_reward_max": 200, "shaped_reward_mean": 169.76, "shaped_reward_min": 137, "shaped_reward_max": 182, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.33, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 16.69, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 14.98, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 16.33, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.2, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.26, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.14, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, 
"useful_onion_drop_agent_1_mean": 0.1, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 14.8, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 16.11, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.64, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.12, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 4.98, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.43, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.33, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.4, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.07, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.11, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 5.14, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.66, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.07, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.56, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.05, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.8, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 16.11, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.8, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 16.11, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0012510658707469702, "policy_loss": 0.001050335355103016, "vf_loss": 
7.332821846008301, "vf_explained_var": 0.6917320489883423, "kl": 0.0018009908962994814, "entropy": 1.0651018619537354, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 2022400, "num_env_steps_trained": 2022400, "num_agent_steps_sampled": 4044800, "num_agent_steps_trained": 4044800}, "sampler_results": {"episode_reward_max": 582.0, "episode_reward_min": 417.0, "episode_reward_mean": 550.56, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 193.0}, "policy_reward_max": {"ppo": 303.0}, "policy_reward_mean": {"ppo": 275.28}, "custom_metrics": {"sparse_reward_mean": 190.4, "sparse_reward_min": 140, "sparse_reward_max": 200, "shaped_reward_mean": 169.76, "shaped_reward_min": 137, "shaped_reward_max": 182, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.33, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 16.69, 
"onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 14.98, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 16.33, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.2, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.26, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.14, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.1, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 14.8, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 16.11, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.64, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.12, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 4.98, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.43, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.33, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.4, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.07, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.11, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 5.14, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.66, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.07, "soup_delivery_agent_0_min": 1, 
"soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.56, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.05, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.8, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 16.11, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.8, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 16.11, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [570.0, 530.0, 525.0, 516.0, 582.0, 525.0, 579.0, 576.0, 573.0, 530.0, 579.0, 513.0, 533.0, 525.0, 576.0, 533.0, 576.0, 576.0, 579.0, 484.0, 516.0, 573.0, 461.0, 576.0, 476.0, 579.0, 530.0, 518.0, 518.0, 570.0, 576.0, 576.0, 436.0, 576.0, 527.0, 570.0, 573.0, 573.0, 530.0, 582.0, 525.0, 573.0, 570.0, 516.0, 576.0, 570.0, 579.0, 579.0, 417.0, 576.0, 579.0, 525.0, 482.0, 522.0, 579.0, 573.0, 579.0, 576.0, 576.0, 567.0, 570.0, 519.0, 522.0, 519.0, 576.0, 522.0, 576.0, 573.0, 519.0, 579.0, 576.0, 525.0, 576.0, 530.0, 570.0, 573.0, 530.0, 579.0, 573.0, 573.0, 530.0, 522.0, 570.0, 519.0, 522.0, 570.0, 570.0, 579.0, 573.0, 576.0, 512.0, 582.0, 516.0, 570.0, 530.0, 579.0, 573.0, 582.0, 576.0, 570.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [283.0, 287.0, 264.0, 266.0, 266.0, 259.0, 267.0, 249.0, 301.0, 281.0, 265.0, 260.0, 282.0, 297.0, 294.0, 282.0, 286.0, 287.0, 262.0, 268.0, 290.0, 289.0, 254.0, 259.0, 271.0, 262.0, 251.0, 274.0, 287.0, 289.0, 267.0, 266.0, 295.0, 281.0, 298.0, 278.0, 277.0, 302.0, 241.0, 243.0, 246.0, 270.0, 277.0, 296.0, 234.0, 227.0, 292.0, 284.0, 237.0, 
239.0, 287.0, 292.0, 263.0, 267.0, 271.0, 247.0, 267.0, 251.0, 287.0, 283.0, 293.0, 283.0, 275.0, 301.0, 221.0, 215.0, 292.0, 284.0, 261.0, 266.0, 302.0, 268.0, 285.0, 288.0, 287.0, 286.0, 261.0, 269.0, 289.0, 293.0, 275.0, 250.0, 288.0, 285.0, 302.0, 268.0, 256.0, 260.0, 277.0, 299.0, 286.0, 284.0, 291.0, 288.0, 294.0, 285.0, 224.0, 193.0, 286.0, 290.0, 283.0, 296.0, 246.0, 279.0, 236.0, 246.0, 267.0, 255.0, 285.0, 294.0, 290.0, 283.0, 303.0, 276.0, 283.0, 293.0, 283.0, 293.0, 290.0, 277.0, 275.0, 295.0, 259.0, 260.0, 258.0, 264.0, 257.0, 262.0, 296.0, 280.0, 275.0, 247.0, 278.0, 298.0, 295.0, 278.0, 266.0, 253.0, 292.0, 287.0, 299.0, 277.0, 272.0, 253.0, 287.0, 289.0, 259.0, 271.0, 287.0, 283.0, 281.0, 292.0, 268.0, 262.0, 296.0, 283.0, 293.0, 280.0, 282.0, 291.0, 253.0, 277.0, 253.0, 269.0, 280.0, 290.0, 245.0, 274.0, 264.0, 258.0, 282.0, 288.0, 285.0, 285.0, 279.0, 300.0, 282.0, 291.0, 286.0, 290.0, 257.0, 255.0, 301.0, 281.0, 262.0, 254.0, 279.0, 291.0, 257.0, 273.0, 278.0, 301.0, 295.0, 278.0, 286.0, 296.0, 290.0, 286.0, 282.0, 288.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6837868313458785, "mean_inference_ms": 1.2059317974395252, "mean_action_processing_ms": 0.1323313020756567, "mean_env_wait_ms": 0.838427317675574, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 582.0, "episode_reward_min": 417.0, "episode_reward_mean": 550.56, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 193.0}, "policy_reward_max": {"ppo": 303.0}, "policy_reward_mean": {"ppo": 275.28}, "hist_stats": {"episode_reward": [570.0, 530.0, 525.0, 516.0, 582.0, 525.0, 579.0, 576.0, 573.0, 530.0, 579.0, 513.0, 533.0, 525.0, 576.0, 533.0, 576.0, 576.0, 579.0, 484.0, 516.0, 573.0, 461.0, 576.0, 476.0, 579.0, 530.0, 518.0, 518.0, 570.0, 576.0, 576.0, 436.0, 576.0, 527.0, 570.0, 573.0, 573.0, 530.0, 582.0, 525.0, 573.0, 570.0, 516.0, 576.0, 570.0, 579.0, 579.0, 417.0, 576.0, 579.0, 525.0, 482.0, 522.0, 579.0, 573.0, 579.0, 
576.0, 576.0, 567.0, 570.0, 519.0, 522.0, 519.0, 576.0, 522.0, 576.0, 573.0, 519.0, 579.0, 576.0, 525.0, 576.0, 530.0, 570.0, 573.0, 530.0, 579.0, 573.0, 573.0, 530.0, 522.0, 570.0, 519.0, 522.0, 570.0, 570.0, 579.0, 573.0, 576.0, 512.0, 582.0, 516.0, 570.0, 530.0, 579.0, 573.0, 582.0, 576.0, 570.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [283.0, 287.0, 264.0, 266.0, 266.0, 259.0, 267.0, 249.0, 301.0, 281.0, 265.0, 260.0, 282.0, 297.0, 294.0, 282.0, 286.0, 287.0, 262.0, 268.0, 290.0, 289.0, 254.0, 259.0, 271.0, 262.0, 251.0, 274.0, 287.0, 289.0, 267.0, 266.0, 295.0, 281.0, 298.0, 278.0, 277.0, 302.0, 241.0, 243.0, 246.0, 270.0, 277.0, 296.0, 234.0, 227.0, 292.0, 284.0, 237.0, 239.0, 287.0, 292.0, 263.0, 267.0, 271.0, 247.0, 267.0, 251.0, 287.0, 283.0, 293.0, 283.0, 275.0, 301.0, 221.0, 215.0, 292.0, 284.0, 261.0, 266.0, 302.0, 268.0, 285.0, 288.0, 287.0, 286.0, 261.0, 269.0, 289.0, 293.0, 275.0, 250.0, 288.0, 285.0, 302.0, 268.0, 256.0, 260.0, 277.0, 299.0, 286.0, 284.0, 291.0, 288.0, 294.0, 285.0, 224.0, 193.0, 286.0, 290.0, 283.0, 296.0, 246.0, 279.0, 236.0, 246.0, 267.0, 255.0, 285.0, 294.0, 290.0, 283.0, 303.0, 276.0, 283.0, 293.0, 283.0, 293.0, 290.0, 277.0, 275.0, 295.0, 259.0, 260.0, 258.0, 264.0, 257.0, 262.0, 296.0, 280.0, 275.0, 247.0, 278.0, 298.0, 295.0, 278.0, 266.0, 253.0, 292.0, 287.0, 299.0, 277.0, 272.0, 253.0, 287.0, 289.0, 259.0, 271.0, 287.0, 283.0, 281.0, 292.0, 268.0, 262.0, 296.0, 283.0, 293.0, 280.0, 282.0, 291.0, 253.0, 277.0, 253.0, 269.0, 280.0, 
290.0, 245.0, 274.0, 264.0, 258.0, 282.0, 288.0, 285.0, 285.0, 279.0, 300.0, 282.0, 291.0, 286.0, 290.0, 257.0, 255.0, 301.0, 281.0, 262.0, 254.0, 279.0, 291.0, 257.0, 273.0, 278.0, 301.0, 295.0, 278.0, 286.0, 296.0, 290.0, 286.0, 282.0, 288.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6837868313458785, "mean_inference_ms": 1.2059317974395252, "mean_action_processing_ms": 0.1323313020756567, "mean_env_wait_ms": 0.838427317675574, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 4044800, "num_agent_steps_trained": 4044800, "num_env_steps_sampled": 2022400, "num_env_steps_trained": 2022400, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 2022400, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 4044800, "timers": {"training_iteration_time_ms": 3613.095, "learn_time_ms": 1111.871, "learn_throughput": 11512.124, "synch_weights_time_ms": 12.441}, "counters": {"num_env_steps_sampled": 2022400, "num_env_steps_trained": 2022400, "num_agent_steps_sampled": 4044800, "num_agent_steps_trained": 4044800}, "done": false, "episodes_total": 5056, "training_iteration": 158, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-10-13", "timestamp": 1666581013, "time_this_iter_s": 3.6319639682769775, "time_total_s": 593.3773806095123, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 593.3773806095123, "timesteps_since_restore": 0, "iterations_since_restore": 158, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.720000000000002, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 191.4, "sparse_reward_min": 140, "sparse_reward_max": 200, "shaped_reward_mean": 169.91, "shaped_reward_min": 133, "shaped_reward_max": 182, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.36, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 16.78, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 14.94, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 16.42, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.13, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.25, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.08, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, 
"useful_onion_drop_agent_1_mean": 0.08, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.88, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 16.17, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 5.56, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.05, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.97, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.4, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 10, "dish_drop_agent_0_mean": 0.33, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.3, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.09, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.1, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 5.06, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.69, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 10, "soup_delivery_agent_0_mean": 5.0, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.61, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 10, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.88, "optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 16.17, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.88, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 16.17, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0031918580643832684, "policy_loss": -0.0034064347855746746, "vf_loss": 
7.42955207824707, "vf_explained_var": 0.682861864566803, "kl": 0.0020245909690856934, "entropy": 1.0567570924758911, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 2035200, "num_env_steps_trained": 2035200, "num_agent_steps_sampled": 4070400, "num_agent_steps_trained": 4070400}, "sampler_results": {"episode_reward_max": 582.0, "episode_reward_min": 417.0, "episode_reward_mean": 552.71, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 193.0}, "policy_reward_max": {"ppo": 307.0}, "policy_reward_mean": {"ppo": 276.355}, "custom_metrics": {"sparse_reward_mean": 191.4, "sparse_reward_min": 140, "sparse_reward_max": 200, "shaped_reward_mean": 169.91, "shaped_reward_min": 133, "shaped_reward_max": 182, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.36, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 16.78, 
"onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 14.94, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 16.42, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.13, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.25, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.08, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.08, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.88, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 16.17, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 5.56, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.05, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.97, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.4, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 10, "dish_drop_agent_0_mean": 0.33, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.3, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.09, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.1, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 5.06, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.69, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 10, "soup_delivery_agent_0_mean": 5.0, "soup_delivery_agent_0_min": 0, 
"soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.61, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 10, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.88, "optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 16.17, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.88, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 16.17, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [436.0, 576.0, 527.0, 570.0, 573.0, 573.0, 530.0, 582.0, 525.0, 573.0, 570.0, 516.0, 576.0, 570.0, 579.0, 579.0, 417.0, 576.0, 579.0, 525.0, 482.0, 522.0, 579.0, 573.0, 579.0, 576.0, 576.0, 567.0, 570.0, 519.0, 522.0, 519.0, 576.0, 522.0, 576.0, 573.0, 519.0, 579.0, 576.0, 525.0, 576.0, 530.0, 570.0, 573.0, 530.0, 579.0, 573.0, 573.0, 530.0, 522.0, 570.0, 519.0, 522.0, 570.0, 570.0, 579.0, 573.0, 576.0, 512.0, 582.0, 516.0, 570.0, 530.0, 579.0, 573.0, 582.0, 576.0, 570.0, 522.0, 519.0, 579.0, 522.0, 525.0, 573.0, 579.0, 573.0, 573.0, 579.0, 576.0, 573.0, 573.0, 501.0, 561.0, 530.0, 522.0, 579.0, 522.0, 530.0, 573.0, 579.0, 582.0, 522.0, 530.0, 582.0, 453.0, 573.0, 579.0, 522.0, 579.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [221.0, 215.0, 292.0, 284.0, 261.0, 266.0, 302.0, 268.0, 285.0, 288.0, 287.0, 286.0, 261.0, 269.0, 289.0, 293.0, 275.0, 250.0, 288.0, 285.0, 302.0, 268.0, 256.0, 260.0, 277.0, 299.0, 286.0, 284.0, 291.0, 288.0, 294.0, 285.0, 224.0, 193.0, 286.0, 290.0, 283.0, 296.0, 246.0, 279.0, 236.0, 246.0, 267.0, 255.0, 285.0, 294.0, 290.0, 283.0, 303.0, 
276.0, 283.0, 293.0, 283.0, 293.0, 290.0, 277.0, 275.0, 295.0, 259.0, 260.0, 258.0, 264.0, 257.0, 262.0, 296.0, 280.0, 275.0, 247.0, 278.0, 298.0, 295.0, 278.0, 266.0, 253.0, 292.0, 287.0, 299.0, 277.0, 272.0, 253.0, 287.0, 289.0, 259.0, 271.0, 287.0, 283.0, 281.0, 292.0, 268.0, 262.0, 296.0, 283.0, 293.0, 280.0, 282.0, 291.0, 253.0, 277.0, 253.0, 269.0, 280.0, 290.0, 245.0, 274.0, 264.0, 258.0, 282.0, 288.0, 285.0, 285.0, 279.0, 300.0, 282.0, 291.0, 286.0, 290.0, 257.0, 255.0, 301.0, 281.0, 262.0, 254.0, 279.0, 291.0, 257.0, 273.0, 278.0, 301.0, 295.0, 278.0, 286.0, 296.0, 290.0, 286.0, 282.0, 288.0, 249.0, 273.0, 263.0, 256.0, 295.0, 284.0, 264.0, 258.0, 263.0, 262.0, 292.0, 281.0, 294.0, 285.0, 283.0, 290.0, 286.0, 287.0, 286.0, 293.0, 285.0, 291.0, 287.0, 286.0, 266.0, 307.0, 246.0, 255.0, 280.0, 281.0, 259.0, 271.0, 268.0, 254.0, 288.0, 291.0, 261.0, 261.0, 273.0, 257.0, 293.0, 280.0, 290.0, 289.0, 284.0, 298.0, 260.0, 262.0, 258.0, 272.0, 294.0, 288.0, 204.0, 249.0, 290.0, 283.0, 297.0, 282.0, 264.0, 258.0, 283.0, 296.0, 306.0, 273.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6837424136699048, "mean_inference_ms": 1.205881311016033, "mean_action_processing_ms": 0.13233429471967506, "mean_env_wait_ms": 0.8382726198110214, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 582.0, "episode_reward_min": 417.0, "episode_reward_mean": 552.71, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 193.0}, "policy_reward_max": {"ppo": 307.0}, "policy_reward_mean": {"ppo": 276.355}, "hist_stats": {"episode_reward": [436.0, 576.0, 527.0, 570.0, 573.0, 573.0, 530.0, 582.0, 525.0, 573.0, 570.0, 516.0, 576.0, 570.0, 579.0, 579.0, 417.0, 576.0, 579.0, 525.0, 482.0, 522.0, 579.0, 573.0, 579.0, 576.0, 576.0, 567.0, 570.0, 519.0, 522.0, 519.0, 576.0, 522.0, 576.0, 573.0, 519.0, 579.0, 576.0, 525.0, 576.0, 530.0, 570.0, 573.0, 530.0, 579.0, 573.0, 573.0, 530.0, 522.0, 570.0, 519.0, 522.0, 570.0, 570.0, 579.0, 573.0, 
576.0, 512.0, 582.0, 516.0, 570.0, 530.0, 579.0, 573.0, 582.0, 576.0, 570.0, 522.0, 519.0, 579.0, 522.0, 525.0, 573.0, 579.0, 573.0, 573.0, 579.0, 576.0, 573.0, 573.0, 501.0, 561.0, 530.0, 522.0, 579.0, 522.0, 530.0, 573.0, 579.0, 582.0, 522.0, 530.0, 582.0, 453.0, 573.0, 579.0, 522.0, 579.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [221.0, 215.0, 292.0, 284.0, 261.0, 266.0, 302.0, 268.0, 285.0, 288.0, 287.0, 286.0, 261.0, 269.0, 289.0, 293.0, 275.0, 250.0, 288.0, 285.0, 302.0, 268.0, 256.0, 260.0, 277.0, 299.0, 286.0, 284.0, 291.0, 288.0, 294.0, 285.0, 224.0, 193.0, 286.0, 290.0, 283.0, 296.0, 246.0, 279.0, 236.0, 246.0, 267.0, 255.0, 285.0, 294.0, 290.0, 283.0, 303.0, 276.0, 283.0, 293.0, 283.0, 293.0, 290.0, 277.0, 275.0, 295.0, 259.0, 260.0, 258.0, 264.0, 257.0, 262.0, 296.0, 280.0, 275.0, 247.0, 278.0, 298.0, 295.0, 278.0, 266.0, 253.0, 292.0, 287.0, 299.0, 277.0, 272.0, 253.0, 287.0, 289.0, 259.0, 271.0, 287.0, 283.0, 281.0, 292.0, 268.0, 262.0, 296.0, 283.0, 293.0, 280.0, 282.0, 291.0, 253.0, 277.0, 253.0, 269.0, 280.0, 290.0, 245.0, 274.0, 264.0, 258.0, 282.0, 288.0, 285.0, 285.0, 279.0, 300.0, 282.0, 291.0, 286.0, 290.0, 257.0, 255.0, 301.0, 281.0, 262.0, 254.0, 279.0, 291.0, 257.0, 273.0, 278.0, 301.0, 295.0, 278.0, 286.0, 296.0, 290.0, 286.0, 282.0, 288.0, 249.0, 273.0, 263.0, 256.0, 295.0, 284.0, 264.0, 258.0, 263.0, 262.0, 292.0, 281.0, 294.0, 285.0, 283.0, 290.0, 286.0, 287.0, 286.0, 293.0, 285.0, 291.0, 287.0, 286.0, 266.0, 307.0, 246.0, 255.0, 280.0, 
281.0, 259.0, 271.0, 268.0, 254.0, 288.0, 291.0, 261.0, 261.0, 273.0, 257.0, 293.0, 280.0, 290.0, 289.0, 284.0, 298.0, 260.0, 262.0, 258.0, 272.0, 294.0, 288.0, 204.0, 249.0, 290.0, 283.0, 297.0, 282.0, 264.0, 258.0, 283.0, 296.0, 306.0, 273.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6837424136699048, "mean_inference_ms": 1.205881311016033, "mean_action_processing_ms": 0.13233429471967506, "mean_env_wait_ms": 0.8382726198110214, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 4070400, "num_agent_steps_trained": 4070400, "num_env_steps_sampled": 2035200, "num_env_steps_trained": 2035200, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 2035200, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 4070400, "timers": {"training_iteration_time_ms": 3599.198, "learn_time_ms": 1093.268, "learn_throughput": 11708.018, "synch_weights_time_ms": 12.338}, "counters": {"num_env_steps_sampled": 2035200, "num_env_steps_trained": 2035200, "num_agent_steps_sampled": 4070400, "num_agent_steps_trained": 4070400}, "done": false, "episodes_total": 5088, "training_iteration": 159, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-10-17", "timestamp": 1666581017, "time_this_iter_s": 3.688915967941284, "time_total_s": 597.0662965774536, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 597.0662965774536, "timesteps_since_restore": 0, "iterations_since_restore": 159, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 21.349999999999998, "ram_util_percent": 10.633333333333333}}
+{"custom_metrics": {"sparse_reward_mean": 191.2, "sparse_reward_min": 100, "sparse_reward_max": 200, "shaped_reward_mean": 169.5, "shaped_reward_min": 82, "shaped_reward_max": 182, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.34, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 16.83, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 14.86, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 16.37, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.17, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.24, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.08, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, 
"useful_onion_drop_agent_1_mean": 0.12, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.83, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 16.21, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 5.43, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.13, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.86, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.45, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 10, "dish_drop_agent_0_mean": 0.32, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.29, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.09, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.1, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 4.97, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.72, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 10, "soup_delivery_agent_0_mean": 4.92, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.67, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 10, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.83, "optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 16.21, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.83, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 16.21, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0009515164420008659, "policy_loss": -0.0011683932971209288, "vf_loss": 
7.463791847229004, "vf_explained_var": 0.6746877431869507, "kl": 0.0020292492117732763, "entropy": 1.0590016841888428, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 2048000, "num_env_steps_trained": 2048000, "num_agent_steps_sampled": 4096000, "num_agent_steps_trained": 4096000}, "sampler_results": {"episode_reward_max": 582.0, "episode_reward_min": 282.0, "episode_reward_mean": 551.9, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 137.0}, "policy_reward_max": {"ppo": 307.0}, "policy_reward_mean": {"ppo": 275.95}, "custom_metrics": {"sparse_reward_mean": 191.2, "sparse_reward_min": 100, "sparse_reward_max": 200, "shaped_reward_mean": 169.5, "shaped_reward_min": 82, "shaped_reward_max": 182, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.34, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 16.83, 
"onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 14.86, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 16.37, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.17, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.24, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.08, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.12, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.83, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 16.21, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 5.43, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.13, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.86, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.45, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 10, "dish_drop_agent_0_mean": 0.32, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.29, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.09, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.1, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 4.97, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.72, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 10, "soup_delivery_agent_0_mean": 4.92, "soup_delivery_agent_0_min": 0, 
"soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.67, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 10, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.83, "optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 16.21, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.83, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 16.21, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [576.0, 522.0, 576.0, 573.0, 519.0, 579.0, 576.0, 525.0, 576.0, 530.0, 570.0, 573.0, 530.0, 579.0, 573.0, 573.0, 530.0, 522.0, 570.0, 519.0, 522.0, 570.0, 570.0, 579.0, 573.0, 576.0, 512.0, 582.0, 516.0, 570.0, 530.0, 579.0, 573.0, 582.0, 576.0, 570.0, 522.0, 519.0, 579.0, 522.0, 525.0, 573.0, 579.0, 573.0, 573.0, 579.0, 576.0, 573.0, 573.0, 501.0, 561.0, 530.0, 522.0, 579.0, 522.0, 530.0, 573.0, 579.0, 582.0, 522.0, 530.0, 582.0, 453.0, 573.0, 579.0, 522.0, 579.0, 579.0, 579.0, 522.0, 579.0, 521.0, 573.0, 579.0, 579.0, 573.0, 573.0, 579.0, 533.0, 519.0, 525.0, 522.0, 573.0, 530.0, 476.0, 582.0, 576.0, 282.0, 579.0, 573.0, 573.0, 555.0, 530.0, 579.0, 576.0, 522.0, 579.0, 567.0, 525.0, 522.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [296.0, 280.0, 275.0, 247.0, 278.0, 298.0, 295.0, 278.0, 266.0, 253.0, 292.0, 287.0, 299.0, 277.0, 272.0, 253.0, 287.0, 289.0, 259.0, 271.0, 287.0, 283.0, 281.0, 292.0, 268.0, 262.0, 296.0, 283.0, 293.0, 280.0, 282.0, 291.0, 253.0, 277.0, 253.0, 269.0, 280.0, 290.0, 245.0, 274.0, 264.0, 258.0, 282.0, 288.0, 285.0, 285.0, 279.0, 300.0, 282.0, 
291.0, 286.0, 290.0, 257.0, 255.0, 301.0, 281.0, 262.0, 254.0, 279.0, 291.0, 257.0, 273.0, 278.0, 301.0, 295.0, 278.0, 286.0, 296.0, 290.0, 286.0, 282.0, 288.0, 249.0, 273.0, 263.0, 256.0, 295.0, 284.0, 264.0, 258.0, 263.0, 262.0, 292.0, 281.0, 294.0, 285.0, 283.0, 290.0, 286.0, 287.0, 286.0, 293.0, 285.0, 291.0, 287.0, 286.0, 266.0, 307.0, 246.0, 255.0, 280.0, 281.0, 259.0, 271.0, 268.0, 254.0, 288.0, 291.0, 261.0, 261.0, 273.0, 257.0, 293.0, 280.0, 290.0, 289.0, 284.0, 298.0, 260.0, 262.0, 258.0, 272.0, 294.0, 288.0, 204.0, 249.0, 290.0, 283.0, 297.0, 282.0, 264.0, 258.0, 283.0, 296.0, 306.0, 273.0, 287.0, 292.0, 261.0, 261.0, 293.0, 286.0, 271.0, 250.0, 283.0, 290.0, 283.0, 296.0, 290.0, 289.0, 282.0, 291.0, 293.0, 280.0, 290.0, 289.0, 265.0, 268.0, 250.0, 269.0, 252.0, 273.0, 248.0, 274.0, 278.0, 295.0, 264.0, 266.0, 240.0, 236.0, 303.0, 279.0, 284.0, 292.0, 145.0, 137.0, 285.0, 294.0, 293.0, 280.0, 284.0, 289.0, 264.0, 291.0, 259.0, 271.0, 289.0, 290.0, 300.0, 276.0, 250.0, 272.0, 285.0, 294.0, 281.0, 286.0, 273.0, 252.0, 254.0, 268.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6836964337068232, "mean_inference_ms": 1.2058291748355823, "mean_action_processing_ms": 0.13234047861272333, "mean_env_wait_ms": 0.8381292335140906, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 582.0, "episode_reward_min": 282.0, "episode_reward_mean": 551.9, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 137.0}, "policy_reward_max": {"ppo": 307.0}, "policy_reward_mean": {"ppo": 275.95}, "hist_stats": {"episode_reward": [576.0, 522.0, 576.0, 573.0, 519.0, 579.0, 576.0, 525.0, 576.0, 530.0, 570.0, 573.0, 530.0, 579.0, 573.0, 573.0, 530.0, 522.0, 570.0, 519.0, 522.0, 570.0, 570.0, 579.0, 573.0, 576.0, 512.0, 582.0, 516.0, 570.0, 530.0, 579.0, 573.0, 582.0, 576.0, 570.0, 522.0, 519.0, 579.0, 522.0, 525.0, 573.0, 579.0, 573.0, 573.0, 579.0, 576.0, 573.0, 573.0, 501.0, 561.0, 530.0, 522.0, 579.0, 522.0, 530.0, 573.0, 
579.0, 582.0, 522.0, 530.0, 582.0, 453.0, 573.0, 579.0, 522.0, 579.0, 579.0, 579.0, 522.0, 579.0, 521.0, 573.0, 579.0, 579.0, 573.0, 573.0, 579.0, 533.0, 519.0, 525.0, 522.0, 573.0, 530.0, 476.0, 582.0, 576.0, 282.0, 579.0, 573.0, 573.0, 555.0, 530.0, 579.0, 576.0, 522.0, 579.0, 567.0, 525.0, 522.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [296.0, 280.0, 275.0, 247.0, 278.0, 298.0, 295.0, 278.0, 266.0, 253.0, 292.0, 287.0, 299.0, 277.0, 272.0, 253.0, 287.0, 289.0, 259.0, 271.0, 287.0, 283.0, 281.0, 292.0, 268.0, 262.0, 296.0, 283.0, 293.0, 280.0, 282.0, 291.0, 253.0, 277.0, 253.0, 269.0, 280.0, 290.0, 245.0, 274.0, 264.0, 258.0, 282.0, 288.0, 285.0, 285.0, 279.0, 300.0, 282.0, 291.0, 286.0, 290.0, 257.0, 255.0, 301.0, 281.0, 262.0, 254.0, 279.0, 291.0, 257.0, 273.0, 278.0, 301.0, 295.0, 278.0, 286.0, 296.0, 290.0, 286.0, 282.0, 288.0, 249.0, 273.0, 263.0, 256.0, 295.0, 284.0, 264.0, 258.0, 263.0, 262.0, 292.0, 281.0, 294.0, 285.0, 283.0, 290.0, 286.0, 287.0, 286.0, 293.0, 285.0, 291.0, 287.0, 286.0, 266.0, 307.0, 246.0, 255.0, 280.0, 281.0, 259.0, 271.0, 268.0, 254.0, 288.0, 291.0, 261.0, 261.0, 273.0, 257.0, 293.0, 280.0, 290.0, 289.0, 284.0, 298.0, 260.0, 262.0, 258.0, 272.0, 294.0, 288.0, 204.0, 249.0, 290.0, 283.0, 297.0, 282.0, 264.0, 258.0, 283.0, 296.0, 306.0, 273.0, 287.0, 292.0, 261.0, 261.0, 293.0, 286.0, 271.0, 250.0, 283.0, 290.0, 283.0, 296.0, 290.0, 289.0, 282.0, 291.0, 293.0, 280.0, 290.0, 289.0, 265.0, 268.0, 250.0, 269.0, 252.0, 273.0, 248.0, 274.0, 278.0, 
295.0, 264.0, 266.0, 240.0, 236.0, 303.0, 279.0, 284.0, 292.0, 145.0, 137.0, 285.0, 294.0, 293.0, 280.0, 284.0, 289.0, 264.0, 291.0, 259.0, 271.0, 289.0, 290.0, 300.0, 276.0, 250.0, 272.0, 285.0, 294.0, 281.0, 286.0, 273.0, 252.0, 254.0, 268.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6836964337068232, "mean_inference_ms": 1.2058291748355823, "mean_action_processing_ms": 0.13234047861272333, "mean_env_wait_ms": 0.8381292335140906, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 4096000, "num_agent_steps_trained": 4096000, "num_env_steps_sampled": 2048000, "num_env_steps_trained": 2048000, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 2048000, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 4096000, "timers": {"training_iteration_time_ms": 3595.401, "learn_time_ms": 1086.959, "learn_throughput": 11775.969, "synch_weights_time_ms": 11.722}, "counters": {"num_env_steps_sampled": 2048000, "num_env_steps_trained": 2048000, "num_agent_steps_sampled": 4096000, "num_agent_steps_trained": 4096000}, "done": false, "episodes_total": 5120, "training_iteration": 160, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-10-21", "timestamp": 1666581021, "time_this_iter_s": 3.5925498008728027, "time_total_s": 600.6588463783264, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 600.6588463783264, "timesteps_since_restore": 0, "iterations_since_restore": 160, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 24.16, "ram_util_percent": 10.62}}
+{"custom_metrics": {"sparse_reward_mean": 191.2, "sparse_reward_min": 100, "sparse_reward_max": 200, "shaped_reward_mean": 169.95, "shaped_reward_min": 82, "shaped_reward_max": 182, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.68, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 16.54, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 15.21, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 16.2, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.22, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.25, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.11, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, 
"useful_onion_drop_agent_1_mean": 0.11, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.14, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 15.95, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 5.25, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.27, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.78, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.63, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 10, "dish_drop_agent_0_mean": 0.28, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.3, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.09, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.07, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 4.83, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.86, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 10, "soup_delivery_agent_0_mean": 4.8, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.81, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 10, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.14, "optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 15.95, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.14, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 15.95, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0019562705419957638, "policy_loss": 0.0017559619154781103, "vf_loss": 
7.291747093200684, "vf_explained_var": 0.6828951835632324, "kl": 0.0024195481091737747, "entropy": 1.0577306747436523, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 2060800, "num_env_steps_trained": 2060800, "num_agent_steps_sampled": 4121600, "num_agent_steps_trained": 4121600}, "sampler_results": {"episode_reward_max": 582.0, "episode_reward_min": 282.0, "episode_reward_mean": 552.35, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 137.0}, "policy_reward_max": {"ppo": 307.0}, "policy_reward_mean": {"ppo": 276.175}, "custom_metrics": {"sparse_reward_mean": 191.2, "sparse_reward_min": 100, "sparse_reward_max": 200, "shaped_reward_mean": 169.95, "shaped_reward_min": 82, "shaped_reward_max": 182, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.68, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 16.54, 
"onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 15.21, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 16.2, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.22, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.25, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.11, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.11, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.14, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 15.95, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 5.25, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.27, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.78, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.63, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 10, "dish_drop_agent_0_mean": 0.28, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.3, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.09, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.07, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 4.83, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.86, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 10, "soup_delivery_agent_0_mean": 4.8, "soup_delivery_agent_0_min": 0, 
"soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.81, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 10, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.14, "optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 15.95, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.14, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 15.95, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [573.0, 582.0, 576.0, 570.0, 522.0, 519.0, 579.0, 522.0, 525.0, 573.0, 579.0, 573.0, 573.0, 579.0, 576.0, 573.0, 573.0, 501.0, 561.0, 530.0, 522.0, 579.0, 522.0, 530.0, 573.0, 579.0, 582.0, 522.0, 530.0, 582.0, 453.0, 573.0, 579.0, 522.0, 579.0, 579.0, 579.0, 522.0, 579.0, 521.0, 573.0, 579.0, 579.0, 573.0, 573.0, 579.0, 533.0, 519.0, 525.0, 522.0, 573.0, 530.0, 476.0, 582.0, 576.0, 282.0, 579.0, 573.0, 573.0, 555.0, 530.0, 579.0, 576.0, 522.0, 579.0, 567.0, 525.0, 522.0, 570.0, 525.0, 522.0, 570.0, 579.0, 536.0, 576.0, 522.0, 576.0, 519.0, 533.0, 573.0, 530.0, 576.0, 579.0, 570.0, 579.0, 522.0, 573.0, 519.0, 533.0, 579.0, 530.0, 579.0, 579.0, 579.0, 576.0, 573.0, 525.0, 579.0, 564.0, 570.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [295.0, 278.0, 286.0, 296.0, 290.0, 286.0, 282.0, 288.0, 249.0, 273.0, 263.0, 256.0, 295.0, 284.0, 264.0, 258.0, 263.0, 262.0, 292.0, 281.0, 294.0, 285.0, 283.0, 290.0, 286.0, 287.0, 286.0, 293.0, 285.0, 291.0, 287.0, 286.0, 266.0, 307.0, 246.0, 255.0, 280.0, 281.0, 259.0, 271.0, 268.0, 254.0, 288.0, 291.0, 261.0, 261.0, 273.0, 257.0, 293.0, 
280.0, 290.0, 289.0, 284.0, 298.0, 260.0, 262.0, 258.0, 272.0, 294.0, 288.0, 204.0, 249.0, 290.0, 283.0, 297.0, 282.0, 264.0, 258.0, 283.0, 296.0, 306.0, 273.0, 287.0, 292.0, 261.0, 261.0, 293.0, 286.0, 271.0, 250.0, 283.0, 290.0, 283.0, 296.0, 290.0, 289.0, 282.0, 291.0, 293.0, 280.0, 290.0, 289.0, 265.0, 268.0, 250.0, 269.0, 252.0, 273.0, 248.0, 274.0, 278.0, 295.0, 264.0, 266.0, 240.0, 236.0, 303.0, 279.0, 284.0, 292.0, 145.0, 137.0, 285.0, 294.0, 293.0, 280.0, 284.0, 289.0, 264.0, 291.0, 259.0, 271.0, 289.0, 290.0, 300.0, 276.0, 250.0, 272.0, 285.0, 294.0, 281.0, 286.0, 273.0, 252.0, 254.0, 268.0, 276.0, 294.0, 267.0, 258.0, 268.0, 254.0, 289.0, 281.0, 296.0, 283.0, 270.0, 266.0, 288.0, 288.0, 258.0, 264.0, 289.0, 287.0, 249.0, 270.0, 270.0, 263.0, 293.0, 280.0, 273.0, 257.0, 291.0, 285.0, 281.0, 298.0, 283.0, 287.0, 293.0, 286.0, 252.0, 270.0, 278.0, 295.0, 267.0, 252.0, 263.0, 270.0, 282.0, 297.0, 268.0, 262.0, 287.0, 292.0, 292.0, 287.0, 284.0, 295.0, 276.0, 300.0, 280.0, 293.0, 261.0, 264.0, 285.0, 294.0, 276.0, 288.0, 283.0, 287.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6836692176439121, "mean_inference_ms": 1.2057925696042, "mean_action_processing_ms": 0.13234922853721529, "mean_env_wait_ms": 0.8380159133935706, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 582.0, "episode_reward_min": 282.0, "episode_reward_mean": 552.35, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 137.0}, "policy_reward_max": {"ppo": 307.0}, "policy_reward_mean": {"ppo": 276.175}, "hist_stats": {"episode_reward": [573.0, 582.0, 576.0, 570.0, 522.0, 519.0, 579.0, 522.0, 525.0, 573.0, 579.0, 573.0, 573.0, 579.0, 576.0, 573.0, 573.0, 501.0, 561.0, 530.0, 522.0, 579.0, 522.0, 530.0, 573.0, 579.0, 582.0, 522.0, 530.0, 582.0, 453.0, 573.0, 579.0, 522.0, 579.0, 579.0, 579.0, 522.0, 579.0, 521.0, 573.0, 579.0, 579.0, 573.0, 573.0, 579.0, 533.0, 519.0, 525.0, 522.0, 573.0, 530.0, 476.0, 582.0, 576.0, 282.0, 579.0, 
573.0, 573.0, 555.0, 530.0, 579.0, 576.0, 522.0, 579.0, 567.0, 525.0, 522.0, 570.0, 525.0, 522.0, 570.0, 579.0, 536.0, 576.0, 522.0, 576.0, 519.0, 533.0, 573.0, 530.0, 576.0, 579.0, 570.0, 579.0, 522.0, 573.0, 519.0, 533.0, 579.0, 530.0, 579.0, 579.0, 579.0, 576.0, 573.0, 525.0, 579.0, 564.0, 570.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [295.0, 278.0, 286.0, 296.0, 290.0, 286.0, 282.0, 288.0, 249.0, 273.0, 263.0, 256.0, 295.0, 284.0, 264.0, 258.0, 263.0, 262.0, 292.0, 281.0, 294.0, 285.0, 283.0, 290.0, 286.0, 287.0, 286.0, 293.0, 285.0, 291.0, 287.0, 286.0, 266.0, 307.0, 246.0, 255.0, 280.0, 281.0, 259.0, 271.0, 268.0, 254.0, 288.0, 291.0, 261.0, 261.0, 273.0, 257.0, 293.0, 280.0, 290.0, 289.0, 284.0, 298.0, 260.0, 262.0, 258.0, 272.0, 294.0, 288.0, 204.0, 249.0, 290.0, 283.0, 297.0, 282.0, 264.0, 258.0, 283.0, 296.0, 306.0, 273.0, 287.0, 292.0, 261.0, 261.0, 293.0, 286.0, 271.0, 250.0, 283.0, 290.0, 283.0, 296.0, 290.0, 289.0, 282.0, 291.0, 293.0, 280.0, 290.0, 289.0, 265.0, 268.0, 250.0, 269.0, 252.0, 273.0, 248.0, 274.0, 278.0, 295.0, 264.0, 266.0, 240.0, 236.0, 303.0, 279.0, 284.0, 292.0, 145.0, 137.0, 285.0, 294.0, 293.0, 280.0, 284.0, 289.0, 264.0, 291.0, 259.0, 271.0, 289.0, 290.0, 300.0, 276.0, 250.0, 272.0, 285.0, 294.0, 281.0, 286.0, 273.0, 252.0, 254.0, 268.0, 276.0, 294.0, 267.0, 258.0, 268.0, 254.0, 289.0, 281.0, 296.0, 283.0, 270.0, 266.0, 288.0, 288.0, 258.0, 264.0, 289.0, 287.0, 249.0, 270.0, 270.0, 263.0, 293.0, 280.0, 273.0, 257.0, 291.0, 285.0, 281.0, 
298.0, 283.0, 287.0, 293.0, 286.0, 252.0, 270.0, 278.0, 295.0, 267.0, 252.0, 263.0, 270.0, 282.0, 297.0, 268.0, 262.0, 287.0, 292.0, 292.0, 287.0, 284.0, 295.0, 276.0, 300.0, 280.0, 293.0, 261.0, 264.0, 285.0, 294.0, 276.0, 288.0, 283.0, 287.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6836692176439121, "mean_inference_ms": 1.2057925696042, "mean_action_processing_ms": 0.13234922853721529, "mean_env_wait_ms": 0.8380159133935706, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 4121600, "num_agent_steps_trained": 4121600, "num_env_steps_sampled": 2060800, "num_env_steps_trained": 2060800, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 2060800, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 4121600, "timers": {"training_iteration_time_ms": 3609.109, "learn_time_ms": 1096.295, "learn_throughput": 11675.688, "synch_weights_time_ms": 12.439}, "counters": {"num_env_steps_sampled": 2060800, "num_env_steps_trained": 2060800, "num_agent_steps_sampled": 4121600, "num_agent_steps_trained": 4121600}, "done": false, "episodes_total": 5152, "training_iteration": 161, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-10-25", "timestamp": 1666581025, "time_this_iter_s": 3.668421745300293, "time_total_s": 604.3272681236267, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 604.3272681236267, "timesteps_since_restore": 0, "iterations_since_restore": 161, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 21.283333333333335, "ram_util_percent": 10.616666666666667}}
+{"custom_metrics": {"sparse_reward_mean": 191.8, "sparse_reward_min": 100, "sparse_reward_max": 200, "shaped_reward_mean": 170.53, "shaped_reward_min": 82, "shaped_reward_max": 182, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.86, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 16.31, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 15.45, "useful_onion_pickup_agent_0_min": 9, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 15.97, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.23, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.23, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.11, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, 
"useful_onion_drop_agent_1_mean": 0.1, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.32, "potting_onion_agent_0_min": 9, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 15.78, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.22, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.34, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 4.74, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.77, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.21, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.33, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.05, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.04, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 4.87, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.88, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.8, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.84, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.03, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.32, "optimal_onion_potting_agent_0_min": 9, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 15.78, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.32, "viable_onion_potting_agent_0_min": 9, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 15.78, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0015147824306041002, "policy_loss": -0.001708789961412549, "vf_loss": 
7.2465362548828125, "vf_explained_var": 0.684109091758728, "kl": 0.002135517541319132, "entropy": 1.0612905025482178, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 2073600, "num_env_steps_trained": 2073600, "num_agent_steps_sampled": 4147200, "num_agent_steps_trained": 4147200}, "sampler_results": {"episode_reward_max": 582.0, "episode_reward_min": 282.0, "episode_reward_mean": 554.13, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 137.0}, "policy_reward_max": {"ppo": 306.0}, "policy_reward_mean": {"ppo": 277.065}, "custom_metrics": {"sparse_reward_mean": 191.8, "sparse_reward_min": 100, "sparse_reward_max": 200, "shaped_reward_mean": 170.53, "shaped_reward_min": 82, "shaped_reward_max": 182, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.86, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 16.31, 
"onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 15.45, "useful_onion_pickup_agent_0_min": 9, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 15.97, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.23, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.23, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.11, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.1, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.32, "potting_onion_agent_0_min": 9, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 15.78, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.22, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.34, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 4.74, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.77, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.21, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.33, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.05, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.04, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 4.87, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.88, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.8, "soup_delivery_agent_0_min": 1, 
"soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.84, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.03, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.32, "optimal_onion_potting_agent_0_min": 9, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 15.78, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.32, "viable_onion_potting_agent_0_min": 9, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 15.78, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 522.0, 579.0, 579.0, 579.0, 522.0, 579.0, 521.0, 573.0, 579.0, 579.0, 573.0, 573.0, 579.0, 533.0, 519.0, 525.0, 522.0, 573.0, 530.0, 476.0, 582.0, 576.0, 282.0, 579.0, 573.0, 573.0, 555.0, 530.0, 579.0, 576.0, 522.0, 579.0, 567.0, 525.0, 522.0, 570.0, 525.0, 522.0, 570.0, 579.0, 536.0, 576.0, 522.0, 576.0, 519.0, 533.0, 573.0, 530.0, 576.0, 579.0, 570.0, 579.0, 522.0, 573.0, 519.0, 533.0, 579.0, 530.0, 579.0, 579.0, 579.0, 576.0, 573.0, 525.0, 579.0, 564.0, 570.0, 519.0, 567.0, 564.0, 573.0, 576.0, 576.0, 579.0, 533.0, 576.0, 530.0, 525.0, 573.0, 579.0, 570.0, 573.0, 573.0, 570.0, 570.0, 579.0, 516.0, 576.0, 530.0, 482.0, 576.0, 576.0, 527.0, 573.0, 582.0, 579.0, 522.0, 579.0, 561.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [297.0, 282.0, 264.0, 258.0, 283.0, 296.0, 306.0, 273.0, 287.0, 292.0, 261.0, 261.0, 293.0, 286.0, 271.0, 250.0, 283.0, 290.0, 283.0, 296.0, 290.0, 289.0, 282.0, 291.0, 293.0, 280.0, 290.0, 289.0, 265.0, 268.0, 250.0, 269.0, 252.0, 273.0, 248.0, 274.0, 278.0, 295.0, 264.0, 266.0, 240.0, 236.0, 303.0, 279.0, 284.0, 292.0, 145.0, 137.0, 285.0, 
294.0, 293.0, 280.0, 284.0, 289.0, 264.0, 291.0, 259.0, 271.0, 289.0, 290.0, 300.0, 276.0, 250.0, 272.0, 285.0, 294.0, 281.0, 286.0, 273.0, 252.0, 254.0, 268.0, 276.0, 294.0, 267.0, 258.0, 268.0, 254.0, 289.0, 281.0, 296.0, 283.0, 270.0, 266.0, 288.0, 288.0, 258.0, 264.0, 289.0, 287.0, 249.0, 270.0, 270.0, 263.0, 293.0, 280.0, 273.0, 257.0, 291.0, 285.0, 281.0, 298.0, 283.0, 287.0, 293.0, 286.0, 252.0, 270.0, 278.0, 295.0, 267.0, 252.0, 263.0, 270.0, 282.0, 297.0, 268.0, 262.0, 287.0, 292.0, 292.0, 287.0, 284.0, 295.0, 276.0, 300.0, 280.0, 293.0, 261.0, 264.0, 285.0, 294.0, 276.0, 288.0, 283.0, 287.0, 264.0, 255.0, 301.0, 266.0, 279.0, 285.0, 278.0, 295.0, 292.0, 284.0, 300.0, 276.0, 289.0, 290.0, 271.0, 262.0, 288.0, 288.0, 265.0, 265.0, 262.0, 263.0, 286.0, 287.0, 290.0, 289.0, 279.0, 291.0, 288.0, 285.0, 291.0, 282.0, 280.0, 290.0, 293.0, 277.0, 292.0, 287.0, 242.0, 274.0, 278.0, 298.0, 270.0, 260.0, 236.0, 246.0, 295.0, 281.0, 292.0, 284.0, 260.0, 267.0, 283.0, 290.0, 286.0, 296.0, 291.0, 288.0, 258.0, 264.0, 278.0, 301.0, 274.0, 287.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6836431633165089, "mean_inference_ms": 1.2057460710873107, "mean_action_processing_ms": 0.13235222793419196, "mean_env_wait_ms": 0.8378768884664711, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 582.0, "episode_reward_min": 282.0, "episode_reward_mean": 554.13, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 137.0}, "policy_reward_max": {"ppo": 306.0}, "policy_reward_mean": {"ppo": 277.065}, "hist_stats": {"episode_reward": [579.0, 522.0, 579.0, 579.0, 579.0, 522.0, 579.0, 521.0, 573.0, 579.0, 579.0, 573.0, 573.0, 579.0, 533.0, 519.0, 525.0, 522.0, 573.0, 530.0, 476.0, 582.0, 576.0, 282.0, 579.0, 573.0, 573.0, 555.0, 530.0, 579.0, 576.0, 522.0, 579.0, 567.0, 525.0, 522.0, 570.0, 525.0, 522.0, 570.0, 579.0, 536.0, 576.0, 522.0, 576.0, 519.0, 533.0, 573.0, 530.0, 576.0, 579.0, 570.0, 579.0, 522.0, 573.0, 519.0, 
533.0, 579.0, 530.0, 579.0, 579.0, 579.0, 576.0, 573.0, 525.0, 579.0, 564.0, 570.0, 519.0, 567.0, 564.0, 573.0, 576.0, 576.0, 579.0, 533.0, 576.0, 530.0, 525.0, 573.0, 579.0, 570.0, 573.0, 573.0, 570.0, 570.0, 579.0, 516.0, 576.0, 530.0, 482.0, 576.0, 576.0, 527.0, 573.0, 582.0, 579.0, 522.0, 579.0, 561.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [297.0, 282.0, 264.0, 258.0, 283.0, 296.0, 306.0, 273.0, 287.0, 292.0, 261.0, 261.0, 293.0, 286.0, 271.0, 250.0, 283.0, 290.0, 283.0, 296.0, 290.0, 289.0, 282.0, 291.0, 293.0, 280.0, 290.0, 289.0, 265.0, 268.0, 250.0, 269.0, 252.0, 273.0, 248.0, 274.0, 278.0, 295.0, 264.0, 266.0, 240.0, 236.0, 303.0, 279.0, 284.0, 292.0, 145.0, 137.0, 285.0, 294.0, 293.0, 280.0, 284.0, 289.0, 264.0, 291.0, 259.0, 271.0, 289.0, 290.0, 300.0, 276.0, 250.0, 272.0, 285.0, 294.0, 281.0, 286.0, 273.0, 252.0, 254.0, 268.0, 276.0, 294.0, 267.0, 258.0, 268.0, 254.0, 289.0, 281.0, 296.0, 283.0, 270.0, 266.0, 288.0, 288.0, 258.0, 264.0, 289.0, 287.0, 249.0, 270.0, 270.0, 263.0, 293.0, 280.0, 273.0, 257.0, 291.0, 285.0, 281.0, 298.0, 283.0, 287.0, 293.0, 286.0, 252.0, 270.0, 278.0, 295.0, 267.0, 252.0, 263.0, 270.0, 282.0, 297.0, 268.0, 262.0, 287.0, 292.0, 292.0, 287.0, 284.0, 295.0, 276.0, 300.0, 280.0, 293.0, 261.0, 264.0, 285.0, 294.0, 276.0, 288.0, 283.0, 287.0, 264.0, 255.0, 301.0, 266.0, 279.0, 285.0, 278.0, 295.0, 292.0, 284.0, 300.0, 276.0, 289.0, 290.0, 271.0, 262.0, 288.0, 288.0, 265.0, 265.0, 262.0, 263.0, 286.0, 287.0, 290.0, 289.0, 279.0, 291.0, 
288.0, 285.0, 291.0, 282.0, 280.0, 290.0, 293.0, 277.0, 292.0, 287.0, 242.0, 274.0, 278.0, 298.0, 270.0, 260.0, 236.0, 246.0, 295.0, 281.0, 292.0, 284.0, 260.0, 267.0, 283.0, 290.0, 286.0, 296.0, 291.0, 288.0, 258.0, 264.0, 278.0, 301.0, 274.0, 287.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6836431633165089, "mean_inference_ms": 1.2057460710873107, "mean_action_processing_ms": 0.13235222793419196, "mean_env_wait_ms": 0.8378768884664711, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 4147200, "num_agent_steps_trained": 4147200, "num_env_steps_sampled": 2073600, "num_env_steps_trained": 2073600, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 2073600, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 4147200, "timers": {"training_iteration_time_ms": 3604.206, "learn_time_ms": 1093.522, "learn_throughput": 11705.3, "synch_weights_time_ms": 12.527}, "counters": {"num_env_steps_sampled": 2073600, "num_env_steps_trained": 2073600, "num_agent_steps_sampled": 4147200, "num_agent_steps_trained": 4147200}, "done": false, "episodes_total": 5184, "training_iteration": 162, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-10-28", "timestamp": 1666581028, "time_this_iter_s": 3.661637544631958, "time_total_s": 607.9889056682587, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 607.9889056682587, "timesteps_since_restore": 0, "iterations_since_restore": 162, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.88, "ram_util_percent": 10.62}}
+{"custom_metrics": {"sparse_reward_mean": 189.2, "sparse_reward_min": 20, "sparse_reward_max": 200, "shaped_reward_mean": 168.45, "shaped_reward_min": 26, "shaped_reward_max": 182, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.64, "onion_pickup_agent_0_min": 2, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 16.05, "onion_pickup_agent_1_min": 5, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 15.19, "useful_onion_pickup_agent_0_min": 2, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 15.72, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 26, "onion_drop_agent_0_mean": 0.2, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.23, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.12, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, 
"useful_onion_drop_agent_1_mean": 0.1, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 15.12, "potting_onion_agent_0_min": 2, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 15.57, "potting_onion_agent_1_min": 4, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 5.34, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.2, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 4.79, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.62, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.26, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.33, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.05, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.05, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 4.95, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.72, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.84, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.66, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.06, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 3, "soup_drop_agent_1_mean": 0.04, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.12, "optimal_onion_potting_agent_0_min": 2, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 15.57, "optimal_onion_potting_agent_1_min": 4, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.12, "viable_onion_potting_agent_0_min": 2, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 15.57, "viable_onion_potting_agent_1_min": 4, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0005986830219626427, "policy_loss": 0.00038289371877908707, "vf_loss": 
7.4827728271484375, "vf_explained_var": 0.6809073090553284, "kl": 0.001983621623367071, "entropy": 1.0649769306182861, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 2086400, "num_env_steps_trained": 2086400, "num_agent_steps_sampled": 4172800, "num_agent_steps_trained": 4172800}, "sampler_results": {"episode_reward_max": 582.0, "episode_reward_min": 66.0, "episode_reward_mean": 546.85, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 32.0}, "policy_reward_max": {"ppo": 307.0}, "policy_reward_mean": {"ppo": 273.425}, "custom_metrics": {"sparse_reward_mean": 189.2, "sparse_reward_min": 20, "sparse_reward_max": 200, "shaped_reward_mean": 168.45, "shaped_reward_min": 26, "shaped_reward_max": 182, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.64, "onion_pickup_agent_0_min": 2, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 16.05, 
"onion_pickup_agent_1_min": 5, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 15.19, "useful_onion_pickup_agent_0_min": 2, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 15.72, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 26, "onion_drop_agent_0_mean": 0.2, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.23, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.12, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.1, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 15.12, "potting_onion_agent_0_min": 2, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 15.57, "potting_onion_agent_1_min": 4, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 5.34, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.2, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 4.79, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.62, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.26, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.33, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.05, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.05, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 4.95, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.72, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.84, "soup_delivery_agent_0_min": 1, 
"soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.66, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.06, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 3, "soup_drop_agent_1_mean": 0.04, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.12, "optimal_onion_potting_agent_0_min": 2, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 15.57, "optimal_onion_potting_agent_1_min": 4, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.12, "viable_onion_potting_agent_0_min": 2, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 15.57, "viable_onion_potting_agent_1_min": 4, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 567.0, 525.0, 522.0, 570.0, 525.0, 522.0, 570.0, 579.0, 536.0, 576.0, 522.0, 576.0, 519.0, 533.0, 573.0, 530.0, 576.0, 579.0, 570.0, 579.0, 522.0, 573.0, 519.0, 533.0, 579.0, 530.0, 579.0, 579.0, 579.0, 576.0, 573.0, 525.0, 579.0, 564.0, 570.0, 519.0, 567.0, 564.0, 573.0, 576.0, 576.0, 579.0, 533.0, 576.0, 530.0, 525.0, 573.0, 579.0, 570.0, 573.0, 573.0, 570.0, 570.0, 579.0, 516.0, 576.0, 530.0, 482.0, 576.0, 576.0, 527.0, 573.0, 582.0, 579.0, 522.0, 579.0, 561.0, 567.0, 513.0, 522.0, 66.0, 530.0, 522.0, 533.0, 473.0, 519.0, 516.0, 530.0, 582.0, 527.0, 522.0, 567.0, 527.0, 533.0, 573.0, 576.0, 519.0, 573.0, 516.0, 522.0, 582.0, 522.0, 573.0, 519.0, 576.0, 579.0, 573.0, 525.0, 516.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [285.0, 294.0, 281.0, 286.0, 273.0, 252.0, 254.0, 268.0, 276.0, 294.0, 267.0, 258.0, 268.0, 254.0, 289.0, 281.0, 296.0, 283.0, 270.0, 266.0, 288.0, 288.0, 258.0, 264.0, 289.0, 287.0, 249.0, 270.0, 270.0, 263.0, 293.0, 280.0, 273.0, 257.0, 291.0, 285.0, 281.0, 298.0, 283.0, 287.0, 293.0, 286.0, 252.0, 270.0, 278.0, 295.0, 267.0, 252.0, 263.0, 
270.0, 282.0, 297.0, 268.0, 262.0, 287.0, 292.0, 292.0, 287.0, 284.0, 295.0, 276.0, 300.0, 280.0, 293.0, 261.0, 264.0, 285.0, 294.0, 276.0, 288.0, 283.0, 287.0, 264.0, 255.0, 301.0, 266.0, 279.0, 285.0, 278.0, 295.0, 292.0, 284.0, 300.0, 276.0, 289.0, 290.0, 271.0, 262.0, 288.0, 288.0, 265.0, 265.0, 262.0, 263.0, 286.0, 287.0, 290.0, 289.0, 279.0, 291.0, 288.0, 285.0, 291.0, 282.0, 280.0, 290.0, 293.0, 277.0, 292.0, 287.0, 242.0, 274.0, 278.0, 298.0, 270.0, 260.0, 236.0, 246.0, 295.0, 281.0, 292.0, 284.0, 260.0, 267.0, 283.0, 290.0, 286.0, 296.0, 291.0, 288.0, 258.0, 264.0, 278.0, 301.0, 274.0, 287.0, 269.0, 298.0, 267.0, 246.0, 265.0, 257.0, 34.0, 32.0, 278.0, 252.0, 248.0, 274.0, 277.0, 256.0, 248.0, 225.0, 266.0, 253.0, 263.0, 253.0, 278.0, 252.0, 307.0, 275.0, 247.0, 280.0, 264.0, 258.0, 285.0, 282.0, 269.0, 258.0, 260.0, 273.0, 285.0, 288.0, 287.0, 289.0, 273.0, 246.0, 285.0, 288.0, 249.0, 267.0, 263.0, 259.0, 286.0, 296.0, 259.0, 263.0, 294.0, 279.0, 270.0, 249.0, 277.0, 299.0, 291.0, 288.0, 282.0, 291.0, 260.0, 265.0, 270.0, 246.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6836350018322129, "mean_inference_ms": 1.2057417308134253, "mean_action_processing_ms": 0.13235457058603195, "mean_env_wait_ms": 0.837765417455068, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 582.0, "episode_reward_min": 66.0, "episode_reward_mean": 546.85, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 32.0}, "policy_reward_max": {"ppo": 307.0}, "policy_reward_mean": {"ppo": 273.425}, "hist_stats": {"episode_reward": [579.0, 567.0, 525.0, 522.0, 570.0, 525.0, 522.0, 570.0, 579.0, 536.0, 576.0, 522.0, 576.0, 519.0, 533.0, 573.0, 530.0, 576.0, 579.0, 570.0, 579.0, 522.0, 573.0, 519.0, 533.0, 579.0, 530.0, 579.0, 579.0, 579.0, 576.0, 573.0, 525.0, 579.0, 564.0, 570.0, 519.0, 567.0, 564.0, 573.0, 576.0, 576.0, 579.0, 533.0, 576.0, 530.0, 525.0, 573.0, 579.0, 570.0, 573.0, 573.0, 570.0, 570.0, 579.0, 516.0, 576.0, 
530.0, 482.0, 576.0, 576.0, 527.0, 573.0, 582.0, 579.0, 522.0, 579.0, 561.0, 567.0, 513.0, 522.0, 66.0, 530.0, 522.0, 533.0, 473.0, 519.0, 516.0, 530.0, 582.0, 527.0, 522.0, 567.0, 527.0, 533.0, 573.0, 576.0, 519.0, 573.0, 516.0, 522.0, 582.0, 522.0, 573.0, 519.0, 576.0, 579.0, 573.0, 525.0, 516.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [285.0, 294.0, 281.0, 286.0, 273.0, 252.0, 254.0, 268.0, 276.0, 294.0, 267.0, 258.0, 268.0, 254.0, 289.0, 281.0, 296.0, 283.0, 270.0, 266.0, 288.0, 288.0, 258.0, 264.0, 289.0, 287.0, 249.0, 270.0, 270.0, 263.0, 293.0, 280.0, 273.0, 257.0, 291.0, 285.0, 281.0, 298.0, 283.0, 287.0, 293.0, 286.0, 252.0, 270.0, 278.0, 295.0, 267.0, 252.0, 263.0, 270.0, 282.0, 297.0, 268.0, 262.0, 287.0, 292.0, 292.0, 287.0, 284.0, 295.0, 276.0, 300.0, 280.0, 293.0, 261.0, 264.0, 285.0, 294.0, 276.0, 288.0, 283.0, 287.0, 264.0, 255.0, 301.0, 266.0, 279.0, 285.0, 278.0, 295.0, 292.0, 284.0, 300.0, 276.0, 289.0, 290.0, 271.0, 262.0, 288.0, 288.0, 265.0, 265.0, 262.0, 263.0, 286.0, 287.0, 290.0, 289.0, 279.0, 291.0, 288.0, 285.0, 291.0, 282.0, 280.0, 290.0, 293.0, 277.0, 292.0, 287.0, 242.0, 274.0, 278.0, 298.0, 270.0, 260.0, 236.0, 246.0, 295.0, 281.0, 292.0, 284.0, 260.0, 267.0, 283.0, 290.0, 286.0, 296.0, 291.0, 288.0, 258.0, 264.0, 278.0, 301.0, 274.0, 287.0, 269.0, 298.0, 267.0, 246.0, 265.0, 257.0, 34.0, 32.0, 278.0, 252.0, 248.0, 274.0, 277.0, 256.0, 248.0, 225.0, 266.0, 253.0, 263.0, 253.0, 278.0, 252.0, 307.0, 275.0, 247.0, 280.0, 264.0, 258.0, 285.0, 
282.0, 269.0, 258.0, 260.0, 273.0, 285.0, 288.0, 287.0, 289.0, 273.0, 246.0, 285.0, 288.0, 249.0, 267.0, 263.0, 259.0, 286.0, 296.0, 259.0, 263.0, 294.0, 279.0, 270.0, 249.0, 277.0, 299.0, 291.0, 288.0, 282.0, 291.0, 260.0, 265.0, 270.0, 246.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6836350018322129, "mean_inference_ms": 1.2057417308134253, "mean_action_processing_ms": 0.13235457058603195, "mean_env_wait_ms": 0.837765417455068, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 4172800, "num_agent_steps_trained": 4172800, "num_env_steps_sampled": 2086400, "num_env_steps_trained": 2086400, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 2086400, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 4172800, "timers": {"training_iteration_time_ms": 3598.076, "learn_time_ms": 1088.97, "learn_throughput": 11754.222, "synch_weights_time_ms": 12.913}, "counters": {"num_env_steps_sampled": 2086400, "num_env_steps_trained": 2086400, "num_agent_steps_sampled": 4172800, "num_agent_steps_trained": 4172800}, "done": false, "episodes_total": 5216, "training_iteration": 163, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-10-32", "timestamp": 1666581032, "time_this_iter_s": 3.6558845043182373, "time_total_s": 611.6447901725769, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 611.6447901725769, "timesteps_since_restore": 0, "iterations_since_restore": 163, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 21.0, "ram_util_percent": 10.616666666666665}}
+{"custom_metrics": {"sparse_reward_mean": 188.6, "sparse_reward_min": 20, "sparse_reward_max": 200, "shaped_reward_mean": 166.81, "shaped_reward_min": 26, "shaped_reward_max": 182, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.62, "onion_pickup_agent_0_min": 2, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 15.76, "onion_pickup_agent_1_min": 5, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 15.17, "useful_onion_pickup_agent_0_min": 2, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 15.34, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 26, "onion_drop_agent_0_mean": 0.17, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.17, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.09, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, 
"useful_onion_drop_agent_1_mean": 0.12, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 15.14, "potting_onion_agent_0_min": 2, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 15.32, "potting_onion_agent_1_min": 4, "potting_onion_agent_1_max": 25, "dish_pickup_agent_0_mean": 5.3, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.27, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 4.65, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.56, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.26, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.39, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.04, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.07, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 4.91, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.69, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 4.81, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.64, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.05, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 3, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.14, "optimal_onion_potting_agent_0_min": 2, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 15.32, "optimal_onion_potting_agent_1_min": 4, "optimal_onion_potting_agent_1_max": 25, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.14, "viable_onion_potting_agent_0_min": 2, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 15.32, "viable_onion_potting_agent_1_min": 4, "viable_onion_potting_agent_1_max": 25, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0022505151573568583, "policy_loss": -0.00245782732963562, "vf_loss": 
7.431617736816406, "vf_explained_var": 0.670035719871521, "kl": 0.0017980989068746567, "entropy": 1.07169771194458, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 2099200, "num_env_steps_trained": 2099200, "num_agent_steps_sampled": 4198400, "num_agent_steps_trained": 4198400}, "sampler_results": {"episode_reward_max": 582.0, "episode_reward_min": 66.0, "episode_reward_mean": 544.01, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 32.0}, "policy_reward_max": {"ppo": 307.0}, "policy_reward_mean": {"ppo": 272.005}, "custom_metrics": {"sparse_reward_mean": 188.6, "sparse_reward_min": 20, "sparse_reward_max": 200, "shaped_reward_mean": 166.81, "shaped_reward_min": 26, "shaped_reward_max": 182, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.62, "onion_pickup_agent_0_min": 2, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 15.76, 
"onion_pickup_agent_1_min": 5, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 15.17, "useful_onion_pickup_agent_0_min": 2, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 15.34, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 26, "onion_drop_agent_0_mean": 0.17, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.17, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.09, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.12, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 15.14, "potting_onion_agent_0_min": 2, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 15.32, "potting_onion_agent_1_min": 4, "potting_onion_agent_1_max": 25, "dish_pickup_agent_0_mean": 5.3, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.27, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 4.65, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.56, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.26, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.39, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.04, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.07, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 4.91, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.69, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 4.81, "soup_delivery_agent_0_min": 1, 
"soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.64, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.05, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 3, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.14, "optimal_onion_potting_agent_0_min": 2, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 15.32, "optimal_onion_potting_agent_1_min": 4, "optimal_onion_potting_agent_1_max": 25, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.14, "viable_onion_potting_agent_0_min": 2, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 15.32, "viable_onion_potting_agent_1_min": 4, "viable_onion_potting_agent_1_max": 25, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [525.0, 579.0, 564.0, 570.0, 519.0, 567.0, 564.0, 573.0, 576.0, 576.0, 579.0, 533.0, 576.0, 530.0, 525.0, 573.0, 579.0, 570.0, 573.0, 573.0, 570.0, 570.0, 579.0, 516.0, 576.0, 530.0, 482.0, 576.0, 576.0, 527.0, 573.0, 582.0, 579.0, 522.0, 579.0, 561.0, 567.0, 513.0, 522.0, 66.0, 530.0, 522.0, 533.0, 473.0, 519.0, 516.0, 530.0, 582.0, 527.0, 522.0, 567.0, 527.0, 533.0, 573.0, 576.0, 519.0, 573.0, 516.0, 522.0, 582.0, 522.0, 573.0, 519.0, 576.0, 579.0, 573.0, 525.0, 516.0, 522.0, 579.0, 576.0, 530.0, 582.0, 522.0, 567.0, 513.0, 582.0, 519.0, 576.0, 525.0, 552.0, 582.0, 525.0, 567.0, 444.0, 522.0, 579.0, 525.0, 525.0, 579.0, 570.0, 573.0, 570.0, 522.0, 573.0, 513.0, 519.0, 564.0, 570.0, 519.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [261.0, 264.0, 285.0, 294.0, 276.0, 288.0, 283.0, 287.0, 264.0, 255.0, 301.0, 266.0, 279.0, 285.0, 278.0, 295.0, 292.0, 284.0, 300.0, 276.0, 289.0, 290.0, 271.0, 262.0, 288.0, 288.0, 265.0, 265.0, 262.0, 263.0, 286.0, 287.0, 290.0, 289.0, 279.0, 291.0, 288.0, 285.0, 291.0, 282.0, 280.0, 290.0, 293.0, 277.0, 292.0, 287.0, 242.0, 274.0, 278.0, 
298.0, 270.0, 260.0, 236.0, 246.0, 295.0, 281.0, 292.0, 284.0, 260.0, 267.0, 283.0, 290.0, 286.0, 296.0, 291.0, 288.0, 258.0, 264.0, 278.0, 301.0, 274.0, 287.0, 269.0, 298.0, 267.0, 246.0, 265.0, 257.0, 34.0, 32.0, 278.0, 252.0, 248.0, 274.0, 277.0, 256.0, 248.0, 225.0, 266.0, 253.0, 263.0, 253.0, 278.0, 252.0, 307.0, 275.0, 247.0, 280.0, 264.0, 258.0, 285.0, 282.0, 269.0, 258.0, 260.0, 273.0, 285.0, 288.0, 287.0, 289.0, 273.0, 246.0, 285.0, 288.0, 249.0, 267.0, 263.0, 259.0, 286.0, 296.0, 259.0, 263.0, 294.0, 279.0, 270.0, 249.0, 277.0, 299.0, 291.0, 288.0, 282.0, 291.0, 260.0, 265.0, 270.0, 246.0, 261.0, 261.0, 303.0, 276.0, 289.0, 287.0, 262.0, 268.0, 293.0, 289.0, 254.0, 268.0, 277.0, 290.0, 266.0, 247.0, 294.0, 288.0, 265.0, 254.0, 276.0, 300.0, 261.0, 264.0, 288.0, 264.0, 290.0, 292.0, 270.0, 255.0, 281.0, 286.0, 230.0, 214.0, 249.0, 273.0, 287.0, 292.0, 263.0, 262.0, 270.0, 255.0, 273.0, 306.0, 282.0, 288.0, 282.0, 291.0, 291.0, 279.0, 265.0, 257.0, 294.0, 279.0, 264.0, 249.0, 243.0, 276.0, 274.0, 290.0, 280.0, 290.0, 263.0, 256.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6836047998540244, "mean_inference_ms": 1.205887706872132, "mean_action_processing_ms": 0.13234677021859106, "mean_env_wait_ms": 0.8378906222560302, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 582.0, "episode_reward_min": 66.0, "episode_reward_mean": 544.01, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 32.0}, "policy_reward_max": {"ppo": 307.0}, "policy_reward_mean": {"ppo": 272.005}, "hist_stats": {"episode_reward": [525.0, 579.0, 564.0, 570.0, 519.0, 567.0, 564.0, 573.0, 576.0, 576.0, 579.0, 533.0, 576.0, 530.0, 525.0, 573.0, 579.0, 570.0, 573.0, 573.0, 570.0, 570.0, 579.0, 516.0, 576.0, 530.0, 482.0, 576.0, 576.0, 527.0, 573.0, 582.0, 579.0, 522.0, 579.0, 561.0, 567.0, 513.0, 522.0, 66.0, 530.0, 522.0, 533.0, 473.0, 519.0, 516.0, 530.0, 582.0, 527.0, 522.0, 567.0, 527.0, 533.0, 573.0, 576.0, 519.0, 573.0, 
516.0, 522.0, 582.0, 522.0, 573.0, 519.0, 576.0, 579.0, 573.0, 525.0, 516.0, 522.0, 579.0, 576.0, 530.0, 582.0, 522.0, 567.0, 513.0, 582.0, 519.0, 576.0, 525.0, 552.0, 582.0, 525.0, 567.0, 444.0, 522.0, 579.0, 525.0, 525.0, 579.0, 570.0, 573.0, 570.0, 522.0, 573.0, 513.0, 519.0, 564.0, 570.0, 519.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [261.0, 264.0, 285.0, 294.0, 276.0, 288.0, 283.0, 287.0, 264.0, 255.0, 301.0, 266.0, 279.0, 285.0, 278.0, 295.0, 292.0, 284.0, 300.0, 276.0, 289.0, 290.0, 271.0, 262.0, 288.0, 288.0, 265.0, 265.0, 262.0, 263.0, 286.0, 287.0, 290.0, 289.0, 279.0, 291.0, 288.0, 285.0, 291.0, 282.0, 280.0, 290.0, 293.0, 277.0, 292.0, 287.0, 242.0, 274.0, 278.0, 298.0, 270.0, 260.0, 236.0, 246.0, 295.0, 281.0, 292.0, 284.0, 260.0, 267.0, 283.0, 290.0, 286.0, 296.0, 291.0, 288.0, 258.0, 264.0, 278.0, 301.0, 274.0, 287.0, 269.0, 298.0, 267.0, 246.0, 265.0, 257.0, 34.0, 32.0, 278.0, 252.0, 248.0, 274.0, 277.0, 256.0, 248.0, 225.0, 266.0, 253.0, 263.0, 253.0, 278.0, 252.0, 307.0, 275.0, 247.0, 280.0, 264.0, 258.0, 285.0, 282.0, 269.0, 258.0, 260.0, 273.0, 285.0, 288.0, 287.0, 289.0, 273.0, 246.0, 285.0, 288.0, 249.0, 267.0, 263.0, 259.0, 286.0, 296.0, 259.0, 263.0, 294.0, 279.0, 270.0, 249.0, 277.0, 299.0, 291.0, 288.0, 282.0, 291.0, 260.0, 265.0, 270.0, 246.0, 261.0, 261.0, 303.0, 276.0, 289.0, 287.0, 262.0, 268.0, 293.0, 289.0, 254.0, 268.0, 277.0, 290.0, 266.0, 247.0, 294.0, 288.0, 265.0, 254.0, 276.0, 300.0, 261.0, 264.0, 288.0, 264.0, 290.0, 292.0, 270.0, 
255.0, 281.0, 286.0, 230.0, 214.0, 249.0, 273.0, 287.0, 292.0, 263.0, 262.0, 270.0, 255.0, 273.0, 306.0, 282.0, 288.0, 282.0, 291.0, 291.0, 279.0, 265.0, 257.0, 294.0, 279.0, 264.0, 249.0, 243.0, 276.0, 274.0, 290.0, 280.0, 290.0, 263.0, 256.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6836047998540244, "mean_inference_ms": 1.205887706872132, "mean_action_processing_ms": 0.13234677021859106, "mean_env_wait_ms": 0.8378906222560302, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 4198400, "num_agent_steps_trained": 4198400, "num_env_steps_sampled": 2099200, "num_env_steps_trained": 2099200, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 2099200, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 4198400, "timers": {"training_iteration_time_ms": 3611.249, "learn_time_ms": 1083.015, "learn_throughput": 11818.859, "synch_weights_time_ms": 13.057}, "counters": {"num_env_steps_sampled": 2099200, "num_env_steps_trained": 2099200, "num_agent_steps_sampled": 4198400, "num_agent_steps_trained": 4198400}, "done": false, "episodes_total": 5248, "training_iteration": 164, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-10-36", "timestamp": 1666581036, "time_this_iter_s": 3.8051187992095947, "time_total_s": 615.4499089717865, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 615.4499089717865, "timesteps_since_restore": 0, "iterations_since_restore": 164, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 24.8, "ram_util_percent": 10.62}}
+{"custom_metrics": {"sparse_reward_mean": 188.6, "sparse_reward_min": 20, "sparse_reward_max": 200, "shaped_reward_mean": 166.78, "shaped_reward_min": 26, "shaped_reward_max": 182, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.57, "onion_pickup_agent_0_min": 2, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 16.0, "onion_pickup_agent_1_min": 5, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 15.07, "useful_onion_pickup_agent_0_min": 2, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 15.53, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 26, "onion_drop_agent_0_mean": 0.24, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.19, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.11, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, 
"useful_onion_drop_agent_1_mean": 0.13, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 14.99, "potting_onion_agent_0_min": 2, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 15.44, "potting_onion_agent_1_min": 4, "potting_onion_agent_1_max": 25, "dish_pickup_agent_0_mean": 5.31, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.14, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 4.73, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.5, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.25, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.33, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.07, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 4.95, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.64, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 4.87, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.58, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.03, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 3, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.99, "optimal_onion_potting_agent_0_min": 2, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 15.44, "optimal_onion_potting_agent_1_min": 4, "optimal_onion_potting_agent_1_max": 25, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.99, "viable_onion_potting_agent_0_min": 2, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 15.44, "viable_onion_potting_agent_1_min": 4, "viable_onion_potting_agent_1_max": 25, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.00199544383212924, "policy_loss": 0.0018049610080197453, "vf_loss": 
7.202265739440918, "vf_explained_var": 0.6753696799278259, "kl": 0.001835355069488287, "entropy": 1.0594866275787354, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 2112000, "num_env_steps_trained": 2112000, "num_agent_steps_sampled": 4224000, "num_agent_steps_trained": 4224000}, "sampler_results": {"episode_reward_max": 582.0, "episode_reward_min": 66.0, "episode_reward_mean": 543.98, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 32.0}, "policy_reward_max": {"ppo": 309.0}, "policy_reward_mean": {"ppo": 271.99}, "custom_metrics": {"sparse_reward_mean": 188.6, "sparse_reward_min": 20, "sparse_reward_max": 200, "shaped_reward_mean": 166.78, "shaped_reward_min": 26, "shaped_reward_max": 182, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.57, "onion_pickup_agent_0_min": 2, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 16.0, 
"onion_pickup_agent_1_min": 5, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 15.07, "useful_onion_pickup_agent_0_min": 2, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 15.53, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 26, "onion_drop_agent_0_mean": 0.24, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.19, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.11, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.13, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 14.99, "potting_onion_agent_0_min": 2, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 15.44, "potting_onion_agent_1_min": 4, "potting_onion_agent_1_max": 25, "dish_pickup_agent_0_mean": 5.31, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.14, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 4.73, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.5, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.25, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.33, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.07, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 4.95, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.64, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 4.87, "soup_delivery_agent_0_min": 1, 
"soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.58, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.03, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 3, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.99, "optimal_onion_potting_agent_0_min": 2, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 15.44, "optimal_onion_potting_agent_1_min": 4, "optimal_onion_potting_agent_1_max": 25, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.99, "viable_onion_potting_agent_0_min": 2, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 15.44, "viable_onion_potting_agent_1_min": 4, "viable_onion_potting_agent_1_max": 25, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 522.0, 579.0, 561.0, 567.0, 513.0, 522.0, 66.0, 530.0, 522.0, 533.0, 473.0, 519.0, 516.0, 530.0, 582.0, 527.0, 522.0, 567.0, 527.0, 533.0, 573.0, 576.0, 519.0, 573.0, 516.0, 522.0, 582.0, 522.0, 573.0, 519.0, 576.0, 579.0, 573.0, 525.0, 516.0, 522.0, 579.0, 576.0, 530.0, 582.0, 522.0, 567.0, 513.0, 582.0, 519.0, 576.0, 525.0, 552.0, 582.0, 525.0, 567.0, 444.0, 522.0, 579.0, 525.0, 525.0, 579.0, 570.0, 573.0, 570.0, 522.0, 573.0, 513.0, 519.0, 564.0, 570.0, 519.0, 576.0, 579.0, 573.0, 527.0, 521.0, 522.0, 530.0, 573.0, 525.0, 527.0, 570.0, 570.0, 576.0, 576.0, 570.0, 579.0, 576.0, 530.0, 570.0, 576.0, 576.0, 573.0, 525.0, 522.0, 576.0, 570.0, 570.0, 570.0, 522.0, 579.0, 570.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [291.0, 288.0, 258.0, 264.0, 278.0, 301.0, 274.0, 287.0, 269.0, 298.0, 267.0, 246.0, 265.0, 257.0, 34.0, 32.0, 278.0, 252.0, 248.0, 274.0, 277.0, 256.0, 248.0, 225.0, 266.0, 253.0, 263.0, 253.0, 278.0, 252.0, 307.0, 275.0, 247.0, 280.0, 264.0, 258.0, 285.0, 282.0, 269.0, 258.0, 260.0, 273.0, 285.0, 288.0, 287.0, 289.0, 273.0, 246.0, 285.0, 
288.0, 249.0, 267.0, 263.0, 259.0, 286.0, 296.0, 259.0, 263.0, 294.0, 279.0, 270.0, 249.0, 277.0, 299.0, 291.0, 288.0, 282.0, 291.0, 260.0, 265.0, 270.0, 246.0, 261.0, 261.0, 303.0, 276.0, 289.0, 287.0, 262.0, 268.0, 293.0, 289.0, 254.0, 268.0, 277.0, 290.0, 266.0, 247.0, 294.0, 288.0, 265.0, 254.0, 276.0, 300.0, 261.0, 264.0, 288.0, 264.0, 290.0, 292.0, 270.0, 255.0, 281.0, 286.0, 230.0, 214.0, 249.0, 273.0, 287.0, 292.0, 263.0, 262.0, 270.0, 255.0, 273.0, 306.0, 282.0, 288.0, 282.0, 291.0, 291.0, 279.0, 265.0, 257.0, 294.0, 279.0, 264.0, 249.0, 243.0, 276.0, 274.0, 290.0, 280.0, 290.0, 263.0, 256.0, 289.0, 287.0, 290.0, 289.0, 283.0, 290.0, 252.0, 275.0, 264.0, 257.0, 266.0, 256.0, 270.0, 260.0, 290.0, 283.0, 259.0, 266.0, 248.0, 279.0, 283.0, 287.0, 287.0, 283.0, 296.0, 280.0, 300.0, 276.0, 284.0, 286.0, 285.0, 294.0, 283.0, 293.0, 268.0, 262.0, 291.0, 279.0, 290.0, 286.0, 295.0, 281.0, 285.0, 288.0, 274.0, 251.0, 270.0, 252.0, 284.0, 292.0, 285.0, 285.0, 280.0, 290.0, 290.0, 280.0, 254.0, 268.0, 289.0, 290.0, 285.0, 285.0, 270.0, 309.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6835710921630246, "mean_inference_ms": 1.2060351092122445, "mean_action_processing_ms": 0.13234189490578338, "mean_env_wait_ms": 0.8380345595280514, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 582.0, "episode_reward_min": 66.0, "episode_reward_mean": 543.98, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 32.0}, "policy_reward_max": {"ppo": 309.0}, "policy_reward_mean": {"ppo": 271.99}, "hist_stats": {"episode_reward": [579.0, 522.0, 579.0, 561.0, 567.0, 513.0, 522.0, 66.0, 530.0, 522.0, 533.0, 473.0, 519.0, 516.0, 530.0, 582.0, 527.0, 522.0, 567.0, 527.0, 533.0, 573.0, 576.0, 519.0, 573.0, 516.0, 522.0, 582.0, 522.0, 573.0, 519.0, 576.0, 579.0, 573.0, 525.0, 516.0, 522.0, 579.0, 576.0, 530.0, 582.0, 522.0, 567.0, 513.0, 582.0, 519.0, 576.0, 525.0, 552.0, 582.0, 525.0, 567.0, 444.0, 522.0, 579.0, 525.0, 525.0, 
579.0, 570.0, 573.0, 570.0, 522.0, 573.0, 513.0, 519.0, 564.0, 570.0, 519.0, 576.0, 579.0, 573.0, 527.0, 521.0, 522.0, 530.0, 573.0, 525.0, 527.0, 570.0, 570.0, 576.0, 576.0, 570.0, 579.0, 576.0, 530.0, 570.0, 576.0, 576.0, 573.0, 525.0, 522.0, 576.0, 570.0, 570.0, 570.0, 522.0, 579.0, 570.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [291.0, 288.0, 258.0, 264.0, 278.0, 301.0, 274.0, 287.0, 269.0, 298.0, 267.0, 246.0, 265.0, 257.0, 34.0, 32.0, 278.0, 252.0, 248.0, 274.0, 277.0, 256.0, 248.0, 225.0, 266.0, 253.0, 263.0, 253.0, 278.0, 252.0, 307.0, 275.0, 247.0, 280.0, 264.0, 258.0, 285.0, 282.0, 269.0, 258.0, 260.0, 273.0, 285.0, 288.0, 287.0, 289.0, 273.0, 246.0, 285.0, 288.0, 249.0, 267.0, 263.0, 259.0, 286.0, 296.0, 259.0, 263.0, 294.0, 279.0, 270.0, 249.0, 277.0, 299.0, 291.0, 288.0, 282.0, 291.0, 260.0, 265.0, 270.0, 246.0, 261.0, 261.0, 303.0, 276.0, 289.0, 287.0, 262.0, 268.0, 293.0, 289.0, 254.0, 268.0, 277.0, 290.0, 266.0, 247.0, 294.0, 288.0, 265.0, 254.0, 276.0, 300.0, 261.0, 264.0, 288.0, 264.0, 290.0, 292.0, 270.0, 255.0, 281.0, 286.0, 230.0, 214.0, 249.0, 273.0, 287.0, 292.0, 263.0, 262.0, 270.0, 255.0, 273.0, 306.0, 282.0, 288.0, 282.0, 291.0, 291.0, 279.0, 265.0, 257.0, 294.0, 279.0, 264.0, 249.0, 243.0, 276.0, 274.0, 290.0, 280.0, 290.0, 263.0, 256.0, 289.0, 287.0, 290.0, 289.0, 283.0, 290.0, 252.0, 275.0, 264.0, 257.0, 266.0, 256.0, 270.0, 260.0, 290.0, 283.0, 259.0, 266.0, 248.0, 279.0, 283.0, 287.0, 287.0, 283.0, 296.0, 280.0, 300.0, 276.0, 284.0, 
286.0, 285.0, 294.0, 283.0, 293.0, 268.0, 262.0, 291.0, 279.0, 290.0, 286.0, 295.0, 281.0, 285.0, 288.0, 274.0, 251.0, 270.0, 252.0, 284.0, 292.0, 285.0, 285.0, 280.0, 290.0, 290.0, 280.0, 254.0, 268.0, 289.0, 290.0, 285.0, 285.0, 270.0, 309.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6835710921630246, "mean_inference_ms": 1.2060351092122445, "mean_action_processing_ms": 0.13234189490578338, "mean_env_wait_ms": 0.8380345595280514, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 4224000, "num_agent_steps_trained": 4224000, "num_env_steps_sampled": 2112000, "num_env_steps_trained": 2112000, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 2112000, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 4224000, "timers": {"training_iteration_time_ms": 3600.771, "learn_time_ms": 1081.655, "learn_throughput": 11833.72, "synch_weights_time_ms": 12.764}, "counters": {"num_env_steps_sampled": 2112000, "num_env_steps_trained": 2112000, "num_agent_steps_sampled": 4224000, "num_agent_steps_trained": 4224000}, "done": false, "episodes_total": 5280, "training_iteration": 165, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-10-40", "timestamp": 1666581040, "time_this_iter_s": 3.583083391189575, "time_total_s": 619.0329923629761, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 619.0329923629761, "timesteps_since_restore": 0, "iterations_since_restore": 165, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 21.116666666666667, "ram_util_percent": 10.616666666666667}}
+{"custom_metrics": {"sparse_reward_mean": 192.2, "sparse_reward_min": 160, "sparse_reward_max": 200, "shaped_reward_mean": 169.17, "shaped_reward_min": 124, "shaped_reward_max": 182, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.74, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 16.22, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 15.28, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 15.72, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 26, "onion_drop_agent_0_mean": 0.29, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.17, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.13, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, 
"useful_onion_drop_agent_1_mean": 0.12, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.13, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 15.62, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 25, "dish_pickup_agent_0_mean": 5.25, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.17, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 4.84, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.65, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.13, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.31, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.07, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.0, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.69, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 4.97, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.65, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.13, "optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 15.62, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 25, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.13, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 15.62, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 25, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0029030703008174896, "policy_loss": -0.0030936463735997677, "vf_loss": 
7.157515048980713, "vf_explained_var": 0.6804929971694946, "kl": 0.0017253122059628367, "entropy": 1.050349473953247, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 2124800, "num_env_steps_trained": 2124800, "num_agent_steps_sampled": 4249600, "num_agent_steps_trained": 4249600}, "sampler_results": {"episode_reward_max": 582.0, "episode_reward_min": 444.0, "episode_reward_mean": 553.57, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 214.0}, "policy_reward_max": {"ppo": 309.0}, "policy_reward_mean": {"ppo": 276.785}, "custom_metrics": {"sparse_reward_mean": 192.2, "sparse_reward_min": 160, "sparse_reward_max": 200, "shaped_reward_mean": 169.17, "shaped_reward_min": 124, "shaped_reward_max": 182, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.74, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 16.22, 
"onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 15.28, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 15.72, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 26, "onion_drop_agent_0_mean": 0.29, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.17, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.13, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.12, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.13, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 15.62, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 25, "dish_pickup_agent_0_mean": 5.25, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.17, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 4.84, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.65, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.13, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.31, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.07, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.0, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.69, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 4.97, "soup_delivery_agent_0_min": 1, 
"soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.65, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.13, "optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 15.62, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 25, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.13, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 15.62, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 25, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 573.0, 525.0, 516.0, 522.0, 579.0, 576.0, 530.0, 582.0, 522.0, 567.0, 513.0, 582.0, 519.0, 576.0, 525.0, 552.0, 582.0, 525.0, 567.0, 444.0, 522.0, 579.0, 525.0, 525.0, 579.0, 570.0, 573.0, 570.0, 522.0, 573.0, 513.0, 519.0, 564.0, 570.0, 519.0, 576.0, 579.0, 573.0, 527.0, 521.0, 522.0, 530.0, 573.0, 525.0, 527.0, 570.0, 570.0, 576.0, 576.0, 570.0, 579.0, 576.0, 530.0, 570.0, 576.0, 576.0, 573.0, 525.0, 522.0, 576.0, 570.0, 570.0, 570.0, 522.0, 579.0, 570.0, 579.0, 522.0, 573.0, 522.0, 570.0, 530.0, 570.0, 573.0, 573.0, 576.0, 573.0, 530.0, 522.0, 573.0, 570.0, 576.0, 519.0, 525.0, 579.0, 570.0, 525.0, 522.0, 576.0, 570.0, 576.0, 525.0, 570.0, 564.0, 525.0, 576.0, 567.0, 582.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [291.0, 288.0, 282.0, 291.0, 260.0, 265.0, 270.0, 246.0, 261.0, 261.0, 303.0, 276.0, 289.0, 287.0, 262.0, 268.0, 293.0, 289.0, 254.0, 268.0, 277.0, 290.0, 266.0, 247.0, 294.0, 288.0, 265.0, 254.0, 276.0, 300.0, 261.0, 264.0, 288.0, 264.0, 290.0, 292.0, 270.0, 255.0, 281.0, 286.0, 230.0, 214.0, 249.0, 273.0, 287.0, 292.0, 263.0, 262.0, 270.0, 
255.0, 273.0, 306.0, 282.0, 288.0, 282.0, 291.0, 291.0, 279.0, 265.0, 257.0, 294.0, 279.0, 264.0, 249.0, 243.0, 276.0, 274.0, 290.0, 280.0, 290.0, 263.0, 256.0, 289.0, 287.0, 290.0, 289.0, 283.0, 290.0, 252.0, 275.0, 264.0, 257.0, 266.0, 256.0, 270.0, 260.0, 290.0, 283.0, 259.0, 266.0, 248.0, 279.0, 283.0, 287.0, 287.0, 283.0, 296.0, 280.0, 300.0, 276.0, 284.0, 286.0, 285.0, 294.0, 283.0, 293.0, 268.0, 262.0, 291.0, 279.0, 290.0, 286.0, 295.0, 281.0, 285.0, 288.0, 274.0, 251.0, 270.0, 252.0, 284.0, 292.0, 285.0, 285.0, 280.0, 290.0, 290.0, 280.0, 254.0, 268.0, 289.0, 290.0, 285.0, 285.0, 270.0, 309.0, 275.0, 247.0, 280.0, 293.0, 266.0, 256.0, 303.0, 267.0, 259.0, 271.0, 299.0, 271.0, 290.0, 283.0, 301.0, 272.0, 280.0, 296.0, 279.0, 294.0, 281.0, 249.0, 262.0, 260.0, 283.0, 290.0, 287.0, 283.0, 289.0, 287.0, 257.0, 262.0, 258.0, 267.0, 287.0, 292.0, 278.0, 292.0, 254.0, 271.0, 257.0, 265.0, 300.0, 276.0, 295.0, 275.0, 293.0, 283.0, 265.0, 260.0, 279.0, 291.0, 276.0, 288.0, 256.0, 269.0, 276.0, 300.0, 290.0, 277.0, 292.0, 290.0, 282.0, 294.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6835082199229373, "mean_inference_ms": 1.2061519460108794, "mean_action_processing_ms": 0.13233370628692803, "mean_env_wait_ms": 0.8381257388110719, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 582.0, "episode_reward_min": 444.0, "episode_reward_mean": 553.57, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 214.0}, "policy_reward_max": {"ppo": 309.0}, "policy_reward_mean": {"ppo": 276.785}, "hist_stats": {"episode_reward": [579.0, 573.0, 525.0, 516.0, 522.0, 579.0, 576.0, 530.0, 582.0, 522.0, 567.0, 513.0, 582.0, 519.0, 576.0, 525.0, 552.0, 582.0, 525.0, 567.0, 444.0, 522.0, 579.0, 525.0, 525.0, 579.0, 570.0, 573.0, 570.0, 522.0, 573.0, 513.0, 519.0, 564.0, 570.0, 519.0, 576.0, 579.0, 573.0, 527.0, 521.0, 522.0, 530.0, 573.0, 525.0, 527.0, 570.0, 570.0, 576.0, 576.0, 570.0, 579.0, 576.0, 530.0, 570.0, 576.0, 
576.0, 573.0, 525.0, 522.0, 576.0, 570.0, 570.0, 570.0, 522.0, 579.0, 570.0, 579.0, 522.0, 573.0, 522.0, 570.0, 530.0, 570.0, 573.0, 573.0, 576.0, 573.0, 530.0, 522.0, 573.0, 570.0, 576.0, 519.0, 525.0, 579.0, 570.0, 525.0, 522.0, 576.0, 570.0, 576.0, 525.0, 570.0, 564.0, 525.0, 576.0, 567.0, 582.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [291.0, 288.0, 282.0, 291.0, 260.0, 265.0, 270.0, 246.0, 261.0, 261.0, 303.0, 276.0, 289.0, 287.0, 262.0, 268.0, 293.0, 289.0, 254.0, 268.0, 277.0, 290.0, 266.0, 247.0, 294.0, 288.0, 265.0, 254.0, 276.0, 300.0, 261.0, 264.0, 288.0, 264.0, 290.0, 292.0, 270.0, 255.0, 281.0, 286.0, 230.0, 214.0, 249.0, 273.0, 287.0, 292.0, 263.0, 262.0, 270.0, 255.0, 273.0, 306.0, 282.0, 288.0, 282.0, 291.0, 291.0, 279.0, 265.0, 257.0, 294.0, 279.0, 264.0, 249.0, 243.0, 276.0, 274.0, 290.0, 280.0, 290.0, 263.0, 256.0, 289.0, 287.0, 290.0, 289.0, 283.0, 290.0, 252.0, 275.0, 264.0, 257.0, 266.0, 256.0, 270.0, 260.0, 290.0, 283.0, 259.0, 266.0, 248.0, 279.0, 283.0, 287.0, 287.0, 283.0, 296.0, 280.0, 300.0, 276.0, 284.0, 286.0, 285.0, 294.0, 283.0, 293.0, 268.0, 262.0, 291.0, 279.0, 290.0, 286.0, 295.0, 281.0, 285.0, 288.0, 274.0, 251.0, 270.0, 252.0, 284.0, 292.0, 285.0, 285.0, 280.0, 290.0, 290.0, 280.0, 254.0, 268.0, 289.0, 290.0, 285.0, 285.0, 270.0, 309.0, 275.0, 247.0, 280.0, 293.0, 266.0, 256.0, 303.0, 267.0, 259.0, 271.0, 299.0, 271.0, 290.0, 283.0, 301.0, 272.0, 280.0, 296.0, 279.0, 294.0, 281.0, 249.0, 262.0, 260.0, 283.0, 290.0, 287.0, 283.0, 
289.0, 287.0, 257.0, 262.0, 258.0, 267.0, 287.0, 292.0, 278.0, 292.0, 254.0, 271.0, 257.0, 265.0, 300.0, 276.0, 295.0, 275.0, 293.0, 283.0, 265.0, 260.0, 279.0, 291.0, 276.0, 288.0, 256.0, 269.0, 276.0, 300.0, 290.0, 277.0, 292.0, 290.0, 282.0, 294.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6835082199229373, "mean_inference_ms": 1.2061519460108794, "mean_action_processing_ms": 0.13233370628692803, "mean_env_wait_ms": 0.8381257388110719, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 4249600, "num_agent_steps_trained": 4249600, "num_env_steps_sampled": 2124800, "num_env_steps_trained": 2124800, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 2124800, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 4249600, "timers": {"training_iteration_time_ms": 3607.208, "learn_time_ms": 1092.222, "learn_throughput": 11719.229, "synch_weights_time_ms": 12.734}, "counters": {"num_env_steps_sampled": 2124800, "num_env_steps_trained": 2124800, "num_agent_steps_sampled": 4249600, "num_agent_steps_trained": 4249600}, "done": false, "episodes_total": 5312, "training_iteration": 166, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-10-44", "timestamp": 1666581044, "time_this_iter_s": 3.674652099609375, "time_total_s": 622.7076444625854, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 622.7076444625854, "timesteps_since_restore": 0, "iterations_since_restore": 166, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.52, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 191.4, "sparse_reward_min": 60, "sparse_reward_max": 220, "shaped_reward_mean": 168.43, "shaped_reward_min": 60, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.63, "onion_pickup_agent_0_min": 4, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 16.12, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 15.24, "useful_onion_pickup_agent_0_min": 4, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 15.71, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.27, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.12, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.09, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, 
"useful_onion_drop_agent_1_mean": 0.09, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.99, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 15.56, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 5.22, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.13, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 4.79, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.67, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.17, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.25, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.06, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 4.91, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.77, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 4.86, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.72, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.99, "optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 15.56, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.99, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 15.56, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0013110751751810312, "policy_loss": -0.0015277772909030318, "vf_loss": 
7.448184013366699, "vf_explained_var": 0.6839404106140137, "kl": 0.002003659959882498, "entropy": 1.0562318563461304, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 2137600, "num_env_steps_trained": 2137600, "num_agent_steps_sampled": 4275200, "num_agent_steps_trained": 4275200}, "sampler_results": {"episode_reward_max": 630.0, "episode_reward_min": 180.0, "episode_reward_mean": 551.23, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 84.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 275.615}, "custom_metrics": {"sparse_reward_mean": 191.4, "sparse_reward_min": 60, "sparse_reward_max": 220, "shaped_reward_mean": 168.43, "shaped_reward_min": 60, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.63, "onion_pickup_agent_0_min": 4, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 16.12, 
"onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 15.24, "useful_onion_pickup_agent_0_min": 4, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 15.71, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.27, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.12, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.09, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.09, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.99, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 15.56, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 5.22, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.13, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 4.79, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.67, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.17, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.25, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.06, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 4.91, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.77, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 4.86, "soup_delivery_agent_0_min": 1, 
"soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.72, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.99, "optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 15.56, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.99, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 15.56, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [519.0, 564.0, 570.0, 519.0, 576.0, 579.0, 573.0, 527.0, 521.0, 522.0, 530.0, 573.0, 525.0, 527.0, 570.0, 570.0, 576.0, 576.0, 570.0, 579.0, 576.0, 530.0, 570.0, 576.0, 576.0, 573.0, 525.0, 522.0, 576.0, 570.0, 570.0, 570.0, 522.0, 579.0, 570.0, 579.0, 522.0, 573.0, 522.0, 570.0, 530.0, 570.0, 573.0, 573.0, 576.0, 573.0, 530.0, 522.0, 573.0, 570.0, 576.0, 519.0, 525.0, 579.0, 570.0, 525.0, 522.0, 576.0, 570.0, 576.0, 525.0, 570.0, 564.0, 525.0, 576.0, 567.0, 582.0, 576.0, 522.0, 522.0, 573.0, 630.0, 461.0, 519.0, 519.0, 579.0, 573.0, 576.0, 570.0, 570.0, 570.0, 530.0, 519.0, 519.0, 573.0, 530.0, 570.0, 576.0, 495.0, 570.0, 527.0, 180.0, 576.0, 576.0, 573.0, 522.0, 525.0, 582.0, 576.0, 570.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [243.0, 276.0, 274.0, 290.0, 280.0, 290.0, 263.0, 256.0, 289.0, 287.0, 290.0, 289.0, 283.0, 290.0, 252.0, 275.0, 264.0, 257.0, 266.0, 256.0, 270.0, 260.0, 290.0, 283.0, 259.0, 266.0, 248.0, 279.0, 283.0, 287.0, 287.0, 283.0, 296.0, 280.0, 300.0, 276.0, 284.0, 286.0, 285.0, 294.0, 283.0, 293.0, 268.0, 262.0, 291.0, 279.0, 290.0, 286.0, 295.0, 
281.0, 285.0, 288.0, 274.0, 251.0, 270.0, 252.0, 284.0, 292.0, 285.0, 285.0, 280.0, 290.0, 290.0, 280.0, 254.0, 268.0, 289.0, 290.0, 285.0, 285.0, 270.0, 309.0, 275.0, 247.0, 280.0, 293.0, 266.0, 256.0, 303.0, 267.0, 259.0, 271.0, 299.0, 271.0, 290.0, 283.0, 301.0, 272.0, 280.0, 296.0, 279.0, 294.0, 281.0, 249.0, 262.0, 260.0, 283.0, 290.0, 287.0, 283.0, 289.0, 287.0, 257.0, 262.0, 258.0, 267.0, 287.0, 292.0, 278.0, 292.0, 254.0, 271.0, 257.0, 265.0, 300.0, 276.0, 295.0, 275.0, 293.0, 283.0, 265.0, 260.0, 279.0, 291.0, 276.0, 288.0, 256.0, 269.0, 276.0, 300.0, 290.0, 277.0, 292.0, 290.0, 282.0, 294.0, 252.0, 270.0, 265.0, 257.0, 277.0, 296.0, 306.0, 324.0, 230.0, 231.0, 250.0, 269.0, 256.0, 263.0, 287.0, 292.0, 287.0, 286.0, 278.0, 298.0, 288.0, 282.0, 286.0, 284.0, 280.0, 290.0, 262.0, 268.0, 248.0, 271.0, 275.0, 244.0, 289.0, 284.0, 265.0, 265.0, 285.0, 285.0, 280.0, 296.0, 240.0, 255.0, 286.0, 284.0, 266.0, 261.0, 96.0, 84.0, 285.0, 291.0, 287.0, 289.0, 282.0, 291.0, 273.0, 249.0, 267.0, 258.0, 293.0, 289.0, 288.0, 288.0, 292.0, 278.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6834355732851334, "mean_inference_ms": 1.2060678323861582, "mean_action_processing_ms": 0.13232880100409083, "mean_env_wait_ms": 0.8379897883903749, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 630.0, "episode_reward_min": 180.0, "episode_reward_mean": 551.23, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 84.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 275.615}, "hist_stats": {"episode_reward": [519.0, 564.0, 570.0, 519.0, 576.0, 579.0, 573.0, 527.0, 521.0, 522.0, 530.0, 573.0, 525.0, 527.0, 570.0, 570.0, 576.0, 576.0, 570.0, 579.0, 576.0, 530.0, 570.0, 576.0, 576.0, 573.0, 525.0, 522.0, 576.0, 570.0, 570.0, 570.0, 522.0, 579.0, 570.0, 579.0, 522.0, 573.0, 522.0, 570.0, 530.0, 570.0, 573.0, 573.0, 576.0, 573.0, 530.0, 522.0, 573.0, 570.0, 576.0, 519.0, 525.0, 579.0, 570.0, 525.0, 522.0, 
576.0, 570.0, 576.0, 525.0, 570.0, 564.0, 525.0, 576.0, 567.0, 582.0, 576.0, 522.0, 522.0, 573.0, 630.0, 461.0, 519.0, 519.0, 579.0, 573.0, 576.0, 570.0, 570.0, 570.0, 530.0, 519.0, 519.0, 573.0, 530.0, 570.0, 576.0, 495.0, 570.0, 527.0, 180.0, 576.0, 576.0, 573.0, 522.0, 525.0, 582.0, 576.0, 570.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [243.0, 276.0, 274.0, 290.0, 280.0, 290.0, 263.0, 256.0, 289.0, 287.0, 290.0, 289.0, 283.0, 290.0, 252.0, 275.0, 264.0, 257.0, 266.0, 256.0, 270.0, 260.0, 290.0, 283.0, 259.0, 266.0, 248.0, 279.0, 283.0, 287.0, 287.0, 283.0, 296.0, 280.0, 300.0, 276.0, 284.0, 286.0, 285.0, 294.0, 283.0, 293.0, 268.0, 262.0, 291.0, 279.0, 290.0, 286.0, 295.0, 281.0, 285.0, 288.0, 274.0, 251.0, 270.0, 252.0, 284.0, 292.0, 285.0, 285.0, 280.0, 290.0, 290.0, 280.0, 254.0, 268.0, 289.0, 290.0, 285.0, 285.0, 270.0, 309.0, 275.0, 247.0, 280.0, 293.0, 266.0, 256.0, 303.0, 267.0, 259.0, 271.0, 299.0, 271.0, 290.0, 283.0, 301.0, 272.0, 280.0, 296.0, 279.0, 294.0, 281.0, 249.0, 262.0, 260.0, 283.0, 290.0, 287.0, 283.0, 289.0, 287.0, 257.0, 262.0, 258.0, 267.0, 287.0, 292.0, 278.0, 292.0, 254.0, 271.0, 257.0, 265.0, 300.0, 276.0, 295.0, 275.0, 293.0, 283.0, 265.0, 260.0, 279.0, 291.0, 276.0, 288.0, 256.0, 269.0, 276.0, 300.0, 290.0, 277.0, 292.0, 290.0, 282.0, 294.0, 252.0, 270.0, 265.0, 257.0, 277.0, 296.0, 306.0, 324.0, 230.0, 231.0, 250.0, 269.0, 256.0, 263.0, 287.0, 292.0, 287.0, 286.0, 278.0, 298.0, 288.0, 282.0, 286.0, 284.0, 280.0, 290.0, 262.0, 268.0, 248.0, 
271.0, 275.0, 244.0, 289.0, 284.0, 265.0, 265.0, 285.0, 285.0, 280.0, 296.0, 240.0, 255.0, 286.0, 284.0, 266.0, 261.0, 96.0, 84.0, 285.0, 291.0, 287.0, 289.0, 282.0, 291.0, 273.0, 249.0, 267.0, 258.0, 293.0, 289.0, 288.0, 288.0, 292.0, 278.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6834355732851334, "mean_inference_ms": 1.2060678323861582, "mean_action_processing_ms": 0.13232880100409083, "mean_env_wait_ms": 0.8379897883903749, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 4275200, "num_agent_steps_trained": 4275200, "num_env_steps_sampled": 2137600, "num_env_steps_trained": 2137600, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 2137600, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 4275200, "timers": {"training_iteration_time_ms": 3605.504, "learn_time_ms": 1092.752, "learn_throughput": 11713.543, "synch_weights_time_ms": 12.734}, "counters": {"num_env_steps_sampled": 2137600, "num_env_steps_trained": 2137600, "num_agent_steps_sampled": 4275200, "num_agent_steps_trained": 4275200}, "done": false, "episodes_total": 5344, "training_iteration": 167, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-10-48", "timestamp": 1666581048, "time_this_iter_s": 3.6411495208740234, "time_total_s": 626.3487939834595, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 626.3487939834595, "timesteps_since_restore": 0, "iterations_since_restore": 167, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.3, "ram_util_percent": 10.62}}
+{"custom_metrics": {"sparse_reward_mean": 192.0, "sparse_reward_min": 60, "sparse_reward_max": 220, "shaped_reward_mean": 169.53, "shaped_reward_min": 60, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.98, "onion_pickup_agent_0_min": 4, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 15.93, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 15.65, "useful_onion_pickup_agent_0_min": 4, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 15.58, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.25, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.14, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.09, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, 
"useful_onion_drop_agent_1_mean": 0.11, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.38, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 15.43, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 5.04, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.41, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.61, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.94, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.18, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.25, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.06, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 4.69, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.01, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 4.67, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.95, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.38, "optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 15.43, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.38, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 15.43, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.00048789544962346554, "policy_loss": -0.0007036026800051332, "vf_loss": 
7.372027397155762, "vf_explained_var": 0.699303388595581, "kl": 0.0018403129652142525, "entropy": 1.0429902076721191, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 2150400, "num_env_steps_trained": 2150400, "num_agent_steps_sampled": 4300800, "num_agent_steps_trained": 4300800}, "sampler_results": {"episode_reward_max": 630.0, "episode_reward_min": 180.0, "episode_reward_mean": 553.53, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 84.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 276.765}, "custom_metrics": {"sparse_reward_mean": 192.0, "sparse_reward_min": 60, "sparse_reward_max": 220, "shaped_reward_mean": 169.53, "shaped_reward_min": 60, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.98, "onion_pickup_agent_0_min": 4, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 15.93, 
"onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 15.65, "useful_onion_pickup_agent_0_min": 4, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 15.58, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.25, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.14, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.09, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.11, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.38, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 15.43, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 5.04, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.41, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.61, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.94, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.18, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.25, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.06, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 4.69, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.01, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 4.67, "soup_delivery_agent_0_min": 1, 
"soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.95, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.38, "optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 15.43, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.38, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 15.43, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [522.0, 579.0, 570.0, 579.0, 522.0, 573.0, 522.0, 570.0, 530.0, 570.0, 573.0, 573.0, 576.0, 573.0, 530.0, 522.0, 573.0, 570.0, 576.0, 519.0, 525.0, 579.0, 570.0, 525.0, 522.0, 576.0, 570.0, 576.0, 525.0, 570.0, 564.0, 525.0, 576.0, 567.0, 582.0, 576.0, 522.0, 522.0, 573.0, 630.0, 461.0, 519.0, 519.0, 579.0, 573.0, 576.0, 570.0, 570.0, 570.0, 530.0, 519.0, 519.0, 573.0, 530.0, 570.0, 576.0, 495.0, 570.0, 527.0, 180.0, 576.0, 576.0, 573.0, 522.0, 525.0, 582.0, 576.0, 570.0, 533.0, 573.0, 582.0, 579.0, 579.0, 579.0, 522.0, 579.0, 522.0, 579.0, 567.0, 570.0, 522.0, 576.0, 522.0, 576.0, 530.0, 576.0, 579.0, 525.0, 573.0, 582.0, 564.0, 579.0, 579.0, 522.0, 630.0, 576.0, 576.0, 573.0, 576.0, 530.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [254.0, 268.0, 289.0, 290.0, 285.0, 285.0, 270.0, 309.0, 275.0, 247.0, 280.0, 293.0, 266.0, 256.0, 303.0, 267.0, 259.0, 271.0, 299.0, 271.0, 290.0, 283.0, 301.0, 272.0, 280.0, 296.0, 279.0, 294.0, 281.0, 249.0, 262.0, 260.0, 283.0, 290.0, 287.0, 283.0, 289.0, 287.0, 257.0, 262.0, 258.0, 267.0, 287.0, 292.0, 278.0, 292.0, 254.0, 271.0, 257.0, 
265.0, 300.0, 276.0, 295.0, 275.0, 293.0, 283.0, 265.0, 260.0, 279.0, 291.0, 276.0, 288.0, 256.0, 269.0, 276.0, 300.0, 290.0, 277.0, 292.0, 290.0, 282.0, 294.0, 252.0, 270.0, 265.0, 257.0, 277.0, 296.0, 306.0, 324.0, 230.0, 231.0, 250.0, 269.0, 256.0, 263.0, 287.0, 292.0, 287.0, 286.0, 278.0, 298.0, 288.0, 282.0, 286.0, 284.0, 280.0, 290.0, 262.0, 268.0, 248.0, 271.0, 275.0, 244.0, 289.0, 284.0, 265.0, 265.0, 285.0, 285.0, 280.0, 296.0, 240.0, 255.0, 286.0, 284.0, 266.0, 261.0, 96.0, 84.0, 285.0, 291.0, 287.0, 289.0, 282.0, 291.0, 273.0, 249.0, 267.0, 258.0, 293.0, 289.0, 288.0, 288.0, 292.0, 278.0, 268.0, 265.0, 278.0, 295.0, 285.0, 297.0, 283.0, 296.0, 282.0, 297.0, 278.0, 301.0, 269.0, 253.0, 282.0, 297.0, 265.0, 257.0, 288.0, 291.0, 287.0, 280.0, 294.0, 276.0, 260.0, 262.0, 285.0, 291.0, 264.0, 258.0, 292.0, 284.0, 259.0, 271.0, 285.0, 291.0, 277.0, 302.0, 251.0, 274.0, 283.0, 290.0, 300.0, 282.0, 279.0, 285.0, 286.0, 293.0, 277.0, 302.0, 251.0, 271.0, 310.0, 320.0, 291.0, 285.0, 283.0, 293.0, 275.0, 298.0, 291.0, 285.0, 256.0, 274.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6833894892310337, "mean_inference_ms": 1.2059795926228936, "mean_action_processing_ms": 0.13232587728396183, "mean_env_wait_ms": 0.8377997008246847, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 630.0, "episode_reward_min": 180.0, "episode_reward_mean": 553.53, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 84.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 276.765}, "hist_stats": {"episode_reward": [522.0, 579.0, 570.0, 579.0, 522.0, 573.0, 522.0, 570.0, 530.0, 570.0, 573.0, 573.0, 576.0, 573.0, 530.0, 522.0, 573.0, 570.0, 576.0, 519.0, 525.0, 579.0, 570.0, 525.0, 522.0, 576.0, 570.0, 576.0, 525.0, 570.0, 564.0, 525.0, 576.0, 567.0, 582.0, 576.0, 522.0, 522.0, 573.0, 630.0, 461.0, 519.0, 519.0, 579.0, 573.0, 576.0, 570.0, 570.0, 570.0, 530.0, 519.0, 519.0, 573.0, 530.0, 570.0, 576.0, 495.0, 
570.0, 527.0, 180.0, 576.0, 576.0, 573.0, 522.0, 525.0, 582.0, 576.0, 570.0, 533.0, 573.0, 582.0, 579.0, 579.0, 579.0, 522.0, 579.0, 522.0, 579.0, 567.0, 570.0, 522.0, 576.0, 522.0, 576.0, 530.0, 576.0, 579.0, 525.0, 573.0, 582.0, 564.0, 579.0, 579.0, 522.0, 630.0, 576.0, 576.0, 573.0, 576.0, 530.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [254.0, 268.0, 289.0, 290.0, 285.0, 285.0, 270.0, 309.0, 275.0, 247.0, 280.0, 293.0, 266.0, 256.0, 303.0, 267.0, 259.0, 271.0, 299.0, 271.0, 290.0, 283.0, 301.0, 272.0, 280.0, 296.0, 279.0, 294.0, 281.0, 249.0, 262.0, 260.0, 283.0, 290.0, 287.0, 283.0, 289.0, 287.0, 257.0, 262.0, 258.0, 267.0, 287.0, 292.0, 278.0, 292.0, 254.0, 271.0, 257.0, 265.0, 300.0, 276.0, 295.0, 275.0, 293.0, 283.0, 265.0, 260.0, 279.0, 291.0, 276.0, 288.0, 256.0, 269.0, 276.0, 300.0, 290.0, 277.0, 292.0, 290.0, 282.0, 294.0, 252.0, 270.0, 265.0, 257.0, 277.0, 296.0, 306.0, 324.0, 230.0, 231.0, 250.0, 269.0, 256.0, 263.0, 287.0, 292.0, 287.0, 286.0, 278.0, 298.0, 288.0, 282.0, 286.0, 284.0, 280.0, 290.0, 262.0, 268.0, 248.0, 271.0, 275.0, 244.0, 289.0, 284.0, 265.0, 265.0, 285.0, 285.0, 280.0, 296.0, 240.0, 255.0, 286.0, 284.0, 266.0, 261.0, 96.0, 84.0, 285.0, 291.0, 287.0, 289.0, 282.0, 291.0, 273.0, 249.0, 267.0, 258.0, 293.0, 289.0, 288.0, 288.0, 292.0, 278.0, 268.0, 265.0, 278.0, 295.0, 285.0, 297.0, 283.0, 296.0, 282.0, 297.0, 278.0, 301.0, 269.0, 253.0, 282.0, 297.0, 265.0, 257.0, 288.0, 291.0, 287.0, 280.0, 294.0, 276.0, 260.0, 262.0, 285.0, 291.0, 264.0, 
258.0, 292.0, 284.0, 259.0, 271.0, 285.0, 291.0, 277.0, 302.0, 251.0, 274.0, 283.0, 290.0, 300.0, 282.0, 279.0, 285.0, 286.0, 293.0, 277.0, 302.0, 251.0, 271.0, 310.0, 320.0, 291.0, 285.0, 283.0, 293.0, 275.0, 298.0, 291.0, 285.0, 256.0, 274.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6833894892310337, "mean_inference_ms": 1.2059795926228936, "mean_action_processing_ms": 0.13232587728396183, "mean_env_wait_ms": 0.8377997008246847, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 4300800, "num_agent_steps_trained": 4300800, "num_env_steps_sampled": 2150400, "num_env_steps_trained": 2150400, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 2150400, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 4300800, "timers": {"training_iteration_time_ms": 3618.811, "learn_time_ms": 1103.342, "learn_throughput": 11601.121, "synch_weights_time_ms": 12.497}, "counters": {"num_env_steps_sampled": 2150400, "num_env_steps_trained": 2150400, "num_agent_steps_sampled": 4300800, "num_agent_steps_trained": 4300800}, "done": false, "episodes_total": 5376, "training_iteration": 168, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-10-52", "timestamp": 1666581052, "time_this_iter_s": 3.766408681869507, "time_total_s": 630.115202665329, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 630.115202665329, "timesteps_since_restore": 0, "iterations_since_restore": 168, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.916666666666664, "ram_util_percent": 10.616666666666667}}
+{"custom_metrics": {"sparse_reward_mean": 191.8, "sparse_reward_min": 60, "sparse_reward_max": 220, "shaped_reward_mean": 170.25, "shaped_reward_min": 60, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.22, "onion_pickup_agent_0_min": 4, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 15.85, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 15.86, "useful_onion_pickup_agent_0_min": 4, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 15.62, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.24, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.16, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.08, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, 
"useful_onion_drop_agent_1_mean": 0.09, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.63, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 15.4, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.12, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.57, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.55, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.92, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.29, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.3, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.08, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.06, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 4.68, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.12, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 4.62, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.03, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.04, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 3, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.63, "optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 15.4, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.63, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 15.4, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.001223443541675806, "policy_loss": -0.0014694086275994778, "vf_loss": 
7.61738920211792, "vf_explained_var": 0.6677297949790955, "kl": 0.001981202280148864, "entropy": 1.031548261642456, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 2163200, "num_env_steps_trained": 2163200, "num_agent_steps_sampled": 4326400, "num_agent_steps_trained": 4326400}, "sampler_results": {"episode_reward_max": 630.0, "episode_reward_min": 180.0, "episode_reward_mean": 553.85, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 84.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 276.925}, "custom_metrics": {"sparse_reward_mean": 191.8, "sparse_reward_min": 60, "sparse_reward_max": 220, "shaped_reward_mean": 170.25, "shaped_reward_min": 60, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.22, "onion_pickup_agent_0_min": 4, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 15.85, 
"onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 15.86, "useful_onion_pickup_agent_0_min": 4, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 15.62, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.24, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.16, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.08, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.09, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.63, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 15.4, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.12, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.57, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.55, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.92, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.29, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.3, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.08, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.06, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 4.68, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.12, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 4.62, "soup_delivery_agent_0_min": 1, 
"soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.03, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.04, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 3, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.63, "optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 15.4, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.63, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 15.4, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [576.0, 567.0, 582.0, 576.0, 522.0, 522.0, 573.0, 630.0, 461.0, 519.0, 519.0, 579.0, 573.0, 576.0, 570.0, 570.0, 570.0, 530.0, 519.0, 519.0, 573.0, 530.0, 570.0, 576.0, 495.0, 570.0, 527.0, 180.0, 576.0, 576.0, 573.0, 522.0, 525.0, 582.0, 576.0, 570.0, 533.0, 573.0, 582.0, 579.0, 579.0, 579.0, 522.0, 579.0, 522.0, 579.0, 567.0, 570.0, 522.0, 576.0, 522.0, 576.0, 530.0, 576.0, 579.0, 525.0, 573.0, 582.0, 564.0, 579.0, 579.0, 522.0, 630.0, 576.0, 576.0, 573.0, 576.0, 530.0, 527.0, 576.0, 630.0, 527.0, 582.0, 579.0, 573.0, 579.0, 527.0, 567.0, 522.0, 579.0, 579.0, 536.0, 582.0, 484.0, 582.0, 576.0, 503.0, 570.0, 573.0, 530.0, 525.0, 536.0, 525.0, 573.0, 582.0, 579.0, 576.0, 573.0, 582.0, 447.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [276.0, 300.0, 290.0, 277.0, 292.0, 290.0, 282.0, 294.0, 252.0, 270.0, 265.0, 257.0, 277.0, 296.0, 306.0, 324.0, 230.0, 231.0, 250.0, 269.0, 256.0, 263.0, 287.0, 292.0, 287.0, 286.0, 278.0, 298.0, 288.0, 282.0, 286.0, 284.0, 280.0, 290.0, 262.0, 268.0, 248.0, 271.0, 275.0, 244.0, 289.0, 284.0, 265.0, 265.0, 285.0, 285.0, 280.0, 296.0, 240.0, 
255.0, 286.0, 284.0, 266.0, 261.0, 96.0, 84.0, 285.0, 291.0, 287.0, 289.0, 282.0, 291.0, 273.0, 249.0, 267.0, 258.0, 293.0, 289.0, 288.0, 288.0, 292.0, 278.0, 268.0, 265.0, 278.0, 295.0, 285.0, 297.0, 283.0, 296.0, 282.0, 297.0, 278.0, 301.0, 269.0, 253.0, 282.0, 297.0, 265.0, 257.0, 288.0, 291.0, 287.0, 280.0, 294.0, 276.0, 260.0, 262.0, 285.0, 291.0, 264.0, 258.0, 292.0, 284.0, 259.0, 271.0, 285.0, 291.0, 277.0, 302.0, 251.0, 274.0, 283.0, 290.0, 300.0, 282.0, 279.0, 285.0, 286.0, 293.0, 277.0, 302.0, 251.0, 271.0, 310.0, 320.0, 291.0, 285.0, 283.0, 293.0, 275.0, 298.0, 291.0, 285.0, 256.0, 274.0, 257.0, 270.0, 292.0, 284.0, 318.0, 312.0, 277.0, 250.0, 282.0, 300.0, 290.0, 289.0, 290.0, 283.0, 285.0, 294.0, 267.0, 260.0, 286.0, 281.0, 281.0, 241.0, 292.0, 287.0, 281.0, 298.0, 268.0, 268.0, 300.0, 282.0, 236.0, 248.0, 295.0, 287.0, 291.0, 285.0, 251.0, 252.0, 283.0, 287.0, 286.0, 287.0, 258.0, 272.0, 266.0, 259.0, 268.0, 268.0, 270.0, 255.0, 278.0, 295.0, 283.0, 299.0, 281.0, 298.0, 297.0, 279.0, 283.0, 290.0, 286.0, 296.0, 226.0, 221.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6834106688383577, "mean_inference_ms": 1.2059047272839276, "mean_action_processing_ms": 0.13233080968882707, "mean_env_wait_ms": 0.8376732814698284, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 630.0, "episode_reward_min": 180.0, "episode_reward_mean": 553.85, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 84.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 276.925}, "hist_stats": {"episode_reward": [576.0, 567.0, 582.0, 576.0, 522.0, 522.0, 573.0, 630.0, 461.0, 519.0, 519.0, 579.0, 573.0, 576.0, 570.0, 570.0, 570.0, 530.0, 519.0, 519.0, 573.0, 530.0, 570.0, 576.0, 495.0, 570.0, 527.0, 180.0, 576.0, 576.0, 573.0, 522.0, 525.0, 582.0, 576.0, 570.0, 533.0, 573.0, 582.0, 579.0, 579.0, 579.0, 522.0, 579.0, 522.0, 579.0, 567.0, 570.0, 522.0, 576.0, 522.0, 576.0, 530.0, 576.0, 579.0, 525.0, 573.0, 
582.0, 564.0, 579.0, 579.0, 522.0, 630.0, 576.0, 576.0, 573.0, 576.0, 530.0, 527.0, 576.0, 630.0, 527.0, 582.0, 579.0, 573.0, 579.0, 527.0, 567.0, 522.0, 579.0, 579.0, 536.0, 582.0, 484.0, 582.0, 576.0, 503.0, 570.0, 573.0, 530.0, 525.0, 536.0, 525.0, 573.0, 582.0, 579.0, 576.0, 573.0, 582.0, 447.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [276.0, 300.0, 290.0, 277.0, 292.0, 290.0, 282.0, 294.0, 252.0, 270.0, 265.0, 257.0, 277.0, 296.0, 306.0, 324.0, 230.0, 231.0, 250.0, 269.0, 256.0, 263.0, 287.0, 292.0, 287.0, 286.0, 278.0, 298.0, 288.0, 282.0, 286.0, 284.0, 280.0, 290.0, 262.0, 268.0, 248.0, 271.0, 275.0, 244.0, 289.0, 284.0, 265.0, 265.0, 285.0, 285.0, 280.0, 296.0, 240.0, 255.0, 286.0, 284.0, 266.0, 261.0, 96.0, 84.0, 285.0, 291.0, 287.0, 289.0, 282.0, 291.0, 273.0, 249.0, 267.0, 258.0, 293.0, 289.0, 288.0, 288.0, 292.0, 278.0, 268.0, 265.0, 278.0, 295.0, 285.0, 297.0, 283.0, 296.0, 282.0, 297.0, 278.0, 301.0, 269.0, 253.0, 282.0, 297.0, 265.0, 257.0, 288.0, 291.0, 287.0, 280.0, 294.0, 276.0, 260.0, 262.0, 285.0, 291.0, 264.0, 258.0, 292.0, 284.0, 259.0, 271.0, 285.0, 291.0, 277.0, 302.0, 251.0, 274.0, 283.0, 290.0, 300.0, 282.0, 279.0, 285.0, 286.0, 293.0, 277.0, 302.0, 251.0, 271.0, 310.0, 320.0, 291.0, 285.0, 283.0, 293.0, 275.0, 298.0, 291.0, 285.0, 256.0, 274.0, 257.0, 270.0, 292.0, 284.0, 318.0, 312.0, 277.0, 250.0, 282.0, 300.0, 290.0, 289.0, 290.0, 283.0, 285.0, 294.0, 267.0, 260.0, 286.0, 281.0, 281.0, 241.0, 292.0, 287.0, 281.0, 298.0, 268.0, 268.0, 300.0, 
282.0, 236.0, 248.0, 295.0, 287.0, 291.0, 285.0, 251.0, 252.0, 283.0, 287.0, 286.0, 287.0, 258.0, 272.0, 266.0, 259.0, 268.0, 268.0, 270.0, 255.0, 278.0, 295.0, 283.0, 299.0, 281.0, 298.0, 297.0, 279.0, 283.0, 290.0, 286.0, 296.0, 226.0, 221.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6834106688383577, "mean_inference_ms": 1.2059047272839276, "mean_action_processing_ms": 0.13233080968882707, "mean_env_wait_ms": 0.8376732814698284, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 4326400, "num_agent_steps_trained": 4326400, "num_env_steps_sampled": 2163200, "num_env_steps_trained": 2163200, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 2163200, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 4326400, "timers": {"training_iteration_time_ms": 3620.158, "learn_time_ms": 1104.52, "learn_throughput": 11588.742, "synch_weights_time_ms": 12.513}, "counters": {"num_env_steps_sampled": 2163200, "num_env_steps_trained": 2163200, "num_agent_steps_sampled": 4326400, "num_agent_steps_trained": 4326400}, "done": false, "episodes_total": 5408, "training_iteration": 169, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-10-56", "timestamp": 1666581056, "time_this_iter_s": 3.7082767486572266, "time_total_s": 633.8234794139862, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 633.8234794139862, "timesteps_since_restore": 0, "iterations_since_restore": 169, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.883333333333333, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 194.0, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 172.21, "shaped_reward_min": 127, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.52, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 16.04, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.13, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 15.77, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.26, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.19, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.1, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, 
"useful_onion_drop_agent_1_mean": 0.09, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.92, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 15.6, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.22, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.68, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.58, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.87, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.36, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.36, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.11, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.05, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 4.72, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 5.2, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 4.68, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 5.08, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.03, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 3, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.92, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 15.6, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.92, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 15.6, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.00020206667250022292, "policy_loss": -2.2467749658972025e-05, "vf_loss": 
7.46675968170166, "vf_explained_var": 0.680292010307312, "kl": 0.0021671182475984097, "entropy": 1.0442825555801392, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 2176000, "num_env_steps_trained": 2176000, "num_agent_steps_sampled": 4352000, "num_agent_steps_trained": 4352000}, "sampler_results": {"episode_reward_max": 630.0, "episode_reward_min": 447.0, "episode_reward_mean": 560.21, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 221.0}, "policy_reward_max": {"ppo": 323.0}, "policy_reward_mean": {"ppo": 280.105}, "custom_metrics": {"sparse_reward_mean": 194.0, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 172.21, "shaped_reward_min": 127, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.52, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 16.04, 
"onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.13, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 15.77, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.26, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.19, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.1, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.09, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.92, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 15.6, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.22, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.68, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.58, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.87, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.36, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.36, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.11, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.05, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 4.72, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 5.2, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 4.68, "soup_delivery_agent_0_min": 1, 
"soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 5.08, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.03, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 3, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.92, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 15.6, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.92, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 15.6, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [525.0, 582.0, 576.0, 570.0, 533.0, 573.0, 582.0, 579.0, 579.0, 579.0, 522.0, 579.0, 522.0, 579.0, 567.0, 570.0, 522.0, 576.0, 522.0, 576.0, 530.0, 576.0, 579.0, 525.0, 573.0, 582.0, 564.0, 579.0, 579.0, 522.0, 630.0, 576.0, 576.0, 573.0, 576.0, 530.0, 527.0, 576.0, 630.0, 527.0, 582.0, 579.0, 573.0, 579.0, 527.0, 567.0, 522.0, 579.0, 579.0, 536.0, 582.0, 484.0, 582.0, 576.0, 503.0, 570.0, 573.0, 530.0, 525.0, 536.0, 525.0, 573.0, 582.0, 579.0, 576.0, 573.0, 582.0, 447.0, 522.0, 582.0, 513.0, 567.0, 576.0, 573.0, 570.0, 519.0, 579.0, 579.0, 579.0, 576.0, 570.0, 630.0, 527.0, 573.0, 530.0, 509.0, 579.0, 576.0, 519.0, 579.0, 579.0, 576.0, 579.0, 515.0, 573.0, 576.0, 573.0, 582.0, 525.0, 552.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [267.0, 258.0, 293.0, 289.0, 288.0, 288.0, 292.0, 278.0, 268.0, 265.0, 278.0, 295.0, 285.0, 297.0, 283.0, 296.0, 282.0, 297.0, 278.0, 301.0, 269.0, 253.0, 282.0, 297.0, 265.0, 257.0, 288.0, 291.0, 287.0, 280.0, 294.0, 276.0, 260.0, 262.0, 285.0, 291.0, 264.0, 258.0, 292.0, 284.0, 259.0, 271.0, 285.0, 291.0, 277.0, 302.0, 251.0, 274.0, 283.0, 
290.0, 300.0, 282.0, 279.0, 285.0, 286.0, 293.0, 277.0, 302.0, 251.0, 271.0, 310.0, 320.0, 291.0, 285.0, 283.0, 293.0, 275.0, 298.0, 291.0, 285.0, 256.0, 274.0, 257.0, 270.0, 292.0, 284.0, 318.0, 312.0, 277.0, 250.0, 282.0, 300.0, 290.0, 289.0, 290.0, 283.0, 285.0, 294.0, 267.0, 260.0, 286.0, 281.0, 281.0, 241.0, 292.0, 287.0, 281.0, 298.0, 268.0, 268.0, 300.0, 282.0, 236.0, 248.0, 295.0, 287.0, 291.0, 285.0, 251.0, 252.0, 283.0, 287.0, 286.0, 287.0, 258.0, 272.0, 266.0, 259.0, 268.0, 268.0, 270.0, 255.0, 278.0, 295.0, 283.0, 299.0, 281.0, 298.0, 297.0, 279.0, 283.0, 290.0, 286.0, 296.0, 226.0, 221.0, 260.0, 262.0, 292.0, 290.0, 262.0, 251.0, 289.0, 278.0, 281.0, 295.0, 268.0, 305.0, 292.0, 278.0, 256.0, 263.0, 279.0, 300.0, 296.0, 283.0, 285.0, 294.0, 295.0, 281.0, 291.0, 279.0, 307.0, 323.0, 254.0, 273.0, 283.0, 290.0, 250.0, 280.0, 256.0, 253.0, 290.0, 289.0, 274.0, 302.0, 266.0, 253.0, 298.0, 281.0, 283.0, 296.0, 281.0, 295.0, 289.0, 290.0, 258.0, 257.0, 302.0, 271.0, 305.0, 271.0, 277.0, 296.0, 289.0, 293.0, 256.0, 269.0, 273.0, 279.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6834095161080666, "mean_inference_ms": 1.2058352661434038, "mean_action_processing_ms": 0.13233467371881377, "mean_env_wait_ms": 0.8375451378942773, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 630.0, "episode_reward_min": 447.0, "episode_reward_mean": 560.21, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 221.0}, "policy_reward_max": {"ppo": 323.0}, "policy_reward_mean": {"ppo": 280.105}, "hist_stats": {"episode_reward": [525.0, 582.0, 576.0, 570.0, 533.0, 573.0, 582.0, 579.0, 579.0, 579.0, 522.0, 579.0, 522.0, 579.0, 567.0, 570.0, 522.0, 576.0, 522.0, 576.0, 530.0, 576.0, 579.0, 525.0, 573.0, 582.0, 564.0, 579.0, 579.0, 522.0, 630.0, 576.0, 576.0, 573.0, 576.0, 530.0, 527.0, 576.0, 630.0, 527.0, 582.0, 579.0, 573.0, 579.0, 527.0, 567.0, 522.0, 579.0, 579.0, 536.0, 582.0, 484.0, 582.0, 576.0, 503.0, 570.0, 
573.0, 530.0, 525.0, 536.0, 525.0, 573.0, 582.0, 579.0, 576.0, 573.0, 582.0, 447.0, 522.0, 582.0, 513.0, 567.0, 576.0, 573.0, 570.0, 519.0, 579.0, 579.0, 579.0, 576.0, 570.0, 630.0, 527.0, 573.0, 530.0, 509.0, 579.0, 576.0, 519.0, 579.0, 579.0, 576.0, 579.0, 515.0, 573.0, 576.0, 573.0, 582.0, 525.0, 552.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [267.0, 258.0, 293.0, 289.0, 288.0, 288.0, 292.0, 278.0, 268.0, 265.0, 278.0, 295.0, 285.0, 297.0, 283.0, 296.0, 282.0, 297.0, 278.0, 301.0, 269.0, 253.0, 282.0, 297.0, 265.0, 257.0, 288.0, 291.0, 287.0, 280.0, 294.0, 276.0, 260.0, 262.0, 285.0, 291.0, 264.0, 258.0, 292.0, 284.0, 259.0, 271.0, 285.0, 291.0, 277.0, 302.0, 251.0, 274.0, 283.0, 290.0, 300.0, 282.0, 279.0, 285.0, 286.0, 293.0, 277.0, 302.0, 251.0, 271.0, 310.0, 320.0, 291.0, 285.0, 283.0, 293.0, 275.0, 298.0, 291.0, 285.0, 256.0, 274.0, 257.0, 270.0, 292.0, 284.0, 318.0, 312.0, 277.0, 250.0, 282.0, 300.0, 290.0, 289.0, 290.0, 283.0, 285.0, 294.0, 267.0, 260.0, 286.0, 281.0, 281.0, 241.0, 292.0, 287.0, 281.0, 298.0, 268.0, 268.0, 300.0, 282.0, 236.0, 248.0, 295.0, 287.0, 291.0, 285.0, 251.0, 252.0, 283.0, 287.0, 286.0, 287.0, 258.0, 272.0, 266.0, 259.0, 268.0, 268.0, 270.0, 255.0, 278.0, 295.0, 283.0, 299.0, 281.0, 298.0, 297.0, 279.0, 283.0, 290.0, 286.0, 296.0, 226.0, 221.0, 260.0, 262.0, 292.0, 290.0, 262.0, 251.0, 289.0, 278.0, 281.0, 295.0, 268.0, 305.0, 292.0, 278.0, 256.0, 263.0, 279.0, 300.0, 296.0, 283.0, 285.0, 294.0, 295.0, 281.0, 291.0, 279.0, 307.0, 323.0, 
254.0, 273.0, 283.0, 290.0, 250.0, 280.0, 256.0, 253.0, 290.0, 289.0, 274.0, 302.0, 266.0, 253.0, 298.0, 281.0, 283.0, 296.0, 281.0, 295.0, 289.0, 290.0, 258.0, 257.0, 302.0, 271.0, 305.0, 271.0, 277.0, 296.0, 289.0, 293.0, 256.0, 269.0, 273.0, 279.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6834095161080666, "mean_inference_ms": 1.2058352661434038, "mean_action_processing_ms": 0.13233467371881377, "mean_env_wait_ms": 0.8375451378942773, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 4352000, "num_agent_steps_trained": 4352000, "num_env_steps_sampled": 2176000, "num_env_steps_trained": 2176000, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 2176000, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 4352000, "timers": {"training_iteration_time_ms": 3629.259, "learn_time_ms": 1114.057, "learn_throughput": 11489.537, "synch_weights_time_ms": 12.651}, "counters": {"num_env_steps_sampled": 2176000, "num_env_steps_trained": 2176000, "num_agent_steps_sampled": 4352000, "num_agent_steps_trained": 4352000}, "done": false, "episodes_total": 5440, "training_iteration": 170, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-10-59", "timestamp": 1666581059, "time_this_iter_s": 3.667999029159546, "time_total_s": 637.4914784431458, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 637.4914784431458, "timesteps_since_restore": 0, "iterations_since_restore": 170, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.56, "ram_util_percent": 10.620000000000001}}
+{"custom_metrics": {"sparse_reward_mean": 192.8, "sparse_reward_min": 60, "sparse_reward_max": 220, "shaped_reward_mean": 171.38, "shaped_reward_min": 54, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.49, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 16.05, "onion_pickup_agent_1_min": 4, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 15.96, "useful_onion_pickup_agent_0_min": 9, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 15.71, "useful_onion_pickup_agent_1_min": 4, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.3, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.22, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.15, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, 
"useful_onion_drop_agent_1_mean": 0.11, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.85, "potting_onion_agent_0_min": 9, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 15.53, "potting_onion_agent_1_min": 3, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.32, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.55, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.64, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.74, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.37, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.37, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.11, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 4.81, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 5.07, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 4.75, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.96, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.04, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 3, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.85, "optimal_onion_potting_agent_0_min": 9, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 15.53, "optimal_onion_potting_agent_1_min": 3, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.85, "viable_onion_potting_agent_0_min": 9, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 15.53, "viable_onion_potting_agent_1_min": 3, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0007277140975929797, "policy_loss": 0.0004762199823744595, "vf_loss": 
7.702224254608154, "vf_explained_var": 0.6904242634773254, "kl": 0.002471720799803734, "entropy": 1.0374559164047241, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 2188800, "num_env_steps_trained": 2188800, "num_agent_steps_sampled": 4377600, "num_agent_steps_trained": 4377600}, "sampler_results": {"episode_reward_max": 630.0, "episode_reward_min": 174.0, "episode_reward_mean": 556.98, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 82.0}, "policy_reward_max": {"ppo": 326.0}, "policy_reward_mean": {"ppo": 278.49}, "custom_metrics": {"sparse_reward_mean": 192.8, "sparse_reward_min": 60, "sparse_reward_max": 220, "shaped_reward_mean": 171.38, "shaped_reward_min": 54, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.49, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 16.05, 
"onion_pickup_agent_1_min": 4, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 15.96, "useful_onion_pickup_agent_0_min": 9, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 15.71, "useful_onion_pickup_agent_1_min": 4, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.3, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.22, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.15, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, "useful_onion_drop_agent_1_mean": 0.11, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.85, "potting_onion_agent_0_min": 9, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 15.53, "potting_onion_agent_1_min": 3, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.32, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.55, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.64, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.74, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.37, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.37, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.11, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 4.81, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 5.07, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 4.75, "soup_delivery_agent_0_min": 1, 
"soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.96, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.04, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 3, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.85, "optimal_onion_potting_agent_0_min": 9, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 15.53, "optimal_onion_potting_agent_1_min": 3, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.85, "viable_onion_potting_agent_0_min": 9, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 15.53, "viable_onion_potting_agent_1_min": 3, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [576.0, 573.0, 576.0, 530.0, 527.0, 576.0, 630.0, 527.0, 582.0, 579.0, 573.0, 579.0, 527.0, 567.0, 522.0, 579.0, 579.0, 536.0, 582.0, 484.0, 582.0, 576.0, 503.0, 570.0, 573.0, 530.0, 525.0, 536.0, 525.0, 573.0, 582.0, 579.0, 576.0, 573.0, 582.0, 447.0, 522.0, 582.0, 513.0, 567.0, 576.0, 573.0, 570.0, 519.0, 579.0, 579.0, 579.0, 576.0, 570.0, 630.0, 527.0, 573.0, 530.0, 509.0, 579.0, 576.0, 519.0, 579.0, 579.0, 576.0, 579.0, 515.0, 573.0, 576.0, 573.0, 582.0, 525.0, 552.0, 522.0, 582.0, 573.0, 525.0, 582.0, 582.0, 579.0, 627.0, 579.0, 518.0, 573.0, 576.0, 579.0, 573.0, 582.0, 579.0, 576.0, 582.0, 582.0, 579.0, 573.0, 587.0, 174.0, 573.0, 465.0, 570.0, 530.0, 530.0, 522.0, 579.0, 570.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [283.0, 293.0, 275.0, 298.0, 291.0, 285.0, 256.0, 274.0, 257.0, 270.0, 292.0, 284.0, 318.0, 312.0, 277.0, 250.0, 282.0, 300.0, 290.0, 289.0, 290.0, 283.0, 285.0, 294.0, 267.0, 260.0, 286.0, 281.0, 281.0, 241.0, 292.0, 287.0, 281.0, 298.0, 268.0, 268.0, 300.0, 282.0, 236.0, 248.0, 295.0, 287.0, 291.0, 285.0, 251.0, 252.0, 283.0, 287.0, 286.0, 
287.0, 258.0, 272.0, 266.0, 259.0, 268.0, 268.0, 270.0, 255.0, 278.0, 295.0, 283.0, 299.0, 281.0, 298.0, 297.0, 279.0, 283.0, 290.0, 286.0, 296.0, 226.0, 221.0, 260.0, 262.0, 292.0, 290.0, 262.0, 251.0, 289.0, 278.0, 281.0, 295.0, 268.0, 305.0, 292.0, 278.0, 256.0, 263.0, 279.0, 300.0, 296.0, 283.0, 285.0, 294.0, 295.0, 281.0, 291.0, 279.0, 307.0, 323.0, 254.0, 273.0, 283.0, 290.0, 250.0, 280.0, 256.0, 253.0, 290.0, 289.0, 274.0, 302.0, 266.0, 253.0, 298.0, 281.0, 283.0, 296.0, 281.0, 295.0, 289.0, 290.0, 258.0, 257.0, 302.0, 271.0, 305.0, 271.0, 277.0, 296.0, 289.0, 293.0, 256.0, 269.0, 273.0, 279.0, 277.0, 245.0, 291.0, 291.0, 287.0, 286.0, 262.0, 263.0, 296.0, 286.0, 280.0, 302.0, 283.0, 296.0, 301.0, 326.0, 281.0, 298.0, 261.0, 257.0, 274.0, 299.0, 289.0, 287.0, 297.0, 282.0, 296.0, 277.0, 290.0, 292.0, 303.0, 276.0, 291.0, 285.0, 284.0, 298.0, 294.0, 288.0, 295.0, 284.0, 283.0, 290.0, 304.0, 283.0, 92.0, 82.0, 279.0, 294.0, 227.0, 238.0, 288.0, 282.0, 268.0, 262.0, 263.0, 267.0, 261.0, 261.0, 288.0, 291.0, 285.0, 285.0, 296.0, 286.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6833479634776382, "mean_inference_ms": 1.2057306823526823, "mean_action_processing_ms": 0.13233109475853205, "mean_env_wait_ms": 0.8373725097047232, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 630.0, "episode_reward_min": 174.0, "episode_reward_mean": 556.98, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 82.0}, "policy_reward_max": {"ppo": 326.0}, "policy_reward_mean": {"ppo": 278.49}, "hist_stats": {"episode_reward": [576.0, 573.0, 576.0, 530.0, 527.0, 576.0, 630.0, 527.0, 582.0, 579.0, 573.0, 579.0, 527.0, 567.0, 522.0, 579.0, 579.0, 536.0, 582.0, 484.0, 582.0, 576.0, 503.0, 570.0, 573.0, 530.0, 525.0, 536.0, 525.0, 573.0, 582.0, 579.0, 576.0, 573.0, 582.0, 447.0, 522.0, 582.0, 513.0, 567.0, 576.0, 573.0, 570.0, 519.0, 579.0, 579.0, 579.0, 576.0, 570.0, 630.0, 527.0, 573.0, 530.0, 509.0, 579.0, 576.0, 519.0, 
579.0, 579.0, 576.0, 579.0, 515.0, 573.0, 576.0, 573.0, 582.0, 525.0, 552.0, 522.0, 582.0, 573.0, 525.0, 582.0, 582.0, 579.0, 627.0, 579.0, 518.0, 573.0, 576.0, 579.0, 573.0, 582.0, 579.0, 576.0, 582.0, 582.0, 579.0, 573.0, 587.0, 174.0, 573.0, 465.0, 570.0, 530.0, 530.0, 522.0, 579.0, 570.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [283.0, 293.0, 275.0, 298.0, 291.0, 285.0, 256.0, 274.0, 257.0, 270.0, 292.0, 284.0, 318.0, 312.0, 277.0, 250.0, 282.0, 300.0, 290.0, 289.0, 290.0, 283.0, 285.0, 294.0, 267.0, 260.0, 286.0, 281.0, 281.0, 241.0, 292.0, 287.0, 281.0, 298.0, 268.0, 268.0, 300.0, 282.0, 236.0, 248.0, 295.0, 287.0, 291.0, 285.0, 251.0, 252.0, 283.0, 287.0, 286.0, 287.0, 258.0, 272.0, 266.0, 259.0, 268.0, 268.0, 270.0, 255.0, 278.0, 295.0, 283.0, 299.0, 281.0, 298.0, 297.0, 279.0, 283.0, 290.0, 286.0, 296.0, 226.0, 221.0, 260.0, 262.0, 292.0, 290.0, 262.0, 251.0, 289.0, 278.0, 281.0, 295.0, 268.0, 305.0, 292.0, 278.0, 256.0, 263.0, 279.0, 300.0, 296.0, 283.0, 285.0, 294.0, 295.0, 281.0, 291.0, 279.0, 307.0, 323.0, 254.0, 273.0, 283.0, 290.0, 250.0, 280.0, 256.0, 253.0, 290.0, 289.0, 274.0, 302.0, 266.0, 253.0, 298.0, 281.0, 283.0, 296.0, 281.0, 295.0, 289.0, 290.0, 258.0, 257.0, 302.0, 271.0, 305.0, 271.0, 277.0, 296.0, 289.0, 293.0, 256.0, 269.0, 273.0, 279.0, 277.0, 245.0, 291.0, 291.0, 287.0, 286.0, 262.0, 263.0, 296.0, 286.0, 280.0, 302.0, 283.0, 296.0, 301.0, 326.0, 281.0, 298.0, 261.0, 257.0, 274.0, 299.0, 289.0, 287.0, 297.0, 282.0, 296.0, 277.0, 290.0, 
292.0, 303.0, 276.0, 291.0, 285.0, 284.0, 298.0, 294.0, 288.0, 295.0, 284.0, 283.0, 290.0, 304.0, 283.0, 92.0, 82.0, 279.0, 294.0, 227.0, 238.0, 288.0, 282.0, 268.0, 262.0, 263.0, 267.0, 261.0, 261.0, 288.0, 291.0, 285.0, 285.0, 296.0, 286.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6833479634776382, "mean_inference_ms": 1.2057306823526823, "mean_action_processing_ms": 0.13233109475853205, "mean_env_wait_ms": 0.8373725097047232, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 4377600, "num_agent_steps_trained": 4377600, "num_env_steps_sampled": 2188800, "num_env_steps_trained": 2188800, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 2188800, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 4377600, "timers": {"training_iteration_time_ms": 3640.814, "learn_time_ms": 1127.714, "learn_throughput": 11350.399, "synch_weights_time_ms": 11.954}, "counters": {"num_env_steps_sampled": 2188800, "num_env_steps_trained": 2188800, "num_agent_steps_sampled": 4377600, "num_agent_steps_trained": 4377600}, "done": false, "episodes_total": 5472, "training_iteration": 171, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-11-03", "timestamp": 1666581063, "time_this_iter_s": 3.769057512283325, "time_total_s": 641.2605359554291, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 641.2605359554291, "timesteps_since_restore": 0, "iterations_since_restore": 171, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.46666666666667, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 194.2, "sparse_reward_min": 60, "sparse_reward_max": 220, "shaped_reward_mean": 171.99, "shaped_reward_min": 54, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.34, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 26, "onion_pickup_agent_1_mean": 16.29, "onion_pickup_agent_1_min": 4, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 15.9, "useful_onion_pickup_agent_0_min": 9, "useful_onion_pickup_agent_0_max": 26, "useful_onion_pickup_agent_1_mean": 15.89, "useful_onion_pickup_agent_1_min": 4, "useful_onion_pickup_agent_1_max": 25, "onion_drop_agent_0_mean": 0.26, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.25, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.13, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, 
"useful_onion_drop_agent_1_mean": 0.11, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.73, "potting_onion_agent_0_min": 9, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 15.77, "potting_onion_agent_1_min": 3, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 5.45, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.42, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.74, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.69, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 10, "dish_drop_agent_0_mean": 0.38, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.33, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.1, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 4.89, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.97, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 4.85, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.89, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.73, "optimal_onion_potting_agent_0_min": 9, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 15.77, "optimal_onion_potting_agent_1_min": 3, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.73, "viable_onion_potting_agent_0_min": 9, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 15.77, "viable_onion_potting_agent_1_min": 3, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0007936491165310144, "policy_loss": 0.0005551030626520514, "vf_loss": 
7.581031322479248, "vf_explained_var": 0.6723864674568176, "kl": 0.0018949867226183414, "entropy": 1.039113998413086, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 2201600, "num_env_steps_trained": 2201600, "num_agent_steps_sampled": 4403200, "num_agent_steps_trained": 4403200}, "sampler_results": {"episode_reward_max": 630.0, "episode_reward_min": 174.0, "episode_reward_mean": 560.39, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 82.0}, "policy_reward_max": {"ppo": 326.0}, "policy_reward_mean": {"ppo": 280.195}, "custom_metrics": {"sparse_reward_mean": 194.2, "sparse_reward_min": 60, "sparse_reward_max": 220, "shaped_reward_mean": 171.99, "shaped_reward_min": 54, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.34, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 26, "onion_pickup_agent_1_mean": 16.29, 
"onion_pickup_agent_1_min": 4, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 15.9, "useful_onion_pickup_agent_0_min": 9, "useful_onion_pickup_agent_0_max": 26, "useful_onion_pickup_agent_1_mean": 15.89, "useful_onion_pickup_agent_1_min": 4, "useful_onion_pickup_agent_1_max": 25, "onion_drop_agent_0_mean": 0.26, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.25, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.13, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, "useful_onion_drop_agent_1_mean": 0.11, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.73, "potting_onion_agent_0_min": 9, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 15.77, "potting_onion_agent_1_min": 3, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 5.45, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.42, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.74, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.69, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 10, "dish_drop_agent_0_mean": 0.38, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.33, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.1, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 4.89, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.97, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 4.85, "soup_delivery_agent_0_min": 1, 
"soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.89, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.73, "optimal_onion_potting_agent_0_min": 9, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 15.77, "optimal_onion_potting_agent_1_min": 3, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.73, "viable_onion_potting_agent_0_min": 9, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 15.77, "viable_onion_potting_agent_1_min": 3, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [576.0, 573.0, 582.0, 447.0, 522.0, 582.0, 513.0, 567.0, 576.0, 573.0, 570.0, 519.0, 579.0, 579.0, 579.0, 576.0, 570.0, 630.0, 527.0, 573.0, 530.0, 509.0, 579.0, 576.0, 519.0, 579.0, 579.0, 576.0, 579.0, 515.0, 573.0, 576.0, 573.0, 582.0, 525.0, 552.0, 522.0, 582.0, 573.0, 525.0, 582.0, 582.0, 579.0, 627.0, 579.0, 518.0, 573.0, 576.0, 579.0, 573.0, 582.0, 579.0, 576.0, 582.0, 582.0, 579.0, 573.0, 587.0, 174.0, 573.0, 465.0, 570.0, 530.0, 530.0, 522.0, 579.0, 570.0, 582.0, 573.0, 530.0, 579.0, 573.0, 587.0, 573.0, 533.0, 582.0, 570.0, 576.0, 579.0, 555.0, 530.0, 582.0, 525.0, 582.0, 525.0, 558.0, 525.0, 576.0, 573.0, 582.0, 627.0, 573.0, 590.0, 576.0, 579.0, 579.0, 630.0, 525.0, 576.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [297.0, 279.0, 283.0, 290.0, 286.0, 296.0, 226.0, 221.0, 260.0, 262.0, 292.0, 290.0, 262.0, 251.0, 289.0, 278.0, 281.0, 295.0, 268.0, 305.0, 292.0, 278.0, 256.0, 263.0, 279.0, 300.0, 296.0, 283.0, 285.0, 294.0, 295.0, 281.0, 291.0, 279.0, 307.0, 323.0, 254.0, 273.0, 283.0, 290.0, 250.0, 280.0, 256.0, 253.0, 290.0, 289.0, 274.0, 302.0, 266.0, 
253.0, 298.0, 281.0, 283.0, 296.0, 281.0, 295.0, 289.0, 290.0, 258.0, 257.0, 302.0, 271.0, 305.0, 271.0, 277.0, 296.0, 289.0, 293.0, 256.0, 269.0, 273.0, 279.0, 277.0, 245.0, 291.0, 291.0, 287.0, 286.0, 262.0, 263.0, 296.0, 286.0, 280.0, 302.0, 283.0, 296.0, 301.0, 326.0, 281.0, 298.0, 261.0, 257.0, 274.0, 299.0, 289.0, 287.0, 297.0, 282.0, 296.0, 277.0, 290.0, 292.0, 303.0, 276.0, 291.0, 285.0, 284.0, 298.0, 294.0, 288.0, 295.0, 284.0, 283.0, 290.0, 304.0, 283.0, 92.0, 82.0, 279.0, 294.0, 227.0, 238.0, 288.0, 282.0, 268.0, 262.0, 263.0, 267.0, 261.0, 261.0, 288.0, 291.0, 285.0, 285.0, 296.0, 286.0, 273.0, 300.0, 267.0, 263.0, 289.0, 290.0, 289.0, 284.0, 278.0, 309.0, 281.0, 292.0, 253.0, 280.0, 294.0, 288.0, 282.0, 288.0, 295.0, 281.0, 297.0, 282.0, 278.0, 277.0, 256.0, 274.0, 280.0, 302.0, 261.0, 264.0, 288.0, 294.0, 269.0, 256.0, 282.0, 276.0, 269.0, 256.0, 296.0, 280.0, 293.0, 280.0, 302.0, 280.0, 316.0, 311.0, 291.0, 282.0, 296.0, 294.0, 290.0, 286.0, 287.0, 292.0, 297.0, 282.0, 308.0, 322.0, 263.0, 262.0, 287.0, 289.0, 299.0, 277.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6832310630134222, "mean_inference_ms": 1.2056266619739329, "mean_action_processing_ms": 0.13232402816482625, "mean_env_wait_ms": 0.8371789914650529, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 630.0, "episode_reward_min": 174.0, "episode_reward_mean": 560.39, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 82.0}, "policy_reward_max": {"ppo": 326.0}, "policy_reward_mean": {"ppo": 280.195}, "hist_stats": {"episode_reward": [576.0, 573.0, 582.0, 447.0, 522.0, 582.0, 513.0, 567.0, 576.0, 573.0, 570.0, 519.0, 579.0, 579.0, 579.0, 576.0, 570.0, 630.0, 527.0, 573.0, 530.0, 509.0, 579.0, 576.0, 519.0, 579.0, 579.0, 576.0, 579.0, 515.0, 573.0, 576.0, 573.0, 582.0, 525.0, 552.0, 522.0, 582.0, 573.0, 525.0, 582.0, 582.0, 579.0, 627.0, 579.0, 518.0, 573.0, 576.0, 579.0, 573.0, 582.0, 579.0, 576.0, 582.0, 582.0, 579.0, 573.0, 
587.0, 174.0, 573.0, 465.0, 570.0, 530.0, 530.0, 522.0, 579.0, 570.0, 582.0, 573.0, 530.0, 579.0, 573.0, 587.0, 573.0, 533.0, 582.0, 570.0, 576.0, 579.0, 555.0, 530.0, 582.0, 525.0, 582.0, 525.0, 558.0, 525.0, 576.0, 573.0, 582.0, 627.0, 573.0, 590.0, 576.0, 579.0, 579.0, 630.0, 525.0, 576.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [297.0, 279.0, 283.0, 290.0, 286.0, 296.0, 226.0, 221.0, 260.0, 262.0, 292.0, 290.0, 262.0, 251.0, 289.0, 278.0, 281.0, 295.0, 268.0, 305.0, 292.0, 278.0, 256.0, 263.0, 279.0, 300.0, 296.0, 283.0, 285.0, 294.0, 295.0, 281.0, 291.0, 279.0, 307.0, 323.0, 254.0, 273.0, 283.0, 290.0, 250.0, 280.0, 256.0, 253.0, 290.0, 289.0, 274.0, 302.0, 266.0, 253.0, 298.0, 281.0, 283.0, 296.0, 281.0, 295.0, 289.0, 290.0, 258.0, 257.0, 302.0, 271.0, 305.0, 271.0, 277.0, 296.0, 289.0, 293.0, 256.0, 269.0, 273.0, 279.0, 277.0, 245.0, 291.0, 291.0, 287.0, 286.0, 262.0, 263.0, 296.0, 286.0, 280.0, 302.0, 283.0, 296.0, 301.0, 326.0, 281.0, 298.0, 261.0, 257.0, 274.0, 299.0, 289.0, 287.0, 297.0, 282.0, 296.0, 277.0, 290.0, 292.0, 303.0, 276.0, 291.0, 285.0, 284.0, 298.0, 294.0, 288.0, 295.0, 284.0, 283.0, 290.0, 304.0, 283.0, 92.0, 82.0, 279.0, 294.0, 227.0, 238.0, 288.0, 282.0, 268.0, 262.0, 263.0, 267.0, 261.0, 261.0, 288.0, 291.0, 285.0, 285.0, 296.0, 286.0, 273.0, 300.0, 267.0, 263.0, 289.0, 290.0, 289.0, 284.0, 278.0, 309.0, 281.0, 292.0, 253.0, 280.0, 294.0, 288.0, 282.0, 288.0, 295.0, 281.0, 297.0, 282.0, 278.0, 277.0, 256.0, 274.0, 280.0, 302.0, 261.0, 
264.0, 288.0, 294.0, 269.0, 256.0, 282.0, 276.0, 269.0, 256.0, 296.0, 280.0, 293.0, 280.0, 302.0, 280.0, 316.0, 311.0, 291.0, 282.0, 296.0, 294.0, 290.0, 286.0, 287.0, 292.0, 297.0, 282.0, 308.0, 322.0, 263.0, 262.0, 287.0, 289.0, 299.0, 277.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6832310630134222, "mean_inference_ms": 1.2056266619739329, "mean_action_processing_ms": 0.13232402816482625, "mean_env_wait_ms": 0.8371789914650529, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 4403200, "num_agent_steps_trained": 4403200, "num_env_steps_sampled": 2201600, "num_env_steps_trained": 2201600, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 2201600, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 4403200, "timers": {"training_iteration_time_ms": 3629.816, "learn_time_ms": 1123.526, "learn_throughput": 11392.702, "synch_weights_time_ms": 12.34}, "counters": {"num_env_steps_sampled": 2201600, "num_env_steps_trained": 2201600, "num_agent_steps_sampled": 4403200, "num_agent_steps_trained": 4403200}, "done": false, "episodes_total": 5504, "training_iteration": 172, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-11-07", "timestamp": 1666581067, "time_this_iter_s": 3.5678622722625732, "time_total_s": 644.8283982276917, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 644.8283982276917, "timesteps_since_restore": 0, "iterations_since_restore": 172, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.580000000000002, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 194.4, "sparse_reward_min": 60, "sparse_reward_max": 220, "shaped_reward_mean": 173.24, "shaped_reward_min": 54, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.66, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 26, "onion_pickup_agent_1_mean": 17.19, "onion_pickup_agent_1_min": 4, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 15.18, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 26, "useful_onion_pickup_agent_1_mean": 16.71, "useful_onion_pickup_agent_1_min": 4, "useful_onion_pickup_agent_1_max": 25, "onion_drop_agent_0_mean": 0.3, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.27, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.14, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, 
"useful_onion_drop_agent_1_mean": 0.13, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.02, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 16.61, "potting_onion_agent_1_min": 3, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 5.83, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 5.08, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.17, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.48, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 10, "dish_drop_agent_0_mean": 0.42, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.24, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.04, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.21, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.69, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.14, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.62, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.03, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.02, "optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 16.61, "optimal_onion_potting_agent_1_min": 3, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.02, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 16.61, "viable_onion_potting_agent_1_min": 3, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0011171973310410976, "policy_loss": -0.0013455945299938321, "vf_loss": 
7.470318794250488, "vf_explained_var": 0.6936505436897278, "kl": 0.002078745514154434, "entropy": 1.0372672080993652, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 2214400, "num_env_steps_trained": 2214400, "num_agent_steps_sampled": 4428800, "num_agent_steps_trained": 4428800}, "sampler_results": {"episode_reward_max": 630.0, "episode_reward_min": 174.0, "episode_reward_mean": 562.04, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 82.0}, "policy_reward_max": {"ppo": 326.0}, "policy_reward_mean": {"ppo": 281.02}, "custom_metrics": {"sparse_reward_mean": 194.4, "sparse_reward_min": 60, "sparse_reward_max": 220, "shaped_reward_mean": 173.24, "shaped_reward_min": 54, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.66, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 26, "onion_pickup_agent_1_mean": 17.19, 
"onion_pickup_agent_1_min": 4, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 15.18, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 26, "useful_onion_pickup_agent_1_mean": 16.71, "useful_onion_pickup_agent_1_min": 4, "useful_onion_pickup_agent_1_max": 25, "onion_drop_agent_0_mean": 0.3, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.27, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.14, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, "useful_onion_drop_agent_1_mean": 0.13, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.02, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 16.61, "potting_onion_agent_1_min": 3, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 5.83, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 5.08, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.17, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.48, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 10, "dish_drop_agent_0_mean": 0.42, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.24, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.04, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.21, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.69, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.14, "soup_delivery_agent_0_min": 1, 
"soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.62, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.03, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.02, "optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 16.61, "optimal_onion_potting_agent_1_min": 3, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.02, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 16.61, "viable_onion_potting_agent_1_min": 3, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [573.0, 582.0, 525.0, 552.0, 522.0, 582.0, 573.0, 525.0, 582.0, 582.0, 579.0, 627.0, 579.0, 518.0, 573.0, 576.0, 579.0, 573.0, 582.0, 579.0, 576.0, 582.0, 582.0, 579.0, 573.0, 587.0, 174.0, 573.0, 465.0, 570.0, 530.0, 530.0, 522.0, 579.0, 570.0, 582.0, 573.0, 530.0, 579.0, 573.0, 587.0, 573.0, 533.0, 582.0, 570.0, 576.0, 579.0, 555.0, 530.0, 582.0, 525.0, 582.0, 525.0, 558.0, 525.0, 576.0, 573.0, 582.0, 627.0, 573.0, 590.0, 576.0, 579.0, 579.0, 630.0, 525.0, 576.0, 576.0, 576.0, 530.0, 579.0, 579.0, 576.0, 582.0, 570.0, 570.0, 576.0, 561.0, 582.0, 579.0, 576.0, 567.0, 579.0, 579.0, 579.0, 539.0, 576.0, 587.0, 579.0, 576.0, 573.0, 573.0, 587.0, 582.0, 576.0, 582.0, 579.0, 564.0, 273.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [277.0, 296.0, 289.0, 293.0, 256.0, 269.0, 273.0, 279.0, 277.0, 245.0, 291.0, 291.0, 287.0, 286.0, 262.0, 263.0, 296.0, 286.0, 280.0, 302.0, 283.0, 296.0, 301.0, 326.0, 281.0, 298.0, 261.0, 257.0, 274.0, 299.0, 289.0, 287.0, 297.0, 282.0, 296.0, 277.0, 290.0, 292.0, 303.0, 276.0, 291.0, 285.0, 284.0, 298.0, 294.0, 288.0, 295.0, 284.0, 283.0, 
290.0, 304.0, 283.0, 92.0, 82.0, 279.0, 294.0, 227.0, 238.0, 288.0, 282.0, 268.0, 262.0, 263.0, 267.0, 261.0, 261.0, 288.0, 291.0, 285.0, 285.0, 296.0, 286.0, 273.0, 300.0, 267.0, 263.0, 289.0, 290.0, 289.0, 284.0, 278.0, 309.0, 281.0, 292.0, 253.0, 280.0, 294.0, 288.0, 282.0, 288.0, 295.0, 281.0, 297.0, 282.0, 278.0, 277.0, 256.0, 274.0, 280.0, 302.0, 261.0, 264.0, 288.0, 294.0, 269.0, 256.0, 282.0, 276.0, 269.0, 256.0, 296.0, 280.0, 293.0, 280.0, 302.0, 280.0, 316.0, 311.0, 291.0, 282.0, 296.0, 294.0, 290.0, 286.0, 287.0, 292.0, 297.0, 282.0, 308.0, 322.0, 263.0, 262.0, 287.0, 289.0, 299.0, 277.0, 300.0, 276.0, 272.0, 258.0, 297.0, 282.0, 291.0, 288.0, 286.0, 290.0, 293.0, 289.0, 276.0, 294.0, 283.0, 287.0, 288.0, 288.0, 286.0, 275.0, 271.0, 311.0, 285.0, 294.0, 291.0, 285.0, 287.0, 280.0, 280.0, 299.0, 306.0, 273.0, 285.0, 294.0, 269.0, 270.0, 296.0, 280.0, 288.0, 299.0, 288.0, 291.0, 289.0, 287.0, 290.0, 283.0, 283.0, 290.0, 298.0, 289.0, 295.0, 287.0, 279.0, 297.0, 275.0, 307.0, 286.0, 293.0, 281.0, 283.0, 138.0, 135.0, 298.0, 284.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6830980537233386, "mean_inference_ms": 1.205477553894199, "mean_action_processing_ms": 0.13231296218106148, "mean_env_wait_ms": 0.8369617254633316, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 630.0, "episode_reward_min": 174.0, "episode_reward_mean": 562.04, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 82.0}, "policy_reward_max": {"ppo": 326.0}, "policy_reward_mean": {"ppo": 281.02}, "hist_stats": {"episode_reward": [573.0, 582.0, 525.0, 552.0, 522.0, 582.0, 573.0, 525.0, 582.0, 582.0, 579.0, 627.0, 579.0, 518.0, 573.0, 576.0, 579.0, 573.0, 582.0, 579.0, 576.0, 582.0, 582.0, 579.0, 573.0, 587.0, 174.0, 573.0, 465.0, 570.0, 530.0, 530.0, 522.0, 579.0, 570.0, 582.0, 573.0, 530.0, 579.0, 573.0, 587.0, 573.0, 533.0, 582.0, 570.0, 576.0, 579.0, 555.0, 530.0, 582.0, 525.0, 582.0, 525.0, 558.0, 525.0, 576.0, 573.0, 
582.0, 627.0, 573.0, 590.0, 576.0, 579.0, 579.0, 630.0, 525.0, 576.0, 576.0, 576.0, 530.0, 579.0, 579.0, 576.0, 582.0, 570.0, 570.0, 576.0, 561.0, 582.0, 579.0, 576.0, 567.0, 579.0, 579.0, 579.0, 539.0, 576.0, 587.0, 579.0, 576.0, 573.0, 573.0, 587.0, 582.0, 576.0, 582.0, 579.0, 564.0, 273.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [277.0, 296.0, 289.0, 293.0, 256.0, 269.0, 273.0, 279.0, 277.0, 245.0, 291.0, 291.0, 287.0, 286.0, 262.0, 263.0, 296.0, 286.0, 280.0, 302.0, 283.0, 296.0, 301.0, 326.0, 281.0, 298.0, 261.0, 257.0, 274.0, 299.0, 289.0, 287.0, 297.0, 282.0, 296.0, 277.0, 290.0, 292.0, 303.0, 276.0, 291.0, 285.0, 284.0, 298.0, 294.0, 288.0, 295.0, 284.0, 283.0, 290.0, 304.0, 283.0, 92.0, 82.0, 279.0, 294.0, 227.0, 238.0, 288.0, 282.0, 268.0, 262.0, 263.0, 267.0, 261.0, 261.0, 288.0, 291.0, 285.0, 285.0, 296.0, 286.0, 273.0, 300.0, 267.0, 263.0, 289.0, 290.0, 289.0, 284.0, 278.0, 309.0, 281.0, 292.0, 253.0, 280.0, 294.0, 288.0, 282.0, 288.0, 295.0, 281.0, 297.0, 282.0, 278.0, 277.0, 256.0, 274.0, 280.0, 302.0, 261.0, 264.0, 288.0, 294.0, 269.0, 256.0, 282.0, 276.0, 269.0, 256.0, 296.0, 280.0, 293.0, 280.0, 302.0, 280.0, 316.0, 311.0, 291.0, 282.0, 296.0, 294.0, 290.0, 286.0, 287.0, 292.0, 297.0, 282.0, 308.0, 322.0, 263.0, 262.0, 287.0, 289.0, 299.0, 277.0, 300.0, 276.0, 272.0, 258.0, 297.0, 282.0, 291.0, 288.0, 286.0, 290.0, 293.0, 289.0, 276.0, 294.0, 283.0, 287.0, 288.0, 288.0, 286.0, 275.0, 271.0, 311.0, 285.0, 294.0, 291.0, 285.0, 287.0, 280.0, 280.0, 
299.0, 306.0, 273.0, 285.0, 294.0, 269.0, 270.0, 296.0, 280.0, 288.0, 299.0, 288.0, 291.0, 289.0, 287.0, 290.0, 283.0, 283.0, 290.0, 298.0, 289.0, 295.0, 287.0, 279.0, 297.0, 275.0, 307.0, 286.0, 293.0, 281.0, 283.0, 138.0, 135.0, 298.0, 284.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6830980537233386, "mean_inference_ms": 1.205477553894199, "mean_action_processing_ms": 0.13231296218106148, "mean_env_wait_ms": 0.8369617254633316, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 4428800, "num_agent_steps_trained": 4428800, "num_env_steps_sampled": 2214400, "num_env_steps_trained": 2214400, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 2214400, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 4428800, "timers": {"training_iteration_time_ms": 3637.654, "learn_time_ms": 1139.364, "learn_throughput": 11234.342, "synch_weights_time_ms": 11.368}, "counters": {"num_env_steps_sampled": 2214400, "num_env_steps_trained": 2214400, "num_agent_steps_sampled": 4428800, "num_agent_steps_trained": 4428800}, "done": false, "episodes_total": 5536, "training_iteration": 173, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-11-11", "timestamp": 1666581071, "time_this_iter_s": 3.733807325363159, "time_total_s": 648.5622055530548, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 648.5622055530548, "timesteps_since_restore": 0, "iterations_since_restore": 173, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.300000000000004, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 195.8, "sparse_reward_min": 80, "sparse_reward_max": 220, "shaped_reward_mean": 174.57, "shaped_reward_min": 113, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.1, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 26, "onion_pickup_agent_1_mean": 16.91, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 15.75, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 26, "useful_onion_pickup_agent_1_mean": 16.53, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 25, "onion_drop_agent_0_mean": 0.29, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.26, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.11, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, 
"useful_onion_drop_agent_1_mean": 0.12, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.48, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 16.37, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 5.69, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 5.26, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.04, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.7, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 10, "dish_drop_agent_0_mean": 0.41, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.28, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.04, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.12, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.88, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.06, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.78, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.05, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.48, "optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 16.37, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.48, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 16.37, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0003722138935700059, "policy_loss": -0.0006101266480982304, "vf_loss": 
7.5663604736328125, "vf_explained_var": 0.672671914100647, "kl": 0.0019681788980960846, "entropy": 1.0374469757080078, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 2227200, "num_env_steps_trained": 2227200, "num_agent_steps_sampled": 4454400, "num_agent_steps_trained": 4454400}, "sampler_results": {"episode_reward_max": 630.0, "episode_reward_min": 273.0, "episode_reward_mean": 566.17, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 135.0}, "policy_reward_max": {"ppo": 322.0}, "policy_reward_mean": {"ppo": 283.085}, "custom_metrics": {"sparse_reward_mean": 195.8, "sparse_reward_min": 80, "sparse_reward_max": 220, "shaped_reward_mean": 174.57, "shaped_reward_min": 113, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.1, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 26, "onion_pickup_agent_1_mean": 16.91, 
"onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 15.75, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 26, "useful_onion_pickup_agent_1_mean": 16.53, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 25, "onion_drop_agent_0_mean": 0.29, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.26, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.11, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.12, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.48, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 16.37, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 5.69, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 5.26, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.04, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.7, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 10, "dish_drop_agent_0_mean": 0.41, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.28, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.04, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.12, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.88, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.06, "soup_delivery_agent_0_min": 1, 
"soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.78, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.05, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.48, "optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 16.37, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.48, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 16.37, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [522.0, 579.0, 570.0, 582.0, 573.0, 530.0, 579.0, 573.0, 587.0, 573.0, 533.0, 582.0, 570.0, 576.0, 579.0, 555.0, 530.0, 582.0, 525.0, 582.0, 525.0, 558.0, 525.0, 576.0, 573.0, 582.0, 627.0, 573.0, 590.0, 576.0, 579.0, 579.0, 630.0, 525.0, 576.0, 576.0, 576.0, 530.0, 579.0, 579.0, 576.0, 582.0, 570.0, 570.0, 576.0, 561.0, 582.0, 579.0, 576.0, 567.0, 579.0, 579.0, 579.0, 539.0, 576.0, 587.0, 579.0, 576.0, 573.0, 573.0, 587.0, 582.0, 576.0, 582.0, 579.0, 564.0, 273.0, 582.0, 573.0, 519.0, 579.0, 582.0, 579.0, 522.0, 567.0, 567.0, 579.0, 576.0, 576.0, 573.0, 522.0, 564.0, 579.0, 576.0, 527.0, 579.0, 584.0, 576.0, 584.0, 582.0, 587.0, 576.0, 576.0, 522.0, 582.0, 582.0, 533.0, 582.0, 582.0, 510.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [261.0, 261.0, 288.0, 291.0, 285.0, 285.0, 296.0, 286.0, 273.0, 300.0, 267.0, 263.0, 289.0, 290.0, 289.0, 284.0, 278.0, 309.0, 281.0, 292.0, 253.0, 280.0, 294.0, 288.0, 282.0, 288.0, 295.0, 281.0, 297.0, 282.0, 278.0, 277.0, 256.0, 274.0, 280.0, 302.0, 261.0, 264.0, 288.0, 294.0, 269.0, 256.0, 282.0, 276.0, 269.0, 256.0, 296.0, 280.0, 293.0, 
280.0, 302.0, 280.0, 316.0, 311.0, 291.0, 282.0, 296.0, 294.0, 290.0, 286.0, 287.0, 292.0, 297.0, 282.0, 308.0, 322.0, 263.0, 262.0, 287.0, 289.0, 299.0, 277.0, 300.0, 276.0, 272.0, 258.0, 297.0, 282.0, 291.0, 288.0, 286.0, 290.0, 293.0, 289.0, 276.0, 294.0, 283.0, 287.0, 288.0, 288.0, 286.0, 275.0, 271.0, 311.0, 285.0, 294.0, 291.0, 285.0, 287.0, 280.0, 280.0, 299.0, 306.0, 273.0, 285.0, 294.0, 269.0, 270.0, 296.0, 280.0, 288.0, 299.0, 288.0, 291.0, 289.0, 287.0, 290.0, 283.0, 283.0, 290.0, 298.0, 289.0, 295.0, 287.0, 279.0, 297.0, 275.0, 307.0, 286.0, 293.0, 281.0, 283.0, 138.0, 135.0, 298.0, 284.0, 279.0, 294.0, 256.0, 263.0, 288.0, 291.0, 295.0, 287.0, 295.0, 284.0, 255.0, 267.0, 266.0, 301.0, 295.0, 272.0, 292.0, 287.0, 295.0, 281.0, 285.0, 291.0, 269.0, 304.0, 255.0, 267.0, 279.0, 285.0, 289.0, 290.0, 296.0, 280.0, 279.0, 248.0, 300.0, 279.0, 285.0, 299.0, 298.0, 278.0, 290.0, 294.0, 281.0, 301.0, 304.0, 283.0, 279.0, 297.0, 286.0, 290.0, 257.0, 265.0, 289.0, 293.0, 281.0, 301.0, 280.0, 253.0, 286.0, 296.0, 283.0, 299.0, 263.0, 247.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.682994178956515, "mean_inference_ms": 1.205319486334547, "mean_action_processing_ms": 0.13230146982573307, "mean_env_wait_ms": 0.8367514360219342, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 630.0, "episode_reward_min": 273.0, "episode_reward_mean": 566.17, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 135.0}, "policy_reward_max": {"ppo": 322.0}, "policy_reward_mean": {"ppo": 283.085}, "hist_stats": {"episode_reward": [522.0, 579.0, 570.0, 582.0, 573.0, 530.0, 579.0, 573.0, 587.0, 573.0, 533.0, 582.0, 570.0, 576.0, 579.0, 555.0, 530.0, 582.0, 525.0, 582.0, 525.0, 558.0, 525.0, 576.0, 573.0, 582.0, 627.0, 573.0, 590.0, 576.0, 579.0, 579.0, 630.0, 525.0, 576.0, 576.0, 576.0, 530.0, 579.0, 579.0, 576.0, 582.0, 570.0, 570.0, 576.0, 561.0, 582.0, 579.0, 576.0, 567.0, 579.0, 579.0, 579.0, 539.0, 576.0, 587.0, 579.0, 
576.0, 573.0, 573.0, 587.0, 582.0, 576.0, 582.0, 579.0, 564.0, 273.0, 582.0, 573.0, 519.0, 579.0, 582.0, 579.0, 522.0, 567.0, 567.0, 579.0, 576.0, 576.0, 573.0, 522.0, 564.0, 579.0, 576.0, 527.0, 579.0, 584.0, 576.0, 584.0, 582.0, 587.0, 576.0, 576.0, 522.0, 582.0, 582.0, 533.0, 582.0, 582.0, 510.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [261.0, 261.0, 288.0, 291.0, 285.0, 285.0, 296.0, 286.0, 273.0, 300.0, 267.0, 263.0, 289.0, 290.0, 289.0, 284.0, 278.0, 309.0, 281.0, 292.0, 253.0, 280.0, 294.0, 288.0, 282.0, 288.0, 295.0, 281.0, 297.0, 282.0, 278.0, 277.0, 256.0, 274.0, 280.0, 302.0, 261.0, 264.0, 288.0, 294.0, 269.0, 256.0, 282.0, 276.0, 269.0, 256.0, 296.0, 280.0, 293.0, 280.0, 302.0, 280.0, 316.0, 311.0, 291.0, 282.0, 296.0, 294.0, 290.0, 286.0, 287.0, 292.0, 297.0, 282.0, 308.0, 322.0, 263.0, 262.0, 287.0, 289.0, 299.0, 277.0, 300.0, 276.0, 272.0, 258.0, 297.0, 282.0, 291.0, 288.0, 286.0, 290.0, 293.0, 289.0, 276.0, 294.0, 283.0, 287.0, 288.0, 288.0, 286.0, 275.0, 271.0, 311.0, 285.0, 294.0, 291.0, 285.0, 287.0, 280.0, 280.0, 299.0, 306.0, 273.0, 285.0, 294.0, 269.0, 270.0, 296.0, 280.0, 288.0, 299.0, 288.0, 291.0, 289.0, 287.0, 290.0, 283.0, 283.0, 290.0, 298.0, 289.0, 295.0, 287.0, 279.0, 297.0, 275.0, 307.0, 286.0, 293.0, 281.0, 283.0, 138.0, 135.0, 298.0, 284.0, 279.0, 294.0, 256.0, 263.0, 288.0, 291.0, 295.0, 287.0, 295.0, 284.0, 255.0, 267.0, 266.0, 301.0, 295.0, 272.0, 292.0, 287.0, 295.0, 281.0, 285.0, 291.0, 269.0, 304.0, 255.0, 267.0, 279.0, 285.0, 289.0, 
290.0, 296.0, 280.0, 279.0, 248.0, 300.0, 279.0, 285.0, 299.0, 298.0, 278.0, 290.0, 294.0, 281.0, 301.0, 304.0, 283.0, 279.0, 297.0, 286.0, 290.0, 257.0, 265.0, 289.0, 293.0, 281.0, 301.0, 280.0, 253.0, 286.0, 296.0, 283.0, 299.0, 263.0, 247.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.682994178956515, "mean_inference_ms": 1.205319486334547, "mean_action_processing_ms": 0.13230146982573307, "mean_env_wait_ms": 0.8367514360219342, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 4454400, "num_agent_steps_trained": 4454400, "num_env_steps_sampled": 2227200, "num_env_steps_trained": 2227200, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 2227200, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 4454400, "timers": {"training_iteration_time_ms": 3618.034, "learn_time_ms": 1141.84, "learn_throughput": 11209.981, "synch_weights_time_ms": 10.787}, "counters": {"num_env_steps_sampled": 2227200, "num_env_steps_trained": 2227200, "num_agent_steps_sampled": 4454400, "num_agent_steps_trained": 4454400}, "done": false, "episodes_total": 5568, "training_iteration": 174, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-11-15", "timestamp": 1666581075, "time_this_iter_s": 3.605084180831909, "time_total_s": 652.1672897338867, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 652.1672897338867, "timesteps_since_restore": 0, "iterations_since_restore": 174, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.633333333333333, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 195.4, "sparse_reward_min": 80, "sparse_reward_max": 220, "shaped_reward_mean": 174.07, "shaped_reward_min": 113, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.19, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 16.9, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 15.77, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 16.4, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.32, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.28, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.14, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, 
"useful_onion_drop_agent_1_mean": 0.15, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.48, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 16.26, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.61, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 5.31, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.99, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.76, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.37, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.33, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.1, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.86, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.05, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.76, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.05, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.48, "optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 16.26, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.48, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 16.26, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0019827396608889103, "policy_loss": -0.0022173135075718164, "vf_loss": 
7.520552635192871, "vf_explained_var": 0.6868171095848083, "kl": 0.00197706394828856, "entropy": 1.0349608659744263, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 2240000, "num_env_steps_trained": 2240000, "num_agent_steps_sampled": 4480000, "num_agent_steps_trained": 4480000}, "sampler_results": {"episode_reward_max": 630.0, "episode_reward_min": 273.0, "episode_reward_mean": 564.87, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 135.0}, "policy_reward_max": {"ppo": 322.0}, "policy_reward_mean": {"ppo": 282.435}, "custom_metrics": {"sparse_reward_mean": 195.4, "sparse_reward_min": 80, "sparse_reward_max": 220, "shaped_reward_mean": 174.07, "shaped_reward_min": 113, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.19, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 16.9, 
"onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 15.77, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 16.4, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.32, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.28, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.14, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.15, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.48, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 16.26, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.61, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 5.31, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.99, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.76, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.37, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.33, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.1, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.86, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.05, "soup_delivery_agent_0_min": 1, 
"soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.76, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.05, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.48, "optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 16.26, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.48, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 16.26, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [630.0, 525.0, 576.0, 576.0, 576.0, 530.0, 579.0, 579.0, 576.0, 582.0, 570.0, 570.0, 576.0, 561.0, 582.0, 579.0, 576.0, 567.0, 579.0, 579.0, 579.0, 539.0, 576.0, 587.0, 579.0, 576.0, 573.0, 573.0, 587.0, 582.0, 576.0, 582.0, 579.0, 564.0, 273.0, 582.0, 573.0, 519.0, 579.0, 582.0, 579.0, 522.0, 567.0, 567.0, 579.0, 576.0, 576.0, 573.0, 522.0, 564.0, 579.0, 576.0, 527.0, 579.0, 584.0, 576.0, 584.0, 582.0, 587.0, 576.0, 576.0, 522.0, 582.0, 582.0, 533.0, 582.0, 582.0, 510.0, 573.0, 579.0, 522.0, 527.0, 579.0, 530.0, 573.0, 525.0, 570.0, 579.0, 573.0, 579.0, 570.0, 576.0, 573.0, 579.0, 525.0, 579.0, 582.0, 576.0, 582.0, 573.0, 579.0, 522.0, 522.0, 522.0, 576.0, 630.0, 579.0, 512.0, 573.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [308.0, 322.0, 263.0, 262.0, 287.0, 289.0, 299.0, 277.0, 300.0, 276.0, 272.0, 258.0, 297.0, 282.0, 291.0, 288.0, 286.0, 290.0, 293.0, 289.0, 276.0, 294.0, 283.0, 287.0, 288.0, 288.0, 286.0, 275.0, 271.0, 311.0, 285.0, 294.0, 291.0, 285.0, 287.0, 280.0, 280.0, 299.0, 306.0, 273.0, 285.0, 294.0, 269.0, 270.0, 296.0, 280.0, 288.0, 299.0, 288.0, 
291.0, 289.0, 287.0, 290.0, 283.0, 283.0, 290.0, 298.0, 289.0, 295.0, 287.0, 279.0, 297.0, 275.0, 307.0, 286.0, 293.0, 281.0, 283.0, 138.0, 135.0, 298.0, 284.0, 279.0, 294.0, 256.0, 263.0, 288.0, 291.0, 295.0, 287.0, 295.0, 284.0, 255.0, 267.0, 266.0, 301.0, 295.0, 272.0, 292.0, 287.0, 295.0, 281.0, 285.0, 291.0, 269.0, 304.0, 255.0, 267.0, 279.0, 285.0, 289.0, 290.0, 296.0, 280.0, 279.0, 248.0, 300.0, 279.0, 285.0, 299.0, 298.0, 278.0, 290.0, 294.0, 281.0, 301.0, 304.0, 283.0, 279.0, 297.0, 286.0, 290.0, 257.0, 265.0, 289.0, 293.0, 281.0, 301.0, 280.0, 253.0, 286.0, 296.0, 283.0, 299.0, 263.0, 247.0, 284.0, 289.0, 283.0, 296.0, 267.0, 255.0, 276.0, 251.0, 288.0, 291.0, 267.0, 263.0, 295.0, 278.0, 256.0, 269.0, 293.0, 277.0, 296.0, 283.0, 275.0, 298.0, 288.0, 291.0, 296.0, 274.0, 282.0, 294.0, 283.0, 290.0, 281.0, 298.0, 267.0, 258.0, 284.0, 295.0, 283.0, 299.0, 290.0, 286.0, 301.0, 281.0, 282.0, 291.0, 293.0, 286.0, 260.0, 262.0, 267.0, 255.0, 247.0, 275.0, 290.0, 286.0, 318.0, 312.0, 306.0, 273.0, 256.0, 256.0, 274.0, 299.0, 286.0, 290.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6829100253627054, "mean_inference_ms": 1.2051592701617222, "mean_action_processing_ms": 0.13229100091564924, "mean_env_wait_ms": 0.8365500433383677, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 630.0, "episode_reward_min": 273.0, "episode_reward_mean": 564.87, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 135.0}, "policy_reward_max": {"ppo": 322.0}, "policy_reward_mean": {"ppo": 282.435}, "hist_stats": {"episode_reward": [630.0, 525.0, 576.0, 576.0, 576.0, 530.0, 579.0, 579.0, 576.0, 582.0, 570.0, 570.0, 576.0, 561.0, 582.0, 579.0, 576.0, 567.0, 579.0, 579.0, 579.0, 539.0, 576.0, 587.0, 579.0, 576.0, 573.0, 573.0, 587.0, 582.0, 576.0, 582.0, 579.0, 564.0, 273.0, 582.0, 573.0, 519.0, 579.0, 582.0, 579.0, 522.0, 567.0, 567.0, 579.0, 576.0, 576.0, 573.0, 522.0, 564.0, 579.0, 576.0, 527.0, 579.0, 584.0, 576.0, 
584.0, 582.0, 587.0, 576.0, 576.0, 522.0, 582.0, 582.0, 533.0, 582.0, 582.0, 510.0, 573.0, 579.0, 522.0, 527.0, 579.0, 530.0, 573.0, 525.0, 570.0, 579.0, 573.0, 579.0, 570.0, 576.0, 573.0, 579.0, 525.0, 579.0, 582.0, 576.0, 582.0, 573.0, 579.0, 522.0, 522.0, 522.0, 576.0, 630.0, 579.0, 512.0, 573.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [308.0, 322.0, 263.0, 262.0, 287.0, 289.0, 299.0, 277.0, 300.0, 276.0, 272.0, 258.0, 297.0, 282.0, 291.0, 288.0, 286.0, 290.0, 293.0, 289.0, 276.0, 294.0, 283.0, 287.0, 288.0, 288.0, 286.0, 275.0, 271.0, 311.0, 285.0, 294.0, 291.0, 285.0, 287.0, 280.0, 280.0, 299.0, 306.0, 273.0, 285.0, 294.0, 269.0, 270.0, 296.0, 280.0, 288.0, 299.0, 288.0, 291.0, 289.0, 287.0, 290.0, 283.0, 283.0, 290.0, 298.0, 289.0, 295.0, 287.0, 279.0, 297.0, 275.0, 307.0, 286.0, 293.0, 281.0, 283.0, 138.0, 135.0, 298.0, 284.0, 279.0, 294.0, 256.0, 263.0, 288.0, 291.0, 295.0, 287.0, 295.0, 284.0, 255.0, 267.0, 266.0, 301.0, 295.0, 272.0, 292.0, 287.0, 295.0, 281.0, 285.0, 291.0, 269.0, 304.0, 255.0, 267.0, 279.0, 285.0, 289.0, 290.0, 296.0, 280.0, 279.0, 248.0, 300.0, 279.0, 285.0, 299.0, 298.0, 278.0, 290.0, 294.0, 281.0, 301.0, 304.0, 283.0, 279.0, 297.0, 286.0, 290.0, 257.0, 265.0, 289.0, 293.0, 281.0, 301.0, 280.0, 253.0, 286.0, 296.0, 283.0, 299.0, 263.0, 247.0, 284.0, 289.0, 283.0, 296.0, 267.0, 255.0, 276.0, 251.0, 288.0, 291.0, 267.0, 263.0, 295.0, 278.0, 256.0, 269.0, 293.0, 277.0, 296.0, 283.0, 275.0, 298.0, 288.0, 291.0, 296.0, 274.0, 282.0, 294.0, 
283.0, 290.0, 281.0, 298.0, 267.0, 258.0, 284.0, 295.0, 283.0, 299.0, 290.0, 286.0, 301.0, 281.0, 282.0, 291.0, 293.0, 286.0, 260.0, 262.0, 267.0, 255.0, 247.0, 275.0, 290.0, 286.0, 318.0, 312.0, 306.0, 273.0, 256.0, 256.0, 274.0, 299.0, 286.0, 290.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6829100253627054, "mean_inference_ms": 1.2051592701617222, "mean_action_processing_ms": 0.13229100091564924, "mean_env_wait_ms": 0.8365500433383677, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 4480000, "num_agent_steps_trained": 4480000, "num_env_steps_sampled": 2240000, "num_env_steps_trained": 2240000, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 2240000, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 4480000, "timers": {"training_iteration_time_ms": 3633.635, "learn_time_ms": 1148.824, "learn_throughput": 11141.83, "synch_weights_time_ms": 10.544}, "counters": {"num_env_steps_sampled": 2240000, "num_env_steps_trained": 2240000, "num_agent_steps_sampled": 4480000, "num_agent_steps_trained": 4480000}, "done": false, "episodes_total": 5600, "training_iteration": 175, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-11-19", "timestamp": 1666581079, "time_this_iter_s": 3.750723123550415, "time_total_s": 655.9180128574371, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 655.9180128574371, "timesteps_since_restore": 0, "iterations_since_restore": 175, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.8, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 194.4, "sparse_reward_min": 80, "sparse_reward_max": 220, "shaped_reward_mean": 173.55, "shaped_reward_min": 113, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.38, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 16.48, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 15.92, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 16.03, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.22, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.26, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.12, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, 
"useful_onion_drop_agent_1_mean": 0.15, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.71, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 15.89, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.51, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.25, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 4.98, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.77, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.29, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.27, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.13, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.8, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.06, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.72, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.04, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.71, "optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 15.89, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.71, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 15.89, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0017110540065914392, "policy_loss": 0.0014627662021666765, "vf_loss": 
7.637947082519531, "vf_explained_var": 0.6661041975021362, "kl": 0.0020245490595698357, "entropy": 1.031015157699585, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 2252800, "num_env_steps_trained": 2252800, "num_agent_steps_sampled": 4505600, "num_agent_steps_trained": 4505600}, "sampler_results": {"episode_reward_max": 630.0, "episode_reward_min": 273.0, "episode_reward_mean": 562.35, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 135.0}, "policy_reward_max": {"ppo": 318.0}, "policy_reward_mean": {"ppo": 281.175}, "custom_metrics": {"sparse_reward_mean": 194.4, "sparse_reward_min": 80, "sparse_reward_max": 220, "shaped_reward_mean": 173.55, "shaped_reward_min": 113, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.38, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 16.48, 
"onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 15.92, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 16.03, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.22, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.26, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.12, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.15, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.71, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 15.89, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.51, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.25, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 4.98, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.77, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.29, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.27, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.13, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.8, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.06, "soup_delivery_agent_0_min": 1, 
"soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.72, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.04, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.71, "optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 15.89, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.71, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 15.89, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 564.0, 273.0, 582.0, 573.0, 519.0, 579.0, 582.0, 579.0, 522.0, 567.0, 567.0, 579.0, 576.0, 576.0, 573.0, 522.0, 564.0, 579.0, 576.0, 527.0, 579.0, 584.0, 576.0, 584.0, 582.0, 587.0, 576.0, 576.0, 522.0, 582.0, 582.0, 533.0, 582.0, 582.0, 510.0, 573.0, 579.0, 522.0, 527.0, 579.0, 530.0, 573.0, 525.0, 570.0, 579.0, 573.0, 579.0, 570.0, 576.0, 573.0, 579.0, 525.0, 579.0, 582.0, 576.0, 582.0, 573.0, 579.0, 522.0, 522.0, 522.0, 576.0, 630.0, 579.0, 512.0, 573.0, 576.0, 567.0, 570.0, 569.0, 579.0, 576.0, 582.0, 530.0, 579.0, 582.0, 582.0, 567.0, 587.0, 576.0, 522.0, 576.0, 582.0, 582.0, 522.0, 587.0, 579.0, 582.0, 576.0, 582.0, 570.0, 441.0, 522.0, 576.0, 576.0, 576.0, 576.0, 576.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [286.0, 293.0, 281.0, 283.0, 138.0, 135.0, 298.0, 284.0, 279.0, 294.0, 256.0, 263.0, 288.0, 291.0, 295.0, 287.0, 295.0, 284.0, 255.0, 267.0, 266.0, 301.0, 295.0, 272.0, 292.0, 287.0, 295.0, 281.0, 285.0, 291.0, 269.0, 304.0, 255.0, 267.0, 279.0, 285.0, 289.0, 290.0, 296.0, 280.0, 279.0, 248.0, 300.0, 279.0, 285.0, 299.0, 298.0, 278.0, 290.0, 
294.0, 281.0, 301.0, 304.0, 283.0, 279.0, 297.0, 286.0, 290.0, 257.0, 265.0, 289.0, 293.0, 281.0, 301.0, 280.0, 253.0, 286.0, 296.0, 283.0, 299.0, 263.0, 247.0, 284.0, 289.0, 283.0, 296.0, 267.0, 255.0, 276.0, 251.0, 288.0, 291.0, 267.0, 263.0, 295.0, 278.0, 256.0, 269.0, 293.0, 277.0, 296.0, 283.0, 275.0, 298.0, 288.0, 291.0, 296.0, 274.0, 282.0, 294.0, 283.0, 290.0, 281.0, 298.0, 267.0, 258.0, 284.0, 295.0, 283.0, 299.0, 290.0, 286.0, 301.0, 281.0, 282.0, 291.0, 293.0, 286.0, 260.0, 262.0, 267.0, 255.0, 247.0, 275.0, 290.0, 286.0, 318.0, 312.0, 306.0, 273.0, 256.0, 256.0, 274.0, 299.0, 286.0, 290.0, 290.0, 277.0, 287.0, 283.0, 286.0, 283.0, 296.0, 283.0, 293.0, 283.0, 276.0, 306.0, 273.0, 257.0, 289.0, 290.0, 293.0, 289.0, 301.0, 281.0, 285.0, 282.0, 296.0, 291.0, 292.0, 284.0, 267.0, 255.0, 288.0, 288.0, 296.0, 286.0, 289.0, 293.0, 262.0, 260.0, 304.0, 283.0, 295.0, 284.0, 304.0, 278.0, 297.0, 279.0, 293.0, 289.0, 285.0, 285.0, 221.0, 220.0, 250.0, 272.0, 302.0, 274.0, 293.0, 283.0, 277.0, 299.0, 298.0, 278.0, 291.0, 285.0, 296.0, 280.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6828569282642821, "mean_inference_ms": 1.2050293820721487, "mean_action_processing_ms": 0.13228236830438264, "mean_env_wait_ms": 0.8363800663320577, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 630.0, "episode_reward_min": 273.0, "episode_reward_mean": 562.35, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 135.0}, "policy_reward_max": {"ppo": 318.0}, "policy_reward_mean": {"ppo": 281.175}, "hist_stats": {"episode_reward": [579.0, 564.0, 273.0, 582.0, 573.0, 519.0, 579.0, 582.0, 579.0, 522.0, 567.0, 567.0, 579.0, 576.0, 576.0, 573.0, 522.0, 564.0, 579.0, 576.0, 527.0, 579.0, 584.0, 576.0, 584.0, 582.0, 587.0, 576.0, 576.0, 522.0, 582.0, 582.0, 533.0, 582.0, 582.0, 510.0, 573.0, 579.0, 522.0, 527.0, 579.0, 530.0, 573.0, 525.0, 570.0, 579.0, 573.0, 579.0, 570.0, 576.0, 573.0, 579.0, 525.0, 579.0, 582.0, 576.0, 
582.0, 573.0, 579.0, 522.0, 522.0, 522.0, 576.0, 630.0, 579.0, 512.0, 573.0, 576.0, 567.0, 570.0, 569.0, 579.0, 576.0, 582.0, 530.0, 579.0, 582.0, 582.0, 567.0, 587.0, 576.0, 522.0, 576.0, 582.0, 582.0, 522.0, 587.0, 579.0, 582.0, 576.0, 582.0, 570.0, 441.0, 522.0, 576.0, 576.0, 576.0, 576.0, 576.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [286.0, 293.0, 281.0, 283.0, 138.0, 135.0, 298.0, 284.0, 279.0, 294.0, 256.0, 263.0, 288.0, 291.0, 295.0, 287.0, 295.0, 284.0, 255.0, 267.0, 266.0, 301.0, 295.0, 272.0, 292.0, 287.0, 295.0, 281.0, 285.0, 291.0, 269.0, 304.0, 255.0, 267.0, 279.0, 285.0, 289.0, 290.0, 296.0, 280.0, 279.0, 248.0, 300.0, 279.0, 285.0, 299.0, 298.0, 278.0, 290.0, 294.0, 281.0, 301.0, 304.0, 283.0, 279.0, 297.0, 286.0, 290.0, 257.0, 265.0, 289.0, 293.0, 281.0, 301.0, 280.0, 253.0, 286.0, 296.0, 283.0, 299.0, 263.0, 247.0, 284.0, 289.0, 283.0, 296.0, 267.0, 255.0, 276.0, 251.0, 288.0, 291.0, 267.0, 263.0, 295.0, 278.0, 256.0, 269.0, 293.0, 277.0, 296.0, 283.0, 275.0, 298.0, 288.0, 291.0, 296.0, 274.0, 282.0, 294.0, 283.0, 290.0, 281.0, 298.0, 267.0, 258.0, 284.0, 295.0, 283.0, 299.0, 290.0, 286.0, 301.0, 281.0, 282.0, 291.0, 293.0, 286.0, 260.0, 262.0, 267.0, 255.0, 247.0, 275.0, 290.0, 286.0, 318.0, 312.0, 306.0, 273.0, 256.0, 256.0, 274.0, 299.0, 286.0, 290.0, 290.0, 277.0, 287.0, 283.0, 286.0, 283.0, 296.0, 283.0, 293.0, 283.0, 276.0, 306.0, 273.0, 257.0, 289.0, 290.0, 293.0, 289.0, 301.0, 281.0, 285.0, 282.0, 296.0, 291.0, 292.0, 284.0, 267.0, 255.0, 
288.0, 288.0, 296.0, 286.0, 289.0, 293.0, 262.0, 260.0, 304.0, 283.0, 295.0, 284.0, 304.0, 278.0, 297.0, 279.0, 293.0, 289.0, 285.0, 285.0, 221.0, 220.0, 250.0, 272.0, 302.0, 274.0, 293.0, 283.0, 277.0, 299.0, 298.0, 278.0, 291.0, 285.0, 296.0, 280.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6828569282642821, "mean_inference_ms": 1.2050293820721487, "mean_action_processing_ms": 0.13228236830438264, "mean_env_wait_ms": 0.8363800663320577, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 4505600, "num_agent_steps_trained": 4505600, "num_env_steps_sampled": 2252800, "num_env_steps_trained": 2252800, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 2252800, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 4505600, "timers": {"training_iteration_time_ms": 3632.017, "learn_time_ms": 1144.529, "learn_throughput": 11183.643, "synch_weights_time_ms": 11.141}, "counters": {"num_env_steps_sampled": 2252800, "num_env_steps_trained": 2252800, "num_agent_steps_sampled": 4505600, "num_agent_steps_trained": 4505600}, "done": false, "episodes_total": 5632, "training_iteration": 176, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-11-22", "timestamp": 1666581082, "time_this_iter_s": 3.664487361907959, "time_total_s": 659.5825002193451, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 659.5825002193451, "timesteps_since_restore": 0, "iterations_since_restore": 176, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.333333333333332, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 196.4, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 174.91, "shaped_reward_min": 150, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.3, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 16.76, "onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 15.85, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 16.26, "useful_onion_pickup_agent_1_min": 11, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.19, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.26, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.11, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, 
"useful_onion_drop_agent_1_mean": 0.14, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.72, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 16.12, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.66, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.12, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.19, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.69, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.26, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.26, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.04, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.32, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.64, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.26, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.59, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.72, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 16.12, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.72, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 16.12, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0003441051230765879, "policy_loss": -0.0005928004975430667, "vf_loss": 
7.614487171173096, "vf_explained_var": 0.663824200630188, "kl": 0.002044765744358301, "entropy": 1.0255047082901, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 2265600, "num_env_steps_trained": 2265600, "num_agent_steps_sampled": 4531200, "num_agent_steps_trained": 4531200}, "sampler_results": {"episode_reward_max": 630.0, "episode_reward_min": 441.0, "episode_reward_mean": 567.71, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 220.0}, "policy_reward_max": {"ppo": 321.0}, "policy_reward_mean": {"ppo": 283.855}, "custom_metrics": {"sparse_reward_mean": 196.4, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 174.91, "shaped_reward_min": 150, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.3, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 16.76, 
"onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 15.85, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 16.26, "useful_onion_pickup_agent_1_min": 11, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.19, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.26, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.11, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.14, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.72, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 16.12, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.66, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.12, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.19, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.69, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.26, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.26, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.04, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.32, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.64, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.26, "soup_delivery_agent_0_min": 1, 
"soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.59, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.72, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 16.12, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.72, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 16.12, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [533.0, 582.0, 582.0, 510.0, 573.0, 579.0, 522.0, 527.0, 579.0, 530.0, 573.0, 525.0, 570.0, 579.0, 573.0, 579.0, 570.0, 576.0, 573.0, 579.0, 525.0, 579.0, 582.0, 576.0, 582.0, 573.0, 579.0, 522.0, 522.0, 522.0, 576.0, 630.0, 579.0, 512.0, 573.0, 576.0, 567.0, 570.0, 569.0, 579.0, 576.0, 582.0, 530.0, 579.0, 582.0, 582.0, 567.0, 587.0, 576.0, 522.0, 576.0, 582.0, 582.0, 522.0, 587.0, 579.0, 582.0, 576.0, 582.0, 570.0, 441.0, 522.0, 576.0, 576.0, 576.0, 576.0, 576.0, 576.0, 579.0, 522.0, 570.0, 579.0, 579.0, 519.0, 579.0, 576.0, 573.0, 630.0, 576.0, 576.0, 576.0, 587.0, 579.0, 582.0, 576.0, 522.0, 570.0, 579.0, 582.0, 573.0, 579.0, 579.0, 573.0, 584.0, 630.0, 579.0, 582.0, 576.0, 579.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [280.0, 253.0, 286.0, 296.0, 283.0, 299.0, 263.0, 247.0, 284.0, 289.0, 283.0, 296.0, 267.0, 255.0, 276.0, 251.0, 288.0, 291.0, 267.0, 263.0, 295.0, 278.0, 256.0, 269.0, 293.0, 277.0, 296.0, 283.0, 275.0, 298.0, 288.0, 291.0, 296.0, 274.0, 282.0, 294.0, 283.0, 290.0, 281.0, 298.0, 267.0, 258.0, 284.0, 295.0, 283.0, 299.0, 290.0, 286.0, 301.0, 
281.0, 282.0, 291.0, 293.0, 286.0, 260.0, 262.0, 267.0, 255.0, 247.0, 275.0, 290.0, 286.0, 318.0, 312.0, 306.0, 273.0, 256.0, 256.0, 274.0, 299.0, 286.0, 290.0, 290.0, 277.0, 287.0, 283.0, 286.0, 283.0, 296.0, 283.0, 293.0, 283.0, 276.0, 306.0, 273.0, 257.0, 289.0, 290.0, 293.0, 289.0, 301.0, 281.0, 285.0, 282.0, 296.0, 291.0, 292.0, 284.0, 267.0, 255.0, 288.0, 288.0, 296.0, 286.0, 289.0, 293.0, 262.0, 260.0, 304.0, 283.0, 295.0, 284.0, 304.0, 278.0, 297.0, 279.0, 293.0, 289.0, 285.0, 285.0, 221.0, 220.0, 250.0, 272.0, 302.0, 274.0, 293.0, 283.0, 277.0, 299.0, 298.0, 278.0, 291.0, 285.0, 296.0, 280.0, 296.0, 283.0, 260.0, 262.0, 281.0, 289.0, 298.0, 281.0, 304.0, 275.0, 272.0, 247.0, 286.0, 293.0, 288.0, 288.0, 281.0, 292.0, 311.0, 319.0, 290.0, 286.0, 287.0, 289.0, 300.0, 276.0, 308.0, 279.0, 297.0, 282.0, 293.0, 289.0, 293.0, 283.0, 277.0, 245.0, 291.0, 279.0, 295.0, 284.0, 295.0, 287.0, 287.0, 286.0, 290.0, 289.0, 287.0, 292.0, 284.0, 289.0, 285.0, 299.0, 321.0, 309.0, 291.0, 288.0, 286.0, 296.0, 275.0, 301.0, 279.0, 300.0, 289.0, 290.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6828439834282318, "mean_inference_ms": 1.2049497279210897, "mean_action_processing_ms": 0.13228139387451182, "mean_env_wait_ms": 0.8362641339684356, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 630.0, "episode_reward_min": 441.0, "episode_reward_mean": 567.71, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 220.0}, "policy_reward_max": {"ppo": 321.0}, "policy_reward_mean": {"ppo": 283.855}, "hist_stats": {"episode_reward": [533.0, 582.0, 582.0, 510.0, 573.0, 579.0, 522.0, 527.0, 579.0, 530.0, 573.0, 525.0, 570.0, 579.0, 573.0, 579.0, 570.0, 576.0, 573.0, 579.0, 525.0, 579.0, 582.0, 576.0, 582.0, 573.0, 579.0, 522.0, 522.0, 522.0, 576.0, 630.0, 579.0, 512.0, 573.0, 576.0, 567.0, 570.0, 569.0, 579.0, 576.0, 582.0, 530.0, 579.0, 582.0, 582.0, 567.0, 587.0, 576.0, 522.0, 576.0, 582.0, 582.0, 522.0, 587.0, 579.0, 
582.0, 576.0, 582.0, 570.0, 441.0, 522.0, 576.0, 576.0, 576.0, 576.0, 576.0, 576.0, 579.0, 522.0, 570.0, 579.0, 579.0, 519.0, 579.0, 576.0, 573.0, 630.0, 576.0, 576.0, 576.0, 587.0, 579.0, 582.0, 576.0, 522.0, 570.0, 579.0, 582.0, 573.0, 579.0, 579.0, 573.0, 584.0, 630.0, 579.0, 582.0, 576.0, 579.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [280.0, 253.0, 286.0, 296.0, 283.0, 299.0, 263.0, 247.0, 284.0, 289.0, 283.0, 296.0, 267.0, 255.0, 276.0, 251.0, 288.0, 291.0, 267.0, 263.0, 295.0, 278.0, 256.0, 269.0, 293.0, 277.0, 296.0, 283.0, 275.0, 298.0, 288.0, 291.0, 296.0, 274.0, 282.0, 294.0, 283.0, 290.0, 281.0, 298.0, 267.0, 258.0, 284.0, 295.0, 283.0, 299.0, 290.0, 286.0, 301.0, 281.0, 282.0, 291.0, 293.0, 286.0, 260.0, 262.0, 267.0, 255.0, 247.0, 275.0, 290.0, 286.0, 318.0, 312.0, 306.0, 273.0, 256.0, 256.0, 274.0, 299.0, 286.0, 290.0, 290.0, 277.0, 287.0, 283.0, 286.0, 283.0, 296.0, 283.0, 293.0, 283.0, 276.0, 306.0, 273.0, 257.0, 289.0, 290.0, 293.0, 289.0, 301.0, 281.0, 285.0, 282.0, 296.0, 291.0, 292.0, 284.0, 267.0, 255.0, 288.0, 288.0, 296.0, 286.0, 289.0, 293.0, 262.0, 260.0, 304.0, 283.0, 295.0, 284.0, 304.0, 278.0, 297.0, 279.0, 293.0, 289.0, 285.0, 285.0, 221.0, 220.0, 250.0, 272.0, 302.0, 274.0, 293.0, 283.0, 277.0, 299.0, 298.0, 278.0, 291.0, 285.0, 296.0, 280.0, 296.0, 283.0, 260.0, 262.0, 281.0, 289.0, 298.0, 281.0, 304.0, 275.0, 272.0, 247.0, 286.0, 293.0, 288.0, 288.0, 281.0, 292.0, 311.0, 319.0, 290.0, 286.0, 287.0, 289.0, 300.0, 276.0, 308.0, 279.0, 
297.0, 282.0, 293.0, 289.0, 293.0, 283.0, 277.0, 245.0, 291.0, 279.0, 295.0, 284.0, 295.0, 287.0, 287.0, 286.0, 290.0, 289.0, 287.0, 292.0, 284.0, 289.0, 285.0, 299.0, 321.0, 309.0, 291.0, 288.0, 286.0, 296.0, 275.0, 301.0, 279.0, 300.0, 289.0, 290.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6828439834282318, "mean_inference_ms": 1.2049497279210897, "mean_action_processing_ms": 0.13228139387451182, "mean_env_wait_ms": 0.8362641339684356, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 4531200, "num_agent_steps_trained": 4531200, "num_env_steps_sampled": 2265600, "num_env_steps_trained": 2265600, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 2265600, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 4531200, "timers": {"training_iteration_time_ms": 3632.949, "learn_time_ms": 1142.508, "learn_throughput": 11203.42, "synch_weights_time_ms": 11.479}, "counters": {"num_env_steps_sampled": 2265600, "num_env_steps_trained": 2265600, "num_agent_steps_sampled": 4531200, "num_agent_steps_trained": 4531200}, "done": false, "episodes_total": 5664, "training_iteration": 177, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-11-26", "timestamp": 1666581086, "time_this_iter_s": 3.659736156463623, "time_total_s": 663.2422363758087, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 663.2422363758087, "timesteps_since_restore": 0, "iterations_since_restore": 177, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 24.020000000000003, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 198.0, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 176.13, "shaped_reward_min": 152, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.2, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 16.91, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 15.78, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 16.46, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.16, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.25, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.08, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, 
"useful_onion_drop_agent_1_mean": 0.15, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 15.74, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 16.31, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.82, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.01, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.36, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.6, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 10, "dish_drop_agent_0_mean": 0.27, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.21, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.42, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.61, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.36, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.56, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.74, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 16.31, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.74, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 16.31, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0015791004989296198, "policy_loss": 0.0013302306178957224, "vf_loss": 
7.5834221839904785, "vf_explained_var": 0.6672846078872681, "kl": 0.0019746399484574795, "entropy": 1.0189414024353027, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 2278400, "num_env_steps_trained": 2278400, "num_agent_steps_sampled": 4556800, "num_agent_steps_trained": 4556800}, "sampler_results": {"episode_reward_max": 630.0, "episode_reward_min": 441.0, "episode_reward_mean": 572.13, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 220.0}, "policy_reward_max": {"ppo": 322.0}, "policy_reward_mean": {"ppo": 286.065}, "custom_metrics": {"sparse_reward_mean": 198.0, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 176.13, "shaped_reward_min": 152, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.2, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 16.91, 
"onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 15.78, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 16.46, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.16, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.25, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.08, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.15, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 15.74, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 16.31, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.82, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.01, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.36, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.6, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 10, "dish_drop_agent_0_mean": 0.27, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.21, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.42, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.61, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.36, "soup_delivery_agent_0_min": 1, 
"soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.56, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.74, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 16.31, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.74, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 16.31, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 512.0, 573.0, 576.0, 567.0, 570.0, 569.0, 579.0, 576.0, 582.0, 530.0, 579.0, 582.0, 582.0, 567.0, 587.0, 576.0, 522.0, 576.0, 582.0, 582.0, 522.0, 587.0, 579.0, 582.0, 576.0, 582.0, 570.0, 441.0, 522.0, 576.0, 576.0, 576.0, 576.0, 576.0, 576.0, 579.0, 522.0, 570.0, 579.0, 579.0, 519.0, 579.0, 576.0, 573.0, 630.0, 576.0, 576.0, 576.0, 587.0, 579.0, 582.0, 576.0, 522.0, 570.0, 579.0, 582.0, 573.0, 579.0, 579.0, 573.0, 584.0, 630.0, 579.0, 582.0, 576.0, 579.0, 579.0, 579.0, 573.0, 570.0, 579.0, 576.0, 579.0, 630.0, 576.0, 576.0, 579.0, 579.0, 536.0, 573.0, 627.0, 587.0, 522.0, 579.0, 582.0, 579.0, 582.0, 576.0, 573.0, 576.0, 582.0, 579.0, 570.0, 573.0, 582.0, 576.0, 519.0, 576.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [306.0, 273.0, 256.0, 256.0, 274.0, 299.0, 286.0, 290.0, 290.0, 277.0, 287.0, 283.0, 286.0, 283.0, 296.0, 283.0, 293.0, 283.0, 276.0, 306.0, 273.0, 257.0, 289.0, 290.0, 293.0, 289.0, 301.0, 281.0, 285.0, 282.0, 296.0, 291.0, 292.0, 284.0, 267.0, 255.0, 288.0, 288.0, 296.0, 286.0, 289.0, 293.0, 262.0, 260.0, 304.0, 283.0, 295.0, 284.0, 304.0, 
278.0, 297.0, 279.0, 293.0, 289.0, 285.0, 285.0, 221.0, 220.0, 250.0, 272.0, 302.0, 274.0, 293.0, 283.0, 277.0, 299.0, 298.0, 278.0, 291.0, 285.0, 296.0, 280.0, 296.0, 283.0, 260.0, 262.0, 281.0, 289.0, 298.0, 281.0, 304.0, 275.0, 272.0, 247.0, 286.0, 293.0, 288.0, 288.0, 281.0, 292.0, 311.0, 319.0, 290.0, 286.0, 287.0, 289.0, 300.0, 276.0, 308.0, 279.0, 297.0, 282.0, 293.0, 289.0, 293.0, 283.0, 277.0, 245.0, 291.0, 279.0, 295.0, 284.0, 295.0, 287.0, 287.0, 286.0, 290.0, 289.0, 287.0, 292.0, 284.0, 289.0, 285.0, 299.0, 321.0, 309.0, 291.0, 288.0, 286.0, 296.0, 275.0, 301.0, 279.0, 300.0, 289.0, 290.0, 293.0, 286.0, 298.0, 275.0, 277.0, 293.0, 293.0, 286.0, 288.0, 288.0, 282.0, 297.0, 321.0, 309.0, 292.0, 284.0, 283.0, 293.0, 288.0, 291.0, 299.0, 280.0, 278.0, 258.0, 286.0, 287.0, 322.0, 305.0, 288.0, 299.0, 274.0, 248.0, 294.0, 285.0, 280.0, 302.0, 293.0, 286.0, 296.0, 286.0, 293.0, 283.0, 296.0, 277.0, 305.0, 271.0, 276.0, 306.0, 279.0, 300.0, 282.0, 288.0, 283.0, 290.0, 293.0, 289.0, 301.0, 275.0, 269.0, 250.0, 279.0, 297.0, 290.0, 289.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6828191897271219, "mean_inference_ms": 1.204880157997455, "mean_action_processing_ms": 0.13227938063631006, "mean_env_wait_ms": 0.8361496603307731, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 630.0, "episode_reward_min": 441.0, "episode_reward_mean": 572.13, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 220.0}, "policy_reward_max": {"ppo": 322.0}, "policy_reward_mean": {"ppo": 286.065}, "hist_stats": {"episode_reward": [579.0, 512.0, 573.0, 576.0, 567.0, 570.0, 569.0, 579.0, 576.0, 582.0, 530.0, 579.0, 582.0, 582.0, 567.0, 587.0, 576.0, 522.0, 576.0, 582.0, 582.0, 522.0, 587.0, 579.0, 582.0, 576.0, 582.0, 570.0, 441.0, 522.0, 576.0, 576.0, 576.0, 576.0, 576.0, 576.0, 579.0, 522.0, 570.0, 579.0, 579.0, 519.0, 579.0, 576.0, 573.0, 630.0, 576.0, 576.0, 576.0, 587.0, 579.0, 582.0, 576.0, 522.0, 570.0, 579.0, 582.0, 
573.0, 579.0, 579.0, 573.0, 584.0, 630.0, 579.0, 582.0, 576.0, 579.0, 579.0, 579.0, 573.0, 570.0, 579.0, 576.0, 579.0, 630.0, 576.0, 576.0, 579.0, 579.0, 536.0, 573.0, 627.0, 587.0, 522.0, 579.0, 582.0, 579.0, 582.0, 576.0, 573.0, 576.0, 582.0, 579.0, 570.0, 573.0, 582.0, 576.0, 519.0, 576.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [306.0, 273.0, 256.0, 256.0, 274.0, 299.0, 286.0, 290.0, 290.0, 277.0, 287.0, 283.0, 286.0, 283.0, 296.0, 283.0, 293.0, 283.0, 276.0, 306.0, 273.0, 257.0, 289.0, 290.0, 293.0, 289.0, 301.0, 281.0, 285.0, 282.0, 296.0, 291.0, 292.0, 284.0, 267.0, 255.0, 288.0, 288.0, 296.0, 286.0, 289.0, 293.0, 262.0, 260.0, 304.0, 283.0, 295.0, 284.0, 304.0, 278.0, 297.0, 279.0, 293.0, 289.0, 285.0, 285.0, 221.0, 220.0, 250.0, 272.0, 302.0, 274.0, 293.0, 283.0, 277.0, 299.0, 298.0, 278.0, 291.0, 285.0, 296.0, 280.0, 296.0, 283.0, 260.0, 262.0, 281.0, 289.0, 298.0, 281.0, 304.0, 275.0, 272.0, 247.0, 286.0, 293.0, 288.0, 288.0, 281.0, 292.0, 311.0, 319.0, 290.0, 286.0, 287.0, 289.0, 300.0, 276.0, 308.0, 279.0, 297.0, 282.0, 293.0, 289.0, 293.0, 283.0, 277.0, 245.0, 291.0, 279.0, 295.0, 284.0, 295.0, 287.0, 287.0, 286.0, 290.0, 289.0, 287.0, 292.0, 284.0, 289.0, 285.0, 299.0, 321.0, 309.0, 291.0, 288.0, 286.0, 296.0, 275.0, 301.0, 279.0, 300.0, 289.0, 290.0, 293.0, 286.0, 298.0, 275.0, 277.0, 293.0, 293.0, 286.0, 288.0, 288.0, 282.0, 297.0, 321.0, 309.0, 292.0, 284.0, 283.0, 293.0, 288.0, 291.0, 299.0, 280.0, 278.0, 258.0, 286.0, 287.0, 322.0, 305.0, 288.0, 
299.0, 274.0, 248.0, 294.0, 285.0, 280.0, 302.0, 293.0, 286.0, 296.0, 286.0, 293.0, 283.0, 296.0, 277.0, 305.0, 271.0, 276.0, 306.0, 279.0, 300.0, 282.0, 288.0, 283.0, 290.0, 293.0, 289.0, 301.0, 275.0, 269.0, 250.0, 279.0, 297.0, 290.0, 289.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6828191897271219, "mean_inference_ms": 1.204880157997455, "mean_action_processing_ms": 0.13227938063631006, "mean_env_wait_ms": 0.8361496603307731, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 4556800, "num_agent_steps_trained": 4556800, "num_env_steps_sampled": 2278400, "num_env_steps_trained": 2278400, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 2278400, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 4556800, "timers": {"training_iteration_time_ms": 3626.15, "learn_time_ms": 1137.311, "learn_throughput": 11254.617, "synch_weights_time_ms": 10.795}, "counters": {"num_env_steps_sampled": 2278400, "num_env_steps_trained": 2278400, "num_agent_steps_sampled": 4556800, "num_agent_steps_trained": 4556800}, "done": false, "episodes_total": 5696, "training_iteration": 178, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-11-30", "timestamp": 1666581090, "time_this_iter_s": 3.7108657360076904, "time_total_s": 666.9531021118164, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 666.9531021118164, "timesteps_since_restore": 0, "iterations_since_restore": 178, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.7, "ram_util_percent": 10.633333333333333}}
+{"custom_metrics": {"sparse_reward_mean": 198.0, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 176.01, "shaped_reward_min": 158, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.13, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 16.94, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 15.75, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 16.5, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.15, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.26, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.07, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, 
"useful_onion_drop_agent_1_mean": 0.14, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 15.72, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 16.29, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.71, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.14, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.18, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.83, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 10, "dish_drop_agent_0_mean": 0.3, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.21, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.07, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.25, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.83, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 11, "soup_delivery_agent_0_mean": 5.17, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.73, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.05, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.07, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 4, "optimal_onion_potting_agent_0_mean": 15.72, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 16.29, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.72, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 16.29, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0018393670907244086, "policy_loss": -0.0020911097526550293, "vf_loss": 
7.623180389404297, "vf_explained_var": 0.6745402812957764, "kl": 0.0019281021086499095, "entropy": 1.021147608757019, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 2291200, "num_env_steps_trained": 2291200, "num_agent_steps_sampled": 4582400, "num_agent_steps_trained": 4582400}, "sampler_results": {"episode_reward_max": 630.0, "episode_reward_min": 519.0, "episode_reward_mean": 572.01, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 245.0}, "policy_reward_max": {"ppo": 322.0}, "policy_reward_mean": {"ppo": 286.005}, "custom_metrics": {"sparse_reward_mean": 198.0, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 176.01, "shaped_reward_min": 158, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.13, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 16.94, 
"onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 15.75, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 16.5, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.15, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.26, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.07, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.14, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 15.72, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 16.29, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.71, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.14, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.18, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.83, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 10, "dish_drop_agent_0_mean": 0.3, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.21, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.07, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.25, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.83, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 11, "soup_delivery_agent_0_mean": 5.17, "soup_delivery_agent_0_min": 1, 
"soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.73, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.05, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.07, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 4, "optimal_onion_potting_agent_0_mean": 15.72, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 16.29, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.72, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 16.29, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [576.0, 576.0, 576.0, 576.0, 579.0, 522.0, 570.0, 579.0, 579.0, 519.0, 579.0, 576.0, 573.0, 630.0, 576.0, 576.0, 576.0, 587.0, 579.0, 582.0, 576.0, 522.0, 570.0, 579.0, 582.0, 573.0, 579.0, 579.0, 573.0, 584.0, 630.0, 579.0, 582.0, 576.0, 579.0, 579.0, 579.0, 573.0, 570.0, 579.0, 576.0, 579.0, 630.0, 576.0, 576.0, 579.0, 579.0, 536.0, 573.0, 627.0, 587.0, 522.0, 579.0, 582.0, 579.0, 582.0, 576.0, 573.0, 576.0, 582.0, 579.0, 570.0, 573.0, 582.0, 576.0, 519.0, 576.0, 579.0, 576.0, 570.0, 582.0, 573.0, 567.0, 579.0, 527.0, 582.0, 576.0, 525.0, 576.0, 582.0, 579.0, 533.0, 573.0, 573.0, 579.0, 573.0, 581.0, 582.0, 579.0, 582.0, 530.0, 579.0, 522.0, 530.0, 576.0, 525.0, 558.0, 579.0, 519.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [277.0, 299.0, 298.0, 278.0, 291.0, 285.0, 296.0, 280.0, 296.0, 283.0, 260.0, 262.0, 281.0, 289.0, 298.0, 281.0, 304.0, 275.0, 272.0, 247.0, 286.0, 293.0, 288.0, 288.0, 281.0, 292.0, 311.0, 319.0, 290.0, 286.0, 287.0, 289.0, 300.0, 276.0, 308.0, 279.0, 297.0, 282.0, 293.0, 289.0, 293.0, 283.0, 277.0, 245.0, 291.0, 279.0, 295.0, 284.0, 295.0, 
287.0, 287.0, 286.0, 290.0, 289.0, 287.0, 292.0, 284.0, 289.0, 285.0, 299.0, 321.0, 309.0, 291.0, 288.0, 286.0, 296.0, 275.0, 301.0, 279.0, 300.0, 289.0, 290.0, 293.0, 286.0, 298.0, 275.0, 277.0, 293.0, 293.0, 286.0, 288.0, 288.0, 282.0, 297.0, 321.0, 309.0, 292.0, 284.0, 283.0, 293.0, 288.0, 291.0, 299.0, 280.0, 278.0, 258.0, 286.0, 287.0, 322.0, 305.0, 288.0, 299.0, 274.0, 248.0, 294.0, 285.0, 280.0, 302.0, 293.0, 286.0, 296.0, 286.0, 293.0, 283.0, 296.0, 277.0, 305.0, 271.0, 276.0, 306.0, 279.0, 300.0, 282.0, 288.0, 283.0, 290.0, 293.0, 289.0, 301.0, 275.0, 269.0, 250.0, 279.0, 297.0, 290.0, 289.0, 295.0, 281.0, 285.0, 285.0, 289.0, 293.0, 279.0, 294.0, 282.0, 285.0, 286.0, 293.0, 246.0, 281.0, 296.0, 286.0, 293.0, 283.0, 253.0, 272.0, 287.0, 289.0, 291.0, 291.0, 285.0, 294.0, 268.0, 265.0, 281.0, 292.0, 282.0, 291.0, 282.0, 297.0, 299.0, 274.0, 289.0, 292.0, 286.0, 296.0, 289.0, 290.0, 295.0, 287.0, 272.0, 258.0, 290.0, 289.0, 260.0, 262.0, 261.0, 269.0, 276.0, 300.0, 259.0, 266.0, 290.0, 268.0, 278.0, 301.0, 257.0, 262.0, 284.0, 298.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6828123814988936, "mean_inference_ms": 1.2051653630779808, "mean_action_processing_ms": 0.13228301841831588, "mean_env_wait_ms": 0.8362093901992204, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 630.0, "episode_reward_min": 519.0, "episode_reward_mean": 572.01, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 245.0}, "policy_reward_max": {"ppo": 322.0}, "policy_reward_mean": {"ppo": 286.005}, "hist_stats": {"episode_reward": [576.0, 576.0, 576.0, 576.0, 579.0, 522.0, 570.0, 579.0, 579.0, 519.0, 579.0, 576.0, 573.0, 630.0, 576.0, 576.0, 576.0, 587.0, 579.0, 582.0, 576.0, 522.0, 570.0, 579.0, 582.0, 573.0, 579.0, 579.0, 573.0, 584.0, 630.0, 579.0, 582.0, 576.0, 579.0, 579.0, 579.0, 573.0, 570.0, 579.0, 576.0, 579.0, 630.0, 576.0, 576.0, 579.0, 579.0, 536.0, 573.0, 627.0, 587.0, 522.0, 579.0, 582.0, 579.0, 582.0, 
576.0, 573.0, 576.0, 582.0, 579.0, 570.0, 573.0, 582.0, 576.0, 519.0, 576.0, 579.0, 576.0, 570.0, 582.0, 573.0, 567.0, 579.0, 527.0, 582.0, 576.0, 525.0, 576.0, 582.0, 579.0, 533.0, 573.0, 573.0, 579.0, 573.0, 581.0, 582.0, 579.0, 582.0, 530.0, 579.0, 522.0, 530.0, 576.0, 525.0, 558.0, 579.0, 519.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [277.0, 299.0, 298.0, 278.0, 291.0, 285.0, 296.0, 280.0, 296.0, 283.0, 260.0, 262.0, 281.0, 289.0, 298.0, 281.0, 304.0, 275.0, 272.0, 247.0, 286.0, 293.0, 288.0, 288.0, 281.0, 292.0, 311.0, 319.0, 290.0, 286.0, 287.0, 289.0, 300.0, 276.0, 308.0, 279.0, 297.0, 282.0, 293.0, 289.0, 293.0, 283.0, 277.0, 245.0, 291.0, 279.0, 295.0, 284.0, 295.0, 287.0, 287.0, 286.0, 290.0, 289.0, 287.0, 292.0, 284.0, 289.0, 285.0, 299.0, 321.0, 309.0, 291.0, 288.0, 286.0, 296.0, 275.0, 301.0, 279.0, 300.0, 289.0, 290.0, 293.0, 286.0, 298.0, 275.0, 277.0, 293.0, 293.0, 286.0, 288.0, 288.0, 282.0, 297.0, 321.0, 309.0, 292.0, 284.0, 283.0, 293.0, 288.0, 291.0, 299.0, 280.0, 278.0, 258.0, 286.0, 287.0, 322.0, 305.0, 288.0, 299.0, 274.0, 248.0, 294.0, 285.0, 280.0, 302.0, 293.0, 286.0, 296.0, 286.0, 293.0, 283.0, 296.0, 277.0, 305.0, 271.0, 276.0, 306.0, 279.0, 300.0, 282.0, 288.0, 283.0, 290.0, 293.0, 289.0, 301.0, 275.0, 269.0, 250.0, 279.0, 297.0, 290.0, 289.0, 295.0, 281.0, 285.0, 285.0, 289.0, 293.0, 279.0, 294.0, 282.0, 285.0, 286.0, 293.0, 246.0, 281.0, 296.0, 286.0, 293.0, 283.0, 253.0, 272.0, 287.0, 289.0, 291.0, 291.0, 285.0, 294.0, 268.0, 265.0, 
281.0, 292.0, 282.0, 291.0, 282.0, 297.0, 299.0, 274.0, 289.0, 292.0, 286.0, 296.0, 289.0, 290.0, 295.0, 287.0, 272.0, 258.0, 290.0, 289.0, 260.0, 262.0, 261.0, 269.0, 276.0, 300.0, 259.0, 266.0, 290.0, 268.0, 278.0, 301.0, 257.0, 262.0, 284.0, 298.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6828123814988936, "mean_inference_ms": 1.2051653630779808, "mean_action_processing_ms": 0.13228301841831588, "mean_env_wait_ms": 0.8362093901992204, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 4582400, "num_agent_steps_trained": 4582400, "num_env_steps_sampled": 2291200, "num_env_steps_trained": 2291200, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 2291200, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 4582400, "timers": {"training_iteration_time_ms": 3645.952, "learn_time_ms": 1141.641, "learn_throughput": 11211.928, "synch_weights_time_ms": 11.207}, "counters": {"num_env_steps_sampled": 2291200, "num_env_steps_trained": 2291200, "num_agent_steps_sampled": 4582400, "num_agent_steps_trained": 4582400}, "done": false, "episodes_total": 5728, "training_iteration": 179, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-11-34", "timestamp": 1666581094, "time_this_iter_s": 3.916750431060791, "time_total_s": 670.8698525428772, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 670.8698525428772, "timesteps_since_restore": 0, "iterations_since_restore": 179, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 22.25, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 197.6, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 175.98, "shaped_reward_min": 158, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.88, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 17.18, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 15.49, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 16.77, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.14, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.28, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.07, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, 
"useful_onion_drop_agent_1_mean": 0.16, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 15.52, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 16.52, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 5.76, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.03, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.19, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.78, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 10, "dish_drop_agent_0_mean": 0.28, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.13, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.07, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.27, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.8, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 11, "soup_delivery_agent_0_mean": 5.18, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.71, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.04, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.07, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 4, "optimal_onion_potting_agent_0_mean": 15.52, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 16.52, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.52, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 16.52, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0010324614122509956, "policy_loss": -0.0012710680020973086, "vf_loss": 
7.529854774475098, "vf_explained_var": 0.6815111041069031, "kl": 0.0017792152939364314, "entropy": 1.0287561416625977, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 2304000, "num_env_steps_trained": 2304000, "num_agent_steps_sampled": 4608000, "num_agent_steps_trained": 4608000}, "sampler_results": {"episode_reward_max": 630.0, "episode_reward_min": 519.0, "episode_reward_mean": 571.18, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 246.0}, "policy_reward_max": {"ppo": 322.0}, "policy_reward_mean": {"ppo": 285.59}, "custom_metrics": {"sparse_reward_mean": 197.6, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 175.98, "shaped_reward_min": 158, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.88, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 17.18, 
"onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 15.49, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 16.77, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.14, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.28, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.07, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.16, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 15.52, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 16.52, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 5.76, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.03, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.19, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.78, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 10, "dish_drop_agent_0_mean": 0.28, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.13, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.07, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.27, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.8, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 11, "soup_delivery_agent_0_mean": 5.18, "soup_delivery_agent_0_min": 1, 
"soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.71, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.04, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.07, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 4, "optimal_onion_potting_agent_0_mean": 15.52, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 16.52, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.52, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 16.52, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 576.0, 579.0, 579.0, 579.0, 573.0, 570.0, 579.0, 576.0, 579.0, 630.0, 576.0, 576.0, 579.0, 579.0, 536.0, 573.0, 627.0, 587.0, 522.0, 579.0, 582.0, 579.0, 582.0, 576.0, 573.0, 576.0, 582.0, 579.0, 570.0, 573.0, 582.0, 576.0, 519.0, 576.0, 579.0, 576.0, 570.0, 582.0, 573.0, 567.0, 579.0, 527.0, 582.0, 576.0, 525.0, 576.0, 582.0, 579.0, 533.0, 573.0, 573.0, 579.0, 573.0, 581.0, 582.0, 579.0, 582.0, 530.0, 579.0, 522.0, 530.0, 576.0, 525.0, 558.0, 579.0, 519.0, 582.0, 579.0, 587.0, 582.0, 573.0, 576.0, 519.0, 576.0, 582.0, 587.0, 576.0, 570.0, 579.0, 576.0, 533.0, 579.0, 561.0, 576.0, 579.0, 579.0, 582.0, 573.0, 579.0, 573.0, 579.0, 582.0, 579.0, 564.0, 576.0, 533.0, 582.0, 579.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [286.0, 296.0, 275.0, 301.0, 279.0, 300.0, 289.0, 290.0, 293.0, 286.0, 298.0, 275.0, 277.0, 293.0, 293.0, 286.0, 288.0, 288.0, 282.0, 297.0, 321.0, 309.0, 292.0, 284.0, 283.0, 293.0, 288.0, 291.0, 299.0, 280.0, 278.0, 258.0, 286.0, 287.0, 322.0, 305.0, 288.0, 299.0, 274.0, 248.0, 294.0, 285.0, 280.0, 302.0, 293.0, 286.0, 296.0, 286.0, 293.0, 
283.0, 296.0, 277.0, 305.0, 271.0, 276.0, 306.0, 279.0, 300.0, 282.0, 288.0, 283.0, 290.0, 293.0, 289.0, 301.0, 275.0, 269.0, 250.0, 279.0, 297.0, 290.0, 289.0, 295.0, 281.0, 285.0, 285.0, 289.0, 293.0, 279.0, 294.0, 282.0, 285.0, 286.0, 293.0, 246.0, 281.0, 296.0, 286.0, 293.0, 283.0, 253.0, 272.0, 287.0, 289.0, 291.0, 291.0, 285.0, 294.0, 268.0, 265.0, 281.0, 292.0, 282.0, 291.0, 282.0, 297.0, 299.0, 274.0, 289.0, 292.0, 286.0, 296.0, 289.0, 290.0, 295.0, 287.0, 272.0, 258.0, 290.0, 289.0, 260.0, 262.0, 261.0, 269.0, 276.0, 300.0, 259.0, 266.0, 290.0, 268.0, 278.0, 301.0, 257.0, 262.0, 284.0, 298.0, 291.0, 288.0, 301.0, 286.0, 292.0, 290.0, 285.0, 288.0, 293.0, 283.0, 270.0, 249.0, 297.0, 279.0, 281.0, 301.0, 293.0, 294.0, 297.0, 279.0, 294.0, 276.0, 287.0, 292.0, 278.0, 298.0, 285.0, 248.0, 301.0, 278.0, 287.0, 274.0, 294.0, 282.0, 288.0, 291.0, 270.0, 309.0, 296.0, 286.0, 288.0, 285.0, 289.0, 290.0, 290.0, 283.0, 286.0, 293.0, 278.0, 304.0, 290.0, 289.0, 285.0, 279.0, 303.0, 273.0, 266.0, 267.0, 291.0, 291.0, 291.0, 288.0, 291.0, 288.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6827948903707423, "mean_inference_ms": 1.2054504095198442, "mean_action_processing_ms": 0.13228362079130832, "mean_env_wait_ms": 0.8362570836763952, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 630.0, "episode_reward_min": 519.0, "episode_reward_mean": 571.18, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 246.0}, "policy_reward_max": {"ppo": 322.0}, "policy_reward_mean": {"ppo": 285.59}, "hist_stats": {"episode_reward": [582.0, 576.0, 579.0, 579.0, 579.0, 573.0, 570.0, 579.0, 576.0, 579.0, 630.0, 576.0, 576.0, 579.0, 579.0, 536.0, 573.0, 627.0, 587.0, 522.0, 579.0, 582.0, 579.0, 582.0, 576.0, 573.0, 576.0, 582.0, 579.0, 570.0, 573.0, 582.0, 576.0, 519.0, 576.0, 579.0, 576.0, 570.0, 582.0, 573.0, 567.0, 579.0, 527.0, 582.0, 576.0, 525.0, 576.0, 582.0, 579.0, 533.0, 573.0, 573.0, 579.0, 573.0, 581.0, 582.0, 579.0, 
582.0, 530.0, 579.0, 522.0, 530.0, 576.0, 525.0, 558.0, 579.0, 519.0, 582.0, 579.0, 587.0, 582.0, 573.0, 576.0, 519.0, 576.0, 582.0, 587.0, 576.0, 570.0, 579.0, 576.0, 533.0, 579.0, 561.0, 576.0, 579.0, 579.0, 582.0, 573.0, 579.0, 573.0, 579.0, 582.0, 579.0, 564.0, 576.0, 533.0, 582.0, 579.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [286.0, 296.0, 275.0, 301.0, 279.0, 300.0, 289.0, 290.0, 293.0, 286.0, 298.0, 275.0, 277.0, 293.0, 293.0, 286.0, 288.0, 288.0, 282.0, 297.0, 321.0, 309.0, 292.0, 284.0, 283.0, 293.0, 288.0, 291.0, 299.0, 280.0, 278.0, 258.0, 286.0, 287.0, 322.0, 305.0, 288.0, 299.0, 274.0, 248.0, 294.0, 285.0, 280.0, 302.0, 293.0, 286.0, 296.0, 286.0, 293.0, 283.0, 296.0, 277.0, 305.0, 271.0, 276.0, 306.0, 279.0, 300.0, 282.0, 288.0, 283.0, 290.0, 293.0, 289.0, 301.0, 275.0, 269.0, 250.0, 279.0, 297.0, 290.0, 289.0, 295.0, 281.0, 285.0, 285.0, 289.0, 293.0, 279.0, 294.0, 282.0, 285.0, 286.0, 293.0, 246.0, 281.0, 296.0, 286.0, 293.0, 283.0, 253.0, 272.0, 287.0, 289.0, 291.0, 291.0, 285.0, 294.0, 268.0, 265.0, 281.0, 292.0, 282.0, 291.0, 282.0, 297.0, 299.0, 274.0, 289.0, 292.0, 286.0, 296.0, 289.0, 290.0, 295.0, 287.0, 272.0, 258.0, 290.0, 289.0, 260.0, 262.0, 261.0, 269.0, 276.0, 300.0, 259.0, 266.0, 290.0, 268.0, 278.0, 301.0, 257.0, 262.0, 284.0, 298.0, 291.0, 288.0, 301.0, 286.0, 292.0, 290.0, 285.0, 288.0, 293.0, 283.0, 270.0, 249.0, 297.0, 279.0, 281.0, 301.0, 293.0, 294.0, 297.0, 279.0, 294.0, 276.0, 287.0, 292.0, 278.0, 298.0, 285.0, 248.0, 301.0, 
278.0, 287.0, 274.0, 294.0, 282.0, 288.0, 291.0, 270.0, 309.0, 296.0, 286.0, 288.0, 285.0, 289.0, 290.0, 290.0, 283.0, 286.0, 293.0, 278.0, 304.0, 290.0, 289.0, 285.0, 279.0, 303.0, 273.0, 266.0, 267.0, 291.0, 291.0, 291.0, 288.0, 291.0, 288.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6827948903707423, "mean_inference_ms": 1.2054504095198442, "mean_action_processing_ms": 0.13228362079130832, "mean_env_wait_ms": 0.8362570836763952, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 4608000, "num_agent_steps_trained": 4608000, "num_env_steps_sampled": 2304000, "num_env_steps_trained": 2304000, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 2304000, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 4608000, "timers": {"training_iteration_time_ms": 3650.521, "learn_time_ms": 1140.233, "learn_throughput": 11225.772, "synch_weights_time_ms": 11.194}, "counters": {"num_env_steps_sampled": 2304000, "num_env_steps_trained": 2304000, "num_agent_steps_sampled": 4608000, "num_agent_steps_trained": 4608000}, "done": false, "episodes_total": 5760, "training_iteration": 180, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-11-38", "timestamp": 1666581098, "time_this_iter_s": 3.7083230018615723, "time_total_s": 674.5781755447388, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 674.5781755447388, "timesteps_since_restore": 0, "iterations_since_restore": 180, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.28, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 195.8, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 174.2, "shaped_reward_min": 153, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.75, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 17.24, "onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 15.34, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 16.84, "useful_onion_pickup_agent_1_min": 11, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.14, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.36, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.08, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, 
"useful_onion_drop_agent_1_mean": 0.2, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 15.32, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 16.45, "potting_onion_agent_1_min": 11, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 5.7, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.09, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.1, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.68, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.27, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.25, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.08, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.07, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.27, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 11, "soup_pickup_agent_1_mean": 4.74, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 11, "soup_delivery_agent_0_mean": 5.16, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.66, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.06, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.07, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 4, "optimal_onion_potting_agent_0_mean": 15.32, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 16.45, "optimal_onion_potting_agent_1_min": 11, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.32, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 16.45, "viable_onion_potting_agent_1_min": 11, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0019686259329319, "policy_loss": 0.0017239430453628302, "vf_loss": 
7.565124034881592, "vf_explained_var": 0.6716065406799316, "kl": 0.00241199042648077, "entropy": 1.023657202720642, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 2316800, "num_env_steps_trained": 2316800, "num_agent_steps_sampled": 4633600, "num_agent_steps_trained": 4633600}, "sampler_results": {"episode_reward_max": 630.0, "episode_reward_min": 493.0, "episode_reward_mean": 565.8, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 243.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 282.9}, "custom_metrics": {"sparse_reward_mean": 195.8, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 174.2, "shaped_reward_min": 153, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.75, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 17.24, 
"onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 15.34, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 16.84, "useful_onion_pickup_agent_1_min": 11, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.14, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.36, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.08, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.2, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 15.32, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 16.45, "potting_onion_agent_1_min": 11, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 5.7, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.09, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.1, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.68, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.27, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.25, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.08, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.07, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.27, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 11, "soup_pickup_agent_1_mean": 4.74, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 11, "soup_delivery_agent_0_mean": 5.16, "soup_delivery_agent_0_min": 2, 
"soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.66, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.06, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.07, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 4, "optimal_onion_potting_agent_0_mean": 15.32, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 16.45, "optimal_onion_potting_agent_1_min": 11, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.32, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 16.45, "viable_onion_potting_agent_1_min": 11, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [576.0, 519.0, 576.0, 579.0, 576.0, 570.0, 582.0, 573.0, 567.0, 579.0, 527.0, 582.0, 576.0, 525.0, 576.0, 582.0, 579.0, 533.0, 573.0, 573.0, 579.0, 573.0, 581.0, 582.0, 579.0, 582.0, 530.0, 579.0, 522.0, 530.0, 576.0, 525.0, 558.0, 579.0, 519.0, 582.0, 579.0, 587.0, 582.0, 573.0, 576.0, 519.0, 576.0, 582.0, 587.0, 576.0, 570.0, 579.0, 576.0, 533.0, 579.0, 561.0, 576.0, 579.0, 579.0, 582.0, 573.0, 579.0, 573.0, 579.0, 582.0, 579.0, 564.0, 576.0, 533.0, 582.0, 579.0, 579.0, 522.0, 570.0, 493.0, 513.0, 570.0, 579.0, 530.0, 573.0, 570.0, 522.0, 576.0, 576.0, 573.0, 525.0, 579.0, 579.0, 522.0, 576.0, 576.0, 573.0, 573.0, 576.0, 567.0, 573.0, 582.0, 630.0, 579.0, 525.0, 519.0, 579.0, 579.0, 573.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [301.0, 275.0, 269.0, 250.0, 279.0, 297.0, 290.0, 289.0, 295.0, 281.0, 285.0, 285.0, 289.0, 293.0, 279.0, 294.0, 282.0, 285.0, 286.0, 293.0, 246.0, 281.0, 296.0, 286.0, 293.0, 283.0, 253.0, 272.0, 287.0, 289.0, 291.0, 291.0, 285.0, 294.0, 268.0, 265.0, 281.0, 292.0, 282.0, 291.0, 282.0, 297.0, 299.0, 274.0, 289.0, 292.0, 286.0, 296.0, 289.0, 
290.0, 295.0, 287.0, 272.0, 258.0, 290.0, 289.0, 260.0, 262.0, 261.0, 269.0, 276.0, 300.0, 259.0, 266.0, 290.0, 268.0, 278.0, 301.0, 257.0, 262.0, 284.0, 298.0, 291.0, 288.0, 301.0, 286.0, 292.0, 290.0, 285.0, 288.0, 293.0, 283.0, 270.0, 249.0, 297.0, 279.0, 281.0, 301.0, 293.0, 294.0, 297.0, 279.0, 294.0, 276.0, 287.0, 292.0, 278.0, 298.0, 285.0, 248.0, 301.0, 278.0, 287.0, 274.0, 294.0, 282.0, 288.0, 291.0, 270.0, 309.0, 296.0, 286.0, 288.0, 285.0, 289.0, 290.0, 290.0, 283.0, 286.0, 293.0, 278.0, 304.0, 290.0, 289.0, 285.0, 279.0, 303.0, 273.0, 266.0, 267.0, 291.0, 291.0, 291.0, 288.0, 291.0, 288.0, 257.0, 265.0, 294.0, 276.0, 243.0, 250.0, 261.0, 252.0, 289.0, 281.0, 281.0, 298.0, 279.0, 251.0, 295.0, 278.0, 279.0, 291.0, 257.0, 265.0, 293.0, 283.0, 280.0, 296.0, 281.0, 292.0, 254.0, 271.0, 298.0, 281.0, 288.0, 291.0, 251.0, 271.0, 287.0, 289.0, 286.0, 290.0, 285.0, 288.0, 293.0, 280.0, 278.0, 298.0, 291.0, 276.0, 290.0, 283.0, 291.0, 291.0, 325.0, 305.0, 294.0, 285.0, 272.0, 253.0, 246.0, 273.0, 284.0, 295.0, 293.0, 286.0, 289.0, 284.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6827979529664868, "mean_inference_ms": 1.2057173897529836, "mean_action_processing_ms": 0.13228127451561433, "mean_env_wait_ms": 0.8362816793672555, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 630.0, "episode_reward_min": 493.0, "episode_reward_mean": 565.8, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 243.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 282.9}, "hist_stats": {"episode_reward": [576.0, 519.0, 576.0, 579.0, 576.0, 570.0, 582.0, 573.0, 567.0, 579.0, 527.0, 582.0, 576.0, 525.0, 576.0, 582.0, 579.0, 533.0, 573.0, 573.0, 579.0, 573.0, 581.0, 582.0, 579.0, 582.0, 530.0, 579.0, 522.0, 530.0, 576.0, 525.0, 558.0, 579.0, 519.0, 582.0, 579.0, 587.0, 582.0, 573.0, 576.0, 519.0, 576.0, 582.0, 587.0, 576.0, 570.0, 579.0, 576.0, 533.0, 579.0, 561.0, 576.0, 579.0, 579.0, 582.0, 573.0, 
579.0, 573.0, 579.0, 582.0, 579.0, 564.0, 576.0, 533.0, 582.0, 579.0, 579.0, 522.0, 570.0, 493.0, 513.0, 570.0, 579.0, 530.0, 573.0, 570.0, 522.0, 576.0, 576.0, 573.0, 525.0, 579.0, 579.0, 522.0, 576.0, 576.0, 573.0, 573.0, 576.0, 567.0, 573.0, 582.0, 630.0, 579.0, 525.0, 519.0, 579.0, 579.0, 573.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [301.0, 275.0, 269.0, 250.0, 279.0, 297.0, 290.0, 289.0, 295.0, 281.0, 285.0, 285.0, 289.0, 293.0, 279.0, 294.0, 282.0, 285.0, 286.0, 293.0, 246.0, 281.0, 296.0, 286.0, 293.0, 283.0, 253.0, 272.0, 287.0, 289.0, 291.0, 291.0, 285.0, 294.0, 268.0, 265.0, 281.0, 292.0, 282.0, 291.0, 282.0, 297.0, 299.0, 274.0, 289.0, 292.0, 286.0, 296.0, 289.0, 290.0, 295.0, 287.0, 272.0, 258.0, 290.0, 289.0, 260.0, 262.0, 261.0, 269.0, 276.0, 300.0, 259.0, 266.0, 290.0, 268.0, 278.0, 301.0, 257.0, 262.0, 284.0, 298.0, 291.0, 288.0, 301.0, 286.0, 292.0, 290.0, 285.0, 288.0, 293.0, 283.0, 270.0, 249.0, 297.0, 279.0, 281.0, 301.0, 293.0, 294.0, 297.0, 279.0, 294.0, 276.0, 287.0, 292.0, 278.0, 298.0, 285.0, 248.0, 301.0, 278.0, 287.0, 274.0, 294.0, 282.0, 288.0, 291.0, 270.0, 309.0, 296.0, 286.0, 288.0, 285.0, 289.0, 290.0, 290.0, 283.0, 286.0, 293.0, 278.0, 304.0, 290.0, 289.0, 285.0, 279.0, 303.0, 273.0, 266.0, 267.0, 291.0, 291.0, 291.0, 288.0, 291.0, 288.0, 257.0, 265.0, 294.0, 276.0, 243.0, 250.0, 261.0, 252.0, 289.0, 281.0, 281.0, 298.0, 279.0, 251.0, 295.0, 278.0, 279.0, 291.0, 257.0, 265.0, 293.0, 283.0, 280.0, 296.0, 281.0, 292.0, 254.0, 271.0, 298.0, 
281.0, 288.0, 291.0, 251.0, 271.0, 287.0, 289.0, 286.0, 290.0, 285.0, 288.0, 293.0, 280.0, 278.0, 298.0, 291.0, 276.0, 290.0, 283.0, 291.0, 291.0, 325.0, 305.0, 294.0, 285.0, 272.0, 253.0, 246.0, 273.0, 284.0, 295.0, 293.0, 286.0, 289.0, 284.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6827979529664868, "mean_inference_ms": 1.2057173897529836, "mean_action_processing_ms": 0.13228127451561433, "mean_env_wait_ms": 0.8362816793672555, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 4633600, "num_agent_steps_trained": 4633600, "num_env_steps_sampled": 2316800, "num_env_steps_trained": 2316800, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 2316800, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 4633600, "timers": {"training_iteration_time_ms": 3652.906, "learn_time_ms": 1139.757, "learn_throughput": 11230.46, "synch_weights_time_ms": 11.425}, "counters": {"num_env_steps_sampled": 2316800, "num_env_steps_trained": 2316800, "num_agent_steps_sampled": 4633600, "num_agent_steps_trained": 4633600}, "done": false, "episodes_total": 5792, "training_iteration": 181, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-11-42", "timestamp": 1666581102, "time_this_iter_s": 3.7982163429260254, "time_total_s": 678.3763918876648, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 678.3763918876648, "timesteps_since_restore": 0, "iterations_since_restore": 181, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.683333333333334, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 194.2, "sparse_reward_min": 100, "sparse_reward_max": 220, "shaped_reward_mean": 173.06, "shaped_reward_min": 127, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.53, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 17.11, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 15.18, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 16.74, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.13, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.26, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.06, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, 
"useful_onion_drop_agent_1_mean": 0.14, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.05, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 16.44, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 5.78, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.03, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.2, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.53, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.26, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.33, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.06, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.08, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.35, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 11, "soup_pickup_agent_1_mean": 4.56, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.24, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.53, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.06, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.05, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 16.44, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.05, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 16.44, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0005980606656521559, "policy_loss": -0.0008600912988185883, "vf_loss": 
7.714970588684082, "vf_explained_var": 0.6483292579650879, "kl": 0.0020733263809233904, "entropy": 1.0189299583435059, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 2329600, "num_env_steps_trained": 2329600, "num_agent_steps_sampled": 4659200, "num_agent_steps_trained": 4659200}, "sampler_results": {"episode_reward_max": 630.0, "episode_reward_min": 327.0, "episode_reward_mean": 561.46, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 157.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 280.73}, "custom_metrics": {"sparse_reward_mean": 194.2, "sparse_reward_min": 100, "sparse_reward_max": 220, "shaped_reward_mean": 173.06, "shaped_reward_min": 127, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.53, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 17.11, 
"onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 15.18, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 16.74, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.13, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.26, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.06, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.14, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.05, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 16.44, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 5.78, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.03, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.2, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.53, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.26, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.33, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.06, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.08, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.35, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 11, "soup_pickup_agent_1_mean": 4.56, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.24, "soup_delivery_agent_0_min": 2, 
"soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.53, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.06, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.05, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 16.44, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.05, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 16.44, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [558.0, 579.0, 519.0, 582.0, 579.0, 587.0, 582.0, 573.0, 576.0, 519.0, 576.0, 582.0, 587.0, 576.0, 570.0, 579.0, 576.0, 533.0, 579.0, 561.0, 576.0, 579.0, 579.0, 582.0, 573.0, 579.0, 573.0, 579.0, 582.0, 579.0, 564.0, 576.0, 533.0, 582.0, 579.0, 579.0, 522.0, 570.0, 493.0, 513.0, 570.0, 579.0, 530.0, 573.0, 570.0, 522.0, 576.0, 576.0, 573.0, 525.0, 579.0, 579.0, 522.0, 576.0, 576.0, 573.0, 573.0, 576.0, 567.0, 573.0, 582.0, 630.0, 579.0, 525.0, 519.0, 579.0, 579.0, 573.0, 582.0, 570.0, 521.0, 527.0, 576.0, 425.0, 576.0, 582.0, 327.0, 579.0, 573.0, 576.0, 530.0, 573.0, 576.0, 573.0, 573.0, 579.0, 579.0, 576.0, 522.0, 570.0, 579.0, 519.0, 576.0, 573.0, 573.0, 579.0, 576.0, 530.0, 430.0, 627.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [290.0, 268.0, 278.0, 301.0, 257.0, 262.0, 284.0, 298.0, 291.0, 288.0, 301.0, 286.0, 292.0, 290.0, 285.0, 288.0, 293.0, 283.0, 270.0, 249.0, 297.0, 279.0, 281.0, 301.0, 293.0, 294.0, 297.0, 279.0, 294.0, 276.0, 287.0, 292.0, 278.0, 298.0, 285.0, 248.0, 301.0, 278.0, 287.0, 274.0, 294.0, 282.0, 288.0, 291.0, 270.0, 309.0, 296.0, 286.0, 288.0, 
285.0, 289.0, 290.0, 290.0, 283.0, 286.0, 293.0, 278.0, 304.0, 290.0, 289.0, 285.0, 279.0, 303.0, 273.0, 266.0, 267.0, 291.0, 291.0, 291.0, 288.0, 291.0, 288.0, 257.0, 265.0, 294.0, 276.0, 243.0, 250.0, 261.0, 252.0, 289.0, 281.0, 281.0, 298.0, 279.0, 251.0, 295.0, 278.0, 279.0, 291.0, 257.0, 265.0, 293.0, 283.0, 280.0, 296.0, 281.0, 292.0, 254.0, 271.0, 298.0, 281.0, 288.0, 291.0, 251.0, 271.0, 287.0, 289.0, 286.0, 290.0, 285.0, 288.0, 293.0, 280.0, 278.0, 298.0, 291.0, 276.0, 290.0, 283.0, 291.0, 291.0, 325.0, 305.0, 294.0, 285.0, 272.0, 253.0, 246.0, 273.0, 284.0, 295.0, 293.0, 286.0, 289.0, 284.0, 293.0, 289.0, 282.0, 288.0, 259.0, 262.0, 262.0, 265.0, 290.0, 286.0, 205.0, 220.0, 276.0, 300.0, 283.0, 299.0, 170.0, 157.0, 290.0, 289.0, 288.0, 285.0, 283.0, 293.0, 265.0, 265.0, 289.0, 284.0, 296.0, 280.0, 290.0, 283.0, 285.0, 288.0, 304.0, 275.0, 299.0, 280.0, 283.0, 293.0, 252.0, 270.0, 293.0, 277.0, 289.0, 290.0, 256.0, 263.0, 296.0, 280.0, 293.0, 280.0, 293.0, 280.0, 283.0, 296.0, 288.0, 288.0, 269.0, 261.0, 218.0, 212.0, 312.0, 315.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6827873842497127, "mean_inference_ms": 1.20570575492883, "mean_action_processing_ms": 0.13227875312442067, "mean_env_wait_ms": 0.8361696516164074, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 630.0, "episode_reward_min": 327.0, "episode_reward_mean": 561.46, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 157.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 280.73}, "hist_stats": {"episode_reward": [558.0, 579.0, 519.0, 582.0, 579.0, 587.0, 582.0, 573.0, 576.0, 519.0, 576.0, 582.0, 587.0, 576.0, 570.0, 579.0, 576.0, 533.0, 579.0, 561.0, 576.0, 579.0, 579.0, 582.0, 573.0, 579.0, 573.0, 579.0, 582.0, 579.0, 564.0, 576.0, 533.0, 582.0, 579.0, 579.0, 522.0, 570.0, 493.0, 513.0, 570.0, 579.0, 530.0, 573.0, 570.0, 522.0, 576.0, 576.0, 573.0, 525.0, 579.0, 579.0, 522.0, 576.0, 576.0, 573.0, 573.0, 
576.0, 567.0, 573.0, 582.0, 630.0, 579.0, 525.0, 519.0, 579.0, 579.0, 573.0, 582.0, 570.0, 521.0, 527.0, 576.0, 425.0, 576.0, 582.0, 327.0, 579.0, 573.0, 576.0, 530.0, 573.0, 576.0, 573.0, 573.0, 579.0, 579.0, 576.0, 522.0, 570.0, 579.0, 519.0, 576.0, 573.0, 573.0, 579.0, 576.0, 530.0, 430.0, 627.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [290.0, 268.0, 278.0, 301.0, 257.0, 262.0, 284.0, 298.0, 291.0, 288.0, 301.0, 286.0, 292.0, 290.0, 285.0, 288.0, 293.0, 283.0, 270.0, 249.0, 297.0, 279.0, 281.0, 301.0, 293.0, 294.0, 297.0, 279.0, 294.0, 276.0, 287.0, 292.0, 278.0, 298.0, 285.0, 248.0, 301.0, 278.0, 287.0, 274.0, 294.0, 282.0, 288.0, 291.0, 270.0, 309.0, 296.0, 286.0, 288.0, 285.0, 289.0, 290.0, 290.0, 283.0, 286.0, 293.0, 278.0, 304.0, 290.0, 289.0, 285.0, 279.0, 303.0, 273.0, 266.0, 267.0, 291.0, 291.0, 291.0, 288.0, 291.0, 288.0, 257.0, 265.0, 294.0, 276.0, 243.0, 250.0, 261.0, 252.0, 289.0, 281.0, 281.0, 298.0, 279.0, 251.0, 295.0, 278.0, 279.0, 291.0, 257.0, 265.0, 293.0, 283.0, 280.0, 296.0, 281.0, 292.0, 254.0, 271.0, 298.0, 281.0, 288.0, 291.0, 251.0, 271.0, 287.0, 289.0, 286.0, 290.0, 285.0, 288.0, 293.0, 280.0, 278.0, 298.0, 291.0, 276.0, 290.0, 283.0, 291.0, 291.0, 325.0, 305.0, 294.0, 285.0, 272.0, 253.0, 246.0, 273.0, 284.0, 295.0, 293.0, 286.0, 289.0, 284.0, 293.0, 289.0, 282.0, 288.0, 259.0, 262.0, 262.0, 265.0, 290.0, 286.0, 205.0, 220.0, 276.0, 300.0, 283.0, 299.0, 170.0, 157.0, 290.0, 289.0, 288.0, 285.0, 283.0, 293.0, 265.0, 265.0, 289.0, 284.0, 296.0, 
280.0, 290.0, 283.0, 285.0, 288.0, 304.0, 275.0, 299.0, 280.0, 283.0, 293.0, 252.0, 270.0, 293.0, 277.0, 289.0, 290.0, 256.0, 263.0, 296.0, 280.0, 293.0, 280.0, 293.0, 280.0, 283.0, 296.0, 288.0, 288.0, 269.0, 261.0, 218.0, 212.0, 312.0, 315.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6827873842497127, "mean_inference_ms": 1.20570575492883, "mean_action_processing_ms": 0.13227875312442067, "mean_env_wait_ms": 0.8361696516164074, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 4659200, "num_agent_steps_trained": 4659200, "num_env_steps_sampled": 2329600, "num_env_steps_trained": 2329600, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 2329600, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 4659200, "timers": {"training_iteration_time_ms": 3671.351, "learn_time_ms": 1151.599, "learn_throughput": 11114.977, "synch_weights_time_ms": 10.944}, "counters": {"num_env_steps_sampled": 2329600, "num_env_steps_trained": 2329600, "num_agent_steps_sampled": 4659200, "num_agent_steps_trained": 4659200}, "done": false, "episodes_total": 5824, "training_iteration": 182, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-11-46", "timestamp": 1666581106, "time_this_iter_s": 3.7461531162261963, "time_total_s": 682.122545003891, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 682.122545003891, "timesteps_since_restore": 0, "iterations_since_restore": 182, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.880000000000003, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 194.2, "sparse_reward_min": 100, "sparse_reward_max": 220, "shaped_reward_mean": 172.92, "shaped_reward_min": 127, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.55, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 17.06, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 15.23, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 16.7, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 25, "onion_drop_agent_0_mean": 0.21, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.25, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.08, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, 
"useful_onion_drop_agent_1_mean": 0.1, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.91, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 16.45, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 5.65, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.09, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.22, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.61, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.21, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.32, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.07, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.32, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 11, "soup_pickup_agent_1_mean": 4.58, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.23, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 10, "soup_delivery_agent_1_mean": 4.54, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.06, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.91, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 16.45, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.91, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 16.45, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.00046833715168759227, "policy_loss": 0.0002174130640923977, "vf_loss": 
7.621037006378174, "vf_explained_var": 0.6563353538513184, "kl": 0.001816212316043675, "entropy": 1.0223599672317505, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 2342400, "num_env_steps_trained": 2342400, "num_agent_steps_sampled": 4684800, "num_agent_steps_trained": 4684800}, "sampler_results": {"episode_reward_max": 630.0, "episode_reward_min": 327.0, "episode_reward_mean": 561.32, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 157.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 280.66}, "custom_metrics": {"sparse_reward_mean": 194.2, "sparse_reward_min": 100, "sparse_reward_max": 220, "shaped_reward_mean": 172.92, "shaped_reward_min": 127, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.55, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 17.06, 
"onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 15.23, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 16.7, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 25, "onion_drop_agent_0_mean": 0.21, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.25, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.08, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.1, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.91, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 16.45, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 5.65, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.09, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.22, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.61, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.21, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.32, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.07, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.32, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 11, "soup_pickup_agent_1_mean": 4.58, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.23, "soup_delivery_agent_0_min": 3, 
"soup_delivery_agent_0_max": 10, "soup_delivery_agent_1_mean": 4.54, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.06, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.91, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 16.45, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.91, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 16.45, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [533.0, 582.0, 579.0, 579.0, 522.0, 570.0, 493.0, 513.0, 570.0, 579.0, 530.0, 573.0, 570.0, 522.0, 576.0, 576.0, 573.0, 525.0, 579.0, 579.0, 522.0, 576.0, 576.0, 573.0, 573.0, 576.0, 567.0, 573.0, 582.0, 630.0, 579.0, 525.0, 519.0, 579.0, 579.0, 573.0, 582.0, 570.0, 521.0, 527.0, 576.0, 425.0, 576.0, 582.0, 327.0, 579.0, 573.0, 576.0, 530.0, 573.0, 576.0, 573.0, 573.0, 579.0, 579.0, 576.0, 522.0, 570.0, 579.0, 519.0, 576.0, 573.0, 573.0, 579.0, 576.0, 530.0, 430.0, 627.0, 582.0, 582.0, 576.0, 576.0, 570.0, 576.0, 573.0, 579.0, 576.0, 573.0, 573.0, 573.0, 573.0, 564.0, 579.0, 576.0, 576.0, 576.0, 579.0, 582.0, 579.0, 576.0, 576.0, 573.0, 570.0, 530.0, 579.0, 582.0, 530.0, 522.0, 576.0, 573.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [266.0, 267.0, 291.0, 291.0, 291.0, 288.0, 291.0, 288.0, 257.0, 265.0, 294.0, 276.0, 243.0, 250.0, 261.0, 252.0, 289.0, 281.0, 281.0, 298.0, 279.0, 251.0, 295.0, 278.0, 279.0, 291.0, 257.0, 265.0, 293.0, 283.0, 280.0, 296.0, 281.0, 292.0, 254.0, 271.0, 298.0, 281.0, 288.0, 291.0, 251.0, 271.0, 287.0, 289.0, 286.0, 290.0, 285.0, 288.0, 293.0, 
280.0, 278.0, 298.0, 291.0, 276.0, 290.0, 283.0, 291.0, 291.0, 325.0, 305.0, 294.0, 285.0, 272.0, 253.0, 246.0, 273.0, 284.0, 295.0, 293.0, 286.0, 289.0, 284.0, 293.0, 289.0, 282.0, 288.0, 259.0, 262.0, 262.0, 265.0, 290.0, 286.0, 205.0, 220.0, 276.0, 300.0, 283.0, 299.0, 170.0, 157.0, 290.0, 289.0, 288.0, 285.0, 283.0, 293.0, 265.0, 265.0, 289.0, 284.0, 296.0, 280.0, 290.0, 283.0, 285.0, 288.0, 304.0, 275.0, 299.0, 280.0, 283.0, 293.0, 252.0, 270.0, 293.0, 277.0, 289.0, 290.0, 256.0, 263.0, 296.0, 280.0, 293.0, 280.0, 293.0, 280.0, 283.0, 296.0, 288.0, 288.0, 269.0, 261.0, 218.0, 212.0, 312.0, 315.0, 298.0, 284.0, 289.0, 293.0, 290.0, 286.0, 277.0, 299.0, 288.0, 282.0, 285.0, 291.0, 286.0, 287.0, 303.0, 276.0, 296.0, 280.0, 277.0, 296.0, 288.0, 285.0, 287.0, 286.0, 303.0, 270.0, 297.0, 267.0, 280.0, 299.0, 285.0, 291.0, 299.0, 277.0, 284.0, 292.0, 290.0, 289.0, 290.0, 292.0, 283.0, 296.0, 283.0, 293.0, 287.0, 289.0, 288.0, 285.0, 298.0, 272.0, 265.0, 265.0, 289.0, 290.0, 278.0, 304.0, 270.0, 260.0, 243.0, 279.0, 279.0, 297.0, 292.0, 281.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6827640519501001, "mean_inference_ms": 1.2056064329122549, "mean_action_processing_ms": 0.132274341413401, "mean_env_wait_ms": 0.8360329786459079, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 630.0, "episode_reward_min": 327.0, "episode_reward_mean": 561.32, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 157.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 280.66}, "hist_stats": {"episode_reward": [533.0, 582.0, 579.0, 579.0, 522.0, 570.0, 493.0, 513.0, 570.0, 579.0, 530.0, 573.0, 570.0, 522.0, 576.0, 576.0, 573.0, 525.0, 579.0, 579.0, 522.0, 576.0, 576.0, 573.0, 573.0, 576.0, 567.0, 573.0, 582.0, 630.0, 579.0, 525.0, 519.0, 579.0, 579.0, 573.0, 582.0, 570.0, 521.0, 527.0, 576.0, 425.0, 576.0, 582.0, 327.0, 579.0, 573.0, 576.0, 530.0, 573.0, 576.0, 573.0, 573.0, 579.0, 579.0, 576.0, 522.0, 
570.0, 579.0, 519.0, 576.0, 573.0, 573.0, 579.0, 576.0, 530.0, 430.0, 627.0, 582.0, 582.0, 576.0, 576.0, 570.0, 576.0, 573.0, 579.0, 576.0, 573.0, 573.0, 573.0, 573.0, 564.0, 579.0, 576.0, 576.0, 576.0, 579.0, 582.0, 579.0, 576.0, 576.0, 573.0, 570.0, 530.0, 579.0, 582.0, 530.0, 522.0, 576.0, 573.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [266.0, 267.0, 291.0, 291.0, 291.0, 288.0, 291.0, 288.0, 257.0, 265.0, 294.0, 276.0, 243.0, 250.0, 261.0, 252.0, 289.0, 281.0, 281.0, 298.0, 279.0, 251.0, 295.0, 278.0, 279.0, 291.0, 257.0, 265.0, 293.0, 283.0, 280.0, 296.0, 281.0, 292.0, 254.0, 271.0, 298.0, 281.0, 288.0, 291.0, 251.0, 271.0, 287.0, 289.0, 286.0, 290.0, 285.0, 288.0, 293.0, 280.0, 278.0, 298.0, 291.0, 276.0, 290.0, 283.0, 291.0, 291.0, 325.0, 305.0, 294.0, 285.0, 272.0, 253.0, 246.0, 273.0, 284.0, 295.0, 293.0, 286.0, 289.0, 284.0, 293.0, 289.0, 282.0, 288.0, 259.0, 262.0, 262.0, 265.0, 290.0, 286.0, 205.0, 220.0, 276.0, 300.0, 283.0, 299.0, 170.0, 157.0, 290.0, 289.0, 288.0, 285.0, 283.0, 293.0, 265.0, 265.0, 289.0, 284.0, 296.0, 280.0, 290.0, 283.0, 285.0, 288.0, 304.0, 275.0, 299.0, 280.0, 283.0, 293.0, 252.0, 270.0, 293.0, 277.0, 289.0, 290.0, 256.0, 263.0, 296.0, 280.0, 293.0, 280.0, 293.0, 280.0, 283.0, 296.0, 288.0, 288.0, 269.0, 261.0, 218.0, 212.0, 312.0, 315.0, 298.0, 284.0, 289.0, 293.0, 290.0, 286.0, 277.0, 299.0, 288.0, 282.0, 285.0, 291.0, 286.0, 287.0, 303.0, 276.0, 296.0, 280.0, 277.0, 296.0, 288.0, 285.0, 287.0, 286.0, 303.0, 270.0, 297.0, 267.0, 280.0, 
299.0, 285.0, 291.0, 299.0, 277.0, 284.0, 292.0, 290.0, 289.0, 290.0, 292.0, 283.0, 296.0, 283.0, 293.0, 287.0, 289.0, 288.0, 285.0, 298.0, 272.0, 265.0, 265.0, 289.0, 290.0, 278.0, 304.0, 270.0, 260.0, 243.0, 279.0, 279.0, 297.0, 292.0, 281.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6827640519501001, "mean_inference_ms": 1.2056064329122549, "mean_action_processing_ms": 0.132274341413401, "mean_env_wait_ms": 0.8360329786459079, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 4684800, "num_agent_steps_trained": 4684800, "num_env_steps_sampled": 2342400, "num_env_steps_trained": 2342400, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 2342400, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 4684800, "timers": {"training_iteration_time_ms": 3661.407, "learn_time_ms": 1135.083, "learn_throughput": 11276.706, "synch_weights_time_ms": 11.507}, "counters": {"num_env_steps_sampled": 2342400, "num_env_steps_trained": 2342400, "num_agent_steps_sampled": 4684800, "num_agent_steps_trained": 4684800}, "done": false, "episodes_total": 5856, "training_iteration": 183, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-11-50", "timestamp": 1666581110, "time_this_iter_s": 3.645003080368042, "time_total_s": 685.767548084259, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 685.767548084259, "timesteps_since_restore": 0, "iterations_since_restore": 183, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.466666666666665, "ram_util_percent": 10.616666666666665}}
+{"custom_metrics": {"sparse_reward_mean": 192.6, "sparse_reward_min": 100, "sparse_reward_max": 220, "shaped_reward_mean": 171.94, "shaped_reward_min": 127, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.57, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 16.96, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 15.3, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 16.48, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 25, "onion_drop_agent_0_mean": 0.3, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.26, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.13, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, 
"useful_onion_drop_agent_1_mean": 0.11, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.82, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 16.38, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 5.66, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 5.05, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.15, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.53, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.27, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.26, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.04, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.27, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 10, "soup_pickup_agent_1_mean": 4.62, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.18, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 10, "soup_delivery_agent_1_mean": 4.55, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.04, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.82, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 16.38, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.82, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 16.38, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0024932546075433493, "policy_loss": 0.0022324277088046074, "vf_loss": 
7.732563495635986, "vf_explained_var": 0.6382303833961487, "kl": 0.0018180841580033302, "entropy": 1.0248595476150513, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 2355200, "num_env_steps_trained": 2355200, "num_agent_steps_sampled": 4710400, "num_agent_steps_trained": 4710400}, "sampler_results": {"episode_reward_max": 630.0, "episode_reward_min": 327.0, "episode_reward_mean": 557.14, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 157.0}, "policy_reward_max": {"ppo": 316.0}, "policy_reward_mean": {"ppo": 278.57}, "custom_metrics": {"sparse_reward_mean": 192.6, "sparse_reward_min": 100, "sparse_reward_max": 220, "shaped_reward_mean": 171.94, "shaped_reward_min": 127, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.57, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 16.96, 
"onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 15.3, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 16.48, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 25, "onion_drop_agent_0_mean": 0.3, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.26, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.13, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.11, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.82, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 16.38, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 5.66, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 5.05, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.15, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.53, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.27, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.26, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.04, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.27, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 10, "soup_pickup_agent_1_mean": 4.62, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.18, "soup_delivery_agent_0_min": 2, 
"soup_delivery_agent_0_max": 10, "soup_delivery_agent_1_mean": 4.55, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.04, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.82, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 16.38, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.82, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 16.38, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [519.0, 579.0, 579.0, 573.0, 582.0, 570.0, 521.0, 527.0, 576.0, 425.0, 576.0, 582.0, 327.0, 579.0, 573.0, 576.0, 530.0, 573.0, 576.0, 573.0, 573.0, 579.0, 579.0, 576.0, 522.0, 570.0, 579.0, 519.0, 576.0, 573.0, 573.0, 579.0, 576.0, 530.0, 430.0, 627.0, 582.0, 582.0, 576.0, 576.0, 570.0, 576.0, 573.0, 579.0, 576.0, 573.0, 573.0, 573.0, 573.0, 564.0, 579.0, 576.0, 576.0, 576.0, 579.0, 582.0, 579.0, 576.0, 576.0, 573.0, 570.0, 530.0, 579.0, 582.0, 530.0, 522.0, 576.0, 573.0, 522.0, 533.0, 530.0, 570.0, 522.0, 530.0, 516.0, 582.0, 498.0, 579.0, 525.0, 522.0, 630.0, 573.0, 582.0, 441.0, 573.0, 579.0, 573.0, 584.0, 576.0, 573.0, 527.0, 579.0, 510.0, 576.0, 481.0, 525.0, 527.0, 582.0, 579.0, 558.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [246.0, 273.0, 284.0, 295.0, 293.0, 286.0, 289.0, 284.0, 293.0, 289.0, 282.0, 288.0, 259.0, 262.0, 262.0, 265.0, 290.0, 286.0, 205.0, 220.0, 276.0, 300.0, 283.0, 299.0, 170.0, 157.0, 290.0, 289.0, 288.0, 285.0, 283.0, 293.0, 265.0, 265.0, 289.0, 284.0, 296.0, 280.0, 290.0, 283.0, 285.0, 288.0, 304.0, 275.0, 299.0, 280.0, 283.0, 293.0, 252.0, 
270.0, 293.0, 277.0, 289.0, 290.0, 256.0, 263.0, 296.0, 280.0, 293.0, 280.0, 293.0, 280.0, 283.0, 296.0, 288.0, 288.0, 269.0, 261.0, 218.0, 212.0, 312.0, 315.0, 298.0, 284.0, 289.0, 293.0, 290.0, 286.0, 277.0, 299.0, 288.0, 282.0, 285.0, 291.0, 286.0, 287.0, 303.0, 276.0, 296.0, 280.0, 277.0, 296.0, 288.0, 285.0, 287.0, 286.0, 303.0, 270.0, 297.0, 267.0, 280.0, 299.0, 285.0, 291.0, 299.0, 277.0, 284.0, 292.0, 290.0, 289.0, 290.0, 292.0, 283.0, 296.0, 283.0, 293.0, 287.0, 289.0, 288.0, 285.0, 298.0, 272.0, 265.0, 265.0, 289.0, 290.0, 278.0, 304.0, 270.0, 260.0, 243.0, 279.0, 279.0, 297.0, 292.0, 281.0, 255.0, 267.0, 266.0, 267.0, 281.0, 249.0, 282.0, 288.0, 262.0, 260.0, 259.0, 271.0, 257.0, 259.0, 292.0, 290.0, 247.0, 251.0, 286.0, 293.0, 262.0, 263.0, 258.0, 264.0, 316.0, 314.0, 289.0, 284.0, 288.0, 294.0, 215.0, 226.0, 299.0, 274.0, 285.0, 294.0, 288.0, 285.0, 294.0, 290.0, 288.0, 288.0, 282.0, 291.0, 274.0, 253.0, 283.0, 296.0, 241.0, 269.0, 298.0, 278.0, 240.0, 241.0, 263.0, 262.0, 270.0, 257.0, 281.0, 301.0, 293.0, 286.0, 284.0, 274.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.682712178829132, "mean_inference_ms": 1.2054880729211481, "mean_action_processing_ms": 0.13226720987812923, "mean_env_wait_ms": 0.8358785654687424, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 630.0, "episode_reward_min": 327.0, "episode_reward_mean": 557.14, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 157.0}, "policy_reward_max": {"ppo": 316.0}, "policy_reward_mean": {"ppo": 278.57}, "hist_stats": {"episode_reward": [519.0, 579.0, 579.0, 573.0, 582.0, 570.0, 521.0, 527.0, 576.0, 425.0, 576.0, 582.0, 327.0, 579.0, 573.0, 576.0, 530.0, 573.0, 576.0, 573.0, 573.0, 579.0, 579.0, 576.0, 522.0, 570.0, 579.0, 519.0, 576.0, 573.0, 573.0, 579.0, 576.0, 530.0, 430.0, 627.0, 582.0, 582.0, 576.0, 576.0, 570.0, 576.0, 573.0, 579.0, 576.0, 573.0, 573.0, 573.0, 573.0, 564.0, 579.0, 576.0, 576.0, 576.0, 579.0, 582.0, 579.0, 
576.0, 576.0, 573.0, 570.0, 530.0, 579.0, 582.0, 530.0, 522.0, 576.0, 573.0, 522.0, 533.0, 530.0, 570.0, 522.0, 530.0, 516.0, 582.0, 498.0, 579.0, 525.0, 522.0, 630.0, 573.0, 582.0, 441.0, 573.0, 579.0, 573.0, 584.0, 576.0, 573.0, 527.0, 579.0, 510.0, 576.0, 481.0, 525.0, 527.0, 582.0, 579.0, 558.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [246.0, 273.0, 284.0, 295.0, 293.0, 286.0, 289.0, 284.0, 293.0, 289.0, 282.0, 288.0, 259.0, 262.0, 262.0, 265.0, 290.0, 286.0, 205.0, 220.0, 276.0, 300.0, 283.0, 299.0, 170.0, 157.0, 290.0, 289.0, 288.0, 285.0, 283.0, 293.0, 265.0, 265.0, 289.0, 284.0, 296.0, 280.0, 290.0, 283.0, 285.0, 288.0, 304.0, 275.0, 299.0, 280.0, 283.0, 293.0, 252.0, 270.0, 293.0, 277.0, 289.0, 290.0, 256.0, 263.0, 296.0, 280.0, 293.0, 280.0, 293.0, 280.0, 283.0, 296.0, 288.0, 288.0, 269.0, 261.0, 218.0, 212.0, 312.0, 315.0, 298.0, 284.0, 289.0, 293.0, 290.0, 286.0, 277.0, 299.0, 288.0, 282.0, 285.0, 291.0, 286.0, 287.0, 303.0, 276.0, 296.0, 280.0, 277.0, 296.0, 288.0, 285.0, 287.0, 286.0, 303.0, 270.0, 297.0, 267.0, 280.0, 299.0, 285.0, 291.0, 299.0, 277.0, 284.0, 292.0, 290.0, 289.0, 290.0, 292.0, 283.0, 296.0, 283.0, 293.0, 287.0, 289.0, 288.0, 285.0, 298.0, 272.0, 265.0, 265.0, 289.0, 290.0, 278.0, 304.0, 270.0, 260.0, 243.0, 279.0, 279.0, 297.0, 292.0, 281.0, 255.0, 267.0, 266.0, 267.0, 281.0, 249.0, 282.0, 288.0, 262.0, 260.0, 259.0, 271.0, 257.0, 259.0, 292.0, 290.0, 247.0, 251.0, 286.0, 293.0, 262.0, 263.0, 258.0, 264.0, 316.0, 314.0, 289.0, 284.0, 288.0, 
294.0, 215.0, 226.0, 299.0, 274.0, 285.0, 294.0, 288.0, 285.0, 294.0, 290.0, 288.0, 288.0, 282.0, 291.0, 274.0, 253.0, 283.0, 296.0, 241.0, 269.0, 298.0, 278.0, 240.0, 241.0, 263.0, 262.0, 270.0, 257.0, 281.0, 301.0, 293.0, 286.0, 284.0, 274.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.682712178829132, "mean_inference_ms": 1.2054880729211481, "mean_action_processing_ms": 0.13226720987812923, "mean_env_wait_ms": 0.8358785654687424, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 4710400, "num_agent_steps_trained": 4710400, "num_env_steps_sampled": 2355200, "num_env_steps_trained": 2355200, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 2355200, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 4710400, "timers": {"training_iteration_time_ms": 3667.443, "learn_time_ms": 1140.231, "learn_throughput": 11225.797, "synch_weights_time_ms": 11.702}, "counters": {"num_env_steps_sampled": 2355200, "num_env_steps_trained": 2355200, "num_agent_steps_sampled": 4710400, "num_agent_steps_trained": 4710400}, "done": false, "episodes_total": 5888, "training_iteration": 184, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-11-54", "timestamp": 1666581114, "time_this_iter_s": 3.6834990978240967, "time_total_s": 689.4510471820831, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 689.4510471820831, "timesteps_since_restore": 0, "iterations_since_restore": 184, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.52, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 192.8, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 171.48, "shaped_reward_min": 138, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.34, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 17.05, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 15.08, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 16.64, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 25, "onion_drop_agent_0_mean": 0.29, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.23, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.1, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, 
"useful_onion_drop_agent_1_mean": 0.12, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.68, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 16.43, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 5.73, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 4.95, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.21, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.44, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.31, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.23, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.3, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 10, "soup_pickup_agent_1_mean": 4.57, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.22, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 10, "soup_delivery_agent_1_mean": 4.5, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.68, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 16.43, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.68, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 16.43, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0012522500474005938, "policy_loss": 0.0009863225277513266, "vf_loss": 
7.730276584625244, "vf_explained_var": 0.6408652663230896, "kl": 0.002116520656272769, "entropy": 1.0141997337341309, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 2368000, "num_env_steps_trained": 2368000, "num_agent_steps_sampled": 4736000, "num_agent_steps_trained": 4736000}, "sampler_results": {"episode_reward_max": 630.0, "episode_reward_min": 430.0, "episode_reward_mean": 557.08, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 212.0}, "policy_reward_max": {"ppo": 316.0}, "policy_reward_mean": {"ppo": 278.54}, "custom_metrics": {"sparse_reward_mean": 192.8, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 171.48, "shaped_reward_min": 138, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.34, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 17.05, 
"onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 15.08, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 16.64, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 25, "onion_drop_agent_0_mean": 0.29, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.23, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.1, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.12, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.68, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 16.43, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 5.73, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 4.95, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.21, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.44, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.31, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.23, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.3, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 10, "soup_pickup_agent_1_mean": 4.57, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.22, "soup_delivery_agent_0_min": 1, 
"soup_delivery_agent_0_max": 10, "soup_delivery_agent_1_mean": 4.5, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.68, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 16.43, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.68, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 16.43, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [576.0, 530.0, 430.0, 627.0, 582.0, 582.0, 576.0, 576.0, 570.0, 576.0, 573.0, 579.0, 576.0, 573.0, 573.0, 573.0, 573.0, 564.0, 579.0, 576.0, 576.0, 576.0, 579.0, 582.0, 579.0, 576.0, 576.0, 573.0, 570.0, 530.0, 579.0, 582.0, 530.0, 522.0, 576.0, 573.0, 522.0, 533.0, 530.0, 570.0, 522.0, 530.0, 516.0, 582.0, 498.0, 579.0, 525.0, 522.0, 630.0, 573.0, 582.0, 441.0, 573.0, 579.0, 573.0, 584.0, 576.0, 573.0, 527.0, 579.0, 510.0, 576.0, 481.0, 525.0, 527.0, 582.0, 579.0, 558.0, 522.0, 579.0, 579.0, 579.0, 570.0, 516.0, 570.0, 576.0, 573.0, 525.0, 581.0, 519.0, 584.0, 465.0, 524.0, 530.0, 573.0, 576.0, 513.0, 579.0, 516.0, 573.0, 522.0, 465.0, 573.0, 573.0, 576.0, 582.0, 582.0, 570.0, 570.0, 573.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [288.0, 288.0, 269.0, 261.0, 218.0, 212.0, 312.0, 315.0, 298.0, 284.0, 289.0, 293.0, 290.0, 286.0, 277.0, 299.0, 288.0, 282.0, 285.0, 291.0, 286.0, 287.0, 303.0, 276.0, 296.0, 280.0, 277.0, 296.0, 288.0, 285.0, 287.0, 286.0, 303.0, 270.0, 297.0, 267.0, 280.0, 299.0, 285.0, 291.0, 299.0, 277.0, 284.0, 292.0, 290.0, 289.0, 290.0, 292.0, 283.0, 
296.0, 283.0, 293.0, 287.0, 289.0, 288.0, 285.0, 298.0, 272.0, 265.0, 265.0, 289.0, 290.0, 278.0, 304.0, 270.0, 260.0, 243.0, 279.0, 279.0, 297.0, 292.0, 281.0, 255.0, 267.0, 266.0, 267.0, 281.0, 249.0, 282.0, 288.0, 262.0, 260.0, 259.0, 271.0, 257.0, 259.0, 292.0, 290.0, 247.0, 251.0, 286.0, 293.0, 262.0, 263.0, 258.0, 264.0, 316.0, 314.0, 289.0, 284.0, 288.0, 294.0, 215.0, 226.0, 299.0, 274.0, 285.0, 294.0, 288.0, 285.0, 294.0, 290.0, 288.0, 288.0, 282.0, 291.0, 274.0, 253.0, 283.0, 296.0, 241.0, 269.0, 298.0, 278.0, 240.0, 241.0, 263.0, 262.0, 270.0, 257.0, 281.0, 301.0, 293.0, 286.0, 284.0, 274.0, 265.0, 257.0, 287.0, 292.0, 299.0, 280.0, 280.0, 299.0, 285.0, 285.0, 249.0, 267.0, 290.0, 280.0, 287.0, 289.0, 290.0, 283.0, 273.0, 252.0, 298.0, 283.0, 265.0, 254.0, 298.0, 286.0, 224.0, 241.0, 257.0, 267.0, 264.0, 266.0, 289.0, 284.0, 278.0, 298.0, 260.0, 253.0, 293.0, 286.0, 259.0, 257.0, 290.0, 283.0, 270.0, 252.0, 216.0, 249.0, 295.0, 278.0, 291.0, 282.0, 282.0, 294.0, 288.0, 294.0, 287.0, 295.0, 280.0, 290.0, 288.0, 282.0, 293.0, 280.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6827000572252169, "mean_inference_ms": 1.2053664161777287, "mean_action_processing_ms": 0.1322612138205102, "mean_env_wait_ms": 0.835721045762788, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 630.0, "episode_reward_min": 430.0, "episode_reward_mean": 557.08, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 212.0}, "policy_reward_max": {"ppo": 316.0}, "policy_reward_mean": {"ppo": 278.54}, "hist_stats": {"episode_reward": [576.0, 530.0, 430.0, 627.0, 582.0, 582.0, 576.0, 576.0, 570.0, 576.0, 573.0, 579.0, 576.0, 573.0, 573.0, 573.0, 573.0, 564.0, 579.0, 576.0, 576.0, 576.0, 579.0, 582.0, 579.0, 576.0, 576.0, 573.0, 570.0, 530.0, 579.0, 582.0, 530.0, 522.0, 576.0, 573.0, 522.0, 533.0, 530.0, 570.0, 522.0, 530.0, 516.0, 582.0, 498.0, 579.0, 525.0, 522.0, 630.0, 573.0, 582.0, 441.0, 573.0, 579.0, 573.0, 584.0, 576.0, 
573.0, 527.0, 579.0, 510.0, 576.0, 481.0, 525.0, 527.0, 582.0, 579.0, 558.0, 522.0, 579.0, 579.0, 579.0, 570.0, 516.0, 570.0, 576.0, 573.0, 525.0, 581.0, 519.0, 584.0, 465.0, 524.0, 530.0, 573.0, 576.0, 513.0, 579.0, 516.0, 573.0, 522.0, 465.0, 573.0, 573.0, 576.0, 582.0, 582.0, 570.0, 570.0, 573.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [288.0, 288.0, 269.0, 261.0, 218.0, 212.0, 312.0, 315.0, 298.0, 284.0, 289.0, 293.0, 290.0, 286.0, 277.0, 299.0, 288.0, 282.0, 285.0, 291.0, 286.0, 287.0, 303.0, 276.0, 296.0, 280.0, 277.0, 296.0, 288.0, 285.0, 287.0, 286.0, 303.0, 270.0, 297.0, 267.0, 280.0, 299.0, 285.0, 291.0, 299.0, 277.0, 284.0, 292.0, 290.0, 289.0, 290.0, 292.0, 283.0, 296.0, 283.0, 293.0, 287.0, 289.0, 288.0, 285.0, 298.0, 272.0, 265.0, 265.0, 289.0, 290.0, 278.0, 304.0, 270.0, 260.0, 243.0, 279.0, 279.0, 297.0, 292.0, 281.0, 255.0, 267.0, 266.0, 267.0, 281.0, 249.0, 282.0, 288.0, 262.0, 260.0, 259.0, 271.0, 257.0, 259.0, 292.0, 290.0, 247.0, 251.0, 286.0, 293.0, 262.0, 263.0, 258.0, 264.0, 316.0, 314.0, 289.0, 284.0, 288.0, 294.0, 215.0, 226.0, 299.0, 274.0, 285.0, 294.0, 288.0, 285.0, 294.0, 290.0, 288.0, 288.0, 282.0, 291.0, 274.0, 253.0, 283.0, 296.0, 241.0, 269.0, 298.0, 278.0, 240.0, 241.0, 263.0, 262.0, 270.0, 257.0, 281.0, 301.0, 293.0, 286.0, 284.0, 274.0, 265.0, 257.0, 287.0, 292.0, 299.0, 280.0, 280.0, 299.0, 285.0, 285.0, 249.0, 267.0, 290.0, 280.0, 287.0, 289.0, 290.0, 283.0, 273.0, 252.0, 298.0, 283.0, 265.0, 254.0, 298.0, 286.0, 224.0, 241.0, 257.0, 
267.0, 264.0, 266.0, 289.0, 284.0, 278.0, 298.0, 260.0, 253.0, 293.0, 286.0, 259.0, 257.0, 290.0, 283.0, 270.0, 252.0, 216.0, 249.0, 295.0, 278.0, 291.0, 282.0, 282.0, 294.0, 288.0, 294.0, 287.0, 295.0, 280.0, 290.0, 288.0, 282.0, 293.0, 280.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6827000572252169, "mean_inference_ms": 1.2053664161777287, "mean_action_processing_ms": 0.1322612138205102, "mean_env_wait_ms": 0.835721045762788, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 4736000, "num_agent_steps_trained": 4736000, "num_env_steps_sampled": 2368000, "num_env_steps_trained": 2368000, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 2368000, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 4736000, "timers": {"training_iteration_time_ms": 3656.599, "learn_time_ms": 1129.782, "learn_throughput": 11329.616, "synch_weights_time_ms": 12.079}, "counters": {"num_env_steps_sampled": 2368000, "num_env_steps_trained": 2368000, "num_agent_steps_sampled": 4736000, "num_agent_steps_trained": 4736000}, "done": false, "episodes_total": 5920, "training_iteration": 185, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-11-57", "timestamp": 1666581117, "time_this_iter_s": 3.6200082302093506, "time_total_s": 693.0710554122925, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 693.0710554122925, "timesteps_since_restore": 0, "iterations_since_restore": 185, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.933333333333334, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 190.8, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 170.0, "shaped_reward_min": 136, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.38, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 16.78, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 15.08, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 16.4, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.25, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.26, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.09, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, 
"useful_onion_drop_agent_1_mean": 0.17, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 14.78, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 16.12, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 5.61, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 5.08, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.04, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.46, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 10, "dish_drop_agent_0_mean": 0.32, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.29, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.06, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.13, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 10, "soup_pickup_agent_1_mean": 4.7, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.04, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.58, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.06, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 14.78, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 16.12, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.78, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 16.12, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0013259206898510456, "policy_loss": 0.001057651941664517, "vf_loss": 
7.706806659698486, "vf_explained_var": 0.6470644474029541, "kl": 0.003712405450642109, "entropy": 1.0048234462738037, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 2380800, "num_env_steps_trained": 2380800, "num_agent_steps_sampled": 4761600, "num_agent_steps_trained": 4761600}, "sampler_results": {"episode_reward_max": 630.0, "episode_reward_min": 416.0, "episode_reward_mean": 551.6, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 201.0}, "policy_reward_max": {"ppo": 316.0}, "policy_reward_mean": {"ppo": 275.8}, "custom_metrics": {"sparse_reward_mean": 190.8, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 170.0, "shaped_reward_min": 136, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.38, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 16.78, 
"onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 15.08, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 16.4, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.25, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.26, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.09, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.17, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 14.78, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 16.12, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 5.61, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 5.08, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.04, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.46, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 10, "dish_drop_agent_0_mean": 0.32, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.29, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.06, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.13, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 10, "soup_pickup_agent_1_mean": 4.7, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.04, "soup_delivery_agent_0_min": 1, 
"soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.58, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.06, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 14.78, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 16.12, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.78, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 16.12, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [530.0, 522.0, 576.0, 573.0, 522.0, 533.0, 530.0, 570.0, 522.0, 530.0, 516.0, 582.0, 498.0, 579.0, 525.0, 522.0, 630.0, 573.0, 582.0, 441.0, 573.0, 579.0, 573.0, 584.0, 576.0, 573.0, 527.0, 579.0, 510.0, 576.0, 481.0, 525.0, 527.0, 582.0, 579.0, 558.0, 522.0, 579.0, 579.0, 579.0, 570.0, 516.0, 570.0, 576.0, 573.0, 525.0, 581.0, 519.0, 584.0, 465.0, 524.0, 530.0, 573.0, 576.0, 513.0, 579.0, 516.0, 573.0, 522.0, 465.0, 573.0, 573.0, 576.0, 582.0, 582.0, 570.0, 570.0, 573.0, 582.0, 539.0, 519.0, 579.0, 573.0, 567.0, 573.0, 579.0, 576.0, 582.0, 564.0, 579.0, 573.0, 524.0, 530.0, 573.0, 582.0, 525.0, 579.0, 576.0, 498.0, 525.0, 519.0, 573.0, 576.0, 579.0, 579.0, 533.0, 570.0, 470.0, 582.0, 416.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [270.0, 260.0, 243.0, 279.0, 279.0, 297.0, 292.0, 281.0, 255.0, 267.0, 266.0, 267.0, 281.0, 249.0, 282.0, 288.0, 262.0, 260.0, 259.0, 271.0, 257.0, 259.0, 292.0, 290.0, 247.0, 251.0, 286.0, 293.0, 262.0, 263.0, 258.0, 264.0, 316.0, 314.0, 289.0, 284.0, 288.0, 294.0, 215.0, 226.0, 299.0, 274.0, 285.0, 294.0, 288.0, 285.0, 294.0, 290.0, 288.0, 
288.0, 282.0, 291.0, 274.0, 253.0, 283.0, 296.0, 241.0, 269.0, 298.0, 278.0, 240.0, 241.0, 263.0, 262.0, 270.0, 257.0, 281.0, 301.0, 293.0, 286.0, 284.0, 274.0, 265.0, 257.0, 287.0, 292.0, 299.0, 280.0, 280.0, 299.0, 285.0, 285.0, 249.0, 267.0, 290.0, 280.0, 287.0, 289.0, 290.0, 283.0, 273.0, 252.0, 298.0, 283.0, 265.0, 254.0, 298.0, 286.0, 224.0, 241.0, 257.0, 267.0, 264.0, 266.0, 289.0, 284.0, 278.0, 298.0, 260.0, 253.0, 293.0, 286.0, 259.0, 257.0, 290.0, 283.0, 270.0, 252.0, 216.0, 249.0, 295.0, 278.0, 291.0, 282.0, 282.0, 294.0, 288.0, 294.0, 287.0, 295.0, 280.0, 290.0, 288.0, 282.0, 293.0, 280.0, 277.0, 305.0, 272.0, 267.0, 250.0, 269.0, 304.0, 275.0, 292.0, 281.0, 270.0, 297.0, 290.0, 283.0, 299.0, 280.0, 277.0, 299.0, 287.0, 295.0, 289.0, 275.0, 286.0, 293.0, 286.0, 287.0, 268.0, 256.0, 267.0, 263.0, 293.0, 280.0, 296.0, 286.0, 252.0, 273.0, 290.0, 289.0, 278.0, 298.0, 255.0, 243.0, 269.0, 256.0, 265.0, 254.0, 299.0, 274.0, 282.0, 294.0, 288.0, 291.0, 298.0, 281.0, 261.0, 272.0, 274.0, 296.0, 222.0, 248.0, 293.0, 289.0, 215.0, 201.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6826704429125031, "mean_inference_ms": 1.2052526937150345, "mean_action_processing_ms": 0.13225667069598454, "mean_env_wait_ms": 0.8355802940746705, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 630.0, "episode_reward_min": 416.0, "episode_reward_mean": 551.6, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 201.0}, "policy_reward_max": {"ppo": 316.0}, "policy_reward_mean": {"ppo": 275.8}, "hist_stats": {"episode_reward": [530.0, 522.0, 576.0, 573.0, 522.0, 533.0, 530.0, 570.0, 522.0, 530.0, 516.0, 582.0, 498.0, 579.0, 525.0, 522.0, 630.0, 573.0, 582.0, 441.0, 573.0, 579.0, 573.0, 584.0, 576.0, 573.0, 527.0, 579.0, 510.0, 576.0, 481.0, 525.0, 527.0, 582.0, 579.0, 558.0, 522.0, 579.0, 579.0, 579.0, 570.0, 516.0, 570.0, 576.0, 573.0, 525.0, 581.0, 519.0, 584.0, 465.0, 524.0, 530.0, 573.0, 576.0, 513.0, 579.0, 516.0, 
573.0, 522.0, 465.0, 573.0, 573.0, 576.0, 582.0, 582.0, 570.0, 570.0, 573.0, 582.0, 539.0, 519.0, 579.0, 573.0, 567.0, 573.0, 579.0, 576.0, 582.0, 564.0, 579.0, 573.0, 524.0, 530.0, 573.0, 582.0, 525.0, 579.0, 576.0, 498.0, 525.0, 519.0, 573.0, 576.0, 579.0, 579.0, 533.0, 570.0, 470.0, 582.0, 416.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [270.0, 260.0, 243.0, 279.0, 279.0, 297.0, 292.0, 281.0, 255.0, 267.0, 266.0, 267.0, 281.0, 249.0, 282.0, 288.0, 262.0, 260.0, 259.0, 271.0, 257.0, 259.0, 292.0, 290.0, 247.0, 251.0, 286.0, 293.0, 262.0, 263.0, 258.0, 264.0, 316.0, 314.0, 289.0, 284.0, 288.0, 294.0, 215.0, 226.0, 299.0, 274.0, 285.0, 294.0, 288.0, 285.0, 294.0, 290.0, 288.0, 288.0, 282.0, 291.0, 274.0, 253.0, 283.0, 296.0, 241.0, 269.0, 298.0, 278.0, 240.0, 241.0, 263.0, 262.0, 270.0, 257.0, 281.0, 301.0, 293.0, 286.0, 284.0, 274.0, 265.0, 257.0, 287.0, 292.0, 299.0, 280.0, 280.0, 299.0, 285.0, 285.0, 249.0, 267.0, 290.0, 280.0, 287.0, 289.0, 290.0, 283.0, 273.0, 252.0, 298.0, 283.0, 265.0, 254.0, 298.0, 286.0, 224.0, 241.0, 257.0, 267.0, 264.0, 266.0, 289.0, 284.0, 278.0, 298.0, 260.0, 253.0, 293.0, 286.0, 259.0, 257.0, 290.0, 283.0, 270.0, 252.0, 216.0, 249.0, 295.0, 278.0, 291.0, 282.0, 282.0, 294.0, 288.0, 294.0, 287.0, 295.0, 280.0, 290.0, 288.0, 282.0, 293.0, 280.0, 277.0, 305.0, 272.0, 267.0, 250.0, 269.0, 304.0, 275.0, 292.0, 281.0, 270.0, 297.0, 290.0, 283.0, 299.0, 280.0, 277.0, 299.0, 287.0, 295.0, 289.0, 275.0, 286.0, 293.0, 286.0, 287.0, 268.0, 256.0, 267.0, 
263.0, 293.0, 280.0, 296.0, 286.0, 252.0, 273.0, 290.0, 289.0, 278.0, 298.0, 255.0, 243.0, 269.0, 256.0, 265.0, 254.0, 299.0, 274.0, 282.0, 294.0, 288.0, 291.0, 298.0, 281.0, 261.0, 272.0, 274.0, 296.0, 222.0, 248.0, 293.0, 289.0, 215.0, 201.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6826704429125031, "mean_inference_ms": 1.2052526937150345, "mean_action_processing_ms": 0.13225667069598454, "mean_env_wait_ms": 0.8355802940746705, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 4761600, "num_agent_steps_trained": 4761600, "num_env_steps_sampled": 2380800, "num_env_steps_trained": 2380800, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 2380800, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 4761600, "timers": {"training_iteration_time_ms": 3653.653, "learn_time_ms": 1127.739, "learn_throughput": 11350.146, "synch_weights_time_ms": 11.485}, "counters": {"num_env_steps_sampled": 2380800, "num_env_steps_trained": 2380800, "num_agent_steps_sampled": 4761600, "num_agent_steps_trained": 4761600}, "done": false, "episodes_total": 5952, "training_iteration": 186, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-12-01", "timestamp": 1666581121, "time_this_iter_s": 3.6360175609588623, "time_total_s": 696.7070729732513, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 696.7070729732513, "timesteps_since_restore": 0, "iterations_since_restore": 186, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.339999999999996, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 192.6, "sparse_reward_min": 140, "sparse_reward_max": 200, "shaped_reward_mean": 170.61, "shaped_reward_min": 136, "shaped_reward_max": 184, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.57, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 16.59, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 15.28, "useful_onion_pickup_agent_0_min": 9, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 16.29, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.21, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.19, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.04, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, 
"useful_onion_drop_agent_1_mean": 0.13, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 15.0, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 16.02, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 5.59, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.09, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.05, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.5, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 10, "dish_drop_agent_0_mean": 0.29, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.29, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.06, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.14, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.72, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.08, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.57, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.08, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 15.0, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 16.02, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.0, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 16.02, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0003195032477378845, "policy_loss": -0.0005675801075994968, "vf_loss": 
7.603795051574707, "vf_explained_var": 0.6576536893844604, "kl": 0.0021047075279057026, "entropy": 1.0246057510375977, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 2393600, "num_env_steps_trained": 2393600, "num_agent_steps_sampled": 4787200, "num_agent_steps_trained": 4787200}, "sampler_results": {"episode_reward_max": 584.0, "episode_reward_min": 416.0, "episode_reward_mean": 555.81, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 201.0}, "policy_reward_max": {"ppo": 306.0}, "policy_reward_mean": {"ppo": 277.905}, "custom_metrics": {"sparse_reward_mean": 192.6, "sparse_reward_min": 140, "sparse_reward_max": 200, "shaped_reward_mean": 170.61, "shaped_reward_min": 136, "shaped_reward_max": 184, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.57, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 16.59, 
"onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 15.28, "useful_onion_pickup_agent_0_min": 9, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 16.29, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.21, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.19, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.04, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.13, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 15.0, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 16.02, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 5.59, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.09, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.05, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.5, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 10, "dish_drop_agent_0_mean": 0.29, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.29, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.06, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.14, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.72, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.08, "soup_delivery_agent_0_min": 1, 
"soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.57, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.08, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 15.0, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 16.02, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.0, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 16.02, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [527.0, 582.0, 579.0, 558.0, 522.0, 579.0, 579.0, 579.0, 570.0, 516.0, 570.0, 576.0, 573.0, 525.0, 581.0, 519.0, 584.0, 465.0, 524.0, 530.0, 573.0, 576.0, 513.0, 579.0, 516.0, 573.0, 522.0, 465.0, 573.0, 573.0, 576.0, 582.0, 582.0, 570.0, 570.0, 573.0, 582.0, 539.0, 519.0, 579.0, 573.0, 567.0, 573.0, 579.0, 576.0, 582.0, 564.0, 579.0, 573.0, 524.0, 530.0, 573.0, 582.0, 525.0, 579.0, 576.0, 498.0, 525.0, 519.0, 573.0, 576.0, 579.0, 579.0, 533.0, 570.0, 470.0, 582.0, 416.0, 576.0, 570.0, 576.0, 579.0, 582.0, 516.0, 576.0, 573.0, 564.0, 576.0, 582.0, 513.0, 519.0, 578.0, 573.0, 516.0, 582.0, 570.0, 579.0, 570.0, 573.0, 576.0, 516.0, 576.0, 579.0, 573.0, 530.0, 584.0, 525.0, 522.0, 530.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [270.0, 257.0, 281.0, 301.0, 293.0, 286.0, 284.0, 274.0, 265.0, 257.0, 287.0, 292.0, 299.0, 280.0, 280.0, 299.0, 285.0, 285.0, 249.0, 267.0, 290.0, 280.0, 287.0, 289.0, 290.0, 283.0, 273.0, 252.0, 298.0, 283.0, 265.0, 254.0, 298.0, 286.0, 224.0, 241.0, 257.0, 267.0, 264.0, 266.0, 289.0, 284.0, 278.0, 298.0, 260.0, 253.0, 293.0, 286.0, 259.0, 
257.0, 290.0, 283.0, 270.0, 252.0, 216.0, 249.0, 295.0, 278.0, 291.0, 282.0, 282.0, 294.0, 288.0, 294.0, 287.0, 295.0, 280.0, 290.0, 288.0, 282.0, 293.0, 280.0, 277.0, 305.0, 272.0, 267.0, 250.0, 269.0, 304.0, 275.0, 292.0, 281.0, 270.0, 297.0, 290.0, 283.0, 299.0, 280.0, 277.0, 299.0, 287.0, 295.0, 289.0, 275.0, 286.0, 293.0, 286.0, 287.0, 268.0, 256.0, 267.0, 263.0, 293.0, 280.0, 296.0, 286.0, 252.0, 273.0, 290.0, 289.0, 278.0, 298.0, 255.0, 243.0, 269.0, 256.0, 265.0, 254.0, 299.0, 274.0, 282.0, 294.0, 288.0, 291.0, 298.0, 281.0, 261.0, 272.0, 274.0, 296.0, 222.0, 248.0, 293.0, 289.0, 215.0, 201.0, 294.0, 282.0, 281.0, 289.0, 281.0, 295.0, 300.0, 279.0, 291.0, 291.0, 258.0, 258.0, 283.0, 293.0, 283.0, 290.0, 282.0, 282.0, 303.0, 273.0, 306.0, 276.0, 255.0, 258.0, 273.0, 246.0, 272.0, 306.0, 294.0, 279.0, 253.0, 263.0, 296.0, 286.0, 294.0, 276.0, 291.0, 288.0, 278.0, 292.0, 286.0, 287.0, 291.0, 285.0, 256.0, 260.0, 286.0, 290.0, 286.0, 293.0, 287.0, 286.0, 263.0, 267.0, 296.0, 288.0, 262.0, 263.0, 256.0, 266.0, 270.0, 260.0, 291.0, 288.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.68267583764297, "mean_inference_ms": 1.2051792659431373, "mean_action_processing_ms": 0.13225872711682485, "mean_env_wait_ms": 0.8354850608241722, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 584.0, "episode_reward_min": 416.0, "episode_reward_mean": 555.81, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 201.0}, "policy_reward_max": {"ppo": 306.0}, "policy_reward_mean": {"ppo": 277.905}, "hist_stats": {"episode_reward": [527.0, 582.0, 579.0, 558.0, 522.0, 579.0, 579.0, 579.0, 570.0, 516.0, 570.0, 576.0, 573.0, 525.0, 581.0, 519.0, 584.0, 465.0, 524.0, 530.0, 573.0, 576.0, 513.0, 579.0, 516.0, 573.0, 522.0, 465.0, 573.0, 573.0, 576.0, 582.0, 582.0, 570.0, 570.0, 573.0, 582.0, 539.0, 519.0, 579.0, 573.0, 567.0, 573.0, 579.0, 576.0, 582.0, 564.0, 579.0, 573.0, 524.0, 530.0, 573.0, 582.0, 525.0, 579.0, 576.0, 498.0, 
525.0, 519.0, 573.0, 576.0, 579.0, 579.0, 533.0, 570.0, 470.0, 582.0, 416.0, 576.0, 570.0, 576.0, 579.0, 582.0, 516.0, 576.0, 573.0, 564.0, 576.0, 582.0, 513.0, 519.0, 578.0, 573.0, 516.0, 582.0, 570.0, 579.0, 570.0, 573.0, 576.0, 516.0, 576.0, 579.0, 573.0, 530.0, 584.0, 525.0, 522.0, 530.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [270.0, 257.0, 281.0, 301.0, 293.0, 286.0, 284.0, 274.0, 265.0, 257.0, 287.0, 292.0, 299.0, 280.0, 280.0, 299.0, 285.0, 285.0, 249.0, 267.0, 290.0, 280.0, 287.0, 289.0, 290.0, 283.0, 273.0, 252.0, 298.0, 283.0, 265.0, 254.0, 298.0, 286.0, 224.0, 241.0, 257.0, 267.0, 264.0, 266.0, 289.0, 284.0, 278.0, 298.0, 260.0, 253.0, 293.0, 286.0, 259.0, 257.0, 290.0, 283.0, 270.0, 252.0, 216.0, 249.0, 295.0, 278.0, 291.0, 282.0, 282.0, 294.0, 288.0, 294.0, 287.0, 295.0, 280.0, 290.0, 288.0, 282.0, 293.0, 280.0, 277.0, 305.0, 272.0, 267.0, 250.0, 269.0, 304.0, 275.0, 292.0, 281.0, 270.0, 297.0, 290.0, 283.0, 299.0, 280.0, 277.0, 299.0, 287.0, 295.0, 289.0, 275.0, 286.0, 293.0, 286.0, 287.0, 268.0, 256.0, 267.0, 263.0, 293.0, 280.0, 296.0, 286.0, 252.0, 273.0, 290.0, 289.0, 278.0, 298.0, 255.0, 243.0, 269.0, 256.0, 265.0, 254.0, 299.0, 274.0, 282.0, 294.0, 288.0, 291.0, 298.0, 281.0, 261.0, 272.0, 274.0, 296.0, 222.0, 248.0, 293.0, 289.0, 215.0, 201.0, 294.0, 282.0, 281.0, 289.0, 281.0, 295.0, 300.0, 279.0, 291.0, 291.0, 258.0, 258.0, 283.0, 293.0, 283.0, 290.0, 282.0, 282.0, 303.0, 273.0, 306.0, 276.0, 255.0, 258.0, 273.0, 246.0, 272.0, 306.0, 294.0, 
279.0, 253.0, 263.0, 296.0, 286.0, 294.0, 276.0, 291.0, 288.0, 278.0, 292.0, 286.0, 287.0, 291.0, 285.0, 256.0, 260.0, 286.0, 290.0, 286.0, 293.0, 287.0, 286.0, 263.0, 267.0, 296.0, 288.0, 262.0, 263.0, 256.0, 266.0, 270.0, 260.0, 291.0, 288.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.68267583764297, "mean_inference_ms": 1.2051792659431373, "mean_action_processing_ms": 0.13225872711682485, "mean_env_wait_ms": 0.8354850608241722, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 4787200, "num_agent_steps_trained": 4787200, "num_env_steps_sampled": 2393600, "num_env_steps_trained": 2393600, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 2393600, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 4787200, "timers": {"training_iteration_time_ms": 3661.62, "learn_time_ms": 1133.198, "learn_throughput": 11295.462, "synch_weights_time_ms": 11.112}, "counters": {"num_env_steps_sampled": 2393600, "num_env_steps_trained": 2393600, "num_agent_steps_sampled": 4787200, "num_agent_steps_trained": 4787200}, "done": false, "episodes_total": 5984, "training_iteration": 187, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-12-05", "timestamp": 1666581125, "time_this_iter_s": 3.723355770111084, "time_total_s": 700.4304287433624, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 700.4304287433624, "timesteps_since_restore": 0, "iterations_since_restore": 187, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 21.0, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 192.6, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 171.29, "shaped_reward_min": 136, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.49, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 16.76, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 15.18, "useful_onion_pickup_agent_0_min": 9, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 16.45, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.13, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.28, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.04, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, 
"useful_onion_drop_agent_1_mean": 0.13, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 14.98, "potting_onion_agent_0_min": 9, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 16.21, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 5.57, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.11, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.09, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.5, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 10, "dish_drop_agent_0_mean": 0.22, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.3, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.06, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.06, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.18, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.69, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.13, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.53, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.1, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 14.98, "optimal_onion_potting_agent_0_min": 9, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 16.21, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.98, "viable_onion_potting_agent_0_min": 9, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 16.21, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0006394333904609084, "policy_loss": -0.0008913751225918531, "vf_loss": 
7.587137222290039, "vf_explained_var": 0.6698799133300781, "kl": 0.0019602831453084946, "entropy": 1.013542652130127, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 2406400, "num_env_steps_trained": 2406400, "num_agent_steps_sampled": 4812800, "num_agent_steps_trained": 4812800}, "sampler_results": {"episode_reward_max": 630.0, "episode_reward_min": 416.0, "episode_reward_mean": 556.49, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 201.0}, "policy_reward_max": {"ppo": 315.0}, "policy_reward_mean": {"ppo": 278.245}, "custom_metrics": {"sparse_reward_mean": 192.6, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 171.29, "shaped_reward_min": 136, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.49, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 16.76, 
"onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 15.18, "useful_onion_pickup_agent_0_min": 9, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 16.45, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.13, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.28, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.04, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.13, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 14.98, "potting_onion_agent_0_min": 9, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 16.21, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 5.57, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.11, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.09, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.5, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 10, "dish_drop_agent_0_mean": 0.22, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.3, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.06, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.06, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.18, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.69, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.13, "soup_delivery_agent_0_min": 1, 
"soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.53, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.1, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 14.98, "optimal_onion_potting_agent_0_min": 9, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 16.21, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.98, "viable_onion_potting_agent_0_min": 9, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 16.21, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 570.0, 570.0, 573.0, 582.0, 539.0, 519.0, 579.0, 573.0, 567.0, 573.0, 579.0, 576.0, 582.0, 564.0, 579.0, 573.0, 524.0, 530.0, 573.0, 582.0, 525.0, 579.0, 576.0, 498.0, 525.0, 519.0, 573.0, 576.0, 579.0, 579.0, 533.0, 570.0, 470.0, 582.0, 416.0, 576.0, 570.0, 576.0, 579.0, 582.0, 516.0, 576.0, 573.0, 564.0, 576.0, 582.0, 513.0, 519.0, 578.0, 573.0, 516.0, 582.0, 570.0, 579.0, 570.0, 573.0, 576.0, 516.0, 576.0, 579.0, 573.0, 530.0, 584.0, 525.0, 522.0, 530.0, 579.0, 525.0, 573.0, 530.0, 630.0, 576.0, 525.0, 570.0, 522.0, 570.0, 579.0, 573.0, 576.0, 522.0, 579.0, 582.0, 530.0, 539.0, 522.0, 573.0, 579.0, 522.0, 487.0, 558.0, 465.0, 525.0, 573.0, 579.0, 530.0, 582.0, 579.0, 576.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [287.0, 295.0, 280.0, 290.0, 288.0, 282.0, 293.0, 280.0, 277.0, 305.0, 272.0, 267.0, 250.0, 269.0, 304.0, 275.0, 292.0, 281.0, 270.0, 297.0, 290.0, 283.0, 299.0, 280.0, 277.0, 299.0, 287.0, 295.0, 289.0, 275.0, 286.0, 293.0, 286.0, 287.0, 268.0, 256.0, 267.0, 263.0, 293.0, 280.0, 296.0, 286.0, 252.0, 273.0, 290.0, 289.0, 278.0, 298.0, 255.0, 
243.0, 269.0, 256.0, 265.0, 254.0, 299.0, 274.0, 282.0, 294.0, 288.0, 291.0, 298.0, 281.0, 261.0, 272.0, 274.0, 296.0, 222.0, 248.0, 293.0, 289.0, 215.0, 201.0, 294.0, 282.0, 281.0, 289.0, 281.0, 295.0, 300.0, 279.0, 291.0, 291.0, 258.0, 258.0, 283.0, 293.0, 283.0, 290.0, 282.0, 282.0, 303.0, 273.0, 306.0, 276.0, 255.0, 258.0, 273.0, 246.0, 272.0, 306.0, 294.0, 279.0, 253.0, 263.0, 296.0, 286.0, 294.0, 276.0, 291.0, 288.0, 278.0, 292.0, 286.0, 287.0, 291.0, 285.0, 256.0, 260.0, 286.0, 290.0, 286.0, 293.0, 287.0, 286.0, 263.0, 267.0, 296.0, 288.0, 262.0, 263.0, 256.0, 266.0, 270.0, 260.0, 291.0, 288.0, 257.0, 268.0, 289.0, 284.0, 265.0, 265.0, 315.0, 315.0, 298.0, 278.0, 261.0, 264.0, 272.0, 298.0, 254.0, 268.0, 309.0, 261.0, 285.0, 294.0, 276.0, 297.0, 295.0, 281.0, 255.0, 267.0, 290.0, 289.0, 305.0, 277.0, 267.0, 263.0, 267.0, 272.0, 266.0, 256.0, 285.0, 288.0, 278.0, 301.0, 250.0, 272.0, 235.0, 252.0, 284.0, 274.0, 222.0, 243.0, 279.0, 246.0, 291.0, 282.0, 287.0, 292.0, 268.0, 262.0, 300.0, 282.0, 278.0, 301.0, 304.0, 272.0, 294.0, 282.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.682641226773663, "mean_inference_ms": 1.2050772392466162, "mean_action_processing_ms": 0.13225750950538415, "mean_env_wait_ms": 0.8353663191948928, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 630.0, "episode_reward_min": 416.0, "episode_reward_mean": 556.49, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 201.0}, "policy_reward_max": {"ppo": 315.0}, "policy_reward_mean": {"ppo": 278.245}, "hist_stats": {"episode_reward": [582.0, 570.0, 570.0, 573.0, 582.0, 539.0, 519.0, 579.0, 573.0, 567.0, 573.0, 579.0, 576.0, 582.0, 564.0, 579.0, 573.0, 524.0, 530.0, 573.0, 582.0, 525.0, 579.0, 576.0, 498.0, 525.0, 519.0, 573.0, 576.0, 579.0, 579.0, 533.0, 570.0, 470.0, 582.0, 416.0, 576.0, 570.0, 576.0, 579.0, 582.0, 516.0, 576.0, 573.0, 564.0, 576.0, 582.0, 513.0, 519.0, 578.0, 573.0, 516.0, 582.0, 570.0, 579.0, 570.0, 573.0, 
576.0, 516.0, 576.0, 579.0, 573.0, 530.0, 584.0, 525.0, 522.0, 530.0, 579.0, 525.0, 573.0, 530.0, 630.0, 576.0, 525.0, 570.0, 522.0, 570.0, 579.0, 573.0, 576.0, 522.0, 579.0, 582.0, 530.0, 539.0, 522.0, 573.0, 579.0, 522.0, 487.0, 558.0, 465.0, 525.0, 573.0, 579.0, 530.0, 582.0, 579.0, 576.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [287.0, 295.0, 280.0, 290.0, 288.0, 282.0, 293.0, 280.0, 277.0, 305.0, 272.0, 267.0, 250.0, 269.0, 304.0, 275.0, 292.0, 281.0, 270.0, 297.0, 290.0, 283.0, 299.0, 280.0, 277.0, 299.0, 287.0, 295.0, 289.0, 275.0, 286.0, 293.0, 286.0, 287.0, 268.0, 256.0, 267.0, 263.0, 293.0, 280.0, 296.0, 286.0, 252.0, 273.0, 290.0, 289.0, 278.0, 298.0, 255.0, 243.0, 269.0, 256.0, 265.0, 254.0, 299.0, 274.0, 282.0, 294.0, 288.0, 291.0, 298.0, 281.0, 261.0, 272.0, 274.0, 296.0, 222.0, 248.0, 293.0, 289.0, 215.0, 201.0, 294.0, 282.0, 281.0, 289.0, 281.0, 295.0, 300.0, 279.0, 291.0, 291.0, 258.0, 258.0, 283.0, 293.0, 283.0, 290.0, 282.0, 282.0, 303.0, 273.0, 306.0, 276.0, 255.0, 258.0, 273.0, 246.0, 272.0, 306.0, 294.0, 279.0, 253.0, 263.0, 296.0, 286.0, 294.0, 276.0, 291.0, 288.0, 278.0, 292.0, 286.0, 287.0, 291.0, 285.0, 256.0, 260.0, 286.0, 290.0, 286.0, 293.0, 287.0, 286.0, 263.0, 267.0, 296.0, 288.0, 262.0, 263.0, 256.0, 266.0, 270.0, 260.0, 291.0, 288.0, 257.0, 268.0, 289.0, 284.0, 265.0, 265.0, 315.0, 315.0, 298.0, 278.0, 261.0, 264.0, 272.0, 298.0, 254.0, 268.0, 309.0, 261.0, 285.0, 294.0, 276.0, 297.0, 295.0, 281.0, 255.0, 267.0, 290.0, 289.0, 305.0, 
277.0, 267.0, 263.0, 267.0, 272.0, 266.0, 256.0, 285.0, 288.0, 278.0, 301.0, 250.0, 272.0, 235.0, 252.0, 284.0, 274.0, 222.0, 243.0, 279.0, 246.0, 291.0, 282.0, 287.0, 292.0, 268.0, 262.0, 300.0, 282.0, 278.0, 301.0, 304.0, 272.0, 294.0, 282.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.682641226773663, "mean_inference_ms": 1.2050772392466162, "mean_action_processing_ms": 0.13225750950538415, "mean_env_wait_ms": 0.8353663191948928, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 4812800, "num_agent_steps_trained": 4812800, "num_env_steps_sampled": 2406400, "num_env_steps_trained": 2406400, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 2406400, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 4812800, "timers": {"training_iteration_time_ms": 3668.657, "learn_time_ms": 1140.068, "learn_throughput": 11227.397, "synch_weights_time_ms": 11.499}, "counters": {"num_env_steps_sampled": 2406400, "num_env_steps_trained": 2406400, "num_agent_steps_sampled": 4812800, "num_agent_steps_trained": 4812800}, "done": false, "episodes_total": 6016, "training_iteration": 188, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-12-09", "timestamp": 1666581129, "time_this_iter_s": 3.769735813140869, "time_total_s": 704.2001645565033, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 704.2001645565033, "timesteps_since_restore": 0, "iterations_since_restore": 188, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.479999999999997, "ram_util_percent": 10.62}}
+{"custom_metrics": {"sparse_reward_mean": 192.2, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 170.97, "shaped_reward_min": 136, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.15, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 17.16, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 14.84, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 16.76, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.18, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.28, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.05, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, 
"useful_onion_drop_agent_1_mean": 0.11, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 14.57, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 16.58, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.62, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 4.93, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.16, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.38, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.22, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.3, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.09, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.08, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.3, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.52, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.24, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.4, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.06, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 14.57, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 16.58, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.57, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 16.58, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.001346064847894013, "policy_loss": 0.0010941242799162865, "vf_loss": 
7.551443099975586, "vf_explained_var": 0.6755005121231079, "kl": 0.0019845583010464907, "entropy": 1.00640869140625, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 2419200, "num_env_steps_trained": 2419200, "num_agent_steps_sampled": 4838400, "num_agent_steps_trained": 4838400}, "sampler_results": {"episode_reward_max": 630.0, "episode_reward_min": 416.0, "episode_reward_mean": 555.37, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 193.0}, "policy_reward_max": {"ppo": 315.0}, "policy_reward_mean": {"ppo": 277.685}, "custom_metrics": {"sparse_reward_mean": 192.2, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 170.97, "shaped_reward_min": 136, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.15, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 17.16, 
"onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 14.84, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 16.76, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.18, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.28, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.05, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.11, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 14.57, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 16.58, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.62, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 4.93, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.16, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.38, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.22, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.3, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.09, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.08, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.3, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.52, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.24, "soup_delivery_agent_0_min": 2, 
"soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.4, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.06, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 14.57, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 16.58, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.57, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 16.58, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [570.0, 470.0, 582.0, 416.0, 576.0, 570.0, 576.0, 579.0, 582.0, 516.0, 576.0, 573.0, 564.0, 576.0, 582.0, 513.0, 519.0, 578.0, 573.0, 516.0, 582.0, 570.0, 579.0, 570.0, 573.0, 576.0, 516.0, 576.0, 579.0, 573.0, 530.0, 584.0, 525.0, 522.0, 530.0, 579.0, 525.0, 573.0, 530.0, 630.0, 576.0, 525.0, 570.0, 522.0, 570.0, 579.0, 573.0, 576.0, 522.0, 579.0, 582.0, 530.0, 539.0, 522.0, 573.0, 579.0, 522.0, 487.0, 558.0, 465.0, 525.0, 573.0, 579.0, 530.0, 582.0, 579.0, 576.0, 576.0, 582.0, 579.0, 573.0, 579.0, 579.0, 576.0, 527.0, 579.0, 530.0, 573.0, 521.0, 516.0, 576.0, 573.0, 579.0, 573.0, 573.0, 576.0, 519.0, 579.0, 582.0, 579.0, 419.0, 519.0, 570.0, 519.0, 587.0, 576.0, 522.0, 576.0, 558.0, 570.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [274.0, 296.0, 222.0, 248.0, 293.0, 289.0, 215.0, 201.0, 294.0, 282.0, 281.0, 289.0, 281.0, 295.0, 300.0, 279.0, 291.0, 291.0, 258.0, 258.0, 283.0, 293.0, 283.0, 290.0, 282.0, 282.0, 303.0, 273.0, 306.0, 276.0, 255.0, 258.0, 273.0, 246.0, 272.0, 306.0, 294.0, 279.0, 253.0, 263.0, 296.0, 286.0, 294.0, 276.0, 291.0, 288.0, 278.0, 292.0, 286.0, 
287.0, 291.0, 285.0, 256.0, 260.0, 286.0, 290.0, 286.0, 293.0, 287.0, 286.0, 263.0, 267.0, 296.0, 288.0, 262.0, 263.0, 256.0, 266.0, 270.0, 260.0, 291.0, 288.0, 257.0, 268.0, 289.0, 284.0, 265.0, 265.0, 315.0, 315.0, 298.0, 278.0, 261.0, 264.0, 272.0, 298.0, 254.0, 268.0, 309.0, 261.0, 285.0, 294.0, 276.0, 297.0, 295.0, 281.0, 255.0, 267.0, 290.0, 289.0, 305.0, 277.0, 267.0, 263.0, 267.0, 272.0, 266.0, 256.0, 285.0, 288.0, 278.0, 301.0, 250.0, 272.0, 235.0, 252.0, 284.0, 274.0, 222.0, 243.0, 279.0, 246.0, 291.0, 282.0, 287.0, 292.0, 268.0, 262.0, 300.0, 282.0, 278.0, 301.0, 304.0, 272.0, 294.0, 282.0, 301.0, 281.0, 296.0, 283.0, 272.0, 301.0, 283.0, 296.0, 288.0, 291.0, 301.0, 275.0, 246.0, 281.0, 291.0, 288.0, 266.0, 264.0, 282.0, 291.0, 271.0, 250.0, 249.0, 267.0, 280.0, 296.0, 297.0, 276.0, 285.0, 294.0, 293.0, 280.0, 286.0, 287.0, 291.0, 285.0, 261.0, 258.0, 308.0, 271.0, 288.0, 294.0, 283.0, 296.0, 193.0, 226.0, 258.0, 261.0, 287.0, 283.0, 263.0, 256.0, 301.0, 286.0, 290.0, 286.0, 254.0, 268.0, 292.0, 284.0, 275.0, 283.0, 275.0, 295.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6826179017430108, "mean_inference_ms": 1.2049754873065845, "mean_action_processing_ms": 0.1322529508434523, "mean_env_wait_ms": 0.8352187465180257, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 630.0, "episode_reward_min": 416.0, "episode_reward_mean": 555.37, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 193.0}, "policy_reward_max": {"ppo": 315.0}, "policy_reward_mean": {"ppo": 277.685}, "hist_stats": {"episode_reward": [570.0, 470.0, 582.0, 416.0, 576.0, 570.0, 576.0, 579.0, 582.0, 516.0, 576.0, 573.0, 564.0, 576.0, 582.0, 513.0, 519.0, 578.0, 573.0, 516.0, 582.0, 570.0, 579.0, 570.0, 573.0, 576.0, 516.0, 576.0, 579.0, 573.0, 530.0, 584.0, 525.0, 522.0, 530.0, 579.0, 525.0, 573.0, 530.0, 630.0, 576.0, 525.0, 570.0, 522.0, 570.0, 579.0, 573.0, 576.0, 522.0, 579.0, 582.0, 530.0, 539.0, 522.0, 573.0, 579.0, 522.0, 
487.0, 558.0, 465.0, 525.0, 573.0, 579.0, 530.0, 582.0, 579.0, 576.0, 576.0, 582.0, 579.0, 573.0, 579.0, 579.0, 576.0, 527.0, 579.0, 530.0, 573.0, 521.0, 516.0, 576.0, 573.0, 579.0, 573.0, 573.0, 576.0, 519.0, 579.0, 582.0, 579.0, 419.0, 519.0, 570.0, 519.0, 587.0, 576.0, 522.0, 576.0, 558.0, 570.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [274.0, 296.0, 222.0, 248.0, 293.0, 289.0, 215.0, 201.0, 294.0, 282.0, 281.0, 289.0, 281.0, 295.0, 300.0, 279.0, 291.0, 291.0, 258.0, 258.0, 283.0, 293.0, 283.0, 290.0, 282.0, 282.0, 303.0, 273.0, 306.0, 276.0, 255.0, 258.0, 273.0, 246.0, 272.0, 306.0, 294.0, 279.0, 253.0, 263.0, 296.0, 286.0, 294.0, 276.0, 291.0, 288.0, 278.0, 292.0, 286.0, 287.0, 291.0, 285.0, 256.0, 260.0, 286.0, 290.0, 286.0, 293.0, 287.0, 286.0, 263.0, 267.0, 296.0, 288.0, 262.0, 263.0, 256.0, 266.0, 270.0, 260.0, 291.0, 288.0, 257.0, 268.0, 289.0, 284.0, 265.0, 265.0, 315.0, 315.0, 298.0, 278.0, 261.0, 264.0, 272.0, 298.0, 254.0, 268.0, 309.0, 261.0, 285.0, 294.0, 276.0, 297.0, 295.0, 281.0, 255.0, 267.0, 290.0, 289.0, 305.0, 277.0, 267.0, 263.0, 267.0, 272.0, 266.0, 256.0, 285.0, 288.0, 278.0, 301.0, 250.0, 272.0, 235.0, 252.0, 284.0, 274.0, 222.0, 243.0, 279.0, 246.0, 291.0, 282.0, 287.0, 292.0, 268.0, 262.0, 300.0, 282.0, 278.0, 301.0, 304.0, 272.0, 294.0, 282.0, 301.0, 281.0, 296.0, 283.0, 272.0, 301.0, 283.0, 296.0, 288.0, 291.0, 301.0, 275.0, 246.0, 281.0, 291.0, 288.0, 266.0, 264.0, 282.0, 291.0, 271.0, 250.0, 249.0, 267.0, 280.0, 296.0, 297.0, 276.0, 285.0, 
294.0, 293.0, 280.0, 286.0, 287.0, 291.0, 285.0, 261.0, 258.0, 308.0, 271.0, 288.0, 294.0, 283.0, 296.0, 193.0, 226.0, 258.0, 261.0, 287.0, 283.0, 263.0, 256.0, 301.0, 286.0, 290.0, 286.0, 254.0, 268.0, 292.0, 284.0, 275.0, 283.0, 275.0, 295.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6826179017430108, "mean_inference_ms": 1.2049754873065845, "mean_action_processing_ms": 0.1322529508434523, "mean_env_wait_ms": 0.8352187465180257, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 4838400, "num_agent_steps_trained": 4838400, "num_env_steps_sampled": 2419200, "num_env_steps_trained": 2419200, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 2419200, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 4838400, "timers": {"training_iteration_time_ms": 3634.315, "learn_time_ms": 1128.996, "learn_throughput": 11337.511, "synch_weights_time_ms": 11.43}, "counters": {"num_env_steps_sampled": 2419200, "num_env_steps_trained": 2419200, "num_agent_steps_sampled": 4838400, "num_agent_steps_trained": 4838400}, "done": false, "episodes_total": 6048, "training_iteration": 189, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-12-13", "timestamp": 1666581133, "time_this_iter_s": 3.596635580062866, "time_total_s": 707.7968001365662, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 707.7968001365662, "timesteps_since_restore": 0, "iterations_since_restore": 189, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.979999999999997, "ram_util_percent": 10.62}}
+{"custom_metrics": {"sparse_reward_mean": 191.6, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 169.87, "shaped_reward_min": 132, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.97, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 17.22, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 14.63, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 16.8, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.21, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.31, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.07, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, 
"useful_onion_drop_agent_1_mean": 0.11, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 14.38, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 16.63, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.72, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 4.93, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.17, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.21, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.26, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.34, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.09, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.14, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 5.34, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.43, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.26, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.35, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.03, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.04, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.38, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 16.63, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.38, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 16.63, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0009595003793947399, "policy_loss": -0.0012168899411335588, "vf_loss": 
7.624897003173828, "vf_explained_var": 0.659218430519104, "kl": 0.0018678703345358372, "entropy": 1.0101993083953857, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 2432000, "num_env_steps_trained": 2432000, "num_agent_steps_sampled": 4864000, "num_agent_steps_trained": 4864000}, "sampler_results": {"episode_reward_max": 630.0, "episode_reward_min": 419.0, "episode_reward_mean": 553.07, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 193.0}, "policy_reward_max": {"ppo": 315.0}, "policy_reward_mean": {"ppo": 276.535}, "custom_metrics": {"sparse_reward_mean": 191.6, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 169.87, "shaped_reward_min": 132, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.97, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 17.22, 
"onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 14.63, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 16.8, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.21, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.31, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.07, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.11, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 14.38, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 16.63, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.72, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 4.93, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.17, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.21, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.26, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.34, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.09, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.14, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 5.34, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.43, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.26, "soup_delivery_agent_0_min": 2, 
"soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.35, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.03, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.04, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.38, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 16.63, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.38, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 16.63, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [525.0, 522.0, 530.0, 579.0, 525.0, 573.0, 530.0, 630.0, 576.0, 525.0, 570.0, 522.0, 570.0, 579.0, 573.0, 576.0, 522.0, 579.0, 582.0, 530.0, 539.0, 522.0, 573.0, 579.0, 522.0, 487.0, 558.0, 465.0, 525.0, 573.0, 579.0, 530.0, 582.0, 579.0, 576.0, 576.0, 582.0, 579.0, 573.0, 579.0, 579.0, 576.0, 527.0, 579.0, 530.0, 573.0, 521.0, 516.0, 576.0, 573.0, 579.0, 573.0, 573.0, 576.0, 519.0, 579.0, 582.0, 579.0, 419.0, 519.0, 570.0, 519.0, 587.0, 576.0, 522.0, 576.0, 558.0, 570.0, 570.0, 522.0, 573.0, 519.0, 576.0, 573.0, 570.0, 527.0, 522.0, 587.0, 579.0, 573.0, 564.0, 510.0, 452.0, 519.0, 504.0, 573.0, 525.0, 582.0, 573.0, 519.0, 576.0, 510.0, 573.0, 570.0, 573.0, 533.0, 570.0, 522.0, 570.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [262.0, 263.0, 256.0, 266.0, 270.0, 260.0, 291.0, 288.0, 257.0, 268.0, 289.0, 284.0, 265.0, 265.0, 315.0, 315.0, 298.0, 278.0, 261.0, 264.0, 272.0, 298.0, 254.0, 268.0, 309.0, 261.0, 285.0, 294.0, 276.0, 297.0, 295.0, 281.0, 255.0, 267.0, 290.0, 289.0, 305.0, 277.0, 267.0, 263.0, 267.0, 272.0, 266.0, 256.0, 285.0, 288.0, 278.0, 301.0, 250.0, 
272.0, 235.0, 252.0, 284.0, 274.0, 222.0, 243.0, 279.0, 246.0, 291.0, 282.0, 287.0, 292.0, 268.0, 262.0, 300.0, 282.0, 278.0, 301.0, 304.0, 272.0, 294.0, 282.0, 301.0, 281.0, 296.0, 283.0, 272.0, 301.0, 283.0, 296.0, 288.0, 291.0, 301.0, 275.0, 246.0, 281.0, 291.0, 288.0, 266.0, 264.0, 282.0, 291.0, 271.0, 250.0, 249.0, 267.0, 280.0, 296.0, 297.0, 276.0, 285.0, 294.0, 293.0, 280.0, 286.0, 287.0, 291.0, 285.0, 261.0, 258.0, 308.0, 271.0, 288.0, 294.0, 283.0, 296.0, 193.0, 226.0, 258.0, 261.0, 287.0, 283.0, 263.0, 256.0, 301.0, 286.0, 290.0, 286.0, 254.0, 268.0, 292.0, 284.0, 275.0, 283.0, 275.0, 295.0, 291.0, 279.0, 261.0, 261.0, 282.0, 291.0, 266.0, 253.0, 282.0, 294.0, 274.0, 299.0, 273.0, 297.0, 285.0, 242.0, 260.0, 262.0, 299.0, 288.0, 294.0, 285.0, 298.0, 275.0, 275.0, 289.0, 260.0, 250.0, 223.0, 229.0, 253.0, 266.0, 243.0, 261.0, 278.0, 295.0, 277.0, 248.0, 298.0, 284.0, 286.0, 287.0, 247.0, 272.0, 280.0, 296.0, 263.0, 247.0, 294.0, 279.0, 278.0, 292.0, 287.0, 286.0, 288.0, 245.0, 286.0, 284.0, 272.0, 250.0, 290.0, 280.0, 286.0, 290.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6844611466940688, "mean_inference_ms": 1.2103155404175103, "mean_action_processing_ms": 0.13240161079494261, "mean_env_wait_ms": 0.8363401360482277, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 630.0, "episode_reward_min": 419.0, "episode_reward_mean": 553.07, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 193.0}, "policy_reward_max": {"ppo": 315.0}, "policy_reward_mean": {"ppo": 276.535}, "hist_stats": {"episode_reward": [525.0, 522.0, 530.0, 579.0, 525.0, 573.0, 530.0, 630.0, 576.0, 525.0, 570.0, 522.0, 570.0, 579.0, 573.0, 576.0, 522.0, 579.0, 582.0, 530.0, 539.0, 522.0, 573.0, 579.0, 522.0, 487.0, 558.0, 465.0, 525.0, 573.0, 579.0, 530.0, 582.0, 579.0, 576.0, 576.0, 582.0, 579.0, 573.0, 579.0, 579.0, 576.0, 527.0, 579.0, 530.0, 573.0, 521.0, 516.0, 576.0, 573.0, 579.0, 573.0, 573.0, 576.0, 519.0, 579.0, 
582.0, 579.0, 419.0, 519.0, 570.0, 519.0, 587.0, 576.0, 522.0, 576.0, 558.0, 570.0, 570.0, 522.0, 573.0, 519.0, 576.0, 573.0, 570.0, 527.0, 522.0, 587.0, 579.0, 573.0, 564.0, 510.0, 452.0, 519.0, 504.0, 573.0, 525.0, 582.0, 573.0, 519.0, 576.0, 510.0, 573.0, 570.0, 573.0, 533.0, 570.0, 522.0, 570.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [262.0, 263.0, 256.0, 266.0, 270.0, 260.0, 291.0, 288.0, 257.0, 268.0, 289.0, 284.0, 265.0, 265.0, 315.0, 315.0, 298.0, 278.0, 261.0, 264.0, 272.0, 298.0, 254.0, 268.0, 309.0, 261.0, 285.0, 294.0, 276.0, 297.0, 295.0, 281.0, 255.0, 267.0, 290.0, 289.0, 305.0, 277.0, 267.0, 263.0, 267.0, 272.0, 266.0, 256.0, 285.0, 288.0, 278.0, 301.0, 250.0, 272.0, 235.0, 252.0, 284.0, 274.0, 222.0, 243.0, 279.0, 246.0, 291.0, 282.0, 287.0, 292.0, 268.0, 262.0, 300.0, 282.0, 278.0, 301.0, 304.0, 272.0, 294.0, 282.0, 301.0, 281.0, 296.0, 283.0, 272.0, 301.0, 283.0, 296.0, 288.0, 291.0, 301.0, 275.0, 246.0, 281.0, 291.0, 288.0, 266.0, 264.0, 282.0, 291.0, 271.0, 250.0, 249.0, 267.0, 280.0, 296.0, 297.0, 276.0, 285.0, 294.0, 293.0, 280.0, 286.0, 287.0, 291.0, 285.0, 261.0, 258.0, 308.0, 271.0, 288.0, 294.0, 283.0, 296.0, 193.0, 226.0, 258.0, 261.0, 287.0, 283.0, 263.0, 256.0, 301.0, 286.0, 290.0, 286.0, 254.0, 268.0, 292.0, 284.0, 275.0, 283.0, 275.0, 295.0, 291.0, 279.0, 261.0, 261.0, 282.0, 291.0, 266.0, 253.0, 282.0, 294.0, 274.0, 299.0, 273.0, 297.0, 285.0, 242.0, 260.0, 262.0, 299.0, 288.0, 294.0, 285.0, 298.0, 275.0, 275.0, 289.0, 260.0, 250.0, 
223.0, 229.0, 253.0, 266.0, 243.0, 261.0, 278.0, 295.0, 277.0, 248.0, 298.0, 284.0, 286.0, 287.0, 247.0, 272.0, 280.0, 296.0, 263.0, 247.0, 294.0, 279.0, 278.0, 292.0, 287.0, 286.0, 288.0, 245.0, 286.0, 284.0, 272.0, 250.0, 290.0, 280.0, 286.0, 290.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6844611466940688, "mean_inference_ms": 1.2103155404175103, "mean_action_processing_ms": 0.13240161079494261, "mean_env_wait_ms": 0.8363401360482277, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 4864000, "num_agent_steps_trained": 4864000, "num_env_steps_sampled": 2432000, "num_env_steps_trained": 2432000, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 2432000, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 4864000, "timers": {"training_iteration_time_ms": 4318.739, "learn_time_ms": 1136.065, "learn_throughput": 11266.96, "synch_weights_time_ms": 11.862}, "counters": {"num_env_steps_sampled": 2432000, "num_env_steps_trained": 2432000, "num_agent_steps_sampled": 4864000, "num_agent_steps_trained": 4864000}, "done": false, "episodes_total": 6080, "training_iteration": 190, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-12-23", "timestamp": 1666581143, "time_this_iter_s": 10.565764904022217, "time_total_s": 718.3625650405884, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 718.3625650405884, "timesteps_since_restore": 0, "iterations_since_restore": 190, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 83.35, "ram_util_percent": 17.51875}}
+{"custom_metrics": {"sparse_reward_mean": 192.8, "sparse_reward_min": 140, "sparse_reward_max": 200, "shaped_reward_mean": 170.62, "shaped_reward_min": 132, "shaped_reward_max": 187, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.39, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 16.87, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 15.06, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 16.46, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.26, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.25, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.08, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, 
"useful_onion_drop_agent_1_mean": 0.09, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 14.78, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 16.33, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.66, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.03, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 5.08, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.4, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 10, "dish_drop_agent_0_mean": 0.29, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.28, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.11, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.12, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 5.22, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.57, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 10, "soup_delivery_agent_0_mean": 5.16, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.51, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 10, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.78, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 16.33, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.78, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 16.33, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.000840085675008595, "policy_loss": -0.0011022280668839812, "vf_loss": 
7.6430559158325195, "vf_explained_var": 0.6706925630569458, "kl": 0.0019743097946047783, "entropy": 1.0043245553970337, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 2444800, "num_env_steps_trained": 2444800, "num_agent_steps_sampled": 4889600, "num_agent_steps_trained": 4889600}, "sampler_results": {"episode_reward_max": 587.0, "episode_reward_min": 419.0, "episode_reward_mean": 556.22, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 193.0}, "policy_reward_max": {"ppo": 310.0}, "policy_reward_mean": {"ppo": 278.11}, "custom_metrics": {"sparse_reward_mean": 192.8, "sparse_reward_min": 140, "sparse_reward_max": 200, "shaped_reward_mean": 170.62, "shaped_reward_min": 132, "shaped_reward_max": 187, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.39, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 16.87, 
"onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 15.06, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 16.46, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.26, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.25, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.08, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.09, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 14.78, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 16.33, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.66, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.03, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 5.08, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.4, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 10, "dish_drop_agent_0_mean": 0.29, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.28, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.11, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.12, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 5.22, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.57, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 10, "soup_delivery_agent_0_mean": 5.16, "soup_delivery_agent_0_min": 0, 
"soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.51, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 10, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.78, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 16.33, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.78, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 16.33, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 579.0, 576.0, 576.0, 582.0, 579.0, 573.0, 579.0, 579.0, 576.0, 527.0, 579.0, 530.0, 573.0, 521.0, 516.0, 576.0, 573.0, 579.0, 573.0, 573.0, 576.0, 519.0, 579.0, 582.0, 579.0, 419.0, 519.0, 570.0, 519.0, 587.0, 576.0, 522.0, 576.0, 558.0, 570.0, 570.0, 522.0, 573.0, 519.0, 576.0, 573.0, 570.0, 527.0, 522.0, 587.0, 579.0, 573.0, 564.0, 510.0, 452.0, 519.0, 504.0, 573.0, 525.0, 582.0, 573.0, 519.0, 576.0, 510.0, 573.0, 570.0, 573.0, 533.0, 570.0, 522.0, 570.0, 576.0, 579.0, 567.0, 576.0, 576.0, 522.0, 579.0, 576.0, 584.0, 507.0, 579.0, 573.0, 579.0, 582.0, 576.0, 576.0, 576.0, 573.0, 582.0, 522.0, 579.0, 579.0, 468.0, 522.0, 570.0, 579.0, 573.0, 582.0, 539.0, 525.0, 576.0, 490.0, 519.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [300.0, 282.0, 278.0, 301.0, 304.0, 272.0, 294.0, 282.0, 301.0, 281.0, 296.0, 283.0, 272.0, 301.0, 283.0, 296.0, 288.0, 291.0, 301.0, 275.0, 246.0, 281.0, 291.0, 288.0, 266.0, 264.0, 282.0, 291.0, 271.0, 250.0, 249.0, 267.0, 280.0, 296.0, 297.0, 276.0, 285.0, 294.0, 293.0, 280.0, 286.0, 287.0, 291.0, 285.0, 261.0, 258.0, 308.0, 271.0, 288.0, 
294.0, 283.0, 296.0, 193.0, 226.0, 258.0, 261.0, 287.0, 283.0, 263.0, 256.0, 301.0, 286.0, 290.0, 286.0, 254.0, 268.0, 292.0, 284.0, 275.0, 283.0, 275.0, 295.0, 291.0, 279.0, 261.0, 261.0, 282.0, 291.0, 266.0, 253.0, 282.0, 294.0, 274.0, 299.0, 273.0, 297.0, 285.0, 242.0, 260.0, 262.0, 299.0, 288.0, 294.0, 285.0, 298.0, 275.0, 275.0, 289.0, 260.0, 250.0, 223.0, 229.0, 253.0, 266.0, 243.0, 261.0, 278.0, 295.0, 277.0, 248.0, 298.0, 284.0, 286.0, 287.0, 247.0, 272.0, 280.0, 296.0, 263.0, 247.0, 294.0, 279.0, 278.0, 292.0, 287.0, 286.0, 288.0, 245.0, 286.0, 284.0, 272.0, 250.0, 290.0, 280.0, 286.0, 290.0, 290.0, 289.0, 286.0, 281.0, 266.0, 310.0, 280.0, 296.0, 265.0, 257.0, 292.0, 287.0, 297.0, 279.0, 304.0, 280.0, 247.0, 260.0, 293.0, 286.0, 274.0, 299.0, 304.0, 275.0, 292.0, 290.0, 280.0, 296.0, 300.0, 276.0, 287.0, 289.0, 290.0, 283.0, 287.0, 295.0, 256.0, 266.0, 285.0, 294.0, 284.0, 295.0, 230.0, 238.0, 258.0, 264.0, 283.0, 287.0, 303.0, 276.0, 274.0, 299.0, 299.0, 283.0, 270.0, 269.0, 262.0, 263.0, 290.0, 286.0, 248.0, 242.0, 261.0, 258.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6866233248561839, "mean_inference_ms": 1.2159121953751708, "mean_action_processing_ms": 0.13259290392719372, "mean_env_wait_ms": 0.8377164271221949, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 587.0, "episode_reward_min": 419.0, "episode_reward_mean": 556.22, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 193.0}, "policy_reward_max": {"ppo": 310.0}, "policy_reward_mean": {"ppo": 278.11}, "hist_stats": {"episode_reward": [582.0, 579.0, 576.0, 576.0, 582.0, 579.0, 573.0, 579.0, 579.0, 576.0, 527.0, 579.0, 530.0, 573.0, 521.0, 516.0, 576.0, 573.0, 579.0, 573.0, 573.0, 576.0, 519.0, 579.0, 582.0, 579.0, 419.0, 519.0, 570.0, 519.0, 587.0, 576.0, 522.0, 576.0, 558.0, 570.0, 570.0, 522.0, 573.0, 519.0, 576.0, 573.0, 570.0, 527.0, 522.0, 587.0, 579.0, 573.0, 564.0, 510.0, 452.0, 519.0, 504.0, 573.0, 525.0, 582.0, 573.0, 
519.0, 576.0, 510.0, 573.0, 570.0, 573.0, 533.0, 570.0, 522.0, 570.0, 576.0, 579.0, 567.0, 576.0, 576.0, 522.0, 579.0, 576.0, 584.0, 507.0, 579.0, 573.0, 579.0, 582.0, 576.0, 576.0, 576.0, 573.0, 582.0, 522.0, 579.0, 579.0, 468.0, 522.0, 570.0, 579.0, 573.0, 582.0, 539.0, 525.0, 576.0, 490.0, 519.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [300.0, 282.0, 278.0, 301.0, 304.0, 272.0, 294.0, 282.0, 301.0, 281.0, 296.0, 283.0, 272.0, 301.0, 283.0, 296.0, 288.0, 291.0, 301.0, 275.0, 246.0, 281.0, 291.0, 288.0, 266.0, 264.0, 282.0, 291.0, 271.0, 250.0, 249.0, 267.0, 280.0, 296.0, 297.0, 276.0, 285.0, 294.0, 293.0, 280.0, 286.0, 287.0, 291.0, 285.0, 261.0, 258.0, 308.0, 271.0, 288.0, 294.0, 283.0, 296.0, 193.0, 226.0, 258.0, 261.0, 287.0, 283.0, 263.0, 256.0, 301.0, 286.0, 290.0, 286.0, 254.0, 268.0, 292.0, 284.0, 275.0, 283.0, 275.0, 295.0, 291.0, 279.0, 261.0, 261.0, 282.0, 291.0, 266.0, 253.0, 282.0, 294.0, 274.0, 299.0, 273.0, 297.0, 285.0, 242.0, 260.0, 262.0, 299.0, 288.0, 294.0, 285.0, 298.0, 275.0, 275.0, 289.0, 260.0, 250.0, 223.0, 229.0, 253.0, 266.0, 243.0, 261.0, 278.0, 295.0, 277.0, 248.0, 298.0, 284.0, 286.0, 287.0, 247.0, 272.0, 280.0, 296.0, 263.0, 247.0, 294.0, 279.0, 278.0, 292.0, 287.0, 286.0, 288.0, 245.0, 286.0, 284.0, 272.0, 250.0, 290.0, 280.0, 286.0, 290.0, 290.0, 289.0, 286.0, 281.0, 266.0, 310.0, 280.0, 296.0, 265.0, 257.0, 292.0, 287.0, 297.0, 279.0, 304.0, 280.0, 247.0, 260.0, 293.0, 286.0, 274.0, 299.0, 304.0, 275.0, 292.0, 290.0, 280.0, 296.0, 300.0, 
276.0, 287.0, 289.0, 290.0, 283.0, 287.0, 295.0, 256.0, 266.0, 285.0, 294.0, 284.0, 295.0, 230.0, 238.0, 258.0, 264.0, 283.0, 287.0, 303.0, 276.0, 274.0, 299.0, 299.0, 283.0, 270.0, 269.0, 262.0, 263.0, 290.0, 286.0, 248.0, 242.0, 261.0, 258.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6866233248561839, "mean_inference_ms": 1.2159121953751708, "mean_action_processing_ms": 0.13259290392719372, "mean_env_wait_ms": 0.8377164271221949, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 4889600, "num_agent_steps_trained": 4889600, "num_env_steps_sampled": 2444800, "num_env_steps_trained": 2444800, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 2444800, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 4889600, "timers": {"training_iteration_time_ms": 4368.115, "learn_time_ms": 1141.284, "learn_throughput": 11215.439, "synch_weights_time_ms": 12.131}, "counters": {"num_env_steps_sampled": 2444800, "num_env_steps_trained": 2444800, "num_agent_steps_sampled": 4889600, "num_agent_steps_trained": 4889600}, "done": false, "episodes_total": 6112, "training_iteration": 191, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-12-28", "timestamp": 1666581148, "time_this_iter_s": 4.299537658691406, "time_total_s": 722.6621026992798, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 722.6621026992798, "timesteps_since_restore": 0, "iterations_since_restore": 191, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 45.98333333333334, "ram_util_percent": 20.7}}
+{"custom_metrics": {"sparse_reward_mean": 193.4, "sparse_reward_min": 160, "sparse_reward_max": 200, "shaped_reward_mean": 171.02, "shaped_reward_min": 132, "shaped_reward_max": 187, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.58, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 16.7, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 15.23, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 16.37, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.3, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.24, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.1, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, 
"useful_onion_drop_agent_1_mean": 0.08, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 14.99, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 16.15, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.61, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.16, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 5.02, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.53, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 10, "dish_drop_agent_0_mean": 0.27, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.28, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.05, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.1, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 5.13, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.68, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 10, "soup_delivery_agent_0_mean": 5.07, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.62, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 10, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.99, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 16.15, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.99, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 16.15, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 3.6748824641108513e-06, "policy_loss": -0.0002568246563896537, "vf_loss": 
7.626540660858154, "vf_explained_var": 0.6705838441848755, "kl": 0.0022698636166751385, "entropy": 1.0043076276779175, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 2457600, "num_env_steps_trained": 2457600, "num_agent_steps_sampled": 4915200, "num_agent_steps_trained": 4915200}, "sampler_results": {"episode_reward_max": 587.0, "episode_reward_min": 452.0, "episode_reward_mean": 557.82, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 220.0}, "policy_reward_max": {"ppo": 310.0}, "policy_reward_mean": {"ppo": 278.91}, "custom_metrics": {"sparse_reward_mean": 193.4, "sparse_reward_min": 160, "sparse_reward_max": 200, "shaped_reward_mean": 171.02, "shaped_reward_min": 132, "shaped_reward_max": 187, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.58, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 16.7, 
"onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 15.23, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 16.37, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.3, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.24, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.1, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.08, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 14.99, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 16.15, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.61, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.16, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 5.02, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.53, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 10, "dish_drop_agent_0_mean": 0.27, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.28, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.05, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.1, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 5.13, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.68, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 10, "soup_delivery_agent_0_mean": 5.07, "soup_delivery_agent_0_min": 0, 
"soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.62, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 10, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.99, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 16.15, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.99, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 16.15, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [522.0, 576.0, 558.0, 570.0, 570.0, 522.0, 573.0, 519.0, 576.0, 573.0, 570.0, 527.0, 522.0, 587.0, 579.0, 573.0, 564.0, 510.0, 452.0, 519.0, 504.0, 573.0, 525.0, 582.0, 573.0, 519.0, 576.0, 510.0, 573.0, 570.0, 573.0, 533.0, 570.0, 522.0, 570.0, 576.0, 579.0, 567.0, 576.0, 576.0, 522.0, 579.0, 576.0, 584.0, 507.0, 579.0, 573.0, 579.0, 582.0, 576.0, 576.0, 576.0, 573.0, 582.0, 522.0, 579.0, 579.0, 468.0, 522.0, 570.0, 579.0, 573.0, 582.0, 539.0, 525.0, 576.0, 490.0, 519.0, 576.0, 579.0, 582.0, 530.0, 576.0, 525.0, 519.0, 570.0, 575.0, 530.0, 576.0, 579.0, 576.0, 573.0, 579.0, 582.0, 579.0, 570.0, 453.0, 525.0, 579.0, 576.0, 573.0, 573.0, 573.0, 576.0, 579.0, 587.0, 582.0, 582.0, 579.0, 573.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [254.0, 268.0, 292.0, 284.0, 275.0, 283.0, 275.0, 295.0, 291.0, 279.0, 261.0, 261.0, 282.0, 291.0, 266.0, 253.0, 282.0, 294.0, 274.0, 299.0, 273.0, 297.0, 285.0, 242.0, 260.0, 262.0, 299.0, 288.0, 294.0, 285.0, 298.0, 275.0, 275.0, 289.0, 260.0, 250.0, 223.0, 229.0, 253.0, 266.0, 243.0, 261.0, 278.0, 295.0, 277.0, 248.0, 298.0, 284.0, 286.0, 
287.0, 247.0, 272.0, 280.0, 296.0, 263.0, 247.0, 294.0, 279.0, 278.0, 292.0, 287.0, 286.0, 288.0, 245.0, 286.0, 284.0, 272.0, 250.0, 290.0, 280.0, 286.0, 290.0, 290.0, 289.0, 286.0, 281.0, 266.0, 310.0, 280.0, 296.0, 265.0, 257.0, 292.0, 287.0, 297.0, 279.0, 304.0, 280.0, 247.0, 260.0, 293.0, 286.0, 274.0, 299.0, 304.0, 275.0, 292.0, 290.0, 280.0, 296.0, 300.0, 276.0, 287.0, 289.0, 290.0, 283.0, 287.0, 295.0, 256.0, 266.0, 285.0, 294.0, 284.0, 295.0, 230.0, 238.0, 258.0, 264.0, 283.0, 287.0, 303.0, 276.0, 274.0, 299.0, 299.0, 283.0, 270.0, 269.0, 262.0, 263.0, 290.0, 286.0, 248.0, 242.0, 261.0, 258.0, 277.0, 299.0, 293.0, 286.0, 285.0, 297.0, 265.0, 265.0, 295.0, 281.0, 266.0, 259.0, 262.0, 257.0, 285.0, 285.0, 286.0, 289.0, 260.0, 270.0, 291.0, 285.0, 297.0, 282.0, 290.0, 286.0, 283.0, 290.0, 292.0, 287.0, 284.0, 298.0, 304.0, 275.0, 294.0, 276.0, 233.0, 220.0, 250.0, 275.0, 283.0, 296.0, 290.0, 286.0, 276.0, 297.0, 290.0, 283.0, 287.0, 286.0, 292.0, 284.0, 284.0, 295.0, 291.0, 296.0, 278.0, 304.0, 304.0, 278.0, 289.0, 290.0, 290.0, 283.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6890653996220631, "mean_inference_ms": 1.2217288104133466, "mean_action_processing_ms": 0.13282370029567642, "mean_env_wait_ms": 0.839345002381688, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 587.0, "episode_reward_min": 452.0, "episode_reward_mean": 557.82, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 220.0}, "policy_reward_max": {"ppo": 310.0}, "policy_reward_mean": {"ppo": 278.91}, "hist_stats": {"episode_reward": [522.0, 576.0, 558.0, 570.0, 570.0, 522.0, 573.0, 519.0, 576.0, 573.0, 570.0, 527.0, 522.0, 587.0, 579.0, 573.0, 564.0, 510.0, 452.0, 519.0, 504.0, 573.0, 525.0, 582.0, 573.0, 519.0, 576.0, 510.0, 573.0, 570.0, 573.0, 533.0, 570.0, 522.0, 570.0, 576.0, 579.0, 567.0, 576.0, 576.0, 522.0, 579.0, 576.0, 584.0, 507.0, 579.0, 573.0, 579.0, 582.0, 576.0, 576.0, 576.0, 573.0, 582.0, 522.0, 579.0, 579.0, 
468.0, 522.0, 570.0, 579.0, 573.0, 582.0, 539.0, 525.0, 576.0, 490.0, 519.0, 576.0, 579.0, 582.0, 530.0, 576.0, 525.0, 519.0, 570.0, 575.0, 530.0, 576.0, 579.0, 576.0, 573.0, 579.0, 582.0, 579.0, 570.0, 453.0, 525.0, 579.0, 576.0, 573.0, 573.0, 573.0, 576.0, 579.0, 587.0, 582.0, 582.0, 579.0, 573.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [254.0, 268.0, 292.0, 284.0, 275.0, 283.0, 275.0, 295.0, 291.0, 279.0, 261.0, 261.0, 282.0, 291.0, 266.0, 253.0, 282.0, 294.0, 274.0, 299.0, 273.0, 297.0, 285.0, 242.0, 260.0, 262.0, 299.0, 288.0, 294.0, 285.0, 298.0, 275.0, 275.0, 289.0, 260.0, 250.0, 223.0, 229.0, 253.0, 266.0, 243.0, 261.0, 278.0, 295.0, 277.0, 248.0, 298.0, 284.0, 286.0, 287.0, 247.0, 272.0, 280.0, 296.0, 263.0, 247.0, 294.0, 279.0, 278.0, 292.0, 287.0, 286.0, 288.0, 245.0, 286.0, 284.0, 272.0, 250.0, 290.0, 280.0, 286.0, 290.0, 290.0, 289.0, 286.0, 281.0, 266.0, 310.0, 280.0, 296.0, 265.0, 257.0, 292.0, 287.0, 297.0, 279.0, 304.0, 280.0, 247.0, 260.0, 293.0, 286.0, 274.0, 299.0, 304.0, 275.0, 292.0, 290.0, 280.0, 296.0, 300.0, 276.0, 287.0, 289.0, 290.0, 283.0, 287.0, 295.0, 256.0, 266.0, 285.0, 294.0, 284.0, 295.0, 230.0, 238.0, 258.0, 264.0, 283.0, 287.0, 303.0, 276.0, 274.0, 299.0, 299.0, 283.0, 270.0, 269.0, 262.0, 263.0, 290.0, 286.0, 248.0, 242.0, 261.0, 258.0, 277.0, 299.0, 293.0, 286.0, 285.0, 297.0, 265.0, 265.0, 295.0, 281.0, 266.0, 259.0, 262.0, 257.0, 285.0, 285.0, 286.0, 289.0, 260.0, 270.0, 291.0, 285.0, 297.0, 282.0, 290.0, 286.0, 283.0, 290.0, 292.0, 
287.0, 284.0, 298.0, 304.0, 275.0, 294.0, 276.0, 233.0, 220.0, 250.0, 275.0, 283.0, 296.0, 290.0, 286.0, 276.0, 297.0, 290.0, 283.0, 287.0, 286.0, 292.0, 284.0, 284.0, 295.0, 291.0, 296.0, 278.0, 304.0, 304.0, 278.0, 289.0, 290.0, 290.0, 283.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6890653996220631, "mean_inference_ms": 1.2217288104133466, "mean_action_processing_ms": 0.13282370029567642, "mean_env_wait_ms": 0.839345002381688, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 4915200, "num_agent_steps_trained": 4915200, "num_env_steps_sampled": 2457600, "num_env_steps_trained": 2457600, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 2457600, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 4915200, "timers": {"training_iteration_time_ms": 4412.084, "learn_time_ms": 1145.822, "learn_throughput": 11171.018, "synch_weights_time_ms": 12.484}, "counters": {"num_env_steps_sampled": 2457600, "num_env_steps_trained": 2457600, "num_agent_steps_sampled": 4915200, "num_agent_steps_trained": 4915200}, "done": false, "episodes_total": 6144, "training_iteration": 192, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-12-32", "timestamp": 1666581152, "time_this_iter_s": 4.201019287109375, "time_total_s": 726.8631219863892, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 726.8631219863892, "timesteps_since_restore": 0, "iterations_since_restore": 192, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 43.0, "ram_util_percent": 21.433333333333334}}
+{"custom_metrics": {"sparse_reward_mean": 193.8, "sparse_reward_min": 160, "sparse_reward_max": 200, "shaped_reward_mean": 172.61, "shaped_reward_min": 133, "shaped_reward_max": 187, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.07, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 16.52, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 15.73, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 16.27, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.36, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.27, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.15, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, 
"useful_onion_drop_agent_1_mean": 0.1, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.37, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 16.0, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.51, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.22, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 5.05, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.8, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 10, "dish_drop_agent_0_mean": 0.26, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.27, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.07, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.04, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.08, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.72, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 10, "soup_delivery_agent_0_mean": 5.02, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.68, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 10, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.37, "optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 16.0, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.37, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 16.0, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.00027986563509330153, "policy_loss": -0.0005315254675224423, "vf_loss": 
7.590443134307861, "vf_explained_var": 0.667829155921936, "kl": 0.001756403362378478, "entropy": 1.0147672891616821, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 2470400, "num_env_steps_trained": 2470400, "num_agent_steps_sampled": 4940800, "num_agent_steps_trained": 4940800}, "sampler_results": {"episode_reward_max": 587.0, "episode_reward_min": 453.0, "episode_reward_mean": 560.21, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 220.0}, "policy_reward_max": {"ppo": 310.0}, "policy_reward_mean": {"ppo": 280.105}, "custom_metrics": {"sparse_reward_mean": 193.8, "sparse_reward_min": 160, "sparse_reward_max": 200, "shaped_reward_mean": 172.61, "shaped_reward_min": 133, "shaped_reward_max": 187, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.07, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 16.52, 
"onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 15.73, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 16.27, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.36, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.27, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.15, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.1, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.37, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 16.0, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.51, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.22, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 5.05, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.8, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 10, "dish_drop_agent_0_mean": 0.26, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.27, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.07, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.04, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.08, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.72, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 10, "soup_delivery_agent_0_mean": 5.02, "soup_delivery_agent_0_min": 0, 
"soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.68, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 10, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.37, "optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 16.0, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.37, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 16.0, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [570.0, 522.0, 570.0, 576.0, 579.0, 567.0, 576.0, 576.0, 522.0, 579.0, 576.0, 584.0, 507.0, 579.0, 573.0, 579.0, 582.0, 576.0, 576.0, 576.0, 573.0, 582.0, 522.0, 579.0, 579.0, 468.0, 522.0, 570.0, 579.0, 573.0, 582.0, 539.0, 525.0, 576.0, 490.0, 519.0, 576.0, 579.0, 582.0, 530.0, 576.0, 525.0, 519.0, 570.0, 575.0, 530.0, 576.0, 579.0, 576.0, 573.0, 579.0, 582.0, 579.0, 570.0, 453.0, 525.0, 579.0, 576.0, 573.0, 573.0, 573.0, 576.0, 579.0, 587.0, 582.0, 582.0, 579.0, 573.0, 576.0, 573.0, 582.0, 576.0, 525.0, 525.0, 573.0, 576.0, 525.0, 525.0, 576.0, 573.0, 573.0, 573.0, 522.0, 573.0, 519.0, 573.0, 525.0, 576.0, 579.0, 576.0, 522.0, 522.0, 570.0, 579.0, 530.0, 582.0, 530.0, 525.0, 582.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [286.0, 284.0, 272.0, 250.0, 290.0, 280.0, 286.0, 290.0, 290.0, 289.0, 286.0, 281.0, 266.0, 310.0, 280.0, 296.0, 265.0, 257.0, 292.0, 287.0, 297.0, 279.0, 304.0, 280.0, 247.0, 260.0, 293.0, 286.0, 274.0, 299.0, 304.0, 275.0, 292.0, 290.0, 280.0, 296.0, 300.0, 276.0, 287.0, 289.0, 290.0, 283.0, 287.0, 295.0, 256.0, 266.0, 285.0, 294.0, 284.0, 
295.0, 230.0, 238.0, 258.0, 264.0, 283.0, 287.0, 303.0, 276.0, 274.0, 299.0, 299.0, 283.0, 270.0, 269.0, 262.0, 263.0, 290.0, 286.0, 248.0, 242.0, 261.0, 258.0, 277.0, 299.0, 293.0, 286.0, 285.0, 297.0, 265.0, 265.0, 295.0, 281.0, 266.0, 259.0, 262.0, 257.0, 285.0, 285.0, 286.0, 289.0, 260.0, 270.0, 291.0, 285.0, 297.0, 282.0, 290.0, 286.0, 283.0, 290.0, 292.0, 287.0, 284.0, 298.0, 304.0, 275.0, 294.0, 276.0, 233.0, 220.0, 250.0, 275.0, 283.0, 296.0, 290.0, 286.0, 276.0, 297.0, 290.0, 283.0, 287.0, 286.0, 292.0, 284.0, 284.0, 295.0, 291.0, 296.0, 278.0, 304.0, 304.0, 278.0, 289.0, 290.0, 290.0, 283.0, 303.0, 273.0, 292.0, 281.0, 289.0, 293.0, 295.0, 281.0, 269.0, 256.0, 255.0, 270.0, 292.0, 281.0, 277.0, 299.0, 246.0, 279.0, 265.0, 260.0, 300.0, 276.0, 289.0, 284.0, 306.0, 267.0, 283.0, 290.0, 256.0, 266.0, 284.0, 289.0, 264.0, 255.0, 284.0, 289.0, 260.0, 265.0, 292.0, 284.0, 288.0, 291.0, 279.0, 297.0, 269.0, 253.0, 263.0, 259.0, 278.0, 292.0, 290.0, 289.0, 258.0, 272.0, 296.0, 286.0, 278.0, 252.0, 259.0, 266.0, 279.0, 303.0, 286.0, 290.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6900892375397361, "mean_inference_ms": 1.2230523595362317, "mean_action_processing_ms": 0.1329422939345136, "mean_env_wait_ms": 0.8401157934099642, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 587.0, "episode_reward_min": 453.0, "episode_reward_mean": 560.21, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 220.0}, "policy_reward_max": {"ppo": 310.0}, "policy_reward_mean": {"ppo": 280.105}, "hist_stats": {"episode_reward": [570.0, 522.0, 570.0, 576.0, 579.0, 567.0, 576.0, 576.0, 522.0, 579.0, 576.0, 584.0, 507.0, 579.0, 573.0, 579.0, 582.0, 576.0, 576.0, 576.0, 573.0, 582.0, 522.0, 579.0, 579.0, 468.0, 522.0, 570.0, 579.0, 573.0, 582.0, 539.0, 525.0, 576.0, 490.0, 519.0, 576.0, 579.0, 582.0, 530.0, 576.0, 525.0, 519.0, 570.0, 575.0, 530.0, 576.0, 579.0, 576.0, 573.0, 579.0, 582.0, 579.0, 570.0, 453.0, 525.0, 579.0, 
576.0, 573.0, 573.0, 573.0, 576.0, 579.0, 587.0, 582.0, 582.0, 579.0, 573.0, 576.0, 573.0, 582.0, 576.0, 525.0, 525.0, 573.0, 576.0, 525.0, 525.0, 576.0, 573.0, 573.0, 573.0, 522.0, 573.0, 519.0, 573.0, 525.0, 576.0, 579.0, 576.0, 522.0, 522.0, 570.0, 579.0, 530.0, 582.0, 530.0, 525.0, 582.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [286.0, 284.0, 272.0, 250.0, 290.0, 280.0, 286.0, 290.0, 290.0, 289.0, 286.0, 281.0, 266.0, 310.0, 280.0, 296.0, 265.0, 257.0, 292.0, 287.0, 297.0, 279.0, 304.0, 280.0, 247.0, 260.0, 293.0, 286.0, 274.0, 299.0, 304.0, 275.0, 292.0, 290.0, 280.0, 296.0, 300.0, 276.0, 287.0, 289.0, 290.0, 283.0, 287.0, 295.0, 256.0, 266.0, 285.0, 294.0, 284.0, 295.0, 230.0, 238.0, 258.0, 264.0, 283.0, 287.0, 303.0, 276.0, 274.0, 299.0, 299.0, 283.0, 270.0, 269.0, 262.0, 263.0, 290.0, 286.0, 248.0, 242.0, 261.0, 258.0, 277.0, 299.0, 293.0, 286.0, 285.0, 297.0, 265.0, 265.0, 295.0, 281.0, 266.0, 259.0, 262.0, 257.0, 285.0, 285.0, 286.0, 289.0, 260.0, 270.0, 291.0, 285.0, 297.0, 282.0, 290.0, 286.0, 283.0, 290.0, 292.0, 287.0, 284.0, 298.0, 304.0, 275.0, 294.0, 276.0, 233.0, 220.0, 250.0, 275.0, 283.0, 296.0, 290.0, 286.0, 276.0, 297.0, 290.0, 283.0, 287.0, 286.0, 292.0, 284.0, 284.0, 295.0, 291.0, 296.0, 278.0, 304.0, 304.0, 278.0, 289.0, 290.0, 290.0, 283.0, 303.0, 273.0, 292.0, 281.0, 289.0, 293.0, 295.0, 281.0, 269.0, 256.0, 255.0, 270.0, 292.0, 281.0, 277.0, 299.0, 246.0, 279.0, 265.0, 260.0, 300.0, 276.0, 289.0, 284.0, 306.0, 267.0, 283.0, 290.0, 256.0, 
266.0, 284.0, 289.0, 264.0, 255.0, 284.0, 289.0, 260.0, 265.0, 292.0, 284.0, 288.0, 291.0, 279.0, 297.0, 269.0, 253.0, 263.0, 259.0, 278.0, 292.0, 290.0, 289.0, 258.0, 272.0, 296.0, 286.0, 278.0, 252.0, 259.0, 266.0, 279.0, 303.0, 286.0, 290.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6900892375397361, "mean_inference_ms": 1.2230523595362317, "mean_action_processing_ms": 0.1329422939345136, "mean_env_wait_ms": 0.8401157934099642, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 4940800, "num_agent_steps_trained": 4940800, "num_env_steps_sampled": 2470400, "num_env_steps_trained": 2470400, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 2470400, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 4940800, "timers": {"training_iteration_time_ms": 4481.05, "learn_time_ms": 1158.916, "learn_throughput": 11044.805, "synch_weights_time_ms": 11.827}, "counters": {"num_env_steps_sampled": 2470400, "num_env_steps_trained": 2470400, "num_agent_steps_sampled": 4940800, "num_agent_steps_trained": 4940800}, "done": false, "episodes_total": 6176, "training_iteration": 193, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-12-37", "timestamp": 1666581157, "time_this_iter_s": 4.319738388061523, "time_total_s": 731.1828603744507, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 731.1828603744507, "timesteps_since_restore": 0, "iterations_since_restore": 193, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 33.17142857142857, "ram_util_percent": 21.985714285714288}}
+{"custom_metrics": {"sparse_reward_mean": 192.4, "sparse_reward_min": 160, "sparse_reward_max": 200, "shaped_reward_mean": 171.63, "shaped_reward_min": 133, "shaped_reward_max": 187, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.9, "onion_pickup_agent_0_min": 10, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 16.49, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 15.54, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 16.17, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.34, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.22, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.15, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, 
"useful_onion_drop_agent_1_mean": 0.09, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.22, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 15.94, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.49, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.23, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.08, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.77, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.24, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.33, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.05, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.06, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.05, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.68, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.0, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.64, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.22, "optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 15.94, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.22, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 15.94, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0005898141534999013, "policy_loss": 0.00032977614318951964, "vf_loss": 
7.652249813079834, "vf_explained_var": 0.6789402365684509, "kl": 0.0019943215884268284, "entropy": 1.0103744268417358, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 2483200, "num_env_steps_trained": 2483200, "num_agent_steps_sampled": 4966400, "num_agent_steps_trained": 4966400}, "sampler_results": {"episode_reward_max": 587.0, "episode_reward_min": 453.0, "episode_reward_mean": 556.43, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 220.0}, "policy_reward_max": {"ppo": 308.0}, "policy_reward_mean": {"ppo": 278.215}, "custom_metrics": {"sparse_reward_mean": 192.4, "sparse_reward_min": 160, "sparse_reward_max": 200, "shaped_reward_mean": 171.63, "shaped_reward_min": 133, "shaped_reward_max": 187, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.9, "onion_pickup_agent_0_min": 10, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 16.49, 
"onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 15.54, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 16.17, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.34, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.22, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.15, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.09, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.22, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 15.94, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.49, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.23, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.08, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.77, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.24, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.33, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.05, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.06, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.05, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.68, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.0, "soup_delivery_agent_0_min": 1, 
"soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.64, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.22, "optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 15.94, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.22, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 15.94, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [525.0, 576.0, 490.0, 519.0, 576.0, 579.0, 582.0, 530.0, 576.0, 525.0, 519.0, 570.0, 575.0, 530.0, 576.0, 579.0, 576.0, 573.0, 579.0, 582.0, 579.0, 570.0, 453.0, 525.0, 579.0, 576.0, 573.0, 573.0, 573.0, 576.0, 579.0, 587.0, 582.0, 582.0, 579.0, 573.0, 576.0, 573.0, 582.0, 576.0, 525.0, 525.0, 573.0, 576.0, 525.0, 525.0, 576.0, 573.0, 573.0, 573.0, 522.0, 573.0, 519.0, 573.0, 525.0, 576.0, 579.0, 576.0, 522.0, 522.0, 570.0, 579.0, 530.0, 582.0, 530.0, 525.0, 582.0, 576.0, 576.0, 522.0, 522.0, 516.0, 576.0, 573.0, 573.0, 570.0, 465.0, 519.0, 525.0, 525.0, 573.0, 525.0, 576.0, 582.0, 576.0, 573.0, 570.0, 573.0, 522.0, 525.0, 536.0, 579.0, 519.0, 573.0, 579.0, 579.0, 579.0, 530.0, 525.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [262.0, 263.0, 290.0, 286.0, 248.0, 242.0, 261.0, 258.0, 277.0, 299.0, 293.0, 286.0, 285.0, 297.0, 265.0, 265.0, 295.0, 281.0, 266.0, 259.0, 262.0, 257.0, 285.0, 285.0, 286.0, 289.0, 260.0, 270.0, 291.0, 285.0, 297.0, 282.0, 290.0, 286.0, 283.0, 290.0, 292.0, 287.0, 284.0, 298.0, 304.0, 275.0, 294.0, 276.0, 233.0, 220.0, 250.0, 275.0, 283.0, 
296.0, 290.0, 286.0, 276.0, 297.0, 290.0, 283.0, 287.0, 286.0, 292.0, 284.0, 284.0, 295.0, 291.0, 296.0, 278.0, 304.0, 304.0, 278.0, 289.0, 290.0, 290.0, 283.0, 303.0, 273.0, 292.0, 281.0, 289.0, 293.0, 295.0, 281.0, 269.0, 256.0, 255.0, 270.0, 292.0, 281.0, 277.0, 299.0, 246.0, 279.0, 265.0, 260.0, 300.0, 276.0, 289.0, 284.0, 306.0, 267.0, 283.0, 290.0, 256.0, 266.0, 284.0, 289.0, 264.0, 255.0, 284.0, 289.0, 260.0, 265.0, 292.0, 284.0, 288.0, 291.0, 279.0, 297.0, 269.0, 253.0, 263.0, 259.0, 278.0, 292.0, 290.0, 289.0, 258.0, 272.0, 296.0, 286.0, 278.0, 252.0, 259.0, 266.0, 279.0, 303.0, 286.0, 290.0, 291.0, 285.0, 262.0, 260.0, 255.0, 267.0, 270.0, 246.0, 282.0, 294.0, 290.0, 283.0, 308.0, 265.0, 286.0, 284.0, 231.0, 234.0, 253.0, 266.0, 271.0, 254.0, 261.0, 264.0, 283.0, 290.0, 257.0, 268.0, 297.0, 279.0, 282.0, 300.0, 287.0, 289.0, 280.0, 293.0, 280.0, 290.0, 283.0, 290.0, 263.0, 259.0, 270.0, 255.0, 266.0, 270.0, 298.0, 281.0, 255.0, 264.0, 302.0, 271.0, 282.0, 297.0, 288.0, 291.0, 280.0, 299.0, 256.0, 274.0, 265.0, 260.0, 285.0, 294.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6908932370553954, "mean_inference_ms": 1.2239034901785653, "mean_action_processing_ms": 0.13303384998747472, "mean_env_wait_ms": 0.8406845568626884, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 587.0, "episode_reward_min": 453.0, "episode_reward_mean": 556.43, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 220.0}, "policy_reward_max": {"ppo": 308.0}, "policy_reward_mean": {"ppo": 278.215}, "hist_stats": {"episode_reward": [525.0, 576.0, 490.0, 519.0, 576.0, 579.0, 582.0, 530.0, 576.0, 525.0, 519.0, 570.0, 575.0, 530.0, 576.0, 579.0, 576.0, 573.0, 579.0, 582.0, 579.0, 570.0, 453.0, 525.0, 579.0, 576.0, 573.0, 573.0, 573.0, 576.0, 579.0, 587.0, 582.0, 582.0, 579.0, 573.0, 576.0, 573.0, 582.0, 576.0, 525.0, 525.0, 573.0, 576.0, 525.0, 525.0, 576.0, 573.0, 573.0, 573.0, 522.0, 573.0, 519.0, 573.0, 525.0, 576.0, 
579.0, 576.0, 522.0, 522.0, 570.0, 579.0, 530.0, 582.0, 530.0, 525.0, 582.0, 576.0, 576.0, 522.0, 522.0, 516.0, 576.0, 573.0, 573.0, 570.0, 465.0, 519.0, 525.0, 525.0, 573.0, 525.0, 576.0, 582.0, 576.0, 573.0, 570.0, 573.0, 522.0, 525.0, 536.0, 579.0, 519.0, 573.0, 579.0, 579.0, 579.0, 530.0, 525.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [262.0, 263.0, 290.0, 286.0, 248.0, 242.0, 261.0, 258.0, 277.0, 299.0, 293.0, 286.0, 285.0, 297.0, 265.0, 265.0, 295.0, 281.0, 266.0, 259.0, 262.0, 257.0, 285.0, 285.0, 286.0, 289.0, 260.0, 270.0, 291.0, 285.0, 297.0, 282.0, 290.0, 286.0, 283.0, 290.0, 292.0, 287.0, 284.0, 298.0, 304.0, 275.0, 294.0, 276.0, 233.0, 220.0, 250.0, 275.0, 283.0, 296.0, 290.0, 286.0, 276.0, 297.0, 290.0, 283.0, 287.0, 286.0, 292.0, 284.0, 284.0, 295.0, 291.0, 296.0, 278.0, 304.0, 304.0, 278.0, 289.0, 290.0, 290.0, 283.0, 303.0, 273.0, 292.0, 281.0, 289.0, 293.0, 295.0, 281.0, 269.0, 256.0, 255.0, 270.0, 292.0, 281.0, 277.0, 299.0, 246.0, 279.0, 265.0, 260.0, 300.0, 276.0, 289.0, 284.0, 306.0, 267.0, 283.0, 290.0, 256.0, 266.0, 284.0, 289.0, 264.0, 255.0, 284.0, 289.0, 260.0, 265.0, 292.0, 284.0, 288.0, 291.0, 279.0, 297.0, 269.0, 253.0, 263.0, 259.0, 278.0, 292.0, 290.0, 289.0, 258.0, 272.0, 296.0, 286.0, 278.0, 252.0, 259.0, 266.0, 279.0, 303.0, 286.0, 290.0, 291.0, 285.0, 262.0, 260.0, 255.0, 267.0, 270.0, 246.0, 282.0, 294.0, 290.0, 283.0, 308.0, 265.0, 286.0, 284.0, 231.0, 234.0, 253.0, 266.0, 271.0, 254.0, 261.0, 264.0, 283.0, 290.0, 257.0, 268.0, 
297.0, 279.0, 282.0, 300.0, 287.0, 289.0, 280.0, 293.0, 280.0, 290.0, 283.0, 290.0, 263.0, 259.0, 270.0, 255.0, 266.0, 270.0, 298.0, 281.0, 255.0, 264.0, 302.0, 271.0, 282.0, 297.0, 288.0, 291.0, 280.0, 299.0, 256.0, 274.0, 265.0, 260.0, 285.0, 294.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6908932370553954, "mean_inference_ms": 1.2239034901785653, "mean_action_processing_ms": 0.13303384998747472, "mean_env_wait_ms": 0.8406845568626884, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 4966400, "num_agent_steps_trained": 4966400, "num_env_steps_sampled": 2483200, "num_env_steps_trained": 2483200, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 2483200, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 4966400, "timers": {"training_iteration_time_ms": 4557.46, "learn_time_ms": 1181.905, "learn_throughput": 10829.973, "synch_weights_time_ms": 12.829}, "counters": {"num_env_steps_sampled": 2483200, "num_env_steps_trained": 2483200, "num_agent_steps_sampled": 4966400, "num_agent_steps_trained": 4966400}, "done": false, "episodes_total": 6208, "training_iteration": 194, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-12-42", "timestamp": 1666581162, "time_this_iter_s": 4.991002321243286, "time_total_s": 736.173862695694, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 736.173862695694, "timesteps_since_restore": 0, "iterations_since_restore": 194, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 66.88571428571429, "ram_util_percent": 15.585714285714284}}
+{"custom_metrics": {"sparse_reward_mean": 192.4, "sparse_reward_min": 160, "sparse_reward_max": 200, "shaped_reward_mean": 171.24, "shaped_reward_min": 145, "shaped_reward_max": 182, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.75, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 16.71, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 15.36, "useful_onion_pickup_agent_0_min": 9, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 16.34, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.39, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.25, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.17, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, 
"useful_onion_drop_agent_1_mean": 0.14, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.99, "potting_onion_agent_0_min": 9, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 16.15, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.69, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 14, "dish_pickup_agent_1_mean": 5.11, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.17, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.62, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.31, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.35, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.07, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.08, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 5.18, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 11, "soup_pickup_agent_1_mean": 4.65, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 11, "soup_delivery_agent_0_mean": 5.12, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.53, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.04, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 3, "soup_drop_agent_1_mean": 0.1, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 7, "optimal_onion_potting_agent_0_mean": 14.99, "optimal_onion_potting_agent_0_min": 9, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 16.15, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.99, "viable_onion_potting_agent_0_min": 9, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 16.15, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.001025318168103695, "policy_loss": -0.001293760840781033, "vf_loss": 
7.650734901428223, "vf_explained_var": 0.6610172390937805, "kl": 0.0021573223639279604, "entropy": 0.993259072303772, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 2496000, "num_env_steps_trained": 2496000, "num_agent_steps_sampled": 4992000, "num_agent_steps_trained": 4992000}, "sampler_results": {"episode_reward_max": 582.0, "episode_reward_min": 465.0, "episode_reward_mean": 556.04, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 231.0}, "policy_reward_max": {"ppo": 308.0}, "policy_reward_mean": {"ppo": 278.02}, "custom_metrics": {"sparse_reward_mean": 192.4, "sparse_reward_min": 160, "sparse_reward_max": 200, "shaped_reward_mean": 171.24, "shaped_reward_min": 145, "shaped_reward_max": 182, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.75, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 16.71, 
"onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 15.36, "useful_onion_pickup_agent_0_min": 9, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 16.34, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.39, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.25, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.17, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.14, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.99, "potting_onion_agent_0_min": 9, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 16.15, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.69, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 14, "dish_pickup_agent_1_mean": 5.11, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.17, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.62, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.31, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.35, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.07, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.08, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 5.18, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 11, "soup_pickup_agent_1_mean": 4.65, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 11, "soup_delivery_agent_0_mean": 5.12, "soup_delivery_agent_0_min": 1, 
"soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.53, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.04, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 3, "soup_drop_agent_1_mean": 0.1, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 7, "optimal_onion_potting_agent_0_mean": 14.99, "optimal_onion_potting_agent_0_min": 9, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 16.15, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.99, "viable_onion_potting_agent_0_min": 9, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 16.15, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 582.0, 579.0, 573.0, 576.0, 573.0, 582.0, 576.0, 525.0, 525.0, 573.0, 576.0, 525.0, 525.0, 576.0, 573.0, 573.0, 573.0, 522.0, 573.0, 519.0, 573.0, 525.0, 576.0, 579.0, 576.0, 522.0, 522.0, 570.0, 579.0, 530.0, 582.0, 530.0, 525.0, 582.0, 576.0, 576.0, 522.0, 522.0, 516.0, 576.0, 573.0, 573.0, 570.0, 465.0, 519.0, 525.0, 525.0, 573.0, 525.0, 576.0, 582.0, 576.0, 573.0, 570.0, 573.0, 522.0, 525.0, 536.0, 579.0, 519.0, 573.0, 579.0, 579.0, 579.0, 530.0, 525.0, 579.0, 530.0, 567.0, 522.0, 579.0, 527.0, 570.0, 582.0, 573.0, 522.0, 576.0, 579.0, 576.0, 522.0, 573.0, 582.0, 576.0, 576.0, 570.0, 573.0, 579.0, 522.0, 567.0, 522.0, 582.0, 525.0, 507.0, 582.0, 573.0, 530.0, 576.0, 522.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [278.0, 304.0, 304.0, 278.0, 289.0, 290.0, 290.0, 283.0, 303.0, 273.0, 292.0, 281.0, 289.0, 293.0, 295.0, 281.0, 269.0, 256.0, 255.0, 270.0, 292.0, 281.0, 277.0, 299.0, 246.0, 279.0, 265.0, 260.0, 300.0, 276.0, 289.0, 284.0, 306.0, 267.0, 283.0, 290.0, 256.0, 266.0, 284.0, 289.0, 264.0, 255.0, 284.0, 289.0, 260.0, 265.0, 292.0, 284.0, 288.0, 
291.0, 279.0, 297.0, 269.0, 253.0, 263.0, 259.0, 278.0, 292.0, 290.0, 289.0, 258.0, 272.0, 296.0, 286.0, 278.0, 252.0, 259.0, 266.0, 279.0, 303.0, 286.0, 290.0, 291.0, 285.0, 262.0, 260.0, 255.0, 267.0, 270.0, 246.0, 282.0, 294.0, 290.0, 283.0, 308.0, 265.0, 286.0, 284.0, 231.0, 234.0, 253.0, 266.0, 271.0, 254.0, 261.0, 264.0, 283.0, 290.0, 257.0, 268.0, 297.0, 279.0, 282.0, 300.0, 287.0, 289.0, 280.0, 293.0, 280.0, 290.0, 283.0, 290.0, 263.0, 259.0, 270.0, 255.0, 266.0, 270.0, 298.0, 281.0, 255.0, 264.0, 302.0, 271.0, 282.0, 297.0, 288.0, 291.0, 280.0, 299.0, 256.0, 274.0, 265.0, 260.0, 285.0, 294.0, 274.0, 256.0, 297.0, 270.0, 254.0, 268.0, 275.0, 304.0, 275.0, 252.0, 279.0, 291.0, 298.0, 284.0, 293.0, 280.0, 263.0, 259.0, 298.0, 278.0, 290.0, 289.0, 296.0, 280.0, 256.0, 266.0, 288.0, 285.0, 295.0, 287.0, 276.0, 300.0, 291.0, 285.0, 281.0, 289.0, 290.0, 283.0, 278.0, 301.0, 265.0, 257.0, 279.0, 288.0, 265.0, 257.0, 294.0, 288.0, 275.0, 250.0, 248.0, 259.0, 294.0, 288.0, 281.0, 292.0, 265.0, 265.0, 296.0, 280.0, 262.0, 260.0, 283.0, 296.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6928564263382961, "mean_inference_ms": 1.2294020123590537, "mean_action_processing_ms": 0.13325836540923008, "mean_env_wait_ms": 0.8421654977463985, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 582.0, "episode_reward_min": 465.0, "episode_reward_mean": 556.04, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 231.0}, "policy_reward_max": {"ppo": 308.0}, "policy_reward_mean": {"ppo": 278.02}, "hist_stats": {"episode_reward": [582.0, 582.0, 579.0, 573.0, 576.0, 573.0, 582.0, 576.0, 525.0, 525.0, 573.0, 576.0, 525.0, 525.0, 576.0, 573.0, 573.0, 573.0, 522.0, 573.0, 519.0, 573.0, 525.0, 576.0, 579.0, 576.0, 522.0, 522.0, 570.0, 579.0, 530.0, 582.0, 530.0, 525.0, 582.0, 576.0, 576.0, 522.0, 522.0, 516.0, 576.0, 573.0, 573.0, 570.0, 465.0, 519.0, 525.0, 525.0, 573.0, 525.0, 576.0, 582.0, 576.0, 573.0, 570.0, 573.0, 522.0, 
525.0, 536.0, 579.0, 519.0, 573.0, 579.0, 579.0, 579.0, 530.0, 525.0, 579.0, 530.0, 567.0, 522.0, 579.0, 527.0, 570.0, 582.0, 573.0, 522.0, 576.0, 579.0, 576.0, 522.0, 573.0, 582.0, 576.0, 576.0, 570.0, 573.0, 579.0, 522.0, 567.0, 522.0, 582.0, 525.0, 507.0, 582.0, 573.0, 530.0, 576.0, 522.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [278.0, 304.0, 304.0, 278.0, 289.0, 290.0, 290.0, 283.0, 303.0, 273.0, 292.0, 281.0, 289.0, 293.0, 295.0, 281.0, 269.0, 256.0, 255.0, 270.0, 292.0, 281.0, 277.0, 299.0, 246.0, 279.0, 265.0, 260.0, 300.0, 276.0, 289.0, 284.0, 306.0, 267.0, 283.0, 290.0, 256.0, 266.0, 284.0, 289.0, 264.0, 255.0, 284.0, 289.0, 260.0, 265.0, 292.0, 284.0, 288.0, 291.0, 279.0, 297.0, 269.0, 253.0, 263.0, 259.0, 278.0, 292.0, 290.0, 289.0, 258.0, 272.0, 296.0, 286.0, 278.0, 252.0, 259.0, 266.0, 279.0, 303.0, 286.0, 290.0, 291.0, 285.0, 262.0, 260.0, 255.0, 267.0, 270.0, 246.0, 282.0, 294.0, 290.0, 283.0, 308.0, 265.0, 286.0, 284.0, 231.0, 234.0, 253.0, 266.0, 271.0, 254.0, 261.0, 264.0, 283.0, 290.0, 257.0, 268.0, 297.0, 279.0, 282.0, 300.0, 287.0, 289.0, 280.0, 293.0, 280.0, 290.0, 283.0, 290.0, 263.0, 259.0, 270.0, 255.0, 266.0, 270.0, 298.0, 281.0, 255.0, 264.0, 302.0, 271.0, 282.0, 297.0, 288.0, 291.0, 280.0, 299.0, 256.0, 274.0, 265.0, 260.0, 285.0, 294.0, 274.0, 256.0, 297.0, 270.0, 254.0, 268.0, 275.0, 304.0, 275.0, 252.0, 279.0, 291.0, 298.0, 284.0, 293.0, 280.0, 263.0, 259.0, 298.0, 278.0, 290.0, 289.0, 296.0, 280.0, 256.0, 266.0, 288.0, 285.0, 295.0, 
287.0, 276.0, 300.0, 291.0, 285.0, 281.0, 289.0, 290.0, 283.0, 278.0, 301.0, 265.0, 257.0, 279.0, 288.0, 265.0, 257.0, 294.0, 288.0, 275.0, 250.0, 248.0, 259.0, 294.0, 288.0, 281.0, 292.0, 265.0, 265.0, 296.0, 280.0, 262.0, 260.0, 283.0, 296.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6928564263382961, "mean_inference_ms": 1.2294020123590537, "mean_action_processing_ms": 0.13325836540923008, "mean_env_wait_ms": 0.8421654977463985, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 4992000, "num_agent_steps_trained": 4992000, "num_env_steps_sampled": 2496000, "num_env_steps_trained": 2496000, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 2496000, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 4992000, "timers": {"training_iteration_time_ms": 5042.959, "learn_time_ms": 1197.253, "learn_throughput": 10691.143, "synch_weights_time_ms": 12.679}, "counters": {"num_env_steps_sampled": 2496000, "num_env_steps_trained": 2496000, "num_agent_steps_sampled": 4992000, "num_agent_steps_trained": 4992000}, "done": false, "episodes_total": 6240, "training_iteration": 195, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-12-51", "timestamp": 1666581171, "time_this_iter_s": 8.491342067718506, "time_total_s": 744.6652047634125, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 744.6652047634125, "timesteps_since_restore": 0, "iterations_since_restore": 195, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 83.90833333333333, "ram_util_percent": 18.958333333333336}}
+{"custom_metrics": {"sparse_reward_mean": 191.8, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 170.88, "shaped_reward_min": 145, "shaped_reward_max": 187, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.82, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 16.59, "onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 15.4, "useful_onion_pickup_agent_0_min": 9, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 16.19, "useful_onion_pickup_agent_1_min": 11, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.37, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.3, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.17, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, 
"useful_onion_drop_agent_1_mean": 0.2, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.08, "potting_onion_agent_0_min": 9, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 15.95, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.52, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 14, "dish_pickup_agent_1_mean": 5.26, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.08, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.7, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.23, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.4, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.08, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 5.09, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 11, "soup_pickup_agent_1_mean": 4.75, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 11, "soup_delivery_agent_0_mean": 5.02, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.61, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.04, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 3, "soup_drop_agent_1_mean": 0.12, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 7, "optimal_onion_potting_agent_0_mean": 15.08, "optimal_onion_potting_agent_0_min": 9, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 15.95, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.08, "viable_onion_potting_agent_0_min": 9, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 15.95, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0011484096758067608, "policy_loss": 0.0008887922158464789, "vf_loss": 
7.625007629394531, "vf_explained_var": 0.6411457061767578, "kl": 0.0023449528962373734, "entropy": 1.0057647228240967, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 2508800, "num_env_steps_trained": 2508800, "num_agent_steps_sampled": 5017600, "num_agent_steps_trained": 5017600}, "sampler_results": {"episode_reward_max": 627.0, "episode_reward_min": 465.0, "episode_reward_mean": 554.48, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 231.0}, "policy_reward_max": {"ppo": 321.0}, "policy_reward_mean": {"ppo": 277.24}, "custom_metrics": {"sparse_reward_mean": 191.8, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 170.88, "shaped_reward_min": 145, "shaped_reward_max": 187, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.82, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 16.59, 
"onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 15.4, "useful_onion_pickup_agent_0_min": 9, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 16.19, "useful_onion_pickup_agent_1_min": 11, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.37, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.3, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.17, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.2, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.08, "potting_onion_agent_0_min": 9, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 15.95, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.52, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 14, "dish_pickup_agent_1_mean": 5.26, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.08, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.7, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.23, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.4, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.08, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 5.09, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 11, "soup_pickup_agent_1_mean": 4.75, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 11, "soup_delivery_agent_0_mean": 5.02, "soup_delivery_agent_0_min": 2, 
"soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.61, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.04, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 3, "soup_drop_agent_1_mean": 0.12, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 7, "optimal_onion_potting_agent_0_mean": 15.08, "optimal_onion_potting_agent_0_min": 9, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 15.95, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.08, "viable_onion_potting_agent_0_min": 9, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 15.95, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [530.0, 525.0, 582.0, 576.0, 576.0, 522.0, 522.0, 516.0, 576.0, 573.0, 573.0, 570.0, 465.0, 519.0, 525.0, 525.0, 573.0, 525.0, 576.0, 582.0, 576.0, 573.0, 570.0, 573.0, 522.0, 525.0, 536.0, 579.0, 519.0, 573.0, 579.0, 579.0, 579.0, 530.0, 525.0, 579.0, 530.0, 567.0, 522.0, 579.0, 527.0, 570.0, 582.0, 573.0, 522.0, 576.0, 579.0, 576.0, 522.0, 573.0, 582.0, 576.0, 576.0, 570.0, 573.0, 579.0, 522.0, 567.0, 522.0, 582.0, 525.0, 507.0, 582.0, 573.0, 530.0, 576.0, 522.0, 579.0, 522.0, 582.0, 576.0, 525.0, 476.0, 576.0, 479.0, 582.0, 522.0, 627.0, 525.0, 558.0, 525.0, 576.0, 582.0, 576.0, 579.0, 576.0, 573.0, 579.0, 573.0, 530.0, 519.0, 573.0, 525.0, 530.0, 519.0, 576.0, 570.0, 573.0, 582.0, 573.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [278.0, 252.0, 259.0, 266.0, 279.0, 303.0, 286.0, 290.0, 291.0, 285.0, 262.0, 260.0, 255.0, 267.0, 270.0, 246.0, 282.0, 294.0, 290.0, 283.0, 308.0, 265.0, 286.0, 284.0, 231.0, 234.0, 253.0, 266.0, 271.0, 254.0, 261.0, 264.0, 283.0, 290.0, 257.0, 268.0, 297.0, 279.0, 282.0, 300.0, 287.0, 289.0, 280.0, 293.0, 280.0, 290.0, 283.0, 290.0, 263.0, 
259.0, 270.0, 255.0, 266.0, 270.0, 298.0, 281.0, 255.0, 264.0, 302.0, 271.0, 282.0, 297.0, 288.0, 291.0, 280.0, 299.0, 256.0, 274.0, 265.0, 260.0, 285.0, 294.0, 274.0, 256.0, 297.0, 270.0, 254.0, 268.0, 275.0, 304.0, 275.0, 252.0, 279.0, 291.0, 298.0, 284.0, 293.0, 280.0, 263.0, 259.0, 298.0, 278.0, 290.0, 289.0, 296.0, 280.0, 256.0, 266.0, 288.0, 285.0, 295.0, 287.0, 276.0, 300.0, 291.0, 285.0, 281.0, 289.0, 290.0, 283.0, 278.0, 301.0, 265.0, 257.0, 279.0, 288.0, 265.0, 257.0, 294.0, 288.0, 275.0, 250.0, 248.0, 259.0, 294.0, 288.0, 281.0, 292.0, 265.0, 265.0, 296.0, 280.0, 262.0, 260.0, 283.0, 296.0, 262.0, 260.0, 298.0, 284.0, 285.0, 291.0, 261.0, 264.0, 237.0, 239.0, 289.0, 287.0, 237.0, 242.0, 294.0, 288.0, 265.0, 257.0, 321.0, 306.0, 268.0, 257.0, 270.0, 288.0, 261.0, 264.0, 285.0, 291.0, 290.0, 292.0, 292.0, 284.0, 284.0, 295.0, 295.0, 281.0, 287.0, 286.0, 291.0, 288.0, 290.0, 283.0, 268.0, 262.0, 261.0, 258.0, 285.0, 288.0, 267.0, 258.0, 260.0, 270.0, 254.0, 265.0, 292.0, 284.0, 283.0, 287.0, 282.0, 291.0, 287.0, 295.0, 282.0, 291.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6950939124240252, "mean_inference_ms": 1.2349222522925547, "mean_action_processing_ms": 0.13351355067475765, "mean_env_wait_ms": 0.8436288772371432, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 627.0, "episode_reward_min": 465.0, "episode_reward_mean": 554.48, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 231.0}, "policy_reward_max": {"ppo": 321.0}, "policy_reward_mean": {"ppo": 277.24}, "hist_stats": {"episode_reward": [530.0, 525.0, 582.0, 576.0, 576.0, 522.0, 522.0, 516.0, 576.0, 573.0, 573.0, 570.0, 465.0, 519.0, 525.0, 525.0, 573.0, 525.0, 576.0, 582.0, 576.0, 573.0, 570.0, 573.0, 522.0, 525.0, 536.0, 579.0, 519.0, 573.0, 579.0, 579.0, 579.0, 530.0, 525.0, 579.0, 530.0, 567.0, 522.0, 579.0, 527.0, 570.0, 582.0, 573.0, 522.0, 576.0, 579.0, 576.0, 522.0, 573.0, 582.0, 576.0, 576.0, 570.0, 573.0, 579.0, 522.0, 
567.0, 522.0, 582.0, 525.0, 507.0, 582.0, 573.0, 530.0, 576.0, 522.0, 579.0, 522.0, 582.0, 576.0, 525.0, 476.0, 576.0, 479.0, 582.0, 522.0, 627.0, 525.0, 558.0, 525.0, 576.0, 582.0, 576.0, 579.0, 576.0, 573.0, 579.0, 573.0, 530.0, 519.0, 573.0, 525.0, 530.0, 519.0, 576.0, 570.0, 573.0, 582.0, 573.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [278.0, 252.0, 259.0, 266.0, 279.0, 303.0, 286.0, 290.0, 291.0, 285.0, 262.0, 260.0, 255.0, 267.0, 270.0, 246.0, 282.0, 294.0, 290.0, 283.0, 308.0, 265.0, 286.0, 284.0, 231.0, 234.0, 253.0, 266.0, 271.0, 254.0, 261.0, 264.0, 283.0, 290.0, 257.0, 268.0, 297.0, 279.0, 282.0, 300.0, 287.0, 289.0, 280.0, 293.0, 280.0, 290.0, 283.0, 290.0, 263.0, 259.0, 270.0, 255.0, 266.0, 270.0, 298.0, 281.0, 255.0, 264.0, 302.0, 271.0, 282.0, 297.0, 288.0, 291.0, 280.0, 299.0, 256.0, 274.0, 265.0, 260.0, 285.0, 294.0, 274.0, 256.0, 297.0, 270.0, 254.0, 268.0, 275.0, 304.0, 275.0, 252.0, 279.0, 291.0, 298.0, 284.0, 293.0, 280.0, 263.0, 259.0, 298.0, 278.0, 290.0, 289.0, 296.0, 280.0, 256.0, 266.0, 288.0, 285.0, 295.0, 287.0, 276.0, 300.0, 291.0, 285.0, 281.0, 289.0, 290.0, 283.0, 278.0, 301.0, 265.0, 257.0, 279.0, 288.0, 265.0, 257.0, 294.0, 288.0, 275.0, 250.0, 248.0, 259.0, 294.0, 288.0, 281.0, 292.0, 265.0, 265.0, 296.0, 280.0, 262.0, 260.0, 283.0, 296.0, 262.0, 260.0, 298.0, 284.0, 285.0, 291.0, 261.0, 264.0, 237.0, 239.0, 289.0, 287.0, 237.0, 242.0, 294.0, 288.0, 265.0, 257.0, 321.0, 306.0, 268.0, 257.0, 270.0, 288.0, 261.0, 264.0, 285.0, 291.0, 290.0, 
292.0, 292.0, 284.0, 284.0, 295.0, 295.0, 281.0, 287.0, 286.0, 291.0, 288.0, 290.0, 283.0, 268.0, 262.0, 261.0, 258.0, 285.0, 288.0, 267.0, 258.0, 260.0, 270.0, 254.0, 265.0, 292.0, 284.0, 283.0, 287.0, 282.0, 291.0, 287.0, 295.0, 282.0, 291.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6950939124240252, "mean_inference_ms": 1.2349222522925547, "mean_action_processing_ms": 0.13351355067475765, "mean_env_wait_ms": 0.8436288772371432, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 5017600, "num_agent_steps_trained": 5017600, "num_env_steps_sampled": 2508800, "num_env_steps_trained": 2508800, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 2508800, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 5017600, "timers": {"training_iteration_time_ms": 5149.375, "learn_time_ms": 1227.13, "learn_throughput": 10430.84, "synch_weights_time_ms": 14.269}, "counters": {"num_env_steps_sampled": 2508800, "num_env_steps_trained": 2508800, "num_agent_steps_sampled": 5017600, "num_agent_steps_trained": 5017600}, "done": false, "episodes_total": 6272, "training_iteration": 196, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-12-56", "timestamp": 1666581176, "time_this_iter_s": 5.200287818908691, "time_total_s": 749.8654925823212, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 749.8654925823212, "timesteps_since_restore": 0, "iterations_since_restore": 196, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 71.8125, "ram_util_percent": 19.0}}
+{"custom_metrics": {"sparse_reward_mean": 191.6, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 171.01, "shaped_reward_min": 122, "shaped_reward_max": 187, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.47, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 17.0, "onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 15.04, "useful_onion_pickup_agent_0_min": 9, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 16.6, "useful_onion_pickup_agent_1_min": 11, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.37, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.33, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.16, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, 
"useful_onion_drop_agent_1_mean": 0.22, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.69, "potting_onion_agent_0_min": 9, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 16.34, "potting_onion_agent_1_min": 11, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.72, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 14, "dish_pickup_agent_1_mean": 4.93, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.25, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.54, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.3, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.3, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.05, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 5.32, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 11, "soup_pickup_agent_1_mean": 4.58, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 11, "soup_delivery_agent_0_mean": 5.23, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.41, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.06, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 3, "soup_drop_agent_1_mean": 0.14, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 7, "optimal_onion_potting_agent_0_mean": 14.69, "optimal_onion_potting_agent_0_min": 9, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 16.34, "optimal_onion_potting_agent_1_min": 11, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.69, "viable_onion_potting_agent_0_min": 9, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 16.34, "viable_onion_potting_agent_1_min": 11, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0005259969620965421, "policy_loss": 0.0002679735189303756, "vf_loss": 
7.586305618286133, "vf_explained_var": 0.6561870574951172, "kl": 0.001961564412340522, "entropy": 1.0012136697769165, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 2521600, "num_env_steps_trained": 2521600, "num_agent_steps_sampled": 5043200, "num_agent_steps_trained": 5043200}, "sampler_results": {"episode_reward_max": 627.0, "episode_reward_min": 362.0, "episode_reward_mean": 554.21, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 177.0}, "policy_reward_max": {"ppo": 321.0}, "policy_reward_mean": {"ppo": 277.105}, "custom_metrics": {"sparse_reward_mean": 191.6, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 171.01, "shaped_reward_min": 122, "shaped_reward_max": 187, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.47, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 17.0, 
"onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 15.04, "useful_onion_pickup_agent_0_min": 9, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 16.6, "useful_onion_pickup_agent_1_min": 11, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.37, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.33, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.16, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.22, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.69, "potting_onion_agent_0_min": 9, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 16.34, "potting_onion_agent_1_min": 11, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.72, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 14, "dish_pickup_agent_1_mean": 4.93, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.25, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.54, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.3, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.3, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.05, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 5.32, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 11, "soup_pickup_agent_1_mean": 4.58, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 11, "soup_delivery_agent_0_mean": 5.23, "soup_delivery_agent_0_min": 2, 
"soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.41, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.06, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 3, "soup_drop_agent_1_mean": 0.14, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 7, "optimal_onion_potting_agent_0_mean": 14.69, "optimal_onion_potting_agent_0_min": 9, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 16.34, "optimal_onion_potting_agent_1_min": 11, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.69, "viable_onion_potting_agent_0_min": 9, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 16.34, "viable_onion_potting_agent_1_min": 11, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 530.0, 525.0, 579.0, 530.0, 567.0, 522.0, 579.0, 527.0, 570.0, 582.0, 573.0, 522.0, 576.0, 579.0, 576.0, 522.0, 573.0, 582.0, 576.0, 576.0, 570.0, 573.0, 579.0, 522.0, 567.0, 522.0, 582.0, 525.0, 507.0, 582.0, 573.0, 530.0, 576.0, 522.0, 579.0, 522.0, 582.0, 576.0, 525.0, 476.0, 576.0, 479.0, 582.0, 522.0, 627.0, 525.0, 558.0, 525.0, 576.0, 582.0, 576.0, 579.0, 576.0, 573.0, 579.0, 573.0, 530.0, 519.0, 573.0, 525.0, 530.0, 519.0, 576.0, 570.0, 573.0, 582.0, 573.0, 519.0, 576.0, 573.0, 576.0, 525.0, 584.0, 573.0, 362.0, 582.0, 582.0, 522.0, 579.0, 573.0, 573.0, 573.0, 573.0, 436.0, 527.0, 579.0, 513.0, 573.0, 576.0, 522.0, 579.0, 525.0, 579.0, 576.0, 582.0, 579.0, 579.0, 579.0, 459.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [280.0, 299.0, 256.0, 274.0, 265.0, 260.0, 285.0, 294.0, 274.0, 256.0, 297.0, 270.0, 254.0, 268.0, 275.0, 304.0, 275.0, 252.0, 279.0, 291.0, 298.0, 284.0, 293.0, 280.0, 263.0, 259.0, 298.0, 278.0, 290.0, 289.0, 296.0, 280.0, 256.0, 266.0, 288.0, 285.0, 295.0, 287.0, 276.0, 300.0, 291.0, 285.0, 281.0, 289.0, 290.0, 283.0, 278.0, 301.0, 265.0, 
257.0, 279.0, 288.0, 265.0, 257.0, 294.0, 288.0, 275.0, 250.0, 248.0, 259.0, 294.0, 288.0, 281.0, 292.0, 265.0, 265.0, 296.0, 280.0, 262.0, 260.0, 283.0, 296.0, 262.0, 260.0, 298.0, 284.0, 285.0, 291.0, 261.0, 264.0, 237.0, 239.0, 289.0, 287.0, 237.0, 242.0, 294.0, 288.0, 265.0, 257.0, 321.0, 306.0, 268.0, 257.0, 270.0, 288.0, 261.0, 264.0, 285.0, 291.0, 290.0, 292.0, 292.0, 284.0, 284.0, 295.0, 295.0, 281.0, 287.0, 286.0, 291.0, 288.0, 290.0, 283.0, 268.0, 262.0, 261.0, 258.0, 285.0, 288.0, 267.0, 258.0, 260.0, 270.0, 254.0, 265.0, 292.0, 284.0, 283.0, 287.0, 282.0, 291.0, 287.0, 295.0, 282.0, 291.0, 277.0, 242.0, 296.0, 280.0, 285.0, 288.0, 294.0, 282.0, 252.0, 273.0, 302.0, 282.0, 290.0, 283.0, 177.0, 185.0, 290.0, 292.0, 299.0, 283.0, 249.0, 273.0, 296.0, 283.0, 287.0, 286.0, 289.0, 284.0, 290.0, 283.0, 280.0, 293.0, 215.0, 221.0, 261.0, 266.0, 283.0, 296.0, 253.0, 260.0, 301.0, 272.0, 292.0, 284.0, 251.0, 271.0, 298.0, 281.0, 253.0, 272.0, 291.0, 288.0, 290.0, 286.0, 291.0, 291.0, 297.0, 282.0, 290.0, 289.0, 297.0, 282.0, 238.0, 221.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6988818221866362, "mean_inference_ms": 1.2486215365158422, "mean_action_processing_ms": 0.13399369704943986, "mean_env_wait_ms": 0.8473021284275585, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 627.0, "episode_reward_min": 362.0, "episode_reward_mean": 554.21, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 177.0}, "policy_reward_max": {"ppo": 321.0}, "policy_reward_mean": {"ppo": 277.105}, "hist_stats": {"episode_reward": [579.0, 530.0, 525.0, 579.0, 530.0, 567.0, 522.0, 579.0, 527.0, 570.0, 582.0, 573.0, 522.0, 576.0, 579.0, 576.0, 522.0, 573.0, 582.0, 576.0, 576.0, 570.0, 573.0, 579.0, 522.0, 567.0, 522.0, 582.0, 525.0, 507.0, 582.0, 573.0, 530.0, 576.0, 522.0, 579.0, 522.0, 582.0, 576.0, 525.0, 476.0, 576.0, 479.0, 582.0, 522.0, 627.0, 525.0, 558.0, 525.0, 576.0, 582.0, 576.0, 579.0, 576.0, 573.0, 579.0, 
573.0, 530.0, 519.0, 573.0, 525.0, 530.0, 519.0, 576.0, 570.0, 573.0, 582.0, 573.0, 519.0, 576.0, 573.0, 576.0, 525.0, 584.0, 573.0, 362.0, 582.0, 582.0, 522.0, 579.0, 573.0, 573.0, 573.0, 573.0, 436.0, 527.0, 579.0, 513.0, 573.0, 576.0, 522.0, 579.0, 525.0, 579.0, 576.0, 582.0, 579.0, 579.0, 579.0, 459.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [280.0, 299.0, 256.0, 274.0, 265.0, 260.0, 285.0, 294.0, 274.0, 256.0, 297.0, 270.0, 254.0, 268.0, 275.0, 304.0, 275.0, 252.0, 279.0, 291.0, 298.0, 284.0, 293.0, 280.0, 263.0, 259.0, 298.0, 278.0, 290.0, 289.0, 296.0, 280.0, 256.0, 266.0, 288.0, 285.0, 295.0, 287.0, 276.0, 300.0, 291.0, 285.0, 281.0, 289.0, 290.0, 283.0, 278.0, 301.0, 265.0, 257.0, 279.0, 288.0, 265.0, 257.0, 294.0, 288.0, 275.0, 250.0, 248.0, 259.0, 294.0, 288.0, 281.0, 292.0, 265.0, 265.0, 296.0, 280.0, 262.0, 260.0, 283.0, 296.0, 262.0, 260.0, 298.0, 284.0, 285.0, 291.0, 261.0, 264.0, 237.0, 239.0, 289.0, 287.0, 237.0, 242.0, 294.0, 288.0, 265.0, 257.0, 321.0, 306.0, 268.0, 257.0, 270.0, 288.0, 261.0, 264.0, 285.0, 291.0, 290.0, 292.0, 292.0, 284.0, 284.0, 295.0, 295.0, 281.0, 287.0, 286.0, 291.0, 288.0, 290.0, 283.0, 268.0, 262.0, 261.0, 258.0, 285.0, 288.0, 267.0, 258.0, 260.0, 270.0, 254.0, 265.0, 292.0, 284.0, 283.0, 287.0, 282.0, 291.0, 287.0, 295.0, 282.0, 291.0, 277.0, 242.0, 296.0, 280.0, 285.0, 288.0, 294.0, 282.0, 252.0, 273.0, 302.0, 282.0, 290.0, 283.0, 177.0, 185.0, 290.0, 292.0, 299.0, 283.0, 249.0, 273.0, 296.0, 283.0, 287.0, 286.0, 289.0, 284.0, 
290.0, 283.0, 280.0, 293.0, 215.0, 221.0, 261.0, 266.0, 283.0, 296.0, 253.0, 260.0, 301.0, 272.0, 292.0, 284.0, 251.0, 271.0, 298.0, 281.0, 253.0, 272.0, 291.0, 288.0, 290.0, 286.0, 291.0, 291.0, 297.0, 282.0, 290.0, 289.0, 297.0, 282.0, 238.0, 221.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6988818221866362, "mean_inference_ms": 1.2486215365158422, "mean_action_processing_ms": 0.13399369704943986, "mean_env_wait_ms": 0.8473021284275585, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 5043200, "num_agent_steps_trained": 5043200, "num_env_steps_sampled": 2521600, "num_env_steps_trained": 2521600, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 2521600, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 5043200, "timers": {"training_iteration_time_ms": 5864.751, "learn_time_ms": 1229.533, "learn_throughput": 10410.46, "synch_weights_time_ms": 15.262}, "counters": {"num_env_steps_sampled": 2521600, "num_env_steps_trained": 2521600, "num_agent_steps_sampled": 5043200, "num_agent_steps_trained": 5043200}, "done": false, "episodes_total": 6304, "training_iteration": 197, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-13-07", "timestamp": 1666581187, "time_this_iter_s": 10.88671064376831, "time_total_s": 760.7522032260895, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 760.7522032260895, "timesteps_since_restore": 0, "iterations_since_restore": 197, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 83.80000000000001, "ram_util_percent": 21.525}}
+{"custom_metrics": {"sparse_reward_mean": 190.4, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 170.23, "shaped_reward_min": 111, "shaped_reward_max": 187, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.66, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 16.66, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 15.28, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 16.29, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.33, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.37, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.14, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, 
"useful_onion_drop_agent_1_mean": 0.19, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.93, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 15.96, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 5.62, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 4.9, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.19, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.53, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.28, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.26, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.24, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.59, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 11, "soup_delivery_agent_0_mean": 5.17, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.41, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.05, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.11, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 7, "optimal_onion_potting_agent_0_mean": 14.93, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 15.96, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.93, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 15.96, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -1.2172211427241564e-05, "policy_loss": -0.0002627079957164824, "vf_loss": 
7.566778182983398, "vf_explained_var": 0.6604992151260376, "kl": 0.0018601968185976148, "entropy": 1.0122833251953125, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 2534400, "num_env_steps_trained": 2534400, "num_agent_steps_sampled": 5068800, "num_agent_steps_trained": 5068800}, "sampler_results": {"episode_reward_max": 627.0, "episode_reward_min": 351.0, "episode_reward_mean": 551.03, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 175.0}, "policy_reward_max": {"ppo": 321.0}, "policy_reward_mean": {"ppo": 275.515}, "custom_metrics": {"sparse_reward_mean": 190.4, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 170.23, "shaped_reward_min": 111, "shaped_reward_max": 187, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.66, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 16.66, 
"onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 15.28, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 16.29, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.33, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.37, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.14, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.19, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.93, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 15.96, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 5.62, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 4.9, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.19, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.53, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.28, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.26, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.24, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.59, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 11, "soup_delivery_agent_0_mean": 5.17, "soup_delivery_agent_0_min": 1, 
"soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.41, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.05, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.11, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 7, "optimal_onion_potting_agent_0_mean": 14.93, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 15.96, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.93, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 15.96, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [530.0, 576.0, 522.0, 579.0, 522.0, 582.0, 576.0, 525.0, 476.0, 576.0, 479.0, 582.0, 522.0, 627.0, 525.0, 558.0, 525.0, 576.0, 582.0, 576.0, 579.0, 576.0, 573.0, 579.0, 573.0, 530.0, 519.0, 573.0, 525.0, 530.0, 519.0, 576.0, 570.0, 573.0, 582.0, 573.0, 519.0, 576.0, 573.0, 576.0, 525.0, 584.0, 573.0, 362.0, 582.0, 582.0, 522.0, 579.0, 573.0, 573.0, 573.0, 573.0, 436.0, 527.0, 579.0, 513.0, 573.0, 576.0, 522.0, 579.0, 525.0, 579.0, 576.0, 582.0, 579.0, 579.0, 579.0, 459.0, 573.0, 576.0, 518.0, 522.0, 573.0, 525.0, 576.0, 579.0, 525.0, 527.0, 579.0, 525.0, 570.0, 579.0, 570.0, 570.0, 530.0, 576.0, 351.0, 573.0, 579.0, 530.0, 579.0, 522.0, 570.0, 573.0, 533.0, 576.0, 525.0, 573.0, 530.0, 522.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [265.0, 265.0, 296.0, 280.0, 262.0, 260.0, 283.0, 296.0, 262.0, 260.0, 298.0, 284.0, 285.0, 291.0, 261.0, 264.0, 237.0, 239.0, 289.0, 287.0, 237.0, 242.0, 294.0, 288.0, 265.0, 257.0, 321.0, 306.0, 268.0, 257.0, 270.0, 288.0, 261.0, 264.0, 285.0, 291.0, 290.0, 292.0, 292.0, 284.0, 284.0, 295.0, 295.0, 281.0, 287.0, 286.0, 291.0, 288.0, 290.0, 
283.0, 268.0, 262.0, 261.0, 258.0, 285.0, 288.0, 267.0, 258.0, 260.0, 270.0, 254.0, 265.0, 292.0, 284.0, 283.0, 287.0, 282.0, 291.0, 287.0, 295.0, 282.0, 291.0, 277.0, 242.0, 296.0, 280.0, 285.0, 288.0, 294.0, 282.0, 252.0, 273.0, 302.0, 282.0, 290.0, 283.0, 177.0, 185.0, 290.0, 292.0, 299.0, 283.0, 249.0, 273.0, 296.0, 283.0, 287.0, 286.0, 289.0, 284.0, 290.0, 283.0, 280.0, 293.0, 215.0, 221.0, 261.0, 266.0, 283.0, 296.0, 253.0, 260.0, 301.0, 272.0, 292.0, 284.0, 251.0, 271.0, 298.0, 281.0, 253.0, 272.0, 291.0, 288.0, 290.0, 286.0, 291.0, 291.0, 297.0, 282.0, 290.0, 289.0, 297.0, 282.0, 238.0, 221.0, 290.0, 283.0, 291.0, 285.0, 261.0, 257.0, 259.0, 263.0, 283.0, 290.0, 267.0, 258.0, 303.0, 273.0, 292.0, 287.0, 275.0, 250.0, 265.0, 262.0, 289.0, 290.0, 261.0, 264.0, 279.0, 291.0, 286.0, 293.0, 285.0, 285.0, 279.0, 291.0, 266.0, 264.0, 296.0, 280.0, 176.0, 175.0, 288.0, 285.0, 277.0, 302.0, 271.0, 259.0, 291.0, 288.0, 273.0, 249.0, 299.0, 271.0, 275.0, 298.0, 263.0, 270.0, 291.0, 285.0, 265.0, 260.0, 298.0, 275.0, 259.0, 271.0, 280.0, 242.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7016750382633622, "mean_inference_ms": 1.2584238626418844, "mean_action_processing_ms": 0.1343697593979121, "mean_env_wait_ms": 0.8502420561186278, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 627.0, "episode_reward_min": 351.0, "episode_reward_mean": 551.03, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 175.0}, "policy_reward_max": {"ppo": 321.0}, "policy_reward_mean": {"ppo": 275.515}, "hist_stats": {"episode_reward": [530.0, 576.0, 522.0, 579.0, 522.0, 582.0, 576.0, 525.0, 476.0, 576.0, 479.0, 582.0, 522.0, 627.0, 525.0, 558.0, 525.0, 576.0, 582.0, 576.0, 579.0, 576.0, 573.0, 579.0, 573.0, 530.0, 519.0, 573.0, 525.0, 530.0, 519.0, 576.0, 570.0, 573.0, 582.0, 573.0, 519.0, 576.0, 573.0, 576.0, 525.0, 584.0, 573.0, 362.0, 582.0, 582.0, 522.0, 579.0, 573.0, 573.0, 573.0, 573.0, 436.0, 527.0, 579.0, 513.0, 573.0, 
576.0, 522.0, 579.0, 525.0, 579.0, 576.0, 582.0, 579.0, 579.0, 579.0, 459.0, 573.0, 576.0, 518.0, 522.0, 573.0, 525.0, 576.0, 579.0, 525.0, 527.0, 579.0, 525.0, 570.0, 579.0, 570.0, 570.0, 530.0, 576.0, 351.0, 573.0, 579.0, 530.0, 579.0, 522.0, 570.0, 573.0, 533.0, 576.0, 525.0, 573.0, 530.0, 522.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [265.0, 265.0, 296.0, 280.0, 262.0, 260.0, 283.0, 296.0, 262.0, 260.0, 298.0, 284.0, 285.0, 291.0, 261.0, 264.0, 237.0, 239.0, 289.0, 287.0, 237.0, 242.0, 294.0, 288.0, 265.0, 257.0, 321.0, 306.0, 268.0, 257.0, 270.0, 288.0, 261.0, 264.0, 285.0, 291.0, 290.0, 292.0, 292.0, 284.0, 284.0, 295.0, 295.0, 281.0, 287.0, 286.0, 291.0, 288.0, 290.0, 283.0, 268.0, 262.0, 261.0, 258.0, 285.0, 288.0, 267.0, 258.0, 260.0, 270.0, 254.0, 265.0, 292.0, 284.0, 283.0, 287.0, 282.0, 291.0, 287.0, 295.0, 282.0, 291.0, 277.0, 242.0, 296.0, 280.0, 285.0, 288.0, 294.0, 282.0, 252.0, 273.0, 302.0, 282.0, 290.0, 283.0, 177.0, 185.0, 290.0, 292.0, 299.0, 283.0, 249.0, 273.0, 296.0, 283.0, 287.0, 286.0, 289.0, 284.0, 290.0, 283.0, 280.0, 293.0, 215.0, 221.0, 261.0, 266.0, 283.0, 296.0, 253.0, 260.0, 301.0, 272.0, 292.0, 284.0, 251.0, 271.0, 298.0, 281.0, 253.0, 272.0, 291.0, 288.0, 290.0, 286.0, 291.0, 291.0, 297.0, 282.0, 290.0, 289.0, 297.0, 282.0, 238.0, 221.0, 290.0, 283.0, 291.0, 285.0, 261.0, 257.0, 259.0, 263.0, 283.0, 290.0, 267.0, 258.0, 303.0, 273.0, 292.0, 287.0, 275.0, 250.0, 265.0, 262.0, 289.0, 290.0, 261.0, 264.0, 279.0, 291.0, 286.0, 293.0, 285.0, 
285.0, 279.0, 291.0, 266.0, 264.0, 296.0, 280.0, 176.0, 175.0, 288.0, 285.0, 277.0, 302.0, 271.0, 259.0, 291.0, 288.0, 273.0, 249.0, 299.0, 271.0, 275.0, 298.0, 263.0, 270.0, 291.0, 285.0, 265.0, 260.0, 298.0, 275.0, 259.0, 271.0, 280.0, 242.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7016750382633622, "mean_inference_ms": 1.2584238626418844, "mean_action_processing_ms": 0.1343697593979121, "mean_env_wait_ms": 0.8502420561186278, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 5068800, "num_agent_steps_trained": 5068800, "num_env_steps_sampled": 2534400, "num_env_steps_trained": 2534400, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 2534400, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 5068800, "timers": {"training_iteration_time_ms": 5913.439, "learn_time_ms": 1228.193, "learn_throughput": 10421.814, "synch_weights_time_ms": 15.093}, "counters": {"num_env_steps_sampled": 2534400, "num_env_steps_trained": 2534400, "num_agent_steps_sampled": 5068800, "num_agent_steps_trained": 5068800}, "done": false, "episodes_total": 6336, "training_iteration": 198, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-13-12", "timestamp": 1666581192, "time_this_iter_s": 4.2692272663116455, "time_total_s": 765.0214304924011, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 765.0214304924011, "timesteps_since_restore": 0, "iterations_since_restore": 198, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 46.93333333333334, "ram_util_percent": 21.333333333333332}}
+{"custom_metrics": {"sparse_reward_mean": 189.4, "sparse_reward_min": 100, "sparse_reward_max": 200, "shaped_reward_mean": 168.38, "shaped_reward_min": 94, "shaped_reward_max": 184, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.33, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 16.56, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 14.95, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 16.17, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.25, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.31, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.11, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, 
"useful_onion_drop_agent_1_mean": 0.12, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.69, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 15.91, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 5.52, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 4.88, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.04, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.47, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.32, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.24, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.05, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.12, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.57, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.06, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.48, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.04, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.04, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.69, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 15.91, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.69, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 15.91, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0008545387536287308, "policy_loss": -0.0011101895943284035, "vf_loss": 
7.604818820953369, "vf_explained_var": 0.6474588513374329, "kl": 0.002114715985953808, "entropy": 1.0096606016159058, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 2547200, "num_env_steps_trained": 2547200, "num_agent_steps_sampled": 5094400, "num_agent_steps_trained": 5094400}, "sampler_results": {"episode_reward_max": 584.0, "episode_reward_min": 294.0, "episode_reward_mean": 547.18, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 141.0}, "policy_reward_max": {"ppo": 306.0}, "policy_reward_mean": {"ppo": 273.59}, "custom_metrics": {"sparse_reward_mean": 189.4, "sparse_reward_min": 100, "sparse_reward_max": 200, "shaped_reward_mean": 168.38, "shaped_reward_min": 94, "shaped_reward_max": 184, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.33, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 16.56, 
"onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 14.95, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 16.17, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.25, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.31, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.11, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.12, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.69, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 15.91, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 5.52, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 4.88, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.04, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.47, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.32, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.24, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.05, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.12, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.57, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.06, "soup_delivery_agent_0_min": 1, 
"soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.48, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.04, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.04, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.69, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 15.91, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.69, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 15.91, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [570.0, 573.0, 582.0, 573.0, 519.0, 576.0, 573.0, 576.0, 525.0, 584.0, 573.0, 362.0, 582.0, 582.0, 522.0, 579.0, 573.0, 573.0, 573.0, 573.0, 436.0, 527.0, 579.0, 513.0, 573.0, 576.0, 522.0, 579.0, 525.0, 579.0, 576.0, 582.0, 579.0, 579.0, 579.0, 459.0, 573.0, 576.0, 518.0, 522.0, 573.0, 525.0, 576.0, 579.0, 525.0, 527.0, 579.0, 525.0, 570.0, 579.0, 570.0, 570.0, 530.0, 576.0, 351.0, 573.0, 579.0, 530.0, 579.0, 522.0, 570.0, 573.0, 533.0, 576.0, 525.0, 573.0, 530.0, 522.0, 522.0, 294.0, 519.0, 525.0, 570.0, 576.0, 576.0, 579.0, 370.0, 522.0, 519.0, 573.0, 530.0, 519.0, 561.0, 573.0, 525.0, 576.0, 525.0, 579.0, 573.0, 576.0, 573.0, 492.0, 573.0, 522.0, 576.0, 573.0, 573.0, 576.0, 570.0, 573.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [283.0, 287.0, 282.0, 291.0, 287.0, 295.0, 282.0, 291.0, 277.0, 242.0, 296.0, 280.0, 285.0, 288.0, 294.0, 282.0, 252.0, 273.0, 302.0, 282.0, 290.0, 283.0, 177.0, 185.0, 290.0, 292.0, 299.0, 283.0, 249.0, 273.0, 296.0, 283.0, 287.0, 286.0, 289.0, 284.0, 290.0, 283.0, 280.0, 293.0, 215.0, 221.0, 261.0, 266.0, 283.0, 296.0, 253.0, 260.0, 301.0, 
272.0, 292.0, 284.0, 251.0, 271.0, 298.0, 281.0, 253.0, 272.0, 291.0, 288.0, 290.0, 286.0, 291.0, 291.0, 297.0, 282.0, 290.0, 289.0, 297.0, 282.0, 238.0, 221.0, 290.0, 283.0, 291.0, 285.0, 261.0, 257.0, 259.0, 263.0, 283.0, 290.0, 267.0, 258.0, 303.0, 273.0, 292.0, 287.0, 275.0, 250.0, 265.0, 262.0, 289.0, 290.0, 261.0, 264.0, 279.0, 291.0, 286.0, 293.0, 285.0, 285.0, 279.0, 291.0, 266.0, 264.0, 296.0, 280.0, 176.0, 175.0, 288.0, 285.0, 277.0, 302.0, 271.0, 259.0, 291.0, 288.0, 273.0, 249.0, 299.0, 271.0, 275.0, 298.0, 263.0, 270.0, 291.0, 285.0, 265.0, 260.0, 298.0, 275.0, 259.0, 271.0, 280.0, 242.0, 253.0, 269.0, 153.0, 141.0, 250.0, 269.0, 263.0, 262.0, 288.0, 282.0, 280.0, 296.0, 290.0, 286.0, 273.0, 306.0, 176.0, 194.0, 262.0, 260.0, 252.0, 267.0, 298.0, 275.0, 267.0, 263.0, 265.0, 254.0, 281.0, 280.0, 284.0, 289.0, 260.0, 265.0, 295.0, 281.0, 247.0, 278.0, 286.0, 293.0, 282.0, 291.0, 292.0, 284.0, 283.0, 290.0, 241.0, 251.0, 290.0, 283.0, 271.0, 251.0, 291.0, 285.0, 285.0, 288.0, 271.0, 302.0, 288.0, 288.0, 276.0, 294.0, 285.0, 288.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7040670537319909, "mean_inference_ms": 1.2672877433683731, "mean_action_processing_ms": 0.13470833472821803, "mean_env_wait_ms": 0.8530001805779409, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 584.0, "episode_reward_min": 294.0, "episode_reward_mean": 547.18, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 141.0}, "policy_reward_max": {"ppo": 306.0}, "policy_reward_mean": {"ppo": 273.59}, "hist_stats": {"episode_reward": [570.0, 573.0, 582.0, 573.0, 519.0, 576.0, 573.0, 576.0, 525.0, 584.0, 573.0, 362.0, 582.0, 582.0, 522.0, 579.0, 573.0, 573.0, 573.0, 573.0, 436.0, 527.0, 579.0, 513.0, 573.0, 576.0, 522.0, 579.0, 525.0, 579.0, 576.0, 582.0, 579.0, 579.0, 579.0, 459.0, 573.0, 576.0, 518.0, 522.0, 573.0, 525.0, 576.0, 579.0, 525.0, 527.0, 579.0, 525.0, 570.0, 579.0, 570.0, 570.0, 530.0, 576.0, 351.0, 573.0, 579.0, 
530.0, 579.0, 522.0, 570.0, 573.0, 533.0, 576.0, 525.0, 573.0, 530.0, 522.0, 522.0, 294.0, 519.0, 525.0, 570.0, 576.0, 576.0, 579.0, 370.0, 522.0, 519.0, 573.0, 530.0, 519.0, 561.0, 573.0, 525.0, 576.0, 525.0, 579.0, 573.0, 576.0, 573.0, 492.0, 573.0, 522.0, 576.0, 573.0, 573.0, 576.0, 570.0, 573.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [283.0, 287.0, 282.0, 291.0, 287.0, 295.0, 282.0, 291.0, 277.0, 242.0, 296.0, 280.0, 285.0, 288.0, 294.0, 282.0, 252.0, 273.0, 302.0, 282.0, 290.0, 283.0, 177.0, 185.0, 290.0, 292.0, 299.0, 283.0, 249.0, 273.0, 296.0, 283.0, 287.0, 286.0, 289.0, 284.0, 290.0, 283.0, 280.0, 293.0, 215.0, 221.0, 261.0, 266.0, 283.0, 296.0, 253.0, 260.0, 301.0, 272.0, 292.0, 284.0, 251.0, 271.0, 298.0, 281.0, 253.0, 272.0, 291.0, 288.0, 290.0, 286.0, 291.0, 291.0, 297.0, 282.0, 290.0, 289.0, 297.0, 282.0, 238.0, 221.0, 290.0, 283.0, 291.0, 285.0, 261.0, 257.0, 259.0, 263.0, 283.0, 290.0, 267.0, 258.0, 303.0, 273.0, 292.0, 287.0, 275.0, 250.0, 265.0, 262.0, 289.0, 290.0, 261.0, 264.0, 279.0, 291.0, 286.0, 293.0, 285.0, 285.0, 279.0, 291.0, 266.0, 264.0, 296.0, 280.0, 176.0, 175.0, 288.0, 285.0, 277.0, 302.0, 271.0, 259.0, 291.0, 288.0, 273.0, 249.0, 299.0, 271.0, 275.0, 298.0, 263.0, 270.0, 291.0, 285.0, 265.0, 260.0, 298.0, 275.0, 259.0, 271.0, 280.0, 242.0, 253.0, 269.0, 153.0, 141.0, 250.0, 269.0, 263.0, 262.0, 288.0, 282.0, 280.0, 296.0, 290.0, 286.0, 273.0, 306.0, 176.0, 194.0, 262.0, 260.0, 252.0, 267.0, 298.0, 275.0, 267.0, 263.0, 265.0, 254.0, 281.0, 
280.0, 284.0, 289.0, 260.0, 265.0, 295.0, 281.0, 247.0, 278.0, 286.0, 293.0, 282.0, 291.0, 292.0, 284.0, 283.0, 290.0, 241.0, 251.0, 290.0, 283.0, 271.0, 251.0, 291.0, 285.0, 285.0, 288.0, 271.0, 302.0, 288.0, 288.0, 276.0, 294.0, 285.0, 288.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7040670537319909, "mean_inference_ms": 1.2672877433683731, "mean_action_processing_ms": 0.13470833472821803, "mean_env_wait_ms": 0.8530001805779409, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 5094400, "num_agent_steps_trained": 5094400, "num_env_steps_sampled": 2547200, "num_env_steps_trained": 2547200, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 2547200, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 5094400, "timers": {"training_iteration_time_ms": 5977.71, "learn_time_ms": 1238.377, "learn_throughput": 10336.109, "synch_weights_time_ms": 14.996}, "counters": {"num_env_steps_sampled": 2547200, "num_env_steps_trained": 2547200, "num_agent_steps_sampled": 5094400, "num_agent_steps_trained": 5094400}, "done": false, "episodes_total": 6368, "training_iteration": 199, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-13-16", "timestamp": 1666581196, "time_this_iter_s": 4.223273992538452, "time_total_s": 769.2447044849396, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 769.2447044849396, "timesteps_since_restore": 0, "iterations_since_restore": 199, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 49.0, "ram_util_percent": 19.357142857142858}}
+{"evaluation": {"average_sparse_reward": 180.0, "num_healthy_workers": 0, "num_recreated_workers": 0}, "custom_metrics": {"sparse_reward_mean": 189.0, "sparse_reward_min": 100, "sparse_reward_max": 200, "shaped_reward_mean": 167.89, "shaped_reward_min": 94, "shaped_reward_max": 182, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.35, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 16.39, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 15.02, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 15.99, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.21, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.29, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 
0.09, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.1, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.81, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 15.77, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 5.43, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.02, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.94, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.46, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.31, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.32, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.05, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.06, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 4, "soup_pickup_agent_0_mean": 5.03, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.63, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 4.95, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.54, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.04, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.81, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 15.77, "optimal_onion_potting_agent_1_min": 8, 
"optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.81, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 15.77, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, 
"total_loss": -0.0019026012159883976, "policy_loss": -0.002160794800147414, "vf_loss": 7.632315635681152, "vf_explained_var": 0.6613626480102539, "kl": 0.0017705156933516264, "entropy": 1.0100735425949097, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 2560000, "num_env_steps_trained": 2560000, "num_agent_steps_sampled": 5120000, "num_agent_steps_trained": 5120000}, "sampler_results": {"episode_reward_max": 582.0, "episode_reward_min": 294.0, "episode_reward_mean": 545.89, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 141.0}, "policy_reward_max": {"ppo": 306.0}, "policy_reward_mean": {"ppo": 272.945}, "custom_metrics": {"sparse_reward_mean": 189.0, "sparse_reward_min": 100, "sparse_reward_max": 200, "shaped_reward_mean": 167.89, "shaped_reward_min": 94, "shaped_reward_max": 182, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.35, 
"onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 16.39, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 15.02, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 15.99, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.21, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.29, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.09, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.1, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.81, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 15.77, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 5.43, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.02, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.94, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.46, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.31, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.32, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.05, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.06, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 4, "soup_pickup_agent_0_mean": 5.03, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.63, "soup_pickup_agent_1_min": 1, 
"soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 4.95, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.54, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.04, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.81, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 15.77, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.81, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 15.77, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 
0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 579.0, 579.0, 459.0, 573.0, 576.0, 518.0, 522.0, 573.0, 525.0, 576.0, 579.0, 525.0, 527.0, 579.0, 525.0, 570.0, 579.0, 570.0, 570.0, 530.0, 576.0, 351.0, 573.0, 579.0, 530.0, 579.0, 522.0, 570.0, 573.0, 533.0, 576.0, 525.0, 573.0, 530.0, 522.0, 522.0, 294.0, 519.0, 525.0, 570.0, 576.0, 576.0, 579.0, 370.0, 522.0, 519.0, 573.0, 530.0, 519.0, 561.0, 573.0, 525.0, 576.0, 525.0, 579.0, 573.0, 576.0, 573.0, 492.0, 573.0, 522.0, 576.0, 573.0, 573.0, 576.0, 570.0, 573.0, 582.0, 573.0, 525.0, 341.0, 573.0, 573.0, 576.0, 513.0, 533.0, 525.0, 525.0, 582.0, 530.0, 576.0, 579.0, 573.0, 582.0, 573.0, 576.0, 576.0, 576.0, 473.0, 525.0, 522.0, 530.0, 576.0, 576.0, 573.0, 525.0, 579.0, 570.0, 570.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [297.0, 282.0, 290.0, 289.0, 297.0, 282.0, 238.0, 221.0, 290.0, 283.0, 291.0, 285.0, 261.0, 257.0, 259.0, 263.0, 283.0, 290.0, 267.0, 258.0, 303.0, 273.0, 292.0, 287.0, 275.0, 250.0, 265.0, 262.0, 289.0, 290.0, 261.0, 264.0, 279.0, 291.0, 286.0, 293.0, 285.0, 
285.0, 279.0, 291.0, 266.0, 264.0, 296.0, 280.0, 176.0, 175.0, 288.0, 285.0, 277.0, 302.0, 271.0, 259.0, 291.0, 288.0, 273.0, 249.0, 299.0, 271.0, 275.0, 298.0, 263.0, 270.0, 291.0, 285.0, 265.0, 260.0, 298.0, 275.0, 259.0, 271.0, 280.0, 242.0, 253.0, 269.0, 153.0, 141.0, 250.0, 269.0, 263.0, 262.0, 288.0, 282.0, 280.0, 296.0, 290.0, 286.0, 273.0, 306.0, 176.0, 194.0, 262.0, 260.0, 252.0, 267.0, 298.0, 275.0, 267.0, 263.0, 265.0, 254.0, 281.0, 280.0, 284.0, 289.0, 260.0, 265.0, 295.0, 281.0, 247.0, 278.0, 286.0, 293.0, 282.0, 291.0, 292.0, 284.0, 283.0, 290.0, 241.0, 251.0, 290.0, 283.0, 271.0, 251.0, 291.0, 285.0, 285.0, 288.0, 271.0, 302.0, 288.0, 288.0, 276.0, 294.0, 285.0, 288.0, 290.0, 292.0, 286.0, 287.0, 260.0, 265.0, 168.0, 173.0, 291.0, 282.0, 287.0, 286.0, 278.0, 298.0, 257.0, 256.0, 269.0, 264.0, 255.0, 270.0, 266.0, 259.0, 292.0, 290.0, 264.0, 266.0, 291.0, 285.0, 292.0, 287.0, 288.0, 285.0, 283.0, 299.0, 270.0, 303.0, 288.0, 288.0, 287.0, 289.0, 288.0, 288.0, 245.0, 228.0, 249.0, 276.0, 266.0, 256.0, 257.0, 273.0, 305.0, 271.0, 295.0, 281.0, 306.0, 267.0, 262.0, 263.0, 285.0, 294.0, 272.0, 298.0, 300.0, 270.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7068202919176193, "mean_inference_ms": 1.273063021322113, "mean_action_processing_ms": 0.13496670264422456, "mean_env_wait_ms": 0.8546731088104613, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 582.0, "episode_reward_min": 294.0, "episode_reward_mean": 545.89, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 141.0}, "policy_reward_max": {"ppo": 306.0}, "policy_reward_mean": {"ppo": 272.945}, "hist_stats": {"episode_reward": [579.0, 579.0, 579.0, 459.0, 573.0, 576.0, 518.0, 522.0, 573.0, 525.0, 576.0, 579.0, 525.0, 527.0, 579.0, 525.0, 570.0, 579.0, 570.0, 570.0, 530.0, 576.0, 351.0, 573.0, 579.0, 530.0, 579.0, 522.0, 570.0, 573.0, 533.0, 576.0, 525.0, 573.0, 530.0, 522.0, 522.0, 294.0, 519.0, 525.0, 570.0, 576.0, 576.0, 579.0, 370.0, 
522.0, 519.0, 573.0, 530.0, 519.0, 561.0, 573.0, 525.0, 576.0, 525.0, 579.0, 573.0, 576.0, 573.0, 492.0, 573.0, 522.0, 576.0, 573.0, 573.0, 576.0, 570.0, 573.0, 582.0, 573.0, 525.0, 341.0, 573.0, 573.0, 576.0, 513.0, 533.0, 525.0, 525.0, 582.0, 530.0, 576.0, 579.0, 573.0, 582.0, 573.0, 576.0, 576.0, 576.0, 473.0, 525.0, 522.0, 530.0, 576.0, 576.0, 573.0, 525.0, 579.0, 570.0, 570.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [297.0, 282.0, 290.0, 289.0, 297.0, 282.0, 238.0, 221.0, 290.0, 283.0, 291.0, 285.0, 261.0, 257.0, 259.0, 263.0, 283.0, 290.0, 267.0, 258.0, 303.0, 273.0, 292.0, 287.0, 275.0, 250.0, 265.0, 262.0, 289.0, 290.0, 261.0, 264.0, 279.0, 291.0, 286.0, 293.0, 285.0, 285.0, 279.0, 291.0, 266.0, 264.0, 296.0, 280.0, 176.0, 175.0, 288.0, 285.0, 277.0, 302.0, 271.0, 259.0, 291.0, 288.0, 273.0, 249.0, 299.0, 271.0, 275.0, 298.0, 263.0, 270.0, 291.0, 285.0, 265.0, 260.0, 298.0, 275.0, 259.0, 271.0, 280.0, 242.0, 253.0, 269.0, 153.0, 141.0, 250.0, 269.0, 263.0, 262.0, 288.0, 282.0, 280.0, 296.0, 290.0, 286.0, 273.0, 306.0, 176.0, 194.0, 262.0, 260.0, 252.0, 267.0, 298.0, 275.0, 267.0, 263.0, 265.0, 254.0, 281.0, 280.0, 284.0, 289.0, 260.0, 265.0, 295.0, 281.0, 247.0, 278.0, 286.0, 293.0, 282.0, 291.0, 292.0, 284.0, 283.0, 290.0, 241.0, 251.0, 290.0, 283.0, 271.0, 251.0, 291.0, 285.0, 285.0, 288.0, 271.0, 302.0, 288.0, 288.0, 276.0, 294.0, 285.0, 288.0, 290.0, 292.0, 286.0, 287.0, 260.0, 265.0, 168.0, 173.0, 291.0, 282.0, 287.0, 286.0, 278.0, 298.0, 257.0, 256.0, 269.0, 
264.0, 255.0, 270.0, 266.0, 259.0, 292.0, 290.0, 264.0, 266.0, 291.0, 285.0, 292.0, 287.0, 288.0, 285.0, 283.0, 299.0, 270.0, 303.0, 288.0, 288.0, 287.0, 289.0, 288.0, 288.0, 245.0, 228.0, 249.0, 276.0, 266.0, 256.0, 257.0, 273.0, 305.0, 271.0, 295.0, 281.0, 306.0, 267.0, 262.0, 263.0, 285.0, 294.0, 272.0, 298.0, 300.0, 270.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7068202919176193, "mean_inference_ms": 1.273063021322113, "mean_action_processing_ms": 0.13496670264422456, "mean_env_wait_ms": 0.8546731088104613, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 5120000, "num_agent_steps_trained": 5120000, "num_env_steps_sampled": 2560000, "num_env_steps_trained": 2560000, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 2560000, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 5120000, "timers": {"training_iteration_time_ms": 5798.303, "learn_time_ms": 1238.885, "learn_throughput": 10331.871, "synch_weights_time_ms": 16.153}, "counters": {"num_env_steps_sampled": 2560000, "num_env_steps_trained": 2560000, "num_agent_steps_sampled": 5120000, "num_agent_steps_trained": 5120000}, "done": false, "episodes_total": 6400, "training_iteration": 200, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-13-28", "timestamp": 1666581208, "time_this_iter_s": 12.11938214302063, "time_total_s": 781.3640866279602, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": 
true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, 
"enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, 
"tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": 
"deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, 
"keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function 
get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function 
gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 781.3640866279602, "timesteps_since_restore": 0, "iterations_since_restore": 200, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 82.9470588235294, "ram_util_percent": 17.2764705882353}}
+{"custom_metrics": {"sparse_reward_mean": 189.2, "sparse_reward_min": 80, "sparse_reward_max": 200, "shaped_reward_mean": 168.23, "shaped_reward_min": 91, "shaped_reward_max": 182, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.96, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 16.64, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 14.68, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 16.23, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.14, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.21, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.05, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, 
"useful_onion_drop_agent_1_mean": 0.1, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.51, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 16.07, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.38, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.0, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.0, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.53, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.28, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.29, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.04, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.06, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 4, "soup_pickup_agent_0_mean": 4.99, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.63, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.93, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.55, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.04, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.51, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 16.07, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.51, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 16.07, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.002211037091910839, "policy_loss": 0.0019596759229898453, "vf_loss": 
7.57273530960083, "vf_explained_var": 0.645828902721405, "kl": 0.005030768923461437, "entropy": 1.0118223428726196, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 2572800, "num_env_steps_trained": 2572800, "num_agent_steps_sampled": 5145600, "num_agent_steps_trained": 5145600}, "sampler_results": {"episode_reward_max": 582.0, "episode_reward_min": 251.0, "episode_reward_mean": 546.63, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 118.0}, "policy_reward_max": {"ppo": 306.0}, "policy_reward_mean": {"ppo": 273.315}, "custom_metrics": {"sparse_reward_mean": 189.2, "sparse_reward_min": 80, "sparse_reward_max": 200, "shaped_reward_mean": 168.23, "shaped_reward_min": 91, "shaped_reward_max": 182, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.96, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 16.64, 
"onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 14.68, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 16.23, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.14, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.21, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.05, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.1, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.51, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 16.07, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.38, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.0, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.0, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.53, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.28, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.29, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.04, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.06, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 4, "soup_pickup_agent_0_mean": 4.99, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.63, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.93, "soup_delivery_agent_0_min": 1, 
"soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.55, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.04, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.51, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 16.07, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.51, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 16.07, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [525.0, 573.0, 530.0, 522.0, 522.0, 294.0, 519.0, 525.0, 570.0, 576.0, 576.0, 579.0, 370.0, 522.0, 519.0, 573.0, 530.0, 519.0, 561.0, 573.0, 525.0, 576.0, 525.0, 579.0, 573.0, 576.0, 573.0, 492.0, 573.0, 522.0, 576.0, 573.0, 573.0, 576.0, 570.0, 573.0, 582.0, 573.0, 525.0, 341.0, 573.0, 573.0, 576.0, 513.0, 533.0, 525.0, 525.0, 582.0, 530.0, 576.0, 579.0, 573.0, 582.0, 573.0, 576.0, 576.0, 576.0, 473.0, 525.0, 522.0, 530.0, 576.0, 576.0, 573.0, 525.0, 579.0, 570.0, 570.0, 579.0, 530.0, 582.0, 579.0, 579.0, 251.0, 579.0, 576.0, 519.0, 573.0, 573.0, 576.0, 519.0, 579.0, 579.0, 579.0, 582.0, 516.0, 522.0, 530.0, 522.0, 579.0, 576.0, 576.0, 525.0, 576.0, 576.0, 576.0, 516.0, 573.0, 576.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [265.0, 260.0, 298.0, 275.0, 259.0, 271.0, 280.0, 242.0, 253.0, 269.0, 153.0, 141.0, 250.0, 269.0, 263.0, 262.0, 288.0, 282.0, 280.0, 296.0, 290.0, 286.0, 273.0, 306.0, 176.0, 194.0, 262.0, 260.0, 252.0, 267.0, 298.0, 275.0, 267.0, 263.0, 265.0, 254.0, 281.0, 280.0, 284.0, 289.0, 260.0, 265.0, 295.0, 281.0, 247.0, 278.0, 286.0, 293.0, 282.0, 
291.0, 292.0, 284.0, 283.0, 290.0, 241.0, 251.0, 290.0, 283.0, 271.0, 251.0, 291.0, 285.0, 285.0, 288.0, 271.0, 302.0, 288.0, 288.0, 276.0, 294.0, 285.0, 288.0, 290.0, 292.0, 286.0, 287.0, 260.0, 265.0, 168.0, 173.0, 291.0, 282.0, 287.0, 286.0, 278.0, 298.0, 257.0, 256.0, 269.0, 264.0, 255.0, 270.0, 266.0, 259.0, 292.0, 290.0, 264.0, 266.0, 291.0, 285.0, 292.0, 287.0, 288.0, 285.0, 283.0, 299.0, 270.0, 303.0, 288.0, 288.0, 287.0, 289.0, 288.0, 288.0, 245.0, 228.0, 249.0, 276.0, 266.0, 256.0, 257.0, 273.0, 305.0, 271.0, 295.0, 281.0, 306.0, 267.0, 262.0, 263.0, 285.0, 294.0, 272.0, 298.0, 300.0, 270.0, 301.0, 278.0, 256.0, 274.0, 288.0, 294.0, 287.0, 292.0, 296.0, 283.0, 133.0, 118.0, 297.0, 282.0, 277.0, 299.0, 248.0, 271.0, 290.0, 283.0, 288.0, 285.0, 301.0, 275.0, 253.0, 266.0, 284.0, 295.0, 283.0, 296.0, 285.0, 294.0, 291.0, 291.0, 251.0, 265.0, 266.0, 256.0, 259.0, 271.0, 256.0, 266.0, 285.0, 294.0, 294.0, 282.0, 290.0, 286.0, 270.0, 255.0, 294.0, 282.0, 285.0, 291.0, 285.0, 291.0, 257.0, 259.0, 276.0, 297.0, 277.0, 299.0, 288.0, 288.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7094424296946303, "mean_inference_ms": 1.277790367588252, "mean_action_processing_ms": 0.1351939188241857, "mean_env_wait_ms": 0.8561024471428149, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 582.0, "episode_reward_min": 251.0, "episode_reward_mean": 546.63, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 118.0}, "policy_reward_max": {"ppo": 306.0}, "policy_reward_mean": {"ppo": 273.315}, "hist_stats": {"episode_reward": [525.0, 573.0, 530.0, 522.0, 522.0, 294.0, 519.0, 525.0, 570.0, 576.0, 576.0, 579.0, 370.0, 522.0, 519.0, 573.0, 530.0, 519.0, 561.0, 573.0, 525.0, 576.0, 525.0, 579.0, 573.0, 576.0, 573.0, 492.0, 573.0, 522.0, 576.0, 573.0, 573.0, 576.0, 570.0, 573.0, 582.0, 573.0, 525.0, 341.0, 573.0, 573.0, 576.0, 513.0, 533.0, 525.0, 525.0, 582.0, 530.0, 576.0, 579.0, 573.0, 582.0, 573.0, 576.0, 576.0, 576.0, 
473.0, 525.0, 522.0, 530.0, 576.0, 576.0, 573.0, 525.0, 579.0, 570.0, 570.0, 579.0, 530.0, 582.0, 579.0, 579.0, 251.0, 579.0, 576.0, 519.0, 573.0, 573.0, 576.0, 519.0, 579.0, 579.0, 579.0, 582.0, 516.0, 522.0, 530.0, 522.0, 579.0, 576.0, 576.0, 525.0, 576.0, 576.0, 576.0, 516.0, 573.0, 576.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [265.0, 260.0, 298.0, 275.0, 259.0, 271.0, 280.0, 242.0, 253.0, 269.0, 153.0, 141.0, 250.0, 269.0, 263.0, 262.0, 288.0, 282.0, 280.0, 296.0, 290.0, 286.0, 273.0, 306.0, 176.0, 194.0, 262.0, 260.0, 252.0, 267.0, 298.0, 275.0, 267.0, 263.0, 265.0, 254.0, 281.0, 280.0, 284.0, 289.0, 260.0, 265.0, 295.0, 281.0, 247.0, 278.0, 286.0, 293.0, 282.0, 291.0, 292.0, 284.0, 283.0, 290.0, 241.0, 251.0, 290.0, 283.0, 271.0, 251.0, 291.0, 285.0, 285.0, 288.0, 271.0, 302.0, 288.0, 288.0, 276.0, 294.0, 285.0, 288.0, 290.0, 292.0, 286.0, 287.0, 260.0, 265.0, 168.0, 173.0, 291.0, 282.0, 287.0, 286.0, 278.0, 298.0, 257.0, 256.0, 269.0, 264.0, 255.0, 270.0, 266.0, 259.0, 292.0, 290.0, 264.0, 266.0, 291.0, 285.0, 292.0, 287.0, 288.0, 285.0, 283.0, 299.0, 270.0, 303.0, 288.0, 288.0, 287.0, 289.0, 288.0, 288.0, 245.0, 228.0, 249.0, 276.0, 266.0, 256.0, 257.0, 273.0, 305.0, 271.0, 295.0, 281.0, 306.0, 267.0, 262.0, 263.0, 285.0, 294.0, 272.0, 298.0, 300.0, 270.0, 301.0, 278.0, 256.0, 274.0, 288.0, 294.0, 287.0, 292.0, 296.0, 283.0, 133.0, 118.0, 297.0, 282.0, 277.0, 299.0, 248.0, 271.0, 290.0, 283.0, 288.0, 285.0, 301.0, 275.0, 253.0, 266.0, 284.0, 295.0, 283.0, 
296.0, 285.0, 294.0, 291.0, 291.0, 251.0, 265.0, 266.0, 256.0, 259.0, 271.0, 256.0, 266.0, 285.0, 294.0, 294.0, 282.0, 290.0, 286.0, 270.0, 255.0, 294.0, 282.0, 285.0, 291.0, 285.0, 291.0, 257.0, 259.0, 276.0, 297.0, 277.0, 299.0, 288.0, 288.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7094424296946303, "mean_inference_ms": 1.277790367588252, "mean_action_processing_ms": 0.1351939188241857, "mean_env_wait_ms": 0.8561024471428149, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 5145600, "num_agent_steps_trained": 5145600, "num_env_steps_sampled": 2572800, "num_env_steps_trained": 2572800, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 2572800, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 5145600, "timers": {"training_iteration_time_ms": 5789.907, "learn_time_ms": 1229.164, "learn_throughput": 10413.58, "synch_weights_time_ms": 16.659}, "counters": {"num_env_steps_sampled": 2572800, "num_env_steps_trained": 2572800, "num_agent_steps_sampled": 5145600, "num_agent_steps_trained": 5145600}, "done": false, "episodes_total": 6432, "training_iteration": 201, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-13-33", "timestamp": 1666581213, "time_this_iter_s": 4.224009275436401, "time_total_s": 785.5880959033966, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 785.5880959033966, "timesteps_since_restore": 0, "iterations_since_restore": 201, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 47.76666666666667, "ram_util_percent": 20.93333333333333}}
+{"custom_metrics": {"sparse_reward_mean": 191.0, "sparse_reward_min": 80, "sparse_reward_max": 200, "shaped_reward_mean": 170.1, "shaped_reward_min": 91, "shaped_reward_max": 182, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.26, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 16.6, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 14.99, "useful_onion_pickup_agent_0_min": 9, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 16.23, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.17, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.23, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.07, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, 
"useful_onion_drop_agent_1_mean": 0.13, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.83, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 16.04, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.34, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.11, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.05, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.68, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.21, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.34, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.09, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 4, "soup_pickup_agent_0_mean": 5.01, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.69, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 4.93, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.62, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.83, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 16.04, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.83, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 16.04, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0002456922084093094, "policy_loss": -0.0005025984719395638, "vf_loss": 
7.635164737701416, "vf_explained_var": 0.6201376914978027, "kl": 0.0017764116637408733, "entropy": 1.0132180452346802, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 2585600, "num_env_steps_trained": 2585600, "num_agent_steps_sampled": 5171200, "num_agent_steps_trained": 5171200}, "sampler_results": {"episode_reward_max": 582.0, "episode_reward_min": 251.0, "episode_reward_mean": 552.1, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 118.0}, "policy_reward_max": {"ppo": 306.0}, "policy_reward_mean": {"ppo": 276.05}, "custom_metrics": {"sparse_reward_mean": 191.0, "sparse_reward_min": 80, "sparse_reward_max": 200, "shaped_reward_mean": 170.1, "shaped_reward_min": 91, "shaped_reward_max": 182, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.26, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 16.6, 
"onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 14.99, "useful_onion_pickup_agent_0_min": 9, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 16.23, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.17, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.23, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.07, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.13, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.83, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 16.04, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.34, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.11, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.05, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.68, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.21, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.34, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.09, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 4, "soup_pickup_agent_0_mean": 5.01, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.69, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 4.93, "soup_delivery_agent_0_min": 2, 
"soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.62, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.83, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 16.04, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.83, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 16.04, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [573.0, 576.0, 570.0, 573.0, 582.0, 573.0, 525.0, 341.0, 573.0, 573.0, 576.0, 513.0, 533.0, 525.0, 525.0, 582.0, 530.0, 576.0, 579.0, 573.0, 582.0, 573.0, 576.0, 576.0, 576.0, 473.0, 525.0, 522.0, 530.0, 576.0, 576.0, 573.0, 525.0, 579.0, 570.0, 570.0, 579.0, 530.0, 582.0, 579.0, 579.0, 251.0, 579.0, 576.0, 519.0, 573.0, 573.0, 576.0, 519.0, 579.0, 579.0, 579.0, 582.0, 516.0, 522.0, 530.0, 522.0, 579.0, 576.0, 576.0, 525.0, 576.0, 576.0, 576.0, 516.0, 573.0, 576.0, 576.0, 522.0, 462.0, 530.0, 570.0, 576.0, 522.0, 579.0, 510.0, 579.0, 576.0, 576.0, 530.0, 576.0, 576.0, 576.0, 525.0, 576.0, 576.0, 579.0, 579.0, 530.0, 573.0, 522.0, 522.0, 579.0, 459.0, 573.0, 579.0, 576.0, 582.0, 522.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [271.0, 302.0, 288.0, 288.0, 276.0, 294.0, 285.0, 288.0, 290.0, 292.0, 286.0, 287.0, 260.0, 265.0, 168.0, 173.0, 291.0, 282.0, 287.0, 286.0, 278.0, 298.0, 257.0, 256.0, 269.0, 264.0, 255.0, 270.0, 266.0, 259.0, 292.0, 290.0, 264.0, 266.0, 291.0, 285.0, 292.0, 287.0, 288.0, 285.0, 283.0, 299.0, 270.0, 303.0, 288.0, 288.0, 287.0, 289.0, 288.0, 
288.0, 245.0, 228.0, 249.0, 276.0, 266.0, 256.0, 257.0, 273.0, 305.0, 271.0, 295.0, 281.0, 306.0, 267.0, 262.0, 263.0, 285.0, 294.0, 272.0, 298.0, 300.0, 270.0, 301.0, 278.0, 256.0, 274.0, 288.0, 294.0, 287.0, 292.0, 296.0, 283.0, 133.0, 118.0, 297.0, 282.0, 277.0, 299.0, 248.0, 271.0, 290.0, 283.0, 288.0, 285.0, 301.0, 275.0, 253.0, 266.0, 284.0, 295.0, 283.0, 296.0, 285.0, 294.0, 291.0, 291.0, 251.0, 265.0, 266.0, 256.0, 259.0, 271.0, 256.0, 266.0, 285.0, 294.0, 294.0, 282.0, 290.0, 286.0, 270.0, 255.0, 294.0, 282.0, 285.0, 291.0, 285.0, 291.0, 257.0, 259.0, 276.0, 297.0, 277.0, 299.0, 288.0, 288.0, 249.0, 273.0, 233.0, 229.0, 257.0, 273.0, 299.0, 271.0, 292.0, 284.0, 257.0, 265.0, 293.0, 286.0, 249.0, 261.0, 300.0, 279.0, 277.0, 299.0, 291.0, 285.0, 265.0, 265.0, 288.0, 288.0, 290.0, 286.0, 285.0, 291.0, 275.0, 250.0, 294.0, 282.0, 285.0, 291.0, 287.0, 292.0, 284.0, 295.0, 264.0, 266.0, 277.0, 296.0, 268.0, 254.0, 269.0, 253.0, 291.0, 288.0, 229.0, 230.0, 293.0, 280.0, 290.0, 289.0, 286.0, 290.0, 291.0, 291.0, 257.0, 265.0, 296.0, 280.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7120554492810086, "mean_inference_ms": 1.2828197056843382, "mean_action_processing_ms": 0.13543027987221926, "mean_env_wait_ms": 0.857572884175066, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 582.0, "episode_reward_min": 251.0, "episode_reward_mean": 552.1, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 118.0}, "policy_reward_max": {"ppo": 306.0}, "policy_reward_mean": {"ppo": 276.05}, "hist_stats": {"episode_reward": [573.0, 576.0, 570.0, 573.0, 582.0, 573.0, 525.0, 341.0, 573.0, 573.0, 576.0, 513.0, 533.0, 525.0, 525.0, 582.0, 530.0, 576.0, 579.0, 573.0, 582.0, 573.0, 576.0, 576.0, 576.0, 473.0, 525.0, 522.0, 530.0, 576.0, 576.0, 573.0, 525.0, 579.0, 570.0, 570.0, 579.0, 530.0, 582.0, 579.0, 579.0, 251.0, 579.0, 576.0, 519.0, 573.0, 573.0, 576.0, 519.0, 579.0, 579.0, 579.0, 582.0, 516.0, 522.0, 530.0, 522.0, 
579.0, 576.0, 576.0, 525.0, 576.0, 576.0, 576.0, 516.0, 573.0, 576.0, 576.0, 522.0, 462.0, 530.0, 570.0, 576.0, 522.0, 579.0, 510.0, 579.0, 576.0, 576.0, 530.0, 576.0, 576.0, 576.0, 525.0, 576.0, 576.0, 579.0, 579.0, 530.0, 573.0, 522.0, 522.0, 579.0, 459.0, 573.0, 579.0, 576.0, 582.0, 522.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [271.0, 302.0, 288.0, 288.0, 276.0, 294.0, 285.0, 288.0, 290.0, 292.0, 286.0, 287.0, 260.0, 265.0, 168.0, 173.0, 291.0, 282.0, 287.0, 286.0, 278.0, 298.0, 257.0, 256.0, 269.0, 264.0, 255.0, 270.0, 266.0, 259.0, 292.0, 290.0, 264.0, 266.0, 291.0, 285.0, 292.0, 287.0, 288.0, 285.0, 283.0, 299.0, 270.0, 303.0, 288.0, 288.0, 287.0, 289.0, 288.0, 288.0, 245.0, 228.0, 249.0, 276.0, 266.0, 256.0, 257.0, 273.0, 305.0, 271.0, 295.0, 281.0, 306.0, 267.0, 262.0, 263.0, 285.0, 294.0, 272.0, 298.0, 300.0, 270.0, 301.0, 278.0, 256.0, 274.0, 288.0, 294.0, 287.0, 292.0, 296.0, 283.0, 133.0, 118.0, 297.0, 282.0, 277.0, 299.0, 248.0, 271.0, 290.0, 283.0, 288.0, 285.0, 301.0, 275.0, 253.0, 266.0, 284.0, 295.0, 283.0, 296.0, 285.0, 294.0, 291.0, 291.0, 251.0, 265.0, 266.0, 256.0, 259.0, 271.0, 256.0, 266.0, 285.0, 294.0, 294.0, 282.0, 290.0, 286.0, 270.0, 255.0, 294.0, 282.0, 285.0, 291.0, 285.0, 291.0, 257.0, 259.0, 276.0, 297.0, 277.0, 299.0, 288.0, 288.0, 249.0, 273.0, 233.0, 229.0, 257.0, 273.0, 299.0, 271.0, 292.0, 284.0, 257.0, 265.0, 293.0, 286.0, 249.0, 261.0, 300.0, 279.0, 277.0, 299.0, 291.0, 285.0, 265.0, 265.0, 288.0, 288.0, 290.0, 286.0, 285.0, 
291.0, 275.0, 250.0, 294.0, 282.0, 285.0, 291.0, 287.0, 292.0, 284.0, 295.0, 264.0, 266.0, 277.0, 296.0, 268.0, 254.0, 269.0, 253.0, 291.0, 288.0, 229.0, 230.0, 293.0, 280.0, 290.0, 289.0, 286.0, 290.0, 291.0, 291.0, 257.0, 265.0, 296.0, 280.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7120554492810086, "mean_inference_ms": 1.2828197056843382, "mean_action_processing_ms": 0.13543027987221926, "mean_env_wait_ms": 0.857572884175066, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 5171200, "num_agent_steps_trained": 5171200, "num_env_steps_sampled": 2585600, "num_env_steps_trained": 2585600, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 2585600, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 5171200, "timers": {"training_iteration_time_ms": 5822.715, "learn_time_ms": 1227.852, "learn_throughput": 10424.712, "synch_weights_time_ms": 16.557}, "counters": {"num_env_steps_sampled": 2585600, "num_env_steps_trained": 2585600, "num_agent_steps_sampled": 5171200, "num_agent_steps_trained": 5171200}, "done": false, "episodes_total": 6464, "training_iteration": 202, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-13-37", "timestamp": 1666581217, "time_this_iter_s": 4.5087339878082275, "time_total_s": 790.0968298912048, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 790.0968298912048, "timesteps_since_restore": 0, "iterations_since_restore": 202, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 46.98571428571428, "ram_util_percent": 21.7}}
+{"custom_metrics": {"sparse_reward_mean": 191.4, "sparse_reward_min": 80, "sparse_reward_max": 200, "shaped_reward_mean": 169.69, "shaped_reward_min": 91, "shaped_reward_max": 182, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.38, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 16.49, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 15.1, "useful_onion_pickup_agent_0_min": 9, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 16.23, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.23, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.2, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.09, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, 
"useful_onion_drop_agent_1_mean": 0.1, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.85, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 15.93, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.44, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.06, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.05, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.65, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.26, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.34, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.09, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.06, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.65, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.99, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.58, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.04, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.04, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 14.85, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 15.93, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.85, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 15.93, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0013950903667137027, "policy_loss": -0.0016577579081058502, "vf_loss": 
7.6537275314331055, "vf_explained_var": 0.6278898119926453, "kl": 0.0020676562562584877, "entropy": 1.005408525466919, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 2598400, "num_env_steps_trained": 2598400, "num_agent_steps_sampled": 5196800, "num_agent_steps_trained": 5196800}, "sampler_results": {"episode_reward_max": 582.0, "episode_reward_min": 251.0, "episode_reward_mean": 552.49, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 118.0}, "policy_reward_max": {"ppo": 301.0}, "policy_reward_mean": {"ppo": 276.245}, "custom_metrics": {"sparse_reward_mean": 191.4, "sparse_reward_min": 80, "sparse_reward_max": 200, "shaped_reward_mean": 169.69, "shaped_reward_min": 91, "shaped_reward_max": 182, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.38, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 16.49, 
"onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 15.1, "useful_onion_pickup_agent_0_min": 9, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 16.23, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.23, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.2, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.09, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.1, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.85, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 15.93, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.44, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.06, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.05, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.65, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.26, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.34, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.09, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.06, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.65, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.99, "soup_delivery_agent_0_min": 2, 
"soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.58, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.04, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.04, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 14.85, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 15.93, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.85, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 15.93, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [525.0, 579.0, 570.0, 570.0, 579.0, 530.0, 582.0, 579.0, 579.0, 251.0, 579.0, 576.0, 519.0, 573.0, 573.0, 576.0, 519.0, 579.0, 579.0, 579.0, 582.0, 516.0, 522.0, 530.0, 522.0, 579.0, 576.0, 576.0, 525.0, 576.0, 576.0, 576.0, 516.0, 573.0, 576.0, 576.0, 522.0, 462.0, 530.0, 570.0, 576.0, 522.0, 579.0, 510.0, 579.0, 576.0, 576.0, 530.0, 576.0, 576.0, 576.0, 525.0, 576.0, 576.0, 579.0, 579.0, 530.0, 573.0, 522.0, 522.0, 579.0, 459.0, 573.0, 579.0, 576.0, 582.0, 522.0, 576.0, 576.0, 570.0, 522.0, 576.0, 573.0, 522.0, 516.0, 576.0, 570.0, 570.0, 501.0, 573.0, 576.0, 473.0, 573.0, 525.0, 570.0, 576.0, 519.0, 522.0, 522.0, 570.0, 573.0, 522.0, 576.0, 576.0, 519.0, 530.0, 579.0, 570.0, 573.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [262.0, 263.0, 285.0, 294.0, 272.0, 298.0, 300.0, 270.0, 301.0, 278.0, 256.0, 274.0, 288.0, 294.0, 287.0, 292.0, 296.0, 283.0, 133.0, 118.0, 297.0, 282.0, 277.0, 299.0, 248.0, 271.0, 290.0, 283.0, 288.0, 285.0, 301.0, 275.0, 253.0, 266.0, 284.0, 295.0, 283.0, 296.0, 285.0, 294.0, 291.0, 291.0, 251.0, 265.0, 266.0, 256.0, 259.0, 271.0, 256.0, 
266.0, 285.0, 294.0, 294.0, 282.0, 290.0, 286.0, 270.0, 255.0, 294.0, 282.0, 285.0, 291.0, 285.0, 291.0, 257.0, 259.0, 276.0, 297.0, 277.0, 299.0, 288.0, 288.0, 249.0, 273.0, 233.0, 229.0, 257.0, 273.0, 299.0, 271.0, 292.0, 284.0, 257.0, 265.0, 293.0, 286.0, 249.0, 261.0, 300.0, 279.0, 277.0, 299.0, 291.0, 285.0, 265.0, 265.0, 288.0, 288.0, 290.0, 286.0, 285.0, 291.0, 275.0, 250.0, 294.0, 282.0, 285.0, 291.0, 287.0, 292.0, 284.0, 295.0, 264.0, 266.0, 277.0, 296.0, 268.0, 254.0, 269.0, 253.0, 291.0, 288.0, 229.0, 230.0, 293.0, 280.0, 290.0, 289.0, 286.0, 290.0, 291.0, 291.0, 257.0, 265.0, 296.0, 280.0, 293.0, 283.0, 296.0, 274.0, 263.0, 259.0, 282.0, 294.0, 290.0, 283.0, 270.0, 252.0, 256.0, 260.0, 288.0, 288.0, 286.0, 284.0, 290.0, 280.0, 253.0, 248.0, 285.0, 288.0, 294.0, 282.0, 226.0, 247.0, 293.0, 280.0, 270.0, 255.0, 286.0, 284.0, 292.0, 284.0, 255.0, 264.0, 267.0, 255.0, 270.0, 252.0, 273.0, 297.0, 278.0, 295.0, 267.0, 255.0, 292.0, 284.0, 292.0, 284.0, 259.0, 260.0, 260.0, 270.0, 281.0, 298.0, 281.0, 289.0, 280.0, 293.0, 286.0, 293.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.712889251168771, "mean_inference_ms": 1.28390343160238, "mean_action_processing_ms": 0.13554123488560507, "mean_env_wait_ms": 0.8581576529366974, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 582.0, "episode_reward_min": 251.0, "episode_reward_mean": 552.49, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 118.0}, "policy_reward_max": {"ppo": 301.0}, "policy_reward_mean": {"ppo": 276.245}, "hist_stats": {"episode_reward": [525.0, 579.0, 570.0, 570.0, 579.0, 530.0, 582.0, 579.0, 579.0, 251.0, 579.0, 576.0, 519.0, 573.0, 573.0, 576.0, 519.0, 579.0, 579.0, 579.0, 582.0, 516.0, 522.0, 530.0, 522.0, 579.0, 576.0, 576.0, 525.0, 576.0, 576.0, 576.0, 516.0, 573.0, 576.0, 576.0, 522.0, 462.0, 530.0, 570.0, 576.0, 522.0, 579.0, 510.0, 579.0, 576.0, 576.0, 530.0, 576.0, 576.0, 576.0, 525.0, 576.0, 576.0, 579.0, 579.0, 530.0, 
573.0, 522.0, 522.0, 579.0, 459.0, 573.0, 579.0, 576.0, 582.0, 522.0, 576.0, 576.0, 570.0, 522.0, 576.0, 573.0, 522.0, 516.0, 576.0, 570.0, 570.0, 501.0, 573.0, 576.0, 473.0, 573.0, 525.0, 570.0, 576.0, 519.0, 522.0, 522.0, 570.0, 573.0, 522.0, 576.0, 576.0, 519.0, 530.0, 579.0, 570.0, 573.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [262.0, 263.0, 285.0, 294.0, 272.0, 298.0, 300.0, 270.0, 301.0, 278.0, 256.0, 274.0, 288.0, 294.0, 287.0, 292.0, 296.0, 283.0, 133.0, 118.0, 297.0, 282.0, 277.0, 299.0, 248.0, 271.0, 290.0, 283.0, 288.0, 285.0, 301.0, 275.0, 253.0, 266.0, 284.0, 295.0, 283.0, 296.0, 285.0, 294.0, 291.0, 291.0, 251.0, 265.0, 266.0, 256.0, 259.0, 271.0, 256.0, 266.0, 285.0, 294.0, 294.0, 282.0, 290.0, 286.0, 270.0, 255.0, 294.0, 282.0, 285.0, 291.0, 285.0, 291.0, 257.0, 259.0, 276.0, 297.0, 277.0, 299.0, 288.0, 288.0, 249.0, 273.0, 233.0, 229.0, 257.0, 273.0, 299.0, 271.0, 292.0, 284.0, 257.0, 265.0, 293.0, 286.0, 249.0, 261.0, 300.0, 279.0, 277.0, 299.0, 291.0, 285.0, 265.0, 265.0, 288.0, 288.0, 290.0, 286.0, 285.0, 291.0, 275.0, 250.0, 294.0, 282.0, 285.0, 291.0, 287.0, 292.0, 284.0, 295.0, 264.0, 266.0, 277.0, 296.0, 268.0, 254.0, 269.0, 253.0, 291.0, 288.0, 229.0, 230.0, 293.0, 280.0, 290.0, 289.0, 286.0, 290.0, 291.0, 291.0, 257.0, 265.0, 296.0, 280.0, 293.0, 283.0, 296.0, 274.0, 263.0, 259.0, 282.0, 294.0, 290.0, 283.0, 270.0, 252.0, 256.0, 260.0, 288.0, 288.0, 286.0, 284.0, 290.0, 280.0, 253.0, 248.0, 285.0, 288.0, 294.0, 282.0, 226.0, 247.0, 293.0, 
280.0, 270.0, 255.0, 286.0, 284.0, 292.0, 284.0, 255.0, 264.0, 267.0, 255.0, 270.0, 252.0, 273.0, 297.0, 278.0, 295.0, 267.0, 255.0, 292.0, 284.0, 292.0, 284.0, 259.0, 260.0, 260.0, 270.0, 281.0, 298.0, 281.0, 289.0, 280.0, 293.0, 286.0, 293.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.712889251168771, "mean_inference_ms": 1.28390343160238, "mean_action_processing_ms": 0.13554123488560507, "mean_env_wait_ms": 0.8581576529366974, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 5196800, "num_agent_steps_trained": 5196800, "num_env_steps_sampled": 2598400, "num_env_steps_trained": 2598400, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 2598400, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 5196800, "timers": {"training_iteration_time_ms": 5768.202, "learn_time_ms": 1219.407, "learn_throughput": 10496.902, "synch_weights_time_ms": 16.577}, "counters": {"num_env_steps_sampled": 2598400, "num_env_steps_trained": 2598400, "num_agent_steps_sampled": 5196800, "num_agent_steps_trained": 5196800}, "done": false, "episodes_total": 6496, "training_iteration": 203, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-13-41", "timestamp": 1666581221, "time_this_iter_s": 3.7892675399780273, "time_total_s": 793.8860974311829, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 793.8860974311829, "timesteps_since_restore": 0, "iterations_since_restore": 203, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 24.3, "ram_util_percent": 22.016666666666666}}
+{"custom_metrics": {"sparse_reward_mean": 191.6, "sparse_reward_min": 160, "sparse_reward_max": 200, "shaped_reward_mean": 169.32, "shaped_reward_min": 139, "shaped_reward_max": 182, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.93, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 15.98, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 15.55, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 15.71, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.28, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.2, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.12, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, 
"useful_onion_drop_agent_1_mean": 0.12, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.29, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 15.46, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.23, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.22, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 4.84, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.8, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.23, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.33, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.11, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.9, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.81, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.83, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.75, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.05, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.04, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 15.29, "optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 15.46, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.29, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 15.46, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.00187534443102777, "policy_loss": -0.0021297007333487272, "vf_loss": 
7.590843677520752, "vf_explained_var": 0.6290745139122009, "kl": 0.002140925731509924, "entropy": 1.0094540119171143, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 2611200, "num_env_steps_trained": 2611200, "num_agent_steps_sampled": 5222400, "num_agent_steps_trained": 5222400}, "sampler_results": {"episode_reward_max": 582.0, "episode_reward_min": 459.0, "episode_reward_mean": 552.52, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 226.0}, "policy_reward_max": {"ppo": 300.0}, "policy_reward_mean": {"ppo": 276.26}, "custom_metrics": {"sparse_reward_mean": 191.6, "sparse_reward_min": 160, "sparse_reward_max": 200, "shaped_reward_mean": 169.32, "shaped_reward_min": 139, "shaped_reward_max": 182, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.93, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 15.98, 
"onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 15.55, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 15.71, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.28, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.2, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.12, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.12, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.29, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 15.46, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.23, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.22, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 4.84, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.8, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.23, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.33, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.11, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.9, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.81, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.83, "soup_delivery_agent_0_min": 2, 
"soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.75, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.05, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.04, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 15.29, "optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 15.46, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.29, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 15.46, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [516.0, 573.0, 576.0, 576.0, 522.0, 462.0, 530.0, 570.0, 576.0, 522.0, 579.0, 510.0, 579.0, 576.0, 576.0, 530.0, 576.0, 576.0, 576.0, 525.0, 576.0, 576.0, 579.0, 579.0, 530.0, 573.0, 522.0, 522.0, 579.0, 459.0, 573.0, 579.0, 576.0, 582.0, 522.0, 576.0, 576.0, 570.0, 522.0, 576.0, 573.0, 522.0, 516.0, 576.0, 570.0, 570.0, 501.0, 573.0, 576.0, 473.0, 573.0, 525.0, 570.0, 576.0, 519.0, 522.0, 522.0, 570.0, 573.0, 522.0, 576.0, 576.0, 519.0, 530.0, 579.0, 570.0, 573.0, 579.0, 570.0, 519.0, 579.0, 579.0, 525.0, 573.0, 522.0, 579.0, 582.0, 573.0, 522.0, 522.0, 522.0, 573.0, 576.0, 573.0, 567.0, 570.0, 525.0, 519.0, 525.0, 573.0, 582.0, 573.0, 573.0, 459.0, 522.0, 576.0, 522.0, 579.0, 525.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [257.0, 259.0, 276.0, 297.0, 277.0, 299.0, 288.0, 288.0, 249.0, 273.0, 233.0, 229.0, 257.0, 273.0, 299.0, 271.0, 292.0, 284.0, 257.0, 265.0, 293.0, 286.0, 249.0, 261.0, 300.0, 279.0, 277.0, 299.0, 291.0, 285.0, 265.0, 265.0, 288.0, 288.0, 290.0, 286.0, 285.0, 291.0, 275.0, 250.0, 294.0, 282.0, 285.0, 291.0, 287.0, 292.0, 284.0, 295.0, 264.0, 
266.0, 277.0, 296.0, 268.0, 254.0, 269.0, 253.0, 291.0, 288.0, 229.0, 230.0, 293.0, 280.0, 290.0, 289.0, 286.0, 290.0, 291.0, 291.0, 257.0, 265.0, 296.0, 280.0, 293.0, 283.0, 296.0, 274.0, 263.0, 259.0, 282.0, 294.0, 290.0, 283.0, 270.0, 252.0, 256.0, 260.0, 288.0, 288.0, 286.0, 284.0, 290.0, 280.0, 253.0, 248.0, 285.0, 288.0, 294.0, 282.0, 226.0, 247.0, 293.0, 280.0, 270.0, 255.0, 286.0, 284.0, 292.0, 284.0, 255.0, 264.0, 267.0, 255.0, 270.0, 252.0, 273.0, 297.0, 278.0, 295.0, 267.0, 255.0, 292.0, 284.0, 292.0, 284.0, 259.0, 260.0, 260.0, 270.0, 281.0, 298.0, 281.0, 289.0, 280.0, 293.0, 286.0, 293.0, 282.0, 288.0, 261.0, 258.0, 294.0, 285.0, 290.0, 289.0, 253.0, 272.0, 286.0, 287.0, 258.0, 264.0, 296.0, 283.0, 292.0, 290.0, 285.0, 288.0, 261.0, 261.0, 265.0, 257.0, 265.0, 257.0, 286.0, 287.0, 298.0, 278.0, 281.0, 292.0, 295.0, 272.0, 283.0, 287.0, 261.0, 264.0, 269.0, 250.0, 265.0, 260.0, 290.0, 283.0, 285.0, 297.0, 276.0, 297.0, 283.0, 290.0, 227.0, 232.0, 258.0, 264.0, 280.0, 296.0, 263.0, 259.0, 281.0, 298.0, 266.0, 259.0, 281.0, 295.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7131311767953434, "mean_inference_ms": 1.2843261845942395, "mean_action_processing_ms": 0.1356088421792502, "mean_env_wait_ms": 0.8584182074596766, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 582.0, "episode_reward_min": 459.0, "episode_reward_mean": 552.52, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 226.0}, "policy_reward_max": {"ppo": 300.0}, "policy_reward_mean": {"ppo": 276.26}, "hist_stats": {"episode_reward": [516.0, 573.0, 576.0, 576.0, 522.0, 462.0, 530.0, 570.0, 576.0, 522.0, 579.0, 510.0, 579.0, 576.0, 576.0, 530.0, 576.0, 576.0, 576.0, 525.0, 576.0, 576.0, 579.0, 579.0, 530.0, 573.0, 522.0, 522.0, 579.0, 459.0, 573.0, 579.0, 576.0, 582.0, 522.0, 576.0, 576.0, 570.0, 522.0, 576.0, 573.0, 522.0, 516.0, 576.0, 570.0, 570.0, 501.0, 573.0, 576.0, 473.0, 573.0, 525.0, 570.0, 576.0, 519.0, 522.0, 522.0, 
570.0, 573.0, 522.0, 576.0, 576.0, 519.0, 530.0, 579.0, 570.0, 573.0, 579.0, 570.0, 519.0, 579.0, 579.0, 525.0, 573.0, 522.0, 579.0, 582.0, 573.0, 522.0, 522.0, 522.0, 573.0, 576.0, 573.0, 567.0, 570.0, 525.0, 519.0, 525.0, 573.0, 582.0, 573.0, 573.0, 459.0, 522.0, 576.0, 522.0, 579.0, 525.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [257.0, 259.0, 276.0, 297.0, 277.0, 299.0, 288.0, 288.0, 249.0, 273.0, 233.0, 229.0, 257.0, 273.0, 299.0, 271.0, 292.0, 284.0, 257.0, 265.0, 293.0, 286.0, 249.0, 261.0, 300.0, 279.0, 277.0, 299.0, 291.0, 285.0, 265.0, 265.0, 288.0, 288.0, 290.0, 286.0, 285.0, 291.0, 275.0, 250.0, 294.0, 282.0, 285.0, 291.0, 287.0, 292.0, 284.0, 295.0, 264.0, 266.0, 277.0, 296.0, 268.0, 254.0, 269.0, 253.0, 291.0, 288.0, 229.0, 230.0, 293.0, 280.0, 290.0, 289.0, 286.0, 290.0, 291.0, 291.0, 257.0, 265.0, 296.0, 280.0, 293.0, 283.0, 296.0, 274.0, 263.0, 259.0, 282.0, 294.0, 290.0, 283.0, 270.0, 252.0, 256.0, 260.0, 288.0, 288.0, 286.0, 284.0, 290.0, 280.0, 253.0, 248.0, 285.0, 288.0, 294.0, 282.0, 226.0, 247.0, 293.0, 280.0, 270.0, 255.0, 286.0, 284.0, 292.0, 284.0, 255.0, 264.0, 267.0, 255.0, 270.0, 252.0, 273.0, 297.0, 278.0, 295.0, 267.0, 255.0, 292.0, 284.0, 292.0, 284.0, 259.0, 260.0, 260.0, 270.0, 281.0, 298.0, 281.0, 289.0, 280.0, 293.0, 286.0, 293.0, 282.0, 288.0, 261.0, 258.0, 294.0, 285.0, 290.0, 289.0, 253.0, 272.0, 286.0, 287.0, 258.0, 264.0, 296.0, 283.0, 292.0, 290.0, 285.0, 288.0, 261.0, 261.0, 265.0, 257.0, 265.0, 257.0, 286.0, 287.0, 298.0, 
278.0, 281.0, 292.0, 295.0, 272.0, 283.0, 287.0, 261.0, 264.0, 269.0, 250.0, 265.0, 260.0, 290.0, 283.0, 285.0, 297.0, 276.0, 297.0, 283.0, 290.0, 227.0, 232.0, 258.0, 264.0, 280.0, 296.0, 263.0, 259.0, 281.0, 298.0, 266.0, 259.0, 281.0, 295.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7131311767953434, "mean_inference_ms": 1.2843261845942395, "mean_action_processing_ms": 0.1356088421792502, "mean_env_wait_ms": 0.8584182074596766, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 5222400, "num_agent_steps_trained": 5222400, "num_env_steps_sampled": 2611200, "num_env_steps_trained": 2611200, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 2611200, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 5222400, "timers": {"training_iteration_time_ms": 5710.771, "learn_time_ms": 1195.228, "learn_throughput": 10709.252, "synch_weights_time_ms": 15.847}, "counters": {"num_env_steps_sampled": 2611200, "num_env_steps_trained": 2611200, "num_agent_steps_sampled": 5222400, "num_agent_steps_trained": 5222400}, "done": false, "episodes_total": 6528, "training_iteration": 204, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-13-45", "timestamp": 1666581225, "time_this_iter_s": 3.8779067993164062, "time_total_s": 797.7640042304993, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 797.7640042304993, "timesteps_since_restore": 0, "iterations_since_restore": 204, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 30.820000000000004, "ram_util_percent": 12.9}}
+{"custom_metrics": {"sparse_reward_mean": 190.8, "sparse_reward_min": 160, "sparse_reward_max": 200, "shaped_reward_mean": 169.0, "shaped_reward_min": 139, "shaped_reward_max": 182, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.81, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 16.09, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 15.43, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 15.86, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.38, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.22, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.16, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, 
"useful_onion_drop_agent_1_mean": 0.11, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.04, "potting_onion_agent_0_min": 9, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 15.55, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.34, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.19, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.89, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.82, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.34, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.29, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.1, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.93, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.77, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.88, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.68, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.04, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.06, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 15.04, "optimal_onion_potting_agent_0_min": 9, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 15.55, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.04, "viable_onion_potting_agent_0_min": 9, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 15.55, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 3.6696321330964565e-05, "policy_loss": -0.00021456298418343067, "vf_loss": 
7.583790302276611, "vf_explained_var": 0.6205594539642334, "kl": 0.00221795542165637, "entropy": 1.0142393112182617, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 2624000, "num_env_steps_trained": 2624000, "num_agent_steps_sampled": 5248000, "num_agent_steps_trained": 5248000}, "sampler_results": {"episode_reward_max": 582.0, "episode_reward_min": 459.0, "episode_reward_mean": 550.6, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 226.0}, "policy_reward_max": {"ppo": 302.0}, "policy_reward_mean": {"ppo": 275.3}, "custom_metrics": {"sparse_reward_mean": 190.8, "sparse_reward_min": 160, "sparse_reward_max": 200, "shaped_reward_mean": 169.0, "shaped_reward_min": 139, "shaped_reward_max": 182, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.81, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 16.09, 
"onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 15.43, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 15.86, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.38, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.22, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.16, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.11, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.04, "potting_onion_agent_0_min": 9, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 15.55, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.34, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.19, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.89, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.82, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.34, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.29, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.1, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.93, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.77, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.88, "soup_delivery_agent_0_min": 1, 
"soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.68, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.04, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.06, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 15.04, "optimal_onion_potting_agent_0_min": 9, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 15.55, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.04, "viable_onion_potting_agent_0_min": 9, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 15.55, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [576.0, 582.0, 522.0, 576.0, 576.0, 570.0, 522.0, 576.0, 573.0, 522.0, 516.0, 576.0, 570.0, 570.0, 501.0, 573.0, 576.0, 473.0, 573.0, 525.0, 570.0, 576.0, 519.0, 522.0, 522.0, 570.0, 573.0, 522.0, 576.0, 576.0, 519.0, 530.0, 579.0, 570.0, 573.0, 579.0, 570.0, 519.0, 579.0, 579.0, 525.0, 573.0, 522.0, 579.0, 582.0, 573.0, 522.0, 522.0, 522.0, 573.0, 576.0, 573.0, 567.0, 570.0, 525.0, 519.0, 525.0, 573.0, 582.0, 573.0, 573.0, 459.0, 522.0, 576.0, 522.0, 579.0, 525.0, 576.0, 519.0, 576.0, 525.0, 579.0, 533.0, 525.0, 573.0, 579.0, 570.0, 530.0, 522.0, 527.0, 525.0, 576.0, 530.0, 530.0, 525.0, 519.0, 573.0, 522.0, 518.0, 522.0, 525.0, 573.0, 570.0, 573.0, 525.0, 519.0, 576.0, 576.0, 573.0, 573.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [286.0, 290.0, 291.0, 291.0, 257.0, 265.0, 296.0, 280.0, 293.0, 283.0, 296.0, 274.0, 263.0, 259.0, 282.0, 294.0, 290.0, 283.0, 270.0, 252.0, 256.0, 260.0, 288.0, 288.0, 286.0, 284.0, 290.0, 280.0, 253.0, 248.0, 285.0, 288.0, 294.0, 282.0, 226.0, 247.0, 293.0, 280.0, 270.0, 255.0, 286.0, 284.0, 292.0, 284.0, 255.0, 264.0, 267.0, 255.0, 270.0, 
252.0, 273.0, 297.0, 278.0, 295.0, 267.0, 255.0, 292.0, 284.0, 292.0, 284.0, 259.0, 260.0, 260.0, 270.0, 281.0, 298.0, 281.0, 289.0, 280.0, 293.0, 286.0, 293.0, 282.0, 288.0, 261.0, 258.0, 294.0, 285.0, 290.0, 289.0, 253.0, 272.0, 286.0, 287.0, 258.0, 264.0, 296.0, 283.0, 292.0, 290.0, 285.0, 288.0, 261.0, 261.0, 265.0, 257.0, 265.0, 257.0, 286.0, 287.0, 298.0, 278.0, 281.0, 292.0, 295.0, 272.0, 283.0, 287.0, 261.0, 264.0, 269.0, 250.0, 265.0, 260.0, 290.0, 283.0, 285.0, 297.0, 276.0, 297.0, 283.0, 290.0, 227.0, 232.0, 258.0, 264.0, 280.0, 296.0, 263.0, 259.0, 281.0, 298.0, 266.0, 259.0, 281.0, 295.0, 245.0, 274.0, 287.0, 289.0, 262.0, 263.0, 291.0, 288.0, 266.0, 267.0, 268.0, 257.0, 301.0, 272.0, 291.0, 288.0, 275.0, 295.0, 278.0, 252.0, 272.0, 250.0, 280.0, 247.0, 255.0, 270.0, 283.0, 293.0, 264.0, 266.0, 249.0, 281.0, 259.0, 266.0, 257.0, 262.0, 284.0, 289.0, 246.0, 276.0, 245.0, 273.0, 251.0, 271.0, 262.0, 263.0, 287.0, 286.0, 291.0, 279.0, 271.0, 302.0, 266.0, 259.0, 262.0, 257.0, 282.0, 294.0, 290.0, 286.0, 295.0, 278.0, 289.0, 284.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7131083339732492, "mean_inference_ms": 1.2842183875528914, "mean_action_processing_ms": 0.13563136158971595, "mean_env_wait_ms": 0.858410746337438, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 582.0, "episode_reward_min": 459.0, "episode_reward_mean": 550.6, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 226.0}, "policy_reward_max": {"ppo": 302.0}, "policy_reward_mean": {"ppo": 275.3}, "hist_stats": {"episode_reward": [576.0, 582.0, 522.0, 576.0, 576.0, 570.0, 522.0, 576.0, 573.0, 522.0, 516.0, 576.0, 570.0, 570.0, 501.0, 573.0, 576.0, 473.0, 573.0, 525.0, 570.0, 576.0, 519.0, 522.0, 522.0, 570.0, 573.0, 522.0, 576.0, 576.0, 519.0, 530.0, 579.0, 570.0, 573.0, 579.0, 570.0, 519.0, 579.0, 579.0, 525.0, 573.0, 522.0, 579.0, 582.0, 573.0, 522.0, 522.0, 522.0, 573.0, 576.0, 573.0, 567.0, 570.0, 525.0, 519.0, 525.0, 
573.0, 582.0, 573.0, 573.0, 459.0, 522.0, 576.0, 522.0, 579.0, 525.0, 576.0, 519.0, 576.0, 525.0, 579.0, 533.0, 525.0, 573.0, 579.0, 570.0, 530.0, 522.0, 527.0, 525.0, 576.0, 530.0, 530.0, 525.0, 519.0, 573.0, 522.0, 518.0, 522.0, 525.0, 573.0, 570.0, 573.0, 525.0, 519.0, 576.0, 576.0, 573.0, 573.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [286.0, 290.0, 291.0, 291.0, 257.0, 265.0, 296.0, 280.0, 293.0, 283.0, 296.0, 274.0, 263.0, 259.0, 282.0, 294.0, 290.0, 283.0, 270.0, 252.0, 256.0, 260.0, 288.0, 288.0, 286.0, 284.0, 290.0, 280.0, 253.0, 248.0, 285.0, 288.0, 294.0, 282.0, 226.0, 247.0, 293.0, 280.0, 270.0, 255.0, 286.0, 284.0, 292.0, 284.0, 255.0, 264.0, 267.0, 255.0, 270.0, 252.0, 273.0, 297.0, 278.0, 295.0, 267.0, 255.0, 292.0, 284.0, 292.0, 284.0, 259.0, 260.0, 260.0, 270.0, 281.0, 298.0, 281.0, 289.0, 280.0, 293.0, 286.0, 293.0, 282.0, 288.0, 261.0, 258.0, 294.0, 285.0, 290.0, 289.0, 253.0, 272.0, 286.0, 287.0, 258.0, 264.0, 296.0, 283.0, 292.0, 290.0, 285.0, 288.0, 261.0, 261.0, 265.0, 257.0, 265.0, 257.0, 286.0, 287.0, 298.0, 278.0, 281.0, 292.0, 295.0, 272.0, 283.0, 287.0, 261.0, 264.0, 269.0, 250.0, 265.0, 260.0, 290.0, 283.0, 285.0, 297.0, 276.0, 297.0, 283.0, 290.0, 227.0, 232.0, 258.0, 264.0, 280.0, 296.0, 263.0, 259.0, 281.0, 298.0, 266.0, 259.0, 281.0, 295.0, 245.0, 274.0, 287.0, 289.0, 262.0, 263.0, 291.0, 288.0, 266.0, 267.0, 268.0, 257.0, 301.0, 272.0, 291.0, 288.0, 275.0, 295.0, 278.0, 252.0, 272.0, 250.0, 280.0, 247.0, 255.0, 270.0, 283.0, 293.0, 264.0, 
266.0, 249.0, 281.0, 259.0, 266.0, 257.0, 262.0, 284.0, 289.0, 246.0, 276.0, 245.0, 273.0, 251.0, 271.0, 262.0, 263.0, 287.0, 286.0, 291.0, 279.0, 271.0, 302.0, 266.0, 259.0, 262.0, 257.0, 282.0, 294.0, 290.0, 286.0, 295.0, 278.0, 289.0, 284.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7131083339732492, "mean_inference_ms": 1.2842183875528914, "mean_action_processing_ms": 0.13563136158971595, "mean_env_wait_ms": 0.858410746337438, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 5248000, "num_agent_steps_trained": 5248000, "num_env_steps_sampled": 2624000, "num_env_steps_trained": 2624000, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 2624000, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 5248000, "timers": {"training_iteration_time_ms": 5242.623, "learn_time_ms": 1190.196, "learn_throughput": 10754.527, "synch_weights_time_ms": 15.664}, "counters": {"num_env_steps_sampled": 2624000, "num_env_steps_trained": 2624000, "num_agent_steps_sampled": 5248000, "num_agent_steps_trained": 5248000}, "done": false, "episodes_total": 6560, "training_iteration": 205, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-13-49", "timestamp": 1666581229, "time_this_iter_s": 3.7952888011932373, "time_total_s": 801.5592930316925, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 801.5592930316925, "timesteps_since_restore": 0, "iterations_since_restore": 205, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 21.25, "ram_util_percent": 10.633333333333333}}
+{"custom_metrics": {"sparse_reward_mean": 191.2, "sparse_reward_min": 160, "sparse_reward_max": 200, "shaped_reward_mean": 169.83, "shaped_reward_min": 139, "shaped_reward_max": 182, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.0, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 15.96, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 15.56, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 15.67, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.37, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.22, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.15, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, 
"useful_onion_drop_agent_1_mean": 0.08, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.29, "potting_onion_agent_0_min": 9, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 15.42, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.24, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.28, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.86, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.94, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.33, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.28, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.1, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 4.83, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.86, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.77, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.81, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.29, "optimal_onion_potting_agent_0_min": 9, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 15.42, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.29, "viable_onion_potting_agent_0_min": 9, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 15.42, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0009396851528435946, "policy_loss": -0.0011760840425267816, "vf_loss": 
7.405368804931641, "vf_explained_var": 0.6177560091018677, "kl": 0.002221380826085806, "entropy": 1.0082740783691406, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 2636800, "num_env_steps_trained": 2636800, "num_agent_steps_sampled": 5273600, "num_agent_steps_trained": 5273600}, "sampler_results": {"episode_reward_max": 582.0, "episode_reward_min": 459.0, "episode_reward_mean": 552.23, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 227.0}, "policy_reward_max": {"ppo": 302.0}, "policy_reward_mean": {"ppo": 276.115}, "custom_metrics": {"sparse_reward_mean": 191.2, "sparse_reward_min": 160, "sparse_reward_max": 200, "shaped_reward_mean": 169.83, "shaped_reward_min": 139, "shaped_reward_max": 182, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.0, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 15.96, 
"onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 15.56, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 15.67, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.37, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.22, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.15, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.08, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.29, "potting_onion_agent_0_min": 9, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 15.42, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.24, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.28, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.86, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.94, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.33, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.28, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.1, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 4.83, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.86, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.77, "soup_delivery_agent_0_min": 1, 
"soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.81, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.29, "optimal_onion_potting_agent_0_min": 9, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 15.42, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.29, "viable_onion_potting_agent_0_min": 9, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 15.42, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 570.0, 573.0, 579.0, 570.0, 519.0, 579.0, 579.0, 525.0, 573.0, 522.0, 579.0, 582.0, 573.0, 522.0, 522.0, 522.0, 573.0, 576.0, 573.0, 567.0, 570.0, 525.0, 519.0, 525.0, 573.0, 582.0, 573.0, 573.0, 459.0, 522.0, 576.0, 522.0, 579.0, 525.0, 576.0, 519.0, 576.0, 525.0, 579.0, 533.0, 525.0, 573.0, 579.0, 570.0, 530.0, 522.0, 527.0, 525.0, 576.0, 530.0, 530.0, 525.0, 519.0, 573.0, 522.0, 518.0, 522.0, 525.0, 573.0, 570.0, 573.0, 525.0, 519.0, 576.0, 576.0, 573.0, 573.0, 573.0, 573.0, 573.0, 516.0, 530.0, 522.0, 579.0, 579.0, 570.0, 582.0, 576.0, 522.0, 573.0, 576.0, 525.0, 519.0, 576.0, 581.0, 579.0, 519.0, 570.0, 573.0, 573.0, 530.0, 525.0, 576.0, 519.0, 530.0, 522.0, 573.0, 576.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [281.0, 298.0, 281.0, 289.0, 280.0, 293.0, 286.0, 293.0, 282.0, 288.0, 261.0, 258.0, 294.0, 285.0, 290.0, 289.0, 253.0, 272.0, 286.0, 287.0, 258.0, 264.0, 296.0, 283.0, 292.0, 290.0, 285.0, 288.0, 261.0, 261.0, 265.0, 257.0, 265.0, 257.0, 286.0, 287.0, 298.0, 278.0, 281.0, 292.0, 295.0, 272.0, 283.0, 287.0, 261.0, 264.0, 269.0, 250.0, 265.0, 
260.0, 290.0, 283.0, 285.0, 297.0, 276.0, 297.0, 283.0, 290.0, 227.0, 232.0, 258.0, 264.0, 280.0, 296.0, 263.0, 259.0, 281.0, 298.0, 266.0, 259.0, 281.0, 295.0, 245.0, 274.0, 287.0, 289.0, 262.0, 263.0, 291.0, 288.0, 266.0, 267.0, 268.0, 257.0, 301.0, 272.0, 291.0, 288.0, 275.0, 295.0, 278.0, 252.0, 272.0, 250.0, 280.0, 247.0, 255.0, 270.0, 283.0, 293.0, 264.0, 266.0, 249.0, 281.0, 259.0, 266.0, 257.0, 262.0, 284.0, 289.0, 246.0, 276.0, 245.0, 273.0, 251.0, 271.0, 262.0, 263.0, 287.0, 286.0, 291.0, 279.0, 271.0, 302.0, 266.0, 259.0, 262.0, 257.0, 282.0, 294.0, 290.0, 286.0, 295.0, 278.0, 289.0, 284.0, 287.0, 286.0, 292.0, 281.0, 283.0, 290.0, 253.0, 263.0, 268.0, 262.0, 265.0, 257.0, 285.0, 294.0, 297.0, 282.0, 297.0, 273.0, 292.0, 290.0, 290.0, 286.0, 263.0, 259.0, 280.0, 293.0, 282.0, 294.0, 253.0, 272.0, 265.0, 254.0, 278.0, 298.0, 297.0, 284.0, 295.0, 284.0, 268.0, 251.0, 290.0, 280.0, 299.0, 274.0, 288.0, 285.0, 267.0, 263.0, 267.0, 258.0, 285.0, 291.0, 251.0, 268.0, 269.0, 261.0, 261.0, 261.0, 287.0, 286.0, 289.0, 287.0, 284.0, 292.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.713028566024557, "mean_inference_ms": 1.2839819542750601, "mean_action_processing_ms": 0.13563662092487597, "mean_env_wait_ms": 0.8583015391835335, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 582.0, "episode_reward_min": 459.0, "episode_reward_mean": 552.23, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 227.0}, "policy_reward_max": {"ppo": 302.0}, "policy_reward_mean": {"ppo": 276.115}, "hist_stats": {"episode_reward": [579.0, 570.0, 573.0, 579.0, 570.0, 519.0, 579.0, 579.0, 525.0, 573.0, 522.0, 579.0, 582.0, 573.0, 522.0, 522.0, 522.0, 573.0, 576.0, 573.0, 567.0, 570.0, 525.0, 519.0, 525.0, 573.0, 582.0, 573.0, 573.0, 459.0, 522.0, 576.0, 522.0, 579.0, 525.0, 576.0, 519.0, 576.0, 525.0, 579.0, 533.0, 525.0, 573.0, 579.0, 570.0, 530.0, 522.0, 527.0, 525.0, 576.0, 530.0, 530.0, 525.0, 519.0, 573.0, 522.0, 518.0, 
522.0, 525.0, 573.0, 570.0, 573.0, 525.0, 519.0, 576.0, 576.0, 573.0, 573.0, 573.0, 573.0, 573.0, 516.0, 530.0, 522.0, 579.0, 579.0, 570.0, 582.0, 576.0, 522.0, 573.0, 576.0, 525.0, 519.0, 576.0, 581.0, 579.0, 519.0, 570.0, 573.0, 573.0, 530.0, 525.0, 576.0, 519.0, 530.0, 522.0, 573.0, 576.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [281.0, 298.0, 281.0, 289.0, 280.0, 293.0, 286.0, 293.0, 282.0, 288.0, 261.0, 258.0, 294.0, 285.0, 290.0, 289.0, 253.0, 272.0, 286.0, 287.0, 258.0, 264.0, 296.0, 283.0, 292.0, 290.0, 285.0, 288.0, 261.0, 261.0, 265.0, 257.0, 265.0, 257.0, 286.0, 287.0, 298.0, 278.0, 281.0, 292.0, 295.0, 272.0, 283.0, 287.0, 261.0, 264.0, 269.0, 250.0, 265.0, 260.0, 290.0, 283.0, 285.0, 297.0, 276.0, 297.0, 283.0, 290.0, 227.0, 232.0, 258.0, 264.0, 280.0, 296.0, 263.0, 259.0, 281.0, 298.0, 266.0, 259.0, 281.0, 295.0, 245.0, 274.0, 287.0, 289.0, 262.0, 263.0, 291.0, 288.0, 266.0, 267.0, 268.0, 257.0, 301.0, 272.0, 291.0, 288.0, 275.0, 295.0, 278.0, 252.0, 272.0, 250.0, 280.0, 247.0, 255.0, 270.0, 283.0, 293.0, 264.0, 266.0, 249.0, 281.0, 259.0, 266.0, 257.0, 262.0, 284.0, 289.0, 246.0, 276.0, 245.0, 273.0, 251.0, 271.0, 262.0, 263.0, 287.0, 286.0, 291.0, 279.0, 271.0, 302.0, 266.0, 259.0, 262.0, 257.0, 282.0, 294.0, 290.0, 286.0, 295.0, 278.0, 289.0, 284.0, 287.0, 286.0, 292.0, 281.0, 283.0, 290.0, 253.0, 263.0, 268.0, 262.0, 265.0, 257.0, 285.0, 294.0, 297.0, 282.0, 297.0, 273.0, 292.0, 290.0, 290.0, 286.0, 263.0, 259.0, 280.0, 293.0, 282.0, 294.0, 253.0, 
272.0, 265.0, 254.0, 278.0, 298.0, 297.0, 284.0, 295.0, 284.0, 268.0, 251.0, 290.0, 280.0, 299.0, 274.0, 288.0, 285.0, 267.0, 263.0, 267.0, 258.0, 285.0, 291.0, 251.0, 268.0, 269.0, 261.0, 261.0, 261.0, 287.0, 286.0, 289.0, 287.0, 284.0, 292.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.713028566024557, "mean_inference_ms": 1.2839819542750601, "mean_action_processing_ms": 0.13563662092487597, "mean_env_wait_ms": 0.8583015391835335, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 5273600, "num_agent_steps_trained": 5273600, "num_env_steps_sampled": 2636800, "num_env_steps_trained": 2636800, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 2636800, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 5273600, "timers": {"training_iteration_time_ms": 5145.504, "learn_time_ms": 1162.921, "learn_throughput": 11006.77, "synch_weights_time_ms": 14.404}, "counters": {"num_env_steps_sampled": 2636800, "num_env_steps_trained": 2636800, "num_agent_steps_sampled": 5273600, "num_agent_steps_trained": 5273600}, "done": false, "episodes_total": 6592, "training_iteration": 206, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-13-53", "timestamp": 1666581233, "time_this_iter_s": 3.7356982231140137, "time_total_s": 805.2949912548065, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 805.2949912548065, "timesteps_since_restore": 0, "iterations_since_restore": 206, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 21.183333333333334, "ram_util_percent": 10.616666666666665}}
+{"custom_metrics": {"sparse_reward_mean": 189.8, "sparse_reward_min": 160, "sparse_reward_max": 200, "shaped_reward_mean": 169.4, "shaped_reward_min": 148, "shaped_reward_max": 182, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.85, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 16.01, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 15.45, "useful_onion_pickup_agent_0_min": 9, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 15.71, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.38, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.26, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.14, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, 
"useful_onion_drop_agent_1_mean": 0.06, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.15, "potting_onion_agent_0_min": 9, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 15.48, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.32, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.23, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.9, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.87, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.34, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.26, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.04, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.07, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 4.87, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.79, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.81, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.72, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.04, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.15, "optimal_onion_potting_agent_0_min": 9, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 15.48, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.15, "viable_onion_potting_agent_0_min": 9, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 15.48, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0016882693162187934, "policy_loss": -0.0019414026755839586, "vf_loss": 
7.55963134765625, "vf_explained_var": 0.6160889863967896, "kl": 0.0016779176658019423, "entropy": 1.0056562423706055, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 2649600, "num_env_steps_trained": 2649600, "num_agent_steps_sampled": 5299200, "num_agent_steps_trained": 5299200}, "sampler_results": {"episode_reward_max": 582.0, "episode_reward_min": 468.0, "episode_reward_mean": 549.0, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 228.0}, "policy_reward_max": {"ppo": 305.0}, "policy_reward_mean": {"ppo": 274.5}, "custom_metrics": {"sparse_reward_mean": 189.8, "sparse_reward_min": 160, "sparse_reward_max": 200, "shaped_reward_mean": 169.4, "shaped_reward_min": 148, "shaped_reward_max": 182, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.85, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 16.01, 
"onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 15.45, "useful_onion_pickup_agent_0_min": 9, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 15.71, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.38, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.26, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.14, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.06, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.15, "potting_onion_agent_0_min": 9, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 15.48, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.32, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.23, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.9, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.87, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.34, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.26, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.04, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.07, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 4.87, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.79, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.81, "soup_delivery_agent_0_min": 1, 
"soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.72, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.04, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.15, "optimal_onion_potting_agent_0_min": 9, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 15.48, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.15, "viable_onion_potting_agent_0_min": 9, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 15.48, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [522.0, 579.0, 525.0, 576.0, 519.0, 576.0, 525.0, 579.0, 533.0, 525.0, 573.0, 579.0, 570.0, 530.0, 522.0, 527.0, 525.0, 576.0, 530.0, 530.0, 525.0, 519.0, 573.0, 522.0, 518.0, 522.0, 525.0, 573.0, 570.0, 573.0, 525.0, 519.0, 576.0, 576.0, 573.0, 573.0, 573.0, 573.0, 573.0, 516.0, 530.0, 522.0, 579.0, 579.0, 570.0, 582.0, 576.0, 522.0, 573.0, 576.0, 525.0, 519.0, 576.0, 581.0, 579.0, 519.0, 570.0, 573.0, 573.0, 530.0, 525.0, 576.0, 519.0, 530.0, 522.0, 573.0, 576.0, 576.0, 579.0, 525.0, 522.0, 522.0, 513.0, 479.0, 468.0, 576.0, 579.0, 573.0, 519.0, 522.0, 573.0, 530.0, 530.0, 522.0, 576.0, 573.0, 576.0, 522.0, 579.0, 579.0, 576.0, 576.0, 576.0, 522.0, 513.0, 527.0, 530.0, 525.0, 573.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [263.0, 259.0, 281.0, 298.0, 266.0, 259.0, 281.0, 295.0, 245.0, 274.0, 287.0, 289.0, 262.0, 263.0, 291.0, 288.0, 266.0, 267.0, 268.0, 257.0, 301.0, 272.0, 291.0, 288.0, 275.0, 295.0, 278.0, 252.0, 272.0, 250.0, 280.0, 247.0, 255.0, 270.0, 283.0, 293.0, 264.0, 266.0, 249.0, 281.0, 259.0, 266.0, 257.0, 262.0, 284.0, 289.0, 246.0, 276.0, 245.0, 
273.0, 251.0, 271.0, 262.0, 263.0, 287.0, 286.0, 291.0, 279.0, 271.0, 302.0, 266.0, 259.0, 262.0, 257.0, 282.0, 294.0, 290.0, 286.0, 295.0, 278.0, 289.0, 284.0, 287.0, 286.0, 292.0, 281.0, 283.0, 290.0, 253.0, 263.0, 268.0, 262.0, 265.0, 257.0, 285.0, 294.0, 297.0, 282.0, 297.0, 273.0, 292.0, 290.0, 290.0, 286.0, 263.0, 259.0, 280.0, 293.0, 282.0, 294.0, 253.0, 272.0, 265.0, 254.0, 278.0, 298.0, 297.0, 284.0, 295.0, 284.0, 268.0, 251.0, 290.0, 280.0, 299.0, 274.0, 288.0, 285.0, 267.0, 263.0, 267.0, 258.0, 285.0, 291.0, 251.0, 268.0, 269.0, 261.0, 261.0, 261.0, 287.0, 286.0, 289.0, 287.0, 284.0, 292.0, 274.0, 305.0, 261.0, 264.0, 261.0, 261.0, 262.0, 260.0, 272.0, 241.0, 229.0, 250.0, 240.0, 228.0, 299.0, 277.0, 283.0, 296.0, 280.0, 293.0, 280.0, 239.0, 252.0, 270.0, 279.0, 294.0, 263.0, 267.0, 269.0, 261.0, 266.0, 256.0, 299.0, 277.0, 290.0, 283.0, 285.0, 291.0, 261.0, 261.0, 293.0, 286.0, 299.0, 280.0, 286.0, 290.0, 286.0, 290.0, 281.0, 295.0, 261.0, 261.0, 236.0, 277.0, 256.0, 271.0, 273.0, 257.0, 269.0, 256.0, 286.0, 287.0, 277.0, 299.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7129065882467726, "mean_inference_ms": 1.2836145170529432, "mean_action_processing_ms": 0.13563081346771022, "mean_env_wait_ms": 0.8581502987526111, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 582.0, "episode_reward_min": 468.0, "episode_reward_mean": 549.0, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 228.0}, "policy_reward_max": {"ppo": 305.0}, "policy_reward_mean": {"ppo": 274.5}, "hist_stats": {"episode_reward": [522.0, 579.0, 525.0, 576.0, 519.0, 576.0, 525.0, 579.0, 533.0, 525.0, 573.0, 579.0, 570.0, 530.0, 522.0, 527.0, 525.0, 576.0, 530.0, 530.0, 525.0, 519.0, 573.0, 522.0, 518.0, 522.0, 525.0, 573.0, 570.0, 573.0, 525.0, 519.0, 576.0, 576.0, 573.0, 573.0, 573.0, 573.0, 573.0, 516.0, 530.0, 522.0, 579.0, 579.0, 570.0, 582.0, 576.0, 522.0, 573.0, 576.0, 525.0, 519.0, 576.0, 581.0, 579.0, 519.0, 570.0, 
573.0, 573.0, 530.0, 525.0, 576.0, 519.0, 530.0, 522.0, 573.0, 576.0, 576.0, 579.0, 525.0, 522.0, 522.0, 513.0, 479.0, 468.0, 576.0, 579.0, 573.0, 519.0, 522.0, 573.0, 530.0, 530.0, 522.0, 576.0, 573.0, 576.0, 522.0, 579.0, 579.0, 576.0, 576.0, 576.0, 522.0, 513.0, 527.0, 530.0, 525.0, 573.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [263.0, 259.0, 281.0, 298.0, 266.0, 259.0, 281.0, 295.0, 245.0, 274.0, 287.0, 289.0, 262.0, 263.0, 291.0, 288.0, 266.0, 267.0, 268.0, 257.0, 301.0, 272.0, 291.0, 288.0, 275.0, 295.0, 278.0, 252.0, 272.0, 250.0, 280.0, 247.0, 255.0, 270.0, 283.0, 293.0, 264.0, 266.0, 249.0, 281.0, 259.0, 266.0, 257.0, 262.0, 284.0, 289.0, 246.0, 276.0, 245.0, 273.0, 251.0, 271.0, 262.0, 263.0, 287.0, 286.0, 291.0, 279.0, 271.0, 302.0, 266.0, 259.0, 262.0, 257.0, 282.0, 294.0, 290.0, 286.0, 295.0, 278.0, 289.0, 284.0, 287.0, 286.0, 292.0, 281.0, 283.0, 290.0, 253.0, 263.0, 268.0, 262.0, 265.0, 257.0, 285.0, 294.0, 297.0, 282.0, 297.0, 273.0, 292.0, 290.0, 290.0, 286.0, 263.0, 259.0, 280.0, 293.0, 282.0, 294.0, 253.0, 272.0, 265.0, 254.0, 278.0, 298.0, 297.0, 284.0, 295.0, 284.0, 268.0, 251.0, 290.0, 280.0, 299.0, 274.0, 288.0, 285.0, 267.0, 263.0, 267.0, 258.0, 285.0, 291.0, 251.0, 268.0, 269.0, 261.0, 261.0, 261.0, 287.0, 286.0, 289.0, 287.0, 284.0, 292.0, 274.0, 305.0, 261.0, 264.0, 261.0, 261.0, 262.0, 260.0, 272.0, 241.0, 229.0, 250.0, 240.0, 228.0, 299.0, 277.0, 283.0, 296.0, 280.0, 293.0, 280.0, 239.0, 252.0, 270.0, 279.0, 294.0, 263.0, 267.0, 269.0, 
261.0, 266.0, 256.0, 299.0, 277.0, 290.0, 283.0, 285.0, 291.0, 261.0, 261.0, 293.0, 286.0, 299.0, 280.0, 286.0, 290.0, 286.0, 290.0, 281.0, 295.0, 261.0, 261.0, 236.0, 277.0, 256.0, 271.0, 273.0, 257.0, 269.0, 256.0, 286.0, 287.0, 277.0, 299.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7129065882467726, "mean_inference_ms": 1.2836145170529432, "mean_action_processing_ms": 0.13563081346771022, "mean_env_wait_ms": 0.8581502987526111, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 5299200, "num_agent_steps_trained": 5299200, "num_env_steps_sampled": 2649600, "num_env_steps_trained": 2649600, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 2649600, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 5299200, "timers": {"training_iteration_time_ms": 4431.021, "learn_time_ms": 1160.982, "learn_throughput": 11025.15, "synch_weights_time_ms": 14.094}, "counters": {"num_env_steps_sampled": 2649600, "num_env_steps_trained": 2649600, "num_agent_steps_sampled": 5299200, "num_agent_steps_trained": 5299200}, "done": false, "episodes_total": 6624, "training_iteration": 207, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-13-57", "timestamp": 1666581237, "time_this_iter_s": 3.7433922290802, "time_total_s": 809.0383834838867, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 809.0383834838867, "timesteps_since_restore": 0, "iterations_since_restore": 207, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.380000000000003, "ram_util_percent": 10.64}}
+{"custom_metrics": {"sparse_reward_mean": 190.0, "sparse_reward_min": 160, "sparse_reward_max": 200, "shaped_reward_mean": 169.24, "shaped_reward_min": 148, "shaped_reward_max": 182, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.71, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 16.15, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 15.32, "useful_onion_pickup_agent_0_min": 9, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 15.75, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.34, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.34, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.13, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, 
"useful_onion_drop_agent_1_mean": 0.09, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.08, "potting_onion_agent_0_min": 9, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 15.52, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.46, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.18, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 12, "useful_dish_pickup_agent_0_mean": 5.02, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.71, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.35, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.34, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.04, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.09, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 4.98, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.71, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 4.86, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.66, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.03, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.08, "optimal_onion_potting_agent_0_min": 9, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 15.52, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.08, "viable_onion_potting_agent_0_min": 9, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 15.52, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.004782018251717091, "policy_loss": -0.005048870109021664, "vf_loss": 
7.675145149230957, "vf_explained_var": 0.6215179562568665, "kl": 0.0018105552298948169, "entropy": 1.001321792602539, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 2662400, "num_env_steps_trained": 2662400, "num_agent_steps_sampled": 5324800, "num_agent_steps_trained": 5324800}, "sampler_results": {"episode_reward_max": 582.0, "episode_reward_min": 468.0, "episode_reward_mean": 549.24, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 228.0}, "policy_reward_max": {"ppo": 305.0}, "policy_reward_mean": {"ppo": 274.62}, "custom_metrics": {"sparse_reward_mean": 190.0, "sparse_reward_min": 160, "sparse_reward_max": 200, "shaped_reward_mean": 169.24, "shaped_reward_min": 148, "shaped_reward_max": 182, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.71, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 16.15, 
"onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 15.32, "useful_onion_pickup_agent_0_min": 9, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 15.75, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.34, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.34, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.13, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.09, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.08, "potting_onion_agent_0_min": 9, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 15.52, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.46, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.18, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 12, "useful_dish_pickup_agent_0_mean": 5.02, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.71, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.35, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.34, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.04, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.09, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 4.98, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.71, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 4.86, "soup_delivery_agent_0_min": 1, 
"soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.66, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.03, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.08, "optimal_onion_potting_agent_0_min": 9, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 15.52, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.08, "viable_onion_potting_agent_0_min": 9, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 15.52, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [576.0, 576.0, 573.0, 573.0, 573.0, 573.0, 573.0, 516.0, 530.0, 522.0, 579.0, 579.0, 570.0, 582.0, 576.0, 522.0, 573.0, 576.0, 525.0, 519.0, 576.0, 581.0, 579.0, 519.0, 570.0, 573.0, 573.0, 530.0, 525.0, 576.0, 519.0, 530.0, 522.0, 573.0, 576.0, 576.0, 579.0, 525.0, 522.0, 522.0, 513.0, 479.0, 468.0, 576.0, 579.0, 573.0, 519.0, 522.0, 573.0, 530.0, 530.0, 522.0, 576.0, 573.0, 576.0, 522.0, 579.0, 579.0, 576.0, 576.0, 576.0, 522.0, 513.0, 527.0, 530.0, 525.0, 573.0, 576.0, 513.0, 570.0, 516.0, 573.0, 524.0, 522.0, 519.0, 573.0, 579.0, 527.0, 516.0, 579.0, 573.0, 522.0, 582.0, 576.0, 530.0, 522.0, 510.0, 573.0, 525.0, 582.0, 570.0, 530.0, 522.0, 576.0, 519.0, 522.0, 576.0, 533.0, 525.0, 530.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [282.0, 294.0, 290.0, 286.0, 295.0, 278.0, 289.0, 284.0, 287.0, 286.0, 292.0, 281.0, 283.0, 290.0, 253.0, 263.0, 268.0, 262.0, 265.0, 257.0, 285.0, 294.0, 297.0, 282.0, 297.0, 273.0, 292.0, 290.0, 290.0, 286.0, 263.0, 259.0, 280.0, 293.0, 282.0, 294.0, 253.0, 272.0, 265.0, 254.0, 278.0, 298.0, 297.0, 284.0, 295.0, 284.0, 268.0, 251.0, 290.0, 
280.0, 299.0, 274.0, 288.0, 285.0, 267.0, 263.0, 267.0, 258.0, 285.0, 291.0, 251.0, 268.0, 269.0, 261.0, 261.0, 261.0, 287.0, 286.0, 289.0, 287.0, 284.0, 292.0, 274.0, 305.0, 261.0, 264.0, 261.0, 261.0, 262.0, 260.0, 272.0, 241.0, 229.0, 250.0, 240.0, 228.0, 299.0, 277.0, 283.0, 296.0, 280.0, 293.0, 280.0, 239.0, 252.0, 270.0, 279.0, 294.0, 263.0, 267.0, 269.0, 261.0, 266.0, 256.0, 299.0, 277.0, 290.0, 283.0, 285.0, 291.0, 261.0, 261.0, 293.0, 286.0, 299.0, 280.0, 286.0, 290.0, 286.0, 290.0, 281.0, 295.0, 261.0, 261.0, 236.0, 277.0, 256.0, 271.0, 273.0, 257.0, 269.0, 256.0, 286.0, 287.0, 277.0, 299.0, 249.0, 264.0, 297.0, 273.0, 258.0, 258.0, 291.0, 282.0, 281.0, 243.0, 271.0, 251.0, 264.0, 255.0, 283.0, 290.0, 290.0, 289.0, 259.0, 268.0, 268.0, 248.0, 287.0, 292.0, 289.0, 284.0, 263.0, 259.0, 282.0, 300.0, 276.0, 300.0, 267.0, 263.0, 256.0, 266.0, 245.0, 265.0, 285.0, 288.0, 260.0, 265.0, 281.0, 301.0, 290.0, 280.0, 271.0, 259.0, 264.0, 258.0, 302.0, 274.0, 263.0, 256.0, 252.0, 270.0, 293.0, 283.0, 264.0, 269.0, 267.0, 258.0, 251.0, 279.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7127601978504233, "mean_inference_ms": 1.28335528460283, "mean_action_processing_ms": 0.135616836400308, "mean_env_wait_ms": 0.8582178172002506, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 582.0, "episode_reward_min": 468.0, "episode_reward_mean": 549.24, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 228.0}, "policy_reward_max": {"ppo": 305.0}, "policy_reward_mean": {"ppo": 274.62}, "hist_stats": {"episode_reward": [576.0, 576.0, 573.0, 573.0, 573.0, 573.0, 573.0, 516.0, 530.0, 522.0, 579.0, 579.0, 570.0, 582.0, 576.0, 522.0, 573.0, 576.0, 525.0, 519.0, 576.0, 581.0, 579.0, 519.0, 570.0, 573.0, 573.0, 530.0, 525.0, 576.0, 519.0, 530.0, 522.0, 573.0, 576.0, 576.0, 579.0, 525.0, 522.0, 522.0, 513.0, 479.0, 468.0, 576.0, 579.0, 573.0, 519.0, 522.0, 573.0, 530.0, 530.0, 522.0, 576.0, 573.0, 576.0, 522.0, 579.0, 
579.0, 576.0, 576.0, 576.0, 522.0, 513.0, 527.0, 530.0, 525.0, 573.0, 576.0, 513.0, 570.0, 516.0, 573.0, 524.0, 522.0, 519.0, 573.0, 579.0, 527.0, 516.0, 579.0, 573.0, 522.0, 582.0, 576.0, 530.0, 522.0, 510.0, 573.0, 525.0, 582.0, 570.0, 530.0, 522.0, 576.0, 519.0, 522.0, 576.0, 533.0, 525.0, 530.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [282.0, 294.0, 290.0, 286.0, 295.0, 278.0, 289.0, 284.0, 287.0, 286.0, 292.0, 281.0, 283.0, 290.0, 253.0, 263.0, 268.0, 262.0, 265.0, 257.0, 285.0, 294.0, 297.0, 282.0, 297.0, 273.0, 292.0, 290.0, 290.0, 286.0, 263.0, 259.0, 280.0, 293.0, 282.0, 294.0, 253.0, 272.0, 265.0, 254.0, 278.0, 298.0, 297.0, 284.0, 295.0, 284.0, 268.0, 251.0, 290.0, 280.0, 299.0, 274.0, 288.0, 285.0, 267.0, 263.0, 267.0, 258.0, 285.0, 291.0, 251.0, 268.0, 269.0, 261.0, 261.0, 261.0, 287.0, 286.0, 289.0, 287.0, 284.0, 292.0, 274.0, 305.0, 261.0, 264.0, 261.0, 261.0, 262.0, 260.0, 272.0, 241.0, 229.0, 250.0, 240.0, 228.0, 299.0, 277.0, 283.0, 296.0, 280.0, 293.0, 280.0, 239.0, 252.0, 270.0, 279.0, 294.0, 263.0, 267.0, 269.0, 261.0, 266.0, 256.0, 299.0, 277.0, 290.0, 283.0, 285.0, 291.0, 261.0, 261.0, 293.0, 286.0, 299.0, 280.0, 286.0, 290.0, 286.0, 290.0, 281.0, 295.0, 261.0, 261.0, 236.0, 277.0, 256.0, 271.0, 273.0, 257.0, 269.0, 256.0, 286.0, 287.0, 277.0, 299.0, 249.0, 264.0, 297.0, 273.0, 258.0, 258.0, 291.0, 282.0, 281.0, 243.0, 271.0, 251.0, 264.0, 255.0, 283.0, 290.0, 290.0, 289.0, 259.0, 268.0, 268.0, 248.0, 287.0, 292.0, 289.0, 284.0, 263.0, 259.0, 282.0, 
300.0, 276.0, 300.0, 267.0, 263.0, 256.0, 266.0, 245.0, 265.0, 285.0, 288.0, 260.0, 265.0, 281.0, 301.0, 290.0, 280.0, 271.0, 259.0, 264.0, 258.0, 302.0, 274.0, 263.0, 256.0, 252.0, 270.0, 293.0, 283.0, 264.0, 269.0, 267.0, 258.0, 251.0, 279.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7127601978504233, "mean_inference_ms": 1.28335528460283, "mean_action_processing_ms": 0.135616836400308, "mean_env_wait_ms": 0.8582178172002506, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 5324800, "num_agent_steps_trained": 5324800, "num_env_steps_sampled": 2662400, "num_env_steps_trained": 2662400, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 2662400, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 5324800, "timers": {"training_iteration_time_ms": 4392.957, "learn_time_ms": 1154.591, "learn_throughput": 11086.175, "synch_weights_time_ms": 14.769}, "counters": {"num_env_steps_sampled": 2662400, "num_env_steps_trained": 2662400, "num_agent_steps_sampled": 5324800, "num_agent_steps_trained": 5324800}, "done": false, "episodes_total": 6656, "training_iteration": 208, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-14-01", "timestamp": 1666581241, "time_this_iter_s": 3.871088743209839, "time_total_s": 812.9094722270966, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 812.9094722270966, "timesteps_since_restore": 0, "iterations_since_restore": 208, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 22.099999999999998, "ram_util_percent": 10.616666666666667}}
+{"custom_metrics": {"sparse_reward_mean": 188.8, "sparse_reward_min": 160, "sparse_reward_max": 200, "shaped_reward_mean": 168.36, "shaped_reward_min": 145, "shaped_reward_max": 182, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.37, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 16.37, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 15.02, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 15.91, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 25, "onion_drop_agent_0_mean": 0.29, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.38, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.15, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, 
"useful_onion_drop_agent_1_mean": 0.17, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 14.77, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 15.71, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.63, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 5.06, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 12, "useful_dish_pickup_agent_0_mean": 5.16, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.53, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.37, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.38, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.09, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.09, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.09, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.54, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.0, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.46, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.03, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.05, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.77, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 15.71, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.77, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 15.71, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0028461883775889874, "policy_loss": -0.003107053227722645, "vf_loss": 
7.6106414794921875, "vf_explained_var": 0.6246351003646851, "kl": 0.0016450014663860202, "entropy": 1.0003979206085205, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 2675200, "num_env_steps_trained": 2675200, "num_agent_steps_sampled": 5350400, "num_agent_steps_trained": 5350400}, "sampler_results": {"episode_reward_max": 582.0, "episode_reward_min": 465.0, "episode_reward_mean": 545.96, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 227.0}, "policy_reward_max": {"ppo": 305.0}, "policy_reward_mean": {"ppo": 272.98}, "custom_metrics": {"sparse_reward_mean": 188.8, "sparse_reward_min": 160, "sparse_reward_max": 200, "shaped_reward_mean": 168.36, "shaped_reward_min": 145, "shaped_reward_max": 182, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.37, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 16.37, 
"onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 15.02, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 15.91, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 25, "onion_drop_agent_0_mean": 0.29, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.38, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.15, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.17, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 14.77, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 15.71, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.63, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 5.06, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 12, "useful_dish_pickup_agent_0_mean": 5.16, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.53, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.37, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.38, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.09, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.09, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.09, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.54, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.0, "soup_delivery_agent_0_min": 1, 
"soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.46, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.03, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.05, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.77, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 15.71, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.77, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 15.71, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [522.0, 573.0, 576.0, 576.0, 579.0, 525.0, 522.0, 522.0, 513.0, 479.0, 468.0, 576.0, 579.0, 573.0, 519.0, 522.0, 573.0, 530.0, 530.0, 522.0, 576.0, 573.0, 576.0, 522.0, 579.0, 579.0, 576.0, 576.0, 576.0, 522.0, 513.0, 527.0, 530.0, 525.0, 573.0, 576.0, 513.0, 570.0, 516.0, 573.0, 524.0, 522.0, 519.0, 573.0, 579.0, 527.0, 516.0, 579.0, 573.0, 522.0, 582.0, 576.0, 530.0, 522.0, 510.0, 573.0, 525.0, 582.0, 570.0, 530.0, 522.0, 576.0, 519.0, 522.0, 576.0, 533.0, 525.0, 530.0, 579.0, 533.0, 519.0, 579.0, 525.0, 530.0, 525.0, 522.0, 465.0, 573.0, 573.0, 525.0, 579.0, 570.0, 570.0, 519.0, 525.0, 576.0, 525.0, 570.0, 525.0, 570.0, 573.0, 525.0, 573.0, 525.0, 522.0, 573.0, 567.0, 525.0, 573.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [261.0, 261.0, 287.0, 286.0, 289.0, 287.0, 284.0, 292.0, 274.0, 305.0, 261.0, 264.0, 261.0, 261.0, 262.0, 260.0, 272.0, 241.0, 229.0, 250.0, 240.0, 228.0, 299.0, 277.0, 283.0, 296.0, 280.0, 293.0, 280.0, 239.0, 252.0, 270.0, 279.0, 294.0, 263.0, 267.0, 269.0, 261.0, 266.0, 256.0, 299.0, 277.0, 290.0, 283.0, 285.0, 291.0, 261.0, 261.0, 293.0, 
286.0, 299.0, 280.0, 286.0, 290.0, 286.0, 290.0, 281.0, 295.0, 261.0, 261.0, 236.0, 277.0, 256.0, 271.0, 273.0, 257.0, 269.0, 256.0, 286.0, 287.0, 277.0, 299.0, 249.0, 264.0, 297.0, 273.0, 258.0, 258.0, 291.0, 282.0, 281.0, 243.0, 271.0, 251.0, 264.0, 255.0, 283.0, 290.0, 290.0, 289.0, 259.0, 268.0, 268.0, 248.0, 287.0, 292.0, 289.0, 284.0, 263.0, 259.0, 282.0, 300.0, 276.0, 300.0, 267.0, 263.0, 256.0, 266.0, 245.0, 265.0, 285.0, 288.0, 260.0, 265.0, 281.0, 301.0, 290.0, 280.0, 271.0, 259.0, 264.0, 258.0, 302.0, 274.0, 263.0, 256.0, 252.0, 270.0, 293.0, 283.0, 264.0, 269.0, 267.0, 258.0, 251.0, 279.0, 286.0, 293.0, 267.0, 266.0, 259.0, 260.0, 285.0, 294.0, 256.0, 269.0, 274.0, 256.0, 271.0, 254.0, 258.0, 264.0, 238.0, 227.0, 290.0, 283.0, 287.0, 286.0, 265.0, 260.0, 283.0, 296.0, 283.0, 287.0, 288.0, 282.0, 271.0, 248.0, 265.0, 260.0, 292.0, 284.0, 279.0, 246.0, 289.0, 281.0, 266.0, 259.0, 292.0, 278.0, 293.0, 280.0, 264.0, 261.0, 292.0, 281.0, 268.0, 257.0, 267.0, 255.0, 286.0, 287.0, 278.0, 289.0, 267.0, 258.0, 287.0, 286.0, 300.0, 276.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7125994316173464, "mean_inference_ms": 1.2830898160989364, "mean_action_processing_ms": 0.1356002703740297, "mean_env_wait_ms": 0.8582668591214645, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 582.0, "episode_reward_min": 465.0, "episode_reward_mean": 545.96, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 227.0}, "policy_reward_max": {"ppo": 305.0}, "policy_reward_mean": {"ppo": 272.98}, "hist_stats": {"episode_reward": [522.0, 573.0, 576.0, 576.0, 579.0, 525.0, 522.0, 522.0, 513.0, 479.0, 468.0, 576.0, 579.0, 573.0, 519.0, 522.0, 573.0, 530.0, 530.0, 522.0, 576.0, 573.0, 576.0, 522.0, 579.0, 579.0, 576.0, 576.0, 576.0, 522.0, 513.0, 527.0, 530.0, 525.0, 573.0, 576.0, 513.0, 570.0, 516.0, 573.0, 524.0, 522.0, 519.0, 573.0, 579.0, 527.0, 516.0, 579.0, 573.0, 522.0, 582.0, 576.0, 530.0, 522.0, 510.0, 573.0, 525.0, 
582.0, 570.0, 530.0, 522.0, 576.0, 519.0, 522.0, 576.0, 533.0, 525.0, 530.0, 579.0, 533.0, 519.0, 579.0, 525.0, 530.0, 525.0, 522.0, 465.0, 573.0, 573.0, 525.0, 579.0, 570.0, 570.0, 519.0, 525.0, 576.0, 525.0, 570.0, 525.0, 570.0, 573.0, 525.0, 573.0, 525.0, 522.0, 573.0, 567.0, 525.0, 573.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [261.0, 261.0, 287.0, 286.0, 289.0, 287.0, 284.0, 292.0, 274.0, 305.0, 261.0, 264.0, 261.0, 261.0, 262.0, 260.0, 272.0, 241.0, 229.0, 250.0, 240.0, 228.0, 299.0, 277.0, 283.0, 296.0, 280.0, 293.0, 280.0, 239.0, 252.0, 270.0, 279.0, 294.0, 263.0, 267.0, 269.0, 261.0, 266.0, 256.0, 299.0, 277.0, 290.0, 283.0, 285.0, 291.0, 261.0, 261.0, 293.0, 286.0, 299.0, 280.0, 286.0, 290.0, 286.0, 290.0, 281.0, 295.0, 261.0, 261.0, 236.0, 277.0, 256.0, 271.0, 273.0, 257.0, 269.0, 256.0, 286.0, 287.0, 277.0, 299.0, 249.0, 264.0, 297.0, 273.0, 258.0, 258.0, 291.0, 282.0, 281.0, 243.0, 271.0, 251.0, 264.0, 255.0, 283.0, 290.0, 290.0, 289.0, 259.0, 268.0, 268.0, 248.0, 287.0, 292.0, 289.0, 284.0, 263.0, 259.0, 282.0, 300.0, 276.0, 300.0, 267.0, 263.0, 256.0, 266.0, 245.0, 265.0, 285.0, 288.0, 260.0, 265.0, 281.0, 301.0, 290.0, 280.0, 271.0, 259.0, 264.0, 258.0, 302.0, 274.0, 263.0, 256.0, 252.0, 270.0, 293.0, 283.0, 264.0, 269.0, 267.0, 258.0, 251.0, 279.0, 286.0, 293.0, 267.0, 266.0, 259.0, 260.0, 285.0, 294.0, 256.0, 269.0, 274.0, 256.0, 271.0, 254.0, 258.0, 264.0, 238.0, 227.0, 290.0, 283.0, 287.0, 286.0, 265.0, 260.0, 283.0, 296.0, 283.0, 287.0, 288.0, 
282.0, 271.0, 248.0, 265.0, 260.0, 292.0, 284.0, 279.0, 246.0, 289.0, 281.0, 266.0, 259.0, 292.0, 278.0, 293.0, 280.0, 264.0, 261.0, 292.0, 281.0, 268.0, 257.0, 267.0, 255.0, 286.0, 287.0, 278.0, 289.0, 267.0, 258.0, 287.0, 286.0, 300.0, 276.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7125994316173464, "mean_inference_ms": 1.2830898160989364, "mean_action_processing_ms": 0.1356002703740297, "mean_env_wait_ms": 0.8582668591214645, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 5350400, "num_agent_steps_trained": 5350400, "num_env_steps_sampled": 2675200, "num_env_steps_trained": 2675200, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 2675200, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 5350400, "timers": {"training_iteration_time_ms": 4348.02, "learn_time_ms": 1155.85, "learn_throughput": 11074.098, "synch_weights_time_ms": 15.261}, "counters": {"num_env_steps_sampled": 2675200, "num_env_steps_trained": 2675200, "num_agent_steps_sampled": 5350400, "num_agent_steps_trained": 5350400}, "done": false, "episodes_total": 6688, "training_iteration": 209, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-14-05", "timestamp": 1666581245, "time_this_iter_s": 3.7650973796844482, "time_total_s": 816.674569606781, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 816.674569606781, "timesteps_since_restore": 0, "iterations_since_restore": 209, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 24.34, "ram_util_percent": 10.62}}
+{"custom_metrics": {"sparse_reward_mean": 189.2, "sparse_reward_min": 80, "sparse_reward_max": 200, "shaped_reward_mean": 167.94, "shaped_reward_min": 77, "shaped_reward_max": 182, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.03, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 16.63, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 14.57, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 16.12, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 25, "onion_drop_agent_0_mean": 0.38, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.37, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.25, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, 
"useful_onion_drop_agent_1_mean": 0.18, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 14.38, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 15.91, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.79, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 4.9, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 12, "useful_dish_pickup_agent_0_mean": 5.34, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.4, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.38, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.36, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.07, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.1, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.25, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.39, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.15, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.32, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.05, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.38, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 15.91, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.38, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 15.91, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0021201916970312595, "policy_loss": -0.002378989476710558, "vf_loss": 
7.568546772003174, "vf_explained_var": 0.6088274717330933, "kl": 0.0017179761780425906, "entropy": 0.9961112141609192, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 2688000, "num_env_steps_trained": 2688000, "num_agent_steps_sampled": 5376000, "num_agent_steps_trained": 5376000}, "sampler_results": {"episode_reward_max": 582.0, "episode_reward_min": 237.0, "episode_reward_mean": 546.34, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 117.0}, "policy_reward_max": {"ppo": 304.0}, "policy_reward_mean": {"ppo": 273.17}, "custom_metrics": {"sparse_reward_mean": 189.2, "sparse_reward_min": 80, "sparse_reward_max": 200, "shaped_reward_mean": 167.94, "shaped_reward_min": 77, "shaped_reward_max": 182, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.03, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 16.63, 
"onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 14.57, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 16.12, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 25, "onion_drop_agent_0_mean": 0.38, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.37, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.25, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.18, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 14.38, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 15.91, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.79, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 4.9, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 12, "useful_dish_pickup_agent_0_mean": 5.34, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.4, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.38, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.36, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.07, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.1, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.25, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.39, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.15, "soup_delivery_agent_0_min": 1, 
"soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.32, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.05, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.38, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 15.91, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.38, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 15.91, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [530.0, 525.0, 573.0, 576.0, 513.0, 570.0, 516.0, 573.0, 524.0, 522.0, 519.0, 573.0, 579.0, 527.0, 516.0, 579.0, 573.0, 522.0, 582.0, 576.0, 530.0, 522.0, 510.0, 573.0, 525.0, 582.0, 570.0, 530.0, 522.0, 576.0, 519.0, 522.0, 576.0, 533.0, 525.0, 530.0, 579.0, 533.0, 519.0, 579.0, 525.0, 530.0, 525.0, 522.0, 465.0, 573.0, 573.0, 525.0, 579.0, 570.0, 570.0, 519.0, 525.0, 576.0, 525.0, 570.0, 525.0, 570.0, 573.0, 525.0, 573.0, 525.0, 522.0, 573.0, 567.0, 525.0, 573.0, 576.0, 519.0, 567.0, 525.0, 573.0, 576.0, 570.0, 573.0, 573.0, 576.0, 573.0, 573.0, 573.0, 573.0, 579.0, 519.0, 573.0, 570.0, 576.0, 525.0, 530.0, 573.0, 530.0, 576.0, 519.0, 573.0, 237.0, 573.0, 519.0, 525.0, 570.0, 576.0, 525.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [273.0, 257.0, 269.0, 256.0, 286.0, 287.0, 277.0, 299.0, 249.0, 264.0, 297.0, 273.0, 258.0, 258.0, 291.0, 282.0, 281.0, 243.0, 271.0, 251.0, 264.0, 255.0, 283.0, 290.0, 290.0, 289.0, 259.0, 268.0, 268.0, 248.0, 287.0, 292.0, 289.0, 284.0, 263.0, 259.0, 282.0, 300.0, 276.0, 300.0, 267.0, 263.0, 256.0, 266.0, 245.0, 265.0, 285.0, 288.0, 260.0, 
265.0, 281.0, 301.0, 290.0, 280.0, 271.0, 259.0, 264.0, 258.0, 302.0, 274.0, 263.0, 256.0, 252.0, 270.0, 293.0, 283.0, 264.0, 269.0, 267.0, 258.0, 251.0, 279.0, 286.0, 293.0, 267.0, 266.0, 259.0, 260.0, 285.0, 294.0, 256.0, 269.0, 274.0, 256.0, 271.0, 254.0, 258.0, 264.0, 238.0, 227.0, 290.0, 283.0, 287.0, 286.0, 265.0, 260.0, 283.0, 296.0, 283.0, 287.0, 288.0, 282.0, 271.0, 248.0, 265.0, 260.0, 292.0, 284.0, 279.0, 246.0, 289.0, 281.0, 266.0, 259.0, 292.0, 278.0, 293.0, 280.0, 264.0, 261.0, 292.0, 281.0, 268.0, 257.0, 267.0, 255.0, 286.0, 287.0, 278.0, 289.0, 267.0, 258.0, 287.0, 286.0, 300.0, 276.0, 263.0, 256.0, 284.0, 283.0, 257.0, 268.0, 284.0, 289.0, 293.0, 283.0, 304.0, 266.0, 284.0, 289.0, 283.0, 290.0, 290.0, 286.0, 284.0, 289.0, 288.0, 285.0, 297.0, 276.0, 280.0, 293.0, 288.0, 291.0, 261.0, 258.0, 287.0, 286.0, 277.0, 293.0, 289.0, 287.0, 259.0, 266.0, 271.0, 259.0, 286.0, 287.0, 259.0, 271.0, 291.0, 285.0, 253.0, 266.0, 288.0, 285.0, 117.0, 120.0, 290.0, 283.0, 273.0, 246.0, 256.0, 269.0, 282.0, 288.0, 288.0, 288.0, 265.0, 260.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7124529008996413, "mean_inference_ms": 1.2828236653647938, "mean_action_processing_ms": 0.135582557767892, "mean_env_wait_ms": 0.8583057373366506, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 582.0, "episode_reward_min": 237.0, "episode_reward_mean": 546.34, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 117.0}, "policy_reward_max": {"ppo": 304.0}, "policy_reward_mean": {"ppo": 273.17}, "hist_stats": {"episode_reward": [530.0, 525.0, 573.0, 576.0, 513.0, 570.0, 516.0, 573.0, 524.0, 522.0, 519.0, 573.0, 579.0, 527.0, 516.0, 579.0, 573.0, 522.0, 582.0, 576.0, 530.0, 522.0, 510.0, 573.0, 525.0, 582.0, 570.0, 530.0, 522.0, 576.0, 519.0, 522.0, 576.0, 533.0, 525.0, 530.0, 579.0, 533.0, 519.0, 579.0, 525.0, 530.0, 525.0, 522.0, 465.0, 573.0, 573.0, 525.0, 579.0, 570.0, 570.0, 519.0, 525.0, 576.0, 525.0, 570.0, 525.0, 
570.0, 573.0, 525.0, 573.0, 525.0, 522.0, 573.0, 567.0, 525.0, 573.0, 576.0, 519.0, 567.0, 525.0, 573.0, 576.0, 570.0, 573.0, 573.0, 576.0, 573.0, 573.0, 573.0, 573.0, 579.0, 519.0, 573.0, 570.0, 576.0, 525.0, 530.0, 573.0, 530.0, 576.0, 519.0, 573.0, 237.0, 573.0, 519.0, 525.0, 570.0, 576.0, 525.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [273.0, 257.0, 269.0, 256.0, 286.0, 287.0, 277.0, 299.0, 249.0, 264.0, 297.0, 273.0, 258.0, 258.0, 291.0, 282.0, 281.0, 243.0, 271.0, 251.0, 264.0, 255.0, 283.0, 290.0, 290.0, 289.0, 259.0, 268.0, 268.0, 248.0, 287.0, 292.0, 289.0, 284.0, 263.0, 259.0, 282.0, 300.0, 276.0, 300.0, 267.0, 263.0, 256.0, 266.0, 245.0, 265.0, 285.0, 288.0, 260.0, 265.0, 281.0, 301.0, 290.0, 280.0, 271.0, 259.0, 264.0, 258.0, 302.0, 274.0, 263.0, 256.0, 252.0, 270.0, 293.0, 283.0, 264.0, 269.0, 267.0, 258.0, 251.0, 279.0, 286.0, 293.0, 267.0, 266.0, 259.0, 260.0, 285.0, 294.0, 256.0, 269.0, 274.0, 256.0, 271.0, 254.0, 258.0, 264.0, 238.0, 227.0, 290.0, 283.0, 287.0, 286.0, 265.0, 260.0, 283.0, 296.0, 283.0, 287.0, 288.0, 282.0, 271.0, 248.0, 265.0, 260.0, 292.0, 284.0, 279.0, 246.0, 289.0, 281.0, 266.0, 259.0, 292.0, 278.0, 293.0, 280.0, 264.0, 261.0, 292.0, 281.0, 268.0, 257.0, 267.0, 255.0, 286.0, 287.0, 278.0, 289.0, 267.0, 258.0, 287.0, 286.0, 300.0, 276.0, 263.0, 256.0, 284.0, 283.0, 257.0, 268.0, 284.0, 289.0, 293.0, 283.0, 304.0, 266.0, 284.0, 289.0, 283.0, 290.0, 290.0, 286.0, 284.0, 289.0, 288.0, 285.0, 297.0, 276.0, 280.0, 293.0, 288.0, 291.0, 261.0, 
258.0, 287.0, 286.0, 277.0, 293.0, 289.0, 287.0, 259.0, 266.0, 271.0, 259.0, 286.0, 287.0, 259.0, 271.0, 291.0, 285.0, 253.0, 266.0, 288.0, 285.0, 117.0, 120.0, 290.0, 283.0, 273.0, 246.0, 256.0, 269.0, 282.0, 288.0, 288.0, 288.0, 265.0, 260.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7124529008996413, "mean_inference_ms": 1.2828236653647938, "mean_action_processing_ms": 0.135582557767892, "mean_env_wait_ms": 0.8583057373366506, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 5376000, "num_agent_steps_trained": 5376000, "num_env_steps_sampled": 2688000, "num_env_steps_trained": 2688000, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 2688000, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 5376000, "timers": {"training_iteration_time_ms": 3843.384, "learn_time_ms": 1149.996, "learn_throughput": 11130.47, "synch_weights_time_ms": 14.22}, "counters": {"num_env_steps_sampled": 2688000, "num_env_steps_trained": 2688000, "num_agent_steps_sampled": 5376000, "num_agent_steps_trained": 5376000}, "done": false, "episodes_total": 6720, "training_iteration": 210, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-14-09", "timestamp": 1666581249, "time_this_iter_s": 3.7147159576416016, "time_total_s": 820.3892855644226, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 820.3892855644226, "timesteps_since_restore": 0, "iterations_since_restore": 210, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.633333333333333, "ram_util_percent": 10.633333333333333}}
+{"custom_metrics": {"sparse_reward_mean": 186.4, "sparse_reward_min": 80, "sparse_reward_max": 200, "shaped_reward_mean": 166.85, "shaped_reward_min": 77, "shaped_reward_max": 182, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.1, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 16.31, "onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 14.72, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 15.87, "useful_onion_pickup_agent_1_min": 6, "useful_onion_pickup_agent_1_max": 25, "onion_drop_agent_0_mean": 0.35, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.32, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.22, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, 
"useful_onion_drop_agent_1_mean": 0.15, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 14.4, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 15.67, "potting_onion_agent_1_min": 6, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.68, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 4.98, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 12, "useful_dish_pickup_agent_0_mean": 5.25, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.48, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.39, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.4, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.09, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.07, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.14, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.43, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.07, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.31, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.05, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.06, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 14.4, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 15.67, "optimal_onion_potting_agent_1_min": 6, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.4, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 15.67, "viable_onion_potting_agent_1_min": 6, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.003364190459251404, "policy_loss": -0.003640303621068597, "vf_loss": 
7.733088493347168, "vf_explained_var": 0.6351046562194824, "kl": 0.0018460192950442433, "entropy": 0.9943915605545044, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 2700800, "num_env_steps_trained": 2700800, "num_agent_steps_sampled": 5401600, "num_agent_steps_trained": 5401600}, "sampler_results": {"episode_reward_max": 582.0, "episode_reward_min": 237.0, "episode_reward_mean": 539.65, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 117.0}, "policy_reward_max": {"ppo": 312.0}, "policy_reward_mean": {"ppo": 269.825}, "custom_metrics": {"sparse_reward_mean": 186.4, "sparse_reward_min": 80, "sparse_reward_max": 200, "shaped_reward_mean": 166.85, "shaped_reward_min": 77, "shaped_reward_max": 182, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.1, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 16.31, 
"onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 14.72, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 15.87, "useful_onion_pickup_agent_1_min": 6, "useful_onion_pickup_agent_1_max": 25, "onion_drop_agent_0_mean": 0.35, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.32, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.22, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.15, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 14.4, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 15.67, "potting_onion_agent_1_min": 6, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.68, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 4.98, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 12, "useful_dish_pickup_agent_0_mean": 5.25, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.48, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.39, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.4, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.09, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.07, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.14, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.43, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.07, "soup_delivery_agent_0_min": 1, 
"soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.31, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.05, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.06, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 14.4, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 15.67, "optimal_onion_potting_agent_1_min": 6, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.4, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 15.67, "viable_onion_potting_agent_1_min": 6, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [576.0, 533.0, 525.0, 530.0, 579.0, 533.0, 519.0, 579.0, 525.0, 530.0, 525.0, 522.0, 465.0, 573.0, 573.0, 525.0, 579.0, 570.0, 570.0, 519.0, 525.0, 576.0, 525.0, 570.0, 525.0, 570.0, 573.0, 525.0, 573.0, 525.0, 522.0, 573.0, 567.0, 525.0, 573.0, 576.0, 519.0, 567.0, 525.0, 573.0, 576.0, 570.0, 573.0, 573.0, 576.0, 573.0, 573.0, 573.0, 573.0, 579.0, 519.0, 573.0, 570.0, 576.0, 525.0, 530.0, 573.0, 530.0, 576.0, 519.0, 573.0, 237.0, 573.0, 519.0, 525.0, 570.0, 576.0, 525.0, 525.0, 525.0, 516.0, 530.0, 573.0, 533.0, 436.0, 516.0, 530.0, 576.0, 522.0, 487.0, 516.0, 576.0, 522.0, 576.0, 533.0, 527.0, 522.0, 527.0, 482.0, 579.0, 516.0, 519.0, 570.0, 573.0, 351.0, 525.0, 525.0, 522.0, 468.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [293.0, 283.0, 264.0, 269.0, 267.0, 258.0, 251.0, 279.0, 286.0, 293.0, 267.0, 266.0, 259.0, 260.0, 285.0, 294.0, 256.0, 269.0, 274.0, 256.0, 271.0, 254.0, 258.0, 264.0, 238.0, 227.0, 290.0, 283.0, 287.0, 286.0, 265.0, 260.0, 283.0, 296.0, 283.0, 287.0, 288.0, 282.0, 271.0, 248.0, 265.0, 260.0, 292.0, 284.0, 279.0, 246.0, 289.0, 281.0, 266.0, 
259.0, 292.0, 278.0, 293.0, 280.0, 264.0, 261.0, 292.0, 281.0, 268.0, 257.0, 267.0, 255.0, 286.0, 287.0, 278.0, 289.0, 267.0, 258.0, 287.0, 286.0, 300.0, 276.0, 263.0, 256.0, 284.0, 283.0, 257.0, 268.0, 284.0, 289.0, 293.0, 283.0, 304.0, 266.0, 284.0, 289.0, 283.0, 290.0, 290.0, 286.0, 284.0, 289.0, 288.0, 285.0, 297.0, 276.0, 280.0, 293.0, 288.0, 291.0, 261.0, 258.0, 287.0, 286.0, 277.0, 293.0, 289.0, 287.0, 259.0, 266.0, 271.0, 259.0, 286.0, 287.0, 259.0, 271.0, 291.0, 285.0, 253.0, 266.0, 288.0, 285.0, 117.0, 120.0, 290.0, 283.0, 273.0, 246.0, 256.0, 269.0, 282.0, 288.0, 288.0, 288.0, 265.0, 260.0, 264.0, 261.0, 257.0, 268.0, 257.0, 259.0, 255.0, 275.0, 286.0, 287.0, 264.0, 269.0, 237.0, 199.0, 252.0, 264.0, 260.0, 270.0, 298.0, 278.0, 262.0, 260.0, 235.0, 252.0, 276.0, 240.0, 290.0, 286.0, 272.0, 250.0, 287.0, 289.0, 272.0, 261.0, 261.0, 266.0, 262.0, 260.0, 268.0, 259.0, 239.0, 243.0, 299.0, 280.0, 270.0, 246.0, 262.0, 257.0, 277.0, 293.0, 268.0, 305.0, 168.0, 183.0, 270.0, 255.0, 269.0, 256.0, 257.0, 265.0, 234.0, 234.0, 270.0, 312.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7123105813465136, "mean_inference_ms": 1.2824304752478446, "mean_action_processing_ms": 0.1355679315392704, "mean_env_wait_ms": 0.8581520951086739, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 582.0, "episode_reward_min": 237.0, "episode_reward_mean": 539.65, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 117.0}, "policy_reward_max": {"ppo": 312.0}, "policy_reward_mean": {"ppo": 269.825}, "hist_stats": {"episode_reward": [576.0, 533.0, 525.0, 530.0, 579.0, 533.0, 519.0, 579.0, 525.0, 530.0, 525.0, 522.0, 465.0, 573.0, 573.0, 525.0, 579.0, 570.0, 570.0, 519.0, 525.0, 576.0, 525.0, 570.0, 525.0, 570.0, 573.0, 525.0, 573.0, 525.0, 522.0, 573.0, 567.0, 525.0, 573.0, 576.0, 519.0, 567.0, 525.0, 573.0, 576.0, 570.0, 573.0, 573.0, 576.0, 573.0, 573.0, 573.0, 573.0, 579.0, 519.0, 573.0, 570.0, 576.0, 525.0, 530.0, 573.0, 
530.0, 576.0, 519.0, 573.0, 237.0, 573.0, 519.0, 525.0, 570.0, 576.0, 525.0, 525.0, 525.0, 516.0, 530.0, 573.0, 533.0, 436.0, 516.0, 530.0, 576.0, 522.0, 487.0, 516.0, 576.0, 522.0, 576.0, 533.0, 527.0, 522.0, 527.0, 482.0, 579.0, 516.0, 519.0, 570.0, 573.0, 351.0, 525.0, 525.0, 522.0, 468.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [293.0, 283.0, 264.0, 269.0, 267.0, 258.0, 251.0, 279.0, 286.0, 293.0, 267.0, 266.0, 259.0, 260.0, 285.0, 294.0, 256.0, 269.0, 274.0, 256.0, 271.0, 254.0, 258.0, 264.0, 238.0, 227.0, 290.0, 283.0, 287.0, 286.0, 265.0, 260.0, 283.0, 296.0, 283.0, 287.0, 288.0, 282.0, 271.0, 248.0, 265.0, 260.0, 292.0, 284.0, 279.0, 246.0, 289.0, 281.0, 266.0, 259.0, 292.0, 278.0, 293.0, 280.0, 264.0, 261.0, 292.0, 281.0, 268.0, 257.0, 267.0, 255.0, 286.0, 287.0, 278.0, 289.0, 267.0, 258.0, 287.0, 286.0, 300.0, 276.0, 263.0, 256.0, 284.0, 283.0, 257.0, 268.0, 284.0, 289.0, 293.0, 283.0, 304.0, 266.0, 284.0, 289.0, 283.0, 290.0, 290.0, 286.0, 284.0, 289.0, 288.0, 285.0, 297.0, 276.0, 280.0, 293.0, 288.0, 291.0, 261.0, 258.0, 287.0, 286.0, 277.0, 293.0, 289.0, 287.0, 259.0, 266.0, 271.0, 259.0, 286.0, 287.0, 259.0, 271.0, 291.0, 285.0, 253.0, 266.0, 288.0, 285.0, 117.0, 120.0, 290.0, 283.0, 273.0, 246.0, 256.0, 269.0, 282.0, 288.0, 288.0, 288.0, 265.0, 260.0, 264.0, 261.0, 257.0, 268.0, 257.0, 259.0, 255.0, 275.0, 286.0, 287.0, 264.0, 269.0, 237.0, 199.0, 252.0, 264.0, 260.0, 270.0, 298.0, 278.0, 262.0, 260.0, 235.0, 252.0, 276.0, 240.0, 290.0, 286.0, 272.0, 
250.0, 287.0, 289.0, 272.0, 261.0, 261.0, 266.0, 262.0, 260.0, 268.0, 259.0, 239.0, 243.0, 299.0, 280.0, 270.0, 246.0, 262.0, 257.0, 277.0, 293.0, 268.0, 305.0, 168.0, 183.0, 270.0, 255.0, 269.0, 256.0, 257.0, 265.0, 234.0, 234.0, 270.0, 312.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7123105813465136, "mean_inference_ms": 1.2824304752478446, "mean_action_processing_ms": 0.1355679315392704, "mean_env_wait_ms": 0.8581520951086739, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 5401600, "num_agent_steps_trained": 5401600, "num_env_steps_sampled": 2700800, "num_env_steps_trained": 2700800, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 2700800, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 5401600, "timers": {"training_iteration_time_ms": 3799.432, "learn_time_ms": 1153.936, "learn_throughput": 11092.47, "synch_weights_time_ms": 14.112}, "counters": {"num_env_steps_sampled": 2700800, "num_env_steps_trained": 2700800, "num_agent_steps_sampled": 5401600, "num_agent_steps_trained": 5401600}, "done": false, "episodes_total": 6752, "training_iteration": 211, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-14-13", "timestamp": 1666581253, "time_this_iter_s": 3.7709083557128906, "time_total_s": 824.1601939201355, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 824.1601939201355, "timesteps_since_restore": 0, "iterations_since_restore": 211, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 21.15, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 186.0, "sparse_reward_min": 80, "sparse_reward_max": 200, "shaped_reward_mean": 166.08, "shaped_reward_min": 77, "shaped_reward_max": 182, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.39, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 16.09, "onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 14.86, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 15.57, "useful_onion_pickup_agent_1_min": 6, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.53, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.34, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.24, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, 
"useful_onion_drop_agent_1_mean": 0.16, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 14.47, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 15.43, "potting_onion_agent_1_min": 6, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.46, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 5.12, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 12, "useful_dish_pickup_agent_0_mean": 5.05, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.61, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.35, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.4, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.05, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.05, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 4.96, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.56, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 4.88, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.48, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.03, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 14.47, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 15.43, "optimal_onion_potting_agent_1_min": 6, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.47, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 15.43, "viable_onion_potting_agent_1_min": 6, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.00038073910400271416, "policy_loss": -0.0006448630592785776, "vf_loss": 
7.668740272521973, "vf_explained_var": 0.6172520518302917, "kl": 0.00185579142998904, "entropy": 1.0054981708526611, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 2713600, "num_env_steps_trained": 2713600, "num_agent_steps_sampled": 5427200, "num_agent_steps_trained": 5427200}, "sampler_results": {"episode_reward_max": 582.0, "episode_reward_min": 237.0, "episode_reward_mean": 538.08, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 117.0}, "policy_reward_max": {"ppo": 312.0}, "policy_reward_mean": {"ppo": 269.04}, "custom_metrics": {"sparse_reward_mean": 186.0, "sparse_reward_min": 80, "sparse_reward_max": 200, "shaped_reward_mean": 166.08, "shaped_reward_min": 77, "shaped_reward_max": 182, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.39, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 16.09, 
"onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 14.86, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 15.57, "useful_onion_pickup_agent_1_min": 6, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.53, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.34, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.24, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.16, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 14.47, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 15.43, "potting_onion_agent_1_min": 6, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.46, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 5.12, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 12, "useful_dish_pickup_agent_0_mean": 5.05, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.61, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.35, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.4, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.05, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.05, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 4.96, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.56, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 4.88, "soup_delivery_agent_0_min": 1, 
"soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.48, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.03, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 14.47, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 15.43, "optimal_onion_potting_agent_1_min": 6, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.47, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 15.43, "viable_onion_potting_agent_1_min": 6, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [567.0, 525.0, 573.0, 576.0, 519.0, 567.0, 525.0, 573.0, 576.0, 570.0, 573.0, 573.0, 576.0, 573.0, 573.0, 573.0, 573.0, 579.0, 519.0, 573.0, 570.0, 576.0, 525.0, 530.0, 573.0, 530.0, 576.0, 519.0, 573.0, 237.0, 573.0, 519.0, 525.0, 570.0, 576.0, 525.0, 525.0, 525.0, 516.0, 530.0, 573.0, 533.0, 436.0, 516.0, 530.0, 576.0, 522.0, 487.0, 516.0, 576.0, 522.0, 576.0, 533.0, 527.0, 522.0, 527.0, 482.0, 579.0, 516.0, 519.0, 570.0, 573.0, 351.0, 525.0, 525.0, 522.0, 468.0, 582.0, 522.0, 573.0, 570.0, 519.0, 573.0, 522.0, 530.0, 522.0, 573.0, 576.0, 525.0, 459.0, 573.0, 522.0, 530.0, 570.0, 522.0, 573.0, 522.0, 527.0, 573.0, 519.0, 530.0, 519.0, 573.0, 525.0, 519.0, 519.0, 522.0, 530.0, 570.0, 573.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [278.0, 289.0, 267.0, 258.0, 287.0, 286.0, 300.0, 276.0, 263.0, 256.0, 284.0, 283.0, 257.0, 268.0, 284.0, 289.0, 293.0, 283.0, 304.0, 266.0, 284.0, 289.0, 283.0, 290.0, 290.0, 286.0, 284.0, 289.0, 288.0, 285.0, 297.0, 276.0, 280.0, 293.0, 288.0, 291.0, 261.0, 258.0, 287.0, 286.0, 277.0, 293.0, 289.0, 287.0, 259.0, 266.0, 271.0, 259.0, 286.0, 
287.0, 259.0, 271.0, 291.0, 285.0, 253.0, 266.0, 288.0, 285.0, 117.0, 120.0, 290.0, 283.0, 273.0, 246.0, 256.0, 269.0, 282.0, 288.0, 288.0, 288.0, 265.0, 260.0, 264.0, 261.0, 257.0, 268.0, 257.0, 259.0, 255.0, 275.0, 286.0, 287.0, 264.0, 269.0, 237.0, 199.0, 252.0, 264.0, 260.0, 270.0, 298.0, 278.0, 262.0, 260.0, 235.0, 252.0, 276.0, 240.0, 290.0, 286.0, 272.0, 250.0, 287.0, 289.0, 272.0, 261.0, 261.0, 266.0, 262.0, 260.0, 268.0, 259.0, 239.0, 243.0, 299.0, 280.0, 270.0, 246.0, 262.0, 257.0, 277.0, 293.0, 268.0, 305.0, 168.0, 183.0, 270.0, 255.0, 269.0, 256.0, 257.0, 265.0, 234.0, 234.0, 270.0, 312.0, 274.0, 248.0, 291.0, 282.0, 276.0, 294.0, 253.0, 266.0, 293.0, 280.0, 256.0, 266.0, 278.0, 252.0, 259.0, 263.0, 277.0, 296.0, 282.0, 294.0, 256.0, 269.0, 232.0, 227.0, 285.0, 288.0, 261.0, 261.0, 268.0, 262.0, 279.0, 291.0, 266.0, 256.0, 290.0, 283.0, 259.0, 263.0, 276.0, 251.0, 280.0, 293.0, 268.0, 251.0, 266.0, 264.0, 242.0, 277.0, 298.0, 275.0, 258.0, 267.0, 268.0, 251.0, 257.0, 262.0, 250.0, 272.0, 258.0, 272.0, 288.0, 282.0, 281.0, 292.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.712189427203816, "mean_inference_ms": 1.282044037184765, "mean_action_processing_ms": 0.13555489025797146, "mean_env_wait_ms": 0.8579692422947, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 582.0, "episode_reward_min": 237.0, "episode_reward_mean": 538.08, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 117.0}, "policy_reward_max": {"ppo": 312.0}, "policy_reward_mean": {"ppo": 269.04}, "hist_stats": {"episode_reward": [567.0, 525.0, 573.0, 576.0, 519.0, 567.0, 525.0, 573.0, 576.0, 570.0, 573.0, 573.0, 576.0, 573.0, 573.0, 573.0, 573.0, 579.0, 519.0, 573.0, 570.0, 576.0, 525.0, 530.0, 573.0, 530.0, 576.0, 519.0, 573.0, 237.0, 573.0, 519.0, 525.0, 570.0, 576.0, 525.0, 525.0, 525.0, 516.0, 530.0, 573.0, 533.0, 436.0, 516.0, 530.0, 576.0, 522.0, 487.0, 516.0, 576.0, 522.0, 576.0, 533.0, 527.0, 522.0, 527.0, 482.0, 
579.0, 516.0, 519.0, 570.0, 573.0, 351.0, 525.0, 525.0, 522.0, 468.0, 582.0, 522.0, 573.0, 570.0, 519.0, 573.0, 522.0, 530.0, 522.0, 573.0, 576.0, 525.0, 459.0, 573.0, 522.0, 530.0, 570.0, 522.0, 573.0, 522.0, 527.0, 573.0, 519.0, 530.0, 519.0, 573.0, 525.0, 519.0, 519.0, 522.0, 530.0, 570.0, 573.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [278.0, 289.0, 267.0, 258.0, 287.0, 286.0, 300.0, 276.0, 263.0, 256.0, 284.0, 283.0, 257.0, 268.0, 284.0, 289.0, 293.0, 283.0, 304.0, 266.0, 284.0, 289.0, 283.0, 290.0, 290.0, 286.0, 284.0, 289.0, 288.0, 285.0, 297.0, 276.0, 280.0, 293.0, 288.0, 291.0, 261.0, 258.0, 287.0, 286.0, 277.0, 293.0, 289.0, 287.0, 259.0, 266.0, 271.0, 259.0, 286.0, 287.0, 259.0, 271.0, 291.0, 285.0, 253.0, 266.0, 288.0, 285.0, 117.0, 120.0, 290.0, 283.0, 273.0, 246.0, 256.0, 269.0, 282.0, 288.0, 288.0, 288.0, 265.0, 260.0, 264.0, 261.0, 257.0, 268.0, 257.0, 259.0, 255.0, 275.0, 286.0, 287.0, 264.0, 269.0, 237.0, 199.0, 252.0, 264.0, 260.0, 270.0, 298.0, 278.0, 262.0, 260.0, 235.0, 252.0, 276.0, 240.0, 290.0, 286.0, 272.0, 250.0, 287.0, 289.0, 272.0, 261.0, 261.0, 266.0, 262.0, 260.0, 268.0, 259.0, 239.0, 243.0, 299.0, 280.0, 270.0, 246.0, 262.0, 257.0, 277.0, 293.0, 268.0, 305.0, 168.0, 183.0, 270.0, 255.0, 269.0, 256.0, 257.0, 265.0, 234.0, 234.0, 270.0, 312.0, 274.0, 248.0, 291.0, 282.0, 276.0, 294.0, 253.0, 266.0, 293.0, 280.0, 256.0, 266.0, 278.0, 252.0, 259.0, 263.0, 277.0, 296.0, 282.0, 294.0, 256.0, 269.0, 232.0, 227.0, 285.0, 288.0, 261.0, 261.0, 268.0, 
262.0, 279.0, 291.0, 266.0, 256.0, 290.0, 283.0, 259.0, 263.0, 276.0, 251.0, 280.0, 293.0, 268.0, 251.0, 266.0, 264.0, 242.0, 277.0, 298.0, 275.0, 258.0, 267.0, 268.0, 251.0, 257.0, 262.0, 250.0, 272.0, 258.0, 272.0, 288.0, 282.0, 281.0, 292.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.712189427203816, "mean_inference_ms": 1.282044037184765, "mean_action_processing_ms": 0.13555489025797146, "mean_env_wait_ms": 0.8579692422947, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 5427200, "num_agent_steps_trained": 5427200, "num_env_steps_sampled": 2713600, "num_env_steps_trained": 2713600, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 2713600, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 5427200, "timers": {"training_iteration_time_ms": 3723.329, "learn_time_ms": 1143.583, "learn_throughput": 11192.888, "synch_weights_time_ms": 13.75}, "counters": {"num_env_steps_sampled": 2713600, "num_env_steps_trained": 2713600, "num_agent_steps_sampled": 5427200, "num_agent_steps_trained": 5427200}, "done": false, "episodes_total": 6784, "training_iteration": 212, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-14-17", "timestamp": 1666581257, "time_this_iter_s": 3.75581431388855, "time_total_s": 827.916008234024, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": 
true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, 
"train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": 
{"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, 
"enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 827.916008234024, "timesteps_since_restore": 0, "iterations_since_restore": 212, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.74, "ram_util_percent": 10.62}}
+{"custom_metrics": {"sparse_reward_mean": 185.8, "sparse_reward_min": 120, "sparse_reward_max": 200, "shaped_reward_mean": 166.06, "shaped_reward_min": 111, "shaped_reward_max": 182, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.14, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 16.3, "onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 14.74, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 15.82, "useful_onion_pickup_agent_1_min": 6, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.48, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.3, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.16, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, 
"useful_onion_drop_agent_1_mean": 0.16, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 14.29, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 15.68, "potting_onion_agent_1_min": 6, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.53, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 5.07, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 12, "useful_dish_pickup_agent_0_mean": 5.04, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.51, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.33, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.44, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.06, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.04, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.52, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 10, "soup_delivery_agent_0_mean": 4.94, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.41, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.06, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 3, "optimal_onion_potting_agent_0_mean": 14.29, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 15.68, "optimal_onion_potting_agent_1_min": 6, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.29, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 15.68, "viable_onion_potting_agent_1_min": 6, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0011561871506273746, "policy_loss": 0.0008946903399191797, "vf_loss": 
7.627026557922363, "vf_explained_var": 0.601060152053833, "kl": 0.0017626096960157156, "entropy": 1.0024091005325317, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 2726400, "num_env_steps_trained": 2726400, "num_agent_steps_sampled": 5452800, "num_agent_steps_trained": 5452800}, "sampler_results": {"episode_reward_max": 582.0, "episode_reward_min": 351.0, "episode_reward_mean": 537.66, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 168.0}, "policy_reward_max": {"ppo": 312.0}, "policy_reward_mean": {"ppo": 268.83}, "custom_metrics": {"sparse_reward_mean": 185.8, "sparse_reward_min": 120, "sparse_reward_max": 200, "shaped_reward_mean": 166.06, "shaped_reward_min": 111, "shaped_reward_max": 182, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.14, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 16.3, 
"onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 14.74, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 15.82, "useful_onion_pickup_agent_1_min": 6, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.48, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.3, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.16, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.16, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 14.29, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 15.68, "potting_onion_agent_1_min": 6, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.53, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 5.07, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 12, "useful_dish_pickup_agent_0_mean": 5.04, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.51, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.33, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.44, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.06, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.04, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.52, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 10, "soup_delivery_agent_0_mean": 4.94, "soup_delivery_agent_0_min": 1, 
"soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.41, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.06, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 3, "optimal_onion_potting_agent_0_mean": 14.29, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 15.68, "optimal_onion_potting_agent_1_min": 6, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.29, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 15.68, "viable_onion_potting_agent_1_min": 6, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [525.0, 570.0, 576.0, 525.0, 525.0, 525.0, 516.0, 530.0, 573.0, 533.0, 436.0, 516.0, 530.0, 576.0, 522.0, 487.0, 516.0, 576.0, 522.0, 576.0, 533.0, 527.0, 522.0, 527.0, 482.0, 579.0, 516.0, 519.0, 570.0, 573.0, 351.0, 525.0, 525.0, 522.0, 468.0, 582.0, 522.0, 573.0, 570.0, 519.0, 573.0, 522.0, 530.0, 522.0, 573.0, 576.0, 525.0, 459.0, 573.0, 522.0, 530.0, 570.0, 522.0, 573.0, 522.0, 527.0, 573.0, 519.0, 530.0, 519.0, 573.0, 525.0, 519.0, 519.0, 522.0, 530.0, 570.0, 573.0, 570.0, 570.0, 513.0, 579.0, 510.0, 570.0, 519.0, 530.0, 516.0, 573.0, 510.0, 576.0, 530.0, 573.0, 570.0, 570.0, 573.0, 510.0, 576.0, 522.0, 522.0, 530.0, 579.0, 519.0, 573.0, 527.0, 525.0, 479.0, 570.0, 579.0, 570.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [256.0, 269.0, 282.0, 288.0, 288.0, 288.0, 265.0, 260.0, 264.0, 261.0, 257.0, 268.0, 257.0, 259.0, 255.0, 275.0, 286.0, 287.0, 264.0, 269.0, 237.0, 199.0, 252.0, 264.0, 260.0, 270.0, 298.0, 278.0, 262.0, 260.0, 235.0, 252.0, 276.0, 240.0, 290.0, 286.0, 272.0, 250.0, 287.0, 289.0, 272.0, 261.0, 261.0, 266.0, 262.0, 260.0, 268.0, 259.0, 239.0, 
243.0, 299.0, 280.0, 270.0, 246.0, 262.0, 257.0, 277.0, 293.0, 268.0, 305.0, 168.0, 183.0, 270.0, 255.0, 269.0, 256.0, 257.0, 265.0, 234.0, 234.0, 270.0, 312.0, 274.0, 248.0, 291.0, 282.0, 276.0, 294.0, 253.0, 266.0, 293.0, 280.0, 256.0, 266.0, 278.0, 252.0, 259.0, 263.0, 277.0, 296.0, 282.0, 294.0, 256.0, 269.0, 232.0, 227.0, 285.0, 288.0, 261.0, 261.0, 268.0, 262.0, 279.0, 291.0, 266.0, 256.0, 290.0, 283.0, 259.0, 263.0, 276.0, 251.0, 280.0, 293.0, 268.0, 251.0, 266.0, 264.0, 242.0, 277.0, 298.0, 275.0, 258.0, 267.0, 268.0, 251.0, 257.0, 262.0, 250.0, 272.0, 258.0, 272.0, 288.0, 282.0, 281.0, 292.0, 279.0, 291.0, 288.0, 282.0, 257.0, 256.0, 285.0, 294.0, 253.0, 257.0, 289.0, 281.0, 252.0, 267.0, 262.0, 268.0, 238.0, 278.0, 288.0, 285.0, 259.0, 251.0, 282.0, 294.0, 273.0, 257.0, 299.0, 274.0, 291.0, 279.0, 287.0, 283.0, 291.0, 282.0, 250.0, 260.0, 282.0, 294.0, 260.0, 262.0, 265.0, 257.0, 268.0, 262.0, 298.0, 281.0, 264.0, 255.0, 287.0, 286.0, 259.0, 268.0, 260.0, 265.0, 252.0, 227.0, 291.0, 279.0, 286.0, 293.0, 282.0, 288.0, 303.0, 279.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.712087007666387, "mean_inference_ms": 1.2816612307739192, "mean_action_processing_ms": 0.1355417041234792, "mean_env_wait_ms": 0.8577920995302917, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 582.0, "episode_reward_min": 351.0, "episode_reward_mean": 537.66, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 168.0}, "policy_reward_max": {"ppo": 312.0}, "policy_reward_mean": {"ppo": 268.83}, "hist_stats": {"episode_reward": [525.0, 570.0, 576.0, 525.0, 525.0, 525.0, 516.0, 530.0, 573.0, 533.0, 436.0, 516.0, 530.0, 576.0, 522.0, 487.0, 516.0, 576.0, 522.0, 576.0, 533.0, 527.0, 522.0, 527.0, 482.0, 579.0, 516.0, 519.0, 570.0, 573.0, 351.0, 525.0, 525.0, 522.0, 468.0, 582.0, 522.0, 573.0, 570.0, 519.0, 573.0, 522.0, 530.0, 522.0, 573.0, 576.0, 525.0, 459.0, 573.0, 522.0, 530.0, 570.0, 522.0, 573.0, 522.0, 527.0, 573.0, 
519.0, 530.0, 519.0, 573.0, 525.0, 519.0, 519.0, 522.0, 530.0, 570.0, 573.0, 570.0, 570.0, 513.0, 579.0, 510.0, 570.0, 519.0, 530.0, 516.0, 573.0, 510.0, 576.0, 530.0, 573.0, 570.0, 570.0, 573.0, 510.0, 576.0, 522.0, 522.0, 530.0, 579.0, 519.0, 573.0, 527.0, 525.0, 479.0, 570.0, 579.0, 570.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [256.0, 269.0, 282.0, 288.0, 288.0, 288.0, 265.0, 260.0, 264.0, 261.0, 257.0, 268.0, 257.0, 259.0, 255.0, 275.0, 286.0, 287.0, 264.0, 269.0, 237.0, 199.0, 252.0, 264.0, 260.0, 270.0, 298.0, 278.0, 262.0, 260.0, 235.0, 252.0, 276.0, 240.0, 290.0, 286.0, 272.0, 250.0, 287.0, 289.0, 272.0, 261.0, 261.0, 266.0, 262.0, 260.0, 268.0, 259.0, 239.0, 243.0, 299.0, 280.0, 270.0, 246.0, 262.0, 257.0, 277.0, 293.0, 268.0, 305.0, 168.0, 183.0, 270.0, 255.0, 269.0, 256.0, 257.0, 265.0, 234.0, 234.0, 270.0, 312.0, 274.0, 248.0, 291.0, 282.0, 276.0, 294.0, 253.0, 266.0, 293.0, 280.0, 256.0, 266.0, 278.0, 252.0, 259.0, 263.0, 277.0, 296.0, 282.0, 294.0, 256.0, 269.0, 232.0, 227.0, 285.0, 288.0, 261.0, 261.0, 268.0, 262.0, 279.0, 291.0, 266.0, 256.0, 290.0, 283.0, 259.0, 263.0, 276.0, 251.0, 280.0, 293.0, 268.0, 251.0, 266.0, 264.0, 242.0, 277.0, 298.0, 275.0, 258.0, 267.0, 268.0, 251.0, 257.0, 262.0, 250.0, 272.0, 258.0, 272.0, 288.0, 282.0, 281.0, 292.0, 279.0, 291.0, 288.0, 282.0, 257.0, 256.0, 285.0, 294.0, 253.0, 257.0, 289.0, 281.0, 252.0, 267.0, 262.0, 268.0, 238.0, 278.0, 288.0, 285.0, 259.0, 251.0, 282.0, 294.0, 273.0, 257.0, 299.0, 274.0, 291.0, 
279.0, 287.0, 283.0, 291.0, 282.0, 250.0, 260.0, 282.0, 294.0, 260.0, 262.0, 265.0, 257.0, 268.0, 262.0, 298.0, 281.0, 264.0, 255.0, 287.0, 286.0, 259.0, 268.0, 260.0, 265.0, 252.0, 227.0, 291.0, 279.0, 286.0, 293.0, 282.0, 288.0, 303.0, 279.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.712087007666387, "mean_inference_ms": 1.2816612307739192, "mean_action_processing_ms": 0.1355417041234792, "mean_env_wait_ms": 0.8577920995302917, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 5452800, "num_agent_steps_trained": 5452800, "num_env_steps_sampled": 2726400, "num_env_steps_trained": 2726400, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 2726400, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 5452800, "timers": {"training_iteration_time_ms": 3701.888, "learn_time_ms": 1133.36, "learn_throughput": 11293.849, "synch_weights_time_ms": 14.403}, "counters": {"num_env_steps_sampled": 2726400, "num_env_steps_trained": 2726400, "num_agent_steps_sampled": 5452800, "num_agent_steps_trained": 5452800}, "done": false, "episodes_total": 6816, "training_iteration": 213, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-14-21", "timestamp": 1666581261, "time_this_iter_s": 3.575244665145874, "time_total_s": 831.4912528991699, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 831.4912528991699, "timesteps_since_restore": 0, "iterations_since_restore": 213, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.82, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 188.2, "sparse_reward_min": 160, "sparse_reward_max": 200, "shaped_reward_mean": 166.79, "shaped_reward_min": 139, "shaped_reward_max": 182, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.13, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 16.46, "onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 14.73, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 16.03, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.5, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.31, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.17, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, 
"useful_onion_drop_agent_1_mean": 0.16, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 14.29, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 15.84, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.51, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 4.98, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 12, "useful_dish_pickup_agent_0_mean": 5.07, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.48, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.26, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.32, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.04, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.08, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.53, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 10, "soup_delivery_agent_0_mean": 4.97, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.47, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.04, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 3, "optimal_onion_potting_agent_0_mean": 14.29, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 15.84, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.29, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 15.84, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0014452373143285513, "policy_loss": -0.0017055664211511612, "vf_loss": 
7.584623336791992, "vf_explained_var": 0.6165767908096313, "kl": 0.002139848656952381, "entropy": 0.9962633848190308, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 2739200, "num_env_steps_trained": 2739200, "num_agent_steps_sampled": 5478400, "num_agent_steps_trained": 5478400}, "sampler_results": {"episode_reward_max": 582.0, "episode_reward_min": 459.0, "episode_reward_mean": 543.19, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 227.0}, "policy_reward_max": {"ppo": 312.0}, "policy_reward_mean": {"ppo": 271.595}, "custom_metrics": {"sparse_reward_mean": 188.2, "sparse_reward_min": 160, "sparse_reward_max": 200, "shaped_reward_mean": 166.79, "shaped_reward_min": 139, "shaped_reward_max": 182, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.13, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 16.46, 
"onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 14.73, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 16.03, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.5, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.31, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.17, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.16, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 14.29, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 15.84, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.51, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 4.98, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 12, "useful_dish_pickup_agent_0_mean": 5.07, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.48, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.26, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.32, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.04, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.08, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.53, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 10, "soup_delivery_agent_0_mean": 4.97, "soup_delivery_agent_0_min": 1, 
"soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.47, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.04, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 3, "optimal_onion_potting_agent_0_mean": 14.29, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 15.84, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.29, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 15.84, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [525.0, 522.0, 468.0, 582.0, 522.0, 573.0, 570.0, 519.0, 573.0, 522.0, 530.0, 522.0, 573.0, 576.0, 525.0, 459.0, 573.0, 522.0, 530.0, 570.0, 522.0, 573.0, 522.0, 527.0, 573.0, 519.0, 530.0, 519.0, 573.0, 525.0, 519.0, 519.0, 522.0, 530.0, 570.0, 573.0, 570.0, 570.0, 513.0, 579.0, 510.0, 570.0, 519.0, 530.0, 516.0, 573.0, 510.0, 576.0, 530.0, 573.0, 570.0, 570.0, 573.0, 510.0, 576.0, 522.0, 522.0, 530.0, 579.0, 519.0, 573.0, 527.0, 525.0, 479.0, 570.0, 579.0, 570.0, 582.0, 513.0, 573.0, 579.0, 522.0, 519.0, 576.0, 519.0, 522.0, 522.0, 525.0, 582.0, 573.0, 533.0, 470.0, 582.0, 530.0, 570.0, 570.0, 513.0, 579.0, 519.0, 510.0, 573.0, 573.0, 522.0, 522.0, 539.0, 573.0, 570.0, 576.0, 570.0, 513.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [269.0, 256.0, 257.0, 265.0, 234.0, 234.0, 270.0, 312.0, 274.0, 248.0, 291.0, 282.0, 276.0, 294.0, 253.0, 266.0, 293.0, 280.0, 256.0, 266.0, 278.0, 252.0, 259.0, 263.0, 277.0, 296.0, 282.0, 294.0, 256.0, 269.0, 232.0, 227.0, 285.0, 288.0, 261.0, 261.0, 268.0, 262.0, 279.0, 291.0, 266.0, 256.0, 290.0, 283.0, 259.0, 263.0, 276.0, 251.0, 280.0, 
293.0, 268.0, 251.0, 266.0, 264.0, 242.0, 277.0, 298.0, 275.0, 258.0, 267.0, 268.0, 251.0, 257.0, 262.0, 250.0, 272.0, 258.0, 272.0, 288.0, 282.0, 281.0, 292.0, 279.0, 291.0, 288.0, 282.0, 257.0, 256.0, 285.0, 294.0, 253.0, 257.0, 289.0, 281.0, 252.0, 267.0, 262.0, 268.0, 238.0, 278.0, 288.0, 285.0, 259.0, 251.0, 282.0, 294.0, 273.0, 257.0, 299.0, 274.0, 291.0, 279.0, 287.0, 283.0, 291.0, 282.0, 250.0, 260.0, 282.0, 294.0, 260.0, 262.0, 265.0, 257.0, 268.0, 262.0, 298.0, 281.0, 264.0, 255.0, 287.0, 286.0, 259.0, 268.0, 260.0, 265.0, 252.0, 227.0, 291.0, 279.0, 286.0, 293.0, 282.0, 288.0, 303.0, 279.0, 246.0, 267.0, 286.0, 287.0, 285.0, 294.0, 262.0, 260.0, 264.0, 255.0, 293.0, 283.0, 252.0, 267.0, 263.0, 259.0, 266.0, 256.0, 270.0, 255.0, 293.0, 289.0, 288.0, 285.0, 272.0, 261.0, 239.0, 231.0, 289.0, 293.0, 260.0, 270.0, 276.0, 294.0, 280.0, 290.0, 258.0, 255.0, 287.0, 292.0, 265.0, 254.0, 262.0, 248.0, 284.0, 289.0, 286.0, 287.0, 263.0, 259.0, 250.0, 272.0, 273.0, 266.0, 295.0, 278.0, 283.0, 287.0, 281.0, 295.0, 291.0, 279.0, 256.0, 257.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.711992025997882, "mean_inference_ms": 1.2812842286076684, "mean_action_processing_ms": 0.13553223868846914, "mean_env_wait_ms": 0.8576304489114933, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 582.0, "episode_reward_min": 459.0, "episode_reward_mean": 543.19, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 227.0}, "policy_reward_max": {"ppo": 312.0}, "policy_reward_mean": {"ppo": 271.595}, "hist_stats": {"episode_reward": [525.0, 522.0, 468.0, 582.0, 522.0, 573.0, 570.0, 519.0, 573.0, 522.0, 530.0, 522.0, 573.0, 576.0, 525.0, 459.0, 573.0, 522.0, 530.0, 570.0, 522.0, 573.0, 522.0, 527.0, 573.0, 519.0, 530.0, 519.0, 573.0, 525.0, 519.0, 519.0, 522.0, 530.0, 570.0, 573.0, 570.0, 570.0, 513.0, 579.0, 510.0, 570.0, 519.0, 530.0, 516.0, 573.0, 510.0, 576.0, 530.0, 573.0, 570.0, 570.0, 573.0, 510.0, 576.0, 522.0, 522.0, 
530.0, 579.0, 519.0, 573.0, 527.0, 525.0, 479.0, 570.0, 579.0, 570.0, 582.0, 513.0, 573.0, 579.0, 522.0, 519.0, 576.0, 519.0, 522.0, 522.0, 525.0, 582.0, 573.0, 533.0, 470.0, 582.0, 530.0, 570.0, 570.0, 513.0, 579.0, 519.0, 510.0, 573.0, 573.0, 522.0, 522.0, 539.0, 573.0, 570.0, 576.0, 570.0, 513.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [269.0, 256.0, 257.0, 265.0, 234.0, 234.0, 270.0, 312.0, 274.0, 248.0, 291.0, 282.0, 276.0, 294.0, 253.0, 266.0, 293.0, 280.0, 256.0, 266.0, 278.0, 252.0, 259.0, 263.0, 277.0, 296.0, 282.0, 294.0, 256.0, 269.0, 232.0, 227.0, 285.0, 288.0, 261.0, 261.0, 268.0, 262.0, 279.0, 291.0, 266.0, 256.0, 290.0, 283.0, 259.0, 263.0, 276.0, 251.0, 280.0, 293.0, 268.0, 251.0, 266.0, 264.0, 242.0, 277.0, 298.0, 275.0, 258.0, 267.0, 268.0, 251.0, 257.0, 262.0, 250.0, 272.0, 258.0, 272.0, 288.0, 282.0, 281.0, 292.0, 279.0, 291.0, 288.0, 282.0, 257.0, 256.0, 285.0, 294.0, 253.0, 257.0, 289.0, 281.0, 252.0, 267.0, 262.0, 268.0, 238.0, 278.0, 288.0, 285.0, 259.0, 251.0, 282.0, 294.0, 273.0, 257.0, 299.0, 274.0, 291.0, 279.0, 287.0, 283.0, 291.0, 282.0, 250.0, 260.0, 282.0, 294.0, 260.0, 262.0, 265.0, 257.0, 268.0, 262.0, 298.0, 281.0, 264.0, 255.0, 287.0, 286.0, 259.0, 268.0, 260.0, 265.0, 252.0, 227.0, 291.0, 279.0, 286.0, 293.0, 282.0, 288.0, 303.0, 279.0, 246.0, 267.0, 286.0, 287.0, 285.0, 294.0, 262.0, 260.0, 264.0, 255.0, 293.0, 283.0, 252.0, 267.0, 263.0, 259.0, 266.0, 256.0, 270.0, 255.0, 293.0, 289.0, 288.0, 285.0, 272.0, 261.0, 239.0, 231.0, 289.0, 
293.0, 260.0, 270.0, 276.0, 294.0, 280.0, 290.0, 258.0, 255.0, 287.0, 292.0, 265.0, 254.0, 262.0, 248.0, 284.0, 289.0, 286.0, 287.0, 263.0, 259.0, 250.0, 272.0, 273.0, 266.0, 295.0, 278.0, 283.0, 287.0, 281.0, 295.0, 291.0, 279.0, 256.0, 257.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.711992025997882, "mean_inference_ms": 1.2812842286076684, "mean_action_processing_ms": 0.13553223868846914, "mean_env_wait_ms": 0.8576304489114933, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 5478400, "num_agent_steps_trained": 5478400, "num_env_steps_sampled": 2739200, "num_env_steps_trained": 2739200, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 2739200, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 5478400, "timers": {"training_iteration_time_ms": 3678.396, "learn_time_ms": 1128.202, "learn_throughput": 11345.488, "synch_weights_time_ms": 14.287}, "counters": {"num_env_steps_sampled": 2739200, "num_env_steps_trained": 2739200, "num_agent_steps_sampled": 5478400, "num_agent_steps_trained": 5478400}, "done": false, "episodes_total": 6848, "training_iteration": 214, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-14-24", "timestamp": 1666581264, "time_this_iter_s": 3.628016233444214, "time_total_s": 835.1192691326141, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 835.1192691326141, "timesteps_since_restore": 0, "iterations_since_restore": 214, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 21.083333333333332, "ram_util_percent": 10.616666666666667}}
+{"custom_metrics": {"sparse_reward_mean": 188.8, "sparse_reward_min": 160, "sparse_reward_max": 200, "shaped_reward_mean": 167.95, "shaped_reward_min": 145, "shaped_reward_max": 182, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.08, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 16.49, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 14.81, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 16.13, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.36, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.25, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.15, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, 
"useful_onion_drop_agent_1_mean": 0.13, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.41, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 15.96, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 25, "dish_pickup_agent_0_mean": 5.5, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 4.88, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.11, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.47, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.18, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.22, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.19, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.51, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 10, "soup_delivery_agent_0_mean": 5.06, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.44, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.06, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 3, "optimal_onion_potting_agent_0_mean": 14.41, "optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 15.96, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 25, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.41, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 15.96, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 25, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0008325978415086865, "policy_loss": 0.0005480643594637513, "vf_loss": 
7.812173843383789, "vf_explained_var": 0.6112537384033203, "kl": 0.001983209513127804, "entropy": 0.9933664798736572, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 2752000, "num_env_steps_trained": 2752000, "num_agent_steps_sampled": 5504000, "num_agent_steps_trained": 5504000}, "sampler_results": {"episode_reward_max": 582.0, "episode_reward_min": 465.0, "episode_reward_mean": 545.55, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 225.0}, "policy_reward_max": {"ppo": 303.0}, "policy_reward_mean": {"ppo": 272.775}, "custom_metrics": {"sparse_reward_mean": 188.8, "sparse_reward_min": 160, "sparse_reward_max": 200, "shaped_reward_mean": 167.95, "shaped_reward_min": 145, "shaped_reward_max": 182, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.08, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 16.49, 
"onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 14.81, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 16.13, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.36, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.25, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.15, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.13, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.41, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 15.96, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 25, "dish_pickup_agent_0_mean": 5.5, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 4.88, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.11, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.47, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.18, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.22, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.19, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.51, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 10, "soup_delivery_agent_0_mean": 5.06, "soup_delivery_agent_0_min": 2, 
"soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.44, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.06, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 3, "optimal_onion_potting_agent_0_mean": 14.41, "optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 15.96, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 25, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.41, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 15.96, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 25, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [522.0, 530.0, 570.0, 573.0, 570.0, 570.0, 513.0, 579.0, 510.0, 570.0, 519.0, 530.0, 516.0, 573.0, 510.0, 576.0, 530.0, 573.0, 570.0, 570.0, 573.0, 510.0, 576.0, 522.0, 522.0, 530.0, 579.0, 519.0, 573.0, 527.0, 525.0, 479.0, 570.0, 579.0, 570.0, 582.0, 513.0, 573.0, 579.0, 522.0, 519.0, 576.0, 519.0, 522.0, 522.0, 525.0, 582.0, 573.0, 533.0, 470.0, 582.0, 530.0, 570.0, 570.0, 513.0, 579.0, 519.0, 510.0, 573.0, 573.0, 522.0, 522.0, 539.0, 573.0, 570.0, 576.0, 570.0, 513.0, 513.0, 579.0, 530.0, 576.0, 576.0, 522.0, 576.0, 527.0, 576.0, 570.0, 533.0, 522.0, 576.0, 533.0, 527.0, 536.0, 579.0, 525.0, 573.0, 570.0, 525.0, 479.0, 519.0, 579.0, 533.0, 573.0, 525.0, 522.0, 573.0, 579.0, 465.0, 522.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [250.0, 272.0, 258.0, 272.0, 288.0, 282.0, 281.0, 292.0, 279.0, 291.0, 288.0, 282.0, 257.0, 256.0, 285.0, 294.0, 253.0, 257.0, 289.0, 281.0, 252.0, 267.0, 262.0, 268.0, 238.0, 278.0, 288.0, 285.0, 259.0, 251.0, 282.0, 294.0, 273.0, 257.0, 299.0, 274.0, 291.0, 279.0, 287.0, 283.0, 291.0, 282.0, 250.0, 260.0, 282.0, 294.0, 260.0, 262.0, 265.0, 
257.0, 268.0, 262.0, 298.0, 281.0, 264.0, 255.0, 287.0, 286.0, 259.0, 268.0, 260.0, 265.0, 252.0, 227.0, 291.0, 279.0, 286.0, 293.0, 282.0, 288.0, 303.0, 279.0, 246.0, 267.0, 286.0, 287.0, 285.0, 294.0, 262.0, 260.0, 264.0, 255.0, 293.0, 283.0, 252.0, 267.0, 263.0, 259.0, 266.0, 256.0, 270.0, 255.0, 293.0, 289.0, 288.0, 285.0, 272.0, 261.0, 239.0, 231.0, 289.0, 293.0, 260.0, 270.0, 276.0, 294.0, 280.0, 290.0, 258.0, 255.0, 287.0, 292.0, 265.0, 254.0, 262.0, 248.0, 284.0, 289.0, 286.0, 287.0, 263.0, 259.0, 250.0, 272.0, 273.0, 266.0, 295.0, 278.0, 283.0, 287.0, 281.0, 295.0, 291.0, 279.0, 256.0, 257.0, 246.0, 267.0, 291.0, 288.0, 260.0, 270.0, 285.0, 291.0, 295.0, 281.0, 254.0, 268.0, 281.0, 295.0, 262.0, 265.0, 289.0, 287.0, 280.0, 290.0, 269.0, 264.0, 263.0, 259.0, 279.0, 297.0, 262.0, 271.0, 276.0, 251.0, 277.0, 259.0, 287.0, 292.0, 270.0, 255.0, 290.0, 283.0, 273.0, 297.0, 260.0, 265.0, 245.0, 234.0, 270.0, 249.0, 287.0, 292.0, 264.0, 269.0, 296.0, 277.0, 274.0, 251.0, 264.0, 258.0, 289.0, 284.0, 299.0, 280.0, 240.0, 225.0, 254.0, 268.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7118862679389156, "mean_inference_ms": 1.2809083346908747, "mean_action_processing_ms": 0.13552358445176294, "mean_env_wait_ms": 0.8574677314031771, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 582.0, "episode_reward_min": 465.0, "episode_reward_mean": 545.55, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 225.0}, "policy_reward_max": {"ppo": 303.0}, "policy_reward_mean": {"ppo": 272.775}, "hist_stats": {"episode_reward": [522.0, 530.0, 570.0, 573.0, 570.0, 570.0, 513.0, 579.0, 510.0, 570.0, 519.0, 530.0, 516.0, 573.0, 510.0, 576.0, 530.0, 573.0, 570.0, 570.0, 573.0, 510.0, 576.0, 522.0, 522.0, 530.0, 579.0, 519.0, 573.0, 527.0, 525.0, 479.0, 570.0, 579.0, 570.0, 582.0, 513.0, 573.0, 579.0, 522.0, 519.0, 576.0, 519.0, 522.0, 522.0, 525.0, 582.0, 573.0, 533.0, 470.0, 582.0, 530.0, 570.0, 570.0, 513.0, 579.0, 
519.0, 510.0, 573.0, 573.0, 522.0, 522.0, 539.0, 573.0, 570.0, 576.0, 570.0, 513.0, 513.0, 579.0, 530.0, 576.0, 576.0, 522.0, 576.0, 527.0, 576.0, 570.0, 533.0, 522.0, 576.0, 533.0, 527.0, 536.0, 579.0, 525.0, 573.0, 570.0, 525.0, 479.0, 519.0, 579.0, 533.0, 573.0, 525.0, 522.0, 573.0, 579.0, 465.0, 522.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [250.0, 272.0, 258.0, 272.0, 288.0, 282.0, 281.0, 292.0, 279.0, 291.0, 288.0, 282.0, 257.0, 256.0, 285.0, 294.0, 253.0, 257.0, 289.0, 281.0, 252.0, 267.0, 262.0, 268.0, 238.0, 278.0, 288.0, 285.0, 259.0, 251.0, 282.0, 294.0, 273.0, 257.0, 299.0, 274.0, 291.0, 279.0, 287.0, 283.0, 291.0, 282.0, 250.0, 260.0, 282.0, 294.0, 260.0, 262.0, 265.0, 257.0, 268.0, 262.0, 298.0, 281.0, 264.0, 255.0, 287.0, 286.0, 259.0, 268.0, 260.0, 265.0, 252.0, 227.0, 291.0, 279.0, 286.0, 293.0, 282.0, 288.0, 303.0, 279.0, 246.0, 267.0, 286.0, 287.0, 285.0, 294.0, 262.0, 260.0, 264.0, 255.0, 293.0, 283.0, 252.0, 267.0, 263.0, 259.0, 266.0, 256.0, 270.0, 255.0, 293.0, 289.0, 288.0, 285.0, 272.0, 261.0, 239.0, 231.0, 289.0, 293.0, 260.0, 270.0, 276.0, 294.0, 280.0, 290.0, 258.0, 255.0, 287.0, 292.0, 265.0, 254.0, 262.0, 248.0, 284.0, 289.0, 286.0, 287.0, 263.0, 259.0, 250.0, 272.0, 273.0, 266.0, 295.0, 278.0, 283.0, 287.0, 281.0, 295.0, 291.0, 279.0, 256.0, 257.0, 246.0, 267.0, 291.0, 288.0, 260.0, 270.0, 285.0, 291.0, 295.0, 281.0, 254.0, 268.0, 281.0, 295.0, 262.0, 265.0, 289.0, 287.0, 280.0, 290.0, 269.0, 264.0, 263.0, 259.0, 279.0, 297.0, 262.0, 271.0, 
276.0, 251.0, 277.0, 259.0, 287.0, 292.0, 270.0, 255.0, 290.0, 283.0, 273.0, 297.0, 260.0, 265.0, 245.0, 234.0, 270.0, 249.0, 287.0, 292.0, 264.0, 269.0, 296.0, 277.0, 274.0, 251.0, 264.0, 258.0, 289.0, 284.0, 299.0, 280.0, 240.0, 225.0, 254.0, 268.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7118862679389156, "mean_inference_ms": 1.2809083346908747, "mean_action_processing_ms": 0.13552358445176294, "mean_env_wait_ms": 0.8574677314031771, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 5504000, "num_agent_steps_trained": 5504000, "num_env_steps_sampled": 2752000, "num_env_steps_trained": 2752000, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 2752000, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 5504000, "timers": {"training_iteration_time_ms": 3667.458, "learn_time_ms": 1125.153, "learn_throughput": 11376.233, "synch_weights_time_ms": 14.192}, "counters": {"num_env_steps_sampled": 2752000, "num_env_steps_trained": 2752000, "num_agent_steps_sampled": 5504000, "num_agent_steps_trained": 5504000}, "done": false, "episodes_total": 6880, "training_iteration": 215, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-14-28", "timestamp": 1666581268, "time_this_iter_s": 3.688774347305298, "time_total_s": 838.8080434799194, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 838.8080434799194, "timesteps_since_restore": 0, "iterations_since_restore": 215, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.96, "ram_util_percent": 10.62}}
+{"custom_metrics": {"sparse_reward_mean": 188.6, "sparse_reward_min": 160, "sparse_reward_max": 200, "shaped_reward_mean": 168.26, "shaped_reward_min": 145, "shaped_reward_max": 182, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.33, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 16.11, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 15.04, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 15.76, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.29, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.25, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.13, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, 
"useful_onion_drop_agent_1_mean": 0.12, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.76, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 15.65, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 25, "dish_pickup_agent_0_mean": 5.45, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 4.86, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.16, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.5, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.14, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.17, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.14, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.51, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.03, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.47, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.05, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.76, "optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 15.65, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 25, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.76, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 15.65, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 25, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0010710940696299076, "policy_loss": -0.0013515017926692963, "vf_loss": 
7.728179931640625, "vf_explained_var": 0.6077972650527954, "kl": 0.002291465178132057, "entropy": 0.9848192930221558, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 2764800, "num_env_steps_trained": 2764800, "num_agent_steps_sampled": 5529600, "num_agent_steps_trained": 5529600}, "sampler_results": {"episode_reward_max": 582.0, "episode_reward_min": 465.0, "episode_reward_mean": 545.46, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 225.0}, "policy_reward_max": {"ppo": 303.0}, "policy_reward_mean": {"ppo": 272.73}, "custom_metrics": {"sparse_reward_mean": 188.6, "sparse_reward_min": 160, "sparse_reward_max": 200, "shaped_reward_mean": 168.26, "shaped_reward_min": 145, "shaped_reward_max": 182, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.33, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 16.11, 
"onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 15.04, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 15.76, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.29, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.25, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.13, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.12, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.76, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 15.65, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 25, "dish_pickup_agent_0_mean": 5.45, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 4.86, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.16, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.5, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.14, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.17, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.14, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.51, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.03, "soup_delivery_agent_0_min": 1, 
"soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.47, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.05, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.76, "optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 15.65, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 25, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.76, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 15.65, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 25, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [570.0, 579.0, 570.0, 582.0, 513.0, 573.0, 579.0, 522.0, 519.0, 576.0, 519.0, 522.0, 522.0, 525.0, 582.0, 573.0, 533.0, 470.0, 582.0, 530.0, 570.0, 570.0, 513.0, 579.0, 519.0, 510.0, 573.0, 573.0, 522.0, 522.0, 539.0, 573.0, 570.0, 576.0, 570.0, 513.0, 513.0, 579.0, 530.0, 576.0, 576.0, 522.0, 576.0, 527.0, 576.0, 570.0, 533.0, 522.0, 576.0, 533.0, 527.0, 536.0, 579.0, 525.0, 573.0, 570.0, 525.0, 479.0, 519.0, 579.0, 533.0, 573.0, 525.0, 522.0, 573.0, 579.0, 465.0, 522.0, 576.0, 522.0, 470.0, 522.0, 527.0, 573.0, 573.0, 576.0, 579.0, 573.0, 570.0, 519.0, 527.0, 536.0, 570.0, 573.0, 522.0, 519.0, 527.0, 522.0, 525.0, 579.0, 522.0, 513.0, 516.0, 576.0, 576.0, 519.0, 573.0, 533.0, 570.0, 522.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [291.0, 279.0, 286.0, 293.0, 282.0, 288.0, 303.0, 279.0, 246.0, 267.0, 286.0, 287.0, 285.0, 294.0, 262.0, 260.0, 264.0, 255.0, 293.0, 283.0, 252.0, 267.0, 263.0, 259.0, 266.0, 256.0, 270.0, 255.0, 293.0, 289.0, 288.0, 285.0, 272.0, 261.0, 239.0, 231.0, 289.0, 293.0, 260.0, 270.0, 276.0, 294.0, 280.0, 290.0, 258.0, 255.0, 287.0, 292.0, 265.0, 
254.0, 262.0, 248.0, 284.0, 289.0, 286.0, 287.0, 263.0, 259.0, 250.0, 272.0, 273.0, 266.0, 295.0, 278.0, 283.0, 287.0, 281.0, 295.0, 291.0, 279.0, 256.0, 257.0, 246.0, 267.0, 291.0, 288.0, 260.0, 270.0, 285.0, 291.0, 295.0, 281.0, 254.0, 268.0, 281.0, 295.0, 262.0, 265.0, 289.0, 287.0, 280.0, 290.0, 269.0, 264.0, 263.0, 259.0, 279.0, 297.0, 262.0, 271.0, 276.0, 251.0, 277.0, 259.0, 287.0, 292.0, 270.0, 255.0, 290.0, 283.0, 273.0, 297.0, 260.0, 265.0, 245.0, 234.0, 270.0, 249.0, 287.0, 292.0, 264.0, 269.0, 296.0, 277.0, 274.0, 251.0, 264.0, 258.0, 289.0, 284.0, 299.0, 280.0, 240.0, 225.0, 254.0, 268.0, 286.0, 290.0, 262.0, 260.0, 233.0, 237.0, 281.0, 241.0, 264.0, 263.0, 290.0, 283.0, 288.0, 285.0, 284.0, 292.0, 301.0, 278.0, 288.0, 285.0, 298.0, 272.0, 267.0, 252.0, 253.0, 274.0, 273.0, 263.0, 290.0, 280.0, 293.0, 280.0, 267.0, 255.0, 253.0, 266.0, 255.0, 272.0, 254.0, 268.0, 270.0, 255.0, 282.0, 297.0, 261.0, 261.0, 272.0, 241.0, 262.0, 254.0, 289.0, 287.0, 302.0, 274.0, 247.0, 272.0, 298.0, 275.0, 261.0, 272.0, 286.0, 284.0, 265.0, 257.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7117419372962004, "mean_inference_ms": 1.2805191624502714, "mean_action_processing_ms": 0.13551508961273653, "mean_env_wait_ms": 0.8572922734685937, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 582.0, "episode_reward_min": 465.0, "episode_reward_mean": 545.46, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 225.0}, "policy_reward_max": {"ppo": 303.0}, "policy_reward_mean": {"ppo": 272.73}, "hist_stats": {"episode_reward": [570.0, 579.0, 570.0, 582.0, 513.0, 573.0, 579.0, 522.0, 519.0, 576.0, 519.0, 522.0, 522.0, 525.0, 582.0, 573.0, 533.0, 470.0, 582.0, 530.0, 570.0, 570.0, 513.0, 579.0, 519.0, 510.0, 573.0, 573.0, 522.0, 522.0, 539.0, 573.0, 570.0, 576.0, 570.0, 513.0, 513.0, 579.0, 530.0, 576.0, 576.0, 522.0, 576.0, 527.0, 576.0, 570.0, 533.0, 522.0, 576.0, 533.0, 527.0, 536.0, 579.0, 525.0, 573.0, 570.0, 525.0, 
479.0, 519.0, 579.0, 533.0, 573.0, 525.0, 522.0, 573.0, 579.0, 465.0, 522.0, 576.0, 522.0, 470.0, 522.0, 527.0, 573.0, 573.0, 576.0, 579.0, 573.0, 570.0, 519.0, 527.0, 536.0, 570.0, 573.0, 522.0, 519.0, 527.0, 522.0, 525.0, 579.0, 522.0, 513.0, 516.0, 576.0, 576.0, 519.0, 573.0, 533.0, 570.0, 522.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [291.0, 279.0, 286.0, 293.0, 282.0, 288.0, 303.0, 279.0, 246.0, 267.0, 286.0, 287.0, 285.0, 294.0, 262.0, 260.0, 264.0, 255.0, 293.0, 283.0, 252.0, 267.0, 263.0, 259.0, 266.0, 256.0, 270.0, 255.0, 293.0, 289.0, 288.0, 285.0, 272.0, 261.0, 239.0, 231.0, 289.0, 293.0, 260.0, 270.0, 276.0, 294.0, 280.0, 290.0, 258.0, 255.0, 287.0, 292.0, 265.0, 254.0, 262.0, 248.0, 284.0, 289.0, 286.0, 287.0, 263.0, 259.0, 250.0, 272.0, 273.0, 266.0, 295.0, 278.0, 283.0, 287.0, 281.0, 295.0, 291.0, 279.0, 256.0, 257.0, 246.0, 267.0, 291.0, 288.0, 260.0, 270.0, 285.0, 291.0, 295.0, 281.0, 254.0, 268.0, 281.0, 295.0, 262.0, 265.0, 289.0, 287.0, 280.0, 290.0, 269.0, 264.0, 263.0, 259.0, 279.0, 297.0, 262.0, 271.0, 276.0, 251.0, 277.0, 259.0, 287.0, 292.0, 270.0, 255.0, 290.0, 283.0, 273.0, 297.0, 260.0, 265.0, 245.0, 234.0, 270.0, 249.0, 287.0, 292.0, 264.0, 269.0, 296.0, 277.0, 274.0, 251.0, 264.0, 258.0, 289.0, 284.0, 299.0, 280.0, 240.0, 225.0, 254.0, 268.0, 286.0, 290.0, 262.0, 260.0, 233.0, 237.0, 281.0, 241.0, 264.0, 263.0, 290.0, 283.0, 288.0, 285.0, 284.0, 292.0, 301.0, 278.0, 288.0, 285.0, 298.0, 272.0, 267.0, 252.0, 253.0, 274.0, 273.0, 263.0, 290.0, 
280.0, 293.0, 280.0, 267.0, 255.0, 253.0, 266.0, 255.0, 272.0, 254.0, 268.0, 270.0, 255.0, 282.0, 297.0, 261.0, 261.0, 272.0, 241.0, 262.0, 254.0, 289.0, 287.0, 302.0, 274.0, 247.0, 272.0, 298.0, 275.0, 261.0, 272.0, 286.0, 284.0, 265.0, 257.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7117419372962004, "mean_inference_ms": 1.2805191624502714, "mean_action_processing_ms": 0.13551508961273653, "mean_env_wait_ms": 0.8572922734685937, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 5529600, "num_agent_steps_trained": 5529600, "num_env_steps_sampled": 2764800, "num_env_steps_trained": 2764800, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 2764800, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 5529600, "timers": {"training_iteration_time_ms": 3675.84, "learn_time_ms": 1133.075, "learn_throughput": 11296.692, "synch_weights_time_ms": 14.355}, "counters": {"num_env_steps_sampled": 2764800, "num_env_steps_trained": 2764800, "num_agent_steps_sampled": 5529600, "num_agent_steps_trained": 5529600}, "done": false, "episodes_total": 6912, "training_iteration": 216, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-14-32", "timestamp": 1666581272, "time_this_iter_s": 3.801997661590576, "time_total_s": 842.61004114151, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 842.61004114151, "timesteps_since_restore": 0, "iterations_since_restore": 216, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.566666666666666, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 188.0, "sparse_reward_min": 140, "sparse_reward_max": 200, "shaped_reward_mean": 167.89, "shaped_reward_min": 125, "shaped_reward_max": 182, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.44, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 15.94, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 15.21, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 15.58, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.26, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.28, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.09, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, 
"useful_onion_drop_agent_1_mean": 0.12, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.92, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 15.45, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 25, "dish_pickup_agent_0_mean": 5.23, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.02, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.98, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.63, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.07, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.19, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.0, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.63, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 4.9, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.57, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.03, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.92, "optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 15.45, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 25, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.92, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 15.45, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 25, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0015308646252378821, "policy_loss": -0.0018033639062196016, "vf_loss": 
7.642875671386719, "vf_explained_var": 0.6139187216758728, "kl": 0.0021336167119443417, "entropy": 0.9835748672485352, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 2777600, "num_env_steps_trained": 2777600, "num_agent_steps_sampled": 5555200, "num_agent_steps_trained": 5555200}, "sampler_results": {"episode_reward_max": 582.0, "episode_reward_min": 405.0, "episode_reward_mean": 543.89, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 200.0}, "policy_reward_max": {"ppo": 311.0}, "policy_reward_mean": {"ppo": 271.945}, "custom_metrics": {"sparse_reward_mean": 188.0, "sparse_reward_min": 140, "sparse_reward_max": 200, "shaped_reward_mean": 167.89, "shaped_reward_min": 125, "shaped_reward_max": 182, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.44, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 15.94, 
"onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 15.21, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 15.58, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.26, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.28, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.09, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.12, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.92, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 15.45, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 25, "dish_pickup_agent_0_mean": 5.23, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.02, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.98, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.63, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.07, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.19, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.0, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.63, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 4.9, "soup_delivery_agent_0_min": 1, 
"soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.57, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.03, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.92, "optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 15.45, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 25, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.92, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 15.45, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 25, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [570.0, 576.0, 570.0, 513.0, 513.0, 579.0, 530.0, 576.0, 576.0, 522.0, 576.0, 527.0, 576.0, 570.0, 533.0, 522.0, 576.0, 533.0, 527.0, 536.0, 579.0, 525.0, 573.0, 570.0, 525.0, 479.0, 519.0, 579.0, 533.0, 573.0, 525.0, 522.0, 573.0, 579.0, 465.0, 522.0, 576.0, 522.0, 470.0, 522.0, 527.0, 573.0, 573.0, 576.0, 579.0, 573.0, 570.0, 519.0, 527.0, 536.0, 570.0, 573.0, 522.0, 519.0, 527.0, 522.0, 525.0, 579.0, 522.0, 513.0, 516.0, 576.0, 576.0, 519.0, 573.0, 533.0, 570.0, 522.0, 576.0, 510.0, 564.0, 530.0, 582.0, 582.0, 567.0, 570.0, 522.0, 573.0, 522.0, 579.0, 579.0, 522.0, 519.0, 519.0, 525.0, 525.0, 579.0, 576.0, 530.0, 470.0, 525.0, 405.0, 533.0, 573.0, 522.0, 579.0, 519.0, 570.0, 527.0, 573.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [283.0, 287.0, 281.0, 295.0, 291.0, 279.0, 256.0, 257.0, 246.0, 267.0, 291.0, 288.0, 260.0, 270.0, 285.0, 291.0, 295.0, 281.0, 254.0, 268.0, 281.0, 295.0, 262.0, 265.0, 289.0, 287.0, 280.0, 290.0, 269.0, 264.0, 263.0, 259.0, 279.0, 297.0, 262.0, 271.0, 276.0, 251.0, 277.0, 259.0, 287.0, 292.0, 270.0, 255.0, 290.0, 283.0, 273.0, 297.0, 260.0, 
265.0, 245.0, 234.0, 270.0, 249.0, 287.0, 292.0, 264.0, 269.0, 296.0, 277.0, 274.0, 251.0, 264.0, 258.0, 289.0, 284.0, 299.0, 280.0, 240.0, 225.0, 254.0, 268.0, 286.0, 290.0, 262.0, 260.0, 233.0, 237.0, 281.0, 241.0, 264.0, 263.0, 290.0, 283.0, 288.0, 285.0, 284.0, 292.0, 301.0, 278.0, 288.0, 285.0, 298.0, 272.0, 267.0, 252.0, 253.0, 274.0, 273.0, 263.0, 290.0, 280.0, 293.0, 280.0, 267.0, 255.0, 253.0, 266.0, 255.0, 272.0, 254.0, 268.0, 270.0, 255.0, 282.0, 297.0, 261.0, 261.0, 272.0, 241.0, 262.0, 254.0, 289.0, 287.0, 302.0, 274.0, 247.0, 272.0, 298.0, 275.0, 261.0, 272.0, 286.0, 284.0, 265.0, 257.0, 290.0, 286.0, 250.0, 260.0, 285.0, 279.0, 268.0, 262.0, 289.0, 293.0, 293.0, 289.0, 273.0, 294.0, 285.0, 285.0, 268.0, 254.0, 286.0, 287.0, 264.0, 258.0, 297.0, 282.0, 282.0, 297.0, 264.0, 258.0, 249.0, 270.0, 263.0, 256.0, 259.0, 266.0, 264.0, 261.0, 288.0, 291.0, 265.0, 311.0, 261.0, 269.0, 243.0, 227.0, 260.0, 265.0, 200.0, 205.0, 252.0, 281.0, 277.0, 296.0, 272.0, 250.0, 281.0, 298.0, 264.0, 255.0, 291.0, 279.0, 272.0, 255.0, 283.0, 290.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7115811096238751, "mean_inference_ms": 1.2801026959798782, "mean_action_processing_ms": 0.13550184600075288, "mean_env_wait_ms": 0.857080086310055, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 582.0, "episode_reward_min": 405.0, "episode_reward_mean": 543.89, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 200.0}, "policy_reward_max": {"ppo": 311.0}, "policy_reward_mean": {"ppo": 271.945}, "hist_stats": {"episode_reward": [570.0, 576.0, 570.0, 513.0, 513.0, 579.0, 530.0, 576.0, 576.0, 522.0, 576.0, 527.0, 576.0, 570.0, 533.0, 522.0, 576.0, 533.0, 527.0, 536.0, 579.0, 525.0, 573.0, 570.0, 525.0, 479.0, 519.0, 579.0, 533.0, 573.0, 525.0, 522.0, 573.0, 579.0, 465.0, 522.0, 576.0, 522.0, 470.0, 522.0, 527.0, 573.0, 573.0, 576.0, 579.0, 573.0, 570.0, 519.0, 527.0, 536.0, 570.0, 573.0, 522.0, 519.0, 527.0, 522.0, 525.0, 
579.0, 522.0, 513.0, 516.0, 576.0, 576.0, 519.0, 573.0, 533.0, 570.0, 522.0, 576.0, 510.0, 564.0, 530.0, 582.0, 582.0, 567.0, 570.0, 522.0, 573.0, 522.0, 579.0, 579.0, 522.0, 519.0, 519.0, 525.0, 525.0, 579.0, 576.0, 530.0, 470.0, 525.0, 405.0, 533.0, 573.0, 522.0, 579.0, 519.0, 570.0, 527.0, 573.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [283.0, 287.0, 281.0, 295.0, 291.0, 279.0, 256.0, 257.0, 246.0, 267.0, 291.0, 288.0, 260.0, 270.0, 285.0, 291.0, 295.0, 281.0, 254.0, 268.0, 281.0, 295.0, 262.0, 265.0, 289.0, 287.0, 280.0, 290.0, 269.0, 264.0, 263.0, 259.0, 279.0, 297.0, 262.0, 271.0, 276.0, 251.0, 277.0, 259.0, 287.0, 292.0, 270.0, 255.0, 290.0, 283.0, 273.0, 297.0, 260.0, 265.0, 245.0, 234.0, 270.0, 249.0, 287.0, 292.0, 264.0, 269.0, 296.0, 277.0, 274.0, 251.0, 264.0, 258.0, 289.0, 284.0, 299.0, 280.0, 240.0, 225.0, 254.0, 268.0, 286.0, 290.0, 262.0, 260.0, 233.0, 237.0, 281.0, 241.0, 264.0, 263.0, 290.0, 283.0, 288.0, 285.0, 284.0, 292.0, 301.0, 278.0, 288.0, 285.0, 298.0, 272.0, 267.0, 252.0, 253.0, 274.0, 273.0, 263.0, 290.0, 280.0, 293.0, 280.0, 267.0, 255.0, 253.0, 266.0, 255.0, 272.0, 254.0, 268.0, 270.0, 255.0, 282.0, 297.0, 261.0, 261.0, 272.0, 241.0, 262.0, 254.0, 289.0, 287.0, 302.0, 274.0, 247.0, 272.0, 298.0, 275.0, 261.0, 272.0, 286.0, 284.0, 265.0, 257.0, 290.0, 286.0, 250.0, 260.0, 285.0, 279.0, 268.0, 262.0, 289.0, 293.0, 293.0, 289.0, 273.0, 294.0, 285.0, 285.0, 268.0, 254.0, 286.0, 287.0, 264.0, 258.0, 297.0, 282.0, 282.0, 297.0, 264.0, 258.0, 249.0, 
270.0, 263.0, 256.0, 259.0, 266.0, 264.0, 261.0, 288.0, 291.0, 265.0, 311.0, 261.0, 269.0, 243.0, 227.0, 260.0, 265.0, 200.0, 205.0, 252.0, 281.0, 277.0, 296.0, 272.0, 250.0, 281.0, 298.0, 264.0, 255.0, 291.0, 279.0, 272.0, 255.0, 283.0, 290.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7115811096238751, "mean_inference_ms": 1.2801026959798782, "mean_action_processing_ms": 0.13550184600075288, "mean_env_wait_ms": 0.857080086310055, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 5555200, "num_agent_steps_trained": 5555200, "num_env_steps_sampled": 2777600, "num_env_steps_trained": 2777600, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 2777600, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 5555200, "timers": {"training_iteration_time_ms": 3663.772, "learn_time_ms": 1128.207, "learn_throughput": 11345.439, "synch_weights_time_ms": 13.645}, "counters": {"num_env_steps_sampled": 2777600, "num_env_steps_trained": 2777600, "num_agent_steps_sampled": 5555200, "num_agent_steps_trained": 5555200}, "done": false, "episodes_total": 6944, "training_iteration": 217, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-14-36", "timestamp": 1666581276, "time_this_iter_s": 3.6296896934509277, "time_total_s": 846.2397308349609, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 846.2397308349609, "timesteps_since_restore": 0, "iterations_since_restore": 217, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.700000000000003, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 187.8, "sparse_reward_min": 140, "sparse_reward_max": 200, "shaped_reward_mean": 167.64, "shaped_reward_min": 125, "shaped_reward_max": 182, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.51, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 15.97, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 15.21, "useful_onion_pickup_agent_0_min": 9, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 15.67, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.3, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.27, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.09, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, 
"useful_onion_drop_agent_1_mean": 0.12, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.91, "potting_onion_agent_0_min": 9, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 15.47, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.19, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 5.07, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 4.92, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.68, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.05, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.2, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 4.92, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.64, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 4.88, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.57, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.91, "optimal_onion_potting_agent_0_min": 9, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 15.47, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.91, "viable_onion_potting_agent_0_min": 9, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 15.47, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.001097099855542183, "policy_loss": -0.0013780472800135612, "vf_loss": 
7.6653265953063965, "vf_explained_var": 0.6124542951583862, "kl": 0.002241886919364333, "entropy": 0.9711683988571167, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 2790400, "num_env_steps_trained": 2790400, "num_agent_steps_sampled": 5580800, "num_agent_steps_trained": 5580800}, "sampler_results": {"episode_reward_max": 582.0, "episode_reward_min": 405.0, "episode_reward_mean": 543.24, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 200.0}, "policy_reward_max": {"ppo": 311.0}, "policy_reward_mean": {"ppo": 271.62}, "custom_metrics": {"sparse_reward_mean": 187.8, "sparse_reward_min": 140, "sparse_reward_max": 200, "shaped_reward_mean": 167.64, "shaped_reward_min": 125, "shaped_reward_max": 182, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.51, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 15.97, 
"onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 15.21, "useful_onion_pickup_agent_0_min": 9, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 15.67, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.3, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.27, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.09, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.12, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.91, "potting_onion_agent_0_min": 9, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 15.47, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.19, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 5.07, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 4.92, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.68, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.05, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.2, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 4.92, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.64, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 4.88, "soup_delivery_agent_0_min": 1, 
"soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.57, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.91, "optimal_onion_potting_agent_0_min": 9, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 15.47, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.91, "viable_onion_potting_agent_0_min": 9, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 15.47, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [573.0, 579.0, 465.0, 522.0, 576.0, 522.0, 470.0, 522.0, 527.0, 573.0, 573.0, 576.0, 579.0, 573.0, 570.0, 519.0, 527.0, 536.0, 570.0, 573.0, 522.0, 519.0, 527.0, 522.0, 525.0, 579.0, 522.0, 513.0, 516.0, 576.0, 576.0, 519.0, 573.0, 533.0, 570.0, 522.0, 576.0, 510.0, 564.0, 530.0, 582.0, 582.0, 567.0, 570.0, 522.0, 573.0, 522.0, 579.0, 579.0, 522.0, 519.0, 519.0, 525.0, 525.0, 579.0, 576.0, 530.0, 470.0, 525.0, 405.0, 533.0, 573.0, 522.0, 579.0, 519.0, 570.0, 527.0, 573.0, 525.0, 579.0, 525.0, 522.0, 576.0, 522.0, 530.0, 576.0, 576.0, 516.0, 513.0, 579.0, 576.0, 530.0, 519.0, 525.0, 530.0, 525.0, 525.0, 576.0, 527.0, 570.0, 579.0, 522.0, 522.0, 573.0, 519.0, 576.0, 525.0, 525.0, 576.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [289.0, 284.0, 299.0, 280.0, 240.0, 225.0, 254.0, 268.0, 286.0, 290.0, 262.0, 260.0, 233.0, 237.0, 281.0, 241.0, 264.0, 263.0, 290.0, 283.0, 288.0, 285.0, 284.0, 292.0, 301.0, 278.0, 288.0, 285.0, 298.0, 272.0, 267.0, 252.0, 253.0, 274.0, 273.0, 263.0, 290.0, 280.0, 293.0, 280.0, 267.0, 255.0, 253.0, 266.0, 255.0, 272.0, 254.0, 268.0, 270.0, 
255.0, 282.0, 297.0, 261.0, 261.0, 272.0, 241.0, 262.0, 254.0, 289.0, 287.0, 302.0, 274.0, 247.0, 272.0, 298.0, 275.0, 261.0, 272.0, 286.0, 284.0, 265.0, 257.0, 290.0, 286.0, 250.0, 260.0, 285.0, 279.0, 268.0, 262.0, 289.0, 293.0, 293.0, 289.0, 273.0, 294.0, 285.0, 285.0, 268.0, 254.0, 286.0, 287.0, 264.0, 258.0, 297.0, 282.0, 282.0, 297.0, 264.0, 258.0, 249.0, 270.0, 263.0, 256.0, 259.0, 266.0, 264.0, 261.0, 288.0, 291.0, 265.0, 311.0, 261.0, 269.0, 243.0, 227.0, 260.0, 265.0, 200.0, 205.0, 252.0, 281.0, 277.0, 296.0, 272.0, 250.0, 281.0, 298.0, 264.0, 255.0, 291.0, 279.0, 272.0, 255.0, 283.0, 290.0, 257.0, 268.0, 291.0, 288.0, 263.0, 262.0, 257.0, 265.0, 294.0, 282.0, 270.0, 252.0, 278.0, 252.0, 289.0, 287.0, 284.0, 292.0, 264.0, 252.0, 249.0, 264.0, 288.0, 291.0, 293.0, 283.0, 277.0, 253.0, 251.0, 268.0, 265.0, 260.0, 276.0, 254.0, 255.0, 270.0, 257.0, 268.0, 282.0, 294.0, 254.0, 273.0, 295.0, 275.0, 280.0, 299.0, 264.0, 258.0, 259.0, 263.0, 281.0, 292.0, 248.0, 271.0, 283.0, 293.0, 256.0, 269.0, 260.0, 265.0, 282.0, 294.0, 287.0, 292.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.711399980779434, "mean_inference_ms": 1.2796816992981612, "mean_action_processing_ms": 0.13548881730847223, "mean_env_wait_ms": 0.8568613192615742, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 582.0, "episode_reward_min": 405.0, "episode_reward_mean": 543.24, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 200.0}, "policy_reward_max": {"ppo": 311.0}, "policy_reward_mean": {"ppo": 271.62}, "hist_stats": {"episode_reward": [573.0, 579.0, 465.0, 522.0, 576.0, 522.0, 470.0, 522.0, 527.0, 573.0, 573.0, 576.0, 579.0, 573.0, 570.0, 519.0, 527.0, 536.0, 570.0, 573.0, 522.0, 519.0, 527.0, 522.0, 525.0, 579.0, 522.0, 513.0, 516.0, 576.0, 576.0, 519.0, 573.0, 533.0, 570.0, 522.0, 576.0, 510.0, 564.0, 530.0, 582.0, 582.0, 567.0, 570.0, 522.0, 573.0, 522.0, 579.0, 579.0, 522.0, 519.0, 519.0, 525.0, 525.0, 579.0, 576.0, 530.0, 
470.0, 525.0, 405.0, 533.0, 573.0, 522.0, 579.0, 519.0, 570.0, 527.0, 573.0, 525.0, 579.0, 525.0, 522.0, 576.0, 522.0, 530.0, 576.0, 576.0, 516.0, 513.0, 579.0, 576.0, 530.0, 519.0, 525.0, 530.0, 525.0, 525.0, 576.0, 527.0, 570.0, 579.0, 522.0, 522.0, 573.0, 519.0, 576.0, 525.0, 525.0, 576.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [289.0, 284.0, 299.0, 280.0, 240.0, 225.0, 254.0, 268.0, 286.0, 290.0, 262.0, 260.0, 233.0, 237.0, 281.0, 241.0, 264.0, 263.0, 290.0, 283.0, 288.0, 285.0, 284.0, 292.0, 301.0, 278.0, 288.0, 285.0, 298.0, 272.0, 267.0, 252.0, 253.0, 274.0, 273.0, 263.0, 290.0, 280.0, 293.0, 280.0, 267.0, 255.0, 253.0, 266.0, 255.0, 272.0, 254.0, 268.0, 270.0, 255.0, 282.0, 297.0, 261.0, 261.0, 272.0, 241.0, 262.0, 254.0, 289.0, 287.0, 302.0, 274.0, 247.0, 272.0, 298.0, 275.0, 261.0, 272.0, 286.0, 284.0, 265.0, 257.0, 290.0, 286.0, 250.0, 260.0, 285.0, 279.0, 268.0, 262.0, 289.0, 293.0, 293.0, 289.0, 273.0, 294.0, 285.0, 285.0, 268.0, 254.0, 286.0, 287.0, 264.0, 258.0, 297.0, 282.0, 282.0, 297.0, 264.0, 258.0, 249.0, 270.0, 263.0, 256.0, 259.0, 266.0, 264.0, 261.0, 288.0, 291.0, 265.0, 311.0, 261.0, 269.0, 243.0, 227.0, 260.0, 265.0, 200.0, 205.0, 252.0, 281.0, 277.0, 296.0, 272.0, 250.0, 281.0, 298.0, 264.0, 255.0, 291.0, 279.0, 272.0, 255.0, 283.0, 290.0, 257.0, 268.0, 291.0, 288.0, 263.0, 262.0, 257.0, 265.0, 294.0, 282.0, 270.0, 252.0, 278.0, 252.0, 289.0, 287.0, 284.0, 292.0, 264.0, 252.0, 249.0, 264.0, 288.0, 291.0, 293.0, 283.0, 277.0, 253.0, 251.0, 
268.0, 265.0, 260.0, 276.0, 254.0, 255.0, 270.0, 257.0, 268.0, 282.0, 294.0, 254.0, 273.0, 295.0, 275.0, 280.0, 299.0, 264.0, 258.0, 259.0, 263.0, 281.0, 292.0, 248.0, 271.0, 283.0, 293.0, 256.0, 269.0, 260.0, 265.0, 282.0, 294.0, 287.0, 292.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.711399980779434, "mean_inference_ms": 1.2796816992981612, "mean_action_processing_ms": 0.13548881730847223, "mean_env_wait_ms": 0.8568613192615742, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 5580800, "num_agent_steps_trained": 5580800, "num_env_steps_sampled": 2790400, "num_env_steps_trained": 2790400, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 2790400, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 5580800, "timers": {"training_iteration_time_ms": 3646.287, "learn_time_ms": 1130.626, "learn_throughput": 11321.157, "synch_weights_time_ms": 12.817}, "counters": {"num_env_steps_sampled": 2790400, "num_env_steps_trained": 2790400, "num_agent_steps_sampled": 5580800, "num_agent_steps_trained": 5580800}, "done": false, "episodes_total": 6976, "training_iteration": 218, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-14-40", "timestamp": 1666581280, "time_this_iter_s": 3.7043538093566895, "time_total_s": 849.9440846443176, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 849.9440846443176, "timesteps_since_restore": 0, "iterations_since_restore": 218, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.966666666666665, "ram_util_percent": 10.633333333333333}}
+{"custom_metrics": {"sparse_reward_mean": 189.6, "sparse_reward_min": 140, "sparse_reward_max": 200, "shaped_reward_mean": 169.37, "shaped_reward_min": 125, "shaped_reward_max": 187, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.53, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 16.12, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 15.25, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 25, "useful_onion_pickup_agent_1_mean": 15.86, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.24, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.21, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.06, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, 
"useful_onion_drop_agent_1_mean": 0.07, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 14.98, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 25, "potting_onion_agent_1_mean": 15.64, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.18, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.23, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 4.89, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.88, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 10, "dish_drop_agent_0_mean": 0.12, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.23, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 4.88, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.81, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 4.81, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.73, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.04, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.98, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 25, "optimal_onion_potting_agent_1_mean": 15.64, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.98, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 25, "viable_onion_potting_agent_1_mean": 15.64, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0008129839552566409, "policy_loss": -0.0010989793809130788, "vf_loss": 
7.66006326675415, "vf_explained_var": 0.6172770857810974, "kl": 0.0020343316718935966, "entropy": 0.9600198268890381, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 2803200, "num_env_steps_trained": 2803200, "num_agent_steps_sampled": 5606400, "num_agent_steps_trained": 5606400}, "sampler_results": {"episode_reward_max": 587.0, "episode_reward_min": 405.0, "episode_reward_mean": 548.57, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 200.0}, "policy_reward_max": {"ppo": 311.0}, "policy_reward_mean": {"ppo": 274.285}, "custom_metrics": {"sparse_reward_mean": 189.6, "sparse_reward_min": 140, "sparse_reward_max": 200, "shaped_reward_mean": 169.37, "shaped_reward_min": 125, "shaped_reward_max": 187, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.53, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 16.12, 
"onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 15.25, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 25, "useful_onion_pickup_agent_1_mean": 15.86, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.24, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.21, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.06, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.07, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 14.98, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 25, "potting_onion_agent_1_mean": 15.64, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.18, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.23, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 4.89, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.88, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 10, "dish_drop_agent_0_mean": 0.12, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.23, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 4.88, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.81, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 4.81, "soup_delivery_agent_0_min": 1, 
"soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.73, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.04, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.98, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 25, "optimal_onion_potting_agent_1_mean": 15.64, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.98, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 25, "viable_onion_potting_agent_1_mean": 15.64, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [573.0, 533.0, 570.0, 522.0, 576.0, 510.0, 564.0, 530.0, 582.0, 582.0, 567.0, 570.0, 522.0, 573.0, 522.0, 579.0, 579.0, 522.0, 519.0, 519.0, 525.0, 525.0, 579.0, 576.0, 530.0, 470.0, 525.0, 405.0, 533.0, 573.0, 522.0, 579.0, 519.0, 570.0, 527.0, 573.0, 525.0, 579.0, 525.0, 522.0, 576.0, 522.0, 530.0, 576.0, 576.0, 516.0, 513.0, 579.0, 576.0, 530.0, 519.0, 525.0, 530.0, 525.0, 525.0, 576.0, 527.0, 570.0, 579.0, 522.0, 522.0, 573.0, 519.0, 576.0, 525.0, 525.0, 576.0, 579.0, 570.0, 570.0, 522.0, 576.0, 576.0, 530.0, 576.0, 576.0, 570.0, 573.0, 587.0, 576.0, 530.0, 522.0, 576.0, 579.0, 530.0, 573.0, 570.0, 579.0, 576.0, 573.0, 582.0, 519.0, 525.0, 530.0, 570.0, 579.0, 525.0, 536.0, 525.0, 573.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [298.0, 275.0, 261.0, 272.0, 286.0, 284.0, 265.0, 257.0, 290.0, 286.0, 250.0, 260.0, 285.0, 279.0, 268.0, 262.0, 289.0, 293.0, 293.0, 289.0, 273.0, 294.0, 285.0, 285.0, 268.0, 254.0, 286.0, 287.0, 264.0, 258.0, 297.0, 282.0, 282.0, 297.0, 264.0, 258.0, 249.0, 270.0, 263.0, 256.0, 259.0, 266.0, 264.0, 261.0, 288.0, 291.0, 265.0, 311.0, 261.0, 
269.0, 243.0, 227.0, 260.0, 265.0, 200.0, 205.0, 252.0, 281.0, 277.0, 296.0, 272.0, 250.0, 281.0, 298.0, 264.0, 255.0, 291.0, 279.0, 272.0, 255.0, 283.0, 290.0, 257.0, 268.0, 291.0, 288.0, 263.0, 262.0, 257.0, 265.0, 294.0, 282.0, 270.0, 252.0, 278.0, 252.0, 289.0, 287.0, 284.0, 292.0, 264.0, 252.0, 249.0, 264.0, 288.0, 291.0, 293.0, 283.0, 277.0, 253.0, 251.0, 268.0, 265.0, 260.0, 276.0, 254.0, 255.0, 270.0, 257.0, 268.0, 282.0, 294.0, 254.0, 273.0, 295.0, 275.0, 280.0, 299.0, 264.0, 258.0, 259.0, 263.0, 281.0, 292.0, 248.0, 271.0, 283.0, 293.0, 256.0, 269.0, 260.0, 265.0, 282.0, 294.0, 287.0, 292.0, 277.0, 293.0, 274.0, 296.0, 269.0, 253.0, 279.0, 297.0, 295.0, 281.0, 269.0, 261.0, 289.0, 287.0, 289.0, 287.0, 278.0, 292.0, 269.0, 304.0, 293.0, 294.0, 286.0, 290.0, 269.0, 261.0, 262.0, 260.0, 301.0, 275.0, 282.0, 297.0, 265.0, 265.0, 283.0, 290.0, 278.0, 292.0, 296.0, 283.0, 298.0, 278.0, 286.0, 287.0, 283.0, 299.0, 257.0, 262.0, 266.0, 259.0, 267.0, 263.0, 278.0, 292.0, 289.0, 290.0, 251.0, 274.0, 275.0, 261.0, 259.0, 266.0, 292.0, 281.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7112164400179618, "mean_inference_ms": 1.2792621725564641, "mean_action_processing_ms": 0.13547534800810293, "mean_env_wait_ms": 0.8566341596312739, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 587.0, "episode_reward_min": 405.0, "episode_reward_mean": 548.57, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 200.0}, "policy_reward_max": {"ppo": 311.0}, "policy_reward_mean": {"ppo": 274.285}, "hist_stats": {"episode_reward": [573.0, 533.0, 570.0, 522.0, 576.0, 510.0, 564.0, 530.0, 582.0, 582.0, 567.0, 570.0, 522.0, 573.0, 522.0, 579.0, 579.0, 522.0, 519.0, 519.0, 525.0, 525.0, 579.0, 576.0, 530.0, 470.0, 525.0, 405.0, 533.0, 573.0, 522.0, 579.0, 519.0, 570.0, 527.0, 573.0, 525.0, 579.0, 525.0, 522.0, 576.0, 522.0, 530.0, 576.0, 576.0, 516.0, 513.0, 579.0, 576.0, 530.0, 519.0, 525.0, 530.0, 525.0, 525.0, 576.0, 
527.0, 570.0, 579.0, 522.0, 522.0, 573.0, 519.0, 576.0, 525.0, 525.0, 576.0, 579.0, 570.0, 570.0, 522.0, 576.0, 576.0, 530.0, 576.0, 576.0, 570.0, 573.0, 587.0, 576.0, 530.0, 522.0, 576.0, 579.0, 530.0, 573.0, 570.0, 579.0, 576.0, 573.0, 582.0, 519.0, 525.0, 530.0, 570.0, 579.0, 525.0, 536.0, 525.0, 573.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [298.0, 275.0, 261.0, 272.0, 286.0, 284.0, 265.0, 257.0, 290.0, 286.0, 250.0, 260.0, 285.0, 279.0, 268.0, 262.0, 289.0, 293.0, 293.0, 289.0, 273.0, 294.0, 285.0, 285.0, 268.0, 254.0, 286.0, 287.0, 264.0, 258.0, 297.0, 282.0, 282.0, 297.0, 264.0, 258.0, 249.0, 270.0, 263.0, 256.0, 259.0, 266.0, 264.0, 261.0, 288.0, 291.0, 265.0, 311.0, 261.0, 269.0, 243.0, 227.0, 260.0, 265.0, 200.0, 205.0, 252.0, 281.0, 277.0, 296.0, 272.0, 250.0, 281.0, 298.0, 264.0, 255.0, 291.0, 279.0, 272.0, 255.0, 283.0, 290.0, 257.0, 268.0, 291.0, 288.0, 263.0, 262.0, 257.0, 265.0, 294.0, 282.0, 270.0, 252.0, 278.0, 252.0, 289.0, 287.0, 284.0, 292.0, 264.0, 252.0, 249.0, 264.0, 288.0, 291.0, 293.0, 283.0, 277.0, 253.0, 251.0, 268.0, 265.0, 260.0, 276.0, 254.0, 255.0, 270.0, 257.0, 268.0, 282.0, 294.0, 254.0, 273.0, 295.0, 275.0, 280.0, 299.0, 264.0, 258.0, 259.0, 263.0, 281.0, 292.0, 248.0, 271.0, 283.0, 293.0, 256.0, 269.0, 260.0, 265.0, 282.0, 294.0, 287.0, 292.0, 277.0, 293.0, 274.0, 296.0, 269.0, 253.0, 279.0, 297.0, 295.0, 281.0, 269.0, 261.0, 289.0, 287.0, 289.0, 287.0, 278.0, 292.0, 269.0, 304.0, 293.0, 294.0, 286.0, 290.0, 269.0, 261.0, 262.0, 260.0, 
301.0, 275.0, 282.0, 297.0, 265.0, 265.0, 283.0, 290.0, 278.0, 292.0, 296.0, 283.0, 298.0, 278.0, 286.0, 287.0, 283.0, 299.0, 257.0, 262.0, 266.0, 259.0, 267.0, 263.0, 278.0, 292.0, 289.0, 290.0, 251.0, 274.0, 275.0, 261.0, 259.0, 266.0, 292.0, 281.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7112164400179618, "mean_inference_ms": 1.2792621725564641, "mean_action_processing_ms": 0.13547534800810293, "mean_env_wait_ms": 0.8566341596312739, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 5606400, "num_agent_steps_trained": 5606400, "num_env_steps_sampled": 2803200, "num_env_steps_trained": 2803200, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 2803200, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 5606400, "timers": {"training_iteration_time_ms": 3625.496, "learn_time_ms": 1119.684, "learn_throughput": 11431.802, "synch_weights_time_ms": 12.637}, "counters": {"num_env_steps_sampled": 2803200, "num_env_steps_trained": 2803200, "num_agent_steps_sampled": 5606400, "num_agent_steps_trained": 5606400}, "done": false, "episodes_total": 7008, "training_iteration": 219, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-14-44", "timestamp": 1666581284, "time_this_iter_s": 3.5609569549560547, "time_total_s": 853.5050415992737, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 853.5050415992737, "timesteps_since_restore": 0, "iterations_since_restore": 219, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.5, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 191.0, "sparse_reward_min": 160, "sparse_reward_max": 200, "shaped_reward_mean": 170.14, "shaped_reward_min": 148, "shaped_reward_max": 187, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.34, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 16.42, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 27, "useful_onion_pickup_agent_0_mean": 15.06, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 25, "useful_onion_pickup_agent_1_mean": 16.1, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 27, "onion_drop_agent_0_mean": 0.23, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.21, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.08, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, 
"useful_onion_drop_agent_1_mean": 0.09, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 14.79, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 25, "potting_onion_agent_1_mean": 15.93, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 26, "dish_pickup_agent_0_mean": 5.49, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.0, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 5.15, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.66, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 10, "dish_drop_agent_0_mean": 0.17, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.21, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.12, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 10, "soup_pickup_agent_1_mean": 4.64, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.05, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 10, "soup_delivery_agent_1_mean": 4.56, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.03, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 14.79, "optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 25, "optimal_onion_potting_agent_1_mean": 15.93, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 26, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.79, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 25, "viable_onion_potting_agent_1_mean": 15.93, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 26, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.00014584301970899105, "policy_loss": -0.00043676793575286865, "vf_loss": 
7.6577043533325195, "vf_explained_var": 0.6050065755844116, "kl": 0.0021726060658693314, "entropy": 0.9496897459030151, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 2816000, "num_env_steps_trained": 2816000, "num_agent_steps_sampled": 5632000, "num_agent_steps_trained": 5632000}, "sampler_results": {"episode_reward_max": 587.0, "episode_reward_min": 468.0, "episode_reward_mean": 552.14, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 233.0}, "policy_reward_max": {"ppo": 304.0}, "policy_reward_mean": {"ppo": 276.07}, "custom_metrics": {"sparse_reward_mean": 191.0, "sparse_reward_min": 160, "sparse_reward_max": 200, "shaped_reward_mean": 170.14, "shaped_reward_min": 148, "shaped_reward_max": 187, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.34, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 16.42, 
"onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 27, "useful_onion_pickup_agent_0_mean": 15.06, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 25, "useful_onion_pickup_agent_1_mean": 16.1, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 27, "onion_drop_agent_0_mean": 0.23, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.21, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.08, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.09, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 14.79, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 25, "potting_onion_agent_1_mean": 15.93, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 26, "dish_pickup_agent_0_mean": 5.49, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.0, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 5.15, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.66, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 10, "dish_drop_agent_0_mean": 0.17, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.21, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.12, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 10, "soup_pickup_agent_1_mean": 4.64, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.05, "soup_delivery_agent_0_min": 1, 
"soup_delivery_agent_0_max": 10, "soup_delivery_agent_1_mean": 4.56, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.03, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 14.79, "optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 25, "optimal_onion_potting_agent_1_mean": 15.93, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 26, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.79, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 25, "viable_onion_potting_agent_1_mean": 15.93, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 26, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [519.0, 570.0, 527.0, 573.0, 525.0, 579.0, 525.0, 522.0, 576.0, 522.0, 530.0, 576.0, 576.0, 516.0, 513.0, 579.0, 576.0, 530.0, 519.0, 525.0, 530.0, 525.0, 525.0, 576.0, 527.0, 570.0, 579.0, 522.0, 522.0, 573.0, 519.0, 576.0, 525.0, 525.0, 576.0, 579.0, 570.0, 570.0, 522.0, 576.0, 576.0, 530.0, 576.0, 576.0, 570.0, 573.0, 587.0, 576.0, 530.0, 522.0, 576.0, 579.0, 530.0, 573.0, 570.0, 579.0, 576.0, 573.0, 582.0, 519.0, 525.0, 530.0, 570.0, 579.0, 525.0, 536.0, 525.0, 573.0, 530.0, 576.0, 579.0, 570.0, 579.0, 522.0, 468.0, 524.0, 513.0, 530.0, 570.0, 579.0, 525.0, 579.0, 530.0, 573.0, 570.0, 573.0, 573.0, 573.0, 582.0, 573.0, 579.0, 573.0, 525.0, 573.0, 570.0, 527.0, 573.0, 510.0, 576.0, 516.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [264.0, 255.0, 291.0, 279.0, 272.0, 255.0, 283.0, 290.0, 257.0, 268.0, 291.0, 288.0, 263.0, 262.0, 257.0, 265.0, 294.0, 282.0, 270.0, 252.0, 278.0, 252.0, 289.0, 287.0, 284.0, 292.0, 264.0, 252.0, 249.0, 264.0, 288.0, 291.0, 293.0, 283.0, 277.0, 253.0, 251.0, 268.0, 265.0, 260.0, 276.0, 254.0, 255.0, 270.0, 257.0, 268.0, 282.0, 294.0, 254.0, 
273.0, 295.0, 275.0, 280.0, 299.0, 264.0, 258.0, 259.0, 263.0, 281.0, 292.0, 248.0, 271.0, 283.0, 293.0, 256.0, 269.0, 260.0, 265.0, 282.0, 294.0, 287.0, 292.0, 277.0, 293.0, 274.0, 296.0, 269.0, 253.0, 279.0, 297.0, 295.0, 281.0, 269.0, 261.0, 289.0, 287.0, 289.0, 287.0, 278.0, 292.0, 269.0, 304.0, 293.0, 294.0, 286.0, 290.0, 269.0, 261.0, 262.0, 260.0, 301.0, 275.0, 282.0, 297.0, 265.0, 265.0, 283.0, 290.0, 278.0, 292.0, 296.0, 283.0, 298.0, 278.0, 286.0, 287.0, 283.0, 299.0, 257.0, 262.0, 266.0, 259.0, 267.0, 263.0, 278.0, 292.0, 289.0, 290.0, 251.0, 274.0, 275.0, 261.0, 259.0, 266.0, 292.0, 281.0, 269.0, 261.0, 298.0, 278.0, 289.0, 290.0, 288.0, 282.0, 300.0, 279.0, 267.0, 255.0, 235.0, 233.0, 274.0, 250.0, 273.0, 240.0, 258.0, 272.0, 290.0, 280.0, 281.0, 298.0, 252.0, 273.0, 299.0, 280.0, 264.0, 266.0, 293.0, 280.0, 293.0, 277.0, 273.0, 300.0, 288.0, 285.0, 281.0, 292.0, 289.0, 293.0, 284.0, 289.0, 293.0, 286.0, 289.0, 284.0, 273.0, 252.0, 282.0, 291.0, 280.0, 290.0, 249.0, 278.0, 293.0, 280.0, 257.0, 253.0, 304.0, 272.0, 266.0, 250.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.711028381075877, "mean_inference_ms": 1.2788274392677903, "mean_action_processing_ms": 0.13545884864442959, "mean_env_wait_ms": 0.8563895258714708, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 587.0, "episode_reward_min": 468.0, "episode_reward_mean": 552.14, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 233.0}, "policy_reward_max": {"ppo": 304.0}, "policy_reward_mean": {"ppo": 276.07}, "hist_stats": {"episode_reward": [519.0, 570.0, 527.0, 573.0, 525.0, 579.0, 525.0, 522.0, 576.0, 522.0, 530.0, 576.0, 576.0, 516.0, 513.0, 579.0, 576.0, 530.0, 519.0, 525.0, 530.0, 525.0, 525.0, 576.0, 527.0, 570.0, 579.0, 522.0, 522.0, 573.0, 519.0, 576.0, 525.0, 525.0, 576.0, 579.0, 570.0, 570.0, 522.0, 576.0, 576.0, 530.0, 576.0, 576.0, 570.0, 573.0, 587.0, 576.0, 530.0, 522.0, 576.0, 579.0, 530.0, 573.0, 570.0, 579.0, 576.0, 
573.0, 582.0, 519.0, 525.0, 530.0, 570.0, 579.0, 525.0, 536.0, 525.0, 573.0, 530.0, 576.0, 579.0, 570.0, 579.0, 522.0, 468.0, 524.0, 513.0, 530.0, 570.0, 579.0, 525.0, 579.0, 530.0, 573.0, 570.0, 573.0, 573.0, 573.0, 582.0, 573.0, 579.0, 573.0, 525.0, 573.0, 570.0, 527.0, 573.0, 510.0, 576.0, 516.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [264.0, 255.0, 291.0, 279.0, 272.0, 255.0, 283.0, 290.0, 257.0, 268.0, 291.0, 288.0, 263.0, 262.0, 257.0, 265.0, 294.0, 282.0, 270.0, 252.0, 278.0, 252.0, 289.0, 287.0, 284.0, 292.0, 264.0, 252.0, 249.0, 264.0, 288.0, 291.0, 293.0, 283.0, 277.0, 253.0, 251.0, 268.0, 265.0, 260.0, 276.0, 254.0, 255.0, 270.0, 257.0, 268.0, 282.0, 294.0, 254.0, 273.0, 295.0, 275.0, 280.0, 299.0, 264.0, 258.0, 259.0, 263.0, 281.0, 292.0, 248.0, 271.0, 283.0, 293.0, 256.0, 269.0, 260.0, 265.0, 282.0, 294.0, 287.0, 292.0, 277.0, 293.0, 274.0, 296.0, 269.0, 253.0, 279.0, 297.0, 295.0, 281.0, 269.0, 261.0, 289.0, 287.0, 289.0, 287.0, 278.0, 292.0, 269.0, 304.0, 293.0, 294.0, 286.0, 290.0, 269.0, 261.0, 262.0, 260.0, 301.0, 275.0, 282.0, 297.0, 265.0, 265.0, 283.0, 290.0, 278.0, 292.0, 296.0, 283.0, 298.0, 278.0, 286.0, 287.0, 283.0, 299.0, 257.0, 262.0, 266.0, 259.0, 267.0, 263.0, 278.0, 292.0, 289.0, 290.0, 251.0, 274.0, 275.0, 261.0, 259.0, 266.0, 292.0, 281.0, 269.0, 261.0, 298.0, 278.0, 289.0, 290.0, 288.0, 282.0, 300.0, 279.0, 267.0, 255.0, 235.0, 233.0, 274.0, 250.0, 273.0, 240.0, 258.0, 272.0, 290.0, 280.0, 281.0, 298.0, 252.0, 273.0, 299.0, 280.0, 264.0, 
266.0, 293.0, 280.0, 293.0, 277.0, 273.0, 300.0, 288.0, 285.0, 281.0, 292.0, 289.0, 293.0, 284.0, 289.0, 293.0, 286.0, 289.0, 284.0, 273.0, 252.0, 282.0, 291.0, 280.0, 290.0, 249.0, 278.0, 293.0, 280.0, 257.0, 253.0, 304.0, 272.0, 266.0, 250.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.711028381075877, "mean_inference_ms": 1.2788274392677903, "mean_action_processing_ms": 0.13545884864442959, "mean_env_wait_ms": 0.8563895258714708, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 5632000, "num_agent_steps_trained": 5632000, "num_env_steps_sampled": 2816000, "num_env_steps_trained": 2816000, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 2816000, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 5632000, "timers": {"training_iteration_time_ms": 3621.065, "learn_time_ms": 1118.826, "learn_throughput": 11440.564, "synch_weights_time_ms": 11.987}, "counters": {"num_env_steps_sampled": 2816000, "num_env_steps_trained": 2816000, "num_agent_steps_sampled": 5632000, "num_agent_steps_trained": 5632000}, "done": false, "episodes_total": 7040, "training_iteration": 220, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-14-47", "timestamp": 1666581287, "time_this_iter_s": 3.691047430038452, "time_total_s": 857.1960890293121, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 857.1960890293121, "timesteps_since_restore": 0, "iterations_since_restore": 220, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.566666666666666, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 192.2, "sparse_reward_min": 160, "sparse_reward_max": 200, "shaped_reward_mean": 171.54, "shaped_reward_min": 148, "shaped_reward_max": 187, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.09, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 16.86, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 27, "useful_onion_pickup_agent_0_mean": 14.82, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 25, "useful_onion_pickup_agent_1_mean": 16.53, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 27, "onion_drop_agent_0_mean": 0.2, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.2, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.11, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, 
"useful_onion_drop_agent_1_mean": 0.08, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 14.62, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 25, "potting_onion_agent_1_mean": 16.34, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 26, "dish_pickup_agent_0_mean": 5.68, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 4.89, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 5.34, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.58, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 10, "dish_drop_agent_0_mean": 0.2, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.2, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.27, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 10, "soup_pickup_agent_1_mean": 4.56, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.16, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 10, "soup_delivery_agent_1_mean": 4.49, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.04, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 14.62, "optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 25, "optimal_onion_potting_agent_1_mean": 16.34, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 26, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.62, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 25, "viable_onion_potting_agent_1_mean": 16.34, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 26, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0024008420296013355, "policy_loss": 0.0021112796384841204, "vf_loss": 
7.663437366485596, "vf_explained_var": 0.6173580288887024, "kl": 0.0023920456878840923, "entropy": 0.9535607099533081, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 2828800, "num_env_steps_trained": 2828800, "num_agent_steps_sampled": 5657600, "num_agent_steps_trained": 5657600}, "sampler_results": {"episode_reward_max": 587.0, "episode_reward_min": 468.0, "episode_reward_mean": 555.94, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 233.0}, "policy_reward_max": {"ppo": 305.0}, "policy_reward_mean": {"ppo": 277.97}, "custom_metrics": {"sparse_reward_mean": 192.2, "sparse_reward_min": 160, "sparse_reward_max": 200, "shaped_reward_mean": 171.54, "shaped_reward_min": 148, "shaped_reward_max": 187, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.09, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 16.86, 
"onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 27, "useful_onion_pickup_agent_0_mean": 14.82, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 25, "useful_onion_pickup_agent_1_mean": 16.53, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 27, "onion_drop_agent_0_mean": 0.2, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.2, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.11, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.08, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 14.62, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 25, "potting_onion_agent_1_mean": 16.34, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 26, "dish_pickup_agent_0_mean": 5.68, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 4.89, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 5.34, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.58, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 10, "dish_drop_agent_0_mean": 0.2, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.2, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.27, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 10, "soup_pickup_agent_1_mean": 4.56, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.16, "soup_delivery_agent_0_min": 1, 
"soup_delivery_agent_0_max": 10, "soup_delivery_agent_1_mean": 4.49, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.04, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 14.62, "optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 25, "optimal_onion_potting_agent_1_mean": 16.34, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 26, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.62, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 25, "viable_onion_potting_agent_1_mean": 16.34, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 26, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [525.0, 525.0, 576.0, 579.0, 570.0, 570.0, 522.0, 576.0, 576.0, 530.0, 576.0, 576.0, 570.0, 573.0, 587.0, 576.0, 530.0, 522.0, 576.0, 579.0, 530.0, 573.0, 570.0, 579.0, 576.0, 573.0, 582.0, 519.0, 525.0, 530.0, 570.0, 579.0, 525.0, 536.0, 525.0, 573.0, 530.0, 576.0, 579.0, 570.0, 579.0, 522.0, 468.0, 524.0, 513.0, 530.0, 570.0, 579.0, 525.0, 579.0, 530.0, 573.0, 570.0, 573.0, 573.0, 573.0, 582.0, 573.0, 579.0, 573.0, 525.0, 573.0, 570.0, 527.0, 573.0, 510.0, 576.0, 516.0, 579.0, 582.0, 576.0, 576.0, 527.0, 533.0, 573.0, 530.0, 516.0, 573.0, 582.0, 530.0, 576.0, 582.0, 522.0, 579.0, 579.0, 576.0, 525.0, 582.0, 573.0, 570.0, 579.0, 530.0, 525.0, 582.0, 576.0, 579.0, 530.0, 522.0, 522.0, 516.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [256.0, 269.0, 260.0, 265.0, 282.0, 294.0, 287.0, 292.0, 277.0, 293.0, 274.0, 296.0, 269.0, 253.0, 279.0, 297.0, 295.0, 281.0, 269.0, 261.0, 289.0, 287.0, 289.0, 287.0, 278.0, 292.0, 269.0, 304.0, 293.0, 294.0, 286.0, 290.0, 269.0, 261.0, 262.0, 260.0, 301.0, 275.0, 282.0, 297.0, 265.0, 265.0, 283.0, 290.0, 278.0, 292.0, 296.0, 283.0, 298.0, 
278.0, 286.0, 287.0, 283.0, 299.0, 257.0, 262.0, 266.0, 259.0, 267.0, 263.0, 278.0, 292.0, 289.0, 290.0, 251.0, 274.0, 275.0, 261.0, 259.0, 266.0, 292.0, 281.0, 269.0, 261.0, 298.0, 278.0, 289.0, 290.0, 288.0, 282.0, 300.0, 279.0, 267.0, 255.0, 235.0, 233.0, 274.0, 250.0, 273.0, 240.0, 258.0, 272.0, 290.0, 280.0, 281.0, 298.0, 252.0, 273.0, 299.0, 280.0, 264.0, 266.0, 293.0, 280.0, 293.0, 277.0, 273.0, 300.0, 288.0, 285.0, 281.0, 292.0, 289.0, 293.0, 284.0, 289.0, 293.0, 286.0, 289.0, 284.0, 273.0, 252.0, 282.0, 291.0, 280.0, 290.0, 249.0, 278.0, 293.0, 280.0, 257.0, 253.0, 304.0, 272.0, 266.0, 250.0, 282.0, 297.0, 302.0, 280.0, 289.0, 287.0, 287.0, 289.0, 268.0, 259.0, 275.0, 258.0, 294.0, 279.0, 278.0, 252.0, 250.0, 266.0, 295.0, 278.0, 288.0, 294.0, 269.0, 261.0, 282.0, 294.0, 283.0, 299.0, 258.0, 264.0, 286.0, 293.0, 291.0, 288.0, 271.0, 305.0, 256.0, 269.0, 299.0, 283.0, 288.0, 285.0, 299.0, 271.0, 288.0, 291.0, 265.0, 265.0, 264.0, 261.0, 304.0, 278.0, 281.0, 295.0, 290.0, 289.0, 269.0, 261.0, 256.0, 266.0, 250.0, 272.0, 258.0, 258.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7108389704831163, "mean_inference_ms": 1.278375907898961, "mean_action_processing_ms": 0.13543735468459694, "mean_env_wait_ms": 0.8561217690529012, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 587.0, "episode_reward_min": 468.0, "episode_reward_mean": 555.94, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 233.0}, "policy_reward_max": {"ppo": 305.0}, "policy_reward_mean": {"ppo": 277.97}, "hist_stats": {"episode_reward": [525.0, 525.0, 576.0, 579.0, 570.0, 570.0, 522.0, 576.0, 576.0, 530.0, 576.0, 576.0, 570.0, 573.0, 587.0, 576.0, 530.0, 522.0, 576.0, 579.0, 530.0, 573.0, 570.0, 579.0, 576.0, 573.0, 582.0, 519.0, 525.0, 530.0, 570.0, 579.0, 525.0, 536.0, 525.0, 573.0, 530.0, 576.0, 579.0, 570.0, 579.0, 522.0, 468.0, 524.0, 513.0, 530.0, 570.0, 579.0, 525.0, 579.0, 530.0, 573.0, 570.0, 573.0, 573.0, 573.0, 582.0, 
573.0, 579.0, 573.0, 525.0, 573.0, 570.0, 527.0, 573.0, 510.0, 576.0, 516.0, 579.0, 582.0, 576.0, 576.0, 527.0, 533.0, 573.0, 530.0, 516.0, 573.0, 582.0, 530.0, 576.0, 582.0, 522.0, 579.0, 579.0, 576.0, 525.0, 582.0, 573.0, 570.0, 579.0, 530.0, 525.0, 582.0, 576.0, 579.0, 530.0, 522.0, 522.0, 516.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [256.0, 269.0, 260.0, 265.0, 282.0, 294.0, 287.0, 292.0, 277.0, 293.0, 274.0, 296.0, 269.0, 253.0, 279.0, 297.0, 295.0, 281.0, 269.0, 261.0, 289.0, 287.0, 289.0, 287.0, 278.0, 292.0, 269.0, 304.0, 293.0, 294.0, 286.0, 290.0, 269.0, 261.0, 262.0, 260.0, 301.0, 275.0, 282.0, 297.0, 265.0, 265.0, 283.0, 290.0, 278.0, 292.0, 296.0, 283.0, 298.0, 278.0, 286.0, 287.0, 283.0, 299.0, 257.0, 262.0, 266.0, 259.0, 267.0, 263.0, 278.0, 292.0, 289.0, 290.0, 251.0, 274.0, 275.0, 261.0, 259.0, 266.0, 292.0, 281.0, 269.0, 261.0, 298.0, 278.0, 289.0, 290.0, 288.0, 282.0, 300.0, 279.0, 267.0, 255.0, 235.0, 233.0, 274.0, 250.0, 273.0, 240.0, 258.0, 272.0, 290.0, 280.0, 281.0, 298.0, 252.0, 273.0, 299.0, 280.0, 264.0, 266.0, 293.0, 280.0, 293.0, 277.0, 273.0, 300.0, 288.0, 285.0, 281.0, 292.0, 289.0, 293.0, 284.0, 289.0, 293.0, 286.0, 289.0, 284.0, 273.0, 252.0, 282.0, 291.0, 280.0, 290.0, 249.0, 278.0, 293.0, 280.0, 257.0, 253.0, 304.0, 272.0, 266.0, 250.0, 282.0, 297.0, 302.0, 280.0, 289.0, 287.0, 287.0, 289.0, 268.0, 259.0, 275.0, 258.0, 294.0, 279.0, 278.0, 252.0, 250.0, 266.0, 295.0, 278.0, 288.0, 294.0, 269.0, 261.0, 282.0, 294.0, 283.0, 299.0, 258.0, 
264.0, 286.0, 293.0, 291.0, 288.0, 271.0, 305.0, 256.0, 269.0, 299.0, 283.0, 288.0, 285.0, 299.0, 271.0, 288.0, 291.0, 265.0, 265.0, 264.0, 261.0, 304.0, 278.0, 281.0, 295.0, 290.0, 289.0, 269.0, 261.0, 256.0, 266.0, 250.0, 272.0, 258.0, 258.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7108389704831163, "mean_inference_ms": 1.278375907898961, "mean_action_processing_ms": 0.13543735468459694, "mean_env_wait_ms": 0.8561217690529012, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 5657600, "num_agent_steps_trained": 5657600, "num_env_steps_sampled": 2828800, "num_env_steps_trained": 2828800, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 2828800, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 5657600, "timers": {"training_iteration_time_ms": 3611.477, "learn_time_ms": 1112.851, "learn_throughput": 11501.993, "synch_weights_time_ms": 11.026}, "counters": {"num_env_steps_sampled": 2828800, "num_env_steps_trained": 2828800, "num_agent_steps_sampled": 5657600, "num_agent_steps_trained": 5657600}, "done": false, "episodes_total": 7072, "training_iteration": 221, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-14-51", "timestamp": 1666581291, "time_this_iter_s": 3.676072835922241, "time_total_s": 860.8721618652344, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 860.8721618652344, "timesteps_since_restore": 0, "iterations_since_restore": 221, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.700000000000003, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 192.0, "sparse_reward_min": 160, "sparse_reward_max": 200, "shaped_reward_mean": 171.0, "shaped_reward_min": 148, "shaped_reward_max": 182, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.69, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 17.26, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 27, "useful_onion_pickup_agent_0_mean": 14.39, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 16.93, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 27, "onion_drop_agent_0_mean": 0.22, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.22, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.12, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, 
"useful_onion_drop_agent_1_mean": 0.1, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 14.17, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 16.72, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 26, "dish_pickup_agent_0_mean": 5.82, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 4.68, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.48, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.38, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.16, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.17, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.04, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.44, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 10, "soup_pickup_agent_1_mean": 4.34, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.37, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 10, "soup_delivery_agent_1_mean": 4.26, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.04, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 14.17, "optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 16.72, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 26, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.17, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 16.72, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 26, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0011982894502580166, "policy_loss": -0.0014831130392849445, "vf_loss": 
7.5979084968566895, "vf_explained_var": 0.6064113974571228, "kl": 0.0021043620072305202, "entropy": 0.9499344229698181, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 2841600, "num_env_steps_trained": 2841600, "num_agent_steps_sampled": 5683200, "num_agent_steps_trained": 5683200}, "sampler_results": {"episode_reward_max": 582.0, "episode_reward_min": 468.0, "episode_reward_mean": 555.0, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 233.0}, "policy_reward_max": {"ppo": 305.0}, "policy_reward_mean": {"ppo": 277.5}, "custom_metrics": {"sparse_reward_mean": 192.0, "sparse_reward_min": 160, "sparse_reward_max": 200, "shaped_reward_mean": 171.0, "shaped_reward_min": 148, "shaped_reward_max": 182, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.69, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 17.26, 
"onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 27, "useful_onion_pickup_agent_0_mean": 14.39, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 16.93, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 27, "onion_drop_agent_0_mean": 0.22, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.22, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.12, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.1, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 14.17, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 16.72, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 26, "dish_pickup_agent_0_mean": 5.82, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 4.68, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.48, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.38, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.16, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.17, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.04, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.44, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 10, "soup_pickup_agent_1_mean": 4.34, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.37, "soup_delivery_agent_0_min": 1, 
"soup_delivery_agent_0_max": 10, "soup_delivery_agent_1_mean": 4.26, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.04, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 14.17, "optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 16.72, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 26, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.17, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 16.72, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 26, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [525.0, 536.0, 525.0, 573.0, 530.0, 576.0, 579.0, 570.0, 579.0, 522.0, 468.0, 524.0, 513.0, 530.0, 570.0, 579.0, 525.0, 579.0, 530.0, 573.0, 570.0, 573.0, 573.0, 573.0, 582.0, 573.0, 579.0, 573.0, 525.0, 573.0, 570.0, 527.0, 573.0, 510.0, 576.0, 516.0, 579.0, 582.0, 576.0, 576.0, 527.0, 533.0, 573.0, 530.0, 516.0, 573.0, 582.0, 530.0, 576.0, 582.0, 522.0, 579.0, 579.0, 576.0, 525.0, 582.0, 573.0, 570.0, 579.0, 530.0, 525.0, 582.0, 576.0, 579.0, 530.0, 522.0, 522.0, 516.0, 525.0, 570.0, 573.0, 582.0, 576.0, 570.0, 521.0, 525.0, 576.0, 582.0, 516.0, 519.0, 522.0, 573.0, 573.0, 525.0, 579.0, 525.0, 533.0, 581.0, 570.0, 573.0, 579.0, 570.0, 576.0, 579.0, 573.0, 573.0, 519.0, 522.0, 576.0, 570.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [251.0, 274.0, 275.0, 261.0, 259.0, 266.0, 292.0, 281.0, 269.0, 261.0, 298.0, 278.0, 289.0, 290.0, 288.0, 282.0, 300.0, 279.0, 267.0, 255.0, 235.0, 233.0, 274.0, 250.0, 273.0, 240.0, 258.0, 272.0, 290.0, 280.0, 281.0, 298.0, 252.0, 273.0, 299.0, 280.0, 264.0, 266.0, 293.0, 280.0, 293.0, 277.0, 273.0, 300.0, 288.0, 285.0, 281.0, 292.0, 289.0, 
293.0, 284.0, 289.0, 293.0, 286.0, 289.0, 284.0, 273.0, 252.0, 282.0, 291.0, 280.0, 290.0, 249.0, 278.0, 293.0, 280.0, 257.0, 253.0, 304.0, 272.0, 266.0, 250.0, 282.0, 297.0, 302.0, 280.0, 289.0, 287.0, 287.0, 289.0, 268.0, 259.0, 275.0, 258.0, 294.0, 279.0, 278.0, 252.0, 250.0, 266.0, 295.0, 278.0, 288.0, 294.0, 269.0, 261.0, 282.0, 294.0, 283.0, 299.0, 258.0, 264.0, 286.0, 293.0, 291.0, 288.0, 271.0, 305.0, 256.0, 269.0, 299.0, 283.0, 288.0, 285.0, 299.0, 271.0, 288.0, 291.0, 265.0, 265.0, 264.0, 261.0, 304.0, 278.0, 281.0, 295.0, 290.0, 289.0, 269.0, 261.0, 256.0, 266.0, 250.0, 272.0, 258.0, 258.0, 267.0, 258.0, 280.0, 290.0, 294.0, 279.0, 284.0, 298.0, 300.0, 276.0, 269.0, 301.0, 262.0, 259.0, 245.0, 280.0, 284.0, 292.0, 303.0, 279.0, 257.0, 259.0, 261.0, 258.0, 268.0, 254.0, 281.0, 292.0, 285.0, 288.0, 248.0, 277.0, 299.0, 280.0, 254.0, 271.0, 261.0, 272.0, 282.0, 299.0, 293.0, 277.0, 286.0, 287.0, 286.0, 293.0, 287.0, 283.0, 280.0, 296.0, 291.0, 288.0, 300.0, 273.0, 290.0, 283.0, 256.0, 263.0, 268.0, 254.0, 280.0, 296.0, 298.0, 272.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7106626119466375, "mean_inference_ms": 1.278093081648415, "mean_action_processing_ms": 0.13541617822810648, "mean_env_wait_ms": 0.8560243668252465, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 582.0, "episode_reward_min": 468.0, "episode_reward_mean": 555.0, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 233.0}, "policy_reward_max": {"ppo": 305.0}, "policy_reward_mean": {"ppo": 277.5}, "hist_stats": {"episode_reward": [525.0, 536.0, 525.0, 573.0, 530.0, 576.0, 579.0, 570.0, 579.0, 522.0, 468.0, 524.0, 513.0, 530.0, 570.0, 579.0, 525.0, 579.0, 530.0, 573.0, 570.0, 573.0, 573.0, 573.0, 582.0, 573.0, 579.0, 573.0, 525.0, 573.0, 570.0, 527.0, 573.0, 510.0, 576.0, 516.0, 579.0, 582.0, 576.0, 576.0, 527.0, 533.0, 573.0, 530.0, 516.0, 573.0, 582.0, 530.0, 576.0, 582.0, 522.0, 579.0, 579.0, 576.0, 525.0, 582.0, 573.0, 
570.0, 579.0, 530.0, 525.0, 582.0, 576.0, 579.0, 530.0, 522.0, 522.0, 516.0, 525.0, 570.0, 573.0, 582.0, 576.0, 570.0, 521.0, 525.0, 576.0, 582.0, 516.0, 519.0, 522.0, 573.0, 573.0, 525.0, 579.0, 525.0, 533.0, 581.0, 570.0, 573.0, 579.0, 570.0, 576.0, 579.0, 573.0, 573.0, 519.0, 522.0, 576.0, 570.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [251.0, 274.0, 275.0, 261.0, 259.0, 266.0, 292.0, 281.0, 269.0, 261.0, 298.0, 278.0, 289.0, 290.0, 288.0, 282.0, 300.0, 279.0, 267.0, 255.0, 235.0, 233.0, 274.0, 250.0, 273.0, 240.0, 258.0, 272.0, 290.0, 280.0, 281.0, 298.0, 252.0, 273.0, 299.0, 280.0, 264.0, 266.0, 293.0, 280.0, 293.0, 277.0, 273.0, 300.0, 288.0, 285.0, 281.0, 292.0, 289.0, 293.0, 284.0, 289.0, 293.0, 286.0, 289.0, 284.0, 273.0, 252.0, 282.0, 291.0, 280.0, 290.0, 249.0, 278.0, 293.0, 280.0, 257.0, 253.0, 304.0, 272.0, 266.0, 250.0, 282.0, 297.0, 302.0, 280.0, 289.0, 287.0, 287.0, 289.0, 268.0, 259.0, 275.0, 258.0, 294.0, 279.0, 278.0, 252.0, 250.0, 266.0, 295.0, 278.0, 288.0, 294.0, 269.0, 261.0, 282.0, 294.0, 283.0, 299.0, 258.0, 264.0, 286.0, 293.0, 291.0, 288.0, 271.0, 305.0, 256.0, 269.0, 299.0, 283.0, 288.0, 285.0, 299.0, 271.0, 288.0, 291.0, 265.0, 265.0, 264.0, 261.0, 304.0, 278.0, 281.0, 295.0, 290.0, 289.0, 269.0, 261.0, 256.0, 266.0, 250.0, 272.0, 258.0, 258.0, 267.0, 258.0, 280.0, 290.0, 294.0, 279.0, 284.0, 298.0, 300.0, 276.0, 269.0, 301.0, 262.0, 259.0, 245.0, 280.0, 284.0, 292.0, 303.0, 279.0, 257.0, 259.0, 261.0, 258.0, 268.0, 254.0, 281.0, 292.0, 285.0, 
288.0, 248.0, 277.0, 299.0, 280.0, 254.0, 271.0, 261.0, 272.0, 282.0, 299.0, 293.0, 277.0, 286.0, 287.0, 286.0, 293.0, 287.0, 283.0, 280.0, 296.0, 291.0, 288.0, 300.0, 273.0, 290.0, 283.0, 256.0, 263.0, 268.0, 254.0, 280.0, 296.0, 298.0, 272.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7106626119466375, "mean_inference_ms": 1.278093081648415, "mean_action_processing_ms": 0.13541617822810648, "mean_env_wait_ms": 0.8560243668252465, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 5683200, "num_agent_steps_trained": 5683200, "num_env_steps_sampled": 2841600, "num_env_steps_trained": 2841600, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 2841600, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 5683200, "timers": {"training_iteration_time_ms": 3639.618, "learn_time_ms": 1127.503, "learn_throughput": 11352.518, "synch_weights_time_ms": 11.232}, "counters": {"num_env_steps_sampled": 2841600, "num_env_steps_trained": 2841600, "num_agent_steps_sampled": 5683200, "num_agent_steps_trained": 5683200}, "done": false, "episodes_total": 7104, "training_iteration": 222, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-14-55", "timestamp": 1666581295, "time_this_iter_s": 4.044084072113037, "time_total_s": 864.9162459373474, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 864.9162459373474, "timesteps_since_restore": 0, "iterations_since_restore": 222, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 21.799999999999997, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 193.8, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 172.23, "shaped_reward_min": 150, "shaped_reward_max": 187, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.93, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 17.18, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 14.58, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 16.88, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.19, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.22, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.07, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, 
"useful_onion_drop_agent_1_mean": 0.09, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 14.46, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 16.68, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 5.77, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 4.92, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.37, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.55, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.23, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.25, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.08, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.4, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 10, "soup_pickup_agent_1_mean": 4.43, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.33, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 10, "soup_delivery_agent_1_mean": 4.37, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.03, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.46, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 16.68, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.46, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 16.68, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.00024267204571515322, "policy_loss": -0.0005397425265982747, "vf_loss": 
7.663079261779785, "vf_explained_var": 0.596228837966919, "kl": 0.00204327329993248, "entropy": 0.9384721517562866, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 2854400, "num_env_steps_trained": 2854400, "num_agent_steps_sampled": 5708800, "num_agent_steps_trained": 5708800}, "sampler_results": {"episode_reward_max": 627.0, "episode_reward_min": 510.0, "episode_reward_mean": 559.83, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 237.0}, "policy_reward_max": {"ppo": 318.0}, "policy_reward_mean": {"ppo": 279.915}, "custom_metrics": {"sparse_reward_mean": 193.8, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 172.23, "shaped_reward_min": 150, "shaped_reward_max": 187, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.93, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 17.18, 
"onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 14.58, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 16.88, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.19, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.22, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.07, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.09, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 14.46, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 16.68, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 5.77, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 4.92, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.37, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.55, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.23, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.25, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.08, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.4, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 10, "soup_pickup_agent_1_mean": 4.43, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.33, "soup_delivery_agent_0_min": 1, 
"soup_delivery_agent_0_max": 10, "soup_delivery_agent_1_mean": 4.37, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.03, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.46, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 16.68, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.46, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 16.68, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [573.0, 510.0, 576.0, 516.0, 579.0, 582.0, 576.0, 576.0, 527.0, 533.0, 573.0, 530.0, 516.0, 573.0, 582.0, 530.0, 576.0, 582.0, 522.0, 579.0, 579.0, 576.0, 525.0, 582.0, 573.0, 570.0, 579.0, 530.0, 525.0, 582.0, 576.0, 579.0, 530.0, 522.0, 522.0, 516.0, 525.0, 570.0, 573.0, 582.0, 576.0, 570.0, 521.0, 525.0, 576.0, 582.0, 516.0, 519.0, 522.0, 573.0, 573.0, 525.0, 579.0, 525.0, 533.0, 581.0, 570.0, 573.0, 579.0, 570.0, 576.0, 579.0, 573.0, 573.0, 519.0, 522.0, 576.0, 570.0, 579.0, 582.0, 579.0, 573.0, 522.0, 573.0, 561.0, 525.0, 573.0, 525.0, 579.0, 579.0, 576.0, 573.0, 573.0, 627.0, 570.0, 581.0, 513.0, 527.0, 582.0, 533.0, 573.0, 573.0, 573.0, 582.0, 576.0, 579.0, 579.0, 582.0, 576.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [293.0, 280.0, 257.0, 253.0, 304.0, 272.0, 266.0, 250.0, 282.0, 297.0, 302.0, 280.0, 289.0, 287.0, 287.0, 289.0, 268.0, 259.0, 275.0, 258.0, 294.0, 279.0, 278.0, 252.0, 250.0, 266.0, 295.0, 278.0, 288.0, 294.0, 269.0, 261.0, 282.0, 294.0, 283.0, 299.0, 258.0, 264.0, 286.0, 293.0, 291.0, 288.0, 271.0, 305.0, 256.0, 269.0, 299.0, 283.0, 288.0, 
285.0, 299.0, 271.0, 288.0, 291.0, 265.0, 265.0, 264.0, 261.0, 304.0, 278.0, 281.0, 295.0, 290.0, 289.0, 269.0, 261.0, 256.0, 266.0, 250.0, 272.0, 258.0, 258.0, 267.0, 258.0, 280.0, 290.0, 294.0, 279.0, 284.0, 298.0, 300.0, 276.0, 269.0, 301.0, 262.0, 259.0, 245.0, 280.0, 284.0, 292.0, 303.0, 279.0, 257.0, 259.0, 261.0, 258.0, 268.0, 254.0, 281.0, 292.0, 285.0, 288.0, 248.0, 277.0, 299.0, 280.0, 254.0, 271.0, 261.0, 272.0, 282.0, 299.0, 293.0, 277.0, 286.0, 287.0, 286.0, 293.0, 287.0, 283.0, 280.0, 296.0, 291.0, 288.0, 300.0, 273.0, 290.0, 283.0, 256.0, 263.0, 268.0, 254.0, 280.0, 296.0, 298.0, 272.0, 294.0, 285.0, 301.0, 281.0, 298.0, 281.0, 284.0, 289.0, 268.0, 254.0, 297.0, 276.0, 266.0, 295.0, 256.0, 269.0, 278.0, 295.0, 277.0, 248.0, 289.0, 290.0, 296.0, 283.0, 289.0, 287.0, 286.0, 287.0, 280.0, 293.0, 318.0, 309.0, 291.0, 279.0, 272.0, 309.0, 276.0, 237.0, 256.0, 271.0, 286.0, 296.0, 254.0, 279.0, 273.0, 300.0, 289.0, 284.0, 287.0, 286.0, 292.0, 290.0, 283.0, 293.0, 292.0, 287.0, 285.0, 294.0, 300.0, 282.0, 289.0, 287.0, 288.0, 294.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7105164723411486, "mean_inference_ms": 1.2778515473445784, "mean_action_processing_ms": 0.13539721529474044, "mean_env_wait_ms": 0.8559981042684861, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 627.0, "episode_reward_min": 510.0, "episode_reward_mean": 559.83, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 237.0}, "policy_reward_max": {"ppo": 318.0}, "policy_reward_mean": {"ppo": 279.915}, "hist_stats": {"episode_reward": [573.0, 510.0, 576.0, 516.0, 579.0, 582.0, 576.0, 576.0, 527.0, 533.0, 573.0, 530.0, 516.0, 573.0, 582.0, 530.0, 576.0, 582.0, 522.0, 579.0, 579.0, 576.0, 525.0, 582.0, 573.0, 570.0, 579.0, 530.0, 525.0, 582.0, 576.0, 579.0, 530.0, 522.0, 522.0, 516.0, 525.0, 570.0, 573.0, 582.0, 576.0, 570.0, 521.0, 525.0, 576.0, 582.0, 516.0, 519.0, 522.0, 573.0, 573.0, 525.0, 579.0, 525.0, 533.0, 581.0, 
570.0, 573.0, 579.0, 570.0, 576.0, 579.0, 573.0, 573.0, 519.0, 522.0, 576.0, 570.0, 579.0, 582.0, 579.0, 573.0, 522.0, 573.0, 561.0, 525.0, 573.0, 525.0, 579.0, 579.0, 576.0, 573.0, 573.0, 627.0, 570.0, 581.0, 513.0, 527.0, 582.0, 533.0, 573.0, 573.0, 573.0, 582.0, 576.0, 579.0, 579.0, 582.0, 576.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [293.0, 280.0, 257.0, 253.0, 304.0, 272.0, 266.0, 250.0, 282.0, 297.0, 302.0, 280.0, 289.0, 287.0, 287.0, 289.0, 268.0, 259.0, 275.0, 258.0, 294.0, 279.0, 278.0, 252.0, 250.0, 266.0, 295.0, 278.0, 288.0, 294.0, 269.0, 261.0, 282.0, 294.0, 283.0, 299.0, 258.0, 264.0, 286.0, 293.0, 291.0, 288.0, 271.0, 305.0, 256.0, 269.0, 299.0, 283.0, 288.0, 285.0, 299.0, 271.0, 288.0, 291.0, 265.0, 265.0, 264.0, 261.0, 304.0, 278.0, 281.0, 295.0, 290.0, 289.0, 269.0, 261.0, 256.0, 266.0, 250.0, 272.0, 258.0, 258.0, 267.0, 258.0, 280.0, 290.0, 294.0, 279.0, 284.0, 298.0, 300.0, 276.0, 269.0, 301.0, 262.0, 259.0, 245.0, 280.0, 284.0, 292.0, 303.0, 279.0, 257.0, 259.0, 261.0, 258.0, 268.0, 254.0, 281.0, 292.0, 285.0, 288.0, 248.0, 277.0, 299.0, 280.0, 254.0, 271.0, 261.0, 272.0, 282.0, 299.0, 293.0, 277.0, 286.0, 287.0, 286.0, 293.0, 287.0, 283.0, 280.0, 296.0, 291.0, 288.0, 300.0, 273.0, 290.0, 283.0, 256.0, 263.0, 268.0, 254.0, 280.0, 296.0, 298.0, 272.0, 294.0, 285.0, 301.0, 281.0, 298.0, 281.0, 284.0, 289.0, 268.0, 254.0, 297.0, 276.0, 266.0, 295.0, 256.0, 269.0, 278.0, 295.0, 277.0, 248.0, 289.0, 290.0, 296.0, 283.0, 289.0, 287.0, 286.0, 287.0, 
280.0, 293.0, 318.0, 309.0, 291.0, 279.0, 272.0, 309.0, 276.0, 237.0, 256.0, 271.0, 286.0, 296.0, 254.0, 279.0, 273.0, 300.0, 289.0, 284.0, 287.0, 286.0, 292.0, 290.0, 283.0, 293.0, 292.0, 287.0, 285.0, 294.0, 300.0, 282.0, 289.0, 287.0, 288.0, 294.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7105164723411486, "mean_inference_ms": 1.2778515473445784, "mean_action_processing_ms": 0.13539721529474044, "mean_env_wait_ms": 0.8559981042684861, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 5708800, "num_agent_steps_trained": 5708800, "num_env_steps_sampled": 2854400, "num_env_steps_trained": 2854400, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 2854400, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 5708800, "timers": {"training_iteration_time_ms": 3663.188, "learn_time_ms": 1138.019, "learn_throughput": 11247.616, "synch_weights_time_ms": 11.241}, "counters": {"num_env_steps_sampled": 2854400, "num_env_steps_trained": 2854400, "num_agent_steps_sampled": 5708800, "num_agent_steps_trained": 5708800}, "done": false, "episodes_total": 7136, "training_iteration": 223, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-14-59", "timestamp": 1666581299, "time_this_iter_s": 3.808103084564209, "time_total_s": 868.7243490219116, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 868.7243490219116, "timesteps_since_restore": 0, "iterations_since_restore": 223, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.75, "ram_util_percent": 10.616666666666667}}
+{"custom_metrics": {"sparse_reward_mean": 194.6, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 172.71, "shaped_reward_min": 153, "shaped_reward_max": 187, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.97, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 17.19, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 14.66, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 16.94, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.19, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.22, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.06, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, 
"useful_onion_drop_agent_1_mean": 0.1, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 14.5, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 16.68, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 5.84, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 4.96, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.39, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.6, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.29, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.23, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.09, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.04, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.42, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 10, "soup_pickup_agent_1_mean": 4.48, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.34, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 10, "soup_delivery_agent_1_mean": 4.4, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.05, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.5, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 16.68, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.5, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 16.68, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.00046854279935359955, "policy_loss": 0.00016595772467553616, "vf_loss": 
7.699159622192383, "vf_explained_var": 0.617074728012085, "kl": 0.0019009055104106665, "entropy": 0.9346585273742676, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 2867200, "num_env_steps_trained": 2867200, "num_agent_steps_sampled": 5734400, "num_agent_steps_trained": 5734400}, "sampler_results": {"episode_reward_max": 627.0, "episode_reward_min": 513.0, "episode_reward_mean": 561.91, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 237.0}, "policy_reward_max": {"ppo": 318.0}, "policy_reward_mean": {"ppo": 280.955}, "custom_metrics": {"sparse_reward_mean": 194.6, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 172.71, "shaped_reward_min": 153, "shaped_reward_max": 187, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.97, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 17.19, 
"onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 14.66, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 16.94, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.19, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.22, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.06, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.1, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 14.5, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 16.68, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 5.84, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 4.96, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.39, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.6, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.29, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.23, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.09, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.04, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.42, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 10, "soup_pickup_agent_1_mean": 4.48, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.34, "soup_delivery_agent_0_min": 1, 
"soup_delivery_agent_0_max": 10, "soup_delivery_agent_1_mean": 4.4, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.05, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.5, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 16.68, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.5, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 16.68, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [530.0, 522.0, 522.0, 516.0, 525.0, 570.0, 573.0, 582.0, 576.0, 570.0, 521.0, 525.0, 576.0, 582.0, 516.0, 519.0, 522.0, 573.0, 573.0, 525.0, 579.0, 525.0, 533.0, 581.0, 570.0, 573.0, 579.0, 570.0, 576.0, 579.0, 573.0, 573.0, 519.0, 522.0, 576.0, 570.0, 579.0, 582.0, 579.0, 573.0, 522.0, 573.0, 561.0, 525.0, 573.0, 525.0, 579.0, 579.0, 576.0, 573.0, 573.0, 627.0, 570.0, 581.0, 513.0, 527.0, 582.0, 533.0, 573.0, 573.0, 573.0, 582.0, 576.0, 579.0, 579.0, 582.0, 576.0, 582.0, 579.0, 530.0, 579.0, 527.0, 579.0, 579.0, 573.0, 570.0, 582.0, 582.0, 576.0, 573.0, 584.0, 576.0, 576.0, 573.0, 519.0, 522.0, 573.0, 579.0, 570.0, 573.0, 573.0, 519.0, 576.0, 582.0, 582.0, 573.0, 522.0, 539.0, 573.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [269.0, 261.0, 256.0, 266.0, 250.0, 272.0, 258.0, 258.0, 267.0, 258.0, 280.0, 290.0, 294.0, 279.0, 284.0, 298.0, 300.0, 276.0, 269.0, 301.0, 262.0, 259.0, 245.0, 280.0, 284.0, 292.0, 303.0, 279.0, 257.0, 259.0, 261.0, 258.0, 268.0, 254.0, 281.0, 292.0, 285.0, 288.0, 248.0, 277.0, 299.0, 280.0, 254.0, 271.0, 261.0, 272.0, 282.0, 299.0, 293.0, 
277.0, 286.0, 287.0, 286.0, 293.0, 287.0, 283.0, 280.0, 296.0, 291.0, 288.0, 300.0, 273.0, 290.0, 283.0, 256.0, 263.0, 268.0, 254.0, 280.0, 296.0, 298.0, 272.0, 294.0, 285.0, 301.0, 281.0, 298.0, 281.0, 284.0, 289.0, 268.0, 254.0, 297.0, 276.0, 266.0, 295.0, 256.0, 269.0, 278.0, 295.0, 277.0, 248.0, 289.0, 290.0, 296.0, 283.0, 289.0, 287.0, 286.0, 287.0, 280.0, 293.0, 318.0, 309.0, 291.0, 279.0, 272.0, 309.0, 276.0, 237.0, 256.0, 271.0, 286.0, 296.0, 254.0, 279.0, 273.0, 300.0, 289.0, 284.0, 287.0, 286.0, 292.0, 290.0, 283.0, 293.0, 292.0, 287.0, 285.0, 294.0, 300.0, 282.0, 289.0, 287.0, 288.0, 294.0, 284.0, 295.0, 262.0, 268.0, 294.0, 285.0, 269.0, 258.0, 301.0, 278.0, 288.0, 291.0, 301.0, 272.0, 280.0, 290.0, 304.0, 278.0, 286.0, 296.0, 282.0, 294.0, 290.0, 283.0, 288.0, 296.0, 298.0, 278.0, 289.0, 287.0, 282.0, 291.0, 258.0, 261.0, 260.0, 262.0, 290.0, 283.0, 287.0, 292.0, 281.0, 289.0, 281.0, 292.0, 286.0, 287.0, 272.0, 247.0, 288.0, 288.0, 298.0, 284.0, 283.0, 299.0, 295.0, 278.0, 272.0, 250.0, 262.0, 277.0, 284.0, 289.0, 300.0, 282.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7103830562160084, "mean_inference_ms": 1.2776410987550098, "mean_action_processing_ms": 0.13537899989677257, "mean_env_wait_ms": 0.8559871633031326, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 627.0, "episode_reward_min": 513.0, "episode_reward_mean": 561.91, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 237.0}, "policy_reward_max": {"ppo": 318.0}, "policy_reward_mean": {"ppo": 280.955}, "hist_stats": {"episode_reward": [530.0, 522.0, 522.0, 516.0, 525.0, 570.0, 573.0, 582.0, 576.0, 570.0, 521.0, 525.0, 576.0, 582.0, 516.0, 519.0, 522.0, 573.0, 573.0, 525.0, 579.0, 525.0, 533.0, 581.0, 570.0, 573.0, 579.0, 570.0, 576.0, 579.0, 573.0, 573.0, 519.0, 522.0, 576.0, 570.0, 579.0, 582.0, 579.0, 573.0, 522.0, 573.0, 561.0, 525.0, 573.0, 525.0, 579.0, 579.0, 576.0, 573.0, 573.0, 627.0, 570.0, 581.0, 513.0, 527.0, 
582.0, 533.0, 573.0, 573.0, 573.0, 582.0, 576.0, 579.0, 579.0, 582.0, 576.0, 582.0, 579.0, 530.0, 579.0, 527.0, 579.0, 579.0, 573.0, 570.0, 582.0, 582.0, 576.0, 573.0, 584.0, 576.0, 576.0, 573.0, 519.0, 522.0, 573.0, 579.0, 570.0, 573.0, 573.0, 519.0, 576.0, 582.0, 582.0, 573.0, 522.0, 539.0, 573.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [269.0, 261.0, 256.0, 266.0, 250.0, 272.0, 258.0, 258.0, 267.0, 258.0, 280.0, 290.0, 294.0, 279.0, 284.0, 298.0, 300.0, 276.0, 269.0, 301.0, 262.0, 259.0, 245.0, 280.0, 284.0, 292.0, 303.0, 279.0, 257.0, 259.0, 261.0, 258.0, 268.0, 254.0, 281.0, 292.0, 285.0, 288.0, 248.0, 277.0, 299.0, 280.0, 254.0, 271.0, 261.0, 272.0, 282.0, 299.0, 293.0, 277.0, 286.0, 287.0, 286.0, 293.0, 287.0, 283.0, 280.0, 296.0, 291.0, 288.0, 300.0, 273.0, 290.0, 283.0, 256.0, 263.0, 268.0, 254.0, 280.0, 296.0, 298.0, 272.0, 294.0, 285.0, 301.0, 281.0, 298.0, 281.0, 284.0, 289.0, 268.0, 254.0, 297.0, 276.0, 266.0, 295.0, 256.0, 269.0, 278.0, 295.0, 277.0, 248.0, 289.0, 290.0, 296.0, 283.0, 289.0, 287.0, 286.0, 287.0, 280.0, 293.0, 318.0, 309.0, 291.0, 279.0, 272.0, 309.0, 276.0, 237.0, 256.0, 271.0, 286.0, 296.0, 254.0, 279.0, 273.0, 300.0, 289.0, 284.0, 287.0, 286.0, 292.0, 290.0, 283.0, 293.0, 292.0, 287.0, 285.0, 294.0, 300.0, 282.0, 289.0, 287.0, 288.0, 294.0, 284.0, 295.0, 262.0, 268.0, 294.0, 285.0, 269.0, 258.0, 301.0, 278.0, 288.0, 291.0, 301.0, 272.0, 280.0, 290.0, 304.0, 278.0, 286.0, 296.0, 282.0, 294.0, 290.0, 283.0, 288.0, 296.0, 298.0, 278.0, 
289.0, 287.0, 282.0, 291.0, 258.0, 261.0, 260.0, 262.0, 290.0, 283.0, 287.0, 292.0, 281.0, 289.0, 281.0, 292.0, 286.0, 287.0, 272.0, 247.0, 288.0, 288.0, 298.0, 284.0, 283.0, 299.0, 295.0, 278.0, 272.0, 250.0, 262.0, 277.0, 284.0, 289.0, 300.0, 282.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7103830562160084, "mean_inference_ms": 1.2776410987550098, "mean_action_processing_ms": 0.13537899989677257, "mean_env_wait_ms": 0.8559871633031326, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 5734400, "num_agent_steps_trained": 5734400, "num_env_steps_sampled": 2867200, "num_env_steps_trained": 2867200, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 2867200, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 5734400, "timers": {"training_iteration_time_ms": 3681.002, "learn_time_ms": 1151.865, "learn_throughput": 11112.412, "synch_weights_time_ms": 11.456}, "counters": {"num_env_steps_sampled": 2867200, "num_env_steps_trained": 2867200, "num_agent_steps_sampled": 5734400, "num_agent_steps_trained": 5734400}, "done": false, "episodes_total": 7168, "training_iteration": 224, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-15-03", "timestamp": 1666581303, "time_this_iter_s": 3.812587261199951, "time_total_s": 872.5369362831116, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 872.5369362831116, "timesteps_since_restore": 0, "iterations_since_restore": 224, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 24.4, "ram_util_percent": 10.62}}
+{"custom_metrics": {"sparse_reward_mean": 196.6, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 175.0, "shaped_reward_min": 153, "shaped_reward_max": 187, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.42, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 16.98, "onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 15.14, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 16.77, "useful_onion_pickup_agent_1_min": 11, "useful_onion_pickup_agent_1_max": 25, "onion_drop_agent_0_mean": 0.15, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.2, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.04, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, 
"useful_onion_drop_agent_1_mean": 0.09, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.0, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 16.55, "potting_onion_agent_1_min": 11, "potting_onion_agent_1_max": 25, "dish_pickup_agent_0_mean": 5.72, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.25, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.35, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.85, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.26, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.27, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.07, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.33, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 10, "soup_pickup_agent_1_mean": 4.71, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.23, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 10, "soup_delivery_agent_1_mean": 4.63, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.06, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.04, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.0, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 16.55, "optimal_onion_potting_agent_1_min": 11, "optimal_onion_potting_agent_1_max": 25, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.0, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 16.55, "viable_onion_potting_agent_1_min": 11, "viable_onion_potting_agent_1_max": 25, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0004360268940217793, "policy_loss": -0.0007370202802121639, "vf_loss": 
7.678671360015869, "vf_explained_var": 0.6376967430114746, "kl": 0.00243803090415895, "entropy": 0.9337455630302429, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 2880000, "num_env_steps_trained": 2880000, "num_agent_steps_sampled": 5760000, "num_agent_steps_trained": 5760000}, "sampler_results": {"episode_reward_max": 627.0, "episode_reward_min": 513.0, "episode_reward_mean": 568.2, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 237.0}, "policy_reward_max": {"ppo": 318.0}, "policy_reward_mean": {"ppo": 284.1}, "custom_metrics": {"sparse_reward_mean": 196.6, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 175.0, "shaped_reward_min": 153, "shaped_reward_max": 187, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.42, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 16.98, 
"onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 15.14, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 16.77, "useful_onion_pickup_agent_1_min": 11, "useful_onion_pickup_agent_1_max": 25, "onion_drop_agent_0_mean": 0.15, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.2, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.04, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.09, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.0, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 16.55, "potting_onion_agent_1_min": 11, "potting_onion_agent_1_max": 25, "dish_pickup_agent_0_mean": 5.72, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.25, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.35, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.85, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.26, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.27, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.07, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.33, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 10, "soup_pickup_agent_1_mean": 4.71, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.23, "soup_delivery_agent_0_min": 2, 
"soup_delivery_agent_0_max": 10, "soup_delivery_agent_1_mean": 4.63, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.06, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.04, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.0, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 16.55, "optimal_onion_potting_agent_1_min": 11, "optimal_onion_potting_agent_1_max": 25, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.0, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 16.55, "viable_onion_potting_agent_1_min": 11, "viable_onion_potting_agent_1_max": 25, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [519.0, 522.0, 576.0, 570.0, 579.0, 582.0, 579.0, 573.0, 522.0, 573.0, 561.0, 525.0, 573.0, 525.0, 579.0, 579.0, 576.0, 573.0, 573.0, 627.0, 570.0, 581.0, 513.0, 527.0, 582.0, 533.0, 573.0, 573.0, 573.0, 582.0, 576.0, 579.0, 579.0, 582.0, 576.0, 582.0, 579.0, 530.0, 579.0, 527.0, 579.0, 579.0, 573.0, 570.0, 582.0, 582.0, 576.0, 573.0, 584.0, 576.0, 576.0, 573.0, 519.0, 522.0, 573.0, 579.0, 570.0, 573.0, 573.0, 519.0, 576.0, 582.0, 582.0, 573.0, 522.0, 539.0, 573.0, 582.0, 576.0, 576.0, 579.0, 530.0, 579.0, 582.0, 570.0, 573.0, 576.0, 576.0, 576.0, 582.0, 525.0, 582.0, 584.0, 570.0, 579.0, 627.0, 573.0, 579.0, 582.0, 587.0, 530.0, 525.0, 584.0, 579.0, 582.0, 579.0, 576.0, 582.0, 576.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [256.0, 263.0, 268.0, 254.0, 280.0, 296.0, 298.0, 272.0, 294.0, 285.0, 301.0, 281.0, 298.0, 281.0, 284.0, 289.0, 268.0, 254.0, 297.0, 276.0, 266.0, 295.0, 256.0, 269.0, 278.0, 295.0, 277.0, 248.0, 289.0, 290.0, 296.0, 283.0, 289.0, 287.0, 286.0, 287.0, 280.0, 293.0, 318.0, 309.0, 291.0, 279.0, 272.0, 309.0, 276.0, 237.0, 256.0, 271.0, 286.0, 
296.0, 254.0, 279.0, 273.0, 300.0, 289.0, 284.0, 287.0, 286.0, 292.0, 290.0, 283.0, 293.0, 292.0, 287.0, 285.0, 294.0, 300.0, 282.0, 289.0, 287.0, 288.0, 294.0, 284.0, 295.0, 262.0, 268.0, 294.0, 285.0, 269.0, 258.0, 301.0, 278.0, 288.0, 291.0, 301.0, 272.0, 280.0, 290.0, 304.0, 278.0, 286.0, 296.0, 282.0, 294.0, 290.0, 283.0, 288.0, 296.0, 298.0, 278.0, 289.0, 287.0, 282.0, 291.0, 258.0, 261.0, 260.0, 262.0, 290.0, 283.0, 287.0, 292.0, 281.0, 289.0, 281.0, 292.0, 286.0, 287.0, 272.0, 247.0, 288.0, 288.0, 298.0, 284.0, 283.0, 299.0, 295.0, 278.0, 272.0, 250.0, 262.0, 277.0, 284.0, 289.0, 300.0, 282.0, 286.0, 290.0, 288.0, 288.0, 290.0, 289.0, 252.0, 278.0, 296.0, 283.0, 279.0, 303.0, 283.0, 287.0, 283.0, 290.0, 278.0, 298.0, 285.0, 291.0, 278.0, 298.0, 294.0, 288.0, 262.0, 263.0, 299.0, 283.0, 293.0, 291.0, 276.0, 294.0, 290.0, 289.0, 316.0, 311.0, 278.0, 295.0, 279.0, 300.0, 282.0, 300.0, 296.0, 291.0, 280.0, 250.0, 255.0, 270.0, 282.0, 302.0, 280.0, 299.0, 288.0, 294.0, 295.0, 284.0, 286.0, 290.0, 299.0, 283.0, 286.0, 290.0, 299.0, 283.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7102671171729327, "mean_inference_ms": 1.2772917092306577, "mean_action_processing_ms": 0.13536466547217546, "mean_env_wait_ms": 0.8558784022983323, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 627.0, "episode_reward_min": 513.0, "episode_reward_mean": 568.2, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 237.0}, "policy_reward_max": {"ppo": 318.0}, "policy_reward_mean": {"ppo": 284.1}, "hist_stats": {"episode_reward": [519.0, 522.0, 576.0, 570.0, 579.0, 582.0, 579.0, 573.0, 522.0, 573.0, 561.0, 525.0, 573.0, 525.0, 579.0, 579.0, 576.0, 573.0, 573.0, 627.0, 570.0, 581.0, 513.0, 527.0, 582.0, 533.0, 573.0, 573.0, 573.0, 582.0, 576.0, 579.0, 579.0, 582.0, 576.0, 582.0, 579.0, 530.0, 579.0, 527.0, 579.0, 579.0, 573.0, 570.0, 582.0, 582.0, 576.0, 573.0, 584.0, 576.0, 576.0, 573.0, 519.0, 522.0, 573.0, 579.0, 570.0, 
573.0, 573.0, 519.0, 576.0, 582.0, 582.0, 573.0, 522.0, 539.0, 573.0, 582.0, 576.0, 576.0, 579.0, 530.0, 579.0, 582.0, 570.0, 573.0, 576.0, 576.0, 576.0, 582.0, 525.0, 582.0, 584.0, 570.0, 579.0, 627.0, 573.0, 579.0, 582.0, 587.0, 530.0, 525.0, 584.0, 579.0, 582.0, 579.0, 576.0, 582.0, 576.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [256.0, 263.0, 268.0, 254.0, 280.0, 296.0, 298.0, 272.0, 294.0, 285.0, 301.0, 281.0, 298.0, 281.0, 284.0, 289.0, 268.0, 254.0, 297.0, 276.0, 266.0, 295.0, 256.0, 269.0, 278.0, 295.0, 277.0, 248.0, 289.0, 290.0, 296.0, 283.0, 289.0, 287.0, 286.0, 287.0, 280.0, 293.0, 318.0, 309.0, 291.0, 279.0, 272.0, 309.0, 276.0, 237.0, 256.0, 271.0, 286.0, 296.0, 254.0, 279.0, 273.0, 300.0, 289.0, 284.0, 287.0, 286.0, 292.0, 290.0, 283.0, 293.0, 292.0, 287.0, 285.0, 294.0, 300.0, 282.0, 289.0, 287.0, 288.0, 294.0, 284.0, 295.0, 262.0, 268.0, 294.0, 285.0, 269.0, 258.0, 301.0, 278.0, 288.0, 291.0, 301.0, 272.0, 280.0, 290.0, 304.0, 278.0, 286.0, 296.0, 282.0, 294.0, 290.0, 283.0, 288.0, 296.0, 298.0, 278.0, 289.0, 287.0, 282.0, 291.0, 258.0, 261.0, 260.0, 262.0, 290.0, 283.0, 287.0, 292.0, 281.0, 289.0, 281.0, 292.0, 286.0, 287.0, 272.0, 247.0, 288.0, 288.0, 298.0, 284.0, 283.0, 299.0, 295.0, 278.0, 272.0, 250.0, 262.0, 277.0, 284.0, 289.0, 300.0, 282.0, 286.0, 290.0, 288.0, 288.0, 290.0, 289.0, 252.0, 278.0, 296.0, 283.0, 279.0, 303.0, 283.0, 287.0, 283.0, 290.0, 278.0, 298.0, 285.0, 291.0, 278.0, 298.0, 294.0, 288.0, 262.0, 263.0, 299.0, 283.0, 293.0, 
291.0, 276.0, 294.0, 290.0, 289.0, 316.0, 311.0, 278.0, 295.0, 279.0, 300.0, 282.0, 300.0, 296.0, 291.0, 280.0, 250.0, 255.0, 270.0, 282.0, 302.0, 280.0, 299.0, 288.0, 294.0, 295.0, 284.0, 286.0, 290.0, 299.0, 283.0, 286.0, 290.0, 299.0, 283.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7102671171729327, "mean_inference_ms": 1.2772917092306577, "mean_action_processing_ms": 0.13536466547217546, "mean_env_wait_ms": 0.8558784022983323, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 5760000, "num_agent_steps_trained": 5760000, "num_env_steps_sampled": 2880000, "num_env_steps_trained": 2880000, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 2880000, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 5760000, "timers": {"training_iteration_time_ms": 3679.165, "learn_time_ms": 1150.927, "learn_throughput": 11121.47, "synch_weights_time_ms": 11.853}, "counters": {"num_env_steps_sampled": 2880000, "num_env_steps_trained": 2880000, "num_agent_steps_sampled": 5760000, "num_agent_steps_trained": 5760000}, "done": false, "episodes_total": 7200, "training_iteration": 225, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-15-07", "timestamp": 1666581307, "time_this_iter_s": 3.679936408996582, "time_total_s": 876.2168726921082, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 876.2168726921082, "timesteps_since_restore": 0, "iterations_since_restore": 225, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 21.200000000000003, "ram_util_percent": 10.616666666666665}}
+{"custom_metrics": {"sparse_reward_mean": 195.4, "sparse_reward_min": 60, "sparse_reward_max": 220, "shaped_reward_mean": 174.63, "shaped_reward_min": 60, "shaped_reward_max": 187, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.48, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 16.81, "onion_pickup_agent_1_min": 5, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 15.29, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 16.66, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 25, "onion_drop_agent_0_mean": 0.17, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.11, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.04, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, 
"useful_onion_drop_agent_1_mean": 0.05, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.05, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 16.46, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 25, "dish_pickup_agent_0_mean": 5.6, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.25, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.29, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.91, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.22, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.21, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.04, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.21, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 10, "soup_pickup_agent_1_mean": 4.79, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.09, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 10, "soup_delivery_agent_1_mean": 4.72, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.07, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.04, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.05, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 16.46, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 25, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.05, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 16.46, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 25, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.002445029327645898, "policy_loss": 0.002136027906090021, "vf_loss": 
7.732485771179199, "vf_explained_var": 0.622205913066864, "kl": 0.002588339149951935, "entropy": 0.9284940958023071, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 2892800, "num_env_steps_trained": 2892800, "num_agent_steps_sampled": 5785600, "num_agent_steps_trained": 5785600}, "sampler_results": {"episode_reward_max": 627.0, "episode_reward_min": 180.0, "episode_reward_mean": 565.43, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 89.0}, "policy_reward_max": {"ppo": 316.0}, "policy_reward_mean": {"ppo": 282.715}, "custom_metrics": {"sparse_reward_mean": 195.4, "sparse_reward_min": 60, "sparse_reward_max": 220, "shaped_reward_mean": 174.63, "shaped_reward_min": 60, "shaped_reward_max": 187, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.48, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 16.81, 
"onion_pickup_agent_1_min": 5, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 15.29, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 16.66, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 25, "onion_drop_agent_0_mean": 0.17, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.11, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.04, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.05, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.05, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 16.46, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 25, "dish_pickup_agent_0_mean": 5.6, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.25, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.29, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.91, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.22, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.21, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.04, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.21, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 10, "soup_pickup_agent_1_mean": 4.79, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.09, "soup_delivery_agent_0_min": 1, 
"soup_delivery_agent_0_max": 10, "soup_delivery_agent_1_mean": 4.72, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.07, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.04, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.05, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 16.46, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 25, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.05, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 16.46, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 25, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 582.0, 576.0, 582.0, 579.0, 530.0, 579.0, 527.0, 579.0, 579.0, 573.0, 570.0, 582.0, 582.0, 576.0, 573.0, 584.0, 576.0, 576.0, 573.0, 519.0, 522.0, 573.0, 579.0, 570.0, 573.0, 573.0, 519.0, 576.0, 582.0, 582.0, 573.0, 522.0, 539.0, 573.0, 582.0, 576.0, 576.0, 579.0, 530.0, 579.0, 582.0, 570.0, 573.0, 576.0, 576.0, 576.0, 582.0, 525.0, 582.0, 584.0, 570.0, 579.0, 627.0, 573.0, 579.0, 582.0, 587.0, 530.0, 525.0, 584.0, 579.0, 582.0, 579.0, 576.0, 582.0, 576.0, 582.0, 582.0, 582.0, 525.0, 579.0, 576.0, 576.0, 582.0, 584.0, 579.0, 582.0, 579.0, 570.0, 573.0, 482.0, 573.0, 576.0, 582.0, 576.0, 527.0, 570.0, 573.0, 180.0, 524.0, 582.0, 567.0, 522.0, 573.0, 579.0, 576.0, 579.0, 582.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [285.0, 294.0, 300.0, 282.0, 289.0, 287.0, 288.0, 294.0, 284.0, 295.0, 262.0, 268.0, 294.0, 285.0, 269.0, 258.0, 301.0, 278.0, 288.0, 291.0, 301.0, 272.0, 280.0, 290.0, 304.0, 278.0, 286.0, 296.0, 282.0, 294.0, 290.0, 283.0, 288.0, 296.0, 298.0, 278.0, 289.0, 287.0, 282.0, 291.0, 258.0, 261.0, 260.0, 262.0, 290.0, 283.0, 287.0, 292.0, 281.0, 
289.0, 281.0, 292.0, 286.0, 287.0, 272.0, 247.0, 288.0, 288.0, 298.0, 284.0, 283.0, 299.0, 295.0, 278.0, 272.0, 250.0, 262.0, 277.0, 284.0, 289.0, 300.0, 282.0, 286.0, 290.0, 288.0, 288.0, 290.0, 289.0, 252.0, 278.0, 296.0, 283.0, 279.0, 303.0, 283.0, 287.0, 283.0, 290.0, 278.0, 298.0, 285.0, 291.0, 278.0, 298.0, 294.0, 288.0, 262.0, 263.0, 299.0, 283.0, 293.0, 291.0, 276.0, 294.0, 290.0, 289.0, 316.0, 311.0, 278.0, 295.0, 279.0, 300.0, 282.0, 300.0, 296.0, 291.0, 280.0, 250.0, 255.0, 270.0, 282.0, 302.0, 280.0, 299.0, 288.0, 294.0, 295.0, 284.0, 286.0, 290.0, 299.0, 283.0, 286.0, 290.0, 299.0, 283.0, 278.0, 304.0, 284.0, 298.0, 262.0, 263.0, 282.0, 297.0, 284.0, 292.0, 287.0, 289.0, 289.0, 293.0, 296.0, 288.0, 279.0, 300.0, 292.0, 290.0, 294.0, 285.0, 284.0, 286.0, 288.0, 285.0, 247.0, 235.0, 294.0, 279.0, 289.0, 287.0, 297.0, 285.0, 279.0, 297.0, 262.0, 265.0, 274.0, 296.0, 283.0, 290.0, 89.0, 91.0, 266.0, 258.0, 278.0, 304.0, 288.0, 279.0, 255.0, 267.0, 285.0, 288.0, 289.0, 290.0, 299.0, 277.0, 291.0, 288.0, 298.0, 284.0, 285.0, 294.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7101525919658326, "mean_inference_ms": 1.2769321856060183, "mean_action_processing_ms": 0.1353521633293626, "mean_env_wait_ms": 0.8556845453719546, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 627.0, "episode_reward_min": 180.0, "episode_reward_mean": 565.43, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 89.0}, "policy_reward_max": {"ppo": 316.0}, "policy_reward_mean": {"ppo": 282.715}, "hist_stats": {"episode_reward": [579.0, 582.0, 576.0, 582.0, 579.0, 530.0, 579.0, 527.0, 579.0, 579.0, 573.0, 570.0, 582.0, 582.0, 576.0, 573.0, 584.0, 576.0, 576.0, 573.0, 519.0, 522.0, 573.0, 579.0, 570.0, 573.0, 573.0, 519.0, 576.0, 582.0, 582.0, 573.0, 522.0, 539.0, 573.0, 582.0, 576.0, 576.0, 579.0, 530.0, 579.0, 582.0, 570.0, 573.0, 576.0, 576.0, 576.0, 582.0, 525.0, 582.0, 584.0, 570.0, 579.0, 627.0, 573.0, 579.0, 582.0, 
587.0, 530.0, 525.0, 584.0, 579.0, 582.0, 579.0, 576.0, 582.0, 576.0, 582.0, 582.0, 582.0, 525.0, 579.0, 576.0, 576.0, 582.0, 584.0, 579.0, 582.0, 579.0, 570.0, 573.0, 482.0, 573.0, 576.0, 582.0, 576.0, 527.0, 570.0, 573.0, 180.0, 524.0, 582.0, 567.0, 522.0, 573.0, 579.0, 576.0, 579.0, 582.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [285.0, 294.0, 300.0, 282.0, 289.0, 287.0, 288.0, 294.0, 284.0, 295.0, 262.0, 268.0, 294.0, 285.0, 269.0, 258.0, 301.0, 278.0, 288.0, 291.0, 301.0, 272.0, 280.0, 290.0, 304.0, 278.0, 286.0, 296.0, 282.0, 294.0, 290.0, 283.0, 288.0, 296.0, 298.0, 278.0, 289.0, 287.0, 282.0, 291.0, 258.0, 261.0, 260.0, 262.0, 290.0, 283.0, 287.0, 292.0, 281.0, 289.0, 281.0, 292.0, 286.0, 287.0, 272.0, 247.0, 288.0, 288.0, 298.0, 284.0, 283.0, 299.0, 295.0, 278.0, 272.0, 250.0, 262.0, 277.0, 284.0, 289.0, 300.0, 282.0, 286.0, 290.0, 288.0, 288.0, 290.0, 289.0, 252.0, 278.0, 296.0, 283.0, 279.0, 303.0, 283.0, 287.0, 283.0, 290.0, 278.0, 298.0, 285.0, 291.0, 278.0, 298.0, 294.0, 288.0, 262.0, 263.0, 299.0, 283.0, 293.0, 291.0, 276.0, 294.0, 290.0, 289.0, 316.0, 311.0, 278.0, 295.0, 279.0, 300.0, 282.0, 300.0, 296.0, 291.0, 280.0, 250.0, 255.0, 270.0, 282.0, 302.0, 280.0, 299.0, 288.0, 294.0, 295.0, 284.0, 286.0, 290.0, 299.0, 283.0, 286.0, 290.0, 299.0, 283.0, 278.0, 304.0, 284.0, 298.0, 262.0, 263.0, 282.0, 297.0, 284.0, 292.0, 287.0, 289.0, 289.0, 293.0, 296.0, 288.0, 279.0, 300.0, 292.0, 290.0, 294.0, 285.0, 284.0, 286.0, 288.0, 285.0, 247.0, 235.0, 294.0, 
279.0, 289.0, 287.0, 297.0, 285.0, 279.0, 297.0, 262.0, 265.0, 274.0, 296.0, 283.0, 290.0, 89.0, 91.0, 266.0, 258.0, 278.0, 304.0, 288.0, 279.0, 255.0, 267.0, 285.0, 288.0, 289.0, 290.0, 299.0, 277.0, 291.0, 288.0, 298.0, 284.0, 285.0, 294.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7101525919658326, "mean_inference_ms": 1.2769321856060183, "mean_action_processing_ms": 0.1353521633293626, "mean_env_wait_ms": 0.8556845453719546, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 5785600, "num_agent_steps_trained": 5785600, "num_env_steps_sampled": 2892800, "num_env_steps_trained": 2892800, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 2892800, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 5785600, "timers": {"training_iteration_time_ms": 3665.77, "learn_time_ms": 1139.941, "learn_throughput": 11228.652, "synch_weights_time_ms": 11.812}, "counters": {"num_env_steps_sampled": 2892800, "num_env_steps_trained": 2892800, "num_agent_steps_sampled": 5785600, "num_agent_steps_trained": 5785600}, "done": false, "episodes_total": 7232, "training_iteration": 226, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-15-11", "timestamp": 1666581311, "time_this_iter_s": 3.691885471343994, "time_total_s": 879.9087581634521, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 879.9087581634521, "timesteps_since_restore": 0, "iterations_since_restore": 226, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.580000000000002, "ram_util_percent": 10.62}}
+{"custom_metrics": {"sparse_reward_mean": 194.0, "sparse_reward_min": 60, "sparse_reward_max": 220, "shaped_reward_mean": 173.61, "shaped_reward_min": 60, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.48, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 16.8, "onion_pickup_agent_1_min": 5, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 15.23, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 16.56, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 25, "onion_drop_agent_0_mean": 0.17, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.22, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.06, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, 
"useful_onion_drop_agent_1_mean": 0.13, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 15.04, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 16.33, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 25, "dish_pickup_agent_0_mean": 5.38, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.31, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 5.09, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 5.01, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.18, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.21, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.03, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.88, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 4.92, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.83, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.05, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.04, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 16.33, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 25, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.04, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 16.33, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 25, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.002218155190348625, "policy_loss": -0.0025398321449756622, "vf_loss": 
7.840729713439941, "vf_explained_var": 0.6095919609069824, "kl": 0.002450748812407255, "entropy": 0.9247908592224121, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 2905600, "num_env_steps_trained": 2905600, "num_agent_steps_sampled": 5811200, "num_agent_steps_trained": 5811200}, "sampler_results": {"episode_reward_max": 627.0, "episode_reward_min": 180.0, "episode_reward_mean": 561.61, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 89.0}, "policy_reward_max": {"ppo": 316.0}, "policy_reward_mean": {"ppo": 280.805}, "custom_metrics": {"sparse_reward_mean": 194.0, "sparse_reward_min": 60, "sparse_reward_max": 220, "shaped_reward_mean": 173.61, "shaped_reward_min": 60, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.48, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 16.8, 
"onion_pickup_agent_1_min": 5, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 15.23, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 16.56, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 25, "onion_drop_agent_0_mean": 0.17, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.22, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.06, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.13, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 15.04, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 16.33, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 25, "dish_pickup_agent_0_mean": 5.38, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.31, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 5.09, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 5.01, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.18, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.21, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.03, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.88, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 4.92, "soup_delivery_agent_0_min": 1, 
"soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.83, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.05, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.04, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 16.33, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 25, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.04, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 16.33, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 25, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [522.0, 539.0, 573.0, 582.0, 576.0, 576.0, 579.0, 530.0, 579.0, 582.0, 570.0, 573.0, 576.0, 576.0, 576.0, 582.0, 525.0, 582.0, 584.0, 570.0, 579.0, 627.0, 573.0, 579.0, 582.0, 587.0, 530.0, 525.0, 584.0, 579.0, 582.0, 579.0, 576.0, 582.0, 576.0, 582.0, 582.0, 582.0, 525.0, 579.0, 576.0, 576.0, 582.0, 584.0, 579.0, 582.0, 579.0, 570.0, 573.0, 482.0, 573.0, 576.0, 582.0, 576.0, 527.0, 570.0, 573.0, 180.0, 524.0, 582.0, 567.0, 522.0, 573.0, 579.0, 576.0, 579.0, 582.0, 579.0, 582.0, 579.0, 522.0, 582.0, 573.0, 561.0, 522.0, 525.0, 573.0, 525.0, 519.0, 573.0, 573.0, 584.0, 573.0, 576.0, 582.0, 530.0, 465.0, 570.0, 579.0, 579.0, 590.0, 582.0, 573.0, 530.0, 582.0, 519.0, 513.0, 576.0, 579.0, 525.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [272.0, 250.0, 262.0, 277.0, 284.0, 289.0, 300.0, 282.0, 286.0, 290.0, 288.0, 288.0, 290.0, 289.0, 252.0, 278.0, 296.0, 283.0, 279.0, 303.0, 283.0, 287.0, 283.0, 290.0, 278.0, 298.0, 285.0, 291.0, 278.0, 298.0, 294.0, 288.0, 262.0, 263.0, 299.0, 283.0, 293.0, 291.0, 276.0, 294.0, 290.0, 289.0, 316.0, 311.0, 278.0, 295.0, 279.0, 300.0, 282.0, 
300.0, 296.0, 291.0, 280.0, 250.0, 255.0, 270.0, 282.0, 302.0, 280.0, 299.0, 288.0, 294.0, 295.0, 284.0, 286.0, 290.0, 299.0, 283.0, 286.0, 290.0, 299.0, 283.0, 278.0, 304.0, 284.0, 298.0, 262.0, 263.0, 282.0, 297.0, 284.0, 292.0, 287.0, 289.0, 289.0, 293.0, 296.0, 288.0, 279.0, 300.0, 292.0, 290.0, 294.0, 285.0, 284.0, 286.0, 288.0, 285.0, 247.0, 235.0, 294.0, 279.0, 289.0, 287.0, 297.0, 285.0, 279.0, 297.0, 262.0, 265.0, 274.0, 296.0, 283.0, 290.0, 89.0, 91.0, 266.0, 258.0, 278.0, 304.0, 288.0, 279.0, 255.0, 267.0, 285.0, 288.0, 289.0, 290.0, 299.0, 277.0, 291.0, 288.0, 298.0, 284.0, 285.0, 294.0, 281.0, 301.0, 288.0, 291.0, 269.0, 253.0, 295.0, 287.0, 285.0, 288.0, 283.0, 278.0, 249.0, 273.0, 262.0, 263.0, 292.0, 281.0, 264.0, 261.0, 262.0, 257.0, 295.0, 278.0, 294.0, 279.0, 287.0, 297.0, 270.0, 303.0, 277.0, 299.0, 292.0, 290.0, 278.0, 252.0, 218.0, 247.0, 280.0, 290.0, 280.0, 299.0, 289.0, 290.0, 299.0, 291.0, 287.0, 295.0, 279.0, 294.0, 248.0, 282.0, 289.0, 293.0, 260.0, 259.0, 262.0, 251.0, 302.0, 274.0, 290.0, 289.0, 250.0, 275.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7100616240854177, "mean_inference_ms": 1.27656070500398, "mean_action_processing_ms": 0.13533752460087545, "mean_env_wait_ms": 0.8554818419261924, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 627.0, "episode_reward_min": 180.0, "episode_reward_mean": 561.61, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 89.0}, "policy_reward_max": {"ppo": 316.0}, "policy_reward_mean": {"ppo": 280.805}, "hist_stats": {"episode_reward": [522.0, 539.0, 573.0, 582.0, 576.0, 576.0, 579.0, 530.0, 579.0, 582.0, 570.0, 573.0, 576.0, 576.0, 576.0, 582.0, 525.0, 582.0, 584.0, 570.0, 579.0, 627.0, 573.0, 579.0, 582.0, 587.0, 530.0, 525.0, 584.0, 579.0, 582.0, 579.0, 576.0, 582.0, 576.0, 582.0, 582.0, 582.0, 525.0, 579.0, 576.0, 576.0, 582.0, 584.0, 579.0, 582.0, 579.0, 570.0, 573.0, 482.0, 573.0, 576.0, 582.0, 576.0, 527.0, 570.0, 573.0, 
180.0, 524.0, 582.0, 567.0, 522.0, 573.0, 579.0, 576.0, 579.0, 582.0, 579.0, 582.0, 579.0, 522.0, 582.0, 573.0, 561.0, 522.0, 525.0, 573.0, 525.0, 519.0, 573.0, 573.0, 584.0, 573.0, 576.0, 582.0, 530.0, 465.0, 570.0, 579.0, 579.0, 590.0, 582.0, 573.0, 530.0, 582.0, 519.0, 513.0, 576.0, 579.0, 525.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [272.0, 250.0, 262.0, 277.0, 284.0, 289.0, 300.0, 282.0, 286.0, 290.0, 288.0, 288.0, 290.0, 289.0, 252.0, 278.0, 296.0, 283.0, 279.0, 303.0, 283.0, 287.0, 283.0, 290.0, 278.0, 298.0, 285.0, 291.0, 278.0, 298.0, 294.0, 288.0, 262.0, 263.0, 299.0, 283.0, 293.0, 291.0, 276.0, 294.0, 290.0, 289.0, 316.0, 311.0, 278.0, 295.0, 279.0, 300.0, 282.0, 300.0, 296.0, 291.0, 280.0, 250.0, 255.0, 270.0, 282.0, 302.0, 280.0, 299.0, 288.0, 294.0, 295.0, 284.0, 286.0, 290.0, 299.0, 283.0, 286.0, 290.0, 299.0, 283.0, 278.0, 304.0, 284.0, 298.0, 262.0, 263.0, 282.0, 297.0, 284.0, 292.0, 287.0, 289.0, 289.0, 293.0, 296.0, 288.0, 279.0, 300.0, 292.0, 290.0, 294.0, 285.0, 284.0, 286.0, 288.0, 285.0, 247.0, 235.0, 294.0, 279.0, 289.0, 287.0, 297.0, 285.0, 279.0, 297.0, 262.0, 265.0, 274.0, 296.0, 283.0, 290.0, 89.0, 91.0, 266.0, 258.0, 278.0, 304.0, 288.0, 279.0, 255.0, 267.0, 285.0, 288.0, 289.0, 290.0, 299.0, 277.0, 291.0, 288.0, 298.0, 284.0, 285.0, 294.0, 281.0, 301.0, 288.0, 291.0, 269.0, 253.0, 295.0, 287.0, 285.0, 288.0, 283.0, 278.0, 249.0, 273.0, 262.0, 263.0, 292.0, 281.0, 264.0, 261.0, 262.0, 257.0, 295.0, 278.0, 294.0, 279.0, 287.0, 297.0, 270.0, 
303.0, 277.0, 299.0, 292.0, 290.0, 278.0, 252.0, 218.0, 247.0, 280.0, 290.0, 280.0, 299.0, 289.0, 290.0, 299.0, 291.0, 287.0, 295.0, 279.0, 294.0, 248.0, 282.0, 289.0, 293.0, 260.0, 259.0, 262.0, 251.0, 302.0, 274.0, 290.0, 289.0, 250.0, 275.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7100616240854177, "mean_inference_ms": 1.27656070500398, "mean_action_processing_ms": 0.13533752460087545, "mean_env_wait_ms": 0.8554818419261924, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 5811200, "num_agent_steps_trained": 5811200, "num_env_steps_sampled": 2905600, "num_env_steps_trained": 2905600, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 2905600, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 5811200, "timers": {"training_iteration_time_ms": 3670.46, "learn_time_ms": 1136.875, "learn_throughput": 11258.933, "synch_weights_time_ms": 11.752}, "counters": {"num_env_steps_sampled": 2905600, "num_env_steps_trained": 2905600, "num_agent_steps_sampled": 5811200, "num_agent_steps_trained": 5811200}, "done": false, "episodes_total": 7264, "training_iteration": 227, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-15-15", "timestamp": 1666581315, "time_this_iter_s": 3.6734375953674316, "time_total_s": 883.5821957588196, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 883.5821957588196, "timesteps_since_restore": 0, "iterations_since_restore": 227, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 21.250000000000004, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 193.2, "sparse_reward_min": 60, "sparse_reward_max": 200, "shaped_reward_mean": 172.14, "shaped_reward_min": 60, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.44, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 16.75, "onion_pickup_agent_1_min": 5, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 15.2, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 16.43, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 25, "onion_drop_agent_0_mean": 0.16, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.26, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.06, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, 
"useful_onion_drop_agent_1_mean": 0.17, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 14.98, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 16.19, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 25, "dish_pickup_agent_0_mean": 5.5, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.09, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 5.11, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.8, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.22, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.17, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.09, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.77, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 4.99, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.69, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.04, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.05, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 4, "optimal_onion_potting_agent_0_mean": 14.98, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 16.19, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 25, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.98, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 16.19, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 25, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.002215281594544649, "policy_loss": -0.0025261661503463984, "vf_loss": 
7.811648845672607, "vf_explained_var": 0.6087123155593872, "kl": 0.0020283572375774384, "entropy": 0.9405587911605835, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 2918400, "num_env_steps_trained": 2918400, "num_agent_steps_sampled": 5836800, "num_agent_steps_trained": 5836800}, "sampler_results": {"episode_reward_max": 590.0, "episode_reward_min": 180.0, "episode_reward_mean": 558.54, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 89.0}, "policy_reward_max": {"ppo": 304.0}, "policy_reward_mean": {"ppo": 279.27}, "custom_metrics": {"sparse_reward_mean": 193.2, "sparse_reward_min": 60, "sparse_reward_max": 200, "shaped_reward_mean": 172.14, "shaped_reward_min": 60, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.44, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 16.75, 
"onion_pickup_agent_1_min": 5, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 15.2, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 16.43, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 25, "onion_drop_agent_0_mean": 0.16, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.26, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.06, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.17, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 14.98, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 16.19, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 25, "dish_pickup_agent_0_mean": 5.5, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.09, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 5.11, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.8, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.22, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.17, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.09, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.77, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 4.99, "soup_delivery_agent_0_min": 1, 
"soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.69, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.04, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.05, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 4, "optimal_onion_potting_agent_0_mean": 14.98, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 16.19, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 25, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.98, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 16.19, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 25, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [576.0, 582.0, 576.0, 582.0, 582.0, 582.0, 525.0, 579.0, 576.0, 576.0, 582.0, 584.0, 579.0, 582.0, 579.0, 570.0, 573.0, 482.0, 573.0, 576.0, 582.0, 576.0, 527.0, 570.0, 573.0, 180.0, 524.0, 582.0, 567.0, 522.0, 573.0, 579.0, 576.0, 579.0, 582.0, 579.0, 582.0, 579.0, 522.0, 582.0, 573.0, 561.0, 522.0, 525.0, 573.0, 525.0, 519.0, 573.0, 573.0, 584.0, 573.0, 576.0, 582.0, 530.0, 465.0, 570.0, 579.0, 579.0, 590.0, 582.0, 573.0, 530.0, 582.0, 519.0, 513.0, 576.0, 579.0, 525.0, 573.0, 525.0, 573.0, 516.0, 579.0, 576.0, 576.0, 579.0, 576.0, 573.0, 570.0, 576.0, 573.0, 579.0, 570.0, 525.0, 584.0, 579.0, 519.0, 522.0, 530.0, 581.0, 519.0, 576.0, 582.0, 570.0, 573.0, 579.0, 527.0, 576.0, 570.0, 525.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [286.0, 290.0, 299.0, 283.0, 286.0, 290.0, 299.0, 283.0, 278.0, 304.0, 284.0, 298.0, 262.0, 263.0, 282.0, 297.0, 284.0, 292.0, 287.0, 289.0, 289.0, 293.0, 296.0, 288.0, 279.0, 300.0, 292.0, 290.0, 294.0, 285.0, 284.0, 286.0, 288.0, 285.0, 247.0, 235.0, 294.0, 279.0, 289.0, 287.0, 297.0, 285.0, 279.0, 297.0, 262.0, 265.0, 274.0, 296.0, 283.0, 
290.0, 89.0, 91.0, 266.0, 258.0, 278.0, 304.0, 288.0, 279.0, 255.0, 267.0, 285.0, 288.0, 289.0, 290.0, 299.0, 277.0, 291.0, 288.0, 298.0, 284.0, 285.0, 294.0, 281.0, 301.0, 288.0, 291.0, 269.0, 253.0, 295.0, 287.0, 285.0, 288.0, 283.0, 278.0, 249.0, 273.0, 262.0, 263.0, 292.0, 281.0, 264.0, 261.0, 262.0, 257.0, 295.0, 278.0, 294.0, 279.0, 287.0, 297.0, 270.0, 303.0, 277.0, 299.0, 292.0, 290.0, 278.0, 252.0, 218.0, 247.0, 280.0, 290.0, 280.0, 299.0, 289.0, 290.0, 299.0, 291.0, 287.0, 295.0, 279.0, 294.0, 248.0, 282.0, 289.0, 293.0, 260.0, 259.0, 262.0, 251.0, 302.0, 274.0, 290.0, 289.0, 250.0, 275.0, 303.0, 270.0, 263.0, 262.0, 292.0, 281.0, 255.0, 261.0, 294.0, 285.0, 289.0, 287.0, 294.0, 282.0, 293.0, 286.0, 304.0, 272.0, 294.0, 279.0, 276.0, 294.0, 280.0, 296.0, 278.0, 295.0, 281.0, 298.0, 286.0, 284.0, 259.0, 266.0, 299.0, 285.0, 281.0, 298.0, 264.0, 255.0, 259.0, 263.0, 261.0, 269.0, 285.0, 296.0, 261.0, 258.0, 281.0, 295.0, 294.0, 288.0, 282.0, 288.0, 292.0, 281.0, 299.0, 280.0, 251.0, 276.0, 284.0, 292.0, 298.0, 272.0, 272.0, 253.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7099584001922209, "mean_inference_ms": 1.276192459329255, "mean_action_processing_ms": 0.13532474653956864, "mean_env_wait_ms": 0.8552878576602316, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 590.0, "episode_reward_min": 180.0, "episode_reward_mean": 558.54, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 89.0}, "policy_reward_max": {"ppo": 304.0}, "policy_reward_mean": {"ppo": 279.27}, "hist_stats": {"episode_reward": [576.0, 582.0, 576.0, 582.0, 582.0, 582.0, 525.0, 579.0, 576.0, 576.0, 582.0, 584.0, 579.0, 582.0, 579.0, 570.0, 573.0, 482.0, 573.0, 576.0, 582.0, 576.0, 527.0, 570.0, 573.0, 180.0, 524.0, 582.0, 567.0, 522.0, 573.0, 579.0, 576.0, 579.0, 582.0, 579.0, 582.0, 579.0, 522.0, 582.0, 573.0, 561.0, 522.0, 525.0, 573.0, 525.0, 519.0, 573.0, 573.0, 584.0, 573.0, 576.0, 582.0, 530.0, 465.0, 570.0, 579.0, 
579.0, 590.0, 582.0, 573.0, 530.0, 582.0, 519.0, 513.0, 576.0, 579.0, 525.0, 573.0, 525.0, 573.0, 516.0, 579.0, 576.0, 576.0, 579.0, 576.0, 573.0, 570.0, 576.0, 573.0, 579.0, 570.0, 525.0, 584.0, 579.0, 519.0, 522.0, 530.0, 581.0, 519.0, 576.0, 582.0, 570.0, 573.0, 579.0, 527.0, 576.0, 570.0, 525.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [286.0, 290.0, 299.0, 283.0, 286.0, 290.0, 299.0, 283.0, 278.0, 304.0, 284.0, 298.0, 262.0, 263.0, 282.0, 297.0, 284.0, 292.0, 287.0, 289.0, 289.0, 293.0, 296.0, 288.0, 279.0, 300.0, 292.0, 290.0, 294.0, 285.0, 284.0, 286.0, 288.0, 285.0, 247.0, 235.0, 294.0, 279.0, 289.0, 287.0, 297.0, 285.0, 279.0, 297.0, 262.0, 265.0, 274.0, 296.0, 283.0, 290.0, 89.0, 91.0, 266.0, 258.0, 278.0, 304.0, 288.0, 279.0, 255.0, 267.0, 285.0, 288.0, 289.0, 290.0, 299.0, 277.0, 291.0, 288.0, 298.0, 284.0, 285.0, 294.0, 281.0, 301.0, 288.0, 291.0, 269.0, 253.0, 295.0, 287.0, 285.0, 288.0, 283.0, 278.0, 249.0, 273.0, 262.0, 263.0, 292.0, 281.0, 264.0, 261.0, 262.0, 257.0, 295.0, 278.0, 294.0, 279.0, 287.0, 297.0, 270.0, 303.0, 277.0, 299.0, 292.0, 290.0, 278.0, 252.0, 218.0, 247.0, 280.0, 290.0, 280.0, 299.0, 289.0, 290.0, 299.0, 291.0, 287.0, 295.0, 279.0, 294.0, 248.0, 282.0, 289.0, 293.0, 260.0, 259.0, 262.0, 251.0, 302.0, 274.0, 290.0, 289.0, 250.0, 275.0, 303.0, 270.0, 263.0, 262.0, 292.0, 281.0, 255.0, 261.0, 294.0, 285.0, 289.0, 287.0, 294.0, 282.0, 293.0, 286.0, 304.0, 272.0, 294.0, 279.0, 276.0, 294.0, 280.0, 296.0, 278.0, 295.0, 281.0, 298.0, 286.0, 
284.0, 259.0, 266.0, 299.0, 285.0, 281.0, 298.0, 264.0, 255.0, 259.0, 263.0, 261.0, 269.0, 285.0, 296.0, 261.0, 258.0, 281.0, 295.0, 294.0, 288.0, 282.0, 288.0, 292.0, 281.0, 299.0, 280.0, 251.0, 276.0, 284.0, 292.0, 298.0, 272.0, 272.0, 253.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7099584001922209, "mean_inference_ms": 1.276192459329255, "mean_action_processing_ms": 0.13532474653956864, "mean_env_wait_ms": 0.8552878576602316, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 5836800, "num_agent_steps_trained": 5836800, "num_env_steps_sampled": 2918400, "num_env_steps_trained": 2918400, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 2918400, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 5836800, "timers": {"training_iteration_time_ms": 3671.44, "learn_time_ms": 1137.158, "learn_throughput": 11256.132, "synch_weights_time_ms": 12.303}, "counters": {"num_env_steps_sampled": 2918400, "num_env_steps_trained": 2918400, "num_agent_steps_sampled": 5836800, "num_agent_steps_trained": 5836800}, "done": false, "episodes_total": 7296, "training_iteration": 228, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-15-19", "timestamp": 1666581319, "time_this_iter_s": 3.7271084785461426, "time_total_s": 887.3093042373657, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 887.3093042373657, "timesteps_since_restore": 0, "iterations_since_restore": 228, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 24.580000000000002, "ram_util_percent": 10.62}}
+{"custom_metrics": {"sparse_reward_mean": 193.6, "sparse_reward_min": 160, "sparse_reward_max": 200, "shaped_reward_mean": 172.48, "shaped_reward_min": 145, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.36, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 16.82, "onion_pickup_agent_1_min": 5, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 15.06, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 16.49, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.11, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.36, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.08, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, 
"useful_onion_drop_agent_1_mean": 0.19, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 14.92, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 16.13, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 5.61, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.06, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 12, "useful_dish_pickup_agent_0_mean": 5.22, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.79, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.22, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.18, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 6, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.22, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.73, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 10, "soup_delivery_agent_0_mean": 5.1, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.62, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.03, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.06, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 4, "optimal_onion_potting_agent_0_mean": 14.92, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 16.13, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.92, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 16.13, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.000783019233494997, "policy_loss": 0.00048736238386482, "vf_loss": 
7.665143013000488, "vf_explained_var": 0.6067088842391968, "kl": 0.0021572881378233433, "entropy": 0.9417125582695007, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 2931200, "num_env_steps_trained": 2931200, "num_agent_steps_sampled": 5862400, "num_agent_steps_trained": 5862400}, "sampler_results": {"episode_reward_max": 590.0, "episode_reward_min": 465.0, "episode_reward_mean": 559.68, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 218.0}, "policy_reward_max": {"ppo": 304.0}, "policy_reward_mean": {"ppo": 279.84}, "custom_metrics": {"sparse_reward_mean": 193.6, "sparse_reward_min": 160, "sparse_reward_max": 200, "shaped_reward_mean": 172.48, "shaped_reward_min": 145, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.36, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 16.82, 
"onion_pickup_agent_1_min": 5, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 15.06, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 16.49, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.11, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.36, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.08, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.19, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 14.92, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 16.13, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 5.61, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.06, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 12, "useful_dish_pickup_agent_0_mean": 5.22, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.79, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.22, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.18, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 6, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.22, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.73, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 10, "soup_delivery_agent_0_mean": 5.1, "soup_delivery_agent_0_min": 1, 
"soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.62, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.03, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.06, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 4, "optimal_onion_potting_agent_0_mean": 14.92, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 16.13, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.92, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 16.13, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [576.0, 579.0, 582.0, 579.0, 582.0, 579.0, 522.0, 582.0, 573.0, 561.0, 522.0, 525.0, 573.0, 525.0, 519.0, 573.0, 573.0, 584.0, 573.0, 576.0, 582.0, 530.0, 465.0, 570.0, 579.0, 579.0, 590.0, 582.0, 573.0, 530.0, 582.0, 519.0, 513.0, 576.0, 579.0, 525.0, 573.0, 525.0, 573.0, 516.0, 579.0, 576.0, 576.0, 579.0, 576.0, 573.0, 570.0, 576.0, 573.0, 579.0, 570.0, 525.0, 584.0, 579.0, 519.0, 522.0, 530.0, 581.0, 519.0, 576.0, 582.0, 570.0, 573.0, 579.0, 527.0, 576.0, 570.0, 525.0, 573.0, 530.0, 525.0, 525.0, 570.0, 530.0, 576.0, 573.0, 587.0, 570.0, 570.0, 582.0, 576.0, 527.0, 570.0, 570.0, 582.0, 582.0, 576.0, 573.0, 576.0, 570.0, 570.0, 522.0, 573.0, 530.0, 521.0, 530.0, 487.0, 576.0, 579.0, 584.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [299.0, 277.0, 291.0, 288.0, 298.0, 284.0, 285.0, 294.0, 281.0, 301.0, 288.0, 291.0, 269.0, 253.0, 295.0, 287.0, 285.0, 288.0, 283.0, 278.0, 249.0, 273.0, 262.0, 263.0, 292.0, 281.0, 264.0, 261.0, 262.0, 257.0, 295.0, 278.0, 294.0, 279.0, 287.0, 297.0, 270.0, 303.0, 277.0, 299.0, 292.0, 290.0, 278.0, 252.0, 218.0, 247.0, 280.0, 290.0, 280.0, 
299.0, 289.0, 290.0, 299.0, 291.0, 287.0, 295.0, 279.0, 294.0, 248.0, 282.0, 289.0, 293.0, 260.0, 259.0, 262.0, 251.0, 302.0, 274.0, 290.0, 289.0, 250.0, 275.0, 303.0, 270.0, 263.0, 262.0, 292.0, 281.0, 255.0, 261.0, 294.0, 285.0, 289.0, 287.0, 294.0, 282.0, 293.0, 286.0, 304.0, 272.0, 294.0, 279.0, 276.0, 294.0, 280.0, 296.0, 278.0, 295.0, 281.0, 298.0, 286.0, 284.0, 259.0, 266.0, 299.0, 285.0, 281.0, 298.0, 264.0, 255.0, 259.0, 263.0, 261.0, 269.0, 285.0, 296.0, 261.0, 258.0, 281.0, 295.0, 294.0, 288.0, 282.0, 288.0, 292.0, 281.0, 299.0, 280.0, 251.0, 276.0, 284.0, 292.0, 298.0, 272.0, 272.0, 253.0, 289.0, 284.0, 267.0, 263.0, 265.0, 260.0, 267.0, 258.0, 284.0, 286.0, 265.0, 265.0, 298.0, 278.0, 290.0, 283.0, 287.0, 300.0, 297.0, 273.0, 277.0, 293.0, 300.0, 282.0, 278.0, 298.0, 264.0, 263.0, 295.0, 275.0, 288.0, 282.0, 300.0, 282.0, 293.0, 289.0, 282.0, 294.0, 280.0, 293.0, 282.0, 294.0, 279.0, 291.0, 281.0, 289.0, 254.0, 268.0, 286.0, 287.0, 265.0, 265.0, 262.0, 259.0, 269.0, 261.0, 244.0, 243.0, 286.0, 290.0, 290.0, 289.0, 300.0, 284.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7098545863064848, "mean_inference_ms": 1.2758538422651737, "mean_action_processing_ms": 0.13531428434171733, "mean_env_wait_ms": 0.8551108870485021, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 590.0, "episode_reward_min": 465.0, "episode_reward_mean": 559.68, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 218.0}, "policy_reward_max": {"ppo": 304.0}, "policy_reward_mean": {"ppo": 279.84}, "hist_stats": {"episode_reward": [576.0, 579.0, 582.0, 579.0, 582.0, 579.0, 522.0, 582.0, 573.0, 561.0, 522.0, 525.0, 573.0, 525.0, 519.0, 573.0, 573.0, 584.0, 573.0, 576.0, 582.0, 530.0, 465.0, 570.0, 579.0, 579.0, 590.0, 582.0, 573.0, 530.0, 582.0, 519.0, 513.0, 576.0, 579.0, 525.0, 573.0, 525.0, 573.0, 516.0, 579.0, 576.0, 576.0, 579.0, 576.0, 573.0, 570.0, 576.0, 573.0, 579.0, 570.0, 525.0, 584.0, 579.0, 519.0, 522.0, 530.0, 
581.0, 519.0, 576.0, 582.0, 570.0, 573.0, 579.0, 527.0, 576.0, 570.0, 525.0, 573.0, 530.0, 525.0, 525.0, 570.0, 530.0, 576.0, 573.0, 587.0, 570.0, 570.0, 582.0, 576.0, 527.0, 570.0, 570.0, 582.0, 582.0, 576.0, 573.0, 576.0, 570.0, 570.0, 522.0, 573.0, 530.0, 521.0, 530.0, 487.0, 576.0, 579.0, 584.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [299.0, 277.0, 291.0, 288.0, 298.0, 284.0, 285.0, 294.0, 281.0, 301.0, 288.0, 291.0, 269.0, 253.0, 295.0, 287.0, 285.0, 288.0, 283.0, 278.0, 249.0, 273.0, 262.0, 263.0, 292.0, 281.0, 264.0, 261.0, 262.0, 257.0, 295.0, 278.0, 294.0, 279.0, 287.0, 297.0, 270.0, 303.0, 277.0, 299.0, 292.0, 290.0, 278.0, 252.0, 218.0, 247.0, 280.0, 290.0, 280.0, 299.0, 289.0, 290.0, 299.0, 291.0, 287.0, 295.0, 279.0, 294.0, 248.0, 282.0, 289.0, 293.0, 260.0, 259.0, 262.0, 251.0, 302.0, 274.0, 290.0, 289.0, 250.0, 275.0, 303.0, 270.0, 263.0, 262.0, 292.0, 281.0, 255.0, 261.0, 294.0, 285.0, 289.0, 287.0, 294.0, 282.0, 293.0, 286.0, 304.0, 272.0, 294.0, 279.0, 276.0, 294.0, 280.0, 296.0, 278.0, 295.0, 281.0, 298.0, 286.0, 284.0, 259.0, 266.0, 299.0, 285.0, 281.0, 298.0, 264.0, 255.0, 259.0, 263.0, 261.0, 269.0, 285.0, 296.0, 261.0, 258.0, 281.0, 295.0, 294.0, 288.0, 282.0, 288.0, 292.0, 281.0, 299.0, 280.0, 251.0, 276.0, 284.0, 292.0, 298.0, 272.0, 272.0, 253.0, 289.0, 284.0, 267.0, 263.0, 265.0, 260.0, 267.0, 258.0, 284.0, 286.0, 265.0, 265.0, 298.0, 278.0, 290.0, 283.0, 287.0, 300.0, 297.0, 273.0, 277.0, 293.0, 300.0, 282.0, 278.0, 298.0, 264.0, 263.0, 295.0, 
275.0, 288.0, 282.0, 300.0, 282.0, 293.0, 289.0, 282.0, 294.0, 280.0, 293.0, 282.0, 294.0, 279.0, 291.0, 281.0, 289.0, 254.0, 268.0, 286.0, 287.0, 265.0, 265.0, 262.0, 259.0, 269.0, 261.0, 244.0, 243.0, 286.0, 290.0, 290.0, 289.0, 300.0, 284.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7098545863064848, "mean_inference_ms": 1.2758538422651737, "mean_action_processing_ms": 0.13531428434171733, "mean_env_wait_ms": 0.8551108870485021, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 5862400, "num_agent_steps_trained": 5862400, "num_env_steps_sampled": 2931200, "num_env_steps_trained": 2931200, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 2931200, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 5862400, "timers": {"training_iteration_time_ms": 3680.392, "learn_time_ms": 1138.268, "learn_throughput": 11245.159, "synch_weights_time_ms": 11.92}, "counters": {"num_env_steps_sampled": 2931200, "num_env_steps_trained": 2931200, "num_agent_steps_sampled": 5862400, "num_agent_steps_trained": 5862400}, "done": false, "episodes_total": 7328, "training_iteration": 229, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-15-23", "timestamp": 1666581323, "time_this_iter_s": 3.6412642002105713, "time_total_s": 890.9505684375763, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 890.9505684375763, "timesteps_since_restore": 0, "iterations_since_restore": 229, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 21.46666666666667, "ram_util_percent": 10.633333333333333}}
+{"custom_metrics": {"sparse_reward_mean": 193.2, "sparse_reward_min": 160, "sparse_reward_max": 200, "shaped_reward_mean": 171.92, "shaped_reward_min": 153, "shaped_reward_max": 187, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.31, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 16.51, "onion_pickup_agent_1_min": 5, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 15.05, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 16.27, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.09, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.26, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.06, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, 
"useful_onion_drop_agent_1_mean": 0.11, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.83, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 15.99, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 5.61, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.06, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 12, "useful_dish_pickup_agent_0_mean": 5.27, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.75, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.19, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.2, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 6, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.31, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.68, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 10, "soup_delivery_agent_0_mean": 5.17, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.55, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.04, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.07, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 4, "optimal_onion_potting_agent_0_mean": 14.83, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 15.99, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.83, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 15.99, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.00044891354627907276, "policy_loss": 0.000144109595566988, "vf_loss": 
7.779896259307861, "vf_explained_var": 0.5919172167778015, "kl": 0.002152523258700967, "entropy": 0.9463684558868408, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 2944000, "num_env_steps_trained": 2944000, "num_agent_steps_sampled": 5888000, "num_agent_steps_trained": 5888000}, "sampler_results": {"episode_reward_max": 587.0, "episode_reward_min": 478.0, "episode_reward_mean": 558.32, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 238.0}, "policy_reward_max": {"ppo": 304.0}, "policy_reward_mean": {"ppo": 279.16}, "custom_metrics": {"sparse_reward_mean": 193.2, "sparse_reward_min": 160, "sparse_reward_max": 200, "shaped_reward_mean": 171.92, "shaped_reward_min": 153, "shaped_reward_max": 187, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.31, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 16.51, 
"onion_pickup_agent_1_min": 5, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 15.05, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 16.27, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.09, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.26, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.06, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.11, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.83, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 15.99, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 5.61, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.06, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 12, "useful_dish_pickup_agent_0_mean": 5.27, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.75, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.19, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.2, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 6, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.31, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.68, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 10, "soup_delivery_agent_0_mean": 5.17, "soup_delivery_agent_0_min": 1, 
"soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.55, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.04, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.07, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 4, "optimal_onion_potting_agent_0_mean": 14.83, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 15.99, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.83, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 15.99, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [513.0, 576.0, 579.0, 525.0, 573.0, 525.0, 573.0, 516.0, 579.0, 576.0, 576.0, 579.0, 576.0, 573.0, 570.0, 576.0, 573.0, 579.0, 570.0, 525.0, 584.0, 579.0, 519.0, 522.0, 530.0, 581.0, 519.0, 576.0, 582.0, 570.0, 573.0, 579.0, 527.0, 576.0, 570.0, 525.0, 573.0, 530.0, 525.0, 525.0, 570.0, 530.0, 576.0, 573.0, 587.0, 570.0, 570.0, 582.0, 576.0, 527.0, 570.0, 570.0, 582.0, 582.0, 576.0, 573.0, 576.0, 570.0, 570.0, 522.0, 573.0, 530.0, 521.0, 530.0, 487.0, 576.0, 579.0, 584.0, 584.0, 573.0, 573.0, 530.0, 570.0, 582.0, 576.0, 530.0, 573.0, 530.0, 576.0, 573.0, 570.0, 573.0, 579.0, 573.0, 522.0, 525.0, 576.0, 573.0, 570.0, 530.0, 525.0, 570.0, 573.0, 478.0, 573.0, 533.0, 573.0, 570.0, 525.0, 522.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [262.0, 251.0, 302.0, 274.0, 290.0, 289.0, 250.0, 275.0, 303.0, 270.0, 263.0, 262.0, 292.0, 281.0, 255.0, 261.0, 294.0, 285.0, 289.0, 287.0, 294.0, 282.0, 293.0, 286.0, 304.0, 272.0, 294.0, 279.0, 276.0, 294.0, 280.0, 296.0, 278.0, 295.0, 281.0, 298.0, 286.0, 284.0, 259.0, 266.0, 299.0, 285.0, 281.0, 298.0, 264.0, 255.0, 259.0, 263.0, 261.0, 
269.0, 285.0, 296.0, 261.0, 258.0, 281.0, 295.0, 294.0, 288.0, 282.0, 288.0, 292.0, 281.0, 299.0, 280.0, 251.0, 276.0, 284.0, 292.0, 298.0, 272.0, 272.0, 253.0, 289.0, 284.0, 267.0, 263.0, 265.0, 260.0, 267.0, 258.0, 284.0, 286.0, 265.0, 265.0, 298.0, 278.0, 290.0, 283.0, 287.0, 300.0, 297.0, 273.0, 277.0, 293.0, 300.0, 282.0, 278.0, 298.0, 264.0, 263.0, 295.0, 275.0, 288.0, 282.0, 300.0, 282.0, 293.0, 289.0, 282.0, 294.0, 280.0, 293.0, 282.0, 294.0, 279.0, 291.0, 281.0, 289.0, 254.0, 268.0, 286.0, 287.0, 265.0, 265.0, 262.0, 259.0, 269.0, 261.0, 244.0, 243.0, 286.0, 290.0, 290.0, 289.0, 300.0, 284.0, 289.0, 295.0, 294.0, 279.0, 287.0, 286.0, 272.0, 258.0, 274.0, 296.0, 285.0, 297.0, 290.0, 286.0, 274.0, 256.0, 287.0, 286.0, 259.0, 271.0, 279.0, 297.0, 295.0, 278.0, 283.0, 287.0, 285.0, 288.0, 287.0, 292.0, 294.0, 279.0, 264.0, 258.0, 275.0, 250.0, 284.0, 292.0, 288.0, 285.0, 276.0, 294.0, 256.0, 274.0, 263.0, 262.0, 280.0, 290.0, 282.0, 291.0, 238.0, 240.0, 303.0, 270.0, 263.0, 270.0, 287.0, 286.0, 295.0, 275.0, 273.0, 252.0, 252.0, 270.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7097215694937031, "mean_inference_ms": 1.2755180095867495, "mean_action_processing_ms": 0.13530297641326697, "mean_env_wait_ms": 0.8549260743234761, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 587.0, "episode_reward_min": 478.0, "episode_reward_mean": 558.32, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 238.0}, "policy_reward_max": {"ppo": 304.0}, "policy_reward_mean": {"ppo": 279.16}, "hist_stats": {"episode_reward": [513.0, 576.0, 579.0, 525.0, 573.0, 525.0, 573.0, 516.0, 579.0, 576.0, 576.0, 579.0, 576.0, 573.0, 570.0, 576.0, 573.0, 579.0, 570.0, 525.0, 584.0, 579.0, 519.0, 522.0, 530.0, 581.0, 519.0, 576.0, 582.0, 570.0, 573.0, 579.0, 527.0, 576.0, 570.0, 525.0, 573.0, 530.0, 525.0, 525.0, 570.0, 530.0, 576.0, 573.0, 587.0, 570.0, 570.0, 582.0, 576.0, 527.0, 570.0, 570.0, 582.0, 582.0, 576.0, 573.0, 576.0, 
570.0, 570.0, 522.0, 573.0, 530.0, 521.0, 530.0, 487.0, 576.0, 579.0, 584.0, 584.0, 573.0, 573.0, 530.0, 570.0, 582.0, 576.0, 530.0, 573.0, 530.0, 576.0, 573.0, 570.0, 573.0, 579.0, 573.0, 522.0, 525.0, 576.0, 573.0, 570.0, 530.0, 525.0, 570.0, 573.0, 478.0, 573.0, 533.0, 573.0, 570.0, 525.0, 522.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [262.0, 251.0, 302.0, 274.0, 290.0, 289.0, 250.0, 275.0, 303.0, 270.0, 263.0, 262.0, 292.0, 281.0, 255.0, 261.0, 294.0, 285.0, 289.0, 287.0, 294.0, 282.0, 293.0, 286.0, 304.0, 272.0, 294.0, 279.0, 276.0, 294.0, 280.0, 296.0, 278.0, 295.0, 281.0, 298.0, 286.0, 284.0, 259.0, 266.0, 299.0, 285.0, 281.0, 298.0, 264.0, 255.0, 259.0, 263.0, 261.0, 269.0, 285.0, 296.0, 261.0, 258.0, 281.0, 295.0, 294.0, 288.0, 282.0, 288.0, 292.0, 281.0, 299.0, 280.0, 251.0, 276.0, 284.0, 292.0, 298.0, 272.0, 272.0, 253.0, 289.0, 284.0, 267.0, 263.0, 265.0, 260.0, 267.0, 258.0, 284.0, 286.0, 265.0, 265.0, 298.0, 278.0, 290.0, 283.0, 287.0, 300.0, 297.0, 273.0, 277.0, 293.0, 300.0, 282.0, 278.0, 298.0, 264.0, 263.0, 295.0, 275.0, 288.0, 282.0, 300.0, 282.0, 293.0, 289.0, 282.0, 294.0, 280.0, 293.0, 282.0, 294.0, 279.0, 291.0, 281.0, 289.0, 254.0, 268.0, 286.0, 287.0, 265.0, 265.0, 262.0, 259.0, 269.0, 261.0, 244.0, 243.0, 286.0, 290.0, 290.0, 289.0, 300.0, 284.0, 289.0, 295.0, 294.0, 279.0, 287.0, 286.0, 272.0, 258.0, 274.0, 296.0, 285.0, 297.0, 290.0, 286.0, 274.0, 256.0, 287.0, 286.0, 259.0, 271.0, 279.0, 297.0, 295.0, 278.0, 283.0, 287.0, 285.0, 288.0, 287.0, 
292.0, 294.0, 279.0, 264.0, 258.0, 275.0, 250.0, 284.0, 292.0, 288.0, 285.0, 276.0, 294.0, 256.0, 274.0, 263.0, 262.0, 280.0, 290.0, 282.0, 291.0, 238.0, 240.0, 303.0, 270.0, 263.0, 270.0, 287.0, 286.0, 295.0, 275.0, 273.0, 252.0, 252.0, 270.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7097215694937031, "mean_inference_ms": 1.2755180095867495, "mean_action_processing_ms": 0.13530297641326697, "mean_env_wait_ms": 0.8549260743234761, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 5888000, "num_agent_steps_trained": 5888000, "num_env_steps_sampled": 2944000, "num_env_steps_trained": 2944000, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 2944000, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 5888000, "timers": {"training_iteration_time_ms": 3675.836, "learn_time_ms": 1136.519, "learn_throughput": 11262.463, "synch_weights_time_ms": 11.787}, "counters": {"num_env_steps_sampled": 2944000, "num_env_steps_trained": 2944000, "num_agent_steps_sampled": 5888000, "num_agent_steps_trained": 5888000}, "done": false, "episodes_total": 7360, "training_iteration": 230, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-15-26", "timestamp": 1666581326, "time_this_iter_s": 3.639521360397339, "time_total_s": 894.5900897979736, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 894.5900897979736, "timesteps_since_restore": 0, "iterations_since_restore": 230, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.68, "ram_util_percent": 10.62}}
+{"custom_metrics": {"sparse_reward_mean": 192.8, "sparse_reward_min": 160, "sparse_reward_max": 200, "shaped_reward_mean": 171.69, "shaped_reward_min": 150, "shaped_reward_max": 187, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.61, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 15.96, "onion_pickup_agent_1_min": 5, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 15.41, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 15.76, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.1, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.19, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.06, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, 
"useful_onion_drop_agent_1_mean": 0.05, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.17, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 15.53, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.41, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.41, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 12, "useful_dish_pickup_agent_0_mean": 5.04, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.99, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.22, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.3, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 6, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.04, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.07, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.89, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 10, "soup_delivery_agent_0_mean": 4.93, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.78, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.04, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 15.17, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 15.53, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.17, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 15.53, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0030337772332131863, "policy_loss": -0.0033358775544911623, "vf_loss": 
7.728958606719971, "vf_explained_var": 0.5867734551429749, "kl": 0.002050921320915222, "entropy": 0.9415899515151978, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 2956800, "num_env_steps_trained": 2956800, "num_agent_steps_sampled": 5913600, "num_agent_steps_trained": 5913600}, "sampler_results": {"episode_reward_max": 587.0, "episode_reward_min": 478.0, "episode_reward_mean": 557.29, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 236.0}, "policy_reward_max": {"ppo": 303.0}, "policy_reward_mean": {"ppo": 278.645}, "custom_metrics": {"sparse_reward_mean": 192.8, "sparse_reward_min": 160, "sparse_reward_max": 200, "shaped_reward_mean": 171.69, "shaped_reward_min": 150, "shaped_reward_max": 187, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.61, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 15.96, 
"onion_pickup_agent_1_min": 5, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 15.41, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 15.76, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.1, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.19, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.06, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.05, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.17, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 15.53, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.41, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.41, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 12, "useful_dish_pickup_agent_0_mean": 5.04, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.99, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.22, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.3, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 6, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.04, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.07, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.89, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 10, "soup_delivery_agent_0_mean": 4.93, "soup_delivery_agent_0_min": 1, 
"soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.78, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.04, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 15.17, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 15.53, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.17, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 15.53, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [527.0, 576.0, 570.0, 525.0, 573.0, 530.0, 525.0, 525.0, 570.0, 530.0, 576.0, 573.0, 587.0, 570.0, 570.0, 582.0, 576.0, 527.0, 570.0, 570.0, 582.0, 582.0, 576.0, 573.0, 576.0, 570.0, 570.0, 522.0, 573.0, 530.0, 521.0, 530.0, 487.0, 576.0, 579.0, 584.0, 584.0, 573.0, 573.0, 530.0, 570.0, 582.0, 576.0, 530.0, 573.0, 530.0, 576.0, 573.0, 570.0, 573.0, 579.0, 573.0, 522.0, 525.0, 576.0, 573.0, 570.0, 530.0, 525.0, 570.0, 573.0, 478.0, 573.0, 533.0, 573.0, 570.0, 525.0, 522.0, 576.0, 579.0, 573.0, 581.0, 525.0, 573.0, 570.0, 570.0, 527.0, 484.0, 576.0, 582.0, 573.0, 530.0, 567.0, 519.0, 573.0, 579.0, 582.0, 570.0, 570.0, 570.0, 527.0, 522.0, 582.0, 510.0, 530.0, 522.0, 582.0, 576.0, 573.0, 570.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [251.0, 276.0, 284.0, 292.0, 298.0, 272.0, 272.0, 253.0, 289.0, 284.0, 267.0, 263.0, 265.0, 260.0, 267.0, 258.0, 284.0, 286.0, 265.0, 265.0, 298.0, 278.0, 290.0, 283.0, 287.0, 300.0, 297.0, 273.0, 277.0, 293.0, 300.0, 282.0, 278.0, 298.0, 264.0, 263.0, 295.0, 275.0, 288.0, 282.0, 300.0, 282.0, 293.0, 289.0, 282.0, 294.0, 280.0, 293.0, 282.0, 
294.0, 279.0, 291.0, 281.0, 289.0, 254.0, 268.0, 286.0, 287.0, 265.0, 265.0, 262.0, 259.0, 269.0, 261.0, 244.0, 243.0, 286.0, 290.0, 290.0, 289.0, 300.0, 284.0, 289.0, 295.0, 294.0, 279.0, 287.0, 286.0, 272.0, 258.0, 274.0, 296.0, 285.0, 297.0, 290.0, 286.0, 274.0, 256.0, 287.0, 286.0, 259.0, 271.0, 279.0, 297.0, 295.0, 278.0, 283.0, 287.0, 285.0, 288.0, 287.0, 292.0, 294.0, 279.0, 264.0, 258.0, 275.0, 250.0, 284.0, 292.0, 288.0, 285.0, 276.0, 294.0, 256.0, 274.0, 263.0, 262.0, 280.0, 290.0, 282.0, 291.0, 238.0, 240.0, 303.0, 270.0, 263.0, 270.0, 287.0, 286.0, 295.0, 275.0, 273.0, 252.0, 252.0, 270.0, 282.0, 294.0, 297.0, 282.0, 286.0, 287.0, 298.0, 283.0, 260.0, 265.0, 293.0, 280.0, 281.0, 289.0, 268.0, 302.0, 270.0, 257.0, 236.0, 248.0, 285.0, 291.0, 287.0, 295.0, 286.0, 287.0, 256.0, 274.0, 286.0, 281.0, 253.0, 266.0, 276.0, 297.0, 292.0, 287.0, 291.0, 291.0, 279.0, 291.0, 288.0, 282.0, 285.0, 285.0, 257.0, 270.0, 266.0, 256.0, 294.0, 288.0, 263.0, 247.0, 263.0, 267.0, 252.0, 270.0, 285.0, 297.0, 291.0, 285.0, 284.0, 289.0, 291.0, 279.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7095770796673102, "mean_inference_ms": 1.2751883468459717, "mean_action_processing_ms": 0.135291247180748, "mean_env_wait_ms": 0.8547402841339049, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 587.0, "episode_reward_min": 478.0, "episode_reward_mean": 557.29, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 236.0}, "policy_reward_max": {"ppo": 303.0}, "policy_reward_mean": {"ppo": 278.645}, "hist_stats": {"episode_reward": [527.0, 576.0, 570.0, 525.0, 573.0, 530.0, 525.0, 525.0, 570.0, 530.0, 576.0, 573.0, 587.0, 570.0, 570.0, 582.0, 576.0, 527.0, 570.0, 570.0, 582.0, 582.0, 576.0, 573.0, 576.0, 570.0, 570.0, 522.0, 573.0, 530.0, 521.0, 530.0, 487.0, 576.0, 579.0, 584.0, 584.0, 573.0, 573.0, 530.0, 570.0, 582.0, 576.0, 530.0, 573.0, 530.0, 576.0, 573.0, 570.0, 573.0, 579.0, 573.0, 522.0, 525.0, 576.0, 573.0, 570.0, 
530.0, 525.0, 570.0, 573.0, 478.0, 573.0, 533.0, 573.0, 570.0, 525.0, 522.0, 576.0, 579.0, 573.0, 581.0, 525.0, 573.0, 570.0, 570.0, 527.0, 484.0, 576.0, 582.0, 573.0, 530.0, 567.0, 519.0, 573.0, 579.0, 582.0, 570.0, 570.0, 570.0, 527.0, 522.0, 582.0, 510.0, 530.0, 522.0, 582.0, 576.0, 573.0, 570.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [251.0, 276.0, 284.0, 292.0, 298.0, 272.0, 272.0, 253.0, 289.0, 284.0, 267.0, 263.0, 265.0, 260.0, 267.0, 258.0, 284.0, 286.0, 265.0, 265.0, 298.0, 278.0, 290.0, 283.0, 287.0, 300.0, 297.0, 273.0, 277.0, 293.0, 300.0, 282.0, 278.0, 298.0, 264.0, 263.0, 295.0, 275.0, 288.0, 282.0, 300.0, 282.0, 293.0, 289.0, 282.0, 294.0, 280.0, 293.0, 282.0, 294.0, 279.0, 291.0, 281.0, 289.0, 254.0, 268.0, 286.0, 287.0, 265.0, 265.0, 262.0, 259.0, 269.0, 261.0, 244.0, 243.0, 286.0, 290.0, 290.0, 289.0, 300.0, 284.0, 289.0, 295.0, 294.0, 279.0, 287.0, 286.0, 272.0, 258.0, 274.0, 296.0, 285.0, 297.0, 290.0, 286.0, 274.0, 256.0, 287.0, 286.0, 259.0, 271.0, 279.0, 297.0, 295.0, 278.0, 283.0, 287.0, 285.0, 288.0, 287.0, 292.0, 294.0, 279.0, 264.0, 258.0, 275.0, 250.0, 284.0, 292.0, 288.0, 285.0, 276.0, 294.0, 256.0, 274.0, 263.0, 262.0, 280.0, 290.0, 282.0, 291.0, 238.0, 240.0, 303.0, 270.0, 263.0, 270.0, 287.0, 286.0, 295.0, 275.0, 273.0, 252.0, 252.0, 270.0, 282.0, 294.0, 297.0, 282.0, 286.0, 287.0, 298.0, 283.0, 260.0, 265.0, 293.0, 280.0, 281.0, 289.0, 268.0, 302.0, 270.0, 257.0, 236.0, 248.0, 285.0, 291.0, 287.0, 295.0, 286.0, 287.0, 256.0, 274.0, 286.0, 
281.0, 253.0, 266.0, 276.0, 297.0, 292.0, 287.0, 291.0, 291.0, 279.0, 291.0, 288.0, 282.0, 285.0, 285.0, 257.0, 270.0, 266.0, 256.0, 294.0, 288.0, 263.0, 247.0, 263.0, 267.0, 252.0, 270.0, 285.0, 297.0, 291.0, 285.0, 284.0, 289.0, 291.0, 279.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7095770796673102, "mean_inference_ms": 1.2751883468459717, "mean_action_processing_ms": 0.135291247180748, "mean_env_wait_ms": 0.8547402841339049, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 5913600, "num_agent_steps_trained": 5913600, "num_env_steps_sampled": 2956800, "num_env_steps_trained": 2956800, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 2956800, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 5913600, "timers": {"training_iteration_time_ms": 3666.75, "learn_time_ms": 1122.362, "learn_throughput": 11404.518, "synch_weights_time_ms": 11.831}, "counters": {"num_env_steps_sampled": 2956800, "num_env_steps_trained": 2956800, "num_agent_steps_sampled": 5913600, "num_agent_steps_trained": 5913600}, "done": false, "episodes_total": 7392, "training_iteration": 231, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-15-30", "timestamp": 1666581330, "time_this_iter_s": 3.5793042182922363, "time_total_s": 898.1693940162659, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 898.1693940162659, "timesteps_since_restore": 0, "iterations_since_restore": 231, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.96, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 192.2, "sparse_reward_min": 140, "sparse_reward_max": 200, "shaped_reward_mean": 171.14, "shaped_reward_min": 122, "shaped_reward_max": 187, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.58, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 16.0, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 15.39, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 15.66, "useful_onion_pickup_agent_1_min": 4, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.16, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.15, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.07, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, 
"useful_onion_drop_agent_1_mean": 0.09, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 15.16, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 15.55, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.51, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.28, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.06, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.81, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.26, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.3, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.07, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.05, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.12, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.79, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 4.99, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.7, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.04, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.16, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 15.55, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.16, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 15.55, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.002754440763965249, "policy_loss": -0.0030683977529406548, "vf_loss": 
7.847390651702881, "vf_explained_var": 0.5783874988555908, "kl": 0.0022289445623755455, "entropy": 0.9415615797042847, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 2969600, "num_env_steps_trained": 2969600, "num_agent_steps_sampled": 5939200, "num_agent_steps_trained": 5939200}, "sampler_results": {"episode_reward_max": 587.0, "episode_reward_min": 402.0, "episode_reward_mean": 555.54, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 199.0}, "policy_reward_max": {"ppo": 305.0}, "policy_reward_mean": {"ppo": 277.77}, "custom_metrics": {"sparse_reward_mean": 192.2, "sparse_reward_min": 140, "sparse_reward_max": 200, "shaped_reward_mean": 171.14, "shaped_reward_min": 122, "shaped_reward_max": 187, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.58, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 16.0, 
"onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 15.39, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 15.66, "useful_onion_pickup_agent_1_min": 4, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.16, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.15, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.07, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.09, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 15.16, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 15.55, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.51, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.28, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.06, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.81, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.26, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.3, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.07, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.05, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.12, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.79, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 4.99, "soup_delivery_agent_0_min": 1, 
"soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.7, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.04, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.16, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 15.55, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.16, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 15.55, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [487.0, 576.0, 579.0, 584.0, 584.0, 573.0, 573.0, 530.0, 570.0, 582.0, 576.0, 530.0, 573.0, 530.0, 576.0, 573.0, 570.0, 573.0, 579.0, 573.0, 522.0, 525.0, 576.0, 573.0, 570.0, 530.0, 525.0, 570.0, 573.0, 478.0, 573.0, 533.0, 573.0, 570.0, 525.0, 522.0, 576.0, 579.0, 573.0, 581.0, 525.0, 573.0, 570.0, 570.0, 527.0, 484.0, 576.0, 582.0, 573.0, 530.0, 567.0, 519.0, 573.0, 579.0, 582.0, 570.0, 570.0, 570.0, 527.0, 522.0, 582.0, 510.0, 530.0, 522.0, 582.0, 576.0, 573.0, 570.0, 576.0, 516.0, 530.0, 579.0, 573.0, 570.0, 576.0, 587.0, 561.0, 487.0, 573.0, 402.0, 516.0, 573.0, 579.0, 576.0, 527.0, 579.0, 576.0, 516.0, 579.0, 530.0, 573.0, 567.0, 530.0, 582.0, 570.0, 576.0, 573.0, 576.0, 527.0, 527.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [244.0, 243.0, 286.0, 290.0, 290.0, 289.0, 300.0, 284.0, 289.0, 295.0, 294.0, 279.0, 287.0, 286.0, 272.0, 258.0, 274.0, 296.0, 285.0, 297.0, 290.0, 286.0, 274.0, 256.0, 287.0, 286.0, 259.0, 271.0, 279.0, 297.0, 295.0, 278.0, 283.0, 287.0, 285.0, 288.0, 287.0, 292.0, 294.0, 279.0, 264.0, 258.0, 275.0, 250.0, 284.0, 292.0, 288.0, 285.0, 276.0, 
294.0, 256.0, 274.0, 263.0, 262.0, 280.0, 290.0, 282.0, 291.0, 238.0, 240.0, 303.0, 270.0, 263.0, 270.0, 287.0, 286.0, 295.0, 275.0, 273.0, 252.0, 252.0, 270.0, 282.0, 294.0, 297.0, 282.0, 286.0, 287.0, 298.0, 283.0, 260.0, 265.0, 293.0, 280.0, 281.0, 289.0, 268.0, 302.0, 270.0, 257.0, 236.0, 248.0, 285.0, 291.0, 287.0, 295.0, 286.0, 287.0, 256.0, 274.0, 286.0, 281.0, 253.0, 266.0, 276.0, 297.0, 292.0, 287.0, 291.0, 291.0, 279.0, 291.0, 288.0, 282.0, 285.0, 285.0, 257.0, 270.0, 266.0, 256.0, 294.0, 288.0, 263.0, 247.0, 263.0, 267.0, 252.0, 270.0, 285.0, 297.0, 291.0, 285.0, 284.0, 289.0, 291.0, 279.0, 283.0, 293.0, 257.0, 259.0, 265.0, 265.0, 287.0, 292.0, 286.0, 287.0, 286.0, 284.0, 295.0, 281.0, 289.0, 298.0, 286.0, 275.0, 259.0, 228.0, 276.0, 297.0, 199.0, 203.0, 264.0, 252.0, 290.0, 283.0, 294.0, 285.0, 293.0, 283.0, 276.0, 251.0, 293.0, 286.0, 286.0, 290.0, 254.0, 262.0, 305.0, 274.0, 268.0, 262.0, 277.0, 296.0, 286.0, 281.0, 272.0, 258.0, 293.0, 289.0, 293.0, 277.0, 293.0, 283.0, 294.0, 279.0, 287.0, 289.0, 266.0, 261.0, 270.0, 257.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7094305216316347, "mean_inference_ms": 1.2748349341795286, "mean_action_processing_ms": 0.13527921787792216, "mean_env_wait_ms": 0.8545440624324186, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 587.0, "episode_reward_min": 402.0, "episode_reward_mean": 555.54, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 199.0}, "policy_reward_max": {"ppo": 305.0}, "policy_reward_mean": {"ppo": 277.77}, "hist_stats": {"episode_reward": [487.0, 576.0, 579.0, 584.0, 584.0, 573.0, 573.0, 530.0, 570.0, 582.0, 576.0, 530.0, 573.0, 530.0, 576.0, 573.0, 570.0, 573.0, 579.0, 573.0, 522.0, 525.0, 576.0, 573.0, 570.0, 530.0, 525.0, 570.0, 573.0, 478.0, 573.0, 533.0, 573.0, 570.0, 525.0, 522.0, 576.0, 579.0, 573.0, 581.0, 525.0, 573.0, 570.0, 570.0, 527.0, 484.0, 576.0, 582.0, 573.0, 530.0, 567.0, 519.0, 573.0, 579.0, 582.0, 570.0, 570.0, 
570.0, 527.0, 522.0, 582.0, 510.0, 530.0, 522.0, 582.0, 576.0, 573.0, 570.0, 576.0, 516.0, 530.0, 579.0, 573.0, 570.0, 576.0, 587.0, 561.0, 487.0, 573.0, 402.0, 516.0, 573.0, 579.0, 576.0, 527.0, 579.0, 576.0, 516.0, 579.0, 530.0, 573.0, 567.0, 530.0, 582.0, 570.0, 576.0, 573.0, 576.0, 527.0, 527.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [244.0, 243.0, 286.0, 290.0, 290.0, 289.0, 300.0, 284.0, 289.0, 295.0, 294.0, 279.0, 287.0, 286.0, 272.0, 258.0, 274.0, 296.0, 285.0, 297.0, 290.0, 286.0, 274.0, 256.0, 287.0, 286.0, 259.0, 271.0, 279.0, 297.0, 295.0, 278.0, 283.0, 287.0, 285.0, 288.0, 287.0, 292.0, 294.0, 279.0, 264.0, 258.0, 275.0, 250.0, 284.0, 292.0, 288.0, 285.0, 276.0, 294.0, 256.0, 274.0, 263.0, 262.0, 280.0, 290.0, 282.0, 291.0, 238.0, 240.0, 303.0, 270.0, 263.0, 270.0, 287.0, 286.0, 295.0, 275.0, 273.0, 252.0, 252.0, 270.0, 282.0, 294.0, 297.0, 282.0, 286.0, 287.0, 298.0, 283.0, 260.0, 265.0, 293.0, 280.0, 281.0, 289.0, 268.0, 302.0, 270.0, 257.0, 236.0, 248.0, 285.0, 291.0, 287.0, 295.0, 286.0, 287.0, 256.0, 274.0, 286.0, 281.0, 253.0, 266.0, 276.0, 297.0, 292.0, 287.0, 291.0, 291.0, 279.0, 291.0, 288.0, 282.0, 285.0, 285.0, 257.0, 270.0, 266.0, 256.0, 294.0, 288.0, 263.0, 247.0, 263.0, 267.0, 252.0, 270.0, 285.0, 297.0, 291.0, 285.0, 284.0, 289.0, 291.0, 279.0, 283.0, 293.0, 257.0, 259.0, 265.0, 265.0, 287.0, 292.0, 286.0, 287.0, 286.0, 284.0, 295.0, 281.0, 289.0, 298.0, 286.0, 275.0, 259.0, 228.0, 276.0, 297.0, 199.0, 203.0, 264.0, 252.0, 290.0, 283.0, 294.0, 
285.0, 293.0, 283.0, 276.0, 251.0, 293.0, 286.0, 286.0, 290.0, 254.0, 262.0, 305.0, 274.0, 268.0, 262.0, 277.0, 296.0, 286.0, 281.0, 272.0, 258.0, 293.0, 289.0, 293.0, 277.0, 293.0, 283.0, 294.0, 279.0, 287.0, 289.0, 266.0, 261.0, 270.0, 257.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7094305216316347, "mean_inference_ms": 1.2748349341795286, "mean_action_processing_ms": 0.13527921787792216, "mean_env_wait_ms": 0.8545440624324186, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 5939200, "num_agent_steps_trained": 5939200, "num_env_steps_sampled": 2969600, "num_env_steps_trained": 2969600, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 2969600, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 5939200, "timers": {"training_iteration_time_ms": 3630.374, "learn_time_ms": 1102.93, "learn_throughput": 11605.452, "synch_weights_time_ms": 11.753}, "counters": {"num_env_steps_sampled": 2969600, "num_env_steps_trained": 2969600, "num_agent_steps_sampled": 5939200, "num_agent_steps_trained": 5939200}, "done": false, "episodes_total": 7424, "training_iteration": 232, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-15-34", "timestamp": 1666581334, "time_this_iter_s": 3.6799514293670654, "time_total_s": 901.8493454456329, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 901.8493454456329, "timesteps_since_restore": 0, "iterations_since_restore": 232, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.416666666666668, "ram_util_percent": 10.633333333333333}}
+{"custom_metrics": {"sparse_reward_mean": 191.8, "sparse_reward_min": 140, "sparse_reward_max": 200, "shaped_reward_mean": 170.3, "shaped_reward_min": 122, "shaped_reward_max": 187, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.1, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 16.43, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 14.96, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 16.06, "useful_onion_pickup_agent_1_min": 4, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.2, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.16, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.08, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, 
"useful_onion_drop_agent_1_mean": 0.12, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 14.67, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 15.97, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.79, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.01, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.26, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.5, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.32, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.29, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.11, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.05, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.26, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.58, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.17, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.47, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.03, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.67, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 15.97, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.67, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 15.97, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -1.860898919403553e-05, "policy_loss": -0.0003416483523324132, "vf_loss": 
7.904554843902588, "vf_explained_var": 0.5716251134872437, "kl": 0.0022404068149626255, "entropy": 0.9348317384719849, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 2982400, "num_env_steps_trained": 2982400, "num_agent_steps_sampled": 5964800, "num_agent_steps_trained": 5964800}, "sampler_results": {"episode_reward_max": 587.0, "episode_reward_min": 402.0, "episode_reward_mean": 553.9, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 199.0}, "policy_reward_max": {"ppo": 305.0}, "policy_reward_mean": {"ppo": 276.95}, "custom_metrics": {"sparse_reward_mean": 191.8, "sparse_reward_min": 140, "sparse_reward_max": 200, "shaped_reward_mean": 170.3, "shaped_reward_min": 122, "shaped_reward_max": 187, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.1, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 16.43, 
"onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 14.96, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 16.06, "useful_onion_pickup_agent_1_min": 4, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.2, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.16, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.08, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.12, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 14.67, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 15.97, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.79, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.01, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.26, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.5, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.32, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.29, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.11, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.05, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.26, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.58, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.17, "soup_delivery_agent_0_min": 1, 
"soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.47, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.03, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.67, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 15.97, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.67, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 15.97, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [573.0, 570.0, 525.0, 522.0, 576.0, 579.0, 573.0, 581.0, 525.0, 573.0, 570.0, 570.0, 527.0, 484.0, 576.0, 582.0, 573.0, 530.0, 567.0, 519.0, 573.0, 579.0, 582.0, 570.0, 570.0, 570.0, 527.0, 522.0, 582.0, 510.0, 530.0, 522.0, 582.0, 576.0, 573.0, 570.0, 576.0, 516.0, 530.0, 579.0, 573.0, 570.0, 576.0, 587.0, 561.0, 487.0, 573.0, 402.0, 516.0, 573.0, 579.0, 576.0, 527.0, 579.0, 576.0, 516.0, 579.0, 530.0, 573.0, 567.0, 530.0, 582.0, 570.0, 576.0, 573.0, 576.0, 527.0, 527.0, 579.0, 573.0, 584.0, 525.0, 527.0, 579.0, 573.0, 570.0, 525.0, 576.0, 573.0, 504.0, 519.0, 527.0, 582.0, 530.0, 581.0, 516.0, 576.0, 570.0, 459.0, 522.0, 510.0, 576.0, 582.0, 530.0, 530.0, 576.0, 570.0, 579.0, 576.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [287.0, 286.0, 295.0, 275.0, 273.0, 252.0, 252.0, 270.0, 282.0, 294.0, 297.0, 282.0, 286.0, 287.0, 298.0, 283.0, 260.0, 265.0, 293.0, 280.0, 281.0, 289.0, 268.0, 302.0, 270.0, 257.0, 236.0, 248.0, 285.0, 291.0, 287.0, 295.0, 286.0, 287.0, 256.0, 274.0, 286.0, 281.0, 253.0, 266.0, 276.0, 297.0, 292.0, 287.0, 291.0, 291.0, 279.0, 291.0, 288.0, 
282.0, 285.0, 285.0, 257.0, 270.0, 266.0, 256.0, 294.0, 288.0, 263.0, 247.0, 263.0, 267.0, 252.0, 270.0, 285.0, 297.0, 291.0, 285.0, 284.0, 289.0, 291.0, 279.0, 283.0, 293.0, 257.0, 259.0, 265.0, 265.0, 287.0, 292.0, 286.0, 287.0, 286.0, 284.0, 295.0, 281.0, 289.0, 298.0, 286.0, 275.0, 259.0, 228.0, 276.0, 297.0, 199.0, 203.0, 264.0, 252.0, 290.0, 283.0, 294.0, 285.0, 293.0, 283.0, 276.0, 251.0, 293.0, 286.0, 286.0, 290.0, 254.0, 262.0, 305.0, 274.0, 268.0, 262.0, 277.0, 296.0, 286.0, 281.0, 272.0, 258.0, 293.0, 289.0, 293.0, 277.0, 293.0, 283.0, 294.0, 279.0, 287.0, 289.0, 266.0, 261.0, 270.0, 257.0, 293.0, 286.0, 287.0, 286.0, 294.0, 290.0, 268.0, 257.0, 255.0, 272.0, 295.0, 284.0, 279.0, 294.0, 292.0, 278.0, 269.0, 256.0, 293.0, 283.0, 271.0, 302.0, 252.0, 252.0, 256.0, 263.0, 262.0, 265.0, 285.0, 297.0, 264.0, 266.0, 287.0, 294.0, 272.0, 244.0, 294.0, 282.0, 291.0, 279.0, 229.0, 230.0, 259.0, 263.0, 255.0, 255.0, 292.0, 284.0, 290.0, 292.0, 267.0, 263.0, 249.0, 281.0, 301.0, 275.0, 294.0, 276.0, 286.0, 293.0, 303.0, 273.0, 290.0, 286.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7093009440633637, "mean_inference_ms": 1.2745047914178103, "mean_action_processing_ms": 0.135271318290213, "mean_env_wait_ms": 0.8543809304373801, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 587.0, "episode_reward_min": 402.0, "episode_reward_mean": 553.9, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 199.0}, "policy_reward_max": {"ppo": 305.0}, "policy_reward_mean": {"ppo": 276.95}, "hist_stats": {"episode_reward": [573.0, 570.0, 525.0, 522.0, 576.0, 579.0, 573.0, 581.0, 525.0, 573.0, 570.0, 570.0, 527.0, 484.0, 576.0, 582.0, 573.0, 530.0, 567.0, 519.0, 573.0, 579.0, 582.0, 570.0, 570.0, 570.0, 527.0, 522.0, 582.0, 510.0, 530.0, 522.0, 582.0, 576.0, 573.0, 570.0, 576.0, 516.0, 530.0, 579.0, 573.0, 570.0, 576.0, 587.0, 561.0, 487.0, 573.0, 402.0, 516.0, 573.0, 579.0, 576.0, 527.0, 579.0, 576.0, 516.0, 579.0, 
530.0, 573.0, 567.0, 530.0, 582.0, 570.0, 576.0, 573.0, 576.0, 527.0, 527.0, 579.0, 573.0, 584.0, 525.0, 527.0, 579.0, 573.0, 570.0, 525.0, 576.0, 573.0, 504.0, 519.0, 527.0, 582.0, 530.0, 581.0, 516.0, 576.0, 570.0, 459.0, 522.0, 510.0, 576.0, 582.0, 530.0, 530.0, 576.0, 570.0, 579.0, 576.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [287.0, 286.0, 295.0, 275.0, 273.0, 252.0, 252.0, 270.0, 282.0, 294.0, 297.0, 282.0, 286.0, 287.0, 298.0, 283.0, 260.0, 265.0, 293.0, 280.0, 281.0, 289.0, 268.0, 302.0, 270.0, 257.0, 236.0, 248.0, 285.0, 291.0, 287.0, 295.0, 286.0, 287.0, 256.0, 274.0, 286.0, 281.0, 253.0, 266.0, 276.0, 297.0, 292.0, 287.0, 291.0, 291.0, 279.0, 291.0, 288.0, 282.0, 285.0, 285.0, 257.0, 270.0, 266.0, 256.0, 294.0, 288.0, 263.0, 247.0, 263.0, 267.0, 252.0, 270.0, 285.0, 297.0, 291.0, 285.0, 284.0, 289.0, 291.0, 279.0, 283.0, 293.0, 257.0, 259.0, 265.0, 265.0, 287.0, 292.0, 286.0, 287.0, 286.0, 284.0, 295.0, 281.0, 289.0, 298.0, 286.0, 275.0, 259.0, 228.0, 276.0, 297.0, 199.0, 203.0, 264.0, 252.0, 290.0, 283.0, 294.0, 285.0, 293.0, 283.0, 276.0, 251.0, 293.0, 286.0, 286.0, 290.0, 254.0, 262.0, 305.0, 274.0, 268.0, 262.0, 277.0, 296.0, 286.0, 281.0, 272.0, 258.0, 293.0, 289.0, 293.0, 277.0, 293.0, 283.0, 294.0, 279.0, 287.0, 289.0, 266.0, 261.0, 270.0, 257.0, 293.0, 286.0, 287.0, 286.0, 294.0, 290.0, 268.0, 257.0, 255.0, 272.0, 295.0, 284.0, 279.0, 294.0, 292.0, 278.0, 269.0, 256.0, 293.0, 283.0, 271.0, 302.0, 252.0, 252.0, 256.0, 263.0, 262.0, 265.0, 285.0, 
297.0, 264.0, 266.0, 287.0, 294.0, 272.0, 244.0, 294.0, 282.0, 291.0, 279.0, 229.0, 230.0, 259.0, 263.0, 255.0, 255.0, 292.0, 284.0, 290.0, 292.0, 267.0, 263.0, 249.0, 281.0, 301.0, 275.0, 294.0, 276.0, 286.0, 293.0, 303.0, 273.0, 290.0, 286.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7093009440633637, "mean_inference_ms": 1.2745047914178103, "mean_action_processing_ms": 0.135271318290213, "mean_env_wait_ms": 0.8543809304373801, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 5964800, "num_agent_steps_trained": 5964800, "num_env_steps_sampled": 2982400, "num_env_steps_trained": 2982400, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 2982400, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 5964800, "timers": {"training_iteration_time_ms": 3627.366, "learn_time_ms": 1109.897, "learn_throughput": 11532.6, "synch_weights_time_ms": 11.141}, "counters": {"num_env_steps_sampled": 2982400, "num_env_steps_trained": 2982400, "num_agent_steps_sampled": 5964800, "num_agent_steps_trained": 5964800}, "done": false, "episodes_total": 7456, "training_iteration": 233, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-15-38", "timestamp": 1666581338, "time_this_iter_s": 3.7769134044647217, "time_total_s": 905.6262588500977, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 905.6262588500977, "timesteps_since_restore": 0, "iterations_since_restore": 233, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 24.16, "ram_util_percent": 10.62}}
+{"custom_metrics": {"sparse_reward_mean": 191.2, "sparse_reward_min": 140, "sparse_reward_max": 200, "shaped_reward_mean": 170.29, "shaped_reward_min": 122, "shaped_reward_max": 187, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.0, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 17.49, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 13.88, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 17.11, "useful_onion_pickup_agent_1_min": 4, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.16, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.21, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.08, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, 
"useful_onion_drop_agent_1_mean": 0.13, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 13.64, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 16.93, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 6.28, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 4.46, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.8, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.01, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.29, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.25, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.08, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.05, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.77, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.08, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.67, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 3.94, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 13.64, "optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 16.93, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 13.64, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 16.93, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0011096132220700383, "policy_loss": -0.0014260424068197608, "vf_loss": 
7.822549343109131, "vf_explained_var": 0.5857524871826172, "kl": 0.0024624252691864967, "entropy": 0.9316513538360596, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 2995200, "num_env_steps_trained": 2995200, "num_agent_steps_sampled": 5990400, "num_agent_steps_trained": 5990400}, "sampler_results": {"episode_reward_max": 587.0, "episode_reward_min": 402.0, "episode_reward_mean": 552.69, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 199.0}, "policy_reward_max": {"ppo": 305.0}, "policy_reward_mean": {"ppo": 276.345}, "custom_metrics": {"sparse_reward_mean": 191.2, "sparse_reward_min": 140, "sparse_reward_max": 200, "shaped_reward_mean": 170.29, "shaped_reward_min": 122, "shaped_reward_max": 187, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.0, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 17.49, 
"onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 13.88, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 17.11, "useful_onion_pickup_agent_1_min": 4, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.16, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.21, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.08, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.13, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 13.64, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 16.93, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 6.28, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 4.46, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.8, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.01, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.29, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.25, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.08, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.05, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.77, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.08, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.67, "soup_delivery_agent_0_min": 1, 
"soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 3.94, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 13.64, "optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 16.93, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 13.64, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 16.93, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 576.0, 573.0, 570.0, 576.0, 516.0, 530.0, 579.0, 573.0, 570.0, 576.0, 587.0, 561.0, 487.0, 573.0, 402.0, 516.0, 573.0, 579.0, 576.0, 527.0, 579.0, 576.0, 516.0, 579.0, 530.0, 573.0, 567.0, 530.0, 582.0, 570.0, 576.0, 573.0, 576.0, 527.0, 527.0, 579.0, 573.0, 584.0, 525.0, 527.0, 579.0, 573.0, 570.0, 525.0, 576.0, 573.0, 504.0, 519.0, 527.0, 582.0, 530.0, 581.0, 516.0, 576.0, 570.0, 459.0, 522.0, 510.0, 576.0, 582.0, 530.0, 530.0, 576.0, 570.0, 579.0, 576.0, 576.0, 578.0, 530.0, 530.0, 576.0, 573.0, 525.0, 516.0, 530.0, 524.0, 573.0, 582.0, 570.0, 584.0, 527.0, 570.0, 576.0, 519.0, 519.0, 525.0, 527.0, 527.0, 570.0, 525.0, 570.0, 522.0, 522.0, 570.0, 576.0, 582.0, 587.0, 527.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [285.0, 297.0, 291.0, 285.0, 284.0, 289.0, 291.0, 279.0, 283.0, 293.0, 257.0, 259.0, 265.0, 265.0, 287.0, 292.0, 286.0, 287.0, 286.0, 284.0, 295.0, 281.0, 289.0, 298.0, 286.0, 275.0, 259.0, 228.0, 276.0, 297.0, 199.0, 203.0, 264.0, 252.0, 290.0, 283.0, 294.0, 285.0, 293.0, 283.0, 276.0, 251.0, 293.0, 286.0, 286.0, 290.0, 254.0, 262.0, 305.0, 
274.0, 268.0, 262.0, 277.0, 296.0, 286.0, 281.0, 272.0, 258.0, 293.0, 289.0, 293.0, 277.0, 293.0, 283.0, 294.0, 279.0, 287.0, 289.0, 266.0, 261.0, 270.0, 257.0, 293.0, 286.0, 287.0, 286.0, 294.0, 290.0, 268.0, 257.0, 255.0, 272.0, 295.0, 284.0, 279.0, 294.0, 292.0, 278.0, 269.0, 256.0, 293.0, 283.0, 271.0, 302.0, 252.0, 252.0, 256.0, 263.0, 262.0, 265.0, 285.0, 297.0, 264.0, 266.0, 287.0, 294.0, 272.0, 244.0, 294.0, 282.0, 291.0, 279.0, 229.0, 230.0, 259.0, 263.0, 255.0, 255.0, 292.0, 284.0, 290.0, 292.0, 267.0, 263.0, 249.0, 281.0, 301.0, 275.0, 294.0, 276.0, 286.0, 293.0, 303.0, 273.0, 290.0, 286.0, 299.0, 279.0, 264.0, 266.0, 270.0, 260.0, 277.0, 299.0, 288.0, 285.0, 262.0, 263.0, 241.0, 275.0, 270.0, 260.0, 266.0, 258.0, 292.0, 281.0, 301.0, 281.0, 289.0, 281.0, 288.0, 296.0, 269.0, 258.0, 289.0, 281.0, 279.0, 297.0, 263.0, 256.0, 270.0, 249.0, 270.0, 255.0, 275.0, 252.0, 271.0, 256.0, 290.0, 280.0, 269.0, 256.0, 292.0, 278.0, 268.0, 254.0, 275.0, 247.0, 284.0, 286.0, 278.0, 298.0, 290.0, 292.0, 291.0, 296.0, 264.0, 263.0, 291.0, 288.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7091450891616614, "mean_inference_ms": 1.274153082248647, "mean_action_processing_ms": 0.13526185222798692, "mean_env_wait_ms": 0.8541923439015997, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 587.0, "episode_reward_min": 402.0, "episode_reward_mean": 552.69, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 199.0}, "policy_reward_max": {"ppo": 305.0}, "policy_reward_mean": {"ppo": 276.345}, "hist_stats": {"episode_reward": [582.0, 576.0, 573.0, 570.0, 576.0, 516.0, 530.0, 579.0, 573.0, 570.0, 576.0, 587.0, 561.0, 487.0, 573.0, 402.0, 516.0, 573.0, 579.0, 576.0, 527.0, 579.0, 576.0, 516.0, 579.0, 530.0, 573.0, 567.0, 530.0, 582.0, 570.0, 576.0, 573.0, 576.0, 527.0, 527.0, 579.0, 573.0, 584.0, 525.0, 527.0, 579.0, 573.0, 570.0, 525.0, 576.0, 573.0, 504.0, 519.0, 527.0, 582.0, 530.0, 581.0, 516.0, 576.0, 570.0, 459.0, 
522.0, 510.0, 576.0, 582.0, 530.0, 530.0, 576.0, 570.0, 579.0, 576.0, 576.0, 578.0, 530.0, 530.0, 576.0, 573.0, 525.0, 516.0, 530.0, 524.0, 573.0, 582.0, 570.0, 584.0, 527.0, 570.0, 576.0, 519.0, 519.0, 525.0, 527.0, 527.0, 570.0, 525.0, 570.0, 522.0, 522.0, 570.0, 576.0, 582.0, 587.0, 527.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [285.0, 297.0, 291.0, 285.0, 284.0, 289.0, 291.0, 279.0, 283.0, 293.0, 257.0, 259.0, 265.0, 265.0, 287.0, 292.0, 286.0, 287.0, 286.0, 284.0, 295.0, 281.0, 289.0, 298.0, 286.0, 275.0, 259.0, 228.0, 276.0, 297.0, 199.0, 203.0, 264.0, 252.0, 290.0, 283.0, 294.0, 285.0, 293.0, 283.0, 276.0, 251.0, 293.0, 286.0, 286.0, 290.0, 254.0, 262.0, 305.0, 274.0, 268.0, 262.0, 277.0, 296.0, 286.0, 281.0, 272.0, 258.0, 293.0, 289.0, 293.0, 277.0, 293.0, 283.0, 294.0, 279.0, 287.0, 289.0, 266.0, 261.0, 270.0, 257.0, 293.0, 286.0, 287.0, 286.0, 294.0, 290.0, 268.0, 257.0, 255.0, 272.0, 295.0, 284.0, 279.0, 294.0, 292.0, 278.0, 269.0, 256.0, 293.0, 283.0, 271.0, 302.0, 252.0, 252.0, 256.0, 263.0, 262.0, 265.0, 285.0, 297.0, 264.0, 266.0, 287.0, 294.0, 272.0, 244.0, 294.0, 282.0, 291.0, 279.0, 229.0, 230.0, 259.0, 263.0, 255.0, 255.0, 292.0, 284.0, 290.0, 292.0, 267.0, 263.0, 249.0, 281.0, 301.0, 275.0, 294.0, 276.0, 286.0, 293.0, 303.0, 273.0, 290.0, 286.0, 299.0, 279.0, 264.0, 266.0, 270.0, 260.0, 277.0, 299.0, 288.0, 285.0, 262.0, 263.0, 241.0, 275.0, 270.0, 260.0, 266.0, 258.0, 292.0, 281.0, 301.0, 281.0, 289.0, 281.0, 288.0, 296.0, 269.0, 258.0, 289.0, 
281.0, 279.0, 297.0, 263.0, 256.0, 270.0, 249.0, 270.0, 255.0, 275.0, 252.0, 271.0, 256.0, 290.0, 280.0, 269.0, 256.0, 292.0, 278.0, 268.0, 254.0, 275.0, 247.0, 284.0, 286.0, 278.0, 298.0, 290.0, 292.0, 291.0, 296.0, 264.0, 263.0, 291.0, 288.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7091450891616614, "mean_inference_ms": 1.274153082248647, "mean_action_processing_ms": 0.13526185222798692, "mean_env_wait_ms": 0.8541923439015997, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 5990400, "num_agent_steps_trained": 5990400, "num_env_steps_sampled": 2995200, "num_env_steps_trained": 2995200, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 2995200, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 5990400, "timers": {"training_iteration_time_ms": 3619.728, "learn_time_ms": 1102.386, "learn_throughput": 11611.176, "synch_weights_time_ms": 10.479}, "counters": {"num_env_steps_sampled": 2995200, "num_env_steps_trained": 2995200, "num_agent_steps_sampled": 5990400, "num_agent_steps_trained": 5990400}, "done": false, "episodes_total": 7488, "training_iteration": 234, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-15-42", "timestamp": 1666581342, "time_this_iter_s": 3.723451614379883, "time_total_s": 909.3497104644775, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 909.3497104644775, "timesteps_since_restore": 0, "iterations_since_restore": 234, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.5, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 191.2, "sparse_reward_min": 140, "sparse_reward_max": 200, "shaped_reward_mean": 170.08, "shaped_reward_min": 128, "shaped_reward_max": 187, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.12, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 17.27, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 13.98, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 17.0, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 25, "onion_drop_agent_0_mean": 0.13, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.2, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.06, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, 
"useful_onion_drop_agent_1_mean": 0.11, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 13.74, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 16.79, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 6.2, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 14, "dish_pickup_agent_1_mean": 4.63, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.64, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.19, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.38, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 12, "dish_drop_agent_1_mean": 0.22, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.05, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.64, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.21, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.53, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.08, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.05, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 13.74, "optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 16.79, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 13.74, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 16.79, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -1.2165401130914688e-06, "policy_loss": -0.00031783897429704666, "vf_loss": 
7.84594202041626, "vf_explained_var": 0.5760541558265686, "kl": 0.0020695971325039864, "entropy": 0.9359432458877563, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 3008000, "num_env_steps_trained": 3008000, "num_agent_steps_sampled": 6016000, "num_agent_steps_trained": 6016000}, "sampler_results": {"episode_reward_max": 587.0, "episode_reward_min": 408.0, "episode_reward_mean": 552.48, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 195.0}, "policy_reward_max": {"ppo": 303.0}, "policy_reward_mean": {"ppo": 276.24}, "custom_metrics": {"sparse_reward_mean": 191.2, "sparse_reward_min": 140, "sparse_reward_max": 200, "shaped_reward_mean": 170.08, "shaped_reward_min": 128, "shaped_reward_max": 187, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.12, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 17.27, 
"onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 13.98, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 17.0, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 25, "onion_drop_agent_0_mean": 0.13, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.2, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.06, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.11, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 13.74, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 16.79, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 6.2, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 14, "dish_pickup_agent_1_mean": 4.63, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.64, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.19, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.38, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 12, "dish_drop_agent_1_mean": 0.22, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.05, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.64, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.21, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.53, "soup_delivery_agent_0_min": 1, 
"soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.08, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.05, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 13.74, "optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 16.79, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 13.74, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 16.79, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [573.0, 576.0, 527.0, 527.0, 579.0, 573.0, 584.0, 525.0, 527.0, 579.0, 573.0, 570.0, 525.0, 576.0, 573.0, 504.0, 519.0, 527.0, 582.0, 530.0, 581.0, 516.0, 576.0, 570.0, 459.0, 522.0, 510.0, 576.0, 582.0, 530.0, 530.0, 576.0, 570.0, 579.0, 576.0, 576.0, 578.0, 530.0, 530.0, 576.0, 573.0, 525.0, 516.0, 530.0, 524.0, 573.0, 582.0, 570.0, 584.0, 527.0, 570.0, 576.0, 519.0, 519.0, 525.0, 527.0, 527.0, 570.0, 525.0, 570.0, 522.0, 522.0, 570.0, 576.0, 582.0, 587.0, 527.0, 579.0, 570.0, 570.0, 525.0, 465.0, 570.0, 576.0, 525.0, 573.0, 570.0, 525.0, 516.0, 587.0, 579.0, 576.0, 530.0, 579.0, 573.0, 570.0, 573.0, 573.0, 570.0, 567.0, 573.0, 576.0, 579.0, 573.0, 579.0, 573.0, 408.0, 527.0, 533.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [294.0, 279.0, 287.0, 289.0, 266.0, 261.0, 270.0, 257.0, 293.0, 286.0, 287.0, 286.0, 294.0, 290.0, 268.0, 257.0, 255.0, 272.0, 295.0, 284.0, 279.0, 294.0, 292.0, 278.0, 269.0, 256.0, 293.0, 283.0, 271.0, 302.0, 252.0, 252.0, 256.0, 263.0, 262.0, 265.0, 285.0, 297.0, 264.0, 266.0, 287.0, 294.0, 272.0, 244.0, 294.0, 282.0, 291.0, 279.0, 229.0, 
230.0, 259.0, 263.0, 255.0, 255.0, 292.0, 284.0, 290.0, 292.0, 267.0, 263.0, 249.0, 281.0, 301.0, 275.0, 294.0, 276.0, 286.0, 293.0, 303.0, 273.0, 290.0, 286.0, 299.0, 279.0, 264.0, 266.0, 270.0, 260.0, 277.0, 299.0, 288.0, 285.0, 262.0, 263.0, 241.0, 275.0, 270.0, 260.0, 266.0, 258.0, 292.0, 281.0, 301.0, 281.0, 289.0, 281.0, 288.0, 296.0, 269.0, 258.0, 289.0, 281.0, 279.0, 297.0, 263.0, 256.0, 270.0, 249.0, 270.0, 255.0, 275.0, 252.0, 271.0, 256.0, 290.0, 280.0, 269.0, 256.0, 292.0, 278.0, 268.0, 254.0, 275.0, 247.0, 284.0, 286.0, 278.0, 298.0, 290.0, 292.0, 291.0, 296.0, 264.0, 263.0, 291.0, 288.0, 290.0, 280.0, 287.0, 283.0, 262.0, 263.0, 232.0, 233.0, 277.0, 293.0, 291.0, 285.0, 260.0, 265.0, 280.0, 293.0, 281.0, 289.0, 256.0, 269.0, 260.0, 256.0, 290.0, 297.0, 289.0, 290.0, 283.0, 293.0, 255.0, 275.0, 292.0, 287.0, 288.0, 285.0, 289.0, 281.0, 291.0, 282.0, 289.0, 284.0, 281.0, 289.0, 289.0, 278.0, 285.0, 288.0, 297.0, 279.0, 293.0, 286.0, 279.0, 294.0, 288.0, 291.0, 282.0, 291.0, 195.0, 213.0, 268.0, 259.0, 268.0, 265.0, 291.0, 285.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7089912828807431, "mean_inference_ms": 1.2738039305353777, "mean_action_processing_ms": 0.135251658168302, "mean_env_wait_ms": 0.8539967288879649, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 587.0, "episode_reward_min": 408.0, "episode_reward_mean": 552.48, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 195.0}, "policy_reward_max": {"ppo": 303.0}, "policy_reward_mean": {"ppo": 276.24}, "hist_stats": {"episode_reward": [573.0, 576.0, 527.0, 527.0, 579.0, 573.0, 584.0, 525.0, 527.0, 579.0, 573.0, 570.0, 525.0, 576.0, 573.0, 504.0, 519.0, 527.0, 582.0, 530.0, 581.0, 516.0, 576.0, 570.0, 459.0, 522.0, 510.0, 576.0, 582.0, 530.0, 530.0, 576.0, 570.0, 579.0, 576.0, 576.0, 578.0, 530.0, 530.0, 576.0, 573.0, 525.0, 516.0, 530.0, 524.0, 573.0, 582.0, 570.0, 584.0, 527.0, 570.0, 576.0, 519.0, 519.0, 525.0, 527.0, 527.0, 
570.0, 525.0, 570.0, 522.0, 522.0, 570.0, 576.0, 582.0, 587.0, 527.0, 579.0, 570.0, 570.0, 525.0, 465.0, 570.0, 576.0, 525.0, 573.0, 570.0, 525.0, 516.0, 587.0, 579.0, 576.0, 530.0, 579.0, 573.0, 570.0, 573.0, 573.0, 570.0, 567.0, 573.0, 576.0, 579.0, 573.0, 579.0, 573.0, 408.0, 527.0, 533.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [294.0, 279.0, 287.0, 289.0, 266.0, 261.0, 270.0, 257.0, 293.0, 286.0, 287.0, 286.0, 294.0, 290.0, 268.0, 257.0, 255.0, 272.0, 295.0, 284.0, 279.0, 294.0, 292.0, 278.0, 269.0, 256.0, 293.0, 283.0, 271.0, 302.0, 252.0, 252.0, 256.0, 263.0, 262.0, 265.0, 285.0, 297.0, 264.0, 266.0, 287.0, 294.0, 272.0, 244.0, 294.0, 282.0, 291.0, 279.0, 229.0, 230.0, 259.0, 263.0, 255.0, 255.0, 292.0, 284.0, 290.0, 292.0, 267.0, 263.0, 249.0, 281.0, 301.0, 275.0, 294.0, 276.0, 286.0, 293.0, 303.0, 273.0, 290.0, 286.0, 299.0, 279.0, 264.0, 266.0, 270.0, 260.0, 277.0, 299.0, 288.0, 285.0, 262.0, 263.0, 241.0, 275.0, 270.0, 260.0, 266.0, 258.0, 292.0, 281.0, 301.0, 281.0, 289.0, 281.0, 288.0, 296.0, 269.0, 258.0, 289.0, 281.0, 279.0, 297.0, 263.0, 256.0, 270.0, 249.0, 270.0, 255.0, 275.0, 252.0, 271.0, 256.0, 290.0, 280.0, 269.0, 256.0, 292.0, 278.0, 268.0, 254.0, 275.0, 247.0, 284.0, 286.0, 278.0, 298.0, 290.0, 292.0, 291.0, 296.0, 264.0, 263.0, 291.0, 288.0, 290.0, 280.0, 287.0, 283.0, 262.0, 263.0, 232.0, 233.0, 277.0, 293.0, 291.0, 285.0, 260.0, 265.0, 280.0, 293.0, 281.0, 289.0, 256.0, 269.0, 260.0, 256.0, 290.0, 297.0, 289.0, 290.0, 283.0, 293.0, 255.0, 
275.0, 292.0, 287.0, 288.0, 285.0, 289.0, 281.0, 291.0, 282.0, 289.0, 284.0, 281.0, 289.0, 289.0, 278.0, 285.0, 288.0, 297.0, 279.0, 293.0, 286.0, 279.0, 294.0, 288.0, 291.0, 282.0, 291.0, 195.0, 213.0, 268.0, 259.0, 268.0, 265.0, 291.0, 285.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7089912828807431, "mean_inference_ms": 1.2738039305353777, "mean_action_processing_ms": 0.135251658168302, "mean_env_wait_ms": 0.8539967288879649, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 6016000, "num_agent_steps_trained": 6016000, "num_env_steps_sampled": 3008000, "num_env_steps_trained": 3008000, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 3008000, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 6016000, "timers": {"training_iteration_time_ms": 3623.981, "learn_time_ms": 1107.366, "learn_throughput": 11558.965, "synch_weights_time_ms": 9.935}, "counters": {"num_env_steps_sampled": 3008000, "num_env_steps_trained": 3008000, "num_agent_steps_sampled": 6016000, "num_agent_steps_trained": 6016000}, "done": false, "episodes_total": 7520, "training_iteration": 235, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-15-46", "timestamp": 1666581346, "time_this_iter_s": 3.7096731662750244, "time_total_s": 913.0593836307526, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 913.0593836307526, "timesteps_since_restore": 0, "iterations_since_restore": 235, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.883333333333333, "ram_util_percent": 10.616666666666665}}
+{"custom_metrics": {"sparse_reward_mean": 192.6, "sparse_reward_min": 140, "sparse_reward_max": 200, "shaped_reward_mean": 170.5, "shaped_reward_min": 128, "shaped_reward_max": 187, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.5, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 17.0, "onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 14.32, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 16.74, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 25, "onion_drop_agent_0_mean": 0.09, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.19, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.05, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, 
"useful_onion_drop_agent_1_mean": 0.08, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.14, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 16.49, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 6.0, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 14, "dish_pickup_agent_1_mean": 4.86, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 5.54, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.33, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.33, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 12, "dish_drop_agent_1_mean": 0.31, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.04, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.51, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.33, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.41, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.26, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.04, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.14, "optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 16.49, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.14, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 16.49, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.002625423716381192, "policy_loss": -0.0029444461688399315, "vf_loss": 
7.811328887939453, "vf_explained_var": 0.6009083986282349, "kl": 0.0021797027438879013, "entropy": 0.9242209196090698, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 3020800, "num_env_steps_trained": 3020800, "num_agent_steps_sampled": 6041600, "num_agent_steps_trained": 6041600}, "sampler_results": {"episode_reward_max": 587.0, "episode_reward_min": 408.0, "episode_reward_mean": 555.7, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 195.0}, "policy_reward_max": {"ppo": 303.0}, "policy_reward_mean": {"ppo": 277.85}, "custom_metrics": {"sparse_reward_mean": 192.6, "sparse_reward_min": 140, "sparse_reward_max": 200, "shaped_reward_mean": 170.5, "shaped_reward_min": 128, "shaped_reward_max": 187, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.5, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 17.0, 
"onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 14.32, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 16.74, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 25, "onion_drop_agent_0_mean": 0.09, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.19, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.05, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.08, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.14, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 16.49, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 6.0, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 14, "dish_pickup_agent_1_mean": 4.86, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 5.54, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.33, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.33, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 12, "dish_drop_agent_1_mean": 0.31, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.04, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.51, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.33, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.41, "soup_delivery_agent_0_min": 1, 
"soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.26, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.04, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.14, "optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 16.49, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.14, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 16.49, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [570.0, 579.0, 576.0, 576.0, 578.0, 530.0, 530.0, 576.0, 573.0, 525.0, 516.0, 530.0, 524.0, 573.0, 582.0, 570.0, 584.0, 527.0, 570.0, 576.0, 519.0, 519.0, 525.0, 527.0, 527.0, 570.0, 525.0, 570.0, 522.0, 522.0, 570.0, 576.0, 582.0, 587.0, 527.0, 579.0, 570.0, 570.0, 525.0, 465.0, 570.0, 576.0, 525.0, 573.0, 570.0, 525.0, 516.0, 587.0, 579.0, 576.0, 530.0, 579.0, 573.0, 570.0, 573.0, 573.0, 570.0, 567.0, 573.0, 576.0, 579.0, 573.0, 579.0, 573.0, 408.0, 527.0, 533.0, 576.0, 521.0, 513.0, 579.0, 570.0, 576.0, 579.0, 567.0, 516.0, 573.0, 519.0, 576.0, 576.0, 570.0, 522.0, 579.0, 570.0, 573.0, 570.0, 579.0, 530.0, 573.0, 579.0, 579.0, 582.0, 573.0, 573.0, 576.0, 465.0, 573.0, 576.0, 522.0, 570.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [294.0, 276.0, 286.0, 293.0, 303.0, 273.0, 290.0, 286.0, 299.0, 279.0, 264.0, 266.0, 270.0, 260.0, 277.0, 299.0, 288.0, 285.0, 262.0, 263.0, 241.0, 275.0, 270.0, 260.0, 266.0, 258.0, 292.0, 281.0, 301.0, 281.0, 289.0, 281.0, 288.0, 296.0, 269.0, 258.0, 289.0, 281.0, 279.0, 297.0, 263.0, 256.0, 270.0, 249.0, 270.0, 255.0, 275.0, 252.0, 271.0, 
256.0, 290.0, 280.0, 269.0, 256.0, 292.0, 278.0, 268.0, 254.0, 275.0, 247.0, 284.0, 286.0, 278.0, 298.0, 290.0, 292.0, 291.0, 296.0, 264.0, 263.0, 291.0, 288.0, 290.0, 280.0, 287.0, 283.0, 262.0, 263.0, 232.0, 233.0, 277.0, 293.0, 291.0, 285.0, 260.0, 265.0, 280.0, 293.0, 281.0, 289.0, 256.0, 269.0, 260.0, 256.0, 290.0, 297.0, 289.0, 290.0, 283.0, 293.0, 255.0, 275.0, 292.0, 287.0, 288.0, 285.0, 289.0, 281.0, 291.0, 282.0, 289.0, 284.0, 281.0, 289.0, 289.0, 278.0, 285.0, 288.0, 297.0, 279.0, 293.0, 286.0, 279.0, 294.0, 288.0, 291.0, 282.0, 291.0, 195.0, 213.0, 268.0, 259.0, 268.0, 265.0, 291.0, 285.0, 251.0, 270.0, 262.0, 251.0, 294.0, 285.0, 288.0, 282.0, 281.0, 295.0, 295.0, 284.0, 296.0, 271.0, 268.0, 248.0, 278.0, 295.0, 254.0, 265.0, 293.0, 283.0, 292.0, 284.0, 290.0, 280.0, 262.0, 260.0, 289.0, 290.0, 284.0, 286.0, 289.0, 284.0, 276.0, 294.0, 286.0, 293.0, 275.0, 255.0, 292.0, 281.0, 277.0, 302.0, 290.0, 289.0, 285.0, 297.0, 285.0, 288.0, 293.0, 280.0, 286.0, 290.0, 232.0, 233.0, 295.0, 278.0, 296.0, 280.0, 261.0, 261.0, 288.0, 282.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7088371959002824, "mean_inference_ms": 1.2734729518259271, "mean_action_processing_ms": 0.13524111288542504, "mean_env_wait_ms": 0.8538048193968771, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 587.0, "episode_reward_min": 408.0, "episode_reward_mean": 555.7, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 195.0}, "policy_reward_max": {"ppo": 303.0}, "policy_reward_mean": {"ppo": 277.85}, "hist_stats": {"episode_reward": [570.0, 579.0, 576.0, 576.0, 578.0, 530.0, 530.0, 576.0, 573.0, 525.0, 516.0, 530.0, 524.0, 573.0, 582.0, 570.0, 584.0, 527.0, 570.0, 576.0, 519.0, 519.0, 525.0, 527.0, 527.0, 570.0, 525.0, 570.0, 522.0, 522.0, 570.0, 576.0, 582.0, 587.0, 527.0, 579.0, 570.0, 570.0, 525.0, 465.0, 570.0, 576.0, 525.0, 573.0, 570.0, 525.0, 516.0, 587.0, 579.0, 576.0, 530.0, 579.0, 573.0, 570.0, 573.0, 573.0, 570.0, 
567.0, 573.0, 576.0, 579.0, 573.0, 579.0, 573.0, 408.0, 527.0, 533.0, 576.0, 521.0, 513.0, 579.0, 570.0, 576.0, 579.0, 567.0, 516.0, 573.0, 519.0, 576.0, 576.0, 570.0, 522.0, 579.0, 570.0, 573.0, 570.0, 579.0, 530.0, 573.0, 579.0, 579.0, 582.0, 573.0, 573.0, 576.0, 465.0, 573.0, 576.0, 522.0, 570.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [294.0, 276.0, 286.0, 293.0, 303.0, 273.0, 290.0, 286.0, 299.0, 279.0, 264.0, 266.0, 270.0, 260.0, 277.0, 299.0, 288.0, 285.0, 262.0, 263.0, 241.0, 275.0, 270.0, 260.0, 266.0, 258.0, 292.0, 281.0, 301.0, 281.0, 289.0, 281.0, 288.0, 296.0, 269.0, 258.0, 289.0, 281.0, 279.0, 297.0, 263.0, 256.0, 270.0, 249.0, 270.0, 255.0, 275.0, 252.0, 271.0, 256.0, 290.0, 280.0, 269.0, 256.0, 292.0, 278.0, 268.0, 254.0, 275.0, 247.0, 284.0, 286.0, 278.0, 298.0, 290.0, 292.0, 291.0, 296.0, 264.0, 263.0, 291.0, 288.0, 290.0, 280.0, 287.0, 283.0, 262.0, 263.0, 232.0, 233.0, 277.0, 293.0, 291.0, 285.0, 260.0, 265.0, 280.0, 293.0, 281.0, 289.0, 256.0, 269.0, 260.0, 256.0, 290.0, 297.0, 289.0, 290.0, 283.0, 293.0, 255.0, 275.0, 292.0, 287.0, 288.0, 285.0, 289.0, 281.0, 291.0, 282.0, 289.0, 284.0, 281.0, 289.0, 289.0, 278.0, 285.0, 288.0, 297.0, 279.0, 293.0, 286.0, 279.0, 294.0, 288.0, 291.0, 282.0, 291.0, 195.0, 213.0, 268.0, 259.0, 268.0, 265.0, 291.0, 285.0, 251.0, 270.0, 262.0, 251.0, 294.0, 285.0, 288.0, 282.0, 281.0, 295.0, 295.0, 284.0, 296.0, 271.0, 268.0, 248.0, 278.0, 295.0, 254.0, 265.0, 293.0, 283.0, 292.0, 284.0, 290.0, 280.0, 262.0, 260.0, 289.0, 
290.0, 284.0, 286.0, 289.0, 284.0, 276.0, 294.0, 286.0, 293.0, 275.0, 255.0, 292.0, 281.0, 277.0, 302.0, 290.0, 289.0, 285.0, 297.0, 285.0, 288.0, 293.0, 280.0, 286.0, 290.0, 232.0, 233.0, 295.0, 278.0, 296.0, 280.0, 261.0, 261.0, 288.0, 282.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7088371959002824, "mean_inference_ms": 1.2734729518259271, "mean_action_processing_ms": 0.13524111288542504, "mean_env_wait_ms": 0.8538048193968771, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 6041600, "num_agent_steps_trained": 6041600, "num_env_steps_sampled": 3020800, "num_env_steps_trained": 3020800, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 3020800, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 6041600, "timers": {"training_iteration_time_ms": 3643.608, "learn_time_ms": 1120.612, "learn_throughput": 11422.327, "synch_weights_time_ms": 10.109}, "counters": {"num_env_steps_sampled": 3020800, "num_env_steps_trained": 3020800, "num_agent_steps_sampled": 6041600, "num_agent_steps_trained": 6041600}, "done": false, "episodes_total": 7552, "training_iteration": 236, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-15-50", "timestamp": 1666581350, "time_this_iter_s": 3.8835103511810303, "time_total_s": 916.9428939819336, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 916.9428939819336, "timesteps_since_restore": 0, "iterations_since_restore": 236, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 24.08, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 194.2, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 171.24, "shaped_reward_min": 128, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.14, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 16.6, "onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 27, "useful_onion_pickup_agent_0_mean": 14.91, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 16.38, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 26, "onion_drop_agent_0_mean": 0.13, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.11, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.06, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, 
"useful_onion_drop_agent_1_mean": 0.06, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 14.66, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 16.23, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 25, "dish_pickup_agent_0_mean": 5.73, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 14, "dish_pickup_agent_1_mean": 5.1, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 5.31, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.53, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.35, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 12, "dish_drop_agent_1_mean": 0.27, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.07, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.27, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 10, "soup_pickup_agent_1_mean": 4.58, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.21, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 10, "soup_delivery_agent_1_mean": 4.55, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.04, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.66, "optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 16.23, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 25, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.66, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 16.23, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 25, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 1.031567808240652e-05, "policy_loss": -0.0003100165631622076, "vf_loss": 
7.778059482574463, "vf_explained_var": 0.6068955659866333, "kl": 0.0022331150248646736, "entropy": 0.9149467945098877, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 3033600, "num_env_steps_trained": 3033600, "num_agent_steps_sampled": 6067200, "num_agent_steps_trained": 6067200}, "sampler_results": {"episode_reward_max": 630.0, "episode_reward_min": 408.0, "episode_reward_mean": 559.64, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 195.0}, "policy_reward_max": {"ppo": 321.0}, "policy_reward_mean": {"ppo": 279.82}, "custom_metrics": {"sparse_reward_mean": 194.2, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 171.24, "shaped_reward_min": 128, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.14, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 16.6, 
"onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 27, "useful_onion_pickup_agent_0_mean": 14.91, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 16.38, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 26, "onion_drop_agent_0_mean": 0.13, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.11, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.06, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.06, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 14.66, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 16.23, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 25, "dish_pickup_agent_0_mean": 5.73, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 14, "dish_pickup_agent_1_mean": 5.1, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 5.31, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.53, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.35, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 12, "dish_drop_agent_1_mean": 0.27, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.07, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.27, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 10, "soup_pickup_agent_1_mean": 4.58, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.21, "soup_delivery_agent_0_min": 1, 
"soup_delivery_agent_0_max": 10, "soup_delivery_agent_1_mean": 4.55, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.04, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.66, "optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 16.23, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 25, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.66, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 16.23, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 25, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 587.0, 527.0, 579.0, 570.0, 570.0, 525.0, 465.0, 570.0, 576.0, 525.0, 573.0, 570.0, 525.0, 516.0, 587.0, 579.0, 576.0, 530.0, 579.0, 573.0, 570.0, 573.0, 573.0, 570.0, 567.0, 573.0, 576.0, 579.0, 573.0, 579.0, 573.0, 408.0, 527.0, 533.0, 576.0, 521.0, 513.0, 579.0, 570.0, 576.0, 579.0, 567.0, 516.0, 573.0, 519.0, 576.0, 576.0, 570.0, 522.0, 579.0, 570.0, 573.0, 570.0, 579.0, 530.0, 573.0, 579.0, 579.0, 582.0, 573.0, 573.0, 576.0, 465.0, 573.0, 576.0, 522.0, 570.0, 579.0, 579.0, 522.0, 579.0, 570.0, 573.0, 525.0, 576.0, 519.0, 582.0, 530.0, 570.0, 573.0, 576.0, 525.0, 573.0, 573.0, 576.0, 570.0, 579.0, 573.0, 573.0, 455.0, 582.0, 579.0, 573.0, 576.0, 576.0, 630.0, 573.0, 576.0, 516.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [290.0, 292.0, 291.0, 296.0, 264.0, 263.0, 291.0, 288.0, 290.0, 280.0, 287.0, 283.0, 262.0, 263.0, 232.0, 233.0, 277.0, 293.0, 291.0, 285.0, 260.0, 265.0, 280.0, 293.0, 281.0, 289.0, 256.0, 269.0, 260.0, 256.0, 290.0, 297.0, 289.0, 290.0, 283.0, 293.0, 255.0, 275.0, 292.0, 287.0, 288.0, 285.0, 289.0, 281.0, 291.0, 282.0, 289.0, 284.0, 281.0, 
289.0, 289.0, 278.0, 285.0, 288.0, 297.0, 279.0, 293.0, 286.0, 279.0, 294.0, 288.0, 291.0, 282.0, 291.0, 195.0, 213.0, 268.0, 259.0, 268.0, 265.0, 291.0, 285.0, 251.0, 270.0, 262.0, 251.0, 294.0, 285.0, 288.0, 282.0, 281.0, 295.0, 295.0, 284.0, 296.0, 271.0, 268.0, 248.0, 278.0, 295.0, 254.0, 265.0, 293.0, 283.0, 292.0, 284.0, 290.0, 280.0, 262.0, 260.0, 289.0, 290.0, 284.0, 286.0, 289.0, 284.0, 276.0, 294.0, 286.0, 293.0, 275.0, 255.0, 292.0, 281.0, 277.0, 302.0, 290.0, 289.0, 285.0, 297.0, 285.0, 288.0, 293.0, 280.0, 286.0, 290.0, 232.0, 233.0, 295.0, 278.0, 296.0, 280.0, 261.0, 261.0, 288.0, 282.0, 297.0, 282.0, 291.0, 288.0, 256.0, 266.0, 288.0, 291.0, 279.0, 291.0, 285.0, 288.0, 264.0, 261.0, 287.0, 289.0, 246.0, 273.0, 299.0, 283.0, 259.0, 271.0, 291.0, 279.0, 284.0, 289.0, 290.0, 286.0, 268.0, 257.0, 283.0, 290.0, 281.0, 292.0, 287.0, 289.0, 287.0, 283.0, 289.0, 290.0, 298.0, 275.0, 291.0, 282.0, 233.0, 222.0, 293.0, 289.0, 292.0, 287.0, 290.0, 283.0, 288.0, 288.0, 293.0, 283.0, 309.0, 321.0, 293.0, 280.0, 289.0, 287.0, 269.0, 247.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7087295012561051, "mean_inference_ms": 1.2733068706265849, "mean_action_processing_ms": 0.13523200386488354, "mean_env_wait_ms": 0.8538258765354435, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 630.0, "episode_reward_min": 408.0, "episode_reward_mean": 559.64, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 195.0}, "policy_reward_max": {"ppo": 321.0}, "policy_reward_mean": {"ppo": 279.82}, "hist_stats": {"episode_reward": [582.0, 587.0, 527.0, 579.0, 570.0, 570.0, 525.0, 465.0, 570.0, 576.0, 525.0, 573.0, 570.0, 525.0, 516.0, 587.0, 579.0, 576.0, 530.0, 579.0, 573.0, 570.0, 573.0, 573.0, 570.0, 567.0, 573.0, 576.0, 579.0, 573.0, 579.0, 573.0, 408.0, 527.0, 533.0, 576.0, 521.0, 513.0, 579.0, 570.0, 576.0, 579.0, 567.0, 516.0, 573.0, 519.0, 576.0, 576.0, 570.0, 522.0, 579.0, 570.0, 573.0, 570.0, 579.0, 530.0, 573.0, 
579.0, 579.0, 582.0, 573.0, 573.0, 576.0, 465.0, 573.0, 576.0, 522.0, 570.0, 579.0, 579.0, 522.0, 579.0, 570.0, 573.0, 525.0, 576.0, 519.0, 582.0, 530.0, 570.0, 573.0, 576.0, 525.0, 573.0, 573.0, 576.0, 570.0, 579.0, 573.0, 573.0, 455.0, 582.0, 579.0, 573.0, 576.0, 576.0, 630.0, 573.0, 576.0, 516.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [290.0, 292.0, 291.0, 296.0, 264.0, 263.0, 291.0, 288.0, 290.0, 280.0, 287.0, 283.0, 262.0, 263.0, 232.0, 233.0, 277.0, 293.0, 291.0, 285.0, 260.0, 265.0, 280.0, 293.0, 281.0, 289.0, 256.0, 269.0, 260.0, 256.0, 290.0, 297.0, 289.0, 290.0, 283.0, 293.0, 255.0, 275.0, 292.0, 287.0, 288.0, 285.0, 289.0, 281.0, 291.0, 282.0, 289.0, 284.0, 281.0, 289.0, 289.0, 278.0, 285.0, 288.0, 297.0, 279.0, 293.0, 286.0, 279.0, 294.0, 288.0, 291.0, 282.0, 291.0, 195.0, 213.0, 268.0, 259.0, 268.0, 265.0, 291.0, 285.0, 251.0, 270.0, 262.0, 251.0, 294.0, 285.0, 288.0, 282.0, 281.0, 295.0, 295.0, 284.0, 296.0, 271.0, 268.0, 248.0, 278.0, 295.0, 254.0, 265.0, 293.0, 283.0, 292.0, 284.0, 290.0, 280.0, 262.0, 260.0, 289.0, 290.0, 284.0, 286.0, 289.0, 284.0, 276.0, 294.0, 286.0, 293.0, 275.0, 255.0, 292.0, 281.0, 277.0, 302.0, 290.0, 289.0, 285.0, 297.0, 285.0, 288.0, 293.0, 280.0, 286.0, 290.0, 232.0, 233.0, 295.0, 278.0, 296.0, 280.0, 261.0, 261.0, 288.0, 282.0, 297.0, 282.0, 291.0, 288.0, 256.0, 266.0, 288.0, 291.0, 279.0, 291.0, 285.0, 288.0, 264.0, 261.0, 287.0, 289.0, 246.0, 273.0, 299.0, 283.0, 259.0, 271.0, 291.0, 279.0, 284.0, 289.0, 290.0, 286.0, 268.0, 
257.0, 283.0, 290.0, 281.0, 292.0, 287.0, 289.0, 287.0, 283.0, 289.0, 290.0, 298.0, 275.0, 291.0, 282.0, 233.0, 222.0, 293.0, 289.0, 292.0, 287.0, 290.0, 283.0, 288.0, 288.0, 293.0, 283.0, 309.0, 321.0, 293.0, 280.0, 289.0, 287.0, 269.0, 247.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7087295012561051, "mean_inference_ms": 1.2733068706265849, "mean_action_processing_ms": 0.13523200386488354, "mean_env_wait_ms": 0.8538258765354435, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 6067200, "num_agent_steps_trained": 6067200, "num_env_steps_sampled": 3033600, "num_env_steps_trained": 3033600, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 3033600, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 6067200, "timers": {"training_iteration_time_ms": 3671.299, "learn_time_ms": 1117.42, "learn_throughput": 11454.961, "synch_weights_time_ms": 10.815}, "counters": {"num_env_steps_sampled": 3033600, "num_env_steps_trained": 3033600, "num_agent_steps_sampled": 6067200, "num_agent_steps_trained": 6067200}, "done": false, "episodes_total": 7584, "training_iteration": 237, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-15-54", "timestamp": 1666581354, "time_this_iter_s": 3.9520177841186523, "time_total_s": 920.8949117660522, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 920.8949117660522, "timesteps_since_restore": 0, "iterations_since_restore": 237, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 22.033333333333335, "ram_util_percent": 10.616666666666667}}
+{"custom_metrics": {"sparse_reward_mean": 194.2, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 171.1, "shaped_reward_min": 128, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.19, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 16.59, "onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 27, "useful_onion_pickup_agent_0_mean": 14.94, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 16.4, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 26, "onion_drop_agent_0_mean": 0.13, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.1, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.07, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, 
"useful_onion_drop_agent_1_mean": 0.06, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 14.68, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 16.24, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 25, "dish_pickup_agent_0_mean": 5.66, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 14, "dish_pickup_agent_1_mean": 5.1, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 5.25, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.53, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.3, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 12, "dish_drop_agent_1_mean": 0.27, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.08, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.04, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.21, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 10, "soup_pickup_agent_1_mean": 4.61, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.18, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 10, "soup_delivery_agent_1_mean": 4.59, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.68, "optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 16.24, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 25, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.68, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 16.24, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 25, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0007050332496874034, "policy_loss": -0.001018117880448699, "vf_loss": 
7.772661209106445, "vf_explained_var": 0.5922338962554932, "kl": 0.0018585395300760865, "entropy": 0.9283610582351685, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 3046400, "num_env_steps_trained": 3046400, "num_agent_steps_sampled": 6092800, "num_agent_steps_trained": 6092800}, "sampler_results": {"episode_reward_max": 630.0, "episode_reward_min": 408.0, "episode_reward_mean": 559.5, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 195.0}, "policy_reward_max": {"ppo": 321.0}, "policy_reward_mean": {"ppo": 279.75}, "custom_metrics": {"sparse_reward_mean": 194.2, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 171.1, "shaped_reward_min": 128, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.19, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 16.59, 
"onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 27, "useful_onion_pickup_agent_0_mean": 14.94, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 16.4, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 26, "onion_drop_agent_0_mean": 0.13, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.1, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.07, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.06, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 14.68, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 16.24, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 25, "dish_pickup_agent_0_mean": 5.66, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 14, "dish_pickup_agent_1_mean": 5.1, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 5.25, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.53, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.3, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 12, "dish_drop_agent_1_mean": 0.27, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.08, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.04, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.21, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 10, "soup_pickup_agent_1_mean": 4.61, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.18, "soup_delivery_agent_0_min": 1, 
"soup_delivery_agent_0_max": 10, "soup_delivery_agent_1_mean": 4.59, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.68, "optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 16.24, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 25, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.68, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 16.24, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 25, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [408.0, 527.0, 533.0, 576.0, 521.0, 513.0, 579.0, 570.0, 576.0, 579.0, 567.0, 516.0, 573.0, 519.0, 576.0, 576.0, 570.0, 522.0, 579.0, 570.0, 573.0, 570.0, 579.0, 530.0, 573.0, 579.0, 579.0, 582.0, 573.0, 573.0, 576.0, 465.0, 573.0, 576.0, 522.0, 570.0, 579.0, 579.0, 522.0, 579.0, 570.0, 573.0, 525.0, 576.0, 519.0, 582.0, 530.0, 570.0, 573.0, 576.0, 525.0, 573.0, 573.0, 576.0, 570.0, 579.0, 573.0, 573.0, 455.0, 582.0, 579.0, 573.0, 576.0, 576.0, 630.0, 573.0, 576.0, 516.0, 582.0, 579.0, 576.0, 573.0, 570.0, 567.0, 525.0, 533.0, 579.0, 579.0, 561.0, 576.0, 576.0, 579.0, 573.0, 527.0, 527.0, 573.0, 573.0, 573.0, 570.0, 582.0, 525.0, 522.0, 573.0, 573.0, 570.0, 573.0, 573.0, 522.0, 576.0, 516.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [195.0, 213.0, 268.0, 259.0, 268.0, 265.0, 291.0, 285.0, 251.0, 270.0, 262.0, 251.0, 294.0, 285.0, 288.0, 282.0, 281.0, 295.0, 295.0, 284.0, 296.0, 271.0, 268.0, 248.0, 278.0, 295.0, 254.0, 265.0, 293.0, 283.0, 292.0, 284.0, 290.0, 280.0, 262.0, 260.0, 289.0, 290.0, 284.0, 286.0, 289.0, 284.0, 276.0, 294.0, 286.0, 293.0, 275.0, 255.0, 292.0, 
281.0, 277.0, 302.0, 290.0, 289.0, 285.0, 297.0, 285.0, 288.0, 293.0, 280.0, 286.0, 290.0, 232.0, 233.0, 295.0, 278.0, 296.0, 280.0, 261.0, 261.0, 288.0, 282.0, 297.0, 282.0, 291.0, 288.0, 256.0, 266.0, 288.0, 291.0, 279.0, 291.0, 285.0, 288.0, 264.0, 261.0, 287.0, 289.0, 246.0, 273.0, 299.0, 283.0, 259.0, 271.0, 291.0, 279.0, 284.0, 289.0, 290.0, 286.0, 268.0, 257.0, 283.0, 290.0, 281.0, 292.0, 287.0, 289.0, 287.0, 283.0, 289.0, 290.0, 298.0, 275.0, 291.0, 282.0, 233.0, 222.0, 293.0, 289.0, 292.0, 287.0, 290.0, 283.0, 288.0, 288.0, 293.0, 283.0, 309.0, 321.0, 293.0, 280.0, 289.0, 287.0, 269.0, 247.0, 294.0, 288.0, 291.0, 288.0, 286.0, 290.0, 288.0, 285.0, 286.0, 284.0, 280.0, 287.0, 257.0, 268.0, 269.0, 264.0, 292.0, 287.0, 287.0, 292.0, 280.0, 281.0, 286.0, 290.0, 297.0, 279.0, 288.0, 291.0, 287.0, 286.0, 255.0, 272.0, 262.0, 265.0, 287.0, 286.0, 282.0, 291.0, 285.0, 288.0, 278.0, 292.0, 297.0, 285.0, 264.0, 261.0, 261.0, 261.0, 280.0, 293.0, 286.0, 287.0, 280.0, 290.0, 287.0, 286.0, 270.0, 303.0, 269.0, 253.0, 273.0, 303.0, 261.0, 255.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7086594012552062, "mean_inference_ms": 1.2731733769862408, "mean_action_processing_ms": 0.13522490952251545, "mean_env_wait_ms": 0.8538714821231937, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 630.0, "episode_reward_min": 408.0, "episode_reward_mean": 559.5, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 195.0}, "policy_reward_max": {"ppo": 321.0}, "policy_reward_mean": {"ppo": 279.75}, "hist_stats": {"episode_reward": [408.0, 527.0, 533.0, 576.0, 521.0, 513.0, 579.0, 570.0, 576.0, 579.0, 567.0, 516.0, 573.0, 519.0, 576.0, 576.0, 570.0, 522.0, 579.0, 570.0, 573.0, 570.0, 579.0, 530.0, 573.0, 579.0, 579.0, 582.0, 573.0, 573.0, 576.0, 465.0, 573.0, 576.0, 522.0, 570.0, 579.0, 579.0, 522.0, 579.0, 570.0, 573.0, 525.0, 576.0, 519.0, 582.0, 530.0, 570.0, 573.0, 576.0, 525.0, 573.0, 573.0, 576.0, 570.0, 579.0, 573.0, 
573.0, 455.0, 582.0, 579.0, 573.0, 576.0, 576.0, 630.0, 573.0, 576.0, 516.0, 582.0, 579.0, 576.0, 573.0, 570.0, 567.0, 525.0, 533.0, 579.0, 579.0, 561.0, 576.0, 576.0, 579.0, 573.0, 527.0, 527.0, 573.0, 573.0, 573.0, 570.0, 582.0, 525.0, 522.0, 573.0, 573.0, 570.0, 573.0, 573.0, 522.0, 576.0, 516.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [195.0, 213.0, 268.0, 259.0, 268.0, 265.0, 291.0, 285.0, 251.0, 270.0, 262.0, 251.0, 294.0, 285.0, 288.0, 282.0, 281.0, 295.0, 295.0, 284.0, 296.0, 271.0, 268.0, 248.0, 278.0, 295.0, 254.0, 265.0, 293.0, 283.0, 292.0, 284.0, 290.0, 280.0, 262.0, 260.0, 289.0, 290.0, 284.0, 286.0, 289.0, 284.0, 276.0, 294.0, 286.0, 293.0, 275.0, 255.0, 292.0, 281.0, 277.0, 302.0, 290.0, 289.0, 285.0, 297.0, 285.0, 288.0, 293.0, 280.0, 286.0, 290.0, 232.0, 233.0, 295.0, 278.0, 296.0, 280.0, 261.0, 261.0, 288.0, 282.0, 297.0, 282.0, 291.0, 288.0, 256.0, 266.0, 288.0, 291.0, 279.0, 291.0, 285.0, 288.0, 264.0, 261.0, 287.0, 289.0, 246.0, 273.0, 299.0, 283.0, 259.0, 271.0, 291.0, 279.0, 284.0, 289.0, 290.0, 286.0, 268.0, 257.0, 283.0, 290.0, 281.0, 292.0, 287.0, 289.0, 287.0, 283.0, 289.0, 290.0, 298.0, 275.0, 291.0, 282.0, 233.0, 222.0, 293.0, 289.0, 292.0, 287.0, 290.0, 283.0, 288.0, 288.0, 293.0, 283.0, 309.0, 321.0, 293.0, 280.0, 289.0, 287.0, 269.0, 247.0, 294.0, 288.0, 291.0, 288.0, 286.0, 290.0, 288.0, 285.0, 286.0, 284.0, 280.0, 287.0, 257.0, 268.0, 269.0, 264.0, 292.0, 287.0, 287.0, 292.0, 280.0, 281.0, 286.0, 290.0, 297.0, 279.0, 288.0, 291.0, 287.0, 
286.0, 255.0, 272.0, 262.0, 265.0, 287.0, 286.0, 282.0, 291.0, 285.0, 288.0, 278.0, 292.0, 297.0, 285.0, 264.0, 261.0, 261.0, 261.0, 280.0, 293.0, 286.0, 287.0, 280.0, 290.0, 287.0, 286.0, 270.0, 303.0, 269.0, 253.0, 273.0, 303.0, 261.0, 255.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7086594012552062, "mean_inference_ms": 1.2731733769862408, "mean_action_processing_ms": 0.13522490952251545, "mean_env_wait_ms": 0.8538714821231937, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 6092800, "num_agent_steps_trained": 6092800, "num_env_steps_sampled": 3046400, "num_env_steps_trained": 3046400, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 3046400, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 6092800, "timers": {"training_iteration_time_ms": 3664.058, "learn_time_ms": 1110.218, "learn_throughput": 11529.27, "synch_weights_time_ms": 10.171}, "counters": {"num_env_steps_sampled": 3046400, "num_env_steps_trained": 3046400, "num_agent_steps_sampled": 6092800, "num_agent_steps_trained": 6092800}, "done": false, "episodes_total": 7616, "training_iteration": 238, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-15-58", "timestamp": 1666581358, "time_this_iter_s": 3.6473422050476074, "time_total_s": 924.5422539710999, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 924.5422539710999, "timesteps_since_restore": 0, "iterations_since_restore": 238, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.916666666666668, "ram_util_percent": 10.616666666666667}}
+{"custom_metrics": {"sparse_reward_mean": 195.2, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 171.79, "shaped_reward_min": 135, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.78, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 17.03, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 27, "useful_onion_pickup_agent_0_mean": 14.56, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 16.85, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 27, "onion_drop_agent_0_mean": 0.14, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.1, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.06, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, 
"useful_onion_drop_agent_1_mean": 0.04, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 14.32, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 16.73, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 27, "dish_pickup_agent_0_mean": 5.86, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 4.84, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.48, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.35, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.2, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.22, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.08, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.06, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.46, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 10, "soup_pickup_agent_1_mean": 4.38, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.45, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 10, "soup_delivery_agent_1_mean": 4.35, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.32, "optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 16.73, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 27, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.32, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 16.73, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 27, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0005791244911961257, "policy_loss": -0.0008936794474720955, "vf_loss": 
7.769590854644775, "vf_explained_var": 0.596868634223938, "kl": 0.0018960753222927451, "entropy": 0.924808144569397, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 3059200, "num_env_steps_trained": 3059200, "num_agent_steps_sampled": 6118400, "num_agent_steps_trained": 6118400}, "sampler_results": {"episode_reward_max": 630.0, "episode_reward_min": 455.0, "episode_reward_mean": 562.19, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 222.0}, "policy_reward_max": {"ppo": 321.0}, "policy_reward_mean": {"ppo": 281.095}, "custom_metrics": {"sparse_reward_mean": 195.2, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 171.79, "shaped_reward_min": 135, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.78, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 17.03, 
"onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 27, "useful_onion_pickup_agent_0_mean": 14.56, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 16.85, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 27, "onion_drop_agent_0_mean": 0.14, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.1, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.06, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.04, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 14.32, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 16.73, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 27, "dish_pickup_agent_0_mean": 5.86, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 4.84, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.48, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.35, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.2, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.22, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.08, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.06, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.46, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 10, "soup_pickup_agent_1_mean": 4.38, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.45, "soup_delivery_agent_0_min": 2, 
"soup_delivery_agent_0_max": 10, "soup_delivery_agent_1_mean": 4.35, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.32, "optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 16.73, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 27, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.32, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 16.73, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 27, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [573.0, 576.0, 522.0, 570.0, 579.0, 579.0, 522.0, 579.0, 570.0, 573.0, 525.0, 576.0, 519.0, 582.0, 530.0, 570.0, 573.0, 576.0, 525.0, 573.0, 573.0, 576.0, 570.0, 579.0, 573.0, 573.0, 455.0, 582.0, 579.0, 573.0, 576.0, 576.0, 630.0, 573.0, 576.0, 516.0, 582.0, 579.0, 576.0, 573.0, 570.0, 567.0, 525.0, 533.0, 579.0, 579.0, 561.0, 576.0, 576.0, 579.0, 573.0, 527.0, 527.0, 573.0, 573.0, 573.0, 570.0, 582.0, 525.0, 522.0, 573.0, 573.0, 570.0, 573.0, 573.0, 522.0, 576.0, 516.0, 516.0, 579.0, 573.0, 573.0, 579.0, 582.0, 570.0, 525.0, 576.0, 522.0, 576.0, 576.0, 576.0, 579.0, 573.0, 582.0, 530.0, 570.0, 573.0, 576.0, 579.0, 573.0, 524.0, 576.0, 519.0, 510.0, 570.0, 579.0, 573.0, 573.0, 573.0, 516.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [295.0, 278.0, 296.0, 280.0, 261.0, 261.0, 288.0, 282.0, 297.0, 282.0, 291.0, 288.0, 256.0, 266.0, 288.0, 291.0, 279.0, 291.0, 285.0, 288.0, 264.0, 261.0, 287.0, 289.0, 246.0, 273.0, 299.0, 283.0, 259.0, 271.0, 291.0, 279.0, 284.0, 289.0, 290.0, 286.0, 268.0, 257.0, 283.0, 290.0, 281.0, 292.0, 287.0, 289.0, 287.0, 283.0, 289.0, 290.0, 298.0, 
275.0, 291.0, 282.0, 233.0, 222.0, 293.0, 289.0, 292.0, 287.0, 290.0, 283.0, 288.0, 288.0, 293.0, 283.0, 309.0, 321.0, 293.0, 280.0, 289.0, 287.0, 269.0, 247.0, 294.0, 288.0, 291.0, 288.0, 286.0, 290.0, 288.0, 285.0, 286.0, 284.0, 280.0, 287.0, 257.0, 268.0, 269.0, 264.0, 292.0, 287.0, 287.0, 292.0, 280.0, 281.0, 286.0, 290.0, 297.0, 279.0, 288.0, 291.0, 287.0, 286.0, 255.0, 272.0, 262.0, 265.0, 287.0, 286.0, 282.0, 291.0, 285.0, 288.0, 278.0, 292.0, 297.0, 285.0, 264.0, 261.0, 261.0, 261.0, 280.0, 293.0, 286.0, 287.0, 280.0, 290.0, 287.0, 286.0, 270.0, 303.0, 269.0, 253.0, 273.0, 303.0, 261.0, 255.0, 263.0, 253.0, 293.0, 286.0, 283.0, 290.0, 287.0, 286.0, 289.0, 290.0, 285.0, 297.0, 276.0, 294.0, 269.0, 256.0, 291.0, 285.0, 260.0, 262.0, 287.0, 289.0, 297.0, 279.0, 291.0, 285.0, 293.0, 286.0, 283.0, 290.0, 287.0, 295.0, 263.0, 267.0, 289.0, 281.0, 292.0, 281.0, 278.0, 298.0, 294.0, 285.0, 294.0, 279.0, 276.0, 248.0, 303.0, 273.0, 272.0, 247.0, 257.0, 253.0, 282.0, 288.0, 289.0, 290.0, 291.0, 282.0, 294.0, 279.0, 290.0, 283.0, 258.0, 258.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7085613748997756, "mean_inference_ms": 1.2729861809868541, "mean_action_processing_ms": 0.13521385639155092, "mean_env_wait_ms": 0.85387454684184, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 630.0, "episode_reward_min": 455.0, "episode_reward_mean": 562.19, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 222.0}, "policy_reward_max": {"ppo": 321.0}, "policy_reward_mean": {"ppo": 281.095}, "hist_stats": {"episode_reward": [573.0, 576.0, 522.0, 570.0, 579.0, 579.0, 522.0, 579.0, 570.0, 573.0, 525.0, 576.0, 519.0, 582.0, 530.0, 570.0, 573.0, 576.0, 525.0, 573.0, 573.0, 576.0, 570.0, 579.0, 573.0, 573.0, 455.0, 582.0, 579.0, 573.0, 576.0, 576.0, 630.0, 573.0, 576.0, 516.0, 582.0, 579.0, 576.0, 573.0, 570.0, 567.0, 525.0, 533.0, 579.0, 579.0, 561.0, 576.0, 576.0, 579.0, 573.0, 527.0, 527.0, 573.0, 573.0, 573.0, 570.0, 
582.0, 525.0, 522.0, 573.0, 573.0, 570.0, 573.0, 573.0, 522.0, 576.0, 516.0, 516.0, 579.0, 573.0, 573.0, 579.0, 582.0, 570.0, 525.0, 576.0, 522.0, 576.0, 576.0, 576.0, 579.0, 573.0, 582.0, 530.0, 570.0, 573.0, 576.0, 579.0, 573.0, 524.0, 576.0, 519.0, 510.0, 570.0, 579.0, 573.0, 573.0, 573.0, 516.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [295.0, 278.0, 296.0, 280.0, 261.0, 261.0, 288.0, 282.0, 297.0, 282.0, 291.0, 288.0, 256.0, 266.0, 288.0, 291.0, 279.0, 291.0, 285.0, 288.0, 264.0, 261.0, 287.0, 289.0, 246.0, 273.0, 299.0, 283.0, 259.0, 271.0, 291.0, 279.0, 284.0, 289.0, 290.0, 286.0, 268.0, 257.0, 283.0, 290.0, 281.0, 292.0, 287.0, 289.0, 287.0, 283.0, 289.0, 290.0, 298.0, 275.0, 291.0, 282.0, 233.0, 222.0, 293.0, 289.0, 292.0, 287.0, 290.0, 283.0, 288.0, 288.0, 293.0, 283.0, 309.0, 321.0, 293.0, 280.0, 289.0, 287.0, 269.0, 247.0, 294.0, 288.0, 291.0, 288.0, 286.0, 290.0, 288.0, 285.0, 286.0, 284.0, 280.0, 287.0, 257.0, 268.0, 269.0, 264.0, 292.0, 287.0, 287.0, 292.0, 280.0, 281.0, 286.0, 290.0, 297.0, 279.0, 288.0, 291.0, 287.0, 286.0, 255.0, 272.0, 262.0, 265.0, 287.0, 286.0, 282.0, 291.0, 285.0, 288.0, 278.0, 292.0, 297.0, 285.0, 264.0, 261.0, 261.0, 261.0, 280.0, 293.0, 286.0, 287.0, 280.0, 290.0, 287.0, 286.0, 270.0, 303.0, 269.0, 253.0, 273.0, 303.0, 261.0, 255.0, 263.0, 253.0, 293.0, 286.0, 283.0, 290.0, 287.0, 286.0, 289.0, 290.0, 285.0, 297.0, 276.0, 294.0, 269.0, 256.0, 291.0, 285.0, 260.0, 262.0, 287.0, 289.0, 297.0, 279.0, 291.0, 285.0, 293.0, 286.0, 283.0, 
290.0, 287.0, 295.0, 263.0, 267.0, 289.0, 281.0, 292.0, 281.0, 278.0, 298.0, 294.0, 285.0, 294.0, 279.0, 276.0, 248.0, 303.0, 273.0, 272.0, 247.0, 257.0, 253.0, 282.0, 288.0, 289.0, 290.0, 291.0, 282.0, 294.0, 279.0, 290.0, 283.0, 258.0, 258.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7085613748997756, "mean_inference_ms": 1.2729861809868541, "mean_action_processing_ms": 0.13521385639155092, "mean_env_wait_ms": 0.85387454684184, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 6118400, "num_agent_steps_trained": 6118400, "num_env_steps_sampled": 3059200, "num_env_steps_trained": 3059200, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 3059200, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 6118400, "timers": {"training_iteration_time_ms": 3665.489, "learn_time_ms": 1111.722, "learn_throughput": 11513.671, "synch_weights_time_ms": 10.06}, "counters": {"num_env_steps_sampled": 3059200, "num_env_steps_trained": 3059200, "num_agent_steps_sampled": 6118400, "num_agent_steps_trained": 6118400}, "done": false, "episodes_total": 7648, "training_iteration": 239, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-16-02", "timestamp": 1666581362, "time_this_iter_s": 3.659172296524048, "time_total_s": 928.2014262676239, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 928.2014262676239, "timesteps_since_restore": 0, "iterations_since_restore": 239, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.54, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 195.4, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 171.99, "shaped_reward_min": 150, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.5, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 17.29, "onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 27, "useful_onion_pickup_agent_0_mean": 14.3, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 17.05, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 27, "onion_drop_agent_0_mean": 0.09, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.11, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.03, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, 
"useful_onion_drop_agent_1_mean": 0.04, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 14.12, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 16.96, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 27, "dish_pickup_agent_0_mean": 5.9, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 4.74, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.52, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.33, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.17, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.2, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.06, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.05, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.55, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.31, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.52, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.27, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.12, "optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 16.96, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 27, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.12, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 16.96, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 27, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0016768049681559205, "policy_loss": -0.00199872930534184, "vf_loss": 
7.833566665649414, "vf_explained_var": 0.6034523248672485, "kl": 0.001984196715056896, "entropy": 0.9228615164756775, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 3072000, "num_env_steps_trained": 3072000, "num_agent_steps_sampled": 6144000, "num_agent_steps_trained": 6144000}, "sampler_results": {"episode_reward_max": 630.0, "episode_reward_min": 510.0, "episode_reward_mean": 562.79, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 247.0}, "policy_reward_max": {"ppo": 321.0}, "policy_reward_mean": {"ppo": 281.395}, "custom_metrics": {"sparse_reward_mean": 195.4, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 171.99, "shaped_reward_min": 150, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.5, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 17.29, 
"onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 27, "useful_onion_pickup_agent_0_mean": 14.3, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 17.05, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 27, "onion_drop_agent_0_mean": 0.09, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.11, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.03, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.04, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 14.12, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 16.96, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 27, "dish_pickup_agent_0_mean": 5.9, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 4.74, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.52, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.33, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.17, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.2, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.06, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.05, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.55, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.31, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.52, "soup_delivery_agent_0_min": 2, 
"soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.27, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.12, "optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 16.96, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 27, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.12, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 16.96, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 27, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [630.0, 573.0, 576.0, 516.0, 582.0, 579.0, 576.0, 573.0, 570.0, 567.0, 525.0, 533.0, 579.0, 579.0, 561.0, 576.0, 576.0, 579.0, 573.0, 527.0, 527.0, 573.0, 573.0, 573.0, 570.0, 582.0, 525.0, 522.0, 573.0, 573.0, 570.0, 573.0, 573.0, 522.0, 576.0, 516.0, 516.0, 579.0, 573.0, 573.0, 579.0, 582.0, 570.0, 525.0, 576.0, 522.0, 576.0, 576.0, 576.0, 579.0, 573.0, 582.0, 530.0, 570.0, 573.0, 576.0, 579.0, 573.0, 524.0, 576.0, 519.0, 510.0, 570.0, 579.0, 573.0, 573.0, 573.0, 516.0, 525.0, 576.0, 516.0, 522.0, 630.0, 582.0, 516.0, 570.0, 582.0, 576.0, 573.0, 579.0, 570.0, 579.0, 579.0, 573.0, 573.0, 576.0, 573.0, 576.0, 576.0, 576.0, 573.0, 567.0, 573.0, 573.0, 530.0, 578.0, 525.0, 576.0, 522.0, 522.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [309.0, 321.0, 293.0, 280.0, 289.0, 287.0, 269.0, 247.0, 294.0, 288.0, 291.0, 288.0, 286.0, 290.0, 288.0, 285.0, 286.0, 284.0, 280.0, 287.0, 257.0, 268.0, 269.0, 264.0, 292.0, 287.0, 287.0, 292.0, 280.0, 281.0, 286.0, 290.0, 297.0, 279.0, 288.0, 291.0, 287.0, 286.0, 255.0, 272.0, 262.0, 265.0, 287.0, 286.0, 282.0, 291.0, 285.0, 288.0, 278.0, 
292.0, 297.0, 285.0, 264.0, 261.0, 261.0, 261.0, 280.0, 293.0, 286.0, 287.0, 280.0, 290.0, 287.0, 286.0, 270.0, 303.0, 269.0, 253.0, 273.0, 303.0, 261.0, 255.0, 263.0, 253.0, 293.0, 286.0, 283.0, 290.0, 287.0, 286.0, 289.0, 290.0, 285.0, 297.0, 276.0, 294.0, 269.0, 256.0, 291.0, 285.0, 260.0, 262.0, 287.0, 289.0, 297.0, 279.0, 291.0, 285.0, 293.0, 286.0, 283.0, 290.0, 287.0, 295.0, 263.0, 267.0, 289.0, 281.0, 292.0, 281.0, 278.0, 298.0, 294.0, 285.0, 294.0, 279.0, 276.0, 248.0, 303.0, 273.0, 272.0, 247.0, 257.0, 253.0, 282.0, 288.0, 289.0, 290.0, 291.0, 282.0, 294.0, 279.0, 290.0, 283.0, 258.0, 258.0, 260.0, 265.0, 289.0, 287.0, 255.0, 261.0, 271.0, 251.0, 318.0, 312.0, 297.0, 285.0, 256.0, 260.0, 273.0, 297.0, 287.0, 295.0, 279.0, 297.0, 294.0, 279.0, 281.0, 298.0, 289.0, 281.0, 289.0, 290.0, 301.0, 278.0, 276.0, 297.0, 295.0, 278.0, 279.0, 297.0, 280.0, 293.0, 281.0, 295.0, 293.0, 283.0, 287.0, 289.0, 289.0, 284.0, 289.0, 278.0, 296.0, 277.0, 292.0, 281.0, 268.0, 262.0, 294.0, 284.0, 266.0, 259.0, 290.0, 286.0, 265.0, 257.0, 262.0, 260.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7084314977660204, "mean_inference_ms": 1.2726471129035033, "mean_action_processing_ms": 0.135200399735534, "mean_env_wait_ms": 0.8537186524830526, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 630.0, "episode_reward_min": 510.0, "episode_reward_mean": 562.79, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 247.0}, "policy_reward_max": {"ppo": 321.0}, "policy_reward_mean": {"ppo": 281.395}, "hist_stats": {"episode_reward": [630.0, 573.0, 576.0, 516.0, 582.0, 579.0, 576.0, 573.0, 570.0, 567.0, 525.0, 533.0, 579.0, 579.0, 561.0, 576.0, 576.0, 579.0, 573.0, 527.0, 527.0, 573.0, 573.0, 573.0, 570.0, 582.0, 525.0, 522.0, 573.0, 573.0, 570.0, 573.0, 573.0, 522.0, 576.0, 516.0, 516.0, 579.0, 573.0, 573.0, 579.0, 582.0, 570.0, 525.0, 576.0, 522.0, 576.0, 576.0, 576.0, 579.0, 573.0, 582.0, 530.0, 570.0, 573.0, 576.0, 579.0, 
573.0, 524.0, 576.0, 519.0, 510.0, 570.0, 579.0, 573.0, 573.0, 573.0, 516.0, 525.0, 576.0, 516.0, 522.0, 630.0, 582.0, 516.0, 570.0, 582.0, 576.0, 573.0, 579.0, 570.0, 579.0, 579.0, 573.0, 573.0, 576.0, 573.0, 576.0, 576.0, 576.0, 573.0, 567.0, 573.0, 573.0, 530.0, 578.0, 525.0, 576.0, 522.0, 522.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [309.0, 321.0, 293.0, 280.0, 289.0, 287.0, 269.0, 247.0, 294.0, 288.0, 291.0, 288.0, 286.0, 290.0, 288.0, 285.0, 286.0, 284.0, 280.0, 287.0, 257.0, 268.0, 269.0, 264.0, 292.0, 287.0, 287.0, 292.0, 280.0, 281.0, 286.0, 290.0, 297.0, 279.0, 288.0, 291.0, 287.0, 286.0, 255.0, 272.0, 262.0, 265.0, 287.0, 286.0, 282.0, 291.0, 285.0, 288.0, 278.0, 292.0, 297.0, 285.0, 264.0, 261.0, 261.0, 261.0, 280.0, 293.0, 286.0, 287.0, 280.0, 290.0, 287.0, 286.0, 270.0, 303.0, 269.0, 253.0, 273.0, 303.0, 261.0, 255.0, 263.0, 253.0, 293.0, 286.0, 283.0, 290.0, 287.0, 286.0, 289.0, 290.0, 285.0, 297.0, 276.0, 294.0, 269.0, 256.0, 291.0, 285.0, 260.0, 262.0, 287.0, 289.0, 297.0, 279.0, 291.0, 285.0, 293.0, 286.0, 283.0, 290.0, 287.0, 295.0, 263.0, 267.0, 289.0, 281.0, 292.0, 281.0, 278.0, 298.0, 294.0, 285.0, 294.0, 279.0, 276.0, 248.0, 303.0, 273.0, 272.0, 247.0, 257.0, 253.0, 282.0, 288.0, 289.0, 290.0, 291.0, 282.0, 294.0, 279.0, 290.0, 283.0, 258.0, 258.0, 260.0, 265.0, 289.0, 287.0, 255.0, 261.0, 271.0, 251.0, 318.0, 312.0, 297.0, 285.0, 256.0, 260.0, 273.0, 297.0, 287.0, 295.0, 279.0, 297.0, 294.0, 279.0, 281.0, 298.0, 289.0, 281.0, 289.0, 290.0, 301.0, 
278.0, 276.0, 297.0, 295.0, 278.0, 279.0, 297.0, 280.0, 293.0, 281.0, 295.0, 293.0, 283.0, 287.0, 289.0, 289.0, 284.0, 289.0, 278.0, 296.0, 277.0, 292.0, 281.0, 268.0, 262.0, 294.0, 284.0, 266.0, 259.0, 290.0, 286.0, 265.0, 257.0, 262.0, 260.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7084314977660204, "mean_inference_ms": 1.2726471129035033, "mean_action_processing_ms": 0.135200399735534, "mean_env_wait_ms": 0.8537186524830526, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 6144000, "num_agent_steps_trained": 6144000, "num_env_steps_sampled": 3072000, "num_env_steps_trained": 3072000, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 3072000, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 6144000, "timers": {"training_iteration_time_ms": 3666.943, "learn_time_ms": 1113.043, "learn_throughput": 11500.005, "synch_weights_time_ms": 11.202}, "counters": {"num_env_steps_sampled": 3072000, "num_env_steps_trained": 3072000, "num_agent_steps_sampled": 6144000, "num_agent_steps_trained": 6144000}, "done": false, "episodes_total": 7680, "training_iteration": 240, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-16-05", "timestamp": 1666581365, "time_this_iter_s": 3.6721017360687256, "time_total_s": 931.8735280036926, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 931.8735280036926, "timesteps_since_restore": 0, "iterations_since_restore": 240, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.640000000000004, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 193.6, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 171.26, "shaped_reward_min": 150, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.64, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 17.06, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 27, "useful_onion_pickup_agent_0_mean": 14.45, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 16.83, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 27, "onion_drop_agent_0_mean": 0.08, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.11, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.02, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, 
"useful_onion_drop_agent_1_mean": 0.02, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 14.3, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 16.68, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 27, "dish_pickup_agent_0_mean": 5.8, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 4.87, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.43, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.41, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.21, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.23, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.06, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.07, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 5.41, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.36, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.38, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.31, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.3, "optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 16.68, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 27, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.3, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 16.68, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 27, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0015853033401072025, "policy_loss": -0.0019000378670170903, "vf_loss": 
7.7883172035217285, "vf_explained_var": 0.595491886138916, "kl": 0.0021413369104266167, "entropy": 0.9281942844390869, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 3084800, "num_env_steps_trained": 3084800, "num_agent_steps_sampled": 6169600, "num_agent_steps_trained": 6169600}, "sampler_results": {"episode_reward_max": 630.0, "episode_reward_min": 510.0, "episode_reward_mean": 558.46, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 247.0}, "policy_reward_max": {"ppo": 318.0}, "policy_reward_mean": {"ppo": 279.23}, "custom_metrics": {"sparse_reward_mean": 193.6, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 171.26, "shaped_reward_min": 150, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.64, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 17.06, 
"onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 27, "useful_onion_pickup_agent_0_mean": 14.45, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 16.83, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 27, "onion_drop_agent_0_mean": 0.08, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.11, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.02, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.02, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 14.3, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 16.68, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 27, "dish_pickup_agent_0_mean": 5.8, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 4.87, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.43, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.41, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.21, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.23, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.06, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.07, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 5.41, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.36, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.38, "soup_delivery_agent_0_min": 2, 
"soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.31, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.3, "optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 16.68, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 27, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.3, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 16.68, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 27, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [573.0, 522.0, 576.0, 516.0, 516.0, 579.0, 573.0, 573.0, 579.0, 582.0, 570.0, 525.0, 576.0, 522.0, 576.0, 576.0, 576.0, 579.0, 573.0, 582.0, 530.0, 570.0, 573.0, 576.0, 579.0, 573.0, 524.0, 576.0, 519.0, 510.0, 570.0, 579.0, 573.0, 573.0, 573.0, 516.0, 525.0, 576.0, 516.0, 522.0, 630.0, 582.0, 516.0, 570.0, 582.0, 576.0, 573.0, 579.0, 570.0, 579.0, 579.0, 573.0, 573.0, 576.0, 573.0, 576.0, 576.0, 576.0, 573.0, 567.0, 573.0, 573.0, 530.0, 578.0, 525.0, 576.0, 522.0, 522.0, 516.0, 579.0, 527.0, 573.0, 516.0, 525.0, 525.0, 576.0, 525.0, 525.0, 573.0, 567.0, 519.0, 573.0, 573.0, 525.0, 530.0, 579.0, 522.0, 581.0, 525.0, 525.0, 582.0, 530.0, 579.0, 573.0, 525.0, 576.0, 576.0, 579.0, 573.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [270.0, 303.0, 269.0, 253.0, 273.0, 303.0, 261.0, 255.0, 263.0, 253.0, 293.0, 286.0, 283.0, 290.0, 287.0, 286.0, 289.0, 290.0, 285.0, 297.0, 276.0, 294.0, 269.0, 256.0, 291.0, 285.0, 260.0, 262.0, 287.0, 289.0, 297.0, 279.0, 291.0, 285.0, 293.0, 286.0, 283.0, 290.0, 287.0, 295.0, 263.0, 267.0, 289.0, 281.0, 292.0, 281.0, 278.0, 298.0, 294.0, 
285.0, 294.0, 279.0, 276.0, 248.0, 303.0, 273.0, 272.0, 247.0, 257.0, 253.0, 282.0, 288.0, 289.0, 290.0, 291.0, 282.0, 294.0, 279.0, 290.0, 283.0, 258.0, 258.0, 260.0, 265.0, 289.0, 287.0, 255.0, 261.0, 271.0, 251.0, 318.0, 312.0, 297.0, 285.0, 256.0, 260.0, 273.0, 297.0, 287.0, 295.0, 279.0, 297.0, 294.0, 279.0, 281.0, 298.0, 289.0, 281.0, 289.0, 290.0, 301.0, 278.0, 276.0, 297.0, 295.0, 278.0, 279.0, 297.0, 280.0, 293.0, 281.0, 295.0, 293.0, 283.0, 287.0, 289.0, 289.0, 284.0, 289.0, 278.0, 296.0, 277.0, 292.0, 281.0, 268.0, 262.0, 294.0, 284.0, 266.0, 259.0, 290.0, 286.0, 265.0, 257.0, 262.0, 260.0, 250.0, 266.0, 295.0, 284.0, 255.0, 272.0, 284.0, 289.0, 254.0, 262.0, 255.0, 270.0, 259.0, 266.0, 289.0, 287.0, 266.0, 259.0, 257.0, 268.0, 295.0, 278.0, 294.0, 273.0, 263.0, 256.0, 287.0, 286.0, 299.0, 274.0, 259.0, 266.0, 266.0, 264.0, 286.0, 293.0, 261.0, 261.0, 293.0, 288.0, 261.0, 264.0, 256.0, 269.0, 292.0, 290.0, 257.0, 273.0, 282.0, 297.0, 283.0, 290.0, 265.0, 260.0, 292.0, 284.0, 289.0, 287.0, 301.0, 278.0, 281.0, 292.0, 283.0, 296.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7082637673553855, "mean_inference_ms": 1.2722587608664173, "mean_action_processing_ms": 0.13518180542648156, "mean_env_wait_ms": 0.8534831822426292, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 630.0, "episode_reward_min": 510.0, "episode_reward_mean": 558.46, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 247.0}, "policy_reward_max": {"ppo": 318.0}, "policy_reward_mean": {"ppo": 279.23}, "hist_stats": {"episode_reward": [573.0, 522.0, 576.0, 516.0, 516.0, 579.0, 573.0, 573.0, 579.0, 582.0, 570.0, 525.0, 576.0, 522.0, 576.0, 576.0, 576.0, 579.0, 573.0, 582.0, 530.0, 570.0, 573.0, 576.0, 579.0, 573.0, 524.0, 576.0, 519.0, 510.0, 570.0, 579.0, 573.0, 573.0, 573.0, 516.0, 525.0, 576.0, 516.0, 522.0, 630.0, 582.0, 516.0, 570.0, 582.0, 576.0, 573.0, 579.0, 570.0, 579.0, 579.0, 573.0, 573.0, 576.0, 573.0, 576.0, 576.0, 
576.0, 573.0, 567.0, 573.0, 573.0, 530.0, 578.0, 525.0, 576.0, 522.0, 522.0, 516.0, 579.0, 527.0, 573.0, 516.0, 525.0, 525.0, 576.0, 525.0, 525.0, 573.0, 567.0, 519.0, 573.0, 573.0, 525.0, 530.0, 579.0, 522.0, 581.0, 525.0, 525.0, 582.0, 530.0, 579.0, 573.0, 525.0, 576.0, 576.0, 579.0, 573.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [270.0, 303.0, 269.0, 253.0, 273.0, 303.0, 261.0, 255.0, 263.0, 253.0, 293.0, 286.0, 283.0, 290.0, 287.0, 286.0, 289.0, 290.0, 285.0, 297.0, 276.0, 294.0, 269.0, 256.0, 291.0, 285.0, 260.0, 262.0, 287.0, 289.0, 297.0, 279.0, 291.0, 285.0, 293.0, 286.0, 283.0, 290.0, 287.0, 295.0, 263.0, 267.0, 289.0, 281.0, 292.0, 281.0, 278.0, 298.0, 294.0, 285.0, 294.0, 279.0, 276.0, 248.0, 303.0, 273.0, 272.0, 247.0, 257.0, 253.0, 282.0, 288.0, 289.0, 290.0, 291.0, 282.0, 294.0, 279.0, 290.0, 283.0, 258.0, 258.0, 260.0, 265.0, 289.0, 287.0, 255.0, 261.0, 271.0, 251.0, 318.0, 312.0, 297.0, 285.0, 256.0, 260.0, 273.0, 297.0, 287.0, 295.0, 279.0, 297.0, 294.0, 279.0, 281.0, 298.0, 289.0, 281.0, 289.0, 290.0, 301.0, 278.0, 276.0, 297.0, 295.0, 278.0, 279.0, 297.0, 280.0, 293.0, 281.0, 295.0, 293.0, 283.0, 287.0, 289.0, 289.0, 284.0, 289.0, 278.0, 296.0, 277.0, 292.0, 281.0, 268.0, 262.0, 294.0, 284.0, 266.0, 259.0, 290.0, 286.0, 265.0, 257.0, 262.0, 260.0, 250.0, 266.0, 295.0, 284.0, 255.0, 272.0, 284.0, 289.0, 254.0, 262.0, 255.0, 270.0, 259.0, 266.0, 289.0, 287.0, 266.0, 259.0, 257.0, 268.0, 295.0, 278.0, 294.0, 273.0, 263.0, 256.0, 287.0, 286.0, 299.0, 
274.0, 259.0, 266.0, 266.0, 264.0, 286.0, 293.0, 261.0, 261.0, 293.0, 288.0, 261.0, 264.0, 256.0, 269.0, 292.0, 290.0, 257.0, 273.0, 282.0, 297.0, 283.0, 290.0, 265.0, 260.0, 292.0, 284.0, 289.0, 287.0, 301.0, 278.0, 281.0, 292.0, 283.0, 296.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7082637673553855, "mean_inference_ms": 1.2722587608664173, "mean_action_processing_ms": 0.13518180542648156, "mean_env_wait_ms": 0.8534831822426292, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 6169600, "num_agent_steps_trained": 6169600, "num_env_steps_sampled": 3084800, "num_env_steps_trained": 3084800, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 3084800, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 6169600, "timers": {"training_iteration_time_ms": 3671.08, "learn_time_ms": 1124.114, "learn_throughput": 11386.75, "synch_weights_time_ms": 11.662}, "counters": {"num_env_steps_sampled": 3084800, "num_env_steps_trained": 3084800, "num_agent_steps_sampled": 6169600, "num_agent_steps_trained": 6169600}, "done": false, "episodes_total": 7712, "training_iteration": 241, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-16-09", "timestamp": 1666581369, "time_this_iter_s": 3.6422295570373535, "time_total_s": 935.51575756073, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 935.51575756073, "timesteps_since_restore": 0, "iterations_since_restore": 241, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.516666666666666, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 193.6, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 171.68, "shaped_reward_min": 156, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.6, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 17.15, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 14.43, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 16.91, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.07, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.13, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.02, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, 
"useful_onion_drop_agent_1_mean": 0.04, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.26, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 16.73, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.76, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 4.84, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.45, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.47, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.15, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.2, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.04, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 5.43, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.4, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.38, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.32, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 14.26, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 16.73, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.26, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 16.73, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0013410382671281695, "policy_loss": 0.0010229569161310792, "vf_loss": 
7.782090187072754, "vf_explained_var": 0.5975505113601685, "kl": 0.0020362879149615765, "entropy": 0.920256495475769, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 3097600, "num_env_steps_trained": 3097600, "num_agent_steps_sampled": 6195200, "num_agent_steps_trained": 6195200}, "sampler_results": {"episode_reward_max": 630.0, "episode_reward_min": 516.0, "episode_reward_mean": 558.88, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 248.0}, "policy_reward_max": {"ppo": 323.0}, "policy_reward_mean": {"ppo": 279.44}, "custom_metrics": {"sparse_reward_mean": 193.6, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 171.68, "shaped_reward_min": 156, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.6, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 17.15, 
"onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 14.43, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 16.91, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.07, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.13, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.02, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.04, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.26, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 16.73, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.76, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 4.84, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.45, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.47, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.15, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.2, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.04, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 5.43, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.4, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.38, "soup_delivery_agent_0_min": 1, 
"soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.32, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 14.26, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 16.73, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.26, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 16.73, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [573.0, 573.0, 573.0, 516.0, 525.0, 576.0, 516.0, 522.0, 630.0, 582.0, 516.0, 570.0, 582.0, 576.0, 573.0, 579.0, 570.0, 579.0, 579.0, 573.0, 573.0, 576.0, 573.0, 576.0, 576.0, 576.0, 573.0, 567.0, 573.0, 573.0, 530.0, 578.0, 525.0, 576.0, 522.0, 522.0, 516.0, 579.0, 527.0, 573.0, 516.0, 525.0, 525.0, 576.0, 525.0, 525.0, 573.0, 567.0, 519.0, 573.0, 573.0, 525.0, 530.0, 579.0, 522.0, 581.0, 525.0, 525.0, 582.0, 530.0, 579.0, 573.0, 525.0, 576.0, 576.0, 579.0, 573.0, 579.0, 573.0, 576.0, 590.0, 527.0, 579.0, 516.0, 522.0, 573.0, 576.0, 576.0, 579.0, 576.0, 581.0, 579.0, 627.0, 573.0, 522.0, 530.0, 576.0, 576.0, 576.0, 570.0, 576.0, 576.0, 530.0, 579.0, 570.0, 570.0, 516.0, 525.0, 525.0, 525.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [291.0, 282.0, 294.0, 279.0, 290.0, 283.0, 258.0, 258.0, 260.0, 265.0, 289.0, 287.0, 255.0, 261.0, 271.0, 251.0, 318.0, 312.0, 297.0, 285.0, 256.0, 260.0, 273.0, 297.0, 287.0, 295.0, 279.0, 297.0, 294.0, 279.0, 281.0, 298.0, 289.0, 281.0, 289.0, 290.0, 301.0, 278.0, 276.0, 297.0, 295.0, 278.0, 279.0, 297.0, 280.0, 293.0, 281.0, 295.0, 293.0, 
283.0, 287.0, 289.0, 289.0, 284.0, 289.0, 278.0, 296.0, 277.0, 292.0, 281.0, 268.0, 262.0, 294.0, 284.0, 266.0, 259.0, 290.0, 286.0, 265.0, 257.0, 262.0, 260.0, 250.0, 266.0, 295.0, 284.0, 255.0, 272.0, 284.0, 289.0, 254.0, 262.0, 255.0, 270.0, 259.0, 266.0, 289.0, 287.0, 266.0, 259.0, 257.0, 268.0, 295.0, 278.0, 294.0, 273.0, 263.0, 256.0, 287.0, 286.0, 299.0, 274.0, 259.0, 266.0, 266.0, 264.0, 286.0, 293.0, 261.0, 261.0, 293.0, 288.0, 261.0, 264.0, 256.0, 269.0, 292.0, 290.0, 257.0, 273.0, 282.0, 297.0, 283.0, 290.0, 265.0, 260.0, 292.0, 284.0, 289.0, 287.0, 301.0, 278.0, 281.0, 292.0, 283.0, 296.0, 294.0, 279.0, 283.0, 293.0, 292.0, 298.0, 271.0, 256.0, 286.0, 293.0, 260.0, 256.0, 260.0, 262.0, 297.0, 276.0, 283.0, 293.0, 289.0, 287.0, 301.0, 278.0, 291.0, 285.0, 299.0, 282.0, 290.0, 289.0, 304.0, 323.0, 276.0, 297.0, 264.0, 258.0, 266.0, 264.0, 280.0, 296.0, 297.0, 279.0, 281.0, 295.0, 286.0, 284.0, 289.0, 287.0, 282.0, 294.0, 275.0, 255.0, 297.0, 282.0, 283.0, 287.0, 286.0, 284.0, 253.0, 263.0, 269.0, 256.0, 248.0, 277.0, 263.0, 262.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7081123236132713, "mean_inference_ms": 1.2718984380888532, "mean_action_processing_ms": 0.1351636109557676, "mean_env_wait_ms": 0.8532669400902704, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 630.0, "episode_reward_min": 516.0, "episode_reward_mean": 558.88, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 248.0}, "policy_reward_max": {"ppo": 323.0}, "policy_reward_mean": {"ppo": 279.44}, "hist_stats": {"episode_reward": [573.0, 573.0, 573.0, 516.0, 525.0, 576.0, 516.0, 522.0, 630.0, 582.0, 516.0, 570.0, 582.0, 576.0, 573.0, 579.0, 570.0, 579.0, 579.0, 573.0, 573.0, 576.0, 573.0, 576.0, 576.0, 576.0, 573.0, 567.0, 573.0, 573.0, 530.0, 578.0, 525.0, 576.0, 522.0, 522.0, 516.0, 579.0, 527.0, 573.0, 516.0, 525.0, 525.0, 576.0, 525.0, 525.0, 573.0, 567.0, 519.0, 573.0, 573.0, 525.0, 530.0, 579.0, 522.0, 581.0, 525.0, 
525.0, 582.0, 530.0, 579.0, 573.0, 525.0, 576.0, 576.0, 579.0, 573.0, 579.0, 573.0, 576.0, 590.0, 527.0, 579.0, 516.0, 522.0, 573.0, 576.0, 576.0, 579.0, 576.0, 581.0, 579.0, 627.0, 573.0, 522.0, 530.0, 576.0, 576.0, 576.0, 570.0, 576.0, 576.0, 530.0, 579.0, 570.0, 570.0, 516.0, 525.0, 525.0, 525.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [291.0, 282.0, 294.0, 279.0, 290.0, 283.0, 258.0, 258.0, 260.0, 265.0, 289.0, 287.0, 255.0, 261.0, 271.0, 251.0, 318.0, 312.0, 297.0, 285.0, 256.0, 260.0, 273.0, 297.0, 287.0, 295.0, 279.0, 297.0, 294.0, 279.0, 281.0, 298.0, 289.0, 281.0, 289.0, 290.0, 301.0, 278.0, 276.0, 297.0, 295.0, 278.0, 279.0, 297.0, 280.0, 293.0, 281.0, 295.0, 293.0, 283.0, 287.0, 289.0, 289.0, 284.0, 289.0, 278.0, 296.0, 277.0, 292.0, 281.0, 268.0, 262.0, 294.0, 284.0, 266.0, 259.0, 290.0, 286.0, 265.0, 257.0, 262.0, 260.0, 250.0, 266.0, 295.0, 284.0, 255.0, 272.0, 284.0, 289.0, 254.0, 262.0, 255.0, 270.0, 259.0, 266.0, 289.0, 287.0, 266.0, 259.0, 257.0, 268.0, 295.0, 278.0, 294.0, 273.0, 263.0, 256.0, 287.0, 286.0, 299.0, 274.0, 259.0, 266.0, 266.0, 264.0, 286.0, 293.0, 261.0, 261.0, 293.0, 288.0, 261.0, 264.0, 256.0, 269.0, 292.0, 290.0, 257.0, 273.0, 282.0, 297.0, 283.0, 290.0, 265.0, 260.0, 292.0, 284.0, 289.0, 287.0, 301.0, 278.0, 281.0, 292.0, 283.0, 296.0, 294.0, 279.0, 283.0, 293.0, 292.0, 298.0, 271.0, 256.0, 286.0, 293.0, 260.0, 256.0, 260.0, 262.0, 297.0, 276.0, 283.0, 293.0, 289.0, 287.0, 301.0, 278.0, 291.0, 285.0, 299.0, 282.0, 290.0, 289.0, 304.0, 
323.0, 276.0, 297.0, 264.0, 258.0, 266.0, 264.0, 280.0, 296.0, 297.0, 279.0, 281.0, 295.0, 286.0, 284.0, 289.0, 287.0, 282.0, 294.0, 275.0, 255.0, 297.0, 282.0, 283.0, 287.0, 286.0, 284.0, 253.0, 263.0, 269.0, 256.0, 248.0, 277.0, 263.0, 262.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7081123236132713, "mean_inference_ms": 1.2718984380888532, "mean_action_processing_ms": 0.1351636109557676, "mean_env_wait_ms": 0.8532669400902704, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 6195200, "num_agent_steps_trained": 6195200, "num_env_steps_sampled": 3097600, "num_env_steps_trained": 3097600, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 3097600, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 6195200, "timers": {"training_iteration_time_ms": 3658.23, "learn_time_ms": 1121.317, "learn_throughput": 11415.149, "synch_weights_time_ms": 11.586}, "counters": {"num_env_steps_sampled": 3097600, "num_env_steps_trained": 3097600, "num_agent_steps_sampled": 6195200, "num_agent_steps_trained": 6195200}, "done": false, "episodes_total": 7744, "training_iteration": 242, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-16-13", "timestamp": 1666581373, "time_this_iter_s": 3.5342307090759277, "time_total_s": 939.0499882698059, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 939.0499882698059, "timesteps_since_restore": 0, "iterations_since_restore": 242, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.94, "ram_util_percent": 10.62}}
+{"custom_metrics": {"sparse_reward_mean": 193.2, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 171.68, "shaped_reward_min": 156, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.48, "onion_pickup_agent_0_min": 5, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 17.23, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 28, "useful_onion_pickup_agent_0_mean": 14.31, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 17.07, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 27, "onion_drop_agent_0_mean": 0.11, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.17, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.01, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, 
"useful_onion_drop_agent_1_mean": 0.04, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.14, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 16.83, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 27, "dish_pickup_agent_0_mean": 5.8, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 4.83, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.53, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.46, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.17, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.21, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.07, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.05, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 5.44, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 10, "soup_pickup_agent_1_mean": 4.35, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.41, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 10, "soup_delivery_agent_1_mean": 4.28, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 14.14, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 16.83, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 27, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.14, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 16.83, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 27, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0011442599352449179, "policy_loss": -0.0014641538728028536, "vf_loss": 
7.753890037536621, "vf_explained_var": 0.5846375823020935, "kl": 0.002333172596991062, "entropy": 0.9109914302825928, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 3110400, "num_env_steps_trained": 3110400, "num_agent_steps_sampled": 6220800, "num_agent_steps_trained": 6220800}, "sampler_results": {"episode_reward_max": 630.0, "episode_reward_min": 516.0, "episode_reward_mean": 558.08, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 248.0}, "policy_reward_max": {"ppo": 323.0}, "policy_reward_mean": {"ppo": 279.04}, "custom_metrics": {"sparse_reward_mean": 193.2, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 171.68, "shaped_reward_min": 156, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.48, "onion_pickup_agent_0_min": 5, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 17.23, 
"onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 28, "useful_onion_pickup_agent_0_mean": 14.31, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 17.07, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 27, "onion_drop_agent_0_mean": 0.11, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.17, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.01, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.04, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.14, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 16.83, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 27, "dish_pickup_agent_0_mean": 5.8, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 4.83, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.53, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.46, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.17, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.21, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.07, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.05, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 5.44, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 10, "soup_pickup_agent_1_mean": 4.35, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.41, "soup_delivery_agent_0_min": 1, 
"soup_delivery_agent_0_max": 10, "soup_delivery_agent_1_mean": 4.28, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 14.14, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 16.83, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 27, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.14, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 16.83, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 27, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [525.0, 576.0, 522.0, 522.0, 516.0, 579.0, 527.0, 573.0, 516.0, 525.0, 525.0, 576.0, 525.0, 525.0, 573.0, 567.0, 519.0, 573.0, 573.0, 525.0, 530.0, 579.0, 522.0, 581.0, 525.0, 525.0, 582.0, 530.0, 579.0, 573.0, 525.0, 576.0, 576.0, 579.0, 573.0, 579.0, 573.0, 576.0, 590.0, 527.0, 579.0, 516.0, 522.0, 573.0, 576.0, 576.0, 579.0, 576.0, 581.0, 579.0, 627.0, 573.0, 522.0, 530.0, 576.0, 576.0, 576.0, 570.0, 576.0, 576.0, 530.0, 579.0, 570.0, 570.0, 516.0, 525.0, 525.0, 525.0, 570.0, 525.0, 630.0, 525.0, 576.0, 570.0, 573.0, 522.0, 582.0, 533.0, 570.0, 573.0, 576.0, 576.0, 576.0, 525.0, 576.0, 522.0, 579.0, 576.0, 573.0, 525.0, 576.0, 573.0, 573.0, 576.0, 579.0, 573.0, 525.0, 573.0, 573.0, 573.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [266.0, 259.0, 290.0, 286.0, 265.0, 257.0, 262.0, 260.0, 250.0, 266.0, 295.0, 284.0, 255.0, 272.0, 284.0, 289.0, 254.0, 262.0, 255.0, 270.0, 259.0, 266.0, 289.0, 287.0, 266.0, 259.0, 257.0, 268.0, 295.0, 278.0, 294.0, 273.0, 263.0, 256.0, 287.0, 286.0, 299.0, 274.0, 259.0, 266.0, 266.0, 264.0, 286.0, 293.0, 261.0, 261.0, 293.0, 288.0, 261.0, 
264.0, 256.0, 269.0, 292.0, 290.0, 257.0, 273.0, 282.0, 297.0, 283.0, 290.0, 265.0, 260.0, 292.0, 284.0, 289.0, 287.0, 301.0, 278.0, 281.0, 292.0, 283.0, 296.0, 294.0, 279.0, 283.0, 293.0, 292.0, 298.0, 271.0, 256.0, 286.0, 293.0, 260.0, 256.0, 260.0, 262.0, 297.0, 276.0, 283.0, 293.0, 289.0, 287.0, 301.0, 278.0, 291.0, 285.0, 299.0, 282.0, 290.0, 289.0, 304.0, 323.0, 276.0, 297.0, 264.0, 258.0, 266.0, 264.0, 280.0, 296.0, 297.0, 279.0, 281.0, 295.0, 286.0, 284.0, 289.0, 287.0, 282.0, 294.0, 275.0, 255.0, 297.0, 282.0, 283.0, 287.0, 286.0, 284.0, 253.0, 263.0, 269.0, 256.0, 248.0, 277.0, 263.0, 262.0, 287.0, 283.0, 259.0, 266.0, 323.0, 307.0, 271.0, 254.0, 277.0, 299.0, 282.0, 288.0, 289.0, 284.0, 257.0, 265.0, 293.0, 289.0, 262.0, 271.0, 286.0, 284.0, 288.0, 285.0, 293.0, 283.0, 285.0, 291.0, 289.0, 287.0, 271.0, 254.0, 285.0, 291.0, 259.0, 263.0, 291.0, 288.0, 287.0, 289.0, 291.0, 282.0, 264.0, 261.0, 282.0, 294.0, 282.0, 291.0, 293.0, 280.0, 295.0, 281.0, 286.0, 293.0, 281.0, 292.0, 269.0, 256.0, 288.0, 285.0, 288.0, 285.0, 296.0, 277.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7079764704995039, "mean_inference_ms": 1.2715597737459958, "mean_action_processing_ms": 0.13514716224342163, "mean_env_wait_ms": 0.8530627219429394, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 630.0, "episode_reward_min": 516.0, "episode_reward_mean": 558.08, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 248.0}, "policy_reward_max": {"ppo": 323.0}, "policy_reward_mean": {"ppo": 279.04}, "hist_stats": {"episode_reward": [525.0, 576.0, 522.0, 522.0, 516.0, 579.0, 527.0, 573.0, 516.0, 525.0, 525.0, 576.0, 525.0, 525.0, 573.0, 567.0, 519.0, 573.0, 573.0, 525.0, 530.0, 579.0, 522.0, 581.0, 525.0, 525.0, 582.0, 530.0, 579.0, 573.0, 525.0, 576.0, 576.0, 579.0, 573.0, 579.0, 573.0, 576.0, 590.0, 527.0, 579.0, 516.0, 522.0, 573.0, 576.0, 576.0, 579.0, 576.0, 581.0, 579.0, 627.0, 573.0, 522.0, 530.0, 576.0, 576.0, 576.0, 
570.0, 576.0, 576.0, 530.0, 579.0, 570.0, 570.0, 516.0, 525.0, 525.0, 525.0, 570.0, 525.0, 630.0, 525.0, 576.0, 570.0, 573.0, 522.0, 582.0, 533.0, 570.0, 573.0, 576.0, 576.0, 576.0, 525.0, 576.0, 522.0, 579.0, 576.0, 573.0, 525.0, 576.0, 573.0, 573.0, 576.0, 579.0, 573.0, 525.0, 573.0, 573.0, 573.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [266.0, 259.0, 290.0, 286.0, 265.0, 257.0, 262.0, 260.0, 250.0, 266.0, 295.0, 284.0, 255.0, 272.0, 284.0, 289.0, 254.0, 262.0, 255.0, 270.0, 259.0, 266.0, 289.0, 287.0, 266.0, 259.0, 257.0, 268.0, 295.0, 278.0, 294.0, 273.0, 263.0, 256.0, 287.0, 286.0, 299.0, 274.0, 259.0, 266.0, 266.0, 264.0, 286.0, 293.0, 261.0, 261.0, 293.0, 288.0, 261.0, 264.0, 256.0, 269.0, 292.0, 290.0, 257.0, 273.0, 282.0, 297.0, 283.0, 290.0, 265.0, 260.0, 292.0, 284.0, 289.0, 287.0, 301.0, 278.0, 281.0, 292.0, 283.0, 296.0, 294.0, 279.0, 283.0, 293.0, 292.0, 298.0, 271.0, 256.0, 286.0, 293.0, 260.0, 256.0, 260.0, 262.0, 297.0, 276.0, 283.0, 293.0, 289.0, 287.0, 301.0, 278.0, 291.0, 285.0, 299.0, 282.0, 290.0, 289.0, 304.0, 323.0, 276.0, 297.0, 264.0, 258.0, 266.0, 264.0, 280.0, 296.0, 297.0, 279.0, 281.0, 295.0, 286.0, 284.0, 289.0, 287.0, 282.0, 294.0, 275.0, 255.0, 297.0, 282.0, 283.0, 287.0, 286.0, 284.0, 253.0, 263.0, 269.0, 256.0, 248.0, 277.0, 263.0, 262.0, 287.0, 283.0, 259.0, 266.0, 323.0, 307.0, 271.0, 254.0, 277.0, 299.0, 282.0, 288.0, 289.0, 284.0, 257.0, 265.0, 293.0, 289.0, 262.0, 271.0, 286.0, 284.0, 288.0, 285.0, 293.0, 283.0, 285.0, 291.0, 289.0, 
287.0, 271.0, 254.0, 285.0, 291.0, 259.0, 263.0, 291.0, 288.0, 287.0, 289.0, 291.0, 282.0, 264.0, 261.0, 282.0, 294.0, 282.0, 291.0, 293.0, 280.0, 295.0, 281.0, 286.0, 293.0, 281.0, 292.0, 269.0, 256.0, 288.0, 285.0, 288.0, 285.0, 296.0, 277.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7079764704995039, "mean_inference_ms": 1.2715597737459958, "mean_action_processing_ms": 0.13514716224342163, "mean_env_wait_ms": 0.8530627219429394, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 6220800, "num_agent_steps_trained": 6220800, "num_env_steps_sampled": 3110400, "num_env_steps_trained": 3110400, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 3110400, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 6220800, "timers": {"training_iteration_time_ms": 3653.687, "learn_time_ms": 1113.56, "learn_throughput": 11494.667, "synch_weights_time_ms": 12.808}, "counters": {"num_env_steps_sampled": 3110400, "num_env_steps_trained": 3110400, "num_agent_steps_sampled": 6220800, "num_agent_steps_trained": 6220800}, "done": false, "episodes_total": 7776, "training_iteration": 243, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-16-17", "timestamp": 1666581377, "time_this_iter_s": 3.7209229469299316, "time_total_s": 942.7709112167358, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 942.7709112167358, "timesteps_since_restore": 0, "iterations_since_restore": 243, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.566666666666666, "ram_util_percent": 10.633333333333333}}
+{"custom_metrics": {"sparse_reward_mean": 195.6, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 172.82, "shaped_reward_min": 139, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.3, "onion_pickup_agent_0_min": 5, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 17.47, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 28, "useful_onion_pickup_agent_0_mean": 14.14, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 17.31, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 27, "onion_drop_agent_0_mean": 0.08, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.14, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.0, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 0, 
"useful_onion_drop_agent_1_mean": 0.04, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.01, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 17.09, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 27, "dish_pickup_agent_0_mean": 6.06, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 4.7, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.73, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.36, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.2, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.21, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.09, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.05, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.68, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 10, "soup_pickup_agent_1_mean": 4.24, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.61, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 10, "soup_delivery_agent_1_mean": 4.2, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.05, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 3, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 14.01, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 17.09, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 27, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.01, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 17.09, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 27, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.00035139062674716115, "policy_loss": -0.0006855684332549572, "vf_loss": 
7.859228610992432, "vf_explained_var": 0.5928146243095398, "kl": 0.002471503335982561, "entropy": 0.903489351272583, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 3123200, "num_env_steps_trained": 3123200, "num_agent_steps_sampled": 6246400, "num_agent_steps_trained": 6246400}, "sampler_results": {"episode_reward_max": 630.0, "episode_reward_min": 459.0, "episode_reward_mean": 564.02, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 219.0}, "policy_reward_max": {"ppo": 323.0}, "policy_reward_mean": {"ppo": 282.01}, "custom_metrics": {"sparse_reward_mean": 195.6, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 172.82, "shaped_reward_min": 139, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.3, "onion_pickup_agent_0_min": 5, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 17.47, 
"onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 28, "useful_onion_pickup_agent_0_mean": 14.14, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 17.31, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 27, "onion_drop_agent_0_mean": 0.08, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.14, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.0, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 0, "useful_onion_drop_agent_1_mean": 0.04, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.01, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 17.09, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 27, "dish_pickup_agent_0_mean": 6.06, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 4.7, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.73, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.36, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.2, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.21, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.09, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.05, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.68, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 10, "soup_pickup_agent_1_mean": 4.24, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.61, "soup_delivery_agent_0_min": 1, 
"soup_delivery_agent_0_max": 10, "soup_delivery_agent_1_mean": 4.2, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.05, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 3, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 14.01, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 17.09, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 27, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.01, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 17.09, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 27, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [576.0, 579.0, 573.0, 579.0, 573.0, 576.0, 590.0, 527.0, 579.0, 516.0, 522.0, 573.0, 576.0, 576.0, 579.0, 576.0, 581.0, 579.0, 627.0, 573.0, 522.0, 530.0, 576.0, 576.0, 576.0, 570.0, 576.0, 576.0, 530.0, 579.0, 570.0, 570.0, 516.0, 525.0, 525.0, 525.0, 570.0, 525.0, 630.0, 525.0, 576.0, 570.0, 573.0, 522.0, 582.0, 533.0, 570.0, 573.0, 576.0, 576.0, 576.0, 525.0, 576.0, 522.0, 579.0, 576.0, 573.0, 525.0, 576.0, 573.0, 573.0, 576.0, 579.0, 573.0, 525.0, 573.0, 573.0, 573.0, 582.0, 570.0, 579.0, 582.0, 576.0, 576.0, 573.0, 576.0, 579.0, 567.0, 576.0, 522.0, 573.0, 522.0, 570.0, 579.0, 567.0, 579.0, 567.0, 573.0, 587.0, 576.0, 525.0, 573.0, 573.0, 579.0, 525.0, 459.0, 570.0, 576.0, 576.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [289.0, 287.0, 301.0, 278.0, 281.0, 292.0, 283.0, 296.0, 294.0, 279.0, 283.0, 293.0, 292.0, 298.0, 271.0, 256.0, 286.0, 293.0, 260.0, 256.0, 260.0, 262.0, 297.0, 276.0, 283.0, 293.0, 289.0, 287.0, 301.0, 278.0, 291.0, 285.0, 299.0, 282.0, 290.0, 289.0, 304.0, 323.0, 276.0, 297.0, 264.0, 258.0, 266.0, 264.0, 280.0, 296.0, 297.0, 279.0, 281.0, 
295.0, 286.0, 284.0, 289.0, 287.0, 282.0, 294.0, 275.0, 255.0, 297.0, 282.0, 283.0, 287.0, 286.0, 284.0, 253.0, 263.0, 269.0, 256.0, 248.0, 277.0, 263.0, 262.0, 287.0, 283.0, 259.0, 266.0, 323.0, 307.0, 271.0, 254.0, 277.0, 299.0, 282.0, 288.0, 289.0, 284.0, 257.0, 265.0, 293.0, 289.0, 262.0, 271.0, 286.0, 284.0, 288.0, 285.0, 293.0, 283.0, 285.0, 291.0, 289.0, 287.0, 271.0, 254.0, 285.0, 291.0, 259.0, 263.0, 291.0, 288.0, 287.0, 289.0, 291.0, 282.0, 264.0, 261.0, 282.0, 294.0, 282.0, 291.0, 293.0, 280.0, 295.0, 281.0, 286.0, 293.0, 281.0, 292.0, 269.0, 256.0, 288.0, 285.0, 288.0, 285.0, 296.0, 277.0, 294.0, 288.0, 287.0, 283.0, 292.0, 287.0, 285.0, 297.0, 292.0, 284.0, 281.0, 295.0, 284.0, 289.0, 297.0, 279.0, 293.0, 286.0, 294.0, 273.0, 291.0, 285.0, 271.0, 251.0, 272.0, 301.0, 257.0, 265.0, 286.0, 284.0, 296.0, 283.0, 294.0, 273.0, 295.0, 284.0, 284.0, 283.0, 282.0, 291.0, 295.0, 292.0, 293.0, 283.0, 261.0, 264.0, 300.0, 273.0, 278.0, 295.0, 287.0, 292.0, 260.0, 265.0, 240.0, 219.0, 292.0, 278.0, 295.0, 281.0, 278.0, 298.0, 293.0, 283.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7078702523741072, "mean_inference_ms": 1.2712685166157995, "mean_action_processing_ms": 0.13513782419630282, "mean_env_wait_ms": 0.8529134216315836, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 630.0, "episode_reward_min": 459.0, "episode_reward_mean": 564.02, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 219.0}, "policy_reward_max": {"ppo": 323.0}, "policy_reward_mean": {"ppo": 282.01}, "hist_stats": {"episode_reward": [576.0, 579.0, 573.0, 579.0, 573.0, 576.0, 590.0, 527.0, 579.0, 516.0, 522.0, 573.0, 576.0, 576.0, 579.0, 576.0, 581.0, 579.0, 627.0, 573.0, 522.0, 530.0, 576.0, 576.0, 576.0, 570.0, 576.0, 576.0, 530.0, 579.0, 570.0, 570.0, 516.0, 525.0, 525.0, 525.0, 570.0, 525.0, 630.0, 525.0, 576.0, 570.0, 573.0, 522.0, 582.0, 533.0, 570.0, 573.0, 576.0, 576.0, 576.0, 525.0, 576.0, 522.0, 579.0, 576.0, 573.0, 
525.0, 576.0, 573.0, 573.0, 576.0, 579.0, 573.0, 525.0, 573.0, 573.0, 573.0, 582.0, 570.0, 579.0, 582.0, 576.0, 576.0, 573.0, 576.0, 579.0, 567.0, 576.0, 522.0, 573.0, 522.0, 570.0, 579.0, 567.0, 579.0, 567.0, 573.0, 587.0, 576.0, 525.0, 573.0, 573.0, 579.0, 525.0, 459.0, 570.0, 576.0, 576.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [289.0, 287.0, 301.0, 278.0, 281.0, 292.0, 283.0, 296.0, 294.0, 279.0, 283.0, 293.0, 292.0, 298.0, 271.0, 256.0, 286.0, 293.0, 260.0, 256.0, 260.0, 262.0, 297.0, 276.0, 283.0, 293.0, 289.0, 287.0, 301.0, 278.0, 291.0, 285.0, 299.0, 282.0, 290.0, 289.0, 304.0, 323.0, 276.0, 297.0, 264.0, 258.0, 266.0, 264.0, 280.0, 296.0, 297.0, 279.0, 281.0, 295.0, 286.0, 284.0, 289.0, 287.0, 282.0, 294.0, 275.0, 255.0, 297.0, 282.0, 283.0, 287.0, 286.0, 284.0, 253.0, 263.0, 269.0, 256.0, 248.0, 277.0, 263.0, 262.0, 287.0, 283.0, 259.0, 266.0, 323.0, 307.0, 271.0, 254.0, 277.0, 299.0, 282.0, 288.0, 289.0, 284.0, 257.0, 265.0, 293.0, 289.0, 262.0, 271.0, 286.0, 284.0, 288.0, 285.0, 293.0, 283.0, 285.0, 291.0, 289.0, 287.0, 271.0, 254.0, 285.0, 291.0, 259.0, 263.0, 291.0, 288.0, 287.0, 289.0, 291.0, 282.0, 264.0, 261.0, 282.0, 294.0, 282.0, 291.0, 293.0, 280.0, 295.0, 281.0, 286.0, 293.0, 281.0, 292.0, 269.0, 256.0, 288.0, 285.0, 288.0, 285.0, 296.0, 277.0, 294.0, 288.0, 287.0, 283.0, 292.0, 287.0, 285.0, 297.0, 292.0, 284.0, 281.0, 295.0, 284.0, 289.0, 297.0, 279.0, 293.0, 286.0, 294.0, 273.0, 291.0, 285.0, 271.0, 251.0, 272.0, 301.0, 257.0, 265.0, 286.0, 
284.0, 296.0, 283.0, 294.0, 273.0, 295.0, 284.0, 284.0, 283.0, 282.0, 291.0, 295.0, 292.0, 293.0, 283.0, 261.0, 264.0, 300.0, 273.0, 278.0, 295.0, 287.0, 292.0, 260.0, 265.0, 240.0, 219.0, 292.0, 278.0, 295.0, 281.0, 278.0, 298.0, 293.0, 283.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7078702523741072, "mean_inference_ms": 1.2712685166157995, "mean_action_processing_ms": 0.13513782419630282, "mean_env_wait_ms": 0.8529134216315836, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 6246400, "num_agent_steps_trained": 6246400, "num_env_steps_sampled": 3123200, "num_env_steps_trained": 3123200, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 3123200, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 6246400, "timers": {"training_iteration_time_ms": 3653.097, "learn_time_ms": 1104.408, "learn_throughput": 11589.92, "synch_weights_time_ms": 14.292}, "counters": {"num_env_steps_sampled": 3123200, "num_env_steps_trained": 3123200, "num_agent_steps_sampled": 6246400, "num_agent_steps_trained": 6246400}, "done": false, "episodes_total": 7808, "training_iteration": 244, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-16-21", "timestamp": 1666581381, "time_this_iter_s": 3.7214155197143555, "time_total_s": 946.4923267364502, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 946.4923267364502, "timesteps_since_restore": 0, "iterations_since_restore": 244, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.88, "ram_util_percent": 10.62}}
+{"custom_metrics": {"sparse_reward_mean": 194.4, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 172.64, "shaped_reward_min": 139, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.64, "onion_pickup_agent_0_min": 5, "onion_pickup_agent_0_max": 26, "onion_pickup_agent_1_mean": 17.16, "onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 28, "useful_onion_pickup_agent_0_mean": 14.47, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 26, "useful_onion_pickup_agent_1_mean": 16.93, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 27, "onion_drop_agent_0_mean": 0.12, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.14, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.02, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, 
"useful_onion_drop_agent_1_mean": 0.07, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.28, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 25, "potting_onion_agent_1_mean": 16.79, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 27, "dish_pickup_agent_0_mean": 5.9, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 4.87, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.57, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.54, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.25, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.22, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.11, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.05, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.5, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 10, "soup_pickup_agent_1_mean": 4.4, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.44, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 10, "soup_delivery_agent_1_mean": 4.33, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.04, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 3, "soup_drop_agent_1_mean": 0.05, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 14.28, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 25, "optimal_onion_potting_agent_1_mean": 16.79, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 27, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.28, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 25, "viable_onion_potting_agent_1_mean": 16.79, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 27, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0025330903008580208, "policy_loss": -0.0028688705060631037, "vf_loss": 
7.884381294250488, "vf_explained_var": 0.582945704460144, "kl": 0.0022667073644697666, "entropy": 0.9053138494491577, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 3136000, "num_env_steps_trained": 3136000, "num_agent_steps_sampled": 6272000, "num_agent_steps_trained": 6272000}, "sampler_results": {"episode_reward_max": 630.0, "episode_reward_min": 459.0, "episode_reward_mean": 561.44, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 219.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 280.72}, "custom_metrics": {"sparse_reward_mean": 194.4, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 172.64, "shaped_reward_min": 139, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.64, "onion_pickup_agent_0_min": 5, "onion_pickup_agent_0_max": 26, "onion_pickup_agent_1_mean": 17.16, 
"onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 28, "useful_onion_pickup_agent_0_mean": 14.47, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 26, "useful_onion_pickup_agent_1_mean": 16.93, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 27, "onion_drop_agent_0_mean": 0.12, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.14, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.02, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.07, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.28, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 25, "potting_onion_agent_1_mean": 16.79, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 27, "dish_pickup_agent_0_mean": 5.9, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 4.87, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.57, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.54, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.25, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.22, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.11, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.05, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.5, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 10, "soup_pickup_agent_1_mean": 4.4, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.44, "soup_delivery_agent_0_min": 1, 
"soup_delivery_agent_0_max": 10, "soup_delivery_agent_1_mean": 4.33, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.04, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 3, "soup_drop_agent_1_mean": 0.05, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 14.28, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 25, "optimal_onion_potting_agent_1_mean": 16.79, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 27, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.28, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 25, "viable_onion_potting_agent_1_mean": 16.79, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 27, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [516.0, 525.0, 525.0, 525.0, 570.0, 525.0, 630.0, 525.0, 576.0, 570.0, 573.0, 522.0, 582.0, 533.0, 570.0, 573.0, 576.0, 576.0, 576.0, 525.0, 576.0, 522.0, 579.0, 576.0, 573.0, 525.0, 576.0, 573.0, 573.0, 576.0, 579.0, 573.0, 525.0, 573.0, 573.0, 573.0, 582.0, 570.0, 579.0, 582.0, 576.0, 576.0, 573.0, 576.0, 579.0, 567.0, 576.0, 522.0, 573.0, 522.0, 570.0, 579.0, 567.0, 579.0, 567.0, 573.0, 587.0, 576.0, 525.0, 573.0, 573.0, 579.0, 525.0, 459.0, 570.0, 576.0, 576.0, 576.0, 533.0, 573.0, 522.0, 576.0, 576.0, 570.0, 576.0, 579.0, 487.0, 525.0, 525.0, 576.0, 530.0, 519.0, 530.0, 579.0, 579.0, 522.0, 573.0, 530.0, 627.0, 576.0, 582.0, 576.0, 570.0, 573.0, 582.0, 627.0, 525.0, 582.0, 544.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [253.0, 263.0, 269.0, 256.0, 248.0, 277.0, 263.0, 262.0, 287.0, 283.0, 259.0, 266.0, 323.0, 307.0, 271.0, 254.0, 277.0, 299.0, 282.0, 288.0, 289.0, 284.0, 257.0, 265.0, 293.0, 289.0, 262.0, 271.0, 286.0, 284.0, 288.0, 285.0, 293.0, 283.0, 285.0, 291.0, 289.0, 287.0, 271.0, 254.0, 285.0, 291.0, 259.0, 263.0, 291.0, 288.0, 287.0, 289.0, 291.0, 
282.0, 264.0, 261.0, 282.0, 294.0, 282.0, 291.0, 293.0, 280.0, 295.0, 281.0, 286.0, 293.0, 281.0, 292.0, 269.0, 256.0, 288.0, 285.0, 288.0, 285.0, 296.0, 277.0, 294.0, 288.0, 287.0, 283.0, 292.0, 287.0, 285.0, 297.0, 292.0, 284.0, 281.0, 295.0, 284.0, 289.0, 297.0, 279.0, 293.0, 286.0, 294.0, 273.0, 291.0, 285.0, 271.0, 251.0, 272.0, 301.0, 257.0, 265.0, 286.0, 284.0, 296.0, 283.0, 294.0, 273.0, 295.0, 284.0, 284.0, 283.0, 282.0, 291.0, 295.0, 292.0, 293.0, 283.0, 261.0, 264.0, 300.0, 273.0, 278.0, 295.0, 287.0, 292.0, 260.0, 265.0, 240.0, 219.0, 292.0, 278.0, 295.0, 281.0, 278.0, 298.0, 293.0, 283.0, 263.0, 270.0, 280.0, 293.0, 258.0, 264.0, 290.0, 286.0, 281.0, 295.0, 284.0, 286.0, 284.0, 292.0, 285.0, 294.0, 242.0, 245.0, 266.0, 259.0, 265.0, 260.0, 287.0, 289.0, 272.0, 258.0, 260.0, 259.0, 269.0, 261.0, 295.0, 284.0, 296.0, 283.0, 266.0, 256.0, 287.0, 286.0, 265.0, 265.0, 314.0, 313.0, 291.0, 285.0, 293.0, 289.0, 281.0, 295.0, 277.0, 293.0, 286.0, 287.0, 291.0, 291.0, 300.0, 327.0, 264.0, 261.0, 289.0, 293.0, 271.0, 273.0, 292.0, 287.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7077555771781661, "mean_inference_ms": 1.2709830719233666, "mean_action_processing_ms": 0.13513002534520807, "mean_env_wait_ms": 0.8527685578396544, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 630.0, "episode_reward_min": 459.0, "episode_reward_mean": 561.44, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 219.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 280.72}, "hist_stats": {"episode_reward": [516.0, 525.0, 525.0, 525.0, 570.0, 525.0, 630.0, 525.0, 576.0, 570.0, 573.0, 522.0, 582.0, 533.0, 570.0, 573.0, 576.0, 576.0, 576.0, 525.0, 576.0, 522.0, 579.0, 576.0, 573.0, 525.0, 576.0, 573.0, 573.0, 576.0, 579.0, 573.0, 525.0, 573.0, 573.0, 573.0, 582.0, 570.0, 579.0, 582.0, 576.0, 576.0, 573.0, 576.0, 579.0, 567.0, 576.0, 522.0, 573.0, 522.0, 570.0, 579.0, 567.0, 579.0, 567.0, 573.0, 587.0, 
576.0, 525.0, 573.0, 573.0, 579.0, 525.0, 459.0, 570.0, 576.0, 576.0, 576.0, 533.0, 573.0, 522.0, 576.0, 576.0, 570.0, 576.0, 579.0, 487.0, 525.0, 525.0, 576.0, 530.0, 519.0, 530.0, 579.0, 579.0, 522.0, 573.0, 530.0, 627.0, 576.0, 582.0, 576.0, 570.0, 573.0, 582.0, 627.0, 525.0, 582.0, 544.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [253.0, 263.0, 269.0, 256.0, 248.0, 277.0, 263.0, 262.0, 287.0, 283.0, 259.0, 266.0, 323.0, 307.0, 271.0, 254.0, 277.0, 299.0, 282.0, 288.0, 289.0, 284.0, 257.0, 265.0, 293.0, 289.0, 262.0, 271.0, 286.0, 284.0, 288.0, 285.0, 293.0, 283.0, 285.0, 291.0, 289.0, 287.0, 271.0, 254.0, 285.0, 291.0, 259.0, 263.0, 291.0, 288.0, 287.0, 289.0, 291.0, 282.0, 264.0, 261.0, 282.0, 294.0, 282.0, 291.0, 293.0, 280.0, 295.0, 281.0, 286.0, 293.0, 281.0, 292.0, 269.0, 256.0, 288.0, 285.0, 288.0, 285.0, 296.0, 277.0, 294.0, 288.0, 287.0, 283.0, 292.0, 287.0, 285.0, 297.0, 292.0, 284.0, 281.0, 295.0, 284.0, 289.0, 297.0, 279.0, 293.0, 286.0, 294.0, 273.0, 291.0, 285.0, 271.0, 251.0, 272.0, 301.0, 257.0, 265.0, 286.0, 284.0, 296.0, 283.0, 294.0, 273.0, 295.0, 284.0, 284.0, 283.0, 282.0, 291.0, 295.0, 292.0, 293.0, 283.0, 261.0, 264.0, 300.0, 273.0, 278.0, 295.0, 287.0, 292.0, 260.0, 265.0, 240.0, 219.0, 292.0, 278.0, 295.0, 281.0, 278.0, 298.0, 293.0, 283.0, 263.0, 270.0, 280.0, 293.0, 258.0, 264.0, 290.0, 286.0, 281.0, 295.0, 284.0, 286.0, 284.0, 292.0, 285.0, 294.0, 242.0, 245.0, 266.0, 259.0, 265.0, 260.0, 287.0, 289.0, 272.0, 258.0, 260.0, 259.0, 269.0, 
261.0, 295.0, 284.0, 296.0, 283.0, 266.0, 256.0, 287.0, 286.0, 265.0, 265.0, 314.0, 313.0, 291.0, 285.0, 293.0, 289.0, 281.0, 295.0, 277.0, 293.0, 286.0, 287.0, 291.0, 291.0, 300.0, 327.0, 264.0, 261.0, 289.0, 293.0, 271.0, 273.0, 292.0, 287.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7077555771781661, "mean_inference_ms": 1.2709830719233666, "mean_action_processing_ms": 0.13513002534520807, "mean_env_wait_ms": 0.8527685578396544, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 6272000, "num_agent_steps_trained": 6272000, "num_env_steps_sampled": 3136000, "num_env_steps_trained": 3136000, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 3136000, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 6272000, "timers": {"training_iteration_time_ms": 3645.573, "learn_time_ms": 1095.891, "learn_throughput": 11679.998, "synch_weights_time_ms": 14.283}, "counters": {"num_env_steps_sampled": 3136000, "num_env_steps_trained": 3136000, "num_agent_steps_sampled": 6272000, "num_agent_steps_trained": 6272000}, "done": false, "episodes_total": 7840, "training_iteration": 245, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-16-24", "timestamp": 1666581384, "time_this_iter_s": 3.633239984512329, "time_total_s": 950.1255667209625, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 950.1255667209625, "timesteps_since_restore": 0, "iterations_since_restore": 245, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.849999999999998, "ram_util_percent": 10.616666666666667}}
+{"custom_metrics": {"sparse_reward_mean": 193.6, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 171.95, "shaped_reward_min": 139, "shaped_reward_max": 187, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.22, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 26, "onion_pickup_agent_1_mean": 16.53, "onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 27, "useful_onion_pickup_agent_0_mean": 15.03, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 26, "useful_onion_pickup_agent_1_mean": 16.3, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 26, "onion_drop_agent_0_mean": 0.11, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.1, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.04, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, 
"useful_onion_drop_agent_1_mean": 0.07, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.82, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 25, "potting_onion_agent_1_mean": 16.15, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 27, "dish_pickup_agent_0_mean": 5.68, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.04, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.34, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.69, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.2, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.26, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.07, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.06, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.32, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 10, "soup_pickup_agent_1_mean": 4.56, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.25, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 10, "soup_delivery_agent_1_mean": 4.47, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.04, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 3, "soup_drop_agent_1_mean": 0.06, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 14.82, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 25, "optimal_onion_potting_agent_1_mean": 16.15, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 27, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.82, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 25, "viable_onion_potting_agent_1_mean": 16.15, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 27, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.00012638827320188284, "policy_loss": -0.00020441529341042042, "vf_loss": 
7.864180564880371, "vf_explained_var": 0.5898736715316772, "kl": 0.0021721776574850082, "entropy": 0.9112254977226257, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 3148800, "num_env_steps_trained": 3148800, "num_agent_steps_sampled": 6297600, "num_agent_steps_trained": 6297600}, "sampler_results": {"episode_reward_max": 627.0, "episode_reward_min": 459.0, "episode_reward_mean": 559.15, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 219.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 279.575}, "custom_metrics": {"sparse_reward_mean": 193.6, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 171.95, "shaped_reward_min": 139, "shaped_reward_max": 187, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.22, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 26, "onion_pickup_agent_1_mean": 16.53, 
"onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 27, "useful_onion_pickup_agent_0_mean": 15.03, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 26, "useful_onion_pickup_agent_1_mean": 16.3, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 26, "onion_drop_agent_0_mean": 0.11, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.1, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.04, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.07, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.82, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 25, "potting_onion_agent_1_mean": 16.15, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 27, "dish_pickup_agent_0_mean": 5.68, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.04, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.34, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.69, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.2, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.26, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.07, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.06, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.32, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 10, "soup_pickup_agent_1_mean": 4.56, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.25, "soup_delivery_agent_0_min": 2, 
"soup_delivery_agent_0_max": 10, "soup_delivery_agent_1_mean": 4.47, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.04, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 3, "soup_drop_agent_1_mean": 0.06, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 14.82, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 25, "optimal_onion_potting_agent_1_mean": 16.15, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 27, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.82, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 25, "viable_onion_potting_agent_1_mean": 16.15, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 27, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [525.0, 573.0, 573.0, 573.0, 582.0, 570.0, 579.0, 582.0, 576.0, 576.0, 573.0, 576.0, 579.0, 567.0, 576.0, 522.0, 573.0, 522.0, 570.0, 579.0, 567.0, 579.0, 567.0, 573.0, 587.0, 576.0, 525.0, 573.0, 573.0, 579.0, 525.0, 459.0, 570.0, 576.0, 576.0, 576.0, 533.0, 573.0, 522.0, 576.0, 576.0, 570.0, 576.0, 579.0, 487.0, 525.0, 525.0, 576.0, 530.0, 519.0, 530.0, 579.0, 579.0, 522.0, 573.0, 530.0, 627.0, 576.0, 582.0, 576.0, 570.0, 573.0, 582.0, 627.0, 525.0, 582.0, 544.0, 579.0, 579.0, 525.0, 570.0, 519.0, 522.0, 519.0, 573.0, 519.0, 576.0, 522.0, 570.0, 570.0, 582.0, 582.0, 522.0, 573.0, 530.0, 530.0, 525.0, 519.0, 570.0, 522.0, 522.0, 522.0, 573.0, 570.0, 579.0, 579.0, 570.0, 582.0, 576.0, 573.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [269.0, 256.0, 288.0, 285.0, 288.0, 285.0, 296.0, 277.0, 294.0, 288.0, 287.0, 283.0, 292.0, 287.0, 285.0, 297.0, 292.0, 284.0, 281.0, 295.0, 284.0, 289.0, 297.0, 279.0, 293.0, 286.0, 294.0, 273.0, 291.0, 285.0, 271.0, 251.0, 272.0, 301.0, 257.0, 265.0, 286.0, 284.0, 296.0, 283.0, 294.0, 273.0, 295.0, 284.0, 284.0, 283.0, 282.0, 291.0, 295.0, 
292.0, 293.0, 283.0, 261.0, 264.0, 300.0, 273.0, 278.0, 295.0, 287.0, 292.0, 260.0, 265.0, 240.0, 219.0, 292.0, 278.0, 295.0, 281.0, 278.0, 298.0, 293.0, 283.0, 263.0, 270.0, 280.0, 293.0, 258.0, 264.0, 290.0, 286.0, 281.0, 295.0, 284.0, 286.0, 284.0, 292.0, 285.0, 294.0, 242.0, 245.0, 266.0, 259.0, 265.0, 260.0, 287.0, 289.0, 272.0, 258.0, 260.0, 259.0, 269.0, 261.0, 295.0, 284.0, 296.0, 283.0, 266.0, 256.0, 287.0, 286.0, 265.0, 265.0, 314.0, 313.0, 291.0, 285.0, 293.0, 289.0, 281.0, 295.0, 277.0, 293.0, 286.0, 287.0, 291.0, 291.0, 300.0, 327.0, 264.0, 261.0, 289.0, 293.0, 271.0, 273.0, 292.0, 287.0, 293.0, 286.0, 261.0, 264.0, 277.0, 293.0, 254.0, 265.0, 256.0, 266.0, 264.0, 255.0, 293.0, 280.0, 257.0, 262.0, 285.0, 291.0, 272.0, 250.0, 282.0, 288.0, 289.0, 281.0, 300.0, 282.0, 287.0, 295.0, 264.0, 258.0, 297.0, 276.0, 272.0, 258.0, 263.0, 267.0, 259.0, 266.0, 263.0, 256.0, 289.0, 281.0, 253.0, 269.0, 261.0, 261.0, 265.0, 257.0, 284.0, 289.0, 288.0, 282.0, 291.0, 288.0, 280.0, 299.0, 282.0, 288.0, 300.0, 282.0, 286.0, 290.0, 297.0, 276.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7076406330396047, "mean_inference_ms": 1.2706812436953308, "mean_action_processing_ms": 0.13512051871923203, "mean_env_wait_ms": 0.8526030070406487, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 627.0, "episode_reward_min": 459.0, "episode_reward_mean": 559.15, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 219.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 279.575}, "hist_stats": {"episode_reward": [525.0, 573.0, 573.0, 573.0, 582.0, 570.0, 579.0, 582.0, 576.0, 576.0, 573.0, 576.0, 579.0, 567.0, 576.0, 522.0, 573.0, 522.0, 570.0, 579.0, 567.0, 579.0, 567.0, 573.0, 587.0, 576.0, 525.0, 573.0, 573.0, 579.0, 525.0, 459.0, 570.0, 576.0, 576.0, 576.0, 533.0, 573.0, 522.0, 576.0, 576.0, 570.0, 576.0, 579.0, 487.0, 525.0, 525.0, 576.0, 530.0, 519.0, 530.0, 579.0, 579.0, 522.0, 573.0, 530.0, 
627.0, 576.0, 582.0, 576.0, 570.0, 573.0, 582.0, 627.0, 525.0, 582.0, 544.0, 579.0, 579.0, 525.0, 570.0, 519.0, 522.0, 519.0, 573.0, 519.0, 576.0, 522.0, 570.0, 570.0, 582.0, 582.0, 522.0, 573.0, 530.0, 530.0, 525.0, 519.0, 570.0, 522.0, 522.0, 522.0, 573.0, 570.0, 579.0, 579.0, 570.0, 582.0, 576.0, 573.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [269.0, 256.0, 288.0, 285.0, 288.0, 285.0, 296.0, 277.0, 294.0, 288.0, 287.0, 283.0, 292.0, 287.0, 285.0, 297.0, 292.0, 284.0, 281.0, 295.0, 284.0, 289.0, 297.0, 279.0, 293.0, 286.0, 294.0, 273.0, 291.0, 285.0, 271.0, 251.0, 272.0, 301.0, 257.0, 265.0, 286.0, 284.0, 296.0, 283.0, 294.0, 273.0, 295.0, 284.0, 284.0, 283.0, 282.0, 291.0, 295.0, 292.0, 293.0, 283.0, 261.0, 264.0, 300.0, 273.0, 278.0, 295.0, 287.0, 292.0, 260.0, 265.0, 240.0, 219.0, 292.0, 278.0, 295.0, 281.0, 278.0, 298.0, 293.0, 283.0, 263.0, 270.0, 280.0, 293.0, 258.0, 264.0, 290.0, 286.0, 281.0, 295.0, 284.0, 286.0, 284.0, 292.0, 285.0, 294.0, 242.0, 245.0, 266.0, 259.0, 265.0, 260.0, 287.0, 289.0, 272.0, 258.0, 260.0, 259.0, 269.0, 261.0, 295.0, 284.0, 296.0, 283.0, 266.0, 256.0, 287.0, 286.0, 265.0, 265.0, 314.0, 313.0, 291.0, 285.0, 293.0, 289.0, 281.0, 295.0, 277.0, 293.0, 286.0, 287.0, 291.0, 291.0, 300.0, 327.0, 264.0, 261.0, 289.0, 293.0, 271.0, 273.0, 292.0, 287.0, 293.0, 286.0, 261.0, 264.0, 277.0, 293.0, 254.0, 265.0, 256.0, 266.0, 264.0, 255.0, 293.0, 280.0, 257.0, 262.0, 285.0, 291.0, 272.0, 250.0, 282.0, 288.0, 289.0, 281.0, 300.0, 282.0, 287.0, 295.0, 
264.0, 258.0, 297.0, 276.0, 272.0, 258.0, 263.0, 267.0, 259.0, 266.0, 263.0, 256.0, 289.0, 281.0, 253.0, 269.0, 261.0, 261.0, 265.0, 257.0, 284.0, 289.0, 288.0, 282.0, 291.0, 288.0, 280.0, 299.0, 282.0, 288.0, 300.0, 282.0, 286.0, 290.0, 297.0, 276.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7076406330396047, "mean_inference_ms": 1.2706812436953308, "mean_action_processing_ms": 0.13512051871923203, "mean_env_wait_ms": 0.8526030070406487, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 6297600, "num_agent_steps_trained": 6297600, "num_env_steps_sampled": 3148800, "num_env_steps_trained": 3148800, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 3148800, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 6297600, "timers": {"training_iteration_time_ms": 3623.908, "learn_time_ms": 1080.918, "learn_throughput": 11841.787, "synch_weights_time_ms": 14.216}, "counters": {"num_env_steps_sampled": 3148800, "num_env_steps_trained": 3148800, "num_agent_steps_sampled": 6297600, "num_agent_steps_trained": 6297600}, "done": false, "episodes_total": 7872, "training_iteration": 246, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-16-28", "timestamp": 1666581388, "time_this_iter_s": 3.6680450439453125, "time_total_s": 953.7936117649078, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 953.7936117649078, "timesteps_since_restore": 0, "iterations_since_restore": 246, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.62, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 191.4, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 170.89, "shaped_reward_min": 156, "shaped_reward_max": 187, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.81, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 26, "onion_pickup_agent_1_mean": 15.81, "onion_pickup_agent_1_min": 4, "onion_pickup_agent_1_max": 27, "useful_onion_pickup_agent_0_mean": 15.63, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 26, "useful_onion_pickup_agent_1_mean": 15.61, "useful_onion_pickup_agent_1_min": 4, "useful_onion_pickup_agent_1_max": 26, "onion_drop_agent_0_mean": 0.12, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.11, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.05, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, 
"useful_onion_drop_agent_1_mean": 0.06, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.39, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 25, "potting_onion_agent_1_mean": 15.42, "potting_onion_agent_1_min": 4, "potting_onion_agent_1_max": 27, "dish_pickup_agent_0_mean": 5.3, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.27, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.0, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.97, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.17, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.24, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.06, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.04, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 4.95, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 10, "soup_pickup_agent_1_mean": 4.83, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.89, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 10, "soup_delivery_agent_1_mean": 4.74, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.03, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.06, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 15.39, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 25, "optimal_onion_potting_agent_1_mean": 15.42, "optimal_onion_potting_agent_1_min": 4, "optimal_onion_potting_agent_1_max": 27, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.39, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 25, "viable_onion_potting_agent_1_mean": 15.42, "viable_onion_potting_agent_1_min": 4, "viable_onion_potting_agent_1_max": 27, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 8.097290992736816e-05, "policy_loss": -0.00024945399491116405, "vf_loss": 
7.770112037658691, "vf_explained_var": 0.5763081908226013, "kl": 0.0022793293464928865, "entropy": 0.8931667804718018, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 3161600, "num_env_steps_trained": 3161600, "num_agent_steps_sampled": 6323200, "num_agent_steps_trained": 6323200}, "sampler_results": {"episode_reward_max": 627.0, "episode_reward_min": 476.0, "episode_reward_mean": 553.69, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 230.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 276.845}, "custom_metrics": {"sparse_reward_mean": 191.4, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 170.89, "shaped_reward_min": 156, "shaped_reward_max": 187, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.81, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 26, "onion_pickup_agent_1_mean": 15.81, 
"onion_pickup_agent_1_min": 4, "onion_pickup_agent_1_max": 27, "useful_onion_pickup_agent_0_mean": 15.63, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 26, "useful_onion_pickup_agent_1_mean": 15.61, "useful_onion_pickup_agent_1_min": 4, "useful_onion_pickup_agent_1_max": 26, "onion_drop_agent_0_mean": 0.12, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.11, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.05, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.06, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.39, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 25, "potting_onion_agent_1_mean": 15.42, "potting_onion_agent_1_min": 4, "potting_onion_agent_1_max": 27, "dish_pickup_agent_0_mean": 5.3, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.27, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.0, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.97, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.17, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.24, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.06, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.04, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 4.95, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 10, "soup_pickup_agent_1_mean": 4.83, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.89, "soup_delivery_agent_0_min": 1, 
"soup_delivery_agent_0_max": 10, "soup_delivery_agent_1_mean": 4.74, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.03, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.06, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 15.39, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 25, "optimal_onion_potting_agent_1_mean": 15.42, "optimal_onion_potting_agent_1_min": 4, "optimal_onion_potting_agent_1_max": 27, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.39, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 25, "viable_onion_potting_agent_1_mean": 15.42, "viable_onion_potting_agent_1_min": 4, "viable_onion_potting_agent_1_max": 27, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [570.0, 576.0, 576.0, 576.0, 533.0, 573.0, 522.0, 576.0, 576.0, 570.0, 576.0, 579.0, 487.0, 525.0, 525.0, 576.0, 530.0, 519.0, 530.0, 579.0, 579.0, 522.0, 573.0, 530.0, 627.0, 576.0, 582.0, 576.0, 570.0, 573.0, 582.0, 627.0, 525.0, 582.0, 544.0, 579.0, 579.0, 525.0, 570.0, 519.0, 522.0, 519.0, 573.0, 519.0, 576.0, 522.0, 570.0, 570.0, 582.0, 582.0, 522.0, 573.0, 530.0, 530.0, 525.0, 519.0, 570.0, 522.0, 522.0, 522.0, 573.0, 570.0, 579.0, 579.0, 570.0, 582.0, 576.0, 573.0, 479.0, 573.0, 579.0, 576.0, 579.0, 573.0, 576.0, 570.0, 573.0, 516.0, 570.0, 476.0, 522.0, 573.0, 516.0, 570.0, 522.0, 582.0, 525.0, 522.0, 582.0, 522.0, 479.0, 533.0, 522.0, 525.0, 525.0, 579.0, 570.0, 570.0, 579.0, 525.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [292.0, 278.0, 295.0, 281.0, 278.0, 298.0, 293.0, 283.0, 263.0, 270.0, 280.0, 293.0, 258.0, 264.0, 290.0, 286.0, 281.0, 295.0, 284.0, 286.0, 284.0, 292.0, 285.0, 294.0, 242.0, 245.0, 266.0, 259.0, 265.0, 260.0, 287.0, 289.0, 272.0, 258.0, 260.0, 259.0, 269.0, 261.0, 295.0, 284.0, 296.0, 283.0, 266.0, 256.0, 287.0, 286.0, 265.0, 265.0, 314.0, 
313.0, 291.0, 285.0, 293.0, 289.0, 281.0, 295.0, 277.0, 293.0, 286.0, 287.0, 291.0, 291.0, 300.0, 327.0, 264.0, 261.0, 289.0, 293.0, 271.0, 273.0, 292.0, 287.0, 293.0, 286.0, 261.0, 264.0, 277.0, 293.0, 254.0, 265.0, 256.0, 266.0, 264.0, 255.0, 293.0, 280.0, 257.0, 262.0, 285.0, 291.0, 272.0, 250.0, 282.0, 288.0, 289.0, 281.0, 300.0, 282.0, 287.0, 295.0, 264.0, 258.0, 297.0, 276.0, 272.0, 258.0, 263.0, 267.0, 259.0, 266.0, 263.0, 256.0, 289.0, 281.0, 253.0, 269.0, 261.0, 261.0, 265.0, 257.0, 284.0, 289.0, 288.0, 282.0, 291.0, 288.0, 280.0, 299.0, 282.0, 288.0, 300.0, 282.0, 286.0, 290.0, 297.0, 276.0, 243.0, 236.0, 288.0, 285.0, 291.0, 288.0, 293.0, 283.0, 296.0, 283.0, 276.0, 297.0, 291.0, 285.0, 285.0, 285.0, 282.0, 291.0, 270.0, 246.0, 280.0, 290.0, 246.0, 230.0, 265.0, 257.0, 284.0, 289.0, 261.0, 255.0, 292.0, 278.0, 269.0, 253.0, 279.0, 303.0, 262.0, 263.0, 265.0, 257.0, 297.0, 285.0, 252.0, 270.0, 238.0, 241.0, 262.0, 271.0, 261.0, 261.0, 263.0, 262.0, 270.0, 255.0, 286.0, 293.0, 285.0, 285.0, 281.0, 289.0, 290.0, 289.0, 253.0, 272.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.707511445459952, "mean_inference_ms": 1.2703453898963024, "mean_action_processing_ms": 0.13510609992774492, "mean_env_wait_ms": 0.8523951092536021, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 627.0, "episode_reward_min": 476.0, "episode_reward_mean": 553.69, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 230.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 276.845}, "hist_stats": {"episode_reward": [570.0, 576.0, 576.0, 576.0, 533.0, 573.0, 522.0, 576.0, 576.0, 570.0, 576.0, 579.0, 487.0, 525.0, 525.0, 576.0, 530.0, 519.0, 530.0, 579.0, 579.0, 522.0, 573.0, 530.0, 627.0, 576.0, 582.0, 576.0, 570.0, 573.0, 582.0, 627.0, 525.0, 582.0, 544.0, 579.0, 579.0, 525.0, 570.0, 519.0, 522.0, 519.0, 573.0, 519.0, 576.0, 522.0, 570.0, 570.0, 582.0, 582.0, 522.0, 573.0, 530.0, 530.0, 525.0, 519.0, 570.0, 
522.0, 522.0, 522.0, 573.0, 570.0, 579.0, 579.0, 570.0, 582.0, 576.0, 573.0, 479.0, 573.0, 579.0, 576.0, 579.0, 573.0, 576.0, 570.0, 573.0, 516.0, 570.0, 476.0, 522.0, 573.0, 516.0, 570.0, 522.0, 582.0, 525.0, 522.0, 582.0, 522.0, 479.0, 533.0, 522.0, 525.0, 525.0, 579.0, 570.0, 570.0, 579.0, 525.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [292.0, 278.0, 295.0, 281.0, 278.0, 298.0, 293.0, 283.0, 263.0, 270.0, 280.0, 293.0, 258.0, 264.0, 290.0, 286.0, 281.0, 295.0, 284.0, 286.0, 284.0, 292.0, 285.0, 294.0, 242.0, 245.0, 266.0, 259.0, 265.0, 260.0, 287.0, 289.0, 272.0, 258.0, 260.0, 259.0, 269.0, 261.0, 295.0, 284.0, 296.0, 283.0, 266.0, 256.0, 287.0, 286.0, 265.0, 265.0, 314.0, 313.0, 291.0, 285.0, 293.0, 289.0, 281.0, 295.0, 277.0, 293.0, 286.0, 287.0, 291.0, 291.0, 300.0, 327.0, 264.0, 261.0, 289.0, 293.0, 271.0, 273.0, 292.0, 287.0, 293.0, 286.0, 261.0, 264.0, 277.0, 293.0, 254.0, 265.0, 256.0, 266.0, 264.0, 255.0, 293.0, 280.0, 257.0, 262.0, 285.0, 291.0, 272.0, 250.0, 282.0, 288.0, 289.0, 281.0, 300.0, 282.0, 287.0, 295.0, 264.0, 258.0, 297.0, 276.0, 272.0, 258.0, 263.0, 267.0, 259.0, 266.0, 263.0, 256.0, 289.0, 281.0, 253.0, 269.0, 261.0, 261.0, 265.0, 257.0, 284.0, 289.0, 288.0, 282.0, 291.0, 288.0, 280.0, 299.0, 282.0, 288.0, 300.0, 282.0, 286.0, 290.0, 297.0, 276.0, 243.0, 236.0, 288.0, 285.0, 291.0, 288.0, 293.0, 283.0, 296.0, 283.0, 276.0, 297.0, 291.0, 285.0, 285.0, 285.0, 282.0, 291.0, 270.0, 246.0, 280.0, 290.0, 246.0, 230.0, 265.0, 257.0, 284.0, 289.0, 261.0, 
255.0, 292.0, 278.0, 269.0, 253.0, 279.0, 303.0, 262.0, 263.0, 265.0, 257.0, 297.0, 285.0, 252.0, 270.0, 238.0, 241.0, 262.0, 271.0, 261.0, 261.0, 263.0, 262.0, 270.0, 255.0, 286.0, 293.0, 285.0, 285.0, 281.0, 289.0, 290.0, 289.0, 253.0, 272.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.707511445459952, "mean_inference_ms": 1.2703453898963024, "mean_action_processing_ms": 0.13510609992774492, "mean_env_wait_ms": 0.8523951092536021, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 6323200, "num_agent_steps_trained": 6323200, "num_env_steps_sampled": 3161600, "num_env_steps_trained": 3161600, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 3161600, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 6323200, "timers": {"training_iteration_time_ms": 3597.532, "learn_time_ms": 1084.034, "learn_throughput": 11807.752, "synch_weights_time_ms": 14.162}, "counters": {"num_env_steps_sampled": 3161600, "num_env_steps_trained": 3161600, "num_agent_steps_sampled": 6323200, "num_agent_steps_trained": 6323200}, "done": false, "episodes_total": 7904, "training_iteration": 247, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-16-32", "timestamp": 1666581392, "time_this_iter_s": 3.6923258304595947, "time_total_s": 957.4859375953674, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 957.4859375953674, "timesteps_since_restore": 0, "iterations_since_restore": 247, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.48333333333333, "ram_util_percent": 10.616666666666665}}
+{"custom_metrics": {"sparse_reward_mean": 190.4, "sparse_reward_min": 160, "sparse_reward_max": 200, "shaped_reward_mean": 169.62, "shaped_reward_min": 153, "shaped_reward_max": 184, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.83, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 15.51, "onion_pickup_agent_1_min": 4, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 15.67, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 25, "useful_onion_pickup_agent_1_mean": 15.38, "useful_onion_pickup_agent_1_min": 4, "useful_onion_pickup_agent_1_max": 25, "onion_drop_agent_0_mean": 0.1, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.04, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.05, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, 
"useful_onion_drop_agent_1_mean": 0.01, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.41, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 25, "potting_onion_agent_1_mean": 15.2, "potting_onion_agent_1_min": 4, "potting_onion_agent_1_max": 25, "dish_pickup_agent_0_mean": 5.15, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.35, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.88, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 5.0, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.15, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.23, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.04, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.04, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 4.82, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.88, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 4.75, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.82, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.05, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.04, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 15.41, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 25, "optimal_onion_potting_agent_1_mean": 15.2, "optimal_onion_potting_agent_1_min": 4, "optimal_onion_potting_agent_1_max": 25, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.41, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 25, "viable_onion_potting_agent_1_mean": 15.2, "viable_onion_potting_agent_1_min": 4, "viable_onion_potting_agent_1_max": 25, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0028802365995943546, "policy_loss": 0.0025495009031146765, "vf_loss": 
7.888293266296387, "vf_explained_var": 0.5572105050086975, "kl": 0.0023757275193929672, "entropy": 0.9161854386329651, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 3174400, "num_env_steps_trained": 3174400, "num_agent_steps_sampled": 6348800, "num_agent_steps_trained": 6348800}, "sampler_results": {"episode_reward_max": 582.0, "episode_reward_min": 476.0, "episode_reward_mean": 550.42, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 230.0}, "policy_reward_max": {"ppo": 303.0}, "policy_reward_mean": {"ppo": 275.21}, "custom_metrics": {"sparse_reward_mean": 190.4, "sparse_reward_min": 160, "sparse_reward_max": 200, "shaped_reward_mean": 169.62, "shaped_reward_min": 153, "shaped_reward_max": 184, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.83, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 15.51, 
"onion_pickup_agent_1_min": 4, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 15.67, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 25, "useful_onion_pickup_agent_1_mean": 15.38, "useful_onion_pickup_agent_1_min": 4, "useful_onion_pickup_agent_1_max": 25, "onion_drop_agent_0_mean": 0.1, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.04, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.05, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.01, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.41, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 25, "potting_onion_agent_1_mean": 15.2, "potting_onion_agent_1_min": 4, "potting_onion_agent_1_max": 25, "dish_pickup_agent_0_mean": 5.15, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.35, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.88, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 5.0, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.15, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.23, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.04, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.04, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 4.82, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.88, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 4.75, "soup_delivery_agent_0_min": 1, 
"soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.82, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.05, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.04, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 15.41, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 25, "optimal_onion_potting_agent_1_mean": 15.2, "optimal_onion_potting_agent_1_min": 4, "optimal_onion_potting_agent_1_max": 25, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.41, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 25, "viable_onion_potting_agent_1_mean": 15.2, "viable_onion_potting_agent_1_min": 4, "viable_onion_potting_agent_1_max": 25, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [525.0, 582.0, 544.0, 579.0, 579.0, 525.0, 570.0, 519.0, 522.0, 519.0, 573.0, 519.0, 576.0, 522.0, 570.0, 570.0, 582.0, 582.0, 522.0, 573.0, 530.0, 530.0, 525.0, 519.0, 570.0, 522.0, 522.0, 522.0, 573.0, 570.0, 579.0, 579.0, 570.0, 582.0, 576.0, 573.0, 479.0, 573.0, 579.0, 576.0, 579.0, 573.0, 576.0, 570.0, 573.0, 516.0, 570.0, 476.0, 522.0, 573.0, 516.0, 570.0, 522.0, 582.0, 525.0, 522.0, 582.0, 522.0, 479.0, 533.0, 522.0, 525.0, 525.0, 579.0, 570.0, 570.0, 579.0, 525.0, 570.0, 522.0, 522.0, 573.0, 573.0, 516.0, 576.0, 530.0, 527.0, 525.0, 579.0, 570.0, 527.0, 582.0, 519.0, 582.0, 579.0, 576.0, 573.0, 522.0, 522.0, 573.0, 525.0, 573.0, 567.0, 579.0, 513.0, 570.0, 573.0, 525.0, 576.0, 525.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [264.0, 261.0, 289.0, 293.0, 271.0, 273.0, 292.0, 287.0, 293.0, 286.0, 261.0, 264.0, 277.0, 293.0, 254.0, 265.0, 256.0, 266.0, 264.0, 255.0, 293.0, 280.0, 257.0, 262.0, 285.0, 291.0, 272.0, 250.0, 282.0, 288.0, 289.0, 281.0, 300.0, 282.0, 287.0, 295.0, 264.0, 258.0, 297.0, 276.0, 272.0, 258.0, 263.0, 267.0, 259.0, 266.0, 263.0, 256.0, 289.0, 
281.0, 253.0, 269.0, 261.0, 261.0, 265.0, 257.0, 284.0, 289.0, 288.0, 282.0, 291.0, 288.0, 280.0, 299.0, 282.0, 288.0, 300.0, 282.0, 286.0, 290.0, 297.0, 276.0, 243.0, 236.0, 288.0, 285.0, 291.0, 288.0, 293.0, 283.0, 296.0, 283.0, 276.0, 297.0, 291.0, 285.0, 285.0, 285.0, 282.0, 291.0, 270.0, 246.0, 280.0, 290.0, 246.0, 230.0, 265.0, 257.0, 284.0, 289.0, 261.0, 255.0, 292.0, 278.0, 269.0, 253.0, 279.0, 303.0, 262.0, 263.0, 265.0, 257.0, 297.0, 285.0, 252.0, 270.0, 238.0, 241.0, 262.0, 271.0, 261.0, 261.0, 263.0, 262.0, 270.0, 255.0, 286.0, 293.0, 285.0, 285.0, 281.0, 289.0, 290.0, 289.0, 253.0, 272.0, 279.0, 291.0, 248.0, 274.0, 263.0, 259.0, 291.0, 282.0, 283.0, 290.0, 260.0, 256.0, 284.0, 292.0, 262.0, 268.0, 270.0, 257.0, 255.0, 270.0, 289.0, 290.0, 276.0, 294.0, 267.0, 260.0, 287.0, 295.0, 256.0, 263.0, 289.0, 293.0, 302.0, 277.0, 281.0, 295.0, 279.0, 294.0, 259.0, 263.0, 252.0, 270.0, 293.0, 280.0, 264.0, 261.0, 280.0, 293.0, 301.0, 266.0, 287.0, 292.0, 255.0, 258.0, 273.0, 297.0, 296.0, 277.0, 258.0, 267.0, 284.0, 292.0, 258.0, 267.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.70739328529775, "mean_inference_ms": 1.2700437704412777, "mean_action_processing_ms": 0.1350949326437043, "mean_env_wait_ms": 0.8522100793537379, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 582.0, "episode_reward_min": 476.0, "episode_reward_mean": 550.42, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 230.0}, "policy_reward_max": {"ppo": 303.0}, "policy_reward_mean": {"ppo": 275.21}, "hist_stats": {"episode_reward": [525.0, 582.0, 544.0, 579.0, 579.0, 525.0, 570.0, 519.0, 522.0, 519.0, 573.0, 519.0, 576.0, 522.0, 570.0, 570.0, 582.0, 582.0, 522.0, 573.0, 530.0, 530.0, 525.0, 519.0, 570.0, 522.0, 522.0, 522.0, 573.0, 570.0, 579.0, 579.0, 570.0, 582.0, 576.0, 573.0, 479.0, 573.0, 579.0, 576.0, 579.0, 573.0, 576.0, 570.0, 573.0, 516.0, 570.0, 476.0, 522.0, 573.0, 516.0, 570.0, 522.0, 582.0, 525.0, 522.0, 582.0, 
522.0, 479.0, 533.0, 522.0, 525.0, 525.0, 579.0, 570.0, 570.0, 579.0, 525.0, 570.0, 522.0, 522.0, 573.0, 573.0, 516.0, 576.0, 530.0, 527.0, 525.0, 579.0, 570.0, 527.0, 582.0, 519.0, 582.0, 579.0, 576.0, 573.0, 522.0, 522.0, 573.0, 525.0, 573.0, 567.0, 579.0, 513.0, 570.0, 573.0, 525.0, 576.0, 525.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [264.0, 261.0, 289.0, 293.0, 271.0, 273.0, 292.0, 287.0, 293.0, 286.0, 261.0, 264.0, 277.0, 293.0, 254.0, 265.0, 256.0, 266.0, 264.0, 255.0, 293.0, 280.0, 257.0, 262.0, 285.0, 291.0, 272.0, 250.0, 282.0, 288.0, 289.0, 281.0, 300.0, 282.0, 287.0, 295.0, 264.0, 258.0, 297.0, 276.0, 272.0, 258.0, 263.0, 267.0, 259.0, 266.0, 263.0, 256.0, 289.0, 281.0, 253.0, 269.0, 261.0, 261.0, 265.0, 257.0, 284.0, 289.0, 288.0, 282.0, 291.0, 288.0, 280.0, 299.0, 282.0, 288.0, 300.0, 282.0, 286.0, 290.0, 297.0, 276.0, 243.0, 236.0, 288.0, 285.0, 291.0, 288.0, 293.0, 283.0, 296.0, 283.0, 276.0, 297.0, 291.0, 285.0, 285.0, 285.0, 282.0, 291.0, 270.0, 246.0, 280.0, 290.0, 246.0, 230.0, 265.0, 257.0, 284.0, 289.0, 261.0, 255.0, 292.0, 278.0, 269.0, 253.0, 279.0, 303.0, 262.0, 263.0, 265.0, 257.0, 297.0, 285.0, 252.0, 270.0, 238.0, 241.0, 262.0, 271.0, 261.0, 261.0, 263.0, 262.0, 270.0, 255.0, 286.0, 293.0, 285.0, 285.0, 281.0, 289.0, 290.0, 289.0, 253.0, 272.0, 279.0, 291.0, 248.0, 274.0, 263.0, 259.0, 291.0, 282.0, 283.0, 290.0, 260.0, 256.0, 284.0, 292.0, 262.0, 268.0, 270.0, 257.0, 255.0, 270.0, 289.0, 290.0, 276.0, 294.0, 267.0, 260.0, 287.0, 295.0, 256.0, 
263.0, 289.0, 293.0, 302.0, 277.0, 281.0, 295.0, 279.0, 294.0, 259.0, 263.0, 252.0, 270.0, 293.0, 280.0, 264.0, 261.0, 280.0, 293.0, 301.0, 266.0, 287.0, 292.0, 255.0, 258.0, 273.0, 297.0, 296.0, 277.0, 258.0, 267.0, 284.0, 292.0, 258.0, 267.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.70739328529775, "mean_inference_ms": 1.2700437704412777, "mean_action_processing_ms": 0.1350949326437043, "mean_env_wait_ms": 0.8522100793537379, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 6348800, "num_agent_steps_trained": 6348800, "num_env_steps_sampled": 3174400, "num_env_steps_trained": 3174400, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 3174400, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 6348800, "timers": {"training_iteration_time_ms": 3606.368, "learn_time_ms": 1089.396, "learn_throughput": 11749.629, "synch_weights_time_ms": 14.267}, "counters": {"num_env_steps_sampled": 3174400, "num_env_steps_trained": 3174400, "num_agent_steps_sampled": 6348800, "num_agent_steps_trained": 6348800}, "done": false, "episodes_total": 7936, "training_iteration": 248, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-16-36", "timestamp": 1666581396, "time_this_iter_s": 3.726221799850464, "time_total_s": 961.2121593952179, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 961.2121593952179, "timesteps_since_restore": 0, "iterations_since_restore": 248, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.919999999999998, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 190.8, "sparse_reward_min": 160, "sparse_reward_max": 200, "shaped_reward_mean": 169.96, "shaped_reward_min": 153, "shaped_reward_max": 182, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.35, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 15.99, "onion_pickup_agent_1_min": 4, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 15.19, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 25, "useful_onion_pickup_agent_1_mean": 15.89, "useful_onion_pickup_agent_1_min": 4, "useful_onion_pickup_agent_1_max": 25, "onion_drop_agent_0_mean": 0.1, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.05, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.04, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, 
"useful_onion_drop_agent_1_mean": 0.01, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 14.97, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 25, "potting_onion_agent_1_mean": 15.7, "potting_onion_agent_1_min": 4, "potting_onion_agent_1_max": 25, "dish_pickup_agent_0_mean": 5.38, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.09, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.11, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.79, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.2, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.18, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.04, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.68, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 4.94, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.64, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.07, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.97, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 25, "optimal_onion_potting_agent_1_mean": 15.7, "optimal_onion_potting_agent_1_min": 4, "optimal_onion_potting_agent_1_max": 25, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.97, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 25, "viable_onion_potting_agent_1_mean": 15.7, "viable_onion_potting_agent_1_min": 4, "viable_onion_potting_agent_1_max": 25, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0006418681005015969, "policy_loss": -0.0009683937532827258, "vf_loss": 
7.752774238586426, "vf_explained_var": 0.5776857137680054, "kl": 0.0023451021406799555, "entropy": 0.8975032567977905, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 3187200, "num_env_steps_trained": 3187200, "num_agent_steps_sampled": 6374400, "num_agent_steps_trained": 6374400}, "sampler_results": {"episode_reward_max": 582.0, "episode_reward_min": 476.0, "episode_reward_mean": 551.56, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 230.0}, "policy_reward_max": {"ppo": 303.0}, "policy_reward_mean": {"ppo": 275.78}, "custom_metrics": {"sparse_reward_mean": 190.8, "sparse_reward_min": 160, "sparse_reward_max": 200, "shaped_reward_mean": 169.96, "shaped_reward_min": 153, "shaped_reward_max": 182, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.35, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 15.99, 
"onion_pickup_agent_1_min": 4, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 15.19, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 25, "useful_onion_pickup_agent_1_mean": 15.89, "useful_onion_pickup_agent_1_min": 4, "useful_onion_pickup_agent_1_max": 25, "onion_drop_agent_0_mean": 0.1, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.05, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.04, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.01, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 14.97, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 25, "potting_onion_agent_1_mean": 15.7, "potting_onion_agent_1_min": 4, "potting_onion_agent_1_max": 25, "dish_pickup_agent_0_mean": 5.38, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.09, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.11, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.79, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.2, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.18, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.04, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.68, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 4.94, "soup_delivery_agent_0_min": 1, 
"soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.64, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.07, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.97, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 25, "optimal_onion_potting_agent_1_mean": 15.7, "optimal_onion_potting_agent_1_min": 4, "optimal_onion_potting_agent_1_max": 25, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.97, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 25, "viable_onion_potting_agent_1_mean": 15.7, "viable_onion_potting_agent_1_min": 4, "viable_onion_potting_agent_1_max": 25, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [570.0, 582.0, 576.0, 573.0, 479.0, 573.0, 579.0, 576.0, 579.0, 573.0, 576.0, 570.0, 573.0, 516.0, 570.0, 476.0, 522.0, 573.0, 516.0, 570.0, 522.0, 582.0, 525.0, 522.0, 582.0, 522.0, 479.0, 533.0, 522.0, 525.0, 525.0, 579.0, 570.0, 570.0, 579.0, 525.0, 570.0, 522.0, 522.0, 573.0, 573.0, 516.0, 576.0, 530.0, 527.0, 525.0, 579.0, 570.0, 527.0, 582.0, 519.0, 582.0, 579.0, 576.0, 573.0, 522.0, 522.0, 573.0, 525.0, 573.0, 567.0, 579.0, 513.0, 570.0, 573.0, 525.0, 576.0, 525.0, 525.0, 530.0, 579.0, 576.0, 573.0, 525.0, 573.0, 576.0, 522.0, 576.0, 582.0, 525.0, 582.0, 573.0, 525.0, 519.0, 573.0, 582.0, 573.0, 579.0, 573.0, 570.0, 525.0, 522.0, 576.0, 582.0, 516.0, 525.0, 573.0, 524.0, 530.0, 524.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [282.0, 288.0, 300.0, 282.0, 286.0, 290.0, 297.0, 276.0, 243.0, 236.0, 288.0, 285.0, 291.0, 288.0, 293.0, 283.0, 296.0, 283.0, 276.0, 297.0, 291.0, 285.0, 285.0, 285.0, 282.0, 291.0, 270.0, 246.0, 280.0, 290.0, 246.0, 230.0, 265.0, 257.0, 284.0, 289.0, 261.0, 255.0, 292.0, 278.0, 269.0, 253.0, 279.0, 303.0, 262.0, 263.0, 265.0, 257.0, 297.0, 
285.0, 252.0, 270.0, 238.0, 241.0, 262.0, 271.0, 261.0, 261.0, 263.0, 262.0, 270.0, 255.0, 286.0, 293.0, 285.0, 285.0, 281.0, 289.0, 290.0, 289.0, 253.0, 272.0, 279.0, 291.0, 248.0, 274.0, 263.0, 259.0, 291.0, 282.0, 283.0, 290.0, 260.0, 256.0, 284.0, 292.0, 262.0, 268.0, 270.0, 257.0, 255.0, 270.0, 289.0, 290.0, 276.0, 294.0, 267.0, 260.0, 287.0, 295.0, 256.0, 263.0, 289.0, 293.0, 302.0, 277.0, 281.0, 295.0, 279.0, 294.0, 259.0, 263.0, 252.0, 270.0, 293.0, 280.0, 264.0, 261.0, 280.0, 293.0, 301.0, 266.0, 287.0, 292.0, 255.0, 258.0, 273.0, 297.0, 296.0, 277.0, 258.0, 267.0, 284.0, 292.0, 258.0, 267.0, 265.0, 260.0, 255.0, 275.0, 296.0, 283.0, 284.0, 292.0, 281.0, 292.0, 267.0, 258.0, 284.0, 289.0, 289.0, 287.0, 258.0, 264.0, 285.0, 291.0, 293.0, 289.0, 267.0, 258.0, 296.0, 286.0, 294.0, 279.0, 268.0, 257.0, 260.0, 259.0, 290.0, 283.0, 300.0, 282.0, 286.0, 287.0, 286.0, 293.0, 288.0, 285.0, 283.0, 287.0, 261.0, 264.0, 264.0, 258.0, 291.0, 285.0, 297.0, 285.0, 259.0, 257.0, 255.0, 270.0, 291.0, 282.0, 266.0, 258.0, 265.0, 265.0, 268.0, 256.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.707276742555301, "mean_inference_ms": 1.2697386632886414, "mean_action_processing_ms": 0.13508406524906952, "mean_env_wait_ms": 0.8520395956203936, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 582.0, "episode_reward_min": 476.0, "episode_reward_mean": 551.56, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 230.0}, "policy_reward_max": {"ppo": 303.0}, "policy_reward_mean": {"ppo": 275.78}, "hist_stats": {"episode_reward": [570.0, 582.0, 576.0, 573.0, 479.0, 573.0, 579.0, 576.0, 579.0, 573.0, 576.0, 570.0, 573.0, 516.0, 570.0, 476.0, 522.0, 573.0, 516.0, 570.0, 522.0, 582.0, 525.0, 522.0, 582.0, 522.0, 479.0, 533.0, 522.0, 525.0, 525.0, 579.0, 570.0, 570.0, 579.0, 525.0, 570.0, 522.0, 522.0, 573.0, 573.0, 516.0, 576.0, 530.0, 527.0, 525.0, 579.0, 570.0, 527.0, 582.0, 519.0, 582.0, 579.0, 576.0, 573.0, 522.0, 522.0, 
573.0, 525.0, 573.0, 567.0, 579.0, 513.0, 570.0, 573.0, 525.0, 576.0, 525.0, 525.0, 530.0, 579.0, 576.0, 573.0, 525.0, 573.0, 576.0, 522.0, 576.0, 582.0, 525.0, 582.0, 573.0, 525.0, 519.0, 573.0, 582.0, 573.0, 579.0, 573.0, 570.0, 525.0, 522.0, 576.0, 582.0, 516.0, 525.0, 573.0, 524.0, 530.0, 524.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [282.0, 288.0, 300.0, 282.0, 286.0, 290.0, 297.0, 276.0, 243.0, 236.0, 288.0, 285.0, 291.0, 288.0, 293.0, 283.0, 296.0, 283.0, 276.0, 297.0, 291.0, 285.0, 285.0, 285.0, 282.0, 291.0, 270.0, 246.0, 280.0, 290.0, 246.0, 230.0, 265.0, 257.0, 284.0, 289.0, 261.0, 255.0, 292.0, 278.0, 269.0, 253.0, 279.0, 303.0, 262.0, 263.0, 265.0, 257.0, 297.0, 285.0, 252.0, 270.0, 238.0, 241.0, 262.0, 271.0, 261.0, 261.0, 263.0, 262.0, 270.0, 255.0, 286.0, 293.0, 285.0, 285.0, 281.0, 289.0, 290.0, 289.0, 253.0, 272.0, 279.0, 291.0, 248.0, 274.0, 263.0, 259.0, 291.0, 282.0, 283.0, 290.0, 260.0, 256.0, 284.0, 292.0, 262.0, 268.0, 270.0, 257.0, 255.0, 270.0, 289.0, 290.0, 276.0, 294.0, 267.0, 260.0, 287.0, 295.0, 256.0, 263.0, 289.0, 293.0, 302.0, 277.0, 281.0, 295.0, 279.0, 294.0, 259.0, 263.0, 252.0, 270.0, 293.0, 280.0, 264.0, 261.0, 280.0, 293.0, 301.0, 266.0, 287.0, 292.0, 255.0, 258.0, 273.0, 297.0, 296.0, 277.0, 258.0, 267.0, 284.0, 292.0, 258.0, 267.0, 265.0, 260.0, 255.0, 275.0, 296.0, 283.0, 284.0, 292.0, 281.0, 292.0, 267.0, 258.0, 284.0, 289.0, 289.0, 287.0, 258.0, 264.0, 285.0, 291.0, 293.0, 289.0, 267.0, 258.0, 296.0, 286.0, 294.0, 279.0, 268.0, 
257.0, 260.0, 259.0, 290.0, 283.0, 300.0, 282.0, 286.0, 287.0, 286.0, 293.0, 288.0, 285.0, 283.0, 287.0, 261.0, 264.0, 264.0, 258.0, 291.0, 285.0, 297.0, 285.0, 259.0, 257.0, 255.0, 270.0, 291.0, 282.0, 266.0, 258.0, 265.0, 265.0, 268.0, 256.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.707276742555301, "mean_inference_ms": 1.2697386632886414, "mean_action_processing_ms": 0.13508406524906952, "mean_env_wait_ms": 0.8520395956203936, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 6374400, "num_agent_steps_trained": 6374400, "num_env_steps_sampled": 3187200, "num_env_steps_trained": 3187200, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 3187200, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 6374400, "timers": {"training_iteration_time_ms": 3601.128, "learn_time_ms": 1084.687, "learn_throughput": 11800.639, "synch_weights_time_ms": 14.231}, "counters": {"num_env_steps_sampled": 3187200, "num_env_steps_trained": 3187200, "num_agent_steps_sampled": 6374400, "num_agent_steps_trained": 6374400}, "done": false, "episodes_total": 7968, "training_iteration": 249, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-16-40", "timestamp": 1666581400, "time_this_iter_s": 3.60356068611145, "time_total_s": 964.8157200813293, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 964.8157200813293, "timesteps_since_restore": 0, "iterations_since_restore": 249, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 21.033333333333328, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 191.6, "sparse_reward_min": 180, "sparse_reward_max": 200, "shaped_reward_mean": 170.78, "shaped_reward_min": 153, "shaped_reward_max": 184, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.78, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 16.66, "onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 14.62, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 25, "useful_onion_pickup_agent_1_mean": 16.52, "useful_onion_pickup_agent_1_min": 6, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.08, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.03, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.04, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, 
"useful_onion_drop_agent_1_mean": 0.01, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 14.48, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 25, "potting_onion_agent_1_mean": 16.37, "potting_onion_agent_1_min": 6, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 5.57, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 4.86, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.35, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.61, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.18, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.12, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.26, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.5, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.14, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.46, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.09, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 3, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.48, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 25, "optimal_onion_potting_agent_1_mean": 16.37, "optimal_onion_potting_agent_1_min": 6, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.48, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 25, "viable_onion_potting_agent_1_mean": 16.37, "viable_onion_potting_agent_1_min": 6, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0007210352923721075, "policy_loss": 0.0003855510149151087, "vf_loss": 
7.820246696472168, "vf_explained_var": 0.577731728553772, "kl": 0.001891107764095068, "entropy": 0.8930797576904297, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 3200000, "num_env_steps_trained": 3200000, "num_agent_steps_sampled": 6400000, "num_agent_steps_trained": 6400000}, "sampler_results": {"episode_reward_max": 584.0, "episode_reward_min": 513.0, "episode_reward_mean": 553.98, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 248.0}, "policy_reward_max": {"ppo": 302.0}, "policy_reward_mean": {"ppo": 276.99}, "custom_metrics": {"sparse_reward_mean": 191.6, "sparse_reward_min": 180, "sparse_reward_max": 200, "shaped_reward_mean": 170.78, "shaped_reward_min": 153, "shaped_reward_max": 184, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.78, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 16.66, 
"onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 14.62, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 25, "useful_onion_pickup_agent_1_mean": 16.52, "useful_onion_pickup_agent_1_min": 6, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.08, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.03, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.04, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.01, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 14.48, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 25, "potting_onion_agent_1_mean": 16.37, "potting_onion_agent_1_min": 6, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 5.57, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 4.86, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.35, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.61, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.18, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.12, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.26, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.5, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.14, "soup_delivery_agent_0_min": 1, 
"soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.46, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.09, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 3, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.48, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 25, "optimal_onion_potting_agent_1_mean": 16.37, "optimal_onion_potting_agent_1_min": 6, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.48, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 25, "viable_onion_potting_agent_1_mean": 16.37, "viable_onion_potting_agent_1_min": 6, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [570.0, 570.0, 579.0, 525.0, 570.0, 522.0, 522.0, 573.0, 573.0, 516.0, 576.0, 530.0, 527.0, 525.0, 579.0, 570.0, 527.0, 582.0, 519.0, 582.0, 579.0, 576.0, 573.0, 522.0, 522.0, 573.0, 525.0, 573.0, 567.0, 579.0, 513.0, 570.0, 573.0, 525.0, 576.0, 525.0, 525.0, 530.0, 579.0, 576.0, 573.0, 525.0, 573.0, 576.0, 522.0, 576.0, 582.0, 525.0, 582.0, 573.0, 525.0, 519.0, 573.0, 582.0, 573.0, 579.0, 573.0, 570.0, 525.0, 522.0, 576.0, 582.0, 516.0, 525.0, 573.0, 524.0, 530.0, 524.0, 579.0, 573.0, 582.0, 525.0, 582.0, 573.0, 525.0, 522.0, 525.0, 573.0, 525.0, 530.0, 525.0, 519.0, 582.0, 582.0, 573.0, 582.0, 522.0, 525.0, 570.0, 573.0, 519.0, 584.0, 579.0, 519.0, 579.0, 579.0, 573.0, 579.0, 522.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [285.0, 285.0, 281.0, 289.0, 290.0, 289.0, 253.0, 272.0, 279.0, 291.0, 248.0, 274.0, 263.0, 259.0, 291.0, 282.0, 283.0, 290.0, 260.0, 256.0, 284.0, 292.0, 262.0, 268.0, 270.0, 257.0, 255.0, 270.0, 289.0, 290.0, 276.0, 294.0, 267.0, 260.0, 287.0, 295.0, 256.0, 263.0, 289.0, 293.0, 302.0, 277.0, 281.0, 295.0, 279.0, 294.0, 259.0, 263.0, 252.0, 
270.0, 293.0, 280.0, 264.0, 261.0, 280.0, 293.0, 301.0, 266.0, 287.0, 292.0, 255.0, 258.0, 273.0, 297.0, 296.0, 277.0, 258.0, 267.0, 284.0, 292.0, 258.0, 267.0, 265.0, 260.0, 255.0, 275.0, 296.0, 283.0, 284.0, 292.0, 281.0, 292.0, 267.0, 258.0, 284.0, 289.0, 289.0, 287.0, 258.0, 264.0, 285.0, 291.0, 293.0, 289.0, 267.0, 258.0, 296.0, 286.0, 294.0, 279.0, 268.0, 257.0, 260.0, 259.0, 290.0, 283.0, 300.0, 282.0, 286.0, 287.0, 286.0, 293.0, 288.0, 285.0, 283.0, 287.0, 261.0, 264.0, 264.0, 258.0, 291.0, 285.0, 297.0, 285.0, 259.0, 257.0, 255.0, 270.0, 291.0, 282.0, 266.0, 258.0, 265.0, 265.0, 268.0, 256.0, 287.0, 292.0, 287.0, 286.0, 287.0, 295.0, 272.0, 253.0, 292.0, 290.0, 285.0, 288.0, 265.0, 260.0, 257.0, 265.0, 271.0, 254.0, 285.0, 288.0, 264.0, 261.0, 265.0, 265.0, 259.0, 266.0, 269.0, 250.0, 290.0, 292.0, 296.0, 286.0, 294.0, 279.0, 294.0, 288.0, 268.0, 254.0, 264.0, 261.0, 286.0, 284.0, 286.0, 287.0, 262.0, 257.0, 292.0, 292.0, 285.0, 294.0, 268.0, 251.0, 286.0, 293.0, 283.0, 296.0, 287.0, 286.0, 286.0, 293.0, 260.0, 262.0, 285.0, 297.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7071785885693055, "mean_inference_ms": 1.269468341528467, "mean_action_processing_ms": 0.13507871917250727, "mean_env_wait_ms": 0.8519080078969777, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 584.0, "episode_reward_min": 513.0, "episode_reward_mean": 553.98, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 248.0}, "policy_reward_max": {"ppo": 302.0}, "policy_reward_mean": {"ppo": 276.99}, "hist_stats": {"episode_reward": [570.0, 570.0, 579.0, 525.0, 570.0, 522.0, 522.0, 573.0, 573.0, 516.0, 576.0, 530.0, 527.0, 525.0, 579.0, 570.0, 527.0, 582.0, 519.0, 582.0, 579.0, 576.0, 573.0, 522.0, 522.0, 573.0, 525.0, 573.0, 567.0, 579.0, 513.0, 570.0, 573.0, 525.0, 576.0, 525.0, 525.0, 530.0, 579.0, 576.0, 573.0, 525.0, 573.0, 576.0, 522.0, 576.0, 582.0, 525.0, 582.0, 573.0, 525.0, 519.0, 573.0, 582.0, 573.0, 579.0, 573.0, 
570.0, 525.0, 522.0, 576.0, 582.0, 516.0, 525.0, 573.0, 524.0, 530.0, 524.0, 579.0, 573.0, 582.0, 525.0, 582.0, 573.0, 525.0, 522.0, 525.0, 573.0, 525.0, 530.0, 525.0, 519.0, 582.0, 582.0, 573.0, 582.0, 522.0, 525.0, 570.0, 573.0, 519.0, 584.0, 579.0, 519.0, 579.0, 579.0, 573.0, 579.0, 522.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [285.0, 285.0, 281.0, 289.0, 290.0, 289.0, 253.0, 272.0, 279.0, 291.0, 248.0, 274.0, 263.0, 259.0, 291.0, 282.0, 283.0, 290.0, 260.0, 256.0, 284.0, 292.0, 262.0, 268.0, 270.0, 257.0, 255.0, 270.0, 289.0, 290.0, 276.0, 294.0, 267.0, 260.0, 287.0, 295.0, 256.0, 263.0, 289.0, 293.0, 302.0, 277.0, 281.0, 295.0, 279.0, 294.0, 259.0, 263.0, 252.0, 270.0, 293.0, 280.0, 264.0, 261.0, 280.0, 293.0, 301.0, 266.0, 287.0, 292.0, 255.0, 258.0, 273.0, 297.0, 296.0, 277.0, 258.0, 267.0, 284.0, 292.0, 258.0, 267.0, 265.0, 260.0, 255.0, 275.0, 296.0, 283.0, 284.0, 292.0, 281.0, 292.0, 267.0, 258.0, 284.0, 289.0, 289.0, 287.0, 258.0, 264.0, 285.0, 291.0, 293.0, 289.0, 267.0, 258.0, 296.0, 286.0, 294.0, 279.0, 268.0, 257.0, 260.0, 259.0, 290.0, 283.0, 300.0, 282.0, 286.0, 287.0, 286.0, 293.0, 288.0, 285.0, 283.0, 287.0, 261.0, 264.0, 264.0, 258.0, 291.0, 285.0, 297.0, 285.0, 259.0, 257.0, 255.0, 270.0, 291.0, 282.0, 266.0, 258.0, 265.0, 265.0, 268.0, 256.0, 287.0, 292.0, 287.0, 286.0, 287.0, 295.0, 272.0, 253.0, 292.0, 290.0, 285.0, 288.0, 265.0, 260.0, 257.0, 265.0, 271.0, 254.0, 285.0, 288.0, 264.0, 261.0, 265.0, 265.0, 259.0, 266.0, 269.0, 250.0, 290.0, 
292.0, 296.0, 286.0, 294.0, 279.0, 294.0, 288.0, 268.0, 254.0, 264.0, 261.0, 286.0, 284.0, 286.0, 287.0, 262.0, 257.0, 292.0, 292.0, 285.0, 294.0, 268.0, 251.0, 286.0, 293.0, 283.0, 296.0, 287.0, 286.0, 286.0, 293.0, 260.0, 262.0, 285.0, 297.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7071785885693055, "mean_inference_ms": 1.269468341528467, "mean_action_processing_ms": 0.13507871917250727, "mean_env_wait_ms": 0.8519080078969777, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 6400000, "num_agent_steps_trained": 6400000, "num_env_steps_sampled": 3200000, "num_env_steps_trained": 3200000, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 3200000, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 6400000, "timers": {"training_iteration_time_ms": 3602.763, "learn_time_ms": 1076.709, "learn_throughput": 11888.083, "synch_weights_time_ms": 13.686}, "counters": {"num_env_steps_sampled": 3200000, "num_env_steps_trained": 3200000, "num_agent_steps_sampled": 6400000, "num_agent_steps_trained": 6400000}, "done": false, "episodes_total": 8000, "training_iteration": 250, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-16-44", "timestamp": 1666581404, "time_this_iter_s": 3.6658709049224854, "time_total_s": 968.4815909862518, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 968.4815909862518, "timesteps_since_restore": 0, "iterations_since_restore": 250, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.740000000000002, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 192.0, "sparse_reward_min": 180, "sparse_reward_max": 200, "shaped_reward_mean": 171.37, "shaped_reward_min": 153, "shaped_reward_max": 184, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.34, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 17.21, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 14.18, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 17.11, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.05, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.01, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.02, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, 
"useful_onion_drop_agent_1_mean": 0.0, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 0, "potting_onion_agent_0_mean": 14.06, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 16.97, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 5.88, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 4.64, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.58, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.38, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.19, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.16, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.51, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.26, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.41, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.2, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.07, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 3, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 14.06, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 16.97, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.06, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 16.97, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0018597999587655067, "policy_loss": -0.0021993389818817377, "vf_loss": 
7.826320171356201, "vf_explained_var": 0.5782947540283203, "kl": 0.0020893034525215626, "entropy": 0.8861854076385498, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 3212800, "num_env_steps_trained": 3212800, "num_agent_steps_sampled": 6425600, "num_agent_steps_trained": 6425600}, "sampler_results": {"episode_reward_max": 584.0, "episode_reward_min": 513.0, "episode_reward_mean": 555.37, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 250.0}, "policy_reward_max": {"ppo": 300.0}, "policy_reward_mean": {"ppo": 277.685}, "custom_metrics": {"sparse_reward_mean": 192.0, "sparse_reward_min": 180, "sparse_reward_max": 200, "shaped_reward_mean": 171.37, "shaped_reward_min": 153, "shaped_reward_max": 184, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.34, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 17.21, 
"onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 14.18, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 17.11, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.05, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.01, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.02, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.0, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 0, "potting_onion_agent_0_mean": 14.06, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 16.97, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 5.88, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 4.64, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.58, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.38, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.19, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.16, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.51, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.26, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.41, "soup_delivery_agent_0_min": 3, 
"soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.2, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.07, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 3, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 14.06, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 16.97, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.06, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 16.97, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [573.0, 525.0, 576.0, 525.0, 525.0, 530.0, 579.0, 576.0, 573.0, 525.0, 573.0, 576.0, 522.0, 576.0, 582.0, 525.0, 582.0, 573.0, 525.0, 519.0, 573.0, 582.0, 573.0, 579.0, 573.0, 570.0, 525.0, 522.0, 576.0, 582.0, 516.0, 525.0, 573.0, 524.0, 530.0, 524.0, 579.0, 573.0, 582.0, 525.0, 582.0, 573.0, 525.0, 522.0, 525.0, 573.0, 525.0, 530.0, 525.0, 519.0, 582.0, 582.0, 573.0, 582.0, 522.0, 525.0, 570.0, 573.0, 519.0, 584.0, 579.0, 519.0, 579.0, 579.0, 573.0, 579.0, 522.0, 582.0, 570.0, 522.0, 522.0, 527.0, 582.0, 513.0, 516.0, 582.0, 579.0, 582.0, 573.0, 522.0, 579.0, 522.0, 576.0, 573.0, 579.0, 570.0, 525.0, 573.0, 530.0, 576.0, 525.0, 582.0, 570.0, 573.0, 570.0, 582.0, 579.0, 573.0, 519.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [296.0, 277.0, 258.0, 267.0, 284.0, 292.0, 258.0, 267.0, 265.0, 260.0, 255.0, 275.0, 296.0, 283.0, 284.0, 292.0, 281.0, 292.0, 267.0, 258.0, 284.0, 289.0, 289.0, 287.0, 258.0, 264.0, 285.0, 291.0, 293.0, 289.0, 267.0, 258.0, 296.0, 286.0, 294.0, 279.0, 268.0, 257.0, 260.0, 259.0, 290.0, 283.0, 300.0, 282.0, 286.0, 287.0, 286.0, 293.0, 288.0, 
285.0, 283.0, 287.0, 261.0, 264.0, 264.0, 258.0, 291.0, 285.0, 297.0, 285.0, 259.0, 257.0, 255.0, 270.0, 291.0, 282.0, 266.0, 258.0, 265.0, 265.0, 268.0, 256.0, 287.0, 292.0, 287.0, 286.0, 287.0, 295.0, 272.0, 253.0, 292.0, 290.0, 285.0, 288.0, 265.0, 260.0, 257.0, 265.0, 271.0, 254.0, 285.0, 288.0, 264.0, 261.0, 265.0, 265.0, 259.0, 266.0, 269.0, 250.0, 290.0, 292.0, 296.0, 286.0, 294.0, 279.0, 294.0, 288.0, 268.0, 254.0, 264.0, 261.0, 286.0, 284.0, 286.0, 287.0, 262.0, 257.0, 292.0, 292.0, 285.0, 294.0, 268.0, 251.0, 286.0, 293.0, 283.0, 296.0, 287.0, 286.0, 286.0, 293.0, 260.0, 262.0, 285.0, 297.0, 286.0, 284.0, 258.0, 264.0, 261.0, 261.0, 260.0, 267.0, 300.0, 282.0, 253.0, 260.0, 251.0, 265.0, 291.0, 291.0, 297.0, 282.0, 289.0, 293.0, 277.0, 296.0, 264.0, 258.0, 295.0, 284.0, 265.0, 257.0, 289.0, 287.0, 284.0, 289.0, 285.0, 294.0, 284.0, 286.0, 264.0, 261.0, 283.0, 290.0, 269.0, 261.0, 293.0, 283.0, 258.0, 267.0, 289.0, 293.0, 294.0, 276.0, 282.0, 291.0, 285.0, 285.0, 288.0, 294.0, 288.0, 291.0, 276.0, 297.0, 258.0, 261.0, 296.0, 286.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7071430805255033, "mean_inference_ms": 1.2692005567694524, "mean_action_processing_ms": 0.13507150565449955, "mean_env_wait_ms": 0.8517631405985867, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 584.0, "episode_reward_min": 513.0, "episode_reward_mean": 555.37, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 250.0}, "policy_reward_max": {"ppo": 300.0}, "policy_reward_mean": {"ppo": 277.685}, "hist_stats": {"episode_reward": [573.0, 525.0, 576.0, 525.0, 525.0, 530.0, 579.0, 576.0, 573.0, 525.0, 573.0, 576.0, 522.0, 576.0, 582.0, 525.0, 582.0, 573.0, 525.0, 519.0, 573.0, 582.0, 573.0, 579.0, 573.0, 570.0, 525.0, 522.0, 576.0, 582.0, 516.0, 525.0, 573.0, 524.0, 530.0, 524.0, 579.0, 573.0, 582.0, 525.0, 582.0, 573.0, 525.0, 522.0, 525.0, 573.0, 525.0, 530.0, 525.0, 519.0, 582.0, 582.0, 573.0, 582.0, 522.0, 525.0, 
570.0, 573.0, 519.0, 584.0, 579.0, 519.0, 579.0, 579.0, 573.0, 579.0, 522.0, 582.0, 570.0, 522.0, 522.0, 527.0, 582.0, 513.0, 516.0, 582.0, 579.0, 582.0, 573.0, 522.0, 579.0, 522.0, 576.0, 573.0, 579.0, 570.0, 525.0, 573.0, 530.0, 576.0, 525.0, 582.0, 570.0, 573.0, 570.0, 582.0, 579.0, 573.0, 519.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [296.0, 277.0, 258.0, 267.0, 284.0, 292.0, 258.0, 267.0, 265.0, 260.0, 255.0, 275.0, 296.0, 283.0, 284.0, 292.0, 281.0, 292.0, 267.0, 258.0, 284.0, 289.0, 289.0, 287.0, 258.0, 264.0, 285.0, 291.0, 293.0, 289.0, 267.0, 258.0, 296.0, 286.0, 294.0, 279.0, 268.0, 257.0, 260.0, 259.0, 290.0, 283.0, 300.0, 282.0, 286.0, 287.0, 286.0, 293.0, 288.0, 285.0, 283.0, 287.0, 261.0, 264.0, 264.0, 258.0, 291.0, 285.0, 297.0, 285.0, 259.0, 257.0, 255.0, 270.0, 291.0, 282.0, 266.0, 258.0, 265.0, 265.0, 268.0, 256.0, 287.0, 292.0, 287.0, 286.0, 287.0, 295.0, 272.0, 253.0, 292.0, 290.0, 285.0, 288.0, 265.0, 260.0, 257.0, 265.0, 271.0, 254.0, 285.0, 288.0, 264.0, 261.0, 265.0, 265.0, 259.0, 266.0, 269.0, 250.0, 290.0, 292.0, 296.0, 286.0, 294.0, 279.0, 294.0, 288.0, 268.0, 254.0, 264.0, 261.0, 286.0, 284.0, 286.0, 287.0, 262.0, 257.0, 292.0, 292.0, 285.0, 294.0, 268.0, 251.0, 286.0, 293.0, 283.0, 296.0, 287.0, 286.0, 286.0, 293.0, 260.0, 262.0, 285.0, 297.0, 286.0, 284.0, 258.0, 264.0, 261.0, 261.0, 260.0, 267.0, 300.0, 282.0, 253.0, 260.0, 251.0, 265.0, 291.0, 291.0, 297.0, 282.0, 289.0, 293.0, 277.0, 296.0, 264.0, 258.0, 295.0, 284.0, 265.0, 257.0, 
289.0, 287.0, 284.0, 289.0, 285.0, 294.0, 284.0, 286.0, 264.0, 261.0, 283.0, 290.0, 269.0, 261.0, 293.0, 283.0, 258.0, 267.0, 289.0, 293.0, 294.0, 276.0, 282.0, 291.0, 285.0, 285.0, 288.0, 294.0, 288.0, 291.0, 276.0, 297.0, 258.0, 261.0, 296.0, 286.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7071430805255033, "mean_inference_ms": 1.2692005567694524, "mean_action_processing_ms": 0.13507150565449955, "mean_env_wait_ms": 0.8517631405985867, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 6425600, "num_agent_steps_trained": 6425600, "num_env_steps_sampled": 3212800, "num_env_steps_trained": 3212800, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 3212800, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 6425600, "timers": {"training_iteration_time_ms": 3631.708, "learn_time_ms": 1079.384, "learn_throughput": 11858.62, "synch_weights_time_ms": 13.293}, "counters": {"num_env_steps_sampled": 3212800, "num_env_steps_trained": 3212800, "num_agent_steps_sampled": 6425600, "num_agent_steps_trained": 6425600}, "done": false, "episodes_total": 8032, "training_iteration": 251, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-16-48", "timestamp": 1666581408, "time_this_iter_s": 3.9061965942382812, "time_total_s": 972.3877875804901, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 972.3877875804901, "timesteps_since_restore": 0, "iterations_since_restore": 251, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 21.066666666666666, "ram_util_percent": 10.616666666666667}}
+{"custom_metrics": {"sparse_reward_mean": 190.0, "sparse_reward_min": 40, "sparse_reward_max": 220, "shaped_reward_mean": 170.32, "shaped_reward_min": 71, "shaped_reward_max": 187, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.7, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 16.9, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 14.54, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 16.64, "useful_onion_pickup_agent_1_min": 4, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.08, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.21, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 13, "useful_onion_drop_agent_0_mean": 0.04, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, 
"useful_onion_drop_agent_1_mean": 0.15, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 13, "potting_onion_agent_0_mean": 14.38, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 16.45, "potting_onion_agent_1_min": 3, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 5.65, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 4.81, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.36, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.55, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.18, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.14, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.27, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.47, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.16, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.37, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.07, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 3, "soup_drop_agent_1_mean": 0.07, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 14.38, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 16.45, "optimal_onion_potting_agent_1_min": 3, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.38, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 16.45, "viable_onion_potting_agent_1_min": 3, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.006767911836504936, "policy_loss": 0.006420888472348452, "vf_loss": 
7.912266731262207, "vf_explained_var": 0.6025801301002502, "kl": 0.002451230539008975, "entropy": 0.8884056210517883, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 3225600, "num_env_steps_trained": 3225600, "num_agent_steps_sampled": 6451200, "num_agent_steps_trained": 6451200}, "sampler_results": {"episode_reward_max": 627.0, "episode_reward_min": 151.0, "episode_reward_mean": 550.32, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 65.0}, "policy_reward_max": {"ppo": 316.0}, "policy_reward_mean": {"ppo": 275.16}, "custom_metrics": {"sparse_reward_mean": 190.0, "sparse_reward_min": 40, "sparse_reward_max": 220, "shaped_reward_mean": 170.32, "shaped_reward_min": 71, "shaped_reward_max": 187, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.7, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 16.9, 
"onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 14.54, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 16.64, "useful_onion_pickup_agent_1_min": 4, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.08, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.21, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 13, "useful_onion_drop_agent_0_mean": 0.04, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.15, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 13, "potting_onion_agent_0_mean": 14.38, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 16.45, "potting_onion_agent_1_min": 3, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 5.65, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 4.81, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.36, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.55, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.18, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.14, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.27, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.47, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.16, "soup_delivery_agent_0_min": 2, 
"soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.37, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.07, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 3, "soup_drop_agent_1_mean": 0.07, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 14.38, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 16.45, "optimal_onion_potting_agent_1_min": 3, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.38, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 16.45, "viable_onion_potting_agent_1_min": 3, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [573.0, 524.0, 530.0, 524.0, 579.0, 573.0, 582.0, 525.0, 582.0, 573.0, 525.0, 522.0, 525.0, 573.0, 525.0, 530.0, 525.0, 519.0, 582.0, 582.0, 573.0, 582.0, 522.0, 525.0, 570.0, 573.0, 519.0, 584.0, 579.0, 519.0, 579.0, 579.0, 573.0, 579.0, 522.0, 582.0, 570.0, 522.0, 522.0, 527.0, 582.0, 513.0, 516.0, 582.0, 579.0, 582.0, 573.0, 522.0, 579.0, 522.0, 576.0, 573.0, 579.0, 570.0, 525.0, 573.0, 530.0, 576.0, 525.0, 582.0, 570.0, 573.0, 570.0, 582.0, 579.0, 573.0, 519.0, 582.0, 522.0, 576.0, 582.0, 573.0, 151.0, 527.0, 525.0, 573.0, 525.0, 525.0, 522.0, 516.0, 525.0, 582.0, 530.0, 579.0, 530.0, 519.0, 582.0, 522.0, 582.0, 576.0, 525.0, 579.0, 576.0, 525.0, 522.0, 576.0, 579.0, 627.0, 576.0, 522.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [291.0, 282.0, 266.0, 258.0, 265.0, 265.0, 268.0, 256.0, 287.0, 292.0, 287.0, 286.0, 287.0, 295.0, 272.0, 253.0, 292.0, 290.0, 285.0, 288.0, 265.0, 260.0, 257.0, 265.0, 271.0, 254.0, 285.0, 288.0, 264.0, 261.0, 265.0, 265.0, 259.0, 266.0, 269.0, 250.0, 290.0, 292.0, 296.0, 286.0, 294.0, 279.0, 294.0, 288.0, 268.0, 254.0, 264.0, 261.0, 286.0, 
284.0, 286.0, 287.0, 262.0, 257.0, 292.0, 292.0, 285.0, 294.0, 268.0, 251.0, 286.0, 293.0, 283.0, 296.0, 287.0, 286.0, 286.0, 293.0, 260.0, 262.0, 285.0, 297.0, 286.0, 284.0, 258.0, 264.0, 261.0, 261.0, 260.0, 267.0, 300.0, 282.0, 253.0, 260.0, 251.0, 265.0, 291.0, 291.0, 297.0, 282.0, 289.0, 293.0, 277.0, 296.0, 264.0, 258.0, 295.0, 284.0, 265.0, 257.0, 289.0, 287.0, 284.0, 289.0, 285.0, 294.0, 284.0, 286.0, 264.0, 261.0, 283.0, 290.0, 269.0, 261.0, 293.0, 283.0, 258.0, 267.0, 289.0, 293.0, 294.0, 276.0, 282.0, 291.0, 285.0, 285.0, 288.0, 294.0, 288.0, 291.0, 276.0, 297.0, 258.0, 261.0, 296.0, 286.0, 259.0, 263.0, 284.0, 292.0, 287.0, 295.0, 289.0, 284.0, 86.0, 65.0, 256.0, 271.0, 256.0, 269.0, 289.0, 284.0, 268.0, 257.0, 263.0, 262.0, 269.0, 253.0, 271.0, 245.0, 261.0, 264.0, 287.0, 295.0, 259.0, 271.0, 294.0, 285.0, 266.0, 264.0, 257.0, 262.0, 285.0, 297.0, 261.0, 261.0, 287.0, 295.0, 278.0, 298.0, 263.0, 262.0, 293.0, 286.0, 288.0, 288.0, 256.0, 269.0, 252.0, 270.0, 293.0, 283.0, 295.0, 284.0, 316.0, 311.0, 287.0, 289.0, 253.0, 269.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7070981576947253, "mean_inference_ms": 1.2691200903714683, "mean_action_processing_ms": 0.13505852038860622, "mean_env_wait_ms": 0.851625419616148, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 627.0, "episode_reward_min": 151.0, "episode_reward_mean": 550.32, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 65.0}, "policy_reward_max": {"ppo": 316.0}, "policy_reward_mean": {"ppo": 275.16}, "hist_stats": {"episode_reward": [573.0, 524.0, 530.0, 524.0, 579.0, 573.0, 582.0, 525.0, 582.0, 573.0, 525.0, 522.0, 525.0, 573.0, 525.0, 530.0, 525.0, 519.0, 582.0, 582.0, 573.0, 582.0, 522.0, 525.0, 570.0, 573.0, 519.0, 584.0, 579.0, 519.0, 579.0, 579.0, 573.0, 579.0, 522.0, 582.0, 570.0, 522.0, 522.0, 527.0, 582.0, 513.0, 516.0, 582.0, 579.0, 582.0, 573.0, 522.0, 579.0, 522.0, 576.0, 573.0, 579.0, 570.0, 525.0, 573.0, 530.0, 
576.0, 525.0, 582.0, 570.0, 573.0, 570.0, 582.0, 579.0, 573.0, 519.0, 582.0, 522.0, 576.0, 582.0, 573.0, 151.0, 527.0, 525.0, 573.0, 525.0, 525.0, 522.0, 516.0, 525.0, 582.0, 530.0, 579.0, 530.0, 519.0, 582.0, 522.0, 582.0, 576.0, 525.0, 579.0, 576.0, 525.0, 522.0, 576.0, 579.0, 627.0, 576.0, 522.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [291.0, 282.0, 266.0, 258.0, 265.0, 265.0, 268.0, 256.0, 287.0, 292.0, 287.0, 286.0, 287.0, 295.0, 272.0, 253.0, 292.0, 290.0, 285.0, 288.0, 265.0, 260.0, 257.0, 265.0, 271.0, 254.0, 285.0, 288.0, 264.0, 261.0, 265.0, 265.0, 259.0, 266.0, 269.0, 250.0, 290.0, 292.0, 296.0, 286.0, 294.0, 279.0, 294.0, 288.0, 268.0, 254.0, 264.0, 261.0, 286.0, 284.0, 286.0, 287.0, 262.0, 257.0, 292.0, 292.0, 285.0, 294.0, 268.0, 251.0, 286.0, 293.0, 283.0, 296.0, 287.0, 286.0, 286.0, 293.0, 260.0, 262.0, 285.0, 297.0, 286.0, 284.0, 258.0, 264.0, 261.0, 261.0, 260.0, 267.0, 300.0, 282.0, 253.0, 260.0, 251.0, 265.0, 291.0, 291.0, 297.0, 282.0, 289.0, 293.0, 277.0, 296.0, 264.0, 258.0, 295.0, 284.0, 265.0, 257.0, 289.0, 287.0, 284.0, 289.0, 285.0, 294.0, 284.0, 286.0, 264.0, 261.0, 283.0, 290.0, 269.0, 261.0, 293.0, 283.0, 258.0, 267.0, 289.0, 293.0, 294.0, 276.0, 282.0, 291.0, 285.0, 285.0, 288.0, 294.0, 288.0, 291.0, 276.0, 297.0, 258.0, 261.0, 296.0, 286.0, 259.0, 263.0, 284.0, 292.0, 287.0, 295.0, 289.0, 284.0, 86.0, 65.0, 256.0, 271.0, 256.0, 269.0, 289.0, 284.0, 268.0, 257.0, 263.0, 262.0, 269.0, 253.0, 271.0, 245.0, 261.0, 264.0, 287.0, 295.0, 259.0, 
271.0, 294.0, 285.0, 266.0, 264.0, 257.0, 262.0, 285.0, 297.0, 261.0, 261.0, 287.0, 295.0, 278.0, 298.0, 263.0, 262.0, 293.0, 286.0, 288.0, 288.0, 256.0, 269.0, 252.0, 270.0, 293.0, 283.0, 295.0, 284.0, 316.0, 311.0, 287.0, 289.0, 253.0, 269.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7070981576947253, "mean_inference_ms": 1.2691200903714683, "mean_action_processing_ms": 0.13505852038860622, "mean_env_wait_ms": 0.851625419616148, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 6451200, "num_agent_steps_trained": 6451200, "num_env_steps_sampled": 3225600, "num_env_steps_trained": 3225600, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 3225600, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 6451200, "timers": {"training_iteration_time_ms": 3663.845, "learn_time_ms": 1085.247, "learn_throughput": 11794.546, "synch_weights_time_ms": 14.734}, "counters": {"num_env_steps_sampled": 3225600, "num_env_steps_trained": 3225600, "num_agent_steps_sampled": 6451200, "num_agent_steps_trained": 6451200}, "done": false, "episodes_total": 8064, "training_iteration": 252, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-16-52", "timestamp": 1666581412, "time_this_iter_s": 3.8708066940307617, "time_total_s": 976.2585942745209, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 976.2585942745209, "timesteps_since_restore": 0, "iterations_since_restore": 252, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 21.3, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 190.2, "sparse_reward_min": 40, "sparse_reward_max": 220, "shaped_reward_mean": 170.02, "shaped_reward_min": 71, "shaped_reward_max": 187, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.72, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 16.99, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 14.55, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 16.68, "useful_onion_pickup_agent_1_min": 4, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.12, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.24, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 13, "useful_onion_drop_agent_0_mean": 0.05, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, 
"useful_onion_drop_agent_1_mean": 0.17, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 13, "potting_onion_agent_0_mean": 14.29, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 16.47, "potting_onion_agent_1_min": 3, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.57, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 4.93, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.25, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.63, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.23, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.14, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.18, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.56, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.09, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.46, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.06, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.07, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 14.29, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 16.47, "optimal_onion_potting_agent_1_min": 3, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.29, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 16.47, "viable_onion_potting_agent_1_min": 3, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0021860229317098856, "policy_loss": -0.0025187181308865547, "vf_loss": 
7.718568325042725, "vf_explained_var": 0.5755909085273743, "kl": 0.0020678252913057804, "entropy": 0.8783210515975952, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 3238400, "num_env_steps_trained": 3238400, "num_agent_steps_sampled": 6476800, "num_agent_steps_trained": 6476800}, "sampler_results": {"episode_reward_max": 627.0, "episode_reward_min": 151.0, "episode_reward_mean": 550.42, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 65.0}, "policy_reward_max": {"ppo": 316.0}, "policy_reward_mean": {"ppo": 275.21}, "custom_metrics": {"sparse_reward_mean": 190.2, "sparse_reward_min": 40, "sparse_reward_max": 220, "shaped_reward_mean": 170.02, "shaped_reward_min": 71, "shaped_reward_max": 187, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.72, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 16.99, 
"onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 14.55, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 16.68, "useful_onion_pickup_agent_1_min": 4, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.12, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.24, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 13, "useful_onion_drop_agent_0_mean": 0.05, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.17, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 13, "potting_onion_agent_0_mean": 14.29, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 16.47, "potting_onion_agent_1_min": 3, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.57, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 4.93, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.25, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.63, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.23, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.14, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.18, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.56, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.09, "soup_delivery_agent_0_min": 2, 
"soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.46, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.06, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.07, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 14.29, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 16.47, "optimal_onion_potting_agent_1_min": 3, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.29, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 16.47, "viable_onion_potting_agent_1_min": 3, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [573.0, 579.0, 522.0, 582.0, 570.0, 522.0, 522.0, 527.0, 582.0, 513.0, 516.0, 582.0, 579.0, 582.0, 573.0, 522.0, 579.0, 522.0, 576.0, 573.0, 579.0, 570.0, 525.0, 573.0, 530.0, 576.0, 525.0, 582.0, 570.0, 573.0, 570.0, 582.0, 579.0, 573.0, 519.0, 582.0, 522.0, 576.0, 582.0, 573.0, 151.0, 527.0, 525.0, 573.0, 525.0, 525.0, 522.0, 516.0, 525.0, 582.0, 530.0, 579.0, 530.0, 519.0, 582.0, 522.0, 582.0, 576.0, 525.0, 579.0, 576.0, 525.0, 522.0, 576.0, 579.0, 627.0, 576.0, 522.0, 573.0, 516.0, 573.0, 522.0, 570.0, 576.0, 576.0, 576.0, 579.0, 570.0, 525.0, 576.0, 525.0, 579.0, 579.0, 522.0, 573.0, 530.0, 576.0, 579.0, 533.0, 576.0, 522.0, 527.0, 525.0, 573.0, 570.0, 576.0, 510.0, 533.0, 522.0, 525.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [287.0, 286.0, 286.0, 293.0, 260.0, 262.0, 285.0, 297.0, 286.0, 284.0, 258.0, 264.0, 261.0, 261.0, 260.0, 267.0, 300.0, 282.0, 253.0, 260.0, 251.0, 265.0, 291.0, 291.0, 297.0, 282.0, 289.0, 293.0, 277.0, 296.0, 264.0, 258.0, 295.0, 284.0, 265.0, 257.0, 289.0, 287.0, 284.0, 289.0, 285.0, 294.0, 284.0, 286.0, 264.0, 261.0, 283.0, 290.0, 269.0, 
261.0, 293.0, 283.0, 258.0, 267.0, 289.0, 293.0, 294.0, 276.0, 282.0, 291.0, 285.0, 285.0, 288.0, 294.0, 288.0, 291.0, 276.0, 297.0, 258.0, 261.0, 296.0, 286.0, 259.0, 263.0, 284.0, 292.0, 287.0, 295.0, 289.0, 284.0, 86.0, 65.0, 256.0, 271.0, 256.0, 269.0, 289.0, 284.0, 268.0, 257.0, 263.0, 262.0, 269.0, 253.0, 271.0, 245.0, 261.0, 264.0, 287.0, 295.0, 259.0, 271.0, 294.0, 285.0, 266.0, 264.0, 257.0, 262.0, 285.0, 297.0, 261.0, 261.0, 287.0, 295.0, 278.0, 298.0, 263.0, 262.0, 293.0, 286.0, 288.0, 288.0, 256.0, 269.0, 252.0, 270.0, 293.0, 283.0, 295.0, 284.0, 316.0, 311.0, 287.0, 289.0, 253.0, 269.0, 296.0, 277.0, 279.0, 237.0, 282.0, 291.0, 246.0, 276.0, 287.0, 283.0, 289.0, 287.0, 292.0, 284.0, 289.0, 287.0, 291.0, 288.0, 292.0, 278.0, 266.0, 259.0, 291.0, 285.0, 262.0, 263.0, 281.0, 298.0, 279.0, 300.0, 263.0, 259.0, 288.0, 285.0, 259.0, 271.0, 286.0, 290.0, 286.0, 293.0, 265.0, 268.0, 281.0, 295.0, 250.0, 272.0, 266.0, 261.0, 259.0, 266.0, 284.0, 289.0, 274.0, 296.0, 283.0, 293.0, 251.0, 259.0, 269.0, 264.0, 259.0, 263.0, 264.0, 261.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7070492393693716, "mean_inference_ms": 1.2690477704332812, "mean_action_processing_ms": 0.135043148832932, "mean_env_wait_ms": 0.8514795009637173, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 627.0, "episode_reward_min": 151.0, "episode_reward_mean": 550.42, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 65.0}, "policy_reward_max": {"ppo": 316.0}, "policy_reward_mean": {"ppo": 275.21}, "hist_stats": {"episode_reward": [573.0, 579.0, 522.0, 582.0, 570.0, 522.0, 522.0, 527.0, 582.0, 513.0, 516.0, 582.0, 579.0, 582.0, 573.0, 522.0, 579.0, 522.0, 576.0, 573.0, 579.0, 570.0, 525.0, 573.0, 530.0, 576.0, 525.0, 582.0, 570.0, 573.0, 570.0, 582.0, 579.0, 573.0, 519.0, 582.0, 522.0, 576.0, 582.0, 573.0, 151.0, 527.0, 525.0, 573.0, 525.0, 525.0, 522.0, 516.0, 525.0, 582.0, 530.0, 579.0, 530.0, 519.0, 582.0, 522.0, 582.0, 
576.0, 525.0, 579.0, 576.0, 525.0, 522.0, 576.0, 579.0, 627.0, 576.0, 522.0, 573.0, 516.0, 573.0, 522.0, 570.0, 576.0, 576.0, 576.0, 579.0, 570.0, 525.0, 576.0, 525.0, 579.0, 579.0, 522.0, 573.0, 530.0, 576.0, 579.0, 533.0, 576.0, 522.0, 527.0, 525.0, 573.0, 570.0, 576.0, 510.0, 533.0, 522.0, 525.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [287.0, 286.0, 286.0, 293.0, 260.0, 262.0, 285.0, 297.0, 286.0, 284.0, 258.0, 264.0, 261.0, 261.0, 260.0, 267.0, 300.0, 282.0, 253.0, 260.0, 251.0, 265.0, 291.0, 291.0, 297.0, 282.0, 289.0, 293.0, 277.0, 296.0, 264.0, 258.0, 295.0, 284.0, 265.0, 257.0, 289.0, 287.0, 284.0, 289.0, 285.0, 294.0, 284.0, 286.0, 264.0, 261.0, 283.0, 290.0, 269.0, 261.0, 293.0, 283.0, 258.0, 267.0, 289.0, 293.0, 294.0, 276.0, 282.0, 291.0, 285.0, 285.0, 288.0, 294.0, 288.0, 291.0, 276.0, 297.0, 258.0, 261.0, 296.0, 286.0, 259.0, 263.0, 284.0, 292.0, 287.0, 295.0, 289.0, 284.0, 86.0, 65.0, 256.0, 271.0, 256.0, 269.0, 289.0, 284.0, 268.0, 257.0, 263.0, 262.0, 269.0, 253.0, 271.0, 245.0, 261.0, 264.0, 287.0, 295.0, 259.0, 271.0, 294.0, 285.0, 266.0, 264.0, 257.0, 262.0, 285.0, 297.0, 261.0, 261.0, 287.0, 295.0, 278.0, 298.0, 263.0, 262.0, 293.0, 286.0, 288.0, 288.0, 256.0, 269.0, 252.0, 270.0, 293.0, 283.0, 295.0, 284.0, 316.0, 311.0, 287.0, 289.0, 253.0, 269.0, 296.0, 277.0, 279.0, 237.0, 282.0, 291.0, 246.0, 276.0, 287.0, 283.0, 289.0, 287.0, 292.0, 284.0, 289.0, 287.0, 291.0, 288.0, 292.0, 278.0, 266.0, 259.0, 291.0, 285.0, 262.0, 263.0, 281.0, 298.0, 279.0, 
300.0, 263.0, 259.0, 288.0, 285.0, 259.0, 271.0, 286.0, 290.0, 286.0, 293.0, 265.0, 268.0, 281.0, 295.0, 250.0, 272.0, 266.0, 261.0, 259.0, 266.0, 284.0, 289.0, 274.0, 296.0, 283.0, 293.0, 251.0, 259.0, 269.0, 264.0, 259.0, 263.0, 264.0, 261.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7070492393693716, "mean_inference_ms": 1.2690477704332812, "mean_action_processing_ms": 0.135043148832932, "mean_env_wait_ms": 0.8514795009637173, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 6476800, "num_agent_steps_trained": 6476800, "num_env_steps_sampled": 3238400, "num_env_steps_trained": 3238400, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 3238400, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 6476800, "timers": {"training_iteration_time_ms": 3652.753, "learn_time_ms": 1082.801, "learn_throughput": 11821.191, "synch_weights_time_ms": 14.031}, "counters": {"num_env_steps_sampled": 3238400, "num_env_steps_trained": 3238400, "num_agent_steps_sampled": 6476800, "num_agent_steps_trained": 6476800}, "done": false, "episodes_total": 8096, "training_iteration": 253, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-16-55", "timestamp": 1666581415, "time_this_iter_s": 3.622879981994629, "time_total_s": 979.8814742565155, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 979.8814742565155, "timesteps_since_restore": 0, "iterations_since_restore": 253, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.76, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 191.2, "sparse_reward_min": 40, "sparse_reward_max": 220, "shaped_reward_mean": 170.87, "shaped_reward_min": 71, "shaped_reward_max": 187, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.35, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 16.57, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 15.17, "useful_onion_pickup_agent_0_min": 9, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 16.25, "useful_onion_pickup_agent_1_min": 4, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.24, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.27, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 13, "useful_onion_drop_agent_0_mean": 0.06, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, 
"useful_onion_drop_agent_1_mean": 0.17, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 13, "potting_onion_agent_0_mean": 14.83, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 16.01, "potting_onion_agent_1_min": 3, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.4, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.18, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.09, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.91, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.23, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.13, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.01, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.76, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.92, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.69, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.07, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.04, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 14.83, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 16.01, "optimal_onion_potting_agent_1_min": 3, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.83, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 16.01, "viable_onion_potting_agent_1_min": 3, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0007734141545370221, "policy_loss": 0.0004366911016404629, "vf_loss": 
7.683775424957275, "vf_explained_var": 0.6006571054458618, "kl": 0.002255759434774518, "entropy": 0.8633049130439758, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 3251200, "num_env_steps_trained": 3251200, "num_agent_steps_sampled": 6502400, "num_agent_steps_trained": 6502400}, "sampler_results": {"episode_reward_max": 627.0, "episode_reward_min": 151.0, "episode_reward_mean": 553.27, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 65.0}, "policy_reward_max": {"ppo": 316.0}, "policy_reward_mean": {"ppo": 276.635}, "custom_metrics": {"sparse_reward_mean": 191.2, "sparse_reward_min": 40, "sparse_reward_max": 220, "shaped_reward_mean": 170.87, "shaped_reward_min": 71, "shaped_reward_max": 187, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.35, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 16.57, 
"onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 15.17, "useful_onion_pickup_agent_0_min": 9, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 16.25, "useful_onion_pickup_agent_1_min": 4, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.24, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.27, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 13, "useful_onion_drop_agent_0_mean": 0.06, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.17, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 13, "potting_onion_agent_0_mean": 14.83, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 16.01, "potting_onion_agent_1_min": 3, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.4, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.18, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.09, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.91, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.23, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.13, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.01, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.76, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.92, "soup_delivery_agent_0_min": 2, 
"soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.69, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.07, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.04, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 14.83, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 16.01, "optimal_onion_potting_agent_1_min": 3, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.83, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 16.01, "viable_onion_potting_agent_1_min": 3, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 573.0, 519.0, 582.0, 522.0, 576.0, 582.0, 573.0, 151.0, 527.0, 525.0, 573.0, 525.0, 525.0, 522.0, 516.0, 525.0, 582.0, 530.0, 579.0, 530.0, 519.0, 582.0, 522.0, 582.0, 576.0, 525.0, 579.0, 576.0, 525.0, 522.0, 576.0, 579.0, 627.0, 576.0, 522.0, 573.0, 516.0, 573.0, 522.0, 570.0, 576.0, 576.0, 576.0, 579.0, 570.0, 525.0, 576.0, 525.0, 579.0, 579.0, 522.0, 573.0, 530.0, 576.0, 579.0, 533.0, 576.0, 522.0, 527.0, 525.0, 573.0, 570.0, 576.0, 510.0, 533.0, 522.0, 525.0, 582.0, 579.0, 579.0, 573.0, 527.0, 576.0, 573.0, 582.0, 579.0, 525.0, 582.0, 579.0, 576.0, 570.0, 576.0, 567.0, 582.0, 576.0, 579.0, 519.0, 525.0, 576.0, 573.0, 527.0, 525.0, 576.0, 579.0, 573.0, 573.0, 582.0, 570.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [288.0, 291.0, 276.0, 297.0, 258.0, 261.0, 296.0, 286.0, 259.0, 263.0, 284.0, 292.0, 287.0, 295.0, 289.0, 284.0, 86.0, 65.0, 256.0, 271.0, 256.0, 269.0, 289.0, 284.0, 268.0, 257.0, 263.0, 262.0, 269.0, 253.0, 271.0, 245.0, 261.0, 264.0, 287.0, 295.0, 259.0, 271.0, 294.0, 285.0, 266.0, 264.0, 257.0, 262.0, 285.0, 297.0, 261.0, 261.0, 287.0, 
295.0, 278.0, 298.0, 263.0, 262.0, 293.0, 286.0, 288.0, 288.0, 256.0, 269.0, 252.0, 270.0, 293.0, 283.0, 295.0, 284.0, 316.0, 311.0, 287.0, 289.0, 253.0, 269.0, 296.0, 277.0, 279.0, 237.0, 282.0, 291.0, 246.0, 276.0, 287.0, 283.0, 289.0, 287.0, 292.0, 284.0, 289.0, 287.0, 291.0, 288.0, 292.0, 278.0, 266.0, 259.0, 291.0, 285.0, 262.0, 263.0, 281.0, 298.0, 279.0, 300.0, 263.0, 259.0, 288.0, 285.0, 259.0, 271.0, 286.0, 290.0, 286.0, 293.0, 265.0, 268.0, 281.0, 295.0, 250.0, 272.0, 266.0, 261.0, 259.0, 266.0, 284.0, 289.0, 274.0, 296.0, 283.0, 293.0, 251.0, 259.0, 269.0, 264.0, 259.0, 263.0, 264.0, 261.0, 288.0, 294.0, 299.0, 280.0, 281.0, 298.0, 297.0, 276.0, 260.0, 267.0, 297.0, 279.0, 291.0, 282.0, 288.0, 294.0, 284.0, 295.0, 264.0, 261.0, 291.0, 291.0, 289.0, 290.0, 288.0, 288.0, 279.0, 291.0, 288.0, 288.0, 274.0, 293.0, 289.0, 293.0, 284.0, 292.0, 286.0, 293.0, 258.0, 261.0, 257.0, 268.0, 287.0, 289.0, 279.0, 294.0, 266.0, 261.0, 265.0, 260.0, 295.0, 281.0, 287.0, 292.0, 279.0, 294.0, 288.0, 285.0, 285.0, 297.0, 290.0, 280.0, 283.0, 293.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.706976358210824, "mean_inference_ms": 1.2689465769288457, "mean_action_processing_ms": 0.13502696676780063, "mean_env_wait_ms": 0.8513285528676952, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 627.0, "episode_reward_min": 151.0, "episode_reward_mean": 553.27, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 65.0}, "policy_reward_max": {"ppo": 316.0}, "policy_reward_mean": {"ppo": 276.635}, "hist_stats": {"episode_reward": [579.0, 573.0, 519.0, 582.0, 522.0, 576.0, 582.0, 573.0, 151.0, 527.0, 525.0, 573.0, 525.0, 525.0, 522.0, 516.0, 525.0, 582.0, 530.0, 579.0, 530.0, 519.0, 582.0, 522.0, 582.0, 576.0, 525.0, 579.0, 576.0, 525.0, 522.0, 576.0, 579.0, 627.0, 576.0, 522.0, 573.0, 516.0, 573.0, 522.0, 570.0, 576.0, 576.0, 576.0, 579.0, 570.0, 525.0, 576.0, 525.0, 579.0, 579.0, 522.0, 573.0, 530.0, 576.0, 579.0, 533.0, 
576.0, 522.0, 527.0, 525.0, 573.0, 570.0, 576.0, 510.0, 533.0, 522.0, 525.0, 582.0, 579.0, 579.0, 573.0, 527.0, 576.0, 573.0, 582.0, 579.0, 525.0, 582.0, 579.0, 576.0, 570.0, 576.0, 567.0, 582.0, 576.0, 579.0, 519.0, 525.0, 576.0, 573.0, 527.0, 525.0, 576.0, 579.0, 573.0, 573.0, 582.0, 570.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [288.0, 291.0, 276.0, 297.0, 258.0, 261.0, 296.0, 286.0, 259.0, 263.0, 284.0, 292.0, 287.0, 295.0, 289.0, 284.0, 86.0, 65.0, 256.0, 271.0, 256.0, 269.0, 289.0, 284.0, 268.0, 257.0, 263.0, 262.0, 269.0, 253.0, 271.0, 245.0, 261.0, 264.0, 287.0, 295.0, 259.0, 271.0, 294.0, 285.0, 266.0, 264.0, 257.0, 262.0, 285.0, 297.0, 261.0, 261.0, 287.0, 295.0, 278.0, 298.0, 263.0, 262.0, 293.0, 286.0, 288.0, 288.0, 256.0, 269.0, 252.0, 270.0, 293.0, 283.0, 295.0, 284.0, 316.0, 311.0, 287.0, 289.0, 253.0, 269.0, 296.0, 277.0, 279.0, 237.0, 282.0, 291.0, 246.0, 276.0, 287.0, 283.0, 289.0, 287.0, 292.0, 284.0, 289.0, 287.0, 291.0, 288.0, 292.0, 278.0, 266.0, 259.0, 291.0, 285.0, 262.0, 263.0, 281.0, 298.0, 279.0, 300.0, 263.0, 259.0, 288.0, 285.0, 259.0, 271.0, 286.0, 290.0, 286.0, 293.0, 265.0, 268.0, 281.0, 295.0, 250.0, 272.0, 266.0, 261.0, 259.0, 266.0, 284.0, 289.0, 274.0, 296.0, 283.0, 293.0, 251.0, 259.0, 269.0, 264.0, 259.0, 263.0, 264.0, 261.0, 288.0, 294.0, 299.0, 280.0, 281.0, 298.0, 297.0, 276.0, 260.0, 267.0, 297.0, 279.0, 291.0, 282.0, 288.0, 294.0, 284.0, 295.0, 264.0, 261.0, 291.0, 291.0, 289.0, 290.0, 288.0, 288.0, 279.0, 291.0, 288.0, 
288.0, 274.0, 293.0, 289.0, 293.0, 284.0, 292.0, 286.0, 293.0, 258.0, 261.0, 257.0, 268.0, 287.0, 289.0, 279.0, 294.0, 266.0, 261.0, 265.0, 260.0, 295.0, 281.0, 287.0, 292.0, 279.0, 294.0, 288.0, 285.0, 285.0, 297.0, 290.0, 280.0, 283.0, 293.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.706976358210824, "mean_inference_ms": 1.2689465769288457, "mean_action_processing_ms": 0.13502696676780063, "mean_env_wait_ms": 0.8513285528676952, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 6502400, "num_agent_steps_trained": 6502400, "num_env_steps_sampled": 3251200, "num_env_steps_trained": 3251200, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 3251200, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 6502400, "timers": {"training_iteration_time_ms": 3637.223, "learn_time_ms": 1078.626, "learn_throughput": 11866.953, "synch_weights_time_ms": 12.602}, "counters": {"num_env_steps_sampled": 3251200, "num_env_steps_trained": 3251200, "num_agent_steps_sampled": 6502400, "num_agent_steps_trained": 6502400}, "done": false, "episodes_total": 8128, "training_iteration": 254, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-16-59", "timestamp": 1666581419, "time_this_iter_s": 3.5743308067321777, "time_total_s": 983.4558050632477, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 983.4558050632477, "timesteps_since_restore": 0, "iterations_since_restore": 254, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 24.060000000000002, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 194.2, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 172.78, "shaped_reward_min": 150, "shaped_reward_max": 187, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.36, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 16.81, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 15.2, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 16.59, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 26, "onion_drop_agent_0_mean": 0.2, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.12, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.03, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, 
"useful_onion_drop_agent_1_mean": 0.02, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 14.82, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 16.39, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 26, "dish_pickup_agent_0_mean": 5.47, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.22, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.16, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.89, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.21, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.18, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.14, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.71, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.08, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.68, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.04, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.82, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 16.39, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 26, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.82, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 16.39, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 26, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0007592239417135715, "policy_loss": -0.001087638782337308, "vf_loss": 
7.597389221191406, "vf_explained_var": 0.5985276699066162, "kl": 0.0018774853087961674, "entropy": 0.8626449108123779, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 3264000, "num_env_steps_trained": 3264000, "num_agent_steps_sampled": 6528000, "num_agent_steps_trained": 6528000}, "sampler_results": {"episode_reward_max": 627.0, "episode_reward_min": 510.0, "episode_reward_mean": 561.18, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 237.0}, "policy_reward_max": {"ppo": 316.0}, "policy_reward_mean": {"ppo": 280.59}, "custom_metrics": {"sparse_reward_mean": 194.2, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 172.78, "shaped_reward_min": 150, "shaped_reward_max": 187, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.36, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 16.81, 
"onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 15.2, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 16.59, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 26, "onion_drop_agent_0_mean": 0.2, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.12, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.03, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.02, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 14.82, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 16.39, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 26, "dish_pickup_agent_0_mean": 5.47, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.22, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.16, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.89, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.21, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.18, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.14, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.71, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.08, "soup_delivery_agent_0_min": 2, 
"soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.68, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.04, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.82, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 16.39, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 26, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.82, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 16.39, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 26, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 627.0, 576.0, 522.0, 573.0, 516.0, 573.0, 522.0, 570.0, 576.0, 576.0, 576.0, 579.0, 570.0, 525.0, 576.0, 525.0, 579.0, 579.0, 522.0, 573.0, 530.0, 576.0, 579.0, 533.0, 576.0, 522.0, 527.0, 525.0, 573.0, 570.0, 576.0, 510.0, 533.0, 522.0, 525.0, 582.0, 579.0, 579.0, 573.0, 527.0, 576.0, 573.0, 582.0, 579.0, 525.0, 582.0, 579.0, 576.0, 570.0, 576.0, 567.0, 582.0, 576.0, 579.0, 519.0, 525.0, 576.0, 573.0, 527.0, 525.0, 576.0, 579.0, 573.0, 573.0, 582.0, 570.0, 576.0, 573.0, 519.0, 579.0, 579.0, 570.0, 579.0, 525.0, 576.0, 519.0, 573.0, 576.0, 584.0, 582.0, 570.0, 576.0, 525.0, 579.0, 576.0, 579.0, 587.0, 527.0, 576.0, 579.0, 579.0, 576.0, 525.0, 579.0, 522.0, 579.0, 522.0, 522.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [295.0, 284.0, 316.0, 311.0, 287.0, 289.0, 253.0, 269.0, 296.0, 277.0, 279.0, 237.0, 282.0, 291.0, 246.0, 276.0, 287.0, 283.0, 289.0, 287.0, 292.0, 284.0, 289.0, 287.0, 291.0, 288.0, 292.0, 278.0, 266.0, 259.0, 291.0, 285.0, 262.0, 263.0, 281.0, 298.0, 279.0, 300.0, 263.0, 259.0, 288.0, 285.0, 259.0, 271.0, 286.0, 290.0, 286.0, 293.0, 265.0, 
268.0, 281.0, 295.0, 250.0, 272.0, 266.0, 261.0, 259.0, 266.0, 284.0, 289.0, 274.0, 296.0, 283.0, 293.0, 251.0, 259.0, 269.0, 264.0, 259.0, 263.0, 264.0, 261.0, 288.0, 294.0, 299.0, 280.0, 281.0, 298.0, 297.0, 276.0, 260.0, 267.0, 297.0, 279.0, 291.0, 282.0, 288.0, 294.0, 284.0, 295.0, 264.0, 261.0, 291.0, 291.0, 289.0, 290.0, 288.0, 288.0, 279.0, 291.0, 288.0, 288.0, 274.0, 293.0, 289.0, 293.0, 284.0, 292.0, 286.0, 293.0, 258.0, 261.0, 257.0, 268.0, 287.0, 289.0, 279.0, 294.0, 266.0, 261.0, 265.0, 260.0, 295.0, 281.0, 287.0, 292.0, 279.0, 294.0, 288.0, 285.0, 285.0, 297.0, 290.0, 280.0, 283.0, 293.0, 290.0, 283.0, 260.0, 259.0, 282.0, 297.0, 290.0, 289.0, 282.0, 288.0, 285.0, 294.0, 262.0, 263.0, 285.0, 291.0, 249.0, 270.0, 280.0, 293.0, 287.0, 289.0, 302.0, 282.0, 296.0, 286.0, 283.0, 287.0, 277.0, 299.0, 263.0, 262.0, 284.0, 295.0, 288.0, 288.0, 288.0, 291.0, 293.0, 294.0, 266.0, 261.0, 287.0, 289.0, 292.0, 287.0, 290.0, 289.0, 291.0, 285.0, 263.0, 262.0, 301.0, 278.0, 257.0, 265.0, 283.0, 296.0, 263.0, 259.0, 254.0, 268.0, 295.0, 284.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7068609015084573, "mean_inference_ms": 1.26862164727497, "mean_action_processing_ms": 0.13501117923771774, "mean_env_wait_ms": 0.8511349115150145, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 627.0, "episode_reward_min": 510.0, "episode_reward_mean": 561.18, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 237.0}, "policy_reward_max": {"ppo": 316.0}, "policy_reward_mean": {"ppo": 280.59}, "hist_stats": {"episode_reward": [579.0, 627.0, 576.0, 522.0, 573.0, 516.0, 573.0, 522.0, 570.0, 576.0, 576.0, 576.0, 579.0, 570.0, 525.0, 576.0, 525.0, 579.0, 579.0, 522.0, 573.0, 530.0, 576.0, 579.0, 533.0, 576.0, 522.0, 527.0, 525.0, 573.0, 570.0, 576.0, 510.0, 533.0, 522.0, 525.0, 582.0, 579.0, 579.0, 573.0, 527.0, 576.0, 573.0, 582.0, 579.0, 525.0, 582.0, 579.0, 576.0, 570.0, 576.0, 567.0, 582.0, 576.0, 579.0, 519.0, 525.0, 
576.0, 573.0, 527.0, 525.0, 576.0, 579.0, 573.0, 573.0, 582.0, 570.0, 576.0, 573.0, 519.0, 579.0, 579.0, 570.0, 579.0, 525.0, 576.0, 519.0, 573.0, 576.0, 584.0, 582.0, 570.0, 576.0, 525.0, 579.0, 576.0, 579.0, 587.0, 527.0, 576.0, 579.0, 579.0, 576.0, 525.0, 579.0, 522.0, 579.0, 522.0, 522.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [295.0, 284.0, 316.0, 311.0, 287.0, 289.0, 253.0, 269.0, 296.0, 277.0, 279.0, 237.0, 282.0, 291.0, 246.0, 276.0, 287.0, 283.0, 289.0, 287.0, 292.0, 284.0, 289.0, 287.0, 291.0, 288.0, 292.0, 278.0, 266.0, 259.0, 291.0, 285.0, 262.0, 263.0, 281.0, 298.0, 279.0, 300.0, 263.0, 259.0, 288.0, 285.0, 259.0, 271.0, 286.0, 290.0, 286.0, 293.0, 265.0, 268.0, 281.0, 295.0, 250.0, 272.0, 266.0, 261.0, 259.0, 266.0, 284.0, 289.0, 274.0, 296.0, 283.0, 293.0, 251.0, 259.0, 269.0, 264.0, 259.0, 263.0, 264.0, 261.0, 288.0, 294.0, 299.0, 280.0, 281.0, 298.0, 297.0, 276.0, 260.0, 267.0, 297.0, 279.0, 291.0, 282.0, 288.0, 294.0, 284.0, 295.0, 264.0, 261.0, 291.0, 291.0, 289.0, 290.0, 288.0, 288.0, 279.0, 291.0, 288.0, 288.0, 274.0, 293.0, 289.0, 293.0, 284.0, 292.0, 286.0, 293.0, 258.0, 261.0, 257.0, 268.0, 287.0, 289.0, 279.0, 294.0, 266.0, 261.0, 265.0, 260.0, 295.0, 281.0, 287.0, 292.0, 279.0, 294.0, 288.0, 285.0, 285.0, 297.0, 290.0, 280.0, 283.0, 293.0, 290.0, 283.0, 260.0, 259.0, 282.0, 297.0, 290.0, 289.0, 282.0, 288.0, 285.0, 294.0, 262.0, 263.0, 285.0, 291.0, 249.0, 270.0, 280.0, 293.0, 287.0, 289.0, 302.0, 282.0, 296.0, 286.0, 283.0, 287.0, 277.0, 
299.0, 263.0, 262.0, 284.0, 295.0, 288.0, 288.0, 288.0, 291.0, 293.0, 294.0, 266.0, 261.0, 287.0, 289.0, 292.0, 287.0, 290.0, 289.0, 291.0, 285.0, 263.0, 262.0, 301.0, 278.0, 257.0, 265.0, 283.0, 296.0, 263.0, 259.0, 254.0, 268.0, 295.0, 284.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7068609015084573, "mean_inference_ms": 1.26862164727497, "mean_action_processing_ms": 0.13501117923771774, "mean_env_wait_ms": 0.8511349115150145, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 6528000, "num_agent_steps_trained": 6528000, "num_env_steps_sampled": 3264000, "num_env_steps_trained": 3264000, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 3264000, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 6528000, "timers": {"training_iteration_time_ms": 3637.109, "learn_time_ms": 1085.381, "learn_throughput": 11793.093, "synch_weights_time_ms": 12.557}, "counters": {"num_env_steps_sampled": 3264000, "num_env_steps_trained": 3264000, "num_agent_steps_sampled": 6528000, "num_agent_steps_trained": 6528000}, "done": false, "episodes_total": 8160, "training_iteration": 255, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-17-03", "timestamp": 1666581423, "time_this_iter_s": 3.6406946182250977, "time_total_s": 987.0964996814728, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 987.0964996814728, "timesteps_since_restore": 0, "iterations_since_restore": 255, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.316666666666666, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 195.2, "sparse_reward_min": 180, "sparse_reward_max": 200, "shaped_reward_mean": 173.56, "shaped_reward_min": 150, "shaped_reward_max": 187, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.56, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 16.63, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 15.39, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 16.41, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 26, "onion_drop_agent_0_mean": 0.19, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.12, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.02, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, 
"useful_onion_drop_agent_1_mean": 0.04, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 15.06, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 16.27, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 26, "dish_pickup_agent_0_mean": 5.42, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.25, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.14, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.95, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.15, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.16, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.09, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.78, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.07, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.74, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.06, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 16.27, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 26, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.06, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 16.27, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 26, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0021585412323474884, "policy_loss": -0.0025006746873259544, "vf_loss": 
7.743043899536133, "vf_explained_var": 0.5868951082229614, "kl": 0.0020559565164148808, "entropy": 0.8643389344215393, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 3276800, "num_env_steps_trained": 3276800, "num_agent_steps_sampled": 6553600, "num_agent_steps_trained": 6553600}, "sampler_results": {"episode_reward_max": 587.0, "episode_reward_min": 510.0, "episode_reward_mean": 563.96, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 249.0}, "policy_reward_max": {"ppo": 302.0}, "policy_reward_mean": {"ppo": 281.98}, "custom_metrics": {"sparse_reward_mean": 195.2, "sparse_reward_min": 180, "sparse_reward_max": 200, "shaped_reward_mean": 173.56, "shaped_reward_min": 150, "shaped_reward_max": 187, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.56, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 16.63, 
"onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 15.39, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 16.41, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 26, "onion_drop_agent_0_mean": 0.19, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.12, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.02, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.04, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 15.06, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 16.27, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 26, "dish_pickup_agent_0_mean": 5.42, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.25, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.14, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.95, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.15, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.16, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.09, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.78, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.07, "soup_delivery_agent_0_min": 2, 
"soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.74, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.06, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 16.27, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 26, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.06, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 16.27, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 26, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [510.0, 533.0, 522.0, 525.0, 582.0, 579.0, 579.0, 573.0, 527.0, 576.0, 573.0, 582.0, 579.0, 525.0, 582.0, 579.0, 576.0, 570.0, 576.0, 567.0, 582.0, 576.0, 579.0, 519.0, 525.0, 576.0, 573.0, 527.0, 525.0, 576.0, 579.0, 573.0, 573.0, 582.0, 570.0, 576.0, 573.0, 519.0, 579.0, 579.0, 570.0, 579.0, 525.0, 576.0, 519.0, 573.0, 576.0, 584.0, 582.0, 570.0, 576.0, 525.0, 579.0, 576.0, 579.0, 587.0, 527.0, 576.0, 579.0, 579.0, 576.0, 525.0, 579.0, 522.0, 579.0, 522.0, 522.0, 579.0, 525.0, 582.0, 579.0, 573.0, 524.0, 570.0, 576.0, 576.0, 576.0, 579.0, 576.0, 573.0, 579.0, 576.0, 579.0, 582.0, 573.0, 522.0, 584.0, 576.0, 570.0, 573.0, 530.0, 576.0, 573.0, 570.0, 573.0, 576.0, 576.0, 530.0, 579.0, 573.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [251.0, 259.0, 269.0, 264.0, 259.0, 263.0, 264.0, 261.0, 288.0, 294.0, 299.0, 280.0, 281.0, 298.0, 297.0, 276.0, 260.0, 267.0, 297.0, 279.0, 291.0, 282.0, 288.0, 294.0, 284.0, 295.0, 264.0, 261.0, 291.0, 291.0, 289.0, 290.0, 288.0, 288.0, 279.0, 291.0, 288.0, 288.0, 274.0, 293.0, 289.0, 293.0, 284.0, 292.0, 286.0, 293.0, 258.0, 261.0, 257.0, 
268.0, 287.0, 289.0, 279.0, 294.0, 266.0, 261.0, 265.0, 260.0, 295.0, 281.0, 287.0, 292.0, 279.0, 294.0, 288.0, 285.0, 285.0, 297.0, 290.0, 280.0, 283.0, 293.0, 290.0, 283.0, 260.0, 259.0, 282.0, 297.0, 290.0, 289.0, 282.0, 288.0, 285.0, 294.0, 262.0, 263.0, 285.0, 291.0, 249.0, 270.0, 280.0, 293.0, 287.0, 289.0, 302.0, 282.0, 296.0, 286.0, 283.0, 287.0, 277.0, 299.0, 263.0, 262.0, 284.0, 295.0, 288.0, 288.0, 288.0, 291.0, 293.0, 294.0, 266.0, 261.0, 287.0, 289.0, 292.0, 287.0, 290.0, 289.0, 291.0, 285.0, 263.0, 262.0, 301.0, 278.0, 257.0, 265.0, 283.0, 296.0, 263.0, 259.0, 254.0, 268.0, 295.0, 284.0, 264.0, 261.0, 296.0, 286.0, 291.0, 288.0, 282.0, 291.0, 256.0, 268.0, 286.0, 284.0, 283.0, 293.0, 294.0, 282.0, 292.0, 284.0, 283.0, 296.0, 283.0, 293.0, 290.0, 283.0, 293.0, 286.0, 285.0, 291.0, 297.0, 282.0, 285.0, 297.0, 280.0, 293.0, 264.0, 258.0, 297.0, 287.0, 292.0, 284.0, 280.0, 290.0, 290.0, 283.0, 262.0, 268.0, 274.0, 302.0, 298.0, 275.0, 278.0, 292.0, 289.0, 284.0, 283.0, 293.0, 287.0, 289.0, 257.0, 273.0, 287.0, 292.0, 295.0, 278.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7067256217430281, "mean_inference_ms": 1.2682765239266274, "mean_action_processing_ms": 0.13499352668107936, "mean_env_wait_ms": 0.8509188370516406, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 587.0, "episode_reward_min": 510.0, "episode_reward_mean": 563.96, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 249.0}, "policy_reward_max": {"ppo": 302.0}, "policy_reward_mean": {"ppo": 281.98}, "hist_stats": {"episode_reward": [510.0, 533.0, 522.0, 525.0, 582.0, 579.0, 579.0, 573.0, 527.0, 576.0, 573.0, 582.0, 579.0, 525.0, 582.0, 579.0, 576.0, 570.0, 576.0, 567.0, 582.0, 576.0, 579.0, 519.0, 525.0, 576.0, 573.0, 527.0, 525.0, 576.0, 579.0, 573.0, 573.0, 582.0, 570.0, 576.0, 573.0, 519.0, 579.0, 579.0, 570.0, 579.0, 525.0, 576.0, 519.0, 573.0, 576.0, 584.0, 582.0, 570.0, 576.0, 525.0, 579.0, 576.0, 579.0, 587.0, 527.0, 
576.0, 579.0, 579.0, 576.0, 525.0, 579.0, 522.0, 579.0, 522.0, 522.0, 579.0, 525.0, 582.0, 579.0, 573.0, 524.0, 570.0, 576.0, 576.0, 576.0, 579.0, 576.0, 573.0, 579.0, 576.0, 579.0, 582.0, 573.0, 522.0, 584.0, 576.0, 570.0, 573.0, 530.0, 576.0, 573.0, 570.0, 573.0, 576.0, 576.0, 530.0, 579.0, 573.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [251.0, 259.0, 269.0, 264.0, 259.0, 263.0, 264.0, 261.0, 288.0, 294.0, 299.0, 280.0, 281.0, 298.0, 297.0, 276.0, 260.0, 267.0, 297.0, 279.0, 291.0, 282.0, 288.0, 294.0, 284.0, 295.0, 264.0, 261.0, 291.0, 291.0, 289.0, 290.0, 288.0, 288.0, 279.0, 291.0, 288.0, 288.0, 274.0, 293.0, 289.0, 293.0, 284.0, 292.0, 286.0, 293.0, 258.0, 261.0, 257.0, 268.0, 287.0, 289.0, 279.0, 294.0, 266.0, 261.0, 265.0, 260.0, 295.0, 281.0, 287.0, 292.0, 279.0, 294.0, 288.0, 285.0, 285.0, 297.0, 290.0, 280.0, 283.0, 293.0, 290.0, 283.0, 260.0, 259.0, 282.0, 297.0, 290.0, 289.0, 282.0, 288.0, 285.0, 294.0, 262.0, 263.0, 285.0, 291.0, 249.0, 270.0, 280.0, 293.0, 287.0, 289.0, 302.0, 282.0, 296.0, 286.0, 283.0, 287.0, 277.0, 299.0, 263.0, 262.0, 284.0, 295.0, 288.0, 288.0, 288.0, 291.0, 293.0, 294.0, 266.0, 261.0, 287.0, 289.0, 292.0, 287.0, 290.0, 289.0, 291.0, 285.0, 263.0, 262.0, 301.0, 278.0, 257.0, 265.0, 283.0, 296.0, 263.0, 259.0, 254.0, 268.0, 295.0, 284.0, 264.0, 261.0, 296.0, 286.0, 291.0, 288.0, 282.0, 291.0, 256.0, 268.0, 286.0, 284.0, 283.0, 293.0, 294.0, 282.0, 292.0, 284.0, 283.0, 296.0, 283.0, 293.0, 290.0, 283.0, 293.0, 286.0, 285.0, 291.0, 297.0, 
282.0, 285.0, 297.0, 280.0, 293.0, 264.0, 258.0, 297.0, 287.0, 292.0, 284.0, 280.0, 290.0, 290.0, 283.0, 262.0, 268.0, 274.0, 302.0, 298.0, 275.0, 278.0, 292.0, 289.0, 284.0, 283.0, 293.0, 287.0, 289.0, 257.0, 273.0, 287.0, 292.0, 295.0, 278.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7067256217430281, "mean_inference_ms": 1.2682765239266274, "mean_action_processing_ms": 0.13499352668107936, "mean_env_wait_ms": 0.8509188370516406, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 6553600, "num_agent_steps_trained": 6553600, "num_env_steps_sampled": 3276800, "num_env_steps_trained": 3276800, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 3276800, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 6553600, "timers": {"training_iteration_time_ms": 3632.587, "learn_time_ms": 1086.132, "learn_throughput": 11784.934, "synch_weights_time_ms": 11.912}, "counters": {"num_env_steps_sampled": 3276800, "num_env_steps_trained": 3276800, "num_agent_steps_sampled": 6553600, "num_agent_steps_trained": 6553600}, "done": false, "episodes_total": 8192, "training_iteration": 256, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-17-07", "timestamp": 1666581427, "time_this_iter_s": 3.608936071395874, "time_total_s": 990.7054357528687, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 990.7054357528687, "timesteps_since_restore": 0, "iterations_since_restore": 256, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.419999999999998, "ram_util_percent": 10.62}}
+{"custom_metrics": {"sparse_reward_mean": 196.4, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 174.8, "shaped_reward_min": 156, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.4, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 16.92, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 15.25, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 16.69, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 26, "onion_drop_agent_0_mean": 0.1, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.12, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.03, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, 
"useful_onion_drop_agent_1_mean": 0.07, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 14.95, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 16.59, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 26, "dish_pickup_agent_0_mean": 5.58, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.18, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.31, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.9, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.19, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.15, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.04, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.2, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.71, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.19, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.68, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.95, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 16.59, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 26, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.95, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 16.59, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 26, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0006072861724533141, "policy_loss": -0.0009418814443051815, "vf_loss": 
7.661257743835449, "vf_explained_var": 0.5704012513160706, "kl": 0.001968295779079199, "entropy": 0.8630570769309998, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 3289600, "num_env_steps_trained": 3289600, "num_agent_steps_sampled": 6579200, "num_agent_steps_trained": 6579200}, "sampler_results": {"episode_reward_max": 633.0, "episode_reward_min": 516.0, "episode_reward_mean": 567.6, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 249.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 283.8}, "custom_metrics": {"sparse_reward_mean": 196.4, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 174.8, "shaped_reward_min": 156, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.4, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 16.92, 
"onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 15.25, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 16.69, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 26, "onion_drop_agent_0_mean": 0.1, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.12, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.03, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.07, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 14.95, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 16.59, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 26, "dish_pickup_agent_0_mean": 5.58, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.18, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.31, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.9, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.19, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.15, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.04, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.2, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.71, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.19, "soup_delivery_agent_0_min": 2, 
"soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.68, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.95, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 16.59, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 26, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.95, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 16.59, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 26, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [573.0, 582.0, 570.0, 576.0, 573.0, 519.0, 579.0, 579.0, 570.0, 579.0, 525.0, 576.0, 519.0, 573.0, 576.0, 584.0, 582.0, 570.0, 576.0, 525.0, 579.0, 576.0, 579.0, 587.0, 527.0, 576.0, 579.0, 579.0, 576.0, 525.0, 579.0, 522.0, 579.0, 522.0, 522.0, 579.0, 525.0, 582.0, 579.0, 573.0, 524.0, 570.0, 576.0, 576.0, 576.0, 579.0, 576.0, 573.0, 579.0, 576.0, 579.0, 582.0, 573.0, 522.0, 584.0, 576.0, 570.0, 573.0, 530.0, 576.0, 573.0, 570.0, 573.0, 576.0, 576.0, 530.0, 579.0, 573.0, 579.0, 582.0, 573.0, 582.0, 582.0, 582.0, 525.0, 579.0, 573.0, 573.0, 582.0, 576.0, 579.0, 576.0, 522.0, 582.0, 582.0, 582.0, 573.0, 582.0, 570.0, 525.0, 576.0, 579.0, 541.0, 582.0, 573.0, 633.0, 570.0, 576.0, 582.0, 516.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [288.0, 285.0, 285.0, 297.0, 290.0, 280.0, 283.0, 293.0, 290.0, 283.0, 260.0, 259.0, 282.0, 297.0, 290.0, 289.0, 282.0, 288.0, 285.0, 294.0, 262.0, 263.0, 285.0, 291.0, 249.0, 270.0, 280.0, 293.0, 287.0, 289.0, 302.0, 282.0, 296.0, 286.0, 283.0, 287.0, 277.0, 299.0, 263.0, 262.0, 284.0, 295.0, 288.0, 288.0, 288.0, 291.0, 293.0, 294.0, 266.0, 
261.0, 287.0, 289.0, 292.0, 287.0, 290.0, 289.0, 291.0, 285.0, 263.0, 262.0, 301.0, 278.0, 257.0, 265.0, 283.0, 296.0, 263.0, 259.0, 254.0, 268.0, 295.0, 284.0, 264.0, 261.0, 296.0, 286.0, 291.0, 288.0, 282.0, 291.0, 256.0, 268.0, 286.0, 284.0, 283.0, 293.0, 294.0, 282.0, 292.0, 284.0, 283.0, 296.0, 283.0, 293.0, 290.0, 283.0, 293.0, 286.0, 285.0, 291.0, 297.0, 282.0, 285.0, 297.0, 280.0, 293.0, 264.0, 258.0, 297.0, 287.0, 292.0, 284.0, 280.0, 290.0, 290.0, 283.0, 262.0, 268.0, 274.0, 302.0, 298.0, 275.0, 278.0, 292.0, 289.0, 284.0, 283.0, 293.0, 287.0, 289.0, 257.0, 273.0, 287.0, 292.0, 295.0, 278.0, 286.0, 293.0, 292.0, 290.0, 280.0, 293.0, 289.0, 293.0, 285.0, 297.0, 288.0, 294.0, 259.0, 266.0, 285.0, 294.0, 282.0, 291.0, 288.0, 285.0, 290.0, 292.0, 289.0, 287.0, 289.0, 290.0, 294.0, 282.0, 268.0, 254.0, 286.0, 296.0, 288.0, 294.0, 293.0, 289.0, 290.0, 283.0, 294.0, 288.0, 288.0, 282.0, 261.0, 264.0, 289.0, 287.0, 285.0, 294.0, 282.0, 259.0, 291.0, 291.0, 286.0, 287.0, 309.0, 324.0, 280.0, 290.0, 291.0, 285.0, 293.0, 289.0, 261.0, 255.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7066194733142604, "mean_inference_ms": 1.2679197114805454, "mean_action_processing_ms": 0.1349735798694533, "mean_env_wait_ms": 0.8506980210996916, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 633.0, "episode_reward_min": 516.0, "episode_reward_mean": 567.6, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 249.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 283.8}, "hist_stats": {"episode_reward": [573.0, 582.0, 570.0, 576.0, 573.0, 519.0, 579.0, 579.0, 570.0, 579.0, 525.0, 576.0, 519.0, 573.0, 576.0, 584.0, 582.0, 570.0, 576.0, 525.0, 579.0, 576.0, 579.0, 587.0, 527.0, 576.0, 579.0, 579.0, 576.0, 525.0, 579.0, 522.0, 579.0, 522.0, 522.0, 579.0, 525.0, 582.0, 579.0, 573.0, 524.0, 570.0, 576.0, 576.0, 576.0, 579.0, 576.0, 573.0, 579.0, 576.0, 579.0, 582.0, 573.0, 522.0, 584.0, 576.0, 570.0, 
573.0, 530.0, 576.0, 573.0, 570.0, 573.0, 576.0, 576.0, 530.0, 579.0, 573.0, 579.0, 582.0, 573.0, 582.0, 582.0, 582.0, 525.0, 579.0, 573.0, 573.0, 582.0, 576.0, 579.0, 576.0, 522.0, 582.0, 582.0, 582.0, 573.0, 582.0, 570.0, 525.0, 576.0, 579.0, 541.0, 582.0, 573.0, 633.0, 570.0, 576.0, 582.0, 516.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [288.0, 285.0, 285.0, 297.0, 290.0, 280.0, 283.0, 293.0, 290.0, 283.0, 260.0, 259.0, 282.0, 297.0, 290.0, 289.0, 282.0, 288.0, 285.0, 294.0, 262.0, 263.0, 285.0, 291.0, 249.0, 270.0, 280.0, 293.0, 287.0, 289.0, 302.0, 282.0, 296.0, 286.0, 283.0, 287.0, 277.0, 299.0, 263.0, 262.0, 284.0, 295.0, 288.0, 288.0, 288.0, 291.0, 293.0, 294.0, 266.0, 261.0, 287.0, 289.0, 292.0, 287.0, 290.0, 289.0, 291.0, 285.0, 263.0, 262.0, 301.0, 278.0, 257.0, 265.0, 283.0, 296.0, 263.0, 259.0, 254.0, 268.0, 295.0, 284.0, 264.0, 261.0, 296.0, 286.0, 291.0, 288.0, 282.0, 291.0, 256.0, 268.0, 286.0, 284.0, 283.0, 293.0, 294.0, 282.0, 292.0, 284.0, 283.0, 296.0, 283.0, 293.0, 290.0, 283.0, 293.0, 286.0, 285.0, 291.0, 297.0, 282.0, 285.0, 297.0, 280.0, 293.0, 264.0, 258.0, 297.0, 287.0, 292.0, 284.0, 280.0, 290.0, 290.0, 283.0, 262.0, 268.0, 274.0, 302.0, 298.0, 275.0, 278.0, 292.0, 289.0, 284.0, 283.0, 293.0, 287.0, 289.0, 257.0, 273.0, 287.0, 292.0, 295.0, 278.0, 286.0, 293.0, 292.0, 290.0, 280.0, 293.0, 289.0, 293.0, 285.0, 297.0, 288.0, 294.0, 259.0, 266.0, 285.0, 294.0, 282.0, 291.0, 288.0, 285.0, 290.0, 292.0, 289.0, 287.0, 289.0, 290.0, 294.0, 282.0, 268.0, 
254.0, 286.0, 296.0, 288.0, 294.0, 293.0, 289.0, 290.0, 283.0, 294.0, 288.0, 288.0, 282.0, 261.0, 264.0, 289.0, 287.0, 285.0, 294.0, 282.0, 259.0, 291.0, 291.0, 286.0, 287.0, 309.0, 324.0, 280.0, 290.0, 291.0, 285.0, 293.0, 289.0, 261.0, 255.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7066194733142604, "mean_inference_ms": 1.2679197114805454, "mean_action_processing_ms": 0.1349735798694533, "mean_env_wait_ms": 0.8506980210996916, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 6579200, "num_agent_steps_trained": 6579200, "num_env_steps_sampled": 3289600, "num_env_steps_trained": 3289600, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 3289600, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 6579200, "timers": {"training_iteration_time_ms": 3627.519, "learn_time_ms": 1083.827, "learn_throughput": 11810.001, "synch_weights_time_ms": 12.02}, "counters": {"num_env_steps_sampled": 3289600, "num_env_steps_trained": 3289600, "num_agent_steps_sampled": 6579200, "num_agent_steps_trained": 6579200}, "done": false, "episodes_total": 8224, "training_iteration": 257, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-17-11", "timestamp": 1666581431, "time_this_iter_s": 3.631376266479492, "time_total_s": 994.3368120193481, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 994.3368120193481, "timesteps_since_restore": 0, "iterations_since_restore": 257, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.880000000000003, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 195.0, "sparse_reward_min": 100, "sparse_reward_max": 220, "shaped_reward_mean": 173.66, "shaped_reward_min": 96, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.45, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 16.77, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 15.32, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 16.51, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 25, "onion_drop_agent_0_mean": 0.13, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.14, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.05, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, 
"useful_onion_drop_agent_1_mean": 0.11, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 14.96, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 16.38, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 5.56, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.24, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.23, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.9, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.24, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.17, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.06, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.1, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.75, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.1, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.7, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.96, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 16.38, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.96, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 16.38, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.00214711157605052, "policy_loss": -0.002498897723853588, "vf_loss": 
7.818296432495117, "vf_explained_var": 0.5837655663490295, "kl": 0.002699965611100197, "entropy": 0.86008620262146, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 3302400, "num_env_steps_trained": 3302400, "num_agent_steps_sampled": 6604800, "num_agent_steps_trained": 6604800}, "sampler_results": {"episode_reward_max": 633.0, "episode_reward_min": 296.0, "episode_reward_mean": 563.66, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 142.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 281.83}, "custom_metrics": {"sparse_reward_mean": 195.0, "sparse_reward_min": 100, "sparse_reward_max": 220, "shaped_reward_mean": 173.66, "shaped_reward_min": 96, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.45, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 16.77, 
"onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 15.32, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 16.51, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 25, "onion_drop_agent_0_mean": 0.13, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.14, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.05, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.11, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 14.96, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 16.38, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 5.56, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.24, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.23, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.9, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.24, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.17, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.06, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.1, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.75, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.1, "soup_delivery_agent_0_min": 2, 
"soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.7, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.96, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 16.38, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.96, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 16.38, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 522.0, 522.0, 579.0, 525.0, 582.0, 579.0, 573.0, 524.0, 570.0, 576.0, 576.0, 576.0, 579.0, 576.0, 573.0, 579.0, 576.0, 579.0, 582.0, 573.0, 522.0, 584.0, 576.0, 570.0, 573.0, 530.0, 576.0, 573.0, 570.0, 573.0, 576.0, 576.0, 530.0, 579.0, 573.0, 579.0, 582.0, 573.0, 582.0, 582.0, 582.0, 525.0, 579.0, 573.0, 573.0, 582.0, 576.0, 579.0, 576.0, 522.0, 582.0, 582.0, 582.0, 573.0, 582.0, 570.0, 525.0, 576.0, 579.0, 541.0, 582.0, 573.0, 633.0, 570.0, 576.0, 582.0, 516.0, 587.0, 536.0, 567.0, 573.0, 296.0, 582.0, 576.0, 582.0, 525.0, 576.0, 576.0, 579.0, 465.0, 582.0, 522.0, 564.0, 576.0, 630.0, 525.0, 570.0, 522.0, 579.0, 533.0, 576.0, 522.0, 570.0, 579.0, 576.0, 579.0, 522.0, 573.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [283.0, 296.0, 263.0, 259.0, 254.0, 268.0, 295.0, 284.0, 264.0, 261.0, 296.0, 286.0, 291.0, 288.0, 282.0, 291.0, 256.0, 268.0, 286.0, 284.0, 283.0, 293.0, 294.0, 282.0, 292.0, 284.0, 283.0, 296.0, 283.0, 293.0, 290.0, 283.0, 293.0, 286.0, 285.0, 291.0, 297.0, 282.0, 285.0, 297.0, 280.0, 293.0, 264.0, 258.0, 297.0, 287.0, 292.0, 284.0, 280.0, 
290.0, 290.0, 283.0, 262.0, 268.0, 274.0, 302.0, 298.0, 275.0, 278.0, 292.0, 289.0, 284.0, 283.0, 293.0, 287.0, 289.0, 257.0, 273.0, 287.0, 292.0, 295.0, 278.0, 286.0, 293.0, 292.0, 290.0, 280.0, 293.0, 289.0, 293.0, 285.0, 297.0, 288.0, 294.0, 259.0, 266.0, 285.0, 294.0, 282.0, 291.0, 288.0, 285.0, 290.0, 292.0, 289.0, 287.0, 289.0, 290.0, 294.0, 282.0, 268.0, 254.0, 286.0, 296.0, 288.0, 294.0, 293.0, 289.0, 290.0, 283.0, 294.0, 288.0, 288.0, 282.0, 261.0, 264.0, 289.0, 287.0, 285.0, 294.0, 282.0, 259.0, 291.0, 291.0, 286.0, 287.0, 309.0, 324.0, 280.0, 290.0, 291.0, 285.0, 293.0, 289.0, 261.0, 255.0, 291.0, 296.0, 260.0, 276.0, 280.0, 287.0, 295.0, 278.0, 142.0, 154.0, 295.0, 287.0, 283.0, 293.0, 298.0, 284.0, 254.0, 271.0, 278.0, 298.0, 294.0, 282.0, 289.0, 290.0, 231.0, 234.0, 296.0, 286.0, 262.0, 260.0, 294.0, 270.0, 291.0, 285.0, 313.0, 317.0, 262.0, 263.0, 279.0, 291.0, 252.0, 270.0, 282.0, 297.0, 270.0, 263.0, 293.0, 283.0, 253.0, 269.0, 279.0, 291.0, 280.0, 299.0, 286.0, 290.0, 288.0, 291.0, 260.0, 262.0, 285.0, 288.0, 293.0, 283.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7065245715933673, "mean_inference_ms": 1.2675972055067737, "mean_action_processing_ms": 0.1349561136398719, "mean_env_wait_ms": 0.8505030940724262, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 633.0, "episode_reward_min": 296.0, "episode_reward_mean": 563.66, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 142.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 281.83}, "hist_stats": {"episode_reward": [579.0, 522.0, 522.0, 579.0, 525.0, 582.0, 579.0, 573.0, 524.0, 570.0, 576.0, 576.0, 576.0, 579.0, 576.0, 573.0, 579.0, 576.0, 579.0, 582.0, 573.0, 522.0, 584.0, 576.0, 570.0, 573.0, 530.0, 576.0, 573.0, 570.0, 573.0, 576.0, 576.0, 530.0, 579.0, 573.0, 579.0, 582.0, 573.0, 582.0, 582.0, 582.0, 525.0, 579.0, 573.0, 573.0, 582.0, 576.0, 579.0, 576.0, 522.0, 582.0, 582.0, 582.0, 573.0, 582.0, 570.0, 
525.0, 576.0, 579.0, 541.0, 582.0, 573.0, 633.0, 570.0, 576.0, 582.0, 516.0, 587.0, 536.0, 567.0, 573.0, 296.0, 582.0, 576.0, 582.0, 525.0, 576.0, 576.0, 579.0, 465.0, 582.0, 522.0, 564.0, 576.0, 630.0, 525.0, 570.0, 522.0, 579.0, 533.0, 576.0, 522.0, 570.0, 579.0, 576.0, 579.0, 522.0, 573.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [283.0, 296.0, 263.0, 259.0, 254.0, 268.0, 295.0, 284.0, 264.0, 261.0, 296.0, 286.0, 291.0, 288.0, 282.0, 291.0, 256.0, 268.0, 286.0, 284.0, 283.0, 293.0, 294.0, 282.0, 292.0, 284.0, 283.0, 296.0, 283.0, 293.0, 290.0, 283.0, 293.0, 286.0, 285.0, 291.0, 297.0, 282.0, 285.0, 297.0, 280.0, 293.0, 264.0, 258.0, 297.0, 287.0, 292.0, 284.0, 280.0, 290.0, 290.0, 283.0, 262.0, 268.0, 274.0, 302.0, 298.0, 275.0, 278.0, 292.0, 289.0, 284.0, 283.0, 293.0, 287.0, 289.0, 257.0, 273.0, 287.0, 292.0, 295.0, 278.0, 286.0, 293.0, 292.0, 290.0, 280.0, 293.0, 289.0, 293.0, 285.0, 297.0, 288.0, 294.0, 259.0, 266.0, 285.0, 294.0, 282.0, 291.0, 288.0, 285.0, 290.0, 292.0, 289.0, 287.0, 289.0, 290.0, 294.0, 282.0, 268.0, 254.0, 286.0, 296.0, 288.0, 294.0, 293.0, 289.0, 290.0, 283.0, 294.0, 288.0, 288.0, 282.0, 261.0, 264.0, 289.0, 287.0, 285.0, 294.0, 282.0, 259.0, 291.0, 291.0, 286.0, 287.0, 309.0, 324.0, 280.0, 290.0, 291.0, 285.0, 293.0, 289.0, 261.0, 255.0, 291.0, 296.0, 260.0, 276.0, 280.0, 287.0, 295.0, 278.0, 142.0, 154.0, 295.0, 287.0, 283.0, 293.0, 298.0, 284.0, 254.0, 271.0, 278.0, 298.0, 294.0, 282.0, 289.0, 290.0, 231.0, 234.0, 296.0, 286.0, 262.0, 
260.0, 294.0, 270.0, 291.0, 285.0, 313.0, 317.0, 262.0, 263.0, 279.0, 291.0, 252.0, 270.0, 282.0, 297.0, 270.0, 263.0, 293.0, 283.0, 253.0, 269.0, 279.0, 291.0, 280.0, 299.0, 286.0, 290.0, 288.0, 291.0, 260.0, 262.0, 285.0, 288.0, 293.0, 283.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7065245715933673, "mean_inference_ms": 1.2675972055067737, "mean_action_processing_ms": 0.1349561136398719, "mean_env_wait_ms": 0.8505030940724262, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 6604800, "num_agent_steps_trained": 6604800, "num_env_steps_sampled": 3302400, "num_env_steps_trained": 3302400, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 3302400, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 6604800, "timers": {"training_iteration_time_ms": 3611.336, "learn_time_ms": 1074.018, "learn_throughput": 11917.868, "synch_weights_time_ms": 12.001}, "counters": {"num_env_steps_sampled": 3302400, "num_env_steps_trained": 3302400, "num_agent_steps_sampled": 6604800, "num_agent_steps_trained": 6604800}, "done": false, "episodes_total": 8256, "training_iteration": 258, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-17-14", "timestamp": 1666581434, "time_this_iter_s": 3.57814884185791, "time_total_s": 997.914960861206, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 997.914960861206, "timesteps_since_restore": 0, "iterations_since_restore": 258, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.716666666666665, "ram_util_percent": 10.633333333333333}}
+{"custom_metrics": {"sparse_reward_mean": 193.6, "sparse_reward_min": 80, "sparse_reward_max": 220, "shaped_reward_mean": 172.75, "shaped_reward_min": 74, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.99, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 17.21, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 14.86, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 16.99, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 25, "onion_drop_agent_0_mean": 0.11, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.1, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.06, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, 
"useful_onion_drop_agent_1_mean": 0.08, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.53, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 16.8, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 5.7, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 4.99, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.34, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.63, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.25, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.18, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.07, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.24, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.53, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.23, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.5, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.53, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 16.8, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.53, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 16.8, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0009613498696126044, "policy_loss": 0.0006067568901926279, "vf_loss": 
7.799860000610352, "vf_explained_var": 0.5667149424552917, "kl": 0.0024039496202021837, "entropy": 0.8507847785949707, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 3315200, "num_env_steps_trained": 3315200, "num_agent_steps_sampled": 6630400, "num_agent_steps_trained": 6630400}, "sampler_results": {"episode_reward_max": 633.0, "episode_reward_min": 234.0, "episode_reward_mean": 559.95, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 109.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 279.975}, "custom_metrics": {"sparse_reward_mean": 193.6, "sparse_reward_min": 80, "sparse_reward_max": 220, "shaped_reward_mean": 172.75, "shaped_reward_min": 74, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.99, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 17.21, 
"onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 14.86, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 16.99, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 25, "onion_drop_agent_0_mean": 0.11, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.1, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.06, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.08, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.53, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 16.8, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 5.7, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 4.99, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.34, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.63, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.25, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.18, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.07, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.24, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.53, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.23, "soup_delivery_agent_0_min": 2, 
"soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.5, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.53, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 16.8, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.53, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 16.8, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [576.0, 530.0, 579.0, 573.0, 579.0, 582.0, 573.0, 582.0, 582.0, 582.0, 525.0, 579.0, 573.0, 573.0, 582.0, 576.0, 579.0, 576.0, 522.0, 582.0, 582.0, 582.0, 573.0, 582.0, 570.0, 525.0, 576.0, 579.0, 541.0, 582.0, 573.0, 633.0, 570.0, 576.0, 582.0, 516.0, 587.0, 536.0, 567.0, 573.0, 296.0, 582.0, 576.0, 582.0, 525.0, 576.0, 576.0, 579.0, 465.0, 582.0, 522.0, 564.0, 576.0, 630.0, 525.0, 570.0, 522.0, 579.0, 533.0, 576.0, 522.0, 570.0, 579.0, 576.0, 579.0, 522.0, 573.0, 576.0, 579.0, 573.0, 582.0, 576.0, 576.0, 522.0, 576.0, 536.0, 579.0, 633.0, 576.0, 530.0, 576.0, 579.0, 576.0, 576.0, 576.0, 522.0, 522.0, 576.0, 579.0, 234.0, 573.0, 522.0, 525.0, 579.0, 570.0, 573.0, 579.0, 525.0, 573.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [287.0, 289.0, 257.0, 273.0, 287.0, 292.0, 295.0, 278.0, 286.0, 293.0, 292.0, 290.0, 280.0, 293.0, 289.0, 293.0, 285.0, 297.0, 288.0, 294.0, 259.0, 266.0, 285.0, 294.0, 282.0, 291.0, 288.0, 285.0, 290.0, 292.0, 289.0, 287.0, 289.0, 290.0, 294.0, 282.0, 268.0, 254.0, 286.0, 296.0, 288.0, 294.0, 293.0, 289.0, 290.0, 283.0, 294.0, 288.0, 288.0, 
282.0, 261.0, 264.0, 289.0, 287.0, 285.0, 294.0, 282.0, 259.0, 291.0, 291.0, 286.0, 287.0, 309.0, 324.0, 280.0, 290.0, 291.0, 285.0, 293.0, 289.0, 261.0, 255.0, 291.0, 296.0, 260.0, 276.0, 280.0, 287.0, 295.0, 278.0, 142.0, 154.0, 295.0, 287.0, 283.0, 293.0, 298.0, 284.0, 254.0, 271.0, 278.0, 298.0, 294.0, 282.0, 289.0, 290.0, 231.0, 234.0, 296.0, 286.0, 262.0, 260.0, 294.0, 270.0, 291.0, 285.0, 313.0, 317.0, 262.0, 263.0, 279.0, 291.0, 252.0, 270.0, 282.0, 297.0, 270.0, 263.0, 293.0, 283.0, 253.0, 269.0, 279.0, 291.0, 280.0, 299.0, 286.0, 290.0, 288.0, 291.0, 260.0, 262.0, 285.0, 288.0, 293.0, 283.0, 288.0, 291.0, 289.0, 284.0, 294.0, 288.0, 295.0, 281.0, 282.0, 294.0, 267.0, 255.0, 279.0, 297.0, 277.0, 259.0, 288.0, 291.0, 319.0, 314.0, 294.0, 282.0, 271.0, 259.0, 286.0, 290.0, 289.0, 290.0, 288.0, 288.0, 290.0, 286.0, 290.0, 286.0, 258.0, 264.0, 264.0, 258.0, 286.0, 290.0, 290.0, 289.0, 125.0, 109.0, 289.0, 284.0, 259.0, 263.0, 259.0, 266.0, 282.0, 297.0, 287.0, 283.0, 287.0, 286.0, 286.0, 293.0, 254.0, 271.0, 284.0, 289.0, 280.0, 299.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7064238285392225, "mean_inference_ms": 1.2672823442085852, "mean_action_processing_ms": 0.1349392502739434, "mean_env_wait_ms": 0.850318293245503, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 633.0, "episode_reward_min": 234.0, "episode_reward_mean": 559.95, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 109.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 279.975}, "hist_stats": {"episode_reward": [576.0, 530.0, 579.0, 573.0, 579.0, 582.0, 573.0, 582.0, 582.0, 582.0, 525.0, 579.0, 573.0, 573.0, 582.0, 576.0, 579.0, 576.0, 522.0, 582.0, 582.0, 582.0, 573.0, 582.0, 570.0, 525.0, 576.0, 579.0, 541.0, 582.0, 573.0, 633.0, 570.0, 576.0, 582.0, 516.0, 587.0, 536.0, 567.0, 573.0, 296.0, 582.0, 576.0, 582.0, 525.0, 576.0, 576.0, 579.0, 465.0, 582.0, 522.0, 564.0, 576.0, 630.0, 525.0, 570.0, 522.0, 
579.0, 533.0, 576.0, 522.0, 570.0, 579.0, 576.0, 579.0, 522.0, 573.0, 576.0, 579.0, 573.0, 582.0, 576.0, 576.0, 522.0, 576.0, 536.0, 579.0, 633.0, 576.0, 530.0, 576.0, 579.0, 576.0, 576.0, 576.0, 522.0, 522.0, 576.0, 579.0, 234.0, 573.0, 522.0, 525.0, 579.0, 570.0, 573.0, 579.0, 525.0, 573.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [287.0, 289.0, 257.0, 273.0, 287.0, 292.0, 295.0, 278.0, 286.0, 293.0, 292.0, 290.0, 280.0, 293.0, 289.0, 293.0, 285.0, 297.0, 288.0, 294.0, 259.0, 266.0, 285.0, 294.0, 282.0, 291.0, 288.0, 285.0, 290.0, 292.0, 289.0, 287.0, 289.0, 290.0, 294.0, 282.0, 268.0, 254.0, 286.0, 296.0, 288.0, 294.0, 293.0, 289.0, 290.0, 283.0, 294.0, 288.0, 288.0, 282.0, 261.0, 264.0, 289.0, 287.0, 285.0, 294.0, 282.0, 259.0, 291.0, 291.0, 286.0, 287.0, 309.0, 324.0, 280.0, 290.0, 291.0, 285.0, 293.0, 289.0, 261.0, 255.0, 291.0, 296.0, 260.0, 276.0, 280.0, 287.0, 295.0, 278.0, 142.0, 154.0, 295.0, 287.0, 283.0, 293.0, 298.0, 284.0, 254.0, 271.0, 278.0, 298.0, 294.0, 282.0, 289.0, 290.0, 231.0, 234.0, 296.0, 286.0, 262.0, 260.0, 294.0, 270.0, 291.0, 285.0, 313.0, 317.0, 262.0, 263.0, 279.0, 291.0, 252.0, 270.0, 282.0, 297.0, 270.0, 263.0, 293.0, 283.0, 253.0, 269.0, 279.0, 291.0, 280.0, 299.0, 286.0, 290.0, 288.0, 291.0, 260.0, 262.0, 285.0, 288.0, 293.0, 283.0, 288.0, 291.0, 289.0, 284.0, 294.0, 288.0, 295.0, 281.0, 282.0, 294.0, 267.0, 255.0, 279.0, 297.0, 277.0, 259.0, 288.0, 291.0, 319.0, 314.0, 294.0, 282.0, 271.0, 259.0, 286.0, 290.0, 289.0, 290.0, 288.0, 
288.0, 290.0, 286.0, 290.0, 286.0, 258.0, 264.0, 264.0, 258.0, 286.0, 290.0, 290.0, 289.0, 125.0, 109.0, 289.0, 284.0, 259.0, 263.0, 259.0, 266.0, 282.0, 297.0, 287.0, 283.0, 287.0, 286.0, 286.0, 293.0, 254.0, 271.0, 284.0, 289.0, 280.0, 299.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7064238285392225, "mean_inference_ms": 1.2672823442085852, "mean_action_processing_ms": 0.1349392502739434, "mean_env_wait_ms": 0.850318293245503, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 6630400, "num_agent_steps_trained": 6630400, "num_env_steps_sampled": 3315200, "num_env_steps_trained": 3315200, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 3315200, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 6630400, "timers": {"training_iteration_time_ms": 3610.145, "learn_time_ms": 1077.179, "learn_throughput": 11882.887, "synch_weights_time_ms": 11.925}, "counters": {"num_env_steps_sampled": 3315200, "num_env_steps_trained": 3315200, "num_agent_steps_sampled": 6630400, "num_agent_steps_trained": 6630400}, "done": false, "episodes_total": 8288, "training_iteration": 259, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-17-18", "timestamp": 1666581438, "time_this_iter_s": 3.6026687622070312, "time_total_s": 1001.5176296234131, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1001.5176296234131, "timesteps_since_restore": 0, "iterations_since_restore": 259, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.559999999999995, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 193.4, "sparse_reward_min": 80, "sparse_reward_max": 220, "shaped_reward_mean": 172.24, "shaped_reward_min": 74, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.14, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 17.11, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 14.95, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 16.91, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 26, "onion_drop_agent_0_mean": 0.12, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.1, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.08, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, 
"useful_onion_drop_agent_1_mean": 0.06, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.67, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 16.67, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 26, "dish_pickup_agent_0_mean": 5.51, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.01, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.18, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.66, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.15, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.17, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.05, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.16, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.58, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.15, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.55, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.67, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 16.67, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 26, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.67, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 16.67, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 26, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0006135091534815729, "policy_loss": 0.00027569825761020184, "vf_loss": 
7.666841506958008, "vf_explained_var": 0.5820589661598206, "kl": 0.0019985504914075136, "entropy": 0.8577451109886169, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 3328000, "num_env_steps_trained": 3328000, "num_agent_steps_sampled": 6656000, "num_agent_steps_trained": 6656000}, "sampler_results": {"episode_reward_max": 633.0, "episode_reward_min": 234.0, "episode_reward_mean": 559.04, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 109.0}, "policy_reward_max": {"ppo": 319.0}, "policy_reward_mean": {"ppo": 279.52}, "custom_metrics": {"sparse_reward_mean": 193.4, "sparse_reward_min": 80, "sparse_reward_max": 220, "shaped_reward_mean": 172.24, "shaped_reward_min": 74, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.14, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 17.11, 
"onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 14.95, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 16.91, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 26, "onion_drop_agent_0_mean": 0.12, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.1, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.08, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.06, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.67, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 16.67, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 26, "dish_pickup_agent_0_mean": 5.51, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.01, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.18, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.66, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.15, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.17, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.05, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.16, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.58, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.15, "soup_delivery_agent_0_min": 1, 
"soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.55, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.67, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 16.67, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 26, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.67, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 16.67, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 26, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [570.0, 576.0, 582.0, 516.0, 587.0, 536.0, 567.0, 573.0, 296.0, 582.0, 576.0, 582.0, 525.0, 576.0, 576.0, 579.0, 465.0, 582.0, 522.0, 564.0, 576.0, 630.0, 525.0, 570.0, 522.0, 579.0, 533.0, 576.0, 522.0, 570.0, 579.0, 576.0, 579.0, 522.0, 573.0, 576.0, 579.0, 573.0, 582.0, 576.0, 576.0, 522.0, 576.0, 536.0, 579.0, 633.0, 576.0, 530.0, 576.0, 579.0, 576.0, 576.0, 576.0, 522.0, 522.0, 576.0, 579.0, 234.0, 573.0, 522.0, 525.0, 579.0, 570.0, 573.0, 579.0, 525.0, 573.0, 579.0, 579.0, 587.0, 579.0, 573.0, 579.0, 576.0, 522.0, 522.0, 582.0, 573.0, 579.0, 579.0, 573.0, 522.0, 582.0, 582.0, 570.0, 582.0, 576.0, 579.0, 570.0, 582.0, 576.0, 579.0, 522.0, 576.0, 576.0, 579.0, 576.0, 522.0, 582.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [280.0, 290.0, 291.0, 285.0, 293.0, 289.0, 261.0, 255.0, 291.0, 296.0, 260.0, 276.0, 280.0, 287.0, 295.0, 278.0, 142.0, 154.0, 295.0, 287.0, 283.0, 293.0, 298.0, 284.0, 254.0, 271.0, 278.0, 298.0, 294.0, 282.0, 289.0, 290.0, 231.0, 234.0, 296.0, 286.0, 262.0, 260.0, 294.0, 270.0, 291.0, 285.0, 313.0, 317.0, 262.0, 263.0, 279.0, 291.0, 252.0, 
270.0, 282.0, 297.0, 270.0, 263.0, 293.0, 283.0, 253.0, 269.0, 279.0, 291.0, 280.0, 299.0, 286.0, 290.0, 288.0, 291.0, 260.0, 262.0, 285.0, 288.0, 293.0, 283.0, 288.0, 291.0, 289.0, 284.0, 294.0, 288.0, 295.0, 281.0, 282.0, 294.0, 267.0, 255.0, 279.0, 297.0, 277.0, 259.0, 288.0, 291.0, 319.0, 314.0, 294.0, 282.0, 271.0, 259.0, 286.0, 290.0, 289.0, 290.0, 288.0, 288.0, 290.0, 286.0, 290.0, 286.0, 258.0, 264.0, 264.0, 258.0, 286.0, 290.0, 290.0, 289.0, 125.0, 109.0, 289.0, 284.0, 259.0, 263.0, 259.0, 266.0, 282.0, 297.0, 287.0, 283.0, 287.0, 286.0, 286.0, 293.0, 254.0, 271.0, 284.0, 289.0, 280.0, 299.0, 289.0, 290.0, 292.0, 295.0, 287.0, 292.0, 288.0, 285.0, 293.0, 286.0, 291.0, 285.0, 262.0, 260.0, 261.0, 261.0, 297.0, 285.0, 278.0, 295.0, 288.0, 291.0, 289.0, 290.0, 280.0, 293.0, 254.0, 268.0, 290.0, 292.0, 295.0, 287.0, 280.0, 290.0, 289.0, 293.0, 291.0, 285.0, 285.0, 294.0, 284.0, 286.0, 288.0, 294.0, 282.0, 294.0, 284.0, 295.0, 262.0, 260.0, 284.0, 292.0, 283.0, 293.0, 285.0, 294.0, 287.0, 289.0, 263.0, 259.0, 289.0, 293.0, 296.0, 280.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.706283089866591, "mean_inference_ms": 1.2669765834633018, "mean_action_processing_ms": 0.13492354244984092, "mean_env_wait_ms": 0.8501340572572202, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 633.0, "episode_reward_min": 234.0, "episode_reward_mean": 559.04, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 109.0}, "policy_reward_max": {"ppo": 319.0}, "policy_reward_mean": {"ppo": 279.52}, "hist_stats": {"episode_reward": [570.0, 576.0, 582.0, 516.0, 587.0, 536.0, 567.0, 573.0, 296.0, 582.0, 576.0, 582.0, 525.0, 576.0, 576.0, 579.0, 465.0, 582.0, 522.0, 564.0, 576.0, 630.0, 525.0, 570.0, 522.0, 579.0, 533.0, 576.0, 522.0, 570.0, 579.0, 576.0, 579.0, 522.0, 573.0, 576.0, 579.0, 573.0, 582.0, 576.0, 576.0, 522.0, 576.0, 536.0, 579.0, 633.0, 576.0, 530.0, 576.0, 579.0, 576.0, 576.0, 576.0, 522.0, 522.0, 576.0, 579.0, 
234.0, 573.0, 522.0, 525.0, 579.0, 570.0, 573.0, 579.0, 525.0, 573.0, 579.0, 579.0, 587.0, 579.0, 573.0, 579.0, 576.0, 522.0, 522.0, 582.0, 573.0, 579.0, 579.0, 573.0, 522.0, 582.0, 582.0, 570.0, 582.0, 576.0, 579.0, 570.0, 582.0, 576.0, 579.0, 522.0, 576.0, 576.0, 579.0, 576.0, 522.0, 582.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [280.0, 290.0, 291.0, 285.0, 293.0, 289.0, 261.0, 255.0, 291.0, 296.0, 260.0, 276.0, 280.0, 287.0, 295.0, 278.0, 142.0, 154.0, 295.0, 287.0, 283.0, 293.0, 298.0, 284.0, 254.0, 271.0, 278.0, 298.0, 294.0, 282.0, 289.0, 290.0, 231.0, 234.0, 296.0, 286.0, 262.0, 260.0, 294.0, 270.0, 291.0, 285.0, 313.0, 317.0, 262.0, 263.0, 279.0, 291.0, 252.0, 270.0, 282.0, 297.0, 270.0, 263.0, 293.0, 283.0, 253.0, 269.0, 279.0, 291.0, 280.0, 299.0, 286.0, 290.0, 288.0, 291.0, 260.0, 262.0, 285.0, 288.0, 293.0, 283.0, 288.0, 291.0, 289.0, 284.0, 294.0, 288.0, 295.0, 281.0, 282.0, 294.0, 267.0, 255.0, 279.0, 297.0, 277.0, 259.0, 288.0, 291.0, 319.0, 314.0, 294.0, 282.0, 271.0, 259.0, 286.0, 290.0, 289.0, 290.0, 288.0, 288.0, 290.0, 286.0, 290.0, 286.0, 258.0, 264.0, 264.0, 258.0, 286.0, 290.0, 290.0, 289.0, 125.0, 109.0, 289.0, 284.0, 259.0, 263.0, 259.0, 266.0, 282.0, 297.0, 287.0, 283.0, 287.0, 286.0, 286.0, 293.0, 254.0, 271.0, 284.0, 289.0, 280.0, 299.0, 289.0, 290.0, 292.0, 295.0, 287.0, 292.0, 288.0, 285.0, 293.0, 286.0, 291.0, 285.0, 262.0, 260.0, 261.0, 261.0, 297.0, 285.0, 278.0, 295.0, 288.0, 291.0, 289.0, 290.0, 280.0, 293.0, 254.0, 268.0, 290.0, 
292.0, 295.0, 287.0, 280.0, 290.0, 289.0, 293.0, 291.0, 285.0, 285.0, 294.0, 284.0, 286.0, 288.0, 294.0, 282.0, 294.0, 284.0, 295.0, 262.0, 260.0, 284.0, 292.0, 283.0, 293.0, 285.0, 294.0, 287.0, 289.0, 263.0, 259.0, 289.0, 293.0, 296.0, 280.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.706283089866591, "mean_inference_ms": 1.2669765834633018, "mean_action_processing_ms": 0.13492354244984092, "mean_env_wait_ms": 0.8501340572572202, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 6656000, "num_agent_steps_trained": 6656000, "num_env_steps_sampled": 3328000, "num_env_steps_trained": 3328000, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 3328000, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 6656000, "timers": {"training_iteration_time_ms": 3613.965, "learn_time_ms": 1085.571, "learn_throughput": 11791.033, "synch_weights_time_ms": 12.394}, "counters": {"num_env_steps_sampled": 3328000, "num_env_steps_trained": 3328000, "num_agent_steps_sampled": 6656000, "num_agent_steps_trained": 6656000}, "done": false, "episodes_total": 8320, "training_iteration": 260, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-17-22", "timestamp": 1666581442, "time_this_iter_s": 3.7049059867858887, "time_total_s": 1005.222535610199, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1005.222535610199, "timesteps_since_restore": 0, "iterations_since_restore": 260, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.816666666666666, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 194.6, "sparse_reward_min": 80, "sparse_reward_max": 220, "shaped_reward_mean": 173.44, "shaped_reward_min": 74, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.7, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 17.53, "onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 14.55, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 17.42, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 26, "onion_drop_agent_0_mean": 0.1, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.05, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.04, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, 
"useful_onion_drop_agent_1_mean": 0.0, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 0, "potting_onion_agent_0_mean": 14.36, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 17.18, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 26, "dish_pickup_agent_0_mean": 5.79, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 4.83, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.45, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.49, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.14, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.2, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.04, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.04, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.42, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.38, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.39, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.36, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.36, "optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 17.18, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 26, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.36, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 17.18, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 26, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0020203336607664824, "policy_loss": -0.0023637900594621897, "vf_loss": 
7.752152442932129, "vf_explained_var": 0.5669132471084595, "kl": 0.00226578488945961, "entropy": 0.8635151386260986, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 3340800, "num_env_steps_trained": 3340800, "num_agent_steps_sampled": 6681600, "num_agent_steps_trained": 6681600}, "sampler_results": {"episode_reward_max": 633.0, "episode_reward_min": 234.0, "episode_reward_mean": 562.64, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 109.0}, "policy_reward_max": {"ppo": 319.0}, "policy_reward_mean": {"ppo": 281.32}, "custom_metrics": {"sparse_reward_mean": 194.6, "sparse_reward_min": 80, "sparse_reward_max": 220, "shaped_reward_mean": 173.44, "shaped_reward_min": 74, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.7, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 17.53, 
"onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 14.55, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 17.42, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 26, "onion_drop_agent_0_mean": 0.1, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.05, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.04, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.0, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 0, "potting_onion_agent_0_mean": 14.36, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 17.18, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 26, "dish_pickup_agent_0_mean": 5.79, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 4.83, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.45, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.49, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.14, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.2, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.04, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.04, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.42, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.38, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.39, "soup_delivery_agent_0_min": 1, 
"soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.36, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.36, "optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 17.18, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 26, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.36, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 17.18, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 26, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 522.0, 573.0, 576.0, 579.0, 573.0, 582.0, 576.0, 576.0, 522.0, 576.0, 536.0, 579.0, 633.0, 576.0, 530.0, 576.0, 579.0, 576.0, 576.0, 576.0, 522.0, 522.0, 576.0, 579.0, 234.0, 573.0, 522.0, 525.0, 579.0, 570.0, 573.0, 579.0, 525.0, 573.0, 579.0, 579.0, 587.0, 579.0, 573.0, 579.0, 576.0, 522.0, 522.0, 582.0, 573.0, 579.0, 579.0, 573.0, 522.0, 582.0, 582.0, 570.0, 582.0, 576.0, 579.0, 570.0, 582.0, 576.0, 579.0, 522.0, 576.0, 576.0, 579.0, 576.0, 522.0, 582.0, 576.0, 297.0, 582.0, 576.0, 573.0, 579.0, 576.0, 582.0, 573.0, 570.0, 579.0, 582.0, 570.0, 587.0, 579.0, 576.0, 522.0, 587.0, 573.0, 576.0, 582.0, 570.0, 582.0, 573.0, 584.0, 582.0, 579.0, 579.0, 522.0, 573.0, 579.0, 527.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [288.0, 291.0, 260.0, 262.0, 285.0, 288.0, 293.0, 283.0, 288.0, 291.0, 289.0, 284.0, 294.0, 288.0, 295.0, 281.0, 282.0, 294.0, 267.0, 255.0, 279.0, 297.0, 277.0, 259.0, 288.0, 291.0, 319.0, 314.0, 294.0, 282.0, 271.0, 259.0, 286.0, 290.0, 289.0, 290.0, 288.0, 288.0, 290.0, 286.0, 290.0, 286.0, 258.0, 264.0, 264.0, 258.0, 286.0, 290.0, 290.0, 
289.0, 125.0, 109.0, 289.0, 284.0, 259.0, 263.0, 259.0, 266.0, 282.0, 297.0, 287.0, 283.0, 287.0, 286.0, 286.0, 293.0, 254.0, 271.0, 284.0, 289.0, 280.0, 299.0, 289.0, 290.0, 292.0, 295.0, 287.0, 292.0, 288.0, 285.0, 293.0, 286.0, 291.0, 285.0, 262.0, 260.0, 261.0, 261.0, 297.0, 285.0, 278.0, 295.0, 288.0, 291.0, 289.0, 290.0, 280.0, 293.0, 254.0, 268.0, 290.0, 292.0, 295.0, 287.0, 280.0, 290.0, 289.0, 293.0, 291.0, 285.0, 285.0, 294.0, 284.0, 286.0, 288.0, 294.0, 282.0, 294.0, 284.0, 295.0, 262.0, 260.0, 284.0, 292.0, 283.0, 293.0, 285.0, 294.0, 287.0, 289.0, 263.0, 259.0, 289.0, 293.0, 296.0, 280.0, 149.0, 148.0, 286.0, 296.0, 292.0, 284.0, 280.0, 293.0, 291.0, 288.0, 294.0, 282.0, 289.0, 293.0, 288.0, 285.0, 283.0, 287.0, 288.0, 291.0, 291.0, 291.0, 282.0, 288.0, 300.0, 287.0, 288.0, 291.0, 296.0, 280.0, 255.0, 267.0, 294.0, 293.0, 290.0, 283.0, 288.0, 288.0, 289.0, 293.0, 283.0, 287.0, 291.0, 291.0, 277.0, 296.0, 292.0, 292.0, 297.0, 285.0, 292.0, 287.0, 290.0, 289.0, 266.0, 256.0, 288.0, 285.0, 291.0, 288.0, 273.0, 254.0, 292.0, 287.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7061518572828223, "mean_inference_ms": 1.2667114392745973, "mean_action_processing_ms": 0.13491271579073816, "mean_env_wait_ms": 0.8499858420504343, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 633.0, "episode_reward_min": 234.0, "episode_reward_mean": 562.64, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 109.0}, "policy_reward_max": {"ppo": 319.0}, "policy_reward_mean": {"ppo": 281.32}, "hist_stats": {"episode_reward": [579.0, 522.0, 573.0, 576.0, 579.0, 573.0, 582.0, 576.0, 576.0, 522.0, 576.0, 536.0, 579.0, 633.0, 576.0, 530.0, 576.0, 579.0, 576.0, 576.0, 576.0, 522.0, 522.0, 576.0, 579.0, 234.0, 573.0, 522.0, 525.0, 579.0, 570.0, 573.0, 579.0, 525.0, 573.0, 579.0, 579.0, 587.0, 579.0, 573.0, 579.0, 576.0, 522.0, 522.0, 582.0, 573.0, 579.0, 579.0, 573.0, 522.0, 582.0, 582.0, 570.0, 582.0, 576.0, 579.0, 570.0, 
582.0, 576.0, 579.0, 522.0, 576.0, 576.0, 579.0, 576.0, 522.0, 582.0, 576.0, 297.0, 582.0, 576.0, 573.0, 579.0, 576.0, 582.0, 573.0, 570.0, 579.0, 582.0, 570.0, 587.0, 579.0, 576.0, 522.0, 587.0, 573.0, 576.0, 582.0, 570.0, 582.0, 573.0, 584.0, 582.0, 579.0, 579.0, 522.0, 573.0, 579.0, 527.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [288.0, 291.0, 260.0, 262.0, 285.0, 288.0, 293.0, 283.0, 288.0, 291.0, 289.0, 284.0, 294.0, 288.0, 295.0, 281.0, 282.0, 294.0, 267.0, 255.0, 279.0, 297.0, 277.0, 259.0, 288.0, 291.0, 319.0, 314.0, 294.0, 282.0, 271.0, 259.0, 286.0, 290.0, 289.0, 290.0, 288.0, 288.0, 290.0, 286.0, 290.0, 286.0, 258.0, 264.0, 264.0, 258.0, 286.0, 290.0, 290.0, 289.0, 125.0, 109.0, 289.0, 284.0, 259.0, 263.0, 259.0, 266.0, 282.0, 297.0, 287.0, 283.0, 287.0, 286.0, 286.0, 293.0, 254.0, 271.0, 284.0, 289.0, 280.0, 299.0, 289.0, 290.0, 292.0, 295.0, 287.0, 292.0, 288.0, 285.0, 293.0, 286.0, 291.0, 285.0, 262.0, 260.0, 261.0, 261.0, 297.0, 285.0, 278.0, 295.0, 288.0, 291.0, 289.0, 290.0, 280.0, 293.0, 254.0, 268.0, 290.0, 292.0, 295.0, 287.0, 280.0, 290.0, 289.0, 293.0, 291.0, 285.0, 285.0, 294.0, 284.0, 286.0, 288.0, 294.0, 282.0, 294.0, 284.0, 295.0, 262.0, 260.0, 284.0, 292.0, 283.0, 293.0, 285.0, 294.0, 287.0, 289.0, 263.0, 259.0, 289.0, 293.0, 296.0, 280.0, 149.0, 148.0, 286.0, 296.0, 292.0, 284.0, 280.0, 293.0, 291.0, 288.0, 294.0, 282.0, 289.0, 293.0, 288.0, 285.0, 283.0, 287.0, 288.0, 291.0, 291.0, 291.0, 282.0, 288.0, 300.0, 287.0, 288.0, 291.0, 296.0, 
280.0, 255.0, 267.0, 294.0, 293.0, 290.0, 283.0, 288.0, 288.0, 289.0, 293.0, 283.0, 287.0, 291.0, 291.0, 277.0, 296.0, 292.0, 292.0, 297.0, 285.0, 292.0, 287.0, 290.0, 289.0, 266.0, 256.0, 288.0, 285.0, 291.0, 288.0, 273.0, 254.0, 292.0, 287.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7061518572828223, "mean_inference_ms": 1.2667114392745973, "mean_action_processing_ms": 0.13491271579073816, "mean_env_wait_ms": 0.8499858420504343, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 6681600, "num_agent_steps_trained": 6681600, "num_env_steps_sampled": 3340800, "num_env_steps_trained": 3340800, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 3340800, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 6681600, "timers": {"training_iteration_time_ms": 3592.788, "learn_time_ms": 1077.759, "learn_throughput": 11876.493, "synch_weights_time_ms": 12.268}, "counters": {"num_env_steps_sampled": 3340800, "num_env_steps_trained": 3340800, "num_agent_steps_sampled": 6681600, "num_agent_steps_trained": 6681600}, "done": false, "episodes_total": 8352, "training_iteration": 261, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-17-26", "timestamp": 1666581446, "time_this_iter_s": 3.7127888202667236, "time_total_s": 1008.9353244304657, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1008.9353244304657, "timesteps_since_restore": 0, "iterations_since_restore": 261, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.72, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 197.6, "sparse_reward_min": 100, "sparse_reward_max": 220, "shaped_reward_mean": 176.04, "shaped_reward_min": 97, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.12, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 17.45, "onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 14.92, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 17.34, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 26, "onion_drop_agent_0_mean": 0.1, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.08, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.03, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, 
"useful_onion_drop_agent_1_mean": 0.01, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 14.74, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 17.17, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 26, "dish_pickup_agent_0_mean": 5.73, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.14, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.37, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.8, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.14, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.21, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.04, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.04, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.33, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.64, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.31, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.58, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.74, "optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 17.17, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 26, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.74, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 17.17, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 26, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0016281807329505682, "policy_loss": -0.0019714063964784145, "vf_loss": 
7.717082977294922, "vf_explained_var": 0.5875511169433594, "kl": 0.00202760798856616, "entropy": 0.8569626808166504, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 3353600, "num_env_steps_trained": 3353600, "num_agent_steps_sampled": 6707200, "num_agent_steps_trained": 6707200}, "sampler_results": {"episode_reward_max": 639.0, "episode_reward_min": 297.0, "episode_reward_mean": 571.24, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 148.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 285.62}, "custom_metrics": {"sparse_reward_mean": 197.6, "sparse_reward_min": 100, "sparse_reward_max": 220, "shaped_reward_mean": 176.04, "shaped_reward_min": 97, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.12, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 17.45, 
"onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 14.92, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 17.34, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 26, "onion_drop_agent_0_mean": 0.1, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.08, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.03, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.01, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 14.74, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 17.17, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 26, "dish_pickup_agent_0_mean": 5.73, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.14, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.37, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.8, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.14, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.21, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.04, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.04, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.33, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.64, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.31, "soup_delivery_agent_0_min": 1, 
"soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.58, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.74, "optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 17.17, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 26, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.74, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 17.17, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 26, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 525.0, 573.0, 579.0, 579.0, 587.0, 579.0, 573.0, 579.0, 576.0, 522.0, 522.0, 582.0, 573.0, 579.0, 579.0, 573.0, 522.0, 582.0, 582.0, 570.0, 582.0, 576.0, 579.0, 570.0, 582.0, 576.0, 579.0, 522.0, 576.0, 576.0, 579.0, 576.0, 522.0, 582.0, 576.0, 297.0, 582.0, 576.0, 573.0, 579.0, 576.0, 582.0, 573.0, 570.0, 579.0, 582.0, 570.0, 587.0, 579.0, 576.0, 522.0, 587.0, 573.0, 576.0, 582.0, 570.0, 582.0, 573.0, 584.0, 582.0, 579.0, 579.0, 522.0, 573.0, 579.0, 527.0, 579.0, 576.0, 587.0, 582.0, 579.0, 579.0, 567.0, 582.0, 579.0, 584.0, 576.0, 582.0, 576.0, 579.0, 576.0, 579.0, 639.0, 582.0, 573.0, 576.0, 552.0, 576.0, 579.0, 570.0, 582.0, 579.0, 636.0, 576.0, 584.0, 579.0, 582.0, 579.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [286.0, 293.0, 254.0, 271.0, 284.0, 289.0, 280.0, 299.0, 289.0, 290.0, 292.0, 295.0, 287.0, 292.0, 288.0, 285.0, 293.0, 286.0, 291.0, 285.0, 262.0, 260.0, 261.0, 261.0, 297.0, 285.0, 278.0, 295.0, 288.0, 291.0, 289.0, 290.0, 280.0, 293.0, 254.0, 268.0, 290.0, 292.0, 295.0, 287.0, 280.0, 290.0, 289.0, 293.0, 291.0, 285.0, 285.0, 294.0, 284.0, 
286.0, 288.0, 294.0, 282.0, 294.0, 284.0, 295.0, 262.0, 260.0, 284.0, 292.0, 283.0, 293.0, 285.0, 294.0, 287.0, 289.0, 263.0, 259.0, 289.0, 293.0, 296.0, 280.0, 149.0, 148.0, 286.0, 296.0, 292.0, 284.0, 280.0, 293.0, 291.0, 288.0, 294.0, 282.0, 289.0, 293.0, 288.0, 285.0, 283.0, 287.0, 288.0, 291.0, 291.0, 291.0, 282.0, 288.0, 300.0, 287.0, 288.0, 291.0, 296.0, 280.0, 255.0, 267.0, 294.0, 293.0, 290.0, 283.0, 288.0, 288.0, 289.0, 293.0, 283.0, 287.0, 291.0, 291.0, 277.0, 296.0, 292.0, 292.0, 297.0, 285.0, 292.0, 287.0, 290.0, 289.0, 266.0, 256.0, 288.0, 285.0, 291.0, 288.0, 273.0, 254.0, 292.0, 287.0, 276.0, 300.0, 288.0, 299.0, 296.0, 286.0, 285.0, 294.0, 282.0, 297.0, 291.0, 276.0, 286.0, 296.0, 292.0, 287.0, 284.0, 300.0, 278.0, 298.0, 288.0, 294.0, 295.0, 281.0, 285.0, 294.0, 295.0, 281.0, 283.0, 296.0, 324.0, 315.0, 283.0, 299.0, 288.0, 285.0, 278.0, 298.0, 278.0, 274.0, 289.0, 287.0, 293.0, 286.0, 277.0, 293.0, 292.0, 290.0, 288.0, 291.0, 314.0, 322.0, 286.0, 290.0, 290.0, 294.0, 291.0, 288.0, 295.0, 287.0, 291.0, 288.0, 282.0, 297.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7060269888990688, "mean_inference_ms": 1.2664474945522755, "mean_action_processing_ms": 0.13490230202482045, "mean_env_wait_ms": 0.8498471862817376, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 639.0, "episode_reward_min": 297.0, "episode_reward_mean": 571.24, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 148.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 285.62}, "hist_stats": {"episode_reward": [579.0, 525.0, 573.0, 579.0, 579.0, 587.0, 579.0, 573.0, 579.0, 576.0, 522.0, 522.0, 582.0, 573.0, 579.0, 579.0, 573.0, 522.0, 582.0, 582.0, 570.0, 582.0, 576.0, 579.0, 570.0, 582.0, 576.0, 579.0, 522.0, 576.0, 576.0, 579.0, 576.0, 522.0, 582.0, 576.0, 297.0, 582.0, 576.0, 573.0, 579.0, 576.0, 582.0, 573.0, 570.0, 579.0, 582.0, 570.0, 587.0, 579.0, 576.0, 522.0, 587.0, 573.0, 576.0, 582.0, 570.0, 
582.0, 573.0, 584.0, 582.0, 579.0, 579.0, 522.0, 573.0, 579.0, 527.0, 579.0, 576.0, 587.0, 582.0, 579.0, 579.0, 567.0, 582.0, 579.0, 584.0, 576.0, 582.0, 576.0, 579.0, 576.0, 579.0, 639.0, 582.0, 573.0, 576.0, 552.0, 576.0, 579.0, 570.0, 582.0, 579.0, 636.0, 576.0, 584.0, 579.0, 582.0, 579.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [286.0, 293.0, 254.0, 271.0, 284.0, 289.0, 280.0, 299.0, 289.0, 290.0, 292.0, 295.0, 287.0, 292.0, 288.0, 285.0, 293.0, 286.0, 291.0, 285.0, 262.0, 260.0, 261.0, 261.0, 297.0, 285.0, 278.0, 295.0, 288.0, 291.0, 289.0, 290.0, 280.0, 293.0, 254.0, 268.0, 290.0, 292.0, 295.0, 287.0, 280.0, 290.0, 289.0, 293.0, 291.0, 285.0, 285.0, 294.0, 284.0, 286.0, 288.0, 294.0, 282.0, 294.0, 284.0, 295.0, 262.0, 260.0, 284.0, 292.0, 283.0, 293.0, 285.0, 294.0, 287.0, 289.0, 263.0, 259.0, 289.0, 293.0, 296.0, 280.0, 149.0, 148.0, 286.0, 296.0, 292.0, 284.0, 280.0, 293.0, 291.0, 288.0, 294.0, 282.0, 289.0, 293.0, 288.0, 285.0, 283.0, 287.0, 288.0, 291.0, 291.0, 291.0, 282.0, 288.0, 300.0, 287.0, 288.0, 291.0, 296.0, 280.0, 255.0, 267.0, 294.0, 293.0, 290.0, 283.0, 288.0, 288.0, 289.0, 293.0, 283.0, 287.0, 291.0, 291.0, 277.0, 296.0, 292.0, 292.0, 297.0, 285.0, 292.0, 287.0, 290.0, 289.0, 266.0, 256.0, 288.0, 285.0, 291.0, 288.0, 273.0, 254.0, 292.0, 287.0, 276.0, 300.0, 288.0, 299.0, 296.0, 286.0, 285.0, 294.0, 282.0, 297.0, 291.0, 276.0, 286.0, 296.0, 292.0, 287.0, 284.0, 300.0, 278.0, 298.0, 288.0, 294.0, 295.0, 281.0, 285.0, 294.0, 295.0, 281.0, 283.0, 
296.0, 324.0, 315.0, 283.0, 299.0, 288.0, 285.0, 278.0, 298.0, 278.0, 274.0, 289.0, 287.0, 293.0, 286.0, 277.0, 293.0, 292.0, 290.0, 288.0, 291.0, 314.0, 322.0, 286.0, 290.0, 290.0, 294.0, 291.0, 288.0, 295.0, 287.0, 291.0, 288.0, 282.0, 297.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7060269888990688, "mean_inference_ms": 1.2664474945522755, "mean_action_processing_ms": 0.13490230202482045, "mean_env_wait_ms": 0.8498471862817376, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 6707200, "num_agent_steps_trained": 6707200, "num_env_steps_sampled": 3353600, "num_env_steps_trained": 3353600, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 3353600, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 6707200, "timers": {"training_iteration_time_ms": 3580.2, "learn_time_ms": 1085.817, "learn_throughput": 11788.361, "synch_weights_time_ms": 10.814}, "counters": {"num_env_steps_sampled": 3353600, "num_env_steps_trained": 3353600, "num_agent_steps_sampled": 6707200, "num_agent_steps_trained": 6707200}, "done": false, "episodes_total": 8384, "training_iteration": 262, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-17-30", "timestamp": 1666581450, "time_this_iter_s": 3.735849142074585, "time_total_s": 1012.6711735725403, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1012.6711735725403, "timesteps_since_restore": 0, "iterations_since_restore": 262, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.650000000000002, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 196.2, "sparse_reward_min": 20, "sparse_reward_max": 220, "shaped_reward_mean": 175.09, "shaped_reward_min": 29, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.72, "onion_pickup_agent_0_min": 2, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 17.53, "onion_pickup_agent_1_min": 4, "onion_pickup_agent_1_max": 27, "useful_onion_pickup_agent_0_mean": 14.61, "useful_onion_pickup_agent_0_min": 2, "useful_onion_pickup_agent_0_max": 25, "useful_onion_pickup_agent_1_mean": 17.41, "useful_onion_pickup_agent_1_min": 4, "useful_onion_pickup_agent_1_max": 27, "onion_drop_agent_0_mean": 0.09, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.07, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.02, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, 
"useful_onion_drop_agent_1_mean": 0.02, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 14.38, "potting_onion_agent_0_min": 2, "potting_onion_agent_0_max": 25, "potting_onion_agent_1_mean": 17.27, "potting_onion_agent_1_min": 4, "potting_onion_agent_1_max": 27, "dish_pickup_agent_0_mean": 5.93, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 4.91, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.58, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.6, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.18, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.18, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.04, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.51, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.44, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.46, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.38, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.38, "optimal_onion_potting_agent_0_min": 2, "optimal_onion_potting_agent_0_max": 25, "optimal_onion_potting_agent_1_mean": 17.27, "optimal_onion_potting_agent_1_min": 4, "optimal_onion_potting_agent_1_max": 27, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.38, "viable_onion_potting_agent_0_min": 2, "viable_onion_potting_agent_0_max": 25, "viable_onion_potting_agent_1_mean": 17.27, "viable_onion_potting_agent_1_min": 4, "viable_onion_potting_agent_1_max": 27, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0011134549276903272, "policy_loss": 0.0007515454199165106, "vf_loss": 
7.840696334838867, "vf_explained_var": 0.5784326791763306, "kl": 0.016453076153993607, "entropy": 0.8443182706832886, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 3366400, "num_env_steps_trained": 3366400, "num_agent_steps_sampled": 6732800, "num_agent_steps_trained": 6732800}, "sampler_results": {"episode_reward_max": 639.0, "episode_reward_min": 69.0, "episode_reward_mean": 567.49, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 34.0}, "policy_reward_max": {"ppo": 326.0}, "policy_reward_mean": {"ppo": 283.745}, "custom_metrics": {"sparse_reward_mean": 196.2, "sparse_reward_min": 20, "sparse_reward_max": 220, "shaped_reward_mean": 175.09, "shaped_reward_min": 29, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.72, "onion_pickup_agent_0_min": 2, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 17.53, 
"onion_pickup_agent_1_min": 4, "onion_pickup_agent_1_max": 27, "useful_onion_pickup_agent_0_mean": 14.61, "useful_onion_pickup_agent_0_min": 2, "useful_onion_pickup_agent_0_max": 25, "useful_onion_pickup_agent_1_mean": 17.41, "useful_onion_pickup_agent_1_min": 4, "useful_onion_pickup_agent_1_max": 27, "onion_drop_agent_0_mean": 0.09, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.07, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.02, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.02, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 14.38, "potting_onion_agent_0_min": 2, "potting_onion_agent_0_max": 25, "potting_onion_agent_1_mean": 17.27, "potting_onion_agent_1_min": 4, "potting_onion_agent_1_max": 27, "dish_pickup_agent_0_mean": 5.93, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 4.91, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.58, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.6, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.18, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.18, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.04, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.51, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.44, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.46, "soup_delivery_agent_0_min": 1, 
"soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.38, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.38, "optimal_onion_potting_agent_0_min": 2, "optimal_onion_potting_agent_0_max": 25, "optimal_onion_potting_agent_1_mean": 17.27, "optimal_onion_potting_agent_1_min": 4, "optimal_onion_potting_agent_1_max": 27, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.38, "viable_onion_potting_agent_0_min": 2, "viable_onion_potting_agent_0_max": 25, "viable_onion_potting_agent_1_mean": 17.27, "viable_onion_potting_agent_1_min": 4, "viable_onion_potting_agent_1_max": 27, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [576.0, 522.0, 582.0, 576.0, 297.0, 582.0, 576.0, 573.0, 579.0, 576.0, 582.0, 573.0, 570.0, 579.0, 582.0, 570.0, 587.0, 579.0, 576.0, 522.0, 587.0, 573.0, 576.0, 582.0, 570.0, 582.0, 573.0, 584.0, 582.0, 579.0, 579.0, 522.0, 573.0, 579.0, 527.0, 579.0, 576.0, 587.0, 582.0, 579.0, 579.0, 567.0, 582.0, 579.0, 584.0, 576.0, 582.0, 576.0, 579.0, 576.0, 579.0, 639.0, 582.0, 573.0, 576.0, 552.0, 576.0, 579.0, 570.0, 582.0, 579.0, 636.0, 576.0, 584.0, 579.0, 582.0, 579.0, 579.0, 582.0, 579.0, 579.0, 579.0, 581.0, 587.0, 570.0, 522.0, 579.0, 525.0, 576.0, 570.0, 533.0, 630.0, 579.0, 582.0, 630.0, 522.0, 582.0, 579.0, 579.0, 627.0, 570.0, 482.0, 579.0, 576.0, 573.0, 69.0, 573.0, 579.0, 582.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [287.0, 289.0, 263.0, 259.0, 289.0, 293.0, 296.0, 280.0, 149.0, 148.0, 286.0, 296.0, 292.0, 284.0, 280.0, 293.0, 291.0, 288.0, 294.0, 282.0, 289.0, 293.0, 288.0, 285.0, 283.0, 287.0, 288.0, 291.0, 291.0, 291.0, 282.0, 288.0, 300.0, 287.0, 288.0, 291.0, 296.0, 280.0, 255.0, 267.0, 294.0, 293.0, 290.0, 283.0, 288.0, 288.0, 289.0, 293.0, 283.0, 
287.0, 291.0, 291.0, 277.0, 296.0, 292.0, 292.0, 297.0, 285.0, 292.0, 287.0, 290.0, 289.0, 266.0, 256.0, 288.0, 285.0, 291.0, 288.0, 273.0, 254.0, 292.0, 287.0, 276.0, 300.0, 288.0, 299.0, 296.0, 286.0, 285.0, 294.0, 282.0, 297.0, 291.0, 276.0, 286.0, 296.0, 292.0, 287.0, 284.0, 300.0, 278.0, 298.0, 288.0, 294.0, 295.0, 281.0, 285.0, 294.0, 295.0, 281.0, 283.0, 296.0, 324.0, 315.0, 283.0, 299.0, 288.0, 285.0, 278.0, 298.0, 278.0, 274.0, 289.0, 287.0, 293.0, 286.0, 277.0, 293.0, 292.0, 290.0, 288.0, 291.0, 314.0, 322.0, 286.0, 290.0, 290.0, 294.0, 291.0, 288.0, 295.0, 287.0, 291.0, 288.0, 282.0, 297.0, 289.0, 293.0, 295.0, 284.0, 291.0, 288.0, 286.0, 293.0, 287.0, 294.0, 296.0, 291.0, 281.0, 289.0, 255.0, 267.0, 301.0, 278.0, 261.0, 264.0, 289.0, 287.0, 288.0, 282.0, 261.0, 272.0, 326.0, 304.0, 288.0, 291.0, 297.0, 285.0, 319.0, 311.0, 263.0, 259.0, 287.0, 295.0, 290.0, 289.0, 286.0, 293.0, 314.0, 313.0, 285.0, 285.0, 245.0, 237.0, 289.0, 290.0, 282.0, 294.0, 285.0, 288.0, 34.0, 35.0, 289.0, 284.0, 286.0, 293.0, 291.0, 291.0, 294.0, 288.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7059000375335294, "mean_inference_ms": 1.2661800925391582, "mean_action_processing_ms": 0.13489277283830975, "mean_env_wait_ms": 0.8497105315759467, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 639.0, "episode_reward_min": 69.0, "episode_reward_mean": 567.49, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 34.0}, "policy_reward_max": {"ppo": 326.0}, "policy_reward_mean": {"ppo": 283.745}, "hist_stats": {"episode_reward": [576.0, 522.0, 582.0, 576.0, 297.0, 582.0, 576.0, 573.0, 579.0, 576.0, 582.0, 573.0, 570.0, 579.0, 582.0, 570.0, 587.0, 579.0, 576.0, 522.0, 587.0, 573.0, 576.0, 582.0, 570.0, 582.0, 573.0, 584.0, 582.0, 579.0, 579.0, 522.0, 573.0, 579.0, 527.0, 579.0, 576.0, 587.0, 582.0, 579.0, 579.0, 567.0, 582.0, 579.0, 584.0, 576.0, 582.0, 576.0, 579.0, 576.0, 579.0, 639.0, 582.0, 573.0, 576.0, 552.0, 576.0, 
579.0, 570.0, 582.0, 579.0, 636.0, 576.0, 584.0, 579.0, 582.0, 579.0, 579.0, 582.0, 579.0, 579.0, 579.0, 581.0, 587.0, 570.0, 522.0, 579.0, 525.0, 576.0, 570.0, 533.0, 630.0, 579.0, 582.0, 630.0, 522.0, 582.0, 579.0, 579.0, 627.0, 570.0, 482.0, 579.0, 576.0, 573.0, 69.0, 573.0, 579.0, 582.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [287.0, 289.0, 263.0, 259.0, 289.0, 293.0, 296.0, 280.0, 149.0, 148.0, 286.0, 296.0, 292.0, 284.0, 280.0, 293.0, 291.0, 288.0, 294.0, 282.0, 289.0, 293.0, 288.0, 285.0, 283.0, 287.0, 288.0, 291.0, 291.0, 291.0, 282.0, 288.0, 300.0, 287.0, 288.0, 291.0, 296.0, 280.0, 255.0, 267.0, 294.0, 293.0, 290.0, 283.0, 288.0, 288.0, 289.0, 293.0, 283.0, 287.0, 291.0, 291.0, 277.0, 296.0, 292.0, 292.0, 297.0, 285.0, 292.0, 287.0, 290.0, 289.0, 266.0, 256.0, 288.0, 285.0, 291.0, 288.0, 273.0, 254.0, 292.0, 287.0, 276.0, 300.0, 288.0, 299.0, 296.0, 286.0, 285.0, 294.0, 282.0, 297.0, 291.0, 276.0, 286.0, 296.0, 292.0, 287.0, 284.0, 300.0, 278.0, 298.0, 288.0, 294.0, 295.0, 281.0, 285.0, 294.0, 295.0, 281.0, 283.0, 296.0, 324.0, 315.0, 283.0, 299.0, 288.0, 285.0, 278.0, 298.0, 278.0, 274.0, 289.0, 287.0, 293.0, 286.0, 277.0, 293.0, 292.0, 290.0, 288.0, 291.0, 314.0, 322.0, 286.0, 290.0, 290.0, 294.0, 291.0, 288.0, 295.0, 287.0, 291.0, 288.0, 282.0, 297.0, 289.0, 293.0, 295.0, 284.0, 291.0, 288.0, 286.0, 293.0, 287.0, 294.0, 296.0, 291.0, 281.0, 289.0, 255.0, 267.0, 301.0, 278.0, 261.0, 264.0, 289.0, 287.0, 288.0, 282.0, 261.0, 272.0, 326.0, 304.0, 288.0, 
291.0, 297.0, 285.0, 319.0, 311.0, 263.0, 259.0, 287.0, 295.0, 290.0, 289.0, 286.0, 293.0, 314.0, 313.0, 285.0, 285.0, 245.0, 237.0, 289.0, 290.0, 282.0, 294.0, 285.0, 288.0, 34.0, 35.0, 289.0, 284.0, 286.0, 293.0, 291.0, 291.0, 294.0, 288.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7059000375335294, "mean_inference_ms": 1.2661800925391582, "mean_action_processing_ms": 0.13489277283830975, "mean_env_wait_ms": 0.8497105315759467, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 6732800, "num_agent_steps_trained": 6732800, "num_env_steps_sampled": 3366400, "num_env_steps_trained": 3366400, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 3366400, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 6732800, "timers": {"training_iteration_time_ms": 3589.346, "learn_time_ms": 1089.994, "learn_throughput": 11743.188, "synch_weights_time_ms": 10.681}, "counters": {"num_env_steps_sampled": 3366400, "num_env_steps_trained": 3366400, "num_agent_steps_sampled": 6732800, "num_agent_steps_trained": 6732800}, "done": false, "episodes_total": 8416, "training_iteration": 263, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-17-34", "timestamp": 1666581454, "time_this_iter_s": 3.7120368480682373, "time_total_s": 1016.3832104206085, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1016.3832104206085, "timesteps_since_restore": 0, "iterations_since_restore": 263, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.779999999999998, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 196.2, "sparse_reward_min": 20, "sparse_reward_max": 220, "shaped_reward_mean": 175.94, "shaped_reward_min": 29, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.58, "onion_pickup_agent_0_min": 2, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 17.78, "onion_pickup_agent_1_min": 4, "onion_pickup_agent_1_max": 27, "useful_onion_pickup_agent_0_mean": 14.42, "useful_onion_pickup_agent_0_min": 2, "useful_onion_pickup_agent_0_max": 25, "useful_onion_pickup_agent_1_mean": 17.62, "useful_onion_pickup_agent_1_min": 4, "useful_onion_pickup_agent_1_max": 27, "onion_drop_agent_0_mean": 0.09, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.07, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.03, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, 
"useful_onion_drop_agent_1_mean": 0.04, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 14.22, "potting_onion_agent_0_min": 2, "potting_onion_agent_0_max": 25, "potting_onion_agent_1_mean": 17.49, "potting_onion_agent_1_min": 4, "potting_onion_agent_1_max": 27, "dish_pickup_agent_0_mean": 5.99, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 4.86, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.71, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.56, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.15, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.17, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.64, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.41, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 10, "soup_delivery_agent_0_mean": 5.59, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.3, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.05, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 14.22, "optimal_onion_potting_agent_0_min": 2, "optimal_onion_potting_agent_0_max": 25, "optimal_onion_potting_agent_1_mean": 17.49, "optimal_onion_potting_agent_1_min": 4, "optimal_onion_potting_agent_1_max": 27, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.22, "viable_onion_potting_agent_0_min": 2, "viable_onion_potting_agent_0_max": 25, "viable_onion_potting_agent_1_mean": 17.49, "viable_onion_potting_agent_1_min": 4, "viable_onion_potting_agent_1_max": 27, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0022870218381285667, "policy_loss": 0.0019353741081431508, "vf_loss": 
7.78421688079834, "vf_explained_var": 0.5842685699462891, "kl": 0.002388710156083107, "entropy": 0.8535453081130981, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 3379200, "num_env_steps_trained": 3379200, "num_agent_steps_sampled": 6758400, "num_agent_steps_trained": 6758400}, "sampler_results": {"episode_reward_max": 639.0, "episode_reward_min": 69.0, "episode_reward_mean": 568.34, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 34.0}, "policy_reward_max": {"ppo": 326.0}, "policy_reward_mean": {"ppo": 284.17}, "custom_metrics": {"sparse_reward_mean": 196.2, "sparse_reward_min": 20, "sparse_reward_max": 220, "shaped_reward_mean": 175.94, "shaped_reward_min": 29, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.58, "onion_pickup_agent_0_min": 2, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 17.78, 
"onion_pickup_agent_1_min": 4, "onion_pickup_agent_1_max": 27, "useful_onion_pickup_agent_0_mean": 14.42, "useful_onion_pickup_agent_0_min": 2, "useful_onion_pickup_agent_0_max": 25, "useful_onion_pickup_agent_1_mean": 17.62, "useful_onion_pickup_agent_1_min": 4, "useful_onion_pickup_agent_1_max": 27, "onion_drop_agent_0_mean": 0.09, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.07, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.03, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.04, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 14.22, "potting_onion_agent_0_min": 2, "potting_onion_agent_0_max": 25, "potting_onion_agent_1_mean": 17.49, "potting_onion_agent_1_min": 4, "potting_onion_agent_1_max": 27, "dish_pickup_agent_0_mean": 5.99, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 4.86, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.71, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.56, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.15, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.17, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.64, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.41, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 10, "soup_delivery_agent_0_mean": 5.59, "soup_delivery_agent_0_min": 1, 
"soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.3, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.05, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 14.22, "optimal_onion_potting_agent_0_min": 2, "optimal_onion_potting_agent_0_max": 25, "optimal_onion_potting_agent_1_mean": 17.49, "optimal_onion_potting_agent_1_min": 4, "optimal_onion_potting_agent_1_max": 27, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.22, "viable_onion_potting_agent_0_min": 2, "viable_onion_potting_agent_0_max": 25, "viable_onion_potting_agent_1_mean": 17.49, "viable_onion_potting_agent_1_min": 4, "viable_onion_potting_agent_1_max": 27, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [573.0, 579.0, 527.0, 579.0, 576.0, 587.0, 582.0, 579.0, 579.0, 567.0, 582.0, 579.0, 584.0, 576.0, 582.0, 576.0, 579.0, 576.0, 579.0, 639.0, 582.0, 573.0, 576.0, 552.0, 576.0, 579.0, 570.0, 582.0, 579.0, 636.0, 576.0, 584.0, 579.0, 582.0, 579.0, 579.0, 582.0, 579.0, 579.0, 579.0, 581.0, 587.0, 570.0, 522.0, 579.0, 525.0, 576.0, 570.0, 533.0, 630.0, 579.0, 582.0, 630.0, 522.0, 582.0, 579.0, 579.0, 627.0, 570.0, 482.0, 579.0, 576.0, 573.0, 69.0, 573.0, 579.0, 582.0, 582.0, 576.0, 533.0, 530.0, 581.0, 573.0, 576.0, 576.0, 576.0, 573.0, 582.0, 576.0, 570.0, 519.0, 579.0, 573.0, 573.0, 582.0, 579.0, 590.0, 579.0, 582.0, 584.0, 582.0, 533.0, 479.0, 579.0, 576.0, 582.0, 584.0, 590.0, 479.0, 587.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [288.0, 285.0, 291.0, 288.0, 273.0, 254.0, 292.0, 287.0, 276.0, 300.0, 288.0, 299.0, 296.0, 286.0, 285.0, 294.0, 282.0, 297.0, 291.0, 276.0, 286.0, 296.0, 292.0, 287.0, 284.0, 300.0, 278.0, 298.0, 288.0, 294.0, 295.0, 281.0, 285.0, 294.0, 295.0, 281.0, 283.0, 296.0, 324.0, 315.0, 283.0, 299.0, 288.0, 285.0, 278.0, 298.0, 278.0, 274.0, 289.0, 
287.0, 293.0, 286.0, 277.0, 293.0, 292.0, 290.0, 288.0, 291.0, 314.0, 322.0, 286.0, 290.0, 290.0, 294.0, 291.0, 288.0, 295.0, 287.0, 291.0, 288.0, 282.0, 297.0, 289.0, 293.0, 295.0, 284.0, 291.0, 288.0, 286.0, 293.0, 287.0, 294.0, 296.0, 291.0, 281.0, 289.0, 255.0, 267.0, 301.0, 278.0, 261.0, 264.0, 289.0, 287.0, 288.0, 282.0, 261.0, 272.0, 326.0, 304.0, 288.0, 291.0, 297.0, 285.0, 319.0, 311.0, 263.0, 259.0, 287.0, 295.0, 290.0, 289.0, 286.0, 293.0, 314.0, 313.0, 285.0, 285.0, 245.0, 237.0, 289.0, 290.0, 282.0, 294.0, 285.0, 288.0, 34.0, 35.0, 289.0, 284.0, 286.0, 293.0, 291.0, 291.0, 294.0, 288.0, 281.0, 295.0, 263.0, 270.0, 269.0, 261.0, 282.0, 299.0, 295.0, 278.0, 289.0, 287.0, 296.0, 280.0, 299.0, 277.0, 289.0, 284.0, 298.0, 284.0, 282.0, 294.0, 287.0, 283.0, 262.0, 257.0, 291.0, 288.0, 291.0, 282.0, 285.0, 288.0, 292.0, 290.0, 289.0, 290.0, 293.0, 297.0, 297.0, 282.0, 298.0, 284.0, 290.0, 294.0, 292.0, 290.0, 276.0, 257.0, 229.0, 250.0, 293.0, 286.0, 278.0, 298.0, 293.0, 289.0, 273.0, 311.0, 303.0, 287.0, 241.0, 238.0, 296.0, 291.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7057810537052588, "mean_inference_ms": 1.2658757102807892, "mean_action_processing_ms": 0.13487831523960633, "mean_env_wait_ms": 0.849539354254739, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 639.0, "episode_reward_min": 69.0, "episode_reward_mean": 568.34, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 34.0}, "policy_reward_max": {"ppo": 326.0}, "policy_reward_mean": {"ppo": 284.17}, "hist_stats": {"episode_reward": [573.0, 579.0, 527.0, 579.0, 576.0, 587.0, 582.0, 579.0, 579.0, 567.0, 582.0, 579.0, 584.0, 576.0, 582.0, 576.0, 579.0, 576.0, 579.0, 639.0, 582.0, 573.0, 576.0, 552.0, 576.0, 579.0, 570.0, 582.0, 579.0, 636.0, 576.0, 584.0, 579.0, 582.0, 579.0, 579.0, 582.0, 579.0, 579.0, 579.0, 581.0, 587.0, 570.0, 522.0, 579.0, 525.0, 576.0, 570.0, 533.0, 630.0, 579.0, 582.0, 630.0, 522.0, 582.0, 579.0, 579.0, 
627.0, 570.0, 482.0, 579.0, 576.0, 573.0, 69.0, 573.0, 579.0, 582.0, 582.0, 576.0, 533.0, 530.0, 581.0, 573.0, 576.0, 576.0, 576.0, 573.0, 582.0, 576.0, 570.0, 519.0, 579.0, 573.0, 573.0, 582.0, 579.0, 590.0, 579.0, 582.0, 584.0, 582.0, 533.0, 479.0, 579.0, 576.0, 582.0, 584.0, 590.0, 479.0, 587.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [288.0, 285.0, 291.0, 288.0, 273.0, 254.0, 292.0, 287.0, 276.0, 300.0, 288.0, 299.0, 296.0, 286.0, 285.0, 294.0, 282.0, 297.0, 291.0, 276.0, 286.0, 296.0, 292.0, 287.0, 284.0, 300.0, 278.0, 298.0, 288.0, 294.0, 295.0, 281.0, 285.0, 294.0, 295.0, 281.0, 283.0, 296.0, 324.0, 315.0, 283.0, 299.0, 288.0, 285.0, 278.0, 298.0, 278.0, 274.0, 289.0, 287.0, 293.0, 286.0, 277.0, 293.0, 292.0, 290.0, 288.0, 291.0, 314.0, 322.0, 286.0, 290.0, 290.0, 294.0, 291.0, 288.0, 295.0, 287.0, 291.0, 288.0, 282.0, 297.0, 289.0, 293.0, 295.0, 284.0, 291.0, 288.0, 286.0, 293.0, 287.0, 294.0, 296.0, 291.0, 281.0, 289.0, 255.0, 267.0, 301.0, 278.0, 261.0, 264.0, 289.0, 287.0, 288.0, 282.0, 261.0, 272.0, 326.0, 304.0, 288.0, 291.0, 297.0, 285.0, 319.0, 311.0, 263.0, 259.0, 287.0, 295.0, 290.0, 289.0, 286.0, 293.0, 314.0, 313.0, 285.0, 285.0, 245.0, 237.0, 289.0, 290.0, 282.0, 294.0, 285.0, 288.0, 34.0, 35.0, 289.0, 284.0, 286.0, 293.0, 291.0, 291.0, 294.0, 288.0, 281.0, 295.0, 263.0, 270.0, 269.0, 261.0, 282.0, 299.0, 295.0, 278.0, 289.0, 287.0, 296.0, 280.0, 299.0, 277.0, 289.0, 284.0, 298.0, 284.0, 282.0, 294.0, 287.0, 283.0, 262.0, 257.0, 291.0, 288.0, 291.0, 
282.0, 285.0, 288.0, 292.0, 290.0, 289.0, 290.0, 293.0, 297.0, 297.0, 282.0, 298.0, 284.0, 290.0, 294.0, 292.0, 290.0, 276.0, 257.0, 229.0, 250.0, 293.0, 286.0, 278.0, 298.0, 293.0, 289.0, 273.0, 311.0, 303.0, 287.0, 241.0, 238.0, 296.0, 291.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7057810537052588, "mean_inference_ms": 1.2658757102807892, "mean_action_processing_ms": 0.13487831523960633, "mean_env_wait_ms": 0.849539354254739, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 6758400, "num_agent_steps_trained": 6758400, "num_env_steps_sampled": 3379200, "num_env_steps_trained": 3379200, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 3379200, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 6758400, "timers": {"training_iteration_time_ms": 3595.683, "learn_time_ms": 1098.205, "learn_throughput": 11655.379, "synch_weights_time_ms": 10.673}, "counters": {"num_env_steps_sampled": 3379200, "num_env_steps_trained": 3379200, "num_agent_steps_sampled": 6758400, "num_agent_steps_trained": 6758400}, "done": false, "episodes_total": 8448, "training_iteration": 264, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-17-37", "timestamp": 1666581457, "time_this_iter_s": 3.6259381771087646, "time_total_s": 1020.0091485977173, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1020.0091485977173, "timesteps_since_restore": 0, "iterations_since_restore": 264, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.48, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 196.0, "sparse_reward_min": 20, "sparse_reward_max": 220, "shaped_reward_mean": 175.54, "shaped_reward_min": 29, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.55, "onion_pickup_agent_0_min": 2, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 17.83, "onion_pickup_agent_1_min": 4, "onion_pickup_agent_1_max": 27, "useful_onion_pickup_agent_0_mean": 14.45, "useful_onion_pickup_agent_0_min": 2, "useful_onion_pickup_agent_0_max": 25, "useful_onion_pickup_agent_1_mean": 17.65, "useful_onion_pickup_agent_1_min": 4, "useful_onion_pickup_agent_1_max": 27, "onion_drop_agent_0_mean": 0.07, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.11, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.02, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, 
"useful_onion_drop_agent_1_mean": 0.08, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 14.22, "potting_onion_agent_0_min": 2, "potting_onion_agent_0_max": 25, "potting_onion_agent_1_mean": 17.41, "potting_onion_agent_1_min": 4, "potting_onion_agent_1_max": 27, "dish_pickup_agent_0_mean": 6.08, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 4.76, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.85, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.4, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.18, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.2, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.04, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.71, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.32, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 10, "soup_delivery_agent_0_mean": 5.66, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.23, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.05, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 14.22, "optimal_onion_potting_agent_0_min": 2, "optimal_onion_potting_agent_0_max": 25, "optimal_onion_potting_agent_1_mean": 17.41, "optimal_onion_potting_agent_1_min": 4, "optimal_onion_potting_agent_1_max": 27, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.22, "viable_onion_potting_agent_0_min": 2, "viable_onion_potting_agent_0_max": 25, "viable_onion_potting_agent_1_mean": 17.41, "viable_onion_potting_agent_1_min": 4, "viable_onion_potting_agent_1_max": 27, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.003440435044467449, "policy_loss": 0.003094225190579891, "vf_loss": 
7.7306413650512695, "vf_explained_var": 0.5569076538085938, "kl": 0.0026197489351034164, "entropy": 0.8537062406539917, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 3392000, "num_env_steps_trained": 3392000, "num_agent_steps_sampled": 6784000, "num_agent_steps_trained": 6784000}, "sampler_results": {"episode_reward_max": 639.0, "episode_reward_min": 69.0, "episode_reward_mean": 567.54, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 34.0}, "policy_reward_max": {"ppo": 326.0}, "policy_reward_mean": {"ppo": 283.77}, "custom_metrics": {"sparse_reward_mean": 196.0, "sparse_reward_min": 20, "sparse_reward_max": 220, "shaped_reward_mean": 175.54, "shaped_reward_min": 29, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.55, "onion_pickup_agent_0_min": 2, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 17.83, 
"onion_pickup_agent_1_min": 4, "onion_pickup_agent_1_max": 27, "useful_onion_pickup_agent_0_mean": 14.45, "useful_onion_pickup_agent_0_min": 2, "useful_onion_pickup_agent_0_max": 25, "useful_onion_pickup_agent_1_mean": 17.65, "useful_onion_pickup_agent_1_min": 4, "useful_onion_pickup_agent_1_max": 27, "onion_drop_agent_0_mean": 0.07, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.11, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.02, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.08, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 14.22, "potting_onion_agent_0_min": 2, "potting_onion_agent_0_max": 25, "potting_onion_agent_1_mean": 17.41, "potting_onion_agent_1_min": 4, "potting_onion_agent_1_max": 27, "dish_pickup_agent_0_mean": 6.08, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 4.76, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.85, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.4, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.18, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.2, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.04, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.71, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.32, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 10, "soup_delivery_agent_0_mean": 5.66, "soup_delivery_agent_0_min": 1, 
"soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.23, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.05, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 14.22, "optimal_onion_potting_agent_0_min": 2, "optimal_onion_potting_agent_0_max": 25, "optimal_onion_potting_agent_1_mean": 17.41, "optimal_onion_potting_agent_1_min": 4, "optimal_onion_potting_agent_1_max": 27, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.22, "viable_onion_potting_agent_0_min": 2, "viable_onion_potting_agent_0_max": 25, "viable_onion_potting_agent_1_mean": 17.41, "viable_onion_potting_agent_1_min": 4, "viable_onion_potting_agent_1_max": 27, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 582.0, 579.0, 579.0, 582.0, 579.0, 579.0, 579.0, 581.0, 587.0, 570.0, 522.0, 579.0, 525.0, 576.0, 570.0, 533.0, 630.0, 579.0, 582.0, 630.0, 522.0, 582.0, 579.0, 579.0, 627.0, 570.0, 482.0, 579.0, 576.0, 573.0, 69.0, 573.0, 579.0, 582.0, 582.0, 576.0, 533.0, 530.0, 581.0, 573.0, 576.0, 576.0, 576.0, 573.0, 582.0, 576.0, 570.0, 519.0, 579.0, 573.0, 573.0, 582.0, 579.0, 590.0, 579.0, 582.0, 584.0, 582.0, 533.0, 479.0, 579.0, 576.0, 582.0, 584.0, 590.0, 479.0, 587.0, 624.0, 587.0, 579.0, 579.0, 630.0, 570.0, 639.0, 582.0, 579.0, 576.0, 579.0, 576.0, 579.0, 519.0, 579.0, 573.0, 573.0, 573.0, 627.0, 582.0, 522.0, 581.0, 573.0, 576.0, 579.0, 576.0, 524.0, 567.0, 525.0, 582.0, 573.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [291.0, 288.0, 295.0, 287.0, 291.0, 288.0, 282.0, 297.0, 289.0, 293.0, 295.0, 284.0, 291.0, 288.0, 286.0, 293.0, 287.0, 294.0, 296.0, 291.0, 281.0, 289.0, 255.0, 267.0, 301.0, 278.0, 261.0, 264.0, 289.0, 287.0, 288.0, 282.0, 261.0, 272.0, 326.0, 304.0, 288.0, 291.0, 297.0, 285.0, 319.0, 311.0, 263.0, 259.0, 287.0, 295.0, 290.0, 289.0, 286.0, 
293.0, 314.0, 313.0, 285.0, 285.0, 245.0, 237.0, 289.0, 290.0, 282.0, 294.0, 285.0, 288.0, 34.0, 35.0, 289.0, 284.0, 286.0, 293.0, 291.0, 291.0, 294.0, 288.0, 281.0, 295.0, 263.0, 270.0, 269.0, 261.0, 282.0, 299.0, 295.0, 278.0, 289.0, 287.0, 296.0, 280.0, 299.0, 277.0, 289.0, 284.0, 298.0, 284.0, 282.0, 294.0, 287.0, 283.0, 262.0, 257.0, 291.0, 288.0, 291.0, 282.0, 285.0, 288.0, 292.0, 290.0, 289.0, 290.0, 293.0, 297.0, 297.0, 282.0, 298.0, 284.0, 290.0, 294.0, 292.0, 290.0, 276.0, 257.0, 229.0, 250.0, 293.0, 286.0, 278.0, 298.0, 293.0, 289.0, 273.0, 311.0, 303.0, 287.0, 241.0, 238.0, 296.0, 291.0, 316.0, 308.0, 296.0, 291.0, 292.0, 287.0, 293.0, 286.0, 323.0, 307.0, 284.0, 286.0, 321.0, 318.0, 292.0, 290.0, 287.0, 292.0, 293.0, 283.0, 288.0, 291.0, 289.0, 287.0, 295.0, 284.0, 262.0, 257.0, 300.0, 279.0, 285.0, 288.0, 294.0, 279.0, 286.0, 287.0, 323.0, 304.0, 290.0, 292.0, 257.0, 265.0, 288.0, 293.0, 288.0, 285.0, 285.0, 291.0, 289.0, 290.0, 284.0, 292.0, 261.0, 263.0, 286.0, 281.0, 260.0, 265.0, 297.0, 285.0, 282.0, 291.0, 299.0, 283.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7056736532332322, "mean_inference_ms": 1.265572151465473, "mean_action_processing_ms": 0.13486356298137628, "mean_env_wait_ms": 0.849365499448659, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 639.0, "episode_reward_min": 69.0, "episode_reward_mean": 567.54, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 34.0}, "policy_reward_max": {"ppo": 326.0}, "policy_reward_mean": {"ppo": 283.77}, "hist_stats": {"episode_reward": [579.0, 582.0, 579.0, 579.0, 582.0, 579.0, 579.0, 579.0, 581.0, 587.0, 570.0, 522.0, 579.0, 525.0, 576.0, 570.0, 533.0, 630.0, 579.0, 582.0, 630.0, 522.0, 582.0, 579.0, 579.0, 627.0, 570.0, 482.0, 579.0, 576.0, 573.0, 69.0, 573.0, 579.0, 582.0, 582.0, 576.0, 533.0, 530.0, 581.0, 573.0, 576.0, 576.0, 576.0, 573.0, 582.0, 576.0, 570.0, 519.0, 579.0, 573.0, 573.0, 582.0, 579.0, 590.0, 579.0, 582.0, 584.0, 
582.0, 533.0, 479.0, 579.0, 576.0, 582.0, 584.0, 590.0, 479.0, 587.0, 624.0, 587.0, 579.0, 579.0, 630.0, 570.0, 639.0, 582.0, 579.0, 576.0, 579.0, 576.0, 579.0, 519.0, 579.0, 573.0, 573.0, 573.0, 627.0, 582.0, 522.0, 581.0, 573.0, 576.0, 579.0, 576.0, 524.0, 567.0, 525.0, 582.0, 573.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [291.0, 288.0, 295.0, 287.0, 291.0, 288.0, 282.0, 297.0, 289.0, 293.0, 295.0, 284.0, 291.0, 288.0, 286.0, 293.0, 287.0, 294.0, 296.0, 291.0, 281.0, 289.0, 255.0, 267.0, 301.0, 278.0, 261.0, 264.0, 289.0, 287.0, 288.0, 282.0, 261.0, 272.0, 326.0, 304.0, 288.0, 291.0, 297.0, 285.0, 319.0, 311.0, 263.0, 259.0, 287.0, 295.0, 290.0, 289.0, 286.0, 293.0, 314.0, 313.0, 285.0, 285.0, 245.0, 237.0, 289.0, 290.0, 282.0, 294.0, 285.0, 288.0, 34.0, 35.0, 289.0, 284.0, 286.0, 293.0, 291.0, 291.0, 294.0, 288.0, 281.0, 295.0, 263.0, 270.0, 269.0, 261.0, 282.0, 299.0, 295.0, 278.0, 289.0, 287.0, 296.0, 280.0, 299.0, 277.0, 289.0, 284.0, 298.0, 284.0, 282.0, 294.0, 287.0, 283.0, 262.0, 257.0, 291.0, 288.0, 291.0, 282.0, 285.0, 288.0, 292.0, 290.0, 289.0, 290.0, 293.0, 297.0, 297.0, 282.0, 298.0, 284.0, 290.0, 294.0, 292.0, 290.0, 276.0, 257.0, 229.0, 250.0, 293.0, 286.0, 278.0, 298.0, 293.0, 289.0, 273.0, 311.0, 303.0, 287.0, 241.0, 238.0, 296.0, 291.0, 316.0, 308.0, 296.0, 291.0, 292.0, 287.0, 293.0, 286.0, 323.0, 307.0, 284.0, 286.0, 321.0, 318.0, 292.0, 290.0, 287.0, 292.0, 293.0, 283.0, 288.0, 291.0, 289.0, 287.0, 295.0, 284.0, 262.0, 257.0, 300.0, 279.0, 
285.0, 288.0, 294.0, 279.0, 286.0, 287.0, 323.0, 304.0, 290.0, 292.0, 257.0, 265.0, 288.0, 293.0, 288.0, 285.0, 285.0, 291.0, 289.0, 290.0, 284.0, 292.0, 261.0, 263.0, 286.0, 281.0, 260.0, 265.0, 297.0, 285.0, 282.0, 291.0, 299.0, 283.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7056736532332322, "mean_inference_ms": 1.265572151465473, "mean_action_processing_ms": 0.13486356298137628, "mean_env_wait_ms": 0.849365499448659, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 6784000, "num_agent_steps_trained": 6784000, "num_env_steps_sampled": 3392000, "num_env_steps_trained": 3392000, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 3392000, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 6784000, "timers": {"training_iteration_time_ms": 3588.842, "learn_time_ms": 1087.972, "learn_throughput": 11765.014, "synch_weights_time_ms": 11.318}, "counters": {"num_env_steps_sampled": 3392000, "num_env_steps_trained": 3392000, "num_agent_steps_sampled": 6784000, "num_agent_steps_trained": 6784000}, "done": false, "episodes_total": 8480, "training_iteration": 265, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-17-41", "timestamp": 1666581461, "time_this_iter_s": 3.5863428115844727, "time_total_s": 1023.5954914093018, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": 
true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, 
"train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": 
{"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, 
"enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1023.5954914093018, "timesteps_since_restore": 0, "iterations_since_restore": 265, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.649999999999995, "ram_util_percent": 10.616666666666665}}
+{"custom_metrics": {"sparse_reward_mean": 196.4, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 175.71, "shaped_reward_min": 132, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.77, "onion_pickup_agent_0_min": 3, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 17.71, "onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 29, "useful_onion_pickup_agent_0_mean": 14.62, "useful_onion_pickup_agent_0_min": 2, "useful_onion_pickup_agent_0_max": 25, "useful_onion_pickup_agent_1_mean": 17.46, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 28, "onion_drop_agent_0_mean": 0.09, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.18, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.02, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, 
"useful_onion_drop_agent_1_mean": 0.11, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.41, "potting_onion_agent_0_min": 2, "potting_onion_agent_0_max": 25, "potting_onion_agent_1_mean": 17.24, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 27, "dish_pickup_agent_0_mean": 6.08, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 4.93, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.8, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.42, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.22, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.3, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.05, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.68, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.39, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 10, "soup_delivery_agent_0_mean": 5.62, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.3, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.04, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 14.41, "optimal_onion_potting_agent_0_min": 2, "optimal_onion_potting_agent_0_max": 25, "optimal_onion_potting_agent_1_mean": 17.24, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 27, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.41, "viable_onion_potting_agent_0_min": 2, "viable_onion_potting_agent_0_max": 25, "viable_onion_potting_agent_1_mean": 17.24, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 27, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0019726380705833435, "policy_loss": 0.0016227063024416566, "vf_loss": 
7.775620937347412, "vf_explained_var": 0.5693689584732056, "kl": 0.0020303893834352493, "entropy": 0.8552579879760742, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 3404800, "num_env_steps_trained": 3404800, "num_agent_steps_sampled": 6809600, "num_agent_steps_trained": 6809600}, "sampler_results": {"episode_reward_max": 639.0, "episode_reward_min": 452.0, "episode_reward_mean": 568.51, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 224.0}, "policy_reward_max": {"ppo": 323.0}, "policy_reward_mean": {"ppo": 284.255}, "custom_metrics": {"sparse_reward_mean": 196.4, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 175.71, "shaped_reward_min": 132, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.77, "onion_pickup_agent_0_min": 3, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 17.71, 
"onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 29, "useful_onion_pickup_agent_0_mean": 14.62, "useful_onion_pickup_agent_0_min": 2, "useful_onion_pickup_agent_0_max": 25, "useful_onion_pickup_agent_1_mean": 17.46, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 28, "onion_drop_agent_0_mean": 0.09, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.18, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.02, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.11, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.41, "potting_onion_agent_0_min": 2, "potting_onion_agent_0_max": 25, "potting_onion_agent_1_mean": 17.24, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 27, "dish_pickup_agent_0_mean": 6.08, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 4.93, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.8, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.42, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.22, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.3, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.05, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.68, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.39, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 10, "soup_delivery_agent_0_mean": 5.62, "soup_delivery_agent_0_min": 1, 
"soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.3, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.04, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 14.41, "optimal_onion_potting_agent_0_min": 2, "optimal_onion_potting_agent_0_max": 25, "optimal_onion_potting_agent_1_mean": 17.24, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 27, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.41, "viable_onion_potting_agent_0_min": 2, "viable_onion_potting_agent_0_max": 25, "viable_onion_potting_agent_1_mean": 17.24, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 27, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [573.0, 579.0, 582.0, 582.0, 576.0, 533.0, 530.0, 581.0, 573.0, 576.0, 576.0, 576.0, 573.0, 582.0, 576.0, 570.0, 519.0, 579.0, 573.0, 573.0, 582.0, 579.0, 590.0, 579.0, 582.0, 584.0, 582.0, 533.0, 479.0, 579.0, 576.0, 582.0, 584.0, 590.0, 479.0, 587.0, 624.0, 587.0, 579.0, 579.0, 630.0, 570.0, 639.0, 582.0, 579.0, 576.0, 579.0, 576.0, 579.0, 519.0, 579.0, 573.0, 573.0, 573.0, 627.0, 582.0, 522.0, 581.0, 573.0, 576.0, 579.0, 576.0, 524.0, 567.0, 525.0, 582.0, 573.0, 582.0, 519.0, 576.0, 579.0, 452.0, 582.0, 570.0, 570.0, 525.0, 582.0, 573.0, 576.0, 527.0, 567.0, 579.0, 582.0, 582.0, 533.0, 576.0, 522.0, 576.0, 576.0, 516.0, 584.0, 582.0, 582.0, 576.0, 587.0, 579.0, 522.0, 584.0, 582.0, 519.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [289.0, 284.0, 286.0, 293.0, 291.0, 291.0, 294.0, 288.0, 281.0, 295.0, 263.0, 270.0, 269.0, 261.0, 282.0, 299.0, 295.0, 278.0, 289.0, 287.0, 296.0, 280.0, 299.0, 277.0, 289.0, 284.0, 298.0, 284.0, 282.0, 294.0, 287.0, 283.0, 262.0, 257.0, 291.0, 288.0, 291.0, 282.0, 285.0, 288.0, 292.0, 290.0, 289.0, 290.0, 293.0, 297.0, 297.0, 282.0, 298.0, 
284.0, 290.0, 294.0, 292.0, 290.0, 276.0, 257.0, 229.0, 250.0, 293.0, 286.0, 278.0, 298.0, 293.0, 289.0, 273.0, 311.0, 303.0, 287.0, 241.0, 238.0, 296.0, 291.0, 316.0, 308.0, 296.0, 291.0, 292.0, 287.0, 293.0, 286.0, 323.0, 307.0, 284.0, 286.0, 321.0, 318.0, 292.0, 290.0, 287.0, 292.0, 293.0, 283.0, 288.0, 291.0, 289.0, 287.0, 295.0, 284.0, 262.0, 257.0, 300.0, 279.0, 285.0, 288.0, 294.0, 279.0, 286.0, 287.0, 323.0, 304.0, 290.0, 292.0, 257.0, 265.0, 288.0, 293.0, 288.0, 285.0, 285.0, 291.0, 289.0, 290.0, 284.0, 292.0, 261.0, 263.0, 286.0, 281.0, 260.0, 265.0, 297.0, 285.0, 282.0, 291.0, 299.0, 283.0, 258.0, 261.0, 290.0, 286.0, 298.0, 281.0, 224.0, 228.0, 296.0, 286.0, 290.0, 280.0, 289.0, 281.0, 267.0, 258.0, 293.0, 289.0, 292.0, 281.0, 293.0, 283.0, 269.0, 258.0, 282.0, 285.0, 296.0, 283.0, 292.0, 290.0, 298.0, 284.0, 260.0, 273.0, 290.0, 286.0, 261.0, 261.0, 291.0, 285.0, 288.0, 288.0, 264.0, 252.0, 291.0, 293.0, 292.0, 290.0, 285.0, 297.0, 275.0, 301.0, 297.0, 290.0, 288.0, 291.0, 258.0, 264.0, 287.0, 297.0, 283.0, 299.0, 259.0, 260.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7055510816247841, "mean_inference_ms": 1.2652897116048947, "mean_action_processing_ms": 0.13484551900006134, "mean_env_wait_ms": 0.8493587874707947, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 639.0, "episode_reward_min": 452.0, "episode_reward_mean": 568.51, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 224.0}, "policy_reward_max": {"ppo": 323.0}, "policy_reward_mean": {"ppo": 284.255}, "hist_stats": {"episode_reward": [573.0, 579.0, 582.0, 582.0, 576.0, 533.0, 530.0, 581.0, 573.0, 576.0, 576.0, 576.0, 573.0, 582.0, 576.0, 570.0, 519.0, 579.0, 573.0, 573.0, 582.0, 579.0, 590.0, 579.0, 582.0, 584.0, 582.0, 533.0, 479.0, 579.0, 576.0, 582.0, 584.0, 590.0, 479.0, 587.0, 624.0, 587.0, 579.0, 579.0, 630.0, 570.0, 639.0, 582.0, 579.0, 576.0, 579.0, 576.0, 579.0, 519.0, 579.0, 573.0, 573.0, 573.0, 627.0, 582.0, 
522.0, 581.0, 573.0, 576.0, 579.0, 576.0, 524.0, 567.0, 525.0, 582.0, 573.0, 582.0, 519.0, 576.0, 579.0, 452.0, 582.0, 570.0, 570.0, 525.0, 582.0, 573.0, 576.0, 527.0, 567.0, 579.0, 582.0, 582.0, 533.0, 576.0, 522.0, 576.0, 576.0, 516.0, 584.0, 582.0, 582.0, 576.0, 587.0, 579.0, 522.0, 584.0, 582.0, 519.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [289.0, 284.0, 286.0, 293.0, 291.0, 291.0, 294.0, 288.0, 281.0, 295.0, 263.0, 270.0, 269.0, 261.0, 282.0, 299.0, 295.0, 278.0, 289.0, 287.0, 296.0, 280.0, 299.0, 277.0, 289.0, 284.0, 298.0, 284.0, 282.0, 294.0, 287.0, 283.0, 262.0, 257.0, 291.0, 288.0, 291.0, 282.0, 285.0, 288.0, 292.0, 290.0, 289.0, 290.0, 293.0, 297.0, 297.0, 282.0, 298.0, 284.0, 290.0, 294.0, 292.0, 290.0, 276.0, 257.0, 229.0, 250.0, 293.0, 286.0, 278.0, 298.0, 293.0, 289.0, 273.0, 311.0, 303.0, 287.0, 241.0, 238.0, 296.0, 291.0, 316.0, 308.0, 296.0, 291.0, 292.0, 287.0, 293.0, 286.0, 323.0, 307.0, 284.0, 286.0, 321.0, 318.0, 292.0, 290.0, 287.0, 292.0, 293.0, 283.0, 288.0, 291.0, 289.0, 287.0, 295.0, 284.0, 262.0, 257.0, 300.0, 279.0, 285.0, 288.0, 294.0, 279.0, 286.0, 287.0, 323.0, 304.0, 290.0, 292.0, 257.0, 265.0, 288.0, 293.0, 288.0, 285.0, 285.0, 291.0, 289.0, 290.0, 284.0, 292.0, 261.0, 263.0, 286.0, 281.0, 260.0, 265.0, 297.0, 285.0, 282.0, 291.0, 299.0, 283.0, 258.0, 261.0, 290.0, 286.0, 298.0, 281.0, 224.0, 228.0, 296.0, 286.0, 290.0, 280.0, 289.0, 281.0, 267.0, 258.0, 293.0, 289.0, 292.0, 281.0, 293.0, 283.0, 269.0, 258.0, 282.0, 285.0, 296.0, 283.0, 
292.0, 290.0, 298.0, 284.0, 260.0, 273.0, 290.0, 286.0, 261.0, 261.0, 291.0, 285.0, 288.0, 288.0, 264.0, 252.0, 291.0, 293.0, 292.0, 290.0, 285.0, 297.0, 275.0, 301.0, 297.0, 290.0, 288.0, 291.0, 258.0, 264.0, 287.0, 297.0, 283.0, 299.0, 259.0, 260.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7055510816247841, "mean_inference_ms": 1.2652897116048947, "mean_action_processing_ms": 0.13484551900006134, "mean_env_wait_ms": 0.8493587874707947, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 6809600, "num_agent_steps_trained": 6809600, "num_env_steps_sampled": 3404800, "num_env_steps_trained": 3404800, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 3404800, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 6809600, "timers": {"training_iteration_time_ms": 3623.845, "learn_time_ms": 1094.074, "learn_throughput": 11699.389, "synch_weights_time_ms": 12.251}, "counters": {"num_env_steps_sampled": 3404800, "num_env_steps_trained": 3404800, "num_agent_steps_sampled": 6809600, "num_agent_steps_trained": 6809600}, "done": false, "episodes_total": 8512, "training_iteration": 266, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-17-45", "timestamp": 1666581465, "time_this_iter_s": 3.9619455337524414, "time_total_s": 1027.5574369430542, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1027.5574369430542, "timesteps_since_restore": 0, "iterations_since_restore": 266, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 21.383333333333336, "ram_util_percent": 10.616666666666667}}
+{"custom_metrics": {"sparse_reward_mean": 197.8, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 176.01, "shaped_reward_min": 132, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.04, "onion_pickup_agent_0_min": 3, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 17.57, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 29, "useful_onion_pickup_agent_0_mean": 14.88, "useful_onion_pickup_agent_0_min": 2, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 17.33, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 28, "onion_drop_agent_0_mean": 0.08, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.19, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.02, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, 
"useful_onion_drop_agent_1_mean": 0.09, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.65, "potting_onion_agent_0_min": 2, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 17.11, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 27, "dish_pickup_agent_0_mean": 5.96, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.02, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.67, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.49, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.17, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.29, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.04, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.58, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.51, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 10, "soup_delivery_agent_0_mean": 5.51, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.44, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 14.65, "optimal_onion_potting_agent_0_min": 2, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 17.11, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 27, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.65, "viable_onion_potting_agent_0_min": 2, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 17.11, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 27, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0002950136549770832, "policy_loss": -0.0006376116070896387, "vf_loss": 
7.6767144203186035, "vf_explained_var": 0.6027138829231262, "kl": 0.0018985953647643328, "entropy": 0.850143551826477, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 3417600, "num_env_steps_trained": 3417600, "num_agent_steps_sampled": 6835200, "num_agent_steps_trained": 6835200}, "sampler_results": {"episode_reward_max": 639.0, "episode_reward_min": 452.0, "episode_reward_mean": 571.61, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 224.0}, "policy_reward_max": {"ppo": 323.0}, "policy_reward_mean": {"ppo": 285.805}, "custom_metrics": {"sparse_reward_mean": 197.8, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 176.01, "shaped_reward_min": 132, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.04, "onion_pickup_agent_0_min": 3, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 17.57, 
"onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 29, "useful_onion_pickup_agent_0_mean": 14.88, "useful_onion_pickup_agent_0_min": 2, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 17.33, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 28, "onion_drop_agent_0_mean": 0.08, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.19, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.02, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.09, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.65, "potting_onion_agent_0_min": 2, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 17.11, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 27, "dish_pickup_agent_0_mean": 5.96, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.02, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.67, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.49, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.17, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.29, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.04, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.58, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.51, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 10, "soup_delivery_agent_0_mean": 5.51, "soup_delivery_agent_0_min": 2, 
"soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.44, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 14.65, "optimal_onion_potting_agent_0_min": 2, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 17.11, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 27, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.65, "viable_onion_potting_agent_0_min": 2, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 17.11, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 27, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [584.0, 590.0, 479.0, 587.0, 624.0, 587.0, 579.0, 579.0, 630.0, 570.0, 639.0, 582.0, 579.0, 576.0, 579.0, 576.0, 579.0, 519.0, 579.0, 573.0, 573.0, 573.0, 627.0, 582.0, 522.0, 581.0, 573.0, 576.0, 579.0, 576.0, 524.0, 567.0, 525.0, 582.0, 573.0, 582.0, 519.0, 576.0, 579.0, 452.0, 582.0, 570.0, 570.0, 525.0, 582.0, 573.0, 576.0, 527.0, 567.0, 579.0, 582.0, 582.0, 533.0, 576.0, 522.0, 576.0, 576.0, 516.0, 584.0, 582.0, 582.0, 576.0, 587.0, 579.0, 522.0, 584.0, 582.0, 519.0, 582.0, 584.0, 576.0, 582.0, 581.0, 579.0, 582.0, 576.0, 579.0, 576.0, 579.0, 633.0, 573.0, 576.0, 570.0, 576.0, 576.0, 519.0, 624.0, 576.0, 581.0, 576.0, 579.0, 570.0, 576.0, 582.0, 576.0, 576.0, 573.0, 576.0, 582.0, 573.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [273.0, 311.0, 303.0, 287.0, 241.0, 238.0, 296.0, 291.0, 316.0, 308.0, 296.0, 291.0, 292.0, 287.0, 293.0, 286.0, 323.0, 307.0, 284.0, 286.0, 321.0, 318.0, 292.0, 290.0, 287.0, 292.0, 293.0, 283.0, 288.0, 291.0, 289.0, 287.0, 295.0, 284.0, 262.0, 257.0, 300.0, 279.0, 285.0, 288.0, 294.0, 279.0, 286.0, 287.0, 323.0, 304.0, 290.0, 292.0, 257.0, 
265.0, 288.0, 293.0, 288.0, 285.0, 285.0, 291.0, 289.0, 290.0, 284.0, 292.0, 261.0, 263.0, 286.0, 281.0, 260.0, 265.0, 297.0, 285.0, 282.0, 291.0, 299.0, 283.0, 258.0, 261.0, 290.0, 286.0, 298.0, 281.0, 224.0, 228.0, 296.0, 286.0, 290.0, 280.0, 289.0, 281.0, 267.0, 258.0, 293.0, 289.0, 292.0, 281.0, 293.0, 283.0, 269.0, 258.0, 282.0, 285.0, 296.0, 283.0, 292.0, 290.0, 298.0, 284.0, 260.0, 273.0, 290.0, 286.0, 261.0, 261.0, 291.0, 285.0, 288.0, 288.0, 264.0, 252.0, 291.0, 293.0, 292.0, 290.0, 285.0, 297.0, 275.0, 301.0, 297.0, 290.0, 288.0, 291.0, 258.0, 264.0, 287.0, 297.0, 283.0, 299.0, 259.0, 260.0, 296.0, 286.0, 298.0, 286.0, 293.0, 283.0, 296.0, 286.0, 298.0, 283.0, 283.0, 296.0, 288.0, 294.0, 291.0, 285.0, 291.0, 288.0, 292.0, 284.0, 297.0, 282.0, 319.0, 314.0, 290.0, 283.0, 279.0, 297.0, 297.0, 273.0, 291.0, 285.0, 285.0, 291.0, 250.0, 269.0, 303.0, 321.0, 282.0, 294.0, 290.0, 291.0, 295.0, 281.0, 288.0, 291.0, 284.0, 286.0, 281.0, 295.0, 287.0, 295.0, 286.0, 290.0, 287.0, 289.0, 284.0, 289.0, 291.0, 285.0, 286.0, 296.0, 284.0, 289.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7054255631638321, "mean_inference_ms": 1.2650641076249054, "mean_action_processing_ms": 0.1348278615851263, "mean_env_wait_ms": 0.849377041592129, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 639.0, "episode_reward_min": 452.0, "episode_reward_mean": 571.61, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 224.0}, "policy_reward_max": {"ppo": 323.0}, "policy_reward_mean": {"ppo": 285.805}, "hist_stats": {"episode_reward": [584.0, 590.0, 479.0, 587.0, 624.0, 587.0, 579.0, 579.0, 630.0, 570.0, 639.0, 582.0, 579.0, 576.0, 579.0, 576.0, 579.0, 519.0, 579.0, 573.0, 573.0, 573.0, 627.0, 582.0, 522.0, 581.0, 573.0, 576.0, 579.0, 576.0, 524.0, 567.0, 525.0, 582.0, 573.0, 582.0, 519.0, 576.0, 579.0, 452.0, 582.0, 570.0, 570.0, 525.0, 582.0, 573.0, 576.0, 527.0, 567.0, 579.0, 582.0, 582.0, 533.0, 576.0, 522.0, 576.0, 576.0, 
516.0, 584.0, 582.0, 582.0, 576.0, 587.0, 579.0, 522.0, 584.0, 582.0, 519.0, 582.0, 584.0, 576.0, 582.0, 581.0, 579.0, 582.0, 576.0, 579.0, 576.0, 579.0, 633.0, 573.0, 576.0, 570.0, 576.0, 576.0, 519.0, 624.0, 576.0, 581.0, 576.0, 579.0, 570.0, 576.0, 582.0, 576.0, 576.0, 573.0, 576.0, 582.0, 573.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [273.0, 311.0, 303.0, 287.0, 241.0, 238.0, 296.0, 291.0, 316.0, 308.0, 296.0, 291.0, 292.0, 287.0, 293.0, 286.0, 323.0, 307.0, 284.0, 286.0, 321.0, 318.0, 292.0, 290.0, 287.0, 292.0, 293.0, 283.0, 288.0, 291.0, 289.0, 287.0, 295.0, 284.0, 262.0, 257.0, 300.0, 279.0, 285.0, 288.0, 294.0, 279.0, 286.0, 287.0, 323.0, 304.0, 290.0, 292.0, 257.0, 265.0, 288.0, 293.0, 288.0, 285.0, 285.0, 291.0, 289.0, 290.0, 284.0, 292.0, 261.0, 263.0, 286.0, 281.0, 260.0, 265.0, 297.0, 285.0, 282.0, 291.0, 299.0, 283.0, 258.0, 261.0, 290.0, 286.0, 298.0, 281.0, 224.0, 228.0, 296.0, 286.0, 290.0, 280.0, 289.0, 281.0, 267.0, 258.0, 293.0, 289.0, 292.0, 281.0, 293.0, 283.0, 269.0, 258.0, 282.0, 285.0, 296.0, 283.0, 292.0, 290.0, 298.0, 284.0, 260.0, 273.0, 290.0, 286.0, 261.0, 261.0, 291.0, 285.0, 288.0, 288.0, 264.0, 252.0, 291.0, 293.0, 292.0, 290.0, 285.0, 297.0, 275.0, 301.0, 297.0, 290.0, 288.0, 291.0, 258.0, 264.0, 287.0, 297.0, 283.0, 299.0, 259.0, 260.0, 296.0, 286.0, 298.0, 286.0, 293.0, 283.0, 296.0, 286.0, 298.0, 283.0, 283.0, 296.0, 288.0, 294.0, 291.0, 285.0, 291.0, 288.0, 292.0, 284.0, 297.0, 282.0, 319.0, 314.0, 290.0, 283.0, 279.0, 297.0, 297.0, 
273.0, 291.0, 285.0, 285.0, 291.0, 250.0, 269.0, 303.0, 321.0, 282.0, 294.0, 290.0, 291.0, 295.0, 281.0, 288.0, 291.0, 284.0, 286.0, 281.0, 295.0, 287.0, 295.0, 286.0, 290.0, 287.0, 289.0, 284.0, 289.0, 291.0, 285.0, 286.0, 296.0, 284.0, 289.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7054255631638321, "mean_inference_ms": 1.2650641076249054, "mean_action_processing_ms": 0.1348278615851263, "mean_env_wait_ms": 0.849377041592129, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 6835200, "num_agent_steps_trained": 6835200, "num_env_steps_sampled": 3417600, "num_env_steps_trained": 3417600, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 3417600, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 6835200, "timers": {"training_iteration_time_ms": 3637.345, "learn_time_ms": 1097.041, "learn_throughput": 11667.754, "synch_weights_time_ms": 11.519}, "counters": {"num_env_steps_sampled": 3417600, "num_env_steps_trained": 3417600, "num_agent_steps_sampled": 6835200, "num_agent_steps_trained": 6835200}, "done": false, "episodes_total": 8544, "training_iteration": 267, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-17-49", "timestamp": 1666581469, "time_this_iter_s": 3.773404359817505, "time_total_s": 1031.3308413028717, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1031.3308413028717, "timesteps_since_restore": 0, "iterations_since_restore": 267, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.880000000000003, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 196.8, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 175.02, "shaped_reward_min": 119, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.93, "onion_pickup_agent_0_min": 3, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 17.44, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 29, "useful_onion_pickup_agent_0_mean": 14.79, "useful_onion_pickup_agent_0_min": 2, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 17.2, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 28, "onion_drop_agent_0_mean": 0.07, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.17, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.02, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, 
"useful_onion_drop_agent_1_mean": 0.07, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.59, "potting_onion_agent_0_min": 2, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 17.03, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 27, "dish_pickup_agent_0_mean": 5.89, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 4.99, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.59, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.53, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.15, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.25, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.04, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.49, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.49, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.42, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.46, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.59, "optimal_onion_potting_agent_0_min": 2, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 17.03, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 27, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.59, "viable_onion_potting_agent_0_min": 2, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 17.03, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 27, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.004363615065813065, "policy_loss": 0.00400199368596077, "vf_loss": 
7.881130218505859, "vf_explained_var": 0.5937398672103882, "kl": 0.002955435309559107, "entropy": 0.8529794812202454, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 3430400, "num_env_steps_trained": 3430400, "num_agent_steps_sampled": 6860800, "num_agent_steps_trained": 6860800}, "sampler_results": {"episode_reward_max": 633.0, "episode_reward_min": 359.0, "episode_reward_mean": 568.62, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 175.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 284.31}, "custom_metrics": {"sparse_reward_mean": 196.8, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 175.02, "shaped_reward_min": 119, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.93, "onion_pickup_agent_0_min": 3, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 17.44, 
"onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 29, "useful_onion_pickup_agent_0_mean": 14.79, "useful_onion_pickup_agent_0_min": 2, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 17.2, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 28, "onion_drop_agent_0_mean": 0.07, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.17, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.02, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.07, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.59, "potting_onion_agent_0_min": 2, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 17.03, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 27, "dish_pickup_agent_0_mean": 5.89, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 4.99, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.59, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.53, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.15, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.25, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.04, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.49, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.49, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.42, "soup_delivery_agent_0_min": 2, 
"soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.46, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.59, "optimal_onion_potting_agent_0_min": 2, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 17.03, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 27, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.59, "viable_onion_potting_agent_0_min": 2, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 17.03, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 27, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [525.0, 582.0, 573.0, 582.0, 519.0, 576.0, 579.0, 452.0, 582.0, 570.0, 570.0, 525.0, 582.0, 573.0, 576.0, 527.0, 567.0, 579.0, 582.0, 582.0, 533.0, 576.0, 522.0, 576.0, 576.0, 516.0, 584.0, 582.0, 582.0, 576.0, 587.0, 579.0, 522.0, 584.0, 582.0, 519.0, 582.0, 584.0, 576.0, 582.0, 581.0, 579.0, 582.0, 576.0, 579.0, 576.0, 579.0, 633.0, 573.0, 576.0, 570.0, 576.0, 576.0, 519.0, 624.0, 576.0, 581.0, 576.0, 579.0, 570.0, 576.0, 582.0, 576.0, 576.0, 573.0, 576.0, 582.0, 573.0, 573.0, 576.0, 576.0, 579.0, 584.0, 582.0, 576.0, 573.0, 570.0, 359.0, 573.0, 579.0, 582.0, 525.0, 576.0, 570.0, 582.0, 630.0, 579.0, 579.0, 576.0, 576.0, 630.0, 579.0, 576.0, 573.0, 584.0, 579.0, 579.0, 582.0, 579.0, 408.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [260.0, 265.0, 297.0, 285.0, 282.0, 291.0, 299.0, 283.0, 258.0, 261.0, 290.0, 286.0, 298.0, 281.0, 224.0, 228.0, 296.0, 286.0, 290.0, 280.0, 289.0, 281.0, 267.0, 258.0, 293.0, 289.0, 292.0, 281.0, 293.0, 283.0, 269.0, 258.0, 282.0, 285.0, 296.0, 283.0, 292.0, 290.0, 298.0, 284.0, 260.0, 273.0, 290.0, 286.0, 261.0, 261.0, 291.0, 285.0, 288.0, 
288.0, 264.0, 252.0, 291.0, 293.0, 292.0, 290.0, 285.0, 297.0, 275.0, 301.0, 297.0, 290.0, 288.0, 291.0, 258.0, 264.0, 287.0, 297.0, 283.0, 299.0, 259.0, 260.0, 296.0, 286.0, 298.0, 286.0, 293.0, 283.0, 296.0, 286.0, 298.0, 283.0, 283.0, 296.0, 288.0, 294.0, 291.0, 285.0, 291.0, 288.0, 292.0, 284.0, 297.0, 282.0, 319.0, 314.0, 290.0, 283.0, 279.0, 297.0, 297.0, 273.0, 291.0, 285.0, 285.0, 291.0, 250.0, 269.0, 303.0, 321.0, 282.0, 294.0, 290.0, 291.0, 295.0, 281.0, 288.0, 291.0, 284.0, 286.0, 281.0, 295.0, 287.0, 295.0, 286.0, 290.0, 287.0, 289.0, 284.0, 289.0, 291.0, 285.0, 286.0, 296.0, 284.0, 289.0, 288.0, 285.0, 289.0, 287.0, 296.0, 280.0, 291.0, 288.0, 299.0, 285.0, 298.0, 284.0, 289.0, 287.0, 286.0, 287.0, 291.0, 279.0, 175.0, 184.0, 290.0, 283.0, 285.0, 294.0, 275.0, 307.0, 268.0, 257.0, 290.0, 286.0, 284.0, 286.0, 288.0, 294.0, 306.0, 324.0, 290.0, 289.0, 290.0, 289.0, 287.0, 289.0, 282.0, 294.0, 317.0, 313.0, 291.0, 288.0, 290.0, 286.0, 281.0, 292.0, 296.0, 288.0, 290.0, 289.0, 294.0, 285.0, 291.0, 291.0, 287.0, 292.0, 199.0, 209.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7053109366828282, "mean_inference_ms": 1.2648639446405867, "mean_action_processing_ms": 0.13481287585190244, "mean_env_wait_ms": 0.8494139376583145, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 633.0, "episode_reward_min": 359.0, "episode_reward_mean": 568.62, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 175.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 284.31}, "hist_stats": {"episode_reward": [525.0, 582.0, 573.0, 582.0, 519.0, 576.0, 579.0, 452.0, 582.0, 570.0, 570.0, 525.0, 582.0, 573.0, 576.0, 527.0, 567.0, 579.0, 582.0, 582.0, 533.0, 576.0, 522.0, 576.0, 576.0, 516.0, 584.0, 582.0, 582.0, 576.0, 587.0, 579.0, 522.0, 584.0, 582.0, 519.0, 582.0, 584.0, 576.0, 582.0, 581.0, 579.0, 582.0, 576.0, 579.0, 576.0, 579.0, 633.0, 573.0, 576.0, 570.0, 576.0, 576.0, 519.0, 624.0, 576.0, 581.0, 
576.0, 579.0, 570.0, 576.0, 582.0, 576.0, 576.0, 573.0, 576.0, 582.0, 573.0, 573.0, 576.0, 576.0, 579.0, 584.0, 582.0, 576.0, 573.0, 570.0, 359.0, 573.0, 579.0, 582.0, 525.0, 576.0, 570.0, 582.0, 630.0, 579.0, 579.0, 576.0, 576.0, 630.0, 579.0, 576.0, 573.0, 584.0, 579.0, 579.0, 582.0, 579.0, 408.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [260.0, 265.0, 297.0, 285.0, 282.0, 291.0, 299.0, 283.0, 258.0, 261.0, 290.0, 286.0, 298.0, 281.0, 224.0, 228.0, 296.0, 286.0, 290.0, 280.0, 289.0, 281.0, 267.0, 258.0, 293.0, 289.0, 292.0, 281.0, 293.0, 283.0, 269.0, 258.0, 282.0, 285.0, 296.0, 283.0, 292.0, 290.0, 298.0, 284.0, 260.0, 273.0, 290.0, 286.0, 261.0, 261.0, 291.0, 285.0, 288.0, 288.0, 264.0, 252.0, 291.0, 293.0, 292.0, 290.0, 285.0, 297.0, 275.0, 301.0, 297.0, 290.0, 288.0, 291.0, 258.0, 264.0, 287.0, 297.0, 283.0, 299.0, 259.0, 260.0, 296.0, 286.0, 298.0, 286.0, 293.0, 283.0, 296.0, 286.0, 298.0, 283.0, 283.0, 296.0, 288.0, 294.0, 291.0, 285.0, 291.0, 288.0, 292.0, 284.0, 297.0, 282.0, 319.0, 314.0, 290.0, 283.0, 279.0, 297.0, 297.0, 273.0, 291.0, 285.0, 285.0, 291.0, 250.0, 269.0, 303.0, 321.0, 282.0, 294.0, 290.0, 291.0, 295.0, 281.0, 288.0, 291.0, 284.0, 286.0, 281.0, 295.0, 287.0, 295.0, 286.0, 290.0, 287.0, 289.0, 284.0, 289.0, 291.0, 285.0, 286.0, 296.0, 284.0, 289.0, 288.0, 285.0, 289.0, 287.0, 296.0, 280.0, 291.0, 288.0, 299.0, 285.0, 298.0, 284.0, 289.0, 287.0, 286.0, 287.0, 291.0, 279.0, 175.0, 184.0, 290.0, 283.0, 285.0, 294.0, 275.0, 307.0, 268.0, 257.0, 290.0, 
286.0, 284.0, 286.0, 288.0, 294.0, 306.0, 324.0, 290.0, 289.0, 290.0, 289.0, 287.0, 289.0, 282.0, 294.0, 317.0, 313.0, 291.0, 288.0, 290.0, 286.0, 281.0, 292.0, 296.0, 288.0, 290.0, 289.0, 294.0, 285.0, 291.0, 291.0, 287.0, 292.0, 199.0, 209.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7053109366828282, "mean_inference_ms": 1.2648639446405867, "mean_action_processing_ms": 0.13481287585190244, "mean_env_wait_ms": 0.8494139376583145, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 6860800, "num_agent_steps_trained": 6860800, "num_env_steps_sampled": 3430400, "num_env_steps_trained": 3430400, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 3430400, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 6860800, "timers": {"training_iteration_time_ms": 3657.302, "learn_time_ms": 1113.485, "learn_throughput": 11495.444, "synch_weights_time_ms": 12.352}, "counters": {"num_env_steps_sampled": 3430400, "num_env_steps_trained": 3430400, "num_agent_steps_sampled": 6860800, "num_agent_steps_trained": 6860800}, "done": false, "episodes_total": 8576, "training_iteration": 268, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-17-53", "timestamp": 1666581473, "time_this_iter_s": 3.783074140548706, "time_total_s": 1035.1139154434204, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1035.1139154434204, "timesteps_since_restore": 0, "iterations_since_restore": 268, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 21.033333333333335, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 198.2, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 175.46, "shaped_reward_min": 119, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.3, "onion_pickup_agent_0_min": 4, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 17.14, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 15.13, "useful_onion_pickup_agent_0_min": 4, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 16.93, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 26, "onion_drop_agent_0_mean": 0.05, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.14, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.02, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, 
"useful_onion_drop_agent_1_mean": 0.05, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 14.93, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 16.76, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 26, "dish_pickup_agent_0_mean": 5.62, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.17, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.31, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.82, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.1, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.2, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.24, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.76, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.2, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.73, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.93, "optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 16.76, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 26, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.93, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 16.76, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 26, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0009098669979721308, "policy_loss": 0.000542132300324738, "vf_loss": 
7.941966533660889, "vf_explained_var": 0.5730843544006348, "kl": 0.0019231629557907581, "entropy": 0.8529212474822998, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 3443200, "num_env_steps_trained": 3443200, "num_agent_steps_sampled": 6886400, "num_agent_steps_trained": 6886400}, "sampler_results": {"episode_reward_max": 633.0, "episode_reward_min": 359.0, "episode_reward_mean": 571.86, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 175.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 285.93}, "custom_metrics": {"sparse_reward_mean": 198.2, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 175.46, "shaped_reward_min": 119, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.3, "onion_pickup_agent_0_min": 4, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 17.14, 
"onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 15.13, "useful_onion_pickup_agent_0_min": 4, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 16.93, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 26, "onion_drop_agent_0_mean": 0.05, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.14, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.02, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.05, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 14.93, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 16.76, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 26, "dish_pickup_agent_0_mean": 5.62, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.17, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.31, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.82, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.1, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.2, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.24, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.76, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.2, "soup_delivery_agent_0_min": 2, 
"soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.73, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.93, "optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 16.76, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 26, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.93, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 16.76, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 26, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [522.0, 584.0, 582.0, 519.0, 582.0, 584.0, 576.0, 582.0, 581.0, 579.0, 582.0, 576.0, 579.0, 576.0, 579.0, 633.0, 573.0, 576.0, 570.0, 576.0, 576.0, 519.0, 624.0, 576.0, 581.0, 576.0, 579.0, 570.0, 576.0, 582.0, 576.0, 576.0, 573.0, 576.0, 582.0, 573.0, 573.0, 576.0, 576.0, 579.0, 584.0, 582.0, 576.0, 573.0, 570.0, 359.0, 573.0, 579.0, 582.0, 525.0, 576.0, 570.0, 582.0, 630.0, 579.0, 579.0, 576.0, 576.0, 630.0, 579.0, 576.0, 573.0, 584.0, 579.0, 579.0, 582.0, 579.0, 408.0, 576.0, 576.0, 630.0, 573.0, 468.0, 587.0, 573.0, 573.0, 570.0, 570.0, 584.0, 582.0, 573.0, 579.0, 630.0, 573.0, 633.0, 570.0, 570.0, 582.0, 576.0, 579.0, 576.0, 579.0, 576.0, 576.0, 579.0, 582.0, 522.0, 510.0, 570.0, 519.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [258.0, 264.0, 287.0, 297.0, 283.0, 299.0, 259.0, 260.0, 296.0, 286.0, 298.0, 286.0, 293.0, 283.0, 296.0, 286.0, 298.0, 283.0, 283.0, 296.0, 288.0, 294.0, 291.0, 285.0, 291.0, 288.0, 292.0, 284.0, 297.0, 282.0, 319.0, 314.0, 290.0, 283.0, 279.0, 297.0, 297.0, 273.0, 291.0, 285.0, 285.0, 291.0, 250.0, 269.0, 303.0, 321.0, 282.0, 294.0, 290.0, 
291.0, 295.0, 281.0, 288.0, 291.0, 284.0, 286.0, 281.0, 295.0, 287.0, 295.0, 286.0, 290.0, 287.0, 289.0, 284.0, 289.0, 291.0, 285.0, 286.0, 296.0, 284.0, 289.0, 288.0, 285.0, 289.0, 287.0, 296.0, 280.0, 291.0, 288.0, 299.0, 285.0, 298.0, 284.0, 289.0, 287.0, 286.0, 287.0, 291.0, 279.0, 175.0, 184.0, 290.0, 283.0, 285.0, 294.0, 275.0, 307.0, 268.0, 257.0, 290.0, 286.0, 284.0, 286.0, 288.0, 294.0, 306.0, 324.0, 290.0, 289.0, 290.0, 289.0, 287.0, 289.0, 282.0, 294.0, 317.0, 313.0, 291.0, 288.0, 290.0, 286.0, 281.0, 292.0, 296.0, 288.0, 290.0, 289.0, 294.0, 285.0, 291.0, 291.0, 287.0, 292.0, 199.0, 209.0, 284.0, 292.0, 288.0, 288.0, 311.0, 319.0, 284.0, 289.0, 219.0, 249.0, 291.0, 296.0, 283.0, 290.0, 289.0, 284.0, 279.0, 291.0, 289.0, 281.0, 291.0, 293.0, 294.0, 288.0, 287.0, 286.0, 288.0, 291.0, 316.0, 314.0, 292.0, 281.0, 310.0, 323.0, 278.0, 292.0, 278.0, 292.0, 295.0, 287.0, 287.0, 289.0, 285.0, 294.0, 289.0, 287.0, 283.0, 296.0, 281.0, 295.0, 293.0, 283.0, 284.0, 295.0, 294.0, 288.0, 260.0, 262.0, 249.0, 261.0, 287.0, 283.0, 252.0, 267.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7051935011860442, "mean_inference_ms": 1.264642375620096, "mean_action_processing_ms": 0.13480085895955796, "mean_env_wait_ms": 0.8493182842486506, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 633.0, "episode_reward_min": 359.0, "episode_reward_mean": 571.86, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 175.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 285.93}, "hist_stats": {"episode_reward": [522.0, 584.0, 582.0, 519.0, 582.0, 584.0, 576.0, 582.0, 581.0, 579.0, 582.0, 576.0, 579.0, 576.0, 579.0, 633.0, 573.0, 576.0, 570.0, 576.0, 576.0, 519.0, 624.0, 576.0, 581.0, 576.0, 579.0, 570.0, 576.0, 582.0, 576.0, 576.0, 573.0, 576.0, 582.0, 573.0, 573.0, 576.0, 576.0, 579.0, 584.0, 582.0, 576.0, 573.0, 570.0, 359.0, 573.0, 579.0, 582.0, 525.0, 576.0, 570.0, 582.0, 630.0, 579.0, 579.0, 576.0, 
576.0, 630.0, 579.0, 576.0, 573.0, 584.0, 579.0, 579.0, 582.0, 579.0, 408.0, 576.0, 576.0, 630.0, 573.0, 468.0, 587.0, 573.0, 573.0, 570.0, 570.0, 584.0, 582.0, 573.0, 579.0, 630.0, 573.0, 633.0, 570.0, 570.0, 582.0, 576.0, 579.0, 576.0, 579.0, 576.0, 576.0, 579.0, 582.0, 522.0, 510.0, 570.0, 519.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [258.0, 264.0, 287.0, 297.0, 283.0, 299.0, 259.0, 260.0, 296.0, 286.0, 298.0, 286.0, 293.0, 283.0, 296.0, 286.0, 298.0, 283.0, 283.0, 296.0, 288.0, 294.0, 291.0, 285.0, 291.0, 288.0, 292.0, 284.0, 297.0, 282.0, 319.0, 314.0, 290.0, 283.0, 279.0, 297.0, 297.0, 273.0, 291.0, 285.0, 285.0, 291.0, 250.0, 269.0, 303.0, 321.0, 282.0, 294.0, 290.0, 291.0, 295.0, 281.0, 288.0, 291.0, 284.0, 286.0, 281.0, 295.0, 287.0, 295.0, 286.0, 290.0, 287.0, 289.0, 284.0, 289.0, 291.0, 285.0, 286.0, 296.0, 284.0, 289.0, 288.0, 285.0, 289.0, 287.0, 296.0, 280.0, 291.0, 288.0, 299.0, 285.0, 298.0, 284.0, 289.0, 287.0, 286.0, 287.0, 291.0, 279.0, 175.0, 184.0, 290.0, 283.0, 285.0, 294.0, 275.0, 307.0, 268.0, 257.0, 290.0, 286.0, 284.0, 286.0, 288.0, 294.0, 306.0, 324.0, 290.0, 289.0, 290.0, 289.0, 287.0, 289.0, 282.0, 294.0, 317.0, 313.0, 291.0, 288.0, 290.0, 286.0, 281.0, 292.0, 296.0, 288.0, 290.0, 289.0, 294.0, 285.0, 291.0, 291.0, 287.0, 292.0, 199.0, 209.0, 284.0, 292.0, 288.0, 288.0, 311.0, 319.0, 284.0, 289.0, 219.0, 249.0, 291.0, 296.0, 283.0, 290.0, 289.0, 284.0, 279.0, 291.0, 289.0, 281.0, 291.0, 293.0, 294.0, 288.0, 287.0, 286.0, 288.0, 291.0, 316.0, 
314.0, 292.0, 281.0, 310.0, 323.0, 278.0, 292.0, 278.0, 292.0, 295.0, 287.0, 287.0, 289.0, 285.0, 294.0, 289.0, 287.0, 283.0, 296.0, 281.0, 295.0, 293.0, 283.0, 284.0, 295.0, 294.0, 288.0, 260.0, 262.0, 249.0, 261.0, 287.0, 283.0, 252.0, 267.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7051935011860442, "mean_inference_ms": 1.264642375620096, "mean_action_processing_ms": 0.13480085895955796, "mean_env_wait_ms": 0.8493182842486506, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 6886400, "num_agent_steps_trained": 6886400, "num_env_steps_sampled": 3443200, "num_env_steps_trained": 3443200, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 3443200, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 6886400, "timers": {"training_iteration_time_ms": 3656.099, "learn_time_ms": 1112.816, "learn_throughput": 11502.35, "synch_weights_time_ms": 13.273}, "counters": {"num_env_steps_sampled": 3443200, "num_env_steps_trained": 3443200, "num_agent_steps_sampled": 6886400, "num_agent_steps_trained": 6886400}, "done": false, "episodes_total": 8608, "training_iteration": 269, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-17-57", "timestamp": 1666581477, "time_this_iter_s": 3.5823559761047363, "time_total_s": 1038.6962714195251, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1038.6962714195251, "timesteps_since_restore": 0, "iterations_since_restore": 269, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.560000000000002, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 198.0, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 175.21, "shaped_reward_min": 119, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.34, "onion_pickup_agent_0_min": 4, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 17.11, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 15.12, "useful_onion_pickup_agent_0_min": 4, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 16.86, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 26, "onion_drop_agent_0_mean": 0.09, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.15, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.03, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, 
"useful_onion_drop_agent_1_mean": 0.07, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 14.99, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 16.69, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 26, "dish_pickup_agent_0_mean": 5.57, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.2, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.21, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.83, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.13, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.16, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.05, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.05, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.16, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.85, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.15, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.8, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.99, "optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 16.69, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 26, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.99, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 16.69, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 26, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.001868363469839096, "policy_loss": 0.001498084980994463, "vf_loss": 
7.933145523071289, "vf_explained_var": 0.59946209192276, "kl": 0.0020921118557453156, "entropy": 0.846068263053894, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 3456000, "num_env_steps_trained": 3456000, "num_agent_steps_sampled": 6912000, "num_agent_steps_trained": 6912000}, "sampler_results": {"episode_reward_max": 633.0, "episode_reward_min": 359.0, "episode_reward_mean": 571.21, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 175.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 285.605}, "custom_metrics": {"sparse_reward_mean": 198.0, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 175.21, "shaped_reward_min": 119, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.34, "onion_pickup_agent_0_min": 4, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 17.11, 
"onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 15.12, "useful_onion_pickup_agent_0_min": 4, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 16.86, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 26, "onion_drop_agent_0_mean": 0.09, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.15, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.03, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.07, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 14.99, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 16.69, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 26, "dish_pickup_agent_0_mean": 5.57, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.2, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.21, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.83, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.13, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.16, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.05, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.05, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.16, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.85, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.15, "soup_delivery_agent_0_min": 2, 
"soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.8, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.99, "optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 16.69, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 26, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.99, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 16.69, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 26, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [573.0, 576.0, 582.0, 573.0, 573.0, 576.0, 576.0, 579.0, 584.0, 582.0, 576.0, 573.0, 570.0, 359.0, 573.0, 579.0, 582.0, 525.0, 576.0, 570.0, 582.0, 630.0, 579.0, 579.0, 576.0, 576.0, 630.0, 579.0, 576.0, 573.0, 584.0, 579.0, 579.0, 582.0, 579.0, 408.0, 576.0, 576.0, 630.0, 573.0, 468.0, 587.0, 573.0, 573.0, 570.0, 570.0, 584.0, 582.0, 573.0, 579.0, 630.0, 573.0, 633.0, 570.0, 570.0, 582.0, 576.0, 579.0, 576.0, 579.0, 576.0, 576.0, 579.0, 582.0, 522.0, 510.0, 570.0, 519.0, 579.0, 573.0, 579.0, 587.0, 525.0, 627.0, 582.0, 579.0, 573.0, 564.0, 584.0, 573.0, 576.0, 584.0, 627.0, 573.0, 582.0, 576.0, 495.0, 576.0, 525.0, 630.0, 579.0, 582.0, 527.0, 579.0, 581.0, 579.0, 576.0, 524.0, 582.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [284.0, 289.0, 291.0, 285.0, 286.0, 296.0, 284.0, 289.0, 288.0, 285.0, 289.0, 287.0, 296.0, 280.0, 291.0, 288.0, 299.0, 285.0, 298.0, 284.0, 289.0, 287.0, 286.0, 287.0, 291.0, 279.0, 175.0, 184.0, 290.0, 283.0, 285.0, 294.0, 275.0, 307.0, 268.0, 257.0, 290.0, 286.0, 284.0, 286.0, 288.0, 294.0, 306.0, 324.0, 290.0, 289.0, 290.0, 289.0, 287.0, 
289.0, 282.0, 294.0, 317.0, 313.0, 291.0, 288.0, 290.0, 286.0, 281.0, 292.0, 296.0, 288.0, 290.0, 289.0, 294.0, 285.0, 291.0, 291.0, 287.0, 292.0, 199.0, 209.0, 284.0, 292.0, 288.0, 288.0, 311.0, 319.0, 284.0, 289.0, 219.0, 249.0, 291.0, 296.0, 283.0, 290.0, 289.0, 284.0, 279.0, 291.0, 289.0, 281.0, 291.0, 293.0, 294.0, 288.0, 287.0, 286.0, 288.0, 291.0, 316.0, 314.0, 292.0, 281.0, 310.0, 323.0, 278.0, 292.0, 278.0, 292.0, 295.0, 287.0, 287.0, 289.0, 285.0, 294.0, 289.0, 287.0, 283.0, 296.0, 281.0, 295.0, 293.0, 283.0, 284.0, 295.0, 294.0, 288.0, 260.0, 262.0, 249.0, 261.0, 287.0, 283.0, 252.0, 267.0, 286.0, 293.0, 281.0, 292.0, 281.0, 298.0, 302.0, 285.0, 262.0, 263.0, 311.0, 316.0, 288.0, 294.0, 290.0, 289.0, 291.0, 282.0, 273.0, 291.0, 287.0, 297.0, 292.0, 281.0, 285.0, 291.0, 289.0, 295.0, 317.0, 310.0, 282.0, 291.0, 285.0, 297.0, 288.0, 288.0, 249.0, 246.0, 287.0, 289.0, 267.0, 258.0, 314.0, 316.0, 284.0, 295.0, 285.0, 297.0, 271.0, 256.0, 287.0, 292.0, 287.0, 294.0, 287.0, 292.0, 287.0, 289.0, 251.0, 273.0, 293.0, 289.0, 293.0, 286.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7050858585894852, "mean_inference_ms": 1.2643870852329173, "mean_action_processing_ms": 0.1347919372266095, "mean_env_wait_ms": 0.849190680492891, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 633.0, "episode_reward_min": 359.0, "episode_reward_mean": 571.21, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 175.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 285.605}, "hist_stats": {"episode_reward": [573.0, 576.0, 582.0, 573.0, 573.0, 576.0, 576.0, 579.0, 584.0, 582.0, 576.0, 573.0, 570.0, 359.0, 573.0, 579.0, 582.0, 525.0, 576.0, 570.0, 582.0, 630.0, 579.0, 579.0, 576.0, 576.0, 630.0, 579.0, 576.0, 573.0, 584.0, 579.0, 579.0, 582.0, 579.0, 408.0, 576.0, 576.0, 630.0, 573.0, 468.0, 587.0, 573.0, 573.0, 570.0, 570.0, 584.0, 582.0, 573.0, 579.0, 630.0, 573.0, 633.0, 570.0, 570.0, 582.0, 576.0, 
579.0, 576.0, 579.0, 576.0, 576.0, 579.0, 582.0, 522.0, 510.0, 570.0, 519.0, 579.0, 573.0, 579.0, 587.0, 525.0, 627.0, 582.0, 579.0, 573.0, 564.0, 584.0, 573.0, 576.0, 584.0, 627.0, 573.0, 582.0, 576.0, 495.0, 576.0, 525.0, 630.0, 579.0, 582.0, 527.0, 579.0, 581.0, 579.0, 576.0, 524.0, 582.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [284.0, 289.0, 291.0, 285.0, 286.0, 296.0, 284.0, 289.0, 288.0, 285.0, 289.0, 287.0, 296.0, 280.0, 291.0, 288.0, 299.0, 285.0, 298.0, 284.0, 289.0, 287.0, 286.0, 287.0, 291.0, 279.0, 175.0, 184.0, 290.0, 283.0, 285.0, 294.0, 275.0, 307.0, 268.0, 257.0, 290.0, 286.0, 284.0, 286.0, 288.0, 294.0, 306.0, 324.0, 290.0, 289.0, 290.0, 289.0, 287.0, 289.0, 282.0, 294.0, 317.0, 313.0, 291.0, 288.0, 290.0, 286.0, 281.0, 292.0, 296.0, 288.0, 290.0, 289.0, 294.0, 285.0, 291.0, 291.0, 287.0, 292.0, 199.0, 209.0, 284.0, 292.0, 288.0, 288.0, 311.0, 319.0, 284.0, 289.0, 219.0, 249.0, 291.0, 296.0, 283.0, 290.0, 289.0, 284.0, 279.0, 291.0, 289.0, 281.0, 291.0, 293.0, 294.0, 288.0, 287.0, 286.0, 288.0, 291.0, 316.0, 314.0, 292.0, 281.0, 310.0, 323.0, 278.0, 292.0, 278.0, 292.0, 295.0, 287.0, 287.0, 289.0, 285.0, 294.0, 289.0, 287.0, 283.0, 296.0, 281.0, 295.0, 293.0, 283.0, 284.0, 295.0, 294.0, 288.0, 260.0, 262.0, 249.0, 261.0, 287.0, 283.0, 252.0, 267.0, 286.0, 293.0, 281.0, 292.0, 281.0, 298.0, 302.0, 285.0, 262.0, 263.0, 311.0, 316.0, 288.0, 294.0, 290.0, 289.0, 291.0, 282.0, 273.0, 291.0, 287.0, 297.0, 292.0, 281.0, 285.0, 291.0, 289.0, 295.0, 317.0, 
310.0, 282.0, 291.0, 285.0, 297.0, 288.0, 288.0, 249.0, 246.0, 287.0, 289.0, 267.0, 258.0, 314.0, 316.0, 284.0, 295.0, 285.0, 297.0, 271.0, 256.0, 287.0, 292.0, 287.0, 294.0, 287.0, 292.0, 287.0, 289.0, 251.0, 273.0, 293.0, 289.0, 293.0, 286.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7050858585894852, "mean_inference_ms": 1.2643870852329173, "mean_action_processing_ms": 0.1347919372266095, "mean_env_wait_ms": 0.849190680492891, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 6912000, "num_agent_steps_trained": 6912000, "num_env_steps_sampled": 3456000, "num_env_steps_trained": 3456000, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 3456000, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 6912000, "timers": {"training_iteration_time_ms": 3646.365, "learn_time_ms": 1107.179, "learn_throughput": 11560.911, "synch_weights_time_ms": 12.349}, "counters": {"num_env_steps_sampled": 3456000, "num_env_steps_trained": 3456000, "num_agent_steps_sampled": 6912000, "num_agent_steps_trained": 6912000}, "done": false, "episodes_total": 8640, "training_iteration": 270, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-18-01", "timestamp": 1666581481, "time_this_iter_s": 3.6196086406707764, "time_total_s": 1042.315880060196, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1042.315880060196, "timesteps_since_restore": 0, "iterations_since_restore": 270, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.76666666666667, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 199.0, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 176.44, "shaped_reward_min": 128, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.72, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 17.07, "onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 15.35, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 16.73, "useful_onion_pickup_agent_1_min": 6, "useful_onion_pickup_agent_1_max": 26, "onion_drop_agent_0_mean": 0.2, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.16, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.13, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, 
"useful_onion_drop_agent_1_mean": 0.08, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.23, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 16.64, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 26, "dish_pickup_agent_0_mean": 5.65, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.31, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.19, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.92, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.19, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.17, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.06, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.06, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.2, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.91, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.19, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.84, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.23, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 16.64, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 26, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.23, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 16.64, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 26, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0006792682688683271, "policy_loss": 0.00031318137189373374, "vf_loss": 
7.904168128967285, "vf_explained_var": 0.5880827307701111, "kl": 0.002125304192304611, "entropy": 0.8486584424972534, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 3468800, "num_env_steps_trained": 3468800, "num_agent_steps_sampled": 6937600, "num_agent_steps_trained": 6937600}, "sampler_results": {"episode_reward_max": 639.0, "episode_reward_min": 408.0, "episode_reward_mean": 574.44, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 199.0}, "policy_reward_max": {"ppo": 323.0}, "policy_reward_mean": {"ppo": 287.22}, "custom_metrics": {"sparse_reward_mean": 199.0, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 176.44, "shaped_reward_min": 128, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.72, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 17.07, 
"onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 15.35, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 16.73, "useful_onion_pickup_agent_1_min": 6, "useful_onion_pickup_agent_1_max": 26, "onion_drop_agent_0_mean": 0.2, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.16, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.13, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.08, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.23, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 16.64, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 26, "dish_pickup_agent_0_mean": 5.65, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.31, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.19, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.92, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.19, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.17, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.06, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.06, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.2, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.91, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.19, "soup_delivery_agent_0_min": 2, 
"soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.84, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.23, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 16.64, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 26, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.23, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 16.64, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 26, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 582.0, 579.0, 408.0, 576.0, 576.0, 630.0, 573.0, 468.0, 587.0, 573.0, 573.0, 570.0, 570.0, 584.0, 582.0, 573.0, 579.0, 630.0, 573.0, 633.0, 570.0, 570.0, 582.0, 576.0, 579.0, 576.0, 579.0, 576.0, 576.0, 579.0, 582.0, 522.0, 510.0, 570.0, 519.0, 579.0, 573.0, 579.0, 587.0, 525.0, 627.0, 582.0, 579.0, 573.0, 564.0, 584.0, 573.0, 576.0, 584.0, 627.0, 573.0, 582.0, 576.0, 495.0, 576.0, 525.0, 630.0, 579.0, 582.0, 527.0, 579.0, 581.0, 579.0, 576.0, 524.0, 582.0, 579.0, 633.0, 579.0, 573.0, 579.0, 584.0, 581.0, 624.0, 579.0, 567.0, 579.0, 579.0, 582.0, 630.0, 584.0, 639.0, 636.0, 584.0, 582.0, 573.0, 573.0, 573.0, 587.0, 582.0, 579.0, 464.0, 573.0, 576.0, 582.0, 579.0, 536.0, 525.0, 627.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [294.0, 285.0, 291.0, 291.0, 287.0, 292.0, 199.0, 209.0, 284.0, 292.0, 288.0, 288.0, 311.0, 319.0, 284.0, 289.0, 219.0, 249.0, 291.0, 296.0, 283.0, 290.0, 289.0, 284.0, 279.0, 291.0, 289.0, 281.0, 291.0, 293.0, 294.0, 288.0, 287.0, 286.0, 288.0, 291.0, 316.0, 314.0, 292.0, 281.0, 310.0, 323.0, 278.0, 292.0, 278.0, 292.0, 295.0, 287.0, 287.0, 
289.0, 285.0, 294.0, 289.0, 287.0, 283.0, 296.0, 281.0, 295.0, 293.0, 283.0, 284.0, 295.0, 294.0, 288.0, 260.0, 262.0, 249.0, 261.0, 287.0, 283.0, 252.0, 267.0, 286.0, 293.0, 281.0, 292.0, 281.0, 298.0, 302.0, 285.0, 262.0, 263.0, 311.0, 316.0, 288.0, 294.0, 290.0, 289.0, 291.0, 282.0, 273.0, 291.0, 287.0, 297.0, 292.0, 281.0, 285.0, 291.0, 289.0, 295.0, 317.0, 310.0, 282.0, 291.0, 285.0, 297.0, 288.0, 288.0, 249.0, 246.0, 287.0, 289.0, 267.0, 258.0, 314.0, 316.0, 284.0, 295.0, 285.0, 297.0, 271.0, 256.0, 287.0, 292.0, 287.0, 294.0, 287.0, 292.0, 287.0, 289.0, 251.0, 273.0, 293.0, 289.0, 293.0, 286.0, 315.0, 318.0, 291.0, 288.0, 290.0, 283.0, 296.0, 283.0, 292.0, 292.0, 281.0, 300.0, 316.0, 308.0, 287.0, 292.0, 282.0, 285.0, 283.0, 296.0, 300.0, 279.0, 295.0, 287.0, 309.0, 321.0, 299.0, 285.0, 322.0, 317.0, 316.0, 320.0, 286.0, 298.0, 295.0, 287.0, 287.0, 286.0, 280.0, 293.0, 283.0, 290.0, 288.0, 299.0, 288.0, 294.0, 304.0, 275.0, 235.0, 229.0, 285.0, 288.0, 288.0, 288.0, 291.0, 291.0, 297.0, 282.0, 269.0, 267.0, 265.0, 260.0, 313.0, 314.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7049859829023956, "mean_inference_ms": 1.2641320160489828, "mean_action_processing_ms": 0.13478291473748163, "mean_env_wait_ms": 0.8490647234068114, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 639.0, "episode_reward_min": 408.0, "episode_reward_mean": 574.44, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 199.0}, "policy_reward_max": {"ppo": 323.0}, "policy_reward_mean": {"ppo": 287.22}, "hist_stats": {"episode_reward": [579.0, 582.0, 579.0, 408.0, 576.0, 576.0, 630.0, 573.0, 468.0, 587.0, 573.0, 573.0, 570.0, 570.0, 584.0, 582.0, 573.0, 579.0, 630.0, 573.0, 633.0, 570.0, 570.0, 582.0, 576.0, 579.0, 576.0, 579.0, 576.0, 576.0, 579.0, 582.0, 522.0, 510.0, 570.0, 519.0, 579.0, 573.0, 579.0, 587.0, 525.0, 627.0, 582.0, 579.0, 573.0, 564.0, 584.0, 573.0, 576.0, 584.0, 627.0, 573.0, 582.0, 576.0, 495.0, 576.0, 525.0, 
630.0, 579.0, 582.0, 527.0, 579.0, 581.0, 579.0, 576.0, 524.0, 582.0, 579.0, 633.0, 579.0, 573.0, 579.0, 584.0, 581.0, 624.0, 579.0, 567.0, 579.0, 579.0, 582.0, 630.0, 584.0, 639.0, 636.0, 584.0, 582.0, 573.0, 573.0, 573.0, 587.0, 582.0, 579.0, 464.0, 573.0, 576.0, 582.0, 579.0, 536.0, 525.0, 627.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [294.0, 285.0, 291.0, 291.0, 287.0, 292.0, 199.0, 209.0, 284.0, 292.0, 288.0, 288.0, 311.0, 319.0, 284.0, 289.0, 219.0, 249.0, 291.0, 296.0, 283.0, 290.0, 289.0, 284.0, 279.0, 291.0, 289.0, 281.0, 291.0, 293.0, 294.0, 288.0, 287.0, 286.0, 288.0, 291.0, 316.0, 314.0, 292.0, 281.0, 310.0, 323.0, 278.0, 292.0, 278.0, 292.0, 295.0, 287.0, 287.0, 289.0, 285.0, 294.0, 289.0, 287.0, 283.0, 296.0, 281.0, 295.0, 293.0, 283.0, 284.0, 295.0, 294.0, 288.0, 260.0, 262.0, 249.0, 261.0, 287.0, 283.0, 252.0, 267.0, 286.0, 293.0, 281.0, 292.0, 281.0, 298.0, 302.0, 285.0, 262.0, 263.0, 311.0, 316.0, 288.0, 294.0, 290.0, 289.0, 291.0, 282.0, 273.0, 291.0, 287.0, 297.0, 292.0, 281.0, 285.0, 291.0, 289.0, 295.0, 317.0, 310.0, 282.0, 291.0, 285.0, 297.0, 288.0, 288.0, 249.0, 246.0, 287.0, 289.0, 267.0, 258.0, 314.0, 316.0, 284.0, 295.0, 285.0, 297.0, 271.0, 256.0, 287.0, 292.0, 287.0, 294.0, 287.0, 292.0, 287.0, 289.0, 251.0, 273.0, 293.0, 289.0, 293.0, 286.0, 315.0, 318.0, 291.0, 288.0, 290.0, 283.0, 296.0, 283.0, 292.0, 292.0, 281.0, 300.0, 316.0, 308.0, 287.0, 292.0, 282.0, 285.0, 283.0, 296.0, 300.0, 279.0, 295.0, 287.0, 309.0, 321.0, 299.0, 285.0, 322.0, 
317.0, 316.0, 320.0, 286.0, 298.0, 295.0, 287.0, 287.0, 286.0, 280.0, 293.0, 283.0, 290.0, 288.0, 299.0, 288.0, 294.0, 304.0, 275.0, 235.0, 229.0, 285.0, 288.0, 288.0, 288.0, 291.0, 291.0, 297.0, 282.0, 269.0, 267.0, 265.0, 260.0, 313.0, 314.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7049859829023956, "mean_inference_ms": 1.2641320160489828, "mean_action_processing_ms": 0.13478291473748163, "mean_env_wait_ms": 0.8490647234068114, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 6937600, "num_agent_steps_trained": 6937600, "num_env_steps_sampled": 3468800, "num_env_steps_trained": 3468800, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 3468800, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 6937600, "timers": {"training_iteration_time_ms": 3638.877, "learn_time_ms": 1110.437, "learn_throughput": 11526.989, "synch_weights_time_ms": 12.903}, "counters": {"num_env_steps_sampled": 3468800, "num_env_steps_trained": 3468800, "num_agent_steps_sampled": 6937600, "num_agent_steps_trained": 6937600}, "done": false, "episodes_total": 8672, "training_iteration": 271, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-18-04", "timestamp": 1666581484, "time_this_iter_s": 3.6376683712005615, "time_total_s": 1045.9535484313965, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1045.9535484313965, "timesteps_since_restore": 0, "iterations_since_restore": 271, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.8, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 199.8, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 178.05, "shaped_reward_min": 135, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.69, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 17.34, "onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 28, "useful_onion_pickup_agent_0_mean": 15.33, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 16.98, "useful_onion_pickup_agent_1_min": 6, "useful_onion_pickup_agent_1_max": 26, "onion_drop_agent_0_mean": 0.23, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.13, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.14, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, 
"useful_onion_drop_agent_1_mean": 0.05, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.19, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 16.93, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 27, "dish_pickup_agent_0_mean": 5.81, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 5.25, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.34, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.89, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.23, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.16, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.07, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.05, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.37, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.86, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.32, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.8, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.03, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.19, "optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 16.93, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 27, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.19, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 16.93, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 27, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.001299827708862722, "policy_loss": 0.0009380197152495384, "vf_loss": 
7.8011393547058105, "vf_explained_var": 0.5883727669715881, "kl": 0.002125523053109646, "entropy": 0.8366072177886963, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 3481600, "num_env_steps_trained": 3481600, "num_agent_steps_sampled": 6963200, "num_agent_steps_trained": 6963200}, "sampler_results": {"episode_reward_max": 639.0, "episode_reward_min": 464.0, "episode_reward_mean": 577.65, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 229.0}, "policy_reward_max": {"ppo": 322.0}, "policy_reward_mean": {"ppo": 288.825}, "custom_metrics": {"sparse_reward_mean": 199.8, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 178.05, "shaped_reward_min": 135, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.69, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 17.34, 
"onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 28, "useful_onion_pickup_agent_0_mean": 15.33, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 16.98, "useful_onion_pickup_agent_1_min": 6, "useful_onion_pickup_agent_1_max": 26, "onion_drop_agent_0_mean": 0.23, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.13, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.14, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.05, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.19, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 16.93, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 27, "dish_pickup_agent_0_mean": 5.81, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 5.25, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.34, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.89, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.23, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.16, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.07, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.05, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.37, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.86, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.32, "soup_delivery_agent_0_min": 2, 
"soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.8, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.03, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.19, "optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 16.93, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 27, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.19, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 16.93, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 27, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [522.0, 510.0, 570.0, 519.0, 579.0, 573.0, 579.0, 587.0, 525.0, 627.0, 582.0, 579.0, 573.0, 564.0, 584.0, 573.0, 576.0, 584.0, 627.0, 573.0, 582.0, 576.0, 495.0, 576.0, 525.0, 630.0, 579.0, 582.0, 527.0, 579.0, 581.0, 579.0, 576.0, 524.0, 582.0, 579.0, 633.0, 579.0, 573.0, 579.0, 584.0, 581.0, 624.0, 579.0, 567.0, 579.0, 579.0, 582.0, 630.0, 584.0, 639.0, 636.0, 584.0, 582.0, 573.0, 573.0, 573.0, 587.0, 582.0, 579.0, 464.0, 573.0, 576.0, 582.0, 579.0, 536.0, 525.0, 627.0, 584.0, 576.0, 590.0, 579.0, 579.0, 579.0, 579.0, 579.0, 576.0, 576.0, 627.0, 530.0, 630.0, 579.0, 587.0, 582.0, 587.0, 587.0, 576.0, 581.0, 533.0, 582.0, 582.0, 639.0, 516.0, 582.0, 582.0, 630.0, 576.0, 630.0, 570.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [260.0, 262.0, 249.0, 261.0, 287.0, 283.0, 252.0, 267.0, 286.0, 293.0, 281.0, 292.0, 281.0, 298.0, 302.0, 285.0, 262.0, 263.0, 311.0, 316.0, 288.0, 294.0, 290.0, 289.0, 291.0, 282.0, 273.0, 291.0, 287.0, 297.0, 292.0, 281.0, 285.0, 291.0, 289.0, 295.0, 317.0, 310.0, 282.0, 291.0, 285.0, 297.0, 288.0, 288.0, 249.0, 246.0, 287.0, 289.0, 267.0, 
258.0, 314.0, 316.0, 284.0, 295.0, 285.0, 297.0, 271.0, 256.0, 287.0, 292.0, 287.0, 294.0, 287.0, 292.0, 287.0, 289.0, 251.0, 273.0, 293.0, 289.0, 293.0, 286.0, 315.0, 318.0, 291.0, 288.0, 290.0, 283.0, 296.0, 283.0, 292.0, 292.0, 281.0, 300.0, 316.0, 308.0, 287.0, 292.0, 282.0, 285.0, 283.0, 296.0, 300.0, 279.0, 295.0, 287.0, 309.0, 321.0, 299.0, 285.0, 322.0, 317.0, 316.0, 320.0, 286.0, 298.0, 295.0, 287.0, 287.0, 286.0, 280.0, 293.0, 283.0, 290.0, 288.0, 299.0, 288.0, 294.0, 304.0, 275.0, 235.0, 229.0, 285.0, 288.0, 288.0, 288.0, 291.0, 291.0, 297.0, 282.0, 269.0, 267.0, 265.0, 260.0, 313.0, 314.0, 284.0, 300.0, 282.0, 294.0, 295.0, 295.0, 278.0, 301.0, 289.0, 290.0, 288.0, 291.0, 286.0, 293.0, 284.0, 295.0, 281.0, 295.0, 287.0, 289.0, 319.0, 308.0, 268.0, 262.0, 314.0, 316.0, 286.0, 293.0, 289.0, 298.0, 293.0, 289.0, 291.0, 296.0, 293.0, 294.0, 283.0, 293.0, 292.0, 289.0, 264.0, 269.0, 291.0, 291.0, 290.0, 292.0, 320.0, 319.0, 259.0, 257.0, 298.0, 284.0, 292.0, 290.0, 321.0, 309.0, 293.0, 283.0, 311.0, 319.0, 289.0, 281.0, 291.0, 288.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7048976136960431, "mean_inference_ms": 1.2638769872483644, "mean_action_processing_ms": 0.13477343774655234, "mean_env_wait_ms": 0.8489374954857171, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 639.0, "episode_reward_min": 464.0, "episode_reward_mean": 577.65, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 229.0}, "policy_reward_max": {"ppo": 322.0}, "policy_reward_mean": {"ppo": 288.825}, "hist_stats": {"episode_reward": [522.0, 510.0, 570.0, 519.0, 579.0, 573.0, 579.0, 587.0, 525.0, 627.0, 582.0, 579.0, 573.0, 564.0, 584.0, 573.0, 576.0, 584.0, 627.0, 573.0, 582.0, 576.0, 495.0, 576.0, 525.0, 630.0, 579.0, 582.0, 527.0, 579.0, 581.0, 579.0, 576.0, 524.0, 582.0, 579.0, 633.0, 579.0, 573.0, 579.0, 584.0, 581.0, 624.0, 579.0, 567.0, 579.0, 579.0, 582.0, 630.0, 584.0, 639.0, 636.0, 584.0, 582.0, 573.0, 573.0, 
573.0, 587.0, 582.0, 579.0, 464.0, 573.0, 576.0, 582.0, 579.0, 536.0, 525.0, 627.0, 584.0, 576.0, 590.0, 579.0, 579.0, 579.0, 579.0, 579.0, 576.0, 576.0, 627.0, 530.0, 630.0, 579.0, 587.0, 582.0, 587.0, 587.0, 576.0, 581.0, 533.0, 582.0, 582.0, 639.0, 516.0, 582.0, 582.0, 630.0, 576.0, 630.0, 570.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [260.0, 262.0, 249.0, 261.0, 287.0, 283.0, 252.0, 267.0, 286.0, 293.0, 281.0, 292.0, 281.0, 298.0, 302.0, 285.0, 262.0, 263.0, 311.0, 316.0, 288.0, 294.0, 290.0, 289.0, 291.0, 282.0, 273.0, 291.0, 287.0, 297.0, 292.0, 281.0, 285.0, 291.0, 289.0, 295.0, 317.0, 310.0, 282.0, 291.0, 285.0, 297.0, 288.0, 288.0, 249.0, 246.0, 287.0, 289.0, 267.0, 258.0, 314.0, 316.0, 284.0, 295.0, 285.0, 297.0, 271.0, 256.0, 287.0, 292.0, 287.0, 294.0, 287.0, 292.0, 287.0, 289.0, 251.0, 273.0, 293.0, 289.0, 293.0, 286.0, 315.0, 318.0, 291.0, 288.0, 290.0, 283.0, 296.0, 283.0, 292.0, 292.0, 281.0, 300.0, 316.0, 308.0, 287.0, 292.0, 282.0, 285.0, 283.0, 296.0, 300.0, 279.0, 295.0, 287.0, 309.0, 321.0, 299.0, 285.0, 322.0, 317.0, 316.0, 320.0, 286.0, 298.0, 295.0, 287.0, 287.0, 286.0, 280.0, 293.0, 283.0, 290.0, 288.0, 299.0, 288.0, 294.0, 304.0, 275.0, 235.0, 229.0, 285.0, 288.0, 288.0, 288.0, 291.0, 291.0, 297.0, 282.0, 269.0, 267.0, 265.0, 260.0, 313.0, 314.0, 284.0, 300.0, 282.0, 294.0, 295.0, 295.0, 278.0, 301.0, 289.0, 290.0, 288.0, 291.0, 286.0, 293.0, 284.0, 295.0, 281.0, 295.0, 287.0, 289.0, 319.0, 308.0, 268.0, 262.0, 314.0, 316.0, 286.0, 293.0, 
289.0, 298.0, 293.0, 289.0, 291.0, 296.0, 293.0, 294.0, 283.0, 293.0, 292.0, 289.0, 264.0, 269.0, 291.0, 291.0, 290.0, 292.0, 320.0, 319.0, 259.0, 257.0, 298.0, 284.0, 292.0, 290.0, 321.0, 309.0, 293.0, 283.0, 311.0, 319.0, 289.0, 281.0, 291.0, 288.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7048976136960431, "mean_inference_ms": 1.2638769872483644, "mean_action_processing_ms": 0.13477343774655234, "mean_env_wait_ms": 0.8489374954857171, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 6963200, "num_agent_steps_trained": 6963200, "num_env_steps_sampled": 3481600, "num_env_steps_trained": 3481600, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 3481600, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 6963200, "timers": {"training_iteration_time_ms": 3625.048, "learn_time_ms": 1102.444, "learn_throughput": 11610.568, "synch_weights_time_ms": 12.999}, "counters": {"num_env_steps_sampled": 3481600, "num_env_steps_trained": 3481600, "num_agent_steps_sampled": 6963200, "num_agent_steps_trained": 6963200}, "done": false, "episodes_total": 8704, "training_iteration": 272, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-18-08", "timestamp": 1666581488, "time_this_iter_s": 3.6092796325683594, "time_total_s": 1049.5628280639648, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1049.5628280639648, "timesteps_since_restore": 0, "iterations_since_restore": 272, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.979999999999997, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 199.8, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 179.3, "shaped_reward_min": 105, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.65, "onion_pickup_agent_0_min": 5, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 17.57, "onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 28, "useful_onion_pickup_agent_0_mean": 15.33, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 17.15, "useful_onion_pickup_agent_1_min": 6, "useful_onion_pickup_agent_1_max": 26, "onion_drop_agent_0_mean": 0.2, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.19, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.12, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, 
"useful_onion_drop_agent_1_mean": 0.04, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.18, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 17.08, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 27, "dish_pickup_agent_0_mean": 5.92, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 5.19, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.55, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.89, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.22, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.16, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.06, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.48, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.79, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.41, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.73, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.03, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.18, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 17.08, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 27, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.18, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 17.08, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 27, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.003036621492356062, "policy_loss": -0.0034028268419206142, "vf_loss": 
7.827790260314941, "vf_explained_var": 0.5869630575180054, "kl": 0.001971776131540537, "entropy": 0.8331469297409058, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 3494400, "num_env_steps_trained": 3494400, "num_agent_steps_sampled": 6988800, "num_agent_steps_trained": 6988800}, "sampler_results": {"episode_reward_max": 639.0, "episode_reward_min": 345.0, "episode_reward_mean": 578.9, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 170.0}, "policy_reward_max": {"ppo": 322.0}, "policy_reward_mean": {"ppo": 289.45}, "custom_metrics": {"sparse_reward_mean": 199.8, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 179.3, "shaped_reward_min": 105, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.65, "onion_pickup_agent_0_min": 5, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 17.57, 
"onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 28, "useful_onion_pickup_agent_0_mean": 15.33, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 17.15, "useful_onion_pickup_agent_1_min": 6, "useful_onion_pickup_agent_1_max": 26, "onion_drop_agent_0_mean": 0.2, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.19, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.12, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.04, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.18, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 17.08, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 27, "dish_pickup_agent_0_mean": 5.92, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 5.19, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.55, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.89, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.22, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.16, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.06, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.48, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.79, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.41, "soup_delivery_agent_0_min": 1, 
"soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.73, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.03, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.18, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 17.08, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 27, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.18, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 17.08, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 27, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [576.0, 524.0, 582.0, 579.0, 633.0, 579.0, 573.0, 579.0, 584.0, 581.0, 624.0, 579.0, 567.0, 579.0, 579.0, 582.0, 630.0, 584.0, 639.0, 636.0, 584.0, 582.0, 573.0, 573.0, 573.0, 587.0, 582.0, 579.0, 464.0, 573.0, 576.0, 582.0, 579.0, 536.0, 525.0, 627.0, 584.0, 576.0, 590.0, 579.0, 579.0, 579.0, 579.0, 579.0, 576.0, 576.0, 627.0, 530.0, 630.0, 579.0, 587.0, 582.0, 587.0, 587.0, 576.0, 581.0, 533.0, 582.0, 582.0, 639.0, 516.0, 582.0, 582.0, 630.0, 576.0, 630.0, 570.0, 579.0, 582.0, 582.0, 579.0, 582.0, 576.0, 627.0, 576.0, 582.0, 579.0, 345.0, 582.0, 627.0, 522.0, 579.0, 587.0, 576.0, 587.0, 582.0, 582.0, 576.0, 581.0, 576.0, 579.0, 530.0, 587.0, 584.0, 576.0, 587.0, 590.0, 579.0, 579.0, 584.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [287.0, 289.0, 251.0, 273.0, 293.0, 289.0, 293.0, 286.0, 315.0, 318.0, 291.0, 288.0, 290.0, 283.0, 296.0, 283.0, 292.0, 292.0, 281.0, 300.0, 316.0, 308.0, 287.0, 292.0, 282.0, 285.0, 283.0, 296.0, 300.0, 279.0, 295.0, 287.0, 309.0, 321.0, 299.0, 285.0, 322.0, 317.0, 316.0, 320.0, 286.0, 298.0, 295.0, 287.0, 287.0, 286.0, 280.0, 293.0, 283.0, 
290.0, 288.0, 299.0, 288.0, 294.0, 304.0, 275.0, 235.0, 229.0, 285.0, 288.0, 288.0, 288.0, 291.0, 291.0, 297.0, 282.0, 269.0, 267.0, 265.0, 260.0, 313.0, 314.0, 284.0, 300.0, 282.0, 294.0, 295.0, 295.0, 278.0, 301.0, 289.0, 290.0, 288.0, 291.0, 286.0, 293.0, 284.0, 295.0, 281.0, 295.0, 287.0, 289.0, 319.0, 308.0, 268.0, 262.0, 314.0, 316.0, 286.0, 293.0, 289.0, 298.0, 293.0, 289.0, 291.0, 296.0, 293.0, 294.0, 283.0, 293.0, 292.0, 289.0, 264.0, 269.0, 291.0, 291.0, 290.0, 292.0, 320.0, 319.0, 259.0, 257.0, 298.0, 284.0, 292.0, 290.0, 321.0, 309.0, 293.0, 283.0, 311.0, 319.0, 289.0, 281.0, 291.0, 288.0, 293.0, 289.0, 285.0, 297.0, 286.0, 293.0, 295.0, 287.0, 283.0, 293.0, 321.0, 306.0, 284.0, 292.0, 295.0, 287.0, 286.0, 293.0, 175.0, 170.0, 288.0, 294.0, 313.0, 314.0, 260.0, 262.0, 285.0, 294.0, 297.0, 290.0, 274.0, 302.0, 291.0, 296.0, 290.0, 292.0, 291.0, 291.0, 288.0, 288.0, 288.0, 293.0, 293.0, 283.0, 288.0, 291.0, 270.0, 260.0, 301.0, 286.0, 294.0, 290.0, 295.0, 281.0, 291.0, 296.0, 300.0, 290.0, 286.0, 293.0, 291.0, 288.0, 294.0, 290.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7048025547681772, "mean_inference_ms": 1.2636074586322337, "mean_action_processing_ms": 0.1347604788542491, "mean_env_wait_ms": 0.8487813382272864, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 639.0, "episode_reward_min": 345.0, "episode_reward_mean": 578.9, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 170.0}, "policy_reward_max": {"ppo": 322.0}, "policy_reward_mean": {"ppo": 289.45}, "hist_stats": {"episode_reward": [576.0, 524.0, 582.0, 579.0, 633.0, 579.0, 573.0, 579.0, 584.0, 581.0, 624.0, 579.0, 567.0, 579.0, 579.0, 582.0, 630.0, 584.0, 639.0, 636.0, 584.0, 582.0, 573.0, 573.0, 573.0, 587.0, 582.0, 579.0, 464.0, 573.0, 576.0, 582.0, 579.0, 536.0, 525.0, 627.0, 584.0, 576.0, 590.0, 579.0, 579.0, 579.0, 579.0, 579.0, 576.0, 576.0, 627.0, 530.0, 630.0, 579.0, 587.0, 582.0, 587.0, 587.0, 576.0, 581.0, 533.0, 
582.0, 582.0, 639.0, 516.0, 582.0, 582.0, 630.0, 576.0, 630.0, 570.0, 579.0, 582.0, 582.0, 579.0, 582.0, 576.0, 627.0, 576.0, 582.0, 579.0, 345.0, 582.0, 627.0, 522.0, 579.0, 587.0, 576.0, 587.0, 582.0, 582.0, 576.0, 581.0, 576.0, 579.0, 530.0, 587.0, 584.0, 576.0, 587.0, 590.0, 579.0, 579.0, 584.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [287.0, 289.0, 251.0, 273.0, 293.0, 289.0, 293.0, 286.0, 315.0, 318.0, 291.0, 288.0, 290.0, 283.0, 296.0, 283.0, 292.0, 292.0, 281.0, 300.0, 316.0, 308.0, 287.0, 292.0, 282.0, 285.0, 283.0, 296.0, 300.0, 279.0, 295.0, 287.0, 309.0, 321.0, 299.0, 285.0, 322.0, 317.0, 316.0, 320.0, 286.0, 298.0, 295.0, 287.0, 287.0, 286.0, 280.0, 293.0, 283.0, 290.0, 288.0, 299.0, 288.0, 294.0, 304.0, 275.0, 235.0, 229.0, 285.0, 288.0, 288.0, 288.0, 291.0, 291.0, 297.0, 282.0, 269.0, 267.0, 265.0, 260.0, 313.0, 314.0, 284.0, 300.0, 282.0, 294.0, 295.0, 295.0, 278.0, 301.0, 289.0, 290.0, 288.0, 291.0, 286.0, 293.0, 284.0, 295.0, 281.0, 295.0, 287.0, 289.0, 319.0, 308.0, 268.0, 262.0, 314.0, 316.0, 286.0, 293.0, 289.0, 298.0, 293.0, 289.0, 291.0, 296.0, 293.0, 294.0, 283.0, 293.0, 292.0, 289.0, 264.0, 269.0, 291.0, 291.0, 290.0, 292.0, 320.0, 319.0, 259.0, 257.0, 298.0, 284.0, 292.0, 290.0, 321.0, 309.0, 293.0, 283.0, 311.0, 319.0, 289.0, 281.0, 291.0, 288.0, 293.0, 289.0, 285.0, 297.0, 286.0, 293.0, 295.0, 287.0, 283.0, 293.0, 321.0, 306.0, 284.0, 292.0, 295.0, 287.0, 286.0, 293.0, 175.0, 170.0, 288.0, 294.0, 313.0, 314.0, 260.0, 262.0, 285.0, 294.0, 297.0, 
290.0, 274.0, 302.0, 291.0, 296.0, 290.0, 292.0, 291.0, 291.0, 288.0, 288.0, 288.0, 293.0, 293.0, 283.0, 288.0, 291.0, 270.0, 260.0, 301.0, 286.0, 294.0, 290.0, 295.0, 281.0, 291.0, 296.0, 300.0, 290.0, 286.0, 293.0, 291.0, 288.0, 294.0, 290.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7048025547681772, "mean_inference_ms": 1.2636074586322337, "mean_action_processing_ms": 0.1347604788542491, "mean_env_wait_ms": 0.8487813382272864, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 6988800, "num_agent_steps_trained": 6988800, "num_env_steps_sampled": 3494400, "num_env_steps_trained": 3494400, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 3494400, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 6988800, "timers": {"training_iteration_time_ms": 3607.661, "learn_time_ms": 1093.892, "learn_throughput": 11701.342, "synch_weights_time_ms": 12.561}, "counters": {"num_env_steps_sampled": 3494400, "num_env_steps_trained": 3494400, "num_agent_steps_sampled": 6988800, "num_agent_steps_trained": 6988800}, "done": false, "episodes_total": 8736, "training_iteration": 273, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-18-12", "timestamp": 1666581492, "time_this_iter_s": 3.537813425064087, "time_total_s": 1053.100641489029, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1053.100641489029, "timesteps_since_restore": 0, "iterations_since_restore": 273, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.400000000000002, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 200.0, "sparse_reward_min": 100, "sparse_reward_max": 220, "shaped_reward_mean": 178.9, "shaped_reward_min": 94, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.0, "onion_pickup_agent_0_min": 5, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 17.16, "onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 28, "useful_onion_pickup_agent_0_mean": 15.76, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 16.81, "useful_onion_pickup_agent_1_min": 6, "useful_onion_pickup_agent_1_max": 26, "onion_drop_agent_0_mean": 0.18, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.15, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.08, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, 
"useful_onion_drop_agent_1_mean": 0.04, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.57, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 16.69, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 27, "dish_pickup_agent_0_mean": 5.66, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 5.34, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.35, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 5.04, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.18, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.18, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.26, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.95, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.2, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.93, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.57, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 16.69, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 27, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.57, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 16.69, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 27, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 1.652922946959734e-05, "policy_loss": -0.0003470139345154166, "vf_loss": 
7.779203414916992, "vf_explained_var": 0.5858237147331238, "kl": 0.002200545510277152, "entropy": 0.828750729560852, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 3507200, "num_env_steps_trained": 3507200, "num_agent_steps_sampled": 7014400, "num_agent_steps_trained": 7014400}, "sampler_results": {"episode_reward_max": 639.0, "episode_reward_min": 294.0, "episode_reward_mean": 578.9, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 143.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 289.45}, "custom_metrics": {"sparse_reward_mean": 200.0, "sparse_reward_min": 100, "sparse_reward_max": 220, "shaped_reward_mean": 178.9, "shaped_reward_min": 94, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.0, "onion_pickup_agent_0_min": 5, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 17.16, 
"onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 28, "useful_onion_pickup_agent_0_mean": 15.76, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 16.81, "useful_onion_pickup_agent_1_min": 6, "useful_onion_pickup_agent_1_max": 26, "onion_drop_agent_0_mean": 0.18, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.15, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.08, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.04, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.57, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 16.69, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 27, "dish_pickup_agent_0_mean": 5.66, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 5.34, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.35, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 5.04, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.18, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.18, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.26, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.95, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.2, "soup_delivery_agent_0_min": 1, 
"soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.93, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.57, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 16.69, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 27, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.57, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 16.69, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 27, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 536.0, 525.0, 627.0, 584.0, 576.0, 590.0, 579.0, 579.0, 579.0, 579.0, 579.0, 576.0, 576.0, 627.0, 530.0, 630.0, 579.0, 587.0, 582.0, 587.0, 587.0, 576.0, 581.0, 533.0, 582.0, 582.0, 639.0, 516.0, 582.0, 582.0, 630.0, 576.0, 630.0, 570.0, 579.0, 582.0, 582.0, 579.0, 582.0, 576.0, 627.0, 576.0, 582.0, 579.0, 345.0, 582.0, 627.0, 522.0, 579.0, 587.0, 576.0, 587.0, 582.0, 582.0, 576.0, 581.0, 576.0, 579.0, 530.0, 587.0, 584.0, 576.0, 587.0, 590.0, 579.0, 579.0, 584.0, 579.0, 584.0, 579.0, 576.0, 582.0, 630.0, 576.0, 579.0, 579.0, 576.0, 630.0, 570.0, 579.0, 576.0, 627.0, 576.0, 633.0, 630.0, 630.0, 576.0, 579.0, 579.0, 579.0, 582.0, 630.0, 630.0, 576.0, 579.0, 510.0, 582.0, 294.0, 630.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [297.0, 282.0, 269.0, 267.0, 265.0, 260.0, 313.0, 314.0, 284.0, 300.0, 282.0, 294.0, 295.0, 295.0, 278.0, 301.0, 289.0, 290.0, 288.0, 291.0, 286.0, 293.0, 284.0, 295.0, 281.0, 295.0, 287.0, 289.0, 319.0, 308.0, 268.0, 262.0, 314.0, 316.0, 286.0, 293.0, 289.0, 298.0, 293.0, 289.0, 291.0, 296.0, 293.0, 294.0, 283.0, 293.0, 292.0, 289.0, 264.0, 
269.0, 291.0, 291.0, 290.0, 292.0, 320.0, 319.0, 259.0, 257.0, 298.0, 284.0, 292.0, 290.0, 321.0, 309.0, 293.0, 283.0, 311.0, 319.0, 289.0, 281.0, 291.0, 288.0, 293.0, 289.0, 285.0, 297.0, 286.0, 293.0, 295.0, 287.0, 283.0, 293.0, 321.0, 306.0, 284.0, 292.0, 295.0, 287.0, 286.0, 293.0, 175.0, 170.0, 288.0, 294.0, 313.0, 314.0, 260.0, 262.0, 285.0, 294.0, 297.0, 290.0, 274.0, 302.0, 291.0, 296.0, 290.0, 292.0, 291.0, 291.0, 288.0, 288.0, 288.0, 293.0, 293.0, 283.0, 288.0, 291.0, 270.0, 260.0, 301.0, 286.0, 294.0, 290.0, 295.0, 281.0, 291.0, 296.0, 300.0, 290.0, 286.0, 293.0, 291.0, 288.0, 294.0, 290.0, 290.0, 289.0, 285.0, 299.0, 288.0, 291.0, 286.0, 290.0, 285.0, 297.0, 314.0, 316.0, 288.0, 288.0, 295.0, 284.0, 292.0, 287.0, 288.0, 288.0, 306.0, 324.0, 282.0, 288.0, 293.0, 286.0, 295.0, 281.0, 320.0, 307.0, 292.0, 284.0, 316.0, 317.0, 315.0, 315.0, 314.0, 316.0, 284.0, 292.0, 293.0, 286.0, 289.0, 290.0, 291.0, 288.0, 283.0, 299.0, 303.0, 327.0, 318.0, 312.0, 286.0, 290.0, 291.0, 288.0, 249.0, 261.0, 292.0, 290.0, 143.0, 151.0, 314.0, 316.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7046816164464604, "mean_inference_ms": 1.2633067797403978, "mean_action_processing_ms": 0.13474408191316176, "mean_env_wait_ms": 0.8485969737751016, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 639.0, "episode_reward_min": 294.0, "episode_reward_mean": 578.9, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 143.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 289.45}, "hist_stats": {"episode_reward": [579.0, 536.0, 525.0, 627.0, 584.0, 576.0, 590.0, 579.0, 579.0, 579.0, 579.0, 579.0, 576.0, 576.0, 627.0, 530.0, 630.0, 579.0, 587.0, 582.0, 587.0, 587.0, 576.0, 581.0, 533.0, 582.0, 582.0, 639.0, 516.0, 582.0, 582.0, 630.0, 576.0, 630.0, 570.0, 579.0, 582.0, 582.0, 579.0, 582.0, 576.0, 627.0, 576.0, 582.0, 579.0, 345.0, 582.0, 627.0, 522.0, 579.0, 587.0, 576.0, 587.0, 582.0, 582.0, 576.0, 581.0, 
576.0, 579.0, 530.0, 587.0, 584.0, 576.0, 587.0, 590.0, 579.0, 579.0, 584.0, 579.0, 584.0, 579.0, 576.0, 582.0, 630.0, 576.0, 579.0, 579.0, 576.0, 630.0, 570.0, 579.0, 576.0, 627.0, 576.0, 633.0, 630.0, 630.0, 576.0, 579.0, 579.0, 579.0, 582.0, 630.0, 630.0, 576.0, 579.0, 510.0, 582.0, 294.0, 630.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [297.0, 282.0, 269.0, 267.0, 265.0, 260.0, 313.0, 314.0, 284.0, 300.0, 282.0, 294.0, 295.0, 295.0, 278.0, 301.0, 289.0, 290.0, 288.0, 291.0, 286.0, 293.0, 284.0, 295.0, 281.0, 295.0, 287.0, 289.0, 319.0, 308.0, 268.0, 262.0, 314.0, 316.0, 286.0, 293.0, 289.0, 298.0, 293.0, 289.0, 291.0, 296.0, 293.0, 294.0, 283.0, 293.0, 292.0, 289.0, 264.0, 269.0, 291.0, 291.0, 290.0, 292.0, 320.0, 319.0, 259.0, 257.0, 298.0, 284.0, 292.0, 290.0, 321.0, 309.0, 293.0, 283.0, 311.0, 319.0, 289.0, 281.0, 291.0, 288.0, 293.0, 289.0, 285.0, 297.0, 286.0, 293.0, 295.0, 287.0, 283.0, 293.0, 321.0, 306.0, 284.0, 292.0, 295.0, 287.0, 286.0, 293.0, 175.0, 170.0, 288.0, 294.0, 313.0, 314.0, 260.0, 262.0, 285.0, 294.0, 297.0, 290.0, 274.0, 302.0, 291.0, 296.0, 290.0, 292.0, 291.0, 291.0, 288.0, 288.0, 288.0, 293.0, 293.0, 283.0, 288.0, 291.0, 270.0, 260.0, 301.0, 286.0, 294.0, 290.0, 295.0, 281.0, 291.0, 296.0, 300.0, 290.0, 286.0, 293.0, 291.0, 288.0, 294.0, 290.0, 290.0, 289.0, 285.0, 299.0, 288.0, 291.0, 286.0, 290.0, 285.0, 297.0, 314.0, 316.0, 288.0, 288.0, 295.0, 284.0, 292.0, 287.0, 288.0, 288.0, 306.0, 324.0, 282.0, 288.0, 293.0, 286.0, 295.0, 281.0, 320.0, 
307.0, 292.0, 284.0, 316.0, 317.0, 315.0, 315.0, 314.0, 316.0, 284.0, 292.0, 293.0, 286.0, 289.0, 290.0, 291.0, 288.0, 283.0, 299.0, 303.0, 327.0, 318.0, 312.0, 286.0, 290.0, 291.0, 288.0, 249.0, 261.0, 292.0, 290.0, 143.0, 151.0, 314.0, 316.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7046816164464604, "mean_inference_ms": 1.2633067797403978, "mean_action_processing_ms": 0.13474408191316176, "mean_env_wait_ms": 0.8485969737751016, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 7014400, "num_agent_steps_trained": 7014400, "num_env_steps_sampled": 3507200, "num_env_steps_trained": 3507200, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 3507200, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 7014400, "timers": {"training_iteration_time_ms": 3601.405, "learn_time_ms": 1092.369, "learn_throughput": 11717.652, "synch_weights_time_ms": 13.092}, "counters": {"num_env_steps_sampled": 3507200, "num_env_steps_trained": 3507200, "num_agent_steps_sampled": 7014400, "num_agent_steps_trained": 7014400}, "done": false, "episodes_total": 8768, "training_iteration": 274, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-18-16", "timestamp": 1666581496, "time_this_iter_s": 3.5803771018981934, "time_total_s": 1056.6810185909271, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1056.6810185909271, "timesteps_since_restore": 0, "iterations_since_restore": 274, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.180000000000003, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 200.0, "sparse_reward_min": 100, "sparse_reward_max": 220, "shaped_reward_mean": 178.73, "shaped_reward_min": 94, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.87, "onion_pickup_agent_0_min": 4, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 17.2, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 28, "useful_onion_pickup_agent_0_mean": 15.66, "useful_onion_pickup_agent_0_min": 4, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 16.89, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 27, "onion_drop_agent_0_mean": 0.14, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.11, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.04, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, 
"useful_onion_drop_agent_1_mean": 0.04, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.51, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 16.77, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 27, "dish_pickup_agent_0_mean": 5.69, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.26, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.43, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.95, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.16, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.19, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.04, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.3, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 10, "soup_pickup_agent_1_mean": 4.87, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.24, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 10, "soup_delivery_agent_1_mean": 4.85, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.51, "optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 16.77, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 27, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.51, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 16.77, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 27, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0033253917936235666, "policy_loss": -0.003682898124679923, "vf_loss": 
7.720019340515137, "vf_explained_var": 0.5786682963371277, "kl": 0.0020081857219338417, "entropy": 0.8289890289306641, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 3520000, "num_env_steps_trained": 3520000, "num_agent_steps_sampled": 7040000, "num_agent_steps_trained": 7040000}, "sampler_results": {"episode_reward_max": 639.0, "episode_reward_min": 294.0, "episode_reward_mean": 578.73, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 143.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 289.365}, "custom_metrics": {"sparse_reward_mean": 200.0, "sparse_reward_min": 100, "sparse_reward_max": 220, "shaped_reward_mean": 178.73, "shaped_reward_min": 94, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.87, "onion_pickup_agent_0_min": 4, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 17.2, 
"onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 28, "useful_onion_pickup_agent_0_mean": 15.66, "useful_onion_pickup_agent_0_min": 4, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 16.89, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 27, "onion_drop_agent_0_mean": 0.14, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.11, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.04, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.04, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.51, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 16.77, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 27, "dish_pickup_agent_0_mean": 5.69, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.26, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.43, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.95, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.16, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.19, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.04, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.3, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 10, "soup_pickup_agent_1_mean": 4.87, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.24, "soup_delivery_agent_0_min": 1, 
"soup_delivery_agent_0_max": 10, "soup_delivery_agent_1_mean": 4.85, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.51, "optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 16.77, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 27, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.51, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 16.77, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 27, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [576.0, 630.0, 570.0, 579.0, 582.0, 582.0, 579.0, 582.0, 576.0, 627.0, 576.0, 582.0, 579.0, 345.0, 582.0, 627.0, 522.0, 579.0, 587.0, 576.0, 587.0, 582.0, 582.0, 576.0, 581.0, 576.0, 579.0, 530.0, 587.0, 584.0, 576.0, 587.0, 590.0, 579.0, 579.0, 584.0, 579.0, 584.0, 579.0, 576.0, 582.0, 630.0, 576.0, 579.0, 579.0, 576.0, 630.0, 570.0, 579.0, 576.0, 627.0, 576.0, 633.0, 630.0, 630.0, 576.0, 579.0, 579.0, 579.0, 582.0, 630.0, 630.0, 576.0, 579.0, 510.0, 582.0, 294.0, 630.0, 573.0, 579.0, 576.0, 579.0, 576.0, 582.0, 576.0, 582.0, 570.0, 576.0, 530.0, 639.0, 582.0, 582.0, 579.0, 582.0, 587.0, 582.0, 584.0, 539.0, 573.0, 627.0, 582.0, 582.0, 576.0, 582.0, 584.0, 579.0, 582.0, 579.0, 576.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [293.0, 283.0, 311.0, 319.0, 289.0, 281.0, 291.0, 288.0, 293.0, 289.0, 285.0, 297.0, 286.0, 293.0, 295.0, 287.0, 283.0, 293.0, 321.0, 306.0, 284.0, 292.0, 295.0, 287.0, 286.0, 293.0, 175.0, 170.0, 288.0, 294.0, 313.0, 314.0, 260.0, 262.0, 285.0, 294.0, 297.0, 290.0, 274.0, 302.0, 291.0, 296.0, 290.0, 292.0, 291.0, 291.0, 288.0, 288.0, 288.0, 
293.0, 293.0, 283.0, 288.0, 291.0, 270.0, 260.0, 301.0, 286.0, 294.0, 290.0, 295.0, 281.0, 291.0, 296.0, 300.0, 290.0, 286.0, 293.0, 291.0, 288.0, 294.0, 290.0, 290.0, 289.0, 285.0, 299.0, 288.0, 291.0, 286.0, 290.0, 285.0, 297.0, 314.0, 316.0, 288.0, 288.0, 295.0, 284.0, 292.0, 287.0, 288.0, 288.0, 306.0, 324.0, 282.0, 288.0, 293.0, 286.0, 295.0, 281.0, 320.0, 307.0, 292.0, 284.0, 316.0, 317.0, 315.0, 315.0, 314.0, 316.0, 284.0, 292.0, 293.0, 286.0, 289.0, 290.0, 291.0, 288.0, 283.0, 299.0, 303.0, 327.0, 318.0, 312.0, 286.0, 290.0, 291.0, 288.0, 249.0, 261.0, 292.0, 290.0, 143.0, 151.0, 314.0, 316.0, 289.0, 284.0, 285.0, 294.0, 286.0, 290.0, 287.0, 292.0, 295.0, 281.0, 292.0, 290.0, 282.0, 294.0, 287.0, 295.0, 286.0, 284.0, 288.0, 288.0, 275.0, 255.0, 320.0, 319.0, 294.0, 288.0, 292.0, 290.0, 292.0, 287.0, 282.0, 300.0, 296.0, 291.0, 293.0, 289.0, 296.0, 288.0, 267.0, 272.0, 292.0, 281.0, 320.0, 307.0, 295.0, 287.0, 290.0, 292.0, 285.0, 291.0, 298.0, 284.0, 277.0, 307.0, 284.0, 295.0, 296.0, 286.0, 291.0, 288.0, 290.0, 286.0, 285.0, 297.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7045692651464103, "mean_inference_ms": 1.263009476276083, "mean_action_processing_ms": 0.13472837140180471, "mean_env_wait_ms": 0.8484162488219968, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 639.0, "episode_reward_min": 294.0, "episode_reward_mean": 578.73, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 143.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 289.365}, "hist_stats": {"episode_reward": [576.0, 630.0, 570.0, 579.0, 582.0, 582.0, 579.0, 582.0, 576.0, 627.0, 576.0, 582.0, 579.0, 345.0, 582.0, 627.0, 522.0, 579.0, 587.0, 576.0, 587.0, 582.0, 582.0, 576.0, 581.0, 576.0, 579.0, 530.0, 587.0, 584.0, 576.0, 587.0, 590.0, 579.0, 579.0, 584.0, 579.0, 584.0, 579.0, 576.0, 582.0, 630.0, 576.0, 579.0, 579.0, 576.0, 630.0, 570.0, 579.0, 576.0, 627.0, 576.0, 633.0, 630.0, 630.0, 576.0, 579.0, 
579.0, 579.0, 582.0, 630.0, 630.0, 576.0, 579.0, 510.0, 582.0, 294.0, 630.0, 573.0, 579.0, 576.0, 579.0, 576.0, 582.0, 576.0, 582.0, 570.0, 576.0, 530.0, 639.0, 582.0, 582.0, 579.0, 582.0, 587.0, 582.0, 584.0, 539.0, 573.0, 627.0, 582.0, 582.0, 576.0, 582.0, 584.0, 579.0, 582.0, 579.0, 576.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [293.0, 283.0, 311.0, 319.0, 289.0, 281.0, 291.0, 288.0, 293.0, 289.0, 285.0, 297.0, 286.0, 293.0, 295.0, 287.0, 283.0, 293.0, 321.0, 306.0, 284.0, 292.0, 295.0, 287.0, 286.0, 293.0, 175.0, 170.0, 288.0, 294.0, 313.0, 314.0, 260.0, 262.0, 285.0, 294.0, 297.0, 290.0, 274.0, 302.0, 291.0, 296.0, 290.0, 292.0, 291.0, 291.0, 288.0, 288.0, 288.0, 293.0, 293.0, 283.0, 288.0, 291.0, 270.0, 260.0, 301.0, 286.0, 294.0, 290.0, 295.0, 281.0, 291.0, 296.0, 300.0, 290.0, 286.0, 293.0, 291.0, 288.0, 294.0, 290.0, 290.0, 289.0, 285.0, 299.0, 288.0, 291.0, 286.0, 290.0, 285.0, 297.0, 314.0, 316.0, 288.0, 288.0, 295.0, 284.0, 292.0, 287.0, 288.0, 288.0, 306.0, 324.0, 282.0, 288.0, 293.0, 286.0, 295.0, 281.0, 320.0, 307.0, 292.0, 284.0, 316.0, 317.0, 315.0, 315.0, 314.0, 316.0, 284.0, 292.0, 293.0, 286.0, 289.0, 290.0, 291.0, 288.0, 283.0, 299.0, 303.0, 327.0, 318.0, 312.0, 286.0, 290.0, 291.0, 288.0, 249.0, 261.0, 292.0, 290.0, 143.0, 151.0, 314.0, 316.0, 289.0, 284.0, 285.0, 294.0, 286.0, 290.0, 287.0, 292.0, 295.0, 281.0, 292.0, 290.0, 282.0, 294.0, 287.0, 295.0, 286.0, 284.0, 288.0, 288.0, 275.0, 255.0, 320.0, 319.0, 294.0, 288.0, 292.0, 290.0, 292.0, 
287.0, 282.0, 300.0, 296.0, 291.0, 293.0, 289.0, 296.0, 288.0, 267.0, 272.0, 292.0, 281.0, 320.0, 307.0, 295.0, 287.0, 290.0, 292.0, 285.0, 291.0, 298.0, 284.0, 277.0, 307.0, 284.0, 295.0, 296.0, 286.0, 291.0, 288.0, 290.0, 286.0, 285.0, 297.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7045692651464103, "mean_inference_ms": 1.263009476276083, "mean_action_processing_ms": 0.13472837140180471, "mean_env_wait_ms": 0.8484162488219968, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 7040000, "num_agent_steps_trained": 7040000, "num_env_steps_sampled": 3520000, "num_env_steps_trained": 3520000, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 3520000, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 7040000, "timers": {"training_iteration_time_ms": 3611.327, "learn_time_ms": 1100.351, "learn_throughput": 11632.654, "synch_weights_time_ms": 12.268}, "counters": {"num_env_steps_sampled": 3520000, "num_env_steps_trained": 3520000, "num_agent_steps_sampled": 7040000, "num_agent_steps_trained": 7040000}, "done": false, "episodes_total": 8800, "training_iteration": 275, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-18-20", "timestamp": 1666581500, "time_this_iter_s": 3.6779873371124268, "time_total_s": 1060.3590059280396, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1060.3590059280396, "timesteps_since_restore": 0, "iterations_since_restore": 275, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 24.0, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 201.2, "sparse_reward_min": 100, "sparse_reward_max": 220, "shaped_reward_mean": 179.48, "shaped_reward_min": 94, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.76, "onion_pickup_agent_0_min": 4, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 17.36, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 28, "useful_onion_pickup_agent_0_mean": 15.5, "useful_onion_pickup_agent_0_min": 4, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 17.15, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 27, "onion_drop_agent_0_mean": 0.1, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.1, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.04, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, 
"useful_onion_drop_agent_1_mean": 0.04, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.43, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 16.98, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 27, "dish_pickup_agent_0_mean": 5.83, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.19, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.59, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.86, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.14, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.19, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.05, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.41, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 10, "soup_pickup_agent_1_mean": 4.77, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.38, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 10, "soup_delivery_agent_1_mean": 4.75, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.43, "optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 16.98, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 27, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.43, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 16.98, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 27, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0011727798264473677, "policy_loss": 0.0008132933871820569, "vf_loss": 
7.713962554931641, "vf_explained_var": 0.5945428013801575, "kl": 0.0018068891949951649, "entropy": 0.8238167762756348, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 3532800, "num_env_steps_trained": 3532800, "num_agent_steps_sampled": 7065600, "num_agent_steps_trained": 7065600}, "sampler_results": {"episode_reward_max": 639.0, "episode_reward_min": 294.0, "episode_reward_mean": 581.88, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 143.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 290.94}, "custom_metrics": {"sparse_reward_mean": 201.2, "sparse_reward_min": 100, "sparse_reward_max": 220, "shaped_reward_mean": 179.48, "shaped_reward_min": 94, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.76, "onion_pickup_agent_0_min": 4, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 17.36, 
"onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 28, "useful_onion_pickup_agent_0_mean": 15.5, "useful_onion_pickup_agent_0_min": 4, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 17.15, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 27, "onion_drop_agent_0_mean": 0.1, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.1, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.04, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.04, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.43, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 16.98, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 27, "dish_pickup_agent_0_mean": 5.83, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.19, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.59, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.86, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.14, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.19, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.05, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.41, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 10, "soup_pickup_agent_1_mean": 4.77, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.38, "soup_delivery_agent_0_min": 2, 
"soup_delivery_agent_0_max": 10, "soup_delivery_agent_1_mean": 4.75, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.43, "optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 16.98, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 27, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.43, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 16.98, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 27, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [590.0, 579.0, 579.0, 584.0, 579.0, 584.0, 579.0, 576.0, 582.0, 630.0, 576.0, 579.0, 579.0, 576.0, 630.0, 570.0, 579.0, 576.0, 627.0, 576.0, 633.0, 630.0, 630.0, 576.0, 579.0, 579.0, 579.0, 582.0, 630.0, 630.0, 576.0, 579.0, 510.0, 582.0, 294.0, 630.0, 573.0, 579.0, 576.0, 579.0, 576.0, 582.0, 576.0, 582.0, 570.0, 576.0, 530.0, 639.0, 582.0, 582.0, 579.0, 582.0, 587.0, 582.0, 584.0, 539.0, 573.0, 627.0, 582.0, 582.0, 576.0, 582.0, 584.0, 579.0, 582.0, 579.0, 576.0, 582.0, 582.0, 573.0, 570.0, 573.0, 582.0, 582.0, 582.0, 579.0, 636.0, 582.0, 587.0, 584.0, 587.0, 579.0, 573.0, 525.0, 624.0, 587.0, 579.0, 630.0, 582.0, 570.0, 576.0, 582.0, 582.0, 573.0, 573.0, 582.0, 579.0, 579.0, 579.0, 627.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [300.0, 290.0, 286.0, 293.0, 291.0, 288.0, 294.0, 290.0, 290.0, 289.0, 285.0, 299.0, 288.0, 291.0, 286.0, 290.0, 285.0, 297.0, 314.0, 316.0, 288.0, 288.0, 295.0, 284.0, 292.0, 287.0, 288.0, 288.0, 306.0, 324.0, 282.0, 288.0, 293.0, 286.0, 295.0, 281.0, 320.0, 307.0, 292.0, 284.0, 316.0, 317.0, 315.0, 315.0, 314.0, 316.0, 284.0, 292.0, 293.0, 
286.0, 289.0, 290.0, 291.0, 288.0, 283.0, 299.0, 303.0, 327.0, 318.0, 312.0, 286.0, 290.0, 291.0, 288.0, 249.0, 261.0, 292.0, 290.0, 143.0, 151.0, 314.0, 316.0, 289.0, 284.0, 285.0, 294.0, 286.0, 290.0, 287.0, 292.0, 295.0, 281.0, 292.0, 290.0, 282.0, 294.0, 287.0, 295.0, 286.0, 284.0, 288.0, 288.0, 275.0, 255.0, 320.0, 319.0, 294.0, 288.0, 292.0, 290.0, 292.0, 287.0, 282.0, 300.0, 296.0, 291.0, 293.0, 289.0, 296.0, 288.0, 267.0, 272.0, 292.0, 281.0, 320.0, 307.0, 295.0, 287.0, 290.0, 292.0, 285.0, 291.0, 298.0, 284.0, 277.0, 307.0, 284.0, 295.0, 296.0, 286.0, 291.0, 288.0, 290.0, 286.0, 285.0, 297.0, 295.0, 287.0, 288.0, 285.0, 295.0, 275.0, 284.0, 289.0, 292.0, 290.0, 298.0, 284.0, 296.0, 286.0, 294.0, 285.0, 317.0, 319.0, 292.0, 290.0, 293.0, 294.0, 292.0, 292.0, 294.0, 293.0, 291.0, 288.0, 293.0, 280.0, 263.0, 262.0, 320.0, 304.0, 290.0, 297.0, 287.0, 292.0, 310.0, 320.0, 301.0, 281.0, 293.0, 277.0, 292.0, 284.0, 295.0, 287.0, 301.0, 281.0, 280.0, 293.0, 279.0, 294.0, 295.0, 287.0, 288.0, 291.0, 292.0, 287.0, 289.0, 290.0, 304.0, 323.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7044540486410702, "mean_inference_ms": 1.2627304137168671, "mean_action_processing_ms": 0.13471491738433086, "mean_env_wait_ms": 0.8482519969369372, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 639.0, "episode_reward_min": 294.0, "episode_reward_mean": 581.88, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 143.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 290.94}, "hist_stats": {"episode_reward": [590.0, 579.0, 579.0, 584.0, 579.0, 584.0, 579.0, 576.0, 582.0, 630.0, 576.0, 579.0, 579.0, 576.0, 630.0, 570.0, 579.0, 576.0, 627.0, 576.0, 633.0, 630.0, 630.0, 576.0, 579.0, 579.0, 579.0, 582.0, 630.0, 630.0, 576.0, 579.0, 510.0, 582.0, 294.0, 630.0, 573.0, 579.0, 576.0, 579.0, 576.0, 582.0, 576.0, 582.0, 570.0, 576.0, 530.0, 639.0, 582.0, 582.0, 579.0, 582.0, 587.0, 582.0, 584.0, 539.0, 573.0, 
627.0, 582.0, 582.0, 576.0, 582.0, 584.0, 579.0, 582.0, 579.0, 576.0, 582.0, 582.0, 573.0, 570.0, 573.0, 582.0, 582.0, 582.0, 579.0, 636.0, 582.0, 587.0, 584.0, 587.0, 579.0, 573.0, 525.0, 624.0, 587.0, 579.0, 630.0, 582.0, 570.0, 576.0, 582.0, 582.0, 573.0, 573.0, 582.0, 579.0, 579.0, 579.0, 627.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [300.0, 290.0, 286.0, 293.0, 291.0, 288.0, 294.0, 290.0, 290.0, 289.0, 285.0, 299.0, 288.0, 291.0, 286.0, 290.0, 285.0, 297.0, 314.0, 316.0, 288.0, 288.0, 295.0, 284.0, 292.0, 287.0, 288.0, 288.0, 306.0, 324.0, 282.0, 288.0, 293.0, 286.0, 295.0, 281.0, 320.0, 307.0, 292.0, 284.0, 316.0, 317.0, 315.0, 315.0, 314.0, 316.0, 284.0, 292.0, 293.0, 286.0, 289.0, 290.0, 291.0, 288.0, 283.0, 299.0, 303.0, 327.0, 318.0, 312.0, 286.0, 290.0, 291.0, 288.0, 249.0, 261.0, 292.0, 290.0, 143.0, 151.0, 314.0, 316.0, 289.0, 284.0, 285.0, 294.0, 286.0, 290.0, 287.0, 292.0, 295.0, 281.0, 292.0, 290.0, 282.0, 294.0, 287.0, 295.0, 286.0, 284.0, 288.0, 288.0, 275.0, 255.0, 320.0, 319.0, 294.0, 288.0, 292.0, 290.0, 292.0, 287.0, 282.0, 300.0, 296.0, 291.0, 293.0, 289.0, 296.0, 288.0, 267.0, 272.0, 292.0, 281.0, 320.0, 307.0, 295.0, 287.0, 290.0, 292.0, 285.0, 291.0, 298.0, 284.0, 277.0, 307.0, 284.0, 295.0, 296.0, 286.0, 291.0, 288.0, 290.0, 286.0, 285.0, 297.0, 295.0, 287.0, 288.0, 285.0, 295.0, 275.0, 284.0, 289.0, 292.0, 290.0, 298.0, 284.0, 296.0, 286.0, 294.0, 285.0, 317.0, 319.0, 292.0, 290.0, 293.0, 294.0, 292.0, 292.0, 294.0, 293.0, 291.0, 288.0, 293.0, 
280.0, 263.0, 262.0, 320.0, 304.0, 290.0, 297.0, 287.0, 292.0, 310.0, 320.0, 301.0, 281.0, 293.0, 277.0, 292.0, 284.0, 295.0, 287.0, 301.0, 281.0, 280.0, 293.0, 279.0, 294.0, 295.0, 287.0, 288.0, 291.0, 292.0, 287.0, 289.0, 290.0, 304.0, 323.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7044540486410702, "mean_inference_ms": 1.2627304137168671, "mean_action_processing_ms": 0.13471491738433086, "mean_env_wait_ms": 0.8482519969369372, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 7065600, "num_agent_steps_trained": 7065600, "num_env_steps_sampled": 3532800, "num_env_steps_trained": 3532800, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 3532800, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 7065600, "timers": {"training_iteration_time_ms": 3585.259, "learn_time_ms": 1099.51, "learn_throughput": 11641.547, "synch_weights_time_ms": 11.475}, "counters": {"num_env_steps_sampled": 3532800, "num_env_steps_trained": 3532800, "num_agent_steps_sampled": 7065600, "num_agent_steps_trained": 7065600}, "done": false, "episodes_total": 8832, "training_iteration": 276, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-18-23", "timestamp": 1666581503, "time_this_iter_s": 3.70689058303833, "time_total_s": 1064.0658965110779, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1064.0658965110779, "timesteps_since_restore": 0, "iterations_since_restore": 276, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.566666666666666, "ram_util_percent": 10.616666666666667}}
+{"custom_metrics": {"sparse_reward_mean": 199.8, "sparse_reward_min": 100, "sparse_reward_max": 220, "shaped_reward_mean": 178.89, "shaped_reward_min": 94, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.24, "onion_pickup_agent_0_min": 4, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 17.86, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 28, "useful_onion_pickup_agent_0_mean": 15.01, "useful_onion_pickup_agent_0_min": 4, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 17.63, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 28, "onion_drop_agent_0_mean": 0.06, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.15, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.01, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, 
"useful_onion_drop_agent_1_mean": 0.03, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 14.95, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 25, "potting_onion_agent_1_mean": 17.37, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 28, "dish_pickup_agent_0_mean": 6.04, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 4.96, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.78, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.68, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.18, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.19, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.05, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.08, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.56, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 10, "soup_pickup_agent_1_mean": 4.55, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.53, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 10, "soup_delivery_agent_1_mean": 4.52, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.95, "optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 25, "optimal_onion_potting_agent_1_mean": 17.37, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 28, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.95, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 25, "viable_onion_potting_agent_1_mean": 17.37, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 28, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0003189620329067111, "policy_loss": -0.000677441421430558, "vf_loss": 
7.689172744750977, "vf_explained_var": 0.5767568349838257, "kl": 0.002302885055541992, "entropy": 0.8208730220794678, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 3545600, "num_env_steps_trained": 3545600, "num_agent_steps_sampled": 7091200, "num_agent_steps_trained": 7091200}, "sampler_results": {"episode_reward_max": 639.0, "episode_reward_min": 294.0, "episode_reward_mean": 578.49, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 143.0}, "policy_reward_max": {"ppo": 323.0}, "policy_reward_mean": {"ppo": 289.245}, "custom_metrics": {"sparse_reward_mean": 199.8, "sparse_reward_min": 100, "sparse_reward_max": 220, "shaped_reward_mean": 178.89, "shaped_reward_min": 94, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.24, "onion_pickup_agent_0_min": 4, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 17.86, 
"onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 28, "useful_onion_pickup_agent_0_mean": 15.01, "useful_onion_pickup_agent_0_min": 4, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 17.63, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 28, "onion_drop_agent_0_mean": 0.06, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.15, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.01, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.03, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 14.95, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 25, "potting_onion_agent_1_mean": 17.37, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 28, "dish_pickup_agent_0_mean": 6.04, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 4.96, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.78, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.68, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.18, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.19, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.05, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.08, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.56, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 10, "soup_pickup_agent_1_mean": 4.55, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.53, "soup_delivery_agent_0_min": 2, 
"soup_delivery_agent_0_max": 10, "soup_delivery_agent_1_mean": 4.52, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.95, "optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 25, "optimal_onion_potting_agent_1_mean": 17.37, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 28, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.95, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 25, "viable_onion_potting_agent_1_mean": 17.37, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 28, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [510.0, 582.0, 294.0, 630.0, 573.0, 579.0, 576.0, 579.0, 576.0, 582.0, 576.0, 582.0, 570.0, 576.0, 530.0, 639.0, 582.0, 582.0, 579.0, 582.0, 587.0, 582.0, 584.0, 539.0, 573.0, 627.0, 582.0, 582.0, 576.0, 582.0, 584.0, 579.0, 582.0, 579.0, 576.0, 582.0, 582.0, 573.0, 570.0, 573.0, 582.0, 582.0, 582.0, 579.0, 636.0, 582.0, 587.0, 584.0, 587.0, 579.0, 573.0, 525.0, 624.0, 587.0, 579.0, 630.0, 582.0, 570.0, 576.0, 582.0, 582.0, 573.0, 573.0, 582.0, 579.0, 579.0, 579.0, 627.0, 579.0, 573.0, 582.0, 579.0, 573.0, 587.0, 516.0, 579.0, 630.0, 582.0, 576.0, 633.0, 582.0, 579.0, 633.0, 582.0, 582.0, 573.0, 582.0, 582.0, 584.0, 576.0, 579.0, 582.0, 576.0, 582.0, 590.0, 525.0, 579.0, 579.0, 579.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [249.0, 261.0, 292.0, 290.0, 143.0, 151.0, 314.0, 316.0, 289.0, 284.0, 285.0, 294.0, 286.0, 290.0, 287.0, 292.0, 295.0, 281.0, 292.0, 290.0, 282.0, 294.0, 287.0, 295.0, 286.0, 284.0, 288.0, 288.0, 275.0, 255.0, 320.0, 319.0, 294.0, 288.0, 292.0, 290.0, 292.0, 287.0, 282.0, 300.0, 296.0, 291.0, 293.0, 289.0, 296.0, 288.0, 267.0, 272.0, 292.0, 
281.0, 320.0, 307.0, 295.0, 287.0, 290.0, 292.0, 285.0, 291.0, 298.0, 284.0, 277.0, 307.0, 284.0, 295.0, 296.0, 286.0, 291.0, 288.0, 290.0, 286.0, 285.0, 297.0, 295.0, 287.0, 288.0, 285.0, 295.0, 275.0, 284.0, 289.0, 292.0, 290.0, 298.0, 284.0, 296.0, 286.0, 294.0, 285.0, 317.0, 319.0, 292.0, 290.0, 293.0, 294.0, 292.0, 292.0, 294.0, 293.0, 291.0, 288.0, 293.0, 280.0, 263.0, 262.0, 320.0, 304.0, 290.0, 297.0, 287.0, 292.0, 310.0, 320.0, 301.0, 281.0, 293.0, 277.0, 292.0, 284.0, 295.0, 287.0, 301.0, 281.0, 280.0, 293.0, 279.0, 294.0, 295.0, 287.0, 288.0, 291.0, 292.0, 287.0, 289.0, 290.0, 304.0, 323.0, 287.0, 292.0, 278.0, 295.0, 290.0, 292.0, 285.0, 294.0, 286.0, 287.0, 291.0, 296.0, 257.0, 259.0, 299.0, 280.0, 313.0, 317.0, 295.0, 287.0, 289.0, 287.0, 313.0, 320.0, 292.0, 290.0, 284.0, 295.0, 317.0, 316.0, 295.0, 287.0, 294.0, 288.0, 289.0, 284.0, 286.0, 296.0, 291.0, 291.0, 280.0, 304.0, 287.0, 289.0, 294.0, 285.0, 287.0, 295.0, 284.0, 292.0, 293.0, 289.0, 299.0, 291.0, 267.0, 258.0, 293.0, 286.0, 289.0, 290.0, 301.0, 278.0, 296.0, 283.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.704359598352958, "mean_inference_ms": 1.2624859412248715, "mean_action_processing_ms": 0.1347036538331141, "mean_env_wait_ms": 0.8481206399033451, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 639.0, "episode_reward_min": 294.0, "episode_reward_mean": 578.49, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 143.0}, "policy_reward_max": {"ppo": 323.0}, "policy_reward_mean": {"ppo": 289.245}, "hist_stats": {"episode_reward": [510.0, 582.0, 294.0, 630.0, 573.0, 579.0, 576.0, 579.0, 576.0, 582.0, 576.0, 582.0, 570.0, 576.0, 530.0, 639.0, 582.0, 582.0, 579.0, 582.0, 587.0, 582.0, 584.0, 539.0, 573.0, 627.0, 582.0, 582.0, 576.0, 582.0, 584.0, 579.0, 582.0, 579.0, 576.0, 582.0, 582.0, 573.0, 570.0, 573.0, 582.0, 582.0, 582.0, 579.0, 636.0, 582.0, 587.0, 584.0, 587.0, 579.0, 573.0, 525.0, 624.0, 587.0, 579.0, 630.0, 582.0, 
570.0, 576.0, 582.0, 582.0, 573.0, 573.0, 582.0, 579.0, 579.0, 579.0, 627.0, 579.0, 573.0, 582.0, 579.0, 573.0, 587.0, 516.0, 579.0, 630.0, 582.0, 576.0, 633.0, 582.0, 579.0, 633.0, 582.0, 582.0, 573.0, 582.0, 582.0, 584.0, 576.0, 579.0, 582.0, 576.0, 582.0, 590.0, 525.0, 579.0, 579.0, 579.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [249.0, 261.0, 292.0, 290.0, 143.0, 151.0, 314.0, 316.0, 289.0, 284.0, 285.0, 294.0, 286.0, 290.0, 287.0, 292.0, 295.0, 281.0, 292.0, 290.0, 282.0, 294.0, 287.0, 295.0, 286.0, 284.0, 288.0, 288.0, 275.0, 255.0, 320.0, 319.0, 294.0, 288.0, 292.0, 290.0, 292.0, 287.0, 282.0, 300.0, 296.0, 291.0, 293.0, 289.0, 296.0, 288.0, 267.0, 272.0, 292.0, 281.0, 320.0, 307.0, 295.0, 287.0, 290.0, 292.0, 285.0, 291.0, 298.0, 284.0, 277.0, 307.0, 284.0, 295.0, 296.0, 286.0, 291.0, 288.0, 290.0, 286.0, 285.0, 297.0, 295.0, 287.0, 288.0, 285.0, 295.0, 275.0, 284.0, 289.0, 292.0, 290.0, 298.0, 284.0, 296.0, 286.0, 294.0, 285.0, 317.0, 319.0, 292.0, 290.0, 293.0, 294.0, 292.0, 292.0, 294.0, 293.0, 291.0, 288.0, 293.0, 280.0, 263.0, 262.0, 320.0, 304.0, 290.0, 297.0, 287.0, 292.0, 310.0, 320.0, 301.0, 281.0, 293.0, 277.0, 292.0, 284.0, 295.0, 287.0, 301.0, 281.0, 280.0, 293.0, 279.0, 294.0, 295.0, 287.0, 288.0, 291.0, 292.0, 287.0, 289.0, 290.0, 304.0, 323.0, 287.0, 292.0, 278.0, 295.0, 290.0, 292.0, 285.0, 294.0, 286.0, 287.0, 291.0, 296.0, 257.0, 259.0, 299.0, 280.0, 313.0, 317.0, 295.0, 287.0, 289.0, 287.0, 313.0, 320.0, 292.0, 290.0, 284.0, 295.0, 317.0, 
316.0, 295.0, 287.0, 294.0, 288.0, 289.0, 284.0, 286.0, 296.0, 291.0, 291.0, 280.0, 304.0, 287.0, 289.0, 294.0, 285.0, 287.0, 295.0, 284.0, 292.0, 293.0, 289.0, 299.0, 291.0, 267.0, 258.0, 293.0, 286.0, 289.0, 290.0, 301.0, 278.0, 296.0, 283.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.704359598352958, "mean_inference_ms": 1.2624859412248715, "mean_action_processing_ms": 0.1347036538331141, "mean_env_wait_ms": 0.8481206399033451, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 7091200, "num_agent_steps_trained": 7091200, "num_env_steps_sampled": 3545600, "num_env_steps_trained": 3545600, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 3545600, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 7091200, "timers": {"training_iteration_time_ms": 3576.088, "learn_time_ms": 1106.017, "learn_throughput": 11573.055, "synch_weights_time_ms": 11.964}, "counters": {"num_env_steps_sampled": 3545600, "num_env_steps_trained": 3545600, "num_agent_steps_sampled": 7091200, "num_agent_steps_trained": 7091200}, "done": false, "episodes_total": 8864, "training_iteration": 277, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-18-27", "timestamp": 1666581507, "time_this_iter_s": 3.6707231998443604, "time_total_s": 1067.7366197109222, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1067.7366197109222, "timesteps_since_restore": 0, "iterations_since_restore": 277, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.84, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 198.8, "sparse_reward_min": 80, "sparse_reward_max": 220, "shaped_reward_mean": 178.14, "shaped_reward_min": 80, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.44, "onion_pickup_agent_0_min": 5, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 17.62, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 28, "useful_onion_pickup_agent_0_mean": 15.15, "useful_onion_pickup_agent_0_min": 4, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 17.31, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 28, "onion_drop_agent_0_mean": 0.06, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.2, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.02, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, 
"useful_onion_drop_agent_1_mean": 0.06, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.12, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 25, "potting_onion_agent_1_mean": 17.06, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 28, "dish_pickup_agent_0_mean": 5.94, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 4.99, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.71, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.74, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.16, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.19, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.04, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.08, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.48, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 10, "soup_pickup_agent_1_mean": 4.57, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.45, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 10, "soup_delivery_agent_1_mean": 4.52, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.12, "optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 25, "optimal_onion_potting_agent_1_mean": 17.06, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 28, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.12, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 25, "viable_onion_potting_agent_1_mean": 17.06, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 28, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0021596523001790047, "policy_loss": 0.0017828154377639294, "vf_loss": 
7.876211166381836, "vf_explained_var": 0.5684356689453125, "kl": 0.0026554595679044724, "entropy": 0.8215670585632324, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 3558400, "num_env_steps_trained": 3558400, "num_agent_steps_sampled": 7116800, "num_agent_steps_trained": 7116800}, "sampler_results": {"episode_reward_max": 636.0, "episode_reward_min": 240.0, "episode_reward_mean": 575.74, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 117.0}, "policy_reward_max": {"ppo": 323.0}, "policy_reward_mean": {"ppo": 287.87}, "custom_metrics": {"sparse_reward_mean": 198.8, "sparse_reward_min": 80, "sparse_reward_max": 220, "shaped_reward_mean": 178.14, "shaped_reward_min": 80, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.44, "onion_pickup_agent_0_min": 5, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 17.62, 
"onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 28, "useful_onion_pickup_agent_0_mean": 15.15, "useful_onion_pickup_agent_0_min": 4, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 17.31, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 28, "onion_drop_agent_0_mean": 0.06, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.2, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.02, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.06, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.12, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 25, "potting_onion_agent_1_mean": 17.06, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 28, "dish_pickup_agent_0_mean": 5.94, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 4.99, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.71, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.74, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.16, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.19, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.04, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.08, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.48, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 10, "soup_pickup_agent_1_mean": 4.57, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.45, "soup_delivery_agent_0_min": 2, 
"soup_delivery_agent_0_max": 10, "soup_delivery_agent_1_mean": 4.52, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.12, "optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 25, "optimal_onion_potting_agent_1_mean": 17.06, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 28, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.12, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 25, "viable_onion_potting_agent_1_mean": 17.06, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 28, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 579.0, 576.0, 582.0, 582.0, 573.0, 570.0, 573.0, 582.0, 582.0, 582.0, 579.0, 636.0, 582.0, 587.0, 584.0, 587.0, 579.0, 573.0, 525.0, 624.0, 587.0, 579.0, 630.0, 582.0, 570.0, 576.0, 582.0, 582.0, 573.0, 573.0, 582.0, 579.0, 579.0, 579.0, 627.0, 579.0, 573.0, 582.0, 579.0, 573.0, 587.0, 516.0, 579.0, 630.0, 582.0, 576.0, 633.0, 582.0, 579.0, 633.0, 582.0, 582.0, 573.0, 582.0, 582.0, 584.0, 576.0, 579.0, 582.0, 576.0, 582.0, 590.0, 525.0, 579.0, 579.0, 579.0, 579.0, 576.0, 576.0, 516.0, 579.0, 582.0, 582.0, 240.0, 297.0, 579.0, 579.0, 582.0, 587.0, 587.0, 582.0, 633.0, 576.0, 582.0, 579.0, 582.0, 633.0, 579.0, 582.0, 573.0, 579.0, 579.0, 582.0, 576.0, 576.0, 587.0, 584.0, 576.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [296.0, 286.0, 291.0, 288.0, 290.0, 286.0, 285.0, 297.0, 295.0, 287.0, 288.0, 285.0, 295.0, 275.0, 284.0, 289.0, 292.0, 290.0, 298.0, 284.0, 296.0, 286.0, 294.0, 285.0, 317.0, 319.0, 292.0, 290.0, 293.0, 294.0, 292.0, 292.0, 294.0, 293.0, 291.0, 288.0, 293.0, 280.0, 263.0, 262.0, 320.0, 304.0, 290.0, 297.0, 287.0, 292.0, 310.0, 320.0, 301.0, 
281.0, 293.0, 277.0, 292.0, 284.0, 295.0, 287.0, 301.0, 281.0, 280.0, 293.0, 279.0, 294.0, 295.0, 287.0, 288.0, 291.0, 292.0, 287.0, 289.0, 290.0, 304.0, 323.0, 287.0, 292.0, 278.0, 295.0, 290.0, 292.0, 285.0, 294.0, 286.0, 287.0, 291.0, 296.0, 257.0, 259.0, 299.0, 280.0, 313.0, 317.0, 295.0, 287.0, 289.0, 287.0, 313.0, 320.0, 292.0, 290.0, 284.0, 295.0, 317.0, 316.0, 295.0, 287.0, 294.0, 288.0, 289.0, 284.0, 286.0, 296.0, 291.0, 291.0, 280.0, 304.0, 287.0, 289.0, 294.0, 285.0, 287.0, 295.0, 284.0, 292.0, 293.0, 289.0, 299.0, 291.0, 267.0, 258.0, 293.0, 286.0, 289.0, 290.0, 301.0, 278.0, 296.0, 283.0, 294.0, 282.0, 293.0, 283.0, 261.0, 255.0, 282.0, 297.0, 288.0, 294.0, 291.0, 291.0, 117.0, 123.0, 148.0, 149.0, 290.0, 289.0, 290.0, 289.0, 291.0, 291.0, 293.0, 294.0, 280.0, 307.0, 298.0, 284.0, 313.0, 320.0, 301.0, 275.0, 292.0, 290.0, 290.0, 289.0, 291.0, 291.0, 322.0, 311.0, 291.0, 288.0, 287.0, 295.0, 283.0, 290.0, 292.0, 287.0, 297.0, 282.0, 296.0, 286.0, 285.0, 291.0, 292.0, 284.0, 294.0, 293.0, 292.0, 292.0, 291.0, 285.0, 288.0, 291.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7042493166823506, "mean_inference_ms": 1.262217330634093, "mean_action_processing_ms": 0.13468999463424317, "mean_env_wait_ms": 0.8479689242467895, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 636.0, "episode_reward_min": 240.0, "episode_reward_mean": 575.74, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 117.0}, "policy_reward_max": {"ppo": 323.0}, "policy_reward_mean": {"ppo": 287.87}, "hist_stats": {"episode_reward": [582.0, 579.0, 576.0, 582.0, 582.0, 573.0, 570.0, 573.0, 582.0, 582.0, 582.0, 579.0, 636.0, 582.0, 587.0, 584.0, 587.0, 579.0, 573.0, 525.0, 624.0, 587.0, 579.0, 630.0, 582.0, 570.0, 576.0, 582.0, 582.0, 573.0, 573.0, 582.0, 579.0, 579.0, 579.0, 627.0, 579.0, 573.0, 582.0, 579.0, 573.0, 587.0, 516.0, 579.0, 630.0, 582.0, 576.0, 633.0, 582.0, 579.0, 633.0, 582.0, 582.0, 573.0, 582.0, 582.0, 584.0, 
576.0, 579.0, 582.0, 576.0, 582.0, 590.0, 525.0, 579.0, 579.0, 579.0, 579.0, 576.0, 576.0, 516.0, 579.0, 582.0, 582.0, 240.0, 297.0, 579.0, 579.0, 582.0, 587.0, 587.0, 582.0, 633.0, 576.0, 582.0, 579.0, 582.0, 633.0, 579.0, 582.0, 573.0, 579.0, 579.0, 582.0, 576.0, 576.0, 587.0, 584.0, 576.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [296.0, 286.0, 291.0, 288.0, 290.0, 286.0, 285.0, 297.0, 295.0, 287.0, 288.0, 285.0, 295.0, 275.0, 284.0, 289.0, 292.0, 290.0, 298.0, 284.0, 296.0, 286.0, 294.0, 285.0, 317.0, 319.0, 292.0, 290.0, 293.0, 294.0, 292.0, 292.0, 294.0, 293.0, 291.0, 288.0, 293.0, 280.0, 263.0, 262.0, 320.0, 304.0, 290.0, 297.0, 287.0, 292.0, 310.0, 320.0, 301.0, 281.0, 293.0, 277.0, 292.0, 284.0, 295.0, 287.0, 301.0, 281.0, 280.0, 293.0, 279.0, 294.0, 295.0, 287.0, 288.0, 291.0, 292.0, 287.0, 289.0, 290.0, 304.0, 323.0, 287.0, 292.0, 278.0, 295.0, 290.0, 292.0, 285.0, 294.0, 286.0, 287.0, 291.0, 296.0, 257.0, 259.0, 299.0, 280.0, 313.0, 317.0, 295.0, 287.0, 289.0, 287.0, 313.0, 320.0, 292.0, 290.0, 284.0, 295.0, 317.0, 316.0, 295.0, 287.0, 294.0, 288.0, 289.0, 284.0, 286.0, 296.0, 291.0, 291.0, 280.0, 304.0, 287.0, 289.0, 294.0, 285.0, 287.0, 295.0, 284.0, 292.0, 293.0, 289.0, 299.0, 291.0, 267.0, 258.0, 293.0, 286.0, 289.0, 290.0, 301.0, 278.0, 296.0, 283.0, 294.0, 282.0, 293.0, 283.0, 261.0, 255.0, 282.0, 297.0, 288.0, 294.0, 291.0, 291.0, 117.0, 123.0, 148.0, 149.0, 290.0, 289.0, 290.0, 289.0, 291.0, 291.0, 293.0, 294.0, 280.0, 307.0, 298.0, 284.0, 313.0, 
320.0, 301.0, 275.0, 292.0, 290.0, 290.0, 289.0, 291.0, 291.0, 322.0, 311.0, 291.0, 288.0, 287.0, 295.0, 283.0, 290.0, 292.0, 287.0, 297.0, 282.0, 296.0, 286.0, 285.0, 291.0, 292.0, 284.0, 294.0, 293.0, 292.0, 292.0, 291.0, 285.0, 288.0, 291.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7042493166823506, "mean_inference_ms": 1.262217330634093, "mean_action_processing_ms": 0.13468999463424317, "mean_env_wait_ms": 0.8479689242467895, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 7116800, "num_agent_steps_trained": 7116800, "num_env_steps_sampled": 3558400, "num_env_steps_trained": 3558400, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 3558400, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 7116800, "timers": {"training_iteration_time_ms": 3572.014, "learn_time_ms": 1102.164, "learn_throughput": 11613.517, "synch_weights_time_ms": 11.764}, "counters": {"num_env_steps_sampled": 3558400, "num_env_steps_trained": 3558400, "num_agent_steps_sampled": 7116800, "num_agent_steps_trained": 7116800}, "done": false, "episodes_total": 8896, "training_iteration": 278, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-18-31", "timestamp": 1666581511, "time_this_iter_s": 3.733790397644043, "time_total_s": 1071.4704101085663, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1071.4704101085663, "timesteps_since_restore": 0, "iterations_since_restore": 278, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.716666666666665, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 196.6, "sparse_reward_min": 0, "sparse_reward_max": 220, "shaped_reward_mean": 176.43, "shaped_reward_min": 12, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.08, "onion_pickup_agent_0_min": 1, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 17.7, "onion_pickup_agent_1_min": 3, "onion_pickup_agent_1_max": 28, "useful_onion_pickup_agent_0_mean": 14.78, "useful_onion_pickup_agent_0_min": 1, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 17.27, "useful_onion_pickup_agent_1_min": 3, "useful_onion_pickup_agent_1_max": 28, "onion_drop_agent_0_mean": 0.08, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.19, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.05, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, 
"useful_onion_drop_agent_1_mean": 0.09, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.76, "potting_onion_agent_0_min": 1, "potting_onion_agent_0_max": 25, "potting_onion_agent_1_mean": 17.11, "potting_onion_agent_1_min": 2, "potting_onion_agent_1_max": 28, "dish_pickup_agent_0_mean": 5.94, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 4.9, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.64, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.7, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.27, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.14, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.08, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.06, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.37, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 10, "soup_pickup_agent_1_mean": 4.59, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.35, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 10, "soup_delivery_agent_1_mean": 4.56, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.76, "optimal_onion_potting_agent_0_min": 1, "optimal_onion_potting_agent_0_max": 25, "optimal_onion_potting_agent_1_mean": 17.11, "optimal_onion_potting_agent_1_min": 2, "optimal_onion_potting_agent_1_max": 28, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.76, "viable_onion_potting_agent_0_min": 1, "viable_onion_potting_agent_0_max": 25, "viable_onion_potting_agent_1_mean": 17.11, "viable_onion_potting_agent_1_min": 2, "viable_onion_potting_agent_1_max": 28, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0020696595311164856, "policy_loss": 0.001691511832177639, "vf_loss": 
7.902426242828369, "vf_explained_var": 0.5661084651947021, "kl": 0.002486064564436674, "entropy": 0.8241865634918213, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 3571200, "num_env_steps_trained": 3571200, "num_agent_steps_sampled": 7142400, "num_agent_steps_trained": 7142400}, "sampler_results": {"episode_reward_max": 639.0, "episode_reward_min": 12.0, "episode_reward_mean": 569.63, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 6.0}, "policy_reward_max": {"ppo": 323.0}, "policy_reward_mean": {"ppo": 284.815}, "custom_metrics": {"sparse_reward_mean": 196.6, "sparse_reward_min": 0, "sparse_reward_max": 220, "shaped_reward_mean": 176.43, "shaped_reward_min": 12, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.08, "onion_pickup_agent_0_min": 1, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 17.7, 
"onion_pickup_agent_1_min": 3, "onion_pickup_agent_1_max": 28, "useful_onion_pickup_agent_0_mean": 14.78, "useful_onion_pickup_agent_0_min": 1, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 17.27, "useful_onion_pickup_agent_1_min": 3, "useful_onion_pickup_agent_1_max": 28, "onion_drop_agent_0_mean": 0.08, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.19, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.05, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.09, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.76, "potting_onion_agent_0_min": 1, "potting_onion_agent_0_max": 25, "potting_onion_agent_1_mean": 17.11, "potting_onion_agent_1_min": 2, "potting_onion_agent_1_max": 28, "dish_pickup_agent_0_mean": 5.94, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 4.9, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.64, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.7, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.27, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.14, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.08, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.06, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.37, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 10, "soup_pickup_agent_1_mean": 4.59, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.35, "soup_delivery_agent_0_min": 0, 
"soup_delivery_agent_0_max": 10, "soup_delivery_agent_1_mean": 4.56, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.76, "optimal_onion_potting_agent_0_min": 1, "optimal_onion_potting_agent_0_max": 25, "optimal_onion_potting_agent_1_mean": 17.11, "optimal_onion_potting_agent_1_min": 2, "optimal_onion_potting_agent_1_max": 28, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.76, "viable_onion_potting_agent_0_min": 1, "viable_onion_potting_agent_0_max": 25, "viable_onion_potting_agent_1_mean": 17.11, "viable_onion_potting_agent_1_min": 2, "viable_onion_potting_agent_1_max": 28, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 579.0, 579.0, 627.0, 579.0, 573.0, 582.0, 579.0, 573.0, 587.0, 516.0, 579.0, 630.0, 582.0, 576.0, 633.0, 582.0, 579.0, 633.0, 582.0, 582.0, 573.0, 582.0, 582.0, 584.0, 576.0, 579.0, 582.0, 576.0, 582.0, 590.0, 525.0, 579.0, 579.0, 579.0, 579.0, 576.0, 576.0, 516.0, 579.0, 582.0, 582.0, 240.0, 297.0, 579.0, 579.0, 582.0, 587.0, 587.0, 582.0, 633.0, 576.0, 582.0, 579.0, 582.0, 633.0, 579.0, 582.0, 573.0, 579.0, 579.0, 582.0, 576.0, 576.0, 587.0, 584.0, 576.0, 579.0, 584.0, 579.0, 579.0, 633.0, 582.0, 627.0, 639.0, 576.0, 582.0, 570.0, 587.0, 579.0, 579.0, 579.0, 579.0, 573.0, 576.0, 582.0, 582.0, 530.0, 582.0, 573.0, 12.0, 522.0, 579.0, 576.0, 536.0, 627.0, 570.0, 576.0, 587.0, 587.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [288.0, 291.0, 292.0, 287.0, 289.0, 290.0, 304.0, 323.0, 287.0, 292.0, 278.0, 295.0, 290.0, 292.0, 285.0, 294.0, 286.0, 287.0, 291.0, 296.0, 257.0, 259.0, 299.0, 280.0, 313.0, 317.0, 295.0, 287.0, 289.0, 287.0, 313.0, 320.0, 292.0, 290.0, 284.0, 295.0, 317.0, 316.0, 295.0, 287.0, 294.0, 288.0, 289.0, 284.0, 286.0, 296.0, 291.0, 291.0, 280.0, 
304.0, 287.0, 289.0, 294.0, 285.0, 287.0, 295.0, 284.0, 292.0, 293.0, 289.0, 299.0, 291.0, 267.0, 258.0, 293.0, 286.0, 289.0, 290.0, 301.0, 278.0, 296.0, 283.0, 294.0, 282.0, 293.0, 283.0, 261.0, 255.0, 282.0, 297.0, 288.0, 294.0, 291.0, 291.0, 117.0, 123.0, 148.0, 149.0, 290.0, 289.0, 290.0, 289.0, 291.0, 291.0, 293.0, 294.0, 280.0, 307.0, 298.0, 284.0, 313.0, 320.0, 301.0, 275.0, 292.0, 290.0, 290.0, 289.0, 291.0, 291.0, 322.0, 311.0, 291.0, 288.0, 287.0, 295.0, 283.0, 290.0, 292.0, 287.0, 297.0, 282.0, 296.0, 286.0, 285.0, 291.0, 292.0, 284.0, 294.0, 293.0, 292.0, 292.0, 291.0, 285.0, 288.0, 291.0, 299.0, 285.0, 288.0, 291.0, 289.0, 290.0, 318.0, 315.0, 296.0, 286.0, 315.0, 312.0, 317.0, 322.0, 283.0, 293.0, 294.0, 288.0, 286.0, 284.0, 298.0, 289.0, 282.0, 297.0, 283.0, 296.0, 291.0, 288.0, 286.0, 293.0, 287.0, 286.0, 283.0, 293.0, 295.0, 287.0, 282.0, 300.0, 266.0, 264.0, 288.0, 294.0, 289.0, 284.0, 6.0, 6.0, 257.0, 265.0, 283.0, 296.0, 284.0, 292.0, 270.0, 266.0, 306.0, 321.0, 292.0, 278.0, 285.0, 291.0, 294.0, 293.0, 286.0, 301.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7041604228757821, "mean_inference_ms": 1.2619712995732546, "mean_action_processing_ms": 0.13468033891449124, "mean_env_wait_ms": 0.8478502017312357, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 639.0, "episode_reward_min": 12.0, "episode_reward_mean": 569.63, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 6.0}, "policy_reward_max": {"ppo": 323.0}, "policy_reward_mean": {"ppo": 284.815}, "hist_stats": {"episode_reward": [579.0, 579.0, 579.0, 627.0, 579.0, 573.0, 582.0, 579.0, 573.0, 587.0, 516.0, 579.0, 630.0, 582.0, 576.0, 633.0, 582.0, 579.0, 633.0, 582.0, 582.0, 573.0, 582.0, 582.0, 584.0, 576.0, 579.0, 582.0, 576.0, 582.0, 590.0, 525.0, 579.0, 579.0, 579.0, 579.0, 576.0, 576.0, 516.0, 579.0, 582.0, 582.0, 240.0, 297.0, 579.0, 579.0, 582.0, 587.0, 587.0, 582.0, 633.0, 576.0, 582.0, 579.0, 582.0, 633.0, 579.0, 
582.0, 573.0, 579.0, 579.0, 582.0, 576.0, 576.0, 587.0, 584.0, 576.0, 579.0, 584.0, 579.0, 579.0, 633.0, 582.0, 627.0, 639.0, 576.0, 582.0, 570.0, 587.0, 579.0, 579.0, 579.0, 579.0, 573.0, 576.0, 582.0, 582.0, 530.0, 582.0, 573.0, 12.0, 522.0, 579.0, 576.0, 536.0, 627.0, 570.0, 576.0, 587.0, 587.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [288.0, 291.0, 292.0, 287.0, 289.0, 290.0, 304.0, 323.0, 287.0, 292.0, 278.0, 295.0, 290.0, 292.0, 285.0, 294.0, 286.0, 287.0, 291.0, 296.0, 257.0, 259.0, 299.0, 280.0, 313.0, 317.0, 295.0, 287.0, 289.0, 287.0, 313.0, 320.0, 292.0, 290.0, 284.0, 295.0, 317.0, 316.0, 295.0, 287.0, 294.0, 288.0, 289.0, 284.0, 286.0, 296.0, 291.0, 291.0, 280.0, 304.0, 287.0, 289.0, 294.0, 285.0, 287.0, 295.0, 284.0, 292.0, 293.0, 289.0, 299.0, 291.0, 267.0, 258.0, 293.0, 286.0, 289.0, 290.0, 301.0, 278.0, 296.0, 283.0, 294.0, 282.0, 293.0, 283.0, 261.0, 255.0, 282.0, 297.0, 288.0, 294.0, 291.0, 291.0, 117.0, 123.0, 148.0, 149.0, 290.0, 289.0, 290.0, 289.0, 291.0, 291.0, 293.0, 294.0, 280.0, 307.0, 298.0, 284.0, 313.0, 320.0, 301.0, 275.0, 292.0, 290.0, 290.0, 289.0, 291.0, 291.0, 322.0, 311.0, 291.0, 288.0, 287.0, 295.0, 283.0, 290.0, 292.0, 287.0, 297.0, 282.0, 296.0, 286.0, 285.0, 291.0, 292.0, 284.0, 294.0, 293.0, 292.0, 292.0, 291.0, 285.0, 288.0, 291.0, 299.0, 285.0, 288.0, 291.0, 289.0, 290.0, 318.0, 315.0, 296.0, 286.0, 315.0, 312.0, 317.0, 322.0, 283.0, 293.0, 294.0, 288.0, 286.0, 284.0, 298.0, 289.0, 282.0, 297.0, 283.0, 296.0, 291.0, 288.0, 286.0, 
293.0, 287.0, 286.0, 283.0, 293.0, 295.0, 287.0, 282.0, 300.0, 266.0, 264.0, 288.0, 294.0, 289.0, 284.0, 6.0, 6.0, 257.0, 265.0, 283.0, 296.0, 284.0, 292.0, 270.0, 266.0, 306.0, 321.0, 292.0, 278.0, 285.0, 291.0, 294.0, 293.0, 286.0, 301.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7041604228757821, "mean_inference_ms": 1.2619712995732546, "mean_action_processing_ms": 0.13468033891449124, "mean_env_wait_ms": 0.8478502017312357, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 7142400, "num_agent_steps_trained": 7142400, "num_env_steps_sampled": 3571200, "num_env_steps_trained": 3571200, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 3571200, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 7142400, "timers": {"training_iteration_time_ms": 3587.641, "learn_time_ms": 1111.633, "learn_throughput": 11514.594, "synch_weights_time_ms": 10.828}, "counters": {"num_env_steps_sampled": 3571200, "num_env_steps_trained": 3571200, "num_agent_steps_sampled": 7142400, "num_agent_steps_trained": 7142400}, "done": false, "episodes_total": 8928, "training_iteration": 279, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-18-35", "timestamp": 1666581515, "time_this_iter_s": 3.741086959838867, "time_total_s": 1075.2114970684052, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1075.2114970684052, "timesteps_since_restore": 0, "iterations_since_restore": 279, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 21.033333333333335, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 196.0, "sparse_reward_min": 0, "sparse_reward_max": 220, "shaped_reward_mean": 175.76, "shaped_reward_min": 12, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.24, "onion_pickup_agent_0_min": 1, "onion_pickup_agent_0_max": 26, "onion_pickup_agent_1_mean": 17.51, "onion_pickup_agent_1_min": 3, "onion_pickup_agent_1_max": 28, "useful_onion_pickup_agent_0_mean": 14.96, "useful_onion_pickup_agent_0_min": 1, "useful_onion_pickup_agent_0_max": 25, "useful_onion_pickup_agent_1_mean": 17.03, "useful_onion_pickup_agent_1_min": 3, "useful_onion_pickup_agent_1_max": 27, "onion_drop_agent_0_mean": 0.12, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.24, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.09, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, 
"useful_onion_drop_agent_1_mean": 0.11, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.84, "potting_onion_agent_0_min": 1, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 16.89, "potting_onion_agent_1_min": 2, "potting_onion_agent_1_max": 28, "dish_pickup_agent_0_mean": 5.92, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 4.95, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.54, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.7, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.31, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.11, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.12, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.33, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 10, "soup_pickup_agent_1_mean": 4.64, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.3, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 10, "soup_delivery_agent_1_mean": 4.59, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.84, "optimal_onion_potting_agent_0_min": 1, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 16.89, "optimal_onion_potting_agent_1_min": 2, "optimal_onion_potting_agent_1_max": 28, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.84, "viable_onion_potting_agent_0_min": 1, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 16.89, "viable_onion_potting_agent_1_min": 2, "viable_onion_potting_agent_1_max": 28, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.001337396795861423, "policy_loss": 0.0009652754524722695, "vf_loss": 
7.775339126586914, "vf_explained_var": 0.5859867334365845, "kl": 0.0022643147967755795, "entropy": 0.8108214139938354, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 3584000, "num_env_steps_trained": 3584000, "num_agent_steps_sampled": 7168000, "num_agent_steps_trained": 7168000}, "sampler_results": {"episode_reward_max": 639.0, "episode_reward_min": 12.0, "episode_reward_mean": 567.76, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 6.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 283.88}, "custom_metrics": {"sparse_reward_mean": 196.0, "sparse_reward_min": 0, "sparse_reward_max": 220, "shaped_reward_mean": 175.76, "shaped_reward_min": 12, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.24, "onion_pickup_agent_0_min": 1, "onion_pickup_agent_0_max": 26, "onion_pickup_agent_1_mean": 17.51, 
"onion_pickup_agent_1_min": 3, "onion_pickup_agent_1_max": 28, "useful_onion_pickup_agent_0_mean": 14.96, "useful_onion_pickup_agent_0_min": 1, "useful_onion_pickup_agent_0_max": 25, "useful_onion_pickup_agent_1_mean": 17.03, "useful_onion_pickup_agent_1_min": 3, "useful_onion_pickup_agent_1_max": 27, "onion_drop_agent_0_mean": 0.12, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.24, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.09, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.11, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.84, "potting_onion_agent_0_min": 1, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 16.89, "potting_onion_agent_1_min": 2, "potting_onion_agent_1_max": 28, "dish_pickup_agent_0_mean": 5.92, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 4.95, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.54, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.7, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.31, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.11, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.12, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.33, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 10, "soup_pickup_agent_1_mean": 4.64, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.3, "soup_delivery_agent_0_min": 0, 
"soup_delivery_agent_0_max": 10, "soup_delivery_agent_1_mean": 4.59, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.84, "optimal_onion_potting_agent_0_min": 1, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 16.89, "optimal_onion_potting_agent_1_min": 2, "optimal_onion_potting_agent_1_max": 28, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.84, "viable_onion_potting_agent_0_min": 1, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 16.89, "viable_onion_potting_agent_1_min": 2, "viable_onion_potting_agent_1_max": 28, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 579.0, 579.0, 579.0, 576.0, 576.0, 516.0, 579.0, 582.0, 582.0, 240.0, 297.0, 579.0, 579.0, 582.0, 587.0, 587.0, 582.0, 633.0, 576.0, 582.0, 579.0, 582.0, 633.0, 579.0, 582.0, 573.0, 579.0, 579.0, 582.0, 576.0, 576.0, 587.0, 584.0, 576.0, 579.0, 584.0, 579.0, 579.0, 633.0, 582.0, 627.0, 639.0, 576.0, 582.0, 570.0, 587.0, 579.0, 579.0, 579.0, 579.0, 573.0, 576.0, 582.0, 582.0, 530.0, 582.0, 573.0, 12.0, 522.0, 579.0, 576.0, 536.0, 627.0, 570.0, 576.0, 587.0, 587.0, 576.0, 633.0, 579.0, 584.0, 582.0, 582.0, 582.0, 455.0, 582.0, 576.0, 582.0, 582.0, 587.0, 627.0, 579.0, 519.0, 579.0, 587.0, 405.0, 579.0, 630.0, 630.0, 633.0, 567.0, 576.0, 587.0, 576.0, 584.0, 587.0, 579.0, 573.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [293.0, 286.0, 289.0, 290.0, 301.0, 278.0, 296.0, 283.0, 294.0, 282.0, 293.0, 283.0, 261.0, 255.0, 282.0, 297.0, 288.0, 294.0, 291.0, 291.0, 117.0, 123.0, 148.0, 149.0, 290.0, 289.0, 290.0, 289.0, 291.0, 291.0, 293.0, 294.0, 280.0, 307.0, 298.0, 284.0, 313.0, 320.0, 301.0, 275.0, 292.0, 290.0, 290.0, 289.0, 291.0, 291.0, 322.0, 311.0, 291.0, 
288.0, 287.0, 295.0, 283.0, 290.0, 292.0, 287.0, 297.0, 282.0, 296.0, 286.0, 285.0, 291.0, 292.0, 284.0, 294.0, 293.0, 292.0, 292.0, 291.0, 285.0, 288.0, 291.0, 299.0, 285.0, 288.0, 291.0, 289.0, 290.0, 318.0, 315.0, 296.0, 286.0, 315.0, 312.0, 317.0, 322.0, 283.0, 293.0, 294.0, 288.0, 286.0, 284.0, 298.0, 289.0, 282.0, 297.0, 283.0, 296.0, 291.0, 288.0, 286.0, 293.0, 287.0, 286.0, 283.0, 293.0, 295.0, 287.0, 282.0, 300.0, 266.0, 264.0, 288.0, 294.0, 289.0, 284.0, 6.0, 6.0, 257.0, 265.0, 283.0, 296.0, 284.0, 292.0, 270.0, 266.0, 306.0, 321.0, 292.0, 278.0, 285.0, 291.0, 294.0, 293.0, 286.0, 301.0, 286.0, 290.0, 325.0, 308.0, 287.0, 292.0, 299.0, 285.0, 290.0, 292.0, 289.0, 293.0, 291.0, 291.0, 225.0, 230.0, 282.0, 300.0, 283.0, 293.0, 297.0, 285.0, 286.0, 296.0, 295.0, 292.0, 316.0, 311.0, 290.0, 289.0, 259.0, 260.0, 286.0, 293.0, 286.0, 301.0, 203.0, 202.0, 286.0, 293.0, 310.0, 320.0, 314.0, 316.0, 325.0, 308.0, 297.0, 270.0, 283.0, 293.0, 288.0, 299.0, 285.0, 291.0, 298.0, 286.0, 296.0, 291.0, 290.0, 289.0, 277.0, 296.0, 285.0, 291.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7040566160339953, "mean_inference_ms": 1.2617214516765958, "mean_action_processing_ms": 0.13467032915806704, "mean_env_wait_ms": 0.8477503990101285, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 639.0, "episode_reward_min": 12.0, "episode_reward_mean": 567.76, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 6.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 283.88}, "hist_stats": {"episode_reward": [579.0, 579.0, 579.0, 579.0, 576.0, 576.0, 516.0, 579.0, 582.0, 582.0, 240.0, 297.0, 579.0, 579.0, 582.0, 587.0, 587.0, 582.0, 633.0, 576.0, 582.0, 579.0, 582.0, 633.0, 579.0, 582.0, 573.0, 579.0, 579.0, 582.0, 576.0, 576.0, 587.0, 584.0, 576.0, 579.0, 584.0, 579.0, 579.0, 633.0, 582.0, 627.0, 639.0, 576.0, 582.0, 570.0, 587.0, 579.0, 579.0, 579.0, 579.0, 573.0, 576.0, 582.0, 582.0, 530.0, 582.0, 573.0, 
12.0, 522.0, 579.0, 576.0, 536.0, 627.0, 570.0, 576.0, 587.0, 587.0, 576.0, 633.0, 579.0, 584.0, 582.0, 582.0, 582.0, 455.0, 582.0, 576.0, 582.0, 582.0, 587.0, 627.0, 579.0, 519.0, 579.0, 587.0, 405.0, 579.0, 630.0, 630.0, 633.0, 567.0, 576.0, 587.0, 576.0, 584.0, 587.0, 579.0, 573.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [293.0, 286.0, 289.0, 290.0, 301.0, 278.0, 296.0, 283.0, 294.0, 282.0, 293.0, 283.0, 261.0, 255.0, 282.0, 297.0, 288.0, 294.0, 291.0, 291.0, 117.0, 123.0, 148.0, 149.0, 290.0, 289.0, 290.0, 289.0, 291.0, 291.0, 293.0, 294.0, 280.0, 307.0, 298.0, 284.0, 313.0, 320.0, 301.0, 275.0, 292.0, 290.0, 290.0, 289.0, 291.0, 291.0, 322.0, 311.0, 291.0, 288.0, 287.0, 295.0, 283.0, 290.0, 292.0, 287.0, 297.0, 282.0, 296.0, 286.0, 285.0, 291.0, 292.0, 284.0, 294.0, 293.0, 292.0, 292.0, 291.0, 285.0, 288.0, 291.0, 299.0, 285.0, 288.0, 291.0, 289.0, 290.0, 318.0, 315.0, 296.0, 286.0, 315.0, 312.0, 317.0, 322.0, 283.0, 293.0, 294.0, 288.0, 286.0, 284.0, 298.0, 289.0, 282.0, 297.0, 283.0, 296.0, 291.0, 288.0, 286.0, 293.0, 287.0, 286.0, 283.0, 293.0, 295.0, 287.0, 282.0, 300.0, 266.0, 264.0, 288.0, 294.0, 289.0, 284.0, 6.0, 6.0, 257.0, 265.0, 283.0, 296.0, 284.0, 292.0, 270.0, 266.0, 306.0, 321.0, 292.0, 278.0, 285.0, 291.0, 294.0, 293.0, 286.0, 301.0, 286.0, 290.0, 325.0, 308.0, 287.0, 292.0, 299.0, 285.0, 290.0, 292.0, 289.0, 293.0, 291.0, 291.0, 225.0, 230.0, 282.0, 300.0, 283.0, 293.0, 297.0, 285.0, 286.0, 296.0, 295.0, 292.0, 316.0, 311.0, 290.0, 289.0, 
259.0, 260.0, 286.0, 293.0, 286.0, 301.0, 203.0, 202.0, 286.0, 293.0, 310.0, 320.0, 314.0, 316.0, 325.0, 308.0, 297.0, 270.0, 283.0, 293.0, 288.0, 299.0, 285.0, 291.0, 298.0, 286.0, 296.0, 291.0, 290.0, 289.0, 277.0, 296.0, 285.0, 291.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7040566160339953, "mean_inference_ms": 1.2617214516765958, "mean_action_processing_ms": 0.13467032915806704, "mean_env_wait_ms": 0.8477503990101285, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 7168000, "num_agent_steps_trained": 7168000, "num_env_steps_sampled": 3584000, "num_env_steps_trained": 3584000, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 3584000, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 7168000, "timers": {"training_iteration_time_ms": 3600.953, "learn_time_ms": 1114.253, "learn_throughput": 11487.52, "synch_weights_time_ms": 11.515}, "counters": {"num_env_steps_sampled": 3584000, "num_env_steps_trained": 3584000, "num_agent_steps_sampled": 7168000, "num_agent_steps_trained": 7168000}, "done": false, "episodes_total": 8960, "training_iteration": 280, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-18-39", "timestamp": 1666581519, "time_this_iter_s": 3.7522544860839844, "time_total_s": 1078.9637515544891, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": 
true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, 
"train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": 
{"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, 
"enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1078.9637515544891, "timesteps_since_restore": 0, "iterations_since_restore": 280, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.520000000000003, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 198.4, "sparse_reward_min": 0, "sparse_reward_max": 220, "shaped_reward_mean": 177.53, "shaped_reward_min": 12, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.06, "onion_pickup_agent_0_min": 1, "onion_pickup_agent_0_max": 26, "onion_pickup_agent_1_mean": 17.91, "onion_pickup_agent_1_min": 3, "onion_pickup_agent_1_max": 28, "useful_onion_pickup_agent_0_mean": 14.78, "useful_onion_pickup_agent_0_min": 1, "useful_onion_pickup_agent_0_max": 25, "useful_onion_pickup_agent_1_mean": 17.48, "useful_onion_pickup_agent_1_min": 3, "useful_onion_pickup_agent_1_max": 27, "onion_drop_agent_0_mean": 0.13, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.24, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.09, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, 
"useful_onion_drop_agent_1_mean": 0.12, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.62, "potting_onion_agent_0_min": 1, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 17.33, "potting_onion_agent_1_min": 2, "potting_onion_agent_1_max": 28, "dish_pickup_agent_0_mean": 6.04, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 4.94, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.65, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.71, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.32, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.1, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.15, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.04, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.47, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 10, "soup_pickup_agent_1_mean": 4.65, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.44, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 10, "soup_delivery_agent_1_mean": 4.59, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.62, "optimal_onion_potting_agent_0_min": 1, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 17.33, "optimal_onion_potting_agent_1_min": 2, "optimal_onion_potting_agent_1_max": 28, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.62, "viable_onion_potting_agent_0_min": 1, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 17.33, "viable_onion_potting_agent_1_min": 2, "viable_onion_potting_agent_1_max": 28, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0034903634805232286, "policy_loss": -0.0038492782041430473, "vf_loss": 
7.702734470367432, "vf_explained_var": 0.5874840021133423, "kl": 0.0021524939220398664, "entropy": 0.8227143287658691, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 3596800, "num_env_steps_trained": 3596800, "num_agent_steps_sampled": 7193600, "num_agent_steps_trained": 7193600}, "sampler_results": {"episode_reward_max": 639.0, "episode_reward_min": 12.0, "episode_reward_mean": 574.33, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 6.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 287.165}, "custom_metrics": {"sparse_reward_mean": 198.4, "sparse_reward_min": 0, "sparse_reward_max": 220, "shaped_reward_mean": 177.53, "shaped_reward_min": 12, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.06, "onion_pickup_agent_0_min": 1, "onion_pickup_agent_0_max": 26, "onion_pickup_agent_1_mean": 17.91, 
"onion_pickup_agent_1_min": 3, "onion_pickup_agent_1_max": 28, "useful_onion_pickup_agent_0_mean": 14.78, "useful_onion_pickup_agent_0_min": 1, "useful_onion_pickup_agent_0_max": 25, "useful_onion_pickup_agent_1_mean": 17.48, "useful_onion_pickup_agent_1_min": 3, "useful_onion_pickup_agent_1_max": 27, "onion_drop_agent_0_mean": 0.13, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.24, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.09, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.12, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.62, "potting_onion_agent_0_min": 1, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 17.33, "potting_onion_agent_1_min": 2, "potting_onion_agent_1_max": 28, "dish_pickup_agent_0_mean": 6.04, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 4.94, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.65, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.71, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.32, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.1, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.15, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.04, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.47, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 10, "soup_pickup_agent_1_mean": 4.65, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.44, "soup_delivery_agent_0_min": 0, 
"soup_delivery_agent_0_max": 10, "soup_delivery_agent_1_mean": 4.59, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.62, "optimal_onion_potting_agent_0_min": 1, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 17.33, "optimal_onion_potting_agent_1_min": 2, "optimal_onion_potting_agent_1_max": 28, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.62, "viable_onion_potting_agent_0_min": 1, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 17.33, "viable_onion_potting_agent_1_min": 2, "viable_onion_potting_agent_1_max": 28, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [587.0, 584.0, 576.0, 579.0, 584.0, 579.0, 579.0, 633.0, 582.0, 627.0, 639.0, 576.0, 582.0, 570.0, 587.0, 579.0, 579.0, 579.0, 579.0, 573.0, 576.0, 582.0, 582.0, 530.0, 582.0, 573.0, 12.0, 522.0, 579.0, 576.0, 536.0, 627.0, 570.0, 576.0, 587.0, 587.0, 576.0, 633.0, 579.0, 584.0, 582.0, 582.0, 582.0, 455.0, 582.0, 576.0, 582.0, 582.0, 587.0, 627.0, 579.0, 519.0, 579.0, 587.0, 405.0, 579.0, 630.0, 630.0, 633.0, 567.0, 576.0, 587.0, 576.0, 584.0, 587.0, 579.0, 573.0, 576.0, 587.0, 582.0, 584.0, 579.0, 536.0, 570.0, 579.0, 584.0, 582.0, 582.0, 576.0, 582.0, 630.0, 579.0, 582.0, 576.0, 582.0, 576.0, 570.0, 579.0, 576.0, 630.0, 579.0, 582.0, 630.0, 579.0, 630.0, 573.0, 573.0, 576.0, 573.0, 530.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [294.0, 293.0, 292.0, 292.0, 291.0, 285.0, 288.0, 291.0, 299.0, 285.0, 288.0, 291.0, 289.0, 290.0, 318.0, 315.0, 296.0, 286.0, 315.0, 312.0, 317.0, 322.0, 283.0, 293.0, 294.0, 288.0, 286.0, 284.0, 298.0, 289.0, 282.0, 297.0, 283.0, 296.0, 291.0, 288.0, 286.0, 293.0, 287.0, 286.0, 283.0, 293.0, 295.0, 287.0, 282.0, 300.0, 266.0, 264.0, 288.0, 
294.0, 289.0, 284.0, 6.0, 6.0, 257.0, 265.0, 283.0, 296.0, 284.0, 292.0, 270.0, 266.0, 306.0, 321.0, 292.0, 278.0, 285.0, 291.0, 294.0, 293.0, 286.0, 301.0, 286.0, 290.0, 325.0, 308.0, 287.0, 292.0, 299.0, 285.0, 290.0, 292.0, 289.0, 293.0, 291.0, 291.0, 225.0, 230.0, 282.0, 300.0, 283.0, 293.0, 297.0, 285.0, 286.0, 296.0, 295.0, 292.0, 316.0, 311.0, 290.0, 289.0, 259.0, 260.0, 286.0, 293.0, 286.0, 301.0, 203.0, 202.0, 286.0, 293.0, 310.0, 320.0, 314.0, 316.0, 325.0, 308.0, 297.0, 270.0, 283.0, 293.0, 288.0, 299.0, 285.0, 291.0, 298.0, 286.0, 296.0, 291.0, 290.0, 289.0, 277.0, 296.0, 285.0, 291.0, 290.0, 297.0, 295.0, 287.0, 290.0, 294.0, 290.0, 289.0, 274.0, 262.0, 290.0, 280.0, 285.0, 294.0, 288.0, 296.0, 292.0, 290.0, 289.0, 293.0, 290.0, 286.0, 284.0, 298.0, 321.0, 309.0, 286.0, 293.0, 289.0, 293.0, 285.0, 291.0, 290.0, 292.0, 278.0, 298.0, 280.0, 290.0, 291.0, 288.0, 292.0, 284.0, 309.0, 321.0, 289.0, 290.0, 286.0, 296.0, 314.0, 316.0, 294.0, 285.0, 319.0, 311.0, 286.0, 287.0, 284.0, 289.0, 286.0, 290.0, 295.0, 278.0, 273.0, 257.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7039546134033401, "mean_inference_ms": 1.2616056310396504, "mean_action_processing_ms": 0.1346598567562352, "mean_env_wait_ms": 0.8477289895301865, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 639.0, "episode_reward_min": 12.0, "episode_reward_mean": 574.33, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 6.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 287.165}, "hist_stats": {"episode_reward": [587.0, 584.0, 576.0, 579.0, 584.0, 579.0, 579.0, 633.0, 582.0, 627.0, 639.0, 576.0, 582.0, 570.0, 587.0, 579.0, 579.0, 579.0, 579.0, 573.0, 576.0, 582.0, 582.0, 530.0, 582.0, 573.0, 12.0, 522.0, 579.0, 576.0, 536.0, 627.0, 570.0, 576.0, 587.0, 587.0, 576.0, 633.0, 579.0, 584.0, 582.0, 582.0, 582.0, 455.0, 582.0, 576.0, 582.0, 582.0, 587.0, 627.0, 579.0, 519.0, 579.0, 587.0, 405.0, 579.0, 630.0, 630.0, 
633.0, 567.0, 576.0, 587.0, 576.0, 584.0, 587.0, 579.0, 573.0, 576.0, 587.0, 582.0, 584.0, 579.0, 536.0, 570.0, 579.0, 584.0, 582.0, 582.0, 576.0, 582.0, 630.0, 579.0, 582.0, 576.0, 582.0, 576.0, 570.0, 579.0, 576.0, 630.0, 579.0, 582.0, 630.0, 579.0, 630.0, 573.0, 573.0, 576.0, 573.0, 530.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [294.0, 293.0, 292.0, 292.0, 291.0, 285.0, 288.0, 291.0, 299.0, 285.0, 288.0, 291.0, 289.0, 290.0, 318.0, 315.0, 296.0, 286.0, 315.0, 312.0, 317.0, 322.0, 283.0, 293.0, 294.0, 288.0, 286.0, 284.0, 298.0, 289.0, 282.0, 297.0, 283.0, 296.0, 291.0, 288.0, 286.0, 293.0, 287.0, 286.0, 283.0, 293.0, 295.0, 287.0, 282.0, 300.0, 266.0, 264.0, 288.0, 294.0, 289.0, 284.0, 6.0, 6.0, 257.0, 265.0, 283.0, 296.0, 284.0, 292.0, 270.0, 266.0, 306.0, 321.0, 292.0, 278.0, 285.0, 291.0, 294.0, 293.0, 286.0, 301.0, 286.0, 290.0, 325.0, 308.0, 287.0, 292.0, 299.0, 285.0, 290.0, 292.0, 289.0, 293.0, 291.0, 291.0, 225.0, 230.0, 282.0, 300.0, 283.0, 293.0, 297.0, 285.0, 286.0, 296.0, 295.0, 292.0, 316.0, 311.0, 290.0, 289.0, 259.0, 260.0, 286.0, 293.0, 286.0, 301.0, 203.0, 202.0, 286.0, 293.0, 310.0, 320.0, 314.0, 316.0, 325.0, 308.0, 297.0, 270.0, 283.0, 293.0, 288.0, 299.0, 285.0, 291.0, 298.0, 286.0, 296.0, 291.0, 290.0, 289.0, 277.0, 296.0, 285.0, 291.0, 290.0, 297.0, 295.0, 287.0, 290.0, 294.0, 290.0, 289.0, 274.0, 262.0, 290.0, 280.0, 285.0, 294.0, 288.0, 296.0, 292.0, 290.0, 289.0, 293.0, 290.0, 286.0, 284.0, 298.0, 321.0, 309.0, 286.0, 293.0, 289.0, 293.0, 
285.0, 291.0, 290.0, 292.0, 278.0, 298.0, 280.0, 290.0, 291.0, 288.0, 292.0, 284.0, 309.0, 321.0, 289.0, 290.0, 286.0, 296.0, 314.0, 316.0, 294.0, 285.0, 319.0, 311.0, 286.0, 287.0, 284.0, 289.0, 286.0, 290.0, 295.0, 278.0, 273.0, 257.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7039546134033401, "mean_inference_ms": 1.2616056310396504, "mean_action_processing_ms": 0.1346598567562352, "mean_env_wait_ms": 0.8477289895301865, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 7193600, "num_agent_steps_trained": 7193600, "num_env_steps_sampled": 3596800, "num_env_steps_trained": 3596800, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 3596800, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 7193600, "timers": {"training_iteration_time_ms": 3631.142, "learn_time_ms": 1113.579, "learn_throughput": 11494.468, "synch_weights_time_ms": 11.537}, "counters": {"num_env_steps_sampled": 3596800, "num_env_steps_trained": 3596800, "num_agent_steps_sampled": 7193600, "num_agent_steps_trained": 7193600}, "done": false, "episodes_total": 8992, "training_iteration": 281, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-18-43", "timestamp": 1666581523, "time_this_iter_s": 3.939497232437134, "time_total_s": 1082.9032487869263, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": 
true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, 
"train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": 
{"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, 
"enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1082.9032487869263, "timesteps_since_restore": 0, "iterations_since_restore": 281, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 21.5, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 200.2, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 179.08, "shaped_reward_min": 125, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.5, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 26, "onion_pickup_agent_1_mean": 17.78, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 15.17, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 25, "useful_onion_pickup_agent_1_mean": 17.37, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 26, "onion_drop_agent_0_mean": 0.17, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.21, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.1, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, 
"useful_onion_drop_agent_1_mean": 0.08, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.01, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 17.27, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 25, "dish_pickup_agent_0_mean": 5.95, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.02, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.64, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.79, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.24, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.13, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.13, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.05, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.5, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.69, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.48, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.64, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.01, "optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 17.27, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 25, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.01, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 17.27, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 25, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.00012389960465952754, "policy_loss": -0.0002203599433414638, "vf_loss": 
7.57411527633667, "vf_explained_var": 0.614063560962677, "kl": 0.0017929250607267022, "entropy": 0.8263012170791626, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 3609600, "num_env_steps_trained": 3609600, "num_agent_steps_sampled": 7219200, "num_agent_steps_trained": 7219200}, "sampler_results": {"episode_reward_max": 639.0, "episode_reward_min": 405.0, "episode_reward_mean": 579.48, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 202.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 289.74}, "custom_metrics": {"sparse_reward_mean": 200.2, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 179.08, "shaped_reward_min": 125, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.5, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 26, "onion_pickup_agent_1_mean": 17.78, 
"onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 15.17, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 25, "useful_onion_pickup_agent_1_mean": 17.37, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 26, "onion_drop_agent_0_mean": 0.17, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.21, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.1, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.08, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.01, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 17.27, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 25, "dish_pickup_agent_0_mean": 5.95, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.02, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.64, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.79, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.24, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.13, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.13, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.05, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.5, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.69, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.48, "soup_delivery_agent_0_min": 2, 
"soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.64, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.01, "optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 17.27, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 25, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.01, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 17.27, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 25, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [570.0, 576.0, 587.0, 587.0, 576.0, 633.0, 579.0, 584.0, 582.0, 582.0, 582.0, 455.0, 582.0, 576.0, 582.0, 582.0, 587.0, 627.0, 579.0, 519.0, 579.0, 587.0, 405.0, 579.0, 630.0, 630.0, 633.0, 567.0, 576.0, 587.0, 576.0, 584.0, 587.0, 579.0, 573.0, 576.0, 587.0, 582.0, 584.0, 579.0, 536.0, 570.0, 579.0, 584.0, 582.0, 582.0, 576.0, 582.0, 630.0, 579.0, 582.0, 576.0, 582.0, 576.0, 570.0, 579.0, 576.0, 630.0, 579.0, 582.0, 630.0, 579.0, 630.0, 573.0, 573.0, 576.0, 573.0, 530.0, 579.0, 579.0, 582.0, 582.0, 584.0, 576.0, 582.0, 576.0, 576.0, 584.0, 579.0, 579.0, 582.0, 582.0, 576.0, 633.0, 579.0, 582.0, 579.0, 473.0, 579.0, 582.0, 573.0, 582.0, 579.0, 579.0, 576.0, 587.0, 573.0, 573.0, 639.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [292.0, 278.0, 285.0, 291.0, 294.0, 293.0, 286.0, 301.0, 286.0, 290.0, 325.0, 308.0, 287.0, 292.0, 299.0, 285.0, 290.0, 292.0, 289.0, 293.0, 291.0, 291.0, 225.0, 230.0, 282.0, 300.0, 283.0, 293.0, 297.0, 285.0, 286.0, 296.0, 295.0, 292.0, 316.0, 311.0, 290.0, 289.0, 259.0, 260.0, 286.0, 293.0, 286.0, 301.0, 203.0, 202.0, 286.0, 293.0, 310.0, 
320.0, 314.0, 316.0, 325.0, 308.0, 297.0, 270.0, 283.0, 293.0, 288.0, 299.0, 285.0, 291.0, 298.0, 286.0, 296.0, 291.0, 290.0, 289.0, 277.0, 296.0, 285.0, 291.0, 290.0, 297.0, 295.0, 287.0, 290.0, 294.0, 290.0, 289.0, 274.0, 262.0, 290.0, 280.0, 285.0, 294.0, 288.0, 296.0, 292.0, 290.0, 289.0, 293.0, 290.0, 286.0, 284.0, 298.0, 321.0, 309.0, 286.0, 293.0, 289.0, 293.0, 285.0, 291.0, 290.0, 292.0, 278.0, 298.0, 280.0, 290.0, 291.0, 288.0, 292.0, 284.0, 309.0, 321.0, 289.0, 290.0, 286.0, 296.0, 314.0, 316.0, 294.0, 285.0, 319.0, 311.0, 286.0, 287.0, 284.0, 289.0, 286.0, 290.0, 295.0, 278.0, 273.0, 257.0, 289.0, 290.0, 284.0, 295.0, 294.0, 288.0, 300.0, 282.0, 298.0, 286.0, 288.0, 288.0, 288.0, 294.0, 286.0, 290.0, 292.0, 284.0, 294.0, 290.0, 288.0, 291.0, 291.0, 288.0, 292.0, 290.0, 289.0, 293.0, 292.0, 284.0, 317.0, 316.0, 289.0, 290.0, 290.0, 292.0, 287.0, 292.0, 227.0, 246.0, 295.0, 284.0, 297.0, 285.0, 283.0, 290.0, 289.0, 293.0, 288.0, 291.0, 291.0, 288.0, 287.0, 289.0, 290.0, 297.0, 286.0, 287.0, 293.0, 280.0, 324.0, 315.0, 297.0, 282.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7038370220595624, "mean_inference_ms": 1.261504645612948, "mean_action_processing_ms": 0.1346470754198018, "mean_env_wait_ms": 0.8476888602932366, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 639.0, "episode_reward_min": 405.0, "episode_reward_mean": 579.48, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 202.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 289.74}, "hist_stats": {"episode_reward": [570.0, 576.0, 587.0, 587.0, 576.0, 633.0, 579.0, 584.0, 582.0, 582.0, 582.0, 455.0, 582.0, 576.0, 582.0, 582.0, 587.0, 627.0, 579.0, 519.0, 579.0, 587.0, 405.0, 579.0, 630.0, 630.0, 633.0, 567.0, 576.0, 587.0, 576.0, 584.0, 587.0, 579.0, 573.0, 576.0, 587.0, 582.0, 584.0, 579.0, 536.0, 570.0, 579.0, 584.0, 582.0, 582.0, 576.0, 582.0, 630.0, 579.0, 582.0, 576.0, 582.0, 576.0, 570.0, 579.0, 576.0, 
630.0, 579.0, 582.0, 630.0, 579.0, 630.0, 573.0, 573.0, 576.0, 573.0, 530.0, 579.0, 579.0, 582.0, 582.0, 584.0, 576.0, 582.0, 576.0, 576.0, 584.0, 579.0, 579.0, 582.0, 582.0, 576.0, 633.0, 579.0, 582.0, 579.0, 473.0, 579.0, 582.0, 573.0, 582.0, 579.0, 579.0, 576.0, 587.0, 573.0, 573.0, 639.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [292.0, 278.0, 285.0, 291.0, 294.0, 293.0, 286.0, 301.0, 286.0, 290.0, 325.0, 308.0, 287.0, 292.0, 299.0, 285.0, 290.0, 292.0, 289.0, 293.0, 291.0, 291.0, 225.0, 230.0, 282.0, 300.0, 283.0, 293.0, 297.0, 285.0, 286.0, 296.0, 295.0, 292.0, 316.0, 311.0, 290.0, 289.0, 259.0, 260.0, 286.0, 293.0, 286.0, 301.0, 203.0, 202.0, 286.0, 293.0, 310.0, 320.0, 314.0, 316.0, 325.0, 308.0, 297.0, 270.0, 283.0, 293.0, 288.0, 299.0, 285.0, 291.0, 298.0, 286.0, 296.0, 291.0, 290.0, 289.0, 277.0, 296.0, 285.0, 291.0, 290.0, 297.0, 295.0, 287.0, 290.0, 294.0, 290.0, 289.0, 274.0, 262.0, 290.0, 280.0, 285.0, 294.0, 288.0, 296.0, 292.0, 290.0, 289.0, 293.0, 290.0, 286.0, 284.0, 298.0, 321.0, 309.0, 286.0, 293.0, 289.0, 293.0, 285.0, 291.0, 290.0, 292.0, 278.0, 298.0, 280.0, 290.0, 291.0, 288.0, 292.0, 284.0, 309.0, 321.0, 289.0, 290.0, 286.0, 296.0, 314.0, 316.0, 294.0, 285.0, 319.0, 311.0, 286.0, 287.0, 284.0, 289.0, 286.0, 290.0, 295.0, 278.0, 273.0, 257.0, 289.0, 290.0, 284.0, 295.0, 294.0, 288.0, 300.0, 282.0, 298.0, 286.0, 288.0, 288.0, 288.0, 294.0, 286.0, 290.0, 292.0, 284.0, 294.0, 290.0, 288.0, 291.0, 291.0, 288.0, 292.0, 290.0, 289.0, 293.0, 292.0, 
284.0, 317.0, 316.0, 289.0, 290.0, 290.0, 292.0, 287.0, 292.0, 227.0, 246.0, 295.0, 284.0, 297.0, 285.0, 283.0, 290.0, 289.0, 293.0, 288.0, 291.0, 291.0, 288.0, 287.0, 289.0, 290.0, 297.0, 286.0, 287.0, 293.0, 280.0, 324.0, 315.0, 297.0, 282.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7038370220595624, "mean_inference_ms": 1.261504645612948, "mean_action_processing_ms": 0.1346470754198018, "mean_env_wait_ms": 0.8476888602932366, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 7219200, "num_agent_steps_trained": 7219200, "num_env_steps_sampled": 3609600, "num_env_steps_trained": 3609600, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 3609600, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 7219200, "timers": {"training_iteration_time_ms": 3647.237, "learn_time_ms": 1120.464, "learn_throughput": 11423.84, "synch_weights_time_ms": 12.239}, "counters": {"num_env_steps_sampled": 3609600, "num_env_steps_trained": 3609600, "num_agent_steps_sampled": 7219200, "num_agent_steps_trained": 7219200}, "done": false, "episodes_total": 9024, "training_iteration": 282, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-18-47", "timestamp": 1666581527, "time_this_iter_s": 3.7760872840881348, "time_total_s": 1086.6793360710144, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1086.6793360710144, "timesteps_since_restore": 0, "iterations_since_restore": 282, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.900000000000002, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 199.8, "sparse_reward_min": 80, "sparse_reward_max": 220, "shaped_reward_mean": 178.76, "shaped_reward_min": 80, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.26, "onion_pickup_agent_0_min": 4, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 17.86, "onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 14.9, "useful_onion_pickup_agent_0_min": 4, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 17.42, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 25, "onion_drop_agent_0_mean": 0.14, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.14, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.08, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, 
"useful_onion_drop_agent_1_mean": 0.06, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.85, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 17.43, "potting_onion_agent_1_min": 11, "potting_onion_agent_1_max": 25, "dish_pickup_agent_0_mean": 5.94, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 4.95, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.66, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.78, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.17, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.14, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.05, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.06, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.5, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.62, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.48, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.58, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.85, "optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 17.43, "optimal_onion_potting_agent_1_min": 11, "optimal_onion_potting_agent_1_max": 25, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.85, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 17.43, "viable_onion_potting_agent_1_min": 11, "viable_onion_potting_agent_1_max": 25, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -8.680624887347221e-05, "policy_loss": -0.00043897959403693676, "vf_loss": 
7.619510173797607, "vf_explained_var": 0.6051114797592163, "kl": 0.0019998771604150534, "entropy": 0.8195531368255615, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 3622400, "num_env_steps_trained": 3622400, "num_agent_steps_sampled": 7244800, "num_agent_steps_trained": 7244800}, "sampler_results": {"episode_reward_max": 639.0, "episode_reward_min": 240.0, "episode_reward_mean": 578.36, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 119.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 289.18}, "custom_metrics": {"sparse_reward_mean": 199.8, "sparse_reward_min": 80, "sparse_reward_max": 220, "shaped_reward_mean": 178.76, "shaped_reward_min": 80, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.26, "onion_pickup_agent_0_min": 4, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 17.86, 
"onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 14.9, "useful_onion_pickup_agent_0_min": 4, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 17.42, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 25, "onion_drop_agent_0_mean": 0.14, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.14, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.08, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.06, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.85, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 17.43, "potting_onion_agent_1_min": 11, "potting_onion_agent_1_max": 25, "dish_pickup_agent_0_mean": 5.94, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 4.95, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.66, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.78, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.17, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.14, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.05, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.06, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.5, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.62, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.48, "soup_delivery_agent_0_min": 3, 
"soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.58, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.85, "optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 17.43, "optimal_onion_potting_agent_1_min": 11, "optimal_onion_potting_agent_1_max": 25, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.85, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 17.43, "viable_onion_potting_agent_1_min": 11, "viable_onion_potting_agent_1_max": 25, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [587.0, 579.0, 573.0, 576.0, 587.0, 582.0, 584.0, 579.0, 536.0, 570.0, 579.0, 584.0, 582.0, 582.0, 576.0, 582.0, 630.0, 579.0, 582.0, 576.0, 582.0, 576.0, 570.0, 579.0, 576.0, 630.0, 579.0, 582.0, 630.0, 579.0, 630.0, 573.0, 573.0, 576.0, 573.0, 530.0, 579.0, 579.0, 582.0, 582.0, 584.0, 576.0, 582.0, 576.0, 576.0, 584.0, 579.0, 579.0, 582.0, 582.0, 576.0, 633.0, 579.0, 582.0, 579.0, 473.0, 579.0, 582.0, 573.0, 582.0, 579.0, 579.0, 576.0, 587.0, 573.0, 573.0, 639.0, 579.0, 578.0, 579.0, 636.0, 582.0, 576.0, 576.0, 582.0, 627.0, 579.0, 579.0, 240.0, 576.0, 582.0, 576.0, 582.0, 573.0, 582.0, 582.0, 579.0, 579.0, 579.0, 579.0, 581.0, 581.0, 576.0, 582.0, 525.0, 636.0, 576.0, 579.0, 630.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [296.0, 291.0, 290.0, 289.0, 277.0, 296.0, 285.0, 291.0, 290.0, 297.0, 295.0, 287.0, 290.0, 294.0, 290.0, 289.0, 274.0, 262.0, 290.0, 280.0, 285.0, 294.0, 288.0, 296.0, 292.0, 290.0, 289.0, 293.0, 290.0, 286.0, 284.0, 298.0, 321.0, 309.0, 286.0, 293.0, 289.0, 293.0, 285.0, 291.0, 290.0, 292.0, 278.0, 298.0, 280.0, 290.0, 291.0, 288.0, 292.0, 
284.0, 309.0, 321.0, 289.0, 290.0, 286.0, 296.0, 314.0, 316.0, 294.0, 285.0, 319.0, 311.0, 286.0, 287.0, 284.0, 289.0, 286.0, 290.0, 295.0, 278.0, 273.0, 257.0, 289.0, 290.0, 284.0, 295.0, 294.0, 288.0, 300.0, 282.0, 298.0, 286.0, 288.0, 288.0, 288.0, 294.0, 286.0, 290.0, 292.0, 284.0, 294.0, 290.0, 288.0, 291.0, 291.0, 288.0, 292.0, 290.0, 289.0, 293.0, 292.0, 284.0, 317.0, 316.0, 289.0, 290.0, 290.0, 292.0, 287.0, 292.0, 227.0, 246.0, 295.0, 284.0, 297.0, 285.0, 283.0, 290.0, 289.0, 293.0, 288.0, 291.0, 291.0, 288.0, 287.0, 289.0, 290.0, 297.0, 286.0, 287.0, 293.0, 280.0, 324.0, 315.0, 297.0, 282.0, 293.0, 285.0, 290.0, 289.0, 320.0, 316.0, 288.0, 294.0, 288.0, 288.0, 285.0, 291.0, 290.0, 292.0, 308.0, 319.0, 291.0, 288.0, 289.0, 290.0, 119.0, 121.0, 288.0, 288.0, 290.0, 292.0, 291.0, 285.0, 283.0, 299.0, 286.0, 287.0, 289.0, 293.0, 293.0, 289.0, 283.0, 296.0, 295.0, 284.0, 289.0, 290.0, 287.0, 292.0, 285.0, 296.0, 284.0, 297.0, 286.0, 290.0, 289.0, 293.0, 261.0, 264.0, 313.0, 323.0, 286.0, 290.0, 292.0, 287.0, 314.0, 316.0, 291.0, 288.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7037063875887167, "mean_inference_ms": 1.2613887993604969, "mean_action_processing_ms": 0.13463334098781574, "mean_env_wait_ms": 0.8476441305310047, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 639.0, "episode_reward_min": 240.0, "episode_reward_mean": 578.36, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 119.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 289.18}, "hist_stats": {"episode_reward": [587.0, 579.0, 573.0, 576.0, 587.0, 582.0, 584.0, 579.0, 536.0, 570.0, 579.0, 584.0, 582.0, 582.0, 576.0, 582.0, 630.0, 579.0, 582.0, 576.0, 582.0, 576.0, 570.0, 579.0, 576.0, 630.0, 579.0, 582.0, 630.0, 579.0, 630.0, 573.0, 573.0, 576.0, 573.0, 530.0, 579.0, 579.0, 582.0, 582.0, 584.0, 576.0, 582.0, 576.0, 576.0, 584.0, 579.0, 579.0, 582.0, 582.0, 576.0, 633.0, 579.0, 582.0, 579.0, 473.0, 579.0, 
582.0, 573.0, 582.0, 579.0, 579.0, 576.0, 587.0, 573.0, 573.0, 639.0, 579.0, 578.0, 579.0, 636.0, 582.0, 576.0, 576.0, 582.0, 627.0, 579.0, 579.0, 240.0, 576.0, 582.0, 576.0, 582.0, 573.0, 582.0, 582.0, 579.0, 579.0, 579.0, 579.0, 581.0, 581.0, 576.0, 582.0, 525.0, 636.0, 576.0, 579.0, 630.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [296.0, 291.0, 290.0, 289.0, 277.0, 296.0, 285.0, 291.0, 290.0, 297.0, 295.0, 287.0, 290.0, 294.0, 290.0, 289.0, 274.0, 262.0, 290.0, 280.0, 285.0, 294.0, 288.0, 296.0, 292.0, 290.0, 289.0, 293.0, 290.0, 286.0, 284.0, 298.0, 321.0, 309.0, 286.0, 293.0, 289.0, 293.0, 285.0, 291.0, 290.0, 292.0, 278.0, 298.0, 280.0, 290.0, 291.0, 288.0, 292.0, 284.0, 309.0, 321.0, 289.0, 290.0, 286.0, 296.0, 314.0, 316.0, 294.0, 285.0, 319.0, 311.0, 286.0, 287.0, 284.0, 289.0, 286.0, 290.0, 295.0, 278.0, 273.0, 257.0, 289.0, 290.0, 284.0, 295.0, 294.0, 288.0, 300.0, 282.0, 298.0, 286.0, 288.0, 288.0, 288.0, 294.0, 286.0, 290.0, 292.0, 284.0, 294.0, 290.0, 288.0, 291.0, 291.0, 288.0, 292.0, 290.0, 289.0, 293.0, 292.0, 284.0, 317.0, 316.0, 289.0, 290.0, 290.0, 292.0, 287.0, 292.0, 227.0, 246.0, 295.0, 284.0, 297.0, 285.0, 283.0, 290.0, 289.0, 293.0, 288.0, 291.0, 291.0, 288.0, 287.0, 289.0, 290.0, 297.0, 286.0, 287.0, 293.0, 280.0, 324.0, 315.0, 297.0, 282.0, 293.0, 285.0, 290.0, 289.0, 320.0, 316.0, 288.0, 294.0, 288.0, 288.0, 285.0, 291.0, 290.0, 292.0, 308.0, 319.0, 291.0, 288.0, 289.0, 290.0, 119.0, 121.0, 288.0, 288.0, 290.0, 292.0, 291.0, 285.0, 283.0, 
299.0, 286.0, 287.0, 289.0, 293.0, 293.0, 289.0, 283.0, 296.0, 295.0, 284.0, 289.0, 290.0, 287.0, 292.0, 285.0, 296.0, 284.0, 297.0, 286.0, 290.0, 289.0, 293.0, 261.0, 264.0, 313.0, 323.0, 286.0, 290.0, 292.0, 287.0, 314.0, 316.0, 291.0, 288.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7037063875887167, "mean_inference_ms": 1.2613887993604969, "mean_action_processing_ms": 0.13463334098781574, "mean_env_wait_ms": 0.8476441305310047, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 7244800, "num_agent_steps_trained": 7244800, "num_env_steps_sampled": 3622400, "num_env_steps_trained": 3622400, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 3622400, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 7244800, "timers": {"training_iteration_time_ms": 3675.302, "learn_time_ms": 1140.271, "learn_throughput": 11225.403, "synch_weights_time_ms": 12.245}, "counters": {"num_env_steps_sampled": 3622400, "num_env_steps_trained": 3622400, "num_agent_steps_sampled": 7244800, "num_agent_steps_trained": 7244800}, "done": false, "episodes_total": 9056, "training_iteration": 283, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-18-51", "timestamp": 1666581531, "time_this_iter_s": 3.807605266571045, "time_total_s": 1090.4869413375854, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1090.4869413375854, "timesteps_since_restore": 0, "iterations_since_restore": 283, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.18, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 199.4, "sparse_reward_min": 80, "sparse_reward_max": 220, "shaped_reward_mean": 178.46, "shaped_reward_min": 80, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.27, "onion_pickup_agent_0_min": 4, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 17.82, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 28, "useful_onion_pickup_agent_0_mean": 14.91, "useful_onion_pickup_agent_0_min": 4, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 17.4, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 28, "onion_drop_agent_0_mean": 0.16, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.08, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.1, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, 
"useful_onion_drop_agent_1_mean": 0.03, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 14.85, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 17.42, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 27, "dish_pickup_agent_0_mean": 5.95, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 4.87, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.71, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.74, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.13, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.11, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.49, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.57, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.47, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.54, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.85, "optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 17.42, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 27, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.85, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 17.42, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 27, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0001002021599560976, "policy_loss": -0.00044721108861267567, "vf_loss": 
7.613537788391113, "vf_explained_var": 0.6043774485588074, "kl": 0.002004144247621298, "entropy": 0.8286857604980469, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 3635200, "num_env_steps_trained": 3635200, "num_agent_steps_sampled": 7270400, "num_agent_steps_trained": 7270400}, "sampler_results": {"episode_reward_max": 639.0, "episode_reward_min": 240.0, "episode_reward_mean": 577.26, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 119.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 288.63}, "custom_metrics": {"sparse_reward_mean": 199.4, "sparse_reward_min": 80, "sparse_reward_max": 220, "shaped_reward_mean": 178.46, "shaped_reward_min": 80, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.27, "onion_pickup_agent_0_min": 4, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 17.82, 
"onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 28, "useful_onion_pickup_agent_0_mean": 14.91, "useful_onion_pickup_agent_0_min": 4, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 17.4, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 28, "onion_drop_agent_0_mean": 0.16, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.08, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.1, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.03, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 14.85, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 17.42, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 27, "dish_pickup_agent_0_mean": 5.95, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 4.87, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.71, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.74, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.13, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.11, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.49, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.57, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.47, "soup_delivery_agent_0_min": 2, 
"soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.54, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.85, "optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 17.42, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 27, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.85, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 17.42, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 27, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [573.0, 576.0, 573.0, 530.0, 579.0, 579.0, 582.0, 582.0, 584.0, 576.0, 582.0, 576.0, 576.0, 584.0, 579.0, 579.0, 582.0, 582.0, 576.0, 633.0, 579.0, 582.0, 579.0, 473.0, 579.0, 582.0, 573.0, 582.0, 579.0, 579.0, 576.0, 587.0, 573.0, 573.0, 639.0, 579.0, 578.0, 579.0, 636.0, 582.0, 576.0, 576.0, 582.0, 627.0, 579.0, 579.0, 240.0, 576.0, 582.0, 576.0, 582.0, 573.0, 582.0, 582.0, 579.0, 579.0, 579.0, 579.0, 581.0, 581.0, 576.0, 582.0, 525.0, 636.0, 576.0, 579.0, 630.0, 579.0, 573.0, 525.0, 579.0, 587.0, 579.0, 570.0, 630.0, 579.0, 579.0, 582.0, 582.0, 519.0, 576.0, 582.0, 582.0, 576.0, 579.0, 579.0, 630.0, 579.0, 582.0, 579.0, 636.0, 582.0, 582.0, 573.0, 582.0, 576.0, 582.0, 579.0, 582.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [284.0, 289.0, 286.0, 290.0, 295.0, 278.0, 273.0, 257.0, 289.0, 290.0, 284.0, 295.0, 294.0, 288.0, 300.0, 282.0, 298.0, 286.0, 288.0, 288.0, 288.0, 294.0, 286.0, 290.0, 292.0, 284.0, 294.0, 290.0, 288.0, 291.0, 291.0, 288.0, 292.0, 290.0, 289.0, 293.0, 292.0, 284.0, 317.0, 316.0, 289.0, 290.0, 290.0, 292.0, 287.0, 292.0, 227.0, 246.0, 295.0, 
284.0, 297.0, 285.0, 283.0, 290.0, 289.0, 293.0, 288.0, 291.0, 291.0, 288.0, 287.0, 289.0, 290.0, 297.0, 286.0, 287.0, 293.0, 280.0, 324.0, 315.0, 297.0, 282.0, 293.0, 285.0, 290.0, 289.0, 320.0, 316.0, 288.0, 294.0, 288.0, 288.0, 285.0, 291.0, 290.0, 292.0, 308.0, 319.0, 291.0, 288.0, 289.0, 290.0, 119.0, 121.0, 288.0, 288.0, 290.0, 292.0, 291.0, 285.0, 283.0, 299.0, 286.0, 287.0, 289.0, 293.0, 293.0, 289.0, 283.0, 296.0, 295.0, 284.0, 289.0, 290.0, 287.0, 292.0, 285.0, 296.0, 284.0, 297.0, 286.0, 290.0, 289.0, 293.0, 261.0, 264.0, 313.0, 323.0, 286.0, 290.0, 292.0, 287.0, 314.0, 316.0, 291.0, 288.0, 287.0, 286.0, 263.0, 262.0, 296.0, 283.0, 298.0, 289.0, 293.0, 286.0, 285.0, 285.0, 305.0, 325.0, 291.0, 288.0, 288.0, 291.0, 289.0, 293.0, 295.0, 287.0, 258.0, 261.0, 295.0, 281.0, 291.0, 291.0, 296.0, 286.0, 279.0, 297.0, 287.0, 292.0, 290.0, 289.0, 310.0, 320.0, 292.0, 287.0, 287.0, 295.0, 287.0, 292.0, 312.0, 324.0, 290.0, 292.0, 291.0, 291.0, 290.0, 283.0, 289.0, 293.0, 287.0, 289.0, 293.0, 289.0, 291.0, 288.0, 286.0, 296.0, 293.0, 286.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7036136534117213, "mean_inference_ms": 1.2611472236830996, "mean_action_processing_ms": 0.13462100380644718, "mean_env_wait_ms": 0.8474923370648417, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 639.0, "episode_reward_min": 240.0, "episode_reward_mean": 577.26, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 119.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 288.63}, "hist_stats": {"episode_reward": [573.0, 576.0, 573.0, 530.0, 579.0, 579.0, 582.0, 582.0, 584.0, 576.0, 582.0, 576.0, 576.0, 584.0, 579.0, 579.0, 582.0, 582.0, 576.0, 633.0, 579.0, 582.0, 579.0, 473.0, 579.0, 582.0, 573.0, 582.0, 579.0, 579.0, 576.0, 587.0, 573.0, 573.0, 639.0, 579.0, 578.0, 579.0, 636.0, 582.0, 576.0, 576.0, 582.0, 627.0, 579.0, 579.0, 240.0, 576.0, 582.0, 576.0, 582.0, 573.0, 582.0, 582.0, 579.0, 579.0, 579.0, 
579.0, 581.0, 581.0, 576.0, 582.0, 525.0, 636.0, 576.0, 579.0, 630.0, 579.0, 573.0, 525.0, 579.0, 587.0, 579.0, 570.0, 630.0, 579.0, 579.0, 582.0, 582.0, 519.0, 576.0, 582.0, 582.0, 576.0, 579.0, 579.0, 630.0, 579.0, 582.0, 579.0, 636.0, 582.0, 582.0, 573.0, 582.0, 576.0, 582.0, 579.0, 582.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [284.0, 289.0, 286.0, 290.0, 295.0, 278.0, 273.0, 257.0, 289.0, 290.0, 284.0, 295.0, 294.0, 288.0, 300.0, 282.0, 298.0, 286.0, 288.0, 288.0, 288.0, 294.0, 286.0, 290.0, 292.0, 284.0, 294.0, 290.0, 288.0, 291.0, 291.0, 288.0, 292.0, 290.0, 289.0, 293.0, 292.0, 284.0, 317.0, 316.0, 289.0, 290.0, 290.0, 292.0, 287.0, 292.0, 227.0, 246.0, 295.0, 284.0, 297.0, 285.0, 283.0, 290.0, 289.0, 293.0, 288.0, 291.0, 291.0, 288.0, 287.0, 289.0, 290.0, 297.0, 286.0, 287.0, 293.0, 280.0, 324.0, 315.0, 297.0, 282.0, 293.0, 285.0, 290.0, 289.0, 320.0, 316.0, 288.0, 294.0, 288.0, 288.0, 285.0, 291.0, 290.0, 292.0, 308.0, 319.0, 291.0, 288.0, 289.0, 290.0, 119.0, 121.0, 288.0, 288.0, 290.0, 292.0, 291.0, 285.0, 283.0, 299.0, 286.0, 287.0, 289.0, 293.0, 293.0, 289.0, 283.0, 296.0, 295.0, 284.0, 289.0, 290.0, 287.0, 292.0, 285.0, 296.0, 284.0, 297.0, 286.0, 290.0, 289.0, 293.0, 261.0, 264.0, 313.0, 323.0, 286.0, 290.0, 292.0, 287.0, 314.0, 316.0, 291.0, 288.0, 287.0, 286.0, 263.0, 262.0, 296.0, 283.0, 298.0, 289.0, 293.0, 286.0, 285.0, 285.0, 305.0, 325.0, 291.0, 288.0, 288.0, 291.0, 289.0, 293.0, 295.0, 287.0, 258.0, 261.0, 295.0, 281.0, 291.0, 291.0, 296.0, 
286.0, 279.0, 297.0, 287.0, 292.0, 290.0, 289.0, 310.0, 320.0, 292.0, 287.0, 287.0, 295.0, 287.0, 292.0, 312.0, 324.0, 290.0, 292.0, 291.0, 291.0, 290.0, 283.0, 289.0, 293.0, 287.0, 289.0, 293.0, 289.0, 291.0, 288.0, 286.0, 296.0, 293.0, 286.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7036136534117213, "mean_inference_ms": 1.2611472236830996, "mean_action_processing_ms": 0.13462100380644718, "mean_env_wait_ms": 0.8474923370648417, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 7270400, "num_agent_steps_trained": 7270400, "num_env_steps_sampled": 3635200, "num_env_steps_trained": 3635200, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 3635200, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 7270400, "timers": {"training_iteration_time_ms": 3685.546, "learn_time_ms": 1141.17, "learn_throughput": 11216.56, "synch_weights_time_ms": 11.751}, "counters": {"num_env_steps_sampled": 3635200, "num_env_steps_trained": 3635200, "num_agent_steps_sampled": 7270400, "num_agent_steps_trained": 7270400}, "done": false, "episodes_total": 9088, "training_iteration": 284, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-18-55", "timestamp": 1666581535, "time_this_iter_s": 3.668269395828247, "time_total_s": 1094.1552107334137, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1094.1552107334137, "timesteps_since_restore": 0, "iterations_since_restore": 284, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 21.05, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 200.6, "sparse_reward_min": 80, "sparse_reward_max": 220, "shaped_reward_mean": 179.14, "shaped_reward_min": 80, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.69, "onion_pickup_agent_0_min": 4, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 17.46, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 28, "useful_onion_pickup_agent_0_mean": 15.38, "useful_onion_pickup_agent_0_min": 4, "useful_onion_pickup_agent_0_max": 25, "useful_onion_pickup_agent_1_mean": 17.15, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 28, "onion_drop_agent_0_mean": 0.12, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.09, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.08, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, 
"useful_onion_drop_agent_1_mean": 0.06, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.29, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 25, "potting_onion_agent_1_mean": 17.08, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 27, "dish_pickup_agent_0_mean": 5.78, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.14, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.53, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.98, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.11, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.14, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.33, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.77, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.31, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.72, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.29, "optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 25, "optimal_onion_potting_agent_1_mean": 17.08, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 27, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.29, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 25, "viable_onion_potting_agent_1_mean": 17.08, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 27, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0017654591938480735, "policy_loss": -0.0021084710024297237, "vf_loss": 
7.559107780456543, "vf_explained_var": 0.6072764992713928, "kl": 0.0019133866298943758, "entropy": 0.8257938623428345, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 3648000, "num_env_steps_trained": 3648000, "num_agent_steps_sampled": 7296000, "num_agent_steps_trained": 7296000}, "sampler_results": {"episode_reward_max": 639.0, "episode_reward_min": 240.0, "episode_reward_mean": 580.34, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 119.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 290.17}, "custom_metrics": {"sparse_reward_mean": 200.6, "sparse_reward_min": 80, "sparse_reward_max": 220, "shaped_reward_mean": 179.14, "shaped_reward_min": 80, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.69, "onion_pickup_agent_0_min": 4, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 17.46, 
"onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 28, "useful_onion_pickup_agent_0_mean": 15.38, "useful_onion_pickup_agent_0_min": 4, "useful_onion_pickup_agent_0_max": 25, "useful_onion_pickup_agent_1_mean": 17.15, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 28, "onion_drop_agent_0_mean": 0.12, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.09, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.08, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.06, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.29, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 25, "potting_onion_agent_1_mean": 17.08, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 27, "dish_pickup_agent_0_mean": 5.78, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.14, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.53, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.98, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.11, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.14, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.33, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.77, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.31, "soup_delivery_agent_0_min": 2, 
"soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.72, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.29, "optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 25, "optimal_onion_potting_agent_1_mean": 17.08, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 27, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.29, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 25, "viable_onion_potting_agent_1_mean": 17.08, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 27, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [573.0, 573.0, 639.0, 579.0, 578.0, 579.0, 636.0, 582.0, 576.0, 576.0, 582.0, 627.0, 579.0, 579.0, 240.0, 576.0, 582.0, 576.0, 582.0, 573.0, 582.0, 582.0, 579.0, 579.0, 579.0, 579.0, 581.0, 581.0, 576.0, 582.0, 525.0, 636.0, 576.0, 579.0, 630.0, 579.0, 573.0, 525.0, 579.0, 587.0, 579.0, 570.0, 630.0, 579.0, 579.0, 582.0, 582.0, 519.0, 576.0, 582.0, 582.0, 576.0, 579.0, 579.0, 630.0, 579.0, 582.0, 579.0, 636.0, 582.0, 582.0, 573.0, 582.0, 576.0, 582.0, 579.0, 582.0, 579.0, 636.0, 582.0, 630.0, 582.0, 522.0, 579.0, 579.0, 627.0, 582.0, 633.0, 573.0, 582.0, 582.0, 636.0, 579.0, 581.0, 582.0, 570.0, 582.0, 579.0, 582.0, 587.0, 579.0, 579.0, 582.0, 627.0, 576.0, 582.0, 521.0, 582.0, 576.0, 570.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [286.0, 287.0, 293.0, 280.0, 324.0, 315.0, 297.0, 282.0, 293.0, 285.0, 290.0, 289.0, 320.0, 316.0, 288.0, 294.0, 288.0, 288.0, 285.0, 291.0, 290.0, 292.0, 308.0, 319.0, 291.0, 288.0, 289.0, 290.0, 119.0, 121.0, 288.0, 288.0, 290.0, 292.0, 291.0, 285.0, 283.0, 299.0, 286.0, 287.0, 289.0, 293.0, 293.0, 289.0, 283.0, 296.0, 295.0, 284.0, 289.0, 
290.0, 287.0, 292.0, 285.0, 296.0, 284.0, 297.0, 286.0, 290.0, 289.0, 293.0, 261.0, 264.0, 313.0, 323.0, 286.0, 290.0, 292.0, 287.0, 314.0, 316.0, 291.0, 288.0, 287.0, 286.0, 263.0, 262.0, 296.0, 283.0, 298.0, 289.0, 293.0, 286.0, 285.0, 285.0, 305.0, 325.0, 291.0, 288.0, 288.0, 291.0, 289.0, 293.0, 295.0, 287.0, 258.0, 261.0, 295.0, 281.0, 291.0, 291.0, 296.0, 286.0, 279.0, 297.0, 287.0, 292.0, 290.0, 289.0, 310.0, 320.0, 292.0, 287.0, 287.0, 295.0, 287.0, 292.0, 312.0, 324.0, 290.0, 292.0, 291.0, 291.0, 290.0, 283.0, 289.0, 293.0, 287.0, 289.0, 293.0, 289.0, 291.0, 288.0, 286.0, 296.0, 293.0, 286.0, 314.0, 322.0, 283.0, 299.0, 314.0, 316.0, 283.0, 299.0, 260.0, 262.0, 289.0, 290.0, 289.0, 290.0, 307.0, 320.0, 292.0, 290.0, 316.0, 317.0, 280.0, 293.0, 291.0, 291.0, 295.0, 287.0, 321.0, 315.0, 285.0, 294.0, 289.0, 292.0, 290.0, 292.0, 283.0, 287.0, 291.0, 291.0, 294.0, 285.0, 294.0, 288.0, 298.0, 289.0, 291.0, 288.0, 290.0, 289.0, 292.0, 290.0, 314.0, 313.0, 284.0, 292.0, 294.0, 288.0, 260.0, 261.0, 293.0, 289.0, 286.0, 290.0, 289.0, 281.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7035381333603594, "mean_inference_ms": 1.2608786606592253, "mean_action_processing_ms": 0.13460708506122054, "mean_env_wait_ms": 0.8473337645098168, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 639.0, "episode_reward_min": 240.0, "episode_reward_mean": 580.34, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 119.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 290.17}, "hist_stats": {"episode_reward": [573.0, 573.0, 639.0, 579.0, 578.0, 579.0, 636.0, 582.0, 576.0, 576.0, 582.0, 627.0, 579.0, 579.0, 240.0, 576.0, 582.0, 576.0, 582.0, 573.0, 582.0, 582.0, 579.0, 579.0, 579.0, 579.0, 581.0, 581.0, 576.0, 582.0, 525.0, 636.0, 576.0, 579.0, 630.0, 579.0, 573.0, 525.0, 579.0, 587.0, 579.0, 570.0, 630.0, 579.0, 579.0, 582.0, 582.0, 519.0, 576.0, 582.0, 582.0, 576.0, 579.0, 579.0, 630.0, 579.0, 582.0, 
579.0, 636.0, 582.0, 582.0, 573.0, 582.0, 576.0, 582.0, 579.0, 582.0, 579.0, 636.0, 582.0, 630.0, 582.0, 522.0, 579.0, 579.0, 627.0, 582.0, 633.0, 573.0, 582.0, 582.0, 636.0, 579.0, 581.0, 582.0, 570.0, 582.0, 579.0, 582.0, 587.0, 579.0, 579.0, 582.0, 627.0, 576.0, 582.0, 521.0, 582.0, 576.0, 570.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [286.0, 287.0, 293.0, 280.0, 324.0, 315.0, 297.0, 282.0, 293.0, 285.0, 290.0, 289.0, 320.0, 316.0, 288.0, 294.0, 288.0, 288.0, 285.0, 291.0, 290.0, 292.0, 308.0, 319.0, 291.0, 288.0, 289.0, 290.0, 119.0, 121.0, 288.0, 288.0, 290.0, 292.0, 291.0, 285.0, 283.0, 299.0, 286.0, 287.0, 289.0, 293.0, 293.0, 289.0, 283.0, 296.0, 295.0, 284.0, 289.0, 290.0, 287.0, 292.0, 285.0, 296.0, 284.0, 297.0, 286.0, 290.0, 289.0, 293.0, 261.0, 264.0, 313.0, 323.0, 286.0, 290.0, 292.0, 287.0, 314.0, 316.0, 291.0, 288.0, 287.0, 286.0, 263.0, 262.0, 296.0, 283.0, 298.0, 289.0, 293.0, 286.0, 285.0, 285.0, 305.0, 325.0, 291.0, 288.0, 288.0, 291.0, 289.0, 293.0, 295.0, 287.0, 258.0, 261.0, 295.0, 281.0, 291.0, 291.0, 296.0, 286.0, 279.0, 297.0, 287.0, 292.0, 290.0, 289.0, 310.0, 320.0, 292.0, 287.0, 287.0, 295.0, 287.0, 292.0, 312.0, 324.0, 290.0, 292.0, 291.0, 291.0, 290.0, 283.0, 289.0, 293.0, 287.0, 289.0, 293.0, 289.0, 291.0, 288.0, 286.0, 296.0, 293.0, 286.0, 314.0, 322.0, 283.0, 299.0, 314.0, 316.0, 283.0, 299.0, 260.0, 262.0, 289.0, 290.0, 289.0, 290.0, 307.0, 320.0, 292.0, 290.0, 316.0, 317.0, 280.0, 293.0, 291.0, 291.0, 295.0, 287.0, 321.0, 315.0, 285.0, 
294.0, 289.0, 292.0, 290.0, 292.0, 283.0, 287.0, 291.0, 291.0, 294.0, 285.0, 294.0, 288.0, 298.0, 289.0, 291.0, 288.0, 290.0, 289.0, 292.0, 290.0, 314.0, 313.0, 284.0, 292.0, 294.0, 288.0, 260.0, 261.0, 293.0, 289.0, 286.0, 290.0, 289.0, 281.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7035381333603594, "mean_inference_ms": 1.2608786606592253, "mean_action_processing_ms": 0.13460708506122054, "mean_env_wait_ms": 0.8473337645098168, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 7296000, "num_agent_steps_trained": 7296000, "num_env_steps_sampled": 3648000, "num_env_steps_trained": 3648000, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 3648000, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 7296000, "timers": {"training_iteration_time_ms": 3689.238, "learn_time_ms": 1146.734, "learn_throughput": 11162.136, "synch_weights_time_ms": 11.943}, "counters": {"num_env_steps_sampled": 3648000, "num_env_steps_trained": 3648000, "num_agent_steps_sampled": 7296000, "num_agent_steps_trained": 7296000}, "done": false, "episodes_total": 9120, "training_iteration": 285, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-18-59", "timestamp": 1666581539, "time_this_iter_s": 3.6995468139648438, "time_total_s": 1097.8547575473785, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1097.8547575473785, "timesteps_since_restore": 0, "iterations_since_restore": 285, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.86, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 201.0, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 179.7, "shaped_reward_min": 148, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.07, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 17.13, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 28, "useful_onion_pickup_agent_0_mean": 15.79, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 25, "useful_onion_pickup_agent_1_mean": 16.86, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 28, "onion_drop_agent_0_mean": 0.09, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.08, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.06, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, 
"useful_onion_drop_agent_1_mean": 0.05, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.68, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 25, "potting_onion_agent_1_mean": 16.79, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 27, "dish_pickup_agent_0_mean": 5.68, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.33, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.47, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 5.11, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.1, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.2, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.06, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.23, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.89, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.2, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.85, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.68, "optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 25, "optimal_onion_potting_agent_1_mean": 16.79, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 27, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.68, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 25, "viable_onion_potting_agent_1_mean": 16.79, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 27, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0029266555793583393, "policy_loss": -0.0032794035505503416, "vf_loss": 
7.646122932434082, "vf_explained_var": 0.5895668268203735, "kl": 0.001966602634638548, "entropy": 0.8237244486808777, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 3660800, "num_env_steps_trained": 3660800, "num_agent_steps_sampled": 7321600, "num_agent_steps_trained": 7321600}, "sampler_results": {"episode_reward_max": 636.0, "episode_reward_min": 468.0, "episode_reward_mean": 581.7, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 229.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 290.85}, "custom_metrics": {"sparse_reward_mean": 201.0, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 179.7, "shaped_reward_min": 148, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.07, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 17.13, 
"onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 28, "useful_onion_pickup_agent_0_mean": 15.79, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 25, "useful_onion_pickup_agent_1_mean": 16.86, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 28, "onion_drop_agent_0_mean": 0.09, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.08, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.06, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.05, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.68, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 25, "potting_onion_agent_1_mean": 16.79, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 27, "dish_pickup_agent_0_mean": 5.68, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.33, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.47, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 5.11, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.1, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.2, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.06, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.23, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.89, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.2, "soup_delivery_agent_0_min": 2, 
"soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.85, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.68, "optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 25, "optimal_onion_potting_agent_1_mean": 16.79, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 27, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.68, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 25, "viable_onion_potting_agent_1_mean": 16.79, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 27, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [576.0, 579.0, 630.0, 579.0, 573.0, 525.0, 579.0, 587.0, 579.0, 570.0, 630.0, 579.0, 579.0, 582.0, 582.0, 519.0, 576.0, 582.0, 582.0, 576.0, 579.0, 579.0, 630.0, 579.0, 582.0, 579.0, 636.0, 582.0, 582.0, 573.0, 582.0, 576.0, 582.0, 579.0, 582.0, 579.0, 636.0, 582.0, 630.0, 582.0, 522.0, 579.0, 579.0, 627.0, 582.0, 633.0, 573.0, 582.0, 582.0, 636.0, 579.0, 581.0, 582.0, 570.0, 582.0, 579.0, 582.0, 587.0, 579.0, 579.0, 582.0, 627.0, 576.0, 582.0, 521.0, 582.0, 576.0, 570.0, 630.0, 579.0, 582.0, 582.0, 582.0, 582.0, 582.0, 576.0, 582.0, 582.0, 576.0, 570.0, 576.0, 587.0, 579.0, 579.0, 582.0, 582.0, 587.0, 579.0, 579.0, 525.0, 576.0, 627.0, 582.0, 582.0, 579.0, 576.0, 582.0, 630.0, 468.0, 522.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [286.0, 290.0, 292.0, 287.0, 314.0, 316.0, 291.0, 288.0, 287.0, 286.0, 263.0, 262.0, 296.0, 283.0, 298.0, 289.0, 293.0, 286.0, 285.0, 285.0, 305.0, 325.0, 291.0, 288.0, 288.0, 291.0, 289.0, 293.0, 295.0, 287.0, 258.0, 261.0, 295.0, 281.0, 291.0, 291.0, 296.0, 286.0, 279.0, 297.0, 287.0, 292.0, 290.0, 289.0, 310.0, 320.0, 292.0, 287.0, 287.0, 
295.0, 287.0, 292.0, 312.0, 324.0, 290.0, 292.0, 291.0, 291.0, 290.0, 283.0, 289.0, 293.0, 287.0, 289.0, 293.0, 289.0, 291.0, 288.0, 286.0, 296.0, 293.0, 286.0, 314.0, 322.0, 283.0, 299.0, 314.0, 316.0, 283.0, 299.0, 260.0, 262.0, 289.0, 290.0, 289.0, 290.0, 307.0, 320.0, 292.0, 290.0, 316.0, 317.0, 280.0, 293.0, 291.0, 291.0, 295.0, 287.0, 321.0, 315.0, 285.0, 294.0, 289.0, 292.0, 290.0, 292.0, 283.0, 287.0, 291.0, 291.0, 294.0, 285.0, 294.0, 288.0, 298.0, 289.0, 291.0, 288.0, 290.0, 289.0, 292.0, 290.0, 314.0, 313.0, 284.0, 292.0, 294.0, 288.0, 260.0, 261.0, 293.0, 289.0, 286.0, 290.0, 289.0, 281.0, 315.0, 315.0, 288.0, 291.0, 291.0, 291.0, 286.0, 296.0, 295.0, 287.0, 293.0, 289.0, 296.0, 286.0, 292.0, 284.0, 294.0, 288.0, 284.0, 298.0, 294.0, 282.0, 291.0, 279.0, 285.0, 291.0, 288.0, 299.0, 288.0, 291.0, 285.0, 294.0, 289.0, 293.0, 293.0, 289.0, 297.0, 290.0, 287.0, 292.0, 289.0, 290.0, 266.0, 259.0, 281.0, 295.0, 313.0, 314.0, 287.0, 295.0, 289.0, 293.0, 296.0, 283.0, 281.0, 295.0, 289.0, 293.0, 317.0, 313.0, 239.0, 229.0, 259.0, 263.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7034854686787455, "mean_inference_ms": 1.2606134572888865, "mean_action_processing_ms": 0.13459397004636353, "mean_env_wait_ms": 0.8471829775070431, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 636.0, "episode_reward_min": 468.0, "episode_reward_mean": 581.7, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 229.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 290.85}, "hist_stats": {"episode_reward": [576.0, 579.0, 630.0, 579.0, 573.0, 525.0, 579.0, 587.0, 579.0, 570.0, 630.0, 579.0, 579.0, 582.0, 582.0, 519.0, 576.0, 582.0, 582.0, 576.0, 579.0, 579.0, 630.0, 579.0, 582.0, 579.0, 636.0, 582.0, 582.0, 573.0, 582.0, 576.0, 582.0, 579.0, 582.0, 579.0, 636.0, 582.0, 630.0, 582.0, 522.0, 579.0, 579.0, 627.0, 582.0, 633.0, 573.0, 582.0, 582.0, 636.0, 579.0, 581.0, 582.0, 570.0, 582.0, 579.0, 582.0, 
587.0, 579.0, 579.0, 582.0, 627.0, 576.0, 582.0, 521.0, 582.0, 576.0, 570.0, 630.0, 579.0, 582.0, 582.0, 582.0, 582.0, 582.0, 576.0, 582.0, 582.0, 576.0, 570.0, 576.0, 587.0, 579.0, 579.0, 582.0, 582.0, 587.0, 579.0, 579.0, 525.0, 576.0, 627.0, 582.0, 582.0, 579.0, 576.0, 582.0, 630.0, 468.0, 522.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [286.0, 290.0, 292.0, 287.0, 314.0, 316.0, 291.0, 288.0, 287.0, 286.0, 263.0, 262.0, 296.0, 283.0, 298.0, 289.0, 293.0, 286.0, 285.0, 285.0, 305.0, 325.0, 291.0, 288.0, 288.0, 291.0, 289.0, 293.0, 295.0, 287.0, 258.0, 261.0, 295.0, 281.0, 291.0, 291.0, 296.0, 286.0, 279.0, 297.0, 287.0, 292.0, 290.0, 289.0, 310.0, 320.0, 292.0, 287.0, 287.0, 295.0, 287.0, 292.0, 312.0, 324.0, 290.0, 292.0, 291.0, 291.0, 290.0, 283.0, 289.0, 293.0, 287.0, 289.0, 293.0, 289.0, 291.0, 288.0, 286.0, 296.0, 293.0, 286.0, 314.0, 322.0, 283.0, 299.0, 314.0, 316.0, 283.0, 299.0, 260.0, 262.0, 289.0, 290.0, 289.0, 290.0, 307.0, 320.0, 292.0, 290.0, 316.0, 317.0, 280.0, 293.0, 291.0, 291.0, 295.0, 287.0, 321.0, 315.0, 285.0, 294.0, 289.0, 292.0, 290.0, 292.0, 283.0, 287.0, 291.0, 291.0, 294.0, 285.0, 294.0, 288.0, 298.0, 289.0, 291.0, 288.0, 290.0, 289.0, 292.0, 290.0, 314.0, 313.0, 284.0, 292.0, 294.0, 288.0, 260.0, 261.0, 293.0, 289.0, 286.0, 290.0, 289.0, 281.0, 315.0, 315.0, 288.0, 291.0, 291.0, 291.0, 286.0, 296.0, 295.0, 287.0, 293.0, 289.0, 296.0, 286.0, 292.0, 284.0, 294.0, 288.0, 284.0, 298.0, 294.0, 282.0, 291.0, 279.0, 285.0, 291.0, 288.0, 299.0, 288.0, 
291.0, 285.0, 294.0, 289.0, 293.0, 293.0, 289.0, 297.0, 290.0, 287.0, 292.0, 289.0, 290.0, 266.0, 259.0, 281.0, 295.0, 313.0, 314.0, 287.0, 295.0, 289.0, 293.0, 296.0, 283.0, 281.0, 295.0, 289.0, 293.0, 317.0, 313.0, 239.0, 229.0, 259.0, 263.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7034854686787455, "mean_inference_ms": 1.2606134572888865, "mean_action_processing_ms": 0.13459397004636353, "mean_env_wait_ms": 0.8471829775070431, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 7321600, "num_agent_steps_trained": 7321600, "num_env_steps_sampled": 3660800, "num_env_steps_trained": 3660800, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 3660800, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 7321600, "timers": {"training_iteration_time_ms": 3689.191, "learn_time_ms": 1150.78, "learn_throughput": 11122.895, "synch_weights_time_ms": 12.204}, "counters": {"num_env_steps_sampled": 3660800, "num_env_steps_trained": 3660800, "num_agent_steps_sampled": 7321600, "num_agent_steps_trained": 7321600}, "done": false, "episodes_total": 9152, "training_iteration": 286, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-19-03", "timestamp": 1666581543, "time_this_iter_s": 3.6968560218811035, "time_total_s": 1101.5516135692596, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1101.5516135692596, "timesteps_since_restore": 0, "iterations_since_restore": 286, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.516666666666666, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 199.8, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 178.74, "shaped_reward_min": 111, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.75, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 17.32, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 15.52, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 25, "useful_onion_pickup_agent_1_mean": 17.08, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 25, "onion_drop_agent_0_mean": 0.11, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.1, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.05, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, 
"useful_onion_drop_agent_1_mean": 0.04, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.34, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 25, "potting_onion_agent_1_mean": 16.98, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 25, "dish_pickup_agent_0_mean": 5.77, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.24, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.55, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.96, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.14, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.23, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.06, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.32, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.74, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.29, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.71, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.34, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 25, "optimal_onion_potting_agent_1_mean": 16.98, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 25, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.34, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 25, "viable_onion_potting_agent_1_mean": 16.98, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 25, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0010225408477708697, "policy_loss": -0.0013909590197727084, "vf_loss": 
7.777632236480713, "vf_explained_var": 0.571610689163208, "kl": 0.002125002443790436, "entropy": 0.8186874389648438, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 3673600, "num_env_steps_trained": 3673600, "num_agent_steps_sampled": 7347200, "num_agent_steps_trained": 7347200}, "sampler_results": {"episode_reward_max": 636.0, "episode_reward_min": 351.0, "episode_reward_mean": 578.34, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 172.0}, "policy_reward_max": {"ppo": 326.0}, "policy_reward_mean": {"ppo": 289.17}, "custom_metrics": {"sparse_reward_mean": 199.8, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 178.74, "shaped_reward_min": 111, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.75, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 17.32, 
"onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 15.52, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 25, "useful_onion_pickup_agent_1_mean": 17.08, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 25, "onion_drop_agent_0_mean": 0.11, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.1, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.05, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.04, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.34, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 25, "potting_onion_agent_1_mean": 16.98, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 25, "dish_pickup_agent_0_mean": 5.77, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.24, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.55, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.96, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.14, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.23, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.06, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.32, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.74, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.29, "soup_delivery_agent_0_min": 2, 
"soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.71, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.34, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 25, "optimal_onion_potting_agent_1_mean": 16.98, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 25, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.34, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 25, "viable_onion_potting_agent_1_mean": 16.98, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 25, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 579.0, 582.0, 579.0, 636.0, 582.0, 630.0, 582.0, 522.0, 579.0, 579.0, 627.0, 582.0, 633.0, 573.0, 582.0, 582.0, 636.0, 579.0, 581.0, 582.0, 570.0, 582.0, 579.0, 582.0, 587.0, 579.0, 579.0, 582.0, 627.0, 576.0, 582.0, 521.0, 582.0, 576.0, 570.0, 630.0, 579.0, 582.0, 582.0, 582.0, 582.0, 582.0, 576.0, 582.0, 582.0, 576.0, 570.0, 576.0, 587.0, 579.0, 579.0, 582.0, 582.0, 587.0, 579.0, 579.0, 525.0, 576.0, 627.0, 582.0, 582.0, 579.0, 576.0, 582.0, 630.0, 468.0, 522.0, 522.0, 582.0, 576.0, 582.0, 579.0, 576.0, 351.0, 576.0, 576.0, 633.0, 579.0, 582.0, 582.0, 530.0, 582.0, 573.0, 525.0, 633.0, 579.0, 582.0, 582.0, 633.0, 570.0, 576.0, 579.0, 579.0, 582.0, 582.0, 525.0, 576.0, 627.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [293.0, 289.0, 291.0, 288.0, 286.0, 296.0, 293.0, 286.0, 314.0, 322.0, 283.0, 299.0, 314.0, 316.0, 283.0, 299.0, 260.0, 262.0, 289.0, 290.0, 289.0, 290.0, 307.0, 320.0, 292.0, 290.0, 316.0, 317.0, 280.0, 293.0, 291.0, 291.0, 295.0, 287.0, 321.0, 315.0, 285.0, 294.0, 289.0, 292.0, 290.0, 292.0, 283.0, 287.0, 291.0, 291.0, 294.0, 285.0, 294.0, 
288.0, 298.0, 289.0, 291.0, 288.0, 290.0, 289.0, 292.0, 290.0, 314.0, 313.0, 284.0, 292.0, 294.0, 288.0, 260.0, 261.0, 293.0, 289.0, 286.0, 290.0, 289.0, 281.0, 315.0, 315.0, 288.0, 291.0, 291.0, 291.0, 286.0, 296.0, 295.0, 287.0, 293.0, 289.0, 296.0, 286.0, 292.0, 284.0, 294.0, 288.0, 284.0, 298.0, 294.0, 282.0, 291.0, 279.0, 285.0, 291.0, 288.0, 299.0, 288.0, 291.0, 285.0, 294.0, 289.0, 293.0, 293.0, 289.0, 297.0, 290.0, 287.0, 292.0, 289.0, 290.0, 266.0, 259.0, 281.0, 295.0, 313.0, 314.0, 287.0, 295.0, 289.0, 293.0, 296.0, 283.0, 281.0, 295.0, 289.0, 293.0, 317.0, 313.0, 239.0, 229.0, 259.0, 263.0, 259.0, 263.0, 291.0, 291.0, 287.0, 289.0, 286.0, 296.0, 283.0, 296.0, 286.0, 290.0, 179.0, 172.0, 294.0, 282.0, 294.0, 282.0, 321.0, 312.0, 287.0, 292.0, 292.0, 290.0, 290.0, 292.0, 268.0, 262.0, 291.0, 291.0, 289.0, 284.0, 256.0, 269.0, 326.0, 307.0, 286.0, 293.0, 287.0, 295.0, 295.0, 287.0, 317.0, 316.0, 279.0, 291.0, 291.0, 285.0, 293.0, 286.0, 290.0, 289.0, 286.0, 296.0, 284.0, 298.0, 267.0, 258.0, 293.0, 283.0, 309.0, 318.0, 295.0, 281.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7034303932588013, "mean_inference_ms": 1.2603550541368185, "mean_action_processing_ms": 0.13458294799057233, "mean_env_wait_ms": 0.8470510383832814, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 636.0, "episode_reward_min": 351.0, "episode_reward_mean": 578.34, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 172.0}, "policy_reward_max": {"ppo": 326.0}, "policy_reward_mean": {"ppo": 289.17}, "hist_stats": {"episode_reward": [582.0, 579.0, 582.0, 579.0, 636.0, 582.0, 630.0, 582.0, 522.0, 579.0, 579.0, 627.0, 582.0, 633.0, 573.0, 582.0, 582.0, 636.0, 579.0, 581.0, 582.0, 570.0, 582.0, 579.0, 582.0, 587.0, 579.0, 579.0, 582.0, 627.0, 576.0, 582.0, 521.0, 582.0, 576.0, 570.0, 630.0, 579.0, 582.0, 582.0, 582.0, 582.0, 582.0, 576.0, 582.0, 582.0, 576.0, 570.0, 576.0, 587.0, 579.0, 579.0, 582.0, 582.0, 587.0, 579.0, 579.0, 
525.0, 576.0, 627.0, 582.0, 582.0, 579.0, 576.0, 582.0, 630.0, 468.0, 522.0, 522.0, 582.0, 576.0, 582.0, 579.0, 576.0, 351.0, 576.0, 576.0, 633.0, 579.0, 582.0, 582.0, 530.0, 582.0, 573.0, 525.0, 633.0, 579.0, 582.0, 582.0, 633.0, 570.0, 576.0, 579.0, 579.0, 582.0, 582.0, 525.0, 576.0, 627.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [293.0, 289.0, 291.0, 288.0, 286.0, 296.0, 293.0, 286.0, 314.0, 322.0, 283.0, 299.0, 314.0, 316.0, 283.0, 299.0, 260.0, 262.0, 289.0, 290.0, 289.0, 290.0, 307.0, 320.0, 292.0, 290.0, 316.0, 317.0, 280.0, 293.0, 291.0, 291.0, 295.0, 287.0, 321.0, 315.0, 285.0, 294.0, 289.0, 292.0, 290.0, 292.0, 283.0, 287.0, 291.0, 291.0, 294.0, 285.0, 294.0, 288.0, 298.0, 289.0, 291.0, 288.0, 290.0, 289.0, 292.0, 290.0, 314.0, 313.0, 284.0, 292.0, 294.0, 288.0, 260.0, 261.0, 293.0, 289.0, 286.0, 290.0, 289.0, 281.0, 315.0, 315.0, 288.0, 291.0, 291.0, 291.0, 286.0, 296.0, 295.0, 287.0, 293.0, 289.0, 296.0, 286.0, 292.0, 284.0, 294.0, 288.0, 284.0, 298.0, 294.0, 282.0, 291.0, 279.0, 285.0, 291.0, 288.0, 299.0, 288.0, 291.0, 285.0, 294.0, 289.0, 293.0, 293.0, 289.0, 297.0, 290.0, 287.0, 292.0, 289.0, 290.0, 266.0, 259.0, 281.0, 295.0, 313.0, 314.0, 287.0, 295.0, 289.0, 293.0, 296.0, 283.0, 281.0, 295.0, 289.0, 293.0, 317.0, 313.0, 239.0, 229.0, 259.0, 263.0, 259.0, 263.0, 291.0, 291.0, 287.0, 289.0, 286.0, 296.0, 283.0, 296.0, 286.0, 290.0, 179.0, 172.0, 294.0, 282.0, 294.0, 282.0, 321.0, 312.0, 287.0, 292.0, 292.0, 290.0, 290.0, 292.0, 268.0, 262.0, 291.0, 
291.0, 289.0, 284.0, 256.0, 269.0, 326.0, 307.0, 286.0, 293.0, 287.0, 295.0, 295.0, 287.0, 317.0, 316.0, 279.0, 291.0, 291.0, 285.0, 293.0, 286.0, 290.0, 289.0, 286.0, 296.0, 284.0, 298.0, 267.0, 258.0, 293.0, 283.0, 309.0, 318.0, 295.0, 281.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7034303932588013, "mean_inference_ms": 1.2603550541368185, "mean_action_processing_ms": 0.13458294799057233, "mean_env_wait_ms": 0.8470510383832814, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 7347200, "num_agent_steps_trained": 7347200, "num_env_steps_sampled": 3673600, "num_env_steps_trained": 3673600, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 3673600, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 7347200, "timers": {"training_iteration_time_ms": 3701.219, "learn_time_ms": 1154.78, "learn_throughput": 11084.364, "synch_weights_time_ms": 11.718}, "counters": {"num_env_steps_sampled": 3673600, "num_env_steps_trained": 3673600, "num_agent_steps_sampled": 7347200, "num_agent_steps_trained": 7347200}, "done": false, "episodes_total": 9184, "training_iteration": 287, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-19-06", "timestamp": 1666581546, "time_this_iter_s": 3.8007616996765137, "time_total_s": 1105.3523752689362, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1105.3523752689362, "timesteps_since_restore": 0, "iterations_since_restore": 287, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.800000000000004, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 198.4, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 178.06, "shaped_reward_min": 111, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.48, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 27, "onion_pickup_agent_1_mean": 17.37, "onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 15.34, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 27, "useful_onion_pickup_agent_1_mean": 17.12, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 25, "onion_drop_agent_0_mean": 0.11, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.04, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.04, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, 
"useful_onion_drop_agent_1_mean": 0.0, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 0, "potting_onion_agent_0_mean": 15.08, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 26, "potting_onion_agent_1_mean": 17.05, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 25, "dish_pickup_agent_0_mean": 5.87, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.1, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.63, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.86, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.18, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.19, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.05, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.45, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.6, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.4, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.58, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.08, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 26, "optimal_onion_potting_agent_1_mean": 17.05, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 25, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.08, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 26, "viable_onion_potting_agent_1_mean": 17.05, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 25, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0019629087764769793, "policy_loss": -0.0023256689310073853, "vf_loss": 
7.742514133453369, "vf_explained_var": 0.5878534913063049, "kl": 0.002066924935206771, "entropy": 0.8229769468307495, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 3686400, "num_env_steps_trained": 3686400, "num_agent_steps_sampled": 7372800, "num_agent_steps_trained": 7372800}, "sampler_results": {"episode_reward_max": 636.0, "episode_reward_min": 351.0, "episode_reward_mean": 574.86, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 172.0}, "policy_reward_max": {"ppo": 326.0}, "policy_reward_mean": {"ppo": 287.43}, "custom_metrics": {"sparse_reward_mean": 198.4, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 178.06, "shaped_reward_min": 111, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.48, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 27, "onion_pickup_agent_1_mean": 17.37, 
"onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 15.34, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 27, "useful_onion_pickup_agent_1_mean": 17.12, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 25, "onion_drop_agent_0_mean": 0.11, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.04, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.04, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.0, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 0, "potting_onion_agent_0_mean": 15.08, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 26, "potting_onion_agent_1_mean": 17.05, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 25, "dish_pickup_agent_0_mean": 5.87, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.1, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.63, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.86, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.18, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.19, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.05, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.45, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.6, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.4, "soup_delivery_agent_0_min": 2, 
"soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.58, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.08, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 26, "optimal_onion_potting_agent_1_mean": 17.05, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 25, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.08, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 26, "viable_onion_potting_agent_1_mean": 17.05, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 25, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [521.0, 582.0, 576.0, 570.0, 630.0, 579.0, 582.0, 582.0, 582.0, 582.0, 582.0, 576.0, 582.0, 582.0, 576.0, 570.0, 576.0, 587.0, 579.0, 579.0, 582.0, 582.0, 587.0, 579.0, 579.0, 525.0, 576.0, 627.0, 582.0, 582.0, 579.0, 576.0, 582.0, 630.0, 468.0, 522.0, 522.0, 582.0, 576.0, 582.0, 579.0, 576.0, 351.0, 576.0, 576.0, 633.0, 579.0, 582.0, 582.0, 530.0, 582.0, 573.0, 525.0, 633.0, 579.0, 582.0, 582.0, 633.0, 570.0, 576.0, 579.0, 579.0, 582.0, 582.0, 525.0, 576.0, 627.0, 576.0, 584.0, 582.0, 582.0, 576.0, 576.0, 582.0, 581.0, 582.0, 579.0, 582.0, 530.0, 579.0, 582.0, 579.0, 579.0, 581.0, 573.0, 570.0, 573.0, 579.0, 536.0, 539.0, 582.0, 636.0, 576.0, 582.0, 630.0, 530.0, 582.0, 587.0, 582.0, 573.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [260.0, 261.0, 293.0, 289.0, 286.0, 290.0, 289.0, 281.0, 315.0, 315.0, 288.0, 291.0, 291.0, 291.0, 286.0, 296.0, 295.0, 287.0, 293.0, 289.0, 296.0, 286.0, 292.0, 284.0, 294.0, 288.0, 284.0, 298.0, 294.0, 282.0, 291.0, 279.0, 285.0, 291.0, 288.0, 299.0, 288.0, 291.0, 285.0, 294.0, 289.0, 293.0, 293.0, 289.0, 297.0, 290.0, 287.0, 292.0, 289.0, 
290.0, 266.0, 259.0, 281.0, 295.0, 313.0, 314.0, 287.0, 295.0, 289.0, 293.0, 296.0, 283.0, 281.0, 295.0, 289.0, 293.0, 317.0, 313.0, 239.0, 229.0, 259.0, 263.0, 259.0, 263.0, 291.0, 291.0, 287.0, 289.0, 286.0, 296.0, 283.0, 296.0, 286.0, 290.0, 179.0, 172.0, 294.0, 282.0, 294.0, 282.0, 321.0, 312.0, 287.0, 292.0, 292.0, 290.0, 290.0, 292.0, 268.0, 262.0, 291.0, 291.0, 289.0, 284.0, 256.0, 269.0, 326.0, 307.0, 286.0, 293.0, 287.0, 295.0, 295.0, 287.0, 317.0, 316.0, 279.0, 291.0, 291.0, 285.0, 293.0, 286.0, 290.0, 289.0, 286.0, 296.0, 284.0, 298.0, 267.0, 258.0, 293.0, 283.0, 309.0, 318.0, 295.0, 281.0, 303.0, 281.0, 294.0, 288.0, 288.0, 294.0, 287.0, 289.0, 284.0, 292.0, 293.0, 289.0, 297.0, 284.0, 294.0, 288.0, 289.0, 290.0, 286.0, 296.0, 260.0, 270.0, 285.0, 294.0, 295.0, 287.0, 291.0, 288.0, 297.0, 282.0, 290.0, 291.0, 284.0, 289.0, 287.0, 283.0, 284.0, 289.0, 292.0, 287.0, 263.0, 273.0, 271.0, 268.0, 288.0, 294.0, 322.0, 314.0, 286.0, 290.0, 299.0, 283.0, 313.0, 317.0, 266.0, 264.0, 297.0, 285.0, 292.0, 295.0, 287.0, 295.0, 293.0, 280.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7033596138763246, "mean_inference_ms": 1.2601166688201468, "mean_action_processing_ms": 0.13457362199660333, "mean_env_wait_ms": 0.8469378616858445, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 636.0, "episode_reward_min": 351.0, "episode_reward_mean": 574.86, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 172.0}, "policy_reward_max": {"ppo": 326.0}, "policy_reward_mean": {"ppo": 287.43}, "hist_stats": {"episode_reward": [521.0, 582.0, 576.0, 570.0, 630.0, 579.0, 582.0, 582.0, 582.0, 582.0, 582.0, 576.0, 582.0, 582.0, 576.0, 570.0, 576.0, 587.0, 579.0, 579.0, 582.0, 582.0, 587.0, 579.0, 579.0, 525.0, 576.0, 627.0, 582.0, 582.0, 579.0, 576.0, 582.0, 630.0, 468.0, 522.0, 522.0, 582.0, 576.0, 582.0, 579.0, 576.0, 351.0, 576.0, 576.0, 633.0, 579.0, 582.0, 582.0, 530.0, 582.0, 573.0, 525.0, 633.0, 579.0, 582.0, 582.0, 
633.0, 570.0, 576.0, 579.0, 579.0, 582.0, 582.0, 525.0, 576.0, 627.0, 576.0, 584.0, 582.0, 582.0, 576.0, 576.0, 582.0, 581.0, 582.0, 579.0, 582.0, 530.0, 579.0, 582.0, 579.0, 579.0, 581.0, 573.0, 570.0, 573.0, 579.0, 536.0, 539.0, 582.0, 636.0, 576.0, 582.0, 630.0, 530.0, 582.0, 587.0, 582.0, 573.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [260.0, 261.0, 293.0, 289.0, 286.0, 290.0, 289.0, 281.0, 315.0, 315.0, 288.0, 291.0, 291.0, 291.0, 286.0, 296.0, 295.0, 287.0, 293.0, 289.0, 296.0, 286.0, 292.0, 284.0, 294.0, 288.0, 284.0, 298.0, 294.0, 282.0, 291.0, 279.0, 285.0, 291.0, 288.0, 299.0, 288.0, 291.0, 285.0, 294.0, 289.0, 293.0, 293.0, 289.0, 297.0, 290.0, 287.0, 292.0, 289.0, 290.0, 266.0, 259.0, 281.0, 295.0, 313.0, 314.0, 287.0, 295.0, 289.0, 293.0, 296.0, 283.0, 281.0, 295.0, 289.0, 293.0, 317.0, 313.0, 239.0, 229.0, 259.0, 263.0, 259.0, 263.0, 291.0, 291.0, 287.0, 289.0, 286.0, 296.0, 283.0, 296.0, 286.0, 290.0, 179.0, 172.0, 294.0, 282.0, 294.0, 282.0, 321.0, 312.0, 287.0, 292.0, 292.0, 290.0, 290.0, 292.0, 268.0, 262.0, 291.0, 291.0, 289.0, 284.0, 256.0, 269.0, 326.0, 307.0, 286.0, 293.0, 287.0, 295.0, 295.0, 287.0, 317.0, 316.0, 279.0, 291.0, 291.0, 285.0, 293.0, 286.0, 290.0, 289.0, 286.0, 296.0, 284.0, 298.0, 267.0, 258.0, 293.0, 283.0, 309.0, 318.0, 295.0, 281.0, 303.0, 281.0, 294.0, 288.0, 288.0, 294.0, 287.0, 289.0, 284.0, 292.0, 293.0, 289.0, 297.0, 284.0, 294.0, 288.0, 289.0, 290.0, 286.0, 296.0, 260.0, 270.0, 285.0, 294.0, 295.0, 287.0, 291.0, 288.0, 297.0, 
282.0, 290.0, 291.0, 284.0, 289.0, 287.0, 283.0, 284.0, 289.0, 292.0, 287.0, 263.0, 273.0, 271.0, 268.0, 288.0, 294.0, 322.0, 314.0, 286.0, 290.0, 299.0, 283.0, 313.0, 317.0, 266.0, 264.0, 297.0, 285.0, 292.0, 295.0, 287.0, 295.0, 293.0, 280.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7033596138763246, "mean_inference_ms": 1.2601166688201468, "mean_action_processing_ms": 0.13457362199660333, "mean_env_wait_ms": 0.8469378616858445, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 7372800, "num_agent_steps_trained": 7372800, "num_env_steps_sampled": 3686400, "num_env_steps_trained": 3686400, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 3686400, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 7372800, "timers": {"training_iteration_time_ms": 3706.021, "learn_time_ms": 1160.239, "learn_throughput": 11032.208, "synch_weights_time_ms": 11.723}, "counters": {"num_env_steps_sampled": 3686400, "num_env_steps_trained": 3686400, "num_agent_steps_sampled": 7372800, "num_agent_steps_trained": 7372800}, "done": false, "episodes_total": 9216, "training_iteration": 288, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-19-10", "timestamp": 1666581550, "time_this_iter_s": 3.7877204418182373, "time_total_s": 1109.1400957107544, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1109.1400957107544, "timesteps_since_restore": 0, "iterations_since_restore": 288, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.750000000000004, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 197.6, "sparse_reward_min": 100, "sparse_reward_max": 220, "shaped_reward_mean": 177.21, "shaped_reward_min": 97, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.84, "onion_pickup_agent_0_min": 5, "onion_pickup_agent_0_max": 27, "onion_pickup_agent_1_mean": 17.85, "onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 14.73, "useful_onion_pickup_agent_0_min": 4, "useful_onion_pickup_agent_0_max": 27, "useful_onion_pickup_agent_1_mean": 17.58, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 26, "onion_drop_agent_0_mean": 0.11, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.05, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.04, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, 
"useful_onion_drop_agent_1_mean": 0.0, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 0, "potting_onion_agent_0_mean": 14.46, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 26, "potting_onion_agent_1_mean": 17.52, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 26, "dish_pickup_agent_0_mean": 6.06, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 4.82, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.79, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.6, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.22, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.14, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.04, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.64, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.38, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.6, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.36, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.46, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 26, "optimal_onion_potting_agent_1_mean": 17.52, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 26, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.46, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 26, "viable_onion_potting_agent_1_mean": 17.52, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 26, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0014422027161344886, "policy_loss": 0.0010773971443995833, "vf_loss": 
7.724160194396973, "vf_explained_var": 0.5966507196426392, "kl": 0.0017015428747981787, "entropy": 0.8152189254760742, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 3699200, "num_env_steps_trained": 3699200, "num_agent_steps_sampled": 7398400, "num_agent_steps_trained": 7398400}, "sampler_results": {"episode_reward_max": 636.0, "episode_reward_min": 297.0, "episode_reward_mean": 572.41, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 146.0}, "policy_reward_max": {"ppo": 326.0}, "policy_reward_mean": {"ppo": 286.205}, "custom_metrics": {"sparse_reward_mean": 197.6, "sparse_reward_min": 100, "sparse_reward_max": 220, "shaped_reward_mean": 177.21, "shaped_reward_min": 97, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.84, "onion_pickup_agent_0_min": 5, "onion_pickup_agent_0_max": 27, "onion_pickup_agent_1_mean": 17.85, 
"onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 14.73, "useful_onion_pickup_agent_0_min": 4, "useful_onion_pickup_agent_0_max": 27, "useful_onion_pickup_agent_1_mean": 17.58, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 26, "onion_drop_agent_0_mean": 0.11, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.05, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.04, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.0, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 0, "potting_onion_agent_0_mean": 14.46, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 26, "potting_onion_agent_1_mean": 17.52, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 26, "dish_pickup_agent_0_mean": 6.06, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 4.82, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.79, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.6, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.22, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.14, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.04, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.64, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.38, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.6, "soup_delivery_agent_0_min": 2, 
"soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.36, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.46, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 26, "optimal_onion_potting_agent_1_mean": 17.52, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 26, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.46, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 26, "viable_onion_potting_agent_1_mean": 17.52, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 26, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 630.0, 468.0, 522.0, 522.0, 582.0, 576.0, 582.0, 579.0, 576.0, 351.0, 576.0, 576.0, 633.0, 579.0, 582.0, 582.0, 530.0, 582.0, 573.0, 525.0, 633.0, 579.0, 582.0, 582.0, 633.0, 570.0, 576.0, 579.0, 579.0, 582.0, 582.0, 525.0, 576.0, 627.0, 576.0, 584.0, 582.0, 582.0, 576.0, 576.0, 582.0, 581.0, 582.0, 579.0, 582.0, 530.0, 579.0, 582.0, 579.0, 579.0, 581.0, 573.0, 570.0, 573.0, 579.0, 536.0, 539.0, 582.0, 636.0, 576.0, 582.0, 630.0, 530.0, 582.0, 587.0, 582.0, 573.0, 527.0, 579.0, 630.0, 582.0, 584.0, 573.0, 627.0, 579.0, 582.0, 582.0, 570.0, 573.0, 630.0, 576.0, 573.0, 579.0, 579.0, 524.0, 587.0, 582.0, 584.0, 576.0, 576.0, 582.0, 579.0, 297.0, 579.0, 582.0, 582.0, 582.0, 576.0, 573.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [289.0, 293.0, 317.0, 313.0, 239.0, 229.0, 259.0, 263.0, 259.0, 263.0, 291.0, 291.0, 287.0, 289.0, 286.0, 296.0, 283.0, 296.0, 286.0, 290.0, 179.0, 172.0, 294.0, 282.0, 294.0, 282.0, 321.0, 312.0, 287.0, 292.0, 292.0, 290.0, 290.0, 292.0, 268.0, 262.0, 291.0, 291.0, 289.0, 284.0, 256.0, 269.0, 326.0, 307.0, 286.0, 293.0, 287.0, 295.0, 295.0, 
287.0, 317.0, 316.0, 279.0, 291.0, 291.0, 285.0, 293.0, 286.0, 290.0, 289.0, 286.0, 296.0, 284.0, 298.0, 267.0, 258.0, 293.0, 283.0, 309.0, 318.0, 295.0, 281.0, 303.0, 281.0, 294.0, 288.0, 288.0, 294.0, 287.0, 289.0, 284.0, 292.0, 293.0, 289.0, 297.0, 284.0, 294.0, 288.0, 289.0, 290.0, 286.0, 296.0, 260.0, 270.0, 285.0, 294.0, 295.0, 287.0, 291.0, 288.0, 297.0, 282.0, 290.0, 291.0, 284.0, 289.0, 287.0, 283.0, 284.0, 289.0, 292.0, 287.0, 263.0, 273.0, 271.0, 268.0, 288.0, 294.0, 322.0, 314.0, 286.0, 290.0, 299.0, 283.0, 313.0, 317.0, 266.0, 264.0, 297.0, 285.0, 292.0, 295.0, 287.0, 295.0, 293.0, 280.0, 264.0, 263.0, 292.0, 287.0, 321.0, 309.0, 295.0, 287.0, 289.0, 295.0, 294.0, 279.0, 309.0, 318.0, 285.0, 294.0, 294.0, 288.0, 289.0, 293.0, 283.0, 287.0, 288.0, 285.0, 311.0, 319.0, 285.0, 291.0, 284.0, 289.0, 288.0, 291.0, 285.0, 294.0, 256.0, 268.0, 294.0, 293.0, 290.0, 292.0, 297.0, 287.0, 286.0, 290.0, 288.0, 288.0, 281.0, 301.0, 290.0, 289.0, 151.0, 146.0, 287.0, 292.0, 299.0, 283.0, 296.0, 286.0, 294.0, 288.0, 287.0, 289.0, 291.0, 282.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7032903656120397, "mean_inference_ms": 1.2598923935864008, "mean_action_processing_ms": 0.13456767469158165, "mean_env_wait_ms": 0.8468338102519962, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 636.0, "episode_reward_min": 297.0, "episode_reward_mean": 572.41, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 146.0}, "policy_reward_max": {"ppo": 326.0}, "policy_reward_mean": {"ppo": 286.205}, "hist_stats": {"episode_reward": [582.0, 630.0, 468.0, 522.0, 522.0, 582.0, 576.0, 582.0, 579.0, 576.0, 351.0, 576.0, 576.0, 633.0, 579.0, 582.0, 582.0, 530.0, 582.0, 573.0, 525.0, 633.0, 579.0, 582.0, 582.0, 633.0, 570.0, 576.0, 579.0, 579.0, 582.0, 582.0, 525.0, 576.0, 627.0, 576.0, 584.0, 582.0, 582.0, 576.0, 576.0, 582.0, 581.0, 582.0, 579.0, 582.0, 530.0, 579.0, 582.0, 579.0, 579.0, 581.0, 573.0, 570.0, 573.0, 579.0, 
536.0, 539.0, 582.0, 636.0, 576.0, 582.0, 630.0, 530.0, 582.0, 587.0, 582.0, 573.0, 527.0, 579.0, 630.0, 582.0, 584.0, 573.0, 627.0, 579.0, 582.0, 582.0, 570.0, 573.0, 630.0, 576.0, 573.0, 579.0, 579.0, 524.0, 587.0, 582.0, 584.0, 576.0, 576.0, 582.0, 579.0, 297.0, 579.0, 582.0, 582.0, 582.0, 576.0, 573.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [289.0, 293.0, 317.0, 313.0, 239.0, 229.0, 259.0, 263.0, 259.0, 263.0, 291.0, 291.0, 287.0, 289.0, 286.0, 296.0, 283.0, 296.0, 286.0, 290.0, 179.0, 172.0, 294.0, 282.0, 294.0, 282.0, 321.0, 312.0, 287.0, 292.0, 292.0, 290.0, 290.0, 292.0, 268.0, 262.0, 291.0, 291.0, 289.0, 284.0, 256.0, 269.0, 326.0, 307.0, 286.0, 293.0, 287.0, 295.0, 295.0, 287.0, 317.0, 316.0, 279.0, 291.0, 291.0, 285.0, 293.0, 286.0, 290.0, 289.0, 286.0, 296.0, 284.0, 298.0, 267.0, 258.0, 293.0, 283.0, 309.0, 318.0, 295.0, 281.0, 303.0, 281.0, 294.0, 288.0, 288.0, 294.0, 287.0, 289.0, 284.0, 292.0, 293.0, 289.0, 297.0, 284.0, 294.0, 288.0, 289.0, 290.0, 286.0, 296.0, 260.0, 270.0, 285.0, 294.0, 295.0, 287.0, 291.0, 288.0, 297.0, 282.0, 290.0, 291.0, 284.0, 289.0, 287.0, 283.0, 284.0, 289.0, 292.0, 287.0, 263.0, 273.0, 271.0, 268.0, 288.0, 294.0, 322.0, 314.0, 286.0, 290.0, 299.0, 283.0, 313.0, 317.0, 266.0, 264.0, 297.0, 285.0, 292.0, 295.0, 287.0, 295.0, 293.0, 280.0, 264.0, 263.0, 292.0, 287.0, 321.0, 309.0, 295.0, 287.0, 289.0, 295.0, 294.0, 279.0, 309.0, 318.0, 285.0, 294.0, 294.0, 288.0, 289.0, 293.0, 283.0, 287.0, 288.0, 285.0, 311.0, 319.0, 285.0, 291.0, 
284.0, 289.0, 288.0, 291.0, 285.0, 294.0, 256.0, 268.0, 294.0, 293.0, 290.0, 292.0, 297.0, 287.0, 286.0, 290.0, 288.0, 288.0, 281.0, 301.0, 290.0, 289.0, 151.0, 146.0, 287.0, 292.0, 299.0, 283.0, 296.0, 286.0, 294.0, 288.0, 287.0, 289.0, 291.0, 282.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7032903656120397, "mean_inference_ms": 1.2598923935864008, "mean_action_processing_ms": 0.13456767469158165, "mean_env_wait_ms": 0.8468338102519962, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 7398400, "num_agent_steps_trained": 7398400, "num_env_steps_sampled": 3699200, "num_env_steps_trained": 3699200, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 3699200, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 7398400, "timers": {"training_iteration_time_ms": 3696.327, "learn_time_ms": 1152.384, "learn_throughput": 11107.41, "synch_weights_time_ms": 12.455}, "counters": {"num_env_steps_sampled": 3699200, "num_env_steps_trained": 3699200, "num_agent_steps_sampled": 7398400, "num_agent_steps_trained": 7398400}, "done": false, "episodes_total": 9248, "training_iteration": 289, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-19-14", "timestamp": 1666581554, "time_this_iter_s": 3.6477789878845215, "time_total_s": 1112.787874698639, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1112.787874698639, "timesteps_since_restore": 0, "iterations_since_restore": 289, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.74, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 199.0, "sparse_reward_min": 100, "sparse_reward_max": 220, "shaped_reward_mean": 178.02, "shaped_reward_min": 97, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.62, "onion_pickup_agent_0_min": 5, "onion_pickup_agent_0_max": 27, "onion_pickup_agent_1_mean": 18.15, "onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 14.48, "useful_onion_pickup_agent_0_min": 4, "useful_onion_pickup_agent_0_max": 27, "useful_onion_pickup_agent_1_mean": 17.89, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 26, "onion_drop_agent_0_mean": 0.09, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.05, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.04, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, 
"useful_onion_drop_agent_1_mean": 0.04, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 14.27, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 26, "potting_onion_agent_1_mean": 17.85, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 26, "dish_pickup_agent_0_mean": 6.15, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 4.77, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.85, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.57, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.2, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.16, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.06, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.05, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 5.78, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.3, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.74, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.28, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.27, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 26, "optimal_onion_potting_agent_1_mean": 17.85, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 26, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.27, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 26, "viable_onion_potting_agent_1_mean": 17.85, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 26, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0031906738877296448, "policy_loss": 0.002833613660186529, "vf_loss": 
7.63752555847168, "vf_explained_var": 0.5924453735351562, "kl": 0.002333316020667553, "entropy": 0.8133819103240967, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 3712000, "num_env_steps_trained": 3712000, "num_agent_steps_sampled": 7424000, "num_agent_steps_trained": 7424000}, "sampler_results": {"episode_reward_max": 636.0, "episode_reward_min": 297.0, "episode_reward_mean": 576.02, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 146.0}, "policy_reward_max": {"ppo": 322.0}, "policy_reward_mean": {"ppo": 288.01}, "custom_metrics": {"sparse_reward_mean": 199.0, "sparse_reward_min": 100, "sparse_reward_max": 220, "shaped_reward_mean": 178.02, "shaped_reward_min": 97, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.62, "onion_pickup_agent_0_min": 5, "onion_pickup_agent_0_max": 27, "onion_pickup_agent_1_mean": 18.15, 
"onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 14.48, "useful_onion_pickup_agent_0_min": 4, "useful_onion_pickup_agent_0_max": 27, "useful_onion_pickup_agent_1_mean": 17.89, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 26, "onion_drop_agent_0_mean": 0.09, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.05, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.04, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.04, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 14.27, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 26, "potting_onion_agent_1_mean": 17.85, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 26, "dish_pickup_agent_0_mean": 6.15, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 4.77, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.85, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.57, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.2, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.16, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.06, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.05, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 5.78, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.3, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.74, "soup_delivery_agent_0_min": 2, 
"soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.28, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.27, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 26, "optimal_onion_potting_agent_1_mean": 17.85, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 26, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.27, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 26, "viable_onion_potting_agent_1_mean": 17.85, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 26, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [525.0, 576.0, 627.0, 576.0, 584.0, 582.0, 582.0, 576.0, 576.0, 582.0, 581.0, 582.0, 579.0, 582.0, 530.0, 579.0, 582.0, 579.0, 579.0, 581.0, 573.0, 570.0, 573.0, 579.0, 536.0, 539.0, 582.0, 636.0, 576.0, 582.0, 630.0, 530.0, 582.0, 587.0, 582.0, 573.0, 527.0, 579.0, 630.0, 582.0, 584.0, 573.0, 627.0, 579.0, 582.0, 582.0, 570.0, 573.0, 630.0, 576.0, 573.0, 579.0, 579.0, 524.0, 587.0, 582.0, 584.0, 576.0, 576.0, 582.0, 579.0, 297.0, 579.0, 582.0, 582.0, 582.0, 576.0, 573.0, 582.0, 576.0, 579.0, 627.0, 576.0, 576.0, 582.0, 582.0, 582.0, 525.0, 579.0, 570.0, 579.0, 579.0, 576.0, 579.0, 525.0, 582.0, 576.0, 579.0, 579.0, 576.0, 573.0, 630.0, 576.0, 579.0, 525.0, 582.0, 576.0, 576.0, 630.0, 633.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [267.0, 258.0, 293.0, 283.0, 309.0, 318.0, 295.0, 281.0, 303.0, 281.0, 294.0, 288.0, 288.0, 294.0, 287.0, 289.0, 284.0, 292.0, 293.0, 289.0, 297.0, 284.0, 294.0, 288.0, 289.0, 290.0, 286.0, 296.0, 260.0, 270.0, 285.0, 294.0, 295.0, 287.0, 291.0, 288.0, 297.0, 282.0, 290.0, 291.0, 284.0, 289.0, 287.0, 283.0, 284.0, 289.0, 292.0, 287.0, 263.0, 
273.0, 271.0, 268.0, 288.0, 294.0, 322.0, 314.0, 286.0, 290.0, 299.0, 283.0, 313.0, 317.0, 266.0, 264.0, 297.0, 285.0, 292.0, 295.0, 287.0, 295.0, 293.0, 280.0, 264.0, 263.0, 292.0, 287.0, 321.0, 309.0, 295.0, 287.0, 289.0, 295.0, 294.0, 279.0, 309.0, 318.0, 285.0, 294.0, 294.0, 288.0, 289.0, 293.0, 283.0, 287.0, 288.0, 285.0, 311.0, 319.0, 285.0, 291.0, 284.0, 289.0, 288.0, 291.0, 285.0, 294.0, 256.0, 268.0, 294.0, 293.0, 290.0, 292.0, 297.0, 287.0, 286.0, 290.0, 288.0, 288.0, 281.0, 301.0, 290.0, 289.0, 151.0, 146.0, 287.0, 292.0, 299.0, 283.0, 296.0, 286.0, 294.0, 288.0, 287.0, 289.0, 291.0, 282.0, 285.0, 297.0, 290.0, 286.0, 297.0, 282.0, 313.0, 314.0, 291.0, 285.0, 285.0, 291.0, 282.0, 300.0, 292.0, 290.0, 284.0, 298.0, 272.0, 253.0, 295.0, 284.0, 290.0, 280.0, 292.0, 287.0, 285.0, 294.0, 291.0, 285.0, 294.0, 285.0, 259.0, 266.0, 291.0, 291.0, 287.0, 289.0, 290.0, 289.0, 290.0, 289.0, 285.0, 291.0, 283.0, 290.0, 321.0, 309.0, 276.0, 300.0, 284.0, 295.0, 261.0, 264.0, 292.0, 290.0, 288.0, 288.0, 289.0, 287.0, 312.0, 318.0, 316.0, 317.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7032018944909996, "mean_inference_ms": 1.2596606000828354, "mean_action_processing_ms": 0.13456065293030725, "mean_env_wait_ms": 0.8467261832828717, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 636.0, "episode_reward_min": 297.0, "episode_reward_mean": 576.02, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 146.0}, "policy_reward_max": {"ppo": 322.0}, "policy_reward_mean": {"ppo": 288.01}, "hist_stats": {"episode_reward": [525.0, 576.0, 627.0, 576.0, 584.0, 582.0, 582.0, 576.0, 576.0, 582.0, 581.0, 582.0, 579.0, 582.0, 530.0, 579.0, 582.0, 579.0, 579.0, 581.0, 573.0, 570.0, 573.0, 579.0, 536.0, 539.0, 582.0, 636.0, 576.0, 582.0, 630.0, 530.0, 582.0, 587.0, 582.0, 573.0, 527.0, 579.0, 630.0, 582.0, 584.0, 573.0, 627.0, 579.0, 582.0, 582.0, 570.0, 573.0, 630.0, 576.0, 573.0, 579.0, 579.0, 524.0, 587.0, 582.0, 584.0, 
576.0, 576.0, 582.0, 579.0, 297.0, 579.0, 582.0, 582.0, 582.0, 576.0, 573.0, 582.0, 576.0, 579.0, 627.0, 576.0, 576.0, 582.0, 582.0, 582.0, 525.0, 579.0, 570.0, 579.0, 579.0, 576.0, 579.0, 525.0, 582.0, 576.0, 579.0, 579.0, 576.0, 573.0, 630.0, 576.0, 579.0, 525.0, 582.0, 576.0, 576.0, 630.0, 633.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [267.0, 258.0, 293.0, 283.0, 309.0, 318.0, 295.0, 281.0, 303.0, 281.0, 294.0, 288.0, 288.0, 294.0, 287.0, 289.0, 284.0, 292.0, 293.0, 289.0, 297.0, 284.0, 294.0, 288.0, 289.0, 290.0, 286.0, 296.0, 260.0, 270.0, 285.0, 294.0, 295.0, 287.0, 291.0, 288.0, 297.0, 282.0, 290.0, 291.0, 284.0, 289.0, 287.0, 283.0, 284.0, 289.0, 292.0, 287.0, 263.0, 273.0, 271.0, 268.0, 288.0, 294.0, 322.0, 314.0, 286.0, 290.0, 299.0, 283.0, 313.0, 317.0, 266.0, 264.0, 297.0, 285.0, 292.0, 295.0, 287.0, 295.0, 293.0, 280.0, 264.0, 263.0, 292.0, 287.0, 321.0, 309.0, 295.0, 287.0, 289.0, 295.0, 294.0, 279.0, 309.0, 318.0, 285.0, 294.0, 294.0, 288.0, 289.0, 293.0, 283.0, 287.0, 288.0, 285.0, 311.0, 319.0, 285.0, 291.0, 284.0, 289.0, 288.0, 291.0, 285.0, 294.0, 256.0, 268.0, 294.0, 293.0, 290.0, 292.0, 297.0, 287.0, 286.0, 290.0, 288.0, 288.0, 281.0, 301.0, 290.0, 289.0, 151.0, 146.0, 287.0, 292.0, 299.0, 283.0, 296.0, 286.0, 294.0, 288.0, 287.0, 289.0, 291.0, 282.0, 285.0, 297.0, 290.0, 286.0, 297.0, 282.0, 313.0, 314.0, 291.0, 285.0, 285.0, 291.0, 282.0, 300.0, 292.0, 290.0, 284.0, 298.0, 272.0, 253.0, 295.0, 284.0, 290.0, 280.0, 292.0, 287.0, 285.0, 294.0, 291.0, 
285.0, 294.0, 285.0, 259.0, 266.0, 291.0, 291.0, 287.0, 289.0, 290.0, 289.0, 290.0, 289.0, 285.0, 291.0, 283.0, 290.0, 321.0, 309.0, 276.0, 300.0, 284.0, 295.0, 261.0, 264.0, 292.0, 290.0, 288.0, 288.0, 289.0, 287.0, 312.0, 318.0, 316.0, 317.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7032018944909996, "mean_inference_ms": 1.2596606000828354, "mean_action_processing_ms": 0.13456065293030725, "mean_env_wait_ms": 0.8467261832828717, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 7424000, "num_agent_steps_trained": 7424000, "num_env_steps_sampled": 3712000, "num_env_steps_trained": 3712000, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 3712000, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 7424000, "timers": {"training_iteration_time_ms": 3681.817, "learn_time_ms": 1145.831, "learn_throughput": 11170.935, "synch_weights_time_ms": 11.814}, "counters": {"num_env_steps_sampled": 3712000, "num_env_steps_trained": 3712000, "num_agent_steps_sampled": 7424000, "num_agent_steps_trained": 7424000}, "done": false, "episodes_total": 9280, "training_iteration": 290, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-19-18", "timestamp": 1666581558, "time_this_iter_s": 3.60448956489563, "time_total_s": 1116.3923642635345, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1116.3923642635345, "timesteps_since_restore": 0, "iterations_since_restore": 290, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.9, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 201.0, "sparse_reward_min": 100, "sparse_reward_max": 220, "shaped_reward_mean": 178.9, "shaped_reward_min": 97, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.5, "onion_pickup_agent_0_min": 5, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 18.36, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 14.31, "useful_onion_pickup_agent_0_min": 4, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 18.13, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 26, "onion_drop_agent_0_mean": 0.08, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.06, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.05, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, 
"useful_onion_drop_agent_1_mean": 0.05, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 14.18, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 18.09, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 26, "dish_pickup_agent_0_mean": 6.25, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 4.8, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.91, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.57, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.2, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.18, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.07, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.05, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 5.83, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.3, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.81, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.28, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.18, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 18.09, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 26, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.18, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 18.09, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 26, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0010251335334032774, "policy_loss": -0.0013847454683855176, "vf_loss": 
7.6011223793029785, "vf_explained_var": 0.5847921371459961, "kl": 0.0018953521503135562, "entropy": 0.8009949922561646, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 3724800, "num_env_steps_trained": 3724800, "num_agent_steps_sampled": 7449600, "num_agent_steps_trained": 7449600}, "sampler_results": {"episode_reward_max": 639.0, "episode_reward_min": 297.0, "episode_reward_mean": 580.9, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 146.0}, "policy_reward_max": {"ppo": 322.0}, "policy_reward_mean": {"ppo": 290.45}, "custom_metrics": {"sparse_reward_mean": 201.0, "sparse_reward_min": 100, "sparse_reward_max": 220, "shaped_reward_mean": 178.9, "shaped_reward_min": 97, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.5, "onion_pickup_agent_0_min": 5, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 18.36, 
"onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 14.31, "useful_onion_pickup_agent_0_min": 4, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 18.13, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 26, "onion_drop_agent_0_mean": 0.08, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.06, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.05, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.05, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 14.18, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 18.09, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 26, "dish_pickup_agent_0_mean": 6.25, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 4.8, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.91, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.57, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.2, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.18, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.07, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.05, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 5.83, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.3, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.81, "soup_delivery_agent_0_min": 2, 
"soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.28, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.18, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 18.09, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 26, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.18, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 18.09, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 26, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 587.0, 582.0, 573.0, 527.0, 579.0, 630.0, 582.0, 584.0, 573.0, 627.0, 579.0, 582.0, 582.0, 570.0, 573.0, 630.0, 576.0, 573.0, 579.0, 579.0, 524.0, 587.0, 582.0, 584.0, 576.0, 576.0, 582.0, 579.0, 297.0, 579.0, 582.0, 582.0, 582.0, 576.0, 573.0, 582.0, 576.0, 579.0, 627.0, 576.0, 576.0, 582.0, 582.0, 582.0, 525.0, 579.0, 570.0, 579.0, 579.0, 576.0, 579.0, 525.0, 582.0, 576.0, 579.0, 579.0, 576.0, 573.0, 630.0, 576.0, 579.0, 525.0, 582.0, 576.0, 576.0, 630.0, 633.0, 587.0, 582.0, 576.0, 579.0, 582.0, 633.0, 576.0, 624.0, 582.0, 576.0, 627.0, 639.0, 627.0, 627.0, 573.0, 573.0, 579.0, 579.0, 582.0, 633.0, 581.0, 579.0, 579.0, 582.0, 576.0, 579.0, 573.0, 576.0, 582.0, 579.0, 630.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [297.0, 285.0, 292.0, 295.0, 287.0, 295.0, 293.0, 280.0, 264.0, 263.0, 292.0, 287.0, 321.0, 309.0, 295.0, 287.0, 289.0, 295.0, 294.0, 279.0, 309.0, 318.0, 285.0, 294.0, 294.0, 288.0, 289.0, 293.0, 283.0, 287.0, 288.0, 285.0, 311.0, 319.0, 285.0, 291.0, 284.0, 289.0, 288.0, 291.0, 285.0, 294.0, 256.0, 268.0, 294.0, 293.0, 290.0, 292.0, 297.0, 
287.0, 286.0, 290.0, 288.0, 288.0, 281.0, 301.0, 290.0, 289.0, 151.0, 146.0, 287.0, 292.0, 299.0, 283.0, 296.0, 286.0, 294.0, 288.0, 287.0, 289.0, 291.0, 282.0, 285.0, 297.0, 290.0, 286.0, 297.0, 282.0, 313.0, 314.0, 291.0, 285.0, 285.0, 291.0, 282.0, 300.0, 292.0, 290.0, 284.0, 298.0, 272.0, 253.0, 295.0, 284.0, 290.0, 280.0, 292.0, 287.0, 285.0, 294.0, 291.0, 285.0, 294.0, 285.0, 259.0, 266.0, 291.0, 291.0, 287.0, 289.0, 290.0, 289.0, 290.0, 289.0, 285.0, 291.0, 283.0, 290.0, 321.0, 309.0, 276.0, 300.0, 284.0, 295.0, 261.0, 264.0, 292.0, 290.0, 288.0, 288.0, 289.0, 287.0, 312.0, 318.0, 316.0, 317.0, 297.0, 290.0, 293.0, 289.0, 290.0, 286.0, 289.0, 290.0, 287.0, 295.0, 313.0, 320.0, 291.0, 285.0, 318.0, 306.0, 289.0, 293.0, 288.0, 288.0, 318.0, 309.0, 317.0, 322.0, 309.0, 318.0, 314.0, 313.0, 280.0, 293.0, 289.0, 284.0, 288.0, 291.0, 290.0, 289.0, 292.0, 290.0, 317.0, 316.0, 295.0, 286.0, 287.0, 292.0, 288.0, 291.0, 294.0, 288.0, 287.0, 289.0, 289.0, 290.0, 291.0, 282.0, 288.0, 288.0, 293.0, 289.0, 285.0, 294.0, 308.0, 322.0, 294.0, 288.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7031017145418876, "mean_inference_ms": 1.2594336419828618, "mean_action_processing_ms": 0.13455223025516794, "mean_env_wait_ms": 0.8466123493119135, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 639.0, "episode_reward_min": 297.0, "episode_reward_mean": 580.9, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 146.0}, "policy_reward_max": {"ppo": 322.0}, "policy_reward_mean": {"ppo": 290.45}, "hist_stats": {"episode_reward": [582.0, 587.0, 582.0, 573.0, 527.0, 579.0, 630.0, 582.0, 584.0, 573.0, 627.0, 579.0, 582.0, 582.0, 570.0, 573.0, 630.0, 576.0, 573.0, 579.0, 579.0, 524.0, 587.0, 582.0, 584.0, 576.0, 576.0, 582.0, 579.0, 297.0, 579.0, 582.0, 582.0, 582.0, 576.0, 573.0, 582.0, 576.0, 579.0, 627.0, 576.0, 576.0, 582.0, 582.0, 582.0, 525.0, 579.0, 570.0, 579.0, 579.0, 576.0, 579.0, 525.0, 582.0, 576.0, 579.0, 579.0, 
576.0, 573.0, 630.0, 576.0, 579.0, 525.0, 582.0, 576.0, 576.0, 630.0, 633.0, 587.0, 582.0, 576.0, 579.0, 582.0, 633.0, 576.0, 624.0, 582.0, 576.0, 627.0, 639.0, 627.0, 627.0, 573.0, 573.0, 579.0, 579.0, 582.0, 633.0, 581.0, 579.0, 579.0, 582.0, 576.0, 579.0, 573.0, 576.0, 582.0, 579.0, 630.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [297.0, 285.0, 292.0, 295.0, 287.0, 295.0, 293.0, 280.0, 264.0, 263.0, 292.0, 287.0, 321.0, 309.0, 295.0, 287.0, 289.0, 295.0, 294.0, 279.0, 309.0, 318.0, 285.0, 294.0, 294.0, 288.0, 289.0, 293.0, 283.0, 287.0, 288.0, 285.0, 311.0, 319.0, 285.0, 291.0, 284.0, 289.0, 288.0, 291.0, 285.0, 294.0, 256.0, 268.0, 294.0, 293.0, 290.0, 292.0, 297.0, 287.0, 286.0, 290.0, 288.0, 288.0, 281.0, 301.0, 290.0, 289.0, 151.0, 146.0, 287.0, 292.0, 299.0, 283.0, 296.0, 286.0, 294.0, 288.0, 287.0, 289.0, 291.0, 282.0, 285.0, 297.0, 290.0, 286.0, 297.0, 282.0, 313.0, 314.0, 291.0, 285.0, 285.0, 291.0, 282.0, 300.0, 292.0, 290.0, 284.0, 298.0, 272.0, 253.0, 295.0, 284.0, 290.0, 280.0, 292.0, 287.0, 285.0, 294.0, 291.0, 285.0, 294.0, 285.0, 259.0, 266.0, 291.0, 291.0, 287.0, 289.0, 290.0, 289.0, 290.0, 289.0, 285.0, 291.0, 283.0, 290.0, 321.0, 309.0, 276.0, 300.0, 284.0, 295.0, 261.0, 264.0, 292.0, 290.0, 288.0, 288.0, 289.0, 287.0, 312.0, 318.0, 316.0, 317.0, 297.0, 290.0, 293.0, 289.0, 290.0, 286.0, 289.0, 290.0, 287.0, 295.0, 313.0, 320.0, 291.0, 285.0, 318.0, 306.0, 289.0, 293.0, 288.0, 288.0, 318.0, 309.0, 317.0, 322.0, 309.0, 318.0, 314.0, 313.0, 280.0, 
293.0, 289.0, 284.0, 288.0, 291.0, 290.0, 289.0, 292.0, 290.0, 317.0, 316.0, 295.0, 286.0, 287.0, 292.0, 288.0, 291.0, 294.0, 288.0, 287.0, 289.0, 289.0, 290.0, 291.0, 282.0, 288.0, 288.0, 293.0, 289.0, 285.0, 294.0, 308.0, 322.0, 294.0, 288.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7031017145418876, "mean_inference_ms": 1.2594336419828618, "mean_action_processing_ms": 0.13455223025516794, "mean_env_wait_ms": 0.8466123493119135, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 7449600, "num_agent_steps_trained": 7449600, "num_env_steps_sampled": 3724800, "num_env_steps_trained": 3724800, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 3724800, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 7449600, "timers": {"training_iteration_time_ms": 3651.182, "learn_time_ms": 1146.906, "learn_throughput": 11160.463, "synch_weights_time_ms": 11.194}, "counters": {"num_env_steps_sampled": 3724800, "num_env_steps_trained": 3724800, "num_agent_steps_sampled": 7449600, "num_agent_steps_trained": 7449600}, "done": false, "episodes_total": 9312, "training_iteration": 291, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-19-22", "timestamp": 1666581562, "time_this_iter_s": 3.6406378746032715, "time_total_s": 1120.0330021381378, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1120.0330021381378, "timesteps_since_restore": 0, "iterations_since_restore": 291, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.68, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 202.2, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 179.5, "shaped_reward_min": 156, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.79, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 18.31, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 14.58, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 18.06, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 25, "onion_drop_agent_0_mean": 0.11, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.08, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.05, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, 
"useful_onion_drop_agent_1_mean": 0.07, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.39, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 17.98, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 25, "dish_pickup_agent_0_mean": 6.17, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 4.9, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.86, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.67, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.15, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.19, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.05, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.05, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 5.76, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.4, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.74, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.4, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.39, "optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 17.98, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 25, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.39, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 17.98, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 25, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0013125705299898982, "policy_loss": 0.0009523761691525578, "vf_loss": 
7.632638454437256, "vf_explained_var": 0.6011805534362793, "kl": 0.002176450565457344, "entropy": 0.8061335682868958, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 3737600, "num_env_steps_trained": 3737600, "num_agent_steps_sampled": 7475200, "num_agent_steps_trained": 7475200}, "sampler_results": {"episode_reward_max": 639.0, "episode_reward_min": 516.0, "episode_reward_mean": 583.9, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 253.0}, "policy_reward_max": {"ppo": 322.0}, "policy_reward_mean": {"ppo": 291.95}, "custom_metrics": {"sparse_reward_mean": 202.2, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 179.5, "shaped_reward_min": 156, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.79, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 18.31, 
"onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 14.58, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 18.06, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 25, "onion_drop_agent_0_mean": 0.11, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.08, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.05, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.07, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.39, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 17.98, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 25, "dish_pickup_agent_0_mean": 6.17, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 4.9, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.86, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.67, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.15, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.19, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.05, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.05, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 5.76, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.4, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.74, "soup_delivery_agent_0_min": 2, 
"soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.4, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.39, "optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 17.98, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 25, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.39, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 17.98, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 25, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 582.0, 576.0, 573.0, 582.0, 576.0, 579.0, 627.0, 576.0, 576.0, 582.0, 582.0, 582.0, 525.0, 579.0, 570.0, 579.0, 579.0, 576.0, 579.0, 525.0, 582.0, 576.0, 579.0, 579.0, 576.0, 573.0, 630.0, 576.0, 579.0, 525.0, 582.0, 576.0, 576.0, 630.0, 633.0, 587.0, 582.0, 576.0, 579.0, 582.0, 633.0, 576.0, 624.0, 582.0, 576.0, 627.0, 639.0, 627.0, 627.0, 573.0, 573.0, 579.0, 579.0, 582.0, 633.0, 581.0, 579.0, 579.0, 582.0, 576.0, 579.0, 573.0, 576.0, 582.0, 579.0, 630.0, 582.0, 576.0, 576.0, 530.0, 582.0, 579.0, 582.0, 579.0, 582.0, 633.0, 582.0, 579.0, 516.0, 587.0, 627.0, 627.0, 582.0, 579.0, 570.0, 624.0, 522.0, 581.0, 519.0, 630.0, 630.0, 579.0, 579.0, 582.0, 582.0, 579.0, 573.0, 576.0, 573.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [296.0, 286.0, 294.0, 288.0, 287.0, 289.0, 291.0, 282.0, 285.0, 297.0, 290.0, 286.0, 297.0, 282.0, 313.0, 314.0, 291.0, 285.0, 285.0, 291.0, 282.0, 300.0, 292.0, 290.0, 284.0, 298.0, 272.0, 253.0, 295.0, 284.0, 290.0, 280.0, 292.0, 287.0, 285.0, 294.0, 291.0, 285.0, 294.0, 285.0, 259.0, 266.0, 291.0, 291.0, 287.0, 289.0, 290.0, 289.0, 290.0, 
289.0, 285.0, 291.0, 283.0, 290.0, 321.0, 309.0, 276.0, 300.0, 284.0, 295.0, 261.0, 264.0, 292.0, 290.0, 288.0, 288.0, 289.0, 287.0, 312.0, 318.0, 316.0, 317.0, 297.0, 290.0, 293.0, 289.0, 290.0, 286.0, 289.0, 290.0, 287.0, 295.0, 313.0, 320.0, 291.0, 285.0, 318.0, 306.0, 289.0, 293.0, 288.0, 288.0, 318.0, 309.0, 317.0, 322.0, 309.0, 318.0, 314.0, 313.0, 280.0, 293.0, 289.0, 284.0, 288.0, 291.0, 290.0, 289.0, 292.0, 290.0, 317.0, 316.0, 295.0, 286.0, 287.0, 292.0, 288.0, 291.0, 294.0, 288.0, 287.0, 289.0, 289.0, 290.0, 291.0, 282.0, 288.0, 288.0, 293.0, 289.0, 285.0, 294.0, 308.0, 322.0, 294.0, 288.0, 284.0, 292.0, 289.0, 287.0, 267.0, 263.0, 296.0, 286.0, 289.0, 290.0, 294.0, 288.0, 291.0, 288.0, 293.0, 289.0, 318.0, 315.0, 290.0, 292.0, 285.0, 294.0, 257.0, 259.0, 295.0, 292.0, 311.0, 316.0, 321.0, 306.0, 294.0, 288.0, 288.0, 291.0, 277.0, 293.0, 316.0, 308.0, 262.0, 260.0, 290.0, 291.0, 254.0, 265.0, 315.0, 315.0, 308.0, 322.0, 289.0, 290.0, 289.0, 290.0, 289.0, 293.0, 290.0, 292.0, 288.0, 291.0, 283.0, 290.0, 291.0, 285.0, 274.0, 299.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7029979193287846, "mean_inference_ms": 1.2592075008193908, "mean_action_processing_ms": 0.1345427518722338, "mean_env_wait_ms": 0.846499986824968, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 639.0, "episode_reward_min": 516.0, "episode_reward_mean": 583.9, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 253.0}, "policy_reward_max": {"ppo": 322.0}, "policy_reward_mean": {"ppo": 291.95}, "hist_stats": {"episode_reward": [582.0, 582.0, 576.0, 573.0, 582.0, 576.0, 579.0, 627.0, 576.0, 576.0, 582.0, 582.0, 582.0, 525.0, 579.0, 570.0, 579.0, 579.0, 576.0, 579.0, 525.0, 582.0, 576.0, 579.0, 579.0, 576.0, 573.0, 630.0, 576.0, 579.0, 525.0, 582.0, 576.0, 576.0, 630.0, 633.0, 587.0, 582.0, 576.0, 579.0, 582.0, 633.0, 576.0, 624.0, 582.0, 576.0, 627.0, 639.0, 627.0, 627.0, 573.0, 573.0, 579.0, 579.0, 582.0, 633.0, 581.0, 
579.0, 579.0, 582.0, 576.0, 579.0, 573.0, 576.0, 582.0, 579.0, 630.0, 582.0, 576.0, 576.0, 530.0, 582.0, 579.0, 582.0, 579.0, 582.0, 633.0, 582.0, 579.0, 516.0, 587.0, 627.0, 627.0, 582.0, 579.0, 570.0, 624.0, 522.0, 581.0, 519.0, 630.0, 630.0, 579.0, 579.0, 582.0, 582.0, 579.0, 573.0, 576.0, 573.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [296.0, 286.0, 294.0, 288.0, 287.0, 289.0, 291.0, 282.0, 285.0, 297.0, 290.0, 286.0, 297.0, 282.0, 313.0, 314.0, 291.0, 285.0, 285.0, 291.0, 282.0, 300.0, 292.0, 290.0, 284.0, 298.0, 272.0, 253.0, 295.0, 284.0, 290.0, 280.0, 292.0, 287.0, 285.0, 294.0, 291.0, 285.0, 294.0, 285.0, 259.0, 266.0, 291.0, 291.0, 287.0, 289.0, 290.0, 289.0, 290.0, 289.0, 285.0, 291.0, 283.0, 290.0, 321.0, 309.0, 276.0, 300.0, 284.0, 295.0, 261.0, 264.0, 292.0, 290.0, 288.0, 288.0, 289.0, 287.0, 312.0, 318.0, 316.0, 317.0, 297.0, 290.0, 293.0, 289.0, 290.0, 286.0, 289.0, 290.0, 287.0, 295.0, 313.0, 320.0, 291.0, 285.0, 318.0, 306.0, 289.0, 293.0, 288.0, 288.0, 318.0, 309.0, 317.0, 322.0, 309.0, 318.0, 314.0, 313.0, 280.0, 293.0, 289.0, 284.0, 288.0, 291.0, 290.0, 289.0, 292.0, 290.0, 317.0, 316.0, 295.0, 286.0, 287.0, 292.0, 288.0, 291.0, 294.0, 288.0, 287.0, 289.0, 289.0, 290.0, 291.0, 282.0, 288.0, 288.0, 293.0, 289.0, 285.0, 294.0, 308.0, 322.0, 294.0, 288.0, 284.0, 292.0, 289.0, 287.0, 267.0, 263.0, 296.0, 286.0, 289.0, 290.0, 294.0, 288.0, 291.0, 288.0, 293.0, 289.0, 318.0, 315.0, 290.0, 292.0, 285.0, 294.0, 257.0, 259.0, 295.0, 292.0, 311.0, 316.0, 321.0, 
306.0, 294.0, 288.0, 288.0, 291.0, 277.0, 293.0, 316.0, 308.0, 262.0, 260.0, 290.0, 291.0, 254.0, 265.0, 315.0, 315.0, 308.0, 322.0, 289.0, 290.0, 289.0, 290.0, 289.0, 293.0, 290.0, 292.0, 288.0, 291.0, 283.0, 290.0, 291.0, 285.0, 274.0, 299.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7029979193287846, "mean_inference_ms": 1.2592075008193908, "mean_action_processing_ms": 0.1345427518722338, "mean_env_wait_ms": 0.846499986824968, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 7475200, "num_agent_steps_trained": 7475200, "num_env_steps_sampled": 3737600, "num_env_steps_trained": 3737600, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 3737600, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 7475200, "timers": {"training_iteration_time_ms": 3645.821, "learn_time_ms": 1146.461, "learn_throughput": 11164.789, "synch_weights_time_ms": 11.016}, "counters": {"num_env_steps_sampled": 3737600, "num_env_steps_trained": 3737600, "num_agent_steps_sampled": 7475200, "num_agent_steps_trained": 7475200}, "done": false, "episodes_total": 9344, "training_iteration": 292, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-19-26", "timestamp": 1666581566, "time_this_iter_s": 3.722670793533325, "time_total_s": 1123.7556729316711, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1123.7556729316711, "timesteps_since_restore": 0, "iterations_since_restore": 292, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.683333333333334, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 203.0, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 180.32, "shaped_reward_min": 156, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.34, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 17.92, "onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 15.13, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 25, "useful_onion_pickup_agent_1_mean": 17.72, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 26, "onion_drop_agent_0_mean": 0.12, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.08, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.04, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, 
"useful_onion_drop_agent_1_mean": 0.05, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.92, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 17.56, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 25, "dish_pickup_agent_0_mean": 6.01, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.09, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.71, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.85, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.14, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.17, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.62, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.62, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.59, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.59, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.92, "optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 17.56, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 25, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.92, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 17.56, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 25, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0003355878870934248, "policy_loss": -9.82009805738926e-06, "vf_loss": 
7.542318344116211, "vf_explained_var": 0.6062654256820679, "kl": 0.002053479664027691, "entropy": 0.8176416158676147, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 3750400, "num_env_steps_trained": 3750400, "num_agent_steps_sampled": 7500800, "num_agent_steps_trained": 7500800}, "sampler_results": {"episode_reward_max": 639.0, "episode_reward_min": 516.0, "episode_reward_mean": 586.32, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 254.0}, "policy_reward_max": {"ppo": 322.0}, "policy_reward_mean": {"ppo": 293.16}, "custom_metrics": {"sparse_reward_mean": 203.0, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 180.32, "shaped_reward_min": 156, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.34, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 17.92, 
"onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 15.13, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 25, "useful_onion_pickup_agent_1_mean": 17.72, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 26, "onion_drop_agent_0_mean": 0.12, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.08, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.04, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.05, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.92, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 17.56, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 25, "dish_pickup_agent_0_mean": 6.01, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.09, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.71, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.85, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.14, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.17, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.62, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.62, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.59, "soup_delivery_agent_0_min": 2, 
"soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.59, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.92, "optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 17.56, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 25, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.92, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 17.56, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 25, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [576.0, 576.0, 630.0, 633.0, 587.0, 582.0, 576.0, 579.0, 582.0, 633.0, 576.0, 624.0, 582.0, 576.0, 627.0, 639.0, 627.0, 627.0, 573.0, 573.0, 579.0, 579.0, 582.0, 633.0, 581.0, 579.0, 579.0, 582.0, 576.0, 579.0, 573.0, 576.0, 582.0, 579.0, 630.0, 582.0, 576.0, 576.0, 530.0, 582.0, 579.0, 582.0, 579.0, 582.0, 633.0, 582.0, 579.0, 516.0, 587.0, 627.0, 627.0, 582.0, 579.0, 570.0, 624.0, 522.0, 581.0, 519.0, 630.0, 630.0, 579.0, 579.0, 582.0, 582.0, 579.0, 573.0, 576.0, 573.0, 570.0, 573.0, 582.0, 573.0, 579.0, 633.0, 582.0, 579.0, 624.0, 573.0, 579.0, 582.0, 587.0, 522.0, 576.0, 630.0, 570.0, 587.0, 579.0, 582.0, 576.0, 582.0, 576.0, 582.0, 579.0, 582.0, 584.0, 633.0, 582.0, 587.0, 579.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [288.0, 288.0, 289.0, 287.0, 312.0, 318.0, 316.0, 317.0, 297.0, 290.0, 293.0, 289.0, 290.0, 286.0, 289.0, 290.0, 287.0, 295.0, 313.0, 320.0, 291.0, 285.0, 318.0, 306.0, 289.0, 293.0, 288.0, 288.0, 318.0, 309.0, 317.0, 322.0, 309.0, 318.0, 314.0, 313.0, 280.0, 293.0, 289.0, 284.0, 288.0, 291.0, 290.0, 289.0, 292.0, 290.0, 317.0, 316.0, 295.0, 
286.0, 287.0, 292.0, 288.0, 291.0, 294.0, 288.0, 287.0, 289.0, 289.0, 290.0, 291.0, 282.0, 288.0, 288.0, 293.0, 289.0, 285.0, 294.0, 308.0, 322.0, 294.0, 288.0, 284.0, 292.0, 289.0, 287.0, 267.0, 263.0, 296.0, 286.0, 289.0, 290.0, 294.0, 288.0, 291.0, 288.0, 293.0, 289.0, 318.0, 315.0, 290.0, 292.0, 285.0, 294.0, 257.0, 259.0, 295.0, 292.0, 311.0, 316.0, 321.0, 306.0, 294.0, 288.0, 288.0, 291.0, 277.0, 293.0, 316.0, 308.0, 262.0, 260.0, 290.0, 291.0, 254.0, 265.0, 315.0, 315.0, 308.0, 322.0, 289.0, 290.0, 289.0, 290.0, 289.0, 293.0, 290.0, 292.0, 288.0, 291.0, 283.0, 290.0, 291.0, 285.0, 274.0, 299.0, 288.0, 282.0, 288.0, 285.0, 288.0, 294.0, 288.0, 285.0, 290.0, 289.0, 317.0, 316.0, 290.0, 292.0, 291.0, 288.0, 310.0, 314.0, 289.0, 284.0, 293.0, 286.0, 295.0, 287.0, 288.0, 299.0, 259.0, 263.0, 286.0, 290.0, 310.0, 320.0, 288.0, 282.0, 295.0, 292.0, 295.0, 284.0, 292.0, 290.0, 288.0, 288.0, 291.0, 291.0, 285.0, 291.0, 292.0, 290.0, 290.0, 289.0, 291.0, 291.0, 290.0, 294.0, 314.0, 319.0, 289.0, 293.0, 292.0, 295.0, 293.0, 286.0, 294.0, 288.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7029100265072316, "mean_inference_ms": 1.2589701047132034, "mean_action_processing_ms": 0.1345321957448095, "mean_env_wait_ms": 0.8463812083404905, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 639.0, "episode_reward_min": 516.0, "episode_reward_mean": 586.32, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 254.0}, "policy_reward_max": {"ppo": 322.0}, "policy_reward_mean": {"ppo": 293.16}, "hist_stats": {"episode_reward": [576.0, 576.0, 630.0, 633.0, 587.0, 582.0, 576.0, 579.0, 582.0, 633.0, 576.0, 624.0, 582.0, 576.0, 627.0, 639.0, 627.0, 627.0, 573.0, 573.0, 579.0, 579.0, 582.0, 633.0, 581.0, 579.0, 579.0, 582.0, 576.0, 579.0, 573.0, 576.0, 582.0, 579.0, 630.0, 582.0, 576.0, 576.0, 530.0, 582.0, 579.0, 582.0, 579.0, 582.0, 633.0, 582.0, 579.0, 516.0, 587.0, 627.0, 627.0, 582.0, 579.0, 570.0, 624.0, 522.0, 581.0, 
519.0, 630.0, 630.0, 579.0, 579.0, 582.0, 582.0, 579.0, 573.0, 576.0, 573.0, 570.0, 573.0, 582.0, 573.0, 579.0, 633.0, 582.0, 579.0, 624.0, 573.0, 579.0, 582.0, 587.0, 522.0, 576.0, 630.0, 570.0, 587.0, 579.0, 582.0, 576.0, 582.0, 576.0, 582.0, 579.0, 582.0, 584.0, 633.0, 582.0, 587.0, 579.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [288.0, 288.0, 289.0, 287.0, 312.0, 318.0, 316.0, 317.0, 297.0, 290.0, 293.0, 289.0, 290.0, 286.0, 289.0, 290.0, 287.0, 295.0, 313.0, 320.0, 291.0, 285.0, 318.0, 306.0, 289.0, 293.0, 288.0, 288.0, 318.0, 309.0, 317.0, 322.0, 309.0, 318.0, 314.0, 313.0, 280.0, 293.0, 289.0, 284.0, 288.0, 291.0, 290.0, 289.0, 292.0, 290.0, 317.0, 316.0, 295.0, 286.0, 287.0, 292.0, 288.0, 291.0, 294.0, 288.0, 287.0, 289.0, 289.0, 290.0, 291.0, 282.0, 288.0, 288.0, 293.0, 289.0, 285.0, 294.0, 308.0, 322.0, 294.0, 288.0, 284.0, 292.0, 289.0, 287.0, 267.0, 263.0, 296.0, 286.0, 289.0, 290.0, 294.0, 288.0, 291.0, 288.0, 293.0, 289.0, 318.0, 315.0, 290.0, 292.0, 285.0, 294.0, 257.0, 259.0, 295.0, 292.0, 311.0, 316.0, 321.0, 306.0, 294.0, 288.0, 288.0, 291.0, 277.0, 293.0, 316.0, 308.0, 262.0, 260.0, 290.0, 291.0, 254.0, 265.0, 315.0, 315.0, 308.0, 322.0, 289.0, 290.0, 289.0, 290.0, 289.0, 293.0, 290.0, 292.0, 288.0, 291.0, 283.0, 290.0, 291.0, 285.0, 274.0, 299.0, 288.0, 282.0, 288.0, 285.0, 288.0, 294.0, 288.0, 285.0, 290.0, 289.0, 317.0, 316.0, 290.0, 292.0, 291.0, 288.0, 310.0, 314.0, 289.0, 284.0, 293.0, 286.0, 295.0, 287.0, 288.0, 299.0, 259.0, 263.0, 286.0, 
290.0, 310.0, 320.0, 288.0, 282.0, 295.0, 292.0, 295.0, 284.0, 292.0, 290.0, 288.0, 288.0, 291.0, 291.0, 285.0, 291.0, 292.0, 290.0, 290.0, 289.0, 291.0, 291.0, 290.0, 294.0, 314.0, 319.0, 289.0, 293.0, 292.0, 295.0, 293.0, 286.0, 294.0, 288.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7029100265072316, "mean_inference_ms": 1.2589701047132034, "mean_action_processing_ms": 0.1345321957448095, "mean_env_wait_ms": 0.8463812083404905, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 7500800, "num_agent_steps_trained": 7500800, "num_env_steps_sampled": 3750400, "num_env_steps_trained": 3750400, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 3750400, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 7500800, "timers": {"training_iteration_time_ms": 3628.866, "learn_time_ms": 1127.478, "learn_throughput": 11352.771, "synch_weights_time_ms": 11.607}, "counters": {"num_env_steps_sampled": 3750400, "num_env_steps_trained": 3750400, "num_agent_steps_sampled": 7500800, "num_agent_steps_trained": 7500800}, "done": false, "episodes_total": 9376, "training_iteration": 293, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-19-30", "timestamp": 1666581570, "time_this_iter_s": 3.6530778408050537, "time_total_s": 1127.4087507724762, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1127.4087507724762, "timesteps_since_restore": 0, "iterations_since_restore": 293, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.82, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 201.6, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 179.77, "shaped_reward_min": 156, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.24, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 18.04, "onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 15.07, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 25, "useful_onion_pickup_agent_1_mean": 17.82, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 26, "onion_drop_agent_0_mean": 0.13, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.09, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.05, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, 
"useful_onion_drop_agent_1_mean": 0.05, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.78, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 17.67, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 25, "dish_pickup_agent_0_mean": 6.02, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.03, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.73, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.76, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.14, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.16, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.59, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.6, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.56, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.55, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.78, "optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 17.67, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 25, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.78, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 17.67, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 25, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0026967034209519625, "policy_loss": -0.0030466155149042606, "vf_loss": 
7.593386650085449, "vf_explained_var": 0.6006097793579102, "kl": 0.0022225300781428814, "entropy": 0.81884765625, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 3763200, "num_env_steps_trained": 3763200, "num_agent_steps_sampled": 7526400, "num_agent_steps_trained": 7526400}, "sampler_results": {"episode_reward_max": 633.0, "episode_reward_min": 516.0, "episode_reward_mean": 582.97, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 254.0}, "policy_reward_max": {"ppo": 322.0}, "policy_reward_mean": {"ppo": 291.485}, "custom_metrics": {"sparse_reward_mean": 201.6, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 179.77, "shaped_reward_min": 156, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.24, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 18.04, 
"onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 15.07, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 25, "useful_onion_pickup_agent_1_mean": 17.82, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 26, "onion_drop_agent_0_mean": 0.13, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.09, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.05, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.05, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.78, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 17.67, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 25, "dish_pickup_agent_0_mean": 6.02, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.03, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.73, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.76, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.14, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.16, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.59, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.6, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.56, "soup_delivery_agent_0_min": 2, 
"soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.55, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.78, "optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 17.67, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 25, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.78, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 17.67, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 25, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 579.0, 630.0, 582.0, 576.0, 576.0, 530.0, 582.0, 579.0, 582.0, 579.0, 582.0, 633.0, 582.0, 579.0, 516.0, 587.0, 627.0, 627.0, 582.0, 579.0, 570.0, 624.0, 522.0, 581.0, 519.0, 630.0, 630.0, 579.0, 579.0, 582.0, 582.0, 579.0, 573.0, 576.0, 573.0, 570.0, 573.0, 582.0, 573.0, 579.0, 633.0, 582.0, 579.0, 624.0, 573.0, 579.0, 582.0, 587.0, 522.0, 576.0, 630.0, 570.0, 587.0, 579.0, 582.0, 576.0, 582.0, 576.0, 582.0, 579.0, 582.0, 584.0, 633.0, 582.0, 587.0, 579.0, 582.0, 582.0, 582.0, 579.0, 573.0, 584.0, 573.0, 576.0, 633.0, 573.0, 579.0, 590.0, 573.0, 582.0, 582.0, 576.0, 582.0, 579.0, 573.0, 582.0, 522.0, 573.0, 630.0, 584.0, 582.0, 579.0, 582.0, 579.0, 587.0, 579.0, 582.0, 579.0, 630.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [293.0, 289.0, 285.0, 294.0, 308.0, 322.0, 294.0, 288.0, 284.0, 292.0, 289.0, 287.0, 267.0, 263.0, 296.0, 286.0, 289.0, 290.0, 294.0, 288.0, 291.0, 288.0, 293.0, 289.0, 318.0, 315.0, 290.0, 292.0, 285.0, 294.0, 257.0, 259.0, 295.0, 292.0, 311.0, 316.0, 321.0, 306.0, 294.0, 288.0, 288.0, 291.0, 277.0, 293.0, 316.0, 308.0, 262.0, 260.0, 290.0, 
291.0, 254.0, 265.0, 315.0, 315.0, 308.0, 322.0, 289.0, 290.0, 289.0, 290.0, 289.0, 293.0, 290.0, 292.0, 288.0, 291.0, 283.0, 290.0, 291.0, 285.0, 274.0, 299.0, 288.0, 282.0, 288.0, 285.0, 288.0, 294.0, 288.0, 285.0, 290.0, 289.0, 317.0, 316.0, 290.0, 292.0, 291.0, 288.0, 310.0, 314.0, 289.0, 284.0, 293.0, 286.0, 295.0, 287.0, 288.0, 299.0, 259.0, 263.0, 286.0, 290.0, 310.0, 320.0, 288.0, 282.0, 295.0, 292.0, 295.0, 284.0, 292.0, 290.0, 288.0, 288.0, 291.0, 291.0, 285.0, 291.0, 292.0, 290.0, 290.0, 289.0, 291.0, 291.0, 290.0, 294.0, 314.0, 319.0, 289.0, 293.0, 292.0, 295.0, 293.0, 286.0, 294.0, 288.0, 284.0, 298.0, 291.0, 291.0, 291.0, 288.0, 291.0, 282.0, 284.0, 300.0, 284.0, 289.0, 285.0, 291.0, 318.0, 315.0, 282.0, 291.0, 294.0, 285.0, 294.0, 296.0, 291.0, 282.0, 291.0, 291.0, 291.0, 291.0, 283.0, 293.0, 296.0, 286.0, 291.0, 288.0, 280.0, 293.0, 287.0, 295.0, 261.0, 261.0, 284.0, 289.0, 311.0, 319.0, 295.0, 289.0, 293.0, 289.0, 294.0, 285.0, 291.0, 291.0, 292.0, 287.0, 292.0, 295.0, 290.0, 289.0, 291.0, 291.0, 291.0, 288.0, 309.0, 321.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7028321055068703, "mean_inference_ms": 1.258741796548919, "mean_action_processing_ms": 0.13452419454572812, "mean_env_wait_ms": 0.8462833252091105, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 633.0, "episode_reward_min": 516.0, "episode_reward_mean": 582.97, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 254.0}, "policy_reward_max": {"ppo": 322.0}, "policy_reward_mean": {"ppo": 291.485}, "hist_stats": {"episode_reward": [582.0, 579.0, 630.0, 582.0, 576.0, 576.0, 530.0, 582.0, 579.0, 582.0, 579.0, 582.0, 633.0, 582.0, 579.0, 516.0, 587.0, 627.0, 627.0, 582.0, 579.0, 570.0, 624.0, 522.0, 581.0, 519.0, 630.0, 630.0, 579.0, 579.0, 582.0, 582.0, 579.0, 573.0, 576.0, 573.0, 570.0, 573.0, 582.0, 573.0, 579.0, 633.0, 582.0, 579.0, 624.0, 573.0, 579.0, 582.0, 587.0, 522.0, 576.0, 630.0, 570.0, 587.0, 579.0, 582.0, 576.0, 
582.0, 576.0, 582.0, 579.0, 582.0, 584.0, 633.0, 582.0, 587.0, 579.0, 582.0, 582.0, 582.0, 579.0, 573.0, 584.0, 573.0, 576.0, 633.0, 573.0, 579.0, 590.0, 573.0, 582.0, 582.0, 576.0, 582.0, 579.0, 573.0, 582.0, 522.0, 573.0, 630.0, 584.0, 582.0, 579.0, 582.0, 579.0, 587.0, 579.0, 582.0, 579.0, 630.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [293.0, 289.0, 285.0, 294.0, 308.0, 322.0, 294.0, 288.0, 284.0, 292.0, 289.0, 287.0, 267.0, 263.0, 296.0, 286.0, 289.0, 290.0, 294.0, 288.0, 291.0, 288.0, 293.0, 289.0, 318.0, 315.0, 290.0, 292.0, 285.0, 294.0, 257.0, 259.0, 295.0, 292.0, 311.0, 316.0, 321.0, 306.0, 294.0, 288.0, 288.0, 291.0, 277.0, 293.0, 316.0, 308.0, 262.0, 260.0, 290.0, 291.0, 254.0, 265.0, 315.0, 315.0, 308.0, 322.0, 289.0, 290.0, 289.0, 290.0, 289.0, 293.0, 290.0, 292.0, 288.0, 291.0, 283.0, 290.0, 291.0, 285.0, 274.0, 299.0, 288.0, 282.0, 288.0, 285.0, 288.0, 294.0, 288.0, 285.0, 290.0, 289.0, 317.0, 316.0, 290.0, 292.0, 291.0, 288.0, 310.0, 314.0, 289.0, 284.0, 293.0, 286.0, 295.0, 287.0, 288.0, 299.0, 259.0, 263.0, 286.0, 290.0, 310.0, 320.0, 288.0, 282.0, 295.0, 292.0, 295.0, 284.0, 292.0, 290.0, 288.0, 288.0, 291.0, 291.0, 285.0, 291.0, 292.0, 290.0, 290.0, 289.0, 291.0, 291.0, 290.0, 294.0, 314.0, 319.0, 289.0, 293.0, 292.0, 295.0, 293.0, 286.0, 294.0, 288.0, 284.0, 298.0, 291.0, 291.0, 291.0, 288.0, 291.0, 282.0, 284.0, 300.0, 284.0, 289.0, 285.0, 291.0, 318.0, 315.0, 282.0, 291.0, 294.0, 285.0, 294.0, 296.0, 291.0, 282.0, 291.0, 291.0, 291.0, 291.0, 283.0, 
293.0, 296.0, 286.0, 291.0, 288.0, 280.0, 293.0, 287.0, 295.0, 261.0, 261.0, 284.0, 289.0, 311.0, 319.0, 295.0, 289.0, 293.0, 289.0, 294.0, 285.0, 291.0, 291.0, 292.0, 287.0, 292.0, 295.0, 290.0, 289.0, 291.0, 291.0, 291.0, 288.0, 309.0, 321.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7028321055068703, "mean_inference_ms": 1.258741796548919, "mean_action_processing_ms": 0.13452419454572812, "mean_env_wait_ms": 0.8462833252091105, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 7526400, "num_agent_steps_trained": 7526400, "num_env_steps_sampled": 3763200, "num_env_steps_trained": 3763200, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 3763200, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 7526400, "timers": {"training_iteration_time_ms": 3627.155, "learn_time_ms": 1132.182, "learn_throughput": 11305.603, "synch_weights_time_ms": 11.691}, "counters": {"num_env_steps_sampled": 3763200, "num_env_steps_trained": 3763200, "num_agent_steps_sampled": 7526400, "num_agent_steps_trained": 7526400}, "done": false, "episodes_total": 9408, "training_iteration": 294, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-19-33", "timestamp": 1666581573, "time_this_iter_s": 3.654649496078491, "time_total_s": 1131.0634002685547, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1131.0634002685547, "timesteps_since_restore": 0, "iterations_since_restore": 294, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 21.25, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 201.6, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 180.01, "shaped_reward_min": 162, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.5, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 17.79, "onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 15.3, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 25, "useful_onion_pickup_agent_1_mean": 17.6, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 26, "onion_drop_agent_0_mean": 0.13, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.08, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.08, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, 
"useful_onion_drop_agent_1_mean": 0.04, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.06, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 17.4, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 25, "dish_pickup_agent_0_mean": 5.99, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.07, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.69, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.82, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.14, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.16, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.04, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.55, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.67, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.51, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.6, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.06, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 17.4, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 25, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.06, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 17.4, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 25, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0013797101564705372, "policy_loss": -0.0017297662561759353, "vf_loss": 
7.56453800201416, "vf_explained_var": 0.5991897583007812, "kl": 0.002123283687978983, "entropy": 0.8127896785736084, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 3776000, "num_env_steps_trained": 3776000, "num_agent_steps_sampled": 7552000, "num_agent_steps_trained": 7552000}, "sampler_results": {"episode_reward_max": 633.0, "episode_reward_min": 522.0, "episode_reward_mean": 583.21, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 257.0}, "policy_reward_max": {"ppo": 322.0}, "policy_reward_mean": {"ppo": 291.605}, "custom_metrics": {"sparse_reward_mean": 201.6, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 180.01, "shaped_reward_min": 162, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.5, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 17.79, 
"onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 15.3, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 25, "useful_onion_pickup_agent_1_mean": 17.6, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 26, "onion_drop_agent_0_mean": 0.13, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.08, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.08, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.04, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.06, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 17.4, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 25, "dish_pickup_agent_0_mean": 5.99, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.07, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.69, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.82, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.14, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.16, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.04, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.55, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.67, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.51, "soup_delivery_agent_0_min": 2, 
"soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.6, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.06, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 17.4, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 25, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.06, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 17.4, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 25, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 573.0, 576.0, 573.0, 570.0, 573.0, 582.0, 573.0, 579.0, 633.0, 582.0, 579.0, 624.0, 573.0, 579.0, 582.0, 587.0, 522.0, 576.0, 630.0, 570.0, 587.0, 579.0, 582.0, 576.0, 582.0, 576.0, 582.0, 579.0, 582.0, 584.0, 633.0, 582.0, 587.0, 579.0, 582.0, 582.0, 582.0, 579.0, 573.0, 584.0, 573.0, 576.0, 633.0, 573.0, 579.0, 590.0, 573.0, 582.0, 582.0, 576.0, 582.0, 579.0, 573.0, 582.0, 522.0, 573.0, 630.0, 584.0, 582.0, 579.0, 582.0, 579.0, 587.0, 579.0, 582.0, 579.0, 630.0, 582.0, 576.0, 570.0, 573.0, 587.0, 587.0, 581.0, 582.0, 582.0, 522.0, 630.0, 582.0, 576.0, 579.0, 576.0, 579.0, 579.0, 587.0, 579.0, 582.0, 579.0, 630.0, 633.0, 530.0, 579.0, 627.0, 573.0, 587.0, 576.0, 630.0, 576.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [288.0, 291.0, 283.0, 290.0, 291.0, 285.0, 274.0, 299.0, 288.0, 282.0, 288.0, 285.0, 288.0, 294.0, 288.0, 285.0, 290.0, 289.0, 317.0, 316.0, 290.0, 292.0, 291.0, 288.0, 310.0, 314.0, 289.0, 284.0, 293.0, 286.0, 295.0, 287.0, 288.0, 299.0, 259.0, 263.0, 286.0, 290.0, 310.0, 320.0, 288.0, 282.0, 295.0, 292.0, 295.0, 284.0, 292.0, 290.0, 288.0, 
288.0, 291.0, 291.0, 285.0, 291.0, 292.0, 290.0, 290.0, 289.0, 291.0, 291.0, 290.0, 294.0, 314.0, 319.0, 289.0, 293.0, 292.0, 295.0, 293.0, 286.0, 294.0, 288.0, 284.0, 298.0, 291.0, 291.0, 291.0, 288.0, 291.0, 282.0, 284.0, 300.0, 284.0, 289.0, 285.0, 291.0, 318.0, 315.0, 282.0, 291.0, 294.0, 285.0, 294.0, 296.0, 291.0, 282.0, 291.0, 291.0, 291.0, 291.0, 283.0, 293.0, 296.0, 286.0, 291.0, 288.0, 280.0, 293.0, 287.0, 295.0, 261.0, 261.0, 284.0, 289.0, 311.0, 319.0, 295.0, 289.0, 293.0, 289.0, 294.0, 285.0, 291.0, 291.0, 292.0, 287.0, 292.0, 295.0, 290.0, 289.0, 291.0, 291.0, 291.0, 288.0, 309.0, 321.0, 296.0, 286.0, 285.0, 291.0, 283.0, 287.0, 288.0, 285.0, 302.0, 285.0, 295.0, 292.0, 293.0, 288.0, 291.0, 291.0, 292.0, 290.0, 265.0, 257.0, 315.0, 315.0, 287.0, 295.0, 291.0, 285.0, 290.0, 289.0, 295.0, 281.0, 289.0, 290.0, 279.0, 300.0, 292.0, 295.0, 290.0, 289.0, 289.0, 293.0, 295.0, 284.0, 315.0, 315.0, 313.0, 320.0, 268.0, 262.0, 288.0, 291.0, 322.0, 305.0, 281.0, 292.0, 290.0, 297.0, 284.0, 292.0, 319.0, 311.0, 293.0, 283.0, 298.0, 284.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.702779161330629, "mean_inference_ms": 1.2585260649777843, "mean_action_processing_ms": 0.13451059035678656, "mean_env_wait_ms": 0.846147640378893, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 633.0, "episode_reward_min": 522.0, "episode_reward_mean": 583.21, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 257.0}, "policy_reward_max": {"ppo": 322.0}, "policy_reward_mean": {"ppo": 291.605}, "hist_stats": {"episode_reward": [579.0, 573.0, 576.0, 573.0, 570.0, 573.0, 582.0, 573.0, 579.0, 633.0, 582.0, 579.0, 624.0, 573.0, 579.0, 582.0, 587.0, 522.0, 576.0, 630.0, 570.0, 587.0, 579.0, 582.0, 576.0, 582.0, 576.0, 582.0, 579.0, 582.0, 584.0, 633.0, 582.0, 587.0, 579.0, 582.0, 582.0, 582.0, 579.0, 573.0, 584.0, 573.0, 576.0, 633.0, 573.0, 579.0, 590.0, 573.0, 582.0, 582.0, 576.0, 582.0, 579.0, 573.0, 582.0, 522.0, 573.0, 
630.0, 584.0, 582.0, 579.0, 582.0, 579.0, 587.0, 579.0, 582.0, 579.0, 630.0, 582.0, 576.0, 570.0, 573.0, 587.0, 587.0, 581.0, 582.0, 582.0, 522.0, 630.0, 582.0, 576.0, 579.0, 576.0, 579.0, 579.0, 587.0, 579.0, 582.0, 579.0, 630.0, 633.0, 530.0, 579.0, 627.0, 573.0, 587.0, 576.0, 630.0, 576.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [288.0, 291.0, 283.0, 290.0, 291.0, 285.0, 274.0, 299.0, 288.0, 282.0, 288.0, 285.0, 288.0, 294.0, 288.0, 285.0, 290.0, 289.0, 317.0, 316.0, 290.0, 292.0, 291.0, 288.0, 310.0, 314.0, 289.0, 284.0, 293.0, 286.0, 295.0, 287.0, 288.0, 299.0, 259.0, 263.0, 286.0, 290.0, 310.0, 320.0, 288.0, 282.0, 295.0, 292.0, 295.0, 284.0, 292.0, 290.0, 288.0, 288.0, 291.0, 291.0, 285.0, 291.0, 292.0, 290.0, 290.0, 289.0, 291.0, 291.0, 290.0, 294.0, 314.0, 319.0, 289.0, 293.0, 292.0, 295.0, 293.0, 286.0, 294.0, 288.0, 284.0, 298.0, 291.0, 291.0, 291.0, 288.0, 291.0, 282.0, 284.0, 300.0, 284.0, 289.0, 285.0, 291.0, 318.0, 315.0, 282.0, 291.0, 294.0, 285.0, 294.0, 296.0, 291.0, 282.0, 291.0, 291.0, 291.0, 291.0, 283.0, 293.0, 296.0, 286.0, 291.0, 288.0, 280.0, 293.0, 287.0, 295.0, 261.0, 261.0, 284.0, 289.0, 311.0, 319.0, 295.0, 289.0, 293.0, 289.0, 294.0, 285.0, 291.0, 291.0, 292.0, 287.0, 292.0, 295.0, 290.0, 289.0, 291.0, 291.0, 291.0, 288.0, 309.0, 321.0, 296.0, 286.0, 285.0, 291.0, 283.0, 287.0, 288.0, 285.0, 302.0, 285.0, 295.0, 292.0, 293.0, 288.0, 291.0, 291.0, 292.0, 290.0, 265.0, 257.0, 315.0, 315.0, 287.0, 295.0, 291.0, 285.0, 290.0, 289.0, 295.0, 
281.0, 289.0, 290.0, 279.0, 300.0, 292.0, 295.0, 290.0, 289.0, 289.0, 293.0, 295.0, 284.0, 315.0, 315.0, 313.0, 320.0, 268.0, 262.0, 288.0, 291.0, 322.0, 305.0, 281.0, 292.0, 290.0, 297.0, 284.0, 292.0, 319.0, 311.0, 293.0, 283.0, 298.0, 284.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.702779161330629, "mean_inference_ms": 1.2585260649777843, "mean_action_processing_ms": 0.13451059035678656, "mean_env_wait_ms": 0.846147640378893, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 7552000, "num_agent_steps_trained": 7552000, "num_env_steps_sampled": 3776000, "num_env_steps_trained": 3776000, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 3776000, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 7552000, "timers": {"training_iteration_time_ms": 3633.479, "learn_time_ms": 1129.111, "learn_throughput": 11336.351, "synch_weights_time_ms": 12.533}, "counters": {"num_env_steps_sampled": 3776000, "num_env_steps_trained": 3776000, "num_agent_steps_sampled": 7552000, "num_agent_steps_trained": 7552000}, "done": false, "episodes_total": 9440, "training_iteration": 295, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-19-37", "timestamp": 1666581577, "time_this_iter_s": 3.774996042251587, "time_total_s": 1134.8383963108063, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1134.8383963108063, "timesteps_since_restore": 0, "iterations_since_restore": 295, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.24, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 200.6, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 180.0, "shaped_reward_min": 114, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.26, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 18.0, "onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 15.06, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 17.78, "useful_onion_pickup_agent_1_min": 11, "useful_onion_pickup_agent_1_max": 26, "onion_drop_agent_0_mean": 0.17, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.06, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.12, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, 
"useful_onion_drop_agent_1_mean": 0.03, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 14.83, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 17.6, "potting_onion_agent_1_min": 11, "potting_onion_agent_1_max": 25, "dish_pickup_agent_0_mean": 6.13, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 4.94, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.81, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.71, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.19, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.17, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.04, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.64, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.61, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.54, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.54, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.83, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 17.6, "optimal_onion_potting_agent_1_min": 11, "optimal_onion_potting_agent_1_max": 25, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.83, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 17.6, "viable_onion_potting_agent_1_min": 11, "viable_onion_potting_agent_1_max": 25, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0031795313116163015, "policy_loss": 0.0028263104613870382, "vf_loss": 
7.591133117675781, "vf_explained_var": 0.6210389137268066, "kl": 0.002431360073387623, "entropy": 0.8117805123329163, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 3788800, "num_env_steps_trained": 3788800, "num_agent_steps_sampled": 7577600, "num_agent_steps_trained": 7577600}, "sampler_results": {"episode_reward_max": 636.0, "episode_reward_min": 354.0, "episode_reward_mean": 581.2, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 175.0}, "policy_reward_max": {"ppo": 323.0}, "policy_reward_mean": {"ppo": 290.6}, "custom_metrics": {"sparse_reward_mean": 200.6, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 180.0, "shaped_reward_min": 114, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.26, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 18.0, 
"onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 15.06, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 17.78, "useful_onion_pickup_agent_1_min": 11, "useful_onion_pickup_agent_1_max": 26, "onion_drop_agent_0_mean": 0.17, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.06, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.12, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.03, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 14.83, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 17.6, "potting_onion_agent_1_min": 11, "potting_onion_agent_1_max": 25, "dish_pickup_agent_0_mean": 6.13, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 4.94, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.81, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.71, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.19, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.17, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.04, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.64, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.61, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.54, "soup_delivery_agent_0_min": 3, 
"soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.54, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.83, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 17.6, "optimal_onion_potting_agent_1_min": 11, "optimal_onion_potting_agent_1_max": 25, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.83, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 17.6, "viable_onion_potting_agent_1_min": 11, "viable_onion_potting_agent_1_max": 25, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 587.0, 579.0, 582.0, 582.0, 582.0, 579.0, 573.0, 584.0, 573.0, 576.0, 633.0, 573.0, 579.0, 590.0, 573.0, 582.0, 582.0, 576.0, 582.0, 579.0, 573.0, 582.0, 522.0, 573.0, 630.0, 584.0, 582.0, 579.0, 582.0, 579.0, 587.0, 579.0, 582.0, 579.0, 630.0, 582.0, 576.0, 570.0, 573.0, 587.0, 587.0, 581.0, 582.0, 582.0, 522.0, 630.0, 582.0, 576.0, 579.0, 576.0, 579.0, 579.0, 587.0, 579.0, 582.0, 579.0, 630.0, 633.0, 530.0, 579.0, 627.0, 573.0, 587.0, 576.0, 630.0, 576.0, 582.0, 576.0, 579.0, 590.0, 630.0, 354.0, 584.0, 576.0, 587.0, 582.0, 582.0, 582.0, 584.0, 579.0, 627.0, 584.0, 576.0, 630.0, 633.0, 522.0, 582.0, 627.0, 582.0, 530.0, 579.0, 636.0, 519.0, 536.0, 582.0, 584.0, 584.0, 579.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [289.0, 293.0, 292.0, 295.0, 293.0, 286.0, 294.0, 288.0, 284.0, 298.0, 291.0, 291.0, 291.0, 288.0, 291.0, 282.0, 284.0, 300.0, 284.0, 289.0, 285.0, 291.0, 318.0, 315.0, 282.0, 291.0, 294.0, 285.0, 294.0, 296.0, 291.0, 282.0, 291.0, 291.0, 291.0, 291.0, 283.0, 293.0, 296.0, 286.0, 291.0, 288.0, 280.0, 293.0, 287.0, 295.0, 261.0, 261.0, 284.0, 
289.0, 311.0, 319.0, 295.0, 289.0, 293.0, 289.0, 294.0, 285.0, 291.0, 291.0, 292.0, 287.0, 292.0, 295.0, 290.0, 289.0, 291.0, 291.0, 291.0, 288.0, 309.0, 321.0, 296.0, 286.0, 285.0, 291.0, 283.0, 287.0, 288.0, 285.0, 302.0, 285.0, 295.0, 292.0, 293.0, 288.0, 291.0, 291.0, 292.0, 290.0, 265.0, 257.0, 315.0, 315.0, 287.0, 295.0, 291.0, 285.0, 290.0, 289.0, 295.0, 281.0, 289.0, 290.0, 279.0, 300.0, 292.0, 295.0, 290.0, 289.0, 289.0, 293.0, 295.0, 284.0, 315.0, 315.0, 313.0, 320.0, 268.0, 262.0, 288.0, 291.0, 322.0, 305.0, 281.0, 292.0, 290.0, 297.0, 284.0, 292.0, 319.0, 311.0, 293.0, 283.0, 298.0, 284.0, 286.0, 290.0, 290.0, 289.0, 294.0, 296.0, 313.0, 317.0, 179.0, 175.0, 282.0, 302.0, 287.0, 289.0, 289.0, 298.0, 282.0, 300.0, 292.0, 290.0, 295.0, 287.0, 292.0, 292.0, 283.0, 296.0, 313.0, 314.0, 296.0, 288.0, 288.0, 288.0, 315.0, 315.0, 312.0, 321.0, 263.0, 259.0, 289.0, 293.0, 316.0, 311.0, 290.0, 292.0, 269.0, 261.0, 290.0, 289.0, 323.0, 313.0, 256.0, 263.0, 271.0, 265.0, 286.0, 296.0, 297.0, 287.0, 305.0, 279.0, 289.0, 290.0, 287.0, 292.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7028229567314425, "mean_inference_ms": 1.2584377676811593, "mean_action_processing_ms": 0.13449847542232196, "mean_env_wait_ms": 0.8460929726919433, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 636.0, "episode_reward_min": 354.0, "episode_reward_mean": 581.2, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 175.0}, "policy_reward_max": {"ppo": 323.0}, "policy_reward_mean": {"ppo": 290.6}, "hist_stats": {"episode_reward": [582.0, 587.0, 579.0, 582.0, 582.0, 582.0, 579.0, 573.0, 584.0, 573.0, 576.0, 633.0, 573.0, 579.0, 590.0, 573.0, 582.0, 582.0, 576.0, 582.0, 579.0, 573.0, 582.0, 522.0, 573.0, 630.0, 584.0, 582.0, 579.0, 582.0, 579.0, 587.0, 579.0, 582.0, 579.0, 630.0, 582.0, 576.0, 570.0, 573.0, 587.0, 587.0, 581.0, 582.0, 582.0, 522.0, 630.0, 582.0, 576.0, 579.0, 576.0, 579.0, 579.0, 587.0, 579.0, 582.0, 579.0, 
630.0, 633.0, 530.0, 579.0, 627.0, 573.0, 587.0, 576.0, 630.0, 576.0, 582.0, 576.0, 579.0, 590.0, 630.0, 354.0, 584.0, 576.0, 587.0, 582.0, 582.0, 582.0, 584.0, 579.0, 627.0, 584.0, 576.0, 630.0, 633.0, 522.0, 582.0, 627.0, 582.0, 530.0, 579.0, 636.0, 519.0, 536.0, 582.0, 584.0, 584.0, 579.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [289.0, 293.0, 292.0, 295.0, 293.0, 286.0, 294.0, 288.0, 284.0, 298.0, 291.0, 291.0, 291.0, 288.0, 291.0, 282.0, 284.0, 300.0, 284.0, 289.0, 285.0, 291.0, 318.0, 315.0, 282.0, 291.0, 294.0, 285.0, 294.0, 296.0, 291.0, 282.0, 291.0, 291.0, 291.0, 291.0, 283.0, 293.0, 296.0, 286.0, 291.0, 288.0, 280.0, 293.0, 287.0, 295.0, 261.0, 261.0, 284.0, 289.0, 311.0, 319.0, 295.0, 289.0, 293.0, 289.0, 294.0, 285.0, 291.0, 291.0, 292.0, 287.0, 292.0, 295.0, 290.0, 289.0, 291.0, 291.0, 291.0, 288.0, 309.0, 321.0, 296.0, 286.0, 285.0, 291.0, 283.0, 287.0, 288.0, 285.0, 302.0, 285.0, 295.0, 292.0, 293.0, 288.0, 291.0, 291.0, 292.0, 290.0, 265.0, 257.0, 315.0, 315.0, 287.0, 295.0, 291.0, 285.0, 290.0, 289.0, 295.0, 281.0, 289.0, 290.0, 279.0, 300.0, 292.0, 295.0, 290.0, 289.0, 289.0, 293.0, 295.0, 284.0, 315.0, 315.0, 313.0, 320.0, 268.0, 262.0, 288.0, 291.0, 322.0, 305.0, 281.0, 292.0, 290.0, 297.0, 284.0, 292.0, 319.0, 311.0, 293.0, 283.0, 298.0, 284.0, 286.0, 290.0, 290.0, 289.0, 294.0, 296.0, 313.0, 317.0, 179.0, 175.0, 282.0, 302.0, 287.0, 289.0, 289.0, 298.0, 282.0, 300.0, 292.0, 290.0, 295.0, 287.0, 292.0, 292.0, 283.0, 296.0, 313.0, 314.0, 296.0, 
288.0, 288.0, 288.0, 315.0, 315.0, 312.0, 321.0, 263.0, 259.0, 289.0, 293.0, 316.0, 311.0, 290.0, 292.0, 269.0, 261.0, 290.0, 289.0, 323.0, 313.0, 256.0, 263.0, 271.0, 265.0, 286.0, 296.0, 297.0, 287.0, 305.0, 279.0, 289.0, 290.0, 287.0, 292.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7028229567314425, "mean_inference_ms": 1.2584377676811593, "mean_action_processing_ms": 0.13449847542232196, "mean_env_wait_ms": 0.8460929726919433, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 7577600, "num_agent_steps_trained": 7577600, "num_env_steps_sampled": 3788800, "num_env_steps_trained": 3788800, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 3788800, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 7577600, "timers": {"training_iteration_time_ms": 3687.668, "learn_time_ms": 1131.973, "learn_throughput": 11307.692, "synch_weights_time_ms": 12.297}, "counters": {"num_env_steps_sampled": 3788800, "num_env_steps_trained": 3788800, "num_agent_steps_sampled": 7577600, "num_agent_steps_trained": 7577600}, "done": false, "episodes_total": 9472, "training_iteration": 296, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-19-42", "timestamp": 1666581582, "time_this_iter_s": 4.240463972091675, "time_total_s": 1139.078860282898, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1139.078860282898, "timesteps_since_restore": 0, "iterations_since_restore": 296, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 24.483333333333334, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 202.6, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 180.9, "shaped_reward_min": 114, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.27, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 18.08, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 28, "useful_onion_pickup_agent_0_mean": 15.06, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 17.87, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 28, "onion_drop_agent_0_mean": 0.14, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.08, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.09, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, 
"useful_onion_drop_agent_1_mean": 0.04, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 14.86, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 17.64, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 26, "dish_pickup_agent_0_mean": 6.12, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 4.99, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.81, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.79, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.16, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.19, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.05, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.7, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.64, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.61, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.57, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.86, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 17.64, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 26, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.86, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 17.64, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 26, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0009865972679108381, "policy_loss": -0.001332993502728641, "vf_loss": 
7.5051116943359375, "vf_explained_var": 0.6193879842758179, "kl": 0.0017398454947397113, "entropy": 0.8082244396209717, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 3801600, "num_env_steps_trained": 3801600, "num_agent_steps_sampled": 7603200, "num_agent_steps_trained": 7603200}, "sampler_results": {"episode_reward_max": 636.0, "episode_reward_min": 354.0, "episode_reward_mean": 586.1, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 175.0}, "policy_reward_max": {"ppo": 323.0}, "policy_reward_mean": {"ppo": 293.05}, "custom_metrics": {"sparse_reward_mean": 202.6, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 180.9, "shaped_reward_min": 114, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.27, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 18.08, 
"onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 28, "useful_onion_pickup_agent_0_mean": 15.06, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 17.87, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 28, "onion_drop_agent_0_mean": 0.14, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.08, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.09, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.04, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 14.86, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 17.64, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 26, "dish_pickup_agent_0_mean": 6.12, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 4.99, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.81, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.79, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.16, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.19, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.05, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.7, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.64, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.61, "soup_delivery_agent_0_min": 3, 
"soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.57, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.86, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 17.64, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 26, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.86, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 17.64, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 26, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 582.0, 579.0, 630.0, 582.0, 576.0, 570.0, 573.0, 587.0, 587.0, 581.0, 582.0, 582.0, 522.0, 630.0, 582.0, 576.0, 579.0, 576.0, 579.0, 579.0, 587.0, 579.0, 582.0, 579.0, 630.0, 633.0, 530.0, 579.0, 627.0, 573.0, 587.0, 576.0, 630.0, 576.0, 582.0, 576.0, 579.0, 590.0, 630.0, 354.0, 584.0, 576.0, 587.0, 582.0, 582.0, 582.0, 584.0, 579.0, 627.0, 584.0, 576.0, 630.0, 633.0, 522.0, 582.0, 627.0, 582.0, 530.0, 579.0, 636.0, 519.0, 536.0, 582.0, 584.0, 584.0, 579.0, 579.0, 627.0, 570.0, 582.0, 630.0, 630.0, 630.0, 582.0, 627.0, 576.0, 627.0, 582.0, 582.0, 582.0, 579.0, 630.0, 590.0, 576.0, 570.0, 587.0, 579.0, 627.0, 636.0, 630.0, 630.0, 570.0, 582.0, 582.0, 576.0, 584.0, 576.0, 576.0, 584.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [290.0, 289.0, 291.0, 291.0, 291.0, 288.0, 309.0, 321.0, 296.0, 286.0, 285.0, 291.0, 283.0, 287.0, 288.0, 285.0, 302.0, 285.0, 295.0, 292.0, 293.0, 288.0, 291.0, 291.0, 292.0, 290.0, 265.0, 257.0, 315.0, 315.0, 287.0, 295.0, 291.0, 285.0, 290.0, 289.0, 295.0, 281.0, 289.0, 290.0, 279.0, 300.0, 292.0, 295.0, 290.0, 289.0, 289.0, 293.0, 295.0, 
284.0, 315.0, 315.0, 313.0, 320.0, 268.0, 262.0, 288.0, 291.0, 322.0, 305.0, 281.0, 292.0, 290.0, 297.0, 284.0, 292.0, 319.0, 311.0, 293.0, 283.0, 298.0, 284.0, 286.0, 290.0, 290.0, 289.0, 294.0, 296.0, 313.0, 317.0, 179.0, 175.0, 282.0, 302.0, 287.0, 289.0, 289.0, 298.0, 282.0, 300.0, 292.0, 290.0, 295.0, 287.0, 292.0, 292.0, 283.0, 296.0, 313.0, 314.0, 296.0, 288.0, 288.0, 288.0, 315.0, 315.0, 312.0, 321.0, 263.0, 259.0, 289.0, 293.0, 316.0, 311.0, 290.0, 292.0, 269.0, 261.0, 290.0, 289.0, 323.0, 313.0, 256.0, 263.0, 271.0, 265.0, 286.0, 296.0, 297.0, 287.0, 305.0, 279.0, 289.0, 290.0, 287.0, 292.0, 313.0, 314.0, 280.0, 290.0, 290.0, 292.0, 317.0, 313.0, 318.0, 312.0, 316.0, 314.0, 296.0, 286.0, 315.0, 312.0, 283.0, 293.0, 310.0, 317.0, 288.0, 294.0, 291.0, 291.0, 287.0, 295.0, 289.0, 290.0, 313.0, 317.0, 296.0, 294.0, 286.0, 290.0, 279.0, 291.0, 289.0, 298.0, 290.0, 289.0, 318.0, 309.0, 322.0, 314.0, 310.0, 320.0, 316.0, 314.0, 288.0, 282.0, 287.0, 295.0, 292.0, 290.0, 292.0, 284.0, 286.0, 298.0, 290.0, 286.0, 289.0, 287.0, 292.0, 292.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7028788544209611, "mean_inference_ms": 1.258342790567867, "mean_action_processing_ms": 0.134484969591017, "mean_env_wait_ms": 0.8460233377454522, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 636.0, "episode_reward_min": 354.0, "episode_reward_mean": 586.1, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 175.0}, "policy_reward_max": {"ppo": 323.0}, "policy_reward_mean": {"ppo": 293.05}, "hist_stats": {"episode_reward": [579.0, 582.0, 579.0, 630.0, 582.0, 576.0, 570.0, 573.0, 587.0, 587.0, 581.0, 582.0, 582.0, 522.0, 630.0, 582.0, 576.0, 579.0, 576.0, 579.0, 579.0, 587.0, 579.0, 582.0, 579.0, 630.0, 633.0, 530.0, 579.0, 627.0, 573.0, 587.0, 576.0, 630.0, 576.0, 582.0, 576.0, 579.0, 590.0, 630.0, 354.0, 584.0, 576.0, 587.0, 582.0, 582.0, 582.0, 584.0, 579.0, 627.0, 584.0, 576.0, 630.0, 633.0, 522.0, 582.0, 627.0, 
582.0, 530.0, 579.0, 636.0, 519.0, 536.0, 582.0, 584.0, 584.0, 579.0, 579.0, 627.0, 570.0, 582.0, 630.0, 630.0, 630.0, 582.0, 627.0, 576.0, 627.0, 582.0, 582.0, 582.0, 579.0, 630.0, 590.0, 576.0, 570.0, 587.0, 579.0, 627.0, 636.0, 630.0, 630.0, 570.0, 582.0, 582.0, 576.0, 584.0, 576.0, 576.0, 584.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [290.0, 289.0, 291.0, 291.0, 291.0, 288.0, 309.0, 321.0, 296.0, 286.0, 285.0, 291.0, 283.0, 287.0, 288.0, 285.0, 302.0, 285.0, 295.0, 292.0, 293.0, 288.0, 291.0, 291.0, 292.0, 290.0, 265.0, 257.0, 315.0, 315.0, 287.0, 295.0, 291.0, 285.0, 290.0, 289.0, 295.0, 281.0, 289.0, 290.0, 279.0, 300.0, 292.0, 295.0, 290.0, 289.0, 289.0, 293.0, 295.0, 284.0, 315.0, 315.0, 313.0, 320.0, 268.0, 262.0, 288.0, 291.0, 322.0, 305.0, 281.0, 292.0, 290.0, 297.0, 284.0, 292.0, 319.0, 311.0, 293.0, 283.0, 298.0, 284.0, 286.0, 290.0, 290.0, 289.0, 294.0, 296.0, 313.0, 317.0, 179.0, 175.0, 282.0, 302.0, 287.0, 289.0, 289.0, 298.0, 282.0, 300.0, 292.0, 290.0, 295.0, 287.0, 292.0, 292.0, 283.0, 296.0, 313.0, 314.0, 296.0, 288.0, 288.0, 288.0, 315.0, 315.0, 312.0, 321.0, 263.0, 259.0, 289.0, 293.0, 316.0, 311.0, 290.0, 292.0, 269.0, 261.0, 290.0, 289.0, 323.0, 313.0, 256.0, 263.0, 271.0, 265.0, 286.0, 296.0, 297.0, 287.0, 305.0, 279.0, 289.0, 290.0, 287.0, 292.0, 313.0, 314.0, 280.0, 290.0, 290.0, 292.0, 317.0, 313.0, 318.0, 312.0, 316.0, 314.0, 296.0, 286.0, 315.0, 312.0, 283.0, 293.0, 310.0, 317.0, 288.0, 294.0, 291.0, 291.0, 287.0, 295.0, 289.0, 290.0, 313.0, 
317.0, 296.0, 294.0, 286.0, 290.0, 279.0, 291.0, 289.0, 298.0, 290.0, 289.0, 318.0, 309.0, 322.0, 314.0, 310.0, 320.0, 316.0, 314.0, 288.0, 282.0, 287.0, 295.0, 292.0, 290.0, 292.0, 284.0, 286.0, 298.0, 290.0, 286.0, 289.0, 287.0, 292.0, 292.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7028788544209611, "mean_inference_ms": 1.258342790567867, "mean_action_processing_ms": 0.134484969591017, "mean_env_wait_ms": 0.8460233377454522, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 7603200, "num_agent_steps_trained": 7603200, "num_env_steps_sampled": 3801600, "num_env_steps_trained": 3801600, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 3801600, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 7603200, "timers": {"training_iteration_time_ms": 3683.02, "learn_time_ms": 1123.786, "learn_throughput": 11390.065, "synch_weights_time_ms": 12.273}, "counters": {"num_env_steps_sampled": 3801600, "num_env_steps_trained": 3801600, "num_agent_steps_sampled": 7603200, "num_agent_steps_trained": 7603200}, "done": false, "episodes_total": 9504, "training_iteration": 297, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-19-46", "timestamp": 1666581586, "time_this_iter_s": 3.7531981468200684, "time_total_s": 1142.832058429718, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1142.832058429718, "timesteps_since_restore": 0, "iterations_since_restore": 297, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.78333333333333, "ram_util_percent": 10.616666666666667}}
+{"custom_metrics": {"sparse_reward_mean": 202.8, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 181.07, "shaped_reward_min": 114, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.15, "onion_pickup_agent_0_min": 5, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 18.28, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 28, "useful_onion_pickup_agent_0_mean": 14.91, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 18.06, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 28, "onion_drop_agent_0_mean": 0.15, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.1, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.1, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, 
"useful_onion_drop_agent_1_mean": 0.07, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.74, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 17.83, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 27, "dish_pickup_agent_0_mean": 6.1, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 4.95, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.85, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.77, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.16, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.17, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.05, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.72, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 10, "soup_pickup_agent_1_mean": 4.6, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.63, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 10, "soup_delivery_agent_1_mean": 4.55, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.74, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 17.83, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 27, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.74, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 17.83, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 27, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.00189976638648659, "policy_loss": 0.001558080199174583, "vf_loss": 
7.5299530029296875, "vf_explained_var": 0.6205426454544067, "kl": 0.0019256204832345247, "entropy": 0.8226147890090942, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 3814400, "num_env_steps_trained": 3814400, "num_agent_steps_sampled": 7628800, "num_agent_steps_trained": 7628800}, "sampler_results": {"episode_reward_max": 636.0, "episode_reward_min": 354.0, "episode_reward_mean": 586.67, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 175.0}, "policy_reward_max": {"ppo": 323.0}, "policy_reward_mean": {"ppo": 293.335}, "custom_metrics": {"sparse_reward_mean": 202.8, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 181.07, "shaped_reward_min": 114, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.15, "onion_pickup_agent_0_min": 5, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 18.28, 
"onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 28, "useful_onion_pickup_agent_0_mean": 14.91, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 18.06, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 28, "onion_drop_agent_0_mean": 0.15, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.1, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.1, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.07, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.74, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 17.83, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 27, "dish_pickup_agent_0_mean": 6.1, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 4.95, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.85, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.77, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.16, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.17, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.05, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.72, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 10, "soup_pickup_agent_1_mean": 4.6, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.63, "soup_delivery_agent_0_min": 2, 
"soup_delivery_agent_0_max": 10, "soup_delivery_agent_1_mean": 4.55, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.74, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 17.83, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 27, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.74, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 17.83, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 27, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [576.0, 630.0, 576.0, 582.0, 576.0, 579.0, 590.0, 630.0, 354.0, 584.0, 576.0, 587.0, 582.0, 582.0, 582.0, 584.0, 579.0, 627.0, 584.0, 576.0, 630.0, 633.0, 522.0, 582.0, 627.0, 582.0, 530.0, 579.0, 636.0, 519.0, 536.0, 582.0, 584.0, 584.0, 579.0, 579.0, 627.0, 570.0, 582.0, 630.0, 630.0, 630.0, 582.0, 627.0, 576.0, 627.0, 582.0, 582.0, 582.0, 579.0, 630.0, 590.0, 576.0, 570.0, 587.0, 579.0, 627.0, 636.0, 630.0, 630.0, 570.0, 582.0, 582.0, 576.0, 584.0, 576.0, 576.0, 584.0, 581.0, 576.0, 579.0, 633.0, 530.0, 579.0, 573.0, 579.0, 584.0, 525.0, 582.0, 582.0, 582.0, 576.0, 579.0, 630.0, 633.0, 579.0, 636.0, 582.0, 579.0, 582.0, 630.0, 582.0, 579.0, 576.0, 582.0, 573.0, 579.0, 579.0, 636.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [284.0, 292.0, 319.0, 311.0, 293.0, 283.0, 298.0, 284.0, 286.0, 290.0, 290.0, 289.0, 294.0, 296.0, 313.0, 317.0, 179.0, 175.0, 282.0, 302.0, 287.0, 289.0, 289.0, 298.0, 282.0, 300.0, 292.0, 290.0, 295.0, 287.0, 292.0, 292.0, 283.0, 296.0, 313.0, 314.0, 296.0, 288.0, 288.0, 288.0, 315.0, 315.0, 312.0, 321.0, 263.0, 259.0, 289.0, 293.0, 316.0, 
311.0, 290.0, 292.0, 269.0, 261.0, 290.0, 289.0, 323.0, 313.0, 256.0, 263.0, 271.0, 265.0, 286.0, 296.0, 297.0, 287.0, 305.0, 279.0, 289.0, 290.0, 287.0, 292.0, 313.0, 314.0, 280.0, 290.0, 290.0, 292.0, 317.0, 313.0, 318.0, 312.0, 316.0, 314.0, 296.0, 286.0, 315.0, 312.0, 283.0, 293.0, 310.0, 317.0, 288.0, 294.0, 291.0, 291.0, 287.0, 295.0, 289.0, 290.0, 313.0, 317.0, 296.0, 294.0, 286.0, 290.0, 279.0, 291.0, 289.0, 298.0, 290.0, 289.0, 318.0, 309.0, 322.0, 314.0, 310.0, 320.0, 316.0, 314.0, 288.0, 282.0, 287.0, 295.0, 292.0, 290.0, 292.0, 284.0, 286.0, 298.0, 290.0, 286.0, 289.0, 287.0, 292.0, 292.0, 292.0, 289.0, 288.0, 288.0, 287.0, 292.0, 317.0, 316.0, 264.0, 266.0, 291.0, 288.0, 288.0, 285.0, 292.0, 287.0, 295.0, 289.0, 260.0, 265.0, 286.0, 296.0, 291.0, 291.0, 285.0, 297.0, 295.0, 281.0, 290.0, 289.0, 314.0, 316.0, 317.0, 316.0, 288.0, 291.0, 318.0, 318.0, 293.0, 289.0, 287.0, 292.0, 289.0, 293.0, 318.0, 312.0, 291.0, 291.0, 285.0, 294.0, 287.0, 289.0, 289.0, 293.0, 290.0, 283.0, 287.0, 292.0, 290.0, 289.0, 318.0, 318.0, 284.0, 295.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7029101461745526, "mean_inference_ms": 1.2582519609207465, "mean_action_processing_ms": 0.13447394317000266, "mean_env_wait_ms": 0.8459687051230822, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 636.0, "episode_reward_min": 354.0, "episode_reward_mean": 586.67, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 175.0}, "policy_reward_max": {"ppo": 323.0}, "policy_reward_mean": {"ppo": 293.335}, "hist_stats": {"episode_reward": [576.0, 630.0, 576.0, 582.0, 576.0, 579.0, 590.0, 630.0, 354.0, 584.0, 576.0, 587.0, 582.0, 582.0, 582.0, 584.0, 579.0, 627.0, 584.0, 576.0, 630.0, 633.0, 522.0, 582.0, 627.0, 582.0, 530.0, 579.0, 636.0, 519.0, 536.0, 582.0, 584.0, 584.0, 579.0, 579.0, 627.0, 570.0, 582.0, 630.0, 630.0, 630.0, 582.0, 627.0, 576.0, 627.0, 582.0, 582.0, 582.0, 579.0, 630.0, 590.0, 576.0, 570.0, 587.0, 579.0, 
627.0, 636.0, 630.0, 630.0, 570.0, 582.0, 582.0, 576.0, 584.0, 576.0, 576.0, 584.0, 581.0, 576.0, 579.0, 633.0, 530.0, 579.0, 573.0, 579.0, 584.0, 525.0, 582.0, 582.0, 582.0, 576.0, 579.0, 630.0, 633.0, 579.0, 636.0, 582.0, 579.0, 582.0, 630.0, 582.0, 579.0, 576.0, 582.0, 573.0, 579.0, 579.0, 636.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [284.0, 292.0, 319.0, 311.0, 293.0, 283.0, 298.0, 284.0, 286.0, 290.0, 290.0, 289.0, 294.0, 296.0, 313.0, 317.0, 179.0, 175.0, 282.0, 302.0, 287.0, 289.0, 289.0, 298.0, 282.0, 300.0, 292.0, 290.0, 295.0, 287.0, 292.0, 292.0, 283.0, 296.0, 313.0, 314.0, 296.0, 288.0, 288.0, 288.0, 315.0, 315.0, 312.0, 321.0, 263.0, 259.0, 289.0, 293.0, 316.0, 311.0, 290.0, 292.0, 269.0, 261.0, 290.0, 289.0, 323.0, 313.0, 256.0, 263.0, 271.0, 265.0, 286.0, 296.0, 297.0, 287.0, 305.0, 279.0, 289.0, 290.0, 287.0, 292.0, 313.0, 314.0, 280.0, 290.0, 290.0, 292.0, 317.0, 313.0, 318.0, 312.0, 316.0, 314.0, 296.0, 286.0, 315.0, 312.0, 283.0, 293.0, 310.0, 317.0, 288.0, 294.0, 291.0, 291.0, 287.0, 295.0, 289.0, 290.0, 313.0, 317.0, 296.0, 294.0, 286.0, 290.0, 279.0, 291.0, 289.0, 298.0, 290.0, 289.0, 318.0, 309.0, 322.0, 314.0, 310.0, 320.0, 316.0, 314.0, 288.0, 282.0, 287.0, 295.0, 292.0, 290.0, 292.0, 284.0, 286.0, 298.0, 290.0, 286.0, 289.0, 287.0, 292.0, 292.0, 292.0, 289.0, 288.0, 288.0, 287.0, 292.0, 317.0, 316.0, 264.0, 266.0, 291.0, 288.0, 288.0, 285.0, 292.0, 287.0, 295.0, 289.0, 260.0, 265.0, 286.0, 296.0, 291.0, 291.0, 285.0, 297.0, 295.0, 281.0, 
290.0, 289.0, 314.0, 316.0, 317.0, 316.0, 288.0, 291.0, 318.0, 318.0, 293.0, 289.0, 287.0, 292.0, 289.0, 293.0, 318.0, 312.0, 291.0, 291.0, 285.0, 294.0, 287.0, 289.0, 289.0, 293.0, 290.0, 283.0, 287.0, 292.0, 290.0, 289.0, 318.0, 318.0, 284.0, 295.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7029101461745526, "mean_inference_ms": 1.2582519609207465, "mean_action_processing_ms": 0.13447394317000266, "mean_env_wait_ms": 0.8459687051230822, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 7628800, "num_agent_steps_trained": 7628800, "num_env_steps_sampled": 3814400, "num_env_steps_trained": 3814400, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 3814400, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 7628800, "timers": {"training_iteration_time_ms": 3661.363, "learn_time_ms": 1106.611, "learn_throughput": 11566.848, "synch_weights_time_ms": 11.618}, "counters": {"num_env_steps_sampled": 3814400, "num_env_steps_trained": 3814400, "num_agent_steps_sampled": 7628800, "num_agent_steps_trained": 7628800}, "done": false, "episodes_total": 9536, "training_iteration": 298, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-19-49", "timestamp": 1666581589, "time_this_iter_s": 3.576004981994629, "time_total_s": 1146.4080634117126, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1146.4080634117126, "timesteps_since_restore": 0, "iterations_since_restore": 298, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.68, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 204.6, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 182.22, "shaped_reward_min": 165, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.29, "onion_pickup_agent_0_min": 5, "onion_pickup_agent_0_max": 26, "onion_pickup_agent_1_mean": 18.52, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 28, "useful_onion_pickup_agent_0_mean": 15.08, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 26, "useful_onion_pickup_agent_1_mean": 18.26, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 28, "onion_drop_agent_0_mean": 0.16, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.2, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.09, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, 
"useful_onion_drop_agent_1_mean": 0.13, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 14.79, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 18.0, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 27, "dish_pickup_agent_0_mean": 6.15, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 4.91, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.95, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.75, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.14, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.13, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.04, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.79, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 10, "soup_pickup_agent_1_mean": 4.56, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.74, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 10, "soup_delivery_agent_1_mean": 4.54, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.79, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 18.0, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 27, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.79, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 18.0, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 27, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.001324523356743157, "policy_loss": 0.0009796429658308625, "vf_loss": 
7.541810512542725, "vf_explained_var": 0.6045008897781372, "kl": 0.002173337619751692, "entropy": 0.8185964226722717, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 3827200, "num_env_steps_trained": 3827200, "num_agent_steps_sampled": 7654400, "num_agent_steps_trained": 7654400}, "sampler_results": {"episode_reward_max": 636.0, "episode_reward_min": 525.0, "episode_reward_mean": 591.42, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 260.0}, "policy_reward_max": {"ppo": 322.0}, "policy_reward_mean": {"ppo": 295.71}, "custom_metrics": {"sparse_reward_mean": 204.6, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 182.22, "shaped_reward_min": 165, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.29, "onion_pickup_agent_0_min": 5, "onion_pickup_agent_0_max": 26, "onion_pickup_agent_1_mean": 18.52, 
"onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 28, "useful_onion_pickup_agent_0_mean": 15.08, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 26, "useful_onion_pickup_agent_1_mean": 18.26, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 28, "onion_drop_agent_0_mean": 0.16, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.2, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.09, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.13, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 14.79, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 18.0, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 27, "dish_pickup_agent_0_mean": 6.15, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 4.91, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.95, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.75, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.14, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.13, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.04, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.79, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 10, "soup_pickup_agent_1_mean": 4.56, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.74, "soup_delivery_agent_0_min": 2, 
"soup_delivery_agent_0_max": 10, "soup_delivery_agent_1_mean": 4.54, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.79, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 18.0, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 27, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.79, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 18.0, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 27, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [584.0, 584.0, 579.0, 579.0, 627.0, 570.0, 582.0, 630.0, 630.0, 630.0, 582.0, 627.0, 576.0, 627.0, 582.0, 582.0, 582.0, 579.0, 630.0, 590.0, 576.0, 570.0, 587.0, 579.0, 627.0, 636.0, 630.0, 630.0, 570.0, 582.0, 582.0, 576.0, 584.0, 576.0, 576.0, 584.0, 581.0, 576.0, 579.0, 633.0, 530.0, 579.0, 573.0, 579.0, 584.0, 525.0, 582.0, 582.0, 582.0, 576.0, 579.0, 630.0, 633.0, 579.0, 636.0, 582.0, 579.0, 582.0, 630.0, 582.0, 579.0, 576.0, 582.0, 573.0, 579.0, 579.0, 636.0, 579.0, 587.0, 582.0, 630.0, 579.0, 579.0, 576.0, 579.0, 582.0, 582.0, 633.0, 630.0, 576.0, 576.0, 582.0, 579.0, 630.0, 576.0, 579.0, 582.0, 584.0, 582.0, 579.0, 582.0, 576.0, 579.0, 573.0, 636.0, 627.0, 633.0, 630.0, 582.0, 587.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [297.0, 287.0, 305.0, 279.0, 289.0, 290.0, 287.0, 292.0, 313.0, 314.0, 280.0, 290.0, 290.0, 292.0, 317.0, 313.0, 318.0, 312.0, 316.0, 314.0, 296.0, 286.0, 315.0, 312.0, 283.0, 293.0, 310.0, 317.0, 288.0, 294.0, 291.0, 291.0, 287.0, 295.0, 289.0, 290.0, 313.0, 317.0, 296.0, 294.0, 286.0, 290.0, 279.0, 291.0, 289.0, 298.0, 290.0, 289.0, 318.0, 
309.0, 322.0, 314.0, 310.0, 320.0, 316.0, 314.0, 288.0, 282.0, 287.0, 295.0, 292.0, 290.0, 292.0, 284.0, 286.0, 298.0, 290.0, 286.0, 289.0, 287.0, 292.0, 292.0, 292.0, 289.0, 288.0, 288.0, 287.0, 292.0, 317.0, 316.0, 264.0, 266.0, 291.0, 288.0, 288.0, 285.0, 292.0, 287.0, 295.0, 289.0, 260.0, 265.0, 286.0, 296.0, 291.0, 291.0, 285.0, 297.0, 295.0, 281.0, 290.0, 289.0, 314.0, 316.0, 317.0, 316.0, 288.0, 291.0, 318.0, 318.0, 293.0, 289.0, 287.0, 292.0, 289.0, 293.0, 318.0, 312.0, 291.0, 291.0, 285.0, 294.0, 287.0, 289.0, 289.0, 293.0, 290.0, 283.0, 287.0, 292.0, 290.0, 289.0, 318.0, 318.0, 284.0, 295.0, 293.0, 294.0, 291.0, 291.0, 314.0, 316.0, 291.0, 288.0, 286.0, 293.0, 291.0, 285.0, 290.0, 289.0, 292.0, 290.0, 294.0, 288.0, 314.0, 319.0, 314.0, 316.0, 286.0, 290.0, 286.0, 290.0, 290.0, 292.0, 297.0, 282.0, 317.0, 313.0, 288.0, 288.0, 293.0, 286.0, 291.0, 291.0, 297.0, 287.0, 297.0, 285.0, 289.0, 290.0, 290.0, 292.0, 279.0, 297.0, 297.0, 282.0, 289.0, 284.0, 315.0, 321.0, 319.0, 308.0, 317.0, 316.0, 312.0, 318.0, 293.0, 289.0, 293.0, 294.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7028457259266864, "mean_inference_ms": 1.258041513717552, "mean_action_processing_ms": 0.1344655857753008, "mean_env_wait_ms": 0.8458612410170017, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 636.0, "episode_reward_min": 525.0, "episode_reward_mean": 591.42, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 260.0}, "policy_reward_max": {"ppo": 322.0}, "policy_reward_mean": {"ppo": 295.71}, "hist_stats": {"episode_reward": [584.0, 584.0, 579.0, 579.0, 627.0, 570.0, 582.0, 630.0, 630.0, 630.0, 582.0, 627.0, 576.0, 627.0, 582.0, 582.0, 582.0, 579.0, 630.0, 590.0, 576.0, 570.0, 587.0, 579.0, 627.0, 636.0, 630.0, 630.0, 570.0, 582.0, 582.0, 576.0, 584.0, 576.0, 576.0, 584.0, 581.0, 576.0, 579.0, 633.0, 530.0, 579.0, 573.0, 579.0, 584.0, 525.0, 582.0, 582.0, 582.0, 576.0, 579.0, 630.0, 633.0, 579.0, 636.0, 582.0, 579.0, 
582.0, 630.0, 582.0, 579.0, 576.0, 582.0, 573.0, 579.0, 579.0, 636.0, 579.0, 587.0, 582.0, 630.0, 579.0, 579.0, 576.0, 579.0, 582.0, 582.0, 633.0, 630.0, 576.0, 576.0, 582.0, 579.0, 630.0, 576.0, 579.0, 582.0, 584.0, 582.0, 579.0, 582.0, 576.0, 579.0, 573.0, 636.0, 627.0, 633.0, 630.0, 582.0, 587.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [297.0, 287.0, 305.0, 279.0, 289.0, 290.0, 287.0, 292.0, 313.0, 314.0, 280.0, 290.0, 290.0, 292.0, 317.0, 313.0, 318.0, 312.0, 316.0, 314.0, 296.0, 286.0, 315.0, 312.0, 283.0, 293.0, 310.0, 317.0, 288.0, 294.0, 291.0, 291.0, 287.0, 295.0, 289.0, 290.0, 313.0, 317.0, 296.0, 294.0, 286.0, 290.0, 279.0, 291.0, 289.0, 298.0, 290.0, 289.0, 318.0, 309.0, 322.0, 314.0, 310.0, 320.0, 316.0, 314.0, 288.0, 282.0, 287.0, 295.0, 292.0, 290.0, 292.0, 284.0, 286.0, 298.0, 290.0, 286.0, 289.0, 287.0, 292.0, 292.0, 292.0, 289.0, 288.0, 288.0, 287.0, 292.0, 317.0, 316.0, 264.0, 266.0, 291.0, 288.0, 288.0, 285.0, 292.0, 287.0, 295.0, 289.0, 260.0, 265.0, 286.0, 296.0, 291.0, 291.0, 285.0, 297.0, 295.0, 281.0, 290.0, 289.0, 314.0, 316.0, 317.0, 316.0, 288.0, 291.0, 318.0, 318.0, 293.0, 289.0, 287.0, 292.0, 289.0, 293.0, 318.0, 312.0, 291.0, 291.0, 285.0, 294.0, 287.0, 289.0, 289.0, 293.0, 290.0, 283.0, 287.0, 292.0, 290.0, 289.0, 318.0, 318.0, 284.0, 295.0, 293.0, 294.0, 291.0, 291.0, 314.0, 316.0, 291.0, 288.0, 286.0, 293.0, 291.0, 285.0, 290.0, 289.0, 292.0, 290.0, 294.0, 288.0, 314.0, 319.0, 314.0, 316.0, 286.0, 290.0, 286.0, 290.0, 290.0, 292.0, 297.0, 
282.0, 317.0, 313.0, 288.0, 288.0, 293.0, 286.0, 291.0, 291.0, 297.0, 287.0, 297.0, 285.0, 289.0, 290.0, 290.0, 292.0, 279.0, 297.0, 297.0, 282.0, 289.0, 284.0, 315.0, 321.0, 319.0, 308.0, 317.0, 316.0, 312.0, 318.0, 293.0, 289.0, 293.0, 294.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7028457259266864, "mean_inference_ms": 1.258041513717552, "mean_action_processing_ms": 0.1344655857753008, "mean_env_wait_ms": 0.8458612410170017, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 7654400, "num_agent_steps_trained": 7654400, "num_env_steps_sampled": 3827200, "num_env_steps_trained": 3827200, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 3827200, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 7654400, "timers": {"training_iteration_time_ms": 3658.483, "learn_time_ms": 1105.958, "learn_throughput": 11573.68, "synch_weights_time_ms": 11.662}, "counters": {"num_env_steps_sampled": 3827200, "num_env_steps_trained": 3827200, "num_agent_steps_sampled": 7654400, "num_agent_steps_trained": 7654400}, "done": false, "episodes_total": 9568, "training_iteration": 299, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-19-53", "timestamp": 1666581593, "time_this_iter_s": 3.615095615386963, "time_total_s": 1150.0231590270996, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1150.0231590270996, "timesteps_since_restore": 0, "iterations_since_restore": 299, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 24.3, "ram_util_percent": 10.62}}
+{"evaluation": {"average_sparse_reward": 200.0, "num_healthy_workers": 0, "num_recreated_workers": 0}, "custom_metrics": {"sparse_reward_mean": 204.4, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 182.8, "shaped_reward_min": 164, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.63, "onion_pickup_agent_0_min": 5, "onion_pickup_agent_0_max": 26, "onion_pickup_agent_1_mean": 18.31, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 27, "useful_onion_pickup_agent_0_mean": 15.46, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 26, "useful_onion_pickup_agent_1_mean": 18.08, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 27, "onion_drop_agent_0_mean": 0.13, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.2, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.06, 
"useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.12, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 15.14, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 17.83, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 27, "dish_pickup_agent_0_mean": 6.06, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.01, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.87, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.81, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.12, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.15, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.05, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.74, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 10, "soup_pickup_agent_1_mean": 4.63, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.67, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 10, "soup_delivery_agent_1_mean": 4.61, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.14, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 17.83, "optimal_onion_potting_agent_1_min": 9, 
"optimal_onion_potting_agent_1_max": 27, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.14, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 17.83, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 27, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, 
"total_loss": -0.00201015523634851, "policy_loss": -0.0023469526786357164, "vf_loss": 7.471502304077148, "vf_explained_var": 0.6078072786331177, "kl": 0.002010123338550329, "entropy": 0.8207009434700012, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 3840000, "num_env_steps_trained": 3840000, "num_agent_steps_sampled": 7680000, "num_agent_steps_trained": 7680000}, "sampler_results": {"episode_reward_max": 639.0, "episode_reward_min": 525.0, "episode_reward_mean": 591.6, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 260.0}, "policy_reward_max": {"ppo": 322.0}, "policy_reward_mean": {"ppo": 295.8}, "custom_metrics": {"sparse_reward_mean": 204.4, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 182.8, "shaped_reward_min": 164, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.63, "onion_pickup_agent_0_min": 5, 
"onion_pickup_agent_0_max": 26, "onion_pickup_agent_1_mean": 18.31, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 27, "useful_onion_pickup_agent_0_mean": 15.46, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 26, "useful_onion_pickup_agent_1_mean": 18.08, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 27, "onion_drop_agent_0_mean": 0.13, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.2, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.06, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.12, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 15.14, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 17.83, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 27, "dish_pickup_agent_0_mean": 6.06, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.01, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.87, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.81, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.12, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.15, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.05, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.74, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 10, "soup_pickup_agent_1_mean": 4.63, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, 
"soup_delivery_agent_0_mean": 5.67, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 10, "soup_delivery_agent_1_mean": 4.61, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.14, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 17.83, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 27, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.14, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 17.83, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 27, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, 
"useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [584.0, 576.0, 576.0, 584.0, 581.0, 576.0, 579.0, 633.0, 530.0, 579.0, 573.0, 579.0, 584.0, 525.0, 582.0, 582.0, 582.0, 576.0, 579.0, 630.0, 633.0, 579.0, 636.0, 582.0, 579.0, 582.0, 630.0, 582.0, 579.0, 576.0, 582.0, 573.0, 579.0, 579.0, 636.0, 579.0, 587.0, 582.0, 630.0, 579.0, 579.0, 576.0, 579.0, 582.0, 582.0, 633.0, 630.0, 576.0, 576.0, 582.0, 579.0, 630.0, 576.0, 579.0, 582.0, 584.0, 582.0, 579.0, 582.0, 576.0, 579.0, 573.0, 636.0, 627.0, 633.0, 630.0, 582.0, 587.0, 627.0, 584.0, 633.0, 539.0, 579.0, 627.0, 576.0, 630.0, 630.0, 582.0, 630.0, 636.0, 636.0, 633.0, 590.0, 579.0, 584.0, 639.0, 579.0, 590.0, 582.0, 582.0, 582.0, 587.0, 630.0, 582.0, 564.0, 579.0, 582.0, 576.0, 582.0, 584.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [286.0, 298.0, 290.0, 286.0, 289.0, 287.0, 292.0, 292.0, 292.0, 289.0, 288.0, 288.0, 287.0, 292.0, 317.0, 316.0, 264.0, 266.0, 291.0, 288.0, 288.0, 285.0, 292.0, 287.0, 295.0, 289.0, 260.0, 265.0, 286.0, 296.0, 291.0, 291.0, 285.0, 297.0, 295.0, 281.0, 290.0, 289.0, 
314.0, 316.0, 317.0, 316.0, 288.0, 291.0, 318.0, 318.0, 293.0, 289.0, 287.0, 292.0, 289.0, 293.0, 318.0, 312.0, 291.0, 291.0, 285.0, 294.0, 287.0, 289.0, 289.0, 293.0, 290.0, 283.0, 287.0, 292.0, 290.0, 289.0, 318.0, 318.0, 284.0, 295.0, 293.0, 294.0, 291.0, 291.0, 314.0, 316.0, 291.0, 288.0, 286.0, 293.0, 291.0, 285.0, 290.0, 289.0, 292.0, 290.0, 294.0, 288.0, 314.0, 319.0, 314.0, 316.0, 286.0, 290.0, 286.0, 290.0, 290.0, 292.0, 297.0, 282.0, 317.0, 313.0, 288.0, 288.0, 293.0, 286.0, 291.0, 291.0, 297.0, 287.0, 297.0, 285.0, 289.0, 290.0, 290.0, 292.0, 279.0, 297.0, 297.0, 282.0, 289.0, 284.0, 315.0, 321.0, 319.0, 308.0, 317.0, 316.0, 312.0, 318.0, 293.0, 289.0, 293.0, 294.0, 317.0, 310.0, 297.0, 287.0, 316.0, 317.0, 277.0, 262.0, 288.0, 291.0, 312.0, 315.0, 288.0, 288.0, 322.0, 308.0, 322.0, 308.0, 298.0, 284.0, 313.0, 317.0, 320.0, 316.0, 318.0, 318.0, 312.0, 321.0, 293.0, 297.0, 299.0, 280.0, 294.0, 290.0, 320.0, 319.0, 284.0, 295.0, 293.0, 297.0, 290.0, 292.0, 291.0, 291.0, 290.0, 292.0, 291.0, 296.0, 321.0, 309.0, 295.0, 287.0, 275.0, 289.0, 295.0, 284.0, 291.0, 291.0, 276.0, 300.0, 290.0, 292.0, 297.0, 287.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7027726542537445, "mean_inference_ms": 1.257838613560412, "mean_action_processing_ms": 0.1344571221669019, "mean_env_wait_ms": 0.845761859715749, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 639.0, "episode_reward_min": 525.0, "episode_reward_mean": 591.6, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 260.0}, "policy_reward_max": {"ppo": 322.0}, "policy_reward_mean": {"ppo": 295.8}, "hist_stats": {"episode_reward": [584.0, 576.0, 576.0, 584.0, 581.0, 576.0, 579.0, 633.0, 530.0, 579.0, 573.0, 579.0, 584.0, 525.0, 582.0, 582.0, 582.0, 576.0, 579.0, 630.0, 633.0, 579.0, 636.0, 582.0, 579.0, 582.0, 630.0, 582.0, 579.0, 576.0, 582.0, 573.0, 579.0, 579.0, 636.0, 579.0, 587.0, 582.0, 630.0, 579.0, 579.0, 576.0, 579.0, 582.0, 582.0, 633.0, 
630.0, 576.0, 576.0, 582.0, 579.0, 630.0, 576.0, 579.0, 582.0, 584.0, 582.0, 579.0, 582.0, 576.0, 579.0, 573.0, 636.0, 627.0, 633.0, 630.0, 582.0, 587.0, 627.0, 584.0, 633.0, 539.0, 579.0, 627.0, 576.0, 630.0, 630.0, 582.0, 630.0, 636.0, 636.0, 633.0, 590.0, 579.0, 584.0, 639.0, 579.0, 590.0, 582.0, 582.0, 582.0, 587.0, 630.0, 582.0, 564.0, 579.0, 582.0, 576.0, 582.0, 584.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [286.0, 298.0, 290.0, 286.0, 289.0, 287.0, 292.0, 292.0, 292.0, 289.0, 288.0, 288.0, 287.0, 292.0, 317.0, 316.0, 264.0, 266.0, 291.0, 288.0, 288.0, 285.0, 292.0, 287.0, 295.0, 289.0, 260.0, 265.0, 286.0, 296.0, 291.0, 291.0, 285.0, 297.0, 295.0, 281.0, 290.0, 289.0, 314.0, 316.0, 317.0, 316.0, 288.0, 291.0, 318.0, 318.0, 293.0, 289.0, 287.0, 292.0, 289.0, 293.0, 318.0, 312.0, 291.0, 291.0, 285.0, 294.0, 287.0, 289.0, 289.0, 293.0, 290.0, 283.0, 287.0, 292.0, 290.0, 289.0, 318.0, 318.0, 284.0, 295.0, 293.0, 294.0, 291.0, 291.0, 314.0, 316.0, 291.0, 288.0, 286.0, 293.0, 291.0, 285.0, 290.0, 289.0, 292.0, 290.0, 294.0, 288.0, 314.0, 319.0, 314.0, 316.0, 286.0, 290.0, 286.0, 290.0, 290.0, 292.0, 297.0, 282.0, 317.0, 313.0, 288.0, 288.0, 293.0, 286.0, 291.0, 291.0, 297.0, 287.0, 297.0, 285.0, 289.0, 290.0, 290.0, 292.0, 279.0, 297.0, 297.0, 282.0, 289.0, 284.0, 315.0, 321.0, 319.0, 308.0, 317.0, 316.0, 312.0, 318.0, 293.0, 289.0, 293.0, 294.0, 317.0, 310.0, 297.0, 287.0, 316.0, 317.0, 277.0, 262.0, 288.0, 291.0, 312.0, 315.0, 288.0, 288.0, 322.0, 308.0, 322.0, 308.0, 
298.0, 284.0, 313.0, 317.0, 320.0, 316.0, 318.0, 318.0, 312.0, 321.0, 293.0, 297.0, 299.0, 280.0, 294.0, 290.0, 320.0, 319.0, 284.0, 295.0, 293.0, 297.0, 290.0, 292.0, 291.0, 291.0, 290.0, 292.0, 291.0, 296.0, 321.0, 309.0, 295.0, 287.0, 275.0, 289.0, 295.0, 284.0, 291.0, 291.0, 276.0, 300.0, 290.0, 292.0, 297.0, 287.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7027726542537445, "mean_inference_ms": 1.257838613560412, "mean_action_processing_ms": 0.1344571221669019, "mean_env_wait_ms": 0.845761859715749, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 7680000, "num_agent_steps_trained": 7680000, "num_env_steps_sampled": 3840000, "num_env_steps_trained": 3840000, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 3840000, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 7680000, "timers": {"training_iteration_time_ms": 3670.074, "learn_time_ms": 1112.802, "learn_throughput": 11502.495, "synch_weights_time_ms": 11.61}, "counters": {"num_env_steps_sampled": 3840000, "num_env_steps_trained": 3840000, "num_agent_steps_sampled": 7680000, "num_agent_steps_trained": 7680000}, "done": false, "episodes_total": 9600, "training_iteration": 300, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-20-00", "timestamp": 1666581600, "time_this_iter_s": 7.122778654098511, "time_total_s": 1157.1459376811981, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, 
"log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": 
false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": 
{"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", 
"observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, 
"keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function 
get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function 
gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1157.1459376811981, "timesteps_since_restore": 0, "iterations_since_restore": 300, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 14.236363636363636, "ram_util_percent": 10.599999999999998}}
+{"custom_metrics": {"sparse_reward_mean": 204.0, "sparse_reward_min": 60, "sparse_reward_max": 220, "shaped_reward_mean": 182.02, "shaped_reward_min": 60, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.77, "onion_pickup_agent_0_min": 2, "onion_pickup_agent_0_max": 26, "onion_pickup_agent_1_mean": 18.09, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 28, "useful_onion_pickup_agent_0_mean": 15.65, "useful_onion_pickup_agent_0_min": 2, "useful_onion_pickup_agent_0_max": 26, "useful_onion_pickup_agent_1_mean": 17.82, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 28, "onion_drop_agent_0_mean": 0.1, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.2, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.04, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, 
"useful_onion_drop_agent_1_mean": 0.11, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 15.3, "potting_onion_agent_0_min": 2, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 17.62, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 28, "dish_pickup_agent_0_mean": 6.11, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.07, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.79, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.78, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.16, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.19, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.06, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.7, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.61, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.66, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.59, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.3, "optimal_onion_potting_agent_0_min": 2, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 17.62, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 28, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.3, "viable_onion_potting_agent_0_min": 2, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 17.62, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 28, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.001016387133859098, "policy_loss": 0.0006756494985893369, "vf_loss": 
7.564582824707031, "vf_explained_var": 0.6276317834854126, "kl": 0.0027429345063865185, "entropy": 0.8314375877380371, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 3852800, "num_env_steps_trained": 3852800, "num_agent_steps_sampled": 7705600, "num_agent_steps_trained": 7705600}, "sampler_results": {"episode_reward_max": 639.0, "episode_reward_min": 180.0, "episode_reward_mean": 590.02, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 90.0}, "policy_reward_max": {"ppo": 322.0}, "policy_reward_mean": {"ppo": 295.01}, "custom_metrics": {"sparse_reward_mean": 204.0, "sparse_reward_min": 60, "sparse_reward_max": 220, "shaped_reward_mean": 182.02, "shaped_reward_min": 60, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.77, "onion_pickup_agent_0_min": 2, "onion_pickup_agent_0_max": 26, "onion_pickup_agent_1_mean": 18.09, 
"onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 28, "useful_onion_pickup_agent_0_mean": 15.65, "useful_onion_pickup_agent_0_min": 2, "useful_onion_pickup_agent_0_max": 26, "useful_onion_pickup_agent_1_mean": 17.82, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 28, "onion_drop_agent_0_mean": 0.1, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.2, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.04, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.11, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 15.3, "potting_onion_agent_0_min": 2, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 17.62, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 28, "dish_pickup_agent_0_mean": 6.11, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.07, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.79, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.78, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.16, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.19, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.06, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.7, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.61, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.66, "soup_delivery_agent_0_min": 3, 
"soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.59, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.3, "optimal_onion_potting_agent_0_min": 2, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 17.62, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 28, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.3, "viable_onion_potting_agent_0_min": 2, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 17.62, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 28, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 579.0, 636.0, 579.0, 587.0, 582.0, 630.0, 579.0, 579.0, 576.0, 579.0, 582.0, 582.0, 633.0, 630.0, 576.0, 576.0, 582.0, 579.0, 630.0, 576.0, 579.0, 582.0, 584.0, 582.0, 579.0, 582.0, 576.0, 579.0, 573.0, 636.0, 627.0, 633.0, 630.0, 582.0, 587.0, 627.0, 584.0, 633.0, 539.0, 579.0, 627.0, 576.0, 630.0, 630.0, 582.0, 630.0, 636.0, 636.0, 633.0, 590.0, 579.0, 584.0, 639.0, 579.0, 590.0, 582.0, 582.0, 582.0, 587.0, 630.0, 582.0, 564.0, 579.0, 582.0, 576.0, 582.0, 584.0, 639.0, 576.0, 573.0, 573.0, 633.0, 582.0, 636.0, 582.0, 582.0, 587.0, 579.0, 582.0, 582.0, 579.0, 627.0, 633.0, 576.0, 579.0, 561.0, 582.0, 582.0, 633.0, 582.0, 627.0, 573.0, 180.0, 582.0, 579.0, 624.0, 576.0, 582.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [287.0, 292.0, 290.0, 289.0, 318.0, 318.0, 284.0, 295.0, 293.0, 294.0, 291.0, 291.0, 314.0, 316.0, 291.0, 288.0, 286.0, 293.0, 291.0, 285.0, 290.0, 289.0, 292.0, 290.0, 294.0, 288.0, 314.0, 319.0, 314.0, 316.0, 286.0, 290.0, 286.0, 290.0, 290.0, 292.0, 297.0, 282.0, 317.0, 313.0, 288.0, 288.0, 293.0, 286.0, 291.0, 291.0, 297.0, 287.0, 297.0, 
285.0, 289.0, 290.0, 290.0, 292.0, 279.0, 297.0, 297.0, 282.0, 289.0, 284.0, 315.0, 321.0, 319.0, 308.0, 317.0, 316.0, 312.0, 318.0, 293.0, 289.0, 293.0, 294.0, 317.0, 310.0, 297.0, 287.0, 316.0, 317.0, 277.0, 262.0, 288.0, 291.0, 312.0, 315.0, 288.0, 288.0, 322.0, 308.0, 322.0, 308.0, 298.0, 284.0, 313.0, 317.0, 320.0, 316.0, 318.0, 318.0, 312.0, 321.0, 293.0, 297.0, 299.0, 280.0, 294.0, 290.0, 320.0, 319.0, 284.0, 295.0, 293.0, 297.0, 290.0, 292.0, 291.0, 291.0, 290.0, 292.0, 291.0, 296.0, 321.0, 309.0, 295.0, 287.0, 275.0, 289.0, 295.0, 284.0, 291.0, 291.0, 276.0, 300.0, 290.0, 292.0, 297.0, 287.0, 320.0, 319.0, 293.0, 283.0, 291.0, 282.0, 287.0, 286.0, 313.0, 320.0, 288.0, 294.0, 318.0, 318.0, 295.0, 287.0, 289.0, 293.0, 296.0, 291.0, 295.0, 284.0, 295.0, 287.0, 298.0, 284.0, 295.0, 284.0, 314.0, 313.0, 321.0, 312.0, 283.0, 293.0, 291.0, 288.0, 273.0, 288.0, 292.0, 290.0, 284.0, 298.0, 318.0, 315.0, 290.0, 292.0, 319.0, 308.0, 290.0, 283.0, 90.0, 90.0, 288.0, 294.0, 289.0, 290.0, 313.0, 311.0, 294.0, 282.0, 294.0, 288.0, 292.0, 290.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7027120703901027, "mean_inference_ms": 1.2576525999597852, "mean_action_processing_ms": 0.13445152413405506, "mean_env_wait_ms": 0.8456862565452087, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 639.0, "episode_reward_min": 180.0, "episode_reward_mean": 590.02, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 90.0}, "policy_reward_max": {"ppo": 322.0}, "policy_reward_mean": {"ppo": 295.01}, "hist_stats": {"episode_reward": [579.0, 579.0, 636.0, 579.0, 587.0, 582.0, 630.0, 579.0, 579.0, 576.0, 579.0, 582.0, 582.0, 633.0, 630.0, 576.0, 576.0, 582.0, 579.0, 630.0, 576.0, 579.0, 582.0, 584.0, 582.0, 579.0, 582.0, 576.0, 579.0, 573.0, 636.0, 627.0, 633.0, 630.0, 582.0, 587.0, 627.0, 584.0, 633.0, 539.0, 579.0, 627.0, 576.0, 630.0, 630.0, 582.0, 630.0, 636.0, 636.0, 633.0, 590.0, 579.0, 584.0, 639.0, 579.0, 590.0, 582.0, 
582.0, 582.0, 587.0, 630.0, 582.0, 564.0, 579.0, 582.0, 576.0, 582.0, 584.0, 639.0, 576.0, 573.0, 573.0, 633.0, 582.0, 636.0, 582.0, 582.0, 587.0, 579.0, 582.0, 582.0, 579.0, 627.0, 633.0, 576.0, 579.0, 561.0, 582.0, 582.0, 633.0, 582.0, 627.0, 573.0, 180.0, 582.0, 579.0, 624.0, 576.0, 582.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [287.0, 292.0, 290.0, 289.0, 318.0, 318.0, 284.0, 295.0, 293.0, 294.0, 291.0, 291.0, 314.0, 316.0, 291.0, 288.0, 286.0, 293.0, 291.0, 285.0, 290.0, 289.0, 292.0, 290.0, 294.0, 288.0, 314.0, 319.0, 314.0, 316.0, 286.0, 290.0, 286.0, 290.0, 290.0, 292.0, 297.0, 282.0, 317.0, 313.0, 288.0, 288.0, 293.0, 286.0, 291.0, 291.0, 297.0, 287.0, 297.0, 285.0, 289.0, 290.0, 290.0, 292.0, 279.0, 297.0, 297.0, 282.0, 289.0, 284.0, 315.0, 321.0, 319.0, 308.0, 317.0, 316.0, 312.0, 318.0, 293.0, 289.0, 293.0, 294.0, 317.0, 310.0, 297.0, 287.0, 316.0, 317.0, 277.0, 262.0, 288.0, 291.0, 312.0, 315.0, 288.0, 288.0, 322.0, 308.0, 322.0, 308.0, 298.0, 284.0, 313.0, 317.0, 320.0, 316.0, 318.0, 318.0, 312.0, 321.0, 293.0, 297.0, 299.0, 280.0, 294.0, 290.0, 320.0, 319.0, 284.0, 295.0, 293.0, 297.0, 290.0, 292.0, 291.0, 291.0, 290.0, 292.0, 291.0, 296.0, 321.0, 309.0, 295.0, 287.0, 275.0, 289.0, 295.0, 284.0, 291.0, 291.0, 276.0, 300.0, 290.0, 292.0, 297.0, 287.0, 320.0, 319.0, 293.0, 283.0, 291.0, 282.0, 287.0, 286.0, 313.0, 320.0, 288.0, 294.0, 318.0, 318.0, 295.0, 287.0, 289.0, 293.0, 296.0, 291.0, 295.0, 284.0, 295.0, 287.0, 298.0, 284.0, 295.0, 284.0, 314.0, 
313.0, 321.0, 312.0, 283.0, 293.0, 291.0, 288.0, 273.0, 288.0, 292.0, 290.0, 284.0, 298.0, 318.0, 315.0, 290.0, 292.0, 319.0, 308.0, 290.0, 283.0, 90.0, 90.0, 288.0, 294.0, 289.0, 290.0, 313.0, 311.0, 294.0, 282.0, 294.0, 288.0, 292.0, 290.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7027120703901027, "mean_inference_ms": 1.2576525999597852, "mean_action_processing_ms": 0.13445152413405506, "mean_env_wait_ms": 0.8456862565452087, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 7705600, "num_agent_steps_trained": 7705600, "num_env_steps_sampled": 3852800, "num_env_steps_trained": 3852800, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 3852800, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 7705600, "timers": {"training_iteration_time_ms": 3666.048, "learn_time_ms": 1107.077, "learn_throughput": 11561.979, "synch_weights_time_ms": 11.621}, "counters": {"num_env_steps_sampled": 3852800, "num_env_steps_trained": 3852800, "num_agent_steps_sampled": 7705600, "num_agent_steps_trained": 7705600}, "done": false, "episodes_total": 9632, "training_iteration": 301, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-20-04", "timestamp": 1666581604, "time_this_iter_s": 3.587852716445923, "time_total_s": 1160.733790397644, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1160.733790397644, "timesteps_since_restore": 0, "iterations_since_restore": 301, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 24.46, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 204.0, "sparse_reward_min": 60, "sparse_reward_max": 220, "shaped_reward_mean": 182.04, "shaped_reward_min": 60, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.51, "onion_pickup_agent_0_min": 2, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 18.15, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 28, "useful_onion_pickup_agent_0_mean": 15.39, "useful_onion_pickup_agent_0_min": 2, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 17.97, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 28, "onion_drop_agent_0_mean": 0.08, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.12, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.04, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, 
"useful_onion_drop_agent_1_mean": 0.05, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.1, "potting_onion_agent_0_min": 2, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 17.81, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 28, "dish_pickup_agent_0_mean": 6.16, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.06, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.84, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.73, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.14, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.21, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.05, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.76, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.56, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.72, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.52, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.1, "optimal_onion_potting_agent_0_min": 2, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 17.81, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 28, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.1, "viable_onion_potting_agent_0_min": 2, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 17.81, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 28, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0015810655895620584, "policy_loss": 0.0012423595180734992, "vf_loss": 
7.515679359436035, "vf_explained_var": 0.569876492023468, "kl": 0.0021238639019429684, "entropy": 0.8257216215133667, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 3865600, "num_env_steps_trained": 3865600, "num_agent_steps_sampled": 7731200, "num_agent_steps_trained": 7731200}, "sampler_results": {"episode_reward_max": 639.0, "episode_reward_min": 180.0, "episode_reward_mean": 590.04, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 90.0}, "policy_reward_max": {"ppo": 323.0}, "policy_reward_mean": {"ppo": 295.02}, "custom_metrics": {"sparse_reward_mean": 204.0, "sparse_reward_min": 60, "sparse_reward_max": 220, "shaped_reward_mean": 182.04, "shaped_reward_min": 60, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.51, "onion_pickup_agent_0_min": 2, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 18.15, 
"onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 28, "useful_onion_pickup_agent_0_mean": 15.39, "useful_onion_pickup_agent_0_min": 2, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 17.97, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 28, "onion_drop_agent_0_mean": 0.08, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.12, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.04, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.05, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.1, "potting_onion_agent_0_min": 2, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 17.81, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 28, "dish_pickup_agent_0_mean": 6.16, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.06, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.84, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.73, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.14, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.21, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.05, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.76, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.56, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.72, "soup_delivery_agent_0_min": 3, 
"soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.52, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.1, "optimal_onion_potting_agent_0_min": 2, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 17.81, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 28, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.1, "viable_onion_potting_agent_0_min": 2, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 17.81, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 28, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [633.0, 630.0, 582.0, 587.0, 627.0, 584.0, 633.0, 539.0, 579.0, 627.0, 576.0, 630.0, 630.0, 582.0, 630.0, 636.0, 636.0, 633.0, 590.0, 579.0, 584.0, 639.0, 579.0, 590.0, 582.0, 582.0, 582.0, 587.0, 630.0, 582.0, 564.0, 579.0, 582.0, 576.0, 582.0, 584.0, 639.0, 576.0, 573.0, 573.0, 633.0, 582.0, 636.0, 582.0, 582.0, 587.0, 579.0, 582.0, 582.0, 579.0, 627.0, 633.0, 576.0, 579.0, 561.0, 582.0, 582.0, 633.0, 582.0, 627.0, 573.0, 180.0, 582.0, 579.0, 624.0, 576.0, 582.0, 582.0, 570.0, 579.0, 587.0, 582.0, 630.0, 627.0, 573.0, 636.0, 636.0, 582.0, 630.0, 465.0, 582.0, 579.0, 639.0, 579.0, 579.0, 530.0, 579.0, 633.0, 582.0, 630.0, 582.0, 582.0, 582.0, 627.0, 584.0, 570.0, 579.0, 633.0, 582.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [317.0, 316.0, 312.0, 318.0, 293.0, 289.0, 293.0, 294.0, 317.0, 310.0, 297.0, 287.0, 316.0, 317.0, 277.0, 262.0, 288.0, 291.0, 312.0, 315.0, 288.0, 288.0, 322.0, 308.0, 322.0, 308.0, 298.0, 284.0, 313.0, 317.0, 320.0, 316.0, 318.0, 318.0, 312.0, 321.0, 293.0, 297.0, 299.0, 280.0, 294.0, 290.0, 320.0, 319.0, 284.0, 295.0, 293.0, 297.0, 290.0, 
292.0, 291.0, 291.0, 290.0, 292.0, 291.0, 296.0, 321.0, 309.0, 295.0, 287.0, 275.0, 289.0, 295.0, 284.0, 291.0, 291.0, 276.0, 300.0, 290.0, 292.0, 297.0, 287.0, 320.0, 319.0, 293.0, 283.0, 291.0, 282.0, 287.0, 286.0, 313.0, 320.0, 288.0, 294.0, 318.0, 318.0, 295.0, 287.0, 289.0, 293.0, 296.0, 291.0, 295.0, 284.0, 295.0, 287.0, 298.0, 284.0, 295.0, 284.0, 314.0, 313.0, 321.0, 312.0, 283.0, 293.0, 291.0, 288.0, 273.0, 288.0, 292.0, 290.0, 284.0, 298.0, 318.0, 315.0, 290.0, 292.0, 319.0, 308.0, 290.0, 283.0, 90.0, 90.0, 288.0, 294.0, 289.0, 290.0, 313.0, 311.0, 294.0, 282.0, 294.0, 288.0, 292.0, 290.0, 274.0, 296.0, 288.0, 291.0, 290.0, 297.0, 288.0, 294.0, 310.0, 320.0, 304.0, 323.0, 289.0, 284.0, 319.0, 317.0, 317.0, 319.0, 293.0, 289.0, 319.0, 311.0, 236.0, 229.0, 292.0, 290.0, 289.0, 290.0, 318.0, 321.0, 294.0, 285.0, 290.0, 289.0, 264.0, 266.0, 292.0, 287.0, 317.0, 316.0, 287.0, 295.0, 308.0, 322.0, 297.0, 285.0, 292.0, 290.0, 292.0, 290.0, 323.0, 304.0, 291.0, 293.0, 290.0, 280.0, 293.0, 286.0, 318.0, 315.0, 291.0, 291.0, 289.0, 293.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7026520157753002, "mean_inference_ms": 1.2574482022329547, "mean_action_processing_ms": 0.1344425360928672, "mean_env_wait_ms": 0.8455882415302208, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 639.0, "episode_reward_min": 180.0, "episode_reward_mean": 590.04, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 90.0}, "policy_reward_max": {"ppo": 323.0}, "policy_reward_mean": {"ppo": 295.02}, "hist_stats": {"episode_reward": [633.0, 630.0, 582.0, 587.0, 627.0, 584.0, 633.0, 539.0, 579.0, 627.0, 576.0, 630.0, 630.0, 582.0, 630.0, 636.0, 636.0, 633.0, 590.0, 579.0, 584.0, 639.0, 579.0, 590.0, 582.0, 582.0, 582.0, 587.0, 630.0, 582.0, 564.0, 579.0, 582.0, 576.0, 582.0, 584.0, 639.0, 576.0, 573.0, 573.0, 633.0, 582.0, 636.0, 582.0, 582.0, 587.0, 579.0, 582.0, 582.0, 579.0, 627.0, 633.0, 576.0, 579.0, 561.0, 582.0, 582.0, 
633.0, 582.0, 627.0, 573.0, 180.0, 582.0, 579.0, 624.0, 576.0, 582.0, 582.0, 570.0, 579.0, 587.0, 582.0, 630.0, 627.0, 573.0, 636.0, 636.0, 582.0, 630.0, 465.0, 582.0, 579.0, 639.0, 579.0, 579.0, 530.0, 579.0, 633.0, 582.0, 630.0, 582.0, 582.0, 582.0, 627.0, 584.0, 570.0, 579.0, 633.0, 582.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [317.0, 316.0, 312.0, 318.0, 293.0, 289.0, 293.0, 294.0, 317.0, 310.0, 297.0, 287.0, 316.0, 317.0, 277.0, 262.0, 288.0, 291.0, 312.0, 315.0, 288.0, 288.0, 322.0, 308.0, 322.0, 308.0, 298.0, 284.0, 313.0, 317.0, 320.0, 316.0, 318.0, 318.0, 312.0, 321.0, 293.0, 297.0, 299.0, 280.0, 294.0, 290.0, 320.0, 319.0, 284.0, 295.0, 293.0, 297.0, 290.0, 292.0, 291.0, 291.0, 290.0, 292.0, 291.0, 296.0, 321.0, 309.0, 295.0, 287.0, 275.0, 289.0, 295.0, 284.0, 291.0, 291.0, 276.0, 300.0, 290.0, 292.0, 297.0, 287.0, 320.0, 319.0, 293.0, 283.0, 291.0, 282.0, 287.0, 286.0, 313.0, 320.0, 288.0, 294.0, 318.0, 318.0, 295.0, 287.0, 289.0, 293.0, 296.0, 291.0, 295.0, 284.0, 295.0, 287.0, 298.0, 284.0, 295.0, 284.0, 314.0, 313.0, 321.0, 312.0, 283.0, 293.0, 291.0, 288.0, 273.0, 288.0, 292.0, 290.0, 284.0, 298.0, 318.0, 315.0, 290.0, 292.0, 319.0, 308.0, 290.0, 283.0, 90.0, 90.0, 288.0, 294.0, 289.0, 290.0, 313.0, 311.0, 294.0, 282.0, 294.0, 288.0, 292.0, 290.0, 274.0, 296.0, 288.0, 291.0, 290.0, 297.0, 288.0, 294.0, 310.0, 320.0, 304.0, 323.0, 289.0, 284.0, 319.0, 317.0, 317.0, 319.0, 293.0, 289.0, 319.0, 311.0, 236.0, 229.0, 292.0, 290.0, 289.0, 290.0, 318.0, 
321.0, 294.0, 285.0, 290.0, 289.0, 264.0, 266.0, 292.0, 287.0, 317.0, 316.0, 287.0, 295.0, 308.0, 322.0, 297.0, 285.0, 292.0, 290.0, 292.0, 290.0, 323.0, 304.0, 291.0, 293.0, 290.0, 280.0, 293.0, 286.0, 318.0, 315.0, 291.0, 291.0, 289.0, 293.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7026520157753002, "mean_inference_ms": 1.2574482022329547, "mean_action_processing_ms": 0.1344425360928672, "mean_env_wait_ms": 0.8455882415302208, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 7731200, "num_agent_steps_trained": 7731200, "num_env_steps_sampled": 3865600, "num_env_steps_trained": 3865600, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 3865600, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 7731200, "timers": {"training_iteration_time_ms": 3653.339, "learn_time_ms": 1097.641, "learn_throughput": 11661.368, "synch_weights_time_ms": 11.078}, "counters": {"num_env_steps_sampled": 3865600, "num_env_steps_trained": 3865600, "num_agent_steps_sampled": 7731200, "num_agent_steps_trained": 7731200}, "done": false, "episodes_total": 9664, "training_iteration": 302, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-20-08", "timestamp": 1666581608, "time_this_iter_s": 3.577674388885498, "time_total_s": 1164.3114647865295, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1164.3114647865295, "timesteps_since_restore": 0, "iterations_since_restore": 302, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 21.316666666666666, "ram_util_percent": 10.633333333333333}}
+{"custom_metrics": {"sparse_reward_mean": 201.8, "sparse_reward_min": 60, "sparse_reward_max": 220, "shaped_reward_mean": 180.42, "shaped_reward_min": 60, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.23, "onion_pickup_agent_0_min": 2, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 18.14, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 28, "useful_onion_pickup_agent_0_mean": 15.06, "useful_onion_pickup_agent_0_min": 2, "useful_onion_pickup_agent_0_max": 25, "useful_onion_pickup_agent_1_mean": 17.95, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 28, "onion_drop_agent_0_mean": 0.1, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.12, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.06, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, 
"useful_onion_drop_agent_1_mean": 0.08, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 14.81, "potting_onion_agent_0_min": 2, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 17.79, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 28, "dish_pickup_agent_0_mean": 6.14, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 5.09, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.8, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.74, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.21, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.24, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.05, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.68, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.52, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.65, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.48, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.81, "optimal_onion_potting_agent_0_min": 2, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 17.79, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 28, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.81, "viable_onion_potting_agent_0_min": 2, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 17.79, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 28, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0004884917289018631, "policy_loss": -0.0008183724712580442, "vf_loss": 
7.473912239074707, "vf_explained_var": 0.607109785079956, "kl": 0.0021743732504546642, "entropy": 0.8350157737731934, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 3878400, "num_env_steps_trained": 3878400, "num_agent_steps_sampled": 7756800, "num_agent_steps_trained": 7756800}, "sampler_results": {"episode_reward_max": 639.0, "episode_reward_min": 180.0, "episode_reward_mean": 584.02, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 90.0}, "policy_reward_max": {"ppo": 323.0}, "policy_reward_mean": {"ppo": 292.01}, "custom_metrics": {"sparse_reward_mean": 201.8, "sparse_reward_min": 60, "sparse_reward_max": 220, "shaped_reward_mean": 180.42, "shaped_reward_min": 60, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.23, "onion_pickup_agent_0_min": 2, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 18.14, 
"onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 28, "useful_onion_pickup_agent_0_mean": 15.06, "useful_onion_pickup_agent_0_min": 2, "useful_onion_pickup_agent_0_max": 25, "useful_onion_pickup_agent_1_mean": 17.95, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 28, "onion_drop_agent_0_mean": 0.1, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.12, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.06, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.08, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 14.81, "potting_onion_agent_0_min": 2, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 17.79, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 28, "dish_pickup_agent_0_mean": 6.14, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 5.09, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.8, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.74, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.21, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.24, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.05, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.68, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.52, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.65, "soup_delivery_agent_0_min": 2, 
"soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.48, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.81, "optimal_onion_potting_agent_0_min": 2, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 17.79, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 28, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.81, "viable_onion_potting_agent_0_min": 2, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 17.79, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 28, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 576.0, 582.0, 584.0, 639.0, 576.0, 573.0, 573.0, 633.0, 582.0, 636.0, 582.0, 582.0, 587.0, 579.0, 582.0, 582.0, 579.0, 627.0, 633.0, 576.0, 579.0, 561.0, 582.0, 582.0, 633.0, 582.0, 627.0, 573.0, 180.0, 582.0, 579.0, 624.0, 576.0, 582.0, 582.0, 570.0, 579.0, 587.0, 582.0, 630.0, 627.0, 573.0, 636.0, 636.0, 582.0, 630.0, 465.0, 582.0, 579.0, 639.0, 579.0, 579.0, 530.0, 579.0, 633.0, 582.0, 630.0, 582.0, 582.0, 582.0, 627.0, 584.0, 570.0, 579.0, 633.0, 582.0, 582.0, 584.0, 593.0, 582.0, 630.0, 579.0, 579.0, 579.0, 582.0, 627.0, 582.0, 294.0, 579.0, 579.0, 579.0, 582.0, 630.0, 627.0, 579.0, 639.0, 587.0, 579.0, 590.0, 582.0, 576.0, 584.0, 630.0, 582.0, 587.0, 582.0, 573.0, 582.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [291.0, 291.0, 276.0, 300.0, 290.0, 292.0, 297.0, 287.0, 320.0, 319.0, 293.0, 283.0, 291.0, 282.0, 287.0, 286.0, 313.0, 320.0, 288.0, 294.0, 318.0, 318.0, 295.0, 287.0, 289.0, 293.0, 296.0, 291.0, 295.0, 284.0, 295.0, 287.0, 298.0, 284.0, 295.0, 284.0, 314.0, 313.0, 321.0, 312.0, 283.0, 293.0, 291.0, 288.0, 273.0, 288.0, 292.0, 290.0, 284.0, 
298.0, 318.0, 315.0, 290.0, 292.0, 319.0, 308.0, 290.0, 283.0, 90.0, 90.0, 288.0, 294.0, 289.0, 290.0, 313.0, 311.0, 294.0, 282.0, 294.0, 288.0, 292.0, 290.0, 274.0, 296.0, 288.0, 291.0, 290.0, 297.0, 288.0, 294.0, 310.0, 320.0, 304.0, 323.0, 289.0, 284.0, 319.0, 317.0, 317.0, 319.0, 293.0, 289.0, 319.0, 311.0, 236.0, 229.0, 292.0, 290.0, 289.0, 290.0, 318.0, 321.0, 294.0, 285.0, 290.0, 289.0, 264.0, 266.0, 292.0, 287.0, 317.0, 316.0, 287.0, 295.0, 308.0, 322.0, 297.0, 285.0, 292.0, 290.0, 292.0, 290.0, 323.0, 304.0, 291.0, 293.0, 290.0, 280.0, 293.0, 286.0, 318.0, 315.0, 291.0, 291.0, 289.0, 293.0, 298.0, 286.0, 293.0, 300.0, 296.0, 286.0, 315.0, 315.0, 288.0, 291.0, 290.0, 289.0, 285.0, 294.0, 294.0, 288.0, 310.0, 317.0, 287.0, 295.0, 149.0, 145.0, 291.0, 288.0, 290.0, 289.0, 288.0, 291.0, 287.0, 295.0, 313.0, 317.0, 311.0, 316.0, 292.0, 287.0, 318.0, 321.0, 296.0, 291.0, 285.0, 294.0, 294.0, 296.0, 285.0, 297.0, 285.0, 291.0, 289.0, 295.0, 315.0, 315.0, 291.0, 291.0, 295.0, 292.0, 291.0, 291.0, 291.0, 282.0, 284.0, 298.0, 291.0, 291.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7025833578758661, "mean_inference_ms": 1.2572199040666918, "mean_action_processing_ms": 0.13443044709578586, "mean_env_wait_ms": 0.8454600698531514, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 639.0, "episode_reward_min": 180.0, "episode_reward_mean": 584.02, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 90.0}, "policy_reward_max": {"ppo": 323.0}, "policy_reward_mean": {"ppo": 292.01}, "hist_stats": {"episode_reward": [582.0, 576.0, 582.0, 584.0, 639.0, 576.0, 573.0, 573.0, 633.0, 582.0, 636.0, 582.0, 582.0, 587.0, 579.0, 582.0, 582.0, 579.0, 627.0, 633.0, 576.0, 579.0, 561.0, 582.0, 582.0, 633.0, 582.0, 627.0, 573.0, 180.0, 582.0, 579.0, 624.0, 576.0, 582.0, 582.0, 570.0, 579.0, 587.0, 582.0, 630.0, 627.0, 573.0, 636.0, 636.0, 582.0, 630.0, 465.0, 582.0, 579.0, 639.0, 579.0, 579.0, 530.0, 579.0, 633.0, 582.0, 
630.0, 582.0, 582.0, 582.0, 627.0, 584.0, 570.0, 579.0, 633.0, 582.0, 582.0, 584.0, 593.0, 582.0, 630.0, 579.0, 579.0, 579.0, 582.0, 627.0, 582.0, 294.0, 579.0, 579.0, 579.0, 582.0, 630.0, 627.0, 579.0, 639.0, 587.0, 579.0, 590.0, 582.0, 576.0, 584.0, 630.0, 582.0, 587.0, 582.0, 573.0, 582.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [291.0, 291.0, 276.0, 300.0, 290.0, 292.0, 297.0, 287.0, 320.0, 319.0, 293.0, 283.0, 291.0, 282.0, 287.0, 286.0, 313.0, 320.0, 288.0, 294.0, 318.0, 318.0, 295.0, 287.0, 289.0, 293.0, 296.0, 291.0, 295.0, 284.0, 295.0, 287.0, 298.0, 284.0, 295.0, 284.0, 314.0, 313.0, 321.0, 312.0, 283.0, 293.0, 291.0, 288.0, 273.0, 288.0, 292.0, 290.0, 284.0, 298.0, 318.0, 315.0, 290.0, 292.0, 319.0, 308.0, 290.0, 283.0, 90.0, 90.0, 288.0, 294.0, 289.0, 290.0, 313.0, 311.0, 294.0, 282.0, 294.0, 288.0, 292.0, 290.0, 274.0, 296.0, 288.0, 291.0, 290.0, 297.0, 288.0, 294.0, 310.0, 320.0, 304.0, 323.0, 289.0, 284.0, 319.0, 317.0, 317.0, 319.0, 293.0, 289.0, 319.0, 311.0, 236.0, 229.0, 292.0, 290.0, 289.0, 290.0, 318.0, 321.0, 294.0, 285.0, 290.0, 289.0, 264.0, 266.0, 292.0, 287.0, 317.0, 316.0, 287.0, 295.0, 308.0, 322.0, 297.0, 285.0, 292.0, 290.0, 292.0, 290.0, 323.0, 304.0, 291.0, 293.0, 290.0, 280.0, 293.0, 286.0, 318.0, 315.0, 291.0, 291.0, 289.0, 293.0, 298.0, 286.0, 293.0, 300.0, 296.0, 286.0, 315.0, 315.0, 288.0, 291.0, 290.0, 289.0, 285.0, 294.0, 294.0, 288.0, 310.0, 317.0, 287.0, 295.0, 149.0, 145.0, 291.0, 288.0, 290.0, 289.0, 288.0, 291.0, 287.0, 
295.0, 313.0, 317.0, 311.0, 316.0, 292.0, 287.0, 318.0, 321.0, 296.0, 291.0, 285.0, 294.0, 294.0, 296.0, 285.0, 297.0, 285.0, 291.0, 289.0, 295.0, 315.0, 315.0, 291.0, 291.0, 295.0, 292.0, 291.0, 291.0, 291.0, 282.0, 284.0, 298.0, 291.0, 291.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7025833578758661, "mean_inference_ms": 1.2572199040666918, "mean_action_processing_ms": 0.13443044709578586, "mean_env_wait_ms": 0.8454600698531514, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 7756800, "num_agent_steps_trained": 7756800, "num_env_steps_sampled": 3878400, "num_env_steps_trained": 3878400, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 3878400, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 7756800, "timers": {"training_iteration_time_ms": 3641.816, "learn_time_ms": 1098.084, "learn_throughput": 11656.669, "synch_weights_time_ms": 11.273}, "counters": {"num_env_steps_sampled": 3878400, "num_env_steps_trained": 3878400, "num_agent_steps_sampled": 7756800, "num_agent_steps_trained": 7756800}, "done": false, "episodes_total": 9696, "training_iteration": 303, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-20-12", "timestamp": 1666581612, "time_this_iter_s": 3.5357253551483154, "time_total_s": 1167.8471901416779, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1167.8471901416779, "timesteps_since_restore": 0, "iterations_since_restore": 303, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 22.98, "ram_util_percent": 10.62}}
+{"custom_metrics": {"sparse_reward_mean": 203.4, "sparse_reward_min": 100, "sparse_reward_max": 220, "shaped_reward_mean": 181.61, "shaped_reward_min": 94, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.35, "onion_pickup_agent_0_min": 4, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 18.21, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 27, "useful_onion_pickup_agent_0_mean": 15.16, "useful_onion_pickup_agent_0_min": 4, "useful_onion_pickup_agent_0_max": 25, "useful_onion_pickup_agent_1_mean": 18.06, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 27, "onion_drop_agent_0_mean": 0.13, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.11, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.07, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, 
"useful_onion_drop_agent_1_mean": 0.08, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 14.88, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 17.87, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 27, "dish_pickup_agent_0_mean": 6.12, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 5.16, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.77, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.8, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.22, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.23, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.67, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.66, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.65, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.59, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.88, "optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 17.87, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 27, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.88, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 17.87, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 27, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -6.053224205970764e-05, "policy_loss": -0.00038139696698635817, "vf_loss": 
7.4262614250183105, "vf_explained_var": 0.581686794757843, "kl": 0.002229546196758747, "entropy": 0.843519926071167, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 3891200, "num_env_steps_trained": 3891200, "num_agent_steps_sampled": 7782400, "num_agent_steps_trained": 7782400}, "sampler_results": {"episode_reward_max": 639.0, "episode_reward_min": 294.0, "episode_reward_mean": 588.41, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 145.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 294.205}, "custom_metrics": {"sparse_reward_mean": 203.4, "sparse_reward_min": 100, "sparse_reward_max": 220, "shaped_reward_mean": 181.61, "shaped_reward_min": 94, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.35, "onion_pickup_agent_0_min": 4, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 18.21, 
"onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 27, "useful_onion_pickup_agent_0_mean": 15.16, "useful_onion_pickup_agent_0_min": 4, "useful_onion_pickup_agent_0_max": 25, "useful_onion_pickup_agent_1_mean": 18.06, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 27, "onion_drop_agent_0_mean": 0.13, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.11, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.07, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.08, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 14.88, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 17.87, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 27, "dish_pickup_agent_0_mean": 6.12, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 5.16, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.77, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.8, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.22, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.23, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.67, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.66, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.65, "soup_delivery_agent_0_min": 2, 
"soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.59, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.88, "optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 17.87, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 27, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.88, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 17.87, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 27, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [624.0, 576.0, 582.0, 582.0, 570.0, 579.0, 587.0, 582.0, 630.0, 627.0, 573.0, 636.0, 636.0, 582.0, 630.0, 465.0, 582.0, 579.0, 639.0, 579.0, 579.0, 530.0, 579.0, 633.0, 582.0, 630.0, 582.0, 582.0, 582.0, 627.0, 584.0, 570.0, 579.0, 633.0, 582.0, 582.0, 584.0, 593.0, 582.0, 630.0, 579.0, 579.0, 579.0, 582.0, 627.0, 582.0, 294.0, 579.0, 579.0, 579.0, 582.0, 630.0, 627.0, 579.0, 639.0, 587.0, 579.0, 590.0, 582.0, 576.0, 584.0, 630.0, 582.0, 587.0, 582.0, 573.0, 582.0, 582.0, 627.0, 582.0, 579.0, 579.0, 584.0, 576.0, 630.0, 581.0, 579.0, 630.0, 582.0, 633.0, 587.0, 584.0, 576.0, 536.0, 582.0, 576.0, 582.0, 627.0, 624.0, 630.0, 576.0, 587.0, 569.0, 579.0, 582.0, 576.0, 630.0, 579.0, 573.0, 627.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [313.0, 311.0, 294.0, 282.0, 294.0, 288.0, 292.0, 290.0, 274.0, 296.0, 288.0, 291.0, 290.0, 297.0, 288.0, 294.0, 310.0, 320.0, 304.0, 323.0, 289.0, 284.0, 319.0, 317.0, 317.0, 319.0, 293.0, 289.0, 319.0, 311.0, 236.0, 229.0, 292.0, 290.0, 289.0, 290.0, 318.0, 321.0, 294.0, 285.0, 290.0, 289.0, 264.0, 266.0, 292.0, 287.0, 317.0, 316.0, 287.0, 
295.0, 308.0, 322.0, 297.0, 285.0, 292.0, 290.0, 292.0, 290.0, 323.0, 304.0, 291.0, 293.0, 290.0, 280.0, 293.0, 286.0, 318.0, 315.0, 291.0, 291.0, 289.0, 293.0, 298.0, 286.0, 293.0, 300.0, 296.0, 286.0, 315.0, 315.0, 288.0, 291.0, 290.0, 289.0, 285.0, 294.0, 294.0, 288.0, 310.0, 317.0, 287.0, 295.0, 149.0, 145.0, 291.0, 288.0, 290.0, 289.0, 288.0, 291.0, 287.0, 295.0, 313.0, 317.0, 311.0, 316.0, 292.0, 287.0, 318.0, 321.0, 296.0, 291.0, 285.0, 294.0, 294.0, 296.0, 285.0, 297.0, 285.0, 291.0, 289.0, 295.0, 315.0, 315.0, 291.0, 291.0, 295.0, 292.0, 291.0, 291.0, 291.0, 282.0, 284.0, 298.0, 291.0, 291.0, 313.0, 314.0, 289.0, 293.0, 290.0, 289.0, 287.0, 292.0, 291.0, 293.0, 289.0, 287.0, 315.0, 315.0, 285.0, 296.0, 292.0, 287.0, 314.0, 316.0, 293.0, 289.0, 320.0, 313.0, 287.0, 300.0, 288.0, 296.0, 283.0, 293.0, 267.0, 269.0, 287.0, 295.0, 288.0, 288.0, 290.0, 292.0, 317.0, 310.0, 304.0, 320.0, 318.0, 312.0, 290.0, 286.0, 295.0, 292.0, 275.0, 294.0, 285.0, 294.0, 292.0, 290.0, 289.0, 287.0, 306.0, 324.0, 292.0, 287.0, 289.0, 284.0, 316.0, 311.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7025108781361386, "mean_inference_ms": 1.2569913037664557, "mean_action_processing_ms": 0.1344181305836977, "mean_env_wait_ms": 0.8453296828003616, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 639.0, "episode_reward_min": 294.0, "episode_reward_mean": 588.41, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 145.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 294.205}, "hist_stats": {"episode_reward": [624.0, 576.0, 582.0, 582.0, 570.0, 579.0, 587.0, 582.0, 630.0, 627.0, 573.0, 636.0, 636.0, 582.0, 630.0, 465.0, 582.0, 579.0, 639.0, 579.0, 579.0, 530.0, 579.0, 633.0, 582.0, 630.0, 582.0, 582.0, 582.0, 627.0, 584.0, 570.0, 579.0, 633.0, 582.0, 582.0, 584.0, 593.0, 582.0, 630.0, 579.0, 579.0, 579.0, 582.0, 627.0, 582.0, 294.0, 579.0, 579.0, 579.0, 582.0, 630.0, 627.0, 579.0, 639.0, 587.0, 579.0, 
590.0, 582.0, 576.0, 584.0, 630.0, 582.0, 587.0, 582.0, 573.0, 582.0, 582.0, 627.0, 582.0, 579.0, 579.0, 584.0, 576.0, 630.0, 581.0, 579.0, 630.0, 582.0, 633.0, 587.0, 584.0, 576.0, 536.0, 582.0, 576.0, 582.0, 627.0, 624.0, 630.0, 576.0, 587.0, 569.0, 579.0, 582.0, 576.0, 630.0, 579.0, 573.0, 627.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [313.0, 311.0, 294.0, 282.0, 294.0, 288.0, 292.0, 290.0, 274.0, 296.0, 288.0, 291.0, 290.0, 297.0, 288.0, 294.0, 310.0, 320.0, 304.0, 323.0, 289.0, 284.0, 319.0, 317.0, 317.0, 319.0, 293.0, 289.0, 319.0, 311.0, 236.0, 229.0, 292.0, 290.0, 289.0, 290.0, 318.0, 321.0, 294.0, 285.0, 290.0, 289.0, 264.0, 266.0, 292.0, 287.0, 317.0, 316.0, 287.0, 295.0, 308.0, 322.0, 297.0, 285.0, 292.0, 290.0, 292.0, 290.0, 323.0, 304.0, 291.0, 293.0, 290.0, 280.0, 293.0, 286.0, 318.0, 315.0, 291.0, 291.0, 289.0, 293.0, 298.0, 286.0, 293.0, 300.0, 296.0, 286.0, 315.0, 315.0, 288.0, 291.0, 290.0, 289.0, 285.0, 294.0, 294.0, 288.0, 310.0, 317.0, 287.0, 295.0, 149.0, 145.0, 291.0, 288.0, 290.0, 289.0, 288.0, 291.0, 287.0, 295.0, 313.0, 317.0, 311.0, 316.0, 292.0, 287.0, 318.0, 321.0, 296.0, 291.0, 285.0, 294.0, 294.0, 296.0, 285.0, 297.0, 285.0, 291.0, 289.0, 295.0, 315.0, 315.0, 291.0, 291.0, 295.0, 292.0, 291.0, 291.0, 291.0, 282.0, 284.0, 298.0, 291.0, 291.0, 313.0, 314.0, 289.0, 293.0, 290.0, 289.0, 287.0, 292.0, 291.0, 293.0, 289.0, 287.0, 315.0, 315.0, 285.0, 296.0, 292.0, 287.0, 314.0, 316.0, 293.0, 289.0, 320.0, 313.0, 287.0, 300.0, 288.0, 296.0, 283.0, 
293.0, 267.0, 269.0, 287.0, 295.0, 288.0, 288.0, 290.0, 292.0, 317.0, 310.0, 304.0, 320.0, 318.0, 312.0, 290.0, 286.0, 295.0, 292.0, 275.0, 294.0, 285.0, 294.0, 292.0, 290.0, 289.0, 287.0, 306.0, 324.0, 292.0, 287.0, 289.0, 284.0, 316.0, 311.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7025108781361386, "mean_inference_ms": 1.2569913037664557, "mean_action_processing_ms": 0.1344181305836977, "mean_env_wait_ms": 0.8453296828003616, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 7782400, "num_agent_steps_trained": 7782400, "num_env_steps_sampled": 3891200, "num_env_steps_trained": 3891200, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 3891200, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 7782400, "timers": {"training_iteration_time_ms": 3636.039, "learn_time_ms": 1086.337, "learn_throughput": 11782.72, "synch_weights_time_ms": 11.233}, "counters": {"num_env_steps_sampled": 3891200, "num_env_steps_trained": 3891200, "num_agent_steps_sampled": 7782400, "num_agent_steps_trained": 7782400}, "done": false, "episodes_total": 9728, "training_iteration": 304, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-20-15", "timestamp": 1666581615, "time_this_iter_s": 3.5915231704711914, "time_total_s": 1171.438713312149, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1171.438713312149, "timesteps_since_restore": 0, "iterations_since_restore": 304, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.979999999999997, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 203.4, "sparse_reward_min": 100, "sparse_reward_max": 220, "shaped_reward_mean": 181.91, "shaped_reward_min": 94, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.69, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 18.03, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 27, "useful_onion_pickup_agent_0_mean": 15.51, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 25, "useful_onion_pickup_agent_1_mean": 17.85, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 27, "onion_drop_agent_0_mean": 0.15, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.13, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.06, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, 
"useful_onion_drop_agent_1_mean": 0.09, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.24, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 17.6, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 27, "dish_pickup_agent_0_mean": 6.04, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 5.29, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.63, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.95, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.28, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.22, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.51, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.82, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.48, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.76, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.24, "optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 17.6, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 27, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.24, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 17.6, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 27, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0005292992573231459, "policy_loss": 0.00019212259212508798, "vf_loss": 
7.56189489364624, "vf_explained_var": 0.5613787174224854, "kl": 0.0021602341439574957, "entropy": 0.8380213379859924, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 3904000, "num_env_steps_trained": 3904000, "num_agent_steps_sampled": 7808000, "num_agent_steps_trained": 7808000}, "sampler_results": {"episode_reward_max": 639.0, "episode_reward_min": 294.0, "episode_reward_mean": 588.71, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 145.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 294.355}, "custom_metrics": {"sparse_reward_mean": 203.4, "sparse_reward_min": 100, "sparse_reward_max": 220, "shaped_reward_mean": 181.91, "shaped_reward_min": 94, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.69, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 18.03, 
"onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 27, "useful_onion_pickup_agent_0_mean": 15.51, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 25, "useful_onion_pickup_agent_1_mean": 17.85, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 27, "onion_drop_agent_0_mean": 0.15, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.13, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.06, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.09, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.24, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 17.6, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 27, "dish_pickup_agent_0_mean": 6.04, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 5.29, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.63, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.95, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.28, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.22, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.51, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.82, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.48, "soup_delivery_agent_0_min": 2, 
"soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.76, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.24, "optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 17.6, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 27, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.24, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 17.6, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 27, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 633.0, 582.0, 582.0, 584.0, 593.0, 582.0, 630.0, 579.0, 579.0, 579.0, 582.0, 627.0, 582.0, 294.0, 579.0, 579.0, 579.0, 582.0, 630.0, 627.0, 579.0, 639.0, 587.0, 579.0, 590.0, 582.0, 576.0, 584.0, 630.0, 582.0, 587.0, 582.0, 573.0, 582.0, 582.0, 627.0, 582.0, 579.0, 579.0, 584.0, 576.0, 630.0, 581.0, 579.0, 630.0, 582.0, 633.0, 587.0, 584.0, 576.0, 536.0, 582.0, 576.0, 582.0, 627.0, 624.0, 630.0, 576.0, 587.0, 569.0, 579.0, 582.0, 576.0, 630.0, 579.0, 573.0, 627.0, 576.0, 522.0, 582.0, 576.0, 576.0, 630.0, 579.0, 579.0, 579.0, 636.0, 570.0, 636.0, 633.0, 582.0, 576.0, 582.0, 582.0, 636.0, 587.0, 582.0, 582.0, 630.0, 579.0, 573.0, 636.0, 587.0, 630.0, 582.0, 582.0, 587.0, 582.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [293.0, 286.0, 318.0, 315.0, 291.0, 291.0, 289.0, 293.0, 298.0, 286.0, 293.0, 300.0, 296.0, 286.0, 315.0, 315.0, 288.0, 291.0, 290.0, 289.0, 285.0, 294.0, 294.0, 288.0, 310.0, 317.0, 287.0, 295.0, 149.0, 145.0, 291.0, 288.0, 290.0, 289.0, 288.0, 291.0, 287.0, 295.0, 313.0, 317.0, 311.0, 316.0, 292.0, 287.0, 318.0, 321.0, 296.0, 291.0, 285.0, 
294.0, 294.0, 296.0, 285.0, 297.0, 285.0, 291.0, 289.0, 295.0, 315.0, 315.0, 291.0, 291.0, 295.0, 292.0, 291.0, 291.0, 291.0, 282.0, 284.0, 298.0, 291.0, 291.0, 313.0, 314.0, 289.0, 293.0, 290.0, 289.0, 287.0, 292.0, 291.0, 293.0, 289.0, 287.0, 315.0, 315.0, 285.0, 296.0, 292.0, 287.0, 314.0, 316.0, 293.0, 289.0, 320.0, 313.0, 287.0, 300.0, 288.0, 296.0, 283.0, 293.0, 267.0, 269.0, 287.0, 295.0, 288.0, 288.0, 290.0, 292.0, 317.0, 310.0, 304.0, 320.0, 318.0, 312.0, 290.0, 286.0, 295.0, 292.0, 275.0, 294.0, 285.0, 294.0, 292.0, 290.0, 289.0, 287.0, 306.0, 324.0, 292.0, 287.0, 289.0, 284.0, 316.0, 311.0, 287.0, 289.0, 256.0, 266.0, 288.0, 294.0, 293.0, 283.0, 287.0, 289.0, 310.0, 320.0, 291.0, 288.0, 291.0, 288.0, 292.0, 287.0, 316.0, 320.0, 281.0, 289.0, 317.0, 319.0, 316.0, 317.0, 294.0, 288.0, 282.0, 294.0, 287.0, 295.0, 288.0, 294.0, 320.0, 316.0, 296.0, 291.0, 286.0, 296.0, 292.0, 290.0, 319.0, 311.0, 293.0, 286.0, 283.0, 290.0, 319.0, 317.0, 289.0, 298.0, 314.0, 316.0, 288.0, 294.0, 296.0, 286.0, 295.0, 292.0, 288.0, 294.0, 288.0, 291.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7024161841210586, "mean_inference_ms": 1.2567649124208453, "mean_action_processing_ms": 0.1344072531232298, "mean_env_wait_ms": 0.845211433040442, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 639.0, "episode_reward_min": 294.0, "episode_reward_mean": 588.71, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 145.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 294.355}, "hist_stats": {"episode_reward": [579.0, 633.0, 582.0, 582.0, 584.0, 593.0, 582.0, 630.0, 579.0, 579.0, 579.0, 582.0, 627.0, 582.0, 294.0, 579.0, 579.0, 579.0, 582.0, 630.0, 627.0, 579.0, 639.0, 587.0, 579.0, 590.0, 582.0, 576.0, 584.0, 630.0, 582.0, 587.0, 582.0, 573.0, 582.0, 582.0, 627.0, 582.0, 579.0, 579.0, 584.0, 576.0, 630.0, 581.0, 579.0, 630.0, 582.0, 633.0, 587.0, 584.0, 576.0, 536.0, 582.0, 576.0, 582.0, 627.0, 624.0, 
630.0, 576.0, 587.0, 569.0, 579.0, 582.0, 576.0, 630.0, 579.0, 573.0, 627.0, 576.0, 522.0, 582.0, 576.0, 576.0, 630.0, 579.0, 579.0, 579.0, 636.0, 570.0, 636.0, 633.0, 582.0, 576.0, 582.0, 582.0, 636.0, 587.0, 582.0, 582.0, 630.0, 579.0, 573.0, 636.0, 587.0, 630.0, 582.0, 582.0, 587.0, 582.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [293.0, 286.0, 318.0, 315.0, 291.0, 291.0, 289.0, 293.0, 298.0, 286.0, 293.0, 300.0, 296.0, 286.0, 315.0, 315.0, 288.0, 291.0, 290.0, 289.0, 285.0, 294.0, 294.0, 288.0, 310.0, 317.0, 287.0, 295.0, 149.0, 145.0, 291.0, 288.0, 290.0, 289.0, 288.0, 291.0, 287.0, 295.0, 313.0, 317.0, 311.0, 316.0, 292.0, 287.0, 318.0, 321.0, 296.0, 291.0, 285.0, 294.0, 294.0, 296.0, 285.0, 297.0, 285.0, 291.0, 289.0, 295.0, 315.0, 315.0, 291.0, 291.0, 295.0, 292.0, 291.0, 291.0, 291.0, 282.0, 284.0, 298.0, 291.0, 291.0, 313.0, 314.0, 289.0, 293.0, 290.0, 289.0, 287.0, 292.0, 291.0, 293.0, 289.0, 287.0, 315.0, 315.0, 285.0, 296.0, 292.0, 287.0, 314.0, 316.0, 293.0, 289.0, 320.0, 313.0, 287.0, 300.0, 288.0, 296.0, 283.0, 293.0, 267.0, 269.0, 287.0, 295.0, 288.0, 288.0, 290.0, 292.0, 317.0, 310.0, 304.0, 320.0, 318.0, 312.0, 290.0, 286.0, 295.0, 292.0, 275.0, 294.0, 285.0, 294.0, 292.0, 290.0, 289.0, 287.0, 306.0, 324.0, 292.0, 287.0, 289.0, 284.0, 316.0, 311.0, 287.0, 289.0, 256.0, 266.0, 288.0, 294.0, 293.0, 283.0, 287.0, 289.0, 310.0, 320.0, 291.0, 288.0, 291.0, 288.0, 292.0, 287.0, 316.0, 320.0, 281.0, 289.0, 317.0, 319.0, 316.0, 317.0, 294.0, 288.0, 282.0, 
294.0, 287.0, 295.0, 288.0, 294.0, 320.0, 316.0, 296.0, 291.0, 286.0, 296.0, 292.0, 290.0, 319.0, 311.0, 293.0, 286.0, 283.0, 290.0, 319.0, 317.0, 289.0, 298.0, 314.0, 316.0, 288.0, 294.0, 296.0, 286.0, 295.0, 292.0, 288.0, 294.0, 288.0, 291.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7024161841210586, "mean_inference_ms": 1.2567649124208453, "mean_action_processing_ms": 0.1344072531232298, "mean_env_wait_ms": 0.845211433040442, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 7808000, "num_agent_steps_trained": 7808000, "num_env_steps_sampled": 3904000, "num_env_steps_trained": 3904000, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 3904000, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 7808000, "timers": {"training_iteration_time_ms": 3619.018, "learn_time_ms": 1074.683, "learn_throughput": 11910.493, "synch_weights_time_ms": 10.387}, "counters": {"num_env_steps_sampled": 3904000, "num_env_steps_trained": 3904000, "num_agent_steps_sampled": 7808000, "num_agent_steps_trained": 7808000}, "done": false, "episodes_total": 9760, "training_iteration": 305, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-20-19", "timestamp": 1666581619, "time_this_iter_s": 3.606945037841797, "time_total_s": 1175.0456583499908, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1175.0456583499908, "timesteps_since_restore": 0, "iterations_since_restore": 305, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 21.616666666666664, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 204.6, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 181.85, "shaped_reward_min": 156, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.49, "onion_pickup_agent_0_min": 4, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 18.42, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 29, "useful_onion_pickup_agent_0_mean": 15.23, "useful_onion_pickup_agent_0_min": 4, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 18.14, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 29, "onion_drop_agent_0_mean": 0.25, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.21, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.16, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, 
"useful_onion_drop_agent_1_mean": 0.1, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 14.93, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 17.9, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 29, "dish_pickup_agent_0_mean": 6.11, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.16, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.68, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.84, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.22, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.18, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.6, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 10, "soup_pickup_agent_1_mean": 4.76, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.58, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 10, "soup_delivery_agent_1_mean": 4.7, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.93, "optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 17.9, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 29, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.93, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 17.9, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 29, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0010284465970471501, "policy_loss": 0.0006863062153570354, "vf_loss": 
7.621923446655273, "vf_explained_var": 0.5728992223739624, "kl": 0.0019947909750044346, "entropy": 0.8400977849960327, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 3916800, "num_env_steps_trained": 3916800, "num_agent_steps_sampled": 7833600, "num_agent_steps_trained": 7833600}, "sampler_results": {"episode_reward_max": 636.0, "episode_reward_min": 516.0, "episode_reward_mean": 591.05, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 256.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 295.525}, "custom_metrics": {"sparse_reward_mean": 204.6, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 181.85, "shaped_reward_min": 156, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.49, "onion_pickup_agent_0_min": 4, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 18.42, 
"onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 29, "useful_onion_pickup_agent_0_mean": 15.23, "useful_onion_pickup_agent_0_min": 4, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 18.14, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 29, "onion_drop_agent_0_mean": 0.25, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.21, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.16, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.1, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 14.93, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 17.9, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 29, "dish_pickup_agent_0_mean": 6.11, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.16, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.68, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.84, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.22, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.18, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.6, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 10, "soup_pickup_agent_1_mean": 4.76, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.58, "soup_delivery_agent_0_min": 3, 
"soup_delivery_agent_0_max": 10, "soup_delivery_agent_1_mean": 4.7, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.93, "optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 17.9, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 29, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.93, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 17.9, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 29, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 573.0, 582.0, 582.0, 627.0, 582.0, 579.0, 579.0, 584.0, 576.0, 630.0, 581.0, 579.0, 630.0, 582.0, 633.0, 587.0, 584.0, 576.0, 536.0, 582.0, 576.0, 582.0, 627.0, 624.0, 630.0, 576.0, 587.0, 569.0, 579.0, 582.0, 576.0, 630.0, 579.0, 573.0, 627.0, 576.0, 522.0, 582.0, 576.0, 576.0, 630.0, 579.0, 579.0, 579.0, 636.0, 570.0, 636.0, 633.0, 582.0, 576.0, 582.0, 582.0, 636.0, 587.0, 582.0, 582.0, 630.0, 579.0, 573.0, 636.0, 587.0, 630.0, 582.0, 582.0, 587.0, 582.0, 579.0, 587.0, 582.0, 582.0, 573.0, 579.0, 570.0, 627.0, 633.0, 636.0, 636.0, 579.0, 573.0, 584.0, 627.0, 582.0, 516.0, 576.0, 579.0, 627.0, 633.0, 570.0, 630.0, 627.0, 570.0, 579.0, 579.0, 576.0, 582.0, 584.0, 579.0, 579.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [291.0, 291.0, 291.0, 282.0, 284.0, 298.0, 291.0, 291.0, 313.0, 314.0, 289.0, 293.0, 290.0, 289.0, 287.0, 292.0, 291.0, 293.0, 289.0, 287.0, 315.0, 315.0, 285.0, 296.0, 292.0, 287.0, 314.0, 316.0, 293.0, 289.0, 320.0, 313.0, 287.0, 300.0, 288.0, 296.0, 283.0, 293.0, 267.0, 269.0, 287.0, 295.0, 288.0, 288.0, 290.0, 292.0, 317.0, 310.0, 304.0, 
320.0, 318.0, 312.0, 290.0, 286.0, 295.0, 292.0, 275.0, 294.0, 285.0, 294.0, 292.0, 290.0, 289.0, 287.0, 306.0, 324.0, 292.0, 287.0, 289.0, 284.0, 316.0, 311.0, 287.0, 289.0, 256.0, 266.0, 288.0, 294.0, 293.0, 283.0, 287.0, 289.0, 310.0, 320.0, 291.0, 288.0, 291.0, 288.0, 292.0, 287.0, 316.0, 320.0, 281.0, 289.0, 317.0, 319.0, 316.0, 317.0, 294.0, 288.0, 282.0, 294.0, 287.0, 295.0, 288.0, 294.0, 320.0, 316.0, 296.0, 291.0, 286.0, 296.0, 292.0, 290.0, 319.0, 311.0, 293.0, 286.0, 283.0, 290.0, 319.0, 317.0, 289.0, 298.0, 314.0, 316.0, 288.0, 294.0, 296.0, 286.0, 295.0, 292.0, 288.0, 294.0, 288.0, 291.0, 293.0, 294.0, 289.0, 293.0, 289.0, 293.0, 285.0, 288.0, 288.0, 291.0, 282.0, 288.0, 320.0, 307.0, 310.0, 323.0, 316.0, 320.0, 317.0, 319.0, 286.0, 293.0, 287.0, 286.0, 286.0, 298.0, 312.0, 315.0, 291.0, 291.0, 258.0, 258.0, 284.0, 292.0, 286.0, 293.0, 317.0, 310.0, 312.0, 321.0, 284.0, 286.0, 310.0, 320.0, 310.0, 317.0, 287.0, 283.0, 289.0, 290.0, 290.0, 289.0, 294.0, 282.0, 287.0, 295.0, 287.0, 297.0, 289.0, 290.0, 293.0, 286.0, 280.0, 296.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7023278356550204, "mean_inference_ms": 1.2565710368914762, "mean_action_processing_ms": 0.13440078245615697, "mean_env_wait_ms": 0.8451295423921351, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 636.0, "episode_reward_min": 516.0, "episode_reward_mean": 591.05, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 256.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 295.525}, "hist_stats": {"episode_reward": [582.0, 573.0, 582.0, 582.0, 627.0, 582.0, 579.0, 579.0, 584.0, 576.0, 630.0, 581.0, 579.0, 630.0, 582.0, 633.0, 587.0, 584.0, 576.0, 536.0, 582.0, 576.0, 582.0, 627.0, 624.0, 630.0, 576.0, 587.0, 569.0, 579.0, 582.0, 576.0, 630.0, 579.0, 573.0, 627.0, 576.0, 522.0, 582.0, 576.0, 576.0, 630.0, 579.0, 579.0, 579.0, 636.0, 570.0, 636.0, 633.0, 582.0, 576.0, 582.0, 582.0, 636.0, 587.0, 582.0, 
582.0, 630.0, 579.0, 573.0, 636.0, 587.0, 630.0, 582.0, 582.0, 587.0, 582.0, 579.0, 587.0, 582.0, 582.0, 573.0, 579.0, 570.0, 627.0, 633.0, 636.0, 636.0, 579.0, 573.0, 584.0, 627.0, 582.0, 516.0, 576.0, 579.0, 627.0, 633.0, 570.0, 630.0, 627.0, 570.0, 579.0, 579.0, 576.0, 582.0, 584.0, 579.0, 579.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [291.0, 291.0, 291.0, 282.0, 284.0, 298.0, 291.0, 291.0, 313.0, 314.0, 289.0, 293.0, 290.0, 289.0, 287.0, 292.0, 291.0, 293.0, 289.0, 287.0, 315.0, 315.0, 285.0, 296.0, 292.0, 287.0, 314.0, 316.0, 293.0, 289.0, 320.0, 313.0, 287.0, 300.0, 288.0, 296.0, 283.0, 293.0, 267.0, 269.0, 287.0, 295.0, 288.0, 288.0, 290.0, 292.0, 317.0, 310.0, 304.0, 320.0, 318.0, 312.0, 290.0, 286.0, 295.0, 292.0, 275.0, 294.0, 285.0, 294.0, 292.0, 290.0, 289.0, 287.0, 306.0, 324.0, 292.0, 287.0, 289.0, 284.0, 316.0, 311.0, 287.0, 289.0, 256.0, 266.0, 288.0, 294.0, 293.0, 283.0, 287.0, 289.0, 310.0, 320.0, 291.0, 288.0, 291.0, 288.0, 292.0, 287.0, 316.0, 320.0, 281.0, 289.0, 317.0, 319.0, 316.0, 317.0, 294.0, 288.0, 282.0, 294.0, 287.0, 295.0, 288.0, 294.0, 320.0, 316.0, 296.0, 291.0, 286.0, 296.0, 292.0, 290.0, 319.0, 311.0, 293.0, 286.0, 283.0, 290.0, 319.0, 317.0, 289.0, 298.0, 314.0, 316.0, 288.0, 294.0, 296.0, 286.0, 295.0, 292.0, 288.0, 294.0, 288.0, 291.0, 293.0, 294.0, 289.0, 293.0, 289.0, 293.0, 285.0, 288.0, 288.0, 291.0, 282.0, 288.0, 320.0, 307.0, 310.0, 323.0, 316.0, 320.0, 317.0, 319.0, 286.0, 293.0, 287.0, 286.0, 286.0, 298.0, 312.0, 315.0, 
291.0, 291.0, 258.0, 258.0, 284.0, 292.0, 286.0, 293.0, 317.0, 310.0, 312.0, 321.0, 284.0, 286.0, 310.0, 320.0, 310.0, 317.0, 287.0, 283.0, 289.0, 290.0, 290.0, 289.0, 294.0, 282.0, 287.0, 295.0, 287.0, 297.0, 289.0, 290.0, 293.0, 286.0, 280.0, 296.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7023278356550204, "mean_inference_ms": 1.2565710368914762, "mean_action_processing_ms": 0.13440078245615697, "mean_env_wait_ms": 0.8451295423921351, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 7833600, "num_agent_steps_trained": 7833600, "num_env_steps_sampled": 3916800, "num_env_steps_trained": 3916800, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 3916800, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 7833600, "timers": {"training_iteration_time_ms": 3558.221, "learn_time_ms": 1059.646, "learn_throughput": 12079.502, "synch_weights_time_ms": 10.299}, "counters": {"num_env_steps_sampled": 3916800, "num_env_steps_trained": 3916800, "num_agent_steps_sampled": 7833600, "num_agent_steps_trained": 7833600}, "done": false, "episodes_total": 9792, "training_iteration": 306, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-20-23", "timestamp": 1666581623, "time_this_iter_s": 3.6415607929229736, "time_total_s": 1178.6872191429138, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1178.6872191429138, "timesteps_since_restore": 0, "iterations_since_restore": 306, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 24.16, "ram_util_percent": 10.62}}
+{"custom_metrics": {"sparse_reward_mean": 203.8, "sparse_reward_min": 60, "sparse_reward_max": 220, "shaped_reward_mean": 181.61, "shaped_reward_min": 71, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.39, "onion_pickup_agent_0_min": 4, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 18.58, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 29, "useful_onion_pickup_agent_0_mean": 15.13, "useful_onion_pickup_agent_0_min": 4, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 18.24, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 29, "onion_drop_agent_0_mean": 0.25, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.28, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.16, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, 
"useful_onion_drop_agent_1_mean": 0.14, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 14.89, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 17.92, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 29, "dish_pickup_agent_0_mean": 6.22, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.1, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.75, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.81, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.3, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.15, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.59, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 10, "soup_pickup_agent_1_mean": 4.71, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.55, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 10, "soup_delivery_agent_1_mean": 4.67, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.89, "optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 17.92, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 29, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.89, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 17.92, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 29, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0003629318671301007, "policy_loss": 2.1011597709730268e-05, "vf_loss": 
7.619492530822754, "vf_explained_var": 0.5936301946640015, "kl": 0.002933789975941181, "entropy": 0.8400548696517944, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 3929600, "num_env_steps_trained": 3929600, "num_agent_steps_sampled": 7859200, "num_agent_steps_trained": 7859200}, "sampler_results": {"episode_reward_max": 639.0, "episode_reward_min": 191.0, "episode_reward_mean": 589.21, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 91.0}, "policy_reward_max": {"ppo": 326.0}, "policy_reward_mean": {"ppo": 294.605}, "custom_metrics": {"sparse_reward_mean": 203.8, "sparse_reward_min": 60, "sparse_reward_max": 220, "shaped_reward_mean": 181.61, "shaped_reward_min": 71, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.39, "onion_pickup_agent_0_min": 4, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 18.58, 
"onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 29, "useful_onion_pickup_agent_0_mean": 15.13, "useful_onion_pickup_agent_0_min": 4, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 18.24, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 29, "onion_drop_agent_0_mean": 0.25, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.28, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.16, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.14, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 14.89, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 17.92, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 29, "dish_pickup_agent_0_mean": 6.22, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.1, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.75, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.81, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.3, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.15, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.59, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 10, "soup_pickup_agent_1_mean": 4.71, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.55, "soup_delivery_agent_0_min": 1, 
"soup_delivery_agent_0_max": 10, "soup_delivery_agent_1_mean": 4.67, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.89, "optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 17.92, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 29, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.89, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 17.92, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 29, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [630.0, 579.0, 573.0, 627.0, 576.0, 522.0, 582.0, 576.0, 576.0, 630.0, 579.0, 579.0, 579.0, 636.0, 570.0, 636.0, 633.0, 582.0, 576.0, 582.0, 582.0, 636.0, 587.0, 582.0, 582.0, 630.0, 579.0, 573.0, 636.0, 587.0, 630.0, 582.0, 582.0, 587.0, 582.0, 579.0, 587.0, 582.0, 582.0, 573.0, 579.0, 570.0, 627.0, 633.0, 636.0, 636.0, 579.0, 573.0, 584.0, 627.0, 582.0, 516.0, 576.0, 579.0, 627.0, 633.0, 570.0, 630.0, 627.0, 570.0, 579.0, 579.0, 576.0, 582.0, 584.0, 579.0, 579.0, 576.0, 633.0, 582.0, 639.0, 639.0, 579.0, 582.0, 633.0, 624.0, 579.0, 584.0, 582.0, 582.0, 582.0, 462.0, 633.0, 582.0, 582.0, 636.0, 576.0, 584.0, 633.0, 593.0, 627.0, 191.0, 579.0, 639.0, 576.0, 579.0, 582.0, 579.0, 633.0, 584.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [306.0, 324.0, 292.0, 287.0, 289.0, 284.0, 316.0, 311.0, 287.0, 289.0, 256.0, 266.0, 288.0, 294.0, 293.0, 283.0, 287.0, 289.0, 310.0, 320.0, 291.0, 288.0, 291.0, 288.0, 292.0, 287.0, 316.0, 320.0, 281.0, 289.0, 317.0, 319.0, 316.0, 317.0, 294.0, 288.0, 282.0, 294.0, 287.0, 295.0, 288.0, 294.0, 320.0, 316.0, 296.0, 291.0, 286.0, 296.0, 292.0, 
290.0, 319.0, 311.0, 293.0, 286.0, 283.0, 290.0, 319.0, 317.0, 289.0, 298.0, 314.0, 316.0, 288.0, 294.0, 296.0, 286.0, 295.0, 292.0, 288.0, 294.0, 288.0, 291.0, 293.0, 294.0, 289.0, 293.0, 289.0, 293.0, 285.0, 288.0, 288.0, 291.0, 282.0, 288.0, 320.0, 307.0, 310.0, 323.0, 316.0, 320.0, 317.0, 319.0, 286.0, 293.0, 287.0, 286.0, 286.0, 298.0, 312.0, 315.0, 291.0, 291.0, 258.0, 258.0, 284.0, 292.0, 286.0, 293.0, 317.0, 310.0, 312.0, 321.0, 284.0, 286.0, 310.0, 320.0, 310.0, 317.0, 287.0, 283.0, 289.0, 290.0, 290.0, 289.0, 294.0, 282.0, 287.0, 295.0, 287.0, 297.0, 289.0, 290.0, 293.0, 286.0, 280.0, 296.0, 311.0, 322.0, 293.0, 289.0, 318.0, 321.0, 313.0, 326.0, 287.0, 292.0, 287.0, 295.0, 318.0, 315.0, 316.0, 308.0, 292.0, 287.0, 303.0, 281.0, 294.0, 288.0, 291.0, 291.0, 291.0, 291.0, 230.0, 232.0, 318.0, 315.0, 290.0, 292.0, 292.0, 290.0, 316.0, 320.0, 284.0, 292.0, 292.0, 292.0, 309.0, 324.0, 299.0, 294.0, 317.0, 310.0, 91.0, 100.0, 288.0, 291.0, 319.0, 320.0, 288.0, 288.0, 288.0, 291.0, 290.0, 292.0, 287.0, 292.0, 315.0, 318.0, 287.0, 297.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7022557630562534, "mean_inference_ms": 1.2563812177822142, "mean_action_processing_ms": 0.13439551115108503, "mean_env_wait_ms": 0.8450603332508823, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 639.0, "episode_reward_min": 191.0, "episode_reward_mean": 589.21, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 91.0}, "policy_reward_max": {"ppo": 326.0}, "policy_reward_mean": {"ppo": 294.605}, "hist_stats": {"episode_reward": [630.0, 579.0, 573.0, 627.0, 576.0, 522.0, 582.0, 576.0, 576.0, 630.0, 579.0, 579.0, 579.0, 636.0, 570.0, 636.0, 633.0, 582.0, 576.0, 582.0, 582.0, 636.0, 587.0, 582.0, 582.0, 630.0, 579.0, 573.0, 636.0, 587.0, 630.0, 582.0, 582.0, 587.0, 582.0, 579.0, 587.0, 582.0, 582.0, 573.0, 579.0, 570.0, 627.0, 633.0, 636.0, 636.0, 579.0, 573.0, 584.0, 627.0, 582.0, 516.0, 576.0, 579.0, 627.0, 633.0, 570.0, 
630.0, 627.0, 570.0, 579.0, 579.0, 576.0, 582.0, 584.0, 579.0, 579.0, 576.0, 633.0, 582.0, 639.0, 639.0, 579.0, 582.0, 633.0, 624.0, 579.0, 584.0, 582.0, 582.0, 582.0, 462.0, 633.0, 582.0, 582.0, 636.0, 576.0, 584.0, 633.0, 593.0, 627.0, 191.0, 579.0, 639.0, 576.0, 579.0, 582.0, 579.0, 633.0, 584.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [306.0, 324.0, 292.0, 287.0, 289.0, 284.0, 316.0, 311.0, 287.0, 289.0, 256.0, 266.0, 288.0, 294.0, 293.0, 283.0, 287.0, 289.0, 310.0, 320.0, 291.0, 288.0, 291.0, 288.0, 292.0, 287.0, 316.0, 320.0, 281.0, 289.0, 317.0, 319.0, 316.0, 317.0, 294.0, 288.0, 282.0, 294.0, 287.0, 295.0, 288.0, 294.0, 320.0, 316.0, 296.0, 291.0, 286.0, 296.0, 292.0, 290.0, 319.0, 311.0, 293.0, 286.0, 283.0, 290.0, 319.0, 317.0, 289.0, 298.0, 314.0, 316.0, 288.0, 294.0, 296.0, 286.0, 295.0, 292.0, 288.0, 294.0, 288.0, 291.0, 293.0, 294.0, 289.0, 293.0, 289.0, 293.0, 285.0, 288.0, 288.0, 291.0, 282.0, 288.0, 320.0, 307.0, 310.0, 323.0, 316.0, 320.0, 317.0, 319.0, 286.0, 293.0, 287.0, 286.0, 286.0, 298.0, 312.0, 315.0, 291.0, 291.0, 258.0, 258.0, 284.0, 292.0, 286.0, 293.0, 317.0, 310.0, 312.0, 321.0, 284.0, 286.0, 310.0, 320.0, 310.0, 317.0, 287.0, 283.0, 289.0, 290.0, 290.0, 289.0, 294.0, 282.0, 287.0, 295.0, 287.0, 297.0, 289.0, 290.0, 293.0, 286.0, 280.0, 296.0, 311.0, 322.0, 293.0, 289.0, 318.0, 321.0, 313.0, 326.0, 287.0, 292.0, 287.0, 295.0, 318.0, 315.0, 316.0, 308.0, 292.0, 287.0, 303.0, 281.0, 294.0, 288.0, 291.0, 291.0, 291.0, 291.0, 230.0, 232.0, 318.0, 
315.0, 290.0, 292.0, 292.0, 290.0, 316.0, 320.0, 284.0, 292.0, 292.0, 292.0, 309.0, 324.0, 299.0, 294.0, 317.0, 310.0, 91.0, 100.0, 288.0, 291.0, 319.0, 320.0, 288.0, 288.0, 288.0, 291.0, 290.0, 292.0, 287.0, 292.0, 315.0, 318.0, 287.0, 297.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7022557630562534, "mean_inference_ms": 1.2563812177822142, "mean_action_processing_ms": 0.13439551115108503, "mean_env_wait_ms": 0.8450603332508823, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 7859200, "num_agent_steps_trained": 7859200, "num_env_steps_sampled": 3929600, "num_env_steps_trained": 3929600, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 3929600, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 7859200, "timers": {"training_iteration_time_ms": 3549.51, "learn_time_ms": 1058.208, "learn_throughput": 12095.923, "synch_weights_time_ms": 10.407}, "counters": {"num_env_steps_sampled": 3929600, "num_env_steps_trained": 3929600, "num_agent_steps_sampled": 7859200, "num_agent_steps_trained": 7859200}, "done": false, "episodes_total": 9824, "training_iteration": 307, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-20-27", "timestamp": 1666581627, "time_this_iter_s": 3.666581630706787, "time_total_s": 1182.3538007736206, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1182.3538007736206, "timesteps_since_restore": 0, "iterations_since_restore": 307, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 24.04, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 204.2, "sparse_reward_min": 60, "sparse_reward_max": 220, "shaped_reward_mean": 182.55, "shaped_reward_min": 71, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.7, "onion_pickup_agent_0_min": 4, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 18.31, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 29, "useful_onion_pickup_agent_0_mean": 15.43, "useful_onion_pickup_agent_0_min": 4, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 18.0, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 29, "onion_drop_agent_0_mean": 0.21, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.25, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.14, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, 
"useful_onion_drop_agent_1_mean": 0.13, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 15.19, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 17.75, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 29, "dish_pickup_agent_0_mean": 6.12, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.32, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 13, "useful_dish_pickup_agent_0_mean": 5.73, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.93, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.25, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.25, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 6, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.57, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 10, "soup_pickup_agent_1_mean": 4.78, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.52, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 10, "soup_delivery_agent_1_mean": 4.73, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.19, "optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 17.75, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 29, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.19, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 17.75, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 29, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.004296698607504368, "policy_loss": -0.00463396031409502, "vf_loss": 
7.600736618041992, "vf_explained_var": 0.5694471597671509, "kl": 0.0021462240256369114, "entropy": 0.8456211090087891, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 3942400, "num_env_steps_trained": 3942400, "num_agent_steps_sampled": 7884800, "num_agent_steps_trained": 7884800}, "sampler_results": {"episode_reward_max": 639.0, "episode_reward_min": 191.0, "episode_reward_mean": 590.95, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 91.0}, "policy_reward_max": {"ppo": 326.0}, "policy_reward_mean": {"ppo": 295.475}, "custom_metrics": {"sparse_reward_mean": 204.2, "sparse_reward_min": 60, "sparse_reward_max": 220, "shaped_reward_mean": 182.55, "shaped_reward_min": 71, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.7, "onion_pickup_agent_0_min": 4, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 18.31, 
"onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 29, "useful_onion_pickup_agent_0_mean": 15.43, "useful_onion_pickup_agent_0_min": 4, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 18.0, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 29, "onion_drop_agent_0_mean": 0.21, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.25, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.14, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.13, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 15.19, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 17.75, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 29, "dish_pickup_agent_0_mean": 6.12, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.32, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 13, "useful_dish_pickup_agent_0_mean": 5.73, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.93, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.25, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.25, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 6, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.57, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 10, "soup_pickup_agent_1_mean": 4.78, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.52, "soup_delivery_agent_0_min": 1, 
"soup_delivery_agent_0_max": 10, "soup_delivery_agent_1_mean": 4.73, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.19, "optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 17.75, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 29, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.19, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 17.75, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 29, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 587.0, 582.0, 579.0, 587.0, 582.0, 582.0, 573.0, 579.0, 570.0, 627.0, 633.0, 636.0, 636.0, 579.0, 573.0, 584.0, 627.0, 582.0, 516.0, 576.0, 579.0, 627.0, 633.0, 570.0, 630.0, 627.0, 570.0, 579.0, 579.0, 576.0, 582.0, 584.0, 579.0, 579.0, 576.0, 633.0, 582.0, 639.0, 639.0, 579.0, 582.0, 633.0, 624.0, 579.0, 584.0, 582.0, 582.0, 582.0, 462.0, 633.0, 582.0, 582.0, 636.0, 576.0, 584.0, 633.0, 593.0, 627.0, 191.0, 579.0, 639.0, 576.0, 579.0, 582.0, 579.0, 633.0, 584.0, 590.0, 579.0, 636.0, 576.0, 582.0, 582.0, 582.0, 582.0, 636.0, 587.0, 630.0, 579.0, 587.0, 590.0, 633.0, 633.0, 627.0, 582.0, 633.0, 630.0, 576.0, 573.0, 579.0, 639.0, 630.0, 582.0, 639.0, 587.0, 582.0, 582.0, 579.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [296.0, 286.0, 295.0, 292.0, 288.0, 294.0, 288.0, 291.0, 293.0, 294.0, 289.0, 293.0, 289.0, 293.0, 285.0, 288.0, 288.0, 291.0, 282.0, 288.0, 320.0, 307.0, 310.0, 323.0, 316.0, 320.0, 317.0, 319.0, 286.0, 293.0, 287.0, 286.0, 286.0, 298.0, 312.0, 315.0, 291.0, 291.0, 258.0, 258.0, 284.0, 292.0, 286.0, 293.0, 317.0, 310.0, 312.0, 321.0, 284.0, 
286.0, 310.0, 320.0, 310.0, 317.0, 287.0, 283.0, 289.0, 290.0, 290.0, 289.0, 294.0, 282.0, 287.0, 295.0, 287.0, 297.0, 289.0, 290.0, 293.0, 286.0, 280.0, 296.0, 311.0, 322.0, 293.0, 289.0, 318.0, 321.0, 313.0, 326.0, 287.0, 292.0, 287.0, 295.0, 318.0, 315.0, 316.0, 308.0, 292.0, 287.0, 303.0, 281.0, 294.0, 288.0, 291.0, 291.0, 291.0, 291.0, 230.0, 232.0, 318.0, 315.0, 290.0, 292.0, 292.0, 290.0, 316.0, 320.0, 284.0, 292.0, 292.0, 292.0, 309.0, 324.0, 299.0, 294.0, 317.0, 310.0, 91.0, 100.0, 288.0, 291.0, 319.0, 320.0, 288.0, 288.0, 288.0, 291.0, 290.0, 292.0, 287.0, 292.0, 315.0, 318.0, 287.0, 297.0, 296.0, 294.0, 290.0, 289.0, 320.0, 316.0, 286.0, 290.0, 290.0, 292.0, 295.0, 287.0, 292.0, 290.0, 293.0, 289.0, 317.0, 319.0, 294.0, 293.0, 312.0, 318.0, 291.0, 288.0, 284.0, 303.0, 297.0, 293.0, 319.0, 314.0, 319.0, 314.0, 314.0, 313.0, 293.0, 289.0, 317.0, 316.0, 314.0, 316.0, 288.0, 288.0, 283.0, 290.0, 290.0, 289.0, 320.0, 319.0, 319.0, 311.0, 294.0, 288.0, 319.0, 320.0, 291.0, 296.0, 286.0, 296.0, 291.0, 291.0, 288.0, 291.0, 290.0, 289.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7021820115704458, "mean_inference_ms": 1.25616924276832, "mean_action_processing_ms": 0.13438751298201573, "mean_env_wait_ms": 0.8449627802904494, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 639.0, "episode_reward_min": 191.0, "episode_reward_mean": 590.95, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 91.0}, "policy_reward_max": {"ppo": 326.0}, "policy_reward_mean": {"ppo": 295.475}, "hist_stats": {"episode_reward": [582.0, 587.0, 582.0, 579.0, 587.0, 582.0, 582.0, 573.0, 579.0, 570.0, 627.0, 633.0, 636.0, 636.0, 579.0, 573.0, 584.0, 627.0, 582.0, 516.0, 576.0, 579.0, 627.0, 633.0, 570.0, 630.0, 627.0, 570.0, 579.0, 579.0, 576.0, 582.0, 584.0, 579.0, 579.0, 576.0, 633.0, 582.0, 639.0, 639.0, 579.0, 582.0, 633.0, 624.0, 579.0, 584.0, 582.0, 582.0, 582.0, 462.0, 633.0, 582.0, 582.0, 636.0, 576.0, 584.0, 633.0, 
593.0, 627.0, 191.0, 579.0, 639.0, 576.0, 579.0, 582.0, 579.0, 633.0, 584.0, 590.0, 579.0, 636.0, 576.0, 582.0, 582.0, 582.0, 582.0, 636.0, 587.0, 630.0, 579.0, 587.0, 590.0, 633.0, 633.0, 627.0, 582.0, 633.0, 630.0, 576.0, 573.0, 579.0, 639.0, 630.0, 582.0, 639.0, 587.0, 582.0, 582.0, 579.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [296.0, 286.0, 295.0, 292.0, 288.0, 294.0, 288.0, 291.0, 293.0, 294.0, 289.0, 293.0, 289.0, 293.0, 285.0, 288.0, 288.0, 291.0, 282.0, 288.0, 320.0, 307.0, 310.0, 323.0, 316.0, 320.0, 317.0, 319.0, 286.0, 293.0, 287.0, 286.0, 286.0, 298.0, 312.0, 315.0, 291.0, 291.0, 258.0, 258.0, 284.0, 292.0, 286.0, 293.0, 317.0, 310.0, 312.0, 321.0, 284.0, 286.0, 310.0, 320.0, 310.0, 317.0, 287.0, 283.0, 289.0, 290.0, 290.0, 289.0, 294.0, 282.0, 287.0, 295.0, 287.0, 297.0, 289.0, 290.0, 293.0, 286.0, 280.0, 296.0, 311.0, 322.0, 293.0, 289.0, 318.0, 321.0, 313.0, 326.0, 287.0, 292.0, 287.0, 295.0, 318.0, 315.0, 316.0, 308.0, 292.0, 287.0, 303.0, 281.0, 294.0, 288.0, 291.0, 291.0, 291.0, 291.0, 230.0, 232.0, 318.0, 315.0, 290.0, 292.0, 292.0, 290.0, 316.0, 320.0, 284.0, 292.0, 292.0, 292.0, 309.0, 324.0, 299.0, 294.0, 317.0, 310.0, 91.0, 100.0, 288.0, 291.0, 319.0, 320.0, 288.0, 288.0, 288.0, 291.0, 290.0, 292.0, 287.0, 292.0, 315.0, 318.0, 287.0, 297.0, 296.0, 294.0, 290.0, 289.0, 320.0, 316.0, 286.0, 290.0, 290.0, 292.0, 295.0, 287.0, 292.0, 290.0, 293.0, 289.0, 317.0, 319.0, 294.0, 293.0, 312.0, 318.0, 291.0, 288.0, 284.0, 303.0, 297.0, 293.0, 319.0, 
314.0, 319.0, 314.0, 314.0, 313.0, 293.0, 289.0, 317.0, 316.0, 314.0, 316.0, 288.0, 288.0, 283.0, 290.0, 290.0, 289.0, 320.0, 319.0, 319.0, 311.0, 294.0, 288.0, 319.0, 320.0, 291.0, 296.0, 286.0, 296.0, 291.0, 291.0, 288.0, 291.0, 290.0, 289.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7021820115704458, "mean_inference_ms": 1.25616924276832, "mean_action_processing_ms": 0.13438751298201573, "mean_env_wait_ms": 0.8449627802904494, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 7884800, "num_agent_steps_trained": 7884800, "num_env_steps_sampled": 3942400, "num_env_steps_trained": 3942400, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 3942400, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 7884800, "timers": {"training_iteration_time_ms": 3550.198, "learn_time_ms": 1061.666, "learn_throughput": 12056.517, "synch_weights_time_ms": 10.477}, "counters": {"num_env_steps_sampled": 3942400, "num_env_steps_trained": 3942400, "num_agent_steps_sampled": 7884800, "num_agent_steps_trained": 7884800}, "done": false, "episodes_total": 9856, "training_iteration": 308, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-20-30", "timestamp": 1666581630, "time_this_iter_s": 3.5699198246002197, "time_total_s": 1185.9237205982208, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1185.9237205982208, "timesteps_since_restore": 0, "iterations_since_restore": 308, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.51666666666667, "ram_util_percent": 10.616666666666667}}
+{"custom_metrics": {"sparse_reward_mean": 205.2, "sparse_reward_min": 60, "sparse_reward_max": 220, "shaped_reward_mean": 183.47, "shaped_reward_min": 71, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.13, "onion_pickup_agent_0_min": 5, "onion_pickup_agent_0_max": 26, "onion_pickup_agent_1_mean": 18.08, "onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 28, "useful_onion_pickup_agent_0_mean": 15.94, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 25, "useful_onion_pickup_agent_1_mean": 17.82, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 28, "onion_drop_agent_0_mean": 0.18, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.22, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.08, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, 
"useful_onion_drop_agent_1_mean": 0.11, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 15.6, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 26, "potting_onion_agent_1_mean": 17.55, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 28, "dish_pickup_agent_0_mean": 5.99, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.5, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 13, "useful_dish_pickup_agent_0_mean": 5.66, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.98, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.22, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.31, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 6, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.04, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.53, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.89, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.49, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.83, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.6, "optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 26, "optimal_onion_potting_agent_1_mean": 17.55, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 28, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.6, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 26, "viable_onion_potting_agent_1_mean": 17.55, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 28, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.002151058055460453, "policy_loss": 0.0018065437907353044, "vf_loss": 
7.667585372924805, "vf_explained_var": 0.560869574546814, "kl": 0.0021857996471226215, "entropy": 0.8444837331771851, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 3955200, "num_env_steps_trained": 3955200, "num_agent_steps_sampled": 7910400, "num_agent_steps_trained": 7910400}, "sampler_results": {"episode_reward_max": 639.0, "episode_reward_min": 191.0, "episode_reward_mean": 593.87, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 91.0}, "policy_reward_max": {"ppo": 326.0}, "policy_reward_mean": {"ppo": 296.935}, "custom_metrics": {"sparse_reward_mean": 205.2, "sparse_reward_min": 60, "sparse_reward_max": 220, "shaped_reward_mean": 183.47, "shaped_reward_min": 71, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.13, "onion_pickup_agent_0_min": 5, "onion_pickup_agent_0_max": 26, "onion_pickup_agent_1_mean": 18.08, 
"onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 28, "useful_onion_pickup_agent_0_mean": 15.94, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 25, "useful_onion_pickup_agent_1_mean": 17.82, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 28, "onion_drop_agent_0_mean": 0.18, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.22, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.08, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.11, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 15.6, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 26, "potting_onion_agent_1_mean": 17.55, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 28, "dish_pickup_agent_0_mean": 5.99, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.5, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 13, "useful_dish_pickup_agent_0_mean": 5.66, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.98, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.22, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.31, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 6, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.04, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.53, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.89, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.49, "soup_delivery_agent_0_min": 1, 
"soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.83, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.6, "optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 26, "optimal_onion_potting_agent_1_mean": 17.55, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 28, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.6, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 26, "viable_onion_potting_agent_1_mean": 17.55, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 28, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [584.0, 579.0, 579.0, 576.0, 633.0, 582.0, 639.0, 639.0, 579.0, 582.0, 633.0, 624.0, 579.0, 584.0, 582.0, 582.0, 582.0, 462.0, 633.0, 582.0, 582.0, 636.0, 576.0, 584.0, 633.0, 593.0, 627.0, 191.0, 579.0, 639.0, 576.0, 579.0, 582.0, 579.0, 633.0, 584.0, 590.0, 579.0, 636.0, 576.0, 582.0, 582.0, 582.0, 582.0, 636.0, 587.0, 630.0, 579.0, 587.0, 590.0, 633.0, 633.0, 627.0, 582.0, 633.0, 630.0, 576.0, 573.0, 579.0, 639.0, 630.0, 582.0, 639.0, 587.0, 582.0, 582.0, 579.0, 579.0, 582.0, 630.0, 630.0, 636.0, 579.0, 579.0, 587.0, 630.0, 630.0, 636.0, 579.0, 582.0, 582.0, 600.0, 630.0, 584.0, 630.0, 579.0, 579.0, 579.0, 593.0, 630.0, 576.0, 636.0, 630.0, 576.0, 584.0, 582.0, 584.0, 579.0, 570.0, 633.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [287.0, 297.0, 289.0, 290.0, 293.0, 286.0, 280.0, 296.0, 311.0, 322.0, 293.0, 289.0, 318.0, 321.0, 313.0, 326.0, 287.0, 292.0, 287.0, 295.0, 318.0, 315.0, 316.0, 308.0, 292.0, 287.0, 303.0, 281.0, 294.0, 288.0, 291.0, 291.0, 291.0, 291.0, 230.0, 232.0, 318.0, 315.0, 290.0, 292.0, 292.0, 290.0, 316.0, 320.0, 284.0, 292.0, 292.0, 292.0, 309.0, 
324.0, 299.0, 294.0, 317.0, 310.0, 91.0, 100.0, 288.0, 291.0, 319.0, 320.0, 288.0, 288.0, 288.0, 291.0, 290.0, 292.0, 287.0, 292.0, 315.0, 318.0, 287.0, 297.0, 296.0, 294.0, 290.0, 289.0, 320.0, 316.0, 286.0, 290.0, 290.0, 292.0, 295.0, 287.0, 292.0, 290.0, 293.0, 289.0, 317.0, 319.0, 294.0, 293.0, 312.0, 318.0, 291.0, 288.0, 284.0, 303.0, 297.0, 293.0, 319.0, 314.0, 319.0, 314.0, 314.0, 313.0, 293.0, 289.0, 317.0, 316.0, 314.0, 316.0, 288.0, 288.0, 283.0, 290.0, 290.0, 289.0, 320.0, 319.0, 319.0, 311.0, 294.0, 288.0, 319.0, 320.0, 291.0, 296.0, 286.0, 296.0, 291.0, 291.0, 288.0, 291.0, 290.0, 289.0, 289.0, 293.0, 316.0, 314.0, 314.0, 316.0, 315.0, 321.0, 287.0, 292.0, 290.0, 289.0, 288.0, 299.0, 319.0, 311.0, 315.0, 315.0, 319.0, 317.0, 292.0, 287.0, 291.0, 291.0, 294.0, 288.0, 319.0, 281.0, 319.0, 311.0, 285.0, 299.0, 319.0, 311.0, 288.0, 291.0, 287.0, 292.0, 290.0, 289.0, 298.0, 295.0, 309.0, 321.0, 291.0, 285.0, 315.0, 321.0, 318.0, 312.0, 294.0, 282.0, 284.0, 300.0, 289.0, 293.0, 290.0, 294.0, 284.0, 295.0, 286.0, 284.0, 314.0, 319.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.702118067809688, "mean_inference_ms": 1.2559595497726916, "mean_action_processing_ms": 0.13437893028732162, "mean_env_wait_ms": 0.8448680672814765, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 639.0, "episode_reward_min": 191.0, "episode_reward_mean": 593.87, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 91.0}, "policy_reward_max": {"ppo": 326.0}, "policy_reward_mean": {"ppo": 296.935}, "hist_stats": {"episode_reward": [584.0, 579.0, 579.0, 576.0, 633.0, 582.0, 639.0, 639.0, 579.0, 582.0, 633.0, 624.0, 579.0, 584.0, 582.0, 582.0, 582.0, 462.0, 633.0, 582.0, 582.0, 636.0, 576.0, 584.0, 633.0, 593.0, 627.0, 191.0, 579.0, 639.0, 576.0, 579.0, 582.0, 579.0, 633.0, 584.0, 590.0, 579.0, 636.0, 576.0, 582.0, 582.0, 582.0, 582.0, 636.0, 587.0, 630.0, 579.0, 587.0, 590.0, 633.0, 633.0, 627.0, 582.0, 633.0, 630.0, 576.0, 
573.0, 579.0, 639.0, 630.0, 582.0, 639.0, 587.0, 582.0, 582.0, 579.0, 579.0, 582.0, 630.0, 630.0, 636.0, 579.0, 579.0, 587.0, 630.0, 630.0, 636.0, 579.0, 582.0, 582.0, 600.0, 630.0, 584.0, 630.0, 579.0, 579.0, 579.0, 593.0, 630.0, 576.0, 636.0, 630.0, 576.0, 584.0, 582.0, 584.0, 579.0, 570.0, 633.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [287.0, 297.0, 289.0, 290.0, 293.0, 286.0, 280.0, 296.0, 311.0, 322.0, 293.0, 289.0, 318.0, 321.0, 313.0, 326.0, 287.0, 292.0, 287.0, 295.0, 318.0, 315.0, 316.0, 308.0, 292.0, 287.0, 303.0, 281.0, 294.0, 288.0, 291.0, 291.0, 291.0, 291.0, 230.0, 232.0, 318.0, 315.0, 290.0, 292.0, 292.0, 290.0, 316.0, 320.0, 284.0, 292.0, 292.0, 292.0, 309.0, 324.0, 299.0, 294.0, 317.0, 310.0, 91.0, 100.0, 288.0, 291.0, 319.0, 320.0, 288.0, 288.0, 288.0, 291.0, 290.0, 292.0, 287.0, 292.0, 315.0, 318.0, 287.0, 297.0, 296.0, 294.0, 290.0, 289.0, 320.0, 316.0, 286.0, 290.0, 290.0, 292.0, 295.0, 287.0, 292.0, 290.0, 293.0, 289.0, 317.0, 319.0, 294.0, 293.0, 312.0, 318.0, 291.0, 288.0, 284.0, 303.0, 297.0, 293.0, 319.0, 314.0, 319.0, 314.0, 314.0, 313.0, 293.0, 289.0, 317.0, 316.0, 314.0, 316.0, 288.0, 288.0, 283.0, 290.0, 290.0, 289.0, 320.0, 319.0, 319.0, 311.0, 294.0, 288.0, 319.0, 320.0, 291.0, 296.0, 286.0, 296.0, 291.0, 291.0, 288.0, 291.0, 290.0, 289.0, 289.0, 293.0, 316.0, 314.0, 314.0, 316.0, 315.0, 321.0, 287.0, 292.0, 290.0, 289.0, 288.0, 299.0, 319.0, 311.0, 315.0, 315.0, 319.0, 317.0, 292.0, 287.0, 291.0, 291.0, 294.0, 288.0, 319.0, 281.0, 319.0, 
311.0, 285.0, 299.0, 319.0, 311.0, 288.0, 291.0, 287.0, 292.0, 290.0, 289.0, 298.0, 295.0, 309.0, 321.0, 291.0, 285.0, 315.0, 321.0, 318.0, 312.0, 294.0, 282.0, 284.0, 300.0, 289.0, 293.0, 290.0, 294.0, 284.0, 295.0, 286.0, 284.0, 314.0, 319.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.702118067809688, "mean_inference_ms": 1.2559595497726916, "mean_action_processing_ms": 0.13437893028732162, "mean_env_wait_ms": 0.8448680672814765, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 7910400, "num_agent_steps_trained": 7910400, "num_env_steps_sampled": 3955200, "num_env_steps_trained": 3955200, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 3955200, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 7910400, "timers": {"training_iteration_time_ms": 3550.934, "learn_time_ms": 1063.959, "learn_throughput": 12030.537, "synch_weights_time_ms": 9.979}, "counters": {"num_env_steps_sampled": 3955200, "num_env_steps_trained": 3955200, "num_agent_steps_sampled": 7910400, "num_agent_steps_trained": 7910400}, "done": false, "episodes_total": 9888, "training_iteration": 309, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-20-34", "timestamp": 1666581634, "time_this_iter_s": 3.6177916526794434, "time_total_s": 1189.5415122509003, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1189.5415122509003, "timesteps_since_restore": 0, "iterations_since_restore": 309, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.979999999999997, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 207.8, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 185.13, "shaped_reward_min": 160, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.11, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 26, "onion_pickup_agent_1_mean": 18.33, "onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 28, "useful_onion_pickup_agent_0_mean": 15.9, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 25, "useful_onion_pickup_agent_1_mean": 18.06, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 28, "onion_drop_agent_0_mean": 0.11, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.17, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.03, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, 
"useful_onion_drop_agent_1_mean": 0.08, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.64, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 26, "potting_onion_agent_1_mean": 17.8, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 28, "dish_pickup_agent_0_mean": 6.0, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.39, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 13, "useful_dish_pickup_agent_0_mean": 5.78, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.94, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.14, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.27, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 6, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.67, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.87, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.65, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.8, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 15.64, "optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 26, "optimal_onion_potting_agent_1_mean": 17.8, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 28, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.64, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 26, "viable_onion_potting_agent_1_mean": 17.8, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 28, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0015459628775715828, "policy_loss": -0.001896196510642767, "vf_loss": 
7.714743614196777, "vf_explained_var": 0.5556751489639282, "kl": 0.002252227161079645, "entropy": 0.8424784541130066, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 3968000, "num_env_steps_trained": 3968000, "num_agent_steps_sampled": 7936000, "num_agent_steps_trained": 7936000}, "sampler_results": {"episode_reward_max": 639.0, "episode_reward_min": 536.0, "episode_reward_mean": 600.73, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 267.0}, "policy_reward_max": {"ppo": 322.0}, "policy_reward_mean": {"ppo": 300.365}, "custom_metrics": {"sparse_reward_mean": 207.8, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 185.13, "shaped_reward_min": 160, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.11, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 26, "onion_pickup_agent_1_mean": 18.33, 
"onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 28, "useful_onion_pickup_agent_0_mean": 15.9, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 25, "useful_onion_pickup_agent_1_mean": 18.06, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 28, "onion_drop_agent_0_mean": 0.11, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.17, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.03, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.08, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.64, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 26, "potting_onion_agent_1_mean": 17.8, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 28, "dish_pickup_agent_0_mean": 6.0, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.39, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 13, "useful_dish_pickup_agent_0_mean": 5.78, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.94, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.14, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.27, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 6, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.67, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.87, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.65, "soup_delivery_agent_0_min": 3, 
"soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.8, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 15.64, "optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 26, "optimal_onion_potting_agent_1_mean": 17.8, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 28, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.64, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 26, "viable_onion_potting_agent_1_mean": 17.8, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 28, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 579.0, 633.0, 584.0, 590.0, 579.0, 636.0, 576.0, 582.0, 582.0, 582.0, 582.0, 636.0, 587.0, 630.0, 579.0, 587.0, 590.0, 633.0, 633.0, 627.0, 582.0, 633.0, 630.0, 576.0, 573.0, 579.0, 639.0, 630.0, 582.0, 639.0, 587.0, 582.0, 582.0, 579.0, 579.0, 582.0, 630.0, 630.0, 636.0, 579.0, 579.0, 587.0, 630.0, 630.0, 636.0, 579.0, 582.0, 582.0, 600.0, 630.0, 584.0, 630.0, 579.0, 579.0, 579.0, 593.0, 630.0, 576.0, 636.0, 630.0, 576.0, 584.0, 582.0, 584.0, 579.0, 570.0, 633.0, 582.0, 579.0, 630.0, 582.0, 630.0, 536.0, 590.0, 582.0, 579.0, 630.0, 636.0, 633.0, 582.0, 579.0, 570.0, 636.0, 627.0, 582.0, 579.0, 582.0, 576.0, 584.0, 633.0, 630.0, 627.0, 627.0, 633.0, 636.0, 579.0, 633.0, 633.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [290.0, 292.0, 287.0, 292.0, 315.0, 318.0, 287.0, 297.0, 296.0, 294.0, 290.0, 289.0, 320.0, 316.0, 286.0, 290.0, 290.0, 292.0, 295.0, 287.0, 292.0, 290.0, 293.0, 289.0, 317.0, 319.0, 294.0, 293.0, 312.0, 318.0, 291.0, 288.0, 284.0, 303.0, 297.0, 293.0, 319.0, 314.0, 319.0, 314.0, 314.0, 313.0, 293.0, 289.0, 317.0, 316.0, 314.0, 316.0, 288.0, 
288.0, 283.0, 290.0, 290.0, 289.0, 320.0, 319.0, 319.0, 311.0, 294.0, 288.0, 319.0, 320.0, 291.0, 296.0, 286.0, 296.0, 291.0, 291.0, 288.0, 291.0, 290.0, 289.0, 289.0, 293.0, 316.0, 314.0, 314.0, 316.0, 315.0, 321.0, 287.0, 292.0, 290.0, 289.0, 288.0, 299.0, 319.0, 311.0, 315.0, 315.0, 319.0, 317.0, 292.0, 287.0, 291.0, 291.0, 294.0, 288.0, 319.0, 281.0, 319.0, 311.0, 285.0, 299.0, 319.0, 311.0, 288.0, 291.0, 287.0, 292.0, 290.0, 289.0, 298.0, 295.0, 309.0, 321.0, 291.0, 285.0, 315.0, 321.0, 318.0, 312.0, 294.0, 282.0, 284.0, 300.0, 289.0, 293.0, 290.0, 294.0, 284.0, 295.0, 286.0, 284.0, 314.0, 319.0, 294.0, 288.0, 286.0, 293.0, 319.0, 311.0, 288.0, 294.0, 314.0, 316.0, 267.0, 269.0, 289.0, 301.0, 293.0, 289.0, 288.0, 291.0, 322.0, 308.0, 319.0, 317.0, 315.0, 318.0, 298.0, 284.0, 286.0, 293.0, 288.0, 282.0, 321.0, 315.0, 312.0, 315.0, 294.0, 288.0, 289.0, 290.0, 289.0, 293.0, 293.0, 283.0, 294.0, 290.0, 315.0, 318.0, 314.0, 316.0, 317.0, 310.0, 318.0, 309.0, 313.0, 320.0, 317.0, 319.0, 284.0, 295.0, 313.0, 320.0, 320.0, 313.0, 293.0, 286.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7020417621444395, "mean_inference_ms": 1.2558035576888165, "mean_action_processing_ms": 0.1343661017492229, "mean_env_wait_ms": 0.8449207548688427, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 639.0, "episode_reward_min": 536.0, "episode_reward_mean": 600.73, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 267.0}, "policy_reward_max": {"ppo": 322.0}, "policy_reward_mean": {"ppo": 300.365}, "hist_stats": {"episode_reward": [582.0, 579.0, 633.0, 584.0, 590.0, 579.0, 636.0, 576.0, 582.0, 582.0, 582.0, 582.0, 636.0, 587.0, 630.0, 579.0, 587.0, 590.0, 633.0, 633.0, 627.0, 582.0, 633.0, 630.0, 576.0, 573.0, 579.0, 639.0, 630.0, 582.0, 639.0, 587.0, 582.0, 582.0, 579.0, 579.0, 582.0, 630.0, 630.0, 636.0, 579.0, 579.0, 587.0, 630.0, 630.0, 636.0, 579.0, 582.0, 582.0, 600.0, 630.0, 584.0, 630.0, 579.0, 579.0, 579.0, 593.0, 
630.0, 576.0, 636.0, 630.0, 576.0, 584.0, 582.0, 584.0, 579.0, 570.0, 633.0, 582.0, 579.0, 630.0, 582.0, 630.0, 536.0, 590.0, 582.0, 579.0, 630.0, 636.0, 633.0, 582.0, 579.0, 570.0, 636.0, 627.0, 582.0, 579.0, 582.0, 576.0, 584.0, 633.0, 630.0, 627.0, 627.0, 633.0, 636.0, 579.0, 633.0, 633.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [290.0, 292.0, 287.0, 292.0, 315.0, 318.0, 287.0, 297.0, 296.0, 294.0, 290.0, 289.0, 320.0, 316.0, 286.0, 290.0, 290.0, 292.0, 295.0, 287.0, 292.0, 290.0, 293.0, 289.0, 317.0, 319.0, 294.0, 293.0, 312.0, 318.0, 291.0, 288.0, 284.0, 303.0, 297.0, 293.0, 319.0, 314.0, 319.0, 314.0, 314.0, 313.0, 293.0, 289.0, 317.0, 316.0, 314.0, 316.0, 288.0, 288.0, 283.0, 290.0, 290.0, 289.0, 320.0, 319.0, 319.0, 311.0, 294.0, 288.0, 319.0, 320.0, 291.0, 296.0, 286.0, 296.0, 291.0, 291.0, 288.0, 291.0, 290.0, 289.0, 289.0, 293.0, 316.0, 314.0, 314.0, 316.0, 315.0, 321.0, 287.0, 292.0, 290.0, 289.0, 288.0, 299.0, 319.0, 311.0, 315.0, 315.0, 319.0, 317.0, 292.0, 287.0, 291.0, 291.0, 294.0, 288.0, 319.0, 281.0, 319.0, 311.0, 285.0, 299.0, 319.0, 311.0, 288.0, 291.0, 287.0, 292.0, 290.0, 289.0, 298.0, 295.0, 309.0, 321.0, 291.0, 285.0, 315.0, 321.0, 318.0, 312.0, 294.0, 282.0, 284.0, 300.0, 289.0, 293.0, 290.0, 294.0, 284.0, 295.0, 286.0, 284.0, 314.0, 319.0, 294.0, 288.0, 286.0, 293.0, 319.0, 311.0, 288.0, 294.0, 314.0, 316.0, 267.0, 269.0, 289.0, 301.0, 293.0, 289.0, 288.0, 291.0, 322.0, 308.0, 319.0, 317.0, 315.0, 318.0, 298.0, 284.0, 286.0, 293.0, 288.0, 
282.0, 321.0, 315.0, 312.0, 315.0, 294.0, 288.0, 289.0, 290.0, 289.0, 293.0, 293.0, 283.0, 294.0, 290.0, 315.0, 318.0, 314.0, 316.0, 317.0, 310.0, 318.0, 309.0, 313.0, 320.0, 317.0, 319.0, 284.0, 295.0, 313.0, 320.0, 320.0, 313.0, 293.0, 286.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7020417621444395, "mean_inference_ms": 1.2558035576888165, "mean_action_processing_ms": 0.1343661017492229, "mean_env_wait_ms": 0.8449207548688427, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 7936000, "num_agent_steps_trained": 7936000, "num_env_steps_sampled": 3968000, "num_env_steps_trained": 3968000, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 3968000, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 7936000, "timers": {"training_iteration_time_ms": 3558.356, "learn_time_ms": 1063.744, "learn_throughput": 12032.967, "synch_weights_time_ms": 10.031}, "counters": {"num_env_steps_sampled": 3968000, "num_env_steps_trained": 3968000, "num_agent_steps_sampled": 7936000, "num_agent_steps_trained": 7936000}, "done": false, "episodes_total": 9920, "training_iteration": 310, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-20-38", "timestamp": 1666581638, "time_this_iter_s": 3.787797212600708, "time_total_s": 1193.329309463501, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1193.329309463501, "timesteps_since_restore": 0, "iterations_since_restore": 310, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 21.733333333333334, "ram_util_percent": 10.616666666666665}}
+{"custom_metrics": {"sparse_reward_mean": 206.8, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 183.72, "shaped_reward_min": 160, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.55, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 26, "onion_pickup_agent_1_mean": 18.7, "onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 28, "useful_onion_pickup_agent_0_mean": 15.38, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 25, "useful_onion_pickup_agent_1_mean": 18.38, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 28, "onion_drop_agent_0_mean": 0.11, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.15, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.02, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, 
"useful_onion_drop_agent_1_mean": 0.07, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.11, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 26, "potting_onion_agent_1_mean": 18.15, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 28, "dish_pickup_agent_0_mean": 6.1, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.11, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.82, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.71, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.14, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.21, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.04, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.81, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.67, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.77, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.63, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 15.11, "optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 26, "optimal_onion_potting_agent_1_mean": 18.15, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 28, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.11, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 26, "viable_onion_potting_agent_1_mean": 18.15, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 28, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.000862886430695653, "policy_loss": 0.0005224489723332226, "vf_loss": 
7.662137031555176, "vf_explained_var": 0.55738765001297, "kl": 0.002454055706039071, "entropy": 0.8515514731407166, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 3980800, "num_env_steps_trained": 3980800, "num_agent_steps_sampled": 7961600, "num_agent_steps_trained": 7961600}, "sampler_results": {"episode_reward_max": 639.0, "episode_reward_min": 522.0, "episode_reward_mean": 597.32, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 258.0}, "policy_reward_max": {"ppo": 322.0}, "policy_reward_mean": {"ppo": 298.66}, "custom_metrics": {"sparse_reward_mean": 206.8, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 183.72, "shaped_reward_min": 160, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.55, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 26, "onion_pickup_agent_1_mean": 18.7, 
"onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 28, "useful_onion_pickup_agent_0_mean": 15.38, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 25, "useful_onion_pickup_agent_1_mean": 18.38, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 28, "onion_drop_agent_0_mean": 0.11, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.15, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.02, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.07, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.11, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 26, "potting_onion_agent_1_mean": 18.15, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 28, "dish_pickup_agent_0_mean": 6.1, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.11, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.82, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.71, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.14, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.21, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.04, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.81, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.67, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.77, "soup_delivery_agent_0_min": 3, 
"soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.63, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 15.11, "optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 26, "optimal_onion_potting_agent_1_mean": 18.15, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 28, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.11, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 26, "viable_onion_potting_agent_1_mean": 18.15, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 28, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 582.0, 579.0, 579.0, 582.0, 630.0, 630.0, 636.0, 579.0, 579.0, 587.0, 630.0, 630.0, 636.0, 579.0, 582.0, 582.0, 600.0, 630.0, 584.0, 630.0, 579.0, 579.0, 579.0, 593.0, 630.0, 576.0, 636.0, 630.0, 576.0, 584.0, 582.0, 584.0, 579.0, 570.0, 633.0, 582.0, 579.0, 630.0, 582.0, 630.0, 536.0, 590.0, 582.0, 579.0, 630.0, 636.0, 633.0, 582.0, 579.0, 570.0, 636.0, 627.0, 582.0, 579.0, 582.0, 576.0, 584.0, 633.0, 630.0, 627.0, 627.0, 633.0, 636.0, 579.0, 633.0, 633.0, 579.0, 579.0, 582.0, 587.0, 636.0, 627.0, 579.0, 570.0, 582.0, 587.0, 630.0, 582.0, 633.0, 567.0, 630.0, 576.0, 579.0, 639.0, 576.0, 587.0, 579.0, 582.0, 567.0, 630.0, 579.0, 630.0, 579.0, 538.0, 582.0, 522.0, 582.0, 627.0, 573.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [286.0, 296.0, 291.0, 291.0, 288.0, 291.0, 290.0, 289.0, 289.0, 293.0, 316.0, 314.0, 314.0, 316.0, 315.0, 321.0, 287.0, 292.0, 290.0, 289.0, 288.0, 299.0, 319.0, 311.0, 315.0, 315.0, 319.0, 317.0, 292.0, 287.0, 291.0, 291.0, 294.0, 288.0, 319.0, 281.0, 319.0, 311.0, 285.0, 299.0, 319.0, 311.0, 288.0, 291.0, 287.0, 292.0, 290.0, 289.0, 298.0, 
295.0, 309.0, 321.0, 291.0, 285.0, 315.0, 321.0, 318.0, 312.0, 294.0, 282.0, 284.0, 300.0, 289.0, 293.0, 290.0, 294.0, 284.0, 295.0, 286.0, 284.0, 314.0, 319.0, 294.0, 288.0, 286.0, 293.0, 319.0, 311.0, 288.0, 294.0, 314.0, 316.0, 267.0, 269.0, 289.0, 301.0, 293.0, 289.0, 288.0, 291.0, 322.0, 308.0, 319.0, 317.0, 315.0, 318.0, 298.0, 284.0, 286.0, 293.0, 288.0, 282.0, 321.0, 315.0, 312.0, 315.0, 294.0, 288.0, 289.0, 290.0, 289.0, 293.0, 293.0, 283.0, 294.0, 290.0, 315.0, 318.0, 314.0, 316.0, 317.0, 310.0, 318.0, 309.0, 313.0, 320.0, 317.0, 319.0, 284.0, 295.0, 313.0, 320.0, 320.0, 313.0, 293.0, 286.0, 286.0, 293.0, 288.0, 294.0, 296.0, 291.0, 322.0, 314.0, 312.0, 315.0, 292.0, 287.0, 280.0, 290.0, 290.0, 292.0, 300.0, 287.0, 314.0, 316.0, 295.0, 287.0, 315.0, 318.0, 285.0, 282.0, 317.0, 313.0, 292.0, 284.0, 291.0, 288.0, 317.0, 322.0, 284.0, 292.0, 294.0, 293.0, 289.0, 290.0, 290.0, 292.0, 282.0, 285.0, 317.0, 313.0, 282.0, 297.0, 319.0, 311.0, 290.0, 289.0, 261.0, 277.0, 288.0, 294.0, 264.0, 258.0, 295.0, 287.0, 311.0, 316.0, 281.0, 292.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7019608180315113, "mean_inference_ms": 1.255644418855455, "mean_action_processing_ms": 0.13435158068712152, "mean_env_wait_ms": 0.8449615353261167, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 639.0, "episode_reward_min": 522.0, "episode_reward_mean": 597.32, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 258.0}, "policy_reward_max": {"ppo": 322.0}, "policy_reward_mean": {"ppo": 298.66}, "hist_stats": {"episode_reward": [582.0, 582.0, 579.0, 579.0, 582.0, 630.0, 630.0, 636.0, 579.0, 579.0, 587.0, 630.0, 630.0, 636.0, 579.0, 582.0, 582.0, 600.0, 630.0, 584.0, 630.0, 579.0, 579.0, 579.0, 593.0, 630.0, 576.0, 636.0, 630.0, 576.0, 584.0, 582.0, 584.0, 579.0, 570.0, 633.0, 582.0, 579.0, 630.0, 582.0, 630.0, 536.0, 590.0, 582.0, 579.0, 630.0, 636.0, 633.0, 582.0, 579.0, 570.0, 636.0, 627.0, 582.0, 579.0, 582.0, 576.0, 
584.0, 633.0, 630.0, 627.0, 627.0, 633.0, 636.0, 579.0, 633.0, 633.0, 579.0, 579.0, 582.0, 587.0, 636.0, 627.0, 579.0, 570.0, 582.0, 587.0, 630.0, 582.0, 633.0, 567.0, 630.0, 576.0, 579.0, 639.0, 576.0, 587.0, 579.0, 582.0, 567.0, 630.0, 579.0, 630.0, 579.0, 538.0, 582.0, 522.0, 582.0, 627.0, 573.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [286.0, 296.0, 291.0, 291.0, 288.0, 291.0, 290.0, 289.0, 289.0, 293.0, 316.0, 314.0, 314.0, 316.0, 315.0, 321.0, 287.0, 292.0, 290.0, 289.0, 288.0, 299.0, 319.0, 311.0, 315.0, 315.0, 319.0, 317.0, 292.0, 287.0, 291.0, 291.0, 294.0, 288.0, 319.0, 281.0, 319.0, 311.0, 285.0, 299.0, 319.0, 311.0, 288.0, 291.0, 287.0, 292.0, 290.0, 289.0, 298.0, 295.0, 309.0, 321.0, 291.0, 285.0, 315.0, 321.0, 318.0, 312.0, 294.0, 282.0, 284.0, 300.0, 289.0, 293.0, 290.0, 294.0, 284.0, 295.0, 286.0, 284.0, 314.0, 319.0, 294.0, 288.0, 286.0, 293.0, 319.0, 311.0, 288.0, 294.0, 314.0, 316.0, 267.0, 269.0, 289.0, 301.0, 293.0, 289.0, 288.0, 291.0, 322.0, 308.0, 319.0, 317.0, 315.0, 318.0, 298.0, 284.0, 286.0, 293.0, 288.0, 282.0, 321.0, 315.0, 312.0, 315.0, 294.0, 288.0, 289.0, 290.0, 289.0, 293.0, 293.0, 283.0, 294.0, 290.0, 315.0, 318.0, 314.0, 316.0, 317.0, 310.0, 318.0, 309.0, 313.0, 320.0, 317.0, 319.0, 284.0, 295.0, 313.0, 320.0, 320.0, 313.0, 293.0, 286.0, 286.0, 293.0, 288.0, 294.0, 296.0, 291.0, 322.0, 314.0, 312.0, 315.0, 292.0, 287.0, 280.0, 290.0, 290.0, 292.0, 300.0, 287.0, 314.0, 316.0, 295.0, 287.0, 315.0, 318.0, 285.0, 282.0, 317.0, 313.0, 292.0, 
284.0, 291.0, 288.0, 317.0, 322.0, 284.0, 292.0, 294.0, 293.0, 289.0, 290.0, 290.0, 292.0, 282.0, 285.0, 317.0, 313.0, 282.0, 297.0, 319.0, 311.0, 290.0, 289.0, 261.0, 277.0, 288.0, 294.0, 264.0, 258.0, 295.0, 287.0, 311.0, 316.0, 281.0, 292.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7019608180315113, "mean_inference_ms": 1.255644418855455, "mean_action_processing_ms": 0.13435158068712152, "mean_env_wait_ms": 0.8449615353261167, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 7961600, "num_agent_steps_trained": 7961600, "num_env_steps_sampled": 3980800, "num_env_steps_trained": 3980800, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 3980800, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 7961600, "timers": {"training_iteration_time_ms": 3577.153, "learn_time_ms": 1073.814, "learn_throughput": 11920.123, "synch_weights_time_ms": 10.129}, "counters": {"num_env_steps_sampled": 3980800, "num_env_steps_trained": 3980800, "num_agent_steps_sampled": 7961600, "num_agent_steps_trained": 7961600}, "done": false, "episodes_total": 9952, "training_iteration": 311, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-20-42", "timestamp": 1666581642, "time_this_iter_s": 3.761920690536499, "time_total_s": 1197.0912301540375, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1197.0912301540375, "timesteps_since_restore": 0, "iterations_since_restore": 311, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.34, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 206.4, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 183.87, "shaped_reward_min": 162, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.11, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 18.94, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 28, "useful_onion_pickup_agent_0_mean": 14.93, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 18.66, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 28, "onion_drop_agent_0_mean": 0.08, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.09, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.02, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, 
"useful_onion_drop_agent_1_mean": 0.04, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 14.79, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 18.46, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 28, "dish_pickup_agent_0_mean": 6.24, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 4.93, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.93, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.66, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.15, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.18, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.89, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.59, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.83, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.55, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 14.79, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 18.46, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 28, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.79, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 18.46, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 28, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.001228528330102563, "policy_loss": -0.001570116844959557, "vf_loss": 
7.6725358963012695, "vf_explained_var": 0.5661988854408264, "kl": 0.002121095545589924, "entropy": 0.8513274788856506, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 3993600, "num_env_steps_trained": 3993600, "num_agent_steps_sampled": 7987200, "num_agent_steps_trained": 7987200}, "sampler_results": {"episode_reward_max": 639.0, "episode_reward_min": 522.0, "episode_reward_mean": 596.67, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 258.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 298.335}, "custom_metrics": {"sparse_reward_mean": 206.4, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 183.87, "shaped_reward_min": 162, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.11, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 18.94, 
"onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 28, "useful_onion_pickup_agent_0_mean": 14.93, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 18.66, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 28, "onion_drop_agent_0_mean": 0.08, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.09, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.02, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.04, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 14.79, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 18.46, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 28, "dish_pickup_agent_0_mean": 6.24, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 4.93, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.93, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.66, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.15, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.18, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.89, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.59, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.83, "soup_delivery_agent_0_min": 3, 
"soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.55, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 14.79, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 18.46, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 28, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.79, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 18.46, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 28, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [584.0, 579.0, 570.0, 633.0, 582.0, 579.0, 630.0, 582.0, 630.0, 536.0, 590.0, 582.0, 579.0, 630.0, 636.0, 633.0, 582.0, 579.0, 570.0, 636.0, 627.0, 582.0, 579.0, 582.0, 576.0, 584.0, 633.0, 630.0, 627.0, 627.0, 633.0, 636.0, 579.0, 633.0, 633.0, 579.0, 579.0, 582.0, 587.0, 636.0, 627.0, 579.0, 570.0, 582.0, 587.0, 630.0, 582.0, 633.0, 567.0, 630.0, 576.0, 579.0, 639.0, 576.0, 587.0, 579.0, 582.0, 567.0, 630.0, 579.0, 630.0, 579.0, 538.0, 582.0, 522.0, 582.0, 627.0, 573.0, 579.0, 579.0, 587.0, 584.0, 530.0, 582.0, 630.0, 582.0, 587.0, 630.0, 579.0, 582.0, 633.0, 630.0, 582.0, 636.0, 582.0, 630.0, 627.0, 579.0, 582.0, 576.0, 579.0, 579.0, 639.0, 630.0, 627.0, 582.0, 584.0, 587.0, 579.0, 633.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [290.0, 294.0, 284.0, 295.0, 286.0, 284.0, 314.0, 319.0, 294.0, 288.0, 286.0, 293.0, 319.0, 311.0, 288.0, 294.0, 314.0, 316.0, 267.0, 269.0, 289.0, 301.0, 293.0, 289.0, 288.0, 291.0, 322.0, 308.0, 319.0, 317.0, 315.0, 318.0, 298.0, 284.0, 286.0, 293.0, 288.0, 282.0, 321.0, 315.0, 312.0, 315.0, 294.0, 288.0, 289.0, 290.0, 289.0, 293.0, 293.0, 
283.0, 294.0, 290.0, 315.0, 318.0, 314.0, 316.0, 317.0, 310.0, 318.0, 309.0, 313.0, 320.0, 317.0, 319.0, 284.0, 295.0, 313.0, 320.0, 320.0, 313.0, 293.0, 286.0, 286.0, 293.0, 288.0, 294.0, 296.0, 291.0, 322.0, 314.0, 312.0, 315.0, 292.0, 287.0, 280.0, 290.0, 290.0, 292.0, 300.0, 287.0, 314.0, 316.0, 295.0, 287.0, 315.0, 318.0, 285.0, 282.0, 317.0, 313.0, 292.0, 284.0, 291.0, 288.0, 317.0, 322.0, 284.0, 292.0, 294.0, 293.0, 289.0, 290.0, 290.0, 292.0, 282.0, 285.0, 317.0, 313.0, 282.0, 297.0, 319.0, 311.0, 290.0, 289.0, 261.0, 277.0, 288.0, 294.0, 264.0, 258.0, 295.0, 287.0, 311.0, 316.0, 281.0, 292.0, 287.0, 292.0, 285.0, 294.0, 298.0, 289.0, 291.0, 293.0, 267.0, 263.0, 288.0, 294.0, 317.0, 313.0, 288.0, 294.0, 292.0, 295.0, 316.0, 314.0, 290.0, 289.0, 294.0, 288.0, 317.0, 316.0, 311.0, 319.0, 290.0, 292.0, 321.0, 315.0, 293.0, 289.0, 306.0, 324.0, 312.0, 315.0, 294.0, 285.0, 290.0, 292.0, 290.0, 286.0, 289.0, 290.0, 296.0, 283.0, 318.0, 321.0, 312.0, 318.0, 309.0, 318.0, 286.0, 296.0, 290.0, 294.0, 291.0, 296.0, 287.0, 292.0, 311.0, 322.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7018695828111025, "mean_inference_ms": 1.255488717550174, "mean_action_processing_ms": 0.13433650614705042, "mean_env_wait_ms": 0.8449992857233162, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 639.0, "episode_reward_min": 522.0, "episode_reward_mean": 596.67, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 258.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 298.335}, "hist_stats": {"episode_reward": [584.0, 579.0, 570.0, 633.0, 582.0, 579.0, 630.0, 582.0, 630.0, 536.0, 590.0, 582.0, 579.0, 630.0, 636.0, 633.0, 582.0, 579.0, 570.0, 636.0, 627.0, 582.0, 579.0, 582.0, 576.0, 584.0, 633.0, 630.0, 627.0, 627.0, 633.0, 636.0, 579.0, 633.0, 633.0, 579.0, 579.0, 582.0, 587.0, 636.0, 627.0, 579.0, 570.0, 582.0, 587.0, 630.0, 582.0, 633.0, 567.0, 630.0, 576.0, 579.0, 639.0, 576.0, 587.0, 579.0, 582.0, 
567.0, 630.0, 579.0, 630.0, 579.0, 538.0, 582.0, 522.0, 582.0, 627.0, 573.0, 579.0, 579.0, 587.0, 584.0, 530.0, 582.0, 630.0, 582.0, 587.0, 630.0, 579.0, 582.0, 633.0, 630.0, 582.0, 636.0, 582.0, 630.0, 627.0, 579.0, 582.0, 576.0, 579.0, 579.0, 639.0, 630.0, 627.0, 582.0, 584.0, 587.0, 579.0, 633.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [290.0, 294.0, 284.0, 295.0, 286.0, 284.0, 314.0, 319.0, 294.0, 288.0, 286.0, 293.0, 319.0, 311.0, 288.0, 294.0, 314.0, 316.0, 267.0, 269.0, 289.0, 301.0, 293.0, 289.0, 288.0, 291.0, 322.0, 308.0, 319.0, 317.0, 315.0, 318.0, 298.0, 284.0, 286.0, 293.0, 288.0, 282.0, 321.0, 315.0, 312.0, 315.0, 294.0, 288.0, 289.0, 290.0, 289.0, 293.0, 293.0, 283.0, 294.0, 290.0, 315.0, 318.0, 314.0, 316.0, 317.0, 310.0, 318.0, 309.0, 313.0, 320.0, 317.0, 319.0, 284.0, 295.0, 313.0, 320.0, 320.0, 313.0, 293.0, 286.0, 286.0, 293.0, 288.0, 294.0, 296.0, 291.0, 322.0, 314.0, 312.0, 315.0, 292.0, 287.0, 280.0, 290.0, 290.0, 292.0, 300.0, 287.0, 314.0, 316.0, 295.0, 287.0, 315.0, 318.0, 285.0, 282.0, 317.0, 313.0, 292.0, 284.0, 291.0, 288.0, 317.0, 322.0, 284.0, 292.0, 294.0, 293.0, 289.0, 290.0, 290.0, 292.0, 282.0, 285.0, 317.0, 313.0, 282.0, 297.0, 319.0, 311.0, 290.0, 289.0, 261.0, 277.0, 288.0, 294.0, 264.0, 258.0, 295.0, 287.0, 311.0, 316.0, 281.0, 292.0, 287.0, 292.0, 285.0, 294.0, 298.0, 289.0, 291.0, 293.0, 267.0, 263.0, 288.0, 294.0, 317.0, 313.0, 288.0, 294.0, 292.0, 295.0, 316.0, 314.0, 290.0, 289.0, 294.0, 288.0, 317.0, 316.0, 311.0, 319.0, 290.0, 
292.0, 321.0, 315.0, 293.0, 289.0, 306.0, 324.0, 312.0, 315.0, 294.0, 285.0, 290.0, 292.0, 290.0, 286.0, 289.0, 290.0, 296.0, 283.0, 318.0, 321.0, 312.0, 318.0, 309.0, 318.0, 286.0, 296.0, 290.0, 294.0, 291.0, 296.0, 287.0, 292.0, 311.0, 322.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7018695828111025, "mean_inference_ms": 1.255488717550174, "mean_action_processing_ms": 0.13433650614705042, "mean_env_wait_ms": 0.8449992857233162, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 7987200, "num_agent_steps_trained": 7987200, "num_env_steps_sampled": 3993600, "num_env_steps_trained": 3993600, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 3993600, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 7987200, "timers": {"training_iteration_time_ms": 3578.867, "learn_time_ms": 1077.185, "learn_throughput": 11882.823, "synch_weights_time_ms": 10.361}, "counters": {"num_env_steps_sampled": 3993600, "num_env_steps_trained": 3993600, "num_agent_steps_sampled": 7987200, "num_agent_steps_trained": 7987200}, "done": false, "episodes_total": 9984, "training_iteration": 312, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-20-46", "timestamp": 1666581646, "time_this_iter_s": 3.587341547012329, "time_total_s": 1200.6785717010498, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1200.6785717010498, "timesteps_since_restore": 0, "iterations_since_restore": 312, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.583333333333332, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 206.0, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 183.9, "shaped_reward_min": 162, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.87, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 19.13, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 28, "useful_onion_pickup_agent_0_mean": 14.71, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 18.9, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 28, "onion_drop_agent_0_mean": 0.07, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.05, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.04, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, 
"useful_onion_drop_agent_1_mean": 0.02, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 14.54, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 18.67, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 28, "dish_pickup_agent_0_mean": 6.35, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 4.85, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 6.04, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.6, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.18, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.17, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.97, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.5, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.86, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.49, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.54, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 18.67, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 28, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.54, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 18.67, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 28, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0012437774566933513, "policy_loss": 0.0009095997083932161, "vf_loss": 
7.573324203491211, "vf_explained_var": 0.5715011358261108, "kl": 0.002083378843963146, "entropy": 0.8463079929351807, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 4006400, "num_env_steps_trained": 4006400, "num_agent_steps_sampled": 8012800, "num_agent_steps_trained": 8012800}, "sampler_results": {"episode_reward_max": 639.0, "episode_reward_min": 522.0, "episode_reward_mean": 595.9, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 258.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 297.95}, "custom_metrics": {"sparse_reward_mean": 206.0, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 183.9, "shaped_reward_min": 162, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.87, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 19.13, 
"onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 28, "useful_onion_pickup_agent_0_mean": 14.71, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 18.9, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 28, "onion_drop_agent_0_mean": 0.07, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.05, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.04, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.02, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 14.54, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 18.67, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 28, "dish_pickup_agent_0_mean": 6.35, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 4.85, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 6.04, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.6, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.18, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.17, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.97, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.5, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.86, "soup_delivery_agent_0_min": 3, 
"soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.49, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.54, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 18.67, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 28, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.54, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 18.67, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 28, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 633.0, 633.0, 579.0, 579.0, 582.0, 587.0, 636.0, 627.0, 579.0, 570.0, 582.0, 587.0, 630.0, 582.0, 633.0, 567.0, 630.0, 576.0, 579.0, 639.0, 576.0, 587.0, 579.0, 582.0, 567.0, 630.0, 579.0, 630.0, 579.0, 538.0, 582.0, 522.0, 582.0, 627.0, 573.0, 579.0, 579.0, 587.0, 584.0, 530.0, 582.0, 630.0, 582.0, 587.0, 630.0, 579.0, 582.0, 633.0, 630.0, 582.0, 636.0, 582.0, 630.0, 627.0, 579.0, 582.0, 576.0, 579.0, 579.0, 639.0, 630.0, 627.0, 582.0, 584.0, 587.0, 579.0, 633.0, 630.0, 630.0, 630.0, 582.0, 587.0, 630.0, 627.0, 579.0, 587.0, 639.0, 579.0, 584.0, 584.0, 539.0, 579.0, 630.0, 582.0, 587.0, 582.0, 579.0, 582.0, 582.0, 633.0, 576.0, 630.0, 579.0, 582.0, 576.0, 630.0, 630.0, 579.0, 636.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [284.0, 295.0, 313.0, 320.0, 320.0, 313.0, 293.0, 286.0, 286.0, 293.0, 288.0, 294.0, 296.0, 291.0, 322.0, 314.0, 312.0, 315.0, 292.0, 287.0, 280.0, 290.0, 290.0, 292.0, 300.0, 287.0, 314.0, 316.0, 295.0, 287.0, 315.0, 318.0, 285.0, 282.0, 317.0, 313.0, 292.0, 284.0, 291.0, 288.0, 317.0, 322.0, 284.0, 292.0, 294.0, 293.0, 289.0, 290.0, 290.0, 
292.0, 282.0, 285.0, 317.0, 313.0, 282.0, 297.0, 319.0, 311.0, 290.0, 289.0, 261.0, 277.0, 288.0, 294.0, 264.0, 258.0, 295.0, 287.0, 311.0, 316.0, 281.0, 292.0, 287.0, 292.0, 285.0, 294.0, 298.0, 289.0, 291.0, 293.0, 267.0, 263.0, 288.0, 294.0, 317.0, 313.0, 288.0, 294.0, 292.0, 295.0, 316.0, 314.0, 290.0, 289.0, 294.0, 288.0, 317.0, 316.0, 311.0, 319.0, 290.0, 292.0, 321.0, 315.0, 293.0, 289.0, 306.0, 324.0, 312.0, 315.0, 294.0, 285.0, 290.0, 292.0, 290.0, 286.0, 289.0, 290.0, 296.0, 283.0, 318.0, 321.0, 312.0, 318.0, 309.0, 318.0, 286.0, 296.0, 290.0, 294.0, 291.0, 296.0, 287.0, 292.0, 311.0, 322.0, 313.0, 317.0, 318.0, 312.0, 311.0, 319.0, 287.0, 295.0, 291.0, 296.0, 315.0, 315.0, 314.0, 313.0, 295.0, 284.0, 294.0, 293.0, 324.0, 315.0, 287.0, 292.0, 296.0, 288.0, 294.0, 290.0, 265.0, 274.0, 288.0, 291.0, 315.0, 315.0, 291.0, 291.0, 296.0, 291.0, 292.0, 290.0, 289.0, 290.0, 289.0, 293.0, 291.0, 291.0, 315.0, 318.0, 290.0, 286.0, 315.0, 315.0, 290.0, 289.0, 286.0, 296.0, 287.0, 289.0, 320.0, 310.0, 318.0, 312.0, 289.0, 290.0, 319.0, 317.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7017718876395496, "mean_inference_ms": 1.2552825267149847, "mean_action_processing_ms": 0.13432257141987783, "mean_env_wait_ms": 0.8448774369248566, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 639.0, "episode_reward_min": 522.0, "episode_reward_mean": 595.9, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 258.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 297.95}, "hist_stats": {"episode_reward": [579.0, 633.0, 633.0, 579.0, 579.0, 582.0, 587.0, 636.0, 627.0, 579.0, 570.0, 582.0, 587.0, 630.0, 582.0, 633.0, 567.0, 630.0, 576.0, 579.0, 639.0, 576.0, 587.0, 579.0, 582.0, 567.0, 630.0, 579.0, 630.0, 579.0, 538.0, 582.0, 522.0, 582.0, 627.0, 573.0, 579.0, 579.0, 587.0, 584.0, 530.0, 582.0, 630.0, 582.0, 587.0, 630.0, 579.0, 582.0, 633.0, 630.0, 582.0, 636.0, 582.0, 630.0, 627.0, 579.0, 582.0, 
576.0, 579.0, 579.0, 639.0, 630.0, 627.0, 582.0, 584.0, 587.0, 579.0, 633.0, 630.0, 630.0, 630.0, 582.0, 587.0, 630.0, 627.0, 579.0, 587.0, 639.0, 579.0, 584.0, 584.0, 539.0, 579.0, 630.0, 582.0, 587.0, 582.0, 579.0, 582.0, 582.0, 633.0, 576.0, 630.0, 579.0, 582.0, 576.0, 630.0, 630.0, 579.0, 636.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [284.0, 295.0, 313.0, 320.0, 320.0, 313.0, 293.0, 286.0, 286.0, 293.0, 288.0, 294.0, 296.0, 291.0, 322.0, 314.0, 312.0, 315.0, 292.0, 287.0, 280.0, 290.0, 290.0, 292.0, 300.0, 287.0, 314.0, 316.0, 295.0, 287.0, 315.0, 318.0, 285.0, 282.0, 317.0, 313.0, 292.0, 284.0, 291.0, 288.0, 317.0, 322.0, 284.0, 292.0, 294.0, 293.0, 289.0, 290.0, 290.0, 292.0, 282.0, 285.0, 317.0, 313.0, 282.0, 297.0, 319.0, 311.0, 290.0, 289.0, 261.0, 277.0, 288.0, 294.0, 264.0, 258.0, 295.0, 287.0, 311.0, 316.0, 281.0, 292.0, 287.0, 292.0, 285.0, 294.0, 298.0, 289.0, 291.0, 293.0, 267.0, 263.0, 288.0, 294.0, 317.0, 313.0, 288.0, 294.0, 292.0, 295.0, 316.0, 314.0, 290.0, 289.0, 294.0, 288.0, 317.0, 316.0, 311.0, 319.0, 290.0, 292.0, 321.0, 315.0, 293.0, 289.0, 306.0, 324.0, 312.0, 315.0, 294.0, 285.0, 290.0, 292.0, 290.0, 286.0, 289.0, 290.0, 296.0, 283.0, 318.0, 321.0, 312.0, 318.0, 309.0, 318.0, 286.0, 296.0, 290.0, 294.0, 291.0, 296.0, 287.0, 292.0, 311.0, 322.0, 313.0, 317.0, 318.0, 312.0, 311.0, 319.0, 287.0, 295.0, 291.0, 296.0, 315.0, 315.0, 314.0, 313.0, 295.0, 284.0, 294.0, 293.0, 324.0, 315.0, 287.0, 292.0, 296.0, 288.0, 294.0, 290.0, 265.0, 274.0, 288.0, 
291.0, 315.0, 315.0, 291.0, 291.0, 296.0, 291.0, 292.0, 290.0, 289.0, 290.0, 289.0, 293.0, 291.0, 291.0, 315.0, 318.0, 290.0, 286.0, 315.0, 315.0, 290.0, 289.0, 286.0, 296.0, 287.0, 289.0, 320.0, 310.0, 318.0, 312.0, 289.0, 290.0, 319.0, 317.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7017718876395496, "mean_inference_ms": 1.2552825267149847, "mean_action_processing_ms": 0.13432257141987783, "mean_env_wait_ms": 0.8448774369248566, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 8012800, "num_agent_steps_trained": 8012800, "num_env_steps_sampled": 4006400, "num_env_steps_trained": 4006400, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 4006400, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 8012800, "timers": {"training_iteration_time_ms": 3587.248, "learn_time_ms": 1076.252, "learn_throughput": 11893.126, "synch_weights_time_ms": 9.82}, "counters": {"num_env_steps_sampled": 4006400, "num_env_steps_trained": 4006400, "num_agent_steps_sampled": 8012800, "num_agent_steps_trained": 8012800}, "done": false, "episodes_total": 10016, "training_iteration": 313, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-20-50", "timestamp": 1666581650, "time_this_iter_s": 3.6089444160461426, "time_total_s": 1204.287516117096, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1204.287516117096, "timesteps_since_restore": 0, "iterations_since_restore": 313, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.72, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 206.2, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 184.17, "shaped_reward_min": 162, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.07, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 18.96, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 28, "useful_onion_pickup_agent_0_mean": 14.88, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 25, "useful_onion_pickup_agent_1_mean": 18.74, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 28, "onion_drop_agent_0_mean": 0.1, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.04, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.06, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, 
"useful_onion_drop_agent_1_mean": 0.02, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 14.67, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 25, "potting_onion_agent_1_mean": 18.49, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 28, "dish_pickup_agent_0_mean": 6.22, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 4.93, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 6.02, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.76, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.14, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.14, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.86, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.61, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.78, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.58, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.67, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 25, "optimal_onion_potting_agent_1_mean": 18.49, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 28, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.67, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 25, "viable_onion_potting_agent_1_mean": 18.49, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 28, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.00017371168360114098, "policy_loss": -0.00016079226043075323, "vf_loss": 
7.614766597747803, "vf_explained_var": 0.5785398483276367, "kl": 0.0018128352239727974, "entropy": 0.8539440631866455, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 4019200, "num_env_steps_trained": 4019200, "num_agent_steps_sampled": 8038400, "num_agent_steps_trained": 8038400}, "sampler_results": {"episode_reward_max": 639.0, "episode_reward_min": 522.0, "episode_reward_mean": 596.57, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 258.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 298.285}, "custom_metrics": {"sparse_reward_mean": 206.2, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 184.17, "shaped_reward_min": 162, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.07, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 18.96, 
"onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 28, "useful_onion_pickup_agent_0_mean": 14.88, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 25, "useful_onion_pickup_agent_1_mean": 18.74, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 28, "onion_drop_agent_0_mean": 0.1, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.04, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.06, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.02, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 14.67, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 25, "potting_onion_agent_1_mean": 18.49, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 28, "dish_pickup_agent_0_mean": 6.22, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 4.93, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 6.02, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.76, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.14, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.14, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.86, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.61, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.78, "soup_delivery_agent_0_min": 2, 
"soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.58, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.67, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 25, "optimal_onion_potting_agent_1_mean": 18.49, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 28, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.67, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 25, "viable_onion_potting_agent_1_mean": 18.49, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 28, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [522.0, 582.0, 627.0, 573.0, 579.0, 579.0, 587.0, 584.0, 530.0, 582.0, 630.0, 582.0, 587.0, 630.0, 579.0, 582.0, 633.0, 630.0, 582.0, 636.0, 582.0, 630.0, 627.0, 579.0, 582.0, 576.0, 579.0, 579.0, 639.0, 630.0, 627.0, 582.0, 584.0, 587.0, 579.0, 633.0, 630.0, 630.0, 630.0, 582.0, 587.0, 630.0, 627.0, 579.0, 587.0, 639.0, 579.0, 584.0, 584.0, 539.0, 579.0, 630.0, 582.0, 587.0, 582.0, 579.0, 582.0, 582.0, 633.0, 576.0, 630.0, 579.0, 582.0, 576.0, 630.0, 630.0, 579.0, 636.0, 633.0, 576.0, 627.0, 584.0, 636.0, 582.0, 573.0, 582.0, 630.0, 582.0, 579.0, 579.0, 576.0, 587.0, 573.0, 636.0, 633.0, 636.0, 576.0, 582.0, 579.0, 590.0, 579.0, 579.0, 579.0, 636.0, 581.0, 582.0, 582.0, 627.0, 579.0, 630.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [264.0, 258.0, 295.0, 287.0, 311.0, 316.0, 281.0, 292.0, 287.0, 292.0, 285.0, 294.0, 298.0, 289.0, 291.0, 293.0, 267.0, 263.0, 288.0, 294.0, 317.0, 313.0, 288.0, 294.0, 292.0, 295.0, 316.0, 314.0, 290.0, 289.0, 294.0, 288.0, 317.0, 316.0, 311.0, 319.0, 290.0, 292.0, 321.0, 315.0, 293.0, 289.0, 306.0, 324.0, 312.0, 315.0, 294.0, 285.0, 290.0, 
292.0, 290.0, 286.0, 289.0, 290.0, 296.0, 283.0, 318.0, 321.0, 312.0, 318.0, 309.0, 318.0, 286.0, 296.0, 290.0, 294.0, 291.0, 296.0, 287.0, 292.0, 311.0, 322.0, 313.0, 317.0, 318.0, 312.0, 311.0, 319.0, 287.0, 295.0, 291.0, 296.0, 315.0, 315.0, 314.0, 313.0, 295.0, 284.0, 294.0, 293.0, 324.0, 315.0, 287.0, 292.0, 296.0, 288.0, 294.0, 290.0, 265.0, 274.0, 288.0, 291.0, 315.0, 315.0, 291.0, 291.0, 296.0, 291.0, 292.0, 290.0, 289.0, 290.0, 289.0, 293.0, 291.0, 291.0, 315.0, 318.0, 290.0, 286.0, 315.0, 315.0, 290.0, 289.0, 286.0, 296.0, 287.0, 289.0, 320.0, 310.0, 318.0, 312.0, 289.0, 290.0, 319.0, 317.0, 317.0, 316.0, 288.0, 288.0, 312.0, 315.0, 290.0, 294.0, 319.0, 317.0, 292.0, 290.0, 283.0, 290.0, 286.0, 296.0, 317.0, 313.0, 286.0, 296.0, 286.0, 293.0, 286.0, 293.0, 283.0, 293.0, 293.0, 294.0, 282.0, 291.0, 318.0, 318.0, 318.0, 315.0, 316.0, 320.0, 284.0, 292.0, 286.0, 296.0, 290.0, 289.0, 293.0, 297.0, 289.0, 290.0, 294.0, 285.0, 289.0, 290.0, 319.0, 317.0, 287.0, 294.0, 284.0, 298.0, 297.0, 285.0, 314.0, 313.0, 286.0, 293.0, 312.0, 318.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7016816063473018, "mean_inference_ms": 1.255069617024935, "mean_action_processing_ms": 0.1343095240072887, "mean_env_wait_ms": 0.8447524656146783, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 639.0, "episode_reward_min": 522.0, "episode_reward_mean": 596.57, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 258.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 298.285}, "hist_stats": {"episode_reward": [522.0, 582.0, 627.0, 573.0, 579.0, 579.0, 587.0, 584.0, 530.0, 582.0, 630.0, 582.0, 587.0, 630.0, 579.0, 582.0, 633.0, 630.0, 582.0, 636.0, 582.0, 630.0, 627.0, 579.0, 582.0, 576.0, 579.0, 579.0, 639.0, 630.0, 627.0, 582.0, 584.0, 587.0, 579.0, 633.0, 630.0, 630.0, 630.0, 582.0, 587.0, 630.0, 627.0, 579.0, 587.0, 639.0, 579.0, 584.0, 584.0, 539.0, 579.0, 630.0, 582.0, 587.0, 582.0, 579.0, 582.0, 
582.0, 633.0, 576.0, 630.0, 579.0, 582.0, 576.0, 630.0, 630.0, 579.0, 636.0, 633.0, 576.0, 627.0, 584.0, 636.0, 582.0, 573.0, 582.0, 630.0, 582.0, 579.0, 579.0, 576.0, 587.0, 573.0, 636.0, 633.0, 636.0, 576.0, 582.0, 579.0, 590.0, 579.0, 579.0, 579.0, 636.0, 581.0, 582.0, 582.0, 627.0, 579.0, 630.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [264.0, 258.0, 295.0, 287.0, 311.0, 316.0, 281.0, 292.0, 287.0, 292.0, 285.0, 294.0, 298.0, 289.0, 291.0, 293.0, 267.0, 263.0, 288.0, 294.0, 317.0, 313.0, 288.0, 294.0, 292.0, 295.0, 316.0, 314.0, 290.0, 289.0, 294.0, 288.0, 317.0, 316.0, 311.0, 319.0, 290.0, 292.0, 321.0, 315.0, 293.0, 289.0, 306.0, 324.0, 312.0, 315.0, 294.0, 285.0, 290.0, 292.0, 290.0, 286.0, 289.0, 290.0, 296.0, 283.0, 318.0, 321.0, 312.0, 318.0, 309.0, 318.0, 286.0, 296.0, 290.0, 294.0, 291.0, 296.0, 287.0, 292.0, 311.0, 322.0, 313.0, 317.0, 318.0, 312.0, 311.0, 319.0, 287.0, 295.0, 291.0, 296.0, 315.0, 315.0, 314.0, 313.0, 295.0, 284.0, 294.0, 293.0, 324.0, 315.0, 287.0, 292.0, 296.0, 288.0, 294.0, 290.0, 265.0, 274.0, 288.0, 291.0, 315.0, 315.0, 291.0, 291.0, 296.0, 291.0, 292.0, 290.0, 289.0, 290.0, 289.0, 293.0, 291.0, 291.0, 315.0, 318.0, 290.0, 286.0, 315.0, 315.0, 290.0, 289.0, 286.0, 296.0, 287.0, 289.0, 320.0, 310.0, 318.0, 312.0, 289.0, 290.0, 319.0, 317.0, 317.0, 316.0, 288.0, 288.0, 312.0, 315.0, 290.0, 294.0, 319.0, 317.0, 292.0, 290.0, 283.0, 290.0, 286.0, 296.0, 317.0, 313.0, 286.0, 296.0, 286.0, 293.0, 286.0, 293.0, 283.0, 293.0, 293.0, 294.0, 282.0, 
291.0, 318.0, 318.0, 318.0, 315.0, 316.0, 320.0, 284.0, 292.0, 286.0, 296.0, 290.0, 289.0, 293.0, 297.0, 289.0, 290.0, 294.0, 285.0, 289.0, 290.0, 319.0, 317.0, 287.0, 294.0, 284.0, 298.0, 297.0, 285.0, 314.0, 313.0, 286.0, 293.0, 312.0, 318.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7016816063473018, "mean_inference_ms": 1.255069617024935, "mean_action_processing_ms": 0.1343095240072887, "mean_env_wait_ms": 0.8447524656146783, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 8038400, "num_agent_steps_trained": 8038400, "num_env_steps_sampled": 4019200, "num_env_steps_trained": 4019200, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 4019200, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 8038400, "timers": {"training_iteration_time_ms": 3601.25, "learn_time_ms": 1093.157, "learn_throughput": 11709.21, "synch_weights_time_ms": 10.23}, "counters": {"num_env_steps_sampled": 4019200, "num_env_steps_trained": 4019200, "num_agent_steps_sampled": 8038400, "num_agent_steps_trained": 8038400}, "done": false, "episodes_total": 10048, "training_iteration": 314, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-20-54", "timestamp": 1666581654, "time_this_iter_s": 3.732663869857788, "time_total_s": 1208.0201799869537, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1208.0201799869537, "timesteps_since_restore": 0, "iterations_since_restore": 314, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.700000000000003, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 207.0, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 184.37, "shaped_reward_min": 156, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.74, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 19.34, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 28, "useful_onion_pickup_agent_0_mean": 14.57, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 25, "useful_onion_pickup_agent_1_mean": 19.11, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 27, "onion_drop_agent_0_mean": 0.13, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.06, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.07, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, 
"useful_onion_drop_agent_1_mean": 0.04, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 14.26, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 25, "potting_onion_agent_1_mean": 18.85, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 28, "dish_pickup_agent_0_mean": 6.4, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 4.81, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 6.15, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.68, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.2, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.12, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.98, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 10, "soup_pickup_agent_1_mean": 4.53, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.9, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 10, "soup_delivery_agent_1_mean": 4.49, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.26, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 25, "optimal_onion_potting_agent_1_mean": 18.85, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 28, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.26, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 25, "viable_onion_potting_agent_1_mean": 18.85, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 28, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0030033960938453674, "policy_loss": -0.0033531123772263527, "vf_loss": 
7.692840576171875, "vf_explained_var": 0.5669675469398499, "kl": 0.003200692357495427, "entropy": 0.8391327261924744, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 4032000, "num_env_steps_trained": 4032000, "num_agent_steps_sampled": 8064000, "num_agent_steps_trained": 8064000}, "sampler_results": {"episode_reward_max": 639.0, "episode_reward_min": 516.0, "episode_reward_mean": 598.37, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 251.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 299.185}, "custom_metrics": {"sparse_reward_mean": 207.0, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 184.37, "shaped_reward_min": 156, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.74, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 19.34, 
"onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 28, "useful_onion_pickup_agent_0_mean": 14.57, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 25, "useful_onion_pickup_agent_1_mean": 19.11, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 27, "onion_drop_agent_0_mean": 0.13, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.06, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.07, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.04, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 14.26, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 25, "potting_onion_agent_1_mean": 18.85, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 28, "dish_pickup_agent_0_mean": 6.4, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 4.81, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 6.15, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.68, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.2, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.12, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.98, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 10, "soup_pickup_agent_1_mean": 4.53, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.9, "soup_delivery_agent_0_min": 2, 
"soup_delivery_agent_0_max": 10, "soup_delivery_agent_1_mean": 4.49, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.26, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 25, "optimal_onion_potting_agent_1_mean": 18.85, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 28, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.26, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 25, "viable_onion_potting_agent_1_mean": 18.85, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 28, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [584.0, 587.0, 579.0, 633.0, 630.0, 630.0, 630.0, 582.0, 587.0, 630.0, 627.0, 579.0, 587.0, 639.0, 579.0, 584.0, 584.0, 539.0, 579.0, 630.0, 582.0, 587.0, 582.0, 579.0, 582.0, 582.0, 633.0, 576.0, 630.0, 579.0, 582.0, 576.0, 630.0, 630.0, 579.0, 636.0, 633.0, 576.0, 627.0, 584.0, 636.0, 582.0, 573.0, 582.0, 630.0, 582.0, 579.0, 579.0, 576.0, 587.0, 573.0, 636.0, 633.0, 636.0, 576.0, 582.0, 579.0, 590.0, 579.0, 579.0, 579.0, 636.0, 581.0, 582.0, 582.0, 627.0, 579.0, 630.0, 582.0, 627.0, 516.0, 582.0, 584.0, 587.0, 582.0, 582.0, 576.0, 582.0, 627.0, 579.0, 627.0, 630.0, 630.0, 579.0, 627.0, 630.0, 582.0, 573.0, 636.0, 582.0, 576.0, 630.0, 627.0, 582.0, 630.0, 630.0, 587.0, 627.0, 587.0, 630.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [290.0, 294.0, 291.0, 296.0, 287.0, 292.0, 311.0, 322.0, 313.0, 317.0, 318.0, 312.0, 311.0, 319.0, 287.0, 295.0, 291.0, 296.0, 315.0, 315.0, 314.0, 313.0, 295.0, 284.0, 294.0, 293.0, 324.0, 315.0, 287.0, 292.0, 296.0, 288.0, 294.0, 290.0, 265.0, 274.0, 288.0, 291.0, 315.0, 315.0, 291.0, 291.0, 296.0, 291.0, 292.0, 290.0, 289.0, 290.0, 289.0, 
293.0, 291.0, 291.0, 315.0, 318.0, 290.0, 286.0, 315.0, 315.0, 290.0, 289.0, 286.0, 296.0, 287.0, 289.0, 320.0, 310.0, 318.0, 312.0, 289.0, 290.0, 319.0, 317.0, 317.0, 316.0, 288.0, 288.0, 312.0, 315.0, 290.0, 294.0, 319.0, 317.0, 292.0, 290.0, 283.0, 290.0, 286.0, 296.0, 317.0, 313.0, 286.0, 296.0, 286.0, 293.0, 286.0, 293.0, 283.0, 293.0, 293.0, 294.0, 282.0, 291.0, 318.0, 318.0, 318.0, 315.0, 316.0, 320.0, 284.0, 292.0, 286.0, 296.0, 290.0, 289.0, 293.0, 297.0, 289.0, 290.0, 294.0, 285.0, 289.0, 290.0, 319.0, 317.0, 287.0, 294.0, 284.0, 298.0, 297.0, 285.0, 314.0, 313.0, 286.0, 293.0, 312.0, 318.0, 292.0, 290.0, 310.0, 317.0, 251.0, 265.0, 283.0, 299.0, 293.0, 291.0, 289.0, 298.0, 294.0, 288.0, 290.0, 292.0, 289.0, 287.0, 293.0, 289.0, 318.0, 309.0, 287.0, 292.0, 310.0, 317.0, 311.0, 319.0, 316.0, 314.0, 295.0, 284.0, 307.0, 320.0, 313.0, 317.0, 285.0, 297.0, 284.0, 289.0, 315.0, 321.0, 285.0, 297.0, 288.0, 288.0, 318.0, 312.0, 314.0, 313.0, 290.0, 292.0, 310.0, 320.0, 318.0, 312.0, 294.0, 293.0, 310.0, 317.0, 293.0, 294.0, 309.0, 321.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7016024368895014, "mean_inference_ms": 1.2548500981360906, "mean_action_processing_ms": 0.13429571912098523, "mean_env_wait_ms": 0.8446177174958618, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 639.0, "episode_reward_min": 516.0, "episode_reward_mean": 598.37, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 251.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 299.185}, "hist_stats": {"episode_reward": [584.0, 587.0, 579.0, 633.0, 630.0, 630.0, 630.0, 582.0, 587.0, 630.0, 627.0, 579.0, 587.0, 639.0, 579.0, 584.0, 584.0, 539.0, 579.0, 630.0, 582.0, 587.0, 582.0, 579.0, 582.0, 582.0, 633.0, 576.0, 630.0, 579.0, 582.0, 576.0, 630.0, 630.0, 579.0, 636.0, 633.0, 576.0, 627.0, 584.0, 636.0, 582.0, 573.0, 582.0, 630.0, 582.0, 579.0, 579.0, 576.0, 587.0, 573.0, 636.0, 633.0, 636.0, 576.0, 582.0, 
579.0, 590.0, 579.0, 579.0, 579.0, 636.0, 581.0, 582.0, 582.0, 627.0, 579.0, 630.0, 582.0, 627.0, 516.0, 582.0, 584.0, 587.0, 582.0, 582.0, 576.0, 582.0, 627.0, 579.0, 627.0, 630.0, 630.0, 579.0, 627.0, 630.0, 582.0, 573.0, 636.0, 582.0, 576.0, 630.0, 627.0, 582.0, 630.0, 630.0, 587.0, 627.0, 587.0, 630.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [290.0, 294.0, 291.0, 296.0, 287.0, 292.0, 311.0, 322.0, 313.0, 317.0, 318.0, 312.0, 311.0, 319.0, 287.0, 295.0, 291.0, 296.0, 315.0, 315.0, 314.0, 313.0, 295.0, 284.0, 294.0, 293.0, 324.0, 315.0, 287.0, 292.0, 296.0, 288.0, 294.0, 290.0, 265.0, 274.0, 288.0, 291.0, 315.0, 315.0, 291.0, 291.0, 296.0, 291.0, 292.0, 290.0, 289.0, 290.0, 289.0, 293.0, 291.0, 291.0, 315.0, 318.0, 290.0, 286.0, 315.0, 315.0, 290.0, 289.0, 286.0, 296.0, 287.0, 289.0, 320.0, 310.0, 318.0, 312.0, 289.0, 290.0, 319.0, 317.0, 317.0, 316.0, 288.0, 288.0, 312.0, 315.0, 290.0, 294.0, 319.0, 317.0, 292.0, 290.0, 283.0, 290.0, 286.0, 296.0, 317.0, 313.0, 286.0, 296.0, 286.0, 293.0, 286.0, 293.0, 283.0, 293.0, 293.0, 294.0, 282.0, 291.0, 318.0, 318.0, 318.0, 315.0, 316.0, 320.0, 284.0, 292.0, 286.0, 296.0, 290.0, 289.0, 293.0, 297.0, 289.0, 290.0, 294.0, 285.0, 289.0, 290.0, 319.0, 317.0, 287.0, 294.0, 284.0, 298.0, 297.0, 285.0, 314.0, 313.0, 286.0, 293.0, 312.0, 318.0, 292.0, 290.0, 310.0, 317.0, 251.0, 265.0, 283.0, 299.0, 293.0, 291.0, 289.0, 298.0, 294.0, 288.0, 290.0, 292.0, 289.0, 287.0, 293.0, 289.0, 318.0, 309.0, 287.0, 292.0, 310.0, 317.0, 311.0, 319.0, 
316.0, 314.0, 295.0, 284.0, 307.0, 320.0, 313.0, 317.0, 285.0, 297.0, 284.0, 289.0, 315.0, 321.0, 285.0, 297.0, 288.0, 288.0, 318.0, 312.0, 314.0, 313.0, 290.0, 292.0, 310.0, 320.0, 318.0, 312.0, 294.0, 293.0, 310.0, 317.0, 293.0, 294.0, 309.0, 321.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7016024368895014, "mean_inference_ms": 1.2548500981360906, "mean_action_processing_ms": 0.13429571912098523, "mean_env_wait_ms": 0.8446177174958618, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 8064000, "num_agent_steps_trained": 8064000, "num_env_steps_sampled": 4032000, "num_env_steps_trained": 4032000, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 4032000, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 8064000, "timers": {"training_iteration_time_ms": 3598.645, "learn_time_ms": 1096.864, "learn_throughput": 11669.635, "synch_weights_time_ms": 10.239}, "counters": {"num_env_steps_sampled": 4032000, "num_env_steps_trained": 4032000, "num_agent_steps_sampled": 8064000, "num_agent_steps_trained": 8064000}, "done": false, "episodes_total": 10080, "training_iteration": 315, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-20-57", "timestamp": 1666581657, "time_this_iter_s": 3.569254159927368, "time_total_s": 1211.589434146881, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1211.589434146881, "timesteps_since_restore": 0, "iterations_since_restore": 315, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.419999999999998, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 207.0, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 184.12, "shaped_reward_min": 156, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.8, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 19.18, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 28, "useful_onion_pickup_agent_0_mean": 14.65, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 25, "useful_onion_pickup_agent_1_mean": 18.94, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 27, "onion_drop_agent_0_mean": 0.12, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.09, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.06, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, 
"useful_onion_drop_agent_1_mean": 0.07, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.38, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 25, "potting_onion_agent_1_mean": 18.75, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 27, "dish_pickup_agent_0_mean": 6.33, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 4.84, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 6.12, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.69, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.15, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.13, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.93, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 10, "soup_pickup_agent_1_mean": 4.53, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.9, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 10, "soup_delivery_agent_1_mean": 4.47, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.38, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 25, "optimal_onion_potting_agent_1_mean": 18.75, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 27, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.38, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 25, "viable_onion_potting_agent_1_mean": 18.75, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 27, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0010780815500766039, "policy_loss": -0.0014326130039989948, "vf_loss": 
7.738142013549805, "vf_explained_var": 0.573646068572998, "kl": 0.0020240151789039373, "entropy": 0.8385658264160156, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 4044800, "num_env_steps_trained": 4044800, "num_agent_steps_sampled": 8089600, "num_agent_steps_trained": 8089600}, "sampler_results": {"episode_reward_max": 639.0, "episode_reward_min": 516.0, "episode_reward_mean": 598.12, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 251.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 299.06}, "custom_metrics": {"sparse_reward_mean": 207.0, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 184.12, "shaped_reward_min": 156, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.8, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 19.18, 
"onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 28, "useful_onion_pickup_agent_0_mean": 14.65, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 25, "useful_onion_pickup_agent_1_mean": 18.94, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 27, "onion_drop_agent_0_mean": 0.12, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.09, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.06, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.07, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.38, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 25, "potting_onion_agent_1_mean": 18.75, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 27, "dish_pickup_agent_0_mean": 6.33, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 4.84, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 6.12, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.69, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.15, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.13, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.93, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 10, "soup_pickup_agent_1_mean": 4.53, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.9, "soup_delivery_agent_0_min": 2, 
"soup_delivery_agent_0_max": 10, "soup_delivery_agent_1_mean": 4.47, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.38, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 25, "optimal_onion_potting_agent_1_mean": 18.75, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 27, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.38, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 25, "viable_onion_potting_agent_1_mean": 18.75, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 27, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [630.0, 630.0, 579.0, 636.0, 633.0, 576.0, 627.0, 584.0, 636.0, 582.0, 573.0, 582.0, 630.0, 582.0, 579.0, 579.0, 576.0, 587.0, 573.0, 636.0, 633.0, 636.0, 576.0, 582.0, 579.0, 590.0, 579.0, 579.0, 579.0, 636.0, 581.0, 582.0, 582.0, 627.0, 579.0, 630.0, 582.0, 627.0, 516.0, 582.0, 584.0, 587.0, 582.0, 582.0, 576.0, 582.0, 627.0, 579.0, 627.0, 630.0, 630.0, 579.0, 627.0, 630.0, 582.0, 573.0, 636.0, 582.0, 576.0, 630.0, 627.0, 582.0, 630.0, 630.0, 587.0, 627.0, 587.0, 630.0, 579.0, 582.0, 630.0, 582.0, 582.0, 636.0, 633.0, 582.0, 582.0, 627.0, 633.0, 587.0, 582.0, 582.0, 582.0, 522.0, 636.0, 630.0, 579.0, 579.0, 576.0, 579.0, 582.0, 579.0, 573.0, 633.0, 570.0, 639.0, 587.0, 579.0, 633.0, 587.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [320.0, 310.0, 318.0, 312.0, 289.0, 290.0, 319.0, 317.0, 317.0, 316.0, 288.0, 288.0, 312.0, 315.0, 290.0, 294.0, 319.0, 317.0, 292.0, 290.0, 283.0, 290.0, 286.0, 296.0, 317.0, 313.0, 286.0, 296.0, 286.0, 293.0, 286.0, 293.0, 283.0, 293.0, 293.0, 294.0, 282.0, 291.0, 318.0, 318.0, 318.0, 315.0, 316.0, 320.0, 284.0, 292.0, 286.0, 296.0, 290.0, 
289.0, 293.0, 297.0, 289.0, 290.0, 294.0, 285.0, 289.0, 290.0, 319.0, 317.0, 287.0, 294.0, 284.0, 298.0, 297.0, 285.0, 314.0, 313.0, 286.0, 293.0, 312.0, 318.0, 292.0, 290.0, 310.0, 317.0, 251.0, 265.0, 283.0, 299.0, 293.0, 291.0, 289.0, 298.0, 294.0, 288.0, 290.0, 292.0, 289.0, 287.0, 293.0, 289.0, 318.0, 309.0, 287.0, 292.0, 310.0, 317.0, 311.0, 319.0, 316.0, 314.0, 295.0, 284.0, 307.0, 320.0, 313.0, 317.0, 285.0, 297.0, 284.0, 289.0, 315.0, 321.0, 285.0, 297.0, 288.0, 288.0, 318.0, 312.0, 314.0, 313.0, 290.0, 292.0, 310.0, 320.0, 318.0, 312.0, 294.0, 293.0, 310.0, 317.0, 293.0, 294.0, 309.0, 321.0, 293.0, 286.0, 290.0, 292.0, 308.0, 322.0, 289.0, 293.0, 290.0, 292.0, 315.0, 321.0, 319.0, 314.0, 290.0, 292.0, 294.0, 288.0, 317.0, 310.0, 316.0, 317.0, 286.0, 301.0, 295.0, 287.0, 290.0, 292.0, 291.0, 291.0, 261.0, 261.0, 325.0, 311.0, 319.0, 311.0, 291.0, 288.0, 289.0, 290.0, 285.0, 291.0, 290.0, 289.0, 294.0, 288.0, 288.0, 291.0, 285.0, 288.0, 313.0, 320.0, 285.0, 285.0, 317.0, 322.0, 284.0, 303.0, 294.0, 285.0, 320.0, 313.0, 296.0, 291.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7015318240181805, "mean_inference_ms": 1.2546321530437043, "mean_action_processing_ms": 0.13428390360660708, "mean_env_wait_ms": 0.8444934858469165, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 639.0, "episode_reward_min": 516.0, "episode_reward_mean": 598.12, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 251.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 299.06}, "hist_stats": {"episode_reward": [630.0, 630.0, 579.0, 636.0, 633.0, 576.0, 627.0, 584.0, 636.0, 582.0, 573.0, 582.0, 630.0, 582.0, 579.0, 579.0, 576.0, 587.0, 573.0, 636.0, 633.0, 636.0, 576.0, 582.0, 579.0, 590.0, 579.0, 579.0, 579.0, 636.0, 581.0, 582.0, 582.0, 627.0, 579.0, 630.0, 582.0, 627.0, 516.0, 582.0, 584.0, 587.0, 582.0, 582.0, 576.0, 582.0, 627.0, 579.0, 627.0, 630.0, 630.0, 579.0, 627.0, 630.0, 582.0, 573.0, 636.0, 
582.0, 576.0, 630.0, 627.0, 582.0, 630.0, 630.0, 587.0, 627.0, 587.0, 630.0, 579.0, 582.0, 630.0, 582.0, 582.0, 636.0, 633.0, 582.0, 582.0, 627.0, 633.0, 587.0, 582.0, 582.0, 582.0, 522.0, 636.0, 630.0, 579.0, 579.0, 576.0, 579.0, 582.0, 579.0, 573.0, 633.0, 570.0, 639.0, 587.0, 579.0, 633.0, 587.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [320.0, 310.0, 318.0, 312.0, 289.0, 290.0, 319.0, 317.0, 317.0, 316.0, 288.0, 288.0, 312.0, 315.0, 290.0, 294.0, 319.0, 317.0, 292.0, 290.0, 283.0, 290.0, 286.0, 296.0, 317.0, 313.0, 286.0, 296.0, 286.0, 293.0, 286.0, 293.0, 283.0, 293.0, 293.0, 294.0, 282.0, 291.0, 318.0, 318.0, 318.0, 315.0, 316.0, 320.0, 284.0, 292.0, 286.0, 296.0, 290.0, 289.0, 293.0, 297.0, 289.0, 290.0, 294.0, 285.0, 289.0, 290.0, 319.0, 317.0, 287.0, 294.0, 284.0, 298.0, 297.0, 285.0, 314.0, 313.0, 286.0, 293.0, 312.0, 318.0, 292.0, 290.0, 310.0, 317.0, 251.0, 265.0, 283.0, 299.0, 293.0, 291.0, 289.0, 298.0, 294.0, 288.0, 290.0, 292.0, 289.0, 287.0, 293.0, 289.0, 318.0, 309.0, 287.0, 292.0, 310.0, 317.0, 311.0, 319.0, 316.0, 314.0, 295.0, 284.0, 307.0, 320.0, 313.0, 317.0, 285.0, 297.0, 284.0, 289.0, 315.0, 321.0, 285.0, 297.0, 288.0, 288.0, 318.0, 312.0, 314.0, 313.0, 290.0, 292.0, 310.0, 320.0, 318.0, 312.0, 294.0, 293.0, 310.0, 317.0, 293.0, 294.0, 309.0, 321.0, 293.0, 286.0, 290.0, 292.0, 308.0, 322.0, 289.0, 293.0, 290.0, 292.0, 315.0, 321.0, 319.0, 314.0, 290.0, 292.0, 294.0, 288.0, 317.0, 310.0, 316.0, 317.0, 286.0, 301.0, 295.0, 287.0, 290.0, 292.0, 291.0, 
291.0, 261.0, 261.0, 325.0, 311.0, 319.0, 311.0, 291.0, 288.0, 289.0, 290.0, 285.0, 291.0, 290.0, 289.0, 294.0, 288.0, 288.0, 291.0, 285.0, 288.0, 313.0, 320.0, 285.0, 285.0, 317.0, 322.0, 284.0, 303.0, 294.0, 285.0, 320.0, 313.0, 296.0, 291.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7015318240181805, "mean_inference_ms": 1.2546321530437043, "mean_action_processing_ms": 0.13428390360660708, "mean_env_wait_ms": 0.8444934858469165, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 8089600, "num_agent_steps_trained": 8089600, "num_env_steps_sampled": 4044800, "num_env_steps_trained": 4044800, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 4044800, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 8089600, "timers": {"training_iteration_time_ms": 3602.352, "learn_time_ms": 1097.997, "learn_throughput": 11657.591, "synch_weights_time_ms": 10.858}, "counters": {"num_env_steps_sampled": 4044800, "num_env_steps_trained": 4044800, "num_agent_steps_sampled": 8089600, "num_agent_steps_trained": 8089600}, "done": false, "episodes_total": 10112, "training_iteration": 316, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-21-01", "timestamp": 1666581661, "time_this_iter_s": 3.6783833503723145, "time_total_s": 1215.2678174972534, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1215.2678174972534, "timesteps_since_restore": 0, "iterations_since_restore": 316, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.380000000000003, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 206.0, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 183.18, "shaped_reward_min": 156, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.37, "onion_pickup_agent_0_min": 4, "onion_pickup_agent_0_max": 28, "onion_pickup_agent_1_mean": 19.5, "onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 29, "useful_onion_pickup_agent_0_mean": 14.23, "useful_onion_pickup_agent_0_min": 4, "useful_onion_pickup_agent_0_max": 28, "useful_onion_pickup_agent_1_mean": 19.27, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 29, "onion_drop_agent_0_mean": 0.11, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.1, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.06, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, 
"useful_onion_drop_agent_1_mean": 0.08, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 13.93, "potting_onion_agent_0_min": 3, "potting_onion_agent_0_max": 27, "potting_onion_agent_1_mean": 19.09, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 29, "dish_pickup_agent_0_mean": 6.46, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 4.62, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 6.24, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.45, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.13, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.12, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 6.1, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 10, "soup_pickup_agent_1_mean": 4.31, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 6.06, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 10, "soup_delivery_agent_1_mean": 4.27, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 13.93, "optimal_onion_potting_agent_0_min": 3, "optimal_onion_potting_agent_0_max": 27, "optimal_onion_potting_agent_1_mean": 19.09, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 29, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 13.93, "viable_onion_potting_agent_0_min": 3, "viable_onion_potting_agent_0_max": 27, "viable_onion_potting_agent_1_mean": 19.09, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 29, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0019808358047157526, "policy_loss": 0.0016194693744182587, "vf_loss": 
7.821843147277832, "vf_explained_var": 0.5842312574386597, "kl": 0.0032284390181303024, "entropy": 0.8416324853897095, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 4057600, "num_env_steps_trained": 4057600, "num_agent_steps_sampled": 8115200, "num_agent_steps_trained": 8115200}, "sampler_results": {"episode_reward_max": 639.0, "episode_reward_min": 516.0, "episode_reward_mean": 595.18, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 251.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 297.59}, "custom_metrics": {"sparse_reward_mean": 206.0, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 183.18, "shaped_reward_min": 156, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.37, "onion_pickup_agent_0_min": 4, "onion_pickup_agent_0_max": 28, "onion_pickup_agent_1_mean": 19.5, 
"onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 29, "useful_onion_pickup_agent_0_mean": 14.23, "useful_onion_pickup_agent_0_min": 4, "useful_onion_pickup_agent_0_max": 28, "useful_onion_pickup_agent_1_mean": 19.27, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 29, "onion_drop_agent_0_mean": 0.11, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.1, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.06, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.08, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 13.93, "potting_onion_agent_0_min": 3, "potting_onion_agent_0_max": 27, "potting_onion_agent_1_mean": 19.09, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 29, "dish_pickup_agent_0_mean": 6.46, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 4.62, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 6.24, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.45, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.13, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.12, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 6.1, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 10, "soup_pickup_agent_1_mean": 4.31, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 6.06, "soup_delivery_agent_0_min": 2, 
"soup_delivery_agent_0_max": 10, "soup_delivery_agent_1_mean": 4.27, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 13.93, "optimal_onion_potting_agent_0_min": 3, "optimal_onion_potting_agent_0_max": 27, "optimal_onion_potting_agent_1_mean": 19.09, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 29, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 13.93, "viable_onion_potting_agent_0_min": 3, "viable_onion_potting_agent_0_max": 27, "viable_onion_potting_agent_1_mean": 19.09, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 29, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 627.0, 579.0, 630.0, 582.0, 627.0, 516.0, 582.0, 584.0, 587.0, 582.0, 582.0, 576.0, 582.0, 627.0, 579.0, 627.0, 630.0, 630.0, 579.0, 627.0, 630.0, 582.0, 573.0, 636.0, 582.0, 576.0, 630.0, 627.0, 582.0, 630.0, 630.0, 587.0, 627.0, 587.0, 630.0, 579.0, 582.0, 630.0, 582.0, 582.0, 636.0, 633.0, 582.0, 582.0, 627.0, 633.0, 587.0, 582.0, 582.0, 582.0, 522.0, 636.0, 630.0, 579.0, 579.0, 576.0, 579.0, 582.0, 579.0, 573.0, 633.0, 570.0, 639.0, 587.0, 579.0, 633.0, 587.0, 576.0, 576.0, 579.0, 627.0, 579.0, 579.0, 630.0, 587.0, 573.0, 576.0, 582.0, 579.0, 573.0, 579.0, 630.0, 587.0, 570.0, 636.0, 579.0, 587.0, 627.0, 582.0, 582.0, 582.0, 582.0, 576.0, 579.0, 584.0, 627.0, 582.0, 582.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [297.0, 285.0, 314.0, 313.0, 286.0, 293.0, 312.0, 318.0, 292.0, 290.0, 310.0, 317.0, 251.0, 265.0, 283.0, 299.0, 293.0, 291.0, 289.0, 298.0, 294.0, 288.0, 290.0, 292.0, 289.0, 287.0, 293.0, 289.0, 318.0, 309.0, 287.0, 292.0, 310.0, 317.0, 311.0, 319.0, 316.0, 314.0, 295.0, 284.0, 307.0, 320.0, 313.0, 317.0, 285.0, 297.0, 284.0, 289.0, 315.0, 
321.0, 285.0, 297.0, 288.0, 288.0, 318.0, 312.0, 314.0, 313.0, 290.0, 292.0, 310.0, 320.0, 318.0, 312.0, 294.0, 293.0, 310.0, 317.0, 293.0, 294.0, 309.0, 321.0, 293.0, 286.0, 290.0, 292.0, 308.0, 322.0, 289.0, 293.0, 290.0, 292.0, 315.0, 321.0, 319.0, 314.0, 290.0, 292.0, 294.0, 288.0, 317.0, 310.0, 316.0, 317.0, 286.0, 301.0, 295.0, 287.0, 290.0, 292.0, 291.0, 291.0, 261.0, 261.0, 325.0, 311.0, 319.0, 311.0, 291.0, 288.0, 289.0, 290.0, 285.0, 291.0, 290.0, 289.0, 294.0, 288.0, 288.0, 291.0, 285.0, 288.0, 313.0, 320.0, 285.0, 285.0, 317.0, 322.0, 284.0, 303.0, 294.0, 285.0, 320.0, 313.0, 296.0, 291.0, 289.0, 287.0, 284.0, 292.0, 285.0, 294.0, 315.0, 312.0, 287.0, 292.0, 298.0, 281.0, 317.0, 313.0, 296.0, 291.0, 286.0, 287.0, 290.0, 286.0, 293.0, 289.0, 284.0, 295.0, 287.0, 286.0, 286.0, 293.0, 313.0, 317.0, 296.0, 291.0, 285.0, 285.0, 314.0, 322.0, 287.0, 292.0, 297.0, 290.0, 312.0, 315.0, 296.0, 286.0, 286.0, 296.0, 291.0, 291.0, 291.0, 291.0, 281.0, 295.0, 295.0, 284.0, 292.0, 292.0, 314.0, 313.0, 291.0, 291.0, 291.0, 291.0, 290.0, 289.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7014770298391392, "mean_inference_ms": 1.2544313326895178, "mean_action_processing_ms": 0.13427583305183213, "mean_env_wait_ms": 0.844396471243271, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 639.0, "episode_reward_min": 516.0, "episode_reward_mean": 595.18, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 251.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 297.59}, "hist_stats": {"episode_reward": [582.0, 627.0, 579.0, 630.0, 582.0, 627.0, 516.0, 582.0, 584.0, 587.0, 582.0, 582.0, 576.0, 582.0, 627.0, 579.0, 627.0, 630.0, 630.0, 579.0, 627.0, 630.0, 582.0, 573.0, 636.0, 582.0, 576.0, 630.0, 627.0, 582.0, 630.0, 630.0, 587.0, 627.0, 587.0, 630.0, 579.0, 582.0, 630.0, 582.0, 582.0, 636.0, 633.0, 582.0, 582.0, 627.0, 633.0, 587.0, 582.0, 582.0, 582.0, 522.0, 636.0, 630.0, 579.0, 579.0, 576.0, 
579.0, 582.0, 579.0, 573.0, 633.0, 570.0, 639.0, 587.0, 579.0, 633.0, 587.0, 576.0, 576.0, 579.0, 627.0, 579.0, 579.0, 630.0, 587.0, 573.0, 576.0, 582.0, 579.0, 573.0, 579.0, 630.0, 587.0, 570.0, 636.0, 579.0, 587.0, 627.0, 582.0, 582.0, 582.0, 582.0, 576.0, 579.0, 584.0, 627.0, 582.0, 582.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [297.0, 285.0, 314.0, 313.0, 286.0, 293.0, 312.0, 318.0, 292.0, 290.0, 310.0, 317.0, 251.0, 265.0, 283.0, 299.0, 293.0, 291.0, 289.0, 298.0, 294.0, 288.0, 290.0, 292.0, 289.0, 287.0, 293.0, 289.0, 318.0, 309.0, 287.0, 292.0, 310.0, 317.0, 311.0, 319.0, 316.0, 314.0, 295.0, 284.0, 307.0, 320.0, 313.0, 317.0, 285.0, 297.0, 284.0, 289.0, 315.0, 321.0, 285.0, 297.0, 288.0, 288.0, 318.0, 312.0, 314.0, 313.0, 290.0, 292.0, 310.0, 320.0, 318.0, 312.0, 294.0, 293.0, 310.0, 317.0, 293.0, 294.0, 309.0, 321.0, 293.0, 286.0, 290.0, 292.0, 308.0, 322.0, 289.0, 293.0, 290.0, 292.0, 315.0, 321.0, 319.0, 314.0, 290.0, 292.0, 294.0, 288.0, 317.0, 310.0, 316.0, 317.0, 286.0, 301.0, 295.0, 287.0, 290.0, 292.0, 291.0, 291.0, 261.0, 261.0, 325.0, 311.0, 319.0, 311.0, 291.0, 288.0, 289.0, 290.0, 285.0, 291.0, 290.0, 289.0, 294.0, 288.0, 288.0, 291.0, 285.0, 288.0, 313.0, 320.0, 285.0, 285.0, 317.0, 322.0, 284.0, 303.0, 294.0, 285.0, 320.0, 313.0, 296.0, 291.0, 289.0, 287.0, 284.0, 292.0, 285.0, 294.0, 315.0, 312.0, 287.0, 292.0, 298.0, 281.0, 317.0, 313.0, 296.0, 291.0, 286.0, 287.0, 290.0, 286.0, 293.0, 289.0, 284.0, 295.0, 287.0, 286.0, 286.0, 293.0, 313.0, 
317.0, 296.0, 291.0, 285.0, 285.0, 314.0, 322.0, 287.0, 292.0, 297.0, 290.0, 312.0, 315.0, 296.0, 286.0, 286.0, 296.0, 291.0, 291.0, 291.0, 291.0, 281.0, 295.0, 295.0, 284.0, 292.0, 292.0, 314.0, 313.0, 291.0, 291.0, 291.0, 291.0, 290.0, 289.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7014770298391392, "mean_inference_ms": 1.2544313326895178, "mean_action_processing_ms": 0.13427583305183213, "mean_env_wait_ms": 0.844396471243271, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 8115200, "num_agent_steps_trained": 8115200, "num_env_steps_sampled": 4057600, "num_env_steps_trained": 4057600, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 4057600, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 8115200, "timers": {"training_iteration_time_ms": 3616.638, "learn_time_ms": 1109.991, "learn_throughput": 11531.625, "synch_weights_time_ms": 10.901}, "counters": {"num_env_steps_sampled": 4057600, "num_env_steps_trained": 4057600, "num_agent_steps_sampled": 8115200, "num_agent_steps_trained": 8115200}, "done": false, "episodes_total": 10144, "training_iteration": 317, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-21-05", "timestamp": 1666581665, "time_this_iter_s": 3.7974302768707275, "time_total_s": 1219.0652477741241, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1219.0652477741241, "timesteps_since_restore": 0, "iterations_since_restore": 317, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 21.01666666666667, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 205.4, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 183.32, "shaped_reward_min": 162, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.97, "onion_pickup_agent_0_min": 4, "onion_pickup_agent_0_max": 28, "onion_pickup_agent_1_mean": 19.02, "onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 29, "useful_onion_pickup_agent_0_mean": 14.82, "useful_onion_pickup_agent_0_min": 4, "useful_onion_pickup_agent_0_max": 28, "useful_onion_pickup_agent_1_mean": 18.81, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 29, "onion_drop_agent_0_mean": 0.1, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.12, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.06, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, 
"useful_onion_drop_agent_1_mean": 0.07, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.53, "potting_onion_agent_0_min": 3, "potting_onion_agent_0_max": 27, "potting_onion_agent_1_mean": 18.62, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 29, "dish_pickup_agent_0_mean": 6.21, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 4.8, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 6.0, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.64, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.11, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.1, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.87, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.52, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.82, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.48, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.53, "optimal_onion_potting_agent_0_min": 3, "optimal_onion_potting_agent_0_max": 27, "optimal_onion_potting_agent_1_mean": 18.62, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 29, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.53, "viable_onion_potting_agent_0_min": 3, "viable_onion_potting_agent_0_max": 27, "viable_onion_potting_agent_1_mean": 18.62, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 29, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0010754279792308807, "policy_loss": 0.0007261198479682207, "vf_loss": 
7.674044132232666, "vf_explained_var": 0.5930687785148621, "kl": 0.0021228990517556667, "entropy": 0.8361895084381104, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 4070400, "num_env_steps_trained": 4070400, "num_agent_steps_sampled": 8140800, "num_agent_steps_trained": 8140800}, "sampler_results": {"episode_reward_max": 639.0, "episode_reward_min": 522.0, "episode_reward_mean": 594.12, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 261.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 297.06}, "custom_metrics": {"sparse_reward_mean": 205.4, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 183.32, "shaped_reward_min": 162, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.97, "onion_pickup_agent_0_min": 4, "onion_pickup_agent_0_max": 28, "onion_pickup_agent_1_mean": 19.02, 
"onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 29, "useful_onion_pickup_agent_0_mean": 14.82, "useful_onion_pickup_agent_0_min": 4, "useful_onion_pickup_agent_0_max": 28, "useful_onion_pickup_agent_1_mean": 18.81, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 29, "onion_drop_agent_0_mean": 0.1, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.12, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.06, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.07, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.53, "potting_onion_agent_0_min": 3, "potting_onion_agent_0_max": 27, "potting_onion_agent_1_mean": 18.62, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 29, "dish_pickup_agent_0_mean": 6.21, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 4.8, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 6.0, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.64, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.11, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.1, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.87, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.52, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.82, "soup_delivery_agent_0_min": 2, 
"soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.48, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.53, "optimal_onion_potting_agent_0_min": 3, "optimal_onion_potting_agent_0_max": 27, "optimal_onion_potting_agent_1_mean": 18.62, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 29, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.53, "viable_onion_potting_agent_0_min": 3, "viable_onion_potting_agent_0_max": 27, "viable_onion_potting_agent_1_mean": 18.62, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 29, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [587.0, 627.0, 587.0, 630.0, 579.0, 582.0, 630.0, 582.0, 582.0, 636.0, 633.0, 582.0, 582.0, 627.0, 633.0, 587.0, 582.0, 582.0, 582.0, 522.0, 636.0, 630.0, 579.0, 579.0, 576.0, 579.0, 582.0, 579.0, 573.0, 633.0, 570.0, 639.0, 587.0, 579.0, 633.0, 587.0, 576.0, 576.0, 579.0, 627.0, 579.0, 579.0, 630.0, 587.0, 573.0, 576.0, 582.0, 579.0, 573.0, 579.0, 630.0, 587.0, 570.0, 636.0, 579.0, 587.0, 627.0, 582.0, 582.0, 582.0, 582.0, 576.0, 579.0, 584.0, 627.0, 582.0, 582.0, 579.0, 630.0, 630.0, 573.0, 579.0, 576.0, 587.0, 633.0, 636.0, 630.0, 579.0, 630.0, 579.0, 582.0, 579.0, 633.0, 587.0, 636.0, 579.0, 582.0, 582.0, 627.0, 627.0, 576.0, 579.0, 582.0, 582.0, 587.0, 579.0, 582.0, 582.0, 582.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [294.0, 293.0, 310.0, 317.0, 293.0, 294.0, 309.0, 321.0, 293.0, 286.0, 290.0, 292.0, 308.0, 322.0, 289.0, 293.0, 290.0, 292.0, 315.0, 321.0, 319.0, 314.0, 290.0, 292.0, 294.0, 288.0, 317.0, 310.0, 316.0, 317.0, 286.0, 301.0, 295.0, 287.0, 290.0, 292.0, 291.0, 291.0, 261.0, 261.0, 325.0, 311.0, 319.0, 311.0, 291.0, 288.0, 289.0, 290.0, 285.0, 
291.0, 290.0, 289.0, 294.0, 288.0, 288.0, 291.0, 285.0, 288.0, 313.0, 320.0, 285.0, 285.0, 317.0, 322.0, 284.0, 303.0, 294.0, 285.0, 320.0, 313.0, 296.0, 291.0, 289.0, 287.0, 284.0, 292.0, 285.0, 294.0, 315.0, 312.0, 287.0, 292.0, 298.0, 281.0, 317.0, 313.0, 296.0, 291.0, 286.0, 287.0, 290.0, 286.0, 293.0, 289.0, 284.0, 295.0, 287.0, 286.0, 286.0, 293.0, 313.0, 317.0, 296.0, 291.0, 285.0, 285.0, 314.0, 322.0, 287.0, 292.0, 297.0, 290.0, 312.0, 315.0, 296.0, 286.0, 286.0, 296.0, 291.0, 291.0, 291.0, 291.0, 281.0, 295.0, 295.0, 284.0, 292.0, 292.0, 314.0, 313.0, 291.0, 291.0, 291.0, 291.0, 290.0, 289.0, 311.0, 319.0, 309.0, 321.0, 278.0, 295.0, 288.0, 291.0, 286.0, 290.0, 290.0, 297.0, 314.0, 319.0, 319.0, 317.0, 316.0, 314.0, 294.0, 285.0, 313.0, 317.0, 291.0, 288.0, 288.0, 294.0, 288.0, 291.0, 313.0, 320.0, 296.0, 291.0, 317.0, 319.0, 295.0, 284.0, 289.0, 293.0, 294.0, 288.0, 311.0, 316.0, 311.0, 316.0, 285.0, 291.0, 286.0, 293.0, 286.0, 296.0, 293.0, 289.0, 289.0, 298.0, 288.0, 291.0, 289.0, 293.0, 292.0, 290.0, 289.0, 293.0, 282.0, 300.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7014010441921795, "mean_inference_ms": 1.2542360507656027, "mean_action_processing_ms": 0.13427080409938527, "mean_env_wait_ms": 0.8443139300228972, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 639.0, "episode_reward_min": 522.0, "episode_reward_mean": 594.12, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 261.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 297.06}, "hist_stats": {"episode_reward": [587.0, 627.0, 587.0, 630.0, 579.0, 582.0, 630.0, 582.0, 582.0, 636.0, 633.0, 582.0, 582.0, 627.0, 633.0, 587.0, 582.0, 582.0, 582.0, 522.0, 636.0, 630.0, 579.0, 579.0, 576.0, 579.0, 582.0, 579.0, 573.0, 633.0, 570.0, 639.0, 587.0, 579.0, 633.0, 587.0, 576.0, 576.0, 579.0, 627.0, 579.0, 579.0, 630.0, 587.0, 573.0, 576.0, 582.0, 579.0, 573.0, 579.0, 630.0, 587.0, 570.0, 636.0, 579.0, 587.0, 627.0, 
582.0, 582.0, 582.0, 582.0, 576.0, 579.0, 584.0, 627.0, 582.0, 582.0, 579.0, 630.0, 630.0, 573.0, 579.0, 576.0, 587.0, 633.0, 636.0, 630.0, 579.0, 630.0, 579.0, 582.0, 579.0, 633.0, 587.0, 636.0, 579.0, 582.0, 582.0, 627.0, 627.0, 576.0, 579.0, 582.0, 582.0, 587.0, 579.0, 582.0, 582.0, 582.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [294.0, 293.0, 310.0, 317.0, 293.0, 294.0, 309.0, 321.0, 293.0, 286.0, 290.0, 292.0, 308.0, 322.0, 289.0, 293.0, 290.0, 292.0, 315.0, 321.0, 319.0, 314.0, 290.0, 292.0, 294.0, 288.0, 317.0, 310.0, 316.0, 317.0, 286.0, 301.0, 295.0, 287.0, 290.0, 292.0, 291.0, 291.0, 261.0, 261.0, 325.0, 311.0, 319.0, 311.0, 291.0, 288.0, 289.0, 290.0, 285.0, 291.0, 290.0, 289.0, 294.0, 288.0, 288.0, 291.0, 285.0, 288.0, 313.0, 320.0, 285.0, 285.0, 317.0, 322.0, 284.0, 303.0, 294.0, 285.0, 320.0, 313.0, 296.0, 291.0, 289.0, 287.0, 284.0, 292.0, 285.0, 294.0, 315.0, 312.0, 287.0, 292.0, 298.0, 281.0, 317.0, 313.0, 296.0, 291.0, 286.0, 287.0, 290.0, 286.0, 293.0, 289.0, 284.0, 295.0, 287.0, 286.0, 286.0, 293.0, 313.0, 317.0, 296.0, 291.0, 285.0, 285.0, 314.0, 322.0, 287.0, 292.0, 297.0, 290.0, 312.0, 315.0, 296.0, 286.0, 286.0, 296.0, 291.0, 291.0, 291.0, 291.0, 281.0, 295.0, 295.0, 284.0, 292.0, 292.0, 314.0, 313.0, 291.0, 291.0, 291.0, 291.0, 290.0, 289.0, 311.0, 319.0, 309.0, 321.0, 278.0, 295.0, 288.0, 291.0, 286.0, 290.0, 290.0, 297.0, 314.0, 319.0, 319.0, 317.0, 316.0, 314.0, 294.0, 285.0, 313.0, 317.0, 291.0, 288.0, 288.0, 294.0, 288.0, 291.0, 313.0, 
320.0, 296.0, 291.0, 317.0, 319.0, 295.0, 284.0, 289.0, 293.0, 294.0, 288.0, 311.0, 316.0, 311.0, 316.0, 285.0, 291.0, 286.0, 293.0, 286.0, 296.0, 293.0, 289.0, 289.0, 298.0, 288.0, 291.0, 289.0, 293.0, 292.0, 290.0, 289.0, 293.0, 282.0, 300.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7014010441921795, "mean_inference_ms": 1.2542360507656027, "mean_action_processing_ms": 0.13427080409938527, "mean_env_wait_ms": 0.8443139300228972, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 8140800, "num_agent_steps_trained": 8140800, "num_env_steps_sampled": 4070400, "num_env_steps_trained": 4070400, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 4070400, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 8140800, "timers": {"training_iteration_time_ms": 3624.456, "learn_time_ms": 1115.287, "learn_throughput": 11476.862, "synch_weights_time_ms": 11.228}, "counters": {"num_env_steps_sampled": 4070400, "num_env_steps_trained": 4070400, "num_agent_steps_sampled": 8140800, "num_agent_steps_trained": 8140800}, "done": false, "episodes_total": 10176, "training_iteration": 318, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-21-09", "timestamp": 1666581669, "time_this_iter_s": 3.6486549377441406, "time_total_s": 1222.7139027118683, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1222.7139027118683, "timesteps_since_restore": 0, "iterations_since_restore": 318, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.639999999999997, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 204.6, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 182.56, "shaped_reward_min": 165, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.33, "onion_pickup_agent_0_min": 4, "onion_pickup_agent_0_max": 28, "onion_pickup_agent_1_mean": 18.63, "onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 29, "useful_onion_pickup_agent_0_mean": 15.15, "useful_onion_pickup_agent_0_min": 4, "useful_onion_pickup_agent_0_max": 28, "useful_onion_pickup_agent_1_mean": 18.41, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 29, "onion_drop_agent_0_mean": 0.1, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.11, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.07, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, 
"useful_onion_drop_agent_1_mean": 0.06, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 14.87, "potting_onion_agent_0_min": 3, "potting_onion_agent_0_max": 27, "potting_onion_agent_1_mean": 18.19, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 29, "dish_pickup_agent_0_mean": 6.01, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 4.88, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.83, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.73, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.07, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.08, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.72, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.62, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.67, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.6, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.87, "optimal_onion_potting_agent_0_min": 3, "optimal_onion_potting_agent_0_max": 27, "optimal_onion_potting_agent_1_mean": 18.19, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 29, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.87, "viable_onion_potting_agent_0_min": 3, "viable_onion_potting_agent_0_max": 27, "viable_onion_potting_agent_1_mean": 18.19, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 29, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0013955773320049047, "policy_loss": 0.0010526780970394611, "vf_loss": 
7.592601299285889, "vf_explained_var": 0.5996973514556885, "kl": 0.001881860545836389, "entropy": 0.8327209949493408, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 4083200, "num_env_steps_trained": 4083200, "num_agent_steps_sampled": 8166400, "num_agent_steps_trained": 8166400}, "sampler_results": {"episode_reward_max": 636.0, "episode_reward_min": 525.0, "episode_reward_mean": 591.76, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 259.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 295.88}, "custom_metrics": {"sparse_reward_mean": 204.6, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 182.56, "shaped_reward_min": 165, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.33, "onion_pickup_agent_0_min": 4, "onion_pickup_agent_0_max": 28, "onion_pickup_agent_1_mean": 18.63, 
"onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 29, "useful_onion_pickup_agent_0_mean": 15.15, "useful_onion_pickup_agent_0_min": 4, "useful_onion_pickup_agent_0_max": 28, "useful_onion_pickup_agent_1_mean": 18.41, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 29, "onion_drop_agent_0_mean": 0.1, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.11, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.07, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.06, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 14.87, "potting_onion_agent_0_min": 3, "potting_onion_agent_0_max": 27, "potting_onion_agent_1_mean": 18.19, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 29, "dish_pickup_agent_0_mean": 6.01, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 4.88, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.83, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.73, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.07, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.08, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.72, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.62, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.67, "soup_delivery_agent_0_min": 2, 
"soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.6, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.87, "optimal_onion_potting_agent_0_min": 3, "optimal_onion_potting_agent_0_max": 27, "optimal_onion_potting_agent_1_mean": 18.19, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 29, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.87, "viable_onion_potting_agent_0_min": 3, "viable_onion_potting_agent_0_max": 27, "viable_onion_potting_agent_1_mean": 18.19, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 29, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [587.0, 579.0, 633.0, 587.0, 576.0, 576.0, 579.0, 627.0, 579.0, 579.0, 630.0, 587.0, 573.0, 576.0, 582.0, 579.0, 573.0, 579.0, 630.0, 587.0, 570.0, 636.0, 579.0, 587.0, 627.0, 582.0, 582.0, 582.0, 582.0, 576.0, 579.0, 584.0, 627.0, 582.0, 582.0, 579.0, 630.0, 630.0, 573.0, 579.0, 576.0, 587.0, 633.0, 636.0, 630.0, 579.0, 630.0, 579.0, 582.0, 579.0, 633.0, 587.0, 636.0, 579.0, 582.0, 582.0, 627.0, 627.0, 576.0, 579.0, 582.0, 582.0, 587.0, 579.0, 582.0, 582.0, 582.0, 582.0, 579.0, 576.0, 579.0, 579.0, 630.0, 525.0, 587.0, 579.0, 579.0, 627.0, 579.0, 630.0, 579.0, 579.0, 630.0, 579.0, 579.0, 582.0, 587.0, 579.0, 579.0, 582.0, 579.0, 576.0, 579.0, 630.0, 576.0, 579.0, 636.0, 636.0, 579.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [284.0, 303.0, 294.0, 285.0, 320.0, 313.0, 296.0, 291.0, 289.0, 287.0, 284.0, 292.0, 285.0, 294.0, 315.0, 312.0, 287.0, 292.0, 298.0, 281.0, 317.0, 313.0, 296.0, 291.0, 286.0, 287.0, 290.0, 286.0, 293.0, 289.0, 284.0, 295.0, 287.0, 286.0, 286.0, 293.0, 313.0, 317.0, 296.0, 291.0, 285.0, 285.0, 314.0, 322.0, 287.0, 292.0, 297.0, 290.0, 312.0, 
315.0, 296.0, 286.0, 286.0, 296.0, 291.0, 291.0, 291.0, 291.0, 281.0, 295.0, 295.0, 284.0, 292.0, 292.0, 314.0, 313.0, 291.0, 291.0, 291.0, 291.0, 290.0, 289.0, 311.0, 319.0, 309.0, 321.0, 278.0, 295.0, 288.0, 291.0, 286.0, 290.0, 290.0, 297.0, 314.0, 319.0, 319.0, 317.0, 316.0, 314.0, 294.0, 285.0, 313.0, 317.0, 291.0, 288.0, 288.0, 294.0, 288.0, 291.0, 313.0, 320.0, 296.0, 291.0, 317.0, 319.0, 295.0, 284.0, 289.0, 293.0, 294.0, 288.0, 311.0, 316.0, 311.0, 316.0, 285.0, 291.0, 286.0, 293.0, 286.0, 296.0, 293.0, 289.0, 289.0, 298.0, 288.0, 291.0, 289.0, 293.0, 292.0, 290.0, 289.0, 293.0, 282.0, 300.0, 288.0, 291.0, 286.0, 290.0, 293.0, 286.0, 287.0, 292.0, 317.0, 313.0, 259.0, 266.0, 293.0, 294.0, 288.0, 291.0, 291.0, 288.0, 321.0, 306.0, 285.0, 294.0, 314.0, 316.0, 288.0, 291.0, 298.0, 281.0, 316.0, 314.0, 294.0, 285.0, 293.0, 286.0, 292.0, 290.0, 294.0, 293.0, 290.0, 289.0, 291.0, 288.0, 290.0, 292.0, 287.0, 292.0, 286.0, 290.0, 292.0, 287.0, 312.0, 318.0, 278.0, 298.0, 288.0, 291.0, 311.0, 325.0, 315.0, 321.0, 294.0, 285.0, 296.0, 283.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7013202350924288, "mean_inference_ms": 1.2540252641135183, "mean_action_processing_ms": 0.1342621775402373, "mean_env_wait_ms": 0.8442089434128561, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 636.0, "episode_reward_min": 525.0, "episode_reward_mean": 591.76, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 259.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 295.88}, "hist_stats": {"episode_reward": [587.0, 579.0, 633.0, 587.0, 576.0, 576.0, 579.0, 627.0, 579.0, 579.0, 630.0, 587.0, 573.0, 576.0, 582.0, 579.0, 573.0, 579.0, 630.0, 587.0, 570.0, 636.0, 579.0, 587.0, 627.0, 582.0, 582.0, 582.0, 582.0, 576.0, 579.0, 584.0, 627.0, 582.0, 582.0, 579.0, 630.0, 630.0, 573.0, 579.0, 576.0, 587.0, 633.0, 636.0, 630.0, 579.0, 630.0, 579.0, 582.0, 579.0, 633.0, 587.0, 636.0, 579.0, 582.0, 582.0, 627.0, 
627.0, 576.0, 579.0, 582.0, 582.0, 587.0, 579.0, 582.0, 582.0, 582.0, 582.0, 579.0, 576.0, 579.0, 579.0, 630.0, 525.0, 587.0, 579.0, 579.0, 627.0, 579.0, 630.0, 579.0, 579.0, 630.0, 579.0, 579.0, 582.0, 587.0, 579.0, 579.0, 582.0, 579.0, 576.0, 579.0, 630.0, 576.0, 579.0, 636.0, 636.0, 579.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [284.0, 303.0, 294.0, 285.0, 320.0, 313.0, 296.0, 291.0, 289.0, 287.0, 284.0, 292.0, 285.0, 294.0, 315.0, 312.0, 287.0, 292.0, 298.0, 281.0, 317.0, 313.0, 296.0, 291.0, 286.0, 287.0, 290.0, 286.0, 293.0, 289.0, 284.0, 295.0, 287.0, 286.0, 286.0, 293.0, 313.0, 317.0, 296.0, 291.0, 285.0, 285.0, 314.0, 322.0, 287.0, 292.0, 297.0, 290.0, 312.0, 315.0, 296.0, 286.0, 286.0, 296.0, 291.0, 291.0, 291.0, 291.0, 281.0, 295.0, 295.0, 284.0, 292.0, 292.0, 314.0, 313.0, 291.0, 291.0, 291.0, 291.0, 290.0, 289.0, 311.0, 319.0, 309.0, 321.0, 278.0, 295.0, 288.0, 291.0, 286.0, 290.0, 290.0, 297.0, 314.0, 319.0, 319.0, 317.0, 316.0, 314.0, 294.0, 285.0, 313.0, 317.0, 291.0, 288.0, 288.0, 294.0, 288.0, 291.0, 313.0, 320.0, 296.0, 291.0, 317.0, 319.0, 295.0, 284.0, 289.0, 293.0, 294.0, 288.0, 311.0, 316.0, 311.0, 316.0, 285.0, 291.0, 286.0, 293.0, 286.0, 296.0, 293.0, 289.0, 289.0, 298.0, 288.0, 291.0, 289.0, 293.0, 292.0, 290.0, 289.0, 293.0, 282.0, 300.0, 288.0, 291.0, 286.0, 290.0, 293.0, 286.0, 287.0, 292.0, 317.0, 313.0, 259.0, 266.0, 293.0, 294.0, 288.0, 291.0, 291.0, 288.0, 321.0, 306.0, 285.0, 294.0, 314.0, 316.0, 288.0, 291.0, 298.0, 281.0, 316.0, 
314.0, 294.0, 285.0, 293.0, 286.0, 292.0, 290.0, 294.0, 293.0, 290.0, 289.0, 291.0, 288.0, 290.0, 292.0, 287.0, 292.0, 286.0, 290.0, 292.0, 287.0, 312.0, 318.0, 278.0, 298.0, 288.0, 291.0, 311.0, 325.0, 315.0, 321.0, 294.0, 285.0, 296.0, 283.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7013202350924288, "mean_inference_ms": 1.2540252641135183, "mean_action_processing_ms": 0.1342621775402373, "mean_env_wait_ms": 0.8442089434128561, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 8166400, "num_agent_steps_trained": 8166400, "num_env_steps_sampled": 4083200, "num_env_steps_trained": 4083200, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 4083200, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 8166400, "timers": {"training_iteration_time_ms": 3614.178, "learn_time_ms": 1109.138, "learn_throughput": 11540.489, "synch_weights_time_ms": 11.693}, "counters": {"num_env_steps_sampled": 4083200, "num_env_steps_trained": 4083200, "num_agent_steps_sampled": 8166400, "num_agent_steps_trained": 8166400}, "done": false, "episodes_total": 10208, "training_iteration": 319, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-21-13", "timestamp": 1666581673, "time_this_iter_s": 3.5185139179229736, "time_total_s": 1226.2324166297913, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1226.2324166297913, "timesteps_since_restore": 0, "iterations_since_restore": 319, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.533333333333335, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 204.6, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 182.82, "shaped_reward_min": 165, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.09, "onion_pickup_agent_0_min": 4, "onion_pickup_agent_0_max": 27, "onion_pickup_agent_1_mean": 17.89, "onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 28, "useful_onion_pickup_agent_0_mean": 15.93, "useful_onion_pickup_agent_0_min": 4, "useful_onion_pickup_agent_0_max": 27, "useful_onion_pickup_agent_1_mean": 17.71, "useful_onion_pickup_agent_1_min": 6, "useful_onion_pickup_agent_1_max": 28, "onion_drop_agent_0_mean": 0.08, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.1, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.05, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, 
"useful_onion_drop_agent_1_mean": 0.04, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.66, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 27, "potting_onion_agent_1_mean": 17.48, "potting_onion_agent_1_min": 6, "potting_onion_agent_1_max": 28, "dish_pickup_agent_0_mean": 5.68, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.23, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.51, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 5.09, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 10, "dish_drop_agent_0_mean": 0.07, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.09, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.35, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.97, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.33, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.92, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.66, "optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 27, "optimal_onion_potting_agent_1_mean": 17.48, "optimal_onion_potting_agent_1_min": 6, "optimal_onion_potting_agent_1_max": 28, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.66, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 27, "viable_onion_potting_agent_1_mean": 17.48, "viable_onion_potting_agent_1_min": 6, "viable_onion_potting_agent_1_max": 28, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0008092686766758561, "policy_loss": 0.0004618025850504637, "vf_loss": 
7.6494879722595215, "vf_explained_var": 0.5780466794967651, "kl": 0.002273120451718569, "entropy": 0.8349617719650269, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 4096000, "num_env_steps_trained": 4096000, "num_agent_steps_sampled": 8192000, "num_agent_steps_trained": 8192000}, "sampler_results": {"episode_reward_max": 636.0, "episode_reward_min": 525.0, "episode_reward_mean": 592.02, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 259.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 296.01}, "custom_metrics": {"sparse_reward_mean": 204.6, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 182.82, "shaped_reward_min": 165, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.09, "onion_pickup_agent_0_min": 4, "onion_pickup_agent_0_max": 27, "onion_pickup_agent_1_mean": 17.89, 
"onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 28, "useful_onion_pickup_agent_0_mean": 15.93, "useful_onion_pickup_agent_0_min": 4, "useful_onion_pickup_agent_0_max": 27, "useful_onion_pickup_agent_1_mean": 17.71, "useful_onion_pickup_agent_1_min": 6, "useful_onion_pickup_agent_1_max": 28, "onion_drop_agent_0_mean": 0.08, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.1, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.05, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.04, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.66, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 27, "potting_onion_agent_1_mean": 17.48, "potting_onion_agent_1_min": 6, "potting_onion_agent_1_max": 28, "dish_pickup_agent_0_mean": 5.68, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.23, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.51, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 5.09, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 10, "dish_drop_agent_0_mean": 0.07, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.09, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.35, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.97, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.33, "soup_delivery_agent_0_min": 1, 
"soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.92, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.66, "optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 27, "optimal_onion_potting_agent_1_mean": 17.48, "optimal_onion_potting_agent_1_min": 6, "optimal_onion_potting_agent_1_max": 28, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.66, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 27, "viable_onion_potting_agent_1_mean": 17.48, "viable_onion_potting_agent_1_min": 6, "viable_onion_potting_agent_1_max": 28, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [627.0, 582.0, 582.0, 579.0, 630.0, 630.0, 573.0, 579.0, 576.0, 587.0, 633.0, 636.0, 630.0, 579.0, 630.0, 579.0, 582.0, 579.0, 633.0, 587.0, 636.0, 579.0, 582.0, 582.0, 627.0, 627.0, 576.0, 579.0, 582.0, 582.0, 587.0, 579.0, 582.0, 582.0, 582.0, 582.0, 579.0, 576.0, 579.0, 579.0, 630.0, 525.0, 587.0, 579.0, 579.0, 627.0, 579.0, 630.0, 579.0, 579.0, 630.0, 579.0, 579.0, 582.0, 587.0, 579.0, 579.0, 582.0, 579.0, 576.0, 579.0, 630.0, 576.0, 579.0, 636.0, 636.0, 579.0, 579.0, 582.0, 587.0, 576.0, 579.0, 636.0, 573.0, 582.0, 587.0, 582.0, 636.0, 587.0, 576.0, 636.0, 579.0, 576.0, 582.0, 582.0, 582.0, 582.0, 579.0, 582.0, 582.0, 573.0, 630.0, 627.0, 633.0, 579.0, 579.0, 576.0, 579.0, 587.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [314.0, 313.0, 291.0, 291.0, 291.0, 291.0, 290.0, 289.0, 311.0, 319.0, 309.0, 321.0, 278.0, 295.0, 288.0, 291.0, 286.0, 290.0, 290.0, 297.0, 314.0, 319.0, 319.0, 317.0, 316.0, 314.0, 294.0, 285.0, 313.0, 317.0, 291.0, 288.0, 288.0, 294.0, 288.0, 291.0, 313.0, 320.0, 296.0, 291.0, 317.0, 319.0, 295.0, 284.0, 289.0, 293.0, 294.0, 288.0, 311.0, 
316.0, 311.0, 316.0, 285.0, 291.0, 286.0, 293.0, 286.0, 296.0, 293.0, 289.0, 289.0, 298.0, 288.0, 291.0, 289.0, 293.0, 292.0, 290.0, 289.0, 293.0, 282.0, 300.0, 288.0, 291.0, 286.0, 290.0, 293.0, 286.0, 287.0, 292.0, 317.0, 313.0, 259.0, 266.0, 293.0, 294.0, 288.0, 291.0, 291.0, 288.0, 321.0, 306.0, 285.0, 294.0, 314.0, 316.0, 288.0, 291.0, 298.0, 281.0, 316.0, 314.0, 294.0, 285.0, 293.0, 286.0, 292.0, 290.0, 294.0, 293.0, 290.0, 289.0, 291.0, 288.0, 290.0, 292.0, 287.0, 292.0, 286.0, 290.0, 292.0, 287.0, 312.0, 318.0, 278.0, 298.0, 288.0, 291.0, 311.0, 325.0, 315.0, 321.0, 294.0, 285.0, 296.0, 283.0, 287.0, 295.0, 288.0, 299.0, 291.0, 285.0, 291.0, 288.0, 315.0, 321.0, 278.0, 295.0, 286.0, 296.0, 292.0, 295.0, 289.0, 293.0, 313.0, 323.0, 291.0, 296.0, 288.0, 288.0, 316.0, 320.0, 293.0, 286.0, 289.0, 287.0, 292.0, 290.0, 291.0, 291.0, 290.0, 292.0, 292.0, 290.0, 288.0, 291.0, 284.0, 298.0, 288.0, 294.0, 280.0, 293.0, 311.0, 319.0, 312.0, 315.0, 317.0, 316.0, 287.0, 292.0, 281.0, 298.0, 291.0, 285.0, 286.0, 293.0, 295.0, 292.0, 291.0, 291.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7012302860360472, "mean_inference_ms": 1.2538165708631441, "mean_action_processing_ms": 0.13425232218221622, "mean_env_wait_ms": 0.8440993449119911, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 636.0, "episode_reward_min": 525.0, "episode_reward_mean": 592.02, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 259.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 296.01}, "hist_stats": {"episode_reward": [627.0, 582.0, 582.0, 579.0, 630.0, 630.0, 573.0, 579.0, 576.0, 587.0, 633.0, 636.0, 630.0, 579.0, 630.0, 579.0, 582.0, 579.0, 633.0, 587.0, 636.0, 579.0, 582.0, 582.0, 627.0, 627.0, 576.0, 579.0, 582.0, 582.0, 587.0, 579.0, 582.0, 582.0, 582.0, 582.0, 579.0, 576.0, 579.0, 579.0, 630.0, 525.0, 587.0, 579.0, 579.0, 627.0, 579.0, 630.0, 579.0, 579.0, 630.0, 579.0, 579.0, 582.0, 587.0, 579.0, 579.0, 
582.0, 579.0, 576.0, 579.0, 630.0, 576.0, 579.0, 636.0, 636.0, 579.0, 579.0, 582.0, 587.0, 576.0, 579.0, 636.0, 573.0, 582.0, 587.0, 582.0, 636.0, 587.0, 576.0, 636.0, 579.0, 576.0, 582.0, 582.0, 582.0, 582.0, 579.0, 582.0, 582.0, 573.0, 630.0, 627.0, 633.0, 579.0, 579.0, 576.0, 579.0, 587.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [314.0, 313.0, 291.0, 291.0, 291.0, 291.0, 290.0, 289.0, 311.0, 319.0, 309.0, 321.0, 278.0, 295.0, 288.0, 291.0, 286.0, 290.0, 290.0, 297.0, 314.0, 319.0, 319.0, 317.0, 316.0, 314.0, 294.0, 285.0, 313.0, 317.0, 291.0, 288.0, 288.0, 294.0, 288.0, 291.0, 313.0, 320.0, 296.0, 291.0, 317.0, 319.0, 295.0, 284.0, 289.0, 293.0, 294.0, 288.0, 311.0, 316.0, 311.0, 316.0, 285.0, 291.0, 286.0, 293.0, 286.0, 296.0, 293.0, 289.0, 289.0, 298.0, 288.0, 291.0, 289.0, 293.0, 292.0, 290.0, 289.0, 293.0, 282.0, 300.0, 288.0, 291.0, 286.0, 290.0, 293.0, 286.0, 287.0, 292.0, 317.0, 313.0, 259.0, 266.0, 293.0, 294.0, 288.0, 291.0, 291.0, 288.0, 321.0, 306.0, 285.0, 294.0, 314.0, 316.0, 288.0, 291.0, 298.0, 281.0, 316.0, 314.0, 294.0, 285.0, 293.0, 286.0, 292.0, 290.0, 294.0, 293.0, 290.0, 289.0, 291.0, 288.0, 290.0, 292.0, 287.0, 292.0, 286.0, 290.0, 292.0, 287.0, 312.0, 318.0, 278.0, 298.0, 288.0, 291.0, 311.0, 325.0, 315.0, 321.0, 294.0, 285.0, 296.0, 283.0, 287.0, 295.0, 288.0, 299.0, 291.0, 285.0, 291.0, 288.0, 315.0, 321.0, 278.0, 295.0, 286.0, 296.0, 292.0, 295.0, 289.0, 293.0, 313.0, 323.0, 291.0, 296.0, 288.0, 288.0, 316.0, 320.0, 293.0, 286.0, 289.0, 
287.0, 292.0, 290.0, 291.0, 291.0, 290.0, 292.0, 292.0, 290.0, 288.0, 291.0, 284.0, 298.0, 288.0, 294.0, 280.0, 293.0, 311.0, 319.0, 312.0, 315.0, 317.0, 316.0, 287.0, 292.0, 281.0, 298.0, 291.0, 285.0, 286.0, 293.0, 295.0, 292.0, 291.0, 291.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7012302860360472, "mean_inference_ms": 1.2538165708631441, "mean_action_processing_ms": 0.13425232218221622, "mean_env_wait_ms": 0.8440993449119911, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 8192000, "num_agent_steps_trained": 8192000, "num_env_steps_sampled": 4096000, "num_env_steps_trained": 4096000, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 4096000, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 8192000, "timers": {"training_iteration_time_ms": 3604.653, "learn_time_ms": 1110.183, "learn_throughput": 11529.626, "synch_weights_time_ms": 11.85}, "counters": {"num_env_steps_sampled": 4096000, "num_env_steps_trained": 4096000, "num_agent_steps_sampled": 8192000, "num_agent_steps_trained": 8192000}, "done": false, "episodes_total": 10240, "training_iteration": 320, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-21-16", "timestamp": 1666581676, "time_this_iter_s": 3.695418119430542, "time_total_s": 1229.9278347492218, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1229.9278347492218, "timesteps_since_restore": 0, "iterations_since_restore": 320, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.5, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 204.0, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 182.67, "shaped_reward_min": 165, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.17, "onion_pickup_agent_0_min": 4, "onion_pickup_agent_0_max": 27, "onion_pickup_agent_1_mean": 17.76, "onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 29, "useful_onion_pickup_agent_0_mean": 16.02, "useful_onion_pickup_agent_0_min": 4, "useful_onion_pickup_agent_0_max": 27, "useful_onion_pickup_agent_1_mean": 17.58, "useful_onion_pickup_agent_1_min": 6, "useful_onion_pickup_agent_1_max": 28, "onion_drop_agent_0_mean": 0.1, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.07, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.05, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, 
"useful_onion_drop_agent_1_mean": 0.04, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.74, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 27, "potting_onion_agent_1_mean": 17.39, "potting_onion_agent_1_min": 6, "potting_onion_agent_1_max": 29, "dish_pickup_agent_0_mean": 5.66, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.26, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.52, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 5.09, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 10, "dish_drop_agent_0_mean": 0.08, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.09, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.35, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.94, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.32, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.9, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.74, "optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 27, "optimal_onion_potting_agent_1_mean": 17.39, "optimal_onion_potting_agent_1_min": 6, "optimal_onion_potting_agent_1_max": 29, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.74, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 27, "viable_onion_potting_agent_1_mean": 17.39, "viable_onion_potting_agent_1_min": 6, "viable_onion_potting_agent_1_max": 29, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.00031242857221513987, "policy_loss": -0.0006594202714040875, "vf_loss": 
7.624888896942139, "vf_explained_var": 0.5988132953643799, "kl": 0.0022885880898684263, "entropy": 0.83099365234375, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 4108800, "num_env_steps_trained": 4108800, "num_agent_steps_sampled": 8217600, "num_agent_steps_trained": 8217600}, "sampler_results": {"episode_reward_max": 636.0, "episode_reward_min": 525.0, "episode_reward_mean": 590.67, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 259.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 295.335}, "custom_metrics": {"sparse_reward_mean": 204.0, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 182.67, "shaped_reward_min": 165, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.17, "onion_pickup_agent_0_min": 4, "onion_pickup_agent_0_max": 27, "onion_pickup_agent_1_mean": 17.76, 
"onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 29, "useful_onion_pickup_agent_0_mean": 16.02, "useful_onion_pickup_agent_0_min": 4, "useful_onion_pickup_agent_0_max": 27, "useful_onion_pickup_agent_1_mean": 17.58, "useful_onion_pickup_agent_1_min": 6, "useful_onion_pickup_agent_1_max": 28, "onion_drop_agent_0_mean": 0.1, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.07, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.05, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.04, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.74, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 27, "potting_onion_agent_1_mean": 17.39, "potting_onion_agent_1_min": 6, "potting_onion_agent_1_max": 29, "dish_pickup_agent_0_mean": 5.66, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.26, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.52, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 5.09, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 10, "dish_drop_agent_0_mean": 0.08, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.09, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.35, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.94, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.32, "soup_delivery_agent_0_min": 1, 
"soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.9, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.74, "optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 27, "optimal_onion_potting_agent_1_mean": 17.39, "optimal_onion_potting_agent_1_min": 6, "optimal_onion_potting_agent_1_max": 29, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.74, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 27, "viable_onion_potting_agent_1_mean": 17.39, "viable_onion_potting_agent_1_min": 6, "viable_onion_potting_agent_1_max": 29, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 582.0, 582.0, 582.0, 579.0, 576.0, 579.0, 579.0, 630.0, 525.0, 587.0, 579.0, 579.0, 627.0, 579.0, 630.0, 579.0, 579.0, 630.0, 579.0, 579.0, 582.0, 587.0, 579.0, 579.0, 582.0, 579.0, 576.0, 579.0, 630.0, 576.0, 579.0, 636.0, 636.0, 579.0, 579.0, 582.0, 587.0, 576.0, 579.0, 636.0, 573.0, 582.0, 587.0, 582.0, 636.0, 587.0, 576.0, 636.0, 579.0, 576.0, 582.0, 582.0, 582.0, 582.0, 579.0, 582.0, 582.0, 573.0, 630.0, 627.0, 633.0, 579.0, 579.0, 576.0, 579.0, 587.0, 582.0, 576.0, 582.0, 582.0, 579.0, 633.0, 582.0, 587.0, 630.0, 570.0, 633.0, 582.0, 582.0, 579.0, 582.0, 596.0, 576.0, 627.0, 579.0, 582.0, 582.0, 587.0, 630.0, 582.0, 582.0, 633.0, 636.0, 582.0, 582.0, 633.0, 579.0, 579.0, 570.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [289.0, 293.0, 292.0, 290.0, 289.0, 293.0, 282.0, 300.0, 288.0, 291.0, 286.0, 290.0, 293.0, 286.0, 287.0, 292.0, 317.0, 313.0, 259.0, 266.0, 293.0, 294.0, 288.0, 291.0, 291.0, 288.0, 321.0, 306.0, 285.0, 294.0, 314.0, 316.0, 288.0, 291.0, 298.0, 281.0, 316.0, 314.0, 294.0, 285.0, 293.0, 286.0, 292.0, 290.0, 294.0, 293.0, 290.0, 289.0, 291.0, 
288.0, 290.0, 292.0, 287.0, 292.0, 286.0, 290.0, 292.0, 287.0, 312.0, 318.0, 278.0, 298.0, 288.0, 291.0, 311.0, 325.0, 315.0, 321.0, 294.0, 285.0, 296.0, 283.0, 287.0, 295.0, 288.0, 299.0, 291.0, 285.0, 291.0, 288.0, 315.0, 321.0, 278.0, 295.0, 286.0, 296.0, 292.0, 295.0, 289.0, 293.0, 313.0, 323.0, 291.0, 296.0, 288.0, 288.0, 316.0, 320.0, 293.0, 286.0, 289.0, 287.0, 292.0, 290.0, 291.0, 291.0, 290.0, 292.0, 292.0, 290.0, 288.0, 291.0, 284.0, 298.0, 288.0, 294.0, 280.0, 293.0, 311.0, 319.0, 312.0, 315.0, 317.0, 316.0, 287.0, 292.0, 281.0, 298.0, 291.0, 285.0, 286.0, 293.0, 295.0, 292.0, 291.0, 291.0, 283.0, 293.0, 286.0, 296.0, 292.0, 290.0, 296.0, 283.0, 316.0, 317.0, 292.0, 290.0, 297.0, 290.0, 314.0, 316.0, 289.0, 281.0, 319.0, 314.0, 294.0, 288.0, 292.0, 290.0, 287.0, 292.0, 286.0, 296.0, 299.0, 297.0, 288.0, 288.0, 312.0, 315.0, 284.0, 295.0, 291.0, 291.0, 286.0, 296.0, 291.0, 296.0, 320.0, 310.0, 289.0, 293.0, 289.0, 293.0, 314.0, 319.0, 318.0, 318.0, 292.0, 290.0, 294.0, 288.0, 317.0, 316.0, 283.0, 296.0, 292.0, 287.0, 289.0, 281.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7011718559313173, "mean_inference_ms": 1.2536228958748068, "mean_action_processing_ms": 0.13424182848786143, "mean_env_wait_ms": 0.8439945384813131, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 636.0, "episode_reward_min": 525.0, "episode_reward_mean": 590.67, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 259.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 295.335}, "hist_stats": {"episode_reward": [582.0, 582.0, 582.0, 582.0, 579.0, 576.0, 579.0, 579.0, 630.0, 525.0, 587.0, 579.0, 579.0, 627.0, 579.0, 630.0, 579.0, 579.0, 630.0, 579.0, 579.0, 582.0, 587.0, 579.0, 579.0, 582.0, 579.0, 576.0, 579.0, 630.0, 576.0, 579.0, 636.0, 636.0, 579.0, 579.0, 582.0, 587.0, 576.0, 579.0, 636.0, 573.0, 582.0, 587.0, 582.0, 636.0, 587.0, 576.0, 636.0, 579.0, 576.0, 582.0, 582.0, 582.0, 582.0, 579.0, 
582.0, 582.0, 573.0, 630.0, 627.0, 633.0, 579.0, 579.0, 576.0, 579.0, 587.0, 582.0, 576.0, 582.0, 582.0, 579.0, 633.0, 582.0, 587.0, 630.0, 570.0, 633.0, 582.0, 582.0, 579.0, 582.0, 596.0, 576.0, 627.0, 579.0, 582.0, 582.0, 587.0, 630.0, 582.0, 582.0, 633.0, 636.0, 582.0, 582.0, 633.0, 579.0, 579.0, 570.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [289.0, 293.0, 292.0, 290.0, 289.0, 293.0, 282.0, 300.0, 288.0, 291.0, 286.0, 290.0, 293.0, 286.0, 287.0, 292.0, 317.0, 313.0, 259.0, 266.0, 293.0, 294.0, 288.0, 291.0, 291.0, 288.0, 321.0, 306.0, 285.0, 294.0, 314.0, 316.0, 288.0, 291.0, 298.0, 281.0, 316.0, 314.0, 294.0, 285.0, 293.0, 286.0, 292.0, 290.0, 294.0, 293.0, 290.0, 289.0, 291.0, 288.0, 290.0, 292.0, 287.0, 292.0, 286.0, 290.0, 292.0, 287.0, 312.0, 318.0, 278.0, 298.0, 288.0, 291.0, 311.0, 325.0, 315.0, 321.0, 294.0, 285.0, 296.0, 283.0, 287.0, 295.0, 288.0, 299.0, 291.0, 285.0, 291.0, 288.0, 315.0, 321.0, 278.0, 295.0, 286.0, 296.0, 292.0, 295.0, 289.0, 293.0, 313.0, 323.0, 291.0, 296.0, 288.0, 288.0, 316.0, 320.0, 293.0, 286.0, 289.0, 287.0, 292.0, 290.0, 291.0, 291.0, 290.0, 292.0, 292.0, 290.0, 288.0, 291.0, 284.0, 298.0, 288.0, 294.0, 280.0, 293.0, 311.0, 319.0, 312.0, 315.0, 317.0, 316.0, 287.0, 292.0, 281.0, 298.0, 291.0, 285.0, 286.0, 293.0, 295.0, 292.0, 291.0, 291.0, 283.0, 293.0, 286.0, 296.0, 292.0, 290.0, 296.0, 283.0, 316.0, 317.0, 292.0, 290.0, 297.0, 290.0, 314.0, 316.0, 289.0, 281.0, 319.0, 314.0, 294.0, 288.0, 292.0, 290.0, 287.0, 292.0, 286.0, 296.0, 
299.0, 297.0, 288.0, 288.0, 312.0, 315.0, 284.0, 295.0, 291.0, 291.0, 286.0, 296.0, 291.0, 296.0, 320.0, 310.0, 289.0, 293.0, 289.0, 293.0, 314.0, 319.0, 318.0, 318.0, 292.0, 290.0, 294.0, 288.0, 317.0, 316.0, 283.0, 296.0, 292.0, 287.0, 289.0, 281.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7011718559313173, "mean_inference_ms": 1.2536228958748068, "mean_action_processing_ms": 0.13424182848786143, "mean_env_wait_ms": 0.8439945384813131, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 8217600, "num_agent_steps_trained": 8217600, "num_env_steps_sampled": 4108800, "num_env_steps_trained": 4108800, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 4108800, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 8217600, "timers": {"training_iteration_time_ms": 3593.772, "learn_time_ms": 1104.0, "learn_throughput": 11594.207, "synch_weights_time_ms": 12.066}, "counters": {"num_env_steps_sampled": 4108800, "num_env_steps_trained": 4108800, "num_agent_steps_sampled": 8217600, "num_agent_steps_trained": 8217600}, "done": false, "episodes_total": 10272, "training_iteration": 321, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-21-20", "timestamp": 1666581680, "time_this_iter_s": 3.6758861541748047, "time_total_s": 1233.6037209033966, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1233.6037209033966, "timesteps_since_restore": 0, "iterations_since_restore": 321, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 24.14, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 203.2, "sparse_reward_min": 80, "sparse_reward_max": 220, "shaped_reward_mean": 182.36, "shaped_reward_min": 82, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.73, "onion_pickup_agent_0_min": 5, "onion_pickup_agent_0_max": 27, "onion_pickup_agent_1_mean": 18.0, "onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 29, "useful_onion_pickup_agent_0_mean": 15.64, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 27, "useful_onion_pickup_agent_1_mean": 17.87, "useful_onion_pickup_agent_1_min": 6, "useful_onion_pickup_agent_1_max": 28, "onion_drop_agent_0_mean": 0.07, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.05, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.04, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, 
"useful_onion_drop_agent_1_mean": 0.02, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.33, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 27, "potting_onion_agent_1_mean": 17.68, "potting_onion_agent_1_min": 6, "potting_onion_agent_1_max": 29, "dish_pickup_agent_0_mean": 5.74, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.19, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.57, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 5.04, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 10, "dish_drop_agent_0_mean": 0.1, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.06, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.43, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.87, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.4, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.78, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.33, "optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 27, "optimal_onion_potting_agent_1_mean": 17.68, "optimal_onion_potting_agent_1_min": 6, "optimal_onion_potting_agent_1_max": 29, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.33, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 27, "viable_onion_potting_agent_1_mean": 17.68, "viable_onion_potting_agent_1_min": 6, "viable_onion_potting_agent_1_max": 29, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.002883757231757045, "policy_loss": -0.0032353729475289583, "vf_loss": 
7.676279067993164, "vf_explained_var": 0.5991020202636719, "kl": 0.0022375802509486675, "entropy": 0.8320217728614807, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 4121600, "num_env_steps_trained": 4121600, "num_agent_steps_sampled": 8243200, "num_agent_steps_trained": 8243200}, "sampler_results": {"episode_reward_max": 639.0, "episode_reward_min": 242.0, "episode_reward_mean": 588.76, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 117.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 294.38}, "custom_metrics": {"sparse_reward_mean": 203.2, "sparse_reward_min": 80, "sparse_reward_max": 220, "shaped_reward_mean": 182.36, "shaped_reward_min": 82, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.73, "onion_pickup_agent_0_min": 5, "onion_pickup_agent_0_max": 27, "onion_pickup_agent_1_mean": 18.0, 
"onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 29, "useful_onion_pickup_agent_0_mean": 15.64, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 27, "useful_onion_pickup_agent_1_mean": 17.87, "useful_onion_pickup_agent_1_min": 6, "useful_onion_pickup_agent_1_max": 28, "onion_drop_agent_0_mean": 0.07, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.05, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.04, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.02, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.33, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 27, "potting_onion_agent_1_mean": 17.68, "potting_onion_agent_1_min": 6, "potting_onion_agent_1_max": 29, "dish_pickup_agent_0_mean": 5.74, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.19, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.57, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 5.04, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 10, "dish_drop_agent_0_mean": 0.1, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.06, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.43, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.87, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.4, "soup_delivery_agent_0_min": 1, 
"soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.78, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.33, "optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 27, "optimal_onion_potting_agent_1_mean": 17.68, "optimal_onion_potting_agent_1_min": 6, "optimal_onion_potting_agent_1_max": 29, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.33, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 27, "viable_onion_potting_agent_1_mean": 17.68, "viable_onion_potting_agent_1_min": 6, "viable_onion_potting_agent_1_max": 29, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [636.0, 636.0, 579.0, 579.0, 582.0, 587.0, 576.0, 579.0, 636.0, 573.0, 582.0, 587.0, 582.0, 636.0, 587.0, 576.0, 636.0, 579.0, 576.0, 582.0, 582.0, 582.0, 582.0, 579.0, 582.0, 582.0, 573.0, 630.0, 627.0, 633.0, 579.0, 579.0, 576.0, 579.0, 587.0, 582.0, 576.0, 582.0, 582.0, 579.0, 633.0, 582.0, 587.0, 630.0, 570.0, 633.0, 582.0, 582.0, 579.0, 582.0, 596.0, 576.0, 627.0, 579.0, 582.0, 582.0, 587.0, 630.0, 582.0, 582.0, 633.0, 636.0, 582.0, 582.0, 633.0, 579.0, 579.0, 570.0, 582.0, 576.0, 587.0, 639.0, 582.0, 582.0, 584.0, 576.0, 570.0, 579.0, 627.0, 630.0, 579.0, 573.0, 587.0, 579.0, 242.0, 570.0, 636.0, 582.0, 582.0, 582.0, 582.0, 587.0, 582.0, 582.0, 582.0, 627.0, 590.0, 582.0, 630.0, 590.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [311.0, 325.0, 315.0, 321.0, 294.0, 285.0, 296.0, 283.0, 287.0, 295.0, 288.0, 299.0, 291.0, 285.0, 291.0, 288.0, 315.0, 321.0, 278.0, 295.0, 286.0, 296.0, 292.0, 295.0, 289.0, 293.0, 313.0, 323.0, 291.0, 296.0, 288.0, 288.0, 316.0, 320.0, 293.0, 286.0, 289.0, 287.0, 292.0, 290.0, 291.0, 291.0, 290.0, 292.0, 292.0, 290.0, 288.0, 291.0, 284.0, 
298.0, 288.0, 294.0, 280.0, 293.0, 311.0, 319.0, 312.0, 315.0, 317.0, 316.0, 287.0, 292.0, 281.0, 298.0, 291.0, 285.0, 286.0, 293.0, 295.0, 292.0, 291.0, 291.0, 283.0, 293.0, 286.0, 296.0, 292.0, 290.0, 296.0, 283.0, 316.0, 317.0, 292.0, 290.0, 297.0, 290.0, 314.0, 316.0, 289.0, 281.0, 319.0, 314.0, 294.0, 288.0, 292.0, 290.0, 287.0, 292.0, 286.0, 296.0, 299.0, 297.0, 288.0, 288.0, 312.0, 315.0, 284.0, 295.0, 291.0, 291.0, 286.0, 296.0, 291.0, 296.0, 320.0, 310.0, 289.0, 293.0, 289.0, 293.0, 314.0, 319.0, 318.0, 318.0, 292.0, 290.0, 294.0, 288.0, 317.0, 316.0, 283.0, 296.0, 292.0, 287.0, 289.0, 281.0, 293.0, 289.0, 287.0, 289.0, 294.0, 293.0, 319.0, 320.0, 280.0, 302.0, 294.0, 288.0, 290.0, 294.0, 299.0, 277.0, 285.0, 285.0, 285.0, 294.0, 310.0, 317.0, 315.0, 315.0, 288.0, 291.0, 278.0, 295.0, 286.0, 301.0, 291.0, 288.0, 117.0, 125.0, 282.0, 288.0, 315.0, 321.0, 290.0, 292.0, 280.0, 302.0, 288.0, 294.0, 291.0, 291.0, 293.0, 294.0, 290.0, 292.0, 294.0, 288.0, 292.0, 290.0, 307.0, 320.0, 295.0, 295.0, 289.0, 293.0, 307.0, 323.0, 291.0, 299.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7011350387026024, "mean_inference_ms": 1.253450808715653, "mean_action_processing_ms": 0.13423384633768787, "mean_env_wait_ms": 0.8439112070475474, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 639.0, "episode_reward_min": 242.0, "episode_reward_mean": 588.76, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 117.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 294.38}, "hist_stats": {"episode_reward": [636.0, 636.0, 579.0, 579.0, 582.0, 587.0, 576.0, 579.0, 636.0, 573.0, 582.0, 587.0, 582.0, 636.0, 587.0, 576.0, 636.0, 579.0, 576.0, 582.0, 582.0, 582.0, 582.0, 579.0, 582.0, 582.0, 573.0, 630.0, 627.0, 633.0, 579.0, 579.0, 576.0, 579.0, 587.0, 582.0, 576.0, 582.0, 582.0, 579.0, 633.0, 582.0, 587.0, 630.0, 570.0, 633.0, 582.0, 582.0, 579.0, 582.0, 596.0, 576.0, 627.0, 579.0, 582.0, 582.0, 587.0, 
630.0, 582.0, 582.0, 633.0, 636.0, 582.0, 582.0, 633.0, 579.0, 579.0, 570.0, 582.0, 576.0, 587.0, 639.0, 582.0, 582.0, 584.0, 576.0, 570.0, 579.0, 627.0, 630.0, 579.0, 573.0, 587.0, 579.0, 242.0, 570.0, 636.0, 582.0, 582.0, 582.0, 582.0, 587.0, 582.0, 582.0, 582.0, 627.0, 590.0, 582.0, 630.0, 590.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [311.0, 325.0, 315.0, 321.0, 294.0, 285.0, 296.0, 283.0, 287.0, 295.0, 288.0, 299.0, 291.0, 285.0, 291.0, 288.0, 315.0, 321.0, 278.0, 295.0, 286.0, 296.0, 292.0, 295.0, 289.0, 293.0, 313.0, 323.0, 291.0, 296.0, 288.0, 288.0, 316.0, 320.0, 293.0, 286.0, 289.0, 287.0, 292.0, 290.0, 291.0, 291.0, 290.0, 292.0, 292.0, 290.0, 288.0, 291.0, 284.0, 298.0, 288.0, 294.0, 280.0, 293.0, 311.0, 319.0, 312.0, 315.0, 317.0, 316.0, 287.0, 292.0, 281.0, 298.0, 291.0, 285.0, 286.0, 293.0, 295.0, 292.0, 291.0, 291.0, 283.0, 293.0, 286.0, 296.0, 292.0, 290.0, 296.0, 283.0, 316.0, 317.0, 292.0, 290.0, 297.0, 290.0, 314.0, 316.0, 289.0, 281.0, 319.0, 314.0, 294.0, 288.0, 292.0, 290.0, 287.0, 292.0, 286.0, 296.0, 299.0, 297.0, 288.0, 288.0, 312.0, 315.0, 284.0, 295.0, 291.0, 291.0, 286.0, 296.0, 291.0, 296.0, 320.0, 310.0, 289.0, 293.0, 289.0, 293.0, 314.0, 319.0, 318.0, 318.0, 292.0, 290.0, 294.0, 288.0, 317.0, 316.0, 283.0, 296.0, 292.0, 287.0, 289.0, 281.0, 293.0, 289.0, 287.0, 289.0, 294.0, 293.0, 319.0, 320.0, 280.0, 302.0, 294.0, 288.0, 290.0, 294.0, 299.0, 277.0, 285.0, 285.0, 285.0, 294.0, 310.0, 317.0, 315.0, 315.0, 288.0, 291.0, 278.0, 295.0, 286.0, 
301.0, 291.0, 288.0, 117.0, 125.0, 282.0, 288.0, 315.0, 321.0, 290.0, 292.0, 280.0, 302.0, 288.0, 294.0, 291.0, 291.0, 293.0, 294.0, 290.0, 292.0, 294.0, 288.0, 292.0, 290.0, 307.0, 320.0, 295.0, 295.0, 289.0, 293.0, 307.0, 323.0, 291.0, 299.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7011350387026024, "mean_inference_ms": 1.253450808715653, "mean_action_processing_ms": 0.13423384633768787, "mean_env_wait_ms": 0.8439112070475474, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 8243200, "num_agent_steps_trained": 8243200, "num_env_steps_sampled": 4121600, "num_env_steps_trained": 4121600, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 4121600, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 8243200, "timers": {"training_iteration_time_ms": 3605.198, "learn_time_ms": 1104.684, "learn_throughput": 11587.023, "synch_weights_time_ms": 12.156}, "counters": {"num_env_steps_sampled": 4121600, "num_env_steps_trained": 4121600, "num_agent_steps_sampled": 8243200, "num_agent_steps_trained": 8243200}, "done": false, "episodes_total": 10304, "training_iteration": 322, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-21-24", "timestamp": 1666581684, "time_this_iter_s": 3.7152676582336426, "time_total_s": 1237.3189885616302, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1237.3189885616302, "timesteps_since_restore": 0, "iterations_since_restore": 322, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.816666666666666, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 202.6, "sparse_reward_min": 80, "sparse_reward_max": 220, "shaped_reward_mean": 181.67, "shaped_reward_min": 82, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.55, "onion_pickup_agent_0_min": 5, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 17.99, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 29, "useful_onion_pickup_agent_0_mean": 15.46, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 25, "useful_onion_pickup_agent_1_mean": 17.81, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 28, "onion_drop_agent_0_mean": 0.07, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.05, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.04, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, 
"useful_onion_drop_agent_1_mean": 0.02, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.17, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 25, "potting_onion_agent_1_mean": 17.68, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 29, "dish_pickup_agent_0_mean": 5.8, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.13, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.6, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.99, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.11, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.05, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.48, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.79, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.44, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.72, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.17, "optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 25, "optimal_onion_potting_agent_1_mean": 17.68, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 29, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.17, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 25, "viable_onion_potting_agent_1_mean": 17.68, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 29, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.00010902888607233763, "policy_loss": -0.0002475515939295292, "vf_loss": 
7.680278301239014, "vf_explained_var": 0.5841261744499207, "kl": 0.0024923328310251236, "entropy": 0.8228923082351685, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 4134400, "num_env_steps_trained": 4134400, "num_agent_steps_sampled": 8268800, "num_agent_steps_trained": 8268800}, "sampler_results": {"episode_reward_max": 639.0, "episode_reward_min": 242.0, "episode_reward_mean": 586.87, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 117.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 293.435}, "custom_metrics": {"sparse_reward_mean": 202.6, "sparse_reward_min": 80, "sparse_reward_max": 220, "shaped_reward_mean": 181.67, "shaped_reward_min": 82, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.55, "onion_pickup_agent_0_min": 5, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 17.99, 
"onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 29, "useful_onion_pickup_agent_0_mean": 15.46, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 25, "useful_onion_pickup_agent_1_mean": 17.81, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 28, "onion_drop_agent_0_mean": 0.07, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.05, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.04, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.02, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.17, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 25, "potting_onion_agent_1_mean": 17.68, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 29, "dish_pickup_agent_0_mean": 5.8, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.13, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.6, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.99, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.11, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.05, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.48, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.79, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.44, "soup_delivery_agent_0_min": 2, 
"soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.72, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.17, "optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 25, "optimal_onion_potting_agent_1_mean": 17.68, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 29, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.17, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 25, "viable_onion_potting_agent_1_mean": 17.68, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 29, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [576.0, 579.0, 587.0, 582.0, 576.0, 582.0, 582.0, 579.0, 633.0, 582.0, 587.0, 630.0, 570.0, 633.0, 582.0, 582.0, 579.0, 582.0, 596.0, 576.0, 627.0, 579.0, 582.0, 582.0, 587.0, 630.0, 582.0, 582.0, 633.0, 636.0, 582.0, 582.0, 633.0, 579.0, 579.0, 570.0, 582.0, 576.0, 587.0, 639.0, 582.0, 582.0, 584.0, 576.0, 570.0, 579.0, 627.0, 630.0, 579.0, 573.0, 587.0, 579.0, 242.0, 570.0, 636.0, 582.0, 582.0, 582.0, 582.0, 587.0, 582.0, 582.0, 582.0, 627.0, 590.0, 582.0, 630.0, 590.0, 576.0, 579.0, 636.0, 587.0, 576.0, 579.0, 582.0, 573.0, 576.0, 627.0, 582.0, 582.0, 630.0, 579.0, 582.0, 579.0, 636.0, 579.0, 579.0, 630.0, 584.0, 579.0, 630.0, 582.0, 582.0, 582.0, 582.0, 576.0, 579.0, 579.0, 584.0, 519.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [291.0, 285.0, 286.0, 293.0, 295.0, 292.0, 291.0, 291.0, 283.0, 293.0, 286.0, 296.0, 292.0, 290.0, 296.0, 283.0, 316.0, 317.0, 292.0, 290.0, 297.0, 290.0, 314.0, 316.0, 289.0, 281.0, 319.0, 314.0, 294.0, 288.0, 292.0, 290.0, 287.0, 292.0, 286.0, 296.0, 299.0, 297.0, 288.0, 288.0, 312.0, 315.0, 284.0, 295.0, 291.0, 291.0, 286.0, 296.0, 291.0, 
296.0, 320.0, 310.0, 289.0, 293.0, 289.0, 293.0, 314.0, 319.0, 318.0, 318.0, 292.0, 290.0, 294.0, 288.0, 317.0, 316.0, 283.0, 296.0, 292.0, 287.0, 289.0, 281.0, 293.0, 289.0, 287.0, 289.0, 294.0, 293.0, 319.0, 320.0, 280.0, 302.0, 294.0, 288.0, 290.0, 294.0, 299.0, 277.0, 285.0, 285.0, 285.0, 294.0, 310.0, 317.0, 315.0, 315.0, 288.0, 291.0, 278.0, 295.0, 286.0, 301.0, 291.0, 288.0, 117.0, 125.0, 282.0, 288.0, 315.0, 321.0, 290.0, 292.0, 280.0, 302.0, 288.0, 294.0, 291.0, 291.0, 293.0, 294.0, 290.0, 292.0, 294.0, 288.0, 292.0, 290.0, 307.0, 320.0, 295.0, 295.0, 289.0, 293.0, 307.0, 323.0, 291.0, 299.0, 286.0, 290.0, 288.0, 291.0, 320.0, 316.0, 283.0, 304.0, 290.0, 286.0, 285.0, 294.0, 295.0, 287.0, 289.0, 284.0, 288.0, 288.0, 312.0, 315.0, 293.0, 289.0, 287.0, 295.0, 308.0, 322.0, 286.0, 293.0, 284.0, 298.0, 291.0, 288.0, 311.0, 325.0, 287.0, 292.0, 290.0, 289.0, 320.0, 310.0, 292.0, 292.0, 287.0, 292.0, 317.0, 313.0, 288.0, 294.0, 289.0, 293.0, 289.0, 293.0, 289.0, 293.0, 285.0, 291.0, 291.0, 288.0, 292.0, 287.0, 294.0, 290.0, 256.0, 263.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7010995745847822, "mean_inference_ms": 1.2532775692677478, "mean_action_processing_ms": 0.1342266094642704, "mean_env_wait_ms": 0.8438295517099426, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 639.0, "episode_reward_min": 242.0, "episode_reward_mean": 586.87, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 117.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 293.435}, "hist_stats": {"episode_reward": [576.0, 579.0, 587.0, 582.0, 576.0, 582.0, 582.0, 579.0, 633.0, 582.0, 587.0, 630.0, 570.0, 633.0, 582.0, 582.0, 579.0, 582.0, 596.0, 576.0, 627.0, 579.0, 582.0, 582.0, 587.0, 630.0, 582.0, 582.0, 633.0, 636.0, 582.0, 582.0, 633.0, 579.0, 579.0, 570.0, 582.0, 576.0, 587.0, 639.0, 582.0, 582.0, 584.0, 576.0, 570.0, 579.0, 627.0, 630.0, 579.0, 573.0, 587.0, 579.0, 242.0, 570.0, 636.0, 582.0, 582.0, 
582.0, 582.0, 587.0, 582.0, 582.0, 582.0, 627.0, 590.0, 582.0, 630.0, 590.0, 576.0, 579.0, 636.0, 587.0, 576.0, 579.0, 582.0, 573.0, 576.0, 627.0, 582.0, 582.0, 630.0, 579.0, 582.0, 579.0, 636.0, 579.0, 579.0, 630.0, 584.0, 579.0, 630.0, 582.0, 582.0, 582.0, 582.0, 576.0, 579.0, 579.0, 584.0, 519.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [291.0, 285.0, 286.0, 293.0, 295.0, 292.0, 291.0, 291.0, 283.0, 293.0, 286.0, 296.0, 292.0, 290.0, 296.0, 283.0, 316.0, 317.0, 292.0, 290.0, 297.0, 290.0, 314.0, 316.0, 289.0, 281.0, 319.0, 314.0, 294.0, 288.0, 292.0, 290.0, 287.0, 292.0, 286.0, 296.0, 299.0, 297.0, 288.0, 288.0, 312.0, 315.0, 284.0, 295.0, 291.0, 291.0, 286.0, 296.0, 291.0, 296.0, 320.0, 310.0, 289.0, 293.0, 289.0, 293.0, 314.0, 319.0, 318.0, 318.0, 292.0, 290.0, 294.0, 288.0, 317.0, 316.0, 283.0, 296.0, 292.0, 287.0, 289.0, 281.0, 293.0, 289.0, 287.0, 289.0, 294.0, 293.0, 319.0, 320.0, 280.0, 302.0, 294.0, 288.0, 290.0, 294.0, 299.0, 277.0, 285.0, 285.0, 285.0, 294.0, 310.0, 317.0, 315.0, 315.0, 288.0, 291.0, 278.0, 295.0, 286.0, 301.0, 291.0, 288.0, 117.0, 125.0, 282.0, 288.0, 315.0, 321.0, 290.0, 292.0, 280.0, 302.0, 288.0, 294.0, 291.0, 291.0, 293.0, 294.0, 290.0, 292.0, 294.0, 288.0, 292.0, 290.0, 307.0, 320.0, 295.0, 295.0, 289.0, 293.0, 307.0, 323.0, 291.0, 299.0, 286.0, 290.0, 288.0, 291.0, 320.0, 316.0, 283.0, 304.0, 290.0, 286.0, 285.0, 294.0, 295.0, 287.0, 289.0, 284.0, 288.0, 288.0, 312.0, 315.0, 293.0, 289.0, 287.0, 295.0, 308.0, 322.0, 286.0, 293.0, 284.0, 
298.0, 291.0, 288.0, 311.0, 325.0, 287.0, 292.0, 290.0, 289.0, 320.0, 310.0, 292.0, 292.0, 287.0, 292.0, 317.0, 313.0, 288.0, 294.0, 289.0, 293.0, 289.0, 293.0, 289.0, 293.0, 285.0, 291.0, 291.0, 288.0, 292.0, 287.0, 294.0, 290.0, 256.0, 263.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7010995745847822, "mean_inference_ms": 1.2532775692677478, "mean_action_processing_ms": 0.1342266094642704, "mean_env_wait_ms": 0.8438295517099426, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 8268800, "num_agent_steps_trained": 8268800, "num_env_steps_sampled": 4134400, "num_env_steps_trained": 4134400, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 4134400, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 8268800, "timers": {"training_iteration_time_ms": 3616.709, "learn_time_ms": 1114.568, "learn_throughput": 11484.274, "synch_weights_time_ms": 11.969}, "counters": {"num_env_steps_sampled": 4134400, "num_env_steps_trained": 4134400, "num_agent_steps_sampled": 8268800, "num_agent_steps_trained": 8268800}, "done": false, "episodes_total": 10336, "training_iteration": 323, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-21-28", "timestamp": 1666581688, "time_this_iter_s": 3.724001884460449, "time_total_s": 1241.0429904460907, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1241.0429904460907, "timesteps_since_restore": 0, "iterations_since_restore": 323, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.966666666666665, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 201.0, "sparse_reward_min": 80, "sparse_reward_max": 220, "shaped_reward_mean": 180.56, "shaped_reward_min": 82, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.27, "onion_pickup_agent_0_min": 3, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 18.1, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 29, "useful_onion_pickup_agent_0_mean": 15.15, "useful_onion_pickup_agent_0_min": 3, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 17.91, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 29, "onion_drop_agent_0_mean": 0.04, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.05, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.02, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, 
"useful_onion_drop_agent_1_mean": 0.01, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 14.89, "potting_onion_agent_0_min": 2, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 17.74, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 28, "dish_pickup_agent_0_mean": 5.79, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.01, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.59, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.9, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.08, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.03, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.5, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 10, "soup_pickup_agent_1_mean": 4.75, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.46, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 10, "soup_delivery_agent_1_mean": 4.66, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.04, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 14.89, "optimal_onion_potting_agent_0_min": 2, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 17.74, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 28, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.89, "viable_onion_potting_agent_0_min": 2, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 17.74, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 28, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0010833744890987873, "policy_loss": -0.001441783970221877, "vf_loss": 
7.687665939331055, "vf_explained_var": 0.5905911922454834, "kl": 0.002230637241154909, "entropy": 0.8207129240036011, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 4147200, "num_env_steps_trained": 4147200, "num_agent_steps_sampled": 8294400, "num_agent_steps_trained": 8294400}, "sampler_results": {"episode_reward_max": 639.0, "episode_reward_min": 242.0, "episode_reward_mean": 582.56, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 117.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 291.28}, "custom_metrics": {"sparse_reward_mean": 201.0, "sparse_reward_min": 80, "sparse_reward_max": 220, "shaped_reward_mean": 180.56, "shaped_reward_min": 82, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.27, "onion_pickup_agent_0_min": 3, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 18.1, 
"onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 29, "useful_onion_pickup_agent_0_mean": 15.15, "useful_onion_pickup_agent_0_min": 3, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 17.91, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 29, "onion_drop_agent_0_mean": 0.04, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.05, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.02, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.01, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 14.89, "potting_onion_agent_0_min": 2, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 17.74, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 28, "dish_pickup_agent_0_mean": 5.79, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.01, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.59, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.9, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.08, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.03, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.5, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 10, "soup_pickup_agent_1_mean": 4.75, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.46, "soup_delivery_agent_0_min": 2, 
"soup_delivery_agent_0_max": 10, "soup_delivery_agent_1_mean": 4.66, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.04, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 14.89, "optimal_onion_potting_agent_0_min": 2, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 17.74, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 28, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.89, "viable_onion_potting_agent_0_min": 2, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 17.74, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 28, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [633.0, 579.0, 579.0, 570.0, 582.0, 576.0, 587.0, 639.0, 582.0, 582.0, 584.0, 576.0, 570.0, 579.0, 627.0, 630.0, 579.0, 573.0, 587.0, 579.0, 242.0, 570.0, 636.0, 582.0, 582.0, 582.0, 582.0, 587.0, 582.0, 582.0, 582.0, 627.0, 590.0, 582.0, 630.0, 590.0, 576.0, 579.0, 636.0, 587.0, 576.0, 579.0, 582.0, 573.0, 576.0, 627.0, 582.0, 582.0, 630.0, 579.0, 582.0, 579.0, 636.0, 579.0, 579.0, 630.0, 584.0, 579.0, 630.0, 582.0, 582.0, 582.0, 582.0, 576.0, 579.0, 579.0, 584.0, 519.0, 533.0, 579.0, 584.0, 584.0, 579.0, 579.0, 582.0, 570.0, 582.0, 582.0, 593.0, 536.0, 582.0, 579.0, 582.0, 579.0, 570.0, 579.0, 630.0, 627.0, 530.0, 587.0, 579.0, 576.0, 522.0, 582.0, 582.0, 587.0, 579.0, 582.0, 630.0, 581.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [317.0, 316.0, 283.0, 296.0, 292.0, 287.0, 289.0, 281.0, 293.0, 289.0, 287.0, 289.0, 294.0, 293.0, 319.0, 320.0, 280.0, 302.0, 294.0, 288.0, 290.0, 294.0, 299.0, 277.0, 285.0, 285.0, 285.0, 294.0, 310.0, 317.0, 315.0, 315.0, 288.0, 291.0, 278.0, 295.0, 286.0, 301.0, 291.0, 288.0, 117.0, 125.0, 282.0, 288.0, 315.0, 321.0, 290.0, 292.0, 280.0, 
302.0, 288.0, 294.0, 291.0, 291.0, 293.0, 294.0, 290.0, 292.0, 294.0, 288.0, 292.0, 290.0, 307.0, 320.0, 295.0, 295.0, 289.0, 293.0, 307.0, 323.0, 291.0, 299.0, 286.0, 290.0, 288.0, 291.0, 320.0, 316.0, 283.0, 304.0, 290.0, 286.0, 285.0, 294.0, 295.0, 287.0, 289.0, 284.0, 288.0, 288.0, 312.0, 315.0, 293.0, 289.0, 287.0, 295.0, 308.0, 322.0, 286.0, 293.0, 284.0, 298.0, 291.0, 288.0, 311.0, 325.0, 287.0, 292.0, 290.0, 289.0, 320.0, 310.0, 292.0, 292.0, 287.0, 292.0, 317.0, 313.0, 288.0, 294.0, 289.0, 293.0, 289.0, 293.0, 289.0, 293.0, 285.0, 291.0, 291.0, 288.0, 292.0, 287.0, 294.0, 290.0, 256.0, 263.0, 268.0, 265.0, 287.0, 292.0, 293.0, 291.0, 290.0, 294.0, 284.0, 295.0, 287.0, 292.0, 293.0, 289.0, 292.0, 278.0, 290.0, 292.0, 290.0, 292.0, 297.0, 296.0, 265.0, 271.0, 290.0, 292.0, 288.0, 291.0, 289.0, 293.0, 288.0, 291.0, 286.0, 284.0, 286.0, 293.0, 313.0, 317.0, 309.0, 318.0, 266.0, 264.0, 300.0, 287.0, 282.0, 297.0, 288.0, 288.0, 256.0, 266.0, 292.0, 290.0, 290.0, 292.0, 296.0, 291.0, 286.0, 293.0, 290.0, 292.0, 315.0, 315.0, 290.0, 291.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7010663356915461, "mean_inference_ms": 1.2531089378273157, "mean_action_processing_ms": 0.13422209832165993, "mean_env_wait_ms": 0.8437644807329641, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 639.0, "episode_reward_min": 242.0, "episode_reward_mean": 582.56, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 117.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 291.28}, "hist_stats": {"episode_reward": [633.0, 579.0, 579.0, 570.0, 582.0, 576.0, 587.0, 639.0, 582.0, 582.0, 584.0, 576.0, 570.0, 579.0, 627.0, 630.0, 579.0, 573.0, 587.0, 579.0, 242.0, 570.0, 636.0, 582.0, 582.0, 582.0, 582.0, 587.0, 582.0, 582.0, 582.0, 627.0, 590.0, 582.0, 630.0, 590.0, 576.0, 579.0, 636.0, 587.0, 576.0, 579.0, 582.0, 573.0, 576.0, 627.0, 582.0, 582.0, 630.0, 579.0, 582.0, 579.0, 636.0, 579.0, 579.0, 630.0, 584.0, 
579.0, 630.0, 582.0, 582.0, 582.0, 582.0, 576.0, 579.0, 579.0, 584.0, 519.0, 533.0, 579.0, 584.0, 584.0, 579.0, 579.0, 582.0, 570.0, 582.0, 582.0, 593.0, 536.0, 582.0, 579.0, 582.0, 579.0, 570.0, 579.0, 630.0, 627.0, 530.0, 587.0, 579.0, 576.0, 522.0, 582.0, 582.0, 587.0, 579.0, 582.0, 630.0, 581.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [317.0, 316.0, 283.0, 296.0, 292.0, 287.0, 289.0, 281.0, 293.0, 289.0, 287.0, 289.0, 294.0, 293.0, 319.0, 320.0, 280.0, 302.0, 294.0, 288.0, 290.0, 294.0, 299.0, 277.0, 285.0, 285.0, 285.0, 294.0, 310.0, 317.0, 315.0, 315.0, 288.0, 291.0, 278.0, 295.0, 286.0, 301.0, 291.0, 288.0, 117.0, 125.0, 282.0, 288.0, 315.0, 321.0, 290.0, 292.0, 280.0, 302.0, 288.0, 294.0, 291.0, 291.0, 293.0, 294.0, 290.0, 292.0, 294.0, 288.0, 292.0, 290.0, 307.0, 320.0, 295.0, 295.0, 289.0, 293.0, 307.0, 323.0, 291.0, 299.0, 286.0, 290.0, 288.0, 291.0, 320.0, 316.0, 283.0, 304.0, 290.0, 286.0, 285.0, 294.0, 295.0, 287.0, 289.0, 284.0, 288.0, 288.0, 312.0, 315.0, 293.0, 289.0, 287.0, 295.0, 308.0, 322.0, 286.0, 293.0, 284.0, 298.0, 291.0, 288.0, 311.0, 325.0, 287.0, 292.0, 290.0, 289.0, 320.0, 310.0, 292.0, 292.0, 287.0, 292.0, 317.0, 313.0, 288.0, 294.0, 289.0, 293.0, 289.0, 293.0, 289.0, 293.0, 285.0, 291.0, 291.0, 288.0, 292.0, 287.0, 294.0, 290.0, 256.0, 263.0, 268.0, 265.0, 287.0, 292.0, 293.0, 291.0, 290.0, 294.0, 284.0, 295.0, 287.0, 292.0, 293.0, 289.0, 292.0, 278.0, 290.0, 292.0, 290.0, 292.0, 297.0, 296.0, 265.0, 271.0, 290.0, 292.0, 288.0, 291.0, 289.0, 
293.0, 288.0, 291.0, 286.0, 284.0, 286.0, 293.0, 313.0, 317.0, 309.0, 318.0, 266.0, 264.0, 300.0, 287.0, 282.0, 297.0, 288.0, 288.0, 256.0, 266.0, 292.0, 290.0, 290.0, 292.0, 296.0, 291.0, 286.0, 293.0, 290.0, 292.0, 315.0, 315.0, 290.0, 291.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7010663356915461, "mean_inference_ms": 1.2531089378273157, "mean_action_processing_ms": 0.13422209832165993, "mean_env_wait_ms": 0.8437644807329641, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 8294400, "num_agent_steps_trained": 8294400, "num_env_steps_sampled": 4147200, "num_env_steps_trained": 4147200, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 4147200, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 8294400, "timers": {"training_iteration_time_ms": 3613.576, "learn_time_ms": 1108.298, "learn_throughput": 11549.24, "synch_weights_time_ms": 11.53}, "counters": {"num_env_steps_sampled": 4147200, "num_env_steps_trained": 4147200, "num_agent_steps_sampled": 8294400, "num_agent_steps_trained": 8294400}, "done": false, "episodes_total": 10368, "training_iteration": 324, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-21-32", "timestamp": 1666581692, "time_this_iter_s": 3.701406240463257, "time_total_s": 1244.744396686554, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1244.744396686554, "timesteps_since_restore": 0, "iterations_since_restore": 324, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.7, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 202.8, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 182.05, "shaped_reward_min": 159, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.53, "onion_pickup_agent_0_min": 3, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 18.07, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 29, "useful_onion_pickup_agent_0_mean": 15.37, "useful_onion_pickup_agent_0_min": 3, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 17.88, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 29, "onion_drop_agent_0_mean": 0.06, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.06, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.03, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, 
"useful_onion_drop_agent_1_mean": 0.01, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.14, "potting_onion_agent_0_min": 2, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 17.73, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 28, "dish_pickup_agent_0_mean": 5.82, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.08, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.66, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.97, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.07, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.07, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.51, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 10, "soup_pickup_agent_1_mean": 4.81, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.47, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 10, "soup_delivery_agent_1_mean": 4.75, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 15.14, "optimal_onion_potting_agent_0_min": 2, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 17.73, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 28, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.14, "viable_onion_potting_agent_0_min": 2, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 17.73, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 28, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 7.551535964012146e-05, "policy_loss": -0.0002647839719429612, "vf_loss": 
7.565235137939453, "vf_explained_var": 0.5844516754150391, "kl": 0.0024913917295634747, "entropy": 0.8324460983276367, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 4160000, "num_env_steps_trained": 4160000, "num_agent_steps_sampled": 8320000, "num_agent_steps_trained": 8320000}, "sampler_results": {"episode_reward_max": 639.0, "episode_reward_min": 519.0, "episode_reward_mean": 587.65, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 256.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 293.825}, "custom_metrics": {"sparse_reward_mean": 202.8, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 182.05, "shaped_reward_min": 159, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.53, "onion_pickup_agent_0_min": 3, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 18.07, 
"onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 29, "useful_onion_pickup_agent_0_mean": 15.37, "useful_onion_pickup_agent_0_min": 3, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 17.88, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 29, "onion_drop_agent_0_mean": 0.06, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.06, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.03, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.01, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.14, "potting_onion_agent_0_min": 2, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 17.73, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 28, "dish_pickup_agent_0_mean": 5.82, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.08, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.66, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.97, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.07, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.07, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.51, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 10, "soup_pickup_agent_1_mean": 4.81, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.47, "soup_delivery_agent_0_min": 2, 
"soup_delivery_agent_0_max": 10, "soup_delivery_agent_1_mean": 4.75, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 15.14, "optimal_onion_potting_agent_0_min": 2, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 17.73, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 28, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.14, "viable_onion_potting_agent_0_min": 2, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 17.73, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 28, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [590.0, 582.0, 630.0, 590.0, 576.0, 579.0, 636.0, 587.0, 576.0, 579.0, 582.0, 573.0, 576.0, 627.0, 582.0, 582.0, 630.0, 579.0, 582.0, 579.0, 636.0, 579.0, 579.0, 630.0, 584.0, 579.0, 630.0, 582.0, 582.0, 582.0, 582.0, 576.0, 579.0, 579.0, 584.0, 519.0, 533.0, 579.0, 584.0, 584.0, 579.0, 579.0, 582.0, 570.0, 582.0, 582.0, 593.0, 536.0, 582.0, 579.0, 582.0, 579.0, 570.0, 579.0, 630.0, 627.0, 530.0, 587.0, 579.0, 576.0, 522.0, 582.0, 582.0, 587.0, 579.0, 582.0, 630.0, 581.0, 582.0, 633.0, 582.0, 582.0, 576.0, 584.0, 582.0, 630.0, 584.0, 582.0, 570.0, 582.0, 633.0, 576.0, 576.0, 579.0, 579.0, 633.0, 579.0, 582.0, 636.0, 579.0, 587.0, 630.0, 630.0, 627.0, 582.0, 579.0, 582.0, 579.0, 582.0, 639.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [295.0, 295.0, 289.0, 293.0, 307.0, 323.0, 291.0, 299.0, 286.0, 290.0, 288.0, 291.0, 320.0, 316.0, 283.0, 304.0, 290.0, 286.0, 285.0, 294.0, 295.0, 287.0, 289.0, 284.0, 288.0, 288.0, 312.0, 315.0, 293.0, 289.0, 287.0, 295.0, 308.0, 322.0, 286.0, 293.0, 284.0, 298.0, 291.0, 288.0, 311.0, 325.0, 287.0, 292.0, 290.0, 289.0, 320.0, 310.0, 292.0, 
292.0, 287.0, 292.0, 317.0, 313.0, 288.0, 294.0, 289.0, 293.0, 289.0, 293.0, 289.0, 293.0, 285.0, 291.0, 291.0, 288.0, 292.0, 287.0, 294.0, 290.0, 256.0, 263.0, 268.0, 265.0, 287.0, 292.0, 293.0, 291.0, 290.0, 294.0, 284.0, 295.0, 287.0, 292.0, 293.0, 289.0, 292.0, 278.0, 290.0, 292.0, 290.0, 292.0, 297.0, 296.0, 265.0, 271.0, 290.0, 292.0, 288.0, 291.0, 289.0, 293.0, 288.0, 291.0, 286.0, 284.0, 286.0, 293.0, 313.0, 317.0, 309.0, 318.0, 266.0, 264.0, 300.0, 287.0, 282.0, 297.0, 288.0, 288.0, 256.0, 266.0, 292.0, 290.0, 290.0, 292.0, 296.0, 291.0, 286.0, 293.0, 290.0, 292.0, 315.0, 315.0, 290.0, 291.0, 293.0, 289.0, 319.0, 314.0, 293.0, 289.0, 288.0, 294.0, 287.0, 289.0, 286.0, 298.0, 291.0, 291.0, 312.0, 318.0, 300.0, 284.0, 291.0, 291.0, 286.0, 284.0, 285.0, 297.0, 317.0, 316.0, 286.0, 290.0, 291.0, 285.0, 289.0, 290.0, 290.0, 289.0, 318.0, 315.0, 285.0, 294.0, 286.0, 296.0, 319.0, 317.0, 283.0, 296.0, 290.0, 297.0, 307.0, 323.0, 314.0, 316.0, 309.0, 318.0, 298.0, 284.0, 295.0, 284.0, 292.0, 290.0, 283.0, 296.0, 293.0, 289.0, 319.0, 320.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7010020568901624, "mean_inference_ms": 1.2530853564939755, "mean_action_processing_ms": 0.1342152056410183, "mean_env_wait_ms": 0.8437793845031866, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 639.0, "episode_reward_min": 519.0, "episode_reward_mean": 587.65, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 256.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 293.825}, "hist_stats": {"episode_reward": [590.0, 582.0, 630.0, 590.0, 576.0, 579.0, 636.0, 587.0, 576.0, 579.0, 582.0, 573.0, 576.0, 627.0, 582.0, 582.0, 630.0, 579.0, 582.0, 579.0, 636.0, 579.0, 579.0, 630.0, 584.0, 579.0, 630.0, 582.0, 582.0, 582.0, 582.0, 576.0, 579.0, 579.0, 584.0, 519.0, 533.0, 579.0, 584.0, 584.0, 579.0, 579.0, 582.0, 570.0, 582.0, 582.0, 593.0, 536.0, 582.0, 579.0, 582.0, 579.0, 570.0, 579.0, 630.0, 627.0, 530.0, 
587.0, 579.0, 576.0, 522.0, 582.0, 582.0, 587.0, 579.0, 582.0, 630.0, 581.0, 582.0, 633.0, 582.0, 582.0, 576.0, 584.0, 582.0, 630.0, 584.0, 582.0, 570.0, 582.0, 633.0, 576.0, 576.0, 579.0, 579.0, 633.0, 579.0, 582.0, 636.0, 579.0, 587.0, 630.0, 630.0, 627.0, 582.0, 579.0, 582.0, 579.0, 582.0, 639.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [295.0, 295.0, 289.0, 293.0, 307.0, 323.0, 291.0, 299.0, 286.0, 290.0, 288.0, 291.0, 320.0, 316.0, 283.0, 304.0, 290.0, 286.0, 285.0, 294.0, 295.0, 287.0, 289.0, 284.0, 288.0, 288.0, 312.0, 315.0, 293.0, 289.0, 287.0, 295.0, 308.0, 322.0, 286.0, 293.0, 284.0, 298.0, 291.0, 288.0, 311.0, 325.0, 287.0, 292.0, 290.0, 289.0, 320.0, 310.0, 292.0, 292.0, 287.0, 292.0, 317.0, 313.0, 288.0, 294.0, 289.0, 293.0, 289.0, 293.0, 289.0, 293.0, 285.0, 291.0, 291.0, 288.0, 292.0, 287.0, 294.0, 290.0, 256.0, 263.0, 268.0, 265.0, 287.0, 292.0, 293.0, 291.0, 290.0, 294.0, 284.0, 295.0, 287.0, 292.0, 293.0, 289.0, 292.0, 278.0, 290.0, 292.0, 290.0, 292.0, 297.0, 296.0, 265.0, 271.0, 290.0, 292.0, 288.0, 291.0, 289.0, 293.0, 288.0, 291.0, 286.0, 284.0, 286.0, 293.0, 313.0, 317.0, 309.0, 318.0, 266.0, 264.0, 300.0, 287.0, 282.0, 297.0, 288.0, 288.0, 256.0, 266.0, 292.0, 290.0, 290.0, 292.0, 296.0, 291.0, 286.0, 293.0, 290.0, 292.0, 315.0, 315.0, 290.0, 291.0, 293.0, 289.0, 319.0, 314.0, 293.0, 289.0, 288.0, 294.0, 287.0, 289.0, 286.0, 298.0, 291.0, 291.0, 312.0, 318.0, 300.0, 284.0, 291.0, 291.0, 286.0, 284.0, 285.0, 297.0, 317.0, 316.0, 286.0, 290.0, 291.0, 
285.0, 289.0, 290.0, 290.0, 289.0, 318.0, 315.0, 285.0, 294.0, 286.0, 296.0, 319.0, 317.0, 283.0, 296.0, 290.0, 297.0, 307.0, 323.0, 314.0, 316.0, 309.0, 318.0, 298.0, 284.0, 295.0, 284.0, 292.0, 290.0, 283.0, 296.0, 293.0, 289.0, 319.0, 320.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7010020568901624, "mean_inference_ms": 1.2530853564939755, "mean_action_processing_ms": 0.1342152056410183, "mean_env_wait_ms": 0.8437793845031866, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 8320000, "num_agent_steps_trained": 8320000, "num_env_steps_sampled": 4160000, "num_env_steps_trained": 4160000, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 4160000, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 8320000, "timers": {"training_iteration_time_ms": 3657.697, "learn_time_ms": 1121.055, "learn_throughput": 11417.814, "synch_weights_time_ms": 11.904}, "counters": {"num_env_steps_sampled": 4160000, "num_env_steps_trained": 4160000, "num_agent_steps_sampled": 8320000, "num_agent_steps_trained": 8320000}, "done": false, "episodes_total": 10400, "training_iteration": 325, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-21-36", "timestamp": 1666581696, "time_this_iter_s": 4.0308966636657715, "time_total_s": 1248.7752933502197, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1248.7752933502197, "timesteps_since_restore": 0, "iterations_since_restore": 325, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 21.500000000000004, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 202.2, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 181.45, "shaped_reward_min": 153, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.27, "onion_pickup_agent_0_min": 3, "onion_pickup_agent_0_max": 26, "onion_pickup_agent_1_mean": 18.32, "onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 29, "useful_onion_pickup_agent_0_mean": 15.07, "useful_onion_pickup_agent_0_min": 3, "useful_onion_pickup_agent_0_max": 26, "useful_onion_pickup_agent_1_mean": 18.11, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 29, "onion_drop_agent_0_mean": 0.08, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.07, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.04, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, 
"useful_onion_drop_agent_1_mean": 0.03, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.84, "potting_onion_agent_0_min": 2, "potting_onion_agent_0_max": 25, "potting_onion_agent_1_mean": 17.98, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 28, "dish_pickup_agent_0_mean": 5.89, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 4.96, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.73, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.85, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.09, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.06, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.59, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 10, "soup_pickup_agent_1_mean": 4.67, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.54, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 10, "soup_delivery_agent_1_mean": 4.63, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 14.84, "optimal_onion_potting_agent_0_min": 2, "optimal_onion_potting_agent_0_max": 25, "optimal_onion_potting_agent_1_mean": 17.98, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 28, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.84, "viable_onion_potting_agent_0_min": 2, "viable_onion_potting_agent_0_max": 25, "viable_onion_potting_agent_1_mean": 17.98, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 28, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0042573739774525166, "policy_loss": 0.0038920738734304905, "vf_loss": 
7.782495975494385, "vf_explained_var": 0.5672118663787842, "kl": 0.0021648143883794546, "entropy": 0.8258967399597168, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 4172800, "num_env_steps_trained": 4172800, "num_agent_steps_sampled": 8345600, "num_agent_steps_trained": 8345600}, "sampler_results": {"episode_reward_max": 639.0, "episode_reward_min": 513.0, "episode_reward_mean": 585.85, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 244.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 292.925}, "custom_metrics": {"sparse_reward_mean": 202.2, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 181.45, "shaped_reward_min": 153, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.27, "onion_pickup_agent_0_min": 3, "onion_pickup_agent_0_max": 26, "onion_pickup_agent_1_mean": 18.32, 
"onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 29, "useful_onion_pickup_agent_0_mean": 15.07, "useful_onion_pickup_agent_0_min": 3, "useful_onion_pickup_agent_0_max": 26, "useful_onion_pickup_agent_1_mean": 18.11, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 29, "onion_drop_agent_0_mean": 0.08, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.07, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.04, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.03, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.84, "potting_onion_agent_0_min": 2, "potting_onion_agent_0_max": 25, "potting_onion_agent_1_mean": 17.98, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 28, "dish_pickup_agent_0_mean": 5.89, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 4.96, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.73, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.85, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.09, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.06, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.59, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 10, "soup_pickup_agent_1_mean": 4.67, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.54, "soup_delivery_agent_0_min": 2, 
"soup_delivery_agent_0_max": 10, "soup_delivery_agent_1_mean": 4.63, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 14.84, "optimal_onion_potting_agent_0_min": 2, "optimal_onion_potting_agent_0_max": 25, "optimal_onion_potting_agent_1_mean": 17.98, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 28, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.84, "viable_onion_potting_agent_0_min": 2, "viable_onion_potting_agent_0_max": 25, "viable_onion_potting_agent_1_mean": 17.98, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 28, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 579.0, 584.0, 519.0, 533.0, 579.0, 584.0, 584.0, 579.0, 579.0, 582.0, 570.0, 582.0, 582.0, 593.0, 536.0, 582.0, 579.0, 582.0, 579.0, 570.0, 579.0, 630.0, 627.0, 530.0, 587.0, 579.0, 576.0, 522.0, 582.0, 582.0, 587.0, 579.0, 582.0, 630.0, 581.0, 582.0, 633.0, 582.0, 582.0, 576.0, 584.0, 582.0, 630.0, 584.0, 582.0, 570.0, 582.0, 633.0, 576.0, 576.0, 579.0, 579.0, 633.0, 579.0, 582.0, 636.0, 579.0, 587.0, 630.0, 630.0, 627.0, 582.0, 579.0, 582.0, 579.0, 582.0, 639.0, 579.0, 579.0, 582.0, 579.0, 582.0, 587.0, 633.0, 525.0, 579.0, 582.0, 576.0, 582.0, 579.0, 513.0, 582.0, 579.0, 582.0, 576.0, 639.0, 582.0, 579.0, 630.0, 630.0, 579.0, 636.0, 582.0, 582.0, 579.0, 582.0, 630.0, 573.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [291.0, 288.0, 292.0, 287.0, 294.0, 290.0, 256.0, 263.0, 268.0, 265.0, 287.0, 292.0, 293.0, 291.0, 290.0, 294.0, 284.0, 295.0, 287.0, 292.0, 293.0, 289.0, 292.0, 278.0, 290.0, 292.0, 290.0, 292.0, 297.0, 296.0, 265.0, 271.0, 290.0, 292.0, 288.0, 291.0, 289.0, 293.0, 288.0, 291.0, 286.0, 284.0, 286.0, 293.0, 313.0, 317.0, 309.0, 318.0, 266.0, 
264.0, 300.0, 287.0, 282.0, 297.0, 288.0, 288.0, 256.0, 266.0, 292.0, 290.0, 290.0, 292.0, 296.0, 291.0, 286.0, 293.0, 290.0, 292.0, 315.0, 315.0, 290.0, 291.0, 293.0, 289.0, 319.0, 314.0, 293.0, 289.0, 288.0, 294.0, 287.0, 289.0, 286.0, 298.0, 291.0, 291.0, 312.0, 318.0, 300.0, 284.0, 291.0, 291.0, 286.0, 284.0, 285.0, 297.0, 317.0, 316.0, 286.0, 290.0, 291.0, 285.0, 289.0, 290.0, 290.0, 289.0, 318.0, 315.0, 285.0, 294.0, 286.0, 296.0, 319.0, 317.0, 283.0, 296.0, 290.0, 297.0, 307.0, 323.0, 314.0, 316.0, 309.0, 318.0, 298.0, 284.0, 295.0, 284.0, 292.0, 290.0, 283.0, 296.0, 293.0, 289.0, 319.0, 320.0, 288.0, 291.0, 287.0, 292.0, 288.0, 294.0, 289.0, 290.0, 291.0, 291.0, 292.0, 295.0, 316.0, 317.0, 259.0, 266.0, 278.0, 301.0, 294.0, 288.0, 291.0, 285.0, 290.0, 292.0, 289.0, 290.0, 244.0, 269.0, 283.0, 299.0, 286.0, 293.0, 290.0, 292.0, 289.0, 287.0, 321.0, 318.0, 290.0, 292.0, 292.0, 287.0, 309.0, 321.0, 317.0, 313.0, 289.0, 290.0, 311.0, 325.0, 287.0, 295.0, 295.0, 287.0, 287.0, 292.0, 291.0, 291.0, 317.0, 313.0, 283.0, 290.0, 289.0, 290.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7009214126610166, "mean_inference_ms": 1.253050791965419, "mean_action_processing_ms": 0.13420651041820075, "mean_env_wait_ms": 0.8437815261311539, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 639.0, "episode_reward_min": 513.0, "episode_reward_mean": 585.85, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 244.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 292.925}, "hist_stats": {"episode_reward": [579.0, 579.0, 584.0, 519.0, 533.0, 579.0, 584.0, 584.0, 579.0, 579.0, 582.0, 570.0, 582.0, 582.0, 593.0, 536.0, 582.0, 579.0, 582.0, 579.0, 570.0, 579.0, 630.0, 627.0, 530.0, 587.0, 579.0, 576.0, 522.0, 582.0, 582.0, 587.0, 579.0, 582.0, 630.0, 581.0, 582.0, 633.0, 582.0, 582.0, 576.0, 584.0, 582.0, 630.0, 584.0, 582.0, 570.0, 582.0, 633.0, 576.0, 576.0, 579.0, 579.0, 633.0, 579.0, 582.0, 636.0, 
579.0, 587.0, 630.0, 630.0, 627.0, 582.0, 579.0, 582.0, 579.0, 582.0, 639.0, 579.0, 579.0, 582.0, 579.0, 582.0, 587.0, 633.0, 525.0, 579.0, 582.0, 576.0, 582.0, 579.0, 513.0, 582.0, 579.0, 582.0, 576.0, 639.0, 582.0, 579.0, 630.0, 630.0, 579.0, 636.0, 582.0, 582.0, 579.0, 582.0, 630.0, 573.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [291.0, 288.0, 292.0, 287.0, 294.0, 290.0, 256.0, 263.0, 268.0, 265.0, 287.0, 292.0, 293.0, 291.0, 290.0, 294.0, 284.0, 295.0, 287.0, 292.0, 293.0, 289.0, 292.0, 278.0, 290.0, 292.0, 290.0, 292.0, 297.0, 296.0, 265.0, 271.0, 290.0, 292.0, 288.0, 291.0, 289.0, 293.0, 288.0, 291.0, 286.0, 284.0, 286.0, 293.0, 313.0, 317.0, 309.0, 318.0, 266.0, 264.0, 300.0, 287.0, 282.0, 297.0, 288.0, 288.0, 256.0, 266.0, 292.0, 290.0, 290.0, 292.0, 296.0, 291.0, 286.0, 293.0, 290.0, 292.0, 315.0, 315.0, 290.0, 291.0, 293.0, 289.0, 319.0, 314.0, 293.0, 289.0, 288.0, 294.0, 287.0, 289.0, 286.0, 298.0, 291.0, 291.0, 312.0, 318.0, 300.0, 284.0, 291.0, 291.0, 286.0, 284.0, 285.0, 297.0, 317.0, 316.0, 286.0, 290.0, 291.0, 285.0, 289.0, 290.0, 290.0, 289.0, 318.0, 315.0, 285.0, 294.0, 286.0, 296.0, 319.0, 317.0, 283.0, 296.0, 290.0, 297.0, 307.0, 323.0, 314.0, 316.0, 309.0, 318.0, 298.0, 284.0, 295.0, 284.0, 292.0, 290.0, 283.0, 296.0, 293.0, 289.0, 319.0, 320.0, 288.0, 291.0, 287.0, 292.0, 288.0, 294.0, 289.0, 290.0, 291.0, 291.0, 292.0, 295.0, 316.0, 317.0, 259.0, 266.0, 278.0, 301.0, 294.0, 288.0, 291.0, 285.0, 290.0, 292.0, 289.0, 290.0, 244.0, 269.0, 283.0, 
299.0, 286.0, 293.0, 290.0, 292.0, 289.0, 287.0, 321.0, 318.0, 290.0, 292.0, 292.0, 287.0, 309.0, 321.0, 317.0, 313.0, 289.0, 290.0, 311.0, 325.0, 287.0, 295.0, 295.0, 287.0, 287.0, 292.0, 291.0, 291.0, 317.0, 313.0, 283.0, 290.0, 289.0, 290.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7009214126610166, "mean_inference_ms": 1.253050791965419, "mean_action_processing_ms": 0.13420651041820075, "mean_env_wait_ms": 0.8437815261311539, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 8345600, "num_agent_steps_trained": 8345600, "num_env_steps_sampled": 4172800, "num_env_steps_trained": 4172800, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 4172800, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 8345600, "timers": {"training_iteration_time_ms": 3657.009, "learn_time_ms": 1120.872, "learn_throughput": 11419.68, "synch_weights_time_ms": 11.883}, "counters": {"num_env_steps_sampled": 4172800, "num_env_steps_trained": 4172800, "num_agent_steps_sampled": 8345600, "num_agent_steps_trained": 8345600}, "done": false, "episodes_total": 10432, "training_iteration": 326, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-21-40", "timestamp": 1666581700, "time_this_iter_s": 3.6777994632720947, "time_total_s": 1252.4530928134918, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1252.4530928134918, "timesteps_since_restore": 0, "iterations_since_restore": 326, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.74, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 203.0, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 181.82, "shaped_reward_min": 153, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.1, "onion_pickup_agent_0_min": 5, "onion_pickup_agent_0_max": 26, "onion_pickup_agent_1_mean": 18.5, "onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 28, "useful_onion_pickup_agent_0_mean": 14.88, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 26, "useful_onion_pickup_agent_1_mean": 18.3, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 28, "onion_drop_agent_0_mean": 0.08, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.1, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.05, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, 
"useful_onion_drop_agent_1_mean": 0.04, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.7, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 25, "potting_onion_agent_1_mean": 18.16, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 28, "dish_pickup_agent_0_mean": 6.04, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 4.95, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.87, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.81, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.12, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.08, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.67, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.57, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.64, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.56, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.7, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 25, "optimal_onion_potting_agent_1_mean": 18.16, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 28, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.7, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 25, "viable_onion_potting_agent_1_mean": 18.16, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 28, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.001463708933442831, "policy_loss": 0.0011175316758453846, "vf_loss": 
7.6503777503967285, "vf_explained_var": 0.5795704126358032, "kl": 0.002307520480826497, "entropy": 0.8377187848091125, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 4185600, "num_env_steps_trained": 4185600, "num_agent_steps_sampled": 8371200, "num_agent_steps_trained": 8371200}, "sampler_results": {"episode_reward_max": 639.0, "episode_reward_min": 513.0, "episode_reward_mean": 587.82, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 244.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 293.91}, "custom_metrics": {"sparse_reward_mean": 203.0, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 181.82, "shaped_reward_min": 153, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.1, "onion_pickup_agent_0_min": 5, "onion_pickup_agent_0_max": 26, "onion_pickup_agent_1_mean": 18.5, 
"onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 28, "useful_onion_pickup_agent_0_mean": 14.88, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 26, "useful_onion_pickup_agent_1_mean": 18.3, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 28, "onion_drop_agent_0_mean": 0.08, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.1, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.05, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.04, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.7, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 25, "potting_onion_agent_1_mean": 18.16, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 28, "dish_pickup_agent_0_mean": 6.04, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 4.95, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.87, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.81, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.12, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.08, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.67, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.57, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.64, "soup_delivery_agent_0_min": 2, 
"soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.56, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.7, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 25, "optimal_onion_potting_agent_1_mean": 18.16, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 28, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.7, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 25, "viable_onion_potting_agent_1_mean": 18.16, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 28, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 582.0, 630.0, 581.0, 582.0, 633.0, 582.0, 582.0, 576.0, 584.0, 582.0, 630.0, 584.0, 582.0, 570.0, 582.0, 633.0, 576.0, 576.0, 579.0, 579.0, 633.0, 579.0, 582.0, 636.0, 579.0, 587.0, 630.0, 630.0, 627.0, 582.0, 579.0, 582.0, 579.0, 582.0, 639.0, 579.0, 579.0, 582.0, 579.0, 582.0, 587.0, 633.0, 525.0, 579.0, 582.0, 576.0, 582.0, 579.0, 513.0, 582.0, 579.0, 582.0, 576.0, 639.0, 582.0, 579.0, 630.0, 630.0, 579.0, 636.0, 582.0, 582.0, 579.0, 582.0, 630.0, 573.0, 579.0, 582.0, 579.0, 584.0, 579.0, 582.0, 582.0, 525.0, 579.0, 579.0, 584.0, 587.0, 582.0, 630.0, 582.0, 579.0, 579.0, 582.0, 630.0, 573.0, 576.0, 576.0, 576.0, 582.0, 582.0, 579.0, 530.0, 582.0, 582.0, 576.0, 582.0, 582.0, 630.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [286.0, 293.0, 290.0, 292.0, 315.0, 315.0, 290.0, 291.0, 293.0, 289.0, 319.0, 314.0, 293.0, 289.0, 288.0, 294.0, 287.0, 289.0, 286.0, 298.0, 291.0, 291.0, 312.0, 318.0, 300.0, 284.0, 291.0, 291.0, 286.0, 284.0, 285.0, 297.0, 317.0, 316.0, 286.0, 290.0, 291.0, 285.0, 289.0, 290.0, 290.0, 289.0, 318.0, 315.0, 285.0, 294.0, 286.0, 296.0, 319.0, 
317.0, 283.0, 296.0, 290.0, 297.0, 307.0, 323.0, 314.0, 316.0, 309.0, 318.0, 298.0, 284.0, 295.0, 284.0, 292.0, 290.0, 283.0, 296.0, 293.0, 289.0, 319.0, 320.0, 288.0, 291.0, 287.0, 292.0, 288.0, 294.0, 289.0, 290.0, 291.0, 291.0, 292.0, 295.0, 316.0, 317.0, 259.0, 266.0, 278.0, 301.0, 294.0, 288.0, 291.0, 285.0, 290.0, 292.0, 289.0, 290.0, 244.0, 269.0, 283.0, 299.0, 286.0, 293.0, 290.0, 292.0, 289.0, 287.0, 321.0, 318.0, 290.0, 292.0, 292.0, 287.0, 309.0, 321.0, 317.0, 313.0, 289.0, 290.0, 311.0, 325.0, 287.0, 295.0, 295.0, 287.0, 287.0, 292.0, 291.0, 291.0, 317.0, 313.0, 283.0, 290.0, 289.0, 290.0, 293.0, 289.0, 290.0, 289.0, 294.0, 290.0, 293.0, 286.0, 285.0, 297.0, 292.0, 290.0, 266.0, 259.0, 294.0, 285.0, 290.0, 289.0, 292.0, 292.0, 296.0, 291.0, 286.0, 296.0, 317.0, 313.0, 285.0, 297.0, 288.0, 291.0, 290.0, 289.0, 293.0, 289.0, 307.0, 323.0, 290.0, 283.0, 286.0, 290.0, 292.0, 284.0, 290.0, 286.0, 295.0, 287.0, 292.0, 290.0, 290.0, 289.0, 260.0, 270.0, 291.0, 291.0, 286.0, 296.0, 283.0, 293.0, 294.0, 288.0, 289.0, 293.0, 319.0, 311.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7008398055665644, "mean_inference_ms": 1.2530141301165323, "mean_action_processing_ms": 0.13419647676777532, "mean_env_wait_ms": 0.8437727731291801, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 639.0, "episode_reward_min": 513.0, "episode_reward_mean": 587.82, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 244.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 293.91}, "hist_stats": {"episode_reward": [579.0, 582.0, 630.0, 581.0, 582.0, 633.0, 582.0, 582.0, 576.0, 584.0, 582.0, 630.0, 584.0, 582.0, 570.0, 582.0, 633.0, 576.0, 576.0, 579.0, 579.0, 633.0, 579.0, 582.0, 636.0, 579.0, 587.0, 630.0, 630.0, 627.0, 582.0, 579.0, 582.0, 579.0, 582.0, 639.0, 579.0, 579.0, 582.0, 579.0, 582.0, 587.0, 633.0, 525.0, 579.0, 582.0, 576.0, 582.0, 579.0, 513.0, 582.0, 579.0, 582.0, 576.0, 639.0, 582.0, 579.0, 
630.0, 630.0, 579.0, 636.0, 582.0, 582.0, 579.0, 582.0, 630.0, 573.0, 579.0, 582.0, 579.0, 584.0, 579.0, 582.0, 582.0, 525.0, 579.0, 579.0, 584.0, 587.0, 582.0, 630.0, 582.0, 579.0, 579.0, 582.0, 630.0, 573.0, 576.0, 576.0, 576.0, 582.0, 582.0, 579.0, 530.0, 582.0, 582.0, 576.0, 582.0, 582.0, 630.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [286.0, 293.0, 290.0, 292.0, 315.0, 315.0, 290.0, 291.0, 293.0, 289.0, 319.0, 314.0, 293.0, 289.0, 288.0, 294.0, 287.0, 289.0, 286.0, 298.0, 291.0, 291.0, 312.0, 318.0, 300.0, 284.0, 291.0, 291.0, 286.0, 284.0, 285.0, 297.0, 317.0, 316.0, 286.0, 290.0, 291.0, 285.0, 289.0, 290.0, 290.0, 289.0, 318.0, 315.0, 285.0, 294.0, 286.0, 296.0, 319.0, 317.0, 283.0, 296.0, 290.0, 297.0, 307.0, 323.0, 314.0, 316.0, 309.0, 318.0, 298.0, 284.0, 295.0, 284.0, 292.0, 290.0, 283.0, 296.0, 293.0, 289.0, 319.0, 320.0, 288.0, 291.0, 287.0, 292.0, 288.0, 294.0, 289.0, 290.0, 291.0, 291.0, 292.0, 295.0, 316.0, 317.0, 259.0, 266.0, 278.0, 301.0, 294.0, 288.0, 291.0, 285.0, 290.0, 292.0, 289.0, 290.0, 244.0, 269.0, 283.0, 299.0, 286.0, 293.0, 290.0, 292.0, 289.0, 287.0, 321.0, 318.0, 290.0, 292.0, 292.0, 287.0, 309.0, 321.0, 317.0, 313.0, 289.0, 290.0, 311.0, 325.0, 287.0, 295.0, 295.0, 287.0, 287.0, 292.0, 291.0, 291.0, 317.0, 313.0, 283.0, 290.0, 289.0, 290.0, 293.0, 289.0, 290.0, 289.0, 294.0, 290.0, 293.0, 286.0, 285.0, 297.0, 292.0, 290.0, 266.0, 259.0, 294.0, 285.0, 290.0, 289.0, 292.0, 292.0, 296.0, 291.0, 286.0, 296.0, 317.0, 313.0, 285.0, 297.0, 288.0, 
291.0, 290.0, 289.0, 293.0, 289.0, 307.0, 323.0, 290.0, 283.0, 286.0, 290.0, 292.0, 284.0, 290.0, 286.0, 295.0, 287.0, 292.0, 290.0, 290.0, 289.0, 260.0, 270.0, 291.0, 291.0, 286.0, 296.0, 283.0, 293.0, 294.0, 288.0, 289.0, 293.0, 319.0, 311.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7008398055665644, "mean_inference_ms": 1.2530141301165323, "mean_action_processing_ms": 0.13419647676777532, "mean_env_wait_ms": 0.8437727731291801, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 8371200, "num_agent_steps_trained": 8371200, "num_env_steps_sampled": 4185600, "num_env_steps_trained": 4185600, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 4185600, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 8371200, "timers": {"training_iteration_time_ms": 3643.802, "learn_time_ms": 1107.309, "learn_throughput": 11559.553, "synch_weights_time_ms": 12.586}, "counters": {"num_env_steps_sampled": 4185600, "num_env_steps_trained": 4185600, "num_agent_steps_sampled": 8371200, "num_agent_steps_trained": 8371200}, "done": false, "episodes_total": 10464, "training_iteration": 327, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-21-44", "timestamp": 1666581704, "time_this_iter_s": 3.6818816661834717, "time_total_s": 1256.1349744796753, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1256.1349744796753, "timesteps_since_restore": 0, "iterations_since_restore": 327, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.816666666666666, "ram_util_percent": 10.616666666666667}}
+{"custom_metrics": {"sparse_reward_mean": 202.2, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 180.77, "shaped_reward_min": 153, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.27, "onion_pickup_agent_0_min": 3, "onion_pickup_agent_0_max": 31, "onion_pickup_agent_1_mean": 18.26, "onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 30, "useful_onion_pickup_agent_0_mean": 15.04, "useful_onion_pickup_agent_0_min": 3, "useful_onion_pickup_agent_0_max": 31, "useful_onion_pickup_agent_1_mean": 18.03, "useful_onion_pickup_agent_1_min": 6, "useful_onion_pickup_agent_1_max": 30, "onion_drop_agent_0_mean": 0.08, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.1, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.04, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, 
"useful_onion_drop_agent_1_mean": 0.05, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.89, "potting_onion_agent_0_min": 3, "potting_onion_agent_0_max": 30, "potting_onion_agent_1_mean": 17.84, "potting_onion_agent_1_min": 6, "potting_onion_agent_1_max": 30, "dish_pickup_agent_0_mean": 5.92, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 4.98, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.72, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.84, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 10, "dish_drop_agent_0_mean": 0.11, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.07, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.56, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.62, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 10, "soup_delivery_agent_0_mean": 5.53, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.62, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 10, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.89, "optimal_onion_potting_agent_0_min": 3, "optimal_onion_potting_agent_0_max": 30, "optimal_onion_potting_agent_1_mean": 17.84, "optimal_onion_potting_agent_1_min": 6, "optimal_onion_potting_agent_1_max": 30, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.89, "viable_onion_potting_agent_0_min": 3, "viable_onion_potting_agent_0_max": 30, "viable_onion_potting_agent_1_mean": 17.84, "viable_onion_potting_agent_1_min": 6, "viable_onion_potting_agent_1_max": 30, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0004695032839663327, "policy_loss": 0.00012688391143456101, "vf_loss": 
7.660691261291504, "vf_explained_var": 0.5659324526786804, "kl": 0.0022306411992758512, "entropy": 0.846896231174469, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 4198400, "num_env_steps_trained": 4198400, "num_agent_steps_sampled": 8396800, "num_agent_steps_trained": 8396800}, "sampler_results": {"episode_reward_max": 639.0, "episode_reward_min": 513.0, "episode_reward_mean": 585.17, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 244.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 292.585}, "custom_metrics": {"sparse_reward_mean": 202.2, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 180.77, "shaped_reward_min": 153, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.27, "onion_pickup_agent_0_min": 3, "onion_pickup_agent_0_max": 31, "onion_pickup_agent_1_mean": 18.26, 
"onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 30, "useful_onion_pickup_agent_0_mean": 15.04, "useful_onion_pickup_agent_0_min": 3, "useful_onion_pickup_agent_0_max": 31, "useful_onion_pickup_agent_1_mean": 18.03, "useful_onion_pickup_agent_1_min": 6, "useful_onion_pickup_agent_1_max": 30, "onion_drop_agent_0_mean": 0.08, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.1, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.04, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.05, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.89, "potting_onion_agent_0_min": 3, "potting_onion_agent_0_max": 30, "potting_onion_agent_1_mean": 17.84, "potting_onion_agent_1_min": 6, "potting_onion_agent_1_max": 30, "dish_pickup_agent_0_mean": 5.92, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 4.98, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.72, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.84, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 10, "dish_drop_agent_0_mean": 0.11, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.07, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.56, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.62, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 10, "soup_delivery_agent_0_mean": 5.53, "soup_delivery_agent_0_min": 1, 
"soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.62, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 10, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.89, "optimal_onion_potting_agent_0_min": 3, "optimal_onion_potting_agent_0_max": 30, "optimal_onion_potting_agent_1_mean": 17.84, "optimal_onion_potting_agent_1_min": 6, "optimal_onion_potting_agent_1_max": 30, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.89, "viable_onion_potting_agent_0_min": 3, "viable_onion_potting_agent_0_max": 30, "viable_onion_potting_agent_1_mean": 17.84, "viable_onion_potting_agent_1_min": 6, "viable_onion_potting_agent_1_max": 30, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 579.0, 582.0, 639.0, 579.0, 579.0, 582.0, 579.0, 582.0, 587.0, 633.0, 525.0, 579.0, 582.0, 576.0, 582.0, 579.0, 513.0, 582.0, 579.0, 582.0, 576.0, 639.0, 582.0, 579.0, 630.0, 630.0, 579.0, 636.0, 582.0, 582.0, 579.0, 582.0, 630.0, 573.0, 579.0, 582.0, 579.0, 584.0, 579.0, 582.0, 582.0, 525.0, 579.0, 579.0, 584.0, 587.0, 582.0, 630.0, 582.0, 579.0, 579.0, 582.0, 630.0, 573.0, 576.0, 576.0, 576.0, 582.0, 582.0, 579.0, 530.0, 582.0, 582.0, 576.0, 582.0, 582.0, 630.0, 633.0, 627.0, 624.0, 582.0, 573.0, 582.0, 579.0, 579.0, 579.0, 587.0, 573.0, 582.0, 570.0, 573.0, 582.0, 573.0, 587.0, 579.0, 579.0, 573.0, 636.0, 582.0, 579.0, 579.0, 636.0, 570.0, 579.0, 579.0, 582.0, 576.0, 573.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [292.0, 290.0, 283.0, 296.0, 293.0, 289.0, 319.0, 320.0, 288.0, 291.0, 287.0, 292.0, 288.0, 294.0, 289.0, 290.0, 291.0, 291.0, 292.0, 295.0, 316.0, 317.0, 259.0, 266.0, 278.0, 301.0, 294.0, 288.0, 291.0, 285.0, 290.0, 292.0, 289.0, 290.0, 244.0, 269.0, 283.0, 299.0, 286.0, 293.0, 290.0, 292.0, 289.0, 287.0, 321.0, 318.0, 290.0, 292.0, 292.0, 
287.0, 309.0, 321.0, 317.0, 313.0, 289.0, 290.0, 311.0, 325.0, 287.0, 295.0, 295.0, 287.0, 287.0, 292.0, 291.0, 291.0, 317.0, 313.0, 283.0, 290.0, 289.0, 290.0, 293.0, 289.0, 290.0, 289.0, 294.0, 290.0, 293.0, 286.0, 285.0, 297.0, 292.0, 290.0, 266.0, 259.0, 294.0, 285.0, 290.0, 289.0, 292.0, 292.0, 296.0, 291.0, 286.0, 296.0, 317.0, 313.0, 285.0, 297.0, 288.0, 291.0, 290.0, 289.0, 293.0, 289.0, 307.0, 323.0, 290.0, 283.0, 286.0, 290.0, 292.0, 284.0, 290.0, 286.0, 295.0, 287.0, 292.0, 290.0, 290.0, 289.0, 260.0, 270.0, 291.0, 291.0, 286.0, 296.0, 283.0, 293.0, 294.0, 288.0, 289.0, 293.0, 319.0, 311.0, 320.0, 313.0, 313.0, 314.0, 311.0, 313.0, 292.0, 290.0, 283.0, 290.0, 292.0, 290.0, 293.0, 286.0, 292.0, 287.0, 282.0, 297.0, 290.0, 297.0, 285.0, 288.0, 294.0, 288.0, 286.0, 284.0, 287.0, 286.0, 292.0, 290.0, 284.0, 289.0, 298.0, 289.0, 293.0, 286.0, 294.0, 285.0, 284.0, 289.0, 318.0, 318.0, 289.0, 293.0, 291.0, 288.0, 291.0, 288.0, 316.0, 320.0, 279.0, 291.0, 289.0, 290.0, 291.0, 288.0, 288.0, 294.0, 288.0, 288.0, 283.0, 290.0, 278.0, 298.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7007466682641271, "mean_inference_ms": 1.2528424832108607, "mean_action_processing_ms": 0.13418538861990456, "mean_env_wait_ms": 0.8436620774338445, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 639.0, "episode_reward_min": 513.0, "episode_reward_mean": 585.17, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 244.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 292.585}, "hist_stats": {"episode_reward": [582.0, 579.0, 582.0, 639.0, 579.0, 579.0, 582.0, 579.0, 582.0, 587.0, 633.0, 525.0, 579.0, 582.0, 576.0, 582.0, 579.0, 513.0, 582.0, 579.0, 582.0, 576.0, 639.0, 582.0, 579.0, 630.0, 630.0, 579.0, 636.0, 582.0, 582.0, 579.0, 582.0, 630.0, 573.0, 579.0, 582.0, 579.0, 584.0, 579.0, 582.0, 582.0, 525.0, 579.0, 579.0, 584.0, 587.0, 582.0, 630.0, 582.0, 579.0, 579.0, 582.0, 630.0, 573.0, 576.0, 
576.0, 576.0, 582.0, 582.0, 579.0, 530.0, 582.0, 582.0, 576.0, 582.0, 582.0, 630.0, 633.0, 627.0, 624.0, 582.0, 573.0, 582.0, 579.0, 579.0, 579.0, 587.0, 573.0, 582.0, 570.0, 573.0, 582.0, 573.0, 587.0, 579.0, 579.0, 573.0, 636.0, 582.0, 579.0, 579.0, 636.0, 570.0, 579.0, 579.0, 582.0, 576.0, 573.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [292.0, 290.0, 283.0, 296.0, 293.0, 289.0, 319.0, 320.0, 288.0, 291.0, 287.0, 292.0, 288.0, 294.0, 289.0, 290.0, 291.0, 291.0, 292.0, 295.0, 316.0, 317.0, 259.0, 266.0, 278.0, 301.0, 294.0, 288.0, 291.0, 285.0, 290.0, 292.0, 289.0, 290.0, 244.0, 269.0, 283.0, 299.0, 286.0, 293.0, 290.0, 292.0, 289.0, 287.0, 321.0, 318.0, 290.0, 292.0, 292.0, 287.0, 309.0, 321.0, 317.0, 313.0, 289.0, 290.0, 311.0, 325.0, 287.0, 295.0, 295.0, 287.0, 287.0, 292.0, 291.0, 291.0, 317.0, 313.0, 283.0, 290.0, 289.0, 290.0, 293.0, 289.0, 290.0, 289.0, 294.0, 290.0, 293.0, 286.0, 285.0, 297.0, 292.0, 290.0, 266.0, 259.0, 294.0, 285.0, 290.0, 289.0, 292.0, 292.0, 296.0, 291.0, 286.0, 296.0, 317.0, 313.0, 285.0, 297.0, 288.0, 291.0, 290.0, 289.0, 293.0, 289.0, 307.0, 323.0, 290.0, 283.0, 286.0, 290.0, 292.0, 284.0, 290.0, 286.0, 295.0, 287.0, 292.0, 290.0, 290.0, 289.0, 260.0, 270.0, 291.0, 291.0, 286.0, 296.0, 283.0, 293.0, 294.0, 288.0, 289.0, 293.0, 319.0, 311.0, 320.0, 313.0, 313.0, 314.0, 311.0, 313.0, 292.0, 290.0, 283.0, 290.0, 292.0, 290.0, 293.0, 286.0, 292.0, 287.0, 282.0, 297.0, 290.0, 297.0, 285.0, 288.0, 294.0, 288.0, 286.0, 284.0, 287.0, 286.0, 
292.0, 290.0, 284.0, 289.0, 298.0, 289.0, 293.0, 286.0, 294.0, 285.0, 284.0, 289.0, 318.0, 318.0, 289.0, 293.0, 291.0, 288.0, 291.0, 288.0, 316.0, 320.0, 279.0, 291.0, 289.0, 290.0, 291.0, 288.0, 288.0, 294.0, 288.0, 288.0, 283.0, 290.0, 278.0, 298.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7007466682641271, "mean_inference_ms": 1.2528424832108607, "mean_action_processing_ms": 0.13418538861990456, "mean_env_wait_ms": 0.8436620774338445, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 8396800, "num_agent_steps_trained": 8396800, "num_env_steps_sampled": 4198400, "num_env_steps_trained": 4198400, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 4198400, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 8396800, "timers": {"training_iteration_time_ms": 3632.043, "learn_time_ms": 1099.786, "learn_throughput": 11638.625, "synch_weights_time_ms": 12.684}, "counters": {"num_env_steps_sampled": 4198400, "num_env_steps_trained": 4198400, "num_agent_steps_sampled": 8396800, "num_agent_steps_trained": 8396800}, "done": false, "episodes_total": 10496, "training_iteration": 328, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-21-47", "timestamp": 1666581707, "time_this_iter_s": 3.521747350692749, "time_total_s": 1259.656721830368, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1259.656721830368, "timesteps_since_restore": 0, "iterations_since_restore": 328, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.4, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 201.6, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 179.7, "shaped_reward_min": 144, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.56, "onion_pickup_agent_0_min": 3, "onion_pickup_agent_0_max": 31, "onion_pickup_agent_1_mean": 17.7, "onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 30, "useful_onion_pickup_agent_0_mean": 15.41, "useful_onion_pickup_agent_0_min": 3, "useful_onion_pickup_agent_0_max": 31, "useful_onion_pickup_agent_1_mean": 17.51, "useful_onion_pickup_agent_1_min": 6, "useful_onion_pickup_agent_1_max": 30, "onion_drop_agent_0_mean": 0.04, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.09, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.02, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, 
"useful_onion_drop_agent_1_mean": 0.04, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.23, "potting_onion_agent_0_min": 3, "potting_onion_agent_0_max": 30, "potting_onion_agent_1_mean": 17.29, "potting_onion_agent_1_min": 6, "potting_onion_agent_1_max": 30, "dish_pickup_agent_0_mean": 5.83, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.11, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.55, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.88, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 10, "dish_drop_agent_0_mean": 0.15, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.1, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.42, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.75, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 10, "soup_delivery_agent_0_mean": 5.39, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.75, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 10, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.23, "optimal_onion_potting_agent_0_min": 3, "optimal_onion_potting_agent_0_max": 30, "optimal_onion_potting_agent_1_mean": 17.29, "optimal_onion_potting_agent_1_min": 6, "optimal_onion_potting_agent_1_max": 30, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.23, "viable_onion_potting_agent_0_min": 3, "viable_onion_potting_agent_0_max": 30, "viable_onion_potting_agent_1_mean": 17.29, "viable_onion_potting_agent_1_min": 6, "viable_onion_potting_agent_1_max": 30, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0028664914425462484, "policy_loss": 0.002509418409317732, "vf_loss": 
7.775605201721191, "vf_explained_var": 0.5675947666168213, "kl": 0.0030672703869640827, "entropy": 0.8409721255302429, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 4211200, "num_env_steps_trained": 4211200, "num_agent_steps_sampled": 8422400, "num_agent_steps_trained": 8422400}, "sampler_results": {"episode_reward_max": 636.0, "episode_reward_min": 504.0, "episode_reward_mean": 582.9, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 249.0}, "policy_reward_max": {"ppo": 323.0}, "policy_reward_mean": {"ppo": 291.45}, "custom_metrics": {"sparse_reward_mean": 201.6, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 179.7, "shaped_reward_min": 144, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.56, "onion_pickup_agent_0_min": 3, "onion_pickup_agent_0_max": 31, "onion_pickup_agent_1_mean": 17.7, 
"onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 30, "useful_onion_pickup_agent_0_mean": 15.41, "useful_onion_pickup_agent_0_min": 3, "useful_onion_pickup_agent_0_max": 31, "useful_onion_pickup_agent_1_mean": 17.51, "useful_onion_pickup_agent_1_min": 6, "useful_onion_pickup_agent_1_max": 30, "onion_drop_agent_0_mean": 0.04, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.09, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.02, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.04, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.23, "potting_onion_agent_0_min": 3, "potting_onion_agent_0_max": 30, "potting_onion_agent_1_mean": 17.29, "potting_onion_agent_1_min": 6, "potting_onion_agent_1_max": 30, "dish_pickup_agent_0_mean": 5.83, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.11, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.55, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.88, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 10, "dish_drop_agent_0_mean": 0.15, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.1, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.42, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.75, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 10, "soup_delivery_agent_0_mean": 5.39, "soup_delivery_agent_0_min": 1, 
"soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.75, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 10, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.23, "optimal_onion_potting_agent_0_min": 3, "optimal_onion_potting_agent_0_max": 30, "optimal_onion_potting_agent_1_mean": 17.29, "optimal_onion_potting_agent_1_min": 6, "optimal_onion_potting_agent_1_max": 30, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.23, "viable_onion_potting_agent_0_min": 3, "viable_onion_potting_agent_0_max": 30, "viable_onion_potting_agent_1_mean": 17.29, "viable_onion_potting_agent_1_min": 6, "viable_onion_potting_agent_1_max": 30, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 630.0, 573.0, 579.0, 582.0, 579.0, 584.0, 579.0, 582.0, 582.0, 525.0, 579.0, 579.0, 584.0, 587.0, 582.0, 630.0, 582.0, 579.0, 579.0, 582.0, 630.0, 573.0, 576.0, 576.0, 576.0, 582.0, 582.0, 579.0, 530.0, 582.0, 582.0, 576.0, 582.0, 582.0, 630.0, 633.0, 627.0, 624.0, 582.0, 573.0, 582.0, 579.0, 579.0, 579.0, 587.0, 573.0, 582.0, 570.0, 573.0, 582.0, 573.0, 587.0, 579.0, 579.0, 573.0, 636.0, 582.0, 579.0, 579.0, 636.0, 570.0, 579.0, 579.0, 582.0, 576.0, 573.0, 576.0, 567.0, 579.0, 627.0, 633.0, 587.0, 582.0, 579.0, 579.0, 579.0, 576.0, 582.0, 582.0, 582.0, 582.0, 582.0, 504.0, 582.0, 576.0, 579.0, 579.0, 573.0, 570.0, 579.0, 527.0, 579.0, 630.0, 579.0, 567.0, 582.0, 587.0, 579.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [291.0, 291.0, 317.0, 313.0, 283.0, 290.0, 289.0, 290.0, 293.0, 289.0, 290.0, 289.0, 294.0, 290.0, 293.0, 286.0, 285.0, 297.0, 292.0, 290.0, 266.0, 259.0, 294.0, 285.0, 290.0, 289.0, 292.0, 292.0, 296.0, 291.0, 286.0, 296.0, 317.0, 313.0, 285.0, 297.0, 288.0, 291.0, 290.0, 289.0, 293.0, 289.0, 307.0, 323.0, 290.0, 283.0, 286.0, 290.0, 292.0, 
284.0, 290.0, 286.0, 295.0, 287.0, 292.0, 290.0, 290.0, 289.0, 260.0, 270.0, 291.0, 291.0, 286.0, 296.0, 283.0, 293.0, 294.0, 288.0, 289.0, 293.0, 319.0, 311.0, 320.0, 313.0, 313.0, 314.0, 311.0, 313.0, 292.0, 290.0, 283.0, 290.0, 292.0, 290.0, 293.0, 286.0, 292.0, 287.0, 282.0, 297.0, 290.0, 297.0, 285.0, 288.0, 294.0, 288.0, 286.0, 284.0, 287.0, 286.0, 292.0, 290.0, 284.0, 289.0, 298.0, 289.0, 293.0, 286.0, 294.0, 285.0, 284.0, 289.0, 318.0, 318.0, 289.0, 293.0, 291.0, 288.0, 291.0, 288.0, 316.0, 320.0, 279.0, 291.0, 289.0, 290.0, 291.0, 288.0, 288.0, 294.0, 288.0, 288.0, 283.0, 290.0, 278.0, 298.0, 287.0, 280.0, 290.0, 289.0, 307.0, 320.0, 319.0, 314.0, 300.0, 287.0, 291.0, 291.0, 287.0, 292.0, 285.0, 294.0, 281.0, 298.0, 284.0, 292.0, 291.0, 291.0, 288.0, 294.0, 292.0, 290.0, 291.0, 291.0, 295.0, 287.0, 255.0, 249.0, 286.0, 296.0, 290.0, 286.0, 286.0, 293.0, 292.0, 287.0, 284.0, 289.0, 276.0, 294.0, 290.0, 289.0, 263.0, 264.0, 290.0, 289.0, 317.0, 313.0, 293.0, 286.0, 289.0, 278.0, 287.0, 295.0, 289.0, 298.0, 288.0, 291.0, 287.0, 292.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7006761995941965, "mean_inference_ms": 1.2526628427401818, "mean_action_processing_ms": 0.13417690551647285, "mean_env_wait_ms": 0.8435719265710255, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 636.0, "episode_reward_min": 504.0, "episode_reward_mean": 582.9, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 249.0}, "policy_reward_max": {"ppo": 323.0}, "policy_reward_mean": {"ppo": 291.45}, "hist_stats": {"episode_reward": [582.0, 630.0, 573.0, 579.0, 582.0, 579.0, 584.0, 579.0, 582.0, 582.0, 525.0, 579.0, 579.0, 584.0, 587.0, 582.0, 630.0, 582.0, 579.0, 579.0, 582.0, 630.0, 573.0, 576.0, 576.0, 576.0, 582.0, 582.0, 579.0, 530.0, 582.0, 582.0, 576.0, 582.0, 582.0, 630.0, 633.0, 627.0, 624.0, 582.0, 573.0, 582.0, 579.0, 579.0, 579.0, 587.0, 573.0, 582.0, 570.0, 573.0, 582.0, 573.0, 587.0, 579.0, 579.0, 573.0, 636.0, 
582.0, 579.0, 579.0, 636.0, 570.0, 579.0, 579.0, 582.0, 576.0, 573.0, 576.0, 567.0, 579.0, 627.0, 633.0, 587.0, 582.0, 579.0, 579.0, 579.0, 576.0, 582.0, 582.0, 582.0, 582.0, 582.0, 504.0, 582.0, 576.0, 579.0, 579.0, 573.0, 570.0, 579.0, 527.0, 579.0, 630.0, 579.0, 567.0, 582.0, 587.0, 579.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [291.0, 291.0, 317.0, 313.0, 283.0, 290.0, 289.0, 290.0, 293.0, 289.0, 290.0, 289.0, 294.0, 290.0, 293.0, 286.0, 285.0, 297.0, 292.0, 290.0, 266.0, 259.0, 294.0, 285.0, 290.0, 289.0, 292.0, 292.0, 296.0, 291.0, 286.0, 296.0, 317.0, 313.0, 285.0, 297.0, 288.0, 291.0, 290.0, 289.0, 293.0, 289.0, 307.0, 323.0, 290.0, 283.0, 286.0, 290.0, 292.0, 284.0, 290.0, 286.0, 295.0, 287.0, 292.0, 290.0, 290.0, 289.0, 260.0, 270.0, 291.0, 291.0, 286.0, 296.0, 283.0, 293.0, 294.0, 288.0, 289.0, 293.0, 319.0, 311.0, 320.0, 313.0, 313.0, 314.0, 311.0, 313.0, 292.0, 290.0, 283.0, 290.0, 292.0, 290.0, 293.0, 286.0, 292.0, 287.0, 282.0, 297.0, 290.0, 297.0, 285.0, 288.0, 294.0, 288.0, 286.0, 284.0, 287.0, 286.0, 292.0, 290.0, 284.0, 289.0, 298.0, 289.0, 293.0, 286.0, 294.0, 285.0, 284.0, 289.0, 318.0, 318.0, 289.0, 293.0, 291.0, 288.0, 291.0, 288.0, 316.0, 320.0, 279.0, 291.0, 289.0, 290.0, 291.0, 288.0, 288.0, 294.0, 288.0, 288.0, 283.0, 290.0, 278.0, 298.0, 287.0, 280.0, 290.0, 289.0, 307.0, 320.0, 319.0, 314.0, 300.0, 287.0, 291.0, 291.0, 287.0, 292.0, 285.0, 294.0, 281.0, 298.0, 284.0, 292.0, 291.0, 291.0, 288.0, 294.0, 292.0, 290.0, 291.0, 291.0, 295.0, 
287.0, 255.0, 249.0, 286.0, 296.0, 290.0, 286.0, 286.0, 293.0, 292.0, 287.0, 284.0, 289.0, 276.0, 294.0, 290.0, 289.0, 263.0, 264.0, 290.0, 289.0, 317.0, 313.0, 293.0, 286.0, 289.0, 278.0, 287.0, 295.0, 289.0, 298.0, 288.0, 291.0, 287.0, 292.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7006761995941965, "mean_inference_ms": 1.2526628427401818, "mean_action_processing_ms": 0.13417690551647285, "mean_env_wait_ms": 0.8435719265710255, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 8422400, "num_agent_steps_trained": 8422400, "num_env_steps_sampled": 4211200, "num_env_steps_trained": 4211200, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 4211200, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 8422400, "timers": {"training_iteration_time_ms": 3632.868, "learn_time_ms": 1097.298, "learn_throughput": 11665.015, "synch_weights_time_ms": 12.013}, "counters": {"num_env_steps_sampled": 4211200, "num_env_steps_trained": 4211200, "num_agent_steps_sampled": 8422400, "num_agent_steps_trained": 8422400}, "done": false, "episodes_total": 10528, "training_iteration": 329, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-21-51", "timestamp": 1666581711, "time_this_iter_s": 3.5217649936676025, "time_total_s": 1263.1784868240356, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1263.1784868240356, "timesteps_since_restore": 0, "iterations_since_restore": 329, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.880000000000003, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 202.4, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 180.32, "shaped_reward_min": 144, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.49, "onion_pickup_agent_0_min": 3, "onion_pickup_agent_0_max": 31, "onion_pickup_agent_1_mean": 17.82, "onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 30, "useful_onion_pickup_agent_0_mean": 15.35, "useful_onion_pickup_agent_0_min": 3, "useful_onion_pickup_agent_0_max": 31, "useful_onion_pickup_agent_1_mean": 17.69, "useful_onion_pickup_agent_1_min": 6, "useful_onion_pickup_agent_1_max": 30, "onion_drop_agent_0_mean": 0.07, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.07, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.03, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, 
"useful_onion_drop_agent_1_mean": 0.06, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.1, "potting_onion_agent_0_min": 3, "potting_onion_agent_0_max": 30, "potting_onion_agent_1_mean": 17.46, "potting_onion_agent_1_min": 6, "potting_onion_agent_1_max": 30, "dish_pickup_agent_0_mean": 5.8, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.22, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.51, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 11, "useful_dish_pickup_agent_1_mean": 4.92, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 10, "dish_drop_agent_0_mean": 0.11, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.18, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.47, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 10, "soup_pickup_agent_1_mean": 4.8, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 10, "soup_delivery_agent_0_mean": 5.44, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 10, "soup_delivery_agent_1_mean": 4.76, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 10, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.1, "optimal_onion_potting_agent_0_min": 3, "optimal_onion_potting_agent_0_max": 30, "optimal_onion_potting_agent_1_mean": 17.46, "optimal_onion_potting_agent_1_min": 6, "optimal_onion_potting_agent_1_max": 30, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.1, "viable_onion_potting_agent_0_min": 3, "viable_onion_potting_agent_0_max": 30, "viable_onion_potting_agent_1_mean": 17.46, "viable_onion_potting_agent_1_min": 6, "viable_onion_potting_agent_1_max": 30, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.004220202565193176, "policy_loss": 0.003862160723656416, "vf_loss": 
7.747307777404785, "vf_explained_var": 0.5820736885070801, "kl": 0.002677002688869834, "entropy": 0.8333747386932373, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 4224000, "num_env_steps_trained": 4224000, "num_agent_steps_sampled": 8448000, "num_agent_steps_trained": 8448000}, "sampler_results": {"episode_reward_max": 636.0, "episode_reward_min": 504.0, "episode_reward_mean": 585.12, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 249.0}, "policy_reward_max": {"ppo": 320.0}, "policy_reward_mean": {"ppo": 292.56}, "custom_metrics": {"sparse_reward_mean": 202.4, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 180.32, "shaped_reward_min": 144, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.49, "onion_pickup_agent_0_min": 3, "onion_pickup_agent_0_max": 31, "onion_pickup_agent_1_mean": 17.82, 
"onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 30, "useful_onion_pickup_agent_0_mean": 15.35, "useful_onion_pickup_agent_0_min": 3, "useful_onion_pickup_agent_0_max": 31, "useful_onion_pickup_agent_1_mean": 17.69, "useful_onion_pickup_agent_1_min": 6, "useful_onion_pickup_agent_1_max": 30, "onion_drop_agent_0_mean": 0.07, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.07, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.03, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.06, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.1, "potting_onion_agent_0_min": 3, "potting_onion_agent_0_max": 30, "potting_onion_agent_1_mean": 17.46, "potting_onion_agent_1_min": 6, "potting_onion_agent_1_max": 30, "dish_pickup_agent_0_mean": 5.8, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.22, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.51, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 11, "useful_dish_pickup_agent_1_mean": 4.92, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 10, "dish_drop_agent_0_mean": 0.11, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.18, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.47, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 10, "soup_pickup_agent_1_mean": 4.8, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 10, "soup_delivery_agent_0_mean": 5.44, "soup_delivery_agent_0_min": 1, 
"soup_delivery_agent_0_max": 10, "soup_delivery_agent_1_mean": 4.76, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 10, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.1, "optimal_onion_potting_agent_0_min": 3, "optimal_onion_potting_agent_0_max": 30, "optimal_onion_potting_agent_1_mean": 17.46, "optimal_onion_potting_agent_1_min": 6, "optimal_onion_potting_agent_1_max": 30, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.1, "viable_onion_potting_agent_0_min": 3, "viable_onion_potting_agent_0_max": 30, "viable_onion_potting_agent_1_mean": 17.46, "viable_onion_potting_agent_1_min": 6, "viable_onion_potting_agent_1_max": 30, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [576.0, 582.0, 582.0, 630.0, 633.0, 627.0, 624.0, 582.0, 573.0, 582.0, 579.0, 579.0, 579.0, 587.0, 573.0, 582.0, 570.0, 573.0, 582.0, 573.0, 587.0, 579.0, 579.0, 573.0, 636.0, 582.0, 579.0, 579.0, 636.0, 570.0, 579.0, 579.0, 582.0, 576.0, 573.0, 576.0, 567.0, 579.0, 627.0, 633.0, 587.0, 582.0, 579.0, 579.0, 579.0, 576.0, 582.0, 582.0, 582.0, 582.0, 582.0, 504.0, 582.0, 576.0, 579.0, 579.0, 573.0, 570.0, 579.0, 527.0, 579.0, 630.0, 579.0, 567.0, 582.0, 587.0, 579.0, 579.0, 633.0, 581.0, 579.0, 579.0, 627.0, 582.0, 582.0, 573.0, 584.0, 582.0, 579.0, 627.0, 582.0, 627.0, 584.0, 584.0, 581.0, 582.0, 630.0, 576.0, 579.0, 587.0, 582.0, 527.0, 584.0, 587.0, 579.0, 582.0, 576.0, 627.0, 587.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [283.0, 293.0, 294.0, 288.0, 289.0, 293.0, 319.0, 311.0, 320.0, 313.0, 313.0, 314.0, 311.0, 313.0, 292.0, 290.0, 283.0, 290.0, 292.0, 290.0, 293.0, 286.0, 292.0, 287.0, 282.0, 297.0, 290.0, 297.0, 285.0, 288.0, 294.0, 288.0, 286.0, 284.0, 287.0, 286.0, 292.0, 290.0, 284.0, 289.0, 298.0, 289.0, 293.0, 286.0, 294.0, 285.0, 284.0, 289.0, 318.0, 
318.0, 289.0, 293.0, 291.0, 288.0, 291.0, 288.0, 316.0, 320.0, 279.0, 291.0, 289.0, 290.0, 291.0, 288.0, 288.0, 294.0, 288.0, 288.0, 283.0, 290.0, 278.0, 298.0, 287.0, 280.0, 290.0, 289.0, 307.0, 320.0, 319.0, 314.0, 300.0, 287.0, 291.0, 291.0, 287.0, 292.0, 285.0, 294.0, 281.0, 298.0, 284.0, 292.0, 291.0, 291.0, 288.0, 294.0, 292.0, 290.0, 291.0, 291.0, 295.0, 287.0, 255.0, 249.0, 286.0, 296.0, 290.0, 286.0, 286.0, 293.0, 292.0, 287.0, 284.0, 289.0, 276.0, 294.0, 290.0, 289.0, 263.0, 264.0, 290.0, 289.0, 317.0, 313.0, 293.0, 286.0, 289.0, 278.0, 287.0, 295.0, 289.0, 298.0, 288.0, 291.0, 287.0, 292.0, 315.0, 318.0, 294.0, 287.0, 283.0, 296.0, 282.0, 297.0, 315.0, 312.0, 294.0, 288.0, 290.0, 292.0, 278.0, 295.0, 283.0, 301.0, 293.0, 289.0, 289.0, 290.0, 309.0, 318.0, 294.0, 288.0, 308.0, 319.0, 285.0, 299.0, 295.0, 289.0, 283.0, 298.0, 298.0, 284.0, 310.0, 320.0, 284.0, 292.0, 292.0, 287.0, 290.0, 297.0, 293.0, 289.0, 258.0, 269.0, 285.0, 299.0, 285.0, 302.0, 290.0, 289.0, 291.0, 291.0, 285.0, 291.0, 316.0, 311.0, 298.0, 289.0, 292.0, 287.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.70060102924662, "mean_inference_ms": 1.252481111686981, "mean_action_processing_ms": 0.13416844921696647, "mean_env_wait_ms": 0.8434800010347309, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 636.0, "episode_reward_min": 504.0, "episode_reward_mean": 585.12, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 249.0}, "policy_reward_max": {"ppo": 320.0}, "policy_reward_mean": {"ppo": 292.56}, "hist_stats": {"episode_reward": [576.0, 582.0, 582.0, 630.0, 633.0, 627.0, 624.0, 582.0, 573.0, 582.0, 579.0, 579.0, 579.0, 587.0, 573.0, 582.0, 570.0, 573.0, 582.0, 573.0, 587.0, 579.0, 579.0, 573.0, 636.0, 582.0, 579.0, 579.0, 636.0, 570.0, 579.0, 579.0, 582.0, 576.0, 573.0, 576.0, 567.0, 579.0, 627.0, 633.0, 587.0, 582.0, 579.0, 579.0, 579.0, 576.0, 582.0, 582.0, 582.0, 582.0, 582.0, 504.0, 582.0, 576.0, 579.0, 579.0, 573.0, 
570.0, 579.0, 527.0, 579.0, 630.0, 579.0, 567.0, 582.0, 587.0, 579.0, 579.0, 633.0, 581.0, 579.0, 579.0, 627.0, 582.0, 582.0, 573.0, 584.0, 582.0, 579.0, 627.0, 582.0, 627.0, 584.0, 584.0, 581.0, 582.0, 630.0, 576.0, 579.0, 587.0, 582.0, 527.0, 584.0, 587.0, 579.0, 582.0, 576.0, 627.0, 587.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [283.0, 293.0, 294.0, 288.0, 289.0, 293.0, 319.0, 311.0, 320.0, 313.0, 313.0, 314.0, 311.0, 313.0, 292.0, 290.0, 283.0, 290.0, 292.0, 290.0, 293.0, 286.0, 292.0, 287.0, 282.0, 297.0, 290.0, 297.0, 285.0, 288.0, 294.0, 288.0, 286.0, 284.0, 287.0, 286.0, 292.0, 290.0, 284.0, 289.0, 298.0, 289.0, 293.0, 286.0, 294.0, 285.0, 284.0, 289.0, 318.0, 318.0, 289.0, 293.0, 291.0, 288.0, 291.0, 288.0, 316.0, 320.0, 279.0, 291.0, 289.0, 290.0, 291.0, 288.0, 288.0, 294.0, 288.0, 288.0, 283.0, 290.0, 278.0, 298.0, 287.0, 280.0, 290.0, 289.0, 307.0, 320.0, 319.0, 314.0, 300.0, 287.0, 291.0, 291.0, 287.0, 292.0, 285.0, 294.0, 281.0, 298.0, 284.0, 292.0, 291.0, 291.0, 288.0, 294.0, 292.0, 290.0, 291.0, 291.0, 295.0, 287.0, 255.0, 249.0, 286.0, 296.0, 290.0, 286.0, 286.0, 293.0, 292.0, 287.0, 284.0, 289.0, 276.0, 294.0, 290.0, 289.0, 263.0, 264.0, 290.0, 289.0, 317.0, 313.0, 293.0, 286.0, 289.0, 278.0, 287.0, 295.0, 289.0, 298.0, 288.0, 291.0, 287.0, 292.0, 315.0, 318.0, 294.0, 287.0, 283.0, 296.0, 282.0, 297.0, 315.0, 312.0, 294.0, 288.0, 290.0, 292.0, 278.0, 295.0, 283.0, 301.0, 293.0, 289.0, 289.0, 290.0, 309.0, 318.0, 294.0, 288.0, 308.0, 319.0, 285.0, 
299.0, 295.0, 289.0, 283.0, 298.0, 298.0, 284.0, 310.0, 320.0, 284.0, 292.0, 292.0, 287.0, 290.0, 297.0, 293.0, 289.0, 258.0, 269.0, 285.0, 299.0, 285.0, 302.0, 290.0, 289.0, 291.0, 291.0, 285.0, 291.0, 316.0, 311.0, 298.0, 289.0, 292.0, 287.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.70060102924662, "mean_inference_ms": 1.252481111686981, "mean_action_processing_ms": 0.13416844921696647, "mean_env_wait_ms": 0.8434800010347309, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 8448000, "num_agent_steps_trained": 8448000, "num_env_steps_sampled": 4224000, "num_env_steps_trained": 4224000, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 4224000, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 8448000, "timers": {"training_iteration_time_ms": 3635.768, "learn_time_ms": 1102.793, "learn_throughput": 11606.888, "synch_weights_time_ms": 12.53}, "counters": {"num_env_steps_sampled": 4224000, "num_env_steps_trained": 4224000, "num_agent_steps_sampled": 8448000, "num_agent_steps_trained": 8448000}, "done": false, "episodes_total": 10560, "training_iteration": 330, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-21-55", "timestamp": 1666581715, "time_this_iter_s": 3.7308542728424072, "time_total_s": 1266.909341096878, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1266.909341096878, "timesteps_since_restore": 0, "iterations_since_restore": 330, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 21.05, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 201.8, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 179.78, "shaped_reward_min": 144, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.69, "onion_pickup_agent_0_min": 3, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 18.46, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 30, "useful_onion_pickup_agent_0_mean": 14.58, "useful_onion_pickup_agent_0_min": 3, "useful_onion_pickup_agent_0_max": 25, "useful_onion_pickup_agent_1_mean": 18.3, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 30, "onion_drop_agent_0_mean": 0.07, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.04, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.03, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, 
"useful_onion_drop_agent_1_mean": 0.04, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 14.35, "potting_onion_agent_0_min": 3, "potting_onion_agent_0_max": 25, "potting_onion_agent_1_mean": 18.11, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 30, "dish_pickup_agent_0_mean": 6.03, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 4.95, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.74, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 11, "useful_dish_pickup_agent_1_mean": 4.66, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.12, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.17, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.65, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 10, "soup_pickup_agent_1_mean": 4.59, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.62, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 10, "soup_delivery_agent_1_mean": 4.55, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.35, "optimal_onion_potting_agent_0_min": 3, "optimal_onion_potting_agent_0_max": 25, "optimal_onion_potting_agent_1_mean": 18.11, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 30, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.35, "viable_onion_potting_agent_0_min": 3, "viable_onion_potting_agent_0_max": 25, "viable_onion_potting_agent_1_mean": 18.11, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 30, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.003572438843548298, "policy_loss": -0.003936300054192543, "vf_loss": 
7.806034088134766, "vf_explained_var": 0.5505003929138184, "kl": 0.0024001419078558683, "entropy": 0.8334838151931763, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 4236800, "num_env_steps_trained": 4236800, "num_agent_steps_sampled": 8473600, "num_agent_steps_trained": 8473600}, "sampler_results": {"episode_reward_max": 633.0, "episode_reward_min": 504.0, "episode_reward_mean": 583.38, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 249.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 291.69}, "custom_metrics": {"sparse_reward_mean": 201.8, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 179.78, "shaped_reward_min": 144, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.69, "onion_pickup_agent_0_min": 3, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 18.46, 
"onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 30, "useful_onion_pickup_agent_0_mean": 14.58, "useful_onion_pickup_agent_0_min": 3, "useful_onion_pickup_agent_0_max": 25, "useful_onion_pickup_agent_1_mean": 18.3, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 30, "onion_drop_agent_0_mean": 0.07, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.04, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.03, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.04, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 14.35, "potting_onion_agent_0_min": 3, "potting_onion_agent_0_max": 25, "potting_onion_agent_1_mean": 18.11, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 30, "dish_pickup_agent_0_mean": 6.03, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 4.95, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.74, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 11, "useful_dish_pickup_agent_1_mean": 4.66, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.12, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.17, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.65, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 10, "soup_pickup_agent_1_mean": 4.59, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.62, "soup_delivery_agent_0_min": 2, 
"soup_delivery_agent_0_max": 10, "soup_delivery_agent_1_mean": 4.55, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.35, "optimal_onion_potting_agent_0_min": 3, "optimal_onion_potting_agent_0_max": 25, "optimal_onion_potting_agent_1_mean": 18.11, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 30, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.35, "viable_onion_potting_agent_0_min": 3, "viable_onion_potting_agent_0_max": 25, "viable_onion_potting_agent_1_mean": 18.11, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 30, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 576.0, 573.0, 576.0, 567.0, 579.0, 627.0, 633.0, 587.0, 582.0, 579.0, 579.0, 579.0, 576.0, 582.0, 582.0, 582.0, 582.0, 582.0, 504.0, 582.0, 576.0, 579.0, 579.0, 573.0, 570.0, 579.0, 527.0, 579.0, 630.0, 579.0, 567.0, 582.0, 587.0, 579.0, 579.0, 633.0, 581.0, 579.0, 579.0, 627.0, 582.0, 582.0, 573.0, 584.0, 582.0, 579.0, 627.0, 582.0, 627.0, 584.0, 584.0, 581.0, 582.0, 630.0, 576.0, 579.0, 587.0, 582.0, 527.0, 584.0, 587.0, 579.0, 582.0, 576.0, 627.0, 587.0, 579.0, 579.0, 525.0, 576.0, 576.0, 539.0, 576.0, 573.0, 579.0, 579.0, 570.0, 633.0, 582.0, 584.0, 630.0, 582.0, 573.0, 573.0, 582.0, 579.0, 579.0, 630.0, 576.0, 630.0, 579.0, 579.0, 582.0, 567.0, 582.0, 576.0, 576.0, 630.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [288.0, 294.0, 288.0, 288.0, 283.0, 290.0, 278.0, 298.0, 287.0, 280.0, 290.0, 289.0, 307.0, 320.0, 319.0, 314.0, 300.0, 287.0, 291.0, 291.0, 287.0, 292.0, 285.0, 294.0, 281.0, 298.0, 284.0, 292.0, 291.0, 291.0, 288.0, 294.0, 292.0, 290.0, 291.0, 291.0, 295.0, 287.0, 255.0, 249.0, 286.0, 296.0, 290.0, 286.0, 286.0, 293.0, 292.0, 287.0, 284.0, 
289.0, 276.0, 294.0, 290.0, 289.0, 263.0, 264.0, 290.0, 289.0, 317.0, 313.0, 293.0, 286.0, 289.0, 278.0, 287.0, 295.0, 289.0, 298.0, 288.0, 291.0, 287.0, 292.0, 315.0, 318.0, 294.0, 287.0, 283.0, 296.0, 282.0, 297.0, 315.0, 312.0, 294.0, 288.0, 290.0, 292.0, 278.0, 295.0, 283.0, 301.0, 293.0, 289.0, 289.0, 290.0, 309.0, 318.0, 294.0, 288.0, 308.0, 319.0, 285.0, 299.0, 295.0, 289.0, 283.0, 298.0, 298.0, 284.0, 310.0, 320.0, 284.0, 292.0, 292.0, 287.0, 290.0, 297.0, 293.0, 289.0, 258.0, 269.0, 285.0, 299.0, 285.0, 302.0, 290.0, 289.0, 291.0, 291.0, 285.0, 291.0, 316.0, 311.0, 298.0, 289.0, 292.0, 287.0, 284.0, 295.0, 260.0, 265.0, 290.0, 286.0, 285.0, 291.0, 269.0, 270.0, 283.0, 293.0, 283.0, 290.0, 289.0, 290.0, 293.0, 286.0, 284.0, 286.0, 314.0, 319.0, 290.0, 292.0, 294.0, 290.0, 317.0, 313.0, 293.0, 289.0, 283.0, 290.0, 274.0, 299.0, 289.0, 293.0, 292.0, 287.0, 286.0, 293.0, 312.0, 318.0, 287.0, 289.0, 309.0, 321.0, 283.0, 296.0, 288.0, 291.0, 291.0, 291.0, 290.0, 277.0, 291.0, 291.0, 290.0, 286.0, 283.0, 293.0, 324.0, 306.0, 288.0, 288.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7005318625687796, "mean_inference_ms": 1.2523039495697514, "mean_action_processing_ms": 0.1341592330617022, "mean_env_wait_ms": 0.8433814070155357, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 633.0, "episode_reward_min": 504.0, "episode_reward_mean": 583.38, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 249.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 291.69}, "hist_stats": {"episode_reward": [582.0, 576.0, 573.0, 576.0, 567.0, 579.0, 627.0, 633.0, 587.0, 582.0, 579.0, 579.0, 579.0, 576.0, 582.0, 582.0, 582.0, 582.0, 582.0, 504.0, 582.0, 576.0, 579.0, 579.0, 573.0, 570.0, 579.0, 527.0, 579.0, 630.0, 579.0, 567.0, 582.0, 587.0, 579.0, 579.0, 633.0, 581.0, 579.0, 579.0, 627.0, 582.0, 582.0, 573.0, 584.0, 582.0, 579.0, 627.0, 582.0, 627.0, 584.0, 584.0, 581.0, 582.0, 630.0, 576.0, 579.0, 
587.0, 582.0, 527.0, 584.0, 587.0, 579.0, 582.0, 576.0, 627.0, 587.0, 579.0, 579.0, 525.0, 576.0, 576.0, 539.0, 576.0, 573.0, 579.0, 579.0, 570.0, 633.0, 582.0, 584.0, 630.0, 582.0, 573.0, 573.0, 582.0, 579.0, 579.0, 630.0, 576.0, 630.0, 579.0, 579.0, 582.0, 567.0, 582.0, 576.0, 576.0, 630.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [288.0, 294.0, 288.0, 288.0, 283.0, 290.0, 278.0, 298.0, 287.0, 280.0, 290.0, 289.0, 307.0, 320.0, 319.0, 314.0, 300.0, 287.0, 291.0, 291.0, 287.0, 292.0, 285.0, 294.0, 281.0, 298.0, 284.0, 292.0, 291.0, 291.0, 288.0, 294.0, 292.0, 290.0, 291.0, 291.0, 295.0, 287.0, 255.0, 249.0, 286.0, 296.0, 290.0, 286.0, 286.0, 293.0, 292.0, 287.0, 284.0, 289.0, 276.0, 294.0, 290.0, 289.0, 263.0, 264.0, 290.0, 289.0, 317.0, 313.0, 293.0, 286.0, 289.0, 278.0, 287.0, 295.0, 289.0, 298.0, 288.0, 291.0, 287.0, 292.0, 315.0, 318.0, 294.0, 287.0, 283.0, 296.0, 282.0, 297.0, 315.0, 312.0, 294.0, 288.0, 290.0, 292.0, 278.0, 295.0, 283.0, 301.0, 293.0, 289.0, 289.0, 290.0, 309.0, 318.0, 294.0, 288.0, 308.0, 319.0, 285.0, 299.0, 295.0, 289.0, 283.0, 298.0, 298.0, 284.0, 310.0, 320.0, 284.0, 292.0, 292.0, 287.0, 290.0, 297.0, 293.0, 289.0, 258.0, 269.0, 285.0, 299.0, 285.0, 302.0, 290.0, 289.0, 291.0, 291.0, 285.0, 291.0, 316.0, 311.0, 298.0, 289.0, 292.0, 287.0, 284.0, 295.0, 260.0, 265.0, 290.0, 286.0, 285.0, 291.0, 269.0, 270.0, 283.0, 293.0, 283.0, 290.0, 289.0, 290.0, 293.0, 286.0, 284.0, 286.0, 314.0, 319.0, 290.0, 292.0, 294.0, 290.0, 317.0, 313.0, 293.0, 
289.0, 283.0, 290.0, 274.0, 299.0, 289.0, 293.0, 292.0, 287.0, 286.0, 293.0, 312.0, 318.0, 287.0, 289.0, 309.0, 321.0, 283.0, 296.0, 288.0, 291.0, 291.0, 291.0, 290.0, 277.0, 291.0, 291.0, 290.0, 286.0, 283.0, 293.0, 324.0, 306.0, 288.0, 288.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7005318625687796, "mean_inference_ms": 1.2523039495697514, "mean_action_processing_ms": 0.1341592330617022, "mean_env_wait_ms": 0.8433814070155357, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 8473600, "num_agent_steps_trained": 8473600, "num_env_steps_sampled": 4236800, "num_env_steps_trained": 4236800, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 4236800, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 8473600, "timers": {"training_iteration_time_ms": 3635.238, "learn_time_ms": 1108.179, "learn_throughput": 11550.484, "synch_weights_time_ms": 13.173}, "counters": {"num_env_steps_sampled": 4236800, "num_env_steps_trained": 4236800, "num_agent_steps_sampled": 8473600, "num_agent_steps_trained": 8473600}, "done": false, "episodes_total": 10592, "training_iteration": 331, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-21-59", "timestamp": 1666581719, "time_this_iter_s": 3.6568710803985596, "time_total_s": 1270.5662121772766, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1270.5662121772766, "timesteps_since_restore": 0, "iterations_since_restore": 331, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.54, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 200.8, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 179.58, "shaped_reward_min": 159, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.78, "onion_pickup_agent_0_min": 4, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 18.29, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 28, "useful_onion_pickup_agent_0_mean": 14.63, "useful_onion_pickup_agent_0_min": 4, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 18.1, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 28, "onion_drop_agent_0_mean": 0.08, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.07, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.03, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, 
"useful_onion_drop_agent_1_mean": 0.04, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 14.46, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 17.94, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 28, "dish_pickup_agent_0_mean": 5.99, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 4.92, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.74, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 11, "useful_dish_pickup_agent_1_mean": 4.72, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.12, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.14, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.63, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 10, "soup_pickup_agent_1_mean": 4.57, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.6, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 10, "soup_delivery_agent_1_mean": 4.52, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.46, "optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 17.94, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 28, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.46, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 17.94, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 28, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 3.2392097637057304e-05, "policy_loss": -0.00033404817804694176, "vf_loss": 
7.88044548034668, "vf_explained_var": 0.5614603757858276, "kl": 0.002426392398774624, "entropy": 0.8432062864303589, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 4249600, "num_env_steps_trained": 4249600, "num_agent_steps_sampled": 8499200, "num_agent_steps_trained": 8499200}, "sampler_results": {"episode_reward_max": 633.0, "episode_reward_min": 519.0, "episode_reward_mean": 581.18, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 257.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 290.59}, "custom_metrics": {"sparse_reward_mean": 200.8, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 179.58, "shaped_reward_min": 159, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.78, "onion_pickup_agent_0_min": 4, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 18.29, 
"onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 28, "useful_onion_pickup_agent_0_mean": 14.63, "useful_onion_pickup_agent_0_min": 4, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 18.1, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 28, "onion_drop_agent_0_mean": 0.08, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.07, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.03, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.04, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 14.46, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 17.94, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 28, "dish_pickup_agent_0_mean": 5.99, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 4.92, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.74, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 11, "useful_dish_pickup_agent_1_mean": 4.72, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.12, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.14, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.63, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 10, "soup_pickup_agent_1_mean": 4.57, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.6, "soup_delivery_agent_0_min": 3, 
"soup_delivery_agent_0_max": 10, "soup_delivery_agent_1_mean": 4.52, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.46, "optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 17.94, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 28, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.46, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 17.94, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 28, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 587.0, 579.0, 579.0, 633.0, 581.0, 579.0, 579.0, 627.0, 582.0, 582.0, 573.0, 584.0, 582.0, 579.0, 627.0, 582.0, 627.0, 584.0, 584.0, 581.0, 582.0, 630.0, 576.0, 579.0, 587.0, 582.0, 527.0, 584.0, 587.0, 579.0, 582.0, 576.0, 627.0, 587.0, 579.0, 579.0, 525.0, 576.0, 576.0, 539.0, 576.0, 573.0, 579.0, 579.0, 570.0, 633.0, 582.0, 584.0, 630.0, 582.0, 573.0, 573.0, 582.0, 579.0, 579.0, 630.0, 576.0, 630.0, 579.0, 579.0, 582.0, 567.0, 582.0, 576.0, 576.0, 630.0, 576.0, 582.0, 579.0, 579.0, 576.0, 582.0, 582.0, 630.0, 530.0, 579.0, 576.0, 630.0, 579.0, 582.0, 579.0, 582.0, 576.0, 579.0, 573.0, 582.0, 570.0, 519.0, 525.0, 522.0, 582.0, 582.0, 579.0, 581.0, 525.0, 587.0, 576.0, 582.0, 522.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [287.0, 295.0, 289.0, 298.0, 288.0, 291.0, 287.0, 292.0, 315.0, 318.0, 294.0, 287.0, 283.0, 296.0, 282.0, 297.0, 315.0, 312.0, 294.0, 288.0, 290.0, 292.0, 278.0, 295.0, 283.0, 301.0, 293.0, 289.0, 289.0, 290.0, 309.0, 318.0, 294.0, 288.0, 308.0, 319.0, 285.0, 299.0, 295.0, 289.0, 283.0, 298.0, 298.0, 284.0, 310.0, 320.0, 284.0, 292.0, 292.0, 
287.0, 290.0, 297.0, 293.0, 289.0, 258.0, 269.0, 285.0, 299.0, 285.0, 302.0, 290.0, 289.0, 291.0, 291.0, 285.0, 291.0, 316.0, 311.0, 298.0, 289.0, 292.0, 287.0, 284.0, 295.0, 260.0, 265.0, 290.0, 286.0, 285.0, 291.0, 269.0, 270.0, 283.0, 293.0, 283.0, 290.0, 289.0, 290.0, 293.0, 286.0, 284.0, 286.0, 314.0, 319.0, 290.0, 292.0, 294.0, 290.0, 317.0, 313.0, 293.0, 289.0, 283.0, 290.0, 274.0, 299.0, 289.0, 293.0, 292.0, 287.0, 286.0, 293.0, 312.0, 318.0, 287.0, 289.0, 309.0, 321.0, 283.0, 296.0, 288.0, 291.0, 291.0, 291.0, 290.0, 277.0, 291.0, 291.0, 290.0, 286.0, 283.0, 293.0, 324.0, 306.0, 288.0, 288.0, 291.0, 291.0, 296.0, 283.0, 288.0, 291.0, 288.0, 288.0, 287.0, 295.0, 287.0, 295.0, 317.0, 313.0, 267.0, 263.0, 287.0, 292.0, 288.0, 288.0, 316.0, 314.0, 290.0, 289.0, 292.0, 290.0, 288.0, 291.0, 288.0, 294.0, 291.0, 285.0, 293.0, 286.0, 284.0, 289.0, 289.0, 293.0, 283.0, 287.0, 262.0, 257.0, 268.0, 257.0, 263.0, 259.0, 293.0, 289.0, 291.0, 291.0, 284.0, 295.0, 292.0, 289.0, 262.0, 263.0, 296.0, 291.0, 283.0, 293.0, 292.0, 290.0, 263.0, 259.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7004575792416293, "mean_inference_ms": 1.2521147919128353, "mean_action_processing_ms": 0.1341505685088555, "mean_env_wait_ms": 0.8432758339878859, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 633.0, "episode_reward_min": 519.0, "episode_reward_mean": 581.18, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 257.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 290.59}, "hist_stats": {"episode_reward": [582.0, 587.0, 579.0, 579.0, 633.0, 581.0, 579.0, 579.0, 627.0, 582.0, 582.0, 573.0, 584.0, 582.0, 579.0, 627.0, 582.0, 627.0, 584.0, 584.0, 581.0, 582.0, 630.0, 576.0, 579.0, 587.0, 582.0, 527.0, 584.0, 587.0, 579.0, 582.0, 576.0, 627.0, 587.0, 579.0, 579.0, 525.0, 576.0, 576.0, 539.0, 576.0, 573.0, 579.0, 579.0, 570.0, 633.0, 582.0, 584.0, 630.0, 582.0, 573.0, 573.0, 582.0, 579.0, 579.0, 630.0, 
576.0, 630.0, 579.0, 579.0, 582.0, 567.0, 582.0, 576.0, 576.0, 630.0, 576.0, 582.0, 579.0, 579.0, 576.0, 582.0, 582.0, 630.0, 530.0, 579.0, 576.0, 630.0, 579.0, 582.0, 579.0, 582.0, 576.0, 579.0, 573.0, 582.0, 570.0, 519.0, 525.0, 522.0, 582.0, 582.0, 579.0, 581.0, 525.0, 587.0, 576.0, 582.0, 522.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [287.0, 295.0, 289.0, 298.0, 288.0, 291.0, 287.0, 292.0, 315.0, 318.0, 294.0, 287.0, 283.0, 296.0, 282.0, 297.0, 315.0, 312.0, 294.0, 288.0, 290.0, 292.0, 278.0, 295.0, 283.0, 301.0, 293.0, 289.0, 289.0, 290.0, 309.0, 318.0, 294.0, 288.0, 308.0, 319.0, 285.0, 299.0, 295.0, 289.0, 283.0, 298.0, 298.0, 284.0, 310.0, 320.0, 284.0, 292.0, 292.0, 287.0, 290.0, 297.0, 293.0, 289.0, 258.0, 269.0, 285.0, 299.0, 285.0, 302.0, 290.0, 289.0, 291.0, 291.0, 285.0, 291.0, 316.0, 311.0, 298.0, 289.0, 292.0, 287.0, 284.0, 295.0, 260.0, 265.0, 290.0, 286.0, 285.0, 291.0, 269.0, 270.0, 283.0, 293.0, 283.0, 290.0, 289.0, 290.0, 293.0, 286.0, 284.0, 286.0, 314.0, 319.0, 290.0, 292.0, 294.0, 290.0, 317.0, 313.0, 293.0, 289.0, 283.0, 290.0, 274.0, 299.0, 289.0, 293.0, 292.0, 287.0, 286.0, 293.0, 312.0, 318.0, 287.0, 289.0, 309.0, 321.0, 283.0, 296.0, 288.0, 291.0, 291.0, 291.0, 290.0, 277.0, 291.0, 291.0, 290.0, 286.0, 283.0, 293.0, 324.0, 306.0, 288.0, 288.0, 291.0, 291.0, 296.0, 283.0, 288.0, 291.0, 288.0, 288.0, 287.0, 295.0, 287.0, 295.0, 317.0, 313.0, 267.0, 263.0, 287.0, 292.0, 288.0, 288.0, 316.0, 314.0, 290.0, 289.0, 292.0, 290.0, 288.0, 291.0, 288.0, 
294.0, 291.0, 285.0, 293.0, 286.0, 284.0, 289.0, 289.0, 293.0, 283.0, 287.0, 262.0, 257.0, 268.0, 257.0, 263.0, 259.0, 293.0, 289.0, 291.0, 291.0, 284.0, 295.0, 292.0, 289.0, 262.0, 263.0, 296.0, 291.0, 283.0, 293.0, 292.0, 290.0, 263.0, 259.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7004575792416293, "mean_inference_ms": 1.2521147919128353, "mean_action_processing_ms": 0.1341505685088555, "mean_env_wait_ms": 0.8432758339878859, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 8499200, "num_agent_steps_trained": 8499200, "num_env_steps_sampled": 4249600, "num_env_steps_trained": 4249600, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 4249600, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 8499200, "timers": {"training_iteration_time_ms": 3630.316, "learn_time_ms": 1112.124, "learn_throughput": 11509.508, "synch_weights_time_ms": 12.733}, "counters": {"num_env_steps_sampled": 4249600, "num_env_steps_trained": 4249600, "num_agent_steps_sampled": 8499200, "num_agent_steps_trained": 8499200}, "done": false, "episodes_total": 10624, "training_iteration": 332, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-22-03", "timestamp": 1666581723, "time_this_iter_s": 3.664041042327881, "time_total_s": 1274.2302532196045, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1274.2302532196045, "timesteps_since_restore": 0, "iterations_since_restore": 332, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.71666666666667, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 200.4, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 178.92, "shaped_reward_min": 150, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.96, "onion_pickup_agent_0_min": 4, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 18.0, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 27, "useful_onion_pickup_agent_0_mean": 14.8, "useful_onion_pickup_agent_0_min": 4, "useful_onion_pickup_agent_0_max": 25, "useful_onion_pickup_agent_1_mean": 17.78, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 27, "onion_drop_agent_0_mean": 0.06, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.1, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.02, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, 
"useful_onion_drop_agent_1_mean": 0.05, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 14.74, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 17.63, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 26, "dish_pickup_agent_0_mean": 5.92, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 4.79, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.72, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.65, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.11, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.05, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.04, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.58, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.56, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.54, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.52, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.74, "optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 17.63, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 26, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.74, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 17.63, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 26, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0002707752864807844, "policy_loss": -0.0006344539579004049, "vf_loss": 
7.797179222106934, "vf_explained_var": 0.5710536241531372, "kl": 0.003583373501896858, "entropy": 0.8320763111114502, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 4262400, "num_env_steps_trained": 4262400, "num_agent_steps_sampled": 8524800, "num_agent_steps_trained": 8524800}, "sampler_results": {"episode_reward_max": 636.0, "episode_reward_min": 390.0, "episode_reward_mean": 579.72, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 192.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 289.86}, "custom_metrics": {"sparse_reward_mean": 200.4, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 178.92, "shaped_reward_min": 150, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.96, "onion_pickup_agent_0_min": 4, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 18.0, 
"onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 27, "useful_onion_pickup_agent_0_mean": 14.8, "useful_onion_pickup_agent_0_min": 4, "useful_onion_pickup_agent_0_max": 25, "useful_onion_pickup_agent_1_mean": 17.78, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 27, "onion_drop_agent_0_mean": 0.06, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.1, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.02, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.05, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 14.74, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 17.63, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 26, "dish_pickup_agent_0_mean": 5.92, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 4.79, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.72, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.65, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.11, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.05, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.04, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.58, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.56, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.54, "soup_delivery_agent_0_min": 2, 
"soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.52, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.74, "optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 17.63, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 26, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.74, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 17.63, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 26, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [576.0, 627.0, 587.0, 579.0, 579.0, 525.0, 576.0, 576.0, 539.0, 576.0, 573.0, 579.0, 579.0, 570.0, 633.0, 582.0, 584.0, 630.0, 582.0, 573.0, 573.0, 582.0, 579.0, 579.0, 630.0, 576.0, 630.0, 579.0, 579.0, 582.0, 567.0, 582.0, 576.0, 576.0, 630.0, 576.0, 582.0, 579.0, 579.0, 576.0, 582.0, 582.0, 630.0, 530.0, 579.0, 576.0, 630.0, 579.0, 582.0, 579.0, 582.0, 576.0, 579.0, 573.0, 582.0, 570.0, 519.0, 525.0, 522.0, 582.0, 582.0, 579.0, 581.0, 525.0, 587.0, 576.0, 582.0, 522.0, 584.0, 576.0, 579.0, 516.0, 390.0, 630.0, 633.0, 579.0, 582.0, 582.0, 567.0, 579.0, 579.0, 630.0, 630.0, 579.0, 584.0, 636.0, 627.0, 579.0, 582.0, 533.0, 579.0, 582.0, 579.0, 579.0, 579.0, 570.0, 579.0, 630.0, 630.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [285.0, 291.0, 316.0, 311.0, 298.0, 289.0, 292.0, 287.0, 284.0, 295.0, 260.0, 265.0, 290.0, 286.0, 285.0, 291.0, 269.0, 270.0, 283.0, 293.0, 283.0, 290.0, 289.0, 290.0, 293.0, 286.0, 284.0, 286.0, 314.0, 319.0, 290.0, 292.0, 294.0, 290.0, 317.0, 313.0, 293.0, 289.0, 283.0, 290.0, 274.0, 299.0, 289.0, 293.0, 292.0, 287.0, 286.0, 293.0, 312.0, 
318.0, 287.0, 289.0, 309.0, 321.0, 283.0, 296.0, 288.0, 291.0, 291.0, 291.0, 290.0, 277.0, 291.0, 291.0, 290.0, 286.0, 283.0, 293.0, 324.0, 306.0, 288.0, 288.0, 291.0, 291.0, 296.0, 283.0, 288.0, 291.0, 288.0, 288.0, 287.0, 295.0, 287.0, 295.0, 317.0, 313.0, 267.0, 263.0, 287.0, 292.0, 288.0, 288.0, 316.0, 314.0, 290.0, 289.0, 292.0, 290.0, 288.0, 291.0, 288.0, 294.0, 291.0, 285.0, 293.0, 286.0, 284.0, 289.0, 289.0, 293.0, 283.0, 287.0, 262.0, 257.0, 268.0, 257.0, 263.0, 259.0, 293.0, 289.0, 291.0, 291.0, 284.0, 295.0, 292.0, 289.0, 262.0, 263.0, 296.0, 291.0, 283.0, 293.0, 292.0, 290.0, 263.0, 259.0, 293.0, 291.0, 288.0, 288.0, 286.0, 293.0, 259.0, 257.0, 198.0, 192.0, 322.0, 308.0, 315.0, 318.0, 289.0, 290.0, 285.0, 297.0, 293.0, 289.0, 288.0, 279.0, 297.0, 282.0, 292.0, 287.0, 315.0, 315.0, 316.0, 314.0, 284.0, 295.0, 290.0, 294.0, 316.0, 320.0, 313.0, 314.0, 291.0, 288.0, 291.0, 291.0, 268.0, 265.0, 292.0, 287.0, 292.0, 290.0, 284.0, 295.0, 299.0, 280.0, 287.0, 292.0, 285.0, 285.0, 289.0, 290.0, 311.0, 319.0, 314.0, 316.0, 288.0, 291.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.700378879793017, "mean_inference_ms": 1.2519036473483978, "mean_action_processing_ms": 0.13413895914133342, "mean_env_wait_ms": 0.8431548787503442, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 636.0, "episode_reward_min": 390.0, "episode_reward_mean": 579.72, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 192.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 289.86}, "hist_stats": {"episode_reward": [576.0, 627.0, 587.0, 579.0, 579.0, 525.0, 576.0, 576.0, 539.0, 576.0, 573.0, 579.0, 579.0, 570.0, 633.0, 582.0, 584.0, 630.0, 582.0, 573.0, 573.0, 582.0, 579.0, 579.0, 630.0, 576.0, 630.0, 579.0, 579.0, 582.0, 567.0, 582.0, 576.0, 576.0, 630.0, 576.0, 582.0, 579.0, 579.0, 576.0, 582.0, 582.0, 630.0, 530.0, 579.0, 576.0, 630.0, 579.0, 582.0, 579.0, 582.0, 576.0, 579.0, 573.0, 582.0, 570.0, 519.0, 
525.0, 522.0, 582.0, 582.0, 579.0, 581.0, 525.0, 587.0, 576.0, 582.0, 522.0, 584.0, 576.0, 579.0, 516.0, 390.0, 630.0, 633.0, 579.0, 582.0, 582.0, 567.0, 579.0, 579.0, 630.0, 630.0, 579.0, 584.0, 636.0, 627.0, 579.0, 582.0, 533.0, 579.0, 582.0, 579.0, 579.0, 579.0, 570.0, 579.0, 630.0, 630.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [285.0, 291.0, 316.0, 311.0, 298.0, 289.0, 292.0, 287.0, 284.0, 295.0, 260.0, 265.0, 290.0, 286.0, 285.0, 291.0, 269.0, 270.0, 283.0, 293.0, 283.0, 290.0, 289.0, 290.0, 293.0, 286.0, 284.0, 286.0, 314.0, 319.0, 290.0, 292.0, 294.0, 290.0, 317.0, 313.0, 293.0, 289.0, 283.0, 290.0, 274.0, 299.0, 289.0, 293.0, 292.0, 287.0, 286.0, 293.0, 312.0, 318.0, 287.0, 289.0, 309.0, 321.0, 283.0, 296.0, 288.0, 291.0, 291.0, 291.0, 290.0, 277.0, 291.0, 291.0, 290.0, 286.0, 283.0, 293.0, 324.0, 306.0, 288.0, 288.0, 291.0, 291.0, 296.0, 283.0, 288.0, 291.0, 288.0, 288.0, 287.0, 295.0, 287.0, 295.0, 317.0, 313.0, 267.0, 263.0, 287.0, 292.0, 288.0, 288.0, 316.0, 314.0, 290.0, 289.0, 292.0, 290.0, 288.0, 291.0, 288.0, 294.0, 291.0, 285.0, 293.0, 286.0, 284.0, 289.0, 289.0, 293.0, 283.0, 287.0, 262.0, 257.0, 268.0, 257.0, 263.0, 259.0, 293.0, 289.0, 291.0, 291.0, 284.0, 295.0, 292.0, 289.0, 262.0, 263.0, 296.0, 291.0, 283.0, 293.0, 292.0, 290.0, 263.0, 259.0, 293.0, 291.0, 288.0, 288.0, 286.0, 293.0, 259.0, 257.0, 198.0, 192.0, 322.0, 308.0, 315.0, 318.0, 289.0, 290.0, 285.0, 297.0, 293.0, 289.0, 288.0, 279.0, 297.0, 282.0, 292.0, 287.0, 315.0, 315.0, 316.0, 
314.0, 284.0, 295.0, 290.0, 294.0, 316.0, 320.0, 313.0, 314.0, 291.0, 288.0, 291.0, 291.0, 268.0, 265.0, 292.0, 287.0, 292.0, 290.0, 284.0, 295.0, 299.0, 280.0, 287.0, 292.0, 285.0, 285.0, 289.0, 290.0, 311.0, 319.0, 314.0, 316.0, 288.0, 291.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.700378879793017, "mean_inference_ms": 1.2519036473483978, "mean_action_processing_ms": 0.13413895914133342, "mean_env_wait_ms": 0.8431548787503442, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 8524800, "num_agent_steps_trained": 8524800, "num_env_steps_sampled": 4262400, "num_env_steps_trained": 4262400, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 4262400, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 8524800, "timers": {"training_iteration_time_ms": 3635.178, "learn_time_ms": 1121.744, "learn_throughput": 11410.806, "synch_weights_time_ms": 13.851}, "counters": {"num_env_steps_sampled": 4262400, "num_env_steps_trained": 4262400, "num_agent_steps_sampled": 8524800, "num_agent_steps_trained": 8524800}, "done": false, "episodes_total": 10656, "training_iteration": 333, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-22-07", "timestamp": 1666581727, "time_this_iter_s": 3.7733590602874756, "time_total_s": 1278.003612279892, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1278.003612279892, "timesteps_since_restore": 0, "iterations_since_restore": 333, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.650000000000002, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 200.0, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 178.8, "shaped_reward_min": 150, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.62, "onion_pickup_agent_0_min": 5, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 17.35, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 28, "useful_onion_pickup_agent_0_mean": 15.44, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 25, "useful_onion_pickup_agent_1_mean": 17.15, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 28, "onion_drop_agent_0_mean": 0.06, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.1, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.02, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, 
"useful_onion_drop_agent_1_mean": 0.04, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.35, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 17.01, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 28, "dish_pickup_agent_0_mean": 5.65, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 5.07, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.43, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.91, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.12, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.05, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.05, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.35, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.79, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.32, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.73, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.35, "optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 17.01, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 28, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.35, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 17.01, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 28, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.00029662175802513957, "policy_loss": -5.7847180869430304e-05, "vf_loss": 
7.726676940917969, "vf_explained_var": 0.5506268739700317, "kl": 0.0021445024758577347, "entropy": 0.8363960981369019, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 4275200, "num_env_steps_trained": 4275200, "num_agent_steps_sampled": 8550400, "num_agent_steps_trained": 8550400}, "sampler_results": {"episode_reward_max": 636.0, "episode_reward_min": 390.0, "episode_reward_mean": 578.8, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 192.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 289.4}, "custom_metrics": {"sparse_reward_mean": 200.0, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 178.8, "shaped_reward_min": 150, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.62, "onion_pickup_agent_0_min": 5, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 17.35, 
"onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 28, "useful_onion_pickup_agent_0_mean": 15.44, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 25, "useful_onion_pickup_agent_1_mean": 17.15, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 28, "onion_drop_agent_0_mean": 0.06, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.1, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.02, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.04, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.35, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 17.01, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 28, "dish_pickup_agent_0_mean": 5.65, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 5.07, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.43, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.91, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.12, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.05, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.05, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.35, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.79, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.32, "soup_delivery_agent_0_min": 2, 
"soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.73, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.35, "optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 17.01, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 28, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.35, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 17.01, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 28, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [576.0, 576.0, 630.0, 576.0, 582.0, 579.0, 579.0, 576.0, 582.0, 582.0, 630.0, 530.0, 579.0, 576.0, 630.0, 579.0, 582.0, 579.0, 582.0, 576.0, 579.0, 573.0, 582.0, 570.0, 519.0, 525.0, 522.0, 582.0, 582.0, 579.0, 581.0, 525.0, 587.0, 576.0, 582.0, 522.0, 584.0, 576.0, 579.0, 516.0, 390.0, 630.0, 633.0, 579.0, 582.0, 582.0, 567.0, 579.0, 579.0, 630.0, 630.0, 579.0, 584.0, 636.0, 627.0, 579.0, 582.0, 533.0, 579.0, 582.0, 579.0, 579.0, 579.0, 570.0, 579.0, 630.0, 630.0, 579.0, 527.0, 579.0, 582.0, 587.0, 582.0, 576.0, 564.0, 627.0, 573.0, 576.0, 582.0, 582.0, 576.0, 579.0, 584.0, 576.0, 627.0, 584.0, 576.0, 579.0, 590.0, 582.0, 573.0, 579.0, 576.0, 579.0, 573.0, 579.0, 582.0, 582.0, 576.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [290.0, 286.0, 283.0, 293.0, 324.0, 306.0, 288.0, 288.0, 291.0, 291.0, 296.0, 283.0, 288.0, 291.0, 288.0, 288.0, 287.0, 295.0, 287.0, 295.0, 317.0, 313.0, 267.0, 263.0, 287.0, 292.0, 288.0, 288.0, 316.0, 314.0, 290.0, 289.0, 292.0, 290.0, 288.0, 291.0, 288.0, 294.0, 291.0, 285.0, 293.0, 286.0, 284.0, 289.0, 289.0, 293.0, 283.0, 287.0, 262.0, 
257.0, 268.0, 257.0, 263.0, 259.0, 293.0, 289.0, 291.0, 291.0, 284.0, 295.0, 292.0, 289.0, 262.0, 263.0, 296.0, 291.0, 283.0, 293.0, 292.0, 290.0, 263.0, 259.0, 293.0, 291.0, 288.0, 288.0, 286.0, 293.0, 259.0, 257.0, 198.0, 192.0, 322.0, 308.0, 315.0, 318.0, 289.0, 290.0, 285.0, 297.0, 293.0, 289.0, 288.0, 279.0, 297.0, 282.0, 292.0, 287.0, 315.0, 315.0, 316.0, 314.0, 284.0, 295.0, 290.0, 294.0, 316.0, 320.0, 313.0, 314.0, 291.0, 288.0, 291.0, 291.0, 268.0, 265.0, 292.0, 287.0, 292.0, 290.0, 284.0, 295.0, 299.0, 280.0, 287.0, 292.0, 285.0, 285.0, 289.0, 290.0, 311.0, 319.0, 314.0, 316.0, 288.0, 291.0, 256.0, 271.0, 292.0, 287.0, 286.0, 296.0, 297.0, 290.0, 293.0, 289.0, 289.0, 287.0, 279.0, 285.0, 309.0, 318.0, 288.0, 285.0, 289.0, 287.0, 292.0, 290.0, 279.0, 303.0, 283.0, 293.0, 289.0, 290.0, 296.0, 288.0, 287.0, 289.0, 317.0, 310.0, 288.0, 296.0, 282.0, 294.0, 288.0, 291.0, 288.0, 302.0, 291.0, 291.0, 289.0, 284.0, 288.0, 291.0, 291.0, 285.0, 288.0, 291.0, 290.0, 283.0, 277.0, 302.0, 286.0, 296.0, 293.0, 289.0, 289.0, 287.0, 286.0, 296.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7003150056262335, "mean_inference_ms": 1.251689765304647, "mean_action_processing_ms": 0.13412905696629593, "mean_env_wait_ms": 0.8430429668352946, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 636.0, "episode_reward_min": 390.0, "episode_reward_mean": 578.8, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 192.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 289.4}, "hist_stats": {"episode_reward": [576.0, 576.0, 630.0, 576.0, 582.0, 579.0, 579.0, 576.0, 582.0, 582.0, 630.0, 530.0, 579.0, 576.0, 630.0, 579.0, 582.0, 579.0, 582.0, 576.0, 579.0, 573.0, 582.0, 570.0, 519.0, 525.0, 522.0, 582.0, 582.0, 579.0, 581.0, 525.0, 587.0, 576.0, 582.0, 522.0, 584.0, 576.0, 579.0, 516.0, 390.0, 630.0, 633.0, 579.0, 582.0, 582.0, 567.0, 579.0, 579.0, 630.0, 630.0, 579.0, 584.0, 636.0, 627.0, 579.0, 582.0, 
533.0, 579.0, 582.0, 579.0, 579.0, 579.0, 570.0, 579.0, 630.0, 630.0, 579.0, 527.0, 579.0, 582.0, 587.0, 582.0, 576.0, 564.0, 627.0, 573.0, 576.0, 582.0, 582.0, 576.0, 579.0, 584.0, 576.0, 627.0, 584.0, 576.0, 579.0, 590.0, 582.0, 573.0, 579.0, 576.0, 579.0, 573.0, 579.0, 582.0, 582.0, 576.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [290.0, 286.0, 283.0, 293.0, 324.0, 306.0, 288.0, 288.0, 291.0, 291.0, 296.0, 283.0, 288.0, 291.0, 288.0, 288.0, 287.0, 295.0, 287.0, 295.0, 317.0, 313.0, 267.0, 263.0, 287.0, 292.0, 288.0, 288.0, 316.0, 314.0, 290.0, 289.0, 292.0, 290.0, 288.0, 291.0, 288.0, 294.0, 291.0, 285.0, 293.0, 286.0, 284.0, 289.0, 289.0, 293.0, 283.0, 287.0, 262.0, 257.0, 268.0, 257.0, 263.0, 259.0, 293.0, 289.0, 291.0, 291.0, 284.0, 295.0, 292.0, 289.0, 262.0, 263.0, 296.0, 291.0, 283.0, 293.0, 292.0, 290.0, 263.0, 259.0, 293.0, 291.0, 288.0, 288.0, 286.0, 293.0, 259.0, 257.0, 198.0, 192.0, 322.0, 308.0, 315.0, 318.0, 289.0, 290.0, 285.0, 297.0, 293.0, 289.0, 288.0, 279.0, 297.0, 282.0, 292.0, 287.0, 315.0, 315.0, 316.0, 314.0, 284.0, 295.0, 290.0, 294.0, 316.0, 320.0, 313.0, 314.0, 291.0, 288.0, 291.0, 291.0, 268.0, 265.0, 292.0, 287.0, 292.0, 290.0, 284.0, 295.0, 299.0, 280.0, 287.0, 292.0, 285.0, 285.0, 289.0, 290.0, 311.0, 319.0, 314.0, 316.0, 288.0, 291.0, 256.0, 271.0, 292.0, 287.0, 286.0, 296.0, 297.0, 290.0, 293.0, 289.0, 289.0, 287.0, 279.0, 285.0, 309.0, 318.0, 288.0, 285.0, 289.0, 287.0, 292.0, 290.0, 279.0, 303.0, 283.0, 293.0, 289.0, 290.0, 296.0, 
288.0, 287.0, 289.0, 317.0, 310.0, 288.0, 296.0, 282.0, 294.0, 288.0, 291.0, 288.0, 302.0, 291.0, 291.0, 289.0, 284.0, 288.0, 291.0, 291.0, 285.0, 288.0, 291.0, 290.0, 283.0, 277.0, 302.0, 286.0, 296.0, 293.0, 289.0, 289.0, 287.0, 286.0, 296.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7003150056262335, "mean_inference_ms": 1.251689765304647, "mean_action_processing_ms": 0.13412905696629593, "mean_env_wait_ms": 0.8430429668352946, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 8550400, "num_agent_steps_trained": 8550400, "num_env_steps_sampled": 4275200, "num_env_steps_trained": 4275200, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 4275200, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 8550400, "timers": {"training_iteration_time_ms": 3627.686, "learn_time_ms": 1120.74, "learn_throughput": 11421.03, "synch_weights_time_ms": 13.773}, "counters": {"num_env_steps_sampled": 4275200, "num_env_steps_trained": 4275200, "num_agent_steps_sampled": 8550400, "num_agent_steps_trained": 8550400}, "done": false, "episodes_total": 10688, "training_iteration": 334, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-22-10", "timestamp": 1666581730, "time_this_iter_s": 3.639566421508789, "time_total_s": 1281.6431787014008, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1281.6431787014008, "timesteps_since_restore": 0, "iterations_since_restore": 334, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.48, "ram_util_percent": 10.64}}
+{"custom_metrics": {"sparse_reward_mean": 201.0, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 179.45, "shaped_reward_min": 150, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.7, "onion_pickup_agent_0_min": 5, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 17.4, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 28, "useful_onion_pickup_agent_0_mean": 15.54, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 25, "useful_onion_pickup_agent_1_mean": 17.22, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 28, "onion_drop_agent_0_mean": 0.07, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.07, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.03, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, 
"useful_onion_drop_agent_1_mean": 0.03, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.38, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 17.11, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 28, "dish_pickup_agent_0_mean": 5.62, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 5.14, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.45, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.91, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.12, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.07, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.35, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.83, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.33, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.76, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.38, "optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 17.11, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 28, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.38, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 17.11, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 28, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.00028831925010308623, "policy_loss": -0.0006428910419344902, "vf_loss": 
7.709566116333008, "vf_explained_var": 0.5375313758850098, "kl": 0.002994687994942069, "entropy": 0.8327674865722656, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 4288000, "num_env_steps_trained": 4288000, "num_agent_steps_sampled": 8576000, "num_agent_steps_trained": 8576000}, "sampler_results": {"episode_reward_max": 639.0, "episode_reward_min": 390.0, "episode_reward_mean": 581.45, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 192.0}, "policy_reward_max": {"ppo": 323.0}, "policy_reward_mean": {"ppo": 290.725}, "custom_metrics": {"sparse_reward_mean": 201.0, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 179.45, "shaped_reward_min": 150, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.7, "onion_pickup_agent_0_min": 5, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 17.4, 
"onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 28, "useful_onion_pickup_agent_0_mean": 15.54, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 25, "useful_onion_pickup_agent_1_mean": 17.22, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 28, "onion_drop_agent_0_mean": 0.07, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.07, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.03, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.03, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.38, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 17.11, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 28, "dish_pickup_agent_0_mean": 5.62, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 5.14, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.45, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.91, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.12, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.07, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.35, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.83, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.33, "soup_delivery_agent_0_min": 2, 
"soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.76, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.38, "optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 17.11, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 28, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.38, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 17.11, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 28, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [587.0, 576.0, 582.0, 522.0, 584.0, 576.0, 579.0, 516.0, 390.0, 630.0, 633.0, 579.0, 582.0, 582.0, 567.0, 579.0, 579.0, 630.0, 630.0, 579.0, 584.0, 636.0, 627.0, 579.0, 582.0, 533.0, 579.0, 582.0, 579.0, 579.0, 579.0, 570.0, 579.0, 630.0, 630.0, 579.0, 527.0, 579.0, 582.0, 587.0, 582.0, 576.0, 564.0, 627.0, 573.0, 576.0, 582.0, 582.0, 576.0, 579.0, 584.0, 576.0, 627.0, 584.0, 576.0, 579.0, 590.0, 582.0, 573.0, 579.0, 576.0, 579.0, 573.0, 579.0, 582.0, 582.0, 576.0, 582.0, 576.0, 576.0, 579.0, 630.0, 582.0, 579.0, 587.0, 630.0, 579.0, 582.0, 573.0, 573.0, 576.0, 573.0, 573.0, 576.0, 525.0, 582.0, 525.0, 624.0, 630.0, 630.0, 582.0, 582.0, 576.0, 582.0, 579.0, 579.0, 582.0, 579.0, 525.0, 639.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [296.0, 291.0, 283.0, 293.0, 292.0, 290.0, 263.0, 259.0, 293.0, 291.0, 288.0, 288.0, 286.0, 293.0, 259.0, 257.0, 198.0, 192.0, 322.0, 308.0, 315.0, 318.0, 289.0, 290.0, 285.0, 297.0, 293.0, 289.0, 288.0, 279.0, 297.0, 282.0, 292.0, 287.0, 315.0, 315.0, 316.0, 314.0, 284.0, 295.0, 290.0, 294.0, 316.0, 320.0, 313.0, 314.0, 291.0, 288.0, 291.0, 
291.0, 268.0, 265.0, 292.0, 287.0, 292.0, 290.0, 284.0, 295.0, 299.0, 280.0, 287.0, 292.0, 285.0, 285.0, 289.0, 290.0, 311.0, 319.0, 314.0, 316.0, 288.0, 291.0, 256.0, 271.0, 292.0, 287.0, 286.0, 296.0, 297.0, 290.0, 293.0, 289.0, 289.0, 287.0, 279.0, 285.0, 309.0, 318.0, 288.0, 285.0, 289.0, 287.0, 292.0, 290.0, 279.0, 303.0, 283.0, 293.0, 289.0, 290.0, 296.0, 288.0, 287.0, 289.0, 317.0, 310.0, 288.0, 296.0, 282.0, 294.0, 288.0, 291.0, 288.0, 302.0, 291.0, 291.0, 289.0, 284.0, 288.0, 291.0, 291.0, 285.0, 288.0, 291.0, 290.0, 283.0, 277.0, 302.0, 286.0, 296.0, 293.0, 289.0, 289.0, 287.0, 286.0, 296.0, 285.0, 291.0, 292.0, 284.0, 288.0, 291.0, 319.0, 311.0, 286.0, 296.0, 295.0, 284.0, 290.0, 297.0, 321.0, 309.0, 288.0, 291.0, 287.0, 295.0, 288.0, 285.0, 279.0, 294.0, 285.0, 291.0, 286.0, 287.0, 290.0, 283.0, 287.0, 289.0, 264.0, 261.0, 288.0, 294.0, 261.0, 264.0, 314.0, 310.0, 312.0, 318.0, 313.0, 317.0, 296.0, 286.0, 292.0, 290.0, 280.0, 296.0, 293.0, 289.0, 290.0, 289.0, 289.0, 290.0, 292.0, 290.0, 289.0, 290.0, 263.0, 262.0, 323.0, 316.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.700256465783454, "mean_inference_ms": 1.2515049690584337, "mean_action_processing_ms": 0.13412077160613753, "mean_env_wait_ms": 0.8429494957222775, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 639.0, "episode_reward_min": 390.0, "episode_reward_mean": 581.45, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 192.0}, "policy_reward_max": {"ppo": 323.0}, "policy_reward_mean": {"ppo": 290.725}, "hist_stats": {"episode_reward": [587.0, 576.0, 582.0, 522.0, 584.0, 576.0, 579.0, 516.0, 390.0, 630.0, 633.0, 579.0, 582.0, 582.0, 567.0, 579.0, 579.0, 630.0, 630.0, 579.0, 584.0, 636.0, 627.0, 579.0, 582.0, 533.0, 579.0, 582.0, 579.0, 579.0, 579.0, 570.0, 579.0, 630.0, 630.0, 579.0, 527.0, 579.0, 582.0, 587.0, 582.0, 576.0, 564.0, 627.0, 573.0, 576.0, 582.0, 582.0, 576.0, 579.0, 584.0, 576.0, 627.0, 584.0, 576.0, 579.0, 590.0, 
582.0, 573.0, 579.0, 576.0, 579.0, 573.0, 579.0, 582.0, 582.0, 576.0, 582.0, 576.0, 576.0, 579.0, 630.0, 582.0, 579.0, 587.0, 630.0, 579.0, 582.0, 573.0, 573.0, 576.0, 573.0, 573.0, 576.0, 525.0, 582.0, 525.0, 624.0, 630.0, 630.0, 582.0, 582.0, 576.0, 582.0, 579.0, 579.0, 582.0, 579.0, 525.0, 639.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [296.0, 291.0, 283.0, 293.0, 292.0, 290.0, 263.0, 259.0, 293.0, 291.0, 288.0, 288.0, 286.0, 293.0, 259.0, 257.0, 198.0, 192.0, 322.0, 308.0, 315.0, 318.0, 289.0, 290.0, 285.0, 297.0, 293.0, 289.0, 288.0, 279.0, 297.0, 282.0, 292.0, 287.0, 315.0, 315.0, 316.0, 314.0, 284.0, 295.0, 290.0, 294.0, 316.0, 320.0, 313.0, 314.0, 291.0, 288.0, 291.0, 291.0, 268.0, 265.0, 292.0, 287.0, 292.0, 290.0, 284.0, 295.0, 299.0, 280.0, 287.0, 292.0, 285.0, 285.0, 289.0, 290.0, 311.0, 319.0, 314.0, 316.0, 288.0, 291.0, 256.0, 271.0, 292.0, 287.0, 286.0, 296.0, 297.0, 290.0, 293.0, 289.0, 289.0, 287.0, 279.0, 285.0, 309.0, 318.0, 288.0, 285.0, 289.0, 287.0, 292.0, 290.0, 279.0, 303.0, 283.0, 293.0, 289.0, 290.0, 296.0, 288.0, 287.0, 289.0, 317.0, 310.0, 288.0, 296.0, 282.0, 294.0, 288.0, 291.0, 288.0, 302.0, 291.0, 291.0, 289.0, 284.0, 288.0, 291.0, 291.0, 285.0, 288.0, 291.0, 290.0, 283.0, 277.0, 302.0, 286.0, 296.0, 293.0, 289.0, 289.0, 287.0, 286.0, 296.0, 285.0, 291.0, 292.0, 284.0, 288.0, 291.0, 319.0, 311.0, 286.0, 296.0, 295.0, 284.0, 290.0, 297.0, 321.0, 309.0, 288.0, 291.0, 287.0, 295.0, 288.0, 285.0, 279.0, 294.0, 285.0, 291.0, 286.0, 287.0, 290.0, 
283.0, 287.0, 289.0, 264.0, 261.0, 288.0, 294.0, 261.0, 264.0, 314.0, 310.0, 312.0, 318.0, 313.0, 317.0, 296.0, 286.0, 292.0, 290.0, 280.0, 296.0, 293.0, 289.0, 290.0, 289.0, 289.0, 290.0, 292.0, 290.0, 289.0, 290.0, 263.0, 262.0, 323.0, 316.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.700256465783454, "mean_inference_ms": 1.2515049690584337, "mean_action_processing_ms": 0.13412077160613753, "mean_env_wait_ms": 0.8429494957222775, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 8576000, "num_agent_steps_trained": 8576000, "num_env_steps_sampled": 4288000, "num_env_steps_trained": 4288000, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 4288000, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 8576000, "timers": {"training_iteration_time_ms": 3597.338, "learn_time_ms": 1113.646, "learn_throughput": 11493.783, "synch_weights_time_ms": 14.092}, "counters": {"num_env_steps_sampled": 4288000, "num_env_steps_trained": 4288000, "num_agent_steps_sampled": 8576000, "num_agent_steps_trained": 8576000}, "done": false, "episodes_total": 10720, "training_iteration": 335, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-22-14", "timestamp": 1666581734, "time_this_iter_s": 3.7237629890441895, "time_total_s": 1285.366941690445, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1285.366941690445, "timesteps_since_restore": 0, "iterations_since_restore": 335, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.919999999999998, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 199.8, "sparse_reward_min": 100, "sparse_reward_max": 220, "shaped_reward_mean": 178.77, "shaped_reward_min": 96, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.82, "onion_pickup_agent_0_min": 5, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 17.18, "onion_pickup_agent_1_min": 4, "onion_pickup_agent_1_max": 28, "useful_onion_pickup_agent_0_mean": 15.66, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 17.02, "useful_onion_pickup_agent_1_min": 4, "useful_onion_pickup_agent_1_max": 28, "onion_drop_agent_0_mean": 0.09, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.02, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.06, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, 
"useful_onion_drop_agent_1_mean": 0.01, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.43, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 16.93, "potting_onion_agent_1_min": 4, "potting_onion_agent_1_max": 28, "dish_pickup_agent_0_mean": 5.55, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 5.22, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.37, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 5.01, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.15, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.1, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.23, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.88, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.21, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.83, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.43, "optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 16.93, "optimal_onion_potting_agent_1_min": 4, "optimal_onion_potting_agent_1_max": 28, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.43, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 16.93, "viable_onion_potting_agent_1_min": 4, "viable_onion_potting_agent_1_max": 28, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0048871831968426704, "policy_loss": 0.004516778513789177, "vf_loss": 
7.847447395324707, "vf_explained_var": 0.5444375872612, "kl": 0.0027662317734211683, "entropy": 0.8286799192428589, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 4300800, "num_env_steps_trained": 4300800, "num_agent_steps_sampled": 8601600, "num_agent_steps_trained": 8601600}, "sampler_results": {"episode_reward_max": 639.0, "episode_reward_min": 296.0, "episode_reward_mean": 578.37, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 144.0}, "policy_reward_max": {"ppo": 323.0}, "policy_reward_mean": {"ppo": 289.185}, "custom_metrics": {"sparse_reward_mean": 199.8, "sparse_reward_min": 100, "sparse_reward_max": 220, "shaped_reward_mean": 178.77, "shaped_reward_min": 96, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.82, "onion_pickup_agent_0_min": 5, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 17.18, 
"onion_pickup_agent_1_min": 4, "onion_pickup_agent_1_max": 28, "useful_onion_pickup_agent_0_mean": 15.66, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 17.02, "useful_onion_pickup_agent_1_min": 4, "useful_onion_pickup_agent_1_max": 28, "onion_drop_agent_0_mean": 0.09, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.02, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.06, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.01, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.43, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 16.93, "potting_onion_agent_1_min": 4, "potting_onion_agent_1_max": 28, "dish_pickup_agent_0_mean": 5.55, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 5.22, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.37, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 5.01, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.15, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.1, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.23, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.88, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.21, "soup_delivery_agent_0_min": 1, 
"soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.83, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.43, "optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 16.93, "optimal_onion_potting_agent_1_min": 4, "optimal_onion_potting_agent_1_max": 28, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.43, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 16.93, "viable_onion_potting_agent_1_min": 4, "viable_onion_potting_agent_1_max": 28, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 630.0, 630.0, 579.0, 527.0, 579.0, 582.0, 587.0, 582.0, 576.0, 564.0, 627.0, 573.0, 576.0, 582.0, 582.0, 576.0, 579.0, 584.0, 576.0, 627.0, 584.0, 576.0, 579.0, 590.0, 582.0, 573.0, 579.0, 576.0, 579.0, 573.0, 579.0, 582.0, 582.0, 576.0, 582.0, 576.0, 576.0, 579.0, 630.0, 582.0, 579.0, 587.0, 630.0, 579.0, 582.0, 573.0, 573.0, 576.0, 573.0, 573.0, 576.0, 525.0, 582.0, 525.0, 624.0, 630.0, 630.0, 582.0, 582.0, 576.0, 582.0, 579.0, 579.0, 582.0, 579.0, 525.0, 639.0, 587.0, 579.0, 573.0, 582.0, 579.0, 579.0, 579.0, 570.0, 630.0, 539.0, 582.0, 582.0, 579.0, 582.0, 582.0, 573.0, 536.0, 579.0, 579.0, 582.0, 296.0, 579.0, 582.0, 582.0, 579.0, 576.0, 579.0, 582.0, 576.0, 582.0, 530.0, 587.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [289.0, 290.0, 311.0, 319.0, 314.0, 316.0, 288.0, 291.0, 256.0, 271.0, 292.0, 287.0, 286.0, 296.0, 297.0, 290.0, 293.0, 289.0, 289.0, 287.0, 279.0, 285.0, 309.0, 318.0, 288.0, 285.0, 289.0, 287.0, 292.0, 290.0, 279.0, 303.0, 283.0, 293.0, 289.0, 290.0, 296.0, 288.0, 287.0, 289.0, 317.0, 310.0, 288.0, 296.0, 282.0, 294.0, 288.0, 291.0, 288.0, 
302.0, 291.0, 291.0, 289.0, 284.0, 288.0, 291.0, 291.0, 285.0, 288.0, 291.0, 290.0, 283.0, 277.0, 302.0, 286.0, 296.0, 293.0, 289.0, 289.0, 287.0, 286.0, 296.0, 285.0, 291.0, 292.0, 284.0, 288.0, 291.0, 319.0, 311.0, 286.0, 296.0, 295.0, 284.0, 290.0, 297.0, 321.0, 309.0, 288.0, 291.0, 287.0, 295.0, 288.0, 285.0, 279.0, 294.0, 285.0, 291.0, 286.0, 287.0, 290.0, 283.0, 287.0, 289.0, 264.0, 261.0, 288.0, 294.0, 261.0, 264.0, 314.0, 310.0, 312.0, 318.0, 313.0, 317.0, 296.0, 286.0, 292.0, 290.0, 280.0, 296.0, 293.0, 289.0, 290.0, 289.0, 289.0, 290.0, 292.0, 290.0, 289.0, 290.0, 263.0, 262.0, 323.0, 316.0, 296.0, 291.0, 288.0, 291.0, 282.0, 291.0, 288.0, 294.0, 291.0, 288.0, 284.0, 295.0, 294.0, 285.0, 284.0, 286.0, 310.0, 320.0, 269.0, 270.0, 291.0, 291.0, 297.0, 285.0, 289.0, 290.0, 292.0, 290.0, 297.0, 285.0, 286.0, 287.0, 271.0, 265.0, 295.0, 284.0, 290.0, 289.0, 293.0, 289.0, 144.0, 152.0, 291.0, 288.0, 286.0, 296.0, 289.0, 293.0, 290.0, 289.0, 284.0, 292.0, 290.0, 289.0, 293.0, 289.0, 290.0, 286.0, 287.0, 295.0, 257.0, 273.0, 285.0, 302.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7002034651518352, "mean_inference_ms": 1.251341339209156, "mean_action_processing_ms": 0.13411410299742965, "mean_env_wait_ms": 0.8428665021811821, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 639.0, "episode_reward_min": 296.0, "episode_reward_mean": 578.37, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 144.0}, "policy_reward_max": {"ppo": 323.0}, "policy_reward_mean": {"ppo": 289.185}, "hist_stats": {"episode_reward": [579.0, 630.0, 630.0, 579.0, 527.0, 579.0, 582.0, 587.0, 582.0, 576.0, 564.0, 627.0, 573.0, 576.0, 582.0, 582.0, 576.0, 579.0, 584.0, 576.0, 627.0, 584.0, 576.0, 579.0, 590.0, 582.0, 573.0, 579.0, 576.0, 579.0, 573.0, 579.0, 582.0, 582.0, 576.0, 582.0, 576.0, 576.0, 579.0, 630.0, 582.0, 579.0, 587.0, 630.0, 579.0, 582.0, 573.0, 573.0, 576.0, 573.0, 573.0, 576.0, 525.0, 582.0, 525.0, 624.0, 630.0, 
630.0, 582.0, 582.0, 576.0, 582.0, 579.0, 579.0, 582.0, 579.0, 525.0, 639.0, 587.0, 579.0, 573.0, 582.0, 579.0, 579.0, 579.0, 570.0, 630.0, 539.0, 582.0, 582.0, 579.0, 582.0, 582.0, 573.0, 536.0, 579.0, 579.0, 582.0, 296.0, 579.0, 582.0, 582.0, 579.0, 576.0, 579.0, 582.0, 576.0, 582.0, 530.0, 587.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [289.0, 290.0, 311.0, 319.0, 314.0, 316.0, 288.0, 291.0, 256.0, 271.0, 292.0, 287.0, 286.0, 296.0, 297.0, 290.0, 293.0, 289.0, 289.0, 287.0, 279.0, 285.0, 309.0, 318.0, 288.0, 285.0, 289.0, 287.0, 292.0, 290.0, 279.0, 303.0, 283.0, 293.0, 289.0, 290.0, 296.0, 288.0, 287.0, 289.0, 317.0, 310.0, 288.0, 296.0, 282.0, 294.0, 288.0, 291.0, 288.0, 302.0, 291.0, 291.0, 289.0, 284.0, 288.0, 291.0, 291.0, 285.0, 288.0, 291.0, 290.0, 283.0, 277.0, 302.0, 286.0, 296.0, 293.0, 289.0, 289.0, 287.0, 286.0, 296.0, 285.0, 291.0, 292.0, 284.0, 288.0, 291.0, 319.0, 311.0, 286.0, 296.0, 295.0, 284.0, 290.0, 297.0, 321.0, 309.0, 288.0, 291.0, 287.0, 295.0, 288.0, 285.0, 279.0, 294.0, 285.0, 291.0, 286.0, 287.0, 290.0, 283.0, 287.0, 289.0, 264.0, 261.0, 288.0, 294.0, 261.0, 264.0, 314.0, 310.0, 312.0, 318.0, 313.0, 317.0, 296.0, 286.0, 292.0, 290.0, 280.0, 296.0, 293.0, 289.0, 290.0, 289.0, 289.0, 290.0, 292.0, 290.0, 289.0, 290.0, 263.0, 262.0, 323.0, 316.0, 296.0, 291.0, 288.0, 291.0, 282.0, 291.0, 288.0, 294.0, 291.0, 288.0, 284.0, 295.0, 294.0, 285.0, 284.0, 286.0, 310.0, 320.0, 269.0, 270.0, 291.0, 291.0, 297.0, 285.0, 289.0, 290.0, 292.0, 290.0, 297.0, 
285.0, 286.0, 287.0, 271.0, 265.0, 295.0, 284.0, 290.0, 289.0, 293.0, 289.0, 144.0, 152.0, 291.0, 288.0, 286.0, 296.0, 289.0, 293.0, 290.0, 289.0, 284.0, 292.0, 290.0, 289.0, 293.0, 289.0, 290.0, 286.0, 287.0, 295.0, 257.0, 273.0, 285.0, 302.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7002034651518352, "mean_inference_ms": 1.251341339209156, "mean_action_processing_ms": 0.13411410299742965, "mean_env_wait_ms": 0.8428665021811821, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 8601600, "num_agent_steps_trained": 8601600, "num_env_steps_sampled": 4300800, "num_env_steps_trained": 4300800, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 4300800, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 8601600, "timers": {"training_iteration_time_ms": 3600.528, "learn_time_ms": 1117.896, "learn_throughput": 11450.085, "synch_weights_time_ms": 13.917}, "counters": {"num_env_steps_sampled": 4300800, "num_env_steps_trained": 4300800, "num_agent_steps_sampled": 8601600, "num_agent_steps_trained": 8601600}, "done": false, "episodes_total": 10752, "training_iteration": 336, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-22-18", "timestamp": 1666581738, "time_this_iter_s": 3.7050204277038574, "time_total_s": 1289.0719621181488, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1289.0719621181488, "timesteps_since_restore": 0, "iterations_since_restore": 336, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.96666666666667, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 198.2, "sparse_reward_min": 80, "sparse_reward_max": 220, "shaped_reward_mean": 178.23, "shaped_reward_min": 93, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.25, "onion_pickup_agent_0_min": 4, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 17.63, "onion_pickup_agent_1_min": 4, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 15.1, "useful_onion_pickup_agent_0_min": 3, "useful_onion_pickup_agent_0_max": 25, "useful_onion_pickup_agent_1_mean": 17.36, "useful_onion_pickup_agent_1_min": 4, "useful_onion_pickup_agent_1_max": 26, "onion_drop_agent_0_mean": 0.09, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.03, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.07, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, 
"useful_onion_drop_agent_1_mean": 0.03, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.85, "potting_onion_agent_0_min": 3, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 17.34, "potting_onion_agent_1_min": 4, "potting_onion_agent_1_max": 26, "dish_pickup_agent_0_mean": 5.7, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.09, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.59, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.88, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.13, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.13, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.37, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.68, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.35, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.62, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.85, "optimal_onion_potting_agent_0_min": 3, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 17.34, "optimal_onion_potting_agent_1_min": 4, "optimal_onion_potting_agent_1_max": 26, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.85, "viable_onion_potting_agent_0_min": 3, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 17.34, "viable_onion_potting_agent_1_min": 4, "viable_onion_potting_agent_1_max": 26, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.002555068116635084, "policy_loss": -0.002903904300183058, "vf_loss": 
7.657268524169922, "vf_explained_var": 0.5829470157623291, "kl": 0.002286064438521862, "entropy": 0.8337796926498413, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 4313600, "num_env_steps_trained": 4313600, "num_agent_steps_sampled": 8627200, "num_agent_steps_trained": 8627200}, "sampler_results": {"episode_reward_max": 639.0, "episode_reward_min": 253.0, "episode_reward_mean": 574.63, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 124.0}, "policy_reward_max": {"ppo": 323.0}, "policy_reward_mean": {"ppo": 287.315}, "custom_metrics": {"sparse_reward_mean": 198.2, "sparse_reward_min": 80, "sparse_reward_max": 220, "shaped_reward_mean": 178.23, "shaped_reward_min": 93, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.25, "onion_pickup_agent_0_min": 4, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 17.63, 
"onion_pickup_agent_1_min": 4, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 15.1, "useful_onion_pickup_agent_0_min": 3, "useful_onion_pickup_agent_0_max": 25, "useful_onion_pickup_agent_1_mean": 17.36, "useful_onion_pickup_agent_1_min": 4, "useful_onion_pickup_agent_1_max": 26, "onion_drop_agent_0_mean": 0.09, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.03, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.07, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.03, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.85, "potting_onion_agent_0_min": 3, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 17.34, "potting_onion_agent_1_min": 4, "potting_onion_agent_1_max": 26, "dish_pickup_agent_0_mean": 5.7, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.09, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.59, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.88, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.13, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.13, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.37, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.68, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.35, "soup_delivery_agent_0_min": 1, 
"soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.62, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.85, "optimal_onion_potting_agent_0_min": 3, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 17.34, "optimal_onion_potting_agent_1_min": 4, "optimal_onion_potting_agent_1_max": 26, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.85, "viable_onion_potting_agent_0_min": 3, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 17.34, "viable_onion_potting_agent_1_min": 4, "viable_onion_potting_agent_1_max": 26, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 582.0, 576.0, 582.0, 576.0, 576.0, 579.0, 630.0, 582.0, 579.0, 587.0, 630.0, 579.0, 582.0, 573.0, 573.0, 576.0, 573.0, 573.0, 576.0, 525.0, 582.0, 525.0, 624.0, 630.0, 630.0, 582.0, 582.0, 576.0, 582.0, 579.0, 579.0, 582.0, 579.0, 525.0, 639.0, 587.0, 579.0, 573.0, 582.0, 579.0, 579.0, 579.0, 570.0, 630.0, 539.0, 582.0, 582.0, 579.0, 582.0, 582.0, 573.0, 536.0, 579.0, 579.0, 582.0, 296.0, 579.0, 582.0, 582.0, 579.0, 576.0, 579.0, 582.0, 576.0, 582.0, 530.0, 587.0, 584.0, 630.0, 584.0, 582.0, 584.0, 582.0, 582.0, 579.0, 582.0, 582.0, 582.0, 582.0, 253.0, 576.0, 584.0, 579.0, 579.0, 579.0, 582.0, 581.0, 576.0, 576.0, 579.0, 576.0, 582.0, 582.0, 579.0, 579.0, 579.0, 582.0, 579.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [286.0, 296.0, 293.0, 289.0, 289.0, 287.0, 286.0, 296.0, 285.0, 291.0, 292.0, 284.0, 288.0, 291.0, 319.0, 311.0, 286.0, 296.0, 295.0, 284.0, 290.0, 297.0, 321.0, 309.0, 288.0, 291.0, 287.0, 295.0, 288.0, 285.0, 279.0, 294.0, 285.0, 291.0, 286.0, 287.0, 290.0, 283.0, 287.0, 289.0, 264.0, 261.0, 288.0, 294.0, 261.0, 264.0, 314.0, 310.0, 312.0, 
318.0, 313.0, 317.0, 296.0, 286.0, 292.0, 290.0, 280.0, 296.0, 293.0, 289.0, 290.0, 289.0, 289.0, 290.0, 292.0, 290.0, 289.0, 290.0, 263.0, 262.0, 323.0, 316.0, 296.0, 291.0, 288.0, 291.0, 282.0, 291.0, 288.0, 294.0, 291.0, 288.0, 284.0, 295.0, 294.0, 285.0, 284.0, 286.0, 310.0, 320.0, 269.0, 270.0, 291.0, 291.0, 297.0, 285.0, 289.0, 290.0, 292.0, 290.0, 297.0, 285.0, 286.0, 287.0, 271.0, 265.0, 295.0, 284.0, 290.0, 289.0, 293.0, 289.0, 144.0, 152.0, 291.0, 288.0, 286.0, 296.0, 289.0, 293.0, 290.0, 289.0, 284.0, 292.0, 290.0, 289.0, 293.0, 289.0, 290.0, 286.0, 287.0, 295.0, 257.0, 273.0, 285.0, 302.0, 290.0, 294.0, 311.0, 319.0, 289.0, 295.0, 290.0, 292.0, 287.0, 297.0, 294.0, 288.0, 291.0, 291.0, 289.0, 290.0, 291.0, 291.0, 284.0, 298.0, 290.0, 292.0, 290.0, 292.0, 129.0, 124.0, 282.0, 294.0, 285.0, 299.0, 282.0, 297.0, 287.0, 292.0, 286.0, 293.0, 287.0, 295.0, 296.0, 285.0, 291.0, 285.0, 291.0, 285.0, 291.0, 288.0, 280.0, 296.0, 290.0, 292.0, 289.0, 293.0, 284.0, 295.0, 285.0, 294.0, 292.0, 287.0, 290.0, 292.0, 292.0, 287.0, 280.0, 296.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7001310320699491, "mean_inference_ms": 1.2511716681710092, "mean_action_processing_ms": 0.13410539359749385, "mean_env_wait_ms": 0.8427664592330052, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 639.0, "episode_reward_min": 253.0, "episode_reward_mean": 574.63, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 124.0}, "policy_reward_max": {"ppo": 323.0}, "policy_reward_mean": {"ppo": 287.315}, "hist_stats": {"episode_reward": [582.0, 582.0, 576.0, 582.0, 576.0, 576.0, 579.0, 630.0, 582.0, 579.0, 587.0, 630.0, 579.0, 582.0, 573.0, 573.0, 576.0, 573.0, 573.0, 576.0, 525.0, 582.0, 525.0, 624.0, 630.0, 630.0, 582.0, 582.0, 576.0, 582.0, 579.0, 579.0, 582.0, 579.0, 525.0, 639.0, 587.0, 579.0, 573.0, 582.0, 579.0, 579.0, 579.0, 570.0, 630.0, 539.0, 582.0, 582.0, 579.0, 582.0, 582.0, 573.0, 536.0, 579.0, 579.0, 582.0, 
296.0, 579.0, 582.0, 582.0, 579.0, 576.0, 579.0, 582.0, 576.0, 582.0, 530.0, 587.0, 584.0, 630.0, 584.0, 582.0, 584.0, 582.0, 582.0, 579.0, 582.0, 582.0, 582.0, 582.0, 253.0, 576.0, 584.0, 579.0, 579.0, 579.0, 582.0, 581.0, 576.0, 576.0, 579.0, 576.0, 582.0, 582.0, 579.0, 579.0, 579.0, 582.0, 579.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [286.0, 296.0, 293.0, 289.0, 289.0, 287.0, 286.0, 296.0, 285.0, 291.0, 292.0, 284.0, 288.0, 291.0, 319.0, 311.0, 286.0, 296.0, 295.0, 284.0, 290.0, 297.0, 321.0, 309.0, 288.0, 291.0, 287.0, 295.0, 288.0, 285.0, 279.0, 294.0, 285.0, 291.0, 286.0, 287.0, 290.0, 283.0, 287.0, 289.0, 264.0, 261.0, 288.0, 294.0, 261.0, 264.0, 314.0, 310.0, 312.0, 318.0, 313.0, 317.0, 296.0, 286.0, 292.0, 290.0, 280.0, 296.0, 293.0, 289.0, 290.0, 289.0, 289.0, 290.0, 292.0, 290.0, 289.0, 290.0, 263.0, 262.0, 323.0, 316.0, 296.0, 291.0, 288.0, 291.0, 282.0, 291.0, 288.0, 294.0, 291.0, 288.0, 284.0, 295.0, 294.0, 285.0, 284.0, 286.0, 310.0, 320.0, 269.0, 270.0, 291.0, 291.0, 297.0, 285.0, 289.0, 290.0, 292.0, 290.0, 297.0, 285.0, 286.0, 287.0, 271.0, 265.0, 295.0, 284.0, 290.0, 289.0, 293.0, 289.0, 144.0, 152.0, 291.0, 288.0, 286.0, 296.0, 289.0, 293.0, 290.0, 289.0, 284.0, 292.0, 290.0, 289.0, 293.0, 289.0, 290.0, 286.0, 287.0, 295.0, 257.0, 273.0, 285.0, 302.0, 290.0, 294.0, 311.0, 319.0, 289.0, 295.0, 290.0, 292.0, 287.0, 297.0, 294.0, 288.0, 291.0, 291.0, 289.0, 290.0, 291.0, 291.0, 284.0, 298.0, 290.0, 292.0, 290.0, 292.0, 129.0, 124.0, 282.0, 294.0, 
285.0, 299.0, 282.0, 297.0, 287.0, 292.0, 286.0, 293.0, 287.0, 295.0, 296.0, 285.0, 291.0, 285.0, 291.0, 285.0, 291.0, 288.0, 280.0, 296.0, 290.0, 292.0, 289.0, 293.0, 284.0, 295.0, 285.0, 294.0, 292.0, 287.0, 290.0, 292.0, 292.0, 287.0, 280.0, 296.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7001310320699491, "mean_inference_ms": 1.2511716681710092, "mean_action_processing_ms": 0.13410539359749385, "mean_env_wait_ms": 0.8427664592330052, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 8627200, "num_agent_steps_trained": 8627200, "num_env_steps_sampled": 4313600, "num_env_steps_trained": 4313600, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 4313600, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 8627200, "timers": {"training_iteration_time_ms": 3599.436, "learn_time_ms": 1126.026, "learn_throughput": 11367.408, "synch_weights_time_ms": 13.268}, "counters": {"num_env_steps_sampled": 4313600, "num_env_steps_trained": 4313600, "num_agent_steps_sampled": 8627200, "num_agent_steps_trained": 8627200}, "done": false, "episodes_total": 10784, "training_iteration": 337, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-22-22", "timestamp": 1666581742, "time_this_iter_s": 3.647874355316162, "time_total_s": 1292.719836473465, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1292.719836473465, "timesteps_since_restore": 0, "iterations_since_restore": 337, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.46, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 198.0, "sparse_reward_min": 80, "sparse_reward_max": 220, "shaped_reward_mean": 178.53, "shaped_reward_min": 93, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.56, "onion_pickup_agent_0_min": 4, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 17.41, "onion_pickup_agent_1_min": 4, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 15.39, "useful_onion_pickup_agent_0_min": 3, "useful_onion_pickup_agent_0_max": 25, "useful_onion_pickup_agent_1_mean": 17.09, "useful_onion_pickup_agent_1_min": 4, "useful_onion_pickup_agent_1_max": 26, "onion_drop_agent_0_mean": 0.12, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.05, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.08, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, 
"useful_onion_drop_agent_1_mean": 0.05, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.16, "potting_onion_agent_0_min": 3, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 17.07, "potting_onion_agent_1_min": 4, "potting_onion_agent_1_max": 26, "dish_pickup_agent_0_mean": 5.59, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.21, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.43, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 5.05, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.13, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.1, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.05, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.25, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.83, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.22, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.76, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.16, "optimal_onion_potting_agent_0_min": 3, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 17.07, "optimal_onion_potting_agent_1_min": 4, "optimal_onion_potting_agent_1_max": 26, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.16, "viable_onion_potting_agent_0_min": 3, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 17.07, "viable_onion_potting_agent_1_min": 4, "viable_onion_potting_agent_1_max": 26, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0018456100951880217, "policy_loss": -0.002207729499787092, "vf_loss": 
7.778796672821045, "vf_explained_var": 0.5378071069717407, "kl": 0.0025057008024305105, "entropy": 0.8315178155899048, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 4326400, "num_env_steps_trained": 4326400, "num_agent_steps_sampled": 8652800, "num_agent_steps_trained": 8652800}, "sampler_results": {"episode_reward_max": 639.0, "episode_reward_min": 253.0, "episode_reward_mean": 574.53, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 124.0}, "policy_reward_max": {"ppo": 323.0}, "policy_reward_mean": {"ppo": 287.265}, "custom_metrics": {"sparse_reward_mean": 198.0, "sparse_reward_min": 80, "sparse_reward_max": 220, "shaped_reward_mean": 178.53, "shaped_reward_min": 93, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.56, "onion_pickup_agent_0_min": 4, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 17.41, 
"onion_pickup_agent_1_min": 4, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 15.39, "useful_onion_pickup_agent_0_min": 3, "useful_onion_pickup_agent_0_max": 25, "useful_onion_pickup_agent_1_mean": 17.09, "useful_onion_pickup_agent_1_min": 4, "useful_onion_pickup_agent_1_max": 26, "onion_drop_agent_0_mean": 0.12, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.05, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.08, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.05, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.16, "potting_onion_agent_0_min": 3, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 17.07, "potting_onion_agent_1_min": 4, "potting_onion_agent_1_max": 26, "dish_pickup_agent_0_mean": 5.59, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.21, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.43, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 5.05, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.13, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.1, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.05, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.25, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.83, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.22, "soup_delivery_agent_0_min": 1, 
"soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.76, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.16, "optimal_onion_potting_agent_0_min": 3, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 17.07, "optimal_onion_potting_agent_1_min": 4, "optimal_onion_potting_agent_1_max": 26, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.16, "viable_onion_potting_agent_0_min": 3, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 17.07, "viable_onion_potting_agent_1_min": 4, "viable_onion_potting_agent_1_max": 26, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 579.0, 525.0, 639.0, 587.0, 579.0, 573.0, 582.0, 579.0, 579.0, 579.0, 570.0, 630.0, 539.0, 582.0, 582.0, 579.0, 582.0, 582.0, 573.0, 536.0, 579.0, 579.0, 582.0, 296.0, 579.0, 582.0, 582.0, 579.0, 576.0, 579.0, 582.0, 576.0, 582.0, 530.0, 587.0, 584.0, 630.0, 584.0, 582.0, 584.0, 582.0, 582.0, 579.0, 582.0, 582.0, 582.0, 582.0, 253.0, 576.0, 584.0, 579.0, 579.0, 579.0, 582.0, 581.0, 576.0, 576.0, 579.0, 576.0, 582.0, 582.0, 579.0, 579.0, 579.0, 582.0, 579.0, 576.0, 627.0, 582.0, 587.0, 636.0, 579.0, 587.0, 587.0, 630.0, 516.0, 579.0, 582.0, 582.0, 573.0, 579.0, 522.0, 582.0, 582.0, 579.0, 633.0, 581.0, 579.0, 633.0, 536.0, 639.0, 570.0, 579.0, 579.0, 525.0, 576.0, 576.0, 576.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [292.0, 290.0, 289.0, 290.0, 263.0, 262.0, 323.0, 316.0, 296.0, 291.0, 288.0, 291.0, 282.0, 291.0, 288.0, 294.0, 291.0, 288.0, 284.0, 295.0, 294.0, 285.0, 284.0, 286.0, 310.0, 320.0, 269.0, 270.0, 291.0, 291.0, 297.0, 285.0, 289.0, 290.0, 292.0, 290.0, 297.0, 285.0, 286.0, 287.0, 271.0, 265.0, 295.0, 284.0, 290.0, 289.0, 293.0, 289.0, 144.0, 
152.0, 291.0, 288.0, 286.0, 296.0, 289.0, 293.0, 290.0, 289.0, 284.0, 292.0, 290.0, 289.0, 293.0, 289.0, 290.0, 286.0, 287.0, 295.0, 257.0, 273.0, 285.0, 302.0, 290.0, 294.0, 311.0, 319.0, 289.0, 295.0, 290.0, 292.0, 287.0, 297.0, 294.0, 288.0, 291.0, 291.0, 289.0, 290.0, 291.0, 291.0, 284.0, 298.0, 290.0, 292.0, 290.0, 292.0, 129.0, 124.0, 282.0, 294.0, 285.0, 299.0, 282.0, 297.0, 287.0, 292.0, 286.0, 293.0, 287.0, 295.0, 296.0, 285.0, 291.0, 285.0, 291.0, 285.0, 291.0, 288.0, 280.0, 296.0, 290.0, 292.0, 289.0, 293.0, 284.0, 295.0, 285.0, 294.0, 292.0, 287.0, 290.0, 292.0, 292.0, 287.0, 280.0, 296.0, 309.0, 318.0, 290.0, 292.0, 290.0, 297.0, 321.0, 315.0, 286.0, 293.0, 285.0, 302.0, 290.0, 297.0, 314.0, 316.0, 258.0, 258.0, 294.0, 285.0, 287.0, 295.0, 291.0, 291.0, 283.0, 290.0, 286.0, 293.0, 254.0, 268.0, 291.0, 291.0, 289.0, 293.0, 291.0, 288.0, 314.0, 319.0, 300.0, 281.0, 286.0, 293.0, 319.0, 314.0, 267.0, 269.0, 319.0, 320.0, 289.0, 281.0, 293.0, 286.0, 279.0, 300.0, 259.0, 266.0, 285.0, 291.0, 287.0, 289.0, 285.0, 291.0, 286.0, 293.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7000574987189117, "mean_inference_ms": 1.2509881997334336, "mean_action_processing_ms": 0.13409452514616124, "mean_env_wait_ms": 0.842656773384128, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 639.0, "episode_reward_min": 253.0, "episode_reward_mean": 574.53, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 124.0}, "policy_reward_max": {"ppo": 323.0}, "policy_reward_mean": {"ppo": 287.265}, "hist_stats": {"episode_reward": [582.0, 579.0, 525.0, 639.0, 587.0, 579.0, 573.0, 582.0, 579.0, 579.0, 579.0, 570.0, 630.0, 539.0, 582.0, 582.0, 579.0, 582.0, 582.0, 573.0, 536.0, 579.0, 579.0, 582.0, 296.0, 579.0, 582.0, 582.0, 579.0, 576.0, 579.0, 582.0, 576.0, 582.0, 530.0, 587.0, 584.0, 630.0, 584.0, 582.0, 584.0, 582.0, 582.0, 579.0, 582.0, 582.0, 582.0, 582.0, 253.0, 576.0, 584.0, 579.0, 579.0, 579.0, 582.0, 581.0, 576.0, 
576.0, 579.0, 576.0, 582.0, 582.0, 579.0, 579.0, 579.0, 582.0, 579.0, 576.0, 627.0, 582.0, 587.0, 636.0, 579.0, 587.0, 587.0, 630.0, 516.0, 579.0, 582.0, 582.0, 573.0, 579.0, 522.0, 582.0, 582.0, 579.0, 633.0, 581.0, 579.0, 633.0, 536.0, 639.0, 570.0, 579.0, 579.0, 525.0, 576.0, 576.0, 576.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [292.0, 290.0, 289.0, 290.0, 263.0, 262.0, 323.0, 316.0, 296.0, 291.0, 288.0, 291.0, 282.0, 291.0, 288.0, 294.0, 291.0, 288.0, 284.0, 295.0, 294.0, 285.0, 284.0, 286.0, 310.0, 320.0, 269.0, 270.0, 291.0, 291.0, 297.0, 285.0, 289.0, 290.0, 292.0, 290.0, 297.0, 285.0, 286.0, 287.0, 271.0, 265.0, 295.0, 284.0, 290.0, 289.0, 293.0, 289.0, 144.0, 152.0, 291.0, 288.0, 286.0, 296.0, 289.0, 293.0, 290.0, 289.0, 284.0, 292.0, 290.0, 289.0, 293.0, 289.0, 290.0, 286.0, 287.0, 295.0, 257.0, 273.0, 285.0, 302.0, 290.0, 294.0, 311.0, 319.0, 289.0, 295.0, 290.0, 292.0, 287.0, 297.0, 294.0, 288.0, 291.0, 291.0, 289.0, 290.0, 291.0, 291.0, 284.0, 298.0, 290.0, 292.0, 290.0, 292.0, 129.0, 124.0, 282.0, 294.0, 285.0, 299.0, 282.0, 297.0, 287.0, 292.0, 286.0, 293.0, 287.0, 295.0, 296.0, 285.0, 291.0, 285.0, 291.0, 285.0, 291.0, 288.0, 280.0, 296.0, 290.0, 292.0, 289.0, 293.0, 284.0, 295.0, 285.0, 294.0, 292.0, 287.0, 290.0, 292.0, 292.0, 287.0, 280.0, 296.0, 309.0, 318.0, 290.0, 292.0, 290.0, 297.0, 321.0, 315.0, 286.0, 293.0, 285.0, 302.0, 290.0, 297.0, 314.0, 316.0, 258.0, 258.0, 294.0, 285.0, 287.0, 295.0, 291.0, 291.0, 283.0, 290.0, 286.0, 293.0, 254.0, 
268.0, 291.0, 291.0, 289.0, 293.0, 291.0, 288.0, 314.0, 319.0, 300.0, 281.0, 286.0, 293.0, 319.0, 314.0, 267.0, 269.0, 319.0, 320.0, 289.0, 281.0, 293.0, 286.0, 279.0, 300.0, 259.0, 266.0, 285.0, 291.0, 287.0, 289.0, 285.0, 291.0, 286.0, 293.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7000574987189117, "mean_inference_ms": 1.2509881997334336, "mean_action_processing_ms": 0.13409452514616124, "mean_env_wait_ms": 0.842656773384128, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 8652800, "num_agent_steps_trained": 8652800, "num_env_steps_sampled": 4326400, "num_env_steps_trained": 4326400, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 4326400, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 8652800, "timers": {"training_iteration_time_ms": 3606.7, "learn_time_ms": 1130.174, "learn_throughput": 11325.694, "synch_weights_time_ms": 12.833}, "counters": {"num_env_steps_sampled": 4326400, "num_env_steps_trained": 4326400, "num_agent_steps_sampled": 8652800, "num_agent_steps_trained": 8652800}, "done": false, "episodes_total": 10816, "training_iteration": 338, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-22-26", "timestamp": 1666581746, "time_this_iter_s": 3.591172695159912, "time_total_s": 1296.3110091686249, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1296.3110091686249, "timesteps_since_restore": 0, "iterations_since_restore": 338, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.73333333333333, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 195.2, "sparse_reward_min": 0, "sparse_reward_max": 220, "shaped_reward_mean": 176.04, "shaped_reward_min": 17, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.16, "onion_pickup_agent_0_min": 1, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 17.38, "onion_pickup_agent_1_min": 2, "onion_pickup_agent_1_max": 27, "useful_onion_pickup_agent_0_mean": 15.0, "useful_onion_pickup_agent_0_min": 1, "useful_onion_pickup_agent_0_max": 25, "useful_onion_pickup_agent_1_mean": 17.01, "useful_onion_pickup_agent_1_min": 2, "useful_onion_pickup_agent_1_max": 26, "onion_drop_agent_0_mean": 0.11, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.08, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.05, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, 
"useful_onion_drop_agent_1_mean": 0.06, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.82, "potting_onion_agent_0_min": 1, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 16.95, "potting_onion_agent_1_min": 2, "potting_onion_agent_1_max": 26, "dish_pickup_agent_0_mean": 5.58, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 5.14, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.31, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 5.0, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.2, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.06, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.16, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.8, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.12, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.73, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.82, "optimal_onion_potting_agent_0_min": 1, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 16.95, "optimal_onion_potting_agent_1_min": 2, "optimal_onion_potting_agent_1_max": 26, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.82, "viable_onion_potting_agent_0_min": 1, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 16.95, "viable_onion_potting_agent_1_min": 2, "viable_onion_potting_agent_1_max": 26, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.00020561664132401347, "policy_loss": -0.00017668434884399176, "vf_loss": 
7.972438812255859, "vf_explained_var": 0.5549718141555786, "kl": 0.004623632878065109, "entropy": 0.8298848867416382, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 4339200, "num_env_steps_trained": 4339200, "num_agent_steps_sampled": 8678400, "num_agent_steps_trained": 8678400}, "sampler_results": {"episode_reward_max": 639.0, "episode_reward_min": 17.0, "episode_reward_mean": 566.44, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 3.0}, "policy_reward_max": {"ppo": 321.0}, "policy_reward_mean": {"ppo": 283.22}, "custom_metrics": {"sparse_reward_mean": 195.2, "sparse_reward_min": 0, "sparse_reward_max": 220, "shaped_reward_mean": 176.04, "shaped_reward_min": 17, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.16, "onion_pickup_agent_0_min": 1, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 17.38, 
"onion_pickup_agent_1_min": 2, "onion_pickup_agent_1_max": 27, "useful_onion_pickup_agent_0_mean": 15.0, "useful_onion_pickup_agent_0_min": 1, "useful_onion_pickup_agent_0_max": 25, "useful_onion_pickup_agent_1_mean": 17.01, "useful_onion_pickup_agent_1_min": 2, "useful_onion_pickup_agent_1_max": 26, "onion_drop_agent_0_mean": 0.11, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.08, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.05, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.06, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.82, "potting_onion_agent_0_min": 1, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 16.95, "potting_onion_agent_1_min": 2, "potting_onion_agent_1_max": 26, "dish_pickup_agent_0_mean": 5.58, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 5.14, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.31, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 5.0, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.2, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.06, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.16, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.8, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.12, "soup_delivery_agent_0_min": 0, 
"soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.73, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.82, "optimal_onion_potting_agent_0_min": 1, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 16.95, "optimal_onion_potting_agent_1_min": 2, "optimal_onion_potting_agent_1_max": 26, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.82, "viable_onion_potting_agent_0_min": 1, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 16.95, "viable_onion_potting_agent_1_min": 2, "viable_onion_potting_agent_1_max": 26, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [576.0, 582.0, 530.0, 587.0, 584.0, 630.0, 584.0, 582.0, 584.0, 582.0, 582.0, 579.0, 582.0, 582.0, 582.0, 582.0, 253.0, 576.0, 584.0, 579.0, 579.0, 579.0, 582.0, 581.0, 576.0, 576.0, 579.0, 576.0, 582.0, 582.0, 579.0, 579.0, 579.0, 582.0, 579.0, 576.0, 627.0, 582.0, 587.0, 636.0, 579.0, 587.0, 587.0, 630.0, 516.0, 579.0, 582.0, 582.0, 573.0, 579.0, 522.0, 582.0, 582.0, 579.0, 633.0, 581.0, 579.0, 633.0, 536.0, 639.0, 570.0, 579.0, 579.0, 525.0, 576.0, 576.0, 576.0, 579.0, 17.0, 579.0, 573.0, 579.0, 587.0, 582.0, 522.0, 582.0, 573.0, 576.0, 516.0, 582.0, 573.0, 579.0, 536.0, 525.0, 527.0, 582.0, 539.0, 579.0, 579.0, 525.0, 519.0, 579.0, 582.0, 579.0, 587.0, 516.0, 579.0, 579.0, 570.0, 522.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [290.0, 286.0, 287.0, 295.0, 257.0, 273.0, 285.0, 302.0, 290.0, 294.0, 311.0, 319.0, 289.0, 295.0, 290.0, 292.0, 287.0, 297.0, 294.0, 288.0, 291.0, 291.0, 289.0, 290.0, 291.0, 291.0, 284.0, 298.0, 290.0, 292.0, 290.0, 292.0, 129.0, 124.0, 282.0, 294.0, 285.0, 299.0, 282.0, 297.0, 287.0, 292.0, 286.0, 293.0, 287.0, 295.0, 296.0, 285.0, 291.0, 
285.0, 291.0, 285.0, 291.0, 288.0, 280.0, 296.0, 290.0, 292.0, 289.0, 293.0, 284.0, 295.0, 285.0, 294.0, 292.0, 287.0, 290.0, 292.0, 292.0, 287.0, 280.0, 296.0, 309.0, 318.0, 290.0, 292.0, 290.0, 297.0, 321.0, 315.0, 286.0, 293.0, 285.0, 302.0, 290.0, 297.0, 314.0, 316.0, 258.0, 258.0, 294.0, 285.0, 287.0, 295.0, 291.0, 291.0, 283.0, 290.0, 286.0, 293.0, 254.0, 268.0, 291.0, 291.0, 289.0, 293.0, 291.0, 288.0, 314.0, 319.0, 300.0, 281.0, 286.0, 293.0, 319.0, 314.0, 267.0, 269.0, 319.0, 320.0, 289.0, 281.0, 293.0, 286.0, 279.0, 300.0, 259.0, 266.0, 285.0, 291.0, 287.0, 289.0, 285.0, 291.0, 286.0, 293.0, 3.0, 14.0, 284.0, 295.0, 285.0, 288.0, 292.0, 287.0, 300.0, 287.0, 296.0, 286.0, 256.0, 266.0, 288.0, 294.0, 293.0, 280.0, 288.0, 288.0, 254.0, 262.0, 292.0, 290.0, 284.0, 289.0, 287.0, 292.0, 258.0, 278.0, 258.0, 267.0, 265.0, 262.0, 286.0, 296.0, 266.0, 273.0, 288.0, 291.0, 292.0, 287.0, 257.0, 268.0, 259.0, 260.0, 285.0, 294.0, 289.0, 293.0, 292.0, 287.0, 293.0, 294.0, 250.0, 266.0, 288.0, 291.0, 287.0, 292.0, 287.0, 283.0, 256.0, 266.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6999861085772966, "mean_inference_ms": 1.250801244318193, "mean_action_processing_ms": 0.1340835429209035, "mean_env_wait_ms": 0.8425414523041331, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 639.0, "episode_reward_min": 17.0, "episode_reward_mean": 566.44, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 3.0}, "policy_reward_max": {"ppo": 321.0}, "policy_reward_mean": {"ppo": 283.22}, "hist_stats": {"episode_reward": [576.0, 582.0, 530.0, 587.0, 584.0, 630.0, 584.0, 582.0, 584.0, 582.0, 582.0, 579.0, 582.0, 582.0, 582.0, 582.0, 253.0, 576.0, 584.0, 579.0, 579.0, 579.0, 582.0, 581.0, 576.0, 576.0, 579.0, 576.0, 582.0, 582.0, 579.0, 579.0, 579.0, 582.0, 579.0, 576.0, 627.0, 582.0, 587.0, 636.0, 579.0, 587.0, 587.0, 630.0, 516.0, 579.0, 582.0, 582.0, 573.0, 579.0, 522.0, 582.0, 582.0, 579.0, 633.0, 581.0, 579.0, 633.0, 
536.0, 639.0, 570.0, 579.0, 579.0, 525.0, 576.0, 576.0, 576.0, 579.0, 17.0, 579.0, 573.0, 579.0, 587.0, 582.0, 522.0, 582.0, 573.0, 576.0, 516.0, 582.0, 573.0, 579.0, 536.0, 525.0, 527.0, 582.0, 539.0, 579.0, 579.0, 525.0, 519.0, 579.0, 582.0, 579.0, 587.0, 516.0, 579.0, 579.0, 570.0, 522.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [290.0, 286.0, 287.0, 295.0, 257.0, 273.0, 285.0, 302.0, 290.0, 294.0, 311.0, 319.0, 289.0, 295.0, 290.0, 292.0, 287.0, 297.0, 294.0, 288.0, 291.0, 291.0, 289.0, 290.0, 291.0, 291.0, 284.0, 298.0, 290.0, 292.0, 290.0, 292.0, 129.0, 124.0, 282.0, 294.0, 285.0, 299.0, 282.0, 297.0, 287.0, 292.0, 286.0, 293.0, 287.0, 295.0, 296.0, 285.0, 291.0, 285.0, 291.0, 285.0, 291.0, 288.0, 280.0, 296.0, 290.0, 292.0, 289.0, 293.0, 284.0, 295.0, 285.0, 294.0, 292.0, 287.0, 290.0, 292.0, 292.0, 287.0, 280.0, 296.0, 309.0, 318.0, 290.0, 292.0, 290.0, 297.0, 321.0, 315.0, 286.0, 293.0, 285.0, 302.0, 290.0, 297.0, 314.0, 316.0, 258.0, 258.0, 294.0, 285.0, 287.0, 295.0, 291.0, 291.0, 283.0, 290.0, 286.0, 293.0, 254.0, 268.0, 291.0, 291.0, 289.0, 293.0, 291.0, 288.0, 314.0, 319.0, 300.0, 281.0, 286.0, 293.0, 319.0, 314.0, 267.0, 269.0, 319.0, 320.0, 289.0, 281.0, 293.0, 286.0, 279.0, 300.0, 259.0, 266.0, 285.0, 291.0, 287.0, 289.0, 285.0, 291.0, 286.0, 293.0, 3.0, 14.0, 284.0, 295.0, 285.0, 288.0, 292.0, 287.0, 300.0, 287.0, 296.0, 286.0, 256.0, 266.0, 288.0, 294.0, 293.0, 280.0, 288.0, 288.0, 254.0, 262.0, 292.0, 290.0, 284.0, 289.0, 287.0, 292.0, 258.0, 278.0, 
258.0, 267.0, 265.0, 262.0, 286.0, 296.0, 266.0, 273.0, 288.0, 291.0, 292.0, 287.0, 257.0, 268.0, 259.0, 260.0, 285.0, 294.0, 289.0, 293.0, 292.0, 287.0, 293.0, 294.0, 250.0, 266.0, 288.0, 291.0, 287.0, 292.0, 287.0, 283.0, 256.0, 266.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6999861085772966, "mean_inference_ms": 1.250801244318193, "mean_action_processing_ms": 0.1340835429209035, "mean_env_wait_ms": 0.8425414523041331, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 8678400, "num_agent_steps_trained": 8678400, "num_env_steps_sampled": 4339200, "num_env_steps_trained": 4339200, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 4339200, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 8678400, "timers": {"training_iteration_time_ms": 3626.87, "learn_time_ms": 1147.917, "learn_throughput": 11150.631, "synch_weights_time_ms": 12.909}, "counters": {"num_env_steps_sampled": 4339200, "num_env_steps_trained": 4339200, "num_agent_steps_sampled": 8678400, "num_agent_steps_trained": 8678400}, "done": false, "episodes_total": 10848, "training_iteration": 339, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-22-30", "timestamp": 1666581750, "time_this_iter_s": 3.7197470664978027, "time_total_s": 1300.0307562351227, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": 
true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, 
"train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": 
{"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, 
"enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1300.0307562351227, "timesteps_since_restore": 0, "iterations_since_restore": 339, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.880000000000003, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 195.4, "sparse_reward_min": 0, "sparse_reward_max": 220, "shaped_reward_mean": 175.24, "shaped_reward_min": 17, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.89, "onion_pickup_agent_0_min": 1, "onion_pickup_agent_0_max": 26, "onion_pickup_agent_1_mean": 17.6, "onion_pickup_agent_1_min": 2, "onion_pickup_agent_1_max": 27, "useful_onion_pickup_agent_0_mean": 14.76, "useful_onion_pickup_agent_0_min": 1, "useful_onion_pickup_agent_0_max": 26, "useful_onion_pickup_agent_1_mean": 17.33, "useful_onion_pickup_agent_1_min": 2, "useful_onion_pickup_agent_1_max": 26, "onion_drop_agent_0_mean": 0.09, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.09, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.03, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, 
"useful_onion_drop_agent_1_mean": 0.06, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 14.62, "potting_onion_agent_0_min": 1, "potting_onion_agent_0_max": 25, "potting_onion_agent_1_mean": 17.1, "potting_onion_agent_1_min": 2, "potting_onion_agent_1_max": 26, "dish_pickup_agent_0_mean": 5.71, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 4.94, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.39, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.82, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.2, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.03, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.04, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.23, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.66, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.19, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.63, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.62, "optimal_onion_potting_agent_0_min": 1, "optimal_onion_potting_agent_0_max": 25, "optimal_onion_potting_agent_1_mean": 17.1, "optimal_onion_potting_agent_1_min": 2, "optimal_onion_potting_agent_1_max": 26, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.62, "viable_onion_potting_agent_0_min": 1, "viable_onion_potting_agent_0_max": 25, "viable_onion_potting_agent_1_mean": 17.1, "viable_onion_potting_agent_1_min": 2, "viable_onion_potting_agent_1_max": 26, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.00031251792097464204, "policy_loss": -0.0006752209737896919, "vf_loss": 
7.836665153503418, "vf_explained_var": 0.5419378280639648, "kl": 0.0021702891681343317, "entropy": 0.8419244289398193, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 4352000, "num_env_steps_trained": 4352000, "num_agent_steps_sampled": 8704000, "num_agent_steps_trained": 8704000}, "sampler_results": {"episode_reward_max": 639.0, "episode_reward_min": 17.0, "episode_reward_mean": 566.04, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 3.0}, "policy_reward_max": {"ppo": 321.0}, "policy_reward_mean": {"ppo": 283.02}, "custom_metrics": {"sparse_reward_mean": 195.4, "sparse_reward_min": 0, "sparse_reward_max": 220, "shaped_reward_mean": 175.24, "shaped_reward_min": 17, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.89, "onion_pickup_agent_0_min": 1, "onion_pickup_agent_0_max": 26, "onion_pickup_agent_1_mean": 17.6, 
"onion_pickup_agent_1_min": 2, "onion_pickup_agent_1_max": 27, "useful_onion_pickup_agent_0_mean": 14.76, "useful_onion_pickup_agent_0_min": 1, "useful_onion_pickup_agent_0_max": 26, "useful_onion_pickup_agent_1_mean": 17.33, "useful_onion_pickup_agent_1_min": 2, "useful_onion_pickup_agent_1_max": 26, "onion_drop_agent_0_mean": 0.09, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.09, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.03, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.06, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 14.62, "potting_onion_agent_0_min": 1, "potting_onion_agent_0_max": 25, "potting_onion_agent_1_mean": 17.1, "potting_onion_agent_1_min": 2, "potting_onion_agent_1_max": 26, "dish_pickup_agent_0_mean": 5.71, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 4.94, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.39, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.82, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.2, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.03, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.04, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.23, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.66, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.19, "soup_delivery_agent_0_min": 0, 
"soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.63, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.62, "optimal_onion_potting_agent_0_min": 1, "optimal_onion_potting_agent_0_max": 25, "optimal_onion_potting_agent_1_mean": 17.1, "optimal_onion_potting_agent_1_min": 2, "optimal_onion_potting_agent_1_max": 26, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.62, "viable_onion_potting_agent_0_min": 1, "viable_onion_potting_agent_0_max": 25, "viable_onion_potting_agent_1_mean": 17.1, "viable_onion_potting_agent_1_min": 2, "viable_onion_potting_agent_1_max": 26, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 582.0, 579.0, 576.0, 627.0, 582.0, 587.0, 636.0, 579.0, 587.0, 587.0, 630.0, 516.0, 579.0, 582.0, 582.0, 573.0, 579.0, 522.0, 582.0, 582.0, 579.0, 633.0, 581.0, 579.0, 633.0, 536.0, 639.0, 570.0, 579.0, 579.0, 525.0, 576.0, 576.0, 576.0, 579.0, 17.0, 579.0, 573.0, 579.0, 587.0, 582.0, 522.0, 582.0, 573.0, 576.0, 516.0, 582.0, 573.0, 579.0, 536.0, 525.0, 527.0, 582.0, 539.0, 579.0, 579.0, 525.0, 519.0, 579.0, 582.0, 579.0, 587.0, 516.0, 579.0, 579.0, 570.0, 522.0, 582.0, 582.0, 522.0, 579.0, 573.0, 579.0, 516.0, 570.0, 576.0, 522.0, 582.0, 582.0, 573.0, 570.0, 582.0, 576.0, 530.0, 579.0, 582.0, 522.0, 630.0, 573.0, 579.0, 582.0, 579.0, 579.0, 576.0, 579.0, 573.0, 522.0, 579.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [292.0, 287.0, 290.0, 292.0, 292.0, 287.0, 280.0, 296.0, 309.0, 318.0, 290.0, 292.0, 290.0, 297.0, 321.0, 315.0, 286.0, 293.0, 285.0, 302.0, 290.0, 297.0, 314.0, 316.0, 258.0, 258.0, 294.0, 285.0, 287.0, 295.0, 291.0, 291.0, 283.0, 290.0, 286.0, 293.0, 254.0, 268.0, 291.0, 291.0, 289.0, 293.0, 291.0, 288.0, 314.0, 319.0, 300.0, 281.0, 286.0, 
293.0, 319.0, 314.0, 267.0, 269.0, 319.0, 320.0, 289.0, 281.0, 293.0, 286.0, 279.0, 300.0, 259.0, 266.0, 285.0, 291.0, 287.0, 289.0, 285.0, 291.0, 286.0, 293.0, 3.0, 14.0, 284.0, 295.0, 285.0, 288.0, 292.0, 287.0, 300.0, 287.0, 296.0, 286.0, 256.0, 266.0, 288.0, 294.0, 293.0, 280.0, 288.0, 288.0, 254.0, 262.0, 292.0, 290.0, 284.0, 289.0, 287.0, 292.0, 258.0, 278.0, 258.0, 267.0, 265.0, 262.0, 286.0, 296.0, 266.0, 273.0, 288.0, 291.0, 292.0, 287.0, 257.0, 268.0, 259.0, 260.0, 285.0, 294.0, 289.0, 293.0, 292.0, 287.0, 293.0, 294.0, 250.0, 266.0, 288.0, 291.0, 287.0, 292.0, 287.0, 283.0, 256.0, 266.0, 293.0, 289.0, 288.0, 294.0, 262.0, 260.0, 288.0, 291.0, 284.0, 289.0, 291.0, 288.0, 254.0, 262.0, 279.0, 291.0, 288.0, 288.0, 261.0, 261.0, 290.0, 292.0, 289.0, 293.0, 288.0, 285.0, 282.0, 288.0, 286.0, 296.0, 290.0, 286.0, 262.0, 268.0, 283.0, 296.0, 288.0, 294.0, 261.0, 261.0, 311.0, 319.0, 288.0, 285.0, 286.0, 293.0, 290.0, 292.0, 292.0, 287.0, 292.0, 287.0, 286.0, 290.0, 292.0, 287.0, 282.0, 291.0, 262.0, 260.0, 291.0, 288.0, 290.0, 292.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6999324824907194, "mean_inference_ms": 1.250869291011297, "mean_action_processing_ms": 0.13407466927511189, "mean_env_wait_ms": 0.8424427085405526, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 639.0, "episode_reward_min": 17.0, "episode_reward_mean": 566.04, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 3.0}, "policy_reward_max": {"ppo": 321.0}, "policy_reward_mean": {"ppo": 283.02}, "hist_stats": {"episode_reward": [579.0, 582.0, 579.0, 576.0, 627.0, 582.0, 587.0, 636.0, 579.0, 587.0, 587.0, 630.0, 516.0, 579.0, 582.0, 582.0, 573.0, 579.0, 522.0, 582.0, 582.0, 579.0, 633.0, 581.0, 579.0, 633.0, 536.0, 639.0, 570.0, 579.0, 579.0, 525.0, 576.0, 576.0, 576.0, 579.0, 17.0, 579.0, 573.0, 579.0, 587.0, 582.0, 522.0, 582.0, 573.0, 576.0, 516.0, 582.0, 573.0, 579.0, 536.0, 525.0, 527.0, 582.0, 539.0, 579.0, 579.0, 525.0, 
519.0, 579.0, 582.0, 579.0, 587.0, 516.0, 579.0, 579.0, 570.0, 522.0, 582.0, 582.0, 522.0, 579.0, 573.0, 579.0, 516.0, 570.0, 576.0, 522.0, 582.0, 582.0, 573.0, 570.0, 582.0, 576.0, 530.0, 579.0, 582.0, 522.0, 630.0, 573.0, 579.0, 582.0, 579.0, 579.0, 576.0, 579.0, 573.0, 522.0, 579.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [292.0, 287.0, 290.0, 292.0, 292.0, 287.0, 280.0, 296.0, 309.0, 318.0, 290.0, 292.0, 290.0, 297.0, 321.0, 315.0, 286.0, 293.0, 285.0, 302.0, 290.0, 297.0, 314.0, 316.0, 258.0, 258.0, 294.0, 285.0, 287.0, 295.0, 291.0, 291.0, 283.0, 290.0, 286.0, 293.0, 254.0, 268.0, 291.0, 291.0, 289.0, 293.0, 291.0, 288.0, 314.0, 319.0, 300.0, 281.0, 286.0, 293.0, 319.0, 314.0, 267.0, 269.0, 319.0, 320.0, 289.0, 281.0, 293.0, 286.0, 279.0, 300.0, 259.0, 266.0, 285.0, 291.0, 287.0, 289.0, 285.0, 291.0, 286.0, 293.0, 3.0, 14.0, 284.0, 295.0, 285.0, 288.0, 292.0, 287.0, 300.0, 287.0, 296.0, 286.0, 256.0, 266.0, 288.0, 294.0, 293.0, 280.0, 288.0, 288.0, 254.0, 262.0, 292.0, 290.0, 284.0, 289.0, 287.0, 292.0, 258.0, 278.0, 258.0, 267.0, 265.0, 262.0, 286.0, 296.0, 266.0, 273.0, 288.0, 291.0, 292.0, 287.0, 257.0, 268.0, 259.0, 260.0, 285.0, 294.0, 289.0, 293.0, 292.0, 287.0, 293.0, 294.0, 250.0, 266.0, 288.0, 291.0, 287.0, 292.0, 287.0, 283.0, 256.0, 266.0, 293.0, 289.0, 288.0, 294.0, 262.0, 260.0, 288.0, 291.0, 284.0, 289.0, 291.0, 288.0, 254.0, 262.0, 279.0, 291.0, 288.0, 288.0, 261.0, 261.0, 290.0, 292.0, 289.0, 293.0, 288.0, 285.0, 282.0, 288.0, 286.0, 296.0, 
290.0, 286.0, 262.0, 268.0, 283.0, 296.0, 288.0, 294.0, 261.0, 261.0, 311.0, 319.0, 288.0, 285.0, 286.0, 293.0, 290.0, 292.0, 292.0, 287.0, 292.0, 287.0, 286.0, 290.0, 292.0, 287.0, 282.0, 291.0, 262.0, 260.0, 291.0, 288.0, 290.0, 292.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6999324824907194, "mean_inference_ms": 1.250869291011297, "mean_action_processing_ms": 0.13407466927511189, "mean_env_wait_ms": 0.8424427085405526, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 8704000, "num_agent_steps_trained": 8704000, "num_env_steps_sampled": 4352000, "num_env_steps_trained": 4352000, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 4352000, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 8704000, "timers": {"training_iteration_time_ms": 3648.191, "learn_time_ms": 1147.534, "learn_throughput": 11154.352, "synch_weights_time_ms": 12.122}, "counters": {"num_env_steps_sampled": 4352000, "num_env_steps_trained": 4352000, "num_agent_steps_sampled": 8704000, "num_agent_steps_trained": 8704000}, "done": false, "episodes_total": 10880, "training_iteration": 340, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-22-34", "timestamp": 1666581754, "time_this_iter_s": 3.9341721534729004, "time_total_s": 1303.9649283885956, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1303.9649283885956, "timesteps_since_restore": 0, "iterations_since_restore": 340, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 21.899999999999995, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 193.2, "sparse_reward_min": 0, "sparse_reward_max": 220, "shaped_reward_mean": 173.38, "shaped_reward_min": 17, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.67, "onion_pickup_agent_0_min": 1, "onion_pickup_agent_0_max": 26, "onion_pickup_agent_1_mean": 17.45, "onion_pickup_agent_1_min": 2, "onion_pickup_agent_1_max": 27, "useful_onion_pickup_agent_0_mean": 14.56, "useful_onion_pickup_agent_0_min": 1, "useful_onion_pickup_agent_0_max": 26, "useful_onion_pickup_agent_1_mean": 17.2, "useful_onion_pickup_agent_1_min": 2, "useful_onion_pickup_agent_1_max": 26, "onion_drop_agent_0_mean": 0.07, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.08, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.01, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, 
"useful_onion_drop_agent_1_mean": 0.04, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 14.36, "potting_onion_agent_0_min": 1, "potting_onion_agent_0_max": 25, "potting_onion_agent_1_mean": 16.99, "potting_onion_agent_1_min": 2, "potting_onion_agent_1_max": 26, "dish_pickup_agent_0_mean": 5.73, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 4.84, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.45, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.66, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.19, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.06, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.26, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.54, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.2, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.51, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.36, "optimal_onion_potting_agent_0_min": 1, "optimal_onion_potting_agent_0_max": 25, "optimal_onion_potting_agent_1_mean": 16.99, "optimal_onion_potting_agent_1_min": 2, "optimal_onion_potting_agent_1_max": 26, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.36, "viable_onion_potting_agent_0_min": 1, "viable_onion_potting_agent_0_max": 25, "viable_onion_potting_agent_1_mean": 16.99, "viable_onion_potting_agent_1_min": 2, "viable_onion_potting_agent_1_max": 26, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.00047894648741930723, "policy_loss": 0.00012302363757044077, "vf_loss": 
7.830498695373535, "vf_explained_var": 0.5219379663467407, "kl": 0.0018186834640800953, "entropy": 0.8542532324790955, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 4364800, "num_env_steps_trained": 4364800, "num_agent_steps_sampled": 8729600, "num_agent_steps_trained": 8729600}, "sampler_results": {"episode_reward_max": 630.0, "episode_reward_min": 17.0, "episode_reward_mean": 559.78, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 3.0}, "policy_reward_max": {"ppo": 319.0}, "policy_reward_mean": {"ppo": 279.89}, "custom_metrics": {"sparse_reward_mean": 193.2, "sparse_reward_min": 0, "sparse_reward_max": 220, "shaped_reward_mean": 173.38, "shaped_reward_min": 17, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.67, "onion_pickup_agent_0_min": 1, "onion_pickup_agent_0_max": 26, "onion_pickup_agent_1_mean": 17.45, 
"onion_pickup_agent_1_min": 2, "onion_pickup_agent_1_max": 27, "useful_onion_pickup_agent_0_mean": 14.56, "useful_onion_pickup_agent_0_min": 1, "useful_onion_pickup_agent_0_max": 26, "useful_onion_pickup_agent_1_mean": 17.2, "useful_onion_pickup_agent_1_min": 2, "useful_onion_pickup_agent_1_max": 26, "onion_drop_agent_0_mean": 0.07, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.08, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.01, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.04, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 14.36, "potting_onion_agent_0_min": 1, "potting_onion_agent_0_max": 25, "potting_onion_agent_1_mean": 16.99, "potting_onion_agent_1_min": 2, "potting_onion_agent_1_max": 26, "dish_pickup_agent_0_mean": 5.73, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 4.84, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.45, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.66, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.19, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.06, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.26, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.54, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.2, "soup_delivery_agent_0_min": 0, 
"soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.51, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.36, "optimal_onion_potting_agent_0_min": 1, "optimal_onion_potting_agent_0_max": 25, "optimal_onion_potting_agent_1_mean": 16.99, "optimal_onion_potting_agent_1_min": 2, "optimal_onion_potting_agent_1_max": 26, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.36, "viable_onion_potting_agent_0_min": 1, "viable_onion_potting_agent_0_max": 25, "viable_onion_potting_agent_1_mean": 16.99, "viable_onion_potting_agent_1_min": 2, "viable_onion_potting_agent_1_max": 26, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [576.0, 576.0, 576.0, 579.0, 17.0, 579.0, 573.0, 579.0, 587.0, 582.0, 522.0, 582.0, 573.0, 576.0, 516.0, 582.0, 573.0, 579.0, 536.0, 525.0, 527.0, 582.0, 539.0, 579.0, 579.0, 525.0, 519.0, 579.0, 582.0, 579.0, 587.0, 516.0, 579.0, 579.0, 570.0, 522.0, 582.0, 582.0, 522.0, 579.0, 573.0, 579.0, 516.0, 570.0, 576.0, 522.0, 582.0, 582.0, 573.0, 570.0, 582.0, 576.0, 530.0, 579.0, 582.0, 522.0, 630.0, 573.0, 579.0, 582.0, 579.0, 579.0, 576.0, 579.0, 573.0, 522.0, 579.0, 582.0, 525.0, 579.0, 579.0, 582.0, 587.0, 579.0, 587.0, 579.0, 530.0, 522.0, 581.0, 579.0, 576.0, 579.0, 573.0, 522.0, 582.0, 513.0, 576.0, 576.0, 525.0, 582.0, 573.0, 627.0, 579.0, 527.0, 582.0, 530.0, 530.0, 570.0, 579.0, 525.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [285.0, 291.0, 287.0, 289.0, 285.0, 291.0, 286.0, 293.0, 3.0, 14.0, 284.0, 295.0, 285.0, 288.0, 292.0, 287.0, 300.0, 287.0, 296.0, 286.0, 256.0, 266.0, 288.0, 294.0, 293.0, 280.0, 288.0, 288.0, 254.0, 262.0, 292.0, 290.0, 284.0, 289.0, 287.0, 292.0, 258.0, 278.0, 258.0, 267.0, 265.0, 262.0, 286.0, 296.0, 266.0, 273.0, 288.0, 291.0, 292.0, 287.0, 
257.0, 268.0, 259.0, 260.0, 285.0, 294.0, 289.0, 293.0, 292.0, 287.0, 293.0, 294.0, 250.0, 266.0, 288.0, 291.0, 287.0, 292.0, 287.0, 283.0, 256.0, 266.0, 293.0, 289.0, 288.0, 294.0, 262.0, 260.0, 288.0, 291.0, 284.0, 289.0, 291.0, 288.0, 254.0, 262.0, 279.0, 291.0, 288.0, 288.0, 261.0, 261.0, 290.0, 292.0, 289.0, 293.0, 288.0, 285.0, 282.0, 288.0, 286.0, 296.0, 290.0, 286.0, 262.0, 268.0, 283.0, 296.0, 288.0, 294.0, 261.0, 261.0, 311.0, 319.0, 288.0, 285.0, 286.0, 293.0, 290.0, 292.0, 292.0, 287.0, 292.0, 287.0, 286.0, 290.0, 292.0, 287.0, 282.0, 291.0, 262.0, 260.0, 291.0, 288.0, 290.0, 292.0, 264.0, 261.0, 289.0, 290.0, 292.0, 287.0, 289.0, 293.0, 287.0, 300.0, 293.0, 286.0, 294.0, 293.0, 290.0, 289.0, 274.0, 256.0, 258.0, 264.0, 297.0, 284.0, 287.0, 292.0, 286.0, 290.0, 293.0, 286.0, 284.0, 289.0, 267.0, 255.0, 287.0, 295.0, 251.0, 262.0, 287.0, 289.0, 290.0, 286.0, 257.0, 268.0, 293.0, 289.0, 280.0, 293.0, 318.0, 309.0, 291.0, 288.0, 263.0, 264.0, 295.0, 287.0, 273.0, 257.0, 263.0, 267.0, 281.0, 289.0, 290.0, 289.0, 260.0, 265.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6998735604012057, "mean_inference_ms": 1.2509366550400884, "mean_action_processing_ms": 0.13406633406435667, "mean_env_wait_ms": 0.8423450797843902, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 630.0, "episode_reward_min": 17.0, "episode_reward_mean": 559.78, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 3.0}, "policy_reward_max": {"ppo": 319.0}, "policy_reward_mean": {"ppo": 279.89}, "hist_stats": {"episode_reward": [576.0, 576.0, 576.0, 579.0, 17.0, 579.0, 573.0, 579.0, 587.0, 582.0, 522.0, 582.0, 573.0, 576.0, 516.0, 582.0, 573.0, 579.0, 536.0, 525.0, 527.0, 582.0, 539.0, 579.0, 579.0, 525.0, 519.0, 579.0, 582.0, 579.0, 587.0, 516.0, 579.0, 579.0, 570.0, 522.0, 582.0, 582.0, 522.0, 579.0, 573.0, 579.0, 516.0, 570.0, 576.0, 522.0, 582.0, 582.0, 573.0, 570.0, 582.0, 576.0, 530.0, 579.0, 582.0, 522.0, 630.0, 573.0, 
579.0, 582.0, 579.0, 579.0, 576.0, 579.0, 573.0, 522.0, 579.0, 582.0, 525.0, 579.0, 579.0, 582.0, 587.0, 579.0, 587.0, 579.0, 530.0, 522.0, 581.0, 579.0, 576.0, 579.0, 573.0, 522.0, 582.0, 513.0, 576.0, 576.0, 525.0, 582.0, 573.0, 627.0, 579.0, 527.0, 582.0, 530.0, 530.0, 570.0, 579.0, 525.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [285.0, 291.0, 287.0, 289.0, 285.0, 291.0, 286.0, 293.0, 3.0, 14.0, 284.0, 295.0, 285.0, 288.0, 292.0, 287.0, 300.0, 287.0, 296.0, 286.0, 256.0, 266.0, 288.0, 294.0, 293.0, 280.0, 288.0, 288.0, 254.0, 262.0, 292.0, 290.0, 284.0, 289.0, 287.0, 292.0, 258.0, 278.0, 258.0, 267.0, 265.0, 262.0, 286.0, 296.0, 266.0, 273.0, 288.0, 291.0, 292.0, 287.0, 257.0, 268.0, 259.0, 260.0, 285.0, 294.0, 289.0, 293.0, 292.0, 287.0, 293.0, 294.0, 250.0, 266.0, 288.0, 291.0, 287.0, 292.0, 287.0, 283.0, 256.0, 266.0, 293.0, 289.0, 288.0, 294.0, 262.0, 260.0, 288.0, 291.0, 284.0, 289.0, 291.0, 288.0, 254.0, 262.0, 279.0, 291.0, 288.0, 288.0, 261.0, 261.0, 290.0, 292.0, 289.0, 293.0, 288.0, 285.0, 282.0, 288.0, 286.0, 296.0, 290.0, 286.0, 262.0, 268.0, 283.0, 296.0, 288.0, 294.0, 261.0, 261.0, 311.0, 319.0, 288.0, 285.0, 286.0, 293.0, 290.0, 292.0, 292.0, 287.0, 292.0, 287.0, 286.0, 290.0, 292.0, 287.0, 282.0, 291.0, 262.0, 260.0, 291.0, 288.0, 290.0, 292.0, 264.0, 261.0, 289.0, 290.0, 292.0, 287.0, 289.0, 293.0, 287.0, 300.0, 293.0, 286.0, 294.0, 293.0, 290.0, 289.0, 274.0, 256.0, 258.0, 264.0, 297.0, 284.0, 287.0, 292.0, 286.0, 290.0, 293.0, 286.0, 284.0, 289.0, 
267.0, 255.0, 287.0, 295.0, 251.0, 262.0, 287.0, 289.0, 290.0, 286.0, 257.0, 268.0, 293.0, 289.0, 280.0, 293.0, 318.0, 309.0, 291.0, 288.0, 263.0, 264.0, 295.0, 287.0, 273.0, 257.0, 263.0, 267.0, 281.0, 289.0, 290.0, 289.0, 260.0, 265.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6998735604012057, "mean_inference_ms": 1.2509366550400884, "mean_action_processing_ms": 0.13406633406435667, "mean_env_wait_ms": 0.8423450797843902, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 8729600, "num_agent_steps_trained": 8729600, "num_env_steps_sampled": 4364800, "num_env_steps_trained": 4364800, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 4364800, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 8729600, "timers": {"training_iteration_time_ms": 3651.695, "learn_time_ms": 1144.334, "learn_throughput": 11185.543, "synch_weights_time_ms": 11.629}, "counters": {"num_env_steps_sampled": 4364800, "num_env_steps_trained": 4364800, "num_agent_steps_sampled": 8729600, "num_agent_steps_trained": 8729600}, "done": false, "episodes_total": 10912, "training_iteration": 341, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-22-38", "timestamp": 1666581758, "time_this_iter_s": 3.704293727874756, "time_total_s": 1307.6692221164703, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1307.6692221164703, "timesteps_since_restore": 0, "iterations_since_restore": 341, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.75, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 195.2, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 175.0, "shaped_reward_min": 136, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.6, "onion_pickup_agent_0_min": 4, "onion_pickup_agent_0_max": 26, "onion_pickup_agent_1_mean": 17.72, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 29, "useful_onion_pickup_agent_0_mean": 14.49, "useful_onion_pickup_agent_0_min": 4, "useful_onion_pickup_agent_0_max": 26, "useful_onion_pickup_agent_1_mean": 17.52, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 29, "onion_drop_agent_0_mean": 0.08, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.06, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.03, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, 
"useful_onion_drop_agent_1_mean": 0.03, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 14.26, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 25, "potting_onion_agent_1_mean": 17.3, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 29, "dish_pickup_agent_0_mean": 5.85, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 4.82, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.63, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.61, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.15, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.08, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.44, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 10, "soup_pickup_agent_1_mean": 4.48, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.38, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 10, "soup_delivery_agent_1_mean": 4.42, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.26, "optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 25, "optimal_onion_potting_agent_1_mean": 17.3, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 29, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.26, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 25, "viable_onion_potting_agent_1_mean": 17.3, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 29, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -8.920964319258928e-06, "policy_loss": -0.000365772342775017, "vf_loss": 
7.832732677459717, "vf_explained_var": 0.5286672115325928, "kl": 0.0022454443387687206, "entropy": 0.8528425693511963, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 4377600, "num_env_steps_trained": 4377600, "num_agent_steps_sampled": 8755200, "num_agent_steps_trained": 8755200}, "sampler_results": {"episode_reward_max": 633.0, "episode_reward_min": 456.0, "episode_reward_mean": 565.4, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 222.0}, "policy_reward_max": {"ppo": 322.0}, "policy_reward_mean": {"ppo": 282.7}, "custom_metrics": {"sparse_reward_mean": 195.2, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 175.0, "shaped_reward_min": 136, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.6, "onion_pickup_agent_0_min": 4, "onion_pickup_agent_0_max": 26, "onion_pickup_agent_1_mean": 17.72, 
"onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 29, "useful_onion_pickup_agent_0_mean": 14.49, "useful_onion_pickup_agent_0_min": 4, "useful_onion_pickup_agent_0_max": 26, "useful_onion_pickup_agent_1_mean": 17.52, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 29, "onion_drop_agent_0_mean": 0.08, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.06, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.03, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.03, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 14.26, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 25, "potting_onion_agent_1_mean": 17.3, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 29, "dish_pickup_agent_0_mean": 5.85, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 4.82, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.63, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.61, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.15, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.08, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.44, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 10, "soup_pickup_agent_1_mean": 4.48, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.38, "soup_delivery_agent_0_min": 2, 
"soup_delivery_agent_0_max": 10, "soup_delivery_agent_1_mean": 4.42, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.26, "optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 25, "optimal_onion_potting_agent_1_mean": 17.3, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 29, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.26, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 25, "viable_onion_potting_agent_1_mean": 17.3, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 29, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 579.0, 570.0, 522.0, 582.0, 582.0, 522.0, 579.0, 573.0, 579.0, 516.0, 570.0, 576.0, 522.0, 582.0, 582.0, 573.0, 570.0, 582.0, 576.0, 530.0, 579.0, 582.0, 522.0, 630.0, 573.0, 579.0, 582.0, 579.0, 579.0, 576.0, 579.0, 573.0, 522.0, 579.0, 582.0, 525.0, 579.0, 579.0, 582.0, 587.0, 579.0, 587.0, 579.0, 530.0, 522.0, 581.0, 579.0, 576.0, 579.0, 573.0, 522.0, 582.0, 513.0, 576.0, 576.0, 525.0, 582.0, 573.0, 627.0, 579.0, 527.0, 582.0, 530.0, 530.0, 570.0, 579.0, 525.0, 584.0, 582.0, 582.0, 579.0, 579.0, 584.0, 527.0, 522.0, 579.0, 582.0, 582.0, 579.0, 527.0, 576.0, 456.0, 582.0, 576.0, 579.0, 522.0, 582.0, 573.0, 584.0, 527.0, 522.0, 584.0, 579.0, 573.0, 530.0, 573.0, 633.0, 579.0, 525.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [288.0, 291.0, 287.0, 292.0, 287.0, 283.0, 256.0, 266.0, 293.0, 289.0, 288.0, 294.0, 262.0, 260.0, 288.0, 291.0, 284.0, 289.0, 291.0, 288.0, 254.0, 262.0, 279.0, 291.0, 288.0, 288.0, 261.0, 261.0, 290.0, 292.0, 289.0, 293.0, 288.0, 285.0, 282.0, 288.0, 286.0, 296.0, 290.0, 286.0, 262.0, 268.0, 283.0, 296.0, 288.0, 294.0, 261.0, 261.0, 311.0, 
319.0, 288.0, 285.0, 286.0, 293.0, 290.0, 292.0, 292.0, 287.0, 292.0, 287.0, 286.0, 290.0, 292.0, 287.0, 282.0, 291.0, 262.0, 260.0, 291.0, 288.0, 290.0, 292.0, 264.0, 261.0, 289.0, 290.0, 292.0, 287.0, 289.0, 293.0, 287.0, 300.0, 293.0, 286.0, 294.0, 293.0, 290.0, 289.0, 274.0, 256.0, 258.0, 264.0, 297.0, 284.0, 287.0, 292.0, 286.0, 290.0, 293.0, 286.0, 284.0, 289.0, 267.0, 255.0, 287.0, 295.0, 251.0, 262.0, 287.0, 289.0, 290.0, 286.0, 257.0, 268.0, 293.0, 289.0, 280.0, 293.0, 318.0, 309.0, 291.0, 288.0, 263.0, 264.0, 295.0, 287.0, 273.0, 257.0, 263.0, 267.0, 281.0, 289.0, 290.0, 289.0, 260.0, 265.0, 287.0, 297.0, 287.0, 295.0, 284.0, 298.0, 287.0, 292.0, 291.0, 288.0, 286.0, 298.0, 268.0, 259.0, 265.0, 257.0, 289.0, 290.0, 293.0, 289.0, 293.0, 289.0, 284.0, 295.0, 259.0, 268.0, 286.0, 290.0, 222.0, 234.0, 287.0, 295.0, 284.0, 292.0, 292.0, 287.0, 261.0, 261.0, 294.0, 288.0, 289.0, 284.0, 291.0, 293.0, 269.0, 258.0, 261.0, 261.0, 292.0, 292.0, 288.0, 291.0, 283.0, 290.0, 260.0, 270.0, 289.0, 284.0, 322.0, 311.0, 293.0, 286.0, 261.0, 264.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6998021023353869, "mean_inference_ms": 1.250992359542731, "mean_action_processing_ms": 0.13405790576990334, "mean_env_wait_ms": 0.8422437053910774, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 633.0, "episode_reward_min": 456.0, "episode_reward_mean": 565.4, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 222.0}, "policy_reward_max": {"ppo": 322.0}, "policy_reward_mean": {"ppo": 282.7}, "hist_stats": {"episode_reward": [579.0, 579.0, 570.0, 522.0, 582.0, 582.0, 522.0, 579.0, 573.0, 579.0, 516.0, 570.0, 576.0, 522.0, 582.0, 582.0, 573.0, 570.0, 582.0, 576.0, 530.0, 579.0, 582.0, 522.0, 630.0, 573.0, 579.0, 582.0, 579.0, 579.0, 576.0, 579.0, 573.0, 522.0, 579.0, 582.0, 525.0, 579.0, 579.0, 582.0, 587.0, 579.0, 587.0, 579.0, 530.0, 522.0, 581.0, 579.0, 576.0, 579.0, 573.0, 522.0, 582.0, 513.0, 576.0, 576.0, 525.0, 
582.0, 573.0, 627.0, 579.0, 527.0, 582.0, 530.0, 530.0, 570.0, 579.0, 525.0, 584.0, 582.0, 582.0, 579.0, 579.0, 584.0, 527.0, 522.0, 579.0, 582.0, 582.0, 579.0, 527.0, 576.0, 456.0, 582.0, 576.0, 579.0, 522.0, 582.0, 573.0, 584.0, 527.0, 522.0, 584.0, 579.0, 573.0, 530.0, 573.0, 633.0, 579.0, 525.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [288.0, 291.0, 287.0, 292.0, 287.0, 283.0, 256.0, 266.0, 293.0, 289.0, 288.0, 294.0, 262.0, 260.0, 288.0, 291.0, 284.0, 289.0, 291.0, 288.0, 254.0, 262.0, 279.0, 291.0, 288.0, 288.0, 261.0, 261.0, 290.0, 292.0, 289.0, 293.0, 288.0, 285.0, 282.0, 288.0, 286.0, 296.0, 290.0, 286.0, 262.0, 268.0, 283.0, 296.0, 288.0, 294.0, 261.0, 261.0, 311.0, 319.0, 288.0, 285.0, 286.0, 293.0, 290.0, 292.0, 292.0, 287.0, 292.0, 287.0, 286.0, 290.0, 292.0, 287.0, 282.0, 291.0, 262.0, 260.0, 291.0, 288.0, 290.0, 292.0, 264.0, 261.0, 289.0, 290.0, 292.0, 287.0, 289.0, 293.0, 287.0, 300.0, 293.0, 286.0, 294.0, 293.0, 290.0, 289.0, 274.0, 256.0, 258.0, 264.0, 297.0, 284.0, 287.0, 292.0, 286.0, 290.0, 293.0, 286.0, 284.0, 289.0, 267.0, 255.0, 287.0, 295.0, 251.0, 262.0, 287.0, 289.0, 290.0, 286.0, 257.0, 268.0, 293.0, 289.0, 280.0, 293.0, 318.0, 309.0, 291.0, 288.0, 263.0, 264.0, 295.0, 287.0, 273.0, 257.0, 263.0, 267.0, 281.0, 289.0, 290.0, 289.0, 260.0, 265.0, 287.0, 297.0, 287.0, 295.0, 284.0, 298.0, 287.0, 292.0, 291.0, 288.0, 286.0, 298.0, 268.0, 259.0, 265.0, 257.0, 289.0, 290.0, 293.0, 289.0, 293.0, 289.0, 284.0, 295.0, 259.0, 268.0, 286.0, 290.0, 222.0, 
234.0, 287.0, 295.0, 284.0, 292.0, 292.0, 287.0, 261.0, 261.0, 294.0, 288.0, 289.0, 284.0, 291.0, 293.0, 269.0, 258.0, 261.0, 261.0, 292.0, 292.0, 288.0, 291.0, 283.0, 290.0, 260.0, 270.0, 289.0, 284.0, 322.0, 311.0, 293.0, 286.0, 261.0, 264.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6998021023353869, "mean_inference_ms": 1.250992359542731, "mean_action_processing_ms": 0.13405790576990334, "mean_env_wait_ms": 0.8422437053910774, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 8755200, "num_agent_steps_trained": 8755200, "num_env_steps_sampled": 4377600, "num_env_steps_trained": 4377600, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 4377600, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 8755200, "timers": {"training_iteration_time_ms": 3653.038, "learn_time_ms": 1147.561, "learn_throughput": 11154.089, "synch_weights_time_ms": 11.78}, "counters": {"num_env_steps_sampled": 4377600, "num_env_steps_trained": 4377600, "num_agent_steps_sampled": 8755200, "num_agent_steps_trained": 8755200}, "done": false, "episodes_total": 10944, "training_iteration": 342, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-22-41", "timestamp": 1666581761, "time_this_iter_s": 3.6865930557250977, "time_total_s": 1311.3558151721954, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1311.3558151721954, "timesteps_since_restore": 0, "iterations_since_restore": 342, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.44, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 194.2, "sparse_reward_min": 100, "sparse_reward_max": 220, "shaped_reward_mean": 174.81, "shaped_reward_min": 122, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.79, "onion_pickup_agent_0_min": 4, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 17.35, "onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 29, "useful_onion_pickup_agent_0_mean": 14.67, "useful_onion_pickup_agent_0_min": 4, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 17.15, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 29, "onion_drop_agent_0_mean": 0.08, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.04, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.04, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, 
"useful_onion_drop_agent_1_mean": 0.02, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 14.44, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 17.03, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 29, "dish_pickup_agent_0_mean": 5.74, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 4.91, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.59, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.66, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.09, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.13, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.38, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 10, "soup_pickup_agent_1_mean": 4.55, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.31, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 10, "soup_delivery_agent_1_mean": 4.47, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.44, "optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 17.03, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 29, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.44, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 17.03, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 29, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.000201219052542001, "policy_loss": -0.00016514863818883896, "vf_loss": 
7.8804121017456055, "vf_explained_var": 0.5171889066696167, "kl": 0.0026697758585214615, "entropy": 0.8433467149734497, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 4390400, "num_env_steps_trained": 4390400, "num_agent_steps_sampled": 8780800, "num_agent_steps_trained": 8780800}, "sampler_results": {"episode_reward_max": 633.0, "episode_reward_min": 322.0, "episode_reward_mean": 563.21, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 153.0}, "policy_reward_max": {"ppo": 322.0}, "policy_reward_mean": {"ppo": 281.605}, "custom_metrics": {"sparse_reward_mean": 194.2, "sparse_reward_min": 100, "sparse_reward_max": 220, "shaped_reward_mean": 174.81, "shaped_reward_min": 122, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.79, "onion_pickup_agent_0_min": 4, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 17.35, 
"onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 29, "useful_onion_pickup_agent_0_mean": 14.67, "useful_onion_pickup_agent_0_min": 4, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 17.15, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 29, "onion_drop_agent_0_mean": 0.08, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.04, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.04, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.02, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 14.44, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 17.03, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 29, "dish_pickup_agent_0_mean": 5.74, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 4.91, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.59, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.66, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.09, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.13, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.38, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 10, "soup_pickup_agent_1_mean": 4.55, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.31, "soup_delivery_agent_0_min": 0, 
"soup_delivery_agent_0_max": 10, "soup_delivery_agent_1_mean": 4.47, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.44, "optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 17.03, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 29, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.44, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 17.03, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 29, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [573.0, 522.0, 579.0, 582.0, 525.0, 579.0, 579.0, 582.0, 587.0, 579.0, 587.0, 579.0, 530.0, 522.0, 581.0, 579.0, 576.0, 579.0, 573.0, 522.0, 582.0, 513.0, 576.0, 576.0, 525.0, 582.0, 573.0, 627.0, 579.0, 527.0, 582.0, 530.0, 530.0, 570.0, 579.0, 525.0, 584.0, 582.0, 582.0, 579.0, 579.0, 584.0, 527.0, 522.0, 579.0, 582.0, 582.0, 579.0, 527.0, 576.0, 456.0, 582.0, 576.0, 579.0, 522.0, 582.0, 573.0, 584.0, 527.0, 522.0, 584.0, 579.0, 573.0, 530.0, 573.0, 633.0, 579.0, 525.0, 576.0, 525.0, 584.0, 530.0, 582.0, 525.0, 536.0, 582.0, 570.0, 582.0, 630.0, 579.0, 587.0, 587.0, 582.0, 582.0, 630.0, 576.0, 582.0, 573.0, 462.0, 582.0, 567.0, 567.0, 525.0, 573.0, 582.0, 322.0, 573.0, 582.0, 570.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [282.0, 291.0, 262.0, 260.0, 291.0, 288.0, 290.0, 292.0, 264.0, 261.0, 289.0, 290.0, 292.0, 287.0, 289.0, 293.0, 287.0, 300.0, 293.0, 286.0, 294.0, 293.0, 290.0, 289.0, 274.0, 256.0, 258.0, 264.0, 297.0, 284.0, 287.0, 292.0, 286.0, 290.0, 293.0, 286.0, 284.0, 289.0, 267.0, 255.0, 287.0, 295.0, 251.0, 262.0, 287.0, 289.0, 290.0, 286.0, 257.0, 
268.0, 293.0, 289.0, 280.0, 293.0, 318.0, 309.0, 291.0, 288.0, 263.0, 264.0, 295.0, 287.0, 273.0, 257.0, 263.0, 267.0, 281.0, 289.0, 290.0, 289.0, 260.0, 265.0, 287.0, 297.0, 287.0, 295.0, 284.0, 298.0, 287.0, 292.0, 291.0, 288.0, 286.0, 298.0, 268.0, 259.0, 265.0, 257.0, 289.0, 290.0, 293.0, 289.0, 293.0, 289.0, 284.0, 295.0, 259.0, 268.0, 286.0, 290.0, 222.0, 234.0, 287.0, 295.0, 284.0, 292.0, 292.0, 287.0, 261.0, 261.0, 294.0, 288.0, 289.0, 284.0, 291.0, 293.0, 269.0, 258.0, 261.0, 261.0, 292.0, 292.0, 288.0, 291.0, 283.0, 290.0, 260.0, 270.0, 289.0, 284.0, 322.0, 311.0, 293.0, 286.0, 261.0, 264.0, 285.0, 291.0, 261.0, 264.0, 296.0, 288.0, 264.0, 266.0, 296.0, 286.0, 269.0, 256.0, 269.0, 267.0, 290.0, 292.0, 285.0, 285.0, 289.0, 293.0, 311.0, 319.0, 287.0, 292.0, 289.0, 298.0, 292.0, 295.0, 294.0, 288.0, 288.0, 294.0, 311.0, 319.0, 285.0, 291.0, 294.0, 288.0, 289.0, 284.0, 228.0, 234.0, 290.0, 292.0, 276.0, 291.0, 276.0, 291.0, 267.0, 258.0, 284.0, 289.0, 289.0, 293.0, 153.0, 169.0, 286.0, 287.0, 293.0, 289.0, 290.0, 280.0, 298.0, 284.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.699720819683431, "mean_inference_ms": 1.2508368529183134, "mean_action_processing_ms": 0.13404914974769147, "mean_env_wait_ms": 0.8421437948391369, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 633.0, "episode_reward_min": 322.0, "episode_reward_mean": 563.21, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 153.0}, "policy_reward_max": {"ppo": 322.0}, "policy_reward_mean": {"ppo": 281.605}, "hist_stats": {"episode_reward": [573.0, 522.0, 579.0, 582.0, 525.0, 579.0, 579.0, 582.0, 587.0, 579.0, 587.0, 579.0, 530.0, 522.0, 581.0, 579.0, 576.0, 579.0, 573.0, 522.0, 582.0, 513.0, 576.0, 576.0, 525.0, 582.0, 573.0, 627.0, 579.0, 527.0, 582.0, 530.0, 530.0, 570.0, 579.0, 525.0, 584.0, 582.0, 582.0, 579.0, 579.0, 584.0, 527.0, 522.0, 579.0, 582.0, 582.0, 579.0, 527.0, 576.0, 456.0, 582.0, 576.0, 579.0, 522.0, 582.0, 573.0, 
584.0, 527.0, 522.0, 584.0, 579.0, 573.0, 530.0, 573.0, 633.0, 579.0, 525.0, 576.0, 525.0, 584.0, 530.0, 582.0, 525.0, 536.0, 582.0, 570.0, 582.0, 630.0, 579.0, 587.0, 587.0, 582.0, 582.0, 630.0, 576.0, 582.0, 573.0, 462.0, 582.0, 567.0, 567.0, 525.0, 573.0, 582.0, 322.0, 573.0, 582.0, 570.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [282.0, 291.0, 262.0, 260.0, 291.0, 288.0, 290.0, 292.0, 264.0, 261.0, 289.0, 290.0, 292.0, 287.0, 289.0, 293.0, 287.0, 300.0, 293.0, 286.0, 294.0, 293.0, 290.0, 289.0, 274.0, 256.0, 258.0, 264.0, 297.0, 284.0, 287.0, 292.0, 286.0, 290.0, 293.0, 286.0, 284.0, 289.0, 267.0, 255.0, 287.0, 295.0, 251.0, 262.0, 287.0, 289.0, 290.0, 286.0, 257.0, 268.0, 293.0, 289.0, 280.0, 293.0, 318.0, 309.0, 291.0, 288.0, 263.0, 264.0, 295.0, 287.0, 273.0, 257.0, 263.0, 267.0, 281.0, 289.0, 290.0, 289.0, 260.0, 265.0, 287.0, 297.0, 287.0, 295.0, 284.0, 298.0, 287.0, 292.0, 291.0, 288.0, 286.0, 298.0, 268.0, 259.0, 265.0, 257.0, 289.0, 290.0, 293.0, 289.0, 293.0, 289.0, 284.0, 295.0, 259.0, 268.0, 286.0, 290.0, 222.0, 234.0, 287.0, 295.0, 284.0, 292.0, 292.0, 287.0, 261.0, 261.0, 294.0, 288.0, 289.0, 284.0, 291.0, 293.0, 269.0, 258.0, 261.0, 261.0, 292.0, 292.0, 288.0, 291.0, 283.0, 290.0, 260.0, 270.0, 289.0, 284.0, 322.0, 311.0, 293.0, 286.0, 261.0, 264.0, 285.0, 291.0, 261.0, 264.0, 296.0, 288.0, 264.0, 266.0, 296.0, 286.0, 269.0, 256.0, 269.0, 267.0, 290.0, 292.0, 285.0, 285.0, 289.0, 293.0, 311.0, 319.0, 287.0, 292.0, 289.0, 298.0, 292.0, 295.0, 294.0, 
288.0, 288.0, 294.0, 311.0, 319.0, 285.0, 291.0, 294.0, 288.0, 289.0, 284.0, 228.0, 234.0, 290.0, 292.0, 276.0, 291.0, 276.0, 291.0, 267.0, 258.0, 284.0, 289.0, 289.0, 293.0, 153.0, 169.0, 286.0, 287.0, 293.0, 289.0, 290.0, 280.0, 298.0, 284.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.699720819683431, "mean_inference_ms": 1.2508368529183134, "mean_action_processing_ms": 0.13404914974769147, "mean_env_wait_ms": 0.8421437948391369, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 8780800, "num_agent_steps_trained": 8780800, "num_env_steps_sampled": 4390400, "num_env_steps_trained": 4390400, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 4390400, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 8780800, "timers": {"training_iteration_time_ms": 3647.502, "learn_time_ms": 1144.384, "learn_throughput": 11185.06, "synch_weights_time_ms": 10.64}, "counters": {"num_env_steps_sampled": 4390400, "num_env_steps_trained": 4390400, "num_agent_steps_sampled": 8780800, "num_agent_steps_trained": 8780800}, "done": false, "episodes_total": 10976, "training_iteration": 343, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-22-45", "timestamp": 1666581765, "time_this_iter_s": 3.7214314937591553, "time_total_s": 1315.0772466659546, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1315.0772466659546, "timesteps_since_restore": 0, "iterations_since_restore": 343, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.55, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 194.4, "sparse_reward_min": 100, "sparse_reward_max": 220, "shaped_reward_mean": 175.12, "shaped_reward_min": 122, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.05, "onion_pickup_agent_0_min": 4, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 17.16, "onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 29, "useful_onion_pickup_agent_0_mean": 14.97, "useful_onion_pickup_agent_0_min": 4, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 16.99, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 29, "onion_drop_agent_0_mean": 0.08, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.05, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.05, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, 
"useful_onion_drop_agent_1_mean": 0.02, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 14.64, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 16.87, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 29, "dish_pickup_agent_0_mean": 5.7, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.04, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.52, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.81, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.13, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.13, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.04, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.3, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 10, "soup_pickup_agent_1_mean": 4.62, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.24, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 10, "soup_delivery_agent_1_mean": 4.53, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.64, "optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 16.87, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 29, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.64, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 16.87, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 29, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.00018610269762575626, "policy_loss": -0.00017776305321604013, "vf_loss": 
7.827777862548828, "vf_explained_var": 0.5340909957885742, "kl": 0.0031581996008753777, "entropy": 0.8378207683563232, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 4403200, "num_env_steps_trained": 4403200, "num_agent_steps_sampled": 8806400, "num_agent_steps_trained": 8806400}, "sampler_results": {"episode_reward_max": 639.0, "episode_reward_min": 322.0, "episode_reward_mean": 563.92, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 153.0}, "policy_reward_max": {"ppo": 323.0}, "policy_reward_mean": {"ppo": 281.96}, "custom_metrics": {"sparse_reward_mean": 194.4, "sparse_reward_min": 100, "sparse_reward_max": 220, "shaped_reward_mean": 175.12, "shaped_reward_min": 122, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.05, "onion_pickup_agent_0_min": 4, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 17.16, 
"onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 29, "useful_onion_pickup_agent_0_mean": 14.97, "useful_onion_pickup_agent_0_min": 4, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 16.99, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 29, "onion_drop_agent_0_mean": 0.08, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.05, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.05, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.02, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 14.64, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 16.87, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 29, "dish_pickup_agent_0_mean": 5.7, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.04, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.52, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.81, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.13, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.13, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.04, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.3, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 10, "soup_pickup_agent_1_mean": 4.62, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.24, "soup_delivery_agent_0_min": 0, 
"soup_delivery_agent_0_max": 10, "soup_delivery_agent_1_mean": 4.53, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.64, "optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 16.87, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 29, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.64, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 16.87, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 29, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [530.0, 570.0, 579.0, 525.0, 584.0, 582.0, 582.0, 579.0, 579.0, 584.0, 527.0, 522.0, 579.0, 582.0, 582.0, 579.0, 527.0, 576.0, 456.0, 582.0, 576.0, 579.0, 522.0, 582.0, 573.0, 584.0, 527.0, 522.0, 584.0, 579.0, 573.0, 530.0, 573.0, 633.0, 579.0, 525.0, 576.0, 525.0, 584.0, 530.0, 582.0, 525.0, 536.0, 582.0, 570.0, 582.0, 630.0, 579.0, 587.0, 587.0, 582.0, 582.0, 630.0, 576.0, 582.0, 573.0, 462.0, 582.0, 567.0, 567.0, 525.0, 573.0, 582.0, 322.0, 573.0, 582.0, 570.0, 582.0, 582.0, 579.0, 587.0, 582.0, 587.0, 573.0, 522.0, 579.0, 582.0, 411.0, 582.0, 582.0, 579.0, 582.0, 570.0, 579.0, 579.0, 482.0, 579.0, 524.0, 576.0, 579.0, 582.0, 582.0, 573.0, 570.0, 582.0, 639.0, 582.0, 573.0, 525.0, 573.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [263.0, 267.0, 281.0, 289.0, 290.0, 289.0, 260.0, 265.0, 287.0, 297.0, 287.0, 295.0, 284.0, 298.0, 287.0, 292.0, 291.0, 288.0, 286.0, 298.0, 268.0, 259.0, 265.0, 257.0, 289.0, 290.0, 293.0, 289.0, 293.0, 289.0, 284.0, 295.0, 259.0, 268.0, 286.0, 290.0, 222.0, 234.0, 287.0, 295.0, 284.0, 292.0, 292.0, 287.0, 261.0, 261.0, 294.0, 288.0, 289.0, 
284.0, 291.0, 293.0, 269.0, 258.0, 261.0, 261.0, 292.0, 292.0, 288.0, 291.0, 283.0, 290.0, 260.0, 270.0, 289.0, 284.0, 322.0, 311.0, 293.0, 286.0, 261.0, 264.0, 285.0, 291.0, 261.0, 264.0, 296.0, 288.0, 264.0, 266.0, 296.0, 286.0, 269.0, 256.0, 269.0, 267.0, 290.0, 292.0, 285.0, 285.0, 289.0, 293.0, 311.0, 319.0, 287.0, 292.0, 289.0, 298.0, 292.0, 295.0, 294.0, 288.0, 288.0, 294.0, 311.0, 319.0, 285.0, 291.0, 294.0, 288.0, 289.0, 284.0, 228.0, 234.0, 290.0, 292.0, 276.0, 291.0, 276.0, 291.0, 267.0, 258.0, 284.0, 289.0, 289.0, 293.0, 153.0, 169.0, 286.0, 287.0, 293.0, 289.0, 290.0, 280.0, 298.0, 284.0, 293.0, 289.0, 295.0, 284.0, 289.0, 298.0, 292.0, 290.0, 294.0, 293.0, 288.0, 285.0, 262.0, 260.0, 287.0, 292.0, 291.0, 291.0, 206.0, 205.0, 291.0, 291.0, 292.0, 290.0, 283.0, 296.0, 299.0, 283.0, 281.0, 289.0, 289.0, 290.0, 286.0, 293.0, 234.0, 248.0, 286.0, 293.0, 269.0, 255.0, 291.0, 285.0, 287.0, 292.0, 295.0, 287.0, 297.0, 285.0, 287.0, 286.0, 277.0, 293.0, 288.0, 294.0, 323.0, 316.0, 293.0, 289.0, 287.0, 286.0, 258.0, 267.0, 283.0, 290.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6996287283780928, "mean_inference_ms": 1.250660796238729, "mean_action_processing_ms": 0.1340424521978151, "mean_env_wait_ms": 0.8420568664063134, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 639.0, "episode_reward_min": 322.0, "episode_reward_mean": 563.92, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 153.0}, "policy_reward_max": {"ppo": 323.0}, "policy_reward_mean": {"ppo": 281.96}, "hist_stats": {"episode_reward": [530.0, 570.0, 579.0, 525.0, 584.0, 582.0, 582.0, 579.0, 579.0, 584.0, 527.0, 522.0, 579.0, 582.0, 582.0, 579.0, 527.0, 576.0, 456.0, 582.0, 576.0, 579.0, 522.0, 582.0, 573.0, 584.0, 527.0, 522.0, 584.0, 579.0, 573.0, 530.0, 573.0, 633.0, 579.0, 525.0, 576.0, 525.0, 584.0, 530.0, 582.0, 525.0, 536.0, 582.0, 570.0, 582.0, 630.0, 579.0, 587.0, 587.0, 582.0, 582.0, 630.0, 576.0, 582.0, 573.0, 462.0, 
582.0, 567.0, 567.0, 525.0, 573.0, 582.0, 322.0, 573.0, 582.0, 570.0, 582.0, 582.0, 579.0, 587.0, 582.0, 587.0, 573.0, 522.0, 579.0, 582.0, 411.0, 582.0, 582.0, 579.0, 582.0, 570.0, 579.0, 579.0, 482.0, 579.0, 524.0, 576.0, 579.0, 582.0, 582.0, 573.0, 570.0, 582.0, 639.0, 582.0, 573.0, 525.0, 573.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [263.0, 267.0, 281.0, 289.0, 290.0, 289.0, 260.0, 265.0, 287.0, 297.0, 287.0, 295.0, 284.0, 298.0, 287.0, 292.0, 291.0, 288.0, 286.0, 298.0, 268.0, 259.0, 265.0, 257.0, 289.0, 290.0, 293.0, 289.0, 293.0, 289.0, 284.0, 295.0, 259.0, 268.0, 286.0, 290.0, 222.0, 234.0, 287.0, 295.0, 284.0, 292.0, 292.0, 287.0, 261.0, 261.0, 294.0, 288.0, 289.0, 284.0, 291.0, 293.0, 269.0, 258.0, 261.0, 261.0, 292.0, 292.0, 288.0, 291.0, 283.0, 290.0, 260.0, 270.0, 289.0, 284.0, 322.0, 311.0, 293.0, 286.0, 261.0, 264.0, 285.0, 291.0, 261.0, 264.0, 296.0, 288.0, 264.0, 266.0, 296.0, 286.0, 269.0, 256.0, 269.0, 267.0, 290.0, 292.0, 285.0, 285.0, 289.0, 293.0, 311.0, 319.0, 287.0, 292.0, 289.0, 298.0, 292.0, 295.0, 294.0, 288.0, 288.0, 294.0, 311.0, 319.0, 285.0, 291.0, 294.0, 288.0, 289.0, 284.0, 228.0, 234.0, 290.0, 292.0, 276.0, 291.0, 276.0, 291.0, 267.0, 258.0, 284.0, 289.0, 289.0, 293.0, 153.0, 169.0, 286.0, 287.0, 293.0, 289.0, 290.0, 280.0, 298.0, 284.0, 293.0, 289.0, 295.0, 284.0, 289.0, 298.0, 292.0, 290.0, 294.0, 293.0, 288.0, 285.0, 262.0, 260.0, 287.0, 292.0, 291.0, 291.0, 206.0, 205.0, 291.0, 291.0, 292.0, 290.0, 283.0, 296.0, 299.0, 283.0, 281.0, 
289.0, 289.0, 290.0, 286.0, 293.0, 234.0, 248.0, 286.0, 293.0, 269.0, 255.0, 291.0, 285.0, 287.0, 292.0, 295.0, 287.0, 297.0, 285.0, 287.0, 286.0, 277.0, 293.0, 288.0, 294.0, 323.0, 316.0, 293.0, 289.0, 287.0, 286.0, 258.0, 267.0, 283.0, 290.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6996287283780928, "mean_inference_ms": 1.250660796238729, "mean_action_processing_ms": 0.1340424521978151, "mean_env_wait_ms": 0.8420568664063134, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 8806400, "num_agent_steps_trained": 8806400, "num_env_steps_sampled": 4403200, "num_env_steps_trained": 4403200, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 4403200, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 8806400, "timers": {"training_iteration_time_ms": 3646.251, "learn_time_ms": 1143.163, "learn_throughput": 11197.002, "synch_weights_time_ms": 10.685}, "counters": {"num_env_steps_sampled": 4403200, "num_env_steps_trained": 4403200, "num_agent_steps_sampled": 8806400, "num_agent_steps_trained": 8806400}, "done": false, "episodes_total": 11008, "training_iteration": 344, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-22-49", "timestamp": 1666581769, "time_this_iter_s": 3.617185592651367, "time_total_s": 1318.694432258606, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1318.694432258606, "timesteps_since_restore": 0, "iterations_since_restore": 344, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.76, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 196.2, "sparse_reward_min": 100, "sparse_reward_max": 220, "shaped_reward_mean": 176.07, "shaped_reward_min": 122, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.63, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 16.76, "onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 15.57, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 16.62, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 26, "onion_drop_agent_0_mean": 0.05, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.05, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.03, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, 
"useful_onion_drop_agent_1_mean": 0.02, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.3, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 16.5, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 26, "dish_pickup_agent_0_mean": 5.52, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.26, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.29, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 5.05, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.1, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.14, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.09, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.84, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.05, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.79, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.3, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 16.5, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 26, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.3, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 16.5, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 26, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0020328951068222523, "policy_loss": 0.0016740041319280863, "vf_loss": 
7.817473411560059, "vf_explained_var": 0.5217878818511963, "kl": 0.002134096808731556, "entropy": 0.8457106351852417, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 4416000, "num_env_steps_trained": 4416000, "num_agent_steps_sampled": 8832000, "num_agent_steps_trained": 8832000}, "sampler_results": {"episode_reward_max": 639.0, "episode_reward_min": 322.0, "episode_reward_mean": 568.47, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 153.0}, "policy_reward_max": {"ppo": 323.0}, "policy_reward_mean": {"ppo": 284.235}, "custom_metrics": {"sparse_reward_mean": 196.2, "sparse_reward_min": 100, "sparse_reward_max": 220, "shaped_reward_mean": 176.07, "shaped_reward_min": 122, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.63, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 16.76, 
"onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 15.57, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 16.62, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 26, "onion_drop_agent_0_mean": 0.05, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.05, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.03, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.02, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.3, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 16.5, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 26, "dish_pickup_agent_0_mean": 5.52, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.26, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.29, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 5.05, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.1, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.14, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.09, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.84, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.05, "soup_delivery_agent_0_min": 0, 
"soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.79, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.3, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 16.5, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 26, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.3, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 16.5, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 26, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [573.0, 633.0, 579.0, 525.0, 576.0, 525.0, 584.0, 530.0, 582.0, 525.0, 536.0, 582.0, 570.0, 582.0, 630.0, 579.0, 587.0, 587.0, 582.0, 582.0, 630.0, 576.0, 582.0, 573.0, 462.0, 582.0, 567.0, 567.0, 525.0, 573.0, 582.0, 322.0, 573.0, 582.0, 570.0, 582.0, 582.0, 579.0, 587.0, 582.0, 587.0, 573.0, 522.0, 579.0, 582.0, 411.0, 582.0, 582.0, 579.0, 582.0, 570.0, 579.0, 579.0, 482.0, 579.0, 524.0, 576.0, 579.0, 582.0, 582.0, 573.0, 570.0, 582.0, 639.0, 582.0, 573.0, 525.0, 573.0, 579.0, 579.0, 579.0, 582.0, 582.0, 582.0, 573.0, 576.0, 582.0, 582.0, 582.0, 582.0, 582.0, 582.0, 579.0, 513.0, 573.0, 573.0, 582.0, 570.0, 522.0, 581.0, 582.0, 630.0, 567.0, 582.0, 582.0, 579.0, 525.0, 579.0, 522.0, 627.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [289.0, 284.0, 322.0, 311.0, 293.0, 286.0, 261.0, 264.0, 285.0, 291.0, 261.0, 264.0, 296.0, 288.0, 264.0, 266.0, 296.0, 286.0, 269.0, 256.0, 269.0, 267.0, 290.0, 292.0, 285.0, 285.0, 289.0, 293.0, 311.0, 319.0, 287.0, 292.0, 289.0, 298.0, 292.0, 295.0, 294.0, 288.0, 288.0, 294.0, 311.0, 319.0, 285.0, 291.0, 294.0, 288.0, 289.0, 284.0, 228.0, 
234.0, 290.0, 292.0, 276.0, 291.0, 276.0, 291.0, 267.0, 258.0, 284.0, 289.0, 289.0, 293.0, 153.0, 169.0, 286.0, 287.0, 293.0, 289.0, 290.0, 280.0, 298.0, 284.0, 293.0, 289.0, 295.0, 284.0, 289.0, 298.0, 292.0, 290.0, 294.0, 293.0, 288.0, 285.0, 262.0, 260.0, 287.0, 292.0, 291.0, 291.0, 206.0, 205.0, 291.0, 291.0, 292.0, 290.0, 283.0, 296.0, 299.0, 283.0, 281.0, 289.0, 289.0, 290.0, 286.0, 293.0, 234.0, 248.0, 286.0, 293.0, 269.0, 255.0, 291.0, 285.0, 287.0, 292.0, 295.0, 287.0, 297.0, 285.0, 287.0, 286.0, 277.0, 293.0, 288.0, 294.0, 323.0, 316.0, 293.0, 289.0, 287.0, 286.0, 258.0, 267.0, 283.0, 290.0, 286.0, 293.0, 290.0, 289.0, 286.0, 293.0, 288.0, 294.0, 287.0, 295.0, 280.0, 302.0, 285.0, 288.0, 291.0, 285.0, 286.0, 296.0, 292.0, 290.0, 297.0, 285.0, 285.0, 297.0, 294.0, 288.0, 289.0, 293.0, 282.0, 297.0, 250.0, 263.0, 294.0, 279.0, 287.0, 286.0, 290.0, 292.0, 279.0, 291.0, 258.0, 264.0, 284.0, 297.0, 291.0, 291.0, 314.0, 316.0, 283.0, 284.0, 290.0, 292.0, 291.0, 291.0, 291.0, 288.0, 260.0, 265.0, 288.0, 291.0, 258.0, 264.0, 304.0, 323.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6995297820196078, "mean_inference_ms": 1.2504793126314804, "mean_action_processing_ms": 0.13403454747663798, "mean_env_wait_ms": 0.8419635373672074, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 639.0, "episode_reward_min": 322.0, "episode_reward_mean": 568.47, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 153.0}, "policy_reward_max": {"ppo": 323.0}, "policy_reward_mean": {"ppo": 284.235}, "hist_stats": {"episode_reward": [573.0, 633.0, 579.0, 525.0, 576.0, 525.0, 584.0, 530.0, 582.0, 525.0, 536.0, 582.0, 570.0, 582.0, 630.0, 579.0, 587.0, 587.0, 582.0, 582.0, 630.0, 576.0, 582.0, 573.0, 462.0, 582.0, 567.0, 567.0, 525.0, 573.0, 582.0, 322.0, 573.0, 582.0, 570.0, 582.0, 582.0, 579.0, 587.0, 582.0, 587.0, 573.0, 522.0, 579.0, 582.0, 411.0, 582.0, 582.0, 579.0, 582.0, 570.0, 579.0, 579.0, 482.0, 579.0, 524.0, 
576.0, 579.0, 582.0, 582.0, 573.0, 570.0, 582.0, 639.0, 582.0, 573.0, 525.0, 573.0, 579.0, 579.0, 579.0, 582.0, 582.0, 582.0, 573.0, 576.0, 582.0, 582.0, 582.0, 582.0, 582.0, 582.0, 579.0, 513.0, 573.0, 573.0, 582.0, 570.0, 522.0, 581.0, 582.0, 630.0, 567.0, 582.0, 582.0, 579.0, 525.0, 579.0, 522.0, 627.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [289.0, 284.0, 322.0, 311.0, 293.0, 286.0, 261.0, 264.0, 285.0, 291.0, 261.0, 264.0, 296.0, 288.0, 264.0, 266.0, 296.0, 286.0, 269.0, 256.0, 269.0, 267.0, 290.0, 292.0, 285.0, 285.0, 289.0, 293.0, 311.0, 319.0, 287.0, 292.0, 289.0, 298.0, 292.0, 295.0, 294.0, 288.0, 288.0, 294.0, 311.0, 319.0, 285.0, 291.0, 294.0, 288.0, 289.0, 284.0, 228.0, 234.0, 290.0, 292.0, 276.0, 291.0, 276.0, 291.0, 267.0, 258.0, 284.0, 289.0, 289.0, 293.0, 153.0, 169.0, 286.0, 287.0, 293.0, 289.0, 290.0, 280.0, 298.0, 284.0, 293.0, 289.0, 295.0, 284.0, 289.0, 298.0, 292.0, 290.0, 294.0, 293.0, 288.0, 285.0, 262.0, 260.0, 287.0, 292.0, 291.0, 291.0, 206.0, 205.0, 291.0, 291.0, 292.0, 290.0, 283.0, 296.0, 299.0, 283.0, 281.0, 289.0, 289.0, 290.0, 286.0, 293.0, 234.0, 248.0, 286.0, 293.0, 269.0, 255.0, 291.0, 285.0, 287.0, 292.0, 295.0, 287.0, 297.0, 285.0, 287.0, 286.0, 277.0, 293.0, 288.0, 294.0, 323.0, 316.0, 293.0, 289.0, 287.0, 286.0, 258.0, 267.0, 283.0, 290.0, 286.0, 293.0, 290.0, 289.0, 286.0, 293.0, 288.0, 294.0, 287.0, 295.0, 280.0, 302.0, 285.0, 288.0, 291.0, 285.0, 286.0, 296.0, 292.0, 290.0, 297.0, 285.0, 285.0, 297.0, 294.0, 288.0, 289.0, 293.0, 
282.0, 297.0, 250.0, 263.0, 294.0, 279.0, 287.0, 286.0, 290.0, 292.0, 279.0, 291.0, 258.0, 264.0, 284.0, 297.0, 291.0, 291.0, 314.0, 316.0, 283.0, 284.0, 290.0, 292.0, 291.0, 291.0, 291.0, 288.0, 260.0, 265.0, 288.0, 291.0, 258.0, 264.0, 304.0, 323.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6995297820196078, "mean_inference_ms": 1.2504793126314804, "mean_action_processing_ms": 0.13403454747663798, "mean_env_wait_ms": 0.8419635373672074, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 8832000, "num_agent_steps_trained": 8832000, "num_env_steps_sampled": 4416000, "num_env_steps_trained": 4416000, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 4416000, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 8832000, "timers": {"training_iteration_time_ms": 3645.109, "learn_time_ms": 1147.935, "learn_throughput": 11150.453, "synch_weights_time_ms": 10.013}, "counters": {"num_env_steps_sampled": 4416000, "num_env_steps_trained": 4416000, "num_agent_steps_sampled": 8832000, "num_agent_steps_trained": 8832000}, "done": false, "episodes_total": 11040, "training_iteration": 345, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-22-53", "timestamp": 1666581773, "time_this_iter_s": 3.703167676925659, "time_total_s": 1322.3975999355316, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1322.3975999355316, "timesteps_since_restore": 0, "iterations_since_restore": 345, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.683333333333334, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 196.8, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 175.98, "shaped_reward_min": 131, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.8, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 26, "onion_pickup_agent_1_mean": 16.67, "onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 15.7, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 26, "useful_onion_pickup_agent_1_mean": 16.5, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 25, "onion_drop_agent_0_mean": 0.08, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.06, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.03, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, 
"useful_onion_drop_agent_1_mean": 0.03, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.41, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 25, "potting_onion_agent_1_mean": 16.39, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 25, "dish_pickup_agent_0_mean": 5.46, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.3, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.2, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 5.11, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.13, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.12, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.04, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.02, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.93, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.98, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.89, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.41, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 25, "optimal_onion_potting_agent_1_mean": 16.39, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 25, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.41, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 25, "viable_onion_potting_agent_1_mean": 16.39, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 25, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0014021097449585795, "policy_loss": -0.0017652350943535566, "vf_loss": 
7.849297523498535, "vf_explained_var": 0.5106943845748901, "kl": 0.0021750519517809153, "entropy": 0.8436084985733032, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 4428800, "num_env_steps_trained": 4428800, "num_agent_steps_sampled": 8857600, "num_agent_steps_trained": 8857600}, "sampler_results": {"episode_reward_max": 639.0, "episode_reward_min": 411.0, "episode_reward_mean": 569.58, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 205.0}, "policy_reward_max": {"ppo": 323.0}, "policy_reward_mean": {"ppo": 284.79}, "custom_metrics": {"sparse_reward_mean": 196.8, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 175.98, "shaped_reward_min": 131, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.8, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 26, "onion_pickup_agent_1_mean": 16.67, 
"onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 15.7, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 26, "useful_onion_pickup_agent_1_mean": 16.5, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 25, "onion_drop_agent_0_mean": 0.08, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.06, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.03, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.03, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.41, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 25, "potting_onion_agent_1_mean": 16.39, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 25, "dish_pickup_agent_0_mean": 5.46, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.3, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.2, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 5.11, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.13, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.12, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.04, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.02, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.93, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.98, "soup_delivery_agent_0_min": 2, 
"soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.89, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.41, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 25, "optimal_onion_potting_agent_1_mean": 16.39, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 25, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.41, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 25, "viable_onion_potting_agent_1_mean": 16.39, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 25, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [573.0, 582.0, 570.0, 582.0, 582.0, 579.0, 587.0, 582.0, 587.0, 573.0, 522.0, 579.0, 582.0, 411.0, 582.0, 582.0, 579.0, 582.0, 570.0, 579.0, 579.0, 482.0, 579.0, 524.0, 576.0, 579.0, 582.0, 582.0, 573.0, 570.0, 582.0, 639.0, 582.0, 573.0, 525.0, 573.0, 579.0, 579.0, 579.0, 582.0, 582.0, 582.0, 573.0, 576.0, 582.0, 582.0, 582.0, 582.0, 582.0, 582.0, 579.0, 513.0, 573.0, 573.0, 582.0, 570.0, 522.0, 581.0, 582.0, 630.0, 567.0, 582.0, 582.0, 579.0, 525.0, 579.0, 522.0, 627.0, 479.0, 573.0, 567.0, 516.0, 516.0, 587.0, 579.0, 582.0, 576.0, 587.0, 584.0, 525.0, 573.0, 576.0, 579.0, 573.0, 579.0, 468.0, 573.0, 579.0, 582.0, 576.0, 576.0, 576.0, 579.0, 582.0, 567.0, 633.0, 582.0, 582.0, 525.0, 570.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [286.0, 287.0, 293.0, 289.0, 290.0, 280.0, 298.0, 284.0, 293.0, 289.0, 295.0, 284.0, 289.0, 298.0, 292.0, 290.0, 294.0, 293.0, 288.0, 285.0, 262.0, 260.0, 287.0, 292.0, 291.0, 291.0, 206.0, 205.0, 291.0, 291.0, 292.0, 290.0, 283.0, 296.0, 299.0, 283.0, 281.0, 289.0, 289.0, 290.0, 286.0, 293.0, 234.0, 248.0, 286.0, 293.0, 269.0, 255.0, 291.0, 
285.0, 287.0, 292.0, 295.0, 287.0, 297.0, 285.0, 287.0, 286.0, 277.0, 293.0, 288.0, 294.0, 323.0, 316.0, 293.0, 289.0, 287.0, 286.0, 258.0, 267.0, 283.0, 290.0, 286.0, 293.0, 290.0, 289.0, 286.0, 293.0, 288.0, 294.0, 287.0, 295.0, 280.0, 302.0, 285.0, 288.0, 291.0, 285.0, 286.0, 296.0, 292.0, 290.0, 297.0, 285.0, 285.0, 297.0, 294.0, 288.0, 289.0, 293.0, 282.0, 297.0, 250.0, 263.0, 294.0, 279.0, 287.0, 286.0, 290.0, 292.0, 279.0, 291.0, 258.0, 264.0, 284.0, 297.0, 291.0, 291.0, 314.0, 316.0, 283.0, 284.0, 290.0, 292.0, 291.0, 291.0, 291.0, 288.0, 260.0, 265.0, 288.0, 291.0, 258.0, 264.0, 304.0, 323.0, 233.0, 246.0, 282.0, 291.0, 278.0, 289.0, 263.0, 253.0, 261.0, 255.0, 294.0, 293.0, 289.0, 290.0, 293.0, 289.0, 291.0, 285.0, 291.0, 296.0, 287.0, 297.0, 258.0, 267.0, 280.0, 293.0, 284.0, 292.0, 282.0, 297.0, 289.0, 284.0, 288.0, 291.0, 235.0, 233.0, 287.0, 286.0, 290.0, 289.0, 291.0, 291.0, 296.0, 280.0, 292.0, 284.0, 286.0, 290.0, 287.0, 292.0, 288.0, 294.0, 279.0, 288.0, 315.0, 318.0, 282.0, 300.0, 286.0, 296.0, 259.0, 266.0, 277.0, 293.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6994271831971862, "mean_inference_ms": 1.250277241170522, "mean_action_processing_ms": 0.13402499118015013, "mean_env_wait_ms": 0.8418477165944581, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 639.0, "episode_reward_min": 411.0, "episode_reward_mean": 569.58, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 205.0}, "policy_reward_max": {"ppo": 323.0}, "policy_reward_mean": {"ppo": 284.79}, "hist_stats": {"episode_reward": [573.0, 582.0, 570.0, 582.0, 582.0, 579.0, 587.0, 582.0, 587.0, 573.0, 522.0, 579.0, 582.0, 411.0, 582.0, 582.0, 579.0, 582.0, 570.0, 579.0, 579.0, 482.0, 579.0, 524.0, 576.0, 579.0, 582.0, 582.0, 573.0, 570.0, 582.0, 639.0, 582.0, 573.0, 525.0, 573.0, 579.0, 579.0, 579.0, 582.0, 582.0, 582.0, 573.0, 576.0, 582.0, 582.0, 582.0, 582.0, 582.0, 582.0, 579.0, 513.0, 573.0, 573.0, 582.0, 570.0, 522.0, 
581.0, 582.0, 630.0, 567.0, 582.0, 582.0, 579.0, 525.0, 579.0, 522.0, 627.0, 479.0, 573.0, 567.0, 516.0, 516.0, 587.0, 579.0, 582.0, 576.0, 587.0, 584.0, 525.0, 573.0, 576.0, 579.0, 573.0, 579.0, 468.0, 573.0, 579.0, 582.0, 576.0, 576.0, 576.0, 579.0, 582.0, 567.0, 633.0, 582.0, 582.0, 525.0, 570.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [286.0, 287.0, 293.0, 289.0, 290.0, 280.0, 298.0, 284.0, 293.0, 289.0, 295.0, 284.0, 289.0, 298.0, 292.0, 290.0, 294.0, 293.0, 288.0, 285.0, 262.0, 260.0, 287.0, 292.0, 291.0, 291.0, 206.0, 205.0, 291.0, 291.0, 292.0, 290.0, 283.0, 296.0, 299.0, 283.0, 281.0, 289.0, 289.0, 290.0, 286.0, 293.0, 234.0, 248.0, 286.0, 293.0, 269.0, 255.0, 291.0, 285.0, 287.0, 292.0, 295.0, 287.0, 297.0, 285.0, 287.0, 286.0, 277.0, 293.0, 288.0, 294.0, 323.0, 316.0, 293.0, 289.0, 287.0, 286.0, 258.0, 267.0, 283.0, 290.0, 286.0, 293.0, 290.0, 289.0, 286.0, 293.0, 288.0, 294.0, 287.0, 295.0, 280.0, 302.0, 285.0, 288.0, 291.0, 285.0, 286.0, 296.0, 292.0, 290.0, 297.0, 285.0, 285.0, 297.0, 294.0, 288.0, 289.0, 293.0, 282.0, 297.0, 250.0, 263.0, 294.0, 279.0, 287.0, 286.0, 290.0, 292.0, 279.0, 291.0, 258.0, 264.0, 284.0, 297.0, 291.0, 291.0, 314.0, 316.0, 283.0, 284.0, 290.0, 292.0, 291.0, 291.0, 291.0, 288.0, 260.0, 265.0, 288.0, 291.0, 258.0, 264.0, 304.0, 323.0, 233.0, 246.0, 282.0, 291.0, 278.0, 289.0, 263.0, 253.0, 261.0, 255.0, 294.0, 293.0, 289.0, 290.0, 293.0, 289.0, 291.0, 285.0, 291.0, 296.0, 287.0, 297.0, 258.0, 267.0, 280.0, 293.0, 284.0, 292.0, 282.0, 
297.0, 289.0, 284.0, 288.0, 291.0, 235.0, 233.0, 287.0, 286.0, 290.0, 289.0, 291.0, 291.0, 296.0, 280.0, 292.0, 284.0, 286.0, 290.0, 287.0, 292.0, 288.0, 294.0, 279.0, 288.0, 315.0, 318.0, 282.0, 300.0, 286.0, 296.0, 259.0, 266.0, 277.0, 293.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6994271831971862, "mean_inference_ms": 1.250277241170522, "mean_action_processing_ms": 0.13402499118015013, "mean_env_wait_ms": 0.8418477165944581, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 8857600, "num_agent_steps_trained": 8857600, "num_env_steps_sampled": 4428800, "num_env_steps_trained": 4428800, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 4428800, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 8857600, "timers": {"training_iteration_time_ms": 3631.459, "learn_time_ms": 1144.888, "learn_throughput": 11180.137, "synch_weights_time_ms": 9.559}, "counters": {"num_env_steps_sampled": 4428800, "num_env_steps_trained": 4428800, "num_agent_steps_sampled": 8857600, "num_agent_steps_trained": 8857600}, "done": false, "episodes_total": 11072, "training_iteration": 346, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-22-57", "timestamp": 1666581777, "time_this_iter_s": 3.5644054412841797, "time_total_s": 1325.9620053768158, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1325.9620053768158, "timesteps_since_restore": 0, "iterations_since_restore": 346, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.18, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 195.8, "sparse_reward_min": 80, "sparse_reward_max": 220, "shaped_reward_mean": 174.31, "shaped_reward_min": 80, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.26, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 26, "onion_pickup_agent_1_mean": 16.82, "onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 15.15, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 26, "useful_onion_pickup_agent_1_mean": 16.64, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 26, "onion_drop_agent_0_mean": 0.07, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.04, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.02, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, 
"useful_onion_drop_agent_1_mean": 0.03, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 14.91, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 25, "potting_onion_agent_1_mean": 16.55, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 25, "dish_pickup_agent_0_mean": 5.48, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.11, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.26, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.95, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.1, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.11, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.04, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.09, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.79, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.07, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.75, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.91, "optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 25, "optimal_onion_potting_agent_1_mean": 16.55, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 25, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.91, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 25, "viable_onion_potting_agent_1_mean": 16.55, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 25, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.000530306831933558, "policy_loss": 0.00014949997421354055, "vf_loss": 
8.010214805603027, "vf_explained_var": 0.5265098810195923, "kl": 0.0022951120045036077, "entropy": 0.8404301404953003, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 4441600, "num_env_steps_trained": 4441600, "num_agent_steps_sampled": 8883200, "num_agent_steps_trained": 8883200}, "sampler_results": {"episode_reward_max": 633.0, "episode_reward_min": 240.0, "episode_reward_mean": 565.91, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 117.0}, "policy_reward_max": {"ppo": 323.0}, "policy_reward_mean": {"ppo": 282.955}, "custom_metrics": {"sparse_reward_mean": 195.8, "sparse_reward_min": 80, "sparse_reward_max": 220, "shaped_reward_mean": 174.31, "shaped_reward_min": 80, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.26, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 26, "onion_pickup_agent_1_mean": 16.82, 
"onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 15.15, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 26, "useful_onion_pickup_agent_1_mean": 16.64, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 26, "onion_drop_agent_0_mean": 0.07, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.04, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.02, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.03, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 14.91, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 25, "potting_onion_agent_1_mean": 16.55, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 25, "dish_pickup_agent_0_mean": 5.48, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.11, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.26, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.95, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.1, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.11, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.04, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.09, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.79, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.07, "soup_delivery_agent_0_min": 2, 
"soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.75, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.91, "optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 25, "optimal_onion_potting_agent_1_mean": 16.55, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 25, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.91, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 25, "viable_onion_potting_agent_1_mean": 16.55, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 25, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 573.0, 525.0, 573.0, 579.0, 579.0, 579.0, 582.0, 582.0, 582.0, 573.0, 576.0, 582.0, 582.0, 582.0, 582.0, 582.0, 582.0, 579.0, 513.0, 573.0, 573.0, 582.0, 570.0, 522.0, 581.0, 582.0, 630.0, 567.0, 582.0, 582.0, 579.0, 525.0, 579.0, 522.0, 627.0, 479.0, 573.0, 567.0, 516.0, 516.0, 587.0, 579.0, 582.0, 576.0, 587.0, 584.0, 525.0, 573.0, 576.0, 579.0, 573.0, 579.0, 468.0, 573.0, 579.0, 582.0, 576.0, 576.0, 576.0, 579.0, 582.0, 567.0, 633.0, 582.0, 582.0, 525.0, 570.0, 582.0, 582.0, 570.0, 582.0, 576.0, 576.0, 581.0, 579.0, 582.0, 582.0, 582.0, 582.0, 582.0, 579.0, 240.0, 573.0, 513.0, 408.0, 579.0, 573.0, 573.0, 576.0, 573.0, 525.0, 573.0, 524.0, 570.0, 570.0, 630.0, 576.0, 582.0, 570.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [293.0, 289.0, 287.0, 286.0, 258.0, 267.0, 283.0, 290.0, 286.0, 293.0, 290.0, 289.0, 286.0, 293.0, 288.0, 294.0, 287.0, 295.0, 280.0, 302.0, 285.0, 288.0, 291.0, 285.0, 286.0, 296.0, 292.0, 290.0, 297.0, 285.0, 285.0, 297.0, 294.0, 288.0, 289.0, 293.0, 282.0, 297.0, 250.0, 263.0, 294.0, 279.0, 287.0, 286.0, 290.0, 292.0, 279.0, 291.0, 258.0, 
264.0, 284.0, 297.0, 291.0, 291.0, 314.0, 316.0, 283.0, 284.0, 290.0, 292.0, 291.0, 291.0, 291.0, 288.0, 260.0, 265.0, 288.0, 291.0, 258.0, 264.0, 304.0, 323.0, 233.0, 246.0, 282.0, 291.0, 278.0, 289.0, 263.0, 253.0, 261.0, 255.0, 294.0, 293.0, 289.0, 290.0, 293.0, 289.0, 291.0, 285.0, 291.0, 296.0, 287.0, 297.0, 258.0, 267.0, 280.0, 293.0, 284.0, 292.0, 282.0, 297.0, 289.0, 284.0, 288.0, 291.0, 235.0, 233.0, 287.0, 286.0, 290.0, 289.0, 291.0, 291.0, 296.0, 280.0, 292.0, 284.0, 286.0, 290.0, 287.0, 292.0, 288.0, 294.0, 279.0, 288.0, 315.0, 318.0, 282.0, 300.0, 286.0, 296.0, 259.0, 266.0, 277.0, 293.0, 290.0, 292.0, 292.0, 290.0, 281.0, 289.0, 290.0, 292.0, 285.0, 291.0, 289.0, 287.0, 289.0, 292.0, 287.0, 292.0, 291.0, 291.0, 288.0, 294.0, 288.0, 294.0, 299.0, 283.0, 295.0, 287.0, 297.0, 282.0, 117.0, 123.0, 283.0, 290.0, 259.0, 254.0, 198.0, 210.0, 283.0, 296.0, 295.0, 278.0, 292.0, 281.0, 291.0, 285.0, 276.0, 297.0, 262.0, 263.0, 294.0, 279.0, 255.0, 269.0, 293.0, 277.0, 287.0, 283.0, 318.0, 312.0, 289.0, 287.0, 288.0, 294.0, 281.0, 289.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6993324647836038, "mean_inference_ms": 1.2500780366039015, "mean_action_processing_ms": 0.1340147881497094, "mean_env_wait_ms": 0.8417315597688259, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 633.0, "episode_reward_min": 240.0, "episode_reward_mean": 565.91, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 117.0}, "policy_reward_max": {"ppo": 323.0}, "policy_reward_mean": {"ppo": 282.955}, "hist_stats": {"episode_reward": [582.0, 573.0, 525.0, 573.0, 579.0, 579.0, 579.0, 582.0, 582.0, 582.0, 573.0, 576.0, 582.0, 582.0, 582.0, 582.0, 582.0, 582.0, 579.0, 513.0, 573.0, 573.0, 582.0, 570.0, 522.0, 581.0, 582.0, 630.0, 567.0, 582.0, 582.0, 579.0, 525.0, 579.0, 522.0, 627.0, 479.0, 573.0, 567.0, 516.0, 516.0, 587.0, 579.0, 582.0, 576.0, 587.0, 584.0, 525.0, 573.0, 576.0, 579.0, 573.0, 579.0, 468.0, 573.0, 579.0, 582.0, 
576.0, 576.0, 576.0, 579.0, 582.0, 567.0, 633.0, 582.0, 582.0, 525.0, 570.0, 582.0, 582.0, 570.0, 582.0, 576.0, 576.0, 581.0, 579.0, 582.0, 582.0, 582.0, 582.0, 582.0, 579.0, 240.0, 573.0, 513.0, 408.0, 579.0, 573.0, 573.0, 576.0, 573.0, 525.0, 573.0, 524.0, 570.0, 570.0, 630.0, 576.0, 582.0, 570.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [293.0, 289.0, 287.0, 286.0, 258.0, 267.0, 283.0, 290.0, 286.0, 293.0, 290.0, 289.0, 286.0, 293.0, 288.0, 294.0, 287.0, 295.0, 280.0, 302.0, 285.0, 288.0, 291.0, 285.0, 286.0, 296.0, 292.0, 290.0, 297.0, 285.0, 285.0, 297.0, 294.0, 288.0, 289.0, 293.0, 282.0, 297.0, 250.0, 263.0, 294.0, 279.0, 287.0, 286.0, 290.0, 292.0, 279.0, 291.0, 258.0, 264.0, 284.0, 297.0, 291.0, 291.0, 314.0, 316.0, 283.0, 284.0, 290.0, 292.0, 291.0, 291.0, 291.0, 288.0, 260.0, 265.0, 288.0, 291.0, 258.0, 264.0, 304.0, 323.0, 233.0, 246.0, 282.0, 291.0, 278.0, 289.0, 263.0, 253.0, 261.0, 255.0, 294.0, 293.0, 289.0, 290.0, 293.0, 289.0, 291.0, 285.0, 291.0, 296.0, 287.0, 297.0, 258.0, 267.0, 280.0, 293.0, 284.0, 292.0, 282.0, 297.0, 289.0, 284.0, 288.0, 291.0, 235.0, 233.0, 287.0, 286.0, 290.0, 289.0, 291.0, 291.0, 296.0, 280.0, 292.0, 284.0, 286.0, 290.0, 287.0, 292.0, 288.0, 294.0, 279.0, 288.0, 315.0, 318.0, 282.0, 300.0, 286.0, 296.0, 259.0, 266.0, 277.0, 293.0, 290.0, 292.0, 292.0, 290.0, 281.0, 289.0, 290.0, 292.0, 285.0, 291.0, 289.0, 287.0, 289.0, 292.0, 287.0, 292.0, 291.0, 291.0, 288.0, 294.0, 288.0, 294.0, 299.0, 283.0, 295.0, 287.0, 297.0, 282.0, 117.0, 
123.0, 283.0, 290.0, 259.0, 254.0, 198.0, 210.0, 283.0, 296.0, 295.0, 278.0, 292.0, 281.0, 291.0, 285.0, 276.0, 297.0, 262.0, 263.0, 294.0, 279.0, 255.0, 269.0, 293.0, 277.0, 287.0, 283.0, 318.0, 312.0, 289.0, 287.0, 288.0, 294.0, 281.0, 289.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6993324647836038, "mean_inference_ms": 1.2500780366039015, "mean_action_processing_ms": 0.1340147881497094, "mean_env_wait_ms": 0.8417315597688259, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 8883200, "num_agent_steps_trained": 8883200, "num_env_steps_sampled": 4441600, "num_env_steps_trained": 4441600, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 4441600, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 8883200, "timers": {"training_iteration_time_ms": 3628.325, "learn_time_ms": 1138.994, "learn_throughput": 11237.988, "synch_weights_time_ms": 9.363}, "counters": {"num_env_steps_sampled": 4441600, "num_env_steps_trained": 4441600, "num_agent_steps_sampled": 8883200, "num_agent_steps_trained": 8883200}, "done": false, "episodes_total": 11104, "training_iteration": 347, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-23-01", "timestamp": 1666581781, "time_this_iter_s": 3.624319553375244, "time_total_s": 1329.586324930191, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1329.586324930191, "timesteps_since_restore": 0, "iterations_since_restore": 347, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.660000000000004, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 193.2, "sparse_reward_min": 40, "sparse_reward_max": 220, "shaped_reward_mean": 172.49, "shaped_reward_min": 60, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.04, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 26, "onion_pickup_agent_1_mean": 16.72, "onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 14.91, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 26, "useful_onion_pickup_agent_1_mean": 16.54, "useful_onion_pickup_agent_1_min": 6, "useful_onion_pickup_agent_1_max": 26, "onion_drop_agent_0_mean": 0.05, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.05, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.02, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, 
"useful_onion_drop_agent_1_mean": 0.04, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 14.68, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 25, "potting_onion_agent_1_mean": 16.44, "potting_onion_agent_1_min": 6, "potting_onion_agent_1_max": 25, "dish_pickup_agent_0_mean": 5.51, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 4.93, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.31, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.8, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.13, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.08, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.04, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.11, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.67, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.09, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.6, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.68, "optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 25, "optimal_onion_potting_agent_1_mean": 16.44, "optimal_onion_potting_agent_1_min": 6, "optimal_onion_potting_agent_1_max": 25, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.68, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 25, "viable_onion_potting_agent_1_mean": 16.44, "viable_onion_potting_agent_1_min": 6, "viable_onion_potting_agent_1_max": 25, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0008702042396180332, "policy_loss": 0.0005100345006212592, "vf_loss": 
7.837711811065674, "vf_explained_var": 0.5423364639282227, "kl": 0.00229667779058218, "entropy": 0.8472009897232056, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 4454400, "num_env_steps_trained": 4454400, "num_agent_steps_sampled": 8908800, "num_agent_steps_trained": 8908800}, "sampler_results": {"episode_reward_max": 633.0, "episode_reward_min": 140.0, "episode_reward_mean": 558.89, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 66.0}, "policy_reward_max": {"ppo": 323.0}, "policy_reward_mean": {"ppo": 279.445}, "custom_metrics": {"sparse_reward_mean": 193.2, "sparse_reward_min": 40, "sparse_reward_max": 220, "shaped_reward_mean": 172.49, "shaped_reward_min": 60, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.04, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 26, "onion_pickup_agent_1_mean": 16.72, 
"onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 14.91, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 26, "useful_onion_pickup_agent_1_mean": 16.54, "useful_onion_pickup_agent_1_min": 6, "useful_onion_pickup_agent_1_max": 26, "onion_drop_agent_0_mean": 0.05, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.05, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.02, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.04, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 14.68, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 25, "potting_onion_agent_1_mean": 16.44, "potting_onion_agent_1_min": 6, "potting_onion_agent_1_max": 25, "dish_pickup_agent_0_mean": 5.51, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 4.93, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.31, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.8, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.13, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.08, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.04, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.11, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.67, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.09, "soup_delivery_agent_0_min": 2, 
"soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.6, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.68, "optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 25, "optimal_onion_potting_agent_1_mean": 16.44, "optimal_onion_potting_agent_1_min": 6, "optimal_onion_potting_agent_1_max": 25, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.68, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 25, "viable_onion_potting_agent_1_mean": 16.44, "viable_onion_potting_agent_1_min": 6, "viable_onion_potting_agent_1_max": 25, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [525.0, 579.0, 522.0, 627.0, 479.0, 573.0, 567.0, 516.0, 516.0, 587.0, 579.0, 582.0, 576.0, 587.0, 584.0, 525.0, 573.0, 576.0, 579.0, 573.0, 579.0, 468.0, 573.0, 579.0, 582.0, 576.0, 576.0, 576.0, 579.0, 582.0, 567.0, 633.0, 582.0, 582.0, 525.0, 570.0, 582.0, 582.0, 570.0, 582.0, 576.0, 576.0, 581.0, 579.0, 582.0, 582.0, 582.0, 582.0, 582.0, 579.0, 240.0, 573.0, 513.0, 408.0, 579.0, 573.0, 573.0, 576.0, 573.0, 525.0, 573.0, 524.0, 570.0, 570.0, 630.0, 576.0, 582.0, 570.0, 582.0, 582.0, 576.0, 579.0, 473.0, 573.0, 582.0, 576.0, 576.0, 582.0, 576.0, 582.0, 579.0, 630.0, 579.0, 573.0, 516.0, 630.0, 140.0, 576.0, 522.0, 530.0, 525.0, 582.0, 587.0, 582.0, 576.0, 630.0, 579.0, 582.0, 411.0, 522.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [260.0, 265.0, 288.0, 291.0, 258.0, 264.0, 304.0, 323.0, 233.0, 246.0, 282.0, 291.0, 278.0, 289.0, 263.0, 253.0, 261.0, 255.0, 294.0, 293.0, 289.0, 290.0, 293.0, 289.0, 291.0, 285.0, 291.0, 296.0, 287.0, 297.0, 258.0, 267.0, 280.0, 293.0, 284.0, 292.0, 282.0, 297.0, 289.0, 284.0, 288.0, 291.0, 235.0, 233.0, 287.0, 286.0, 290.0, 289.0, 291.0, 
291.0, 296.0, 280.0, 292.0, 284.0, 286.0, 290.0, 287.0, 292.0, 288.0, 294.0, 279.0, 288.0, 315.0, 318.0, 282.0, 300.0, 286.0, 296.0, 259.0, 266.0, 277.0, 293.0, 290.0, 292.0, 292.0, 290.0, 281.0, 289.0, 290.0, 292.0, 285.0, 291.0, 289.0, 287.0, 289.0, 292.0, 287.0, 292.0, 291.0, 291.0, 288.0, 294.0, 288.0, 294.0, 299.0, 283.0, 295.0, 287.0, 297.0, 282.0, 117.0, 123.0, 283.0, 290.0, 259.0, 254.0, 198.0, 210.0, 283.0, 296.0, 295.0, 278.0, 292.0, 281.0, 291.0, 285.0, 276.0, 297.0, 262.0, 263.0, 294.0, 279.0, 255.0, 269.0, 293.0, 277.0, 287.0, 283.0, 318.0, 312.0, 289.0, 287.0, 288.0, 294.0, 281.0, 289.0, 292.0, 290.0, 289.0, 293.0, 290.0, 286.0, 295.0, 284.0, 229.0, 244.0, 290.0, 283.0, 290.0, 292.0, 293.0, 283.0, 288.0, 288.0, 287.0, 295.0, 284.0, 292.0, 289.0, 293.0, 295.0, 284.0, 318.0, 312.0, 291.0, 288.0, 282.0, 291.0, 259.0, 257.0, 319.0, 311.0, 74.0, 66.0, 291.0, 285.0, 259.0, 263.0, 261.0, 269.0, 260.0, 265.0, 293.0, 289.0, 289.0, 298.0, 291.0, 291.0, 292.0, 284.0, 315.0, 315.0, 294.0, 285.0, 282.0, 300.0, 195.0, 216.0, 261.0, 261.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6992606177352377, "mean_inference_ms": 1.2499053635842985, "mean_action_processing_ms": 0.13400700343117392, "mean_env_wait_ms": 0.8416441949257816, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 633.0, "episode_reward_min": 140.0, "episode_reward_mean": 558.89, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 66.0}, "policy_reward_max": {"ppo": 323.0}, "policy_reward_mean": {"ppo": 279.445}, "hist_stats": {"episode_reward": [525.0, 579.0, 522.0, 627.0, 479.0, 573.0, 567.0, 516.0, 516.0, 587.0, 579.0, 582.0, 576.0, 587.0, 584.0, 525.0, 573.0, 576.0, 579.0, 573.0, 579.0, 468.0, 573.0, 579.0, 582.0, 576.0, 576.0, 576.0, 579.0, 582.0, 567.0, 633.0, 582.0, 582.0, 525.0, 570.0, 582.0, 582.0, 570.0, 582.0, 576.0, 576.0, 581.0, 579.0, 582.0, 582.0, 582.0, 582.0, 582.0, 579.0, 240.0, 573.0, 513.0, 408.0, 579.0, 573.0, 573.0, 
576.0, 573.0, 525.0, 573.0, 524.0, 570.0, 570.0, 630.0, 576.0, 582.0, 570.0, 582.0, 582.0, 576.0, 579.0, 473.0, 573.0, 582.0, 576.0, 576.0, 582.0, 576.0, 582.0, 579.0, 630.0, 579.0, 573.0, 516.0, 630.0, 140.0, 576.0, 522.0, 530.0, 525.0, 582.0, 587.0, 582.0, 576.0, 630.0, 579.0, 582.0, 411.0, 522.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [260.0, 265.0, 288.0, 291.0, 258.0, 264.0, 304.0, 323.0, 233.0, 246.0, 282.0, 291.0, 278.0, 289.0, 263.0, 253.0, 261.0, 255.0, 294.0, 293.0, 289.0, 290.0, 293.0, 289.0, 291.0, 285.0, 291.0, 296.0, 287.0, 297.0, 258.0, 267.0, 280.0, 293.0, 284.0, 292.0, 282.0, 297.0, 289.0, 284.0, 288.0, 291.0, 235.0, 233.0, 287.0, 286.0, 290.0, 289.0, 291.0, 291.0, 296.0, 280.0, 292.0, 284.0, 286.0, 290.0, 287.0, 292.0, 288.0, 294.0, 279.0, 288.0, 315.0, 318.0, 282.0, 300.0, 286.0, 296.0, 259.0, 266.0, 277.0, 293.0, 290.0, 292.0, 292.0, 290.0, 281.0, 289.0, 290.0, 292.0, 285.0, 291.0, 289.0, 287.0, 289.0, 292.0, 287.0, 292.0, 291.0, 291.0, 288.0, 294.0, 288.0, 294.0, 299.0, 283.0, 295.0, 287.0, 297.0, 282.0, 117.0, 123.0, 283.0, 290.0, 259.0, 254.0, 198.0, 210.0, 283.0, 296.0, 295.0, 278.0, 292.0, 281.0, 291.0, 285.0, 276.0, 297.0, 262.0, 263.0, 294.0, 279.0, 255.0, 269.0, 293.0, 277.0, 287.0, 283.0, 318.0, 312.0, 289.0, 287.0, 288.0, 294.0, 281.0, 289.0, 292.0, 290.0, 289.0, 293.0, 290.0, 286.0, 295.0, 284.0, 229.0, 244.0, 290.0, 283.0, 290.0, 292.0, 293.0, 283.0, 288.0, 288.0, 287.0, 295.0, 284.0, 292.0, 289.0, 293.0, 295.0, 284.0, 318.0, 312.0, 291.0, 
288.0, 282.0, 291.0, 259.0, 257.0, 319.0, 311.0, 74.0, 66.0, 291.0, 285.0, 259.0, 263.0, 261.0, 269.0, 260.0, 265.0, 293.0, 289.0, 289.0, 298.0, 291.0, 291.0, 292.0, 284.0, 315.0, 315.0, 294.0, 285.0, 282.0, 300.0, 195.0, 216.0, 261.0, 261.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6992606177352377, "mean_inference_ms": 1.2499053635842985, "mean_action_processing_ms": 0.13400700343117392, "mean_env_wait_ms": 0.8416441949257816, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 8908800, "num_agent_steps_trained": 8908800, "num_env_steps_sampled": 4454400, "num_env_steps_trained": 4454400, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 4454400, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 8908800, "timers": {"training_iteration_time_ms": 3645.318, "learn_time_ms": 1148.275, "learn_throughput": 11147.157, "synch_weights_time_ms": 9.796}, "counters": {"num_env_steps_sampled": 4454400, "num_env_steps_trained": 4454400, "num_agent_steps_sampled": 8908800, "num_agent_steps_trained": 8908800}, "done": false, "episodes_total": 11136, "training_iteration": 348, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-23-04", "timestamp": 1666581784, "time_this_iter_s": 3.7834579944610596, "time_total_s": 1333.369782924652, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1333.369782924652, "timesteps_since_restore": 0, "iterations_since_restore": 348, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.950000000000003, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 193.2, "sparse_reward_min": 40, "sparse_reward_max": 220, "shaped_reward_mean": 172.42, "shaped_reward_min": 60, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.84, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 16.9, "onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 14.73, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 16.76, "useful_onion_pickup_agent_1_min": 6, "useful_onion_pickup_agent_1_max": 26, "onion_drop_agent_0_mean": 0.01, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.06, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.0, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 0, 
"useful_onion_drop_agent_1_mean": 0.05, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.53, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 16.61, "potting_onion_agent_1_min": 6, "potting_onion_agent_1_max": 25, "dish_pickup_agent_0_mean": 5.62, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 4.87, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.32, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.73, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.14, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.05, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.19, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.58, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.18, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.49, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.53, "optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 16.61, "optimal_onion_potting_agent_1_min": 6, "optimal_onion_potting_agent_1_max": 25, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.53, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 16.61, "viable_onion_potting_agent_1_min": 6, "viable_onion_potting_agent_1_max": 25, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.001777943572960794, "policy_loss": -0.0021599321626126766, "vf_loss": 
7.920291900634766, "vf_explained_var": 0.5149234533309937, "kl": 0.0022678468376398087, "entropy": 0.8200807571411133, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 4467200, "num_env_steps_trained": 4467200, "num_agent_steps_sampled": 8934400, "num_agent_steps_trained": 8934400}, "sampler_results": {"episode_reward_max": 633.0, "episode_reward_min": 140.0, "episode_reward_mean": 558.82, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 66.0}, "policy_reward_max": {"ppo": 319.0}, "policy_reward_mean": {"ppo": 279.41}, "custom_metrics": {"sparse_reward_mean": 193.2, "sparse_reward_min": 40, "sparse_reward_max": 220, "shaped_reward_mean": 172.42, "shaped_reward_min": 60, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.84, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 16.9, 
"onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 14.73, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 16.76, "useful_onion_pickup_agent_1_min": 6, "useful_onion_pickup_agent_1_max": 26, "onion_drop_agent_0_mean": 0.01, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.06, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.0, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 0, "useful_onion_drop_agent_1_mean": 0.05, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.53, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 16.61, "potting_onion_agent_1_min": 6, "potting_onion_agent_1_max": 25, "dish_pickup_agent_0_mean": 5.62, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 4.87, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.32, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.73, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.14, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.05, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.19, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.58, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.18, "soup_delivery_agent_0_min": 2, 
"soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.49, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.53, "optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 16.61, "optimal_onion_potting_agent_1_min": 6, "optimal_onion_potting_agent_1_max": 25, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.53, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 16.61, "viable_onion_potting_agent_1_min": 6, "viable_onion_potting_agent_1_max": 25, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 582.0, 525.0, 570.0, 582.0, 582.0, 570.0, 582.0, 576.0, 576.0, 581.0, 579.0, 582.0, 582.0, 582.0, 582.0, 582.0, 579.0, 240.0, 573.0, 513.0, 408.0, 579.0, 573.0, 573.0, 576.0, 573.0, 525.0, 573.0, 524.0, 570.0, 570.0, 630.0, 576.0, 582.0, 570.0, 582.0, 582.0, 576.0, 579.0, 473.0, 573.0, 582.0, 576.0, 576.0, 582.0, 576.0, 582.0, 579.0, 630.0, 579.0, 573.0, 516.0, 630.0, 140.0, 576.0, 522.0, 530.0, 525.0, 582.0, 587.0, 582.0, 576.0, 630.0, 579.0, 582.0, 411.0, 522.0, 584.0, 582.0, 570.0, 579.0, 579.0, 579.0, 576.0, 627.0, 570.0, 573.0, 522.0, 576.0, 405.0, 530.0, 579.0, 587.0, 587.0, 582.0, 530.0, 630.0, 468.0, 576.0, 492.0, 576.0, 582.0, 582.0, 522.0, 582.0, 576.0, 573.0, 633.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [282.0, 300.0, 286.0, 296.0, 259.0, 266.0, 277.0, 293.0, 290.0, 292.0, 292.0, 290.0, 281.0, 289.0, 290.0, 292.0, 285.0, 291.0, 289.0, 287.0, 289.0, 292.0, 287.0, 292.0, 291.0, 291.0, 288.0, 294.0, 288.0, 294.0, 299.0, 283.0, 295.0, 287.0, 297.0, 282.0, 117.0, 123.0, 283.0, 290.0, 259.0, 254.0, 198.0, 210.0, 283.0, 296.0, 295.0, 278.0, 292.0, 
281.0, 291.0, 285.0, 276.0, 297.0, 262.0, 263.0, 294.0, 279.0, 255.0, 269.0, 293.0, 277.0, 287.0, 283.0, 318.0, 312.0, 289.0, 287.0, 288.0, 294.0, 281.0, 289.0, 292.0, 290.0, 289.0, 293.0, 290.0, 286.0, 295.0, 284.0, 229.0, 244.0, 290.0, 283.0, 290.0, 292.0, 293.0, 283.0, 288.0, 288.0, 287.0, 295.0, 284.0, 292.0, 289.0, 293.0, 295.0, 284.0, 318.0, 312.0, 291.0, 288.0, 282.0, 291.0, 259.0, 257.0, 319.0, 311.0, 74.0, 66.0, 291.0, 285.0, 259.0, 263.0, 261.0, 269.0, 260.0, 265.0, 293.0, 289.0, 289.0, 298.0, 291.0, 291.0, 292.0, 284.0, 315.0, 315.0, 294.0, 285.0, 282.0, 300.0, 195.0, 216.0, 261.0, 261.0, 295.0, 289.0, 291.0, 291.0, 280.0, 290.0, 288.0, 291.0, 285.0, 294.0, 286.0, 293.0, 288.0, 288.0, 319.0, 308.0, 285.0, 285.0, 293.0, 280.0, 255.0, 267.0, 297.0, 279.0, 196.0, 209.0, 270.0, 260.0, 281.0, 298.0, 291.0, 296.0, 287.0, 300.0, 287.0, 295.0, 267.0, 263.0, 314.0, 316.0, 233.0, 235.0, 288.0, 288.0, 239.0, 253.0, 288.0, 288.0, 288.0, 294.0, 288.0, 294.0, 256.0, 266.0, 291.0, 291.0, 284.0, 292.0, 289.0, 284.0, 319.0, 314.0, 289.0, 290.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6992173718947344, "mean_inference_ms": 1.2497383539343545, "mean_action_processing_ms": 0.13400108868613897, "mean_env_wait_ms": 0.8415750013057658, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 633.0, "episode_reward_min": 140.0, "episode_reward_mean": 558.82, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 66.0}, "policy_reward_max": {"ppo": 319.0}, "policy_reward_mean": {"ppo": 279.41}, "hist_stats": {"episode_reward": [582.0, 582.0, 525.0, 570.0, 582.0, 582.0, 570.0, 582.0, 576.0, 576.0, 581.0, 579.0, 582.0, 582.0, 582.0, 582.0, 582.0, 579.0, 240.0, 573.0, 513.0, 408.0, 579.0, 573.0, 573.0, 576.0, 573.0, 525.0, 573.0, 524.0, 570.0, 570.0, 630.0, 576.0, 582.0, 570.0, 582.0, 582.0, 576.0, 579.0, 473.0, 573.0, 582.0, 576.0, 576.0, 582.0, 576.0, 582.0, 579.0, 630.0, 579.0, 573.0, 516.0, 630.0, 140.0, 576.0, 522.0, 
530.0, 525.0, 582.0, 587.0, 582.0, 576.0, 630.0, 579.0, 582.0, 411.0, 522.0, 584.0, 582.0, 570.0, 579.0, 579.0, 579.0, 576.0, 627.0, 570.0, 573.0, 522.0, 576.0, 405.0, 530.0, 579.0, 587.0, 587.0, 582.0, 530.0, 630.0, 468.0, 576.0, 492.0, 576.0, 582.0, 582.0, 522.0, 582.0, 576.0, 573.0, 633.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [282.0, 300.0, 286.0, 296.0, 259.0, 266.0, 277.0, 293.0, 290.0, 292.0, 292.0, 290.0, 281.0, 289.0, 290.0, 292.0, 285.0, 291.0, 289.0, 287.0, 289.0, 292.0, 287.0, 292.0, 291.0, 291.0, 288.0, 294.0, 288.0, 294.0, 299.0, 283.0, 295.0, 287.0, 297.0, 282.0, 117.0, 123.0, 283.0, 290.0, 259.0, 254.0, 198.0, 210.0, 283.0, 296.0, 295.0, 278.0, 292.0, 281.0, 291.0, 285.0, 276.0, 297.0, 262.0, 263.0, 294.0, 279.0, 255.0, 269.0, 293.0, 277.0, 287.0, 283.0, 318.0, 312.0, 289.0, 287.0, 288.0, 294.0, 281.0, 289.0, 292.0, 290.0, 289.0, 293.0, 290.0, 286.0, 295.0, 284.0, 229.0, 244.0, 290.0, 283.0, 290.0, 292.0, 293.0, 283.0, 288.0, 288.0, 287.0, 295.0, 284.0, 292.0, 289.0, 293.0, 295.0, 284.0, 318.0, 312.0, 291.0, 288.0, 282.0, 291.0, 259.0, 257.0, 319.0, 311.0, 74.0, 66.0, 291.0, 285.0, 259.0, 263.0, 261.0, 269.0, 260.0, 265.0, 293.0, 289.0, 289.0, 298.0, 291.0, 291.0, 292.0, 284.0, 315.0, 315.0, 294.0, 285.0, 282.0, 300.0, 195.0, 216.0, 261.0, 261.0, 295.0, 289.0, 291.0, 291.0, 280.0, 290.0, 288.0, 291.0, 285.0, 294.0, 286.0, 293.0, 288.0, 288.0, 319.0, 308.0, 285.0, 285.0, 293.0, 280.0, 255.0, 267.0, 297.0, 279.0, 196.0, 209.0, 270.0, 260.0, 281.0, 
298.0, 291.0, 296.0, 287.0, 300.0, 287.0, 295.0, 267.0, 263.0, 314.0, 316.0, 233.0, 235.0, 288.0, 288.0, 239.0, 253.0, 288.0, 288.0, 288.0, 294.0, 288.0, 294.0, 256.0, 266.0, 291.0, 291.0, 284.0, 292.0, 289.0, 284.0, 319.0, 314.0, 289.0, 290.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6992173718947344, "mean_inference_ms": 1.2497383539343545, "mean_action_processing_ms": 0.13400108868613897, "mean_env_wait_ms": 0.8415750013057658, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 8934400, "num_agent_steps_trained": 8934400, "num_env_steps_sampled": 4467200, "num_env_steps_trained": 4467200, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 4467200, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 8934400, "timers": {"training_iteration_time_ms": 3649.028, "learn_time_ms": 1153.769, "learn_throughput": 11094.076, "synch_weights_time_ms": 10.192}, "counters": {"num_env_steps_sampled": 4467200, "num_env_steps_trained": 4467200, "num_agent_steps_sampled": 8934400, "num_agent_steps_trained": 8934400}, "done": false, "episodes_total": 11168, "training_iteration": 349, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-23-08", "timestamp": 1666581788, "time_this_iter_s": 3.756636619567871, "time_total_s": 1337.12641954422, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1337.12641954422, "timesteps_since_restore": 0, "iterations_since_restore": 349, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.639999999999997, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 194.8, "sparse_reward_min": 40, "sparse_reward_max": 220, "shaped_reward_mean": 174.24, "shaped_reward_min": 60, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.49, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 16.62, "onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 15.37, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 16.47, "useful_onion_pickup_agent_1_min": 6, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.02, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.06, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.01, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, 
"useful_onion_drop_agent_1_mean": 0.04, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.14, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 16.34, "potting_onion_agent_1_min": 6, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 5.46, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.07, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.15, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.95, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.16, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.03, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.04, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.06, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.84, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.04, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.72, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.14, "optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 16.34, "optimal_onion_potting_agent_1_min": 6, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.14, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 16.34, "viable_onion_potting_agent_1_min": 6, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.00298161618411541, "policy_loss": 0.0026111463084816933, "vf_loss": 
7.9099225997924805, "vf_explained_var": 0.5091748833656311, "kl": 0.002554523292928934, "entropy": 0.8410444259643555, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 4480000, "num_env_steps_trained": 4480000, "num_agent_steps_sampled": 8960000, "num_agent_steps_trained": 8960000}, "sampler_results": {"episode_reward_max": 633.0, "episode_reward_min": 140.0, "episode_reward_mean": 563.84, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 66.0}, "policy_reward_max": {"ppo": 322.0}, "policy_reward_mean": {"ppo": 281.92}, "custom_metrics": {"sparse_reward_mean": 194.8, "sparse_reward_min": 40, "sparse_reward_max": 220, "shaped_reward_mean": 174.24, "shaped_reward_min": 60, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.49, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 16.62, 
"onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 15.37, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 16.47, "useful_onion_pickup_agent_1_min": 6, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.02, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.06, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.01, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.04, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.14, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 16.34, "potting_onion_agent_1_min": 6, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 5.46, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.07, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.15, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.95, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.16, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.03, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.04, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.06, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.84, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.04, "soup_delivery_agent_0_min": 2, 
"soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.72, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.14, "optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 16.34, "optimal_onion_potting_agent_1_min": 6, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.14, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 16.34, "viable_onion_potting_agent_1_min": 6, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [630.0, 576.0, 582.0, 570.0, 582.0, 582.0, 576.0, 579.0, 473.0, 573.0, 582.0, 576.0, 576.0, 582.0, 576.0, 582.0, 579.0, 630.0, 579.0, 573.0, 516.0, 630.0, 140.0, 576.0, 522.0, 530.0, 525.0, 582.0, 587.0, 582.0, 576.0, 630.0, 579.0, 582.0, 411.0, 522.0, 584.0, 582.0, 570.0, 579.0, 579.0, 579.0, 576.0, 627.0, 570.0, 573.0, 522.0, 576.0, 405.0, 530.0, 579.0, 587.0, 587.0, 582.0, 530.0, 630.0, 468.0, 576.0, 492.0, 576.0, 582.0, 582.0, 522.0, 582.0, 576.0, 573.0, 633.0, 579.0, 627.0, 525.0, 579.0, 584.0, 579.0, 573.0, 530.0, 530.0, 576.0, 576.0, 587.0, 579.0, 630.0, 582.0, 587.0, 576.0, 573.0, 525.0, 570.0, 579.0, 579.0, 570.0, 582.0, 522.0, 530.0, 579.0, 582.0, 579.0, 573.0, 579.0, 527.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [318.0, 312.0, 289.0, 287.0, 288.0, 294.0, 281.0, 289.0, 292.0, 290.0, 289.0, 293.0, 290.0, 286.0, 295.0, 284.0, 229.0, 244.0, 290.0, 283.0, 290.0, 292.0, 293.0, 283.0, 288.0, 288.0, 287.0, 295.0, 284.0, 292.0, 289.0, 293.0, 295.0, 284.0, 318.0, 312.0, 291.0, 288.0, 282.0, 291.0, 259.0, 257.0, 319.0, 311.0, 74.0, 66.0, 291.0, 285.0, 259.0, 
263.0, 261.0, 269.0, 260.0, 265.0, 293.0, 289.0, 289.0, 298.0, 291.0, 291.0, 292.0, 284.0, 315.0, 315.0, 294.0, 285.0, 282.0, 300.0, 195.0, 216.0, 261.0, 261.0, 295.0, 289.0, 291.0, 291.0, 280.0, 290.0, 288.0, 291.0, 285.0, 294.0, 286.0, 293.0, 288.0, 288.0, 319.0, 308.0, 285.0, 285.0, 293.0, 280.0, 255.0, 267.0, 297.0, 279.0, 196.0, 209.0, 270.0, 260.0, 281.0, 298.0, 291.0, 296.0, 287.0, 300.0, 287.0, 295.0, 267.0, 263.0, 314.0, 316.0, 233.0, 235.0, 288.0, 288.0, 239.0, 253.0, 288.0, 288.0, 288.0, 294.0, 288.0, 294.0, 256.0, 266.0, 291.0, 291.0, 284.0, 292.0, 289.0, 284.0, 319.0, 314.0, 289.0, 290.0, 311.0, 316.0, 262.0, 263.0, 292.0, 287.0, 289.0, 295.0, 286.0, 293.0, 284.0, 289.0, 257.0, 273.0, 268.0, 262.0, 286.0, 290.0, 290.0, 286.0, 290.0, 297.0, 293.0, 286.0, 322.0, 308.0, 283.0, 299.0, 285.0, 302.0, 284.0, 292.0, 286.0, 287.0, 267.0, 258.0, 290.0, 280.0, 289.0, 290.0, 290.0, 289.0, 281.0, 289.0, 293.0, 289.0, 255.0, 267.0, 259.0, 271.0, 289.0, 290.0, 289.0, 293.0, 291.0, 288.0, 285.0, 288.0, 287.0, 292.0, 255.0, 272.0, 289.0, 290.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.699179377440831, "mean_inference_ms": 1.2495672904889525, "mean_action_processing_ms": 0.13399441691854486, "mean_env_wait_ms": 0.8414986864344643, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 633.0, "episode_reward_min": 140.0, "episode_reward_mean": 563.84, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 66.0}, "policy_reward_max": {"ppo": 322.0}, "policy_reward_mean": {"ppo": 281.92}, "hist_stats": {"episode_reward": [630.0, 576.0, 582.0, 570.0, 582.0, 582.0, 576.0, 579.0, 473.0, 573.0, 582.0, 576.0, 576.0, 582.0, 576.0, 582.0, 579.0, 630.0, 579.0, 573.0, 516.0, 630.0, 140.0, 576.0, 522.0, 530.0, 525.0, 582.0, 587.0, 582.0, 576.0, 630.0, 579.0, 582.0, 411.0, 522.0, 584.0, 582.0, 570.0, 579.0, 579.0, 579.0, 576.0, 627.0, 570.0, 573.0, 522.0, 576.0, 405.0, 530.0, 579.0, 587.0, 587.0, 582.0, 530.0, 630.0, 468.0, 
576.0, 492.0, 576.0, 582.0, 582.0, 522.0, 582.0, 576.0, 573.0, 633.0, 579.0, 627.0, 525.0, 579.0, 584.0, 579.0, 573.0, 530.0, 530.0, 576.0, 576.0, 587.0, 579.0, 630.0, 582.0, 587.0, 576.0, 573.0, 525.0, 570.0, 579.0, 579.0, 570.0, 582.0, 522.0, 530.0, 579.0, 582.0, 579.0, 573.0, 579.0, 527.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [318.0, 312.0, 289.0, 287.0, 288.0, 294.0, 281.0, 289.0, 292.0, 290.0, 289.0, 293.0, 290.0, 286.0, 295.0, 284.0, 229.0, 244.0, 290.0, 283.0, 290.0, 292.0, 293.0, 283.0, 288.0, 288.0, 287.0, 295.0, 284.0, 292.0, 289.0, 293.0, 295.0, 284.0, 318.0, 312.0, 291.0, 288.0, 282.0, 291.0, 259.0, 257.0, 319.0, 311.0, 74.0, 66.0, 291.0, 285.0, 259.0, 263.0, 261.0, 269.0, 260.0, 265.0, 293.0, 289.0, 289.0, 298.0, 291.0, 291.0, 292.0, 284.0, 315.0, 315.0, 294.0, 285.0, 282.0, 300.0, 195.0, 216.0, 261.0, 261.0, 295.0, 289.0, 291.0, 291.0, 280.0, 290.0, 288.0, 291.0, 285.0, 294.0, 286.0, 293.0, 288.0, 288.0, 319.0, 308.0, 285.0, 285.0, 293.0, 280.0, 255.0, 267.0, 297.0, 279.0, 196.0, 209.0, 270.0, 260.0, 281.0, 298.0, 291.0, 296.0, 287.0, 300.0, 287.0, 295.0, 267.0, 263.0, 314.0, 316.0, 233.0, 235.0, 288.0, 288.0, 239.0, 253.0, 288.0, 288.0, 288.0, 294.0, 288.0, 294.0, 256.0, 266.0, 291.0, 291.0, 284.0, 292.0, 289.0, 284.0, 319.0, 314.0, 289.0, 290.0, 311.0, 316.0, 262.0, 263.0, 292.0, 287.0, 289.0, 295.0, 286.0, 293.0, 284.0, 289.0, 257.0, 273.0, 268.0, 262.0, 286.0, 290.0, 290.0, 286.0, 290.0, 297.0, 293.0, 286.0, 322.0, 308.0, 283.0, 299.0, 285.0, 
302.0, 284.0, 292.0, 286.0, 287.0, 267.0, 258.0, 290.0, 280.0, 289.0, 290.0, 290.0, 289.0, 281.0, 289.0, 293.0, 289.0, 255.0, 267.0, 259.0, 271.0, 289.0, 290.0, 289.0, 293.0, 291.0, 288.0, 285.0, 288.0, 287.0, 292.0, 255.0, 272.0, 289.0, 290.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.699179377440831, "mean_inference_ms": 1.2495672904889525, "mean_action_processing_ms": 0.13399441691854486, "mean_env_wait_ms": 0.8414986864344643, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 8960000, "num_agent_steps_trained": 8960000, "num_env_steps_sampled": 4480000, "num_env_steps_trained": 4480000, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 4480000, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 8960000, "timers": {"training_iteration_time_ms": 3626.79, "learn_time_ms": 1150.532, "learn_throughput": 11125.291, "synch_weights_time_ms": 10.217}, "counters": {"num_env_steps_sampled": 4480000, "num_env_steps_trained": 4480000, "num_agent_steps_sampled": 8960000, "num_agent_steps_trained": 8960000}, "done": false, "episodes_total": 11200, "training_iteration": 350, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-23-12", "timestamp": 1666581792, "time_this_iter_s": 3.706865072250366, "time_total_s": 1340.8332846164703, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1340.8332846164703, "timesteps_since_restore": 0, "iterations_since_restore": 350, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.683333333333334, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 195.6, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 174.8, "shaped_reward_min": 125, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.32, "onion_pickup_agent_0_min": 5, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 16.9, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 15.17, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 25, "useful_onion_pickup_agent_1_mean": 16.75, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.03, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.05, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.02, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, 
"useful_onion_drop_agent_1_mean": 0.03, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.92, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 16.64, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 5.49, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.11, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.19, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.95, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.14, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.06, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.05, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.12, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.82, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.09, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.72, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.92, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 16.64, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.92, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 16.64, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0001787699293345213, "policy_loss": -0.0005462196422740817, "vf_loss": 
7.881190776824951, "vf_explained_var": 0.5432215929031372, "kl": 0.002550200093537569, "entropy": 0.8413398265838623, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 4492800, "num_env_steps_trained": 4492800, "num_agent_steps_sampled": 8985600, "num_agent_steps_trained": 8985600}, "sampler_results": {"episode_reward_max": 633.0, "episode_reward_min": 405.0, "episode_reward_mean": 566.0, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 195.0}, "policy_reward_max": {"ppo": 322.0}, "policy_reward_mean": {"ppo": 283.0}, "custom_metrics": {"sparse_reward_mean": 195.6, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 174.8, "shaped_reward_min": 125, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.32, "onion_pickup_agent_0_min": 5, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 16.9, 
"onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 15.17, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 25, "useful_onion_pickup_agent_1_mean": 16.75, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.03, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.05, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.02, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.03, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.92, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 16.64, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 5.49, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.11, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.19, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.95, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.14, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.06, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.05, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.12, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.82, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.09, "soup_delivery_agent_0_min": 2, 
"soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.72, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.92, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 16.64, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.92, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 16.64, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 582.0, 411.0, 522.0, 584.0, 582.0, 570.0, 579.0, 579.0, 579.0, 576.0, 627.0, 570.0, 573.0, 522.0, 576.0, 405.0, 530.0, 579.0, 587.0, 587.0, 582.0, 530.0, 630.0, 468.0, 576.0, 492.0, 576.0, 582.0, 582.0, 522.0, 582.0, 576.0, 573.0, 633.0, 579.0, 627.0, 525.0, 579.0, 584.0, 579.0, 573.0, 530.0, 530.0, 576.0, 576.0, 587.0, 579.0, 630.0, 582.0, 587.0, 576.0, 573.0, 525.0, 570.0, 579.0, 579.0, 570.0, 582.0, 522.0, 530.0, 579.0, 582.0, 579.0, 573.0, 579.0, 527.0, 579.0, 573.0, 627.0, 587.0, 582.0, 582.0, 579.0, 525.0, 530.0, 570.0, 582.0, 525.0, 570.0, 527.0, 582.0, 579.0, 582.0, 587.0, 513.0, 582.0, 582.0, 576.0, 582.0, 582.0, 570.0, 522.0, 582.0, 522.0, 630.0, 576.0, 522.0, 573.0, 567.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [294.0, 285.0, 282.0, 300.0, 195.0, 216.0, 261.0, 261.0, 295.0, 289.0, 291.0, 291.0, 280.0, 290.0, 288.0, 291.0, 285.0, 294.0, 286.0, 293.0, 288.0, 288.0, 319.0, 308.0, 285.0, 285.0, 293.0, 280.0, 255.0, 267.0, 297.0, 279.0, 196.0, 209.0, 270.0, 260.0, 281.0, 298.0, 291.0, 296.0, 287.0, 300.0, 287.0, 295.0, 267.0, 263.0, 314.0, 316.0, 233.0, 
235.0, 288.0, 288.0, 239.0, 253.0, 288.0, 288.0, 288.0, 294.0, 288.0, 294.0, 256.0, 266.0, 291.0, 291.0, 284.0, 292.0, 289.0, 284.0, 319.0, 314.0, 289.0, 290.0, 311.0, 316.0, 262.0, 263.0, 292.0, 287.0, 289.0, 295.0, 286.0, 293.0, 284.0, 289.0, 257.0, 273.0, 268.0, 262.0, 286.0, 290.0, 290.0, 286.0, 290.0, 297.0, 293.0, 286.0, 322.0, 308.0, 283.0, 299.0, 285.0, 302.0, 284.0, 292.0, 286.0, 287.0, 267.0, 258.0, 290.0, 280.0, 289.0, 290.0, 290.0, 289.0, 281.0, 289.0, 293.0, 289.0, 255.0, 267.0, 259.0, 271.0, 289.0, 290.0, 289.0, 293.0, 291.0, 288.0, 285.0, 288.0, 287.0, 292.0, 255.0, 272.0, 289.0, 290.0, 281.0, 292.0, 311.0, 316.0, 287.0, 300.0, 297.0, 285.0, 289.0, 293.0, 289.0, 290.0, 270.0, 255.0, 261.0, 269.0, 281.0, 289.0, 289.0, 293.0, 258.0, 267.0, 271.0, 299.0, 260.0, 267.0, 288.0, 294.0, 284.0, 295.0, 290.0, 292.0, 293.0, 294.0, 253.0, 260.0, 289.0, 293.0, 289.0, 293.0, 288.0, 288.0, 290.0, 292.0, 288.0, 294.0, 286.0, 284.0, 262.0, 260.0, 292.0, 290.0, 262.0, 260.0, 315.0, 315.0, 288.0, 288.0, 263.0, 259.0, 287.0, 286.0, 286.0, 281.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6991359079758581, "mean_inference_ms": 1.2493866116169676, "mean_action_processing_ms": 0.13398713337582482, "mean_env_wait_ms": 0.8414112426488535, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 633.0, "episode_reward_min": 405.0, "episode_reward_mean": 566.0, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 195.0}, "policy_reward_max": {"ppo": 322.0}, "policy_reward_mean": {"ppo": 283.0}, "hist_stats": {"episode_reward": [579.0, 582.0, 411.0, 522.0, 584.0, 582.0, 570.0, 579.0, 579.0, 579.0, 576.0, 627.0, 570.0, 573.0, 522.0, 576.0, 405.0, 530.0, 579.0, 587.0, 587.0, 582.0, 530.0, 630.0, 468.0, 576.0, 492.0, 576.0, 582.0, 582.0, 522.0, 582.0, 576.0, 573.0, 633.0, 579.0, 627.0, 525.0, 579.0, 584.0, 579.0, 573.0, 530.0, 530.0, 576.0, 576.0, 587.0, 579.0, 630.0, 582.0, 587.0, 576.0, 573.0, 525.0, 570.0, 579.0, 579.0, 
570.0, 582.0, 522.0, 530.0, 579.0, 582.0, 579.0, 573.0, 579.0, 527.0, 579.0, 573.0, 627.0, 587.0, 582.0, 582.0, 579.0, 525.0, 530.0, 570.0, 582.0, 525.0, 570.0, 527.0, 582.0, 579.0, 582.0, 587.0, 513.0, 582.0, 582.0, 576.0, 582.0, 582.0, 570.0, 522.0, 582.0, 522.0, 630.0, 576.0, 522.0, 573.0, 567.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [294.0, 285.0, 282.0, 300.0, 195.0, 216.0, 261.0, 261.0, 295.0, 289.0, 291.0, 291.0, 280.0, 290.0, 288.0, 291.0, 285.0, 294.0, 286.0, 293.0, 288.0, 288.0, 319.0, 308.0, 285.0, 285.0, 293.0, 280.0, 255.0, 267.0, 297.0, 279.0, 196.0, 209.0, 270.0, 260.0, 281.0, 298.0, 291.0, 296.0, 287.0, 300.0, 287.0, 295.0, 267.0, 263.0, 314.0, 316.0, 233.0, 235.0, 288.0, 288.0, 239.0, 253.0, 288.0, 288.0, 288.0, 294.0, 288.0, 294.0, 256.0, 266.0, 291.0, 291.0, 284.0, 292.0, 289.0, 284.0, 319.0, 314.0, 289.0, 290.0, 311.0, 316.0, 262.0, 263.0, 292.0, 287.0, 289.0, 295.0, 286.0, 293.0, 284.0, 289.0, 257.0, 273.0, 268.0, 262.0, 286.0, 290.0, 290.0, 286.0, 290.0, 297.0, 293.0, 286.0, 322.0, 308.0, 283.0, 299.0, 285.0, 302.0, 284.0, 292.0, 286.0, 287.0, 267.0, 258.0, 290.0, 280.0, 289.0, 290.0, 290.0, 289.0, 281.0, 289.0, 293.0, 289.0, 255.0, 267.0, 259.0, 271.0, 289.0, 290.0, 289.0, 293.0, 291.0, 288.0, 285.0, 288.0, 287.0, 292.0, 255.0, 272.0, 289.0, 290.0, 281.0, 292.0, 311.0, 316.0, 287.0, 300.0, 297.0, 285.0, 289.0, 293.0, 289.0, 290.0, 270.0, 255.0, 261.0, 269.0, 281.0, 289.0, 289.0, 293.0, 258.0, 267.0, 271.0, 299.0, 260.0, 267.0, 288.0, 294.0, 284.0, 
295.0, 290.0, 292.0, 293.0, 294.0, 253.0, 260.0, 289.0, 293.0, 289.0, 293.0, 288.0, 288.0, 290.0, 292.0, 288.0, 294.0, 286.0, 284.0, 262.0, 260.0, 292.0, 290.0, 262.0, 260.0, 315.0, 315.0, 288.0, 288.0, 263.0, 259.0, 287.0, 286.0, 286.0, 281.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6991359079758581, "mean_inference_ms": 1.2493866116169676, "mean_action_processing_ms": 0.13398713337582482, "mean_env_wait_ms": 0.8414112426488535, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 8985600, "num_agent_steps_trained": 8985600, "num_env_steps_sampled": 4492800, "num_env_steps_trained": 4492800, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 4492800, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 8985600, "timers": {"training_iteration_time_ms": 3629.976, "learn_time_ms": 1157.394, "learn_throughput": 11059.331, "synch_weights_time_ms": 9.886}, "counters": {"num_env_steps_sampled": 4492800, "num_env_steps_trained": 4492800, "num_agent_steps_sampled": 8985600, "num_agent_steps_trained": 8985600}, "done": false, "episodes_total": 11232, "training_iteration": 351, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-23-16", "timestamp": 1666581796, "time_this_iter_s": 3.7251431941986084, "time_total_s": 1344.558427810669, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1344.558427810669, "timesteps_since_restore": 0, "iterations_since_restore": 351, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.6, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 196.2, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 175.56, "shaped_reward_min": 145, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.2, "onion_pickup_agent_0_min": 5, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 17.16, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 15.02, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 25, "useful_onion_pickup_agent_1_mean": 16.94, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.08, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.05, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.06, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, 
"useful_onion_drop_agent_1_mean": 0.03, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 14.75, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 16.87, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 5.53, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.05, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.34, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.91, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.1, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.08, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.22, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.77, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.19, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.66, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.75, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 16.87, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.75, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 16.87, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0004346300265751779, "policy_loss": -0.000794908672105521, "vf_loss": 
7.800143241882324, "vf_explained_var": 0.5466172695159912, "kl": 0.002108823275193572, "entropy": 0.8394697904586792, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 4505600, "num_env_steps_trained": 4505600, "num_agent_steps_sampled": 9011200, "num_agent_steps_trained": 9011200}, "sampler_results": {"episode_reward_max": 633.0, "episode_reward_min": 465.0, "episode_reward_mean": 567.96, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 231.0}, "policy_reward_max": {"ppo": 322.0}, "policy_reward_mean": {"ppo": 283.98}, "custom_metrics": {"sparse_reward_mean": 196.2, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 175.56, "shaped_reward_min": 145, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.2, "onion_pickup_agent_0_min": 5, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 17.16, 
"onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 15.02, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 25, "useful_onion_pickup_agent_1_mean": 16.94, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.08, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.05, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.06, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.03, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 14.75, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 16.87, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 5.53, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.05, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.34, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.91, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.1, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.08, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.22, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.77, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.19, "soup_delivery_agent_0_min": 2, 
"soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.66, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.75, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 16.87, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.75, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 16.87, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [576.0, 573.0, 633.0, 579.0, 627.0, 525.0, 579.0, 584.0, 579.0, 573.0, 530.0, 530.0, 576.0, 576.0, 587.0, 579.0, 630.0, 582.0, 587.0, 576.0, 573.0, 525.0, 570.0, 579.0, 579.0, 570.0, 582.0, 522.0, 530.0, 579.0, 582.0, 579.0, 573.0, 579.0, 527.0, 579.0, 573.0, 627.0, 587.0, 582.0, 582.0, 579.0, 525.0, 530.0, 570.0, 582.0, 525.0, 570.0, 527.0, 582.0, 579.0, 582.0, 587.0, 513.0, 582.0, 582.0, 576.0, 582.0, 582.0, 570.0, 522.0, 582.0, 522.0, 630.0, 576.0, 522.0, 573.0, 567.0, 582.0, 579.0, 573.0, 522.0, 579.0, 530.0, 570.0, 582.0, 581.0, 630.0, 584.0, 530.0, 582.0, 582.0, 579.0, 582.0, 576.0, 570.0, 513.0, 573.0, 576.0, 579.0, 579.0, 530.0, 573.0, 530.0, 530.0, 582.0, 465.0, 525.0, 573.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [284.0, 292.0, 289.0, 284.0, 319.0, 314.0, 289.0, 290.0, 311.0, 316.0, 262.0, 263.0, 292.0, 287.0, 289.0, 295.0, 286.0, 293.0, 284.0, 289.0, 257.0, 273.0, 268.0, 262.0, 286.0, 290.0, 290.0, 286.0, 290.0, 297.0, 293.0, 286.0, 322.0, 308.0, 283.0, 299.0, 285.0, 302.0, 284.0, 292.0, 286.0, 287.0, 267.0, 258.0, 290.0, 280.0, 289.0, 290.0, 290.0, 
289.0, 281.0, 289.0, 293.0, 289.0, 255.0, 267.0, 259.0, 271.0, 289.0, 290.0, 289.0, 293.0, 291.0, 288.0, 285.0, 288.0, 287.0, 292.0, 255.0, 272.0, 289.0, 290.0, 281.0, 292.0, 311.0, 316.0, 287.0, 300.0, 297.0, 285.0, 289.0, 293.0, 289.0, 290.0, 270.0, 255.0, 261.0, 269.0, 281.0, 289.0, 289.0, 293.0, 258.0, 267.0, 271.0, 299.0, 260.0, 267.0, 288.0, 294.0, 284.0, 295.0, 290.0, 292.0, 293.0, 294.0, 253.0, 260.0, 289.0, 293.0, 289.0, 293.0, 288.0, 288.0, 290.0, 292.0, 288.0, 294.0, 286.0, 284.0, 262.0, 260.0, 292.0, 290.0, 262.0, 260.0, 315.0, 315.0, 288.0, 288.0, 263.0, 259.0, 287.0, 286.0, 286.0, 281.0, 287.0, 295.0, 288.0, 291.0, 282.0, 291.0, 262.0, 260.0, 289.0, 290.0, 256.0, 274.0, 279.0, 291.0, 282.0, 300.0, 293.0, 288.0, 315.0, 315.0, 289.0, 295.0, 256.0, 274.0, 293.0, 289.0, 292.0, 290.0, 291.0, 288.0, 284.0, 298.0, 290.0, 286.0, 280.0, 290.0, 258.0, 255.0, 281.0, 292.0, 289.0, 287.0, 287.0, 292.0, 290.0, 289.0, 272.0, 258.0, 283.0, 290.0, 272.0, 258.0, 261.0, 269.0, 293.0, 289.0, 234.0, 231.0, 262.0, 263.0, 285.0, 288.0, 287.0, 289.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6990793068100484, "mean_inference_ms": 1.249211086551166, "mean_action_processing_ms": 0.13397983546694048, "mean_env_wait_ms": 0.8413220532129634, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 633.0, "episode_reward_min": 465.0, "episode_reward_mean": 567.96, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 231.0}, "policy_reward_max": {"ppo": 322.0}, "policy_reward_mean": {"ppo": 283.98}, "hist_stats": {"episode_reward": [576.0, 573.0, 633.0, 579.0, 627.0, 525.0, 579.0, 584.0, 579.0, 573.0, 530.0, 530.0, 576.0, 576.0, 587.0, 579.0, 630.0, 582.0, 587.0, 576.0, 573.0, 525.0, 570.0, 579.0, 579.0, 570.0, 582.0, 522.0, 530.0, 579.0, 582.0, 579.0, 573.0, 579.0, 527.0, 579.0, 573.0, 627.0, 587.0, 582.0, 582.0, 579.0, 525.0, 530.0, 570.0, 582.0, 525.0, 570.0, 527.0, 582.0, 579.0, 582.0, 587.0, 513.0, 582.0, 582.0, 576.0, 
582.0, 582.0, 570.0, 522.0, 582.0, 522.0, 630.0, 576.0, 522.0, 573.0, 567.0, 582.0, 579.0, 573.0, 522.0, 579.0, 530.0, 570.0, 582.0, 581.0, 630.0, 584.0, 530.0, 582.0, 582.0, 579.0, 582.0, 576.0, 570.0, 513.0, 573.0, 576.0, 579.0, 579.0, 530.0, 573.0, 530.0, 530.0, 582.0, 465.0, 525.0, 573.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [284.0, 292.0, 289.0, 284.0, 319.0, 314.0, 289.0, 290.0, 311.0, 316.0, 262.0, 263.0, 292.0, 287.0, 289.0, 295.0, 286.0, 293.0, 284.0, 289.0, 257.0, 273.0, 268.0, 262.0, 286.0, 290.0, 290.0, 286.0, 290.0, 297.0, 293.0, 286.0, 322.0, 308.0, 283.0, 299.0, 285.0, 302.0, 284.0, 292.0, 286.0, 287.0, 267.0, 258.0, 290.0, 280.0, 289.0, 290.0, 290.0, 289.0, 281.0, 289.0, 293.0, 289.0, 255.0, 267.0, 259.0, 271.0, 289.0, 290.0, 289.0, 293.0, 291.0, 288.0, 285.0, 288.0, 287.0, 292.0, 255.0, 272.0, 289.0, 290.0, 281.0, 292.0, 311.0, 316.0, 287.0, 300.0, 297.0, 285.0, 289.0, 293.0, 289.0, 290.0, 270.0, 255.0, 261.0, 269.0, 281.0, 289.0, 289.0, 293.0, 258.0, 267.0, 271.0, 299.0, 260.0, 267.0, 288.0, 294.0, 284.0, 295.0, 290.0, 292.0, 293.0, 294.0, 253.0, 260.0, 289.0, 293.0, 289.0, 293.0, 288.0, 288.0, 290.0, 292.0, 288.0, 294.0, 286.0, 284.0, 262.0, 260.0, 292.0, 290.0, 262.0, 260.0, 315.0, 315.0, 288.0, 288.0, 263.0, 259.0, 287.0, 286.0, 286.0, 281.0, 287.0, 295.0, 288.0, 291.0, 282.0, 291.0, 262.0, 260.0, 289.0, 290.0, 256.0, 274.0, 279.0, 291.0, 282.0, 300.0, 293.0, 288.0, 315.0, 315.0, 289.0, 295.0, 256.0, 274.0, 293.0, 289.0, 292.0, 290.0, 291.0, 
288.0, 284.0, 298.0, 290.0, 286.0, 280.0, 290.0, 258.0, 255.0, 281.0, 292.0, 289.0, 287.0, 287.0, 292.0, 290.0, 289.0, 272.0, 258.0, 283.0, 290.0, 272.0, 258.0, 261.0, 269.0, 293.0, 289.0, 234.0, 231.0, 262.0, 263.0, 285.0, 288.0, 287.0, 289.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6990793068100484, "mean_inference_ms": 1.249211086551166, "mean_action_processing_ms": 0.13397983546694048, "mean_env_wait_ms": 0.8413220532129634, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 9011200, "num_agent_steps_trained": 9011200, "num_env_steps_sampled": 4505600, "num_env_steps_trained": 4505600, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 4505600, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 9011200, "timers": {"training_iteration_time_ms": 3630.348, "learn_time_ms": 1158.032, "learn_throughput": 11053.234, "synch_weights_time_ms": 9.73}, "counters": {"num_env_steps_sampled": 4505600, "num_env_steps_trained": 4505600, "num_agent_steps_sampled": 9011200, "num_agent_steps_trained": 9011200}, "done": false, "episodes_total": 11264, "training_iteration": 352, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-23-20", "timestamp": 1666581800, "time_this_iter_s": 3.6939845085144043, "time_total_s": 1348.2524123191833, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1348.2524123191833, "timesteps_since_restore": 0, "iterations_since_restore": 352, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.933333333333334, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 195.6, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 174.89, "shaped_reward_min": 145, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.18, "onion_pickup_agent_0_min": 5, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 17.08, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 14.97, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 25, "useful_onion_pickup_agent_1_mean": 16.9, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 25, "onion_drop_agent_0_mean": 0.13, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.04, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.08, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, 
"useful_onion_drop_agent_1_mean": 0.03, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 14.72, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 16.78, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 25, "dish_pickup_agent_0_mean": 5.46, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.14, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.28, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.95, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.09, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.09, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.13, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.81, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.09, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.72, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.72, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 16.78, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 25, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.72, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 16.78, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 25, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0005870356690138578, "policy_loss": 0.0002167444326914847, "vf_loss": 
7.8559651374816895, "vf_explained_var": 0.5242763161659241, "kl": 0.002454460132867098, "entropy": 0.8306089639663696, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 4518400, "num_env_steps_trained": 4518400, "num_agent_steps_sampled": 9036800, "num_agent_steps_trained": 9036800}, "sampler_results": {"episode_reward_max": 630.0, "episode_reward_min": 465.0, "episode_reward_mean": 566.09, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 231.0}, "policy_reward_max": {"ppo": 321.0}, "policy_reward_mean": {"ppo": 283.045}, "custom_metrics": {"sparse_reward_mean": 195.6, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 174.89, "shaped_reward_min": 145, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.18, "onion_pickup_agent_0_min": 5, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 17.08, 
"onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 14.97, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 25, "useful_onion_pickup_agent_1_mean": 16.9, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 25, "onion_drop_agent_0_mean": 0.13, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.04, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.08, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.03, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 14.72, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 16.78, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 25, "dish_pickup_agent_0_mean": 5.46, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.14, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.28, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.95, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.09, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.09, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.13, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.81, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.09, "soup_delivery_agent_0_min": 2, 
"soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.72, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.72, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 16.78, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 25, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.72, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 16.78, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 25, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [573.0, 579.0, 527.0, 579.0, 573.0, 627.0, 587.0, 582.0, 582.0, 579.0, 525.0, 530.0, 570.0, 582.0, 525.0, 570.0, 527.0, 582.0, 579.0, 582.0, 587.0, 513.0, 582.0, 582.0, 576.0, 582.0, 582.0, 570.0, 522.0, 582.0, 522.0, 630.0, 576.0, 522.0, 573.0, 567.0, 582.0, 579.0, 573.0, 522.0, 579.0, 530.0, 570.0, 582.0, 581.0, 630.0, 584.0, 530.0, 582.0, 582.0, 579.0, 582.0, 576.0, 570.0, 513.0, 573.0, 576.0, 579.0, 579.0, 530.0, 573.0, 530.0, 530.0, 582.0, 465.0, 525.0, 573.0, 576.0, 582.0, 579.0, 522.0, 576.0, 582.0, 564.0, 579.0, 519.0, 579.0, 530.0, 570.0, 579.0, 582.0, 522.0, 587.0, 579.0, 530.0, 567.0, 627.0, 522.0, 533.0, 576.0, 579.0, 570.0, 627.0, 579.0, 582.0, 579.0, 630.0, 582.0, 582.0, 468.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [285.0, 288.0, 287.0, 292.0, 255.0, 272.0, 289.0, 290.0, 281.0, 292.0, 311.0, 316.0, 287.0, 300.0, 297.0, 285.0, 289.0, 293.0, 289.0, 290.0, 270.0, 255.0, 261.0, 269.0, 281.0, 289.0, 289.0, 293.0, 258.0, 267.0, 271.0, 299.0, 260.0, 267.0, 288.0, 294.0, 284.0, 295.0, 290.0, 292.0, 293.0, 294.0, 253.0, 260.0, 289.0, 293.0, 289.0, 293.0, 288.0, 
288.0, 290.0, 292.0, 288.0, 294.0, 286.0, 284.0, 262.0, 260.0, 292.0, 290.0, 262.0, 260.0, 315.0, 315.0, 288.0, 288.0, 263.0, 259.0, 287.0, 286.0, 286.0, 281.0, 287.0, 295.0, 288.0, 291.0, 282.0, 291.0, 262.0, 260.0, 289.0, 290.0, 256.0, 274.0, 279.0, 291.0, 282.0, 300.0, 293.0, 288.0, 315.0, 315.0, 289.0, 295.0, 256.0, 274.0, 293.0, 289.0, 292.0, 290.0, 291.0, 288.0, 284.0, 298.0, 290.0, 286.0, 280.0, 290.0, 258.0, 255.0, 281.0, 292.0, 289.0, 287.0, 287.0, 292.0, 290.0, 289.0, 272.0, 258.0, 283.0, 290.0, 272.0, 258.0, 261.0, 269.0, 293.0, 289.0, 234.0, 231.0, 262.0, 263.0, 285.0, 288.0, 287.0, 289.0, 290.0, 292.0, 285.0, 294.0, 261.0, 261.0, 284.0, 292.0, 288.0, 294.0, 287.0, 277.0, 287.0, 292.0, 253.0, 266.0, 292.0, 287.0, 259.0, 271.0, 281.0, 289.0, 286.0, 293.0, 289.0, 293.0, 258.0, 264.0, 288.0, 299.0, 279.0, 300.0, 260.0, 270.0, 291.0, 276.0, 312.0, 315.0, 264.0, 258.0, 269.0, 264.0, 279.0, 297.0, 287.0, 292.0, 280.0, 290.0, 321.0, 306.0, 288.0, 291.0, 291.0, 291.0, 276.0, 303.0, 318.0, 312.0, 288.0, 294.0, 285.0, 297.0, 234.0, 234.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6990295269652824, "mean_inference_ms": 1.2490525243340393, "mean_action_processing_ms": 0.13397436423398662, "mean_env_wait_ms": 0.8412535322114826, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 630.0, "episode_reward_min": 465.0, "episode_reward_mean": 566.09, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 231.0}, "policy_reward_max": {"ppo": 321.0}, "policy_reward_mean": {"ppo": 283.045}, "hist_stats": {"episode_reward": [573.0, 579.0, 527.0, 579.0, 573.0, 627.0, 587.0, 582.0, 582.0, 579.0, 525.0, 530.0, 570.0, 582.0, 525.0, 570.0, 527.0, 582.0, 579.0, 582.0, 587.0, 513.0, 582.0, 582.0, 576.0, 582.0, 582.0, 570.0, 522.0, 582.0, 522.0, 630.0, 576.0, 522.0, 573.0, 567.0, 582.0, 579.0, 573.0, 522.0, 579.0, 530.0, 570.0, 582.0, 581.0, 630.0, 584.0, 530.0, 582.0, 582.0, 579.0, 582.0, 576.0, 570.0, 513.0, 573.0, 
576.0, 579.0, 579.0, 530.0, 573.0, 530.0, 530.0, 582.0, 465.0, 525.0, 573.0, 576.0, 582.0, 579.0, 522.0, 576.0, 582.0, 564.0, 579.0, 519.0, 579.0, 530.0, 570.0, 579.0, 582.0, 522.0, 587.0, 579.0, 530.0, 567.0, 627.0, 522.0, 533.0, 576.0, 579.0, 570.0, 627.0, 579.0, 582.0, 579.0, 630.0, 582.0, 582.0, 468.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [285.0, 288.0, 287.0, 292.0, 255.0, 272.0, 289.0, 290.0, 281.0, 292.0, 311.0, 316.0, 287.0, 300.0, 297.0, 285.0, 289.0, 293.0, 289.0, 290.0, 270.0, 255.0, 261.0, 269.0, 281.0, 289.0, 289.0, 293.0, 258.0, 267.0, 271.0, 299.0, 260.0, 267.0, 288.0, 294.0, 284.0, 295.0, 290.0, 292.0, 293.0, 294.0, 253.0, 260.0, 289.0, 293.0, 289.0, 293.0, 288.0, 288.0, 290.0, 292.0, 288.0, 294.0, 286.0, 284.0, 262.0, 260.0, 292.0, 290.0, 262.0, 260.0, 315.0, 315.0, 288.0, 288.0, 263.0, 259.0, 287.0, 286.0, 286.0, 281.0, 287.0, 295.0, 288.0, 291.0, 282.0, 291.0, 262.0, 260.0, 289.0, 290.0, 256.0, 274.0, 279.0, 291.0, 282.0, 300.0, 293.0, 288.0, 315.0, 315.0, 289.0, 295.0, 256.0, 274.0, 293.0, 289.0, 292.0, 290.0, 291.0, 288.0, 284.0, 298.0, 290.0, 286.0, 280.0, 290.0, 258.0, 255.0, 281.0, 292.0, 289.0, 287.0, 287.0, 292.0, 290.0, 289.0, 272.0, 258.0, 283.0, 290.0, 272.0, 258.0, 261.0, 269.0, 293.0, 289.0, 234.0, 231.0, 262.0, 263.0, 285.0, 288.0, 287.0, 289.0, 290.0, 292.0, 285.0, 294.0, 261.0, 261.0, 284.0, 292.0, 288.0, 294.0, 287.0, 277.0, 287.0, 292.0, 253.0, 266.0, 292.0, 287.0, 259.0, 271.0, 281.0, 289.0, 286.0, 293.0, 289.0, 293.0, 258.0, 264.0, 
288.0, 299.0, 279.0, 300.0, 260.0, 270.0, 291.0, 276.0, 312.0, 315.0, 264.0, 258.0, 269.0, 264.0, 279.0, 297.0, 287.0, 292.0, 280.0, 290.0, 321.0, 306.0, 288.0, 291.0, 291.0, 291.0, 276.0, 303.0, 318.0, 312.0, 288.0, 294.0, 285.0, 297.0, 234.0, 234.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6990295269652824, "mean_inference_ms": 1.2490525243340393, "mean_action_processing_ms": 0.13397436423398662, "mean_env_wait_ms": 0.8412535322114826, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 9036800, "num_agent_steps_trained": 9036800, "num_env_steps_sampled": 4518400, "num_env_steps_trained": 4518400, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 4518400, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 9036800, "timers": {"training_iteration_time_ms": 3627.527, "learn_time_ms": 1151.632, "learn_throughput": 11114.666, "synch_weights_time_ms": 9.744}, "counters": {"num_env_steps_sampled": 4518400, "num_env_steps_trained": 4518400, "num_agent_steps_sampled": 9036800, "num_agent_steps_trained": 9036800}, "done": false, "episodes_total": 11296, "training_iteration": 353, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-23-24", "timestamp": 1666581804, "time_this_iter_s": 3.699242353439331, "time_total_s": 1351.9516546726227, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1351.9516546726227, "timesteps_since_restore": 0, "iterations_since_restore": 353, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 21.000000000000004, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 194.6, "sparse_reward_min": 40, "sparse_reward_max": 220, "shaped_reward_mean": 174.01, "shaped_reward_min": 43, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.28, "onion_pickup_agent_0_min": 4, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 16.98, "onion_pickup_agent_1_min": 5, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 15.07, "useful_onion_pickup_agent_0_min": 4, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 16.75, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 25, "onion_drop_agent_0_mean": 0.14, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.05, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.08, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, 
"useful_onion_drop_agent_1_mean": 0.04, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 14.82, "potting_onion_agent_0_min": 3, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 16.62, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 25, "dish_pickup_agent_0_mean": 5.38, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.07, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.23, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.9, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.09, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.08, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.07, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.79, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.04, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.73, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.82, "optimal_onion_potting_agent_0_min": 3, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 16.62, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 25, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.82, "viable_onion_potting_agent_0_min": 3, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 16.62, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 25, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0013683558208867908, "policy_loss": -0.0017354553565382957, "vf_loss": 
7.806475639343262, "vf_explained_var": 0.561039924621582, "kl": 0.004653509706258774, "entropy": 0.8270936608314514, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 4531200, "num_env_steps_trained": 4531200, "num_agent_steps_sampled": 9062400, "num_agent_steps_trained": 9062400}, "sampler_results": {"episode_reward_max": 630.0, "episode_reward_min": 123.0, "episode_reward_mean": 563.21, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 60.0}, "policy_reward_max": {"ppo": 321.0}, "policy_reward_mean": {"ppo": 281.605}, "custom_metrics": {"sparse_reward_mean": 194.6, "sparse_reward_min": 40, "sparse_reward_max": 220, "shaped_reward_mean": 174.01, "shaped_reward_min": 43, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.28, "onion_pickup_agent_0_min": 4, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 16.98, 
"onion_pickup_agent_1_min": 5, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 15.07, "useful_onion_pickup_agent_0_min": 4, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 16.75, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 25, "onion_drop_agent_0_mean": 0.14, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.05, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.08, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.04, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 14.82, "potting_onion_agent_0_min": 3, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 16.62, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 25, "dish_pickup_agent_0_mean": 5.38, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.07, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.23, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.9, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.09, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.08, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.07, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.79, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.04, "soup_delivery_agent_0_min": 1, 
"soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.73, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.82, "optimal_onion_potting_agent_0_min": 3, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 16.62, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 25, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.82, "viable_onion_potting_agent_0_min": 3, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 16.62, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 25, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [576.0, 522.0, 573.0, 567.0, 582.0, 579.0, 573.0, 522.0, 579.0, 530.0, 570.0, 582.0, 581.0, 630.0, 584.0, 530.0, 582.0, 582.0, 579.0, 582.0, 576.0, 570.0, 513.0, 573.0, 576.0, 579.0, 579.0, 530.0, 573.0, 530.0, 530.0, 582.0, 465.0, 525.0, 573.0, 576.0, 582.0, 579.0, 522.0, 576.0, 582.0, 564.0, 579.0, 519.0, 579.0, 530.0, 570.0, 579.0, 582.0, 522.0, 587.0, 579.0, 530.0, 567.0, 627.0, 522.0, 533.0, 576.0, 579.0, 570.0, 627.0, 579.0, 582.0, 579.0, 630.0, 582.0, 582.0, 468.0, 582.0, 582.0, 576.0, 579.0, 582.0, 579.0, 576.0, 579.0, 582.0, 582.0, 522.0, 582.0, 519.0, 630.0, 582.0, 123.0, 459.0, 579.0, 579.0, 522.0, 579.0, 582.0, 576.0, 584.0, 630.0, 579.0, 630.0, 579.0, 519.0, 582.0, 582.0, 584.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [288.0, 288.0, 263.0, 259.0, 287.0, 286.0, 286.0, 281.0, 287.0, 295.0, 288.0, 291.0, 282.0, 291.0, 262.0, 260.0, 289.0, 290.0, 256.0, 274.0, 279.0, 291.0, 282.0, 300.0, 293.0, 288.0, 315.0, 315.0, 289.0, 295.0, 256.0, 274.0, 293.0, 289.0, 292.0, 290.0, 291.0, 288.0, 284.0, 298.0, 290.0, 286.0, 280.0, 290.0, 258.0, 255.0, 281.0, 292.0, 289.0, 
287.0, 287.0, 292.0, 290.0, 289.0, 272.0, 258.0, 283.0, 290.0, 272.0, 258.0, 261.0, 269.0, 293.0, 289.0, 234.0, 231.0, 262.0, 263.0, 285.0, 288.0, 287.0, 289.0, 290.0, 292.0, 285.0, 294.0, 261.0, 261.0, 284.0, 292.0, 288.0, 294.0, 287.0, 277.0, 287.0, 292.0, 253.0, 266.0, 292.0, 287.0, 259.0, 271.0, 281.0, 289.0, 286.0, 293.0, 289.0, 293.0, 258.0, 264.0, 288.0, 299.0, 279.0, 300.0, 260.0, 270.0, 291.0, 276.0, 312.0, 315.0, 264.0, 258.0, 269.0, 264.0, 279.0, 297.0, 287.0, 292.0, 280.0, 290.0, 321.0, 306.0, 288.0, 291.0, 291.0, 291.0, 276.0, 303.0, 318.0, 312.0, 288.0, 294.0, 285.0, 297.0, 234.0, 234.0, 291.0, 291.0, 290.0, 292.0, 297.0, 279.0, 280.0, 299.0, 293.0, 289.0, 286.0, 293.0, 287.0, 289.0, 296.0, 283.0, 288.0, 294.0, 290.0, 292.0, 260.0, 262.0, 289.0, 293.0, 257.0, 262.0, 316.0, 314.0, 292.0, 290.0, 60.0, 63.0, 225.0, 234.0, 291.0, 288.0, 295.0, 284.0, 261.0, 261.0, 291.0, 288.0, 288.0, 294.0, 285.0, 291.0, 295.0, 289.0, 310.0, 320.0, 285.0, 294.0, 313.0, 317.0, 292.0, 287.0, 250.0, 269.0, 290.0, 292.0, 285.0, 297.0, 286.0, 298.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6989651337371378, "mean_inference_ms": 1.2489964337375246, "mean_action_processing_ms": 0.13396585439248668, "mean_env_wait_ms": 0.8412787324349185, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 630.0, "episode_reward_min": 123.0, "episode_reward_mean": 563.21, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 60.0}, "policy_reward_max": {"ppo": 321.0}, "policy_reward_mean": {"ppo": 281.605}, "hist_stats": {"episode_reward": [576.0, 522.0, 573.0, 567.0, 582.0, 579.0, 573.0, 522.0, 579.0, 530.0, 570.0, 582.0, 581.0, 630.0, 584.0, 530.0, 582.0, 582.0, 579.0, 582.0, 576.0, 570.0, 513.0, 573.0, 576.0, 579.0, 579.0, 530.0, 573.0, 530.0, 530.0, 582.0, 465.0, 525.0, 573.0, 576.0, 582.0, 579.0, 522.0, 576.0, 582.0, 564.0, 579.0, 519.0, 579.0, 530.0, 570.0, 579.0, 582.0, 522.0, 587.0, 579.0, 530.0, 567.0, 627.0, 522.0, 533.0, 
576.0, 579.0, 570.0, 627.0, 579.0, 582.0, 579.0, 630.0, 582.0, 582.0, 468.0, 582.0, 582.0, 576.0, 579.0, 582.0, 579.0, 576.0, 579.0, 582.0, 582.0, 522.0, 582.0, 519.0, 630.0, 582.0, 123.0, 459.0, 579.0, 579.0, 522.0, 579.0, 582.0, 576.0, 584.0, 630.0, 579.0, 630.0, 579.0, 519.0, 582.0, 582.0, 584.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [288.0, 288.0, 263.0, 259.0, 287.0, 286.0, 286.0, 281.0, 287.0, 295.0, 288.0, 291.0, 282.0, 291.0, 262.0, 260.0, 289.0, 290.0, 256.0, 274.0, 279.0, 291.0, 282.0, 300.0, 293.0, 288.0, 315.0, 315.0, 289.0, 295.0, 256.0, 274.0, 293.0, 289.0, 292.0, 290.0, 291.0, 288.0, 284.0, 298.0, 290.0, 286.0, 280.0, 290.0, 258.0, 255.0, 281.0, 292.0, 289.0, 287.0, 287.0, 292.0, 290.0, 289.0, 272.0, 258.0, 283.0, 290.0, 272.0, 258.0, 261.0, 269.0, 293.0, 289.0, 234.0, 231.0, 262.0, 263.0, 285.0, 288.0, 287.0, 289.0, 290.0, 292.0, 285.0, 294.0, 261.0, 261.0, 284.0, 292.0, 288.0, 294.0, 287.0, 277.0, 287.0, 292.0, 253.0, 266.0, 292.0, 287.0, 259.0, 271.0, 281.0, 289.0, 286.0, 293.0, 289.0, 293.0, 258.0, 264.0, 288.0, 299.0, 279.0, 300.0, 260.0, 270.0, 291.0, 276.0, 312.0, 315.0, 264.0, 258.0, 269.0, 264.0, 279.0, 297.0, 287.0, 292.0, 280.0, 290.0, 321.0, 306.0, 288.0, 291.0, 291.0, 291.0, 276.0, 303.0, 318.0, 312.0, 288.0, 294.0, 285.0, 297.0, 234.0, 234.0, 291.0, 291.0, 290.0, 292.0, 297.0, 279.0, 280.0, 299.0, 293.0, 289.0, 286.0, 293.0, 287.0, 289.0, 296.0, 283.0, 288.0, 294.0, 290.0, 292.0, 260.0, 262.0, 289.0, 293.0, 257.0, 262.0, 316.0, 314.0, 292.0, 
290.0, 60.0, 63.0, 225.0, 234.0, 291.0, 288.0, 295.0, 284.0, 261.0, 261.0, 291.0, 288.0, 288.0, 294.0, 285.0, 291.0, 295.0, 289.0, 310.0, 320.0, 285.0, 294.0, 313.0, 317.0, 292.0, 287.0, 250.0, 269.0, 290.0, 292.0, 285.0, 297.0, 286.0, 298.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6989651337371378, "mean_inference_ms": 1.2489964337375246, "mean_action_processing_ms": 0.13396585439248668, "mean_env_wait_ms": 0.8412787324349185, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 9062400, "num_agent_steps_trained": 9062400, "num_env_steps_sampled": 4531200, "num_env_steps_trained": 4531200, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 4531200, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 9062400, "timers": {"training_iteration_time_ms": 3660.774, "learn_time_ms": 1163.312, "learn_throughput": 11003.067, "synch_weights_time_ms": 9.759}, "counters": {"num_env_steps_sampled": 4531200, "num_env_steps_trained": 4531200, "num_agent_steps_sampled": 9062400, "num_agent_steps_trained": 9062400}, "done": false, "episodes_total": 11328, "training_iteration": 354, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-23-28", "timestamp": 1666581808, "time_this_iter_s": 3.9559929370880127, "time_total_s": 1355.9076476097107, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1355.9076476097107, "timesteps_since_restore": 0, "iterations_since_restore": 354, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 24.759999999999998, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 195.8, "sparse_reward_min": 40, "sparse_reward_max": 220, "shaped_reward_mean": 175.01, "shaped_reward_min": 43, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.94, "onion_pickup_agent_0_min": 3, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 17.5, "onion_pickup_agent_1_min": 5, "onion_pickup_agent_1_max": 28, "useful_onion_pickup_agent_0_mean": 14.76, "useful_onion_pickup_agent_0_min": 3, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 17.29, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 28, "onion_drop_agent_0_mean": 0.11, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.07, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.07, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, 
"useful_onion_drop_agent_1_mean": 0.06, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 14.55, "potting_onion_agent_0_min": 3, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 17.1, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 27, "dish_pickup_agent_0_mean": 5.53, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.01, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.39, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.83, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.1, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.1, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.21, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.67, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.18, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.64, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.55, "optimal_onion_potting_agent_0_min": 3, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 17.1, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 27, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.55, "viable_onion_potting_agent_0_min": 3, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 17.1, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 27, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0020636606495827436, "policy_loss": -0.0024204510264098644, "vf_loss": 
7.716197967529297, "vf_explained_var": 0.5518883466720581, "kl": 0.0022711479105055332, "entropy": 0.8296573162078857, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 4544000, "num_env_steps_trained": 4544000, "num_agent_steps_sampled": 9088000, "num_agent_steps_trained": 9088000}, "sampler_results": {"episode_reward_max": 630.0, "episode_reward_min": 123.0, "episode_reward_mean": 566.61, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 60.0}, "policy_reward_max": {"ppo": 321.0}, "policy_reward_mean": {"ppo": 283.305}, "custom_metrics": {"sparse_reward_mean": 195.8, "sparse_reward_min": 40, "sparse_reward_max": 220, "shaped_reward_mean": 175.01, "shaped_reward_min": 43, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.94, "onion_pickup_agent_0_min": 3, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 17.5, 
"onion_pickup_agent_1_min": 5, "onion_pickup_agent_1_max": 28, "useful_onion_pickup_agent_0_mean": 14.76, "useful_onion_pickup_agent_0_min": 3, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 17.29, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 28, "onion_drop_agent_0_mean": 0.11, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.07, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.07, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.06, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 14.55, "potting_onion_agent_0_min": 3, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 17.1, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 27, "dish_pickup_agent_0_mean": 5.53, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.01, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.39, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.83, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.1, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.1, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.21, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.67, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.18, "soup_delivery_agent_0_min": 1, 
"soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.64, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.55, "optimal_onion_potting_agent_0_min": 3, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 17.1, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 27, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.55, "viable_onion_potting_agent_0_min": 3, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 17.1, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 27, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [465.0, 525.0, 573.0, 576.0, 582.0, 579.0, 522.0, 576.0, 582.0, 564.0, 579.0, 519.0, 579.0, 530.0, 570.0, 579.0, 582.0, 522.0, 587.0, 579.0, 530.0, 567.0, 627.0, 522.0, 533.0, 576.0, 579.0, 570.0, 627.0, 579.0, 582.0, 579.0, 630.0, 582.0, 582.0, 468.0, 582.0, 582.0, 576.0, 579.0, 582.0, 579.0, 576.0, 579.0, 582.0, 582.0, 522.0, 582.0, 519.0, 630.0, 582.0, 123.0, 459.0, 579.0, 579.0, 522.0, 579.0, 582.0, 576.0, 584.0, 630.0, 579.0, 630.0, 579.0, 519.0, 582.0, 582.0, 584.0, 579.0, 573.0, 582.0, 579.0, 576.0, 522.0, 582.0, 530.0, 576.0, 573.0, 573.0, 525.0, 582.0, 587.0, 579.0, 582.0, 579.0, 579.0, 587.0, 576.0, 579.0, 627.0, 630.0, 630.0, 582.0, 573.0, 579.0, 579.0, 579.0, 582.0, 570.0, 525.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [234.0, 231.0, 262.0, 263.0, 285.0, 288.0, 287.0, 289.0, 290.0, 292.0, 285.0, 294.0, 261.0, 261.0, 284.0, 292.0, 288.0, 294.0, 287.0, 277.0, 287.0, 292.0, 253.0, 266.0, 292.0, 287.0, 259.0, 271.0, 281.0, 289.0, 286.0, 293.0, 289.0, 293.0, 258.0, 264.0, 288.0, 299.0, 279.0, 300.0, 260.0, 270.0, 291.0, 276.0, 312.0, 315.0, 264.0, 258.0, 269.0, 
264.0, 279.0, 297.0, 287.0, 292.0, 280.0, 290.0, 321.0, 306.0, 288.0, 291.0, 291.0, 291.0, 276.0, 303.0, 318.0, 312.0, 288.0, 294.0, 285.0, 297.0, 234.0, 234.0, 291.0, 291.0, 290.0, 292.0, 297.0, 279.0, 280.0, 299.0, 293.0, 289.0, 286.0, 293.0, 287.0, 289.0, 296.0, 283.0, 288.0, 294.0, 290.0, 292.0, 260.0, 262.0, 289.0, 293.0, 257.0, 262.0, 316.0, 314.0, 292.0, 290.0, 60.0, 63.0, 225.0, 234.0, 291.0, 288.0, 295.0, 284.0, 261.0, 261.0, 291.0, 288.0, 288.0, 294.0, 285.0, 291.0, 295.0, 289.0, 310.0, 320.0, 285.0, 294.0, 313.0, 317.0, 292.0, 287.0, 250.0, 269.0, 290.0, 292.0, 285.0, 297.0, 286.0, 298.0, 291.0, 288.0, 290.0, 283.0, 288.0, 294.0, 293.0, 286.0, 285.0, 291.0, 255.0, 267.0, 292.0, 290.0, 261.0, 269.0, 289.0, 287.0, 283.0, 290.0, 284.0, 289.0, 259.0, 266.0, 280.0, 302.0, 288.0, 299.0, 284.0, 295.0, 295.0, 287.0, 290.0, 289.0, 290.0, 289.0, 286.0, 301.0, 287.0, 289.0, 289.0, 290.0, 308.0, 319.0, 312.0, 318.0, 314.0, 316.0, 288.0, 294.0, 287.0, 286.0, 284.0, 295.0, 287.0, 292.0, 296.0, 283.0, 289.0, 293.0, 285.0, 285.0, 266.0, 259.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6988876950800459, "mean_inference_ms": 1.248936325069807, "mean_action_processing_ms": 0.13395738623505635, "mean_env_wait_ms": 0.8413004695451081, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 630.0, "episode_reward_min": 123.0, "episode_reward_mean": 566.61, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 60.0}, "policy_reward_max": {"ppo": 321.0}, "policy_reward_mean": {"ppo": 283.305}, "hist_stats": {"episode_reward": [465.0, 525.0, 573.0, 576.0, 582.0, 579.0, 522.0, 576.0, 582.0, 564.0, 579.0, 519.0, 579.0, 530.0, 570.0, 579.0, 582.0, 522.0, 587.0, 579.0, 530.0, 567.0, 627.0, 522.0, 533.0, 576.0, 579.0, 570.0, 627.0, 579.0, 582.0, 579.0, 630.0, 582.0, 582.0, 468.0, 582.0, 582.0, 576.0, 579.0, 582.0, 579.0, 576.0, 579.0, 582.0, 582.0, 522.0, 582.0, 519.0, 630.0, 582.0, 123.0, 459.0, 579.0, 579.0, 522.0, 579.0, 
582.0, 576.0, 584.0, 630.0, 579.0, 630.0, 579.0, 519.0, 582.0, 582.0, 584.0, 579.0, 573.0, 582.0, 579.0, 576.0, 522.0, 582.0, 530.0, 576.0, 573.0, 573.0, 525.0, 582.0, 587.0, 579.0, 582.0, 579.0, 579.0, 587.0, 576.0, 579.0, 627.0, 630.0, 630.0, 582.0, 573.0, 579.0, 579.0, 579.0, 582.0, 570.0, 525.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [234.0, 231.0, 262.0, 263.0, 285.0, 288.0, 287.0, 289.0, 290.0, 292.0, 285.0, 294.0, 261.0, 261.0, 284.0, 292.0, 288.0, 294.0, 287.0, 277.0, 287.0, 292.0, 253.0, 266.0, 292.0, 287.0, 259.0, 271.0, 281.0, 289.0, 286.0, 293.0, 289.0, 293.0, 258.0, 264.0, 288.0, 299.0, 279.0, 300.0, 260.0, 270.0, 291.0, 276.0, 312.0, 315.0, 264.0, 258.0, 269.0, 264.0, 279.0, 297.0, 287.0, 292.0, 280.0, 290.0, 321.0, 306.0, 288.0, 291.0, 291.0, 291.0, 276.0, 303.0, 318.0, 312.0, 288.0, 294.0, 285.0, 297.0, 234.0, 234.0, 291.0, 291.0, 290.0, 292.0, 297.0, 279.0, 280.0, 299.0, 293.0, 289.0, 286.0, 293.0, 287.0, 289.0, 296.0, 283.0, 288.0, 294.0, 290.0, 292.0, 260.0, 262.0, 289.0, 293.0, 257.0, 262.0, 316.0, 314.0, 292.0, 290.0, 60.0, 63.0, 225.0, 234.0, 291.0, 288.0, 295.0, 284.0, 261.0, 261.0, 291.0, 288.0, 288.0, 294.0, 285.0, 291.0, 295.0, 289.0, 310.0, 320.0, 285.0, 294.0, 313.0, 317.0, 292.0, 287.0, 250.0, 269.0, 290.0, 292.0, 285.0, 297.0, 286.0, 298.0, 291.0, 288.0, 290.0, 283.0, 288.0, 294.0, 293.0, 286.0, 285.0, 291.0, 255.0, 267.0, 292.0, 290.0, 261.0, 269.0, 289.0, 287.0, 283.0, 290.0, 284.0, 289.0, 259.0, 266.0, 280.0, 302.0, 288.0, 299.0, 284.0, 
295.0, 295.0, 287.0, 290.0, 289.0, 290.0, 289.0, 286.0, 301.0, 287.0, 289.0, 289.0, 290.0, 308.0, 319.0, 312.0, 318.0, 314.0, 316.0, 288.0, 294.0, 287.0, 286.0, 284.0, 295.0, 287.0, 292.0, 296.0, 283.0, 289.0, 293.0, 285.0, 285.0, 266.0, 259.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6988876950800459, "mean_inference_ms": 1.248936325069807, "mean_action_processing_ms": 0.13395738623505635, "mean_env_wait_ms": 0.8413004695451081, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 9088000, "num_agent_steps_trained": 9088000, "num_env_steps_sampled": 4544000, "num_env_steps_trained": 4544000, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 4544000, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 9088000, "timers": {"training_iteration_time_ms": 3655.033, "learn_time_ms": 1153.612, "learn_throughput": 11095.586, "synch_weights_time_ms": 9.777}, "counters": {"num_env_steps_sampled": 4544000, "num_env_steps_trained": 4544000, "num_agent_steps_sampled": 9088000, "num_agent_steps_trained": 9088000}, "done": false, "episodes_total": 11360, "training_iteration": 355, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-23-32", "timestamp": 1666581812, "time_this_iter_s": 3.6556270122528076, "time_total_s": 1359.5632746219635, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1359.5632746219635, "timesteps_since_restore": 0, "iterations_since_restore": 355, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.53333333333333, "ram_util_percent": 10.616666666666667}}
+{"custom_metrics": {"sparse_reward_mean": 197.4, "sparse_reward_min": 40, "sparse_reward_max": 220, "shaped_reward_mean": 176.15, "shaped_reward_min": 43, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.84, "onion_pickup_agent_0_min": 3, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 17.81, "onion_pickup_agent_1_min": 5, "onion_pickup_agent_1_max": 28, "useful_onion_pickup_agent_0_mean": 14.71, "useful_onion_pickup_agent_0_min": 3, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 17.58, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 28, "onion_drop_agent_0_mean": 0.04, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.12, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.02, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, 
"useful_onion_drop_agent_1_mean": 0.1, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.51, "potting_onion_agent_0_min": 3, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 17.33, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 27, "dish_pickup_agent_0_mean": 5.66, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 4.9, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.55, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.76, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.1, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.09, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.35, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.6, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.33, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.58, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.51, "optimal_onion_potting_agent_0_min": 3, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 17.33, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 27, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.51, "viable_onion_potting_agent_0_min": 3, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 17.33, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 27, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0013188866432756186, "policy_loss": 0.0009702978422865272, "vf_loss": 
7.692901134490967, "vf_explained_var": 0.5492017269134521, "kl": 0.0021686244290322065, "entropy": 0.841401219367981, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 4556800, "num_env_steps_trained": 4556800, "num_agent_steps_sampled": 9113600, "num_agent_steps_trained": 9113600}, "sampler_results": {"episode_reward_max": 630.0, "episode_reward_min": 123.0, "episode_reward_mean": 570.95, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 60.0}, "policy_reward_max": {"ppo": 320.0}, "policy_reward_mean": {"ppo": 285.475}, "custom_metrics": {"sparse_reward_mean": 197.4, "sparse_reward_min": 40, "sparse_reward_max": 220, "shaped_reward_mean": 176.15, "shaped_reward_min": 43, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.84, "onion_pickup_agent_0_min": 3, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 17.81, 
"onion_pickup_agent_1_min": 5, "onion_pickup_agent_1_max": 28, "useful_onion_pickup_agent_0_mean": 14.71, "useful_onion_pickup_agent_0_min": 3, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 17.58, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 28, "onion_drop_agent_0_mean": 0.04, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.12, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.02, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.1, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.51, "potting_onion_agent_0_min": 3, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 17.33, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 27, "dish_pickup_agent_0_mean": 5.66, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 4.9, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.55, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.76, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.1, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.09, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.35, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.6, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.33, "soup_delivery_agent_0_min": 1, 
"soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.58, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.51, "optimal_onion_potting_agent_0_min": 3, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 17.33, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 27, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.51, "viable_onion_potting_agent_0_min": 3, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 17.33, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 27, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [630.0, 582.0, 582.0, 468.0, 582.0, 582.0, 576.0, 579.0, 582.0, 579.0, 576.0, 579.0, 582.0, 582.0, 522.0, 582.0, 519.0, 630.0, 582.0, 123.0, 459.0, 579.0, 579.0, 522.0, 579.0, 582.0, 576.0, 584.0, 630.0, 579.0, 630.0, 579.0, 519.0, 582.0, 582.0, 584.0, 579.0, 573.0, 582.0, 579.0, 576.0, 522.0, 582.0, 530.0, 576.0, 573.0, 573.0, 525.0, 582.0, 587.0, 579.0, 582.0, 579.0, 579.0, 587.0, 576.0, 579.0, 627.0, 630.0, 630.0, 582.0, 573.0, 579.0, 579.0, 579.0, 582.0, 570.0, 525.0, 584.0, 570.0, 576.0, 573.0, 576.0, 579.0, 576.0, 584.0, 582.0, 627.0, 582.0, 576.0, 630.0, 582.0, 579.0, 570.0, 522.0, 576.0, 582.0, 582.0, 525.0, 579.0, 576.0, 570.0, 579.0, 582.0, 573.0, 573.0, 525.0, 627.0, 582.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [318.0, 312.0, 288.0, 294.0, 285.0, 297.0, 234.0, 234.0, 291.0, 291.0, 290.0, 292.0, 297.0, 279.0, 280.0, 299.0, 293.0, 289.0, 286.0, 293.0, 287.0, 289.0, 296.0, 283.0, 288.0, 294.0, 290.0, 292.0, 260.0, 262.0, 289.0, 293.0, 257.0, 262.0, 316.0, 314.0, 292.0, 290.0, 60.0, 63.0, 225.0, 234.0, 291.0, 288.0, 295.0, 284.0, 261.0, 261.0, 291.0, 
288.0, 288.0, 294.0, 285.0, 291.0, 295.0, 289.0, 310.0, 320.0, 285.0, 294.0, 313.0, 317.0, 292.0, 287.0, 250.0, 269.0, 290.0, 292.0, 285.0, 297.0, 286.0, 298.0, 291.0, 288.0, 290.0, 283.0, 288.0, 294.0, 293.0, 286.0, 285.0, 291.0, 255.0, 267.0, 292.0, 290.0, 261.0, 269.0, 289.0, 287.0, 283.0, 290.0, 284.0, 289.0, 259.0, 266.0, 280.0, 302.0, 288.0, 299.0, 284.0, 295.0, 295.0, 287.0, 290.0, 289.0, 290.0, 289.0, 286.0, 301.0, 287.0, 289.0, 289.0, 290.0, 308.0, 319.0, 312.0, 318.0, 314.0, 316.0, 288.0, 294.0, 287.0, 286.0, 284.0, 295.0, 287.0, 292.0, 296.0, 283.0, 289.0, 293.0, 285.0, 285.0, 266.0, 259.0, 290.0, 294.0, 293.0, 277.0, 286.0, 290.0, 281.0, 292.0, 284.0, 292.0, 296.0, 283.0, 291.0, 285.0, 283.0, 301.0, 284.0, 298.0, 319.0, 308.0, 291.0, 291.0, 284.0, 292.0, 313.0, 317.0, 293.0, 289.0, 285.0, 294.0, 283.0, 287.0, 266.0, 256.0, 288.0, 288.0, 295.0, 287.0, 290.0, 292.0, 266.0, 259.0, 288.0, 291.0, 286.0, 290.0, 287.0, 283.0, 291.0, 288.0, 290.0, 292.0, 283.0, 290.0, 281.0, 292.0, 264.0, 261.0, 318.0, 309.0, 285.0, 297.0, 285.0, 291.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.698832879688069, "mean_inference_ms": 1.248881800163415, "mean_action_processing_ms": 0.133950154641813, "mean_env_wait_ms": 0.8413277649657975, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 630.0, "episode_reward_min": 123.0, "episode_reward_mean": 570.95, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 60.0}, "policy_reward_max": {"ppo": 320.0}, "policy_reward_mean": {"ppo": 285.475}, "hist_stats": {"episode_reward": [630.0, 582.0, 582.0, 468.0, 582.0, 582.0, 576.0, 579.0, 582.0, 579.0, 576.0, 579.0, 582.0, 582.0, 522.0, 582.0, 519.0, 630.0, 582.0, 123.0, 459.0, 579.0, 579.0, 522.0, 579.0, 582.0, 576.0, 584.0, 630.0, 579.0, 630.0, 579.0, 519.0, 582.0, 582.0, 584.0, 579.0, 573.0, 582.0, 579.0, 576.0, 522.0, 582.0, 530.0, 576.0, 573.0, 573.0, 525.0, 582.0, 587.0, 579.0, 582.0, 579.0, 579.0, 587.0, 576.0, 579.0, 
627.0, 630.0, 630.0, 582.0, 573.0, 579.0, 579.0, 579.0, 582.0, 570.0, 525.0, 584.0, 570.0, 576.0, 573.0, 576.0, 579.0, 576.0, 584.0, 582.0, 627.0, 582.0, 576.0, 630.0, 582.0, 579.0, 570.0, 522.0, 576.0, 582.0, 582.0, 525.0, 579.0, 576.0, 570.0, 579.0, 582.0, 573.0, 573.0, 525.0, 627.0, 582.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [318.0, 312.0, 288.0, 294.0, 285.0, 297.0, 234.0, 234.0, 291.0, 291.0, 290.0, 292.0, 297.0, 279.0, 280.0, 299.0, 293.0, 289.0, 286.0, 293.0, 287.0, 289.0, 296.0, 283.0, 288.0, 294.0, 290.0, 292.0, 260.0, 262.0, 289.0, 293.0, 257.0, 262.0, 316.0, 314.0, 292.0, 290.0, 60.0, 63.0, 225.0, 234.0, 291.0, 288.0, 295.0, 284.0, 261.0, 261.0, 291.0, 288.0, 288.0, 294.0, 285.0, 291.0, 295.0, 289.0, 310.0, 320.0, 285.0, 294.0, 313.0, 317.0, 292.0, 287.0, 250.0, 269.0, 290.0, 292.0, 285.0, 297.0, 286.0, 298.0, 291.0, 288.0, 290.0, 283.0, 288.0, 294.0, 293.0, 286.0, 285.0, 291.0, 255.0, 267.0, 292.0, 290.0, 261.0, 269.0, 289.0, 287.0, 283.0, 290.0, 284.0, 289.0, 259.0, 266.0, 280.0, 302.0, 288.0, 299.0, 284.0, 295.0, 295.0, 287.0, 290.0, 289.0, 290.0, 289.0, 286.0, 301.0, 287.0, 289.0, 289.0, 290.0, 308.0, 319.0, 312.0, 318.0, 314.0, 316.0, 288.0, 294.0, 287.0, 286.0, 284.0, 295.0, 287.0, 292.0, 296.0, 283.0, 289.0, 293.0, 285.0, 285.0, 266.0, 259.0, 290.0, 294.0, 293.0, 277.0, 286.0, 290.0, 281.0, 292.0, 284.0, 292.0, 296.0, 283.0, 291.0, 285.0, 283.0, 301.0, 284.0, 298.0, 319.0, 308.0, 291.0, 291.0, 284.0, 292.0, 313.0, 317.0, 293.0, 289.0, 285.0, 
294.0, 283.0, 287.0, 266.0, 256.0, 288.0, 288.0, 295.0, 287.0, 290.0, 292.0, 266.0, 259.0, 288.0, 291.0, 286.0, 290.0, 287.0, 283.0, 291.0, 288.0, 290.0, 292.0, 283.0, 290.0, 281.0, 292.0, 264.0, 261.0, 318.0, 309.0, 285.0, 297.0, 285.0, 291.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.698832879688069, "mean_inference_ms": 1.248881800163415, "mean_action_processing_ms": 0.133950154641813, "mean_env_wait_ms": 0.8413277649657975, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 9113600, "num_agent_steps_trained": 9113600, "num_env_steps_sampled": 4556800, "num_env_steps_trained": 4556800, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 4556800, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 9113600, "timers": {"training_iteration_time_ms": 3671.445, "learn_time_ms": 1161.805, "learn_throughput": 11017.344, "synch_weights_time_ms": 9.891}, "counters": {"num_env_steps_sampled": 4556800, "num_env_steps_trained": 4556800, "num_agent_steps_sampled": 9113600, "num_agent_steps_trained": 9113600}, "done": false, "episodes_total": 11392, "training_iteration": 356, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-23-36", "timestamp": 1666581816, "time_this_iter_s": 3.731449842453003, "time_total_s": 1363.2947244644165, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1363.2947244644165, "timesteps_since_restore": 0, "iterations_since_restore": 356, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 24.259999999999998, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 198.6, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 176.99, "shaped_reward_min": 159, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.75, "onion_pickup_agent_0_min": 3, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 17.97, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 28, "useful_onion_pickup_agent_0_mean": 14.65, "useful_onion_pickup_agent_0_min": 3, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 17.78, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 28, "onion_drop_agent_0_mean": 0.03, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.15, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.01, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, 
"useful_onion_drop_agent_1_mean": 0.12, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 14.43, "potting_onion_agent_0_min": 3, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 17.48, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 27, "dish_pickup_agent_0_mean": 5.7, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 4.95, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.6, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.77, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.07, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.12, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.44, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.61, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.39, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.58, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.43, "optimal_onion_potting_agent_0_min": 3, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 17.48, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 27, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.43, "viable_onion_potting_agent_0_min": 3, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 17.48, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 27, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0006175374146550894, "policy_loss": 0.0002533263177610934, "vf_loss": 
7.782168388366699, "vf_explained_var": 0.52201247215271, "kl": 0.0021736156195402145, "entropy": 0.8280101418495178, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 4569600, "num_env_steps_trained": 4569600, "num_agent_steps_sampled": 9139200, "num_agent_steps_trained": 9139200}, "sampler_results": {"episode_reward_max": 630.0, "episode_reward_min": 482.0, "episode_reward_mean": 574.19, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 235.0}, "policy_reward_max": {"ppo": 320.0}, "policy_reward_mean": {"ppo": 287.095}, "custom_metrics": {"sparse_reward_mean": 198.6, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 176.99, "shaped_reward_min": 159, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.75, "onion_pickup_agent_0_min": 3, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 17.97, 
"onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 28, "useful_onion_pickup_agent_0_mean": 14.65, "useful_onion_pickup_agent_0_min": 3, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 17.78, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 28, "onion_drop_agent_0_mean": 0.03, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.15, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.01, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.12, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 14.43, "potting_onion_agent_0_min": 3, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 17.48, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 27, "dish_pickup_agent_0_mean": 5.7, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 4.95, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.6, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.77, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.07, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.12, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.44, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.61, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.39, "soup_delivery_agent_0_min": 2, 
"soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.58, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.43, "optimal_onion_potting_agent_0_min": 3, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 17.48, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 27, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.43, "viable_onion_potting_agent_0_min": 3, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 17.48, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 27, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [519.0, 582.0, 582.0, 584.0, 579.0, 573.0, 582.0, 579.0, 576.0, 522.0, 582.0, 530.0, 576.0, 573.0, 573.0, 525.0, 582.0, 587.0, 579.0, 582.0, 579.0, 579.0, 587.0, 576.0, 579.0, 627.0, 630.0, 630.0, 582.0, 573.0, 579.0, 579.0, 579.0, 582.0, 570.0, 525.0, 584.0, 570.0, 576.0, 573.0, 576.0, 579.0, 576.0, 584.0, 582.0, 627.0, 582.0, 576.0, 630.0, 582.0, 579.0, 570.0, 522.0, 576.0, 582.0, 582.0, 525.0, 579.0, 576.0, 570.0, 579.0, 582.0, 573.0, 573.0, 525.0, 627.0, 582.0, 576.0, 576.0, 579.0, 582.0, 524.0, 582.0, 582.0, 573.0, 579.0, 522.0, 573.0, 579.0, 630.0, 630.0, 530.0, 627.0, 579.0, 530.0, 525.0, 582.0, 573.0, 576.0, 573.0, 579.0, 582.0, 570.0, 630.0, 482.0, 519.0, 582.0, 567.0, 579.0, 525.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [250.0, 269.0, 290.0, 292.0, 285.0, 297.0, 286.0, 298.0, 291.0, 288.0, 290.0, 283.0, 288.0, 294.0, 293.0, 286.0, 285.0, 291.0, 255.0, 267.0, 292.0, 290.0, 261.0, 269.0, 289.0, 287.0, 283.0, 290.0, 284.0, 289.0, 259.0, 266.0, 280.0, 302.0, 288.0, 299.0, 284.0, 295.0, 295.0, 287.0, 290.0, 289.0, 290.0, 289.0, 286.0, 301.0, 287.0, 289.0, 289.0, 
290.0, 308.0, 319.0, 312.0, 318.0, 314.0, 316.0, 288.0, 294.0, 287.0, 286.0, 284.0, 295.0, 287.0, 292.0, 296.0, 283.0, 289.0, 293.0, 285.0, 285.0, 266.0, 259.0, 290.0, 294.0, 293.0, 277.0, 286.0, 290.0, 281.0, 292.0, 284.0, 292.0, 296.0, 283.0, 291.0, 285.0, 283.0, 301.0, 284.0, 298.0, 319.0, 308.0, 291.0, 291.0, 284.0, 292.0, 313.0, 317.0, 293.0, 289.0, 285.0, 294.0, 283.0, 287.0, 266.0, 256.0, 288.0, 288.0, 295.0, 287.0, 290.0, 292.0, 266.0, 259.0, 288.0, 291.0, 286.0, 290.0, 287.0, 283.0, 291.0, 288.0, 290.0, 292.0, 283.0, 290.0, 281.0, 292.0, 264.0, 261.0, 318.0, 309.0, 285.0, 297.0, 285.0, 291.0, 289.0, 287.0, 291.0, 288.0, 291.0, 291.0, 250.0, 274.0, 291.0, 291.0, 296.0, 286.0, 284.0, 289.0, 289.0, 290.0, 264.0, 258.0, 284.0, 289.0, 284.0, 295.0, 313.0, 317.0, 317.0, 313.0, 265.0, 265.0, 320.0, 307.0, 287.0, 292.0, 269.0, 261.0, 262.0, 263.0, 288.0, 294.0, 280.0, 293.0, 292.0, 284.0, 286.0, 287.0, 285.0, 294.0, 287.0, 295.0, 278.0, 292.0, 314.0, 316.0, 235.0, 247.0, 259.0, 260.0, 289.0, 293.0, 280.0, 287.0, 293.0, 286.0, 267.0, 258.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.698796357014443, "mean_inference_ms": 1.2487321483893488, "mean_action_processing_ms": 0.13394084113340607, "mean_env_wait_ms": 0.8412337399649568, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 630.0, "episode_reward_min": 482.0, "episode_reward_mean": 574.19, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 235.0}, "policy_reward_max": {"ppo": 320.0}, "policy_reward_mean": {"ppo": 287.095}, "hist_stats": {"episode_reward": [519.0, 582.0, 582.0, 584.0, 579.0, 573.0, 582.0, 579.0, 576.0, 522.0, 582.0, 530.0, 576.0, 573.0, 573.0, 525.0, 582.0, 587.0, 579.0, 582.0, 579.0, 579.0, 587.0, 576.0, 579.0, 627.0, 630.0, 630.0, 582.0, 573.0, 579.0, 579.0, 579.0, 582.0, 570.0, 525.0, 584.0, 570.0, 576.0, 573.0, 576.0, 579.0, 576.0, 584.0, 582.0, 627.0, 582.0, 576.0, 630.0, 582.0, 579.0, 570.0, 522.0, 576.0, 582.0, 582.0, 525.0, 
579.0, 576.0, 570.0, 579.0, 582.0, 573.0, 573.0, 525.0, 627.0, 582.0, 576.0, 576.0, 579.0, 582.0, 524.0, 582.0, 582.0, 573.0, 579.0, 522.0, 573.0, 579.0, 630.0, 630.0, 530.0, 627.0, 579.0, 530.0, 525.0, 582.0, 573.0, 576.0, 573.0, 579.0, 582.0, 570.0, 630.0, 482.0, 519.0, 582.0, 567.0, 579.0, 525.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [250.0, 269.0, 290.0, 292.0, 285.0, 297.0, 286.0, 298.0, 291.0, 288.0, 290.0, 283.0, 288.0, 294.0, 293.0, 286.0, 285.0, 291.0, 255.0, 267.0, 292.0, 290.0, 261.0, 269.0, 289.0, 287.0, 283.0, 290.0, 284.0, 289.0, 259.0, 266.0, 280.0, 302.0, 288.0, 299.0, 284.0, 295.0, 295.0, 287.0, 290.0, 289.0, 290.0, 289.0, 286.0, 301.0, 287.0, 289.0, 289.0, 290.0, 308.0, 319.0, 312.0, 318.0, 314.0, 316.0, 288.0, 294.0, 287.0, 286.0, 284.0, 295.0, 287.0, 292.0, 296.0, 283.0, 289.0, 293.0, 285.0, 285.0, 266.0, 259.0, 290.0, 294.0, 293.0, 277.0, 286.0, 290.0, 281.0, 292.0, 284.0, 292.0, 296.0, 283.0, 291.0, 285.0, 283.0, 301.0, 284.0, 298.0, 319.0, 308.0, 291.0, 291.0, 284.0, 292.0, 313.0, 317.0, 293.0, 289.0, 285.0, 294.0, 283.0, 287.0, 266.0, 256.0, 288.0, 288.0, 295.0, 287.0, 290.0, 292.0, 266.0, 259.0, 288.0, 291.0, 286.0, 290.0, 287.0, 283.0, 291.0, 288.0, 290.0, 292.0, 283.0, 290.0, 281.0, 292.0, 264.0, 261.0, 318.0, 309.0, 285.0, 297.0, 285.0, 291.0, 289.0, 287.0, 291.0, 288.0, 291.0, 291.0, 250.0, 274.0, 291.0, 291.0, 296.0, 286.0, 284.0, 289.0, 289.0, 290.0, 264.0, 258.0, 284.0, 289.0, 284.0, 295.0, 313.0, 317.0, 317.0, 313.0, 265.0, 265.0, 320.0, 
307.0, 287.0, 292.0, 269.0, 261.0, 262.0, 263.0, 288.0, 294.0, 280.0, 293.0, 292.0, 284.0, 286.0, 287.0, 285.0, 294.0, 287.0, 295.0, 278.0, 292.0, 314.0, 316.0, 235.0, 247.0, 259.0, 260.0, 289.0, 293.0, 280.0, 287.0, 293.0, 286.0, 267.0, 258.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.698796357014443, "mean_inference_ms": 1.2487321483893488, "mean_action_processing_ms": 0.13394084113340607, "mean_env_wait_ms": 0.8412337399649568, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 9139200, "num_agent_steps_trained": 9139200, "num_env_steps_sampled": 4569600, "num_env_steps_trained": 4569600, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 4569600, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 9139200, "timers": {"training_iteration_time_ms": 3679.359, "learn_time_ms": 1168.252, "learn_throughput": 10956.54, "synch_weights_time_ms": 9.969}, "counters": {"num_env_steps_sampled": 4569600, "num_env_steps_trained": 4569600, "num_agent_steps_sampled": 9139200, "num_agent_steps_trained": 9139200}, "done": false, "episodes_total": 11424, "training_iteration": 357, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-23-40", "timestamp": 1666581820, "time_this_iter_s": 3.717745304107666, "time_total_s": 1367.0124697685242, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1367.0124697685242, "timesteps_since_restore": 0, "iterations_since_restore": 357, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.73333333333333, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 197.8, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 176.49, "shaped_reward_min": 145, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.4, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 17.32, "onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 15.29, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 17.12, "useful_onion_pickup_agent_1_min": 6, "useful_onion_pickup_agent_1_max": 26, "onion_drop_agent_0_mean": 0.06, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.15, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.03, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, 
"useful_onion_drop_agent_1_mean": 0.1, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 15.03, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 16.82, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 26, "dish_pickup_agent_0_mean": 5.49, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 5.02, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.42, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.91, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.05, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.07, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.25, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.76, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.2, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.72, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.03, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 16.82, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 26, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.03, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 16.82, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 26, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.00033347378484904766, "policy_loss": -0.0006874024402350187, "vf_loss": 
7.688575267791748, "vf_explained_var": 0.544007420539856, "kl": 0.0025333897210657597, "entropy": 0.8298560380935669, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 4582400, "num_env_steps_trained": 4582400, "num_agent_steps_sampled": 9164800, "num_agent_steps_trained": 9164800}, "sampler_results": {"episode_reward_max": 630.0, "episode_reward_min": 465.0, "episode_reward_mean": 572.09, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 223.0}, "policy_reward_max": {"ppo": 320.0}, "policy_reward_mean": {"ppo": 286.045}, "custom_metrics": {"sparse_reward_mean": 197.8, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 176.49, "shaped_reward_min": 145, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.4, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 17.32, 
"onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 15.29, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 17.12, "useful_onion_pickup_agent_1_min": 6, "useful_onion_pickup_agent_1_max": 26, "onion_drop_agent_0_mean": 0.06, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.15, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.03, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.1, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 15.03, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 16.82, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 26, "dish_pickup_agent_0_mean": 5.49, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 5.02, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.42, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.91, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.05, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.07, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.25, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.76, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.2, "soup_delivery_agent_0_min": 2, 
"soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.72, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.03, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 16.82, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 26, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.03, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 16.82, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 26, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 582.0, 570.0, 525.0, 584.0, 570.0, 576.0, 573.0, 576.0, 579.0, 576.0, 584.0, 582.0, 627.0, 582.0, 576.0, 630.0, 582.0, 579.0, 570.0, 522.0, 576.0, 582.0, 582.0, 525.0, 579.0, 576.0, 570.0, 579.0, 582.0, 573.0, 573.0, 525.0, 627.0, 582.0, 576.0, 576.0, 579.0, 582.0, 524.0, 582.0, 582.0, 573.0, 579.0, 522.0, 573.0, 579.0, 630.0, 630.0, 530.0, 627.0, 579.0, 530.0, 525.0, 582.0, 573.0, 576.0, 573.0, 579.0, 582.0, 570.0, 630.0, 482.0, 519.0, 582.0, 567.0, 579.0, 525.0, 582.0, 579.0, 465.0, 582.0, 582.0, 573.0, 579.0, 533.0, 570.0, 579.0, 573.0, 579.0, 579.0, 573.0, 582.0, 582.0, 582.0, 573.0, 582.0, 570.0, 579.0, 576.0, 582.0, 576.0, 579.0, 582.0, 587.0, 579.0, 579.0, 530.0, 530.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [296.0, 283.0, 289.0, 293.0, 285.0, 285.0, 266.0, 259.0, 290.0, 294.0, 293.0, 277.0, 286.0, 290.0, 281.0, 292.0, 284.0, 292.0, 296.0, 283.0, 291.0, 285.0, 283.0, 301.0, 284.0, 298.0, 319.0, 308.0, 291.0, 291.0, 284.0, 292.0, 313.0, 317.0, 293.0, 289.0, 285.0, 294.0, 283.0, 287.0, 266.0, 256.0, 288.0, 288.0, 295.0, 287.0, 290.0, 292.0, 266.0, 
259.0, 288.0, 291.0, 286.0, 290.0, 287.0, 283.0, 291.0, 288.0, 290.0, 292.0, 283.0, 290.0, 281.0, 292.0, 264.0, 261.0, 318.0, 309.0, 285.0, 297.0, 285.0, 291.0, 289.0, 287.0, 291.0, 288.0, 291.0, 291.0, 250.0, 274.0, 291.0, 291.0, 296.0, 286.0, 284.0, 289.0, 289.0, 290.0, 264.0, 258.0, 284.0, 289.0, 284.0, 295.0, 313.0, 317.0, 317.0, 313.0, 265.0, 265.0, 320.0, 307.0, 287.0, 292.0, 269.0, 261.0, 262.0, 263.0, 288.0, 294.0, 280.0, 293.0, 292.0, 284.0, 286.0, 287.0, 285.0, 294.0, 287.0, 295.0, 278.0, 292.0, 314.0, 316.0, 235.0, 247.0, 259.0, 260.0, 289.0, 293.0, 280.0, 287.0, 293.0, 286.0, 267.0, 258.0, 290.0, 292.0, 286.0, 293.0, 242.0, 223.0, 295.0, 287.0, 292.0, 290.0, 289.0, 284.0, 288.0, 291.0, 260.0, 273.0, 288.0, 282.0, 285.0, 294.0, 283.0, 290.0, 290.0, 289.0, 288.0, 291.0, 291.0, 282.0, 288.0, 294.0, 284.0, 298.0, 292.0, 290.0, 283.0, 290.0, 293.0, 289.0, 281.0, 289.0, 289.0, 290.0, 291.0, 285.0, 293.0, 289.0, 290.0, 286.0, 287.0, 292.0, 288.0, 294.0, 285.0, 302.0, 286.0, 293.0, 295.0, 284.0, 264.0, 266.0, 264.0, 266.0, 281.0, 298.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6987775753081857, "mean_inference_ms": 1.2485649907058904, "mean_action_processing_ms": 0.1339327316237972, "mean_env_wait_ms": 0.8411460706100078, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 630.0, "episode_reward_min": 465.0, "episode_reward_mean": 572.09, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 223.0}, "policy_reward_max": {"ppo": 320.0}, "policy_reward_mean": {"ppo": 286.045}, "hist_stats": {"episode_reward": [579.0, 582.0, 570.0, 525.0, 584.0, 570.0, 576.0, 573.0, 576.0, 579.0, 576.0, 584.0, 582.0, 627.0, 582.0, 576.0, 630.0, 582.0, 579.0, 570.0, 522.0, 576.0, 582.0, 582.0, 525.0, 579.0, 576.0, 570.0, 579.0, 582.0, 573.0, 573.0, 525.0, 627.0, 582.0, 576.0, 576.0, 579.0, 582.0, 524.0, 582.0, 582.0, 573.0, 579.0, 522.0, 573.0, 579.0, 630.0, 630.0, 530.0, 627.0, 579.0, 530.0, 525.0, 582.0, 573.0, 576.0, 
573.0, 579.0, 582.0, 570.0, 630.0, 482.0, 519.0, 582.0, 567.0, 579.0, 525.0, 582.0, 579.0, 465.0, 582.0, 582.0, 573.0, 579.0, 533.0, 570.0, 579.0, 573.0, 579.0, 579.0, 573.0, 582.0, 582.0, 582.0, 573.0, 582.0, 570.0, 579.0, 576.0, 582.0, 576.0, 579.0, 582.0, 587.0, 579.0, 579.0, 530.0, 530.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [296.0, 283.0, 289.0, 293.0, 285.0, 285.0, 266.0, 259.0, 290.0, 294.0, 293.0, 277.0, 286.0, 290.0, 281.0, 292.0, 284.0, 292.0, 296.0, 283.0, 291.0, 285.0, 283.0, 301.0, 284.0, 298.0, 319.0, 308.0, 291.0, 291.0, 284.0, 292.0, 313.0, 317.0, 293.0, 289.0, 285.0, 294.0, 283.0, 287.0, 266.0, 256.0, 288.0, 288.0, 295.0, 287.0, 290.0, 292.0, 266.0, 259.0, 288.0, 291.0, 286.0, 290.0, 287.0, 283.0, 291.0, 288.0, 290.0, 292.0, 283.0, 290.0, 281.0, 292.0, 264.0, 261.0, 318.0, 309.0, 285.0, 297.0, 285.0, 291.0, 289.0, 287.0, 291.0, 288.0, 291.0, 291.0, 250.0, 274.0, 291.0, 291.0, 296.0, 286.0, 284.0, 289.0, 289.0, 290.0, 264.0, 258.0, 284.0, 289.0, 284.0, 295.0, 313.0, 317.0, 317.0, 313.0, 265.0, 265.0, 320.0, 307.0, 287.0, 292.0, 269.0, 261.0, 262.0, 263.0, 288.0, 294.0, 280.0, 293.0, 292.0, 284.0, 286.0, 287.0, 285.0, 294.0, 287.0, 295.0, 278.0, 292.0, 314.0, 316.0, 235.0, 247.0, 259.0, 260.0, 289.0, 293.0, 280.0, 287.0, 293.0, 286.0, 267.0, 258.0, 290.0, 292.0, 286.0, 293.0, 242.0, 223.0, 295.0, 287.0, 292.0, 290.0, 289.0, 284.0, 288.0, 291.0, 260.0, 273.0, 288.0, 282.0, 285.0, 294.0, 283.0, 290.0, 290.0, 289.0, 288.0, 291.0, 291.0, 282.0, 288.0, 
294.0, 284.0, 298.0, 292.0, 290.0, 283.0, 290.0, 293.0, 289.0, 281.0, 289.0, 289.0, 290.0, 291.0, 285.0, 293.0, 289.0, 290.0, 286.0, 287.0, 292.0, 288.0, 294.0, 285.0, 302.0, 286.0, 293.0, 295.0, 284.0, 264.0, 266.0, 264.0, 266.0, 281.0, 298.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6987775753081857, "mean_inference_ms": 1.2485649907058904, "mean_action_processing_ms": 0.1339327316237972, "mean_env_wait_ms": 0.8411460706100078, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 9164800, "num_agent_steps_trained": 9164800, "num_env_steps_sampled": 4582400, "num_env_steps_trained": 4582400, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 4582400, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 9164800, "timers": {"training_iteration_time_ms": 3675.323, "learn_time_ms": 1165.406, "learn_throughput": 10983.294, "synch_weights_time_ms": 9.801}, "counters": {"num_env_steps_sampled": 4582400, "num_env_steps_trained": 4582400, "num_agent_steps_sampled": 9164800, "num_agent_steps_trained": 9164800}, "done": false, "episodes_total": 11456, "training_iteration": 358, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-23-44", "timestamp": 1666581824, "time_this_iter_s": 3.722245454788208, "time_total_s": 1370.7347152233124, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1370.7347152233124, "timesteps_since_restore": 0, "iterations_since_restore": 358, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.96, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 196.8, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 176.59, "shaped_reward_min": 128, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.96, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 16.79, "onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 15.79, "useful_onion_pickup_agent_0_min": 9, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 16.61, "useful_onion_pickup_agent_1_min": 6, "useful_onion_pickup_agent_1_max": 25, "onion_drop_agent_0_mean": 0.12, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.13, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.07, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, 
"useful_onion_drop_agent_1_mean": 0.08, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 15.52, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 16.38, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 25, "dish_pickup_agent_0_mean": 5.25, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 5.24, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.2, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.13, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.03, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.07, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.04, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.95, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.98, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.89, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.52, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 16.38, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 25, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.52, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 16.38, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 25, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.003429124830290675, "policy_loss": -0.0037802057340741158, "vf_loss": 
7.628129482269287, "vf_explained_var": 0.5590401887893677, "kl": 0.002472866792231798, "entropy": 0.823464572429657, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 4595200, "num_env_steps_trained": 4595200, "num_agent_steps_sampled": 9190400, "num_agent_steps_trained": 9190400}, "sampler_results": {"episode_reward_max": 630.0, "episode_reward_min": 408.0, "episode_reward_mean": 570.19, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 203.0}, "policy_reward_max": {"ppo": 320.0}, "policy_reward_mean": {"ppo": 285.095}, "custom_metrics": {"sparse_reward_mean": 196.8, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 176.59, "shaped_reward_min": 128, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.96, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 16.79, 
"onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 15.79, "useful_onion_pickup_agent_0_min": 9, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 16.61, "useful_onion_pickup_agent_1_min": 6, "useful_onion_pickup_agent_1_max": 25, "onion_drop_agent_0_mean": 0.12, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.13, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.07, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.08, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 15.52, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 16.38, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 25, "dish_pickup_agent_0_mean": 5.25, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 5.24, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.2, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.13, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.03, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.07, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.04, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.95, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.98, "soup_delivery_agent_0_min": 2, 
"soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.89, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.52, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 16.38, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 25, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.52, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 16.38, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 25, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [525.0, 627.0, 582.0, 576.0, 576.0, 579.0, 582.0, 524.0, 582.0, 582.0, 573.0, 579.0, 522.0, 573.0, 579.0, 630.0, 630.0, 530.0, 627.0, 579.0, 530.0, 525.0, 582.0, 573.0, 576.0, 573.0, 579.0, 582.0, 570.0, 630.0, 482.0, 519.0, 582.0, 567.0, 579.0, 525.0, 582.0, 579.0, 465.0, 582.0, 582.0, 573.0, 579.0, 533.0, 570.0, 579.0, 573.0, 579.0, 579.0, 573.0, 582.0, 582.0, 582.0, 573.0, 582.0, 570.0, 579.0, 576.0, 582.0, 576.0, 579.0, 582.0, 587.0, 579.0, 579.0, 530.0, 530.0, 579.0, 579.0, 579.0, 582.0, 576.0, 582.0, 570.0, 579.0, 582.0, 582.0, 630.0, 579.0, 579.0, 582.0, 519.0, 579.0, 587.0, 587.0, 573.0, 576.0, 587.0, 408.0, 522.0, 579.0, 579.0, 579.0, 582.0, 581.0, 582.0, 579.0, 487.0, 582.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [264.0, 261.0, 318.0, 309.0, 285.0, 297.0, 285.0, 291.0, 289.0, 287.0, 291.0, 288.0, 291.0, 291.0, 250.0, 274.0, 291.0, 291.0, 296.0, 286.0, 284.0, 289.0, 289.0, 290.0, 264.0, 258.0, 284.0, 289.0, 284.0, 295.0, 313.0, 317.0, 317.0, 313.0, 265.0, 265.0, 320.0, 307.0, 287.0, 292.0, 269.0, 261.0, 262.0, 263.0, 288.0, 294.0, 280.0, 293.0, 292.0, 
284.0, 286.0, 287.0, 285.0, 294.0, 287.0, 295.0, 278.0, 292.0, 314.0, 316.0, 235.0, 247.0, 259.0, 260.0, 289.0, 293.0, 280.0, 287.0, 293.0, 286.0, 267.0, 258.0, 290.0, 292.0, 286.0, 293.0, 242.0, 223.0, 295.0, 287.0, 292.0, 290.0, 289.0, 284.0, 288.0, 291.0, 260.0, 273.0, 288.0, 282.0, 285.0, 294.0, 283.0, 290.0, 290.0, 289.0, 288.0, 291.0, 291.0, 282.0, 288.0, 294.0, 284.0, 298.0, 292.0, 290.0, 283.0, 290.0, 293.0, 289.0, 281.0, 289.0, 289.0, 290.0, 291.0, 285.0, 293.0, 289.0, 290.0, 286.0, 287.0, 292.0, 288.0, 294.0, 285.0, 302.0, 286.0, 293.0, 295.0, 284.0, 264.0, 266.0, 264.0, 266.0, 281.0, 298.0, 293.0, 286.0, 290.0, 289.0, 286.0, 296.0, 291.0, 285.0, 287.0, 295.0, 283.0, 287.0, 290.0, 289.0, 289.0, 293.0, 291.0, 291.0, 314.0, 316.0, 290.0, 289.0, 292.0, 287.0, 290.0, 292.0, 260.0, 259.0, 288.0, 291.0, 295.0, 292.0, 296.0, 291.0, 290.0, 283.0, 281.0, 295.0, 290.0, 297.0, 203.0, 205.0, 255.0, 267.0, 290.0, 289.0, 288.0, 291.0, 287.0, 292.0, 294.0, 288.0, 281.0, 300.0, 291.0, 291.0, 284.0, 295.0, 245.0, 242.0, 290.0, 292.0, 285.0, 297.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6987291256930882, "mean_inference_ms": 1.248387604510305, "mean_action_processing_ms": 0.13392288763838628, "mean_env_wait_ms": 0.8410501699452928, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 630.0, "episode_reward_min": 408.0, "episode_reward_mean": 570.19, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 203.0}, "policy_reward_max": {"ppo": 320.0}, "policy_reward_mean": {"ppo": 285.095}, "hist_stats": {"episode_reward": [525.0, 627.0, 582.0, 576.0, 576.0, 579.0, 582.0, 524.0, 582.0, 582.0, 573.0, 579.0, 522.0, 573.0, 579.0, 630.0, 630.0, 530.0, 627.0, 579.0, 530.0, 525.0, 582.0, 573.0, 576.0, 573.0, 579.0, 582.0, 570.0, 630.0, 482.0, 519.0, 582.0, 567.0, 579.0, 525.0, 582.0, 579.0, 465.0, 582.0, 582.0, 573.0, 579.0, 533.0, 570.0, 579.0, 573.0, 579.0, 579.0, 573.0, 582.0, 582.0, 582.0, 573.0, 582.0, 570.0, 579.0, 
576.0, 582.0, 576.0, 579.0, 582.0, 587.0, 579.0, 579.0, 530.0, 530.0, 579.0, 579.0, 579.0, 582.0, 576.0, 582.0, 570.0, 579.0, 582.0, 582.0, 630.0, 579.0, 579.0, 582.0, 519.0, 579.0, 587.0, 587.0, 573.0, 576.0, 587.0, 408.0, 522.0, 579.0, 579.0, 579.0, 582.0, 581.0, 582.0, 579.0, 487.0, 582.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [264.0, 261.0, 318.0, 309.0, 285.0, 297.0, 285.0, 291.0, 289.0, 287.0, 291.0, 288.0, 291.0, 291.0, 250.0, 274.0, 291.0, 291.0, 296.0, 286.0, 284.0, 289.0, 289.0, 290.0, 264.0, 258.0, 284.0, 289.0, 284.0, 295.0, 313.0, 317.0, 317.0, 313.0, 265.0, 265.0, 320.0, 307.0, 287.0, 292.0, 269.0, 261.0, 262.0, 263.0, 288.0, 294.0, 280.0, 293.0, 292.0, 284.0, 286.0, 287.0, 285.0, 294.0, 287.0, 295.0, 278.0, 292.0, 314.0, 316.0, 235.0, 247.0, 259.0, 260.0, 289.0, 293.0, 280.0, 287.0, 293.0, 286.0, 267.0, 258.0, 290.0, 292.0, 286.0, 293.0, 242.0, 223.0, 295.0, 287.0, 292.0, 290.0, 289.0, 284.0, 288.0, 291.0, 260.0, 273.0, 288.0, 282.0, 285.0, 294.0, 283.0, 290.0, 290.0, 289.0, 288.0, 291.0, 291.0, 282.0, 288.0, 294.0, 284.0, 298.0, 292.0, 290.0, 283.0, 290.0, 293.0, 289.0, 281.0, 289.0, 289.0, 290.0, 291.0, 285.0, 293.0, 289.0, 290.0, 286.0, 287.0, 292.0, 288.0, 294.0, 285.0, 302.0, 286.0, 293.0, 295.0, 284.0, 264.0, 266.0, 264.0, 266.0, 281.0, 298.0, 293.0, 286.0, 290.0, 289.0, 286.0, 296.0, 291.0, 285.0, 287.0, 295.0, 283.0, 287.0, 290.0, 289.0, 289.0, 293.0, 291.0, 291.0, 314.0, 316.0, 290.0, 289.0, 292.0, 287.0, 290.0, 292.0, 260.0, 259.0, 288.0, 
291.0, 295.0, 292.0, 296.0, 291.0, 290.0, 283.0, 281.0, 295.0, 290.0, 297.0, 203.0, 205.0, 255.0, 267.0, 290.0, 289.0, 288.0, 291.0, 287.0, 292.0, 294.0, 288.0, 281.0, 300.0, 291.0, 291.0, 284.0, 295.0, 245.0, 242.0, 290.0, 292.0, 285.0, 297.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6987291256930882, "mean_inference_ms": 1.248387604510305, "mean_action_processing_ms": 0.13392288763838628, "mean_env_wait_ms": 0.8410501699452928, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 9190400, "num_agent_steps_trained": 9190400, "num_env_steps_sampled": 4595200, "num_env_steps_trained": 4595200, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 4595200, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 9190400, "timers": {"training_iteration_time_ms": 3669.82, "learn_time_ms": 1154.499, "learn_throughput": 11087.057, "synch_weights_time_ms": 9.735}, "counters": {"num_env_steps_sampled": 4595200, "num_env_steps_trained": 4595200, "num_agent_steps_sampled": 9190400, "num_agent_steps_trained": 9190400}, "done": false, "episodes_total": 11488, "training_iteration": 359, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-23-47", "timestamp": 1666581827, "time_this_iter_s": 3.7149648666381836, "time_total_s": 1374.4496800899506, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1374.4496800899506, "timesteps_since_restore": 0, "iterations_since_restore": 359, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.46666666666667, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 197.8, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 177.58, "shaped_reward_min": 128, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.01, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 16.97, "onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 27, "useful_onion_pickup_agent_0_mean": 15.8, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 16.79, "useful_onion_pickup_agent_1_min": 6, "useful_onion_pickup_agent_1_max": 26, "onion_drop_agent_0_mean": 0.12, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.1, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.08, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, 
"useful_onion_drop_agent_1_mean": 0.05, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.54, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 16.58, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 25, "dish_pickup_agent_0_mean": 5.32, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.14, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.28, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 5.06, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.03, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.04, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.11, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.93, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.05, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.88, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.54, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 16.58, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 25, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.54, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 16.58, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 25, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0007686268072575331, "policy_loss": -0.001121054170653224, "vf_loss": 
7.592923164367676, "vf_explained_var": 0.575827956199646, "kl": 0.002229448640719056, "entropy": 0.8137279748916626, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 4608000, "num_env_steps_trained": 4608000, "num_agent_steps_sampled": 9216000, "num_agent_steps_trained": 9216000}, "sampler_results": {"episode_reward_max": 636.0, "episode_reward_min": 408.0, "episode_reward_mean": 573.18, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 203.0}, "policy_reward_max": {"ppo": 321.0}, "policy_reward_mean": {"ppo": 286.59}, "custom_metrics": {"sparse_reward_mean": 197.8, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 177.58, "shaped_reward_min": 128, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.01, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 16.97, 
"onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 27, "useful_onion_pickup_agent_0_mean": 15.8, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 16.79, "useful_onion_pickup_agent_1_min": 6, "useful_onion_pickup_agent_1_max": 26, "onion_drop_agent_0_mean": 0.12, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.1, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.08, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.05, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.54, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 16.58, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 25, "dish_pickup_agent_0_mean": 5.32, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.14, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.28, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 5.06, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.03, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.04, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.11, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.93, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.05, "soup_delivery_agent_0_min": 2, 
"soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.88, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.54, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 16.58, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 25, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.54, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 16.58, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 25, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 567.0, 579.0, 525.0, 582.0, 579.0, 465.0, 582.0, 582.0, 573.0, 579.0, 533.0, 570.0, 579.0, 573.0, 579.0, 579.0, 573.0, 582.0, 582.0, 582.0, 573.0, 582.0, 570.0, 579.0, 576.0, 582.0, 576.0, 579.0, 582.0, 587.0, 579.0, 579.0, 530.0, 530.0, 579.0, 579.0, 579.0, 582.0, 576.0, 582.0, 570.0, 579.0, 582.0, 582.0, 630.0, 579.0, 579.0, 582.0, 519.0, 579.0, 587.0, 587.0, 573.0, 576.0, 587.0, 408.0, 522.0, 579.0, 579.0, 579.0, 582.0, 581.0, 582.0, 579.0, 487.0, 582.0, 582.0, 570.0, 582.0, 576.0, 579.0, 636.0, 582.0, 579.0, 576.0, 576.0, 582.0, 584.0, 579.0, 633.0, 579.0, 530.0, 579.0, 570.0, 576.0, 576.0, 630.0, 579.0, 579.0, 579.0, 576.0, 582.0, 579.0, 587.0, 581.0, 576.0, 576.0, 573.0, 536.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [289.0, 293.0, 280.0, 287.0, 293.0, 286.0, 267.0, 258.0, 290.0, 292.0, 286.0, 293.0, 242.0, 223.0, 295.0, 287.0, 292.0, 290.0, 289.0, 284.0, 288.0, 291.0, 260.0, 273.0, 288.0, 282.0, 285.0, 294.0, 283.0, 290.0, 290.0, 289.0, 288.0, 291.0, 291.0, 282.0, 288.0, 294.0, 284.0, 298.0, 292.0, 290.0, 283.0, 290.0, 293.0, 289.0, 281.0, 289.0, 289.0, 
290.0, 291.0, 285.0, 293.0, 289.0, 290.0, 286.0, 287.0, 292.0, 288.0, 294.0, 285.0, 302.0, 286.0, 293.0, 295.0, 284.0, 264.0, 266.0, 264.0, 266.0, 281.0, 298.0, 293.0, 286.0, 290.0, 289.0, 286.0, 296.0, 291.0, 285.0, 287.0, 295.0, 283.0, 287.0, 290.0, 289.0, 289.0, 293.0, 291.0, 291.0, 314.0, 316.0, 290.0, 289.0, 292.0, 287.0, 290.0, 292.0, 260.0, 259.0, 288.0, 291.0, 295.0, 292.0, 296.0, 291.0, 290.0, 283.0, 281.0, 295.0, 290.0, 297.0, 203.0, 205.0, 255.0, 267.0, 290.0, 289.0, 288.0, 291.0, 287.0, 292.0, 294.0, 288.0, 281.0, 300.0, 291.0, 291.0, 284.0, 295.0, 245.0, 242.0, 290.0, 292.0, 285.0, 297.0, 288.0, 282.0, 291.0, 291.0, 288.0, 288.0, 294.0, 285.0, 318.0, 318.0, 286.0, 296.0, 286.0, 293.0, 288.0, 288.0, 295.0, 281.0, 292.0, 290.0, 292.0, 292.0, 287.0, 292.0, 312.0, 321.0, 292.0, 287.0, 268.0, 262.0, 289.0, 290.0, 288.0, 282.0, 281.0, 295.0, 281.0, 295.0, 316.0, 314.0, 291.0, 288.0, 291.0, 288.0, 287.0, 292.0, 290.0, 286.0, 289.0, 293.0, 285.0, 294.0, 294.0, 293.0, 294.0, 287.0, 282.0, 294.0, 285.0, 291.0, 286.0, 287.0, 266.0, 270.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6986628664827095, "mean_inference_ms": 1.248223427178441, "mean_action_processing_ms": 0.13391442963440256, "mean_env_wait_ms": 0.8409629142801245, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 636.0, "episode_reward_min": 408.0, "episode_reward_mean": 573.18, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 203.0}, "policy_reward_max": {"ppo": 321.0}, "policy_reward_mean": {"ppo": 286.59}, "hist_stats": {"episode_reward": [582.0, 567.0, 579.0, 525.0, 582.0, 579.0, 465.0, 582.0, 582.0, 573.0, 579.0, 533.0, 570.0, 579.0, 573.0, 579.0, 579.0, 573.0, 582.0, 582.0, 582.0, 573.0, 582.0, 570.0, 579.0, 576.0, 582.0, 576.0, 579.0, 582.0, 587.0, 579.0, 579.0, 530.0, 530.0, 579.0, 579.0, 579.0, 582.0, 576.0, 582.0, 570.0, 579.0, 582.0, 582.0, 630.0, 579.0, 579.0, 582.0, 519.0, 579.0, 587.0, 587.0, 573.0, 576.0, 587.0, 408.0, 
522.0, 579.0, 579.0, 579.0, 582.0, 581.0, 582.0, 579.0, 487.0, 582.0, 582.0, 570.0, 582.0, 576.0, 579.0, 636.0, 582.0, 579.0, 576.0, 576.0, 582.0, 584.0, 579.0, 633.0, 579.0, 530.0, 579.0, 570.0, 576.0, 576.0, 630.0, 579.0, 579.0, 579.0, 576.0, 582.0, 579.0, 587.0, 581.0, 576.0, 576.0, 573.0, 536.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [289.0, 293.0, 280.0, 287.0, 293.0, 286.0, 267.0, 258.0, 290.0, 292.0, 286.0, 293.0, 242.0, 223.0, 295.0, 287.0, 292.0, 290.0, 289.0, 284.0, 288.0, 291.0, 260.0, 273.0, 288.0, 282.0, 285.0, 294.0, 283.0, 290.0, 290.0, 289.0, 288.0, 291.0, 291.0, 282.0, 288.0, 294.0, 284.0, 298.0, 292.0, 290.0, 283.0, 290.0, 293.0, 289.0, 281.0, 289.0, 289.0, 290.0, 291.0, 285.0, 293.0, 289.0, 290.0, 286.0, 287.0, 292.0, 288.0, 294.0, 285.0, 302.0, 286.0, 293.0, 295.0, 284.0, 264.0, 266.0, 264.0, 266.0, 281.0, 298.0, 293.0, 286.0, 290.0, 289.0, 286.0, 296.0, 291.0, 285.0, 287.0, 295.0, 283.0, 287.0, 290.0, 289.0, 289.0, 293.0, 291.0, 291.0, 314.0, 316.0, 290.0, 289.0, 292.0, 287.0, 290.0, 292.0, 260.0, 259.0, 288.0, 291.0, 295.0, 292.0, 296.0, 291.0, 290.0, 283.0, 281.0, 295.0, 290.0, 297.0, 203.0, 205.0, 255.0, 267.0, 290.0, 289.0, 288.0, 291.0, 287.0, 292.0, 294.0, 288.0, 281.0, 300.0, 291.0, 291.0, 284.0, 295.0, 245.0, 242.0, 290.0, 292.0, 285.0, 297.0, 288.0, 282.0, 291.0, 291.0, 288.0, 288.0, 294.0, 285.0, 318.0, 318.0, 286.0, 296.0, 286.0, 293.0, 288.0, 288.0, 295.0, 281.0, 292.0, 290.0, 292.0, 292.0, 287.0, 292.0, 312.0, 321.0, 292.0, 287.0, 268.0, 
262.0, 289.0, 290.0, 288.0, 282.0, 281.0, 295.0, 281.0, 295.0, 316.0, 314.0, 291.0, 288.0, 291.0, 288.0, 287.0, 292.0, 290.0, 286.0, 289.0, 293.0, 285.0, 294.0, 294.0, 293.0, 294.0, 287.0, 282.0, 294.0, 285.0, 291.0, 286.0, 287.0, 266.0, 270.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6986628664827095, "mean_inference_ms": 1.248223427178441, "mean_action_processing_ms": 0.13391442963440256, "mean_env_wait_ms": 0.8409629142801245, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 9216000, "num_agent_steps_trained": 9216000, "num_env_steps_sampled": 4608000, "num_env_steps_trained": 4608000, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 4608000, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 9216000, "timers": {"training_iteration_time_ms": 3663.145, "learn_time_ms": 1152.436, "learn_throughput": 11106.91, "synch_weights_time_ms": 9.775}, "counters": {"num_env_steps_sampled": 4608000, "num_env_steps_trained": 4608000, "num_agent_steps_sampled": 9216000, "num_agent_steps_trained": 9216000}, "done": false, "episodes_total": 11520, "training_iteration": 360, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-23-51", "timestamp": 1666581831, "time_this_iter_s": 3.6528384685516357, "time_total_s": 1378.1025185585022, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1378.1025185585022, "timesteps_since_restore": 0, "iterations_since_restore": 360, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.48, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 198.0, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 178.1, "shaped_reward_min": 128, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.41, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 17.6, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 27, "useful_onion_pickup_agent_0_mean": 15.21, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 25, "useful_onion_pickup_agent_1_mean": 17.44, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 27, "onion_drop_agent_0_mean": 0.1, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.09, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.05, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, 
"useful_onion_drop_agent_1_mean": 0.05, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.0, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 25, "potting_onion_agent_1_mean": 17.23, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 26, "dish_pickup_agent_0_mean": 5.51, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 4.95, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.47, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.9, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.03, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.03, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.32, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.74, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.25, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.7, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.0, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 25, "optimal_onion_potting_agent_1_mean": 17.23, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 26, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.0, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 25, "viable_onion_potting_agent_1_mean": 17.23, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 26, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0016958118649199605, "policy_loss": -0.0020420937798917294, "vf_loss": 
7.6369242668151855, "vf_explained_var": 0.5737049579620361, "kl": 0.0027597371954470873, "entropy": 0.8348207473754883, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 4620800, "num_env_steps_trained": 4620800, "num_agent_steps_sampled": 9241600, "num_agent_steps_trained": 9241600}, "sampler_results": {"episode_reward_max": 636.0, "episode_reward_min": 408.0, "episode_reward_mean": 574.1, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 203.0}, "policy_reward_max": {"ppo": 321.0}, "policy_reward_mean": {"ppo": 287.05}, "custom_metrics": {"sparse_reward_mean": 198.0, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 178.1, "shaped_reward_min": 128, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.41, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 17.6, 
"onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 27, "useful_onion_pickup_agent_0_mean": 15.21, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 25, "useful_onion_pickup_agent_1_mean": 17.44, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 27, "onion_drop_agent_0_mean": 0.1, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.09, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.05, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.05, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.0, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 25, "potting_onion_agent_1_mean": 17.23, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 26, "dish_pickup_agent_0_mean": 5.51, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 4.95, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.47, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.9, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.03, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.03, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.32, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.74, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.25, "soup_delivery_agent_0_min": 2, 
"soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.7, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.0, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 25, "optimal_onion_potting_agent_1_mean": 17.23, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 26, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.0, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 25, "viable_onion_potting_agent_1_mean": 17.23, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 26, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 530.0, 530.0, 579.0, 579.0, 579.0, 582.0, 576.0, 582.0, 570.0, 579.0, 582.0, 582.0, 630.0, 579.0, 579.0, 582.0, 519.0, 579.0, 587.0, 587.0, 573.0, 576.0, 587.0, 408.0, 522.0, 579.0, 579.0, 579.0, 582.0, 581.0, 582.0, 579.0, 487.0, 582.0, 582.0, 570.0, 582.0, 576.0, 579.0, 636.0, 582.0, 579.0, 576.0, 576.0, 582.0, 584.0, 579.0, 633.0, 579.0, 530.0, 579.0, 570.0, 576.0, 576.0, 630.0, 579.0, 579.0, 579.0, 576.0, 582.0, 579.0, 587.0, 581.0, 576.0, 576.0, 573.0, 536.0, 573.0, 579.0, 579.0, 582.0, 579.0, 582.0, 579.0, 579.0, 522.0, 579.0, 582.0, 582.0, 582.0, 570.0, 582.0, 627.0, 530.0, 582.0, 579.0, 582.0, 579.0, 473.0, 576.0, 579.0, 587.0, 582.0, 582.0, 579.0, 627.0, 582.0, 525.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [295.0, 284.0, 264.0, 266.0, 264.0, 266.0, 281.0, 298.0, 293.0, 286.0, 290.0, 289.0, 286.0, 296.0, 291.0, 285.0, 287.0, 295.0, 283.0, 287.0, 290.0, 289.0, 289.0, 293.0, 291.0, 291.0, 314.0, 316.0, 290.0, 289.0, 292.0, 287.0, 290.0, 292.0, 260.0, 259.0, 288.0, 291.0, 295.0, 292.0, 296.0, 291.0, 290.0, 283.0, 281.0, 295.0, 290.0, 297.0, 203.0, 
205.0, 255.0, 267.0, 290.0, 289.0, 288.0, 291.0, 287.0, 292.0, 294.0, 288.0, 281.0, 300.0, 291.0, 291.0, 284.0, 295.0, 245.0, 242.0, 290.0, 292.0, 285.0, 297.0, 288.0, 282.0, 291.0, 291.0, 288.0, 288.0, 294.0, 285.0, 318.0, 318.0, 286.0, 296.0, 286.0, 293.0, 288.0, 288.0, 295.0, 281.0, 292.0, 290.0, 292.0, 292.0, 287.0, 292.0, 312.0, 321.0, 292.0, 287.0, 268.0, 262.0, 289.0, 290.0, 288.0, 282.0, 281.0, 295.0, 281.0, 295.0, 316.0, 314.0, 291.0, 288.0, 291.0, 288.0, 287.0, 292.0, 290.0, 286.0, 289.0, 293.0, 285.0, 294.0, 294.0, 293.0, 294.0, 287.0, 282.0, 294.0, 285.0, 291.0, 286.0, 287.0, 266.0, 270.0, 283.0, 290.0, 288.0, 291.0, 291.0, 288.0, 291.0, 291.0, 293.0, 286.0, 288.0, 294.0, 286.0, 293.0, 294.0, 285.0, 256.0, 266.0, 284.0, 295.0, 289.0, 293.0, 290.0, 292.0, 290.0, 292.0, 291.0, 279.0, 289.0, 293.0, 311.0, 316.0, 258.0, 272.0, 293.0, 289.0, 290.0, 289.0, 288.0, 294.0, 285.0, 294.0, 234.0, 239.0, 287.0, 289.0, 283.0, 296.0, 298.0, 289.0, 280.0, 302.0, 286.0, 296.0, 291.0, 288.0, 320.0, 307.0, 285.0, 297.0, 262.0, 263.0, 292.0, 290.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6985839313465746, "mean_inference_ms": 1.2480389289272427, "mean_action_processing_ms": 0.13390556369993323, "mean_env_wait_ms": 0.8408671873955664, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 636.0, "episode_reward_min": 408.0, "episode_reward_mean": 574.1, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 203.0}, "policy_reward_max": {"ppo": 321.0}, "policy_reward_mean": {"ppo": 287.05}, "hist_stats": {"episode_reward": [579.0, 530.0, 530.0, 579.0, 579.0, 579.0, 582.0, 576.0, 582.0, 570.0, 579.0, 582.0, 582.0, 630.0, 579.0, 579.0, 582.0, 519.0, 579.0, 587.0, 587.0, 573.0, 576.0, 587.0, 408.0, 522.0, 579.0, 579.0, 579.0, 582.0, 581.0, 582.0, 579.0, 487.0, 582.0, 582.0, 570.0, 582.0, 576.0, 579.0, 636.0, 582.0, 579.0, 576.0, 576.0, 582.0, 584.0, 579.0, 633.0, 579.0, 530.0, 579.0, 570.0, 576.0, 576.0, 630.0, 579.0, 
579.0, 579.0, 576.0, 582.0, 579.0, 587.0, 581.0, 576.0, 576.0, 573.0, 536.0, 573.0, 579.0, 579.0, 582.0, 579.0, 582.0, 579.0, 579.0, 522.0, 579.0, 582.0, 582.0, 582.0, 570.0, 582.0, 627.0, 530.0, 582.0, 579.0, 582.0, 579.0, 473.0, 576.0, 579.0, 587.0, 582.0, 582.0, 579.0, 627.0, 582.0, 525.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [295.0, 284.0, 264.0, 266.0, 264.0, 266.0, 281.0, 298.0, 293.0, 286.0, 290.0, 289.0, 286.0, 296.0, 291.0, 285.0, 287.0, 295.0, 283.0, 287.0, 290.0, 289.0, 289.0, 293.0, 291.0, 291.0, 314.0, 316.0, 290.0, 289.0, 292.0, 287.0, 290.0, 292.0, 260.0, 259.0, 288.0, 291.0, 295.0, 292.0, 296.0, 291.0, 290.0, 283.0, 281.0, 295.0, 290.0, 297.0, 203.0, 205.0, 255.0, 267.0, 290.0, 289.0, 288.0, 291.0, 287.0, 292.0, 294.0, 288.0, 281.0, 300.0, 291.0, 291.0, 284.0, 295.0, 245.0, 242.0, 290.0, 292.0, 285.0, 297.0, 288.0, 282.0, 291.0, 291.0, 288.0, 288.0, 294.0, 285.0, 318.0, 318.0, 286.0, 296.0, 286.0, 293.0, 288.0, 288.0, 295.0, 281.0, 292.0, 290.0, 292.0, 292.0, 287.0, 292.0, 312.0, 321.0, 292.0, 287.0, 268.0, 262.0, 289.0, 290.0, 288.0, 282.0, 281.0, 295.0, 281.0, 295.0, 316.0, 314.0, 291.0, 288.0, 291.0, 288.0, 287.0, 292.0, 290.0, 286.0, 289.0, 293.0, 285.0, 294.0, 294.0, 293.0, 294.0, 287.0, 282.0, 294.0, 285.0, 291.0, 286.0, 287.0, 266.0, 270.0, 283.0, 290.0, 288.0, 291.0, 291.0, 288.0, 291.0, 291.0, 293.0, 286.0, 288.0, 294.0, 286.0, 293.0, 294.0, 285.0, 256.0, 266.0, 284.0, 295.0, 289.0, 293.0, 290.0, 292.0, 290.0, 292.0, 291.0, 279.0, 289.0, 
293.0, 311.0, 316.0, 258.0, 272.0, 293.0, 289.0, 290.0, 289.0, 288.0, 294.0, 285.0, 294.0, 234.0, 239.0, 287.0, 289.0, 283.0, 296.0, 298.0, 289.0, 280.0, 302.0, 286.0, 296.0, 291.0, 288.0, 320.0, 307.0, 285.0, 297.0, 262.0, 263.0, 292.0, 290.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6985839313465746, "mean_inference_ms": 1.2480389289272427, "mean_action_processing_ms": 0.13390556369993323, "mean_env_wait_ms": 0.8408671873955664, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 9241600, "num_agent_steps_trained": 9241600, "num_env_steps_sampled": 4620800, "num_env_steps_trained": 4620800, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 4620800, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 9241600, "timers": {"training_iteration_time_ms": 3654.587, "learn_time_ms": 1141.059, "learn_throughput": 11217.65, "synch_weights_time_ms": 10.314}, "counters": {"num_env_steps_sampled": 4620800, "num_env_steps_trained": 4620800, "num_agent_steps_sampled": 9241600, "num_agent_steps_trained": 9241600}, "done": false, "episodes_total": 11552, "training_iteration": 361, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-23-55", "timestamp": 1666581835, "time_this_iter_s": 3.6370046138763428, "time_total_s": 1381.7395231723785, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1381.7395231723785, "timesteps_since_restore": 0, "iterations_since_restore": 361, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.683333333333334, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 198.2, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 177.31, "shaped_reward_min": 153, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.14, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 17.73, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 27, "useful_onion_pickup_agent_0_mean": 14.95, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 25, "useful_onion_pickup_agent_1_mean": 17.59, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 27, "onion_drop_agent_0_mean": 0.08, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.07, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.06, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, 
"useful_onion_drop_agent_1_mean": 0.04, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 14.75, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 25, "potting_onion_agent_1_mean": 17.37, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 26, "dish_pickup_agent_0_mean": 5.64, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 4.78, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.57, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.73, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.05, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.01, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.43, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.58, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.38, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.57, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.75, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 25, "optimal_onion_potting_agent_1_mean": 17.37, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 26, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.75, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 25, "viable_onion_potting_agent_1_mean": 17.37, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 26, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.002034829929471016, "policy_loss": 0.0016710225027054548, "vf_loss": 
7.742871284484863, "vf_explained_var": 0.5606523752212524, "kl": 0.0033344109542667866, "entropy": 0.8209579586982727, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 4633600, "num_env_steps_trained": 4633600, "num_agent_steps_sampled": 9267200, "num_agent_steps_trained": 9267200}, "sampler_results": {"episode_reward_max": 636.0, "episode_reward_min": 473.0, "episode_reward_mean": 573.71, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 234.0}, "policy_reward_max": {"ppo": 321.0}, "policy_reward_mean": {"ppo": 286.855}, "custom_metrics": {"sparse_reward_mean": 198.2, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 177.31, "shaped_reward_min": 153, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.14, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 17.73, 
"onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 27, "useful_onion_pickup_agent_0_mean": 14.95, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 25, "useful_onion_pickup_agent_1_mean": 17.59, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 27, "onion_drop_agent_0_mean": 0.08, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.07, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.06, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.04, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 14.75, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 25, "potting_onion_agent_1_mean": 17.37, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 26, "dish_pickup_agent_0_mean": 5.64, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 4.78, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.57, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.73, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.05, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.01, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.43, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.58, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.38, "soup_delivery_agent_0_min": 2, 
"soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.57, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.75, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 25, "optimal_onion_potting_agent_1_mean": 17.37, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 26, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.75, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 25, "viable_onion_potting_agent_1_mean": 17.37, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 26, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 487.0, 582.0, 582.0, 570.0, 582.0, 576.0, 579.0, 636.0, 582.0, 579.0, 576.0, 576.0, 582.0, 584.0, 579.0, 633.0, 579.0, 530.0, 579.0, 570.0, 576.0, 576.0, 630.0, 579.0, 579.0, 579.0, 576.0, 582.0, 579.0, 587.0, 581.0, 576.0, 576.0, 573.0, 536.0, 573.0, 579.0, 579.0, 582.0, 579.0, 582.0, 579.0, 579.0, 522.0, 579.0, 582.0, 582.0, 582.0, 570.0, 582.0, 627.0, 530.0, 582.0, 579.0, 582.0, 579.0, 473.0, 576.0, 579.0, 587.0, 582.0, 582.0, 579.0, 627.0, 582.0, 525.0, 582.0, 522.0, 570.0, 582.0, 576.0, 525.0, 570.0, 570.0, 579.0, 525.0, 579.0, 579.0, 576.0, 525.0, 579.0, 579.0, 582.0, 576.0, 579.0, 624.0, 579.0, 570.0, 576.0, 513.0, 579.0, 570.0, 582.0, 582.0, 579.0, 576.0, 576.0, 522.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [284.0, 295.0, 245.0, 242.0, 290.0, 292.0, 285.0, 297.0, 288.0, 282.0, 291.0, 291.0, 288.0, 288.0, 294.0, 285.0, 318.0, 318.0, 286.0, 296.0, 286.0, 293.0, 288.0, 288.0, 295.0, 281.0, 292.0, 290.0, 292.0, 292.0, 287.0, 292.0, 312.0, 321.0, 292.0, 287.0, 268.0, 262.0, 289.0, 290.0, 288.0, 282.0, 281.0, 295.0, 281.0, 295.0, 316.0, 314.0, 291.0, 
288.0, 291.0, 288.0, 287.0, 292.0, 290.0, 286.0, 289.0, 293.0, 285.0, 294.0, 294.0, 293.0, 294.0, 287.0, 282.0, 294.0, 285.0, 291.0, 286.0, 287.0, 266.0, 270.0, 283.0, 290.0, 288.0, 291.0, 291.0, 288.0, 291.0, 291.0, 293.0, 286.0, 288.0, 294.0, 286.0, 293.0, 294.0, 285.0, 256.0, 266.0, 284.0, 295.0, 289.0, 293.0, 290.0, 292.0, 290.0, 292.0, 291.0, 279.0, 289.0, 293.0, 311.0, 316.0, 258.0, 272.0, 293.0, 289.0, 290.0, 289.0, 288.0, 294.0, 285.0, 294.0, 234.0, 239.0, 287.0, 289.0, 283.0, 296.0, 298.0, 289.0, 280.0, 302.0, 286.0, 296.0, 291.0, 288.0, 320.0, 307.0, 285.0, 297.0, 262.0, 263.0, 292.0, 290.0, 263.0, 259.0, 276.0, 294.0, 293.0, 289.0, 277.0, 299.0, 266.0, 259.0, 286.0, 284.0, 292.0, 278.0, 283.0, 296.0, 260.0, 265.0, 297.0, 282.0, 293.0, 286.0, 294.0, 282.0, 262.0, 263.0, 287.0, 292.0, 290.0, 289.0, 293.0, 289.0, 293.0, 283.0, 294.0, 285.0, 313.0, 311.0, 291.0, 288.0, 295.0, 275.0, 293.0, 283.0, 249.0, 264.0, 294.0, 285.0, 278.0, 292.0, 286.0, 296.0, 287.0, 295.0, 296.0, 283.0, 286.0, 290.0, 288.0, 288.0, 258.0, 264.0, 296.0, 283.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6985101619700507, "mean_inference_ms": 1.2478661336702375, "mean_action_processing_ms": 0.1338990622465829, "mean_env_wait_ms": 0.8407821499189544, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 636.0, "episode_reward_min": 473.0, "episode_reward_mean": 573.71, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 234.0}, "policy_reward_max": {"ppo": 321.0}, "policy_reward_mean": {"ppo": 286.855}, "hist_stats": {"episode_reward": [579.0, 487.0, 582.0, 582.0, 570.0, 582.0, 576.0, 579.0, 636.0, 582.0, 579.0, 576.0, 576.0, 582.0, 584.0, 579.0, 633.0, 579.0, 530.0, 579.0, 570.0, 576.0, 576.0, 630.0, 579.0, 579.0, 579.0, 576.0, 582.0, 579.0, 587.0, 581.0, 576.0, 576.0, 573.0, 536.0, 573.0, 579.0, 579.0, 582.0, 579.0, 582.0, 579.0, 579.0, 522.0, 579.0, 582.0, 582.0, 582.0, 570.0, 582.0, 627.0, 530.0, 582.0, 579.0, 582.0, 579.0, 
473.0, 576.0, 579.0, 587.0, 582.0, 582.0, 579.0, 627.0, 582.0, 525.0, 582.0, 522.0, 570.0, 582.0, 576.0, 525.0, 570.0, 570.0, 579.0, 525.0, 579.0, 579.0, 576.0, 525.0, 579.0, 579.0, 582.0, 576.0, 579.0, 624.0, 579.0, 570.0, 576.0, 513.0, 579.0, 570.0, 582.0, 582.0, 579.0, 576.0, 576.0, 522.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [284.0, 295.0, 245.0, 242.0, 290.0, 292.0, 285.0, 297.0, 288.0, 282.0, 291.0, 291.0, 288.0, 288.0, 294.0, 285.0, 318.0, 318.0, 286.0, 296.0, 286.0, 293.0, 288.0, 288.0, 295.0, 281.0, 292.0, 290.0, 292.0, 292.0, 287.0, 292.0, 312.0, 321.0, 292.0, 287.0, 268.0, 262.0, 289.0, 290.0, 288.0, 282.0, 281.0, 295.0, 281.0, 295.0, 316.0, 314.0, 291.0, 288.0, 291.0, 288.0, 287.0, 292.0, 290.0, 286.0, 289.0, 293.0, 285.0, 294.0, 294.0, 293.0, 294.0, 287.0, 282.0, 294.0, 285.0, 291.0, 286.0, 287.0, 266.0, 270.0, 283.0, 290.0, 288.0, 291.0, 291.0, 288.0, 291.0, 291.0, 293.0, 286.0, 288.0, 294.0, 286.0, 293.0, 294.0, 285.0, 256.0, 266.0, 284.0, 295.0, 289.0, 293.0, 290.0, 292.0, 290.0, 292.0, 291.0, 279.0, 289.0, 293.0, 311.0, 316.0, 258.0, 272.0, 293.0, 289.0, 290.0, 289.0, 288.0, 294.0, 285.0, 294.0, 234.0, 239.0, 287.0, 289.0, 283.0, 296.0, 298.0, 289.0, 280.0, 302.0, 286.0, 296.0, 291.0, 288.0, 320.0, 307.0, 285.0, 297.0, 262.0, 263.0, 292.0, 290.0, 263.0, 259.0, 276.0, 294.0, 293.0, 289.0, 277.0, 299.0, 266.0, 259.0, 286.0, 284.0, 292.0, 278.0, 283.0, 296.0, 260.0, 265.0, 297.0, 282.0, 293.0, 286.0, 294.0, 282.0, 262.0, 263.0, 287.0, 292.0, 290.0, 
289.0, 293.0, 289.0, 293.0, 283.0, 294.0, 285.0, 313.0, 311.0, 291.0, 288.0, 295.0, 275.0, 293.0, 283.0, 249.0, 264.0, 294.0, 285.0, 278.0, 292.0, 286.0, 296.0, 287.0, 295.0, 296.0, 283.0, 286.0, 290.0, 288.0, 288.0, 258.0, 264.0, 296.0, 283.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6985101619700507, "mean_inference_ms": 1.2478661336702375, "mean_action_processing_ms": 0.1338990622465829, "mean_env_wait_ms": 0.8407821499189544, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 9267200, "num_agent_steps_trained": 9267200, "num_env_steps_sampled": 4633600, "num_env_steps_trained": 4633600, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 4633600, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 9267200, "timers": {"training_iteration_time_ms": 3658.002, "learn_time_ms": 1132.247, "learn_throughput": 11304.955, "synch_weights_time_ms": 10.373}, "counters": {"num_env_steps_sampled": 4633600, "num_env_steps_trained": 4633600, "num_agent_steps_sampled": 9267200, "num_agent_steps_trained": 9267200}, "done": false, "episodes_total": 11584, "training_iteration": 362, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-23-59", "timestamp": 1666581839, "time_this_iter_s": 3.719611883163452, "time_total_s": 1385.459135055542, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1385.459135055542, "timesteps_since_restore": 0, "iterations_since_restore": 362, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.84, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 197.8, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 176.65, "shaped_reward_min": 153, "shaped_reward_max": 187, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.86, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 17.94, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 27, "useful_onion_pickup_agent_0_mean": 14.7, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 25, "useful_onion_pickup_agent_1_mean": 17.86, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 27, "onion_drop_agent_0_mean": 0.05, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.07, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.03, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, 
"useful_onion_drop_agent_1_mean": 0.04, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 14.48, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 25, "potting_onion_agent_1_mean": 17.57, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 26, "dish_pickup_agent_0_mean": 5.67, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 4.73, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.58, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.67, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.03, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.02, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.46, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.49, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.44, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.47, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.48, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 25, "optimal_onion_potting_agent_1_mean": 17.57, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 26, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.48, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 25, "viable_onion_potting_agent_1_mean": 17.57, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 26, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0033650160767138004, "policy_loss": 0.0030029851477593184, "vf_loss": 
7.754143714904785, "vf_explained_var": 0.5768305659294128, "kl": 0.002860500942915678, "entropy": 0.8267655372619629, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 4646400, "num_env_steps_trained": 4646400, "num_agent_steps_sampled": 9292800, "num_agent_steps_trained": 9292800}, "sampler_results": {"episode_reward_max": 627.0, "episode_reward_min": 473.0, "episode_reward_mean": 572.25, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 234.0}, "policy_reward_max": {"ppo": 320.0}, "policy_reward_mean": {"ppo": 286.125}, "custom_metrics": {"sparse_reward_mean": 197.8, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 176.65, "shaped_reward_min": 153, "shaped_reward_max": 187, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.86, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 17.94, 
"onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 27, "useful_onion_pickup_agent_0_mean": 14.7, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 25, "useful_onion_pickup_agent_1_mean": 17.86, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 27, "onion_drop_agent_0_mean": 0.05, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.07, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.03, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.04, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 14.48, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 25, "potting_onion_agent_1_mean": 17.57, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 26, "dish_pickup_agent_0_mean": 5.67, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 4.73, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.58, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.67, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.03, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.02, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.46, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.49, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.44, "soup_delivery_agent_0_min": 2, 
"soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.47, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.48, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 25, "optimal_onion_potting_agent_1_mean": 17.57, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 26, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.48, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 25, "viable_onion_potting_agent_1_mean": 17.57, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 26, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [576.0, 576.0, 573.0, 536.0, 573.0, 579.0, 579.0, 582.0, 579.0, 582.0, 579.0, 579.0, 522.0, 579.0, 582.0, 582.0, 582.0, 570.0, 582.0, 627.0, 530.0, 582.0, 579.0, 582.0, 579.0, 473.0, 576.0, 579.0, 587.0, 582.0, 582.0, 579.0, 627.0, 582.0, 525.0, 582.0, 522.0, 570.0, 582.0, 576.0, 525.0, 570.0, 570.0, 579.0, 525.0, 579.0, 579.0, 576.0, 525.0, 579.0, 579.0, 582.0, 576.0, 579.0, 624.0, 579.0, 570.0, 576.0, 513.0, 579.0, 570.0, 582.0, 582.0, 579.0, 576.0, 576.0, 522.0, 579.0, 576.0, 579.0, 573.0, 576.0, 579.0, 525.0, 573.0, 522.0, 579.0, 579.0, 579.0, 582.0, 579.0, 582.0, 582.0, 522.0, 576.0, 579.0, 573.0, 587.0, 579.0, 573.0, 579.0, 587.0, 576.0, 582.0, 579.0, 627.0, 579.0, 579.0, 579.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [282.0, 294.0, 285.0, 291.0, 286.0, 287.0, 266.0, 270.0, 283.0, 290.0, 288.0, 291.0, 291.0, 288.0, 291.0, 291.0, 293.0, 286.0, 288.0, 294.0, 286.0, 293.0, 294.0, 285.0, 256.0, 266.0, 284.0, 295.0, 289.0, 293.0, 290.0, 292.0, 290.0, 292.0, 291.0, 279.0, 289.0, 293.0, 311.0, 316.0, 258.0, 272.0, 293.0, 289.0, 290.0, 289.0, 288.0, 294.0, 285.0, 
294.0, 234.0, 239.0, 287.0, 289.0, 283.0, 296.0, 298.0, 289.0, 280.0, 302.0, 286.0, 296.0, 291.0, 288.0, 320.0, 307.0, 285.0, 297.0, 262.0, 263.0, 292.0, 290.0, 263.0, 259.0, 276.0, 294.0, 293.0, 289.0, 277.0, 299.0, 266.0, 259.0, 286.0, 284.0, 292.0, 278.0, 283.0, 296.0, 260.0, 265.0, 297.0, 282.0, 293.0, 286.0, 294.0, 282.0, 262.0, 263.0, 287.0, 292.0, 290.0, 289.0, 293.0, 289.0, 293.0, 283.0, 294.0, 285.0, 313.0, 311.0, 291.0, 288.0, 295.0, 275.0, 293.0, 283.0, 249.0, 264.0, 294.0, 285.0, 278.0, 292.0, 286.0, 296.0, 287.0, 295.0, 296.0, 283.0, 286.0, 290.0, 288.0, 288.0, 258.0, 264.0, 296.0, 283.0, 292.0, 284.0, 281.0, 298.0, 291.0, 282.0, 294.0, 282.0, 293.0, 286.0, 260.0, 265.0, 285.0, 288.0, 258.0, 264.0, 287.0, 292.0, 293.0, 286.0, 287.0, 292.0, 290.0, 292.0, 288.0, 291.0, 290.0, 292.0, 287.0, 295.0, 265.0, 257.0, 278.0, 298.0, 290.0, 289.0, 272.0, 301.0, 288.0, 299.0, 288.0, 291.0, 287.0, 286.0, 291.0, 288.0, 293.0, 294.0, 284.0, 292.0, 293.0, 289.0, 286.0, 293.0, 311.0, 316.0, 286.0, 293.0, 291.0, 288.0, 283.0, 296.0, 292.0, 287.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6984367152375701, "mean_inference_ms": 1.2476924170403942, "mean_action_processing_ms": 0.13389245451619106, "mean_env_wait_ms": 0.8406926516686911, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 627.0, "episode_reward_min": 473.0, "episode_reward_mean": 572.25, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 234.0}, "policy_reward_max": {"ppo": 320.0}, "policy_reward_mean": {"ppo": 286.125}, "hist_stats": {"episode_reward": [576.0, 576.0, 573.0, 536.0, 573.0, 579.0, 579.0, 582.0, 579.0, 582.0, 579.0, 579.0, 522.0, 579.0, 582.0, 582.0, 582.0, 570.0, 582.0, 627.0, 530.0, 582.0, 579.0, 582.0, 579.0, 473.0, 576.0, 579.0, 587.0, 582.0, 582.0, 579.0, 627.0, 582.0, 525.0, 582.0, 522.0, 570.0, 582.0, 576.0, 525.0, 570.0, 570.0, 579.0, 525.0, 579.0, 579.0, 576.0, 525.0, 579.0, 579.0, 582.0, 576.0, 579.0, 624.0, 579.0, 
570.0, 576.0, 513.0, 579.0, 570.0, 582.0, 582.0, 579.0, 576.0, 576.0, 522.0, 579.0, 576.0, 579.0, 573.0, 576.0, 579.0, 525.0, 573.0, 522.0, 579.0, 579.0, 579.0, 582.0, 579.0, 582.0, 582.0, 522.0, 576.0, 579.0, 573.0, 587.0, 579.0, 573.0, 579.0, 587.0, 576.0, 582.0, 579.0, 627.0, 579.0, 579.0, 579.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [282.0, 294.0, 285.0, 291.0, 286.0, 287.0, 266.0, 270.0, 283.0, 290.0, 288.0, 291.0, 291.0, 288.0, 291.0, 291.0, 293.0, 286.0, 288.0, 294.0, 286.0, 293.0, 294.0, 285.0, 256.0, 266.0, 284.0, 295.0, 289.0, 293.0, 290.0, 292.0, 290.0, 292.0, 291.0, 279.0, 289.0, 293.0, 311.0, 316.0, 258.0, 272.0, 293.0, 289.0, 290.0, 289.0, 288.0, 294.0, 285.0, 294.0, 234.0, 239.0, 287.0, 289.0, 283.0, 296.0, 298.0, 289.0, 280.0, 302.0, 286.0, 296.0, 291.0, 288.0, 320.0, 307.0, 285.0, 297.0, 262.0, 263.0, 292.0, 290.0, 263.0, 259.0, 276.0, 294.0, 293.0, 289.0, 277.0, 299.0, 266.0, 259.0, 286.0, 284.0, 292.0, 278.0, 283.0, 296.0, 260.0, 265.0, 297.0, 282.0, 293.0, 286.0, 294.0, 282.0, 262.0, 263.0, 287.0, 292.0, 290.0, 289.0, 293.0, 289.0, 293.0, 283.0, 294.0, 285.0, 313.0, 311.0, 291.0, 288.0, 295.0, 275.0, 293.0, 283.0, 249.0, 264.0, 294.0, 285.0, 278.0, 292.0, 286.0, 296.0, 287.0, 295.0, 296.0, 283.0, 286.0, 290.0, 288.0, 288.0, 258.0, 264.0, 296.0, 283.0, 292.0, 284.0, 281.0, 298.0, 291.0, 282.0, 294.0, 282.0, 293.0, 286.0, 260.0, 265.0, 285.0, 288.0, 258.0, 264.0, 287.0, 292.0, 293.0, 286.0, 287.0, 292.0, 290.0, 292.0, 288.0, 291.0, 290.0, 292.0, 
287.0, 295.0, 265.0, 257.0, 278.0, 298.0, 290.0, 289.0, 272.0, 301.0, 288.0, 299.0, 288.0, 291.0, 287.0, 286.0, 291.0, 288.0, 293.0, 294.0, 284.0, 292.0, 293.0, 289.0, 286.0, 293.0, 311.0, 316.0, 286.0, 293.0, 291.0, 288.0, 283.0, 296.0, 292.0, 287.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6984367152375701, "mean_inference_ms": 1.2476924170403942, "mean_action_processing_ms": 0.13389245451619106, "mean_env_wait_ms": 0.8406926516686911, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 9292800, "num_agent_steps_trained": 9292800, "num_env_steps_sampled": 4646400, "num_env_steps_trained": 4646400, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 4646400, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 9292800, "timers": {"training_iteration_time_ms": 3657.561, "learn_time_ms": 1131.754, "learn_throughput": 11309.877, "synch_weights_time_ms": 10.997}, "counters": {"num_env_steps_sampled": 4646400, "num_env_steps_trained": 4646400, "num_agent_steps_sampled": 9292800, "num_agent_steps_trained": 9292800}, "done": false, "episodes_total": 11616, "training_iteration": 363, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-24-03", "timestamp": 1666581843, "time_this_iter_s": 3.6965436935424805, "time_total_s": 1389.1556787490845, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1389.1556787490845, "timesteps_since_restore": 0, "iterations_since_restore": 363, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.516666666666666, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 194.6, "sparse_reward_min": 80, "sparse_reward_max": 220, "shaped_reward_mean": 173.97, "shaped_reward_min": 74, "shaped_reward_max": 187, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.13, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 17.29, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 27, "useful_onion_pickup_agent_0_mean": 14.96, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 17.2, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 27, "onion_drop_agent_0_mean": 0.08, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.07, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.05, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, 
"useful_onion_drop_agent_1_mean": 0.05, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 14.72, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 16.9, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 26, "dish_pickup_agent_0_mean": 5.45, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 4.85, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.33, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.74, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.02, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.05, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.23, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.55, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.21, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.52, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.72, "optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 16.9, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 26, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.72, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 16.9, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 26, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.001889953389763832, "policy_loss": 0.0015104906633496284, "vf_loss": 
7.811895847320557, "vf_explained_var": 0.5815200805664062, "kl": 0.004921192303299904, "entropy": 0.803455650806427, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 4659200, "num_env_steps_trained": 4659200, "num_agent_steps_sampled": 9318400, "num_agent_steps_trained": 9318400}, "sampler_results": {"episode_reward_max": 627.0, "episode_reward_min": 234.0, "episode_reward_mean": 563.17, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 114.0}, "policy_reward_max": {"ppo": 320.0}, "policy_reward_mean": {"ppo": 281.585}, "custom_metrics": {"sparse_reward_mean": 194.6, "sparse_reward_min": 80, "sparse_reward_max": 220, "shaped_reward_mean": 173.97, "shaped_reward_min": 74, "shaped_reward_max": 187, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.13, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 17.29, 
"onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 27, "useful_onion_pickup_agent_0_mean": 14.96, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 17.2, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 27, "onion_drop_agent_0_mean": 0.08, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.07, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.05, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.05, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 14.72, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 16.9, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 26, "dish_pickup_agent_0_mean": 5.45, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 4.85, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.33, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.74, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.02, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.05, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.23, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.55, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.21, "soup_delivery_agent_0_min": 1, 
"soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.52, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.72, "optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 16.9, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 26, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.72, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 16.9, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 26, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [627.0, 582.0, 525.0, 582.0, 522.0, 570.0, 582.0, 576.0, 525.0, 570.0, 570.0, 579.0, 525.0, 579.0, 579.0, 576.0, 525.0, 579.0, 579.0, 582.0, 576.0, 579.0, 624.0, 579.0, 570.0, 576.0, 513.0, 579.0, 570.0, 582.0, 582.0, 579.0, 576.0, 576.0, 522.0, 579.0, 576.0, 579.0, 573.0, 576.0, 579.0, 525.0, 573.0, 522.0, 579.0, 579.0, 579.0, 582.0, 579.0, 582.0, 582.0, 522.0, 576.0, 579.0, 573.0, 587.0, 579.0, 573.0, 579.0, 587.0, 576.0, 582.0, 579.0, 627.0, 579.0, 579.0, 579.0, 579.0, 522.0, 579.0, 582.0, 576.0, 582.0, 579.0, 582.0, 525.0, 576.0, 519.0, 570.0, 576.0, 579.0, 582.0, 533.0, 579.0, 582.0, 525.0, 573.0, 582.0, 262.0, 582.0, 234.0, 582.0, 525.0, 510.0, 579.0, 579.0, 522.0, 582.0, 579.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [320.0, 307.0, 285.0, 297.0, 262.0, 263.0, 292.0, 290.0, 263.0, 259.0, 276.0, 294.0, 293.0, 289.0, 277.0, 299.0, 266.0, 259.0, 286.0, 284.0, 292.0, 278.0, 283.0, 296.0, 260.0, 265.0, 297.0, 282.0, 293.0, 286.0, 294.0, 282.0, 262.0, 263.0, 287.0, 292.0, 290.0, 289.0, 293.0, 289.0, 293.0, 283.0, 294.0, 285.0, 313.0, 311.0, 291.0, 288.0, 295.0, 
275.0, 293.0, 283.0, 249.0, 264.0, 294.0, 285.0, 278.0, 292.0, 286.0, 296.0, 287.0, 295.0, 296.0, 283.0, 286.0, 290.0, 288.0, 288.0, 258.0, 264.0, 296.0, 283.0, 292.0, 284.0, 281.0, 298.0, 291.0, 282.0, 294.0, 282.0, 293.0, 286.0, 260.0, 265.0, 285.0, 288.0, 258.0, 264.0, 287.0, 292.0, 293.0, 286.0, 287.0, 292.0, 290.0, 292.0, 288.0, 291.0, 290.0, 292.0, 287.0, 295.0, 265.0, 257.0, 278.0, 298.0, 290.0, 289.0, 272.0, 301.0, 288.0, 299.0, 288.0, 291.0, 287.0, 286.0, 291.0, 288.0, 293.0, 294.0, 284.0, 292.0, 293.0, 289.0, 286.0, 293.0, 311.0, 316.0, 286.0, 293.0, 291.0, 288.0, 283.0, 296.0, 292.0, 287.0, 261.0, 261.0, 287.0, 292.0, 286.0, 296.0, 286.0, 290.0, 293.0, 289.0, 294.0, 285.0, 294.0, 288.0, 256.0, 269.0, 285.0, 291.0, 263.0, 256.0, 292.0, 278.0, 279.0, 297.0, 289.0, 290.0, 287.0, 295.0, 270.0, 263.0, 290.0, 289.0, 292.0, 290.0, 256.0, 269.0, 285.0, 288.0, 290.0, 292.0, 128.0, 134.0, 290.0, 292.0, 114.0, 120.0, 294.0, 288.0, 254.0, 271.0, 254.0, 256.0, 289.0, 290.0, 288.0, 291.0, 253.0, 269.0, 289.0, 293.0, 288.0, 291.0, 292.0, 290.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6983602775014129, "mean_inference_ms": 1.2474985858875547, "mean_action_processing_ms": 0.13388312760671892, "mean_env_wait_ms": 0.8405829253975301, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 627.0, "episode_reward_min": 234.0, "episode_reward_mean": 563.17, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 114.0}, "policy_reward_max": {"ppo": 320.0}, "policy_reward_mean": {"ppo": 281.585}, "hist_stats": {"episode_reward": [627.0, 582.0, 525.0, 582.0, 522.0, 570.0, 582.0, 576.0, 525.0, 570.0, 570.0, 579.0, 525.0, 579.0, 579.0, 576.0, 525.0, 579.0, 579.0, 582.0, 576.0, 579.0, 624.0, 579.0, 570.0, 576.0, 513.0, 579.0, 570.0, 582.0, 582.0, 579.0, 576.0, 576.0, 522.0, 579.0, 576.0, 579.0, 573.0, 576.0, 579.0, 525.0, 573.0, 522.0, 579.0, 579.0, 579.0, 582.0, 579.0, 582.0, 582.0, 522.0, 576.0, 579.0, 573.0, 587.0, 
579.0, 573.0, 579.0, 587.0, 576.0, 582.0, 579.0, 627.0, 579.0, 579.0, 579.0, 579.0, 522.0, 579.0, 582.0, 576.0, 582.0, 579.0, 582.0, 525.0, 576.0, 519.0, 570.0, 576.0, 579.0, 582.0, 533.0, 579.0, 582.0, 525.0, 573.0, 582.0, 262.0, 582.0, 234.0, 582.0, 525.0, 510.0, 579.0, 579.0, 522.0, 582.0, 579.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [320.0, 307.0, 285.0, 297.0, 262.0, 263.0, 292.0, 290.0, 263.0, 259.0, 276.0, 294.0, 293.0, 289.0, 277.0, 299.0, 266.0, 259.0, 286.0, 284.0, 292.0, 278.0, 283.0, 296.0, 260.0, 265.0, 297.0, 282.0, 293.0, 286.0, 294.0, 282.0, 262.0, 263.0, 287.0, 292.0, 290.0, 289.0, 293.0, 289.0, 293.0, 283.0, 294.0, 285.0, 313.0, 311.0, 291.0, 288.0, 295.0, 275.0, 293.0, 283.0, 249.0, 264.0, 294.0, 285.0, 278.0, 292.0, 286.0, 296.0, 287.0, 295.0, 296.0, 283.0, 286.0, 290.0, 288.0, 288.0, 258.0, 264.0, 296.0, 283.0, 292.0, 284.0, 281.0, 298.0, 291.0, 282.0, 294.0, 282.0, 293.0, 286.0, 260.0, 265.0, 285.0, 288.0, 258.0, 264.0, 287.0, 292.0, 293.0, 286.0, 287.0, 292.0, 290.0, 292.0, 288.0, 291.0, 290.0, 292.0, 287.0, 295.0, 265.0, 257.0, 278.0, 298.0, 290.0, 289.0, 272.0, 301.0, 288.0, 299.0, 288.0, 291.0, 287.0, 286.0, 291.0, 288.0, 293.0, 294.0, 284.0, 292.0, 293.0, 289.0, 286.0, 293.0, 311.0, 316.0, 286.0, 293.0, 291.0, 288.0, 283.0, 296.0, 292.0, 287.0, 261.0, 261.0, 287.0, 292.0, 286.0, 296.0, 286.0, 290.0, 293.0, 289.0, 294.0, 285.0, 294.0, 288.0, 256.0, 269.0, 285.0, 291.0, 263.0, 256.0, 292.0, 278.0, 279.0, 297.0, 289.0, 290.0, 287.0, 295.0, 
270.0, 263.0, 290.0, 289.0, 292.0, 290.0, 256.0, 269.0, 285.0, 288.0, 290.0, 292.0, 128.0, 134.0, 290.0, 292.0, 114.0, 120.0, 294.0, 288.0, 254.0, 271.0, 254.0, 256.0, 289.0, 290.0, 288.0, 291.0, 253.0, 269.0, 289.0, 293.0, 288.0, 291.0, 292.0, 290.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6983602775014129, "mean_inference_ms": 1.2474985858875547, "mean_action_processing_ms": 0.13388312760671892, "mean_env_wait_ms": 0.8405829253975301, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 9318400, "num_agent_steps_trained": 9318400, "num_env_steps_sampled": 4659200, "num_env_steps_trained": 4659200, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 4659200, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 9318400, "timers": {"training_iteration_time_ms": 3621.735, "learn_time_ms": 1123.322, "learn_throughput": 11394.777, "synch_weights_time_ms": 11.703}, "counters": {"num_env_steps_sampled": 4659200, "num_env_steps_trained": 4659200, "num_agent_steps_sampled": 9318400, "num_agent_steps_trained": 9318400}, "done": false, "episodes_total": 11648, "training_iteration": 364, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-24-07", "timestamp": 1666581847, "time_this_iter_s": 3.602010726928711, "time_total_s": 1392.7576894760132, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1392.7576894760132, "timesteps_since_restore": 0, "iterations_since_restore": 364, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.020000000000003, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 194.4, "sparse_reward_min": 80, "sparse_reward_max": 220, "shaped_reward_mean": 174.76, "shaped_reward_min": 74, "shaped_reward_max": 187, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.38, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 17.19, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 15.23, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 25, "useful_onion_pickup_agent_1_mean": 17.08, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 25, "onion_drop_agent_0_mean": 0.12, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.06, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.06, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, 
"useful_onion_drop_agent_1_mean": 0.05, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 14.99, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 16.81, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 5.44, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 4.86, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.32, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.75, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.01, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.04, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.23, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.61, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.16, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.58, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.99, "optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 16.81, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.99, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 16.81, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.001957516185939312, "policy_loss": 0.001595453592017293, "vf_loss": 
7.755298614501953, "vf_explained_var": 0.5773959159851074, "kl": 0.0023900310043245554, "entropy": 0.826931357383728, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 4672000, "num_env_steps_trained": 4672000, "num_agent_steps_sampled": 9344000, "num_agent_steps_trained": 9344000}, "sampler_results": {"episode_reward_max": 627.0, "episode_reward_min": 234.0, "episode_reward_mean": 563.56, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 114.0}, "policy_reward_max": {"ppo": 323.0}, "policy_reward_mean": {"ppo": 281.78}, "custom_metrics": {"sparse_reward_mean": 194.4, "sparse_reward_min": 80, "sparse_reward_max": 220, "shaped_reward_mean": 174.76, "shaped_reward_min": 74, "shaped_reward_max": 187, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.38, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 17.19, 
"onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 15.23, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 25, "useful_onion_pickup_agent_1_mean": 17.08, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 25, "onion_drop_agent_0_mean": 0.12, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.06, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.06, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.05, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 14.99, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 16.81, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 5.44, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 4.86, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.32, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.75, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.01, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.04, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.23, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.61, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.16, "soup_delivery_agent_0_min": 1, 
"soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.58, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.99, "optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 16.81, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.99, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 16.81, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [576.0, 576.0, 522.0, 579.0, 576.0, 579.0, 573.0, 576.0, 579.0, 525.0, 573.0, 522.0, 579.0, 579.0, 579.0, 582.0, 579.0, 582.0, 582.0, 522.0, 576.0, 579.0, 573.0, 587.0, 579.0, 573.0, 579.0, 587.0, 576.0, 582.0, 579.0, 627.0, 579.0, 579.0, 579.0, 579.0, 522.0, 579.0, 582.0, 576.0, 582.0, 579.0, 582.0, 525.0, 576.0, 519.0, 570.0, 576.0, 579.0, 582.0, 533.0, 579.0, 582.0, 525.0, 573.0, 582.0, 262.0, 582.0, 234.0, 582.0, 525.0, 510.0, 579.0, 579.0, 522.0, 582.0, 579.0, 582.0, 587.0, 576.0, 582.0, 525.0, 530.0, 582.0, 582.0, 579.0, 579.0, 522.0, 587.0, 579.0, 627.0, 573.0, 573.0, 522.0, 582.0, 579.0, 579.0, 582.0, 582.0, 579.0, 536.0, 582.0, 587.0, 576.0, 530.0, 573.0, 579.0, 576.0, 579.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [286.0, 290.0, 288.0, 288.0, 258.0, 264.0, 296.0, 283.0, 292.0, 284.0, 281.0, 298.0, 291.0, 282.0, 294.0, 282.0, 293.0, 286.0, 260.0, 265.0, 285.0, 288.0, 258.0, 264.0, 287.0, 292.0, 293.0, 286.0, 287.0, 292.0, 290.0, 292.0, 288.0, 291.0, 290.0, 292.0, 287.0, 295.0, 265.0, 257.0, 278.0, 298.0, 290.0, 289.0, 272.0, 301.0, 288.0, 299.0, 288.0, 
291.0, 287.0, 286.0, 291.0, 288.0, 293.0, 294.0, 284.0, 292.0, 293.0, 289.0, 286.0, 293.0, 311.0, 316.0, 286.0, 293.0, 291.0, 288.0, 283.0, 296.0, 292.0, 287.0, 261.0, 261.0, 287.0, 292.0, 286.0, 296.0, 286.0, 290.0, 293.0, 289.0, 294.0, 285.0, 294.0, 288.0, 256.0, 269.0, 285.0, 291.0, 263.0, 256.0, 292.0, 278.0, 279.0, 297.0, 289.0, 290.0, 287.0, 295.0, 270.0, 263.0, 290.0, 289.0, 292.0, 290.0, 256.0, 269.0, 285.0, 288.0, 290.0, 292.0, 128.0, 134.0, 290.0, 292.0, 114.0, 120.0, 294.0, 288.0, 254.0, 271.0, 254.0, 256.0, 289.0, 290.0, 288.0, 291.0, 253.0, 269.0, 289.0, 293.0, 288.0, 291.0, 292.0, 290.0, 297.0, 290.0, 294.0, 282.0, 293.0, 289.0, 261.0, 264.0, 272.0, 258.0, 290.0, 292.0, 290.0, 292.0, 291.0, 288.0, 284.0, 295.0, 262.0, 260.0, 296.0, 291.0, 276.0, 303.0, 323.0, 304.0, 284.0, 289.0, 294.0, 279.0, 270.0, 252.0, 291.0, 291.0, 293.0, 286.0, 296.0, 283.0, 293.0, 289.0, 289.0, 293.0, 291.0, 288.0, 272.0, 264.0, 288.0, 294.0, 294.0, 293.0, 293.0, 283.0, 270.0, 260.0, 288.0, 285.0, 292.0, 287.0, 287.0, 289.0, 288.0, 291.0, 291.0, 285.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6982781829858854, "mean_inference_ms": 1.2473055426051993, "mean_action_processing_ms": 0.13387086509086388, "mean_env_wait_ms": 0.8404633684404762, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 627.0, "episode_reward_min": 234.0, "episode_reward_mean": 563.56, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 114.0}, "policy_reward_max": {"ppo": 323.0}, "policy_reward_mean": {"ppo": 281.78}, "hist_stats": {"episode_reward": [576.0, 576.0, 522.0, 579.0, 576.0, 579.0, 573.0, 576.0, 579.0, 525.0, 573.0, 522.0, 579.0, 579.0, 579.0, 582.0, 579.0, 582.0, 582.0, 522.0, 576.0, 579.0, 573.0, 587.0, 579.0, 573.0, 579.0, 587.0, 576.0, 582.0, 579.0, 627.0, 579.0, 579.0, 579.0, 579.0, 522.0, 579.0, 582.0, 576.0, 582.0, 579.0, 582.0, 525.0, 576.0, 519.0, 570.0, 576.0, 579.0, 582.0, 533.0, 579.0, 582.0, 525.0, 573.0, 582.0, 262.0, 
582.0, 234.0, 582.0, 525.0, 510.0, 579.0, 579.0, 522.0, 582.0, 579.0, 582.0, 587.0, 576.0, 582.0, 525.0, 530.0, 582.0, 582.0, 579.0, 579.0, 522.0, 587.0, 579.0, 627.0, 573.0, 573.0, 522.0, 582.0, 579.0, 579.0, 582.0, 582.0, 579.0, 536.0, 582.0, 587.0, 576.0, 530.0, 573.0, 579.0, 576.0, 579.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [286.0, 290.0, 288.0, 288.0, 258.0, 264.0, 296.0, 283.0, 292.0, 284.0, 281.0, 298.0, 291.0, 282.0, 294.0, 282.0, 293.0, 286.0, 260.0, 265.0, 285.0, 288.0, 258.0, 264.0, 287.0, 292.0, 293.0, 286.0, 287.0, 292.0, 290.0, 292.0, 288.0, 291.0, 290.0, 292.0, 287.0, 295.0, 265.0, 257.0, 278.0, 298.0, 290.0, 289.0, 272.0, 301.0, 288.0, 299.0, 288.0, 291.0, 287.0, 286.0, 291.0, 288.0, 293.0, 294.0, 284.0, 292.0, 293.0, 289.0, 286.0, 293.0, 311.0, 316.0, 286.0, 293.0, 291.0, 288.0, 283.0, 296.0, 292.0, 287.0, 261.0, 261.0, 287.0, 292.0, 286.0, 296.0, 286.0, 290.0, 293.0, 289.0, 294.0, 285.0, 294.0, 288.0, 256.0, 269.0, 285.0, 291.0, 263.0, 256.0, 292.0, 278.0, 279.0, 297.0, 289.0, 290.0, 287.0, 295.0, 270.0, 263.0, 290.0, 289.0, 292.0, 290.0, 256.0, 269.0, 285.0, 288.0, 290.0, 292.0, 128.0, 134.0, 290.0, 292.0, 114.0, 120.0, 294.0, 288.0, 254.0, 271.0, 254.0, 256.0, 289.0, 290.0, 288.0, 291.0, 253.0, 269.0, 289.0, 293.0, 288.0, 291.0, 292.0, 290.0, 297.0, 290.0, 294.0, 282.0, 293.0, 289.0, 261.0, 264.0, 272.0, 258.0, 290.0, 292.0, 290.0, 292.0, 291.0, 288.0, 284.0, 295.0, 262.0, 260.0, 296.0, 291.0, 276.0, 303.0, 323.0, 304.0, 284.0, 289.0, 294.0, 
279.0, 270.0, 252.0, 291.0, 291.0, 293.0, 286.0, 296.0, 283.0, 293.0, 289.0, 289.0, 293.0, 291.0, 288.0, 272.0, 264.0, 288.0, 294.0, 294.0, 293.0, 293.0, 283.0, 270.0, 260.0, 288.0, 285.0, 292.0, 287.0, 287.0, 289.0, 288.0, 291.0, 291.0, 285.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6982781829858854, "mean_inference_ms": 1.2473055426051993, "mean_action_processing_ms": 0.13387086509086388, "mean_env_wait_ms": 0.8404633684404762, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 9344000, "num_agent_steps_trained": 9344000, "num_env_steps_sampled": 4672000, "num_env_steps_trained": 4672000, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 4672000, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 9344000, "timers": {"training_iteration_time_ms": 3618.683, "learn_time_ms": 1129.566, "learn_throughput": 11331.783, "synch_weights_time_ms": 12.572}, "counters": {"num_env_steps_sampled": 4672000, "num_env_steps_trained": 4672000, "num_agent_steps_sampled": 9344000, "num_agent_steps_trained": 9344000}, "done": false, "episodes_total": 11680, "training_iteration": 365, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-24-10", "timestamp": 1666581850, "time_this_iter_s": 3.625788450241089, "time_total_s": 1396.3834779262543, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1396.3834779262543, "timesteps_since_restore": 0, "iterations_since_restore": 365, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.933333333333334, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 193.8, "sparse_reward_min": 80, "sparse_reward_max": 220, "shaped_reward_mean": 174.63, "shaped_reward_min": 74, "shaped_reward_max": 187, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.63, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 16.92, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 15.49, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 25, "useful_onion_pickup_agent_1_mean": 16.77, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 25, "onion_drop_agent_0_mean": 0.12, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.07, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.06, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, 
"useful_onion_drop_agent_1_mean": 0.06, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.27, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 16.51, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 5.36, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 4.92, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.25, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.83, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.03, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.03, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.13, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.69, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.05, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.66, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.03, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.27, "optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 16.51, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.27, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 16.51, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0009442369337193668, "policy_loss": -0.0013090715510770679, "vf_loss": 
7.695399284362793, "vf_explained_var": 0.572861909866333, "kl": 0.002738791285082698, "entropy": 0.8094083666801453, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 4684800, "num_env_steps_trained": 4684800, "num_agent_steps_sampled": 9369600, "num_agent_steps_trained": 9369600}, "sampler_results": {"episode_reward_max": 627.0, "episode_reward_min": 234.0, "episode_reward_mean": 562.23, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 114.0}, "policy_reward_max": {"ppo": 323.0}, "policy_reward_mean": {"ppo": 281.115}, "custom_metrics": {"sparse_reward_mean": 193.8, "sparse_reward_min": 80, "sparse_reward_max": 220, "shaped_reward_mean": 174.63, "shaped_reward_min": 74, "shaped_reward_max": 187, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.63, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 16.92, 
"onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 15.49, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 25, "useful_onion_pickup_agent_1_mean": 16.77, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 25, "onion_drop_agent_0_mean": 0.12, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.07, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.06, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.06, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.27, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 16.51, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 5.36, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 4.92, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.25, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.83, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.03, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.03, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.13, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.69, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.05, "soup_delivery_agent_0_min": 1, 
"soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.66, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.03, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.27, "optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 16.51, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.27, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 16.51, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 579.0, 579.0, 579.0, 522.0, 579.0, 582.0, 576.0, 582.0, 579.0, 582.0, 525.0, 576.0, 519.0, 570.0, 576.0, 579.0, 582.0, 533.0, 579.0, 582.0, 525.0, 573.0, 582.0, 262.0, 582.0, 234.0, 582.0, 525.0, 510.0, 579.0, 579.0, 522.0, 582.0, 579.0, 582.0, 587.0, 576.0, 582.0, 525.0, 530.0, 582.0, 582.0, 579.0, 579.0, 522.0, 587.0, 579.0, 627.0, 573.0, 573.0, 522.0, 582.0, 579.0, 579.0, 582.0, 582.0, 579.0, 536.0, 582.0, 587.0, 576.0, 530.0, 573.0, 579.0, 576.0, 579.0, 576.0, 582.0, 576.0, 587.0, 579.0, 476.0, 573.0, 522.0, 579.0, 522.0, 582.0, 579.0, 519.0, 579.0, 579.0, 579.0, 582.0, 576.0, 525.0, 582.0, 576.0, 584.0, 579.0, 579.0, 579.0, 570.0, 570.0, 582.0, 582.0, 582.0, 582.0, 582.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [286.0, 293.0, 291.0, 288.0, 283.0, 296.0, 292.0, 287.0, 261.0, 261.0, 287.0, 292.0, 286.0, 296.0, 286.0, 290.0, 293.0, 289.0, 294.0, 285.0, 294.0, 288.0, 256.0, 269.0, 285.0, 291.0, 263.0, 256.0, 292.0, 278.0, 279.0, 297.0, 289.0, 290.0, 287.0, 295.0, 270.0, 263.0, 290.0, 289.0, 292.0, 290.0, 256.0, 269.0, 285.0, 288.0, 290.0, 292.0, 128.0, 
134.0, 290.0, 292.0, 114.0, 120.0, 294.0, 288.0, 254.0, 271.0, 254.0, 256.0, 289.0, 290.0, 288.0, 291.0, 253.0, 269.0, 289.0, 293.0, 288.0, 291.0, 292.0, 290.0, 297.0, 290.0, 294.0, 282.0, 293.0, 289.0, 261.0, 264.0, 272.0, 258.0, 290.0, 292.0, 290.0, 292.0, 291.0, 288.0, 284.0, 295.0, 262.0, 260.0, 296.0, 291.0, 276.0, 303.0, 323.0, 304.0, 284.0, 289.0, 294.0, 279.0, 270.0, 252.0, 291.0, 291.0, 293.0, 286.0, 296.0, 283.0, 293.0, 289.0, 289.0, 293.0, 291.0, 288.0, 272.0, 264.0, 288.0, 294.0, 294.0, 293.0, 293.0, 283.0, 270.0, 260.0, 288.0, 285.0, 292.0, 287.0, 287.0, 289.0, 288.0, 291.0, 291.0, 285.0, 291.0, 291.0, 283.0, 293.0, 287.0, 300.0, 285.0, 294.0, 236.0, 240.0, 285.0, 288.0, 262.0, 260.0, 294.0, 285.0, 257.0, 265.0, 289.0, 293.0, 290.0, 289.0, 257.0, 262.0, 294.0, 285.0, 293.0, 286.0, 294.0, 285.0, 281.0, 301.0, 287.0, 289.0, 269.0, 256.0, 288.0, 294.0, 281.0, 295.0, 295.0, 289.0, 290.0, 289.0, 289.0, 290.0, 280.0, 299.0, 292.0, 278.0, 286.0, 284.0, 289.0, 293.0, 298.0, 284.0, 297.0, 285.0, 291.0, 291.0, 286.0, 296.0, 287.0, 292.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6982017257399036, "mean_inference_ms": 1.2471213356037625, "mean_action_processing_ms": 0.13385877685009473, "mean_env_wait_ms": 0.8403536456725641, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 627.0, "episode_reward_min": 234.0, "episode_reward_mean": 562.23, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 114.0}, "policy_reward_max": {"ppo": 323.0}, "policy_reward_mean": {"ppo": 281.115}, "hist_stats": {"episode_reward": [579.0, 579.0, 579.0, 579.0, 522.0, 579.0, 582.0, 576.0, 582.0, 579.0, 582.0, 525.0, 576.0, 519.0, 570.0, 576.0, 579.0, 582.0, 533.0, 579.0, 582.0, 525.0, 573.0, 582.0, 262.0, 582.0, 234.0, 582.0, 525.0, 510.0, 579.0, 579.0, 522.0, 582.0, 579.0, 582.0, 587.0, 576.0, 582.0, 525.0, 530.0, 582.0, 582.0, 579.0, 579.0, 522.0, 587.0, 579.0, 627.0, 573.0, 573.0, 522.0, 582.0, 579.0, 579.0, 582.0, 
582.0, 579.0, 536.0, 582.0, 587.0, 576.0, 530.0, 573.0, 579.0, 576.0, 579.0, 576.0, 582.0, 576.0, 587.0, 579.0, 476.0, 573.0, 522.0, 579.0, 522.0, 582.0, 579.0, 519.0, 579.0, 579.0, 579.0, 582.0, 576.0, 525.0, 582.0, 576.0, 584.0, 579.0, 579.0, 579.0, 570.0, 570.0, 582.0, 582.0, 582.0, 582.0, 582.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [286.0, 293.0, 291.0, 288.0, 283.0, 296.0, 292.0, 287.0, 261.0, 261.0, 287.0, 292.0, 286.0, 296.0, 286.0, 290.0, 293.0, 289.0, 294.0, 285.0, 294.0, 288.0, 256.0, 269.0, 285.0, 291.0, 263.0, 256.0, 292.0, 278.0, 279.0, 297.0, 289.0, 290.0, 287.0, 295.0, 270.0, 263.0, 290.0, 289.0, 292.0, 290.0, 256.0, 269.0, 285.0, 288.0, 290.0, 292.0, 128.0, 134.0, 290.0, 292.0, 114.0, 120.0, 294.0, 288.0, 254.0, 271.0, 254.0, 256.0, 289.0, 290.0, 288.0, 291.0, 253.0, 269.0, 289.0, 293.0, 288.0, 291.0, 292.0, 290.0, 297.0, 290.0, 294.0, 282.0, 293.0, 289.0, 261.0, 264.0, 272.0, 258.0, 290.0, 292.0, 290.0, 292.0, 291.0, 288.0, 284.0, 295.0, 262.0, 260.0, 296.0, 291.0, 276.0, 303.0, 323.0, 304.0, 284.0, 289.0, 294.0, 279.0, 270.0, 252.0, 291.0, 291.0, 293.0, 286.0, 296.0, 283.0, 293.0, 289.0, 289.0, 293.0, 291.0, 288.0, 272.0, 264.0, 288.0, 294.0, 294.0, 293.0, 293.0, 283.0, 270.0, 260.0, 288.0, 285.0, 292.0, 287.0, 287.0, 289.0, 288.0, 291.0, 291.0, 285.0, 291.0, 291.0, 283.0, 293.0, 287.0, 300.0, 285.0, 294.0, 236.0, 240.0, 285.0, 288.0, 262.0, 260.0, 294.0, 285.0, 257.0, 265.0, 289.0, 293.0, 290.0, 289.0, 257.0, 262.0, 294.0, 285.0, 293.0, 286.0, 
294.0, 285.0, 281.0, 301.0, 287.0, 289.0, 269.0, 256.0, 288.0, 294.0, 281.0, 295.0, 295.0, 289.0, 290.0, 289.0, 289.0, 290.0, 280.0, 299.0, 292.0, 278.0, 286.0, 284.0, 289.0, 293.0, 298.0, 284.0, 297.0, 285.0, 291.0, 291.0, 286.0, 296.0, 287.0, 292.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6982017257399036, "mean_inference_ms": 1.2471213356037625, "mean_action_processing_ms": 0.13385877685009473, "mean_env_wait_ms": 0.8403536456725641, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 9369600, "num_agent_steps_trained": 9369600, "num_env_steps_sampled": 4684800, "num_env_steps_trained": 4684800, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 4684800, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 9369600, "timers": {"training_iteration_time_ms": 3608.26, "learn_time_ms": 1122.231, "learn_throughput": 11405.851, "synch_weights_time_ms": 12.679}, "counters": {"num_env_steps_sampled": 4684800, "num_env_steps_trained": 4684800, "num_agent_steps_sampled": 9369600, "num_agent_steps_trained": 9369600}, "done": false, "episodes_total": 11712, "training_iteration": 366, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-24-14", "timestamp": 1666581854, "time_this_iter_s": 3.619035005569458, "time_total_s": 1400.0025129318237, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1400.0025129318237, "timesteps_since_restore": 0, "iterations_since_restore": 366, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.5, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 196.2, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 176.29, "shaped_reward_min": 111, "shaped_reward_max": 187, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.5, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 17.33, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 15.28, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 25, "useful_onion_pickup_agent_1_mean": 17.14, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 25, "onion_drop_agent_0_mean": 0.11, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.08, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.06, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, 
"useful_onion_drop_agent_1_mean": 0.07, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.1, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 16.9, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 5.51, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 4.87, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.43, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.8, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.04, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.04, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.33, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.61, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.25, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.6, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.03, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.1, "optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 16.9, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.1, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 16.9, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0011664158664643764, "policy_loss": -0.001532219466753304, "vf_loss": 
7.772946357727051, "vf_explained_var": 0.5732956528663635, "kl": 0.002272401936352253, "entropy": 0.8229776620864868, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 4697600, "num_env_steps_trained": 4697600, "num_agent_steps_sampled": 9395200, "num_agent_steps_trained": 9395200}, "sampler_results": {"episode_reward_max": 627.0, "episode_reward_min": 351.0, "episode_reward_mean": 568.69, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 169.0}, "policy_reward_max": {"ppo": 323.0}, "policy_reward_mean": {"ppo": 284.345}, "custom_metrics": {"sparse_reward_mean": 196.2, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 176.29, "shaped_reward_min": 111, "shaped_reward_max": 187, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.5, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 17.33, 
"onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 15.28, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 25, "useful_onion_pickup_agent_1_mean": 17.14, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 25, "onion_drop_agent_0_mean": 0.11, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.08, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.06, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.07, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.1, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 16.9, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 5.51, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 4.87, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.43, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.8, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.04, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.04, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.33, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.61, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.25, "soup_delivery_agent_0_min": 1, 
"soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.6, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.03, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.1, "optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 16.9, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.1, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 16.9, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [522.0, 582.0, 579.0, 582.0, 587.0, 576.0, 582.0, 525.0, 530.0, 582.0, 582.0, 579.0, 579.0, 522.0, 587.0, 579.0, 627.0, 573.0, 573.0, 522.0, 582.0, 579.0, 579.0, 582.0, 582.0, 579.0, 536.0, 582.0, 587.0, 576.0, 530.0, 573.0, 579.0, 576.0, 579.0, 576.0, 582.0, 576.0, 587.0, 579.0, 476.0, 573.0, 522.0, 579.0, 522.0, 582.0, 579.0, 519.0, 579.0, 579.0, 579.0, 582.0, 576.0, 525.0, 582.0, 576.0, 584.0, 579.0, 579.0, 579.0, 570.0, 570.0, 582.0, 582.0, 582.0, 582.0, 582.0, 579.0, 576.0, 579.0, 525.0, 581.0, 582.0, 584.0, 582.0, 522.0, 570.0, 579.0, 579.0, 582.0, 351.0, 579.0, 522.0, 582.0, 573.0, 576.0, 579.0, 576.0, 582.0, 582.0, 573.0, 627.0, 525.0, 582.0, 582.0, 573.0, 573.0, 582.0, 582.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [253.0, 269.0, 289.0, 293.0, 288.0, 291.0, 292.0, 290.0, 297.0, 290.0, 294.0, 282.0, 293.0, 289.0, 261.0, 264.0, 272.0, 258.0, 290.0, 292.0, 290.0, 292.0, 291.0, 288.0, 284.0, 295.0, 262.0, 260.0, 296.0, 291.0, 276.0, 303.0, 323.0, 304.0, 284.0, 289.0, 294.0, 279.0, 270.0, 252.0, 291.0, 291.0, 293.0, 286.0, 296.0, 283.0, 293.0, 289.0, 289.0, 
293.0, 291.0, 288.0, 272.0, 264.0, 288.0, 294.0, 294.0, 293.0, 293.0, 283.0, 270.0, 260.0, 288.0, 285.0, 292.0, 287.0, 287.0, 289.0, 288.0, 291.0, 291.0, 285.0, 291.0, 291.0, 283.0, 293.0, 287.0, 300.0, 285.0, 294.0, 236.0, 240.0, 285.0, 288.0, 262.0, 260.0, 294.0, 285.0, 257.0, 265.0, 289.0, 293.0, 290.0, 289.0, 257.0, 262.0, 294.0, 285.0, 293.0, 286.0, 294.0, 285.0, 281.0, 301.0, 287.0, 289.0, 269.0, 256.0, 288.0, 294.0, 281.0, 295.0, 295.0, 289.0, 290.0, 289.0, 289.0, 290.0, 280.0, 299.0, 292.0, 278.0, 286.0, 284.0, 289.0, 293.0, 298.0, 284.0, 297.0, 285.0, 291.0, 291.0, 286.0, 296.0, 287.0, 292.0, 284.0, 292.0, 290.0, 289.0, 263.0, 262.0, 291.0, 290.0, 290.0, 292.0, 298.0, 286.0, 290.0, 292.0, 269.0, 253.0, 290.0, 280.0, 288.0, 291.0, 287.0, 292.0, 294.0, 288.0, 169.0, 182.0, 288.0, 291.0, 262.0, 260.0, 295.0, 287.0, 281.0, 292.0, 286.0, 290.0, 291.0, 288.0, 287.0, 289.0, 290.0, 292.0, 291.0, 291.0, 287.0, 286.0, 315.0, 312.0, 257.0, 268.0, 290.0, 292.0, 289.0, 293.0, 286.0, 287.0, 287.0, 286.0, 289.0, 293.0, 282.0, 300.0, 285.0, 291.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6981296342327503, "mean_inference_ms": 1.246966280399733, "mean_action_processing_ms": 0.1338488189025943, "mean_env_wait_ms": 0.8402657892625757, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 627.0, "episode_reward_min": 351.0, "episode_reward_mean": 568.69, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 169.0}, "policy_reward_max": {"ppo": 323.0}, "policy_reward_mean": {"ppo": 284.345}, "hist_stats": {"episode_reward": [522.0, 582.0, 579.0, 582.0, 587.0, 576.0, 582.0, 525.0, 530.0, 582.0, 582.0, 579.0, 579.0, 522.0, 587.0, 579.0, 627.0, 573.0, 573.0, 522.0, 582.0, 579.0, 579.0, 582.0, 582.0, 579.0, 536.0, 582.0, 587.0, 576.0, 530.0, 573.0, 579.0, 576.0, 579.0, 576.0, 582.0, 576.0, 587.0, 579.0, 476.0, 573.0, 522.0, 579.0, 522.0, 582.0, 579.0, 519.0, 579.0, 579.0, 579.0, 582.0, 576.0, 525.0, 582.0, 576.0, 584.0, 
579.0, 579.0, 579.0, 570.0, 570.0, 582.0, 582.0, 582.0, 582.0, 582.0, 579.0, 576.0, 579.0, 525.0, 581.0, 582.0, 584.0, 582.0, 522.0, 570.0, 579.0, 579.0, 582.0, 351.0, 579.0, 522.0, 582.0, 573.0, 576.0, 579.0, 576.0, 582.0, 582.0, 573.0, 627.0, 525.0, 582.0, 582.0, 573.0, 573.0, 582.0, 582.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [253.0, 269.0, 289.0, 293.0, 288.0, 291.0, 292.0, 290.0, 297.0, 290.0, 294.0, 282.0, 293.0, 289.0, 261.0, 264.0, 272.0, 258.0, 290.0, 292.0, 290.0, 292.0, 291.0, 288.0, 284.0, 295.0, 262.0, 260.0, 296.0, 291.0, 276.0, 303.0, 323.0, 304.0, 284.0, 289.0, 294.0, 279.0, 270.0, 252.0, 291.0, 291.0, 293.0, 286.0, 296.0, 283.0, 293.0, 289.0, 289.0, 293.0, 291.0, 288.0, 272.0, 264.0, 288.0, 294.0, 294.0, 293.0, 293.0, 283.0, 270.0, 260.0, 288.0, 285.0, 292.0, 287.0, 287.0, 289.0, 288.0, 291.0, 291.0, 285.0, 291.0, 291.0, 283.0, 293.0, 287.0, 300.0, 285.0, 294.0, 236.0, 240.0, 285.0, 288.0, 262.0, 260.0, 294.0, 285.0, 257.0, 265.0, 289.0, 293.0, 290.0, 289.0, 257.0, 262.0, 294.0, 285.0, 293.0, 286.0, 294.0, 285.0, 281.0, 301.0, 287.0, 289.0, 269.0, 256.0, 288.0, 294.0, 281.0, 295.0, 295.0, 289.0, 290.0, 289.0, 289.0, 290.0, 280.0, 299.0, 292.0, 278.0, 286.0, 284.0, 289.0, 293.0, 298.0, 284.0, 297.0, 285.0, 291.0, 291.0, 286.0, 296.0, 287.0, 292.0, 284.0, 292.0, 290.0, 289.0, 263.0, 262.0, 291.0, 290.0, 290.0, 292.0, 298.0, 286.0, 290.0, 292.0, 269.0, 253.0, 290.0, 280.0, 288.0, 291.0, 287.0, 292.0, 294.0, 288.0, 169.0, 182.0, 288.0, 291.0, 262.0, 
260.0, 295.0, 287.0, 281.0, 292.0, 286.0, 290.0, 291.0, 288.0, 287.0, 289.0, 290.0, 292.0, 291.0, 291.0, 287.0, 286.0, 315.0, 312.0, 257.0, 268.0, 290.0, 292.0, 289.0, 293.0, 286.0, 287.0, 287.0, 286.0, 289.0, 293.0, 282.0, 300.0, 285.0, 291.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6981296342327503, "mean_inference_ms": 1.246966280399733, "mean_action_processing_ms": 0.1338488189025943, "mean_env_wait_ms": 0.8402657892625757, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 9395200, "num_agent_steps_trained": 9395200, "num_env_steps_sampled": 4697600, "num_env_steps_trained": 4697600, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 4697600, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 9395200, "timers": {"training_iteration_time_ms": 3606.873, "learn_time_ms": 1118.554, "learn_throughput": 11443.345, "synch_weights_time_ms": 13.061}, "counters": {"num_env_steps_sampled": 4697600, "num_env_steps_trained": 4697600, "num_agent_steps_sampled": 9395200, "num_agent_steps_trained": 9395200}, "done": false, "episodes_total": 11744, "training_iteration": 367, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-24-18", "timestamp": 1666581858, "time_this_iter_s": 3.683014392852783, "time_total_s": 1403.6855273246765, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1403.6855273246765, "timesteps_since_restore": 0, "iterations_since_restore": 367, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.86, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 195.8, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 175.32, "shaped_reward_min": 111, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.33, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 17.34, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 15.14, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 17.15, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 26, "onion_drop_agent_0_mean": 0.08, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.07, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.04, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, 
"useful_onion_drop_agent_1_mean": 0.06, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 14.92, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 16.94, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 26, "dish_pickup_agent_0_mean": 5.39, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 4.98, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.28, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.85, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.06, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.06, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.21, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.68, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.16, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.65, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.03, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.92, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 16.94, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 26, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.92, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 16.94, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 26, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0005987448384985328, "policy_loss": -0.0009642803925089538, "vf_loss": 
7.757267475128174, "vf_explained_var": 0.5559936165809631, "kl": 0.0024835034273564816, "entropy": 0.8203800916671753, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 4710400, "num_env_steps_trained": 4710400, "num_agent_steps_sampled": 9420800, "num_agent_steps_trained": 9420800}, "sampler_results": {"episode_reward_max": 630.0, "episode_reward_min": 351.0, "episode_reward_mean": 566.92, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 169.0}, "policy_reward_max": {"ppo": 320.0}, "policy_reward_mean": {"ppo": 283.46}, "custom_metrics": {"sparse_reward_mean": 195.8, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 175.32, "shaped_reward_min": 111, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.33, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 17.34, 
"onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 15.14, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 17.15, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 26, "onion_drop_agent_0_mean": 0.08, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.07, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.04, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.06, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 14.92, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 16.94, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 26, "dish_pickup_agent_0_mean": 5.39, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 4.98, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.28, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.85, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.06, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.06, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.21, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.68, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.16, "soup_delivery_agent_0_min": 1, 
"soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.65, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.03, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.92, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 16.94, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 26, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.92, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 16.94, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 26, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 576.0, 579.0, 576.0, 582.0, 576.0, 587.0, 579.0, 476.0, 573.0, 522.0, 579.0, 522.0, 582.0, 579.0, 519.0, 579.0, 579.0, 579.0, 582.0, 576.0, 525.0, 582.0, 576.0, 584.0, 579.0, 579.0, 579.0, 570.0, 570.0, 582.0, 582.0, 582.0, 582.0, 582.0, 579.0, 576.0, 579.0, 525.0, 581.0, 582.0, 584.0, 582.0, 522.0, 570.0, 579.0, 579.0, 582.0, 351.0, 579.0, 522.0, 582.0, 573.0, 576.0, 579.0, 576.0, 582.0, 582.0, 573.0, 627.0, 525.0, 582.0, 582.0, 573.0, 573.0, 582.0, 582.0, 576.0, 579.0, 630.0, 539.0, 570.0, 579.0, 582.0, 579.0, 579.0, 570.0, 525.0, 579.0, 530.0, 579.0, 573.0, 579.0, 579.0, 579.0, 573.0, 519.0, 579.0, 522.0, 579.0, 579.0, 582.0, 465.0, 582.0, 522.0, 573.0, 579.0, 524.0, 582.0, 570.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [292.0, 287.0, 287.0, 289.0, 288.0, 291.0, 291.0, 285.0, 291.0, 291.0, 283.0, 293.0, 287.0, 300.0, 285.0, 294.0, 236.0, 240.0, 285.0, 288.0, 262.0, 260.0, 294.0, 285.0, 257.0, 265.0, 289.0, 293.0, 290.0, 289.0, 257.0, 262.0, 294.0, 285.0, 293.0, 286.0, 294.0, 285.0, 281.0, 301.0, 287.0, 289.0, 269.0, 256.0, 288.0, 294.0, 281.0, 295.0, 295.0, 
289.0, 290.0, 289.0, 289.0, 290.0, 280.0, 299.0, 292.0, 278.0, 286.0, 284.0, 289.0, 293.0, 298.0, 284.0, 297.0, 285.0, 291.0, 291.0, 286.0, 296.0, 287.0, 292.0, 284.0, 292.0, 290.0, 289.0, 263.0, 262.0, 291.0, 290.0, 290.0, 292.0, 298.0, 286.0, 290.0, 292.0, 269.0, 253.0, 290.0, 280.0, 288.0, 291.0, 287.0, 292.0, 294.0, 288.0, 169.0, 182.0, 288.0, 291.0, 262.0, 260.0, 295.0, 287.0, 281.0, 292.0, 286.0, 290.0, 291.0, 288.0, 287.0, 289.0, 290.0, 292.0, 291.0, 291.0, 287.0, 286.0, 315.0, 312.0, 257.0, 268.0, 290.0, 292.0, 289.0, 293.0, 286.0, 287.0, 287.0, 286.0, 289.0, 293.0, 282.0, 300.0, 285.0, 291.0, 295.0, 284.0, 320.0, 310.0, 267.0, 272.0, 288.0, 282.0, 280.0, 299.0, 288.0, 294.0, 288.0, 291.0, 285.0, 294.0, 282.0, 288.0, 258.0, 267.0, 281.0, 298.0, 257.0, 273.0, 293.0, 286.0, 283.0, 290.0, 288.0, 291.0, 285.0, 294.0, 287.0, 292.0, 286.0, 287.0, 256.0, 263.0, 284.0, 295.0, 261.0, 261.0, 290.0, 289.0, 282.0, 297.0, 291.0, 291.0, 237.0, 228.0, 288.0, 294.0, 261.0, 261.0, 281.0, 292.0, 289.0, 290.0, 261.0, 263.0, 286.0, 296.0, 285.0, 285.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6980672334992584, "mean_inference_ms": 1.2468138773384507, "mean_action_processing_ms": 0.13384109072673375, "mean_env_wait_ms": 0.8401887887646349, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 630.0, "episode_reward_min": 351.0, "episode_reward_mean": 566.92, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 169.0}, "policy_reward_max": {"ppo": 320.0}, "policy_reward_mean": {"ppo": 283.46}, "hist_stats": {"episode_reward": [579.0, 576.0, 579.0, 576.0, 582.0, 576.0, 587.0, 579.0, 476.0, 573.0, 522.0, 579.0, 522.0, 582.0, 579.0, 519.0, 579.0, 579.0, 579.0, 582.0, 576.0, 525.0, 582.0, 576.0, 584.0, 579.0, 579.0, 579.0, 570.0, 570.0, 582.0, 582.0, 582.0, 582.0, 582.0, 579.0, 576.0, 579.0, 525.0, 581.0, 582.0, 584.0, 582.0, 522.0, 570.0, 579.0, 579.0, 582.0, 351.0, 579.0, 522.0, 582.0, 573.0, 576.0, 579.0, 576.0, 582.0, 
582.0, 573.0, 627.0, 525.0, 582.0, 582.0, 573.0, 573.0, 582.0, 582.0, 576.0, 579.0, 630.0, 539.0, 570.0, 579.0, 582.0, 579.0, 579.0, 570.0, 525.0, 579.0, 530.0, 579.0, 573.0, 579.0, 579.0, 579.0, 573.0, 519.0, 579.0, 522.0, 579.0, 579.0, 582.0, 465.0, 582.0, 522.0, 573.0, 579.0, 524.0, 582.0, 570.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [292.0, 287.0, 287.0, 289.0, 288.0, 291.0, 291.0, 285.0, 291.0, 291.0, 283.0, 293.0, 287.0, 300.0, 285.0, 294.0, 236.0, 240.0, 285.0, 288.0, 262.0, 260.0, 294.0, 285.0, 257.0, 265.0, 289.0, 293.0, 290.0, 289.0, 257.0, 262.0, 294.0, 285.0, 293.0, 286.0, 294.0, 285.0, 281.0, 301.0, 287.0, 289.0, 269.0, 256.0, 288.0, 294.0, 281.0, 295.0, 295.0, 289.0, 290.0, 289.0, 289.0, 290.0, 280.0, 299.0, 292.0, 278.0, 286.0, 284.0, 289.0, 293.0, 298.0, 284.0, 297.0, 285.0, 291.0, 291.0, 286.0, 296.0, 287.0, 292.0, 284.0, 292.0, 290.0, 289.0, 263.0, 262.0, 291.0, 290.0, 290.0, 292.0, 298.0, 286.0, 290.0, 292.0, 269.0, 253.0, 290.0, 280.0, 288.0, 291.0, 287.0, 292.0, 294.0, 288.0, 169.0, 182.0, 288.0, 291.0, 262.0, 260.0, 295.0, 287.0, 281.0, 292.0, 286.0, 290.0, 291.0, 288.0, 287.0, 289.0, 290.0, 292.0, 291.0, 291.0, 287.0, 286.0, 315.0, 312.0, 257.0, 268.0, 290.0, 292.0, 289.0, 293.0, 286.0, 287.0, 287.0, 286.0, 289.0, 293.0, 282.0, 300.0, 285.0, 291.0, 295.0, 284.0, 320.0, 310.0, 267.0, 272.0, 288.0, 282.0, 280.0, 299.0, 288.0, 294.0, 288.0, 291.0, 285.0, 294.0, 282.0, 288.0, 258.0, 267.0, 281.0, 298.0, 257.0, 273.0, 293.0, 286.0, 283.0, 290.0, 288.0, 
291.0, 285.0, 294.0, 287.0, 292.0, 286.0, 287.0, 256.0, 263.0, 284.0, 295.0, 261.0, 261.0, 290.0, 289.0, 282.0, 297.0, 291.0, 291.0, 237.0, 228.0, 288.0, 294.0, 261.0, 261.0, 281.0, 292.0, 289.0, 290.0, 261.0, 263.0, 286.0, 296.0, 285.0, 285.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6980672334992584, "mean_inference_ms": 1.2468138773384507, "mean_action_processing_ms": 0.13384109072673375, "mean_env_wait_ms": 0.8401887887646349, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 9420800, "num_agent_steps_trained": 9420800, "num_env_steps_sampled": 4710400, "num_env_steps_trained": 4710400, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 4710400, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 9420800, "timers": {"training_iteration_time_ms": 3605.349, "learn_time_ms": 1118.749, "learn_throughput": 11441.347, "synch_weights_time_ms": 12.614}, "counters": {"num_env_steps_sampled": 4710400, "num_env_steps_trained": 4710400, "num_agent_steps_sampled": 9420800, "num_agent_steps_trained": 9420800}, "done": false, "episodes_total": 11776, "training_iteration": 368, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-24-22", "timestamp": 1666581862, "time_this_iter_s": 3.71199893951416, "time_total_s": 1407.3975262641907, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1407.3975262641907, "timesteps_since_restore": 0, "iterations_since_restore": 368, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.7, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 195.0, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 174.42, "shaped_reward_min": 111, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.41, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 17.08, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 15.22, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 16.92, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 26, "onion_drop_agent_0_mean": 0.06, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.06, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.04, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, 
"useful_onion_drop_agent_1_mean": 0.04, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 14.98, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 16.73, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 26, "dish_pickup_agent_0_mean": 5.3, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.06, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.19, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.89, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.06, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.08, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.11, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.72, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.08, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.7, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.98, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 16.73, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 26, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.98, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 16.73, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 26, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0002802152303047478, "policy_loss": -0.0006437780102714896, "vf_loss": 
7.782103538513184, "vf_explained_var": 0.5572320818901062, "kl": 0.002535460516810417, "entropy": 0.8292930126190186, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 4723200, "num_env_steps_trained": 4723200, "num_agent_steps_sampled": 9446400, "num_agent_steps_trained": 9446400}, "sampler_results": {"episode_reward_max": 630.0, "episode_reward_min": 351.0, "episode_reward_mean": 564.42, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 169.0}, "policy_reward_max": {"ppo": 320.0}, "policy_reward_mean": {"ppo": 282.21}, "custom_metrics": {"sparse_reward_mean": 195.0, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 174.42, "shaped_reward_min": 111, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.41, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 17.08, 
"onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 15.22, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 16.92, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 26, "onion_drop_agent_0_mean": 0.06, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.06, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.04, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.04, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 14.98, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 16.73, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 26, "dish_pickup_agent_0_mean": 5.3, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.06, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.19, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.89, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.06, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.08, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.11, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.72, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.08, "soup_delivery_agent_0_min": 1, 
"soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.7, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.98, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 16.73, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 26, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.98, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 16.73, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 26, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 582.0, 582.0, 579.0, 576.0, 579.0, 525.0, 581.0, 582.0, 584.0, 582.0, 522.0, 570.0, 579.0, 579.0, 582.0, 351.0, 579.0, 522.0, 582.0, 573.0, 576.0, 579.0, 576.0, 582.0, 582.0, 573.0, 627.0, 525.0, 582.0, 582.0, 573.0, 573.0, 582.0, 582.0, 576.0, 579.0, 630.0, 539.0, 570.0, 579.0, 582.0, 579.0, 579.0, 570.0, 525.0, 579.0, 530.0, 579.0, 573.0, 579.0, 579.0, 579.0, 573.0, 519.0, 579.0, 522.0, 579.0, 579.0, 582.0, 465.0, 582.0, 522.0, 573.0, 579.0, 524.0, 582.0, 570.0, 573.0, 525.0, 579.0, 579.0, 576.0, 582.0, 579.0, 513.0, 576.0, 579.0, 576.0, 576.0, 522.0, 579.0, 573.0, 579.0, 476.0, 573.0, 525.0, 522.0, 573.0, 579.0, 573.0, 525.0, 582.0, 582.0, 579.0, 522.0, 525.0, 579.0, 582.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [297.0, 285.0, 291.0, 291.0, 286.0, 296.0, 287.0, 292.0, 284.0, 292.0, 290.0, 289.0, 263.0, 262.0, 291.0, 290.0, 290.0, 292.0, 298.0, 286.0, 290.0, 292.0, 269.0, 253.0, 290.0, 280.0, 288.0, 291.0, 287.0, 292.0, 294.0, 288.0, 169.0, 182.0, 288.0, 291.0, 262.0, 260.0, 295.0, 287.0, 281.0, 292.0, 286.0, 290.0, 291.0, 288.0, 287.0, 289.0, 290.0, 
292.0, 291.0, 291.0, 287.0, 286.0, 315.0, 312.0, 257.0, 268.0, 290.0, 292.0, 289.0, 293.0, 286.0, 287.0, 287.0, 286.0, 289.0, 293.0, 282.0, 300.0, 285.0, 291.0, 295.0, 284.0, 320.0, 310.0, 267.0, 272.0, 288.0, 282.0, 280.0, 299.0, 288.0, 294.0, 288.0, 291.0, 285.0, 294.0, 282.0, 288.0, 258.0, 267.0, 281.0, 298.0, 257.0, 273.0, 293.0, 286.0, 283.0, 290.0, 288.0, 291.0, 285.0, 294.0, 287.0, 292.0, 286.0, 287.0, 256.0, 263.0, 284.0, 295.0, 261.0, 261.0, 290.0, 289.0, 282.0, 297.0, 291.0, 291.0, 237.0, 228.0, 288.0, 294.0, 261.0, 261.0, 281.0, 292.0, 289.0, 290.0, 261.0, 263.0, 286.0, 296.0, 285.0, 285.0, 287.0, 286.0, 260.0, 265.0, 292.0, 287.0, 288.0, 291.0, 288.0, 288.0, 289.0, 293.0, 283.0, 296.0, 255.0, 258.0, 289.0, 287.0, 291.0, 288.0, 286.0, 290.0, 289.0, 287.0, 253.0, 269.0, 290.0, 289.0, 287.0, 286.0, 288.0, 291.0, 239.0, 237.0, 282.0, 291.0, 262.0, 263.0, 262.0, 260.0, 285.0, 288.0, 288.0, 291.0, 285.0, 288.0, 263.0, 262.0, 288.0, 294.0, 291.0, 291.0, 281.0, 298.0, 265.0, 257.0, 260.0, 265.0, 292.0, 287.0, 291.0, 291.0, 292.0, 284.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6979899684883932, "mean_inference_ms": 1.246757177203936, "mean_action_processing_ms": 0.1338305857604492, "mean_env_wait_ms": 0.8401893480231749, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 630.0, "episode_reward_min": 351.0, "episode_reward_mean": 564.42, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 169.0}, "policy_reward_max": {"ppo": 320.0}, "policy_reward_mean": {"ppo": 282.21}, "hist_stats": {"episode_reward": [582.0, 582.0, 582.0, 579.0, 576.0, 579.0, 525.0, 581.0, 582.0, 584.0, 582.0, 522.0, 570.0, 579.0, 579.0, 582.0, 351.0, 579.0, 522.0, 582.0, 573.0, 576.0, 579.0, 576.0, 582.0, 582.0, 573.0, 627.0, 525.0, 582.0, 582.0, 573.0, 573.0, 582.0, 582.0, 576.0, 579.0, 630.0, 539.0, 570.0, 579.0, 582.0, 579.0, 579.0, 570.0, 525.0, 579.0, 530.0, 579.0, 573.0, 579.0, 579.0, 579.0, 573.0, 519.0, 579.0, 522.0, 
579.0, 579.0, 582.0, 465.0, 582.0, 522.0, 573.0, 579.0, 524.0, 582.0, 570.0, 573.0, 525.0, 579.0, 579.0, 576.0, 582.0, 579.0, 513.0, 576.0, 579.0, 576.0, 576.0, 522.0, 579.0, 573.0, 579.0, 476.0, 573.0, 525.0, 522.0, 573.0, 579.0, 573.0, 525.0, 582.0, 582.0, 579.0, 522.0, 525.0, 579.0, 582.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [297.0, 285.0, 291.0, 291.0, 286.0, 296.0, 287.0, 292.0, 284.0, 292.0, 290.0, 289.0, 263.0, 262.0, 291.0, 290.0, 290.0, 292.0, 298.0, 286.0, 290.0, 292.0, 269.0, 253.0, 290.0, 280.0, 288.0, 291.0, 287.0, 292.0, 294.0, 288.0, 169.0, 182.0, 288.0, 291.0, 262.0, 260.0, 295.0, 287.0, 281.0, 292.0, 286.0, 290.0, 291.0, 288.0, 287.0, 289.0, 290.0, 292.0, 291.0, 291.0, 287.0, 286.0, 315.0, 312.0, 257.0, 268.0, 290.0, 292.0, 289.0, 293.0, 286.0, 287.0, 287.0, 286.0, 289.0, 293.0, 282.0, 300.0, 285.0, 291.0, 295.0, 284.0, 320.0, 310.0, 267.0, 272.0, 288.0, 282.0, 280.0, 299.0, 288.0, 294.0, 288.0, 291.0, 285.0, 294.0, 282.0, 288.0, 258.0, 267.0, 281.0, 298.0, 257.0, 273.0, 293.0, 286.0, 283.0, 290.0, 288.0, 291.0, 285.0, 294.0, 287.0, 292.0, 286.0, 287.0, 256.0, 263.0, 284.0, 295.0, 261.0, 261.0, 290.0, 289.0, 282.0, 297.0, 291.0, 291.0, 237.0, 228.0, 288.0, 294.0, 261.0, 261.0, 281.0, 292.0, 289.0, 290.0, 261.0, 263.0, 286.0, 296.0, 285.0, 285.0, 287.0, 286.0, 260.0, 265.0, 292.0, 287.0, 288.0, 291.0, 288.0, 288.0, 289.0, 293.0, 283.0, 296.0, 255.0, 258.0, 289.0, 287.0, 291.0, 288.0, 286.0, 290.0, 289.0, 287.0, 253.0, 269.0, 290.0, 289.0, 287.0, 
286.0, 288.0, 291.0, 239.0, 237.0, 282.0, 291.0, 262.0, 263.0, 262.0, 260.0, 285.0, 288.0, 288.0, 291.0, 285.0, 288.0, 263.0, 262.0, 288.0, 294.0, 291.0, 291.0, 281.0, 298.0, 265.0, 257.0, 260.0, 265.0, 292.0, 287.0, 291.0, 291.0, 292.0, 284.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6979899684883932, "mean_inference_ms": 1.246757177203936, "mean_action_processing_ms": 0.1338305857604492, "mean_env_wait_ms": 0.8401893480231749, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 9446400, "num_agent_steps_trained": 9446400, "num_env_steps_sampled": 4723200, "num_env_steps_trained": 4723200, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 4723200, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 9446400, "timers": {"training_iteration_time_ms": 3628.601, "learn_time_ms": 1118.822, "learn_throughput": 11440.607, "synch_weights_time_ms": 13.16}, "counters": {"num_env_steps_sampled": 4723200, "num_env_steps_trained": 4723200, "num_agent_steps_sampled": 9446400, "num_agent_steps_trained": 9446400}, "done": false, "episodes_total": 11808, "training_iteration": 369, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-24-26", "timestamp": 1666581866, "time_this_iter_s": 3.945258617401123, "time_total_s": 1411.3427848815918, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1411.3427848815918, "timesteps_since_restore": 0, "iterations_since_restore": 369, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 21.26666666666667, "ram_util_percent": 10.616666666666667}}
+{"custom_metrics": {"sparse_reward_mean": 194.8, "sparse_reward_min": 100, "sparse_reward_max": 220, "shaped_reward_mean": 173.98, "shaped_reward_min": 94, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.52, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 16.93, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 15.42, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 16.83, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 26, "onion_drop_agent_0_mean": 0.08, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.09, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.05, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, 
"useful_onion_drop_agent_1_mean": 0.02, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.05, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 16.56, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 26, "dish_pickup_agent_0_mean": 5.24, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.04, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.11, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.89, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.08, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.07, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.04, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.81, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.0, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.77, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.05, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 16.56, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 26, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.05, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 16.56, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 26, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.000858169689308852, "policy_loss": 0.00048363849055022, "vf_loss": 
7.798517227172852, "vf_explained_var": 0.5516868829727173, "kl": 0.003202717285603285, "entropy": 0.8106404542922974, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 4736000, "num_env_steps_trained": 4736000, "num_agent_steps_sampled": 9472000, "num_agent_steps_trained": 9472000}, "sampler_results": {"episode_reward_max": 630.0, "episode_reward_min": 294.0, "episode_reward_mean": 563.58, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 143.0}, "policy_reward_max": {"ppo": 320.0}, "policy_reward_mean": {"ppo": 281.79}, "custom_metrics": {"sparse_reward_mean": 194.8, "sparse_reward_min": 100, "sparse_reward_max": 220, "shaped_reward_mean": 173.98, "shaped_reward_min": 94, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.52, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 16.93, 
"onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 15.42, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 16.83, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 26, "onion_drop_agent_0_mean": 0.08, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.09, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.05, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.02, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.05, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 16.56, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 26, "dish_pickup_agent_0_mean": 5.24, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.04, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.11, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.89, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.08, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.07, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.04, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.81, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.0, "soup_delivery_agent_0_min": 2, 
"soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.77, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.05, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 16.56, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 26, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.05, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 16.56, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 26, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [573.0, 582.0, 582.0, 576.0, 579.0, 630.0, 539.0, 570.0, 579.0, 582.0, 579.0, 579.0, 570.0, 525.0, 579.0, 530.0, 579.0, 573.0, 579.0, 579.0, 579.0, 573.0, 519.0, 579.0, 522.0, 579.0, 579.0, 582.0, 465.0, 582.0, 522.0, 573.0, 579.0, 524.0, 582.0, 570.0, 573.0, 525.0, 579.0, 579.0, 576.0, 582.0, 579.0, 513.0, 576.0, 579.0, 576.0, 576.0, 522.0, 579.0, 573.0, 579.0, 476.0, 573.0, 525.0, 522.0, 573.0, 579.0, 573.0, 525.0, 582.0, 582.0, 579.0, 522.0, 525.0, 579.0, 582.0, 576.0, 579.0, 579.0, 573.0, 573.0, 579.0, 579.0, 587.0, 294.0, 579.0, 579.0, 573.0, 579.0, 582.0, 576.0, 530.0, 581.0, 579.0, 587.0, 576.0, 573.0, 530.0, 573.0, 579.0, 576.0, 579.0, 576.0, 579.0, 576.0, 573.0, 573.0, 576.0, 519.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [287.0, 286.0, 289.0, 293.0, 282.0, 300.0, 285.0, 291.0, 295.0, 284.0, 320.0, 310.0, 267.0, 272.0, 288.0, 282.0, 280.0, 299.0, 288.0, 294.0, 288.0, 291.0, 285.0, 294.0, 282.0, 288.0, 258.0, 267.0, 281.0, 298.0, 257.0, 273.0, 293.0, 286.0, 283.0, 290.0, 288.0, 291.0, 285.0, 294.0, 287.0, 292.0, 286.0, 287.0, 256.0, 263.0, 284.0, 295.0, 261.0, 
261.0, 290.0, 289.0, 282.0, 297.0, 291.0, 291.0, 237.0, 228.0, 288.0, 294.0, 261.0, 261.0, 281.0, 292.0, 289.0, 290.0, 261.0, 263.0, 286.0, 296.0, 285.0, 285.0, 287.0, 286.0, 260.0, 265.0, 292.0, 287.0, 288.0, 291.0, 288.0, 288.0, 289.0, 293.0, 283.0, 296.0, 255.0, 258.0, 289.0, 287.0, 291.0, 288.0, 286.0, 290.0, 289.0, 287.0, 253.0, 269.0, 290.0, 289.0, 287.0, 286.0, 288.0, 291.0, 239.0, 237.0, 282.0, 291.0, 262.0, 263.0, 262.0, 260.0, 285.0, 288.0, 288.0, 291.0, 285.0, 288.0, 263.0, 262.0, 288.0, 294.0, 291.0, 291.0, 281.0, 298.0, 265.0, 257.0, 260.0, 265.0, 292.0, 287.0, 291.0, 291.0, 292.0, 284.0, 286.0, 293.0, 290.0, 289.0, 286.0, 287.0, 282.0, 291.0, 284.0, 295.0, 289.0, 290.0, 292.0, 295.0, 151.0, 143.0, 293.0, 286.0, 286.0, 293.0, 287.0, 286.0, 283.0, 296.0, 291.0, 291.0, 287.0, 289.0, 264.0, 266.0, 292.0, 289.0, 294.0, 285.0, 291.0, 296.0, 293.0, 283.0, 291.0, 282.0, 258.0, 272.0, 286.0, 287.0, 290.0, 289.0, 288.0, 288.0, 289.0, 290.0, 285.0, 291.0, 292.0, 287.0, 283.0, 293.0, 283.0, 290.0, 291.0, 282.0, 289.0, 287.0, 255.0, 264.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6979214666795684, "mean_inference_ms": 1.2466894042982537, "mean_action_processing_ms": 0.13382035056435135, "mean_env_wait_ms": 0.8401823475867125, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 630.0, "episode_reward_min": 294.0, "episode_reward_mean": 563.58, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 143.0}, "policy_reward_max": {"ppo": 320.0}, "policy_reward_mean": {"ppo": 281.79}, "hist_stats": {"episode_reward": [573.0, 582.0, 582.0, 576.0, 579.0, 630.0, 539.0, 570.0, 579.0, 582.0, 579.0, 579.0, 570.0, 525.0, 579.0, 530.0, 579.0, 573.0, 579.0, 579.0, 579.0, 573.0, 519.0, 579.0, 522.0, 579.0, 579.0, 582.0, 465.0, 582.0, 522.0, 573.0, 579.0, 524.0, 582.0, 570.0, 573.0, 525.0, 579.0, 579.0, 576.0, 582.0, 579.0, 513.0, 576.0, 579.0, 576.0, 576.0, 522.0, 579.0, 573.0, 579.0, 476.0, 573.0, 525.0, 522.0, 573.0, 
579.0, 573.0, 525.0, 582.0, 582.0, 579.0, 522.0, 525.0, 579.0, 582.0, 576.0, 579.0, 579.0, 573.0, 573.0, 579.0, 579.0, 587.0, 294.0, 579.0, 579.0, 573.0, 579.0, 582.0, 576.0, 530.0, 581.0, 579.0, 587.0, 576.0, 573.0, 530.0, 573.0, 579.0, 576.0, 579.0, 576.0, 579.0, 576.0, 573.0, 573.0, 576.0, 519.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [287.0, 286.0, 289.0, 293.0, 282.0, 300.0, 285.0, 291.0, 295.0, 284.0, 320.0, 310.0, 267.0, 272.0, 288.0, 282.0, 280.0, 299.0, 288.0, 294.0, 288.0, 291.0, 285.0, 294.0, 282.0, 288.0, 258.0, 267.0, 281.0, 298.0, 257.0, 273.0, 293.0, 286.0, 283.0, 290.0, 288.0, 291.0, 285.0, 294.0, 287.0, 292.0, 286.0, 287.0, 256.0, 263.0, 284.0, 295.0, 261.0, 261.0, 290.0, 289.0, 282.0, 297.0, 291.0, 291.0, 237.0, 228.0, 288.0, 294.0, 261.0, 261.0, 281.0, 292.0, 289.0, 290.0, 261.0, 263.0, 286.0, 296.0, 285.0, 285.0, 287.0, 286.0, 260.0, 265.0, 292.0, 287.0, 288.0, 291.0, 288.0, 288.0, 289.0, 293.0, 283.0, 296.0, 255.0, 258.0, 289.0, 287.0, 291.0, 288.0, 286.0, 290.0, 289.0, 287.0, 253.0, 269.0, 290.0, 289.0, 287.0, 286.0, 288.0, 291.0, 239.0, 237.0, 282.0, 291.0, 262.0, 263.0, 262.0, 260.0, 285.0, 288.0, 288.0, 291.0, 285.0, 288.0, 263.0, 262.0, 288.0, 294.0, 291.0, 291.0, 281.0, 298.0, 265.0, 257.0, 260.0, 265.0, 292.0, 287.0, 291.0, 291.0, 292.0, 284.0, 286.0, 293.0, 290.0, 289.0, 286.0, 287.0, 282.0, 291.0, 284.0, 295.0, 289.0, 290.0, 292.0, 295.0, 151.0, 143.0, 293.0, 286.0, 286.0, 293.0, 287.0, 286.0, 283.0, 296.0, 291.0, 291.0, 287.0, 289.0, 264.0, 
266.0, 292.0, 289.0, 294.0, 285.0, 291.0, 296.0, 293.0, 283.0, 291.0, 282.0, 258.0, 272.0, 286.0, 287.0, 290.0, 289.0, 288.0, 288.0, 289.0, 290.0, 285.0, 291.0, 292.0, 287.0, 283.0, 293.0, 283.0, 290.0, 291.0, 282.0, 289.0, 287.0, 255.0, 264.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6979214666795684, "mean_inference_ms": 1.2466894042982537, "mean_action_processing_ms": 0.13382035056435135, "mean_env_wait_ms": 0.8401823475867125, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 9472000, "num_agent_steps_trained": 9472000, "num_env_steps_sampled": 4736000, "num_env_steps_trained": 4736000, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 4736000, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 9472000, "timers": {"training_iteration_time_ms": 3638.398, "learn_time_ms": 1117.244, "learn_throughput": 11456.764, "synch_weights_time_ms": 13.584}, "counters": {"num_env_steps_sampled": 4736000, "num_env_steps_trained": 4736000, "num_agent_steps_sampled": 9472000, "num_agent_steps_trained": 9472000}, "done": false, "episodes_total": 11840, "training_iteration": 370, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-24-30", "timestamp": 1666581870, "time_this_iter_s": 3.7514426708221436, "time_total_s": 1415.094227552414, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1415.094227552414, "timesteps_since_restore": 0, "iterations_since_restore": 370, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.939999999999998, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 195.6, "sparse_reward_min": 100, "sparse_reward_max": 220, "shaped_reward_mean": 174.44, "shaped_reward_min": 94, "shaped_reward_max": 187, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.41, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 17.12, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 15.27, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 17.03, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.1, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.1, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.08, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, 
"useful_onion_drop_agent_1_mean": 0.03, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 14.96, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 16.68, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.39, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 4.9, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.29, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.8, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.06, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.06, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.18, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.68, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.15, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.66, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.96, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 16.68, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.96, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 16.68, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.002380042104050517, "policy_loss": 0.002013041637837887, "vf_loss": 
7.760648727416992, "vf_explained_var": 0.546655535697937, "kl": 0.00237697409465909, "entropy": 0.8181264400482178, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 4748800, "num_env_steps_trained": 4748800, "num_agent_steps_sampled": 9497600, "num_agent_steps_trained": 9497600}, "sampler_results": {"episode_reward_max": 627.0, "episode_reward_min": 294.0, "episode_reward_mean": 565.64, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 143.0}, "policy_reward_max": {"ppo": 316.0}, "policy_reward_mean": {"ppo": 282.82}, "custom_metrics": {"sparse_reward_mean": 195.6, "sparse_reward_min": 100, "sparse_reward_max": 220, "shaped_reward_mean": 174.44, "shaped_reward_min": 94, "shaped_reward_max": 187, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.41, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 17.12, 
"onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 15.27, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 17.03, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.1, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.1, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.08, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.03, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 14.96, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 16.68, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.39, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 4.9, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.29, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.8, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.06, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.06, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.18, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.68, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.15, "soup_delivery_agent_0_min": 2, 
"soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.66, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.96, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 16.68, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.96, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 16.68, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 524.0, 582.0, 570.0, 573.0, 525.0, 579.0, 579.0, 576.0, 582.0, 579.0, 513.0, 576.0, 579.0, 576.0, 576.0, 522.0, 579.0, 573.0, 579.0, 476.0, 573.0, 525.0, 522.0, 573.0, 579.0, 573.0, 525.0, 582.0, 582.0, 579.0, 522.0, 525.0, 579.0, 582.0, 576.0, 579.0, 579.0, 573.0, 573.0, 579.0, 579.0, 587.0, 294.0, 579.0, 579.0, 573.0, 579.0, 582.0, 576.0, 530.0, 581.0, 579.0, 587.0, 576.0, 573.0, 530.0, 573.0, 579.0, 576.0, 579.0, 576.0, 579.0, 576.0, 573.0, 573.0, 576.0, 519.0, 522.0, 576.0, 570.0, 525.0, 582.0, 582.0, 573.0, 582.0, 579.0, 579.0, 582.0, 570.0, 579.0, 579.0, 582.0, 579.0, 579.0, 522.0, 627.0, 579.0, 579.0, 582.0, 522.0, 576.0, 579.0, 582.0, 576.0, 573.0, 582.0, 576.0, 573.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [289.0, 290.0, 261.0, 263.0, 286.0, 296.0, 285.0, 285.0, 287.0, 286.0, 260.0, 265.0, 292.0, 287.0, 288.0, 291.0, 288.0, 288.0, 289.0, 293.0, 283.0, 296.0, 255.0, 258.0, 289.0, 287.0, 291.0, 288.0, 286.0, 290.0, 289.0, 287.0, 253.0, 269.0, 290.0, 289.0, 287.0, 286.0, 288.0, 291.0, 239.0, 237.0, 282.0, 291.0, 262.0, 263.0, 262.0, 260.0, 285.0, 
288.0, 288.0, 291.0, 285.0, 288.0, 263.0, 262.0, 288.0, 294.0, 291.0, 291.0, 281.0, 298.0, 265.0, 257.0, 260.0, 265.0, 292.0, 287.0, 291.0, 291.0, 292.0, 284.0, 286.0, 293.0, 290.0, 289.0, 286.0, 287.0, 282.0, 291.0, 284.0, 295.0, 289.0, 290.0, 292.0, 295.0, 151.0, 143.0, 293.0, 286.0, 286.0, 293.0, 287.0, 286.0, 283.0, 296.0, 291.0, 291.0, 287.0, 289.0, 264.0, 266.0, 292.0, 289.0, 294.0, 285.0, 291.0, 296.0, 293.0, 283.0, 291.0, 282.0, 258.0, 272.0, 286.0, 287.0, 290.0, 289.0, 288.0, 288.0, 289.0, 290.0, 285.0, 291.0, 292.0, 287.0, 283.0, 293.0, 283.0, 290.0, 291.0, 282.0, 289.0, 287.0, 255.0, 264.0, 264.0, 258.0, 289.0, 287.0, 281.0, 289.0, 259.0, 266.0, 294.0, 288.0, 292.0, 290.0, 287.0, 286.0, 296.0, 286.0, 291.0, 288.0, 291.0, 288.0, 285.0, 297.0, 287.0, 283.0, 292.0, 287.0, 291.0, 288.0, 284.0, 298.0, 299.0, 280.0, 292.0, 287.0, 263.0, 259.0, 316.0, 311.0, 286.0, 293.0, 288.0, 291.0, 288.0, 294.0, 260.0, 262.0, 288.0, 288.0, 287.0, 292.0, 291.0, 291.0, 291.0, 285.0, 285.0, 288.0, 290.0, 292.0, 281.0, 295.0, 284.0, 289.0, 290.0, 286.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6978597284218533, "mean_inference_ms": 1.2466446787051728, "mean_action_processing_ms": 0.13381208853991922, "mean_env_wait_ms": 0.840190768123749, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 627.0, "episode_reward_min": 294.0, "episode_reward_mean": 565.64, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 143.0}, "policy_reward_max": {"ppo": 316.0}, "policy_reward_mean": {"ppo": 282.82}, "hist_stats": {"episode_reward": [579.0, 524.0, 582.0, 570.0, 573.0, 525.0, 579.0, 579.0, 576.0, 582.0, 579.0, 513.0, 576.0, 579.0, 576.0, 576.0, 522.0, 579.0, 573.0, 579.0, 476.0, 573.0, 525.0, 522.0, 573.0, 579.0, 573.0, 525.0, 582.0, 582.0, 579.0, 522.0, 525.0, 579.0, 582.0, 576.0, 579.0, 579.0, 573.0, 573.0, 579.0, 579.0, 587.0, 294.0, 579.0, 579.0, 573.0, 579.0, 582.0, 576.0, 530.0, 581.0, 579.0, 587.0, 576.0, 573.0, 530.0, 
573.0, 579.0, 576.0, 579.0, 576.0, 579.0, 576.0, 573.0, 573.0, 576.0, 519.0, 522.0, 576.0, 570.0, 525.0, 582.0, 582.0, 573.0, 582.0, 579.0, 579.0, 582.0, 570.0, 579.0, 579.0, 582.0, 579.0, 579.0, 522.0, 627.0, 579.0, 579.0, 582.0, 522.0, 576.0, 579.0, 582.0, 576.0, 573.0, 582.0, 576.0, 573.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [289.0, 290.0, 261.0, 263.0, 286.0, 296.0, 285.0, 285.0, 287.0, 286.0, 260.0, 265.0, 292.0, 287.0, 288.0, 291.0, 288.0, 288.0, 289.0, 293.0, 283.0, 296.0, 255.0, 258.0, 289.0, 287.0, 291.0, 288.0, 286.0, 290.0, 289.0, 287.0, 253.0, 269.0, 290.0, 289.0, 287.0, 286.0, 288.0, 291.0, 239.0, 237.0, 282.0, 291.0, 262.0, 263.0, 262.0, 260.0, 285.0, 288.0, 288.0, 291.0, 285.0, 288.0, 263.0, 262.0, 288.0, 294.0, 291.0, 291.0, 281.0, 298.0, 265.0, 257.0, 260.0, 265.0, 292.0, 287.0, 291.0, 291.0, 292.0, 284.0, 286.0, 293.0, 290.0, 289.0, 286.0, 287.0, 282.0, 291.0, 284.0, 295.0, 289.0, 290.0, 292.0, 295.0, 151.0, 143.0, 293.0, 286.0, 286.0, 293.0, 287.0, 286.0, 283.0, 296.0, 291.0, 291.0, 287.0, 289.0, 264.0, 266.0, 292.0, 289.0, 294.0, 285.0, 291.0, 296.0, 293.0, 283.0, 291.0, 282.0, 258.0, 272.0, 286.0, 287.0, 290.0, 289.0, 288.0, 288.0, 289.0, 290.0, 285.0, 291.0, 292.0, 287.0, 283.0, 293.0, 283.0, 290.0, 291.0, 282.0, 289.0, 287.0, 255.0, 264.0, 264.0, 258.0, 289.0, 287.0, 281.0, 289.0, 259.0, 266.0, 294.0, 288.0, 292.0, 290.0, 287.0, 286.0, 296.0, 286.0, 291.0, 288.0, 291.0, 288.0, 285.0, 297.0, 287.0, 283.0, 292.0, 287.0, 291.0, 288.0, 284.0, 
298.0, 299.0, 280.0, 292.0, 287.0, 263.0, 259.0, 316.0, 311.0, 286.0, 293.0, 288.0, 291.0, 288.0, 294.0, 260.0, 262.0, 288.0, 288.0, 287.0, 292.0, 291.0, 291.0, 291.0, 285.0, 285.0, 288.0, 290.0, 292.0, 281.0, 295.0, 284.0, 289.0, 290.0, 286.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6978597284218533, "mean_inference_ms": 1.2466446787051728, "mean_action_processing_ms": 0.13381208853991922, "mean_env_wait_ms": 0.840190768123749, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 9497600, "num_agent_steps_trained": 9497600, "num_env_steps_sampled": 4748800, "num_env_steps_trained": 4748800, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 4748800, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 9497600, "timers": {"training_iteration_time_ms": 3647.884, "learn_time_ms": 1119.647, "learn_throughput": 11432.173, "synch_weights_time_ms": 13.259}, "counters": {"num_env_steps_sampled": 4748800, "num_env_steps_trained": 4748800, "num_agent_steps_sampled": 9497600, "num_agent_steps_trained": 9497600}, "done": false, "episodes_total": 11872, "training_iteration": 371, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-24-34", "timestamp": 1666581874, "time_this_iter_s": 3.723357677459717, "time_total_s": 1418.8175852298737, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1418.8175852298737, "timesteps_since_restore": 0, "iterations_since_restore": 371, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.95, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 197.2, "sparse_reward_min": 100, "sparse_reward_max": 220, "shaped_reward_mean": 175.64, "shaped_reward_min": 94, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.56, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 17.2, "onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 15.42, "useful_onion_pickup_agent_0_min": 9, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 17.11, "useful_onion_pickup_agent_1_min": 11, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.1, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.09, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.07, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, 
"useful_onion_drop_agent_1_mean": 0.03, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.11, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 16.74, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.36, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 4.94, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 7, "useful_dish_pickup_agent_0_mean": 5.31, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.87, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.04, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.07, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.19, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.73, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.17, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.71, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.11, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 16.74, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.11, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 16.74, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0016957891639322042, "policy_loss": 0.0013228158932179213, "vf_loss": 
7.805115699768066, "vf_explained_var": 0.5548580288887024, "kl": 0.0029728966765105724, "entropy": 0.8150744438171387, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 4761600, "num_env_steps_trained": 4761600, "num_agent_steps_sampled": 9523200, "num_agent_steps_trained": 9523200}, "sampler_results": {"episode_reward_max": 630.0, "episode_reward_min": 294.0, "episode_reward_mean": 570.04, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 143.0}, "policy_reward_max": {"ppo": 320.0}, "policy_reward_mean": {"ppo": 285.02}, "custom_metrics": {"sparse_reward_mean": 197.2, "sparse_reward_min": 100, "sparse_reward_max": 220, "shaped_reward_mean": 175.64, "shaped_reward_min": 94, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.56, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 17.2, 
"onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 15.42, "useful_onion_pickup_agent_0_min": 9, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 17.11, "useful_onion_pickup_agent_1_min": 11, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.1, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.09, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.07, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.03, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.11, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 16.74, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.36, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 4.94, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 7, "useful_dish_pickup_agent_0_mean": 5.31, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.87, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.04, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.07, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.19, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.73, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.17, "soup_delivery_agent_0_min": 3, 
"soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.71, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.11, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 16.74, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.11, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 16.74, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [525.0, 579.0, 582.0, 576.0, 579.0, 579.0, 573.0, 573.0, 579.0, 579.0, 587.0, 294.0, 579.0, 579.0, 573.0, 579.0, 582.0, 576.0, 530.0, 581.0, 579.0, 587.0, 576.0, 573.0, 530.0, 573.0, 579.0, 576.0, 579.0, 576.0, 579.0, 576.0, 573.0, 573.0, 576.0, 519.0, 522.0, 576.0, 570.0, 525.0, 582.0, 582.0, 573.0, 582.0, 579.0, 579.0, 582.0, 570.0, 579.0, 579.0, 582.0, 579.0, 579.0, 522.0, 627.0, 579.0, 579.0, 582.0, 522.0, 576.0, 579.0, 582.0, 576.0, 573.0, 582.0, 576.0, 573.0, 576.0, 579.0, 582.0, 570.0, 579.0, 570.0, 582.0, 579.0, 579.0, 582.0, 579.0, 522.0, 579.0, 579.0, 570.0, 579.0, 579.0, 519.0, 573.0, 579.0, 579.0, 579.0, 522.0, 573.0, 582.0, 630.0, 579.0, 579.0, 582.0, 576.0, 579.0, 570.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [260.0, 265.0, 292.0, 287.0, 291.0, 291.0, 292.0, 284.0, 286.0, 293.0, 290.0, 289.0, 286.0, 287.0, 282.0, 291.0, 284.0, 295.0, 289.0, 290.0, 292.0, 295.0, 151.0, 143.0, 293.0, 286.0, 286.0, 293.0, 287.0, 286.0, 283.0, 296.0, 291.0, 291.0, 287.0, 289.0, 264.0, 266.0, 292.0, 289.0, 294.0, 285.0, 291.0, 296.0, 293.0, 283.0, 291.0, 282.0, 258.0, 
272.0, 286.0, 287.0, 290.0, 289.0, 288.0, 288.0, 289.0, 290.0, 285.0, 291.0, 292.0, 287.0, 283.0, 293.0, 283.0, 290.0, 291.0, 282.0, 289.0, 287.0, 255.0, 264.0, 264.0, 258.0, 289.0, 287.0, 281.0, 289.0, 259.0, 266.0, 294.0, 288.0, 292.0, 290.0, 287.0, 286.0, 296.0, 286.0, 291.0, 288.0, 291.0, 288.0, 285.0, 297.0, 287.0, 283.0, 292.0, 287.0, 291.0, 288.0, 284.0, 298.0, 299.0, 280.0, 292.0, 287.0, 263.0, 259.0, 316.0, 311.0, 286.0, 293.0, 288.0, 291.0, 288.0, 294.0, 260.0, 262.0, 288.0, 288.0, 287.0, 292.0, 291.0, 291.0, 291.0, 285.0, 285.0, 288.0, 290.0, 292.0, 281.0, 295.0, 284.0, 289.0, 290.0, 286.0, 290.0, 289.0, 289.0, 293.0, 291.0, 279.0, 281.0, 298.0, 281.0, 289.0, 284.0, 298.0, 289.0, 290.0, 286.0, 293.0, 290.0, 292.0, 292.0, 287.0, 256.0, 266.0, 288.0, 291.0, 287.0, 292.0, 271.0, 299.0, 285.0, 294.0, 291.0, 288.0, 256.0, 263.0, 288.0, 285.0, 293.0, 286.0, 291.0, 288.0, 284.0, 295.0, 262.0, 260.0, 280.0, 293.0, 290.0, 292.0, 320.0, 310.0, 285.0, 294.0, 295.0, 284.0, 288.0, 294.0, 288.0, 288.0, 288.0, 291.0, 295.0, 275.0, 289.0, 293.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6978080925470879, "mean_inference_ms": 1.2465000228512448, "mean_action_processing_ms": 0.13380573271542603, "mean_env_wait_ms": 0.840132564158288, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 630.0, "episode_reward_min": 294.0, "episode_reward_mean": 570.04, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 143.0}, "policy_reward_max": {"ppo": 320.0}, "policy_reward_mean": {"ppo": 285.02}, "hist_stats": {"episode_reward": [525.0, 579.0, 582.0, 576.0, 579.0, 579.0, 573.0, 573.0, 579.0, 579.0, 587.0, 294.0, 579.0, 579.0, 573.0, 579.0, 582.0, 576.0, 530.0, 581.0, 579.0, 587.0, 576.0, 573.0, 530.0, 573.0, 579.0, 576.0, 579.0, 576.0, 579.0, 576.0, 573.0, 573.0, 576.0, 519.0, 522.0, 576.0, 570.0, 525.0, 582.0, 582.0, 573.0, 582.0, 579.0, 579.0, 582.0, 570.0, 579.0, 579.0, 582.0, 579.0, 579.0, 522.0, 627.0, 579.0, 579.0, 
582.0, 522.0, 576.0, 579.0, 582.0, 576.0, 573.0, 582.0, 576.0, 573.0, 576.0, 579.0, 582.0, 570.0, 579.0, 570.0, 582.0, 579.0, 579.0, 582.0, 579.0, 522.0, 579.0, 579.0, 570.0, 579.0, 579.0, 519.0, 573.0, 579.0, 579.0, 579.0, 522.0, 573.0, 582.0, 630.0, 579.0, 579.0, 582.0, 576.0, 579.0, 570.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [260.0, 265.0, 292.0, 287.0, 291.0, 291.0, 292.0, 284.0, 286.0, 293.0, 290.0, 289.0, 286.0, 287.0, 282.0, 291.0, 284.0, 295.0, 289.0, 290.0, 292.0, 295.0, 151.0, 143.0, 293.0, 286.0, 286.0, 293.0, 287.0, 286.0, 283.0, 296.0, 291.0, 291.0, 287.0, 289.0, 264.0, 266.0, 292.0, 289.0, 294.0, 285.0, 291.0, 296.0, 293.0, 283.0, 291.0, 282.0, 258.0, 272.0, 286.0, 287.0, 290.0, 289.0, 288.0, 288.0, 289.0, 290.0, 285.0, 291.0, 292.0, 287.0, 283.0, 293.0, 283.0, 290.0, 291.0, 282.0, 289.0, 287.0, 255.0, 264.0, 264.0, 258.0, 289.0, 287.0, 281.0, 289.0, 259.0, 266.0, 294.0, 288.0, 292.0, 290.0, 287.0, 286.0, 296.0, 286.0, 291.0, 288.0, 291.0, 288.0, 285.0, 297.0, 287.0, 283.0, 292.0, 287.0, 291.0, 288.0, 284.0, 298.0, 299.0, 280.0, 292.0, 287.0, 263.0, 259.0, 316.0, 311.0, 286.0, 293.0, 288.0, 291.0, 288.0, 294.0, 260.0, 262.0, 288.0, 288.0, 287.0, 292.0, 291.0, 291.0, 291.0, 285.0, 285.0, 288.0, 290.0, 292.0, 281.0, 295.0, 284.0, 289.0, 290.0, 286.0, 290.0, 289.0, 289.0, 293.0, 291.0, 279.0, 281.0, 298.0, 281.0, 289.0, 284.0, 298.0, 289.0, 290.0, 286.0, 293.0, 290.0, 292.0, 292.0, 287.0, 256.0, 266.0, 288.0, 291.0, 287.0, 292.0, 271.0, 299.0, 285.0, 
294.0, 291.0, 288.0, 256.0, 263.0, 288.0, 285.0, 293.0, 286.0, 291.0, 288.0, 284.0, 295.0, 262.0, 260.0, 280.0, 293.0, 290.0, 292.0, 320.0, 310.0, 285.0, 294.0, 295.0, 284.0, 288.0, 294.0, 288.0, 288.0, 288.0, 291.0, 295.0, 275.0, 289.0, 293.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6978080925470879, "mean_inference_ms": 1.2465000228512448, "mean_action_processing_ms": 0.13380573271542603, "mean_env_wait_ms": 0.840132564158288, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 9523200, "num_agent_steps_trained": 9523200, "num_env_steps_sampled": 4761600, "num_env_steps_trained": 4761600, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 4761600, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 9523200, "timers": {"training_iteration_time_ms": 3642.819, "learn_time_ms": 1120.988, "learn_throughput": 11418.499, "synch_weights_time_ms": 13.247}, "counters": {"num_env_steps_sampled": 4761600, "num_env_steps_trained": 4761600, "num_agent_steps_sampled": 9523200, "num_agent_steps_trained": 9523200}, "done": false, "episodes_total": 11904, "training_iteration": 372, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-24-38", "timestamp": 1666581878, "time_this_iter_s": 3.649524450302124, "time_total_s": 1422.4671096801758, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1422.4671096801758, "timesteps_since_restore": 0, "iterations_since_restore": 372, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.339999999999996, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 197.8, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 176.48, "shaped_reward_min": 159, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.4, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 17.41, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 15.29, "useful_onion_pickup_agent_0_min": 9, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 17.31, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 25, "onion_drop_agent_0_mean": 0.08, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.04, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.05, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, 
"useful_onion_drop_agent_1_mean": 0.03, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.03, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 16.99, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 25, "dish_pickup_agent_0_mean": 5.5, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 4.86, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.46, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.78, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.02, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.06, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.34, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.6, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.32, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.6, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.03, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 16.99, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 25, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.03, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 16.99, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 25, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.003319069743156433, "policy_loss": 0.002959703328087926, "vf_loss": 
7.702720642089844, "vf_explained_var": 0.5470426678657532, "kl": 0.002669147914275527, "entropy": 0.8218092322349548, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 4774400, "num_env_steps_trained": 4774400, "num_agent_steps_sampled": 9548800, "num_agent_steps_trained": 9548800}, "sampler_results": {"episode_reward_max": 630.0, "episode_reward_min": 519.0, "episode_reward_mean": 572.08, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 253.0}, "policy_reward_max": {"ppo": 322.0}, "policy_reward_mean": {"ppo": 286.04}, "custom_metrics": {"sparse_reward_mean": 197.8, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 176.48, "shaped_reward_min": 159, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.4, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 17.41, 
"onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 15.29, "useful_onion_pickup_agent_0_min": 9, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 17.31, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 25, "onion_drop_agent_0_mean": 0.08, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.04, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.05, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.03, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.03, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 16.99, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 25, "dish_pickup_agent_0_mean": 5.5, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 4.86, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.46, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.78, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.02, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.06, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.34, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.6, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.32, "soup_delivery_agent_0_min": 3, 
"soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.6, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.03, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 16.99, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 25, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.03, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 16.99, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 25, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [573.0, 573.0, 576.0, 519.0, 522.0, 576.0, 570.0, 525.0, 582.0, 582.0, 573.0, 582.0, 579.0, 579.0, 582.0, 570.0, 579.0, 579.0, 582.0, 579.0, 579.0, 522.0, 627.0, 579.0, 579.0, 582.0, 522.0, 576.0, 579.0, 582.0, 576.0, 573.0, 582.0, 576.0, 573.0, 576.0, 579.0, 582.0, 570.0, 579.0, 570.0, 582.0, 579.0, 579.0, 582.0, 579.0, 522.0, 579.0, 579.0, 570.0, 579.0, 579.0, 519.0, 573.0, 579.0, 579.0, 579.0, 522.0, 573.0, 582.0, 630.0, 579.0, 579.0, 582.0, 576.0, 579.0, 570.0, 582.0, 579.0, 630.0, 573.0, 576.0, 536.0, 579.0, 579.0, 576.0, 525.0, 530.0, 576.0, 579.0, 584.0, 522.0, 579.0, 536.0, 522.0, 579.0, 573.0, 582.0, 582.0, 579.0, 576.0, 530.0, 579.0, 630.0, 582.0, 582.0, 522.0, 582.0, 582.0, 630.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [283.0, 290.0, 291.0, 282.0, 289.0, 287.0, 255.0, 264.0, 264.0, 258.0, 289.0, 287.0, 281.0, 289.0, 259.0, 266.0, 294.0, 288.0, 292.0, 290.0, 287.0, 286.0, 296.0, 286.0, 291.0, 288.0, 291.0, 288.0, 285.0, 297.0, 287.0, 283.0, 292.0, 287.0, 291.0, 288.0, 284.0, 298.0, 299.0, 280.0, 292.0, 287.0, 263.0, 259.0, 316.0, 311.0, 286.0, 293.0, 288.0, 
291.0, 288.0, 294.0, 260.0, 262.0, 288.0, 288.0, 287.0, 292.0, 291.0, 291.0, 291.0, 285.0, 285.0, 288.0, 290.0, 292.0, 281.0, 295.0, 284.0, 289.0, 290.0, 286.0, 290.0, 289.0, 289.0, 293.0, 291.0, 279.0, 281.0, 298.0, 281.0, 289.0, 284.0, 298.0, 289.0, 290.0, 286.0, 293.0, 290.0, 292.0, 292.0, 287.0, 256.0, 266.0, 288.0, 291.0, 287.0, 292.0, 271.0, 299.0, 285.0, 294.0, 291.0, 288.0, 256.0, 263.0, 288.0, 285.0, 293.0, 286.0, 291.0, 288.0, 284.0, 295.0, 262.0, 260.0, 280.0, 293.0, 290.0, 292.0, 320.0, 310.0, 285.0, 294.0, 295.0, 284.0, 288.0, 294.0, 288.0, 288.0, 288.0, 291.0, 295.0, 275.0, 289.0, 293.0, 288.0, 291.0, 322.0, 308.0, 291.0, 282.0, 286.0, 290.0, 266.0, 270.0, 289.0, 290.0, 290.0, 289.0, 286.0, 290.0, 263.0, 262.0, 268.0, 262.0, 287.0, 289.0, 294.0, 285.0, 295.0, 289.0, 269.0, 253.0, 292.0, 287.0, 272.0, 264.0, 267.0, 255.0, 289.0, 290.0, 284.0, 289.0, 288.0, 294.0, 296.0, 286.0, 287.0, 292.0, 286.0, 290.0, 268.0, 262.0, 291.0, 288.0, 311.0, 319.0, 294.0, 288.0, 291.0, 291.0, 267.0, 255.0, 295.0, 287.0, 289.0, 293.0, 313.0, 317.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6977569384581034, "mean_inference_ms": 1.2463679723179317, "mean_action_processing_ms": 0.13379979339919534, "mean_env_wait_ms": 0.8400612763604244, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 630.0, "episode_reward_min": 519.0, "episode_reward_mean": 572.08, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 253.0}, "policy_reward_max": {"ppo": 322.0}, "policy_reward_mean": {"ppo": 286.04}, "hist_stats": {"episode_reward": [573.0, 573.0, 576.0, 519.0, 522.0, 576.0, 570.0, 525.0, 582.0, 582.0, 573.0, 582.0, 579.0, 579.0, 582.0, 570.0, 579.0, 579.0, 582.0, 579.0, 579.0, 522.0, 627.0, 579.0, 579.0, 582.0, 522.0, 576.0, 579.0, 582.0, 576.0, 573.0, 582.0, 576.0, 573.0, 576.0, 579.0, 582.0, 570.0, 579.0, 570.0, 582.0, 579.0, 579.0, 582.0, 579.0, 522.0, 579.0, 579.0, 570.0, 579.0, 579.0, 519.0, 573.0, 579.0, 579.0, 579.0, 
522.0, 573.0, 582.0, 630.0, 579.0, 579.0, 582.0, 576.0, 579.0, 570.0, 582.0, 579.0, 630.0, 573.0, 576.0, 536.0, 579.0, 579.0, 576.0, 525.0, 530.0, 576.0, 579.0, 584.0, 522.0, 579.0, 536.0, 522.0, 579.0, 573.0, 582.0, 582.0, 579.0, 576.0, 530.0, 579.0, 630.0, 582.0, 582.0, 522.0, 582.0, 582.0, 630.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [283.0, 290.0, 291.0, 282.0, 289.0, 287.0, 255.0, 264.0, 264.0, 258.0, 289.0, 287.0, 281.0, 289.0, 259.0, 266.0, 294.0, 288.0, 292.0, 290.0, 287.0, 286.0, 296.0, 286.0, 291.0, 288.0, 291.0, 288.0, 285.0, 297.0, 287.0, 283.0, 292.0, 287.0, 291.0, 288.0, 284.0, 298.0, 299.0, 280.0, 292.0, 287.0, 263.0, 259.0, 316.0, 311.0, 286.0, 293.0, 288.0, 291.0, 288.0, 294.0, 260.0, 262.0, 288.0, 288.0, 287.0, 292.0, 291.0, 291.0, 291.0, 285.0, 285.0, 288.0, 290.0, 292.0, 281.0, 295.0, 284.0, 289.0, 290.0, 286.0, 290.0, 289.0, 289.0, 293.0, 291.0, 279.0, 281.0, 298.0, 281.0, 289.0, 284.0, 298.0, 289.0, 290.0, 286.0, 293.0, 290.0, 292.0, 292.0, 287.0, 256.0, 266.0, 288.0, 291.0, 287.0, 292.0, 271.0, 299.0, 285.0, 294.0, 291.0, 288.0, 256.0, 263.0, 288.0, 285.0, 293.0, 286.0, 291.0, 288.0, 284.0, 295.0, 262.0, 260.0, 280.0, 293.0, 290.0, 292.0, 320.0, 310.0, 285.0, 294.0, 295.0, 284.0, 288.0, 294.0, 288.0, 288.0, 288.0, 291.0, 295.0, 275.0, 289.0, 293.0, 288.0, 291.0, 322.0, 308.0, 291.0, 282.0, 286.0, 290.0, 266.0, 270.0, 289.0, 290.0, 290.0, 289.0, 286.0, 290.0, 263.0, 262.0, 268.0, 262.0, 287.0, 289.0, 294.0, 285.0, 295.0, 289.0, 269.0, 253.0, 292.0, 
287.0, 272.0, 264.0, 267.0, 255.0, 289.0, 290.0, 284.0, 289.0, 288.0, 294.0, 296.0, 286.0, 287.0, 292.0, 286.0, 290.0, 268.0, 262.0, 291.0, 288.0, 311.0, 319.0, 294.0, 288.0, 291.0, 291.0, 267.0, 255.0, 295.0, 287.0, 289.0, 293.0, 313.0, 317.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6977569384581034, "mean_inference_ms": 1.2463679723179317, "mean_action_processing_ms": 0.13379979339919534, "mean_env_wait_ms": 0.8400612763604244, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 9548800, "num_agent_steps_trained": 9548800, "num_env_steps_sampled": 4774400, "num_env_steps_trained": 4774400, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 4774400, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 9548800, "timers": {"training_iteration_time_ms": 3647.408, "learn_time_ms": 1122.247, "learn_throughput": 11405.689, "synch_weights_time_ms": 13.075}, "counters": {"num_env_steps_sampled": 4774400, "num_env_steps_trained": 4774400, "num_agent_steps_sampled": 9548800, "num_agent_steps_trained": 9548800}, "done": false, "episodes_total": 11936, "training_iteration": 373, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-24-41", "timestamp": 1666581881, "time_this_iter_s": 3.7315287590026855, "time_total_s": 1426.1986384391785, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1426.1986384391785, "timesteps_since_restore": 0, "iterations_since_restore": 373, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.833333333333332, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 197.6, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 176.81, "shaped_reward_min": 159, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.07, "onion_pickup_agent_0_min": 5, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 17.86, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 15.0, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 17.75, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 25, "onion_drop_agent_0_mean": 0.08, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.03, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.02, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, 
"useful_onion_drop_agent_1_mean": 0.03, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 14.69, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 17.46, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 25, "dish_pickup_agent_0_mean": 5.65, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 4.74, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.56, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.66, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.05, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.04, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.45, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.49, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.43, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.49, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.69, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 17.46, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 25, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.69, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 17.46, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 25, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0013253169599920511, "policy_loss": -0.0016902193892747164, "vf_loss": 
7.725342750549316, "vf_explained_var": 0.5744255185127258, "kl": 0.0023879026994109154, "entropy": 0.8152618408203125, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 4787200, "num_env_steps_trained": 4787200, "num_agent_steps_sampled": 9574400, "num_agent_steps_trained": 9574400}, "sampler_results": {"episode_reward_max": 636.0, "episode_reward_min": 519.0, "episode_reward_mean": 572.01, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 248.0}, "policy_reward_max": {"ppo": 322.0}, "policy_reward_mean": {"ppo": 286.005}, "custom_metrics": {"sparse_reward_mean": 197.6, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 176.81, "shaped_reward_min": 159, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.07, "onion_pickup_agent_0_min": 5, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 17.86, 
"onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 15.0, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 17.75, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 25, "onion_drop_agent_0_mean": 0.08, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.03, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.02, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.03, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 14.69, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 17.46, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 25, "dish_pickup_agent_0_mean": 5.65, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 4.74, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.56, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.66, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.05, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.04, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.45, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.49, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.43, "soup_delivery_agent_0_min": 3, 
"soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.49, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.69, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 17.46, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 25, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.69, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 17.46, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 25, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 576.0, 573.0, 576.0, 579.0, 582.0, 570.0, 579.0, 570.0, 582.0, 579.0, 579.0, 582.0, 579.0, 522.0, 579.0, 579.0, 570.0, 579.0, 579.0, 519.0, 573.0, 579.0, 579.0, 579.0, 522.0, 573.0, 582.0, 630.0, 579.0, 579.0, 582.0, 576.0, 579.0, 570.0, 582.0, 579.0, 630.0, 573.0, 576.0, 536.0, 579.0, 579.0, 576.0, 525.0, 530.0, 576.0, 579.0, 584.0, 522.0, 579.0, 536.0, 522.0, 579.0, 573.0, 582.0, 582.0, 579.0, 576.0, 530.0, 579.0, 630.0, 582.0, 582.0, 522.0, 582.0, 582.0, 630.0, 573.0, 579.0, 579.0, 522.0, 525.0, 570.0, 582.0, 579.0, 636.0, 582.0, 522.0, 579.0, 522.0, 579.0, 576.0, 579.0, 579.0, 525.0, 576.0, 576.0, 579.0, 582.0, 576.0, 582.0, 582.0, 590.0, 582.0, 579.0, 522.0, 579.0, 576.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [290.0, 292.0, 281.0, 295.0, 284.0, 289.0, 290.0, 286.0, 290.0, 289.0, 289.0, 293.0, 291.0, 279.0, 281.0, 298.0, 281.0, 289.0, 284.0, 298.0, 289.0, 290.0, 286.0, 293.0, 290.0, 292.0, 292.0, 287.0, 256.0, 266.0, 288.0, 291.0, 287.0, 292.0, 271.0, 299.0, 285.0, 294.0, 291.0, 288.0, 256.0, 263.0, 288.0, 285.0, 293.0, 286.0, 291.0, 288.0, 284.0, 
295.0, 262.0, 260.0, 280.0, 293.0, 290.0, 292.0, 320.0, 310.0, 285.0, 294.0, 295.0, 284.0, 288.0, 294.0, 288.0, 288.0, 288.0, 291.0, 295.0, 275.0, 289.0, 293.0, 288.0, 291.0, 322.0, 308.0, 291.0, 282.0, 286.0, 290.0, 266.0, 270.0, 289.0, 290.0, 290.0, 289.0, 286.0, 290.0, 263.0, 262.0, 268.0, 262.0, 287.0, 289.0, 294.0, 285.0, 295.0, 289.0, 269.0, 253.0, 292.0, 287.0, 272.0, 264.0, 267.0, 255.0, 289.0, 290.0, 284.0, 289.0, 288.0, 294.0, 296.0, 286.0, 287.0, 292.0, 286.0, 290.0, 268.0, 262.0, 291.0, 288.0, 311.0, 319.0, 294.0, 288.0, 291.0, 291.0, 267.0, 255.0, 295.0, 287.0, 289.0, 293.0, 313.0, 317.0, 283.0, 290.0, 291.0, 288.0, 290.0, 289.0, 262.0, 260.0, 260.0, 265.0, 281.0, 289.0, 291.0, 291.0, 285.0, 294.0, 315.0, 321.0, 289.0, 293.0, 263.0, 259.0, 290.0, 289.0, 270.0, 252.0, 289.0, 290.0, 288.0, 288.0, 291.0, 288.0, 289.0, 290.0, 258.0, 267.0, 288.0, 288.0, 283.0, 293.0, 294.0, 285.0, 293.0, 289.0, 285.0, 291.0, 289.0, 293.0, 292.0, 290.0, 293.0, 297.0, 290.0, 292.0, 290.0, 289.0, 248.0, 274.0, 288.0, 291.0, 289.0, 287.0, 291.0, 291.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6977054426253134, "mean_inference_ms": 1.2462279208812725, "mean_action_processing_ms": 0.1337937646791519, "mean_env_wait_ms": 0.8399847131397314, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 636.0, "episode_reward_min": 519.0, "episode_reward_mean": 572.01, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 248.0}, "policy_reward_max": {"ppo": 322.0}, "policy_reward_mean": {"ppo": 286.005}, "hist_stats": {"episode_reward": [582.0, 576.0, 573.0, 576.0, 579.0, 582.0, 570.0, 579.0, 570.0, 582.0, 579.0, 579.0, 582.0, 579.0, 522.0, 579.0, 579.0, 570.0, 579.0, 579.0, 519.0, 573.0, 579.0, 579.0, 579.0, 522.0, 573.0, 582.0, 630.0, 579.0, 579.0, 582.0, 576.0, 579.0, 570.0, 582.0, 579.0, 630.0, 573.0, 576.0, 536.0, 579.0, 579.0, 576.0, 525.0, 530.0, 576.0, 579.0, 584.0, 522.0, 579.0, 536.0, 522.0, 579.0, 573.0, 582.0, 582.0, 
579.0, 576.0, 530.0, 579.0, 630.0, 582.0, 582.0, 522.0, 582.0, 582.0, 630.0, 573.0, 579.0, 579.0, 522.0, 525.0, 570.0, 582.0, 579.0, 636.0, 582.0, 522.0, 579.0, 522.0, 579.0, 576.0, 579.0, 579.0, 525.0, 576.0, 576.0, 579.0, 582.0, 576.0, 582.0, 582.0, 590.0, 582.0, 579.0, 522.0, 579.0, 576.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [290.0, 292.0, 281.0, 295.0, 284.0, 289.0, 290.0, 286.0, 290.0, 289.0, 289.0, 293.0, 291.0, 279.0, 281.0, 298.0, 281.0, 289.0, 284.0, 298.0, 289.0, 290.0, 286.0, 293.0, 290.0, 292.0, 292.0, 287.0, 256.0, 266.0, 288.0, 291.0, 287.0, 292.0, 271.0, 299.0, 285.0, 294.0, 291.0, 288.0, 256.0, 263.0, 288.0, 285.0, 293.0, 286.0, 291.0, 288.0, 284.0, 295.0, 262.0, 260.0, 280.0, 293.0, 290.0, 292.0, 320.0, 310.0, 285.0, 294.0, 295.0, 284.0, 288.0, 294.0, 288.0, 288.0, 288.0, 291.0, 295.0, 275.0, 289.0, 293.0, 288.0, 291.0, 322.0, 308.0, 291.0, 282.0, 286.0, 290.0, 266.0, 270.0, 289.0, 290.0, 290.0, 289.0, 286.0, 290.0, 263.0, 262.0, 268.0, 262.0, 287.0, 289.0, 294.0, 285.0, 295.0, 289.0, 269.0, 253.0, 292.0, 287.0, 272.0, 264.0, 267.0, 255.0, 289.0, 290.0, 284.0, 289.0, 288.0, 294.0, 296.0, 286.0, 287.0, 292.0, 286.0, 290.0, 268.0, 262.0, 291.0, 288.0, 311.0, 319.0, 294.0, 288.0, 291.0, 291.0, 267.0, 255.0, 295.0, 287.0, 289.0, 293.0, 313.0, 317.0, 283.0, 290.0, 291.0, 288.0, 290.0, 289.0, 262.0, 260.0, 260.0, 265.0, 281.0, 289.0, 291.0, 291.0, 285.0, 294.0, 315.0, 321.0, 289.0, 293.0, 263.0, 259.0, 290.0, 289.0, 270.0, 252.0, 289.0, 290.0, 288.0, 
288.0, 291.0, 288.0, 289.0, 290.0, 258.0, 267.0, 288.0, 288.0, 283.0, 293.0, 294.0, 285.0, 293.0, 289.0, 285.0, 291.0, 289.0, 293.0, 292.0, 290.0, 293.0, 297.0, 290.0, 292.0, 290.0, 289.0, 248.0, 274.0, 288.0, 291.0, 289.0, 287.0, 291.0, 291.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6977054426253134, "mean_inference_ms": 1.2462279208812725, "mean_action_processing_ms": 0.1337937646791519, "mean_env_wait_ms": 0.8399847131397314, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 9574400, "num_agent_steps_trained": 9574400, "num_env_steps_sampled": 4787200, "num_env_steps_trained": 4787200, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 4787200, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 9574400, "timers": {"training_iteration_time_ms": 3656.761, "learn_time_ms": 1123.177, "learn_throughput": 11396.24, "synch_weights_time_ms": 12.437}, "counters": {"num_env_steps_sampled": 4787200, "num_env_steps_trained": 4787200, "num_agent_steps_sampled": 9574400, "num_agent_steps_trained": 9574400}, "done": false, "episodes_total": 11968, "training_iteration": 374, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-24-45", "timestamp": 1666581885, "time_this_iter_s": 3.694704532623291, "time_total_s": 1429.8933429718018, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1429.8933429718018, "timesteps_since_restore": 0, "iterations_since_restore": 374, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.859999999999996, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 197.4, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 176.98, "shaped_reward_min": 159, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.09, "onion_pickup_agent_0_min": 5, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 17.85, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 15.04, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 17.74, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 26, "onion_drop_agent_0_mean": 0.08, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.06, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.03, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, 
"useful_onion_drop_agent_1_mean": 0.05, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.77, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 17.37, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 25, "dish_pickup_agent_0_mean": 5.71, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 4.7, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.61, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.61, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.05, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.03, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.51, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.47, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.47, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.45, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.77, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 17.37, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 25, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.77, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 17.37, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 25, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.00046897190622985363, "policy_loss": 0.000101038021966815, "vf_loss": 
7.793358325958252, "vf_explained_var": 0.5690768361091614, "kl": 0.0023539250250905752, "entropy": 0.8228006362915039, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 4800000, "num_env_steps_trained": 4800000, "num_agent_steps_sampled": 9600000, "num_agent_steps_trained": 9600000}, "sampler_results": {"episode_reward_max": 636.0, "episode_reward_min": 519.0, "episode_reward_mean": 571.78, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 248.0}, "policy_reward_max": {"ppo": 323.0}, "policy_reward_mean": {"ppo": 285.89}, "custom_metrics": {"sparse_reward_mean": 197.4, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 176.98, "shaped_reward_min": 159, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.09, "onion_pickup_agent_0_min": 5, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 17.85, 
"onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 15.04, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 17.74, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 26, "onion_drop_agent_0_mean": 0.08, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.06, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.03, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.05, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.77, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 17.37, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 25, "dish_pickup_agent_0_mean": 5.71, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 4.7, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.61, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.61, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.05, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.03, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.51, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.47, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.47, "soup_delivery_agent_0_min": 2, 
"soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.45, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.77, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 17.37, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 25, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.77, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 17.37, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 25, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [576.0, 579.0, 570.0, 582.0, 579.0, 630.0, 573.0, 576.0, 536.0, 579.0, 579.0, 576.0, 525.0, 530.0, 576.0, 579.0, 584.0, 522.0, 579.0, 536.0, 522.0, 579.0, 573.0, 582.0, 582.0, 579.0, 576.0, 530.0, 579.0, 630.0, 582.0, 582.0, 522.0, 582.0, 582.0, 630.0, 573.0, 579.0, 579.0, 522.0, 525.0, 570.0, 582.0, 579.0, 636.0, 582.0, 522.0, 579.0, 522.0, 579.0, 576.0, 579.0, 579.0, 525.0, 576.0, 576.0, 579.0, 582.0, 576.0, 582.0, 582.0, 590.0, 582.0, 579.0, 522.0, 579.0, 576.0, 582.0, 576.0, 570.0, 570.0, 579.0, 530.0, 522.0, 570.0, 519.0, 579.0, 576.0, 582.0, 579.0, 579.0, 582.0, 579.0, 576.0, 582.0, 630.0, 579.0, 533.0, 579.0, 627.0, 582.0, 579.0, 582.0, 587.0, 533.0, 527.0, 633.0, 630.0, 576.0, 522.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [288.0, 288.0, 288.0, 291.0, 295.0, 275.0, 289.0, 293.0, 288.0, 291.0, 322.0, 308.0, 291.0, 282.0, 286.0, 290.0, 266.0, 270.0, 289.0, 290.0, 290.0, 289.0, 286.0, 290.0, 263.0, 262.0, 268.0, 262.0, 287.0, 289.0, 294.0, 285.0, 295.0, 289.0, 269.0, 253.0, 292.0, 287.0, 272.0, 264.0, 267.0, 255.0, 289.0, 290.0, 284.0, 289.0, 288.0, 294.0, 296.0, 
286.0, 287.0, 292.0, 286.0, 290.0, 268.0, 262.0, 291.0, 288.0, 311.0, 319.0, 294.0, 288.0, 291.0, 291.0, 267.0, 255.0, 295.0, 287.0, 289.0, 293.0, 313.0, 317.0, 283.0, 290.0, 291.0, 288.0, 290.0, 289.0, 262.0, 260.0, 260.0, 265.0, 281.0, 289.0, 291.0, 291.0, 285.0, 294.0, 315.0, 321.0, 289.0, 293.0, 263.0, 259.0, 290.0, 289.0, 270.0, 252.0, 289.0, 290.0, 288.0, 288.0, 291.0, 288.0, 289.0, 290.0, 258.0, 267.0, 288.0, 288.0, 283.0, 293.0, 294.0, 285.0, 293.0, 289.0, 285.0, 291.0, 289.0, 293.0, 292.0, 290.0, 293.0, 297.0, 290.0, 292.0, 290.0, 289.0, 248.0, 274.0, 288.0, 291.0, 289.0, 287.0, 291.0, 291.0, 288.0, 288.0, 287.0, 283.0, 281.0, 289.0, 289.0, 290.0, 264.0, 266.0, 259.0, 263.0, 291.0, 279.0, 264.0, 255.0, 291.0, 288.0, 292.0, 284.0, 286.0, 296.0, 281.0, 298.0, 290.0, 289.0, 290.0, 292.0, 288.0, 291.0, 292.0, 284.0, 290.0, 292.0, 323.0, 307.0, 290.0, 289.0, 263.0, 270.0, 291.0, 288.0, 319.0, 308.0, 285.0, 297.0, 290.0, 289.0, 289.0, 293.0, 294.0, 293.0, 265.0, 268.0, 268.0, 259.0, 313.0, 320.0, 318.0, 312.0, 285.0, 291.0, 261.0, 261.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6976622511350119, "mean_inference_ms": 1.246101107480903, "mean_action_processing_ms": 0.1337887336031301, "mean_env_wait_ms": 0.8399220614415523, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 636.0, "episode_reward_min": 519.0, "episode_reward_mean": 571.78, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 248.0}, "policy_reward_max": {"ppo": 323.0}, "policy_reward_mean": {"ppo": 285.89}, "hist_stats": {"episode_reward": [576.0, 579.0, 570.0, 582.0, 579.0, 630.0, 573.0, 576.0, 536.0, 579.0, 579.0, 576.0, 525.0, 530.0, 576.0, 579.0, 584.0, 522.0, 579.0, 536.0, 522.0, 579.0, 573.0, 582.0, 582.0, 579.0, 576.0, 530.0, 579.0, 630.0, 582.0, 582.0, 522.0, 582.0, 582.0, 630.0, 573.0, 579.0, 579.0, 522.0, 525.0, 570.0, 582.0, 579.0, 636.0, 582.0, 522.0, 579.0, 522.0, 579.0, 576.0, 579.0, 579.0, 525.0, 576.0, 576.0, 579.0, 
582.0, 576.0, 582.0, 582.0, 590.0, 582.0, 579.0, 522.0, 579.0, 576.0, 582.0, 576.0, 570.0, 570.0, 579.0, 530.0, 522.0, 570.0, 519.0, 579.0, 576.0, 582.0, 579.0, 579.0, 582.0, 579.0, 576.0, 582.0, 630.0, 579.0, 533.0, 579.0, 627.0, 582.0, 579.0, 582.0, 587.0, 533.0, 527.0, 633.0, 630.0, 576.0, 522.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [288.0, 288.0, 288.0, 291.0, 295.0, 275.0, 289.0, 293.0, 288.0, 291.0, 322.0, 308.0, 291.0, 282.0, 286.0, 290.0, 266.0, 270.0, 289.0, 290.0, 290.0, 289.0, 286.0, 290.0, 263.0, 262.0, 268.0, 262.0, 287.0, 289.0, 294.0, 285.0, 295.0, 289.0, 269.0, 253.0, 292.0, 287.0, 272.0, 264.0, 267.0, 255.0, 289.0, 290.0, 284.0, 289.0, 288.0, 294.0, 296.0, 286.0, 287.0, 292.0, 286.0, 290.0, 268.0, 262.0, 291.0, 288.0, 311.0, 319.0, 294.0, 288.0, 291.0, 291.0, 267.0, 255.0, 295.0, 287.0, 289.0, 293.0, 313.0, 317.0, 283.0, 290.0, 291.0, 288.0, 290.0, 289.0, 262.0, 260.0, 260.0, 265.0, 281.0, 289.0, 291.0, 291.0, 285.0, 294.0, 315.0, 321.0, 289.0, 293.0, 263.0, 259.0, 290.0, 289.0, 270.0, 252.0, 289.0, 290.0, 288.0, 288.0, 291.0, 288.0, 289.0, 290.0, 258.0, 267.0, 288.0, 288.0, 283.0, 293.0, 294.0, 285.0, 293.0, 289.0, 285.0, 291.0, 289.0, 293.0, 292.0, 290.0, 293.0, 297.0, 290.0, 292.0, 290.0, 289.0, 248.0, 274.0, 288.0, 291.0, 289.0, 287.0, 291.0, 291.0, 288.0, 288.0, 287.0, 283.0, 281.0, 289.0, 289.0, 290.0, 264.0, 266.0, 259.0, 263.0, 291.0, 279.0, 264.0, 255.0, 291.0, 288.0, 292.0, 284.0, 286.0, 296.0, 281.0, 298.0, 290.0, 289.0, 290.0, 292.0, 288.0, 
291.0, 292.0, 284.0, 290.0, 292.0, 323.0, 307.0, 290.0, 289.0, 263.0, 270.0, 291.0, 288.0, 319.0, 308.0, 285.0, 297.0, 290.0, 289.0, 289.0, 293.0, 294.0, 293.0, 265.0, 268.0, 268.0, 259.0, 313.0, 320.0, 318.0, 312.0, 285.0, 291.0, 261.0, 261.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6976622511350119, "mean_inference_ms": 1.246101107480903, "mean_action_processing_ms": 0.1337887336031301, "mean_env_wait_ms": 0.8399220614415523, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 9600000, "num_agent_steps_trained": 9600000, "num_env_steps_sampled": 4800000, "num_env_steps_trained": 4800000, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 4800000, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 9600000, "timers": {"training_iteration_time_ms": 3644.642, "learn_time_ms": 1108.163, "learn_throughput": 11550.646, "synch_weights_time_ms": 11.684}, "counters": {"num_env_steps_sampled": 4800000, "num_env_steps_trained": 4800000, "num_agent_steps_sampled": 9600000, "num_agent_steps_trained": 9600000}, "done": false, "episodes_total": 12000, "training_iteration": 375, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-24-49", "timestamp": 1666581889, "time_this_iter_s": 3.495922803878784, "time_total_s": 1433.3892657756805, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1433.3892657756805, "timesteps_since_restore": 0, "iterations_since_restore": 375, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.880000000000003, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 198.0, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 177.14, "shaped_reward_min": 159, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.75, "onion_pickup_agent_0_min": 5, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 18.21, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 14.64, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 18.08, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 26, "onion_drop_agent_0_mean": 0.07, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.09, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.04, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, 
"useful_onion_drop_agent_1_mean": 0.07, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.42, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 17.72, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 25, "dish_pickup_agent_0_mean": 5.82, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 4.68, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.68, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.56, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.08, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.06, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.57, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.43, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.54, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.38, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.42, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 17.72, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 25, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.42, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 17.72, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 25, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0003129470278508961, "policy_loss": -0.000683111313264817, "vf_loss": 
7.794277667999268, "vf_explained_var": 0.5665597319602966, "kl": 0.00211581913754344, "entropy": 0.8185231685638428, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 4812800, "num_env_steps_trained": 4812800, "num_agent_steps_sampled": 9625600, "num_agent_steps_trained": 9625600}, "sampler_results": {"episode_reward_max": 636.0, "episode_reward_min": 519.0, "episode_reward_mean": 573.14, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 248.0}, "policy_reward_max": {"ppo": 323.0}, "policy_reward_mean": {"ppo": 286.57}, "custom_metrics": {"sparse_reward_mean": 198.0, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 177.14, "shaped_reward_min": 159, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.75, "onion_pickup_agent_0_min": 5, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 18.21, 
"onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 14.64, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 18.08, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 26, "onion_drop_agent_0_mean": 0.07, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.09, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.04, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.07, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.42, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 17.72, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 25, "dish_pickup_agent_0_mean": 5.82, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 4.68, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.68, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.56, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.08, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.06, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.57, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.43, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.54, "soup_delivery_agent_0_min": 2, 
"soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.38, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.42, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 17.72, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 25, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.42, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 17.72, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 25, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [522.0, 582.0, 582.0, 630.0, 573.0, 579.0, 579.0, 522.0, 525.0, 570.0, 582.0, 579.0, 636.0, 582.0, 522.0, 579.0, 522.0, 579.0, 576.0, 579.0, 579.0, 525.0, 576.0, 576.0, 579.0, 582.0, 576.0, 582.0, 582.0, 590.0, 582.0, 579.0, 522.0, 579.0, 576.0, 582.0, 576.0, 570.0, 570.0, 579.0, 530.0, 522.0, 570.0, 519.0, 579.0, 576.0, 582.0, 579.0, 579.0, 582.0, 579.0, 576.0, 582.0, 630.0, 579.0, 533.0, 579.0, 627.0, 582.0, 579.0, 582.0, 587.0, 533.0, 527.0, 633.0, 630.0, 576.0, 522.0, 579.0, 576.0, 630.0, 579.0, 582.0, 630.0, 579.0, 579.0, 582.0, 584.0, 579.0, 576.0, 519.0, 570.0, 530.0, 570.0, 573.0, 525.0, 579.0, 582.0, 522.0, 579.0, 582.0, 570.0, 630.0, 573.0, 582.0, 587.0, 582.0, 530.0, 579.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [267.0, 255.0, 295.0, 287.0, 289.0, 293.0, 313.0, 317.0, 283.0, 290.0, 291.0, 288.0, 290.0, 289.0, 262.0, 260.0, 260.0, 265.0, 281.0, 289.0, 291.0, 291.0, 285.0, 294.0, 315.0, 321.0, 289.0, 293.0, 263.0, 259.0, 290.0, 289.0, 270.0, 252.0, 289.0, 290.0, 288.0, 288.0, 291.0, 288.0, 289.0, 290.0, 258.0, 267.0, 288.0, 288.0, 283.0, 293.0, 294.0, 
285.0, 293.0, 289.0, 285.0, 291.0, 289.0, 293.0, 292.0, 290.0, 293.0, 297.0, 290.0, 292.0, 290.0, 289.0, 248.0, 274.0, 288.0, 291.0, 289.0, 287.0, 291.0, 291.0, 288.0, 288.0, 287.0, 283.0, 281.0, 289.0, 289.0, 290.0, 264.0, 266.0, 259.0, 263.0, 291.0, 279.0, 264.0, 255.0, 291.0, 288.0, 292.0, 284.0, 286.0, 296.0, 281.0, 298.0, 290.0, 289.0, 290.0, 292.0, 288.0, 291.0, 292.0, 284.0, 290.0, 292.0, 323.0, 307.0, 290.0, 289.0, 263.0, 270.0, 291.0, 288.0, 319.0, 308.0, 285.0, 297.0, 290.0, 289.0, 289.0, 293.0, 294.0, 293.0, 265.0, 268.0, 268.0, 259.0, 313.0, 320.0, 318.0, 312.0, 285.0, 291.0, 261.0, 261.0, 289.0, 290.0, 284.0, 292.0, 314.0, 316.0, 293.0, 286.0, 298.0, 284.0, 309.0, 321.0, 283.0, 296.0, 288.0, 291.0, 285.0, 297.0, 286.0, 298.0, 282.0, 297.0, 291.0, 285.0, 264.0, 255.0, 290.0, 280.0, 266.0, 264.0, 282.0, 288.0, 280.0, 293.0, 257.0, 268.0, 292.0, 287.0, 290.0, 292.0, 254.0, 268.0, 287.0, 292.0, 292.0, 290.0, 289.0, 281.0, 313.0, 317.0, 285.0, 288.0, 293.0, 289.0, 293.0, 294.0, 293.0, 289.0, 272.0, 258.0, 296.0, 283.0, 286.0, 293.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6976294516149263, "mean_inference_ms": 1.2459799356312125, "mean_action_processing_ms": 0.1337841497120114, "mean_env_wait_ms": 0.8398647128407508, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 636.0, "episode_reward_min": 519.0, "episode_reward_mean": 573.14, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 248.0}, "policy_reward_max": {"ppo": 323.0}, "policy_reward_mean": {"ppo": 286.57}, "hist_stats": {"episode_reward": [522.0, 582.0, 582.0, 630.0, 573.0, 579.0, 579.0, 522.0, 525.0, 570.0, 582.0, 579.0, 636.0, 582.0, 522.0, 579.0, 522.0, 579.0, 576.0, 579.0, 579.0, 525.0, 576.0, 576.0, 579.0, 582.0, 576.0, 582.0, 582.0, 590.0, 582.0, 579.0, 522.0, 579.0, 576.0, 582.0, 576.0, 570.0, 570.0, 579.0, 530.0, 522.0, 570.0, 519.0, 579.0, 576.0, 582.0, 579.0, 579.0, 582.0, 579.0, 576.0, 582.0, 630.0, 579.0, 533.0, 579.0, 
627.0, 582.0, 579.0, 582.0, 587.0, 533.0, 527.0, 633.0, 630.0, 576.0, 522.0, 579.0, 576.0, 630.0, 579.0, 582.0, 630.0, 579.0, 579.0, 582.0, 584.0, 579.0, 576.0, 519.0, 570.0, 530.0, 570.0, 573.0, 525.0, 579.0, 582.0, 522.0, 579.0, 582.0, 570.0, 630.0, 573.0, 582.0, 587.0, 582.0, 530.0, 579.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [267.0, 255.0, 295.0, 287.0, 289.0, 293.0, 313.0, 317.0, 283.0, 290.0, 291.0, 288.0, 290.0, 289.0, 262.0, 260.0, 260.0, 265.0, 281.0, 289.0, 291.0, 291.0, 285.0, 294.0, 315.0, 321.0, 289.0, 293.0, 263.0, 259.0, 290.0, 289.0, 270.0, 252.0, 289.0, 290.0, 288.0, 288.0, 291.0, 288.0, 289.0, 290.0, 258.0, 267.0, 288.0, 288.0, 283.0, 293.0, 294.0, 285.0, 293.0, 289.0, 285.0, 291.0, 289.0, 293.0, 292.0, 290.0, 293.0, 297.0, 290.0, 292.0, 290.0, 289.0, 248.0, 274.0, 288.0, 291.0, 289.0, 287.0, 291.0, 291.0, 288.0, 288.0, 287.0, 283.0, 281.0, 289.0, 289.0, 290.0, 264.0, 266.0, 259.0, 263.0, 291.0, 279.0, 264.0, 255.0, 291.0, 288.0, 292.0, 284.0, 286.0, 296.0, 281.0, 298.0, 290.0, 289.0, 290.0, 292.0, 288.0, 291.0, 292.0, 284.0, 290.0, 292.0, 323.0, 307.0, 290.0, 289.0, 263.0, 270.0, 291.0, 288.0, 319.0, 308.0, 285.0, 297.0, 290.0, 289.0, 289.0, 293.0, 294.0, 293.0, 265.0, 268.0, 268.0, 259.0, 313.0, 320.0, 318.0, 312.0, 285.0, 291.0, 261.0, 261.0, 289.0, 290.0, 284.0, 292.0, 314.0, 316.0, 293.0, 286.0, 298.0, 284.0, 309.0, 321.0, 283.0, 296.0, 288.0, 291.0, 285.0, 297.0, 286.0, 298.0, 282.0, 297.0, 291.0, 285.0, 264.0, 255.0, 290.0, 280.0, 266.0, 
264.0, 282.0, 288.0, 280.0, 293.0, 257.0, 268.0, 292.0, 287.0, 290.0, 292.0, 254.0, 268.0, 287.0, 292.0, 292.0, 290.0, 289.0, 281.0, 313.0, 317.0, 285.0, 288.0, 293.0, 289.0, 293.0, 294.0, 293.0, 289.0, 272.0, 258.0, 296.0, 283.0, 286.0, 293.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6976294516149263, "mean_inference_ms": 1.2459799356312125, "mean_action_processing_ms": 0.1337841497120114, "mean_env_wait_ms": 0.8398647128407508, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 9625600, "num_agent_steps_trained": 9625600, "num_env_steps_sampled": 4812800, "num_env_steps_trained": 4812800, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 4812800, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 9625600, "timers": {"training_iteration_time_ms": 3645.941, "learn_time_ms": 1099.023, "learn_throughput": 11646.708, "synch_weights_time_ms": 12.001}, "counters": {"num_env_steps_sampled": 4812800, "num_env_steps_trained": 4812800, "num_agent_steps_sampled": 9625600, "num_agent_steps_trained": 9625600}, "done": false, "episodes_total": 12032, "training_iteration": 376, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-24-53", "timestamp": 1666581893, "time_this_iter_s": 3.6507773399353027, "time_total_s": 1437.0400431156158, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1437.0400431156158, "timesteps_since_restore": 0, "iterations_since_restore": 376, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.966666666666665, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 196.4, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 176.08, "shaped_reward_min": 147, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.32, "onion_pickup_agent_0_min": 5, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 17.5, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 15.19, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 17.32, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 26, "onion_drop_agent_0_mean": 0.11, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.11, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.06, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, 
"useful_onion_drop_agent_1_mean": 0.07, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.93, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 16.99, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 25, "dish_pickup_agent_0_mean": 5.58, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 4.91, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.37, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.77, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.09, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.06, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.32, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.66, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.29, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.59, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.93, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 16.99, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 25, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.93, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 16.99, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 25, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0007580869714729488, "policy_loss": -0.0011221127351745963, "vf_loss": 
7.772212982177734, "vf_explained_var": 0.5765685439109802, "kl": 0.0033011985942721367, "entropy": 0.826388955116272, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 4825600, "num_env_steps_trained": 4825600, "num_agent_steps_sampled": 9651200, "num_agent_steps_trained": 9651200}, "sampler_results": {"episode_reward_max": 633.0, "episode_reward_min": 387.0, "episode_reward_mean": 568.88, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 191.0}, "policy_reward_max": {"ppo": 323.0}, "policy_reward_mean": {"ppo": 284.44}, "custom_metrics": {"sparse_reward_mean": 196.4, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 176.08, "shaped_reward_min": 147, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.32, "onion_pickup_agent_0_min": 5, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 17.5, 
"onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 15.19, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 17.32, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 26, "onion_drop_agent_0_mean": 0.11, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.11, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.06, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.07, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.93, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 16.99, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 25, "dish_pickup_agent_0_mean": 5.58, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 4.91, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.37, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.77, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.09, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.06, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.32, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.66, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.29, "soup_delivery_agent_0_min": 2, 
"soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.59, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.93, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 16.99, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 25, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.93, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 16.99, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 25, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [522.0, 579.0, 576.0, 582.0, 576.0, 570.0, 570.0, 579.0, 530.0, 522.0, 570.0, 519.0, 579.0, 576.0, 582.0, 579.0, 579.0, 582.0, 579.0, 576.0, 582.0, 630.0, 579.0, 533.0, 579.0, 627.0, 582.0, 579.0, 582.0, 587.0, 533.0, 527.0, 633.0, 630.0, 576.0, 522.0, 579.0, 576.0, 630.0, 579.0, 582.0, 630.0, 579.0, 579.0, 582.0, 584.0, 579.0, 576.0, 519.0, 570.0, 530.0, 570.0, 573.0, 525.0, 579.0, 582.0, 522.0, 579.0, 582.0, 570.0, 630.0, 573.0, 582.0, 587.0, 582.0, 530.0, 579.0, 579.0, 582.0, 579.0, 579.0, 582.0, 573.0, 387.0, 633.0, 582.0, 522.0, 582.0, 579.0, 533.0, 579.0, 525.0, 576.0, 582.0, 576.0, 522.0, 579.0, 582.0, 579.0, 525.0, 573.0, 579.0, 587.0, 564.0, 570.0, 576.0, 527.0, 579.0, 522.0, 467.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [248.0, 274.0, 288.0, 291.0, 289.0, 287.0, 291.0, 291.0, 288.0, 288.0, 287.0, 283.0, 281.0, 289.0, 289.0, 290.0, 264.0, 266.0, 259.0, 263.0, 291.0, 279.0, 264.0, 255.0, 291.0, 288.0, 292.0, 284.0, 286.0, 296.0, 281.0, 298.0, 290.0, 289.0, 290.0, 292.0, 288.0, 291.0, 292.0, 284.0, 290.0, 292.0, 323.0, 307.0, 290.0, 289.0, 263.0, 270.0, 291.0, 
288.0, 319.0, 308.0, 285.0, 297.0, 290.0, 289.0, 289.0, 293.0, 294.0, 293.0, 265.0, 268.0, 268.0, 259.0, 313.0, 320.0, 318.0, 312.0, 285.0, 291.0, 261.0, 261.0, 289.0, 290.0, 284.0, 292.0, 314.0, 316.0, 293.0, 286.0, 298.0, 284.0, 309.0, 321.0, 283.0, 296.0, 288.0, 291.0, 285.0, 297.0, 286.0, 298.0, 282.0, 297.0, 291.0, 285.0, 264.0, 255.0, 290.0, 280.0, 266.0, 264.0, 282.0, 288.0, 280.0, 293.0, 257.0, 268.0, 292.0, 287.0, 290.0, 292.0, 254.0, 268.0, 287.0, 292.0, 292.0, 290.0, 289.0, 281.0, 313.0, 317.0, 285.0, 288.0, 293.0, 289.0, 293.0, 294.0, 293.0, 289.0, 272.0, 258.0, 296.0, 283.0, 286.0, 293.0, 294.0, 288.0, 291.0, 288.0, 289.0, 290.0, 291.0, 291.0, 291.0, 282.0, 196.0, 191.0, 314.0, 319.0, 296.0, 286.0, 262.0, 260.0, 290.0, 292.0, 289.0, 290.0, 270.0, 263.0, 290.0, 289.0, 259.0, 266.0, 286.0, 290.0, 289.0, 293.0, 286.0, 290.0, 263.0, 259.0, 284.0, 295.0, 290.0, 292.0, 288.0, 291.0, 264.0, 261.0, 286.0, 287.0, 289.0, 290.0, 296.0, 291.0, 273.0, 291.0, 280.0, 290.0, 290.0, 286.0, 259.0, 268.0, 288.0, 291.0, 259.0, 263.0, 229.0, 238.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6975785576399118, "mean_inference_ms": 1.2458591730176685, "mean_action_processing_ms": 0.13377926353914563, "mean_env_wait_ms": 0.8398065533045238, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 633.0, "episode_reward_min": 387.0, "episode_reward_mean": 568.88, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 191.0}, "policy_reward_max": {"ppo": 323.0}, "policy_reward_mean": {"ppo": 284.44}, "hist_stats": {"episode_reward": [522.0, 579.0, 576.0, 582.0, 576.0, 570.0, 570.0, 579.0, 530.0, 522.0, 570.0, 519.0, 579.0, 576.0, 582.0, 579.0, 579.0, 582.0, 579.0, 576.0, 582.0, 630.0, 579.0, 533.0, 579.0, 627.0, 582.0, 579.0, 582.0, 587.0, 533.0, 527.0, 633.0, 630.0, 576.0, 522.0, 579.0, 576.0, 630.0, 579.0, 582.0, 630.0, 579.0, 579.0, 582.0, 584.0, 579.0, 576.0, 519.0, 570.0, 530.0, 570.0, 573.0, 525.0, 579.0, 582.0, 522.0, 
579.0, 582.0, 570.0, 630.0, 573.0, 582.0, 587.0, 582.0, 530.0, 579.0, 579.0, 582.0, 579.0, 579.0, 582.0, 573.0, 387.0, 633.0, 582.0, 522.0, 582.0, 579.0, 533.0, 579.0, 525.0, 576.0, 582.0, 576.0, 522.0, 579.0, 582.0, 579.0, 525.0, 573.0, 579.0, 587.0, 564.0, 570.0, 576.0, 527.0, 579.0, 522.0, 467.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [248.0, 274.0, 288.0, 291.0, 289.0, 287.0, 291.0, 291.0, 288.0, 288.0, 287.0, 283.0, 281.0, 289.0, 289.0, 290.0, 264.0, 266.0, 259.0, 263.0, 291.0, 279.0, 264.0, 255.0, 291.0, 288.0, 292.0, 284.0, 286.0, 296.0, 281.0, 298.0, 290.0, 289.0, 290.0, 292.0, 288.0, 291.0, 292.0, 284.0, 290.0, 292.0, 323.0, 307.0, 290.0, 289.0, 263.0, 270.0, 291.0, 288.0, 319.0, 308.0, 285.0, 297.0, 290.0, 289.0, 289.0, 293.0, 294.0, 293.0, 265.0, 268.0, 268.0, 259.0, 313.0, 320.0, 318.0, 312.0, 285.0, 291.0, 261.0, 261.0, 289.0, 290.0, 284.0, 292.0, 314.0, 316.0, 293.0, 286.0, 298.0, 284.0, 309.0, 321.0, 283.0, 296.0, 288.0, 291.0, 285.0, 297.0, 286.0, 298.0, 282.0, 297.0, 291.0, 285.0, 264.0, 255.0, 290.0, 280.0, 266.0, 264.0, 282.0, 288.0, 280.0, 293.0, 257.0, 268.0, 292.0, 287.0, 290.0, 292.0, 254.0, 268.0, 287.0, 292.0, 292.0, 290.0, 289.0, 281.0, 313.0, 317.0, 285.0, 288.0, 293.0, 289.0, 293.0, 294.0, 293.0, 289.0, 272.0, 258.0, 296.0, 283.0, 286.0, 293.0, 294.0, 288.0, 291.0, 288.0, 289.0, 290.0, 291.0, 291.0, 291.0, 282.0, 196.0, 191.0, 314.0, 319.0, 296.0, 286.0, 262.0, 260.0, 290.0, 292.0, 289.0, 290.0, 270.0, 263.0, 290.0, 289.0, 259.0, 266.0, 286.0, 
290.0, 289.0, 293.0, 286.0, 290.0, 263.0, 259.0, 284.0, 295.0, 290.0, 292.0, 288.0, 291.0, 264.0, 261.0, 286.0, 287.0, 289.0, 290.0, 296.0, 291.0, 273.0, 291.0, 280.0, 290.0, 290.0, 286.0, 259.0, 268.0, 288.0, 291.0, 259.0, 263.0, 229.0, 238.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6975785576399118, "mean_inference_ms": 1.2458591730176685, "mean_action_processing_ms": 0.13377926353914563, "mean_env_wait_ms": 0.8398065533045238, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 9651200, "num_agent_steps_trained": 9651200, "num_env_steps_sampled": 4825600, "num_env_steps_trained": 4825600, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 4825600, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 9651200, "timers": {"training_iteration_time_ms": 3642.325, "learn_time_ms": 1094.616, "learn_throughput": 11693.594, "synch_weights_time_ms": 11.968}, "counters": {"num_env_steps_sampled": 4825600, "num_env_steps_trained": 4825600, "num_agent_steps_sampled": 9651200, "num_agent_steps_trained": 9651200}, "done": false, "episodes_total": 12064, "training_iteration": 377, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-24-57", "timestamp": 1666581897, "time_this_iter_s": 3.6727993488311768, "time_total_s": 1440.712842464447, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1440.712842464447, "timesteps_since_restore": 0, "iterations_since_restore": 377, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.52, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 195.4, "sparse_reward_min": 100, "sparse_reward_max": 220, "shaped_reward_mean": 175.07, "shaped_reward_min": 102, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.28, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 17.34, "onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 15.11, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 17.18, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 25, "onion_drop_agent_0_mean": 0.15, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.13, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.07, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, 
"useful_onion_drop_agent_1_mean": 0.08, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.82, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 16.91, "potting_onion_agent_1_min": 6, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 5.51, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 4.91, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.34, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 4.77, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.08, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.06, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.27, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 4.64, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.25, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.58, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.82, "optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 16.91, "optimal_onion_potting_agent_1_min": 6, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.82, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 16.91, "viable_onion_potting_agent_1_min": 6, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.00013266084715723991, "policy_loss": -0.0005023379344493151, "vf_loss": 
7.769681930541992, "vf_explained_var": 0.5716358423233032, "kl": 0.003659307025372982, "entropy": 0.8145787715911865, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 4838400, "num_env_steps_trained": 4838400, "num_agent_steps_sampled": 9676800, "num_agent_steps_trained": 9676800}, "sampler_results": {"episode_reward_max": 633.0, "episode_reward_min": 302.0, "episode_reward_mean": 565.87, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 145.0}, "policy_reward_max": {"ppo": 321.0}, "policy_reward_mean": {"ppo": 282.935}, "custom_metrics": {"sparse_reward_mean": 195.4, "sparse_reward_min": 100, "sparse_reward_max": 220, "shaped_reward_mean": 175.07, "shaped_reward_min": 102, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.28, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 17.34, 
"onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 15.11, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 17.18, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 25, "onion_drop_agent_0_mean": 0.15, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.13, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.07, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.08, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.82, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 16.91, "potting_onion_agent_1_min": 6, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 5.51, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 4.91, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.34, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 4.77, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.08, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.06, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.27, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 4.64, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.25, "soup_delivery_agent_0_min": 1, 
"soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.58, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.82, "optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 16.91, "optimal_onion_potting_agent_1_min": 6, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.82, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 16.91, "viable_onion_potting_agent_1_min": 6, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [633.0, 630.0, 576.0, 522.0, 579.0, 576.0, 630.0, 579.0, 582.0, 630.0, 579.0, 579.0, 582.0, 584.0, 579.0, 576.0, 519.0, 570.0, 530.0, 570.0, 573.0, 525.0, 579.0, 582.0, 522.0, 579.0, 582.0, 570.0, 630.0, 573.0, 582.0, 587.0, 582.0, 530.0, 579.0, 579.0, 582.0, 579.0, 579.0, 582.0, 573.0, 387.0, 633.0, 582.0, 522.0, 582.0, 579.0, 533.0, 579.0, 525.0, 576.0, 582.0, 576.0, 522.0, 579.0, 582.0, 579.0, 525.0, 573.0, 579.0, 587.0, 564.0, 570.0, 576.0, 527.0, 579.0, 522.0, 467.0, 570.0, 525.0, 576.0, 573.0, 576.0, 582.0, 579.0, 573.0, 587.0, 582.0, 582.0, 573.0, 579.0, 570.0, 576.0, 573.0, 525.0, 579.0, 579.0, 579.0, 525.0, 570.0, 582.0, 576.0, 573.0, 579.0, 587.0, 302.0, 582.0, 525.0, 525.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [313.0, 320.0, 318.0, 312.0, 285.0, 291.0, 261.0, 261.0, 289.0, 290.0, 284.0, 292.0, 314.0, 316.0, 293.0, 286.0, 298.0, 284.0, 309.0, 321.0, 283.0, 296.0, 288.0, 291.0, 285.0, 297.0, 286.0, 298.0, 282.0, 297.0, 291.0, 285.0, 264.0, 255.0, 290.0, 280.0, 266.0, 264.0, 282.0, 288.0, 280.0, 293.0, 257.0, 268.0, 292.0, 287.0, 290.0, 292.0, 254.0, 
268.0, 287.0, 292.0, 292.0, 290.0, 289.0, 281.0, 313.0, 317.0, 285.0, 288.0, 293.0, 289.0, 293.0, 294.0, 293.0, 289.0, 272.0, 258.0, 296.0, 283.0, 286.0, 293.0, 294.0, 288.0, 291.0, 288.0, 289.0, 290.0, 291.0, 291.0, 291.0, 282.0, 196.0, 191.0, 314.0, 319.0, 296.0, 286.0, 262.0, 260.0, 290.0, 292.0, 289.0, 290.0, 270.0, 263.0, 290.0, 289.0, 259.0, 266.0, 286.0, 290.0, 289.0, 293.0, 286.0, 290.0, 263.0, 259.0, 284.0, 295.0, 290.0, 292.0, 288.0, 291.0, 264.0, 261.0, 286.0, 287.0, 289.0, 290.0, 296.0, 291.0, 273.0, 291.0, 280.0, 290.0, 290.0, 286.0, 259.0, 268.0, 288.0, 291.0, 259.0, 263.0, 229.0, 238.0, 284.0, 286.0, 260.0, 265.0, 299.0, 277.0, 282.0, 291.0, 284.0, 292.0, 290.0, 292.0, 298.0, 281.0, 285.0, 288.0, 294.0, 293.0, 287.0, 295.0, 293.0, 289.0, 290.0, 283.0, 288.0, 291.0, 282.0, 288.0, 287.0, 289.0, 283.0, 290.0, 255.0, 270.0, 289.0, 290.0, 293.0, 286.0, 294.0, 285.0, 260.0, 265.0, 281.0, 289.0, 291.0, 291.0, 287.0, 289.0, 291.0, 282.0, 287.0, 292.0, 296.0, 291.0, 145.0, 157.0, 290.0, 292.0, 258.0, 267.0, 259.0, 266.0, 287.0, 295.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6975360336417056, "mean_inference_ms": 1.2457301821099558, "mean_action_processing_ms": 0.1337741085947604, "mean_env_wait_ms": 0.8397488598807737, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 633.0, "episode_reward_min": 302.0, "episode_reward_mean": 565.87, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 145.0}, "policy_reward_max": {"ppo": 321.0}, "policy_reward_mean": {"ppo": 282.935}, "hist_stats": {"episode_reward": [633.0, 630.0, 576.0, 522.0, 579.0, 576.0, 630.0, 579.0, 582.0, 630.0, 579.0, 579.0, 582.0, 584.0, 579.0, 576.0, 519.0, 570.0, 530.0, 570.0, 573.0, 525.0, 579.0, 582.0, 522.0, 579.0, 582.0, 570.0, 630.0, 573.0, 582.0, 587.0, 582.0, 530.0, 579.0, 579.0, 582.0, 579.0, 579.0, 582.0, 573.0, 387.0, 633.0, 582.0, 522.0, 582.0, 579.0, 533.0, 579.0, 525.0, 576.0, 582.0, 576.0, 522.0, 579.0, 582.0, 579.0, 
525.0, 573.0, 579.0, 587.0, 564.0, 570.0, 576.0, 527.0, 579.0, 522.0, 467.0, 570.0, 525.0, 576.0, 573.0, 576.0, 582.0, 579.0, 573.0, 587.0, 582.0, 582.0, 573.0, 579.0, 570.0, 576.0, 573.0, 525.0, 579.0, 579.0, 579.0, 525.0, 570.0, 582.0, 576.0, 573.0, 579.0, 587.0, 302.0, 582.0, 525.0, 525.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [313.0, 320.0, 318.0, 312.0, 285.0, 291.0, 261.0, 261.0, 289.0, 290.0, 284.0, 292.0, 314.0, 316.0, 293.0, 286.0, 298.0, 284.0, 309.0, 321.0, 283.0, 296.0, 288.0, 291.0, 285.0, 297.0, 286.0, 298.0, 282.0, 297.0, 291.0, 285.0, 264.0, 255.0, 290.0, 280.0, 266.0, 264.0, 282.0, 288.0, 280.0, 293.0, 257.0, 268.0, 292.0, 287.0, 290.0, 292.0, 254.0, 268.0, 287.0, 292.0, 292.0, 290.0, 289.0, 281.0, 313.0, 317.0, 285.0, 288.0, 293.0, 289.0, 293.0, 294.0, 293.0, 289.0, 272.0, 258.0, 296.0, 283.0, 286.0, 293.0, 294.0, 288.0, 291.0, 288.0, 289.0, 290.0, 291.0, 291.0, 291.0, 282.0, 196.0, 191.0, 314.0, 319.0, 296.0, 286.0, 262.0, 260.0, 290.0, 292.0, 289.0, 290.0, 270.0, 263.0, 290.0, 289.0, 259.0, 266.0, 286.0, 290.0, 289.0, 293.0, 286.0, 290.0, 263.0, 259.0, 284.0, 295.0, 290.0, 292.0, 288.0, 291.0, 264.0, 261.0, 286.0, 287.0, 289.0, 290.0, 296.0, 291.0, 273.0, 291.0, 280.0, 290.0, 290.0, 286.0, 259.0, 268.0, 288.0, 291.0, 259.0, 263.0, 229.0, 238.0, 284.0, 286.0, 260.0, 265.0, 299.0, 277.0, 282.0, 291.0, 284.0, 292.0, 290.0, 292.0, 298.0, 281.0, 285.0, 288.0, 294.0, 293.0, 287.0, 295.0, 293.0, 289.0, 290.0, 283.0, 288.0, 291.0, 282.0, 288.0, 287.0, 
289.0, 283.0, 290.0, 255.0, 270.0, 289.0, 290.0, 293.0, 286.0, 294.0, 285.0, 260.0, 265.0, 281.0, 289.0, 291.0, 291.0, 287.0, 289.0, 291.0, 282.0, 287.0, 292.0, 296.0, 291.0, 145.0, 157.0, 290.0, 292.0, 258.0, 267.0, 259.0, 266.0, 287.0, 295.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6975360336417056, "mean_inference_ms": 1.2457301821099558, "mean_action_processing_ms": 0.1337741085947604, "mean_env_wait_ms": 0.8397488598807737, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 9676800, "num_agent_steps_trained": 9676800, "num_env_steps_sampled": 4838400, "num_env_steps_trained": 4838400, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 4838400, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 9676800, "timers": {"training_iteration_time_ms": 3635.925, "learn_time_ms": 1088.364, "learn_throughput": 11760.771, "synch_weights_time_ms": 12.047}, "counters": {"num_env_steps_sampled": 4838400, "num_env_steps_trained": 4838400, "num_agent_steps_sampled": 9676800, "num_agent_steps_trained": 9676800}, "done": false, "episodes_total": 12096, "training_iteration": 378, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-25-00", "timestamp": 1666581900, "time_this_iter_s": 3.6549689769744873, "time_total_s": 1444.3678114414215, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1444.3678114414215, "timesteps_since_restore": 0, "iterations_since_restore": 378, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.816666666666666, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 193.2, "sparse_reward_min": 100, "sparse_reward_max": 220, "shaped_reward_mean": 173.55, "shaped_reward_min": 102, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.38, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 16.99, "onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 15.21, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 16.82, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 26, "onion_drop_agent_0_mean": 0.17, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.1, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.08, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, 
"useful_onion_drop_agent_1_mean": 0.05, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 14.89, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 16.61, "potting_onion_agent_1_min": 6, "potting_onion_agent_1_max": 25, "dish_pickup_agent_0_mean": 5.38, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 4.92, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.21, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.79, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.08, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.03, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.16, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.65, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.14, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.6, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.89, "optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 16.61, "optimal_onion_potting_agent_1_min": 6, "optimal_onion_potting_agent_1_max": 25, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.89, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 16.61, "viable_onion_potting_agent_1_min": 6, "viable_onion_potting_agent_1_max": 25, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0016985749825835228, "policy_loss": -0.0020646799821406603, "vf_loss": 
7.783526420593262, "vf_explained_var": 0.5932220220565796, "kl": 0.002582661574706435, "entropy": 0.8244947195053101, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 4851200, "num_env_steps_trained": 4851200, "num_agent_steps_sampled": 9702400, "num_agent_steps_trained": 9702400}, "sampler_results": {"episode_reward_max": 633.0, "episode_reward_min": 302.0, "episode_reward_mean": 559.95, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 145.0}, "policy_reward_max": {"ppo": 319.0}, "policy_reward_mean": {"ppo": 279.975}, "custom_metrics": {"sparse_reward_mean": 193.2, "sparse_reward_min": 100, "sparse_reward_max": 220, "shaped_reward_mean": 173.55, "shaped_reward_min": 102, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.38, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 16.99, 
"onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 15.21, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 16.82, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 26, "onion_drop_agent_0_mean": 0.17, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.1, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.08, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.05, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 14.89, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 16.61, "potting_onion_agent_1_min": 6, "potting_onion_agent_1_max": 25, "dish_pickup_agent_0_mean": 5.38, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 4.92, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.21, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.79, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.08, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.03, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.16, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.65, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.14, "soup_delivery_agent_0_min": 1, 
"soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.6, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.89, "optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 16.61, "optimal_onion_potting_agent_1_min": 6, "optimal_onion_potting_agent_1_max": 25, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.89, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 16.61, "viable_onion_potting_agent_1_min": 6, "viable_onion_potting_agent_1_max": 25, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 530.0, 579.0, 579.0, 582.0, 579.0, 579.0, 582.0, 573.0, 387.0, 633.0, 582.0, 522.0, 582.0, 579.0, 533.0, 579.0, 525.0, 576.0, 582.0, 576.0, 522.0, 579.0, 582.0, 579.0, 525.0, 573.0, 579.0, 587.0, 564.0, 570.0, 576.0, 527.0, 579.0, 522.0, 467.0, 570.0, 525.0, 576.0, 573.0, 576.0, 582.0, 579.0, 573.0, 587.0, 582.0, 582.0, 573.0, 579.0, 570.0, 576.0, 573.0, 525.0, 579.0, 579.0, 579.0, 525.0, 570.0, 582.0, 576.0, 573.0, 579.0, 587.0, 302.0, 582.0, 525.0, 525.0, 582.0, 573.0, 627.0, 579.0, 527.0, 576.0, 579.0, 579.0, 576.0, 527.0, 582.0, 525.0, 579.0, 579.0, 576.0, 582.0, 582.0, 579.0, 351.0, 579.0, 579.0, 522.0, 627.0, 525.0, 522.0, 579.0, 584.0, 573.0, 522.0, 579.0, 522.0, 582.0, 524.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [293.0, 289.0, 272.0, 258.0, 296.0, 283.0, 286.0, 293.0, 294.0, 288.0, 291.0, 288.0, 289.0, 290.0, 291.0, 291.0, 291.0, 282.0, 196.0, 191.0, 314.0, 319.0, 296.0, 286.0, 262.0, 260.0, 290.0, 292.0, 289.0, 290.0, 270.0, 263.0, 290.0, 289.0, 259.0, 266.0, 286.0, 290.0, 289.0, 293.0, 286.0, 290.0, 263.0, 259.0, 284.0, 295.0, 290.0, 292.0, 288.0, 
291.0, 264.0, 261.0, 286.0, 287.0, 289.0, 290.0, 296.0, 291.0, 273.0, 291.0, 280.0, 290.0, 290.0, 286.0, 259.0, 268.0, 288.0, 291.0, 259.0, 263.0, 229.0, 238.0, 284.0, 286.0, 260.0, 265.0, 299.0, 277.0, 282.0, 291.0, 284.0, 292.0, 290.0, 292.0, 298.0, 281.0, 285.0, 288.0, 294.0, 293.0, 287.0, 295.0, 293.0, 289.0, 290.0, 283.0, 288.0, 291.0, 282.0, 288.0, 287.0, 289.0, 283.0, 290.0, 255.0, 270.0, 289.0, 290.0, 293.0, 286.0, 294.0, 285.0, 260.0, 265.0, 281.0, 289.0, 291.0, 291.0, 287.0, 289.0, 291.0, 282.0, 287.0, 292.0, 296.0, 291.0, 145.0, 157.0, 290.0, 292.0, 258.0, 267.0, 259.0, 266.0, 287.0, 295.0, 282.0, 291.0, 315.0, 312.0, 279.0, 300.0, 260.0, 267.0, 288.0, 288.0, 290.0, 289.0, 287.0, 292.0, 288.0, 288.0, 260.0, 267.0, 285.0, 297.0, 264.0, 261.0, 293.0, 286.0, 283.0, 296.0, 291.0, 285.0, 286.0, 296.0, 287.0, 295.0, 290.0, 289.0, 177.0, 174.0, 293.0, 286.0, 286.0, 293.0, 265.0, 257.0, 315.0, 312.0, 262.0, 263.0, 267.0, 255.0, 289.0, 290.0, 289.0, 295.0, 287.0, 286.0, 270.0, 252.0, 289.0, 290.0, 254.0, 268.0, 285.0, 297.0, 257.0, 267.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6974907736658347, "mean_inference_ms": 1.2455914003726156, "mean_action_processing_ms": 0.13376785719477183, "mean_env_wait_ms": 0.8396836843394329, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 633.0, "episode_reward_min": 302.0, "episode_reward_mean": 559.95, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 145.0}, "policy_reward_max": {"ppo": 319.0}, "policy_reward_mean": {"ppo": 279.975}, "hist_stats": {"episode_reward": [582.0, 530.0, 579.0, 579.0, 582.0, 579.0, 579.0, 582.0, 573.0, 387.0, 633.0, 582.0, 522.0, 582.0, 579.0, 533.0, 579.0, 525.0, 576.0, 582.0, 576.0, 522.0, 579.0, 582.0, 579.0, 525.0, 573.0, 579.0, 587.0, 564.0, 570.0, 576.0, 527.0, 579.0, 522.0, 467.0, 570.0, 525.0, 576.0, 573.0, 576.0, 582.0, 579.0, 573.0, 587.0, 582.0, 582.0, 573.0, 579.0, 570.0, 576.0, 573.0, 525.0, 579.0, 579.0, 579.0, 
525.0, 570.0, 582.0, 576.0, 573.0, 579.0, 587.0, 302.0, 582.0, 525.0, 525.0, 582.0, 573.0, 627.0, 579.0, 527.0, 576.0, 579.0, 579.0, 576.0, 527.0, 582.0, 525.0, 579.0, 579.0, 576.0, 582.0, 582.0, 579.0, 351.0, 579.0, 579.0, 522.0, 627.0, 525.0, 522.0, 579.0, 584.0, 573.0, 522.0, 579.0, 522.0, 582.0, 524.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [293.0, 289.0, 272.0, 258.0, 296.0, 283.0, 286.0, 293.0, 294.0, 288.0, 291.0, 288.0, 289.0, 290.0, 291.0, 291.0, 291.0, 282.0, 196.0, 191.0, 314.0, 319.0, 296.0, 286.0, 262.0, 260.0, 290.0, 292.0, 289.0, 290.0, 270.0, 263.0, 290.0, 289.0, 259.0, 266.0, 286.0, 290.0, 289.0, 293.0, 286.0, 290.0, 263.0, 259.0, 284.0, 295.0, 290.0, 292.0, 288.0, 291.0, 264.0, 261.0, 286.0, 287.0, 289.0, 290.0, 296.0, 291.0, 273.0, 291.0, 280.0, 290.0, 290.0, 286.0, 259.0, 268.0, 288.0, 291.0, 259.0, 263.0, 229.0, 238.0, 284.0, 286.0, 260.0, 265.0, 299.0, 277.0, 282.0, 291.0, 284.0, 292.0, 290.0, 292.0, 298.0, 281.0, 285.0, 288.0, 294.0, 293.0, 287.0, 295.0, 293.0, 289.0, 290.0, 283.0, 288.0, 291.0, 282.0, 288.0, 287.0, 289.0, 283.0, 290.0, 255.0, 270.0, 289.0, 290.0, 293.0, 286.0, 294.0, 285.0, 260.0, 265.0, 281.0, 289.0, 291.0, 291.0, 287.0, 289.0, 291.0, 282.0, 287.0, 292.0, 296.0, 291.0, 145.0, 157.0, 290.0, 292.0, 258.0, 267.0, 259.0, 266.0, 287.0, 295.0, 282.0, 291.0, 315.0, 312.0, 279.0, 300.0, 260.0, 267.0, 288.0, 288.0, 290.0, 289.0, 287.0, 292.0, 288.0, 288.0, 260.0, 267.0, 285.0, 297.0, 264.0, 261.0, 293.0, 286.0, 283.0, 296.0, 291.0, 285.0, 
286.0, 296.0, 287.0, 295.0, 290.0, 289.0, 177.0, 174.0, 293.0, 286.0, 286.0, 293.0, 265.0, 257.0, 315.0, 312.0, 262.0, 263.0, 267.0, 255.0, 289.0, 290.0, 289.0, 295.0, 287.0, 286.0, 270.0, 252.0, 289.0, 290.0, 254.0, 268.0, 285.0, 297.0, 257.0, 267.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6974907736658347, "mean_inference_ms": 1.2455914003726156, "mean_action_processing_ms": 0.13376785719477183, "mean_env_wait_ms": 0.8396836843394329, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 9702400, "num_agent_steps_trained": 9702400, "num_env_steps_sampled": 4851200, "num_env_steps_trained": 4851200, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 4851200, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 9702400, "timers": {"training_iteration_time_ms": 3610.232, "learn_time_ms": 1086.244, "learn_throughput": 11783.729, "synch_weights_time_ms": 11.059}, "counters": {"num_env_steps_sampled": 4851200, "num_env_steps_trained": 4851200, "num_agent_steps_sampled": 9702400, "num_agent_steps_trained": 9702400}, "done": false, "episodes_total": 12128, "training_iteration": 379, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-25-04", "timestamp": 1666581904, "time_this_iter_s": 3.679121971130371, "time_total_s": 1448.0469334125519, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1448.0469334125519, "timesteps_since_restore": 0, "iterations_since_restore": 379, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 24.380000000000003, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 193.2, "sparse_reward_min": 100, "sparse_reward_max": 220, "shaped_reward_mean": 172.65, "shaped_reward_min": 102, "shaped_reward_max": 187, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.21, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 17.06, "onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 14.96, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 16.95, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 26, "onion_drop_agent_0_mean": 0.18, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 0.08, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.11, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, 
"useful_onion_drop_agent_1_mean": 0.03, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 14.66, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 16.71, "potting_onion_agent_1_min": 6, "potting_onion_agent_1_max": 25, "dish_pickup_agent_0_mean": 5.4, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 4.88, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.19, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.69, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.08, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.05, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.19, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.6, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.17, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.55, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.66, "optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 16.71, "optimal_onion_potting_agent_1_min": 6, "optimal_onion_potting_agent_1_max": 25, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.66, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 16.71, "viable_onion_potting_agent_1_min": 6, "viable_onion_potting_agent_1_max": 25, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0017400092910975218, "policy_loss": 0.001374026178382337, "vf_loss": 
7.744047164916992, "vf_explained_var": 0.5891155004501343, "kl": 0.00385090708732605, "entropy": 0.8168433308601379, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 4864000, "num_env_steps_trained": 4864000, "num_agent_steps_sampled": 9728000, "num_agent_steps_trained": 9728000}, "sampler_results": {"episode_reward_max": 627.0, "episode_reward_min": 302.0, "episode_reward_mean": 559.05, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 145.0}, "policy_reward_max": {"ppo": 315.0}, "policy_reward_mean": {"ppo": 279.525}, "custom_metrics": {"sparse_reward_mean": 193.2, "sparse_reward_min": 100, "sparse_reward_max": 220, "shaped_reward_mean": 172.65, "shaped_reward_min": 102, "shaped_reward_max": 187, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.21, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 17.06, 
"onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 14.96, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 16.95, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 26, "onion_drop_agent_0_mean": 0.18, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 0.08, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.11, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 0.03, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 14.66, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 16.71, "potting_onion_agent_1_min": 6, "potting_onion_agent_1_max": 25, "dish_pickup_agent_0_mean": 5.4, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 4.88, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.19, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.69, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.08, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.05, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.19, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.6, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.17, "soup_delivery_agent_0_min": 1, 
"soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.55, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.66, "optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 16.71, "optimal_onion_potting_agent_1_min": 6, "optimal_onion_potting_agent_1_max": 25, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.66, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 16.71, "viable_onion_potting_agent_1_min": 6, "viable_onion_potting_agent_1_max": 25, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [527.0, 579.0, 522.0, 467.0, 570.0, 525.0, 576.0, 573.0, 576.0, 582.0, 579.0, 573.0, 587.0, 582.0, 582.0, 573.0, 579.0, 570.0, 576.0, 573.0, 525.0, 579.0, 579.0, 579.0, 525.0, 570.0, 582.0, 576.0, 573.0, 579.0, 587.0, 302.0, 582.0, 525.0, 525.0, 582.0, 573.0, 627.0, 579.0, 527.0, 576.0, 579.0, 579.0, 576.0, 527.0, 582.0, 525.0, 579.0, 579.0, 576.0, 582.0, 582.0, 579.0, 351.0, 579.0, 579.0, 522.0, 627.0, 525.0, 522.0, 579.0, 584.0, 573.0, 522.0, 579.0, 522.0, 582.0, 524.0, 579.0, 561.0, 579.0, 579.0, 582.0, 576.0, 582.0, 579.0, 576.0, 579.0, 519.0, 579.0, 582.0, 576.0, 525.0, 573.0, 582.0, 536.0, 462.0, 510.0, 567.0, 579.0, 579.0, 570.0, 579.0, 530.0, 576.0, 530.0, 567.0, 579.0, 576.0, 519.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [259.0, 268.0, 288.0, 291.0, 259.0, 263.0, 229.0, 238.0, 284.0, 286.0, 260.0, 265.0, 299.0, 277.0, 282.0, 291.0, 284.0, 292.0, 290.0, 292.0, 298.0, 281.0, 285.0, 288.0, 294.0, 293.0, 287.0, 295.0, 293.0, 289.0, 290.0, 283.0, 288.0, 291.0, 282.0, 288.0, 287.0, 289.0, 283.0, 290.0, 255.0, 270.0, 289.0, 290.0, 293.0, 286.0, 294.0, 285.0, 260.0, 
265.0, 281.0, 289.0, 291.0, 291.0, 287.0, 289.0, 291.0, 282.0, 287.0, 292.0, 296.0, 291.0, 145.0, 157.0, 290.0, 292.0, 258.0, 267.0, 259.0, 266.0, 287.0, 295.0, 282.0, 291.0, 315.0, 312.0, 279.0, 300.0, 260.0, 267.0, 288.0, 288.0, 290.0, 289.0, 287.0, 292.0, 288.0, 288.0, 260.0, 267.0, 285.0, 297.0, 264.0, 261.0, 293.0, 286.0, 283.0, 296.0, 291.0, 285.0, 286.0, 296.0, 287.0, 295.0, 290.0, 289.0, 177.0, 174.0, 293.0, 286.0, 286.0, 293.0, 265.0, 257.0, 315.0, 312.0, 262.0, 263.0, 267.0, 255.0, 289.0, 290.0, 289.0, 295.0, 287.0, 286.0, 270.0, 252.0, 289.0, 290.0, 254.0, 268.0, 285.0, 297.0, 257.0, 267.0, 287.0, 292.0, 281.0, 280.0, 291.0, 288.0, 296.0, 283.0, 293.0, 289.0, 287.0, 289.0, 292.0, 290.0, 290.0, 289.0, 287.0, 289.0, 285.0, 294.0, 259.0, 260.0, 286.0, 293.0, 291.0, 291.0, 287.0, 289.0, 260.0, 265.0, 289.0, 284.0, 287.0, 295.0, 261.0, 275.0, 232.0, 230.0, 255.0, 255.0, 288.0, 279.0, 288.0, 291.0, 288.0, 291.0, 282.0, 288.0, 288.0, 291.0, 265.0, 265.0, 285.0, 291.0, 267.0, 263.0, 290.0, 277.0, 286.0, 293.0, 288.0, 288.0, 262.0, 257.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6974470945713677, "mean_inference_ms": 1.2454543364921187, "mean_action_processing_ms": 0.13376143031894988, "mean_env_wait_ms": 0.8396144520767223, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 627.0, "episode_reward_min": 302.0, "episode_reward_mean": 559.05, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 145.0}, "policy_reward_max": {"ppo": 315.0}, "policy_reward_mean": {"ppo": 279.525}, "hist_stats": {"episode_reward": [527.0, 579.0, 522.0, 467.0, 570.0, 525.0, 576.0, 573.0, 576.0, 582.0, 579.0, 573.0, 587.0, 582.0, 582.0, 573.0, 579.0, 570.0, 576.0, 573.0, 525.0, 579.0, 579.0, 579.0, 525.0, 570.0, 582.0, 576.0, 573.0, 579.0, 587.0, 302.0, 582.0, 525.0, 525.0, 582.0, 573.0, 627.0, 579.0, 527.0, 576.0, 579.0, 579.0, 576.0, 527.0, 582.0, 525.0, 579.0, 579.0, 576.0, 582.0, 582.0, 579.0, 351.0, 579.0, 579.0, 
522.0, 627.0, 525.0, 522.0, 579.0, 584.0, 573.0, 522.0, 579.0, 522.0, 582.0, 524.0, 579.0, 561.0, 579.0, 579.0, 582.0, 576.0, 582.0, 579.0, 576.0, 579.0, 519.0, 579.0, 582.0, 576.0, 525.0, 573.0, 582.0, 536.0, 462.0, 510.0, 567.0, 579.0, 579.0, 570.0, 579.0, 530.0, 576.0, 530.0, 567.0, 579.0, 576.0, 519.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [259.0, 268.0, 288.0, 291.0, 259.0, 263.0, 229.0, 238.0, 284.0, 286.0, 260.0, 265.0, 299.0, 277.0, 282.0, 291.0, 284.0, 292.0, 290.0, 292.0, 298.0, 281.0, 285.0, 288.0, 294.0, 293.0, 287.0, 295.0, 293.0, 289.0, 290.0, 283.0, 288.0, 291.0, 282.0, 288.0, 287.0, 289.0, 283.0, 290.0, 255.0, 270.0, 289.0, 290.0, 293.0, 286.0, 294.0, 285.0, 260.0, 265.0, 281.0, 289.0, 291.0, 291.0, 287.0, 289.0, 291.0, 282.0, 287.0, 292.0, 296.0, 291.0, 145.0, 157.0, 290.0, 292.0, 258.0, 267.0, 259.0, 266.0, 287.0, 295.0, 282.0, 291.0, 315.0, 312.0, 279.0, 300.0, 260.0, 267.0, 288.0, 288.0, 290.0, 289.0, 287.0, 292.0, 288.0, 288.0, 260.0, 267.0, 285.0, 297.0, 264.0, 261.0, 293.0, 286.0, 283.0, 296.0, 291.0, 285.0, 286.0, 296.0, 287.0, 295.0, 290.0, 289.0, 177.0, 174.0, 293.0, 286.0, 286.0, 293.0, 265.0, 257.0, 315.0, 312.0, 262.0, 263.0, 267.0, 255.0, 289.0, 290.0, 289.0, 295.0, 287.0, 286.0, 270.0, 252.0, 289.0, 290.0, 254.0, 268.0, 285.0, 297.0, 257.0, 267.0, 287.0, 292.0, 281.0, 280.0, 291.0, 288.0, 296.0, 283.0, 293.0, 289.0, 287.0, 289.0, 292.0, 290.0, 290.0, 289.0, 287.0, 289.0, 285.0, 294.0, 259.0, 260.0, 286.0, 293.0, 291.0, 291.0, 287.0, 289.0, 
260.0, 265.0, 289.0, 284.0, 287.0, 295.0, 261.0, 275.0, 232.0, 230.0, 255.0, 255.0, 288.0, 279.0, 288.0, 291.0, 288.0, 291.0, 282.0, 288.0, 288.0, 291.0, 265.0, 265.0, 285.0, 291.0, 267.0, 263.0, 290.0, 277.0, 286.0, 293.0, 288.0, 288.0, 262.0, 257.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6974470945713677, "mean_inference_ms": 1.2454543364921187, "mean_action_processing_ms": 0.13376143031894988, "mean_env_wait_ms": 0.8396144520767223, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 9728000, "num_agent_steps_trained": 9728000, "num_env_steps_sampled": 4864000, "num_env_steps_trained": 4864000, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 4864000, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 9728000, "timers": {"training_iteration_time_ms": 3601.475, "learn_time_ms": 1085.233, "learn_throughput": 11794.707, "synch_weights_time_ms": 10.567}, "counters": {"num_env_steps_sampled": 4864000, "num_env_steps_trained": 4864000, "num_agent_steps_sampled": 9728000, "num_agent_steps_trained": 9728000}, "done": false, "episodes_total": 12160, "training_iteration": 380, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-25-08", "timestamp": 1666581908, "time_this_iter_s": 3.670078992843628, "time_total_s": 1451.7170124053955, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1451.7170124053955, "timesteps_since_restore": 0, "iterations_since_restore": 380, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 21.116666666666664, "ram_util_percent": 10.616666666666665}}
+{"custom_metrics": {"sparse_reward_mean": 194.2, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 173.62, "shaped_reward_min": 111, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.29, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 17.03, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 15.08, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 16.88, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 26, "onion_drop_agent_0_mean": 0.14, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 0.07, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.09, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, 
"useful_onion_drop_agent_1_mean": 0.04, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 14.81, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 16.67, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 25, "dish_pickup_agent_0_mean": 5.41, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 4.92, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 7, "useful_dish_pickup_agent_0_mean": 5.25, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.76, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.07, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.06, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.21, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.63, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.18, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.58, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.81, "optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 16.67, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 25, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.81, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 16.67, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 25, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0003161475760862231, "policy_loss": -4.565366543829441e-05, "vf_loss": 
7.759988307952881, "vf_explained_var": 0.5932192206382751, "kl": 0.002340888138860464, "entropy": 0.8283956050872803, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 4876800, "num_env_steps_trained": 4876800, "num_agent_steps_sampled": 9753600, "num_agent_steps_trained": 9753600}, "sampler_results": {"episode_reward_max": 630.0, "episode_reward_min": 351.0, "episode_reward_mean": 562.02, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 174.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 281.01}, "custom_metrics": {"sparse_reward_mean": 194.2, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 173.62, "shaped_reward_min": 111, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.29, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 17.03, 
"onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 15.08, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 16.88, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 26, "onion_drop_agent_0_mean": 0.14, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 0.07, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.09, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 0.04, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 14.81, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 16.67, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 25, "dish_pickup_agent_0_mean": 5.41, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 4.92, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 7, "useful_dish_pickup_agent_0_mean": 5.25, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.76, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.07, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.06, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.21, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.63, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.18, "soup_delivery_agent_0_min": 2, 
"soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.58, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.81, "optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 16.67, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 25, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.81, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 16.67, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 25, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 525.0, 525.0, 582.0, 573.0, 627.0, 579.0, 527.0, 576.0, 579.0, 579.0, 576.0, 527.0, 582.0, 525.0, 579.0, 579.0, 576.0, 582.0, 582.0, 579.0, 351.0, 579.0, 579.0, 522.0, 627.0, 525.0, 522.0, 579.0, 584.0, 573.0, 522.0, 579.0, 522.0, 582.0, 524.0, 579.0, 561.0, 579.0, 579.0, 582.0, 576.0, 582.0, 579.0, 576.0, 579.0, 519.0, 579.0, 582.0, 576.0, 525.0, 573.0, 582.0, 536.0, 462.0, 510.0, 567.0, 579.0, 579.0, 570.0, 579.0, 530.0, 576.0, 530.0, 567.0, 579.0, 576.0, 519.0, 570.0, 579.0, 579.0, 522.0, 630.0, 573.0, 522.0, 573.0, 587.0, 525.0, 573.0, 582.0, 525.0, 525.0, 579.0, 527.0, 573.0, 627.0, 576.0, 525.0, 527.0, 630.0, 587.0, 584.0, 582.0, 579.0, 579.0, 582.0, 576.0, 522.0, 522.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [290.0, 292.0, 258.0, 267.0, 259.0, 266.0, 287.0, 295.0, 282.0, 291.0, 315.0, 312.0, 279.0, 300.0, 260.0, 267.0, 288.0, 288.0, 290.0, 289.0, 287.0, 292.0, 288.0, 288.0, 260.0, 267.0, 285.0, 297.0, 264.0, 261.0, 293.0, 286.0, 283.0, 296.0, 291.0, 285.0, 286.0, 296.0, 287.0, 295.0, 290.0, 289.0, 177.0, 174.0, 293.0, 286.0, 286.0, 293.0, 265.0, 
257.0, 315.0, 312.0, 262.0, 263.0, 267.0, 255.0, 289.0, 290.0, 289.0, 295.0, 287.0, 286.0, 270.0, 252.0, 289.0, 290.0, 254.0, 268.0, 285.0, 297.0, 257.0, 267.0, 287.0, 292.0, 281.0, 280.0, 291.0, 288.0, 296.0, 283.0, 293.0, 289.0, 287.0, 289.0, 292.0, 290.0, 290.0, 289.0, 287.0, 289.0, 285.0, 294.0, 259.0, 260.0, 286.0, 293.0, 291.0, 291.0, 287.0, 289.0, 260.0, 265.0, 289.0, 284.0, 287.0, 295.0, 261.0, 275.0, 232.0, 230.0, 255.0, 255.0, 288.0, 279.0, 288.0, 291.0, 288.0, 291.0, 282.0, 288.0, 288.0, 291.0, 265.0, 265.0, 285.0, 291.0, 267.0, 263.0, 290.0, 277.0, 286.0, 293.0, 288.0, 288.0, 262.0, 257.0, 294.0, 276.0, 286.0, 293.0, 288.0, 291.0, 261.0, 261.0, 317.0, 313.0, 286.0, 287.0, 263.0, 259.0, 289.0, 284.0, 295.0, 292.0, 264.0, 261.0, 283.0, 290.0, 289.0, 293.0, 265.0, 260.0, 258.0, 267.0, 293.0, 286.0, 260.0, 267.0, 280.0, 293.0, 319.0, 308.0, 285.0, 291.0, 267.0, 258.0, 265.0, 262.0, 324.0, 306.0, 296.0, 291.0, 288.0, 296.0, 287.0, 295.0, 286.0, 293.0, 291.0, 288.0, 293.0, 289.0, 288.0, 288.0, 253.0, 269.0, 258.0, 264.0, 292.0, 290.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6973901172830818, "mean_inference_ms": 1.2453154096742738, "mean_action_processing_ms": 0.1337538870921113, "mean_env_wait_ms": 0.8395324954962717, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 630.0, "episode_reward_min": 351.0, "episode_reward_mean": 562.02, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 174.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 281.01}, "hist_stats": {"episode_reward": [582.0, 525.0, 525.0, 582.0, 573.0, 627.0, 579.0, 527.0, 576.0, 579.0, 579.0, 576.0, 527.0, 582.0, 525.0, 579.0, 579.0, 576.0, 582.0, 582.0, 579.0, 351.0, 579.0, 579.0, 522.0, 627.0, 525.0, 522.0, 579.0, 584.0, 573.0, 522.0, 579.0, 522.0, 582.0, 524.0, 579.0, 561.0, 579.0, 579.0, 582.0, 576.0, 582.0, 579.0, 576.0, 579.0, 519.0, 579.0, 582.0, 576.0, 525.0, 573.0, 582.0, 536.0, 462.0, 510.0, 567.0, 
579.0, 579.0, 570.0, 579.0, 530.0, 576.0, 530.0, 567.0, 579.0, 576.0, 519.0, 570.0, 579.0, 579.0, 522.0, 630.0, 573.0, 522.0, 573.0, 587.0, 525.0, 573.0, 582.0, 525.0, 525.0, 579.0, 527.0, 573.0, 627.0, 576.0, 525.0, 527.0, 630.0, 587.0, 584.0, 582.0, 579.0, 579.0, 582.0, 576.0, 522.0, 522.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [290.0, 292.0, 258.0, 267.0, 259.0, 266.0, 287.0, 295.0, 282.0, 291.0, 315.0, 312.0, 279.0, 300.0, 260.0, 267.0, 288.0, 288.0, 290.0, 289.0, 287.0, 292.0, 288.0, 288.0, 260.0, 267.0, 285.0, 297.0, 264.0, 261.0, 293.0, 286.0, 283.0, 296.0, 291.0, 285.0, 286.0, 296.0, 287.0, 295.0, 290.0, 289.0, 177.0, 174.0, 293.0, 286.0, 286.0, 293.0, 265.0, 257.0, 315.0, 312.0, 262.0, 263.0, 267.0, 255.0, 289.0, 290.0, 289.0, 295.0, 287.0, 286.0, 270.0, 252.0, 289.0, 290.0, 254.0, 268.0, 285.0, 297.0, 257.0, 267.0, 287.0, 292.0, 281.0, 280.0, 291.0, 288.0, 296.0, 283.0, 293.0, 289.0, 287.0, 289.0, 292.0, 290.0, 290.0, 289.0, 287.0, 289.0, 285.0, 294.0, 259.0, 260.0, 286.0, 293.0, 291.0, 291.0, 287.0, 289.0, 260.0, 265.0, 289.0, 284.0, 287.0, 295.0, 261.0, 275.0, 232.0, 230.0, 255.0, 255.0, 288.0, 279.0, 288.0, 291.0, 288.0, 291.0, 282.0, 288.0, 288.0, 291.0, 265.0, 265.0, 285.0, 291.0, 267.0, 263.0, 290.0, 277.0, 286.0, 293.0, 288.0, 288.0, 262.0, 257.0, 294.0, 276.0, 286.0, 293.0, 288.0, 291.0, 261.0, 261.0, 317.0, 313.0, 286.0, 287.0, 263.0, 259.0, 289.0, 284.0, 295.0, 292.0, 264.0, 261.0, 283.0, 290.0, 289.0, 293.0, 265.0, 260.0, 258.0, 267.0, 293.0, 
286.0, 260.0, 267.0, 280.0, 293.0, 319.0, 308.0, 285.0, 291.0, 267.0, 258.0, 265.0, 262.0, 324.0, 306.0, 296.0, 291.0, 288.0, 296.0, 287.0, 295.0, 286.0, 293.0, 291.0, 288.0, 293.0, 289.0, 288.0, 288.0, 253.0, 269.0, 258.0, 264.0, 292.0, 290.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6973901172830818, "mean_inference_ms": 1.2453154096742738, "mean_action_processing_ms": 0.1337538870921113, "mean_env_wait_ms": 0.8395324954962717, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 9753600, "num_agent_steps_trained": 9753600, "num_env_steps_sampled": 4876800, "num_env_steps_trained": 4876800, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 4876800, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 9753600, "timers": {"training_iteration_time_ms": 3587.267, "learn_time_ms": 1080.725, "learn_throughput": 11843.907, "synch_weights_time_ms": 10.048}, "counters": {"num_env_steps_sampled": 4876800, "num_env_steps_trained": 4876800, "num_agent_steps_sampled": 9753600, "num_agent_steps_trained": 9753600}, "done": false, "episodes_total": 12192, "training_iteration": 381, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-25-12", "timestamp": 1666581912, "time_this_iter_s": 3.5818943977355957, "time_total_s": 1455.298906803131, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1455.298906803131, "timesteps_since_restore": 0, "iterations_since_restore": 381, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.560000000000002, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 195.8, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 174.64, "shaped_reward_min": 114, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.71, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 16.65, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 15.57, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 16.54, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.12, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 0.09, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.06, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, 
"useful_onion_drop_agent_1_mean": 0.06, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.28, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 16.29, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.34, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 5.07, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 7, "useful_dish_pickup_agent_0_mean": 5.17, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.94, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.05, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.07, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.1, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.83, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.05, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.78, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.28, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 16.29, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.28, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 16.29, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.004279726184904575, "policy_loss": -0.004651351366192102, "vf_loss": 
7.765802383422852, "vf_explained_var": 0.5755102634429932, "kl": 0.002863064408302307, "entropy": 0.8099081516265869, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 4889600, "num_env_steps_trained": 4889600, "num_agent_steps_sampled": 9779200, "num_agent_steps_trained": 9779200}, "sampler_results": {"episode_reward_max": 636.0, "episode_reward_min": 354.0, "episode_reward_mean": 566.24, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 173.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 283.12}, "custom_metrics": {"sparse_reward_mean": 195.8, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 174.64, "shaped_reward_min": 114, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.71, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 16.65, 
"onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 15.57, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 16.54, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.12, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 0.09, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.06, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 0.06, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.28, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 16.29, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.34, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 5.07, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 7, "useful_dish_pickup_agent_0_mean": 5.17, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.94, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.05, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.07, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.1, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.83, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.05, "soup_delivery_agent_0_min": 3, 
"soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.78, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.28, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 16.29, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.28, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 16.29, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 522.0, 582.0, 524.0, 579.0, 561.0, 579.0, 579.0, 582.0, 576.0, 582.0, 579.0, 576.0, 579.0, 519.0, 579.0, 582.0, 576.0, 525.0, 573.0, 582.0, 536.0, 462.0, 510.0, 567.0, 579.0, 579.0, 570.0, 579.0, 530.0, 576.0, 530.0, 567.0, 579.0, 576.0, 519.0, 570.0, 579.0, 579.0, 522.0, 630.0, 573.0, 522.0, 573.0, 587.0, 525.0, 573.0, 582.0, 525.0, 525.0, 579.0, 527.0, 573.0, 627.0, 576.0, 525.0, 527.0, 630.0, 587.0, 584.0, 582.0, 579.0, 579.0, 582.0, 576.0, 522.0, 522.0, 582.0, 633.0, 627.0, 570.0, 582.0, 582.0, 579.0, 525.0, 633.0, 570.0, 576.0, 636.0, 584.0, 573.0, 582.0, 584.0, 579.0, 570.0, 579.0, 570.0, 576.0, 570.0, 582.0, 530.0, 582.0, 354.0, 573.0, 587.0, 582.0, 579.0, 576.0, 579.0, 522.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [289.0, 290.0, 254.0, 268.0, 285.0, 297.0, 257.0, 267.0, 287.0, 292.0, 281.0, 280.0, 291.0, 288.0, 296.0, 283.0, 293.0, 289.0, 287.0, 289.0, 292.0, 290.0, 290.0, 289.0, 287.0, 289.0, 285.0, 294.0, 259.0, 260.0, 286.0, 293.0, 291.0, 291.0, 287.0, 289.0, 260.0, 265.0, 289.0, 284.0, 287.0, 295.0, 261.0, 275.0, 232.0, 230.0, 255.0, 255.0, 288.0, 
279.0, 288.0, 291.0, 288.0, 291.0, 282.0, 288.0, 288.0, 291.0, 265.0, 265.0, 285.0, 291.0, 267.0, 263.0, 290.0, 277.0, 286.0, 293.0, 288.0, 288.0, 262.0, 257.0, 294.0, 276.0, 286.0, 293.0, 288.0, 291.0, 261.0, 261.0, 317.0, 313.0, 286.0, 287.0, 263.0, 259.0, 289.0, 284.0, 295.0, 292.0, 264.0, 261.0, 283.0, 290.0, 289.0, 293.0, 265.0, 260.0, 258.0, 267.0, 293.0, 286.0, 260.0, 267.0, 280.0, 293.0, 319.0, 308.0, 285.0, 291.0, 267.0, 258.0, 265.0, 262.0, 324.0, 306.0, 296.0, 291.0, 288.0, 296.0, 287.0, 295.0, 286.0, 293.0, 291.0, 288.0, 293.0, 289.0, 288.0, 288.0, 253.0, 269.0, 258.0, 264.0, 292.0, 290.0, 314.0, 319.0, 314.0, 313.0, 286.0, 284.0, 294.0, 288.0, 292.0, 290.0, 285.0, 294.0, 257.0, 268.0, 320.0, 313.0, 293.0, 277.0, 294.0, 282.0, 317.0, 319.0, 291.0, 293.0, 283.0, 290.0, 289.0, 293.0, 290.0, 294.0, 294.0, 285.0, 275.0, 295.0, 286.0, 293.0, 285.0, 285.0, 285.0, 291.0, 284.0, 286.0, 290.0, 292.0, 261.0, 269.0, 289.0, 293.0, 173.0, 181.0, 287.0, 286.0, 299.0, 288.0, 289.0, 293.0, 287.0, 292.0, 291.0, 285.0, 290.0, 289.0, 260.0, 262.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6973273806291641, "mean_inference_ms": 1.2451831322393523, "mean_action_processing_ms": 0.13374559805500635, "mean_env_wait_ms": 0.8394495822798175, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 636.0, "episode_reward_min": 354.0, "episode_reward_mean": 566.24, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 173.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 283.12}, "hist_stats": {"episode_reward": [579.0, 522.0, 582.0, 524.0, 579.0, 561.0, 579.0, 579.0, 582.0, 576.0, 582.0, 579.0, 576.0, 579.0, 519.0, 579.0, 582.0, 576.0, 525.0, 573.0, 582.0, 536.0, 462.0, 510.0, 567.0, 579.0, 579.0, 570.0, 579.0, 530.0, 576.0, 530.0, 567.0, 579.0, 576.0, 519.0, 570.0, 579.0, 579.0, 522.0, 630.0, 573.0, 522.0, 573.0, 587.0, 525.0, 573.0, 582.0, 525.0, 525.0, 579.0, 527.0, 573.0, 627.0, 576.0, 525.0, 527.0, 
630.0, 587.0, 584.0, 582.0, 579.0, 579.0, 582.0, 576.0, 522.0, 522.0, 582.0, 633.0, 627.0, 570.0, 582.0, 582.0, 579.0, 525.0, 633.0, 570.0, 576.0, 636.0, 584.0, 573.0, 582.0, 584.0, 579.0, 570.0, 579.0, 570.0, 576.0, 570.0, 582.0, 530.0, 582.0, 354.0, 573.0, 587.0, 582.0, 579.0, 576.0, 579.0, 522.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [289.0, 290.0, 254.0, 268.0, 285.0, 297.0, 257.0, 267.0, 287.0, 292.0, 281.0, 280.0, 291.0, 288.0, 296.0, 283.0, 293.0, 289.0, 287.0, 289.0, 292.0, 290.0, 290.0, 289.0, 287.0, 289.0, 285.0, 294.0, 259.0, 260.0, 286.0, 293.0, 291.0, 291.0, 287.0, 289.0, 260.0, 265.0, 289.0, 284.0, 287.0, 295.0, 261.0, 275.0, 232.0, 230.0, 255.0, 255.0, 288.0, 279.0, 288.0, 291.0, 288.0, 291.0, 282.0, 288.0, 288.0, 291.0, 265.0, 265.0, 285.0, 291.0, 267.0, 263.0, 290.0, 277.0, 286.0, 293.0, 288.0, 288.0, 262.0, 257.0, 294.0, 276.0, 286.0, 293.0, 288.0, 291.0, 261.0, 261.0, 317.0, 313.0, 286.0, 287.0, 263.0, 259.0, 289.0, 284.0, 295.0, 292.0, 264.0, 261.0, 283.0, 290.0, 289.0, 293.0, 265.0, 260.0, 258.0, 267.0, 293.0, 286.0, 260.0, 267.0, 280.0, 293.0, 319.0, 308.0, 285.0, 291.0, 267.0, 258.0, 265.0, 262.0, 324.0, 306.0, 296.0, 291.0, 288.0, 296.0, 287.0, 295.0, 286.0, 293.0, 291.0, 288.0, 293.0, 289.0, 288.0, 288.0, 253.0, 269.0, 258.0, 264.0, 292.0, 290.0, 314.0, 319.0, 314.0, 313.0, 286.0, 284.0, 294.0, 288.0, 292.0, 290.0, 285.0, 294.0, 257.0, 268.0, 320.0, 313.0, 293.0, 277.0, 294.0, 282.0, 317.0, 319.0, 291.0, 293.0, 283.0, 290.0, 289.0, 293.0, 290.0, 
294.0, 294.0, 285.0, 275.0, 295.0, 286.0, 293.0, 285.0, 285.0, 285.0, 291.0, 284.0, 286.0, 290.0, 292.0, 261.0, 269.0, 289.0, 293.0, 173.0, 181.0, 287.0, 286.0, 299.0, 288.0, 289.0, 293.0, 287.0, 292.0, 291.0, 285.0, 290.0, 289.0, 260.0, 262.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6973273806291641, "mean_inference_ms": 1.2451831322393523, "mean_action_processing_ms": 0.13374559805500635, "mean_env_wait_ms": 0.8394495822798175, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 9779200, "num_agent_steps_trained": 9779200, "num_env_steps_sampled": 4889600, "num_env_steps_trained": 4889600, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 4889600, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 9779200, "timers": {"training_iteration_time_ms": 3587.953, "learn_time_ms": 1082.429, "learn_throughput": 11825.26, "synch_weights_time_ms": 11.017}, "counters": {"num_env_steps_sampled": 4889600, "num_env_steps_trained": 4889600, "num_agent_steps_sampled": 9779200, "num_agent_steps_trained": 9779200}, "done": false, "episodes_total": 12224, "training_iteration": 382, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-25-16", "timestamp": 1666581916, "time_this_iter_s": 3.6740386486053467, "time_total_s": 1458.9729454517365, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1458.9729454517365, "timesteps_since_restore": 0, "iterations_since_restore": 382, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.966666666666665, "ram_util_percent": 10.616666666666667}}
+{"custom_metrics": {"sparse_reward_mean": 197.6, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 176.34, "shaped_reward_min": 114, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.95, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 16.62, "onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 15.87, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 16.49, "useful_onion_pickup_agent_1_min": 11, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.08, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.11, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.02, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, 
"useful_onion_drop_agent_1_mean": 0.06, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.59, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 16.25, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.35, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 5.14, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.23, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.06, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.07, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.05, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.09, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.9, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.03, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.87, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.59, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 16.25, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.59, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 16.25, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0005173349054530263, "policy_loss": 0.0001583112170919776, "vf_loss": 
7.708772659301758, "vf_explained_var": 0.6065121293067932, "kl": 0.002974329050630331, "entropy": 0.8237053751945496, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 4902400, "num_env_steps_trained": 4902400, "num_agent_steps_sampled": 9804800, "num_agent_steps_trained": 9804800}, "sampler_results": {"episode_reward_max": 636.0, "episode_reward_min": 354.0, "episode_reward_mean": 571.54, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 173.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 285.77}, "custom_metrics": {"sparse_reward_mean": 197.6, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 176.34, "shaped_reward_min": 114, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.95, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 16.62, 
"onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 15.87, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 16.49, "useful_onion_pickup_agent_1_min": 11, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.08, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.11, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.02, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.06, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.59, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 16.25, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.35, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 5.14, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.23, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.06, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.07, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.05, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.09, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.9, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.03, "soup_delivery_agent_0_min": 2, 
"soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.87, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.59, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 16.25, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.59, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 16.25, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [567.0, 579.0, 576.0, 519.0, 570.0, 579.0, 579.0, 522.0, 630.0, 573.0, 522.0, 573.0, 587.0, 525.0, 573.0, 582.0, 525.0, 525.0, 579.0, 527.0, 573.0, 627.0, 576.0, 525.0, 527.0, 630.0, 587.0, 584.0, 582.0, 579.0, 579.0, 582.0, 576.0, 522.0, 522.0, 582.0, 633.0, 627.0, 570.0, 582.0, 582.0, 579.0, 525.0, 633.0, 570.0, 576.0, 636.0, 584.0, 573.0, 582.0, 584.0, 579.0, 570.0, 579.0, 570.0, 576.0, 570.0, 582.0, 530.0, 582.0, 354.0, 573.0, 587.0, 582.0, 579.0, 576.0, 579.0, 522.0, 576.0, 573.0, 579.0, 582.0, 579.0, 582.0, 630.0, 582.0, 573.0, 579.0, 576.0, 582.0, 573.0, 579.0, 582.0, 573.0, 579.0, 582.0, 522.0, 582.0, 573.0, 573.0, 576.0, 530.0, 581.0, 582.0, 579.0, 579.0, 582.0, 579.0, 582.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [290.0, 277.0, 286.0, 293.0, 288.0, 288.0, 262.0, 257.0, 294.0, 276.0, 286.0, 293.0, 288.0, 291.0, 261.0, 261.0, 317.0, 313.0, 286.0, 287.0, 263.0, 259.0, 289.0, 284.0, 295.0, 292.0, 264.0, 261.0, 283.0, 290.0, 289.0, 293.0, 265.0, 260.0, 258.0, 267.0, 293.0, 286.0, 260.0, 267.0, 280.0, 293.0, 319.0, 308.0, 285.0, 291.0, 267.0, 258.0, 265.0, 
262.0, 324.0, 306.0, 296.0, 291.0, 288.0, 296.0, 287.0, 295.0, 286.0, 293.0, 291.0, 288.0, 293.0, 289.0, 288.0, 288.0, 253.0, 269.0, 258.0, 264.0, 292.0, 290.0, 314.0, 319.0, 314.0, 313.0, 286.0, 284.0, 294.0, 288.0, 292.0, 290.0, 285.0, 294.0, 257.0, 268.0, 320.0, 313.0, 293.0, 277.0, 294.0, 282.0, 317.0, 319.0, 291.0, 293.0, 283.0, 290.0, 289.0, 293.0, 290.0, 294.0, 294.0, 285.0, 275.0, 295.0, 286.0, 293.0, 285.0, 285.0, 285.0, 291.0, 284.0, 286.0, 290.0, 292.0, 261.0, 269.0, 289.0, 293.0, 173.0, 181.0, 287.0, 286.0, 299.0, 288.0, 289.0, 293.0, 287.0, 292.0, 291.0, 285.0, 290.0, 289.0, 260.0, 262.0, 292.0, 284.0, 287.0, 286.0, 283.0, 296.0, 288.0, 294.0, 293.0, 286.0, 292.0, 290.0, 309.0, 321.0, 289.0, 293.0, 279.0, 294.0, 276.0, 303.0, 287.0, 289.0, 289.0, 293.0, 290.0, 283.0, 290.0, 289.0, 287.0, 295.0, 284.0, 289.0, 291.0, 288.0, 294.0, 288.0, 260.0, 262.0, 287.0, 295.0, 288.0, 285.0, 289.0, 284.0, 295.0, 281.0, 270.0, 260.0, 295.0, 286.0, 294.0, 288.0, 287.0, 292.0, 288.0, 291.0, 293.0, 289.0, 296.0, 283.0, 285.0, 297.0, 291.0, 291.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6972649237496986, "mean_inference_ms": 1.245114700116508, "mean_action_processing_ms": 0.133735206033721, "mean_env_wait_ms": 0.8394135928267644, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 636.0, "episode_reward_min": 354.0, "episode_reward_mean": 571.54, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 173.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 285.77}, "hist_stats": {"episode_reward": [567.0, 579.0, 576.0, 519.0, 570.0, 579.0, 579.0, 522.0, 630.0, 573.0, 522.0, 573.0, 587.0, 525.0, 573.0, 582.0, 525.0, 525.0, 579.0, 527.0, 573.0, 627.0, 576.0, 525.0, 527.0, 630.0, 587.0, 584.0, 582.0, 579.0, 579.0, 582.0, 576.0, 522.0, 522.0, 582.0, 633.0, 627.0, 570.0, 582.0, 582.0, 579.0, 525.0, 633.0, 570.0, 576.0, 636.0, 584.0, 573.0, 582.0, 584.0, 579.0, 570.0, 579.0, 570.0, 576.0, 570.0, 
582.0, 530.0, 582.0, 354.0, 573.0, 587.0, 582.0, 579.0, 576.0, 579.0, 522.0, 576.0, 573.0, 579.0, 582.0, 579.0, 582.0, 630.0, 582.0, 573.0, 579.0, 576.0, 582.0, 573.0, 579.0, 582.0, 573.0, 579.0, 582.0, 522.0, 582.0, 573.0, 573.0, 576.0, 530.0, 581.0, 582.0, 579.0, 579.0, 582.0, 579.0, 582.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [290.0, 277.0, 286.0, 293.0, 288.0, 288.0, 262.0, 257.0, 294.0, 276.0, 286.0, 293.0, 288.0, 291.0, 261.0, 261.0, 317.0, 313.0, 286.0, 287.0, 263.0, 259.0, 289.0, 284.0, 295.0, 292.0, 264.0, 261.0, 283.0, 290.0, 289.0, 293.0, 265.0, 260.0, 258.0, 267.0, 293.0, 286.0, 260.0, 267.0, 280.0, 293.0, 319.0, 308.0, 285.0, 291.0, 267.0, 258.0, 265.0, 262.0, 324.0, 306.0, 296.0, 291.0, 288.0, 296.0, 287.0, 295.0, 286.0, 293.0, 291.0, 288.0, 293.0, 289.0, 288.0, 288.0, 253.0, 269.0, 258.0, 264.0, 292.0, 290.0, 314.0, 319.0, 314.0, 313.0, 286.0, 284.0, 294.0, 288.0, 292.0, 290.0, 285.0, 294.0, 257.0, 268.0, 320.0, 313.0, 293.0, 277.0, 294.0, 282.0, 317.0, 319.0, 291.0, 293.0, 283.0, 290.0, 289.0, 293.0, 290.0, 294.0, 294.0, 285.0, 275.0, 295.0, 286.0, 293.0, 285.0, 285.0, 285.0, 291.0, 284.0, 286.0, 290.0, 292.0, 261.0, 269.0, 289.0, 293.0, 173.0, 181.0, 287.0, 286.0, 299.0, 288.0, 289.0, 293.0, 287.0, 292.0, 291.0, 285.0, 290.0, 289.0, 260.0, 262.0, 292.0, 284.0, 287.0, 286.0, 283.0, 296.0, 288.0, 294.0, 293.0, 286.0, 292.0, 290.0, 309.0, 321.0, 289.0, 293.0, 279.0, 294.0, 276.0, 303.0, 287.0, 289.0, 289.0, 293.0, 290.0, 283.0, 290.0, 289.0, 287.0, 
295.0, 284.0, 289.0, 291.0, 288.0, 294.0, 288.0, 260.0, 262.0, 287.0, 295.0, 288.0, 285.0, 289.0, 284.0, 295.0, 281.0, 270.0, 260.0, 295.0, 286.0, 294.0, 288.0, 287.0, 292.0, 288.0, 291.0, 293.0, 289.0, 296.0, 283.0, 285.0, 297.0, 291.0, 291.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6972649237496986, "mean_inference_ms": 1.245114700116508, "mean_action_processing_ms": 0.133735206033721, "mean_env_wait_ms": 0.8394135928267644, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 9804800, "num_agent_steps_trained": 9804800, "num_env_steps_sampled": 4902400, "num_env_steps_trained": 4902400, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 4902400, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 9804800, "timers": {"training_iteration_time_ms": 3604.75, "learn_time_ms": 1080.138, "learn_throughput": 11850.335, "synch_weights_time_ms": 10.938}, "counters": {"num_env_steps_sampled": 4902400, "num_env_steps_trained": 4902400, "num_agent_steps_sampled": 9804800, "num_agent_steps_trained": 9804800}, "done": false, "episodes_total": 12256, "training_iteration": 383, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-25-20", "timestamp": 1666581920, "time_this_iter_s": 3.900717258453369, "time_total_s": 1462.8736627101898, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1462.8736627101898, "timesteps_since_restore": 0, "iterations_since_restore": 383, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 25.160000000000004, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 197.4, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 176.55, "shaped_reward_min": 114, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.69, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 16.93, "onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 15.59, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 16.82, "useful_onion_pickup_agent_1_min": 11, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.08, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.1, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.04, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, 
"useful_onion_drop_agent_1_mean": 0.05, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.34, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 16.54, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.48, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.07, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.31, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.01, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.08, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.06, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.17, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 4.82, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.11, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.78, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.34, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 16.54, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.34, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 16.54, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.002254174090921879, "policy_loss": -0.002626133384183049, "vf_loss": 
7.838954448699951, "vf_explained_var": 0.5783690214157104, "kl": 0.0026023017708212137, "entropy": 0.8238720893859863, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 4915200, "num_env_steps_trained": 4915200, "num_agent_steps_sampled": 9830400, "num_agent_steps_trained": 9830400}, "sampler_results": {"episode_reward_max": 636.0, "episode_reward_min": 354.0, "episode_reward_mean": 571.35, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 173.0}, "policy_reward_max": {"ppo": 321.0}, "policy_reward_mean": {"ppo": 285.675}, "custom_metrics": {"sparse_reward_mean": 197.4, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 176.55, "shaped_reward_min": 114, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.69, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 16.93, 
"onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 15.59, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 16.82, "useful_onion_pickup_agent_1_min": 11, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.08, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.1, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.04, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.05, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.34, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 16.54, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.48, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.07, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.31, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.01, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.08, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.06, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.17, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 4.82, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.11, "soup_delivery_agent_0_min": 2, 
"soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.78, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.34, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 16.54, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.34, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 16.54, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [576.0, 522.0, 522.0, 582.0, 633.0, 627.0, 570.0, 582.0, 582.0, 579.0, 525.0, 633.0, 570.0, 576.0, 636.0, 584.0, 573.0, 582.0, 584.0, 579.0, 570.0, 579.0, 570.0, 576.0, 570.0, 582.0, 530.0, 582.0, 354.0, 573.0, 587.0, 582.0, 579.0, 576.0, 579.0, 522.0, 576.0, 573.0, 579.0, 582.0, 579.0, 582.0, 630.0, 582.0, 573.0, 579.0, 576.0, 582.0, 573.0, 579.0, 582.0, 573.0, 579.0, 582.0, 522.0, 582.0, 573.0, 573.0, 576.0, 530.0, 581.0, 582.0, 579.0, 579.0, 582.0, 579.0, 582.0, 582.0, 530.0, 576.0, 573.0, 582.0, 519.0, 579.0, 530.0, 576.0, 587.0, 627.0, 579.0, 630.0, 581.0, 579.0, 525.0, 582.0, 576.0, 525.0, 630.0, 525.0, 587.0, 524.0, 582.0, 522.0, 579.0, 627.0, 522.0, 525.0, 582.0, 582.0, 579.0, 522.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [288.0, 288.0, 253.0, 269.0, 258.0, 264.0, 292.0, 290.0, 314.0, 319.0, 314.0, 313.0, 286.0, 284.0, 294.0, 288.0, 292.0, 290.0, 285.0, 294.0, 257.0, 268.0, 320.0, 313.0, 293.0, 277.0, 294.0, 282.0, 317.0, 319.0, 291.0, 293.0, 283.0, 290.0, 289.0, 293.0, 290.0, 294.0, 294.0, 285.0, 275.0, 295.0, 286.0, 293.0, 285.0, 285.0, 285.0, 291.0, 284.0, 
286.0, 290.0, 292.0, 261.0, 269.0, 289.0, 293.0, 173.0, 181.0, 287.0, 286.0, 299.0, 288.0, 289.0, 293.0, 287.0, 292.0, 291.0, 285.0, 290.0, 289.0, 260.0, 262.0, 292.0, 284.0, 287.0, 286.0, 283.0, 296.0, 288.0, 294.0, 293.0, 286.0, 292.0, 290.0, 309.0, 321.0, 289.0, 293.0, 279.0, 294.0, 276.0, 303.0, 287.0, 289.0, 289.0, 293.0, 290.0, 283.0, 290.0, 289.0, 287.0, 295.0, 284.0, 289.0, 291.0, 288.0, 294.0, 288.0, 260.0, 262.0, 287.0, 295.0, 288.0, 285.0, 289.0, 284.0, 295.0, 281.0, 270.0, 260.0, 295.0, 286.0, 294.0, 288.0, 287.0, 292.0, 288.0, 291.0, 293.0, 289.0, 296.0, 283.0, 285.0, 297.0, 291.0, 291.0, 262.0, 268.0, 292.0, 284.0, 283.0, 290.0, 291.0, 291.0, 256.0, 263.0, 289.0, 290.0, 264.0, 266.0, 286.0, 290.0, 298.0, 289.0, 319.0, 308.0, 286.0, 293.0, 314.0, 316.0, 298.0, 283.0, 291.0, 288.0, 261.0, 264.0, 282.0, 300.0, 288.0, 288.0, 258.0, 267.0, 318.0, 312.0, 266.0, 259.0, 288.0, 299.0, 260.0, 264.0, 287.0, 295.0, 255.0, 267.0, 293.0, 286.0, 310.0, 317.0, 263.0, 259.0, 262.0, 263.0, 301.0, 281.0, 290.0, 292.0, 285.0, 294.0, 271.0, 251.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6972223456770706, "mean_inference_ms": 1.2451278394099532, "mean_action_processing_ms": 0.1337272957493324, "mean_env_wait_ms": 0.8393944616489282, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 636.0, "episode_reward_min": 354.0, "episode_reward_mean": 571.35, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 173.0}, "policy_reward_max": {"ppo": 321.0}, "policy_reward_mean": {"ppo": 285.675}, "hist_stats": {"episode_reward": [576.0, 522.0, 522.0, 582.0, 633.0, 627.0, 570.0, 582.0, 582.0, 579.0, 525.0, 633.0, 570.0, 576.0, 636.0, 584.0, 573.0, 582.0, 584.0, 579.0, 570.0, 579.0, 570.0, 576.0, 570.0, 582.0, 530.0, 582.0, 354.0, 573.0, 587.0, 582.0, 579.0, 576.0, 579.0, 522.0, 576.0, 573.0, 579.0, 582.0, 579.0, 582.0, 630.0, 582.0, 573.0, 579.0, 576.0, 582.0, 573.0, 579.0, 582.0, 573.0, 579.0, 582.0, 522.0, 582.0, 573.0, 
573.0, 576.0, 530.0, 581.0, 582.0, 579.0, 579.0, 582.0, 579.0, 582.0, 582.0, 530.0, 576.0, 573.0, 582.0, 519.0, 579.0, 530.0, 576.0, 587.0, 627.0, 579.0, 630.0, 581.0, 579.0, 525.0, 582.0, 576.0, 525.0, 630.0, 525.0, 587.0, 524.0, 582.0, 522.0, 579.0, 627.0, 522.0, 525.0, 582.0, 582.0, 579.0, 522.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [288.0, 288.0, 253.0, 269.0, 258.0, 264.0, 292.0, 290.0, 314.0, 319.0, 314.0, 313.0, 286.0, 284.0, 294.0, 288.0, 292.0, 290.0, 285.0, 294.0, 257.0, 268.0, 320.0, 313.0, 293.0, 277.0, 294.0, 282.0, 317.0, 319.0, 291.0, 293.0, 283.0, 290.0, 289.0, 293.0, 290.0, 294.0, 294.0, 285.0, 275.0, 295.0, 286.0, 293.0, 285.0, 285.0, 285.0, 291.0, 284.0, 286.0, 290.0, 292.0, 261.0, 269.0, 289.0, 293.0, 173.0, 181.0, 287.0, 286.0, 299.0, 288.0, 289.0, 293.0, 287.0, 292.0, 291.0, 285.0, 290.0, 289.0, 260.0, 262.0, 292.0, 284.0, 287.0, 286.0, 283.0, 296.0, 288.0, 294.0, 293.0, 286.0, 292.0, 290.0, 309.0, 321.0, 289.0, 293.0, 279.0, 294.0, 276.0, 303.0, 287.0, 289.0, 289.0, 293.0, 290.0, 283.0, 290.0, 289.0, 287.0, 295.0, 284.0, 289.0, 291.0, 288.0, 294.0, 288.0, 260.0, 262.0, 287.0, 295.0, 288.0, 285.0, 289.0, 284.0, 295.0, 281.0, 270.0, 260.0, 295.0, 286.0, 294.0, 288.0, 287.0, 292.0, 288.0, 291.0, 293.0, 289.0, 296.0, 283.0, 285.0, 297.0, 291.0, 291.0, 262.0, 268.0, 292.0, 284.0, 283.0, 290.0, 291.0, 291.0, 256.0, 263.0, 289.0, 290.0, 264.0, 266.0, 286.0, 290.0, 298.0, 289.0, 319.0, 308.0, 286.0, 293.0, 314.0, 316.0, 298.0, 283.0, 291.0, 288.0, 261.0, 
264.0, 282.0, 300.0, 288.0, 288.0, 258.0, 267.0, 318.0, 312.0, 266.0, 259.0, 288.0, 299.0, 260.0, 264.0, 287.0, 295.0, 255.0, 267.0, 293.0, 286.0, 310.0, 317.0, 263.0, 259.0, 262.0, 263.0, 301.0, 281.0, 290.0, 292.0, 285.0, 294.0, 271.0, 251.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6972223456770706, "mean_inference_ms": 1.2451278394099532, "mean_action_processing_ms": 0.1337272957493324, "mean_env_wait_ms": 0.8393944616489282, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 9830400, "num_agent_steps_trained": 9830400, "num_env_steps_sampled": 4915200, "num_env_steps_trained": 4915200, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 4915200, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 9830400, "timers": {"training_iteration_time_ms": 3623.122, "learn_time_ms": 1078.354, "learn_throughput": 11869.94, "synch_weights_time_ms": 10.934}, "counters": {"num_env_steps_sampled": 4915200, "num_env_steps_trained": 4915200, "num_agent_steps_sampled": 9830400, "num_agent_steps_trained": 9830400}, "done": false, "episodes_total": 12288, "training_iteration": 384, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-25-24", "timestamp": 1666581924, "time_this_iter_s": 3.874924659729004, "time_total_s": 1466.7485873699188, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1466.7485873699188, "timesteps_since_restore": 0, "iterations_since_restore": 384, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 21.75, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 197.6, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 176.85, "shaped_reward_min": 159, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.28, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 17.5, "onion_pickup_agent_1_min": 12, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 15.14, "useful_onion_pickup_agent_0_min": 9, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 17.37, "useful_onion_pickup_agent_1_min": 12, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.12, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.11, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.06, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, 
"useful_onion_drop_agent_1_mean": 0.05, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 14.87, "potting_onion_agent_0_min": 9, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.06, "potting_onion_agent_1_min": 12, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.74, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 4.82, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.57, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.75, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.11, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.05, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.43, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.59, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.38, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.53, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.87, "optimal_onion_potting_agent_0_min": 9, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.06, "optimal_onion_potting_agent_1_min": 12, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.87, "viable_onion_potting_agent_0_min": 9, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.06, "viable_onion_potting_agent_1_min": 12, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0014922493137419224, "policy_loss": 0.0011385057587176561, "vf_loss": 
7.6861572265625, "vf_explained_var": 0.5910747647285461, "kl": 0.0026840257924050093, "entropy": 0.8297452926635742, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 4928000, "num_env_steps_trained": 4928000, "num_agent_steps_sampled": 9856000, "num_agent_steps_trained": 9856000}, "sampler_results": {"episode_reward_max": 630.0, "episode_reward_min": 519.0, "episode_reward_mean": 572.05, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 251.0}, "policy_reward_max": {"ppo": 321.0}, "policy_reward_mean": {"ppo": 286.025}, "custom_metrics": {"sparse_reward_mean": 197.6, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 176.85, "shaped_reward_min": 159, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.28, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 17.5, 
"onion_pickup_agent_1_min": 12, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 15.14, "useful_onion_pickup_agent_0_min": 9, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 17.37, "useful_onion_pickup_agent_1_min": 12, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.12, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.11, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.06, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.05, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 14.87, "potting_onion_agent_0_min": 9, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.06, "potting_onion_agent_1_min": 12, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.74, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 4.82, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.57, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.75, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.11, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.05, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.43, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.59, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.38, "soup_delivery_agent_0_min": 2, 
"soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.53, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.87, "optimal_onion_potting_agent_0_min": 9, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.06, "optimal_onion_potting_agent_1_min": 12, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.87, "viable_onion_potting_agent_0_min": 9, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.06, "viable_onion_potting_agent_1_min": 12, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 576.0, 579.0, 522.0, 576.0, 573.0, 579.0, 582.0, 579.0, 582.0, 630.0, 582.0, 573.0, 579.0, 576.0, 582.0, 573.0, 579.0, 582.0, 573.0, 579.0, 582.0, 522.0, 582.0, 573.0, 573.0, 576.0, 530.0, 581.0, 582.0, 579.0, 579.0, 582.0, 579.0, 582.0, 582.0, 530.0, 576.0, 573.0, 582.0, 519.0, 579.0, 530.0, 576.0, 587.0, 627.0, 579.0, 630.0, 581.0, 579.0, 525.0, 582.0, 576.0, 525.0, 630.0, 525.0, 587.0, 524.0, 582.0, 522.0, 579.0, 627.0, 522.0, 525.0, 582.0, 582.0, 579.0, 522.0, 581.0, 576.0, 576.0, 582.0, 587.0, 530.0, 582.0, 570.0, 579.0, 587.0, 573.0, 579.0, 573.0, 573.0, 579.0, 573.0, 579.0, 522.0, 576.0, 579.0, 576.0, 579.0, 576.0, 582.0, 581.0, 579.0, 582.0, 530.0, 573.0, 519.0, 582.0, 627.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [287.0, 292.0, 291.0, 285.0, 290.0, 289.0, 260.0, 262.0, 292.0, 284.0, 287.0, 286.0, 283.0, 296.0, 288.0, 294.0, 293.0, 286.0, 292.0, 290.0, 309.0, 321.0, 289.0, 293.0, 279.0, 294.0, 276.0, 303.0, 287.0, 289.0, 289.0, 293.0, 290.0, 283.0, 290.0, 289.0, 287.0, 295.0, 284.0, 289.0, 291.0, 288.0, 294.0, 288.0, 260.0, 262.0, 287.0, 295.0, 288.0, 
285.0, 289.0, 284.0, 295.0, 281.0, 270.0, 260.0, 295.0, 286.0, 294.0, 288.0, 287.0, 292.0, 288.0, 291.0, 293.0, 289.0, 296.0, 283.0, 285.0, 297.0, 291.0, 291.0, 262.0, 268.0, 292.0, 284.0, 283.0, 290.0, 291.0, 291.0, 256.0, 263.0, 289.0, 290.0, 264.0, 266.0, 286.0, 290.0, 298.0, 289.0, 319.0, 308.0, 286.0, 293.0, 314.0, 316.0, 298.0, 283.0, 291.0, 288.0, 261.0, 264.0, 282.0, 300.0, 288.0, 288.0, 258.0, 267.0, 318.0, 312.0, 266.0, 259.0, 288.0, 299.0, 260.0, 264.0, 287.0, 295.0, 255.0, 267.0, 293.0, 286.0, 310.0, 317.0, 263.0, 259.0, 262.0, 263.0, 301.0, 281.0, 290.0, 292.0, 285.0, 294.0, 271.0, 251.0, 288.0, 293.0, 288.0, 288.0, 288.0, 288.0, 293.0, 289.0, 290.0, 297.0, 262.0, 268.0, 295.0, 287.0, 287.0, 283.0, 290.0, 289.0, 290.0, 297.0, 289.0, 284.0, 288.0, 291.0, 283.0, 290.0, 278.0, 295.0, 296.0, 283.0, 287.0, 286.0, 289.0, 290.0, 262.0, 260.0, 290.0, 286.0, 295.0, 284.0, 289.0, 287.0, 286.0, 293.0, 291.0, 285.0, 293.0, 289.0, 297.0, 284.0, 287.0, 292.0, 293.0, 289.0, 268.0, 262.0, 290.0, 283.0, 262.0, 257.0, 295.0, 287.0, 315.0, 312.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6971963878902551, "mean_inference_ms": 1.245146808552077, "mean_action_processing_ms": 0.13371996697535277, "mean_env_wait_ms": 0.8393813351805128, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 630.0, "episode_reward_min": 519.0, "episode_reward_mean": 572.05, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 251.0}, "policy_reward_max": {"ppo": 321.0}, "policy_reward_mean": {"ppo": 286.025}, "hist_stats": {"episode_reward": [579.0, 576.0, 579.0, 522.0, 576.0, 573.0, 579.0, 582.0, 579.0, 582.0, 630.0, 582.0, 573.0, 579.0, 576.0, 582.0, 573.0, 579.0, 582.0, 573.0, 579.0, 582.0, 522.0, 582.0, 573.0, 573.0, 576.0, 530.0, 581.0, 582.0, 579.0, 579.0, 582.0, 579.0, 582.0, 582.0, 530.0, 576.0, 573.0, 582.0, 519.0, 579.0, 530.0, 576.0, 587.0, 627.0, 579.0, 630.0, 581.0, 579.0, 525.0, 582.0, 576.0, 525.0, 630.0, 525.0, 587.0, 
524.0, 582.0, 522.0, 579.0, 627.0, 522.0, 525.0, 582.0, 582.0, 579.0, 522.0, 581.0, 576.0, 576.0, 582.0, 587.0, 530.0, 582.0, 570.0, 579.0, 587.0, 573.0, 579.0, 573.0, 573.0, 579.0, 573.0, 579.0, 522.0, 576.0, 579.0, 576.0, 579.0, 576.0, 582.0, 581.0, 579.0, 582.0, 530.0, 573.0, 519.0, 582.0, 627.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [287.0, 292.0, 291.0, 285.0, 290.0, 289.0, 260.0, 262.0, 292.0, 284.0, 287.0, 286.0, 283.0, 296.0, 288.0, 294.0, 293.0, 286.0, 292.0, 290.0, 309.0, 321.0, 289.0, 293.0, 279.0, 294.0, 276.0, 303.0, 287.0, 289.0, 289.0, 293.0, 290.0, 283.0, 290.0, 289.0, 287.0, 295.0, 284.0, 289.0, 291.0, 288.0, 294.0, 288.0, 260.0, 262.0, 287.0, 295.0, 288.0, 285.0, 289.0, 284.0, 295.0, 281.0, 270.0, 260.0, 295.0, 286.0, 294.0, 288.0, 287.0, 292.0, 288.0, 291.0, 293.0, 289.0, 296.0, 283.0, 285.0, 297.0, 291.0, 291.0, 262.0, 268.0, 292.0, 284.0, 283.0, 290.0, 291.0, 291.0, 256.0, 263.0, 289.0, 290.0, 264.0, 266.0, 286.0, 290.0, 298.0, 289.0, 319.0, 308.0, 286.0, 293.0, 314.0, 316.0, 298.0, 283.0, 291.0, 288.0, 261.0, 264.0, 282.0, 300.0, 288.0, 288.0, 258.0, 267.0, 318.0, 312.0, 266.0, 259.0, 288.0, 299.0, 260.0, 264.0, 287.0, 295.0, 255.0, 267.0, 293.0, 286.0, 310.0, 317.0, 263.0, 259.0, 262.0, 263.0, 301.0, 281.0, 290.0, 292.0, 285.0, 294.0, 271.0, 251.0, 288.0, 293.0, 288.0, 288.0, 288.0, 288.0, 293.0, 289.0, 290.0, 297.0, 262.0, 268.0, 295.0, 287.0, 287.0, 283.0, 290.0, 289.0, 290.0, 297.0, 289.0, 284.0, 288.0, 291.0, 283.0, 290.0, 278.0, 295.0, 296.0, 
283.0, 287.0, 286.0, 289.0, 290.0, 262.0, 260.0, 290.0, 286.0, 295.0, 284.0, 289.0, 287.0, 286.0, 293.0, 291.0, 285.0, 293.0, 289.0, 297.0, 284.0, 287.0, 292.0, 293.0, 289.0, 268.0, 262.0, 290.0, 283.0, 262.0, 257.0, 295.0, 287.0, 315.0, 312.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6971963878902551, "mean_inference_ms": 1.245146808552077, "mean_action_processing_ms": 0.13371996697535277, "mean_env_wait_ms": 0.8393813351805128, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 9856000, "num_agent_steps_trained": 9856000, "num_env_steps_sampled": 4928000, "num_env_steps_trained": 4928000, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 4928000, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 9856000, "timers": {"training_iteration_time_ms": 3648.492, "learn_time_ms": 1095.707, "learn_throughput": 11681.96, "synch_weights_time_ms": 11.378}, "counters": {"num_env_steps_sampled": 4928000, "num_env_steps_trained": 4928000, "num_agent_steps_sampled": 9856000, "num_agent_steps_trained": 9856000}, "done": false, "episodes_total": 12320, "training_iteration": 385, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-25-28", "timestamp": 1666581928, "time_this_iter_s": 3.7417666912078857, "time_total_s": 1470.4903540611267, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1470.4903540611267, "timesteps_since_restore": 0, "iterations_since_restore": 385, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.966666666666665, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 196.2, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 175.42, "shaped_reward_min": 155, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.23, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 17.37, "onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 15.1, "useful_onion_pickup_agent_0_min": 9, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 17.27, "useful_onion_pickup_agent_1_min": 11, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.14, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.08, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.04, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, 
"useful_onion_drop_agent_1_mean": 0.03, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 14.83, "potting_onion_agent_0_min": 9, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 16.89, "potting_onion_agent_1_min": 11, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.71, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 4.77, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 7, "useful_dish_pickup_agent_0_mean": 5.48, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.64, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.1, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.08, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.44, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.54, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.38, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.46, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.83, "optimal_onion_potting_agent_0_min": 9, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 16.89, "optimal_onion_potting_agent_1_min": 11, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.83, "viable_onion_potting_agent_0_min": 9, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 16.89, "viable_onion_potting_agent_1_min": 11, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0015976729337126017, "policy_loss": -0.0019460530020296574, "vf_loss": 
7.6813273429870605, "vf_explained_var": 0.5826541185379028, "kl": 0.002368117216974497, "entropy": 0.8395055532455444, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 4940800, "num_env_steps_trained": 4940800, "num_agent_steps_sampled": 9881600, "num_agent_steps_trained": 9881600}, "sampler_results": {"episode_reward_max": 630.0, "episode_reward_min": 515.0, "episode_reward_mean": 567.82, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 251.0}, "policy_reward_max": {"ppo": 319.0}, "policy_reward_mean": {"ppo": 283.91}, "custom_metrics": {"sparse_reward_mean": 196.2, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 175.42, "shaped_reward_min": 155, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.23, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 17.37, 
"onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 15.1, "useful_onion_pickup_agent_0_min": 9, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 17.27, "useful_onion_pickup_agent_1_min": 11, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.14, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.08, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.04, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.03, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 14.83, "potting_onion_agent_0_min": 9, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 16.89, "potting_onion_agent_1_min": 11, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.71, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 4.77, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 7, "useful_dish_pickup_agent_0_mean": 5.48, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.64, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.1, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.08, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.44, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.54, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.38, "soup_delivery_agent_0_min": 3, 
"soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.46, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.83, "optimal_onion_potting_agent_0_min": 9, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 16.89, "optimal_onion_potting_agent_1_min": 11, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.83, "viable_onion_potting_agent_0_min": 9, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 16.89, "viable_onion_potting_agent_1_min": 11, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 579.0, 582.0, 582.0, 530.0, 576.0, 573.0, 582.0, 519.0, 579.0, 530.0, 576.0, 587.0, 627.0, 579.0, 630.0, 581.0, 579.0, 525.0, 582.0, 576.0, 525.0, 630.0, 525.0, 587.0, 524.0, 582.0, 522.0, 579.0, 627.0, 522.0, 525.0, 582.0, 582.0, 579.0, 522.0, 581.0, 576.0, 576.0, 582.0, 587.0, 530.0, 582.0, 570.0, 579.0, 587.0, 573.0, 579.0, 573.0, 573.0, 579.0, 573.0, 579.0, 522.0, 576.0, 579.0, 576.0, 579.0, 576.0, 582.0, 581.0, 579.0, 582.0, 530.0, 573.0, 519.0, 582.0, 627.0, 519.0, 587.0, 573.0, 576.0, 576.0, 530.0, 576.0, 530.0, 576.0, 522.0, 573.0, 579.0, 576.0, 525.0, 579.0, 539.0, 579.0, 579.0, 579.0, 573.0, 515.0, 522.0, 576.0, 579.0, 576.0, 573.0, 576.0, 525.0, 567.0, 576.0, 570.0, 570.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [293.0, 289.0, 296.0, 283.0, 285.0, 297.0, 291.0, 291.0, 262.0, 268.0, 292.0, 284.0, 283.0, 290.0, 291.0, 291.0, 256.0, 263.0, 289.0, 290.0, 264.0, 266.0, 286.0, 290.0, 298.0, 289.0, 319.0, 308.0, 286.0, 293.0, 314.0, 316.0, 298.0, 283.0, 291.0, 288.0, 261.0, 264.0, 282.0, 300.0, 288.0, 288.0, 258.0, 267.0, 318.0, 312.0, 266.0, 259.0, 288.0, 
299.0, 260.0, 264.0, 287.0, 295.0, 255.0, 267.0, 293.0, 286.0, 310.0, 317.0, 263.0, 259.0, 262.0, 263.0, 301.0, 281.0, 290.0, 292.0, 285.0, 294.0, 271.0, 251.0, 288.0, 293.0, 288.0, 288.0, 288.0, 288.0, 293.0, 289.0, 290.0, 297.0, 262.0, 268.0, 295.0, 287.0, 287.0, 283.0, 290.0, 289.0, 290.0, 297.0, 289.0, 284.0, 288.0, 291.0, 283.0, 290.0, 278.0, 295.0, 296.0, 283.0, 287.0, 286.0, 289.0, 290.0, 262.0, 260.0, 290.0, 286.0, 295.0, 284.0, 289.0, 287.0, 286.0, 293.0, 291.0, 285.0, 293.0, 289.0, 297.0, 284.0, 287.0, 292.0, 293.0, 289.0, 268.0, 262.0, 290.0, 283.0, 262.0, 257.0, 295.0, 287.0, 315.0, 312.0, 259.0, 260.0, 299.0, 288.0, 284.0, 289.0, 289.0, 287.0, 286.0, 290.0, 266.0, 264.0, 288.0, 288.0, 270.0, 260.0, 280.0, 296.0, 260.0, 262.0, 285.0, 288.0, 289.0, 290.0, 290.0, 286.0, 262.0, 263.0, 287.0, 292.0, 270.0, 269.0, 295.0, 284.0, 290.0, 289.0, 288.0, 291.0, 282.0, 291.0, 261.0, 254.0, 262.0, 260.0, 289.0, 287.0, 299.0, 280.0, 293.0, 283.0, 291.0, 282.0, 291.0, 285.0, 264.0, 261.0, 284.0, 283.0, 295.0, 281.0, 285.0, 285.0, 274.0, 296.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.697175436184877, "mean_inference_ms": 1.2450850463363872, "mean_action_processing_ms": 0.1337122276607287, "mean_env_wait_ms": 0.839306888527343, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 630.0, "episode_reward_min": 515.0, "episode_reward_mean": 567.82, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 251.0}, "policy_reward_max": {"ppo": 319.0}, "policy_reward_mean": {"ppo": 283.91}, "hist_stats": {"episode_reward": [582.0, 579.0, 582.0, 582.0, 530.0, 576.0, 573.0, 582.0, 519.0, 579.0, 530.0, 576.0, 587.0, 627.0, 579.0, 630.0, 581.0, 579.0, 525.0, 582.0, 576.0, 525.0, 630.0, 525.0, 587.0, 524.0, 582.0, 522.0, 579.0, 627.0, 522.0, 525.0, 582.0, 582.0, 579.0, 522.0, 581.0, 576.0, 576.0, 582.0, 587.0, 530.0, 582.0, 570.0, 579.0, 587.0, 573.0, 579.0, 573.0, 573.0, 579.0, 573.0, 579.0, 522.0, 576.0, 579.0, 576.0, 
579.0, 576.0, 582.0, 581.0, 579.0, 582.0, 530.0, 573.0, 519.0, 582.0, 627.0, 519.0, 587.0, 573.0, 576.0, 576.0, 530.0, 576.0, 530.0, 576.0, 522.0, 573.0, 579.0, 576.0, 525.0, 579.0, 539.0, 579.0, 579.0, 579.0, 573.0, 515.0, 522.0, 576.0, 579.0, 576.0, 573.0, 576.0, 525.0, 567.0, 576.0, 570.0, 570.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [293.0, 289.0, 296.0, 283.0, 285.0, 297.0, 291.0, 291.0, 262.0, 268.0, 292.0, 284.0, 283.0, 290.0, 291.0, 291.0, 256.0, 263.0, 289.0, 290.0, 264.0, 266.0, 286.0, 290.0, 298.0, 289.0, 319.0, 308.0, 286.0, 293.0, 314.0, 316.0, 298.0, 283.0, 291.0, 288.0, 261.0, 264.0, 282.0, 300.0, 288.0, 288.0, 258.0, 267.0, 318.0, 312.0, 266.0, 259.0, 288.0, 299.0, 260.0, 264.0, 287.0, 295.0, 255.0, 267.0, 293.0, 286.0, 310.0, 317.0, 263.0, 259.0, 262.0, 263.0, 301.0, 281.0, 290.0, 292.0, 285.0, 294.0, 271.0, 251.0, 288.0, 293.0, 288.0, 288.0, 288.0, 288.0, 293.0, 289.0, 290.0, 297.0, 262.0, 268.0, 295.0, 287.0, 287.0, 283.0, 290.0, 289.0, 290.0, 297.0, 289.0, 284.0, 288.0, 291.0, 283.0, 290.0, 278.0, 295.0, 296.0, 283.0, 287.0, 286.0, 289.0, 290.0, 262.0, 260.0, 290.0, 286.0, 295.0, 284.0, 289.0, 287.0, 286.0, 293.0, 291.0, 285.0, 293.0, 289.0, 297.0, 284.0, 287.0, 292.0, 293.0, 289.0, 268.0, 262.0, 290.0, 283.0, 262.0, 257.0, 295.0, 287.0, 315.0, 312.0, 259.0, 260.0, 299.0, 288.0, 284.0, 289.0, 289.0, 287.0, 286.0, 290.0, 266.0, 264.0, 288.0, 288.0, 270.0, 260.0, 280.0, 296.0, 260.0, 262.0, 285.0, 288.0, 289.0, 290.0, 290.0, 286.0, 262.0, 263.0, 287.0, 
292.0, 270.0, 269.0, 295.0, 284.0, 290.0, 289.0, 288.0, 291.0, 282.0, 291.0, 261.0, 254.0, 262.0, 260.0, 289.0, 287.0, 299.0, 280.0, 293.0, 283.0, 291.0, 282.0, 291.0, 285.0, 264.0, 261.0, 284.0, 283.0, 295.0, 281.0, 285.0, 285.0, 274.0, 296.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.697175436184877, "mean_inference_ms": 1.2450850463363872, "mean_action_processing_ms": 0.1337122276607287, "mean_env_wait_ms": 0.839306888527343, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 9881600, "num_agent_steps_trained": 9881600, "num_env_steps_sampled": 4940800, "num_env_steps_trained": 4940800, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 4940800, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 9881600, "timers": {"training_iteration_time_ms": 3649.585, "learn_time_ms": 1106.505, "learn_throughput": 11567.954, "synch_weights_time_ms": 11.707}, "counters": {"num_env_steps_sampled": 4940800, "num_env_steps_trained": 4940800, "num_agent_steps_sampled": 9881600, "num_agent_steps_trained": 9881600}, "done": false, "episodes_total": 12352, "training_iteration": 386, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-25-32", "timestamp": 1666581932, "time_this_iter_s": 3.649009943008423, "time_total_s": 1474.1393640041351, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1474.1393640041351, "timesteps_since_restore": 0, "iterations_since_restore": 386, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.16, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 196.4, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 174.96, "shaped_reward_min": 108, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.3, "onion_pickup_agent_0_min": 5, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 17.34, "onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 15.15, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 17.25, "useful_onion_pickup_agent_1_min": 11, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.2, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.11, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.09, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, 
"useful_onion_drop_agent_1_mean": 0.02, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 14.79, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 16.87, "potting_onion_agent_1_min": 11, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.68, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 4.81, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 7, "useful_dish_pickup_agent_0_mean": 5.42, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.64, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.14, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 0.1, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.06, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 6, "useful_dish_drop_agent_1_mean": 0.04, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.4, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.56, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.36, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.48, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.79, "optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 16.87, "optimal_onion_potting_agent_1_min": 11, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.79, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 16.87, "viable_onion_potting_agent_1_min": 11, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.00029556709341704845, "policy_loss": -0.0006432689260691404, "vf_loss": 
7.661875247955322, "vf_explained_var": 0.6070252656936646, "kl": 0.0027911756187677383, "entropy": 0.836971640586853, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 4953600, "num_env_steps_trained": 4953600, "num_agent_steps_sampled": 9907200, "num_agent_steps_trained": 9907200}, "sampler_results": {"episode_reward_max": 630.0, "episode_reward_min": 348.0, "episode_reward_mean": 567.76, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 172.0}, "policy_reward_max": {"ppo": 316.0}, "policy_reward_mean": {"ppo": 283.88}, "custom_metrics": {"sparse_reward_mean": 196.4, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 174.96, "shaped_reward_min": 108, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.3, "onion_pickup_agent_0_min": 5, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 17.34, 
"onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 15.15, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 17.25, "useful_onion_pickup_agent_1_min": 11, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.2, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.11, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.09, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.02, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 14.79, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 16.87, "potting_onion_agent_1_min": 11, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.68, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 4.81, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 7, "useful_dish_pickup_agent_0_mean": 5.42, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.64, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.14, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 0.1, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.06, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 6, "useful_dish_drop_agent_1_mean": 0.04, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.4, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.56, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.36, "soup_delivery_agent_0_min": 3, 
"soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.48, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.79, "optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 16.87, "optimal_onion_potting_agent_1_min": 11, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.79, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 16.87, "viable_onion_potting_agent_1_min": 11, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 582.0, 579.0, 522.0, 581.0, 576.0, 576.0, 582.0, 587.0, 530.0, 582.0, 570.0, 579.0, 587.0, 573.0, 579.0, 573.0, 573.0, 579.0, 573.0, 579.0, 522.0, 576.0, 579.0, 576.0, 579.0, 576.0, 582.0, 581.0, 579.0, 582.0, 530.0, 573.0, 519.0, 582.0, 627.0, 519.0, 587.0, 573.0, 576.0, 576.0, 530.0, 576.0, 530.0, 576.0, 522.0, 573.0, 579.0, 576.0, 525.0, 579.0, 539.0, 579.0, 579.0, 579.0, 573.0, 515.0, 522.0, 576.0, 579.0, 576.0, 573.0, 576.0, 525.0, 567.0, 576.0, 570.0, 570.0, 576.0, 582.0, 570.0, 579.0, 579.0, 579.0, 530.0, 576.0, 579.0, 584.0, 570.0, 582.0, 525.0, 624.0, 630.0, 579.0, 576.0, 579.0, 579.0, 530.0, 522.0, 576.0, 348.0, 573.0, 573.0, 579.0, 582.0, 630.0, 573.0, 582.0, 579.0, 573.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [301.0, 281.0, 290.0, 292.0, 285.0, 294.0, 271.0, 251.0, 288.0, 293.0, 288.0, 288.0, 288.0, 288.0, 293.0, 289.0, 290.0, 297.0, 262.0, 268.0, 295.0, 287.0, 287.0, 283.0, 290.0, 289.0, 290.0, 297.0, 289.0, 284.0, 288.0, 291.0, 283.0, 290.0, 278.0, 295.0, 296.0, 283.0, 287.0, 286.0, 289.0, 290.0, 262.0, 260.0, 290.0, 286.0, 295.0, 284.0, 289.0, 
287.0, 286.0, 293.0, 291.0, 285.0, 293.0, 289.0, 297.0, 284.0, 287.0, 292.0, 293.0, 289.0, 268.0, 262.0, 290.0, 283.0, 262.0, 257.0, 295.0, 287.0, 315.0, 312.0, 259.0, 260.0, 299.0, 288.0, 284.0, 289.0, 289.0, 287.0, 286.0, 290.0, 266.0, 264.0, 288.0, 288.0, 270.0, 260.0, 280.0, 296.0, 260.0, 262.0, 285.0, 288.0, 289.0, 290.0, 290.0, 286.0, 262.0, 263.0, 287.0, 292.0, 270.0, 269.0, 295.0, 284.0, 290.0, 289.0, 288.0, 291.0, 282.0, 291.0, 261.0, 254.0, 262.0, 260.0, 289.0, 287.0, 299.0, 280.0, 293.0, 283.0, 291.0, 282.0, 291.0, 285.0, 264.0, 261.0, 284.0, 283.0, 295.0, 281.0, 285.0, 285.0, 274.0, 296.0, 285.0, 291.0, 291.0, 291.0, 288.0, 282.0, 282.0, 297.0, 289.0, 290.0, 287.0, 292.0, 255.0, 275.0, 289.0, 287.0, 284.0, 295.0, 285.0, 299.0, 290.0, 280.0, 293.0, 289.0, 261.0, 264.0, 308.0, 316.0, 314.0, 316.0, 290.0, 289.0, 288.0, 288.0, 288.0, 291.0, 285.0, 294.0, 262.0, 268.0, 258.0, 264.0, 284.0, 292.0, 172.0, 176.0, 288.0, 285.0, 285.0, 288.0, 293.0, 286.0, 286.0, 296.0, 316.0, 314.0, 290.0, 283.0, 294.0, 288.0, 290.0, 289.0, 285.0, 288.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6971456596710317, "mean_inference_ms": 1.244961424900404, "mean_action_processing_ms": 0.13370369220716977, "mean_env_wait_ms": 0.8392242722293407, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 630.0, "episode_reward_min": 348.0, "episode_reward_mean": 567.76, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 172.0}, "policy_reward_max": {"ppo": 316.0}, "policy_reward_mean": {"ppo": 283.88}, "hist_stats": {"episode_reward": [582.0, 582.0, 579.0, 522.0, 581.0, 576.0, 576.0, 582.0, 587.0, 530.0, 582.0, 570.0, 579.0, 587.0, 573.0, 579.0, 573.0, 573.0, 579.0, 573.0, 579.0, 522.0, 576.0, 579.0, 576.0, 579.0, 576.0, 582.0, 581.0, 579.0, 582.0, 530.0, 573.0, 519.0, 582.0, 627.0, 519.0, 587.0, 573.0, 576.0, 576.0, 530.0, 576.0, 530.0, 576.0, 522.0, 573.0, 579.0, 576.0, 525.0, 579.0, 539.0, 579.0, 579.0, 579.0, 573.0, 515.0, 
522.0, 576.0, 579.0, 576.0, 573.0, 576.0, 525.0, 567.0, 576.0, 570.0, 570.0, 576.0, 582.0, 570.0, 579.0, 579.0, 579.0, 530.0, 576.0, 579.0, 584.0, 570.0, 582.0, 525.0, 624.0, 630.0, 579.0, 576.0, 579.0, 579.0, 530.0, 522.0, 576.0, 348.0, 573.0, 573.0, 579.0, 582.0, 630.0, 573.0, 582.0, 579.0, 573.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [301.0, 281.0, 290.0, 292.0, 285.0, 294.0, 271.0, 251.0, 288.0, 293.0, 288.0, 288.0, 288.0, 288.0, 293.0, 289.0, 290.0, 297.0, 262.0, 268.0, 295.0, 287.0, 287.0, 283.0, 290.0, 289.0, 290.0, 297.0, 289.0, 284.0, 288.0, 291.0, 283.0, 290.0, 278.0, 295.0, 296.0, 283.0, 287.0, 286.0, 289.0, 290.0, 262.0, 260.0, 290.0, 286.0, 295.0, 284.0, 289.0, 287.0, 286.0, 293.0, 291.0, 285.0, 293.0, 289.0, 297.0, 284.0, 287.0, 292.0, 293.0, 289.0, 268.0, 262.0, 290.0, 283.0, 262.0, 257.0, 295.0, 287.0, 315.0, 312.0, 259.0, 260.0, 299.0, 288.0, 284.0, 289.0, 289.0, 287.0, 286.0, 290.0, 266.0, 264.0, 288.0, 288.0, 270.0, 260.0, 280.0, 296.0, 260.0, 262.0, 285.0, 288.0, 289.0, 290.0, 290.0, 286.0, 262.0, 263.0, 287.0, 292.0, 270.0, 269.0, 295.0, 284.0, 290.0, 289.0, 288.0, 291.0, 282.0, 291.0, 261.0, 254.0, 262.0, 260.0, 289.0, 287.0, 299.0, 280.0, 293.0, 283.0, 291.0, 282.0, 291.0, 285.0, 264.0, 261.0, 284.0, 283.0, 295.0, 281.0, 285.0, 285.0, 274.0, 296.0, 285.0, 291.0, 291.0, 291.0, 288.0, 282.0, 282.0, 297.0, 289.0, 290.0, 287.0, 292.0, 255.0, 275.0, 289.0, 287.0, 284.0, 295.0, 285.0, 299.0, 290.0, 280.0, 293.0, 289.0, 261.0, 264.0, 308.0, 316.0, 314.0, 
316.0, 290.0, 289.0, 288.0, 288.0, 288.0, 291.0, 285.0, 294.0, 262.0, 268.0, 258.0, 264.0, 284.0, 292.0, 172.0, 176.0, 288.0, 285.0, 285.0, 288.0, 293.0, 286.0, 286.0, 296.0, 316.0, 314.0, 290.0, 283.0, 294.0, 288.0, 290.0, 289.0, 285.0, 288.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6971456596710317, "mean_inference_ms": 1.244961424900404, "mean_action_processing_ms": 0.13370369220716977, "mean_env_wait_ms": 0.8392242722293407, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 9907200, "num_agent_steps_trained": 9907200, "num_env_steps_sampled": 4953600, "num_env_steps_trained": 4953600, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 4953600, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 9907200, "timers": {"training_iteration_time_ms": 3647.842, "learn_time_ms": 1106.231, "learn_throughput": 11570.818, "synch_weights_time_ms": 11.207}, "counters": {"num_env_steps_sampled": 4953600, "num_env_steps_trained": 4953600, "num_agent_steps_sampled": 9907200, "num_agent_steps_trained": 9907200}, "done": false, "episodes_total": 12384, "training_iteration": 387, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-25-35", "timestamp": 1666581935, "time_this_iter_s": 3.654477119445801, "time_total_s": 1477.793841123581, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1477.793841123581, "timesteps_since_restore": 0, "iterations_since_restore": 387, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.88333333333333, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 196.4, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 174.58, "shaped_reward_min": 108, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.43, "onion_pickup_agent_0_min": 5, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 17.28, "onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 15.29, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 17.19, "useful_onion_pickup_agent_1_min": 11, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.18, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.12, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.06, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, 
"useful_onion_drop_agent_1_mean": 0.03, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.89, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 16.76, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.58, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 4.9, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 7, "useful_dish_pickup_agent_0_mean": 5.29, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.67, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.12, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 0.11, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.07, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 6, "useful_dish_drop_agent_1_mean": 0.04, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.32, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.63, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.29, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.55, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.89, "optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 16.76, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.89, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 16.76, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0019375028787180781, "policy_loss": 0.0015798690728843212, "vf_loss": 
7.705456733703613, "vf_explained_var": 0.6002016067504883, "kl": 0.0024976124987006187, "entropy": 0.8258252739906311, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 4966400, "num_env_steps_trained": 4966400, "num_agent_steps_sampled": 9932800, "num_agent_steps_trained": 9932800}, "sampler_results": {"episode_reward_max": 633.0, "episode_reward_min": 348.0, "episode_reward_mean": 567.38, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 172.0}, "policy_reward_max": {"ppo": 319.0}, "policy_reward_mean": {"ppo": 283.69}, "custom_metrics": {"sparse_reward_mean": 196.4, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 174.58, "shaped_reward_min": 108, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.43, "onion_pickup_agent_0_min": 5, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 17.28, 
"onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 15.29, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 17.19, "useful_onion_pickup_agent_1_min": 11, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.18, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.12, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.06, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.03, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.89, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 16.76, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.58, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 4.9, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 7, "useful_dish_pickup_agent_0_mean": 5.29, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.67, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.12, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 0.11, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.07, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 6, "useful_dish_drop_agent_1_mean": 0.04, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.32, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.63, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.29, "soup_delivery_agent_0_min": 3, 
"soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.55, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.89, "optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 16.76, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.89, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 16.76, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [573.0, 519.0, 582.0, 627.0, 519.0, 587.0, 573.0, 576.0, 576.0, 530.0, 576.0, 530.0, 576.0, 522.0, 573.0, 579.0, 576.0, 525.0, 579.0, 539.0, 579.0, 579.0, 579.0, 573.0, 515.0, 522.0, 576.0, 579.0, 576.0, 573.0, 576.0, 525.0, 567.0, 576.0, 570.0, 570.0, 576.0, 582.0, 570.0, 579.0, 579.0, 579.0, 530.0, 576.0, 579.0, 584.0, 570.0, 582.0, 525.0, 624.0, 630.0, 579.0, 576.0, 579.0, 579.0, 530.0, 522.0, 576.0, 348.0, 573.0, 573.0, 579.0, 582.0, 630.0, 573.0, 582.0, 579.0, 573.0, 579.0, 522.0, 579.0, 576.0, 579.0, 573.0, 522.0, 530.0, 579.0, 582.0, 627.0, 576.0, 573.0, 579.0, 576.0, 579.0, 581.0, 584.0, 576.0, 630.0, 573.0, 579.0, 582.0, 530.0, 576.0, 579.0, 579.0, 447.0, 582.0, 530.0, 633.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [290.0, 283.0, 262.0, 257.0, 295.0, 287.0, 315.0, 312.0, 259.0, 260.0, 299.0, 288.0, 284.0, 289.0, 289.0, 287.0, 286.0, 290.0, 266.0, 264.0, 288.0, 288.0, 270.0, 260.0, 280.0, 296.0, 260.0, 262.0, 285.0, 288.0, 289.0, 290.0, 290.0, 286.0, 262.0, 263.0, 287.0, 292.0, 270.0, 269.0, 295.0, 284.0, 290.0, 289.0, 288.0, 291.0, 282.0, 291.0, 261.0, 
254.0, 262.0, 260.0, 289.0, 287.0, 299.0, 280.0, 293.0, 283.0, 291.0, 282.0, 291.0, 285.0, 264.0, 261.0, 284.0, 283.0, 295.0, 281.0, 285.0, 285.0, 274.0, 296.0, 285.0, 291.0, 291.0, 291.0, 288.0, 282.0, 282.0, 297.0, 289.0, 290.0, 287.0, 292.0, 255.0, 275.0, 289.0, 287.0, 284.0, 295.0, 285.0, 299.0, 290.0, 280.0, 293.0, 289.0, 261.0, 264.0, 308.0, 316.0, 314.0, 316.0, 290.0, 289.0, 288.0, 288.0, 288.0, 291.0, 285.0, 294.0, 262.0, 268.0, 258.0, 264.0, 284.0, 292.0, 172.0, 176.0, 288.0, 285.0, 285.0, 288.0, 293.0, 286.0, 286.0, 296.0, 316.0, 314.0, 290.0, 283.0, 294.0, 288.0, 290.0, 289.0, 285.0, 288.0, 290.0, 289.0, 257.0, 265.0, 290.0, 289.0, 296.0, 280.0, 293.0, 286.0, 277.0, 296.0, 259.0, 263.0, 269.0, 261.0, 288.0, 291.0, 296.0, 286.0, 317.0, 310.0, 285.0, 291.0, 286.0, 287.0, 291.0, 288.0, 290.0, 286.0, 288.0, 291.0, 282.0, 299.0, 297.0, 287.0, 290.0, 286.0, 311.0, 319.0, 285.0, 288.0, 291.0, 288.0, 291.0, 291.0, 262.0, 268.0, 284.0, 292.0, 291.0, 288.0, 290.0, 289.0, 229.0, 218.0, 291.0, 291.0, 260.0, 270.0, 314.0, 319.0, 290.0, 286.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6971033523311602, "mean_inference_ms": 1.2448045816757196, "mean_action_processing_ms": 0.13369576714133724, "mean_env_wait_ms": 0.839134076244018, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 633.0, "episode_reward_min": 348.0, "episode_reward_mean": 567.38, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 172.0}, "policy_reward_max": {"ppo": 319.0}, "policy_reward_mean": {"ppo": 283.69}, "hist_stats": {"episode_reward": [573.0, 519.0, 582.0, 627.0, 519.0, 587.0, 573.0, 576.0, 576.0, 530.0, 576.0, 530.0, 576.0, 522.0, 573.0, 579.0, 576.0, 525.0, 579.0, 539.0, 579.0, 579.0, 579.0, 573.0, 515.0, 522.0, 576.0, 579.0, 576.0, 573.0, 576.0, 525.0, 567.0, 576.0, 570.0, 570.0, 576.0, 582.0, 570.0, 579.0, 579.0, 579.0, 530.0, 576.0, 579.0, 584.0, 570.0, 582.0, 525.0, 624.0, 630.0, 579.0, 576.0, 579.0, 579.0, 530.0, 522.0, 
576.0, 348.0, 573.0, 573.0, 579.0, 582.0, 630.0, 573.0, 582.0, 579.0, 573.0, 579.0, 522.0, 579.0, 576.0, 579.0, 573.0, 522.0, 530.0, 579.0, 582.0, 627.0, 576.0, 573.0, 579.0, 576.0, 579.0, 581.0, 584.0, 576.0, 630.0, 573.0, 579.0, 582.0, 530.0, 576.0, 579.0, 579.0, 447.0, 582.0, 530.0, 633.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [290.0, 283.0, 262.0, 257.0, 295.0, 287.0, 315.0, 312.0, 259.0, 260.0, 299.0, 288.0, 284.0, 289.0, 289.0, 287.0, 286.0, 290.0, 266.0, 264.0, 288.0, 288.0, 270.0, 260.0, 280.0, 296.0, 260.0, 262.0, 285.0, 288.0, 289.0, 290.0, 290.0, 286.0, 262.0, 263.0, 287.0, 292.0, 270.0, 269.0, 295.0, 284.0, 290.0, 289.0, 288.0, 291.0, 282.0, 291.0, 261.0, 254.0, 262.0, 260.0, 289.0, 287.0, 299.0, 280.0, 293.0, 283.0, 291.0, 282.0, 291.0, 285.0, 264.0, 261.0, 284.0, 283.0, 295.0, 281.0, 285.0, 285.0, 274.0, 296.0, 285.0, 291.0, 291.0, 291.0, 288.0, 282.0, 282.0, 297.0, 289.0, 290.0, 287.0, 292.0, 255.0, 275.0, 289.0, 287.0, 284.0, 295.0, 285.0, 299.0, 290.0, 280.0, 293.0, 289.0, 261.0, 264.0, 308.0, 316.0, 314.0, 316.0, 290.0, 289.0, 288.0, 288.0, 288.0, 291.0, 285.0, 294.0, 262.0, 268.0, 258.0, 264.0, 284.0, 292.0, 172.0, 176.0, 288.0, 285.0, 285.0, 288.0, 293.0, 286.0, 286.0, 296.0, 316.0, 314.0, 290.0, 283.0, 294.0, 288.0, 290.0, 289.0, 285.0, 288.0, 290.0, 289.0, 257.0, 265.0, 290.0, 289.0, 296.0, 280.0, 293.0, 286.0, 277.0, 296.0, 259.0, 263.0, 269.0, 261.0, 288.0, 291.0, 296.0, 286.0, 317.0, 310.0, 285.0, 291.0, 286.0, 287.0, 291.0, 288.0, 290.0, 
286.0, 288.0, 291.0, 282.0, 299.0, 297.0, 287.0, 290.0, 286.0, 311.0, 319.0, 285.0, 288.0, 291.0, 288.0, 291.0, 291.0, 262.0, 268.0, 284.0, 292.0, 291.0, 288.0, 290.0, 289.0, 229.0, 218.0, 291.0, 291.0, 260.0, 270.0, 314.0, 319.0, 290.0, 286.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6971033523311602, "mean_inference_ms": 1.2448045816757196, "mean_action_processing_ms": 0.13369576714133724, "mean_env_wait_ms": 0.839134076244018, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 9932800, "num_agent_steps_trained": 9932800, "num_env_steps_sampled": 4966400, "num_env_steps_trained": 4966400, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 4966400, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 9932800, "timers": {"training_iteration_time_ms": 3650.674, "learn_time_ms": 1110.814, "learn_throughput": 11523.082, "synch_weights_time_ms": 11.715}, "counters": {"num_env_steps_sampled": 4966400, "num_env_steps_trained": 4966400, "num_agent_steps_sampled": 9932800, "num_agent_steps_trained": 9932800}, "done": false, "episodes_total": 12416, "training_iteration": 388, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-25-39", "timestamp": 1666581939, "time_this_iter_s": 3.681104898452759, "time_total_s": 1481.4749460220337, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1481.4749460220337, "timesteps_since_restore": 0, "iterations_since_restore": 388, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.9, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 197.0, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 174.74, "shaped_reward_min": 108, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.1, "onion_pickup_agent_0_min": 5, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 17.61, "onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 14.98, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.49, "useful_onion_pickup_agent_1_min": 11, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.17, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.13, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.1, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, 
"useful_onion_drop_agent_1_mean": 0.04, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.62, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.14, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 5.66, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 4.88, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.29, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.63, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.16, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 0.1, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.1, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 6, "useful_dish_drop_agent_1_mean": 0.04, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.31, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.63, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.3, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.57, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.62, "optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.14, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.62, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.14, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.000489800819195807, "policy_loss": 0.00010905158706009388, "vf_loss": 
7.963174819946289, "vf_explained_var": 0.5872219800949097, "kl": 0.002789913909509778, "entropy": 0.83113694190979, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 4979200, "num_env_steps_trained": 4979200, "num_agent_steps_sampled": 9958400, "num_agent_steps_trained": 9958400}, "sampler_results": {"episode_reward_max": 633.0, "episode_reward_min": 348.0, "episode_reward_mean": 568.74, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 172.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 284.37}, "custom_metrics": {"sparse_reward_mean": 197.0, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 174.74, "shaped_reward_min": 108, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.1, "onion_pickup_agent_0_min": 5, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 17.61, 
"onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 14.98, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.49, "useful_onion_pickup_agent_1_min": 11, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.17, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.13, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.1, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.04, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.62, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.14, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 5.66, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 4.88, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.29, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.63, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.16, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 0.1, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.1, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 6, "useful_dish_drop_agent_1_mean": 0.04, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.31, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.63, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.3, "soup_delivery_agent_0_min": 1, 
"soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.57, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.62, "optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.14, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.62, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.14, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [567.0, 576.0, 570.0, 570.0, 576.0, 582.0, 570.0, 579.0, 579.0, 579.0, 530.0, 576.0, 579.0, 584.0, 570.0, 582.0, 525.0, 624.0, 630.0, 579.0, 576.0, 579.0, 579.0, 530.0, 522.0, 576.0, 348.0, 573.0, 573.0, 579.0, 582.0, 630.0, 573.0, 582.0, 579.0, 573.0, 579.0, 522.0, 579.0, 576.0, 579.0, 573.0, 522.0, 530.0, 579.0, 582.0, 627.0, 576.0, 573.0, 579.0, 576.0, 579.0, 581.0, 584.0, 576.0, 630.0, 573.0, 579.0, 582.0, 530.0, 576.0, 579.0, 579.0, 447.0, 582.0, 530.0, 633.0, 576.0, 576.0, 579.0, 579.0, 522.0, 582.0, 582.0, 573.0, 633.0, 570.0, 579.0, 519.0, 584.0, 522.0, 579.0, 579.0, 579.0, 573.0, 582.0, 525.0, 579.0, 579.0, 630.0, 582.0, 579.0, 501.0, 579.0, 552.0, 522.0, 579.0, 522.0, 525.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [284.0, 283.0, 295.0, 281.0, 285.0, 285.0, 274.0, 296.0, 285.0, 291.0, 291.0, 291.0, 288.0, 282.0, 282.0, 297.0, 289.0, 290.0, 287.0, 292.0, 255.0, 275.0, 289.0, 287.0, 284.0, 295.0, 285.0, 299.0, 290.0, 280.0, 293.0, 289.0, 261.0, 264.0, 308.0, 316.0, 314.0, 316.0, 290.0, 289.0, 288.0, 288.0, 288.0, 291.0, 285.0, 294.0, 262.0, 268.0, 258.0, 
264.0, 284.0, 292.0, 172.0, 176.0, 288.0, 285.0, 285.0, 288.0, 293.0, 286.0, 286.0, 296.0, 316.0, 314.0, 290.0, 283.0, 294.0, 288.0, 290.0, 289.0, 285.0, 288.0, 290.0, 289.0, 257.0, 265.0, 290.0, 289.0, 296.0, 280.0, 293.0, 286.0, 277.0, 296.0, 259.0, 263.0, 269.0, 261.0, 288.0, 291.0, 296.0, 286.0, 317.0, 310.0, 285.0, 291.0, 286.0, 287.0, 291.0, 288.0, 290.0, 286.0, 288.0, 291.0, 282.0, 299.0, 297.0, 287.0, 290.0, 286.0, 311.0, 319.0, 285.0, 288.0, 291.0, 288.0, 291.0, 291.0, 262.0, 268.0, 284.0, 292.0, 291.0, 288.0, 290.0, 289.0, 229.0, 218.0, 291.0, 291.0, 260.0, 270.0, 314.0, 319.0, 290.0, 286.0, 288.0, 288.0, 284.0, 295.0, 293.0, 286.0, 254.0, 268.0, 286.0, 296.0, 291.0, 291.0, 281.0, 292.0, 308.0, 325.0, 299.0, 271.0, 288.0, 291.0, 261.0, 258.0, 293.0, 291.0, 267.0, 255.0, 284.0, 295.0, 291.0, 288.0, 289.0, 290.0, 292.0, 281.0, 293.0, 289.0, 261.0, 264.0, 283.0, 296.0, 290.0, 289.0, 311.0, 319.0, 286.0, 296.0, 287.0, 292.0, 247.0, 254.0, 288.0, 291.0, 270.0, 282.0, 256.0, 266.0, 289.0, 290.0, 260.0, 262.0, 245.0, 280.0, 290.0, 289.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6970445040200594, "mean_inference_ms": 1.2446568491000776, "mean_action_processing_ms": 0.13368825935226153, "mean_env_wait_ms": 0.8390474799522084, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 633.0, "episode_reward_min": 348.0, "episode_reward_mean": 568.74, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 172.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 284.37}, "hist_stats": {"episode_reward": [567.0, 576.0, 570.0, 570.0, 576.0, 582.0, 570.0, 579.0, 579.0, 579.0, 530.0, 576.0, 579.0, 584.0, 570.0, 582.0, 525.0, 624.0, 630.0, 579.0, 576.0, 579.0, 579.0, 530.0, 522.0, 576.0, 348.0, 573.0, 573.0, 579.0, 582.0, 630.0, 573.0, 582.0, 579.0, 573.0, 579.0, 522.0, 579.0, 576.0, 579.0, 573.0, 522.0, 530.0, 579.0, 582.0, 627.0, 576.0, 573.0, 579.0, 576.0, 579.0, 581.0, 584.0, 576.0, 630.0, 573.0, 
579.0, 582.0, 530.0, 576.0, 579.0, 579.0, 447.0, 582.0, 530.0, 633.0, 576.0, 576.0, 579.0, 579.0, 522.0, 582.0, 582.0, 573.0, 633.0, 570.0, 579.0, 519.0, 584.0, 522.0, 579.0, 579.0, 579.0, 573.0, 582.0, 525.0, 579.0, 579.0, 630.0, 582.0, 579.0, 501.0, 579.0, 552.0, 522.0, 579.0, 522.0, 525.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [284.0, 283.0, 295.0, 281.0, 285.0, 285.0, 274.0, 296.0, 285.0, 291.0, 291.0, 291.0, 288.0, 282.0, 282.0, 297.0, 289.0, 290.0, 287.0, 292.0, 255.0, 275.0, 289.0, 287.0, 284.0, 295.0, 285.0, 299.0, 290.0, 280.0, 293.0, 289.0, 261.0, 264.0, 308.0, 316.0, 314.0, 316.0, 290.0, 289.0, 288.0, 288.0, 288.0, 291.0, 285.0, 294.0, 262.0, 268.0, 258.0, 264.0, 284.0, 292.0, 172.0, 176.0, 288.0, 285.0, 285.0, 288.0, 293.0, 286.0, 286.0, 296.0, 316.0, 314.0, 290.0, 283.0, 294.0, 288.0, 290.0, 289.0, 285.0, 288.0, 290.0, 289.0, 257.0, 265.0, 290.0, 289.0, 296.0, 280.0, 293.0, 286.0, 277.0, 296.0, 259.0, 263.0, 269.0, 261.0, 288.0, 291.0, 296.0, 286.0, 317.0, 310.0, 285.0, 291.0, 286.0, 287.0, 291.0, 288.0, 290.0, 286.0, 288.0, 291.0, 282.0, 299.0, 297.0, 287.0, 290.0, 286.0, 311.0, 319.0, 285.0, 288.0, 291.0, 288.0, 291.0, 291.0, 262.0, 268.0, 284.0, 292.0, 291.0, 288.0, 290.0, 289.0, 229.0, 218.0, 291.0, 291.0, 260.0, 270.0, 314.0, 319.0, 290.0, 286.0, 288.0, 288.0, 284.0, 295.0, 293.0, 286.0, 254.0, 268.0, 286.0, 296.0, 291.0, 291.0, 281.0, 292.0, 308.0, 325.0, 299.0, 271.0, 288.0, 291.0, 261.0, 258.0, 293.0, 291.0, 267.0, 255.0, 284.0, 295.0, 291.0, 
288.0, 289.0, 290.0, 292.0, 281.0, 293.0, 289.0, 261.0, 264.0, 283.0, 296.0, 290.0, 289.0, 311.0, 319.0, 286.0, 296.0, 287.0, 292.0, 247.0, 254.0, 288.0, 291.0, 270.0, 282.0, 256.0, 266.0, 289.0, 290.0, 260.0, 262.0, 245.0, 280.0, 290.0, 289.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6970445040200594, "mean_inference_ms": 1.2446568491000776, "mean_action_processing_ms": 0.13368825935226153, "mean_env_wait_ms": 0.8390474799522084, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 9958400, "num_agent_steps_trained": 9958400, "num_env_steps_sampled": 4979200, "num_env_steps_trained": 4979200, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 4979200, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 9958400, "timers": {"training_iteration_time_ms": 3643.09, "learn_time_ms": 1109.725, "learn_throughput": 11534.391, "synch_weights_time_ms": 11.803}, "counters": {"num_env_steps_sampled": 4979200, "num_env_steps_trained": 4979200, "num_agent_steps_sampled": 9958400, "num_agent_steps_trained": 9958400}, "done": false, "episodes_total": 12448, "training_iteration": 389, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-25-43", "timestamp": 1666581943, "time_this_iter_s": 3.614347457885742, "time_total_s": 1485.0892934799194, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1485.0892934799194, "timesteps_since_restore": 0, "iterations_since_restore": 389, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.499999999999996, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 198.4, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 176.1, "shaped_reward_min": 127, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.38, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 17.48, "onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 15.24, "useful_onion_pickup_agent_0_min": 9, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 17.38, "useful_onion_pickup_agent_1_min": 11, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.12, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.12, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.07, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, 
"useful_onion_drop_agent_1_mean": 0.05, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.92, "potting_onion_agent_0_min": 9, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.07, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 5.64, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 4.94, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.34, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.72, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.1, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.1, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.07, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.31, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.68, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.3, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.64, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.92, "optimal_onion_potting_agent_0_min": 9, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.07, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.92, "viable_onion_potting_agent_0_min": 9, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.07, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -8.018617518246174e-05, "policy_loss": -0.0004352282849140465, "vf_loss": 
7.723010063171387, "vf_explained_var": 0.5921529531478882, "kl": 0.002627419773489237, "entropy": 0.8345175981521606, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 4992000, "num_env_steps_trained": 4992000, "num_agent_steps_sampled": 9984000, "num_agent_steps_trained": 9984000}, "sampler_results": {"episode_reward_max": 633.0, "episode_reward_min": 447.0, "episode_reward_mean": 572.9, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 218.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 286.45}, "custom_metrics": {"sparse_reward_mean": 198.4, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 176.1, "shaped_reward_min": 127, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.38, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 17.48, 
"onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 15.24, "useful_onion_pickup_agent_0_min": 9, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 17.38, "useful_onion_pickup_agent_1_min": 11, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.12, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.12, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.07, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.05, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.92, "potting_onion_agent_0_min": 9, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.07, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 5.64, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 4.94, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.34, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.72, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.1, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.1, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.07, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.31, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.68, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.3, "soup_delivery_agent_0_min": 1, 
"soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.64, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.92, "optimal_onion_potting_agent_0_min": 9, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.07, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.92, "viable_onion_potting_agent_0_min": 9, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.07, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [573.0, 582.0, 579.0, 573.0, 579.0, 522.0, 579.0, 576.0, 579.0, 573.0, 522.0, 530.0, 579.0, 582.0, 627.0, 576.0, 573.0, 579.0, 576.0, 579.0, 581.0, 584.0, 576.0, 630.0, 573.0, 579.0, 582.0, 530.0, 576.0, 579.0, 579.0, 447.0, 582.0, 530.0, 633.0, 576.0, 576.0, 579.0, 579.0, 522.0, 582.0, 582.0, 573.0, 633.0, 570.0, 579.0, 519.0, 584.0, 522.0, 579.0, 579.0, 579.0, 573.0, 582.0, 525.0, 579.0, 579.0, 630.0, 582.0, 579.0, 501.0, 579.0, 552.0, 522.0, 579.0, 522.0, 525.0, 579.0, 630.0, 522.0, 579.0, 630.0, 582.0, 579.0, 582.0, 630.0, 587.0, 579.0, 570.0, 570.0, 633.0, 570.0, 579.0, 579.0, 576.0, 525.0, 576.0, 582.0, 576.0, 522.0, 582.0, 570.0, 627.0, 579.0, 582.0, 576.0, 576.0, 579.0, 582.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [290.0, 283.0, 294.0, 288.0, 290.0, 289.0, 285.0, 288.0, 290.0, 289.0, 257.0, 265.0, 290.0, 289.0, 296.0, 280.0, 293.0, 286.0, 277.0, 296.0, 259.0, 263.0, 269.0, 261.0, 288.0, 291.0, 296.0, 286.0, 317.0, 310.0, 285.0, 291.0, 286.0, 287.0, 291.0, 288.0, 290.0, 286.0, 288.0, 291.0, 282.0, 299.0, 297.0, 287.0, 290.0, 286.0, 311.0, 319.0, 285.0, 
288.0, 291.0, 288.0, 291.0, 291.0, 262.0, 268.0, 284.0, 292.0, 291.0, 288.0, 290.0, 289.0, 229.0, 218.0, 291.0, 291.0, 260.0, 270.0, 314.0, 319.0, 290.0, 286.0, 288.0, 288.0, 284.0, 295.0, 293.0, 286.0, 254.0, 268.0, 286.0, 296.0, 291.0, 291.0, 281.0, 292.0, 308.0, 325.0, 299.0, 271.0, 288.0, 291.0, 261.0, 258.0, 293.0, 291.0, 267.0, 255.0, 284.0, 295.0, 291.0, 288.0, 289.0, 290.0, 292.0, 281.0, 293.0, 289.0, 261.0, 264.0, 283.0, 296.0, 290.0, 289.0, 311.0, 319.0, 286.0, 296.0, 287.0, 292.0, 247.0, 254.0, 288.0, 291.0, 270.0, 282.0, 256.0, 266.0, 289.0, 290.0, 260.0, 262.0, 245.0, 280.0, 290.0, 289.0, 314.0, 316.0, 253.0, 269.0, 285.0, 294.0, 311.0, 319.0, 288.0, 294.0, 290.0, 289.0, 297.0, 285.0, 317.0, 313.0, 293.0, 294.0, 290.0, 289.0, 282.0, 288.0, 288.0, 282.0, 318.0, 315.0, 279.0, 291.0, 290.0, 289.0, 287.0, 292.0, 296.0, 280.0, 267.0, 258.0, 286.0, 290.0, 291.0, 291.0, 292.0, 284.0, 257.0, 265.0, 292.0, 290.0, 277.0, 293.0, 315.0, 312.0, 291.0, 288.0, 291.0, 291.0, 297.0, 279.0, 285.0, 291.0, 285.0, 294.0, 291.0, 291.0, 284.0, 295.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6969998730166297, "mean_inference_ms": 1.2445213151785859, "mean_action_processing_ms": 0.13368180538004462, "mean_env_wait_ms": 0.838974390971066, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 633.0, "episode_reward_min": 447.0, "episode_reward_mean": 572.9, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 218.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 286.45}, "hist_stats": {"episode_reward": [573.0, 582.0, 579.0, 573.0, 579.0, 522.0, 579.0, 576.0, 579.0, 573.0, 522.0, 530.0, 579.0, 582.0, 627.0, 576.0, 573.0, 579.0, 576.0, 579.0, 581.0, 584.0, 576.0, 630.0, 573.0, 579.0, 582.0, 530.0, 576.0, 579.0, 579.0, 447.0, 582.0, 530.0, 633.0, 576.0, 576.0, 579.0, 579.0, 522.0, 582.0, 582.0, 573.0, 633.0, 570.0, 579.0, 519.0, 584.0, 522.0, 579.0, 579.0, 579.0, 573.0, 582.0, 525.0, 579.0, 579.0, 
630.0, 582.0, 579.0, 501.0, 579.0, 552.0, 522.0, 579.0, 522.0, 525.0, 579.0, 630.0, 522.0, 579.0, 630.0, 582.0, 579.0, 582.0, 630.0, 587.0, 579.0, 570.0, 570.0, 633.0, 570.0, 579.0, 579.0, 576.0, 525.0, 576.0, 582.0, 576.0, 522.0, 582.0, 570.0, 627.0, 579.0, 582.0, 576.0, 576.0, 579.0, 582.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [290.0, 283.0, 294.0, 288.0, 290.0, 289.0, 285.0, 288.0, 290.0, 289.0, 257.0, 265.0, 290.0, 289.0, 296.0, 280.0, 293.0, 286.0, 277.0, 296.0, 259.0, 263.0, 269.0, 261.0, 288.0, 291.0, 296.0, 286.0, 317.0, 310.0, 285.0, 291.0, 286.0, 287.0, 291.0, 288.0, 290.0, 286.0, 288.0, 291.0, 282.0, 299.0, 297.0, 287.0, 290.0, 286.0, 311.0, 319.0, 285.0, 288.0, 291.0, 288.0, 291.0, 291.0, 262.0, 268.0, 284.0, 292.0, 291.0, 288.0, 290.0, 289.0, 229.0, 218.0, 291.0, 291.0, 260.0, 270.0, 314.0, 319.0, 290.0, 286.0, 288.0, 288.0, 284.0, 295.0, 293.0, 286.0, 254.0, 268.0, 286.0, 296.0, 291.0, 291.0, 281.0, 292.0, 308.0, 325.0, 299.0, 271.0, 288.0, 291.0, 261.0, 258.0, 293.0, 291.0, 267.0, 255.0, 284.0, 295.0, 291.0, 288.0, 289.0, 290.0, 292.0, 281.0, 293.0, 289.0, 261.0, 264.0, 283.0, 296.0, 290.0, 289.0, 311.0, 319.0, 286.0, 296.0, 287.0, 292.0, 247.0, 254.0, 288.0, 291.0, 270.0, 282.0, 256.0, 266.0, 289.0, 290.0, 260.0, 262.0, 245.0, 280.0, 290.0, 289.0, 314.0, 316.0, 253.0, 269.0, 285.0, 294.0, 311.0, 319.0, 288.0, 294.0, 290.0, 289.0, 297.0, 285.0, 317.0, 313.0, 293.0, 294.0, 290.0, 289.0, 282.0, 288.0, 288.0, 282.0, 318.0, 315.0, 279.0, 291.0, 290.0, 
289.0, 287.0, 292.0, 296.0, 280.0, 267.0, 258.0, 286.0, 290.0, 291.0, 291.0, 292.0, 284.0, 257.0, 265.0, 292.0, 290.0, 277.0, 293.0, 315.0, 312.0, 291.0, 288.0, 291.0, 291.0, 297.0, 279.0, 285.0, 291.0, 285.0, 294.0, 291.0, 291.0, 284.0, 295.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6969998730166297, "mean_inference_ms": 1.2445213151785859, "mean_action_processing_ms": 0.13368180538004462, "mean_env_wait_ms": 0.838974390971066, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 9984000, "num_agent_steps_trained": 9984000, "num_env_steps_sampled": 4992000, "num_env_steps_trained": 4992000, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 4992000, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 9984000, "timers": {"training_iteration_time_ms": 3655.183, "learn_time_ms": 1119.61, "learn_throughput": 11432.549, "synch_weights_time_ms": 11.891}, "counters": {"num_env_steps_sampled": 4992000, "num_env_steps_trained": 4992000, "num_agent_steps_sampled": 9984000, "num_agent_steps_trained": 9984000}, "done": false, "episodes_total": 12480, "training_iteration": 390, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-25-47", "timestamp": 1666581947, "time_this_iter_s": 3.7840723991394043, "time_total_s": 1488.8733658790588, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1488.8733658790588, "timesteps_since_restore": 0, "iterations_since_restore": 390, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.833333333333332, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 198.6, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 175.72, "shaped_reward_min": 138, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.44, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 17.35, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 15.28, "useful_onion_pickup_agent_0_min": 9, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 17.21, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.13, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.13, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.07, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, 
"useful_onion_drop_agent_1_mean": 0.08, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.98, "potting_onion_agent_0_min": 9, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 16.95, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 5.59, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 4.98, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.26, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.75, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.11, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.1, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.08, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.27, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 4.71, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.26, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.69, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.98, "optimal_onion_potting_agent_0_min": 9, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 16.95, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.98, "viable_onion_potting_agent_0_min": 9, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 16.95, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0028976090252399445, "policy_loss": -0.003267057705670595, "vf_loss": 
7.814639091491699, "vf_explained_var": 0.5801650881767273, "kl": 0.0025380898732692003, "entropy": 0.8240329027175903, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 5004800, "num_env_steps_trained": 5004800, "num_agent_steps_sampled": 10009600, "num_agent_steps_trained": 10009600}, "sampler_results": {"episode_reward_max": 633.0, "episode_reward_min": 465.0, "episode_reward_mean": 572.92, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 226.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 286.46}, "custom_metrics": {"sparse_reward_mean": 198.6, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 175.72, "shaped_reward_min": 138, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.44, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 17.35, 
"onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 15.28, "useful_onion_pickup_agent_0_min": 9, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 17.21, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.13, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.13, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.07, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.08, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.98, "potting_onion_agent_0_min": 9, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 16.95, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 5.59, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 4.98, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.26, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.75, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.11, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.1, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.08, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.27, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 4.71, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.26, "soup_delivery_agent_0_min": 1, 
"soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.69, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.98, "optimal_onion_potting_agent_0_min": 9, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 16.95, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.98, "viable_onion_potting_agent_0_min": 9, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 16.95, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 530.0, 633.0, 576.0, 576.0, 579.0, 579.0, 522.0, 582.0, 582.0, 573.0, 633.0, 570.0, 579.0, 519.0, 584.0, 522.0, 579.0, 579.0, 579.0, 573.0, 582.0, 525.0, 579.0, 579.0, 630.0, 582.0, 579.0, 501.0, 579.0, 552.0, 522.0, 579.0, 522.0, 525.0, 579.0, 630.0, 522.0, 579.0, 630.0, 582.0, 579.0, 582.0, 630.0, 587.0, 579.0, 570.0, 570.0, 633.0, 570.0, 579.0, 579.0, 576.0, 525.0, 576.0, 582.0, 576.0, 522.0, 582.0, 570.0, 627.0, 579.0, 582.0, 576.0, 576.0, 579.0, 582.0, 579.0, 579.0, 570.0, 582.0, 573.0, 582.0, 570.0, 582.0, 581.0, 567.0, 579.0, 519.0, 579.0, 581.0, 579.0, 570.0, 465.0, 576.0, 522.0, 627.0, 630.0, 582.0, 576.0, 579.0, 573.0, 579.0, 633.0, 576.0, 570.0, 498.0, 525.0, 579.0, 573.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [291.0, 291.0, 260.0, 270.0, 314.0, 319.0, 290.0, 286.0, 288.0, 288.0, 284.0, 295.0, 293.0, 286.0, 254.0, 268.0, 286.0, 296.0, 291.0, 291.0, 281.0, 292.0, 308.0, 325.0, 299.0, 271.0, 288.0, 291.0, 261.0, 258.0, 293.0, 291.0, 267.0, 255.0, 284.0, 295.0, 291.0, 288.0, 289.0, 290.0, 292.0, 281.0, 293.0, 289.0, 261.0, 264.0, 283.0, 296.0, 290.0, 
289.0, 311.0, 319.0, 286.0, 296.0, 287.0, 292.0, 247.0, 254.0, 288.0, 291.0, 270.0, 282.0, 256.0, 266.0, 289.0, 290.0, 260.0, 262.0, 245.0, 280.0, 290.0, 289.0, 314.0, 316.0, 253.0, 269.0, 285.0, 294.0, 311.0, 319.0, 288.0, 294.0, 290.0, 289.0, 297.0, 285.0, 317.0, 313.0, 293.0, 294.0, 290.0, 289.0, 282.0, 288.0, 288.0, 282.0, 318.0, 315.0, 279.0, 291.0, 290.0, 289.0, 287.0, 292.0, 296.0, 280.0, 267.0, 258.0, 286.0, 290.0, 291.0, 291.0, 292.0, 284.0, 257.0, 265.0, 292.0, 290.0, 277.0, 293.0, 315.0, 312.0, 291.0, 288.0, 291.0, 291.0, 297.0, 279.0, 285.0, 291.0, 285.0, 294.0, 291.0, 291.0, 284.0, 295.0, 290.0, 289.0, 286.0, 284.0, 296.0, 286.0, 283.0, 290.0, 291.0, 291.0, 279.0, 291.0, 296.0, 286.0, 285.0, 296.0, 283.0, 284.0, 290.0, 289.0, 257.0, 262.0, 288.0, 291.0, 291.0, 290.0, 288.0, 291.0, 284.0, 286.0, 239.0, 226.0, 287.0, 289.0, 251.0, 271.0, 314.0, 313.0, 306.0, 324.0, 293.0, 289.0, 290.0, 286.0, 291.0, 288.0, 279.0, 294.0, 291.0, 288.0, 324.0, 309.0, 296.0, 280.0, 282.0, 288.0, 250.0, 248.0, 275.0, 250.0, 288.0, 291.0, 285.0, 288.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6969574750027725, "mean_inference_ms": 1.244395143457617, "mean_action_processing_ms": 0.13367687287664567, "mean_env_wait_ms": 0.8389141369511748, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 633.0, "episode_reward_min": 465.0, "episode_reward_mean": 572.92, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 226.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 286.46}, "hist_stats": {"episode_reward": [582.0, 530.0, 633.0, 576.0, 576.0, 579.0, 579.0, 522.0, 582.0, 582.0, 573.0, 633.0, 570.0, 579.0, 519.0, 584.0, 522.0, 579.0, 579.0, 579.0, 573.0, 582.0, 525.0, 579.0, 579.0, 630.0, 582.0, 579.0, 501.0, 579.0, 552.0, 522.0, 579.0, 522.0, 525.0, 579.0, 630.0, 522.0, 579.0, 630.0, 582.0, 579.0, 582.0, 630.0, 587.0, 579.0, 570.0, 570.0, 633.0, 570.0, 579.0, 579.0, 576.0, 525.0, 576.0, 582.0, 576.0, 
522.0, 582.0, 570.0, 627.0, 579.0, 582.0, 576.0, 576.0, 579.0, 582.0, 579.0, 579.0, 570.0, 582.0, 573.0, 582.0, 570.0, 582.0, 581.0, 567.0, 579.0, 519.0, 579.0, 581.0, 579.0, 570.0, 465.0, 576.0, 522.0, 627.0, 630.0, 582.0, 576.0, 579.0, 573.0, 579.0, 633.0, 576.0, 570.0, 498.0, 525.0, 579.0, 573.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [291.0, 291.0, 260.0, 270.0, 314.0, 319.0, 290.0, 286.0, 288.0, 288.0, 284.0, 295.0, 293.0, 286.0, 254.0, 268.0, 286.0, 296.0, 291.0, 291.0, 281.0, 292.0, 308.0, 325.0, 299.0, 271.0, 288.0, 291.0, 261.0, 258.0, 293.0, 291.0, 267.0, 255.0, 284.0, 295.0, 291.0, 288.0, 289.0, 290.0, 292.0, 281.0, 293.0, 289.0, 261.0, 264.0, 283.0, 296.0, 290.0, 289.0, 311.0, 319.0, 286.0, 296.0, 287.0, 292.0, 247.0, 254.0, 288.0, 291.0, 270.0, 282.0, 256.0, 266.0, 289.0, 290.0, 260.0, 262.0, 245.0, 280.0, 290.0, 289.0, 314.0, 316.0, 253.0, 269.0, 285.0, 294.0, 311.0, 319.0, 288.0, 294.0, 290.0, 289.0, 297.0, 285.0, 317.0, 313.0, 293.0, 294.0, 290.0, 289.0, 282.0, 288.0, 288.0, 282.0, 318.0, 315.0, 279.0, 291.0, 290.0, 289.0, 287.0, 292.0, 296.0, 280.0, 267.0, 258.0, 286.0, 290.0, 291.0, 291.0, 292.0, 284.0, 257.0, 265.0, 292.0, 290.0, 277.0, 293.0, 315.0, 312.0, 291.0, 288.0, 291.0, 291.0, 297.0, 279.0, 285.0, 291.0, 285.0, 294.0, 291.0, 291.0, 284.0, 295.0, 290.0, 289.0, 286.0, 284.0, 296.0, 286.0, 283.0, 290.0, 291.0, 291.0, 279.0, 291.0, 296.0, 286.0, 285.0, 296.0, 283.0, 284.0, 290.0, 289.0, 257.0, 262.0, 288.0, 291.0, 291.0, 290.0, 288.0, 291.0, 284.0, 
286.0, 239.0, 226.0, 287.0, 289.0, 251.0, 271.0, 314.0, 313.0, 306.0, 324.0, 293.0, 289.0, 290.0, 286.0, 291.0, 288.0, 279.0, 294.0, 291.0, 288.0, 324.0, 309.0, 296.0, 280.0, 282.0, 288.0, 250.0, 248.0, 275.0, 250.0, 288.0, 291.0, 285.0, 288.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6969574750027725, "mean_inference_ms": 1.244395143457617, "mean_action_processing_ms": 0.13367687287664567, "mean_env_wait_ms": 0.8389141369511748, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 10009600, "num_agent_steps_trained": 10009600, "num_env_steps_sampled": 5004800, "num_env_steps_trained": 5004800, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 5004800, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 10009600, "timers": {"training_iteration_time_ms": 3662.928, "learn_time_ms": 1125.14, "learn_throughput": 11376.366, "synch_weights_time_ms": 13.044}, "counters": {"num_env_steps_sampled": 5004800, "num_env_steps_trained": 5004800, "num_agent_steps_sampled": 10009600, "num_agent_steps_trained": 10009600}, "done": false, "episodes_total": 12512, "training_iteration": 391, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-25-51", "timestamp": 1666581951, "time_this_iter_s": 3.6799001693725586, "time_total_s": 1492.5532660484314, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1492.5532660484314, "timesteps_since_restore": 0, "iterations_since_restore": 391, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.9, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 199.2, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 176.59, "shaped_reward_min": 138, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.41, "onion_pickup_agent_0_min": 10, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 17.47, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 15.26, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 17.32, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.15, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.14, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.04, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, 
"useful_onion_drop_agent_1_mean": 0.07, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.89, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.1, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.67, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 4.93, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.43, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.71, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.08, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.1, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.05, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.42, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 4.62, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.4, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.6, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.89, "optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.1, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.89, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.1, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0016610324382781982, "policy_loss": -0.002023993991315365, "vf_loss": 
7.828275680541992, "vf_explained_var": 0.5818252563476562, "kl": 0.003011357504874468, "entropy": 0.839729905128479, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 5017600, "num_env_steps_trained": 5017600, "num_agent_steps_sampled": 10035200, "num_agent_steps_trained": 10035200}, "sampler_results": {"episode_reward_max": 633.0, "episode_reward_min": 465.0, "episode_reward_mean": 574.99, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 226.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 287.495}, "custom_metrics": {"sparse_reward_mean": 199.2, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 176.59, "shaped_reward_min": 138, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.41, "onion_pickup_agent_0_min": 10, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 17.47, 
"onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 15.26, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 17.32, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.15, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.14, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.04, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.07, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.89, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.1, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.67, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 4.93, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.43, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.71, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.08, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.1, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.05, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.42, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 4.62, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.4, "soup_delivery_agent_0_min": 1, 
"soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.6, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.89, "optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.1, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.89, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.1, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 522.0, 525.0, 579.0, 630.0, 522.0, 579.0, 630.0, 582.0, 579.0, 582.0, 630.0, 587.0, 579.0, 570.0, 570.0, 633.0, 570.0, 579.0, 579.0, 576.0, 525.0, 576.0, 582.0, 576.0, 522.0, 582.0, 570.0, 627.0, 579.0, 582.0, 576.0, 576.0, 579.0, 582.0, 579.0, 579.0, 570.0, 582.0, 573.0, 582.0, 570.0, 582.0, 581.0, 567.0, 579.0, 519.0, 579.0, 581.0, 579.0, 570.0, 465.0, 576.0, 522.0, 627.0, 630.0, 582.0, 576.0, 579.0, 573.0, 579.0, 633.0, 576.0, 570.0, 498.0, 525.0, 579.0, 573.0, 579.0, 576.0, 581.0, 582.0, 579.0, 579.0, 582.0, 573.0, 579.0, 587.0, 582.0, 582.0, 579.0, 582.0, 579.0, 555.0, 584.0, 582.0, 579.0, 576.0, 582.0, 582.0, 581.0, 530.0, 576.0, 570.0, 570.0, 573.0, 576.0, 576.0, 582.0, 573.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [289.0, 290.0, 260.0, 262.0, 245.0, 280.0, 290.0, 289.0, 314.0, 316.0, 253.0, 269.0, 285.0, 294.0, 311.0, 319.0, 288.0, 294.0, 290.0, 289.0, 297.0, 285.0, 317.0, 313.0, 293.0, 294.0, 290.0, 289.0, 282.0, 288.0, 288.0, 282.0, 318.0, 315.0, 279.0, 291.0, 290.0, 289.0, 287.0, 292.0, 296.0, 280.0, 267.0, 258.0, 286.0, 290.0, 291.0, 291.0, 292.0, 
284.0, 257.0, 265.0, 292.0, 290.0, 277.0, 293.0, 315.0, 312.0, 291.0, 288.0, 291.0, 291.0, 297.0, 279.0, 285.0, 291.0, 285.0, 294.0, 291.0, 291.0, 284.0, 295.0, 290.0, 289.0, 286.0, 284.0, 296.0, 286.0, 283.0, 290.0, 291.0, 291.0, 279.0, 291.0, 296.0, 286.0, 285.0, 296.0, 283.0, 284.0, 290.0, 289.0, 257.0, 262.0, 288.0, 291.0, 291.0, 290.0, 288.0, 291.0, 284.0, 286.0, 239.0, 226.0, 287.0, 289.0, 251.0, 271.0, 314.0, 313.0, 306.0, 324.0, 293.0, 289.0, 290.0, 286.0, 291.0, 288.0, 279.0, 294.0, 291.0, 288.0, 324.0, 309.0, 296.0, 280.0, 282.0, 288.0, 250.0, 248.0, 275.0, 250.0, 288.0, 291.0, 285.0, 288.0, 287.0, 292.0, 287.0, 289.0, 293.0, 288.0, 290.0, 292.0, 284.0, 295.0, 286.0, 293.0, 288.0, 294.0, 290.0, 283.0, 298.0, 281.0, 287.0, 300.0, 301.0, 281.0, 293.0, 289.0, 288.0, 291.0, 290.0, 292.0, 287.0, 292.0, 283.0, 272.0, 288.0, 296.0, 293.0, 289.0, 288.0, 291.0, 288.0, 288.0, 293.0, 289.0, 290.0, 292.0, 284.0, 297.0, 275.0, 255.0, 286.0, 290.0, 275.0, 295.0, 293.0, 277.0, 287.0, 286.0, 292.0, 284.0, 288.0, 288.0, 290.0, 292.0, 293.0, 280.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6969268436790937, "mean_inference_ms": 1.2442765016526312, "mean_action_processing_ms": 0.13367506415155378, "mean_env_wait_ms": 0.8388712862759672, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 633.0, "episode_reward_min": 465.0, "episode_reward_mean": 574.99, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 226.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 287.495}, "hist_stats": {"episode_reward": [579.0, 522.0, 525.0, 579.0, 630.0, 522.0, 579.0, 630.0, 582.0, 579.0, 582.0, 630.0, 587.0, 579.0, 570.0, 570.0, 633.0, 570.0, 579.0, 579.0, 576.0, 525.0, 576.0, 582.0, 576.0, 522.0, 582.0, 570.0, 627.0, 579.0, 582.0, 576.0, 576.0, 579.0, 582.0, 579.0, 579.0, 570.0, 582.0, 573.0, 582.0, 570.0, 582.0, 581.0, 567.0, 579.0, 519.0, 579.0, 581.0, 579.0, 570.0, 465.0, 576.0, 522.0, 627.0, 630.0, 
582.0, 576.0, 579.0, 573.0, 579.0, 633.0, 576.0, 570.0, 498.0, 525.0, 579.0, 573.0, 579.0, 576.0, 581.0, 582.0, 579.0, 579.0, 582.0, 573.0, 579.0, 587.0, 582.0, 582.0, 579.0, 582.0, 579.0, 555.0, 584.0, 582.0, 579.0, 576.0, 582.0, 582.0, 581.0, 530.0, 576.0, 570.0, 570.0, 573.0, 576.0, 576.0, 582.0, 573.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [289.0, 290.0, 260.0, 262.0, 245.0, 280.0, 290.0, 289.0, 314.0, 316.0, 253.0, 269.0, 285.0, 294.0, 311.0, 319.0, 288.0, 294.0, 290.0, 289.0, 297.0, 285.0, 317.0, 313.0, 293.0, 294.0, 290.0, 289.0, 282.0, 288.0, 288.0, 282.0, 318.0, 315.0, 279.0, 291.0, 290.0, 289.0, 287.0, 292.0, 296.0, 280.0, 267.0, 258.0, 286.0, 290.0, 291.0, 291.0, 292.0, 284.0, 257.0, 265.0, 292.0, 290.0, 277.0, 293.0, 315.0, 312.0, 291.0, 288.0, 291.0, 291.0, 297.0, 279.0, 285.0, 291.0, 285.0, 294.0, 291.0, 291.0, 284.0, 295.0, 290.0, 289.0, 286.0, 284.0, 296.0, 286.0, 283.0, 290.0, 291.0, 291.0, 279.0, 291.0, 296.0, 286.0, 285.0, 296.0, 283.0, 284.0, 290.0, 289.0, 257.0, 262.0, 288.0, 291.0, 291.0, 290.0, 288.0, 291.0, 284.0, 286.0, 239.0, 226.0, 287.0, 289.0, 251.0, 271.0, 314.0, 313.0, 306.0, 324.0, 293.0, 289.0, 290.0, 286.0, 291.0, 288.0, 279.0, 294.0, 291.0, 288.0, 324.0, 309.0, 296.0, 280.0, 282.0, 288.0, 250.0, 248.0, 275.0, 250.0, 288.0, 291.0, 285.0, 288.0, 287.0, 292.0, 287.0, 289.0, 293.0, 288.0, 290.0, 292.0, 284.0, 295.0, 286.0, 293.0, 288.0, 294.0, 290.0, 283.0, 298.0, 281.0, 287.0, 300.0, 301.0, 281.0, 293.0, 289.0, 288.0, 291.0, 290.0, 292.0, 
287.0, 292.0, 283.0, 272.0, 288.0, 296.0, 293.0, 289.0, 288.0, 291.0, 288.0, 288.0, 293.0, 289.0, 290.0, 292.0, 284.0, 297.0, 275.0, 255.0, 286.0, 290.0, 275.0, 295.0, 293.0, 277.0, 287.0, 286.0, 292.0, 284.0, 288.0, 288.0, 290.0, 292.0, 293.0, 280.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6969268436790937, "mean_inference_ms": 1.2442765016526312, "mean_action_processing_ms": 0.13367506415155378, "mean_env_wait_ms": 0.8388712862759672, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 10035200, "num_agent_steps_trained": 10035200, "num_env_steps_sampled": 5017600, "num_env_steps_trained": 5017600, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 5017600, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 10035200, "timers": {"training_iteration_time_ms": 3669.587, "learn_time_ms": 1130.295, "learn_throughput": 11324.482, "synch_weights_time_ms": 12.194}, "counters": {"num_env_steps_sampled": 5017600, "num_env_steps_trained": 5017600, "num_agent_steps_sampled": 10035200, "num_agent_steps_trained": 10035200}, "done": false, "episodes_total": 12544, "training_iteration": 392, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-25-55", "timestamp": 1666581955, "time_this_iter_s": 3.7447876930236816, "time_total_s": 1496.298053741455, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1496.298053741455, "timesteps_since_restore": 0, "iterations_since_restore": 392, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.783333333333335, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 199.2, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 176.84, "shaped_reward_min": 138, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.48, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 17.32, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 15.33, "useful_onion_pickup_agent_0_min": 9, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 17.14, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.18, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.13, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.05, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, 
"useful_onion_drop_agent_1_mean": 0.08, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.0, "potting_onion_agent_0_min": 9, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 16.98, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.65, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 4.95, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.45, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 4.75, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.06, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.08, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.41, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 4.65, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.38, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.62, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.0, "optimal_onion_potting_agent_0_min": 9, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 16.98, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.0, "viable_onion_potting_agent_0_min": 9, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 16.98, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0007807082729414105, "policy_loss": 0.0004165899008512497, "vf_loss": 
7.79111385345459, "vf_explained_var": 0.5709396600723267, "kl": 0.0023730946704745293, "entropy": 0.8299859762191772, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 5030400, "num_env_steps_trained": 5030400, "num_agent_steps_sampled": 10060800, "num_agent_steps_trained": 10060800}, "sampler_results": {"episode_reward_max": 633.0, "episode_reward_min": 465.0, "episode_reward_mean": 575.24, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 226.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 287.62}, "custom_metrics": {"sparse_reward_mean": 199.2, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 176.84, "shaped_reward_min": 138, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.48, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 17.32, 
"onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 15.33, "useful_onion_pickup_agent_0_min": 9, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 17.14, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.18, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.13, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.05, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.08, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.0, "potting_onion_agent_0_min": 9, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 16.98, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.65, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 4.95, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.45, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 4.75, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.06, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.08, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.41, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 4.65, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.38, "soup_delivery_agent_0_min": 2, 
"soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.62, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.0, "optimal_onion_potting_agent_0_min": 9, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 16.98, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.0, "viable_onion_potting_agent_0_min": 9, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 16.98, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [576.0, 579.0, 582.0, 579.0, 579.0, 570.0, 582.0, 573.0, 582.0, 570.0, 582.0, 581.0, 567.0, 579.0, 519.0, 579.0, 581.0, 579.0, 570.0, 465.0, 576.0, 522.0, 627.0, 630.0, 582.0, 576.0, 579.0, 573.0, 579.0, 633.0, 576.0, 570.0, 498.0, 525.0, 579.0, 573.0, 579.0, 576.0, 581.0, 582.0, 579.0, 579.0, 582.0, 573.0, 579.0, 587.0, 582.0, 582.0, 579.0, 582.0, 579.0, 555.0, 584.0, 582.0, 579.0, 576.0, 582.0, 582.0, 581.0, 530.0, 576.0, 570.0, 570.0, 573.0, 576.0, 576.0, 582.0, 573.0, 582.0, 582.0, 579.0, 570.0, 576.0, 582.0, 587.0, 587.0, 582.0, 582.0, 579.0, 530.0, 627.0, 576.0, 579.0, 573.0, 582.0, 525.0, 579.0, 627.0, 576.0, 579.0, 579.0, 576.0, 627.0, 576.0, 579.0, 573.0, 573.0, 582.0, 525.0, 573.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [285.0, 291.0, 285.0, 294.0, 291.0, 291.0, 284.0, 295.0, 290.0, 289.0, 286.0, 284.0, 296.0, 286.0, 283.0, 290.0, 291.0, 291.0, 279.0, 291.0, 296.0, 286.0, 285.0, 296.0, 283.0, 284.0, 290.0, 289.0, 257.0, 262.0, 288.0, 291.0, 291.0, 290.0, 288.0, 291.0, 284.0, 286.0, 239.0, 226.0, 287.0, 289.0, 251.0, 271.0, 314.0, 313.0, 306.0, 324.0, 293.0, 
289.0, 290.0, 286.0, 291.0, 288.0, 279.0, 294.0, 291.0, 288.0, 324.0, 309.0, 296.0, 280.0, 282.0, 288.0, 250.0, 248.0, 275.0, 250.0, 288.0, 291.0, 285.0, 288.0, 287.0, 292.0, 287.0, 289.0, 293.0, 288.0, 290.0, 292.0, 284.0, 295.0, 286.0, 293.0, 288.0, 294.0, 290.0, 283.0, 298.0, 281.0, 287.0, 300.0, 301.0, 281.0, 293.0, 289.0, 288.0, 291.0, 290.0, 292.0, 287.0, 292.0, 283.0, 272.0, 288.0, 296.0, 293.0, 289.0, 288.0, 291.0, 288.0, 288.0, 293.0, 289.0, 290.0, 292.0, 284.0, 297.0, 275.0, 255.0, 286.0, 290.0, 275.0, 295.0, 293.0, 277.0, 287.0, 286.0, 292.0, 284.0, 288.0, 288.0, 290.0, 292.0, 293.0, 280.0, 294.0, 288.0, 288.0, 294.0, 285.0, 294.0, 295.0, 275.0, 291.0, 285.0, 290.0, 292.0, 290.0, 297.0, 295.0, 292.0, 296.0, 286.0, 293.0, 289.0, 291.0, 288.0, 257.0, 273.0, 311.0, 316.0, 288.0, 288.0, 278.0, 301.0, 283.0, 290.0, 289.0, 293.0, 259.0, 266.0, 296.0, 283.0, 313.0, 314.0, 290.0, 286.0, 285.0, 294.0, 282.0, 297.0, 293.0, 283.0, 311.0, 316.0, 296.0, 280.0, 291.0, 288.0, 293.0, 280.0, 293.0, 280.0, 288.0, 294.0, 263.0, 262.0, 285.0, 288.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6968894112824401, "mean_inference_ms": 1.2441523884701358, "mean_action_processing_ms": 0.1336716106768715, "mean_env_wait_ms": 0.8388130845267634, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 633.0, "episode_reward_min": 465.0, "episode_reward_mean": 575.24, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 226.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 287.62}, "hist_stats": {"episode_reward": [576.0, 579.0, 582.0, 579.0, 579.0, 570.0, 582.0, 573.0, 582.0, 570.0, 582.0, 581.0, 567.0, 579.0, 519.0, 579.0, 581.0, 579.0, 570.0, 465.0, 576.0, 522.0, 627.0, 630.0, 582.0, 576.0, 579.0, 573.0, 579.0, 633.0, 576.0, 570.0, 498.0, 525.0, 579.0, 573.0, 579.0, 576.0, 581.0, 582.0, 579.0, 579.0, 582.0, 573.0, 579.0, 587.0, 582.0, 582.0, 579.0, 582.0, 579.0, 555.0, 584.0, 582.0, 579.0, 576.0, 582.0, 
582.0, 581.0, 530.0, 576.0, 570.0, 570.0, 573.0, 576.0, 576.0, 582.0, 573.0, 582.0, 582.0, 579.0, 570.0, 576.0, 582.0, 587.0, 587.0, 582.0, 582.0, 579.0, 530.0, 627.0, 576.0, 579.0, 573.0, 582.0, 525.0, 579.0, 627.0, 576.0, 579.0, 579.0, 576.0, 627.0, 576.0, 579.0, 573.0, 573.0, 582.0, 525.0, 573.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [285.0, 291.0, 285.0, 294.0, 291.0, 291.0, 284.0, 295.0, 290.0, 289.0, 286.0, 284.0, 296.0, 286.0, 283.0, 290.0, 291.0, 291.0, 279.0, 291.0, 296.0, 286.0, 285.0, 296.0, 283.0, 284.0, 290.0, 289.0, 257.0, 262.0, 288.0, 291.0, 291.0, 290.0, 288.0, 291.0, 284.0, 286.0, 239.0, 226.0, 287.0, 289.0, 251.0, 271.0, 314.0, 313.0, 306.0, 324.0, 293.0, 289.0, 290.0, 286.0, 291.0, 288.0, 279.0, 294.0, 291.0, 288.0, 324.0, 309.0, 296.0, 280.0, 282.0, 288.0, 250.0, 248.0, 275.0, 250.0, 288.0, 291.0, 285.0, 288.0, 287.0, 292.0, 287.0, 289.0, 293.0, 288.0, 290.0, 292.0, 284.0, 295.0, 286.0, 293.0, 288.0, 294.0, 290.0, 283.0, 298.0, 281.0, 287.0, 300.0, 301.0, 281.0, 293.0, 289.0, 288.0, 291.0, 290.0, 292.0, 287.0, 292.0, 283.0, 272.0, 288.0, 296.0, 293.0, 289.0, 288.0, 291.0, 288.0, 288.0, 293.0, 289.0, 290.0, 292.0, 284.0, 297.0, 275.0, 255.0, 286.0, 290.0, 275.0, 295.0, 293.0, 277.0, 287.0, 286.0, 292.0, 284.0, 288.0, 288.0, 290.0, 292.0, 293.0, 280.0, 294.0, 288.0, 288.0, 294.0, 285.0, 294.0, 295.0, 275.0, 291.0, 285.0, 290.0, 292.0, 290.0, 297.0, 295.0, 292.0, 296.0, 286.0, 293.0, 289.0, 291.0, 288.0, 257.0, 273.0, 311.0, 316.0, 288.0, 288.0, 278.0, 
301.0, 283.0, 290.0, 289.0, 293.0, 259.0, 266.0, 296.0, 283.0, 313.0, 314.0, 290.0, 286.0, 285.0, 294.0, 282.0, 297.0, 293.0, 283.0, 311.0, 316.0, 296.0, 280.0, 291.0, 288.0, 293.0, 280.0, 293.0, 280.0, 288.0, 294.0, 263.0, 262.0, 285.0, 288.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6968894112824401, "mean_inference_ms": 1.2441523884701358, "mean_action_processing_ms": 0.1336716106768715, "mean_env_wait_ms": 0.8388130845267634, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 10060800, "num_agent_steps_trained": 10060800, "num_env_steps_sampled": 5030400, "num_env_steps_trained": 5030400, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 5030400, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 10060800, "timers": {"training_iteration_time_ms": 3647.035, "learn_time_ms": 1133.033, "learn_throughput": 11297.107, "synch_weights_time_ms": 11.676}, "counters": {"num_env_steps_sampled": 5030400, "num_env_steps_trained": 5030400, "num_agent_steps_sampled": 10060800, "num_agent_steps_trained": 10060800}, "done": false, "episodes_total": 12576, "training_iteration": 393, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-25-59", "timestamp": 1666581959, "time_this_iter_s": 3.6797895431518555, "time_total_s": 1499.977843284607, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1499.977843284607, "timesteps_since_restore": 0, "iterations_since_restore": 393, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 21.03333333333333, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 197.8, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 176.01, "shaped_reward_min": 138, "shaped_reward_max": 187, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.48, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 17.2, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 15.4, "useful_onion_pickup_agent_0_min": 9, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 17.05, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.15, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.14, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.04, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, 
"useful_onion_drop_agent_1_mean": 0.06, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.03, "potting_onion_agent_0_min": 9, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 16.82, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.57, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 4.98, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.37, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 4.75, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.08, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.09, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.32, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 4.7, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.28, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.64, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.03, "optimal_onion_potting_agent_0_min": 9, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 16.82, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.03, "viable_onion_potting_agent_0_min": 9, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 16.82, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.002479029353708029, "policy_loss": -0.0028482386842370033, "vf_loss": 
7.850776672363281, "vf_explained_var": 0.5610437393188477, "kl": 0.0021069832146167755, "entropy": 0.8317380547523499, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 5043200, "num_env_steps_trained": 5043200, "num_agent_steps_sampled": 10086400, "num_agent_steps_trained": 10086400}, "sampler_results": {"episode_reward_max": 627.0, "episode_reward_min": 419.0, "episode_reward_mean": 571.61, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 201.0}, "policy_reward_max": {"ppo": 316.0}, "policy_reward_mean": {"ppo": 285.805}, "custom_metrics": {"sparse_reward_mean": 197.8, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 176.01, "shaped_reward_min": 138, "shaped_reward_max": 187, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.48, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 17.2, 
"onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 15.4, "useful_onion_pickup_agent_0_min": 9, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 17.05, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.15, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.14, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.04, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.06, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.03, "potting_onion_agent_0_min": 9, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 16.82, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.57, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 4.98, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.37, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 4.75, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.08, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.09, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.32, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 4.7, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.28, "soup_delivery_agent_0_min": 2, 
"soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.64, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.03, "optimal_onion_potting_agent_0_min": 9, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 16.82, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.03, "viable_onion_potting_agent_0_min": 9, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 16.82, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [498.0, 525.0, 579.0, 573.0, 579.0, 576.0, 581.0, 582.0, 579.0, 579.0, 582.0, 573.0, 579.0, 587.0, 582.0, 582.0, 579.0, 582.0, 579.0, 555.0, 584.0, 582.0, 579.0, 576.0, 582.0, 582.0, 581.0, 530.0, 576.0, 570.0, 570.0, 573.0, 576.0, 576.0, 582.0, 573.0, 582.0, 582.0, 579.0, 570.0, 576.0, 582.0, 587.0, 587.0, 582.0, 582.0, 579.0, 530.0, 627.0, 576.0, 579.0, 573.0, 582.0, 525.0, 579.0, 627.0, 576.0, 579.0, 579.0, 576.0, 627.0, 576.0, 579.0, 573.0, 573.0, 582.0, 525.0, 573.0, 570.0, 579.0, 419.0, 579.0, 582.0, 579.0, 579.0, 587.0, 582.0, 573.0, 576.0, 573.0, 582.0, 582.0, 570.0, 582.0, 570.0, 573.0, 587.0, 570.0, 573.0, 576.0, 570.0, 579.0, 530.0, 567.0, 570.0, 465.0, 527.0, 582.0, 522.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [250.0, 248.0, 275.0, 250.0, 288.0, 291.0, 285.0, 288.0, 287.0, 292.0, 287.0, 289.0, 293.0, 288.0, 290.0, 292.0, 284.0, 295.0, 286.0, 293.0, 288.0, 294.0, 290.0, 283.0, 298.0, 281.0, 287.0, 300.0, 301.0, 281.0, 293.0, 289.0, 288.0, 291.0, 290.0, 292.0, 287.0, 292.0, 283.0, 272.0, 288.0, 296.0, 293.0, 289.0, 288.0, 291.0, 288.0, 288.0, 293.0, 
289.0, 290.0, 292.0, 284.0, 297.0, 275.0, 255.0, 286.0, 290.0, 275.0, 295.0, 293.0, 277.0, 287.0, 286.0, 292.0, 284.0, 288.0, 288.0, 290.0, 292.0, 293.0, 280.0, 294.0, 288.0, 288.0, 294.0, 285.0, 294.0, 295.0, 275.0, 291.0, 285.0, 290.0, 292.0, 290.0, 297.0, 295.0, 292.0, 296.0, 286.0, 293.0, 289.0, 291.0, 288.0, 257.0, 273.0, 311.0, 316.0, 288.0, 288.0, 278.0, 301.0, 283.0, 290.0, 289.0, 293.0, 259.0, 266.0, 296.0, 283.0, 313.0, 314.0, 290.0, 286.0, 285.0, 294.0, 282.0, 297.0, 293.0, 283.0, 311.0, 316.0, 296.0, 280.0, 291.0, 288.0, 293.0, 280.0, 293.0, 280.0, 288.0, 294.0, 263.0, 262.0, 285.0, 288.0, 284.0, 286.0, 284.0, 295.0, 218.0, 201.0, 295.0, 284.0, 288.0, 294.0, 288.0, 291.0, 293.0, 286.0, 290.0, 297.0, 294.0, 288.0, 294.0, 279.0, 285.0, 291.0, 288.0, 285.0, 290.0, 292.0, 285.0, 297.0, 279.0, 291.0, 292.0, 290.0, 285.0, 285.0, 278.0, 295.0, 291.0, 296.0, 279.0, 291.0, 284.0, 289.0, 280.0, 296.0, 279.0, 291.0, 290.0, 289.0, 264.0, 266.0, 283.0, 284.0, 279.0, 291.0, 235.0, 230.0, 265.0, 262.0, 286.0, 296.0, 262.0, 260.0, 288.0, 291.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6968450632443728, "mean_inference_ms": 1.2440176732192514, "mean_action_processing_ms": 0.1336656129888326, "mean_env_wait_ms": 0.838737991964549, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 627.0, "episode_reward_min": 419.0, "episode_reward_mean": 571.61, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 201.0}, "policy_reward_max": {"ppo": 316.0}, "policy_reward_mean": {"ppo": 285.805}, "hist_stats": {"episode_reward": [498.0, 525.0, 579.0, 573.0, 579.0, 576.0, 581.0, 582.0, 579.0, 579.0, 582.0, 573.0, 579.0, 587.0, 582.0, 582.0, 579.0, 582.0, 579.0, 555.0, 584.0, 582.0, 579.0, 576.0, 582.0, 582.0, 581.0, 530.0, 576.0, 570.0, 570.0, 573.0, 576.0, 576.0, 582.0, 573.0, 582.0, 582.0, 579.0, 570.0, 576.0, 582.0, 587.0, 587.0, 582.0, 582.0, 579.0, 530.0, 627.0, 576.0, 579.0, 573.0, 582.0, 525.0, 579.0, 627.0, 576.0, 
579.0, 579.0, 576.0, 627.0, 576.0, 579.0, 573.0, 573.0, 582.0, 525.0, 573.0, 570.0, 579.0, 419.0, 579.0, 582.0, 579.0, 579.0, 587.0, 582.0, 573.0, 576.0, 573.0, 582.0, 582.0, 570.0, 582.0, 570.0, 573.0, 587.0, 570.0, 573.0, 576.0, 570.0, 579.0, 530.0, 567.0, 570.0, 465.0, 527.0, 582.0, 522.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [250.0, 248.0, 275.0, 250.0, 288.0, 291.0, 285.0, 288.0, 287.0, 292.0, 287.0, 289.0, 293.0, 288.0, 290.0, 292.0, 284.0, 295.0, 286.0, 293.0, 288.0, 294.0, 290.0, 283.0, 298.0, 281.0, 287.0, 300.0, 301.0, 281.0, 293.0, 289.0, 288.0, 291.0, 290.0, 292.0, 287.0, 292.0, 283.0, 272.0, 288.0, 296.0, 293.0, 289.0, 288.0, 291.0, 288.0, 288.0, 293.0, 289.0, 290.0, 292.0, 284.0, 297.0, 275.0, 255.0, 286.0, 290.0, 275.0, 295.0, 293.0, 277.0, 287.0, 286.0, 292.0, 284.0, 288.0, 288.0, 290.0, 292.0, 293.0, 280.0, 294.0, 288.0, 288.0, 294.0, 285.0, 294.0, 295.0, 275.0, 291.0, 285.0, 290.0, 292.0, 290.0, 297.0, 295.0, 292.0, 296.0, 286.0, 293.0, 289.0, 291.0, 288.0, 257.0, 273.0, 311.0, 316.0, 288.0, 288.0, 278.0, 301.0, 283.0, 290.0, 289.0, 293.0, 259.0, 266.0, 296.0, 283.0, 313.0, 314.0, 290.0, 286.0, 285.0, 294.0, 282.0, 297.0, 293.0, 283.0, 311.0, 316.0, 296.0, 280.0, 291.0, 288.0, 293.0, 280.0, 293.0, 280.0, 288.0, 294.0, 263.0, 262.0, 285.0, 288.0, 284.0, 286.0, 284.0, 295.0, 218.0, 201.0, 295.0, 284.0, 288.0, 294.0, 288.0, 291.0, 293.0, 286.0, 290.0, 297.0, 294.0, 288.0, 294.0, 279.0, 285.0, 291.0, 288.0, 285.0, 290.0, 292.0, 285.0, 297.0, 279.0, 
291.0, 292.0, 290.0, 285.0, 285.0, 278.0, 295.0, 291.0, 296.0, 279.0, 291.0, 284.0, 289.0, 280.0, 296.0, 279.0, 291.0, 290.0, 289.0, 264.0, 266.0, 283.0, 284.0, 279.0, 291.0, 235.0, 230.0, 265.0, 262.0, 286.0, 296.0, 262.0, 260.0, 288.0, 291.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6968450632443728, "mean_inference_ms": 1.2440176732192514, "mean_action_processing_ms": 0.1336656129888326, "mean_env_wait_ms": 0.838737991964549, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 10086400, "num_agent_steps_trained": 10086400, "num_env_steps_sampled": 5043200, "num_env_steps_trained": 5043200, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 5043200, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 10086400, "timers": {"training_iteration_time_ms": 3612.599, "learn_time_ms": 1126.168, "learn_throughput": 11365.974, "synch_weights_time_ms": 11.582}, "counters": {"num_env_steps_sampled": 5043200, "num_env_steps_trained": 5043200, "num_agent_steps_sampled": 10086400, "num_agent_steps_trained": 10086400}, "done": false, "episodes_total": 12608, "training_iteration": 394, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-26-02", "timestamp": 1666581962, "time_this_iter_s": 3.534458875656128, "time_total_s": 1503.512302160263, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1503.512302160263, "timesteps_since_restore": 0, "iterations_since_restore": 394, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.339999999999996, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 196.8, "sparse_reward_min": 60, "sparse_reward_max": 220, "shaped_reward_mean": 175.26, "shaped_reward_min": 85, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.8, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 16.84, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 15.72, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 16.69, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.08, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.17, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.03, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, 
"useful_onion_drop_agent_1_mean": 0.05, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.41, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 16.29, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.42, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 5.06, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.22, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 4.9, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.08, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.05, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.1, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 4.86, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.06, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.8, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.41, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 16.29, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.41, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 16.29, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.000399833545088768, "policy_loss": -0.000769574660807848, "vf_loss": 
7.834109306335449, "vf_explained_var": 0.580340564250946, "kl": 0.0025107176043093204, "entropy": 0.827340841293335, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 5056000, "num_env_steps_trained": 5056000, "num_agent_steps_sampled": 10112000, "num_agent_steps_trained": 10112000}, "sampler_results": {"episode_reward_max": 633.0, "episode_reward_min": 205.0, "episode_reward_mean": 568.86, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 97.0}, "policy_reward_max": {"ppo": 317.0}, "policy_reward_mean": {"ppo": 284.43}, "custom_metrics": {"sparse_reward_mean": 196.8, "sparse_reward_min": 60, "sparse_reward_max": 220, "shaped_reward_mean": 175.26, "shaped_reward_min": 85, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.8, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 16.84, 
"onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 15.72, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 16.69, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.08, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.17, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.03, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.05, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.41, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 16.29, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.42, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 5.06, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.22, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 4.9, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.08, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.05, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.1, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 4.86, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.06, "soup_delivery_agent_0_min": 2, 
"soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.8, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.41, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 16.29, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.41, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 16.29, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [576.0, 576.0, 582.0, 573.0, 582.0, 582.0, 579.0, 570.0, 576.0, 582.0, 587.0, 587.0, 582.0, 582.0, 579.0, 530.0, 627.0, 576.0, 579.0, 573.0, 582.0, 525.0, 579.0, 627.0, 576.0, 579.0, 579.0, 576.0, 627.0, 576.0, 579.0, 573.0, 573.0, 582.0, 525.0, 573.0, 570.0, 579.0, 419.0, 579.0, 582.0, 579.0, 579.0, 587.0, 582.0, 573.0, 576.0, 573.0, 582.0, 582.0, 570.0, 582.0, 570.0, 573.0, 587.0, 570.0, 573.0, 576.0, 570.0, 579.0, 530.0, 567.0, 570.0, 465.0, 527.0, 582.0, 522.0, 579.0, 579.0, 582.0, 579.0, 498.0, 582.0, 570.0, 582.0, 579.0, 576.0, 573.0, 570.0, 579.0, 570.0, 576.0, 205.0, 587.0, 582.0, 576.0, 579.0, 582.0, 579.0, 584.0, 573.0, 579.0, 579.0, 633.0, 525.0, 573.0, 582.0, 573.0, 573.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [292.0, 284.0, 288.0, 288.0, 290.0, 292.0, 293.0, 280.0, 294.0, 288.0, 288.0, 294.0, 285.0, 294.0, 295.0, 275.0, 291.0, 285.0, 290.0, 292.0, 290.0, 297.0, 295.0, 292.0, 296.0, 286.0, 293.0, 289.0, 291.0, 288.0, 257.0, 273.0, 311.0, 316.0, 288.0, 288.0, 278.0, 301.0, 283.0, 290.0, 289.0, 293.0, 259.0, 266.0, 296.0, 283.0, 313.0, 314.0, 290.0, 
286.0, 285.0, 294.0, 282.0, 297.0, 293.0, 283.0, 311.0, 316.0, 296.0, 280.0, 291.0, 288.0, 293.0, 280.0, 293.0, 280.0, 288.0, 294.0, 263.0, 262.0, 285.0, 288.0, 284.0, 286.0, 284.0, 295.0, 218.0, 201.0, 295.0, 284.0, 288.0, 294.0, 288.0, 291.0, 293.0, 286.0, 290.0, 297.0, 294.0, 288.0, 294.0, 279.0, 285.0, 291.0, 288.0, 285.0, 290.0, 292.0, 285.0, 297.0, 279.0, 291.0, 292.0, 290.0, 285.0, 285.0, 278.0, 295.0, 291.0, 296.0, 279.0, 291.0, 284.0, 289.0, 280.0, 296.0, 279.0, 291.0, 290.0, 289.0, 264.0, 266.0, 283.0, 284.0, 279.0, 291.0, 235.0, 230.0, 265.0, 262.0, 286.0, 296.0, 262.0, 260.0, 288.0, 291.0, 294.0, 285.0, 293.0, 289.0, 290.0, 289.0, 241.0, 257.0, 291.0, 291.0, 286.0, 284.0, 291.0, 291.0, 291.0, 288.0, 285.0, 291.0, 286.0, 287.0, 275.0, 295.0, 285.0, 294.0, 285.0, 285.0, 294.0, 282.0, 108.0, 97.0, 296.0, 291.0, 290.0, 292.0, 284.0, 292.0, 292.0, 287.0, 290.0, 292.0, 293.0, 286.0, 302.0, 282.0, 283.0, 290.0, 286.0, 293.0, 285.0, 294.0, 316.0, 317.0, 269.0, 256.0, 293.0, 280.0, 291.0, 291.0, 280.0, 293.0, 291.0, 282.0, 293.0, 289.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.696793336703791, "mean_inference_ms": 1.243883291734237, "mean_action_processing_ms": 0.1336582079706314, "mean_env_wait_ms": 0.8386582320927474, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 633.0, "episode_reward_min": 205.0, "episode_reward_mean": 568.86, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 97.0}, "policy_reward_max": {"ppo": 317.0}, "policy_reward_mean": {"ppo": 284.43}, "hist_stats": {"episode_reward": [576.0, 576.0, 582.0, 573.0, 582.0, 582.0, 579.0, 570.0, 576.0, 582.0, 587.0, 587.0, 582.0, 582.0, 579.0, 530.0, 627.0, 576.0, 579.0, 573.0, 582.0, 525.0, 579.0, 627.0, 576.0, 579.0, 579.0, 576.0, 627.0, 576.0, 579.0, 573.0, 573.0, 582.0, 525.0, 573.0, 570.0, 579.0, 419.0, 579.0, 582.0, 579.0, 579.0, 587.0, 582.0, 573.0, 576.0, 573.0, 582.0, 582.0, 570.0, 582.0, 570.0, 573.0, 587.0, 570.0, 573.0, 
576.0, 570.0, 579.0, 530.0, 567.0, 570.0, 465.0, 527.0, 582.0, 522.0, 579.0, 579.0, 582.0, 579.0, 498.0, 582.0, 570.0, 582.0, 579.0, 576.0, 573.0, 570.0, 579.0, 570.0, 576.0, 205.0, 587.0, 582.0, 576.0, 579.0, 582.0, 579.0, 584.0, 573.0, 579.0, 579.0, 633.0, 525.0, 573.0, 582.0, 573.0, 573.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [292.0, 284.0, 288.0, 288.0, 290.0, 292.0, 293.0, 280.0, 294.0, 288.0, 288.0, 294.0, 285.0, 294.0, 295.0, 275.0, 291.0, 285.0, 290.0, 292.0, 290.0, 297.0, 295.0, 292.0, 296.0, 286.0, 293.0, 289.0, 291.0, 288.0, 257.0, 273.0, 311.0, 316.0, 288.0, 288.0, 278.0, 301.0, 283.0, 290.0, 289.0, 293.0, 259.0, 266.0, 296.0, 283.0, 313.0, 314.0, 290.0, 286.0, 285.0, 294.0, 282.0, 297.0, 293.0, 283.0, 311.0, 316.0, 296.0, 280.0, 291.0, 288.0, 293.0, 280.0, 293.0, 280.0, 288.0, 294.0, 263.0, 262.0, 285.0, 288.0, 284.0, 286.0, 284.0, 295.0, 218.0, 201.0, 295.0, 284.0, 288.0, 294.0, 288.0, 291.0, 293.0, 286.0, 290.0, 297.0, 294.0, 288.0, 294.0, 279.0, 285.0, 291.0, 288.0, 285.0, 290.0, 292.0, 285.0, 297.0, 279.0, 291.0, 292.0, 290.0, 285.0, 285.0, 278.0, 295.0, 291.0, 296.0, 279.0, 291.0, 284.0, 289.0, 280.0, 296.0, 279.0, 291.0, 290.0, 289.0, 264.0, 266.0, 283.0, 284.0, 279.0, 291.0, 235.0, 230.0, 265.0, 262.0, 286.0, 296.0, 262.0, 260.0, 288.0, 291.0, 294.0, 285.0, 293.0, 289.0, 290.0, 289.0, 241.0, 257.0, 291.0, 291.0, 286.0, 284.0, 291.0, 291.0, 291.0, 288.0, 285.0, 291.0, 286.0, 287.0, 275.0, 295.0, 285.0, 294.0, 285.0, 285.0, 294.0, 282.0, 108.0, 
97.0, 296.0, 291.0, 290.0, 292.0, 284.0, 292.0, 292.0, 287.0, 290.0, 292.0, 293.0, 286.0, 302.0, 282.0, 283.0, 290.0, 286.0, 293.0, 285.0, 294.0, 316.0, 317.0, 269.0, 256.0, 293.0, 280.0, 291.0, 291.0, 280.0, 293.0, 291.0, 282.0, 293.0, 289.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.696793336703791, "mean_inference_ms": 1.243883291734237, "mean_action_processing_ms": 0.1336582079706314, "mean_env_wait_ms": 0.8386582320927474, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 10112000, "num_agent_steps_trained": 10112000, "num_env_steps_sampled": 5056000, "num_env_steps_trained": 5056000, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 5056000, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 10112000, "timers": {"training_iteration_time_ms": 3593.292, "learn_time_ms": 1112.946, "learn_throughput": 11501.003, "synch_weights_time_ms": 11.562}, "counters": {"num_env_steps_sampled": 5056000, "num_env_steps_trained": 5056000, "num_agent_steps_sampled": 10112000, "num_agent_steps_trained": 10112000}, "done": false, "episodes_total": 12640, "training_iteration": 395, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-26-06", "timestamp": 1666581966, "time_this_iter_s": 3.558100461959839, "time_total_s": 1507.070402622223, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1507.070402622223, "timesteps_since_restore": 0, "iterations_since_restore": 395, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.6, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 195.6, "sparse_reward_min": 60, "sparse_reward_max": 220, "shaped_reward_mean": 173.93, "shaped_reward_min": 85, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.79, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 16.75, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 15.67, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 16.59, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.09, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.13, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.03, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, 
"useful_onion_drop_agent_1_mean": 0.04, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.31, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 16.21, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.34, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 7, "dish_pickup_agent_1_mean": 5.09, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.09, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 4.9, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.11, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.06, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.0, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 4.88, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.97, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.83, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.31, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 16.21, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.31, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 16.21, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0011330365668982267, "policy_loss": 0.0007607118459418416, "vf_loss": 
7.895255088806152, "vf_explained_var": 0.5666273236274719, "kl": 0.0026027029380202293, "entropy": 0.8344019055366516, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 5068800, "num_env_steps_trained": 5068800, "num_agent_steps_sampled": 10137600, "num_agent_steps_trained": 10137600}, "sampler_results": {"episode_reward_max": 633.0, "episode_reward_min": 205.0, "episode_reward_mean": 565.13, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 97.0}, "policy_reward_max": {"ppo": 317.0}, "policy_reward_mean": {"ppo": 282.565}, "custom_metrics": {"sparse_reward_mean": 195.6, "sparse_reward_min": 60, "sparse_reward_max": 220, "shaped_reward_mean": 173.93, "shaped_reward_min": 85, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.79, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 16.75, 
"onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 15.67, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 16.59, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.09, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.13, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.03, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.04, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.31, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 16.21, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.34, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 7, "dish_pickup_agent_1_mean": 5.09, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.09, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 4.9, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.11, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.06, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.0, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 4.88, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.97, "soup_delivery_agent_0_min": 2, 
"soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.83, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.31, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 16.21, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.31, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 16.21, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [573.0, 582.0, 525.0, 573.0, 570.0, 579.0, 419.0, 579.0, 582.0, 579.0, 579.0, 587.0, 582.0, 573.0, 576.0, 573.0, 582.0, 582.0, 570.0, 582.0, 570.0, 573.0, 587.0, 570.0, 573.0, 576.0, 570.0, 579.0, 530.0, 567.0, 570.0, 465.0, 527.0, 582.0, 522.0, 579.0, 579.0, 582.0, 579.0, 498.0, 582.0, 570.0, 582.0, 579.0, 576.0, 573.0, 570.0, 579.0, 570.0, 576.0, 205.0, 587.0, 582.0, 576.0, 579.0, 582.0, 579.0, 584.0, 573.0, 579.0, 579.0, 633.0, 525.0, 573.0, 582.0, 573.0, 573.0, 582.0, 525.0, 573.0, 570.0, 576.0, 516.0, 627.0, 579.0, 579.0, 573.0, 579.0, 582.0, 579.0, 576.0, 579.0, 576.0, 525.0, 627.0, 576.0, 530.0, 522.0, 582.0, 579.0, 573.0, 579.0, 573.0, 525.0, 576.0, 582.0, 576.0, 570.0, 525.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [293.0, 280.0, 288.0, 294.0, 263.0, 262.0, 285.0, 288.0, 284.0, 286.0, 284.0, 295.0, 218.0, 201.0, 295.0, 284.0, 288.0, 294.0, 288.0, 291.0, 293.0, 286.0, 290.0, 297.0, 294.0, 288.0, 294.0, 279.0, 285.0, 291.0, 288.0, 285.0, 290.0, 292.0, 285.0, 297.0, 279.0, 291.0, 292.0, 290.0, 285.0, 285.0, 278.0, 295.0, 291.0, 296.0, 279.0, 291.0, 284.0, 
289.0, 280.0, 296.0, 279.0, 291.0, 290.0, 289.0, 264.0, 266.0, 283.0, 284.0, 279.0, 291.0, 235.0, 230.0, 265.0, 262.0, 286.0, 296.0, 262.0, 260.0, 288.0, 291.0, 294.0, 285.0, 293.0, 289.0, 290.0, 289.0, 241.0, 257.0, 291.0, 291.0, 286.0, 284.0, 291.0, 291.0, 291.0, 288.0, 285.0, 291.0, 286.0, 287.0, 275.0, 295.0, 285.0, 294.0, 285.0, 285.0, 294.0, 282.0, 108.0, 97.0, 296.0, 291.0, 290.0, 292.0, 284.0, 292.0, 292.0, 287.0, 290.0, 292.0, 293.0, 286.0, 302.0, 282.0, 283.0, 290.0, 286.0, 293.0, 285.0, 294.0, 316.0, 317.0, 269.0, 256.0, 293.0, 280.0, 291.0, 291.0, 280.0, 293.0, 291.0, 282.0, 293.0, 289.0, 259.0, 266.0, 289.0, 284.0, 282.0, 288.0, 293.0, 283.0, 251.0, 265.0, 310.0, 317.0, 290.0, 289.0, 282.0, 297.0, 287.0, 286.0, 288.0, 291.0, 288.0, 294.0, 294.0, 285.0, 287.0, 289.0, 289.0, 290.0, 291.0, 285.0, 260.0, 265.0, 313.0, 314.0, 285.0, 291.0, 262.0, 268.0, 261.0, 261.0, 294.0, 288.0, 285.0, 294.0, 285.0, 288.0, 293.0, 286.0, 285.0, 288.0, 260.0, 265.0, 289.0, 287.0, 287.0, 295.0, 285.0, 291.0, 285.0, 285.0, 263.0, 262.0, 275.0, 301.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6967359713771896, "mean_inference_ms": 1.2437290472164755, "mean_action_processing_ms": 0.13364784707998414, "mean_env_wait_ms": 0.838560435275775, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 633.0, "episode_reward_min": 205.0, "episode_reward_mean": 565.13, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 97.0}, "policy_reward_max": {"ppo": 317.0}, "policy_reward_mean": {"ppo": 282.565}, "hist_stats": {"episode_reward": [573.0, 582.0, 525.0, 573.0, 570.0, 579.0, 419.0, 579.0, 582.0, 579.0, 579.0, 587.0, 582.0, 573.0, 576.0, 573.0, 582.0, 582.0, 570.0, 582.0, 570.0, 573.0, 587.0, 570.0, 573.0, 576.0, 570.0, 579.0, 530.0, 567.0, 570.0, 465.0, 527.0, 582.0, 522.0, 579.0, 579.0, 582.0, 579.0, 498.0, 582.0, 570.0, 582.0, 579.0, 576.0, 573.0, 570.0, 579.0, 570.0, 576.0, 205.0, 587.0, 582.0, 576.0, 579.0, 582.0, 579.0, 
584.0, 573.0, 579.0, 579.0, 633.0, 525.0, 573.0, 582.0, 573.0, 573.0, 582.0, 525.0, 573.0, 570.0, 576.0, 516.0, 627.0, 579.0, 579.0, 573.0, 579.0, 582.0, 579.0, 576.0, 579.0, 576.0, 525.0, 627.0, 576.0, 530.0, 522.0, 582.0, 579.0, 573.0, 579.0, 573.0, 525.0, 576.0, 582.0, 576.0, 570.0, 525.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [293.0, 280.0, 288.0, 294.0, 263.0, 262.0, 285.0, 288.0, 284.0, 286.0, 284.0, 295.0, 218.0, 201.0, 295.0, 284.0, 288.0, 294.0, 288.0, 291.0, 293.0, 286.0, 290.0, 297.0, 294.0, 288.0, 294.0, 279.0, 285.0, 291.0, 288.0, 285.0, 290.0, 292.0, 285.0, 297.0, 279.0, 291.0, 292.0, 290.0, 285.0, 285.0, 278.0, 295.0, 291.0, 296.0, 279.0, 291.0, 284.0, 289.0, 280.0, 296.0, 279.0, 291.0, 290.0, 289.0, 264.0, 266.0, 283.0, 284.0, 279.0, 291.0, 235.0, 230.0, 265.0, 262.0, 286.0, 296.0, 262.0, 260.0, 288.0, 291.0, 294.0, 285.0, 293.0, 289.0, 290.0, 289.0, 241.0, 257.0, 291.0, 291.0, 286.0, 284.0, 291.0, 291.0, 291.0, 288.0, 285.0, 291.0, 286.0, 287.0, 275.0, 295.0, 285.0, 294.0, 285.0, 285.0, 294.0, 282.0, 108.0, 97.0, 296.0, 291.0, 290.0, 292.0, 284.0, 292.0, 292.0, 287.0, 290.0, 292.0, 293.0, 286.0, 302.0, 282.0, 283.0, 290.0, 286.0, 293.0, 285.0, 294.0, 316.0, 317.0, 269.0, 256.0, 293.0, 280.0, 291.0, 291.0, 280.0, 293.0, 291.0, 282.0, 293.0, 289.0, 259.0, 266.0, 289.0, 284.0, 282.0, 288.0, 293.0, 283.0, 251.0, 265.0, 310.0, 317.0, 290.0, 289.0, 282.0, 297.0, 287.0, 286.0, 288.0, 291.0, 288.0, 294.0, 294.0, 285.0, 287.0, 289.0, 289.0, 290.0, 291.0, 
285.0, 260.0, 265.0, 313.0, 314.0, 285.0, 291.0, 262.0, 268.0, 261.0, 261.0, 294.0, 288.0, 285.0, 294.0, 285.0, 288.0, 293.0, 286.0, 285.0, 288.0, 260.0, 265.0, 289.0, 287.0, 287.0, 295.0, 285.0, 291.0, 285.0, 285.0, 263.0, 262.0, 275.0, 301.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6967359713771896, "mean_inference_ms": 1.2437290472164755, "mean_action_processing_ms": 0.13364784707998414, "mean_env_wait_ms": 0.838560435275775, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 10137600, "num_agent_steps_trained": 10137600, "num_env_steps_sampled": 5068800, "num_env_steps_trained": 5068800, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 5068800, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 10137600, "timers": {"training_iteration_time_ms": 3587.318, "learn_time_ms": 1109.296, "learn_throughput": 11538.847, "synch_weights_time_ms": 10.745}, "counters": {"num_env_steps_sampled": 5068800, "num_env_steps_trained": 5068800, "num_agent_steps_sampled": 10137600, "num_agent_steps_trained": 10137600}, "done": false, "episodes_total": 12672, "training_iteration": 396, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-26-10", "timestamp": 1666581970, "time_this_iter_s": 3.577162981033325, "time_total_s": 1510.6475656032562, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1510.6475656032562, "timesteps_since_restore": 0, "iterations_since_restore": 396, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.68, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 195.8, "sparse_reward_min": 60, "sparse_reward_max": 220, "shaped_reward_mean": 174.43, "shaped_reward_min": 85, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.24, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 16.45, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.06, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 16.27, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.15, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.13, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.06, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, 
"useful_onion_drop_agent_1_mean": 0.03, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.74, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 15.88, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.28, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 5.14, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.08, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.01, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.1, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.05, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 4.91, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 4.95, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.88, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.92, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.74, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 15.88, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.74, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 15.88, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.003785345470532775, "policy_loss": 0.0034148150589317083, "vf_loss": 
7.838505744934082, "vf_explained_var": 0.5580576062202454, "kl": 0.00246319267898798, "entropy": 0.8266406059265137, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 5081600, "num_env_steps_trained": 5081600, "num_agent_steps_sampled": 10163200, "num_agent_steps_trained": 10163200}, "sampler_results": {"episode_reward_max": 633.0, "episode_reward_min": 205.0, "episode_reward_mean": 566.03, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 97.0}, "policy_reward_max": {"ppo": 317.0}, "policy_reward_mean": {"ppo": 283.015}, "custom_metrics": {"sparse_reward_mean": 195.8, "sparse_reward_min": 60, "sparse_reward_max": 220, "shaped_reward_mean": 174.43, "shaped_reward_min": 85, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.24, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 16.45, 
"onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.06, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 16.27, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.15, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.13, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.06, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.03, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.74, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 15.88, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.28, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 5.14, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.08, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.01, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.1, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.05, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 4.91, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 4.95, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.88, "soup_delivery_agent_0_min": 2, 
"soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.92, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.74, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 15.88, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.74, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 15.88, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [527.0, 582.0, 522.0, 579.0, 579.0, 582.0, 579.0, 498.0, 582.0, 570.0, 582.0, 579.0, 576.0, 573.0, 570.0, 579.0, 570.0, 576.0, 205.0, 587.0, 582.0, 576.0, 579.0, 582.0, 579.0, 584.0, 573.0, 579.0, 579.0, 633.0, 525.0, 573.0, 582.0, 573.0, 573.0, 582.0, 525.0, 573.0, 570.0, 576.0, 516.0, 627.0, 579.0, 579.0, 573.0, 579.0, 582.0, 579.0, 576.0, 579.0, 576.0, 525.0, 627.0, 576.0, 530.0, 522.0, 582.0, 579.0, 573.0, 579.0, 573.0, 525.0, 576.0, 582.0, 576.0, 570.0, 525.0, 576.0, 582.0, 576.0, 582.0, 468.0, 570.0, 570.0, 576.0, 582.0, 519.0, 579.0, 579.0, 525.0, 587.0, 579.0, 582.0, 582.0, 570.0, 522.0, 579.0, 576.0, 525.0, 579.0, 579.0, 570.0, 582.0, 576.0, 582.0, 579.0, 573.0, 576.0, 582.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [265.0, 262.0, 286.0, 296.0, 262.0, 260.0, 288.0, 291.0, 294.0, 285.0, 293.0, 289.0, 290.0, 289.0, 241.0, 257.0, 291.0, 291.0, 286.0, 284.0, 291.0, 291.0, 291.0, 288.0, 285.0, 291.0, 286.0, 287.0, 275.0, 295.0, 285.0, 294.0, 285.0, 285.0, 294.0, 282.0, 108.0, 97.0, 296.0, 291.0, 290.0, 292.0, 284.0, 292.0, 292.0, 287.0, 290.0, 292.0, 293.0, 
286.0, 302.0, 282.0, 283.0, 290.0, 286.0, 293.0, 285.0, 294.0, 316.0, 317.0, 269.0, 256.0, 293.0, 280.0, 291.0, 291.0, 280.0, 293.0, 291.0, 282.0, 293.0, 289.0, 259.0, 266.0, 289.0, 284.0, 282.0, 288.0, 293.0, 283.0, 251.0, 265.0, 310.0, 317.0, 290.0, 289.0, 282.0, 297.0, 287.0, 286.0, 288.0, 291.0, 288.0, 294.0, 294.0, 285.0, 287.0, 289.0, 289.0, 290.0, 291.0, 285.0, 260.0, 265.0, 313.0, 314.0, 285.0, 291.0, 262.0, 268.0, 261.0, 261.0, 294.0, 288.0, 285.0, 294.0, 285.0, 288.0, 293.0, 286.0, 285.0, 288.0, 260.0, 265.0, 289.0, 287.0, 287.0, 295.0, 285.0, 291.0, 285.0, 285.0, 263.0, 262.0, 275.0, 301.0, 296.0, 286.0, 292.0, 284.0, 288.0, 294.0, 236.0, 232.0, 285.0, 285.0, 282.0, 288.0, 285.0, 291.0, 288.0, 294.0, 267.0, 252.0, 290.0, 289.0, 294.0, 285.0, 258.0, 267.0, 291.0, 296.0, 286.0, 293.0, 290.0, 292.0, 293.0, 289.0, 283.0, 287.0, 268.0, 254.0, 288.0, 291.0, 288.0, 288.0, 268.0, 257.0, 289.0, 290.0, 288.0, 291.0, 287.0, 283.0, 296.0, 286.0, 291.0, 285.0, 295.0, 287.0, 294.0, 285.0, 286.0, 287.0, 286.0, 290.0, 287.0, 295.0, 289.0, 290.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6966830217276799, "mean_inference_ms": 1.2435711010311095, "mean_action_processing_ms": 0.1336373918179333, "mean_env_wait_ms": 0.8384640728388132, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 633.0, "episode_reward_min": 205.0, "episode_reward_mean": 566.03, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 97.0}, "policy_reward_max": {"ppo": 317.0}, "policy_reward_mean": {"ppo": 283.015}, "hist_stats": {"episode_reward": [527.0, 582.0, 522.0, 579.0, 579.0, 582.0, 579.0, 498.0, 582.0, 570.0, 582.0, 579.0, 576.0, 573.0, 570.0, 579.0, 570.0, 576.0, 205.0, 587.0, 582.0, 576.0, 579.0, 582.0, 579.0, 584.0, 573.0, 579.0, 579.0, 633.0, 525.0, 573.0, 582.0, 573.0, 573.0, 582.0, 525.0, 573.0, 570.0, 576.0, 516.0, 627.0, 579.0, 579.0, 573.0, 579.0, 582.0, 579.0, 576.0, 579.0, 576.0, 525.0, 627.0, 576.0, 530.0, 522.0, 582.0, 
579.0, 573.0, 579.0, 573.0, 525.0, 576.0, 582.0, 576.0, 570.0, 525.0, 576.0, 582.0, 576.0, 582.0, 468.0, 570.0, 570.0, 576.0, 582.0, 519.0, 579.0, 579.0, 525.0, 587.0, 579.0, 582.0, 582.0, 570.0, 522.0, 579.0, 576.0, 525.0, 579.0, 579.0, 570.0, 582.0, 576.0, 582.0, 579.0, 573.0, 576.0, 582.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [265.0, 262.0, 286.0, 296.0, 262.0, 260.0, 288.0, 291.0, 294.0, 285.0, 293.0, 289.0, 290.0, 289.0, 241.0, 257.0, 291.0, 291.0, 286.0, 284.0, 291.0, 291.0, 291.0, 288.0, 285.0, 291.0, 286.0, 287.0, 275.0, 295.0, 285.0, 294.0, 285.0, 285.0, 294.0, 282.0, 108.0, 97.0, 296.0, 291.0, 290.0, 292.0, 284.0, 292.0, 292.0, 287.0, 290.0, 292.0, 293.0, 286.0, 302.0, 282.0, 283.0, 290.0, 286.0, 293.0, 285.0, 294.0, 316.0, 317.0, 269.0, 256.0, 293.0, 280.0, 291.0, 291.0, 280.0, 293.0, 291.0, 282.0, 293.0, 289.0, 259.0, 266.0, 289.0, 284.0, 282.0, 288.0, 293.0, 283.0, 251.0, 265.0, 310.0, 317.0, 290.0, 289.0, 282.0, 297.0, 287.0, 286.0, 288.0, 291.0, 288.0, 294.0, 294.0, 285.0, 287.0, 289.0, 289.0, 290.0, 291.0, 285.0, 260.0, 265.0, 313.0, 314.0, 285.0, 291.0, 262.0, 268.0, 261.0, 261.0, 294.0, 288.0, 285.0, 294.0, 285.0, 288.0, 293.0, 286.0, 285.0, 288.0, 260.0, 265.0, 289.0, 287.0, 287.0, 295.0, 285.0, 291.0, 285.0, 285.0, 263.0, 262.0, 275.0, 301.0, 296.0, 286.0, 292.0, 284.0, 288.0, 294.0, 236.0, 232.0, 285.0, 285.0, 282.0, 288.0, 285.0, 291.0, 288.0, 294.0, 267.0, 252.0, 290.0, 289.0, 294.0, 285.0, 258.0, 267.0, 291.0, 296.0, 286.0, 293.0, 290.0, 
292.0, 293.0, 289.0, 283.0, 287.0, 268.0, 254.0, 288.0, 291.0, 288.0, 288.0, 268.0, 257.0, 289.0, 290.0, 288.0, 291.0, 287.0, 283.0, 296.0, 286.0, 291.0, 285.0, 295.0, 287.0, 294.0, 285.0, 286.0, 287.0, 286.0, 290.0, 287.0, 295.0, 289.0, 290.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6966830217276799, "mean_inference_ms": 1.2435711010311095, "mean_action_processing_ms": 0.1336373918179333, "mean_env_wait_ms": 0.8384640728388132, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 10163200, "num_agent_steps_trained": 10163200, "num_env_steps_sampled": 5081600, "num_env_steps_trained": 5081600, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 5081600, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 10163200, "timers": {"training_iteration_time_ms": 3585.77, "learn_time_ms": 1112.588, "learn_throughput": 11504.708, "synch_weights_time_ms": 11.765}, "counters": {"num_env_steps_sampled": 5081600, "num_env_steps_trained": 5081600, "num_agent_steps_sampled": 10163200, "num_agent_steps_trained": 10163200}, "done": false, "episodes_total": 12704, "training_iteration": 397, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-26-14", "timestamp": 1666581974, "time_this_iter_s": 3.645744562149048, "time_total_s": 1514.2933101654053, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1514.2933101654053, "timesteps_since_restore": 0, "iterations_since_restore": 397, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.55, "ram_util_percent": 10.616666666666667}}
+{"custom_metrics": {"sparse_reward_mean": 195.6, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 173.9, "shaped_reward_min": 138, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.27, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 16.21, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.09, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 16.05, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.18, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.11, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.08, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, 
"useful_onion_drop_agent_1_mean": 0.04, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.78, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 15.71, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.2, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 5.25, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 4.99, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.07, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.09, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.1, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 4.89, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 4.96, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.88, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.91, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.78, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 15.71, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.78, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 15.71, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.002451391890645027, "policy_loss": -0.0028326634783297777, "vf_loss": 
7.9434614181518555, "vf_explained_var": 0.5600583553314209, "kl": 0.002439986914396286, "entropy": 0.8261496424674988, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 5094400, "num_env_steps_trained": 5094400, "num_agent_steps_sampled": 10188800, "num_agent_steps_trained": 10188800}, "sampler_results": {"episode_reward_max": 627.0, "episode_reward_min": 468.0, "episode_reward_mean": 565.1, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 232.0}, "policy_reward_max": {"ppo": 317.0}, "policy_reward_mean": {"ppo": 282.55}, "custom_metrics": {"sparse_reward_mean": 195.6, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 173.9, "shaped_reward_min": 138, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.27, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 16.21, 
"onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.09, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 16.05, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.18, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.11, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.08, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.04, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.78, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 15.71, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.2, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 5.25, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 4.99, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.07, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.09, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.1, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 4.89, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 4.96, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.88, "soup_delivery_agent_0_min": 2, 
"soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.91, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.78, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 15.71, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.78, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 15.71, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 573.0, 573.0, 582.0, 525.0, 573.0, 570.0, 576.0, 516.0, 627.0, 579.0, 579.0, 573.0, 579.0, 582.0, 579.0, 576.0, 579.0, 576.0, 525.0, 627.0, 576.0, 530.0, 522.0, 582.0, 579.0, 573.0, 579.0, 573.0, 525.0, 576.0, 582.0, 576.0, 570.0, 525.0, 576.0, 582.0, 576.0, 582.0, 468.0, 570.0, 570.0, 576.0, 582.0, 519.0, 579.0, 579.0, 525.0, 587.0, 579.0, 582.0, 582.0, 570.0, 522.0, 579.0, 576.0, 525.0, 579.0, 579.0, 570.0, 582.0, 576.0, 582.0, 579.0, 573.0, 576.0, 582.0, 579.0, 525.0, 573.0, 582.0, 530.0, 573.0, 590.0, 525.0, 530.0, 498.0, 576.0, 576.0, 582.0, 573.0, 522.0, 573.0, 522.0, 573.0, 579.0, 519.0, 527.0, 516.0, 576.0, 581.0, 573.0, 573.0, 579.0, 525.0, 582.0, 564.0, 579.0, 576.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [291.0, 291.0, 280.0, 293.0, 291.0, 282.0, 293.0, 289.0, 259.0, 266.0, 289.0, 284.0, 282.0, 288.0, 293.0, 283.0, 251.0, 265.0, 310.0, 317.0, 290.0, 289.0, 282.0, 297.0, 287.0, 286.0, 288.0, 291.0, 288.0, 294.0, 294.0, 285.0, 287.0, 289.0, 289.0, 290.0, 291.0, 285.0, 260.0, 265.0, 313.0, 314.0, 285.0, 291.0, 262.0, 268.0, 261.0, 261.0, 294.0, 
288.0, 285.0, 294.0, 285.0, 288.0, 293.0, 286.0, 285.0, 288.0, 260.0, 265.0, 289.0, 287.0, 287.0, 295.0, 285.0, 291.0, 285.0, 285.0, 263.0, 262.0, 275.0, 301.0, 296.0, 286.0, 292.0, 284.0, 288.0, 294.0, 236.0, 232.0, 285.0, 285.0, 282.0, 288.0, 285.0, 291.0, 288.0, 294.0, 267.0, 252.0, 290.0, 289.0, 294.0, 285.0, 258.0, 267.0, 291.0, 296.0, 286.0, 293.0, 290.0, 292.0, 293.0, 289.0, 283.0, 287.0, 268.0, 254.0, 288.0, 291.0, 288.0, 288.0, 268.0, 257.0, 289.0, 290.0, 288.0, 291.0, 287.0, 283.0, 296.0, 286.0, 291.0, 285.0, 295.0, 287.0, 294.0, 285.0, 286.0, 287.0, 286.0, 290.0, 287.0, 295.0, 289.0, 290.0, 260.0, 265.0, 283.0, 290.0, 292.0, 290.0, 262.0, 268.0, 290.0, 283.0, 297.0, 293.0, 264.0, 261.0, 260.0, 270.0, 249.0, 249.0, 285.0, 291.0, 289.0, 287.0, 291.0, 291.0, 285.0, 288.0, 268.0, 254.0, 288.0, 285.0, 264.0, 258.0, 283.0, 290.0, 288.0, 291.0, 266.0, 253.0, 270.0, 257.0, 255.0, 261.0, 296.0, 280.0, 284.0, 297.0, 285.0, 288.0, 289.0, 284.0, 288.0, 291.0, 265.0, 260.0, 287.0, 295.0, 282.0, 282.0, 288.0, 291.0, 285.0, 291.0, 292.0, 284.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6966483530142179, "mean_inference_ms": 1.2434967982497485, "mean_action_processing_ms": 0.13362693614740914, "mean_env_wait_ms": 0.838467798835966, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 627.0, "episode_reward_min": 468.0, "episode_reward_mean": 565.1, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 232.0}, "policy_reward_max": {"ppo": 317.0}, "policy_reward_mean": {"ppo": 282.55}, "hist_stats": {"episode_reward": [582.0, 573.0, 573.0, 582.0, 525.0, 573.0, 570.0, 576.0, 516.0, 627.0, 579.0, 579.0, 573.0, 579.0, 582.0, 579.0, 576.0, 579.0, 576.0, 525.0, 627.0, 576.0, 530.0, 522.0, 582.0, 579.0, 573.0, 579.0, 573.0, 525.0, 576.0, 582.0, 576.0, 570.0, 525.0, 576.0, 582.0, 576.0, 582.0, 468.0, 570.0, 570.0, 576.0, 582.0, 519.0, 579.0, 579.0, 525.0, 587.0, 579.0, 582.0, 582.0, 570.0, 522.0, 579.0, 576.0, 525.0, 
579.0, 579.0, 570.0, 582.0, 576.0, 582.0, 579.0, 573.0, 576.0, 582.0, 579.0, 525.0, 573.0, 582.0, 530.0, 573.0, 590.0, 525.0, 530.0, 498.0, 576.0, 576.0, 582.0, 573.0, 522.0, 573.0, 522.0, 573.0, 579.0, 519.0, 527.0, 516.0, 576.0, 581.0, 573.0, 573.0, 579.0, 525.0, 582.0, 564.0, 579.0, 576.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [291.0, 291.0, 280.0, 293.0, 291.0, 282.0, 293.0, 289.0, 259.0, 266.0, 289.0, 284.0, 282.0, 288.0, 293.0, 283.0, 251.0, 265.0, 310.0, 317.0, 290.0, 289.0, 282.0, 297.0, 287.0, 286.0, 288.0, 291.0, 288.0, 294.0, 294.0, 285.0, 287.0, 289.0, 289.0, 290.0, 291.0, 285.0, 260.0, 265.0, 313.0, 314.0, 285.0, 291.0, 262.0, 268.0, 261.0, 261.0, 294.0, 288.0, 285.0, 294.0, 285.0, 288.0, 293.0, 286.0, 285.0, 288.0, 260.0, 265.0, 289.0, 287.0, 287.0, 295.0, 285.0, 291.0, 285.0, 285.0, 263.0, 262.0, 275.0, 301.0, 296.0, 286.0, 292.0, 284.0, 288.0, 294.0, 236.0, 232.0, 285.0, 285.0, 282.0, 288.0, 285.0, 291.0, 288.0, 294.0, 267.0, 252.0, 290.0, 289.0, 294.0, 285.0, 258.0, 267.0, 291.0, 296.0, 286.0, 293.0, 290.0, 292.0, 293.0, 289.0, 283.0, 287.0, 268.0, 254.0, 288.0, 291.0, 288.0, 288.0, 268.0, 257.0, 289.0, 290.0, 288.0, 291.0, 287.0, 283.0, 296.0, 286.0, 291.0, 285.0, 295.0, 287.0, 294.0, 285.0, 286.0, 287.0, 286.0, 290.0, 287.0, 295.0, 289.0, 290.0, 260.0, 265.0, 283.0, 290.0, 292.0, 290.0, 262.0, 268.0, 290.0, 283.0, 297.0, 293.0, 264.0, 261.0, 260.0, 270.0, 249.0, 249.0, 285.0, 291.0, 289.0, 287.0, 291.0, 291.0, 285.0, 288.0, 268.0, 254.0, 288.0, 
285.0, 264.0, 258.0, 283.0, 290.0, 288.0, 291.0, 266.0, 253.0, 270.0, 257.0, 255.0, 261.0, 296.0, 280.0, 284.0, 297.0, 285.0, 288.0, 289.0, 284.0, 288.0, 291.0, 265.0, 260.0, 287.0, 295.0, 282.0, 282.0, 288.0, 291.0, 285.0, 291.0, 292.0, 284.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6966483530142179, "mean_inference_ms": 1.2434967982497485, "mean_action_processing_ms": 0.13362693614740914, "mean_env_wait_ms": 0.838467798835966, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 10188800, "num_agent_steps_trained": 10188800, "num_env_steps_sampled": 5094400, "num_env_steps_trained": 5094400, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 5094400, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 10188800, "timers": {"training_iteration_time_ms": 3615.429, "learn_time_ms": 1112.523, "learn_throughput": 11505.382, "synch_weights_time_ms": 11.227}, "counters": {"num_env_steps_sampled": 5094400, "num_env_steps_trained": 5094400, "num_agent_steps_sampled": 10188800, "num_agent_steps_trained": 10188800}, "done": false, "episodes_total": 12736, "training_iteration": 398, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-26-18", "timestamp": 1666581978, "time_this_iter_s": 3.9683709144592285, "time_total_s": 1518.2616810798645, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1518.2616810798645, "timesteps_since_restore": 0, "iterations_since_restore": 398, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 21.883333333333336, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 194.8, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 172.97, "shaped_reward_min": 138, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.02, "onion_pickup_agent_0_min": 10, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 16.32, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 15.84, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 16.18, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.2, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.15, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.11, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, 
"useful_onion_drop_agent_1_mean": 0.07, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.57, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 15.76, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.25, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 5.18, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 4.99, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 4.97, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.09, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.11, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 4.94, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 4.88, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 4.93, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.84, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.57, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 15.76, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.57, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 15.76, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.00010686222231015563, "policy_loss": -0.00027928390773013234, "vf_loss": 
7.995491027832031, "vf_explained_var": 0.5691348314285278, "kl": 0.003024071455001831, "entropy": 0.8268085718154907, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 5107200, "num_env_steps_trained": 5107200, "num_agent_steps_sampled": 10214400, "num_agent_steps_trained": 10214400}, "sampler_results": {"episode_reward_max": 627.0, "episode_reward_min": 468.0, "episode_reward_mean": 562.57, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 232.0}, "policy_reward_max": {"ppo": 316.0}, "policy_reward_mean": {"ppo": 281.285}, "custom_metrics": {"sparse_reward_mean": 194.8, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 172.97, "shaped_reward_min": 138, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.02, "onion_pickup_agent_0_min": 10, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 16.32, 
"onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 15.84, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 16.18, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.2, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.15, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.11, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.07, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.57, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 15.76, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.25, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 5.18, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 4.99, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 4.97, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.09, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.11, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 4.94, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 4.88, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 4.93, "soup_delivery_agent_0_min": 2, 
"soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.84, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.57, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 15.76, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.57, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 15.76, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [576.0, 570.0, 525.0, 576.0, 582.0, 576.0, 582.0, 468.0, 570.0, 570.0, 576.0, 582.0, 519.0, 579.0, 579.0, 525.0, 587.0, 579.0, 582.0, 582.0, 570.0, 522.0, 579.0, 576.0, 525.0, 579.0, 579.0, 570.0, 582.0, 576.0, 582.0, 579.0, 573.0, 576.0, 582.0, 579.0, 525.0, 573.0, 582.0, 530.0, 573.0, 590.0, 525.0, 530.0, 498.0, 576.0, 576.0, 582.0, 573.0, 522.0, 573.0, 522.0, 573.0, 579.0, 519.0, 527.0, 516.0, 576.0, 581.0, 573.0, 573.0, 579.0, 525.0, 582.0, 564.0, 579.0, 576.0, 576.0, 576.0, 573.0, 525.0, 530.0, 579.0, 576.0, 549.0, 579.0, 576.0, 576.0, 590.0, 573.0, 522.0, 516.0, 579.0, 525.0, 573.0, 468.0, 576.0, 573.0, 573.0, 579.0, 525.0, 573.0, 627.0, 582.0, 576.0, 522.0, 576.0, 582.0, 570.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [285.0, 291.0, 285.0, 285.0, 263.0, 262.0, 275.0, 301.0, 296.0, 286.0, 292.0, 284.0, 288.0, 294.0, 236.0, 232.0, 285.0, 285.0, 282.0, 288.0, 285.0, 291.0, 288.0, 294.0, 267.0, 252.0, 290.0, 289.0, 294.0, 285.0, 258.0, 267.0, 291.0, 296.0, 286.0, 293.0, 290.0, 292.0, 293.0, 289.0, 283.0, 287.0, 268.0, 254.0, 288.0, 291.0, 288.0, 288.0, 268.0, 
257.0, 289.0, 290.0, 288.0, 291.0, 287.0, 283.0, 296.0, 286.0, 291.0, 285.0, 295.0, 287.0, 294.0, 285.0, 286.0, 287.0, 286.0, 290.0, 287.0, 295.0, 289.0, 290.0, 260.0, 265.0, 283.0, 290.0, 292.0, 290.0, 262.0, 268.0, 290.0, 283.0, 297.0, 293.0, 264.0, 261.0, 260.0, 270.0, 249.0, 249.0, 285.0, 291.0, 289.0, 287.0, 291.0, 291.0, 285.0, 288.0, 268.0, 254.0, 288.0, 285.0, 264.0, 258.0, 283.0, 290.0, 288.0, 291.0, 266.0, 253.0, 270.0, 257.0, 255.0, 261.0, 296.0, 280.0, 284.0, 297.0, 285.0, 288.0, 289.0, 284.0, 288.0, 291.0, 265.0, 260.0, 287.0, 295.0, 282.0, 282.0, 288.0, 291.0, 285.0, 291.0, 292.0, 284.0, 281.0, 295.0, 284.0, 289.0, 267.0, 258.0, 268.0, 262.0, 288.0, 291.0, 291.0, 285.0, 281.0, 268.0, 291.0, 288.0, 291.0, 285.0, 284.0, 292.0, 294.0, 296.0, 290.0, 283.0, 270.0, 252.0, 254.0, 262.0, 293.0, 286.0, 261.0, 264.0, 292.0, 281.0, 233.0, 235.0, 290.0, 286.0, 281.0, 292.0, 284.0, 289.0, 282.0, 297.0, 262.0, 263.0, 284.0, 289.0, 311.0, 316.0, 287.0, 295.0, 286.0, 290.0, 260.0, 262.0, 294.0, 282.0, 290.0, 292.0, 277.0, 293.0, 275.0, 301.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6966158957094319, "mean_inference_ms": 1.243448250922046, "mean_action_processing_ms": 0.13362095833138962, "mean_env_wait_ms": 0.8385006632752647, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 627.0, "episode_reward_min": 468.0, "episode_reward_mean": 562.57, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 232.0}, "policy_reward_max": {"ppo": 316.0}, "policy_reward_mean": {"ppo": 281.285}, "hist_stats": {"episode_reward": [576.0, 570.0, 525.0, 576.0, 582.0, 576.0, 582.0, 468.0, 570.0, 570.0, 576.0, 582.0, 519.0, 579.0, 579.0, 525.0, 587.0, 579.0, 582.0, 582.0, 570.0, 522.0, 579.0, 576.0, 525.0, 579.0, 579.0, 570.0, 582.0, 576.0, 582.0, 579.0, 573.0, 576.0, 582.0, 579.0, 525.0, 573.0, 582.0, 530.0, 573.0, 590.0, 525.0, 530.0, 498.0, 576.0, 576.0, 582.0, 573.0, 522.0, 573.0, 522.0, 573.0, 579.0, 519.0, 527.0, 516.0, 
576.0, 581.0, 573.0, 573.0, 579.0, 525.0, 582.0, 564.0, 579.0, 576.0, 576.0, 576.0, 573.0, 525.0, 530.0, 579.0, 576.0, 549.0, 579.0, 576.0, 576.0, 590.0, 573.0, 522.0, 516.0, 579.0, 525.0, 573.0, 468.0, 576.0, 573.0, 573.0, 579.0, 525.0, 573.0, 627.0, 582.0, 576.0, 522.0, 576.0, 582.0, 570.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [285.0, 291.0, 285.0, 285.0, 263.0, 262.0, 275.0, 301.0, 296.0, 286.0, 292.0, 284.0, 288.0, 294.0, 236.0, 232.0, 285.0, 285.0, 282.0, 288.0, 285.0, 291.0, 288.0, 294.0, 267.0, 252.0, 290.0, 289.0, 294.0, 285.0, 258.0, 267.0, 291.0, 296.0, 286.0, 293.0, 290.0, 292.0, 293.0, 289.0, 283.0, 287.0, 268.0, 254.0, 288.0, 291.0, 288.0, 288.0, 268.0, 257.0, 289.0, 290.0, 288.0, 291.0, 287.0, 283.0, 296.0, 286.0, 291.0, 285.0, 295.0, 287.0, 294.0, 285.0, 286.0, 287.0, 286.0, 290.0, 287.0, 295.0, 289.0, 290.0, 260.0, 265.0, 283.0, 290.0, 292.0, 290.0, 262.0, 268.0, 290.0, 283.0, 297.0, 293.0, 264.0, 261.0, 260.0, 270.0, 249.0, 249.0, 285.0, 291.0, 289.0, 287.0, 291.0, 291.0, 285.0, 288.0, 268.0, 254.0, 288.0, 285.0, 264.0, 258.0, 283.0, 290.0, 288.0, 291.0, 266.0, 253.0, 270.0, 257.0, 255.0, 261.0, 296.0, 280.0, 284.0, 297.0, 285.0, 288.0, 289.0, 284.0, 288.0, 291.0, 265.0, 260.0, 287.0, 295.0, 282.0, 282.0, 288.0, 291.0, 285.0, 291.0, 292.0, 284.0, 281.0, 295.0, 284.0, 289.0, 267.0, 258.0, 268.0, 262.0, 288.0, 291.0, 291.0, 285.0, 281.0, 268.0, 291.0, 288.0, 291.0, 285.0, 284.0, 292.0, 294.0, 296.0, 290.0, 283.0, 270.0, 252.0, 254.0, 262.0, 293.0, 
286.0, 261.0, 264.0, 292.0, 281.0, 233.0, 235.0, 290.0, 286.0, 281.0, 292.0, 284.0, 289.0, 282.0, 297.0, 262.0, 263.0, 284.0, 289.0, 311.0, 316.0, 287.0, 295.0, 286.0, 290.0, 260.0, 262.0, 294.0, 282.0, 290.0, 292.0, 277.0, 293.0, 275.0, 301.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6966158957094319, "mean_inference_ms": 1.243448250922046, "mean_action_processing_ms": 0.13362095833138962, "mean_env_wait_ms": 0.8385006632752647, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 10214400, "num_agent_steps_trained": 10214400, "num_env_steps_sampled": 5107200, "num_env_steps_trained": 5107200, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 5107200, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 10214400, "timers": {"training_iteration_time_ms": 3630.802, "learn_time_ms": 1121.196, "learn_throughput": 11416.384, "synch_weights_time_ms": 10.982}, "counters": {"num_env_steps_sampled": 5107200, "num_env_steps_trained": 5107200, "num_agent_steps_sampled": 10214400, "num_agent_steps_trained": 10214400}, "done": false, "episodes_total": 12768, "training_iteration": 399, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-26-22", "timestamp": 1666581982, "time_this_iter_s": 3.7618231773376465, "time_total_s": 1522.0235042572021, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1522.0235042572021, "timesteps_since_restore": 0, "iterations_since_restore": 399, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.82, "ram_util_percent": 10.62}}
+{"evaluation": {"average_sparse_reward": 200.0, "num_healthy_workers": 0, "num_recreated_workers": 0}, "custom_metrics": {"sparse_reward_mean": 194.0, "sparse_reward_min": 40, "sparse_reward_max": 220, "shaped_reward_mean": 172.6, "shaped_reward_min": 43, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.6, "onion_pickup_agent_0_min": 2, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 16.57, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 15.47, "useful_onion_pickup_agent_0_min": 2, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 16.39, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.17, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.15, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.1, 
"useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.12, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.2, "potting_onion_agent_0_min": 1, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 16.06, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.35, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 5.05, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.08, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.86, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.09, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.09, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.02, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 4.78, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.0, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.73, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.2, "optimal_onion_potting_agent_0_min": 1, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 16.06, "optimal_onion_potting_agent_1_min": 8, 
"optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.2, "viable_onion_potting_agent_0_min": 1, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 16.06, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, 
"total_loss": -0.002051019575446844, "policy_loss": -0.0024280953221023083, "vf_loss": 7.887781143188477, "vf_explained_var": 0.6008471250534058, "kl": 0.003506100969389081, "entropy": 0.8234077095985413, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 5120000, "num_env_steps_trained": 5120000, "num_agent_steps_sampled": 10240000, "num_agent_steps_trained": 10240000}, "sampler_results": {"episode_reward_max": 639.0, "episode_reward_min": 123.0, "episode_reward_mean": 560.6, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 59.0}, "policy_reward_max": {"ppo": 320.0}, "policy_reward_mean": {"ppo": 280.3}, "custom_metrics": {"sparse_reward_mean": 194.0, "sparse_reward_min": 40, "sparse_reward_max": 220, "shaped_reward_mean": 172.6, "shaped_reward_min": 43, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.6, "onion_pickup_agent_0_min": 2, 
"onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 16.57, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 15.47, "useful_onion_pickup_agent_0_min": 2, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 16.39, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.17, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.15, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.1, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.12, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.2, "potting_onion_agent_0_min": 1, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 16.06, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.35, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 5.05, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.08, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.86, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.09, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.09, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.02, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 4.78, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, 
"soup_delivery_agent_0_mean": 5.0, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.73, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.2, "optimal_onion_potting_agent_0_min": 1, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 16.06, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.2, "viable_onion_potting_agent_0_min": 1, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 16.06, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, 
"useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [573.0, 576.0, 582.0, 579.0, 525.0, 573.0, 582.0, 530.0, 573.0, 590.0, 525.0, 530.0, 498.0, 576.0, 576.0, 582.0, 573.0, 522.0, 573.0, 522.0, 573.0, 579.0, 519.0, 527.0, 516.0, 576.0, 581.0, 573.0, 573.0, 579.0, 525.0, 582.0, 564.0, 579.0, 576.0, 576.0, 576.0, 573.0, 525.0, 530.0, 579.0, 576.0, 549.0, 579.0, 576.0, 576.0, 590.0, 573.0, 522.0, 516.0, 579.0, 525.0, 573.0, 468.0, 576.0, 573.0, 573.0, 579.0, 525.0, 573.0, 627.0, 582.0, 576.0, 522.0, 576.0, 582.0, 570.0, 576.0, 579.0, 579.0, 579.0, 582.0, 582.0, 570.0, 578.0, 579.0, 579.0, 570.0, 579.0, 579.0, 579.0, 579.0, 530.0, 123.0, 579.0, 639.0, 582.0, 582.0, 525.0, 570.0, 576.0, 587.0, 576.0, 519.0, 582.0, 582.0, 582.0, 579.0, 576.0, 525.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [286.0, 287.0, 286.0, 290.0, 287.0, 295.0, 289.0, 290.0, 260.0, 265.0, 283.0, 290.0, 292.0, 290.0, 262.0, 268.0, 290.0, 283.0, 297.0, 293.0, 264.0, 261.0, 260.0, 270.0, 249.0, 249.0, 285.0, 291.0, 289.0, 287.0, 291.0, 291.0, 285.0, 288.0, 268.0, 254.0, 288.0, 285.0, 
264.0, 258.0, 283.0, 290.0, 288.0, 291.0, 266.0, 253.0, 270.0, 257.0, 255.0, 261.0, 296.0, 280.0, 284.0, 297.0, 285.0, 288.0, 289.0, 284.0, 288.0, 291.0, 265.0, 260.0, 287.0, 295.0, 282.0, 282.0, 288.0, 291.0, 285.0, 291.0, 292.0, 284.0, 281.0, 295.0, 284.0, 289.0, 267.0, 258.0, 268.0, 262.0, 288.0, 291.0, 291.0, 285.0, 281.0, 268.0, 291.0, 288.0, 291.0, 285.0, 284.0, 292.0, 294.0, 296.0, 290.0, 283.0, 270.0, 252.0, 254.0, 262.0, 293.0, 286.0, 261.0, 264.0, 292.0, 281.0, 233.0, 235.0, 290.0, 286.0, 281.0, 292.0, 284.0, 289.0, 282.0, 297.0, 262.0, 263.0, 284.0, 289.0, 311.0, 316.0, 287.0, 295.0, 286.0, 290.0, 260.0, 262.0, 294.0, 282.0, 290.0, 292.0, 277.0, 293.0, 275.0, 301.0, 290.0, 289.0, 288.0, 291.0, 289.0, 290.0, 291.0, 291.0, 289.0, 293.0, 287.0, 283.0, 296.0, 282.0, 294.0, 285.0, 291.0, 288.0, 286.0, 284.0, 285.0, 294.0, 292.0, 287.0, 288.0, 291.0, 289.0, 290.0, 264.0, 266.0, 59.0, 64.0, 288.0, 291.0, 319.0, 320.0, 288.0, 294.0, 288.0, 294.0, 266.0, 259.0, 285.0, 285.0, 284.0, 292.0, 289.0, 298.0, 287.0, 289.0, 261.0, 258.0, 291.0, 291.0, 296.0, 286.0, 290.0, 292.0, 289.0, 290.0, 281.0, 295.0, 260.0, 265.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6965676636854887, "mean_inference_ms": 1.2434046138073682, "mean_action_processing_ms": 0.13361575469972117, "mean_env_wait_ms": 0.8385373096956714, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 639.0, "episode_reward_min": 123.0, "episode_reward_mean": 560.6, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 59.0}, "policy_reward_max": {"ppo": 320.0}, "policy_reward_mean": {"ppo": 280.3}, "hist_stats": {"episode_reward": [573.0, 576.0, 582.0, 579.0, 525.0, 573.0, 582.0, 530.0, 573.0, 590.0, 525.0, 530.0, 498.0, 576.0, 576.0, 582.0, 573.0, 522.0, 573.0, 522.0, 573.0, 579.0, 519.0, 527.0, 516.0, 576.0, 581.0, 573.0, 573.0, 579.0, 525.0, 582.0, 564.0, 579.0, 576.0, 576.0, 576.0, 573.0, 525.0, 530.0, 579.0, 576.0, 549.0, 579.0, 576.0, 576.0, 
590.0, 573.0, 522.0, 516.0, 579.0, 525.0, 573.0, 468.0, 576.0, 573.0, 573.0, 579.0, 525.0, 573.0, 627.0, 582.0, 576.0, 522.0, 576.0, 582.0, 570.0, 576.0, 579.0, 579.0, 579.0, 582.0, 582.0, 570.0, 578.0, 579.0, 579.0, 570.0, 579.0, 579.0, 579.0, 579.0, 530.0, 123.0, 579.0, 639.0, 582.0, 582.0, 525.0, 570.0, 576.0, 587.0, 576.0, 519.0, 582.0, 582.0, 582.0, 579.0, 576.0, 525.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [286.0, 287.0, 286.0, 290.0, 287.0, 295.0, 289.0, 290.0, 260.0, 265.0, 283.0, 290.0, 292.0, 290.0, 262.0, 268.0, 290.0, 283.0, 297.0, 293.0, 264.0, 261.0, 260.0, 270.0, 249.0, 249.0, 285.0, 291.0, 289.0, 287.0, 291.0, 291.0, 285.0, 288.0, 268.0, 254.0, 288.0, 285.0, 264.0, 258.0, 283.0, 290.0, 288.0, 291.0, 266.0, 253.0, 270.0, 257.0, 255.0, 261.0, 296.0, 280.0, 284.0, 297.0, 285.0, 288.0, 289.0, 284.0, 288.0, 291.0, 265.0, 260.0, 287.0, 295.0, 282.0, 282.0, 288.0, 291.0, 285.0, 291.0, 292.0, 284.0, 281.0, 295.0, 284.0, 289.0, 267.0, 258.0, 268.0, 262.0, 288.0, 291.0, 291.0, 285.0, 281.0, 268.0, 291.0, 288.0, 291.0, 285.0, 284.0, 292.0, 294.0, 296.0, 290.0, 283.0, 270.0, 252.0, 254.0, 262.0, 293.0, 286.0, 261.0, 264.0, 292.0, 281.0, 233.0, 235.0, 290.0, 286.0, 281.0, 292.0, 284.0, 289.0, 282.0, 297.0, 262.0, 263.0, 284.0, 289.0, 311.0, 316.0, 287.0, 295.0, 286.0, 290.0, 260.0, 262.0, 294.0, 282.0, 290.0, 292.0, 277.0, 293.0, 275.0, 301.0, 290.0, 289.0, 288.0, 291.0, 289.0, 290.0, 291.0, 291.0, 289.0, 293.0, 287.0, 283.0, 296.0, 282.0, 294.0, 285.0, 291.0, 288.0, 
286.0, 284.0, 285.0, 294.0, 292.0, 287.0, 288.0, 291.0, 289.0, 290.0, 264.0, 266.0, 59.0, 64.0, 288.0, 291.0, 319.0, 320.0, 288.0, 294.0, 288.0, 294.0, 266.0, 259.0, 285.0, 285.0, 284.0, 292.0, 289.0, 298.0, 287.0, 289.0, 261.0, 258.0, 291.0, 291.0, 296.0, 286.0, 290.0, 292.0, 289.0, 290.0, 281.0, 295.0, 260.0, 265.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6965676636854887, "mean_inference_ms": 1.2434046138073682, "mean_action_processing_ms": 0.13361575469972117, "mean_env_wait_ms": 0.8385373096956714, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 10240000, "num_agent_steps_trained": 10240000, "num_env_steps_sampled": 5120000, "num_env_steps_trained": 5120000, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 5120000, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 10240000, "timers": {"training_iteration_time_ms": 3619.382, "learn_time_ms": 1115.094, "learn_throughput": 11478.853, "synch_weights_time_ms": 11.084}, "counters": {"num_env_steps_sampled": 5120000, "num_env_steps_trained": 5120000, "num_agent_steps_sampled": 10240000, "num_agent_steps_trained": 10240000}, "done": false, "episodes_total": 12800, "training_iteration": 400, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-26-29", "timestamp": 1666581989, "time_this_iter_s": 7.1384642124176025, "time_total_s": 1529.1619684696198, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": 
true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, 
"enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, 
"tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": 
"deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, 
"keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function 
get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function 
gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1529.1619684696198, "timesteps_since_restore": 0, "iterations_since_restore": 400, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 14.381818181818183, "ram_util_percent": 10.599999999999998}}
+{"custom_metrics": {"sparse_reward_mean": 192.6, "sparse_reward_min": 20, "sparse_reward_max": 220, "shaped_reward_mean": 171.35, "shaped_reward_min": 26, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.34, "onion_pickup_agent_0_min": 2, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 16.57, "onion_pickup_agent_1_min": 5, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 15.23, "useful_onion_pickup_agent_0_min": 2, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 16.39, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.16, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.15, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.09, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, 
"useful_onion_drop_agent_1_mean": 0.11, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.91, "potting_onion_agent_0_min": 1, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 16.1, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.36, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 5.0, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.11, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.83, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.09, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.1, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.0, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 4.7, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 4.99, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.66, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.91, "optimal_onion_potting_agent_0_min": 1, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 16.1, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.91, "viable_onion_potting_agent_0_min": 1, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 16.1, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0029064586851745844, "policy_loss": -0.0032941235695034266, "vf_loss": 
8.069677352905273, "vf_explained_var": 0.5652687549591064, "kl": 0.002700523007661104, "entropy": 0.8386068344116211, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 5132800, "num_env_steps_trained": 5132800, "num_agent_steps_sampled": 10265600, "num_agent_steps_trained": 10265600}, "sampler_results": {"episode_reward_max": 639.0, "episode_reward_min": 66.0, "episode_reward_mean": 556.55, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 31.0}, "policy_reward_max": {"ppo": 320.0}, "policy_reward_mean": {"ppo": 278.275}, "custom_metrics": {"sparse_reward_mean": 192.6, "sparse_reward_min": 20, "sparse_reward_max": 220, "shaped_reward_mean": 171.35, "shaped_reward_min": 26, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.34, "onion_pickup_agent_0_min": 2, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 16.57, 
"onion_pickup_agent_1_min": 5, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 15.23, "useful_onion_pickup_agent_0_min": 2, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 16.39, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.16, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.15, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.09, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.11, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.91, "potting_onion_agent_0_min": 1, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 16.1, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.36, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 5.0, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.11, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.83, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.09, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.1, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.0, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 4.7, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 4.99, "soup_delivery_agent_0_min": 1, 
"soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.66, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.91, "optimal_onion_potting_agent_0_min": 1, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 16.1, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.91, "viable_onion_potting_agent_0_min": 1, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 16.1, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [564.0, 579.0, 576.0, 576.0, 576.0, 573.0, 525.0, 530.0, 579.0, 576.0, 549.0, 579.0, 576.0, 576.0, 590.0, 573.0, 522.0, 516.0, 579.0, 525.0, 573.0, 468.0, 576.0, 573.0, 573.0, 579.0, 525.0, 573.0, 627.0, 582.0, 576.0, 522.0, 576.0, 582.0, 570.0, 576.0, 579.0, 579.0, 579.0, 582.0, 582.0, 570.0, 578.0, 579.0, 579.0, 570.0, 579.0, 579.0, 579.0, 579.0, 530.0, 123.0, 579.0, 639.0, 582.0, 582.0, 525.0, 570.0, 576.0, 587.0, 576.0, 519.0, 582.0, 582.0, 582.0, 579.0, 576.0, 525.0, 582.0, 579.0, 519.0, 573.0, 573.0, 525.0, 66.0, 419.0, 582.0, 576.0, 522.0, 579.0, 570.0, 513.0, 513.0, 576.0, 582.0, 527.0, 579.0, 582.0, 570.0, 582.0, 576.0, 627.0, 573.0, 582.0, 579.0, 579.0, 582.0, 576.0, 573.0, 522.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [282.0, 282.0, 288.0, 291.0, 285.0, 291.0, 292.0, 284.0, 281.0, 295.0, 284.0, 289.0, 267.0, 258.0, 268.0, 262.0, 288.0, 291.0, 291.0, 285.0, 281.0, 268.0, 291.0, 288.0, 291.0, 285.0, 284.0, 292.0, 294.0, 296.0, 290.0, 283.0, 270.0, 252.0, 254.0, 262.0, 293.0, 286.0, 261.0, 264.0, 292.0, 281.0, 233.0, 235.0, 290.0, 286.0, 281.0, 292.0, 284.0, 
289.0, 282.0, 297.0, 262.0, 263.0, 284.0, 289.0, 311.0, 316.0, 287.0, 295.0, 286.0, 290.0, 260.0, 262.0, 294.0, 282.0, 290.0, 292.0, 277.0, 293.0, 275.0, 301.0, 290.0, 289.0, 288.0, 291.0, 289.0, 290.0, 291.0, 291.0, 289.0, 293.0, 287.0, 283.0, 296.0, 282.0, 294.0, 285.0, 291.0, 288.0, 286.0, 284.0, 285.0, 294.0, 292.0, 287.0, 288.0, 291.0, 289.0, 290.0, 264.0, 266.0, 59.0, 64.0, 288.0, 291.0, 319.0, 320.0, 288.0, 294.0, 288.0, 294.0, 266.0, 259.0, 285.0, 285.0, 284.0, 292.0, 289.0, 298.0, 287.0, 289.0, 261.0, 258.0, 291.0, 291.0, 296.0, 286.0, 290.0, 292.0, 289.0, 290.0, 281.0, 295.0, 260.0, 265.0, 283.0, 299.0, 284.0, 295.0, 256.0, 263.0, 283.0, 290.0, 280.0, 293.0, 260.0, 265.0, 31.0, 35.0, 210.0, 209.0, 289.0, 293.0, 289.0, 287.0, 261.0, 261.0, 292.0, 287.0, 284.0, 286.0, 257.0, 256.0, 255.0, 258.0, 285.0, 291.0, 294.0, 288.0, 257.0, 270.0, 290.0, 289.0, 291.0, 291.0, 282.0, 288.0, 293.0, 289.0, 291.0, 285.0, 316.0, 311.0, 288.0, 285.0, 292.0, 290.0, 290.0, 289.0, 291.0, 288.0, 293.0, 289.0, 291.0, 285.0, 282.0, 291.0, 263.0, 259.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6965010651464741, "mean_inference_ms": 1.2432963575523006, "mean_action_processing_ms": 0.13361169316252014, "mean_env_wait_ms": 0.8384959904466414, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 639.0, "episode_reward_min": 66.0, "episode_reward_mean": 556.55, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 31.0}, "policy_reward_max": {"ppo": 320.0}, "policy_reward_mean": {"ppo": 278.275}, "hist_stats": {"episode_reward": [564.0, 579.0, 576.0, 576.0, 576.0, 573.0, 525.0, 530.0, 579.0, 576.0, 549.0, 579.0, 576.0, 576.0, 590.0, 573.0, 522.0, 516.0, 579.0, 525.0, 573.0, 468.0, 576.0, 573.0, 573.0, 579.0, 525.0, 573.0, 627.0, 582.0, 576.0, 522.0, 576.0, 582.0, 570.0, 576.0, 579.0, 579.0, 579.0, 582.0, 582.0, 570.0, 578.0, 579.0, 579.0, 570.0, 579.0, 579.0, 579.0, 579.0, 530.0, 123.0, 579.0, 639.0, 582.0, 582.0, 525.0, 
570.0, 576.0, 587.0, 576.0, 519.0, 582.0, 582.0, 582.0, 579.0, 576.0, 525.0, 582.0, 579.0, 519.0, 573.0, 573.0, 525.0, 66.0, 419.0, 582.0, 576.0, 522.0, 579.0, 570.0, 513.0, 513.0, 576.0, 582.0, 527.0, 579.0, 582.0, 570.0, 582.0, 576.0, 627.0, 573.0, 582.0, 579.0, 579.0, 582.0, 576.0, 573.0, 522.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [282.0, 282.0, 288.0, 291.0, 285.0, 291.0, 292.0, 284.0, 281.0, 295.0, 284.0, 289.0, 267.0, 258.0, 268.0, 262.0, 288.0, 291.0, 291.0, 285.0, 281.0, 268.0, 291.0, 288.0, 291.0, 285.0, 284.0, 292.0, 294.0, 296.0, 290.0, 283.0, 270.0, 252.0, 254.0, 262.0, 293.0, 286.0, 261.0, 264.0, 292.0, 281.0, 233.0, 235.0, 290.0, 286.0, 281.0, 292.0, 284.0, 289.0, 282.0, 297.0, 262.0, 263.0, 284.0, 289.0, 311.0, 316.0, 287.0, 295.0, 286.0, 290.0, 260.0, 262.0, 294.0, 282.0, 290.0, 292.0, 277.0, 293.0, 275.0, 301.0, 290.0, 289.0, 288.0, 291.0, 289.0, 290.0, 291.0, 291.0, 289.0, 293.0, 287.0, 283.0, 296.0, 282.0, 294.0, 285.0, 291.0, 288.0, 286.0, 284.0, 285.0, 294.0, 292.0, 287.0, 288.0, 291.0, 289.0, 290.0, 264.0, 266.0, 59.0, 64.0, 288.0, 291.0, 319.0, 320.0, 288.0, 294.0, 288.0, 294.0, 266.0, 259.0, 285.0, 285.0, 284.0, 292.0, 289.0, 298.0, 287.0, 289.0, 261.0, 258.0, 291.0, 291.0, 296.0, 286.0, 290.0, 292.0, 289.0, 290.0, 281.0, 295.0, 260.0, 265.0, 283.0, 299.0, 284.0, 295.0, 256.0, 263.0, 283.0, 290.0, 280.0, 293.0, 260.0, 265.0, 31.0, 35.0, 210.0, 209.0, 289.0, 293.0, 289.0, 287.0, 261.0, 261.0, 292.0, 287.0, 284.0, 286.0, 257.0, 256.0, 255.0, 
258.0, 285.0, 291.0, 294.0, 288.0, 257.0, 270.0, 290.0, 289.0, 291.0, 291.0, 282.0, 288.0, 293.0, 289.0, 291.0, 285.0, 316.0, 311.0, 288.0, 285.0, 292.0, 290.0, 290.0, 289.0, 291.0, 288.0, 293.0, 289.0, 291.0, 285.0, 282.0, 291.0, 263.0, 259.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6965010651464741, "mean_inference_ms": 1.2432963575523006, "mean_action_processing_ms": 0.13361169316252014, "mean_env_wait_ms": 0.8384959904466414, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 10265600, "num_agent_steps_trained": 10265600, "num_env_steps_sampled": 5132800, "num_env_steps_trained": 5132800, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 5132800, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 10265600, "timers": {"training_iteration_time_ms": 3617.502, "learn_time_ms": 1119.217, "learn_throughput": 11436.564, "synch_weights_time_ms": 10.734}, "counters": {"num_env_steps_sampled": 5132800, "num_env_steps_trained": 5132800, "num_agent_steps_sampled": 10265600, "num_agent_steps_trained": 10265600}, "done": false, "episodes_total": 12832, "training_iteration": 401, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-26-33", "timestamp": 1666581993, "time_this_iter_s": 3.6597113609313965, "time_total_s": 1532.8216798305511, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1532.8216798305511, "timesteps_since_restore": 0, "iterations_since_restore": 401, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.959999999999997, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 193.6, "sparse_reward_min": 20, "sparse_reward_max": 220, "shaped_reward_mean": 172.69, "shaped_reward_min": 26, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.3, "onion_pickup_agent_0_min": 2, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 16.9, "onion_pickup_agent_1_min": 5, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 15.22, "useful_onion_pickup_agent_0_min": 2, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 16.69, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.15, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.13, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.08, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, 
"useful_onion_drop_agent_1_mean": 0.08, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.8, "potting_onion_agent_0_min": 1, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 16.49, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.41, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 4.96, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.2, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.84, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.09, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.07, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.06, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.68, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.04, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.64, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.8, "optimal_onion_potting_agent_0_min": 1, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 16.49, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.8, "viable_onion_potting_agent_0_min": 1, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 16.49, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.003041105344891548, "policy_loss": 0.002663327381014824, "vf_loss": 
7.898676872253418, "vf_explained_var": 0.5853292942047119, "kl": 0.004315956961363554, "entropy": 0.8241794109344482, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 5145600, "num_env_steps_trained": 5145600, "num_agent_steps_sampled": 10291200, "num_agent_steps_trained": 10291200}, "sampler_results": {"episode_reward_max": 639.0, "episode_reward_min": 66.0, "episode_reward_mean": 559.89, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 31.0}, "policy_reward_max": {"ppo": 320.0}, "policy_reward_mean": {"ppo": 279.945}, "custom_metrics": {"sparse_reward_mean": 193.6, "sparse_reward_min": 20, "sparse_reward_max": 220, "shaped_reward_mean": 172.69, "shaped_reward_min": 26, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.3, "onion_pickup_agent_0_min": 2, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 16.9, 
"onion_pickup_agent_1_min": 5, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 15.22, "useful_onion_pickup_agent_0_min": 2, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 16.69, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.15, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.13, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.08, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.08, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.8, "potting_onion_agent_0_min": 1, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 16.49, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.41, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 4.96, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.2, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.84, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.09, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.07, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.06, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.68, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.04, "soup_delivery_agent_0_min": 1, 
"soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.64, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.8, "optimal_onion_potting_agent_0_min": 1, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 16.49, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.8, "viable_onion_potting_agent_0_min": 1, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 16.49, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [576.0, 582.0, 570.0, 576.0, 579.0, 579.0, 579.0, 582.0, 582.0, 570.0, 578.0, 579.0, 579.0, 570.0, 579.0, 579.0, 579.0, 579.0, 530.0, 123.0, 579.0, 639.0, 582.0, 582.0, 525.0, 570.0, 576.0, 587.0, 576.0, 519.0, 582.0, 582.0, 582.0, 579.0, 576.0, 525.0, 582.0, 579.0, 519.0, 573.0, 573.0, 525.0, 66.0, 419.0, 582.0, 576.0, 522.0, 579.0, 570.0, 513.0, 513.0, 576.0, 582.0, 527.0, 579.0, 582.0, 570.0, 582.0, 576.0, 627.0, 573.0, 582.0, 579.0, 579.0, 582.0, 576.0, 573.0, 522.0, 582.0, 579.0, 579.0, 570.0, 579.0, 579.0, 582.0, 587.0, 576.0, 582.0, 582.0, 522.0, 582.0, 582.0, 573.0, 579.0, 579.0, 573.0, 567.0, 522.0, 573.0, 573.0, 579.0, 579.0, 579.0, 579.0, 579.0, 576.0, 522.0, 579.0, 567.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [294.0, 282.0, 290.0, 292.0, 277.0, 293.0, 275.0, 301.0, 290.0, 289.0, 288.0, 291.0, 289.0, 290.0, 291.0, 291.0, 289.0, 293.0, 287.0, 283.0, 296.0, 282.0, 294.0, 285.0, 291.0, 288.0, 286.0, 284.0, 285.0, 294.0, 292.0, 287.0, 288.0, 291.0, 289.0, 290.0, 264.0, 266.0, 59.0, 64.0, 288.0, 291.0, 319.0, 320.0, 288.0, 294.0, 288.0, 294.0, 266.0, 
259.0, 285.0, 285.0, 284.0, 292.0, 289.0, 298.0, 287.0, 289.0, 261.0, 258.0, 291.0, 291.0, 296.0, 286.0, 290.0, 292.0, 289.0, 290.0, 281.0, 295.0, 260.0, 265.0, 283.0, 299.0, 284.0, 295.0, 256.0, 263.0, 283.0, 290.0, 280.0, 293.0, 260.0, 265.0, 31.0, 35.0, 210.0, 209.0, 289.0, 293.0, 289.0, 287.0, 261.0, 261.0, 292.0, 287.0, 284.0, 286.0, 257.0, 256.0, 255.0, 258.0, 285.0, 291.0, 294.0, 288.0, 257.0, 270.0, 290.0, 289.0, 291.0, 291.0, 282.0, 288.0, 293.0, 289.0, 291.0, 285.0, 316.0, 311.0, 288.0, 285.0, 292.0, 290.0, 290.0, 289.0, 291.0, 288.0, 293.0, 289.0, 291.0, 285.0, 282.0, 291.0, 263.0, 259.0, 290.0, 292.0, 288.0, 291.0, 288.0, 291.0, 288.0, 282.0, 283.0, 296.0, 285.0, 294.0, 289.0, 293.0, 296.0, 291.0, 287.0, 289.0, 294.0, 288.0, 285.0, 297.0, 259.0, 263.0, 295.0, 287.0, 290.0, 292.0, 290.0, 283.0, 290.0, 289.0, 288.0, 291.0, 283.0, 290.0, 277.0, 290.0, 254.0, 268.0, 282.0, 291.0, 292.0, 281.0, 290.0, 289.0, 285.0, 294.0, 291.0, 288.0, 291.0, 288.0, 289.0, 290.0, 289.0, 287.0, 254.0, 268.0, 289.0, 290.0, 271.0, 296.0, 289.0, 290.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.696437434705532, "mean_inference_ms": 1.2431677188040304, "mean_action_processing_ms": 0.1336046089583616, "mean_env_wait_ms": 0.8384268867865966, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 639.0, "episode_reward_min": 66.0, "episode_reward_mean": 559.89, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 31.0}, "policy_reward_max": {"ppo": 320.0}, "policy_reward_mean": {"ppo": 279.945}, "hist_stats": {"episode_reward": [576.0, 582.0, 570.0, 576.0, 579.0, 579.0, 579.0, 582.0, 582.0, 570.0, 578.0, 579.0, 579.0, 570.0, 579.0, 579.0, 579.0, 579.0, 530.0, 123.0, 579.0, 639.0, 582.0, 582.0, 525.0, 570.0, 576.0, 587.0, 576.0, 519.0, 582.0, 582.0, 582.0, 579.0, 576.0, 525.0, 582.0, 579.0, 519.0, 573.0, 573.0, 525.0, 66.0, 419.0, 582.0, 576.0, 522.0, 579.0, 570.0, 513.0, 513.0, 576.0, 582.0, 527.0, 579.0, 582.0, 570.0, 
582.0, 576.0, 627.0, 573.0, 582.0, 579.0, 579.0, 582.0, 576.0, 573.0, 522.0, 582.0, 579.0, 579.0, 570.0, 579.0, 579.0, 582.0, 587.0, 576.0, 582.0, 582.0, 522.0, 582.0, 582.0, 573.0, 579.0, 579.0, 573.0, 567.0, 522.0, 573.0, 573.0, 579.0, 579.0, 579.0, 579.0, 579.0, 576.0, 522.0, 579.0, 567.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [294.0, 282.0, 290.0, 292.0, 277.0, 293.0, 275.0, 301.0, 290.0, 289.0, 288.0, 291.0, 289.0, 290.0, 291.0, 291.0, 289.0, 293.0, 287.0, 283.0, 296.0, 282.0, 294.0, 285.0, 291.0, 288.0, 286.0, 284.0, 285.0, 294.0, 292.0, 287.0, 288.0, 291.0, 289.0, 290.0, 264.0, 266.0, 59.0, 64.0, 288.0, 291.0, 319.0, 320.0, 288.0, 294.0, 288.0, 294.0, 266.0, 259.0, 285.0, 285.0, 284.0, 292.0, 289.0, 298.0, 287.0, 289.0, 261.0, 258.0, 291.0, 291.0, 296.0, 286.0, 290.0, 292.0, 289.0, 290.0, 281.0, 295.0, 260.0, 265.0, 283.0, 299.0, 284.0, 295.0, 256.0, 263.0, 283.0, 290.0, 280.0, 293.0, 260.0, 265.0, 31.0, 35.0, 210.0, 209.0, 289.0, 293.0, 289.0, 287.0, 261.0, 261.0, 292.0, 287.0, 284.0, 286.0, 257.0, 256.0, 255.0, 258.0, 285.0, 291.0, 294.0, 288.0, 257.0, 270.0, 290.0, 289.0, 291.0, 291.0, 282.0, 288.0, 293.0, 289.0, 291.0, 285.0, 316.0, 311.0, 288.0, 285.0, 292.0, 290.0, 290.0, 289.0, 291.0, 288.0, 293.0, 289.0, 291.0, 285.0, 282.0, 291.0, 263.0, 259.0, 290.0, 292.0, 288.0, 291.0, 288.0, 291.0, 288.0, 282.0, 283.0, 296.0, 285.0, 294.0, 289.0, 293.0, 296.0, 291.0, 287.0, 289.0, 294.0, 288.0, 285.0, 297.0, 259.0, 263.0, 295.0, 287.0, 290.0, 292.0, 290.0, 
283.0, 290.0, 289.0, 288.0, 291.0, 283.0, 290.0, 277.0, 290.0, 254.0, 268.0, 282.0, 291.0, 292.0, 281.0, 290.0, 289.0, 285.0, 294.0, 291.0, 288.0, 291.0, 288.0, 289.0, 290.0, 289.0, 287.0, 254.0, 268.0, 289.0, 290.0, 271.0, 296.0, 289.0, 290.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.696437434705532, "mean_inference_ms": 1.2431677188040304, "mean_action_processing_ms": 0.1336046089583616, "mean_env_wait_ms": 0.8384268867865966, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 10291200, "num_agent_steps_trained": 10291200, "num_env_steps_sampled": 5145600, "num_env_steps_trained": 5145600, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 5145600, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 10291200, "timers": {"training_iteration_time_ms": 3608.289, "learn_time_ms": 1113.078, "learn_throughput": 11499.647, "synch_weights_time_ms": 11.262}, "counters": {"num_env_steps_sampled": 5145600, "num_env_steps_trained": 5145600, "num_agent_steps_sampled": 10291200, "num_agent_steps_trained": 10291200}, "done": false, "episodes_total": 12864, "training_iteration": 402, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-26-37", "timestamp": 1666581997, "time_this_iter_s": 3.6457085609436035, "time_total_s": 1536.4673883914948, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1536.4673883914948, "timesteps_since_restore": 0, "iterations_since_restore": 402, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.516666666666666, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 194.8, "sparse_reward_min": 20, "sparse_reward_max": 220, "shaped_reward_mean": 173.79, "shaped_reward_min": 26, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.56, "onion_pickup_agent_0_min": 2, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 16.84, "onion_pickup_agent_1_min": 5, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 15.45, "useful_onion_pickup_agent_0_min": 2, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 16.62, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.14, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.15, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.08, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, 
"useful_onion_drop_agent_1_mean": 0.1, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.03, "potting_onion_agent_0_min": 1, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 16.4, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.31, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 5.06, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.16, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.94, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.05, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.07, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.05, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.79, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.02, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.75, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.03, "optimal_onion_potting_agent_0_min": 1, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 16.4, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.03, "viable_onion_potting_agent_0_min": 1, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 16.4, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0022524138912558556, "policy_loss": -0.0026310046669095755, "vf_loss": 
7.907987117767334, "vf_explained_var": 0.5833997130393982, "kl": 0.002337719313800335, "entropy": 0.824416995048523, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 5158400, "num_env_steps_trained": 5158400, "num_agent_steps_sampled": 10316800, "num_agent_steps_trained": 10316800}, "sampler_results": {"episode_reward_max": 627.0, "episode_reward_min": 66.0, "episode_reward_mean": 563.39, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 31.0}, "policy_reward_max": {"ppo": 316.0}, "policy_reward_mean": {"ppo": 281.695}, "custom_metrics": {"sparse_reward_mean": 194.8, "sparse_reward_min": 20, "sparse_reward_max": 220, "shaped_reward_mean": 173.79, "shaped_reward_min": 26, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.56, "onion_pickup_agent_0_min": 2, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 16.84, 
"onion_pickup_agent_1_min": 5, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 15.45, "useful_onion_pickup_agent_0_min": 2, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 16.62, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.14, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.15, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.08, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.1, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.03, "potting_onion_agent_0_min": 1, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 16.4, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.31, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 5.06, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.16, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.94, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.05, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.07, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.05, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.79, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.02, "soup_delivery_agent_0_min": 1, 
"soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.75, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.03, "optimal_onion_potting_agent_0_min": 1, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 16.4, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.03, "viable_onion_potting_agent_0_min": 1, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 16.4, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 579.0, 576.0, 525.0, 582.0, 579.0, 519.0, 573.0, 573.0, 525.0, 66.0, 419.0, 582.0, 576.0, 522.0, 579.0, 570.0, 513.0, 513.0, 576.0, 582.0, 527.0, 579.0, 582.0, 570.0, 582.0, 576.0, 627.0, 573.0, 582.0, 579.0, 579.0, 582.0, 576.0, 573.0, 522.0, 582.0, 579.0, 579.0, 570.0, 579.0, 579.0, 582.0, 587.0, 576.0, 582.0, 582.0, 522.0, 582.0, 582.0, 573.0, 579.0, 579.0, 573.0, 567.0, 522.0, 573.0, 573.0, 579.0, 579.0, 579.0, 579.0, 579.0, 576.0, 522.0, 579.0, 567.0, 579.0, 579.0, 582.0, 579.0, 525.0, 570.0, 579.0, 530.0, 525.0, 573.0, 582.0, 584.0, 573.0, 573.0, 573.0, 527.0, 587.0, 527.0, 570.0, 579.0, 576.0, 579.0, 582.0, 624.0, 579.0, 576.0, 579.0, 579.0, 579.0, 576.0, 587.0, 590.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [290.0, 292.0, 289.0, 290.0, 281.0, 295.0, 260.0, 265.0, 283.0, 299.0, 284.0, 295.0, 256.0, 263.0, 283.0, 290.0, 280.0, 293.0, 260.0, 265.0, 31.0, 35.0, 210.0, 209.0, 289.0, 293.0, 289.0, 287.0, 261.0, 261.0, 292.0, 287.0, 284.0, 286.0, 257.0, 256.0, 255.0, 258.0, 285.0, 291.0, 294.0, 288.0, 257.0, 270.0, 290.0, 289.0, 291.0, 291.0, 282.0, 
288.0, 293.0, 289.0, 291.0, 285.0, 316.0, 311.0, 288.0, 285.0, 292.0, 290.0, 290.0, 289.0, 291.0, 288.0, 293.0, 289.0, 291.0, 285.0, 282.0, 291.0, 263.0, 259.0, 290.0, 292.0, 288.0, 291.0, 288.0, 291.0, 288.0, 282.0, 283.0, 296.0, 285.0, 294.0, 289.0, 293.0, 296.0, 291.0, 287.0, 289.0, 294.0, 288.0, 285.0, 297.0, 259.0, 263.0, 295.0, 287.0, 290.0, 292.0, 290.0, 283.0, 290.0, 289.0, 288.0, 291.0, 283.0, 290.0, 277.0, 290.0, 254.0, 268.0, 282.0, 291.0, 292.0, 281.0, 290.0, 289.0, 285.0, 294.0, 291.0, 288.0, 291.0, 288.0, 289.0, 290.0, 289.0, 287.0, 254.0, 268.0, 289.0, 290.0, 271.0, 296.0, 289.0, 290.0, 293.0, 286.0, 294.0, 288.0, 285.0, 294.0, 260.0, 265.0, 277.0, 293.0, 286.0, 293.0, 262.0, 268.0, 266.0, 259.0, 286.0, 287.0, 291.0, 291.0, 290.0, 294.0, 284.0, 289.0, 279.0, 294.0, 286.0, 287.0, 260.0, 267.0, 298.0, 289.0, 261.0, 266.0, 282.0, 288.0, 281.0, 298.0, 293.0, 283.0, 288.0, 291.0, 297.0, 285.0, 316.0, 308.0, 294.0, 285.0, 283.0, 293.0, 281.0, 298.0, 293.0, 286.0, 291.0, 288.0, 285.0, 291.0, 301.0, 286.0, 297.0, 293.0, 288.0, 288.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6963864365610132, "mean_inference_ms": 1.243033695858959, "mean_action_processing_ms": 0.1335977157557835, "mean_env_wait_ms": 0.8383555357581876, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 627.0, "episode_reward_min": 66.0, "episode_reward_mean": 563.39, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 31.0}, "policy_reward_max": {"ppo": 316.0}, "policy_reward_mean": {"ppo": 281.695}, "hist_stats": {"episode_reward": [582.0, 579.0, 576.0, 525.0, 582.0, 579.0, 519.0, 573.0, 573.0, 525.0, 66.0, 419.0, 582.0, 576.0, 522.0, 579.0, 570.0, 513.0, 513.0, 576.0, 582.0, 527.0, 579.0, 582.0, 570.0, 582.0, 576.0, 627.0, 573.0, 582.0, 579.0, 579.0, 582.0, 576.0, 573.0, 522.0, 582.0, 579.0, 579.0, 570.0, 579.0, 579.0, 582.0, 587.0, 576.0, 582.0, 582.0, 522.0, 582.0, 582.0, 573.0, 579.0, 579.0, 573.0, 567.0, 522.0, 573.0, 
573.0, 579.0, 579.0, 579.0, 579.0, 579.0, 576.0, 522.0, 579.0, 567.0, 579.0, 579.0, 582.0, 579.0, 525.0, 570.0, 579.0, 530.0, 525.0, 573.0, 582.0, 584.0, 573.0, 573.0, 573.0, 527.0, 587.0, 527.0, 570.0, 579.0, 576.0, 579.0, 582.0, 624.0, 579.0, 576.0, 579.0, 579.0, 579.0, 576.0, 587.0, 590.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [290.0, 292.0, 289.0, 290.0, 281.0, 295.0, 260.0, 265.0, 283.0, 299.0, 284.0, 295.0, 256.0, 263.0, 283.0, 290.0, 280.0, 293.0, 260.0, 265.0, 31.0, 35.0, 210.0, 209.0, 289.0, 293.0, 289.0, 287.0, 261.0, 261.0, 292.0, 287.0, 284.0, 286.0, 257.0, 256.0, 255.0, 258.0, 285.0, 291.0, 294.0, 288.0, 257.0, 270.0, 290.0, 289.0, 291.0, 291.0, 282.0, 288.0, 293.0, 289.0, 291.0, 285.0, 316.0, 311.0, 288.0, 285.0, 292.0, 290.0, 290.0, 289.0, 291.0, 288.0, 293.0, 289.0, 291.0, 285.0, 282.0, 291.0, 263.0, 259.0, 290.0, 292.0, 288.0, 291.0, 288.0, 291.0, 288.0, 282.0, 283.0, 296.0, 285.0, 294.0, 289.0, 293.0, 296.0, 291.0, 287.0, 289.0, 294.0, 288.0, 285.0, 297.0, 259.0, 263.0, 295.0, 287.0, 290.0, 292.0, 290.0, 283.0, 290.0, 289.0, 288.0, 291.0, 283.0, 290.0, 277.0, 290.0, 254.0, 268.0, 282.0, 291.0, 292.0, 281.0, 290.0, 289.0, 285.0, 294.0, 291.0, 288.0, 291.0, 288.0, 289.0, 290.0, 289.0, 287.0, 254.0, 268.0, 289.0, 290.0, 271.0, 296.0, 289.0, 290.0, 293.0, 286.0, 294.0, 288.0, 285.0, 294.0, 260.0, 265.0, 277.0, 293.0, 286.0, 293.0, 262.0, 268.0, 266.0, 259.0, 286.0, 287.0, 291.0, 291.0, 290.0, 294.0, 284.0, 289.0, 279.0, 294.0, 286.0, 287.0, 260.0, 
267.0, 298.0, 289.0, 261.0, 266.0, 282.0, 288.0, 281.0, 298.0, 293.0, 283.0, 288.0, 291.0, 297.0, 285.0, 316.0, 308.0, 294.0, 285.0, 283.0, 293.0, 281.0, 298.0, 293.0, 286.0, 291.0, 288.0, 285.0, 291.0, 301.0, 286.0, 297.0, 293.0, 288.0, 288.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6963864365610132, "mean_inference_ms": 1.243033695858959, "mean_action_processing_ms": 0.1335977157557835, "mean_env_wait_ms": 0.8383555357581876, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 10316800, "num_agent_steps_trained": 10316800, "num_env_steps_sampled": 5158400, "num_env_steps_trained": 5158400, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 5158400, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 10316800, "timers": {"training_iteration_time_ms": 3602.134, "learn_time_ms": 1104.185, "learn_throughput": 11592.264, "synch_weights_time_ms": 11.262}, "counters": {"num_env_steps_sampled": 5158400, "num_env_steps_trained": 5158400, "num_agent_steps_sampled": 10316800, "num_agent_steps_trained": 10316800}, "done": false, "episodes_total": 12896, "training_iteration": 403, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-26-40", "timestamp": 1666582000, "time_this_iter_s": 3.616574764251709, "time_total_s": 1540.0839631557465, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1540.0839631557465, "timesteps_since_restore": 0, "iterations_since_restore": 403, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.68, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 196.2, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 174.82, "shaped_reward_min": 142, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.5, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 17.15, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 15.4, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 16.98, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 25, "onion_drop_agent_0_mean": 0.16, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.16, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.07, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, 
"useful_onion_drop_agent_1_mean": 0.08, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 14.93, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 16.69, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 25, "dish_pickup_agent_0_mean": 5.42, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 4.98, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.24, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.88, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.06, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.03, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.16, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.76, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.12, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.73, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.93, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 16.69, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 25, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.93, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 16.69, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 25, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0013055593008175492, "policy_loss": -0.0016851406544446945, "vf_loss": 
7.959023952484131, "vf_explained_var": 0.5701768398284912, "kl": 0.0024630685802549124, "entropy": 0.8326427340507507, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 5171200, "num_env_steps_trained": 5171200, "num_agent_steps_sampled": 10342400, "num_agent_steps_trained": 10342400}, "sampler_results": {"episode_reward_max": 624.0, "episode_reward_min": 462.0, "episode_reward_mean": 567.22, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 230.0}, "policy_reward_max": {"ppo": 316.0}, "policy_reward_mean": {"ppo": 283.61}, "custom_metrics": {"sparse_reward_mean": 196.2, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 174.82, "shaped_reward_min": 142, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.5, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 17.15, 
"onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 15.4, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 16.98, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 25, "onion_drop_agent_0_mean": 0.16, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.16, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.07, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.08, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 14.93, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 16.69, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 25, "dish_pickup_agent_0_mean": 5.42, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 4.98, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.24, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.88, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.06, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.03, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.16, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.76, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.12, "soup_delivery_agent_0_min": 2, 
"soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.73, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.93, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 16.69, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 25, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.93, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 16.69, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 25, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 576.0, 573.0, 522.0, 582.0, 579.0, 579.0, 570.0, 579.0, 579.0, 582.0, 587.0, 576.0, 582.0, 582.0, 522.0, 582.0, 582.0, 573.0, 579.0, 579.0, 573.0, 567.0, 522.0, 573.0, 573.0, 579.0, 579.0, 579.0, 579.0, 579.0, 576.0, 522.0, 579.0, 567.0, 579.0, 579.0, 582.0, 579.0, 525.0, 570.0, 579.0, 530.0, 525.0, 573.0, 582.0, 584.0, 573.0, 573.0, 573.0, 527.0, 587.0, 527.0, 570.0, 579.0, 576.0, 579.0, 582.0, 624.0, 579.0, 576.0, 579.0, 579.0, 579.0, 576.0, 587.0, 590.0, 576.0, 522.0, 579.0, 525.0, 473.0, 570.0, 576.0, 522.0, 579.0, 573.0, 573.0, 579.0, 573.0, 579.0, 519.0, 576.0, 582.0, 576.0, 525.0, 570.0, 462.0, 582.0, 579.0, 573.0, 570.0, 582.0, 579.0, 582.0, 527.0, 570.0, 579.0, 527.0, 567.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [293.0, 289.0, 291.0, 285.0, 282.0, 291.0, 263.0, 259.0, 290.0, 292.0, 288.0, 291.0, 288.0, 291.0, 288.0, 282.0, 283.0, 296.0, 285.0, 294.0, 289.0, 293.0, 296.0, 291.0, 287.0, 289.0, 294.0, 288.0, 285.0, 297.0, 259.0, 263.0, 295.0, 287.0, 290.0, 292.0, 290.0, 283.0, 290.0, 289.0, 288.0, 291.0, 283.0, 290.0, 277.0, 290.0, 254.0, 268.0, 282.0, 
291.0, 292.0, 281.0, 290.0, 289.0, 285.0, 294.0, 291.0, 288.0, 291.0, 288.0, 289.0, 290.0, 289.0, 287.0, 254.0, 268.0, 289.0, 290.0, 271.0, 296.0, 289.0, 290.0, 293.0, 286.0, 294.0, 288.0, 285.0, 294.0, 260.0, 265.0, 277.0, 293.0, 286.0, 293.0, 262.0, 268.0, 266.0, 259.0, 286.0, 287.0, 291.0, 291.0, 290.0, 294.0, 284.0, 289.0, 279.0, 294.0, 286.0, 287.0, 260.0, 267.0, 298.0, 289.0, 261.0, 266.0, 282.0, 288.0, 281.0, 298.0, 293.0, 283.0, 288.0, 291.0, 297.0, 285.0, 316.0, 308.0, 294.0, 285.0, 283.0, 293.0, 281.0, 298.0, 293.0, 286.0, 291.0, 288.0, 285.0, 291.0, 301.0, 286.0, 297.0, 293.0, 288.0, 288.0, 263.0, 259.0, 289.0, 290.0, 259.0, 266.0, 235.0, 238.0, 285.0, 285.0, 281.0, 295.0, 261.0, 261.0, 285.0, 294.0, 290.0, 283.0, 289.0, 284.0, 292.0, 287.0, 280.0, 293.0, 291.0, 288.0, 262.0, 257.0, 287.0, 289.0, 285.0, 297.0, 286.0, 290.0, 264.0, 261.0, 279.0, 291.0, 232.0, 230.0, 293.0, 289.0, 288.0, 291.0, 287.0, 286.0, 281.0, 289.0, 295.0, 287.0, 280.0, 299.0, 287.0, 295.0, 264.0, 263.0, 284.0, 286.0, 293.0, 286.0, 258.0, 269.0, 278.0, 289.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6963418754833549, "mean_inference_ms": 1.2429065862961082, "mean_action_processing_ms": 0.13359205131518181, "mean_env_wait_ms": 0.8382902032666086, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 624.0, "episode_reward_min": 462.0, "episode_reward_mean": 567.22, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 230.0}, "policy_reward_max": {"ppo": 316.0}, "policy_reward_mean": {"ppo": 283.61}, "hist_stats": {"episode_reward": [582.0, 576.0, 573.0, 522.0, 582.0, 579.0, 579.0, 570.0, 579.0, 579.0, 582.0, 587.0, 576.0, 582.0, 582.0, 522.0, 582.0, 582.0, 573.0, 579.0, 579.0, 573.0, 567.0, 522.0, 573.0, 573.0, 579.0, 579.0, 579.0, 579.0, 579.0, 576.0, 522.0, 579.0, 567.0, 579.0, 579.0, 582.0, 579.0, 525.0, 570.0, 579.0, 530.0, 525.0, 573.0, 582.0, 584.0, 573.0, 573.0, 573.0, 527.0, 587.0, 527.0, 570.0, 579.0, 576.0, 579.0, 
582.0, 624.0, 579.0, 576.0, 579.0, 579.0, 579.0, 576.0, 587.0, 590.0, 576.0, 522.0, 579.0, 525.0, 473.0, 570.0, 576.0, 522.0, 579.0, 573.0, 573.0, 579.0, 573.0, 579.0, 519.0, 576.0, 582.0, 576.0, 525.0, 570.0, 462.0, 582.0, 579.0, 573.0, 570.0, 582.0, 579.0, 582.0, 527.0, 570.0, 579.0, 527.0, 567.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [293.0, 289.0, 291.0, 285.0, 282.0, 291.0, 263.0, 259.0, 290.0, 292.0, 288.0, 291.0, 288.0, 291.0, 288.0, 282.0, 283.0, 296.0, 285.0, 294.0, 289.0, 293.0, 296.0, 291.0, 287.0, 289.0, 294.0, 288.0, 285.0, 297.0, 259.0, 263.0, 295.0, 287.0, 290.0, 292.0, 290.0, 283.0, 290.0, 289.0, 288.0, 291.0, 283.0, 290.0, 277.0, 290.0, 254.0, 268.0, 282.0, 291.0, 292.0, 281.0, 290.0, 289.0, 285.0, 294.0, 291.0, 288.0, 291.0, 288.0, 289.0, 290.0, 289.0, 287.0, 254.0, 268.0, 289.0, 290.0, 271.0, 296.0, 289.0, 290.0, 293.0, 286.0, 294.0, 288.0, 285.0, 294.0, 260.0, 265.0, 277.0, 293.0, 286.0, 293.0, 262.0, 268.0, 266.0, 259.0, 286.0, 287.0, 291.0, 291.0, 290.0, 294.0, 284.0, 289.0, 279.0, 294.0, 286.0, 287.0, 260.0, 267.0, 298.0, 289.0, 261.0, 266.0, 282.0, 288.0, 281.0, 298.0, 293.0, 283.0, 288.0, 291.0, 297.0, 285.0, 316.0, 308.0, 294.0, 285.0, 283.0, 293.0, 281.0, 298.0, 293.0, 286.0, 291.0, 288.0, 285.0, 291.0, 301.0, 286.0, 297.0, 293.0, 288.0, 288.0, 263.0, 259.0, 289.0, 290.0, 259.0, 266.0, 235.0, 238.0, 285.0, 285.0, 281.0, 295.0, 261.0, 261.0, 285.0, 294.0, 290.0, 283.0, 289.0, 284.0, 292.0, 287.0, 280.0, 293.0, 291.0, 288.0, 262.0, 257.0, 287.0, 
289.0, 285.0, 297.0, 286.0, 290.0, 264.0, 261.0, 279.0, 291.0, 232.0, 230.0, 293.0, 289.0, 288.0, 291.0, 287.0, 286.0, 281.0, 289.0, 295.0, 287.0, 280.0, 299.0, 287.0, 295.0, 264.0, 263.0, 284.0, 286.0, 293.0, 286.0, 258.0, 269.0, 278.0, 289.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6963418754833549, "mean_inference_ms": 1.2429065862961082, "mean_action_processing_ms": 0.13359205131518181, "mean_env_wait_ms": 0.8382902032666086, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 10342400, "num_agent_steps_trained": 10342400, "num_env_steps_sampled": 5171200, "num_env_steps_trained": 5171200, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 5171200, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 10342400, "timers": {"training_iteration_time_ms": 3612.65, "learn_time_ms": 1108.936, "learn_throughput": 11542.593, "synch_weights_time_ms": 11.868}, "counters": {"num_env_steps_sampled": 5171200, "num_env_steps_trained": 5171200, "num_agent_steps_sampled": 10342400, "num_agent_steps_trained": 10342400}, "done": false, "episodes_total": 12928, "training_iteration": 404, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-26-44", "timestamp": 1666582004, "time_this_iter_s": 3.6314189434051514, "time_total_s": 1543.7153820991516, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1543.7153820991516, "timesteps_since_restore": 0, "iterations_since_restore": 404, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.96, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 196.6, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 174.9, "shaped_reward_min": 142, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.88, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 16.81, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 15.77, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 16.65, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 25, "onion_drop_agent_0_mean": 0.17, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.13, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.07, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, 
"useful_onion_drop_agent_1_mean": 0.07, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.3, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 16.26, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 25, "dish_pickup_agent_0_mean": 5.25, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 5.16, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.09, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.05, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.07, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.04, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 4.99, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.97, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.96, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.94, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.3, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 16.26, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 25, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.3, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 16.26, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 25, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.002099148230627179, "policy_loss": -0.0024678613990545273, "vf_loss": 
7.786614894866943, "vf_explained_var": 0.5827337503433228, "kl": 0.002610996598377824, "entropy": 0.8198966979980469, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 5184000, "num_env_steps_trained": 5184000, "num_agent_steps_sampled": 10368000, "num_agent_steps_trained": 10368000}, "sampler_results": {"episode_reward_max": 630.0, "episode_reward_min": 462.0, "episode_reward_mean": 568.1, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 230.0}, "policy_reward_max": {"ppo": 317.0}, "policy_reward_mean": {"ppo": 284.05}, "custom_metrics": {"sparse_reward_mean": 196.6, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 174.9, "shaped_reward_min": 142, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.88, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 16.81, 
"onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 15.77, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 16.65, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 25, "onion_drop_agent_0_mean": 0.17, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.13, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.07, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.07, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.3, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 16.26, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 25, "dish_pickup_agent_0_mean": 5.25, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 5.16, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.09, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.05, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.07, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.04, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 4.99, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.97, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.96, "soup_delivery_agent_0_min": 2, 
"soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.94, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.3, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 16.26, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 25, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.3, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 16.26, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 25, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [522.0, 579.0, 567.0, 579.0, 579.0, 582.0, 579.0, 525.0, 570.0, 579.0, 530.0, 525.0, 573.0, 582.0, 584.0, 573.0, 573.0, 573.0, 527.0, 587.0, 527.0, 570.0, 579.0, 576.0, 579.0, 582.0, 624.0, 579.0, 576.0, 579.0, 579.0, 579.0, 576.0, 587.0, 590.0, 576.0, 522.0, 579.0, 525.0, 473.0, 570.0, 576.0, 522.0, 579.0, 573.0, 573.0, 579.0, 573.0, 579.0, 519.0, 576.0, 582.0, 576.0, 525.0, 570.0, 462.0, 582.0, 579.0, 573.0, 570.0, 582.0, 579.0, 582.0, 527.0, 570.0, 579.0, 527.0, 567.0, 579.0, 570.0, 582.0, 570.0, 582.0, 579.0, 576.0, 579.0, 630.0, 576.0, 525.0, 579.0, 530.0, 570.0, 570.0, 576.0, 570.0, 576.0, 573.0, 579.0, 573.0, 582.0, 584.0, 582.0, 582.0, 587.0, 573.0, 573.0, 579.0, 579.0, 573.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [254.0, 268.0, 289.0, 290.0, 271.0, 296.0, 289.0, 290.0, 293.0, 286.0, 294.0, 288.0, 285.0, 294.0, 260.0, 265.0, 277.0, 293.0, 286.0, 293.0, 262.0, 268.0, 266.0, 259.0, 286.0, 287.0, 291.0, 291.0, 290.0, 294.0, 284.0, 289.0, 279.0, 294.0, 286.0, 287.0, 260.0, 267.0, 298.0, 289.0, 261.0, 266.0, 282.0, 288.0, 281.0, 298.0, 293.0, 283.0, 288.0, 
291.0, 297.0, 285.0, 316.0, 308.0, 294.0, 285.0, 283.0, 293.0, 281.0, 298.0, 293.0, 286.0, 291.0, 288.0, 285.0, 291.0, 301.0, 286.0, 297.0, 293.0, 288.0, 288.0, 263.0, 259.0, 289.0, 290.0, 259.0, 266.0, 235.0, 238.0, 285.0, 285.0, 281.0, 295.0, 261.0, 261.0, 285.0, 294.0, 290.0, 283.0, 289.0, 284.0, 292.0, 287.0, 280.0, 293.0, 291.0, 288.0, 262.0, 257.0, 287.0, 289.0, 285.0, 297.0, 286.0, 290.0, 264.0, 261.0, 279.0, 291.0, 232.0, 230.0, 293.0, 289.0, 288.0, 291.0, 287.0, 286.0, 281.0, 289.0, 295.0, 287.0, 280.0, 299.0, 287.0, 295.0, 264.0, 263.0, 284.0, 286.0, 293.0, 286.0, 258.0, 269.0, 278.0, 289.0, 285.0, 294.0, 286.0, 284.0, 292.0, 290.0, 279.0, 291.0, 292.0, 290.0, 287.0, 292.0, 285.0, 291.0, 280.0, 299.0, 317.0, 313.0, 284.0, 292.0, 263.0, 262.0, 283.0, 296.0, 269.0, 261.0, 282.0, 288.0, 280.0, 290.0, 290.0, 286.0, 290.0, 280.0, 288.0, 288.0, 286.0, 287.0, 291.0, 288.0, 290.0, 283.0, 290.0, 292.0, 292.0, 292.0, 288.0, 294.0, 284.0, 298.0, 286.0, 301.0, 291.0, 282.0, 281.0, 292.0, 293.0, 286.0, 290.0, 289.0, 281.0, 292.0, 283.0, 293.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6963077094515928, "mean_inference_ms": 1.2427992128696683, "mean_action_processing_ms": 0.13358854048906127, "mean_env_wait_ms": 0.8382407331104629, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 630.0, "episode_reward_min": 462.0, "episode_reward_mean": 568.1, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 230.0}, "policy_reward_max": {"ppo": 317.0}, "policy_reward_mean": {"ppo": 284.05}, "hist_stats": {"episode_reward": [522.0, 579.0, 567.0, 579.0, 579.0, 582.0, 579.0, 525.0, 570.0, 579.0, 530.0, 525.0, 573.0, 582.0, 584.0, 573.0, 573.0, 573.0, 527.0, 587.0, 527.0, 570.0, 579.0, 576.0, 579.0, 582.0, 624.0, 579.0, 576.0, 579.0, 579.0, 579.0, 576.0, 587.0, 590.0, 576.0, 522.0, 579.0, 525.0, 473.0, 570.0, 576.0, 522.0, 579.0, 573.0, 573.0, 579.0, 573.0, 579.0, 519.0, 576.0, 582.0, 576.0, 525.0, 570.0, 462.0, 582.0, 
579.0, 573.0, 570.0, 582.0, 579.0, 582.0, 527.0, 570.0, 579.0, 527.0, 567.0, 579.0, 570.0, 582.0, 570.0, 582.0, 579.0, 576.0, 579.0, 630.0, 576.0, 525.0, 579.0, 530.0, 570.0, 570.0, 576.0, 570.0, 576.0, 573.0, 579.0, 573.0, 582.0, 584.0, 582.0, 582.0, 587.0, 573.0, 573.0, 579.0, 579.0, 573.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [254.0, 268.0, 289.0, 290.0, 271.0, 296.0, 289.0, 290.0, 293.0, 286.0, 294.0, 288.0, 285.0, 294.0, 260.0, 265.0, 277.0, 293.0, 286.0, 293.0, 262.0, 268.0, 266.0, 259.0, 286.0, 287.0, 291.0, 291.0, 290.0, 294.0, 284.0, 289.0, 279.0, 294.0, 286.0, 287.0, 260.0, 267.0, 298.0, 289.0, 261.0, 266.0, 282.0, 288.0, 281.0, 298.0, 293.0, 283.0, 288.0, 291.0, 297.0, 285.0, 316.0, 308.0, 294.0, 285.0, 283.0, 293.0, 281.0, 298.0, 293.0, 286.0, 291.0, 288.0, 285.0, 291.0, 301.0, 286.0, 297.0, 293.0, 288.0, 288.0, 263.0, 259.0, 289.0, 290.0, 259.0, 266.0, 235.0, 238.0, 285.0, 285.0, 281.0, 295.0, 261.0, 261.0, 285.0, 294.0, 290.0, 283.0, 289.0, 284.0, 292.0, 287.0, 280.0, 293.0, 291.0, 288.0, 262.0, 257.0, 287.0, 289.0, 285.0, 297.0, 286.0, 290.0, 264.0, 261.0, 279.0, 291.0, 232.0, 230.0, 293.0, 289.0, 288.0, 291.0, 287.0, 286.0, 281.0, 289.0, 295.0, 287.0, 280.0, 299.0, 287.0, 295.0, 264.0, 263.0, 284.0, 286.0, 293.0, 286.0, 258.0, 269.0, 278.0, 289.0, 285.0, 294.0, 286.0, 284.0, 292.0, 290.0, 279.0, 291.0, 292.0, 290.0, 287.0, 292.0, 285.0, 291.0, 280.0, 299.0, 317.0, 313.0, 284.0, 292.0, 263.0, 262.0, 283.0, 296.0, 269.0, 261.0, 282.0, 288.0, 280.0, 
290.0, 290.0, 286.0, 290.0, 280.0, 288.0, 288.0, 286.0, 287.0, 291.0, 288.0, 290.0, 283.0, 290.0, 292.0, 292.0, 292.0, 288.0, 294.0, 284.0, 298.0, 286.0, 301.0, 291.0, 282.0, 281.0, 292.0, 293.0, 286.0, 290.0, 289.0, 281.0, 292.0, 283.0, 293.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6963077094515928, "mean_inference_ms": 1.2427992128696683, "mean_action_processing_ms": 0.13358854048906127, "mean_env_wait_ms": 0.8382407331104629, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 10368000, "num_agent_steps_trained": 10368000, "num_env_steps_sampled": 5184000, "num_env_steps_trained": 5184000, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 5184000, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 10368000, "timers": {"training_iteration_time_ms": 3632.726, "learn_time_ms": 1118.741, "learn_throughput": 11441.431, "synch_weights_time_ms": 11.388}, "counters": {"num_env_steps_sampled": 5184000, "num_env_steps_trained": 5184000, "num_agent_steps_sampled": 10368000, "num_agent_steps_trained": 10368000}, "done": false, "episodes_total": 12960, "training_iteration": 405, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-26-48", "timestamp": 1666582008, "time_this_iter_s": 3.759228229522705, "time_total_s": 1547.4746103286743, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1547.4746103286743, "timesteps_since_restore": 0, "iterations_since_restore": 405, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.95, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 195.8, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 173.99, "shaped_reward_min": 142, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.59, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 16.93, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 15.49, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 16.78, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 25, "onion_drop_agent_0_mean": 0.18, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.11, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.07, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, 
"useful_onion_drop_agent_1_mean": 0.05, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.04, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 16.44, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 25, "dish_pickup_agent_0_mean": 5.4, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 4.98, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.21, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.84, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.08, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.03, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.04, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.1, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.79, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.07, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.78, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.04, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 16.44, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 25, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.04, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 16.44, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 25, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0004778525326400995, "policy_loss": 0.00010655797086656094, "vf_loss": 
7.847720146179199, "vf_explained_var": 0.5784578919410706, "kl": 0.002981501165777445, "entropy": 0.8269562721252441, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 5196800, "num_env_steps_trained": 5196800, "num_agent_steps_sampled": 10393600, "num_agent_steps_trained": 10393600}, "sampler_results": {"episode_reward_max": 630.0, "episode_reward_min": 462.0, "episode_reward_mean": 565.59, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 230.0}, "policy_reward_max": {"ppo": 317.0}, "policy_reward_mean": {"ppo": 282.795}, "custom_metrics": {"sparse_reward_mean": 195.8, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 173.99, "shaped_reward_min": 142, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.59, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 16.93, 
"onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 15.49, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 16.78, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 25, "onion_drop_agent_0_mean": 0.18, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.11, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.07, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.05, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.04, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 16.44, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 25, "dish_pickup_agent_0_mean": 5.4, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 4.98, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.21, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.84, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.08, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.03, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.04, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.1, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.79, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.07, "soup_delivery_agent_0_min": 2, 
"soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.78, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.04, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 16.44, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 25, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.04, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 16.44, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 25, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [576.0, 587.0, 590.0, 576.0, 522.0, 579.0, 525.0, 473.0, 570.0, 576.0, 522.0, 579.0, 573.0, 573.0, 579.0, 573.0, 579.0, 519.0, 576.0, 582.0, 576.0, 525.0, 570.0, 462.0, 582.0, 579.0, 573.0, 570.0, 582.0, 579.0, 582.0, 527.0, 570.0, 579.0, 527.0, 567.0, 579.0, 570.0, 582.0, 570.0, 582.0, 579.0, 576.0, 579.0, 630.0, 576.0, 525.0, 579.0, 530.0, 570.0, 570.0, 576.0, 570.0, 576.0, 573.0, 579.0, 573.0, 582.0, 584.0, 582.0, 582.0, 587.0, 573.0, 573.0, 579.0, 579.0, 573.0, 576.0, 576.0, 513.0, 579.0, 579.0, 525.0, 519.0, 579.0, 576.0, 579.0, 569.0, 579.0, 519.0, 582.0, 576.0, 582.0, 579.0, 579.0, 579.0, 525.0, 576.0, 522.0, 525.0, 576.0, 573.0, 573.0, 522.0, 579.0, 525.0, 573.0, 579.0, 570.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [285.0, 291.0, 301.0, 286.0, 297.0, 293.0, 288.0, 288.0, 263.0, 259.0, 289.0, 290.0, 259.0, 266.0, 235.0, 238.0, 285.0, 285.0, 281.0, 295.0, 261.0, 261.0, 285.0, 294.0, 290.0, 283.0, 289.0, 284.0, 292.0, 287.0, 280.0, 293.0, 291.0, 288.0, 262.0, 257.0, 287.0, 289.0, 285.0, 297.0, 286.0, 290.0, 264.0, 261.0, 279.0, 291.0, 232.0, 230.0, 293.0, 
289.0, 288.0, 291.0, 287.0, 286.0, 281.0, 289.0, 295.0, 287.0, 280.0, 299.0, 287.0, 295.0, 264.0, 263.0, 284.0, 286.0, 293.0, 286.0, 258.0, 269.0, 278.0, 289.0, 285.0, 294.0, 286.0, 284.0, 292.0, 290.0, 279.0, 291.0, 292.0, 290.0, 287.0, 292.0, 285.0, 291.0, 280.0, 299.0, 317.0, 313.0, 284.0, 292.0, 263.0, 262.0, 283.0, 296.0, 269.0, 261.0, 282.0, 288.0, 280.0, 290.0, 290.0, 286.0, 290.0, 280.0, 288.0, 288.0, 286.0, 287.0, 291.0, 288.0, 290.0, 283.0, 290.0, 292.0, 292.0, 292.0, 288.0, 294.0, 284.0, 298.0, 286.0, 301.0, 291.0, 282.0, 281.0, 292.0, 293.0, 286.0, 290.0, 289.0, 281.0, 292.0, 283.0, 293.0, 286.0, 290.0, 256.0, 257.0, 290.0, 289.0, 289.0, 290.0, 270.0, 255.0, 260.0, 259.0, 290.0, 289.0, 291.0, 285.0, 286.0, 293.0, 285.0, 284.0, 291.0, 288.0, 253.0, 266.0, 291.0, 291.0, 290.0, 286.0, 288.0, 294.0, 285.0, 294.0, 288.0, 291.0, 293.0, 286.0, 262.0, 263.0, 291.0, 285.0, 272.0, 250.0, 262.0, 263.0, 286.0, 290.0, 286.0, 287.0, 289.0, 284.0, 267.0, 255.0, 292.0, 287.0, 259.0, 266.0, 285.0, 288.0, 288.0, 291.0, 281.0, 289.0, 286.0, 293.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6962743444013426, "mean_inference_ms": 1.2426879235154644, "mean_action_processing_ms": 0.13358549381719295, "mean_env_wait_ms": 0.8381940028659765, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 630.0, "episode_reward_min": 462.0, "episode_reward_mean": 565.59, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 230.0}, "policy_reward_max": {"ppo": 317.0}, "policy_reward_mean": {"ppo": 282.795}, "hist_stats": {"episode_reward": [576.0, 587.0, 590.0, 576.0, 522.0, 579.0, 525.0, 473.0, 570.0, 576.0, 522.0, 579.0, 573.0, 573.0, 579.0, 573.0, 579.0, 519.0, 576.0, 582.0, 576.0, 525.0, 570.0, 462.0, 582.0, 579.0, 573.0, 570.0, 582.0, 579.0, 582.0, 527.0, 570.0, 579.0, 527.0, 567.0, 579.0, 570.0, 582.0, 570.0, 582.0, 579.0, 576.0, 579.0, 630.0, 576.0, 525.0, 579.0, 530.0, 570.0, 570.0, 576.0, 570.0, 576.0, 573.0, 579.0, 
573.0, 582.0, 584.0, 582.0, 582.0, 587.0, 573.0, 573.0, 579.0, 579.0, 573.0, 576.0, 576.0, 513.0, 579.0, 579.0, 525.0, 519.0, 579.0, 576.0, 579.0, 569.0, 579.0, 519.0, 582.0, 576.0, 582.0, 579.0, 579.0, 579.0, 525.0, 576.0, 522.0, 525.0, 576.0, 573.0, 573.0, 522.0, 579.0, 525.0, 573.0, 579.0, 570.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [285.0, 291.0, 301.0, 286.0, 297.0, 293.0, 288.0, 288.0, 263.0, 259.0, 289.0, 290.0, 259.0, 266.0, 235.0, 238.0, 285.0, 285.0, 281.0, 295.0, 261.0, 261.0, 285.0, 294.0, 290.0, 283.0, 289.0, 284.0, 292.0, 287.0, 280.0, 293.0, 291.0, 288.0, 262.0, 257.0, 287.0, 289.0, 285.0, 297.0, 286.0, 290.0, 264.0, 261.0, 279.0, 291.0, 232.0, 230.0, 293.0, 289.0, 288.0, 291.0, 287.0, 286.0, 281.0, 289.0, 295.0, 287.0, 280.0, 299.0, 287.0, 295.0, 264.0, 263.0, 284.0, 286.0, 293.0, 286.0, 258.0, 269.0, 278.0, 289.0, 285.0, 294.0, 286.0, 284.0, 292.0, 290.0, 279.0, 291.0, 292.0, 290.0, 287.0, 292.0, 285.0, 291.0, 280.0, 299.0, 317.0, 313.0, 284.0, 292.0, 263.0, 262.0, 283.0, 296.0, 269.0, 261.0, 282.0, 288.0, 280.0, 290.0, 290.0, 286.0, 290.0, 280.0, 288.0, 288.0, 286.0, 287.0, 291.0, 288.0, 290.0, 283.0, 290.0, 292.0, 292.0, 292.0, 288.0, 294.0, 284.0, 298.0, 286.0, 301.0, 291.0, 282.0, 281.0, 292.0, 293.0, 286.0, 290.0, 289.0, 281.0, 292.0, 283.0, 293.0, 286.0, 290.0, 256.0, 257.0, 290.0, 289.0, 289.0, 290.0, 270.0, 255.0, 260.0, 259.0, 290.0, 289.0, 291.0, 285.0, 286.0, 293.0, 285.0, 284.0, 291.0, 288.0, 253.0, 266.0, 291.0, 291.0, 290.0, 286.0, 
288.0, 294.0, 285.0, 294.0, 288.0, 291.0, 293.0, 286.0, 262.0, 263.0, 291.0, 285.0, 272.0, 250.0, 262.0, 263.0, 286.0, 290.0, 286.0, 287.0, 289.0, 284.0, 267.0, 255.0, 292.0, 287.0, 259.0, 266.0, 285.0, 288.0, 288.0, 291.0, 281.0, 289.0, 286.0, 293.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6962743444013426, "mean_inference_ms": 1.2426879235154644, "mean_action_processing_ms": 0.13358549381719295, "mean_env_wait_ms": 0.8381940028659765, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 10393600, "num_agent_steps_trained": 10393600, "num_env_steps_sampled": 5196800, "num_env_steps_trained": 5196800, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 5196800, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 10393600, "timers": {"training_iteration_time_ms": 3645.848, "learn_time_ms": 1124.775, "learn_throughput": 11380.057, "synch_weights_time_ms": 11.893}, "counters": {"num_env_steps_sampled": 5196800, "num_env_steps_trained": 5196800, "num_agent_steps_sampled": 10393600, "num_agent_steps_trained": 10393600}, "done": false, "episodes_total": 12992, "training_iteration": 406, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-26-52", "timestamp": 1666582012, "time_this_iter_s": 3.7158660888671875, "time_total_s": 1551.1904764175415, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1551.1904764175415, "timesteps_since_restore": 0, "iterations_since_restore": 406, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.63333333333333, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 196.6, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 174.35, "shaped_reward_min": 145, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.8, "onion_pickup_agent_0_min": 10, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 16.77, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 15.65, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 16.59, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.18, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.09, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.08, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, 
"useful_onion_drop_agent_1_mean": 0.04, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.24, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 16.28, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.37, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 4.98, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.22, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.86, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.1, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.02, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.08, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.84, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.04, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.84, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.24, "optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 16.28, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.24, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 16.28, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0009458856075070798, "policy_loss": -0.0013118607457727194, "vf_loss": 
7.801250457763672, "vf_explained_var": 0.5747021436691284, "kl": 0.002498175483196974, "entropy": 0.8282989263534546, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 5209600, "num_env_steps_trained": 5209600, "num_agent_steps_sampled": 10419200, "num_agent_steps_trained": 10419200}, "sampler_results": {"episode_reward_max": 630.0, "episode_reward_min": 465.0, "episode_reward_mean": 567.55, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 229.0}, "policy_reward_max": {"ppo": 317.0}, "policy_reward_mean": {"ppo": 283.775}, "custom_metrics": {"sparse_reward_mean": 196.6, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 174.35, "shaped_reward_min": 145, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.8, "onion_pickup_agent_0_min": 10, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 16.77, 
"onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 15.65, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 16.59, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.18, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.09, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.08, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.04, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.24, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 16.28, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.37, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 4.98, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.22, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.86, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.1, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.02, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.08, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.84, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.04, "soup_delivery_agent_0_min": 2, 
"soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.84, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.24, "optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 16.28, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.24, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 16.28, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [570.0, 579.0, 527.0, 567.0, 579.0, 570.0, 582.0, 570.0, 582.0, 579.0, 576.0, 579.0, 630.0, 576.0, 525.0, 579.0, 530.0, 570.0, 570.0, 576.0, 570.0, 576.0, 573.0, 579.0, 573.0, 582.0, 584.0, 582.0, 582.0, 587.0, 573.0, 573.0, 579.0, 579.0, 573.0, 576.0, 576.0, 513.0, 579.0, 579.0, 525.0, 519.0, 579.0, 576.0, 579.0, 569.0, 579.0, 519.0, 582.0, 576.0, 582.0, 579.0, 579.0, 579.0, 525.0, 576.0, 522.0, 525.0, 576.0, 573.0, 573.0, 522.0, 579.0, 525.0, 573.0, 579.0, 570.0, 579.0, 573.0, 579.0, 576.0, 582.0, 570.0, 630.0, 525.0, 581.0, 576.0, 579.0, 570.0, 573.0, 579.0, 581.0, 582.0, 573.0, 570.0, 579.0, 579.0, 465.0, 525.0, 573.0, 522.0, 573.0, 579.0, 582.0, 570.0, 522.0, 579.0, 527.0, 579.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [284.0, 286.0, 293.0, 286.0, 258.0, 269.0, 278.0, 289.0, 285.0, 294.0, 286.0, 284.0, 292.0, 290.0, 279.0, 291.0, 292.0, 290.0, 287.0, 292.0, 285.0, 291.0, 280.0, 299.0, 317.0, 313.0, 284.0, 292.0, 263.0, 262.0, 283.0, 296.0, 269.0, 261.0, 282.0, 288.0, 280.0, 290.0, 290.0, 286.0, 290.0, 280.0, 288.0, 288.0, 286.0, 287.0, 291.0, 288.0, 290.0, 
283.0, 290.0, 292.0, 292.0, 292.0, 288.0, 294.0, 284.0, 298.0, 286.0, 301.0, 291.0, 282.0, 281.0, 292.0, 293.0, 286.0, 290.0, 289.0, 281.0, 292.0, 283.0, 293.0, 286.0, 290.0, 256.0, 257.0, 290.0, 289.0, 289.0, 290.0, 270.0, 255.0, 260.0, 259.0, 290.0, 289.0, 291.0, 285.0, 286.0, 293.0, 285.0, 284.0, 291.0, 288.0, 253.0, 266.0, 291.0, 291.0, 290.0, 286.0, 288.0, 294.0, 285.0, 294.0, 288.0, 291.0, 293.0, 286.0, 262.0, 263.0, 291.0, 285.0, 272.0, 250.0, 262.0, 263.0, 286.0, 290.0, 286.0, 287.0, 289.0, 284.0, 267.0, 255.0, 292.0, 287.0, 259.0, 266.0, 285.0, 288.0, 288.0, 291.0, 281.0, 289.0, 286.0, 293.0, 285.0, 288.0, 288.0, 291.0, 292.0, 284.0, 298.0, 284.0, 282.0, 288.0, 316.0, 314.0, 253.0, 272.0, 292.0, 289.0, 288.0, 288.0, 293.0, 286.0, 290.0, 280.0, 284.0, 289.0, 291.0, 288.0, 294.0, 287.0, 293.0, 289.0, 284.0, 289.0, 281.0, 289.0, 293.0, 286.0, 292.0, 287.0, 236.0, 229.0, 264.0, 261.0, 286.0, 287.0, 256.0, 266.0, 288.0, 285.0, 284.0, 295.0, 293.0, 289.0, 290.0, 280.0, 255.0, 267.0, 292.0, 287.0, 263.0, 264.0, 290.0, 289.0, 288.0, 291.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6962311827286716, "mean_inference_ms": 1.242566188112285, "mean_action_processing_ms": 0.133582226305427, "mean_env_wait_ms": 0.8381410128120548, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 630.0, "episode_reward_min": 465.0, "episode_reward_mean": 567.55, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 229.0}, "policy_reward_max": {"ppo": 317.0}, "policy_reward_mean": {"ppo": 283.775}, "hist_stats": {"episode_reward": [570.0, 579.0, 527.0, 567.0, 579.0, 570.0, 582.0, 570.0, 582.0, 579.0, 576.0, 579.0, 630.0, 576.0, 525.0, 579.0, 530.0, 570.0, 570.0, 576.0, 570.0, 576.0, 573.0, 579.0, 573.0, 582.0, 584.0, 582.0, 582.0, 587.0, 573.0, 573.0, 579.0, 579.0, 573.0, 576.0, 576.0, 513.0, 579.0, 579.0, 525.0, 519.0, 579.0, 576.0, 579.0, 569.0, 579.0, 519.0, 582.0, 576.0, 582.0, 579.0, 579.0, 579.0, 525.0, 576.0, 522.0, 
525.0, 576.0, 573.0, 573.0, 522.0, 579.0, 525.0, 573.0, 579.0, 570.0, 579.0, 573.0, 579.0, 576.0, 582.0, 570.0, 630.0, 525.0, 581.0, 576.0, 579.0, 570.0, 573.0, 579.0, 581.0, 582.0, 573.0, 570.0, 579.0, 579.0, 465.0, 525.0, 573.0, 522.0, 573.0, 579.0, 582.0, 570.0, 522.0, 579.0, 527.0, 579.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [284.0, 286.0, 293.0, 286.0, 258.0, 269.0, 278.0, 289.0, 285.0, 294.0, 286.0, 284.0, 292.0, 290.0, 279.0, 291.0, 292.0, 290.0, 287.0, 292.0, 285.0, 291.0, 280.0, 299.0, 317.0, 313.0, 284.0, 292.0, 263.0, 262.0, 283.0, 296.0, 269.0, 261.0, 282.0, 288.0, 280.0, 290.0, 290.0, 286.0, 290.0, 280.0, 288.0, 288.0, 286.0, 287.0, 291.0, 288.0, 290.0, 283.0, 290.0, 292.0, 292.0, 292.0, 288.0, 294.0, 284.0, 298.0, 286.0, 301.0, 291.0, 282.0, 281.0, 292.0, 293.0, 286.0, 290.0, 289.0, 281.0, 292.0, 283.0, 293.0, 286.0, 290.0, 256.0, 257.0, 290.0, 289.0, 289.0, 290.0, 270.0, 255.0, 260.0, 259.0, 290.0, 289.0, 291.0, 285.0, 286.0, 293.0, 285.0, 284.0, 291.0, 288.0, 253.0, 266.0, 291.0, 291.0, 290.0, 286.0, 288.0, 294.0, 285.0, 294.0, 288.0, 291.0, 293.0, 286.0, 262.0, 263.0, 291.0, 285.0, 272.0, 250.0, 262.0, 263.0, 286.0, 290.0, 286.0, 287.0, 289.0, 284.0, 267.0, 255.0, 292.0, 287.0, 259.0, 266.0, 285.0, 288.0, 288.0, 291.0, 281.0, 289.0, 286.0, 293.0, 285.0, 288.0, 288.0, 291.0, 292.0, 284.0, 298.0, 284.0, 282.0, 288.0, 316.0, 314.0, 253.0, 272.0, 292.0, 289.0, 288.0, 288.0, 293.0, 286.0, 290.0, 280.0, 284.0, 289.0, 291.0, 288.0, 294.0, 287.0, 293.0, 
289.0, 284.0, 289.0, 281.0, 289.0, 293.0, 286.0, 292.0, 287.0, 236.0, 229.0, 264.0, 261.0, 286.0, 287.0, 256.0, 266.0, 288.0, 285.0, 284.0, 295.0, 293.0, 289.0, 290.0, 280.0, 255.0, 267.0, 292.0, 287.0, 263.0, 264.0, 290.0, 289.0, 288.0, 291.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6962311827286716, "mean_inference_ms": 1.242566188112285, "mean_action_processing_ms": 0.133582226305427, "mean_env_wait_ms": 0.8381410128120548, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 10419200, "num_agent_steps_trained": 10419200, "num_env_steps_sampled": 5209600, "num_env_steps_trained": 5209600, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 5209600, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 10419200, "timers": {"training_iteration_time_ms": 3655.211, "learn_time_ms": 1130.41, "learn_throughput": 11323.327, "synch_weights_time_ms": 11.094}, "counters": {"num_env_steps_sampled": 5209600, "num_env_steps_trained": 5209600, "num_agent_steps_sampled": 10419200, "num_agent_steps_trained": 10419200}, "done": false, "episodes_total": 13024, "training_iteration": 407, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-26-56", "timestamp": 1666582016, "time_this_iter_s": 3.7446985244750977, "time_total_s": 1554.9351749420166, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1554.9351749420166, "timesteps_since_restore": 0, "iterations_since_restore": 407, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.44, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 193.4, "sparse_reward_min": 60, "sparse_reward_max": 220, "shaped_reward_mean": 172.7, "shaped_reward_min": 102, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.54, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 16.69, "onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 15.36, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 16.49, "useful_onion_pickup_agent_1_min": 6, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.16, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.12, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.06, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, 
"useful_onion_drop_agent_1_mean": 0.05, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.05, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 16.21, "potting_onion_agent_1_min": 6, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.45, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 4.89, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.26, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.73, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.1, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.05, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.1, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.7, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.06, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.66, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 15.05, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 16.21, "optimal_onion_potting_agent_1_min": 6, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.05, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 16.21, "viable_onion_potting_agent_1_min": 6, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0010337545536458492, "policy_loss": -0.0014150127535685897, "vf_loss": 
7.953579902648926, "vf_explained_var": 0.5916837453842163, "kl": 0.00292446231469512, "entropy": 0.828201174736023, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 5222400, "num_env_steps_trained": 5222400, "num_agent_steps_sampled": 10444800, "num_agent_steps_trained": 10444800}, "sampler_results": {"episode_reward_max": 633.0, "episode_reward_min": 222.0, "episode_reward_mean": 559.5, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 99.0}, "policy_reward_max": {"ppo": 319.0}, "policy_reward_mean": {"ppo": 279.75}, "custom_metrics": {"sparse_reward_mean": 193.4, "sparse_reward_min": 60, "sparse_reward_max": 220, "shaped_reward_mean": 172.7, "shaped_reward_min": 102, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.54, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 16.69, 
"onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 15.36, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 16.49, "useful_onion_pickup_agent_1_min": 6, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.16, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.12, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.06, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.05, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.05, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 16.21, "potting_onion_agent_1_min": 6, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.45, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 4.89, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.26, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.73, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.1, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.05, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.1, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.7, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.06, "soup_delivery_agent_0_min": 2, 
"soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.66, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 15.05, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 16.21, "optimal_onion_potting_agent_1_min": 6, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.05, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 16.21, "viable_onion_potting_agent_1_min": 6, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 579.0, 573.0, 576.0, 576.0, 513.0, 579.0, 579.0, 525.0, 519.0, 579.0, 576.0, 579.0, 569.0, 579.0, 519.0, 582.0, 576.0, 582.0, 579.0, 579.0, 579.0, 525.0, 576.0, 522.0, 525.0, 576.0, 573.0, 573.0, 522.0, 579.0, 525.0, 573.0, 579.0, 570.0, 579.0, 573.0, 579.0, 576.0, 582.0, 570.0, 630.0, 525.0, 581.0, 576.0, 579.0, 570.0, 573.0, 579.0, 581.0, 582.0, 573.0, 570.0, 579.0, 579.0, 465.0, 525.0, 573.0, 522.0, 573.0, 579.0, 582.0, 570.0, 522.0, 579.0, 527.0, 579.0, 579.0, 579.0, 573.0, 582.0, 579.0, 530.0, 582.0, 582.0, 222.0, 570.0, 516.0, 576.0, 633.0, 582.0, 516.0, 582.0, 525.0, 582.0, 587.0, 522.0, 584.0, 305.0, 573.0, 582.0, 630.0, 576.0, 525.0, 573.0, 527.0, 576.0, 570.0, 582.0, 522.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [293.0, 286.0, 290.0, 289.0, 281.0, 292.0, 283.0, 293.0, 286.0, 290.0, 256.0, 257.0, 290.0, 289.0, 289.0, 290.0, 270.0, 255.0, 260.0, 259.0, 290.0, 289.0, 291.0, 285.0, 286.0, 293.0, 285.0, 284.0, 291.0, 288.0, 253.0, 266.0, 291.0, 291.0, 290.0, 286.0, 288.0, 294.0, 285.0, 294.0, 288.0, 291.0, 293.0, 286.0, 262.0, 263.0, 291.0, 285.0, 272.0, 
250.0, 262.0, 263.0, 286.0, 290.0, 286.0, 287.0, 289.0, 284.0, 267.0, 255.0, 292.0, 287.0, 259.0, 266.0, 285.0, 288.0, 288.0, 291.0, 281.0, 289.0, 286.0, 293.0, 285.0, 288.0, 288.0, 291.0, 292.0, 284.0, 298.0, 284.0, 282.0, 288.0, 316.0, 314.0, 253.0, 272.0, 292.0, 289.0, 288.0, 288.0, 293.0, 286.0, 290.0, 280.0, 284.0, 289.0, 291.0, 288.0, 294.0, 287.0, 293.0, 289.0, 284.0, 289.0, 281.0, 289.0, 293.0, 286.0, 292.0, 287.0, 236.0, 229.0, 264.0, 261.0, 286.0, 287.0, 256.0, 266.0, 288.0, 285.0, 284.0, 295.0, 293.0, 289.0, 290.0, 280.0, 255.0, 267.0, 292.0, 287.0, 263.0, 264.0, 290.0, 289.0, 288.0, 291.0, 296.0, 283.0, 287.0, 286.0, 287.0, 295.0, 294.0, 285.0, 260.0, 270.0, 291.0, 291.0, 292.0, 290.0, 123.0, 99.0, 280.0, 290.0, 256.0, 260.0, 290.0, 286.0, 317.0, 316.0, 293.0, 289.0, 252.0, 264.0, 294.0, 288.0, 260.0, 265.0, 295.0, 287.0, 293.0, 294.0, 262.0, 260.0, 290.0, 294.0, 151.0, 154.0, 282.0, 291.0, 293.0, 289.0, 311.0, 319.0, 288.0, 288.0, 258.0, 267.0, 291.0, 282.0, 260.0, 267.0, 280.0, 296.0, 287.0, 283.0, 295.0, 287.0, 261.0, 261.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6961734234605836, "mean_inference_ms": 1.242427938000899, "mean_action_processing_ms": 0.1335768680504018, "mean_env_wait_ms": 0.8380733234527808, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 633.0, "episode_reward_min": 222.0, "episode_reward_mean": 559.5, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 99.0}, "policy_reward_max": {"ppo": 319.0}, "policy_reward_mean": {"ppo": 279.75}, "hist_stats": {"episode_reward": [579.0, 579.0, 573.0, 576.0, 576.0, 513.0, 579.0, 579.0, 525.0, 519.0, 579.0, 576.0, 579.0, 569.0, 579.0, 519.0, 582.0, 576.0, 582.0, 579.0, 579.0, 579.0, 525.0, 576.0, 522.0, 525.0, 576.0, 573.0, 573.0, 522.0, 579.0, 525.0, 573.0, 579.0, 570.0, 579.0, 573.0, 579.0, 576.0, 582.0, 570.0, 630.0, 525.0, 581.0, 576.0, 579.0, 570.0, 573.0, 579.0, 581.0, 582.0, 573.0, 570.0, 579.0, 579.0, 465.0, 525.0, 
573.0, 522.0, 573.0, 579.0, 582.0, 570.0, 522.0, 579.0, 527.0, 579.0, 579.0, 579.0, 573.0, 582.0, 579.0, 530.0, 582.0, 582.0, 222.0, 570.0, 516.0, 576.0, 633.0, 582.0, 516.0, 582.0, 525.0, 582.0, 587.0, 522.0, 584.0, 305.0, 573.0, 582.0, 630.0, 576.0, 525.0, 573.0, 527.0, 576.0, 570.0, 582.0, 522.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [293.0, 286.0, 290.0, 289.0, 281.0, 292.0, 283.0, 293.0, 286.0, 290.0, 256.0, 257.0, 290.0, 289.0, 289.0, 290.0, 270.0, 255.0, 260.0, 259.0, 290.0, 289.0, 291.0, 285.0, 286.0, 293.0, 285.0, 284.0, 291.0, 288.0, 253.0, 266.0, 291.0, 291.0, 290.0, 286.0, 288.0, 294.0, 285.0, 294.0, 288.0, 291.0, 293.0, 286.0, 262.0, 263.0, 291.0, 285.0, 272.0, 250.0, 262.0, 263.0, 286.0, 290.0, 286.0, 287.0, 289.0, 284.0, 267.0, 255.0, 292.0, 287.0, 259.0, 266.0, 285.0, 288.0, 288.0, 291.0, 281.0, 289.0, 286.0, 293.0, 285.0, 288.0, 288.0, 291.0, 292.0, 284.0, 298.0, 284.0, 282.0, 288.0, 316.0, 314.0, 253.0, 272.0, 292.0, 289.0, 288.0, 288.0, 293.0, 286.0, 290.0, 280.0, 284.0, 289.0, 291.0, 288.0, 294.0, 287.0, 293.0, 289.0, 284.0, 289.0, 281.0, 289.0, 293.0, 286.0, 292.0, 287.0, 236.0, 229.0, 264.0, 261.0, 286.0, 287.0, 256.0, 266.0, 288.0, 285.0, 284.0, 295.0, 293.0, 289.0, 290.0, 280.0, 255.0, 267.0, 292.0, 287.0, 263.0, 264.0, 290.0, 289.0, 288.0, 291.0, 296.0, 283.0, 287.0, 286.0, 287.0, 295.0, 294.0, 285.0, 260.0, 270.0, 291.0, 291.0, 292.0, 290.0, 123.0, 99.0, 280.0, 290.0, 256.0, 260.0, 290.0, 286.0, 317.0, 316.0, 293.0, 289.0, 252.0, 264.0, 294.0, 
288.0, 260.0, 265.0, 295.0, 287.0, 293.0, 294.0, 262.0, 260.0, 290.0, 294.0, 151.0, 154.0, 282.0, 291.0, 293.0, 289.0, 311.0, 319.0, 288.0, 288.0, 258.0, 267.0, 291.0, 282.0, 260.0, 267.0, 280.0, 296.0, 287.0, 283.0, 295.0, 287.0, 261.0, 261.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6961734234605836, "mean_inference_ms": 1.242427938000899, "mean_action_processing_ms": 0.1335768680504018, "mean_env_wait_ms": 0.8380733234527808, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 10444800, "num_agent_steps_trained": 10444800, "num_env_steps_sampled": 5222400, "num_env_steps_trained": 5222400, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 5222400, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 10444800, "timers": {"training_iteration_time_ms": 3618.454, "learn_time_ms": 1124.575, "learn_throughput": 11382.075, "synch_weights_time_ms": 11.401}, "counters": {"num_env_steps_sampled": 5222400, "num_env_steps_trained": 5222400, "num_agent_steps_sampled": 10444800, "num_agent_steps_trained": 10444800}, "done": false, "episodes_total": 13056, "training_iteration": 408, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-27-00", "timestamp": 1666582020, "time_this_iter_s": 3.608797073364258, "time_total_s": 1558.5439720153809, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1558.5439720153809, "timesteps_since_restore": 0, "iterations_since_restore": 408, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.833333333333332, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 194.4, "sparse_reward_min": 60, "sparse_reward_max": 220, "shaped_reward_mean": 173.89, "shaped_reward_min": 102, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.92, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 16.42, "onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 15.7, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 16.24, "useful_onion_pickup_agent_1_min": 6, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.17, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.15, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.07, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, 
"useful_onion_drop_agent_1_mean": 0.07, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.47, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 15.92, "potting_onion_agent_1_min": 6, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.34, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 5.1, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.18, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.96, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.11, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.07, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 4.98, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.88, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 4.95, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.82, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 15.47, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 15.92, "optimal_onion_potting_agent_1_min": 6, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.47, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 15.92, "viable_onion_potting_agent_1_min": 6, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0002713116118684411, "policy_loss": -0.00010633841156959534, "vf_loss": 
7.856367588043213, "vf_explained_var": 0.5783417224884033, "kl": 0.003222328145056963, "entropy": 0.815973699092865, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 5235200, "num_env_steps_trained": 5235200, "num_agent_steps_sampled": 10470400, "num_agent_steps_trained": 10470400}, "sampler_results": {"episode_reward_max": 633.0, "episode_reward_min": 222.0, "episode_reward_mean": 562.69, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 99.0}, "policy_reward_max": {"ppo": 319.0}, "policy_reward_mean": {"ppo": 281.345}, "custom_metrics": {"sparse_reward_mean": 194.4, "sparse_reward_min": 60, "sparse_reward_max": 220, "shaped_reward_mean": 173.89, "shaped_reward_min": 102, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.92, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 16.42, 
"onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 15.7, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 16.24, "useful_onion_pickup_agent_1_min": 6, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.17, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.15, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.07, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.07, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.47, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 15.92, "potting_onion_agent_1_min": 6, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.34, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 5.1, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.18, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.96, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.11, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.07, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 4.98, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.88, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 4.95, "soup_delivery_agent_0_min": 2, 
"soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.82, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 15.47, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 15.92, "optimal_onion_potting_agent_1_min": 6, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.47, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 15.92, "viable_onion_potting_agent_1_min": 6, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [573.0, 579.0, 570.0, 579.0, 573.0, 579.0, 576.0, 582.0, 570.0, 630.0, 525.0, 581.0, 576.0, 579.0, 570.0, 573.0, 579.0, 581.0, 582.0, 573.0, 570.0, 579.0, 579.0, 465.0, 525.0, 573.0, 522.0, 573.0, 579.0, 582.0, 570.0, 522.0, 579.0, 527.0, 579.0, 579.0, 579.0, 573.0, 582.0, 579.0, 530.0, 582.0, 582.0, 222.0, 570.0, 516.0, 576.0, 633.0, 582.0, 516.0, 582.0, 525.0, 582.0, 587.0, 522.0, 584.0, 305.0, 573.0, 582.0, 630.0, 576.0, 525.0, 573.0, 527.0, 576.0, 570.0, 582.0, 522.0, 587.0, 576.0, 522.0, 573.0, 573.0, 579.0, 582.0, 576.0, 582.0, 582.0, 579.0, 579.0, 576.0, 573.0, 576.0, 584.0, 573.0, 579.0, 530.0, 579.0, 576.0, 576.0, 576.0, 582.0, 582.0, 582.0, 579.0, 573.0, 579.0, 582.0, 525.0, 519.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [285.0, 288.0, 288.0, 291.0, 281.0, 289.0, 286.0, 293.0, 285.0, 288.0, 288.0, 291.0, 292.0, 284.0, 298.0, 284.0, 282.0, 288.0, 316.0, 314.0, 253.0, 272.0, 292.0, 289.0, 288.0, 288.0, 293.0, 286.0, 290.0, 280.0, 284.0, 289.0, 291.0, 288.0, 294.0, 287.0, 293.0, 289.0, 284.0, 289.0, 281.0, 289.0, 293.0, 286.0, 292.0, 287.0, 236.0, 229.0, 264.0, 
261.0, 286.0, 287.0, 256.0, 266.0, 288.0, 285.0, 284.0, 295.0, 293.0, 289.0, 290.0, 280.0, 255.0, 267.0, 292.0, 287.0, 263.0, 264.0, 290.0, 289.0, 288.0, 291.0, 296.0, 283.0, 287.0, 286.0, 287.0, 295.0, 294.0, 285.0, 260.0, 270.0, 291.0, 291.0, 292.0, 290.0, 123.0, 99.0, 280.0, 290.0, 256.0, 260.0, 290.0, 286.0, 317.0, 316.0, 293.0, 289.0, 252.0, 264.0, 294.0, 288.0, 260.0, 265.0, 295.0, 287.0, 293.0, 294.0, 262.0, 260.0, 290.0, 294.0, 151.0, 154.0, 282.0, 291.0, 293.0, 289.0, 311.0, 319.0, 288.0, 288.0, 258.0, 267.0, 291.0, 282.0, 260.0, 267.0, 280.0, 296.0, 287.0, 283.0, 295.0, 287.0, 261.0, 261.0, 297.0, 290.0, 292.0, 284.0, 259.0, 263.0, 278.0, 295.0, 291.0, 282.0, 291.0, 288.0, 284.0, 298.0, 291.0, 285.0, 292.0, 290.0, 293.0, 289.0, 290.0, 289.0, 288.0, 291.0, 285.0, 291.0, 292.0, 281.0, 291.0, 285.0, 288.0, 296.0, 286.0, 287.0, 286.0, 293.0, 259.0, 271.0, 283.0, 296.0, 289.0, 287.0, 285.0, 291.0, 289.0, 287.0, 288.0, 294.0, 290.0, 292.0, 293.0, 289.0, 291.0, 288.0, 285.0, 288.0, 291.0, 288.0, 291.0, 291.0, 266.0, 259.0, 268.0, 251.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.696108511703455, "mean_inference_ms": 1.242292456608421, "mean_action_processing_ms": 0.13357113804808782, "mean_env_wait_ms": 0.8380045678347366, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 633.0, "episode_reward_min": 222.0, "episode_reward_mean": 562.69, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 99.0}, "policy_reward_max": {"ppo": 319.0}, "policy_reward_mean": {"ppo": 281.345}, "hist_stats": {"episode_reward": [573.0, 579.0, 570.0, 579.0, 573.0, 579.0, 576.0, 582.0, 570.0, 630.0, 525.0, 581.0, 576.0, 579.0, 570.0, 573.0, 579.0, 581.0, 582.0, 573.0, 570.0, 579.0, 579.0, 465.0, 525.0, 573.0, 522.0, 573.0, 579.0, 582.0, 570.0, 522.0, 579.0, 527.0, 579.0, 579.0, 579.0, 573.0, 582.0, 579.0, 530.0, 582.0, 582.0, 222.0, 570.0, 516.0, 576.0, 633.0, 582.0, 516.0, 582.0, 525.0, 582.0, 587.0, 522.0, 584.0, 305.0, 
573.0, 582.0, 630.0, 576.0, 525.0, 573.0, 527.0, 576.0, 570.0, 582.0, 522.0, 587.0, 576.0, 522.0, 573.0, 573.0, 579.0, 582.0, 576.0, 582.0, 582.0, 579.0, 579.0, 576.0, 573.0, 576.0, 584.0, 573.0, 579.0, 530.0, 579.0, 576.0, 576.0, 576.0, 582.0, 582.0, 582.0, 579.0, 573.0, 579.0, 582.0, 525.0, 519.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [285.0, 288.0, 288.0, 291.0, 281.0, 289.0, 286.0, 293.0, 285.0, 288.0, 288.0, 291.0, 292.0, 284.0, 298.0, 284.0, 282.0, 288.0, 316.0, 314.0, 253.0, 272.0, 292.0, 289.0, 288.0, 288.0, 293.0, 286.0, 290.0, 280.0, 284.0, 289.0, 291.0, 288.0, 294.0, 287.0, 293.0, 289.0, 284.0, 289.0, 281.0, 289.0, 293.0, 286.0, 292.0, 287.0, 236.0, 229.0, 264.0, 261.0, 286.0, 287.0, 256.0, 266.0, 288.0, 285.0, 284.0, 295.0, 293.0, 289.0, 290.0, 280.0, 255.0, 267.0, 292.0, 287.0, 263.0, 264.0, 290.0, 289.0, 288.0, 291.0, 296.0, 283.0, 287.0, 286.0, 287.0, 295.0, 294.0, 285.0, 260.0, 270.0, 291.0, 291.0, 292.0, 290.0, 123.0, 99.0, 280.0, 290.0, 256.0, 260.0, 290.0, 286.0, 317.0, 316.0, 293.0, 289.0, 252.0, 264.0, 294.0, 288.0, 260.0, 265.0, 295.0, 287.0, 293.0, 294.0, 262.0, 260.0, 290.0, 294.0, 151.0, 154.0, 282.0, 291.0, 293.0, 289.0, 311.0, 319.0, 288.0, 288.0, 258.0, 267.0, 291.0, 282.0, 260.0, 267.0, 280.0, 296.0, 287.0, 283.0, 295.0, 287.0, 261.0, 261.0, 297.0, 290.0, 292.0, 284.0, 259.0, 263.0, 278.0, 295.0, 291.0, 282.0, 291.0, 288.0, 284.0, 298.0, 291.0, 285.0, 292.0, 290.0, 293.0, 289.0, 290.0, 289.0, 288.0, 291.0, 285.0, 291.0, 292.0, 281.0, 291.0, 
285.0, 288.0, 296.0, 286.0, 287.0, 286.0, 293.0, 259.0, 271.0, 283.0, 296.0, 289.0, 287.0, 285.0, 291.0, 289.0, 287.0, 288.0, 294.0, 290.0, 292.0, 293.0, 289.0, 291.0, 288.0, 285.0, 288.0, 291.0, 288.0, 291.0, 291.0, 266.0, 259.0, 268.0, 251.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.696108511703455, "mean_inference_ms": 1.242292456608421, "mean_action_processing_ms": 0.13357113804808782, "mean_env_wait_ms": 0.8380045678347366, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 10470400, "num_agent_steps_trained": 10470400, "num_env_steps_sampled": 5235200, "num_env_steps_trained": 5235200, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 5235200, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 10470400, "timers": {"training_iteration_time_ms": 3606.824, "learn_time_ms": 1117.374, "learn_throughput": 11455.429, "synch_weights_time_ms": 11.576}, "counters": {"num_env_steps_sampled": 5235200, "num_env_steps_trained": 5235200, "num_agent_steps_sampled": 10470400, "num_agent_steps_trained": 10470400}, "done": false, "episodes_total": 13088, "training_iteration": 409, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-27-03", "timestamp": 1666582023, "time_this_iter_s": 3.6459784507751465, "time_total_s": 1562.189950466156, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1562.189950466156, "timesteps_since_restore": 0, "iterations_since_restore": 409, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.54, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 194.0, "sparse_reward_min": 60, "sparse_reward_max": 220, "shaped_reward_mean": 174.21, "shaped_reward_min": 102, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.77, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 16.6, "onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 15.59, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 16.44, "useful_onion_pickup_agent_1_min": 6, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.15, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.15, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.05, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, 
"useful_onion_drop_agent_1_mean": 0.07, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.39, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 16.06, "potting_onion_agent_1_min": 6, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.42, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 5.11, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.27, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.95, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.11, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.08, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.05, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 4.79, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.04, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.73, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 15.39, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 16.06, "optimal_onion_potting_agent_1_min": 6, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.39, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 16.06, "viable_onion_potting_agent_1_min": 6, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0010149907320737839, "policy_loss": 0.0006468441570177674, "vf_loss": 
7.79993200302124, "vf_explained_var": 0.5932613611221313, "kl": 0.002992228837683797, "entropy": 0.8236936330795288, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 5248000, "num_env_steps_trained": 5248000, "num_agent_steps_sampled": 10496000, "num_agent_steps_trained": 10496000}, "sampler_results": {"episode_reward_max": 633.0, "episode_reward_min": 222.0, "episode_reward_mean": 562.21, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 99.0}, "policy_reward_max": {"ppo": 319.0}, "policy_reward_mean": {"ppo": 281.105}, "custom_metrics": {"sparse_reward_mean": 194.0, "sparse_reward_min": 60, "sparse_reward_max": 220, "shaped_reward_mean": 174.21, "shaped_reward_min": 102, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.77, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 16.6, 
"onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 15.59, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 16.44, "useful_onion_pickup_agent_1_min": 6, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.15, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.15, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.05, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.07, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.39, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 16.06, "potting_onion_agent_1_min": 6, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.42, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 5.11, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.27, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.95, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.11, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.08, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.05, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 4.79, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.04, "soup_delivery_agent_0_min": 2, 
"soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.73, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 15.39, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 16.06, "optimal_onion_potting_agent_1_min": 6, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.39, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 16.06, "viable_onion_potting_agent_1_min": 6, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 527.0, 579.0, 579.0, 579.0, 573.0, 582.0, 579.0, 530.0, 582.0, 582.0, 222.0, 570.0, 516.0, 576.0, 633.0, 582.0, 516.0, 582.0, 525.0, 582.0, 587.0, 522.0, 584.0, 305.0, 573.0, 582.0, 630.0, 576.0, 525.0, 573.0, 527.0, 576.0, 570.0, 582.0, 522.0, 587.0, 576.0, 522.0, 573.0, 573.0, 579.0, 582.0, 576.0, 582.0, 582.0, 579.0, 579.0, 576.0, 573.0, 576.0, 584.0, 573.0, 579.0, 530.0, 579.0, 576.0, 576.0, 576.0, 582.0, 582.0, 582.0, 579.0, 573.0, 579.0, 582.0, 525.0, 519.0, 576.0, 573.0, 582.0, 576.0, 576.0, 582.0, 525.0, 570.0, 579.0, 525.0, 522.0, 525.0, 522.0, 582.0, 573.0, 587.0, 582.0, 576.0, 576.0, 576.0, 522.0, 576.0, 582.0, 582.0, 579.0, 573.0, 579.0, 576.0, 579.0, 587.0, 522.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [292.0, 287.0, 263.0, 264.0, 290.0, 289.0, 288.0, 291.0, 296.0, 283.0, 287.0, 286.0, 287.0, 295.0, 294.0, 285.0, 260.0, 270.0, 291.0, 291.0, 292.0, 290.0, 123.0, 99.0, 280.0, 290.0, 256.0, 260.0, 290.0, 286.0, 317.0, 316.0, 293.0, 289.0, 252.0, 264.0, 294.0, 288.0, 260.0, 265.0, 295.0, 287.0, 293.0, 294.0, 262.0, 260.0, 290.0, 294.0, 151.0, 
154.0, 282.0, 291.0, 293.0, 289.0, 311.0, 319.0, 288.0, 288.0, 258.0, 267.0, 291.0, 282.0, 260.0, 267.0, 280.0, 296.0, 287.0, 283.0, 295.0, 287.0, 261.0, 261.0, 297.0, 290.0, 292.0, 284.0, 259.0, 263.0, 278.0, 295.0, 291.0, 282.0, 291.0, 288.0, 284.0, 298.0, 291.0, 285.0, 292.0, 290.0, 293.0, 289.0, 290.0, 289.0, 288.0, 291.0, 285.0, 291.0, 292.0, 281.0, 291.0, 285.0, 288.0, 296.0, 286.0, 287.0, 286.0, 293.0, 259.0, 271.0, 283.0, 296.0, 289.0, 287.0, 285.0, 291.0, 289.0, 287.0, 288.0, 294.0, 290.0, 292.0, 293.0, 289.0, 291.0, 288.0, 285.0, 288.0, 291.0, 288.0, 291.0, 291.0, 266.0, 259.0, 268.0, 251.0, 289.0, 287.0, 280.0, 293.0, 291.0, 291.0, 292.0, 284.0, 291.0, 285.0, 291.0, 291.0, 256.0, 269.0, 281.0, 289.0, 288.0, 291.0, 260.0, 265.0, 262.0, 260.0, 259.0, 266.0, 265.0, 257.0, 294.0, 288.0, 287.0, 286.0, 298.0, 289.0, 296.0, 286.0, 288.0, 288.0, 291.0, 285.0, 290.0, 286.0, 256.0, 266.0, 290.0, 286.0, 291.0, 291.0, 294.0, 288.0, 290.0, 289.0, 293.0, 280.0, 291.0, 288.0, 286.0, 290.0, 289.0, 290.0, 296.0, 291.0, 258.0, 264.0, 296.0, 283.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6960422117755253, "mean_inference_ms": 1.242148343321641, "mean_action_processing_ms": 0.13356248071098079, "mean_env_wait_ms": 0.8379216638832903, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 633.0, "episode_reward_min": 222.0, "episode_reward_mean": 562.21, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 99.0}, "policy_reward_max": {"ppo": 319.0}, "policy_reward_mean": {"ppo": 281.105}, "hist_stats": {"episode_reward": [579.0, 527.0, 579.0, 579.0, 579.0, 573.0, 582.0, 579.0, 530.0, 582.0, 582.0, 222.0, 570.0, 516.0, 576.0, 633.0, 582.0, 516.0, 582.0, 525.0, 582.0, 587.0, 522.0, 584.0, 305.0, 573.0, 582.0, 630.0, 576.0, 525.0, 573.0, 527.0, 576.0, 570.0, 582.0, 522.0, 587.0, 576.0, 522.0, 573.0, 573.0, 579.0, 582.0, 576.0, 582.0, 582.0, 579.0, 579.0, 576.0, 573.0, 576.0, 584.0, 573.0, 579.0, 530.0, 579.0, 576.0, 
576.0, 576.0, 582.0, 582.0, 582.0, 579.0, 573.0, 579.0, 582.0, 525.0, 519.0, 576.0, 573.0, 582.0, 576.0, 576.0, 582.0, 525.0, 570.0, 579.0, 525.0, 522.0, 525.0, 522.0, 582.0, 573.0, 587.0, 582.0, 576.0, 576.0, 576.0, 522.0, 576.0, 582.0, 582.0, 579.0, 573.0, 579.0, 576.0, 579.0, 587.0, 522.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [292.0, 287.0, 263.0, 264.0, 290.0, 289.0, 288.0, 291.0, 296.0, 283.0, 287.0, 286.0, 287.0, 295.0, 294.0, 285.0, 260.0, 270.0, 291.0, 291.0, 292.0, 290.0, 123.0, 99.0, 280.0, 290.0, 256.0, 260.0, 290.0, 286.0, 317.0, 316.0, 293.0, 289.0, 252.0, 264.0, 294.0, 288.0, 260.0, 265.0, 295.0, 287.0, 293.0, 294.0, 262.0, 260.0, 290.0, 294.0, 151.0, 154.0, 282.0, 291.0, 293.0, 289.0, 311.0, 319.0, 288.0, 288.0, 258.0, 267.0, 291.0, 282.0, 260.0, 267.0, 280.0, 296.0, 287.0, 283.0, 295.0, 287.0, 261.0, 261.0, 297.0, 290.0, 292.0, 284.0, 259.0, 263.0, 278.0, 295.0, 291.0, 282.0, 291.0, 288.0, 284.0, 298.0, 291.0, 285.0, 292.0, 290.0, 293.0, 289.0, 290.0, 289.0, 288.0, 291.0, 285.0, 291.0, 292.0, 281.0, 291.0, 285.0, 288.0, 296.0, 286.0, 287.0, 286.0, 293.0, 259.0, 271.0, 283.0, 296.0, 289.0, 287.0, 285.0, 291.0, 289.0, 287.0, 288.0, 294.0, 290.0, 292.0, 293.0, 289.0, 291.0, 288.0, 285.0, 288.0, 291.0, 288.0, 291.0, 291.0, 266.0, 259.0, 268.0, 251.0, 289.0, 287.0, 280.0, 293.0, 291.0, 291.0, 292.0, 284.0, 291.0, 285.0, 291.0, 291.0, 256.0, 269.0, 281.0, 289.0, 288.0, 291.0, 260.0, 265.0, 262.0, 260.0, 259.0, 266.0, 265.0, 257.0, 294.0, 288.0, 287.0, 
286.0, 298.0, 289.0, 296.0, 286.0, 288.0, 288.0, 291.0, 285.0, 290.0, 286.0, 256.0, 266.0, 290.0, 286.0, 291.0, 291.0, 294.0, 288.0, 290.0, 289.0, 293.0, 280.0, 291.0, 288.0, 286.0, 290.0, 289.0, 290.0, 296.0, 291.0, 258.0, 264.0, 296.0, 283.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6960422117755253, "mean_inference_ms": 1.242148343321641, "mean_action_processing_ms": 0.13356248071098079, "mean_env_wait_ms": 0.8379216638832903, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 10496000, "num_agent_steps_trained": 10496000, "num_env_steps_sampled": 5248000, "num_env_steps_trained": 5248000, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 5248000, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 10496000, "timers": {"training_iteration_time_ms": 3604.876, "learn_time_ms": 1118.078, "learn_throughput": 11448.214, "synch_weights_time_ms": 11.981}, "counters": {"num_env_steps_sampled": 5248000, "num_env_steps_trained": 5248000, "num_agent_steps_sampled": 10496000, "num_agent_steps_trained": 10496000}, "done": false, "episodes_total": 13120, "training_iteration": 410, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-27-07", "timestamp": 1666582027, "time_this_iter_s": 3.6446609497070312, "time_total_s": 1565.834611415863, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1565.834611415863, "timesteps_since_restore": 0, "iterations_since_restore": 410, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.4, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 196.0, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 175.19, "shaped_reward_min": 148, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.09, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 16.48, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 15.93, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 16.33, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.16, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.17, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.09, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, 
"useful_onion_drop_agent_1_mean": 0.09, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.69, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 15.97, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.31, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 5.24, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.18, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.09, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.12, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.07, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 4.98, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 4.9, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 4.97, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.86, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.69, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 15.97, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.69, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 15.97, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.00011297990567982197, "policy_loss": -0.0004916824400424957, "vf_loss": 
7.8775482177734375, "vf_explained_var": 0.5837694406509399, "kl": 0.0028352581430226564, "entropy": 0.8181036114692688, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 5260800, "num_env_steps_trained": 5260800, "num_agent_steps_sampled": 10521600, "num_agent_steps_trained": 10521600}, "sampler_results": {"episode_reward_max": 627.0, "episode_reward_min": 468.0, "episode_reward_mean": 567.19, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 234.0}, "policy_reward_max": {"ppo": 320.0}, "policy_reward_mean": {"ppo": 283.595}, "custom_metrics": {"sparse_reward_mean": 196.0, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 175.19, "shaped_reward_min": 148, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.09, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 16.48, 
"onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 15.93, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 16.33, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.16, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.17, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.09, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.09, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.69, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 15.97, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.31, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 5.24, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.18, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.09, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.12, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.07, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 4.98, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 4.9, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 4.97, "soup_delivery_agent_0_min": 2, 
"soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.86, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.69, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 15.97, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.69, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 15.97, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [576.0, 570.0, 582.0, 522.0, 587.0, 576.0, 522.0, 573.0, 573.0, 579.0, 582.0, 576.0, 582.0, 582.0, 579.0, 579.0, 576.0, 573.0, 576.0, 584.0, 573.0, 579.0, 530.0, 579.0, 576.0, 576.0, 576.0, 582.0, 582.0, 582.0, 579.0, 573.0, 579.0, 582.0, 525.0, 519.0, 576.0, 573.0, 582.0, 576.0, 576.0, 582.0, 525.0, 570.0, 579.0, 525.0, 522.0, 525.0, 522.0, 582.0, 573.0, 587.0, 582.0, 576.0, 576.0, 576.0, 522.0, 576.0, 582.0, 582.0, 579.0, 573.0, 579.0, 576.0, 579.0, 587.0, 522.0, 579.0, 579.0, 519.0, 573.0, 593.0, 579.0, 579.0, 579.0, 576.0, 576.0, 573.0, 570.0, 570.0, 525.0, 522.0, 582.0, 570.0, 573.0, 525.0, 516.0, 579.0, 576.0, 530.0, 627.0, 579.0, 579.0, 579.0, 539.0, 468.0, 582.0, 579.0, 582.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [280.0, 296.0, 287.0, 283.0, 295.0, 287.0, 261.0, 261.0, 297.0, 290.0, 292.0, 284.0, 259.0, 263.0, 278.0, 295.0, 291.0, 282.0, 291.0, 288.0, 284.0, 298.0, 291.0, 285.0, 292.0, 290.0, 293.0, 289.0, 290.0, 289.0, 288.0, 291.0, 285.0, 291.0, 292.0, 281.0, 291.0, 285.0, 288.0, 296.0, 286.0, 287.0, 286.0, 293.0, 259.0, 271.0, 283.0, 296.0, 289.0, 
287.0, 285.0, 291.0, 289.0, 287.0, 288.0, 294.0, 290.0, 292.0, 293.0, 289.0, 291.0, 288.0, 285.0, 288.0, 291.0, 288.0, 291.0, 291.0, 266.0, 259.0, 268.0, 251.0, 289.0, 287.0, 280.0, 293.0, 291.0, 291.0, 292.0, 284.0, 291.0, 285.0, 291.0, 291.0, 256.0, 269.0, 281.0, 289.0, 288.0, 291.0, 260.0, 265.0, 262.0, 260.0, 259.0, 266.0, 265.0, 257.0, 294.0, 288.0, 287.0, 286.0, 298.0, 289.0, 296.0, 286.0, 288.0, 288.0, 291.0, 285.0, 290.0, 286.0, 256.0, 266.0, 290.0, 286.0, 291.0, 291.0, 294.0, 288.0, 290.0, 289.0, 293.0, 280.0, 291.0, 288.0, 286.0, 290.0, 289.0, 290.0, 296.0, 291.0, 258.0, 264.0, 296.0, 283.0, 292.0, 287.0, 249.0, 270.0, 286.0, 287.0, 297.0, 296.0, 296.0, 283.0, 291.0, 288.0, 290.0, 289.0, 292.0, 284.0, 289.0, 287.0, 288.0, 285.0, 283.0, 287.0, 287.0, 283.0, 263.0, 262.0, 256.0, 266.0, 289.0, 293.0, 278.0, 292.0, 288.0, 285.0, 260.0, 265.0, 265.0, 251.0, 292.0, 287.0, 287.0, 289.0, 263.0, 267.0, 307.0, 320.0, 289.0, 290.0, 291.0, 288.0, 286.0, 293.0, 270.0, 269.0, 234.0, 234.0, 290.0, 292.0, 285.0, 294.0, 289.0, 293.0, 285.0, 294.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6959714884925522, "mean_inference_ms": 1.2419952146413202, "mean_action_processing_ms": 0.13355366138274052, "mean_env_wait_ms": 0.8378320110425799, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 627.0, "episode_reward_min": 468.0, "episode_reward_mean": 567.19, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 234.0}, "policy_reward_max": {"ppo": 320.0}, "policy_reward_mean": {"ppo": 283.595}, "hist_stats": {"episode_reward": [576.0, 570.0, 582.0, 522.0, 587.0, 576.0, 522.0, 573.0, 573.0, 579.0, 582.0, 576.0, 582.0, 582.0, 579.0, 579.0, 576.0, 573.0, 576.0, 584.0, 573.0, 579.0, 530.0, 579.0, 576.0, 576.0, 576.0, 582.0, 582.0, 582.0, 579.0, 573.0, 579.0, 582.0, 525.0, 519.0, 576.0, 573.0, 582.0, 576.0, 576.0, 582.0, 525.0, 570.0, 579.0, 525.0, 522.0, 525.0, 522.0, 582.0, 573.0, 587.0, 582.0, 576.0, 576.0, 576.0, 
522.0, 576.0, 582.0, 582.0, 579.0, 573.0, 579.0, 576.0, 579.0, 587.0, 522.0, 579.0, 579.0, 519.0, 573.0, 593.0, 579.0, 579.0, 579.0, 576.0, 576.0, 573.0, 570.0, 570.0, 525.0, 522.0, 582.0, 570.0, 573.0, 525.0, 516.0, 579.0, 576.0, 530.0, 627.0, 579.0, 579.0, 579.0, 539.0, 468.0, 582.0, 579.0, 582.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [280.0, 296.0, 287.0, 283.0, 295.0, 287.0, 261.0, 261.0, 297.0, 290.0, 292.0, 284.0, 259.0, 263.0, 278.0, 295.0, 291.0, 282.0, 291.0, 288.0, 284.0, 298.0, 291.0, 285.0, 292.0, 290.0, 293.0, 289.0, 290.0, 289.0, 288.0, 291.0, 285.0, 291.0, 292.0, 281.0, 291.0, 285.0, 288.0, 296.0, 286.0, 287.0, 286.0, 293.0, 259.0, 271.0, 283.0, 296.0, 289.0, 287.0, 285.0, 291.0, 289.0, 287.0, 288.0, 294.0, 290.0, 292.0, 293.0, 289.0, 291.0, 288.0, 285.0, 288.0, 291.0, 288.0, 291.0, 291.0, 266.0, 259.0, 268.0, 251.0, 289.0, 287.0, 280.0, 293.0, 291.0, 291.0, 292.0, 284.0, 291.0, 285.0, 291.0, 291.0, 256.0, 269.0, 281.0, 289.0, 288.0, 291.0, 260.0, 265.0, 262.0, 260.0, 259.0, 266.0, 265.0, 257.0, 294.0, 288.0, 287.0, 286.0, 298.0, 289.0, 296.0, 286.0, 288.0, 288.0, 291.0, 285.0, 290.0, 286.0, 256.0, 266.0, 290.0, 286.0, 291.0, 291.0, 294.0, 288.0, 290.0, 289.0, 293.0, 280.0, 291.0, 288.0, 286.0, 290.0, 289.0, 290.0, 296.0, 291.0, 258.0, 264.0, 296.0, 283.0, 292.0, 287.0, 249.0, 270.0, 286.0, 287.0, 297.0, 296.0, 296.0, 283.0, 291.0, 288.0, 290.0, 289.0, 292.0, 284.0, 289.0, 287.0, 288.0, 285.0, 283.0, 287.0, 287.0, 283.0, 263.0, 262.0, 256.0, 266.0, 
289.0, 293.0, 278.0, 292.0, 288.0, 285.0, 260.0, 265.0, 265.0, 251.0, 292.0, 287.0, 287.0, 289.0, 263.0, 267.0, 307.0, 320.0, 289.0, 290.0, 291.0, 288.0, 286.0, 293.0, 270.0, 269.0, 234.0, 234.0, 290.0, 292.0, 285.0, 294.0, 289.0, 293.0, 285.0, 294.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6959714884925522, "mean_inference_ms": 1.2419952146413202, "mean_action_processing_ms": 0.13355366138274052, "mean_env_wait_ms": 0.8378320110425799, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 10521600, "num_agent_steps_trained": 10521600, "num_env_steps_sampled": 5260800, "num_env_steps_trained": 5260800, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 5260800, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 10521600, "timers": {"training_iteration_time_ms": 3605.814, "learn_time_ms": 1120.922, "learn_throughput": 11419.173, "synch_weights_time_ms": 11.381}, "counters": {"num_env_steps_sampled": 5260800, "num_env_steps_trained": 5260800, "num_agent_steps_sampled": 10521600, "num_agent_steps_trained": 10521600}, "done": false, "episodes_total": 13152, "training_iteration": 411, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-27-11", "timestamp": 1666582031, "time_this_iter_s": 3.681643009185791, "time_total_s": 1569.5162544250488, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1569.5162544250488, "timesteps_since_restore": 0, "iterations_since_restore": 411, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.499999999999996, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 196.0, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 174.96, "shaped_reward_min": 147, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.65, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 16.73, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 15.55, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 16.59, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.13, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.12, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.07, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, 
"useful_onion_drop_agent_1_mean": 0.07, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.26, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 16.3, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.42, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 5.11, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.28, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.98, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.09, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.05, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.12, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.78, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.11, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.74, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.26, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 16.3, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.26, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 16.3, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.001020381459966302, "policy_loss": 0.0006417357362806797, "vf_loss": 
7.845973968505859, "vf_explained_var": 0.5801359415054321, "kl": 0.0029763891361653805, "entropy": 0.8119027614593506, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 5273600, "num_env_steps_trained": 5273600, "num_agent_steps_sampled": 10547200, "num_agent_steps_trained": 10547200}, "sampler_results": {"episode_reward_max": 630.0, "episode_reward_min": 468.0, "episode_reward_mean": 566.96, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 234.0}, "policy_reward_max": {"ppo": 322.0}, "policy_reward_mean": {"ppo": 283.48}, "custom_metrics": {"sparse_reward_mean": 196.0, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 174.96, "shaped_reward_min": 147, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.65, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 16.73, 
"onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 15.55, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 16.59, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.13, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.12, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.07, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.07, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.26, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 16.3, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.42, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 5.11, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.28, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.98, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.09, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.05, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.12, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.78, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.11, "soup_delivery_agent_0_min": 2, 
"soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.74, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.26, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 16.3, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.26, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 16.3, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 582.0, 525.0, 519.0, 576.0, 573.0, 582.0, 576.0, 576.0, 582.0, 525.0, 570.0, 579.0, 525.0, 522.0, 525.0, 522.0, 582.0, 573.0, 587.0, 582.0, 576.0, 576.0, 576.0, 522.0, 576.0, 582.0, 582.0, 579.0, 573.0, 579.0, 576.0, 579.0, 587.0, 522.0, 579.0, 579.0, 519.0, 573.0, 593.0, 579.0, 579.0, 579.0, 576.0, 576.0, 573.0, 570.0, 570.0, 525.0, 522.0, 582.0, 570.0, 573.0, 525.0, 516.0, 579.0, 576.0, 530.0, 627.0, 579.0, 579.0, 579.0, 539.0, 468.0, 582.0, 579.0, 582.0, 579.0, 570.0, 584.0, 519.0, 582.0, 630.0, 582.0, 579.0, 579.0, 579.0, 582.0, 630.0, 579.0, 570.0, 576.0, 522.0, 582.0, 579.0, 570.0, 573.0, 584.0, 579.0, 507.0, 584.0, 579.0, 579.0, 573.0, 579.0, 573.0, 579.0, 573.0, 527.0, 530.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [291.0, 288.0, 291.0, 291.0, 266.0, 259.0, 268.0, 251.0, 289.0, 287.0, 280.0, 293.0, 291.0, 291.0, 292.0, 284.0, 291.0, 285.0, 291.0, 291.0, 256.0, 269.0, 281.0, 289.0, 288.0, 291.0, 260.0, 265.0, 262.0, 260.0, 259.0, 266.0, 265.0, 257.0, 294.0, 288.0, 287.0, 286.0, 298.0, 289.0, 296.0, 286.0, 288.0, 288.0, 291.0, 285.0, 290.0, 286.0, 256.0, 
266.0, 290.0, 286.0, 291.0, 291.0, 294.0, 288.0, 290.0, 289.0, 293.0, 280.0, 291.0, 288.0, 286.0, 290.0, 289.0, 290.0, 296.0, 291.0, 258.0, 264.0, 296.0, 283.0, 292.0, 287.0, 249.0, 270.0, 286.0, 287.0, 297.0, 296.0, 296.0, 283.0, 291.0, 288.0, 290.0, 289.0, 292.0, 284.0, 289.0, 287.0, 288.0, 285.0, 283.0, 287.0, 287.0, 283.0, 263.0, 262.0, 256.0, 266.0, 289.0, 293.0, 278.0, 292.0, 288.0, 285.0, 260.0, 265.0, 265.0, 251.0, 292.0, 287.0, 287.0, 289.0, 263.0, 267.0, 307.0, 320.0, 289.0, 290.0, 291.0, 288.0, 286.0, 293.0, 270.0, 269.0, 234.0, 234.0, 290.0, 292.0, 285.0, 294.0, 289.0, 293.0, 285.0, 294.0, 279.0, 291.0, 300.0, 284.0, 255.0, 264.0, 289.0, 293.0, 308.0, 322.0, 288.0, 294.0, 288.0, 291.0, 286.0, 293.0, 290.0, 289.0, 290.0, 292.0, 313.0, 317.0, 283.0, 296.0, 283.0, 287.0, 287.0, 289.0, 268.0, 254.0, 292.0, 290.0, 288.0, 291.0, 284.0, 286.0, 282.0, 291.0, 298.0, 286.0, 288.0, 291.0, 251.0, 256.0, 290.0, 294.0, 284.0, 295.0, 289.0, 290.0, 289.0, 284.0, 287.0, 292.0, 290.0, 283.0, 291.0, 288.0, 282.0, 291.0, 266.0, 261.0, 262.0, 268.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6959482648112225, "mean_inference_ms": 1.2418496628761109, "mean_action_processing_ms": 0.13354408807769896, "mean_env_wait_ms": 0.8377504940675667, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 630.0, "episode_reward_min": 468.0, "episode_reward_mean": 566.96, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 234.0}, "policy_reward_max": {"ppo": 322.0}, "policy_reward_mean": {"ppo": 283.48}, "hist_stats": {"episode_reward": [579.0, 582.0, 525.0, 519.0, 576.0, 573.0, 582.0, 576.0, 576.0, 582.0, 525.0, 570.0, 579.0, 525.0, 522.0, 525.0, 522.0, 582.0, 573.0, 587.0, 582.0, 576.0, 576.0, 576.0, 522.0, 576.0, 582.0, 582.0, 579.0, 573.0, 579.0, 576.0, 579.0, 587.0, 522.0, 579.0, 579.0, 519.0, 573.0, 593.0, 579.0, 579.0, 579.0, 576.0, 576.0, 573.0, 570.0, 570.0, 525.0, 522.0, 582.0, 570.0, 573.0, 525.0, 516.0, 579.0, 576.0, 
530.0, 627.0, 579.0, 579.0, 579.0, 539.0, 468.0, 582.0, 579.0, 582.0, 579.0, 570.0, 584.0, 519.0, 582.0, 630.0, 582.0, 579.0, 579.0, 579.0, 582.0, 630.0, 579.0, 570.0, 576.0, 522.0, 582.0, 579.0, 570.0, 573.0, 584.0, 579.0, 507.0, 584.0, 579.0, 579.0, 573.0, 579.0, 573.0, 579.0, 573.0, 527.0, 530.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [291.0, 288.0, 291.0, 291.0, 266.0, 259.0, 268.0, 251.0, 289.0, 287.0, 280.0, 293.0, 291.0, 291.0, 292.0, 284.0, 291.0, 285.0, 291.0, 291.0, 256.0, 269.0, 281.0, 289.0, 288.0, 291.0, 260.0, 265.0, 262.0, 260.0, 259.0, 266.0, 265.0, 257.0, 294.0, 288.0, 287.0, 286.0, 298.0, 289.0, 296.0, 286.0, 288.0, 288.0, 291.0, 285.0, 290.0, 286.0, 256.0, 266.0, 290.0, 286.0, 291.0, 291.0, 294.0, 288.0, 290.0, 289.0, 293.0, 280.0, 291.0, 288.0, 286.0, 290.0, 289.0, 290.0, 296.0, 291.0, 258.0, 264.0, 296.0, 283.0, 292.0, 287.0, 249.0, 270.0, 286.0, 287.0, 297.0, 296.0, 296.0, 283.0, 291.0, 288.0, 290.0, 289.0, 292.0, 284.0, 289.0, 287.0, 288.0, 285.0, 283.0, 287.0, 287.0, 283.0, 263.0, 262.0, 256.0, 266.0, 289.0, 293.0, 278.0, 292.0, 288.0, 285.0, 260.0, 265.0, 265.0, 251.0, 292.0, 287.0, 287.0, 289.0, 263.0, 267.0, 307.0, 320.0, 289.0, 290.0, 291.0, 288.0, 286.0, 293.0, 270.0, 269.0, 234.0, 234.0, 290.0, 292.0, 285.0, 294.0, 289.0, 293.0, 285.0, 294.0, 279.0, 291.0, 300.0, 284.0, 255.0, 264.0, 289.0, 293.0, 308.0, 322.0, 288.0, 294.0, 288.0, 291.0, 286.0, 293.0, 290.0, 289.0, 290.0, 292.0, 313.0, 317.0, 283.0, 296.0, 283.0, 287.0, 287.0, 289.0, 268.0, 
254.0, 292.0, 290.0, 288.0, 291.0, 284.0, 286.0, 282.0, 291.0, 298.0, 286.0, 288.0, 291.0, 251.0, 256.0, 290.0, 294.0, 284.0, 295.0, 289.0, 290.0, 289.0, 284.0, 287.0, 292.0, 290.0, 283.0, 291.0, 288.0, 282.0, 291.0, 266.0, 261.0, 262.0, 268.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6959482648112225, "mean_inference_ms": 1.2418496628761109, "mean_action_processing_ms": 0.13354408807769896, "mean_env_wait_ms": 0.8377504940675667, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 10547200, "num_agent_steps_trained": 10547200, "num_env_steps_sampled": 5273600, "num_env_steps_trained": 5273600, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 5273600, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 10547200, "timers": {"training_iteration_time_ms": 3621.116, "learn_time_ms": 1123.843, "learn_throughput": 11389.493, "synch_weights_time_ms": 11.325}, "counters": {"num_env_steps_sampled": 5273600, "num_env_steps_trained": 5273600, "num_agent_steps_sampled": 10547200, "num_agent_steps_trained": 10547200}, "done": false, "episodes_total": 13184, "training_iteration": 412, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-27-15", "timestamp": 1666582035, "time_this_iter_s": 3.787947416305542, "time_total_s": 1573.3042018413544, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1573.3042018413544, "timesteps_since_restore": 0, "iterations_since_restore": 412, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 24.419999999999998, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 196.8, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 175.27, "shaped_reward_min": 147, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.59, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 16.93, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 15.47, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 16.74, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.14, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.12, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.1, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, 
"useful_onion_drop_agent_1_mean": 0.05, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.13, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 16.5, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.5, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.1, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.29, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.92, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.13, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.09, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.05, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.17, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.78, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.16, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.72, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.13, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 16.5, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.13, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 16.5, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.003513246774673462, "policy_loss": -0.0038899758365005255, "vf_loss": 
7.859567165374756, "vf_explained_var": 0.5757486820220947, "kl": 0.0025270835030823946, "entropy": 0.8184552788734436, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 5286400, "num_env_steps_trained": 5286400, "num_agent_steps_sampled": 10572800, "num_agent_steps_trained": 10572800}, "sampler_results": {"episode_reward_max": 630.0, "episode_reward_min": 468.0, "episode_reward_mean": 568.87, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 234.0}, "policy_reward_max": {"ppo": 322.0}, "policy_reward_mean": {"ppo": 284.435}, "custom_metrics": {"sparse_reward_mean": 196.8, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 175.27, "shaped_reward_min": 147, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.59, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 16.93, 
"onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 15.47, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 16.74, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.14, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.12, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.1, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.05, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.13, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 16.5, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.5, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.1, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.29, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.92, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.13, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.09, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.05, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.17, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.78, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.16, "soup_delivery_agent_0_min": 2, 
"soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.72, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.13, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 16.5, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.13, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 16.5, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 587.0, 522.0, 579.0, 579.0, 519.0, 573.0, 593.0, 579.0, 579.0, 579.0, 576.0, 576.0, 573.0, 570.0, 570.0, 525.0, 522.0, 582.0, 570.0, 573.0, 525.0, 516.0, 579.0, 576.0, 530.0, 627.0, 579.0, 579.0, 579.0, 539.0, 468.0, 582.0, 579.0, 582.0, 579.0, 570.0, 584.0, 519.0, 582.0, 630.0, 582.0, 579.0, 579.0, 579.0, 582.0, 630.0, 579.0, 570.0, 576.0, 522.0, 582.0, 579.0, 570.0, 573.0, 584.0, 579.0, 507.0, 584.0, 579.0, 579.0, 573.0, 579.0, 573.0, 579.0, 573.0, 527.0, 530.0, 522.0, 576.0, 561.0, 582.0, 579.0, 522.0, 573.0, 576.0, 573.0, 584.0, 587.0, 630.0, 579.0, 579.0, 576.0, 582.0, 576.0, 576.0, 579.0, 579.0, 525.0, 579.0, 522.0, 576.0, 513.0, 576.0, 576.0, 582.0, 582.0, 570.0, 582.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [289.0, 290.0, 296.0, 291.0, 258.0, 264.0, 296.0, 283.0, 292.0, 287.0, 249.0, 270.0, 286.0, 287.0, 297.0, 296.0, 296.0, 283.0, 291.0, 288.0, 290.0, 289.0, 292.0, 284.0, 289.0, 287.0, 288.0, 285.0, 283.0, 287.0, 287.0, 283.0, 263.0, 262.0, 256.0, 266.0, 289.0, 293.0, 278.0, 292.0, 288.0, 285.0, 260.0, 265.0, 265.0, 251.0, 292.0, 287.0, 287.0, 
289.0, 263.0, 267.0, 307.0, 320.0, 289.0, 290.0, 291.0, 288.0, 286.0, 293.0, 270.0, 269.0, 234.0, 234.0, 290.0, 292.0, 285.0, 294.0, 289.0, 293.0, 285.0, 294.0, 279.0, 291.0, 300.0, 284.0, 255.0, 264.0, 289.0, 293.0, 308.0, 322.0, 288.0, 294.0, 288.0, 291.0, 286.0, 293.0, 290.0, 289.0, 290.0, 292.0, 313.0, 317.0, 283.0, 296.0, 283.0, 287.0, 287.0, 289.0, 268.0, 254.0, 292.0, 290.0, 288.0, 291.0, 284.0, 286.0, 282.0, 291.0, 298.0, 286.0, 288.0, 291.0, 251.0, 256.0, 290.0, 294.0, 284.0, 295.0, 289.0, 290.0, 289.0, 284.0, 287.0, 292.0, 290.0, 283.0, 291.0, 288.0, 282.0, 291.0, 266.0, 261.0, 262.0, 268.0, 267.0, 255.0, 285.0, 291.0, 282.0, 279.0, 286.0, 296.0, 292.0, 287.0, 254.0, 268.0, 286.0, 287.0, 294.0, 282.0, 283.0, 290.0, 290.0, 294.0, 291.0, 296.0, 316.0, 314.0, 285.0, 294.0, 289.0, 290.0, 288.0, 288.0, 291.0, 291.0, 288.0, 288.0, 293.0, 283.0, 296.0, 283.0, 295.0, 284.0, 261.0, 264.0, 292.0, 287.0, 256.0, 266.0, 285.0, 291.0, 259.0, 254.0, 292.0, 284.0, 284.0, 292.0, 288.0, 294.0, 290.0, 292.0, 289.0, 281.0, 292.0, 290.0, 286.0, 290.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6959201132144984, "mean_inference_ms": 1.2417779063156014, "mean_action_processing_ms": 0.13353379320628925, "mean_env_wait_ms": 0.8377310076399764, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 630.0, "episode_reward_min": 468.0, "episode_reward_mean": 568.87, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 234.0}, "policy_reward_max": {"ppo": 322.0}, "policy_reward_mean": {"ppo": 284.435}, "hist_stats": {"episode_reward": [579.0, 587.0, 522.0, 579.0, 579.0, 519.0, 573.0, 593.0, 579.0, 579.0, 579.0, 576.0, 576.0, 573.0, 570.0, 570.0, 525.0, 522.0, 582.0, 570.0, 573.0, 525.0, 516.0, 579.0, 576.0, 530.0, 627.0, 579.0, 579.0, 579.0, 539.0, 468.0, 582.0, 579.0, 582.0, 579.0, 570.0, 584.0, 519.0, 582.0, 630.0, 582.0, 579.0, 579.0, 579.0, 582.0, 630.0, 579.0, 570.0, 576.0, 522.0, 582.0, 579.0, 570.0, 573.0, 584.0, 
579.0, 507.0, 584.0, 579.0, 579.0, 573.0, 579.0, 573.0, 579.0, 573.0, 527.0, 530.0, 522.0, 576.0, 561.0, 582.0, 579.0, 522.0, 573.0, 576.0, 573.0, 584.0, 587.0, 630.0, 579.0, 579.0, 576.0, 582.0, 576.0, 576.0, 579.0, 579.0, 525.0, 579.0, 522.0, 576.0, 513.0, 576.0, 576.0, 582.0, 582.0, 570.0, 582.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [289.0, 290.0, 296.0, 291.0, 258.0, 264.0, 296.0, 283.0, 292.0, 287.0, 249.0, 270.0, 286.0, 287.0, 297.0, 296.0, 296.0, 283.0, 291.0, 288.0, 290.0, 289.0, 292.0, 284.0, 289.0, 287.0, 288.0, 285.0, 283.0, 287.0, 287.0, 283.0, 263.0, 262.0, 256.0, 266.0, 289.0, 293.0, 278.0, 292.0, 288.0, 285.0, 260.0, 265.0, 265.0, 251.0, 292.0, 287.0, 287.0, 289.0, 263.0, 267.0, 307.0, 320.0, 289.0, 290.0, 291.0, 288.0, 286.0, 293.0, 270.0, 269.0, 234.0, 234.0, 290.0, 292.0, 285.0, 294.0, 289.0, 293.0, 285.0, 294.0, 279.0, 291.0, 300.0, 284.0, 255.0, 264.0, 289.0, 293.0, 308.0, 322.0, 288.0, 294.0, 288.0, 291.0, 286.0, 293.0, 290.0, 289.0, 290.0, 292.0, 313.0, 317.0, 283.0, 296.0, 283.0, 287.0, 287.0, 289.0, 268.0, 254.0, 292.0, 290.0, 288.0, 291.0, 284.0, 286.0, 282.0, 291.0, 298.0, 286.0, 288.0, 291.0, 251.0, 256.0, 290.0, 294.0, 284.0, 295.0, 289.0, 290.0, 289.0, 284.0, 287.0, 292.0, 290.0, 283.0, 291.0, 288.0, 282.0, 291.0, 266.0, 261.0, 262.0, 268.0, 267.0, 255.0, 285.0, 291.0, 282.0, 279.0, 286.0, 296.0, 292.0, 287.0, 254.0, 268.0, 286.0, 287.0, 294.0, 282.0, 283.0, 290.0, 290.0, 294.0, 291.0, 296.0, 316.0, 314.0, 285.0, 294.0, 289.0, 290.0, 
288.0, 288.0, 291.0, 291.0, 288.0, 288.0, 293.0, 283.0, 296.0, 283.0, 295.0, 284.0, 261.0, 264.0, 292.0, 287.0, 256.0, 266.0, 285.0, 291.0, 259.0, 254.0, 292.0, 284.0, 284.0, 292.0, 288.0, 294.0, 290.0, 292.0, 289.0, 281.0, 292.0, 290.0, 286.0, 290.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6959201132144984, "mean_inference_ms": 1.2417779063156014, "mean_action_processing_ms": 0.13353379320628925, "mean_env_wait_ms": 0.8377310076399764, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 10572800, "num_agent_steps_trained": 10572800, "num_env_steps_sampled": 5286400, "num_env_steps_trained": 5286400, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 5286400, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 10572800, "timers": {"training_iteration_time_ms": 3637.716, "learn_time_ms": 1122.363, "learn_throughput": 11404.509, "synch_weights_time_ms": 11.926}, "counters": {"num_env_steps_sampled": 5286400, "num_env_steps_trained": 5286400, "num_agent_steps_sampled": 10572800, "num_agent_steps_trained": 10572800}, "done": false, "episodes_total": 13216, "training_iteration": 413, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-27-19", "timestamp": 1666582039, "time_this_iter_s": 3.7886815071105957, "time_total_s": 1577.092883348465, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1577.092883348465, "timesteps_since_restore": 0, "iterations_since_restore": 413, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 21.566666666666666, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 198.6, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 176.81, "shaped_reward_min": 147, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.21, "onion_pickup_agent_0_min": 10, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 17.54, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 15.1, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 17.34, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.13, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.11, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.09, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, 
"useful_onion_drop_agent_1_mean": 0.05, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.72, "potting_onion_agent_0_min": 9, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 17.12, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.69, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 4.98, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.53, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.85, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.07, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.1, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.04, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.04, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.42, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.61, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.4, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.56, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.72, "optimal_onion_potting_agent_0_min": 9, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 17.12, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.72, "viable_onion_potting_agent_0_min": 9, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 17.12, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0006164342630654573, "policy_loss": 0.00024116405984386802, "vf_loss": 
7.793488025665283, "vf_explained_var": 0.5921193361282349, "kl": 0.002510129939764738, "entropy": 0.8081568479537964, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 5299200, "num_env_steps_trained": 5299200, "num_agent_steps_sampled": 10598400, "num_agent_steps_trained": 10598400}, "sampler_results": {"episode_reward_max": 633.0, "episode_reward_min": 507.0, "episode_reward_mean": 574.01, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 245.0}, "policy_reward_max": {"ppo": 322.0}, "policy_reward_mean": {"ppo": 287.005}, "custom_metrics": {"sparse_reward_mean": 198.6, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 176.81, "shaped_reward_min": 147, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.21, "onion_pickup_agent_0_min": 10, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 17.54, 
"onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 15.1, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 17.34, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.13, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.11, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.09, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.05, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.72, "potting_onion_agent_0_min": 9, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 17.12, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.69, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 4.98, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.53, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.85, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.07, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.1, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.04, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.04, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.42, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.61, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.4, "soup_delivery_agent_0_min": 3, 
"soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.56, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.72, "optimal_onion_potting_agent_0_min": 9, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 17.12, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.72, "viable_onion_potting_agent_0_min": 9, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 17.12, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 579.0, 582.0, 579.0, 570.0, 584.0, 519.0, 582.0, 630.0, 582.0, 579.0, 579.0, 579.0, 582.0, 630.0, 579.0, 570.0, 576.0, 522.0, 582.0, 579.0, 570.0, 573.0, 584.0, 579.0, 507.0, 584.0, 579.0, 579.0, 573.0, 579.0, 573.0, 579.0, 573.0, 527.0, 530.0, 522.0, 576.0, 561.0, 582.0, 579.0, 522.0, 573.0, 576.0, 573.0, 584.0, 587.0, 630.0, 579.0, 579.0, 576.0, 582.0, 576.0, 576.0, 579.0, 579.0, 525.0, 579.0, 522.0, 576.0, 513.0, 576.0, 576.0, 582.0, 582.0, 570.0, 582.0, 576.0, 579.0, 579.0, 522.0, 576.0, 579.0, 579.0, 633.0, 576.0, 582.0, 579.0, 579.0, 576.0, 584.0, 579.0, 579.0, 576.0, 579.0, 573.0, 630.0, 584.0, 582.0, 576.0, 513.0, 582.0, 579.0, 587.0, 582.0, 579.0, 582.0, 570.0, 582.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [290.0, 292.0, 285.0, 294.0, 289.0, 293.0, 285.0, 294.0, 279.0, 291.0, 300.0, 284.0, 255.0, 264.0, 289.0, 293.0, 308.0, 322.0, 288.0, 294.0, 288.0, 291.0, 286.0, 293.0, 290.0, 289.0, 290.0, 292.0, 313.0, 317.0, 283.0, 296.0, 283.0, 287.0, 287.0, 289.0, 268.0, 254.0, 292.0, 290.0, 288.0, 291.0, 284.0, 286.0, 282.0, 291.0, 298.0, 286.0, 288.0, 
291.0, 251.0, 256.0, 290.0, 294.0, 284.0, 295.0, 289.0, 290.0, 289.0, 284.0, 287.0, 292.0, 290.0, 283.0, 291.0, 288.0, 282.0, 291.0, 266.0, 261.0, 262.0, 268.0, 267.0, 255.0, 285.0, 291.0, 282.0, 279.0, 286.0, 296.0, 292.0, 287.0, 254.0, 268.0, 286.0, 287.0, 294.0, 282.0, 283.0, 290.0, 290.0, 294.0, 291.0, 296.0, 316.0, 314.0, 285.0, 294.0, 289.0, 290.0, 288.0, 288.0, 291.0, 291.0, 288.0, 288.0, 293.0, 283.0, 296.0, 283.0, 295.0, 284.0, 261.0, 264.0, 292.0, 287.0, 256.0, 266.0, 285.0, 291.0, 259.0, 254.0, 292.0, 284.0, 284.0, 292.0, 288.0, 294.0, 290.0, 292.0, 289.0, 281.0, 292.0, 290.0, 286.0, 290.0, 292.0, 287.0, 290.0, 289.0, 255.0, 267.0, 288.0, 288.0, 293.0, 286.0, 290.0, 289.0, 319.0, 314.0, 285.0, 291.0, 291.0, 291.0, 293.0, 286.0, 291.0, 288.0, 297.0, 279.0, 292.0, 292.0, 291.0, 288.0, 291.0, 288.0, 290.0, 286.0, 296.0, 283.0, 285.0, 288.0, 311.0, 319.0, 300.0, 284.0, 288.0, 294.0, 285.0, 291.0, 245.0, 268.0, 293.0, 289.0, 287.0, 292.0, 293.0, 294.0, 284.0, 298.0, 283.0, 296.0, 290.0, 292.0, 290.0, 280.0, 288.0, 294.0, 285.0, 294.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6959090726770885, "mean_inference_ms": 1.241716896421168, "mean_action_processing_ms": 0.13352372525812178, "mean_env_wait_ms": 0.8377185596955341, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 633.0, "episode_reward_min": 507.0, "episode_reward_mean": 574.01, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 245.0}, "policy_reward_max": {"ppo": 322.0}, "policy_reward_mean": {"ppo": 287.005}, "hist_stats": {"episode_reward": [582.0, 579.0, 582.0, 579.0, 570.0, 584.0, 519.0, 582.0, 630.0, 582.0, 579.0, 579.0, 579.0, 582.0, 630.0, 579.0, 570.0, 576.0, 522.0, 582.0, 579.0, 570.0, 573.0, 584.0, 579.0, 507.0, 584.0, 579.0, 579.0, 573.0, 579.0, 573.0, 579.0, 573.0, 527.0, 530.0, 522.0, 576.0, 561.0, 582.0, 579.0, 522.0, 573.0, 576.0, 573.0, 584.0, 587.0, 630.0, 579.0, 579.0, 576.0, 582.0, 576.0, 576.0, 579.0, 579.0, 525.0, 
579.0, 522.0, 576.0, 513.0, 576.0, 576.0, 582.0, 582.0, 570.0, 582.0, 576.0, 579.0, 579.0, 522.0, 576.0, 579.0, 579.0, 633.0, 576.0, 582.0, 579.0, 579.0, 576.0, 584.0, 579.0, 579.0, 576.0, 579.0, 573.0, 630.0, 584.0, 582.0, 576.0, 513.0, 582.0, 579.0, 587.0, 582.0, 579.0, 582.0, 570.0, 582.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [290.0, 292.0, 285.0, 294.0, 289.0, 293.0, 285.0, 294.0, 279.0, 291.0, 300.0, 284.0, 255.0, 264.0, 289.0, 293.0, 308.0, 322.0, 288.0, 294.0, 288.0, 291.0, 286.0, 293.0, 290.0, 289.0, 290.0, 292.0, 313.0, 317.0, 283.0, 296.0, 283.0, 287.0, 287.0, 289.0, 268.0, 254.0, 292.0, 290.0, 288.0, 291.0, 284.0, 286.0, 282.0, 291.0, 298.0, 286.0, 288.0, 291.0, 251.0, 256.0, 290.0, 294.0, 284.0, 295.0, 289.0, 290.0, 289.0, 284.0, 287.0, 292.0, 290.0, 283.0, 291.0, 288.0, 282.0, 291.0, 266.0, 261.0, 262.0, 268.0, 267.0, 255.0, 285.0, 291.0, 282.0, 279.0, 286.0, 296.0, 292.0, 287.0, 254.0, 268.0, 286.0, 287.0, 294.0, 282.0, 283.0, 290.0, 290.0, 294.0, 291.0, 296.0, 316.0, 314.0, 285.0, 294.0, 289.0, 290.0, 288.0, 288.0, 291.0, 291.0, 288.0, 288.0, 293.0, 283.0, 296.0, 283.0, 295.0, 284.0, 261.0, 264.0, 292.0, 287.0, 256.0, 266.0, 285.0, 291.0, 259.0, 254.0, 292.0, 284.0, 284.0, 292.0, 288.0, 294.0, 290.0, 292.0, 289.0, 281.0, 292.0, 290.0, 286.0, 290.0, 292.0, 287.0, 290.0, 289.0, 255.0, 267.0, 288.0, 288.0, 293.0, 286.0, 290.0, 289.0, 319.0, 314.0, 285.0, 291.0, 291.0, 291.0, 293.0, 286.0, 291.0, 288.0, 297.0, 279.0, 292.0, 292.0, 291.0, 288.0, 291.0, 
288.0, 290.0, 286.0, 296.0, 283.0, 285.0, 288.0, 311.0, 319.0, 300.0, 284.0, 288.0, 294.0, 285.0, 291.0, 245.0, 268.0, 293.0, 289.0, 287.0, 292.0, 293.0, 294.0, 284.0, 298.0, 283.0, 296.0, 290.0, 292.0, 290.0, 280.0, 288.0, 294.0, 285.0, 294.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6959090726770885, "mean_inference_ms": 1.241716896421168, "mean_action_processing_ms": 0.13352372525812178, "mean_env_wait_ms": 0.8377185596955341, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 10598400, "num_agent_steps_trained": 10598400, "num_env_steps_sampled": 5299200, "num_env_steps_trained": 5299200, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 5299200, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 10598400, "timers": {"training_iteration_time_ms": 3638.704, "learn_time_ms": 1125.557, "learn_throughput": 11372.149, "synch_weights_time_ms": 11.463}, "counters": {"num_env_steps_sampled": 5299200, "num_env_steps_trained": 5299200, "num_agent_steps_sampled": 10598400, "num_agent_steps_trained": 10598400}, "done": false, "episodes_total": 13248, "training_iteration": 414, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-27-23", "timestamp": 1666582043, "time_this_iter_s": 3.6348578929901123, "time_total_s": 1580.727741241455, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1580.727741241455, "timesteps_since_restore": 0, "iterations_since_restore": 414, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.66, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 198.8, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 176.53, "shaped_reward_min": 150, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.08, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 17.7, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 14.96, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 17.51, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 25, "onion_drop_agent_0_mean": 0.13, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.1, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.1, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, 
"useful_onion_drop_agent_1_mean": 0.04, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.64, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 17.22, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 5.75, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 4.92, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.56, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.74, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.1, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.11, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.05, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.05, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.48, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.54, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.46, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.5, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.64, "optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 17.22, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.64, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 17.22, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.002787010744214058, "policy_loss": 0.002409706125035882, "vf_loss": 
7.769173622131348, "vf_explained_var": 0.5715185403823853, "kl": 0.002564822556450963, "entropy": 0.7992253303527832, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 5312000, "num_env_steps_trained": 5312000, "num_agent_steps_sampled": 10624000, "num_agent_steps_trained": 10624000}, "sampler_results": {"episode_reward_max": 633.0, "episode_reward_min": 510.0, "episode_reward_mean": 574.13, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 245.0}, "policy_reward_max": {"ppo": 321.0}, "policy_reward_mean": {"ppo": 287.065}, "custom_metrics": {"sparse_reward_mean": 198.8, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 176.53, "shaped_reward_min": 150, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.08, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 17.7, 
"onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 14.96, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 17.51, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 25, "onion_drop_agent_0_mean": 0.13, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.1, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.1, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.04, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.64, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 17.22, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 5.75, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 4.92, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.56, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.74, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.1, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.11, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.05, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.05, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.48, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.54, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.46, "soup_delivery_agent_0_min": 3, 
"soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.5, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.64, "optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 17.22, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.64, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 17.22, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 573.0, 527.0, 530.0, 522.0, 576.0, 561.0, 582.0, 579.0, 522.0, 573.0, 576.0, 573.0, 584.0, 587.0, 630.0, 579.0, 579.0, 576.0, 582.0, 576.0, 576.0, 579.0, 579.0, 525.0, 579.0, 522.0, 576.0, 513.0, 576.0, 576.0, 582.0, 582.0, 570.0, 582.0, 576.0, 579.0, 579.0, 522.0, 576.0, 579.0, 579.0, 633.0, 576.0, 582.0, 579.0, 579.0, 576.0, 584.0, 579.0, 579.0, 576.0, 579.0, 573.0, 630.0, 584.0, 582.0, 576.0, 513.0, 582.0, 579.0, 587.0, 582.0, 579.0, 582.0, 570.0, 582.0, 579.0, 579.0, 627.0, 570.0, 582.0, 573.0, 573.0, 579.0, 582.0, 570.0, 579.0, 525.0, 510.0, 570.0, 573.0, 579.0, 582.0, 582.0, 573.0, 579.0, 579.0, 576.0, 582.0, 576.0, 579.0, 570.0, 579.0, 630.0, 573.0, 579.0, 576.0, 576.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [291.0, 288.0, 282.0, 291.0, 266.0, 261.0, 262.0, 268.0, 267.0, 255.0, 285.0, 291.0, 282.0, 279.0, 286.0, 296.0, 292.0, 287.0, 254.0, 268.0, 286.0, 287.0, 294.0, 282.0, 283.0, 290.0, 290.0, 294.0, 291.0, 296.0, 316.0, 314.0, 285.0, 294.0, 289.0, 290.0, 288.0, 288.0, 291.0, 291.0, 288.0, 288.0, 293.0, 283.0, 296.0, 283.0, 295.0, 284.0, 261.0, 
264.0, 292.0, 287.0, 256.0, 266.0, 285.0, 291.0, 259.0, 254.0, 292.0, 284.0, 284.0, 292.0, 288.0, 294.0, 290.0, 292.0, 289.0, 281.0, 292.0, 290.0, 286.0, 290.0, 292.0, 287.0, 290.0, 289.0, 255.0, 267.0, 288.0, 288.0, 293.0, 286.0, 290.0, 289.0, 319.0, 314.0, 285.0, 291.0, 291.0, 291.0, 293.0, 286.0, 291.0, 288.0, 297.0, 279.0, 292.0, 292.0, 291.0, 288.0, 291.0, 288.0, 290.0, 286.0, 296.0, 283.0, 285.0, 288.0, 311.0, 319.0, 300.0, 284.0, 288.0, 294.0, 285.0, 291.0, 245.0, 268.0, 293.0, 289.0, 287.0, 292.0, 293.0, 294.0, 284.0, 298.0, 283.0, 296.0, 290.0, 292.0, 290.0, 280.0, 288.0, 294.0, 285.0, 294.0, 291.0, 288.0, 306.0, 321.0, 289.0, 281.0, 290.0, 292.0, 283.0, 290.0, 288.0, 285.0, 293.0, 286.0, 282.0, 300.0, 280.0, 290.0, 285.0, 294.0, 265.0, 260.0, 255.0, 255.0, 285.0, 285.0, 287.0, 286.0, 288.0, 291.0, 290.0, 292.0, 292.0, 290.0, 285.0, 288.0, 288.0, 291.0, 293.0, 286.0, 288.0, 288.0, 293.0, 289.0, 288.0, 288.0, 289.0, 290.0, 295.0, 275.0, 295.0, 284.0, 313.0, 317.0, 285.0, 288.0, 287.0, 292.0, 282.0, 294.0, 285.0, 291.0, 283.0, 293.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6958701226777856, "mean_inference_ms": 1.2416536502241968, "mean_action_processing_ms": 0.1335153926353554, "mean_env_wait_ms": 0.8377099069977573, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 633.0, "episode_reward_min": 510.0, "episode_reward_mean": 574.13, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 245.0}, "policy_reward_max": {"ppo": 321.0}, "policy_reward_mean": {"ppo": 287.065}, "hist_stats": {"episode_reward": [579.0, 573.0, 527.0, 530.0, 522.0, 576.0, 561.0, 582.0, 579.0, 522.0, 573.0, 576.0, 573.0, 584.0, 587.0, 630.0, 579.0, 579.0, 576.0, 582.0, 576.0, 576.0, 579.0, 579.0, 525.0, 579.0, 522.0, 576.0, 513.0, 576.0, 576.0, 582.0, 582.0, 570.0, 582.0, 576.0, 579.0, 579.0, 522.0, 576.0, 579.0, 579.0, 633.0, 576.0, 582.0, 579.0, 579.0, 576.0, 584.0, 579.0, 579.0, 576.0, 579.0, 573.0, 630.0, 584.0, 582.0, 
576.0, 513.0, 582.0, 579.0, 587.0, 582.0, 579.0, 582.0, 570.0, 582.0, 579.0, 579.0, 627.0, 570.0, 582.0, 573.0, 573.0, 579.0, 582.0, 570.0, 579.0, 525.0, 510.0, 570.0, 573.0, 579.0, 582.0, 582.0, 573.0, 579.0, 579.0, 576.0, 582.0, 576.0, 579.0, 570.0, 579.0, 630.0, 573.0, 579.0, 576.0, 576.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [291.0, 288.0, 282.0, 291.0, 266.0, 261.0, 262.0, 268.0, 267.0, 255.0, 285.0, 291.0, 282.0, 279.0, 286.0, 296.0, 292.0, 287.0, 254.0, 268.0, 286.0, 287.0, 294.0, 282.0, 283.0, 290.0, 290.0, 294.0, 291.0, 296.0, 316.0, 314.0, 285.0, 294.0, 289.0, 290.0, 288.0, 288.0, 291.0, 291.0, 288.0, 288.0, 293.0, 283.0, 296.0, 283.0, 295.0, 284.0, 261.0, 264.0, 292.0, 287.0, 256.0, 266.0, 285.0, 291.0, 259.0, 254.0, 292.0, 284.0, 284.0, 292.0, 288.0, 294.0, 290.0, 292.0, 289.0, 281.0, 292.0, 290.0, 286.0, 290.0, 292.0, 287.0, 290.0, 289.0, 255.0, 267.0, 288.0, 288.0, 293.0, 286.0, 290.0, 289.0, 319.0, 314.0, 285.0, 291.0, 291.0, 291.0, 293.0, 286.0, 291.0, 288.0, 297.0, 279.0, 292.0, 292.0, 291.0, 288.0, 291.0, 288.0, 290.0, 286.0, 296.0, 283.0, 285.0, 288.0, 311.0, 319.0, 300.0, 284.0, 288.0, 294.0, 285.0, 291.0, 245.0, 268.0, 293.0, 289.0, 287.0, 292.0, 293.0, 294.0, 284.0, 298.0, 283.0, 296.0, 290.0, 292.0, 290.0, 280.0, 288.0, 294.0, 285.0, 294.0, 291.0, 288.0, 306.0, 321.0, 289.0, 281.0, 290.0, 292.0, 283.0, 290.0, 288.0, 285.0, 293.0, 286.0, 282.0, 300.0, 280.0, 290.0, 285.0, 294.0, 265.0, 260.0, 255.0, 255.0, 285.0, 285.0, 287.0, 286.0, 288.0, 
291.0, 290.0, 292.0, 292.0, 290.0, 285.0, 288.0, 288.0, 291.0, 293.0, 286.0, 288.0, 288.0, 293.0, 289.0, 288.0, 288.0, 289.0, 290.0, 295.0, 275.0, 295.0, 284.0, 313.0, 317.0, 285.0, 288.0, 287.0, 292.0, 282.0, 294.0, 285.0, 291.0, 283.0, 293.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6958701226777856, "mean_inference_ms": 1.2416536502241968, "mean_action_processing_ms": 0.1335153926353554, "mean_env_wait_ms": 0.8377099069977573, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 10624000, "num_agent_steps_trained": 10624000, "num_env_steps_sampled": 5312000, "num_env_steps_trained": 5312000, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 5312000, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 10624000, "timers": {"training_iteration_time_ms": 3633.496, "learn_time_ms": 1126.315, "learn_throughput": 11364.496, "synch_weights_time_ms": 11.876}, "counters": {"num_env_steps_sampled": 5312000, "num_env_steps_trained": 5312000, "num_agent_steps_sampled": 10624000, "num_agent_steps_trained": 10624000}, "done": false, "episodes_total": 13280, "training_iteration": 415, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-27-27", "timestamp": 1666582047, "time_this_iter_s": 3.71405029296875, "time_total_s": 1584.4417915344238, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1584.4417915344238, "timesteps_since_restore": 0, "iterations_since_restore": 415, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 21.033333333333335, "ram_util_percent": 10.616666666666667}}
+{"custom_metrics": {"sparse_reward_mean": 199.4, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 177.4, "shaped_reward_min": 150, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.44, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 17.38, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 15.34, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 17.21, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 25, "onion_drop_agent_0_mean": 0.1, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.1, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.06, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, 
"useful_onion_drop_agent_1_mean": 0.04, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.09, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 16.92, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 5.63, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 4.99, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.53, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.86, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.05, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.07, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.39, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.66, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.35, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.65, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.09, "optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 16.92, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.09, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 16.92, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0019702562130987644, "policy_loss": -0.0023428606800734997, "vf_loss": 
7.736230850219727, "vf_explained_var": 0.5634697079658508, "kl": 0.002214438281953335, "entropy": 0.802035391330719, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 5324800, "num_env_steps_trained": 5324800, "num_agent_steps_sampled": 10649600, "num_agent_steps_trained": 10649600}, "sampler_results": {"episode_reward_max": 633.0, "episode_reward_min": 510.0, "episode_reward_mean": 576.2, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 245.0}, "policy_reward_max": {"ppo": 321.0}, "policy_reward_mean": {"ppo": 288.1}, "custom_metrics": {"sparse_reward_mean": 199.4, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 177.4, "shaped_reward_min": 150, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.44, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 17.38, 
"onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 15.34, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 17.21, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 25, "onion_drop_agent_0_mean": 0.1, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.1, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.06, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.04, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.09, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 16.92, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 5.63, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 4.99, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.53, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.86, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.05, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.07, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.39, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.66, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.35, "soup_delivery_agent_0_min": 3, 
"soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.65, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.09, "optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 16.92, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.09, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 16.92, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 570.0, 582.0, 576.0, 579.0, 579.0, 522.0, 576.0, 579.0, 579.0, 633.0, 576.0, 582.0, 579.0, 579.0, 576.0, 584.0, 579.0, 579.0, 576.0, 579.0, 573.0, 630.0, 584.0, 582.0, 576.0, 513.0, 582.0, 579.0, 587.0, 582.0, 579.0, 582.0, 570.0, 582.0, 579.0, 579.0, 627.0, 570.0, 582.0, 573.0, 573.0, 579.0, 582.0, 570.0, 579.0, 525.0, 510.0, 570.0, 573.0, 579.0, 582.0, 582.0, 573.0, 579.0, 579.0, 576.0, 582.0, 576.0, 579.0, 570.0, 579.0, 630.0, 573.0, 579.0, 576.0, 576.0, 576.0, 573.0, 590.0, 579.0, 582.0, 576.0, 587.0, 576.0, 570.0, 579.0, 576.0, 573.0, 579.0, 579.0, 576.0, 579.0, 582.0, 579.0, 525.0, 584.0, 582.0, 539.0, 582.0, 579.0, 579.0, 525.0, 573.0, 570.0, 573.0, 582.0, 582.0, 570.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [290.0, 292.0, 289.0, 281.0, 292.0, 290.0, 286.0, 290.0, 292.0, 287.0, 290.0, 289.0, 255.0, 267.0, 288.0, 288.0, 293.0, 286.0, 290.0, 289.0, 319.0, 314.0, 285.0, 291.0, 291.0, 291.0, 293.0, 286.0, 291.0, 288.0, 297.0, 279.0, 292.0, 292.0, 291.0, 288.0, 291.0, 288.0, 290.0, 286.0, 296.0, 283.0, 285.0, 288.0, 311.0, 319.0, 300.0, 284.0, 288.0, 
294.0, 285.0, 291.0, 245.0, 268.0, 293.0, 289.0, 287.0, 292.0, 293.0, 294.0, 284.0, 298.0, 283.0, 296.0, 290.0, 292.0, 290.0, 280.0, 288.0, 294.0, 285.0, 294.0, 291.0, 288.0, 306.0, 321.0, 289.0, 281.0, 290.0, 292.0, 283.0, 290.0, 288.0, 285.0, 293.0, 286.0, 282.0, 300.0, 280.0, 290.0, 285.0, 294.0, 265.0, 260.0, 255.0, 255.0, 285.0, 285.0, 287.0, 286.0, 288.0, 291.0, 290.0, 292.0, 292.0, 290.0, 285.0, 288.0, 288.0, 291.0, 293.0, 286.0, 288.0, 288.0, 293.0, 289.0, 288.0, 288.0, 289.0, 290.0, 295.0, 275.0, 295.0, 284.0, 313.0, 317.0, 285.0, 288.0, 287.0, 292.0, 282.0, 294.0, 285.0, 291.0, 283.0, 293.0, 289.0, 284.0, 299.0, 291.0, 288.0, 291.0, 292.0, 290.0, 292.0, 284.0, 297.0, 290.0, 285.0, 291.0, 283.0, 287.0, 290.0, 289.0, 283.0, 293.0, 285.0, 288.0, 288.0, 291.0, 294.0, 285.0, 293.0, 283.0, 290.0, 289.0, 293.0, 289.0, 289.0, 290.0, 269.0, 256.0, 296.0, 288.0, 294.0, 288.0, 272.0, 267.0, 287.0, 295.0, 286.0, 293.0, 291.0, 288.0, 262.0, 263.0, 289.0, 284.0, 287.0, 283.0, 287.0, 286.0, 295.0, 287.0, 293.0, 289.0, 287.0, 283.0, 285.0, 291.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6958264003113149, "mean_inference_ms": 1.2415504249231188, "mean_action_processing_ms": 0.1335099692280639, "mean_env_wait_ms": 0.8376494855811777, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 633.0, "episode_reward_min": 510.0, "episode_reward_mean": 576.2, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 245.0}, "policy_reward_max": {"ppo": 321.0}, "policy_reward_mean": {"ppo": 288.1}, "hist_stats": {"episode_reward": [582.0, 570.0, 582.0, 576.0, 579.0, 579.0, 522.0, 576.0, 579.0, 579.0, 633.0, 576.0, 582.0, 579.0, 579.0, 576.0, 584.0, 579.0, 579.0, 576.0, 579.0, 573.0, 630.0, 584.0, 582.0, 576.0, 513.0, 582.0, 579.0, 587.0, 582.0, 579.0, 582.0, 570.0, 582.0, 579.0, 579.0, 627.0, 570.0, 582.0, 573.0, 573.0, 579.0, 582.0, 570.0, 579.0, 525.0, 510.0, 570.0, 573.0, 579.0, 582.0, 582.0, 573.0, 579.0, 579.0, 576.0, 
582.0, 576.0, 579.0, 570.0, 579.0, 630.0, 573.0, 579.0, 576.0, 576.0, 576.0, 573.0, 590.0, 579.0, 582.0, 576.0, 587.0, 576.0, 570.0, 579.0, 576.0, 573.0, 579.0, 579.0, 576.0, 579.0, 582.0, 579.0, 525.0, 584.0, 582.0, 539.0, 582.0, 579.0, 579.0, 525.0, 573.0, 570.0, 573.0, 582.0, 582.0, 570.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [290.0, 292.0, 289.0, 281.0, 292.0, 290.0, 286.0, 290.0, 292.0, 287.0, 290.0, 289.0, 255.0, 267.0, 288.0, 288.0, 293.0, 286.0, 290.0, 289.0, 319.0, 314.0, 285.0, 291.0, 291.0, 291.0, 293.0, 286.0, 291.0, 288.0, 297.0, 279.0, 292.0, 292.0, 291.0, 288.0, 291.0, 288.0, 290.0, 286.0, 296.0, 283.0, 285.0, 288.0, 311.0, 319.0, 300.0, 284.0, 288.0, 294.0, 285.0, 291.0, 245.0, 268.0, 293.0, 289.0, 287.0, 292.0, 293.0, 294.0, 284.0, 298.0, 283.0, 296.0, 290.0, 292.0, 290.0, 280.0, 288.0, 294.0, 285.0, 294.0, 291.0, 288.0, 306.0, 321.0, 289.0, 281.0, 290.0, 292.0, 283.0, 290.0, 288.0, 285.0, 293.0, 286.0, 282.0, 300.0, 280.0, 290.0, 285.0, 294.0, 265.0, 260.0, 255.0, 255.0, 285.0, 285.0, 287.0, 286.0, 288.0, 291.0, 290.0, 292.0, 292.0, 290.0, 285.0, 288.0, 288.0, 291.0, 293.0, 286.0, 288.0, 288.0, 293.0, 289.0, 288.0, 288.0, 289.0, 290.0, 295.0, 275.0, 295.0, 284.0, 313.0, 317.0, 285.0, 288.0, 287.0, 292.0, 282.0, 294.0, 285.0, 291.0, 283.0, 293.0, 289.0, 284.0, 299.0, 291.0, 288.0, 291.0, 292.0, 290.0, 292.0, 284.0, 297.0, 290.0, 285.0, 291.0, 283.0, 287.0, 290.0, 289.0, 283.0, 293.0, 285.0, 288.0, 288.0, 291.0, 294.0, 285.0, 293.0, 283.0, 290.0, 
289.0, 293.0, 289.0, 289.0, 290.0, 269.0, 256.0, 296.0, 288.0, 294.0, 288.0, 272.0, 267.0, 287.0, 295.0, 286.0, 293.0, 291.0, 288.0, 262.0, 263.0, 289.0, 284.0, 287.0, 283.0, 287.0, 286.0, 295.0, 287.0, 293.0, 289.0, 287.0, 283.0, 285.0, 291.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6958264003113149, "mean_inference_ms": 1.2415504249231188, "mean_action_processing_ms": 0.1335099692280639, "mean_env_wait_ms": 0.8376494855811777, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 10649600, "num_agent_steps_trained": 10649600, "num_env_steps_sampled": 5324800, "num_env_steps_trained": 5324800, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 5324800, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 10649600, "timers": {"training_iteration_time_ms": 3633.959, "learn_time_ms": 1128.403, "learn_throughput": 11343.467, "synch_weights_time_ms": 11.389}, "counters": {"num_env_steps_sampled": 5324800, "num_env_steps_trained": 5324800, "num_agent_steps_sampled": 10649600, "num_agent_steps_trained": 10649600}, "done": false, "episodes_total": 13312, "training_iteration": 416, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-27-31", "timestamp": 1666582051, "time_this_iter_s": 3.7223761081695557, "time_total_s": 1588.1641676425934, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1588.1641676425934, "timesteps_since_restore": 0, "iterations_since_restore": 416, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.516666666666666, "ram_util_percent": 10.616666666666665}}
+{"custom_metrics": {"sparse_reward_mean": 199.4, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 177.2, "shaped_reward_min": 150, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.79, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 16.89, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 15.73, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 16.75, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 25, "onion_drop_agent_0_mean": 0.08, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.1, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.03, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, 
"useful_onion_drop_agent_1_mean": 0.03, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.5, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 16.46, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 5.49, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.07, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.4, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.94, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.05, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.04, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.27, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.8, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.22, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.78, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.5, "optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 16.46, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.5, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 16.46, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.00037188551505096257, "policy_loss": -1.1931115295737982e-06, "vf_loss": 
7.761588096618652, "vf_explained_var": 0.5580202341079712, "kl": 0.002245605457574129, "entropy": 0.8061589598655701, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 5337600, "num_env_steps_trained": 5337600, "num_agent_steps_sampled": 10675200, "num_agent_steps_trained": 10675200}, "sampler_results": {"episode_reward_max": 633.0, "episode_reward_min": 510.0, "episode_reward_mean": 576.0, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 255.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 288.0}, "custom_metrics": {"sparse_reward_mean": 199.4, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 177.2, "shaped_reward_min": 150, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.79, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 16.89, 
"onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 15.73, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 16.75, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 25, "onion_drop_agent_0_mean": 0.08, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.1, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.03, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.03, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.5, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 16.46, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 5.49, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.07, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.4, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.94, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.05, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.04, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.27, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.8, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.22, "soup_delivery_agent_0_min": 3, 
"soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.78, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.5, "optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 16.46, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.5, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 16.46, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 570.0, 582.0, 579.0, 579.0, 627.0, 570.0, 582.0, 573.0, 573.0, 579.0, 582.0, 570.0, 579.0, 525.0, 510.0, 570.0, 573.0, 579.0, 582.0, 582.0, 573.0, 579.0, 579.0, 576.0, 582.0, 576.0, 579.0, 570.0, 579.0, 630.0, 573.0, 579.0, 576.0, 576.0, 576.0, 573.0, 590.0, 579.0, 582.0, 576.0, 587.0, 576.0, 570.0, 579.0, 576.0, 573.0, 579.0, 579.0, 576.0, 579.0, 582.0, 579.0, 525.0, 584.0, 582.0, 539.0, 582.0, 579.0, 579.0, 525.0, 573.0, 570.0, 573.0, 582.0, 582.0, 570.0, 576.0, 579.0, 579.0, 573.0, 579.0, 579.0, 573.0, 579.0, 570.0, 584.0, 573.0, 573.0, 630.0, 533.0, 579.0, 576.0, 582.0, 573.0, 576.0, 587.0, 573.0, 633.0, 530.0, 579.0, 576.0, 530.0, 582.0, 579.0, 573.0, 582.0, 627.0, 579.0, 573.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [290.0, 292.0, 290.0, 280.0, 288.0, 294.0, 285.0, 294.0, 291.0, 288.0, 306.0, 321.0, 289.0, 281.0, 290.0, 292.0, 283.0, 290.0, 288.0, 285.0, 293.0, 286.0, 282.0, 300.0, 280.0, 290.0, 285.0, 294.0, 265.0, 260.0, 255.0, 255.0, 285.0, 285.0, 287.0, 286.0, 288.0, 291.0, 290.0, 292.0, 292.0, 290.0, 285.0, 288.0, 288.0, 291.0, 293.0, 286.0, 288.0, 
288.0, 293.0, 289.0, 288.0, 288.0, 289.0, 290.0, 295.0, 275.0, 295.0, 284.0, 313.0, 317.0, 285.0, 288.0, 287.0, 292.0, 282.0, 294.0, 285.0, 291.0, 283.0, 293.0, 289.0, 284.0, 299.0, 291.0, 288.0, 291.0, 292.0, 290.0, 292.0, 284.0, 297.0, 290.0, 285.0, 291.0, 283.0, 287.0, 290.0, 289.0, 283.0, 293.0, 285.0, 288.0, 288.0, 291.0, 294.0, 285.0, 293.0, 283.0, 290.0, 289.0, 293.0, 289.0, 289.0, 290.0, 269.0, 256.0, 296.0, 288.0, 294.0, 288.0, 272.0, 267.0, 287.0, 295.0, 286.0, 293.0, 291.0, 288.0, 262.0, 263.0, 289.0, 284.0, 287.0, 283.0, 287.0, 286.0, 295.0, 287.0, 293.0, 289.0, 287.0, 283.0, 285.0, 291.0, 286.0, 293.0, 288.0, 291.0, 285.0, 288.0, 296.0, 283.0, 291.0, 288.0, 291.0, 282.0, 292.0, 287.0, 285.0, 285.0, 288.0, 296.0, 297.0, 276.0, 285.0, 288.0, 319.0, 311.0, 263.0, 270.0, 289.0, 290.0, 290.0, 286.0, 293.0, 289.0, 277.0, 296.0, 292.0, 284.0, 304.0, 283.0, 284.0, 289.0, 324.0, 309.0, 268.0, 262.0, 294.0, 285.0, 287.0, 289.0, 262.0, 268.0, 288.0, 294.0, 294.0, 285.0, 285.0, 288.0, 295.0, 287.0, 310.0, 317.0, 293.0, 286.0, 284.0, 289.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6957941137045737, "mean_inference_ms": 1.241436612359549, "mean_action_processing_ms": 0.13350729003532036, "mean_env_wait_ms": 0.8376030328400872, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 633.0, "episode_reward_min": 510.0, "episode_reward_mean": 576.0, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 255.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 288.0}, "hist_stats": {"episode_reward": [582.0, 570.0, 582.0, 579.0, 579.0, 627.0, 570.0, 582.0, 573.0, 573.0, 579.0, 582.0, 570.0, 579.0, 525.0, 510.0, 570.0, 573.0, 579.0, 582.0, 582.0, 573.0, 579.0, 579.0, 576.0, 582.0, 576.0, 579.0, 570.0, 579.0, 630.0, 573.0, 579.0, 576.0, 576.0, 576.0, 573.0, 590.0, 579.0, 582.0, 576.0, 587.0, 576.0, 570.0, 579.0, 576.0, 573.0, 579.0, 579.0, 576.0, 579.0, 582.0, 579.0, 525.0, 584.0, 582.0, 539.0, 
582.0, 579.0, 579.0, 525.0, 573.0, 570.0, 573.0, 582.0, 582.0, 570.0, 576.0, 579.0, 579.0, 573.0, 579.0, 579.0, 573.0, 579.0, 570.0, 584.0, 573.0, 573.0, 630.0, 533.0, 579.0, 576.0, 582.0, 573.0, 576.0, 587.0, 573.0, 633.0, 530.0, 579.0, 576.0, 530.0, 582.0, 579.0, 573.0, 582.0, 627.0, 579.0, 573.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [290.0, 292.0, 290.0, 280.0, 288.0, 294.0, 285.0, 294.0, 291.0, 288.0, 306.0, 321.0, 289.0, 281.0, 290.0, 292.0, 283.0, 290.0, 288.0, 285.0, 293.0, 286.0, 282.0, 300.0, 280.0, 290.0, 285.0, 294.0, 265.0, 260.0, 255.0, 255.0, 285.0, 285.0, 287.0, 286.0, 288.0, 291.0, 290.0, 292.0, 292.0, 290.0, 285.0, 288.0, 288.0, 291.0, 293.0, 286.0, 288.0, 288.0, 293.0, 289.0, 288.0, 288.0, 289.0, 290.0, 295.0, 275.0, 295.0, 284.0, 313.0, 317.0, 285.0, 288.0, 287.0, 292.0, 282.0, 294.0, 285.0, 291.0, 283.0, 293.0, 289.0, 284.0, 299.0, 291.0, 288.0, 291.0, 292.0, 290.0, 292.0, 284.0, 297.0, 290.0, 285.0, 291.0, 283.0, 287.0, 290.0, 289.0, 283.0, 293.0, 285.0, 288.0, 288.0, 291.0, 294.0, 285.0, 293.0, 283.0, 290.0, 289.0, 293.0, 289.0, 289.0, 290.0, 269.0, 256.0, 296.0, 288.0, 294.0, 288.0, 272.0, 267.0, 287.0, 295.0, 286.0, 293.0, 291.0, 288.0, 262.0, 263.0, 289.0, 284.0, 287.0, 283.0, 287.0, 286.0, 295.0, 287.0, 293.0, 289.0, 287.0, 283.0, 285.0, 291.0, 286.0, 293.0, 288.0, 291.0, 285.0, 288.0, 296.0, 283.0, 291.0, 288.0, 291.0, 282.0, 292.0, 287.0, 285.0, 285.0, 288.0, 296.0, 297.0, 276.0, 285.0, 288.0, 319.0, 311.0, 263.0, 270.0, 289.0, 290.0, 290.0, 
286.0, 293.0, 289.0, 277.0, 296.0, 292.0, 284.0, 304.0, 283.0, 284.0, 289.0, 324.0, 309.0, 268.0, 262.0, 294.0, 285.0, 287.0, 289.0, 262.0, 268.0, 288.0, 294.0, 294.0, 285.0, 285.0, 288.0, 295.0, 287.0, 310.0, 317.0, 293.0, 286.0, 284.0, 289.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6957941137045737, "mean_inference_ms": 1.241436612359549, "mean_action_processing_ms": 0.13350729003532036, "mean_env_wait_ms": 0.8376030328400872, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 10675200, "num_agent_steps_trained": 10675200, "num_env_steps_sampled": 5337600, "num_env_steps_trained": 5337600, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 5337600, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 10675200, "timers": {"training_iteration_time_ms": 3629.299, "learn_time_ms": 1123.612, "learn_throughput": 11391.837, "synch_weights_time_ms": 11.698}, "counters": {"num_env_steps_sampled": 5337600, "num_env_steps_trained": 5337600, "num_agent_steps_sampled": 10675200, "num_agent_steps_trained": 10675200}, "done": false, "episodes_total": 13344, "training_iteration": 417, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-27-34", "timestamp": 1666582054, "time_this_iter_s": 3.6620047092437744, "time_total_s": 1591.8261723518372, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1591.8261723518372, "timesteps_since_restore": 0, "iterations_since_restore": 417, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 24.139999999999997, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 199.0, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 177.22, "shaped_reward_min": 162, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.14, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 16.55, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.05, "useful_onion_pickup_agent_0_min": 9, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 16.4, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.09, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.11, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.01, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, 
"useful_onion_drop_agent_1_mean": 0.04, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.81, "potting_onion_agent_0_min": 9, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 16.14, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.39, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 5.17, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.29, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.05, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.05, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.04, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.16, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 4.91, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.12, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.88, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.81, "optimal_onion_potting_agent_0_min": 9, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 16.14, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.81, "viable_onion_potting_agent_0_min": 9, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 16.14, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0008182072779163718, "policy_loss": -0.0011884444393217564, "vf_loss": 
7.7140302658081055, "vf_explained_var": 0.5830328464508057, "kl": 0.002344300504773855, "entropy": 0.8023296594619751, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 5350400, "num_env_steps_trained": 5350400, "num_agent_steps_sampled": 10700800, "num_agent_steps_trained": 10700800}, "sampler_results": {"episode_reward_max": 633.0, "episode_reward_min": 522.0, "episode_reward_mean": 575.22, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 256.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 287.61}, "custom_metrics": {"sparse_reward_mean": 199.0, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 177.22, "shaped_reward_min": 162, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.14, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 16.55, 
"onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.05, "useful_onion_pickup_agent_0_min": 9, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 16.4, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.09, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.11, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.01, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.04, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.81, "potting_onion_agent_0_min": 9, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 16.14, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.39, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 5.17, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.29, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.05, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.05, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.04, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.16, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 4.91, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.12, "soup_delivery_agent_0_min": 2, 
"soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.88, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.81, "optimal_onion_potting_agent_0_min": 9, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 16.14, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.81, "viable_onion_potting_agent_0_min": 9, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 16.14, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 576.0, 576.0, 576.0, 573.0, 590.0, 579.0, 582.0, 576.0, 587.0, 576.0, 570.0, 579.0, 576.0, 573.0, 579.0, 579.0, 576.0, 579.0, 582.0, 579.0, 525.0, 584.0, 582.0, 539.0, 582.0, 579.0, 579.0, 525.0, 573.0, 570.0, 573.0, 582.0, 582.0, 570.0, 576.0, 579.0, 579.0, 573.0, 579.0, 579.0, 573.0, 579.0, 570.0, 584.0, 573.0, 573.0, 630.0, 533.0, 579.0, 576.0, 582.0, 573.0, 576.0, 587.0, 573.0, 633.0, 530.0, 579.0, 576.0, 530.0, 582.0, 579.0, 573.0, 582.0, 627.0, 579.0, 573.0, 576.0, 579.0, 579.0, 582.0, 522.0, 582.0, 582.0, 576.0, 579.0, 576.0, 570.0, 570.0, 582.0, 630.0, 576.0, 576.0, 576.0, 587.0, 579.0, 570.0, 627.0, 525.0, 576.0, 525.0, 564.0, 579.0, 573.0, 576.0, 576.0, 587.0, 579.0, 530.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [287.0, 292.0, 282.0, 294.0, 285.0, 291.0, 283.0, 293.0, 289.0, 284.0, 299.0, 291.0, 288.0, 291.0, 292.0, 290.0, 292.0, 284.0, 297.0, 290.0, 285.0, 291.0, 283.0, 287.0, 290.0, 289.0, 283.0, 293.0, 285.0, 288.0, 288.0, 291.0, 294.0, 285.0, 293.0, 283.0, 290.0, 289.0, 293.0, 289.0, 289.0, 290.0, 269.0, 256.0, 296.0, 288.0, 294.0, 288.0, 272.0, 
267.0, 287.0, 295.0, 286.0, 293.0, 291.0, 288.0, 262.0, 263.0, 289.0, 284.0, 287.0, 283.0, 287.0, 286.0, 295.0, 287.0, 293.0, 289.0, 287.0, 283.0, 285.0, 291.0, 286.0, 293.0, 288.0, 291.0, 285.0, 288.0, 296.0, 283.0, 291.0, 288.0, 291.0, 282.0, 292.0, 287.0, 285.0, 285.0, 288.0, 296.0, 297.0, 276.0, 285.0, 288.0, 319.0, 311.0, 263.0, 270.0, 289.0, 290.0, 290.0, 286.0, 293.0, 289.0, 277.0, 296.0, 292.0, 284.0, 304.0, 283.0, 284.0, 289.0, 324.0, 309.0, 268.0, 262.0, 294.0, 285.0, 287.0, 289.0, 262.0, 268.0, 288.0, 294.0, 294.0, 285.0, 285.0, 288.0, 295.0, 287.0, 310.0, 317.0, 293.0, 286.0, 284.0, 289.0, 282.0, 294.0, 290.0, 289.0, 291.0, 288.0, 290.0, 292.0, 265.0, 257.0, 296.0, 286.0, 292.0, 290.0, 286.0, 290.0, 292.0, 287.0, 283.0, 293.0, 290.0, 280.0, 284.0, 286.0, 294.0, 288.0, 316.0, 314.0, 280.0, 296.0, 288.0, 288.0, 283.0, 293.0, 285.0, 302.0, 288.0, 291.0, 279.0, 291.0, 315.0, 312.0, 268.0, 257.0, 287.0, 289.0, 266.0, 259.0, 278.0, 286.0, 292.0, 287.0, 286.0, 287.0, 294.0, 282.0, 288.0, 288.0, 299.0, 288.0, 291.0, 288.0, 266.0, 264.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6957477904564686, "mean_inference_ms": 1.241316312344584, "mean_action_processing_ms": 0.13350205868262507, "mean_env_wait_ms": 0.837545382909305, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 633.0, "episode_reward_min": 522.0, "episode_reward_mean": 575.22, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 256.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 287.61}, "hist_stats": {"episode_reward": [579.0, 576.0, 576.0, 576.0, 573.0, 590.0, 579.0, 582.0, 576.0, 587.0, 576.0, 570.0, 579.0, 576.0, 573.0, 579.0, 579.0, 576.0, 579.0, 582.0, 579.0, 525.0, 584.0, 582.0, 539.0, 582.0, 579.0, 579.0, 525.0, 573.0, 570.0, 573.0, 582.0, 582.0, 570.0, 576.0, 579.0, 579.0, 573.0, 579.0, 579.0, 573.0, 579.0, 570.0, 584.0, 573.0, 573.0, 630.0, 533.0, 579.0, 576.0, 582.0, 573.0, 576.0, 587.0, 573.0, 633.0, 
530.0, 579.0, 576.0, 530.0, 582.0, 579.0, 573.0, 582.0, 627.0, 579.0, 573.0, 576.0, 579.0, 579.0, 582.0, 522.0, 582.0, 582.0, 576.0, 579.0, 576.0, 570.0, 570.0, 582.0, 630.0, 576.0, 576.0, 576.0, 587.0, 579.0, 570.0, 627.0, 525.0, 576.0, 525.0, 564.0, 579.0, 573.0, 576.0, 576.0, 587.0, 579.0, 530.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [287.0, 292.0, 282.0, 294.0, 285.0, 291.0, 283.0, 293.0, 289.0, 284.0, 299.0, 291.0, 288.0, 291.0, 292.0, 290.0, 292.0, 284.0, 297.0, 290.0, 285.0, 291.0, 283.0, 287.0, 290.0, 289.0, 283.0, 293.0, 285.0, 288.0, 288.0, 291.0, 294.0, 285.0, 293.0, 283.0, 290.0, 289.0, 293.0, 289.0, 289.0, 290.0, 269.0, 256.0, 296.0, 288.0, 294.0, 288.0, 272.0, 267.0, 287.0, 295.0, 286.0, 293.0, 291.0, 288.0, 262.0, 263.0, 289.0, 284.0, 287.0, 283.0, 287.0, 286.0, 295.0, 287.0, 293.0, 289.0, 287.0, 283.0, 285.0, 291.0, 286.0, 293.0, 288.0, 291.0, 285.0, 288.0, 296.0, 283.0, 291.0, 288.0, 291.0, 282.0, 292.0, 287.0, 285.0, 285.0, 288.0, 296.0, 297.0, 276.0, 285.0, 288.0, 319.0, 311.0, 263.0, 270.0, 289.0, 290.0, 290.0, 286.0, 293.0, 289.0, 277.0, 296.0, 292.0, 284.0, 304.0, 283.0, 284.0, 289.0, 324.0, 309.0, 268.0, 262.0, 294.0, 285.0, 287.0, 289.0, 262.0, 268.0, 288.0, 294.0, 294.0, 285.0, 285.0, 288.0, 295.0, 287.0, 310.0, 317.0, 293.0, 286.0, 284.0, 289.0, 282.0, 294.0, 290.0, 289.0, 291.0, 288.0, 290.0, 292.0, 265.0, 257.0, 296.0, 286.0, 292.0, 290.0, 286.0, 290.0, 292.0, 287.0, 283.0, 293.0, 290.0, 280.0, 284.0, 286.0, 294.0, 288.0, 316.0, 314.0, 280.0, 
296.0, 288.0, 288.0, 283.0, 293.0, 285.0, 302.0, 288.0, 291.0, 279.0, 291.0, 315.0, 312.0, 268.0, 257.0, 287.0, 289.0, 266.0, 259.0, 278.0, 286.0, 292.0, 287.0, 286.0, 287.0, 294.0, 282.0, 288.0, 288.0, 299.0, 288.0, 291.0, 288.0, 266.0, 264.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6957477904564686, "mean_inference_ms": 1.241316312344584, "mean_action_processing_ms": 0.13350205868262507, "mean_env_wait_ms": 0.837545382909305, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 10700800, "num_agent_steps_trained": 10700800, "num_env_steps_sampled": 5350400, "num_env_steps_trained": 5350400, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 5350400, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 10700800, "timers": {"training_iteration_time_ms": 3634.106, "learn_time_ms": 1129.378, "learn_throughput": 11333.669, "synch_weights_time_ms": 12.346}, "counters": {"num_env_steps_sampled": 5350400, "num_env_steps_trained": 5350400, "num_agent_steps_sampled": 10700800, "num_agent_steps_trained": 10700800}, "done": false, "episodes_total": 13376, "training_iteration": 418, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-27-38", "timestamp": 1666582058, "time_this_iter_s": 3.6582388877868652, "time_total_s": 1595.484411239624, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1595.484411239624, "timesteps_since_restore": 0, "iterations_since_restore": 418, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.9, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 199.0, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 177.12, "shaped_reward_min": 147, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.4, "onion_pickup_agent_0_min": 10, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 16.41, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.25, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 16.3, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.16, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.13, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.04, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, 
"useful_onion_drop_agent_1_mean": 0.04, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.93, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 16.0, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.35, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 5.23, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.22, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.09, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.05, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.04, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.15, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 4.93, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.12, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.88, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.93, "optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 16.0, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.93, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 16.0, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0011450252495706081, "policy_loss": -0.0015236774925142527, "vf_loss": 
7.782792568206787, "vf_explained_var": 0.5690507888793945, "kl": 0.0021786403376609087, "entropy": 0.7992550134658813, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 5363200, "num_env_steps_trained": 5363200, "num_agent_steps_sampled": 10726400, "num_agent_steps_trained": 10726400}, "sampler_results": {"episode_reward_max": 633.0, "episode_reward_min": 507.0, "episode_reward_mean": 575.12, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 253.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 287.56}, "custom_metrics": {"sparse_reward_mean": 199.0, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 177.12, "shaped_reward_min": 147, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.4, "onion_pickup_agent_0_min": 10, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 16.41, 
"onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.25, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 16.3, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.16, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.13, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.04, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.04, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.93, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 16.0, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.35, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 5.23, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.22, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.09, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.05, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.04, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.15, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 4.93, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.12, "soup_delivery_agent_0_min": 2, 
"soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.88, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.93, "optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 16.0, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.93, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 16.0, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 582.0, 570.0, 576.0, 579.0, 579.0, 573.0, 579.0, 579.0, 573.0, 579.0, 570.0, 584.0, 573.0, 573.0, 630.0, 533.0, 579.0, 576.0, 582.0, 573.0, 576.0, 587.0, 573.0, 633.0, 530.0, 579.0, 576.0, 530.0, 582.0, 579.0, 573.0, 582.0, 627.0, 579.0, 573.0, 576.0, 579.0, 579.0, 582.0, 522.0, 582.0, 582.0, 576.0, 579.0, 576.0, 570.0, 570.0, 582.0, 630.0, 576.0, 576.0, 576.0, 587.0, 579.0, 570.0, 627.0, 525.0, 576.0, 525.0, 564.0, 579.0, 573.0, 576.0, 576.0, 587.0, 579.0, 530.0, 573.0, 573.0, 630.0, 576.0, 579.0, 522.0, 519.0, 582.0, 576.0, 590.0, 525.0, 630.0, 576.0, 579.0, 576.0, 579.0, 573.0, 576.0, 633.0, 576.0, 576.0, 582.0, 525.0, 582.0, 507.0, 579.0, 582.0, 633.0, 533.0, 530.0, 587.0, 584.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [295.0, 287.0, 293.0, 289.0, 287.0, 283.0, 285.0, 291.0, 286.0, 293.0, 288.0, 291.0, 285.0, 288.0, 296.0, 283.0, 291.0, 288.0, 291.0, 282.0, 292.0, 287.0, 285.0, 285.0, 288.0, 296.0, 297.0, 276.0, 285.0, 288.0, 319.0, 311.0, 263.0, 270.0, 289.0, 290.0, 290.0, 286.0, 293.0, 289.0, 277.0, 296.0, 292.0, 284.0, 304.0, 283.0, 284.0, 289.0, 324.0, 
309.0, 268.0, 262.0, 294.0, 285.0, 287.0, 289.0, 262.0, 268.0, 288.0, 294.0, 294.0, 285.0, 285.0, 288.0, 295.0, 287.0, 310.0, 317.0, 293.0, 286.0, 284.0, 289.0, 282.0, 294.0, 290.0, 289.0, 291.0, 288.0, 290.0, 292.0, 265.0, 257.0, 296.0, 286.0, 292.0, 290.0, 286.0, 290.0, 292.0, 287.0, 283.0, 293.0, 290.0, 280.0, 284.0, 286.0, 294.0, 288.0, 316.0, 314.0, 280.0, 296.0, 288.0, 288.0, 283.0, 293.0, 285.0, 302.0, 288.0, 291.0, 279.0, 291.0, 315.0, 312.0, 268.0, 257.0, 287.0, 289.0, 266.0, 259.0, 278.0, 286.0, 292.0, 287.0, 286.0, 287.0, 294.0, 282.0, 288.0, 288.0, 299.0, 288.0, 291.0, 288.0, 266.0, 264.0, 285.0, 288.0, 283.0, 290.0, 319.0, 311.0, 289.0, 287.0, 290.0, 289.0, 268.0, 254.0, 259.0, 260.0, 288.0, 294.0, 284.0, 292.0, 297.0, 293.0, 264.0, 261.0, 319.0, 311.0, 292.0, 284.0, 289.0, 290.0, 290.0, 286.0, 288.0, 291.0, 284.0, 289.0, 288.0, 288.0, 316.0, 317.0, 286.0, 290.0, 292.0, 284.0, 293.0, 289.0, 256.0, 269.0, 294.0, 288.0, 254.0, 253.0, 287.0, 292.0, 290.0, 292.0, 324.0, 309.0, 269.0, 264.0, 270.0, 260.0, 294.0, 293.0, 296.0, 288.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6957078568111794, "mean_inference_ms": 1.2412049370913687, "mean_action_processing_ms": 0.13349627433994735, "mean_env_wait_ms": 0.8374904032152473, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 633.0, "episode_reward_min": 507.0, "episode_reward_mean": 575.12, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 253.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 287.56}, "hist_stats": {"episode_reward": [582.0, 582.0, 570.0, 576.0, 579.0, 579.0, 573.0, 579.0, 579.0, 573.0, 579.0, 570.0, 584.0, 573.0, 573.0, 630.0, 533.0, 579.0, 576.0, 582.0, 573.0, 576.0, 587.0, 573.0, 633.0, 530.0, 579.0, 576.0, 530.0, 582.0, 579.0, 573.0, 582.0, 627.0, 579.0, 573.0, 576.0, 579.0, 579.0, 582.0, 522.0, 582.0, 582.0, 576.0, 579.0, 576.0, 570.0, 570.0, 582.0, 630.0, 576.0, 576.0, 576.0, 587.0, 579.0, 570.0, 627.0, 
525.0, 576.0, 525.0, 564.0, 579.0, 573.0, 576.0, 576.0, 587.0, 579.0, 530.0, 573.0, 573.0, 630.0, 576.0, 579.0, 522.0, 519.0, 582.0, 576.0, 590.0, 525.0, 630.0, 576.0, 579.0, 576.0, 579.0, 573.0, 576.0, 633.0, 576.0, 576.0, 582.0, 525.0, 582.0, 507.0, 579.0, 582.0, 633.0, 533.0, 530.0, 587.0, 584.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [295.0, 287.0, 293.0, 289.0, 287.0, 283.0, 285.0, 291.0, 286.0, 293.0, 288.0, 291.0, 285.0, 288.0, 296.0, 283.0, 291.0, 288.0, 291.0, 282.0, 292.0, 287.0, 285.0, 285.0, 288.0, 296.0, 297.0, 276.0, 285.0, 288.0, 319.0, 311.0, 263.0, 270.0, 289.0, 290.0, 290.0, 286.0, 293.0, 289.0, 277.0, 296.0, 292.0, 284.0, 304.0, 283.0, 284.0, 289.0, 324.0, 309.0, 268.0, 262.0, 294.0, 285.0, 287.0, 289.0, 262.0, 268.0, 288.0, 294.0, 294.0, 285.0, 285.0, 288.0, 295.0, 287.0, 310.0, 317.0, 293.0, 286.0, 284.0, 289.0, 282.0, 294.0, 290.0, 289.0, 291.0, 288.0, 290.0, 292.0, 265.0, 257.0, 296.0, 286.0, 292.0, 290.0, 286.0, 290.0, 292.0, 287.0, 283.0, 293.0, 290.0, 280.0, 284.0, 286.0, 294.0, 288.0, 316.0, 314.0, 280.0, 296.0, 288.0, 288.0, 283.0, 293.0, 285.0, 302.0, 288.0, 291.0, 279.0, 291.0, 315.0, 312.0, 268.0, 257.0, 287.0, 289.0, 266.0, 259.0, 278.0, 286.0, 292.0, 287.0, 286.0, 287.0, 294.0, 282.0, 288.0, 288.0, 299.0, 288.0, 291.0, 288.0, 266.0, 264.0, 285.0, 288.0, 283.0, 290.0, 319.0, 311.0, 289.0, 287.0, 290.0, 289.0, 268.0, 254.0, 259.0, 260.0, 288.0, 294.0, 284.0, 292.0, 297.0, 293.0, 264.0, 261.0, 319.0, 311.0, 292.0, 284.0, 289.0, 290.0, 290.0, 
286.0, 288.0, 291.0, 284.0, 289.0, 288.0, 288.0, 316.0, 317.0, 286.0, 290.0, 292.0, 284.0, 293.0, 289.0, 256.0, 269.0, 294.0, 288.0, 254.0, 253.0, 287.0, 292.0, 290.0, 292.0, 324.0, 309.0, 269.0, 264.0, 270.0, 260.0, 294.0, 293.0, 296.0, 288.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6957078568111794, "mean_inference_ms": 1.2412049370913687, "mean_action_processing_ms": 0.13349627433994735, "mean_env_wait_ms": 0.8374904032152473, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 10726400, "num_agent_steps_trained": 10726400, "num_env_steps_sampled": 5363200, "num_env_steps_trained": 5363200, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 5363200, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 10726400, "timers": {"training_iteration_time_ms": 3635.528, "learn_time_ms": 1127.91, "learn_throughput": 11348.427, "synch_weights_time_ms": 12.317}, "counters": {"num_env_steps_sampled": 5363200, "num_env_steps_trained": 5363200, "num_agent_steps_sampled": 10726400, "num_agent_steps_trained": 10726400}, "done": false, "episodes_total": 13408, "training_iteration": 419, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-27-42", "timestamp": 1666582062, "time_this_iter_s": 3.6541497707366943, "time_total_s": 1599.1385610103607, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1599.1385610103607, "timesteps_since_restore": 0, "iterations_since_restore": 419, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.733333333333334, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 198.4, "sparse_reward_min": 100, "sparse_reward_max": 220, "shaped_reward_mean": 176.9, "shaped_reward_min": 119, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.44, "onion_pickup_agent_0_min": 10, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 16.41, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.3, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 16.23, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.17, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.14, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.05, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, 
"useful_onion_drop_agent_1_mean": 0.05, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.98, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 15.93, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.33, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 5.27, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.17, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.12, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.09, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.05, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.06, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.0, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.03, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.94, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.98, "optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 15.93, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.98, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 15.93, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.00018203884246759117, "policy_loss": -0.00019825922208838165, "vf_loss": 
7.813397407531738, "vf_explained_var": 0.5786846280097961, "kl": 0.003914575092494488, "entropy": 0.8020821213722229, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 5376000, "num_env_steps_trained": 5376000, "num_agent_steps_sampled": 10752000, "num_agent_steps_trained": 10752000}, "sampler_results": {"episode_reward_max": 639.0, "episode_reward_min": 319.0, "episode_reward_mean": 573.7, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 157.0}, "policy_reward_max": {"ppo": 326.0}, "policy_reward_mean": {"ppo": 286.85}, "custom_metrics": {"sparse_reward_mean": 198.4, "sparse_reward_min": 100, "sparse_reward_max": 220, "shaped_reward_mean": 176.9, "shaped_reward_min": 119, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.44, "onion_pickup_agent_0_min": 10, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 16.41, 
"onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.3, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 16.23, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.17, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.14, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.05, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.05, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.98, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 15.93, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.33, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 5.27, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.17, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.12, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.09, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.05, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.06, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.0, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.03, "soup_delivery_agent_0_min": 2, 
"soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.94, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.98, "optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 15.93, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.98, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 15.93, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 627.0, 579.0, 573.0, 576.0, 579.0, 579.0, 582.0, 522.0, 582.0, 582.0, 576.0, 579.0, 576.0, 570.0, 570.0, 582.0, 630.0, 576.0, 576.0, 576.0, 587.0, 579.0, 570.0, 627.0, 525.0, 576.0, 525.0, 564.0, 579.0, 573.0, 576.0, 576.0, 587.0, 579.0, 530.0, 573.0, 573.0, 630.0, 576.0, 579.0, 522.0, 519.0, 582.0, 576.0, 590.0, 525.0, 630.0, 576.0, 579.0, 576.0, 579.0, 573.0, 576.0, 633.0, 576.0, 576.0, 582.0, 525.0, 582.0, 507.0, 579.0, 582.0, 633.0, 533.0, 530.0, 587.0, 584.0, 576.0, 627.0, 533.0, 576.0, 573.0, 576.0, 576.0, 584.0, 636.0, 530.0, 525.0, 639.0, 573.0, 573.0, 582.0, 582.0, 582.0, 573.0, 576.0, 579.0, 576.0, 581.0, 627.0, 579.0, 319.0, 576.0, 579.0, 576.0, 576.0, 579.0, 582.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [295.0, 287.0, 310.0, 317.0, 293.0, 286.0, 284.0, 289.0, 282.0, 294.0, 290.0, 289.0, 291.0, 288.0, 290.0, 292.0, 265.0, 257.0, 296.0, 286.0, 292.0, 290.0, 286.0, 290.0, 292.0, 287.0, 283.0, 293.0, 290.0, 280.0, 284.0, 286.0, 294.0, 288.0, 316.0, 314.0, 280.0, 296.0, 288.0, 288.0, 283.0, 293.0, 285.0, 302.0, 288.0, 291.0, 279.0, 291.0, 315.0, 
312.0, 268.0, 257.0, 287.0, 289.0, 266.0, 259.0, 278.0, 286.0, 292.0, 287.0, 286.0, 287.0, 294.0, 282.0, 288.0, 288.0, 299.0, 288.0, 291.0, 288.0, 266.0, 264.0, 285.0, 288.0, 283.0, 290.0, 319.0, 311.0, 289.0, 287.0, 290.0, 289.0, 268.0, 254.0, 259.0, 260.0, 288.0, 294.0, 284.0, 292.0, 297.0, 293.0, 264.0, 261.0, 319.0, 311.0, 292.0, 284.0, 289.0, 290.0, 290.0, 286.0, 288.0, 291.0, 284.0, 289.0, 288.0, 288.0, 316.0, 317.0, 286.0, 290.0, 292.0, 284.0, 293.0, 289.0, 256.0, 269.0, 294.0, 288.0, 254.0, 253.0, 287.0, 292.0, 290.0, 292.0, 324.0, 309.0, 269.0, 264.0, 270.0, 260.0, 294.0, 293.0, 296.0, 288.0, 287.0, 289.0, 306.0, 321.0, 262.0, 271.0, 286.0, 290.0, 288.0, 285.0, 285.0, 291.0, 290.0, 286.0, 285.0, 299.0, 319.0, 317.0, 262.0, 268.0, 270.0, 255.0, 320.0, 319.0, 286.0, 287.0, 286.0, 287.0, 296.0, 286.0, 290.0, 292.0, 295.0, 287.0, 288.0, 285.0, 291.0, 285.0, 289.0, 290.0, 293.0, 283.0, 288.0, 293.0, 301.0, 326.0, 287.0, 292.0, 162.0, 157.0, 294.0, 282.0, 287.0, 292.0, 286.0, 290.0, 295.0, 281.0, 291.0, 288.0, 296.0, 286.0, 291.0, 288.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6956595979438758, "mean_inference_ms": 1.2410946468727593, "mean_action_processing_ms": 0.1334893448144431, "mean_env_wait_ms": 0.8374309604509989, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 639.0, "episode_reward_min": 319.0, "episode_reward_mean": 573.7, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 157.0}, "policy_reward_max": {"ppo": 326.0}, "policy_reward_mean": {"ppo": 286.85}, "hist_stats": {"episode_reward": [582.0, 627.0, 579.0, 573.0, 576.0, 579.0, 579.0, 582.0, 522.0, 582.0, 582.0, 576.0, 579.0, 576.0, 570.0, 570.0, 582.0, 630.0, 576.0, 576.0, 576.0, 587.0, 579.0, 570.0, 627.0, 525.0, 576.0, 525.0, 564.0, 579.0, 573.0, 576.0, 576.0, 587.0, 579.0, 530.0, 573.0, 573.0, 630.0, 576.0, 579.0, 522.0, 519.0, 582.0, 576.0, 590.0, 525.0, 630.0, 576.0, 579.0, 576.0, 579.0, 573.0, 576.0, 633.0, 576.0, 576.0, 
582.0, 525.0, 582.0, 507.0, 579.0, 582.0, 633.0, 533.0, 530.0, 587.0, 584.0, 576.0, 627.0, 533.0, 576.0, 573.0, 576.0, 576.0, 584.0, 636.0, 530.0, 525.0, 639.0, 573.0, 573.0, 582.0, 582.0, 582.0, 573.0, 576.0, 579.0, 576.0, 581.0, 627.0, 579.0, 319.0, 576.0, 579.0, 576.0, 576.0, 579.0, 582.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [295.0, 287.0, 310.0, 317.0, 293.0, 286.0, 284.0, 289.0, 282.0, 294.0, 290.0, 289.0, 291.0, 288.0, 290.0, 292.0, 265.0, 257.0, 296.0, 286.0, 292.0, 290.0, 286.0, 290.0, 292.0, 287.0, 283.0, 293.0, 290.0, 280.0, 284.0, 286.0, 294.0, 288.0, 316.0, 314.0, 280.0, 296.0, 288.0, 288.0, 283.0, 293.0, 285.0, 302.0, 288.0, 291.0, 279.0, 291.0, 315.0, 312.0, 268.0, 257.0, 287.0, 289.0, 266.0, 259.0, 278.0, 286.0, 292.0, 287.0, 286.0, 287.0, 294.0, 282.0, 288.0, 288.0, 299.0, 288.0, 291.0, 288.0, 266.0, 264.0, 285.0, 288.0, 283.0, 290.0, 319.0, 311.0, 289.0, 287.0, 290.0, 289.0, 268.0, 254.0, 259.0, 260.0, 288.0, 294.0, 284.0, 292.0, 297.0, 293.0, 264.0, 261.0, 319.0, 311.0, 292.0, 284.0, 289.0, 290.0, 290.0, 286.0, 288.0, 291.0, 284.0, 289.0, 288.0, 288.0, 316.0, 317.0, 286.0, 290.0, 292.0, 284.0, 293.0, 289.0, 256.0, 269.0, 294.0, 288.0, 254.0, 253.0, 287.0, 292.0, 290.0, 292.0, 324.0, 309.0, 269.0, 264.0, 270.0, 260.0, 294.0, 293.0, 296.0, 288.0, 287.0, 289.0, 306.0, 321.0, 262.0, 271.0, 286.0, 290.0, 288.0, 285.0, 285.0, 291.0, 290.0, 286.0, 285.0, 299.0, 319.0, 317.0, 262.0, 268.0, 270.0, 255.0, 320.0, 319.0, 286.0, 287.0, 286.0, 287.0, 296.0, 
286.0, 290.0, 292.0, 295.0, 287.0, 288.0, 285.0, 291.0, 285.0, 289.0, 290.0, 293.0, 283.0, 288.0, 293.0, 301.0, 326.0, 287.0, 292.0, 162.0, 157.0, 294.0, 282.0, 287.0, 292.0, 286.0, 290.0, 295.0, 281.0, 291.0, 288.0, 296.0, 286.0, 291.0, 288.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6956595979438758, "mean_inference_ms": 1.2410946468727593, "mean_action_processing_ms": 0.1334893448144431, "mean_env_wait_ms": 0.8374309604509989, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 10752000, "num_agent_steps_trained": 10752000, "num_env_steps_sampled": 5376000, "num_env_steps_trained": 5376000, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 5376000, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 10752000, "timers": {"training_iteration_time_ms": 3639.429, "learn_time_ms": 1124.377, "learn_throughput": 11384.079, "synch_weights_time_ms": 12.4}, "counters": {"num_env_steps_sampled": 5376000, "num_env_steps_trained": 5376000, "num_agent_steps_sampled": 10752000, "num_agent_steps_trained": 10752000}, "done": false, "episodes_total": 13440, "training_iteration": 420, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-27-46", "timestamp": 1666582066, "time_this_iter_s": 3.6863930225372314, "time_total_s": 1602.824954032898, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1602.824954032898, "timesteps_since_restore": 0, "iterations_since_restore": 420, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.72, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 197.0, "sparse_reward_min": 100, "sparse_reward_max": 220, "shaped_reward_mean": 176.44, "shaped_reward_min": 119, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.48, "onion_pickup_agent_0_min": 10, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 16.28, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.38, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 16.11, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.2, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.17, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.05, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, 
"useful_onion_drop_agent_1_mean": 0.06, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.0, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 15.78, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.3, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 5.29, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.15, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.15, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.07, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.07, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.01, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.03, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.98, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.94, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.04, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.0, "optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 15.78, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.0, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 15.78, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0001731559168547392, "policy_loss": -0.00021116388961672783, "vf_loss": 
7.883594512939453, "vf_explained_var": 0.5794853568077087, "kl": 0.00237162783741951, "entropy": 0.8080763816833496, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 5388800, "num_env_steps_trained": 5388800, "num_agent_steps_sampled": 10777600, "num_agent_steps_trained": 10777600}, "sampler_results": {"episode_reward_max": 639.0, "episode_reward_min": 319.0, "episode_reward_mean": 570.44, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 157.0}, "policy_reward_max": {"ppo": 326.0}, "policy_reward_mean": {"ppo": 285.22}, "custom_metrics": {"sparse_reward_mean": 197.0, "sparse_reward_min": 100, "sparse_reward_max": 220, "shaped_reward_mean": 176.44, "shaped_reward_min": 119, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.48, "onion_pickup_agent_0_min": 10, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 16.28, 
"onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.38, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 16.11, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.2, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.17, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.05, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.06, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.0, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 15.78, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.3, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 5.29, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.15, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.15, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.07, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.07, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.01, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.03, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.98, "soup_delivery_agent_0_min": 3, 
"soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.94, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.04, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.0, "optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 15.78, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.0, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 15.78, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [576.0, 587.0, 579.0, 530.0, 573.0, 573.0, 630.0, 576.0, 579.0, 522.0, 519.0, 582.0, 576.0, 590.0, 525.0, 630.0, 576.0, 579.0, 576.0, 579.0, 573.0, 576.0, 633.0, 576.0, 576.0, 582.0, 525.0, 582.0, 507.0, 579.0, 582.0, 633.0, 533.0, 530.0, 587.0, 584.0, 576.0, 627.0, 533.0, 576.0, 573.0, 576.0, 576.0, 584.0, 636.0, 530.0, 525.0, 639.0, 573.0, 573.0, 582.0, 582.0, 582.0, 573.0, 576.0, 579.0, 576.0, 581.0, 627.0, 579.0, 319.0, 576.0, 579.0, 576.0, 576.0, 579.0, 582.0, 579.0, 582.0, 582.0, 579.0, 524.0, 579.0, 576.0, 507.0, 513.0, 579.0, 579.0, 579.0, 579.0, 584.0, 582.0, 576.0, 579.0, 530.0, 570.0, 570.0, 582.0, 525.0, 582.0, 579.0, 579.0, 573.0, 587.0, 582.0, 582.0, 519.0, 576.0, 530.0, 584.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [288.0, 288.0, 299.0, 288.0, 291.0, 288.0, 266.0, 264.0, 285.0, 288.0, 283.0, 290.0, 319.0, 311.0, 289.0, 287.0, 290.0, 289.0, 268.0, 254.0, 259.0, 260.0, 288.0, 294.0, 284.0, 292.0, 297.0, 293.0, 264.0, 261.0, 319.0, 311.0, 292.0, 284.0, 289.0, 290.0, 290.0, 286.0, 288.0, 291.0, 284.0, 289.0, 288.0, 288.0, 316.0, 317.0, 286.0, 290.0, 292.0, 
284.0, 293.0, 289.0, 256.0, 269.0, 294.0, 288.0, 254.0, 253.0, 287.0, 292.0, 290.0, 292.0, 324.0, 309.0, 269.0, 264.0, 270.0, 260.0, 294.0, 293.0, 296.0, 288.0, 287.0, 289.0, 306.0, 321.0, 262.0, 271.0, 286.0, 290.0, 288.0, 285.0, 285.0, 291.0, 290.0, 286.0, 285.0, 299.0, 319.0, 317.0, 262.0, 268.0, 270.0, 255.0, 320.0, 319.0, 286.0, 287.0, 286.0, 287.0, 296.0, 286.0, 290.0, 292.0, 295.0, 287.0, 288.0, 285.0, 291.0, 285.0, 289.0, 290.0, 293.0, 283.0, 288.0, 293.0, 301.0, 326.0, 287.0, 292.0, 162.0, 157.0, 294.0, 282.0, 287.0, 292.0, 286.0, 290.0, 295.0, 281.0, 291.0, 288.0, 296.0, 286.0, 291.0, 288.0, 294.0, 288.0, 287.0, 295.0, 286.0, 293.0, 257.0, 267.0, 296.0, 283.0, 291.0, 285.0, 247.0, 260.0, 270.0, 243.0, 291.0, 288.0, 286.0, 293.0, 285.0, 294.0, 287.0, 292.0, 289.0, 295.0, 281.0, 301.0, 286.0, 290.0, 289.0, 290.0, 263.0, 267.0, 291.0, 279.0, 283.0, 287.0, 286.0, 296.0, 267.0, 258.0, 293.0, 289.0, 295.0, 284.0, 292.0, 287.0, 290.0, 283.0, 292.0, 295.0, 292.0, 290.0, 294.0, 288.0, 258.0, 261.0, 293.0, 283.0, 255.0, 275.0, 291.0, 293.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6956192043467821, "mean_inference_ms": 1.240981555894833, "mean_action_processing_ms": 0.13348244018147493, "mean_env_wait_ms": 0.8373655983322869, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 639.0, "episode_reward_min": 319.0, "episode_reward_mean": 570.44, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 157.0}, "policy_reward_max": {"ppo": 326.0}, "policy_reward_mean": {"ppo": 285.22}, "hist_stats": {"episode_reward": [576.0, 587.0, 579.0, 530.0, 573.0, 573.0, 630.0, 576.0, 579.0, 522.0, 519.0, 582.0, 576.0, 590.0, 525.0, 630.0, 576.0, 579.0, 576.0, 579.0, 573.0, 576.0, 633.0, 576.0, 576.0, 582.0, 525.0, 582.0, 507.0, 579.0, 582.0, 633.0, 533.0, 530.0, 587.0, 584.0, 576.0, 627.0, 533.0, 576.0, 573.0, 576.0, 576.0, 584.0, 636.0, 530.0, 525.0, 639.0, 573.0, 573.0, 582.0, 582.0, 582.0, 573.0, 576.0, 579.0, 576.0, 
581.0, 627.0, 579.0, 319.0, 576.0, 579.0, 576.0, 576.0, 579.0, 582.0, 579.0, 582.0, 582.0, 579.0, 524.0, 579.0, 576.0, 507.0, 513.0, 579.0, 579.0, 579.0, 579.0, 584.0, 582.0, 576.0, 579.0, 530.0, 570.0, 570.0, 582.0, 525.0, 582.0, 579.0, 579.0, 573.0, 587.0, 582.0, 582.0, 519.0, 576.0, 530.0, 584.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [288.0, 288.0, 299.0, 288.0, 291.0, 288.0, 266.0, 264.0, 285.0, 288.0, 283.0, 290.0, 319.0, 311.0, 289.0, 287.0, 290.0, 289.0, 268.0, 254.0, 259.0, 260.0, 288.0, 294.0, 284.0, 292.0, 297.0, 293.0, 264.0, 261.0, 319.0, 311.0, 292.0, 284.0, 289.0, 290.0, 290.0, 286.0, 288.0, 291.0, 284.0, 289.0, 288.0, 288.0, 316.0, 317.0, 286.0, 290.0, 292.0, 284.0, 293.0, 289.0, 256.0, 269.0, 294.0, 288.0, 254.0, 253.0, 287.0, 292.0, 290.0, 292.0, 324.0, 309.0, 269.0, 264.0, 270.0, 260.0, 294.0, 293.0, 296.0, 288.0, 287.0, 289.0, 306.0, 321.0, 262.0, 271.0, 286.0, 290.0, 288.0, 285.0, 285.0, 291.0, 290.0, 286.0, 285.0, 299.0, 319.0, 317.0, 262.0, 268.0, 270.0, 255.0, 320.0, 319.0, 286.0, 287.0, 286.0, 287.0, 296.0, 286.0, 290.0, 292.0, 295.0, 287.0, 288.0, 285.0, 291.0, 285.0, 289.0, 290.0, 293.0, 283.0, 288.0, 293.0, 301.0, 326.0, 287.0, 292.0, 162.0, 157.0, 294.0, 282.0, 287.0, 292.0, 286.0, 290.0, 295.0, 281.0, 291.0, 288.0, 296.0, 286.0, 291.0, 288.0, 294.0, 288.0, 287.0, 295.0, 286.0, 293.0, 257.0, 267.0, 296.0, 283.0, 291.0, 285.0, 247.0, 260.0, 270.0, 243.0, 291.0, 288.0, 286.0, 293.0, 285.0, 294.0, 287.0, 292.0, 289.0, 295.0, 281.0, 301.0, 286.0, 
290.0, 289.0, 290.0, 263.0, 267.0, 291.0, 279.0, 283.0, 287.0, 286.0, 296.0, 267.0, 258.0, 293.0, 289.0, 295.0, 284.0, 292.0, 287.0, 290.0, 283.0, 292.0, 295.0, 292.0, 290.0, 294.0, 288.0, 258.0, 261.0, 293.0, 283.0, 255.0, 275.0, 291.0, 293.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6956192043467821, "mean_inference_ms": 1.240981555894833, "mean_action_processing_ms": 0.13348244018147493, "mean_env_wait_ms": 0.8373655983322869, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 10777600, "num_agent_steps_trained": 10777600, "num_env_steps_sampled": 5388800, "num_env_steps_trained": 5388800, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 5388800, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 10777600, "timers": {"training_iteration_time_ms": 3632.633, "learn_time_ms": 1117.212, "learn_throughput": 11457.093, "synch_weights_time_ms": 12.361}, "counters": {"num_env_steps_sampled": 5388800, "num_env_steps_trained": 5388800, "num_agent_steps_sampled": 10777600, "num_agent_steps_trained": 10777600}, "done": false, "episodes_total": 13472, "training_iteration": 421, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-27-50", "timestamp": 1666582070, "time_this_iter_s": 3.590386152267456, "time_total_s": 1606.4153401851654, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1606.4153401851654, "timesteps_since_restore": 0, "iterations_since_restore": 421, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.716666666666665, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 198.2, "sparse_reward_min": 100, "sparse_reward_max": 220, "shaped_reward_mean": 177.09, "shaped_reward_min": 119, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.21, "onion_pickup_agent_0_min": 10, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 16.59, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.17, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 16.42, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.16, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.2, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.02, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, 
"useful_onion_drop_agent_1_mean": 0.06, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.78, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 16.05, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.45, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.21, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.28, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.07, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.09, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.1, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.04, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.11, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.0, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.08, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.91, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.04, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.78, "optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 16.05, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.78, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 16.05, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0008794745663180947, "policy_loss": 0.0005092529463581741, "vf_loss": 
7.746166706085205, "vf_explained_var": 0.5636370182037354, "kl": 0.0031342788133770227, "entropy": 0.8087892532348633, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 5401600, "num_env_steps_trained": 5401600, "num_agent_steps_sampled": 10803200, "num_agent_steps_trained": 10803200}, "sampler_results": {"episode_reward_max": 639.0, "episode_reward_min": 319.0, "episode_reward_mean": 573.49, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 157.0}, "policy_reward_max": {"ppo": 326.0}, "policy_reward_mean": {"ppo": 286.745}, "custom_metrics": {"sparse_reward_mean": 198.2, "sparse_reward_min": 100, "sparse_reward_max": 220, "shaped_reward_mean": 177.09, "shaped_reward_min": 119, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.21, "onion_pickup_agent_0_min": 10, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 16.59, 
"onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.17, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 16.42, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.16, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.2, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.02, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.06, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.78, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 16.05, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.45, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.21, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.28, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.07, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.09, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.1, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.04, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.11, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.0, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.08, "soup_delivery_agent_0_min": 3, 
"soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.91, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.04, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.78, "optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 16.05, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.78, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 16.05, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [533.0, 530.0, 587.0, 584.0, 576.0, 627.0, 533.0, 576.0, 573.0, 576.0, 576.0, 584.0, 636.0, 530.0, 525.0, 639.0, 573.0, 573.0, 582.0, 582.0, 582.0, 573.0, 576.0, 579.0, 576.0, 581.0, 627.0, 579.0, 319.0, 576.0, 579.0, 576.0, 576.0, 579.0, 582.0, 579.0, 582.0, 582.0, 579.0, 524.0, 579.0, 576.0, 507.0, 513.0, 579.0, 579.0, 579.0, 579.0, 584.0, 582.0, 576.0, 579.0, 530.0, 570.0, 570.0, 582.0, 525.0, 582.0, 579.0, 579.0, 573.0, 587.0, 582.0, 582.0, 519.0, 576.0, 530.0, 584.0, 630.0, 573.0, 570.0, 533.0, 579.0, 576.0, 587.0, 579.0, 576.0, 579.0, 570.0, 516.0, 630.0, 633.0, 627.0, 570.0, 630.0, 579.0, 576.0, 579.0, 525.0, 630.0, 587.0, 573.0, 579.0, 573.0, 579.0, 579.0, 584.0, 630.0, 573.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [269.0, 264.0, 270.0, 260.0, 294.0, 293.0, 296.0, 288.0, 287.0, 289.0, 306.0, 321.0, 262.0, 271.0, 286.0, 290.0, 288.0, 285.0, 285.0, 291.0, 290.0, 286.0, 285.0, 299.0, 319.0, 317.0, 262.0, 268.0, 270.0, 255.0, 320.0, 319.0, 286.0, 287.0, 286.0, 287.0, 296.0, 286.0, 290.0, 292.0, 295.0, 287.0, 288.0, 285.0, 291.0, 285.0, 289.0, 290.0, 293.0, 
283.0, 288.0, 293.0, 301.0, 326.0, 287.0, 292.0, 162.0, 157.0, 294.0, 282.0, 287.0, 292.0, 286.0, 290.0, 295.0, 281.0, 291.0, 288.0, 296.0, 286.0, 291.0, 288.0, 294.0, 288.0, 287.0, 295.0, 286.0, 293.0, 257.0, 267.0, 296.0, 283.0, 291.0, 285.0, 247.0, 260.0, 270.0, 243.0, 291.0, 288.0, 286.0, 293.0, 285.0, 294.0, 287.0, 292.0, 289.0, 295.0, 281.0, 301.0, 286.0, 290.0, 289.0, 290.0, 263.0, 267.0, 291.0, 279.0, 283.0, 287.0, 286.0, 296.0, 267.0, 258.0, 293.0, 289.0, 295.0, 284.0, 292.0, 287.0, 290.0, 283.0, 292.0, 295.0, 292.0, 290.0, 294.0, 288.0, 258.0, 261.0, 293.0, 283.0, 255.0, 275.0, 291.0, 293.0, 321.0, 309.0, 285.0, 288.0, 277.0, 293.0, 265.0, 268.0, 287.0, 292.0, 289.0, 287.0, 288.0, 299.0, 286.0, 293.0, 285.0, 291.0, 291.0, 288.0, 291.0, 279.0, 259.0, 257.0, 317.0, 313.0, 319.0, 314.0, 313.0, 314.0, 294.0, 276.0, 314.0, 316.0, 290.0, 289.0, 288.0, 288.0, 292.0, 287.0, 264.0, 261.0, 319.0, 311.0, 288.0, 299.0, 288.0, 285.0, 290.0, 289.0, 293.0, 280.0, 290.0, 289.0, 291.0, 288.0, 290.0, 294.0, 314.0, 316.0, 285.0, 288.0, 292.0, 290.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6955737985586028, "mean_inference_ms": 1.240856488537721, "mean_action_processing_ms": 0.13347434229680755, "mean_env_wait_ms": 0.8372907922870119, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 639.0, "episode_reward_min": 319.0, "episode_reward_mean": 573.49, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 157.0}, "policy_reward_max": {"ppo": 326.0}, "policy_reward_mean": {"ppo": 286.745}, "hist_stats": {"episode_reward": [533.0, 530.0, 587.0, 584.0, 576.0, 627.0, 533.0, 576.0, 573.0, 576.0, 576.0, 584.0, 636.0, 530.0, 525.0, 639.0, 573.0, 573.0, 582.0, 582.0, 582.0, 573.0, 576.0, 579.0, 576.0, 581.0, 627.0, 579.0, 319.0, 576.0, 579.0, 576.0, 576.0, 579.0, 582.0, 579.0, 582.0, 582.0, 579.0, 524.0, 579.0, 576.0, 507.0, 513.0, 579.0, 579.0, 579.0, 579.0, 584.0, 582.0, 576.0, 579.0, 530.0, 570.0, 570.0, 582.0, 525.0, 
582.0, 579.0, 579.0, 573.0, 587.0, 582.0, 582.0, 519.0, 576.0, 530.0, 584.0, 630.0, 573.0, 570.0, 533.0, 579.0, 576.0, 587.0, 579.0, 576.0, 579.0, 570.0, 516.0, 630.0, 633.0, 627.0, 570.0, 630.0, 579.0, 576.0, 579.0, 525.0, 630.0, 587.0, 573.0, 579.0, 573.0, 579.0, 579.0, 584.0, 630.0, 573.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [269.0, 264.0, 270.0, 260.0, 294.0, 293.0, 296.0, 288.0, 287.0, 289.0, 306.0, 321.0, 262.0, 271.0, 286.0, 290.0, 288.0, 285.0, 285.0, 291.0, 290.0, 286.0, 285.0, 299.0, 319.0, 317.0, 262.0, 268.0, 270.0, 255.0, 320.0, 319.0, 286.0, 287.0, 286.0, 287.0, 296.0, 286.0, 290.0, 292.0, 295.0, 287.0, 288.0, 285.0, 291.0, 285.0, 289.0, 290.0, 293.0, 283.0, 288.0, 293.0, 301.0, 326.0, 287.0, 292.0, 162.0, 157.0, 294.0, 282.0, 287.0, 292.0, 286.0, 290.0, 295.0, 281.0, 291.0, 288.0, 296.0, 286.0, 291.0, 288.0, 294.0, 288.0, 287.0, 295.0, 286.0, 293.0, 257.0, 267.0, 296.0, 283.0, 291.0, 285.0, 247.0, 260.0, 270.0, 243.0, 291.0, 288.0, 286.0, 293.0, 285.0, 294.0, 287.0, 292.0, 289.0, 295.0, 281.0, 301.0, 286.0, 290.0, 289.0, 290.0, 263.0, 267.0, 291.0, 279.0, 283.0, 287.0, 286.0, 296.0, 267.0, 258.0, 293.0, 289.0, 295.0, 284.0, 292.0, 287.0, 290.0, 283.0, 292.0, 295.0, 292.0, 290.0, 294.0, 288.0, 258.0, 261.0, 293.0, 283.0, 255.0, 275.0, 291.0, 293.0, 321.0, 309.0, 285.0, 288.0, 277.0, 293.0, 265.0, 268.0, 287.0, 292.0, 289.0, 287.0, 288.0, 299.0, 286.0, 293.0, 285.0, 291.0, 291.0, 288.0, 291.0, 279.0, 259.0, 257.0, 317.0, 313.0, 319.0, 314.0, 313.0, 
314.0, 294.0, 276.0, 314.0, 316.0, 290.0, 289.0, 288.0, 288.0, 292.0, 287.0, 264.0, 261.0, 319.0, 311.0, 288.0, 299.0, 288.0, 285.0, 290.0, 289.0, 293.0, 280.0, 290.0, 289.0, 291.0, 288.0, 290.0, 294.0, 314.0, 316.0, 285.0, 288.0, 292.0, 290.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6955737985586028, "mean_inference_ms": 1.240856488537721, "mean_action_processing_ms": 0.13347434229680755, "mean_env_wait_ms": 0.8372907922870119, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 10803200, "num_agent_steps_trained": 10803200, "num_env_steps_sampled": 5401600, "num_env_steps_trained": 5401600, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 5401600, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 10803200, "timers": {"training_iteration_time_ms": 3602.841, "learn_time_ms": 1102.311, "learn_throughput": 11611.973, "synch_weights_time_ms": 11.848}, "counters": {"num_env_steps_sampled": 5401600, "num_env_steps_trained": 5401600, "num_agent_steps_sampled": 10803200, "num_agent_steps_trained": 10803200}, "done": false, "episodes_total": 13504, "training_iteration": 422, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-27-53", "timestamp": 1666582073, "time_this_iter_s": 3.482290744781494, "time_total_s": 1609.897630929947, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1609.897630929947, "timesteps_since_restore": 0, "iterations_since_restore": 422, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.599999999999998, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 197.0, "sparse_reward_min": 80, "sparse_reward_max": 220, "shaped_reward_mean": 175.84, "shaped_reward_min": 97, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.22, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 16.3, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.14, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 16.16, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.14, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.17, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.01, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, 
"useful_onion_drop_agent_1_mean": 0.04, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.73, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 15.89, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.35, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.29, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.17, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.14, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.07, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.11, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.02, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 4.99, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.02, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.9, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.73, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 15.89, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.73, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 15.89, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.002271961886435747, "policy_loss": -0.002665527630597353, "vf_loss": 
7.900609016418457, "vf_explained_var": 0.5644186735153198, "kl": 0.005383658222854137, "entropy": 0.7929897904396057, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 5414400, "num_env_steps_trained": 5414400, "num_agent_steps_sampled": 10828800, "num_agent_steps_trained": 10828800}, "sampler_results": {"episode_reward_max": 633.0, "episode_reward_min": 265.0, "episode_reward_mean": 569.84, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 126.0}, "policy_reward_max": {"ppo": 321.0}, "policy_reward_mean": {"ppo": 284.92}, "custom_metrics": {"sparse_reward_mean": 197.0, "sparse_reward_min": 80, "sparse_reward_max": 220, "shaped_reward_mean": 175.84, "shaped_reward_min": 97, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.22, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 16.3, 
"onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.14, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 16.16, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.14, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.17, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.01, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.04, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.73, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 15.89, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.35, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.29, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.17, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.14, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.07, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.11, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.02, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 4.99, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.02, "soup_delivery_agent_0_min": 2, 
"soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.9, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.73, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 15.89, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.73, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 15.89, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [576.0, 579.0, 582.0, 579.0, 582.0, 582.0, 579.0, 524.0, 579.0, 576.0, 507.0, 513.0, 579.0, 579.0, 579.0, 579.0, 584.0, 582.0, 576.0, 579.0, 530.0, 570.0, 570.0, 582.0, 525.0, 582.0, 579.0, 579.0, 573.0, 587.0, 582.0, 582.0, 519.0, 576.0, 530.0, 584.0, 630.0, 573.0, 570.0, 533.0, 579.0, 576.0, 587.0, 579.0, 576.0, 579.0, 570.0, 516.0, 630.0, 633.0, 627.0, 570.0, 630.0, 579.0, 576.0, 579.0, 525.0, 630.0, 587.0, 573.0, 579.0, 573.0, 579.0, 579.0, 584.0, 630.0, 573.0, 582.0, 587.0, 579.0, 570.0, 582.0, 579.0, 297.0, 576.0, 582.0, 581.0, 576.0, 525.0, 579.0, 579.0, 579.0, 576.0, 265.0, 582.0, 579.0, 584.0, 573.0, 525.0, 579.0, 573.0, 573.0, 587.0, 579.0, 579.0, 576.0, 570.0, 579.0, 630.0, 573.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [295.0, 281.0, 291.0, 288.0, 296.0, 286.0, 291.0, 288.0, 294.0, 288.0, 287.0, 295.0, 286.0, 293.0, 257.0, 267.0, 296.0, 283.0, 291.0, 285.0, 247.0, 260.0, 270.0, 243.0, 291.0, 288.0, 286.0, 293.0, 285.0, 294.0, 287.0, 292.0, 289.0, 295.0, 281.0, 301.0, 286.0, 290.0, 289.0, 290.0, 263.0, 267.0, 291.0, 279.0, 283.0, 287.0, 286.0, 296.0, 267.0, 
258.0, 293.0, 289.0, 295.0, 284.0, 292.0, 287.0, 290.0, 283.0, 292.0, 295.0, 292.0, 290.0, 294.0, 288.0, 258.0, 261.0, 293.0, 283.0, 255.0, 275.0, 291.0, 293.0, 321.0, 309.0, 285.0, 288.0, 277.0, 293.0, 265.0, 268.0, 287.0, 292.0, 289.0, 287.0, 288.0, 299.0, 286.0, 293.0, 285.0, 291.0, 291.0, 288.0, 291.0, 279.0, 259.0, 257.0, 317.0, 313.0, 319.0, 314.0, 313.0, 314.0, 294.0, 276.0, 314.0, 316.0, 290.0, 289.0, 288.0, 288.0, 292.0, 287.0, 264.0, 261.0, 319.0, 311.0, 288.0, 299.0, 288.0, 285.0, 290.0, 289.0, 293.0, 280.0, 290.0, 289.0, 291.0, 288.0, 290.0, 294.0, 314.0, 316.0, 285.0, 288.0, 292.0, 290.0, 289.0, 298.0, 289.0, 290.0, 291.0, 279.0, 291.0, 291.0, 295.0, 284.0, 148.0, 149.0, 281.0, 295.0, 294.0, 288.0, 283.0, 298.0, 289.0, 287.0, 256.0, 269.0, 285.0, 294.0, 292.0, 287.0, 295.0, 284.0, 285.0, 291.0, 139.0, 126.0, 292.0, 290.0, 288.0, 291.0, 287.0, 297.0, 286.0, 287.0, 257.0, 268.0, 282.0, 297.0, 282.0, 291.0, 283.0, 290.0, 294.0, 293.0, 291.0, 288.0, 286.0, 293.0, 286.0, 290.0, 282.0, 288.0, 289.0, 290.0, 316.0, 314.0, 292.0, 281.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6955416679613102, "mean_inference_ms": 1.2407360427750618, "mean_action_processing_ms": 0.13346611969775435, "mean_env_wait_ms": 0.837218924460605, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 633.0, "episode_reward_min": 265.0, "episode_reward_mean": 569.84, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 126.0}, "policy_reward_max": {"ppo": 321.0}, "policy_reward_mean": {"ppo": 284.92}, "hist_stats": {"episode_reward": [576.0, 579.0, 582.0, 579.0, 582.0, 582.0, 579.0, 524.0, 579.0, 576.0, 507.0, 513.0, 579.0, 579.0, 579.0, 579.0, 584.0, 582.0, 576.0, 579.0, 530.0, 570.0, 570.0, 582.0, 525.0, 582.0, 579.0, 579.0, 573.0, 587.0, 582.0, 582.0, 519.0, 576.0, 530.0, 584.0, 630.0, 573.0, 570.0, 533.0, 579.0, 576.0, 587.0, 579.0, 576.0, 579.0, 570.0, 516.0, 630.0, 633.0, 627.0, 570.0, 630.0, 579.0, 576.0, 579.0, 525.0, 
630.0, 587.0, 573.0, 579.0, 573.0, 579.0, 579.0, 584.0, 630.0, 573.0, 582.0, 587.0, 579.0, 570.0, 582.0, 579.0, 297.0, 576.0, 582.0, 581.0, 576.0, 525.0, 579.0, 579.0, 579.0, 576.0, 265.0, 582.0, 579.0, 584.0, 573.0, 525.0, 579.0, 573.0, 573.0, 587.0, 579.0, 579.0, 576.0, 570.0, 579.0, 630.0, 573.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [295.0, 281.0, 291.0, 288.0, 296.0, 286.0, 291.0, 288.0, 294.0, 288.0, 287.0, 295.0, 286.0, 293.0, 257.0, 267.0, 296.0, 283.0, 291.0, 285.0, 247.0, 260.0, 270.0, 243.0, 291.0, 288.0, 286.0, 293.0, 285.0, 294.0, 287.0, 292.0, 289.0, 295.0, 281.0, 301.0, 286.0, 290.0, 289.0, 290.0, 263.0, 267.0, 291.0, 279.0, 283.0, 287.0, 286.0, 296.0, 267.0, 258.0, 293.0, 289.0, 295.0, 284.0, 292.0, 287.0, 290.0, 283.0, 292.0, 295.0, 292.0, 290.0, 294.0, 288.0, 258.0, 261.0, 293.0, 283.0, 255.0, 275.0, 291.0, 293.0, 321.0, 309.0, 285.0, 288.0, 277.0, 293.0, 265.0, 268.0, 287.0, 292.0, 289.0, 287.0, 288.0, 299.0, 286.0, 293.0, 285.0, 291.0, 291.0, 288.0, 291.0, 279.0, 259.0, 257.0, 317.0, 313.0, 319.0, 314.0, 313.0, 314.0, 294.0, 276.0, 314.0, 316.0, 290.0, 289.0, 288.0, 288.0, 292.0, 287.0, 264.0, 261.0, 319.0, 311.0, 288.0, 299.0, 288.0, 285.0, 290.0, 289.0, 293.0, 280.0, 290.0, 289.0, 291.0, 288.0, 290.0, 294.0, 314.0, 316.0, 285.0, 288.0, 292.0, 290.0, 289.0, 298.0, 289.0, 290.0, 291.0, 279.0, 291.0, 291.0, 295.0, 284.0, 148.0, 149.0, 281.0, 295.0, 294.0, 288.0, 283.0, 298.0, 289.0, 287.0, 256.0, 269.0, 285.0, 294.0, 292.0, 287.0, 295.0, 284.0, 285.0, 
291.0, 139.0, 126.0, 292.0, 290.0, 288.0, 291.0, 287.0, 297.0, 286.0, 287.0, 257.0, 268.0, 282.0, 297.0, 282.0, 291.0, 283.0, 290.0, 294.0, 293.0, 291.0, 288.0, 286.0, 293.0, 286.0, 290.0, 282.0, 288.0, 289.0, 290.0, 316.0, 314.0, 292.0, 281.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6955416679613102, "mean_inference_ms": 1.2407360427750618, "mean_action_processing_ms": 0.13346611969775435, "mean_env_wait_ms": 0.837218924460605, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 10828800, "num_agent_steps_trained": 10828800, "num_env_steps_sampled": 5414400, "num_env_steps_trained": 5414400, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 5414400, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 10828800, "timers": {"training_iteration_time_ms": 3589.332, "learn_time_ms": 1103.678, "learn_throughput": 11597.584, "synch_weights_time_ms": 11.257}, "counters": {"num_env_steps_sampled": 5414400, "num_env_steps_trained": 5414400, "num_agent_steps_sampled": 10828800, "num_agent_steps_trained": 10828800}, "done": false, "episodes_total": 13536, "training_iteration": 423, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-27-57", "timestamp": 1666582077, "time_this_iter_s": 3.654621124267578, "time_total_s": 1613.5522520542145, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1613.5522520542145, "timesteps_since_restore": 0, "iterations_since_restore": 423, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.939999999999998, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 195.8, "sparse_reward_min": 40, "sparse_reward_max": 220, "shaped_reward_mean": 174.58, "shaped_reward_min": 43, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.89, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 16.4, "onion_pickup_agent_1_min": 5, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 15.76, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 16.25, "useful_onion_pickup_agent_1_min": 4, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.12, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.17, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.03, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, 
"useful_onion_drop_agent_1_mean": 0.05, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.38, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 16.0, "potting_onion_agent_1_min": 4, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.33, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.21, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.14, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.09, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.08, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.1, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.03, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 4.92, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.02, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.84, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.38, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 16.0, "optimal_onion_potting_agent_1_min": 4, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.38, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 16.0, "viable_onion_potting_agent_1_min": 4, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0032100635580718517, "policy_loss": 0.002823136979714036, "vf_loss": 
7.889589309692383, "vf_explained_var": 0.5627092123031616, "kl": 0.0029599564149975777, "entropy": 0.8040643930435181, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 5427200, "num_env_steps_trained": 5427200, "num_agent_steps_sampled": 10854400, "num_agent_steps_trained": 10854400}, "sampler_results": {"episode_reward_max": 636.0, "episode_reward_min": 123.0, "episode_reward_mean": 566.18, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 60.0}, "policy_reward_max": {"ppo": 322.0}, "policy_reward_mean": {"ppo": 283.09}, "custom_metrics": {"sparse_reward_mean": 195.8, "sparse_reward_min": 40, "sparse_reward_max": 220, "shaped_reward_mean": 174.58, "shaped_reward_min": 43, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.89, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 16.4, 
"onion_pickup_agent_1_min": 5, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 15.76, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 16.25, "useful_onion_pickup_agent_1_min": 4, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.12, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.17, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.03, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.05, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.38, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 16.0, "potting_onion_agent_1_min": 4, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.33, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.21, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.14, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.09, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.08, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.1, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.03, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 4.92, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.02, "soup_delivery_agent_0_min": 1, 
"soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.84, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.38, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 16.0, "optimal_onion_potting_agent_1_min": 4, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.38, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 16.0, "viable_onion_potting_agent_1_min": 4, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [519.0, 576.0, 530.0, 584.0, 630.0, 573.0, 570.0, 533.0, 579.0, 576.0, 587.0, 579.0, 576.0, 579.0, 570.0, 516.0, 630.0, 633.0, 627.0, 570.0, 630.0, 579.0, 576.0, 579.0, 525.0, 630.0, 587.0, 573.0, 579.0, 573.0, 579.0, 579.0, 584.0, 630.0, 573.0, 582.0, 587.0, 579.0, 570.0, 582.0, 579.0, 297.0, 576.0, 582.0, 581.0, 576.0, 525.0, 579.0, 579.0, 579.0, 576.0, 265.0, 582.0, 579.0, 584.0, 573.0, 525.0, 579.0, 573.0, 573.0, 587.0, 579.0, 579.0, 576.0, 570.0, 579.0, 630.0, 573.0, 576.0, 579.0, 576.0, 576.0, 627.0, 519.0, 579.0, 513.0, 630.0, 570.0, 582.0, 579.0, 587.0, 579.0, 123.0, 570.0, 587.0, 573.0, 579.0, 582.0, 579.0, 576.0, 522.0, 582.0, 579.0, 584.0, 573.0, 582.0, 522.0, 519.0, 636.0, 530.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [258.0, 261.0, 293.0, 283.0, 255.0, 275.0, 291.0, 293.0, 321.0, 309.0, 285.0, 288.0, 277.0, 293.0, 265.0, 268.0, 287.0, 292.0, 289.0, 287.0, 288.0, 299.0, 286.0, 293.0, 285.0, 291.0, 291.0, 288.0, 291.0, 279.0, 259.0, 257.0, 317.0, 313.0, 319.0, 314.0, 313.0, 314.0, 294.0, 276.0, 314.0, 316.0, 290.0, 289.0, 288.0, 288.0, 292.0, 287.0, 264.0, 
261.0, 319.0, 311.0, 288.0, 299.0, 288.0, 285.0, 290.0, 289.0, 293.0, 280.0, 290.0, 289.0, 291.0, 288.0, 290.0, 294.0, 314.0, 316.0, 285.0, 288.0, 292.0, 290.0, 289.0, 298.0, 289.0, 290.0, 291.0, 279.0, 291.0, 291.0, 295.0, 284.0, 148.0, 149.0, 281.0, 295.0, 294.0, 288.0, 283.0, 298.0, 289.0, 287.0, 256.0, 269.0, 285.0, 294.0, 292.0, 287.0, 295.0, 284.0, 285.0, 291.0, 139.0, 126.0, 292.0, 290.0, 288.0, 291.0, 287.0, 297.0, 286.0, 287.0, 257.0, 268.0, 282.0, 297.0, 282.0, 291.0, 283.0, 290.0, 294.0, 293.0, 291.0, 288.0, 286.0, 293.0, 286.0, 290.0, 282.0, 288.0, 289.0, 290.0, 316.0, 314.0, 292.0, 281.0, 296.0, 280.0, 293.0, 286.0, 277.0, 299.0, 291.0, 285.0, 312.0, 315.0, 250.0, 269.0, 291.0, 288.0, 253.0, 260.0, 308.0, 322.0, 285.0, 285.0, 290.0, 292.0, 290.0, 289.0, 296.0, 291.0, 286.0, 293.0, 63.0, 60.0, 288.0, 282.0, 295.0, 292.0, 282.0, 291.0, 288.0, 291.0, 290.0, 292.0, 283.0, 296.0, 290.0, 286.0, 254.0, 268.0, 291.0, 291.0, 290.0, 289.0, 288.0, 296.0, 289.0, 284.0, 294.0, 288.0, 265.0, 257.0, 254.0, 265.0, 316.0, 320.0, 266.0, 264.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6955096508230176, "mean_inference_ms": 1.2406080064538771, "mean_action_processing_ms": 0.1334579355462166, "mean_env_wait_ms": 0.837142387495199, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 636.0, "episode_reward_min": 123.0, "episode_reward_mean": 566.18, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 60.0}, "policy_reward_max": {"ppo": 322.0}, "policy_reward_mean": {"ppo": 283.09}, "hist_stats": {"episode_reward": [519.0, 576.0, 530.0, 584.0, 630.0, 573.0, 570.0, 533.0, 579.0, 576.0, 587.0, 579.0, 576.0, 579.0, 570.0, 516.0, 630.0, 633.0, 627.0, 570.0, 630.0, 579.0, 576.0, 579.0, 525.0, 630.0, 587.0, 573.0, 579.0, 573.0, 579.0, 579.0, 584.0, 630.0, 573.0, 582.0, 587.0, 579.0, 570.0, 582.0, 579.0, 297.0, 576.0, 582.0, 581.0, 576.0, 525.0, 579.0, 579.0, 579.0, 576.0, 265.0, 582.0, 579.0, 584.0, 573.0, 525.0, 
579.0, 573.0, 573.0, 587.0, 579.0, 579.0, 576.0, 570.0, 579.0, 630.0, 573.0, 576.0, 579.0, 576.0, 576.0, 627.0, 519.0, 579.0, 513.0, 630.0, 570.0, 582.0, 579.0, 587.0, 579.0, 123.0, 570.0, 587.0, 573.0, 579.0, 582.0, 579.0, 576.0, 522.0, 582.0, 579.0, 584.0, 573.0, 582.0, 522.0, 519.0, 636.0, 530.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [258.0, 261.0, 293.0, 283.0, 255.0, 275.0, 291.0, 293.0, 321.0, 309.0, 285.0, 288.0, 277.0, 293.0, 265.0, 268.0, 287.0, 292.0, 289.0, 287.0, 288.0, 299.0, 286.0, 293.0, 285.0, 291.0, 291.0, 288.0, 291.0, 279.0, 259.0, 257.0, 317.0, 313.0, 319.0, 314.0, 313.0, 314.0, 294.0, 276.0, 314.0, 316.0, 290.0, 289.0, 288.0, 288.0, 292.0, 287.0, 264.0, 261.0, 319.0, 311.0, 288.0, 299.0, 288.0, 285.0, 290.0, 289.0, 293.0, 280.0, 290.0, 289.0, 291.0, 288.0, 290.0, 294.0, 314.0, 316.0, 285.0, 288.0, 292.0, 290.0, 289.0, 298.0, 289.0, 290.0, 291.0, 279.0, 291.0, 291.0, 295.0, 284.0, 148.0, 149.0, 281.0, 295.0, 294.0, 288.0, 283.0, 298.0, 289.0, 287.0, 256.0, 269.0, 285.0, 294.0, 292.0, 287.0, 295.0, 284.0, 285.0, 291.0, 139.0, 126.0, 292.0, 290.0, 288.0, 291.0, 287.0, 297.0, 286.0, 287.0, 257.0, 268.0, 282.0, 297.0, 282.0, 291.0, 283.0, 290.0, 294.0, 293.0, 291.0, 288.0, 286.0, 293.0, 286.0, 290.0, 282.0, 288.0, 289.0, 290.0, 316.0, 314.0, 292.0, 281.0, 296.0, 280.0, 293.0, 286.0, 277.0, 299.0, 291.0, 285.0, 312.0, 315.0, 250.0, 269.0, 291.0, 288.0, 253.0, 260.0, 308.0, 322.0, 285.0, 285.0, 290.0, 292.0, 290.0, 289.0, 296.0, 291.0, 286.0, 293.0, 63.0, 
60.0, 288.0, 282.0, 295.0, 292.0, 282.0, 291.0, 288.0, 291.0, 290.0, 292.0, 283.0, 296.0, 290.0, 286.0, 254.0, 268.0, 291.0, 291.0, 290.0, 289.0, 288.0, 296.0, 289.0, 284.0, 294.0, 288.0, 265.0, 257.0, 254.0, 265.0, 316.0, 320.0, 266.0, 264.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6955096508230176, "mean_inference_ms": 1.2406080064538771, "mean_action_processing_ms": 0.1334579355462166, "mean_env_wait_ms": 0.837142387495199, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 10854400, "num_agent_steps_trained": 10854400, "num_env_steps_sampled": 5427200, "num_env_steps_trained": 5427200, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 5427200, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 10854400, "timers": {"training_iteration_time_ms": 3587.87, "learn_time_ms": 1099.972, "learn_throughput": 11636.659, "synch_weights_time_ms": 11.749}, "counters": {"num_env_steps_sampled": 5427200, "num_env_steps_trained": 5427200, "num_agent_steps_sampled": 10854400, "num_agent_steps_trained": 10854400}, "done": false, "episodes_total": 13568, "training_iteration": 424, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-28-01", "timestamp": 1666582081, "time_this_iter_s": 3.6314749717712402, "time_total_s": 1617.1837270259857, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1617.1837270259857, "timesteps_since_restore": 0, "iterations_since_restore": 424, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.38333333333333, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 194.4, "sparse_reward_min": 40, "sparse_reward_max": 220, "shaped_reward_mean": 173.89, "shaped_reward_min": 43, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.05, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 16.11, "onion_pickup_agent_1_min": 5, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 15.84, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 15.94, "useful_onion_pickup_agent_1_min": 4, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.13, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.16, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.08, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, 
"useful_onion_drop_agent_1_mean": 0.08, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.55, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 15.74, "potting_onion_agent_1_min": 4, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.19, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.31, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.01, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.18, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.06, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.08, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 4.89, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.0, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.86, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.9, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.55, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 15.74, "optimal_onion_potting_agent_1_min": 4, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.55, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 15.74, "viable_onion_potting_agent_1_min": 4, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0008383437525480986, "policy_loss": 0.00044354514102451503, "vf_loss": 
7.910788536071777, "vf_explained_var": 0.5708798170089722, "kl": 0.0022159921936690807, "entropy": 0.7925610542297363, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 5440000, "num_env_steps_trained": 5440000, "num_agent_steps_sampled": 10880000, "num_agent_steps_trained": 10880000}, "sampler_results": {"episode_reward_max": 636.0, "episode_reward_min": 123.0, "episode_reward_mean": 562.69, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 60.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 281.345}, "custom_metrics": {"sparse_reward_mean": 194.4, "sparse_reward_min": 40, "sparse_reward_max": 220, "shaped_reward_mean": 173.89, "shaped_reward_min": 43, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.05, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 16.11, 
"onion_pickup_agent_1_min": 5, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 15.84, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 15.94, "useful_onion_pickup_agent_1_min": 4, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.13, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.16, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.08, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.08, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.55, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 15.74, "potting_onion_agent_1_min": 4, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.19, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.31, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.01, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.18, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.06, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.08, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 4.89, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.0, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.86, "soup_delivery_agent_0_min": 1, 
"soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.9, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.55, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 15.74, "optimal_onion_potting_agent_1_min": 4, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.55, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 15.74, "viable_onion_potting_agent_1_min": 4, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [584.0, 630.0, 573.0, 582.0, 587.0, 579.0, 570.0, 582.0, 579.0, 297.0, 576.0, 582.0, 581.0, 576.0, 525.0, 579.0, 579.0, 579.0, 576.0, 265.0, 582.0, 579.0, 584.0, 573.0, 525.0, 579.0, 573.0, 573.0, 587.0, 579.0, 579.0, 576.0, 570.0, 579.0, 630.0, 573.0, 576.0, 579.0, 576.0, 576.0, 627.0, 519.0, 579.0, 513.0, 630.0, 570.0, 582.0, 579.0, 587.0, 579.0, 123.0, 570.0, 587.0, 573.0, 579.0, 582.0, 579.0, 576.0, 522.0, 582.0, 579.0, 584.0, 573.0, 582.0, 522.0, 519.0, 636.0, 530.0, 582.0, 576.0, 633.0, 584.0, 582.0, 576.0, 519.0, 579.0, 587.0, 587.0, 576.0, 573.0, 579.0, 579.0, 570.0, 633.0, 579.0, 582.0, 579.0, 579.0, 581.0, 465.0, 524.0, 579.0, 519.0, 627.0, 459.0, 462.0, 587.0, 636.0, 522.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [290.0, 294.0, 314.0, 316.0, 285.0, 288.0, 292.0, 290.0, 289.0, 298.0, 289.0, 290.0, 291.0, 279.0, 291.0, 291.0, 295.0, 284.0, 148.0, 149.0, 281.0, 295.0, 294.0, 288.0, 283.0, 298.0, 289.0, 287.0, 256.0, 269.0, 285.0, 294.0, 292.0, 287.0, 295.0, 284.0, 285.0, 291.0, 139.0, 126.0, 292.0, 290.0, 288.0, 291.0, 287.0, 297.0, 286.0, 287.0, 257.0, 
268.0, 282.0, 297.0, 282.0, 291.0, 283.0, 290.0, 294.0, 293.0, 291.0, 288.0, 286.0, 293.0, 286.0, 290.0, 282.0, 288.0, 289.0, 290.0, 316.0, 314.0, 292.0, 281.0, 296.0, 280.0, 293.0, 286.0, 277.0, 299.0, 291.0, 285.0, 312.0, 315.0, 250.0, 269.0, 291.0, 288.0, 253.0, 260.0, 308.0, 322.0, 285.0, 285.0, 290.0, 292.0, 290.0, 289.0, 296.0, 291.0, 286.0, 293.0, 63.0, 60.0, 288.0, 282.0, 295.0, 292.0, 282.0, 291.0, 288.0, 291.0, 290.0, 292.0, 283.0, 296.0, 290.0, 286.0, 254.0, 268.0, 291.0, 291.0, 290.0, 289.0, 288.0, 296.0, 289.0, 284.0, 294.0, 288.0, 265.0, 257.0, 254.0, 265.0, 316.0, 320.0, 266.0, 264.0, 295.0, 287.0, 289.0, 287.0, 324.0, 309.0, 286.0, 298.0, 292.0, 290.0, 294.0, 282.0, 271.0, 248.0, 291.0, 288.0, 288.0, 299.0, 293.0, 294.0, 286.0, 290.0, 283.0, 290.0, 289.0, 290.0, 290.0, 289.0, 289.0, 281.0, 315.0, 318.0, 291.0, 288.0, 291.0, 291.0, 288.0, 291.0, 286.0, 293.0, 278.0, 303.0, 231.0, 234.0, 258.0, 266.0, 285.0, 294.0, 268.0, 251.0, 308.0, 319.0, 220.0, 239.0, 229.0, 233.0, 293.0, 294.0, 319.0, 317.0, 261.0, 261.0, 292.0, 290.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6954787574741181, "mean_inference_ms": 1.2404898821971377, "mean_action_processing_ms": 0.13345151424050777, "mean_env_wait_ms": 0.8370745362550585, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 636.0, "episode_reward_min": 123.0, "episode_reward_mean": 562.69, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 60.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 281.345}, "hist_stats": {"episode_reward": [584.0, 630.0, 573.0, 582.0, 587.0, 579.0, 570.0, 582.0, 579.0, 297.0, 576.0, 582.0, 581.0, 576.0, 525.0, 579.0, 579.0, 579.0, 576.0, 265.0, 582.0, 579.0, 584.0, 573.0, 525.0, 579.0, 573.0, 573.0, 587.0, 579.0, 579.0, 576.0, 570.0, 579.0, 630.0, 573.0, 576.0, 579.0, 576.0, 576.0, 627.0, 519.0, 579.0, 513.0, 630.0, 570.0, 582.0, 579.0, 587.0, 579.0, 123.0, 570.0, 587.0, 573.0, 579.0, 582.0, 579.0, 
576.0, 522.0, 582.0, 579.0, 584.0, 573.0, 582.0, 522.0, 519.0, 636.0, 530.0, 582.0, 576.0, 633.0, 584.0, 582.0, 576.0, 519.0, 579.0, 587.0, 587.0, 576.0, 573.0, 579.0, 579.0, 570.0, 633.0, 579.0, 582.0, 579.0, 579.0, 581.0, 465.0, 524.0, 579.0, 519.0, 627.0, 459.0, 462.0, 587.0, 636.0, 522.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [290.0, 294.0, 314.0, 316.0, 285.0, 288.0, 292.0, 290.0, 289.0, 298.0, 289.0, 290.0, 291.0, 279.0, 291.0, 291.0, 295.0, 284.0, 148.0, 149.0, 281.0, 295.0, 294.0, 288.0, 283.0, 298.0, 289.0, 287.0, 256.0, 269.0, 285.0, 294.0, 292.0, 287.0, 295.0, 284.0, 285.0, 291.0, 139.0, 126.0, 292.0, 290.0, 288.0, 291.0, 287.0, 297.0, 286.0, 287.0, 257.0, 268.0, 282.0, 297.0, 282.0, 291.0, 283.0, 290.0, 294.0, 293.0, 291.0, 288.0, 286.0, 293.0, 286.0, 290.0, 282.0, 288.0, 289.0, 290.0, 316.0, 314.0, 292.0, 281.0, 296.0, 280.0, 293.0, 286.0, 277.0, 299.0, 291.0, 285.0, 312.0, 315.0, 250.0, 269.0, 291.0, 288.0, 253.0, 260.0, 308.0, 322.0, 285.0, 285.0, 290.0, 292.0, 290.0, 289.0, 296.0, 291.0, 286.0, 293.0, 63.0, 60.0, 288.0, 282.0, 295.0, 292.0, 282.0, 291.0, 288.0, 291.0, 290.0, 292.0, 283.0, 296.0, 290.0, 286.0, 254.0, 268.0, 291.0, 291.0, 290.0, 289.0, 288.0, 296.0, 289.0, 284.0, 294.0, 288.0, 265.0, 257.0, 254.0, 265.0, 316.0, 320.0, 266.0, 264.0, 295.0, 287.0, 289.0, 287.0, 324.0, 309.0, 286.0, 298.0, 292.0, 290.0, 294.0, 282.0, 271.0, 248.0, 291.0, 288.0, 288.0, 299.0, 293.0, 294.0, 286.0, 290.0, 283.0, 290.0, 289.0, 290.0, 290.0, 289.0, 289.0, 
281.0, 315.0, 318.0, 291.0, 288.0, 291.0, 291.0, 288.0, 291.0, 286.0, 293.0, 278.0, 303.0, 231.0, 234.0, 258.0, 266.0, 285.0, 294.0, 268.0, 251.0, 308.0, 319.0, 220.0, 239.0, 229.0, 233.0, 293.0, 294.0, 319.0, 317.0, 261.0, 261.0, 292.0, 290.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6954787574741181, "mean_inference_ms": 1.2404898821971377, "mean_action_processing_ms": 0.13345151424050777, "mean_env_wait_ms": 0.8370745362550585, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 10880000, "num_agent_steps_trained": 10880000, "num_env_steps_sampled": 5440000, "num_env_steps_trained": 5440000, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 5440000, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 10880000, "timers": {"training_iteration_time_ms": 3577.926, "learn_time_ms": 1094.808, "learn_throughput": 11691.547, "synch_weights_time_ms": 12.038}, "counters": {"num_env_steps_sampled": 5440000, "num_env_steps_trained": 5440000, "num_agent_steps_sampled": 10880000, "num_agent_steps_trained": 10880000}, "done": false, "episodes_total": 13600, "training_iteration": 425, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-28-05", "timestamp": 1666582085, "time_this_iter_s": 3.607473134994507, "time_total_s": 1620.7912001609802, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1620.7912001609802, "timesteps_since_restore": 0, "iterations_since_restore": 425, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.98, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 196.4, "sparse_reward_min": 40, "sparse_reward_max": 220, "shaped_reward_mean": 175.25, "shaped_reward_min": 43, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.93, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 16.42, "onion_pickup_agent_1_min": 5, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 15.77, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 16.26, "useful_onion_pickup_agent_1_min": 4, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.11, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.16, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.08, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, 
"useful_onion_drop_agent_1_mean": 0.1, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.48, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 16.03, "potting_onion_agent_1_min": 4, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.34, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 5.19, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.18, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.06, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.03, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.08, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.1, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.9, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.04, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.83, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.48, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 16.03, "optimal_onion_potting_agent_1_min": 4, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.48, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 16.03, "viable_onion_potting_agent_1_min": 4, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0019460651092231274, "policy_loss": -0.0023312822449952364, "vf_loss": 
7.81488037109375, "vf_explained_var": 0.5743376016616821, "kl": 0.002288772724568844, "entropy": 0.7925402522087097, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 5452800, "num_env_steps_trained": 5452800, "num_agent_steps_sampled": 10905600, "num_agent_steps_trained": 10905600}, "sampler_results": {"episode_reward_max": 636.0, "episode_reward_min": 123.0, "episode_reward_mean": 568.05, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 60.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 284.025}, "custom_metrics": {"sparse_reward_mean": 196.4, "sparse_reward_min": 40, "sparse_reward_max": 220, "shaped_reward_mean": 175.25, "shaped_reward_min": 43, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.93, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 16.42, 
"onion_pickup_agent_1_min": 5, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 15.77, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 16.26, "useful_onion_pickup_agent_1_min": 4, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.11, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.16, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.08, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.1, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.48, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 16.03, "potting_onion_agent_1_min": 4, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.34, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 5.19, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.18, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.06, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.03, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.08, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.1, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.9, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.04, "soup_delivery_agent_0_min": 1, 
"soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.83, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.48, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 16.03, "optimal_onion_potting_agent_1_min": 4, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.48, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 16.03, "viable_onion_potting_agent_1_min": 4, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [570.0, 579.0, 630.0, 573.0, 576.0, 579.0, 576.0, 576.0, 627.0, 519.0, 579.0, 513.0, 630.0, 570.0, 582.0, 579.0, 587.0, 579.0, 123.0, 570.0, 587.0, 573.0, 579.0, 582.0, 579.0, 576.0, 522.0, 582.0, 579.0, 584.0, 573.0, 582.0, 522.0, 519.0, 636.0, 530.0, 582.0, 576.0, 633.0, 584.0, 582.0, 576.0, 519.0, 579.0, 587.0, 587.0, 576.0, 573.0, 579.0, 579.0, 570.0, 633.0, 579.0, 582.0, 579.0, 579.0, 581.0, 465.0, 524.0, 579.0, 519.0, 627.0, 459.0, 462.0, 587.0, 636.0, 522.0, 582.0, 576.0, 530.0, 533.0, 582.0, 576.0, 579.0, 570.0, 584.0, 525.0, 522.0, 576.0, 582.0, 576.0, 590.0, 573.0, 582.0, 582.0, 581.0, 584.0, 584.0, 624.0, 579.0, 582.0, 573.0, 522.0, 627.0, 576.0, 627.0, 576.0, 576.0, 573.0, 584.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [282.0, 288.0, 289.0, 290.0, 316.0, 314.0, 292.0, 281.0, 296.0, 280.0, 293.0, 286.0, 277.0, 299.0, 291.0, 285.0, 312.0, 315.0, 250.0, 269.0, 291.0, 288.0, 253.0, 260.0, 308.0, 322.0, 285.0, 285.0, 290.0, 292.0, 290.0, 289.0, 296.0, 291.0, 286.0, 293.0, 63.0, 60.0, 288.0, 282.0, 295.0, 292.0, 282.0, 291.0, 288.0, 291.0, 290.0, 292.0, 283.0, 
296.0, 290.0, 286.0, 254.0, 268.0, 291.0, 291.0, 290.0, 289.0, 288.0, 296.0, 289.0, 284.0, 294.0, 288.0, 265.0, 257.0, 254.0, 265.0, 316.0, 320.0, 266.0, 264.0, 295.0, 287.0, 289.0, 287.0, 324.0, 309.0, 286.0, 298.0, 292.0, 290.0, 294.0, 282.0, 271.0, 248.0, 291.0, 288.0, 288.0, 299.0, 293.0, 294.0, 286.0, 290.0, 283.0, 290.0, 289.0, 290.0, 290.0, 289.0, 289.0, 281.0, 315.0, 318.0, 291.0, 288.0, 291.0, 291.0, 288.0, 291.0, 286.0, 293.0, 278.0, 303.0, 231.0, 234.0, 258.0, 266.0, 285.0, 294.0, 268.0, 251.0, 308.0, 319.0, 220.0, 239.0, 229.0, 233.0, 293.0, 294.0, 319.0, 317.0, 261.0, 261.0, 292.0, 290.0, 291.0, 285.0, 268.0, 262.0, 267.0, 266.0, 294.0, 288.0, 291.0, 285.0, 283.0, 296.0, 278.0, 292.0, 284.0, 300.0, 266.0, 259.0, 261.0, 261.0, 291.0, 285.0, 296.0, 286.0, 287.0, 289.0, 298.0, 292.0, 296.0, 277.0, 293.0, 289.0, 294.0, 288.0, 293.0, 288.0, 294.0, 290.0, 300.0, 284.0, 311.0, 313.0, 290.0, 289.0, 288.0, 294.0, 285.0, 288.0, 262.0, 260.0, 313.0, 314.0, 288.0, 288.0, 319.0, 308.0, 290.0, 286.0, 290.0, 286.0, 284.0, 289.0, 287.0, 297.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6954404501584768, "mean_inference_ms": 1.2403714512167434, "mean_action_processing_ms": 0.13344596157879127, "mean_env_wait_ms": 0.8370100823602792, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 636.0, "episode_reward_min": 123.0, "episode_reward_mean": 568.05, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 60.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 284.025}, "hist_stats": {"episode_reward": [570.0, 579.0, 630.0, 573.0, 576.0, 579.0, 576.0, 576.0, 627.0, 519.0, 579.0, 513.0, 630.0, 570.0, 582.0, 579.0, 587.0, 579.0, 123.0, 570.0, 587.0, 573.0, 579.0, 582.0, 579.0, 576.0, 522.0, 582.0, 579.0, 584.0, 573.0, 582.0, 522.0, 519.0, 636.0, 530.0, 582.0, 576.0, 633.0, 584.0, 582.0, 576.0, 519.0, 579.0, 587.0, 587.0, 576.0, 573.0, 579.0, 579.0, 570.0, 633.0, 579.0, 582.0, 579.0, 579.0, 581.0, 
465.0, 524.0, 579.0, 519.0, 627.0, 459.0, 462.0, 587.0, 636.0, 522.0, 582.0, 576.0, 530.0, 533.0, 582.0, 576.0, 579.0, 570.0, 584.0, 525.0, 522.0, 576.0, 582.0, 576.0, 590.0, 573.0, 582.0, 582.0, 581.0, 584.0, 584.0, 624.0, 579.0, 582.0, 573.0, 522.0, 627.0, 576.0, 627.0, 576.0, 576.0, 573.0, 584.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [282.0, 288.0, 289.0, 290.0, 316.0, 314.0, 292.0, 281.0, 296.0, 280.0, 293.0, 286.0, 277.0, 299.0, 291.0, 285.0, 312.0, 315.0, 250.0, 269.0, 291.0, 288.0, 253.0, 260.0, 308.0, 322.0, 285.0, 285.0, 290.0, 292.0, 290.0, 289.0, 296.0, 291.0, 286.0, 293.0, 63.0, 60.0, 288.0, 282.0, 295.0, 292.0, 282.0, 291.0, 288.0, 291.0, 290.0, 292.0, 283.0, 296.0, 290.0, 286.0, 254.0, 268.0, 291.0, 291.0, 290.0, 289.0, 288.0, 296.0, 289.0, 284.0, 294.0, 288.0, 265.0, 257.0, 254.0, 265.0, 316.0, 320.0, 266.0, 264.0, 295.0, 287.0, 289.0, 287.0, 324.0, 309.0, 286.0, 298.0, 292.0, 290.0, 294.0, 282.0, 271.0, 248.0, 291.0, 288.0, 288.0, 299.0, 293.0, 294.0, 286.0, 290.0, 283.0, 290.0, 289.0, 290.0, 290.0, 289.0, 289.0, 281.0, 315.0, 318.0, 291.0, 288.0, 291.0, 291.0, 288.0, 291.0, 286.0, 293.0, 278.0, 303.0, 231.0, 234.0, 258.0, 266.0, 285.0, 294.0, 268.0, 251.0, 308.0, 319.0, 220.0, 239.0, 229.0, 233.0, 293.0, 294.0, 319.0, 317.0, 261.0, 261.0, 292.0, 290.0, 291.0, 285.0, 268.0, 262.0, 267.0, 266.0, 294.0, 288.0, 291.0, 285.0, 283.0, 296.0, 278.0, 292.0, 284.0, 300.0, 266.0, 259.0, 261.0, 261.0, 291.0, 285.0, 296.0, 286.0, 287.0, 289.0, 298.0, 292.0, 296.0, 
277.0, 293.0, 289.0, 294.0, 288.0, 293.0, 288.0, 294.0, 290.0, 300.0, 284.0, 311.0, 313.0, 290.0, 289.0, 288.0, 294.0, 285.0, 288.0, 262.0, 260.0, 313.0, 314.0, 288.0, 288.0, 319.0, 308.0, 290.0, 286.0, 290.0, 286.0, 284.0, 289.0, 287.0, 297.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6954404501584768, "mean_inference_ms": 1.2403714512167434, "mean_action_processing_ms": 0.13344596157879127, "mean_env_wait_ms": 0.8370100823602792, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 10905600, "num_agent_steps_trained": 10905600, "num_env_steps_sampled": 5452800, "num_env_steps_trained": 5452800, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 5452800, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 10905600, "timers": {"training_iteration_time_ms": 3580.853, "learn_time_ms": 1095.196, "learn_throughput": 11687.404, "synch_weights_time_ms": 12.223}, "counters": {"num_env_steps_sampled": 5452800, "num_env_steps_trained": 5452800, "num_agent_steps_sampled": 10905600, "num_agent_steps_trained": 10905600}, "done": false, "episodes_total": 13632, "training_iteration": 426, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-28-09", "timestamp": 1666582089, "time_this_iter_s": 3.7375569343566895, "time_total_s": 1624.528757095337, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1624.528757095337, "timesteps_since_restore": 0, "iterations_since_restore": 426, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.73333333333333, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 196.8, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 175.62, "shaped_reward_min": 119, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.15, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 16.31, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.03, "useful_onion_pickup_agent_0_min": 9, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 16.15, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.1, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.16, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.07, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, 
"useful_onion_drop_agent_1_mean": 0.1, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.74, "potting_onion_agent_0_min": 9, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 15.91, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.37, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.25, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.11, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 5.03, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.08, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.1, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.06, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.07, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.98, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.01, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.91, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.74, "optimal_onion_potting_agent_0_min": 9, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 15.91, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.74, "viable_onion_potting_agent_0_min": 9, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 15.91, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0014132090145722032, "policy_loss": -0.0017901980318129063, "vf_loss": 
7.7431488037109375, "vf_explained_var": 0.5979735851287842, "kl": 0.0031745489686727524, "entropy": 0.794650673866272, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 5465600, "num_env_steps_trained": 5465600, "num_agent_steps_sampled": 10931200, "num_agent_steps_trained": 10931200}, "sampler_results": {"episode_reward_max": 636.0, "episode_reward_min": 399.0, "episode_reward_mean": 569.22, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 188.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 284.61}, "custom_metrics": {"sparse_reward_mean": 196.8, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 175.62, "shaped_reward_min": 119, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.15, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 16.31, 
"onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.03, "useful_onion_pickup_agent_0_min": 9, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 16.15, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.1, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.16, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.07, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.1, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.74, "potting_onion_agent_0_min": 9, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 15.91, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.37, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.25, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.11, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 5.03, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.08, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.1, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.06, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.07, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.98, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.01, "soup_delivery_agent_0_min": 3, 
"soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.91, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.74, "optimal_onion_potting_agent_0_min": 9, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 15.91, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.74, "viable_onion_potting_agent_0_min": 9, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 15.91, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [522.0, 519.0, 636.0, 530.0, 582.0, 576.0, 633.0, 584.0, 582.0, 576.0, 519.0, 579.0, 587.0, 587.0, 576.0, 573.0, 579.0, 579.0, 570.0, 633.0, 579.0, 582.0, 579.0, 579.0, 581.0, 465.0, 524.0, 579.0, 519.0, 627.0, 459.0, 462.0, 587.0, 636.0, 522.0, 582.0, 576.0, 530.0, 533.0, 582.0, 576.0, 579.0, 570.0, 584.0, 525.0, 522.0, 576.0, 582.0, 576.0, 590.0, 573.0, 582.0, 582.0, 581.0, 584.0, 584.0, 624.0, 579.0, 582.0, 573.0, 522.0, 627.0, 576.0, 627.0, 576.0, 576.0, 573.0, 584.0, 627.0, 465.0, 590.0, 582.0, 587.0, 507.0, 576.0, 579.0, 399.0, 573.0, 473.0, 630.0, 590.0, 546.0, 579.0, 579.0, 573.0, 522.0, 579.0, 579.0, 576.0, 590.0, 579.0, 627.0, 582.0, 579.0, 582.0, 570.0, 573.0, 573.0, 582.0, 584.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [265.0, 257.0, 254.0, 265.0, 316.0, 320.0, 266.0, 264.0, 295.0, 287.0, 289.0, 287.0, 324.0, 309.0, 286.0, 298.0, 292.0, 290.0, 294.0, 282.0, 271.0, 248.0, 291.0, 288.0, 288.0, 299.0, 293.0, 294.0, 286.0, 290.0, 283.0, 290.0, 289.0, 290.0, 290.0, 289.0, 289.0, 281.0, 315.0, 318.0, 291.0, 288.0, 291.0, 291.0, 288.0, 291.0, 286.0, 293.0, 278.0, 
303.0, 231.0, 234.0, 258.0, 266.0, 285.0, 294.0, 268.0, 251.0, 308.0, 319.0, 220.0, 239.0, 229.0, 233.0, 293.0, 294.0, 319.0, 317.0, 261.0, 261.0, 292.0, 290.0, 291.0, 285.0, 268.0, 262.0, 267.0, 266.0, 294.0, 288.0, 291.0, 285.0, 283.0, 296.0, 278.0, 292.0, 284.0, 300.0, 266.0, 259.0, 261.0, 261.0, 291.0, 285.0, 296.0, 286.0, 287.0, 289.0, 298.0, 292.0, 296.0, 277.0, 293.0, 289.0, 294.0, 288.0, 293.0, 288.0, 294.0, 290.0, 300.0, 284.0, 311.0, 313.0, 290.0, 289.0, 288.0, 294.0, 285.0, 288.0, 262.0, 260.0, 313.0, 314.0, 288.0, 288.0, 319.0, 308.0, 290.0, 286.0, 290.0, 286.0, 284.0, 289.0, 287.0, 297.0, 309.0, 318.0, 233.0, 232.0, 299.0, 291.0, 287.0, 295.0, 295.0, 292.0, 253.0, 254.0, 285.0, 291.0, 288.0, 291.0, 188.0, 211.0, 288.0, 285.0, 242.0, 231.0, 321.0, 309.0, 297.0, 293.0, 274.0, 272.0, 283.0, 296.0, 283.0, 296.0, 288.0, 285.0, 260.0, 262.0, 291.0, 288.0, 289.0, 290.0, 291.0, 285.0, 308.0, 282.0, 289.0, 290.0, 313.0, 314.0, 292.0, 290.0, 291.0, 288.0, 288.0, 294.0, 287.0, 283.0, 286.0, 287.0, 276.0, 297.0, 291.0, 291.0, 299.0, 285.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6953774070106227, "mean_inference_ms": 1.2402776848961765, "mean_action_processing_ms": 0.13343806812747527, "mean_env_wait_ms": 0.8370408488588459, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 636.0, "episode_reward_min": 399.0, "episode_reward_mean": 569.22, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 188.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 284.61}, "hist_stats": {"episode_reward": [522.0, 519.0, 636.0, 530.0, 582.0, 576.0, 633.0, 584.0, 582.0, 576.0, 519.0, 579.0, 587.0, 587.0, 576.0, 573.0, 579.0, 579.0, 570.0, 633.0, 579.0, 582.0, 579.0, 579.0, 581.0, 465.0, 524.0, 579.0, 519.0, 627.0, 459.0, 462.0, 587.0, 636.0, 522.0, 582.0, 576.0, 530.0, 533.0, 582.0, 576.0, 579.0, 570.0, 584.0, 525.0, 522.0, 576.0, 582.0, 576.0, 590.0, 573.0, 582.0, 582.0, 581.0, 584.0, 584.0, 624.0, 
579.0, 582.0, 573.0, 522.0, 627.0, 576.0, 627.0, 576.0, 576.0, 573.0, 584.0, 627.0, 465.0, 590.0, 582.0, 587.0, 507.0, 576.0, 579.0, 399.0, 573.0, 473.0, 630.0, 590.0, 546.0, 579.0, 579.0, 573.0, 522.0, 579.0, 579.0, 576.0, 590.0, 579.0, 627.0, 582.0, 579.0, 582.0, 570.0, 573.0, 573.0, 582.0, 584.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [265.0, 257.0, 254.0, 265.0, 316.0, 320.0, 266.0, 264.0, 295.0, 287.0, 289.0, 287.0, 324.0, 309.0, 286.0, 298.0, 292.0, 290.0, 294.0, 282.0, 271.0, 248.0, 291.0, 288.0, 288.0, 299.0, 293.0, 294.0, 286.0, 290.0, 283.0, 290.0, 289.0, 290.0, 290.0, 289.0, 289.0, 281.0, 315.0, 318.0, 291.0, 288.0, 291.0, 291.0, 288.0, 291.0, 286.0, 293.0, 278.0, 303.0, 231.0, 234.0, 258.0, 266.0, 285.0, 294.0, 268.0, 251.0, 308.0, 319.0, 220.0, 239.0, 229.0, 233.0, 293.0, 294.0, 319.0, 317.0, 261.0, 261.0, 292.0, 290.0, 291.0, 285.0, 268.0, 262.0, 267.0, 266.0, 294.0, 288.0, 291.0, 285.0, 283.0, 296.0, 278.0, 292.0, 284.0, 300.0, 266.0, 259.0, 261.0, 261.0, 291.0, 285.0, 296.0, 286.0, 287.0, 289.0, 298.0, 292.0, 296.0, 277.0, 293.0, 289.0, 294.0, 288.0, 293.0, 288.0, 294.0, 290.0, 300.0, 284.0, 311.0, 313.0, 290.0, 289.0, 288.0, 294.0, 285.0, 288.0, 262.0, 260.0, 313.0, 314.0, 288.0, 288.0, 319.0, 308.0, 290.0, 286.0, 290.0, 286.0, 284.0, 289.0, 287.0, 297.0, 309.0, 318.0, 233.0, 232.0, 299.0, 291.0, 287.0, 295.0, 295.0, 292.0, 253.0, 254.0, 285.0, 291.0, 288.0, 291.0, 188.0, 211.0, 288.0, 285.0, 242.0, 231.0, 321.0, 309.0, 297.0, 293.0, 274.0, 272.0, 283.0, 
296.0, 283.0, 296.0, 288.0, 285.0, 260.0, 262.0, 291.0, 288.0, 289.0, 290.0, 291.0, 285.0, 308.0, 282.0, 289.0, 290.0, 313.0, 314.0, 292.0, 290.0, 291.0, 288.0, 288.0, 294.0, 287.0, 283.0, 286.0, 287.0, 276.0, 297.0, 291.0, 291.0, 299.0, 285.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6953774070106227, "mean_inference_ms": 1.2402776848961765, "mean_action_processing_ms": 0.13343806812747527, "mean_env_wait_ms": 0.8370408488588459, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 10931200, "num_agent_steps_trained": 10931200, "num_env_steps_sampled": 5465600, "num_env_steps_trained": 5465600, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 5465600, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 10931200, "timers": {"training_iteration_time_ms": 3600.212, "learn_time_ms": 1090.835, "learn_throughput": 11734.128, "synch_weights_time_ms": 11.717}, "counters": {"num_env_steps_sampled": 5465600, "num_env_steps_trained": 5465600, "num_agent_steps_sampled": 10931200, "num_agent_steps_trained": 10931200}, "done": false, "episodes_total": 13664, "training_iteration": 427, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-28-13", "timestamp": 1666582093, "time_this_iter_s": 3.8690433502197266, "time_total_s": 1628.3978004455566, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1628.3978004455566, "timesteps_since_restore": 0, "iterations_since_restore": 427, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 24.360000000000003, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 199.8, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 177.65, "shaped_reward_min": 119, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.95, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 16.68, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 15.86, "useful_onion_pickup_agent_0_min": 9, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 16.57, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.09, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.11, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.05, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, 
"useful_onion_drop_agent_1_mean": 0.05, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.6, "potting_onion_agent_0_min": 9, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 16.35, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.55, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.21, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.27, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 5.03, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.12, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.09, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.04, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.23, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.95, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.19, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.91, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.6, "optimal_onion_potting_agent_0_min": 9, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 16.35, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.6, "viable_onion_potting_agent_0_min": 9, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 16.35, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0008390977163799107, "policy_loss": -0.0012106244685128331, "vf_loss": 
7.669140815734863, "vf_explained_var": 0.5955076217651367, "kl": 0.002353356685489416, "entropy": 0.7907739281654358, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 5478400, "num_env_steps_trained": 5478400, "num_agent_steps_sampled": 10956800, "num_agent_steps_trained": 10956800}, "sampler_results": {"episode_reward_max": 636.0, "episode_reward_min": 399.0, "episode_reward_mean": 577.25, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 188.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 288.625}, "custom_metrics": {"sparse_reward_mean": 199.8, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 177.65, "shaped_reward_min": 119, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.95, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 16.68, 
"onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 15.86, "useful_onion_pickup_agent_0_min": 9, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 16.57, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.09, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.11, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.05, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.05, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.6, "potting_onion_agent_0_min": 9, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 16.35, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.55, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.21, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.27, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 5.03, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.12, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.09, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.04, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.23, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.95, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.19, "soup_delivery_agent_0_min": 2, 
"soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.91, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.6, "optimal_onion_potting_agent_0_min": 9, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 16.35, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.6, "viable_onion_potting_agent_0_min": 9, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 16.35, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [587.0, 636.0, 522.0, 582.0, 576.0, 530.0, 533.0, 582.0, 576.0, 579.0, 570.0, 584.0, 525.0, 522.0, 576.0, 582.0, 576.0, 590.0, 573.0, 582.0, 582.0, 581.0, 584.0, 584.0, 624.0, 579.0, 582.0, 573.0, 522.0, 627.0, 576.0, 627.0, 576.0, 576.0, 573.0, 584.0, 627.0, 465.0, 590.0, 582.0, 587.0, 507.0, 576.0, 579.0, 399.0, 573.0, 473.0, 630.0, 590.0, 546.0, 579.0, 579.0, 573.0, 522.0, 579.0, 579.0, 576.0, 590.0, 579.0, 627.0, 582.0, 579.0, 582.0, 570.0, 573.0, 573.0, 582.0, 584.0, 624.0, 584.0, 579.0, 570.0, 630.0, 584.0, 576.0, 576.0, 576.0, 579.0, 582.0, 582.0, 573.0, 630.0, 579.0, 590.0, 576.0, 579.0, 582.0, 582.0, 627.0, 627.0, 627.0, 579.0, 579.0, 579.0, 579.0, 630.0, 570.0, 573.0, 573.0, 584.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [293.0, 294.0, 319.0, 317.0, 261.0, 261.0, 292.0, 290.0, 291.0, 285.0, 268.0, 262.0, 267.0, 266.0, 294.0, 288.0, 291.0, 285.0, 283.0, 296.0, 278.0, 292.0, 284.0, 300.0, 266.0, 259.0, 261.0, 261.0, 291.0, 285.0, 296.0, 286.0, 287.0, 289.0, 298.0, 292.0, 296.0, 277.0, 293.0, 289.0, 294.0, 288.0, 293.0, 288.0, 294.0, 290.0, 300.0, 284.0, 311.0, 
313.0, 290.0, 289.0, 288.0, 294.0, 285.0, 288.0, 262.0, 260.0, 313.0, 314.0, 288.0, 288.0, 319.0, 308.0, 290.0, 286.0, 290.0, 286.0, 284.0, 289.0, 287.0, 297.0, 309.0, 318.0, 233.0, 232.0, 299.0, 291.0, 287.0, 295.0, 295.0, 292.0, 253.0, 254.0, 285.0, 291.0, 288.0, 291.0, 188.0, 211.0, 288.0, 285.0, 242.0, 231.0, 321.0, 309.0, 297.0, 293.0, 274.0, 272.0, 283.0, 296.0, 283.0, 296.0, 288.0, 285.0, 260.0, 262.0, 291.0, 288.0, 289.0, 290.0, 291.0, 285.0, 308.0, 282.0, 289.0, 290.0, 313.0, 314.0, 292.0, 290.0, 291.0, 288.0, 288.0, 294.0, 287.0, 283.0, 286.0, 287.0, 276.0, 297.0, 291.0, 291.0, 299.0, 285.0, 301.0, 323.0, 299.0, 285.0, 294.0, 285.0, 282.0, 288.0, 306.0, 324.0, 288.0, 296.0, 290.0, 286.0, 288.0, 288.0, 288.0, 288.0, 288.0, 291.0, 291.0, 291.0, 290.0, 292.0, 286.0, 287.0, 309.0, 321.0, 287.0, 292.0, 296.0, 294.0, 283.0, 293.0, 294.0, 285.0, 295.0, 287.0, 295.0, 287.0, 311.0, 316.0, 306.0, 321.0, 319.0, 308.0, 291.0, 288.0, 293.0, 286.0, 288.0, 291.0, 288.0, 291.0, 308.0, 322.0, 277.0, 293.0, 279.0, 294.0, 290.0, 283.0, 295.0, 289.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6953154186385797, "mean_inference_ms": 1.2402239966956277, "mean_action_processing_ms": 0.13342831773414113, "mean_env_wait_ms": 0.837058849138891, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 636.0, "episode_reward_min": 399.0, "episode_reward_mean": 577.25, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 188.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 288.625}, "hist_stats": {"episode_reward": [587.0, 636.0, 522.0, 582.0, 576.0, 530.0, 533.0, 582.0, 576.0, 579.0, 570.0, 584.0, 525.0, 522.0, 576.0, 582.0, 576.0, 590.0, 573.0, 582.0, 582.0, 581.0, 584.0, 584.0, 624.0, 579.0, 582.0, 573.0, 522.0, 627.0, 576.0, 627.0, 576.0, 576.0, 573.0, 584.0, 627.0, 465.0, 590.0, 582.0, 587.0, 507.0, 576.0, 579.0, 399.0, 573.0, 473.0, 630.0, 590.0, 546.0, 579.0, 579.0, 573.0, 522.0, 579.0, 579.0, 576.0, 
590.0, 579.0, 627.0, 582.0, 579.0, 582.0, 570.0, 573.0, 573.0, 582.0, 584.0, 624.0, 584.0, 579.0, 570.0, 630.0, 584.0, 576.0, 576.0, 576.0, 579.0, 582.0, 582.0, 573.0, 630.0, 579.0, 590.0, 576.0, 579.0, 582.0, 582.0, 627.0, 627.0, 627.0, 579.0, 579.0, 579.0, 579.0, 630.0, 570.0, 573.0, 573.0, 584.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [293.0, 294.0, 319.0, 317.0, 261.0, 261.0, 292.0, 290.0, 291.0, 285.0, 268.0, 262.0, 267.0, 266.0, 294.0, 288.0, 291.0, 285.0, 283.0, 296.0, 278.0, 292.0, 284.0, 300.0, 266.0, 259.0, 261.0, 261.0, 291.0, 285.0, 296.0, 286.0, 287.0, 289.0, 298.0, 292.0, 296.0, 277.0, 293.0, 289.0, 294.0, 288.0, 293.0, 288.0, 294.0, 290.0, 300.0, 284.0, 311.0, 313.0, 290.0, 289.0, 288.0, 294.0, 285.0, 288.0, 262.0, 260.0, 313.0, 314.0, 288.0, 288.0, 319.0, 308.0, 290.0, 286.0, 290.0, 286.0, 284.0, 289.0, 287.0, 297.0, 309.0, 318.0, 233.0, 232.0, 299.0, 291.0, 287.0, 295.0, 295.0, 292.0, 253.0, 254.0, 285.0, 291.0, 288.0, 291.0, 188.0, 211.0, 288.0, 285.0, 242.0, 231.0, 321.0, 309.0, 297.0, 293.0, 274.0, 272.0, 283.0, 296.0, 283.0, 296.0, 288.0, 285.0, 260.0, 262.0, 291.0, 288.0, 289.0, 290.0, 291.0, 285.0, 308.0, 282.0, 289.0, 290.0, 313.0, 314.0, 292.0, 290.0, 291.0, 288.0, 288.0, 294.0, 287.0, 283.0, 286.0, 287.0, 276.0, 297.0, 291.0, 291.0, 299.0, 285.0, 301.0, 323.0, 299.0, 285.0, 294.0, 285.0, 282.0, 288.0, 306.0, 324.0, 288.0, 296.0, 290.0, 286.0, 288.0, 288.0, 288.0, 288.0, 288.0, 291.0, 291.0, 291.0, 290.0, 292.0, 286.0, 287.0, 309.0, 321.0, 287.0, 
292.0, 296.0, 294.0, 283.0, 293.0, 294.0, 285.0, 295.0, 287.0, 295.0, 287.0, 311.0, 316.0, 306.0, 321.0, 319.0, 308.0, 291.0, 288.0, 293.0, 286.0, 288.0, 291.0, 288.0, 291.0, 308.0, 322.0, 277.0, 293.0, 279.0, 294.0, 290.0, 283.0, 295.0, 289.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6953154186385797, "mean_inference_ms": 1.2402239966956277, "mean_action_processing_ms": 0.13342831773414113, "mean_env_wait_ms": 0.837058849138891, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 10956800, "num_agent_steps_trained": 10956800, "num_env_steps_sampled": 5478400, "num_env_steps_trained": 5478400, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 5478400, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 10956800, "timers": {"training_iteration_time_ms": 3611.663, "learn_time_ms": 1089.293, "learn_throughput": 11750.742, "synch_weights_time_ms": 11.348}, "counters": {"num_env_steps_sampled": 5478400, "num_env_steps_trained": 5478400, "num_agent_steps_sampled": 10956800, "num_agent_steps_trained": 10956800}, "done": false, "episodes_total": 13696, "training_iteration": 428, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-28-17", "timestamp": 1666582097, "time_this_iter_s": 3.7833468914031982, "time_total_s": 1632.1811473369598, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1632.1811473369598, "timesteps_since_restore": 0, "iterations_since_restore": 428, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.900000000000002, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 200.0, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 177.44, "shaped_reward_min": 119, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.12, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 16.67, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.0, "useful_onion_pickup_agent_0_min": 9, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 16.48, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.08, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.15, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.05, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, 
"useful_onion_drop_agent_1_mean": 0.05, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.78, "potting_onion_agent_0_min": 9, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 16.23, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.44, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.27, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.14, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 5.08, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.13, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.07, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.04, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.12, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 5.03, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.09, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 5.01, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.78, "optimal_onion_potting_agent_0_min": 9, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 16.23, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.78, "viable_onion_potting_agent_0_min": 9, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 16.23, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0008235832792706788, "policy_loss": -0.001210855320096016, "vf_loss": 
7.875899791717529, "vf_explained_var": 0.5872540473937988, "kl": 0.0029848506674170494, "entropy": 0.8006356954574585, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 5491200, "num_env_steps_trained": 5491200, "num_agent_steps_sampled": 10982400, "num_agent_steps_trained": 10982400}, "sampler_results": {"episode_reward_max": 636.0, "episode_reward_min": 399.0, "episode_reward_mean": 577.44, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 188.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 288.72}, "custom_metrics": {"sparse_reward_mean": 200.0, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 177.44, "shaped_reward_min": 119, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.12, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 16.67, 
"onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.0, "useful_onion_pickup_agent_0_min": 9, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 16.48, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.08, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.15, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.05, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.05, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.78, "potting_onion_agent_0_min": 9, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 16.23, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.44, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.27, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.14, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 5.08, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.13, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.07, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.04, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.12, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 5.03, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.09, "soup_delivery_agent_0_min": 2, 
"soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 5.01, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.78, "optimal_onion_potting_agent_0_min": 9, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 16.23, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.78, "viable_onion_potting_agent_0_min": 9, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 16.23, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [576.0, 576.0, 573.0, 584.0, 627.0, 465.0, 590.0, 582.0, 587.0, 507.0, 576.0, 579.0, 399.0, 573.0, 473.0, 630.0, 590.0, 546.0, 579.0, 579.0, 573.0, 522.0, 579.0, 579.0, 576.0, 590.0, 579.0, 627.0, 582.0, 579.0, 582.0, 570.0, 573.0, 573.0, 582.0, 584.0, 624.0, 584.0, 579.0, 570.0, 630.0, 584.0, 576.0, 576.0, 576.0, 579.0, 582.0, 582.0, 573.0, 630.0, 579.0, 590.0, 576.0, 579.0, 582.0, 582.0, 627.0, 627.0, 627.0, 579.0, 579.0, 579.0, 579.0, 630.0, 570.0, 573.0, 573.0, 584.0, 576.0, 590.0, 576.0, 538.0, 579.0, 576.0, 582.0, 579.0, 576.0, 576.0, 576.0, 579.0, 576.0, 582.0, 579.0, 579.0, 570.0, 516.0, 579.0, 576.0, 576.0, 519.0, 627.0, 522.0, 579.0, 630.0, 636.0, 584.0, 579.0, 636.0, 579.0, 516.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [290.0, 286.0, 290.0, 286.0, 284.0, 289.0, 287.0, 297.0, 309.0, 318.0, 233.0, 232.0, 299.0, 291.0, 287.0, 295.0, 295.0, 292.0, 253.0, 254.0, 285.0, 291.0, 288.0, 291.0, 188.0, 211.0, 288.0, 285.0, 242.0, 231.0, 321.0, 309.0, 297.0, 293.0, 274.0, 272.0, 283.0, 296.0, 283.0, 296.0, 288.0, 285.0, 260.0, 262.0, 291.0, 288.0, 289.0, 290.0, 291.0, 
285.0, 308.0, 282.0, 289.0, 290.0, 313.0, 314.0, 292.0, 290.0, 291.0, 288.0, 288.0, 294.0, 287.0, 283.0, 286.0, 287.0, 276.0, 297.0, 291.0, 291.0, 299.0, 285.0, 301.0, 323.0, 299.0, 285.0, 294.0, 285.0, 282.0, 288.0, 306.0, 324.0, 288.0, 296.0, 290.0, 286.0, 288.0, 288.0, 288.0, 288.0, 288.0, 291.0, 291.0, 291.0, 290.0, 292.0, 286.0, 287.0, 309.0, 321.0, 287.0, 292.0, 296.0, 294.0, 283.0, 293.0, 294.0, 285.0, 295.0, 287.0, 295.0, 287.0, 311.0, 316.0, 306.0, 321.0, 319.0, 308.0, 291.0, 288.0, 293.0, 286.0, 288.0, 291.0, 288.0, 291.0, 308.0, 322.0, 277.0, 293.0, 279.0, 294.0, 290.0, 283.0, 295.0, 289.0, 277.0, 299.0, 301.0, 289.0, 288.0, 288.0, 275.0, 263.0, 296.0, 283.0, 288.0, 288.0, 291.0, 291.0, 288.0, 291.0, 284.0, 292.0, 285.0, 291.0, 286.0, 290.0, 288.0, 291.0, 289.0, 287.0, 284.0, 298.0, 285.0, 294.0, 294.0, 285.0, 296.0, 274.0, 244.0, 272.0, 293.0, 286.0, 291.0, 285.0, 288.0, 288.0, 266.0, 253.0, 317.0, 310.0, 255.0, 267.0, 293.0, 286.0, 309.0, 321.0, 319.0, 317.0, 293.0, 291.0, 296.0, 283.0, 317.0, 319.0, 284.0, 295.0, 266.0, 250.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6952512538145208, "mean_inference_ms": 1.2401818534278526, "mean_action_processing_ms": 0.13341855729231697, "mean_env_wait_ms": 0.837076962461623, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 636.0, "episode_reward_min": 399.0, "episode_reward_mean": 577.44, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 188.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 288.72}, "hist_stats": {"episode_reward": [576.0, 576.0, 573.0, 584.0, 627.0, 465.0, 590.0, 582.0, 587.0, 507.0, 576.0, 579.0, 399.0, 573.0, 473.0, 630.0, 590.0, 546.0, 579.0, 579.0, 573.0, 522.0, 579.0, 579.0, 576.0, 590.0, 579.0, 627.0, 582.0, 579.0, 582.0, 570.0, 573.0, 573.0, 582.0, 584.0, 624.0, 584.0, 579.0, 570.0, 630.0, 584.0, 576.0, 576.0, 576.0, 579.0, 582.0, 582.0, 573.0, 630.0, 579.0, 590.0, 576.0, 579.0, 582.0, 582.0, 627.0, 
627.0, 627.0, 579.0, 579.0, 579.0, 579.0, 630.0, 570.0, 573.0, 573.0, 584.0, 576.0, 590.0, 576.0, 538.0, 579.0, 576.0, 582.0, 579.0, 576.0, 576.0, 576.0, 579.0, 576.0, 582.0, 579.0, 579.0, 570.0, 516.0, 579.0, 576.0, 576.0, 519.0, 627.0, 522.0, 579.0, 630.0, 636.0, 584.0, 579.0, 636.0, 579.0, 516.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [290.0, 286.0, 290.0, 286.0, 284.0, 289.0, 287.0, 297.0, 309.0, 318.0, 233.0, 232.0, 299.0, 291.0, 287.0, 295.0, 295.0, 292.0, 253.0, 254.0, 285.0, 291.0, 288.0, 291.0, 188.0, 211.0, 288.0, 285.0, 242.0, 231.0, 321.0, 309.0, 297.0, 293.0, 274.0, 272.0, 283.0, 296.0, 283.0, 296.0, 288.0, 285.0, 260.0, 262.0, 291.0, 288.0, 289.0, 290.0, 291.0, 285.0, 308.0, 282.0, 289.0, 290.0, 313.0, 314.0, 292.0, 290.0, 291.0, 288.0, 288.0, 294.0, 287.0, 283.0, 286.0, 287.0, 276.0, 297.0, 291.0, 291.0, 299.0, 285.0, 301.0, 323.0, 299.0, 285.0, 294.0, 285.0, 282.0, 288.0, 306.0, 324.0, 288.0, 296.0, 290.0, 286.0, 288.0, 288.0, 288.0, 288.0, 288.0, 291.0, 291.0, 291.0, 290.0, 292.0, 286.0, 287.0, 309.0, 321.0, 287.0, 292.0, 296.0, 294.0, 283.0, 293.0, 294.0, 285.0, 295.0, 287.0, 295.0, 287.0, 311.0, 316.0, 306.0, 321.0, 319.0, 308.0, 291.0, 288.0, 293.0, 286.0, 288.0, 291.0, 288.0, 291.0, 308.0, 322.0, 277.0, 293.0, 279.0, 294.0, 290.0, 283.0, 295.0, 289.0, 277.0, 299.0, 301.0, 289.0, 288.0, 288.0, 275.0, 263.0, 296.0, 283.0, 288.0, 288.0, 291.0, 291.0, 288.0, 291.0, 284.0, 292.0, 285.0, 291.0, 286.0, 290.0, 288.0, 291.0, 289.0, 287.0, 284.0, 298.0, 285.0, 
294.0, 294.0, 285.0, 296.0, 274.0, 244.0, 272.0, 293.0, 286.0, 291.0, 285.0, 288.0, 288.0, 266.0, 253.0, 317.0, 310.0, 255.0, 267.0, 293.0, 286.0, 309.0, 321.0, 319.0, 317.0, 293.0, 291.0, 296.0, 283.0, 317.0, 319.0, 284.0, 295.0, 266.0, 250.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6952512538145208, "mean_inference_ms": 1.2401818534278526, "mean_action_processing_ms": 0.13341855729231697, "mean_env_wait_ms": 0.837076962461623, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 10982400, "num_agent_steps_trained": 10982400, "num_env_steps_sampled": 5491200, "num_env_steps_trained": 5491200, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 5491200, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 10982400, "timers": {"training_iteration_time_ms": 3610.481, "learn_time_ms": 1089.999, "learn_throughput": 11743.134, "synch_weights_time_ms": 11.371}, "counters": {"num_env_steps_sampled": 5491200, "num_env_steps_trained": 5491200, "num_agent_steps_sampled": 10982400, "num_agent_steps_trained": 10982400}, "done": false, "episodes_total": 13728, "training_iteration": 429, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-28-20", "timestamp": 1666582100, "time_this_iter_s": 3.6428945064544678, "time_total_s": 1635.8240418434143, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1635.8240418434143, "timesteps_since_restore": 0, "iterations_since_restore": 429, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.94, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 200.8, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 178.27, "shaped_reward_min": 156, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.36, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 16.52, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.25, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 16.28, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.08, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.17, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.05, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, 
"useful_onion_drop_agent_1_mean": 0.06, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.03, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 16.02, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.32, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 5.34, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.13, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.26, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.07, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.02, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.03, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.16, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.99, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.12, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.03, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 16.02, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.03, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 16.02, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.003178417682647705, "policy_loss": 0.0027919402346014977, "vf_loss": 
7.772497653961182, "vf_explained_var": 0.5636935234069824, "kl": 0.002719259588047862, "entropy": 0.7815442085266113, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 5504000, "num_env_steps_trained": 5504000, "num_agent_steps_sampled": 11008000, "num_agent_steps_trained": 11008000}, "sampler_results": {"episode_reward_max": 636.0, "episode_reward_min": 516.0, "episode_reward_mean": 579.87, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 244.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 289.935}, "custom_metrics": {"sparse_reward_mean": 200.8, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 178.27, "shaped_reward_min": 156, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.36, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 16.52, 
"onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.25, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 16.28, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.08, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.17, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.05, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.06, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.03, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 16.02, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.32, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 5.34, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.13, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.26, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.07, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.02, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.03, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.16, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.99, "soup_delivery_agent_0_min": 2, 
"soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.12, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.03, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 16.02, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.03, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 16.02, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [573.0, 573.0, 582.0, 584.0, 624.0, 584.0, 579.0, 570.0, 630.0, 584.0, 576.0, 576.0, 576.0, 579.0, 582.0, 582.0, 573.0, 630.0, 579.0, 590.0, 576.0, 579.0, 582.0, 582.0, 627.0, 627.0, 627.0, 579.0, 579.0, 579.0, 579.0, 630.0, 570.0, 573.0, 573.0, 584.0, 576.0, 590.0, 576.0, 538.0, 579.0, 576.0, 582.0, 579.0, 576.0, 576.0, 576.0, 579.0, 576.0, 582.0, 579.0, 579.0, 570.0, 516.0, 579.0, 576.0, 576.0, 519.0, 627.0, 522.0, 579.0, 630.0, 636.0, 584.0, 579.0, 636.0, 579.0, 516.0, 582.0, 522.0, 573.0, 582.0, 576.0, 584.0, 627.0, 582.0, 576.0, 579.0, 576.0, 630.0, 573.0, 579.0, 573.0, 573.0, 576.0, 576.0, 573.0, 519.0, 627.0, 581.0, 576.0, 579.0, 576.0, 573.0, 527.0, 584.0, 530.0, 533.0, 576.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [286.0, 287.0, 276.0, 297.0, 291.0, 291.0, 299.0, 285.0, 301.0, 323.0, 299.0, 285.0, 294.0, 285.0, 282.0, 288.0, 306.0, 324.0, 288.0, 296.0, 290.0, 286.0, 288.0, 288.0, 288.0, 288.0, 288.0, 291.0, 291.0, 291.0, 290.0, 292.0, 286.0, 287.0, 309.0, 321.0, 287.0, 292.0, 296.0, 294.0, 283.0, 293.0, 294.0, 285.0, 295.0, 287.0, 295.0, 287.0, 311.0, 
316.0, 306.0, 321.0, 319.0, 308.0, 291.0, 288.0, 293.0, 286.0, 288.0, 291.0, 288.0, 291.0, 308.0, 322.0, 277.0, 293.0, 279.0, 294.0, 290.0, 283.0, 295.0, 289.0, 277.0, 299.0, 301.0, 289.0, 288.0, 288.0, 275.0, 263.0, 296.0, 283.0, 288.0, 288.0, 291.0, 291.0, 288.0, 291.0, 284.0, 292.0, 285.0, 291.0, 286.0, 290.0, 288.0, 291.0, 289.0, 287.0, 284.0, 298.0, 285.0, 294.0, 294.0, 285.0, 296.0, 274.0, 244.0, 272.0, 293.0, 286.0, 291.0, 285.0, 288.0, 288.0, 266.0, 253.0, 317.0, 310.0, 255.0, 267.0, 293.0, 286.0, 309.0, 321.0, 319.0, 317.0, 293.0, 291.0, 296.0, 283.0, 317.0, 319.0, 284.0, 295.0, 266.0, 250.0, 291.0, 291.0, 266.0, 256.0, 283.0, 290.0, 292.0, 290.0, 286.0, 290.0, 300.0, 284.0, 311.0, 316.0, 291.0, 291.0, 294.0, 282.0, 291.0, 288.0, 285.0, 291.0, 309.0, 321.0, 288.0, 285.0, 289.0, 290.0, 286.0, 287.0, 287.0, 286.0, 286.0, 290.0, 289.0, 287.0, 283.0, 290.0, 252.0, 267.0, 314.0, 313.0, 303.0, 278.0, 278.0, 298.0, 285.0, 294.0, 290.0, 286.0, 285.0, 288.0, 267.0, 260.0, 284.0, 300.0, 261.0, 269.0, 261.0, 272.0, 292.0, 284.0, 291.0, 288.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6952016011638078, "mean_inference_ms": 1.2401249587874137, "mean_action_processing_ms": 0.13341175036794614, "mean_env_wait_ms": 0.8370252127859996, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 636.0, "episode_reward_min": 516.0, "episode_reward_mean": 579.87, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 244.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 289.935}, "hist_stats": {"episode_reward": [573.0, 573.0, 582.0, 584.0, 624.0, 584.0, 579.0, 570.0, 630.0, 584.0, 576.0, 576.0, 576.0, 579.0, 582.0, 582.0, 573.0, 630.0, 579.0, 590.0, 576.0, 579.0, 582.0, 582.0, 627.0, 627.0, 627.0, 579.0, 579.0, 579.0, 579.0, 630.0, 570.0, 573.0, 573.0, 584.0, 576.0, 590.0, 576.0, 538.0, 579.0, 576.0, 582.0, 579.0, 576.0, 576.0, 576.0, 579.0, 576.0, 582.0, 579.0, 579.0, 570.0, 516.0, 579.0, 576.0, 
576.0, 519.0, 627.0, 522.0, 579.0, 630.0, 636.0, 584.0, 579.0, 636.0, 579.0, 516.0, 582.0, 522.0, 573.0, 582.0, 576.0, 584.0, 627.0, 582.0, 576.0, 579.0, 576.0, 630.0, 573.0, 579.0, 573.0, 573.0, 576.0, 576.0, 573.0, 519.0, 627.0, 581.0, 576.0, 579.0, 576.0, 573.0, 527.0, 584.0, 530.0, 533.0, 576.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [286.0, 287.0, 276.0, 297.0, 291.0, 291.0, 299.0, 285.0, 301.0, 323.0, 299.0, 285.0, 294.0, 285.0, 282.0, 288.0, 306.0, 324.0, 288.0, 296.0, 290.0, 286.0, 288.0, 288.0, 288.0, 288.0, 288.0, 291.0, 291.0, 291.0, 290.0, 292.0, 286.0, 287.0, 309.0, 321.0, 287.0, 292.0, 296.0, 294.0, 283.0, 293.0, 294.0, 285.0, 295.0, 287.0, 295.0, 287.0, 311.0, 316.0, 306.0, 321.0, 319.0, 308.0, 291.0, 288.0, 293.0, 286.0, 288.0, 291.0, 288.0, 291.0, 308.0, 322.0, 277.0, 293.0, 279.0, 294.0, 290.0, 283.0, 295.0, 289.0, 277.0, 299.0, 301.0, 289.0, 288.0, 288.0, 275.0, 263.0, 296.0, 283.0, 288.0, 288.0, 291.0, 291.0, 288.0, 291.0, 284.0, 292.0, 285.0, 291.0, 286.0, 290.0, 288.0, 291.0, 289.0, 287.0, 284.0, 298.0, 285.0, 294.0, 294.0, 285.0, 296.0, 274.0, 244.0, 272.0, 293.0, 286.0, 291.0, 285.0, 288.0, 288.0, 266.0, 253.0, 317.0, 310.0, 255.0, 267.0, 293.0, 286.0, 309.0, 321.0, 319.0, 317.0, 293.0, 291.0, 296.0, 283.0, 317.0, 319.0, 284.0, 295.0, 266.0, 250.0, 291.0, 291.0, 266.0, 256.0, 283.0, 290.0, 292.0, 290.0, 286.0, 290.0, 300.0, 284.0, 311.0, 316.0, 291.0, 291.0, 294.0, 282.0, 291.0, 288.0, 285.0, 291.0, 309.0, 321.0, 288.0, 285.0, 289.0, 290.0, 
286.0, 287.0, 287.0, 286.0, 286.0, 290.0, 289.0, 287.0, 283.0, 290.0, 252.0, 267.0, 314.0, 313.0, 303.0, 278.0, 278.0, 298.0, 285.0, 294.0, 290.0, 286.0, 285.0, 288.0, 267.0, 260.0, 284.0, 300.0, 261.0, 269.0, 261.0, 272.0, 292.0, 284.0, 291.0, 288.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6952016011638078, "mean_inference_ms": 1.2401249587874137, "mean_action_processing_ms": 0.13341175036794614, "mean_env_wait_ms": 0.8370252127859996, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 11008000, "num_agent_steps_trained": 11008000, "num_env_steps_sampled": 5504000, "num_env_steps_trained": 5504000, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 5504000, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 11008000, "timers": {"training_iteration_time_ms": 3615.238, "learn_time_ms": 1099.735, "learn_throughput": 11639.171, "synch_weights_time_ms": 11.164}, "counters": {"num_env_steps_sampled": 5504000, "num_env_steps_trained": 5504000, "num_agent_steps_sampled": 11008000, "num_agent_steps_trained": 11008000}, "done": false, "episodes_total": 13760, "training_iteration": 430, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-28-24", "timestamp": 1666582104, "time_this_iter_s": 3.7221291065216064, "time_total_s": 1639.546170949936, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1639.546170949936, "timesteps_since_restore": 0, "iterations_since_restore": 430, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.683333333333334, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 198.0, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 176.56, "shaped_reward_min": 133, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.47, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 16.28, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.39, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 16.05, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.11, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.14, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.05, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, 
"useful_onion_drop_agent_1_mean": 0.06, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.05, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 15.79, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.23, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 5.4, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.02, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.21, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.09, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.09, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.93, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.14, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.86, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.09, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.05, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 15.79, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.05, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 15.79, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.002081596525385976, "policy_loss": 0.0016915076412260532, "vf_loss": 
7.813340663909912, "vf_explained_var": 0.5801672339439392, "kl": 0.002716578310355544, "entropy": 0.7824891805648804, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 5516800, "num_env_steps_trained": 5516800, "num_agent_steps_sampled": 11033600, "num_agent_steps_trained": 11033600}, "sampler_results": {"episode_reward_max": 636.0, "episode_reward_min": 453.0, "episode_reward_mean": 572.56, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 225.0}, "policy_reward_max": {"ppo": 321.0}, "policy_reward_mean": {"ppo": 286.28}, "custom_metrics": {"sparse_reward_mean": 198.0, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 176.56, "shaped_reward_min": 133, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.47, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 16.28, 
"onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.39, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 16.05, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.11, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.14, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.05, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.06, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.05, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 15.79, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.23, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 5.4, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.02, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.21, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.09, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.09, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.93, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.14, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.86, "soup_delivery_agent_0_min": 2, 
"soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.09, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.05, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 15.79, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.05, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 15.79, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [570.0, 573.0, 573.0, 584.0, 576.0, 590.0, 576.0, 538.0, 579.0, 576.0, 582.0, 579.0, 576.0, 576.0, 576.0, 579.0, 576.0, 582.0, 579.0, 579.0, 570.0, 516.0, 579.0, 576.0, 576.0, 519.0, 627.0, 522.0, 579.0, 630.0, 636.0, 584.0, 579.0, 636.0, 579.0, 516.0, 582.0, 522.0, 573.0, 582.0, 576.0, 584.0, 627.0, 582.0, 576.0, 579.0, 576.0, 630.0, 573.0, 579.0, 573.0, 573.0, 576.0, 576.0, 573.0, 519.0, 627.0, 581.0, 576.0, 579.0, 576.0, 573.0, 527.0, 584.0, 530.0, 533.0, 576.0, 579.0, 519.0, 573.0, 576.0, 582.0, 582.0, 579.0, 579.0, 582.0, 519.0, 573.0, 584.0, 587.0, 453.0, 579.0, 576.0, 525.0, 582.0, 576.0, 533.0, 570.0, 533.0, 576.0, 579.0, 576.0, 587.0, 587.0, 582.0, 579.0, 519.0, 582.0, 582.0, 630.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [277.0, 293.0, 279.0, 294.0, 290.0, 283.0, 295.0, 289.0, 277.0, 299.0, 301.0, 289.0, 288.0, 288.0, 275.0, 263.0, 296.0, 283.0, 288.0, 288.0, 291.0, 291.0, 288.0, 291.0, 284.0, 292.0, 285.0, 291.0, 286.0, 290.0, 288.0, 291.0, 289.0, 287.0, 284.0, 298.0, 285.0, 294.0, 294.0, 285.0, 296.0, 274.0, 244.0, 272.0, 293.0, 286.0, 291.0, 285.0, 288.0, 
288.0, 266.0, 253.0, 317.0, 310.0, 255.0, 267.0, 293.0, 286.0, 309.0, 321.0, 319.0, 317.0, 293.0, 291.0, 296.0, 283.0, 317.0, 319.0, 284.0, 295.0, 266.0, 250.0, 291.0, 291.0, 266.0, 256.0, 283.0, 290.0, 292.0, 290.0, 286.0, 290.0, 300.0, 284.0, 311.0, 316.0, 291.0, 291.0, 294.0, 282.0, 291.0, 288.0, 285.0, 291.0, 309.0, 321.0, 288.0, 285.0, 289.0, 290.0, 286.0, 287.0, 287.0, 286.0, 286.0, 290.0, 289.0, 287.0, 283.0, 290.0, 252.0, 267.0, 314.0, 313.0, 303.0, 278.0, 278.0, 298.0, 285.0, 294.0, 290.0, 286.0, 285.0, 288.0, 267.0, 260.0, 284.0, 300.0, 261.0, 269.0, 261.0, 272.0, 292.0, 284.0, 291.0, 288.0, 256.0, 263.0, 285.0, 288.0, 298.0, 278.0, 293.0, 289.0, 292.0, 290.0, 283.0, 296.0, 291.0, 288.0, 291.0, 291.0, 260.0, 259.0, 284.0, 289.0, 294.0, 290.0, 298.0, 289.0, 225.0, 228.0, 282.0, 297.0, 277.0, 299.0, 263.0, 262.0, 293.0, 289.0, 286.0, 290.0, 263.0, 270.0, 290.0, 280.0, 274.0, 259.0, 283.0, 293.0, 285.0, 294.0, 290.0, 286.0, 289.0, 298.0, 299.0, 288.0, 292.0, 290.0, 290.0, 289.0, 261.0, 258.0, 283.0, 299.0, 283.0, 299.0, 316.0, 314.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.69515274208325, "mean_inference_ms": 1.2400323722875128, "mean_action_processing_ms": 0.13340651629842287, "mean_env_wait_ms": 0.8369591097856848, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 636.0, "episode_reward_min": 453.0, "episode_reward_mean": 572.56, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 225.0}, "policy_reward_max": {"ppo": 321.0}, "policy_reward_mean": {"ppo": 286.28}, "hist_stats": {"episode_reward": [570.0, 573.0, 573.0, 584.0, 576.0, 590.0, 576.0, 538.0, 579.0, 576.0, 582.0, 579.0, 576.0, 576.0, 576.0, 579.0, 576.0, 582.0, 579.0, 579.0, 570.0, 516.0, 579.0, 576.0, 576.0, 519.0, 627.0, 522.0, 579.0, 630.0, 636.0, 584.0, 579.0, 636.0, 579.0, 516.0, 582.0, 522.0, 573.0, 582.0, 576.0, 584.0, 627.0, 582.0, 576.0, 579.0, 576.0, 630.0, 573.0, 579.0, 573.0, 573.0, 576.0, 576.0, 573.0, 519.0, 627.0, 
581.0, 576.0, 579.0, 576.0, 573.0, 527.0, 584.0, 530.0, 533.0, 576.0, 579.0, 519.0, 573.0, 576.0, 582.0, 582.0, 579.0, 579.0, 582.0, 519.0, 573.0, 584.0, 587.0, 453.0, 579.0, 576.0, 525.0, 582.0, 576.0, 533.0, 570.0, 533.0, 576.0, 579.0, 576.0, 587.0, 587.0, 582.0, 579.0, 519.0, 582.0, 582.0, 630.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [277.0, 293.0, 279.0, 294.0, 290.0, 283.0, 295.0, 289.0, 277.0, 299.0, 301.0, 289.0, 288.0, 288.0, 275.0, 263.0, 296.0, 283.0, 288.0, 288.0, 291.0, 291.0, 288.0, 291.0, 284.0, 292.0, 285.0, 291.0, 286.0, 290.0, 288.0, 291.0, 289.0, 287.0, 284.0, 298.0, 285.0, 294.0, 294.0, 285.0, 296.0, 274.0, 244.0, 272.0, 293.0, 286.0, 291.0, 285.0, 288.0, 288.0, 266.0, 253.0, 317.0, 310.0, 255.0, 267.0, 293.0, 286.0, 309.0, 321.0, 319.0, 317.0, 293.0, 291.0, 296.0, 283.0, 317.0, 319.0, 284.0, 295.0, 266.0, 250.0, 291.0, 291.0, 266.0, 256.0, 283.0, 290.0, 292.0, 290.0, 286.0, 290.0, 300.0, 284.0, 311.0, 316.0, 291.0, 291.0, 294.0, 282.0, 291.0, 288.0, 285.0, 291.0, 309.0, 321.0, 288.0, 285.0, 289.0, 290.0, 286.0, 287.0, 287.0, 286.0, 286.0, 290.0, 289.0, 287.0, 283.0, 290.0, 252.0, 267.0, 314.0, 313.0, 303.0, 278.0, 278.0, 298.0, 285.0, 294.0, 290.0, 286.0, 285.0, 288.0, 267.0, 260.0, 284.0, 300.0, 261.0, 269.0, 261.0, 272.0, 292.0, 284.0, 291.0, 288.0, 256.0, 263.0, 285.0, 288.0, 298.0, 278.0, 293.0, 289.0, 292.0, 290.0, 283.0, 296.0, 291.0, 288.0, 291.0, 291.0, 260.0, 259.0, 284.0, 289.0, 294.0, 290.0, 298.0, 289.0, 225.0, 228.0, 282.0, 297.0, 277.0, 
299.0, 263.0, 262.0, 293.0, 289.0, 286.0, 290.0, 263.0, 270.0, 290.0, 280.0, 274.0, 259.0, 283.0, 293.0, 285.0, 294.0, 290.0, 286.0, 289.0, 298.0, 299.0, 288.0, 292.0, 290.0, 290.0, 289.0, 261.0, 258.0, 283.0, 299.0, 283.0, 299.0, 316.0, 314.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.69515274208325, "mean_inference_ms": 1.2400323722875128, "mean_action_processing_ms": 0.13340651629842287, "mean_env_wait_ms": 0.8369591097856848, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 11033600, "num_agent_steps_trained": 11033600, "num_env_steps_sampled": 5516800, "num_env_steps_trained": 5516800, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 5516800, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 11033600, "timers": {"training_iteration_time_ms": 3627.097, "learn_time_ms": 1104.409, "learn_throughput": 11589.905, "synch_weights_time_ms": 10.991}, "counters": {"num_env_steps_sampled": 5516800, "num_env_steps_trained": 5516800, "num_agent_steps_sampled": 11033600, "num_agent_steps_trained": 11033600}, "done": false, "episodes_total": 13792, "training_iteration": 431, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-28-28", "timestamp": 1666582108, "time_this_iter_s": 3.7307004928588867, "time_total_s": 1643.2768714427948, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1643.2768714427948, "timesteps_since_restore": 0, "iterations_since_restore": 431, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.9, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 195.8, "sparse_reward_min": 80, "sparse_reward_max": 220, "shaped_reward_mean": 175.03, "shaped_reward_min": 77, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.68, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 15.89, "onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.59, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 15.73, "useful_onion_pickup_agent_1_min": 6, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.17, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.16, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.07, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, 
"useful_onion_drop_agent_1_mean": 0.08, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.18, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 15.4, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.15, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 5.43, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 4.91, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.17, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.09, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.09, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.04, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.83, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.18, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 4.75, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.12, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.18, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 15.4, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.18, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 15.4, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0007839496247470379, "policy_loss": 0.0003952703846152872, "vf_loss": 
7.842353343963623, "vf_explained_var": 0.5919654369354248, "kl": 0.0032928823493421078, "entropy": 0.7911117076873779, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 5529600, "num_env_steps_trained": 5529600, "num_agent_steps_sampled": 11059200, "num_agent_steps_trained": 11059200}, "sampler_results": {"episode_reward_max": 636.0, "episode_reward_min": 237.0, "episode_reward_mean": 566.63, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 118.0}, "policy_reward_max": {"ppo": 321.0}, "policy_reward_mean": {"ppo": 283.315}, "custom_metrics": {"sparse_reward_mean": 195.8, "sparse_reward_min": 80, "sparse_reward_max": 220, "shaped_reward_mean": 175.03, "shaped_reward_min": 77, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.68, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 15.89, 
"onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.59, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 15.73, "useful_onion_pickup_agent_1_min": 6, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.17, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.16, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.07, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.08, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.18, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 15.4, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.15, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 5.43, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 4.91, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.17, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.09, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.09, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.04, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.83, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.18, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 4.75, "soup_delivery_agent_0_min": 1, 
"soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.12, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.18, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 15.4, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.18, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 15.4, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 636.0, 579.0, 516.0, 582.0, 522.0, 573.0, 582.0, 576.0, 584.0, 627.0, 582.0, 576.0, 579.0, 576.0, 630.0, 573.0, 579.0, 573.0, 573.0, 576.0, 576.0, 573.0, 519.0, 627.0, 581.0, 576.0, 579.0, 576.0, 573.0, 527.0, 584.0, 530.0, 533.0, 576.0, 579.0, 519.0, 573.0, 576.0, 582.0, 582.0, 579.0, 579.0, 582.0, 519.0, 573.0, 584.0, 587.0, 453.0, 579.0, 576.0, 525.0, 582.0, 576.0, 533.0, 570.0, 533.0, 576.0, 579.0, 576.0, 587.0, 587.0, 582.0, 579.0, 519.0, 582.0, 582.0, 630.0, 342.0, 627.0, 579.0, 582.0, 584.0, 527.0, 582.0, 587.0, 579.0, 579.0, 582.0, 579.0, 633.0, 516.0, 576.0, 627.0, 476.0, 579.0, 587.0, 636.0, 579.0, 576.0, 633.0, 576.0, 530.0, 237.0, 581.0, 510.0, 584.0, 472.0, 576.0, 627.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [296.0, 283.0, 317.0, 319.0, 284.0, 295.0, 266.0, 250.0, 291.0, 291.0, 266.0, 256.0, 283.0, 290.0, 292.0, 290.0, 286.0, 290.0, 300.0, 284.0, 311.0, 316.0, 291.0, 291.0, 294.0, 282.0, 291.0, 288.0, 285.0, 291.0, 309.0, 321.0, 288.0, 285.0, 289.0, 290.0, 286.0, 287.0, 287.0, 286.0, 286.0, 290.0, 289.0, 287.0, 283.0, 290.0, 252.0, 267.0, 314.0, 
313.0, 303.0, 278.0, 278.0, 298.0, 285.0, 294.0, 290.0, 286.0, 285.0, 288.0, 267.0, 260.0, 284.0, 300.0, 261.0, 269.0, 261.0, 272.0, 292.0, 284.0, 291.0, 288.0, 256.0, 263.0, 285.0, 288.0, 298.0, 278.0, 293.0, 289.0, 292.0, 290.0, 283.0, 296.0, 291.0, 288.0, 291.0, 291.0, 260.0, 259.0, 284.0, 289.0, 294.0, 290.0, 298.0, 289.0, 225.0, 228.0, 282.0, 297.0, 277.0, 299.0, 263.0, 262.0, 293.0, 289.0, 286.0, 290.0, 263.0, 270.0, 290.0, 280.0, 274.0, 259.0, 283.0, 293.0, 285.0, 294.0, 290.0, 286.0, 289.0, 298.0, 299.0, 288.0, 292.0, 290.0, 290.0, 289.0, 261.0, 258.0, 283.0, 299.0, 283.0, 299.0, 316.0, 314.0, 180.0, 162.0, 316.0, 311.0, 293.0, 286.0, 291.0, 291.0, 287.0, 297.0, 253.0, 274.0, 291.0, 291.0, 299.0, 288.0, 289.0, 290.0, 283.0, 296.0, 292.0, 290.0, 291.0, 288.0, 317.0, 316.0, 266.0, 250.0, 294.0, 282.0, 312.0, 315.0, 234.0, 242.0, 291.0, 288.0, 296.0, 291.0, 317.0, 319.0, 288.0, 291.0, 286.0, 290.0, 314.0, 319.0, 294.0, 282.0, 265.0, 265.0, 118.0, 119.0, 293.0, 288.0, 240.0, 270.0, 297.0, 287.0, 239.0, 233.0, 295.0, 281.0, 319.0, 308.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6951108003077172, "mean_inference_ms": 1.2399412656561892, "mean_action_processing_ms": 0.1334012911910848, "mean_env_wait_ms": 0.8369005759153684, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 636.0, "episode_reward_min": 237.0, "episode_reward_mean": 566.63, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 118.0}, "policy_reward_max": {"ppo": 321.0}, "policy_reward_mean": {"ppo": 283.315}, "hist_stats": {"episode_reward": [579.0, 636.0, 579.0, 516.0, 582.0, 522.0, 573.0, 582.0, 576.0, 584.0, 627.0, 582.0, 576.0, 579.0, 576.0, 630.0, 573.0, 579.0, 573.0, 573.0, 576.0, 576.0, 573.0, 519.0, 627.0, 581.0, 576.0, 579.0, 576.0, 573.0, 527.0, 584.0, 530.0, 533.0, 576.0, 579.0, 519.0, 573.0, 576.0, 582.0, 582.0, 579.0, 579.0, 582.0, 519.0, 573.0, 584.0, 587.0, 453.0, 579.0, 576.0, 525.0, 582.0, 576.0, 533.0, 570.0, 533.0, 
576.0, 579.0, 576.0, 587.0, 587.0, 582.0, 579.0, 519.0, 582.0, 582.0, 630.0, 342.0, 627.0, 579.0, 582.0, 584.0, 527.0, 582.0, 587.0, 579.0, 579.0, 582.0, 579.0, 633.0, 516.0, 576.0, 627.0, 476.0, 579.0, 587.0, 636.0, 579.0, 576.0, 633.0, 576.0, 530.0, 237.0, 581.0, 510.0, 584.0, 472.0, 576.0, 627.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [296.0, 283.0, 317.0, 319.0, 284.0, 295.0, 266.0, 250.0, 291.0, 291.0, 266.0, 256.0, 283.0, 290.0, 292.0, 290.0, 286.0, 290.0, 300.0, 284.0, 311.0, 316.0, 291.0, 291.0, 294.0, 282.0, 291.0, 288.0, 285.0, 291.0, 309.0, 321.0, 288.0, 285.0, 289.0, 290.0, 286.0, 287.0, 287.0, 286.0, 286.0, 290.0, 289.0, 287.0, 283.0, 290.0, 252.0, 267.0, 314.0, 313.0, 303.0, 278.0, 278.0, 298.0, 285.0, 294.0, 290.0, 286.0, 285.0, 288.0, 267.0, 260.0, 284.0, 300.0, 261.0, 269.0, 261.0, 272.0, 292.0, 284.0, 291.0, 288.0, 256.0, 263.0, 285.0, 288.0, 298.0, 278.0, 293.0, 289.0, 292.0, 290.0, 283.0, 296.0, 291.0, 288.0, 291.0, 291.0, 260.0, 259.0, 284.0, 289.0, 294.0, 290.0, 298.0, 289.0, 225.0, 228.0, 282.0, 297.0, 277.0, 299.0, 263.0, 262.0, 293.0, 289.0, 286.0, 290.0, 263.0, 270.0, 290.0, 280.0, 274.0, 259.0, 283.0, 293.0, 285.0, 294.0, 290.0, 286.0, 289.0, 298.0, 299.0, 288.0, 292.0, 290.0, 290.0, 289.0, 261.0, 258.0, 283.0, 299.0, 283.0, 299.0, 316.0, 314.0, 180.0, 162.0, 316.0, 311.0, 293.0, 286.0, 291.0, 291.0, 287.0, 297.0, 253.0, 274.0, 291.0, 291.0, 299.0, 288.0, 289.0, 290.0, 283.0, 296.0, 292.0, 290.0, 291.0, 288.0, 317.0, 316.0, 266.0, 250.0, 294.0, 
282.0, 312.0, 315.0, 234.0, 242.0, 291.0, 288.0, 296.0, 291.0, 317.0, 319.0, 288.0, 291.0, 286.0, 290.0, 314.0, 319.0, 294.0, 282.0, 265.0, 265.0, 118.0, 119.0, 293.0, 288.0, 240.0, 270.0, 297.0, 287.0, 239.0, 233.0, 295.0, 281.0, 319.0, 308.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6951108003077172, "mean_inference_ms": 1.2399412656561892, "mean_action_processing_ms": 0.1334012911910848, "mean_env_wait_ms": 0.8369005759153684, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 11059200, "num_agent_steps_trained": 11059200, "num_env_steps_sampled": 5529600, "num_env_steps_trained": 5529600, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 5529600, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 11059200, "timers": {"training_iteration_time_ms": 3646.379, "learn_time_ms": 1118.615, "learn_throughput": 11442.726, "synch_weights_time_ms": 11.386}, "counters": {"num_env_steps_sampled": 5529600, "num_env_steps_trained": 5529600, "num_agent_steps_sampled": 11059200, "num_agent_steps_trained": 11059200}, "done": false, "episodes_total": 13824, "training_iteration": 432, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-28-32", "timestamp": 1666582112, "time_this_iter_s": 3.6900887489318848, "time_total_s": 1646.9669601917267, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1646.9669601917267, "timesteps_since_restore": 0, "iterations_since_restore": 432, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.833333333333332, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 196.8, "sparse_reward_min": 80, "sparse_reward_max": 220, "shaped_reward_mean": 175.82, "shaped_reward_min": 77, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.69, "onion_pickup_agent_0_min": 10, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 15.93, "onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.62, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 15.83, "useful_onion_pickup_agent_1_min": 6, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.18, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.12, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.07, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, 
"useful_onion_drop_agent_1_mean": 0.08, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.19, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 15.54, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.2, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 5.51, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 4.94, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.22, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.11, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.11, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.04, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.04, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.85, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.18, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 4.78, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.13, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.19, "optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 15.54, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.19, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 15.54, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.004141518846154213, "policy_loss": -0.004519705194979906, "vf_loss": 
7.712698459625244, "vf_explained_var": 0.5621180534362793, "kl": 0.002915637567639351, "entropy": 0.7861687541007996, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 5542400, "num_env_steps_trained": 5542400, "num_agent_steps_sampled": 11084800, "num_agent_steps_trained": 11084800}, "sampler_results": {"episode_reward_max": 636.0, "episode_reward_min": 237.0, "episode_reward_mean": 569.42, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 118.0}, "policy_reward_max": {"ppo": 321.0}, "policy_reward_mean": {"ppo": 284.71}, "custom_metrics": {"sparse_reward_mean": 196.8, "sparse_reward_min": 80, "sparse_reward_max": 220, "shaped_reward_mean": 175.82, "shaped_reward_min": 77, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.69, "onion_pickup_agent_0_min": 10, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 15.93, 
"onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.62, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 15.83, "useful_onion_pickup_agent_1_min": 6, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.18, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.12, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.07, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.08, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.19, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 15.54, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.2, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 5.51, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 4.94, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.22, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.11, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.11, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.04, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.04, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.85, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.18, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 4.78, "soup_delivery_agent_0_min": 1, 
"soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.13, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.19, "optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 15.54, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.19, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 15.54, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [530.0, 533.0, 576.0, 579.0, 519.0, 573.0, 576.0, 582.0, 582.0, 579.0, 579.0, 582.0, 519.0, 573.0, 584.0, 587.0, 453.0, 579.0, 576.0, 525.0, 582.0, 576.0, 533.0, 570.0, 533.0, 576.0, 579.0, 576.0, 587.0, 587.0, 582.0, 579.0, 519.0, 582.0, 582.0, 630.0, 342.0, 627.0, 579.0, 582.0, 584.0, 527.0, 582.0, 587.0, 579.0, 579.0, 582.0, 579.0, 633.0, 516.0, 576.0, 627.0, 476.0, 579.0, 587.0, 636.0, 579.0, 576.0, 633.0, 576.0, 530.0, 237.0, 581.0, 510.0, 584.0, 472.0, 576.0, 627.0, 570.0, 579.0, 582.0, 579.0, 630.0, 630.0, 587.0, 579.0, 576.0, 627.0, 582.0, 576.0, 630.0, 576.0, 582.0, 627.0, 582.0, 630.0, 579.0, 582.0, 582.0, 582.0, 525.0, 579.0, 570.0, 573.0, 579.0, 573.0, 630.0, 510.0, 582.0, 573.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [261.0, 269.0, 261.0, 272.0, 292.0, 284.0, 291.0, 288.0, 256.0, 263.0, 285.0, 288.0, 298.0, 278.0, 293.0, 289.0, 292.0, 290.0, 283.0, 296.0, 291.0, 288.0, 291.0, 291.0, 260.0, 259.0, 284.0, 289.0, 294.0, 290.0, 298.0, 289.0, 225.0, 228.0, 282.0, 297.0, 277.0, 299.0, 263.0, 262.0, 293.0, 289.0, 286.0, 290.0, 263.0, 270.0, 290.0, 280.0, 274.0, 
259.0, 283.0, 293.0, 285.0, 294.0, 290.0, 286.0, 289.0, 298.0, 299.0, 288.0, 292.0, 290.0, 290.0, 289.0, 261.0, 258.0, 283.0, 299.0, 283.0, 299.0, 316.0, 314.0, 180.0, 162.0, 316.0, 311.0, 293.0, 286.0, 291.0, 291.0, 287.0, 297.0, 253.0, 274.0, 291.0, 291.0, 299.0, 288.0, 289.0, 290.0, 283.0, 296.0, 292.0, 290.0, 291.0, 288.0, 317.0, 316.0, 266.0, 250.0, 294.0, 282.0, 312.0, 315.0, 234.0, 242.0, 291.0, 288.0, 296.0, 291.0, 317.0, 319.0, 288.0, 291.0, 286.0, 290.0, 314.0, 319.0, 294.0, 282.0, 265.0, 265.0, 118.0, 119.0, 293.0, 288.0, 240.0, 270.0, 297.0, 287.0, 239.0, 233.0, 295.0, 281.0, 319.0, 308.0, 294.0, 276.0, 293.0, 286.0, 290.0, 292.0, 283.0, 296.0, 316.0, 314.0, 311.0, 319.0, 296.0, 291.0, 294.0, 285.0, 291.0, 285.0, 306.0, 321.0, 290.0, 292.0, 285.0, 291.0, 314.0, 316.0, 283.0, 293.0, 291.0, 291.0, 315.0, 312.0, 291.0, 291.0, 315.0, 315.0, 290.0, 289.0, 294.0, 288.0, 289.0, 293.0, 294.0, 288.0, 255.0, 270.0, 288.0, 291.0, 275.0, 295.0, 289.0, 284.0, 288.0, 291.0, 289.0, 284.0, 312.0, 318.0, 245.0, 265.0, 290.0, 292.0, 294.0, 279.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6950606518602862, "mean_inference_ms": 1.2398391732462262, "mean_action_processing_ms": 0.1333949690210147, "mean_env_wait_ms": 0.8368347695385678, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 636.0, "episode_reward_min": 237.0, "episode_reward_mean": 569.42, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 118.0}, "policy_reward_max": {"ppo": 321.0}, "policy_reward_mean": {"ppo": 284.71}, "hist_stats": {"episode_reward": [530.0, 533.0, 576.0, 579.0, 519.0, 573.0, 576.0, 582.0, 582.0, 579.0, 579.0, 582.0, 519.0, 573.0, 584.0, 587.0, 453.0, 579.0, 576.0, 525.0, 582.0, 576.0, 533.0, 570.0, 533.0, 576.0, 579.0, 576.0, 587.0, 587.0, 582.0, 579.0, 519.0, 582.0, 582.0, 630.0, 342.0, 627.0, 579.0, 582.0, 584.0, 527.0, 582.0, 587.0, 579.0, 579.0, 582.0, 579.0, 633.0, 516.0, 576.0, 627.0, 476.0, 579.0, 587.0, 636.0, 579.0, 
576.0, 633.0, 576.0, 530.0, 237.0, 581.0, 510.0, 584.0, 472.0, 576.0, 627.0, 570.0, 579.0, 582.0, 579.0, 630.0, 630.0, 587.0, 579.0, 576.0, 627.0, 582.0, 576.0, 630.0, 576.0, 582.0, 627.0, 582.0, 630.0, 579.0, 582.0, 582.0, 582.0, 525.0, 579.0, 570.0, 573.0, 579.0, 573.0, 630.0, 510.0, 582.0, 573.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [261.0, 269.0, 261.0, 272.0, 292.0, 284.0, 291.0, 288.0, 256.0, 263.0, 285.0, 288.0, 298.0, 278.0, 293.0, 289.0, 292.0, 290.0, 283.0, 296.0, 291.0, 288.0, 291.0, 291.0, 260.0, 259.0, 284.0, 289.0, 294.0, 290.0, 298.0, 289.0, 225.0, 228.0, 282.0, 297.0, 277.0, 299.0, 263.0, 262.0, 293.0, 289.0, 286.0, 290.0, 263.0, 270.0, 290.0, 280.0, 274.0, 259.0, 283.0, 293.0, 285.0, 294.0, 290.0, 286.0, 289.0, 298.0, 299.0, 288.0, 292.0, 290.0, 290.0, 289.0, 261.0, 258.0, 283.0, 299.0, 283.0, 299.0, 316.0, 314.0, 180.0, 162.0, 316.0, 311.0, 293.0, 286.0, 291.0, 291.0, 287.0, 297.0, 253.0, 274.0, 291.0, 291.0, 299.0, 288.0, 289.0, 290.0, 283.0, 296.0, 292.0, 290.0, 291.0, 288.0, 317.0, 316.0, 266.0, 250.0, 294.0, 282.0, 312.0, 315.0, 234.0, 242.0, 291.0, 288.0, 296.0, 291.0, 317.0, 319.0, 288.0, 291.0, 286.0, 290.0, 314.0, 319.0, 294.0, 282.0, 265.0, 265.0, 118.0, 119.0, 293.0, 288.0, 240.0, 270.0, 297.0, 287.0, 239.0, 233.0, 295.0, 281.0, 319.0, 308.0, 294.0, 276.0, 293.0, 286.0, 290.0, 292.0, 283.0, 296.0, 316.0, 314.0, 311.0, 319.0, 296.0, 291.0, 294.0, 285.0, 291.0, 285.0, 306.0, 321.0, 290.0, 292.0, 285.0, 291.0, 314.0, 316.0, 283.0, 293.0, 291.0, 
291.0, 315.0, 312.0, 291.0, 291.0, 315.0, 315.0, 290.0, 289.0, 294.0, 288.0, 289.0, 293.0, 294.0, 288.0, 255.0, 270.0, 288.0, 291.0, 275.0, 295.0, 289.0, 284.0, 288.0, 291.0, 289.0, 284.0, 312.0, 318.0, 245.0, 265.0, 290.0, 292.0, 294.0, 279.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6950606518602862, "mean_inference_ms": 1.2398391732462262, "mean_action_processing_ms": 0.1333949690210147, "mean_env_wait_ms": 0.8368347695385678, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 11084800, "num_agent_steps_trained": 11084800, "num_env_steps_sampled": 5542400, "num_env_steps_trained": 5542400, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 5542400, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 11084800, "timers": {"training_iteration_time_ms": 3637.444, "learn_time_ms": 1114.878, "learn_throughput": 11481.075, "synch_weights_time_ms": 12.533}, "counters": {"num_env_steps_sampled": 5542400, "num_env_steps_trained": 5542400, "num_agent_steps_sampled": 11084800, "num_agent_steps_trained": 11084800}, "done": false, "episodes_total": 13856, "training_iteration": 433, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-28-36", "timestamp": 1666582116, "time_this_iter_s": 3.568324565887451, "time_total_s": 1650.5352847576141, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1650.5352847576141, "timesteps_since_restore": 0, "iterations_since_restore": 433, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.419999999999998, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 198.2, "sparse_reward_min": 80, "sparse_reward_max": 220, "shaped_reward_mean": 176.65, "shaped_reward_min": 77, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.58, "onion_pickup_agent_0_min": 10, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 16.05, "onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.49, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 15.92, "useful_onion_pickup_agent_1_min": 6, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.13, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.12, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.05, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, 
"useful_onion_drop_agent_1_mean": 0.07, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.16, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 15.67, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.23, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 5.5, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 4.96, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.26, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.1, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.07, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.04, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 4.88, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.22, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.81, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.19, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.16, "optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 15.67, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.16, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 15.67, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0013387391809374094, "policy_loss": 0.0009609795524738729, "vf_loss": 
7.725425720214844, "vf_explained_var": 0.5909132957458496, "kl": 0.002303453627973795, "entropy": 0.7895658016204834, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 5555200, "num_env_steps_trained": 5555200, "num_agent_steps_sampled": 11110400, "num_agent_steps_trained": 11110400}, "sampler_results": {"episode_reward_max": 636.0, "episode_reward_min": 237.0, "episode_reward_mean": 573.05, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 118.0}, "policy_reward_max": {"ppo": 323.0}, "policy_reward_mean": {"ppo": 286.525}, "custom_metrics": {"sparse_reward_mean": 198.2, "sparse_reward_min": 80, "sparse_reward_max": 220, "shaped_reward_mean": 176.65, "shaped_reward_min": 77, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.58, "onion_pickup_agent_0_min": 10, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 16.05, 
"onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.49, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 15.92, "useful_onion_pickup_agent_1_min": 6, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.13, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.12, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.05, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.07, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.16, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 15.67, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.23, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 5.5, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 4.96, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.26, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.1, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.07, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.04, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 4.88, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.22, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.81, "soup_delivery_agent_0_min": 1, 
"soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.19, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.16, "optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 15.67, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.16, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 15.67, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [519.0, 582.0, 582.0, 630.0, 342.0, 627.0, 579.0, 582.0, 584.0, 527.0, 582.0, 587.0, 579.0, 579.0, 582.0, 579.0, 633.0, 516.0, 576.0, 627.0, 476.0, 579.0, 587.0, 636.0, 579.0, 576.0, 633.0, 576.0, 530.0, 237.0, 581.0, 510.0, 584.0, 472.0, 576.0, 627.0, 570.0, 579.0, 582.0, 579.0, 630.0, 630.0, 587.0, 579.0, 576.0, 627.0, 582.0, 576.0, 630.0, 576.0, 582.0, 627.0, 582.0, 630.0, 579.0, 582.0, 582.0, 582.0, 525.0, 579.0, 570.0, 573.0, 579.0, 573.0, 630.0, 510.0, 582.0, 573.0, 582.0, 524.0, 522.0, 627.0, 630.0, 524.0, 576.0, 582.0, 576.0, 633.0, 630.0, 582.0, 582.0, 581.0, 576.0, 624.0, 584.0, 633.0, 470.0, 582.0, 579.0, 530.0, 627.0, 579.0, 525.0, 579.0, 536.0, 576.0, 578.0, 522.0, 582.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [261.0, 258.0, 283.0, 299.0, 283.0, 299.0, 316.0, 314.0, 180.0, 162.0, 316.0, 311.0, 293.0, 286.0, 291.0, 291.0, 287.0, 297.0, 253.0, 274.0, 291.0, 291.0, 299.0, 288.0, 289.0, 290.0, 283.0, 296.0, 292.0, 290.0, 291.0, 288.0, 317.0, 316.0, 266.0, 250.0, 294.0, 282.0, 312.0, 315.0, 234.0, 242.0, 291.0, 288.0, 296.0, 291.0, 317.0, 319.0, 288.0, 
291.0, 286.0, 290.0, 314.0, 319.0, 294.0, 282.0, 265.0, 265.0, 118.0, 119.0, 293.0, 288.0, 240.0, 270.0, 297.0, 287.0, 239.0, 233.0, 295.0, 281.0, 319.0, 308.0, 294.0, 276.0, 293.0, 286.0, 290.0, 292.0, 283.0, 296.0, 316.0, 314.0, 311.0, 319.0, 296.0, 291.0, 294.0, 285.0, 291.0, 285.0, 306.0, 321.0, 290.0, 292.0, 285.0, 291.0, 314.0, 316.0, 283.0, 293.0, 291.0, 291.0, 315.0, 312.0, 291.0, 291.0, 315.0, 315.0, 290.0, 289.0, 294.0, 288.0, 289.0, 293.0, 294.0, 288.0, 255.0, 270.0, 288.0, 291.0, 275.0, 295.0, 289.0, 284.0, 288.0, 291.0, 289.0, 284.0, 312.0, 318.0, 245.0, 265.0, 290.0, 292.0, 294.0, 279.0, 292.0, 290.0, 256.0, 268.0, 261.0, 261.0, 306.0, 321.0, 307.0, 323.0, 256.0, 268.0, 286.0, 290.0, 291.0, 291.0, 283.0, 293.0, 318.0, 315.0, 313.0, 317.0, 286.0, 296.0, 286.0, 296.0, 287.0, 294.0, 284.0, 292.0, 305.0, 319.0, 294.0, 290.0, 314.0, 319.0, 244.0, 226.0, 288.0, 294.0, 288.0, 291.0, 267.0, 263.0, 318.0, 309.0, 298.0, 281.0, 254.0, 271.0, 294.0, 285.0, 260.0, 276.0, 288.0, 288.0, 296.0, 282.0, 264.0, 258.0, 286.0, 296.0, 293.0, 283.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6950081891474147, "mean_inference_ms": 1.2397288239976665, "mean_action_processing_ms": 0.1333872792140637, "mean_env_wait_ms": 0.8367610499142053, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 636.0, "episode_reward_min": 237.0, "episode_reward_mean": 573.05, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 118.0}, "policy_reward_max": {"ppo": 323.0}, "policy_reward_mean": {"ppo": 286.525}, "hist_stats": {"episode_reward": [519.0, 582.0, 582.0, 630.0, 342.0, 627.0, 579.0, 582.0, 584.0, 527.0, 582.0, 587.0, 579.0, 579.0, 582.0, 579.0, 633.0, 516.0, 576.0, 627.0, 476.0, 579.0, 587.0, 636.0, 579.0, 576.0, 633.0, 576.0, 530.0, 237.0, 581.0, 510.0, 584.0, 472.0, 576.0, 627.0, 570.0, 579.0, 582.0, 579.0, 630.0, 630.0, 587.0, 579.0, 576.0, 627.0, 582.0, 576.0, 630.0, 576.0, 582.0, 627.0, 582.0, 630.0, 579.0, 582.0, 582.0, 
582.0, 525.0, 579.0, 570.0, 573.0, 579.0, 573.0, 630.0, 510.0, 582.0, 573.0, 582.0, 524.0, 522.0, 627.0, 630.0, 524.0, 576.0, 582.0, 576.0, 633.0, 630.0, 582.0, 582.0, 581.0, 576.0, 624.0, 584.0, 633.0, 470.0, 582.0, 579.0, 530.0, 627.0, 579.0, 525.0, 579.0, 536.0, 576.0, 578.0, 522.0, 582.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [261.0, 258.0, 283.0, 299.0, 283.0, 299.0, 316.0, 314.0, 180.0, 162.0, 316.0, 311.0, 293.0, 286.0, 291.0, 291.0, 287.0, 297.0, 253.0, 274.0, 291.0, 291.0, 299.0, 288.0, 289.0, 290.0, 283.0, 296.0, 292.0, 290.0, 291.0, 288.0, 317.0, 316.0, 266.0, 250.0, 294.0, 282.0, 312.0, 315.0, 234.0, 242.0, 291.0, 288.0, 296.0, 291.0, 317.0, 319.0, 288.0, 291.0, 286.0, 290.0, 314.0, 319.0, 294.0, 282.0, 265.0, 265.0, 118.0, 119.0, 293.0, 288.0, 240.0, 270.0, 297.0, 287.0, 239.0, 233.0, 295.0, 281.0, 319.0, 308.0, 294.0, 276.0, 293.0, 286.0, 290.0, 292.0, 283.0, 296.0, 316.0, 314.0, 311.0, 319.0, 296.0, 291.0, 294.0, 285.0, 291.0, 285.0, 306.0, 321.0, 290.0, 292.0, 285.0, 291.0, 314.0, 316.0, 283.0, 293.0, 291.0, 291.0, 315.0, 312.0, 291.0, 291.0, 315.0, 315.0, 290.0, 289.0, 294.0, 288.0, 289.0, 293.0, 294.0, 288.0, 255.0, 270.0, 288.0, 291.0, 275.0, 295.0, 289.0, 284.0, 288.0, 291.0, 289.0, 284.0, 312.0, 318.0, 245.0, 265.0, 290.0, 292.0, 294.0, 279.0, 292.0, 290.0, 256.0, 268.0, 261.0, 261.0, 306.0, 321.0, 307.0, 323.0, 256.0, 268.0, 286.0, 290.0, 291.0, 291.0, 283.0, 293.0, 318.0, 315.0, 313.0, 317.0, 286.0, 296.0, 286.0, 296.0, 287.0, 294.0, 284.0, 
292.0, 305.0, 319.0, 294.0, 290.0, 314.0, 319.0, 244.0, 226.0, 288.0, 294.0, 288.0, 291.0, 267.0, 263.0, 318.0, 309.0, 298.0, 281.0, 254.0, 271.0, 294.0, 285.0, 260.0, 276.0, 288.0, 288.0, 296.0, 282.0, 264.0, 258.0, 286.0, 296.0, 293.0, 283.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6950081891474147, "mean_inference_ms": 1.2397288239976665, "mean_action_processing_ms": 0.1333872792140637, "mean_env_wait_ms": 0.8367610499142053, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 11110400, "num_agent_steps_trained": 11110400, "num_env_steps_sampled": 5555200, "num_env_steps_trained": 5555200, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 5555200, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 11110400, "timers": {"training_iteration_time_ms": 3640.154, "learn_time_ms": 1118.553, "learn_throughput": 11443.353, "synch_weights_time_ms": 13.047}, "counters": {"num_env_steps_sampled": 5555200, "num_env_steps_trained": 5555200, "num_agent_steps_sampled": 11110400, "num_agent_steps_trained": 11110400}, "done": false, "episodes_total": 13888, "training_iteration": 434, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-28-39", "timestamp": 1666582119, "time_this_iter_s": 3.6646738052368164, "time_total_s": 1654.199958562851, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1654.199958562851, "timesteps_since_restore": 0, "iterations_since_restore": 434, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.5, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 199.0, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 177.29, "shaped_reward_min": 133, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.08, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 16.56, "onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.03, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 16.42, "useful_onion_pickup_agent_1_min": 11, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.06, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.09, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.03, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, 
"useful_onion_drop_agent_1_mean": 0.03, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.77, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 16.2, "potting_onion_agent_1_min": 11, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.42, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 5.4, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.16, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.1, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.07, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.13, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.07, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.05, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.0, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.02, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.77, "optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 16.2, "optimal_onion_potting_agent_1_min": 11, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.77, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 16.2, "viable_onion_potting_agent_1_min": 11, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0017595021054148674, "policy_loss": -0.0021555940620601177, "vf_loss": 
7.937746047973633, "vf_explained_var": 0.5771285891532898, "kl": 0.0027183406054973602, "entropy": 0.7953658103942871, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 5568000, "num_env_steps_trained": 5568000, "num_agent_steps_sampled": 11136000, "num_agent_steps_trained": 11136000}, "sampler_results": {"episode_reward_max": 633.0, "episode_reward_min": 453.0, "episode_reward_mean": 575.29, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 225.0}, "policy_reward_max": {"ppo": 323.0}, "policy_reward_mean": {"ppo": 287.645}, "custom_metrics": {"sparse_reward_mean": 199.0, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 177.29, "shaped_reward_min": 133, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.08, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 16.56, 
"onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.03, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 16.42, "useful_onion_pickup_agent_1_min": 11, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.06, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.09, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.03, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.03, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.77, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 16.2, "potting_onion_agent_1_min": 11, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.42, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 5.4, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.16, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.1, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.07, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.13, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.07, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.05, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.0, "soup_delivery_agent_0_min": 3, 
"soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.02, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.77, "optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 16.2, "optimal_onion_potting_agent_1_min": 11, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.77, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 16.2, "viable_onion_potting_agent_1_min": 11, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [584.0, 472.0, 576.0, 627.0, 570.0, 579.0, 582.0, 579.0, 630.0, 630.0, 587.0, 579.0, 576.0, 627.0, 582.0, 576.0, 630.0, 576.0, 582.0, 627.0, 582.0, 630.0, 579.0, 582.0, 582.0, 582.0, 525.0, 579.0, 570.0, 573.0, 579.0, 573.0, 630.0, 510.0, 582.0, 573.0, 582.0, 524.0, 522.0, 627.0, 630.0, 524.0, 576.0, 582.0, 576.0, 633.0, 630.0, 582.0, 582.0, 581.0, 576.0, 624.0, 584.0, 633.0, 470.0, 582.0, 579.0, 530.0, 627.0, 579.0, 525.0, 579.0, 536.0, 576.0, 578.0, 522.0, 582.0, 576.0, 573.0, 533.0, 579.0, 579.0, 453.0, 630.0, 579.0, 522.0, 579.0, 582.0, 576.0, 576.0, 527.0, 579.0, 582.0, 576.0, 581.0, 633.0, 579.0, 576.0, 522.0, 579.0, 510.0, 582.0, 587.0, 587.0, 582.0, 570.0, 525.0, 579.0, 519.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [297.0, 287.0, 239.0, 233.0, 295.0, 281.0, 319.0, 308.0, 294.0, 276.0, 293.0, 286.0, 290.0, 292.0, 283.0, 296.0, 316.0, 314.0, 311.0, 319.0, 296.0, 291.0, 294.0, 285.0, 291.0, 285.0, 306.0, 321.0, 290.0, 292.0, 285.0, 291.0, 314.0, 316.0, 283.0, 293.0, 291.0, 291.0, 315.0, 312.0, 291.0, 291.0, 315.0, 315.0, 290.0, 289.0, 294.0, 288.0, 289.0, 
293.0, 294.0, 288.0, 255.0, 270.0, 288.0, 291.0, 275.0, 295.0, 289.0, 284.0, 288.0, 291.0, 289.0, 284.0, 312.0, 318.0, 245.0, 265.0, 290.0, 292.0, 294.0, 279.0, 292.0, 290.0, 256.0, 268.0, 261.0, 261.0, 306.0, 321.0, 307.0, 323.0, 256.0, 268.0, 286.0, 290.0, 291.0, 291.0, 283.0, 293.0, 318.0, 315.0, 313.0, 317.0, 286.0, 296.0, 286.0, 296.0, 287.0, 294.0, 284.0, 292.0, 305.0, 319.0, 294.0, 290.0, 314.0, 319.0, 244.0, 226.0, 288.0, 294.0, 288.0, 291.0, 267.0, 263.0, 318.0, 309.0, 298.0, 281.0, 254.0, 271.0, 294.0, 285.0, 260.0, 276.0, 288.0, 288.0, 296.0, 282.0, 264.0, 258.0, 286.0, 296.0, 293.0, 283.0, 288.0, 285.0, 270.0, 263.0, 289.0, 290.0, 289.0, 290.0, 225.0, 228.0, 311.0, 319.0, 290.0, 289.0, 257.0, 265.0, 290.0, 289.0, 289.0, 293.0, 287.0, 289.0, 288.0, 288.0, 255.0, 272.0, 291.0, 288.0, 293.0, 289.0, 283.0, 293.0, 280.0, 301.0, 314.0, 319.0, 289.0, 290.0, 290.0, 286.0, 261.0, 261.0, 293.0, 286.0, 258.0, 252.0, 288.0, 294.0, 290.0, 297.0, 296.0, 291.0, 296.0, 286.0, 288.0, 282.0, 261.0, 264.0, 293.0, 286.0, 268.0, 251.0, 291.0, 291.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6949450051728931, "mean_inference_ms": 1.2396064286152753, "mean_action_processing_ms": 0.1333788711332576, "mean_env_wait_ms": 0.8366754177335741, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 633.0, "episode_reward_min": 453.0, "episode_reward_mean": 575.29, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 225.0}, "policy_reward_max": {"ppo": 323.0}, "policy_reward_mean": {"ppo": 287.645}, "hist_stats": {"episode_reward": [584.0, 472.0, 576.0, 627.0, 570.0, 579.0, 582.0, 579.0, 630.0, 630.0, 587.0, 579.0, 576.0, 627.0, 582.0, 576.0, 630.0, 576.0, 582.0, 627.0, 582.0, 630.0, 579.0, 582.0, 582.0, 582.0, 525.0, 579.0, 570.0, 573.0, 579.0, 573.0, 630.0, 510.0, 582.0, 573.0, 582.0, 524.0, 522.0, 627.0, 630.0, 524.0, 576.0, 582.0, 576.0, 633.0, 630.0, 582.0, 582.0, 581.0, 576.0, 624.0, 584.0, 633.0, 470.0, 582.0, 579.0, 
530.0, 627.0, 579.0, 525.0, 579.0, 536.0, 576.0, 578.0, 522.0, 582.0, 576.0, 573.0, 533.0, 579.0, 579.0, 453.0, 630.0, 579.0, 522.0, 579.0, 582.0, 576.0, 576.0, 527.0, 579.0, 582.0, 576.0, 581.0, 633.0, 579.0, 576.0, 522.0, 579.0, 510.0, 582.0, 587.0, 587.0, 582.0, 570.0, 525.0, 579.0, 519.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [297.0, 287.0, 239.0, 233.0, 295.0, 281.0, 319.0, 308.0, 294.0, 276.0, 293.0, 286.0, 290.0, 292.0, 283.0, 296.0, 316.0, 314.0, 311.0, 319.0, 296.0, 291.0, 294.0, 285.0, 291.0, 285.0, 306.0, 321.0, 290.0, 292.0, 285.0, 291.0, 314.0, 316.0, 283.0, 293.0, 291.0, 291.0, 315.0, 312.0, 291.0, 291.0, 315.0, 315.0, 290.0, 289.0, 294.0, 288.0, 289.0, 293.0, 294.0, 288.0, 255.0, 270.0, 288.0, 291.0, 275.0, 295.0, 289.0, 284.0, 288.0, 291.0, 289.0, 284.0, 312.0, 318.0, 245.0, 265.0, 290.0, 292.0, 294.0, 279.0, 292.0, 290.0, 256.0, 268.0, 261.0, 261.0, 306.0, 321.0, 307.0, 323.0, 256.0, 268.0, 286.0, 290.0, 291.0, 291.0, 283.0, 293.0, 318.0, 315.0, 313.0, 317.0, 286.0, 296.0, 286.0, 296.0, 287.0, 294.0, 284.0, 292.0, 305.0, 319.0, 294.0, 290.0, 314.0, 319.0, 244.0, 226.0, 288.0, 294.0, 288.0, 291.0, 267.0, 263.0, 318.0, 309.0, 298.0, 281.0, 254.0, 271.0, 294.0, 285.0, 260.0, 276.0, 288.0, 288.0, 296.0, 282.0, 264.0, 258.0, 286.0, 296.0, 293.0, 283.0, 288.0, 285.0, 270.0, 263.0, 289.0, 290.0, 289.0, 290.0, 225.0, 228.0, 311.0, 319.0, 290.0, 289.0, 257.0, 265.0, 290.0, 289.0, 289.0, 293.0, 287.0, 289.0, 288.0, 288.0, 255.0, 272.0, 291.0, 288.0, 293.0, 
289.0, 283.0, 293.0, 280.0, 301.0, 314.0, 319.0, 289.0, 290.0, 290.0, 286.0, 261.0, 261.0, 293.0, 286.0, 258.0, 252.0, 288.0, 294.0, 290.0, 297.0, 296.0, 291.0, 296.0, 286.0, 288.0, 282.0, 261.0, 264.0, 293.0, 286.0, 268.0, 251.0, 291.0, 291.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6949450051728931, "mean_inference_ms": 1.2396064286152753, "mean_action_processing_ms": 0.1333788711332576, "mean_env_wait_ms": 0.8366754177335741, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 11136000, "num_agent_steps_trained": 11136000, "num_env_steps_sampled": 5568000, "num_env_steps_trained": 5568000, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 5568000, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 11136000, "timers": {"training_iteration_time_ms": 3653.946, "learn_time_ms": 1129.094, "learn_throughput": 11336.518, "synch_weights_time_ms": 12.99}, "counters": {"num_env_steps_sampled": 5568000, "num_env_steps_trained": 5568000, "num_agent_steps_sampled": 11136000, "num_agent_steps_trained": 11136000}, "done": false, "episodes_total": 13920, "training_iteration": 435, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-28-43", "timestamp": 1666582123, "time_this_iter_s": 3.749124050140381, "time_total_s": 1657.9490826129913, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1657.9490826129913, "timesteps_since_restore": 0, "iterations_since_restore": 435, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.62, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 198.4, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 177.0, "shaped_reward_min": 133, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.08, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 16.59, "onion_pickup_agent_1_min": 13, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 15.97, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 16.41, "useful_onion_pickup_agent_1_min": 12, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.05, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.06, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.04, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, 
"useful_onion_drop_agent_1_mean": 0.01, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.78, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 16.2, "potting_onion_agent_1_min": 12, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.32, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 5.42, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.08, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.14, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.09, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.13, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 4.95, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.13, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.9, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.09, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.78, "optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 16.2, "optimal_onion_potting_agent_1_min": 12, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.78, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 16.2, "viable_onion_potting_agent_1_min": 12, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0002803339157253504, "policy_loss": -0.00010873284190893173, "vf_loss": 
7.808413982391357, "vf_explained_var": 0.583696722984314, "kl": 0.0023585986346006393, "entropy": 0.7835491299629211, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 5580800, "num_env_steps_trained": 5580800, "num_agent_steps_sampled": 11161600, "num_agent_steps_trained": 11161600}, "sampler_results": {"episode_reward_max": 633.0, "episode_reward_min": 453.0, "episode_reward_mean": 573.8, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 225.0}, "policy_reward_max": {"ppo": 323.0}, "policy_reward_mean": {"ppo": 286.9}, "custom_metrics": {"sparse_reward_mean": 198.4, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 177.0, "shaped_reward_min": 133, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.08, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 16.59, 
"onion_pickup_agent_1_min": 13, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 15.97, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 16.41, "useful_onion_pickup_agent_1_min": 12, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.05, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.06, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.04, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.01, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.78, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 16.2, "potting_onion_agent_1_min": 12, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.32, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 5.42, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.08, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.14, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.09, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.13, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 4.95, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.13, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.9, "soup_delivery_agent_0_min": 3, 
"soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.09, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.78, "optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 16.2, "optimal_onion_potting_agent_1_min": 12, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.78, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 16.2, "viable_onion_potting_agent_1_min": 12, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [630.0, 510.0, 582.0, 573.0, 582.0, 524.0, 522.0, 627.0, 630.0, 524.0, 576.0, 582.0, 576.0, 633.0, 630.0, 582.0, 582.0, 581.0, 576.0, 624.0, 584.0, 633.0, 470.0, 582.0, 579.0, 530.0, 627.0, 579.0, 525.0, 579.0, 536.0, 576.0, 578.0, 522.0, 582.0, 576.0, 573.0, 533.0, 579.0, 579.0, 453.0, 630.0, 579.0, 522.0, 579.0, 582.0, 576.0, 576.0, 527.0, 579.0, 582.0, 576.0, 581.0, 633.0, 579.0, 576.0, 522.0, 579.0, 510.0, 582.0, 587.0, 587.0, 582.0, 570.0, 525.0, 579.0, 519.0, 582.0, 516.0, 573.0, 630.0, 576.0, 582.0, 530.0, 579.0, 516.0, 573.0, 576.0, 579.0, 579.0, 581.0, 579.0, 579.0, 579.0, 579.0, 579.0, 576.0, 582.0, 633.0, 590.0, 630.0, 582.0, 573.0, 582.0, 627.0, 579.0, 582.0, 576.0, 579.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [312.0, 318.0, 245.0, 265.0, 290.0, 292.0, 294.0, 279.0, 292.0, 290.0, 256.0, 268.0, 261.0, 261.0, 306.0, 321.0, 307.0, 323.0, 256.0, 268.0, 286.0, 290.0, 291.0, 291.0, 283.0, 293.0, 318.0, 315.0, 313.0, 317.0, 286.0, 296.0, 286.0, 296.0, 287.0, 294.0, 284.0, 292.0, 305.0, 319.0, 294.0, 290.0, 314.0, 319.0, 244.0, 226.0, 288.0, 294.0, 288.0, 
291.0, 267.0, 263.0, 318.0, 309.0, 298.0, 281.0, 254.0, 271.0, 294.0, 285.0, 260.0, 276.0, 288.0, 288.0, 296.0, 282.0, 264.0, 258.0, 286.0, 296.0, 293.0, 283.0, 288.0, 285.0, 270.0, 263.0, 289.0, 290.0, 289.0, 290.0, 225.0, 228.0, 311.0, 319.0, 290.0, 289.0, 257.0, 265.0, 290.0, 289.0, 289.0, 293.0, 287.0, 289.0, 288.0, 288.0, 255.0, 272.0, 291.0, 288.0, 293.0, 289.0, 283.0, 293.0, 280.0, 301.0, 314.0, 319.0, 289.0, 290.0, 290.0, 286.0, 261.0, 261.0, 293.0, 286.0, 258.0, 252.0, 288.0, 294.0, 290.0, 297.0, 296.0, 291.0, 296.0, 286.0, 288.0, 282.0, 261.0, 264.0, 293.0, 286.0, 268.0, 251.0, 291.0, 291.0, 248.0, 268.0, 280.0, 293.0, 312.0, 318.0, 286.0, 290.0, 281.0, 301.0, 261.0, 269.0, 286.0, 293.0, 257.0, 259.0, 284.0, 289.0, 286.0, 290.0, 283.0, 296.0, 290.0, 289.0, 288.0, 293.0, 289.0, 290.0, 291.0, 288.0, 286.0, 293.0, 286.0, 293.0, 292.0, 287.0, 285.0, 291.0, 293.0, 289.0, 314.0, 319.0, 299.0, 291.0, 316.0, 314.0, 289.0, 293.0, 287.0, 286.0, 294.0, 288.0, 308.0, 319.0, 283.0, 296.0, 294.0, 288.0, 286.0, 290.0, 293.0, 286.0, 291.0, 291.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6948806595730435, "mean_inference_ms": 1.2394844780647098, "mean_action_processing_ms": 0.1333695851340957, "mean_env_wait_ms": 0.8365902839854666, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 633.0, "episode_reward_min": 453.0, "episode_reward_mean": 573.8, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 225.0}, "policy_reward_max": {"ppo": 323.0}, "policy_reward_mean": {"ppo": 286.9}, "hist_stats": {"episode_reward": [630.0, 510.0, 582.0, 573.0, 582.0, 524.0, 522.0, 627.0, 630.0, 524.0, 576.0, 582.0, 576.0, 633.0, 630.0, 582.0, 582.0, 581.0, 576.0, 624.0, 584.0, 633.0, 470.0, 582.0, 579.0, 530.0, 627.0, 579.0, 525.0, 579.0, 536.0, 576.0, 578.0, 522.0, 582.0, 576.0, 573.0, 533.0, 579.0, 579.0, 453.0, 630.0, 579.0, 522.0, 579.0, 582.0, 576.0, 576.0, 527.0, 579.0, 582.0, 576.0, 581.0, 633.0, 579.0, 576.0, 522.0, 
579.0, 510.0, 582.0, 587.0, 587.0, 582.0, 570.0, 525.0, 579.0, 519.0, 582.0, 516.0, 573.0, 630.0, 576.0, 582.0, 530.0, 579.0, 516.0, 573.0, 576.0, 579.0, 579.0, 581.0, 579.0, 579.0, 579.0, 579.0, 579.0, 576.0, 582.0, 633.0, 590.0, 630.0, 582.0, 573.0, 582.0, 627.0, 579.0, 582.0, 576.0, 579.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [312.0, 318.0, 245.0, 265.0, 290.0, 292.0, 294.0, 279.0, 292.0, 290.0, 256.0, 268.0, 261.0, 261.0, 306.0, 321.0, 307.0, 323.0, 256.0, 268.0, 286.0, 290.0, 291.0, 291.0, 283.0, 293.0, 318.0, 315.0, 313.0, 317.0, 286.0, 296.0, 286.0, 296.0, 287.0, 294.0, 284.0, 292.0, 305.0, 319.0, 294.0, 290.0, 314.0, 319.0, 244.0, 226.0, 288.0, 294.0, 288.0, 291.0, 267.0, 263.0, 318.0, 309.0, 298.0, 281.0, 254.0, 271.0, 294.0, 285.0, 260.0, 276.0, 288.0, 288.0, 296.0, 282.0, 264.0, 258.0, 286.0, 296.0, 293.0, 283.0, 288.0, 285.0, 270.0, 263.0, 289.0, 290.0, 289.0, 290.0, 225.0, 228.0, 311.0, 319.0, 290.0, 289.0, 257.0, 265.0, 290.0, 289.0, 289.0, 293.0, 287.0, 289.0, 288.0, 288.0, 255.0, 272.0, 291.0, 288.0, 293.0, 289.0, 283.0, 293.0, 280.0, 301.0, 314.0, 319.0, 289.0, 290.0, 290.0, 286.0, 261.0, 261.0, 293.0, 286.0, 258.0, 252.0, 288.0, 294.0, 290.0, 297.0, 296.0, 291.0, 296.0, 286.0, 288.0, 282.0, 261.0, 264.0, 293.0, 286.0, 268.0, 251.0, 291.0, 291.0, 248.0, 268.0, 280.0, 293.0, 312.0, 318.0, 286.0, 290.0, 281.0, 301.0, 261.0, 269.0, 286.0, 293.0, 257.0, 259.0, 284.0, 289.0, 286.0, 290.0, 283.0, 296.0, 290.0, 289.0, 288.0, 293.0, 289.0, 290.0, 291.0, 
288.0, 286.0, 293.0, 286.0, 293.0, 292.0, 287.0, 285.0, 291.0, 293.0, 289.0, 314.0, 319.0, 299.0, 291.0, 316.0, 314.0, 289.0, 293.0, 287.0, 286.0, 294.0, 288.0, 308.0, 319.0, 283.0, 296.0, 294.0, 288.0, 286.0, 290.0, 293.0, 286.0, 291.0, 291.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6948806595730435, "mean_inference_ms": 1.2394844780647098, "mean_action_processing_ms": 0.1333695851340957, "mean_env_wait_ms": 0.8365902839854666, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 11161600, "num_agent_steps_trained": 11161600, "num_env_steps_sampled": 5580800, "num_env_steps_trained": 5580800, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 5580800, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 11161600, "timers": {"training_iteration_time_ms": 3651.61, "learn_time_ms": 1132.739, "learn_throughput": 11300.041, "synch_weights_time_ms": 12.854}, "counters": {"num_env_steps_sampled": 5580800, "num_env_steps_trained": 5580800, "num_agent_steps_sampled": 11161600, "num_agent_steps_trained": 11161600}, "done": false, "episodes_total": 13952, "training_iteration": 436, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-28-47", "timestamp": 1666582127, "time_this_iter_s": 3.7395496368408203, "time_total_s": 1661.6886322498322, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1661.6886322498322, "timesteps_since_restore": 0, "iterations_since_restore": 436, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.783333333333335, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 196.8, "sparse_reward_min": 20, "sparse_reward_max": 220, "shaped_reward_mean": 175.77, "shaped_reward_min": 29, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.11, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 16.38, "onion_pickup_agent_1_min": 1, "onion_pickup_agent_1_max": 20, "useful_onion_pickup_agent_0_mean": 15.99, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 16.23, "useful_onion_pickup_agent_1_min": 1, "useful_onion_pickup_agent_1_max": 20, "onion_drop_agent_0_mean": 0.05, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.05, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.04, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, 
"useful_onion_drop_agent_1_mean": 0.02, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.77, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 16.0, "potting_onion_agent_1_min": 1, "potting_onion_agent_1_max": 20, "dish_pickup_agent_0_mean": 5.32, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 5.32, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.12, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.05, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.07, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.11, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 4.97, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.02, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 4.94, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.96, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.77, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 16.0, "optimal_onion_potting_agent_1_min": 1, "optimal_onion_potting_agent_1_max": 20, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.77, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 16.0, "viable_onion_potting_agent_1_min": 1, "viable_onion_potting_agent_1_max": 20, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.001821667654439807, "policy_loss": 0.0014255057321861386, "vf_loss": 
7.874016761779785, "vf_explained_var": 0.6142792105674744, "kl": 0.0027908834163099527, "entropy": 0.7824777364730835, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 5593600, "num_env_steps_trained": 5593600, "num_agent_steps_sampled": 11187200, "num_agent_steps_trained": 11187200}, "sampler_results": {"episode_reward_max": 636.0, "episode_reward_min": 69.0, "episode_reward_mean": 569.37, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 34.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 284.685}, "custom_metrics": {"sparse_reward_mean": 196.8, "sparse_reward_min": 20, "sparse_reward_max": 220, "shaped_reward_mean": 175.77, "shaped_reward_min": 29, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.11, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 16.38, 
"onion_pickup_agent_1_min": 1, "onion_pickup_agent_1_max": 20, "useful_onion_pickup_agent_0_mean": 15.99, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 16.23, "useful_onion_pickup_agent_1_min": 1, "useful_onion_pickup_agent_1_max": 20, "onion_drop_agent_0_mean": 0.05, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.05, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.04, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.02, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.77, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 16.0, "potting_onion_agent_1_min": 1, "potting_onion_agent_1_max": 20, "dish_pickup_agent_0_mean": 5.32, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 5.32, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.12, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.05, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.07, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.11, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 4.97, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.02, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 4.94, "soup_delivery_agent_0_min": 0, 
"soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.96, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.77, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 16.0, "optimal_onion_potting_agent_1_min": 1, "optimal_onion_potting_agent_1_max": 20, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.77, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 16.0, "viable_onion_potting_agent_1_min": 1, "viable_onion_potting_agent_1_max": 20, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [578.0, 522.0, 582.0, 576.0, 573.0, 533.0, 579.0, 579.0, 453.0, 630.0, 579.0, 522.0, 579.0, 582.0, 576.0, 576.0, 527.0, 579.0, 582.0, 576.0, 581.0, 633.0, 579.0, 576.0, 522.0, 579.0, 510.0, 582.0, 587.0, 587.0, 582.0, 570.0, 525.0, 579.0, 519.0, 582.0, 516.0, 573.0, 630.0, 576.0, 582.0, 530.0, 579.0, 516.0, 573.0, 576.0, 579.0, 579.0, 581.0, 579.0, 579.0, 579.0, 579.0, 579.0, 576.0, 582.0, 633.0, 590.0, 630.0, 582.0, 573.0, 582.0, 627.0, 579.0, 582.0, 576.0, 579.0, 582.0, 582.0, 630.0, 582.0, 636.0, 627.0, 582.0, 584.0, 573.0, 630.0, 579.0, 573.0, 579.0, 584.0, 581.0, 587.0, 576.0, 530.0, 522.0, 576.0, 579.0, 579.0, 576.0, 576.0, 573.0, 636.0, 294.0, 587.0, 627.0, 576.0, 636.0, 582.0, 69.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [296.0, 282.0, 264.0, 258.0, 286.0, 296.0, 293.0, 283.0, 288.0, 285.0, 270.0, 263.0, 289.0, 290.0, 289.0, 290.0, 225.0, 228.0, 311.0, 319.0, 290.0, 289.0, 257.0, 265.0, 290.0, 289.0, 289.0, 293.0, 287.0, 289.0, 288.0, 288.0, 255.0, 272.0, 291.0, 288.0, 293.0, 289.0, 283.0, 293.0, 280.0, 301.0, 314.0, 319.0, 289.0, 290.0, 290.0, 286.0, 261.0, 
261.0, 293.0, 286.0, 258.0, 252.0, 288.0, 294.0, 290.0, 297.0, 296.0, 291.0, 296.0, 286.0, 288.0, 282.0, 261.0, 264.0, 293.0, 286.0, 268.0, 251.0, 291.0, 291.0, 248.0, 268.0, 280.0, 293.0, 312.0, 318.0, 286.0, 290.0, 281.0, 301.0, 261.0, 269.0, 286.0, 293.0, 257.0, 259.0, 284.0, 289.0, 286.0, 290.0, 283.0, 296.0, 290.0, 289.0, 288.0, 293.0, 289.0, 290.0, 291.0, 288.0, 286.0, 293.0, 286.0, 293.0, 292.0, 287.0, 285.0, 291.0, 293.0, 289.0, 314.0, 319.0, 299.0, 291.0, 316.0, 314.0, 289.0, 293.0, 287.0, 286.0, 294.0, 288.0, 308.0, 319.0, 283.0, 296.0, 294.0, 288.0, 286.0, 290.0, 293.0, 286.0, 291.0, 291.0, 281.0, 301.0, 311.0, 319.0, 293.0, 289.0, 325.0, 311.0, 314.0, 313.0, 291.0, 291.0, 294.0, 290.0, 285.0, 288.0, 314.0, 316.0, 290.0, 289.0, 293.0, 280.0, 293.0, 286.0, 285.0, 299.0, 290.0, 291.0, 300.0, 287.0, 291.0, 285.0, 267.0, 263.0, 263.0, 259.0, 294.0, 282.0, 288.0, 291.0, 289.0, 290.0, 283.0, 293.0, 286.0, 290.0, 298.0, 275.0, 319.0, 317.0, 146.0, 148.0, 295.0, 292.0, 316.0, 311.0, 293.0, 283.0, 314.0, 322.0, 288.0, 294.0, 35.0, 34.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6948230068460599, "mean_inference_ms": 1.2393774164488374, "mean_action_processing_ms": 0.13336225311356412, "mean_env_wait_ms": 0.8365219769699798, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 636.0, "episode_reward_min": 69.0, "episode_reward_mean": 569.37, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 34.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 284.685}, "hist_stats": {"episode_reward": [578.0, 522.0, 582.0, 576.0, 573.0, 533.0, 579.0, 579.0, 453.0, 630.0, 579.0, 522.0, 579.0, 582.0, 576.0, 576.0, 527.0, 579.0, 582.0, 576.0, 581.0, 633.0, 579.0, 576.0, 522.0, 579.0, 510.0, 582.0, 587.0, 587.0, 582.0, 570.0, 525.0, 579.0, 519.0, 582.0, 516.0, 573.0, 630.0, 576.0, 582.0, 530.0, 579.0, 516.0, 573.0, 576.0, 579.0, 579.0, 581.0, 579.0, 579.0, 579.0, 579.0, 579.0, 576.0, 582.0, 633.0, 
590.0, 630.0, 582.0, 573.0, 582.0, 627.0, 579.0, 582.0, 576.0, 579.0, 582.0, 582.0, 630.0, 582.0, 636.0, 627.0, 582.0, 584.0, 573.0, 630.0, 579.0, 573.0, 579.0, 584.0, 581.0, 587.0, 576.0, 530.0, 522.0, 576.0, 579.0, 579.0, 576.0, 576.0, 573.0, 636.0, 294.0, 587.0, 627.0, 576.0, 636.0, 582.0, 69.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [296.0, 282.0, 264.0, 258.0, 286.0, 296.0, 293.0, 283.0, 288.0, 285.0, 270.0, 263.0, 289.0, 290.0, 289.0, 290.0, 225.0, 228.0, 311.0, 319.0, 290.0, 289.0, 257.0, 265.0, 290.0, 289.0, 289.0, 293.0, 287.0, 289.0, 288.0, 288.0, 255.0, 272.0, 291.0, 288.0, 293.0, 289.0, 283.0, 293.0, 280.0, 301.0, 314.0, 319.0, 289.0, 290.0, 290.0, 286.0, 261.0, 261.0, 293.0, 286.0, 258.0, 252.0, 288.0, 294.0, 290.0, 297.0, 296.0, 291.0, 296.0, 286.0, 288.0, 282.0, 261.0, 264.0, 293.0, 286.0, 268.0, 251.0, 291.0, 291.0, 248.0, 268.0, 280.0, 293.0, 312.0, 318.0, 286.0, 290.0, 281.0, 301.0, 261.0, 269.0, 286.0, 293.0, 257.0, 259.0, 284.0, 289.0, 286.0, 290.0, 283.0, 296.0, 290.0, 289.0, 288.0, 293.0, 289.0, 290.0, 291.0, 288.0, 286.0, 293.0, 286.0, 293.0, 292.0, 287.0, 285.0, 291.0, 293.0, 289.0, 314.0, 319.0, 299.0, 291.0, 316.0, 314.0, 289.0, 293.0, 287.0, 286.0, 294.0, 288.0, 308.0, 319.0, 283.0, 296.0, 294.0, 288.0, 286.0, 290.0, 293.0, 286.0, 291.0, 291.0, 281.0, 301.0, 311.0, 319.0, 293.0, 289.0, 325.0, 311.0, 314.0, 313.0, 291.0, 291.0, 294.0, 290.0, 285.0, 288.0, 314.0, 316.0, 290.0, 289.0, 293.0, 280.0, 293.0, 286.0, 285.0, 299.0, 290.0, 291.0, 300.0, 
287.0, 291.0, 285.0, 267.0, 263.0, 263.0, 259.0, 294.0, 282.0, 288.0, 291.0, 289.0, 290.0, 283.0, 293.0, 286.0, 290.0, 298.0, 275.0, 319.0, 317.0, 146.0, 148.0, 295.0, 292.0, 316.0, 311.0, 293.0, 283.0, 314.0, 322.0, 288.0, 294.0, 35.0, 34.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6948230068460599, "mean_inference_ms": 1.2393774164488374, "mean_action_processing_ms": 0.13336225311356412, "mean_env_wait_ms": 0.8365219769699798, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 11187200, "num_agent_steps_trained": 11187200, "num_env_steps_sampled": 5593600, "num_env_steps_trained": 5593600, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 5593600, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 11187200, "timers": {"training_iteration_time_ms": 3630.519, "learn_time_ms": 1135.538, "learn_throughput": 11272.186, "synch_weights_time_ms": 12.895}, "counters": {"num_env_steps_sampled": 5593600, "num_env_steps_trained": 5593600, "num_agent_steps_sampled": 11187200, "num_agent_steps_trained": 11187200}, "done": false, "episodes_total": 13984, "training_iteration": 437, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-28-51", "timestamp": 1666582131, "time_this_iter_s": 3.654266595840454, "time_total_s": 1665.3428988456726, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1665.3428988456726, "timesteps_since_restore": 0, "iterations_since_restore": 437, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.720000000000002, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 198.2, "sparse_reward_min": 20, "sparse_reward_max": 220, "shaped_reward_mean": 176.67, "shaped_reward_min": 29, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.32, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 16.3, "onion_pickup_agent_1_min": 1, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.17, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 16.15, "useful_onion_pickup_agent_1_min": 1, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.05, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.03, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.04, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, 
"useful_onion_drop_agent_1_mean": 0.01, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.96, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 15.96, "potting_onion_agent_1_min": 1, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.33, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 5.35, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.09, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.18, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.08, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.05, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 4.93, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.09, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 4.92, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.05, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.96, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 15.96, "optimal_onion_potting_agent_1_min": 1, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.96, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 15.96, "viable_onion_potting_agent_1_min": 1, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0028519879560917616, "policy_loss": -0.003249123226851225, "vf_loss": 
7.91046142578125, "vf_explained_var": 0.5582560300827026, "kl": 0.00226527638733387, "entropy": 0.7878231406211853, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 5606400, "num_env_steps_trained": 5606400, "num_agent_steps_sampled": 11212800, "num_agent_steps_trained": 11212800}, "sampler_results": {"episode_reward_max": 639.0, "episode_reward_min": 69.0, "episode_reward_mean": 573.07, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 34.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 286.535}, "custom_metrics": {"sparse_reward_mean": 198.2, "sparse_reward_min": 20, "sparse_reward_max": 220, "shaped_reward_mean": 176.67, "shaped_reward_min": 29, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.32, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 16.3, 
"onion_pickup_agent_1_min": 1, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.17, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 16.15, "useful_onion_pickup_agent_1_min": 1, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.05, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.03, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.04, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.01, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.96, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 15.96, "potting_onion_agent_1_min": 1, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.33, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 5.35, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.09, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.18, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.08, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.05, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 4.93, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.09, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 4.92, "soup_delivery_agent_0_min": 0, 
"soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.05, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.96, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 15.96, "optimal_onion_potting_agent_1_min": 1, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.96, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 15.96, "viable_onion_potting_agent_1_min": 1, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [525.0, 579.0, 519.0, 582.0, 516.0, 573.0, 630.0, 576.0, 582.0, 530.0, 579.0, 516.0, 573.0, 576.0, 579.0, 579.0, 581.0, 579.0, 579.0, 579.0, 579.0, 579.0, 576.0, 582.0, 633.0, 590.0, 630.0, 582.0, 573.0, 582.0, 627.0, 579.0, 582.0, 576.0, 579.0, 582.0, 582.0, 630.0, 582.0, 636.0, 627.0, 582.0, 584.0, 573.0, 630.0, 579.0, 573.0, 579.0, 584.0, 581.0, 587.0, 576.0, 530.0, 522.0, 576.0, 579.0, 579.0, 576.0, 576.0, 573.0, 636.0, 294.0, 587.0, 627.0, 576.0, 636.0, 582.0, 69.0, 630.0, 522.0, 627.0, 522.0, 525.0, 627.0, 576.0, 587.0, 582.0, 576.0, 510.0, 576.0, 579.0, 627.0, 579.0, 579.0, 579.0, 582.0, 573.0, 579.0, 573.0, 465.0, 582.0, 582.0, 639.0, 579.0, 587.0, 582.0, 579.0, 633.0, 624.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [261.0, 264.0, 293.0, 286.0, 268.0, 251.0, 291.0, 291.0, 248.0, 268.0, 280.0, 293.0, 312.0, 318.0, 286.0, 290.0, 281.0, 301.0, 261.0, 269.0, 286.0, 293.0, 257.0, 259.0, 284.0, 289.0, 286.0, 290.0, 283.0, 296.0, 290.0, 289.0, 288.0, 293.0, 289.0, 290.0, 291.0, 288.0, 286.0, 293.0, 286.0, 293.0, 292.0, 287.0, 285.0, 291.0, 293.0, 289.0, 314.0, 
319.0, 299.0, 291.0, 316.0, 314.0, 289.0, 293.0, 287.0, 286.0, 294.0, 288.0, 308.0, 319.0, 283.0, 296.0, 294.0, 288.0, 286.0, 290.0, 293.0, 286.0, 291.0, 291.0, 281.0, 301.0, 311.0, 319.0, 293.0, 289.0, 325.0, 311.0, 314.0, 313.0, 291.0, 291.0, 294.0, 290.0, 285.0, 288.0, 314.0, 316.0, 290.0, 289.0, 293.0, 280.0, 293.0, 286.0, 285.0, 299.0, 290.0, 291.0, 300.0, 287.0, 291.0, 285.0, 267.0, 263.0, 263.0, 259.0, 294.0, 282.0, 288.0, 291.0, 289.0, 290.0, 283.0, 293.0, 286.0, 290.0, 298.0, 275.0, 319.0, 317.0, 146.0, 148.0, 295.0, 292.0, 316.0, 311.0, 293.0, 283.0, 314.0, 322.0, 288.0, 294.0, 35.0, 34.0, 309.0, 321.0, 257.0, 265.0, 313.0, 314.0, 249.0, 273.0, 263.0, 262.0, 311.0, 316.0, 294.0, 282.0, 287.0, 300.0, 295.0, 287.0, 295.0, 281.0, 251.0, 259.0, 285.0, 291.0, 291.0, 288.0, 313.0, 314.0, 288.0, 291.0, 288.0, 291.0, 283.0, 296.0, 297.0, 285.0, 296.0, 277.0, 286.0, 293.0, 290.0, 283.0, 237.0, 228.0, 288.0, 294.0, 288.0, 294.0, 321.0, 318.0, 289.0, 290.0, 291.0, 296.0, 291.0, 291.0, 283.0, 296.0, 314.0, 319.0, 311.0, 313.0, 291.0, 288.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6947747072864041, "mean_inference_ms": 1.2392755170077125, "mean_action_processing_ms": 0.1333566636626615, "mean_env_wait_ms": 0.8364654056699539, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 639.0, "episode_reward_min": 69.0, "episode_reward_mean": 573.07, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 34.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 286.535}, "hist_stats": {"episode_reward": [525.0, 579.0, 519.0, 582.0, 516.0, 573.0, 630.0, 576.0, 582.0, 530.0, 579.0, 516.0, 573.0, 576.0, 579.0, 579.0, 581.0, 579.0, 579.0, 579.0, 579.0, 579.0, 576.0, 582.0, 633.0, 590.0, 630.0, 582.0, 573.0, 582.0, 627.0, 579.0, 582.0, 576.0, 579.0, 582.0, 582.0, 630.0, 582.0, 636.0, 627.0, 582.0, 584.0, 573.0, 630.0, 579.0, 573.0, 579.0, 584.0, 581.0, 587.0, 576.0, 530.0, 522.0, 576.0, 579.0, 579.0, 
576.0, 576.0, 573.0, 636.0, 294.0, 587.0, 627.0, 576.0, 636.0, 582.0, 69.0, 630.0, 522.0, 627.0, 522.0, 525.0, 627.0, 576.0, 587.0, 582.0, 576.0, 510.0, 576.0, 579.0, 627.0, 579.0, 579.0, 579.0, 582.0, 573.0, 579.0, 573.0, 465.0, 582.0, 582.0, 639.0, 579.0, 587.0, 582.0, 579.0, 633.0, 624.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [261.0, 264.0, 293.0, 286.0, 268.0, 251.0, 291.0, 291.0, 248.0, 268.0, 280.0, 293.0, 312.0, 318.0, 286.0, 290.0, 281.0, 301.0, 261.0, 269.0, 286.0, 293.0, 257.0, 259.0, 284.0, 289.0, 286.0, 290.0, 283.0, 296.0, 290.0, 289.0, 288.0, 293.0, 289.0, 290.0, 291.0, 288.0, 286.0, 293.0, 286.0, 293.0, 292.0, 287.0, 285.0, 291.0, 293.0, 289.0, 314.0, 319.0, 299.0, 291.0, 316.0, 314.0, 289.0, 293.0, 287.0, 286.0, 294.0, 288.0, 308.0, 319.0, 283.0, 296.0, 294.0, 288.0, 286.0, 290.0, 293.0, 286.0, 291.0, 291.0, 281.0, 301.0, 311.0, 319.0, 293.0, 289.0, 325.0, 311.0, 314.0, 313.0, 291.0, 291.0, 294.0, 290.0, 285.0, 288.0, 314.0, 316.0, 290.0, 289.0, 293.0, 280.0, 293.0, 286.0, 285.0, 299.0, 290.0, 291.0, 300.0, 287.0, 291.0, 285.0, 267.0, 263.0, 263.0, 259.0, 294.0, 282.0, 288.0, 291.0, 289.0, 290.0, 283.0, 293.0, 286.0, 290.0, 298.0, 275.0, 319.0, 317.0, 146.0, 148.0, 295.0, 292.0, 316.0, 311.0, 293.0, 283.0, 314.0, 322.0, 288.0, 294.0, 35.0, 34.0, 309.0, 321.0, 257.0, 265.0, 313.0, 314.0, 249.0, 273.0, 263.0, 262.0, 311.0, 316.0, 294.0, 282.0, 287.0, 300.0, 295.0, 287.0, 295.0, 281.0, 251.0, 259.0, 285.0, 291.0, 291.0, 288.0, 313.0, 314.0, 288.0, 
291.0, 288.0, 291.0, 283.0, 296.0, 297.0, 285.0, 296.0, 277.0, 286.0, 293.0, 290.0, 283.0, 237.0, 228.0, 288.0, 294.0, 288.0, 294.0, 321.0, 318.0, 289.0, 290.0, 291.0, 296.0, 291.0, 291.0, 283.0, 296.0, 314.0, 319.0, 311.0, 313.0, 291.0, 288.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6947747072864041, "mean_inference_ms": 1.2392755170077125, "mean_action_processing_ms": 0.1333566636626615, "mean_env_wait_ms": 0.8364654056699539, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 11212800, "num_agent_steps_trained": 11212800, "num_env_steps_sampled": 5606400, "num_env_steps_trained": 5606400, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 5606400, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 11212800, "timers": {"training_iteration_time_ms": 3624.323, "learn_time_ms": 1138.837, "learn_throughput": 11239.539, "synch_weights_time_ms": 12.217}, "counters": {"num_env_steps_sampled": 5606400, "num_env_steps_trained": 5606400, "num_agent_steps_sampled": 11212800, "num_agent_steps_trained": 11212800}, "done": false, "episodes_total": 14016, "training_iteration": 438, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-28-55", "timestamp": 1666582135, "time_this_iter_s": 3.7086472511291504, "time_total_s": 1669.0515460968018, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1669.0515460968018, "timesteps_since_restore": 0, "iterations_since_restore": 438, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.7, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 198.0, "sparse_reward_min": 20, "sparse_reward_max": 220, "shaped_reward_mean": 176.9, "shaped_reward_min": 29, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.5, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 16.05, "onion_pickup_agent_1_min": 1, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.38, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 15.94, "useful_onion_pickup_agent_1_min": 1, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.09, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.03, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.04, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, 
"useful_onion_drop_agent_1_mean": 0.01, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.09, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 15.79, "potting_onion_agent_1_min": 1, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.31, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 5.45, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.1, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.27, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.07, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.06, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 4.95, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.08, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 4.93, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.05, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.09, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 15.79, "optimal_onion_potting_agent_1_min": 1, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.09, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 15.79, "viable_onion_potting_agent_1_min": 1, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0007691812934353948, "policy_loss": 0.00037388643249869347, "vf_loss": 
7.888741493225098, "vf_explained_var": 0.5663248896598816, "kl": 0.002474588342010975, "entropy": 0.7871589660644531, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 5619200, "num_env_steps_trained": 5619200, "num_agent_steps_sampled": 11238400, "num_agent_steps_trained": 11238400}, "sampler_results": {"episode_reward_max": 639.0, "episode_reward_min": 69.0, "episode_reward_mean": 572.9, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 34.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 286.45}, "custom_metrics": {"sparse_reward_mean": 198.0, "sparse_reward_min": 20, "sparse_reward_max": 220, "shaped_reward_mean": 176.9, "shaped_reward_min": 29, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.5, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 16.05, 
"onion_pickup_agent_1_min": 1, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.38, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 15.94, "useful_onion_pickup_agent_1_min": 1, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.09, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.03, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.04, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.01, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.09, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 15.79, "potting_onion_agent_1_min": 1, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.31, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 5.45, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.1, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.27, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.07, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.06, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 4.95, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.08, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 4.93, "soup_delivery_agent_0_min": 0, 
"soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.05, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.09, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 15.79, "optimal_onion_potting_agent_1_min": 1, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.09, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 15.79, "viable_onion_potting_agent_1_min": 1, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 576.0, 579.0, 582.0, 582.0, 630.0, 582.0, 636.0, 627.0, 582.0, 584.0, 573.0, 630.0, 579.0, 573.0, 579.0, 584.0, 581.0, 587.0, 576.0, 530.0, 522.0, 576.0, 579.0, 579.0, 576.0, 576.0, 573.0, 636.0, 294.0, 587.0, 627.0, 576.0, 636.0, 582.0, 69.0, 630.0, 522.0, 627.0, 522.0, 525.0, 627.0, 576.0, 587.0, 582.0, 576.0, 510.0, 576.0, 579.0, 627.0, 579.0, 579.0, 579.0, 582.0, 573.0, 579.0, 573.0, 465.0, 582.0, 582.0, 639.0, 579.0, 587.0, 582.0, 579.0, 633.0, 624.0, 579.0, 582.0, 636.0, 579.0, 570.0, 584.0, 522.0, 576.0, 573.0, 579.0, 633.0, 525.0, 579.0, 582.0, 579.0, 525.0, 582.0, 573.0, 576.0, 587.0, 582.0, 582.0, 582.0, 627.0, 582.0, 582.0, 536.0, 582.0, 630.0, 587.0, 527.0, 576.0, 510.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [294.0, 288.0, 286.0, 290.0, 293.0, 286.0, 291.0, 291.0, 281.0, 301.0, 311.0, 319.0, 293.0, 289.0, 325.0, 311.0, 314.0, 313.0, 291.0, 291.0, 294.0, 290.0, 285.0, 288.0, 314.0, 316.0, 290.0, 289.0, 293.0, 280.0, 293.0, 286.0, 285.0, 299.0, 290.0, 291.0, 300.0, 287.0, 291.0, 285.0, 267.0, 263.0, 263.0, 259.0, 294.0, 282.0, 288.0, 291.0, 289.0, 
290.0, 283.0, 293.0, 286.0, 290.0, 298.0, 275.0, 319.0, 317.0, 146.0, 148.0, 295.0, 292.0, 316.0, 311.0, 293.0, 283.0, 314.0, 322.0, 288.0, 294.0, 35.0, 34.0, 309.0, 321.0, 257.0, 265.0, 313.0, 314.0, 249.0, 273.0, 263.0, 262.0, 311.0, 316.0, 294.0, 282.0, 287.0, 300.0, 295.0, 287.0, 295.0, 281.0, 251.0, 259.0, 285.0, 291.0, 291.0, 288.0, 313.0, 314.0, 288.0, 291.0, 288.0, 291.0, 283.0, 296.0, 297.0, 285.0, 296.0, 277.0, 286.0, 293.0, 290.0, 283.0, 237.0, 228.0, 288.0, 294.0, 288.0, 294.0, 321.0, 318.0, 289.0, 290.0, 291.0, 296.0, 291.0, 291.0, 283.0, 296.0, 314.0, 319.0, 311.0, 313.0, 291.0, 288.0, 288.0, 294.0, 320.0, 316.0, 288.0, 291.0, 283.0, 287.0, 290.0, 294.0, 256.0, 266.0, 288.0, 288.0, 283.0, 290.0, 292.0, 287.0, 317.0, 316.0, 265.0, 260.0, 294.0, 285.0, 292.0, 290.0, 286.0, 293.0, 261.0, 264.0, 294.0, 288.0, 282.0, 291.0, 289.0, 287.0, 296.0, 291.0, 287.0, 295.0, 290.0, 292.0, 289.0, 293.0, 316.0, 311.0, 286.0, 296.0, 290.0, 292.0, 264.0, 272.0, 297.0, 285.0, 314.0, 316.0, 294.0, 293.0, 262.0, 265.0, 293.0, 283.0, 253.0, 257.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.694728533373717, "mean_inference_ms": 1.2391726393322413, "mean_action_processing_ms": 0.13335137141154382, "mean_env_wait_ms": 0.8364092327819402, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 639.0, "episode_reward_min": 69.0, "episode_reward_mean": 572.9, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 34.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 286.45}, "hist_stats": {"episode_reward": [582.0, 576.0, 579.0, 582.0, 582.0, 630.0, 582.0, 636.0, 627.0, 582.0, 584.0, 573.0, 630.0, 579.0, 573.0, 579.0, 584.0, 581.0, 587.0, 576.0, 530.0, 522.0, 576.0, 579.0, 579.0, 576.0, 576.0, 573.0, 636.0, 294.0, 587.0, 627.0, 576.0, 636.0, 582.0, 69.0, 630.0, 522.0, 627.0, 522.0, 525.0, 627.0, 576.0, 587.0, 582.0, 576.0, 510.0, 576.0, 579.0, 627.0, 579.0, 579.0, 579.0, 582.0, 573.0, 579.0, 573.0, 465.0, 
582.0, 582.0, 639.0, 579.0, 587.0, 582.0, 579.0, 633.0, 624.0, 579.0, 582.0, 636.0, 579.0, 570.0, 584.0, 522.0, 576.0, 573.0, 579.0, 633.0, 525.0, 579.0, 582.0, 579.0, 525.0, 582.0, 573.0, 576.0, 587.0, 582.0, 582.0, 582.0, 627.0, 582.0, 582.0, 536.0, 582.0, 630.0, 587.0, 527.0, 576.0, 510.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [294.0, 288.0, 286.0, 290.0, 293.0, 286.0, 291.0, 291.0, 281.0, 301.0, 311.0, 319.0, 293.0, 289.0, 325.0, 311.0, 314.0, 313.0, 291.0, 291.0, 294.0, 290.0, 285.0, 288.0, 314.0, 316.0, 290.0, 289.0, 293.0, 280.0, 293.0, 286.0, 285.0, 299.0, 290.0, 291.0, 300.0, 287.0, 291.0, 285.0, 267.0, 263.0, 263.0, 259.0, 294.0, 282.0, 288.0, 291.0, 289.0, 290.0, 283.0, 293.0, 286.0, 290.0, 298.0, 275.0, 319.0, 317.0, 146.0, 148.0, 295.0, 292.0, 316.0, 311.0, 293.0, 283.0, 314.0, 322.0, 288.0, 294.0, 35.0, 34.0, 309.0, 321.0, 257.0, 265.0, 313.0, 314.0, 249.0, 273.0, 263.0, 262.0, 311.0, 316.0, 294.0, 282.0, 287.0, 300.0, 295.0, 287.0, 295.0, 281.0, 251.0, 259.0, 285.0, 291.0, 291.0, 288.0, 313.0, 314.0, 288.0, 291.0, 288.0, 291.0, 283.0, 296.0, 297.0, 285.0, 296.0, 277.0, 286.0, 293.0, 290.0, 283.0, 237.0, 228.0, 288.0, 294.0, 288.0, 294.0, 321.0, 318.0, 289.0, 290.0, 291.0, 296.0, 291.0, 291.0, 283.0, 296.0, 314.0, 319.0, 311.0, 313.0, 291.0, 288.0, 288.0, 294.0, 320.0, 316.0, 288.0, 291.0, 283.0, 287.0, 290.0, 294.0, 256.0, 266.0, 288.0, 288.0, 283.0, 290.0, 292.0, 287.0, 317.0, 316.0, 265.0, 260.0, 294.0, 285.0, 292.0, 290.0, 286.0, 293.0, 261.0, 264.0, 
294.0, 288.0, 282.0, 291.0, 289.0, 287.0, 296.0, 291.0, 287.0, 295.0, 290.0, 292.0, 289.0, 293.0, 316.0, 311.0, 286.0, 296.0, 290.0, 292.0, 264.0, 272.0, 297.0, 285.0, 314.0, 316.0, 294.0, 293.0, 262.0, 265.0, 293.0, 283.0, 253.0, 257.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.694728533373717, "mean_inference_ms": 1.2391726393322413, "mean_action_processing_ms": 0.13335137141154382, "mean_env_wait_ms": 0.8364092327819402, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 11238400, "num_agent_steps_trained": 11238400, "num_env_steps_sampled": 5619200, "num_env_steps_trained": 5619200, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 5619200, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 11238400, "timers": {"training_iteration_time_ms": 3623.556, "learn_time_ms": 1141.687, "learn_throughput": 11211.476, "synch_weights_time_ms": 12.285}, "counters": {"num_env_steps_sampled": 5619200, "num_env_steps_trained": 5619200, "num_agent_steps_sampled": 11238400, "num_agent_steps_trained": 11238400}, "done": false, "episodes_total": 14048, "training_iteration": 439, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-28-59", "timestamp": 1666582139, "time_this_iter_s": 3.6420202255249023, "time_total_s": 1672.6935663223267, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1672.6935663223267, "timesteps_since_restore": 0, "iterations_since_restore": 439, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.82, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 193.4, "sparse_reward_min": 20, "sparse_reward_max": 220, "shaped_reward_mean": 172.48, "shaped_reward_min": 26, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.12, "onion_pickup_agent_0_min": 5, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 15.78, "onion_pickup_agent_1_min": 1, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.0, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 15.67, "useful_onion_pickup_agent_1_min": 1, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.1, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.07, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.04, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, 
"useful_onion_drop_agent_1_mean": 0.02, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.71, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 15.47, "potting_onion_agent_1_min": 1, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.18, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 5.36, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 4.93, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.1, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.06, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.1, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 4.83, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 4.96, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.81, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.92, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 15.71, "optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 15.47, "optimal_onion_potting_agent_1_min": 1, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.71, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 15.47, "viable_onion_potting_agent_1_min": 1, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0049171969294548035, "policy_loss": -0.005316406488418579, "vf_loss": 
7.9975738525390625, "vf_explained_var": 0.5843226909637451, "kl": 0.0027200470212846994, "entropy": 0.8010973334312439, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 5632000, "num_env_steps_trained": 5632000, "num_agent_steps_sampled": 11264000, "num_agent_steps_trained": 11264000}, "sampler_results": {"episode_reward_max": 639.0, "episode_reward_min": 66.0, "episode_reward_mean": 559.28, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 32.0}, "policy_reward_max": {"ppo": 322.0}, "policy_reward_mean": {"ppo": 279.64}, "custom_metrics": {"sparse_reward_mean": 193.4, "sparse_reward_min": 20, "sparse_reward_max": 220, "shaped_reward_mean": 172.48, "shaped_reward_min": 26, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.12, "onion_pickup_agent_0_min": 5, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 15.78, 
"onion_pickup_agent_1_min": 1, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.0, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 15.67, "useful_onion_pickup_agent_1_min": 1, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.1, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.07, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.04, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.02, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.71, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 15.47, "potting_onion_agent_1_min": 1, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.18, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 5.36, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 4.93, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.1, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.06, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.1, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 4.83, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 4.96, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.81, "soup_delivery_agent_0_min": 0, 
"soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.92, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 15.71, "optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 15.47, "optimal_onion_potting_agent_1_min": 1, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.71, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 15.47, "viable_onion_potting_agent_1_min": 1, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [576.0, 636.0, 582.0, 69.0, 630.0, 522.0, 627.0, 522.0, 525.0, 627.0, 576.0, 587.0, 582.0, 576.0, 510.0, 576.0, 579.0, 627.0, 579.0, 579.0, 579.0, 582.0, 573.0, 579.0, 573.0, 465.0, 582.0, 582.0, 639.0, 579.0, 587.0, 582.0, 579.0, 633.0, 624.0, 579.0, 582.0, 636.0, 579.0, 570.0, 584.0, 522.0, 576.0, 573.0, 579.0, 633.0, 525.0, 579.0, 582.0, 579.0, 525.0, 582.0, 573.0, 576.0, 587.0, 582.0, 582.0, 582.0, 627.0, 582.0, 582.0, 536.0, 582.0, 630.0, 587.0, 527.0, 576.0, 510.0, 582.0, 627.0, 465.0, 633.0, 519.0, 582.0, 533.0, 405.0, 522.0, 581.0, 522.0, 66.0, 579.0, 530.0, 576.0, 570.0, 579.0, 510.0, 582.0, 570.0, 627.0, 570.0, 579.0, 279.0, 579.0, 579.0, 573.0, 453.0, 579.0, 582.0, 582.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [293.0, 283.0, 314.0, 322.0, 288.0, 294.0, 35.0, 34.0, 309.0, 321.0, 257.0, 265.0, 313.0, 314.0, 249.0, 273.0, 263.0, 262.0, 311.0, 316.0, 294.0, 282.0, 287.0, 300.0, 295.0, 287.0, 295.0, 281.0, 251.0, 259.0, 285.0, 291.0, 291.0, 288.0, 313.0, 314.0, 288.0, 291.0, 288.0, 291.0, 283.0, 296.0, 297.0, 285.0, 296.0, 277.0, 286.0, 293.0, 290.0, 283.0, 
237.0, 228.0, 288.0, 294.0, 288.0, 294.0, 321.0, 318.0, 289.0, 290.0, 291.0, 296.0, 291.0, 291.0, 283.0, 296.0, 314.0, 319.0, 311.0, 313.0, 291.0, 288.0, 288.0, 294.0, 320.0, 316.0, 288.0, 291.0, 283.0, 287.0, 290.0, 294.0, 256.0, 266.0, 288.0, 288.0, 283.0, 290.0, 292.0, 287.0, 317.0, 316.0, 265.0, 260.0, 294.0, 285.0, 292.0, 290.0, 286.0, 293.0, 261.0, 264.0, 294.0, 288.0, 282.0, 291.0, 289.0, 287.0, 296.0, 291.0, 287.0, 295.0, 290.0, 292.0, 289.0, 293.0, 316.0, 311.0, 286.0, 296.0, 290.0, 292.0, 264.0, 272.0, 297.0, 285.0, 314.0, 316.0, 294.0, 293.0, 262.0, 265.0, 293.0, 283.0, 253.0, 257.0, 299.0, 283.0, 311.0, 316.0, 239.0, 226.0, 314.0, 319.0, 262.0, 257.0, 289.0, 293.0, 268.0, 265.0, 207.0, 198.0, 264.0, 258.0, 291.0, 290.0, 254.0, 268.0, 32.0, 34.0, 286.0, 293.0, 267.0, 263.0, 286.0, 290.0, 284.0, 286.0, 286.0, 293.0, 257.0, 253.0, 291.0, 291.0, 294.0, 276.0, 316.0, 311.0, 282.0, 288.0, 289.0, 290.0, 139.0, 140.0, 291.0, 288.0, 291.0, 288.0, 290.0, 283.0, 219.0, 234.0, 293.0, 286.0, 296.0, 286.0, 294.0, 288.0, 292.0, 290.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6946899078139533, "mean_inference_ms": 1.2390712973970437, "mean_action_processing_ms": 0.13334667802007993, "mean_env_wait_ms": 0.8363554136317487, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 639.0, "episode_reward_min": 66.0, "episode_reward_mean": 559.28, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 32.0}, "policy_reward_max": {"ppo": 322.0}, "policy_reward_mean": {"ppo": 279.64}, "hist_stats": {"episode_reward": [576.0, 636.0, 582.0, 69.0, 630.0, 522.0, 627.0, 522.0, 525.0, 627.0, 576.0, 587.0, 582.0, 576.0, 510.0, 576.0, 579.0, 627.0, 579.0, 579.0, 579.0, 582.0, 573.0, 579.0, 573.0, 465.0, 582.0, 582.0, 639.0, 579.0, 587.0, 582.0, 579.0, 633.0, 624.0, 579.0, 582.0, 636.0, 579.0, 570.0, 584.0, 522.0, 576.0, 573.0, 579.0, 633.0, 525.0, 579.0, 582.0, 579.0, 525.0, 582.0, 573.0, 576.0, 587.0, 582.0, 582.0, 582.0, 
627.0, 582.0, 582.0, 536.0, 582.0, 630.0, 587.0, 527.0, 576.0, 510.0, 582.0, 627.0, 465.0, 633.0, 519.0, 582.0, 533.0, 405.0, 522.0, 581.0, 522.0, 66.0, 579.0, 530.0, 576.0, 570.0, 579.0, 510.0, 582.0, 570.0, 627.0, 570.0, 579.0, 279.0, 579.0, 579.0, 573.0, 453.0, 579.0, 582.0, 582.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [293.0, 283.0, 314.0, 322.0, 288.0, 294.0, 35.0, 34.0, 309.0, 321.0, 257.0, 265.0, 313.0, 314.0, 249.0, 273.0, 263.0, 262.0, 311.0, 316.0, 294.0, 282.0, 287.0, 300.0, 295.0, 287.0, 295.0, 281.0, 251.0, 259.0, 285.0, 291.0, 291.0, 288.0, 313.0, 314.0, 288.0, 291.0, 288.0, 291.0, 283.0, 296.0, 297.0, 285.0, 296.0, 277.0, 286.0, 293.0, 290.0, 283.0, 237.0, 228.0, 288.0, 294.0, 288.0, 294.0, 321.0, 318.0, 289.0, 290.0, 291.0, 296.0, 291.0, 291.0, 283.0, 296.0, 314.0, 319.0, 311.0, 313.0, 291.0, 288.0, 288.0, 294.0, 320.0, 316.0, 288.0, 291.0, 283.0, 287.0, 290.0, 294.0, 256.0, 266.0, 288.0, 288.0, 283.0, 290.0, 292.0, 287.0, 317.0, 316.0, 265.0, 260.0, 294.0, 285.0, 292.0, 290.0, 286.0, 293.0, 261.0, 264.0, 294.0, 288.0, 282.0, 291.0, 289.0, 287.0, 296.0, 291.0, 287.0, 295.0, 290.0, 292.0, 289.0, 293.0, 316.0, 311.0, 286.0, 296.0, 290.0, 292.0, 264.0, 272.0, 297.0, 285.0, 314.0, 316.0, 294.0, 293.0, 262.0, 265.0, 293.0, 283.0, 253.0, 257.0, 299.0, 283.0, 311.0, 316.0, 239.0, 226.0, 314.0, 319.0, 262.0, 257.0, 289.0, 293.0, 268.0, 265.0, 207.0, 198.0, 264.0, 258.0, 291.0, 290.0, 254.0, 268.0, 32.0, 34.0, 286.0, 293.0, 267.0, 263.0, 286.0, 290.0, 
284.0, 286.0, 286.0, 293.0, 257.0, 253.0, 291.0, 291.0, 294.0, 276.0, 316.0, 311.0, 282.0, 288.0, 289.0, 290.0, 139.0, 140.0, 291.0, 288.0, 291.0, 288.0, 290.0, 283.0, 219.0, 234.0, 293.0, 286.0, 296.0, 286.0, 294.0, 288.0, 292.0, 290.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6946899078139533, "mean_inference_ms": 1.2390712973970437, "mean_action_processing_ms": 0.13334667802007993, "mean_env_wait_ms": 0.8363554136317487, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 11264000, "num_agent_steps_trained": 11264000, "num_env_steps_sampled": 5632000, "num_env_steps_trained": 5632000, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 5632000, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 11264000, "timers": {"training_iteration_time_ms": 3615.506, "learn_time_ms": 1133.411, "learn_throughput": 11293.34, "synch_weights_time_ms": 12.353}, "counters": {"num_env_steps_sampled": 5632000, "num_env_steps_trained": 5632000, "num_agent_steps_sampled": 11264000, "num_agent_steps_trained": 11264000}, "done": false, "episodes_total": 14080, "training_iteration": 440, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-29-03", "timestamp": 1666582143, "time_this_iter_s": 3.6508986949920654, "time_total_s": 1676.3444650173187, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1676.3444650173187, "timesteps_since_restore": 0, "iterations_since_restore": 440, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.950000000000003, "ram_util_percent": 10.616666666666665}}
+{"custom_metrics": {"sparse_reward_mean": 195.0, "sparse_reward_min": 20, "sparse_reward_max": 220, "shaped_reward_mean": 173.6, "shaped_reward_min": 26, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.44, "onion_pickup_agent_0_min": 5, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 15.62, "onion_pickup_agent_1_min": 2, "onion_pickup_agent_1_max": 19, "useful_onion_pickup_agent_0_mean": 16.36, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 15.51, "useful_onion_pickup_agent_1_min": 2, "useful_onion_pickup_agent_1_max": 19, "onion_drop_agent_0_mean": 0.1, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.08, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.02, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, 
"useful_onion_drop_agent_1_mean": 0.01, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.06, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 15.32, "potting_onion_agent_1_min": 2, "potting_onion_agent_1_max": 19, "dish_pickup_agent_0_mean": 5.16, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 5.46, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 4.9, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.17, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.09, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.11, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 4.81, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.06, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.78, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.01, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 16.06, "optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 15.32, "optimal_onion_potting_agent_1_min": 2, "optimal_onion_potting_agent_1_max": 19, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.06, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 15.32, "viable_onion_potting_agent_1_min": 2, "viable_onion_potting_agent_1_max": 19, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0015632144641131163, "policy_loss": 0.0011703792260959744, "vf_loss": 
7.836904525756836, "vf_explained_var": 0.583203911781311, "kl": 0.0023779491893947124, "entropy": 0.781711220741272, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 5644800, "num_env_steps_trained": 5644800, "num_agent_steps_sampled": 11289600, "num_agent_steps_trained": 11289600}, "sampler_results": {"episode_reward_max": 636.0, "episode_reward_min": 66.0, "episode_reward_mean": 563.6, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 32.0}, "policy_reward_max": {"ppo": 320.0}, "policy_reward_mean": {"ppo": 281.8}, "custom_metrics": {"sparse_reward_mean": 195.0, "sparse_reward_min": 20, "sparse_reward_max": 220, "shaped_reward_mean": 173.6, "shaped_reward_min": 26, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.44, "onion_pickup_agent_0_min": 5, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 15.62, 
"onion_pickup_agent_1_min": 2, "onion_pickup_agent_1_max": 19, "useful_onion_pickup_agent_0_mean": 16.36, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 15.51, "useful_onion_pickup_agent_1_min": 2, "useful_onion_pickup_agent_1_max": 19, "onion_drop_agent_0_mean": 0.1, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.08, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.02, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.01, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.06, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 15.32, "potting_onion_agent_1_min": 2, "potting_onion_agent_1_max": 19, "dish_pickup_agent_0_mean": 5.16, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 5.46, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 4.9, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.17, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.09, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.11, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 4.81, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.06, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.78, "soup_delivery_agent_0_min": 0, 
"soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.01, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 16.06, "optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 15.32, "optimal_onion_potting_agent_1_min": 2, "optimal_onion_potting_agent_1_max": 19, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.06, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 15.32, "viable_onion_potting_agent_1_min": 2, "viable_onion_potting_agent_1_max": 19, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 633.0, 624.0, 579.0, 582.0, 636.0, 579.0, 570.0, 584.0, 522.0, 576.0, 573.0, 579.0, 633.0, 525.0, 579.0, 582.0, 579.0, 525.0, 582.0, 573.0, 576.0, 587.0, 582.0, 582.0, 582.0, 627.0, 582.0, 582.0, 536.0, 582.0, 630.0, 587.0, 527.0, 576.0, 510.0, 582.0, 627.0, 465.0, 633.0, 519.0, 582.0, 533.0, 405.0, 522.0, 581.0, 522.0, 66.0, 579.0, 530.0, 576.0, 570.0, 579.0, 510.0, 582.0, 570.0, 627.0, 570.0, 579.0, 279.0, 579.0, 579.0, 573.0, 453.0, 579.0, 582.0, 582.0, 582.0, 579.0, 576.0, 582.0, 522.0, 510.0, 573.0, 570.0, 573.0, 525.0, 582.0, 579.0, 627.0, 579.0, 525.0, 579.0, 576.0, 576.0, 584.0, 564.0, 582.0, 587.0, 570.0, 627.0, 582.0, 579.0, 579.0, 582.0, 582.0, 582.0, 579.0, 630.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [283.0, 296.0, 314.0, 319.0, 311.0, 313.0, 291.0, 288.0, 288.0, 294.0, 320.0, 316.0, 288.0, 291.0, 283.0, 287.0, 290.0, 294.0, 256.0, 266.0, 288.0, 288.0, 283.0, 290.0, 292.0, 287.0, 317.0, 316.0, 265.0, 260.0, 294.0, 285.0, 292.0, 290.0, 286.0, 293.0, 261.0, 264.0, 294.0, 288.0, 282.0, 291.0, 289.0, 287.0, 296.0, 291.0, 287.0, 295.0, 290.0, 
292.0, 289.0, 293.0, 316.0, 311.0, 286.0, 296.0, 290.0, 292.0, 264.0, 272.0, 297.0, 285.0, 314.0, 316.0, 294.0, 293.0, 262.0, 265.0, 293.0, 283.0, 253.0, 257.0, 299.0, 283.0, 311.0, 316.0, 239.0, 226.0, 314.0, 319.0, 262.0, 257.0, 289.0, 293.0, 268.0, 265.0, 207.0, 198.0, 264.0, 258.0, 291.0, 290.0, 254.0, 268.0, 32.0, 34.0, 286.0, 293.0, 267.0, 263.0, 286.0, 290.0, 284.0, 286.0, 286.0, 293.0, 257.0, 253.0, 291.0, 291.0, 294.0, 276.0, 316.0, 311.0, 282.0, 288.0, 289.0, 290.0, 139.0, 140.0, 291.0, 288.0, 291.0, 288.0, 290.0, 283.0, 219.0, 234.0, 293.0, 286.0, 296.0, 286.0, 294.0, 288.0, 292.0, 290.0, 286.0, 293.0, 283.0, 293.0, 288.0, 294.0, 257.0, 265.0, 267.0, 243.0, 291.0, 282.0, 283.0, 287.0, 288.0, 285.0, 267.0, 258.0, 286.0, 296.0, 286.0, 293.0, 308.0, 319.0, 296.0, 283.0, 267.0, 258.0, 296.0, 283.0, 291.0, 285.0, 280.0, 296.0, 293.0, 291.0, 292.0, 272.0, 289.0, 293.0, 296.0, 291.0, 284.0, 286.0, 318.0, 309.0, 293.0, 289.0, 294.0, 285.0, 288.0, 291.0, 292.0, 290.0, 294.0, 288.0, 284.0, 298.0, 288.0, 291.0, 311.0, 319.0, 286.0, 293.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6946474203690406, "mean_inference_ms": 1.2389727343902235, "mean_action_processing_ms": 0.1333403613331771, "mean_env_wait_ms": 0.8363260041776678, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 636.0, "episode_reward_min": 66.0, "episode_reward_mean": 563.6, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 32.0}, "policy_reward_max": {"ppo": 320.0}, "policy_reward_mean": {"ppo": 281.8}, "hist_stats": {"episode_reward": [579.0, 633.0, 624.0, 579.0, 582.0, 636.0, 579.0, 570.0, 584.0, 522.0, 576.0, 573.0, 579.0, 633.0, 525.0, 579.0, 582.0, 579.0, 525.0, 582.0, 573.0, 576.0, 587.0, 582.0, 582.0, 582.0, 627.0, 582.0, 582.0, 536.0, 582.0, 630.0, 587.0, 527.0, 576.0, 510.0, 582.0, 627.0, 465.0, 633.0, 519.0, 582.0, 533.0, 405.0, 522.0, 581.0, 522.0, 66.0, 579.0, 530.0, 576.0, 570.0, 579.0, 510.0, 582.0, 570.0, 627.0, 570.0, 
579.0, 279.0, 579.0, 579.0, 573.0, 453.0, 579.0, 582.0, 582.0, 582.0, 579.0, 576.0, 582.0, 522.0, 510.0, 573.0, 570.0, 573.0, 525.0, 582.0, 579.0, 627.0, 579.0, 525.0, 579.0, 576.0, 576.0, 584.0, 564.0, 582.0, 587.0, 570.0, 627.0, 582.0, 579.0, 579.0, 582.0, 582.0, 582.0, 579.0, 630.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [283.0, 296.0, 314.0, 319.0, 311.0, 313.0, 291.0, 288.0, 288.0, 294.0, 320.0, 316.0, 288.0, 291.0, 283.0, 287.0, 290.0, 294.0, 256.0, 266.0, 288.0, 288.0, 283.0, 290.0, 292.0, 287.0, 317.0, 316.0, 265.0, 260.0, 294.0, 285.0, 292.0, 290.0, 286.0, 293.0, 261.0, 264.0, 294.0, 288.0, 282.0, 291.0, 289.0, 287.0, 296.0, 291.0, 287.0, 295.0, 290.0, 292.0, 289.0, 293.0, 316.0, 311.0, 286.0, 296.0, 290.0, 292.0, 264.0, 272.0, 297.0, 285.0, 314.0, 316.0, 294.0, 293.0, 262.0, 265.0, 293.0, 283.0, 253.0, 257.0, 299.0, 283.0, 311.0, 316.0, 239.0, 226.0, 314.0, 319.0, 262.0, 257.0, 289.0, 293.0, 268.0, 265.0, 207.0, 198.0, 264.0, 258.0, 291.0, 290.0, 254.0, 268.0, 32.0, 34.0, 286.0, 293.0, 267.0, 263.0, 286.0, 290.0, 284.0, 286.0, 286.0, 293.0, 257.0, 253.0, 291.0, 291.0, 294.0, 276.0, 316.0, 311.0, 282.0, 288.0, 289.0, 290.0, 139.0, 140.0, 291.0, 288.0, 291.0, 288.0, 290.0, 283.0, 219.0, 234.0, 293.0, 286.0, 296.0, 286.0, 294.0, 288.0, 292.0, 290.0, 286.0, 293.0, 283.0, 293.0, 288.0, 294.0, 257.0, 265.0, 267.0, 243.0, 291.0, 282.0, 283.0, 287.0, 288.0, 285.0, 267.0, 258.0, 286.0, 296.0, 286.0, 293.0, 308.0, 319.0, 296.0, 283.0, 267.0, 258.0, 296.0, 283.0, 
291.0, 285.0, 280.0, 296.0, 293.0, 291.0, 292.0, 272.0, 289.0, 293.0, 296.0, 291.0, 284.0, 286.0, 318.0, 309.0, 293.0, 289.0, 294.0, 285.0, 288.0, 291.0, 292.0, 290.0, 294.0, 288.0, 284.0, 298.0, 288.0, 291.0, 311.0, 319.0, 286.0, 293.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6946474203690406, "mean_inference_ms": 1.2389727343902235, "mean_action_processing_ms": 0.1333403613331771, "mean_env_wait_ms": 0.8363260041776678, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 11289600, "num_agent_steps_trained": 11289600, "num_env_steps_sampled": 5644800, "num_env_steps_trained": 5644800, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 5644800, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 11289600, "timers": {"training_iteration_time_ms": 3624.105, "learn_time_ms": 1131.518, "learn_throughput": 11312.237, "synch_weights_time_ms": 12.495}, "counters": {"num_env_steps_sampled": 5644800, "num_env_steps_trained": 5644800, "num_agent_steps_sampled": 11289600, "num_agent_steps_trained": 11289600}, "done": false, "episodes_total": 14112, "training_iteration": 441, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-29-07", "timestamp": 1666582147, "time_this_iter_s": 3.8103065490722656, "time_total_s": 1680.154771566391, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1680.154771566391, "timesteps_since_restore": 0, "iterations_since_restore": 441, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.699999999999996, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 194.8, "sparse_reward_min": 20, "sparse_reward_max": 220, "shaped_reward_mean": 172.99, "shaped_reward_min": 26, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.17, "onion_pickup_agent_0_min": 5, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 15.95, "onion_pickup_agent_1_min": 2, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.09, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 15.81, "useful_onion_pickup_agent_1_min": 2, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.1, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.09, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.04, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, 
"useful_onion_drop_agent_1_mean": 0.01, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.79, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 15.54, "potting_onion_agent_1_min": 2, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.28, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 5.28, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 4.98, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 4.97, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.08, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.12, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 4.92, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 4.93, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.89, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.87, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 15.79, "optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 15.54, "optimal_onion_potting_agent_1_min": 2, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.79, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 15.54, "viable_onion_potting_agent_1_min": 2, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0020413980819284916, "policy_loss": -0.0024421534035354853, "vf_loss": 
7.864043235778809, "vf_explained_var": 0.5776045322418213, "kl": 0.0032261847518384457, "entropy": 0.7712999582290649, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 5657600, "num_env_steps_trained": 5657600, "num_agent_steps_sampled": 11315200, "num_agent_steps_trained": 11315200}, "sampler_results": {"episode_reward_max": 633.0, "episode_reward_min": 66.0, "episode_reward_mean": 562.59, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 32.0}, "policy_reward_max": {"ppo": 319.0}, "policy_reward_mean": {"ppo": 281.295}, "custom_metrics": {"sparse_reward_mean": 194.8, "sparse_reward_min": 20, "sparse_reward_max": 220, "shaped_reward_mean": 172.99, "shaped_reward_min": 26, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.17, "onion_pickup_agent_0_min": 5, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 15.95, 
"onion_pickup_agent_1_min": 2, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.09, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 15.81, "useful_onion_pickup_agent_1_min": 2, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.1, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.09, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.04, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.01, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.79, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 15.54, "potting_onion_agent_1_min": 2, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.28, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 5.28, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 4.98, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 4.97, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.08, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.12, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 4.92, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 4.93, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.89, "soup_delivery_agent_0_min": 0, 
"soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.87, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 15.79, "optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 15.54, "optimal_onion_potting_agent_1_min": 2, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.79, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 15.54, "viable_onion_potting_agent_1_min": 2, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [587.0, 527.0, 576.0, 510.0, 582.0, 627.0, 465.0, 633.0, 519.0, 582.0, 533.0, 405.0, 522.0, 581.0, 522.0, 66.0, 579.0, 530.0, 576.0, 570.0, 579.0, 510.0, 582.0, 570.0, 627.0, 570.0, 579.0, 279.0, 579.0, 579.0, 573.0, 453.0, 579.0, 582.0, 582.0, 582.0, 579.0, 576.0, 582.0, 522.0, 510.0, 573.0, 570.0, 573.0, 525.0, 582.0, 579.0, 627.0, 579.0, 525.0, 579.0, 576.0, 576.0, 584.0, 564.0, 582.0, 587.0, 570.0, 627.0, 582.0, 579.0, 579.0, 582.0, 582.0, 582.0, 579.0, 630.0, 579.0, 579.0, 582.0, 627.0, 582.0, 576.0, 570.0, 576.0, 582.0, 573.0, 525.0, 630.0, 579.0, 522.0, 576.0, 630.0, 579.0, 521.0, 570.0, 582.0, 582.0, 576.0, 630.0, 633.0, 576.0, 576.0, 579.0, 582.0, 522.0, 579.0, 579.0, 584.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [294.0, 293.0, 262.0, 265.0, 293.0, 283.0, 253.0, 257.0, 299.0, 283.0, 311.0, 316.0, 239.0, 226.0, 314.0, 319.0, 262.0, 257.0, 289.0, 293.0, 268.0, 265.0, 207.0, 198.0, 264.0, 258.0, 291.0, 290.0, 254.0, 268.0, 32.0, 34.0, 286.0, 293.0, 267.0, 263.0, 286.0, 290.0, 284.0, 286.0, 286.0, 293.0, 257.0, 253.0, 291.0, 291.0, 294.0, 276.0, 316.0, 
311.0, 282.0, 288.0, 289.0, 290.0, 139.0, 140.0, 291.0, 288.0, 291.0, 288.0, 290.0, 283.0, 219.0, 234.0, 293.0, 286.0, 296.0, 286.0, 294.0, 288.0, 292.0, 290.0, 286.0, 293.0, 283.0, 293.0, 288.0, 294.0, 257.0, 265.0, 267.0, 243.0, 291.0, 282.0, 283.0, 287.0, 288.0, 285.0, 267.0, 258.0, 286.0, 296.0, 286.0, 293.0, 308.0, 319.0, 296.0, 283.0, 267.0, 258.0, 296.0, 283.0, 291.0, 285.0, 280.0, 296.0, 293.0, 291.0, 292.0, 272.0, 289.0, 293.0, 296.0, 291.0, 284.0, 286.0, 318.0, 309.0, 293.0, 289.0, 294.0, 285.0, 288.0, 291.0, 292.0, 290.0, 294.0, 288.0, 284.0, 298.0, 288.0, 291.0, 311.0, 319.0, 286.0, 293.0, 288.0, 291.0, 288.0, 294.0, 316.0, 311.0, 284.0, 298.0, 288.0, 288.0, 293.0, 277.0, 294.0, 282.0, 296.0, 286.0, 283.0, 290.0, 265.0, 260.0, 311.0, 319.0, 296.0, 283.0, 257.0, 265.0, 291.0, 285.0, 319.0, 311.0, 292.0, 287.0, 266.0, 255.0, 288.0, 282.0, 296.0, 286.0, 291.0, 291.0, 290.0, 286.0, 311.0, 319.0, 314.0, 319.0, 288.0, 288.0, 278.0, 298.0, 296.0, 283.0, 286.0, 296.0, 258.0, 264.0, 283.0, 296.0, 284.0, 295.0, 298.0, 286.0, 286.0, 296.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6945962982346888, "mean_inference_ms": 1.2389306404779257, "mean_action_processing_ms": 0.13333350576114789, "mean_env_wait_ms": 0.8363681916751531, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 633.0, "episode_reward_min": 66.0, "episode_reward_mean": 562.59, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 32.0}, "policy_reward_max": {"ppo": 319.0}, "policy_reward_mean": {"ppo": 281.295}, "hist_stats": {"episode_reward": [587.0, 527.0, 576.0, 510.0, 582.0, 627.0, 465.0, 633.0, 519.0, 582.0, 533.0, 405.0, 522.0, 581.0, 522.0, 66.0, 579.0, 530.0, 576.0, 570.0, 579.0, 510.0, 582.0, 570.0, 627.0, 570.0, 579.0, 279.0, 579.0, 579.0, 573.0, 453.0, 579.0, 582.0, 582.0, 582.0, 579.0, 576.0, 582.0, 522.0, 510.0, 573.0, 570.0, 573.0, 525.0, 582.0, 579.0, 627.0, 579.0, 525.0, 579.0, 576.0, 576.0, 584.0, 564.0, 582.0, 587.0, 
570.0, 627.0, 582.0, 579.0, 579.0, 582.0, 582.0, 582.0, 579.0, 630.0, 579.0, 579.0, 582.0, 627.0, 582.0, 576.0, 570.0, 576.0, 582.0, 573.0, 525.0, 630.0, 579.0, 522.0, 576.0, 630.0, 579.0, 521.0, 570.0, 582.0, 582.0, 576.0, 630.0, 633.0, 576.0, 576.0, 579.0, 582.0, 522.0, 579.0, 579.0, 584.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [294.0, 293.0, 262.0, 265.0, 293.0, 283.0, 253.0, 257.0, 299.0, 283.0, 311.0, 316.0, 239.0, 226.0, 314.0, 319.0, 262.0, 257.0, 289.0, 293.0, 268.0, 265.0, 207.0, 198.0, 264.0, 258.0, 291.0, 290.0, 254.0, 268.0, 32.0, 34.0, 286.0, 293.0, 267.0, 263.0, 286.0, 290.0, 284.0, 286.0, 286.0, 293.0, 257.0, 253.0, 291.0, 291.0, 294.0, 276.0, 316.0, 311.0, 282.0, 288.0, 289.0, 290.0, 139.0, 140.0, 291.0, 288.0, 291.0, 288.0, 290.0, 283.0, 219.0, 234.0, 293.0, 286.0, 296.0, 286.0, 294.0, 288.0, 292.0, 290.0, 286.0, 293.0, 283.0, 293.0, 288.0, 294.0, 257.0, 265.0, 267.0, 243.0, 291.0, 282.0, 283.0, 287.0, 288.0, 285.0, 267.0, 258.0, 286.0, 296.0, 286.0, 293.0, 308.0, 319.0, 296.0, 283.0, 267.0, 258.0, 296.0, 283.0, 291.0, 285.0, 280.0, 296.0, 293.0, 291.0, 292.0, 272.0, 289.0, 293.0, 296.0, 291.0, 284.0, 286.0, 318.0, 309.0, 293.0, 289.0, 294.0, 285.0, 288.0, 291.0, 292.0, 290.0, 294.0, 288.0, 284.0, 298.0, 288.0, 291.0, 311.0, 319.0, 286.0, 293.0, 288.0, 291.0, 288.0, 294.0, 316.0, 311.0, 284.0, 298.0, 288.0, 288.0, 293.0, 277.0, 294.0, 282.0, 296.0, 286.0, 283.0, 290.0, 265.0, 260.0, 311.0, 319.0, 296.0, 283.0, 257.0, 265.0, 291.0, 285.0, 319.0, 
311.0, 292.0, 287.0, 266.0, 255.0, 288.0, 282.0, 296.0, 286.0, 291.0, 291.0, 290.0, 286.0, 311.0, 319.0, 314.0, 319.0, 288.0, 288.0, 278.0, 298.0, 296.0, 283.0, 286.0, 296.0, 258.0, 264.0, 283.0, 296.0, 284.0, 295.0, 298.0, 286.0, 286.0, 296.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6945962982346888, "mean_inference_ms": 1.2389306404779257, "mean_action_processing_ms": 0.13333350576114789, "mean_env_wait_ms": 0.8363681916751531, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 11315200, "num_agent_steps_trained": 11315200, "num_env_steps_sampled": 5657600, "num_env_steps_trained": 5657600, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 5657600, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 11315200, "timers": {"training_iteration_time_ms": 3657.426, "learn_time_ms": 1142.083, "learn_throughput": 11207.588, "synch_weights_time_ms": 11.94}, "counters": {"num_env_steps_sampled": 5657600, "num_env_steps_trained": 5657600, "num_agent_steps_sampled": 11315200, "num_agent_steps_trained": 11315200}, "done": false, "episodes_total": 14144, "training_iteration": 442, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-29-11", "timestamp": 1666582151, "time_this_iter_s": 4.022900342941284, "time_total_s": 1684.1776719093323, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1684.1776719093323, "timesteps_since_restore": 0, "iterations_since_restore": 442, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 21.483333333333334, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 200.6, "sparse_reward_min": 100, "sparse_reward_max": 220, "shaped_reward_mean": 178.63, "shaped_reward_min": 85, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.73, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 16.18, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.67, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 15.99, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.07, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.08, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.03, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, 
"useful_onion_drop_agent_1_mean": 0.02, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.44, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 15.8, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.36, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 5.46, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.14, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.28, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.07, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.06, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.04, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 4.99, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.14, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 4.95, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.11, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.44, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 15.8, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.44, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 15.8, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0034412413369864225, "policy_loss": 0.0030490802600979805, "vf_loss": 
7.769399166107178, "vf_explained_var": 0.5706442594528198, "kl": 0.007476408034563065, "entropy": 0.7695587873458862, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 5670400, "num_env_steps_trained": 5670400, "num_agent_steps_sampled": 11340800, "num_agent_steps_trained": 11340800}, "sampler_results": {"episode_reward_max": 636.0, "episode_reward_min": 285.0, "episode_reward_mean": 579.83, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 134.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 289.915}, "custom_metrics": {"sparse_reward_mean": 200.6, "sparse_reward_min": 100, "sparse_reward_max": 220, "shaped_reward_mean": 178.63, "shaped_reward_min": 85, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.73, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 16.18, 
"onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.67, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 15.99, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.07, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.08, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.03, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.02, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.44, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 15.8, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.36, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 5.46, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.14, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.28, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.07, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.06, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.04, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 4.99, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.14, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 4.95, "soup_delivery_agent_0_min": 2, 
"soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.11, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.44, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 15.8, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.44, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 15.8, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 582.0, 582.0, 582.0, 579.0, 576.0, 582.0, 522.0, 510.0, 573.0, 570.0, 573.0, 525.0, 582.0, 579.0, 627.0, 579.0, 525.0, 579.0, 576.0, 576.0, 584.0, 564.0, 582.0, 587.0, 570.0, 627.0, 582.0, 579.0, 579.0, 582.0, 582.0, 582.0, 579.0, 630.0, 579.0, 579.0, 582.0, 627.0, 582.0, 576.0, 570.0, 576.0, 582.0, 573.0, 525.0, 630.0, 579.0, 522.0, 576.0, 630.0, 579.0, 521.0, 570.0, 582.0, 582.0, 576.0, 630.0, 633.0, 576.0, 576.0, 579.0, 582.0, 522.0, 579.0, 579.0, 584.0, 582.0, 627.0, 579.0, 582.0, 636.0, 576.0, 582.0, 587.0, 630.0, 633.0, 570.0, 479.0, 582.0, 587.0, 579.0, 627.0, 285.0, 587.0, 624.0, 584.0, 582.0, 582.0, 579.0, 636.0, 573.0, 636.0, 579.0, 636.0, 579.0, 627.0, 582.0, 587.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [293.0, 286.0, 296.0, 286.0, 294.0, 288.0, 292.0, 290.0, 286.0, 293.0, 283.0, 293.0, 288.0, 294.0, 257.0, 265.0, 267.0, 243.0, 291.0, 282.0, 283.0, 287.0, 288.0, 285.0, 267.0, 258.0, 286.0, 296.0, 286.0, 293.0, 308.0, 319.0, 296.0, 283.0, 267.0, 258.0, 296.0, 283.0, 291.0, 285.0, 280.0, 296.0, 293.0, 291.0, 292.0, 272.0, 289.0, 293.0, 296.0, 
291.0, 284.0, 286.0, 318.0, 309.0, 293.0, 289.0, 294.0, 285.0, 288.0, 291.0, 292.0, 290.0, 294.0, 288.0, 284.0, 298.0, 288.0, 291.0, 311.0, 319.0, 286.0, 293.0, 288.0, 291.0, 288.0, 294.0, 316.0, 311.0, 284.0, 298.0, 288.0, 288.0, 293.0, 277.0, 294.0, 282.0, 296.0, 286.0, 283.0, 290.0, 265.0, 260.0, 311.0, 319.0, 296.0, 283.0, 257.0, 265.0, 291.0, 285.0, 319.0, 311.0, 292.0, 287.0, 266.0, 255.0, 288.0, 282.0, 296.0, 286.0, 291.0, 291.0, 290.0, 286.0, 311.0, 319.0, 314.0, 319.0, 288.0, 288.0, 278.0, 298.0, 296.0, 283.0, 286.0, 296.0, 258.0, 264.0, 283.0, 296.0, 284.0, 295.0, 298.0, 286.0, 286.0, 296.0, 321.0, 306.0, 296.0, 283.0, 285.0, 297.0, 322.0, 314.0, 286.0, 290.0, 291.0, 291.0, 296.0, 291.0, 317.0, 313.0, 321.0, 312.0, 283.0, 287.0, 248.0, 231.0, 289.0, 293.0, 289.0, 298.0, 293.0, 286.0, 316.0, 311.0, 134.0, 151.0, 292.0, 295.0, 306.0, 318.0, 296.0, 288.0, 292.0, 290.0, 293.0, 289.0, 286.0, 293.0, 327.0, 309.0, 288.0, 285.0, 314.0, 322.0, 291.0, 288.0, 314.0, 322.0, 289.0, 290.0, 311.0, 316.0, 286.0, 296.0, 294.0, 293.0, 292.0, 290.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6945383548540814, "mean_inference_ms": 1.2388862595050705, "mean_action_processing_ms": 0.1333267827355028, "mean_env_wait_ms": 0.8364096381922611, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 636.0, "episode_reward_min": 285.0, "episode_reward_mean": 579.83, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 134.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 289.915}, "hist_stats": {"episode_reward": [579.0, 582.0, 582.0, 582.0, 579.0, 576.0, 582.0, 522.0, 510.0, 573.0, 570.0, 573.0, 525.0, 582.0, 579.0, 627.0, 579.0, 525.0, 579.0, 576.0, 576.0, 584.0, 564.0, 582.0, 587.0, 570.0, 627.0, 582.0, 579.0, 579.0, 582.0, 582.0, 582.0, 579.0, 630.0, 579.0, 579.0, 582.0, 627.0, 582.0, 576.0, 570.0, 576.0, 582.0, 573.0, 525.0, 630.0, 579.0, 522.0, 576.0, 630.0, 579.0, 521.0, 570.0, 582.0, 582.0, 576.0, 
630.0, 633.0, 576.0, 576.0, 579.0, 582.0, 522.0, 579.0, 579.0, 584.0, 582.0, 627.0, 579.0, 582.0, 636.0, 576.0, 582.0, 587.0, 630.0, 633.0, 570.0, 479.0, 582.0, 587.0, 579.0, 627.0, 285.0, 587.0, 624.0, 584.0, 582.0, 582.0, 579.0, 636.0, 573.0, 636.0, 579.0, 636.0, 579.0, 627.0, 582.0, 587.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [293.0, 286.0, 296.0, 286.0, 294.0, 288.0, 292.0, 290.0, 286.0, 293.0, 283.0, 293.0, 288.0, 294.0, 257.0, 265.0, 267.0, 243.0, 291.0, 282.0, 283.0, 287.0, 288.0, 285.0, 267.0, 258.0, 286.0, 296.0, 286.0, 293.0, 308.0, 319.0, 296.0, 283.0, 267.0, 258.0, 296.0, 283.0, 291.0, 285.0, 280.0, 296.0, 293.0, 291.0, 292.0, 272.0, 289.0, 293.0, 296.0, 291.0, 284.0, 286.0, 318.0, 309.0, 293.0, 289.0, 294.0, 285.0, 288.0, 291.0, 292.0, 290.0, 294.0, 288.0, 284.0, 298.0, 288.0, 291.0, 311.0, 319.0, 286.0, 293.0, 288.0, 291.0, 288.0, 294.0, 316.0, 311.0, 284.0, 298.0, 288.0, 288.0, 293.0, 277.0, 294.0, 282.0, 296.0, 286.0, 283.0, 290.0, 265.0, 260.0, 311.0, 319.0, 296.0, 283.0, 257.0, 265.0, 291.0, 285.0, 319.0, 311.0, 292.0, 287.0, 266.0, 255.0, 288.0, 282.0, 296.0, 286.0, 291.0, 291.0, 290.0, 286.0, 311.0, 319.0, 314.0, 319.0, 288.0, 288.0, 278.0, 298.0, 296.0, 283.0, 286.0, 296.0, 258.0, 264.0, 283.0, 296.0, 284.0, 295.0, 298.0, 286.0, 286.0, 296.0, 321.0, 306.0, 296.0, 283.0, 285.0, 297.0, 322.0, 314.0, 286.0, 290.0, 291.0, 291.0, 296.0, 291.0, 317.0, 313.0, 321.0, 312.0, 283.0, 287.0, 248.0, 231.0, 289.0, 293.0, 289.0, 298.0, 293.0, 286.0, 316.0, 
311.0, 134.0, 151.0, 292.0, 295.0, 306.0, 318.0, 296.0, 288.0, 292.0, 290.0, 293.0, 289.0, 286.0, 293.0, 327.0, 309.0, 288.0, 285.0, 314.0, 322.0, 291.0, 288.0, 314.0, 322.0, 289.0, 290.0, 311.0, 316.0, 286.0, 296.0, 294.0, 293.0, 292.0, 290.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6945383548540814, "mean_inference_ms": 1.2388862595050705, "mean_action_processing_ms": 0.1333267827355028, "mean_env_wait_ms": 0.8364096381922611, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 11340800, "num_agent_steps_trained": 11340800, "num_env_steps_sampled": 5670400, "num_env_steps_trained": 5670400, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 5670400, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 11340800, "timers": {"training_iteration_time_ms": 3679.648, "learn_time_ms": 1158.316, "learn_throughput": 11050.529, "synch_weights_time_ms": 11.806}, "counters": {"num_env_steps_sampled": 5670400, "num_env_steps_trained": 5670400, "num_agent_steps_sampled": 11340800, "num_agent_steps_trained": 11340800}, "done": false, "episodes_total": 14176, "training_iteration": 443, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-29-15", "timestamp": 1666582155, "time_this_iter_s": 3.784215211868286, "time_total_s": 1687.9618871212006, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1687.9618871212006, "timesteps_since_restore": 0, "iterations_since_restore": 443, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.98333333333333, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 202.4, "sparse_reward_min": 100, "sparse_reward_max": 220, "shaped_reward_mean": 180.24, "shaped_reward_min": 85, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.91, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 16.32, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.84, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 16.14, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.07, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.06, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.03, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, 
"useful_onion_drop_agent_1_mean": 0.03, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.57, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 15.93, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.35, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 5.54, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.11, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.37, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.1, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.04, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.02, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.24, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.96, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.19, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.57, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 15.93, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.57, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 15.93, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.002385055646300316, "policy_loss": 0.0020049570593982935, "vf_loss": 
7.654664993286133, "vf_explained_var": 0.5863066911697388, "kl": 0.0024675102904438972, "entropy": 0.7707381248474121, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 5683200, "num_env_steps_trained": 5683200, "num_agent_steps_sampled": 11366400, "num_agent_steps_trained": 11366400}, "sampler_results": {"episode_reward_max": 636.0, "episode_reward_min": 285.0, "episode_reward_mean": 585.04, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 134.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 292.52}, "custom_metrics": {"sparse_reward_mean": 202.4, "sparse_reward_min": 100, "sparse_reward_max": 220, "shaped_reward_mean": 180.24, "shaped_reward_min": 85, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.91, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 16.32, 
"onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.84, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 16.14, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.07, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.06, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.03, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.03, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.57, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 15.93, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.35, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 5.54, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.11, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.37, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.1, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.04, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.02, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.24, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.96, "soup_delivery_agent_0_min": 2, 
"soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.19, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.57, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 15.93, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.57, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 15.93, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 579.0, 630.0, 579.0, 579.0, 582.0, 627.0, 582.0, 576.0, 570.0, 576.0, 582.0, 573.0, 525.0, 630.0, 579.0, 522.0, 576.0, 630.0, 579.0, 521.0, 570.0, 582.0, 582.0, 576.0, 630.0, 633.0, 576.0, 576.0, 579.0, 582.0, 522.0, 579.0, 579.0, 584.0, 582.0, 627.0, 579.0, 582.0, 636.0, 576.0, 582.0, 587.0, 630.0, 633.0, 570.0, 479.0, 582.0, 587.0, 579.0, 627.0, 285.0, 587.0, 624.0, 584.0, 582.0, 582.0, 579.0, 636.0, 573.0, 636.0, 579.0, 636.0, 579.0, 627.0, 582.0, 587.0, 582.0, 579.0, 630.0, 579.0, 519.0, 579.0, 573.0, 576.0, 630.0, 630.0, 573.0, 587.0, 582.0, 633.0, 582.0, 627.0, 584.0, 582.0, 567.0, 579.0, 584.0, 587.0, 579.0, 587.0, 630.0, 582.0, 582.0, 579.0, 627.0, 630.0, 582.0, 581.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [284.0, 298.0, 288.0, 291.0, 311.0, 319.0, 286.0, 293.0, 288.0, 291.0, 288.0, 294.0, 316.0, 311.0, 284.0, 298.0, 288.0, 288.0, 293.0, 277.0, 294.0, 282.0, 296.0, 286.0, 283.0, 290.0, 265.0, 260.0, 311.0, 319.0, 296.0, 283.0, 257.0, 265.0, 291.0, 285.0, 319.0, 311.0, 292.0, 287.0, 266.0, 255.0, 288.0, 282.0, 296.0, 286.0, 291.0, 291.0, 290.0, 
286.0, 311.0, 319.0, 314.0, 319.0, 288.0, 288.0, 278.0, 298.0, 296.0, 283.0, 286.0, 296.0, 258.0, 264.0, 283.0, 296.0, 284.0, 295.0, 298.0, 286.0, 286.0, 296.0, 321.0, 306.0, 296.0, 283.0, 285.0, 297.0, 322.0, 314.0, 286.0, 290.0, 291.0, 291.0, 296.0, 291.0, 317.0, 313.0, 321.0, 312.0, 283.0, 287.0, 248.0, 231.0, 289.0, 293.0, 289.0, 298.0, 293.0, 286.0, 316.0, 311.0, 134.0, 151.0, 292.0, 295.0, 306.0, 318.0, 296.0, 288.0, 292.0, 290.0, 293.0, 289.0, 286.0, 293.0, 327.0, 309.0, 288.0, 285.0, 314.0, 322.0, 291.0, 288.0, 314.0, 322.0, 289.0, 290.0, 311.0, 316.0, 286.0, 296.0, 294.0, 293.0, 292.0, 290.0, 291.0, 288.0, 314.0, 316.0, 288.0, 291.0, 256.0, 263.0, 285.0, 294.0, 293.0, 280.0, 291.0, 285.0, 316.0, 314.0, 316.0, 314.0, 291.0, 282.0, 286.0, 301.0, 288.0, 294.0, 316.0, 317.0, 289.0, 293.0, 321.0, 306.0, 296.0, 288.0, 285.0, 297.0, 294.0, 273.0, 287.0, 292.0, 296.0, 288.0, 293.0, 294.0, 296.0, 283.0, 299.0, 288.0, 319.0, 311.0, 293.0, 289.0, 293.0, 289.0, 282.0, 297.0, 308.0, 319.0, 314.0, 316.0, 291.0, 291.0, 283.0, 298.0, 283.0, 293.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6944838399502834, "mean_inference_ms": 1.2388383439445032, "mean_action_processing_ms": 0.13331991171472862, "mean_env_wait_ms": 0.836413731185545, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 636.0, "episode_reward_min": 285.0, "episode_reward_mean": 585.04, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 134.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 292.52}, "hist_stats": {"episode_reward": [582.0, 579.0, 630.0, 579.0, 579.0, 582.0, 627.0, 582.0, 576.0, 570.0, 576.0, 582.0, 573.0, 525.0, 630.0, 579.0, 522.0, 576.0, 630.0, 579.0, 521.0, 570.0, 582.0, 582.0, 576.0, 630.0, 633.0, 576.0, 576.0, 579.0, 582.0, 522.0, 579.0, 579.0, 584.0, 582.0, 627.0, 579.0, 582.0, 636.0, 576.0, 582.0, 587.0, 630.0, 633.0, 570.0, 479.0, 582.0, 587.0, 579.0, 627.0, 285.0, 587.0, 624.0, 584.0, 582.0, 582.0, 
579.0, 636.0, 573.0, 636.0, 579.0, 636.0, 579.0, 627.0, 582.0, 587.0, 582.0, 579.0, 630.0, 579.0, 519.0, 579.0, 573.0, 576.0, 630.0, 630.0, 573.0, 587.0, 582.0, 633.0, 582.0, 627.0, 584.0, 582.0, 567.0, 579.0, 584.0, 587.0, 579.0, 587.0, 630.0, 582.0, 582.0, 579.0, 627.0, 630.0, 582.0, 581.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [284.0, 298.0, 288.0, 291.0, 311.0, 319.0, 286.0, 293.0, 288.0, 291.0, 288.0, 294.0, 316.0, 311.0, 284.0, 298.0, 288.0, 288.0, 293.0, 277.0, 294.0, 282.0, 296.0, 286.0, 283.0, 290.0, 265.0, 260.0, 311.0, 319.0, 296.0, 283.0, 257.0, 265.0, 291.0, 285.0, 319.0, 311.0, 292.0, 287.0, 266.0, 255.0, 288.0, 282.0, 296.0, 286.0, 291.0, 291.0, 290.0, 286.0, 311.0, 319.0, 314.0, 319.0, 288.0, 288.0, 278.0, 298.0, 296.0, 283.0, 286.0, 296.0, 258.0, 264.0, 283.0, 296.0, 284.0, 295.0, 298.0, 286.0, 286.0, 296.0, 321.0, 306.0, 296.0, 283.0, 285.0, 297.0, 322.0, 314.0, 286.0, 290.0, 291.0, 291.0, 296.0, 291.0, 317.0, 313.0, 321.0, 312.0, 283.0, 287.0, 248.0, 231.0, 289.0, 293.0, 289.0, 298.0, 293.0, 286.0, 316.0, 311.0, 134.0, 151.0, 292.0, 295.0, 306.0, 318.0, 296.0, 288.0, 292.0, 290.0, 293.0, 289.0, 286.0, 293.0, 327.0, 309.0, 288.0, 285.0, 314.0, 322.0, 291.0, 288.0, 314.0, 322.0, 289.0, 290.0, 311.0, 316.0, 286.0, 296.0, 294.0, 293.0, 292.0, 290.0, 291.0, 288.0, 314.0, 316.0, 288.0, 291.0, 256.0, 263.0, 285.0, 294.0, 293.0, 280.0, 291.0, 285.0, 316.0, 314.0, 316.0, 314.0, 291.0, 282.0, 286.0, 301.0, 288.0, 294.0, 316.0, 317.0, 289.0, 293.0, 321.0, 
306.0, 296.0, 288.0, 285.0, 297.0, 294.0, 273.0, 287.0, 292.0, 296.0, 288.0, 293.0, 294.0, 296.0, 283.0, 299.0, 288.0, 319.0, 311.0, 293.0, 289.0, 293.0, 289.0, 282.0, 297.0, 308.0, 319.0, 314.0, 316.0, 291.0, 291.0, 283.0, 298.0, 283.0, 293.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6944838399502834, "mean_inference_ms": 1.2388383439445032, "mean_action_processing_ms": 0.13331991171472862, "mean_env_wait_ms": 0.836413731185545, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 11366400, "num_agent_steps_trained": 11366400, "num_env_steps_sampled": 5683200, "num_env_steps_trained": 5683200, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 5683200, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 11366400, "timers": {"training_iteration_time_ms": 3681.302, "learn_time_ms": 1161.467, "learn_throughput": 11020.542, "synch_weights_time_ms": 11.211}, "counters": {"num_env_steps_sampled": 5683200, "num_env_steps_trained": 5683200, "num_agent_steps_sampled": 11366400, "num_agent_steps_trained": 11366400}, "done": false, "episodes_total": 14208, "training_iteration": 444, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-29-19", "timestamp": 1666582159, "time_this_iter_s": 3.6792352199554443, "time_total_s": 1691.641122341156, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1691.641122341156, "timesteps_since_restore": 0, "iterations_since_restore": 444, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.34, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 203.2, "sparse_reward_min": 100, "sparse_reward_max": 220, "shaped_reward_mean": 181.56, "shaped_reward_min": 85, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.87, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 16.55, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.77, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 16.39, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.12, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.08, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.07, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, 
"useful_onion_drop_agent_1_mean": 0.04, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.46, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 16.2, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.47, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 5.51, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.26, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.35, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.1, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.05, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.13, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.22, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.07, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.14, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.46, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 16.2, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.46, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 16.2, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0007226134184747934, "policy_loss": -0.0011081930715590715, "vf_loss": 
7.645465850830078, "vf_explained_var": 0.5968180894851685, "kl": 0.002879057079553604, "entropy": 0.7579330801963806, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 5696000, "num_env_steps_trained": 5696000, "num_agent_steps_sampled": 11392000, "num_agent_steps_trained": 11392000}, "sampler_results": {"episode_reward_max": 639.0, "episode_reward_min": 285.0, "episode_reward_mean": 587.96, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 134.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 293.98}, "custom_metrics": {"sparse_reward_mean": 203.2, "sparse_reward_min": 100, "sparse_reward_max": 220, "shaped_reward_mean": 181.56, "shaped_reward_min": 85, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.87, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 16.55, 
"onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.77, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 16.39, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.12, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.08, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.07, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.04, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.46, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 16.2, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.47, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 5.51, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.26, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.35, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.1, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.05, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.13, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.22, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.07, "soup_delivery_agent_0_min": 2, 
"soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.14, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.46, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 16.2, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.46, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 16.2, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 579.0, 584.0, 582.0, 627.0, 579.0, 582.0, 636.0, 576.0, 582.0, 587.0, 630.0, 633.0, 570.0, 479.0, 582.0, 587.0, 579.0, 627.0, 285.0, 587.0, 624.0, 584.0, 582.0, 582.0, 579.0, 636.0, 573.0, 636.0, 579.0, 636.0, 579.0, 627.0, 582.0, 587.0, 582.0, 579.0, 630.0, 579.0, 519.0, 579.0, 573.0, 576.0, 630.0, 630.0, 573.0, 587.0, 582.0, 633.0, 582.0, 627.0, 584.0, 582.0, 567.0, 579.0, 584.0, 587.0, 579.0, 587.0, 630.0, 582.0, 582.0, 579.0, 627.0, 630.0, 582.0, 581.0, 576.0, 587.0, 582.0, 576.0, 579.0, 627.0, 630.0, 519.0, 633.0, 627.0, 579.0, 579.0, 636.0, 582.0, 576.0, 633.0, 627.0, 573.0, 582.0, 630.0, 582.0, 582.0, 587.0, 530.0, 627.0, 582.0, 579.0, 582.0, 587.0, 627.0, 582.0, 436.0, 639.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [283.0, 296.0, 284.0, 295.0, 298.0, 286.0, 286.0, 296.0, 321.0, 306.0, 296.0, 283.0, 285.0, 297.0, 322.0, 314.0, 286.0, 290.0, 291.0, 291.0, 296.0, 291.0, 317.0, 313.0, 321.0, 312.0, 283.0, 287.0, 248.0, 231.0, 289.0, 293.0, 289.0, 298.0, 293.0, 286.0, 316.0, 311.0, 134.0, 151.0, 292.0, 295.0, 306.0, 318.0, 296.0, 288.0, 292.0, 290.0, 293.0, 
289.0, 286.0, 293.0, 327.0, 309.0, 288.0, 285.0, 314.0, 322.0, 291.0, 288.0, 314.0, 322.0, 289.0, 290.0, 311.0, 316.0, 286.0, 296.0, 294.0, 293.0, 292.0, 290.0, 291.0, 288.0, 314.0, 316.0, 288.0, 291.0, 256.0, 263.0, 285.0, 294.0, 293.0, 280.0, 291.0, 285.0, 316.0, 314.0, 316.0, 314.0, 291.0, 282.0, 286.0, 301.0, 288.0, 294.0, 316.0, 317.0, 289.0, 293.0, 321.0, 306.0, 296.0, 288.0, 285.0, 297.0, 294.0, 273.0, 287.0, 292.0, 296.0, 288.0, 293.0, 294.0, 296.0, 283.0, 299.0, 288.0, 319.0, 311.0, 293.0, 289.0, 293.0, 289.0, 282.0, 297.0, 308.0, 319.0, 314.0, 316.0, 291.0, 291.0, 283.0, 298.0, 283.0, 293.0, 296.0, 291.0, 293.0, 289.0, 285.0, 291.0, 291.0, 288.0, 314.0, 313.0, 319.0, 311.0, 266.0, 253.0, 322.0, 311.0, 318.0, 309.0, 284.0, 295.0, 287.0, 292.0, 314.0, 322.0, 291.0, 291.0, 283.0, 293.0, 308.0, 325.0, 306.0, 321.0, 285.0, 288.0, 289.0, 293.0, 319.0, 311.0, 295.0, 287.0, 288.0, 294.0, 283.0, 304.0, 270.0, 260.0, 318.0, 309.0, 289.0, 293.0, 293.0, 286.0, 291.0, 291.0, 299.0, 288.0, 313.0, 314.0, 291.0, 291.0, 223.0, 213.0, 316.0, 323.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.694432246453092, "mean_inference_ms": 1.2387215474986069, "mean_action_processing_ms": 0.13331381116020222, "mean_env_wait_ms": 0.8363609822221093, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 639.0, "episode_reward_min": 285.0, "episode_reward_mean": 587.96, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 134.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 293.98}, "hist_stats": {"episode_reward": [579.0, 579.0, 584.0, 582.0, 627.0, 579.0, 582.0, 636.0, 576.0, 582.0, 587.0, 630.0, 633.0, 570.0, 479.0, 582.0, 587.0, 579.0, 627.0, 285.0, 587.0, 624.0, 584.0, 582.0, 582.0, 579.0, 636.0, 573.0, 636.0, 579.0, 636.0, 579.0, 627.0, 582.0, 587.0, 582.0, 579.0, 630.0, 579.0, 519.0, 579.0, 573.0, 576.0, 630.0, 630.0, 573.0, 587.0, 582.0, 633.0, 582.0, 627.0, 584.0, 582.0, 567.0, 579.0, 584.0, 587.0, 
579.0, 587.0, 630.0, 582.0, 582.0, 579.0, 627.0, 630.0, 582.0, 581.0, 576.0, 587.0, 582.0, 576.0, 579.0, 627.0, 630.0, 519.0, 633.0, 627.0, 579.0, 579.0, 636.0, 582.0, 576.0, 633.0, 627.0, 573.0, 582.0, 630.0, 582.0, 582.0, 587.0, 530.0, 627.0, 582.0, 579.0, 582.0, 587.0, 627.0, 582.0, 436.0, 639.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [283.0, 296.0, 284.0, 295.0, 298.0, 286.0, 286.0, 296.0, 321.0, 306.0, 296.0, 283.0, 285.0, 297.0, 322.0, 314.0, 286.0, 290.0, 291.0, 291.0, 296.0, 291.0, 317.0, 313.0, 321.0, 312.0, 283.0, 287.0, 248.0, 231.0, 289.0, 293.0, 289.0, 298.0, 293.0, 286.0, 316.0, 311.0, 134.0, 151.0, 292.0, 295.0, 306.0, 318.0, 296.0, 288.0, 292.0, 290.0, 293.0, 289.0, 286.0, 293.0, 327.0, 309.0, 288.0, 285.0, 314.0, 322.0, 291.0, 288.0, 314.0, 322.0, 289.0, 290.0, 311.0, 316.0, 286.0, 296.0, 294.0, 293.0, 292.0, 290.0, 291.0, 288.0, 314.0, 316.0, 288.0, 291.0, 256.0, 263.0, 285.0, 294.0, 293.0, 280.0, 291.0, 285.0, 316.0, 314.0, 316.0, 314.0, 291.0, 282.0, 286.0, 301.0, 288.0, 294.0, 316.0, 317.0, 289.0, 293.0, 321.0, 306.0, 296.0, 288.0, 285.0, 297.0, 294.0, 273.0, 287.0, 292.0, 296.0, 288.0, 293.0, 294.0, 296.0, 283.0, 299.0, 288.0, 319.0, 311.0, 293.0, 289.0, 293.0, 289.0, 282.0, 297.0, 308.0, 319.0, 314.0, 316.0, 291.0, 291.0, 283.0, 298.0, 283.0, 293.0, 296.0, 291.0, 293.0, 289.0, 285.0, 291.0, 291.0, 288.0, 314.0, 313.0, 319.0, 311.0, 266.0, 253.0, 322.0, 311.0, 318.0, 309.0, 284.0, 295.0, 287.0, 292.0, 314.0, 322.0, 291.0, 291.0, 283.0, 293.0, 308.0, 
325.0, 306.0, 321.0, 285.0, 288.0, 289.0, 293.0, 319.0, 311.0, 295.0, 287.0, 288.0, 294.0, 283.0, 304.0, 270.0, 260.0, 318.0, 309.0, 289.0, 293.0, 293.0, 286.0, 291.0, 291.0, 299.0, 288.0, 313.0, 314.0, 291.0, 291.0, 223.0, 213.0, 316.0, 323.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.694432246453092, "mean_inference_ms": 1.2387215474986069, "mean_action_processing_ms": 0.13331381116020222, "mean_env_wait_ms": 0.8363609822221093, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 11392000, "num_agent_steps_trained": 11392000, "num_env_steps_sampled": 5696000, "num_env_steps_trained": 5696000, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 5696000, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 11392000, "timers": {"training_iteration_time_ms": 3686.651, "learn_time_ms": 1166.893, "learn_throughput": 10969.296, "synch_weights_time_ms": 10.581}, "counters": {"num_env_steps_sampled": 5696000, "num_env_steps_trained": 5696000, "num_agent_steps_sampled": 11392000, "num_agent_steps_trained": 11392000}, "done": false, "episodes_total": 14240, "training_iteration": 445, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-29-22", "timestamp": 1666582162, "time_this_iter_s": 3.789203643798828, "time_total_s": 1695.4303259849548, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1695.4303259849548, "timesteps_since_restore": 0, "iterations_since_restore": 445, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.633333333333333, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 205.8, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 183.52, "shaped_reward_min": 156, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.87, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 16.93, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.76, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 16.83, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.1, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.08, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.06, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, 
"useful_onion_drop_agent_1_mean": 0.05, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.47, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 16.58, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.63, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 5.47, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.42, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.32, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.1, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.06, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.24, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.19, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.2, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.12, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.47, "optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 16.58, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.47, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 16.58, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.002931396011263132, "policy_loss": -0.0033237093593925238, "vf_loss": 
7.657073497772217, "vf_explained_var": 0.5952635407447815, "kl": 0.002693354617804289, "entropy": 0.7467869520187378, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 5708800, "num_env_steps_trained": 5708800, "num_agent_steps_sampled": 11417600, "num_agent_steps_trained": 11417600}, "sampler_results": {"episode_reward_max": 639.0, "episode_reward_min": 436.0, "episode_reward_mean": 595.12, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 213.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 297.56}, "custom_metrics": {"sparse_reward_mean": 205.8, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 183.52, "shaped_reward_min": 156, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.87, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 16.93, 
"onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.76, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 16.83, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.1, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.08, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.06, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.05, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.47, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 16.58, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.63, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 5.47, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.42, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.32, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.1, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.06, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.24, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.19, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.2, "soup_delivery_agent_0_min": 2, 
"soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.12, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.47, "optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 16.58, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.47, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 16.58, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [627.0, 582.0, 587.0, 582.0, 579.0, 630.0, 579.0, 519.0, 579.0, 573.0, 576.0, 630.0, 630.0, 573.0, 587.0, 582.0, 633.0, 582.0, 627.0, 584.0, 582.0, 567.0, 579.0, 584.0, 587.0, 579.0, 587.0, 630.0, 582.0, 582.0, 579.0, 627.0, 630.0, 582.0, 581.0, 576.0, 587.0, 582.0, 576.0, 579.0, 627.0, 630.0, 519.0, 633.0, 627.0, 579.0, 579.0, 636.0, 582.0, 576.0, 633.0, 627.0, 573.0, 582.0, 630.0, 582.0, 582.0, 587.0, 530.0, 627.0, 582.0, 579.0, 582.0, 587.0, 627.0, 582.0, 436.0, 639.0, 576.0, 582.0, 576.0, 516.0, 636.0, 627.0, 579.0, 582.0, 633.0, 582.0, 639.0, 582.0, 639.0, 636.0, 582.0, 587.0, 627.0, 633.0, 573.0, 579.0, 579.0, 582.0, 636.0, 636.0, 636.0, 633.0, 630.0, 582.0, 582.0, 630.0, 636.0, 630.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [311.0, 316.0, 286.0, 296.0, 294.0, 293.0, 292.0, 290.0, 291.0, 288.0, 314.0, 316.0, 288.0, 291.0, 256.0, 263.0, 285.0, 294.0, 293.0, 280.0, 291.0, 285.0, 316.0, 314.0, 316.0, 314.0, 291.0, 282.0, 286.0, 301.0, 288.0, 294.0, 316.0, 317.0, 289.0, 293.0, 321.0, 306.0, 296.0, 288.0, 285.0, 297.0, 294.0, 273.0, 287.0, 292.0, 296.0, 288.0, 293.0, 
294.0, 296.0, 283.0, 299.0, 288.0, 319.0, 311.0, 293.0, 289.0, 293.0, 289.0, 282.0, 297.0, 308.0, 319.0, 314.0, 316.0, 291.0, 291.0, 283.0, 298.0, 283.0, 293.0, 296.0, 291.0, 293.0, 289.0, 285.0, 291.0, 291.0, 288.0, 314.0, 313.0, 319.0, 311.0, 266.0, 253.0, 322.0, 311.0, 318.0, 309.0, 284.0, 295.0, 287.0, 292.0, 314.0, 322.0, 291.0, 291.0, 283.0, 293.0, 308.0, 325.0, 306.0, 321.0, 285.0, 288.0, 289.0, 293.0, 319.0, 311.0, 295.0, 287.0, 288.0, 294.0, 283.0, 304.0, 270.0, 260.0, 318.0, 309.0, 289.0, 293.0, 293.0, 286.0, 291.0, 291.0, 299.0, 288.0, 313.0, 314.0, 291.0, 291.0, 223.0, 213.0, 316.0, 323.0, 284.0, 292.0, 292.0, 290.0, 289.0, 287.0, 250.0, 266.0, 317.0, 319.0, 319.0, 308.0, 291.0, 288.0, 289.0, 293.0, 317.0, 316.0, 291.0, 291.0, 320.0, 319.0, 296.0, 286.0, 319.0, 320.0, 321.0, 315.0, 292.0, 290.0, 291.0, 296.0, 321.0, 306.0, 316.0, 317.0, 293.0, 280.0, 288.0, 291.0, 293.0, 286.0, 291.0, 291.0, 316.0, 320.0, 317.0, 319.0, 317.0, 319.0, 314.0, 319.0, 316.0, 314.0, 293.0, 289.0, 288.0, 294.0, 318.0, 312.0, 317.0, 319.0, 316.0, 314.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6943827201604756, "mean_inference_ms": 1.2386117672157435, "mean_action_processing_ms": 0.13330709458738682, "mean_env_wait_ms": 0.8362894782649013, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 639.0, "episode_reward_min": 436.0, "episode_reward_mean": 595.12, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 213.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 297.56}, "hist_stats": {"episode_reward": [627.0, 582.0, 587.0, 582.0, 579.0, 630.0, 579.0, 519.0, 579.0, 573.0, 576.0, 630.0, 630.0, 573.0, 587.0, 582.0, 633.0, 582.0, 627.0, 584.0, 582.0, 567.0, 579.0, 584.0, 587.0, 579.0, 587.0, 630.0, 582.0, 582.0, 579.0, 627.0, 630.0, 582.0, 581.0, 576.0, 587.0, 582.0, 576.0, 579.0, 627.0, 630.0, 519.0, 633.0, 627.0, 579.0, 579.0, 636.0, 582.0, 576.0, 633.0, 627.0, 573.0, 582.0, 630.0, 582.0, 582.0, 
587.0, 530.0, 627.0, 582.0, 579.0, 582.0, 587.0, 627.0, 582.0, 436.0, 639.0, 576.0, 582.0, 576.0, 516.0, 636.0, 627.0, 579.0, 582.0, 633.0, 582.0, 639.0, 582.0, 639.0, 636.0, 582.0, 587.0, 627.0, 633.0, 573.0, 579.0, 579.0, 582.0, 636.0, 636.0, 636.0, 633.0, 630.0, 582.0, 582.0, 630.0, 636.0, 630.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [311.0, 316.0, 286.0, 296.0, 294.0, 293.0, 292.0, 290.0, 291.0, 288.0, 314.0, 316.0, 288.0, 291.0, 256.0, 263.0, 285.0, 294.0, 293.0, 280.0, 291.0, 285.0, 316.0, 314.0, 316.0, 314.0, 291.0, 282.0, 286.0, 301.0, 288.0, 294.0, 316.0, 317.0, 289.0, 293.0, 321.0, 306.0, 296.0, 288.0, 285.0, 297.0, 294.0, 273.0, 287.0, 292.0, 296.0, 288.0, 293.0, 294.0, 296.0, 283.0, 299.0, 288.0, 319.0, 311.0, 293.0, 289.0, 293.0, 289.0, 282.0, 297.0, 308.0, 319.0, 314.0, 316.0, 291.0, 291.0, 283.0, 298.0, 283.0, 293.0, 296.0, 291.0, 293.0, 289.0, 285.0, 291.0, 291.0, 288.0, 314.0, 313.0, 319.0, 311.0, 266.0, 253.0, 322.0, 311.0, 318.0, 309.0, 284.0, 295.0, 287.0, 292.0, 314.0, 322.0, 291.0, 291.0, 283.0, 293.0, 308.0, 325.0, 306.0, 321.0, 285.0, 288.0, 289.0, 293.0, 319.0, 311.0, 295.0, 287.0, 288.0, 294.0, 283.0, 304.0, 270.0, 260.0, 318.0, 309.0, 289.0, 293.0, 293.0, 286.0, 291.0, 291.0, 299.0, 288.0, 313.0, 314.0, 291.0, 291.0, 223.0, 213.0, 316.0, 323.0, 284.0, 292.0, 292.0, 290.0, 289.0, 287.0, 250.0, 266.0, 317.0, 319.0, 319.0, 308.0, 291.0, 288.0, 289.0, 293.0, 317.0, 316.0, 291.0, 291.0, 320.0, 319.0, 296.0, 286.0, 319.0, 320.0, 321.0, 315.0, 292.0, 
290.0, 291.0, 296.0, 321.0, 306.0, 316.0, 317.0, 293.0, 280.0, 288.0, 291.0, 293.0, 286.0, 291.0, 291.0, 316.0, 320.0, 317.0, 319.0, 317.0, 319.0, 314.0, 319.0, 316.0, 314.0, 293.0, 289.0, 288.0, 294.0, 318.0, 312.0, 317.0, 319.0, 316.0, 314.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6943827201604756, "mean_inference_ms": 1.2386117672157435, "mean_action_processing_ms": 0.13330709458738682, "mean_env_wait_ms": 0.8362894782649013, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 11417600, "num_agent_steps_trained": 11417600, "num_env_steps_sampled": 5708800, "num_env_steps_trained": 5708800, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 5708800, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 11417600, "timers": {"training_iteration_time_ms": 3674.432, "learn_time_ms": 1151.071, "learn_throughput": 11120.082, "synch_weights_time_ms": 10.502}, "counters": {"num_env_steps_sampled": 5708800, "num_env_steps_trained": 5708800, "num_agent_steps_sampled": 11417600, "num_agent_steps_trained": 11417600}, "done": false, "episodes_total": 14272, "training_iteration": 446, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-29-26", "timestamp": 1666582166, "time_this_iter_s": 3.600297212600708, "time_total_s": 1699.0306231975555, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1699.0306231975555, "timesteps_since_restore": 0, "iterations_since_restore": 446, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.720000000000002, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 204.8, "sparse_reward_min": 60, "sparse_reward_max": 220, "shaped_reward_mean": 182.27, "shaped_reward_min": 54, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.78, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 16.82, "onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.68, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 16.72, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.12, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.07, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.09, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, 
"useful_onion_drop_agent_1_mean": 0.04, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.35, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 16.5, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.55, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 5.45, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.37, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.27, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.08, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.07, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.17, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.19, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.15, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.12, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.35, "optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 16.5, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.35, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 16.5, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0001785162603482604, "policy_loss": -0.0005822917446494102, "vf_loss": 
7.868484020233154, "vf_explained_var": 0.5918752551078796, "kl": 0.003730412572622299, "entropy": 0.7661446928977966, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 5721600, "num_env_steps_trained": 5721600, "num_agent_steps_sampled": 11443200, "num_agent_steps_trained": 11443200}, "sampler_results": {"episode_reward_max": 639.0, "episode_reward_min": 174.0, "episode_reward_mean": 591.87, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 83.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 295.935}, "custom_metrics": {"sparse_reward_mean": 204.8, "sparse_reward_min": 60, "sparse_reward_max": 220, "shaped_reward_mean": 182.27, "shaped_reward_min": 54, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.78, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 16.82, 
"onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.68, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 16.72, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.12, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.07, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.09, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.04, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.35, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 16.5, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.55, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 5.45, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.37, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.27, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.08, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.07, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.17, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.19, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.15, "soup_delivery_agent_0_min": 2, 
"soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.12, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.35, "optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 16.5, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.35, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 16.5, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [630.0, 582.0, 581.0, 576.0, 587.0, 582.0, 576.0, 579.0, 627.0, 630.0, 519.0, 633.0, 627.0, 579.0, 579.0, 636.0, 582.0, 576.0, 633.0, 627.0, 573.0, 582.0, 630.0, 582.0, 582.0, 587.0, 530.0, 627.0, 582.0, 579.0, 582.0, 587.0, 627.0, 582.0, 436.0, 639.0, 576.0, 582.0, 576.0, 516.0, 636.0, 627.0, 579.0, 582.0, 633.0, 582.0, 639.0, 582.0, 639.0, 636.0, 582.0, 587.0, 627.0, 633.0, 573.0, 579.0, 579.0, 582.0, 636.0, 636.0, 636.0, 633.0, 630.0, 582.0, 582.0, 630.0, 636.0, 630.0, 174.0, 576.0, 576.0, 636.0, 624.0, 576.0, 527.0, 630.0, 576.0, 579.0, 587.0, 573.0, 582.0, 573.0, 633.0, 579.0, 630.0, 579.0, 582.0, 636.0, 636.0, 587.0, 630.0, 630.0, 633.0, 582.0, 593.0, 633.0, 570.0, 582.0, 510.0, 567.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [314.0, 316.0, 291.0, 291.0, 283.0, 298.0, 283.0, 293.0, 296.0, 291.0, 293.0, 289.0, 285.0, 291.0, 291.0, 288.0, 314.0, 313.0, 319.0, 311.0, 266.0, 253.0, 322.0, 311.0, 318.0, 309.0, 284.0, 295.0, 287.0, 292.0, 314.0, 322.0, 291.0, 291.0, 283.0, 293.0, 308.0, 325.0, 306.0, 321.0, 285.0, 288.0, 289.0, 293.0, 319.0, 311.0, 295.0, 287.0, 288.0, 
294.0, 283.0, 304.0, 270.0, 260.0, 318.0, 309.0, 289.0, 293.0, 293.0, 286.0, 291.0, 291.0, 299.0, 288.0, 313.0, 314.0, 291.0, 291.0, 223.0, 213.0, 316.0, 323.0, 284.0, 292.0, 292.0, 290.0, 289.0, 287.0, 250.0, 266.0, 317.0, 319.0, 319.0, 308.0, 291.0, 288.0, 289.0, 293.0, 317.0, 316.0, 291.0, 291.0, 320.0, 319.0, 296.0, 286.0, 319.0, 320.0, 321.0, 315.0, 292.0, 290.0, 291.0, 296.0, 321.0, 306.0, 316.0, 317.0, 293.0, 280.0, 288.0, 291.0, 293.0, 286.0, 291.0, 291.0, 316.0, 320.0, 317.0, 319.0, 317.0, 319.0, 314.0, 319.0, 316.0, 314.0, 293.0, 289.0, 288.0, 294.0, 318.0, 312.0, 317.0, 319.0, 316.0, 314.0, 91.0, 83.0, 289.0, 287.0, 283.0, 293.0, 316.0, 320.0, 302.0, 322.0, 293.0, 283.0, 267.0, 260.0, 313.0, 317.0, 287.0, 289.0, 301.0, 278.0, 291.0, 296.0, 282.0, 291.0, 294.0, 288.0, 290.0, 283.0, 312.0, 321.0, 291.0, 288.0, 311.0, 319.0, 286.0, 293.0, 291.0, 291.0, 317.0, 319.0, 317.0, 319.0, 297.0, 290.0, 319.0, 311.0, 313.0, 317.0, 319.0, 314.0, 289.0, 293.0, 296.0, 297.0, 320.0, 313.0, 283.0, 287.0, 294.0, 288.0, 252.0, 258.0, 273.0, 294.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6943396641366125, "mean_inference_ms": 1.238497366545473, "mean_action_processing_ms": 0.13330002318819154, "mean_env_wait_ms": 0.8362190938658364, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 639.0, "episode_reward_min": 174.0, "episode_reward_mean": 591.87, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 83.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 295.935}, "hist_stats": {"episode_reward": [630.0, 582.0, 581.0, 576.0, 587.0, 582.0, 576.0, 579.0, 627.0, 630.0, 519.0, 633.0, 627.0, 579.0, 579.0, 636.0, 582.0, 576.0, 633.0, 627.0, 573.0, 582.0, 630.0, 582.0, 582.0, 587.0, 530.0, 627.0, 582.0, 579.0, 582.0, 587.0, 627.0, 582.0, 436.0, 639.0, 576.0, 582.0, 576.0, 516.0, 636.0, 627.0, 579.0, 582.0, 633.0, 582.0, 639.0, 582.0, 639.0, 636.0, 582.0, 587.0, 627.0, 633.0, 573.0, 579.0, 579.0, 
582.0, 636.0, 636.0, 636.0, 633.0, 630.0, 582.0, 582.0, 630.0, 636.0, 630.0, 174.0, 576.0, 576.0, 636.0, 624.0, 576.0, 527.0, 630.0, 576.0, 579.0, 587.0, 573.0, 582.0, 573.0, 633.0, 579.0, 630.0, 579.0, 582.0, 636.0, 636.0, 587.0, 630.0, 630.0, 633.0, 582.0, 593.0, 633.0, 570.0, 582.0, 510.0, 567.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [314.0, 316.0, 291.0, 291.0, 283.0, 298.0, 283.0, 293.0, 296.0, 291.0, 293.0, 289.0, 285.0, 291.0, 291.0, 288.0, 314.0, 313.0, 319.0, 311.0, 266.0, 253.0, 322.0, 311.0, 318.0, 309.0, 284.0, 295.0, 287.0, 292.0, 314.0, 322.0, 291.0, 291.0, 283.0, 293.0, 308.0, 325.0, 306.0, 321.0, 285.0, 288.0, 289.0, 293.0, 319.0, 311.0, 295.0, 287.0, 288.0, 294.0, 283.0, 304.0, 270.0, 260.0, 318.0, 309.0, 289.0, 293.0, 293.0, 286.0, 291.0, 291.0, 299.0, 288.0, 313.0, 314.0, 291.0, 291.0, 223.0, 213.0, 316.0, 323.0, 284.0, 292.0, 292.0, 290.0, 289.0, 287.0, 250.0, 266.0, 317.0, 319.0, 319.0, 308.0, 291.0, 288.0, 289.0, 293.0, 317.0, 316.0, 291.0, 291.0, 320.0, 319.0, 296.0, 286.0, 319.0, 320.0, 321.0, 315.0, 292.0, 290.0, 291.0, 296.0, 321.0, 306.0, 316.0, 317.0, 293.0, 280.0, 288.0, 291.0, 293.0, 286.0, 291.0, 291.0, 316.0, 320.0, 317.0, 319.0, 317.0, 319.0, 314.0, 319.0, 316.0, 314.0, 293.0, 289.0, 288.0, 294.0, 318.0, 312.0, 317.0, 319.0, 316.0, 314.0, 91.0, 83.0, 289.0, 287.0, 283.0, 293.0, 316.0, 320.0, 302.0, 322.0, 293.0, 283.0, 267.0, 260.0, 313.0, 317.0, 287.0, 289.0, 301.0, 278.0, 291.0, 296.0, 282.0, 291.0, 294.0, 288.0, 290.0, 283.0, 312.0, 
321.0, 291.0, 288.0, 311.0, 319.0, 286.0, 293.0, 291.0, 291.0, 317.0, 319.0, 317.0, 319.0, 297.0, 290.0, 319.0, 311.0, 313.0, 317.0, 319.0, 314.0, 289.0, 293.0, 296.0, 297.0, 320.0, 313.0, 283.0, 287.0, 294.0, 288.0, 252.0, 258.0, 273.0, 294.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6943396641366125, "mean_inference_ms": 1.238497366545473, "mean_action_processing_ms": 0.13330002318819154, "mean_env_wait_ms": 0.8362190938658364, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 11443200, "num_agent_steps_trained": 11443200, "num_env_steps_sampled": 5721600, "num_env_steps_trained": 5721600, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 5721600, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 11443200, "timers": {"training_iteration_time_ms": 3671.393, "learn_time_ms": 1148.433, "learn_throughput": 11145.621, "synch_weights_time_ms": 10.642}, "counters": {"num_env_steps_sampled": 5721600, "num_env_steps_trained": 5721600, "num_agent_steps_sampled": 11443200, "num_agent_steps_trained": 11443200}, "done": false, "episodes_total": 14304, "training_iteration": 447, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-29-30", "timestamp": 1666582170, "time_this_iter_s": 3.6180174350738525, "time_total_s": 1702.6486406326294, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1702.6486406326294, "timesteps_since_restore": 0, "iterations_since_restore": 447, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.45, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 205.0, "sparse_reward_min": 60, "sparse_reward_max": 220, "shaped_reward_mean": 182.55, "shaped_reward_min": 54, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.95, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 16.55, "onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 16.88, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 16.4, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.06, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.08, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.04, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, 
"useful_onion_drop_agent_1_mean": 0.04, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.64, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 16.21, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.5, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 7, "dish_pickup_agent_1_mean": 5.55, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.34, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.36, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.11, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.07, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.11, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.27, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.06, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.22, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.64, "optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 16.21, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.64, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 16.21, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.002196711488068104, "policy_loss": 0.001786647131666541, "vf_loss": 
7.865475177764893, "vf_explained_var": 0.5819615721702576, "kl": 0.002893120050430298, "entropy": 0.7529665231704712, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 5734400, "num_env_steps_trained": 5734400, "num_agent_steps_sampled": 11468800, "num_agent_steps_trained": 11468800}, "sampler_results": {"episode_reward_max": 639.0, "episode_reward_min": 174.0, "episode_reward_mean": 592.55, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 83.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 296.275}, "custom_metrics": {"sparse_reward_mean": 205.0, "sparse_reward_min": 60, "sparse_reward_max": 220, "shaped_reward_mean": 182.55, "shaped_reward_min": 54, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.95, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 16.55, 
"onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 16.88, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 16.4, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.06, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.08, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.04, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.04, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.64, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 16.21, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.5, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 7, "dish_pickup_agent_1_mean": 5.55, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.34, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.36, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.11, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.07, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.11, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.27, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.06, "soup_delivery_agent_0_min": 2, 
"soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.22, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.64, "optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 16.21, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.64, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 16.21, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [627.0, 582.0, 436.0, 639.0, 576.0, 582.0, 576.0, 516.0, 636.0, 627.0, 579.0, 582.0, 633.0, 582.0, 639.0, 582.0, 639.0, 636.0, 582.0, 587.0, 627.0, 633.0, 573.0, 579.0, 579.0, 582.0, 636.0, 636.0, 636.0, 633.0, 630.0, 582.0, 582.0, 630.0, 636.0, 630.0, 174.0, 576.0, 576.0, 636.0, 624.0, 576.0, 527.0, 630.0, 576.0, 579.0, 587.0, 573.0, 582.0, 573.0, 633.0, 579.0, 630.0, 579.0, 582.0, 636.0, 636.0, 587.0, 630.0, 630.0, 633.0, 582.0, 593.0, 633.0, 570.0, 582.0, 510.0, 567.0, 627.0, 639.0, 587.0, 582.0, 587.0, 587.0, 579.0, 624.0, 627.0, 582.0, 582.0, 587.0, 633.0, 576.0, 573.0, 519.0, 582.0, 582.0, 579.0, 582.0, 630.0, 582.0, 633.0, 627.0, 582.0, 633.0, 582.0, 627.0, 570.0, 587.0, 527.0, 636.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [313.0, 314.0, 291.0, 291.0, 223.0, 213.0, 316.0, 323.0, 284.0, 292.0, 292.0, 290.0, 289.0, 287.0, 250.0, 266.0, 317.0, 319.0, 319.0, 308.0, 291.0, 288.0, 289.0, 293.0, 317.0, 316.0, 291.0, 291.0, 320.0, 319.0, 296.0, 286.0, 319.0, 320.0, 321.0, 315.0, 292.0, 290.0, 291.0, 296.0, 321.0, 306.0, 316.0, 317.0, 293.0, 280.0, 288.0, 291.0, 293.0, 
286.0, 291.0, 291.0, 316.0, 320.0, 317.0, 319.0, 317.0, 319.0, 314.0, 319.0, 316.0, 314.0, 293.0, 289.0, 288.0, 294.0, 318.0, 312.0, 317.0, 319.0, 316.0, 314.0, 91.0, 83.0, 289.0, 287.0, 283.0, 293.0, 316.0, 320.0, 302.0, 322.0, 293.0, 283.0, 267.0, 260.0, 313.0, 317.0, 287.0, 289.0, 301.0, 278.0, 291.0, 296.0, 282.0, 291.0, 294.0, 288.0, 290.0, 283.0, 312.0, 321.0, 291.0, 288.0, 311.0, 319.0, 286.0, 293.0, 291.0, 291.0, 317.0, 319.0, 317.0, 319.0, 297.0, 290.0, 319.0, 311.0, 313.0, 317.0, 319.0, 314.0, 289.0, 293.0, 296.0, 297.0, 320.0, 313.0, 283.0, 287.0, 294.0, 288.0, 252.0, 258.0, 273.0, 294.0, 308.0, 319.0, 323.0, 316.0, 294.0, 293.0, 291.0, 291.0, 293.0, 294.0, 283.0, 304.0, 294.0, 285.0, 325.0, 299.0, 311.0, 316.0, 295.0, 287.0, 294.0, 288.0, 294.0, 293.0, 322.0, 311.0, 283.0, 293.0, 280.0, 293.0, 263.0, 256.0, 286.0, 296.0, 293.0, 289.0, 283.0, 296.0, 298.0, 284.0, 319.0, 311.0, 300.0, 282.0, 320.0, 313.0, 321.0, 306.0, 288.0, 294.0, 322.0, 311.0, 289.0, 293.0, 311.0, 316.0, 276.0, 294.0, 292.0, 295.0, 265.0, 262.0, 319.0, 317.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6942923385290828, "mean_inference_ms": 1.2383771063815237, "mean_action_processing_ms": 0.13329224665823974, "mean_env_wait_ms": 0.8361472055742027, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 639.0, "episode_reward_min": 174.0, "episode_reward_mean": 592.55, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 83.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 296.275}, "hist_stats": {"episode_reward": [627.0, 582.0, 436.0, 639.0, 576.0, 582.0, 576.0, 516.0, 636.0, 627.0, 579.0, 582.0, 633.0, 582.0, 639.0, 582.0, 639.0, 636.0, 582.0, 587.0, 627.0, 633.0, 573.0, 579.0, 579.0, 582.0, 636.0, 636.0, 636.0, 633.0, 630.0, 582.0, 582.0, 630.0, 636.0, 630.0, 174.0, 576.0, 576.0, 636.0, 624.0, 576.0, 527.0, 630.0, 576.0, 579.0, 587.0, 573.0, 582.0, 573.0, 633.0, 579.0, 630.0, 579.0, 582.0, 636.0, 636.0, 
587.0, 630.0, 630.0, 633.0, 582.0, 593.0, 633.0, 570.0, 582.0, 510.0, 567.0, 627.0, 639.0, 587.0, 582.0, 587.0, 587.0, 579.0, 624.0, 627.0, 582.0, 582.0, 587.0, 633.0, 576.0, 573.0, 519.0, 582.0, 582.0, 579.0, 582.0, 630.0, 582.0, 633.0, 627.0, 582.0, 633.0, 582.0, 627.0, 570.0, 587.0, 527.0, 636.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [313.0, 314.0, 291.0, 291.0, 223.0, 213.0, 316.0, 323.0, 284.0, 292.0, 292.0, 290.0, 289.0, 287.0, 250.0, 266.0, 317.0, 319.0, 319.0, 308.0, 291.0, 288.0, 289.0, 293.0, 317.0, 316.0, 291.0, 291.0, 320.0, 319.0, 296.0, 286.0, 319.0, 320.0, 321.0, 315.0, 292.0, 290.0, 291.0, 296.0, 321.0, 306.0, 316.0, 317.0, 293.0, 280.0, 288.0, 291.0, 293.0, 286.0, 291.0, 291.0, 316.0, 320.0, 317.0, 319.0, 317.0, 319.0, 314.0, 319.0, 316.0, 314.0, 293.0, 289.0, 288.0, 294.0, 318.0, 312.0, 317.0, 319.0, 316.0, 314.0, 91.0, 83.0, 289.0, 287.0, 283.0, 293.0, 316.0, 320.0, 302.0, 322.0, 293.0, 283.0, 267.0, 260.0, 313.0, 317.0, 287.0, 289.0, 301.0, 278.0, 291.0, 296.0, 282.0, 291.0, 294.0, 288.0, 290.0, 283.0, 312.0, 321.0, 291.0, 288.0, 311.0, 319.0, 286.0, 293.0, 291.0, 291.0, 317.0, 319.0, 317.0, 319.0, 297.0, 290.0, 319.0, 311.0, 313.0, 317.0, 319.0, 314.0, 289.0, 293.0, 296.0, 297.0, 320.0, 313.0, 283.0, 287.0, 294.0, 288.0, 252.0, 258.0, 273.0, 294.0, 308.0, 319.0, 323.0, 316.0, 294.0, 293.0, 291.0, 291.0, 293.0, 294.0, 283.0, 304.0, 294.0, 285.0, 325.0, 299.0, 311.0, 316.0, 295.0, 287.0, 294.0, 288.0, 294.0, 293.0, 322.0, 311.0, 283.0, 293.0, 280.0, 
293.0, 263.0, 256.0, 286.0, 296.0, 293.0, 289.0, 283.0, 296.0, 298.0, 284.0, 319.0, 311.0, 300.0, 282.0, 320.0, 313.0, 321.0, 306.0, 288.0, 294.0, 322.0, 311.0, 289.0, 293.0, 311.0, 316.0, 276.0, 294.0, 292.0, 295.0, 265.0, 262.0, 319.0, 317.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6942923385290828, "mean_inference_ms": 1.2383771063815237, "mean_action_processing_ms": 0.13329224665823974, "mean_env_wait_ms": 0.8361472055742027, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 11468800, "num_agent_steps_trained": 11468800, "num_env_steps_sampled": 5734400, "num_env_steps_trained": 5734400, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 5734400, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 11468800, "timers": {"training_iteration_time_ms": 3669.461, "learn_time_ms": 1149.523, "learn_throughput": 11135.05, "synch_weights_time_ms": 10.803}, "counters": {"num_env_steps_sampled": 5734400, "num_env_steps_trained": 5734400, "num_agent_steps_sampled": 11468800, "num_agent_steps_trained": 11468800}, "done": false, "episodes_total": 14336, "training_iteration": 448, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-29-34", "timestamp": 1666582174, "time_this_iter_s": 3.6794474124908447, "time_total_s": 1706.3280880451202, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1706.3280880451202, "timesteps_since_restore": 0, "iterations_since_restore": 448, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.659999999999997, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 203.0, "sparse_reward_min": 60, "sparse_reward_max": 220, "shaped_reward_mean": 180.34, "shaped_reward_min": 54, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.77, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 16.43, "onion_pickup_agent_1_min": 4, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.7, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 16.24, "useful_onion_pickup_agent_1_min": 4, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.08, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.1, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.04, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, 
"useful_onion_drop_agent_1_mean": 0.04, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.4, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 16.06, "potting_onion_agent_1_min": 4, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.41, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.61, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.19, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.33, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.11, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.15, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.04, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 5.03, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.25, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.98, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.18, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.4, "optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 16.06, "optimal_onion_potting_agent_1_min": 4, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.4, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 16.06, "viable_onion_potting_agent_1_min": 4, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0027708648703992367, "policy_loss": 0.0023652866948395967, "vf_loss": 
7.862312316894531, "vf_explained_var": 0.6047019958496094, "kl": 0.003352868603542447, "entropy": 0.7613069415092468, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 5747200, "num_env_steps_trained": 5747200, "num_agent_steps_sampled": 11494400, "num_agent_steps_trained": 11494400}, "sampler_results": {"episode_reward_max": 639.0, "episode_reward_min": 174.0, "episode_reward_mean": 586.34, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 83.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 293.17}, "custom_metrics": {"sparse_reward_mean": 203.0, "sparse_reward_min": 60, "sparse_reward_max": 220, "shaped_reward_mean": 180.34, "shaped_reward_min": 54, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.77, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 16.43, 
"onion_pickup_agent_1_min": 4, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.7, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 16.24, "useful_onion_pickup_agent_1_min": 4, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.08, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.1, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.04, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.04, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.4, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 16.06, "potting_onion_agent_1_min": 4, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.41, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.61, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.19, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.33, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.11, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.15, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.04, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 5.03, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.25, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.98, "soup_delivery_agent_0_min": 1, 
"soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.18, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.4, "optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 16.06, "optimal_onion_potting_agent_1_min": 4, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.4, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 16.06, "viable_onion_potting_agent_1_min": 4, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 630.0, 636.0, 630.0, 174.0, 576.0, 576.0, 636.0, 624.0, 576.0, 527.0, 630.0, 576.0, 579.0, 587.0, 573.0, 582.0, 573.0, 633.0, 579.0, 630.0, 579.0, 582.0, 636.0, 636.0, 587.0, 630.0, 630.0, 633.0, 582.0, 593.0, 633.0, 570.0, 582.0, 510.0, 567.0, 627.0, 639.0, 587.0, 582.0, 587.0, 587.0, 579.0, 624.0, 627.0, 582.0, 582.0, 587.0, 633.0, 576.0, 573.0, 519.0, 582.0, 582.0, 579.0, 582.0, 630.0, 582.0, 633.0, 627.0, 582.0, 633.0, 582.0, 627.0, 570.0, 587.0, 527.0, 636.0, 633.0, 633.0, 573.0, 630.0, 627.0, 579.0, 582.0, 587.0, 579.0, 180.0, 570.0, 630.0, 627.0, 570.0, 579.0, 413.0, 579.0, 576.0, 630.0, 582.0, 630.0, 576.0, 636.0, 525.0, 639.0, 587.0, 579.0, 522.0, 582.0, 639.0, 636.0, 633.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [288.0, 294.0, 318.0, 312.0, 317.0, 319.0, 316.0, 314.0, 91.0, 83.0, 289.0, 287.0, 283.0, 293.0, 316.0, 320.0, 302.0, 322.0, 293.0, 283.0, 267.0, 260.0, 313.0, 317.0, 287.0, 289.0, 301.0, 278.0, 291.0, 296.0, 282.0, 291.0, 294.0, 288.0, 290.0, 283.0, 312.0, 321.0, 291.0, 288.0, 311.0, 319.0, 286.0, 293.0, 291.0, 291.0, 317.0, 319.0, 317.0, 
319.0, 297.0, 290.0, 319.0, 311.0, 313.0, 317.0, 319.0, 314.0, 289.0, 293.0, 296.0, 297.0, 320.0, 313.0, 283.0, 287.0, 294.0, 288.0, 252.0, 258.0, 273.0, 294.0, 308.0, 319.0, 323.0, 316.0, 294.0, 293.0, 291.0, 291.0, 293.0, 294.0, 283.0, 304.0, 294.0, 285.0, 325.0, 299.0, 311.0, 316.0, 295.0, 287.0, 294.0, 288.0, 294.0, 293.0, 322.0, 311.0, 283.0, 293.0, 280.0, 293.0, 263.0, 256.0, 286.0, 296.0, 293.0, 289.0, 283.0, 296.0, 298.0, 284.0, 319.0, 311.0, 300.0, 282.0, 320.0, 313.0, 321.0, 306.0, 288.0, 294.0, 322.0, 311.0, 289.0, 293.0, 311.0, 316.0, 276.0, 294.0, 292.0, 295.0, 265.0, 262.0, 319.0, 317.0, 316.0, 317.0, 317.0, 316.0, 285.0, 288.0, 314.0, 316.0, 310.0, 317.0, 283.0, 296.0, 293.0, 289.0, 289.0, 298.0, 292.0, 287.0, 89.0, 91.0, 271.0, 299.0, 317.0, 313.0, 308.0, 319.0, 290.0, 280.0, 291.0, 288.0, 205.0, 208.0, 294.0, 285.0, 285.0, 291.0, 316.0, 314.0, 291.0, 291.0, 314.0, 316.0, 283.0, 293.0, 316.0, 320.0, 266.0, 259.0, 318.0, 321.0, 291.0, 296.0, 293.0, 286.0, 256.0, 266.0, 291.0, 291.0, 324.0, 315.0, 314.0, 322.0, 317.0, 316.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.694239495603513, "mean_inference_ms": 1.2382438214862441, "mean_action_processing_ms": 0.13328110236079596, "mean_env_wait_ms": 0.8360554712674403, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 639.0, "episode_reward_min": 174.0, "episode_reward_mean": 586.34, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 83.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 293.17}, "hist_stats": {"episode_reward": [582.0, 630.0, 636.0, 630.0, 174.0, 576.0, 576.0, 636.0, 624.0, 576.0, 527.0, 630.0, 576.0, 579.0, 587.0, 573.0, 582.0, 573.0, 633.0, 579.0, 630.0, 579.0, 582.0, 636.0, 636.0, 587.0, 630.0, 630.0, 633.0, 582.0, 593.0, 633.0, 570.0, 582.0, 510.0, 567.0, 627.0, 639.0, 587.0, 582.0, 587.0, 587.0, 579.0, 624.0, 627.0, 582.0, 582.0, 587.0, 633.0, 576.0, 573.0, 519.0, 582.0, 582.0, 579.0, 582.0, 630.0, 
582.0, 633.0, 627.0, 582.0, 633.0, 582.0, 627.0, 570.0, 587.0, 527.0, 636.0, 633.0, 633.0, 573.0, 630.0, 627.0, 579.0, 582.0, 587.0, 579.0, 180.0, 570.0, 630.0, 627.0, 570.0, 579.0, 413.0, 579.0, 576.0, 630.0, 582.0, 630.0, 576.0, 636.0, 525.0, 639.0, 587.0, 579.0, 522.0, 582.0, 639.0, 636.0, 633.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [288.0, 294.0, 318.0, 312.0, 317.0, 319.0, 316.0, 314.0, 91.0, 83.0, 289.0, 287.0, 283.0, 293.0, 316.0, 320.0, 302.0, 322.0, 293.0, 283.0, 267.0, 260.0, 313.0, 317.0, 287.0, 289.0, 301.0, 278.0, 291.0, 296.0, 282.0, 291.0, 294.0, 288.0, 290.0, 283.0, 312.0, 321.0, 291.0, 288.0, 311.0, 319.0, 286.0, 293.0, 291.0, 291.0, 317.0, 319.0, 317.0, 319.0, 297.0, 290.0, 319.0, 311.0, 313.0, 317.0, 319.0, 314.0, 289.0, 293.0, 296.0, 297.0, 320.0, 313.0, 283.0, 287.0, 294.0, 288.0, 252.0, 258.0, 273.0, 294.0, 308.0, 319.0, 323.0, 316.0, 294.0, 293.0, 291.0, 291.0, 293.0, 294.0, 283.0, 304.0, 294.0, 285.0, 325.0, 299.0, 311.0, 316.0, 295.0, 287.0, 294.0, 288.0, 294.0, 293.0, 322.0, 311.0, 283.0, 293.0, 280.0, 293.0, 263.0, 256.0, 286.0, 296.0, 293.0, 289.0, 283.0, 296.0, 298.0, 284.0, 319.0, 311.0, 300.0, 282.0, 320.0, 313.0, 321.0, 306.0, 288.0, 294.0, 322.0, 311.0, 289.0, 293.0, 311.0, 316.0, 276.0, 294.0, 292.0, 295.0, 265.0, 262.0, 319.0, 317.0, 316.0, 317.0, 317.0, 316.0, 285.0, 288.0, 314.0, 316.0, 310.0, 317.0, 283.0, 296.0, 293.0, 289.0, 289.0, 298.0, 292.0, 287.0, 89.0, 91.0, 271.0, 299.0, 317.0, 313.0, 308.0, 319.0, 290.0, 280.0, 291.0, 
288.0, 205.0, 208.0, 294.0, 285.0, 285.0, 291.0, 316.0, 314.0, 291.0, 291.0, 314.0, 316.0, 283.0, 293.0, 316.0, 320.0, 266.0, 259.0, 318.0, 321.0, 291.0, 296.0, 293.0, 286.0, 256.0, 266.0, 291.0, 291.0, 324.0, 315.0, 314.0, 322.0, 317.0, 316.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.694239495603513, "mean_inference_ms": 1.2382438214862441, "mean_action_processing_ms": 0.13328110236079596, "mean_env_wait_ms": 0.8360554712674403, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 11494400, "num_agent_steps_trained": 11494400, "num_env_steps_sampled": 5747200, "num_env_steps_trained": 5747200, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 5747200, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 11494400, "timers": {"training_iteration_time_ms": 3668.549, "learn_time_ms": 1147.299, "learn_throughput": 11156.636, "synch_weights_time_ms": 10.947}, "counters": {"num_env_steps_sampled": 5747200, "num_env_steps_trained": 5747200, "num_agent_steps_sampled": 11494400, "num_agent_steps_trained": 11494400}, "done": false, "episodes_total": 14368, "training_iteration": 449, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-29-38", "timestamp": 1666582178, "time_this_iter_s": 3.6126880645751953, "time_total_s": 1709.9407761096954, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1709.9407761096954, "timesteps_since_restore": 0, "iterations_since_restore": 449, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.32, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 204.4, "sparse_reward_min": 60, "sparse_reward_max": 220, "shaped_reward_mean": 181.4, "shaped_reward_min": 60, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.87, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 16.46, "onion_pickup_agent_1_min": 4, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.83, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 16.28, "useful_onion_pickup_agent_1_min": 4, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.06, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.07, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.01, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, 
"useful_onion_drop_agent_1_mean": 0.02, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.58, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 16.07, "potting_onion_agent_1_min": 4, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.43, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.67, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.19, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.41, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.14, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.12, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.05, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 4.99, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.34, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.95, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.27, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.58, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 16.07, "optimal_onion_potting_agent_1_min": 4, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.58, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 16.07, "viable_onion_potting_agent_1_min": 4, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.002730258274823427, "policy_loss": -0.0031343363225460052, "vf_loss": 
7.75507926940918, "vf_explained_var": 0.6010830402374268, "kl": 0.002409199485555291, "entropy": 0.7428597211837769, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 5760000, "num_env_steps_trained": 5760000, "num_agent_steps_sampled": 11520000, "num_agent_steps_trained": 11520000}, "sampler_results": {"episode_reward_max": 639.0, "episode_reward_min": 180.0, "episode_reward_mean": 590.2, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 89.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 295.1}, "custom_metrics": {"sparse_reward_mean": 204.4, "sparse_reward_min": 60, "sparse_reward_max": 220, "shaped_reward_mean": 181.4, "shaped_reward_min": 60, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.87, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 16.46, 
"onion_pickup_agent_1_min": 4, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.83, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 16.28, "useful_onion_pickup_agent_1_min": 4, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.06, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.07, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.01, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.02, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.58, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 16.07, "potting_onion_agent_1_min": 4, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.43, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.67, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.19, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.41, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.14, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.12, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.05, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 4.99, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.34, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.95, "soup_delivery_agent_0_min": 1, 
"soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.27, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.58, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 16.07, "optimal_onion_potting_agent_1_min": 4, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.58, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 16.07, "viable_onion_potting_agent_1_min": 4, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [570.0, 582.0, 510.0, 567.0, 627.0, 639.0, 587.0, 582.0, 587.0, 587.0, 579.0, 624.0, 627.0, 582.0, 582.0, 587.0, 633.0, 576.0, 573.0, 519.0, 582.0, 582.0, 579.0, 582.0, 630.0, 582.0, 633.0, 627.0, 582.0, 633.0, 582.0, 627.0, 570.0, 587.0, 527.0, 636.0, 633.0, 633.0, 573.0, 630.0, 627.0, 579.0, 582.0, 587.0, 579.0, 180.0, 570.0, 630.0, 627.0, 570.0, 579.0, 413.0, 579.0, 576.0, 630.0, 582.0, 630.0, 576.0, 636.0, 525.0, 639.0, 587.0, 579.0, 522.0, 582.0, 639.0, 636.0, 633.0, 579.0, 579.0, 633.0, 630.0, 582.0, 582.0, 579.0, 630.0, 587.0, 627.0, 579.0, 633.0, 582.0, 630.0, 576.0, 630.0, 582.0, 582.0, 630.0, 627.0, 582.0, 639.0, 570.0, 627.0, 576.0, 573.0, 582.0, 584.0, 582.0, 627.0, 582.0, 633.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [283.0, 287.0, 294.0, 288.0, 252.0, 258.0, 273.0, 294.0, 308.0, 319.0, 323.0, 316.0, 294.0, 293.0, 291.0, 291.0, 293.0, 294.0, 283.0, 304.0, 294.0, 285.0, 325.0, 299.0, 311.0, 316.0, 295.0, 287.0, 294.0, 288.0, 294.0, 293.0, 322.0, 311.0, 283.0, 293.0, 280.0, 293.0, 263.0, 256.0, 286.0, 296.0, 293.0, 289.0, 283.0, 296.0, 298.0, 284.0, 319.0, 
311.0, 300.0, 282.0, 320.0, 313.0, 321.0, 306.0, 288.0, 294.0, 322.0, 311.0, 289.0, 293.0, 311.0, 316.0, 276.0, 294.0, 292.0, 295.0, 265.0, 262.0, 319.0, 317.0, 316.0, 317.0, 317.0, 316.0, 285.0, 288.0, 314.0, 316.0, 310.0, 317.0, 283.0, 296.0, 293.0, 289.0, 289.0, 298.0, 292.0, 287.0, 89.0, 91.0, 271.0, 299.0, 317.0, 313.0, 308.0, 319.0, 290.0, 280.0, 291.0, 288.0, 205.0, 208.0, 294.0, 285.0, 285.0, 291.0, 316.0, 314.0, 291.0, 291.0, 314.0, 316.0, 283.0, 293.0, 316.0, 320.0, 266.0, 259.0, 318.0, 321.0, 291.0, 296.0, 293.0, 286.0, 256.0, 266.0, 291.0, 291.0, 324.0, 315.0, 314.0, 322.0, 317.0, 316.0, 290.0, 289.0, 296.0, 283.0, 312.0, 321.0, 312.0, 318.0, 291.0, 291.0, 288.0, 294.0, 289.0, 290.0, 308.0, 322.0, 286.0, 301.0, 319.0, 308.0, 287.0, 292.0, 319.0, 314.0, 297.0, 285.0, 319.0, 311.0, 288.0, 288.0, 311.0, 319.0, 289.0, 293.0, 292.0, 290.0, 311.0, 319.0, 311.0, 316.0, 294.0, 288.0, 322.0, 317.0, 282.0, 288.0, 319.0, 308.0, 283.0, 293.0, 287.0, 286.0, 288.0, 294.0, 290.0, 294.0, 292.0, 290.0, 311.0, 316.0, 290.0, 292.0, 317.0, 316.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.694306098577838, "mean_inference_ms": 1.2382181975816857, "mean_action_processing_ms": 0.1332722053111739, "mean_env_wait_ms": 0.835985405310874, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 639.0, "episode_reward_min": 180.0, "episode_reward_mean": 590.2, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 89.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 295.1}, "hist_stats": {"episode_reward": [570.0, 582.0, 510.0, 567.0, 627.0, 639.0, 587.0, 582.0, 587.0, 587.0, 579.0, 624.0, 627.0, 582.0, 582.0, 587.0, 633.0, 576.0, 573.0, 519.0, 582.0, 582.0, 579.0, 582.0, 630.0, 582.0, 633.0, 627.0, 582.0, 633.0, 582.0, 627.0, 570.0, 587.0, 527.0, 636.0, 633.0, 633.0, 573.0, 630.0, 627.0, 579.0, 582.0, 587.0, 579.0, 180.0, 570.0, 630.0, 627.0, 570.0, 579.0, 413.0, 579.0, 576.0, 630.0, 582.0, 630.0, 576.0, 
636.0, 525.0, 639.0, 587.0, 579.0, 522.0, 582.0, 639.0, 636.0, 633.0, 579.0, 579.0, 633.0, 630.0, 582.0, 582.0, 579.0, 630.0, 587.0, 627.0, 579.0, 633.0, 582.0, 630.0, 576.0, 630.0, 582.0, 582.0, 630.0, 627.0, 582.0, 639.0, 570.0, 627.0, 576.0, 573.0, 582.0, 584.0, 582.0, 627.0, 582.0, 633.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [283.0, 287.0, 294.0, 288.0, 252.0, 258.0, 273.0, 294.0, 308.0, 319.0, 323.0, 316.0, 294.0, 293.0, 291.0, 291.0, 293.0, 294.0, 283.0, 304.0, 294.0, 285.0, 325.0, 299.0, 311.0, 316.0, 295.0, 287.0, 294.0, 288.0, 294.0, 293.0, 322.0, 311.0, 283.0, 293.0, 280.0, 293.0, 263.0, 256.0, 286.0, 296.0, 293.0, 289.0, 283.0, 296.0, 298.0, 284.0, 319.0, 311.0, 300.0, 282.0, 320.0, 313.0, 321.0, 306.0, 288.0, 294.0, 322.0, 311.0, 289.0, 293.0, 311.0, 316.0, 276.0, 294.0, 292.0, 295.0, 265.0, 262.0, 319.0, 317.0, 316.0, 317.0, 317.0, 316.0, 285.0, 288.0, 314.0, 316.0, 310.0, 317.0, 283.0, 296.0, 293.0, 289.0, 289.0, 298.0, 292.0, 287.0, 89.0, 91.0, 271.0, 299.0, 317.0, 313.0, 308.0, 319.0, 290.0, 280.0, 291.0, 288.0, 205.0, 208.0, 294.0, 285.0, 285.0, 291.0, 316.0, 314.0, 291.0, 291.0, 314.0, 316.0, 283.0, 293.0, 316.0, 320.0, 266.0, 259.0, 318.0, 321.0, 291.0, 296.0, 293.0, 286.0, 256.0, 266.0, 291.0, 291.0, 324.0, 315.0, 314.0, 322.0, 317.0, 316.0, 290.0, 289.0, 296.0, 283.0, 312.0, 321.0, 312.0, 318.0, 291.0, 291.0, 288.0, 294.0, 289.0, 290.0, 308.0, 322.0, 286.0, 301.0, 319.0, 308.0, 287.0, 292.0, 319.0, 314.0, 297.0, 285.0, 319.0, 311.0, 288.0, 288.0, 
311.0, 319.0, 289.0, 293.0, 292.0, 290.0, 311.0, 319.0, 311.0, 316.0, 294.0, 288.0, 322.0, 317.0, 282.0, 288.0, 319.0, 308.0, 283.0, 293.0, 287.0, 286.0, 288.0, 294.0, 290.0, 294.0, 292.0, 290.0, 311.0, 316.0, 290.0, 292.0, 317.0, 316.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.694306098577838, "mean_inference_ms": 1.2382181975816857, "mean_action_processing_ms": 0.1332722053111739, "mean_env_wait_ms": 0.835985405310874, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 11520000, "num_agent_steps_trained": 11520000, "num_env_steps_sampled": 5760000, "num_env_steps_trained": 5760000, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 5760000, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 11520000, "timers": {"training_iteration_time_ms": 3691.668, "learn_time_ms": 1147.415, "learn_throughput": 11155.507, "synch_weights_time_ms": 10.937}, "counters": {"num_env_steps_sampled": 5760000, "num_env_steps_trained": 5760000, "num_agent_steps_sampled": 11520000, "num_agent_steps_trained": 11520000}, "done": false, "episodes_total": 14400, "training_iteration": 450, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-29-42", "timestamp": 1666582182, "time_this_iter_s": 3.8742873668670654, "time_total_s": 1713.8150634765625, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1713.8150634765625, "timesteps_since_restore": 0, "iterations_since_restore": 450, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.483333333333334, "ram_util_percent": 10.616666666666667}}
+{"custom_metrics": {"sparse_reward_mean": 203.8, "sparse_reward_min": 60, "sparse_reward_max": 220, "shaped_reward_mean": 181.5, "shaped_reward_min": 60, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.8, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 16.54, "onion_pickup_agent_1_min": 4, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.72, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 16.36, "useful_onion_pickup_agent_1_min": 4, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.05, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.04, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.02, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, 
"useful_onion_drop_agent_1_mean": 0.01, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.52, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 16.18, "potting_onion_agent_1_min": 4, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.42, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.67, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.17, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.43, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.13, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.13, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.05, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 4.99, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.33, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.96, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.25, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.52, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 16.18, "optimal_onion_potting_agent_1_min": 4, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.52, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 16.18, "viable_onion_potting_agent_1_min": 4, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.001394312595948577, "policy_loss": -0.0018049200298264623, "vf_loss": 
7.862459182739258, "vf_explained_var": 0.5844732522964478, "kl": 0.0030340240336954594, "entropy": 0.75127774477005, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 5772800, "num_env_steps_trained": 5772800, "num_agent_steps_sampled": 11545600, "num_agent_steps_trained": 11545600}, "sampler_results": {"episode_reward_max": 639.0, "episode_reward_min": 180.0, "episode_reward_mean": 589.1, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 89.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 294.55}, "custom_metrics": {"sparse_reward_mean": 203.8, "sparse_reward_min": 60, "sparse_reward_max": 220, "shaped_reward_mean": 181.5, "shaped_reward_min": 60, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.8, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 16.54, 
"onion_pickup_agent_1_min": 4, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.72, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 16.36, "useful_onion_pickup_agent_1_min": 4, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.05, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.04, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.02, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.01, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.52, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 16.18, "potting_onion_agent_1_min": 4, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.42, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.67, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.17, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.43, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.13, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.13, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.05, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 4.99, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.33, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.96, "soup_delivery_agent_0_min": 1, 
"soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.25, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.52, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 16.18, "optimal_onion_potting_agent_1_min": 4, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.52, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 16.18, "viable_onion_potting_agent_1_min": 4, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [570.0, 587.0, 527.0, 636.0, 633.0, 633.0, 573.0, 630.0, 627.0, 579.0, 582.0, 587.0, 579.0, 180.0, 570.0, 630.0, 627.0, 570.0, 579.0, 413.0, 579.0, 576.0, 630.0, 582.0, 630.0, 576.0, 636.0, 525.0, 639.0, 587.0, 579.0, 522.0, 582.0, 639.0, 636.0, 633.0, 579.0, 579.0, 633.0, 630.0, 582.0, 582.0, 579.0, 630.0, 587.0, 627.0, 579.0, 633.0, 582.0, 630.0, 576.0, 630.0, 582.0, 582.0, 630.0, 627.0, 582.0, 639.0, 570.0, 627.0, 576.0, 573.0, 582.0, 584.0, 582.0, 627.0, 582.0, 633.0, 582.0, 636.0, 582.0, 579.0, 579.0, 630.0, 587.0, 579.0, 584.0, 576.0, 582.0, 579.0, 576.0, 627.0, 579.0, 636.0, 587.0, 633.0, 633.0, 582.0, 630.0, 590.0, 576.0, 582.0, 522.0, 579.0, 573.0, 570.0, 527.0, 582.0, 539.0, 633.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [276.0, 294.0, 292.0, 295.0, 265.0, 262.0, 319.0, 317.0, 316.0, 317.0, 317.0, 316.0, 285.0, 288.0, 314.0, 316.0, 310.0, 317.0, 283.0, 296.0, 293.0, 289.0, 289.0, 298.0, 292.0, 287.0, 89.0, 91.0, 271.0, 299.0, 317.0, 313.0, 308.0, 319.0, 290.0, 280.0, 291.0, 288.0, 205.0, 208.0, 294.0, 285.0, 285.0, 291.0, 316.0, 314.0, 291.0, 291.0, 314.0, 
316.0, 283.0, 293.0, 316.0, 320.0, 266.0, 259.0, 318.0, 321.0, 291.0, 296.0, 293.0, 286.0, 256.0, 266.0, 291.0, 291.0, 324.0, 315.0, 314.0, 322.0, 317.0, 316.0, 290.0, 289.0, 296.0, 283.0, 312.0, 321.0, 312.0, 318.0, 291.0, 291.0, 288.0, 294.0, 289.0, 290.0, 308.0, 322.0, 286.0, 301.0, 319.0, 308.0, 287.0, 292.0, 319.0, 314.0, 297.0, 285.0, 319.0, 311.0, 288.0, 288.0, 311.0, 319.0, 289.0, 293.0, 292.0, 290.0, 311.0, 319.0, 311.0, 316.0, 294.0, 288.0, 322.0, 317.0, 282.0, 288.0, 319.0, 308.0, 283.0, 293.0, 287.0, 286.0, 288.0, 294.0, 290.0, 294.0, 292.0, 290.0, 311.0, 316.0, 290.0, 292.0, 317.0, 316.0, 289.0, 293.0, 317.0, 319.0, 288.0, 294.0, 283.0, 296.0, 288.0, 291.0, 313.0, 317.0, 293.0, 294.0, 292.0, 287.0, 294.0, 290.0, 284.0, 292.0, 286.0, 296.0, 299.0, 280.0, 291.0, 285.0, 311.0, 316.0, 288.0, 291.0, 327.0, 309.0, 293.0, 294.0, 316.0, 317.0, 317.0, 316.0, 291.0, 291.0, 312.0, 318.0, 293.0, 297.0, 287.0, 289.0, 289.0, 293.0, 268.0, 254.0, 291.0, 288.0, 280.0, 293.0, 283.0, 287.0, 274.0, 253.0, 288.0, 294.0, 265.0, 274.0, 311.0, 322.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6943649712257816, "mean_inference_ms": 1.2381989697127307, "mean_action_processing_ms": 0.13326478263615582, "mean_env_wait_ms": 0.8359216257077268, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 639.0, "episode_reward_min": 180.0, "episode_reward_mean": 589.1, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 89.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 294.55}, "hist_stats": {"episode_reward": [570.0, 587.0, 527.0, 636.0, 633.0, 633.0, 573.0, 630.0, 627.0, 579.0, 582.0, 587.0, 579.0, 180.0, 570.0, 630.0, 627.0, 570.0, 579.0, 413.0, 579.0, 576.0, 630.0, 582.0, 630.0, 576.0, 636.0, 525.0, 639.0, 587.0, 579.0, 522.0, 582.0, 639.0, 636.0, 633.0, 579.0, 579.0, 633.0, 630.0, 582.0, 582.0, 579.0, 630.0, 587.0, 627.0, 579.0, 633.0, 582.0, 630.0, 576.0, 630.0, 582.0, 582.0, 630.0, 627.0, 582.0, 
639.0, 570.0, 627.0, 576.0, 573.0, 582.0, 584.0, 582.0, 627.0, 582.0, 633.0, 582.0, 636.0, 582.0, 579.0, 579.0, 630.0, 587.0, 579.0, 584.0, 576.0, 582.0, 579.0, 576.0, 627.0, 579.0, 636.0, 587.0, 633.0, 633.0, 582.0, 630.0, 590.0, 576.0, 582.0, 522.0, 579.0, 573.0, 570.0, 527.0, 582.0, 539.0, 633.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [276.0, 294.0, 292.0, 295.0, 265.0, 262.0, 319.0, 317.0, 316.0, 317.0, 317.0, 316.0, 285.0, 288.0, 314.0, 316.0, 310.0, 317.0, 283.0, 296.0, 293.0, 289.0, 289.0, 298.0, 292.0, 287.0, 89.0, 91.0, 271.0, 299.0, 317.0, 313.0, 308.0, 319.0, 290.0, 280.0, 291.0, 288.0, 205.0, 208.0, 294.0, 285.0, 285.0, 291.0, 316.0, 314.0, 291.0, 291.0, 314.0, 316.0, 283.0, 293.0, 316.0, 320.0, 266.0, 259.0, 318.0, 321.0, 291.0, 296.0, 293.0, 286.0, 256.0, 266.0, 291.0, 291.0, 324.0, 315.0, 314.0, 322.0, 317.0, 316.0, 290.0, 289.0, 296.0, 283.0, 312.0, 321.0, 312.0, 318.0, 291.0, 291.0, 288.0, 294.0, 289.0, 290.0, 308.0, 322.0, 286.0, 301.0, 319.0, 308.0, 287.0, 292.0, 319.0, 314.0, 297.0, 285.0, 319.0, 311.0, 288.0, 288.0, 311.0, 319.0, 289.0, 293.0, 292.0, 290.0, 311.0, 319.0, 311.0, 316.0, 294.0, 288.0, 322.0, 317.0, 282.0, 288.0, 319.0, 308.0, 283.0, 293.0, 287.0, 286.0, 288.0, 294.0, 290.0, 294.0, 292.0, 290.0, 311.0, 316.0, 290.0, 292.0, 317.0, 316.0, 289.0, 293.0, 317.0, 319.0, 288.0, 294.0, 283.0, 296.0, 288.0, 291.0, 313.0, 317.0, 293.0, 294.0, 292.0, 287.0, 294.0, 290.0, 284.0, 292.0, 286.0, 296.0, 299.0, 280.0, 291.0, 285.0, 311.0, 316.0, 288.0, 
291.0, 327.0, 309.0, 293.0, 294.0, 316.0, 317.0, 317.0, 316.0, 291.0, 291.0, 312.0, 318.0, 293.0, 297.0, 287.0, 289.0, 289.0, 293.0, 268.0, 254.0, 291.0, 288.0, 280.0, 293.0, 283.0, 287.0, 274.0, 253.0, 288.0, 294.0, 265.0, 274.0, 311.0, 322.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6943649712257816, "mean_inference_ms": 1.2381989697127307, "mean_action_processing_ms": 0.13326478263615582, "mean_env_wait_ms": 0.8359216257077268, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 11545600, "num_agent_steps_trained": 11545600, "num_env_steps_sampled": 5772800, "num_env_steps_trained": 5772800, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 5772800, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 11545600, "timers": {"training_iteration_time_ms": 3668.128, "learn_time_ms": 1140.587, "learn_throughput": 11222.293, "synch_weights_time_ms": 12.155}, "counters": {"num_env_steps_sampled": 5772800, "num_env_steps_trained": 5772800, "num_agent_steps_sampled": 11545600, "num_agent_steps_trained": 11545600}, "done": false, "episodes_total": 14432, "training_iteration": 451, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-29-45", "timestamp": 1666582185, "time_this_iter_s": 3.5603537559509277, "time_total_s": 1717.3754172325134, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1717.3754172325134, "timesteps_since_restore": 0, "iterations_since_restore": 451, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.62, "ram_util_percent": 10.62}}
+{"custom_metrics": {"sparse_reward_mean": 206.2, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 183.48, "shaped_reward_min": 159, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.07, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 16.66, "onion_pickup_agent_1_min": 12, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.99, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 16.52, "useful_onion_pickup_agent_1_min": 12, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.02, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.02, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.01, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, 
"useful_onion_drop_agent_1_mean": 0.0, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 0, "potting_onion_agent_0_mean": 16.79, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 16.31, "potting_onion_agent_1_min": 12, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.4, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.69, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.21, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.5, "useful_dish_pickup_agent_1_min": 3, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.1, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.07, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.02, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.4, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.98, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.35, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.79, "optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 16.31, "optimal_onion_potting_agent_1_min": 12, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.79, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 16.31, "viable_onion_potting_agent_1_min": 12, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.003468575421720743, "policy_loss": 0.0030773193575441837, "vf_loss": 
7.6872453689575195, "vf_explained_var": 0.5932549834251404, "kl": 0.002268628915771842, "entropy": 0.7549370527267456, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 5785600, "num_env_steps_trained": 5785600, "num_agent_steps_sampled": 11571200, "num_agent_steps_trained": 11571200}, "sampler_results": {"episode_reward_max": 639.0, "episode_reward_min": 519.0, "episode_reward_mean": 595.88, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 251.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 297.94}, "custom_metrics": {"sparse_reward_mean": 206.2, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 183.48, "shaped_reward_min": 159, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.07, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 16.66, 
"onion_pickup_agent_1_min": 12, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.99, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 16.52, "useful_onion_pickup_agent_1_min": 12, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.02, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.02, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.01, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.0, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 0, "potting_onion_agent_0_mean": 16.79, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 16.31, "potting_onion_agent_1_min": 12, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.4, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.69, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.21, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.5, "useful_dish_pickup_agent_1_min": 3, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.1, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.07, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.02, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.4, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.98, "soup_delivery_agent_0_min": 3, 
"soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.35, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.79, "optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 16.31, "optimal_onion_potting_agent_1_min": 12, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.79, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 16.31, "viable_onion_potting_agent_1_min": 12, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 639.0, 636.0, 633.0, 579.0, 579.0, 633.0, 630.0, 582.0, 582.0, 579.0, 630.0, 587.0, 627.0, 579.0, 633.0, 582.0, 630.0, 576.0, 630.0, 582.0, 582.0, 630.0, 627.0, 582.0, 639.0, 570.0, 627.0, 576.0, 573.0, 582.0, 584.0, 582.0, 627.0, 582.0, 633.0, 582.0, 636.0, 582.0, 579.0, 579.0, 630.0, 587.0, 579.0, 584.0, 576.0, 582.0, 579.0, 576.0, 627.0, 579.0, 636.0, 587.0, 633.0, 633.0, 582.0, 630.0, 590.0, 576.0, 582.0, 522.0, 579.0, 573.0, 570.0, 527.0, 582.0, 539.0, 633.0, 579.0, 582.0, 627.0, 582.0, 630.0, 633.0, 579.0, 630.0, 584.0, 582.0, 582.0, 573.0, 573.0, 627.0, 582.0, 627.0, 639.0, 533.0, 636.0, 576.0, 582.0, 576.0, 630.0, 582.0, 630.0, 630.0, 633.0, 522.0, 582.0, 579.0, 519.0, 630.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [291.0, 291.0, 324.0, 315.0, 314.0, 322.0, 317.0, 316.0, 290.0, 289.0, 296.0, 283.0, 312.0, 321.0, 312.0, 318.0, 291.0, 291.0, 288.0, 294.0, 289.0, 290.0, 308.0, 322.0, 286.0, 301.0, 319.0, 308.0, 287.0, 292.0, 319.0, 314.0, 297.0, 285.0, 319.0, 311.0, 288.0, 288.0, 311.0, 319.0, 289.0, 293.0, 292.0, 290.0, 311.0, 319.0, 311.0, 316.0, 294.0, 
288.0, 322.0, 317.0, 282.0, 288.0, 319.0, 308.0, 283.0, 293.0, 287.0, 286.0, 288.0, 294.0, 290.0, 294.0, 292.0, 290.0, 311.0, 316.0, 290.0, 292.0, 317.0, 316.0, 289.0, 293.0, 317.0, 319.0, 288.0, 294.0, 283.0, 296.0, 288.0, 291.0, 313.0, 317.0, 293.0, 294.0, 292.0, 287.0, 294.0, 290.0, 284.0, 292.0, 286.0, 296.0, 299.0, 280.0, 291.0, 285.0, 311.0, 316.0, 288.0, 291.0, 327.0, 309.0, 293.0, 294.0, 316.0, 317.0, 317.0, 316.0, 291.0, 291.0, 312.0, 318.0, 293.0, 297.0, 287.0, 289.0, 289.0, 293.0, 268.0, 254.0, 291.0, 288.0, 280.0, 293.0, 283.0, 287.0, 274.0, 253.0, 288.0, 294.0, 265.0, 274.0, 311.0, 322.0, 292.0, 287.0, 286.0, 296.0, 316.0, 311.0, 291.0, 291.0, 314.0, 316.0, 312.0, 321.0, 290.0, 289.0, 314.0, 316.0, 296.0, 288.0, 295.0, 287.0, 283.0, 299.0, 288.0, 285.0, 283.0, 290.0, 316.0, 311.0, 293.0, 289.0, 313.0, 314.0, 324.0, 315.0, 265.0, 268.0, 320.0, 316.0, 282.0, 294.0, 286.0, 296.0, 283.0, 293.0, 311.0, 319.0, 289.0, 293.0, 317.0, 313.0, 308.0, 322.0, 314.0, 319.0, 271.0, 251.0, 294.0, 288.0, 285.0, 294.0, 253.0, 266.0, 309.0, 321.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6944271013396697, "mean_inference_ms": 1.2381871099870232, "mean_action_processing_ms": 0.13325826778233354, "mean_env_wait_ms": 0.835862806925344, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 639.0, "episode_reward_min": 519.0, "episode_reward_mean": 595.88, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 251.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 297.94}, "hist_stats": {"episode_reward": [582.0, 639.0, 636.0, 633.0, 579.0, 579.0, 633.0, 630.0, 582.0, 582.0, 579.0, 630.0, 587.0, 627.0, 579.0, 633.0, 582.0, 630.0, 576.0, 630.0, 582.0, 582.0, 630.0, 627.0, 582.0, 639.0, 570.0, 627.0, 576.0, 573.0, 582.0, 584.0, 582.0, 627.0, 582.0, 633.0, 582.0, 636.0, 582.0, 579.0, 579.0, 630.0, 587.0, 579.0, 584.0, 576.0, 582.0, 579.0, 576.0, 627.0, 579.0, 636.0, 587.0, 633.0, 633.0, 582.0, 630.0, 
590.0, 576.0, 582.0, 522.0, 579.0, 573.0, 570.0, 527.0, 582.0, 539.0, 633.0, 579.0, 582.0, 627.0, 582.0, 630.0, 633.0, 579.0, 630.0, 584.0, 582.0, 582.0, 573.0, 573.0, 627.0, 582.0, 627.0, 639.0, 533.0, 636.0, 576.0, 582.0, 576.0, 630.0, 582.0, 630.0, 630.0, 633.0, 522.0, 582.0, 579.0, 519.0, 630.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [291.0, 291.0, 324.0, 315.0, 314.0, 322.0, 317.0, 316.0, 290.0, 289.0, 296.0, 283.0, 312.0, 321.0, 312.0, 318.0, 291.0, 291.0, 288.0, 294.0, 289.0, 290.0, 308.0, 322.0, 286.0, 301.0, 319.0, 308.0, 287.0, 292.0, 319.0, 314.0, 297.0, 285.0, 319.0, 311.0, 288.0, 288.0, 311.0, 319.0, 289.0, 293.0, 292.0, 290.0, 311.0, 319.0, 311.0, 316.0, 294.0, 288.0, 322.0, 317.0, 282.0, 288.0, 319.0, 308.0, 283.0, 293.0, 287.0, 286.0, 288.0, 294.0, 290.0, 294.0, 292.0, 290.0, 311.0, 316.0, 290.0, 292.0, 317.0, 316.0, 289.0, 293.0, 317.0, 319.0, 288.0, 294.0, 283.0, 296.0, 288.0, 291.0, 313.0, 317.0, 293.0, 294.0, 292.0, 287.0, 294.0, 290.0, 284.0, 292.0, 286.0, 296.0, 299.0, 280.0, 291.0, 285.0, 311.0, 316.0, 288.0, 291.0, 327.0, 309.0, 293.0, 294.0, 316.0, 317.0, 317.0, 316.0, 291.0, 291.0, 312.0, 318.0, 293.0, 297.0, 287.0, 289.0, 289.0, 293.0, 268.0, 254.0, 291.0, 288.0, 280.0, 293.0, 283.0, 287.0, 274.0, 253.0, 288.0, 294.0, 265.0, 274.0, 311.0, 322.0, 292.0, 287.0, 286.0, 296.0, 316.0, 311.0, 291.0, 291.0, 314.0, 316.0, 312.0, 321.0, 290.0, 289.0, 314.0, 316.0, 296.0, 288.0, 295.0, 287.0, 283.0, 299.0, 288.0, 285.0, 283.0, 290.0, 316.0, 311.0, 293.0, 
289.0, 313.0, 314.0, 324.0, 315.0, 265.0, 268.0, 320.0, 316.0, 282.0, 294.0, 286.0, 296.0, 283.0, 293.0, 311.0, 319.0, 289.0, 293.0, 317.0, 313.0, 308.0, 322.0, 314.0, 319.0, 271.0, 251.0, 294.0, 288.0, 285.0, 294.0, 253.0, 266.0, 309.0, 321.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6944271013396697, "mean_inference_ms": 1.2381871099870232, "mean_action_processing_ms": 0.13325826778233354, "mean_env_wait_ms": 0.835862806925344, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 11571200, "num_agent_steps_trained": 11571200, "num_env_steps_sampled": 5785600, "num_env_steps_trained": 5785600, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 5785600, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 11571200, "timers": {"training_iteration_time_ms": 3624.048, "learn_time_ms": 1120.872, "learn_throughput": 11419.678, "synch_weights_time_ms": 12.16}, "counters": {"num_env_steps_sampled": 5785600, "num_env_steps_trained": 5785600, "num_agent_steps_sampled": 11571200, "num_agent_steps_trained": 11571200}, "done": false, "episodes_total": 14464, "training_iteration": 452, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-29-49", "timestamp": 1666582189, "time_this_iter_s": 3.579380512237549, "time_total_s": 1720.954797744751, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1720.954797744751, "timesteps_since_restore": 0, "iterations_since_restore": 452, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.766666666666666, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 204.4, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 182.55, "shaped_reward_min": 156, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.12, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 16.44, "onion_pickup_agent_1_min": 12, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 17.03, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 16.24, "useful_onion_pickup_agent_1_min": 12, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.01, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.07, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.01, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, 
"useful_onion_drop_agent_1_mean": 0.04, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.82, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 16.12, "potting_onion_agent_1_min": 12, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.26, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 5.76, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.08, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.58, "useful_dish_pickup_agent_1_min": 3, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.07, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.08, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.93, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.43, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.89, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.36, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.82, "optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 16.12, "optimal_onion_potting_agent_1_min": 12, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.82, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 16.12, "viable_onion_potting_agent_1_min": 12, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0008576636901125312, "policy_loss": 0.0004561354289762676, "vf_loss": 
7.776826858520508, "vf_explained_var": 0.6098490953445435, "kl": 0.003105924464762211, "entropy": 0.7523094415664673, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 5798400, "num_env_steps_trained": 5798400, "num_agent_steps_sampled": 11596800, "num_agent_steps_trained": 11596800}, "sampler_results": {"episode_reward_max": 639.0, "episode_reward_min": 476.0, "episode_reward_mean": 591.35, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 237.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 295.675}, "custom_metrics": {"sparse_reward_mean": 204.4, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 182.55, "shaped_reward_min": 156, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.12, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 16.44, 
"onion_pickup_agent_1_min": 12, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 17.03, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 16.24, "useful_onion_pickup_agent_1_min": 12, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.01, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.07, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.01, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.04, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.82, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 16.12, "potting_onion_agent_1_min": 12, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.26, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 5.76, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.08, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.58, "useful_dish_pickup_agent_1_min": 3, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.07, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.08, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.93, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.43, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.89, "soup_delivery_agent_0_min": 2, 
"soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.36, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.82, "optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 16.12, "optimal_onion_potting_agent_1_min": 12, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.82, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 16.12, "viable_onion_potting_agent_1_min": 12, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 627.0, 582.0, 633.0, 582.0, 636.0, 582.0, 579.0, 579.0, 630.0, 587.0, 579.0, 584.0, 576.0, 582.0, 579.0, 576.0, 627.0, 579.0, 636.0, 587.0, 633.0, 633.0, 582.0, 630.0, 590.0, 576.0, 582.0, 522.0, 579.0, 573.0, 570.0, 527.0, 582.0, 539.0, 633.0, 579.0, 582.0, 627.0, 582.0, 630.0, 633.0, 579.0, 630.0, 584.0, 582.0, 582.0, 573.0, 573.0, 627.0, 582.0, 627.0, 639.0, 533.0, 636.0, 576.0, 582.0, 576.0, 630.0, 582.0, 630.0, 630.0, 633.0, 522.0, 582.0, 579.0, 519.0, 630.0, 639.0, 579.0, 573.0, 522.0, 639.0, 584.0, 587.0, 582.0, 519.0, 639.0, 576.0, 582.0, 579.0, 533.0, 579.0, 633.0, 579.0, 630.0, 582.0, 633.0, 582.0, 576.0, 476.0, 525.0, 633.0, 636.0, 579.0, 581.0, 636.0, 627.0, 627.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [292.0, 290.0, 311.0, 316.0, 290.0, 292.0, 317.0, 316.0, 289.0, 293.0, 317.0, 319.0, 288.0, 294.0, 283.0, 296.0, 288.0, 291.0, 313.0, 317.0, 293.0, 294.0, 292.0, 287.0, 294.0, 290.0, 284.0, 292.0, 286.0, 296.0, 299.0, 280.0, 291.0, 285.0, 311.0, 316.0, 288.0, 291.0, 327.0, 309.0, 293.0, 294.0, 316.0, 317.0, 317.0, 316.0, 291.0, 291.0, 312.0, 
318.0, 293.0, 297.0, 287.0, 289.0, 289.0, 293.0, 268.0, 254.0, 291.0, 288.0, 280.0, 293.0, 283.0, 287.0, 274.0, 253.0, 288.0, 294.0, 265.0, 274.0, 311.0, 322.0, 292.0, 287.0, 286.0, 296.0, 316.0, 311.0, 291.0, 291.0, 314.0, 316.0, 312.0, 321.0, 290.0, 289.0, 314.0, 316.0, 296.0, 288.0, 295.0, 287.0, 283.0, 299.0, 288.0, 285.0, 283.0, 290.0, 316.0, 311.0, 293.0, 289.0, 313.0, 314.0, 324.0, 315.0, 265.0, 268.0, 320.0, 316.0, 282.0, 294.0, 286.0, 296.0, 283.0, 293.0, 311.0, 319.0, 289.0, 293.0, 317.0, 313.0, 308.0, 322.0, 314.0, 319.0, 271.0, 251.0, 294.0, 288.0, 285.0, 294.0, 253.0, 266.0, 309.0, 321.0, 320.0, 319.0, 291.0, 288.0, 285.0, 288.0, 253.0, 269.0, 317.0, 322.0, 299.0, 285.0, 291.0, 296.0, 291.0, 291.0, 261.0, 258.0, 317.0, 322.0, 274.0, 302.0, 296.0, 286.0, 289.0, 290.0, 259.0, 274.0, 293.0, 286.0, 314.0, 319.0, 284.0, 295.0, 314.0, 316.0, 297.0, 285.0, 314.0, 319.0, 294.0, 288.0, 289.0, 287.0, 237.0, 239.0, 263.0, 262.0, 319.0, 314.0, 314.0, 322.0, 278.0, 301.0, 283.0, 298.0, 319.0, 317.0, 313.0, 314.0, 311.0, 316.0, 292.0, 290.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6944050950948926, "mean_inference_ms": 1.23808158227914, "mean_action_processing_ms": 0.13325236698769452, "mean_env_wait_ms": 0.8357987332989592, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 639.0, "episode_reward_min": 476.0, "episode_reward_mean": 591.35, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 237.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 295.675}, "hist_stats": {"episode_reward": [582.0, 627.0, 582.0, 633.0, 582.0, 636.0, 582.0, 579.0, 579.0, 630.0, 587.0, 579.0, 584.0, 576.0, 582.0, 579.0, 576.0, 627.0, 579.0, 636.0, 587.0, 633.0, 633.0, 582.0, 630.0, 590.0, 576.0, 582.0, 522.0, 579.0, 573.0, 570.0, 527.0, 582.0, 539.0, 633.0, 579.0, 582.0, 627.0, 582.0, 630.0, 633.0, 579.0, 630.0, 584.0, 582.0, 582.0, 573.0, 573.0, 627.0, 582.0, 627.0, 639.0, 533.0, 636.0, 576.0, 582.0, 
576.0, 630.0, 582.0, 630.0, 630.0, 633.0, 522.0, 582.0, 579.0, 519.0, 630.0, 639.0, 579.0, 573.0, 522.0, 639.0, 584.0, 587.0, 582.0, 519.0, 639.0, 576.0, 582.0, 579.0, 533.0, 579.0, 633.0, 579.0, 630.0, 582.0, 633.0, 582.0, 576.0, 476.0, 525.0, 633.0, 636.0, 579.0, 581.0, 636.0, 627.0, 627.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [292.0, 290.0, 311.0, 316.0, 290.0, 292.0, 317.0, 316.0, 289.0, 293.0, 317.0, 319.0, 288.0, 294.0, 283.0, 296.0, 288.0, 291.0, 313.0, 317.0, 293.0, 294.0, 292.0, 287.0, 294.0, 290.0, 284.0, 292.0, 286.0, 296.0, 299.0, 280.0, 291.0, 285.0, 311.0, 316.0, 288.0, 291.0, 327.0, 309.0, 293.0, 294.0, 316.0, 317.0, 317.0, 316.0, 291.0, 291.0, 312.0, 318.0, 293.0, 297.0, 287.0, 289.0, 289.0, 293.0, 268.0, 254.0, 291.0, 288.0, 280.0, 293.0, 283.0, 287.0, 274.0, 253.0, 288.0, 294.0, 265.0, 274.0, 311.0, 322.0, 292.0, 287.0, 286.0, 296.0, 316.0, 311.0, 291.0, 291.0, 314.0, 316.0, 312.0, 321.0, 290.0, 289.0, 314.0, 316.0, 296.0, 288.0, 295.0, 287.0, 283.0, 299.0, 288.0, 285.0, 283.0, 290.0, 316.0, 311.0, 293.0, 289.0, 313.0, 314.0, 324.0, 315.0, 265.0, 268.0, 320.0, 316.0, 282.0, 294.0, 286.0, 296.0, 283.0, 293.0, 311.0, 319.0, 289.0, 293.0, 317.0, 313.0, 308.0, 322.0, 314.0, 319.0, 271.0, 251.0, 294.0, 288.0, 285.0, 294.0, 253.0, 266.0, 309.0, 321.0, 320.0, 319.0, 291.0, 288.0, 285.0, 288.0, 253.0, 269.0, 317.0, 322.0, 299.0, 285.0, 291.0, 296.0, 291.0, 291.0, 261.0, 258.0, 317.0, 322.0, 274.0, 302.0, 296.0, 286.0, 289.0, 290.0, 259.0, 274.0, 293.0, 
286.0, 314.0, 319.0, 284.0, 295.0, 314.0, 316.0, 297.0, 285.0, 314.0, 319.0, 294.0, 288.0, 289.0, 287.0, 237.0, 239.0, 263.0, 262.0, 319.0, 314.0, 314.0, 322.0, 278.0, 301.0, 283.0, 298.0, 319.0, 317.0, 313.0, 314.0, 311.0, 316.0, 292.0, 290.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6944050950948926, "mean_inference_ms": 1.23808158227914, "mean_action_processing_ms": 0.13325236698769452, "mean_env_wait_ms": 0.8357987332989592, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 11596800, "num_agent_steps_trained": 11596800, "num_env_steps_sampled": 5798400, "num_env_steps_trained": 5798400, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 5798400, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 11596800, "timers": {"training_iteration_time_ms": 3603.123, "learn_time_ms": 1103.911, "learn_throughput": 11595.142, "synch_weights_time_ms": 11.954}, "counters": {"num_env_steps_sampled": 5798400, "num_env_steps_trained": 5798400, "num_agent_steps_sampled": 11596800, "num_agent_steps_trained": 11596800}, "done": false, "episodes_total": 14496, "training_iteration": 453, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-29-53", "timestamp": 1666582193, "time_this_iter_s": 3.5724422931671143, "time_total_s": 1724.527240037918, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1724.527240037918, "timesteps_since_restore": 0, "iterations_since_restore": 453, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 24.0, "ram_util_percent": 10.62}}
+{"custom_metrics": {"sparse_reward_mean": 204.8, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 182.14, "shaped_reward_min": 156, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.2, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 16.41, "onion_pickup_agent_1_min": 12, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 17.16, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 16.2, "useful_onion_pickup_agent_1_min": 12, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.03, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.08, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.01, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, 
"useful_onion_drop_agent_1_mean": 0.04, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.78, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 16.06, "potting_onion_agent_1_min": 11, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.21, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.75, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.0, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.59, "useful_dish_pickup_agent_1_min": 3, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.05, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.06, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.93, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.45, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.9, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.37, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.78, "optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 16.06, "optimal_onion_potting_agent_1_min": 11, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.78, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 16.06, "viable_onion_potting_agent_1_min": 11, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0006028414354659617, "policy_loss": -0.0009957019938156009, "vf_loss": 
7.673003196716309, "vf_explained_var": 0.6046704053878784, "kl": 0.0030364375561475754, "entropy": 0.7488775253295898, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 5811200, "num_env_steps_trained": 5811200, "num_agent_steps_sampled": 11622400, "num_agent_steps_trained": 11622400}, "sampler_results": {"episode_reward_max": 639.0, "episode_reward_min": 476.0, "episode_reward_mean": 591.74, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 237.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 295.87}, "custom_metrics": {"sparse_reward_mean": 204.8, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 182.14, "shaped_reward_min": 156, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.2, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 16.41, 
"onion_pickup_agent_1_min": 12, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 17.16, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 16.2, "useful_onion_pickup_agent_1_min": 12, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.03, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.08, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.01, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.04, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.78, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 16.06, "potting_onion_agent_1_min": 11, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.21, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.75, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.0, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.59, "useful_dish_pickup_agent_1_min": 3, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.05, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.06, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.93, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.45, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.9, "soup_delivery_agent_0_min": 2, 
"soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.37, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.78, "optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 16.06, "optimal_onion_potting_agent_1_min": 11, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.78, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 16.06, "viable_onion_potting_agent_1_min": 11, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [527.0, 582.0, 539.0, 633.0, 579.0, 582.0, 627.0, 582.0, 630.0, 633.0, 579.0, 630.0, 584.0, 582.0, 582.0, 573.0, 573.0, 627.0, 582.0, 627.0, 639.0, 533.0, 636.0, 576.0, 582.0, 576.0, 630.0, 582.0, 630.0, 630.0, 633.0, 522.0, 582.0, 579.0, 519.0, 630.0, 639.0, 579.0, 573.0, 522.0, 639.0, 584.0, 587.0, 582.0, 519.0, 639.0, 576.0, 582.0, 579.0, 533.0, 579.0, 633.0, 579.0, 630.0, 582.0, 633.0, 582.0, 576.0, 476.0, 525.0, 633.0, 636.0, 579.0, 581.0, 636.0, 627.0, 627.0, 582.0, 630.0, 573.0, 570.0, 582.0, 627.0, 587.0, 587.0, 584.0, 573.0, 570.0, 627.0, 573.0, 584.0, 579.0, 573.0, 516.0, 630.0, 579.0, 627.0, 630.0, 576.0, 630.0, 636.0, 630.0, 579.0, 579.0, 630.0, 636.0, 579.0, 582.0, 573.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [274.0, 253.0, 288.0, 294.0, 265.0, 274.0, 311.0, 322.0, 292.0, 287.0, 286.0, 296.0, 316.0, 311.0, 291.0, 291.0, 314.0, 316.0, 312.0, 321.0, 290.0, 289.0, 314.0, 316.0, 296.0, 288.0, 295.0, 287.0, 283.0, 299.0, 288.0, 285.0, 283.0, 290.0, 316.0, 311.0, 293.0, 289.0, 313.0, 314.0, 324.0, 315.0, 265.0, 268.0, 320.0, 316.0, 282.0, 294.0, 286.0, 
296.0, 283.0, 293.0, 311.0, 319.0, 289.0, 293.0, 317.0, 313.0, 308.0, 322.0, 314.0, 319.0, 271.0, 251.0, 294.0, 288.0, 285.0, 294.0, 253.0, 266.0, 309.0, 321.0, 320.0, 319.0, 291.0, 288.0, 285.0, 288.0, 253.0, 269.0, 317.0, 322.0, 299.0, 285.0, 291.0, 296.0, 291.0, 291.0, 261.0, 258.0, 317.0, 322.0, 274.0, 302.0, 296.0, 286.0, 289.0, 290.0, 259.0, 274.0, 293.0, 286.0, 314.0, 319.0, 284.0, 295.0, 314.0, 316.0, 297.0, 285.0, 314.0, 319.0, 294.0, 288.0, 289.0, 287.0, 237.0, 239.0, 263.0, 262.0, 319.0, 314.0, 314.0, 322.0, 278.0, 301.0, 283.0, 298.0, 319.0, 317.0, 313.0, 314.0, 311.0, 316.0, 292.0, 290.0, 315.0, 315.0, 282.0, 291.0, 280.0, 290.0, 292.0, 290.0, 311.0, 316.0, 294.0, 293.0, 293.0, 294.0, 299.0, 285.0, 281.0, 292.0, 286.0, 284.0, 313.0, 314.0, 286.0, 287.0, 288.0, 296.0, 292.0, 287.0, 294.0, 279.0, 255.0, 261.0, 311.0, 319.0, 288.0, 291.0, 308.0, 319.0, 316.0, 314.0, 280.0, 296.0, 311.0, 319.0, 322.0, 314.0, 324.0, 306.0, 284.0, 295.0, 290.0, 289.0, 313.0, 317.0, 319.0, 317.0, 291.0, 288.0, 291.0, 291.0, 277.0, 296.0, 291.0, 291.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.694358388660809, "mean_inference_ms": 1.2379814229738246, "mean_action_processing_ms": 0.13324649046471473, "mean_env_wait_ms": 0.8357372653835803, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 639.0, "episode_reward_min": 476.0, "episode_reward_mean": 591.74, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 237.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 295.87}, "hist_stats": {"episode_reward": [527.0, 582.0, 539.0, 633.0, 579.0, 582.0, 627.0, 582.0, 630.0, 633.0, 579.0, 630.0, 584.0, 582.0, 582.0, 573.0, 573.0, 627.0, 582.0, 627.0, 639.0, 533.0, 636.0, 576.0, 582.0, 576.0, 630.0, 582.0, 630.0, 630.0, 633.0, 522.0, 582.0, 579.0, 519.0, 630.0, 639.0, 579.0, 573.0, 522.0, 639.0, 584.0, 587.0, 582.0, 519.0, 639.0, 576.0, 582.0, 579.0, 533.0, 579.0, 633.0, 579.0, 630.0, 582.0, 633.0, 582.0, 
576.0, 476.0, 525.0, 633.0, 636.0, 579.0, 581.0, 636.0, 627.0, 627.0, 582.0, 630.0, 573.0, 570.0, 582.0, 627.0, 587.0, 587.0, 584.0, 573.0, 570.0, 627.0, 573.0, 584.0, 579.0, 573.0, 516.0, 630.0, 579.0, 627.0, 630.0, 576.0, 630.0, 636.0, 630.0, 579.0, 579.0, 630.0, 636.0, 579.0, 582.0, 573.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [274.0, 253.0, 288.0, 294.0, 265.0, 274.0, 311.0, 322.0, 292.0, 287.0, 286.0, 296.0, 316.0, 311.0, 291.0, 291.0, 314.0, 316.0, 312.0, 321.0, 290.0, 289.0, 314.0, 316.0, 296.0, 288.0, 295.0, 287.0, 283.0, 299.0, 288.0, 285.0, 283.0, 290.0, 316.0, 311.0, 293.0, 289.0, 313.0, 314.0, 324.0, 315.0, 265.0, 268.0, 320.0, 316.0, 282.0, 294.0, 286.0, 296.0, 283.0, 293.0, 311.0, 319.0, 289.0, 293.0, 317.0, 313.0, 308.0, 322.0, 314.0, 319.0, 271.0, 251.0, 294.0, 288.0, 285.0, 294.0, 253.0, 266.0, 309.0, 321.0, 320.0, 319.0, 291.0, 288.0, 285.0, 288.0, 253.0, 269.0, 317.0, 322.0, 299.0, 285.0, 291.0, 296.0, 291.0, 291.0, 261.0, 258.0, 317.0, 322.0, 274.0, 302.0, 296.0, 286.0, 289.0, 290.0, 259.0, 274.0, 293.0, 286.0, 314.0, 319.0, 284.0, 295.0, 314.0, 316.0, 297.0, 285.0, 314.0, 319.0, 294.0, 288.0, 289.0, 287.0, 237.0, 239.0, 263.0, 262.0, 319.0, 314.0, 314.0, 322.0, 278.0, 301.0, 283.0, 298.0, 319.0, 317.0, 313.0, 314.0, 311.0, 316.0, 292.0, 290.0, 315.0, 315.0, 282.0, 291.0, 280.0, 290.0, 292.0, 290.0, 311.0, 316.0, 294.0, 293.0, 293.0, 294.0, 299.0, 285.0, 281.0, 292.0, 286.0, 284.0, 313.0, 314.0, 286.0, 287.0, 288.0, 296.0, 292.0, 287.0, 294.0, 
279.0, 255.0, 261.0, 311.0, 319.0, 288.0, 291.0, 308.0, 319.0, 316.0, 314.0, 280.0, 296.0, 311.0, 319.0, 322.0, 314.0, 324.0, 306.0, 284.0, 295.0, 290.0, 289.0, 313.0, 317.0, 319.0, 317.0, 291.0, 288.0, 291.0, 291.0, 277.0, 296.0, 291.0, 291.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.694358388660809, "mean_inference_ms": 1.2379814229738246, "mean_action_processing_ms": 0.13324649046471473, "mean_env_wait_ms": 0.8357372653835803, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 11622400, "num_agent_steps_trained": 11622400, "num_env_steps_sampled": 5811200, "num_env_steps_trained": 5811200, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 5811200, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 11622400, "timers": {"training_iteration_time_ms": 3599.392, "learn_time_ms": 1095.152, "learn_throughput": 11687.876, "synch_weights_time_ms": 11.457}, "counters": {"num_env_steps_sampled": 5811200, "num_env_steps_trained": 5811200, "num_agent_steps_sampled": 11622400, "num_agent_steps_trained": 11622400}, "done": false, "episodes_total": 14528, "training_iteration": 454, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-29-57", "timestamp": 1666582197, "time_this_iter_s": 3.6274921894073486, "time_total_s": 1728.1547322273254, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1728.1547322273254, "timesteps_since_restore": 0, "iterations_since_restore": 454, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.666666666666668, "ram_util_percent": 10.616666666666667}}
+{"custom_metrics": {"sparse_reward_mean": 205.2, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 182.08, "shaped_reward_min": 156, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.97, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 16.55, "onion_pickup_agent_1_min": 12, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.92, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 16.27, "useful_onion_pickup_agent_1_min": 11, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.04, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.06, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.02, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, 
"useful_onion_drop_agent_1_mean": 0.04, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.56, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 16.19, "potting_onion_agent_1_min": 11, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.36, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.63, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.14, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 5.47, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.07, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.05, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.04, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 5.37, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.02, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.28, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.56, "optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 16.19, "optimal_onion_potting_agent_1_min": 11, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.56, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 16.19, "viable_onion_potting_agent_1_min": 11, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0014731429982930422, "policy_loss": 0.0010732044465839863, "vf_loss": 
7.764483451843262, "vf_explained_var": 0.5941508412361145, "kl": 0.003150323638692498, "entropy": 0.7530198097229004, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 5824000, "num_env_steps_trained": 5824000, "num_agent_steps_sampled": 11648000, "num_agent_steps_trained": 11648000}, "sampler_results": {"episode_reward_max": 639.0, "episode_reward_min": 476.0, "episode_reward_mean": 592.48, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 237.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 296.24}, "custom_metrics": {"sparse_reward_mean": 205.2, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 182.08, "shaped_reward_min": 156, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.97, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 16.55, 
"onion_pickup_agent_1_min": 12, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.92, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 16.27, "useful_onion_pickup_agent_1_min": 11, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.04, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.06, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.02, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.04, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.56, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 16.19, "potting_onion_agent_1_min": 11, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.36, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.63, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.14, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 5.47, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.07, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.05, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.04, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 5.37, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.02, "soup_delivery_agent_0_min": 2, 
"soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.28, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.56, "optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 16.19, "optimal_onion_potting_agent_1_min": 11, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.56, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 16.19, "viable_onion_potting_agent_1_min": 11, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 579.0, 519.0, 630.0, 639.0, 579.0, 573.0, 522.0, 639.0, 584.0, 587.0, 582.0, 519.0, 639.0, 576.0, 582.0, 579.0, 533.0, 579.0, 633.0, 579.0, 630.0, 582.0, 633.0, 582.0, 576.0, 476.0, 525.0, 633.0, 636.0, 579.0, 581.0, 636.0, 627.0, 627.0, 582.0, 630.0, 573.0, 570.0, 582.0, 627.0, 587.0, 587.0, 584.0, 573.0, 570.0, 627.0, 573.0, 584.0, 579.0, 573.0, 516.0, 630.0, 579.0, 627.0, 630.0, 576.0, 630.0, 636.0, 630.0, 579.0, 579.0, 630.0, 636.0, 579.0, 582.0, 573.0, 582.0, 639.0, 627.0, 630.0, 630.0, 582.0, 582.0, 576.0, 582.0, 584.0, 576.0, 576.0, 627.0, 584.0, 581.0, 630.0, 587.0, 573.0, 525.0, 624.0, 579.0, 633.0, 579.0, 587.0, 579.0, 630.0, 573.0, 630.0, 579.0, 579.0, 576.0, 630.0, 627.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [294.0, 288.0, 285.0, 294.0, 253.0, 266.0, 309.0, 321.0, 320.0, 319.0, 291.0, 288.0, 285.0, 288.0, 253.0, 269.0, 317.0, 322.0, 299.0, 285.0, 291.0, 296.0, 291.0, 291.0, 261.0, 258.0, 317.0, 322.0, 274.0, 302.0, 296.0, 286.0, 289.0, 290.0, 259.0, 274.0, 293.0, 286.0, 314.0, 319.0, 284.0, 295.0, 314.0, 316.0, 297.0, 285.0, 314.0, 319.0, 294.0, 
288.0, 289.0, 287.0, 237.0, 239.0, 263.0, 262.0, 319.0, 314.0, 314.0, 322.0, 278.0, 301.0, 283.0, 298.0, 319.0, 317.0, 313.0, 314.0, 311.0, 316.0, 292.0, 290.0, 315.0, 315.0, 282.0, 291.0, 280.0, 290.0, 292.0, 290.0, 311.0, 316.0, 294.0, 293.0, 293.0, 294.0, 299.0, 285.0, 281.0, 292.0, 286.0, 284.0, 313.0, 314.0, 286.0, 287.0, 288.0, 296.0, 292.0, 287.0, 294.0, 279.0, 255.0, 261.0, 311.0, 319.0, 288.0, 291.0, 308.0, 319.0, 316.0, 314.0, 280.0, 296.0, 311.0, 319.0, 322.0, 314.0, 324.0, 306.0, 284.0, 295.0, 290.0, 289.0, 313.0, 317.0, 319.0, 317.0, 291.0, 288.0, 291.0, 291.0, 277.0, 296.0, 291.0, 291.0, 319.0, 320.0, 314.0, 313.0, 314.0, 316.0, 312.0, 318.0, 291.0, 291.0, 289.0, 293.0, 291.0, 285.0, 286.0, 296.0, 301.0, 283.0, 293.0, 283.0, 289.0, 287.0, 313.0, 314.0, 290.0, 294.0, 298.0, 283.0, 311.0, 319.0, 294.0, 293.0, 287.0, 286.0, 260.0, 265.0, 313.0, 311.0, 283.0, 296.0, 317.0, 316.0, 291.0, 288.0, 305.0, 282.0, 285.0, 294.0, 320.0, 310.0, 283.0, 290.0, 316.0, 314.0, 293.0, 286.0, 293.0, 286.0, 288.0, 288.0, 311.0, 319.0, 311.0, 316.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6943095786537452, "mean_inference_ms": 1.2378769399875804, "mean_action_processing_ms": 0.13324093695251057, "mean_env_wait_ms": 0.8356776468016364, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 639.0, "episode_reward_min": 476.0, "episode_reward_mean": 592.48, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 237.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 296.24}, "hist_stats": {"episode_reward": [582.0, 579.0, 519.0, 630.0, 639.0, 579.0, 573.0, 522.0, 639.0, 584.0, 587.0, 582.0, 519.0, 639.0, 576.0, 582.0, 579.0, 533.0, 579.0, 633.0, 579.0, 630.0, 582.0, 633.0, 582.0, 576.0, 476.0, 525.0, 633.0, 636.0, 579.0, 581.0, 636.0, 627.0, 627.0, 582.0, 630.0, 573.0, 570.0, 582.0, 627.0, 587.0, 587.0, 584.0, 573.0, 570.0, 627.0, 573.0, 584.0, 579.0, 573.0, 516.0, 630.0, 579.0, 627.0, 630.0, 576.0, 
630.0, 636.0, 630.0, 579.0, 579.0, 630.0, 636.0, 579.0, 582.0, 573.0, 582.0, 639.0, 627.0, 630.0, 630.0, 582.0, 582.0, 576.0, 582.0, 584.0, 576.0, 576.0, 627.0, 584.0, 581.0, 630.0, 587.0, 573.0, 525.0, 624.0, 579.0, 633.0, 579.0, 587.0, 579.0, 630.0, 573.0, 630.0, 579.0, 579.0, 576.0, 630.0, 627.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [294.0, 288.0, 285.0, 294.0, 253.0, 266.0, 309.0, 321.0, 320.0, 319.0, 291.0, 288.0, 285.0, 288.0, 253.0, 269.0, 317.0, 322.0, 299.0, 285.0, 291.0, 296.0, 291.0, 291.0, 261.0, 258.0, 317.0, 322.0, 274.0, 302.0, 296.0, 286.0, 289.0, 290.0, 259.0, 274.0, 293.0, 286.0, 314.0, 319.0, 284.0, 295.0, 314.0, 316.0, 297.0, 285.0, 314.0, 319.0, 294.0, 288.0, 289.0, 287.0, 237.0, 239.0, 263.0, 262.0, 319.0, 314.0, 314.0, 322.0, 278.0, 301.0, 283.0, 298.0, 319.0, 317.0, 313.0, 314.0, 311.0, 316.0, 292.0, 290.0, 315.0, 315.0, 282.0, 291.0, 280.0, 290.0, 292.0, 290.0, 311.0, 316.0, 294.0, 293.0, 293.0, 294.0, 299.0, 285.0, 281.0, 292.0, 286.0, 284.0, 313.0, 314.0, 286.0, 287.0, 288.0, 296.0, 292.0, 287.0, 294.0, 279.0, 255.0, 261.0, 311.0, 319.0, 288.0, 291.0, 308.0, 319.0, 316.0, 314.0, 280.0, 296.0, 311.0, 319.0, 322.0, 314.0, 324.0, 306.0, 284.0, 295.0, 290.0, 289.0, 313.0, 317.0, 319.0, 317.0, 291.0, 288.0, 291.0, 291.0, 277.0, 296.0, 291.0, 291.0, 319.0, 320.0, 314.0, 313.0, 314.0, 316.0, 312.0, 318.0, 291.0, 291.0, 289.0, 293.0, 291.0, 285.0, 286.0, 296.0, 301.0, 283.0, 293.0, 283.0, 289.0, 287.0, 313.0, 314.0, 290.0, 294.0, 298.0, 283.0, 311.0, 
319.0, 294.0, 293.0, 287.0, 286.0, 260.0, 265.0, 313.0, 311.0, 283.0, 296.0, 317.0, 316.0, 291.0, 288.0, 305.0, 282.0, 285.0, 294.0, 320.0, 310.0, 283.0, 290.0, 316.0, 314.0, 293.0, 286.0, 293.0, 286.0, 288.0, 288.0, 311.0, 319.0, 311.0, 316.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6943095786537452, "mean_inference_ms": 1.2378769399875804, "mean_action_processing_ms": 0.13324093695251057, "mean_env_wait_ms": 0.8356776468016364, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 11648000, "num_agent_steps_trained": 11648000, "num_env_steps_sampled": 5824000, "num_env_steps_trained": 5824000, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 5824000, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 11648000, "timers": {"training_iteration_time_ms": 3584.012, "learn_time_ms": 1084.705, "learn_throughput": 11800.44, "synch_weights_time_ms": 11.283}, "counters": {"num_env_steps_sampled": 5824000, "num_env_steps_trained": 5824000, "num_agent_steps_sampled": 11648000, "num_agent_steps_trained": 11648000}, "done": false, "episodes_total": 14560, "training_iteration": 455, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-30-01", "timestamp": 1666582201, "time_this_iter_s": 3.6407976150512695, "time_total_s": 1731.7955298423767, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1731.7955298423767, "timesteps_since_restore": 0, "iterations_since_restore": 455, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.120000000000005, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 206.6, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 183.18, "shaped_reward_min": 156, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.0, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 16.62, "onion_pickup_agent_1_min": 12, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.95, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 16.39, "useful_onion_pickup_agent_1_min": 11, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.04, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.04, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.02, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, 
"useful_onion_drop_agent_1_mean": 0.01, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.59, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 16.29, "potting_onion_agent_1_min": 11, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.46, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.56, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.28, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 5.4, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.06, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.02, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.16, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 5.34, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.1, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.26, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.59, "optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 16.29, "optimal_onion_potting_agent_1_min": 11, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.59, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 16.29, "viable_onion_potting_agent_1_min": 11, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.00027783436235040426, "policy_loss": -0.00011880556121468544, "vf_loss": 
7.770425319671631, "vf_explained_var": 0.602925181388855, "kl": 0.0028033058624714613, "entropy": 0.7608031034469604, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 5836800, "num_env_steps_trained": 5836800, "num_agent_steps_sampled": 11673600, "num_agent_steps_trained": 11673600}, "sampler_results": {"episode_reward_max": 639.0, "episode_reward_min": 516.0, "episode_reward_mean": 596.38, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 255.0}, "policy_reward_max": {"ppo": 326.0}, "policy_reward_mean": {"ppo": 298.19}, "custom_metrics": {"sparse_reward_mean": 206.6, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 183.18, "shaped_reward_min": 156, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.0, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 16.62, 
"onion_pickup_agent_1_min": 12, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.95, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 16.39, "useful_onion_pickup_agent_1_min": 11, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.04, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.04, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.02, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.01, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.59, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 16.29, "potting_onion_agent_1_min": 11, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.46, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.56, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.28, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 5.4, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.06, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.02, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.16, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 5.34, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.1, "soup_delivery_agent_0_min": 3, 
"soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.26, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.59, "optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 16.29, "optimal_onion_potting_agent_1_min": 11, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.59, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 16.29, "viable_onion_potting_agent_1_min": 11, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [636.0, 627.0, 627.0, 582.0, 630.0, 573.0, 570.0, 582.0, 627.0, 587.0, 587.0, 584.0, 573.0, 570.0, 627.0, 573.0, 584.0, 579.0, 573.0, 516.0, 630.0, 579.0, 627.0, 630.0, 576.0, 630.0, 636.0, 630.0, 579.0, 579.0, 630.0, 636.0, 579.0, 582.0, 573.0, 582.0, 639.0, 627.0, 630.0, 630.0, 582.0, 582.0, 576.0, 582.0, 584.0, 576.0, 576.0, 627.0, 584.0, 581.0, 630.0, 587.0, 573.0, 525.0, 624.0, 579.0, 633.0, 579.0, 587.0, 579.0, 630.0, 573.0, 630.0, 579.0, 579.0, 576.0, 630.0, 627.0, 630.0, 630.0, 573.0, 582.0, 576.0, 587.0, 582.0, 630.0, 582.0, 582.0, 627.0, 636.0, 587.0, 573.0, 624.0, 582.0, 530.0, 573.0, 536.0, 633.0, 630.0, 587.0, 587.0, 587.0, 582.0, 573.0, 582.0, 582.0, 636.0, 587.0, 633.0, 636.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [319.0, 317.0, 313.0, 314.0, 311.0, 316.0, 292.0, 290.0, 315.0, 315.0, 282.0, 291.0, 280.0, 290.0, 292.0, 290.0, 311.0, 316.0, 294.0, 293.0, 293.0, 294.0, 299.0, 285.0, 281.0, 292.0, 286.0, 284.0, 313.0, 314.0, 286.0, 287.0, 288.0, 296.0, 292.0, 287.0, 294.0, 279.0, 255.0, 261.0, 311.0, 319.0, 288.0, 291.0, 308.0, 319.0, 316.0, 314.0, 280.0, 
296.0, 311.0, 319.0, 322.0, 314.0, 324.0, 306.0, 284.0, 295.0, 290.0, 289.0, 313.0, 317.0, 319.0, 317.0, 291.0, 288.0, 291.0, 291.0, 277.0, 296.0, 291.0, 291.0, 319.0, 320.0, 314.0, 313.0, 314.0, 316.0, 312.0, 318.0, 291.0, 291.0, 289.0, 293.0, 291.0, 285.0, 286.0, 296.0, 301.0, 283.0, 293.0, 283.0, 289.0, 287.0, 313.0, 314.0, 290.0, 294.0, 298.0, 283.0, 311.0, 319.0, 294.0, 293.0, 287.0, 286.0, 260.0, 265.0, 313.0, 311.0, 283.0, 296.0, 317.0, 316.0, 291.0, 288.0, 305.0, 282.0, 285.0, 294.0, 320.0, 310.0, 283.0, 290.0, 316.0, 314.0, 293.0, 286.0, 293.0, 286.0, 288.0, 288.0, 311.0, 319.0, 311.0, 316.0, 304.0, 326.0, 314.0, 316.0, 283.0, 290.0, 286.0, 296.0, 293.0, 283.0, 296.0, 291.0, 291.0, 291.0, 314.0, 316.0, 291.0, 291.0, 290.0, 292.0, 313.0, 314.0, 317.0, 319.0, 296.0, 291.0, 285.0, 288.0, 318.0, 306.0, 292.0, 290.0, 268.0, 262.0, 288.0, 285.0, 265.0, 271.0, 315.0, 318.0, 316.0, 314.0, 290.0, 297.0, 295.0, 292.0, 294.0, 293.0, 297.0, 285.0, 290.0, 283.0, 291.0, 291.0, 291.0, 291.0, 317.0, 319.0, 294.0, 293.0, 317.0, 316.0, 317.0, 319.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6942631112972455, "mean_inference_ms": 1.2377770538170634, "mean_action_processing_ms": 0.13323589639973, "mean_env_wait_ms": 0.8356181316431414, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 639.0, "episode_reward_min": 516.0, "episode_reward_mean": 596.38, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 255.0}, "policy_reward_max": {"ppo": 326.0}, "policy_reward_mean": {"ppo": 298.19}, "hist_stats": {"episode_reward": [636.0, 627.0, 627.0, 582.0, 630.0, 573.0, 570.0, 582.0, 627.0, 587.0, 587.0, 584.0, 573.0, 570.0, 627.0, 573.0, 584.0, 579.0, 573.0, 516.0, 630.0, 579.0, 627.0, 630.0, 576.0, 630.0, 636.0, 630.0, 579.0, 579.0, 630.0, 636.0, 579.0, 582.0, 573.0, 582.0, 639.0, 627.0, 630.0, 630.0, 582.0, 582.0, 576.0, 582.0, 584.0, 576.0, 576.0, 627.0, 584.0, 581.0, 630.0, 587.0, 573.0, 525.0, 624.0, 579.0, 633.0, 
579.0, 587.0, 579.0, 630.0, 573.0, 630.0, 579.0, 579.0, 576.0, 630.0, 627.0, 630.0, 630.0, 573.0, 582.0, 576.0, 587.0, 582.0, 630.0, 582.0, 582.0, 627.0, 636.0, 587.0, 573.0, 624.0, 582.0, 530.0, 573.0, 536.0, 633.0, 630.0, 587.0, 587.0, 587.0, 582.0, 573.0, 582.0, 582.0, 636.0, 587.0, 633.0, 636.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [319.0, 317.0, 313.0, 314.0, 311.0, 316.0, 292.0, 290.0, 315.0, 315.0, 282.0, 291.0, 280.0, 290.0, 292.0, 290.0, 311.0, 316.0, 294.0, 293.0, 293.0, 294.0, 299.0, 285.0, 281.0, 292.0, 286.0, 284.0, 313.0, 314.0, 286.0, 287.0, 288.0, 296.0, 292.0, 287.0, 294.0, 279.0, 255.0, 261.0, 311.0, 319.0, 288.0, 291.0, 308.0, 319.0, 316.0, 314.0, 280.0, 296.0, 311.0, 319.0, 322.0, 314.0, 324.0, 306.0, 284.0, 295.0, 290.0, 289.0, 313.0, 317.0, 319.0, 317.0, 291.0, 288.0, 291.0, 291.0, 277.0, 296.0, 291.0, 291.0, 319.0, 320.0, 314.0, 313.0, 314.0, 316.0, 312.0, 318.0, 291.0, 291.0, 289.0, 293.0, 291.0, 285.0, 286.0, 296.0, 301.0, 283.0, 293.0, 283.0, 289.0, 287.0, 313.0, 314.0, 290.0, 294.0, 298.0, 283.0, 311.0, 319.0, 294.0, 293.0, 287.0, 286.0, 260.0, 265.0, 313.0, 311.0, 283.0, 296.0, 317.0, 316.0, 291.0, 288.0, 305.0, 282.0, 285.0, 294.0, 320.0, 310.0, 283.0, 290.0, 316.0, 314.0, 293.0, 286.0, 293.0, 286.0, 288.0, 288.0, 311.0, 319.0, 311.0, 316.0, 304.0, 326.0, 314.0, 316.0, 283.0, 290.0, 286.0, 296.0, 293.0, 283.0, 296.0, 291.0, 291.0, 291.0, 314.0, 316.0, 291.0, 291.0, 290.0, 292.0, 313.0, 314.0, 317.0, 319.0, 296.0, 291.0, 285.0, 288.0, 318.0, 
306.0, 292.0, 290.0, 268.0, 262.0, 288.0, 285.0, 265.0, 271.0, 315.0, 318.0, 316.0, 314.0, 290.0, 297.0, 295.0, 292.0, 294.0, 293.0, 297.0, 285.0, 290.0, 283.0, 291.0, 291.0, 291.0, 291.0, 317.0, 319.0, 294.0, 293.0, 317.0, 316.0, 317.0, 319.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6942631112972455, "mean_inference_ms": 1.2377770538170634, "mean_action_processing_ms": 0.13323589639973, "mean_env_wait_ms": 0.8356181316431414, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 11673600, "num_agent_steps_trained": 11673600, "num_env_steps_sampled": 5836800, "num_env_steps_trained": 5836800, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 5836800, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 11673600, "timers": {"training_iteration_time_ms": 3584.814, "learn_time_ms": 1086.882, "learn_throughput": 11776.804, "synch_weights_time_ms": 11.306}, "counters": {"num_env_steps_sampled": 5836800, "num_env_steps_trained": 5836800, "num_agent_steps_sampled": 11673600, "num_agent_steps_trained": 11673600}, "done": false, "episodes_total": 14592, "training_iteration": 456, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-30-04", "timestamp": 1666582204, "time_this_iter_s": 3.611459255218506, "time_total_s": 1735.4069890975952, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1735.4069890975952, "timesteps_since_restore": 0, "iterations_since_restore": 456, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 24.34, "ram_util_percent": 10.62}}
+{"custom_metrics": {"sparse_reward_mean": 205.6, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 183.45, "shaped_reward_min": 165, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.08, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 16.54, "onion_pickup_agent_1_min": 13, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 17.0, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 16.33, "useful_onion_pickup_agent_1_min": 11, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.05, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.06, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.03, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, 
"useful_onion_drop_agent_1_mean": 0.01, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.75, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 16.25, "potting_onion_agent_1_min": 12, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.36, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.67, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.22, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 5.48, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.04, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.04, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.1, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 5.37, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.0, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.31, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.75, "optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 16.25, "optimal_onion_potting_agent_1_min": 12, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.75, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 16.25, "viable_onion_potting_agent_1_min": 12, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0005887583829462528, "policy_loss": -0.0009819632396101952, "vf_loss": 
7.737387657165527, "vf_explained_var": 0.621029794216156, "kl": 0.0028006762731820345, "entropy": 0.7610688209533691, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 5849600, "num_env_steps_trained": 5849600, "num_agent_steps_sampled": 11699200, "num_agent_steps_trained": 11699200}, "sampler_results": {"episode_reward_max": 639.0, "episode_reward_min": 525.0, "episode_reward_mean": 594.65, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 259.0}, "policy_reward_max": {"ppo": 326.0}, "policy_reward_mean": {"ppo": 297.325}, "custom_metrics": {"sparse_reward_mean": 205.6, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 183.45, "shaped_reward_min": 165, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.08, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 16.54, 
"onion_pickup_agent_1_min": 13, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 17.0, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 16.33, "useful_onion_pickup_agent_1_min": 11, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.05, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.06, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.03, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.01, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.75, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 16.25, "potting_onion_agent_1_min": 12, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.36, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.67, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.22, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 5.48, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.04, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.04, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.1, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 5.37, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.0, "soup_delivery_agent_0_min": 3, 
"soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.31, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.75, "optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 16.25, "optimal_onion_potting_agent_1_min": 12, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.75, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 16.25, "viable_onion_potting_agent_1_min": 12, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 582.0, 573.0, 582.0, 639.0, 627.0, 630.0, 630.0, 582.0, 582.0, 576.0, 582.0, 584.0, 576.0, 576.0, 627.0, 584.0, 581.0, 630.0, 587.0, 573.0, 525.0, 624.0, 579.0, 633.0, 579.0, 587.0, 579.0, 630.0, 573.0, 630.0, 579.0, 579.0, 576.0, 630.0, 627.0, 630.0, 630.0, 573.0, 582.0, 576.0, 587.0, 582.0, 630.0, 582.0, 582.0, 627.0, 636.0, 587.0, 573.0, 624.0, 582.0, 530.0, 573.0, 536.0, 633.0, 630.0, 587.0, 587.0, 587.0, 582.0, 573.0, 582.0, 582.0, 636.0, 587.0, 633.0, 636.0, 582.0, 627.0, 579.0, 576.0, 639.0, 579.0, 630.0, 582.0, 579.0, 579.0, 582.0, 582.0, 573.0, 627.0, 582.0, 630.0, 587.0, 584.0, 587.0, 636.0, 576.0, 636.0, 573.0, 570.0, 639.0, 527.0, 590.0, 587.0, 639.0, 579.0, 576.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [291.0, 288.0, 291.0, 291.0, 277.0, 296.0, 291.0, 291.0, 319.0, 320.0, 314.0, 313.0, 314.0, 316.0, 312.0, 318.0, 291.0, 291.0, 289.0, 293.0, 291.0, 285.0, 286.0, 296.0, 301.0, 283.0, 293.0, 283.0, 289.0, 287.0, 313.0, 314.0, 290.0, 294.0, 298.0, 283.0, 311.0, 319.0, 294.0, 293.0, 287.0, 286.0, 260.0, 265.0, 313.0, 311.0, 283.0, 296.0, 317.0, 
316.0, 291.0, 288.0, 305.0, 282.0, 285.0, 294.0, 320.0, 310.0, 283.0, 290.0, 316.0, 314.0, 293.0, 286.0, 293.0, 286.0, 288.0, 288.0, 311.0, 319.0, 311.0, 316.0, 304.0, 326.0, 314.0, 316.0, 283.0, 290.0, 286.0, 296.0, 293.0, 283.0, 296.0, 291.0, 291.0, 291.0, 314.0, 316.0, 291.0, 291.0, 290.0, 292.0, 313.0, 314.0, 317.0, 319.0, 296.0, 291.0, 285.0, 288.0, 318.0, 306.0, 292.0, 290.0, 268.0, 262.0, 288.0, 285.0, 265.0, 271.0, 315.0, 318.0, 316.0, 314.0, 290.0, 297.0, 295.0, 292.0, 294.0, 293.0, 297.0, 285.0, 290.0, 283.0, 291.0, 291.0, 291.0, 291.0, 317.0, 319.0, 294.0, 293.0, 317.0, 316.0, 317.0, 319.0, 292.0, 290.0, 309.0, 318.0, 286.0, 293.0, 291.0, 285.0, 317.0, 322.0, 293.0, 286.0, 313.0, 317.0, 292.0, 290.0, 283.0, 296.0, 283.0, 296.0, 293.0, 289.0, 286.0, 296.0, 286.0, 287.0, 311.0, 316.0, 288.0, 294.0, 315.0, 315.0, 284.0, 303.0, 293.0, 291.0, 298.0, 289.0, 320.0, 316.0, 283.0, 293.0, 322.0, 314.0, 290.0, 283.0, 277.0, 293.0, 320.0, 319.0, 268.0, 259.0, 296.0, 294.0, 288.0, 299.0, 320.0, 319.0, 286.0, 293.0, 291.0, 285.0, 288.0, 294.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6942176481103501, "mean_inference_ms": 1.237682546407229, "mean_action_processing_ms": 0.13323101112203006, "mean_env_wait_ms": 0.8355603487641339, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 639.0, "episode_reward_min": 525.0, "episode_reward_mean": 594.65, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 259.0}, "policy_reward_max": {"ppo": 326.0}, "policy_reward_mean": {"ppo": 297.325}, "hist_stats": {"episode_reward": [579.0, 582.0, 573.0, 582.0, 639.0, 627.0, 630.0, 630.0, 582.0, 582.0, 576.0, 582.0, 584.0, 576.0, 576.0, 627.0, 584.0, 581.0, 630.0, 587.0, 573.0, 525.0, 624.0, 579.0, 633.0, 579.0, 587.0, 579.0, 630.0, 573.0, 630.0, 579.0, 579.0, 576.0, 630.0, 627.0, 630.0, 630.0, 573.0, 582.0, 576.0, 587.0, 582.0, 630.0, 582.0, 582.0, 627.0, 636.0, 587.0, 573.0, 624.0, 582.0, 530.0, 573.0, 536.0, 633.0, 630.0, 
587.0, 587.0, 587.0, 582.0, 573.0, 582.0, 582.0, 636.0, 587.0, 633.0, 636.0, 582.0, 627.0, 579.0, 576.0, 639.0, 579.0, 630.0, 582.0, 579.0, 579.0, 582.0, 582.0, 573.0, 627.0, 582.0, 630.0, 587.0, 584.0, 587.0, 636.0, 576.0, 636.0, 573.0, 570.0, 639.0, 527.0, 590.0, 587.0, 639.0, 579.0, 576.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [291.0, 288.0, 291.0, 291.0, 277.0, 296.0, 291.0, 291.0, 319.0, 320.0, 314.0, 313.0, 314.0, 316.0, 312.0, 318.0, 291.0, 291.0, 289.0, 293.0, 291.0, 285.0, 286.0, 296.0, 301.0, 283.0, 293.0, 283.0, 289.0, 287.0, 313.0, 314.0, 290.0, 294.0, 298.0, 283.0, 311.0, 319.0, 294.0, 293.0, 287.0, 286.0, 260.0, 265.0, 313.0, 311.0, 283.0, 296.0, 317.0, 316.0, 291.0, 288.0, 305.0, 282.0, 285.0, 294.0, 320.0, 310.0, 283.0, 290.0, 316.0, 314.0, 293.0, 286.0, 293.0, 286.0, 288.0, 288.0, 311.0, 319.0, 311.0, 316.0, 304.0, 326.0, 314.0, 316.0, 283.0, 290.0, 286.0, 296.0, 293.0, 283.0, 296.0, 291.0, 291.0, 291.0, 314.0, 316.0, 291.0, 291.0, 290.0, 292.0, 313.0, 314.0, 317.0, 319.0, 296.0, 291.0, 285.0, 288.0, 318.0, 306.0, 292.0, 290.0, 268.0, 262.0, 288.0, 285.0, 265.0, 271.0, 315.0, 318.0, 316.0, 314.0, 290.0, 297.0, 295.0, 292.0, 294.0, 293.0, 297.0, 285.0, 290.0, 283.0, 291.0, 291.0, 291.0, 291.0, 317.0, 319.0, 294.0, 293.0, 317.0, 316.0, 317.0, 319.0, 292.0, 290.0, 309.0, 318.0, 286.0, 293.0, 291.0, 285.0, 317.0, 322.0, 293.0, 286.0, 313.0, 317.0, 292.0, 290.0, 283.0, 296.0, 283.0, 296.0, 293.0, 289.0, 286.0, 296.0, 286.0, 287.0, 311.0, 316.0, 288.0, 
294.0, 315.0, 315.0, 284.0, 303.0, 293.0, 291.0, 298.0, 289.0, 320.0, 316.0, 283.0, 293.0, 322.0, 314.0, 290.0, 283.0, 277.0, 293.0, 320.0, 319.0, 268.0, 259.0, 296.0, 294.0, 288.0, 299.0, 320.0, 319.0, 286.0, 293.0, 291.0, 285.0, 288.0, 294.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6942176481103501, "mean_inference_ms": 1.237682546407229, "mean_action_processing_ms": 0.13323101112203006, "mean_env_wait_ms": 0.8355603487641339, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 11699200, "num_agent_steps_trained": 11699200, "num_env_steps_sampled": 5849600, "num_env_steps_trained": 5849600, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 5849600, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 11699200, "timers": {"training_iteration_time_ms": 3577.701, "learn_time_ms": 1083.327, "learn_throughput": 11815.452, "synch_weights_time_ms": 11.746}, "counters": {"num_env_steps_sampled": 5849600, "num_env_steps_trained": 5849600, "num_agent_steps_sampled": 11699200, "num_agent_steps_trained": 11699200}, "done": false, "episodes_total": 14624, "training_iteration": 457, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-30-08", "timestamp": 1666582208, "time_this_iter_s": 3.549762010574341, "time_total_s": 1738.9567511081696, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1738.9567511081696, "timesteps_since_restore": 0, "iterations_since_restore": 457, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 21.416666666666668, "ram_util_percent": 10.633333333333333}}
+{"custom_metrics": {"sparse_reward_mean": 203.8, "sparse_reward_min": 40, "sparse_reward_max": 220, "shaped_reward_mean": 182.24, "shaped_reward_min": 43, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.86, "onion_pickup_agent_0_min": 2, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 16.66, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 16.71, "useful_onion_pickup_agent_0_min": 2, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 16.42, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.08, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.07, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.03, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, 
"useful_onion_drop_agent_1_mean": 0.02, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.5, "potting_onion_agent_0_min": 2, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 16.33, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.38, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 5.56, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.23, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.42, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.04, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.07, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.1, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.26, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.0, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.2, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.5, "optimal_onion_potting_agent_0_min": 2, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 16.33, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.5, "viable_onion_potting_agent_0_min": 2, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 16.33, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0004856521845795214, "policy_loss": -0.0008793252054601908, "vf_loss": 
7.77410888671875, "vf_explained_var": 0.6171582937240601, "kl": 0.0027221820782870054, "entropy": 0.7674758434295654, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 5862400, "num_env_steps_trained": 5862400, "num_agent_steps_sampled": 11724800, "num_agent_steps_trained": 11724800}, "sampler_results": {"episode_reward_max": 639.0, "episode_reward_min": 123.0, "episode_reward_mean": 589.84, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 61.0}, "policy_reward_max": {"ppo": 326.0}, "policy_reward_mean": {"ppo": 294.92}, "custom_metrics": {"sparse_reward_mean": 203.8, "sparse_reward_min": 40, "sparse_reward_max": 220, "shaped_reward_mean": 182.24, "shaped_reward_min": 43, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.86, "onion_pickup_agent_0_min": 2, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 16.66, 
"onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 16.71, "useful_onion_pickup_agent_0_min": 2, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 16.42, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.08, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.07, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.03, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.02, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.5, "potting_onion_agent_0_min": 2, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 16.33, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.38, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 5.56, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.23, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.42, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.04, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.07, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.1, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.26, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.0, "soup_delivery_agent_0_min": 2, 
"soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.2, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.5, "optimal_onion_potting_agent_0_min": 2, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 16.33, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.5, "viable_onion_potting_agent_0_min": 2, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 16.33, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 576.0, 630.0, 627.0, 630.0, 630.0, 573.0, 582.0, 576.0, 587.0, 582.0, 630.0, 582.0, 582.0, 627.0, 636.0, 587.0, 573.0, 624.0, 582.0, 530.0, 573.0, 536.0, 633.0, 630.0, 587.0, 587.0, 587.0, 582.0, 573.0, 582.0, 582.0, 636.0, 587.0, 633.0, 636.0, 582.0, 627.0, 579.0, 576.0, 639.0, 579.0, 630.0, 582.0, 579.0, 579.0, 582.0, 582.0, 573.0, 627.0, 582.0, 630.0, 587.0, 584.0, 587.0, 636.0, 576.0, 636.0, 573.0, 570.0, 639.0, 527.0, 590.0, 587.0, 639.0, 579.0, 576.0, 582.0, 582.0, 582.0, 582.0, 579.0, 576.0, 582.0, 576.0, 582.0, 579.0, 579.0, 576.0, 630.0, 627.0, 633.0, 587.0, 630.0, 582.0, 587.0, 579.0, 630.0, 582.0, 579.0, 627.0, 582.0, 579.0, 636.0, 630.0, 627.0, 582.0, 582.0, 530.0, 123.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [293.0, 286.0, 288.0, 288.0, 311.0, 319.0, 311.0, 316.0, 304.0, 326.0, 314.0, 316.0, 283.0, 290.0, 286.0, 296.0, 293.0, 283.0, 296.0, 291.0, 291.0, 291.0, 314.0, 316.0, 291.0, 291.0, 290.0, 292.0, 313.0, 314.0, 317.0, 319.0, 296.0, 291.0, 285.0, 288.0, 318.0, 306.0, 292.0, 290.0, 268.0, 262.0, 288.0, 285.0, 265.0, 271.0, 315.0, 318.0, 316.0, 
314.0, 290.0, 297.0, 295.0, 292.0, 294.0, 293.0, 297.0, 285.0, 290.0, 283.0, 291.0, 291.0, 291.0, 291.0, 317.0, 319.0, 294.0, 293.0, 317.0, 316.0, 317.0, 319.0, 292.0, 290.0, 309.0, 318.0, 286.0, 293.0, 291.0, 285.0, 317.0, 322.0, 293.0, 286.0, 313.0, 317.0, 292.0, 290.0, 283.0, 296.0, 283.0, 296.0, 293.0, 289.0, 286.0, 296.0, 286.0, 287.0, 311.0, 316.0, 288.0, 294.0, 315.0, 315.0, 284.0, 303.0, 293.0, 291.0, 298.0, 289.0, 320.0, 316.0, 283.0, 293.0, 322.0, 314.0, 290.0, 283.0, 277.0, 293.0, 320.0, 319.0, 268.0, 259.0, 296.0, 294.0, 288.0, 299.0, 320.0, 319.0, 286.0, 293.0, 291.0, 285.0, 288.0, 294.0, 295.0, 287.0, 288.0, 294.0, 289.0, 293.0, 283.0, 296.0, 280.0, 296.0, 285.0, 297.0, 285.0, 291.0, 289.0, 293.0, 296.0, 283.0, 285.0, 294.0, 294.0, 282.0, 316.0, 314.0, 319.0, 308.0, 314.0, 319.0, 298.0, 289.0, 316.0, 314.0, 292.0, 290.0, 290.0, 297.0, 290.0, 289.0, 316.0, 314.0, 295.0, 287.0, 293.0, 286.0, 310.0, 317.0, 293.0, 289.0, 290.0, 289.0, 314.0, 322.0, 313.0, 317.0, 316.0, 311.0, 292.0, 290.0, 291.0, 291.0, 267.0, 263.0, 62.0, 61.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.694177385931617, "mean_inference_ms": 1.2375913086644352, "mean_action_processing_ms": 0.1332256740537696, "mean_env_wait_ms": 0.8355032005845738, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 639.0, "episode_reward_min": 123.0, "episode_reward_mean": 589.84, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 61.0}, "policy_reward_max": {"ppo": 326.0}, "policy_reward_mean": {"ppo": 294.92}, "hist_stats": {"episode_reward": [579.0, 576.0, 630.0, 627.0, 630.0, 630.0, 573.0, 582.0, 576.0, 587.0, 582.0, 630.0, 582.0, 582.0, 627.0, 636.0, 587.0, 573.0, 624.0, 582.0, 530.0, 573.0, 536.0, 633.0, 630.0, 587.0, 587.0, 587.0, 582.0, 573.0, 582.0, 582.0, 636.0, 587.0, 633.0, 636.0, 582.0, 627.0, 579.0, 576.0, 639.0, 579.0, 630.0, 582.0, 579.0, 579.0, 582.0, 582.0, 573.0, 627.0, 582.0, 630.0, 587.0, 584.0, 587.0, 636.0, 576.0, 
636.0, 573.0, 570.0, 639.0, 527.0, 590.0, 587.0, 639.0, 579.0, 576.0, 582.0, 582.0, 582.0, 582.0, 579.0, 576.0, 582.0, 576.0, 582.0, 579.0, 579.0, 576.0, 630.0, 627.0, 633.0, 587.0, 630.0, 582.0, 587.0, 579.0, 630.0, 582.0, 579.0, 627.0, 582.0, 579.0, 636.0, 630.0, 627.0, 582.0, 582.0, 530.0, 123.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [293.0, 286.0, 288.0, 288.0, 311.0, 319.0, 311.0, 316.0, 304.0, 326.0, 314.0, 316.0, 283.0, 290.0, 286.0, 296.0, 293.0, 283.0, 296.0, 291.0, 291.0, 291.0, 314.0, 316.0, 291.0, 291.0, 290.0, 292.0, 313.0, 314.0, 317.0, 319.0, 296.0, 291.0, 285.0, 288.0, 318.0, 306.0, 292.0, 290.0, 268.0, 262.0, 288.0, 285.0, 265.0, 271.0, 315.0, 318.0, 316.0, 314.0, 290.0, 297.0, 295.0, 292.0, 294.0, 293.0, 297.0, 285.0, 290.0, 283.0, 291.0, 291.0, 291.0, 291.0, 317.0, 319.0, 294.0, 293.0, 317.0, 316.0, 317.0, 319.0, 292.0, 290.0, 309.0, 318.0, 286.0, 293.0, 291.0, 285.0, 317.0, 322.0, 293.0, 286.0, 313.0, 317.0, 292.0, 290.0, 283.0, 296.0, 283.0, 296.0, 293.0, 289.0, 286.0, 296.0, 286.0, 287.0, 311.0, 316.0, 288.0, 294.0, 315.0, 315.0, 284.0, 303.0, 293.0, 291.0, 298.0, 289.0, 320.0, 316.0, 283.0, 293.0, 322.0, 314.0, 290.0, 283.0, 277.0, 293.0, 320.0, 319.0, 268.0, 259.0, 296.0, 294.0, 288.0, 299.0, 320.0, 319.0, 286.0, 293.0, 291.0, 285.0, 288.0, 294.0, 295.0, 287.0, 288.0, 294.0, 289.0, 293.0, 283.0, 296.0, 280.0, 296.0, 285.0, 297.0, 285.0, 291.0, 289.0, 293.0, 296.0, 283.0, 285.0, 294.0, 294.0, 282.0, 316.0, 314.0, 319.0, 308.0, 314.0, 319.0, 298.0, 
289.0, 316.0, 314.0, 292.0, 290.0, 290.0, 297.0, 290.0, 289.0, 316.0, 314.0, 295.0, 287.0, 293.0, 286.0, 310.0, 317.0, 293.0, 289.0, 290.0, 289.0, 314.0, 322.0, 313.0, 317.0, 316.0, 311.0, 292.0, 290.0, 291.0, 291.0, 267.0, 263.0, 62.0, 61.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.694177385931617, "mean_inference_ms": 1.2375913086644352, "mean_action_processing_ms": 0.1332256740537696, "mean_env_wait_ms": 0.8355032005845738, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 11724800, "num_agent_steps_trained": 11724800, "num_env_steps_sampled": 5862400, "num_env_steps_trained": 5862400, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 5862400, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 11724800, "timers": {"training_iteration_time_ms": 3563.501, "learn_time_ms": 1067.366, "learn_throughput": 11992.141, "synch_weights_time_ms": 11.841}, "counters": {"num_env_steps_sampled": 5862400, "num_env_steps_trained": 5862400, "num_agent_steps_sampled": 11724800, "num_agent_steps_trained": 11724800}, "done": false, "episodes_total": 14656, "training_iteration": 458, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-30-12", "timestamp": 1666582212, "time_this_iter_s": 3.5484938621520996, "time_total_s": 1742.5052449703217, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1742.5052449703217, "timesteps_since_restore": 0, "iterations_since_restore": 458, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.54, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 203.2, "sparse_reward_min": 40, "sparse_reward_max": 220, "shaped_reward_mean": 181.62, "shaped_reward_min": 43, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.84, "onion_pickup_agent_0_min": 2, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 16.64, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 16.66, "useful_onion_pickup_agent_0_min": 2, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 16.37, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.08, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.05, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.03, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, 
"useful_onion_drop_agent_1_mean": 0.02, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.48, "potting_onion_agent_0_min": 2, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 16.33, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.34, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 5.49, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.2, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.38, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.04, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.06, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.1, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.19, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.03, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.15, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.48, "optimal_onion_potting_agent_0_min": 2, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 16.33, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.48, "viable_onion_potting_agent_0_min": 2, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 16.33, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0008513854118064046, "policy_loss": 0.0004615961806848645, "vf_loss": 
7.743380069732666, "vf_explained_var": 0.6081850528717041, "kl": 0.0025588269345462322, "entropy": 0.7690985202789307, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 5875200, "num_env_steps_trained": 5875200, "num_agent_steps_sampled": 11750400, "num_agent_steps_trained": 11750400}, "sampler_results": {"episode_reward_max": 639.0, "episode_reward_min": 123.0, "episode_reward_mean": 588.02, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 61.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 294.01}, "custom_metrics": {"sparse_reward_mean": 203.2, "sparse_reward_min": 40, "sparse_reward_max": 220, "shaped_reward_mean": 181.62, "shaped_reward_min": 43, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.84, "onion_pickup_agent_0_min": 2, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 16.64, 
"onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 16.66, "useful_onion_pickup_agent_0_min": 2, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 16.37, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.08, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.05, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.03, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.02, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.48, "potting_onion_agent_0_min": 2, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 16.33, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.34, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 5.49, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.2, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.38, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.04, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.06, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.1, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.19, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.03, "soup_delivery_agent_0_min": 2, 
"soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.15, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.48, "optimal_onion_potting_agent_0_min": 2, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 16.33, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.48, "viable_onion_potting_agent_0_min": 2, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 16.33, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [636.0, 587.0, 633.0, 636.0, 582.0, 627.0, 579.0, 576.0, 639.0, 579.0, 630.0, 582.0, 579.0, 579.0, 582.0, 582.0, 573.0, 627.0, 582.0, 630.0, 587.0, 584.0, 587.0, 636.0, 576.0, 636.0, 573.0, 570.0, 639.0, 527.0, 590.0, 587.0, 639.0, 579.0, 576.0, 582.0, 582.0, 582.0, 582.0, 579.0, 576.0, 582.0, 576.0, 582.0, 579.0, 579.0, 576.0, 630.0, 627.0, 633.0, 587.0, 630.0, 582.0, 587.0, 579.0, 630.0, 582.0, 579.0, 627.0, 582.0, 579.0, 636.0, 630.0, 627.0, 582.0, 582.0, 530.0, 123.0, 633.0, 576.0, 636.0, 479.0, 579.0, 582.0, 633.0, 630.0, 579.0, 579.0, 636.0, 576.0, 582.0, 576.0, 527.0, 582.0, 576.0, 582.0, 579.0, 576.0, 627.0, 513.0, 630.0, 582.0, 582.0, 573.0, 587.0, 582.0, 579.0, 630.0, 576.0, 636.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [317.0, 319.0, 294.0, 293.0, 317.0, 316.0, 317.0, 319.0, 292.0, 290.0, 309.0, 318.0, 286.0, 293.0, 291.0, 285.0, 317.0, 322.0, 293.0, 286.0, 313.0, 317.0, 292.0, 290.0, 283.0, 296.0, 283.0, 296.0, 293.0, 289.0, 286.0, 296.0, 286.0, 287.0, 311.0, 316.0, 288.0, 294.0, 315.0, 315.0, 284.0, 303.0, 293.0, 291.0, 298.0, 289.0, 320.0, 316.0, 283.0, 
293.0, 322.0, 314.0, 290.0, 283.0, 277.0, 293.0, 320.0, 319.0, 268.0, 259.0, 296.0, 294.0, 288.0, 299.0, 320.0, 319.0, 286.0, 293.0, 291.0, 285.0, 288.0, 294.0, 295.0, 287.0, 288.0, 294.0, 289.0, 293.0, 283.0, 296.0, 280.0, 296.0, 285.0, 297.0, 285.0, 291.0, 289.0, 293.0, 296.0, 283.0, 285.0, 294.0, 294.0, 282.0, 316.0, 314.0, 319.0, 308.0, 314.0, 319.0, 298.0, 289.0, 316.0, 314.0, 292.0, 290.0, 290.0, 297.0, 290.0, 289.0, 316.0, 314.0, 295.0, 287.0, 293.0, 286.0, 310.0, 317.0, 293.0, 289.0, 290.0, 289.0, 314.0, 322.0, 313.0, 317.0, 316.0, 311.0, 292.0, 290.0, 291.0, 291.0, 267.0, 263.0, 62.0, 61.0, 317.0, 316.0, 291.0, 285.0, 319.0, 317.0, 248.0, 231.0, 291.0, 288.0, 290.0, 292.0, 317.0, 316.0, 316.0, 314.0, 286.0, 293.0, 288.0, 291.0, 317.0, 319.0, 286.0, 290.0, 292.0, 290.0, 285.0, 291.0, 254.0, 273.0, 294.0, 288.0, 285.0, 291.0, 288.0, 294.0, 291.0, 288.0, 293.0, 283.0, 314.0, 313.0, 265.0, 248.0, 316.0, 314.0, 287.0, 295.0, 290.0, 292.0, 294.0, 279.0, 299.0, 288.0, 293.0, 289.0, 293.0, 286.0, 314.0, 316.0, 289.0, 287.0, 309.0, 327.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6941295586287947, "mean_inference_ms": 1.2374918339988819, "mean_action_processing_ms": 0.13322028825706636, "mean_env_wait_ms": 0.8354410346542884, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 639.0, "episode_reward_min": 123.0, "episode_reward_mean": 588.02, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 61.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 294.01}, "hist_stats": {"episode_reward": [636.0, 587.0, 633.0, 636.0, 582.0, 627.0, 579.0, 576.0, 639.0, 579.0, 630.0, 582.0, 579.0, 579.0, 582.0, 582.0, 573.0, 627.0, 582.0, 630.0, 587.0, 584.0, 587.0, 636.0, 576.0, 636.0, 573.0, 570.0, 639.0, 527.0, 590.0, 587.0, 639.0, 579.0, 576.0, 582.0, 582.0, 582.0, 582.0, 579.0, 576.0, 582.0, 576.0, 582.0, 579.0, 579.0, 576.0, 630.0, 627.0, 633.0, 587.0, 630.0, 582.0, 587.0, 579.0, 630.0, 582.0, 
579.0, 627.0, 582.0, 579.0, 636.0, 630.0, 627.0, 582.0, 582.0, 530.0, 123.0, 633.0, 576.0, 636.0, 479.0, 579.0, 582.0, 633.0, 630.0, 579.0, 579.0, 636.0, 576.0, 582.0, 576.0, 527.0, 582.0, 576.0, 582.0, 579.0, 576.0, 627.0, 513.0, 630.0, 582.0, 582.0, 573.0, 587.0, 582.0, 579.0, 630.0, 576.0, 636.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [317.0, 319.0, 294.0, 293.0, 317.0, 316.0, 317.0, 319.0, 292.0, 290.0, 309.0, 318.0, 286.0, 293.0, 291.0, 285.0, 317.0, 322.0, 293.0, 286.0, 313.0, 317.0, 292.0, 290.0, 283.0, 296.0, 283.0, 296.0, 293.0, 289.0, 286.0, 296.0, 286.0, 287.0, 311.0, 316.0, 288.0, 294.0, 315.0, 315.0, 284.0, 303.0, 293.0, 291.0, 298.0, 289.0, 320.0, 316.0, 283.0, 293.0, 322.0, 314.0, 290.0, 283.0, 277.0, 293.0, 320.0, 319.0, 268.0, 259.0, 296.0, 294.0, 288.0, 299.0, 320.0, 319.0, 286.0, 293.0, 291.0, 285.0, 288.0, 294.0, 295.0, 287.0, 288.0, 294.0, 289.0, 293.0, 283.0, 296.0, 280.0, 296.0, 285.0, 297.0, 285.0, 291.0, 289.0, 293.0, 296.0, 283.0, 285.0, 294.0, 294.0, 282.0, 316.0, 314.0, 319.0, 308.0, 314.0, 319.0, 298.0, 289.0, 316.0, 314.0, 292.0, 290.0, 290.0, 297.0, 290.0, 289.0, 316.0, 314.0, 295.0, 287.0, 293.0, 286.0, 310.0, 317.0, 293.0, 289.0, 290.0, 289.0, 314.0, 322.0, 313.0, 317.0, 316.0, 311.0, 292.0, 290.0, 291.0, 291.0, 267.0, 263.0, 62.0, 61.0, 317.0, 316.0, 291.0, 285.0, 319.0, 317.0, 248.0, 231.0, 291.0, 288.0, 290.0, 292.0, 317.0, 316.0, 316.0, 314.0, 286.0, 293.0, 288.0, 291.0, 317.0, 319.0, 286.0, 290.0, 292.0, 290.0, 285.0, 291.0, 254.0, 
273.0, 294.0, 288.0, 285.0, 291.0, 288.0, 294.0, 291.0, 288.0, 293.0, 283.0, 314.0, 313.0, 265.0, 248.0, 316.0, 314.0, 287.0, 295.0, 290.0, 292.0, 294.0, 279.0, 299.0, 288.0, 293.0, 289.0, 293.0, 286.0, 314.0, 316.0, 289.0, 287.0, 309.0, 327.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6941295586287947, "mean_inference_ms": 1.2374918339988819, "mean_action_processing_ms": 0.13322028825706636, "mean_env_wait_ms": 0.8354410346542884, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 11750400, "num_agent_steps_trained": 11750400, "num_env_steps_sampled": 5875200, "num_env_steps_trained": 5875200, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 5875200, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 11750400, "timers": {"training_iteration_time_ms": 3554.168, "learn_time_ms": 1062.428, "learn_throughput": 12047.879, "synch_weights_time_ms": 12.479}, "counters": {"num_env_steps_sampled": 5875200, "num_env_steps_trained": 5875200, "num_agent_steps_sampled": 11750400, "num_agent_steps_trained": 11750400}, "done": false, "episodes_total": 14688, "training_iteration": 459, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-30-15", "timestamp": 1666582215, "time_this_iter_s": 3.5224757194519043, "time_total_s": 1746.0277206897736, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1746.0277206897736, "timesteps_since_restore": 0, "iterations_since_restore": 459, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.860000000000003, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 201.8, "sparse_reward_min": 40, "sparse_reward_max": 220, "shaped_reward_mean": 179.96, "shaped_reward_min": 43, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.55, "onion_pickup_agent_0_min": 2, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 16.8, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 16.37, "useful_onion_pickup_agent_0_min": 2, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 16.49, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.08, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.05, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.05, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, 
"useful_onion_drop_agent_1_mean": 0.03, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.12, "potting_onion_agent_0_min": 2, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 16.45, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.36, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 5.26, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.24, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.21, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.03, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.04, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.12, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.06, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.08, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.02, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.12, "optimal_onion_potting_agent_0_min": 2, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 16.45, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.12, "viable_onion_potting_agent_0_min": 2, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 16.45, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0015454906970262527, "policy_loss": 0.001154657220467925, "vf_loss": 
7.783836364746094, "vf_explained_var": 0.6133667826652527, "kl": 0.002887023612856865, "entropy": 0.7751001119613647, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 5888000, "num_env_steps_trained": 5888000, "num_agent_steps_sampled": 11776000, "num_agent_steps_trained": 11776000}, "sampler_results": {"episode_reward_max": 639.0, "episode_reward_min": 123.0, "episode_reward_mean": 583.56, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 61.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 291.78}, "custom_metrics": {"sparse_reward_mean": 201.8, "sparse_reward_min": 40, "sparse_reward_max": 220, "shaped_reward_mean": 179.96, "shaped_reward_min": 43, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.55, "onion_pickup_agent_0_min": 2, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 16.8, 
"onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 16.37, "useful_onion_pickup_agent_0_min": 2, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 16.49, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.08, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.05, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.05, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.03, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.12, "potting_onion_agent_0_min": 2, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 16.45, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.36, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 5.26, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.24, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.21, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.03, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.04, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.12, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.06, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.08, "soup_delivery_agent_0_min": 2, 
"soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.02, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.12, "optimal_onion_potting_agent_0_min": 2, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 16.45, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.12, "viable_onion_potting_agent_0_min": 2, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 16.45, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [639.0, 579.0, 576.0, 582.0, 582.0, 582.0, 582.0, 579.0, 576.0, 582.0, 576.0, 582.0, 579.0, 579.0, 576.0, 630.0, 627.0, 633.0, 587.0, 630.0, 582.0, 587.0, 579.0, 630.0, 582.0, 579.0, 627.0, 582.0, 579.0, 636.0, 630.0, 627.0, 582.0, 582.0, 530.0, 123.0, 633.0, 576.0, 636.0, 479.0, 579.0, 582.0, 633.0, 630.0, 579.0, 579.0, 636.0, 576.0, 582.0, 576.0, 527.0, 582.0, 576.0, 582.0, 579.0, 576.0, 627.0, 513.0, 630.0, 582.0, 582.0, 573.0, 587.0, 582.0, 579.0, 630.0, 576.0, 636.0, 630.0, 582.0, 579.0, 513.0, 579.0, 587.0, 579.0, 582.0, 576.0, 579.0, 582.0, 582.0, 576.0, 576.0, 627.0, 573.0, 576.0, 582.0, 576.0, 636.0, 573.0, 633.0, 633.0, 582.0, 516.0, 587.0, 579.0, 587.0, 525.0, 573.0, 630.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [320.0, 319.0, 286.0, 293.0, 291.0, 285.0, 288.0, 294.0, 295.0, 287.0, 288.0, 294.0, 289.0, 293.0, 283.0, 296.0, 280.0, 296.0, 285.0, 297.0, 285.0, 291.0, 289.0, 293.0, 296.0, 283.0, 285.0, 294.0, 294.0, 282.0, 316.0, 314.0, 319.0, 308.0, 314.0, 319.0, 298.0, 289.0, 316.0, 314.0, 292.0, 290.0, 290.0, 297.0, 290.0, 289.0, 316.0, 314.0, 295.0, 
287.0, 293.0, 286.0, 310.0, 317.0, 293.0, 289.0, 290.0, 289.0, 314.0, 322.0, 313.0, 317.0, 316.0, 311.0, 292.0, 290.0, 291.0, 291.0, 267.0, 263.0, 62.0, 61.0, 317.0, 316.0, 291.0, 285.0, 319.0, 317.0, 248.0, 231.0, 291.0, 288.0, 290.0, 292.0, 317.0, 316.0, 316.0, 314.0, 286.0, 293.0, 288.0, 291.0, 317.0, 319.0, 286.0, 290.0, 292.0, 290.0, 285.0, 291.0, 254.0, 273.0, 294.0, 288.0, 285.0, 291.0, 288.0, 294.0, 291.0, 288.0, 293.0, 283.0, 314.0, 313.0, 265.0, 248.0, 316.0, 314.0, 287.0, 295.0, 290.0, 292.0, 294.0, 279.0, 299.0, 288.0, 293.0, 289.0, 293.0, 286.0, 314.0, 316.0, 289.0, 287.0, 309.0, 327.0, 312.0, 318.0, 291.0, 291.0, 285.0, 294.0, 247.0, 266.0, 288.0, 291.0, 293.0, 294.0, 291.0, 288.0, 292.0, 290.0, 291.0, 285.0, 290.0, 289.0, 296.0, 286.0, 289.0, 293.0, 280.0, 296.0, 281.0, 295.0, 306.0, 321.0, 291.0, 282.0, 288.0, 288.0, 292.0, 290.0, 285.0, 291.0, 317.0, 319.0, 279.0, 294.0, 314.0, 319.0, 314.0, 319.0, 290.0, 292.0, 262.0, 254.0, 295.0, 292.0, 286.0, 293.0, 291.0, 296.0, 267.0, 258.0, 290.0, 283.0, 314.0, 316.0, 289.0, 287.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6940799934848328, "mean_inference_ms": 1.2373845747527157, "mean_action_processing_ms": 0.1332148833976582, "mean_env_wait_ms": 0.8353725766416104, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 639.0, "episode_reward_min": 123.0, "episode_reward_mean": 583.56, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 61.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 291.78}, "hist_stats": {"episode_reward": [639.0, 579.0, 576.0, 582.0, 582.0, 582.0, 582.0, 579.0, 576.0, 582.0, 576.0, 582.0, 579.0, 579.0, 576.0, 630.0, 627.0, 633.0, 587.0, 630.0, 582.0, 587.0, 579.0, 630.0, 582.0, 579.0, 627.0, 582.0, 579.0, 636.0, 630.0, 627.0, 582.0, 582.0, 530.0, 123.0, 633.0, 576.0, 636.0, 479.0, 579.0, 582.0, 633.0, 630.0, 579.0, 579.0, 636.0, 576.0, 582.0, 576.0, 527.0, 582.0, 576.0, 582.0, 579.0, 576.0, 627.0, 
513.0, 630.0, 582.0, 582.0, 573.0, 587.0, 582.0, 579.0, 630.0, 576.0, 636.0, 630.0, 582.0, 579.0, 513.0, 579.0, 587.0, 579.0, 582.0, 576.0, 579.0, 582.0, 582.0, 576.0, 576.0, 627.0, 573.0, 576.0, 582.0, 576.0, 636.0, 573.0, 633.0, 633.0, 582.0, 516.0, 587.0, 579.0, 587.0, 525.0, 573.0, 630.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [320.0, 319.0, 286.0, 293.0, 291.0, 285.0, 288.0, 294.0, 295.0, 287.0, 288.0, 294.0, 289.0, 293.0, 283.0, 296.0, 280.0, 296.0, 285.0, 297.0, 285.0, 291.0, 289.0, 293.0, 296.0, 283.0, 285.0, 294.0, 294.0, 282.0, 316.0, 314.0, 319.0, 308.0, 314.0, 319.0, 298.0, 289.0, 316.0, 314.0, 292.0, 290.0, 290.0, 297.0, 290.0, 289.0, 316.0, 314.0, 295.0, 287.0, 293.0, 286.0, 310.0, 317.0, 293.0, 289.0, 290.0, 289.0, 314.0, 322.0, 313.0, 317.0, 316.0, 311.0, 292.0, 290.0, 291.0, 291.0, 267.0, 263.0, 62.0, 61.0, 317.0, 316.0, 291.0, 285.0, 319.0, 317.0, 248.0, 231.0, 291.0, 288.0, 290.0, 292.0, 317.0, 316.0, 316.0, 314.0, 286.0, 293.0, 288.0, 291.0, 317.0, 319.0, 286.0, 290.0, 292.0, 290.0, 285.0, 291.0, 254.0, 273.0, 294.0, 288.0, 285.0, 291.0, 288.0, 294.0, 291.0, 288.0, 293.0, 283.0, 314.0, 313.0, 265.0, 248.0, 316.0, 314.0, 287.0, 295.0, 290.0, 292.0, 294.0, 279.0, 299.0, 288.0, 293.0, 289.0, 293.0, 286.0, 314.0, 316.0, 289.0, 287.0, 309.0, 327.0, 312.0, 318.0, 291.0, 291.0, 285.0, 294.0, 247.0, 266.0, 288.0, 291.0, 293.0, 294.0, 291.0, 288.0, 292.0, 290.0, 291.0, 285.0, 290.0, 289.0, 296.0, 286.0, 289.0, 293.0, 280.0, 296.0, 281.0, 295.0, 306.0, 
321.0, 291.0, 282.0, 288.0, 288.0, 292.0, 290.0, 285.0, 291.0, 317.0, 319.0, 279.0, 294.0, 314.0, 319.0, 314.0, 319.0, 290.0, 292.0, 262.0, 254.0, 295.0, 292.0, 286.0, 293.0, 291.0, 296.0, 267.0, 258.0, 290.0, 283.0, 314.0, 316.0, 289.0, 287.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6940799934848328, "mean_inference_ms": 1.2373845747527157, "mean_action_processing_ms": 0.1332148833976582, "mean_env_wait_ms": 0.8353725766416104, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 11776000, "num_agent_steps_trained": 11776000, "num_env_steps_sampled": 5888000, "num_env_steps_trained": 5888000, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 5888000, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 11776000, "timers": {"training_iteration_time_ms": 3526.638, "learn_time_ms": 1056.455, "learn_throughput": 12115.986, "synch_weights_time_ms": 12.692}, "counters": {"num_env_steps_sampled": 5888000, "num_env_steps_trained": 5888000, "num_agent_steps_sampled": 11776000, "num_agent_steps_trained": 11776000}, "done": false, "episodes_total": 14720, "training_iteration": 460, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-30-19", "timestamp": 1666582219, "time_this_iter_s": 3.6077170372009277, "time_total_s": 1749.6354377269745, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1749.6354377269745, "timesteps_since_restore": 0, "iterations_since_restore": 460, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 21.166666666666668, "ram_util_percent": 10.616666666666665}}
+{"custom_metrics": {"sparse_reward_mean": 201.2, "sparse_reward_min": 40, "sparse_reward_max": 220, "shaped_reward_mean": 179.67, "shaped_reward_min": 43, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.55, "onion_pickup_agent_0_min": 2, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 16.8, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 16.35, "useful_onion_pickup_agent_0_min": 2, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 16.53, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.05, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.04, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.05, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, 
"useful_onion_drop_agent_1_mean": 0.03, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.1, "potting_onion_agent_0_min": 2, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 16.45, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.32, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 5.22, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.23, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.16, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.01, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.02, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.14, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.03, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.11, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.98, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.1, "optimal_onion_potting_agent_0_min": 2, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 16.45, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.1, "viable_onion_potting_agent_0_min": 2, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 16.45, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.002214742125943303, "policy_loss": -0.0026062014512717724, "vf_loss": 
7.726222991943359, "vf_explained_var": 0.6182592511177063, "kl": 0.002622842788696289, "entropy": 0.7623258829116821, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 5900800, "num_env_steps_trained": 5900800, "num_agent_steps_sampled": 11801600, "num_agent_steps_trained": 11801600}, "sampler_results": {"episode_reward_max": 636.0, "episode_reward_min": 123.0, "episode_reward_mean": 582.07, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 61.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 291.035}, "custom_metrics": {"sparse_reward_mean": 201.2, "sparse_reward_min": 40, "sparse_reward_max": 220, "shaped_reward_mean": 179.67, "shaped_reward_min": 43, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.55, "onion_pickup_agent_0_min": 2, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 16.8, 
"onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 16.35, "useful_onion_pickup_agent_0_min": 2, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 16.53, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.05, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.04, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.05, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.03, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.1, "potting_onion_agent_0_min": 2, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 16.45, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.32, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 5.22, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.23, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.16, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.01, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.02, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.14, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.03, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.11, "soup_delivery_agent_0_min": 2, 
"soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.98, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.1, "optimal_onion_potting_agent_0_min": 2, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 16.45, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.1, "viable_onion_potting_agent_0_min": 2, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 16.45, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 582.0, 530.0, 123.0, 633.0, 576.0, 636.0, 479.0, 579.0, 582.0, 633.0, 630.0, 579.0, 579.0, 636.0, 576.0, 582.0, 576.0, 527.0, 582.0, 576.0, 582.0, 579.0, 576.0, 627.0, 513.0, 630.0, 582.0, 582.0, 573.0, 587.0, 582.0, 579.0, 630.0, 576.0, 636.0, 630.0, 582.0, 579.0, 513.0, 579.0, 587.0, 579.0, 582.0, 576.0, 579.0, 582.0, 582.0, 576.0, 576.0, 627.0, 573.0, 576.0, 582.0, 576.0, 636.0, 573.0, 633.0, 633.0, 582.0, 516.0, 587.0, 579.0, 587.0, 525.0, 573.0, 630.0, 576.0, 579.0, 581.0, 630.0, 576.0, 627.0, 636.0, 633.0, 636.0, 630.0, 582.0, 582.0, 630.0, 579.0, 576.0, 579.0, 582.0, 579.0, 579.0, 630.0, 576.0, 579.0, 539.0, 587.0, 579.0, 582.0, 582.0, 579.0, 627.0, 576.0, 636.0, 536.0, 525.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [292.0, 290.0, 291.0, 291.0, 267.0, 263.0, 62.0, 61.0, 317.0, 316.0, 291.0, 285.0, 319.0, 317.0, 248.0, 231.0, 291.0, 288.0, 290.0, 292.0, 317.0, 316.0, 316.0, 314.0, 286.0, 293.0, 288.0, 291.0, 317.0, 319.0, 286.0, 290.0, 292.0, 290.0, 285.0, 291.0, 254.0, 273.0, 294.0, 288.0, 285.0, 291.0, 288.0, 294.0, 291.0, 288.0, 293.0, 283.0, 314.0, 
313.0, 265.0, 248.0, 316.0, 314.0, 287.0, 295.0, 290.0, 292.0, 294.0, 279.0, 299.0, 288.0, 293.0, 289.0, 293.0, 286.0, 314.0, 316.0, 289.0, 287.0, 309.0, 327.0, 312.0, 318.0, 291.0, 291.0, 285.0, 294.0, 247.0, 266.0, 288.0, 291.0, 293.0, 294.0, 291.0, 288.0, 292.0, 290.0, 291.0, 285.0, 290.0, 289.0, 296.0, 286.0, 289.0, 293.0, 280.0, 296.0, 281.0, 295.0, 306.0, 321.0, 291.0, 282.0, 288.0, 288.0, 292.0, 290.0, 285.0, 291.0, 317.0, 319.0, 279.0, 294.0, 314.0, 319.0, 314.0, 319.0, 290.0, 292.0, 262.0, 254.0, 295.0, 292.0, 286.0, 293.0, 291.0, 296.0, 267.0, 258.0, 290.0, 283.0, 314.0, 316.0, 289.0, 287.0, 294.0, 285.0, 288.0, 293.0, 314.0, 316.0, 284.0, 292.0, 327.0, 300.0, 311.0, 325.0, 319.0, 314.0, 321.0, 315.0, 321.0, 309.0, 290.0, 292.0, 293.0, 289.0, 313.0, 317.0, 284.0, 295.0, 288.0, 288.0, 294.0, 285.0, 284.0, 298.0, 291.0, 288.0, 291.0, 288.0, 319.0, 311.0, 285.0, 291.0, 290.0, 289.0, 271.0, 268.0, 293.0, 294.0, 284.0, 295.0, 296.0, 286.0, 298.0, 284.0, 290.0, 289.0, 306.0, 321.0, 281.0, 295.0, 322.0, 314.0, 268.0, 268.0, 260.0, 265.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6940392492048548, "mean_inference_ms": 1.2372834252469516, "mean_action_processing_ms": 0.13321063444359982, "mean_env_wait_ms": 0.8353075411248264, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 636.0, "episode_reward_min": 123.0, "episode_reward_mean": 582.07, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 61.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 291.035}, "hist_stats": {"episode_reward": [582.0, 582.0, 530.0, 123.0, 633.0, 576.0, 636.0, 479.0, 579.0, 582.0, 633.0, 630.0, 579.0, 579.0, 636.0, 576.0, 582.0, 576.0, 527.0, 582.0, 576.0, 582.0, 579.0, 576.0, 627.0, 513.0, 630.0, 582.0, 582.0, 573.0, 587.0, 582.0, 579.0, 630.0, 576.0, 636.0, 630.0, 582.0, 579.0, 513.0, 579.0, 587.0, 579.0, 582.0, 576.0, 579.0, 582.0, 582.0, 576.0, 576.0, 627.0, 573.0, 576.0, 582.0, 576.0, 636.0, 573.0, 
633.0, 633.0, 582.0, 516.0, 587.0, 579.0, 587.0, 525.0, 573.0, 630.0, 576.0, 579.0, 581.0, 630.0, 576.0, 627.0, 636.0, 633.0, 636.0, 630.0, 582.0, 582.0, 630.0, 579.0, 576.0, 579.0, 582.0, 579.0, 579.0, 630.0, 576.0, 579.0, 539.0, 587.0, 579.0, 582.0, 582.0, 579.0, 627.0, 576.0, 636.0, 536.0, 525.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [292.0, 290.0, 291.0, 291.0, 267.0, 263.0, 62.0, 61.0, 317.0, 316.0, 291.0, 285.0, 319.0, 317.0, 248.0, 231.0, 291.0, 288.0, 290.0, 292.0, 317.0, 316.0, 316.0, 314.0, 286.0, 293.0, 288.0, 291.0, 317.0, 319.0, 286.0, 290.0, 292.0, 290.0, 285.0, 291.0, 254.0, 273.0, 294.0, 288.0, 285.0, 291.0, 288.0, 294.0, 291.0, 288.0, 293.0, 283.0, 314.0, 313.0, 265.0, 248.0, 316.0, 314.0, 287.0, 295.0, 290.0, 292.0, 294.0, 279.0, 299.0, 288.0, 293.0, 289.0, 293.0, 286.0, 314.0, 316.0, 289.0, 287.0, 309.0, 327.0, 312.0, 318.0, 291.0, 291.0, 285.0, 294.0, 247.0, 266.0, 288.0, 291.0, 293.0, 294.0, 291.0, 288.0, 292.0, 290.0, 291.0, 285.0, 290.0, 289.0, 296.0, 286.0, 289.0, 293.0, 280.0, 296.0, 281.0, 295.0, 306.0, 321.0, 291.0, 282.0, 288.0, 288.0, 292.0, 290.0, 285.0, 291.0, 317.0, 319.0, 279.0, 294.0, 314.0, 319.0, 314.0, 319.0, 290.0, 292.0, 262.0, 254.0, 295.0, 292.0, 286.0, 293.0, 291.0, 296.0, 267.0, 258.0, 290.0, 283.0, 314.0, 316.0, 289.0, 287.0, 294.0, 285.0, 288.0, 293.0, 314.0, 316.0, 284.0, 292.0, 327.0, 300.0, 311.0, 325.0, 319.0, 314.0, 321.0, 315.0, 321.0, 309.0, 290.0, 292.0, 293.0, 289.0, 313.0, 317.0, 284.0, 295.0, 288.0, 288.0, 294.0, 
285.0, 284.0, 298.0, 291.0, 288.0, 291.0, 288.0, 319.0, 311.0, 285.0, 291.0, 290.0, 289.0, 271.0, 268.0, 293.0, 294.0, 284.0, 295.0, 296.0, 286.0, 298.0, 284.0, 290.0, 289.0, 306.0, 321.0, 281.0, 295.0, 322.0, 314.0, 268.0, 268.0, 260.0, 265.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6940392492048548, "mean_inference_ms": 1.2372834252469516, "mean_action_processing_ms": 0.13321063444359982, "mean_env_wait_ms": 0.8353075411248264, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 11801600, "num_agent_steps_trained": 11801600, "num_env_steps_sampled": 5900800, "num_env_steps_trained": 5900800, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 5900800, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 11801600, "timers": {"training_iteration_time_ms": 3536.669, "learn_time_ms": 1064.03, "learn_throughput": 12029.737, "synch_weights_time_ms": 11.465}, "counters": {"num_env_steps_sampled": 5900800, "num_env_steps_trained": 5900800, "num_agent_steps_sampled": 11801600, "num_agent_steps_trained": 11801600}, "done": false, "episodes_total": 14752, "training_iteration": 461, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-30-23", "timestamp": 1666582223, "time_this_iter_s": 3.673919439315796, "time_total_s": 1753.3093571662903, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1753.3093571662903, "timesteps_since_restore": 0, "iterations_since_restore": 461, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 24.659999999999997, "ram_util_percent": 10.64}}
+{"custom_metrics": {"sparse_reward_mean": 203.4, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 181.44, "shaped_reward_min": 153, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.83, "onion_pickup_agent_0_min": 10, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 16.83, "onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.62, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 16.62, "useful_onion_pickup_agent_1_min": 11, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.08, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.08, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.05, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, 
"useful_onion_drop_agent_1_mean": 0.03, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.39, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 16.43, "potting_onion_agent_1_min": 11, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.26, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 5.42, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.18, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.33, "useful_dish_pickup_agent_1_min": 3, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.01, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.05, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.08, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.21, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.05, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.15, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.39, "optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 16.43, "optimal_onion_potting_agent_1_min": 11, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.39, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 16.43, "viable_onion_potting_agent_1_min": 11, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0009108797530643642, "policy_loss": -0.0012938372092321515, "vf_loss": 
7.6942853927612305, "vf_explained_var": 0.6132245659828186, "kl": 0.002680128440260887, "entropy": 0.7729424238204956, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 5913600, "num_env_steps_trained": 5913600, "num_agent_steps_sampled": 11827200, "num_agent_steps_trained": 11827200}, "sampler_results": {"episode_reward_max": 636.0, "episode_reward_min": 513.0, "episode_reward_mean": 588.24, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 247.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 294.12}, "custom_metrics": {"sparse_reward_mean": 203.4, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 181.44, "shaped_reward_min": 153, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.83, "onion_pickup_agent_0_min": 10, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 16.83, 
"onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.62, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 16.62, "useful_onion_pickup_agent_1_min": 11, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.08, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.08, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.05, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.03, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.39, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 16.43, "potting_onion_agent_1_min": 11, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.26, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 5.42, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.18, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.33, "useful_dish_pickup_agent_1_min": 3, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.01, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.05, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.08, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.21, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.05, "soup_delivery_agent_0_min": 3, 
"soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.15, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.39, "optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 16.43, "optimal_onion_potting_agent_1_min": 11, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.39, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 16.43, "viable_onion_potting_agent_1_min": 11, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 630.0, 576.0, 636.0, 630.0, 582.0, 579.0, 513.0, 579.0, 587.0, 579.0, 582.0, 576.0, 579.0, 582.0, 582.0, 576.0, 576.0, 627.0, 573.0, 576.0, 582.0, 576.0, 636.0, 573.0, 633.0, 633.0, 582.0, 516.0, 587.0, 579.0, 587.0, 525.0, 573.0, 630.0, 576.0, 579.0, 581.0, 630.0, 576.0, 627.0, 636.0, 633.0, 636.0, 630.0, 582.0, 582.0, 630.0, 579.0, 576.0, 579.0, 582.0, 579.0, 579.0, 630.0, 576.0, 579.0, 539.0, 587.0, 579.0, 582.0, 582.0, 579.0, 627.0, 576.0, 636.0, 536.0, 525.0, 633.0, 587.0, 582.0, 576.0, 587.0, 582.0, 579.0, 579.0, 636.0, 630.0, 576.0, 573.0, 582.0, 576.0, 527.0, 590.0, 582.0, 519.0, 633.0, 582.0, 582.0, 633.0, 579.0, 522.0, 573.0, 627.0, 630.0, 576.0, 590.0, 573.0, 576.0, 636.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [293.0, 286.0, 314.0, 316.0, 289.0, 287.0, 309.0, 327.0, 312.0, 318.0, 291.0, 291.0, 285.0, 294.0, 247.0, 266.0, 288.0, 291.0, 293.0, 294.0, 291.0, 288.0, 292.0, 290.0, 291.0, 285.0, 290.0, 289.0, 296.0, 286.0, 289.0, 293.0, 280.0, 296.0, 281.0, 295.0, 306.0, 321.0, 291.0, 282.0, 288.0, 288.0, 292.0, 290.0, 285.0, 291.0, 317.0, 319.0, 279.0, 
294.0, 314.0, 319.0, 314.0, 319.0, 290.0, 292.0, 262.0, 254.0, 295.0, 292.0, 286.0, 293.0, 291.0, 296.0, 267.0, 258.0, 290.0, 283.0, 314.0, 316.0, 289.0, 287.0, 294.0, 285.0, 288.0, 293.0, 314.0, 316.0, 284.0, 292.0, 327.0, 300.0, 311.0, 325.0, 319.0, 314.0, 321.0, 315.0, 321.0, 309.0, 290.0, 292.0, 293.0, 289.0, 313.0, 317.0, 284.0, 295.0, 288.0, 288.0, 294.0, 285.0, 284.0, 298.0, 291.0, 288.0, 291.0, 288.0, 319.0, 311.0, 285.0, 291.0, 290.0, 289.0, 271.0, 268.0, 293.0, 294.0, 284.0, 295.0, 296.0, 286.0, 298.0, 284.0, 290.0, 289.0, 306.0, 321.0, 281.0, 295.0, 322.0, 314.0, 268.0, 268.0, 260.0, 265.0, 318.0, 315.0, 294.0, 293.0, 293.0, 289.0, 288.0, 288.0, 294.0, 293.0, 281.0, 301.0, 281.0, 298.0, 291.0, 288.0, 317.0, 319.0, 319.0, 311.0, 287.0, 289.0, 287.0, 286.0, 287.0, 295.0, 283.0, 293.0, 256.0, 271.0, 296.0, 294.0, 290.0, 292.0, 268.0, 251.0, 322.0, 311.0, 295.0, 287.0, 290.0, 292.0, 312.0, 321.0, 291.0, 288.0, 261.0, 261.0, 284.0, 289.0, 319.0, 308.0, 317.0, 313.0, 286.0, 290.0, 289.0, 301.0, 283.0, 290.0, 286.0, 290.0, 315.0, 321.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6939940852335686, "mean_inference_ms": 1.2371854185406679, "mean_action_processing_ms": 0.13320635803541322, "mean_env_wait_ms": 0.8352454574282127, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 636.0, "episode_reward_min": 513.0, "episode_reward_mean": 588.24, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 247.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 294.12}, "hist_stats": {"episode_reward": [579.0, 630.0, 576.0, 636.0, 630.0, 582.0, 579.0, 513.0, 579.0, 587.0, 579.0, 582.0, 576.0, 579.0, 582.0, 582.0, 576.0, 576.0, 627.0, 573.0, 576.0, 582.0, 576.0, 636.0, 573.0, 633.0, 633.0, 582.0, 516.0, 587.0, 579.0, 587.0, 525.0, 573.0, 630.0, 576.0, 579.0, 581.0, 630.0, 576.0, 627.0, 636.0, 633.0, 636.0, 630.0, 582.0, 582.0, 630.0, 579.0, 576.0, 579.0, 582.0, 579.0, 579.0, 630.0, 576.0, 579.0, 
539.0, 587.0, 579.0, 582.0, 582.0, 579.0, 627.0, 576.0, 636.0, 536.0, 525.0, 633.0, 587.0, 582.0, 576.0, 587.0, 582.0, 579.0, 579.0, 636.0, 630.0, 576.0, 573.0, 582.0, 576.0, 527.0, 590.0, 582.0, 519.0, 633.0, 582.0, 582.0, 633.0, 579.0, 522.0, 573.0, 627.0, 630.0, 576.0, 590.0, 573.0, 576.0, 636.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [293.0, 286.0, 314.0, 316.0, 289.0, 287.0, 309.0, 327.0, 312.0, 318.0, 291.0, 291.0, 285.0, 294.0, 247.0, 266.0, 288.0, 291.0, 293.0, 294.0, 291.0, 288.0, 292.0, 290.0, 291.0, 285.0, 290.0, 289.0, 296.0, 286.0, 289.0, 293.0, 280.0, 296.0, 281.0, 295.0, 306.0, 321.0, 291.0, 282.0, 288.0, 288.0, 292.0, 290.0, 285.0, 291.0, 317.0, 319.0, 279.0, 294.0, 314.0, 319.0, 314.0, 319.0, 290.0, 292.0, 262.0, 254.0, 295.0, 292.0, 286.0, 293.0, 291.0, 296.0, 267.0, 258.0, 290.0, 283.0, 314.0, 316.0, 289.0, 287.0, 294.0, 285.0, 288.0, 293.0, 314.0, 316.0, 284.0, 292.0, 327.0, 300.0, 311.0, 325.0, 319.0, 314.0, 321.0, 315.0, 321.0, 309.0, 290.0, 292.0, 293.0, 289.0, 313.0, 317.0, 284.0, 295.0, 288.0, 288.0, 294.0, 285.0, 284.0, 298.0, 291.0, 288.0, 291.0, 288.0, 319.0, 311.0, 285.0, 291.0, 290.0, 289.0, 271.0, 268.0, 293.0, 294.0, 284.0, 295.0, 296.0, 286.0, 298.0, 284.0, 290.0, 289.0, 306.0, 321.0, 281.0, 295.0, 322.0, 314.0, 268.0, 268.0, 260.0, 265.0, 318.0, 315.0, 294.0, 293.0, 293.0, 289.0, 288.0, 288.0, 294.0, 293.0, 281.0, 301.0, 281.0, 298.0, 291.0, 288.0, 317.0, 319.0, 319.0, 311.0, 287.0, 289.0, 287.0, 286.0, 287.0, 295.0, 283.0, 293.0, 256.0, 
271.0, 296.0, 294.0, 290.0, 292.0, 268.0, 251.0, 322.0, 311.0, 295.0, 287.0, 290.0, 292.0, 312.0, 321.0, 291.0, 288.0, 261.0, 261.0, 284.0, 289.0, 319.0, 308.0, 317.0, 313.0, 286.0, 290.0, 289.0, 301.0, 283.0, 290.0, 286.0, 290.0, 315.0, 321.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6939940852335686, "mean_inference_ms": 1.2371854185406679, "mean_action_processing_ms": 0.13320635803541322, "mean_env_wait_ms": 0.8352454574282127, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 11827200, "num_agent_steps_trained": 11827200, "num_env_steps_sampled": 5913600, "num_env_steps_trained": 5913600, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 5913600, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 11827200, "timers": {"training_iteration_time_ms": 3533.966, "learn_time_ms": 1067.253, "learn_throughput": 11993.409, "synch_weights_time_ms": 12.171}, "counters": {"num_env_steps_sampled": 5913600, "num_env_steps_trained": 5913600, "num_agent_steps_sampled": 11827200, "num_agent_steps_trained": 11827200}, "done": false, "episodes_total": 14784, "training_iteration": 462, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-30-27", "timestamp": 1666582227, "time_this_iter_s": 3.553008794784546, "time_total_s": 1756.8623659610748, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1756.8623659610748, "timesteps_since_restore": 0, "iterations_since_restore": 462, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.720000000000002, "ram_util_percent": 10.62}}
+{"custom_metrics": {"sparse_reward_mean": 203.2, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 181.53, "shaped_reward_min": 159, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.9, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 16.64, "onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.7, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 16.46, "useful_onion_pickup_agent_1_min": 11, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.07, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.06, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.03, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, 
"useful_onion_drop_agent_1_mean": 0.02, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.51, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 16.3, "potting_onion_agent_1_min": 11, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.21, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 5.59, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.12, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.43, "useful_dish_pickup_agent_1_min": 3, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.02, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.1, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.02, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.27, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 4.97, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.22, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.51, "optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 16.3, "optimal_onion_potting_agent_1_min": 11, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.51, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 16.3, "viable_onion_potting_agent_1_min": 11, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0042098090052604675, "policy_loss": 0.003829691093415022, "vf_loss": 
7.629401206970215, "vf_explained_var": 0.6158540844917297, "kl": 0.0028561637736856937, "entropy": 0.765643835067749, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 5926400, "num_env_steps_trained": 5926400, "num_agent_steps_sampled": 11852800, "num_agent_steps_trained": 11852800}, "sampler_results": {"episode_reward_max": 636.0, "episode_reward_min": 519.0, "episode_reward_mean": 587.93, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 251.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 293.965}, "custom_metrics": {"sparse_reward_mean": 203.2, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 181.53, "shaped_reward_min": 159, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.9, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 16.64, 
"onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.7, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 16.46, "useful_onion_pickup_agent_1_min": 11, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.07, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.06, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.03, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.02, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.51, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 16.3, "potting_onion_agent_1_min": 11, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.21, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 5.59, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.12, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.43, "useful_dish_pickup_agent_1_min": 3, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.02, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.1, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.02, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.27, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 4.97, "soup_delivery_agent_0_min": 3, 
"soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.22, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.51, "optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 16.3, "optimal_onion_potting_agent_1_min": 11, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.51, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 16.3, "viable_onion_potting_agent_1_min": 11, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [525.0, 573.0, 630.0, 576.0, 579.0, 581.0, 630.0, 576.0, 627.0, 636.0, 633.0, 636.0, 630.0, 582.0, 582.0, 630.0, 579.0, 576.0, 579.0, 582.0, 579.0, 579.0, 630.0, 576.0, 579.0, 539.0, 587.0, 579.0, 582.0, 582.0, 579.0, 627.0, 576.0, 636.0, 536.0, 525.0, 633.0, 587.0, 582.0, 576.0, 587.0, 582.0, 579.0, 579.0, 636.0, 630.0, 576.0, 573.0, 582.0, 576.0, 527.0, 590.0, 582.0, 519.0, 633.0, 582.0, 582.0, 633.0, 579.0, 522.0, 573.0, 627.0, 630.0, 576.0, 590.0, 573.0, 576.0, 636.0, 579.0, 627.0, 582.0, 630.0, 576.0, 633.0, 525.0, 627.0, 627.0, 582.0, 522.0, 576.0, 633.0, 584.0, 579.0, 582.0, 630.0, 582.0, 533.0, 582.0, 636.0, 630.0, 579.0, 582.0, 587.0, 579.0, 530.0, 582.0, 576.0, 582.0, 579.0, 519.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [267.0, 258.0, 290.0, 283.0, 314.0, 316.0, 289.0, 287.0, 294.0, 285.0, 288.0, 293.0, 314.0, 316.0, 284.0, 292.0, 327.0, 300.0, 311.0, 325.0, 319.0, 314.0, 321.0, 315.0, 321.0, 309.0, 290.0, 292.0, 293.0, 289.0, 313.0, 317.0, 284.0, 295.0, 288.0, 288.0, 294.0, 285.0, 284.0, 298.0, 291.0, 288.0, 291.0, 288.0, 319.0, 311.0, 285.0, 291.0, 290.0, 
289.0, 271.0, 268.0, 293.0, 294.0, 284.0, 295.0, 296.0, 286.0, 298.0, 284.0, 290.0, 289.0, 306.0, 321.0, 281.0, 295.0, 322.0, 314.0, 268.0, 268.0, 260.0, 265.0, 318.0, 315.0, 294.0, 293.0, 293.0, 289.0, 288.0, 288.0, 294.0, 293.0, 281.0, 301.0, 281.0, 298.0, 291.0, 288.0, 317.0, 319.0, 319.0, 311.0, 287.0, 289.0, 287.0, 286.0, 287.0, 295.0, 283.0, 293.0, 256.0, 271.0, 296.0, 294.0, 290.0, 292.0, 268.0, 251.0, 322.0, 311.0, 295.0, 287.0, 290.0, 292.0, 312.0, 321.0, 291.0, 288.0, 261.0, 261.0, 284.0, 289.0, 319.0, 308.0, 317.0, 313.0, 286.0, 290.0, 289.0, 301.0, 283.0, 290.0, 286.0, 290.0, 315.0, 321.0, 288.0, 291.0, 311.0, 316.0, 293.0, 289.0, 312.0, 318.0, 291.0, 285.0, 317.0, 316.0, 260.0, 265.0, 309.0, 318.0, 314.0, 313.0, 292.0, 290.0, 251.0, 271.0, 287.0, 289.0, 315.0, 318.0, 287.0, 297.0, 289.0, 290.0, 286.0, 296.0, 309.0, 321.0, 281.0, 301.0, 274.0, 259.0, 299.0, 283.0, 314.0, 322.0, 313.0, 317.0, 283.0, 296.0, 286.0, 296.0, 291.0, 296.0, 293.0, 286.0, 260.0, 270.0, 289.0, 293.0, 283.0, 293.0, 288.0, 294.0, 285.0, 294.0, 259.0, 260.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.693949131407544, "mean_inference_ms": 1.2370817107814052, "mean_action_processing_ms": 0.13320149620964622, "mean_env_wait_ms": 0.8351810086093567, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 636.0, "episode_reward_min": 519.0, "episode_reward_mean": 587.93, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 251.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 293.965}, "hist_stats": {"episode_reward": [525.0, 573.0, 630.0, 576.0, 579.0, 581.0, 630.0, 576.0, 627.0, 636.0, 633.0, 636.0, 630.0, 582.0, 582.0, 630.0, 579.0, 576.0, 579.0, 582.0, 579.0, 579.0, 630.0, 576.0, 579.0, 539.0, 587.0, 579.0, 582.0, 582.0, 579.0, 627.0, 576.0, 636.0, 536.0, 525.0, 633.0, 587.0, 582.0, 576.0, 587.0, 582.0, 579.0, 579.0, 636.0, 630.0, 576.0, 573.0, 582.0, 576.0, 527.0, 590.0, 582.0, 519.0, 633.0, 582.0, 582.0, 
633.0, 579.0, 522.0, 573.0, 627.0, 630.0, 576.0, 590.0, 573.0, 576.0, 636.0, 579.0, 627.0, 582.0, 630.0, 576.0, 633.0, 525.0, 627.0, 627.0, 582.0, 522.0, 576.0, 633.0, 584.0, 579.0, 582.0, 630.0, 582.0, 533.0, 582.0, 636.0, 630.0, 579.0, 582.0, 587.0, 579.0, 530.0, 582.0, 576.0, 582.0, 579.0, 519.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [267.0, 258.0, 290.0, 283.0, 314.0, 316.0, 289.0, 287.0, 294.0, 285.0, 288.0, 293.0, 314.0, 316.0, 284.0, 292.0, 327.0, 300.0, 311.0, 325.0, 319.0, 314.0, 321.0, 315.0, 321.0, 309.0, 290.0, 292.0, 293.0, 289.0, 313.0, 317.0, 284.0, 295.0, 288.0, 288.0, 294.0, 285.0, 284.0, 298.0, 291.0, 288.0, 291.0, 288.0, 319.0, 311.0, 285.0, 291.0, 290.0, 289.0, 271.0, 268.0, 293.0, 294.0, 284.0, 295.0, 296.0, 286.0, 298.0, 284.0, 290.0, 289.0, 306.0, 321.0, 281.0, 295.0, 322.0, 314.0, 268.0, 268.0, 260.0, 265.0, 318.0, 315.0, 294.0, 293.0, 293.0, 289.0, 288.0, 288.0, 294.0, 293.0, 281.0, 301.0, 281.0, 298.0, 291.0, 288.0, 317.0, 319.0, 319.0, 311.0, 287.0, 289.0, 287.0, 286.0, 287.0, 295.0, 283.0, 293.0, 256.0, 271.0, 296.0, 294.0, 290.0, 292.0, 268.0, 251.0, 322.0, 311.0, 295.0, 287.0, 290.0, 292.0, 312.0, 321.0, 291.0, 288.0, 261.0, 261.0, 284.0, 289.0, 319.0, 308.0, 317.0, 313.0, 286.0, 290.0, 289.0, 301.0, 283.0, 290.0, 286.0, 290.0, 315.0, 321.0, 288.0, 291.0, 311.0, 316.0, 293.0, 289.0, 312.0, 318.0, 291.0, 285.0, 317.0, 316.0, 260.0, 265.0, 309.0, 318.0, 314.0, 313.0, 292.0, 290.0, 251.0, 271.0, 287.0, 289.0, 315.0, 318.0, 287.0, 297.0, 289.0, 
290.0, 286.0, 296.0, 309.0, 321.0, 281.0, 301.0, 274.0, 259.0, 299.0, 283.0, 314.0, 322.0, 313.0, 317.0, 283.0, 296.0, 286.0, 296.0, 291.0, 296.0, 293.0, 286.0, 260.0, 270.0, 289.0, 293.0, 283.0, 293.0, 288.0, 294.0, 285.0, 294.0, 259.0, 260.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.693949131407544, "mean_inference_ms": 1.2370817107814052, "mean_action_processing_ms": 0.13320149620964622, "mean_env_wait_ms": 0.8351810086093567, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 11852800, "num_agent_steps_trained": 11852800, "num_env_steps_sampled": 5926400, "num_env_steps_trained": 5926400, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 5926400, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 11852800, "timers": {"training_iteration_time_ms": 3539.299, "learn_time_ms": 1072.003, "learn_throughput": 11940.269, "synch_weights_time_ms": 11.492}, "counters": {"num_env_steps_sampled": 5926400, "num_env_steps_trained": 5926400, "num_agent_steps_sampled": 11852800, "num_agent_steps_trained": 11852800}, "done": false, "episodes_total": 14816, "training_iteration": 463, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-30-31", "timestamp": 1666582231, "time_this_iter_s": 3.610844373703003, "time_total_s": 1760.4732103347778, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1760.4732103347778, "timesteps_since_restore": 0, "iterations_since_restore": 463, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.466666666666665, "ram_util_percent": 10.616666666666665}}
+{"custom_metrics": {"sparse_reward_mean": 202.8, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 181.34, "shaped_reward_min": 159, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.15, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 16.27, "onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 17.01, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 16.13, "useful_onion_pickup_agent_1_min": 11, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.04, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.07, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.0, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 0, 
"useful_onion_drop_agent_1_mean": 0.02, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.87, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 15.91, "potting_onion_agent_1_min": 11, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.03, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 5.75, "dish_pickup_agent_1_min": 4, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 4.94, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.61, "useful_dish_pickup_agent_1_min": 3, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.04, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.09, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 4.82, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.45, "soup_pickup_agent_1_min": 4, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.76, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.4, "soup_delivery_agent_1_min": 4, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.87, "optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 15.91, "optimal_onion_potting_agent_1_min": 11, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.87, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 15.91, "viable_onion_potting_agent_1_min": 11, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0031582540832459927, "policy_loss": -0.0035429480485618114, "vf_loss": 
7.66179084777832, "vf_explained_var": 0.6139554381370544, "kl": 0.002852272940799594, "entropy": 0.7629702091217041, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 5939200, "num_env_steps_trained": 5939200, "num_agent_steps_sampled": 11878400, "num_agent_steps_trained": 11878400}, "sampler_results": {"episode_reward_max": 636.0, "episode_reward_min": 519.0, "episode_reward_mean": 586.94, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 251.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 293.47}, "custom_metrics": {"sparse_reward_mean": 202.8, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 181.34, "shaped_reward_min": 159, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.15, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 16.27, 
"onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 17.01, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 16.13, "useful_onion_pickup_agent_1_min": 11, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.04, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.07, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.0, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 0, "useful_onion_drop_agent_1_mean": 0.02, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.87, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 15.91, "potting_onion_agent_1_min": 11, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.03, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 5.75, "dish_pickup_agent_1_min": 4, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 4.94, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.61, "useful_dish_pickup_agent_1_min": 3, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.04, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.09, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 4.82, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.45, "soup_pickup_agent_1_min": 4, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.76, "soup_delivery_agent_0_min": 3, 
"soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.4, "soup_delivery_agent_1_min": 4, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.87, "optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 15.91, "optimal_onion_potting_agent_1_min": 11, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.87, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 15.91, "viable_onion_potting_agent_1_min": 11, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [576.0, 636.0, 536.0, 525.0, 633.0, 587.0, 582.0, 576.0, 587.0, 582.0, 579.0, 579.0, 636.0, 630.0, 576.0, 573.0, 582.0, 576.0, 527.0, 590.0, 582.0, 519.0, 633.0, 582.0, 582.0, 633.0, 579.0, 522.0, 573.0, 627.0, 630.0, 576.0, 590.0, 573.0, 576.0, 636.0, 579.0, 627.0, 582.0, 630.0, 576.0, 633.0, 525.0, 627.0, 627.0, 582.0, 522.0, 576.0, 633.0, 584.0, 579.0, 582.0, 630.0, 582.0, 533.0, 582.0, 636.0, 630.0, 579.0, 582.0, 587.0, 579.0, 530.0, 582.0, 576.0, 582.0, 579.0, 519.0, 582.0, 576.0, 525.0, 633.0, 579.0, 576.0, 633.0, 630.0, 582.0, 630.0, 579.0, 579.0, 573.0, 582.0, 582.0, 587.0, 579.0, 627.0, 630.0, 633.0, 579.0, 582.0, 582.0, 525.0, 579.0, 573.0, 576.0, 582.0, 587.0, 630.0, 582.0, 587.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [281.0, 295.0, 322.0, 314.0, 268.0, 268.0, 260.0, 265.0, 318.0, 315.0, 294.0, 293.0, 293.0, 289.0, 288.0, 288.0, 294.0, 293.0, 281.0, 301.0, 281.0, 298.0, 291.0, 288.0, 317.0, 319.0, 319.0, 311.0, 287.0, 289.0, 287.0, 286.0, 287.0, 295.0, 283.0, 293.0, 256.0, 271.0, 296.0, 294.0, 290.0, 292.0, 268.0, 251.0, 322.0, 311.0, 295.0, 287.0, 290.0, 
292.0, 312.0, 321.0, 291.0, 288.0, 261.0, 261.0, 284.0, 289.0, 319.0, 308.0, 317.0, 313.0, 286.0, 290.0, 289.0, 301.0, 283.0, 290.0, 286.0, 290.0, 315.0, 321.0, 288.0, 291.0, 311.0, 316.0, 293.0, 289.0, 312.0, 318.0, 291.0, 285.0, 317.0, 316.0, 260.0, 265.0, 309.0, 318.0, 314.0, 313.0, 292.0, 290.0, 251.0, 271.0, 287.0, 289.0, 315.0, 318.0, 287.0, 297.0, 289.0, 290.0, 286.0, 296.0, 309.0, 321.0, 281.0, 301.0, 274.0, 259.0, 299.0, 283.0, 314.0, 322.0, 313.0, 317.0, 283.0, 296.0, 286.0, 296.0, 291.0, 296.0, 293.0, 286.0, 260.0, 270.0, 289.0, 293.0, 283.0, 293.0, 288.0, 294.0, 285.0, 294.0, 259.0, 260.0, 289.0, 293.0, 289.0, 287.0, 260.0, 265.0, 308.0, 325.0, 291.0, 288.0, 282.0, 294.0, 317.0, 316.0, 317.0, 313.0, 289.0, 293.0, 311.0, 319.0, 288.0, 291.0, 288.0, 291.0, 287.0, 286.0, 290.0, 292.0, 289.0, 293.0, 297.0, 290.0, 297.0, 282.0, 322.0, 305.0, 314.0, 316.0, 314.0, 319.0, 290.0, 289.0, 289.0, 293.0, 291.0, 291.0, 262.0, 263.0, 289.0, 290.0, 288.0, 285.0, 293.0, 283.0, 290.0, 292.0, 287.0, 300.0, 319.0, 311.0, 288.0, 294.0, 288.0, 299.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6938957952732318, "mean_inference_ms": 1.237007163913863, "mean_action_processing_ms": 0.13319639842425457, "mean_env_wait_ms": 0.8351925904711837, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 636.0, "episode_reward_min": 519.0, "episode_reward_mean": 586.94, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 251.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 293.47}, "hist_stats": {"episode_reward": [576.0, 636.0, 536.0, 525.0, 633.0, 587.0, 582.0, 576.0, 587.0, 582.0, 579.0, 579.0, 636.0, 630.0, 576.0, 573.0, 582.0, 576.0, 527.0, 590.0, 582.0, 519.0, 633.0, 582.0, 582.0, 633.0, 579.0, 522.0, 573.0, 627.0, 630.0, 576.0, 590.0, 573.0, 576.0, 636.0, 579.0, 627.0, 582.0, 630.0, 576.0, 633.0, 525.0, 627.0, 627.0, 582.0, 522.0, 576.0, 633.0, 584.0, 579.0, 582.0, 630.0, 582.0, 533.0, 582.0, 636.0, 
630.0, 579.0, 582.0, 587.0, 579.0, 530.0, 582.0, 576.0, 582.0, 579.0, 519.0, 582.0, 576.0, 525.0, 633.0, 579.0, 576.0, 633.0, 630.0, 582.0, 630.0, 579.0, 579.0, 573.0, 582.0, 582.0, 587.0, 579.0, 627.0, 630.0, 633.0, 579.0, 582.0, 582.0, 525.0, 579.0, 573.0, 576.0, 582.0, 587.0, 630.0, 582.0, 587.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [281.0, 295.0, 322.0, 314.0, 268.0, 268.0, 260.0, 265.0, 318.0, 315.0, 294.0, 293.0, 293.0, 289.0, 288.0, 288.0, 294.0, 293.0, 281.0, 301.0, 281.0, 298.0, 291.0, 288.0, 317.0, 319.0, 319.0, 311.0, 287.0, 289.0, 287.0, 286.0, 287.0, 295.0, 283.0, 293.0, 256.0, 271.0, 296.0, 294.0, 290.0, 292.0, 268.0, 251.0, 322.0, 311.0, 295.0, 287.0, 290.0, 292.0, 312.0, 321.0, 291.0, 288.0, 261.0, 261.0, 284.0, 289.0, 319.0, 308.0, 317.0, 313.0, 286.0, 290.0, 289.0, 301.0, 283.0, 290.0, 286.0, 290.0, 315.0, 321.0, 288.0, 291.0, 311.0, 316.0, 293.0, 289.0, 312.0, 318.0, 291.0, 285.0, 317.0, 316.0, 260.0, 265.0, 309.0, 318.0, 314.0, 313.0, 292.0, 290.0, 251.0, 271.0, 287.0, 289.0, 315.0, 318.0, 287.0, 297.0, 289.0, 290.0, 286.0, 296.0, 309.0, 321.0, 281.0, 301.0, 274.0, 259.0, 299.0, 283.0, 314.0, 322.0, 313.0, 317.0, 283.0, 296.0, 286.0, 296.0, 291.0, 296.0, 293.0, 286.0, 260.0, 270.0, 289.0, 293.0, 283.0, 293.0, 288.0, 294.0, 285.0, 294.0, 259.0, 260.0, 289.0, 293.0, 289.0, 287.0, 260.0, 265.0, 308.0, 325.0, 291.0, 288.0, 282.0, 294.0, 317.0, 316.0, 317.0, 313.0, 289.0, 293.0, 311.0, 319.0, 288.0, 291.0, 288.0, 291.0, 287.0, 286.0, 290.0, 292.0, 289.0, 
293.0, 297.0, 290.0, 297.0, 282.0, 322.0, 305.0, 314.0, 316.0, 314.0, 319.0, 290.0, 289.0, 289.0, 293.0, 291.0, 291.0, 262.0, 263.0, 289.0, 290.0, 288.0, 285.0, 293.0, 283.0, 290.0, 292.0, 287.0, 300.0, 319.0, 311.0, 288.0, 294.0, 288.0, 299.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6938957952732318, "mean_inference_ms": 1.237007163913863, "mean_action_processing_ms": 0.13319639842425457, "mean_env_wait_ms": 0.8351925904711837, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 11878400, "num_agent_steps_trained": 11878400, "num_env_steps_sampled": 5939200, "num_env_steps_trained": 5939200, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 5939200, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 11878400, "timers": {"training_iteration_time_ms": 3545.968, "learn_time_ms": 1064.613, "learn_throughput": 12023.149, "synch_weights_time_ms": 12.154}, "counters": {"num_env_steps_sampled": 5939200, "num_env_steps_trained": 5939200, "num_agent_steps_sampled": 11878400, "num_agent_steps_trained": 11878400}, "done": false, "episodes_total": 14848, "training_iteration": 464, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-30-34", "timestamp": 1666582234, "time_this_iter_s": 3.704625129699707, "time_total_s": 1764.1778354644775, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1764.1778354644775, "timesteps_since_restore": 0, "iterations_since_restore": 464, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 24.779999999999998, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 202.2, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 180.39, "shaped_reward_min": 159, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.54, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 15.7, "onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 17.39, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 15.54, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.04, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.04, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.0, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 0, 
"useful_onion_drop_agent_1_mean": 0.0, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 0, "potting_onion_agent_0_mean": 17.24, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 15.38, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 4.88, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 7, "dish_pickup_agent_1_mean": 5.81, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 4.78, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.68, "useful_dish_pickup_agent_1_min": 3, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.04, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.08, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 4.67, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.56, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.61, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.5, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 17.24, "optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 15.38, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 17.24, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 15.38, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0005369747523218393, "policy_loss": 0.00015844125300645828, "vf_loss": 
7.604679107666016, "vf_explained_var": 0.6035102605819702, "kl": 0.003216771874576807, "entropy": 0.7638680338859558, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 5952000, "num_env_steps_trained": 5952000, "num_agent_steps_sampled": 11904000, "num_agent_steps_trained": 11904000}, "sampler_results": {"episode_reward_max": 636.0, "episode_reward_min": 519.0, "episode_reward_mean": 584.79, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 251.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 292.395}, "custom_metrics": {"sparse_reward_mean": 202.2, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 180.39, "shaped_reward_min": 159, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.54, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 15.7, 
"onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 17.39, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 15.54, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.04, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.04, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.0, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 0, "useful_onion_drop_agent_1_mean": 0.0, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 0, "potting_onion_agent_0_mean": 17.24, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 15.38, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 4.88, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 7, "dish_pickup_agent_1_mean": 5.81, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 4.78, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.68, "useful_dish_pickup_agent_1_min": 3, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.04, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.08, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 4.67, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.56, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.61, "soup_delivery_agent_0_min": 3, 
"soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.5, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 17.24, "optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 15.38, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 17.24, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 15.38, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [590.0, 573.0, 576.0, 636.0, 579.0, 627.0, 582.0, 630.0, 576.0, 633.0, 525.0, 627.0, 627.0, 582.0, 522.0, 576.0, 633.0, 584.0, 579.0, 582.0, 630.0, 582.0, 533.0, 582.0, 636.0, 630.0, 579.0, 582.0, 587.0, 579.0, 530.0, 582.0, 576.0, 582.0, 579.0, 519.0, 582.0, 576.0, 525.0, 633.0, 579.0, 576.0, 633.0, 630.0, 582.0, 630.0, 579.0, 579.0, 573.0, 582.0, 582.0, 587.0, 579.0, 627.0, 630.0, 633.0, 579.0, 582.0, 582.0, 525.0, 579.0, 573.0, 576.0, 582.0, 587.0, 630.0, 582.0, 587.0, 636.0, 530.0, 536.0, 573.0, 576.0, 582.0, 570.0, 570.0, 630.0, 587.0, 573.0, 576.0, 573.0, 573.0, 579.0, 579.0, 579.0, 530.0, 579.0, 522.0, 630.0, 630.0, 576.0, 579.0, 576.0, 573.0, 522.0, 579.0, 633.0, 582.0, 576.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [289.0, 301.0, 283.0, 290.0, 286.0, 290.0, 315.0, 321.0, 288.0, 291.0, 311.0, 316.0, 293.0, 289.0, 312.0, 318.0, 291.0, 285.0, 317.0, 316.0, 260.0, 265.0, 309.0, 318.0, 314.0, 313.0, 292.0, 290.0, 251.0, 271.0, 287.0, 289.0, 315.0, 318.0, 287.0, 297.0, 289.0, 290.0, 286.0, 296.0, 309.0, 321.0, 281.0, 301.0, 274.0, 259.0, 299.0, 283.0, 314.0, 
322.0, 313.0, 317.0, 283.0, 296.0, 286.0, 296.0, 291.0, 296.0, 293.0, 286.0, 260.0, 270.0, 289.0, 293.0, 283.0, 293.0, 288.0, 294.0, 285.0, 294.0, 259.0, 260.0, 289.0, 293.0, 289.0, 287.0, 260.0, 265.0, 308.0, 325.0, 291.0, 288.0, 282.0, 294.0, 317.0, 316.0, 317.0, 313.0, 289.0, 293.0, 311.0, 319.0, 288.0, 291.0, 288.0, 291.0, 287.0, 286.0, 290.0, 292.0, 289.0, 293.0, 297.0, 290.0, 297.0, 282.0, 322.0, 305.0, 314.0, 316.0, 314.0, 319.0, 290.0, 289.0, 289.0, 293.0, 291.0, 291.0, 262.0, 263.0, 289.0, 290.0, 288.0, 285.0, 293.0, 283.0, 290.0, 292.0, 287.0, 300.0, 319.0, 311.0, 288.0, 294.0, 288.0, 299.0, 320.0, 316.0, 263.0, 267.0, 269.0, 267.0, 283.0, 290.0, 295.0, 281.0, 292.0, 290.0, 280.0, 290.0, 286.0, 284.0, 317.0, 313.0, 299.0, 288.0, 284.0, 289.0, 286.0, 290.0, 296.0, 277.0, 283.0, 290.0, 296.0, 283.0, 289.0, 290.0, 296.0, 283.0, 258.0, 272.0, 287.0, 292.0, 266.0, 256.0, 313.0, 317.0, 310.0, 320.0, 287.0, 289.0, 291.0, 288.0, 289.0, 287.0, 285.0, 288.0, 258.0, 264.0, 296.0, 283.0, 313.0, 320.0, 294.0, 288.0, 293.0, 283.0, 292.0, 290.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6938517527377859, "mean_inference_ms": 1.236971528906978, "mean_action_processing_ms": 0.13319191119677623, "mean_env_wait_ms": 0.8352425477668859, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 636.0, "episode_reward_min": 519.0, "episode_reward_mean": 584.79, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 251.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 292.395}, "hist_stats": {"episode_reward": [590.0, 573.0, 576.0, 636.0, 579.0, 627.0, 582.0, 630.0, 576.0, 633.0, 525.0, 627.0, 627.0, 582.0, 522.0, 576.0, 633.0, 584.0, 579.0, 582.0, 630.0, 582.0, 533.0, 582.0, 636.0, 630.0, 579.0, 582.0, 587.0, 579.0, 530.0, 582.0, 576.0, 582.0, 579.0, 519.0, 582.0, 576.0, 525.0, 633.0, 579.0, 576.0, 633.0, 630.0, 582.0, 630.0, 579.0, 579.0, 573.0, 582.0, 582.0, 587.0, 579.0, 627.0, 630.0, 633.0, 579.0, 
582.0, 582.0, 525.0, 579.0, 573.0, 576.0, 582.0, 587.0, 630.0, 582.0, 587.0, 636.0, 530.0, 536.0, 573.0, 576.0, 582.0, 570.0, 570.0, 630.0, 587.0, 573.0, 576.0, 573.0, 573.0, 579.0, 579.0, 579.0, 530.0, 579.0, 522.0, 630.0, 630.0, 576.0, 579.0, 576.0, 573.0, 522.0, 579.0, 633.0, 582.0, 576.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [289.0, 301.0, 283.0, 290.0, 286.0, 290.0, 315.0, 321.0, 288.0, 291.0, 311.0, 316.0, 293.0, 289.0, 312.0, 318.0, 291.0, 285.0, 317.0, 316.0, 260.0, 265.0, 309.0, 318.0, 314.0, 313.0, 292.0, 290.0, 251.0, 271.0, 287.0, 289.0, 315.0, 318.0, 287.0, 297.0, 289.0, 290.0, 286.0, 296.0, 309.0, 321.0, 281.0, 301.0, 274.0, 259.0, 299.0, 283.0, 314.0, 322.0, 313.0, 317.0, 283.0, 296.0, 286.0, 296.0, 291.0, 296.0, 293.0, 286.0, 260.0, 270.0, 289.0, 293.0, 283.0, 293.0, 288.0, 294.0, 285.0, 294.0, 259.0, 260.0, 289.0, 293.0, 289.0, 287.0, 260.0, 265.0, 308.0, 325.0, 291.0, 288.0, 282.0, 294.0, 317.0, 316.0, 317.0, 313.0, 289.0, 293.0, 311.0, 319.0, 288.0, 291.0, 288.0, 291.0, 287.0, 286.0, 290.0, 292.0, 289.0, 293.0, 297.0, 290.0, 297.0, 282.0, 322.0, 305.0, 314.0, 316.0, 314.0, 319.0, 290.0, 289.0, 289.0, 293.0, 291.0, 291.0, 262.0, 263.0, 289.0, 290.0, 288.0, 285.0, 293.0, 283.0, 290.0, 292.0, 287.0, 300.0, 319.0, 311.0, 288.0, 294.0, 288.0, 299.0, 320.0, 316.0, 263.0, 267.0, 269.0, 267.0, 283.0, 290.0, 295.0, 281.0, 292.0, 290.0, 280.0, 290.0, 286.0, 284.0, 317.0, 313.0, 299.0, 288.0, 284.0, 289.0, 286.0, 290.0, 296.0, 277.0, 283.0, 290.0, 296.0, 
283.0, 289.0, 290.0, 296.0, 283.0, 258.0, 272.0, 287.0, 292.0, 266.0, 256.0, 313.0, 317.0, 310.0, 320.0, 287.0, 289.0, 291.0, 288.0, 289.0, 287.0, 285.0, 288.0, 258.0, 264.0, 296.0, 283.0, 313.0, 320.0, 294.0, 288.0, 293.0, 283.0, 292.0, 290.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6938517527377859, "mean_inference_ms": 1.236971528906978, "mean_action_processing_ms": 0.13319191119677623, "mean_env_wait_ms": 0.8352425477668859, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 11904000, "num_agent_steps_trained": 11904000, "num_env_steps_sampled": 5952000, "num_env_steps_trained": 5952000, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 5952000, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 11904000, "timers": {"training_iteration_time_ms": 3551.553, "learn_time_ms": 1050.795, "learn_throughput": 12181.259, "synch_weights_time_ms": 12.202}, "counters": {"num_env_steps_sampled": 5952000, "num_env_steps_trained": 5952000, "num_agent_steps_sampled": 11904000, "num_agent_steps_trained": 11904000}, "done": false, "episodes_total": 14880, "training_iteration": 465, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-30-38", "timestamp": 1666582238, "time_this_iter_s": 3.6914851665496826, "time_total_s": 1767.8693206310272, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1767.8693206310272, "timesteps_since_restore": 0, "iterations_since_restore": 465, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 21.166666666666668, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 202.4, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 179.98, "shaped_reward_min": 159, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.56, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 15.63, "onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 17.41, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 15.48, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.07, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.03, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.02, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, 
"useful_onion_drop_agent_1_mean": 0.0, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 0, "potting_onion_agent_0_mean": 17.26, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 15.29, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 20, "dish_pickup_agent_0_mean": 4.91, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 7, "dish_pickup_agent_1_mean": 5.69, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 4.83, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.58, "useful_dish_pickup_agent_1_min": 3, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.04, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.03, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 4.69, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.53, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.64, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.48, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 17.26, "optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 15.29, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 20, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 17.26, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 15.29, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 20, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0025697732344269753, "policy_loss": -0.0029487167485058308, "vf_loss": 
7.580999851226807, "vf_explained_var": 0.590236485004425, "kl": 0.0032103369012475014, "entropy": 0.7583142518997192, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 5964800, "num_env_steps_trained": 5964800, "num_agent_steps_sampled": 11929600, "num_agent_steps_trained": 11929600}, "sampler_results": {"episode_reward_max": 639.0, "episode_reward_min": 519.0, "episode_reward_mean": 584.78, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 252.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 292.39}, "custom_metrics": {"sparse_reward_mean": 202.4, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 179.98, "shaped_reward_min": 159, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.56, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 15.63, 
"onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 17.41, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 15.48, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.07, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.03, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.02, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.0, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 0, "potting_onion_agent_0_mean": 17.26, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 15.29, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 20, "dish_pickup_agent_0_mean": 4.91, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 7, "dish_pickup_agent_1_mean": 5.69, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 4.83, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.58, "useful_dish_pickup_agent_1_min": 3, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.04, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.03, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 4.69, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.53, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.64, "soup_delivery_agent_0_min": 2, 
"soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.48, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 17.26, "optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 15.29, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 20, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 17.26, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 15.29, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 20, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [576.0, 582.0, 579.0, 519.0, 582.0, 576.0, 525.0, 633.0, 579.0, 576.0, 633.0, 630.0, 582.0, 630.0, 579.0, 579.0, 573.0, 582.0, 582.0, 587.0, 579.0, 627.0, 630.0, 633.0, 579.0, 582.0, 582.0, 525.0, 579.0, 573.0, 576.0, 582.0, 587.0, 630.0, 582.0, 587.0, 636.0, 530.0, 536.0, 573.0, 576.0, 582.0, 570.0, 570.0, 630.0, 587.0, 573.0, 576.0, 573.0, 573.0, 579.0, 579.0, 579.0, 530.0, 579.0, 522.0, 630.0, 630.0, 576.0, 579.0, 576.0, 573.0, 522.0, 579.0, 633.0, 582.0, 576.0, 582.0, 630.0, 627.0, 579.0, 630.0, 527.0, 582.0, 630.0, 579.0, 587.0, 573.0, 570.0, 582.0, 584.0, 627.0, 576.0, 633.0, 579.0, 630.0, 567.0, 579.0, 582.0, 579.0, 582.0, 633.0, 579.0, 639.0, 570.0, 576.0, 582.0, 579.0, 525.0, 573.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [283.0, 293.0, 288.0, 294.0, 285.0, 294.0, 259.0, 260.0, 289.0, 293.0, 289.0, 287.0, 260.0, 265.0, 308.0, 325.0, 291.0, 288.0, 282.0, 294.0, 317.0, 316.0, 317.0, 313.0, 289.0, 293.0, 311.0, 319.0, 288.0, 291.0, 288.0, 291.0, 287.0, 286.0, 290.0, 292.0, 289.0, 293.0, 297.0, 290.0, 297.0, 282.0, 322.0, 305.0, 314.0, 316.0, 314.0, 319.0, 290.0, 
289.0, 289.0, 293.0, 291.0, 291.0, 262.0, 263.0, 289.0, 290.0, 288.0, 285.0, 293.0, 283.0, 290.0, 292.0, 287.0, 300.0, 319.0, 311.0, 288.0, 294.0, 288.0, 299.0, 320.0, 316.0, 263.0, 267.0, 269.0, 267.0, 283.0, 290.0, 295.0, 281.0, 292.0, 290.0, 280.0, 290.0, 286.0, 284.0, 317.0, 313.0, 299.0, 288.0, 284.0, 289.0, 286.0, 290.0, 296.0, 277.0, 283.0, 290.0, 296.0, 283.0, 289.0, 290.0, 296.0, 283.0, 258.0, 272.0, 287.0, 292.0, 266.0, 256.0, 313.0, 317.0, 310.0, 320.0, 287.0, 289.0, 291.0, 288.0, 289.0, 287.0, 285.0, 288.0, 258.0, 264.0, 296.0, 283.0, 313.0, 320.0, 294.0, 288.0, 293.0, 283.0, 292.0, 290.0, 314.0, 316.0, 308.0, 319.0, 290.0, 289.0, 316.0, 314.0, 252.0, 275.0, 288.0, 294.0, 313.0, 317.0, 283.0, 296.0, 288.0, 299.0, 289.0, 284.0, 285.0, 285.0, 292.0, 290.0, 296.0, 288.0, 314.0, 313.0, 286.0, 290.0, 320.0, 313.0, 288.0, 291.0, 319.0, 311.0, 282.0, 285.0, 294.0, 285.0, 291.0, 291.0, 289.0, 290.0, 296.0, 286.0, 318.0, 315.0, 292.0, 287.0, 319.0, 320.0, 278.0, 292.0, 284.0, 292.0, 293.0, 289.0, 286.0, 293.0, 269.0, 256.0, 286.0, 287.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.693806990818135, "mean_inference_ms": 1.2369434972732614, "mean_action_processing_ms": 0.13318722167200414, "mean_env_wait_ms": 0.8352965205333814, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 639.0, "episode_reward_min": 519.0, "episode_reward_mean": 584.78, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 252.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 292.39}, "hist_stats": {"episode_reward": [576.0, 582.0, 579.0, 519.0, 582.0, 576.0, 525.0, 633.0, 579.0, 576.0, 633.0, 630.0, 582.0, 630.0, 579.0, 579.0, 573.0, 582.0, 582.0, 587.0, 579.0, 627.0, 630.0, 633.0, 579.0, 582.0, 582.0, 525.0, 579.0, 573.0, 576.0, 582.0, 587.0, 630.0, 582.0, 587.0, 636.0, 530.0, 536.0, 573.0, 576.0, 582.0, 570.0, 570.0, 630.0, 587.0, 573.0, 576.0, 573.0, 573.0, 579.0, 579.0, 579.0, 530.0, 579.0, 522.0, 630.0, 
630.0, 576.0, 579.0, 576.0, 573.0, 522.0, 579.0, 633.0, 582.0, 576.0, 582.0, 630.0, 627.0, 579.0, 630.0, 527.0, 582.0, 630.0, 579.0, 587.0, 573.0, 570.0, 582.0, 584.0, 627.0, 576.0, 633.0, 579.0, 630.0, 567.0, 579.0, 582.0, 579.0, 582.0, 633.0, 579.0, 639.0, 570.0, 576.0, 582.0, 579.0, 525.0, 573.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [283.0, 293.0, 288.0, 294.0, 285.0, 294.0, 259.0, 260.0, 289.0, 293.0, 289.0, 287.0, 260.0, 265.0, 308.0, 325.0, 291.0, 288.0, 282.0, 294.0, 317.0, 316.0, 317.0, 313.0, 289.0, 293.0, 311.0, 319.0, 288.0, 291.0, 288.0, 291.0, 287.0, 286.0, 290.0, 292.0, 289.0, 293.0, 297.0, 290.0, 297.0, 282.0, 322.0, 305.0, 314.0, 316.0, 314.0, 319.0, 290.0, 289.0, 289.0, 293.0, 291.0, 291.0, 262.0, 263.0, 289.0, 290.0, 288.0, 285.0, 293.0, 283.0, 290.0, 292.0, 287.0, 300.0, 319.0, 311.0, 288.0, 294.0, 288.0, 299.0, 320.0, 316.0, 263.0, 267.0, 269.0, 267.0, 283.0, 290.0, 295.0, 281.0, 292.0, 290.0, 280.0, 290.0, 286.0, 284.0, 317.0, 313.0, 299.0, 288.0, 284.0, 289.0, 286.0, 290.0, 296.0, 277.0, 283.0, 290.0, 296.0, 283.0, 289.0, 290.0, 296.0, 283.0, 258.0, 272.0, 287.0, 292.0, 266.0, 256.0, 313.0, 317.0, 310.0, 320.0, 287.0, 289.0, 291.0, 288.0, 289.0, 287.0, 285.0, 288.0, 258.0, 264.0, 296.0, 283.0, 313.0, 320.0, 294.0, 288.0, 293.0, 283.0, 292.0, 290.0, 314.0, 316.0, 308.0, 319.0, 290.0, 289.0, 316.0, 314.0, 252.0, 275.0, 288.0, 294.0, 313.0, 317.0, 283.0, 296.0, 288.0, 299.0, 289.0, 284.0, 285.0, 285.0, 292.0, 290.0, 296.0, 288.0, 314.0, 313.0, 286.0, 
290.0, 320.0, 313.0, 288.0, 291.0, 319.0, 311.0, 282.0, 285.0, 294.0, 285.0, 291.0, 291.0, 289.0, 290.0, 296.0, 286.0, 318.0, 315.0, 292.0, 287.0, 319.0, 320.0, 278.0, 292.0, 284.0, 292.0, 293.0, 289.0, 286.0, 293.0, 269.0, 256.0, 286.0, 287.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.693806990818135, "mean_inference_ms": 1.2369434972732614, "mean_action_processing_ms": 0.13318722167200414, "mean_env_wait_ms": 0.8352965205333814, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 11929600, "num_agent_steps_trained": 11929600, "num_env_steps_sampled": 5964800, "num_env_steps_trained": 5964800, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 5964800, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 11929600, "timers": {"training_iteration_time_ms": 3551.928, "learn_time_ms": 1053.252, "learn_throughput": 12152.835, "synch_weights_time_ms": 12.209}, "counters": {"num_env_steps_sampled": 5964800, "num_env_steps_trained": 5964800, "num_agent_steps_sampled": 11929600, "num_agent_steps_trained": 11929600}, "done": false, "episodes_total": 14912, "training_iteration": 466, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-30-42", "timestamp": 1666582242, "time_this_iter_s": 3.598388195037842, "time_total_s": 1771.467708826065, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1771.467708826065, "timesteps_since_restore": 0, "iterations_since_restore": 466, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.7, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 198.2, "sparse_reward_min": 20, "sparse_reward_max": 220, "shaped_reward_mean": 176.98, "shaped_reward_min": 26, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.26, "onion_pickup_agent_0_min": 2, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 15.33, "onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 17.08, "useful_onion_pickup_agent_0_min": 2, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 15.15, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.08, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.06, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.02, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, 
"useful_onion_drop_agent_1_mean": 0.02, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.93, "potting_onion_agent_0_min": 1, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 15.0, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 20, "dish_pickup_agent_0_mean": 4.87, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 5.64, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 4.77, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.51, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.03, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.05, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 4.66, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.41, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.61, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.32, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.93, "optimal_onion_potting_agent_0_min": 1, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 15.0, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 20, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.93, "viable_onion_potting_agent_0_min": 1, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 15.0, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 20, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0003803360741585493, "policy_loss": -0.0007807364454492927, "vf_loss": 
7.784968852996826, "vf_explained_var": 0.5778146386146545, "kl": 0.004238889552652836, "entropy": 0.7561917304992676, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 5977600, "num_env_steps_trained": 5977600, "num_agent_steps_sampled": 11955200, "num_agent_steps_trained": 11955200}, "sampler_results": {"episode_reward_max": 639.0, "episode_reward_min": 66.0, "episode_reward_mean": 573.38, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 31.0}, "policy_reward_max": {"ppo": 321.0}, "policy_reward_mean": {"ppo": 286.69}, "custom_metrics": {"sparse_reward_mean": 198.2, "sparse_reward_min": 20, "sparse_reward_max": 220, "shaped_reward_mean": 176.98, "shaped_reward_min": 26, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.26, "onion_pickup_agent_0_min": 2, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 15.33, 
"onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 17.08, "useful_onion_pickup_agent_0_min": 2, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 15.15, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.08, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.06, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.02, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.02, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.93, "potting_onion_agent_0_min": 1, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 15.0, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 20, "dish_pickup_agent_0_mean": 4.87, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 5.64, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 4.77, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.51, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.03, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.05, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 4.66, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.41, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.61, "soup_delivery_agent_0_min": 1, 
"soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.32, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.93, "optimal_onion_potting_agent_0_min": 1, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 15.0, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 20, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.93, "viable_onion_potting_agent_0_min": 1, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 15.0, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 20, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [587.0, 630.0, 582.0, 587.0, 636.0, 530.0, 536.0, 573.0, 576.0, 582.0, 570.0, 570.0, 630.0, 587.0, 573.0, 576.0, 573.0, 573.0, 579.0, 579.0, 579.0, 530.0, 579.0, 522.0, 630.0, 630.0, 576.0, 579.0, 576.0, 573.0, 522.0, 579.0, 633.0, 582.0, 576.0, 582.0, 630.0, 627.0, 579.0, 630.0, 527.0, 582.0, 630.0, 579.0, 587.0, 573.0, 570.0, 582.0, 584.0, 627.0, 576.0, 633.0, 579.0, 630.0, 567.0, 579.0, 582.0, 579.0, 582.0, 633.0, 579.0, 639.0, 570.0, 576.0, 582.0, 579.0, 525.0, 573.0, 579.0, 587.0, 522.0, 570.0, 576.0, 525.0, 576.0, 627.0, 576.0, 576.0, 627.0, 587.0, 582.0, 584.0, 525.0, 570.0, 582.0, 576.0, 582.0, 473.0, 582.0, 579.0, 579.0, 525.0, 587.0, 576.0, 66.0, 582.0, 587.0, 582.0, 354.0, 590.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [287.0, 300.0, 319.0, 311.0, 288.0, 294.0, 288.0, 299.0, 320.0, 316.0, 263.0, 267.0, 269.0, 267.0, 283.0, 290.0, 295.0, 281.0, 292.0, 290.0, 280.0, 290.0, 286.0, 284.0, 317.0, 313.0, 299.0, 288.0, 284.0, 289.0, 286.0, 290.0, 296.0, 277.0, 283.0, 290.0, 296.0, 283.0, 289.0, 290.0, 296.0, 283.0, 258.0, 272.0, 287.0, 292.0, 266.0, 256.0, 313.0, 
317.0, 310.0, 320.0, 287.0, 289.0, 291.0, 288.0, 289.0, 287.0, 285.0, 288.0, 258.0, 264.0, 296.0, 283.0, 313.0, 320.0, 294.0, 288.0, 293.0, 283.0, 292.0, 290.0, 314.0, 316.0, 308.0, 319.0, 290.0, 289.0, 316.0, 314.0, 252.0, 275.0, 288.0, 294.0, 313.0, 317.0, 283.0, 296.0, 288.0, 299.0, 289.0, 284.0, 285.0, 285.0, 292.0, 290.0, 296.0, 288.0, 314.0, 313.0, 286.0, 290.0, 320.0, 313.0, 288.0, 291.0, 319.0, 311.0, 282.0, 285.0, 294.0, 285.0, 291.0, 291.0, 289.0, 290.0, 296.0, 286.0, 318.0, 315.0, 292.0, 287.0, 319.0, 320.0, 278.0, 292.0, 284.0, 292.0, 293.0, 289.0, 286.0, 293.0, 269.0, 256.0, 286.0, 287.0, 291.0, 288.0, 288.0, 299.0, 263.0, 259.0, 284.0, 286.0, 288.0, 288.0, 263.0, 262.0, 288.0, 288.0, 321.0, 306.0, 288.0, 288.0, 294.0, 282.0, 311.0, 316.0, 288.0, 299.0, 288.0, 294.0, 299.0, 285.0, 263.0, 262.0, 290.0, 280.0, 292.0, 290.0, 285.0, 291.0, 293.0, 289.0, 231.0, 242.0, 290.0, 292.0, 294.0, 285.0, 282.0, 297.0, 263.0, 262.0, 301.0, 286.0, 290.0, 286.0, 31.0, 35.0, 287.0, 295.0, 295.0, 292.0, 289.0, 293.0, 175.0, 179.0, 289.0, 301.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6937653903818739, "mean_inference_ms": 1.2368836731151416, "mean_action_processing_ms": 0.13318189319245324, "mean_env_wait_ms": 0.8352902993544786, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 639.0, "episode_reward_min": 66.0, "episode_reward_mean": 573.38, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 31.0}, "policy_reward_max": {"ppo": 321.0}, "policy_reward_mean": {"ppo": 286.69}, "hist_stats": {"episode_reward": [587.0, 630.0, 582.0, 587.0, 636.0, 530.0, 536.0, 573.0, 576.0, 582.0, 570.0, 570.0, 630.0, 587.0, 573.0, 576.0, 573.0, 573.0, 579.0, 579.0, 579.0, 530.0, 579.0, 522.0, 630.0, 630.0, 576.0, 579.0, 576.0, 573.0, 522.0, 579.0, 633.0, 582.0, 576.0, 582.0, 630.0, 627.0, 579.0, 630.0, 527.0, 582.0, 630.0, 579.0, 587.0, 573.0, 570.0, 582.0, 584.0, 627.0, 576.0, 633.0, 579.0, 630.0, 567.0, 579.0, 582.0, 
579.0, 582.0, 633.0, 579.0, 639.0, 570.0, 576.0, 582.0, 579.0, 525.0, 573.0, 579.0, 587.0, 522.0, 570.0, 576.0, 525.0, 576.0, 627.0, 576.0, 576.0, 627.0, 587.0, 582.0, 584.0, 525.0, 570.0, 582.0, 576.0, 582.0, 473.0, 582.0, 579.0, 579.0, 525.0, 587.0, 576.0, 66.0, 582.0, 587.0, 582.0, 354.0, 590.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [287.0, 300.0, 319.0, 311.0, 288.0, 294.0, 288.0, 299.0, 320.0, 316.0, 263.0, 267.0, 269.0, 267.0, 283.0, 290.0, 295.0, 281.0, 292.0, 290.0, 280.0, 290.0, 286.0, 284.0, 317.0, 313.0, 299.0, 288.0, 284.0, 289.0, 286.0, 290.0, 296.0, 277.0, 283.0, 290.0, 296.0, 283.0, 289.0, 290.0, 296.0, 283.0, 258.0, 272.0, 287.0, 292.0, 266.0, 256.0, 313.0, 317.0, 310.0, 320.0, 287.0, 289.0, 291.0, 288.0, 289.0, 287.0, 285.0, 288.0, 258.0, 264.0, 296.0, 283.0, 313.0, 320.0, 294.0, 288.0, 293.0, 283.0, 292.0, 290.0, 314.0, 316.0, 308.0, 319.0, 290.0, 289.0, 316.0, 314.0, 252.0, 275.0, 288.0, 294.0, 313.0, 317.0, 283.0, 296.0, 288.0, 299.0, 289.0, 284.0, 285.0, 285.0, 292.0, 290.0, 296.0, 288.0, 314.0, 313.0, 286.0, 290.0, 320.0, 313.0, 288.0, 291.0, 319.0, 311.0, 282.0, 285.0, 294.0, 285.0, 291.0, 291.0, 289.0, 290.0, 296.0, 286.0, 318.0, 315.0, 292.0, 287.0, 319.0, 320.0, 278.0, 292.0, 284.0, 292.0, 293.0, 289.0, 286.0, 293.0, 269.0, 256.0, 286.0, 287.0, 291.0, 288.0, 288.0, 299.0, 263.0, 259.0, 284.0, 286.0, 288.0, 288.0, 263.0, 262.0, 288.0, 288.0, 321.0, 306.0, 288.0, 288.0, 294.0, 282.0, 311.0, 316.0, 288.0, 299.0, 288.0, 294.0, 299.0, 285.0, 263.0, 
262.0, 290.0, 280.0, 292.0, 290.0, 285.0, 291.0, 293.0, 289.0, 231.0, 242.0, 290.0, 292.0, 294.0, 285.0, 282.0, 297.0, 263.0, 262.0, 301.0, 286.0, 290.0, 286.0, 31.0, 35.0, 287.0, 295.0, 295.0, 292.0, 289.0, 293.0, 175.0, 179.0, 289.0, 301.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6937653903818739, "mean_inference_ms": 1.2368836731151416, "mean_action_processing_ms": 0.13318189319245324, "mean_env_wait_ms": 0.8352902993544786, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 11955200, "num_agent_steps_trained": 11955200, "num_env_steps_sampled": 5977600, "num_env_steps_trained": 5977600, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 5977600, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 11955200, "timers": {"training_iteration_time_ms": 3555.436, "learn_time_ms": 1057.925, "learn_throughput": 12099.152, "synch_weights_time_ms": 11.784}, "counters": {"num_env_steps_sampled": 5977600, "num_env_steps_trained": 5977600, "num_agent_steps_sampled": 11955200, "num_agent_steps_trained": 11955200}, "done": false, "episodes_total": 14944, "training_iteration": 467, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-30-46", "timestamp": 1666582246, "time_this_iter_s": 3.579774856567383, "time_total_s": 1775.0474836826324, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1775.0474836826324, "timesteps_since_restore": 0, "iterations_since_restore": 467, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.880000000000003, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 197.8, "sparse_reward_min": 20, "sparse_reward_max": 220, "shaped_reward_mean": 177.08, "shaped_reward_min": 26, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.86, "onion_pickup_agent_0_min": 2, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 15.71, "onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 16.71, "useful_onion_pickup_agent_0_min": 2, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 15.56, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.07, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.07, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.02, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, 
"useful_onion_drop_agent_1_mean": 0.04, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.57, "potting_onion_agent_0_min": 1, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 15.41, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 20, "dish_pickup_agent_0_mean": 4.98, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 5.59, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 4.89, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.44, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.05, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.09, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.76, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.27, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.73, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.19, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.57, "optimal_onion_potting_agent_0_min": 1, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 15.41, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 20, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.57, "viable_onion_potting_agent_0_min": 1, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 15.41, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 20, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0024767438881099224, "policy_loss": -0.002866453258320689, "vf_loss": 
7.718918323516846, "vf_explained_var": 0.5764654874801636, "kl": 0.0026280293241143227, "entropy": 0.7643646597862244, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 5990400, "num_env_steps_trained": 5990400, "num_agent_steps_sampled": 11980800, "num_agent_steps_trained": 11980800}, "sampler_results": {"episode_reward_max": 639.0, "episode_reward_min": 66.0, "episode_reward_mean": 572.68, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 31.0}, "policy_reward_max": {"ppo": 321.0}, "policy_reward_mean": {"ppo": 286.34}, "custom_metrics": {"sparse_reward_mean": 197.8, "sparse_reward_min": 20, "sparse_reward_max": 220, "shaped_reward_mean": 177.08, "shaped_reward_min": 26, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.86, "onion_pickup_agent_0_min": 2, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 15.71, 
"onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 16.71, "useful_onion_pickup_agent_0_min": 2, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 15.56, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.07, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.07, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.02, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.04, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.57, "potting_onion_agent_0_min": 1, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 15.41, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 20, "dish_pickup_agent_0_mean": 4.98, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 5.59, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 4.89, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.44, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.05, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.09, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.76, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.27, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.73, "soup_delivery_agent_0_min": 1, 
"soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.19, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.57, "optimal_onion_potting_agent_0_min": 1, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 15.41, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 20, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.57, "viable_onion_potting_agent_0_min": 1, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 15.41, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 20, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [633.0, 582.0, 576.0, 582.0, 630.0, 627.0, 579.0, 630.0, 527.0, 582.0, 630.0, 579.0, 587.0, 573.0, 570.0, 582.0, 584.0, 627.0, 576.0, 633.0, 579.0, 630.0, 567.0, 579.0, 582.0, 579.0, 582.0, 633.0, 579.0, 639.0, 570.0, 576.0, 582.0, 579.0, 525.0, 573.0, 579.0, 587.0, 522.0, 570.0, 576.0, 525.0, 576.0, 627.0, 576.0, 576.0, 627.0, 587.0, 582.0, 584.0, 525.0, 570.0, 582.0, 576.0, 582.0, 473.0, 582.0, 579.0, 579.0, 525.0, 587.0, 576.0, 66.0, 582.0, 587.0, 582.0, 354.0, 590.0, 576.0, 587.0, 525.0, 633.0, 582.0, 530.0, 582.0, 630.0, 582.0, 633.0, 582.0, 633.0, 582.0, 573.0, 582.0, 579.0, 579.0, 579.0, 582.0, 405.0, 573.0, 582.0, 582.0, 582.0, 544.0, 573.0, 576.0, 573.0, 579.0, 579.0, 579.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [313.0, 320.0, 294.0, 288.0, 293.0, 283.0, 292.0, 290.0, 314.0, 316.0, 308.0, 319.0, 290.0, 289.0, 316.0, 314.0, 252.0, 275.0, 288.0, 294.0, 313.0, 317.0, 283.0, 296.0, 288.0, 299.0, 289.0, 284.0, 285.0, 285.0, 292.0, 290.0, 296.0, 288.0, 314.0, 313.0, 286.0, 290.0, 320.0, 313.0, 288.0, 291.0, 319.0, 311.0, 282.0, 285.0, 294.0, 285.0, 291.0, 
291.0, 289.0, 290.0, 296.0, 286.0, 318.0, 315.0, 292.0, 287.0, 319.0, 320.0, 278.0, 292.0, 284.0, 292.0, 293.0, 289.0, 286.0, 293.0, 269.0, 256.0, 286.0, 287.0, 291.0, 288.0, 288.0, 299.0, 263.0, 259.0, 284.0, 286.0, 288.0, 288.0, 263.0, 262.0, 288.0, 288.0, 321.0, 306.0, 288.0, 288.0, 294.0, 282.0, 311.0, 316.0, 288.0, 299.0, 288.0, 294.0, 299.0, 285.0, 263.0, 262.0, 290.0, 280.0, 292.0, 290.0, 285.0, 291.0, 293.0, 289.0, 231.0, 242.0, 290.0, 292.0, 294.0, 285.0, 282.0, 297.0, 263.0, 262.0, 301.0, 286.0, 290.0, 286.0, 31.0, 35.0, 287.0, 295.0, 295.0, 292.0, 289.0, 293.0, 175.0, 179.0, 289.0, 301.0, 291.0, 285.0, 296.0, 291.0, 260.0, 265.0, 314.0, 319.0, 291.0, 291.0, 262.0, 268.0, 294.0, 288.0, 314.0, 316.0, 291.0, 291.0, 314.0, 319.0, 294.0, 288.0, 314.0, 319.0, 299.0, 283.0, 285.0, 288.0, 289.0, 293.0, 289.0, 290.0, 286.0, 293.0, 288.0, 291.0, 284.0, 298.0, 191.0, 214.0, 291.0, 282.0, 288.0, 294.0, 294.0, 288.0, 292.0, 290.0, 266.0, 278.0, 286.0, 287.0, 290.0, 286.0, 286.0, 287.0, 286.0, 293.0, 294.0, 285.0, 287.0, 292.0, 288.0, 288.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6937253357167055, "mean_inference_ms": 1.2367844547201465, "mean_action_processing_ms": 0.1331752417353079, "mean_env_wait_ms": 0.8352193484259972, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 639.0, "episode_reward_min": 66.0, "episode_reward_mean": 572.68, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 31.0}, "policy_reward_max": {"ppo": 321.0}, "policy_reward_mean": {"ppo": 286.34}, "hist_stats": {"episode_reward": [633.0, 582.0, 576.0, 582.0, 630.0, 627.0, 579.0, 630.0, 527.0, 582.0, 630.0, 579.0, 587.0, 573.0, 570.0, 582.0, 584.0, 627.0, 576.0, 633.0, 579.0, 630.0, 567.0, 579.0, 582.0, 579.0, 582.0, 633.0, 579.0, 639.0, 570.0, 576.0, 582.0, 579.0, 525.0, 573.0, 579.0, 587.0, 522.0, 570.0, 576.0, 525.0, 576.0, 627.0, 576.0, 576.0, 627.0, 587.0, 582.0, 584.0, 525.0, 570.0, 582.0, 576.0, 582.0, 473.0, 582.0, 
579.0, 579.0, 525.0, 587.0, 576.0, 66.0, 582.0, 587.0, 582.0, 354.0, 590.0, 576.0, 587.0, 525.0, 633.0, 582.0, 530.0, 582.0, 630.0, 582.0, 633.0, 582.0, 633.0, 582.0, 573.0, 582.0, 579.0, 579.0, 579.0, 582.0, 405.0, 573.0, 582.0, 582.0, 582.0, 544.0, 573.0, 576.0, 573.0, 579.0, 579.0, 579.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [313.0, 320.0, 294.0, 288.0, 293.0, 283.0, 292.0, 290.0, 314.0, 316.0, 308.0, 319.0, 290.0, 289.0, 316.0, 314.0, 252.0, 275.0, 288.0, 294.0, 313.0, 317.0, 283.0, 296.0, 288.0, 299.0, 289.0, 284.0, 285.0, 285.0, 292.0, 290.0, 296.0, 288.0, 314.0, 313.0, 286.0, 290.0, 320.0, 313.0, 288.0, 291.0, 319.0, 311.0, 282.0, 285.0, 294.0, 285.0, 291.0, 291.0, 289.0, 290.0, 296.0, 286.0, 318.0, 315.0, 292.0, 287.0, 319.0, 320.0, 278.0, 292.0, 284.0, 292.0, 293.0, 289.0, 286.0, 293.0, 269.0, 256.0, 286.0, 287.0, 291.0, 288.0, 288.0, 299.0, 263.0, 259.0, 284.0, 286.0, 288.0, 288.0, 263.0, 262.0, 288.0, 288.0, 321.0, 306.0, 288.0, 288.0, 294.0, 282.0, 311.0, 316.0, 288.0, 299.0, 288.0, 294.0, 299.0, 285.0, 263.0, 262.0, 290.0, 280.0, 292.0, 290.0, 285.0, 291.0, 293.0, 289.0, 231.0, 242.0, 290.0, 292.0, 294.0, 285.0, 282.0, 297.0, 263.0, 262.0, 301.0, 286.0, 290.0, 286.0, 31.0, 35.0, 287.0, 295.0, 295.0, 292.0, 289.0, 293.0, 175.0, 179.0, 289.0, 301.0, 291.0, 285.0, 296.0, 291.0, 260.0, 265.0, 314.0, 319.0, 291.0, 291.0, 262.0, 268.0, 294.0, 288.0, 314.0, 316.0, 291.0, 291.0, 314.0, 319.0, 294.0, 288.0, 314.0, 319.0, 299.0, 283.0, 285.0, 288.0, 289.0, 
293.0, 289.0, 290.0, 286.0, 293.0, 288.0, 291.0, 284.0, 298.0, 191.0, 214.0, 291.0, 282.0, 288.0, 294.0, 294.0, 288.0, 292.0, 290.0, 266.0, 278.0, 286.0, 287.0, 290.0, 286.0, 286.0, 287.0, 286.0, 293.0, 294.0, 285.0, 287.0, 292.0, 288.0, 288.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6937253357167055, "mean_inference_ms": 1.2367844547201465, "mean_action_processing_ms": 0.1331752417353079, "mean_env_wait_ms": 0.8352193484259972, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 11980800, "num_agent_steps_trained": 11980800, "num_env_steps_sampled": 5990400, "num_env_steps_trained": 5990400, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 5990400, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 11980800, "timers": {"training_iteration_time_ms": 3566.61, "learn_time_ms": 1066.856, "learn_throughput": 11997.872, "synch_weights_time_ms": 12.749}, "counters": {"num_env_steps_sampled": 5990400, "num_env_steps_trained": 5990400, "num_agent_steps_sampled": 11980800, "num_agent_steps_trained": 11980800}, "done": false, "episodes_total": 14976, "training_iteration": 468, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-30-50", "timestamp": 1666582250, "time_this_iter_s": 3.6539394855499268, "time_total_s": 1778.7014231681824, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1778.7014231681824, "timesteps_since_restore": 0, "iterations_since_restore": 468, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.416666666666668, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 197.6, "sparse_reward_min": 20, "sparse_reward_max": 220, "shaped_reward_mean": 177.19, "shaped_reward_min": 26, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.97, "onion_pickup_agent_0_min": 2, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 15.69, "onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 20, "useful_onion_pickup_agent_0_mean": 16.86, "useful_onion_pickup_agent_0_min": 2, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 15.51, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 20, "onion_drop_agent_0_mean": 0.05, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.08, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.0, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 0, 
"useful_onion_drop_agent_1_mean": 0.04, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.68, "potting_onion_agent_0_min": 1, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 15.39, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 20, "dish_pickup_agent_0_mean": 4.97, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 5.58, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 4.87, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.44, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.06, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.09, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.04, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.75, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.26, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 4.74, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.17, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.68, "optimal_onion_potting_agent_0_min": 1, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 15.39, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 20, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.68, "viable_onion_potting_agent_0_min": 1, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 15.39, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 20, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0016068818513303995, "policy_loss": 0.0012329340679571033, "vf_loss": 
7.566259384155273, "vf_explained_var": 0.6088271737098694, "kl": 0.002691782545298338, "entropy": 0.7653552293777466, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 6003200, "num_env_steps_trained": 6003200, "num_agent_steps_sampled": 12006400, "num_agent_steps_trained": 12006400}, "sampler_results": {"episode_reward_max": 636.0, "episode_reward_min": 66.0, "episode_reward_mean": 572.39, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 31.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 286.195}, "custom_metrics": {"sparse_reward_mean": 197.6, "sparse_reward_min": 20, "sparse_reward_max": 220, "shaped_reward_mean": 177.19, "shaped_reward_min": 26, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.97, "onion_pickup_agent_0_min": 2, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 15.69, 
"onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 20, "useful_onion_pickup_agent_0_mean": 16.86, "useful_onion_pickup_agent_0_min": 2, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 15.51, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 20, "onion_drop_agent_0_mean": 0.05, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.08, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.0, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 0, "useful_onion_drop_agent_1_mean": 0.04, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.68, "potting_onion_agent_0_min": 1, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 15.39, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 20, "dish_pickup_agent_0_mean": 4.97, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 5.58, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 4.87, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.44, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.06, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.09, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.04, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.75, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.26, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 4.74, "soup_delivery_agent_0_min": 1, 
"soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.17, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.68, "optimal_onion_potting_agent_0_min": 1, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 15.39, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 20, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.68, "viable_onion_potting_agent_0_min": 1, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 15.39, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 20, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 579.0, 525.0, 573.0, 579.0, 587.0, 522.0, 570.0, 576.0, 525.0, 576.0, 627.0, 576.0, 576.0, 627.0, 587.0, 582.0, 584.0, 525.0, 570.0, 582.0, 576.0, 582.0, 473.0, 582.0, 579.0, 579.0, 525.0, 587.0, 576.0, 66.0, 582.0, 587.0, 582.0, 354.0, 590.0, 576.0, 587.0, 525.0, 633.0, 582.0, 530.0, 582.0, 630.0, 582.0, 633.0, 582.0, 633.0, 582.0, 573.0, 582.0, 579.0, 579.0, 579.0, 582.0, 405.0, 573.0, 582.0, 582.0, 582.0, 544.0, 573.0, 576.0, 573.0, 579.0, 579.0, 579.0, 576.0, 627.0, 579.0, 579.0, 636.0, 579.0, 573.0, 579.0, 636.0, 630.0, 582.0, 579.0, 579.0, 576.0, 636.0, 627.0, 636.0, 519.0, 627.0, 582.0, 587.0, 579.0, 587.0, 582.0, 636.0, 579.0, 633.0, 579.0, 579.0, 579.0, 570.0, 522.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [293.0, 289.0, 286.0, 293.0, 269.0, 256.0, 286.0, 287.0, 291.0, 288.0, 288.0, 299.0, 263.0, 259.0, 284.0, 286.0, 288.0, 288.0, 263.0, 262.0, 288.0, 288.0, 321.0, 306.0, 288.0, 288.0, 294.0, 282.0, 311.0, 316.0, 288.0, 299.0, 288.0, 294.0, 299.0, 285.0, 263.0, 262.0, 290.0, 280.0, 292.0, 290.0, 285.0, 291.0, 293.0, 289.0, 231.0, 242.0, 290.0, 
292.0, 294.0, 285.0, 282.0, 297.0, 263.0, 262.0, 301.0, 286.0, 290.0, 286.0, 31.0, 35.0, 287.0, 295.0, 295.0, 292.0, 289.0, 293.0, 175.0, 179.0, 289.0, 301.0, 291.0, 285.0, 296.0, 291.0, 260.0, 265.0, 314.0, 319.0, 291.0, 291.0, 262.0, 268.0, 294.0, 288.0, 314.0, 316.0, 291.0, 291.0, 314.0, 319.0, 294.0, 288.0, 314.0, 319.0, 299.0, 283.0, 285.0, 288.0, 289.0, 293.0, 289.0, 290.0, 286.0, 293.0, 288.0, 291.0, 284.0, 298.0, 191.0, 214.0, 291.0, 282.0, 288.0, 294.0, 294.0, 288.0, 292.0, 290.0, 266.0, 278.0, 286.0, 287.0, 290.0, 286.0, 286.0, 287.0, 286.0, 293.0, 294.0, 285.0, 287.0, 292.0, 288.0, 288.0, 316.0, 311.0, 291.0, 288.0, 285.0, 294.0, 317.0, 319.0, 288.0, 291.0, 288.0, 285.0, 293.0, 286.0, 319.0, 317.0, 315.0, 315.0, 299.0, 283.0, 288.0, 291.0, 291.0, 288.0, 284.0, 292.0, 320.0, 316.0, 316.0, 311.0, 312.0, 324.0, 266.0, 253.0, 311.0, 316.0, 293.0, 289.0, 288.0, 299.0, 286.0, 293.0, 288.0, 299.0, 297.0, 285.0, 317.0, 319.0, 286.0, 293.0, 320.0, 313.0, 287.0, 292.0, 286.0, 293.0, 288.0, 291.0, 290.0, 280.0, 258.0, 264.0, 295.0, 287.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6936868282656675, "mean_inference_ms": 1.2366718724154204, "mean_action_processing_ms": 0.133169323115993, "mean_env_wait_ms": 0.8351444322158054, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 636.0, "episode_reward_min": 66.0, "episode_reward_mean": 572.39, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 31.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 286.195}, "hist_stats": {"episode_reward": [582.0, 579.0, 525.0, 573.0, 579.0, 587.0, 522.0, 570.0, 576.0, 525.0, 576.0, 627.0, 576.0, 576.0, 627.0, 587.0, 582.0, 584.0, 525.0, 570.0, 582.0, 576.0, 582.0, 473.0, 582.0, 579.0, 579.0, 525.0, 587.0, 576.0, 66.0, 582.0, 587.0, 582.0, 354.0, 590.0, 576.0, 587.0, 525.0, 633.0, 582.0, 530.0, 582.0, 630.0, 582.0, 633.0, 582.0, 633.0, 582.0, 573.0, 582.0, 579.0, 579.0, 579.0, 582.0, 405.0, 573.0, 
582.0, 582.0, 582.0, 544.0, 573.0, 576.0, 573.0, 579.0, 579.0, 579.0, 576.0, 627.0, 579.0, 579.0, 636.0, 579.0, 573.0, 579.0, 636.0, 630.0, 582.0, 579.0, 579.0, 576.0, 636.0, 627.0, 636.0, 519.0, 627.0, 582.0, 587.0, 579.0, 587.0, 582.0, 636.0, 579.0, 633.0, 579.0, 579.0, 579.0, 570.0, 522.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [293.0, 289.0, 286.0, 293.0, 269.0, 256.0, 286.0, 287.0, 291.0, 288.0, 288.0, 299.0, 263.0, 259.0, 284.0, 286.0, 288.0, 288.0, 263.0, 262.0, 288.0, 288.0, 321.0, 306.0, 288.0, 288.0, 294.0, 282.0, 311.0, 316.0, 288.0, 299.0, 288.0, 294.0, 299.0, 285.0, 263.0, 262.0, 290.0, 280.0, 292.0, 290.0, 285.0, 291.0, 293.0, 289.0, 231.0, 242.0, 290.0, 292.0, 294.0, 285.0, 282.0, 297.0, 263.0, 262.0, 301.0, 286.0, 290.0, 286.0, 31.0, 35.0, 287.0, 295.0, 295.0, 292.0, 289.0, 293.0, 175.0, 179.0, 289.0, 301.0, 291.0, 285.0, 296.0, 291.0, 260.0, 265.0, 314.0, 319.0, 291.0, 291.0, 262.0, 268.0, 294.0, 288.0, 314.0, 316.0, 291.0, 291.0, 314.0, 319.0, 294.0, 288.0, 314.0, 319.0, 299.0, 283.0, 285.0, 288.0, 289.0, 293.0, 289.0, 290.0, 286.0, 293.0, 288.0, 291.0, 284.0, 298.0, 191.0, 214.0, 291.0, 282.0, 288.0, 294.0, 294.0, 288.0, 292.0, 290.0, 266.0, 278.0, 286.0, 287.0, 290.0, 286.0, 286.0, 287.0, 286.0, 293.0, 294.0, 285.0, 287.0, 292.0, 288.0, 288.0, 316.0, 311.0, 291.0, 288.0, 285.0, 294.0, 317.0, 319.0, 288.0, 291.0, 288.0, 285.0, 293.0, 286.0, 319.0, 317.0, 315.0, 315.0, 299.0, 283.0, 288.0, 291.0, 291.0, 288.0, 284.0, 292.0, 320.0, 316.0, 316.0, 
311.0, 312.0, 324.0, 266.0, 253.0, 311.0, 316.0, 293.0, 289.0, 288.0, 299.0, 286.0, 293.0, 288.0, 299.0, 297.0, 285.0, 317.0, 319.0, 286.0, 293.0, 320.0, 313.0, 287.0, 292.0, 286.0, 293.0, 288.0, 291.0, 290.0, 280.0, 258.0, 264.0, 295.0, 287.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6936868282656675, "mean_inference_ms": 1.2366718724154204, "mean_action_processing_ms": 0.133169323115993, "mean_env_wait_ms": 0.8351444322158054, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 12006400, "num_agent_steps_trained": 12006400, "num_env_steps_sampled": 6003200, "num_env_steps_trained": 6003200, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 6003200, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 12006400, "timers": {"training_iteration_time_ms": 3573.074, "learn_time_ms": 1067.591, "learn_throughput": 11989.609, "synch_weights_time_ms": 11.929}, "counters": {"num_env_steps_sampled": 6003200, "num_env_steps_trained": 6003200, "num_agent_steps_sampled": 12006400, "num_agent_steps_trained": 12006400}, "done": false, "episodes_total": 15008, "training_iteration": 469, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-30-53", "timestamp": 1666582253, "time_this_iter_s": 3.6023151874542236, "time_total_s": 1782.3037383556366, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1782.3037383556366, "timesteps_since_restore": 0, "iterations_since_restore": 469, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.7, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 201.6, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 180.41, "shaped_reward_min": 114, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.3, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 15.98, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 17.21, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 15.86, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.05, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.04, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.0, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 0, 
"useful_onion_drop_agent_1_mean": 0.02, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.9, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 15.77, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 20, "dish_pickup_agent_0_mean": 5.02, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 5.65, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 4.93, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.52, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.05, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.06, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.84, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.37, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 4.84, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.25, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.9, "optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 15.77, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 20, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.9, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 15.77, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 20, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0004652185889426619, "policy_loss": 9.670722647570074e-05, "vf_loss": 
7.476959228515625, "vf_explained_var": 0.5936903953552246, "kl": 0.002335474593564868, "entropy": 0.7583696246147156, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 6016000, "num_env_steps_trained": 6016000, "num_agent_steps_sampled": 12032000, "num_agent_steps_trained": 12032000}, "sampler_results": {"episode_reward_max": 639.0, "episode_reward_min": 354.0, "episode_reward_mean": 583.61, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 175.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 291.805}, "custom_metrics": {"sparse_reward_mean": 201.6, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 180.41, "shaped_reward_min": 114, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.3, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 15.98, 
"onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 17.21, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 15.86, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.05, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.04, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.0, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 0, "useful_onion_drop_agent_1_mean": 0.02, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.9, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 15.77, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 20, "dish_pickup_agent_0_mean": 5.02, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 5.65, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 4.93, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.52, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.05, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.06, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.84, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.37, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 4.84, "soup_delivery_agent_0_min": 2, 
"soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.25, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.9, "optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 15.77, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 20, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.9, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 15.77, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 20, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [587.0, 582.0, 354.0, 590.0, 576.0, 587.0, 525.0, 633.0, 582.0, 530.0, 582.0, 630.0, 582.0, 633.0, 582.0, 633.0, 582.0, 573.0, 582.0, 579.0, 579.0, 579.0, 582.0, 405.0, 573.0, 582.0, 582.0, 582.0, 544.0, 573.0, 576.0, 573.0, 579.0, 579.0, 579.0, 576.0, 627.0, 579.0, 579.0, 636.0, 579.0, 573.0, 579.0, 636.0, 630.0, 582.0, 579.0, 579.0, 576.0, 636.0, 627.0, 636.0, 519.0, 627.0, 582.0, 587.0, 579.0, 587.0, 582.0, 636.0, 579.0, 633.0, 579.0, 579.0, 579.0, 570.0, 522.0, 582.0, 582.0, 579.0, 633.0, 582.0, 582.0, 630.0, 576.0, 582.0, 570.0, 582.0, 570.0, 633.0, 587.0, 582.0, 579.0, 633.0, 584.0, 456.0, 630.0, 582.0, 587.0, 582.0, 582.0, 573.0, 587.0, 587.0, 519.0, 627.0, 582.0, 630.0, 630.0, 639.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [295.0, 292.0, 289.0, 293.0, 175.0, 179.0, 289.0, 301.0, 291.0, 285.0, 296.0, 291.0, 260.0, 265.0, 314.0, 319.0, 291.0, 291.0, 262.0, 268.0, 294.0, 288.0, 314.0, 316.0, 291.0, 291.0, 314.0, 319.0, 294.0, 288.0, 314.0, 319.0, 299.0, 283.0, 285.0, 288.0, 289.0, 293.0, 289.0, 290.0, 286.0, 293.0, 288.0, 291.0, 284.0, 298.0, 191.0, 214.0, 291.0, 
282.0, 288.0, 294.0, 294.0, 288.0, 292.0, 290.0, 266.0, 278.0, 286.0, 287.0, 290.0, 286.0, 286.0, 287.0, 286.0, 293.0, 294.0, 285.0, 287.0, 292.0, 288.0, 288.0, 316.0, 311.0, 291.0, 288.0, 285.0, 294.0, 317.0, 319.0, 288.0, 291.0, 288.0, 285.0, 293.0, 286.0, 319.0, 317.0, 315.0, 315.0, 299.0, 283.0, 288.0, 291.0, 291.0, 288.0, 284.0, 292.0, 320.0, 316.0, 316.0, 311.0, 312.0, 324.0, 266.0, 253.0, 311.0, 316.0, 293.0, 289.0, 288.0, 299.0, 286.0, 293.0, 288.0, 299.0, 297.0, 285.0, 317.0, 319.0, 286.0, 293.0, 320.0, 313.0, 287.0, 292.0, 286.0, 293.0, 288.0, 291.0, 290.0, 280.0, 258.0, 264.0, 295.0, 287.0, 292.0, 290.0, 287.0, 292.0, 314.0, 319.0, 288.0, 294.0, 296.0, 286.0, 314.0, 316.0, 292.0, 284.0, 296.0, 286.0, 294.0, 276.0, 293.0, 289.0, 287.0, 283.0, 315.0, 318.0, 297.0, 290.0, 290.0, 292.0, 286.0, 293.0, 317.0, 316.0, 291.0, 293.0, 223.0, 233.0, 309.0, 321.0, 291.0, 291.0, 289.0, 298.0, 289.0, 293.0, 294.0, 288.0, 279.0, 294.0, 296.0, 291.0, 286.0, 301.0, 256.0, 263.0, 314.0, 313.0, 285.0, 297.0, 316.0, 314.0, 314.0, 316.0, 319.0, 320.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6936436685004764, "mean_inference_ms": 1.23656257555819, "mean_action_processing_ms": 0.13316370238818215, "mean_env_wait_ms": 0.8350735735228274, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 639.0, "episode_reward_min": 354.0, "episode_reward_mean": 583.61, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 175.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 291.805}, "hist_stats": {"episode_reward": [587.0, 582.0, 354.0, 590.0, 576.0, 587.0, 525.0, 633.0, 582.0, 530.0, 582.0, 630.0, 582.0, 633.0, 582.0, 633.0, 582.0, 573.0, 582.0, 579.0, 579.0, 579.0, 582.0, 405.0, 573.0, 582.0, 582.0, 582.0, 544.0, 573.0, 576.0, 573.0, 579.0, 579.0, 579.0, 576.0, 627.0, 579.0, 579.0, 636.0, 579.0, 573.0, 579.0, 636.0, 630.0, 582.0, 579.0, 579.0, 576.0, 636.0, 627.0, 636.0, 519.0, 627.0, 582.0, 587.0, 579.0, 
587.0, 582.0, 636.0, 579.0, 633.0, 579.0, 579.0, 579.0, 570.0, 522.0, 582.0, 582.0, 579.0, 633.0, 582.0, 582.0, 630.0, 576.0, 582.0, 570.0, 582.0, 570.0, 633.0, 587.0, 582.0, 579.0, 633.0, 584.0, 456.0, 630.0, 582.0, 587.0, 582.0, 582.0, 573.0, 587.0, 587.0, 519.0, 627.0, 582.0, 630.0, 630.0, 639.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [295.0, 292.0, 289.0, 293.0, 175.0, 179.0, 289.0, 301.0, 291.0, 285.0, 296.0, 291.0, 260.0, 265.0, 314.0, 319.0, 291.0, 291.0, 262.0, 268.0, 294.0, 288.0, 314.0, 316.0, 291.0, 291.0, 314.0, 319.0, 294.0, 288.0, 314.0, 319.0, 299.0, 283.0, 285.0, 288.0, 289.0, 293.0, 289.0, 290.0, 286.0, 293.0, 288.0, 291.0, 284.0, 298.0, 191.0, 214.0, 291.0, 282.0, 288.0, 294.0, 294.0, 288.0, 292.0, 290.0, 266.0, 278.0, 286.0, 287.0, 290.0, 286.0, 286.0, 287.0, 286.0, 293.0, 294.0, 285.0, 287.0, 292.0, 288.0, 288.0, 316.0, 311.0, 291.0, 288.0, 285.0, 294.0, 317.0, 319.0, 288.0, 291.0, 288.0, 285.0, 293.0, 286.0, 319.0, 317.0, 315.0, 315.0, 299.0, 283.0, 288.0, 291.0, 291.0, 288.0, 284.0, 292.0, 320.0, 316.0, 316.0, 311.0, 312.0, 324.0, 266.0, 253.0, 311.0, 316.0, 293.0, 289.0, 288.0, 299.0, 286.0, 293.0, 288.0, 299.0, 297.0, 285.0, 317.0, 319.0, 286.0, 293.0, 320.0, 313.0, 287.0, 292.0, 286.0, 293.0, 288.0, 291.0, 290.0, 280.0, 258.0, 264.0, 295.0, 287.0, 292.0, 290.0, 287.0, 292.0, 314.0, 319.0, 288.0, 294.0, 296.0, 286.0, 314.0, 316.0, 292.0, 284.0, 296.0, 286.0, 294.0, 276.0, 293.0, 289.0, 287.0, 283.0, 315.0, 318.0, 297.0, 290.0, 290.0, 292.0, 286.0, 
293.0, 317.0, 316.0, 291.0, 293.0, 223.0, 233.0, 309.0, 321.0, 291.0, 291.0, 289.0, 298.0, 289.0, 293.0, 294.0, 288.0, 279.0, 294.0, 296.0, 291.0, 286.0, 301.0, 256.0, 263.0, 314.0, 313.0, 285.0, 297.0, 316.0, 314.0, 314.0, 316.0, 319.0, 320.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6936436685004764, "mean_inference_ms": 1.23656257555819, "mean_action_processing_ms": 0.13316370238818215, "mean_env_wait_ms": 0.8350735735228274, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 12032000, "num_agent_steps_trained": 12032000, "num_env_steps_sampled": 6016000, "num_env_steps_trained": 6016000, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 6016000, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 12032000, "timers": {"training_iteration_time_ms": 3573.205, "learn_time_ms": 1072.241, "learn_throughput": 11937.62, "synch_weights_time_ms": 11.283}, "counters": {"num_env_steps_sampled": 6016000, "num_env_steps_trained": 6016000, "num_agent_steps_sampled": 12032000, "num_agent_steps_trained": 12032000}, "done": false, "episodes_total": 15040, "training_iteration": 470, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-30-57", "timestamp": 1666582257, "time_this_iter_s": 3.593625545501709, "time_total_s": 1785.8973639011383, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1785.8973639011383, "timesteps_since_restore": 0, "iterations_since_restore": 470, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.7, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 202.6, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 180.25, "shaped_reward_min": 122, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.33, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 16.0, "onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 17.22, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 15.86, "useful_onion_pickup_agent_1_min": 11, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.06, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.03, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.0, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 0, 
"useful_onion_drop_agent_1_mean": 0.0, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 0, "potting_onion_agent_0_mean": 16.88, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 15.77, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 20, "dish_pickup_agent_0_mean": 5.04, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 5.55, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 4.96, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.44, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.03, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.04, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 4.9, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.32, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.9, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.23, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.88, "optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 15.77, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 20, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.88, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 15.77, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 20, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0006142787169665098, "policy_loss": -0.000997619703412056, "vf_loss": 
7.701852321624756, "vf_explained_var": 0.5957615375518799, "kl": 0.002363224048167467, "entropy": 0.7736892700195312, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 6028800, "num_env_steps_trained": 6028800, "num_agent_steps_sampled": 12057600, "num_agent_steps_trained": 12057600}, "sampler_results": {"episode_reward_max": 639.0, "episode_reward_min": 402.0, "episode_reward_mean": 585.45, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 200.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 292.725}, "custom_metrics": {"sparse_reward_mean": 202.6, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 180.25, "shaped_reward_min": 122, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.33, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 16.0, 
"onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 17.22, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 15.86, "useful_onion_pickup_agent_1_min": 11, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.06, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.03, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.0, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 0, "useful_onion_drop_agent_1_mean": 0.0, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 0, "potting_onion_agent_0_mean": 16.88, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 15.77, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 20, "dish_pickup_agent_0_mean": 5.04, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 5.55, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 4.96, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.44, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.03, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.04, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 4.9, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.32, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.9, "soup_delivery_agent_0_min": 3, 
"soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.23, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.88, "optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 15.77, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 20, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.88, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 15.77, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 20, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 579.0, 579.0, 576.0, 627.0, 579.0, 579.0, 636.0, 579.0, 573.0, 579.0, 636.0, 630.0, 582.0, 579.0, 579.0, 576.0, 636.0, 627.0, 636.0, 519.0, 627.0, 582.0, 587.0, 579.0, 587.0, 582.0, 636.0, 579.0, 633.0, 579.0, 579.0, 579.0, 570.0, 522.0, 582.0, 582.0, 579.0, 633.0, 582.0, 582.0, 630.0, 576.0, 582.0, 570.0, 582.0, 570.0, 633.0, 587.0, 582.0, 579.0, 633.0, 584.0, 456.0, 630.0, 582.0, 587.0, 582.0, 582.0, 573.0, 587.0, 587.0, 519.0, 627.0, 582.0, 630.0, 630.0, 639.0, 630.0, 530.0, 582.0, 579.0, 630.0, 582.0, 570.0, 579.0, 627.0, 582.0, 519.0, 587.0, 573.0, 627.0, 402.0, 579.0, 525.0, 630.0, 579.0, 582.0, 633.0, 564.0, 519.0, 579.0, 627.0, 579.0, 525.0, 579.0, 582.0, 522.0, 636.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [286.0, 293.0, 294.0, 285.0, 287.0, 292.0, 288.0, 288.0, 316.0, 311.0, 291.0, 288.0, 285.0, 294.0, 317.0, 319.0, 288.0, 291.0, 288.0, 285.0, 293.0, 286.0, 319.0, 317.0, 315.0, 315.0, 299.0, 283.0, 288.0, 291.0, 291.0, 288.0, 284.0, 292.0, 320.0, 316.0, 316.0, 311.0, 312.0, 324.0, 266.0, 253.0, 311.0, 316.0, 293.0, 289.0, 288.0, 299.0, 286.0, 
293.0, 288.0, 299.0, 297.0, 285.0, 317.0, 319.0, 286.0, 293.0, 320.0, 313.0, 287.0, 292.0, 286.0, 293.0, 288.0, 291.0, 290.0, 280.0, 258.0, 264.0, 295.0, 287.0, 292.0, 290.0, 287.0, 292.0, 314.0, 319.0, 288.0, 294.0, 296.0, 286.0, 314.0, 316.0, 292.0, 284.0, 296.0, 286.0, 294.0, 276.0, 293.0, 289.0, 287.0, 283.0, 315.0, 318.0, 297.0, 290.0, 290.0, 292.0, 286.0, 293.0, 317.0, 316.0, 291.0, 293.0, 223.0, 233.0, 309.0, 321.0, 291.0, 291.0, 289.0, 298.0, 289.0, 293.0, 294.0, 288.0, 279.0, 294.0, 296.0, 291.0, 286.0, 301.0, 256.0, 263.0, 314.0, 313.0, 285.0, 297.0, 316.0, 314.0, 314.0, 316.0, 319.0, 320.0, 319.0, 311.0, 260.0, 270.0, 291.0, 291.0, 289.0, 290.0, 310.0, 320.0, 294.0, 288.0, 290.0, 280.0, 295.0, 284.0, 314.0, 313.0, 292.0, 290.0, 252.0, 267.0, 288.0, 299.0, 285.0, 288.0, 321.0, 306.0, 202.0, 200.0, 288.0, 291.0, 260.0, 265.0, 319.0, 311.0, 291.0, 288.0, 294.0, 288.0, 319.0, 314.0, 279.0, 285.0, 264.0, 255.0, 292.0, 287.0, 313.0, 314.0, 286.0, 293.0, 261.0, 264.0, 293.0, 286.0, 288.0, 294.0, 261.0, 261.0, 320.0, 316.0, 290.0, 289.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6935970828707496, "mean_inference_ms": 1.2364517360045373, "mean_action_processing_ms": 0.13315791629657373, "mean_env_wait_ms": 0.8350010650008328, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 639.0, "episode_reward_min": 402.0, "episode_reward_mean": 585.45, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 200.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 292.725}, "hist_stats": {"episode_reward": [579.0, 579.0, 579.0, 576.0, 627.0, 579.0, 579.0, 636.0, 579.0, 573.0, 579.0, 636.0, 630.0, 582.0, 579.0, 579.0, 576.0, 636.0, 627.0, 636.0, 519.0, 627.0, 582.0, 587.0, 579.0, 587.0, 582.0, 636.0, 579.0, 633.0, 579.0, 579.0, 579.0, 570.0, 522.0, 582.0, 582.0, 579.0, 633.0, 582.0, 582.0, 630.0, 576.0, 582.0, 570.0, 582.0, 570.0, 633.0, 587.0, 582.0, 579.0, 633.0, 584.0, 456.0, 630.0, 582.0, 
587.0, 582.0, 582.0, 573.0, 587.0, 587.0, 519.0, 627.0, 582.0, 630.0, 630.0, 639.0, 630.0, 530.0, 582.0, 579.0, 630.0, 582.0, 570.0, 579.0, 627.0, 582.0, 519.0, 587.0, 573.0, 627.0, 402.0, 579.0, 525.0, 630.0, 579.0, 582.0, 633.0, 564.0, 519.0, 579.0, 627.0, 579.0, 525.0, 579.0, 582.0, 522.0, 636.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [286.0, 293.0, 294.0, 285.0, 287.0, 292.0, 288.0, 288.0, 316.0, 311.0, 291.0, 288.0, 285.0, 294.0, 317.0, 319.0, 288.0, 291.0, 288.0, 285.0, 293.0, 286.0, 319.0, 317.0, 315.0, 315.0, 299.0, 283.0, 288.0, 291.0, 291.0, 288.0, 284.0, 292.0, 320.0, 316.0, 316.0, 311.0, 312.0, 324.0, 266.0, 253.0, 311.0, 316.0, 293.0, 289.0, 288.0, 299.0, 286.0, 293.0, 288.0, 299.0, 297.0, 285.0, 317.0, 319.0, 286.0, 293.0, 320.0, 313.0, 287.0, 292.0, 286.0, 293.0, 288.0, 291.0, 290.0, 280.0, 258.0, 264.0, 295.0, 287.0, 292.0, 290.0, 287.0, 292.0, 314.0, 319.0, 288.0, 294.0, 296.0, 286.0, 314.0, 316.0, 292.0, 284.0, 296.0, 286.0, 294.0, 276.0, 293.0, 289.0, 287.0, 283.0, 315.0, 318.0, 297.0, 290.0, 290.0, 292.0, 286.0, 293.0, 317.0, 316.0, 291.0, 293.0, 223.0, 233.0, 309.0, 321.0, 291.0, 291.0, 289.0, 298.0, 289.0, 293.0, 294.0, 288.0, 279.0, 294.0, 296.0, 291.0, 286.0, 301.0, 256.0, 263.0, 314.0, 313.0, 285.0, 297.0, 316.0, 314.0, 314.0, 316.0, 319.0, 320.0, 319.0, 311.0, 260.0, 270.0, 291.0, 291.0, 289.0, 290.0, 310.0, 320.0, 294.0, 288.0, 290.0, 280.0, 295.0, 284.0, 314.0, 313.0, 292.0, 290.0, 252.0, 267.0, 288.0, 299.0, 285.0, 288.0, 321.0, 306.0, 
202.0, 200.0, 288.0, 291.0, 260.0, 265.0, 319.0, 311.0, 291.0, 288.0, 294.0, 288.0, 319.0, 314.0, 279.0, 285.0, 264.0, 255.0, 292.0, 287.0, 313.0, 314.0, 286.0, 293.0, 261.0, 264.0, 293.0, 286.0, 288.0, 294.0, 261.0, 261.0, 320.0, 316.0, 290.0, 289.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6935970828707496, "mean_inference_ms": 1.2364517360045373, "mean_action_processing_ms": 0.13315791629657373, "mean_env_wait_ms": 0.8350010650008328, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 12057600, "num_agent_steps_trained": 12057600, "num_env_steps_sampled": 6028800, "num_env_steps_trained": 6028800, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 6028800, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 12057600, "timers": {"training_iteration_time_ms": 3569.429, "learn_time_ms": 1068.864, "learn_throughput": 11975.332, "synch_weights_time_ms": 11.406}, "counters": {"num_env_steps_sampled": 6028800, "num_env_steps_trained": 6028800, "num_agent_steps_sampled": 12057600, "num_agent_steps_trained": 12057600}, "done": false, "episodes_total": 15072, "training_iteration": 471, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-31-01", "timestamp": 1666582261, "time_this_iter_s": 3.6229758262634277, "time_total_s": 1789.5203397274017, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1789.5203397274017, "timesteps_since_restore": 0, "iterations_since_restore": 471, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.439999999999998, "ram_util_percent": 10.62}}
+{"custom_metrics": {"sparse_reward_mean": 199.0, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 176.95, "shaped_reward_min": 122, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.92, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 15.8, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.79, "useful_onion_pickup_agent_0_min": 9, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 15.64, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.06, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.03, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.01, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, 
"useful_onion_drop_agent_1_mean": 0.01, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.49, "potting_onion_agent_0_min": 9, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 15.56, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.02, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 5.43, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 4.89, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.31, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.03, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.04, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 4.84, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.2, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.84, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.12, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.49, "optimal_onion_potting_agent_0_min": 9, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 15.56, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.49, "viable_onion_potting_agent_0_min": 9, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 15.56, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0034096711315214634, "policy_loss": -0.0038029851857572794, "vf_loss": 
7.788510799407959, "vf_explained_var": 0.601466715335846, "kl": 0.0035887700505554676, "entropy": 0.7710731625556946, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 6041600, "num_env_steps_trained": 6041600, "num_agent_steps_sampled": 12083200, "num_agent_steps_trained": 12083200}, "sampler_results": {"episode_reward_max": 639.0, "episode_reward_min": 402.0, "episode_reward_mean": 574.95, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 200.0}, "policy_reward_max": {"ppo": 321.0}, "policy_reward_mean": {"ppo": 287.475}, "custom_metrics": {"sparse_reward_mean": 199.0, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 176.95, "shaped_reward_min": 122, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.92, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 15.8, 
"onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.79, "useful_onion_pickup_agent_0_min": 9, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 15.64, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.06, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.03, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.01, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.01, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.49, "potting_onion_agent_0_min": 9, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 15.56, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.02, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 5.43, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 4.89, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.31, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.03, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.04, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 4.84, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.2, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.84, "soup_delivery_agent_0_min": 3, 
"soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.12, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.49, "optimal_onion_potting_agent_0_min": 9, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 15.56, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.49, "viable_onion_potting_agent_0_min": 9, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 15.56, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 570.0, 522.0, 582.0, 582.0, 579.0, 633.0, 582.0, 582.0, 630.0, 576.0, 582.0, 570.0, 582.0, 570.0, 633.0, 587.0, 582.0, 579.0, 633.0, 584.0, 456.0, 630.0, 582.0, 587.0, 582.0, 582.0, 573.0, 587.0, 587.0, 519.0, 627.0, 582.0, 630.0, 630.0, 639.0, 630.0, 530.0, 582.0, 579.0, 630.0, 582.0, 570.0, 579.0, 627.0, 582.0, 519.0, 587.0, 573.0, 627.0, 402.0, 579.0, 525.0, 630.0, 579.0, 582.0, 633.0, 564.0, 519.0, 579.0, 627.0, 579.0, 525.0, 579.0, 582.0, 522.0, 636.0, 579.0, 582.0, 579.0, 582.0, 516.0, 473.0, 582.0, 576.0, 576.0, 587.0, 582.0, 465.0, 573.0, 576.0, 579.0, 630.0, 579.0, 582.0, 576.0, 573.0, 576.0, 519.0, 570.0, 516.0, 576.0, 570.0, 525.0, 582.0, 579.0, 627.0, 570.0, 408.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [288.0, 291.0, 290.0, 280.0, 258.0, 264.0, 295.0, 287.0, 292.0, 290.0, 287.0, 292.0, 314.0, 319.0, 288.0, 294.0, 296.0, 286.0, 314.0, 316.0, 292.0, 284.0, 296.0, 286.0, 294.0, 276.0, 293.0, 289.0, 287.0, 283.0, 315.0, 318.0, 297.0, 290.0, 290.0, 292.0, 286.0, 293.0, 317.0, 316.0, 291.0, 293.0, 223.0, 233.0, 309.0, 321.0, 291.0, 291.0, 289.0, 
298.0, 289.0, 293.0, 294.0, 288.0, 279.0, 294.0, 296.0, 291.0, 286.0, 301.0, 256.0, 263.0, 314.0, 313.0, 285.0, 297.0, 316.0, 314.0, 314.0, 316.0, 319.0, 320.0, 319.0, 311.0, 260.0, 270.0, 291.0, 291.0, 289.0, 290.0, 310.0, 320.0, 294.0, 288.0, 290.0, 280.0, 295.0, 284.0, 314.0, 313.0, 292.0, 290.0, 252.0, 267.0, 288.0, 299.0, 285.0, 288.0, 321.0, 306.0, 202.0, 200.0, 288.0, 291.0, 260.0, 265.0, 319.0, 311.0, 291.0, 288.0, 294.0, 288.0, 319.0, 314.0, 279.0, 285.0, 264.0, 255.0, 292.0, 287.0, 313.0, 314.0, 286.0, 293.0, 261.0, 264.0, 293.0, 286.0, 288.0, 294.0, 261.0, 261.0, 320.0, 316.0, 290.0, 289.0, 288.0, 294.0, 286.0, 293.0, 291.0, 291.0, 267.0, 249.0, 238.0, 235.0, 291.0, 291.0, 284.0, 292.0, 289.0, 287.0, 294.0, 293.0, 291.0, 291.0, 223.0, 242.0, 292.0, 281.0, 286.0, 290.0, 293.0, 286.0, 317.0, 313.0, 286.0, 293.0, 289.0, 293.0, 292.0, 284.0, 283.0, 290.0, 286.0, 290.0, 271.0, 248.0, 289.0, 281.0, 254.0, 262.0, 286.0, 290.0, 280.0, 290.0, 258.0, 267.0, 290.0, 292.0, 293.0, 286.0, 312.0, 315.0, 284.0, 286.0, 200.0, 208.0, 291.0, 288.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6935537348079653, "mean_inference_ms": 1.2363514110154021, "mean_action_processing_ms": 0.13315288003710485, "mean_env_wait_ms": 0.8349377343006816, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 639.0, "episode_reward_min": 402.0, "episode_reward_mean": 574.95, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 200.0}, "policy_reward_max": {"ppo": 321.0}, "policy_reward_mean": {"ppo": 287.475}, "hist_stats": {"episode_reward": [579.0, 570.0, 522.0, 582.0, 582.0, 579.0, 633.0, 582.0, 582.0, 630.0, 576.0, 582.0, 570.0, 582.0, 570.0, 633.0, 587.0, 582.0, 579.0, 633.0, 584.0, 456.0, 630.0, 582.0, 587.0, 582.0, 582.0, 573.0, 587.0, 587.0, 519.0, 627.0, 582.0, 630.0, 630.0, 639.0, 630.0, 530.0, 582.0, 579.0, 630.0, 582.0, 570.0, 579.0, 627.0, 582.0, 519.0, 587.0, 573.0, 627.0, 402.0, 579.0, 525.0, 630.0, 579.0, 582.0, 
633.0, 564.0, 519.0, 579.0, 627.0, 579.0, 525.0, 579.0, 582.0, 522.0, 636.0, 579.0, 582.0, 579.0, 582.0, 516.0, 473.0, 582.0, 576.0, 576.0, 587.0, 582.0, 465.0, 573.0, 576.0, 579.0, 630.0, 579.0, 582.0, 576.0, 573.0, 576.0, 519.0, 570.0, 516.0, 576.0, 570.0, 525.0, 582.0, 579.0, 627.0, 570.0, 408.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [288.0, 291.0, 290.0, 280.0, 258.0, 264.0, 295.0, 287.0, 292.0, 290.0, 287.0, 292.0, 314.0, 319.0, 288.0, 294.0, 296.0, 286.0, 314.0, 316.0, 292.0, 284.0, 296.0, 286.0, 294.0, 276.0, 293.0, 289.0, 287.0, 283.0, 315.0, 318.0, 297.0, 290.0, 290.0, 292.0, 286.0, 293.0, 317.0, 316.0, 291.0, 293.0, 223.0, 233.0, 309.0, 321.0, 291.0, 291.0, 289.0, 298.0, 289.0, 293.0, 294.0, 288.0, 279.0, 294.0, 296.0, 291.0, 286.0, 301.0, 256.0, 263.0, 314.0, 313.0, 285.0, 297.0, 316.0, 314.0, 314.0, 316.0, 319.0, 320.0, 319.0, 311.0, 260.0, 270.0, 291.0, 291.0, 289.0, 290.0, 310.0, 320.0, 294.0, 288.0, 290.0, 280.0, 295.0, 284.0, 314.0, 313.0, 292.0, 290.0, 252.0, 267.0, 288.0, 299.0, 285.0, 288.0, 321.0, 306.0, 202.0, 200.0, 288.0, 291.0, 260.0, 265.0, 319.0, 311.0, 291.0, 288.0, 294.0, 288.0, 319.0, 314.0, 279.0, 285.0, 264.0, 255.0, 292.0, 287.0, 313.0, 314.0, 286.0, 293.0, 261.0, 264.0, 293.0, 286.0, 288.0, 294.0, 261.0, 261.0, 320.0, 316.0, 290.0, 289.0, 288.0, 294.0, 286.0, 293.0, 291.0, 291.0, 267.0, 249.0, 238.0, 235.0, 291.0, 291.0, 284.0, 292.0, 289.0, 287.0, 294.0, 293.0, 291.0, 291.0, 223.0, 242.0, 292.0, 281.0, 286.0, 290.0, 293.0, 286.0, 
317.0, 313.0, 286.0, 293.0, 289.0, 293.0, 292.0, 284.0, 283.0, 290.0, 286.0, 290.0, 271.0, 248.0, 289.0, 281.0, 254.0, 262.0, 286.0, 290.0, 280.0, 290.0, 258.0, 267.0, 290.0, 292.0, 293.0, 286.0, 312.0, 315.0, 284.0, 286.0, 200.0, 208.0, 291.0, 288.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6935537348079653, "mean_inference_ms": 1.2363514110154021, "mean_action_processing_ms": 0.13315288003710485, "mean_env_wait_ms": 0.8349377343006816, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 12083200, "num_agent_steps_trained": 12083200, "num_env_steps_sampled": 6041600, "num_env_steps_trained": 6041600, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 6041600, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 12083200, "timers": {"training_iteration_time_ms": 3574.198, "learn_time_ms": 1069.172, "learn_throughput": 11971.882, "synch_weights_time_ms": 10.839}, "counters": {"num_env_steps_sampled": 6041600, "num_env_steps_trained": 6041600, "num_agent_steps_sampled": 12083200, "num_agent_steps_trained": 12083200}, "done": false, "episodes_total": 15104, "training_iteration": 472, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-31-05", "timestamp": 1666582265, "time_this_iter_s": 3.600015878677368, "time_total_s": 1793.120355606079, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1793.120355606079, "timesteps_since_restore": 0, "iterations_since_restore": 472, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.98, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 197.2, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 175.52, "shaped_reward_min": 122, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.01, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 15.47, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.87, "useful_onion_pickup_agent_0_min": 9, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 15.29, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.07, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.04, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.01, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, 
"useful_onion_drop_agent_1_mean": 0.01, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.68, "potting_onion_agent_0_min": 9, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 15.12, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 4.9, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 7, "dish_pickup_agent_1_mean": 5.47, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 4.78, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.36, "useful_dish_pickup_agent_1_min": 3, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.05, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.03, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 4.73, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.21, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.72, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.15, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.68, "optimal_onion_potting_agent_0_min": 9, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 15.12, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.68, "viable_onion_potting_agent_0_min": 9, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 15.12, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0003118330496363342, "policy_loss": -0.0007060921634547412, "vf_loss": 
7.771193504333496, "vf_explained_var": 0.6044775247573853, "kl": 0.0033143041655421257, "entropy": 0.7657217979431152, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 6054400, "num_env_steps_trained": 6054400, "num_agent_steps_sampled": 12108800, "num_agent_steps_trained": 12108800}, "sampler_results": {"episode_reward_max": 639.0, "episode_reward_min": 402.0, "episode_reward_mean": 569.92, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 200.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 284.96}, "custom_metrics": {"sparse_reward_mean": 197.2, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 175.52, "shaped_reward_min": 122, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.01, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 15.47, 
"onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.87, "useful_onion_pickup_agent_0_min": 9, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 15.29, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.07, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.04, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.01, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.01, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.68, "potting_onion_agent_0_min": 9, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 15.12, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 4.9, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 7, "dish_pickup_agent_1_mean": 5.47, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 4.78, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.36, "useful_dish_pickup_agent_1_min": 3, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.05, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.03, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 4.73, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.21, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.72, "soup_delivery_agent_0_min": 3, 
"soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.15, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.68, "optimal_onion_potting_agent_0_min": 9, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 15.12, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.68, "viable_onion_potting_agent_0_min": 9, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 15.12, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 630.0, 630.0, 639.0, 630.0, 530.0, 582.0, 579.0, 630.0, 582.0, 570.0, 579.0, 627.0, 582.0, 519.0, 587.0, 573.0, 627.0, 402.0, 579.0, 525.0, 630.0, 579.0, 582.0, 633.0, 564.0, 519.0, 579.0, 627.0, 579.0, 525.0, 579.0, 582.0, 522.0, 636.0, 579.0, 582.0, 579.0, 582.0, 516.0, 473.0, 582.0, 576.0, 576.0, 587.0, 582.0, 465.0, 573.0, 576.0, 579.0, 630.0, 579.0, 582.0, 576.0, 573.0, 576.0, 519.0, 570.0, 516.0, 576.0, 570.0, 525.0, 582.0, 579.0, 627.0, 570.0, 408.0, 579.0, 411.0, 582.0, 530.0, 639.0, 587.0, 582.0, 573.0, 576.0, 579.0, 579.0, 525.0, 582.0, 525.0, 570.0, 573.0, 576.0, 576.0, 582.0, 576.0, 627.0, 525.0, 587.0, 579.0, 513.0, 576.0, 636.0, 579.0, 519.0, 579.0, 527.0, 576.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [285.0, 297.0, 316.0, 314.0, 314.0, 316.0, 319.0, 320.0, 319.0, 311.0, 260.0, 270.0, 291.0, 291.0, 289.0, 290.0, 310.0, 320.0, 294.0, 288.0, 290.0, 280.0, 295.0, 284.0, 314.0, 313.0, 292.0, 290.0, 252.0, 267.0, 288.0, 299.0, 285.0, 288.0, 321.0, 306.0, 202.0, 200.0, 288.0, 291.0, 260.0, 265.0, 319.0, 311.0, 291.0, 288.0, 294.0, 288.0, 319.0, 
314.0, 279.0, 285.0, 264.0, 255.0, 292.0, 287.0, 313.0, 314.0, 286.0, 293.0, 261.0, 264.0, 293.0, 286.0, 288.0, 294.0, 261.0, 261.0, 320.0, 316.0, 290.0, 289.0, 288.0, 294.0, 286.0, 293.0, 291.0, 291.0, 267.0, 249.0, 238.0, 235.0, 291.0, 291.0, 284.0, 292.0, 289.0, 287.0, 294.0, 293.0, 291.0, 291.0, 223.0, 242.0, 292.0, 281.0, 286.0, 290.0, 293.0, 286.0, 317.0, 313.0, 286.0, 293.0, 289.0, 293.0, 292.0, 284.0, 283.0, 290.0, 286.0, 290.0, 271.0, 248.0, 289.0, 281.0, 254.0, 262.0, 286.0, 290.0, 280.0, 290.0, 258.0, 267.0, 290.0, 292.0, 293.0, 286.0, 312.0, 315.0, 284.0, 286.0, 200.0, 208.0, 291.0, 288.0, 206.0, 205.0, 297.0, 285.0, 268.0, 262.0, 316.0, 323.0, 289.0, 298.0, 291.0, 291.0, 283.0, 290.0, 292.0, 284.0, 294.0, 285.0, 293.0, 286.0, 268.0, 257.0, 295.0, 287.0, 263.0, 262.0, 287.0, 283.0, 288.0, 285.0, 291.0, 285.0, 289.0, 287.0, 289.0, 293.0, 286.0, 290.0, 319.0, 308.0, 258.0, 267.0, 286.0, 301.0, 292.0, 287.0, 253.0, 260.0, 286.0, 290.0, 312.0, 324.0, 292.0, 287.0, 269.0, 250.0, 291.0, 288.0, 267.0, 260.0, 291.0, 285.0, 294.0, 288.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6935046216697461, "mean_inference_ms": 1.2362480338015869, "mean_action_processing_ms": 0.13314683432029636, "mean_env_wait_ms": 0.8348684647105502, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 639.0, "episode_reward_min": 402.0, "episode_reward_mean": 569.92, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 200.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 284.96}, "hist_stats": {"episode_reward": [582.0, 630.0, 630.0, 639.0, 630.0, 530.0, 582.0, 579.0, 630.0, 582.0, 570.0, 579.0, 627.0, 582.0, 519.0, 587.0, 573.0, 627.0, 402.0, 579.0, 525.0, 630.0, 579.0, 582.0, 633.0, 564.0, 519.0, 579.0, 627.0, 579.0, 525.0, 579.0, 582.0, 522.0, 636.0, 579.0, 582.0, 579.0, 582.0, 516.0, 473.0, 582.0, 576.0, 576.0, 587.0, 582.0, 465.0, 573.0, 576.0, 579.0, 630.0, 579.0, 582.0, 576.0, 573.0, 576.0, 519.0, 
570.0, 516.0, 576.0, 570.0, 525.0, 582.0, 579.0, 627.0, 570.0, 408.0, 579.0, 411.0, 582.0, 530.0, 639.0, 587.0, 582.0, 573.0, 576.0, 579.0, 579.0, 525.0, 582.0, 525.0, 570.0, 573.0, 576.0, 576.0, 582.0, 576.0, 627.0, 525.0, 587.0, 579.0, 513.0, 576.0, 636.0, 579.0, 519.0, 579.0, 527.0, 576.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [285.0, 297.0, 316.0, 314.0, 314.0, 316.0, 319.0, 320.0, 319.0, 311.0, 260.0, 270.0, 291.0, 291.0, 289.0, 290.0, 310.0, 320.0, 294.0, 288.0, 290.0, 280.0, 295.0, 284.0, 314.0, 313.0, 292.0, 290.0, 252.0, 267.0, 288.0, 299.0, 285.0, 288.0, 321.0, 306.0, 202.0, 200.0, 288.0, 291.0, 260.0, 265.0, 319.0, 311.0, 291.0, 288.0, 294.0, 288.0, 319.0, 314.0, 279.0, 285.0, 264.0, 255.0, 292.0, 287.0, 313.0, 314.0, 286.0, 293.0, 261.0, 264.0, 293.0, 286.0, 288.0, 294.0, 261.0, 261.0, 320.0, 316.0, 290.0, 289.0, 288.0, 294.0, 286.0, 293.0, 291.0, 291.0, 267.0, 249.0, 238.0, 235.0, 291.0, 291.0, 284.0, 292.0, 289.0, 287.0, 294.0, 293.0, 291.0, 291.0, 223.0, 242.0, 292.0, 281.0, 286.0, 290.0, 293.0, 286.0, 317.0, 313.0, 286.0, 293.0, 289.0, 293.0, 292.0, 284.0, 283.0, 290.0, 286.0, 290.0, 271.0, 248.0, 289.0, 281.0, 254.0, 262.0, 286.0, 290.0, 280.0, 290.0, 258.0, 267.0, 290.0, 292.0, 293.0, 286.0, 312.0, 315.0, 284.0, 286.0, 200.0, 208.0, 291.0, 288.0, 206.0, 205.0, 297.0, 285.0, 268.0, 262.0, 316.0, 323.0, 289.0, 298.0, 291.0, 291.0, 283.0, 290.0, 292.0, 284.0, 294.0, 285.0, 293.0, 286.0, 268.0, 257.0, 295.0, 287.0, 263.0, 262.0, 287.0, 283.0, 288.0, 
285.0, 291.0, 285.0, 289.0, 287.0, 289.0, 293.0, 286.0, 290.0, 319.0, 308.0, 258.0, 267.0, 286.0, 301.0, 292.0, 287.0, 253.0, 260.0, 286.0, 290.0, 312.0, 324.0, 292.0, 287.0, 269.0, 250.0, 291.0, 288.0, 267.0, 260.0, 291.0, 285.0, 294.0, 288.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6935046216697461, "mean_inference_ms": 1.2362480338015869, "mean_action_processing_ms": 0.13314683432029636, "mean_env_wait_ms": 0.8348684647105502, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 12108800, "num_agent_steps_trained": 12108800, "num_env_steps_sampled": 6054400, "num_env_steps_trained": 6054400, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 6054400, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 12108800, "timers": {"training_iteration_time_ms": 3566.626, "learn_time_ms": 1064.88, "learn_throughput": 12020.135, "synch_weights_time_ms": 11.342}, "counters": {"num_env_steps_sampled": 6054400, "num_env_steps_trained": 6054400, "num_agent_steps_sampled": 12108800, "num_agent_steps_trained": 12108800}, "done": false, "episodes_total": 15136, "training_iteration": 473, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-31-08", "timestamp": 1666582268, "time_this_iter_s": 3.544827699661255, "time_total_s": 1796.6651833057404, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1796.6651833057404, "timesteps_since_restore": 0, "iterations_since_restore": 473, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.433333333333334, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 196.4, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 175.36, "shaped_reward_min": 128, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.08, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 15.37, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.89, "useful_onion_pickup_agent_0_min": 9, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 15.19, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.05, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.03, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.01, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, 
"useful_onion_drop_agent_1_mean": 0.01, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.72, "potting_onion_agent_0_min": 9, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 15.07, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 4.86, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 7, "dish_pickup_agent_1_mean": 5.47, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 4.73, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.4, "useful_dish_pickup_agent_1_min": 3, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.04, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.01, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 4.69, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.23, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 4.67, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.16, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.72, "optimal_onion_potting_agent_0_min": 9, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 15.07, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.72, "viable_onion_potting_agent_0_min": 9, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 15.07, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0026534530334174633, "policy_loss": 0.0022541265934705734, "vf_loss": 
7.776371479034424, "vf_explained_var": 0.6059039235115051, "kl": 0.003932251129299402, "entropy": 0.7566198110580444, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 6067200, "num_env_steps_trained": 6067200, "num_agent_steps_sampled": 12134400, "num_agent_steps_trained": 12134400}, "sampler_results": {"episode_reward_max": 639.0, "episode_reward_min": 408.0, "episode_reward_mean": 568.16, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 200.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 284.08}, "custom_metrics": {"sparse_reward_mean": 196.4, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 175.36, "shaped_reward_min": 128, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.08, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 15.37, 
"onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.89, "useful_onion_pickup_agent_0_min": 9, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 15.19, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.05, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.03, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.01, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.01, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.72, "potting_onion_agent_0_min": 9, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 15.07, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 4.86, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 7, "dish_pickup_agent_1_mean": 5.47, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 4.73, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.4, "useful_dish_pickup_agent_1_min": 3, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.04, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.01, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 4.69, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.23, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 4.67, "soup_delivery_agent_0_min": 3, 
"soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.16, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.72, "optimal_onion_potting_agent_0_min": 9, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 15.07, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.72, "viable_onion_potting_agent_0_min": 9, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 15.07, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 522.0, 636.0, 579.0, 582.0, 579.0, 582.0, 516.0, 473.0, 582.0, 576.0, 576.0, 587.0, 582.0, 465.0, 573.0, 576.0, 579.0, 630.0, 579.0, 582.0, 576.0, 573.0, 576.0, 519.0, 570.0, 516.0, 576.0, 570.0, 525.0, 582.0, 579.0, 627.0, 570.0, 408.0, 579.0, 411.0, 582.0, 530.0, 639.0, 587.0, 582.0, 573.0, 576.0, 579.0, 579.0, 525.0, 582.0, 525.0, 570.0, 573.0, 576.0, 576.0, 582.0, 576.0, 627.0, 525.0, 587.0, 579.0, 513.0, 576.0, 636.0, 579.0, 519.0, 579.0, 527.0, 576.0, 582.0, 525.0, 633.0, 530.0, 582.0, 579.0, 519.0, 630.0, 630.0, 522.0, 582.0, 579.0, 587.0, 627.0, 573.0, 579.0, 576.0, 582.0, 576.0, 582.0, 587.0, 579.0, 573.0, 579.0, 636.0, 582.0, 530.0, 519.0, 576.0, 525.0, 582.0, 570.0, 573.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [288.0, 294.0, 261.0, 261.0, 320.0, 316.0, 290.0, 289.0, 288.0, 294.0, 286.0, 293.0, 291.0, 291.0, 267.0, 249.0, 238.0, 235.0, 291.0, 291.0, 284.0, 292.0, 289.0, 287.0, 294.0, 293.0, 291.0, 291.0, 223.0, 242.0, 292.0, 281.0, 286.0, 290.0, 293.0, 286.0, 317.0, 313.0, 286.0, 293.0, 289.0, 293.0, 292.0, 284.0, 283.0, 290.0, 286.0, 290.0, 271.0, 
248.0, 289.0, 281.0, 254.0, 262.0, 286.0, 290.0, 280.0, 290.0, 258.0, 267.0, 290.0, 292.0, 293.0, 286.0, 312.0, 315.0, 284.0, 286.0, 200.0, 208.0, 291.0, 288.0, 206.0, 205.0, 297.0, 285.0, 268.0, 262.0, 316.0, 323.0, 289.0, 298.0, 291.0, 291.0, 283.0, 290.0, 292.0, 284.0, 294.0, 285.0, 293.0, 286.0, 268.0, 257.0, 295.0, 287.0, 263.0, 262.0, 287.0, 283.0, 288.0, 285.0, 291.0, 285.0, 289.0, 287.0, 289.0, 293.0, 286.0, 290.0, 319.0, 308.0, 258.0, 267.0, 286.0, 301.0, 292.0, 287.0, 253.0, 260.0, 286.0, 290.0, 312.0, 324.0, 292.0, 287.0, 269.0, 250.0, 291.0, 288.0, 267.0, 260.0, 291.0, 285.0, 294.0, 288.0, 261.0, 264.0, 318.0, 315.0, 265.0, 265.0, 294.0, 288.0, 296.0, 283.0, 251.0, 268.0, 316.0, 314.0, 317.0, 313.0, 259.0, 263.0, 296.0, 286.0, 289.0, 290.0, 296.0, 291.0, 306.0, 321.0, 280.0, 293.0, 291.0, 288.0, 286.0, 290.0, 286.0, 296.0, 294.0, 282.0, 297.0, 285.0, 294.0, 293.0, 283.0, 296.0, 288.0, 285.0, 294.0, 285.0, 317.0, 319.0, 292.0, 290.0, 262.0, 268.0, 257.0, 262.0, 286.0, 290.0, 265.0, 260.0, 290.0, 292.0, 283.0, 287.0, 283.0, 290.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6934521429818895, "mean_inference_ms": 1.2361475067867969, "mean_action_processing_ms": 0.13314094767674708, "mean_env_wait_ms": 0.8348020091355736, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 639.0, "episode_reward_min": 408.0, "episode_reward_mean": 568.16, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 200.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 284.08}, "hist_stats": {"episode_reward": [582.0, 522.0, 636.0, 579.0, 582.0, 579.0, 582.0, 516.0, 473.0, 582.0, 576.0, 576.0, 587.0, 582.0, 465.0, 573.0, 576.0, 579.0, 630.0, 579.0, 582.0, 576.0, 573.0, 576.0, 519.0, 570.0, 516.0, 576.0, 570.0, 525.0, 582.0, 579.0, 627.0, 570.0, 408.0, 579.0, 411.0, 582.0, 530.0, 639.0, 587.0, 582.0, 573.0, 576.0, 579.0, 579.0, 525.0, 582.0, 525.0, 570.0, 573.0, 576.0, 576.0, 582.0, 576.0, 627.0, 525.0, 
587.0, 579.0, 513.0, 576.0, 636.0, 579.0, 519.0, 579.0, 527.0, 576.0, 582.0, 525.0, 633.0, 530.0, 582.0, 579.0, 519.0, 630.0, 630.0, 522.0, 582.0, 579.0, 587.0, 627.0, 573.0, 579.0, 576.0, 582.0, 576.0, 582.0, 587.0, 579.0, 573.0, 579.0, 636.0, 582.0, 530.0, 519.0, 576.0, 525.0, 582.0, 570.0, 573.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [288.0, 294.0, 261.0, 261.0, 320.0, 316.0, 290.0, 289.0, 288.0, 294.0, 286.0, 293.0, 291.0, 291.0, 267.0, 249.0, 238.0, 235.0, 291.0, 291.0, 284.0, 292.0, 289.0, 287.0, 294.0, 293.0, 291.0, 291.0, 223.0, 242.0, 292.0, 281.0, 286.0, 290.0, 293.0, 286.0, 317.0, 313.0, 286.0, 293.0, 289.0, 293.0, 292.0, 284.0, 283.0, 290.0, 286.0, 290.0, 271.0, 248.0, 289.0, 281.0, 254.0, 262.0, 286.0, 290.0, 280.0, 290.0, 258.0, 267.0, 290.0, 292.0, 293.0, 286.0, 312.0, 315.0, 284.0, 286.0, 200.0, 208.0, 291.0, 288.0, 206.0, 205.0, 297.0, 285.0, 268.0, 262.0, 316.0, 323.0, 289.0, 298.0, 291.0, 291.0, 283.0, 290.0, 292.0, 284.0, 294.0, 285.0, 293.0, 286.0, 268.0, 257.0, 295.0, 287.0, 263.0, 262.0, 287.0, 283.0, 288.0, 285.0, 291.0, 285.0, 289.0, 287.0, 289.0, 293.0, 286.0, 290.0, 319.0, 308.0, 258.0, 267.0, 286.0, 301.0, 292.0, 287.0, 253.0, 260.0, 286.0, 290.0, 312.0, 324.0, 292.0, 287.0, 269.0, 250.0, 291.0, 288.0, 267.0, 260.0, 291.0, 285.0, 294.0, 288.0, 261.0, 264.0, 318.0, 315.0, 265.0, 265.0, 294.0, 288.0, 296.0, 283.0, 251.0, 268.0, 316.0, 314.0, 317.0, 313.0, 259.0, 263.0, 296.0, 286.0, 289.0, 290.0, 296.0, 291.0, 306.0, 321.0, 280.0, 293.0, 291.0, 
288.0, 286.0, 290.0, 286.0, 296.0, 294.0, 282.0, 297.0, 285.0, 294.0, 293.0, 283.0, 296.0, 288.0, 285.0, 294.0, 285.0, 317.0, 319.0, 292.0, 290.0, 262.0, 268.0, 257.0, 262.0, 286.0, 290.0, 265.0, 260.0, 290.0, 292.0, 283.0, 287.0, 283.0, 290.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6934521429818895, "mean_inference_ms": 1.2361475067867969, "mean_action_processing_ms": 0.13314094767674708, "mean_env_wait_ms": 0.8348020091355736, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 12134400, "num_agent_steps_trained": 12134400, "num_env_steps_sampled": 6067200, "num_env_steps_trained": 6067200, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 6067200, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 12134400, "timers": {"training_iteration_time_ms": 3557.83, "learn_time_ms": 1069.891, "learn_throughput": 11963.839, "synch_weights_time_ms": 11.043}, "counters": {"num_env_steps_sampled": 6067200, "num_env_steps_trained": 6067200, "num_agent_steps_sampled": 12134400, "num_agent_steps_trained": 12134400}, "done": false, "episodes_total": 15168, "training_iteration": 474, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-31-12", "timestamp": 1666582272, "time_this_iter_s": 3.609189748764038, "time_total_s": 1800.2743730545044, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1800.2743730545044, "timesteps_since_restore": 0, "iterations_since_restore": 474, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.380000000000003, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 197.8, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 176.5, "shaped_reward_min": 128, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.94, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 15.7, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 16.74, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 15.51, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.06, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.03, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.01, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, 
"useful_onion_drop_agent_1_mean": 0.01, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.58, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 15.35, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 4.99, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 7, "dish_pickup_agent_1_mean": 5.45, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 4.85, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.37, "useful_dish_pickup_agent_1_min": 3, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.07, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.02, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 4.82, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.19, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 4.78, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.12, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.58, "optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 15.35, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.58, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 15.35, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0003242699895054102, "policy_loss": -0.0007143347756937146, "vf_loss": 
7.679488182067871, "vf_explained_var": 0.58504319190979, "kl": 0.002902800217270851, "entropy": 0.7557680010795593, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 6080000, "num_env_steps_trained": 6080000, "num_agent_steps_sampled": 12160000, "num_agent_steps_trained": 12160000}, "sampler_results": {"episode_reward_max": 639.0, "episode_reward_min": 408.0, "episode_reward_mean": 572.1, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 200.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 286.05}, "custom_metrics": {"sparse_reward_mean": 197.8, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 176.5, "shaped_reward_min": 128, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.94, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 15.7, 
"onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 16.74, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 15.51, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.06, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.03, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.01, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.01, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.58, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 15.35, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 4.99, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 7, "dish_pickup_agent_1_mean": 5.45, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 4.85, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.37, "useful_dish_pickup_agent_1_min": 3, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.07, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.02, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 4.82, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.19, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 4.78, "soup_delivery_agent_0_min": 3, 
"soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.12, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.58, "optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 15.35, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.58, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 15.35, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [627.0, 570.0, 408.0, 579.0, 411.0, 582.0, 530.0, 639.0, 587.0, 582.0, 573.0, 576.0, 579.0, 579.0, 525.0, 582.0, 525.0, 570.0, 573.0, 576.0, 576.0, 582.0, 576.0, 627.0, 525.0, 587.0, 579.0, 513.0, 576.0, 636.0, 579.0, 519.0, 579.0, 527.0, 576.0, 582.0, 525.0, 633.0, 530.0, 582.0, 579.0, 519.0, 630.0, 630.0, 522.0, 582.0, 579.0, 587.0, 627.0, 573.0, 579.0, 576.0, 582.0, 576.0, 582.0, 587.0, 579.0, 573.0, 579.0, 636.0, 582.0, 530.0, 519.0, 576.0, 525.0, 582.0, 570.0, 573.0, 579.0, 627.0, 522.0, 579.0, 530.0, 510.0, 576.0, 587.0, 579.0, 573.0, 627.0, 582.0, 576.0, 579.0, 582.0, 587.0, 639.0, 633.0, 473.0, 627.0, 630.0, 582.0, 582.0, 582.0, 582.0, 579.0, 525.0, 573.0, 522.0, 516.0, 627.0, 627.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [312.0, 315.0, 284.0, 286.0, 200.0, 208.0, 291.0, 288.0, 206.0, 205.0, 297.0, 285.0, 268.0, 262.0, 316.0, 323.0, 289.0, 298.0, 291.0, 291.0, 283.0, 290.0, 292.0, 284.0, 294.0, 285.0, 293.0, 286.0, 268.0, 257.0, 295.0, 287.0, 263.0, 262.0, 287.0, 283.0, 288.0, 285.0, 291.0, 285.0, 289.0, 287.0, 289.0, 293.0, 286.0, 290.0, 319.0, 308.0, 258.0, 
267.0, 286.0, 301.0, 292.0, 287.0, 253.0, 260.0, 286.0, 290.0, 312.0, 324.0, 292.0, 287.0, 269.0, 250.0, 291.0, 288.0, 267.0, 260.0, 291.0, 285.0, 294.0, 288.0, 261.0, 264.0, 318.0, 315.0, 265.0, 265.0, 294.0, 288.0, 296.0, 283.0, 251.0, 268.0, 316.0, 314.0, 317.0, 313.0, 259.0, 263.0, 296.0, 286.0, 289.0, 290.0, 296.0, 291.0, 306.0, 321.0, 280.0, 293.0, 291.0, 288.0, 286.0, 290.0, 286.0, 296.0, 294.0, 282.0, 297.0, 285.0, 294.0, 293.0, 283.0, 296.0, 288.0, 285.0, 294.0, 285.0, 317.0, 319.0, 292.0, 290.0, 262.0, 268.0, 257.0, 262.0, 286.0, 290.0, 265.0, 260.0, 290.0, 292.0, 283.0, 287.0, 283.0, 290.0, 293.0, 286.0, 311.0, 316.0, 255.0, 267.0, 286.0, 293.0, 267.0, 263.0, 258.0, 252.0, 285.0, 291.0, 285.0, 302.0, 286.0, 293.0, 278.0, 295.0, 313.0, 314.0, 293.0, 289.0, 287.0, 289.0, 292.0, 287.0, 293.0, 289.0, 301.0, 286.0, 320.0, 319.0, 319.0, 314.0, 242.0, 231.0, 313.0, 314.0, 319.0, 311.0, 291.0, 291.0, 291.0, 291.0, 290.0, 292.0, 293.0, 289.0, 292.0, 287.0, 260.0, 265.0, 288.0, 285.0, 247.0, 275.0, 265.0, 251.0, 322.0, 305.0, 310.0, 317.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6933924476202722, "mean_inference_ms": 1.2360384651209435, "mean_action_processing_ms": 0.13313298981871072, "mean_env_wait_ms": 0.8347216666702647, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 639.0, "episode_reward_min": 408.0, "episode_reward_mean": 572.1, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 200.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 286.05}, "hist_stats": {"episode_reward": [627.0, 570.0, 408.0, 579.0, 411.0, 582.0, 530.0, 639.0, 587.0, 582.0, 573.0, 576.0, 579.0, 579.0, 525.0, 582.0, 525.0, 570.0, 573.0, 576.0, 576.0, 582.0, 576.0, 627.0, 525.0, 587.0, 579.0, 513.0, 576.0, 636.0, 579.0, 519.0, 579.0, 527.0, 576.0, 582.0, 525.0, 633.0, 530.0, 582.0, 579.0, 519.0, 630.0, 630.0, 522.0, 582.0, 579.0, 587.0, 627.0, 573.0, 579.0, 576.0, 582.0, 576.0, 582.0, 587.0, 579.0, 
573.0, 579.0, 636.0, 582.0, 530.0, 519.0, 576.0, 525.0, 582.0, 570.0, 573.0, 579.0, 627.0, 522.0, 579.0, 530.0, 510.0, 576.0, 587.0, 579.0, 573.0, 627.0, 582.0, 576.0, 579.0, 582.0, 587.0, 639.0, 633.0, 473.0, 627.0, 630.0, 582.0, 582.0, 582.0, 582.0, 579.0, 525.0, 573.0, 522.0, 516.0, 627.0, 627.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [312.0, 315.0, 284.0, 286.0, 200.0, 208.0, 291.0, 288.0, 206.0, 205.0, 297.0, 285.0, 268.0, 262.0, 316.0, 323.0, 289.0, 298.0, 291.0, 291.0, 283.0, 290.0, 292.0, 284.0, 294.0, 285.0, 293.0, 286.0, 268.0, 257.0, 295.0, 287.0, 263.0, 262.0, 287.0, 283.0, 288.0, 285.0, 291.0, 285.0, 289.0, 287.0, 289.0, 293.0, 286.0, 290.0, 319.0, 308.0, 258.0, 267.0, 286.0, 301.0, 292.0, 287.0, 253.0, 260.0, 286.0, 290.0, 312.0, 324.0, 292.0, 287.0, 269.0, 250.0, 291.0, 288.0, 267.0, 260.0, 291.0, 285.0, 294.0, 288.0, 261.0, 264.0, 318.0, 315.0, 265.0, 265.0, 294.0, 288.0, 296.0, 283.0, 251.0, 268.0, 316.0, 314.0, 317.0, 313.0, 259.0, 263.0, 296.0, 286.0, 289.0, 290.0, 296.0, 291.0, 306.0, 321.0, 280.0, 293.0, 291.0, 288.0, 286.0, 290.0, 286.0, 296.0, 294.0, 282.0, 297.0, 285.0, 294.0, 293.0, 283.0, 296.0, 288.0, 285.0, 294.0, 285.0, 317.0, 319.0, 292.0, 290.0, 262.0, 268.0, 257.0, 262.0, 286.0, 290.0, 265.0, 260.0, 290.0, 292.0, 283.0, 287.0, 283.0, 290.0, 293.0, 286.0, 311.0, 316.0, 255.0, 267.0, 286.0, 293.0, 267.0, 263.0, 258.0, 252.0, 285.0, 291.0, 285.0, 302.0, 286.0, 293.0, 278.0, 295.0, 313.0, 314.0, 293.0, 289.0, 287.0, 289.0, 292.0, 287.0, 293.0, 
289.0, 301.0, 286.0, 320.0, 319.0, 319.0, 314.0, 242.0, 231.0, 313.0, 314.0, 319.0, 311.0, 291.0, 291.0, 291.0, 291.0, 290.0, 292.0, 293.0, 289.0, 292.0, 287.0, 260.0, 265.0, 288.0, 285.0, 247.0, 275.0, 265.0, 251.0, 322.0, 305.0, 310.0, 317.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6933924476202722, "mean_inference_ms": 1.2360384651209435, "mean_action_processing_ms": 0.13313298981871072, "mean_env_wait_ms": 0.8347216666702647, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 12160000, "num_agent_steps_trained": 12160000, "num_env_steps_sampled": 6080000, "num_env_steps_trained": 6080000, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 6080000, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 12160000, "timers": {"training_iteration_time_ms": 3547.816, "learn_time_ms": 1072.66, "learn_throughput": 11932.954, "synch_weights_time_ms": 11.065}, "counters": {"num_env_steps_sampled": 6080000, "num_env_steps_trained": 6080000, "num_agent_steps_sampled": 12160000, "num_agent_steps_trained": 12160000}, "done": false, "episodes_total": 15200, "training_iteration": 475, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-31-16", "timestamp": 1666582276, "time_this_iter_s": 3.5919501781463623, "time_total_s": 1803.8663232326508, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1803.8663232326508, "timesteps_since_restore": 0, "iterations_since_restore": 475, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.660000000000004, "ram_util_percent": 10.62}}
+{"custom_metrics": {"sparse_reward_mean": 198.8, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 177.15, "shaped_reward_min": 142, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.91, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 15.82, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 16.74, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 15.68, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.05, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.02, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.02, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, 
"useful_onion_drop_agent_1_mean": 0.01, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.53, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 15.54, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.1, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.43, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 4.94, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.29, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.06, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.04, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 4.87, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.18, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.84, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.11, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.53, "optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 15.54, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.53, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 15.54, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.005083472467958927, "policy_loss": 0.004696611315011978, "vf_loss": 
7.64980411529541, "vf_explained_var": 0.5964254140853882, "kl": 0.0038652834482491016, "entropy": 0.7562379837036133, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 6092800, "num_env_steps_trained": 6092800, "num_agent_steps_sampled": 12185600, "num_agent_steps_trained": 12185600}, "sampler_results": {"episode_reward_max": 639.0, "episode_reward_min": 462.0, "episode_reward_mean": 574.75, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 226.0}, "policy_reward_max": {"ppo": 322.0}, "policy_reward_mean": {"ppo": 287.375}, "custom_metrics": {"sparse_reward_mean": 198.8, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 177.15, "shaped_reward_min": 142, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.91, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 15.82, 
"onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 16.74, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 15.68, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.05, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.02, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.02, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.01, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.53, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 15.54, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.1, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.43, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 4.94, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.29, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.06, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.04, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 4.87, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.18, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.84, "soup_delivery_agent_0_min": 2, 
"soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.11, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.53, "optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 15.54, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.53, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 15.54, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 527.0, 576.0, 582.0, 525.0, 633.0, 530.0, 582.0, 579.0, 519.0, 630.0, 630.0, 522.0, 582.0, 579.0, 587.0, 627.0, 573.0, 579.0, 576.0, 582.0, 576.0, 582.0, 587.0, 579.0, 573.0, 579.0, 636.0, 582.0, 530.0, 519.0, 576.0, 525.0, 582.0, 570.0, 573.0, 579.0, 627.0, 522.0, 579.0, 530.0, 510.0, 576.0, 587.0, 579.0, 573.0, 627.0, 582.0, 576.0, 579.0, 582.0, 587.0, 639.0, 633.0, 473.0, 627.0, 630.0, 582.0, 582.0, 582.0, 582.0, 579.0, 525.0, 573.0, 522.0, 516.0, 627.0, 627.0, 525.0, 567.0, 579.0, 627.0, 576.0, 522.0, 630.0, 587.0, 633.0, 462.0, 582.0, 570.0, 576.0, 522.0, 582.0, 570.0, 522.0, 582.0, 582.0, 633.0, 630.0, 579.0, 527.0, 579.0, 582.0, 579.0, 579.0, 567.0, 579.0, 582.0, 519.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [291.0, 288.0, 267.0, 260.0, 291.0, 285.0, 294.0, 288.0, 261.0, 264.0, 318.0, 315.0, 265.0, 265.0, 294.0, 288.0, 296.0, 283.0, 251.0, 268.0, 316.0, 314.0, 317.0, 313.0, 259.0, 263.0, 296.0, 286.0, 289.0, 290.0, 296.0, 291.0, 306.0, 321.0, 280.0, 293.0, 291.0, 288.0, 286.0, 290.0, 286.0, 296.0, 294.0, 282.0, 297.0, 285.0, 294.0, 293.0, 283.0, 
296.0, 288.0, 285.0, 294.0, 285.0, 317.0, 319.0, 292.0, 290.0, 262.0, 268.0, 257.0, 262.0, 286.0, 290.0, 265.0, 260.0, 290.0, 292.0, 283.0, 287.0, 283.0, 290.0, 293.0, 286.0, 311.0, 316.0, 255.0, 267.0, 286.0, 293.0, 267.0, 263.0, 258.0, 252.0, 285.0, 291.0, 285.0, 302.0, 286.0, 293.0, 278.0, 295.0, 313.0, 314.0, 293.0, 289.0, 287.0, 289.0, 292.0, 287.0, 293.0, 289.0, 301.0, 286.0, 320.0, 319.0, 319.0, 314.0, 242.0, 231.0, 313.0, 314.0, 319.0, 311.0, 291.0, 291.0, 291.0, 291.0, 290.0, 292.0, 293.0, 289.0, 292.0, 287.0, 260.0, 265.0, 288.0, 285.0, 247.0, 275.0, 265.0, 251.0, 322.0, 305.0, 310.0, 317.0, 265.0, 260.0, 287.0, 280.0, 291.0, 288.0, 314.0, 313.0, 288.0, 288.0, 273.0, 249.0, 315.0, 315.0, 286.0, 301.0, 321.0, 312.0, 236.0, 226.0, 291.0, 291.0, 276.0, 294.0, 289.0, 287.0, 263.0, 259.0, 292.0, 290.0, 284.0, 286.0, 267.0, 255.0, 291.0, 291.0, 289.0, 293.0, 317.0, 316.0, 317.0, 313.0, 288.0, 291.0, 265.0, 262.0, 292.0, 287.0, 296.0, 286.0, 294.0, 285.0, 288.0, 291.0, 281.0, 286.0, 287.0, 292.0, 292.0, 290.0, 252.0, 267.0, 289.0, 293.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6933303687204011, "mean_inference_ms": 1.2359272963791816, "mean_action_processing_ms": 0.13312594714549067, "mean_env_wait_ms": 0.8346400835968194, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 639.0, "episode_reward_min": 462.0, "episode_reward_mean": 574.75, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 226.0}, "policy_reward_max": {"ppo": 322.0}, "policy_reward_mean": {"ppo": 287.375}, "hist_stats": {"episode_reward": [579.0, 527.0, 576.0, 582.0, 525.0, 633.0, 530.0, 582.0, 579.0, 519.0, 630.0, 630.0, 522.0, 582.0, 579.0, 587.0, 627.0, 573.0, 579.0, 576.0, 582.0, 576.0, 582.0, 587.0, 579.0, 573.0, 579.0, 636.0, 582.0, 530.0, 519.0, 576.0, 525.0, 582.0, 570.0, 573.0, 579.0, 627.0, 522.0, 579.0, 530.0, 510.0, 576.0, 587.0, 579.0, 573.0, 627.0, 582.0, 576.0, 579.0, 582.0, 587.0, 639.0, 633.0, 473.0, 627.0, 
630.0, 582.0, 582.0, 582.0, 582.0, 579.0, 525.0, 573.0, 522.0, 516.0, 627.0, 627.0, 525.0, 567.0, 579.0, 627.0, 576.0, 522.0, 630.0, 587.0, 633.0, 462.0, 582.0, 570.0, 576.0, 522.0, 582.0, 570.0, 522.0, 582.0, 582.0, 633.0, 630.0, 579.0, 527.0, 579.0, 582.0, 579.0, 579.0, 567.0, 579.0, 582.0, 519.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [291.0, 288.0, 267.0, 260.0, 291.0, 285.0, 294.0, 288.0, 261.0, 264.0, 318.0, 315.0, 265.0, 265.0, 294.0, 288.0, 296.0, 283.0, 251.0, 268.0, 316.0, 314.0, 317.0, 313.0, 259.0, 263.0, 296.0, 286.0, 289.0, 290.0, 296.0, 291.0, 306.0, 321.0, 280.0, 293.0, 291.0, 288.0, 286.0, 290.0, 286.0, 296.0, 294.0, 282.0, 297.0, 285.0, 294.0, 293.0, 283.0, 296.0, 288.0, 285.0, 294.0, 285.0, 317.0, 319.0, 292.0, 290.0, 262.0, 268.0, 257.0, 262.0, 286.0, 290.0, 265.0, 260.0, 290.0, 292.0, 283.0, 287.0, 283.0, 290.0, 293.0, 286.0, 311.0, 316.0, 255.0, 267.0, 286.0, 293.0, 267.0, 263.0, 258.0, 252.0, 285.0, 291.0, 285.0, 302.0, 286.0, 293.0, 278.0, 295.0, 313.0, 314.0, 293.0, 289.0, 287.0, 289.0, 292.0, 287.0, 293.0, 289.0, 301.0, 286.0, 320.0, 319.0, 319.0, 314.0, 242.0, 231.0, 313.0, 314.0, 319.0, 311.0, 291.0, 291.0, 291.0, 291.0, 290.0, 292.0, 293.0, 289.0, 292.0, 287.0, 260.0, 265.0, 288.0, 285.0, 247.0, 275.0, 265.0, 251.0, 322.0, 305.0, 310.0, 317.0, 265.0, 260.0, 287.0, 280.0, 291.0, 288.0, 314.0, 313.0, 288.0, 288.0, 273.0, 249.0, 315.0, 315.0, 286.0, 301.0, 321.0, 312.0, 236.0, 226.0, 291.0, 291.0, 276.0, 294.0, 289.0, 287.0, 263.0, 259.0, 
292.0, 290.0, 284.0, 286.0, 267.0, 255.0, 291.0, 291.0, 289.0, 293.0, 317.0, 316.0, 317.0, 313.0, 288.0, 291.0, 265.0, 262.0, 292.0, 287.0, 296.0, 286.0, 294.0, 285.0, 288.0, 291.0, 281.0, 286.0, 287.0, 292.0, 292.0, 290.0, 252.0, 267.0, 289.0, 293.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6933303687204011, "mean_inference_ms": 1.2359272963791816, "mean_action_processing_ms": 0.13312594714549067, "mean_env_wait_ms": 0.8346400835968194, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 12185600, "num_agent_steps_trained": 12185600, "num_env_steps_sampled": 6092800, "num_env_steps_trained": 6092800, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 6092800, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 12185600, "timers": {"training_iteration_time_ms": 3539.364, "learn_time_ms": 1070.222, "learn_throughput": 11960.137, "synch_weights_time_ms": 11.015}, "counters": {"num_env_steps_sampled": 6092800, "num_env_steps_trained": 6092800, "num_agent_steps_sampled": 12185600, "num_agent_steps_trained": 12185600}, "done": false, "episodes_total": 15232, "training_iteration": 476, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-31-20", "timestamp": 1666582280, "time_this_iter_s": 3.5145702362060547, "time_total_s": 1807.3808934688568, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1807.3808934688568, "timesteps_since_restore": 0, "iterations_since_restore": 476, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.566666666666666, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 198.6, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 176.59, "shaped_reward_min": 141, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.7, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 16.01, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 16.58, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 15.86, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 20, "onion_drop_agent_0_mean": 0.06, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.02, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.02, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, 
"useful_onion_drop_agent_1_mean": 0.01, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.36, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 15.69, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 20, "dish_pickup_agent_0_mean": 5.19, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.4, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 4.88, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.2, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.11, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.04, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 4.87, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.17, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.84, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.1, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.36, "optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 15.69, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 20, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.36, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 15.69, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 20, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0004522355739027262, "policy_loss": 6.512660183943808e-05, "vf_loss": 
7.6786627769470215, "vf_explained_var": 0.596559464931488, "kl": 0.002718728268519044, "entropy": 0.7615171670913696, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 6105600, "num_env_steps_trained": 6105600, "num_agent_steps_sampled": 12211200, "num_agent_steps_trained": 12211200}, "sampler_results": {"episode_reward_max": 639.0, "episode_reward_min": 462.0, "episode_reward_mean": 573.79, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 226.0}, "policy_reward_max": {"ppo": 322.0}, "policy_reward_mean": {"ppo": 286.895}, "custom_metrics": {"sparse_reward_mean": 198.6, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 176.59, "shaped_reward_min": 141, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.7, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 16.01, 
"onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 16.58, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 15.86, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 20, "onion_drop_agent_0_mean": 0.06, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.02, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.02, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.01, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.36, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 15.69, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 20, "dish_pickup_agent_0_mean": 5.19, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.4, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 4.88, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.2, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.11, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.04, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 4.87, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.17, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.84, "soup_delivery_agent_0_min": 2, 
"soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.1, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.36, "optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 15.69, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 20, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.36, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 15.69, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 20, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [525.0, 582.0, 570.0, 573.0, 579.0, 627.0, 522.0, 579.0, 530.0, 510.0, 576.0, 587.0, 579.0, 573.0, 627.0, 582.0, 576.0, 579.0, 582.0, 587.0, 639.0, 633.0, 473.0, 627.0, 630.0, 582.0, 582.0, 582.0, 582.0, 579.0, 525.0, 573.0, 522.0, 516.0, 627.0, 627.0, 525.0, 567.0, 579.0, 627.0, 576.0, 522.0, 630.0, 587.0, 633.0, 462.0, 582.0, 570.0, 576.0, 522.0, 582.0, 570.0, 522.0, 582.0, 582.0, 633.0, 630.0, 579.0, 527.0, 579.0, 582.0, 579.0, 579.0, 567.0, 579.0, 582.0, 519.0, 582.0, 533.0, 522.0, 573.0, 582.0, 579.0, 576.0, 630.0, 501.0, 587.0, 633.0, 630.0, 579.0, 579.0, 579.0, 530.0, 582.0, 573.0, 570.0, 530.0, 582.0, 519.0, 587.0, 525.0, 633.0, 630.0, 630.0, 525.0, 519.0, 582.0, 582.0, 582.0, 558.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [265.0, 260.0, 290.0, 292.0, 283.0, 287.0, 283.0, 290.0, 293.0, 286.0, 311.0, 316.0, 255.0, 267.0, 286.0, 293.0, 267.0, 263.0, 258.0, 252.0, 285.0, 291.0, 285.0, 302.0, 286.0, 293.0, 278.0, 295.0, 313.0, 314.0, 293.0, 289.0, 287.0, 289.0, 292.0, 287.0, 293.0, 289.0, 301.0, 286.0, 320.0, 319.0, 319.0, 314.0, 242.0, 231.0, 313.0, 314.0, 319.0, 
311.0, 291.0, 291.0, 291.0, 291.0, 290.0, 292.0, 293.0, 289.0, 292.0, 287.0, 260.0, 265.0, 288.0, 285.0, 247.0, 275.0, 265.0, 251.0, 322.0, 305.0, 310.0, 317.0, 265.0, 260.0, 287.0, 280.0, 291.0, 288.0, 314.0, 313.0, 288.0, 288.0, 273.0, 249.0, 315.0, 315.0, 286.0, 301.0, 321.0, 312.0, 236.0, 226.0, 291.0, 291.0, 276.0, 294.0, 289.0, 287.0, 263.0, 259.0, 292.0, 290.0, 284.0, 286.0, 267.0, 255.0, 291.0, 291.0, 289.0, 293.0, 317.0, 316.0, 317.0, 313.0, 288.0, 291.0, 265.0, 262.0, 292.0, 287.0, 296.0, 286.0, 294.0, 285.0, 288.0, 291.0, 281.0, 286.0, 287.0, 292.0, 292.0, 290.0, 252.0, 267.0, 289.0, 293.0, 263.0, 270.0, 258.0, 264.0, 287.0, 286.0, 292.0, 290.0, 285.0, 294.0, 286.0, 290.0, 314.0, 316.0, 253.0, 248.0, 290.0, 297.0, 316.0, 317.0, 316.0, 314.0, 289.0, 290.0, 297.0, 282.0, 287.0, 292.0, 263.0, 267.0, 289.0, 293.0, 286.0, 287.0, 284.0, 286.0, 266.0, 264.0, 299.0, 283.0, 249.0, 270.0, 288.0, 299.0, 263.0, 262.0, 320.0, 313.0, 316.0, 314.0, 311.0, 319.0, 263.0, 262.0, 248.0, 271.0, 289.0, 293.0, 288.0, 294.0, 289.0, 293.0, 281.0, 277.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6932739430598015, "mean_inference_ms": 1.2358048086334636, "mean_action_processing_ms": 0.13311876846202841, "mean_env_wait_ms": 0.8345568859029008, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 639.0, "episode_reward_min": 462.0, "episode_reward_mean": 573.79, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 226.0}, "policy_reward_max": {"ppo": 322.0}, "policy_reward_mean": {"ppo": 286.895}, "hist_stats": {"episode_reward": [525.0, 582.0, 570.0, 573.0, 579.0, 627.0, 522.0, 579.0, 530.0, 510.0, 576.0, 587.0, 579.0, 573.0, 627.0, 582.0, 576.0, 579.0, 582.0, 587.0, 639.0, 633.0, 473.0, 627.0, 630.0, 582.0, 582.0, 582.0, 582.0, 579.0, 525.0, 573.0, 522.0, 516.0, 627.0, 627.0, 525.0, 567.0, 579.0, 627.0, 576.0, 522.0, 630.0, 587.0, 633.0, 462.0, 582.0, 570.0, 576.0, 522.0, 582.0, 570.0, 522.0, 582.0, 582.0, 633.0, 
630.0, 579.0, 527.0, 579.0, 582.0, 579.0, 579.0, 567.0, 579.0, 582.0, 519.0, 582.0, 533.0, 522.0, 573.0, 582.0, 579.0, 576.0, 630.0, 501.0, 587.0, 633.0, 630.0, 579.0, 579.0, 579.0, 530.0, 582.0, 573.0, 570.0, 530.0, 582.0, 519.0, 587.0, 525.0, 633.0, 630.0, 630.0, 525.0, 519.0, 582.0, 582.0, 582.0, 558.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [265.0, 260.0, 290.0, 292.0, 283.0, 287.0, 283.0, 290.0, 293.0, 286.0, 311.0, 316.0, 255.0, 267.0, 286.0, 293.0, 267.0, 263.0, 258.0, 252.0, 285.0, 291.0, 285.0, 302.0, 286.0, 293.0, 278.0, 295.0, 313.0, 314.0, 293.0, 289.0, 287.0, 289.0, 292.0, 287.0, 293.0, 289.0, 301.0, 286.0, 320.0, 319.0, 319.0, 314.0, 242.0, 231.0, 313.0, 314.0, 319.0, 311.0, 291.0, 291.0, 291.0, 291.0, 290.0, 292.0, 293.0, 289.0, 292.0, 287.0, 260.0, 265.0, 288.0, 285.0, 247.0, 275.0, 265.0, 251.0, 322.0, 305.0, 310.0, 317.0, 265.0, 260.0, 287.0, 280.0, 291.0, 288.0, 314.0, 313.0, 288.0, 288.0, 273.0, 249.0, 315.0, 315.0, 286.0, 301.0, 321.0, 312.0, 236.0, 226.0, 291.0, 291.0, 276.0, 294.0, 289.0, 287.0, 263.0, 259.0, 292.0, 290.0, 284.0, 286.0, 267.0, 255.0, 291.0, 291.0, 289.0, 293.0, 317.0, 316.0, 317.0, 313.0, 288.0, 291.0, 265.0, 262.0, 292.0, 287.0, 296.0, 286.0, 294.0, 285.0, 288.0, 291.0, 281.0, 286.0, 287.0, 292.0, 292.0, 290.0, 252.0, 267.0, 289.0, 293.0, 263.0, 270.0, 258.0, 264.0, 287.0, 286.0, 292.0, 290.0, 285.0, 294.0, 286.0, 290.0, 314.0, 316.0, 253.0, 248.0, 290.0, 297.0, 316.0, 317.0, 316.0, 314.0, 289.0, 290.0, 297.0, 282.0, 287.0, 292.0, 
263.0, 267.0, 289.0, 293.0, 286.0, 287.0, 284.0, 286.0, 266.0, 264.0, 299.0, 283.0, 249.0, 270.0, 288.0, 299.0, 263.0, 262.0, 320.0, 313.0, 316.0, 314.0, 311.0, 319.0, 263.0, 262.0, 248.0, 271.0, 289.0, 293.0, 288.0, 294.0, 289.0, 293.0, 281.0, 277.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6932739430598015, "mean_inference_ms": 1.2358048086334636, "mean_action_processing_ms": 0.13311876846202841, "mean_env_wait_ms": 0.8345568859029008, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 12211200, "num_agent_steps_trained": 12211200, "num_env_steps_sampled": 6105600, "num_env_steps_trained": 6105600, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 6105600, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 12211200, "timers": {"training_iteration_time_ms": 3534.379, "learn_time_ms": 1066.085, "learn_throughput": 12006.542, "synch_weights_time_ms": 11.596}, "counters": {"num_env_steps_sampled": 6105600, "num_env_steps_trained": 6105600, "num_agent_steps_sampled": 12211200, "num_agent_steps_trained": 12211200}, "done": false, "episodes_total": 15264, "training_iteration": 477, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-31-23", "timestamp": 1666582283, "time_this_iter_s": 3.535604476928711, "time_total_s": 1810.9164979457855, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1810.9164979457855, "timesteps_since_restore": 0, "iterations_since_restore": 477, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.84, "ram_util_percent": 10.620000000000001}}
+{"custom_metrics": {"sparse_reward_mean": 198.0, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 175.9, "shaped_reward_min": 141, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.65, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 15.9, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 16.55, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 15.78, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.04, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.01, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.01, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, 
"useful_onion_drop_agent_1_mean": 0.0, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 0, "potting_onion_agent_0_mean": 16.3, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 15.64, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 20, "dish_pickup_agent_0_mean": 5.24, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.28, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 4.95, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.06, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.08, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.06, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 4.96, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.05, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.94, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.97, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.3, "optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 15.64, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 20, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.3, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 15.64, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 20, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.002086335327476263, "policy_loss": 0.0016811969690024853, "vf_loss": 
7.846938133239746, "vf_explained_var": 0.5670309066772461, "kl": 0.003477412974461913, "entropy": 0.7591124176979065, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 6118400, "num_env_steps_trained": 6118400, "num_agent_steps_sampled": 12236800, "num_agent_steps_trained": 12236800}, "sampler_results": {"episode_reward_max": 633.0, "episode_reward_min": 462.0, "episode_reward_mean": 571.9, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 226.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 285.95}, "custom_metrics": {"sparse_reward_mean": 198.0, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 175.9, "shaped_reward_min": 141, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.65, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 15.9, 
"onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 16.55, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 15.78, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.04, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.01, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.01, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.0, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 0, "potting_onion_agent_0_mean": 16.3, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 15.64, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 20, "dish_pickup_agent_0_mean": 5.24, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.28, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 4.95, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.06, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.08, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.06, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 4.96, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.05, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.94, "soup_delivery_agent_0_min": 2, 
"soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.97, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.3, "optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 15.64, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 20, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.3, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 15.64, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 20, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [522.0, 516.0, 627.0, 627.0, 525.0, 567.0, 579.0, 627.0, 576.0, 522.0, 630.0, 587.0, 633.0, 462.0, 582.0, 570.0, 576.0, 522.0, 582.0, 570.0, 522.0, 582.0, 582.0, 633.0, 630.0, 579.0, 527.0, 579.0, 582.0, 579.0, 579.0, 567.0, 579.0, 582.0, 519.0, 582.0, 533.0, 522.0, 573.0, 582.0, 579.0, 576.0, 630.0, 501.0, 587.0, 633.0, 630.0, 579.0, 579.0, 579.0, 530.0, 582.0, 573.0, 570.0, 530.0, 582.0, 519.0, 587.0, 525.0, 633.0, 630.0, 630.0, 525.0, 519.0, 582.0, 582.0, 582.0, 558.0, 587.0, 576.0, 519.0, 527.0, 579.0, 573.0, 576.0, 627.0, 582.0, 573.0, 576.0, 579.0, 579.0, 530.0, 579.0, 576.0, 582.0, 573.0, 576.0, 525.0, 519.0, 582.0, 627.0, 516.0, 579.0, 579.0, 573.0, 530.0, 627.0, 573.0, 582.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [247.0, 275.0, 265.0, 251.0, 322.0, 305.0, 310.0, 317.0, 265.0, 260.0, 287.0, 280.0, 291.0, 288.0, 314.0, 313.0, 288.0, 288.0, 273.0, 249.0, 315.0, 315.0, 286.0, 301.0, 321.0, 312.0, 236.0, 226.0, 291.0, 291.0, 276.0, 294.0, 289.0, 287.0, 263.0, 259.0, 292.0, 290.0, 284.0, 286.0, 267.0, 255.0, 291.0, 291.0, 289.0, 293.0, 317.0, 316.0, 317.0, 
313.0, 288.0, 291.0, 265.0, 262.0, 292.0, 287.0, 296.0, 286.0, 294.0, 285.0, 288.0, 291.0, 281.0, 286.0, 287.0, 292.0, 292.0, 290.0, 252.0, 267.0, 289.0, 293.0, 263.0, 270.0, 258.0, 264.0, 287.0, 286.0, 292.0, 290.0, 285.0, 294.0, 286.0, 290.0, 314.0, 316.0, 253.0, 248.0, 290.0, 297.0, 316.0, 317.0, 316.0, 314.0, 289.0, 290.0, 297.0, 282.0, 287.0, 292.0, 263.0, 267.0, 289.0, 293.0, 286.0, 287.0, 284.0, 286.0, 266.0, 264.0, 299.0, 283.0, 249.0, 270.0, 288.0, 299.0, 263.0, 262.0, 320.0, 313.0, 316.0, 314.0, 311.0, 319.0, 263.0, 262.0, 248.0, 271.0, 289.0, 293.0, 288.0, 294.0, 289.0, 293.0, 281.0, 277.0, 299.0, 288.0, 299.0, 277.0, 257.0, 262.0, 260.0, 267.0, 291.0, 288.0, 296.0, 277.0, 292.0, 284.0, 316.0, 311.0, 293.0, 289.0, 280.0, 293.0, 296.0, 280.0, 296.0, 283.0, 291.0, 288.0, 269.0, 261.0, 299.0, 280.0, 292.0, 284.0, 297.0, 285.0, 288.0, 285.0, 293.0, 283.0, 270.0, 255.0, 265.0, 254.0, 299.0, 283.0, 303.0, 324.0, 260.0, 256.0, 288.0, 291.0, 285.0, 294.0, 293.0, 280.0, 260.0, 270.0, 313.0, 314.0, 288.0, 285.0, 289.0, 293.0, 293.0, 289.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6932216431064484, "mean_inference_ms": 1.235692735455491, "mean_action_processing_ms": 0.13311392444286807, "mean_env_wait_ms": 0.8344878818156225, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 633.0, "episode_reward_min": 462.0, "episode_reward_mean": 571.9, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 226.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 285.95}, "hist_stats": {"episode_reward": [522.0, 516.0, 627.0, 627.0, 525.0, 567.0, 579.0, 627.0, 576.0, 522.0, 630.0, 587.0, 633.0, 462.0, 582.0, 570.0, 576.0, 522.0, 582.0, 570.0, 522.0, 582.0, 582.0, 633.0, 630.0, 579.0, 527.0, 579.0, 582.0, 579.0, 579.0, 567.0, 579.0, 582.0, 519.0, 582.0, 533.0, 522.0, 573.0, 582.0, 579.0, 576.0, 630.0, 501.0, 587.0, 633.0, 630.0, 579.0, 579.0, 579.0, 530.0, 582.0, 573.0, 570.0, 530.0, 582.0, 519.0, 
587.0, 525.0, 633.0, 630.0, 630.0, 525.0, 519.0, 582.0, 582.0, 582.0, 558.0, 587.0, 576.0, 519.0, 527.0, 579.0, 573.0, 576.0, 627.0, 582.0, 573.0, 576.0, 579.0, 579.0, 530.0, 579.0, 576.0, 582.0, 573.0, 576.0, 525.0, 519.0, 582.0, 627.0, 516.0, 579.0, 579.0, 573.0, 530.0, 627.0, 573.0, 582.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [247.0, 275.0, 265.0, 251.0, 322.0, 305.0, 310.0, 317.0, 265.0, 260.0, 287.0, 280.0, 291.0, 288.0, 314.0, 313.0, 288.0, 288.0, 273.0, 249.0, 315.0, 315.0, 286.0, 301.0, 321.0, 312.0, 236.0, 226.0, 291.0, 291.0, 276.0, 294.0, 289.0, 287.0, 263.0, 259.0, 292.0, 290.0, 284.0, 286.0, 267.0, 255.0, 291.0, 291.0, 289.0, 293.0, 317.0, 316.0, 317.0, 313.0, 288.0, 291.0, 265.0, 262.0, 292.0, 287.0, 296.0, 286.0, 294.0, 285.0, 288.0, 291.0, 281.0, 286.0, 287.0, 292.0, 292.0, 290.0, 252.0, 267.0, 289.0, 293.0, 263.0, 270.0, 258.0, 264.0, 287.0, 286.0, 292.0, 290.0, 285.0, 294.0, 286.0, 290.0, 314.0, 316.0, 253.0, 248.0, 290.0, 297.0, 316.0, 317.0, 316.0, 314.0, 289.0, 290.0, 297.0, 282.0, 287.0, 292.0, 263.0, 267.0, 289.0, 293.0, 286.0, 287.0, 284.0, 286.0, 266.0, 264.0, 299.0, 283.0, 249.0, 270.0, 288.0, 299.0, 263.0, 262.0, 320.0, 313.0, 316.0, 314.0, 311.0, 319.0, 263.0, 262.0, 248.0, 271.0, 289.0, 293.0, 288.0, 294.0, 289.0, 293.0, 281.0, 277.0, 299.0, 288.0, 299.0, 277.0, 257.0, 262.0, 260.0, 267.0, 291.0, 288.0, 296.0, 277.0, 292.0, 284.0, 316.0, 311.0, 293.0, 289.0, 280.0, 293.0, 296.0, 280.0, 296.0, 283.0, 291.0, 288.0, 269.0, 261.0, 299.0, 
280.0, 292.0, 284.0, 297.0, 285.0, 288.0, 285.0, 293.0, 283.0, 270.0, 255.0, 265.0, 254.0, 299.0, 283.0, 303.0, 324.0, 260.0, 256.0, 288.0, 291.0, 285.0, 294.0, 293.0, 280.0, 260.0, 270.0, 313.0, 314.0, 288.0, 285.0, 289.0, 293.0, 293.0, 289.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6932216431064484, "mean_inference_ms": 1.235692735455491, "mean_action_processing_ms": 0.13311392444286807, "mean_env_wait_ms": 0.8344878818156225, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 12236800, "num_agent_steps_trained": 12236800, "num_env_steps_sampled": 6118400, "num_env_steps_trained": 6118400, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 6118400, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 12236800, "timers": {"training_iteration_time_ms": 3535.795, "learn_time_ms": 1064.167, "learn_throughput": 12028.182, "synch_weights_time_ms": 10.668}, "counters": {"num_env_steps_sampled": 6118400, "num_env_steps_trained": 6118400, "num_agent_steps_sampled": 12236800, "num_agent_steps_trained": 12236800}, "done": false, "episodes_total": 15296, "training_iteration": 478, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-31-27", "timestamp": 1666582287, "time_this_iter_s": 3.668811082839966, "time_total_s": 1814.5853090286255, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1814.5853090286255, "timesteps_since_restore": 0, "iterations_since_restore": 478, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.86, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 199.2, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 178.02, "shaped_reward_min": 141, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.85, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 16.03, "onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.73, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 15.88, "useful_onion_pickup_agent_1_min": 11, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.04, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.03, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.01, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, 
"useful_onion_drop_agent_1_mean": 0.02, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.55, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 15.75, "potting_onion_agent_1_min": 11, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.21, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 5.35, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.01, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.23, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.06, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.04, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 4.92, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.16, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 4.89, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.08, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.55, "optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 15.75, "optimal_onion_potting_agent_1_min": 11, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.55, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 15.75, "viable_onion_potting_agent_1_min": 11, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0014865855919197202, "policy_loss": 0.00110257463529706, "vf_loss": 
7.5981879234313965, "vf_explained_var": 0.6002695560455322, "kl": 0.003477748716250062, "entropy": 0.751615047454834, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 6131200, "num_env_steps_trained": 6131200, "num_agent_steps_sampled": 12262400, "num_agent_steps_trained": 12262400}, "sampler_results": {"episode_reward_max": 639.0, "episode_reward_min": 501.0, "episode_reward_mean": 576.42, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 248.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 288.21}, "custom_metrics": {"sparse_reward_mean": 199.2, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 178.02, "shaped_reward_min": 141, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.85, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 16.03, 
"onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.73, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 15.88, "useful_onion_pickup_agent_1_min": 11, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.04, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.03, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.01, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.02, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.55, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 15.75, "potting_onion_agent_1_min": 11, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.21, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 5.35, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.01, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.23, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.06, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.04, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 4.92, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.16, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 4.89, "soup_delivery_agent_0_min": 3, 
"soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.08, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.55, "optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 15.75, "optimal_onion_potting_agent_1_min": 11, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.55, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 15.75, "viable_onion_potting_agent_1_min": 11, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 582.0, 519.0, 582.0, 533.0, 522.0, 573.0, 582.0, 579.0, 576.0, 630.0, 501.0, 587.0, 633.0, 630.0, 579.0, 579.0, 579.0, 530.0, 582.0, 573.0, 570.0, 530.0, 582.0, 519.0, 587.0, 525.0, 633.0, 630.0, 630.0, 525.0, 519.0, 582.0, 582.0, 582.0, 558.0, 587.0, 576.0, 519.0, 527.0, 579.0, 573.0, 576.0, 627.0, 582.0, 573.0, 576.0, 579.0, 579.0, 530.0, 579.0, 576.0, 582.0, 573.0, 576.0, 525.0, 519.0, 582.0, 627.0, 516.0, 579.0, 579.0, 573.0, 530.0, 627.0, 573.0, 582.0, 582.0, 582.0, 579.0, 630.0, 636.0, 582.0, 582.0, 582.0, 582.0, 587.0, 582.0, 579.0, 582.0, 573.0, 582.0, 582.0, 582.0, 636.0, 533.0, 576.0, 582.0, 573.0, 627.0, 627.0, 582.0, 579.0, 639.0, 525.0, 582.0, 587.0, 582.0, 582.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [287.0, 292.0, 292.0, 290.0, 252.0, 267.0, 289.0, 293.0, 263.0, 270.0, 258.0, 264.0, 287.0, 286.0, 292.0, 290.0, 285.0, 294.0, 286.0, 290.0, 314.0, 316.0, 253.0, 248.0, 290.0, 297.0, 316.0, 317.0, 316.0, 314.0, 289.0, 290.0, 297.0, 282.0, 287.0, 292.0, 263.0, 267.0, 289.0, 293.0, 286.0, 287.0, 284.0, 286.0, 266.0, 264.0, 299.0, 283.0, 249.0, 
270.0, 288.0, 299.0, 263.0, 262.0, 320.0, 313.0, 316.0, 314.0, 311.0, 319.0, 263.0, 262.0, 248.0, 271.0, 289.0, 293.0, 288.0, 294.0, 289.0, 293.0, 281.0, 277.0, 299.0, 288.0, 299.0, 277.0, 257.0, 262.0, 260.0, 267.0, 291.0, 288.0, 296.0, 277.0, 292.0, 284.0, 316.0, 311.0, 293.0, 289.0, 280.0, 293.0, 296.0, 280.0, 296.0, 283.0, 291.0, 288.0, 269.0, 261.0, 299.0, 280.0, 292.0, 284.0, 297.0, 285.0, 288.0, 285.0, 293.0, 283.0, 270.0, 255.0, 265.0, 254.0, 299.0, 283.0, 303.0, 324.0, 260.0, 256.0, 288.0, 291.0, 285.0, 294.0, 293.0, 280.0, 260.0, 270.0, 313.0, 314.0, 288.0, 285.0, 289.0, 293.0, 293.0, 289.0, 292.0, 290.0, 291.0, 288.0, 316.0, 314.0, 319.0, 317.0, 287.0, 295.0, 289.0, 293.0, 289.0, 293.0, 297.0, 285.0, 293.0, 294.0, 289.0, 293.0, 288.0, 291.0, 291.0, 291.0, 281.0, 292.0, 289.0, 293.0, 289.0, 293.0, 297.0, 285.0, 317.0, 319.0, 271.0, 262.0, 292.0, 284.0, 291.0, 291.0, 285.0, 288.0, 311.0, 316.0, 308.0, 319.0, 292.0, 290.0, 294.0, 285.0, 324.0, 315.0, 264.0, 261.0, 286.0, 296.0, 288.0, 299.0, 291.0, 291.0, 293.0, 289.0, 289.0, 290.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6931697245030041, "mean_inference_ms": 1.235632701432748, "mean_action_processing_ms": 0.13310679759987598, "mean_env_wait_ms": 0.8344476003245105, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 639.0, "episode_reward_min": 501.0, "episode_reward_mean": 576.42, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 248.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 288.21}, "hist_stats": {"episode_reward": [579.0, 582.0, 519.0, 582.0, 533.0, 522.0, 573.0, 582.0, 579.0, 576.0, 630.0, 501.0, 587.0, 633.0, 630.0, 579.0, 579.0, 579.0, 530.0, 582.0, 573.0, 570.0, 530.0, 582.0, 519.0, 587.0, 525.0, 633.0, 630.0, 630.0, 525.0, 519.0, 582.0, 582.0, 582.0, 558.0, 587.0, 576.0, 519.0, 527.0, 579.0, 573.0, 576.0, 627.0, 582.0, 573.0, 576.0, 579.0, 579.0, 530.0, 579.0, 576.0, 582.0, 573.0, 576.0, 525.0, 519.0, 
582.0, 627.0, 516.0, 579.0, 579.0, 573.0, 530.0, 627.0, 573.0, 582.0, 582.0, 582.0, 579.0, 630.0, 636.0, 582.0, 582.0, 582.0, 582.0, 587.0, 582.0, 579.0, 582.0, 573.0, 582.0, 582.0, 582.0, 636.0, 533.0, 576.0, 582.0, 573.0, 627.0, 627.0, 582.0, 579.0, 639.0, 525.0, 582.0, 587.0, 582.0, 582.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [287.0, 292.0, 292.0, 290.0, 252.0, 267.0, 289.0, 293.0, 263.0, 270.0, 258.0, 264.0, 287.0, 286.0, 292.0, 290.0, 285.0, 294.0, 286.0, 290.0, 314.0, 316.0, 253.0, 248.0, 290.0, 297.0, 316.0, 317.0, 316.0, 314.0, 289.0, 290.0, 297.0, 282.0, 287.0, 292.0, 263.0, 267.0, 289.0, 293.0, 286.0, 287.0, 284.0, 286.0, 266.0, 264.0, 299.0, 283.0, 249.0, 270.0, 288.0, 299.0, 263.0, 262.0, 320.0, 313.0, 316.0, 314.0, 311.0, 319.0, 263.0, 262.0, 248.0, 271.0, 289.0, 293.0, 288.0, 294.0, 289.0, 293.0, 281.0, 277.0, 299.0, 288.0, 299.0, 277.0, 257.0, 262.0, 260.0, 267.0, 291.0, 288.0, 296.0, 277.0, 292.0, 284.0, 316.0, 311.0, 293.0, 289.0, 280.0, 293.0, 296.0, 280.0, 296.0, 283.0, 291.0, 288.0, 269.0, 261.0, 299.0, 280.0, 292.0, 284.0, 297.0, 285.0, 288.0, 285.0, 293.0, 283.0, 270.0, 255.0, 265.0, 254.0, 299.0, 283.0, 303.0, 324.0, 260.0, 256.0, 288.0, 291.0, 285.0, 294.0, 293.0, 280.0, 260.0, 270.0, 313.0, 314.0, 288.0, 285.0, 289.0, 293.0, 293.0, 289.0, 292.0, 290.0, 291.0, 288.0, 316.0, 314.0, 319.0, 317.0, 287.0, 295.0, 289.0, 293.0, 289.0, 293.0, 297.0, 285.0, 293.0, 294.0, 289.0, 293.0, 288.0, 291.0, 291.0, 291.0, 281.0, 292.0, 289.0, 293.0, 289.0, 
293.0, 297.0, 285.0, 317.0, 319.0, 271.0, 262.0, 292.0, 284.0, 291.0, 291.0, 285.0, 288.0, 311.0, 316.0, 308.0, 319.0, 292.0, 290.0, 294.0, 285.0, 324.0, 315.0, 264.0, 261.0, 286.0, 296.0, 288.0, 299.0, 291.0, 291.0, 293.0, 289.0, 289.0, 290.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6931697245030041, "mean_inference_ms": 1.235632701432748, "mean_action_processing_ms": 0.13310679759987598, "mean_env_wait_ms": 0.8344476003245105, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 12262400, "num_agent_steps_trained": 12262400, "num_env_steps_sampled": 6131200, "num_env_steps_trained": 6131200, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 6131200, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 12262400, "timers": {"training_iteration_time_ms": 3557.232, "learn_time_ms": 1062.71, "learn_throughput": 12044.681, "synch_weights_time_ms": 10.597}, "counters": {"num_env_steps_sampled": 6131200, "num_env_steps_trained": 6131200, "num_agent_steps_sampled": 12262400, "num_agent_steps_trained": 12262400}, "done": false, "episodes_total": 15328, "training_iteration": 479, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-31-31", "timestamp": 1666582291, "time_this_iter_s": 3.794663190841675, "time_total_s": 1818.3799722194672, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1818.3799722194672, "timesteps_since_restore": 0, "iterations_since_restore": 479, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.8, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 196.4, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 176.01, "shaped_reward_min": 131, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.51, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 15.87, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.37, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 15.68, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.05, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.03, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.03, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, 
"useful_onion_drop_agent_1_mean": 0.02, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.24, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 15.61, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.24, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 7, "dish_pickup_agent_1_mean": 5.2, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.13, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.09, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.03, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.06, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 4.99, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 4.97, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 4.94, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.9, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.24, "optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 15.61, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.24, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 15.61, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0032907375134527683, "policy_loss": 0.002880503423511982, "vf_loss": 
7.935708045959473, "vf_explained_var": 0.5589988231658936, "kl": 0.0031868487130850554, "entropy": 0.766674816608429, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 6144000, "num_env_steps_trained": 6144000, "num_agent_steps_sampled": 12288000, "num_agent_steps_trained": 12288000}, "sampler_results": {"episode_reward_max": 639.0, "episode_reward_min": 411.0, "episode_reward_mean": 568.81, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 201.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 284.405}, "custom_metrics": {"sparse_reward_mean": 196.4, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 176.01, "shaped_reward_min": 131, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.51, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 15.87, 
"onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.37, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 15.68, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.05, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.03, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.03, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.02, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.24, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 15.61, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.24, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 7, "dish_pickup_agent_1_mean": 5.2, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.13, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.09, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.03, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.06, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 4.99, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 4.97, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 4.94, "soup_delivery_agent_0_min": 3, 
"soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.9, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.24, "optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 15.61, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.24, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 15.61, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 582.0, 582.0, 558.0, 587.0, 576.0, 519.0, 527.0, 579.0, 573.0, 576.0, 627.0, 582.0, 573.0, 576.0, 579.0, 579.0, 530.0, 579.0, 576.0, 582.0, 573.0, 576.0, 525.0, 519.0, 582.0, 627.0, 516.0, 579.0, 579.0, 573.0, 530.0, 627.0, 573.0, 582.0, 582.0, 582.0, 579.0, 630.0, 636.0, 582.0, 582.0, 582.0, 582.0, 587.0, 582.0, 579.0, 582.0, 573.0, 582.0, 582.0, 582.0, 636.0, 533.0, 576.0, 582.0, 573.0, 627.0, 627.0, 582.0, 579.0, 639.0, 525.0, 582.0, 587.0, 582.0, 582.0, 579.0, 573.0, 573.0, 579.0, 411.0, 573.0, 579.0, 582.0, 530.0, 627.0, 530.0, 525.0, 533.0, 630.0, 570.0, 576.0, 530.0, 570.0, 530.0, 411.0, 522.0, 413.0, 579.0, 587.0, 579.0, 525.0, 525.0, 519.0, 579.0, 579.0, 525.0, 576.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [289.0, 293.0, 288.0, 294.0, 289.0, 293.0, 281.0, 277.0, 299.0, 288.0, 299.0, 277.0, 257.0, 262.0, 260.0, 267.0, 291.0, 288.0, 296.0, 277.0, 292.0, 284.0, 316.0, 311.0, 293.0, 289.0, 280.0, 293.0, 296.0, 280.0, 296.0, 283.0, 291.0, 288.0, 269.0, 261.0, 299.0, 280.0, 292.0, 284.0, 297.0, 285.0, 288.0, 285.0, 293.0, 283.0, 270.0, 255.0, 265.0, 
254.0, 299.0, 283.0, 303.0, 324.0, 260.0, 256.0, 288.0, 291.0, 285.0, 294.0, 293.0, 280.0, 260.0, 270.0, 313.0, 314.0, 288.0, 285.0, 289.0, 293.0, 293.0, 289.0, 292.0, 290.0, 291.0, 288.0, 316.0, 314.0, 319.0, 317.0, 287.0, 295.0, 289.0, 293.0, 289.0, 293.0, 297.0, 285.0, 293.0, 294.0, 289.0, 293.0, 288.0, 291.0, 291.0, 291.0, 281.0, 292.0, 289.0, 293.0, 289.0, 293.0, 297.0, 285.0, 317.0, 319.0, 271.0, 262.0, 292.0, 284.0, 291.0, 291.0, 285.0, 288.0, 311.0, 316.0, 308.0, 319.0, 292.0, 290.0, 294.0, 285.0, 324.0, 315.0, 264.0, 261.0, 286.0, 296.0, 288.0, 299.0, 291.0, 291.0, 293.0, 289.0, 289.0, 290.0, 285.0, 288.0, 283.0, 290.0, 295.0, 284.0, 208.0, 203.0, 288.0, 285.0, 286.0, 293.0, 297.0, 285.0, 265.0, 265.0, 316.0, 311.0, 264.0, 266.0, 258.0, 267.0, 270.0, 263.0, 316.0, 314.0, 285.0, 285.0, 291.0, 285.0, 261.0, 269.0, 285.0, 285.0, 265.0, 265.0, 208.0, 203.0, 262.0, 260.0, 212.0, 201.0, 296.0, 283.0, 296.0, 291.0, 286.0, 293.0, 263.0, 262.0, 263.0, 262.0, 265.0, 254.0, 294.0, 285.0, 288.0, 291.0, 260.0, 265.0, 289.0, 287.0, 296.0, 283.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6931151782683085, "mean_inference_ms": 1.2356500040177392, "mean_action_processing_ms": 0.13309948751345646, "mean_env_wait_ms": 0.8344131259950188, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 639.0, "episode_reward_min": 411.0, "episode_reward_mean": 568.81, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 201.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 284.405}, "hist_stats": {"episode_reward": [582.0, 582.0, 582.0, 558.0, 587.0, 576.0, 519.0, 527.0, 579.0, 573.0, 576.0, 627.0, 582.0, 573.0, 576.0, 579.0, 579.0, 530.0, 579.0, 576.0, 582.0, 573.0, 576.0, 525.0, 519.0, 582.0, 627.0, 516.0, 579.0, 579.0, 573.0, 530.0, 627.0, 573.0, 582.0, 582.0, 582.0, 579.0, 630.0, 636.0, 582.0, 582.0, 582.0, 582.0, 587.0, 582.0, 579.0, 582.0, 573.0, 582.0, 582.0, 582.0, 636.0, 533.0, 576.0, 582.0, 
573.0, 627.0, 627.0, 582.0, 579.0, 639.0, 525.0, 582.0, 587.0, 582.0, 582.0, 579.0, 573.0, 573.0, 579.0, 411.0, 573.0, 579.0, 582.0, 530.0, 627.0, 530.0, 525.0, 533.0, 630.0, 570.0, 576.0, 530.0, 570.0, 530.0, 411.0, 522.0, 413.0, 579.0, 587.0, 579.0, 525.0, 525.0, 519.0, 579.0, 579.0, 525.0, 576.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [289.0, 293.0, 288.0, 294.0, 289.0, 293.0, 281.0, 277.0, 299.0, 288.0, 299.0, 277.0, 257.0, 262.0, 260.0, 267.0, 291.0, 288.0, 296.0, 277.0, 292.0, 284.0, 316.0, 311.0, 293.0, 289.0, 280.0, 293.0, 296.0, 280.0, 296.0, 283.0, 291.0, 288.0, 269.0, 261.0, 299.0, 280.0, 292.0, 284.0, 297.0, 285.0, 288.0, 285.0, 293.0, 283.0, 270.0, 255.0, 265.0, 254.0, 299.0, 283.0, 303.0, 324.0, 260.0, 256.0, 288.0, 291.0, 285.0, 294.0, 293.0, 280.0, 260.0, 270.0, 313.0, 314.0, 288.0, 285.0, 289.0, 293.0, 293.0, 289.0, 292.0, 290.0, 291.0, 288.0, 316.0, 314.0, 319.0, 317.0, 287.0, 295.0, 289.0, 293.0, 289.0, 293.0, 297.0, 285.0, 293.0, 294.0, 289.0, 293.0, 288.0, 291.0, 291.0, 291.0, 281.0, 292.0, 289.0, 293.0, 289.0, 293.0, 297.0, 285.0, 317.0, 319.0, 271.0, 262.0, 292.0, 284.0, 291.0, 291.0, 285.0, 288.0, 311.0, 316.0, 308.0, 319.0, 292.0, 290.0, 294.0, 285.0, 324.0, 315.0, 264.0, 261.0, 286.0, 296.0, 288.0, 299.0, 291.0, 291.0, 293.0, 289.0, 289.0, 290.0, 285.0, 288.0, 283.0, 290.0, 295.0, 284.0, 208.0, 203.0, 288.0, 285.0, 286.0, 293.0, 297.0, 285.0, 265.0, 265.0, 316.0, 311.0, 264.0, 266.0, 258.0, 267.0, 270.0, 263.0, 316.0, 314.0, 285.0, 285.0, 
291.0, 285.0, 261.0, 269.0, 285.0, 285.0, 265.0, 265.0, 208.0, 203.0, 262.0, 260.0, 212.0, 201.0, 296.0, 283.0, 296.0, 291.0, 286.0, 293.0, 263.0, 262.0, 263.0, 262.0, 265.0, 254.0, 294.0, 285.0, 288.0, 291.0, 260.0, 265.0, 289.0, 287.0, 296.0, 283.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6931151782683085, "mean_inference_ms": 1.2356500040177392, "mean_action_processing_ms": 0.13309948751345646, "mean_env_wait_ms": 0.8344131259950188, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 12288000, "num_agent_steps_trained": 12288000, "num_env_steps_sampled": 6144000, "num_env_steps_trained": 6144000, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 6144000, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 12288000, "timers": {"training_iteration_time_ms": 3565.377, "learn_time_ms": 1052.259, "learn_throughput": 12164.3, "synch_weights_time_ms": 11.253}, "counters": {"num_env_steps_sampled": 6144000, "num_env_steps_trained": 6144000, "num_agent_steps_sampled": 12288000, "num_agent_steps_trained": 12288000}, "done": false, "episodes_total": 15360, "training_iteration": 480, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-31-35", "timestamp": 1666582295, "time_this_iter_s": 3.6855309009552, "time_total_s": 1822.0655031204224, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1822.0655031204224, "timesteps_since_restore": 0, "iterations_since_restore": 480, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 24.32, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 197.0, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 176.8, "shaped_reward_min": 131, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.71, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 15.74, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.59, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 15.5, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.05, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.04, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.03, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, 
"useful_onion_drop_agent_1_mean": 0.02, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.45, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 15.5, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.17, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 7, "dish_pickup_agent_1_mean": 5.35, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.08, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.27, "useful_dish_pickup_agent_1_min": 3, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.04, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.06, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 4.87, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.11, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 4.81, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.05, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.45, "optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 15.5, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.45, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 15.5, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0016352785751223564, "policy_loss": 0.0012512167450040579, "vf_loss": 
7.683942794799805, "vf_explained_var": 0.5772947072982788, "kl": 0.003058413974940777, "entropy": 0.7686662077903748, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 6156800, "num_env_steps_trained": 6156800, "num_agent_steps_sampled": 12313600, "num_agent_steps_trained": 12313600}, "sampler_results": {"episode_reward_max": 639.0, "episode_reward_min": 411.0, "episode_reward_mean": 570.8, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 201.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 285.4}, "custom_metrics": {"sparse_reward_mean": 197.0, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 176.8, "shaped_reward_min": 131, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.71, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 15.74, 
"onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.59, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 15.5, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.05, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.04, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.03, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.02, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.45, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 15.5, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.17, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 7, "dish_pickup_agent_1_mean": 5.35, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.08, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.27, "useful_dish_pickup_agent_1_min": 3, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.04, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.06, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 4.87, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.11, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 4.81, "soup_delivery_agent_0_min": 2, 
"soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.05, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.45, "optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 15.5, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.45, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 15.5, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [627.0, 573.0, 582.0, 582.0, 582.0, 579.0, 630.0, 636.0, 582.0, 582.0, 582.0, 582.0, 587.0, 582.0, 579.0, 582.0, 573.0, 582.0, 582.0, 582.0, 636.0, 533.0, 576.0, 582.0, 573.0, 627.0, 627.0, 582.0, 579.0, 639.0, 525.0, 582.0, 587.0, 582.0, 582.0, 579.0, 573.0, 573.0, 579.0, 411.0, 573.0, 579.0, 582.0, 530.0, 627.0, 530.0, 525.0, 533.0, 630.0, 570.0, 576.0, 530.0, 570.0, 530.0, 411.0, 522.0, 413.0, 579.0, 587.0, 579.0, 525.0, 525.0, 519.0, 579.0, 579.0, 525.0, 576.0, 579.0, 582.0, 576.0, 579.0, 570.0, 525.0, 579.0, 579.0, 573.0, 573.0, 587.0, 636.0, 582.0, 579.0, 576.0, 525.0, 579.0, 570.0, 582.0, 573.0, 582.0, 576.0, 587.0, 582.0, 630.0, 587.0, 522.0, 582.0, 579.0, 510.0, 582.0, 579.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [313.0, 314.0, 288.0, 285.0, 289.0, 293.0, 293.0, 289.0, 292.0, 290.0, 291.0, 288.0, 316.0, 314.0, 319.0, 317.0, 287.0, 295.0, 289.0, 293.0, 289.0, 293.0, 297.0, 285.0, 293.0, 294.0, 289.0, 293.0, 288.0, 291.0, 291.0, 291.0, 281.0, 292.0, 289.0, 293.0, 289.0, 293.0, 297.0, 285.0, 317.0, 319.0, 271.0, 262.0, 292.0, 284.0, 291.0, 291.0, 285.0, 
288.0, 311.0, 316.0, 308.0, 319.0, 292.0, 290.0, 294.0, 285.0, 324.0, 315.0, 264.0, 261.0, 286.0, 296.0, 288.0, 299.0, 291.0, 291.0, 293.0, 289.0, 289.0, 290.0, 285.0, 288.0, 283.0, 290.0, 295.0, 284.0, 208.0, 203.0, 288.0, 285.0, 286.0, 293.0, 297.0, 285.0, 265.0, 265.0, 316.0, 311.0, 264.0, 266.0, 258.0, 267.0, 270.0, 263.0, 316.0, 314.0, 285.0, 285.0, 291.0, 285.0, 261.0, 269.0, 285.0, 285.0, 265.0, 265.0, 208.0, 203.0, 262.0, 260.0, 212.0, 201.0, 296.0, 283.0, 296.0, 291.0, 286.0, 293.0, 263.0, 262.0, 263.0, 262.0, 265.0, 254.0, 294.0, 285.0, 288.0, 291.0, 260.0, 265.0, 289.0, 287.0, 296.0, 283.0, 289.0, 293.0, 288.0, 288.0, 294.0, 285.0, 283.0, 287.0, 271.0, 254.0, 293.0, 286.0, 291.0, 288.0, 286.0, 287.0, 285.0, 288.0, 296.0, 291.0, 314.0, 322.0, 294.0, 288.0, 293.0, 286.0, 293.0, 283.0, 263.0, 262.0, 288.0, 291.0, 284.0, 286.0, 295.0, 287.0, 280.0, 293.0, 297.0, 285.0, 291.0, 285.0, 296.0, 291.0, 288.0, 294.0, 315.0, 315.0, 296.0, 291.0, 260.0, 262.0, 293.0, 289.0, 291.0, 288.0, 253.0, 257.0, 291.0, 291.0, 283.0, 296.0, 288.0, 291.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6930632322567098, "mean_inference_ms": 1.2356661180576958, "mean_action_processing_ms": 0.13309089723525241, "mean_env_wait_ms": 0.8343735986878177, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 639.0, "episode_reward_min": 411.0, "episode_reward_mean": 570.8, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 201.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 285.4}, "hist_stats": {"episode_reward": [627.0, 573.0, 582.0, 582.0, 582.0, 579.0, 630.0, 636.0, 582.0, 582.0, 582.0, 582.0, 587.0, 582.0, 579.0, 582.0, 573.0, 582.0, 582.0, 582.0, 636.0, 533.0, 576.0, 582.0, 573.0, 627.0, 627.0, 582.0, 579.0, 639.0, 525.0, 582.0, 587.0, 582.0, 582.0, 579.0, 573.0, 573.0, 579.0, 411.0, 573.0, 579.0, 582.0, 530.0, 627.0, 530.0, 525.0, 533.0, 630.0, 570.0, 576.0, 530.0, 570.0, 530.0, 411.0, 522.0, 413.0, 
579.0, 587.0, 579.0, 525.0, 525.0, 519.0, 579.0, 579.0, 525.0, 576.0, 579.0, 582.0, 576.0, 579.0, 570.0, 525.0, 579.0, 579.0, 573.0, 573.0, 587.0, 636.0, 582.0, 579.0, 576.0, 525.0, 579.0, 570.0, 582.0, 573.0, 582.0, 576.0, 587.0, 582.0, 630.0, 587.0, 522.0, 582.0, 579.0, 510.0, 582.0, 579.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [313.0, 314.0, 288.0, 285.0, 289.0, 293.0, 293.0, 289.0, 292.0, 290.0, 291.0, 288.0, 316.0, 314.0, 319.0, 317.0, 287.0, 295.0, 289.0, 293.0, 289.0, 293.0, 297.0, 285.0, 293.0, 294.0, 289.0, 293.0, 288.0, 291.0, 291.0, 291.0, 281.0, 292.0, 289.0, 293.0, 289.0, 293.0, 297.0, 285.0, 317.0, 319.0, 271.0, 262.0, 292.0, 284.0, 291.0, 291.0, 285.0, 288.0, 311.0, 316.0, 308.0, 319.0, 292.0, 290.0, 294.0, 285.0, 324.0, 315.0, 264.0, 261.0, 286.0, 296.0, 288.0, 299.0, 291.0, 291.0, 293.0, 289.0, 289.0, 290.0, 285.0, 288.0, 283.0, 290.0, 295.0, 284.0, 208.0, 203.0, 288.0, 285.0, 286.0, 293.0, 297.0, 285.0, 265.0, 265.0, 316.0, 311.0, 264.0, 266.0, 258.0, 267.0, 270.0, 263.0, 316.0, 314.0, 285.0, 285.0, 291.0, 285.0, 261.0, 269.0, 285.0, 285.0, 265.0, 265.0, 208.0, 203.0, 262.0, 260.0, 212.0, 201.0, 296.0, 283.0, 296.0, 291.0, 286.0, 293.0, 263.0, 262.0, 263.0, 262.0, 265.0, 254.0, 294.0, 285.0, 288.0, 291.0, 260.0, 265.0, 289.0, 287.0, 296.0, 283.0, 289.0, 293.0, 288.0, 288.0, 294.0, 285.0, 283.0, 287.0, 271.0, 254.0, 293.0, 286.0, 291.0, 288.0, 286.0, 287.0, 285.0, 288.0, 296.0, 291.0, 314.0, 322.0, 294.0, 288.0, 293.0, 286.0, 293.0, 283.0, 263.0, 
262.0, 288.0, 291.0, 284.0, 286.0, 295.0, 287.0, 280.0, 293.0, 297.0, 285.0, 291.0, 285.0, 296.0, 291.0, 288.0, 294.0, 315.0, 315.0, 296.0, 291.0, 260.0, 262.0, 293.0, 289.0, 291.0, 288.0, 253.0, 257.0, 291.0, 291.0, 283.0, 296.0, 288.0, 291.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6930632322567098, "mean_inference_ms": 1.2356661180576958, "mean_action_processing_ms": 0.13309089723525241, "mean_env_wait_ms": 0.8343735986878177, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 12313600, "num_agent_steps_trained": 12313600, "num_env_steps_sampled": 6156800, "num_env_steps_trained": 6156800, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 6156800, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 12313600, "timers": {"training_iteration_time_ms": 3557.096, "learn_time_ms": 1047.153, "learn_throughput": 12223.622, "synch_weights_time_ms": 11.191}, "counters": {"num_env_steps_sampled": 6156800, "num_env_steps_trained": 6156800, "num_agent_steps_sampled": 12313600, "num_agent_steps_trained": 12313600}, "done": false, "episodes_total": 15392, "training_iteration": 481, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-31-39", "timestamp": 1666582299, "time_this_iter_s": 3.5412771701812744, "time_total_s": 1825.6067802906036, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1825.6067802906036, "timesteps_since_restore": 0, "iterations_since_restore": 481, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.53333333333333, "ram_util_percent": 10.616666666666667}}
+{"custom_metrics": {"sparse_reward_mean": 194.8, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 174.69, "shaped_reward_min": 131, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.72, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 15.46, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 19, "useful_onion_pickup_agent_0_mean": 16.59, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 15.22, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 19, "onion_drop_agent_0_mean": 0.06, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.05, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.02, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, 
"useful_onion_drop_agent_1_mean": 0.0, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 0, "potting_onion_agent_0_mean": 16.4, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 15.17, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 19, "dish_pickup_agent_0_mean": 5.08, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 7, "dish_pickup_agent_1_mean": 5.35, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 4.95, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.21, "useful_dish_pickup_agent_1_min": 3, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.04, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.07, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 4.79, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 6, "soup_pickup_agent_1_mean": 5.11, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 4.72, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 6, "soup_delivery_agent_1_mean": 5.03, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.4, "optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 15.17, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 19, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.4, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 15.17, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 19, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.001813309732824564, "policy_loss": -0.0022062822245061398, "vf_loss": 
7.713008880615234, "vf_explained_var": 0.5789626240730286, "kl": 0.0033301603980362415, "entropy": 0.7566564679145813, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 6169600, "num_env_steps_trained": 6169600, "num_agent_steps_sampled": 12339200, "num_agent_steps_trained": 12339200}, "sampler_results": {"episode_reward_max": 636.0, "episode_reward_min": 411.0, "episode_reward_mean": 564.29, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 201.0}, "policy_reward_max": {"ppo": 322.0}, "policy_reward_mean": {"ppo": 282.145}, "custom_metrics": {"sparse_reward_mean": 194.8, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 174.69, "shaped_reward_min": 131, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.72, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 15.46, 
"onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 19, "useful_onion_pickup_agent_0_mean": 16.59, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 15.22, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 19, "onion_drop_agent_0_mean": 0.06, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.05, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.02, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.0, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 0, "potting_onion_agent_0_mean": 16.4, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 15.17, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 19, "dish_pickup_agent_0_mean": 5.08, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 7, "dish_pickup_agent_1_mean": 5.35, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 4.95, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.21, "useful_dish_pickup_agent_1_min": 3, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.04, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.07, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 4.79, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 6, "soup_pickup_agent_1_mean": 5.11, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 4.72, "soup_delivery_agent_0_min": 2, 
"soup_delivery_agent_0_max": 6, "soup_delivery_agent_1_mean": 5.03, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.4, "optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 15.17, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 19, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.4, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 15.17, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 19, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [587.0, 582.0, 582.0, 579.0, 573.0, 573.0, 579.0, 411.0, 573.0, 579.0, 582.0, 530.0, 627.0, 530.0, 525.0, 533.0, 630.0, 570.0, 576.0, 530.0, 570.0, 530.0, 411.0, 522.0, 413.0, 579.0, 587.0, 579.0, 525.0, 525.0, 519.0, 579.0, 579.0, 525.0, 576.0, 579.0, 582.0, 576.0, 579.0, 570.0, 525.0, 579.0, 579.0, 573.0, 573.0, 587.0, 636.0, 582.0, 579.0, 576.0, 525.0, 579.0, 570.0, 582.0, 573.0, 582.0, 576.0, 587.0, 582.0, 630.0, 587.0, 522.0, 582.0, 579.0, 510.0, 582.0, 579.0, 579.0, 582.0, 519.0, 576.0, 630.0, 582.0, 582.0, 533.0, 573.0, 582.0, 579.0, 576.0, 530.0, 627.0, 579.0, 582.0, 584.0, 570.0, 582.0, 582.0, 579.0, 525.0, 570.0, 522.0, 579.0, 587.0, 573.0, 582.0, 570.0, 459.0, 530.0, 576.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [288.0, 299.0, 291.0, 291.0, 293.0, 289.0, 289.0, 290.0, 285.0, 288.0, 283.0, 290.0, 295.0, 284.0, 208.0, 203.0, 288.0, 285.0, 286.0, 293.0, 297.0, 285.0, 265.0, 265.0, 316.0, 311.0, 264.0, 266.0, 258.0, 267.0, 270.0, 263.0, 316.0, 314.0, 285.0, 285.0, 291.0, 285.0, 261.0, 269.0, 285.0, 285.0, 265.0, 265.0, 208.0, 203.0, 262.0, 260.0, 212.0, 
201.0, 296.0, 283.0, 296.0, 291.0, 286.0, 293.0, 263.0, 262.0, 263.0, 262.0, 265.0, 254.0, 294.0, 285.0, 288.0, 291.0, 260.0, 265.0, 289.0, 287.0, 296.0, 283.0, 289.0, 293.0, 288.0, 288.0, 294.0, 285.0, 283.0, 287.0, 271.0, 254.0, 293.0, 286.0, 291.0, 288.0, 286.0, 287.0, 285.0, 288.0, 296.0, 291.0, 314.0, 322.0, 294.0, 288.0, 293.0, 286.0, 293.0, 283.0, 263.0, 262.0, 288.0, 291.0, 284.0, 286.0, 295.0, 287.0, 280.0, 293.0, 297.0, 285.0, 291.0, 285.0, 296.0, 291.0, 288.0, 294.0, 315.0, 315.0, 296.0, 291.0, 260.0, 262.0, 293.0, 289.0, 291.0, 288.0, 253.0, 257.0, 291.0, 291.0, 283.0, 296.0, 288.0, 291.0, 292.0, 290.0, 267.0, 252.0, 296.0, 280.0, 319.0, 311.0, 299.0, 283.0, 289.0, 293.0, 268.0, 265.0, 285.0, 288.0, 294.0, 288.0, 292.0, 287.0, 297.0, 279.0, 273.0, 257.0, 316.0, 311.0, 287.0, 292.0, 292.0, 290.0, 288.0, 296.0, 280.0, 290.0, 291.0, 291.0, 291.0, 291.0, 288.0, 291.0, 260.0, 265.0, 274.0, 296.0, 257.0, 265.0, 286.0, 293.0, 293.0, 294.0, 281.0, 292.0, 288.0, 294.0, 286.0, 284.0, 240.0, 219.0, 261.0, 269.0, 288.0, 288.0, 283.0, 293.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6930222020217434, "mean_inference_ms": 1.2356416687774716, "mean_action_processing_ms": 0.13308365283071852, "mean_env_wait_ms": 0.8343179738544374, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 636.0, "episode_reward_min": 411.0, "episode_reward_mean": 564.29, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 201.0}, "policy_reward_max": {"ppo": 322.0}, "policy_reward_mean": {"ppo": 282.145}, "hist_stats": {"episode_reward": [587.0, 582.0, 582.0, 579.0, 573.0, 573.0, 579.0, 411.0, 573.0, 579.0, 582.0, 530.0, 627.0, 530.0, 525.0, 533.0, 630.0, 570.0, 576.0, 530.0, 570.0, 530.0, 411.0, 522.0, 413.0, 579.0, 587.0, 579.0, 525.0, 525.0, 519.0, 579.0, 579.0, 525.0, 576.0, 579.0, 582.0, 576.0, 579.0, 570.0, 525.0, 579.0, 579.0, 573.0, 573.0, 587.0, 636.0, 582.0, 579.0, 576.0, 525.0, 579.0, 570.0, 582.0, 573.0, 582.0, 
576.0, 587.0, 582.0, 630.0, 587.0, 522.0, 582.0, 579.0, 510.0, 582.0, 579.0, 579.0, 582.0, 519.0, 576.0, 630.0, 582.0, 582.0, 533.0, 573.0, 582.0, 579.0, 576.0, 530.0, 627.0, 579.0, 582.0, 584.0, 570.0, 582.0, 582.0, 579.0, 525.0, 570.0, 522.0, 579.0, 587.0, 573.0, 582.0, 570.0, 459.0, 530.0, 576.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [288.0, 299.0, 291.0, 291.0, 293.0, 289.0, 289.0, 290.0, 285.0, 288.0, 283.0, 290.0, 295.0, 284.0, 208.0, 203.0, 288.0, 285.0, 286.0, 293.0, 297.0, 285.0, 265.0, 265.0, 316.0, 311.0, 264.0, 266.0, 258.0, 267.0, 270.0, 263.0, 316.0, 314.0, 285.0, 285.0, 291.0, 285.0, 261.0, 269.0, 285.0, 285.0, 265.0, 265.0, 208.0, 203.0, 262.0, 260.0, 212.0, 201.0, 296.0, 283.0, 296.0, 291.0, 286.0, 293.0, 263.0, 262.0, 263.0, 262.0, 265.0, 254.0, 294.0, 285.0, 288.0, 291.0, 260.0, 265.0, 289.0, 287.0, 296.0, 283.0, 289.0, 293.0, 288.0, 288.0, 294.0, 285.0, 283.0, 287.0, 271.0, 254.0, 293.0, 286.0, 291.0, 288.0, 286.0, 287.0, 285.0, 288.0, 296.0, 291.0, 314.0, 322.0, 294.0, 288.0, 293.0, 286.0, 293.0, 283.0, 263.0, 262.0, 288.0, 291.0, 284.0, 286.0, 295.0, 287.0, 280.0, 293.0, 297.0, 285.0, 291.0, 285.0, 296.0, 291.0, 288.0, 294.0, 315.0, 315.0, 296.0, 291.0, 260.0, 262.0, 293.0, 289.0, 291.0, 288.0, 253.0, 257.0, 291.0, 291.0, 283.0, 296.0, 288.0, 291.0, 292.0, 290.0, 267.0, 252.0, 296.0, 280.0, 319.0, 311.0, 299.0, 283.0, 289.0, 293.0, 268.0, 265.0, 285.0, 288.0, 294.0, 288.0, 292.0, 287.0, 297.0, 279.0, 273.0, 257.0, 316.0, 311.0, 287.0, 292.0, 
292.0, 290.0, 288.0, 296.0, 280.0, 290.0, 291.0, 291.0, 291.0, 291.0, 288.0, 291.0, 260.0, 265.0, 274.0, 296.0, 257.0, 265.0, 286.0, 293.0, 293.0, 294.0, 281.0, 292.0, 288.0, 294.0, 286.0, 284.0, 240.0, 219.0, 261.0, 269.0, 288.0, 288.0, 283.0, 293.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6930222020217434, "mean_inference_ms": 1.2356416687774716, "mean_action_processing_ms": 0.13308365283071852, "mean_env_wait_ms": 0.8343179738544374, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 12339200, "num_agent_steps_trained": 12339200, "num_env_steps_sampled": 6169600, "num_env_steps_trained": 6169600, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 6169600, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 12339200, "timers": {"training_iteration_time_ms": 3554.381, "learn_time_ms": 1045.846, "learn_throughput": 12238.898, "synch_weights_time_ms": 11.105}, "counters": {"num_env_steps_sampled": 6169600, "num_env_steps_trained": 6169600, "num_agent_steps_sampled": 12339200, "num_agent_steps_trained": 12339200}, "done": false, "episodes_total": 15424, "training_iteration": 482, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-31-42", "timestamp": 1666582302, "time_this_iter_s": 3.5798044204711914, "time_total_s": 1829.1865847110748, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1829.1865847110748, "timesteps_since_restore": 0, "iterations_since_restore": 482, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.7, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 199.0, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 177.69, "shaped_reward_min": 139, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.88, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 15.99, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.74, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 15.78, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.05, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.08, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.0, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 0, 
"useful_onion_drop_agent_1_mean": 0.01, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.53, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 15.65, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.2, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 7, "dish_pickup_agent_1_mean": 5.36, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.08, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.22, "useful_dish_pickup_agent_1_min": 3, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.02, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.05, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 4.95, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.1, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 4.91, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.04, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.53, "optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 15.65, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.53, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 15.65, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0008528552716597915, "policy_loss": 0.0004703389131464064, "vf_loss": 
7.599314212799072, "vf_explained_var": 0.6006312370300293, "kl": 0.0031734949443489313, "entropy": 0.7548311352729797, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 6182400, "num_env_steps_trained": 6182400, "num_agent_steps_sampled": 12364800, "num_agent_steps_trained": 12364800}, "sampler_results": {"episode_reward_max": 636.0, "episode_reward_min": 459.0, "episode_reward_mean": 575.69, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 219.0}, "policy_reward_max": {"ppo": 322.0}, "policy_reward_mean": {"ppo": 287.845}, "custom_metrics": {"sparse_reward_mean": 199.0, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 177.69, "shaped_reward_min": 139, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.88, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 15.99, 
"onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.74, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 15.78, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.05, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.08, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.0, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 0, "useful_onion_drop_agent_1_mean": 0.01, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.53, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 15.65, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.2, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 7, "dish_pickup_agent_1_mean": 5.36, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.08, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.22, "useful_dish_pickup_agent_1_min": 3, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.02, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.05, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 4.95, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.1, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 4.91, "soup_delivery_agent_0_min": 2, 
"soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.04, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.53, "optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 15.65, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.53, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 15.65, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 525.0, 576.0, 579.0, 582.0, 576.0, 579.0, 570.0, 525.0, 579.0, 579.0, 573.0, 573.0, 587.0, 636.0, 582.0, 579.0, 576.0, 525.0, 579.0, 570.0, 582.0, 573.0, 582.0, 576.0, 587.0, 582.0, 630.0, 587.0, 522.0, 582.0, 579.0, 510.0, 582.0, 579.0, 579.0, 582.0, 519.0, 576.0, 630.0, 582.0, 582.0, 533.0, 573.0, 582.0, 579.0, 576.0, 530.0, 627.0, 579.0, 582.0, 584.0, 570.0, 582.0, 582.0, 579.0, 525.0, 570.0, 522.0, 579.0, 587.0, 573.0, 582.0, 570.0, 459.0, 530.0, 576.0, 576.0, 576.0, 576.0, 573.0, 587.0, 630.0, 579.0, 582.0, 576.0, 579.0, 582.0, 576.0, 587.0, 582.0, 633.0, 582.0, 582.0, 579.0, 636.0, 570.0, 630.0, 522.0, 579.0, 582.0, 579.0, 582.0, 576.0, 519.0, 570.0, 573.0, 633.0, 582.0, 636.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [288.0, 291.0, 260.0, 265.0, 289.0, 287.0, 296.0, 283.0, 289.0, 293.0, 288.0, 288.0, 294.0, 285.0, 283.0, 287.0, 271.0, 254.0, 293.0, 286.0, 291.0, 288.0, 286.0, 287.0, 285.0, 288.0, 296.0, 291.0, 314.0, 322.0, 294.0, 288.0, 293.0, 286.0, 293.0, 283.0, 263.0, 262.0, 288.0, 291.0, 284.0, 286.0, 295.0, 287.0, 280.0, 293.0, 297.0, 285.0, 291.0, 
285.0, 296.0, 291.0, 288.0, 294.0, 315.0, 315.0, 296.0, 291.0, 260.0, 262.0, 293.0, 289.0, 291.0, 288.0, 253.0, 257.0, 291.0, 291.0, 283.0, 296.0, 288.0, 291.0, 292.0, 290.0, 267.0, 252.0, 296.0, 280.0, 319.0, 311.0, 299.0, 283.0, 289.0, 293.0, 268.0, 265.0, 285.0, 288.0, 294.0, 288.0, 292.0, 287.0, 297.0, 279.0, 273.0, 257.0, 316.0, 311.0, 287.0, 292.0, 292.0, 290.0, 288.0, 296.0, 280.0, 290.0, 291.0, 291.0, 291.0, 291.0, 288.0, 291.0, 260.0, 265.0, 274.0, 296.0, 257.0, 265.0, 286.0, 293.0, 293.0, 294.0, 281.0, 292.0, 288.0, 294.0, 286.0, 284.0, 240.0, 219.0, 261.0, 269.0, 288.0, 288.0, 283.0, 293.0, 286.0, 290.0, 293.0, 283.0, 291.0, 282.0, 290.0, 297.0, 308.0, 322.0, 297.0, 282.0, 296.0, 286.0, 288.0, 288.0, 295.0, 284.0, 293.0, 289.0, 294.0, 282.0, 290.0, 297.0, 291.0, 291.0, 316.0, 317.0, 290.0, 292.0, 294.0, 288.0, 293.0, 286.0, 317.0, 319.0, 282.0, 288.0, 317.0, 313.0, 265.0, 257.0, 291.0, 288.0, 294.0, 288.0, 294.0, 285.0, 296.0, 286.0, 290.0, 286.0, 258.0, 261.0, 279.0, 291.0, 287.0, 286.0, 314.0, 319.0, 291.0, 291.0, 322.0, 314.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.692977369639183, "mean_inference_ms": 1.2355441667271112, "mean_action_processing_ms": 0.13307732437620612, "mean_env_wait_ms": 0.8342554470754044, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 636.0, "episode_reward_min": 459.0, "episode_reward_mean": 575.69, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 219.0}, "policy_reward_max": {"ppo": 322.0}, "policy_reward_mean": {"ppo": 287.845}, "hist_stats": {"episode_reward": [579.0, 525.0, 576.0, 579.0, 582.0, 576.0, 579.0, 570.0, 525.0, 579.0, 579.0, 573.0, 573.0, 587.0, 636.0, 582.0, 579.0, 576.0, 525.0, 579.0, 570.0, 582.0, 573.0, 582.0, 576.0, 587.0, 582.0, 630.0, 587.0, 522.0, 582.0, 579.0, 510.0, 582.0, 579.0, 579.0, 582.0, 519.0, 576.0, 630.0, 582.0, 582.0, 533.0, 573.0, 582.0, 579.0, 576.0, 530.0, 627.0, 579.0, 582.0, 584.0, 570.0, 582.0, 582.0, 579.0, 525.0, 
570.0, 522.0, 579.0, 587.0, 573.0, 582.0, 570.0, 459.0, 530.0, 576.0, 576.0, 576.0, 576.0, 573.0, 587.0, 630.0, 579.0, 582.0, 576.0, 579.0, 582.0, 576.0, 587.0, 582.0, 633.0, 582.0, 582.0, 579.0, 636.0, 570.0, 630.0, 522.0, 579.0, 582.0, 579.0, 582.0, 576.0, 519.0, 570.0, 573.0, 633.0, 582.0, 636.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [288.0, 291.0, 260.0, 265.0, 289.0, 287.0, 296.0, 283.0, 289.0, 293.0, 288.0, 288.0, 294.0, 285.0, 283.0, 287.0, 271.0, 254.0, 293.0, 286.0, 291.0, 288.0, 286.0, 287.0, 285.0, 288.0, 296.0, 291.0, 314.0, 322.0, 294.0, 288.0, 293.0, 286.0, 293.0, 283.0, 263.0, 262.0, 288.0, 291.0, 284.0, 286.0, 295.0, 287.0, 280.0, 293.0, 297.0, 285.0, 291.0, 285.0, 296.0, 291.0, 288.0, 294.0, 315.0, 315.0, 296.0, 291.0, 260.0, 262.0, 293.0, 289.0, 291.0, 288.0, 253.0, 257.0, 291.0, 291.0, 283.0, 296.0, 288.0, 291.0, 292.0, 290.0, 267.0, 252.0, 296.0, 280.0, 319.0, 311.0, 299.0, 283.0, 289.0, 293.0, 268.0, 265.0, 285.0, 288.0, 294.0, 288.0, 292.0, 287.0, 297.0, 279.0, 273.0, 257.0, 316.0, 311.0, 287.0, 292.0, 292.0, 290.0, 288.0, 296.0, 280.0, 290.0, 291.0, 291.0, 291.0, 291.0, 288.0, 291.0, 260.0, 265.0, 274.0, 296.0, 257.0, 265.0, 286.0, 293.0, 293.0, 294.0, 281.0, 292.0, 288.0, 294.0, 286.0, 284.0, 240.0, 219.0, 261.0, 269.0, 288.0, 288.0, 283.0, 293.0, 286.0, 290.0, 293.0, 283.0, 291.0, 282.0, 290.0, 297.0, 308.0, 322.0, 297.0, 282.0, 296.0, 286.0, 288.0, 288.0, 295.0, 284.0, 293.0, 289.0, 294.0, 282.0, 290.0, 297.0, 291.0, 291.0, 316.0, 317.0, 290.0, 
292.0, 294.0, 288.0, 293.0, 286.0, 317.0, 319.0, 282.0, 288.0, 317.0, 313.0, 265.0, 257.0, 291.0, 288.0, 294.0, 288.0, 294.0, 285.0, 296.0, 286.0, 290.0, 286.0, 258.0, 261.0, 279.0, 291.0, 287.0, 286.0, 314.0, 319.0, 291.0, 291.0, 322.0, 314.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.692977369639183, "mean_inference_ms": 1.2355441667271112, "mean_action_processing_ms": 0.13307732437620612, "mean_env_wait_ms": 0.8342554470754044, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 12364800, "num_agent_steps_trained": 12364800, "num_env_steps_sampled": 6182400, "num_env_steps_trained": 6182400, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 6182400, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 12364800, "timers": {"training_iteration_time_ms": 3558.941, "learn_time_ms": 1043.607, "learn_throughput": 12265.148, "synch_weights_time_ms": 11.435}, "counters": {"num_env_steps_sampled": 6182400, "num_env_steps_trained": 6182400, "num_agent_steps_sampled": 12364800, "num_agent_steps_trained": 12364800}, "done": false, "episodes_total": 15456, "training_iteration": 483, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-31-46", "timestamp": 1666582306, "time_this_iter_s": 3.5927608013153076, "time_total_s": 1832.7793455123901, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1832.7793455123901, "timesteps_since_restore": 0, "iterations_since_restore": 483, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.919999999999998, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 195.8, "sparse_reward_min": 0, "sparse_reward_max": 220, "shaped_reward_mean": 174.85, "shaped_reward_min": 9, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.69, "onion_pickup_agent_0_min": 2, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 15.65, "onion_pickup_agent_1_min": 2, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.48, "useful_onion_pickup_agent_0_min": 2, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 15.41, "useful_onion_pickup_agent_1_min": 2, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.06, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.12, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.01, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, 
"useful_onion_drop_agent_1_mean": 0.04, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.32, "potting_onion_agent_0_min": 2, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 15.3, "potting_onion_agent_1_min": 1, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.05, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 5.4, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 4.93, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.25, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.03, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.06, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 4.83, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.06, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 4.79, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.0, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.32, "optimal_onion_potting_agent_0_min": 2, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 15.3, "optimal_onion_potting_agent_1_min": 1, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.32, "viable_onion_potting_agent_0_min": 2, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 15.3, "viable_onion_potting_agent_1_min": 1, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0031761766877025366, "policy_loss": 0.0027809746097773314, "vf_loss": 
7.762106895446777, "vf_explained_var": 0.5784608721733093, "kl": 0.0038599662948399782, "entropy": 0.7620162963867188, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 6195200, "num_env_steps_trained": 6195200, "num_agent_steps_sampled": 12390400, "num_agent_steps_trained": 12390400}, "sampler_results": {"episode_reward_max": 636.0, "episode_reward_min": 9.0, "episode_reward_mean": 566.45, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 3.0}, "policy_reward_max": {"ppo": 322.0}, "policy_reward_mean": {"ppo": 283.225}, "custom_metrics": {"sparse_reward_mean": 195.8, "sparse_reward_min": 0, "sparse_reward_max": 220, "shaped_reward_mean": 174.85, "shaped_reward_min": 9, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.69, "onion_pickup_agent_0_min": 2, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 15.65, 
"onion_pickup_agent_1_min": 2, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.48, "useful_onion_pickup_agent_0_min": 2, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 15.41, "useful_onion_pickup_agent_1_min": 2, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.06, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.12, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.01, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.04, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.32, "potting_onion_agent_0_min": 2, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 15.3, "potting_onion_agent_1_min": 1, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.05, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 5.4, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 4.93, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.25, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.03, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.06, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 4.83, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.06, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 4.79, "soup_delivery_agent_0_min": 0, 
"soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.0, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.32, "optimal_onion_potting_agent_0_min": 2, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 15.3, "optimal_onion_potting_agent_1_min": 1, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.32, "viable_onion_potting_agent_0_min": 2, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 15.3, "viable_onion_potting_agent_1_min": 1, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [510.0, 582.0, 579.0, 579.0, 582.0, 519.0, 576.0, 630.0, 582.0, 582.0, 533.0, 573.0, 582.0, 579.0, 576.0, 530.0, 627.0, 579.0, 582.0, 584.0, 570.0, 582.0, 582.0, 579.0, 525.0, 570.0, 522.0, 579.0, 587.0, 573.0, 582.0, 570.0, 459.0, 530.0, 576.0, 576.0, 576.0, 576.0, 573.0, 587.0, 630.0, 579.0, 582.0, 576.0, 579.0, 582.0, 576.0, 587.0, 582.0, 633.0, 582.0, 582.0, 579.0, 636.0, 570.0, 630.0, 522.0, 579.0, 582.0, 579.0, 582.0, 576.0, 519.0, 570.0, 573.0, 633.0, 582.0, 636.0, 525.0, 587.0, 9.0, 627.0, 533.0, 573.0, 579.0, 573.0, 582.0, 582.0, 582.0, 576.0, 630.0, 530.0, 522.0, 582.0, 573.0, 570.0, 582.0, 630.0, 582.0, 582.0, 570.0, 582.0, 576.0, 579.0, 579.0, 573.0, 411.0, 459.0, 465.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [253.0, 257.0, 291.0, 291.0, 283.0, 296.0, 288.0, 291.0, 292.0, 290.0, 267.0, 252.0, 296.0, 280.0, 319.0, 311.0, 299.0, 283.0, 289.0, 293.0, 268.0, 265.0, 285.0, 288.0, 294.0, 288.0, 292.0, 287.0, 297.0, 279.0, 273.0, 257.0, 316.0, 311.0, 287.0, 292.0, 292.0, 290.0, 288.0, 296.0, 280.0, 290.0, 291.0, 291.0, 291.0, 291.0, 288.0, 291.0, 260.0, 
265.0, 274.0, 296.0, 257.0, 265.0, 286.0, 293.0, 293.0, 294.0, 281.0, 292.0, 288.0, 294.0, 286.0, 284.0, 240.0, 219.0, 261.0, 269.0, 288.0, 288.0, 283.0, 293.0, 286.0, 290.0, 293.0, 283.0, 291.0, 282.0, 290.0, 297.0, 308.0, 322.0, 297.0, 282.0, 296.0, 286.0, 288.0, 288.0, 295.0, 284.0, 293.0, 289.0, 294.0, 282.0, 290.0, 297.0, 291.0, 291.0, 316.0, 317.0, 290.0, 292.0, 294.0, 288.0, 293.0, 286.0, 317.0, 319.0, 282.0, 288.0, 317.0, 313.0, 265.0, 257.0, 291.0, 288.0, 294.0, 288.0, 294.0, 285.0, 296.0, 286.0, 290.0, 286.0, 258.0, 261.0, 279.0, 291.0, 287.0, 286.0, 314.0, 319.0, 291.0, 291.0, 322.0, 314.0, 265.0, 260.0, 288.0, 299.0, 6.0, 3.0, 319.0, 308.0, 268.0, 265.0, 280.0, 293.0, 283.0, 296.0, 283.0, 290.0, 292.0, 290.0, 294.0, 288.0, 292.0, 290.0, 283.0, 293.0, 317.0, 313.0, 258.0, 272.0, 274.0, 248.0, 291.0, 291.0, 286.0, 287.0, 295.0, 275.0, 289.0, 293.0, 314.0, 316.0, 294.0, 288.0, 284.0, 298.0, 288.0, 282.0, 291.0, 291.0, 294.0, 282.0, 289.0, 290.0, 292.0, 287.0, 287.0, 286.0, 208.0, 203.0, 226.0, 233.0, 231.0, 234.0, 291.0, 291.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.692940406076851, "mean_inference_ms": 1.2354490487938525, "mean_action_processing_ms": 0.1330713413614881, "mean_env_wait_ms": 0.8341982079274745, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 636.0, "episode_reward_min": 9.0, "episode_reward_mean": 566.45, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 3.0}, "policy_reward_max": {"ppo": 322.0}, "policy_reward_mean": {"ppo": 283.225}, "hist_stats": {"episode_reward": [510.0, 582.0, 579.0, 579.0, 582.0, 519.0, 576.0, 630.0, 582.0, 582.0, 533.0, 573.0, 582.0, 579.0, 576.0, 530.0, 627.0, 579.0, 582.0, 584.0, 570.0, 582.0, 582.0, 579.0, 525.0, 570.0, 522.0, 579.0, 587.0, 573.0, 582.0, 570.0, 459.0, 530.0, 576.0, 576.0, 576.0, 576.0, 573.0, 587.0, 630.0, 579.0, 582.0, 576.0, 579.0, 582.0, 576.0, 587.0, 582.0, 633.0, 582.0, 582.0, 579.0, 636.0, 570.0, 630.0, 522.0, 579.0, 
582.0, 579.0, 582.0, 576.0, 519.0, 570.0, 573.0, 633.0, 582.0, 636.0, 525.0, 587.0, 9.0, 627.0, 533.0, 573.0, 579.0, 573.0, 582.0, 582.0, 582.0, 576.0, 630.0, 530.0, 522.0, 582.0, 573.0, 570.0, 582.0, 630.0, 582.0, 582.0, 570.0, 582.0, 576.0, 579.0, 579.0, 573.0, 411.0, 459.0, 465.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [253.0, 257.0, 291.0, 291.0, 283.0, 296.0, 288.0, 291.0, 292.0, 290.0, 267.0, 252.0, 296.0, 280.0, 319.0, 311.0, 299.0, 283.0, 289.0, 293.0, 268.0, 265.0, 285.0, 288.0, 294.0, 288.0, 292.0, 287.0, 297.0, 279.0, 273.0, 257.0, 316.0, 311.0, 287.0, 292.0, 292.0, 290.0, 288.0, 296.0, 280.0, 290.0, 291.0, 291.0, 291.0, 291.0, 288.0, 291.0, 260.0, 265.0, 274.0, 296.0, 257.0, 265.0, 286.0, 293.0, 293.0, 294.0, 281.0, 292.0, 288.0, 294.0, 286.0, 284.0, 240.0, 219.0, 261.0, 269.0, 288.0, 288.0, 283.0, 293.0, 286.0, 290.0, 293.0, 283.0, 291.0, 282.0, 290.0, 297.0, 308.0, 322.0, 297.0, 282.0, 296.0, 286.0, 288.0, 288.0, 295.0, 284.0, 293.0, 289.0, 294.0, 282.0, 290.0, 297.0, 291.0, 291.0, 316.0, 317.0, 290.0, 292.0, 294.0, 288.0, 293.0, 286.0, 317.0, 319.0, 282.0, 288.0, 317.0, 313.0, 265.0, 257.0, 291.0, 288.0, 294.0, 288.0, 294.0, 285.0, 296.0, 286.0, 290.0, 286.0, 258.0, 261.0, 279.0, 291.0, 287.0, 286.0, 314.0, 319.0, 291.0, 291.0, 322.0, 314.0, 265.0, 260.0, 288.0, 299.0, 6.0, 3.0, 319.0, 308.0, 268.0, 265.0, 280.0, 293.0, 283.0, 296.0, 283.0, 290.0, 292.0, 290.0, 294.0, 288.0, 292.0, 290.0, 283.0, 293.0, 317.0, 313.0, 258.0, 272.0, 274.0, 248.0, 
291.0, 291.0, 286.0, 287.0, 295.0, 275.0, 289.0, 293.0, 314.0, 316.0, 294.0, 288.0, 284.0, 298.0, 288.0, 282.0, 291.0, 291.0, 294.0, 282.0, 289.0, 290.0, 292.0, 287.0, 287.0, 286.0, 208.0, 203.0, 226.0, 233.0, 231.0, 234.0, 291.0, 291.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.692940406076851, "mean_inference_ms": 1.2354490487938525, "mean_action_processing_ms": 0.1330713413614881, "mean_env_wait_ms": 0.8341982079274745, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 12390400, "num_agent_steps_trained": 12390400, "num_env_steps_sampled": 6195200, "num_env_steps_trained": 6195200, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 6195200, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 12390400, "timers": {"training_iteration_time_ms": 3552.513, "learn_time_ms": 1043.736, "learn_throughput": 12263.643, "synch_weights_time_ms": 11.18}, "counters": {"num_env_steps_sampled": 6195200, "num_env_steps_trained": 6195200, "num_agent_steps_sampled": 12390400, "num_agent_steps_trained": 12390400}, "done": false, "episodes_total": 15488, "training_iteration": 484, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-31-50", "timestamp": 1666582310, "time_this_iter_s": 3.5351099967956543, "time_total_s": 1836.3144555091858, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1836.3144555091858, "timesteps_since_restore": 0, "iterations_since_restore": 484, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.7, "ram_util_percent": 10.616666666666667}}
+{"custom_metrics": {"sparse_reward_mean": 195.4, "sparse_reward_min": 0, "sparse_reward_max": 220, "shaped_reward_mean": 174.73, "shaped_reward_min": 9, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.48, "onion_pickup_agent_0_min": 2, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 15.81, "onion_pickup_agent_1_min": 2, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.24, "useful_onion_pickup_agent_0_min": 2, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 15.57, "useful_onion_pickup_agent_1_min": 2, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.04, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.11, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.01, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, 
"useful_onion_drop_agent_1_mean": 0.04, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.17, "potting_onion_agent_0_min": 2, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 15.42, "potting_onion_agent_1_min": 1, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.05, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 5.34, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 4.98, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.24, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.03, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.02, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 4.82, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.04, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 4.78, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.99, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.17, "optimal_onion_potting_agent_0_min": 2, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 15.42, "optimal_onion_potting_agent_1_min": 1, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.17, "viable_onion_potting_agent_0_min": 2, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 15.42, "viable_onion_potting_agent_1_min": 1, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.001401905552484095, "policy_loss": -0.0017940457910299301, "vf_loss": 
7.739469051361084, "vf_explained_var": 0.5467299818992615, "kl": 0.0025778058916330338, "entropy": 0.7636134624481201, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 6208000, "num_env_steps_trained": 6208000, "num_agent_steps_sampled": 12416000, "num_agent_steps_trained": 12416000}, "sampler_results": {"episode_reward_max": 639.0, "episode_reward_min": 9.0, "episode_reward_mean": 565.53, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 3.0}, "policy_reward_max": {"ppo": 323.0}, "policy_reward_mean": {"ppo": 282.765}, "custom_metrics": {"sparse_reward_mean": 195.4, "sparse_reward_min": 0, "sparse_reward_max": 220, "shaped_reward_mean": 174.73, "shaped_reward_min": 9, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.48, "onion_pickup_agent_0_min": 2, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 15.81, 
"onion_pickup_agent_1_min": 2, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.24, "useful_onion_pickup_agent_0_min": 2, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 15.57, "useful_onion_pickup_agent_1_min": 2, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.04, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.11, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.01, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.04, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.17, "potting_onion_agent_0_min": 2, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 15.42, "potting_onion_agent_1_min": 1, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.05, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 5.34, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 4.98, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.24, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.03, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.02, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 4.82, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.04, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 4.78, "soup_delivery_agent_0_min": 0, 
"soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.99, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.17, "optimal_onion_potting_agent_0_min": 2, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 15.42, "optimal_onion_potting_agent_1_min": 1, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.17, "viable_onion_potting_agent_0_min": 2, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 15.42, "viable_onion_potting_agent_1_min": 1, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [459.0, 530.0, 576.0, 576.0, 576.0, 576.0, 573.0, 587.0, 630.0, 579.0, 582.0, 576.0, 579.0, 582.0, 576.0, 587.0, 582.0, 633.0, 582.0, 582.0, 579.0, 636.0, 570.0, 630.0, 522.0, 579.0, 582.0, 579.0, 582.0, 576.0, 519.0, 570.0, 573.0, 633.0, 582.0, 636.0, 525.0, 587.0, 9.0, 627.0, 533.0, 573.0, 579.0, 573.0, 582.0, 582.0, 582.0, 576.0, 630.0, 530.0, 522.0, 582.0, 573.0, 570.0, 582.0, 630.0, 582.0, 582.0, 570.0, 582.0, 576.0, 579.0, 579.0, 573.0, 411.0, 459.0, 465.0, 582.0, 522.0, 525.0, 582.0, 627.0, 576.0, 579.0, 525.0, 522.0, 530.0, 522.0, 639.0, 633.0, 582.0, 579.0, 587.0, 522.0, 630.0, 522.0, 579.0, 579.0, 519.0, 576.0, 579.0, 582.0, 522.0, 582.0, 582.0, 579.0, 627.0, 582.0, 584.0, 519.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [240.0, 219.0, 261.0, 269.0, 288.0, 288.0, 283.0, 293.0, 286.0, 290.0, 293.0, 283.0, 291.0, 282.0, 290.0, 297.0, 308.0, 322.0, 297.0, 282.0, 296.0, 286.0, 288.0, 288.0, 295.0, 284.0, 293.0, 289.0, 294.0, 282.0, 290.0, 297.0, 291.0, 291.0, 316.0, 317.0, 290.0, 292.0, 294.0, 288.0, 293.0, 286.0, 317.0, 319.0, 282.0, 288.0, 317.0, 313.0, 265.0, 
257.0, 291.0, 288.0, 294.0, 288.0, 294.0, 285.0, 296.0, 286.0, 290.0, 286.0, 258.0, 261.0, 279.0, 291.0, 287.0, 286.0, 314.0, 319.0, 291.0, 291.0, 322.0, 314.0, 265.0, 260.0, 288.0, 299.0, 6.0, 3.0, 319.0, 308.0, 268.0, 265.0, 280.0, 293.0, 283.0, 296.0, 283.0, 290.0, 292.0, 290.0, 294.0, 288.0, 292.0, 290.0, 283.0, 293.0, 317.0, 313.0, 258.0, 272.0, 274.0, 248.0, 291.0, 291.0, 286.0, 287.0, 295.0, 275.0, 289.0, 293.0, 314.0, 316.0, 294.0, 288.0, 284.0, 298.0, 288.0, 282.0, 291.0, 291.0, 294.0, 282.0, 289.0, 290.0, 292.0, 287.0, 287.0, 286.0, 208.0, 203.0, 226.0, 233.0, 231.0, 234.0, 291.0, 291.0, 251.0, 271.0, 258.0, 267.0, 292.0, 290.0, 306.0, 321.0, 288.0, 288.0, 286.0, 293.0, 261.0, 264.0, 265.0, 257.0, 267.0, 263.0, 267.0, 255.0, 317.0, 322.0, 319.0, 314.0, 293.0, 289.0, 285.0, 294.0, 291.0, 296.0, 263.0, 259.0, 323.0, 307.0, 257.0, 265.0, 289.0, 290.0, 289.0, 290.0, 263.0, 256.0, 286.0, 290.0, 286.0, 293.0, 291.0, 291.0, 269.0, 253.0, 286.0, 296.0, 288.0, 294.0, 288.0, 291.0, 306.0, 321.0, 288.0, 294.0, 289.0, 295.0, 262.0, 257.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6929093261383754, "mean_inference_ms": 1.235359682696947, "mean_action_processing_ms": 0.13306645221527427, "mean_env_wait_ms": 0.8341424568541566, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 639.0, "episode_reward_min": 9.0, "episode_reward_mean": 565.53, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 3.0}, "policy_reward_max": {"ppo": 323.0}, "policy_reward_mean": {"ppo": 282.765}, "hist_stats": {"episode_reward": [459.0, 530.0, 576.0, 576.0, 576.0, 576.0, 573.0, 587.0, 630.0, 579.0, 582.0, 576.0, 579.0, 582.0, 576.0, 587.0, 582.0, 633.0, 582.0, 582.0, 579.0, 636.0, 570.0, 630.0, 522.0, 579.0, 582.0, 579.0, 582.0, 576.0, 519.0, 570.0, 573.0, 633.0, 582.0, 636.0, 525.0, 587.0, 9.0, 627.0, 533.0, 573.0, 579.0, 573.0, 582.0, 582.0, 582.0, 576.0, 630.0, 530.0, 522.0, 582.0, 573.0, 570.0, 582.0, 630.0, 582.0, 582.0, 
570.0, 582.0, 576.0, 579.0, 579.0, 573.0, 411.0, 459.0, 465.0, 582.0, 522.0, 525.0, 582.0, 627.0, 576.0, 579.0, 525.0, 522.0, 530.0, 522.0, 639.0, 633.0, 582.0, 579.0, 587.0, 522.0, 630.0, 522.0, 579.0, 579.0, 519.0, 576.0, 579.0, 582.0, 522.0, 582.0, 582.0, 579.0, 627.0, 582.0, 584.0, 519.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [240.0, 219.0, 261.0, 269.0, 288.0, 288.0, 283.0, 293.0, 286.0, 290.0, 293.0, 283.0, 291.0, 282.0, 290.0, 297.0, 308.0, 322.0, 297.0, 282.0, 296.0, 286.0, 288.0, 288.0, 295.0, 284.0, 293.0, 289.0, 294.0, 282.0, 290.0, 297.0, 291.0, 291.0, 316.0, 317.0, 290.0, 292.0, 294.0, 288.0, 293.0, 286.0, 317.0, 319.0, 282.0, 288.0, 317.0, 313.0, 265.0, 257.0, 291.0, 288.0, 294.0, 288.0, 294.0, 285.0, 296.0, 286.0, 290.0, 286.0, 258.0, 261.0, 279.0, 291.0, 287.0, 286.0, 314.0, 319.0, 291.0, 291.0, 322.0, 314.0, 265.0, 260.0, 288.0, 299.0, 6.0, 3.0, 319.0, 308.0, 268.0, 265.0, 280.0, 293.0, 283.0, 296.0, 283.0, 290.0, 292.0, 290.0, 294.0, 288.0, 292.0, 290.0, 283.0, 293.0, 317.0, 313.0, 258.0, 272.0, 274.0, 248.0, 291.0, 291.0, 286.0, 287.0, 295.0, 275.0, 289.0, 293.0, 314.0, 316.0, 294.0, 288.0, 284.0, 298.0, 288.0, 282.0, 291.0, 291.0, 294.0, 282.0, 289.0, 290.0, 292.0, 287.0, 287.0, 286.0, 208.0, 203.0, 226.0, 233.0, 231.0, 234.0, 291.0, 291.0, 251.0, 271.0, 258.0, 267.0, 292.0, 290.0, 306.0, 321.0, 288.0, 288.0, 286.0, 293.0, 261.0, 264.0, 265.0, 257.0, 267.0, 263.0, 267.0, 255.0, 317.0, 322.0, 319.0, 314.0, 293.0, 289.0, 285.0, 294.0, 291.0, 296.0, 
263.0, 259.0, 323.0, 307.0, 257.0, 265.0, 289.0, 290.0, 289.0, 290.0, 263.0, 256.0, 286.0, 290.0, 286.0, 293.0, 291.0, 291.0, 269.0, 253.0, 286.0, 296.0, 288.0, 294.0, 288.0, 291.0, 306.0, 321.0, 288.0, 294.0, 289.0, 295.0, 262.0, 257.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6929093261383754, "mean_inference_ms": 1.235359682696947, "mean_action_processing_ms": 0.13306645221527427, "mean_env_wait_ms": 0.8341424568541566, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 12416000, "num_agent_steps_trained": 12416000, "num_env_steps_sampled": 6208000, "num_env_steps_trained": 6208000, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 6208000, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 12416000, "timers": {"training_iteration_time_ms": 3554.834, "learn_time_ms": 1048.901, "learn_throughput": 12203.248, "synch_weights_time_ms": 11.264}, "counters": {"num_env_steps_sampled": 6208000, "num_env_steps_trained": 6208000, "num_agent_steps_sampled": 12416000, "num_agent_steps_trained": 12416000}, "done": false, "episodes_total": 15520, "training_iteration": 485, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-31-54", "timestamp": 1666582314, "time_this_iter_s": 3.635488986968994, "time_total_s": 1839.9499444961548, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1839.9499444961548, "timesteps_since_restore": 0, "iterations_since_restore": 485, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.9, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 193.4, "sparse_reward_min": 0, "sparse_reward_max": 220, "shaped_reward_mean": 172.63, "shaped_reward_min": 9, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.36, "onion_pickup_agent_0_min": 2, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 15.44, "onion_pickup_agent_1_min": 2, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.14, "useful_onion_pickup_agent_0_min": 2, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 15.19, "useful_onion_pickup_agent_1_min": 2, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.04, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.08, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.01, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, 
"useful_onion_drop_agent_1_mean": 0.03, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.05, "potting_onion_agent_0_min": 2, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 15.1, "potting_onion_agent_1_min": 1, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 4.87, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 5.48, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 4.81, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.35, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.03, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.04, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 4.64, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.1, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 4.61, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.07, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.05, "optimal_onion_potting_agent_0_min": 2, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 15.1, "optimal_onion_potting_agent_1_min": 1, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.05, "viable_onion_potting_agent_0_min": 2, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 15.1, "viable_onion_potting_agent_1_min": 1, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0027828095480799675, "policy_loss": -0.0031895372085273266, "vf_loss": 
7.829740524291992, "vf_explained_var": 0.5534740686416626, "kl": 0.003179178573191166, "entropy": 0.7524949312210083, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 6220800, "num_env_steps_trained": 6220800, "num_agent_steps_sampled": 12441600, "num_agent_steps_trained": 12441600}, "sampler_results": {"episode_reward_max": 639.0, "episode_reward_min": 9.0, "episode_reward_mean": 559.43, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 3.0}, "policy_reward_max": {"ppo": 323.0}, "policy_reward_mean": {"ppo": 279.715}, "custom_metrics": {"sparse_reward_mean": 193.4, "sparse_reward_min": 0, "sparse_reward_max": 220, "shaped_reward_mean": 172.63, "shaped_reward_min": 9, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.36, "onion_pickup_agent_0_min": 2, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 15.44, 
"onion_pickup_agent_1_min": 2, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.14, "useful_onion_pickup_agent_0_min": 2, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 15.19, "useful_onion_pickup_agent_1_min": 2, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.04, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.08, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.01, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.03, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.05, "potting_onion_agent_0_min": 2, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 15.1, "potting_onion_agent_1_min": 1, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 4.87, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 5.48, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 4.81, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.35, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.03, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.04, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 4.64, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.1, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 4.61, "soup_delivery_agent_0_min": 0, 
"soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.07, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.05, "optimal_onion_potting_agent_0_min": 2, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 15.1, "optimal_onion_potting_agent_1_min": 1, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.05, "viable_onion_potting_agent_0_min": 2, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 15.1, "viable_onion_potting_agent_1_min": 1, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [573.0, 633.0, 582.0, 636.0, 525.0, 587.0, 9.0, 627.0, 533.0, 573.0, 579.0, 573.0, 582.0, 582.0, 582.0, 576.0, 630.0, 530.0, 522.0, 582.0, 573.0, 570.0, 582.0, 630.0, 582.0, 582.0, 570.0, 582.0, 576.0, 579.0, 579.0, 573.0, 411.0, 459.0, 465.0, 582.0, 522.0, 525.0, 582.0, 627.0, 576.0, 579.0, 525.0, 522.0, 530.0, 522.0, 639.0, 633.0, 582.0, 579.0, 587.0, 522.0, 630.0, 522.0, 579.0, 579.0, 519.0, 576.0, 579.0, 582.0, 522.0, 582.0, 582.0, 579.0, 627.0, 582.0, 584.0, 519.0, 573.0, 453.0, 576.0, 525.0, 525.0, 639.0, 465.0, 582.0, 405.0, 522.0, 530.0, 573.0, 582.0, 519.0, 576.0, 630.0, 576.0, 630.0, 627.0, 525.0, 573.0, 579.0, 570.0, 525.0, 573.0, 582.0, 570.0, 579.0, 525.0, 570.0, 582.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [287.0, 286.0, 314.0, 319.0, 291.0, 291.0, 322.0, 314.0, 265.0, 260.0, 288.0, 299.0, 6.0, 3.0, 319.0, 308.0, 268.0, 265.0, 280.0, 293.0, 283.0, 296.0, 283.0, 290.0, 292.0, 290.0, 294.0, 288.0, 292.0, 290.0, 283.0, 293.0, 317.0, 313.0, 258.0, 272.0, 274.0, 248.0, 291.0, 291.0, 286.0, 287.0, 295.0, 275.0, 289.0, 293.0, 314.0, 316.0, 294.0, 288.0, 
284.0, 298.0, 288.0, 282.0, 291.0, 291.0, 294.0, 282.0, 289.0, 290.0, 292.0, 287.0, 287.0, 286.0, 208.0, 203.0, 226.0, 233.0, 231.0, 234.0, 291.0, 291.0, 251.0, 271.0, 258.0, 267.0, 292.0, 290.0, 306.0, 321.0, 288.0, 288.0, 286.0, 293.0, 261.0, 264.0, 265.0, 257.0, 267.0, 263.0, 267.0, 255.0, 317.0, 322.0, 319.0, 314.0, 293.0, 289.0, 285.0, 294.0, 291.0, 296.0, 263.0, 259.0, 323.0, 307.0, 257.0, 265.0, 289.0, 290.0, 289.0, 290.0, 263.0, 256.0, 286.0, 290.0, 286.0, 293.0, 291.0, 291.0, 269.0, 253.0, 286.0, 296.0, 288.0, 294.0, 288.0, 291.0, 306.0, 321.0, 288.0, 294.0, 289.0, 295.0, 262.0, 257.0, 290.0, 283.0, 224.0, 229.0, 285.0, 291.0, 257.0, 268.0, 260.0, 265.0, 317.0, 322.0, 231.0, 234.0, 291.0, 291.0, 208.0, 197.0, 260.0, 262.0, 265.0, 265.0, 287.0, 286.0, 289.0, 293.0, 252.0, 267.0, 291.0, 285.0, 315.0, 315.0, 292.0, 284.0, 314.0, 316.0, 314.0, 313.0, 258.0, 267.0, 282.0, 291.0, 285.0, 294.0, 288.0, 282.0, 260.0, 265.0, 294.0, 279.0, 288.0, 294.0, 288.0, 282.0, 280.0, 299.0, 261.0, 264.0, 275.0, 295.0, 291.0, 291.0, 291.0, 285.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6928998996816387, "mean_inference_ms": 1.2352711461877957, "mean_action_processing_ms": 0.13306123880522405, "mean_env_wait_ms": 0.8340857422690848, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 639.0, "episode_reward_min": 9.0, "episode_reward_mean": 559.43, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 3.0}, "policy_reward_max": {"ppo": 323.0}, "policy_reward_mean": {"ppo": 279.715}, "hist_stats": {"episode_reward": [573.0, 633.0, 582.0, 636.0, 525.0, 587.0, 9.0, 627.0, 533.0, 573.0, 579.0, 573.0, 582.0, 582.0, 582.0, 576.0, 630.0, 530.0, 522.0, 582.0, 573.0, 570.0, 582.0, 630.0, 582.0, 582.0, 570.0, 582.0, 576.0, 579.0, 579.0, 573.0, 411.0, 459.0, 465.0, 582.0, 522.0, 525.0, 582.0, 627.0, 576.0, 579.0, 525.0, 522.0, 530.0, 522.0, 639.0, 633.0, 582.0, 579.0, 587.0, 522.0, 630.0, 522.0, 579.0, 579.0, 519.0, 576.0, 
579.0, 582.0, 522.0, 582.0, 582.0, 579.0, 627.0, 582.0, 584.0, 519.0, 573.0, 453.0, 576.0, 525.0, 525.0, 639.0, 465.0, 582.0, 405.0, 522.0, 530.0, 573.0, 582.0, 519.0, 576.0, 630.0, 576.0, 630.0, 627.0, 525.0, 573.0, 579.0, 570.0, 525.0, 573.0, 582.0, 570.0, 579.0, 525.0, 570.0, 582.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [287.0, 286.0, 314.0, 319.0, 291.0, 291.0, 322.0, 314.0, 265.0, 260.0, 288.0, 299.0, 6.0, 3.0, 319.0, 308.0, 268.0, 265.0, 280.0, 293.0, 283.0, 296.0, 283.0, 290.0, 292.0, 290.0, 294.0, 288.0, 292.0, 290.0, 283.0, 293.0, 317.0, 313.0, 258.0, 272.0, 274.0, 248.0, 291.0, 291.0, 286.0, 287.0, 295.0, 275.0, 289.0, 293.0, 314.0, 316.0, 294.0, 288.0, 284.0, 298.0, 288.0, 282.0, 291.0, 291.0, 294.0, 282.0, 289.0, 290.0, 292.0, 287.0, 287.0, 286.0, 208.0, 203.0, 226.0, 233.0, 231.0, 234.0, 291.0, 291.0, 251.0, 271.0, 258.0, 267.0, 292.0, 290.0, 306.0, 321.0, 288.0, 288.0, 286.0, 293.0, 261.0, 264.0, 265.0, 257.0, 267.0, 263.0, 267.0, 255.0, 317.0, 322.0, 319.0, 314.0, 293.0, 289.0, 285.0, 294.0, 291.0, 296.0, 263.0, 259.0, 323.0, 307.0, 257.0, 265.0, 289.0, 290.0, 289.0, 290.0, 263.0, 256.0, 286.0, 290.0, 286.0, 293.0, 291.0, 291.0, 269.0, 253.0, 286.0, 296.0, 288.0, 294.0, 288.0, 291.0, 306.0, 321.0, 288.0, 294.0, 289.0, 295.0, 262.0, 257.0, 290.0, 283.0, 224.0, 229.0, 285.0, 291.0, 257.0, 268.0, 260.0, 265.0, 317.0, 322.0, 231.0, 234.0, 291.0, 291.0, 208.0, 197.0, 260.0, 262.0, 265.0, 265.0, 287.0, 286.0, 289.0, 293.0, 252.0, 267.0, 291.0, 285.0, 
315.0, 315.0, 292.0, 284.0, 314.0, 316.0, 314.0, 313.0, 258.0, 267.0, 282.0, 291.0, 285.0, 294.0, 288.0, 282.0, 260.0, 265.0, 294.0, 279.0, 288.0, 294.0, 288.0, 282.0, 280.0, 299.0, 261.0, 264.0, 275.0, 295.0, 291.0, 291.0, 291.0, 285.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6928998996816387, "mean_inference_ms": 1.2352711461877957, "mean_action_processing_ms": 0.13306123880522405, "mean_env_wait_ms": 0.8340857422690848, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 12441600, "num_agent_steps_trained": 12441600, "num_env_steps_sampled": 6220800, "num_env_steps_trained": 6220800, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 6220800, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 12441600, "timers": {"training_iteration_time_ms": 3569.324, "learn_time_ms": 1047.351, "learn_throughput": 12221.315, "synch_weights_time_ms": 11.468}, "counters": {"num_env_steps_sampled": 6220800, "num_env_steps_trained": 6220800, "num_agent_steps_sampled": 12441600, "num_agent_steps_trained": 12441600}, "done": false, "episodes_total": 15552, "training_iteration": 486, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-31-57", "timestamp": 1666582317, "time_this_iter_s": 3.665339469909668, "time_total_s": 1843.6152839660645, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1843.6152839660645, "timesteps_since_restore": 0, "iterations_since_restore": 486, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.933333333333334, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 194.8, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 174.09, "shaped_reward_min": 125, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.57, "onion_pickup_agent_0_min": 10, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 15.59, "onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.44, "useful_onion_pickup_agent_0_min": 9, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 15.38, "useful_onion_pickup_agent_1_min": 11, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.04, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.06, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.0, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 0, 
"useful_onion_drop_agent_1_mean": 0.0, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 0, "potting_onion_agent_0_mean": 16.24, "potting_onion_agent_0_min": 9, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 15.2, "potting_onion_agent_1_min": 11, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 4.9, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 7, "dish_pickup_agent_1_mean": 5.45, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 7, "useful_dish_pickup_agent_0_mean": 4.84, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.35, "useful_dish_pickup_agent_1_min": 3, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.01, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.03, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 4.67, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.17, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 4.63, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.12, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.24, "optimal_onion_potting_agent_0_min": 9, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 15.2, "optimal_onion_potting_agent_1_min": 11, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.24, "viable_onion_potting_agent_0_min": 9, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 15.2, "viable_onion_potting_agent_1_min": 11, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0029020761139690876, "policy_loss": 0.002499224850907922, "vf_loss": 
7.774298667907715, "vf_explained_var": 0.5695394277572632, "kl": 0.002982937265187502, "entropy": 0.7491560578346252, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 6233600, "num_env_steps_trained": 6233600, "num_agent_steps_sampled": 12467200, "num_agent_steps_trained": 12467200}, "sampler_results": {"episode_reward_max": 639.0, "episode_reward_min": 405.0, "episode_reward_mean": 563.69, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 197.0}, "policy_reward_max": {"ppo": 323.0}, "policy_reward_mean": {"ppo": 281.845}, "custom_metrics": {"sparse_reward_mean": 194.8, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 174.09, "shaped_reward_min": 125, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.57, "onion_pickup_agent_0_min": 10, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 15.59, 
"onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.44, "useful_onion_pickup_agent_0_min": 9, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 15.38, "useful_onion_pickup_agent_1_min": 11, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.04, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.06, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.0, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 0, "useful_onion_drop_agent_1_mean": 0.0, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 0, "potting_onion_agent_0_mean": 16.24, "potting_onion_agent_0_min": 9, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 15.2, "potting_onion_agent_1_min": 11, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 4.9, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 7, "dish_pickup_agent_1_mean": 5.45, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 7, "useful_dish_pickup_agent_0_mean": 4.84, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.35, "useful_dish_pickup_agent_1_min": 3, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.01, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.03, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 4.67, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.17, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 4.63, "soup_delivery_agent_0_min": 3, 
"soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.12, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.24, "optimal_onion_potting_agent_0_min": 9, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 15.2, "optimal_onion_potting_agent_1_min": 11, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.24, "viable_onion_potting_agent_0_min": 9, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 15.2, "viable_onion_potting_agent_1_min": 11, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [411.0, 459.0, 465.0, 582.0, 522.0, 525.0, 582.0, 627.0, 576.0, 579.0, 525.0, 522.0, 530.0, 522.0, 639.0, 633.0, 582.0, 579.0, 587.0, 522.0, 630.0, 522.0, 579.0, 579.0, 519.0, 576.0, 579.0, 582.0, 522.0, 582.0, 582.0, 579.0, 627.0, 582.0, 584.0, 519.0, 573.0, 453.0, 576.0, 525.0, 525.0, 639.0, 465.0, 582.0, 405.0, 522.0, 530.0, 573.0, 582.0, 519.0, 576.0, 630.0, 576.0, 630.0, 627.0, 525.0, 573.0, 579.0, 570.0, 525.0, 573.0, 582.0, 570.0, 579.0, 525.0, 570.0, 582.0, 576.0, 587.0, 573.0, 582.0, 627.0, 630.0, 579.0, 522.0, 519.0, 582.0, 576.0, 630.0, 630.0, 579.0, 573.0, 527.0, 579.0, 576.0, 582.0, 579.0, 582.0, 573.0, 582.0, 584.0, 587.0, 579.0, 582.0, 587.0, 525.0, 530.0, 525.0, 573.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [208.0, 203.0, 226.0, 233.0, 231.0, 234.0, 291.0, 291.0, 251.0, 271.0, 258.0, 267.0, 292.0, 290.0, 306.0, 321.0, 288.0, 288.0, 286.0, 293.0, 261.0, 264.0, 265.0, 257.0, 267.0, 263.0, 267.0, 255.0, 317.0, 322.0, 319.0, 314.0, 293.0, 289.0, 285.0, 294.0, 291.0, 296.0, 263.0, 259.0, 323.0, 307.0, 257.0, 265.0, 289.0, 290.0, 289.0, 290.0, 263.0, 
256.0, 286.0, 290.0, 286.0, 293.0, 291.0, 291.0, 269.0, 253.0, 286.0, 296.0, 288.0, 294.0, 288.0, 291.0, 306.0, 321.0, 288.0, 294.0, 289.0, 295.0, 262.0, 257.0, 290.0, 283.0, 224.0, 229.0, 285.0, 291.0, 257.0, 268.0, 260.0, 265.0, 317.0, 322.0, 231.0, 234.0, 291.0, 291.0, 208.0, 197.0, 260.0, 262.0, 265.0, 265.0, 287.0, 286.0, 289.0, 293.0, 252.0, 267.0, 291.0, 285.0, 315.0, 315.0, 292.0, 284.0, 314.0, 316.0, 314.0, 313.0, 258.0, 267.0, 282.0, 291.0, 285.0, 294.0, 288.0, 282.0, 260.0, 265.0, 294.0, 279.0, 288.0, 294.0, 288.0, 282.0, 280.0, 299.0, 261.0, 264.0, 275.0, 295.0, 291.0, 291.0, 291.0, 285.0, 298.0, 289.0, 291.0, 282.0, 296.0, 286.0, 317.0, 310.0, 314.0, 316.0, 284.0, 295.0, 262.0, 260.0, 262.0, 257.0, 286.0, 296.0, 286.0, 290.0, 314.0, 316.0, 314.0, 316.0, 286.0, 293.0, 288.0, 285.0, 269.0, 258.0, 281.0, 298.0, 294.0, 282.0, 289.0, 293.0, 292.0, 287.0, 294.0, 288.0, 285.0, 288.0, 291.0, 291.0, 288.0, 296.0, 297.0, 290.0, 289.0, 290.0, 296.0, 286.0, 299.0, 288.0, 260.0, 265.0, 262.0, 268.0, 265.0, 260.0, 288.0, 285.0, 294.0, 285.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.692892027218891, "mean_inference_ms": 1.235187879018693, "mean_action_processing_ms": 0.13305748815812188, "mean_env_wait_ms": 0.8340378901116045, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 639.0, "episode_reward_min": 405.0, "episode_reward_mean": 563.69, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 197.0}, "policy_reward_max": {"ppo": 323.0}, "policy_reward_mean": {"ppo": 281.845}, "hist_stats": {"episode_reward": [411.0, 459.0, 465.0, 582.0, 522.0, 525.0, 582.0, 627.0, 576.0, 579.0, 525.0, 522.0, 530.0, 522.0, 639.0, 633.0, 582.0, 579.0, 587.0, 522.0, 630.0, 522.0, 579.0, 579.0, 519.0, 576.0, 579.0, 582.0, 522.0, 582.0, 582.0, 579.0, 627.0, 582.0, 584.0, 519.0, 573.0, 453.0, 576.0, 525.0, 525.0, 639.0, 465.0, 582.0, 405.0, 522.0, 530.0, 573.0, 582.0, 519.0, 576.0, 630.0, 576.0, 630.0, 627.0, 525.0, 573.0, 
579.0, 570.0, 525.0, 573.0, 582.0, 570.0, 579.0, 525.0, 570.0, 582.0, 576.0, 587.0, 573.0, 582.0, 627.0, 630.0, 579.0, 522.0, 519.0, 582.0, 576.0, 630.0, 630.0, 579.0, 573.0, 527.0, 579.0, 576.0, 582.0, 579.0, 582.0, 573.0, 582.0, 584.0, 587.0, 579.0, 582.0, 587.0, 525.0, 530.0, 525.0, 573.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [208.0, 203.0, 226.0, 233.0, 231.0, 234.0, 291.0, 291.0, 251.0, 271.0, 258.0, 267.0, 292.0, 290.0, 306.0, 321.0, 288.0, 288.0, 286.0, 293.0, 261.0, 264.0, 265.0, 257.0, 267.0, 263.0, 267.0, 255.0, 317.0, 322.0, 319.0, 314.0, 293.0, 289.0, 285.0, 294.0, 291.0, 296.0, 263.0, 259.0, 323.0, 307.0, 257.0, 265.0, 289.0, 290.0, 289.0, 290.0, 263.0, 256.0, 286.0, 290.0, 286.0, 293.0, 291.0, 291.0, 269.0, 253.0, 286.0, 296.0, 288.0, 294.0, 288.0, 291.0, 306.0, 321.0, 288.0, 294.0, 289.0, 295.0, 262.0, 257.0, 290.0, 283.0, 224.0, 229.0, 285.0, 291.0, 257.0, 268.0, 260.0, 265.0, 317.0, 322.0, 231.0, 234.0, 291.0, 291.0, 208.0, 197.0, 260.0, 262.0, 265.0, 265.0, 287.0, 286.0, 289.0, 293.0, 252.0, 267.0, 291.0, 285.0, 315.0, 315.0, 292.0, 284.0, 314.0, 316.0, 314.0, 313.0, 258.0, 267.0, 282.0, 291.0, 285.0, 294.0, 288.0, 282.0, 260.0, 265.0, 294.0, 279.0, 288.0, 294.0, 288.0, 282.0, 280.0, 299.0, 261.0, 264.0, 275.0, 295.0, 291.0, 291.0, 291.0, 285.0, 298.0, 289.0, 291.0, 282.0, 296.0, 286.0, 317.0, 310.0, 314.0, 316.0, 284.0, 295.0, 262.0, 260.0, 262.0, 257.0, 286.0, 296.0, 286.0, 290.0, 314.0, 316.0, 314.0, 316.0, 286.0, 293.0, 288.0, 285.0, 269.0, 
258.0, 281.0, 298.0, 294.0, 282.0, 289.0, 293.0, 292.0, 287.0, 294.0, 288.0, 285.0, 288.0, 291.0, 291.0, 288.0, 296.0, 297.0, 290.0, 289.0, 290.0, 296.0, 286.0, 299.0, 288.0, 260.0, 265.0, 262.0, 268.0, 265.0, 260.0, 288.0, 285.0, 294.0, 285.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.692892027218891, "mean_inference_ms": 1.235187879018693, "mean_action_processing_ms": 0.13305748815812188, "mean_env_wait_ms": 0.8340378901116045, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 12467200, "num_agent_steps_trained": 12467200, "num_env_steps_sampled": 6233600, "num_env_steps_trained": 6233600, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 6233600, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 12467200, "timers": {"training_iteration_time_ms": 3579.273, "learn_time_ms": 1045.789, "learn_throughput": 12239.561, "synch_weights_time_ms": 11.168}, "counters": {"num_env_steps_sampled": 6233600, "num_env_steps_trained": 6233600, "num_agent_steps_sampled": 12467200, "num_agent_steps_trained": 12467200}, "done": false, "episodes_total": 15584, "training_iteration": 487, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-32-01", "timestamp": 1666582321, "time_this_iter_s": 3.6495282649993896, "time_total_s": 1847.2648122310638, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1847.2648122310638, "timesteps_since_restore": 0, "iterations_since_restore": 487, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.580000000000002, "ram_util_percent": 10.62}}
+{"custom_metrics": {"sparse_reward_mean": 195.4, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 174.64, "shaped_reward_min": 113, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.39, "onion_pickup_agent_0_min": 10, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 15.72, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.3, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 15.59, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.03, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.05, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.0, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 0, 
"useful_onion_drop_agent_1_mean": 0.0, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 0, "potting_onion_agent_0_mean": 16.08, "potting_onion_agent_0_min": 9, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 15.39, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.02, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 7, "dish_pickup_agent_1_mean": 5.41, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 7, "useful_dish_pickup_agent_0_mean": 4.92, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.29, "useful_dish_pickup_agent_1_min": 3, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.01, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.02, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 4.8, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.12, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 4.74, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.04, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.08, "optimal_onion_potting_agent_0_min": 9, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 15.39, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.08, "viable_onion_potting_agent_0_min": 9, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 15.39, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.002688134554773569, "policy_loss": -0.0030803410336375237, "vf_loss": 
7.730833053588867, "vf_explained_var": 0.5763280391693115, "kl": 0.0021167888771742582, "entropy": 0.7617533206939697, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 6246400, "num_env_steps_trained": 6246400, "num_agent_steps_sampled": 12492800, "num_agent_steps_trained": 12492800}, "sampler_results": {"episode_reward_max": 639.0, "episode_reward_min": 353.0, "episode_reward_mean": 565.44, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 176.0}, "policy_reward_max": {"ppo": 322.0}, "policy_reward_mean": {"ppo": 282.72}, "custom_metrics": {"sparse_reward_mean": 195.4, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 174.64, "shaped_reward_min": 113, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.39, "onion_pickup_agent_0_min": 10, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 15.72, 
"onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.3, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 15.59, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.03, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.05, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.0, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 0, "useful_onion_drop_agent_1_mean": 0.0, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 0, "potting_onion_agent_0_mean": 16.08, "potting_onion_agent_0_min": 9, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 15.39, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.02, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 7, "dish_pickup_agent_1_mean": 5.41, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 7, "useful_dish_pickup_agent_0_mean": 4.92, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.29, "useful_dish_pickup_agent_1_min": 3, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.01, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.02, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 4.8, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.12, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 4.74, "soup_delivery_agent_0_min": 3, 
"soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.04, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.08, "optimal_onion_potting_agent_0_min": 9, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 15.39, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.08, "viable_onion_potting_agent_0_min": 9, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 15.39, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [627.0, 582.0, 584.0, 519.0, 573.0, 453.0, 576.0, 525.0, 525.0, 639.0, 465.0, 582.0, 405.0, 522.0, 530.0, 573.0, 582.0, 519.0, 576.0, 630.0, 576.0, 630.0, 627.0, 525.0, 573.0, 579.0, 570.0, 525.0, 573.0, 582.0, 570.0, 579.0, 525.0, 570.0, 582.0, 576.0, 587.0, 573.0, 582.0, 627.0, 630.0, 579.0, 522.0, 519.0, 582.0, 576.0, 630.0, 630.0, 579.0, 573.0, 527.0, 579.0, 576.0, 582.0, 579.0, 582.0, 573.0, 582.0, 584.0, 587.0, 579.0, 582.0, 587.0, 525.0, 530.0, 525.0, 573.0, 579.0, 636.0, 579.0, 525.0, 627.0, 530.0, 353.0, 582.0, 522.0, 582.0, 579.0, 573.0, 587.0, 584.0, 627.0, 627.0, 522.0, 579.0, 576.0, 525.0, 582.0, 516.0, 582.0, 525.0, 582.0, 579.0, 530.0, 530.0, 576.0, 587.0, 516.0, 576.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [306.0, 321.0, 288.0, 294.0, 289.0, 295.0, 262.0, 257.0, 290.0, 283.0, 224.0, 229.0, 285.0, 291.0, 257.0, 268.0, 260.0, 265.0, 317.0, 322.0, 231.0, 234.0, 291.0, 291.0, 208.0, 197.0, 260.0, 262.0, 265.0, 265.0, 287.0, 286.0, 289.0, 293.0, 252.0, 267.0, 291.0, 285.0, 315.0, 315.0, 292.0, 284.0, 314.0, 316.0, 314.0, 313.0, 258.0, 267.0, 282.0, 
291.0, 285.0, 294.0, 288.0, 282.0, 260.0, 265.0, 294.0, 279.0, 288.0, 294.0, 288.0, 282.0, 280.0, 299.0, 261.0, 264.0, 275.0, 295.0, 291.0, 291.0, 291.0, 285.0, 298.0, 289.0, 291.0, 282.0, 296.0, 286.0, 317.0, 310.0, 314.0, 316.0, 284.0, 295.0, 262.0, 260.0, 262.0, 257.0, 286.0, 296.0, 286.0, 290.0, 314.0, 316.0, 314.0, 316.0, 286.0, 293.0, 288.0, 285.0, 269.0, 258.0, 281.0, 298.0, 294.0, 282.0, 289.0, 293.0, 292.0, 287.0, 294.0, 288.0, 285.0, 288.0, 291.0, 291.0, 288.0, 296.0, 297.0, 290.0, 289.0, 290.0, 296.0, 286.0, 299.0, 288.0, 260.0, 265.0, 262.0, 268.0, 265.0, 260.0, 288.0, 285.0, 294.0, 285.0, 319.0, 317.0, 290.0, 289.0, 262.0, 263.0, 309.0, 318.0, 264.0, 266.0, 177.0, 176.0, 293.0, 289.0, 268.0, 254.0, 293.0, 289.0, 290.0, 289.0, 287.0, 286.0, 296.0, 291.0, 285.0, 299.0, 311.0, 316.0, 306.0, 321.0, 257.0, 265.0, 291.0, 288.0, 289.0, 287.0, 259.0, 266.0, 294.0, 288.0, 261.0, 255.0, 292.0, 290.0, 252.0, 273.0, 292.0, 290.0, 288.0, 291.0, 273.0, 257.0, 270.0, 260.0, 290.0, 286.0, 291.0, 296.0, 259.0, 257.0, 290.0, 286.0, 283.0, 296.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6928745322580707, "mean_inference_ms": 1.2351036526199097, "mean_action_processing_ms": 0.13305333665262153, "mean_env_wait_ms": 0.8339880958234599, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 639.0, "episode_reward_min": 353.0, "episode_reward_mean": 565.44, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 176.0}, "policy_reward_max": {"ppo": 322.0}, "policy_reward_mean": {"ppo": 282.72}, "hist_stats": {"episode_reward": [627.0, 582.0, 584.0, 519.0, 573.0, 453.0, 576.0, 525.0, 525.0, 639.0, 465.0, 582.0, 405.0, 522.0, 530.0, 573.0, 582.0, 519.0, 576.0, 630.0, 576.0, 630.0, 627.0, 525.0, 573.0, 579.0, 570.0, 525.0, 573.0, 582.0, 570.0, 579.0, 525.0, 570.0, 582.0, 576.0, 587.0, 573.0, 582.0, 627.0, 630.0, 579.0, 522.0, 519.0, 582.0, 576.0, 630.0, 630.0, 579.0, 573.0, 527.0, 579.0, 576.0, 582.0, 579.0, 582.0, 573.0, 
582.0, 584.0, 587.0, 579.0, 582.0, 587.0, 525.0, 530.0, 525.0, 573.0, 579.0, 636.0, 579.0, 525.0, 627.0, 530.0, 353.0, 582.0, 522.0, 582.0, 579.0, 573.0, 587.0, 584.0, 627.0, 627.0, 522.0, 579.0, 576.0, 525.0, 582.0, 516.0, 582.0, 525.0, 582.0, 579.0, 530.0, 530.0, 576.0, 587.0, 516.0, 576.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [306.0, 321.0, 288.0, 294.0, 289.0, 295.0, 262.0, 257.0, 290.0, 283.0, 224.0, 229.0, 285.0, 291.0, 257.0, 268.0, 260.0, 265.0, 317.0, 322.0, 231.0, 234.0, 291.0, 291.0, 208.0, 197.0, 260.0, 262.0, 265.0, 265.0, 287.0, 286.0, 289.0, 293.0, 252.0, 267.0, 291.0, 285.0, 315.0, 315.0, 292.0, 284.0, 314.0, 316.0, 314.0, 313.0, 258.0, 267.0, 282.0, 291.0, 285.0, 294.0, 288.0, 282.0, 260.0, 265.0, 294.0, 279.0, 288.0, 294.0, 288.0, 282.0, 280.0, 299.0, 261.0, 264.0, 275.0, 295.0, 291.0, 291.0, 291.0, 285.0, 298.0, 289.0, 291.0, 282.0, 296.0, 286.0, 317.0, 310.0, 314.0, 316.0, 284.0, 295.0, 262.0, 260.0, 262.0, 257.0, 286.0, 296.0, 286.0, 290.0, 314.0, 316.0, 314.0, 316.0, 286.0, 293.0, 288.0, 285.0, 269.0, 258.0, 281.0, 298.0, 294.0, 282.0, 289.0, 293.0, 292.0, 287.0, 294.0, 288.0, 285.0, 288.0, 291.0, 291.0, 288.0, 296.0, 297.0, 290.0, 289.0, 290.0, 296.0, 286.0, 299.0, 288.0, 260.0, 265.0, 262.0, 268.0, 265.0, 260.0, 288.0, 285.0, 294.0, 285.0, 319.0, 317.0, 290.0, 289.0, 262.0, 263.0, 309.0, 318.0, 264.0, 266.0, 177.0, 176.0, 293.0, 289.0, 268.0, 254.0, 293.0, 289.0, 290.0, 289.0, 287.0, 286.0, 296.0, 291.0, 285.0, 299.0, 311.0, 316.0, 306.0, 
321.0, 257.0, 265.0, 291.0, 288.0, 289.0, 287.0, 259.0, 266.0, 294.0, 288.0, 261.0, 255.0, 292.0, 290.0, 252.0, 273.0, 292.0, 290.0, 288.0, 291.0, 273.0, 257.0, 270.0, 260.0, 290.0, 286.0, 291.0, 296.0, 259.0, 257.0, 290.0, 286.0, 283.0, 296.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6928745322580707, "mean_inference_ms": 1.2351036526199097, "mean_action_processing_ms": 0.13305333665262153, "mean_env_wait_ms": 0.8339880958234599, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 12492800, "num_agent_steps_trained": 12492800, "num_env_steps_sampled": 6246400, "num_env_steps_trained": 6246400, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 6246400, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 12492800, "timers": {"training_iteration_time_ms": 3568.033, "learn_time_ms": 1043.184, "learn_throughput": 12270.124, "synch_weights_time_ms": 12.104}, "counters": {"num_env_steps_sampled": 6246400, "num_env_steps_trained": 6246400, "num_agent_steps_sampled": 12492800, "num_agent_steps_trained": 12492800}, "done": false, "episodes_total": 15616, "training_iteration": 488, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-32-05", "timestamp": 1666582325, "time_this_iter_s": 3.5665717124938965, "time_total_s": 1850.8313839435577, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1850.8313839435577, "timesteps_since_restore": 0, "iterations_since_restore": 488, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.72, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 196.0, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 175.19, "shaped_reward_min": 113, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.52, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 15.71, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.44, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 15.6, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.04, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.04, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.0, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 0, 
"useful_onion_drop_agent_1_mean": 0.0, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 0, "potting_onion_agent_0_mean": 16.25, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 15.4, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.01, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 7, "dish_pickup_agent_1_mean": 5.39, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 7, "useful_dish_pickup_agent_0_mean": 4.89, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.29, "useful_dish_pickup_agent_1_min": 3, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.01, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.02, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 4.8, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.14, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 4.73, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.07, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.25, "optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 15.4, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.25, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 15.4, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0014873126056045294, "policy_loss": -0.0018815764924511313, "vf_loss": 
7.711610317230225, "vf_explained_var": 0.5623108148574829, "kl": 0.0029111807234585285, "entropy": 0.7537927031517029, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 6259200, "num_env_steps_trained": 6259200, "num_agent_steps_sampled": 12518400, "num_agent_steps_trained": 12518400}, "sampler_results": {"episode_reward_max": 636.0, "episode_reward_min": 353.0, "episode_reward_mean": 567.19, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 176.0}, "policy_reward_max": {"ppo": 321.0}, "policy_reward_mean": {"ppo": 283.595}, "custom_metrics": {"sparse_reward_mean": 196.0, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 175.19, "shaped_reward_min": 113, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.52, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 15.71, 
"onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.44, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 15.6, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.04, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.04, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.0, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 0, "useful_onion_drop_agent_1_mean": 0.0, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 0, "potting_onion_agent_0_mean": 16.25, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 15.4, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.01, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 7, "dish_pickup_agent_1_mean": 5.39, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 7, "useful_dish_pickup_agent_0_mean": 4.89, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.29, "useful_dish_pickup_agent_1_min": 3, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.01, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.02, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 4.8, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.14, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 4.73, "soup_delivery_agent_0_min": 3, 
"soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.07, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.25, "optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 15.4, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.25, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 15.4, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [525.0, 570.0, 582.0, 576.0, 587.0, 573.0, 582.0, 627.0, 630.0, 579.0, 522.0, 519.0, 582.0, 576.0, 630.0, 630.0, 579.0, 573.0, 527.0, 579.0, 576.0, 582.0, 579.0, 582.0, 573.0, 582.0, 584.0, 587.0, 579.0, 582.0, 587.0, 525.0, 530.0, 525.0, 573.0, 579.0, 636.0, 579.0, 525.0, 627.0, 530.0, 353.0, 582.0, 522.0, 582.0, 579.0, 573.0, 587.0, 584.0, 627.0, 627.0, 522.0, 579.0, 576.0, 525.0, 582.0, 516.0, 582.0, 525.0, 582.0, 579.0, 530.0, 530.0, 576.0, 587.0, 516.0, 576.0, 579.0, 573.0, 513.0, 522.0, 522.0, 582.0, 576.0, 579.0, 519.0, 513.0, 530.0, 579.0, 579.0, 573.0, 570.0, 582.0, 582.0, 570.0, 582.0, 513.0, 576.0, 582.0, 570.0, 579.0, 573.0, 519.0, 582.0, 579.0, 579.0, 582.0, 630.0, 582.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [261.0, 264.0, 275.0, 295.0, 291.0, 291.0, 291.0, 285.0, 298.0, 289.0, 291.0, 282.0, 296.0, 286.0, 317.0, 310.0, 314.0, 316.0, 284.0, 295.0, 262.0, 260.0, 262.0, 257.0, 286.0, 296.0, 286.0, 290.0, 314.0, 316.0, 314.0, 316.0, 286.0, 293.0, 288.0, 285.0, 269.0, 258.0, 281.0, 298.0, 294.0, 282.0, 289.0, 293.0, 292.0, 287.0, 294.0, 288.0, 285.0, 
288.0, 291.0, 291.0, 288.0, 296.0, 297.0, 290.0, 289.0, 290.0, 296.0, 286.0, 299.0, 288.0, 260.0, 265.0, 262.0, 268.0, 265.0, 260.0, 288.0, 285.0, 294.0, 285.0, 319.0, 317.0, 290.0, 289.0, 262.0, 263.0, 309.0, 318.0, 264.0, 266.0, 177.0, 176.0, 293.0, 289.0, 268.0, 254.0, 293.0, 289.0, 290.0, 289.0, 287.0, 286.0, 296.0, 291.0, 285.0, 299.0, 311.0, 316.0, 306.0, 321.0, 257.0, 265.0, 291.0, 288.0, 289.0, 287.0, 259.0, 266.0, 294.0, 288.0, 261.0, 255.0, 292.0, 290.0, 252.0, 273.0, 292.0, 290.0, 288.0, 291.0, 273.0, 257.0, 270.0, 260.0, 290.0, 286.0, 291.0, 296.0, 259.0, 257.0, 290.0, 286.0, 283.0, 296.0, 282.0, 291.0, 257.0, 256.0, 261.0, 261.0, 260.0, 262.0, 287.0, 295.0, 290.0, 286.0, 291.0, 288.0, 251.0, 268.0, 253.0, 260.0, 267.0, 263.0, 289.0, 290.0, 286.0, 293.0, 286.0, 287.0, 285.0, 285.0, 289.0, 293.0, 291.0, 291.0, 288.0, 282.0, 288.0, 294.0, 262.0, 251.0, 285.0, 291.0, 288.0, 294.0, 285.0, 285.0, 296.0, 283.0, 288.0, 285.0, 255.0, 264.0, 294.0, 288.0, 289.0, 290.0, 284.0, 295.0, 292.0, 290.0, 321.0, 309.0, 286.0, 296.0, 286.0, 293.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6928325929990626, "mean_inference_ms": 1.23501652551429, "mean_action_processing_ms": 0.1330490622589401, "mean_env_wait_ms": 0.8339361870383712, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 636.0, "episode_reward_min": 353.0, "episode_reward_mean": 567.19, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 176.0}, "policy_reward_max": {"ppo": 321.0}, "policy_reward_mean": {"ppo": 283.595}, "hist_stats": {"episode_reward": [525.0, 570.0, 582.0, 576.0, 587.0, 573.0, 582.0, 627.0, 630.0, 579.0, 522.0, 519.0, 582.0, 576.0, 630.0, 630.0, 579.0, 573.0, 527.0, 579.0, 576.0, 582.0, 579.0, 582.0, 573.0, 582.0, 584.0, 587.0, 579.0, 582.0, 587.0, 525.0, 530.0, 525.0, 573.0, 579.0, 636.0, 579.0, 525.0, 627.0, 530.0, 353.0, 582.0, 522.0, 582.0, 579.0, 573.0, 587.0, 584.0, 627.0, 627.0, 522.0, 579.0, 576.0, 525.0, 582.0, 516.0, 
582.0, 525.0, 582.0, 579.0, 530.0, 530.0, 576.0, 587.0, 516.0, 576.0, 579.0, 573.0, 513.0, 522.0, 522.0, 582.0, 576.0, 579.0, 519.0, 513.0, 530.0, 579.0, 579.0, 573.0, 570.0, 582.0, 582.0, 570.0, 582.0, 513.0, 576.0, 582.0, 570.0, 579.0, 573.0, 519.0, 582.0, 579.0, 579.0, 582.0, 630.0, 582.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [261.0, 264.0, 275.0, 295.0, 291.0, 291.0, 291.0, 285.0, 298.0, 289.0, 291.0, 282.0, 296.0, 286.0, 317.0, 310.0, 314.0, 316.0, 284.0, 295.0, 262.0, 260.0, 262.0, 257.0, 286.0, 296.0, 286.0, 290.0, 314.0, 316.0, 314.0, 316.0, 286.0, 293.0, 288.0, 285.0, 269.0, 258.0, 281.0, 298.0, 294.0, 282.0, 289.0, 293.0, 292.0, 287.0, 294.0, 288.0, 285.0, 288.0, 291.0, 291.0, 288.0, 296.0, 297.0, 290.0, 289.0, 290.0, 296.0, 286.0, 299.0, 288.0, 260.0, 265.0, 262.0, 268.0, 265.0, 260.0, 288.0, 285.0, 294.0, 285.0, 319.0, 317.0, 290.0, 289.0, 262.0, 263.0, 309.0, 318.0, 264.0, 266.0, 177.0, 176.0, 293.0, 289.0, 268.0, 254.0, 293.0, 289.0, 290.0, 289.0, 287.0, 286.0, 296.0, 291.0, 285.0, 299.0, 311.0, 316.0, 306.0, 321.0, 257.0, 265.0, 291.0, 288.0, 289.0, 287.0, 259.0, 266.0, 294.0, 288.0, 261.0, 255.0, 292.0, 290.0, 252.0, 273.0, 292.0, 290.0, 288.0, 291.0, 273.0, 257.0, 270.0, 260.0, 290.0, 286.0, 291.0, 296.0, 259.0, 257.0, 290.0, 286.0, 283.0, 296.0, 282.0, 291.0, 257.0, 256.0, 261.0, 261.0, 260.0, 262.0, 287.0, 295.0, 290.0, 286.0, 291.0, 288.0, 251.0, 268.0, 253.0, 260.0, 267.0, 263.0, 289.0, 290.0, 286.0, 293.0, 286.0, 287.0, 285.0, 285.0, 289.0, 
293.0, 291.0, 291.0, 288.0, 282.0, 288.0, 294.0, 262.0, 251.0, 285.0, 291.0, 288.0, 294.0, 285.0, 285.0, 296.0, 283.0, 288.0, 285.0, 255.0, 264.0, 294.0, 288.0, 289.0, 290.0, 284.0, 295.0, 292.0, 290.0, 321.0, 309.0, 286.0, 296.0, 286.0, 293.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6928325929990626, "mean_inference_ms": 1.23501652551429, "mean_action_processing_ms": 0.1330490622589401, "mean_env_wait_ms": 0.8339361870383712, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 12518400, "num_agent_steps_trained": 12518400, "num_env_steps_sampled": 6259200, "num_env_steps_trained": 6259200, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 6259200, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 12518400, "timers": {"training_iteration_time_ms": 3537.262, "learn_time_ms": 1041.837, "learn_throughput": 12285.989, "synch_weights_time_ms": 12.137}, "counters": {"num_env_steps_sampled": 6259200, "num_env_steps_trained": 6259200, "num_agent_steps_sampled": 12518400, "num_agent_steps_trained": 12518400}, "done": false, "episodes_total": 15648, "training_iteration": 489, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-32-09", "timestamp": 1666582329, "time_this_iter_s": 3.4958958625793457, "time_total_s": 1854.327279806137, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1854.327279806137, "timesteps_since_restore": 0, "iterations_since_restore": 489, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.479999999999997, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 192.2, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 172.43, "shaped_reward_min": 113, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.09, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 15.53, "onion_pickup_agent_1_min": 5, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 15.99, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 15.46, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.02, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.01, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.0, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 0, 
"useful_onion_drop_agent_1_mean": 0.0, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 0, "potting_onion_agent_0_mean": 15.86, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 15.32, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.04, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 7, "dish_pickup_agent_1_mean": 5.32, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 4.85, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.18, "useful_dish_pickup_agent_1_min": 3, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.02, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.02, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 4.75, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.01, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 4.69, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.92, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.86, "optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 15.32, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.86, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 15.32, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0012956246500834823, "policy_loss": -0.0016962646041065454, "vf_loss": 
7.817093372344971, "vf_explained_var": 0.5554914474487305, "kl": 0.0026428524870425463, "entropy": 0.7621381878852844, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 6272000, "num_env_steps_trained": 6272000, "num_agent_steps_sampled": 12544000, "num_agent_steps_trained": 12544000}, "sampler_results": {"episode_reward_max": 636.0, "episode_reward_min": 353.0, "episode_reward_mean": 556.83, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 164.0}, "policy_reward_max": {"ppo": 321.0}, "policy_reward_mean": {"ppo": 278.415}, "custom_metrics": {"sparse_reward_mean": 192.2, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 172.43, "shaped_reward_min": 113, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.09, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 15.53, 
"onion_pickup_agent_1_min": 5, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 15.99, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 15.46, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.02, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.01, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.0, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 0, "useful_onion_drop_agent_1_mean": 0.0, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 0, "potting_onion_agent_0_mean": 15.86, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 15.32, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.04, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 7, "dish_pickup_agent_1_mean": 5.32, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 4.85, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.18, "useful_dish_pickup_agent_1_min": 3, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.02, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.02, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 4.75, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.01, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 4.69, "soup_delivery_agent_0_min": 2, 
"soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.92, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.86, "optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 15.32, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.86, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 15.32, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [530.0, 525.0, 573.0, 579.0, 636.0, 579.0, 525.0, 627.0, 530.0, 353.0, 582.0, 522.0, 582.0, 579.0, 573.0, 587.0, 584.0, 627.0, 627.0, 522.0, 579.0, 576.0, 525.0, 582.0, 516.0, 582.0, 525.0, 582.0, 579.0, 530.0, 530.0, 576.0, 587.0, 516.0, 576.0, 579.0, 573.0, 513.0, 522.0, 522.0, 582.0, 576.0, 579.0, 519.0, 513.0, 530.0, 579.0, 579.0, 573.0, 570.0, 582.0, 582.0, 570.0, 582.0, 513.0, 576.0, 582.0, 570.0, 579.0, 573.0, 519.0, 582.0, 579.0, 579.0, 582.0, 630.0, 582.0, 579.0, 576.0, 587.0, 579.0, 582.0, 465.0, 633.0, 525.0, 462.0, 525.0, 530.0, 525.0, 468.0, 522.0, 582.0, 530.0, 530.0, 525.0, 630.0, 627.0, 519.0, 468.0, 522.0, 582.0, 579.0, 522.0, 353.0, 530.0, 525.0, 630.0, 636.0, 579.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [262.0, 268.0, 265.0, 260.0, 288.0, 285.0, 294.0, 285.0, 319.0, 317.0, 290.0, 289.0, 262.0, 263.0, 309.0, 318.0, 264.0, 266.0, 177.0, 176.0, 293.0, 289.0, 268.0, 254.0, 293.0, 289.0, 290.0, 289.0, 287.0, 286.0, 296.0, 291.0, 285.0, 299.0, 311.0, 316.0, 306.0, 321.0, 257.0, 265.0, 291.0, 288.0, 289.0, 287.0, 259.0, 266.0, 294.0, 288.0, 261.0, 
255.0, 292.0, 290.0, 252.0, 273.0, 292.0, 290.0, 288.0, 291.0, 273.0, 257.0, 270.0, 260.0, 290.0, 286.0, 291.0, 296.0, 259.0, 257.0, 290.0, 286.0, 283.0, 296.0, 282.0, 291.0, 257.0, 256.0, 261.0, 261.0, 260.0, 262.0, 287.0, 295.0, 290.0, 286.0, 291.0, 288.0, 251.0, 268.0, 253.0, 260.0, 267.0, 263.0, 289.0, 290.0, 286.0, 293.0, 286.0, 287.0, 285.0, 285.0, 289.0, 293.0, 291.0, 291.0, 288.0, 282.0, 288.0, 294.0, 262.0, 251.0, 285.0, 291.0, 288.0, 294.0, 285.0, 285.0, 296.0, 283.0, 288.0, 285.0, 255.0, 264.0, 294.0, 288.0, 289.0, 290.0, 284.0, 295.0, 292.0, 290.0, 321.0, 309.0, 286.0, 296.0, 286.0, 293.0, 282.0, 294.0, 291.0, 296.0, 284.0, 295.0, 294.0, 288.0, 228.0, 237.0, 319.0, 314.0, 268.0, 257.0, 225.0, 237.0, 264.0, 261.0, 266.0, 264.0, 264.0, 261.0, 231.0, 237.0, 265.0, 257.0, 292.0, 290.0, 257.0, 273.0, 261.0, 269.0, 257.0, 268.0, 312.0, 318.0, 309.0, 318.0, 259.0, 260.0, 239.0, 229.0, 260.0, 262.0, 294.0, 288.0, 289.0, 290.0, 268.0, 254.0, 189.0, 164.0, 259.0, 271.0, 265.0, 260.0, 319.0, 311.0, 316.0, 320.0, 289.0, 290.0, 291.0, 291.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6927798074894155, "mean_inference_ms": 1.2349156682504754, "mean_action_processing_ms": 0.13304283052223212, "mean_env_wait_ms": 0.8338676264458542, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 636.0, "episode_reward_min": 353.0, "episode_reward_mean": 556.83, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 164.0}, "policy_reward_max": {"ppo": 321.0}, "policy_reward_mean": {"ppo": 278.415}, "hist_stats": {"episode_reward": [530.0, 525.0, 573.0, 579.0, 636.0, 579.0, 525.0, 627.0, 530.0, 353.0, 582.0, 522.0, 582.0, 579.0, 573.0, 587.0, 584.0, 627.0, 627.0, 522.0, 579.0, 576.0, 525.0, 582.0, 516.0, 582.0, 525.0, 582.0, 579.0, 530.0, 530.0, 576.0, 587.0, 516.0, 576.0, 579.0, 573.0, 513.0, 522.0, 522.0, 582.0, 576.0, 579.0, 519.0, 513.0, 530.0, 579.0, 579.0, 573.0, 570.0, 582.0, 582.0, 570.0, 582.0, 513.0, 576.0, 
582.0, 570.0, 579.0, 573.0, 519.0, 582.0, 579.0, 579.0, 582.0, 630.0, 582.0, 579.0, 576.0, 587.0, 579.0, 582.0, 465.0, 633.0, 525.0, 462.0, 525.0, 530.0, 525.0, 468.0, 522.0, 582.0, 530.0, 530.0, 525.0, 630.0, 627.0, 519.0, 468.0, 522.0, 582.0, 579.0, 522.0, 353.0, 530.0, 525.0, 630.0, 636.0, 579.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [262.0, 268.0, 265.0, 260.0, 288.0, 285.0, 294.0, 285.0, 319.0, 317.0, 290.0, 289.0, 262.0, 263.0, 309.0, 318.0, 264.0, 266.0, 177.0, 176.0, 293.0, 289.0, 268.0, 254.0, 293.0, 289.0, 290.0, 289.0, 287.0, 286.0, 296.0, 291.0, 285.0, 299.0, 311.0, 316.0, 306.0, 321.0, 257.0, 265.0, 291.0, 288.0, 289.0, 287.0, 259.0, 266.0, 294.0, 288.0, 261.0, 255.0, 292.0, 290.0, 252.0, 273.0, 292.0, 290.0, 288.0, 291.0, 273.0, 257.0, 270.0, 260.0, 290.0, 286.0, 291.0, 296.0, 259.0, 257.0, 290.0, 286.0, 283.0, 296.0, 282.0, 291.0, 257.0, 256.0, 261.0, 261.0, 260.0, 262.0, 287.0, 295.0, 290.0, 286.0, 291.0, 288.0, 251.0, 268.0, 253.0, 260.0, 267.0, 263.0, 289.0, 290.0, 286.0, 293.0, 286.0, 287.0, 285.0, 285.0, 289.0, 293.0, 291.0, 291.0, 288.0, 282.0, 288.0, 294.0, 262.0, 251.0, 285.0, 291.0, 288.0, 294.0, 285.0, 285.0, 296.0, 283.0, 288.0, 285.0, 255.0, 264.0, 294.0, 288.0, 289.0, 290.0, 284.0, 295.0, 292.0, 290.0, 321.0, 309.0, 286.0, 296.0, 286.0, 293.0, 282.0, 294.0, 291.0, 296.0, 284.0, 295.0, 294.0, 288.0, 228.0, 237.0, 319.0, 314.0, 268.0, 257.0, 225.0, 237.0, 264.0, 261.0, 266.0, 264.0, 264.0, 261.0, 231.0, 237.0, 265.0, 257.0, 292.0, 290.0, 
257.0, 273.0, 261.0, 269.0, 257.0, 268.0, 312.0, 318.0, 309.0, 318.0, 259.0, 260.0, 239.0, 229.0, 260.0, 262.0, 294.0, 288.0, 289.0, 290.0, 268.0, 254.0, 189.0, 164.0, 259.0, 271.0, 265.0, 260.0, 319.0, 311.0, 316.0, 320.0, 289.0, 290.0, 291.0, 291.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6927798074894155, "mean_inference_ms": 1.2349156682504754, "mean_action_processing_ms": 0.13304283052223212, "mean_env_wait_ms": 0.8338676264458542, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 12544000, "num_agent_steps_trained": 12544000, "num_env_steps_sampled": 6272000, "num_env_steps_trained": 6272000, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 6272000, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 12544000, "timers": {"training_iteration_time_ms": 3527.969, "learn_time_ms": 1049.929, "learn_throughput": 12191.303, "synch_weights_time_ms": 11.408}, "counters": {"num_env_steps_sampled": 6272000, "num_env_steps_trained": 6272000, "num_agent_steps_sampled": 12544000, "num_agent_steps_trained": 12544000}, "done": false, "episodes_total": 15680, "training_iteration": 490, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-32-12", "timestamp": 1666582332, "time_this_iter_s": 3.6058645248413086, "time_total_s": 1857.9331443309784, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1857.9331443309784, "timesteps_since_restore": 0, "iterations_since_restore": 490, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.7, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 189.4, "sparse_reward_min": 100, "sparse_reward_max": 220, "shaped_reward_mean": 169.77, "shaped_reward_min": 94, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.0, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 15.23, "onion_pickup_agent_1_min": 5, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 15.87, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 15.13, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.03, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.01, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.0, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 0, 
"useful_onion_drop_agent_1_mean": 0.0, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 0, "potting_onion_agent_0_mean": 15.75, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 15.0, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 4.9, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 5.3, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 4.71, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.13, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.03, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.05, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 4.63, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 4.97, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.57, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.91, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.75, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 15.0, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.75, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 15.0, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0024327579885721207, "policy_loss": -0.002835115185007453, "vf_loss": 
7.80023193359375, "vf_explained_var": 0.5509117841720581, "kl": 0.003577027004212141, "entropy": 0.7553344964981079, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 6284800, "num_env_steps_trained": 6284800, "num_agent_steps_sampled": 12569600, "num_agent_steps_trained": 12569600}, "sampler_results": {"episode_reward_max": 636.0, "episode_reward_min": 294.0, "episode_reward_mean": 548.57, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 141.0}, "policy_reward_max": {"ppo": 321.0}, "policy_reward_mean": {"ppo": 274.285}, "custom_metrics": {"sparse_reward_mean": 189.4, "sparse_reward_min": 100, "sparse_reward_max": 220, "shaped_reward_mean": 169.77, "shaped_reward_min": 94, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.0, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 15.23, 
"onion_pickup_agent_1_min": 5, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 15.87, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 15.13, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.03, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.01, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.0, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 0, "useful_onion_drop_agent_1_mean": 0.0, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 0, "potting_onion_agent_0_mean": 15.75, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 15.0, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 4.9, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 5.3, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 4.71, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.13, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.03, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.05, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 4.63, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 4.97, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.57, "soup_delivery_agent_0_min": 1, 
"soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.91, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.75, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 15.0, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.75, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 15.0, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [587.0, 516.0, 576.0, 579.0, 573.0, 513.0, 522.0, 522.0, 582.0, 576.0, 579.0, 519.0, 513.0, 530.0, 579.0, 579.0, 573.0, 570.0, 582.0, 582.0, 570.0, 582.0, 513.0, 576.0, 582.0, 570.0, 579.0, 573.0, 519.0, 582.0, 579.0, 579.0, 582.0, 630.0, 582.0, 579.0, 576.0, 587.0, 579.0, 582.0, 465.0, 633.0, 525.0, 462.0, 525.0, 530.0, 525.0, 468.0, 522.0, 582.0, 530.0, 530.0, 525.0, 630.0, 627.0, 519.0, 468.0, 522.0, 582.0, 579.0, 522.0, 353.0, 530.0, 525.0, 630.0, 636.0, 579.0, 582.0, 573.0, 294.0, 510.0, 570.0, 573.0, 522.0, 530.0, 525.0, 573.0, 582.0, 582.0, 522.0, 468.0, 294.0, 582.0, 576.0, 473.0, 576.0, 539.0, 510.0, 579.0, 573.0, 587.0, 573.0, 579.0, 519.0, 522.0, 582.0, 582.0, 530.0, 579.0, 519.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [291.0, 296.0, 259.0, 257.0, 290.0, 286.0, 283.0, 296.0, 282.0, 291.0, 257.0, 256.0, 261.0, 261.0, 260.0, 262.0, 287.0, 295.0, 290.0, 286.0, 291.0, 288.0, 251.0, 268.0, 253.0, 260.0, 267.0, 263.0, 289.0, 290.0, 286.0, 293.0, 286.0, 287.0, 285.0, 285.0, 289.0, 293.0, 291.0, 291.0, 288.0, 282.0, 288.0, 294.0, 262.0, 251.0, 285.0, 291.0, 288.0, 
294.0, 285.0, 285.0, 296.0, 283.0, 288.0, 285.0, 255.0, 264.0, 294.0, 288.0, 289.0, 290.0, 284.0, 295.0, 292.0, 290.0, 321.0, 309.0, 286.0, 296.0, 286.0, 293.0, 282.0, 294.0, 291.0, 296.0, 284.0, 295.0, 294.0, 288.0, 228.0, 237.0, 319.0, 314.0, 268.0, 257.0, 225.0, 237.0, 264.0, 261.0, 266.0, 264.0, 264.0, 261.0, 231.0, 237.0, 265.0, 257.0, 292.0, 290.0, 257.0, 273.0, 261.0, 269.0, 257.0, 268.0, 312.0, 318.0, 309.0, 318.0, 259.0, 260.0, 239.0, 229.0, 260.0, 262.0, 294.0, 288.0, 289.0, 290.0, 268.0, 254.0, 189.0, 164.0, 259.0, 271.0, 265.0, 260.0, 319.0, 311.0, 316.0, 320.0, 289.0, 290.0, 291.0, 291.0, 285.0, 288.0, 149.0, 145.0, 263.0, 247.0, 288.0, 282.0, 285.0, 288.0, 259.0, 263.0, 264.0, 266.0, 260.0, 265.0, 282.0, 291.0, 291.0, 291.0, 291.0, 291.0, 263.0, 259.0, 230.0, 238.0, 141.0, 153.0, 286.0, 296.0, 288.0, 288.0, 240.0, 233.0, 291.0, 285.0, 274.0, 265.0, 251.0, 259.0, 293.0, 286.0, 290.0, 283.0, 299.0, 288.0, 286.0, 287.0, 291.0, 288.0, 267.0, 252.0, 260.0, 262.0, 283.0, 299.0, 293.0, 289.0, 259.0, 271.0, 286.0, 293.0, 264.0, 255.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6927431395642533, "mean_inference_ms": 1.234820555408916, "mean_action_processing_ms": 0.13303677357463245, "mean_env_wait_ms": 0.8338055430125193, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 636.0, "episode_reward_min": 294.0, "episode_reward_mean": 548.57, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 141.0}, "policy_reward_max": {"ppo": 321.0}, "policy_reward_mean": {"ppo": 274.285}, "hist_stats": {"episode_reward": [587.0, 516.0, 576.0, 579.0, 573.0, 513.0, 522.0, 522.0, 582.0, 576.0, 579.0, 519.0, 513.0, 530.0, 579.0, 579.0, 573.0, 570.0, 582.0, 582.0, 570.0, 582.0, 513.0, 576.0, 582.0, 570.0, 579.0, 573.0, 519.0, 582.0, 579.0, 579.0, 582.0, 630.0, 582.0, 579.0, 576.0, 587.0, 579.0, 582.0, 465.0, 633.0, 525.0, 462.0, 525.0, 530.0, 525.0, 468.0, 522.0, 582.0, 530.0, 530.0, 525.0, 630.0, 627.0, 519.0, 468.0, 
522.0, 582.0, 579.0, 522.0, 353.0, 530.0, 525.0, 630.0, 636.0, 579.0, 582.0, 573.0, 294.0, 510.0, 570.0, 573.0, 522.0, 530.0, 525.0, 573.0, 582.0, 582.0, 522.0, 468.0, 294.0, 582.0, 576.0, 473.0, 576.0, 539.0, 510.0, 579.0, 573.0, 587.0, 573.0, 579.0, 519.0, 522.0, 582.0, 582.0, 530.0, 579.0, 519.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [291.0, 296.0, 259.0, 257.0, 290.0, 286.0, 283.0, 296.0, 282.0, 291.0, 257.0, 256.0, 261.0, 261.0, 260.0, 262.0, 287.0, 295.0, 290.0, 286.0, 291.0, 288.0, 251.0, 268.0, 253.0, 260.0, 267.0, 263.0, 289.0, 290.0, 286.0, 293.0, 286.0, 287.0, 285.0, 285.0, 289.0, 293.0, 291.0, 291.0, 288.0, 282.0, 288.0, 294.0, 262.0, 251.0, 285.0, 291.0, 288.0, 294.0, 285.0, 285.0, 296.0, 283.0, 288.0, 285.0, 255.0, 264.0, 294.0, 288.0, 289.0, 290.0, 284.0, 295.0, 292.0, 290.0, 321.0, 309.0, 286.0, 296.0, 286.0, 293.0, 282.0, 294.0, 291.0, 296.0, 284.0, 295.0, 294.0, 288.0, 228.0, 237.0, 319.0, 314.0, 268.0, 257.0, 225.0, 237.0, 264.0, 261.0, 266.0, 264.0, 264.0, 261.0, 231.0, 237.0, 265.0, 257.0, 292.0, 290.0, 257.0, 273.0, 261.0, 269.0, 257.0, 268.0, 312.0, 318.0, 309.0, 318.0, 259.0, 260.0, 239.0, 229.0, 260.0, 262.0, 294.0, 288.0, 289.0, 290.0, 268.0, 254.0, 189.0, 164.0, 259.0, 271.0, 265.0, 260.0, 319.0, 311.0, 316.0, 320.0, 289.0, 290.0, 291.0, 291.0, 285.0, 288.0, 149.0, 145.0, 263.0, 247.0, 288.0, 282.0, 285.0, 288.0, 259.0, 263.0, 264.0, 266.0, 260.0, 265.0, 282.0, 291.0, 291.0, 291.0, 291.0, 291.0, 263.0, 259.0, 230.0, 238.0, 141.0, 153.0, 286.0, 
296.0, 288.0, 288.0, 240.0, 233.0, 291.0, 285.0, 274.0, 265.0, 251.0, 259.0, 293.0, 286.0, 290.0, 283.0, 299.0, 288.0, 286.0, 287.0, 291.0, 288.0, 267.0, 252.0, 260.0, 262.0, 283.0, 299.0, 293.0, 289.0, 259.0, 271.0, 286.0, 293.0, 264.0, 255.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6927431395642533, "mean_inference_ms": 1.234820555408916, "mean_action_processing_ms": 0.13303677357463245, "mean_env_wait_ms": 0.8338055430125193, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 12569600, "num_agent_steps_trained": 12569600, "num_env_steps_sampled": 6284800, "num_env_steps_trained": 6284800, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 6284800, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 12569600, "timers": {"training_iteration_time_ms": 3545.175, "learn_time_ms": 1055.527, "learn_throughput": 12126.644, "synch_weights_time_ms": 12.153}, "counters": {"num_env_steps_sampled": 6284800, "num_env_steps_trained": 6284800, "num_agent_steps_sampled": 12569600, "num_agent_steps_trained": 12569600}, "done": false, "episodes_total": 15712, "training_iteration": 491, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-32-16", "timestamp": 1666582336, "time_this_iter_s": 3.721700668334961, "time_total_s": 1861.6548449993134, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1861.6548449993134, "timesteps_since_restore": 0, "iterations_since_restore": 491, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.94, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 189.0, "sparse_reward_min": 100, "sparse_reward_max": 220, "shaped_reward_mean": 169.89, "shaped_reward_min": 94, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.06, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 15.18, "onion_pickup_agent_1_min": 5, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 15.91, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 15.09, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.01, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.0, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 0, "useful_onion_drop_agent_0_mean": 0.0, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 0, 
"useful_onion_drop_agent_1_mean": 0.0, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 0, "potting_onion_agent_0_mean": 15.8, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 14.95, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 4.96, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 5.27, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 4.78, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.1, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.02, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.05, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 4.65, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 4.95, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.58, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.88, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.8, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 14.95, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.8, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 14.95, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0008974068332463503, "policy_loss": -0.0012979262974113226, "vf_loss": 
7.716615676879883, "vf_explained_var": 0.5693703889846802, "kl": 0.002612018259242177, "entropy": 0.7422833442687988, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 6297600, "num_env_steps_trained": 6297600, "num_agent_steps_sampled": 12595200, "num_agent_steps_trained": 12595200}, "sampler_results": {"episode_reward_max": 636.0, "episode_reward_min": 294.0, "episode_reward_mean": 547.89, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 141.0}, "policy_reward_max": {"ppo": 321.0}, "policy_reward_mean": {"ppo": 273.945}, "custom_metrics": {"sparse_reward_mean": 189.0, "sparse_reward_min": 100, "sparse_reward_max": 220, "shaped_reward_mean": 169.89, "shaped_reward_min": 94, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.06, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 15.18, 
"onion_pickup_agent_1_min": 5, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 15.91, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 15.09, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.01, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.0, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 0, "useful_onion_drop_agent_0_mean": 0.0, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 0, "useful_onion_drop_agent_1_mean": 0.0, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 0, "potting_onion_agent_0_mean": 15.8, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 14.95, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 4.96, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 5.27, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 4.78, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.1, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.02, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.05, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 4.65, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 4.95, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.58, "soup_delivery_agent_0_min": 1, 
"soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.88, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.8, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 14.95, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.8, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 14.95, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 630.0, 582.0, 579.0, 576.0, 587.0, 579.0, 582.0, 465.0, 633.0, 525.0, 462.0, 525.0, 530.0, 525.0, 468.0, 522.0, 582.0, 530.0, 530.0, 525.0, 630.0, 627.0, 519.0, 468.0, 522.0, 582.0, 579.0, 522.0, 353.0, 530.0, 525.0, 630.0, 636.0, 579.0, 582.0, 573.0, 294.0, 510.0, 570.0, 573.0, 522.0, 530.0, 525.0, 573.0, 582.0, 582.0, 522.0, 468.0, 294.0, 582.0, 576.0, 473.0, 576.0, 539.0, 510.0, 579.0, 573.0, 587.0, 573.0, 579.0, 519.0, 522.0, 582.0, 582.0, 530.0, 579.0, 519.0, 579.0, 582.0, 570.0, 579.0, 579.0, 570.0, 525.0, 576.0, 479.0, 627.0, 510.0, 525.0, 525.0, 579.0, 522.0, 525.0, 576.0, 636.0, 576.0, 525.0, 525.0, 573.0, 579.0, 590.0, 579.0, 573.0, 576.0, 573.0, 584.0, 473.0, 516.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [292.0, 290.0, 321.0, 309.0, 286.0, 296.0, 286.0, 293.0, 282.0, 294.0, 291.0, 296.0, 284.0, 295.0, 294.0, 288.0, 228.0, 237.0, 319.0, 314.0, 268.0, 257.0, 225.0, 237.0, 264.0, 261.0, 266.0, 264.0, 264.0, 261.0, 231.0, 237.0, 265.0, 257.0, 292.0, 290.0, 257.0, 273.0, 261.0, 269.0, 257.0, 268.0, 312.0, 318.0, 309.0, 318.0, 259.0, 260.0, 239.0, 
229.0, 260.0, 262.0, 294.0, 288.0, 289.0, 290.0, 268.0, 254.0, 189.0, 164.0, 259.0, 271.0, 265.0, 260.0, 319.0, 311.0, 316.0, 320.0, 289.0, 290.0, 291.0, 291.0, 285.0, 288.0, 149.0, 145.0, 263.0, 247.0, 288.0, 282.0, 285.0, 288.0, 259.0, 263.0, 264.0, 266.0, 260.0, 265.0, 282.0, 291.0, 291.0, 291.0, 291.0, 291.0, 263.0, 259.0, 230.0, 238.0, 141.0, 153.0, 286.0, 296.0, 288.0, 288.0, 240.0, 233.0, 291.0, 285.0, 274.0, 265.0, 251.0, 259.0, 293.0, 286.0, 290.0, 283.0, 299.0, 288.0, 286.0, 287.0, 291.0, 288.0, 267.0, 252.0, 260.0, 262.0, 283.0, 299.0, 293.0, 289.0, 259.0, 271.0, 286.0, 293.0, 264.0, 255.0, 290.0, 289.0, 290.0, 292.0, 290.0, 280.0, 294.0, 285.0, 296.0, 283.0, 287.0, 283.0, 260.0, 265.0, 289.0, 287.0, 236.0, 243.0, 311.0, 316.0, 268.0, 242.0, 262.0, 263.0, 260.0, 265.0, 284.0, 295.0, 261.0, 261.0, 258.0, 267.0, 284.0, 292.0, 320.0, 316.0, 294.0, 282.0, 266.0, 259.0, 263.0, 262.0, 288.0, 285.0, 292.0, 287.0, 291.0, 299.0, 285.0, 294.0, 282.0, 291.0, 288.0, 288.0, 288.0, 285.0, 294.0, 290.0, 232.0, 241.0, 262.0, 254.0, 291.0, 291.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6927135667510714, "mean_inference_ms": 1.2347289499299863, "mean_action_processing_ms": 0.13303129612242792, "mean_env_wait_ms": 0.8337459211393875, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 636.0, "episode_reward_min": 294.0, "episode_reward_mean": 547.89, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 141.0}, "policy_reward_max": {"ppo": 321.0}, "policy_reward_mean": {"ppo": 273.945}, "hist_stats": {"episode_reward": [582.0, 630.0, 582.0, 579.0, 576.0, 587.0, 579.0, 582.0, 465.0, 633.0, 525.0, 462.0, 525.0, 530.0, 525.0, 468.0, 522.0, 582.0, 530.0, 530.0, 525.0, 630.0, 627.0, 519.0, 468.0, 522.0, 582.0, 579.0, 522.0, 353.0, 530.0, 525.0, 630.0, 636.0, 579.0, 582.0, 573.0, 294.0, 510.0, 570.0, 573.0, 522.0, 530.0, 525.0, 573.0, 582.0, 582.0, 522.0, 468.0, 294.0, 582.0, 576.0, 473.0, 576.0, 539.0, 510.0, 
579.0, 573.0, 587.0, 573.0, 579.0, 519.0, 522.0, 582.0, 582.0, 530.0, 579.0, 519.0, 579.0, 582.0, 570.0, 579.0, 579.0, 570.0, 525.0, 576.0, 479.0, 627.0, 510.0, 525.0, 525.0, 579.0, 522.0, 525.0, 576.0, 636.0, 576.0, 525.0, 525.0, 573.0, 579.0, 590.0, 579.0, 573.0, 576.0, 573.0, 584.0, 473.0, 516.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [292.0, 290.0, 321.0, 309.0, 286.0, 296.0, 286.0, 293.0, 282.0, 294.0, 291.0, 296.0, 284.0, 295.0, 294.0, 288.0, 228.0, 237.0, 319.0, 314.0, 268.0, 257.0, 225.0, 237.0, 264.0, 261.0, 266.0, 264.0, 264.0, 261.0, 231.0, 237.0, 265.0, 257.0, 292.0, 290.0, 257.0, 273.0, 261.0, 269.0, 257.0, 268.0, 312.0, 318.0, 309.0, 318.0, 259.0, 260.0, 239.0, 229.0, 260.0, 262.0, 294.0, 288.0, 289.0, 290.0, 268.0, 254.0, 189.0, 164.0, 259.0, 271.0, 265.0, 260.0, 319.0, 311.0, 316.0, 320.0, 289.0, 290.0, 291.0, 291.0, 285.0, 288.0, 149.0, 145.0, 263.0, 247.0, 288.0, 282.0, 285.0, 288.0, 259.0, 263.0, 264.0, 266.0, 260.0, 265.0, 282.0, 291.0, 291.0, 291.0, 291.0, 291.0, 263.0, 259.0, 230.0, 238.0, 141.0, 153.0, 286.0, 296.0, 288.0, 288.0, 240.0, 233.0, 291.0, 285.0, 274.0, 265.0, 251.0, 259.0, 293.0, 286.0, 290.0, 283.0, 299.0, 288.0, 286.0, 287.0, 291.0, 288.0, 267.0, 252.0, 260.0, 262.0, 283.0, 299.0, 293.0, 289.0, 259.0, 271.0, 286.0, 293.0, 264.0, 255.0, 290.0, 289.0, 290.0, 292.0, 290.0, 280.0, 294.0, 285.0, 296.0, 283.0, 287.0, 283.0, 260.0, 265.0, 289.0, 287.0, 236.0, 243.0, 311.0, 316.0, 268.0, 242.0, 262.0, 263.0, 260.0, 265.0, 284.0, 295.0, 
261.0, 261.0, 258.0, 267.0, 284.0, 292.0, 320.0, 316.0, 294.0, 282.0, 266.0, 259.0, 263.0, 262.0, 288.0, 285.0, 292.0, 287.0, 291.0, 299.0, 285.0, 294.0, 282.0, 291.0, 288.0, 288.0, 288.0, 285.0, 294.0, 290.0, 232.0, 241.0, 262.0, 254.0, 291.0, 291.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6927135667510714, "mean_inference_ms": 1.2347289499299863, "mean_action_processing_ms": 0.13303129612242792, "mean_env_wait_ms": 0.8337459211393875, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 12595200, "num_agent_steps_trained": 12595200, "num_env_steps_sampled": 6297600, "num_env_steps_trained": 6297600, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 6297600, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 12595200, "timers": {"training_iteration_time_ms": 3549.89, "learn_time_ms": 1056.008, "learn_throughput": 12121.115, "synch_weights_time_ms": 13.313}, "counters": {"num_env_steps_sampled": 6297600, "num_env_steps_trained": 6297600, "num_agent_steps_sampled": 12595200, "num_agent_steps_trained": 12595200}, "done": false, "episodes_total": 15744, "training_iteration": 492, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-32-20", "timestamp": 1666582340, "time_this_iter_s": 3.6109235286712646, "time_total_s": 1865.2657685279846, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1865.2657685279846, "timesteps_since_restore": 0, "iterations_since_restore": 492, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.51666666666667, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 193.0, "sparse_reward_min": 100, "sparse_reward_max": 220, "shaped_reward_mean": 172.55, "shaped_reward_min": 94, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.28, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 15.5, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.14, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 15.38, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.02, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.01, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.0, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 0, 
"useful_onion_drop_agent_1_mean": 0.01, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.0, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 15.22, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 4.97, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 5.34, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 4.83, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.2, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.01, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.06, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 4.73, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.03, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.68, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.99, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.0, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 15.22, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.0, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 15.22, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.002336945617571473, "policy_loss": 0.001948866993188858, "vf_loss": 
7.629458427429199, "vf_explained_var": 0.576850175857544, "kl": 0.002895065350458026, "entropy": 0.7497336268424988, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 6310400, "num_env_steps_trained": 6310400, "num_agent_steps_sampled": 12620800, "num_agent_steps_trained": 12620800}, "sampler_results": {"episode_reward_max": 636.0, "episode_reward_min": 294.0, "episode_reward_mean": 558.55, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 141.0}, "policy_reward_max": {"ppo": 322.0}, "policy_reward_mean": {"ppo": 279.275}, "custom_metrics": {"sparse_reward_mean": 193.0, "sparse_reward_min": 100, "sparse_reward_max": 220, "shaped_reward_mean": 172.55, "shaped_reward_min": 94, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.28, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 15.5, 
"onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.14, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 15.38, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.02, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.01, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.0, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 0, "useful_onion_drop_agent_1_mean": 0.01, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.0, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 15.22, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 4.97, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 5.34, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 4.83, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.2, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.01, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.06, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 4.73, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.03, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.68, "soup_delivery_agent_0_min": 1, 
"soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.99, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.0, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 15.22, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.0, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 15.22, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [630.0, 636.0, 579.0, 582.0, 573.0, 294.0, 510.0, 570.0, 573.0, 522.0, 530.0, 525.0, 573.0, 582.0, 582.0, 522.0, 468.0, 294.0, 582.0, 576.0, 473.0, 576.0, 539.0, 510.0, 579.0, 573.0, 587.0, 573.0, 579.0, 519.0, 522.0, 582.0, 582.0, 530.0, 579.0, 519.0, 579.0, 582.0, 570.0, 579.0, 579.0, 570.0, 525.0, 576.0, 479.0, 627.0, 510.0, 525.0, 525.0, 579.0, 522.0, 525.0, 576.0, 636.0, 576.0, 525.0, 525.0, 573.0, 579.0, 590.0, 579.0, 573.0, 576.0, 573.0, 584.0, 473.0, 516.0, 582.0, 582.0, 582.0, 587.0, 630.0, 576.0, 579.0, 573.0, 525.0, 525.0, 582.0, 573.0, 567.0, 522.0, 582.0, 582.0, 630.0, 582.0, 582.0, 573.0, 576.0, 522.0, 587.0, 582.0, 576.0, 627.0, 576.0, 576.0, 579.0, 573.0, 582.0, 579.0, 573.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [319.0, 311.0, 316.0, 320.0, 289.0, 290.0, 291.0, 291.0, 285.0, 288.0, 149.0, 145.0, 263.0, 247.0, 288.0, 282.0, 285.0, 288.0, 259.0, 263.0, 264.0, 266.0, 260.0, 265.0, 282.0, 291.0, 291.0, 291.0, 291.0, 291.0, 263.0, 259.0, 230.0, 238.0, 141.0, 153.0, 286.0, 296.0, 288.0, 288.0, 240.0, 233.0, 291.0, 285.0, 274.0, 265.0, 251.0, 259.0, 293.0, 
286.0, 290.0, 283.0, 299.0, 288.0, 286.0, 287.0, 291.0, 288.0, 267.0, 252.0, 260.0, 262.0, 283.0, 299.0, 293.0, 289.0, 259.0, 271.0, 286.0, 293.0, 264.0, 255.0, 290.0, 289.0, 290.0, 292.0, 290.0, 280.0, 294.0, 285.0, 296.0, 283.0, 287.0, 283.0, 260.0, 265.0, 289.0, 287.0, 236.0, 243.0, 311.0, 316.0, 268.0, 242.0, 262.0, 263.0, 260.0, 265.0, 284.0, 295.0, 261.0, 261.0, 258.0, 267.0, 284.0, 292.0, 320.0, 316.0, 294.0, 282.0, 266.0, 259.0, 263.0, 262.0, 288.0, 285.0, 292.0, 287.0, 291.0, 299.0, 285.0, 294.0, 282.0, 291.0, 288.0, 288.0, 288.0, 285.0, 294.0, 290.0, 232.0, 241.0, 262.0, 254.0, 291.0, 291.0, 290.0, 292.0, 291.0, 291.0, 292.0, 295.0, 313.0, 317.0, 288.0, 288.0, 292.0, 287.0, 288.0, 285.0, 266.0, 259.0, 262.0, 263.0, 286.0, 296.0, 286.0, 287.0, 277.0, 290.0, 260.0, 262.0, 290.0, 292.0, 298.0, 284.0, 308.0, 322.0, 288.0, 294.0, 295.0, 287.0, 279.0, 294.0, 289.0, 287.0, 252.0, 270.0, 291.0, 296.0, 292.0, 290.0, 292.0, 284.0, 314.0, 313.0, 280.0, 296.0, 288.0, 288.0, 294.0, 285.0, 284.0, 289.0, 289.0, 293.0, 289.0, 290.0, 288.0, 285.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6927007948953394, "mean_inference_ms": 1.2346649904093174, "mean_action_processing_ms": 0.1330296152925825, "mean_env_wait_ms": 0.8337135496717951, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 636.0, "episode_reward_min": 294.0, "episode_reward_mean": 558.55, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 141.0}, "policy_reward_max": {"ppo": 322.0}, "policy_reward_mean": {"ppo": 279.275}, "hist_stats": {"episode_reward": [630.0, 636.0, 579.0, 582.0, 573.0, 294.0, 510.0, 570.0, 573.0, 522.0, 530.0, 525.0, 573.0, 582.0, 582.0, 522.0, 468.0, 294.0, 582.0, 576.0, 473.0, 576.0, 539.0, 510.0, 579.0, 573.0, 587.0, 573.0, 579.0, 519.0, 522.0, 582.0, 582.0, 530.0, 579.0, 519.0, 579.0, 582.0, 570.0, 579.0, 579.0, 570.0, 525.0, 576.0, 479.0, 627.0, 510.0, 525.0, 525.0, 579.0, 522.0, 525.0, 576.0, 636.0, 576.0, 525.0, 525.0, 
573.0, 579.0, 590.0, 579.0, 573.0, 576.0, 573.0, 584.0, 473.0, 516.0, 582.0, 582.0, 582.0, 587.0, 630.0, 576.0, 579.0, 573.0, 525.0, 525.0, 582.0, 573.0, 567.0, 522.0, 582.0, 582.0, 630.0, 582.0, 582.0, 573.0, 576.0, 522.0, 587.0, 582.0, 576.0, 627.0, 576.0, 576.0, 579.0, 573.0, 582.0, 579.0, 573.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [319.0, 311.0, 316.0, 320.0, 289.0, 290.0, 291.0, 291.0, 285.0, 288.0, 149.0, 145.0, 263.0, 247.0, 288.0, 282.0, 285.0, 288.0, 259.0, 263.0, 264.0, 266.0, 260.0, 265.0, 282.0, 291.0, 291.0, 291.0, 291.0, 291.0, 263.0, 259.0, 230.0, 238.0, 141.0, 153.0, 286.0, 296.0, 288.0, 288.0, 240.0, 233.0, 291.0, 285.0, 274.0, 265.0, 251.0, 259.0, 293.0, 286.0, 290.0, 283.0, 299.0, 288.0, 286.0, 287.0, 291.0, 288.0, 267.0, 252.0, 260.0, 262.0, 283.0, 299.0, 293.0, 289.0, 259.0, 271.0, 286.0, 293.0, 264.0, 255.0, 290.0, 289.0, 290.0, 292.0, 290.0, 280.0, 294.0, 285.0, 296.0, 283.0, 287.0, 283.0, 260.0, 265.0, 289.0, 287.0, 236.0, 243.0, 311.0, 316.0, 268.0, 242.0, 262.0, 263.0, 260.0, 265.0, 284.0, 295.0, 261.0, 261.0, 258.0, 267.0, 284.0, 292.0, 320.0, 316.0, 294.0, 282.0, 266.0, 259.0, 263.0, 262.0, 288.0, 285.0, 292.0, 287.0, 291.0, 299.0, 285.0, 294.0, 282.0, 291.0, 288.0, 288.0, 288.0, 285.0, 294.0, 290.0, 232.0, 241.0, 262.0, 254.0, 291.0, 291.0, 290.0, 292.0, 291.0, 291.0, 292.0, 295.0, 313.0, 317.0, 288.0, 288.0, 292.0, 287.0, 288.0, 285.0, 266.0, 259.0, 262.0, 263.0, 286.0, 296.0, 286.0, 287.0, 277.0, 290.0, 260.0, 262.0, 290.0, 292.0, 298.0, 
284.0, 308.0, 322.0, 288.0, 294.0, 295.0, 287.0, 279.0, 294.0, 289.0, 287.0, 252.0, 270.0, 291.0, 296.0, 292.0, 290.0, 292.0, 284.0, 314.0, 313.0, 280.0, 296.0, 288.0, 288.0, 294.0, 285.0, 284.0, 289.0, 289.0, 293.0, 289.0, 290.0, 288.0, 285.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6927007948953394, "mean_inference_ms": 1.2346649904093174, "mean_action_processing_ms": 0.1330296152925825, "mean_env_wait_ms": 0.8337135496717951, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 12620800, "num_agent_steps_trained": 12620800, "num_env_steps_sampled": 6310400, "num_env_steps_trained": 6310400, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 6310400, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 12620800, "timers": {"training_iteration_time_ms": 3555.657, "learn_time_ms": 1059.616, "learn_throughput": 12079.845, "synch_weights_time_ms": 12.544}, "counters": {"num_env_steps_sampled": 6310400, "num_env_steps_trained": 6310400, "num_agent_steps_sampled": 12620800, "num_agent_steps_trained": 12620800}, "done": false, "episodes_total": 15776, "training_iteration": 493, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-32-24", "timestamp": 1666582344, "time_this_iter_s": 3.6424057483673096, "time_total_s": 1868.908174276352, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1868.908174276352, "timesteps_since_restore": 0, "iterations_since_restore": 493, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 24.2, "ram_util_percent": 10.62}}
+{"custom_metrics": {"sparse_reward_mean": 194.8, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 173.65, "shaped_reward_min": 124, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.14, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 15.85, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.03, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 15.73, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.02, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.03, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.01, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, 
"useful_onion_drop_agent_1_mean": 0.02, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.85, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 15.6, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.08, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 5.27, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 4.94, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.11, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.02, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.03, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 4.86, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 4.97, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.82, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.93, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.85, "optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 15.6, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.85, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 15.6, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.002367196138948202, "policy_loss": 0.001961060333997011, "vf_loss": 
7.817628860473633, "vf_explained_var": 0.5302909016609192, "kl": 0.0031909747049212456, "entropy": 0.7512529492378235, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 6323200, "num_env_steps_trained": 6323200, "num_agent_steps_sampled": 12646400, "num_agent_steps_trained": 12646400}, "sampler_results": {"episode_reward_max": 639.0, "episode_reward_min": 444.0, "episode_reward_mean": 563.25, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 219.0}, "policy_reward_max": {"ppo": 322.0}, "policy_reward_mean": {"ppo": 281.625}, "custom_metrics": {"sparse_reward_mean": 194.8, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 173.65, "shaped_reward_min": 124, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.14, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 15.85, 
"onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.03, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 15.73, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.02, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.03, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.01, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.02, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.85, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 15.6, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.08, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 5.27, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 4.94, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.11, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.02, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.03, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 4.86, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 4.97, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.82, "soup_delivery_agent_0_min": 3, 
"soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.93, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.85, "optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 15.6, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.85, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 15.6, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 530.0, 579.0, 519.0, 579.0, 582.0, 570.0, 579.0, 579.0, 570.0, 525.0, 576.0, 479.0, 627.0, 510.0, 525.0, 525.0, 579.0, 522.0, 525.0, 576.0, 636.0, 576.0, 525.0, 525.0, 573.0, 579.0, 590.0, 579.0, 573.0, 576.0, 573.0, 584.0, 473.0, 516.0, 582.0, 582.0, 582.0, 587.0, 630.0, 576.0, 579.0, 573.0, 525.0, 525.0, 582.0, 573.0, 567.0, 522.0, 582.0, 582.0, 630.0, 582.0, 582.0, 573.0, 576.0, 522.0, 587.0, 582.0, 576.0, 627.0, 576.0, 576.0, 579.0, 573.0, 582.0, 579.0, 573.0, 522.0, 522.0, 576.0, 582.0, 530.0, 639.0, 516.0, 570.0, 576.0, 582.0, 579.0, 582.0, 516.0, 582.0, 587.0, 522.0, 516.0, 576.0, 570.0, 522.0, 582.0, 579.0, 576.0, 522.0, 522.0, 579.0, 570.0, 570.0, 579.0, 444.0, 519.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [293.0, 289.0, 259.0, 271.0, 286.0, 293.0, 264.0, 255.0, 290.0, 289.0, 290.0, 292.0, 290.0, 280.0, 294.0, 285.0, 296.0, 283.0, 287.0, 283.0, 260.0, 265.0, 289.0, 287.0, 236.0, 243.0, 311.0, 316.0, 268.0, 242.0, 262.0, 263.0, 260.0, 265.0, 284.0, 295.0, 261.0, 261.0, 258.0, 267.0, 284.0, 292.0, 320.0, 316.0, 294.0, 282.0, 266.0, 259.0, 263.0, 
262.0, 288.0, 285.0, 292.0, 287.0, 291.0, 299.0, 285.0, 294.0, 282.0, 291.0, 288.0, 288.0, 288.0, 285.0, 294.0, 290.0, 232.0, 241.0, 262.0, 254.0, 291.0, 291.0, 290.0, 292.0, 291.0, 291.0, 292.0, 295.0, 313.0, 317.0, 288.0, 288.0, 292.0, 287.0, 288.0, 285.0, 266.0, 259.0, 262.0, 263.0, 286.0, 296.0, 286.0, 287.0, 277.0, 290.0, 260.0, 262.0, 290.0, 292.0, 298.0, 284.0, 308.0, 322.0, 288.0, 294.0, 295.0, 287.0, 279.0, 294.0, 289.0, 287.0, 252.0, 270.0, 291.0, 296.0, 292.0, 290.0, 292.0, 284.0, 314.0, 313.0, 280.0, 296.0, 288.0, 288.0, 294.0, 285.0, 284.0, 289.0, 289.0, 293.0, 289.0, 290.0, 288.0, 285.0, 260.0, 262.0, 264.0, 258.0, 288.0, 288.0, 285.0, 297.0, 264.0, 266.0, 322.0, 317.0, 260.0, 256.0, 279.0, 291.0, 294.0, 282.0, 288.0, 294.0, 288.0, 291.0, 294.0, 288.0, 258.0, 258.0, 291.0, 291.0, 296.0, 291.0, 256.0, 266.0, 261.0, 255.0, 288.0, 288.0, 278.0, 292.0, 261.0, 261.0, 294.0, 288.0, 289.0, 290.0, 287.0, 289.0, 261.0, 261.0, 265.0, 257.0, 293.0, 286.0, 288.0, 282.0, 291.0, 279.0, 291.0, 288.0, 219.0, 225.0, 261.0, 258.0, 284.0, 292.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.692662292395533, "mean_inference_ms": 1.234709989130729, "mean_action_processing_ms": 0.1330246376150592, "mean_env_wait_ms": 0.8336586327420582, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 639.0, "episode_reward_min": 444.0, "episode_reward_mean": 563.25, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 219.0}, "policy_reward_max": {"ppo": 322.0}, "policy_reward_mean": {"ppo": 281.625}, "hist_stats": {"episode_reward": [582.0, 530.0, 579.0, 519.0, 579.0, 582.0, 570.0, 579.0, 579.0, 570.0, 525.0, 576.0, 479.0, 627.0, 510.0, 525.0, 525.0, 579.0, 522.0, 525.0, 576.0, 636.0, 576.0, 525.0, 525.0, 573.0, 579.0, 590.0, 579.0, 573.0, 576.0, 573.0, 584.0, 473.0, 516.0, 582.0, 582.0, 582.0, 587.0, 630.0, 576.0, 579.0, 573.0, 525.0, 525.0, 582.0, 573.0, 567.0, 522.0, 582.0, 582.0, 630.0, 582.0, 582.0, 573.0, 576.0, 522.0, 
587.0, 582.0, 576.0, 627.0, 576.0, 576.0, 579.0, 573.0, 582.0, 579.0, 573.0, 522.0, 522.0, 576.0, 582.0, 530.0, 639.0, 516.0, 570.0, 576.0, 582.0, 579.0, 582.0, 516.0, 582.0, 587.0, 522.0, 516.0, 576.0, 570.0, 522.0, 582.0, 579.0, 576.0, 522.0, 522.0, 579.0, 570.0, 570.0, 579.0, 444.0, 519.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [293.0, 289.0, 259.0, 271.0, 286.0, 293.0, 264.0, 255.0, 290.0, 289.0, 290.0, 292.0, 290.0, 280.0, 294.0, 285.0, 296.0, 283.0, 287.0, 283.0, 260.0, 265.0, 289.0, 287.0, 236.0, 243.0, 311.0, 316.0, 268.0, 242.0, 262.0, 263.0, 260.0, 265.0, 284.0, 295.0, 261.0, 261.0, 258.0, 267.0, 284.0, 292.0, 320.0, 316.0, 294.0, 282.0, 266.0, 259.0, 263.0, 262.0, 288.0, 285.0, 292.0, 287.0, 291.0, 299.0, 285.0, 294.0, 282.0, 291.0, 288.0, 288.0, 288.0, 285.0, 294.0, 290.0, 232.0, 241.0, 262.0, 254.0, 291.0, 291.0, 290.0, 292.0, 291.0, 291.0, 292.0, 295.0, 313.0, 317.0, 288.0, 288.0, 292.0, 287.0, 288.0, 285.0, 266.0, 259.0, 262.0, 263.0, 286.0, 296.0, 286.0, 287.0, 277.0, 290.0, 260.0, 262.0, 290.0, 292.0, 298.0, 284.0, 308.0, 322.0, 288.0, 294.0, 295.0, 287.0, 279.0, 294.0, 289.0, 287.0, 252.0, 270.0, 291.0, 296.0, 292.0, 290.0, 292.0, 284.0, 314.0, 313.0, 280.0, 296.0, 288.0, 288.0, 294.0, 285.0, 284.0, 289.0, 289.0, 293.0, 289.0, 290.0, 288.0, 285.0, 260.0, 262.0, 264.0, 258.0, 288.0, 288.0, 285.0, 297.0, 264.0, 266.0, 322.0, 317.0, 260.0, 256.0, 279.0, 291.0, 294.0, 282.0, 288.0, 294.0, 288.0, 291.0, 294.0, 288.0, 258.0, 258.0, 291.0, 291.0, 296.0, 
291.0, 256.0, 266.0, 261.0, 255.0, 288.0, 288.0, 278.0, 292.0, 261.0, 261.0, 294.0, 288.0, 289.0, 290.0, 287.0, 289.0, 261.0, 261.0, 265.0, 257.0, 293.0, 286.0, 288.0, 282.0, 291.0, 279.0, 291.0, 288.0, 219.0, 225.0, 261.0, 258.0, 284.0, 292.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.692662292395533, "mean_inference_ms": 1.234709989130729, "mean_action_processing_ms": 0.1330246376150592, "mean_env_wait_ms": 0.8336586327420582, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 12646400, "num_agent_steps_trained": 12646400, "num_env_steps_sampled": 6323200, "num_env_steps_trained": 6323200, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 6323200, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 12646400, "timers": {"training_iteration_time_ms": 3584.233, "learn_time_ms": 1056.792, "learn_throughput": 12112.126, "synch_weights_time_ms": 12.517}, "counters": {"num_env_steps_sampled": 6323200, "num_env_steps_trained": 6323200, "num_agent_steps_sampled": 12646400, "num_agent_steps_trained": 12646400}, "done": false, "episodes_total": 15808, "training_iteration": 494, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-32-28", "timestamp": 1666582348, "time_this_iter_s": 3.8276045322418213, "time_total_s": 1872.7357788085938, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1872.7357788085938, "timesteps_since_restore": 0, "iterations_since_restore": 494, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 21.2, "ram_util_percent": 10.616666666666667}}
+{"custom_metrics": {"sparse_reward_mean": 194.0, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 172.95, "shaped_reward_min": 124, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.13, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 15.7, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.0, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 15.55, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.04, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.04, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.02, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, 
"useful_onion_drop_agent_1_mean": 0.02, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.86, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 15.42, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.03, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 5.3, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 4.93, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.14, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.03, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.01, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 4.79, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 4.99, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.74, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.97, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.86, "optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 15.42, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.86, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 15.42, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0009088999358937144, "policy_loss": 0.0005068274331279099, "vf_loss": 
7.789777755737305, "vf_explained_var": 0.5383257865905762, "kl": 0.00321396766230464, "entropy": 0.7538089752197266, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 6336000, "num_env_steps_trained": 6336000, "num_agent_steps_sampled": 12672000, "num_agent_steps_trained": 12672000}, "sampler_results": {"episode_reward_max": 639.0, "episode_reward_min": 444.0, "episode_reward_mean": 560.95, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 219.0}, "policy_reward_max": {"ppo": 322.0}, "policy_reward_mean": {"ppo": 280.475}, "custom_metrics": {"sparse_reward_mean": 194.0, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 172.95, "shaped_reward_min": 124, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.13, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 15.7, 
"onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.0, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 15.55, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.04, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.04, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.02, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.02, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.86, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 15.42, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.03, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 5.3, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 4.93, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.14, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.03, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.01, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 4.79, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 4.99, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.74, "soup_delivery_agent_0_min": 3, 
"soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.97, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.86, "optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 15.42, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.86, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 15.42, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [584.0, 473.0, 516.0, 582.0, 582.0, 582.0, 587.0, 630.0, 576.0, 579.0, 573.0, 525.0, 525.0, 582.0, 573.0, 567.0, 522.0, 582.0, 582.0, 630.0, 582.0, 582.0, 573.0, 576.0, 522.0, 587.0, 582.0, 576.0, 627.0, 576.0, 576.0, 579.0, 573.0, 582.0, 579.0, 573.0, 522.0, 522.0, 576.0, 582.0, 530.0, 639.0, 516.0, 570.0, 576.0, 582.0, 579.0, 582.0, 516.0, 582.0, 587.0, 522.0, 516.0, 576.0, 570.0, 522.0, 582.0, 579.0, 576.0, 522.0, 522.0, 579.0, 570.0, 570.0, 579.0, 444.0, 519.0, 576.0, 573.0, 582.0, 576.0, 573.0, 579.0, 522.0, 579.0, 525.0, 573.0, 530.0, 570.0, 465.0, 525.0, 516.0, 525.0, 525.0, 582.0, 579.0, 582.0, 522.0, 573.0, 513.0, 525.0, 570.0, 630.0, 627.0, 582.0, 525.0, 525.0, 582.0, 582.0, 476.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [294.0, 290.0, 232.0, 241.0, 262.0, 254.0, 291.0, 291.0, 290.0, 292.0, 291.0, 291.0, 292.0, 295.0, 313.0, 317.0, 288.0, 288.0, 292.0, 287.0, 288.0, 285.0, 266.0, 259.0, 262.0, 263.0, 286.0, 296.0, 286.0, 287.0, 277.0, 290.0, 260.0, 262.0, 290.0, 292.0, 298.0, 284.0, 308.0, 322.0, 288.0, 294.0, 295.0, 287.0, 279.0, 294.0, 289.0, 287.0, 252.0, 
270.0, 291.0, 296.0, 292.0, 290.0, 292.0, 284.0, 314.0, 313.0, 280.0, 296.0, 288.0, 288.0, 294.0, 285.0, 284.0, 289.0, 289.0, 293.0, 289.0, 290.0, 288.0, 285.0, 260.0, 262.0, 264.0, 258.0, 288.0, 288.0, 285.0, 297.0, 264.0, 266.0, 322.0, 317.0, 260.0, 256.0, 279.0, 291.0, 294.0, 282.0, 288.0, 294.0, 288.0, 291.0, 294.0, 288.0, 258.0, 258.0, 291.0, 291.0, 296.0, 291.0, 256.0, 266.0, 261.0, 255.0, 288.0, 288.0, 278.0, 292.0, 261.0, 261.0, 294.0, 288.0, 289.0, 290.0, 287.0, 289.0, 261.0, 261.0, 265.0, 257.0, 293.0, 286.0, 288.0, 282.0, 291.0, 279.0, 291.0, 288.0, 219.0, 225.0, 261.0, 258.0, 284.0, 292.0, 285.0, 288.0, 294.0, 288.0, 283.0, 293.0, 278.0, 295.0, 281.0, 298.0, 264.0, 258.0, 293.0, 286.0, 266.0, 259.0, 282.0, 291.0, 263.0, 267.0, 287.0, 283.0, 228.0, 237.0, 265.0, 260.0, 261.0, 255.0, 263.0, 262.0, 263.0, 262.0, 291.0, 291.0, 289.0, 290.0, 289.0, 293.0, 265.0, 257.0, 285.0, 288.0, 256.0, 257.0, 263.0, 262.0, 287.0, 283.0, 321.0, 309.0, 313.0, 314.0, 289.0, 293.0, 270.0, 255.0, 268.0, 257.0, 291.0, 291.0, 291.0, 291.0, 240.0, 236.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6926126229211826, "mean_inference_ms": 1.2347401808599863, "mean_action_processing_ms": 0.133017718901637, "mean_env_wait_ms": 0.8335926233535009, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 639.0, "episode_reward_min": 444.0, "episode_reward_mean": 560.95, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 219.0}, "policy_reward_max": {"ppo": 322.0}, "policy_reward_mean": {"ppo": 280.475}, "hist_stats": {"episode_reward": [584.0, 473.0, 516.0, 582.0, 582.0, 582.0, 587.0, 630.0, 576.0, 579.0, 573.0, 525.0, 525.0, 582.0, 573.0, 567.0, 522.0, 582.0, 582.0, 630.0, 582.0, 582.0, 573.0, 576.0, 522.0, 587.0, 582.0, 576.0, 627.0, 576.0, 576.0, 579.0, 573.0, 582.0, 579.0, 573.0, 522.0, 522.0, 576.0, 582.0, 530.0, 639.0, 516.0, 570.0, 576.0, 582.0, 579.0, 582.0, 516.0, 582.0, 587.0, 522.0, 516.0, 576.0, 570.0, 522.0, 582.0, 
579.0, 576.0, 522.0, 522.0, 579.0, 570.0, 570.0, 579.0, 444.0, 519.0, 576.0, 573.0, 582.0, 576.0, 573.0, 579.0, 522.0, 579.0, 525.0, 573.0, 530.0, 570.0, 465.0, 525.0, 516.0, 525.0, 525.0, 582.0, 579.0, 582.0, 522.0, 573.0, 513.0, 525.0, 570.0, 630.0, 627.0, 582.0, 525.0, 525.0, 582.0, 582.0, 476.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [294.0, 290.0, 232.0, 241.0, 262.0, 254.0, 291.0, 291.0, 290.0, 292.0, 291.0, 291.0, 292.0, 295.0, 313.0, 317.0, 288.0, 288.0, 292.0, 287.0, 288.0, 285.0, 266.0, 259.0, 262.0, 263.0, 286.0, 296.0, 286.0, 287.0, 277.0, 290.0, 260.0, 262.0, 290.0, 292.0, 298.0, 284.0, 308.0, 322.0, 288.0, 294.0, 295.0, 287.0, 279.0, 294.0, 289.0, 287.0, 252.0, 270.0, 291.0, 296.0, 292.0, 290.0, 292.0, 284.0, 314.0, 313.0, 280.0, 296.0, 288.0, 288.0, 294.0, 285.0, 284.0, 289.0, 289.0, 293.0, 289.0, 290.0, 288.0, 285.0, 260.0, 262.0, 264.0, 258.0, 288.0, 288.0, 285.0, 297.0, 264.0, 266.0, 322.0, 317.0, 260.0, 256.0, 279.0, 291.0, 294.0, 282.0, 288.0, 294.0, 288.0, 291.0, 294.0, 288.0, 258.0, 258.0, 291.0, 291.0, 296.0, 291.0, 256.0, 266.0, 261.0, 255.0, 288.0, 288.0, 278.0, 292.0, 261.0, 261.0, 294.0, 288.0, 289.0, 290.0, 287.0, 289.0, 261.0, 261.0, 265.0, 257.0, 293.0, 286.0, 288.0, 282.0, 291.0, 279.0, 291.0, 288.0, 219.0, 225.0, 261.0, 258.0, 284.0, 292.0, 285.0, 288.0, 294.0, 288.0, 283.0, 293.0, 278.0, 295.0, 281.0, 298.0, 264.0, 258.0, 293.0, 286.0, 266.0, 259.0, 282.0, 291.0, 263.0, 267.0, 287.0, 283.0, 228.0, 237.0, 265.0, 260.0, 261.0, 255.0, 263.0, 
262.0, 263.0, 262.0, 291.0, 291.0, 289.0, 290.0, 289.0, 293.0, 265.0, 257.0, 285.0, 288.0, 256.0, 257.0, 263.0, 262.0, 287.0, 283.0, 321.0, 309.0, 313.0, 314.0, 289.0, 293.0, 270.0, 255.0, 268.0, 257.0, 291.0, 291.0, 291.0, 291.0, 240.0, 236.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6926126229211826, "mean_inference_ms": 1.2347401808599863, "mean_action_processing_ms": 0.133017718901637, "mean_env_wait_ms": 0.8335926233535009, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 12672000, "num_agent_steps_trained": 12672000, "num_env_steps_sampled": 6336000, "num_env_steps_trained": 6336000, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 6336000, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 12672000, "timers": {"training_iteration_time_ms": 3570.269, "learn_time_ms": 1049.903, "learn_throughput": 12191.597, "synch_weights_time_ms": 12.94}, "counters": {"num_env_steps_sampled": 6336000, "num_env_steps_trained": 6336000, "num_agent_steps_sampled": 12672000, "num_agent_steps_trained": 12672000}, "done": false, "episodes_total": 15840, "training_iteration": 495, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-32-32", "timestamp": 1666582352, "time_this_iter_s": 3.4956037998199463, "time_total_s": 1876.2313826084137, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1876.2313826084137, "timesteps_since_restore": 0, "iterations_since_restore": 495, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.080000000000002, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 193.4, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 172.61, "shaped_reward_min": 124, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.01, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 15.8, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 15.89, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 15.65, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.03, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.05, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.02, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, 
"useful_onion_drop_agent_1_mean": 0.01, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.75, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 15.5, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.13, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 7, "dish_pickup_agent_1_mean": 5.19, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.01, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.06, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.04, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.01, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 4.81, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 4.92, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 4.77, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.91, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.75, "optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 15.5, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.75, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 15.5, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0010021739872172475, "policy_loss": 0.0005998615524731576, "vf_loss": 
7.735168933868408, "vf_explained_var": 0.5922386646270752, "kl": 0.0024840538389980793, "entropy": 0.7424072027206421, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 6348800, "num_env_steps_trained": 6348800, "num_agent_steps_sampled": 12697600, "num_agent_steps_trained": 12697600}, "sampler_results": {"episode_reward_max": 639.0, "episode_reward_min": 444.0, "episode_reward_mean": 559.41, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 219.0}, "policy_reward_max": {"ppo": 322.0}, "policy_reward_mean": {"ppo": 279.705}, "custom_metrics": {"sparse_reward_mean": 193.4, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 172.61, "shaped_reward_min": 124, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.01, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 15.8, 
"onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 15.89, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 15.65, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.03, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.05, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.02, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.01, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.75, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 15.5, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.13, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 7, "dish_pickup_agent_1_mean": 5.19, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.01, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.06, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.04, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.01, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 4.81, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 4.92, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 4.77, "soup_delivery_agent_0_min": 3, 
"soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.91, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.75, "optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 15.5, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.75, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 15.5, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [573.0, 582.0, 579.0, 573.0, 522.0, 522.0, 576.0, 582.0, 530.0, 639.0, 516.0, 570.0, 576.0, 582.0, 579.0, 582.0, 516.0, 582.0, 587.0, 522.0, 516.0, 576.0, 570.0, 522.0, 582.0, 579.0, 576.0, 522.0, 522.0, 579.0, 570.0, 570.0, 579.0, 444.0, 519.0, 576.0, 573.0, 582.0, 576.0, 573.0, 579.0, 522.0, 579.0, 525.0, 573.0, 530.0, 570.0, 465.0, 525.0, 516.0, 525.0, 525.0, 582.0, 579.0, 582.0, 522.0, 573.0, 513.0, 525.0, 570.0, 630.0, 627.0, 582.0, 525.0, 525.0, 582.0, 582.0, 476.0, 633.0, 579.0, 570.0, 579.0, 525.0, 530.0, 579.0, 582.0, 522.0, 579.0, 582.0, 525.0, 576.0, 579.0, 582.0, 579.0, 522.0, 525.0, 582.0, 582.0, 522.0, 573.0, 573.0, 582.0, 573.0, 630.0, 519.0, 579.0, 579.0, 587.0, 582.0, 525.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [284.0, 289.0, 289.0, 293.0, 289.0, 290.0, 288.0, 285.0, 260.0, 262.0, 264.0, 258.0, 288.0, 288.0, 285.0, 297.0, 264.0, 266.0, 322.0, 317.0, 260.0, 256.0, 279.0, 291.0, 294.0, 282.0, 288.0, 294.0, 288.0, 291.0, 294.0, 288.0, 258.0, 258.0, 291.0, 291.0, 296.0, 291.0, 256.0, 266.0, 261.0, 255.0, 288.0, 288.0, 278.0, 292.0, 261.0, 261.0, 294.0, 
288.0, 289.0, 290.0, 287.0, 289.0, 261.0, 261.0, 265.0, 257.0, 293.0, 286.0, 288.0, 282.0, 291.0, 279.0, 291.0, 288.0, 219.0, 225.0, 261.0, 258.0, 284.0, 292.0, 285.0, 288.0, 294.0, 288.0, 283.0, 293.0, 278.0, 295.0, 281.0, 298.0, 264.0, 258.0, 293.0, 286.0, 266.0, 259.0, 282.0, 291.0, 263.0, 267.0, 287.0, 283.0, 228.0, 237.0, 265.0, 260.0, 261.0, 255.0, 263.0, 262.0, 263.0, 262.0, 291.0, 291.0, 289.0, 290.0, 289.0, 293.0, 265.0, 257.0, 285.0, 288.0, 256.0, 257.0, 263.0, 262.0, 287.0, 283.0, 321.0, 309.0, 313.0, 314.0, 289.0, 293.0, 270.0, 255.0, 268.0, 257.0, 291.0, 291.0, 291.0, 291.0, 240.0, 236.0, 316.0, 317.0, 294.0, 285.0, 281.0, 289.0, 286.0, 293.0, 266.0, 259.0, 260.0, 270.0, 288.0, 291.0, 289.0, 293.0, 260.0, 262.0, 281.0, 298.0, 291.0, 291.0, 264.0, 261.0, 289.0, 287.0, 289.0, 290.0, 299.0, 283.0, 288.0, 291.0, 256.0, 266.0, 265.0, 260.0, 288.0, 294.0, 291.0, 291.0, 260.0, 262.0, 280.0, 293.0, 279.0, 294.0, 294.0, 288.0, 293.0, 280.0, 318.0, 312.0, 262.0, 257.0, 296.0, 283.0, 288.0, 291.0, 297.0, 290.0, 287.0, 295.0, 266.0, 259.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6925542642918654, "mean_inference_ms": 1.2347568136712699, "mean_action_processing_ms": 0.13300886957405056, "mean_env_wait_ms": 0.8335146608846887, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 639.0, "episode_reward_min": 444.0, "episode_reward_mean": 559.41, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 219.0}, "policy_reward_max": {"ppo": 322.0}, "policy_reward_mean": {"ppo": 279.705}, "hist_stats": {"episode_reward": [573.0, 582.0, 579.0, 573.0, 522.0, 522.0, 576.0, 582.0, 530.0, 639.0, 516.0, 570.0, 576.0, 582.0, 579.0, 582.0, 516.0, 582.0, 587.0, 522.0, 516.0, 576.0, 570.0, 522.0, 582.0, 579.0, 576.0, 522.0, 522.0, 579.0, 570.0, 570.0, 579.0, 444.0, 519.0, 576.0, 573.0, 582.0, 576.0, 573.0, 579.0, 522.0, 579.0, 525.0, 573.0, 530.0, 570.0, 465.0, 525.0, 516.0, 525.0, 525.0, 582.0, 579.0, 582.0, 522.0, 
573.0, 513.0, 525.0, 570.0, 630.0, 627.0, 582.0, 525.0, 525.0, 582.0, 582.0, 476.0, 633.0, 579.0, 570.0, 579.0, 525.0, 530.0, 579.0, 582.0, 522.0, 579.0, 582.0, 525.0, 576.0, 579.0, 582.0, 579.0, 522.0, 525.0, 582.0, 582.0, 522.0, 573.0, 573.0, 582.0, 573.0, 630.0, 519.0, 579.0, 579.0, 587.0, 582.0, 525.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [284.0, 289.0, 289.0, 293.0, 289.0, 290.0, 288.0, 285.0, 260.0, 262.0, 264.0, 258.0, 288.0, 288.0, 285.0, 297.0, 264.0, 266.0, 322.0, 317.0, 260.0, 256.0, 279.0, 291.0, 294.0, 282.0, 288.0, 294.0, 288.0, 291.0, 294.0, 288.0, 258.0, 258.0, 291.0, 291.0, 296.0, 291.0, 256.0, 266.0, 261.0, 255.0, 288.0, 288.0, 278.0, 292.0, 261.0, 261.0, 294.0, 288.0, 289.0, 290.0, 287.0, 289.0, 261.0, 261.0, 265.0, 257.0, 293.0, 286.0, 288.0, 282.0, 291.0, 279.0, 291.0, 288.0, 219.0, 225.0, 261.0, 258.0, 284.0, 292.0, 285.0, 288.0, 294.0, 288.0, 283.0, 293.0, 278.0, 295.0, 281.0, 298.0, 264.0, 258.0, 293.0, 286.0, 266.0, 259.0, 282.0, 291.0, 263.0, 267.0, 287.0, 283.0, 228.0, 237.0, 265.0, 260.0, 261.0, 255.0, 263.0, 262.0, 263.0, 262.0, 291.0, 291.0, 289.0, 290.0, 289.0, 293.0, 265.0, 257.0, 285.0, 288.0, 256.0, 257.0, 263.0, 262.0, 287.0, 283.0, 321.0, 309.0, 313.0, 314.0, 289.0, 293.0, 270.0, 255.0, 268.0, 257.0, 291.0, 291.0, 291.0, 291.0, 240.0, 236.0, 316.0, 317.0, 294.0, 285.0, 281.0, 289.0, 286.0, 293.0, 266.0, 259.0, 260.0, 270.0, 288.0, 291.0, 289.0, 293.0, 260.0, 262.0, 281.0, 298.0, 291.0, 291.0, 264.0, 261.0, 289.0, 287.0, 289.0, 290.0, 
299.0, 283.0, 288.0, 291.0, 256.0, 266.0, 265.0, 260.0, 288.0, 294.0, 291.0, 291.0, 260.0, 262.0, 280.0, 293.0, 279.0, 294.0, 294.0, 288.0, 293.0, 280.0, 318.0, 312.0, 262.0, 257.0, 296.0, 283.0, 288.0, 291.0, 297.0, 290.0, 287.0, 295.0, 266.0, 259.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6925542642918654, "mean_inference_ms": 1.2347568136712699, "mean_action_processing_ms": 0.13300886957405056, "mean_env_wait_ms": 0.8335146608846887, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 12697600, "num_agent_steps_trained": 12697600, "num_env_steps_sampled": 6348800, "num_env_steps_trained": 6348800, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 6348800, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 12697600, "timers": {"training_iteration_time_ms": 3559.471, "learn_time_ms": 1047.13, "learn_throughput": 12223.894, "synch_weights_time_ms": 13.498}, "counters": {"num_env_steps_sampled": 6348800, "num_env_steps_trained": 6348800, "num_agent_steps_sampled": 12697600, "num_agent_steps_trained": 12697600}, "done": false, "episodes_total": 15872, "training_iteration": 496, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-32-35", "timestamp": 1666582355, "time_this_iter_s": 3.5756309032440186, "time_total_s": 1879.8070135116577, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1879.8070135116577, "timesteps_since_restore": 0, "iterations_since_restore": 496, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.72, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 192.0, "sparse_reward_min": 60, "sparse_reward_max": 220, "shaped_reward_mean": 171.18, "shaped_reward_min": 60, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.24, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 15.44, "onion_pickup_agent_1_min": 5, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 16.1, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 15.26, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.06, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.03, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.03, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, 
"useful_onion_drop_agent_1_mean": 0.0, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 0, "potting_onion_agent_0_mean": 15.91, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 15.13, "potting_onion_agent_1_min": 4, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 4.97, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 7, "dish_pickup_agent_1_mean": 5.27, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 4.79, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.08, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.04, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.05, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 4.68, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.01, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 4.65, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.97, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.91, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 15.13, "optimal_onion_potting_agent_1_min": 4, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.91, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 15.13, "viable_onion_potting_agent_1_min": 4, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -4.584819544106722e-05, "policy_loss": -0.00045904231956228614, "vf_loss": 
7.8609466552734375, "vf_explained_var": 0.5243180990219116, "kl": 0.004045985639095306, "entropy": 0.7458009719848633, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 6361600, "num_env_steps_trained": 6361600, "num_agent_steps_sampled": 12723200, "num_agent_steps_trained": 12723200}, "sampler_results": {"episode_reward_max": 633.0, "episode_reward_min": 180.0, "episode_reward_mean": 555.18, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 88.0}, "policy_reward_max": {"ppo": 321.0}, "policy_reward_mean": {"ppo": 277.59}, "custom_metrics": {"sparse_reward_mean": 192.0, "sparse_reward_min": 60, "sparse_reward_max": 220, "shaped_reward_mean": 171.18, "shaped_reward_min": 60, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.24, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 15.44, 
"onion_pickup_agent_1_min": 5, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 16.1, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 15.26, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.06, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.03, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.03, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.0, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 0, "potting_onion_agent_0_mean": 15.91, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 15.13, "potting_onion_agent_1_min": 4, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 4.97, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 7, "dish_pickup_agent_1_mean": 5.27, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 4.79, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.08, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.04, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.05, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 4.68, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.01, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 4.65, "soup_delivery_agent_0_min": 1, 
"soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.97, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.91, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 15.13, "optimal_onion_potting_agent_1_min": 4, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.91, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 15.13, "viable_onion_potting_agent_1_min": 4, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 444.0, 519.0, 576.0, 573.0, 582.0, 576.0, 573.0, 579.0, 522.0, 579.0, 525.0, 573.0, 530.0, 570.0, 465.0, 525.0, 516.0, 525.0, 525.0, 582.0, 579.0, 582.0, 522.0, 573.0, 513.0, 525.0, 570.0, 630.0, 627.0, 582.0, 525.0, 525.0, 582.0, 582.0, 476.0, 633.0, 579.0, 570.0, 579.0, 525.0, 530.0, 579.0, 582.0, 522.0, 579.0, 582.0, 525.0, 576.0, 579.0, 582.0, 579.0, 522.0, 525.0, 582.0, 582.0, 522.0, 573.0, 573.0, 582.0, 573.0, 630.0, 519.0, 579.0, 579.0, 587.0, 582.0, 525.0, 579.0, 576.0, 519.0, 570.0, 587.0, 582.0, 516.0, 570.0, 579.0, 579.0, 579.0, 570.0, 516.0, 573.0, 582.0, 492.0, 180.0, 579.0, 627.0, 479.0, 504.0, 579.0, 579.0, 519.0, 576.0, 530.0, 533.0, 590.0, 579.0, 573.0, 579.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [291.0, 288.0, 219.0, 225.0, 261.0, 258.0, 284.0, 292.0, 285.0, 288.0, 294.0, 288.0, 283.0, 293.0, 278.0, 295.0, 281.0, 298.0, 264.0, 258.0, 293.0, 286.0, 266.0, 259.0, 282.0, 291.0, 263.0, 267.0, 287.0, 283.0, 228.0, 237.0, 265.0, 260.0, 261.0, 255.0, 263.0, 262.0, 263.0, 262.0, 291.0, 291.0, 289.0, 290.0, 289.0, 293.0, 265.0, 257.0, 285.0, 
288.0, 256.0, 257.0, 263.0, 262.0, 287.0, 283.0, 321.0, 309.0, 313.0, 314.0, 289.0, 293.0, 270.0, 255.0, 268.0, 257.0, 291.0, 291.0, 291.0, 291.0, 240.0, 236.0, 316.0, 317.0, 294.0, 285.0, 281.0, 289.0, 286.0, 293.0, 266.0, 259.0, 260.0, 270.0, 288.0, 291.0, 289.0, 293.0, 260.0, 262.0, 281.0, 298.0, 291.0, 291.0, 264.0, 261.0, 289.0, 287.0, 289.0, 290.0, 299.0, 283.0, 288.0, 291.0, 256.0, 266.0, 265.0, 260.0, 288.0, 294.0, 291.0, 291.0, 260.0, 262.0, 280.0, 293.0, 279.0, 294.0, 294.0, 288.0, 293.0, 280.0, 318.0, 312.0, 262.0, 257.0, 296.0, 283.0, 288.0, 291.0, 297.0, 290.0, 287.0, 295.0, 266.0, 259.0, 294.0, 285.0, 288.0, 288.0, 262.0, 257.0, 284.0, 286.0, 291.0, 296.0, 293.0, 289.0, 253.0, 263.0, 288.0, 282.0, 290.0, 289.0, 292.0, 287.0, 288.0, 291.0, 283.0, 287.0, 266.0, 250.0, 288.0, 285.0, 289.0, 293.0, 242.0, 250.0, 92.0, 88.0, 293.0, 286.0, 316.0, 311.0, 245.0, 234.0, 245.0, 259.0, 288.0, 291.0, 290.0, 289.0, 263.0, 256.0, 282.0, 294.0, 259.0, 271.0, 268.0, 265.0, 291.0, 299.0, 283.0, 296.0, 284.0, 289.0, 290.0, 289.0, 290.0, 286.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6925057574101982, "mean_inference_ms": 1.2346697990609385, "mean_action_processing_ms": 0.13300347069514712, "mean_env_wait_ms": 0.8334533851780352, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 633.0, "episode_reward_min": 180.0, "episode_reward_mean": 555.18, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 88.0}, "policy_reward_max": {"ppo": 321.0}, "policy_reward_mean": {"ppo": 277.59}, "hist_stats": {"episode_reward": [579.0, 444.0, 519.0, 576.0, 573.0, 582.0, 576.0, 573.0, 579.0, 522.0, 579.0, 525.0, 573.0, 530.0, 570.0, 465.0, 525.0, 516.0, 525.0, 525.0, 582.0, 579.0, 582.0, 522.0, 573.0, 513.0, 525.0, 570.0, 630.0, 627.0, 582.0, 525.0, 525.0, 582.0, 582.0, 476.0, 633.0, 579.0, 570.0, 579.0, 525.0, 530.0, 579.0, 582.0, 522.0, 579.0, 582.0, 525.0, 576.0, 579.0, 582.0, 579.0, 522.0, 525.0, 582.0, 582.0, 522.0, 
573.0, 573.0, 582.0, 573.0, 630.0, 519.0, 579.0, 579.0, 587.0, 582.0, 525.0, 579.0, 576.0, 519.0, 570.0, 587.0, 582.0, 516.0, 570.0, 579.0, 579.0, 579.0, 570.0, 516.0, 573.0, 582.0, 492.0, 180.0, 579.0, 627.0, 479.0, 504.0, 579.0, 579.0, 519.0, 576.0, 530.0, 533.0, 590.0, 579.0, 573.0, 579.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [291.0, 288.0, 219.0, 225.0, 261.0, 258.0, 284.0, 292.0, 285.0, 288.0, 294.0, 288.0, 283.0, 293.0, 278.0, 295.0, 281.0, 298.0, 264.0, 258.0, 293.0, 286.0, 266.0, 259.0, 282.0, 291.0, 263.0, 267.0, 287.0, 283.0, 228.0, 237.0, 265.0, 260.0, 261.0, 255.0, 263.0, 262.0, 263.0, 262.0, 291.0, 291.0, 289.0, 290.0, 289.0, 293.0, 265.0, 257.0, 285.0, 288.0, 256.0, 257.0, 263.0, 262.0, 287.0, 283.0, 321.0, 309.0, 313.0, 314.0, 289.0, 293.0, 270.0, 255.0, 268.0, 257.0, 291.0, 291.0, 291.0, 291.0, 240.0, 236.0, 316.0, 317.0, 294.0, 285.0, 281.0, 289.0, 286.0, 293.0, 266.0, 259.0, 260.0, 270.0, 288.0, 291.0, 289.0, 293.0, 260.0, 262.0, 281.0, 298.0, 291.0, 291.0, 264.0, 261.0, 289.0, 287.0, 289.0, 290.0, 299.0, 283.0, 288.0, 291.0, 256.0, 266.0, 265.0, 260.0, 288.0, 294.0, 291.0, 291.0, 260.0, 262.0, 280.0, 293.0, 279.0, 294.0, 294.0, 288.0, 293.0, 280.0, 318.0, 312.0, 262.0, 257.0, 296.0, 283.0, 288.0, 291.0, 297.0, 290.0, 287.0, 295.0, 266.0, 259.0, 294.0, 285.0, 288.0, 288.0, 262.0, 257.0, 284.0, 286.0, 291.0, 296.0, 293.0, 289.0, 253.0, 263.0, 288.0, 282.0, 290.0, 289.0, 292.0, 287.0, 288.0, 291.0, 283.0, 287.0, 266.0, 250.0, 288.0, 285.0, 289.0, 
293.0, 242.0, 250.0, 92.0, 88.0, 293.0, 286.0, 316.0, 311.0, 245.0, 234.0, 245.0, 259.0, 288.0, 291.0, 290.0, 289.0, 263.0, 256.0, 282.0, 294.0, 259.0, 271.0, 268.0, 265.0, 291.0, 299.0, 283.0, 296.0, 284.0, 289.0, 290.0, 289.0, 290.0, 286.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6925057574101982, "mean_inference_ms": 1.2346697990609385, "mean_action_processing_ms": 0.13300347069514712, "mean_env_wait_ms": 0.8334533851780352, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 12723200, "num_agent_steps_trained": 12723200, "num_env_steps_sampled": 6361600, "num_env_steps_trained": 6361600, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 6361600, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 12723200, "timers": {"training_iteration_time_ms": 3556.961, "learn_time_ms": 1045.48, "learn_throughput": 12243.182, "synch_weights_time_ms": 13.117}, "counters": {"num_env_steps_sampled": 6361600, "num_env_steps_trained": 6361600, "num_agent_steps_sampled": 12723200, "num_agent_steps_trained": 12723200}, "done": false, "episodes_total": 15904, "training_iteration": 497, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-32-39", "timestamp": 1666582359, "time_this_iter_s": 3.6132354736328125, "time_total_s": 1883.4202489852905, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1883.4202489852905, "timesteps_since_restore": 0, "iterations_since_restore": 497, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.533333333333335, "ram_util_percent": 10.6}}
+{"custom_metrics": {"sparse_reward_mean": 194.4, "sparse_reward_min": 60, "sparse_reward_max": 220, "shaped_reward_mean": 173.55, "shaped_reward_min": 60, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.46, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 15.71, "onion_pickup_agent_1_min": 5, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 16.32, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 15.53, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.06, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.04, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.01, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, 
"useful_onion_drop_agent_1_mean": 0.01, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.06, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 15.42, "potting_onion_agent_1_min": 4, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.0, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 7, "dish_pickup_agent_1_mean": 5.32, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 4.85, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.17, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.03, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.05, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 4.76, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.05, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 4.74, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.0, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.06, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 15.42, "optimal_onion_potting_agent_1_min": 4, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.06, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 15.42, "viable_onion_potting_agent_1_min": 4, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0005341880023479462, "policy_loss": -0.0009257107158191502, "vf_loss": 
7.641595840454102, "vf_explained_var": 0.5615800619125366, "kl": 0.0024605700746178627, "entropy": 0.7452712655067444, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 6374400, "num_env_steps_trained": 6374400, "num_agent_steps_sampled": 12748800, "num_agent_steps_trained": 12748800}, "sampler_results": {"episode_reward_max": 633.0, "episode_reward_min": 180.0, "episode_reward_mean": 562.35, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 88.0}, "policy_reward_max": {"ppo": 321.0}, "policy_reward_mean": {"ppo": 281.175}, "custom_metrics": {"sparse_reward_mean": 194.4, "sparse_reward_min": 60, "sparse_reward_max": 220, "shaped_reward_mean": 173.55, "shaped_reward_min": 60, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.46, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 15.71, 
"onion_pickup_agent_1_min": 5, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 16.32, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 15.53, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.06, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.04, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.01, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.01, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.06, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 15.42, "potting_onion_agent_1_min": 4, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.0, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 7, "dish_pickup_agent_1_mean": 5.32, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 4.85, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.17, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.03, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.05, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 4.76, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.05, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 4.74, "soup_delivery_agent_0_min": 1, 
"soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.0, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.06, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 15.42, "optimal_onion_potting_agent_1_min": 4, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.06, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 15.42, "viable_onion_potting_agent_1_min": 4, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [525.0, 582.0, 582.0, 476.0, 633.0, 579.0, 570.0, 579.0, 525.0, 530.0, 579.0, 582.0, 522.0, 579.0, 582.0, 525.0, 576.0, 579.0, 582.0, 579.0, 522.0, 525.0, 582.0, 582.0, 522.0, 573.0, 573.0, 582.0, 573.0, 630.0, 519.0, 579.0, 579.0, 587.0, 582.0, 525.0, 579.0, 576.0, 519.0, 570.0, 587.0, 582.0, 516.0, 570.0, 579.0, 579.0, 579.0, 570.0, 516.0, 573.0, 582.0, 492.0, 180.0, 579.0, 627.0, 479.0, 504.0, 579.0, 579.0, 519.0, 576.0, 530.0, 533.0, 590.0, 579.0, 573.0, 579.0, 576.0, 582.0, 582.0, 522.0, 587.0, 582.0, 579.0, 627.0, 573.0, 525.0, 579.0, 579.0, 582.0, 579.0, 570.0, 630.0, 522.0, 579.0, 570.0, 627.0, 513.0, 582.0, 573.0, 582.0, 579.0, 525.0, 579.0, 582.0, 630.0, 579.0, 579.0, 579.0, 525.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [268.0, 257.0, 291.0, 291.0, 291.0, 291.0, 240.0, 236.0, 316.0, 317.0, 294.0, 285.0, 281.0, 289.0, 286.0, 293.0, 266.0, 259.0, 260.0, 270.0, 288.0, 291.0, 289.0, 293.0, 260.0, 262.0, 281.0, 298.0, 291.0, 291.0, 264.0, 261.0, 289.0, 287.0, 289.0, 290.0, 299.0, 283.0, 288.0, 291.0, 256.0, 266.0, 265.0, 260.0, 288.0, 294.0, 291.0, 291.0, 260.0, 
262.0, 280.0, 293.0, 279.0, 294.0, 294.0, 288.0, 293.0, 280.0, 318.0, 312.0, 262.0, 257.0, 296.0, 283.0, 288.0, 291.0, 297.0, 290.0, 287.0, 295.0, 266.0, 259.0, 294.0, 285.0, 288.0, 288.0, 262.0, 257.0, 284.0, 286.0, 291.0, 296.0, 293.0, 289.0, 253.0, 263.0, 288.0, 282.0, 290.0, 289.0, 292.0, 287.0, 288.0, 291.0, 283.0, 287.0, 266.0, 250.0, 288.0, 285.0, 289.0, 293.0, 242.0, 250.0, 92.0, 88.0, 293.0, 286.0, 316.0, 311.0, 245.0, 234.0, 245.0, 259.0, 288.0, 291.0, 290.0, 289.0, 263.0, 256.0, 282.0, 294.0, 259.0, 271.0, 268.0, 265.0, 291.0, 299.0, 283.0, 296.0, 284.0, 289.0, 290.0, 289.0, 290.0, 286.0, 286.0, 296.0, 294.0, 288.0, 259.0, 263.0, 287.0, 300.0, 294.0, 288.0, 288.0, 291.0, 321.0, 306.0, 288.0, 285.0, 258.0, 267.0, 287.0, 292.0, 293.0, 286.0, 291.0, 291.0, 294.0, 285.0, 278.0, 292.0, 319.0, 311.0, 253.0, 269.0, 291.0, 288.0, 281.0, 289.0, 308.0, 319.0, 262.0, 251.0, 291.0, 291.0, 291.0, 282.0, 285.0, 297.0, 286.0, 293.0, 265.0, 260.0, 290.0, 289.0, 292.0, 290.0, 316.0, 314.0, 291.0, 288.0, 288.0, 291.0, 288.0, 291.0, 257.0, 268.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6924666674580615, "mean_inference_ms": 1.234577253561023, "mean_action_processing_ms": 0.13299922325755478, "mean_env_wait_ms": 0.8334022090532933, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 633.0, "episode_reward_min": 180.0, "episode_reward_mean": 562.35, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 88.0}, "policy_reward_max": {"ppo": 321.0}, "policy_reward_mean": {"ppo": 281.175}, "hist_stats": {"episode_reward": [525.0, 582.0, 582.0, 476.0, 633.0, 579.0, 570.0, 579.0, 525.0, 530.0, 579.0, 582.0, 522.0, 579.0, 582.0, 525.0, 576.0, 579.0, 582.0, 579.0, 522.0, 525.0, 582.0, 582.0, 522.0, 573.0, 573.0, 582.0, 573.0, 630.0, 519.0, 579.0, 579.0, 587.0, 582.0, 525.0, 579.0, 576.0, 519.0, 570.0, 587.0, 582.0, 516.0, 570.0, 579.0, 579.0, 579.0, 570.0, 516.0, 573.0, 582.0, 492.0, 180.0, 579.0, 627.0, 479.0, 504.0, 
579.0, 579.0, 519.0, 576.0, 530.0, 533.0, 590.0, 579.0, 573.0, 579.0, 576.0, 582.0, 582.0, 522.0, 587.0, 582.0, 579.0, 627.0, 573.0, 525.0, 579.0, 579.0, 582.0, 579.0, 570.0, 630.0, 522.0, 579.0, 570.0, 627.0, 513.0, 582.0, 573.0, 582.0, 579.0, 525.0, 579.0, 582.0, 630.0, 579.0, 579.0, 579.0, 525.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [268.0, 257.0, 291.0, 291.0, 291.0, 291.0, 240.0, 236.0, 316.0, 317.0, 294.0, 285.0, 281.0, 289.0, 286.0, 293.0, 266.0, 259.0, 260.0, 270.0, 288.0, 291.0, 289.0, 293.0, 260.0, 262.0, 281.0, 298.0, 291.0, 291.0, 264.0, 261.0, 289.0, 287.0, 289.0, 290.0, 299.0, 283.0, 288.0, 291.0, 256.0, 266.0, 265.0, 260.0, 288.0, 294.0, 291.0, 291.0, 260.0, 262.0, 280.0, 293.0, 279.0, 294.0, 294.0, 288.0, 293.0, 280.0, 318.0, 312.0, 262.0, 257.0, 296.0, 283.0, 288.0, 291.0, 297.0, 290.0, 287.0, 295.0, 266.0, 259.0, 294.0, 285.0, 288.0, 288.0, 262.0, 257.0, 284.0, 286.0, 291.0, 296.0, 293.0, 289.0, 253.0, 263.0, 288.0, 282.0, 290.0, 289.0, 292.0, 287.0, 288.0, 291.0, 283.0, 287.0, 266.0, 250.0, 288.0, 285.0, 289.0, 293.0, 242.0, 250.0, 92.0, 88.0, 293.0, 286.0, 316.0, 311.0, 245.0, 234.0, 245.0, 259.0, 288.0, 291.0, 290.0, 289.0, 263.0, 256.0, 282.0, 294.0, 259.0, 271.0, 268.0, 265.0, 291.0, 299.0, 283.0, 296.0, 284.0, 289.0, 290.0, 289.0, 290.0, 286.0, 286.0, 296.0, 294.0, 288.0, 259.0, 263.0, 287.0, 300.0, 294.0, 288.0, 288.0, 291.0, 321.0, 306.0, 288.0, 285.0, 258.0, 267.0, 287.0, 292.0, 293.0, 286.0, 291.0, 291.0, 294.0, 285.0, 278.0, 292.0, 319.0, 
311.0, 253.0, 269.0, 291.0, 288.0, 281.0, 289.0, 308.0, 319.0, 262.0, 251.0, 291.0, 291.0, 291.0, 282.0, 285.0, 297.0, 286.0, 293.0, 265.0, 260.0, 290.0, 289.0, 292.0, 290.0, 316.0, 314.0, 291.0, 288.0, 288.0, 291.0, 288.0, 291.0, 257.0, 268.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6924666674580615, "mean_inference_ms": 1.234577253561023, "mean_action_processing_ms": 0.13299922325755478, "mean_env_wait_ms": 0.8334022090532933, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 12748800, "num_agent_steps_trained": 12748800, "num_env_steps_sampled": 6374400, "num_env_steps_trained": 6374400, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 6374400, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 12748800, "timers": {"training_iteration_time_ms": 3555.338, "learn_time_ms": 1042.446, "learn_throughput": 12278.813, "synch_weights_time_ms": 12.005}, "counters": {"num_env_steps_sampled": 6374400, "num_env_steps_trained": 6374400, "num_agent_steps_sampled": 12748800, "num_agent_steps_trained": 12748800}, "done": false, "episodes_total": 15936, "training_iteration": 498, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-32-43", "timestamp": 1666582363, "time_this_iter_s": 3.5497889518737793, "time_total_s": 1886.9700379371643, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1886.9700379371643, "timesteps_since_restore": 0, "iterations_since_restore": 498, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.68, "ram_util_percent": 10.62}}
+{"custom_metrics": {"sparse_reward_mean": 196.2, "sparse_reward_min": 60, "sparse_reward_max": 220, "shaped_reward_mean": 174.36, "shaped_reward_min": 60, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.53, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 15.79, "onion_pickup_agent_1_min": 5, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.41, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 15.61, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.06, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.05, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.01, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, 
"useful_onion_drop_agent_1_mean": 0.01, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.14, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 15.5, "potting_onion_agent_1_min": 4, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.05, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 7, "dish_pickup_agent_1_mean": 5.3, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 4.86, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.12, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.04, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.05, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 4.83, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.07, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 4.82, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.01, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.14, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 15.5, "optimal_onion_potting_agent_1_min": 4, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.14, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 15.5, "viable_onion_potting_agent_1_min": 4, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.001022728392854333, "policy_loss": -0.0014227591454982758, "vf_loss": 
7.671509265899658, "vf_explained_var": 0.5705825090408325, "kl": 0.0024727080017328262, "entropy": 0.7342387437820435, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 6387200, "num_env_steps_trained": 6387200, "num_agent_steps_sampled": 12774400, "num_agent_steps_trained": 12774400}, "sampler_results": {"episode_reward_max": 639.0, "episode_reward_min": 180.0, "episode_reward_mean": 566.76, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 88.0}, "policy_reward_max": {"ppo": 322.0}, "policy_reward_mean": {"ppo": 283.38}, "custom_metrics": {"sparse_reward_mean": 196.2, "sparse_reward_min": 60, "sparse_reward_max": 220, "shaped_reward_mean": 174.36, "shaped_reward_min": 60, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.53, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 15.79, 
"onion_pickup_agent_1_min": 5, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.41, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 15.61, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.06, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.05, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.01, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.01, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.14, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 15.5, "potting_onion_agent_1_min": 4, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.05, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 7, "dish_pickup_agent_1_mean": 5.3, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 4.86, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.12, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.04, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.05, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 4.83, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.07, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 4.82, "soup_delivery_agent_0_min": 1, 
"soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.01, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.14, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 15.5, "optimal_onion_potting_agent_1_min": 4, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.14, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 15.5, "viable_onion_potting_agent_1_min": 4, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 587.0, 582.0, 525.0, 579.0, 576.0, 519.0, 570.0, 587.0, 582.0, 516.0, 570.0, 579.0, 579.0, 579.0, 570.0, 516.0, 573.0, 582.0, 492.0, 180.0, 579.0, 627.0, 479.0, 504.0, 579.0, 579.0, 519.0, 576.0, 530.0, 533.0, 590.0, 579.0, 573.0, 579.0, 576.0, 582.0, 582.0, 522.0, 587.0, 582.0, 579.0, 627.0, 573.0, 525.0, 579.0, 579.0, 582.0, 579.0, 570.0, 630.0, 522.0, 579.0, 570.0, 627.0, 513.0, 582.0, 573.0, 582.0, 579.0, 525.0, 579.0, 582.0, 630.0, 579.0, 579.0, 579.0, 525.0, 516.0, 582.0, 639.0, 576.0, 456.0, 633.0, 630.0, 633.0, 582.0, 576.0, 579.0, 579.0, 582.0, 525.0, 405.0, 636.0, 576.0, 516.0, 587.0, 573.0, 576.0, 636.0, 579.0, 527.0, 582.0, 462.0, 633.0, 627.0, 630.0, 633.0, 570.0, 633.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [288.0, 291.0, 297.0, 290.0, 287.0, 295.0, 266.0, 259.0, 294.0, 285.0, 288.0, 288.0, 262.0, 257.0, 284.0, 286.0, 291.0, 296.0, 293.0, 289.0, 253.0, 263.0, 288.0, 282.0, 290.0, 289.0, 292.0, 287.0, 288.0, 291.0, 283.0, 287.0, 266.0, 250.0, 288.0, 285.0, 289.0, 293.0, 242.0, 250.0, 92.0, 88.0, 293.0, 286.0, 316.0, 311.0, 245.0, 234.0, 245.0, 
259.0, 288.0, 291.0, 290.0, 289.0, 263.0, 256.0, 282.0, 294.0, 259.0, 271.0, 268.0, 265.0, 291.0, 299.0, 283.0, 296.0, 284.0, 289.0, 290.0, 289.0, 290.0, 286.0, 286.0, 296.0, 294.0, 288.0, 259.0, 263.0, 287.0, 300.0, 294.0, 288.0, 288.0, 291.0, 321.0, 306.0, 288.0, 285.0, 258.0, 267.0, 287.0, 292.0, 293.0, 286.0, 291.0, 291.0, 294.0, 285.0, 278.0, 292.0, 319.0, 311.0, 253.0, 269.0, 291.0, 288.0, 281.0, 289.0, 308.0, 319.0, 262.0, 251.0, 291.0, 291.0, 291.0, 282.0, 285.0, 297.0, 286.0, 293.0, 265.0, 260.0, 290.0, 289.0, 292.0, 290.0, 316.0, 314.0, 291.0, 288.0, 288.0, 291.0, 288.0, 291.0, 257.0, 268.0, 265.0, 251.0, 291.0, 291.0, 322.0, 317.0, 280.0, 296.0, 228.0, 228.0, 311.0, 322.0, 319.0, 311.0, 317.0, 316.0, 296.0, 286.0, 284.0, 292.0, 292.0, 287.0, 283.0, 296.0, 290.0, 292.0, 262.0, 263.0, 208.0, 197.0, 317.0, 319.0, 289.0, 287.0, 265.0, 251.0, 288.0, 299.0, 290.0, 283.0, 294.0, 282.0, 320.0, 316.0, 290.0, 289.0, 265.0, 262.0, 291.0, 291.0, 231.0, 231.0, 317.0, 316.0, 316.0, 311.0, 309.0, 321.0, 321.0, 312.0, 287.0, 283.0, 317.0, 316.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6924363322164473, "mean_inference_ms": 1.2344869938567593, "mean_action_processing_ms": 0.13299604129951562, "mean_env_wait_ms": 0.8333524514034752, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 639.0, "episode_reward_min": 180.0, "episode_reward_mean": 566.76, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 88.0}, "policy_reward_max": {"ppo": 322.0}, "policy_reward_mean": {"ppo": 283.38}, "hist_stats": {"episode_reward": [579.0, 587.0, 582.0, 525.0, 579.0, 576.0, 519.0, 570.0, 587.0, 582.0, 516.0, 570.0, 579.0, 579.0, 579.0, 570.0, 516.0, 573.0, 582.0, 492.0, 180.0, 579.0, 627.0, 479.0, 504.0, 579.0, 579.0, 519.0, 576.0, 530.0, 533.0, 590.0, 579.0, 573.0, 579.0, 576.0, 582.0, 582.0, 522.0, 587.0, 582.0, 579.0, 627.0, 573.0, 525.0, 579.0, 579.0, 582.0, 579.0, 570.0, 630.0, 522.0, 579.0, 570.0, 627.0, 513.0, 582.0, 
573.0, 582.0, 579.0, 525.0, 579.0, 582.0, 630.0, 579.0, 579.0, 579.0, 525.0, 516.0, 582.0, 639.0, 576.0, 456.0, 633.0, 630.0, 633.0, 582.0, 576.0, 579.0, 579.0, 582.0, 525.0, 405.0, 636.0, 576.0, 516.0, 587.0, 573.0, 576.0, 636.0, 579.0, 527.0, 582.0, 462.0, 633.0, 627.0, 630.0, 633.0, 570.0, 633.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [288.0, 291.0, 297.0, 290.0, 287.0, 295.0, 266.0, 259.0, 294.0, 285.0, 288.0, 288.0, 262.0, 257.0, 284.0, 286.0, 291.0, 296.0, 293.0, 289.0, 253.0, 263.0, 288.0, 282.0, 290.0, 289.0, 292.0, 287.0, 288.0, 291.0, 283.0, 287.0, 266.0, 250.0, 288.0, 285.0, 289.0, 293.0, 242.0, 250.0, 92.0, 88.0, 293.0, 286.0, 316.0, 311.0, 245.0, 234.0, 245.0, 259.0, 288.0, 291.0, 290.0, 289.0, 263.0, 256.0, 282.0, 294.0, 259.0, 271.0, 268.0, 265.0, 291.0, 299.0, 283.0, 296.0, 284.0, 289.0, 290.0, 289.0, 290.0, 286.0, 286.0, 296.0, 294.0, 288.0, 259.0, 263.0, 287.0, 300.0, 294.0, 288.0, 288.0, 291.0, 321.0, 306.0, 288.0, 285.0, 258.0, 267.0, 287.0, 292.0, 293.0, 286.0, 291.0, 291.0, 294.0, 285.0, 278.0, 292.0, 319.0, 311.0, 253.0, 269.0, 291.0, 288.0, 281.0, 289.0, 308.0, 319.0, 262.0, 251.0, 291.0, 291.0, 291.0, 282.0, 285.0, 297.0, 286.0, 293.0, 265.0, 260.0, 290.0, 289.0, 292.0, 290.0, 316.0, 314.0, 291.0, 288.0, 288.0, 291.0, 288.0, 291.0, 257.0, 268.0, 265.0, 251.0, 291.0, 291.0, 322.0, 317.0, 280.0, 296.0, 228.0, 228.0, 311.0, 322.0, 319.0, 311.0, 317.0, 316.0, 296.0, 286.0, 284.0, 292.0, 292.0, 287.0, 283.0, 296.0, 290.0, 292.0, 262.0, 263.0, 208.0, 
197.0, 317.0, 319.0, 289.0, 287.0, 265.0, 251.0, 288.0, 299.0, 290.0, 283.0, 294.0, 282.0, 320.0, 316.0, 290.0, 289.0, 265.0, 262.0, 291.0, 291.0, 231.0, 231.0, 317.0, 316.0, 316.0, 311.0, 309.0, 321.0, 321.0, 312.0, 287.0, 283.0, 317.0, 316.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6924363322164473, "mean_inference_ms": 1.2344869938567593, "mean_action_processing_ms": 0.13299604129951562, "mean_env_wait_ms": 0.8333524514034752, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 12774400, "num_agent_steps_trained": 12774400, "num_env_steps_sampled": 6387200, "num_env_steps_trained": 6387200, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 6387200, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 12774400, "timers": {"training_iteration_time_ms": 3572.611, "learn_time_ms": 1048.765, "learn_throughput": 12204.837, "synch_weights_time_ms": 12.104}, "counters": {"num_env_steps_sampled": 6387200, "num_env_steps_trained": 6387200, "num_agent_steps_sampled": 12774400, "num_agent_steps_trained": 12774400}, "done": false, "episodes_total": 15968, "training_iteration": 499, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-32-47", "timestamp": 1666582367, "time_this_iter_s": 3.6723570823669434, "time_total_s": 1890.6423950195312, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": 
false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", 
"num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, 
"replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1890.6423950195312, "timesteps_since_restore": 0, "iterations_since_restore": 499, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.866666666666667, "ram_util_percent": 10.616666666666667}}
+{"evaluation": {"average_sparse_reward": 200.0, "num_healthy_workers": 0, "num_recreated_workers": 0}, "custom_metrics": {"sparse_reward_mean": 198.6, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 176.73, "shaped_reward_min": 125, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.51, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 16.24, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.42, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 16.02, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.04, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.05, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 
0.0, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 0, "useful_onion_drop_agent_1_mean": 0.01, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.1, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 15.96, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.18, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 5.29, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.06, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.14, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.03, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.03, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 4.99, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.0, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 4.97, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.96, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.1, "optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 15.96, "optimal_onion_potting_agent_1_min": 10, 
"optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.1, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 15.96, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, 
"total_loss": -0.0010659887921065092, "policy_loss": -0.0014806217513978481, "vf_loss": 7.818281173706055, "vf_explained_var": 0.5564167499542236, "kl": 0.0027102380990982056, "entropy": 0.7343902587890625, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 6400000, "num_env_steps_trained": 6400000, "num_agent_steps_sampled": 12800000, "num_agent_steps_trained": 12800000}, "sampler_results": {"episode_reward_max": 639.0, "episode_reward_min": 405.0, "episode_reward_mean": 573.93, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 197.0}, "policy_reward_max": {"ppo": 323.0}, "policy_reward_mean": {"ppo": 286.965}, "custom_metrics": {"sparse_reward_mean": 198.6, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 176.73, "shaped_reward_min": 125, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.51, 
"onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 16.24, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.42, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 16.02, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.04, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.05, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.0, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 0, "useful_onion_drop_agent_1_mean": 0.01, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.1, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 15.96, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.18, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 5.29, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.06, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.14, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.03, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.03, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 4.99, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.0, "soup_pickup_agent_1_min": 3, 
"soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 4.97, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.96, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.1, "optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 15.96, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.1, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 15.96, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 
0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 573.0, 579.0, 576.0, 582.0, 582.0, 522.0, 587.0, 582.0, 579.0, 627.0, 573.0, 525.0, 579.0, 579.0, 582.0, 579.0, 570.0, 630.0, 522.0, 579.0, 570.0, 627.0, 513.0, 582.0, 573.0, 582.0, 579.0, 525.0, 579.0, 582.0, 630.0, 579.0, 579.0, 579.0, 525.0, 516.0, 582.0, 639.0, 576.0, 456.0, 633.0, 630.0, 633.0, 582.0, 576.0, 579.0, 579.0, 582.0, 525.0, 405.0, 636.0, 576.0, 516.0, 587.0, 573.0, 576.0, 636.0, 579.0, 527.0, 582.0, 462.0, 633.0, 627.0, 630.0, 633.0, 570.0, 633.0, 519.0, 627.0, 462.0, 630.0, 590.0, 582.0, 522.0, 579.0, 530.0, 465.0, 465.0, 579.0, 576.0, 576.0, 582.0, 579.0, 633.0, 630.0, 582.0, 516.0, 590.0, 522.0, 582.0, 573.0, 582.0, 582.0, 633.0, 633.0, 579.0, 573.0, 579.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [283.0, 296.0, 284.0, 289.0, 290.0, 289.0, 290.0, 286.0, 286.0, 296.0, 294.0, 288.0, 259.0, 263.0, 287.0, 300.0, 294.0, 288.0, 288.0, 291.0, 321.0, 306.0, 288.0, 285.0, 258.0, 267.0, 287.0, 292.0, 293.0, 286.0, 291.0, 291.0, 294.0, 285.0, 278.0, 292.0, 319.0, 
311.0, 253.0, 269.0, 291.0, 288.0, 281.0, 289.0, 308.0, 319.0, 262.0, 251.0, 291.0, 291.0, 291.0, 282.0, 285.0, 297.0, 286.0, 293.0, 265.0, 260.0, 290.0, 289.0, 292.0, 290.0, 316.0, 314.0, 291.0, 288.0, 288.0, 291.0, 288.0, 291.0, 257.0, 268.0, 265.0, 251.0, 291.0, 291.0, 322.0, 317.0, 280.0, 296.0, 228.0, 228.0, 311.0, 322.0, 319.0, 311.0, 317.0, 316.0, 296.0, 286.0, 284.0, 292.0, 292.0, 287.0, 283.0, 296.0, 290.0, 292.0, 262.0, 263.0, 208.0, 197.0, 317.0, 319.0, 289.0, 287.0, 265.0, 251.0, 288.0, 299.0, 290.0, 283.0, 294.0, 282.0, 320.0, 316.0, 290.0, 289.0, 265.0, 262.0, 291.0, 291.0, 231.0, 231.0, 317.0, 316.0, 316.0, 311.0, 309.0, 321.0, 321.0, 312.0, 287.0, 283.0, 317.0, 316.0, 269.0, 250.0, 313.0, 314.0, 228.0, 234.0, 323.0, 307.0, 296.0, 294.0, 289.0, 293.0, 259.0, 263.0, 290.0, 289.0, 268.0, 262.0, 231.0, 234.0, 235.0, 230.0, 291.0, 288.0, 287.0, 289.0, 293.0, 283.0, 291.0, 291.0, 294.0, 285.0, 319.0, 314.0, 316.0, 314.0, 289.0, 293.0, 253.0, 263.0, 296.0, 294.0, 251.0, 271.0, 291.0, 291.0, 289.0, 284.0, 285.0, 297.0, 294.0, 288.0, 316.0, 317.0, 319.0, 314.0, 289.0, 290.0, 285.0, 288.0, 289.0, 290.0, 291.0, 291.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6923988056837467, "mean_inference_ms": 1.2343933696692113, "mean_action_processing_ms": 0.13299150779059749, "mean_env_wait_ms": 0.8332940369038218, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 639.0, "episode_reward_min": 405.0, "episode_reward_mean": 573.93, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 197.0}, "policy_reward_max": {"ppo": 323.0}, "policy_reward_mean": {"ppo": 286.965}, "hist_stats": {"episode_reward": [579.0, 573.0, 579.0, 576.0, 582.0, 582.0, 522.0, 587.0, 582.0, 579.0, 627.0, 573.0, 525.0, 579.0, 579.0, 582.0, 579.0, 570.0, 630.0, 522.0, 579.0, 570.0, 627.0, 513.0, 582.0, 573.0, 582.0, 579.0, 525.0, 579.0, 582.0, 630.0, 579.0, 579.0, 579.0, 525.0, 516.0, 582.0, 639.0, 576.0, 456.0, 633.0, 630.0, 633.0, 
582.0, 576.0, 579.0, 579.0, 582.0, 525.0, 405.0, 636.0, 576.0, 516.0, 587.0, 573.0, 576.0, 636.0, 579.0, 527.0, 582.0, 462.0, 633.0, 627.0, 630.0, 633.0, 570.0, 633.0, 519.0, 627.0, 462.0, 630.0, 590.0, 582.0, 522.0, 579.0, 530.0, 465.0, 465.0, 579.0, 576.0, 576.0, 582.0, 579.0, 633.0, 630.0, 582.0, 516.0, 590.0, 522.0, 582.0, 573.0, 582.0, 582.0, 633.0, 633.0, 579.0, 573.0, 579.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [283.0, 296.0, 284.0, 289.0, 290.0, 289.0, 290.0, 286.0, 286.0, 296.0, 294.0, 288.0, 259.0, 263.0, 287.0, 300.0, 294.0, 288.0, 288.0, 291.0, 321.0, 306.0, 288.0, 285.0, 258.0, 267.0, 287.0, 292.0, 293.0, 286.0, 291.0, 291.0, 294.0, 285.0, 278.0, 292.0, 319.0, 311.0, 253.0, 269.0, 291.0, 288.0, 281.0, 289.0, 308.0, 319.0, 262.0, 251.0, 291.0, 291.0, 291.0, 282.0, 285.0, 297.0, 286.0, 293.0, 265.0, 260.0, 290.0, 289.0, 292.0, 290.0, 316.0, 314.0, 291.0, 288.0, 288.0, 291.0, 288.0, 291.0, 257.0, 268.0, 265.0, 251.0, 291.0, 291.0, 322.0, 317.0, 280.0, 296.0, 228.0, 228.0, 311.0, 322.0, 319.0, 311.0, 317.0, 316.0, 296.0, 286.0, 284.0, 292.0, 292.0, 287.0, 283.0, 296.0, 290.0, 292.0, 262.0, 263.0, 208.0, 197.0, 317.0, 319.0, 289.0, 287.0, 265.0, 251.0, 288.0, 299.0, 290.0, 283.0, 294.0, 282.0, 320.0, 316.0, 290.0, 289.0, 265.0, 262.0, 291.0, 291.0, 231.0, 231.0, 317.0, 316.0, 316.0, 311.0, 309.0, 321.0, 321.0, 312.0, 287.0, 283.0, 317.0, 316.0, 269.0, 250.0, 313.0, 314.0, 228.0, 234.0, 323.0, 307.0, 296.0, 294.0, 289.0, 293.0, 259.0, 263.0, 290.0, 289.0, 
268.0, 262.0, 231.0, 234.0, 235.0, 230.0, 291.0, 288.0, 287.0, 289.0, 293.0, 283.0, 291.0, 291.0, 294.0, 285.0, 319.0, 314.0, 316.0, 314.0, 289.0, 293.0, 253.0, 263.0, 296.0, 294.0, 251.0, 271.0, 291.0, 291.0, 289.0, 284.0, 285.0, 297.0, 294.0, 288.0, 316.0, 317.0, 319.0, 314.0, 289.0, 290.0, 285.0, 288.0, 289.0, 290.0, 291.0, 291.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6923988056837467, "mean_inference_ms": 1.2343933696692113, "mean_action_processing_ms": 0.13299150779059749, "mean_env_wait_ms": 0.8332940369038218, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 12800000, "num_agent_steps_trained": 12800000, "num_env_steps_sampled": 6400000, "num_env_steps_trained": 6400000, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 6400000, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 12800000, "timers": {"training_iteration_time_ms": 3575.465, "learn_time_ms": 1047.578, "learn_throughput": 12218.661, "synch_weights_time_ms": 13.203}, "counters": {"num_env_steps_sampled": 6400000, "num_env_steps_trained": 6400000, "num_agent_steps_sampled": 12800000, "num_agent_steps_trained": 12800000}, "done": false, "episodes_total": 16000, "training_iteration": 500, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-32-54", "timestamp": 1666582374, "time_this_iter_s": 7.072265625, "time_total_s": 1897.7146606445312, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": 
{"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, 
"enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, 
"tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": 
"deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, 
"keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f515018d9d0>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function 
get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": "<ray.rllib.policy.policy.PolicySpec object at 0x7f51940ca410>"}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": "<function 
gen_trainer_from_params.<locals>.select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "<class 'human_aware_rl.rllib.rllib.TrainingCallbacks'>", "create_env_on_driver": false, "custom_eval_function": "<function get_rllib_eval_function.<locals>._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1897.7146606445312, "timesteps_since_restore": 0, "iterations_since_restore": 500, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 15.12, "ram_util_percent": 10.599999999999998}}
diff --git a/human_aware_rl/rllib/rllib.py b/human_aware_rl/rllib/rllib.py
index 228fceff..1d65607b 100644
--- a/human_aware_rl/rllib/rllib.py
+++ b/human_aware_rl/rllib/rllib.py
@@ -7,8 +7,8 @@
 from ray.tune.logger import UnifiedLogger
 from ray.tune.result import DEFAULT_RESULTS_DIR
 from ray.rllib.env.multi_agent_env import MultiAgentEnv
-from ray.rllib.agents.callbacks import DefaultCallbacks
-from ray.rllib.agents.ppo.ppo import PPOTrainer
+from ray.rllib.algorithms.callbacks import DefaultCallbacks
+from ray.rllib.agents.ppo import PPOTrainer
 from ray.rllib.models import ModelCatalog
 from human_aware_rl.rllib.utils import softmax, get_base_ae, get_required_arguments, iterable_equal
 from datetime import datetime
@@ -140,10 +140,13 @@ def __init__(self, base_env, reward_shaping_factor=0.0, reward_shaping_horizon=0
         self.reward_shaping_factor = reward_shaping_factor
         self.reward_shaping_horizon = reward_shaping_horizon
         self.use_phi = use_phi
-        self._setup_observation_space()
-        self.action_space = gym.spaces.Discrete(len(Action.ALL_ACTIONS))
         self.anneal_bc_factor(0)
-        self.reset()
+        self._agent_ids = set(self.reset().keys())
+        #fixes deprecation warnings
+        self._spaces_in_preferred_format = True
+
+        
+
     
     def _validate_featurize_fns(self, mapping):
         assert 'ppo' in mapping, "At least one ppo agent must be specified"
@@ -166,12 +169,19 @@ def _validate_schedule(self, schedule):
         if (schedule[-1][0] < float('inf')):
             schedule.append((float('inf'), schedule[-1][1]))
 
-    def _setup_observation_space(self):
-        dummy_state = self.base_env.mdp.get_standard_start_state()
+    def _setup_action_space(self,agents):
+        action_sp = {}
+        for agent in agents:
+            action_sp[agent] = gym.spaces.Discrete(len(Action.ALL_ACTIONS))
+        self.action_space = gym.spaces.Dict(action_sp)
+        self.shared_action_space = gym.spaces.Discrete(len(Action.ALL_ACTIONS))
 
+    def _setup_observation_space(self,agents):
+        dummy_state = self.base_env.mdp.get_standard_start_state()
         #ppo observation
         featurize_fn_ppo = lambda state: self.base_env.lossless_state_encoding_mdp(state)
         obs_shape = featurize_fn_ppo(dummy_state)[0].shape
+
         high = np.ones(obs_shape) * float("inf")
         low = np.ones(obs_shape) * 0
         self.ppo_observation_space = gym.spaces.Box(np.float32(low), np.float32(high), dtype=np.float32)
@@ -182,6 +192,16 @@ def _setup_observation_space(self):
         high = np.ones(obs_shape) * 100
         low = np.ones(obs_shape) * -100
         self.bc_observation_space = gym.spaces.Box(np.float32(low), np.float32(high), dtype=np.float32)
+        #hardcode mapping between observation space and agent
+        ob_space = {}
+        for agent in agents:
+            if agent.startswith("ppo"):
+                ob_space[agent] = self.ppo_observation_space
+            else:
+                ob_space[agent] = self.bc_observation_space
+        self.observation_space = gym.spaces.Dict(ob_space)
+
+
 
     def _get_featurize_fn(self, agent_id):
         if agent_id.startswith('ppo'):
@@ -209,7 +229,11 @@ def _populate_agents(self):
         # Ensure agent names are unique
         agents[0] = agents[0] + '_0'
         agents[1] = agents[1] + '_1'
-        
+
+        #logically the action_space and the observation_space should be set along with the generated agents
+        #the agents are also randomized in each iteration if bc agents are allowed, which requires reestablishing the action & observation space
+        self._setup_action_space(agents)
+        self._setup_observation_space(agents)
         return agents
 
     def _anneal(self, start_v, curr_t, end_t, end_v=0, start_t=0):
@@ -233,7 +257,8 @@ def step(self, action_dict):
             observation: formatted to be standard input for self.agent_idx's policy
         """
         action = [action_dict[self.curr_agents[0]], action_dict[self.curr_agents[1]]]
-        assert all(self.action_space.contains(a) for a in action), "%r (%s) invalid"%(action, type(action))
+
+        assert all(self.action_space[agent].contains(action_dict[agent]) for agent in action_dict), "%r (%s) invalid"%(action, type(action))
         joint_action = [Action.INDEX_TO_ACTION[a] for a in action]
         # take a step in the current base environment
 
@@ -362,7 +387,7 @@ def on_episode_end(self, worker, base_env, policies, episode, **kwargs):
         shaped_reward (int) - total reward shaping reward the agent earned this episode
         """
         # Get rllib.OvercookedMultiAgentEnv refernce from rllib wraper
-        env = base_env.get_unwrapped()[0]
+        env = base_env.get_sub_environments()[0]
         # Both agents share the same info so it doesn't matter whose we use, just use 0th agent's
         info_dict = episode.last_info_for(env.curr_agents[0])
 
@@ -507,8 +532,8 @@ def gen_trainer_from_params(params):
     if not ray.is_initialized():
         init_params = {
             "ignore_reinit_error" : True,
-            "include_webui" : False,
-            "temp_dir" : params['ray_params']['temp_dir'],
+            "include_dashboard" : False,
+            "_temp_dir" : params['ray_params']['temp_dir'],
             "log_to_driver" : params['verbose'],
             "logging_level" : logging.INFO if params['verbose'] else logging.CRITICAL
         }
@@ -533,16 +558,16 @@ def gen_policy(policy_type="ppo"):
         if policy_type == "ppo":
             config = {
                 "model" : {
-                    "custom_options" : model_params,
+                    'custom_model_config' : model_params,
                     
                     "custom_model" : "MyPPOModel"
                 }
             }
-            return (None, env.ppo_observation_space, env.action_space, config)
+            return (None, env.ppo_observation_space, env.shared_action_space, config)
         elif policy_type == "bc":
             bc_cls = bc_params['bc_policy_cls']
             bc_config = bc_params['bc_config']
-            return (bc_cls, env.bc_observation_space, env.action_space, bc_config)
+            return (bc_cls, env.bc_observation_space, env.shared_action_space, bc_config)
 
     # Rllib compatible way of setting the directory we store agent checkpoints in
     logdir_prefix = "{0}_{1}_{2}".format(params["experiment_name"], params['training_params']['seed'], timestr)
@@ -572,13 +597,13 @@ def custom_logger_creator(config):
 
     multi_agent_config['policies'] = { policy : gen_policy(policy) for policy in all_policies }
 
-    def select_policy(agent_id):
+    def select_policy(agent_id, episode, worker, **kwargs):
         if agent_id.startswith('ppo'):
             return 'ppo'
         if agent_id.startswith('bc'):
             return 'bc'
     multi_agent_config['policy_mapping_fn'] = select_policy
-    multi_agent_config['policies_to_train'] = 'ppo'
+    multi_agent_config['policies_to_train'] = {'ppo'}
 
     if "outer_shape" not in environment_params:
         environment_params["outer_shape"] = None
@@ -592,7 +617,7 @@ def select_policy(agent_id):
                                         environment_params["outer_shape"], 'ppo', 'ppo' if self_play else 'bc',
                                         verbose=params['verbose']),
         "env_config" : environment_params,
-        "eager" : False,
+        "eager_tracing" : False,
         **training_params
     }, logger_creator=custom_logger_creator)
     return trainer
@@ -605,8 +630,7 @@ def select_policy(agent_id):
 def save_trainer(trainer, params, path=None):
     """
     Saves a serialized trainer checkpoint at `path`. If none provided, the default path is
-    ~/ray_results/<experiment_results_dir>/checkpoint_<i>/checkpoint-<i>
-
+    ~/ray_results/<experiment_results_dir>/checkpoint_<i>
     Note that `params` should follow the same schema as the dict passed into `gen_trainer_from_params`
     """
     # Save trainer
@@ -624,7 +648,7 @@ def save_trainer(trainer, params, path=None):
 def load_trainer(save_path, true_num_workers=False):
     """
     Returns a ray compatible trainer object that was previously saved at `save_path` by a call to `save_trainer`
-    Note that `save_path` is the full path to the checkpoint FILE, not the checkpoint directory
+    Note that `save_path` is the full path to the checkpoint directory
     Additionally we decide if we want to use the same number of remote workers (see ray library Training APIs)
     as we store in the previous configuration, by default = False, we use only the local worker
     (see ray library API)
@@ -634,35 +658,13 @@ def load_trainer(save_path, true_num_workers=False):
     with open(config_path, "rb") as f:
         # We use dill (instead of pickle) here because we must deserialize functions
         config = dill.load(f)
-
     if not true_num_workers:
         # Override this param to lower overhead in trainer creation
         config['training_params']['num_workers'] = 0
 
-    # Get un-trained trainer object with proper config
-    trainer = gen_trainer_from_params(config)
-
-    # Load weights into dummy object
-    trainer.restore(save_path)
-    return trainer
-
-def load_trainer(save_path, true_num_workers=False):
-    """
-    Returns a ray compatible trainer object that was previously saved at `save_path` by a call to `save_trainer`
-    Note that `save_path` is the full path to the checkpoint FILE, not the checkpoint directory
-    Additionally we decide if we want to use the same number of remote workers (see ray library Training APIs)
-    as we store in the previous configuration, by default = False, we use only the local worker
-    (see ray library API)
-    """
-    # Read in params used to create trainer
-    config_path = os.path.join(os.path.dirname(save_path), "config.pkl")
-    with open(config_path, "rb") as f:
-        # We use dill (instead of pickle) here because we must deserialize functions
-        config = dill.load(f)
-
-    if not true_num_workers:
-        # Override this param to lower overhead in trainer creation
-        config['training_params']['num_workers'] = 0
+    if config["training_params"]["num_gpus"] == 1:
+        #all other configs for the server can be kept for local testing 
+        config["training_params"]["num_gpus"] = 0
 
     if "trained_example" in save_path:
         # For the unit testing we update the result directory in order to avoid an error
@@ -670,7 +672,6 @@ def load_trainer(save_path, true_num_workers=False):
 
     # Get un-trained trainer object with proper config
     trainer = gen_trainer_from_params(config)
-
     # Load weights into dummy object
     trainer.restore(save_path)
     return trainer
diff --git a/requirements.txt b/requirements.txt
index c8d63a0a..88810759 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -7,11 +7,11 @@ pymongo
 dill
 matplotlib
 requests
-numpy==1.19.5
+numpy
 seaborn==0.9.0
 pygame==1.9.5
-ray==0.8.5
+ray==2.0.0
 protobuf
-tensorflow==2.0.2
+tensorflow==2.10
 -e ./overcooked_ai
 -e . 
\ No newline at end of file
diff --git a/setup.py b/setup.py
index 7b59f4f4..d2beb4d4 100644
--- a/setup.py
+++ b/setup.py
@@ -17,11 +17,11 @@
         "dill",
         "matplotlib",
         "requests",
-        "numpy==1.19.5",
+        "numpy",
         "seaborn==0.9.0",
         "pygame==1.9.5",
-        "ray[rllib]==0.8.5",
+        "ray[rllib]==2.0.0",
         "protobuf",
-        "tensorflow==2.0.2",
+        "tensorflow==2.10",
     ],
 )

From 8e01e315d5a5f7730137e1153bf775d2e791e274 Mon Sep 17 00:00:00 2001
From: jyan1999 <jyan19991112@gmail.com>
Date: Tue, 25 Oct 2022 12:57:14 -0700
Subject: [PATCH 37/38] Update Ray Updated ray[rllib] >= 2.0.0, Tensorflow ==
 2.10 Updated model configurations to comply with the new API

---
 human_aware_rl/ppo/ppo_rllib.py | 2 --
 requirements.txt                | 2 +-
 setup.py                        | 2 +-
 3 files changed, 2 insertions(+), 4 deletions(-)

diff --git a/human_aware_rl/ppo/ppo_rllib.py b/human_aware_rl/ppo/ppo_rllib.py
index c2ec7160..d9ece769 100644
--- a/human_aware_rl/ppo/ppo_rllib.py
+++ b/human_aware_rl/ppo/ppo_rllib.py
@@ -69,7 +69,6 @@ def __init__(self, obs_space, action_space, num_outputs, model_config, name, **k
         value_out = tf.keras.layers.Dense(1)(out)
 
         self.base_model = tf.keras.Model(self.inputs, [layer_out, value_out])
-        #self.register_variables(self.base_model.variables)
 
 
     def forward(self, input_dict, state=None, seq_lens=None):
@@ -175,7 +174,6 @@ def __init__(self, obs_space, action_space, num_outputs, model_config, name, **k
             inputs=[flattened_obs_inputs, seq_in, lstm_h_in, lstm_c_in],
             outputs=[layer_out, value_out, h_out, c_out]
         )
-        #self.register_variables(self.base_model.variables)
 
 
     def forward_rnn(self, inputs, state, seq_lens):
diff --git a/requirements.txt b/requirements.txt
index 88810759..85b8092d 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -10,7 +10,7 @@ requests
 numpy
 seaborn==0.9.0
 pygame==1.9.5
-ray==2.0.0
+ray[rllib]>=2.0.0
 protobuf
 tensorflow==2.10
 -e ./overcooked_ai
diff --git a/setup.py b/setup.py
index d2beb4d4..4a12d57c 100644
--- a/setup.py
+++ b/setup.py
@@ -20,7 +20,7 @@
         "numpy",
         "seaborn==0.9.0",
         "pygame==1.9.5",
-        "ray[rllib]==2.0.0",
+        "ray[rllib]>=2.0.0",
         "protobuf",
         "tensorflow==2.10",
     ],

From 55c37ae04e77f867cf452a0df4c98642d2034027 Mon Sep 17 00:00:00 2001
From: jyan1999 <49133332+jyan1999@users.noreply.github.com>
Date: Sun, 27 Nov 2022 15:55:00 -0800
Subject: [PATCH 38/38] Update README.md

Overwrote master with neurips2019 commit. Updated readme.
---
 README.md | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/README.md b/README.md
index d58b30ab..855199f8 100644
--- a/README.md
+++ b/README.md
@@ -1,6 +1,12 @@
 # Human-Aware Reinforcement Learning
 
-This code can be used to reproduce the results in the paper [On the Utility of Learning about Humans for Human-AI Coordination](https://arxiv.org/abs/1910.05789). *Note that this repository uses a specific older commit of the [overcooked_ai repository](https://github.com/HumanCompatibleAI/overcooked_ai)*, and should not be expected to work with the current version of that repository.
+## :warning: DEPRECATION WARNING
+
+This repo is being deprecated and should no longer be used independently. This repo is now a module under the [overcooked_ai](https://github.com/HumanCompatibleAI/overcooked_ai/tree/master) project as we are in the process of consolidating several repos into one for convenience and better maintainability. 
+
+This repo should now **only** be used to reproduce the results in the 2019 paper [On the Utility of Learning about Humans for Human-AI Coordination](https://arxiv.org/abs/1910.05789). 
+
+*Note that this repository uses a specific older commit of the [overcooked_ai repository](https://github.com/HumanCompatibleAI/overcooked_ai)*, and should not be expected to work with the current version of that repository.
 
 To play the game with trained agents, you can use [Overcooked-Demo](https://github.com/HumanCompatibleAI/overcooked-demo).